// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2011 Intel Corporation. All rights reserved. * Copyright (C) 2014 Marvell International Ltd. */ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/nfc.h> #include "nfc.h" #include "llcp.h" static u8 llcp_magic[3] = {0x46, 0x66, 0x6d}; static LIST_HEAD(llcp_devices); /* Protects llcp_devices list */ static DEFINE_SPINLOCK(llcp_devices_lock); static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb); void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *sk) { write_lock(&l->lock); sk_add_node(sk, &l->head); write_unlock(&l->lock); } void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *sk) { write_lock(&l->lock); sk_del_node_init(sk); write_unlock(&l->lock); } void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock) { sock->remote_rw = LLCP_DEFAULT_RW; sock->remote_miu = LLCP_MAX_MIU + 1; } static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local = sock->local; struct sk_buff *s, *tmp; skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); if (local == NULL) return; /* Search for local pending SKBs that are related to this socket */ skb_queue_walk_safe(&local->tx_queue, s, tmp) { if (s->sk != &sock->sk) continue; skb_unlink(s, &local->tx_queue); kfree_skb(s); } } static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device, int err) { struct sock *sk; struct hlist_node *tmp; struct nfc_llcp_sock *llcp_sock; skb_queue_purge(&local->tx_queue); write_lock(&local->sockets.lock); sk_for_each_safe(sk, tmp, &local->sockets.head) { llcp_sock = nfc_llcp_sock(sk); bh_lock_sock(sk); nfc_llcp_socket_purge(llcp_sock); if (sk->sk_state == LLCP_CONNECTED) nfc_put_device(llcp_sock->dev); if (sk->sk_state == LLCP_LISTEN) { struct nfc_llcp_sock *lsk, *n; struct sock *accept_sk; list_for_each_entry_safe(lsk, n, &llcp_sock->accept_queue, accept_queue) { accept_sk = &lsk->sk; bh_lock_sock(accept_sk); nfc_llcp_accept_unlink(accept_sk); if (err) accept_sk->sk_err = err; accept_sk->sk_state = LLCP_CLOSED; accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); } } if (err) sk->sk_err = err; sk->sk_state = LLCP_CLOSED; sk->sk_state_change(sk); bh_unlock_sock(sk); sk_del_node_init(sk); } write_unlock(&local->sockets.lock); /* If we still have a device, we keep the RAW sockets alive */ if (device == true) return; write_lock(&local->raw_sockets.lock); sk_for_each_safe(sk, tmp, &local->raw_sockets.head) { llcp_sock = nfc_llcp_sock(sk); bh_lock_sock(sk); nfc_llcp_socket_purge(llcp_sock); if (err) sk->sk_err = err; sk->sk_state = LLCP_CLOSED; sk->sk_state_change(sk); bh_unlock_sock(sk); sk_del_node_init(sk); } write_unlock(&local->raw_sockets.lock); } static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) { /* Since using nfc_llcp_local may result in usage of nfc_dev, whenever * we hold a reference to local, we also need to hold a reference to * the device to avoid UAF.
*/ if (!nfc_get_device(local->dev->idx)) return NULL; kref_get(&local->ref); return local; } static void local_cleanup(struct nfc_llcp_local *local) { nfc_llcp_socket_release(local, false, ENXIO); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); local->rx_pending = NULL; del_timer_sync(&local->sdreq_timer); cancel_work_sync(&local->sdreq_timeout_work); nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); } static void local_release(struct kref *ref) { struct nfc_llcp_local *local; local = container_of(ref, struct nfc_llcp_local, ref); local_cleanup(local); kfree(local); } int nfc_llcp_local_put(struct nfc_llcp_local *local) { struct nfc_dev *dev; int ret; if (local == NULL) return 0; dev = local->dev; ret = kref_put(&local->ref, local_release); nfc_put_device(dev); return ret; } static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, u8 ssap, u8 dsap) { struct sock *sk; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("ssap dsap %d %d\n", ssap, dsap); if (ssap == 0 && dsap == 0) return NULL; read_lock(&local->sockets.lock); llcp_sock = NULL; sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) { llcp_sock = tmp_sock; sock_hold(&llcp_sock->sk); break; } } read_unlock(&local->sockets.lock); return llcp_sock; } static void nfc_llcp_sock_put(struct nfc_llcp_sock *sock) { sock_put(&sock->sk); } static void nfc_llcp_timeout_work(struct work_struct *work) { struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, timeout_work); nfc_dep_link_down(local->dev); } static void nfc_llcp_symm_timer(struct timer_list *t) { struct nfc_llcp_local *local = from_timer(local, t, link_timer); pr_err("SYMM timeout\n"); schedule_work(&local->timeout_work); } static void nfc_llcp_sdreq_timeout_work(struct work_struct *work) { unsigned long time; HLIST_HEAD(nl_sdres_list); struct hlist_node *n; struct nfc_llcp_sdp_tlv *sdp; struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, sdreq_timeout_work); mutex_lock(&local->sdreq_lock); time = jiffies - msecs_to_jiffies(3 * local->remote_lto); hlist_for_each_entry_safe(sdp, n, &local->pending_sdreqs, node) { if (time_after(sdp->time, time)) continue; sdp->sap = LLCP_SDP_UNBOUND; hlist_del(&sdp->node); hlist_add_head(&sdp->node, &nl_sdres_list); } if (!hlist_empty(&local->pending_sdreqs)) mod_timer(&local->sdreq_timer, jiffies + msecs_to_jiffies(3 * local->remote_lto)); mutex_unlock(&local->sdreq_lock); if (!hlist_empty(&nl_sdres_list)) nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); } static void nfc_llcp_sdreq_timer(struct timer_list *t) { struct nfc_llcp_local *local = from_timer(local, t, sdreq_timer); schedule_work(&local->sdreq_timeout_work); } struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) { struct nfc_llcp_local *local; struct nfc_llcp_local *res = NULL; spin_lock(&llcp_devices_lock); list_for_each_entry(local, &llcp_devices, list) if (local->dev == dev) { res = nfc_llcp_local_get(local); break; } spin_unlock(&llcp_devices_lock); return res; } static struct nfc_llcp_local *nfc_llcp_remove_local(struct nfc_dev *dev) { struct nfc_llcp_local *local, *tmp; spin_lock(&llcp_devices_lock); list_for_each_entry_safe(local, tmp, &llcp_devices, list) if (local->dev == dev) { list_del(&local->list); spin_unlock(&llcp_devices_lock); return local; } 
spin_unlock(&llcp_devices_lock); pr_warn("Shutting down device not found\n"); return NULL; } static char *wks[] = { NULL, NULL, /* SDP */ "urn:nfc:sn:ip", "urn:nfc:sn:obex", "urn:nfc:sn:snep", }; static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len) { int sap, num_wks; pr_debug("%s\n", service_name); if (service_name == NULL) return -EINVAL; num_wks = ARRAY_SIZE(wks); for (sap = 0; sap < num_wks; sap++) { if (wks[sap] == NULL) continue; if (strncmp(wks[sap], service_name, service_name_len) == 0) return sap; } return -EINVAL; } static struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, const u8 *sn, size_t sn_len, bool needref) { struct sock *sk; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("sn %zd %p\n", sn_len, sn); if (sn == NULL || sn_len == 0) return NULL; read_lock(&local->sockets.lock); llcp_sock = NULL; sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); pr_debug("llcp sock %p\n", tmp_sock); if (tmp_sock->sk.sk_type == SOCK_STREAM && tmp_sock->sk.sk_state != LLCP_LISTEN) continue; if (tmp_sock->sk.sk_type == SOCK_DGRAM && tmp_sock->sk.sk_state != LLCP_BOUND) continue; if (tmp_sock->service_name == NULL || tmp_sock->service_name_len == 0) continue; if (tmp_sock->service_name_len != sn_len) continue; if (memcmp(sn, tmp_sock->service_name, sn_len) == 0) { llcp_sock = tmp_sock; if (needref) sock_hold(&llcp_sock->sk); break; } } read_unlock(&local->sockets.lock); pr_debug("Found llcp sock %p\n", llcp_sock); return llcp_sock; } u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, struct nfc_llcp_sock *sock) { mutex_lock(&local->sdp_lock); if (sock->service_name != NULL && sock->service_name_len > 0) { int ssap = nfc_llcp_wks_sap(sock->service_name, sock->service_name_len); if (ssap > 0) { pr_debug("WKS %d\n", ssap); /* This is a WKS, let's check if it's free */ if (test_bit(ssap, &local->local_wks)) { mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; } set_bit(ssap, &local->local_wks); mutex_unlock(&local->sdp_lock); return ssap; } /* * Check if there already is a non WKS socket bound * to this service name. 
*/ if (nfc_llcp_sock_from_sn(local, sock->service_name, sock->service_name_len, false) != NULL) { mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; } mutex_unlock(&local->sdp_lock); return LLCP_SDP_UNBOUND; } else if (sock->ssap != 0 && sock->ssap < LLCP_WKS_NUM_SAP) { if (!test_bit(sock->ssap, &local->local_wks)) { set_bit(sock->ssap, &local->local_wks); mutex_unlock(&local->sdp_lock); return sock->ssap; } } mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; } u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local) { u8 local_ssap; mutex_lock(&local->sdp_lock); local_ssap = find_first_zero_bit(&local->local_sap, LLCP_LOCAL_NUM_SAP); if (local_ssap == LLCP_LOCAL_NUM_SAP) { mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; } set_bit(local_ssap, &local->local_sap); mutex_unlock(&local->sdp_lock); return local_ssap + LLCP_LOCAL_SAP_OFFSET; } void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap) { u8 local_ssap; unsigned long *sdp; if (ssap < LLCP_WKS_NUM_SAP) { local_ssap = ssap; sdp = &local->local_wks; } else if (ssap < LLCP_LOCAL_NUM_SAP) { atomic_t *client_cnt; local_ssap = ssap - LLCP_WKS_NUM_SAP; sdp = &local->local_sdp; client_cnt = &local->local_sdp_cnt[local_ssap]; pr_debug("%d clients\n", atomic_read(client_cnt)); mutex_lock(&local->sdp_lock); if (atomic_dec_and_test(client_cnt)) { struct nfc_llcp_sock *l_sock; pr_debug("No more clients for SAP %d\n", ssap); clear_bit(local_ssap, sdp); /* Find the listening sock and set it back to UNBOUND */ l_sock = nfc_llcp_sock_get(local, ssap, LLCP_SAP_SDP); if (l_sock) { l_sock->ssap = LLCP_SDP_UNBOUND; nfc_llcp_sock_put(l_sock); } } mutex_unlock(&local->sdp_lock); return; } else if (ssap < LLCP_MAX_SAP) { local_ssap = ssap - LLCP_LOCAL_NUM_SAP; sdp = &local->local_sap; } else { return; } mutex_lock(&local->sdp_lock); clear_bit(local_ssap, sdp); mutex_unlock(&local->sdp_lock); } static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local) { u8 ssap; mutex_lock(&local->sdp_lock); ssap = find_first_zero_bit(&local->local_sdp, LLCP_SDP_NUM_SAP); if (ssap == LLCP_SDP_NUM_SAP) { mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; } pr_debug("SDP ssap %d\n", LLCP_WKS_NUM_SAP + ssap); set_bit(ssap, &local->local_sdp); mutex_unlock(&local->sdp_lock); return LLCP_WKS_NUM_SAP + ssap; } static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { u8 *gb_cur, version, version_length; u8 lto_length, wks_length, miux_length; const u8 *version_tlv = NULL, *lto_tlv = NULL, *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; int ret = 0; version = LLCP_VERSION_11; version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, 1, &version_length); if (!version_tlv) { ret = -ENOMEM; goto out; } gb_len += version_length; lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, &lto_length); if (!lto_tlv) { ret = -ENOMEM; goto out; } gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); if (!wks_tlv) { ret = -ENOMEM; goto out; } gb_len += wks_length; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_length); if (!miux_tlv) { ret = -ENOMEM; goto out; } gb_len += miux_length; gb_len += ARRAY_SIZE(llcp_magic); if (gb_len > NFC_MAX_GT_LEN) { ret = -EINVAL; goto out; } gb_cur = local->gb; memcpy(gb_cur, llcp_magic, ARRAY_SIZE(llcp_magic)); gb_cur += ARRAY_SIZE(llcp_magic); memcpy(gb_cur, version_tlv, version_length); gb_cur += version_length; memcpy(gb_cur, lto_tlv, lto_length); gb_cur += 
lto_length; memcpy(gb_cur, wks_tlv, wks_length); gb_cur += wks_length; memcpy(gb_cur, miux_tlv, miux_length); gb_cur += miux_length; local->gb_len = gb_len; out: kfree(version_tlv); kfree(lto_tlv); kfree(wks_tlv); kfree(miux_tlv); return ret; } u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) { struct nfc_llcp_local *local; local = nfc_llcp_find_local(dev); if (local == NULL) { *general_bytes_len = 0; return NULL; } nfc_llcp_build_gb(local); *general_bytes_len = local->gb_len; nfc_llcp_local_put(local); return local->gb; } int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { struct nfc_llcp_local *local; int err; if (gb_len < 3 || gb_len > NFC_MAX_GT_LEN) return -EINVAL; local = nfc_llcp_find_local(dev); if (local == NULL) { pr_err("No LLCP device\n"); return -ENODEV; } memset(local->remote_gb, 0, NFC_MAX_GT_LEN); memcpy(local->remote_gb, gb, gb_len); local->remote_gb_len = gb_len; if (memcmp(local->remote_gb, llcp_magic, 3)) { pr_err("MAC does not support LLCP\n"); err = -EINVAL; goto out; } err = nfc_llcp_parse_gb_tlv(local, &local->remote_gb[3], local->remote_gb_len - 3); out: nfc_llcp_local_put(local); return err; } static u8 nfc_llcp_dsap(const struct sk_buff *pdu) { return (pdu->data[0] & 0xfc) >> 2; } static u8 nfc_llcp_ptype(const struct sk_buff *pdu) { return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6); } static u8 nfc_llcp_ssap(const struct sk_buff *pdu) { return pdu->data[1] & 0x3f; } static u8 nfc_llcp_ns(const struct sk_buff *pdu) { return pdu->data[2] >> 4; } static u8 nfc_llcp_nr(const struct sk_buff *pdu) { return pdu->data[2] & 0xf; } static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu) { pdu->data[2] = (sock->send_n << 4) | (sock->recv_n); sock->send_n = (sock->send_n + 1) % 16; sock->recv_ack_n = (sock->recv_n - 1) % 16; } void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction) { struct sk_buff *skb_copy = NULL, *nskb; struct sock *sk; u8 *data; read_lock(&local->raw_sockets.lock); sk_for_each(sk, &local->raw_sockets.head) { if (sk->sk_state != LLCP_BOUND) continue; if (skb_copy == NULL) { skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE, GFP_ATOMIC, true); if (skb_copy == NULL) continue; data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE); data[0] = local->dev ? 
local->dev->idx : 0xFF; data[1] = direction & 0x01; data[1] |= (RAW_PAYLOAD_LLCP << 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); if (!nskb) continue; if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); } read_unlock(&local->raw_sockets.lock); kfree_skb(skb_copy); } static void nfc_llcp_tx_work(struct work_struct *work) { struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, tx_work); struct sk_buff *skb; struct sock *sk; struct nfc_llcp_sock *llcp_sock; skb = skb_dequeue(&local->tx_queue); if (skb != NULL) { sk = skb->sk; llcp_sock = nfc_llcp_sock(sk); if (llcp_sock == NULL && nfc_llcp_ptype(skb) == LLCP_PDU_I) { kfree_skb(skb); nfc_llcp_send_symm(local->dev); } else if (llcp_sock && !llcp_sock->remote_ready) { skb_queue_head(&local->tx_queue, skb); nfc_llcp_send_symm(local->dev); } else { struct sk_buff *copy_skb = NULL; u8 ptype = nfc_llcp_ptype(skb); int ret; pr_debug("Sending pending skb\n"); print_hex_dump_debug("LLCP Tx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); __net_timestamp(skb); nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX); ret = nfc_data_exchange(local->dev, local->target_idx, skb, nfc_llcp_recv, local); if (ret) { kfree_skb(copy_skb); goto out; } if (ptype == LLCP_PDU_I && copy_skb) skb_queue_tail(&llcp_sock->tx_pending_queue, copy_skb); } } else { nfc_llcp_send_symm(local->dev); } out: mod_timer(&local->link_timer, jiffies + msecs_to_jiffies(2 * local->remote_lto)); } static struct nfc_llcp_sock *nfc_llcp_connecting_sock_get(struct nfc_llcp_local *local, u8 ssap) { struct sock *sk; struct nfc_llcp_sock *llcp_sock; read_lock(&local->connecting_sockets.lock); sk_for_each(sk, &local->connecting_sockets.head) { llcp_sock = nfc_llcp_sock(sk); if (llcp_sock->ssap == ssap) { sock_hold(&llcp_sock->sk); goto out; } } llcp_sock = NULL; out: read_unlock(&local->connecting_sockets.lock); return llcp_sock; } static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, const u8 *sn, size_t sn_len) { return nfc_llcp_sock_from_sn(local, sn, sn_len, true); } static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len) { u8 type, length; const u8 *tlv = &skb->data[2]; size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0; while (offset < tlv_array_len) { type = tlv[0]; length = tlv[1]; pr_debug("type 0x%x length %d\n", type, length); if (type == LLCP_TLV_SN) { *sn_len = length; return &tlv[2]; } offset += length + 2; tlv += length + 2; } return NULL; } static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct nfc_llcp_ui_cb *ui_cb; u8 dsap, ssap; dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); ui_cb = nfc_llcp_ui_skb_cb(skb); ui_cb->dsap = dsap; ui_cb->ssap = ssap; pr_debug("%d %d\n", dsap, ssap); /* We're looking for a bound socket, not a client one */ llcp_sock = nfc_llcp_sock_get(local, dsap, LLCP_SAP_SDP); if (llcp_sock == NULL || llcp_sock->sk.sk_type != SOCK_DGRAM) return; /* There is no sequence with UI frames */ skb_pull(skb, LLCP_HEADER_SIZE); if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { /* * UI frames will be freed from the socket layer, so we * need to keep them alive until someone receives them. 
*/ skb_get(skb); } else { pr_err("Receive queue is full\n"); } nfc_llcp_sock_put(llcp_sock); } static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, const struct sk_buff *skb) { struct sock *new_sk, *parent; struct nfc_llcp_sock *sock, *new_sock; u8 dsap, ssap, reason; dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); pr_debug("%d %d\n", dsap, ssap); if (dsap != LLCP_SAP_SDP) { sock = nfc_llcp_sock_get(local, dsap, LLCP_SAP_SDP); if (sock == NULL || sock->sk.sk_state != LLCP_LISTEN) { reason = LLCP_DM_NOBOUND; goto fail; } } else { const u8 *sn; size_t sn_len; sn = nfc_llcp_connect_sn(skb, &sn_len); if (sn == NULL) { reason = LLCP_DM_NOBOUND; goto fail; } pr_debug("Service name length %zu\n", sn_len); sock = nfc_llcp_sock_get_sn(local, sn, sn_len); if (sock == NULL) { reason = LLCP_DM_NOBOUND; goto fail; } } lock_sock(&sock->sk); parent = &sock->sk; if (sk_acceptq_is_full(parent)) { reason = LLCP_DM_REJ; release_sock(&sock->sk); sock_put(&sock->sk); goto fail; } if (sock->ssap == LLCP_SDP_UNBOUND) { u8 ssap = nfc_llcp_reserve_sdp_ssap(local); pr_debug("First client, reserving %d\n", ssap); if (ssap == LLCP_SAP_MAX) { reason = LLCP_DM_REJ; release_sock(&sock->sk); sock_put(&sock->sk); goto fail; } sock->ssap = ssap; } new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC, 0); if (new_sk == NULL) { reason = LLCP_DM_REJ; release_sock(&sock->sk); sock_put(&sock->sk); goto fail; } new_sock = nfc_llcp_sock(new_sk); new_sock->local = nfc_llcp_local_get(local); if (!new_sock->local) { reason = LLCP_DM_REJ; sock_put(&new_sock->sk); release_sock(&sock->sk); sock_put(&sock->sk); goto fail; } new_sock->dev = local->dev; new_sock->rw = sock->rw; new_sock->miux = sock->miux; new_sock->nfc_protocol = sock->nfc_protocol; new_sock->dsap = ssap; new_sock->target_idx = local->target_idx; new_sock->parent = parent; new_sock->ssap = sock->ssap; if (sock->ssap < LLCP_LOCAL_NUM_SAP && sock->ssap >= LLCP_WKS_NUM_SAP) { atomic_t *client_count; pr_debug("reserved_ssap %d for %p\n", sock->ssap, new_sock); client_count = &local->local_sdp_cnt[sock->ssap - LLCP_WKS_NUM_SAP]; atomic_inc(client_count); new_sock->reserved_ssap = sock->ssap; } nfc_llcp_parse_connection_tlv(new_sock, &skb->data[LLCP_HEADER_SIZE], skb->len - LLCP_HEADER_SIZE); pr_debug("new sock %p sk %p\n", new_sock, &new_sock->sk); nfc_llcp_sock_link(&local->sockets, new_sk); nfc_llcp_accept_enqueue(&sock->sk, new_sk); nfc_get_device(local->dev->idx); new_sk->sk_state = LLCP_CONNECTED; /* Wake the listening processes */ parent->sk_data_ready(parent); /* Send CC */ nfc_llcp_send_cc(new_sock); release_sock(&sock->sk); sock_put(&sock->sk); return; fail: /* Send DM */ nfc_llcp_send_dm(local, dsap, ssap, reason); } int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock) { int nr_frames = 0; struct nfc_llcp_local *local = sock->local; pr_debug("Remote ready %d tx queue len %d remote rw %d", sock->remote_ready, skb_queue_len(&sock->tx_pending_queue), sock->remote_rw); /* Try to queue some I frames for transmission */ while (sock->remote_ready && skb_queue_len(&sock->tx_pending_queue) < sock->remote_rw) { struct sk_buff *pdu; pdu = skb_dequeue(&sock->tx_queue); if (pdu == NULL) break; /* Update N(S)/N(R) */ nfc_llcp_set_nrns(sock, pdu); skb_queue_tail(&local->tx_queue, pdu); nr_frames++; } return nr_frames; } static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; u8 dsap, ssap, ptype, ns, nr; ptype = nfc_llcp_ptype(skb); dsap = nfc_llcp_dsap(skb); ssap = 
nfc_llcp_ssap(skb); ns = nfc_llcp_ns(skb); nr = nfc_llcp_nr(skb); pr_debug("%d %d R %d S %d\n", dsap, ssap, nr, ns); llcp_sock = nfc_llcp_sock_get(local, dsap, ssap); if (llcp_sock == NULL) { nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN); return; } sk = &llcp_sock->sk; lock_sock(sk); if (sk->sk_state == LLCP_CLOSED) { release_sock(sk); nfc_llcp_sock_put(llcp_sock); } /* Pass the payload upstream */ if (ptype == LLCP_PDU_I) { pr_debug("I frame, queueing on %p\n", &llcp_sock->sk); if (ns == llcp_sock->recv_n) llcp_sock->recv_n = (llcp_sock->recv_n + 1) % 16; else pr_err("Received out of sequence I PDU\n"); skb_pull(skb, LLCP_HEADER_SIZE + LLCP_SEQUENCE_SIZE); if (!sock_queue_rcv_skb(&llcp_sock->sk, skb)) { /* * I frames will be freed from the socket layer, so we * need to keep them alive until someone receives them. */ skb_get(skb); } else { pr_err("Receive queue is full\n"); } } /* Remove skbs from the pending queue */ if (llcp_sock->send_ack_n != nr) { struct sk_buff *s, *tmp; u8 n; llcp_sock->send_ack_n = nr; /* Remove and free all skbs until ns == nr */ skb_queue_walk_safe(&llcp_sock->tx_pending_queue, s, tmp) { n = nfc_llcp_ns(s); skb_unlink(s, &llcp_sock->tx_pending_queue); kfree_skb(s); if (n == nr) break; } /* Re-queue the remaining skbs for transmission */ skb_queue_reverse_walk_safe(&llcp_sock->tx_pending_queue, s, tmp) { skb_unlink(s, &llcp_sock->tx_pending_queue); skb_queue_head(&local->tx_queue, s); } } if (ptype == LLCP_PDU_RR) llcp_sock->remote_ready = true; else if (ptype == LLCP_PDU_RNR) llcp_sock->remote_ready = false; if (nfc_llcp_queue_i_frames(llcp_sock) == 0 && ptype == LLCP_PDU_I) nfc_llcp_send_rr(llcp_sock); release_sock(sk); nfc_llcp_sock_put(llcp_sock); } static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; u8 dsap, ssap; dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); if ((dsap == 0) && (ssap == 0)) { pr_debug("Connection termination"); nfc_dep_link_down(local->dev); return; } llcp_sock = nfc_llcp_sock_get(local, dsap, ssap); if (llcp_sock == NULL) { nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN); return; } sk = &llcp_sock->sk; lock_sock(sk); nfc_llcp_socket_purge(llcp_sock); if (sk->sk_state == LLCP_CLOSED) { release_sock(sk); nfc_llcp_sock_put(llcp_sock); } if (sk->sk_state == LLCP_CONNECTED) { nfc_put_device(local->dev); sk->sk_state = LLCP_CLOSED; sk->sk_state_change(sk); } nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_DISC); release_sock(sk); nfc_llcp_sock_put(llcp_sock); } static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; u8 dsap, ssap; dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); llcp_sock = nfc_llcp_connecting_sock_get(local, dsap); if (llcp_sock == NULL) { pr_err("Invalid CC\n"); nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN); return; } sk = &llcp_sock->sk; /* Unlink from connecting and link to the client array */ nfc_llcp_sock_unlink(&local->connecting_sockets, sk); nfc_llcp_sock_link(&local->sockets, sk); llcp_sock->dsap = ssap; nfc_llcp_parse_connection_tlv(llcp_sock, &skb->data[LLCP_HEADER_SIZE], skb->len - LLCP_HEADER_SIZE); sk->sk_state = LLCP_CONNECTED; sk->sk_state_change(sk); nfc_llcp_sock_put(llcp_sock); } static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; u8 dsap, ssap, reason; dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); reason = skb->data[2]; 
pr_debug("%d %d reason %d\n", ssap, dsap, reason); switch (reason) { case LLCP_DM_NOBOUND: case LLCP_DM_REJ: llcp_sock = nfc_llcp_connecting_sock_get(local, dsap); break; default: llcp_sock = nfc_llcp_sock_get(local, dsap, ssap); break; } if (llcp_sock == NULL) { pr_debug("Already closed\n"); return; } sk = &llcp_sock->sk; sk->sk_err = ENXIO; sk->sk_state = LLCP_CLOSED; sk->sk_state_change(sk); nfc_llcp_sock_put(llcp_sock); } static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; u8 dsap, ssap, type, length, tid, sap; const u8 *tlv; u16 tlv_len, offset; const char *service_name; size_t service_name_len; struct nfc_llcp_sdp_tlv *sdp; HLIST_HEAD(llc_sdres_list); size_t sdres_tlvs_len; HLIST_HEAD(nl_sdres_list); dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); pr_debug("%d %d\n", dsap, ssap); if (dsap != LLCP_SAP_SDP || ssap != LLCP_SAP_SDP) { pr_err("Wrong SNL SAP\n"); return; } tlv = &skb->data[LLCP_HEADER_SIZE]; tlv_len = skb->len - LLCP_HEADER_SIZE; offset = 0; sdres_tlvs_len = 0; while (offset < tlv_len) { type = tlv[0]; length = tlv[1]; switch (type) { case LLCP_TLV_SDREQ: tid = tlv[2]; service_name = (char *) &tlv[3]; service_name_len = length - 1; pr_debug("Looking for %.16s\n", service_name); if (service_name_len == strlen("urn:nfc:sn:sdp") && !strncmp(service_name, "urn:nfc:sn:sdp", service_name_len)) { sap = 1; goto add_snl; } llcp_sock = nfc_llcp_sock_from_sn(local, service_name, service_name_len, true); if (!llcp_sock) { sap = 0; goto add_snl; } /* * We found a socket but its ssap has not been reserved * yet. We need to assign it for good and send a reply. * The ssap will be freed when the socket is closed. */ if (llcp_sock->ssap == LLCP_SDP_UNBOUND) { atomic_t *client_count; sap = nfc_llcp_reserve_sdp_ssap(local); pr_debug("Reserving %d\n", sap); if (sap == LLCP_SAP_MAX) { sap = 0; nfc_llcp_sock_put(llcp_sock); goto add_snl; } client_count = &local->local_sdp_cnt[sap - LLCP_WKS_NUM_SAP]; atomic_inc(client_count); llcp_sock->ssap = sap; llcp_sock->reserved_ssap = sap; } else { sap = llcp_sock->ssap; } pr_debug("%p %d\n", llcp_sock, sap); nfc_llcp_sock_put(llcp_sock); add_snl: sdp = nfc_llcp_build_sdres_tlv(tid, sap); if (sdp == NULL) goto exit; sdres_tlvs_len += sdp->tlv_len; hlist_add_head(&sdp->node, &llc_sdres_list); break; case LLCP_TLV_SDRES: mutex_lock(&local->sdreq_lock); pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]); hlist_for_each_entry(sdp, &local->pending_sdreqs, node) { if (sdp->tid != tlv[2]) continue; sdp->sap = tlv[3]; pr_debug("Found: uri=%s, sap=%d\n", sdp->uri, sdp->sap); hlist_del(&sdp->node); hlist_add_head(&sdp->node, &nl_sdres_list); break; } mutex_unlock(&local->sdreq_lock); break; default: pr_err("Invalid SNL tlv value 0x%x\n", type); break; } offset += length + 2; tlv += length + 2; } exit: if (!hlist_empty(&nl_sdres_list)) nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); if (!hlist_empty(&llc_sdres_list)) nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len); } static void nfc_llcp_recv_agf(struct nfc_llcp_local *local, struct sk_buff *skb) { u8 ptype; u16 pdu_len; struct sk_buff *new_skb; if (skb->len <= LLCP_HEADER_SIZE) { pr_err("Malformed AGF PDU\n"); return; } skb_pull(skb, LLCP_HEADER_SIZE); while (skb->len > LLCP_AGF_PDU_HEADER_SIZE) { pdu_len = skb->data[0] << 8 | skb->data[1]; skb_pull(skb, LLCP_AGF_PDU_HEADER_SIZE); if (pdu_len < LLCP_HEADER_SIZE || pdu_len > skb->len) { pr_err("Malformed AGF PDU\n"); return; } ptype = nfc_llcp_ptype(skb); if (ptype 
== LLCP_PDU_SYMM || ptype == LLCP_PDU_AGF) goto next; new_skb = nfc_alloc_recv_skb(pdu_len, GFP_KERNEL); if (new_skb == NULL) { pr_err("Could not allocate PDU\n"); return; } skb_put_data(new_skb, skb->data, pdu_len); nfc_llcp_rx_skb(local, new_skb); kfree_skb(new_skb); next: skb_pull(skb, pdu_len); } } static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb) { u8 dsap, ssap, ptype; ptype = nfc_llcp_ptype(skb); dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); pr_debug("ptype 0x%x dsap 0x%x ssap 0x%x\n", ptype, dsap, ssap); if (ptype != LLCP_PDU_SYMM) print_hex_dump_debug("LLCP Rx: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); switch (ptype) { case LLCP_PDU_SYMM: pr_debug("SYMM\n"); break; case LLCP_PDU_UI: pr_debug("UI\n"); nfc_llcp_recv_ui(local, skb); break; case LLCP_PDU_CONNECT: pr_debug("CONNECT\n"); nfc_llcp_recv_connect(local, skb); break; case LLCP_PDU_DISC: pr_debug("DISC\n"); nfc_llcp_recv_disc(local, skb); break; case LLCP_PDU_CC: pr_debug("CC\n"); nfc_llcp_recv_cc(local, skb); break; case LLCP_PDU_DM: pr_debug("DM\n"); nfc_llcp_recv_dm(local, skb); break; case LLCP_PDU_SNL: pr_debug("SNL\n"); nfc_llcp_recv_snl(local, skb); break; case LLCP_PDU_I: case LLCP_PDU_RR: case LLCP_PDU_RNR: pr_debug("I frame\n"); nfc_llcp_recv_hdlc(local, skb); break; case LLCP_PDU_AGF: pr_debug("AGF frame\n"); nfc_llcp_recv_agf(local, skb); break; } } static void nfc_llcp_rx_work(struct work_struct *work) { struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, rx_work); struct sk_buff *skb; skb = local->rx_pending; if (skb == NULL) { pr_debug("No pending SKB\n"); return; } __net_timestamp(skb); nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX); nfc_llcp_rx_skb(local, skb); schedule_work(&local->tx_work); kfree_skb(local->rx_pending); local->rx_pending = NULL; } static void __nfc_llcp_recv(struct nfc_llcp_local *local, struct sk_buff *skb) { local->rx_pending = skb; del_timer(&local->link_timer); schedule_work(&local->rx_work); } void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) { struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; if (err < 0) { pr_err("LLCP PDU receive err %d\n", err); return; } __nfc_llcp_recv(local, skb); } int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) { struct nfc_llcp_local *local; local = nfc_llcp_find_local(dev); if (local == NULL) { kfree_skb(skb); return -ENODEV; } __nfc_llcp_recv(local, skb); nfc_llcp_local_put(local); return 0; } void nfc_llcp_mac_is_down(struct nfc_dev *dev) { struct nfc_llcp_local *local; local = nfc_llcp_find_local(dev); if (local == NULL) return; local->remote_miu = LLCP_DEFAULT_MIU; local->remote_lto = LLCP_DEFAULT_LTO; /* Close and purge all existing sockets */ nfc_llcp_socket_release(local, true, 0); nfc_llcp_local_put(local); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode) { struct nfc_llcp_local *local; pr_debug("rf mode %d\n", rf_mode); local = nfc_llcp_find_local(dev); if (local == NULL) return; local->target_idx = target_idx; local->comm_mode = comm_mode; local->rf_mode = rf_mode; if (rf_mode == NFC_RF_INITIATOR) { pr_debug("Queueing Tx work\n"); schedule_work(&local->tx_work); } else { mod_timer(&local->link_timer, jiffies + msecs_to_jiffies(local->remote_lto)); } nfc_llcp_local_put(local); } int nfc_llcp_register_device(struct nfc_dev *ndev) { struct nfc_llcp_local *local; local = kzalloc(sizeof(struct nfc_llcp_local), GFP_KERNEL); if (local == NULL) return -ENOMEM; /* As we are going to 
initialize local's refcount, we need to get the * nfc_dev to avoid UAF, otherwise there is no point in continuing. * See nfc_llcp_local_get(). */ local->dev = nfc_get_device(ndev->idx); if (!local->dev) { kfree(local); return -ENODEV; } INIT_LIST_HEAD(&local->list); kref_init(&local->ref); mutex_init(&local->sdp_lock); timer_setup(&local->link_timer, nfc_llcp_symm_timer, 0); skb_queue_head_init(&local->tx_queue); INIT_WORK(&local->tx_work, nfc_llcp_tx_work); local->rx_pending = NULL; INIT_WORK(&local->rx_work, nfc_llcp_rx_work); INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work); rwlock_init(&local->sockets.lock); rwlock_init(&local->connecting_sockets.lock); rwlock_init(&local->raw_sockets.lock); local->lto = 150; /* 1500 ms */ local->rw = LLCP_MAX_RW; local->miux = cpu_to_be16(LLCP_MAX_MIUX); local->local_wks = 0x1; /* LLC Link Management */ nfc_llcp_build_gb(local); local->remote_miu = LLCP_DEFAULT_MIU; local->remote_lto = LLCP_DEFAULT_LTO; mutex_init(&local->sdreq_lock); INIT_HLIST_HEAD(&local->pending_sdreqs); timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0); INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); spin_lock(&llcp_devices_lock); list_add(&local->list, &llcp_devices); spin_unlock(&llcp_devices_lock); return 0; } void nfc_llcp_unregister_device(struct nfc_dev *dev) { struct nfc_llcp_local *local = nfc_llcp_remove_local(dev); if (local == NULL) { pr_debug("No such device\n"); return; } local_cleanup(local); nfc_llcp_local_put(local); } int __init nfc_llcp_init(void) { return nfc_llcp_sock_init(); } void nfc_llcp_exit(void) { nfc_llcp_sock_exit(); }
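/*
 * Illustrative, standalone userspace sketch (not part of the driver above):
 * it packs an LLCP header the same way the helpers above slice it apart,
 * i.e. nfc_llcp_dsap(), nfc_llcp_ptype(), nfc_llcp_ssap(), nfc_llcp_ns()
 * and nfc_llcp_nr(). The SAP and sequence values below are arbitrary
 * example numbers, and 0xc is assumed to be the I-PDU type as in llcp.h.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t dsap = 0x20, ptype = 0x0c, ssap = 0x21;	/* example values */
	uint8_t ns = 3, nr = 5;
	uint8_t hdr[3];

	/* Byte 0: DSAP in the top 6 bits, upper 2 bits of PTYPE below it */
	hdr[0] = (dsap << 2) | (ptype >> 2);
	/* Byte 1: lower 2 bits of PTYPE, then SSAP in the low 6 bits */
	hdr[1] = ((ptype & 0x03) << 6) | (ssap & 0x3f);
	/* Byte 2 (I/RR/RNR only): N(S) in the high nibble, N(R) in the low */
	hdr[2] = (ns << 4) | (nr & 0x0f);

	/* Re-parse with the same masks nfc_llcp_dsap() and friends use */
	printf("dsap=%d ptype=%d ssap=%d ns=%d nr=%d\n",
	       (hdr[0] & 0xfc) >> 2,
	       ((hdr[0] & 0x03) << 2) | ((hdr[1] & 0xc0) >> 6),
	       hdr[1] & 0x3f, hdr[2] >> 4, hdr[2] & 0x0f);
	return 0;
}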
// SPDX-License-Identifier: GPL-2.0 /* * cypress_cy7c63.c * * Copyright (c) 2006-2007 Oliver Bock (bock@tfh-berlin.de) * * This driver is based on the Cypress USB Driver by Marcus Maul * (cyport) and the 2.0 version of Greg Kroah-Hartman's * USB Skeleton driver. * * This is a generic driver for the Cypress CY7C63xxx family. * For the time being it enables you to read from and write to * the single I/O ports of the device. * * Supported vendors: AK Modul-Bus Computer GmbH * (Firmware "Port-Chip") * * Supported devices: CY7C63001A-PC * CY7C63001C-PXC * CY7C63001C-SXC * * Supported functions: Read/Write Ports * * * For up-to-date information please visit: * http://www.obock.de/kernel/cypress */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/usb.h> #define DRIVER_AUTHOR "Oliver Bock (bock@tfh-berlin.de)" #define DRIVER_DESC "Cypress CY7C63xxx USB driver" #define CYPRESS_VENDOR_ID 0xa2c #define CYPRESS_PRODUCT_ID 0x8 #define CYPRESS_READ_PORT 0x4 #define CYPRESS_WRITE_PORT 0x5 #define CYPRESS_READ_RAM 0x2 #define CYPRESS_WRITE_RAM 0x3 #define CYPRESS_READ_ROM 0x1 #define CYPRESS_READ_PORT_ID0 0 #define CYPRESS_WRITE_PORT_ID0 0 #define CYPRESS_READ_PORT_ID1 0x2 #define CYPRESS_WRITE_PORT_ID1 1 #define CYPRESS_MAX_REQSIZE 8 /* table of devices that work with this driver */ static const struct usb_device_id cypress_table[] = { { USB_DEVICE(CYPRESS_VENDOR_ID, CYPRESS_PRODUCT_ID) }, { } }; MODULE_DEVICE_TABLE(usb, cypress_table); /* structure to hold all of our device specific stuff */ struct cypress { struct usb_device * udev; unsigned char port[2]; }; /* used to send usb control messages to device */ static int vendor_command(struct cypress *dev, unsigned char request, unsigned char address, unsigned char data) { int retval = 0; unsigned int pipe; unsigned char *iobuf; /* allocate some memory for the i/o buffer*/ iobuf = kzalloc(CYPRESS_MAX_REQSIZE, GFP_KERNEL); if (!iobuf) { retval = -ENOMEM; goto error; } dev_dbg(&dev->udev->dev, "Sending usb_control_msg (data: %d)\n", data); /* prepare usb control message and send it upstream */ pipe = usb_rcvctrlpipe(dev->udev, 0); retval = usb_control_msg(dev->udev, pipe, request, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_OTHER, address, data, iobuf, CYPRESS_MAX_REQSIZE, USB_CTRL_GET_TIMEOUT); /* store returned data (more READs to be added) */ switch (request) { case CYPRESS_READ_PORT: if (address == CYPRESS_READ_PORT_ID0) { dev->port[0] = iobuf[1]; dev_dbg(&dev->udev->dev, "READ_PORT0 returned: %d\n", dev->port[0]); } else if (address == CYPRESS_READ_PORT_ID1) { dev->port[1] = iobuf[1];
dev_dbg(&dev->udev->dev, "READ_PORT1 returned: %d\n", dev->port[1]); } break; } kfree(iobuf); error: return retval; } /* write port value */ static ssize_t write_port(struct device *dev, struct device_attribute *attr, const char *buf, size_t count, int port_num, int write_id) { int value = -1; int result = 0; struct usb_interface *intf = to_usb_interface(dev); struct cypress *cyp = usb_get_intfdata(intf); dev_dbg(&cyp->udev->dev, "WRITE_PORT%d called\n", port_num); /* validate input data */ if (sscanf(buf, "%d", &value) < 1) { result = -EINVAL; goto error; } if (value < 0 || value > 255) { result = -EINVAL; goto error; } result = vendor_command(cyp, CYPRESS_WRITE_PORT, write_id, (unsigned char)value); dev_dbg(&cyp->udev->dev, "Result of vendor_command: %d\n\n", result); error: return result < 0 ? result : count; } /* attribute callback handler (write) */ static ssize_t port0_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return write_port(dev, attr, buf, count, 0, CYPRESS_WRITE_PORT_ID0); } /* attribute callback handler (write) */ static ssize_t port1_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return write_port(dev, attr, buf, count, 1, CYPRESS_WRITE_PORT_ID1); } /* read port value */ static ssize_t read_port(struct device *dev, struct device_attribute *attr, char *buf, int port_num, int read_id) { int result = 0; struct usb_interface *intf = to_usb_interface(dev); struct cypress *cyp = usb_get_intfdata(intf); dev_dbg(&cyp->udev->dev, "READ_PORT%d called\n", port_num); result = vendor_command(cyp, CYPRESS_READ_PORT, read_id, 0); dev_dbg(&cyp->udev->dev, "Result of vendor_command: %d\n\n", result); return sprintf(buf, "%d", cyp->port[port_num]); } /* attribute callback handler (read) */ static ssize_t port0_show(struct device *dev, struct device_attribute *attr, char *buf) { return read_port(dev, attr, buf, 0, CYPRESS_READ_PORT_ID0); } static DEVICE_ATTR_RW(port0); /* attribute callback handler (read) */ static ssize_t port1_show(struct device *dev, struct device_attribute *attr, char *buf) { return read_port(dev, attr, buf, 1, CYPRESS_READ_PORT_ID1); } static DEVICE_ATTR_RW(port1); static struct attribute *cypress_attrs[] = { &dev_attr_port0.attr, &dev_attr_port1.attr, NULL, }; ATTRIBUTE_GROUPS(cypress); static int cypress_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct cypress *dev; int retval = -ENOMEM; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto error_mem; dev->udev = usb_get_dev(interface_to_usbdev(interface)); /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); /* let the user know that the device is now attached */ dev_info(&interface->dev, "Cypress CY7C63xxx device now attached\n"); return 0; error_mem: return retval; } static void cypress_disconnect(struct usb_interface *interface) { struct cypress *dev; dev = usb_get_intfdata(interface); /* the intfdata can be set to NULL only after the * device files have been removed */ usb_set_intfdata(interface, NULL); usb_put_dev(dev->udev); dev_info(&interface->dev, "Cypress CY7C63xxx device now disconnected\n"); kfree(dev); } static struct usb_driver cypress_driver = { .name = "cypress_cy7c63", .probe = cypress_probe, .disconnect = cypress_disconnect, .id_table = cypress_table, .dev_groups = cypress_groups, }; module_usb_driver(cypress_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); 
MODULE_LICENSE("GPL");
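/*
 * Userspace usage sketch (not part of the driver above): port0 and port1
 * are exposed through the sysfs attributes registered via cypress_groups.
 * The sysfs path below is a hypothetical example and depends on where the
 * device enumerates on a given system; values are decimal 0-255, matching
 * write_port()/read_port() above.
 */
#include <stdio.h>

int main(void)
{
	/* hypothetical enumeration path, adjust for the actual device */
	const char *attr = "/sys/bus/usb/devices/1-1:1.0/port0";
	char buf[16];
	FILE *f;

	/* Drive all eight lines of port 0 high. */
	f = fopen(attr, "w");
	if (!f)
		return 1;
	fprintf(f, "255");
	fclose(f);

	/* Read the port back; the driver reports it as a decimal number. */
	f = fopen(attr, "r");
	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("port0 = %s\n", buf);
	fclose(f);
	return 0;
}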
// SPDX-License-Identifier: GPL-2.0-or-later /* * Mirics MSi001 silicon tuner driver * * Copyright (C) 2013 Antti Palosaari <crope@iki.fi> * Copyright (C) 2014 Antti Palosaari <crope@iki.fi> */ #include <linux/module.h> #include <linux/gcd.h> #include <media/v4l2-device.h> #include <media/v4l2-ctrls.h> static const struct v4l2_frequency_band bands[] = { { .type = V4L2_TUNER_RF, .index = 0, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 49000000, .rangehigh = 263000000, }, { .type = V4L2_TUNER_RF, .index = 1, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 390000000, .rangehigh = 960000000, }, }; struct msi001_dev { struct spi_device *spi; struct v4l2_subdev sd; /* Controls */ struct v4l2_ctrl_handler hdl; struct v4l2_ctrl *bandwidth_auto; struct v4l2_ctrl *bandwidth; struct v4l2_ctrl *lna_gain; struct v4l2_ctrl *mixer_gain; struct v4l2_ctrl *if_gain; unsigned int f_tuner; }; static inline struct msi001_dev *sd_to_msi001_dev(struct v4l2_subdev *sd) { return container_of(sd, struct msi001_dev, sd); } static int msi001_wreg(struct msi001_dev *dev, u32 data) { /* Register format: 4 bits addr + 20 bits value */ return spi_write(dev->spi, &data, 3); }; static int msi001_set_gain(struct msi001_dev *dev, int lna_gain, int mixer_gain, int if_gain) { struct spi_device *spi = dev->spi; int ret; u32 reg; dev_dbg(&spi->dev, "lna=%d mixer=%d if=%d\n", lna_gain, mixer_gain, if_gain); reg = 1 << 0; reg |= (59 - if_gain) << 4; reg |= 0 << 10; reg |= (1 - mixer_gain) << 12; reg |= (1 - lna_gain) << 13; reg |= 4 << 14; reg |= 0 << 17; ret = msi001_wreg(dev,
reg); if (ret) goto err; return 0; err: dev_dbg(&spi->dev, "failed %d\n", ret); return ret; }; static int msi001_set_tuner(struct msi001_dev *dev) { struct spi_device *spi = dev->spi; int ret, i; unsigned int uitmp, div_n, k, k_thresh, k_frac, div_lo, f_if1; u32 reg; u64 f_vco; u8 mode, filter_mode; static const struct { u32 rf; u8 mode; u8 div_lo; } band_lut[] = { { 50000000, 0xe1, 16}, /* AM_MODE2, antenna 2 */ {108000000, 0x42, 32}, /* VHF_MODE */ {330000000, 0x44, 16}, /* B3_MODE */ {960000000, 0x48, 4}, /* B45_MODE */ { ~0U, 0x50, 2}, /* BL_MODE */ }; static const struct { u32 freq; u8 filter_mode; } if_freq_lut[] = { { 0, 0x03}, /* Zero IF */ { 450000, 0x02}, /* 450 kHz IF */ {1620000, 0x01}, /* 1.62 MHz IF */ {2048000, 0x00}, /* 2.048 MHz IF */ }; static const struct { u32 freq; u8 val; } bandwidth_lut[] = { { 200000, 0x00}, /* 200 kHz */ { 300000, 0x01}, /* 300 kHz */ { 600000, 0x02}, /* 600 kHz */ {1536000, 0x03}, /* 1.536 MHz */ {5000000, 0x04}, /* 5 MHz */ {6000000, 0x05}, /* 6 MHz */ {7000000, 0x06}, /* 7 MHz */ {8000000, 0x07}, /* 8 MHz */ }; unsigned int f_rf = dev->f_tuner; /* * bandwidth (Hz) * 200000, 300000, 600000, 1536000, 5000000, 6000000, 7000000, 8000000 */ unsigned int bandwidth; /* * intermediate frequency (Hz) * 0, 450000, 1620000, 2048000 */ unsigned int f_if = 0; #define F_REF 24000000 #define DIV_PRE_N 4 #define F_VCO_STEP div_lo dev_dbg(&spi->dev, "f_rf=%d f_if=%d\n", f_rf, f_if); for (i = 0; i < ARRAY_SIZE(band_lut); i++) { if (f_rf <= band_lut[i].rf) { mode = band_lut[i].mode; div_lo = band_lut[i].div_lo; break; } } if (i == ARRAY_SIZE(band_lut)) { ret = -EINVAL; goto err; } /* AM_MODE is upconverted */ if ((mode >> 0) & 0x1) f_if1 = 5 * F_REF; else f_if1 = 0; for (i = 0; i < ARRAY_SIZE(if_freq_lut); i++) { if (f_if == if_freq_lut[i].freq) { filter_mode = if_freq_lut[i].filter_mode; break; } } if (i == ARRAY_SIZE(if_freq_lut)) { ret = -EINVAL; goto err; } /* filters */ bandwidth = dev->bandwidth->val; bandwidth = clamp(bandwidth, 200000U, 8000000U); for (i = 0; i < ARRAY_SIZE(bandwidth_lut); i++) { if (bandwidth <= bandwidth_lut[i].freq) { bandwidth = bandwidth_lut[i].val; break; } } if (i == ARRAY_SIZE(bandwidth_lut)) { ret = -EINVAL; goto err; } dev->bandwidth->val = bandwidth_lut[i].freq; dev_dbg(&spi->dev, "bandwidth selected=%d\n", bandwidth_lut[i].freq); /* * Fractional-N synthesizer * * +---------------------------------------+ * v | * Fref +----+ +-------+ +----+ +------+ +---+ * ------> | PD | --> | VCO | ------> | /4 | --> | /N.F | <-- | K | * +----+ +-------+ +----+ +------+ +---+ * | * | * v * +-------+ Fout * | /Rout | ------> * +-------+ */ /* Calculate PLL integer and fractional control word. */ f_vco = (u64) (f_rf + f_if + f_if1) * div_lo; div_n = div_u64_rem(f_vco, DIV_PRE_N * F_REF, &k); k_thresh = (DIV_PRE_N * F_REF) / F_VCO_STEP; k_frac = div_u64((u64) k * k_thresh, (DIV_PRE_N * F_REF)); /* Find out greatest common divisor and divide to smaller. */ uitmp = gcd(k_thresh, k_frac); k_thresh /= uitmp; k_frac /= uitmp; /* Force divide to reg max. Resolution will be reduced. */ uitmp = DIV_ROUND_UP(k_thresh, 4095); k_thresh = DIV_ROUND_CLOSEST(k_thresh, uitmp); k_frac = DIV_ROUND_CLOSEST(k_frac, uitmp); /* Calculate real RF set. 
*/ uitmp = (unsigned int) F_REF * DIV_PRE_N * div_n; uitmp += (unsigned int) F_REF * DIV_PRE_N * k_frac / k_thresh; uitmp /= div_lo; dev_dbg(&spi->dev, "f_rf=%u:%u f_vco=%llu div_n=%u k_thresh=%u k_frac=%u div_lo=%u\n", f_rf, uitmp, f_vco, div_n, k_thresh, k_frac, div_lo); ret = msi001_wreg(dev, 0x00000e); if (ret) goto err; ret = msi001_wreg(dev, 0x000003); if (ret) goto err; reg = 0 << 0; reg |= mode << 4; reg |= filter_mode << 12; reg |= bandwidth << 14; reg |= 0x02 << 17; reg |= 0x00 << 20; ret = msi001_wreg(dev, reg); if (ret) goto err; reg = 5 << 0; reg |= k_thresh << 4; reg |= 1 << 19; reg |= 1 << 21; ret = msi001_wreg(dev, reg); if (ret) goto err; reg = 2 << 0; reg |= k_frac << 4; reg |= div_n << 16; ret = msi001_wreg(dev, reg); if (ret) goto err; ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->cur.val, dev->if_gain->cur.val); if (ret) goto err; reg = 6 << 0; reg |= 63 << 4; reg |= 4095 << 10; ret = msi001_wreg(dev, reg); if (ret) goto err; return 0; err: dev_dbg(&spi->dev, "failed %d\n", ret); return ret; } static int msi001_standby(struct v4l2_subdev *sd) { struct msi001_dev *dev = sd_to_msi001_dev(sd); return msi001_wreg(dev, 0x000000); } static int msi001_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *v) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "index=%d\n", v->index); strscpy(v->name, "Mirics MSi001", sizeof(v->name)); v->type = V4L2_TUNER_RF; v->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; v->rangelow = 49000000; v->rangehigh = 960000000; return 0; } static int msi001_s_tuner(struct v4l2_subdev *sd, const struct v4l2_tuner *v) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "index=%d\n", v->index); return 0; } static int msi001_g_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *f) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "tuner=%d\n", f->tuner); f->frequency = dev->f_tuner; return 0; } static int msi001_s_frequency(struct v4l2_subdev *sd, const struct v4l2_frequency *f) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; unsigned int band; dev_dbg(&spi->dev, "tuner=%d type=%d frequency=%u\n", f->tuner, f->type, f->frequency); if (f->frequency < ((bands[0].rangehigh + bands[1].rangelow) / 2)) band = 0; else band = 1; dev->f_tuner = clamp_t(unsigned int, f->frequency, bands[band].rangelow, bands[band].rangehigh); return msi001_set_tuner(dev); } static int msi001_enum_freq_bands(struct v4l2_subdev *sd, struct v4l2_frequency_band *band) { struct msi001_dev *dev = sd_to_msi001_dev(sd); struct spi_device *spi = dev->spi; dev_dbg(&spi->dev, "tuner=%d type=%d index=%d\n", band->tuner, band->type, band->index); if (band->index >= ARRAY_SIZE(bands)) return -EINVAL; band->capability = bands[band->index].capability; band->rangelow = bands[band->index].rangelow; band->rangehigh = bands[band->index].rangehigh; return 0; } static const struct v4l2_subdev_tuner_ops msi001_tuner_ops = { .standby = msi001_standby, .g_tuner = msi001_g_tuner, .s_tuner = msi001_s_tuner, .g_frequency = msi001_g_frequency, .s_frequency = msi001_s_frequency, .enum_freq_bands = msi001_enum_freq_bands, }; static const struct v4l2_subdev_ops msi001_ops = { .tuner = &msi001_tuner_ops, }; static int msi001_s_ctrl(struct v4l2_ctrl *ctrl) { struct msi001_dev *dev = container_of(ctrl->handler, struct msi001_dev, hdl); struct spi_device *spi = dev->spi; int ret; 
dev_dbg(&spi->dev, "id=%d name=%s val=%d min=%lld max=%lld step=%lld\n", ctrl->id, ctrl->name, ctrl->val, ctrl->minimum, ctrl->maximum, ctrl->step); switch (ctrl->id) { case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: case V4L2_CID_RF_TUNER_BANDWIDTH: ret = msi001_set_tuner(dev); break; case V4L2_CID_RF_TUNER_LNA_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->val, dev->mixer_gain->cur.val, dev->if_gain->cur.val); break; case V4L2_CID_RF_TUNER_MIXER_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->val, dev->if_gain->cur.val); break; case V4L2_CID_RF_TUNER_IF_GAIN: ret = msi001_set_gain(dev, dev->lna_gain->cur.val, dev->mixer_gain->cur.val, dev->if_gain->val); break; default: dev_dbg(&spi->dev, "unknown control %d\n", ctrl->id); ret = -EINVAL; } return ret; } static const struct v4l2_ctrl_ops msi001_ctrl_ops = { .s_ctrl = msi001_s_ctrl, }; static int msi001_probe(struct spi_device *spi) { struct msi001_dev *dev; int ret; dev_dbg(&spi->dev, "\n"); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; goto err; } dev->spi = spi; dev->f_tuner = bands[0].rangelow; v4l2_spi_subdev_init(&dev->sd, spi, &msi001_ops); /* Register controls */ v4l2_ctrl_handler_init(&dev->hdl, 5); dev->bandwidth_auto = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_BANDWIDTH_AUTO, 0, 1, 1, 1); dev->bandwidth = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_BANDWIDTH, 200000, 8000000, 1, 200000); if (dev->hdl.error) { ret = dev->hdl.error; dev_err(&spi->dev, "Could not initialize controls\n"); /* control init failed, free handler */ goto err_ctrl_handler_free; } v4l2_ctrl_auto_cluster(2, &dev->bandwidth_auto, 0, false); dev->lna_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_LNA_GAIN, 0, 1, 1, 1); dev->mixer_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_MIXER_GAIN, 0, 1, 1, 1); dev->if_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_IF_GAIN, 0, 59, 1, 0); if (dev->hdl.error) { ret = dev->hdl.error; dev_err(&spi->dev, "Could not initialize controls\n"); /* control init failed, free handler */ goto err_ctrl_handler_free; } dev->sd.ctrl_handler = &dev->hdl; return 0; err_ctrl_handler_free: v4l2_ctrl_handler_free(&dev->hdl); kfree(dev); err: return ret; } static void msi001_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); struct msi001_dev *dev = sd_to_msi001_dev(sd); dev_dbg(&spi->dev, "\n"); /* * Registered by v4l2_spi_new_subdev() from master driver, but we must * unregister it from here. Weird. */ v4l2_device_unregister_subdev(&dev->sd); v4l2_ctrl_handler_free(&dev->hdl); kfree(dev); } static const struct spi_device_id msi001_id_table[] = { {"msi001", 0}, {} }; MODULE_DEVICE_TABLE(spi, msi001_id_table); static struct spi_driver msi001_driver = { .driver = { .name = "msi001", .suppress_bind_attrs = true, }, .probe = msi001_probe, .remove = msi001_remove, .id_table = msi001_id_table, }; module_spi_driver(msi001_driver); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("Mirics MSi001"); MODULE_LICENSE("GPL");
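/*
 * Illustrative user-space sketch, not part of the driver: it repeats the
 * fractional-N arithmetic of msi001_set_tuner() above for one simplified
 * case (zero IF, no AM upconversion, band already chosen) so the derived
 * divider values can be checked on a host machine. All names below are
 * local to this sketch; the driver additionally scales k_thresh/k_frac
 * down to the 12-bit register field, which is omitted here for brevity.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int gcd_uint(unsigned int a, unsigned int b)
{
	while (b) {
		unsigned int t = a % b;

		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	const uint64_t f_ref = 24000000;	/* crystal reference, Hz */
	const uint64_t div_pre_n = 4;		/* fixed pre-divider */
	const uint64_t f_rf = 100000000;	/* requested RF, Hz */
	const unsigned int div_lo = 32;		/* LO divider for this band */
	uint64_t f_vco, step, f_real;
	unsigned int div_n, k, k_thresh, k_frac, g;

	f_vco = f_rf * div_lo;			/* zero IF, no upconversion */
	step = div_pre_n * f_ref;

	div_n = (unsigned int)(f_vco / step);	/* integer part of N.F */
	k = (unsigned int)(f_vco % step);	/* remainder in Hz */

	k_thresh = (unsigned int)(step / div_lo); /* VCO step is div_lo Hz */
	k_frac = (unsigned int)((uint64_t)k * k_thresh / step);

	g = gcd_uint(k_thresh, k_frac);		/* reduce the fraction */
	k_thresh /= g;
	k_frac /= g;

	/* realized RF, same formula as the driver's debug print */
	f_real = (step * div_n + step * k_frac / k_thresh) / div_lo;

	/* prints: div_n=33 k_thresh=3 k_frac=1 f_real=100000000 Hz */
	printf("div_n=%u k_thresh=%u k_frac=%u f_real=%llu Hz\n",
	       div_n, k_thresh, k_frac, (unsigned long long)f_real);
	return 0;
}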
// SPDX-License-Identifier: GPL-2.0 /* *
Implementation of the diskquota system for the LINUX operating system. QUOTA * is implemented using the BSD system call interface as the means of * communication with the user level. This file contains the generic routines * called by the different filesystems on allocation of an inode or block. * These routines take care of the administration needed to have a consistent * diskquota tracking system. The ideas of both user and group quotas are based * on the Melbourne quota system as used on BSD derived systems. The internal * implementation is based on one of the several variants of the LINUX * inode-subsystem with added complexity of the diskquota system. * * Author: Marco van Wieringen <mvw@planets.elm.net> * * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 * * Revised list management to avoid races * -- Bill Hawes, <whawes@star.net>, 9/98 * * Fixed races in dquot_transfer(), dqget() and dquot_alloc_...(). * As the consequence the locking was moved from dquot_decr_...(), * dquot_incr_...() to calling functions. * invalidate_dquots() now writes modified dquots. * Serialized quota_off() and quota_on() for mount point. * Fixed a few bugs in grow_dquots(). * Fixed deadlock in write_dquot() - we no longer account quotas on * quota files * remove_dquot_ref() moved to inode.c - it now traverses through inodes * add_dquot_ref() restarts after blocking * Added check for bogus uid and fixed check for group in quotactl. * Jan Kara, <jack@suse.cz>, sponsored by SuSE CR, 10-11/99 * * Used struct list_head instead of own list struct * Invalidation of referenced dquots is no longer possible * Improved free_dquots list management * Quota and i_blocks are now updated in one place to avoid races * Warnings are now delayed so we won't block in critical section * Write updated not to require dquot lock * Jan Kara, <jack@suse.cz>, 9/2000 * * Added dynamic quota structure allocation * Jan Kara <jack@suse.cz> 12/2000 * * Rewritten quota interface. Implemented new quota format and * formats registering. * Jan Kara, <jack@suse.cz>, 2001,2002 * * New SMP locking. * Jan Kara, <jack@suse.cz>, 10/2002 * * Added journalled quota support, fix lock inversion problems * Jan Kara, <jack@suse.cz>, 2003,2004 * * (C) Copyright 1994 - 1997 Marco van Wieringen */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/mm.h> #include <linux/time.h> #include <linux/types.h> #include <linux/string.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/tty.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/init.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/kmod.h> #include <linux/namei.h> #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/blkdev.h> #include <linux/sched/mm.h> #include "../internal.h" /* ugh */ #include <linux/uaccess.h> /* * There are five quota SMP locks: * * dq_list_lock protects all lists with quotas and quota formats. 
* * dquot->dq_dqb_lock protects data from dq_dqb * * inode->i_lock protects inode->i_blocks, i_bytes and also guards * consistency of dquot->dq_dqb with inode->i_blocks, i_bytes so that * dquot_transfer() can stabilize amount it transfers * * dq_data_lock protects mem_dqinfo structures and modifications of dquot * pointers in the inode * * dq_state_lock protects modifications of quota state (on quotaon and * quotaoff) and readers who care about latest values take it as well. * * The spinlock ordering is hence: * dq_data_lock > dq_list_lock > i_lock > dquot->dq_dqb_lock, * dq_list_lock > dq_state_lock * * Note that some things (eg. sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock * * Operation accessing dquots via inode pointers are protected by dquot_srcu. * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and * synchronize_srcu(&dquot_srcu) is called after clearing pointers from * inode and before dropping dquot references to avoid use of dquots after * they are freed. dq_data_lock is used to serialize the pointer setting and * clearing operations. * Special care needs to be taken about S_NOQUOTA inode flag (marking that * inode is a quota file). Functions adding pointers from inode to dquots have * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they * have to do all pointer modifications before dropping dq_data_lock. This makes * sure they cannot race with quotaon which first sets S_NOQUOTA flag and * then drops all pointers to dquots from an inode. * * Each dquot has its dq_lock mutex. Dquot is locked when it is being read to * memory (or space for it is being allocated) on the first dqget(), when it is * being written out, and when it is being released on the last dqput(). The * allocation and release operations are serialized by the dq_lock and by * checking the use count in dquot_release(). * * Lock ordering (including related VFS locks) is the following: * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_sem */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); DEFINE_STATIC_SRCU(dquot_srcu); static DECLARE_WAIT_QUEUE_HEAD(dquot_ref_wq); void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...) 
{ if (printk_ratelimit()) { va_list args; struct va_format vaf; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; printk(KERN_ERR "Quota error (device %s): %s: %pV\n", sb->s_id, func, &vaf); va_end(args); } } EXPORT_SYMBOL(__quota_error); #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) static char *quotatypes[] = INITQFNAMES; #endif static struct quota_format_type *quota_formats; /* List of registered formats */ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; /* SLAB cache for dquot structures */ static struct kmem_cache *dquot_cachep; int register_quota_format(struct quota_format_type *fmt) { spin_lock(&dq_list_lock); fmt->qf_next = quota_formats; quota_formats = fmt; spin_unlock(&dq_list_lock); return 0; } EXPORT_SYMBOL(register_quota_format); void unregister_quota_format(struct quota_format_type *fmt) { struct quota_format_type **actqf; spin_lock(&dq_list_lock); for (actqf = &quota_formats; *actqf && *actqf != fmt; actqf = &(*actqf)->qf_next) ; if (*actqf) *actqf = (*actqf)->qf_next; spin_unlock(&dq_list_lock); } EXPORT_SYMBOL(unregister_quota_format); static struct quota_format_type *find_quota_format(int id) { struct quota_format_type *actqf; spin_lock(&dq_list_lock); for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next) ; if (!actqf || !try_module_get(actqf->qf_owner)) { int qm; spin_unlock(&dq_list_lock); for (qm = 0; module_names[qm].qm_fmt_id && module_names[qm].qm_fmt_id != id; qm++) ; if (!module_names[qm].qm_fmt_id || request_module(module_names[qm].qm_mod_name)) return NULL; spin_lock(&dq_list_lock); for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next) ; if (actqf && !try_module_get(actqf->qf_owner)) actqf = NULL; } spin_unlock(&dq_list_lock); return actqf; } static void put_quota_format(struct quota_format_type *fmt) { module_put(fmt->qf_owner); } /* * Dquot List Management: * The quota code uses five lists for dquot management: the inuse_list, * releasing_dquots, free_dquots, dqi_dirty_list, and dquot_hash[] array. * A single dquot structure may be on some of those lists, depending on * its current state. * * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. * * When the last reference of a dquot is dropped, the dquot is added to * releasing_dquots. We'll then queue work item which will call * synchronize_srcu() and after that perform the final cleanup of all the * dquots on the list. Each cleaned up dquot is moved to free_dquots list. * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot * struct. * * Unused and cleaned up dquots are in the free_dquots list and this list is * searched whenever we need an available dquot. Dquots are removed from the * list as soon as they are used again and dqstats.free_dquots gives the number * of dquots on the list. When dquot is invalidated it's completely released * from memory. * * Dirty dquots are added to the dqi_dirty_list of quota_info when mark * dirtied, and this list is searched when writing dirty dquots back to * quota file. Note that some filesystems do dirty dquot tracking on their * own (e.g. in a journal) and thus don't use dqi_dirty_list. * * Dquots with a specific identity (device, type and id) are placed on * one of the dquot_hash[] hash chains. The provides an efficient search * mechanism to locate a specific dquot. 
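 *
 * Putting it together, a dquot typically moves through the lists as
 * follows: dqget() places it on inuse_list and dquot_hash[] (taking it
 * off free_dquots when an unused entry is reused), the final dqput()
 * parks it on releasing_dquots, quota_release_workfn() moves it to
 * free_dquots, and the shrinker or invalidate_dquots() finally frees it.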
*/ static LIST_HEAD(inuse_list); static LIST_HEAD(free_dquots); static LIST_HEAD(releasing_dquots); static unsigned int dq_hash_bits, dq_hash_mask; static struct hlist_head *dquot_hash; struct dqstats dqstats; EXPORT_SYMBOL(dqstats); static qsize_t inode_get_rsv_space(struct inode *inode); static qsize_t __inode_get_rsv_space(struct inode *inode); static int __dquot_initialize(struct inode *inode, int type); static void quota_release_workfn(struct work_struct *work); static DECLARE_DELAYED_WORK(quota_release_work, quota_release_workfn); static inline unsigned int hashfn(const struct super_block *sb, struct kqid qid) { unsigned int id = from_kqid(&init_user_ns, qid); int type = qid.type; unsigned long tmp; tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask; } /* * Following list functions expect dq_list_lock to be held */ static inline void insert_dquot_hash(struct dquot *dquot) { struct hlist_head *head; head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id); hlist_add_head(&dquot->dq_hash, head); } static inline void remove_dquot_hash(struct dquot *dquot) { hlist_del_init(&dquot->dq_hash); } static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, struct kqid qid) { struct dquot *dquot; hlist_for_each_entry(dquot, dquot_hash+hashent, dq_hash) if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid)) return dquot; return NULL; } /* Add a dquot to the tail of the free list */ static inline void put_dquot_last(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &free_dquots); dqstats_inc(DQST_FREE_DQUOTS); } static inline void put_releasing_dquots(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &releasing_dquots); set_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void remove_free_dquot(struct dquot *dquot) { if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags)) dqstats_dec(DQST_FREE_DQUOTS); else clear_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void put_inuse(struct dquot *dquot) { /* We add to the back of inuse list so we don't have to restart * when traversing this list and we block */ list_add_tail(&dquot->dq_inuse, &inuse_list); dqstats_inc(DQST_ALLOC_DQUOTS); } static inline void remove_inuse(struct dquot *dquot) { dqstats_dec(DQST_ALLOC_DQUOTS); list_del(&dquot->dq_inuse); } /* * End of list functions needing dq_list_lock */ static void wait_on_dquot(struct dquot *dquot) { mutex_lock(&dquot->dq_lock); mutex_unlock(&dquot->dq_lock); } static inline int dquot_active(struct dquot *dquot) { return test_bit(DQ_ACTIVE_B, &dquot->dq_flags); } static inline int dquot_dirty(struct dquot *dquot) { return test_bit(DQ_MOD_B, &dquot->dq_flags); } static inline int mark_dquot_dirty(struct dquot *dquot) { return dquot->dq_sb->dq_op->mark_dirty(dquot); } /* Mark dquot dirty in atomic manner, and return it's old dirty flag state */ int dquot_mark_dquot_dirty(struct dquot *dquot) { int ret = 1; if (!dquot_active(dquot)) return 0; if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY) return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags); /* If quota is dirty already, we don't have to acquire dq_list_lock */ if (dquot_dirty(dquot)) return 1; spin_lock(&dq_list_lock); if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> info[dquot->dq_id.type].dqi_dirty_list); ret = 0; } spin_unlock(&dq_list_lock); return ret; } EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the 
dquots - this can block when journalling */ static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots) { int ret, err, cnt; struct dquot *dquot; ret = err = 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) /* Even in case of error we have to continue */ ret = mark_dquot_dirty(dquot); if (!err) err = ret; } return err; } static inline void dqput_all(struct dquot **dquot) { unsigned int cnt; for (cnt = 0; cnt < MAXQUOTAS; cnt++) dqput(dquot[cnt]); } static inline int clear_dquot_dirty(struct dquot *dquot) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY) return test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags); spin_lock(&dq_list_lock); if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); return 0; } list_del_init(&dquot->dq_dirty); spin_unlock(&dq_list_lock); return 1; } void mark_info_dirty(struct super_block *sb, int type) { spin_lock(&dq_data_lock); sb_dqopt(sb)->info[type].dqi_flags |= DQF_INFO_DIRTY; spin_unlock(&dq_data_lock); } EXPORT_SYMBOL(mark_info_dirty); /* * Read dquot from disk and alloc space for it */ int dquot_acquire(struct dquot *dquot) { int ret = 0, ret2 = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); if (ret < 0) goto out_iolock; } /* Make sure flags update is visible after dquot has been filled */ smp_mb__before_atomic(); set_bit(DQ_READ_B, &dquot->dq_flags); /* Instantiate dquot if needed */ if (!dquot_active(dquot) && !dquot->dq_off) { ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); /* Write the info if needed */ if (info_dirty(&dqopt->info[dquot->dq_id.type])) { ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( dquot->dq_sb, dquot->dq_id.type); } if (ret < 0) goto out_iolock; if (ret2 < 0) { ret = ret2; goto out_iolock; } } /* * Make sure flags update is visible after on-disk struct has been * allocated. Paired with smp_rmb() in dqget(). 
*/ smp_mb__before_atomic(); set_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_iolock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_acquire); /* * Write dquot to disk */ int dquot_commit(struct dquot *dquot) { int ret = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); if (!clear_dquot_dirty(dquot)) goto out_lock; /* Inactive dquot can be only if there was error during read/init * => we have better not writing it */ if (dquot_active(dquot)) ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); else ret = -EIO; out_lock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_commit); /* * Release dquot */ int dquot_release(struct dquot *dquot) { int ret = 0, ret2 = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); /* Check whether we are not racing with some other dqget() */ if (dquot_is_busy(dquot)) goto out_dqlock; if (dqopt->ops[dquot->dq_id.type]->release_dqblk) { ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot); /* Write the info */ if (info_dirty(&dqopt->info[dquot->dq_id.type])) { ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( dquot->dq_sb, dquot->dq_id.type); } if (ret >= 0) ret = ret2; } clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_dqlock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_release); void dquot_destroy(struct dquot *dquot) { kmem_cache_free(dquot_cachep, dquot); } EXPORT_SYMBOL(dquot_destroy); static inline void do_destroy_dquot(struct dquot *dquot) { dquot->dq_sb->dq_op->destroy_dquot(dquot); } /* Invalidate all dquots on the list. Note that this function is called after * quota is disabled and pointers from inodes removed so there cannot be new * quota users. There can still be some users of quotas due to inodes being * just deleted or pruned by prune_icache() (those are not attached to any * list) or parallel quotactl call. We have to wait for such users. */ static void invalidate_dquots(struct super_block *sb, int type) { struct dquot *dquot, *tmp; restart: flush_delayed_work(&quota_release_work); spin_lock(&dq_list_lock); list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { if (dquot->dq_sb != sb) continue; if (dquot->dq_id.type != type) continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); /* * Once dqput() wakes us up, we know it's time to free * the dquot. * IMPORTANT: we rely on the fact that there is always * at most one process waiting for dquot to free. * Otherwise dq_count would be > 1 and we would never * wake up. */ wait_event(dquot_ref_wq, atomic_read(&dquot->dq_count) == 1); dqput(dquot); /* At this moment dquot() need not exist (it could be * reclaimed by prune_dqcache(). Hence we must * restart. */ goto restart; } /* * The last user already dropped its reference but dquot didn't * get fully cleaned up yet. Restart the scan which flushes the * work cleaning up released dquots. 
*/ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); goto restart; } /* * Quota now has no users and it has been written on last * dqput() */ remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); } spin_unlock(&dq_list_lock); } /* Call callback for every active dquot on given filesystem */ int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), unsigned long priv) { struct dquot *dquot, *old_dquot = NULL; int ret = 0; WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); spin_lock(&dq_list_lock); list_for_each_entry(dquot, &inuse_list, dq_inuse) { if (!dquot_active(dquot)) continue; if (dquot->dq_sb != sb) continue; /* Now we have active dquot so we can just increase use count */ atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); dqput(old_dquot); old_dquot = dquot; /* * ->release_dquot() can be racing with us. Our reference * protects us from new calls to it so just wait for any * outstanding call and recheck the DQ_ACTIVE_B after that. */ wait_on_dquot(dquot); if (dquot_active(dquot)) { ret = fn(dquot, priv); if (ret < 0) goto out; } spin_lock(&dq_list_lock); /* We are safe to continue now because our dquot could not * be moved out of the inuse list while we hold the reference */ } spin_unlock(&dq_list_lock); out: dqput(old_dquot); return ret; } EXPORT_SYMBOL(dquot_scan_active); static inline int dquot_write_dquot(struct dquot *dquot) { int ret = dquot->dq_sb->dq_op->write_dquot(dquot); if (ret < 0) { quota_error(dquot->dq_sb, "Can't write quota structure " "(error %d). Quota may get out of sync!", ret); /* Clear dirty bit anyway to avoid infinite loop. */ clear_dquot_dirty(dquot); } return ret; } /* Write all dquot structures to quota files */ int dquot_writeback_dquots(struct super_block *sb, int type) { struct list_head dirty; struct dquot *dquot; struct quota_info *dqopt = sb_dqopt(sb); int cnt; int err, ret = 0; WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; spin_lock(&dq_list_lock); /* Move list away to avoid livelock. 
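 * Dquots that are re-dirtied while we are writing them out go onto the
 * (now empty) dqi_dirty_list instead of the local snapshot we are
 * walking, so this loop is guaranteed to terminate.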
*/ list_replace_init(&dqopt->info[cnt].dqi_dirty_list, &dirty); while (!list_empty(&dirty)) { dquot = list_first_entry(&dirty, struct dquot, dq_dirty); WARN_ON(!dquot_active(dquot)); /* If the dquot is releasing we should not touch it */ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); flush_delayed_work(&quota_release_work); spin_lock(&dq_list_lock); continue; } /* Now we have active dquot from which someone is * holding reference so we can safely just increase * use count */ dqgrab(dquot); spin_unlock(&dq_list_lock); err = dquot_write_dquot(dquot); if (err && !ret) ret = err; dqput(dquot); spin_lock(&dq_list_lock); } spin_unlock(&dq_list_lock); } for (cnt = 0; cnt < MAXQUOTAS; cnt++) if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) && info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); dqstats_inc(DQST_SYNCS); return ret; } EXPORT_SYMBOL(dquot_writeback_dquots); /* Write all dquot structures to disk and make them visible from userspace */ int dquot_quota_sync(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); int cnt; int ret; ret = dquot_writeback_dquots(sb, type); if (ret) return ret; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) return 0; /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ if (sb->s_op->sync_fs) { ret = sb->s_op->sync_fs(sb, 1); if (ret) return ret; } ret = sync_blockdev(sb->s_bdev); if (ret) return ret; /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } return 0; } EXPORT_SYMBOL(dquot_quota_sync); static unsigned long dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct dquot *dquot; unsigned long freed = 0; spin_lock(&dq_list_lock); while (!list_empty(&free_dquots) && sc->nr_to_scan) { dquot = list_first_entry(&free_dquots, struct dquot, dq_free); remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); sc->nr_to_scan--; freed++; } spin_unlock(&dq_list_lock); return freed; } static unsigned long dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { return vfs_pressure_ratio( percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); } /* * Safely release dquot and put reference to dquot. */ static void quota_release_workfn(struct work_struct *work) { struct dquot *dquot; struct list_head rls_head; spin_lock(&dq_list_lock); /* Exchange the list head to avoid livelock. */ list_replace_init(&releasing_dquots, &rls_head); spin_unlock(&dq_list_lock); synchronize_srcu(&dquot_srcu); restart: spin_lock(&dq_list_lock); while (!list_empty(&rls_head)) { dquot = list_first_entry(&rls_head, struct dquot, dq_free); WARN_ON_ONCE(atomic_read(&dquot->dq_count)); /* * Note that DQ_RELEASING_B protects us from racing with * invalidate_dquots() calls so we are safe to work with the * dquot even after we drop dq_list_lock. 
*/ if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ dquot_write_dquot(dquot); goto restart; } if (dquot_active(dquot)) { spin_unlock(&dq_list_lock); dquot->dq_sb->dq_op->release_dquot(dquot); goto restart; } /* Dquot is inactive and clean, now move it to free list */ remove_free_dquot(dquot); put_dquot_last(dquot); } spin_unlock(&dq_list_lock); } /* * Put reference to dquot */ void dqput(struct dquot *dquot) { if (!dquot) return; #ifdef CONFIG_QUOTA_DEBUG if (!atomic_read(&dquot->dq_count)) { quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", quotatypes[dquot->dq_id.type], from_kqid(&init_user_ns, dquot->dq_id)); BUG(); } #endif dqstats_inc(DQST_DROPS); spin_lock(&dq_list_lock); if (atomic_read(&dquot->dq_count) > 1) { /* We have more than one user... nothing to do */ atomic_dec(&dquot->dq_count); /* Releasing dquot during quotaoff phase? */ if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) && atomic_read(&dquot->dq_count) == 1) wake_up(&dquot_ref_wq); spin_unlock(&dq_list_lock); return; } /* Need to release dquot? */ WARN_ON_ONCE(!list_empty(&dquot->dq_free)); put_releasing_dquots(dquot); atomic_dec(&dquot->dq_count); spin_unlock(&dq_list_lock); queue_delayed_work(system_unbound_wq, &quota_release_work, 1); } EXPORT_SYMBOL(dqput); struct dquot *dquot_alloc(struct super_block *sb, int type) { return kmem_cache_zalloc(dquot_cachep, GFP_NOFS); } EXPORT_SYMBOL(dquot_alloc); static struct dquot *get_empty_dquot(struct super_block *sb, int type) { struct dquot *dquot; dquot = sb->dq_op->alloc_dquot(sb, type); if(!dquot) return NULL; mutex_init(&dquot->dq_lock); INIT_LIST_HEAD(&dquot->dq_free); INIT_LIST_HEAD(&dquot->dq_inuse); INIT_HLIST_NODE(&dquot->dq_hash); INIT_LIST_HEAD(&dquot->dq_dirty); dquot->dq_sb = sb; dquot->dq_id = make_kqid_invalid(type); atomic_set(&dquot->dq_count, 1); spin_lock_init(&dquot->dq_dqb_lock); return dquot; } /* * Get reference to dquot * * Locking is slightly tricky here. We are guarded from parallel quotaoff() * destroying our dquot by: * a) checking for quota flags under dq_list_lock and * b) getting a reference to dquot before we release dq_list_lock */ struct dquot *dqget(struct super_block *sb, struct kqid qid) { unsigned int hashent = hashfn(sb, qid); struct dquot *dquot, *empty = NULL; if (!qid_has_mapping(sb->s_user_ns, qid)) return ERR_PTR(-EINVAL); if (!sb_has_quota_active(sb, qid.type)) return ERR_PTR(-ESRCH); we_slept: spin_lock(&dq_list_lock); spin_lock(&dq_state_lock); if (!sb_has_quota_active(sb, qid.type)) { spin_unlock(&dq_state_lock); spin_unlock(&dq_list_lock); dquot = ERR_PTR(-ESRCH); goto out; } spin_unlock(&dq_state_lock); dquot = find_dquot(hashent, sb, qid); if (!dquot) { if (!empty) { spin_unlock(&dq_list_lock); empty = get_empty_dquot(sb, qid.type); if (!empty) schedule(); /* Try to wait for a moment... 
*/ goto we_slept; } dquot = empty; empty = NULL; dquot->dq_id = qid; /* all dquots go on the inuse_list */ put_inuse(dquot); /* hash it first so it can be found */ insert_dquot_hash(dquot); spin_unlock(&dq_list_lock); dqstats_inc(DQST_LOOKUPS); } else { if (!atomic_read(&dquot->dq_count)) remove_free_dquot(dquot); atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); dqstats_inc(DQST_CACHE_HITS); dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is * already finished or it will be canceled due to dq_count > 0 test */ wait_on_dquot(dquot); /* Read the dquot / allocate space in quota file */ if (!dquot_active(dquot)) { int err; err = sb->dq_op->acquire_dquot(dquot); if (err < 0) { dqput(dquot); dquot = ERR_PTR(err); goto out; } } /* * Make sure following reads see filled structure - paired with * smp_mb__before_atomic() in dquot_acquire(). */ smp_rmb(); /* Has somebody invalidated entry under us? */ WARN_ON_ONCE(hlist_unhashed(&dquot->dq_hash)); out: if (empty) do_destroy_dquot(empty); return dquot; } EXPORT_SYMBOL(dqget); static inline struct dquot __rcu **i_dquot(struct inode *inode) { return inode->i_sb->s_op->get_dquots(inode); } static int dqinit_needed(struct inode *inode, int type) { struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) return 0; dquots = i_dquot(inode); if (type != -1) return !dquots[type]; for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!dquots[cnt]) return 1; return 0; } /* This routine is guarded by s_umount semaphore */ static int add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif int err = 0; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || !atomic_read(&inode->i_writecount) || !dqinit_needed(inode, type)) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif iput(old_inode); err = __dquot_initialize(inode, type); if (err) { iput(inode); goto out; } /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the * s_inode_list_lock. We cannot iput the inode now as we can be * holding the last reference and we cannot iput it under * s_inode_list_lock. So we keep the reference and iput it * later. */ old_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(old_inode); out: #ifdef CONFIG_QUOTA_DEBUG if (reserved) { quota_error(sb, "Writes happened before quota was turned on " "thus quota information is probably inconsistent. " "Please run quotacheck(8)"); } #endif return err; } static void remove_dquot_ref(struct super_block *sb, int type) { struct inode *inode; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We have to scan also I_NEW inodes because they can already * have quota pointer initialized. Luckily, we need to touch * only quota pointers and these have separate locking * (dq_data_lock). 
*/ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { struct dquot __rcu **dquots = i_dquot(inode); struct dquot *dquot = srcu_dereference_check( dquots[type], &dquot_srcu, lockdep_is_held(&dq_data_lock)); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif rcu_assign_pointer(dquots[type], NULL); if (dquot) dqput(dquot); } spin_unlock(&dq_data_lock); } spin_unlock(&sb->s_inode_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened after quota" " was disabled thus quota information is probably " "inconsistent. Please run quotacheck(8).\n", sb->s_id); } #endif } /* Gather all references from inodes and drop them */ static void drop_dquot_ref(struct super_block *sb, int type) { if (sb->dq_op) remove_dquot_ref(sb, type); } static inline void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) { if (dquot->dq_dqb.dqb_rsvspace >= number) dquot->dq_dqb.dqb_rsvspace -= number; else { WARN_ON_ONCE(1); dquot->dq_dqb.dqb_rsvspace = 0; } if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || dquot->dq_dqb.dqb_curinodes >= number) dquot->dq_dqb.dqb_curinodes -= number; else dquot->dq_dqb.dqb_curinodes = 0; if (dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit) dquot->dq_dqb.dqb_itime = (time64_t) 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); } static void dquot_decr_space(struct dquot *dquot, qsize_t number) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || dquot->dq_dqb.dqb_curspace >= number) dquot->dq_dqb.dqb_curspace -= number; else dquot->dq_dqb.dqb_curspace = 0; if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } struct dquot_warn { struct super_block *w_sb; struct kqid w_dq_id; short w_type; }; static int warning_issued(struct dquot *dquot, const int warntype) { int flag = (warntype == QUOTA_NL_BHARDWARN || warntype == QUOTA_NL_BSOFTLONGWARN) ? DQ_BLKS_B : ((warntype == QUOTA_NL_IHARDWARN || warntype == QUOTA_NL_ISOFTLONGWARN) ? 
DQ_INODES_B : 0); if (!flag) return 0; return test_and_set_bit(flag, &dquot->dq_flags); } #ifdef CONFIG_PRINT_QUOTA_WARNING static int flag_print_warnings = 1; static int need_print_warning(struct dquot_warn *warn) { if (!flag_print_warnings) return 0; switch (warn->w_dq_id.type) { case USRQUOTA: return uid_eq(current_fsuid(), warn->w_dq_id.uid); case GRPQUOTA: return in_group_p(warn->w_dq_id.gid); case PRJQUOTA: return 1; } return 0; } /* Print warning to user which exceeded quota */ static void print_warning(struct dquot_warn *warn) { char *msg = NULL; struct tty_struct *tty; int warntype = warn->w_type; if (warntype == QUOTA_NL_IHARDBELOW || warntype == QUOTA_NL_ISOFTBELOW || warntype == QUOTA_NL_BHARDBELOW || warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(warn)) return; tty = get_current_tty(); if (!tty) return; tty_write_message(tty, warn->w_sb->s_id); if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) tty_write_message(tty, ": warning, "); else tty_write_message(tty, ": write failed, "); tty_write_message(tty, quotatypes[warn->w_dq_id.type]); switch (warntype) { case QUOTA_NL_IHARDWARN: msg = " file limit reached.\r\n"; break; case QUOTA_NL_ISOFTLONGWARN: msg = " file quota exceeded too long.\r\n"; break; case QUOTA_NL_ISOFTWARN: msg = " file quota exceeded.\r\n"; break; case QUOTA_NL_BHARDWARN: msg = " block limit reached.\r\n"; break; case QUOTA_NL_BSOFTLONGWARN: msg = " block quota exceeded too long.\r\n"; break; case QUOTA_NL_BSOFTWARN: msg = " block quota exceeded.\r\n"; break; } tty_write_message(tty, msg); tty_kref_put(tty); } #endif static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot, int warntype) { if (warning_issued(dquot, warntype)) return; warn->w_type = warntype; warn->w_sb = dquot->dq_sb; warn->w_dq_id = dquot->dq_id; } /* * Write warnings to the console and send warning messages over netlink. * * Note that this function can call into tty and networking code. 
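 * That is why callers only record pending warnings with prepare_warning()
 * while they hold dq_dqb_lock and the dquot_srcu read lock, and call
 * flush_warnings() only after all of those have been dropped.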
*/ static void flush_warnings(struct dquot_warn *warn) { int i; for (i = 0; i < MAXQUOTAS; i++) { if (warn[i].w_type == QUOTA_NL_NOWARN) continue; #ifdef CONFIG_PRINT_QUOTA_WARNING print_warning(&warn[i]); #endif quota_send_warning(warn[i].w_dq_id, warn[i].w_sb->s_dev, warn[i].w_type); } } static int ignore_hardlimit(struct dquot *dquot) { struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; return capable(CAP_SYS_RESOURCE) && (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & DQF_ROOT_SQUASH)); } static int dquot_add_inodes(struct dquot *dquot, qsize_t inodes, struct dquot_warn *warn) { qsize_t newinodes; int ret = 0; spin_lock(&dquot->dq_dqb_lock); newinodes = dquot->dq_dqb.dqb_curinodes + inodes; if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) goto add; if (dquot->dq_dqb.dqb_ihardlimit && newinodes > dquot->dq_dqb.dqb_ihardlimit && !ignore_hardlimit(dquot)) { prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN); ret = -EDQUOT; goto out; } if (dquot->dq_dqb.dqb_isoftlimit && newinodes > dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_itime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime && !ignore_hardlimit(dquot)) { prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN); ret = -EDQUOT; goto out; } if (dquot->dq_dqb.dqb_isoftlimit && newinodes > dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_itime == 0) { prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN); dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace; } add: dquot->dq_dqb.dqb_curinodes = newinodes; out: spin_unlock(&dquot->dq_dqb_lock); return ret; } static int dquot_add_space(struct dquot *dquot, qsize_t space, qsize_t rsv_space, unsigned int flags, struct dquot_warn *warn) { qsize_t tspace; struct super_block *sb = dquot->dq_sb; int ret = 0; spin_lock(&dquot->dq_dqb_lock); if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) goto finish; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace + space + rsv_space; if (dquot->dq_dqb.dqb_bhardlimit && tspace > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); ret = -EDQUOT; goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && tspace > dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); ret = -EDQUOT; goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && tspace > dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime == 0) { if (flags & DQUOT_SPACE_WARN) { prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() + sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace; } else { /* * We don't allow preallocation to exceed softlimit so exceeding will * be always printed */ ret = -EDQUOT; goto finish; } } finish: /* * We have to be careful and go through warning generation & grace time * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it * only here... 
*/ if (flags & DQUOT_SPACE_NOFAIL) ret = 0; if (!ret) { dquot->dq_dqb.dqb_rsvspace += rsv_space; dquot->dq_dqb.dqb_curspace += space; } spin_unlock(&dquot->dq_dqb_lock); return ret; } static int info_idq_free(struct dquot *dquot, qsize_t inodes) { qsize_t newinodes; if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit || !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type)) return QUOTA_NL_NOWARN; newinodes = dquot->dq_dqb.dqb_curinodes - inodes; if (newinodes <= dquot->dq_dqb.dqb_isoftlimit) return QUOTA_NL_ISOFTBELOW; if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit && newinodes < dquot->dq_dqb.dqb_ihardlimit) return QUOTA_NL_IHARDBELOW; return QUOTA_NL_NOWARN; } static int info_bdq_free(struct dquot *dquot, qsize_t space) { qsize_t tspace; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace; if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || tspace <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_NOWARN; if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_BSOFTBELOW; if (tspace >= dquot->dq_dqb.dqb_bhardlimit && tspace - space < dquot->dq_dqb.dqb_bhardlimit) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } static int inode_quota_active(const struct inode *inode) { struct super_block *sb = inode->i_sb; if (IS_NOQUOTA(inode)) return 0; return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb); } /* * Initialize quota pointers in inode * * It is better to call this function outside of any transaction as it * might need a lot of space in journal for dquot structure allocation. */ static int __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; struct dquot __rcu **dquots; struct dquot *got[MAXQUOTAS] = {}; struct super_block *sb = inode->i_sb; qsize_t rsv; int ret = 0; if (!inode_quota_active(inode)) return 0; dquots = i_dquot(inode); /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { struct kqid qid; kprojid_t projid; int rc; struct dquot *dquot; if (type != -1 && cnt != type) continue; /* * The i_dquot should have been initialized in most cases, * we check it without locking here to avoid unnecessary * dqget()/dqput() calls. */ if (dquots[cnt]) continue; if (!sb_has_quota_active(sb, cnt)) continue; init_needed = 1; switch (cnt) { case USRQUOTA: qid = make_kqid_uid(inode->i_uid); break; case GRPQUOTA: qid = make_kqid_gid(inode->i_gid); break; case PRJQUOTA: rc = inode->i_sb->dq_op->get_projid(inode, &projid); if (rc) continue; qid = make_kqid_projid(projid); break; } dquot = dqget(sb, qid); if (IS_ERR(dquot)) { /* We raced with somebody turning quotas off... 
*/ if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } got[cnt] = dquot; } /* All required i_dquot has been initialized */ if (!init_needed) return 0; spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) goto out_lock; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; /* Avoid races with quotaoff() */ if (!sb_has_quota_active(sb, cnt)) continue; /* We could race with quotaon or dqget() could have failed */ if (!got[cnt]) continue; if (!dquots[cnt]) { rcu_assign_pointer(dquots[cnt], got[cnt]); got[cnt] = NULL; /* * Make quota reservation system happy if someone * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) { struct dquot *dquot = srcu_dereference_check( dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); spin_lock(&inode->i_lock); /* Get reservation again under proper lock */ rsv = __inode_get_rsv_space(inode); spin_lock(&dquot->dq_dqb_lock); dquot->dq_dqb.dqb_rsvspace += rsv; spin_unlock(&dquot->dq_dqb_lock); spin_unlock(&inode->i_lock); } } } out_lock: spin_unlock(&dq_data_lock); out_put: /* Drop unused references */ dqput_all(got); return ret; } int dquot_initialize(struct inode *inode) { return __dquot_initialize(inode, -1); } EXPORT_SYMBOL(dquot_initialize); bool dquot_initialize_needed(struct inode *inode) { struct dquot __rcu **dquots; int i; if (!inode_quota_active(inode)) return false; dquots = i_dquot(inode); for (i = 0; i < MAXQUOTAS; i++) if (!dquots[i] && sb_has_quota_active(inode->i_sb, i)) return true; return false; } EXPORT_SYMBOL(dquot_initialize_needed); /* * Release all quotas referenced by inode. * * This function only be called on inode free or converting * a file to quota file, no other users for the i_dquot in * both cases, so we needn't call synchronize_srcu() after * clearing i_dquot. */ static void __dquot_drop(struct inode *inode) { int cnt; struct dquot __rcu **dquots = i_dquot(inode); struct dquot *put[MAXQUOTAS]; spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); rcu_assign_pointer(dquots[cnt], NULL); } spin_unlock(&dq_data_lock); dqput_all(put); } void dquot_drop(struct inode *inode) { struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) return; /* * Test before calling to rule out calls from proc and such * where we are not allowed to block. Note that this is * actually reliable test even without the lock - the caller * must assure that nobody can come after the DQUOT_DROP and * add quota pointers back anyway. */ dquots = i_dquot(inode); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (dquots[cnt]) break; } if (cnt < MAXQUOTAS) __dquot_drop(inode); } EXPORT_SYMBOL(dquot_drop); /* * inode_reserved_space is managed internally by quota, and protected by * i_lock similar to i_blocks+i_bytes. 
*/ static qsize_t *inode_reserved_space(struct inode * inode) { /* Filesystem must explicitly define it's own method in order to use * quota reservation interface */ BUG_ON(!inode->i_sb->dq_op->get_reserved_space); return inode->i_sb->dq_op->get_reserved_space(inode); } static qsize_t __inode_get_rsv_space(struct inode *inode) { if (!inode->i_sb->dq_op->get_reserved_space) return 0; return *inode_reserved_space(inode); } static qsize_t inode_get_rsv_space(struct inode *inode) { qsize_t ret; if (!inode->i_sb->dq_op->get_reserved_space) return 0; spin_lock(&inode->i_lock); ret = __inode_get_rsv_space(inode); spin_unlock(&inode->i_lock); return ret; } /* * This functions updates i_blocks+i_bytes fields and quota information * (together with appropriate checks). * * NOTE: We absolutely rely on the fact that caller dirties the inode * (usually helpers in quotaops.h care about this) and holds a handle for * the current transaction so that dquot write and inode write go into the * same transaction. */ /* * This operation can block, but only after everything is updated */ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; struct dquot __rcu **dquots; struct dquot *dquot; if (!inode_quota_active(inode)) { if (reserve) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; spin_unlock(&inode->i_lock); } else { inode_add_bytes(inode, number); } goto out; } for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; if (reserve) { ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]); } else { ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]); } if (ret) { /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); if (reserve) dquot_free_reserved_space(dquot, number); else dquot_decr_space(dquot, number); spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); goto out_flush_warn; } } if (reserve) *inode_reserved_space(inode) += number; else __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); if (reserve) goto out_flush_warn; mark_all_dquot_dirty(dquots); out_flush_warn: srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); out: return ret; } EXPORT_SYMBOL(__dquot_alloc_space); /* * This operation can block, but only after everything is updated */ int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu * const *dquots; struct dquot *dquot; if (!inode_quota_active(inode)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; ret = dquot_add_inodes(dquot, 1, &warn[cnt]); if (ret) { for (cnt--; cnt >= 0; cnt--) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; /* Back out changes we already did */ spin_lock(&dquot->dq_dqb_lock); dquot_decr_inodes(dquot, 1); spin_unlock(&dquot->dq_dqb_lock); } goto warn_put_all; } } warn_put_all: spin_unlock(&inode->i_lock); if (ret == 0) 
mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); return ret; } EXPORT_SYMBOL(dquot_alloc_inode); /* * Convert in-memory reserved quotas to real consumed quotas */ void dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) number = dquot->dq_dqb.dqb_rsvspace; dquot->dq_dqb.dqb_curspace += number; dquot->dq_dqb.dqb_rsvspace -= number; spin_unlock(&dquot->dq_dqb_lock); } } /* Update inode bytes */ *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); return; } EXPORT_SYMBOL(dquot_claim_space_nodirty); /* * Convert allocated space back to in-memory reserved quotas */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) number = dquot->dq_dqb.dqb_curspace; dquot->dq_dqb.dqb_rsvspace += number; dquot->dq_dqb.dqb_curspace -= number; spin_unlock(&dquot->dq_dqb_lock); } } /* Update inode bytes */ *inode_reserved_space(inode) += number; __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); return; } EXPORT_SYMBOL(dquot_reclaim_space_nodirty); /* * This operation can block, but only after everything is updated */ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu **dquots; struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!inode_quota_active(inode)) { if (reserve) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; spin_unlock(&inode->i_lock); } else { inode_sub_bytes(inode, number); } return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); wtype = info_bdq_free(dquot, number); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquot, wtype); if (reserve) dquot_free_reserved_space(dquot, number); else dquot_decr_space(dquot, number); spin_unlock(&dquot->dq_dqb_lock); } if (reserve) *inode_reserved_space(inode) -= number; else __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); if (reserve) goto out_unlock; mark_all_dquot_dirty(dquots); 
out_unlock: srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated */ void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu * const *dquots; struct dquot *dquot; int index; if (!inode_quota_active(inode)) return; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); wtype = info_idq_free(dquot, 1); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquot, wtype); dquot_decr_inodes(dquot, 1); spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(dquot_free_inode); /* * Transfer the number of inode and blocks from one diskquota to an other. * On success, dquot references in transfer_to are consumed and references * to original dquots that need to be released are placed there. On failure, * references are kept untouched. * * This operation can block, but only after everything is updated * A transaction must be started when entering this function. * * We are holding reference on transfer_from & transfer_to, no need to * protect them by srcu_read_lock(). */ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { qsize_t cur_space; qsize_t rsv_space = 0; qsize_t inode_usage = 1; struct dquot __rcu **dquots; struct dquot *transfer_from[MAXQUOTAS] = {}; int cnt, index, ret = 0; char is_valid[MAXQUOTAS] = {}; struct dquot_warn warn_to[MAXQUOTAS]; struct dquot_warn warn_from_inodes[MAXQUOTAS]; struct dquot_warn warn_from_space[MAXQUOTAS]; if (IS_NOQUOTA(inode)) return 0; if (inode->i_sb->dq_op->get_inode_usage) { ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage); if (ret) return ret; } /* Initialize the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { warn_to[cnt].w_type = QUOTA_NL_NOWARN; warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; } spin_lock(&dq_data_lock); spin_lock(&inode->i_lock); if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); return 0; } cur_space = __inode_get_bytes(inode); rsv_space = __inode_get_rsv_space(inode); dquots = i_dquot(inode); /* * Build the transfer_from list, check limits, and update usage in * the target structures. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { /* * Skip changes for same uid or gid or for turned off quota-type. 
*/ if (!transfer_to[cnt]) continue; /* Avoid races with quotaoff() */ if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; transfer_from[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); ret = dquot_add_inodes(transfer_to[cnt], inode_usage, &warn_to[cnt]); if (ret) goto over_quota; ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, DQUOT_SPACE_WARN, &warn_to[cnt]); if (ret) { spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); spin_unlock(&transfer_to[cnt]->dq_dqb_lock); goto over_quota; } } /* Decrease usage for source structures and update quota pointers */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!is_valid[cnt]) continue; /* Due to IO error we might not have transfer_from[] structure */ if (transfer_from[cnt]) { int wtype; spin_lock(&transfer_from[cnt]->dq_dqb_lock); wtype = info_idq_free(transfer_from[cnt], inode_usage); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_inodes[cnt], transfer_from[cnt], wtype); wtype = info_bdq_free(transfer_from[cnt], cur_space + rsv_space); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_space[cnt], transfer_from[cnt], wtype); dquot_decr_inodes(transfer_from[cnt], inode_usage); dquot_decr_space(transfer_from[cnt], cur_space); dquot_free_reserved_space(transfer_from[cnt], rsv_space); spin_unlock(&transfer_from[cnt]->dq_dqb_lock); } rcu_assign_pointer(dquots[cnt], transfer_to[cnt]); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); /* * These arrays are local and we hold dquot references so we don't need * the srcu protection but still take dquot_srcu to avoid warning in * mark_all_dquot_dirty(). */ index = srcu_read_lock(&dquot_srcu); mark_all_dquot_dirty((struct dquot __rcu **)transfer_from); mark_all_dquot_dirty((struct dquot __rcu **)transfer_to); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn_to); flush_warnings(warn_from_inodes); flush_warnings(warn_from_space); /* Pass back references to put */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (is_valid[cnt]) transfer_to[cnt] = transfer_from[cnt]; return 0; over_quota: /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { if (!is_valid[cnt]) continue; spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); dquot_decr_space(transfer_to[cnt], cur_space); dquot_free_reserved_space(transfer_to[cnt], rsv_space); spin_unlock(&transfer_to[cnt]->dq_dqb_lock); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); flush_warnings(warn_to); return ret; } EXPORT_SYMBOL(__dquot_transfer); /* Wrapper for transferring ownership of an inode for uid/gid only * Called from FSXXX_setattr() */ int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr) { struct dquot *transfer_to[MAXQUOTAS] = {}; struct dquot *dquot; struct super_block *sb = inode->i_sb; int ret; if (!inode_quota_active(inode)) return 0; if (i_uid_needs_update(idmap, iattr, inode)) { kuid_t kuid = from_vfsuid(idmap, i_user_ns(inode), iattr->ia_vfsuid); dquot = dqget(sb, make_kqid_uid(kuid)); if (IS_ERR(dquot)) { if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } transfer_to[USRQUOTA] = dquot; } if (i_gid_needs_update(idmap, iattr, inode)) { kgid_t kgid = from_vfsgid(idmap, i_user_ns(inode), iattr->ia_vfsgid); dquot = dqget(sb, make_kqid_gid(kgid)); if (IS_ERR(dquot)) { if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } transfer_to[GRPQUOTA] = dquot; } ret = 
__dquot_transfer(inode, transfer_to); out_put: dqput_all(transfer_to); return ret; } EXPORT_SYMBOL(dquot_transfer); /* * Write info of quota file to disk */ int dquot_commit_info(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); return dqopt->ops[type]->write_file_info(sb, type); } EXPORT_SYMBOL(dquot_commit_info); int dquot_get_next_id(struct super_block *sb, struct kqid *qid) { struct quota_info *dqopt = sb_dqopt(sb); if (!sb_has_quota_active(sb, qid->type)) return -ESRCH; if (!dqopt->ops[qid->type]->get_next_id) return -ENOSYS; return dqopt->ops[qid->type]->get_next_id(sb, qid); } EXPORT_SYMBOL(dquot_get_next_id); /* * Definitions of diskquota operations. */ const struct dquot_operations dquot_operations = { .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, .mark_dirty = dquot_mark_dquot_dirty, .write_info = dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_next_id = dquot_get_next_id, }; EXPORT_SYMBOL(dquot_operations); /* * Generic helper for ->open on filesystems supporting disk quotas. */ int dquot_file_open(struct inode *inode, struct file *file) { int error; error = generic_file_open(inode, file); if (!error && (file->f_mode & FMODE_WRITE)) error = dquot_initialize(inode); return error; } EXPORT_SYMBOL(dquot_file_open); static void vfs_cleanup_quota_inode(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); struct inode *inode = dqopt->files[type]; if (!inode) return; if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { inode_lock(inode); inode->i_flags &= ~S_NOQUOTA; inode_unlock(inode); } dqopt->files[type] = NULL; iput(inode); } /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ int dquot_disable(struct super_block *sb, int type, unsigned int flags) { int cnt; struct quota_info *dqopt = sb_dqopt(sb); /* s_umount should be held in exclusive mode */ if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) up_read(&sb->s_umount); /* Cannot turn off usage accounting without turning off limits, or * suspend quotas and simultaneously turn quotas off. */ if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED)) || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED | DQUOT_USAGE_ENABLED))) return -EINVAL; /* * Skip everything if there's nothing to do. We have to do this because * sometimes we are called when fill_super() failed and calling * sync_fs() in such cases does no good. */ if (!sb_any_quota_loaded(sb)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_loaded(sb, cnt)) continue; if (flags & DQUOT_SUSPENDED) { spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); } else { spin_lock(&dq_state_lock); dqopt->flags &= ~dquot_state_flag(flags, cnt); /* Turning off suspended quotas? */ if (!sb_has_quota_loaded(sb, cnt) && sb_has_quota_suspended(sb, cnt)) { dqopt->flags &= ~dquot_state_flag( DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); vfs_cleanup_quota_inode(sb, cnt); continue; } spin_unlock(&dq_state_lock); } /* We still have to keep quota loaded? */ if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED)) continue; /* Note: these are blocking operations */ drop_dquot_ref(sb, cnt); invalidate_dquots(sb, cnt); /* * Now all dquots should be invalidated, all writes done so we * should be only users of the info. No locks needed. 
*/ if (info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); if (dqopt->ops[cnt]->free_file_info) dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; dqopt->info[cnt].dqi_bgrace = 0; dqopt->ops[cnt] = NULL; } /* Skip syncing and setting flags if quota files are hidden */ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) goto put_inodes; /* Sync the superblock so that buffers with quota data are written to * disk (and so userspace sees correct data afterwards). */ if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); /* Now the quota files are just ordinary files and we can set the * inode flags back. Moreover we discard the pagecache so that * userspace sees the writes we did bypassing the pagecache. We * must also discard the blockdev buffers so that we see the * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!sb_has_quota_loaded(sb, cnt) && dqopt->files[cnt]) { inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev); put_inodes: /* We are done when suspending quotas */ if (flags & DQUOT_SUSPENDED) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!sb_has_quota_loaded(sb, cnt)) vfs_cleanup_quota_inode(sb, cnt); return 0; } EXPORT_SYMBOL(dquot_disable); int dquot_quota_off(struct super_block *sb, int type) { return dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); } EXPORT_SYMBOL(dquot_quota_off); /* * Turn quotas on on a device */ static int vfs_setup_quota_inode(struct inode *inode, int type) { struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); if (is_bad_inode(inode)) return -EUCLEAN; if (!S_ISREG(inode->i_mode)) return -EACCES; if (IS_RDONLY(inode)) return -EROFS; if (sb_has_quota_loaded(sb, type)) return -EBUSY; /* * Quota files should never be encrypted. They should be thought of as * filesystem metadata, not user data. New-style internal quota files * cannot be encrypted by users anyway, but old-style external quota * files could potentially be incorrectly created in an encrypted * directory, hence this explicit check. Some reasons why encrypted * quota files don't work include: (1) some filesystems that support * encryption don't handle it in their quota_read and quota_write, and * (2) cleaning up encrypted quota files at unmount would need special * consideration, as quota files are cleaned up later than user files. */ if (IS_ENCRYPTED(inode)) return -EINVAL; dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) return -EIO; if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* We don't want quota and atime on quota files (deadlocks * possible) Also nobody should write to the file - we use * special IO operations which ignore the immutable bit. */ inode_lock(inode); inode->i_flags |= S_NOQUOTA; inode_unlock(inode); /* * When S_NOQUOTA is set, remove dquot references as no more * references can be added */ __dquot_drop(inode); } return 0; } int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags) { struct quota_format_type *fmt = find_quota_format(format_id); struct quota_info *dqopt = sb_dqopt(sb); int error; lockdep_assert_held_write(&sb->s_umount); /* Just unsuspend quotas? 
*/ if (WARN_ON_ONCE(flags & DQUOT_SUSPENDED)) return -EINVAL; if (!fmt) return -ESRCH; if (!sb->dq_op || !sb->s_qcop || (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; goto out_fmt; } /* Filesystems outside of init_user_ns not yet supported */ if (sb->s_user_ns != &init_user_ns) { error = -EINVAL; goto out_fmt; } /* Usage always has to be set... */ if (!(flags & DQUOT_USAGE_ENABLED)) { error = -EINVAL; goto out_fmt; } if (sb_has_quota_loaded(sb, type)) { error = -EBUSY; goto out_fmt; } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* As we bypass the pagecache we must now flush all the * dirty data and invalidate caches so that kernel sees * changes from userspace. It is not enough to just flush * the quota file since if blocksize < pagesize, invalidation * of the cache could fail because of other unrelated dirty * data */ sync_filesystem(sb); invalidate_bdev(sb->s_bdev); } error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) goto out_fmt; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; dqopt->info[type].dqi_fmt_id = format_id; INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); error = dqopt->ops[type]->read_file_info(sb, type); if (error < 0) goto out_fmt; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) { spin_lock(&dq_data_lock); dqopt->info[type].dqi_flags |= DQF_SYS_FILE; spin_unlock(&dq_data_lock); } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); error = add_dquot_ref(sb, type); if (error) dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; out_fmt: put_quota_format(fmt); return error; } EXPORT_SYMBOL(dquot_load_quota_sb); /* * More powerful function for turning on quotas on given quota inode allowing * setting of individual quota flags */ int dquot_load_quota_inode(struct inode *inode, int type, int format_id, unsigned int flags) { int err; err = vfs_setup_quota_inode(inode, type); if (err < 0) return err; err = dquot_load_quota_sb(inode->i_sb, type, format_id, flags); if (err < 0) vfs_cleanup_quota_inode(inode->i_sb, type); return err; } EXPORT_SYMBOL(dquot_load_quota_inode); /* Reenable quotas on remount RW */ int dquot_resume(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); int ret = 0, cnt; unsigned int flags; /* s_umount should be held in exclusive mode */ if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) up_read(&sb->s_umount); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_suspended(sb, cnt)) continue; spin_lock(&dq_state_lock); flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED, cnt); dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt); spin_unlock(&dq_state_lock); flags = dquot_generic_flag(flags, cnt); ret = dquot_load_quota_sb(sb, cnt, dqopt->info[cnt].dqi_fmt_id, flags); if (ret < 0) vfs_cleanup_quota_inode(sb, cnt); } return ret; } EXPORT_SYMBOL(dquot_resume); int dquot_quota_on(struct super_block *sb, int type, int format_id, const struct path *path) { int error = security_quota_on(path->dentry); if (error) return error; /* Quota file not on the same filesystem? */ if (path->dentry->d_sb != sb) error = -EXDEV; else error = dquot_load_quota_inode(d_inode(path->dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; } EXPORT_SYMBOL(dquot_quota_on); /* * This function is used when filesystem needs to initialize quotas * during mount time. 
*/ int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type) { struct dentry *dentry; int error; dentry = lookup_positive_unlocked(qf_name, sb->s_root, strlen(qf_name)); if (IS_ERR(dentry)) return PTR_ERR(dentry); error = security_quota_on(dentry); if (!error) error = dquot_load_quota_inode(d_inode(dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); dput(dentry); return error; } EXPORT_SYMBOL(dquot_quota_on_mount); static int dquot_quota_enable(struct super_block *sb, unsigned int flags) { int ret; int type; struct quota_info *dqopt = sb_dqopt(sb); if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) return -ENOSYS; /* Accounting cannot be turned on while fs is mounted */ flags &= ~(FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT); if (!flags) return -EINVAL; for (type = 0; type < MAXQUOTAS; type++) { if (!(flags & qtype_enforce_flag(type))) continue; /* Can't enforce without accounting */ if (!sb_has_quota_usage_enabled(sb, type)) { ret = -EINVAL; goto out_err; } if (sb_has_quota_limits_enabled(sb, type)) { ret = -EBUSY; goto out_err; } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); spin_unlock(&dq_state_lock); } return 0; out_err: /* Backout enforcement enablement we already did */ for (type--; type >= 0; type--) { if (flags & qtype_enforce_flag(type)) dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); } /* Error code translation for better compatibility with XFS */ if (ret == -EBUSY) ret = -EEXIST; return ret; } static int dquot_quota_disable(struct super_block *sb, unsigned int flags) { int ret; int type; struct quota_info *dqopt = sb_dqopt(sb); if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) return -ENOSYS; /* * We don't support turning off accounting via quotactl. In principle * quota infrastructure can do this but filesystems don't expect * userspace to be able to do it. */ if (flags & (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT)) return -EOPNOTSUPP; /* Filter out limits not enabled */ for (type = 0; type < MAXQUOTAS; type++) if (!sb_has_quota_limits_enabled(sb, type)) flags &= ~qtype_enforce_flag(type); /* Nothing left? 
*/ if (!flags) return -EEXIST; for (type = 0; type < MAXQUOTAS; type++) { if (flags & qtype_enforce_flag(type)) { ret = dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); if (ret < 0) goto out_err; } } return 0; out_err: /* Backout enforcement disabling we already did */ for (type--; type >= 0; type--) { if (flags & qtype_enforce_flag(type)) { spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); spin_unlock(&dq_state_lock); } } return ret; } /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; memset(di, 0, sizeof(*di)); spin_lock(&dquot->dq_dqb_lock); di->d_spc_hardlimit = dm->dqb_bhardlimit; di->d_spc_softlimit = dm->dqb_bsoftlimit; di->d_ino_hardlimit = dm->dqb_ihardlimit; di->d_ino_softlimit = dm->dqb_isoftlimit; di->d_space = dm->dqb_curspace + dm->dqb_rsvspace; di->d_ino_count = dm->dqb_curinodes; di->d_spc_timer = dm->dqb_btime; di->d_ino_timer = dm->dqb_itime; spin_unlock(&dquot->dq_dqb_lock); } int dquot_get_dqblk(struct super_block *sb, struct kqid qid, struct qc_dqblk *di) { struct dquot *dquot; dquot = dqget(sb, qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); return 0; } EXPORT_SYMBOL(dquot_get_dqblk); int dquot_get_next_dqblk(struct super_block *sb, struct kqid *qid, struct qc_dqblk *di) { struct dquot *dquot; int err; if (!sb->dq_op->get_next_id) return -ENOSYS; err = sb->dq_op->get_next_id(sb, qid); if (err < 0) return err; dquot = dqget(sb, *qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); return 0; } EXPORT_SYMBOL(dquot_get_next_dqblk); #define VFS_QC_MASK \ (QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \ QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \ QC_SPC_TIMER | QC_INO_TIMER) /* Generic routine for setting common part of quota structure */ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; if (di->d_fieldmask & ~VFS_QC_MASK) return -EINVAL; if (((di->d_fieldmask & QC_SPC_SOFT) && di->d_spc_softlimit > dqi->dqi_max_spc_limit) || ((di->d_fieldmask & QC_SPC_HARD) && di->d_spc_hardlimit > dqi->dqi_max_spc_limit) || ((di->d_fieldmask & QC_INO_SOFT) && (di->d_ino_softlimit > dqi->dqi_max_ino_limit)) || ((di->d_fieldmask & QC_INO_HARD) && (di->d_ino_hardlimit > dqi->dqi_max_ino_limit))) return -ERANGE; spin_lock(&dquot->dq_dqb_lock); if (di->d_fieldmask & QC_SPACE) { dm->dqb_curspace = di->d_space - dm->dqb_rsvspace; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_SPC_SOFT) dm->dqb_bsoftlimit = di->d_spc_softlimit; if (di->d_fieldmask & QC_SPC_HARD) dm->dqb_bhardlimit = di->d_spc_hardlimit; if (di->d_fieldmask & (QC_SPC_SOFT | QC_SPC_HARD)) { check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_COUNT) { dm->dqb_curinodes = di->d_ino_count; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_SOFT) dm->dqb_isoftlimit = di->d_ino_softlimit; if (di->d_fieldmask & QC_INO_HARD) dm->dqb_ihardlimit = di->d_ino_hardlimit; if (di->d_fieldmask & (QC_INO_SOFT | QC_INO_HARD)) { check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_SPC_TIMER) { dm->dqb_btime = di->d_spc_timer; check_blim = 1; set_bit(DQ_LASTSET_B + 
QIF_BTIME_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_TIMER) { dm->dqb_itime = di->d_ino_timer; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } if (check_blim) { if (!dm->dqb_bsoftlimit || dm->dqb_curspace + dm->dqb_rsvspace <= dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_SPC_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_btime = ktime_get_real_seconds() + dqi->dqi_bgrace; } if (check_ilim) { if (!dm->dqb_isoftlimit || dm->dqb_curinodes <= dm->dqb_isoftlimit) { dm->dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_INO_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_itime = ktime_get_real_seconds() + dqi->dqi_igrace; } if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); spin_unlock(&dquot->dq_dqb_lock); mark_dquot_dirty(dquot); return 0; } int dquot_set_dqblk(struct super_block *sb, struct kqid qid, struct qc_dqblk *di) { struct dquot *dquot; int rc; dquot = dqget(sb, qid); if (IS_ERR(dquot)) { rc = PTR_ERR(dquot); goto out; } rc = do_set_dqblk(dquot, di); dqput(dquot); out: return rc; } EXPORT_SYMBOL(dquot_set_dqblk); /* Generic routine for getting common part of quota file information */ int dquot_get_state(struct super_block *sb, struct qc_state *state) { struct mem_dqinfo *mi; struct qc_type_state *tstate; struct quota_info *dqopt = sb_dqopt(sb); int type; memset(state, 0, sizeof(*state)); for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_active(sb, type)) continue; tstate = state->s_state + type; mi = sb_dqopt(sb)->info + type; tstate->flags = QCI_ACCT_ENABLED; spin_lock(&dq_data_lock); if (mi->dqi_flags & DQF_SYS_FILE) tstate->flags |= QCI_SYSFILE; if (mi->dqi_flags & DQF_ROOT_SQUASH) tstate->flags |= QCI_ROOT_SQUASH; if (sb_has_quota_limits_enabled(sb, type)) tstate->flags |= QCI_LIMITS_ENFORCED; tstate->spc_timelimit = mi->dqi_bgrace; tstate->ino_timelimit = mi->dqi_igrace; if (dqopt->files[type]) { tstate->ino = dqopt->files[type]->i_ino; tstate->blocks = dqopt->files[type]->i_blocks; } tstate->nextents = 1; /* We don't know... 
*/ spin_unlock(&dq_data_lock); } return 0; } EXPORT_SYMBOL(dquot_get_state); /* Generic routine for setting common part of quota file information */ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) { struct mem_dqinfo *mi; if ((ii->i_fieldmask & QC_WARNS_MASK) || (ii->i_fieldmask & QC_RT_SPC_TIMER)) return -EINVAL; if (!sb_has_quota_active(sb, type)) return -ESRCH; mi = sb_dqopt(sb)->info + type; if (ii->i_fieldmask & QC_FLAGS) { if ((ii->i_flags & QCI_ROOT_SQUASH && mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) return -EINVAL; } spin_lock(&dq_data_lock); if (ii->i_fieldmask & QC_SPC_TIMER) mi->dqi_bgrace = ii->i_spc_timelimit; if (ii->i_fieldmask & QC_INO_TIMER) mi->dqi_igrace = ii->i_ino_timelimit; if (ii->i_fieldmask & QC_FLAGS) { if (ii->i_flags & QCI_ROOT_SQUASH) mi->dqi_flags |= DQF_ROOT_SQUASH; else mi->dqi_flags &= ~DQF_ROOT_SQUASH; } spin_unlock(&dq_data_lock); mark_info_dirty(sb, type); /* Force write to disk */ return sb->dq_op->write_info(sb, type); } EXPORT_SYMBOL(dquot_set_dqinfo); const struct quotactl_ops dquot_quotactl_sysfile_ops = { .quota_enable = dquot_quota_enable, .quota_disable = dquot_quota_disable, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .get_nextdqblk = dquot_get_next_dqblk, .set_dqblk = dquot_set_dqblk }; EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); static int do_proc_dqstats(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int type = (unsigned long *)table->data - dqstats.stat; s64 value = percpu_counter_sum(&dqstats.counter[type]); /* Filter negative values for non-monotonic counters */ if (value < 0 && (type == DQST_ALLOC_DQUOTS || type == DQST_FREE_DQUOTS)) value = 0; /* Update global table */ dqstats.stat[type] = value; return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static struct ctl_table fs_dqstats_table[] = { { .procname = "lookups", .data = &dqstats.stat[DQST_LOOKUPS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "drops", .data = &dqstats.stat[DQST_DROPS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "reads", .data = &dqstats.stat[DQST_READS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "writes", .data = &dqstats.stat[DQST_WRITES], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "cache_hits", .data = &dqstats.stat[DQST_CACHE_HITS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "allocated_dquots", .data = &dqstats.stat[DQST_ALLOC_DQUOTS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "free_dquots", .data = &dqstats.stat[DQST_FREE_DQUOTS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "syncs", .data = &dqstats.stat[DQST_SYNCS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, #ifdef CONFIG_PRINT_QUOTA_WARNING { .procname = "warnings", .data = &flag_print_warnings, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif }; static int __init dquot_init(void) { int i, ret; unsigned long nr_hash, order; struct shrinker *dqcache_shrinker; printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); register_sysctl_init("fs/quota", fs_dqstats_table); dquot_cachep = 
kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_PANIC), NULL); order = 0; dquot_hash = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order); if (!dquot_hash) panic("Cannot create dquot hash table"); for (i = 0; i < _DQST_DQSTAT_LAST; i++) { ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL); if (ret) panic("Cannot create dquot stat counters"); } /* Find power-of-two hlist_heads which can fit into allocation */ nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); dq_hash_bits = ilog2(nr_hash); nr_hash = 1UL << dq_hash_bits; dq_hash_mask = nr_hash - 1; for (i = 0; i < nr_hash; i++) INIT_HLIST_HEAD(dquot_hash + i); pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); dqcache_shrinker = shrinker_alloc(0, "dquota-cache"); if (!dqcache_shrinker) panic("Cannot allocate dquot shrinker"); dqcache_shrinker->count_objects = dqcache_shrink_count; dqcache_shrinker->scan_objects = dqcache_shrink_scan; shrinker_register(dqcache_shrinker); return 0; } fs_initcall(dquot_init);
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_CORE_H
#define __IEEE802154_CORE_H

#include <net/cfg802154.h>

struct cfg802154_registered_device {
	const struct cfg802154_ops *ops;
	struct list_head list;

	/* wpan_phy index, internal only */
	int wpan_phy_idx;

	/* also protected by devlist_mtx */
	int opencount;
	wait_queue_head_t dev_wait;

	/* protected by RTNL only */
	int num_running_ifaces;

	/* associated wpan interfaces, protected by rtnl or RCU */
	struct list_head wpan_dev_list;
	int devlist_generation, wpan_dev_id;

	/* must be last because of the way we do wpan_phy_priv(),
	 * and it should at least be aligned to NETDEV_ALIGN
	 */
	struct wpan_phy wpan_phy __aligned(NETDEV_ALIGN);
};

static inline struct cfg802154_registered_device *
wpan_phy_to_rdev(struct wpan_phy *wpan_phy)
{
	BUG_ON(!wpan_phy);
	return container_of(wpan_phy, struct cfg802154_registered_device,
			    wpan_phy);
}

extern struct list_head cfg802154_rdev_list;
extern int cfg802154_rdev_list_generation;

int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
			   struct net *net);

/* free object */
void cfg802154_dev_free(struct cfg802154_registered_device *rdev);
struct cfg802154_registered_device *
cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx);
struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx);

#endif /* __IEEE802154_CORE_H */
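/*
 * Illustrative sketch, not part of this header: a cfg802154 op typically
 * receives a struct wpan_phy pointer and recovers the registered-device
 * wrapper with wpan_phy_to_rdev() before touching rdev-internal state.
 * The op name is hypothetical; the declarations above are assumed to be
 * in scope.
 */
static int example_cfg802154_op(struct wpan_phy *wpan_phy)
{
	struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(wpan_phy);

	/* Any rdev-internal field may now be used. */
	return rdev->devlist_generation;
}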
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * kernel/workqueue_internal.h
 *
 * Workqueue internal header file. Only to be included by workqueue and
 * core kernel subsystems.
 */
#ifndef _KERNEL_WORKQUEUE_INTERNAL_H
#define _KERNEL_WORKQUEUE_INTERNAL_H

#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/preempt.h>

struct worker_pool;

/*
 * The poor guys doing the actual heavy lifting. All on-duty workers are
 * either serving the manager role, on idle list or on busy hash. For
 * details on the locking annotation (L, I, X...), refer to workqueue.c.
 *
 * Only to be used in workqueue and async.
 */
struct worker {
	/* on idle list while idle, on busy hash table while busy */
	union {
		struct list_head	entry;	/* L: while idle */
		struct hlist_node	hentry;	/* L: while busy */
	};

	struct work_struct	*current_work;	/* K: work being processed and its */
	work_func_t		current_func;	/* K: function */
	struct pool_workqueue	*current_pwq;	/* K: pwq */
	u64			current_at;	/* K: runtime at start or last wakeup */
	unsigned int		current_color;	/* K: color */

	int			sleeping;	/* S: is worker sleeping? */

	/* used by the scheduler to determine a worker's last known identity */
	work_func_t		last_func;	/* K: last work's fn */

	struct list_head	scheduled;	/* L: scheduled works */

	struct task_struct	*task;		/* I: worker task */
	struct worker_pool	*pool;		/* A: the associated pool */
						/* L: for rescuers */
	struct list_head	node;		/* A: anchored at pool->workers */
						/* A: runs through worker->node */

	unsigned long		last_active;	/* K: last active timestamp */
	unsigned int		flags;		/* L: flags */
	int			id;		/* I: worker id */

	/*
	 * Opaque string set with work_set_desc(). Printed out with task
	 * dump for debugging - WARN, BUG, panic or sysrq.
	 */
	char			desc[WORKER_DESC_LEN];

	/* used only by rescuers to point to the target workqueue */
	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */
};

/**
 * current_wq_worker - return struct worker if %current is a workqueue worker
 */
static inline struct worker *current_wq_worker(void)
{
	if (in_task() && (current->flags & PF_WQ_WORKER))
		return kthread_data(current);
	return NULL;
}

/*
 * Scheduler hooks for concurrency managed workqueue. Only to be used from
 * sched/ and workqueue.c.
 */
void wq_worker_running(struct task_struct *task);
void wq_worker_sleeping(struct task_struct *task);
void wq_worker_tick(struct task_struct *task);
work_func_t wq_worker_last_func(struct task_struct *task);

#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
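/*
 * Illustrative sketch, not part of this header: the intended use of
 * current_wq_worker() is to first check whether %current is a worker at
 * all and only then look at its fields, e.g. to report the work item
 * currently being processed.  The helper name is hypothetical.
 */
static struct work_struct *example_current_work(void)
{
	struct worker *worker = current_wq_worker();

	return worker ? worker->current_work : NULL;
}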
3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 
4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 
5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 
5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 
6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 
7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 
7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 
8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 
9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 
10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433 10434 10435 10436 10437 10438 10439 10440 10441 10442 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454 10455 10456 10457 10458 10459 10460 10461 10462 10463 10464 10465 10466 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490 10491 10492 10493 10494 10495 10496 10497 10498 10499 10500 10501 10502 10503 10504 10505 10506 10507 10508 10509 10510 10511 10512 10513 10514 10515 10516 10517 10518 10519 10520 10521 10522 10523 10524 10525 10526 10527 10528 10529 10530 10531 10532 10533 10534 10535 10536 10537 10538 10539 10540 10541 10542 10543 10544 10545 10546 10547 10548 10549 10550 10551 10552 10553 10554 10555 10556 10557 10558 10559 10560 10561 10562 10563 10564 10565 10566 10567 10568 10569 10570 10571 10572 10573 10574 10575 10576 10577 10578 10579 10580 10581 10582 10583 10584 10585 10586 10587 10588 10589 10590 10591 10592 10593 10594 10595 10596 10597 10598 10599 10600 10601 10602 10603 10604 10605 10606 10607 10608 10609 10610 10611 10612 10613 10614 10615 10616 10617 10618 10619 10620 10621 10622 10623 10624 10625 10626 10627 10628 10629 10630 10631 10632 10633 10634 10635 10636 10637 10638 10639 10640 10641 10642 10643 10644 10645 10646 10647 10648 10649 10650 10651 10652 10653 10654 10655 10656 10657 10658 10659 10660 10661 10662 10663 10664 
10665 10666 10667 10668 10669 10670 10671 10672 10673 10674 10675 10676 10677 10678 10679 10680 10681 10682 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 10707 10708 10709 10710 10711 10712 10713 10714 10715 10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730 10731 10732 10733 10734 10735 10736 10737 10738 10739 10740 10741 10742 10743 10744 10745 10746 10747 10748 10749 10750 10751 10752 10753 10754 10755 10756 10757 10758 10759 10760 10761 10762 10763 10764 10765 10766 10767 10768 10769 10770 10771 10772 10773 10774 10775 10776 10777 10778 10779 10780 10781 10782 10783 10784 10785 10786 10787 10788 10789 10790 10791 10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802 10803 10804 10805 10806 10807 10808 10809 10810 10811 10812 10813 10814 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826 10827 10828 10829 10830 10831 10832 10833 10834 10835 10836 10837 10838 10839 10840 10841 10842 10843 10844 10845 10846 10847 10848 10849 10850 10851 10852 10853 10854 10855 10856 10857 10858 10859 10860 10861 10862 10863 10864 10865 10866 10867 10868 10869 10870 10871 10872 10873 10874 10875 10876 10877 10878 10879 10880 10881 10882 10883 10884 10885 10886 10887 10888 10889 10890 10891 10892 10893 10894 10895 10896 10897 10898 10899 10900 10901 10902 10903 10904 10905 10906 10907 10908 10909 10910 10911 10912 10913 10914 10915 10916 10917 10918 10919 10920 10921 10922 10923 10924 10925 10926 10927 10928 10929 10930 10931 10932 10933 10934 10935 10936 10937 10938 10939 10940 10941 10942 10943 10944 10945 10946 10947 10948 10949 10950 10951 10952 10953 10954 10955 10956 10957 10958 10959 10960 10961 10962 10963 10964 10965 10966 10967 10968 10969 10970 10971 10972 10973 10974 10975 10976 10977 10978 10979 10980 10981 10982 10983 10984 10985 10986 10987 10988 10989 10990 10991 10992 10993 10994 10995 10996 10997 10998 10999 11000 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 11013 11014 11015 11016 11017 11018 11019 11020 11021 11022 11023 11024 11025 11026 11027 11028 11029 11030 11031 11032 11033 11034 11035 11036 11037 11038 11039 11040 11041 11042 11043 11044 11045 11046 11047 11048 11049 11050 11051 11052 11053 11054 11055 11056 11057 11058 11059 11060 11061 11062 11063 11064 11065 11066 11067 11068 11069 11070 11071 11072 11073 11074 11075 11076 11077 11078 11079 11080 11081 11082 11083 11084 11085 11086 11087 11088 11089 11090 11091 11092 11093 11094 11095 11096 11097 11098 11099 11100 11101 11102 11103 11104 11105 11106 11107 11108 11109 11110 11111 11112 11113 11114 11115 11116 11117 11118 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 11134 11135 11136 11137 11138 11139 11140 11141 11142 11143 11144 11145 11146 11147 11148 11149 11150 11151 11152 11153 11154 11155 11156 11157 11158 11159 11160 11161 11162 11163 11164 11165 11166 11167 11168 11169 11170 11171 11172 11173 11174 11175 11176 11177 11178 11179 11180 11181 11182 11183 11184 11185 11186 11187 11188 11189 11190 11191 11192 11193 11194 11195 11196 11197 11198 11199 11200 11201 11202 11203 11204 11205 11206 11207 11208 11209 11210 11211 11212 11213 11214 11215 11216 11217 11218 11219 11220 11221 11222 11223 11224 11225 11226 11227 11228 11229 11230 11231 11232 11233 11234 11235 11236 11237 11238 11239 11240 11241 11242 11243 11244 11245 11246 11247 11248 11249 11250 11251 11252 11253 11254 11255 11256 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      NET3    Protocol independent device support routines.
 *
 *      Derived from the non IP parts of dev.c 1.0.19
 *              Authors:        Ross Biro
 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *      Additional Authors:
 *              Florian la Roche <rzsfl@rz.uni-sb.de>
 *              Alan Cox <gw4pts@gw4pts.ampr.org>
 *              David Hinds <dahinds@users.sourceforge.net>
 *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *              Adam Sulmicki <adam@cfar.umd.edu>
 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *      Changes:
 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
 *                                      to 2 if register_netdev gets called
 *                                      before net_dev_init & also removed a
 *                                      few lines of code in the process.
 *              Alan Cox        :       device private ioctl copies fields back.
 *              Alan Cox        :       Transmit queue code does relevant
 *                                      stunts to keep the queue safe.
 *              Alan Cox        :       Fixed double lock.
 *              Alan Cox        :       Fixed promisc NULL pointer trap
 *              ????????        :       Support the full private ioctl range
 *              Alan Cox        :       Moved ioctl permission check into
 *                                      drivers
 *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
 *              Alan Cox        :       100 backlog just doesn't cut it when
 *                                      you start doing multicast video 8)
 *              Alan Cox        :       Rewrote net_bh and list manager.
 *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
 *              Alan Cox        :       Took out transmit every packet pass
 *                                      Saved a few bytes in the ioctl handler
 *              Alan Cox        :       Network driver sets packet type before
 *                                      calling netif_rx. Saves a function
 *                                      call a packet.
 *              Alan Cox        :       Hashed net_bh()
 *              Richard Kooijman:       Timestamp fixes.
 *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
 *              Alan Cox        :       Device lock protection.
 *              Alan Cox        :       Fixed nasty side effect of device close
 *                                      changes.
 *              Rudi Cilibrasi  :       Pass the right thing to
 *                                      set_mac_address()
 *              Dave Miller     :       32bit quantity for the device lock to
 *                                      make it work out on a Sparc.
 *              Bjorn Ekwall    :       Added KERNELD hack.
 *              Alan Cox        :       Cleaned up the backlog initialise.
 *              Craig Metz      :       SIOCGIFCONF fix if space for under
 *                                      1 device.
 *              Thomas Bogendoerfer :   Return ENODEV for dev_open, if there
 *                                      is no device open function.
 *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
 *              Michael Chastain:       Fix signed/unsigned for SIOCGIFCONF
 *              Cyrus Durgin    :       Cleaned for KMOD
 *              Adam Sulmicki   :       Bug Fix : Network Device Unload
 *                                      A network device unload needs to purge
 *                                      the backlog queue.
 *              Paul Rusty Russell :    SIOCSIFNAME
 *              Pekka Riikonen  :       Netdev boot-time settings code
 *              Andrew Morton   :       Make unregister_netdevice wait
 *                                      indefinitely on dev->refcnt
 *              J Hadi Salim    :       - Backlog queue sampling
 *                                      - netif_rx() feedback
 */

#include <linux/uaccess.h>
#include <linux/bitmap.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dsa.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/gro.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <net/tcx.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/mpls.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <trace/events/qdisc.h>
#include <trace/events/xdp.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_netdev.h>
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
#include <linux/indirect_call_wrapper.h>
#include <net/devlink.h>
#include <linux/pm_runtime.h>
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
#include <net/rps.h>

#include "dev.h"
#include "net-sysfs.h"

static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;

static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_extack(unsigned long val,
                                           struct net_device *dev,
                                           struct netlink_ext_ack *extack);

static DEFINE_MUTEX(ifalias_mutex);

/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);

static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);

static DECLARE_RWSEM(devnet_rename_sem);

static inline void dev_base_seq_inc(struct net *net)
{
        unsigned int val = net->dev_base_seq + 1;

        WRITE_ONCE(net->dev_base_seq, val ?: 1);
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
        unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));

        return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}
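/*
 * Illustrative sketch (editor's addition, not part of the upstream file):
 * the hash helpers above and below bucket interfaces by name and by
 * ifindex.  A typical consumer of the name table is an RCU-protected
 * lookup via dev_get_by_name_rcu() (declared in <linux/netdevice.h>);
 * the helper name below is hypothetical:
 *
 *      static int example_ifindex_by_name(struct net *net, const char *name)
 *      {
 *              struct net_device *dev;
 *              int ifindex = 0;
 *
 *              rcu_read_lock();
 *              dev = dev_get_by_name_rcu(net, name);
 *              if (dev)
 *                      ifindex = dev->ifindex;
 *              rcu_read_unlock();
 *              return ifindex;
 *      }
 *
 * No reference is taken on the device, so the pointer is only valid inside
 * the RCU read-side section; callers that keep it longer must take a
 * reference (e.g. dev_hold()) before rcu_read_unlock().
 */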
static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
        return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock_irqsave(struct softnet_data *sd,
                                    unsigned long *flags)
{
        if (IS_ENABLED(CONFIG_RPS))
                spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                local_irq_save(*flags);
}

static inline void rps_lock_irq_disable(struct softnet_data *sd)
{
        if (IS_ENABLED(CONFIG_RPS))
                spin_lock_irq(&sd->input_pkt_queue.lock);
        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                local_irq_disable();
}

static inline void rps_unlock_irq_restore(struct softnet_data *sd,
                                          unsigned long *flags)
{
        if (IS_ENABLED(CONFIG_RPS))
                spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                local_irq_restore(*flags);
}

static inline void rps_unlock_irq_enable(struct softnet_data *sd)
{
        if (IS_ENABLED(CONFIG_RPS))
                spin_unlock_irq(&sd->input_pkt_queue.lock);
        else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                local_irq_enable();
}

static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
                                                       const char *name)
{
        struct netdev_name_node *name_node;

        name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
        if (!name_node)
                return NULL;
        INIT_HLIST_NODE(&name_node->hlist);
        name_node->dev = dev;
        name_node->name = name;
        return name_node;
}

static struct netdev_name_node *
netdev_name_node_head_alloc(struct net_device *dev)
{
        struct netdev_name_node *name_node;

        name_node = netdev_name_node_alloc(dev, dev->name);
        if (!name_node)
                return NULL;
        INIT_LIST_HEAD(&name_node->list);
        return name_node;
}

static void netdev_name_node_free(struct netdev_name_node *name_node)
{
        kfree(name_node);
}

static void netdev_name_node_add(struct net *net,
                                 struct netdev_name_node *name_node)
{
        hlist_add_head_rcu(&name_node->hlist,
                           dev_name_hash(net, name_node->name));
}

static void netdev_name_node_del(struct netdev_name_node *name_node)
{
        hlist_del_rcu(&name_node->hlist);
}

static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
                                                        const char *name)
{
        struct hlist_head *head = dev_name_hash(net, name);
        struct netdev_name_node *name_node;

        hlist_for_each_entry(name_node, head, hlist)
                if (!strcmp(name_node->name, name))
                        return name_node;
        return NULL;
}

static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
                                                            const char *name)
{
        struct hlist_head *head = dev_name_hash(net, name);
        struct netdev_name_node *name_node;

        hlist_for_each_entry_rcu(name_node, head, hlist)
                if (!strcmp(name_node->name, name))
                        return name_node;
        return NULL;
}

bool netdev_name_in_use(struct net *net, const char *name)
{
        return netdev_name_node_lookup(net, name);
}
EXPORT_SYMBOL(netdev_name_in_use);

int netdev_name_node_alt_create(struct net_device *dev, const char *name)
{
        struct netdev_name_node *name_node;
        struct net *net = dev_net(dev);

        name_node = netdev_name_node_lookup(net, name);
        if (name_node)
                return -EEXIST;
        name_node = netdev_name_node_alloc(dev, name);
        if (!name_node)
                return -ENOMEM;
        netdev_name_node_add(net, name_node);
        /* The node that holds dev->name acts as a head of per-device list. */
        list_add_tail_rcu(&name_node->list, &dev->name_node->list);

        return 0;
}

static void netdev_name_node_alt_free(struct rcu_head *head)
{
        struct netdev_name_node *name_node =
                container_of(head, struct netdev_name_node, rcu);

        kfree(name_node->name);
        netdev_name_node_free(name_node);
}

static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
        netdev_name_node_del(name_node);
        list_del(&name_node->list);
        call_rcu(&name_node->rcu, netdev_name_node_alt_free);
}

int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
{
        struct netdev_name_node *name_node;
        struct net *net = dev_net(dev);

        name_node = netdev_name_node_lookup(net, name);
        if (!name_node)
                return -ENOENT;
        /* lookup might have found our primary name or a name belonging
         * to another device.
         */
        if (name_node == dev->name_node || name_node->dev != dev)
                return -EINVAL;

        __netdev_name_node_alt_destroy(name_node);
        return 0;
}

static void netdev_name_node_alt_flush(struct net_device *dev)
{
        struct netdev_name_node *name_node, *tmp;

        list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) {
                list_del(&name_node->list);
                netdev_name_node_alt_free(&name_node->rcu);
        }
}

/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
        struct netdev_name_node *name_node;
        struct net *net = dev_net(dev);

        ASSERT_RTNL();

        list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
        netdev_name_node_add(net, dev->name_node);
        hlist_add_head_rcu(&dev->index_hlist,
                           dev_index_hash(net, dev->ifindex));

        netdev_for_each_altname(dev, name_node)
                netdev_name_node_add(net, name_node);

        /* We reserved the ifindex, this can't fail */
        WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));

        dev_base_seq_inc(net);
}

/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
        struct netdev_name_node *name_node;
        struct net *net = dev_net(dev);

        ASSERT_RTNL();

        xa_erase(&net->dev_by_index, dev->ifindex);

        netdev_for_each_altname(dev, name_node)
                netdev_name_node_del(name_node);

        /* Unlink dev from the device chain */
        list_del_rcu(&dev->dev_list);
        netdev_name_node_del(dev->name_node);
        hlist_del_rcu(&dev->index_hlist);

        dev_base_seq_inc(dev_net(dev));
}

/*
 *      Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *      Device drivers call our routines to queue packets here. We empty the
 *      queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
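/*
 * Illustrative sketch (editor's addition, not part of the upstream file):
 * softnet_data is a per-CPU structure, so diagnostic code walks every
 * CPU's instance; the counters named below (processed, dropped,
 * time_squeeze) are the kind of per-CPU statistics exposed through
 * /proc/net/softnet_stat.  The function name is hypothetical:
 *
 *      static void example_dump_softnet(void)
 *      {
 *              int cpu;
 *
 *              for_each_possible_cpu(cpu) {
 *                      struct softnet_data *sd = &per_cpu(softnet_data, cpu);
 *
 *                      pr_info("cpu%d: processed=%u dropped=%u squeezed=%u\n",
 *                              cpu, sd->processed, sd->dropped,
 *                              sd->time_squeeze);
 *              }
 *      }
 */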
*/ static DEFINE_PER_CPU(struct page_pool *, system_page_pool); #ifdef CONFIG_LOCKDEP /* * register_netdevice() inits txq->_xmit_lock and sets lockdep class * according to dev->type */ static const unsigned short netdev_lock_type[] = { ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; static const char *const netdev_lock_name[] = { "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; static inline unsigned short netdev_lock_pos(unsigned short dev_type) { int i; for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) if (netdev_lock_type[i] == dev_type) return i; /* the last key is used by default */ return ARRAY_SIZE(netdev_lock_type) - 1; } static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, unsigned short dev_type) { int i; i = netdev_lock_pos(dev_type); lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], netdev_lock_name[i]); } static inline void netdev_set_addr_lockdep_class(struct net_device *dev) { int i; i = netdev_lock_pos(dev->type); lockdep_set_class_and_name(&dev->addr_list_lock, &netdev_addr_lock_key[i], netdev_lock_name[i]); } #else static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, unsigned short dev_type) { } static inline void netdev_set_addr_lockdep_class(struct net_device *dev) { } #endif /******************************************************************************* * * Protocol management and registration routines * *******************************************************************************/ /* * Add a protocol ID to the list. Now that the input handler is * smarter we can dispense with all the messy stuff that used to be * here. * * BEWARE!!! 
Protocol handlers, mangling input packets, * MUST BE last in hash buckets and checking protocol handlers * MUST start from promiscuous ptype_all chain in net_bh. * It is true now, do not change it. * Explanation follows: if protocol handler, mangling packet, will * be the first on list, it is not able to sense, that packet * is cloned and should be copied-on-write, so that it will * change it and subsequent readers will get broken packet. * --ANK (980803) */ static inline struct list_head *ptype_head(const struct packet_type *pt) { if (pt->type == htons(ETH_P_ALL)) return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all; else return pt->dev ? &pt->dev->ptype_specific : &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; } /** * dev_add_pack - add packet handler * @pt: packet type declaration * * Add a protocol handler to the networking stack. The passed &packet_type * is linked into kernel lists and may not be freed until it has been * removed from the kernel lists. * * This call does not sleep therefore it can not * guarantee all CPU's that are in middle of receiving packets * will see the new packet type (until the next received packet). */ void dev_add_pack(struct packet_type *pt) { struct list_head *head = ptype_head(pt); spin_lock(&ptype_lock); list_add_rcu(&pt->list, head); spin_unlock(&ptype_lock); } EXPORT_SYMBOL(dev_add_pack); /** * __dev_remove_pack - remove packet handler * @pt: packet type declaration * * Remove a protocol handler that was previously added to the kernel * protocol handlers by dev_add_pack(). The passed &packet_type is removed * from the kernel lists and can be freed or reused once this function * returns. * * The packet type might still be in use by receivers * and must not be freed until after all the CPU's have gone * through a quiescent state. */ void __dev_remove_pack(struct packet_type *pt) { struct list_head *head = ptype_head(pt); struct packet_type *pt1; spin_lock(&ptype_lock); list_for_each_entry(pt1, head, list) { if (pt == pt1) { list_del_rcu(&pt->list); goto out; } } pr_warn("dev_remove_pack: %p not found\n", pt); out: spin_unlock(&ptype_lock); } EXPORT_SYMBOL(__dev_remove_pack); /** * dev_remove_pack - remove packet handler * @pt: packet type declaration * * Remove a protocol handler that was previously added to the kernel * protocol handlers by dev_add_pack(). The passed &packet_type is removed * from the kernel lists and can be freed or reused once this function * returns. * * This call sleeps to guarantee that no CPU is looking at the packet * type after return. */ void dev_remove_pack(struct packet_type *pt) { __dev_remove_pack(pt); synchronize_net(); } EXPORT_SYMBOL(dev_remove_pack); /******************************************************************************* * * Device Interface Subroutines * *******************************************************************************/ /** * dev_get_iflink - get 'iflink' value of a interface * @dev: targeted interface * * Indicates the ifindex the interface is linked to. * Physical interfaces have the same 'ifindex' and 'iflink' values. */ int dev_get_iflink(const struct net_device *dev) { if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); return READ_ONCE(dev->ifindex); } EXPORT_SYMBOL(dev_get_iflink); /** * dev_fill_metadata_dst - Retrieve tunnel egress information. * @dev: targeted interface * @skb: The packet. * * For better visibility of tunnel traffic OVS needs to retrieve * egress tunnel information for a packet. 
Following API allows * user to get this info. */ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info; if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst) return -EINVAL; info = skb_tunnel_info_unclone(skb); if (!info) return -ENOMEM; if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX))) return -EINVAL; return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb); } EXPORT_SYMBOL_GPL(dev_fill_metadata_dst); static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack) { int k = stack->num_paths++; if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX)) return NULL; return &stack->path[k]; } int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, struct net_device_path_stack *stack) { const struct net_device *last_dev; struct net_device_path_ctx ctx = { .dev = dev, }; struct net_device_path *path; int ret = 0; memcpy(ctx.daddr, daddr, sizeof(ctx.daddr)); stack->num_paths = 0; while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) { last_dev = ctx.dev; path = dev_fwd_path(stack); if (!path) return -1; memset(path, 0, sizeof(struct net_device_path)); ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path); if (ret < 0) return -1; if (WARN_ON_ONCE(last_dev == ctx.dev)) return -1; } if (!ctx.dev) return ret; path = dev_fwd_path(stack); if (!path) return -1; path->type = DEV_PATH_ETHERNET; path->dev = ctx.dev; return ret; } EXPORT_SYMBOL_GPL(dev_fill_forward_path); /** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace * @name: name to find * * Find an interface by name. Must be called under RTNL semaphore. * If the name is found a pointer to the device is returned. * If the name is not found then %NULL is returned. The * reference counters are not incremented so the caller must be * careful with locks. */ struct net_device *__dev_get_by_name(struct net *net, const char *name) { struct netdev_name_node *node_name; node_name = netdev_name_node_lookup(net, name); return node_name ? node_name->dev : NULL; } EXPORT_SYMBOL(__dev_get_by_name); /** * dev_get_by_name_rcu - find a device by its name * @net: the applicable net namespace * @name: name to find * * Find an interface by name. * If the name is found a pointer to the device is returned. * If the name is not found then %NULL is returned. * The reference counters are not incremented so the caller must be * careful with locks. The caller must hold RCU lock. */ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) { struct netdev_name_node *node_name; node_name = netdev_name_node_lookup_rcu(net, name); return node_name ? node_name->dev : NULL; } EXPORT_SYMBOL(dev_get_by_name_rcu); /* Deprecated for new users, call netdev_get_by_name() instead */ struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); dev_hold(dev); rcu_read_unlock(); return dev; } EXPORT_SYMBOL(dev_get_by_name); /** * netdev_get_by_name() - find a device by its name * @net: the applicable net namespace * @name: name to find * @tracker: tracking object for the acquired reference * @gfp: allocation flags for the tracker * * Find an interface by name. This can be called from any * context and does its own locking. The returned handle has * the usage count incremented and the caller must use netdev_put() to * release it when it is no longer needed. %NULL is returned if no * matching device is found. 
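 *
 * Illustrative usage sketch (hypothetical caller and device name):
 *
 *	netdevice_tracker tracker;
 *	struct net_device *dev;
 *
 *	dev = netdev_get_by_name(net, "eth0", &tracker, GFP_KERNEL);
 *	if (!dev)
 *		return -ENODEV;
 *	...
 *	netdev_put(dev, &tracker);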
*/ struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp) { struct net_device *dev; dev = dev_get_by_name(net, name); if (dev) netdev_tracker_alloc(dev, tracker, gfp); return dev; } EXPORT_SYMBOL(netdev_get_by_name); /** * __dev_get_by_index - find a device by its ifindex * @net: the applicable net namespace * @ifindex: index of device * * Search for an interface by index. Returns %NULL if the device * is not found or a pointer to the device. The device has not * had its reference counter increased so the caller must be careful * about locking. The caller must hold the RTNL semaphore. */ struct net_device *__dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); hlist_for_each_entry(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; return NULL; } EXPORT_SYMBOL(__dev_get_by_index); /** * dev_get_by_index_rcu - find a device by its ifindex * @net: the applicable net namespace * @ifindex: index of device * * Search for an interface by index. Returns %NULL if the device * is not found or a pointer to the device. The device has not * had its reference counter increased so the caller must be careful * about locking. The caller must hold RCU lock. */ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) { struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); hlist_for_each_entry_rcu(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; return NULL; } EXPORT_SYMBOL(dev_get_by_index_rcu); /* Deprecated for new users, call netdev_get_by_index() instead */ struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); dev_hold(dev); rcu_read_unlock(); return dev; } EXPORT_SYMBOL(dev_get_by_index); /** * netdev_get_by_index() - find a device by its ifindex * @net: the applicable net namespace * @ifindex: index of device * @tracker: tracking object for the acquired reference * @gfp: allocation flags for the tracker * * Search for an interface by index. Returns NULL if the device * is not found or a pointer to the device. The device returned has * had a reference added and the pointer is safe until the user calls * netdev_put() to indicate they have finished with it. */ struct net_device *netdev_get_by_index(struct net *net, int ifindex, netdevice_tracker *tracker, gfp_t gfp) { struct net_device *dev; dev = dev_get_by_index(net, ifindex); if (dev) netdev_tracker_alloc(dev, tracker, gfp); return dev; } EXPORT_SYMBOL(netdev_get_by_index); /** * dev_get_by_napi_id - find a device by napi_id * @napi_id: ID of the NAPI struct * * Search for an interface by NAPI ID. Returns %NULL if the device * is not found or a pointer to the device. The device has not had * its reference counter increased so the caller must be careful * about locking. The caller must hold RCU lock. */ struct net_device *dev_get_by_napi_id(unsigned int napi_id) { struct napi_struct *napi; WARN_ON_ONCE(!rcu_read_lock_held()); if (napi_id < MIN_NAPI_ID) return NULL; napi = napi_by_id(napi_id); return napi ? napi->dev : NULL; } EXPORT_SYMBOL(dev_get_by_napi_id); /** * netdev_get_name - get a netdevice name, knowing its ifindex. * @net: network namespace * @name: a pointer to the buffer where the name will be stored. * @ifindex: the ifindex of the interface to get the name from. 
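 *
 * Return: 0 on success, -ENODEV if no device with @ifindex exists in @net.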
*/ int netdev_get_name(struct net *net, char *name, int ifindex) { struct net_device *dev; int ret; down_read(&devnet_rename_sem); rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (!dev) { ret = -ENODEV; goto out; } strcpy(name, dev->name); ret = 0; out: rcu_read_unlock(); up_read(&devnet_rename_sem); return ret; } /** * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace * @type: media type of device * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device * is not found or a pointer to the device. * The caller must hold RCU or RTNL. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * */ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *ha) { struct net_device *dev; for_each_netdev_rcu(net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; return NULL; } EXPORT_SYMBOL(dev_getbyhwaddr_rcu); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev, *ret = NULL; rcu_read_lock(); for_each_netdev_rcu(net, dev) if (dev->type == type) { dev_hold(dev); ret = dev; break; } rcu_read_unlock(); return ret; } EXPORT_SYMBOL(dev_getfirstbyhwtype); /** * __dev_get_by_flags - find any device with given flags * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * * Search for any interface with the given flags. Returns NULL if a device * is not found or a pointer to the device. Must be called inside * rtnl_lock(), and result refcount is unchanged. */ struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ASSERT_RTNL(); ret = NULL; for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { ret = dev; break; } } return ret; } EXPORT_SYMBOL(__dev_get_by_flags); /** * dev_valid_name - check if name is okay for network device * @name: name string * * Network device names need to be valid file names to * allow sysfs to work. We also disallow any kind of * whitespace. */ bool dev_valid_name(const char *name) { if (*name == '\0') return false; if (strnlen(name, IFNAMSIZ) == IFNAMSIZ) return false; if (!strcmp(name, ".") || !strcmp(name, "..")) return false; while (*name) { if (*name == '/' || *name == ':' || isspace(*name)) return false; name++; } return true; } EXPORT_SYMBOL(dev_valid_name); /** * __dev_alloc_name - allocate a name for a device * @net: network namespace to allocate the device name in * @name: name format string * @res: result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses * the first empty slot. The caller must hold the dev_base or rtnl lock * while allocating the name and adding the device in order to avoid * duplicates. * Limited to bits_per_byte * page size devices (ie 32K on most platforms). * Returns the number of the unit assigned or a negative errno code. */ static int __dev_alloc_name(struct net *net, const char *name, char *res) { int i = 0; const char *p; const int max_netdevices = 8*PAGE_SIZE; unsigned long *inuse; struct net_device *d; char buf[IFNAMSIZ]; /* Verify the string as this thing may have come from the user. * There must be one "%d" and no other "%" characters. 
*/ p = strchr(name, '%'); if (!p || p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; /* Use one page as a bit array of possible slots */ inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC); if (!inuse) return -ENOMEM; for_each_netdev(net, d) { struct netdev_name_node *name_node; netdev_for_each_altname(d, name_node) { if (!sscanf(name_node->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; /* avoid cases where sscanf is not exact inverse of printf */ snprintf(buf, IFNAMSIZ, name, i); if (!strncmp(buf, name_node->name, IFNAMSIZ)) __set_bit(i, inuse); } if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; /* avoid cases where sscanf is not exact inverse of printf */ snprintf(buf, IFNAMSIZ, name, i); if (!strncmp(buf, d->name, IFNAMSIZ)) __set_bit(i, inuse); } i = find_first_zero_bit(inuse, max_netdevices); bitmap_free(inuse); if (i == max_netdevices) return -ENFILE; /* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */ strscpy(buf, name, IFNAMSIZ); snprintf(res, IFNAMSIZ, buf, i); return i; } /* Returns negative errno or allocated unit id (see __dev_alloc_name()) */ static int dev_prep_valid_name(struct net *net, struct net_device *dev, const char *want_name, char *out_name, int dup_errno) { if (!dev_valid_name(want_name)) return -EINVAL; if (strchr(want_name, '%')) return __dev_alloc_name(net, want_name, out_name); if (netdev_name_in_use(net, want_name)) return -dup_errno; if (out_name != want_name) strscpy(out_name, want_name, IFNAMSIZ); return 0; } /** * dev_alloc_name - allocate a name for a device * @dev: device * @name: name format string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses * the first empty slot. The caller must hold the dev_base or rtnl lock * while allocating the name and adding the device in order to avoid * duplicates. * Limited to bits_per_byte * page size devices (ie 32K on most platforms). * Returns the number of the unit assigned or a negative errno code. */ int dev_alloc_name(struct net_device *dev, const char *name) { return dev_prep_valid_name(dev_net(dev), dev, name, dev->name, ENFILE); } EXPORT_SYMBOL(dev_alloc_name); static int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { int ret; ret = dev_prep_valid_name(net, dev, name, dev->name, EEXIST); return ret < 0 ? ret : 0; } /** * dev_change_name - change name of a device * @dev: device * @newname: name (or format string) must be at least IFNAMSIZ * * Change name of a device, can pass format strings "eth%d". * for wildcarding. */ int dev_change_name(struct net_device *dev, const char *newname) { unsigned char old_assign_type; char oldname[IFNAMSIZ]; int err = 0; int ret; struct net *net; ASSERT_RTNL(); BUG_ON(!dev_net(dev)); net = dev_net(dev); down_write(&devnet_rename_sem); if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { up_write(&devnet_rename_sem); return 0; } memcpy(oldname, dev->name, IFNAMSIZ); err = dev_get_valid_name(net, dev, newname); if (err < 0) { up_write(&devnet_rename_sem); return err; } if (oldname[0] && !strchr(oldname, '%')) netdev_info(dev, "renamed from %s%s\n", oldname, dev->flags & IFF_UP ? 
" (while UP)" : ""); old_assign_type = dev->name_assign_type; WRITE_ONCE(dev->name_assign_type, NET_NAME_RENAMED); rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); WRITE_ONCE(dev->name_assign_type, old_assign_type); up_write(&devnet_rename_sem); return ret; } up_write(&devnet_rename_sem); netdev_adjacent_rename_links(dev, oldname); netdev_name_node_del(dev->name_node); synchronize_net(); netdev_name_node_add(net, dev->name_node); ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); ret = notifier_to_errno(ret); if (ret) { /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; down_write(&devnet_rename_sem); memcpy(dev->name, oldname, IFNAMSIZ); memcpy(oldname, newname, IFNAMSIZ); WRITE_ONCE(dev->name_assign_type, old_assign_type); old_assign_type = NET_NAME_RENAMED; goto rollback; } else { netdev_err(dev, "name change rollback failed: %d\n", ret); } } return err; } /** * dev_set_alias - change ifalias of a device * @dev: device * @alias: name up to IFALIASZ * @len: limit of bytes to copy from info * * Set ifalias for a device, */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { struct dev_ifalias *new_alias = NULL; if (len >= IFALIASZ) return -EINVAL; if (len) { new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL); if (!new_alias) return -ENOMEM; memcpy(new_alias->ifalias, alias, len); new_alias->ifalias[len] = 0; } mutex_lock(&ifalias_mutex); new_alias = rcu_replace_pointer(dev->ifalias, new_alias, mutex_is_locked(&ifalias_mutex)); mutex_unlock(&ifalias_mutex); if (new_alias) kfree_rcu(new_alias, rcuhead); return len; } EXPORT_SYMBOL(dev_set_alias); /** * dev_get_alias - get ifalias of a device * @dev: device * @name: buffer to store name of ifalias * @len: size of buffer * * get ifalias for a device. Caller must make sure dev cannot go * away, e.g. rcu read lock or own a reference count to device. */ int dev_get_alias(const struct net_device *dev, char *name, size_t len) { const struct dev_ifalias *alias; int ret = 0; rcu_read_lock(); alias = rcu_dereference(dev->ifalias); if (alias) ret = snprintf(name, len, "%s", alias->ifalias); rcu_read_unlock(); return ret; } /** * netdev_features_change - device changes features * @dev: device to cause notification * * Called to indicate a device has changed features. */ void netdev_features_change(struct net_device *dev) { call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev); } EXPORT_SYMBOL(netdev_features_change); /** * netdev_state_change - device changes state * @dev: device to cause notification * * Called to indicate a device has changed state. This function calls * the notifier chains for netdev_chain and sends a NEWLINK message * to the routing socket. */ void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { struct netdev_notifier_change_info change_info = { .info.dev = dev, }; call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL); } } EXPORT_SYMBOL(netdev_state_change); /** * __netdev_notify_peers - notify network peers about existence of @dev, * to be called when rtnl lock is already held. * @dev: network device * * Generate traffic such that interested network peers are aware of * @dev, such as by generating a gratuitous ARP. This may be used when * a device wants to inform the rest of the network about some sort of * reconfiguration such as a failover event or virtual machine * migration. 
*/ void __netdev_notify_peers(struct net_device *dev) { ASSERT_RTNL(); call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); } EXPORT_SYMBOL(__netdev_notify_peers); /** * netdev_notify_peers - notify network peers about existence of @dev * @dev: network device * * Generate traffic such that interested network peers are aware of * @dev, such as by generating a gratuitous ARP. This may be used when * a device wants to inform the rest of the network about some sort of * reconfiguration such as a failover event or virtual machine * migration. */ void netdev_notify_peers(struct net_device *dev) { rtnl_lock(); __netdev_notify_peers(dev); rtnl_unlock(); } EXPORT_SYMBOL(netdev_notify_peers); static int napi_threaded_poll(void *data); static int napi_kthread_create(struct napi_struct *n) { int err = 0; /* Create and wake up the kthread once to put it in * TASK_INTERRUPTIBLE mode to avoid the blocked task * warning and work with loadavg. */ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", n->dev->name, n->napi_id); if (IS_ERR(n->thread)) { err = PTR_ERR(n->thread); pr_err("kthread_run failed with err %d\n", err); n->thread = NULL; } return err; } static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; int ret; ASSERT_RTNL(); dev_addr_check(dev); if (!netif_device_present(dev)) { /* may be detached because parent is runtime-suspended */ if (dev->dev.parent) pm_runtime_resume(dev->dev.parent); if (!netif_device_present(dev)) return -ENODEV; } /* Block netpoll from trying to do any rx path servicing. * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ netpoll_poll_disable(dev); ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack); ret = notifier_to_errno(ret); if (ret) return ret; set_bit(__LINK_STATE_START, &dev->state); if (ops->ndo_validate_addr) ret = ops->ndo_validate_addr(dev); if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); netpoll_poll_enable(dev); if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { dev->flags |= IFF_UP; dev_set_rx_mode(dev); dev_activate(dev); add_device_randomness(dev->dev_addr, dev->addr_len); } return ret; } /** * dev_open - prepare an interface for use. * @dev: device to open * @extack: netlink extended ack * * Takes a device from down to up state. The device's private open * function is invoked and then the multicast lists are loaded. Finally * the device is moved into the up state and a %NETDEV_UP message is * sent to the netdev notifier chain. * * Calling this function on an active interface is a nop. On a failure * a negative errno code is returned. */ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) { int ret; if (dev->flags & IFF_UP) return 0; ret = __dev_open(dev, extack); if (ret < 0) return ret; rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL); call_netdevice_notifiers(NETDEV_UP, dev); return ret; } EXPORT_SYMBOL(dev_open); static void __dev_close_many(struct list_head *head) { struct net_device *dev; ASSERT_RTNL(); might_sleep(); list_for_each_entry(dev, head, close_list) { /* Temporarily disable netpoll until the interface is down */ netpoll_poll_disable(dev); call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); /* Synchronize to scheduled poll. We cannot touch poll list, it * can be even on different cpu. So just clear netif_running(). 
* * dev->stop() will invoke napi_disable() on all of it's * napi_struct instances on this device. */ smp_mb__after_atomic(); /* Commit netif_running(). */ } dev_deactivate_many(head); list_for_each_entry(dev, head, close_list) { const struct net_device_ops *ops = dev->netdev_ops; /* * Call the device specific close. This cannot fail. * Only if device is UP * * We allow it to be called even after a DETACH hot-plug * event. */ if (ops->ndo_stop) ops->ndo_stop(dev); dev->flags &= ~IFF_UP; netpoll_poll_enable(dev); } } static void __dev_close(struct net_device *dev) { LIST_HEAD(single); list_add(&dev->close_list, &single); __dev_close_many(&single); list_del(&single); } void dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; /* Remove the devices that don't need to be closed */ list_for_each_entry_safe(dev, tmp, head, close_list) if (!(dev->flags & IFF_UP)) list_del_init(&dev->close_list); __dev_close_many(head); list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL); call_netdevice_notifiers(NETDEV_DOWN, dev); if (unlink) list_del_init(&dev->close_list); } } EXPORT_SYMBOL(dev_close_many); /** * dev_close - shutdown an interface. * @dev: device to shutdown * * This function moves an active device into down state. A * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier * chain. */ void dev_close(struct net_device *dev) { if (dev->flags & IFF_UP) { LIST_HEAD(single); list_add(&dev->close_list, &single); dev_close_many(&single, true); list_del(&single); } } EXPORT_SYMBOL(dev_close); /** * dev_disable_lro - disable Large Receive Offload on a device * @dev: device * * Disable Large Receive Offload (LRO) on a net device. Must be * called under RTNL. This is needed if received packets may be * forwarded to another interface. */ void dev_disable_lro(struct net_device *dev) { struct net_device *lower_dev; struct list_head *iter; dev->wanted_features &= ~NETIF_F_LRO; netdev_update_features(dev); if (unlikely(dev->features & NETIF_F_LRO)) netdev_WARN(dev, "failed to disable LRO!\n"); netdev_for_each_lower_dev(dev, lower_dev, iter) dev_disable_lro(lower_dev); } EXPORT_SYMBOL(dev_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device * @dev: device * * Disable HW Generic Receive Offload (GRO_HW) on a net device. Must be * called under RTNL. This is needed if Generic XDP is installed on * the device. 
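 * (Generic XDP expects to see individual, MTU-sized frames; packets
 * already aggregated by hardware GRO would defeat that, hence this knob.)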
*/ static void dev_disable_gro_hw(struct net_device *dev) { dev->wanted_features &= ~NETIF_F_GRO_HW; netdev_update_features(dev); if (unlikely(dev->features & NETIF_F_GRO_HW)) netdev_WARN(dev, "failed to disable GRO_HW!\n"); } const char *netdev_cmd_to_name(enum netdev_cmd cmd) { #define N(val) \ case NETDEV_##val: \ return "NETDEV_" __stringify(val); switch (cmd) { N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER) N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE) N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE) N(POST_INIT) N(PRE_UNINIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER) N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO) N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE) N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA) N(XDP_FEAT_CHANGE) } #undef N return "UNKNOWN_NETDEV_EVENT"; } EXPORT_SYMBOL_GPL(netdev_cmd_to_name); static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { struct netdev_notifier_info info = { .dev = dev, }; return nb->notifier_call(nb, val, &info); } static int call_netdevice_register_notifiers(struct notifier_block *nb, struct net_device *dev) { int err; err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); err = notifier_to_errno(err); if (err) return err; if (!(dev->flags & IFF_UP)) return 0; call_netdevice_notifier(nb, NETDEV_UP, dev); return 0; } static void call_netdevice_unregister_notifiers(struct notifier_block *nb, struct net_device *dev) { if (dev->flags & IFF_UP) { call_netdevice_notifier(nb, NETDEV_GOING_DOWN, dev); call_netdevice_notifier(nb, NETDEV_DOWN, dev); } call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } static int call_netdevice_register_net_notifiers(struct notifier_block *nb, struct net *net) { struct net_device *dev; int err; for_each_netdev(net, dev) { err = call_netdevice_register_notifiers(nb, dev); if (err) goto rollback; } return 0; rollback: for_each_netdev_continue_reverse(net, dev) call_netdevice_unregister_notifiers(nb, dev); return err; } static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb, struct net *net) { struct net_device *dev; for_each_netdev(net, dev) call_netdevice_unregister_notifiers(nb, dev); } static int dev_boot_phase = 1; /** * register_netdevice_notifier - register a network notifier block * @nb: notifier * * Register a notifier to be called when network device events occur. * The notifier passed is linked into the kernel structures and must * not be reused until it has been unregistered. A negative errno code * is returned on a failure. * * When registered all registration and up events are replayed * to the new notifier to allow device to have a race free * view of the network device list. 
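 *
 * Illustrative usage sketch (hypothetical notifier):
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 *
 *		if (event == NETDEV_UP)
 *			pr_info("%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	err = register_netdevice_notifier(&my_nb);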
*/ int register_netdevice_notifier(struct notifier_block *nb) { struct net *net; int err; /* Close race with setup_net() and cleanup_net() */ down_write(&pernet_ops_rwsem); rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); if (err) goto unlock; if (dev_boot_phase) goto unlock; for_each_net(net) { err = call_netdevice_register_net_notifiers(nb, net); if (err) goto rollback; } unlock: rtnl_unlock(); up_write(&pernet_ops_rwsem); return err; rollback: for_each_net_continue_reverse(net) call_netdevice_unregister_net_notifiers(nb, net); raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } EXPORT_SYMBOL(register_netdevice_notifier); /** * unregister_netdevice_notifier - unregister a network notifier block * @nb: notifier * * Unregister a notifier previously registered by * register_netdevice_notifier(). The notifier is unlinked into the * kernel structures and may then be reused. A negative errno code * is returned on a failure. * * After unregistering unregister and down device events are synthesized * for all devices on the device list to the removed notifier to remove * the need for special case cleanup code. */ int unregister_netdevice_notifier(struct notifier_block *nb) { struct net *net; int err; /* Close race with setup_net() and cleanup_net() */ down_write(&pernet_ops_rwsem); rtnl_lock(); err = raw_notifier_chain_unregister(&netdev_chain, nb); if (err) goto unlock; for_each_net(net) call_netdevice_unregister_net_notifiers(nb, net); unlock: rtnl_unlock(); up_write(&pernet_ops_rwsem); return err; } EXPORT_SYMBOL(unregister_netdevice_notifier); static int __register_netdevice_notifier_net(struct net *net, struct notifier_block *nb, bool ignore_call_fail) { int err; err = raw_notifier_chain_register(&net->netdev_chain, nb); if (err) return err; if (dev_boot_phase) return 0; err = call_netdevice_register_net_notifiers(nb, net); if (err && !ignore_call_fail) goto chain_unregister; return 0; chain_unregister: raw_notifier_chain_unregister(&net->netdev_chain, nb); return err; } static int __unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb) { int err; err = raw_notifier_chain_unregister(&net->netdev_chain, nb); if (err) return err; call_netdevice_unregister_net_notifiers(nb, net); return 0; } /** * register_netdevice_notifier_net - register a per-netns network notifier block * @net: network namespace * @nb: notifier * * Register a notifier to be called when network device events occur. * The notifier passed is linked into the kernel structures and must * not be reused until it has been unregistered. A negative errno code * is returned on a failure. * * When registered all registration and up events are replayed * to the new notifier to allow device to have a race free * view of the network device list. */ int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb) { int err; rtnl_lock(); err = __register_netdevice_notifier_net(net, nb, false); rtnl_unlock(); return err; } EXPORT_SYMBOL(register_netdevice_notifier_net); /** * unregister_netdevice_notifier_net - unregister a per-netns * network notifier block * @net: network namespace * @nb: notifier * * Unregister a notifier previously registered by * register_netdevice_notifier_net(). The notifier is unlinked from the * kernel structures and may then be reused. A negative errno code * is returned on a failure. 
* * After unregistering unregister and down device events are synthesized * for all devices on the device list to the removed notifier to remove * the need for special case cleanup code. */ int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb) { int err; rtnl_lock(); err = __unregister_netdevice_notifier_net(net, nb); rtnl_unlock(); return err; } EXPORT_SYMBOL(unregister_netdevice_notifier_net); static void __move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, struct notifier_block *nb) { __unregister_netdevice_notifier_net(src_net, nb); __register_netdevice_notifier_net(dst_net, nb, true); } int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) { int err; rtnl_lock(); err = __register_netdevice_notifier_net(dev_net(dev), nb, false); if (!err) { nn->nb = nb; list_add(&nn->list, &dev->net_notifier_list); } rtnl_unlock(); return err; } EXPORT_SYMBOL(register_netdevice_notifier_dev_net); int unregister_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) { int err; rtnl_lock(); list_del(&nn->list); err = __unregister_netdevice_notifier_net(dev_net(dev), nb); rtnl_unlock(); return err; } EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net); static void move_netdevice_notifiers_dev_net(struct net_device *dev, struct net *net) { struct netdev_net_notifier *nn; list_for_each_entry(nn, &dev->net_notifier_list, list) __move_netdevice_notifier_net(dev_net(dev), net, nn->nb); } /** * call_netdevice_notifiers_info - call all network notifier blocks * @val: value passed unmodified to notifier function * @info: notifier information data * * Call all network notifier blocks. Parameters and return value * are as for raw_notifier_call_chain(). */ int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info) { struct net *net = dev_net(info->dev); int ret; ASSERT_RTNL(); /* Run per-netns notifier block chain first, then run the global one. * Hopefully, one day, the global one is going to be removed after * all notifier block registrators get converted to be per-netns. */ ret = raw_notifier_call_chain(&net->netdev_chain, val, info); if (ret & NOTIFY_STOP_MASK) return ret; return raw_notifier_call_chain(&netdev_chain, val, info); } /** * call_netdevice_notifiers_info_robust - call per-netns notifier blocks * for and rollback on error * @val_up: value passed unmodified to notifier function * @val_down: value passed unmodified to the notifier function when * recovering from an error on @val_up * @info: notifier information data * * Call all per-netns network notifier blocks, but not notifier blocks on * the global notifier chain. Parameters and return value are as for * raw_notifier_call_chain_robust(). 
*/ static int call_netdevice_notifiers_info_robust(unsigned long val_up, unsigned long val_down, struct netdev_notifier_info *info) { struct net *net = dev_net(info->dev); ASSERT_RTNL(); return raw_notifier_call_chain_robust(&net->netdev_chain, val_up, val_down, info); } static int call_netdevice_notifiers_extack(unsigned long val, struct net_device *dev, struct netlink_ext_ack *extack) { struct netdev_notifier_info info = { .dev = dev, .extack = extack, }; return call_netdevice_notifiers_info(val, &info); } /** * call_netdevice_notifiers - call all network notifier blocks * @val: value passed unmodified to notifier function * @dev: net_device pointer passed unmodified to notifier function * * Call all network notifier blocks. Parameters and return value * are as for raw_notifier_call_chain(). */ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { return call_netdevice_notifiers_extack(val, dev, NULL); } EXPORT_SYMBOL(call_netdevice_notifiers); /** * call_netdevice_notifiers_mtu - call all network notifier blocks * @val: value passed unmodified to notifier function * @dev: net_device pointer passed unmodified to notifier function * @arg: additional u32 argument passed to the notifier function * * Call all network notifier blocks. Parameters and return value * are as for raw_notifier_call_chain(). */ static int call_netdevice_notifiers_mtu(unsigned long val, struct net_device *dev, u32 arg) { struct netdev_notifier_info_ext info = { .info.dev = dev, .ext.mtu = arg, }; BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); return call_netdevice_notifiers_info(val, &info.info); } #ifdef CONFIG_NET_INGRESS static DEFINE_STATIC_KEY_FALSE(ingress_needed_key); void net_inc_ingress_queue(void) { static_branch_inc(&ingress_needed_key); } EXPORT_SYMBOL_GPL(net_inc_ingress_queue); void net_dec_ingress_queue(void) { static_branch_dec(&ingress_needed_key); } EXPORT_SYMBOL_GPL(net_dec_ingress_queue); #endif #ifdef CONFIG_NET_EGRESS static DEFINE_STATIC_KEY_FALSE(egress_needed_key); void net_inc_egress_queue(void) { static_branch_inc(&egress_needed_key); } EXPORT_SYMBOL_GPL(net_inc_egress_queue); void net_dec_egress_queue(void) { static_branch_dec(&egress_needed_key); } EXPORT_SYMBOL_GPL(net_dec_egress_queue); #endif DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); EXPORT_SYMBOL(netstamp_needed_key); #ifdef CONFIG_JUMP_LABEL static atomic_t netstamp_needed_deferred; static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) { int deferred = atomic_xchg(&netstamp_needed_deferred, 0); int wanted; wanted = atomic_add_return(deferred, &netstamp_wanted); if (wanted > 0) static_branch_enable(&netstamp_needed_key); else static_branch_disable(&netstamp_needed_key); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { #ifdef CONFIG_JUMP_LABEL int wanted = atomic_read(&netstamp_wanted); while (wanted > 0) { if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted + 1)) return; } atomic_inc(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_branch_inc(&netstamp_needed_key); #endif } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef CONFIG_JUMP_LABEL int wanted = atomic_read(&netstamp_wanted); while (wanted > 1) { if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted - 1)) return; } atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_branch_dec(&netstamp_needed_key); #endif } EXPORT_SYMBOL(net_disable_timestamp); static inline void 
net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; skb->mono_delivery_time = 0; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); } #define net_timestamp_check(COND, SKB) \ if (static_branch_unlikely(&netstamp_needed_key)) { \ if ((COND) && !(SKB)->tstamp) \ (SKB)->tstamp = ktime_get_real(); \ } \ bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb) { return __is_skb_forwardable(dev, skb, true); } EXPORT_SYMBOL_GPL(is_skb_forwardable); static int __dev_forward_skb2(struct net_device *dev, struct sk_buff *skb, bool check_mtu) { int ret = ____dev_forward_skb(dev, skb, check_mtu); if (likely(!ret)) { skb->protocol = eth_type_trans(skb, dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } return ret; } int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { return __dev_forward_skb2(dev, skb, true); } EXPORT_SYMBOL_GPL(__dev_forward_skb); /** * dev_forward_skb - loopback an skb to another netif * * @dev: destination network device * @skb: buffer to forward * * return values: * NET_RX_SUCCESS (no congestion) * NET_RX_DROP (packet was dropped, but freed) * * dev_forward_skb can be used for injecting an skb from the * start_xmit function of one device into the receive queue * of another device. * * The receiving device may be in another namespace, so * we have to clear all information in the skb that could * impact namespace isolation. */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb) { return __dev_forward_skb2(dev, skb, false) ?: netif_rx_internal(skb); } static inline int deliver_skb(struct sk_buff *skb, struct packet_type *pt_prev, struct net_device *orig_dev) { if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) return -ENOMEM; refcount_inc(&skb->users); return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } static inline void deliver_ptype_list_skb(struct sk_buff *skb, struct packet_type **pt, struct net_device *orig_dev, __be16 type, struct list_head *ptype_list) { struct packet_type *ptype, *pt_prev = *pt; list_for_each_entry_rcu(ptype, ptype_list, list) { if (ptype->type != type) continue; if (pt_prev) deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } *pt = pt_prev; } static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) { if (!ptype->af_packet_priv || !skb->sk) return false; if (ptype->id_match) return ptype->id_match(ptype, skb->sk); else if ((struct sock *)ptype->af_packet_priv == skb->sk) return true; return false; } /** * dev_nit_active - return true if any network interface taps are in use * * @dev: network device to check for the presence of taps */ bool dev_nit_active(struct net_device *dev) { return !list_empty(&net_hotdata.ptype_all) || !list_empty(&dev->ptype_all); } EXPORT_SYMBOL_GPL(dev_nit_active); /* * Support routine. Sends outgoing frames to any network * taps currently in use. 
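 * A "tap" is any packet_type registered for ETH_P_ALL, either globally
 * (net_hotdata.ptype_all) or per device (dev->ptype_all); AF_PACKET
 * sockets (e.g. tcpdump) are the typical consumers.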
*/ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct list_head *ptype_list = &net_hotdata.ptype_all; struct packet_type *ptype, *pt_prev = NULL; struct sk_buff *skb2 = NULL; rcu_read_lock(); again: list_for_each_entry_rcu(ptype, ptype_list, list) { if (READ_ONCE(ptype->ignore_outgoing)) continue; /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ if (skb_loop_sk(ptype, skb)) continue; if (pt_prev) { deliver_skb(skb2, pt_prev, skb->dev); pt_prev = ptype; continue; } /* need to clone skb, done only once */ skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) goto out_unlock; net_timestamp_set(skb2); /* skb->nh should be correctly * set by sender, so that the second statement is * just protection against buggy protocols. */ skb_reset_mac_header(skb2); if (skb_network_header(skb2) < skb2->data || skb_network_header(skb2) > skb_tail_pointer(skb2)) { net_crit_ratelimited("protocol %04x is buggy, dev %s\n", ntohs(skb2->protocol), dev->name); skb_reset_network_header(skb2); } skb2->transport_header = skb2->network_header; skb2->pkt_type = PACKET_OUTGOING; pt_prev = ptype; } if (ptype_list == &net_hotdata.ptype_all) { ptype_list = &dev->ptype_all; goto again; } out_unlock: if (pt_prev) { if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC)) pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); else kfree_skb(skb2); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(dev_queue_xmit_nit); /** * netif_setup_tc - Handle tc mappings on real_num_tx_queues change * @dev: Network device * @txq: number of queues available * * If real_num_tx_queues is changed the tc mappings may no longer be * valid. To resolve this verify the tc mapping remains valid and if * not NULL the mapping. With no priorities mapping to this * offset/count pair it will no longer be used. In the worst case TC0 * is invalid nothing can be done so disable priority mappings. If is * expected that drivers will fix this mapping if they can before * calling netif_set_real_num_tx_queues. */ static void netif_setup_tc(struct net_device *dev, unsigned int txq) { int i; struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; /* If TC0 is invalidated disable TC mapping */ if (tc->offset + tc->count > txq) { netdev_warn(dev, "Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); dev->num_tc = 0; return; } /* Invalidated prio to tc mappings set to TC0 */ for (i = 1; i < TC_BITMASK + 1; i++) { int q = netdev_get_prio_tc_map(dev, i); tc = &dev->tc_to_txq[q]; if (tc->offset + tc->count > txq) { netdev_warn(dev, "Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. 
Setting map to 0\n", i, q); netdev_set_prio_tc_map(dev, i, 0); } } } int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) { if (dev->num_tc) { struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; int i; /* walk through the TCs and see if it falls into any of them */ for (i = 0; i < TC_MAX_QUEUE; i++, tc++) { if ((txq - tc->offset) < tc->count) return i; } /* didn't find it, just return -1 to indicate no match */ return -1; } return 0; } EXPORT_SYMBOL(netdev_txq_to_tc); #ifdef CONFIG_XPS static struct static_key xps_needed __read_mostly; static struct static_key xps_rxqs_needed __read_mostly; static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) static bool remove_xps_queue(struct xps_dev_maps *dev_maps, struct xps_dev_maps *old_maps, int tci, u16 index) { struct xps_map *map = NULL; int pos; map = xmap_dereference(dev_maps->attr_map[tci]); if (!map) return false; for (pos = map->len; pos--;) { if (map->queues[pos] != index) continue; if (map->len > 1) { map->queues[pos] = map->queues[--map->len]; break; } if (old_maps) RCU_INIT_POINTER(old_maps->attr_map[tci], NULL); RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); kfree_rcu(map, rcu); return false; } return true; } static bool remove_xps_queue_cpu(struct net_device *dev, struct xps_dev_maps *dev_maps, int cpu, u16 offset, u16 count) { int num_tc = dev_maps->num_tc; bool active = false; int tci; for (tci = cpu * num_tc; num_tc--; tci++) { int i, j; for (i = count, j = offset; i--; j++) { if (!remove_xps_queue(dev_maps, NULL, tci, j)) break; } active |= i < 0; } return active; } static void reset_xps_maps(struct net_device *dev, struct xps_dev_maps *dev_maps, enum xps_map_type type) { static_key_slow_dec_cpuslocked(&xps_needed); if (type == XPS_RXQS) static_key_slow_dec_cpuslocked(&xps_rxqs_needed); RCU_INIT_POINTER(dev->xps_maps[type], NULL); kfree_rcu(dev_maps, rcu); } static void clean_xps_maps(struct net_device *dev, enum xps_map_type type, u16 offset, u16 count) { struct xps_dev_maps *dev_maps; bool active = false; int i, j; dev_maps = xmap_dereference(dev->xps_maps[type]); if (!dev_maps) return; for (j = 0; j < dev_maps->nr_ids; j++) active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count); if (!active) reset_xps_maps(dev, dev_maps, type); if (type == XPS_CPUS) { for (i = offset + (count - 1); count--; i--) netdev_queue_numa_node_write( netdev_get_tx_queue(dev, i), NUMA_NO_NODE); } } static void netif_reset_xps_queues(struct net_device *dev, u16 offset, u16 count) { if (!static_key_false(&xps_needed)) return; cpus_read_lock(); mutex_lock(&xps_map_mutex); if (static_key_false(&xps_rxqs_needed)) clean_xps_maps(dev, XPS_RXQS, offset, count); clean_xps_maps(dev, XPS_CPUS, offset, count); mutex_unlock(&xps_map_mutex); cpus_read_unlock(); } static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) { netif_reset_xps_queues(dev, index, dev->num_tx_queues - index); } static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index, u16 index, bool is_rxqs_map) { struct xps_map *new_map; int alloc_len = XPS_MIN_MAP_ALLOC; int i, pos; for (pos = 0; map && pos < map->len; pos++) { if (map->queues[pos] != index) continue; return map; } /* Need to add tx-queue to this CPU's/rx-queue's existing map */ if (map) { if (pos < map->alloc_len) return map; alloc_len = map->alloc_len * 2; } /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's * map */ if (is_rxqs_map) new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL); else 
new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, cpu_to_node(attr_index)); if (!new_map) return NULL; for (i = 0; i < pos; i++) new_map->queues[i] = map->queues[i]; new_map->alloc_len = alloc_len; new_map->len = pos; return new_map; } /* Copy xps maps at a given index */ static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps, struct xps_dev_maps *new_dev_maps, int index, int tc, bool skip_tc) { int i, tci = index * dev_maps->num_tc; struct xps_map *map; /* copy maps belonging to foreign traffic classes */ for (i = 0; i < dev_maps->num_tc; i++, tci++) { if (i == tc && skip_tc) continue; /* fill in the new device map from the old device map */ map = xmap_dereference(dev_maps->attr_map[tci]); RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); } } /* Must be called under cpus_read_lock */ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, u16 index, enum xps_map_type type) { struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL; const unsigned long *online_mask = NULL; bool active = false, copy = false; int i, j, tci, numa_node_id = -2; int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; unsigned int nr_ids; WARN_ON_ONCE(index >= dev->num_tx_queues); if (dev->num_tc) { /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; if (num_tc < 0) return -EINVAL; /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; tc = netdev_txq_to_tc(dev, index); if (tc < 0) return -EINVAL; } mutex_lock(&xps_map_mutex); dev_maps = xmap_dereference(dev->xps_maps[type]); if (type == XPS_RXQS) { maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues); nr_ids = dev->num_rx_queues; } else { maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc); if (num_possible_cpus() > 1) online_mask = cpumask_bits(cpu_online_mask); nr_ids = nr_cpu_ids; } if (maps_sz < L1_CACHE_BYTES) maps_sz = L1_CACHE_BYTES; /* The old dev_maps could be larger or smaller than the one we're * setting up now, as dev->num_tc or nr_ids could have been updated in * between. We could try to be smart, but let's be safe instead and only * copy foreign traffic classes if the two map sizes match. */ if (dev_maps && dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids) copy = true; /* allocate memory for queue storage */ for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids), j < nr_ids;) { if (!new_dev_maps) { new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { mutex_unlock(&xps_map_mutex); return -ENOMEM; } new_dev_maps->nr_ids = nr_ids; new_dev_maps->num_tc = num_tc; } tci = j * num_tc + tc; map = copy ? 
xmap_dereference(dev_maps->attr_map[tci]) : NULL; map = expand_xps_map(map, j, index, type == XPS_RXQS); if (!map) goto error; RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); } if (!new_dev_maps) goto out_no_new_maps; if (!dev_maps) { /* Increment static keys at most once per type */ static_key_slow_inc_cpuslocked(&xps_needed); if (type == XPS_RXQS) static_key_slow_inc_cpuslocked(&xps_rxqs_needed); } for (j = 0; j < nr_ids; j++) { bool skip_tc = false; tci = j * num_tc + tc; if (netif_attr_test_mask(j, mask, nr_ids) && netif_attr_test_online(j, online_mask, nr_ids)) { /* add tx-queue to CPU/rx-queue maps */ int pos = 0; skip_tc = true; map = xmap_dereference(new_dev_maps->attr_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; if (pos == map->len) map->queues[map->len++] = index; #ifdef CONFIG_NUMA if (type == XPS_CPUS) { if (numa_node_id == -2) numa_node_id = cpu_to_node(j); else if (numa_node_id != cpu_to_node(j)) numa_node_id = -1; } #endif } if (copy) xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc, skip_tc); } rcu_assign_pointer(dev->xps_maps[type], new_dev_maps); /* Cleanup old maps */ if (!dev_maps) goto out_no_old_maps; for (j = 0; j < dev_maps->nr_ids; j++) { for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) { map = xmap_dereference(dev_maps->attr_map[tci]); if (!map) continue; if (copy) { new_map = xmap_dereference(new_dev_maps->attr_map[tci]); if (map == new_map) continue; } RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); kfree_rcu(map, rcu); } } old_dev_maps = dev_maps; out_no_old_maps: dev_maps = new_dev_maps; active = true; out_no_new_maps: if (type == XPS_CPUS) /* update Tx queue numa node */ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), (numa_node_id >= 0) ? numa_node_id : NUMA_NO_NODE); if (!dev_maps) goto out_no_maps; /* removes tx-queue from unused CPUs/rx-queues */ for (j = 0; j < dev_maps->nr_ids; j++) { tci = j * dev_maps->num_tc; for (i = 0; i < dev_maps->num_tc; i++, tci++) { if (i == tc && netif_attr_test_mask(j, mask, dev_maps->nr_ids) && netif_attr_test_online(j, online_mask, dev_maps->nr_ids)) continue; active |= remove_xps_queue(dev_maps, copy ? old_dev_maps : NULL, tci, index); } } if (old_dev_maps) kfree_rcu(old_dev_maps, rcu); /* free map if not active */ if (!active) reset_xps_maps(dev, dev_maps, type); out_no_maps: mutex_unlock(&xps_map_mutex); return 0; error: /* remove any maps that we added */ for (j = 0; j < nr_ids; j++) { for (i = num_tc, tci = j * num_tc; i--; tci++) { new_map = xmap_dereference(new_dev_maps->attr_map[tci]); map = copy ? 
xmap_dereference(dev_maps->attr_map[tci]) : NULL; if (new_map && new_map != map) kfree(new_map); } } mutex_unlock(&xps_map_mutex); kfree(new_dev_maps); return -ENOMEM; } EXPORT_SYMBOL_GPL(__netif_set_xps_queue); int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index) { int ret; cpus_read_lock(); ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS); cpus_read_unlock(); return ret; } EXPORT_SYMBOL(netif_set_xps_queue); #endif static void netdev_unbind_all_sb_channels(struct net_device *dev) { struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues]; /* Unbind any subordinate channels */ while (txq-- != &dev->_tx[0]) { if (txq->sb_dev) netdev_unbind_sb_channel(dev, txq->sb_dev); } } void netdev_reset_tc(struct net_device *dev) { #ifdef CONFIG_XPS netif_reset_xps_queues_gt(dev, 0); #endif netdev_unbind_all_sb_channels(dev); /* Reset TC configuration of device */ dev->num_tc = 0; memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); } EXPORT_SYMBOL(netdev_reset_tc); int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) { if (tc >= dev->num_tc) return -EINVAL; #ifdef CONFIG_XPS netif_reset_xps_queues(dev, offset, count); #endif dev->tc_to_txq[tc].count = count; dev->tc_to_txq[tc].offset = offset; return 0; } EXPORT_SYMBOL(netdev_set_tc_queue); int netdev_set_num_tc(struct net_device *dev, u8 num_tc) { if (num_tc > TC_MAX_QUEUE) return -EINVAL; #ifdef CONFIG_XPS netif_reset_xps_queues_gt(dev, 0); #endif netdev_unbind_all_sb_channels(dev); dev->num_tc = num_tc; return 0; } EXPORT_SYMBOL(netdev_set_num_tc); void netdev_unbind_sb_channel(struct net_device *dev, struct net_device *sb_dev) { struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues]; #ifdef CONFIG_XPS netif_reset_xps_queues_gt(sb_dev, 0); #endif memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq)); memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map)); while (txq-- != &dev->_tx[0]) { if (txq->sb_dev == sb_dev) txq->sb_dev = NULL; } } EXPORT_SYMBOL(netdev_unbind_sb_channel); int netdev_bind_sb_channel_queue(struct net_device *dev, struct net_device *sb_dev, u8 tc, u16 count, u16 offset) { /* Make certain the sb_dev and dev are already configured */ if (sb_dev->num_tc >= 0 || tc >= dev->num_tc) return -EINVAL; /* We cannot hand out queues we don't have */ if ((offset + count) > dev->real_num_tx_queues) return -EINVAL; /* Record the mapping */ sb_dev->tc_to_txq[tc].count = count; sb_dev->tc_to_txq[tc].offset = offset; /* Provide a way for Tx queue to find the tc_to_txq map or * XPS map for itself. */ while (count--) netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev; return 0; } EXPORT_SYMBOL(netdev_bind_sb_channel_queue); int netdev_set_sb_channel(struct net_device *dev, u16 channel) { /* Do not use a multiqueue device to represent a subordinate channel */ if (netif_is_multiqueue(dev)) return -ENODEV; /* We allow channels 1 - 32767 to be used for subordinate channels. * Channel 0 is meant to be "native" mode and used only to represent * the main root device. We allow writing 0 to reset the device back * to normal mode after being used as a subordinate channel. */ if (channel > S16_MAX) return -EINVAL; dev->num_tc = -channel; return 0; } EXPORT_SYMBOL(netdev_set_sb_channel); /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater than real_num_tx_queues stale skbs on the qdisc must be flushed. 
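 *
 * A minimal usage sketch (hypothetical driver code, not taken from any real
 * driver; "active_txq" is an invented variable) for a post-registration
 * reconfiguration, which must run under rtnl_lock:
 *
 *	ASSERT_RTNL();
 *	err = netif_set_real_num_tx_queues(dev, active_txq);
 *	if (err)
 *		return err;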
*/ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { bool disabling; int rc; disabling = txq < dev->real_num_tx_queues; if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED || dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, txq); if (rc) return rc; if (dev->num_tc) netif_setup_tc(dev, txq); dev_qdisc_change_real_num_tx(dev, txq); dev->real_num_tx_queues = txq; if (disabling) { synchronize_net(); qdisc_reset_all_tx_gt(dev, txq); #ifdef CONFIG_XPS netif_reset_xps_queues_gt(dev, txq); #endif } } else { dev->real_num_tx_queues = txq; } return 0; } EXPORT_SYMBOL(netif_set_real_num_tx_queues); #ifdef CONFIG_SYSFS /** * netif_set_real_num_rx_queues - set actual number of RX queues used * @dev: Network device * @rxq: Actual number of RX queues * * This must be called either with the rtnl_lock held or before * registration of the net device. Returns 0 on success, or a * negative error code. If called before registration, it always * succeeds. */ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) { int rc; if (rxq < 1 || rxq > dev->num_rx_queues) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, rxq); if (rc) return rc; } dev->real_num_rx_queues = rxq; return 0; } EXPORT_SYMBOL(netif_set_real_num_rx_queues); #endif /** * netif_set_real_num_queues - set actual number of RX and TX queues used * @dev: Network device * @txq: Actual number of TX queues * @rxq: Actual number of RX queues * * Set the real number of both TX and RX queues. * Does nothing if the number of queues is already correct. */ int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq) { unsigned int old_rxq = dev->real_num_rx_queues; int err; if (txq < 1 || txq > dev->num_tx_queues || rxq < 1 || rxq > dev->num_rx_queues) return -EINVAL; /* Start from increases, so the error path only does decreases - * decreases can't fail. */ if (rxq > dev->real_num_rx_queues) { err = netif_set_real_num_rx_queues(dev, rxq); if (err) return err; } if (txq > dev->real_num_tx_queues) { err = netif_set_real_num_tx_queues(dev, txq); if (err) goto undo_rx; } if (rxq < dev->real_num_rx_queues) WARN_ON(netif_set_real_num_rx_queues(dev, rxq)); if (txq < dev->real_num_tx_queues) WARN_ON(netif_set_real_num_tx_queues(dev, txq)); return 0; undo_rx: WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq)); return err; } EXPORT_SYMBOL(netif_set_real_num_queues); /** * netif_set_tso_max_size() - set the max size of TSO frames supported * @dev: netdev to update * @size: max skb->len of a TSO frame * * Set the limit on the size of TSO super-frames the device can handle. * Unless explicitly set the stack will assume the value of * %GSO_LEGACY_MAX_SIZE. */ void netif_set_tso_max_size(struct net_device *dev, unsigned int size) { dev->tso_max_size = min(GSO_MAX_SIZE, size); if (size < READ_ONCE(dev->gso_max_size)) netif_set_gso_max_size(dev, size); if (size < READ_ONCE(dev->gso_ipv4_max_size)) netif_set_gso_ipv4_max_size(dev, size); } EXPORT_SYMBOL(netif_set_tso_max_size); /** * netif_set_tso_max_segs() - set the max number of segs supported for TSO * @dev: netdev to update * @segs: max number of TCP segments * * Set the limit on the number of TCP segments the device can generate from * a single TSO super-frame. * Unless explicitly set the stack will assume the value of %GSO_MAX_SEGS. 
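 *
 * A minimal usage sketch (hypothetical probe-time setup; the limits shown
 * are invented for illustration and are not taken from any real device):
 *
 *	netif_set_tso_max_size(netdev, 16 * 1024);
 *	netif_set_tso_max_segs(netdev, 32);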
*/ void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs) { dev->tso_max_segs = segs; if (segs < READ_ONCE(dev->gso_max_segs)) netif_set_gso_max_segs(dev, segs); } EXPORT_SYMBOL(netif_set_tso_max_segs); /** * netif_inherit_tso_max() - copy all TSO limits from a lower device to an upper * @to: netdev to update * @from: netdev from which to copy the limits */ void netif_inherit_tso_max(struct net_device *to, const struct net_device *from) { netif_set_tso_max_size(to, from->tso_max_size); netif_set_tso_max_segs(to, from->tso_max_segs); } EXPORT_SYMBOL(netif_inherit_tso_max); /** * netif_get_num_default_rss_queues - default number of RSS queues * * Default value is the number of physical cores if there are only 1 or 2, or * divided by 2 if there are more. */ int netif_get_num_default_rss_queues(void) { cpumask_var_t cpus; int cpu, count = 0; if (unlikely(is_kdump_kernel() || !zalloc_cpumask_var(&cpus, GFP_KERNEL))) return 1; cpumask_copy(cpus, cpu_online_mask); for_each_cpu(cpu, cpus) { ++count; cpumask_andnot(cpus, cpus, topology_sibling_cpumask(cpu)); } free_cpumask_var(cpus); return count > 2 ? DIV_ROUND_UP(count, 2) : count; } EXPORT_SYMBOL(netif_get_num_default_rss_queues); static void __netif_reschedule(struct Qdisc *q) { struct softnet_data *sd; unsigned long flags; local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); q->next_sched = NULL; *sd->output_queue_tailp = q; sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } void __netif_schedule(struct Qdisc *q) { if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) __netif_reschedule(q); } EXPORT_SYMBOL(__netif_schedule); struct dev_kfree_skb_cb { enum skb_drop_reason reason; }; static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) { return (struct dev_kfree_skb_cb *)skb->cb; } void netif_schedule_queue(struct netdev_queue *txq) { rcu_read_lock(); if (!netif_xmit_stopped(txq)) { struct Qdisc *q = rcu_dereference(txq->qdisc); __netif_schedule(q); } rcu_read_unlock(); } EXPORT_SYMBOL(netif_schedule_queue); void netif_tx_wake_queue(struct netdev_queue *dev_queue) { if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) { struct Qdisc *q; rcu_read_lock(); q = rcu_dereference(dev_queue->qdisc); __netif_schedule(q); rcu_read_unlock(); } } EXPORT_SYMBOL(netif_tx_wake_queue); void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason) { unsigned long flags; if (unlikely(!skb)) return; if (likely(refcount_read(&skb->users) == 1)) { smp_rmb(); refcount_set(&skb->users, 0); } else if (likely(!refcount_dec_and_test(&skb->users))) { return; } get_kfree_skb_cb(skb)->reason = reason; local_irq_save(flags); skb->next = __this_cpu_read(softnet_data.completion_queue); __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } EXPORT_SYMBOL(dev_kfree_skb_irq_reason); void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (in_hardirq() || irqs_disabled()) dev_kfree_skb_irq_reason(skb, reason); else kfree_skb_reason(skb, reason); } EXPORT_SYMBOL(dev_kfree_skb_any_reason); /** * netif_device_detach - mark device as removed * @dev: network device * * Mark device as removed from system and therefore no longer available. 
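 *
 * A minimal usage sketch (hypothetical suspend/resume handlers; the
 * "example_*" names and the priv layout are invented for illustration):
 *
 *	static int example_suspend(struct example_priv *priv)
 *	{
 *		netif_device_detach(priv->netdev);
 *		return example_hw_power_down(priv);
 *	}
 *
 *	static int example_resume(struct example_priv *priv)
 *	{
 *		int err = example_hw_power_up(priv);
 *
 *		if (!err)
 *			netif_device_attach(priv->netdev);
 *		return err;
 *	}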
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached to the system and restart it if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);

/*
 * Returns a Tx hash based on the given packet descriptor and the number of
 * Tx queues to be used as a distribution range.
 */
static u16 skb_tx_hash(const struct net_device *dev,
		       const struct net_device *sb_dev,
		       struct sk_buff *skb)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = dev->real_num_tx_queues;

	if (dev->num_tc) {
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);

		qoffset = sb_dev->tc_to_txq[tc].offset;
		qcount = sb_dev->tc_to_txq[tc].count;
		if (unlikely(!qcount)) {
			net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
					     sb_dev->name, qoffset, tc);
			qoffset = 0;
			qcount = dev->real_num_tx_queues;
		}
	}

	if (skb_rx_queue_recorded(skb)) {
		DEBUG_NET_WARN_ON_ONCE(qcount == 0);
		hash = skb_get_rx_queue(skb);
		if (hash >= qoffset)
			hash -= qoffset;
		while (unlikely(hash >= qcount))
			hash -= qcount;
		return hash + qoffset;
	}

	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}

void skb_warn_bad_offload(const struct sk_buff *skb)
{
	static const netdev_features_t null_features;
	struct net_device *dev = skb->dev;
	const char *name = "";

	if (!net_ratelimit())
		return;

	if (dev) {
		if (dev->dev.parent)
			name = dev_driver_string(dev->dev.parent);
		else
			name = netdev_name(dev);
	}
	skb_dump(KERN_WARNING, skb, false);
	WARN(1, "%s: caps=(%pNF, %pNF)\n",
	     name, dev ? &dev->features : &null_features,
	     skb->sk ? &skb->sk->sk_route_caps : &null_features);
}

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_is_gso(skb))) {
		skb_warn_bad_offload(skb);
		return -EINVAL;
	}

	/* Before computing a checksum, we should make sure no frag could
	 * be modified by an external entity: checksum could be wrong.
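	 * Linearizing below copies the (possibly shared) fragment data into
	 * the skb head, so the checksum is computed over a stable, private
	 * copy of the payload.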
*/ if (skb_has_shared_frag(skb)) { ret = __skb_linearize(skb); if (ret) goto out; } offset = skb_checksum_start_offset(skb); ret = -EINVAL; if (unlikely(offset >= skb_headlen(skb))) { DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); WARN_ONCE(true, "offset (%d) >= skb_headlen() (%u)\n", offset, skb_headlen(skb)); goto out; } csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; if (unlikely(offset + sizeof(__sum16) > skb_headlen(skb))) { DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); WARN_ONCE(true, "offset+2 (%zu) > skb_headlen() (%u)\n", offset + sizeof(__sum16), skb_headlen(skb)); goto out; } ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); if (ret) goto out; *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: return ret; } EXPORT_SYMBOL(skb_checksum_help); int skb_crc32c_csum_help(struct sk_buff *skb) { __le32 crc32c_csum; int ret = 0, offset, start; if (skb->ip_summed != CHECKSUM_PARTIAL) goto out; if (unlikely(skb_is_gso(skb))) goto out; /* Before computing a checksum, we should make sure no frag could * be modified by an external entity : checksum could be wrong. */ if (unlikely(skb_has_shared_frag(skb))) { ret = __skb_linearize(skb); if (ret) goto out; } start = skb_checksum_start_offset(skb); offset = start + offsetof(struct sctphdr, checksum); if (WARN_ON_ONCE(offset >= skb_headlen(skb))) { ret = -EINVAL; goto out; } ret = skb_ensure_writable(skb, offset + sizeof(__le32)); if (ret) goto out; crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, skb->len - start, ~(__u32)0, crc32c_csum_stub)); *(__le32 *)(skb->data + offset) = crc32c_csum; skb_reset_csum_not_inet(skb); out: return ret; } __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; /* Tunnel gso handlers can set protocol to ethernet. */ if (type == htons(ETH_P_TEB)) { struct ethhdr *eth; if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) return 0; eth = (struct ethhdr *)skb->data; type = eth->h_proto; } return vlan_get_protocol_and_depth(skb, type, depth); } /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) { netdev_err(dev, "hw csum failure\n"); skb_dump(KERN_ERR, skb, true); dump_stack(); } void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) { DO_ONCE_LITE(do_netdev_rx_csum_fault, dev, skb); } EXPORT_SYMBOL(netdev_rx_csum_fault); #endif /* XXX: check that highmem exists at all on the given machine. */ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; if (!(dev->features & NETIF_F_HIGHDMA)) { for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (PageHighMem(skb_frag_page(frag))) return 1; } } #endif return 0; } /* If MPLS offload request, verify we are testing hardware MPLS features * instead of standard features for the netdev. 
*/ #if IS_ENABLED(CONFIG_NET_MPLS_GSO) static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) { if (eth_p_mpls(type)) features &= skb->dev->mpls_features; return features; } #else static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) { return features; } #endif static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t features) { __be16 type; type = skb_network_protocol(skb, NULL); features = net_mpls_features(skb, features, type); if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } if (illegal_highdma(skb->dev, skb)) features &= ~NETIF_F_SG; return features; } netdev_features_t passthru_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { return features; } EXPORT_SYMBOL(passthru_features_check); static netdev_features_t dflt_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { return vlan_features_check(skb, features); } static netdev_features_t gso_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { u16 gso_segs = skb_shinfo(skb)->gso_segs; if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) return features & ~NETIF_F_GSO_MASK; if (!skb_shinfo(skb)->gso_type) { skb_warn_bad_offload(skb); return features & ~NETIF_F_GSO_MASK; } /* Support for GSO partial features requires software * intervention before we can actually process the packets * so we need to strip support for any partial features now * and we can pull them back in after we have partially * segmented the frame. */ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL)) features &= ~dev->gso_partial_features; /* Make sure to clear the IPv4 ID mangling feature if the * IPv4 header has the potential to be fragmented. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { struct iphdr *iph = skb->encapsulation ? 
inner_ip_hdr(skb) : ip_hdr(skb); if (!(iph->frag_off & htons(IP_DF))) features &= ~NETIF_F_TSO_MANGLEID; } return features; } netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; netdev_features_t features = dev->features; if (skb_is_gso(skb)) features = gso_features_check(skb, dev, features); /* If encapsulation offload request, verify we are testing * hardware encapsulation features instead of standard * features for the netdev */ if (skb->encapsulation) features &= dev->hw_enc_features; if (skb_vlan_tagged(skb)) features = netdev_intersect_features(features, dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (dev->netdev_ops->ndo_features_check) features &= dev->netdev_ops->ndo_features_check(skb, dev, features); else features &= dflt_features_check(skb, dev, features); return harmonize_features(skb, features); } EXPORT_SYMBOL(netif_skb_features); static int xmit_one(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) { unsigned int len; int rc; if (dev_nit_active(dev)) dev_queue_xmit_nit(skb, dev); len = skb->len; trace_net_dev_start_xmit(skb, dev); rc = netdev_start_xmit(skb, dev, txq, more); trace_net_dev_xmit(skb, rc, dev, len); return rc; } struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev, struct netdev_queue *txq, int *ret) { struct sk_buff *skb = first; int rc = NETDEV_TX_OK; while (skb) { struct sk_buff *next = skb->next; skb_mark_not_on_list(skb); rc = xmit_one(skb, dev, txq, next != NULL); if (unlikely(!dev_xmit_complete(rc))) { skb->next = next; goto out; } skb = next; if (netif_tx_queue_stopped(txq) && skb) { rc = NETDEV_TX_BUSY; break; } } out: *ret = rc; return skb; } static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; } int skb_csum_hwoffload_help(struct sk_buff *skb, const netdev_features_t features) { if (unlikely(skb_csum_is_sctp(skb))) return !!(features & NETIF_F_SCTP_CRC) ? 0 : skb_crc32c_csum_help(skb); if (features & NETIF_F_HW_CSUM) return 0; if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { switch (skb->csum_offset) { case offsetof(struct tcphdr, check): case offsetof(struct udphdr, check): return 0; } } return skb_checksum_help(skb); } EXPORT_SYMBOL(skb_csum_hwoffload_help); static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again) { netdev_features_t features; features = netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); if (unlikely(!skb)) goto out_null; skb = sk_validate_xmit_skb(skb, dev); if (unlikely(!skb)) goto out_null; if (netif_needs_gso(skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); if (IS_ERR(segs)) { goto out_kfree_skb; } else if (segs) { consume_skb(skb); skb = segs; } } else { if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto out_kfree_skb; /* If packet is not checksummed and device does not * support checksumming for this protocol, complete * checksumming here. 
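		 * skb_csum_hwoffload_help() below either leaves the checksum
		 * to the device (NETIF_F_HW_CSUM, or IP/IPv6 checksum of
		 * TCP/UDP), computes the SCTP CRC32c, or falls back to
		 * skb_checksum_help().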
*/ if (skb->ip_summed == CHECKSUM_PARTIAL) { if (skb->encapsulation) skb_set_inner_transport_header(skb, skb_checksum_start_offset(skb)); else skb_set_transport_header(skb, skb_checksum_start_offset(skb)); if (skb_csum_hwoffload_help(skb, features)) goto out_kfree_skb; } } skb = validate_xmit_xfrm(skb, features, again); return skb; out_kfree_skb: kfree_skb(skb); out_null: dev_core_stats_tx_dropped_inc(dev); return NULL; } struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again) { struct sk_buff *next, *head = NULL, *tail; for (; skb != NULL; skb = next) { next = skb->next; skb_mark_not_on_list(skb); /* in case skb wont be segmented, point to itself */ skb->prev = skb; skb = validate_xmit_skb(skb, dev, again); if (!skb) continue; if (!head) head = skb; else tail->next = skb; /* If skb was segmented, skb->prev points to * the last segment. If not, it still contains skb. */ tail = skb->prev; } return head; } EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); qdisc_skb_cb(skb)->pkt_len = skb->len; /* To get more precise estimation of bytes sent on wire, * we add to pkt_len the headers size of all segments */ if (shinfo->gso_size && skb_transport_header_was_set(skb)) { u16 gso_segs = shinfo->gso_segs; unsigned int hdr_len; /* mac layer + network layer */ hdr_len = skb_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { const struct tcphdr *th; struct tcphdr _tcphdr; th = skb_header_pointer(skb, hdr_len, sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); } else { struct udphdr _udphdr; if (skb_header_pointer(skb, hdr_len, sizeof(_udphdr), &_udphdr)) hdr_len += sizeof(struct udphdr); } if (shinfo->gso_type & SKB_GSO_DODGY) gso_segs = DIV_ROUND_UP(skb->len - hdr_len, shinfo->gso_size); qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } } static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, struct sk_buff **to_free, struct netdev_queue *txq) { int rc; rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; if (rc == NET_XMIT_SUCCESS) trace_qdisc_enqueue(q, txq, skb); return rc; } static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); struct sk_buff *to_free = NULL; bool contended; int rc; qdisc_calculate_pkt_len(skb, q); tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP); if (q->flags & TCQ_F_NOLOCK) { if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && qdisc_run_begin(q)) { /* Retest nolock_qdisc_is_empty() within the protection * of q->seqlock to protect from racing with requeuing. */ if (unlikely(!nolock_qdisc_is_empty(q))) { rc = dev_qdisc_enqueue(skb, q, &to_free, txq); __qdisc_run(q); qdisc_run_end(q); goto no_lock_out; } qdisc_bstats_cpu_update(q, skb); if (sch_direct_xmit(skb, q, dev, txq, NULL, true) && !nolock_qdisc_is_empty(q)) __qdisc_run(q); qdisc_run_end(q); return NET_XMIT_SUCCESS; } rc = dev_qdisc_enqueue(skb, q, &to_free, txq); qdisc_run(q); no_lock_out: if (unlikely(to_free)) kfree_skb_list_reason(to_free, tcf_get_drop_reason(to_free)); return rc; } if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) { kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP); return NET_XMIT_DROP; } /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. 
* This permits qdisc->running owner to get the lock more * often and dequeue packets faster. * On PREEMPT_RT it is possible to preempt the qdisc owner during xmit * and then other tasks will only enqueue packets. The packets will be * sent after the qdisc owner is scheduled again. To prevent this * scenario the task always serialize on the lock. */ contended = qdisc_is_running(q) || IS_ENABLED(CONFIG_PREEMPT_RT); if (unlikely(contended)) spin_lock(&q->busylock); spin_lock(root_lock); if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { __qdisc_drop(skb, &to_free); rc = NET_XMIT_DROP; } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && qdisc_run_begin(q)) { /* * This is a work-conserving queue; there are no old skbs * waiting to be sent out; and the qdisc is not running - * xmit the skb directly. */ qdisc_bstats_update(q, skb); if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) { if (unlikely(contended)) { spin_unlock(&q->busylock); contended = false; } __qdisc_run(q); } qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { WRITE_ONCE(q->owner, smp_processor_id()); rc = dev_qdisc_enqueue(skb, q, &to_free, txq); WRITE_ONCE(q->owner, -1); if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); contended = false; } __qdisc_run(q); qdisc_run_end(q); } } spin_unlock(root_lock); if (unlikely(to_free)) kfree_skb_list_reason(to_free, tcf_get_drop_reason(to_free)); if (unlikely(contended)) spin_unlock(&q->busylock); return rc; } #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) static void skb_update_prio(struct sk_buff *skb) { const struct netprio_map *map; const struct sock *sk; unsigned int prioidx; if (skb->priority) return; map = rcu_dereference_bh(skb->dev->priomap); if (!map) return; sk = skb_to_full_sk(skb); if (!sk) return; prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data); if (prioidx < map->priomap_len) skb->priority = map->priomap[prioidx]; } #else #define skb_update_prio(skb) #endif /** * dev_loopback_xmit - loop back @skb * @net: network namespace this loopback is happening in * @sk: sk needed to be a netfilter okfn * @skb: buffer to transmit */ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); skb->pkt_type = PACKET_LOOPBACK; if (skb->ip_summed == CHECKSUM_NONE) skb->ip_summed = CHECKSUM_UNNECESSARY; DEBUG_NET_WARN_ON_ONCE(!skb_dst(skb)); skb_dst_force(skb); netif_rx(skb); return 0; } EXPORT_SYMBOL(dev_loopback_xmit); #ifdef CONFIG_NET_EGRESS static struct netdev_queue * netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb) { int qm = skb_get_queue_mapping(skb); return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm)); } static bool netdev_xmit_txqueue_skipped(void) { return __this_cpu_read(softnet_data.xmit.skip_txqueue); } void netdev_xmit_skip_txqueue(bool skip) { __this_cpu_write(softnet_data.xmit.skip_txqueue, skip); } EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_NET_XGRESS static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, enum skb_drop_reason *drop_reason) { int ret = TC_ACT_UNSPEC; #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq); struct tcf_result res; if (!miniq) return ret; tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; tcf_set_drop_reason(skb, *drop_reason); mini_qdisc_bstats_cpu_update(miniq, skb); ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); /* Only tcf related quirks below. 
*/ switch (ret) { case TC_ACT_SHOT: *drop_reason = tcf_get_drop_reason(skb); mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(res.classid); break; } #endif /* CONFIG_NET_CLS_ACT */ return ret; } static DEFINE_STATIC_KEY_FALSE(tcx_needed_key); void tcx_inc(void) { static_branch_inc(&tcx_needed_key); } void tcx_dec(void) { static_branch_dec(&tcx_needed_key); } static __always_inline enum tcx_action_base tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, const bool needs_mac) { const struct bpf_mprog_fp *fp; const struct bpf_prog *prog; int ret = TCX_NEXT; if (needs_mac) __skb_push(skb, skb->mac_len); bpf_mprog_foreach_prog(entry, fp, prog) { bpf_compute_data_pointers(skb); ret = bpf_prog_run(prog, skb); if (ret != TCX_NEXT) break; } if (needs_mac) __skb_pull(skb, skb->mac_len); return tcx_action_code(skb, ret); } static __always_inline struct sk_buff * sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev, bool *another) { struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS; int sch_ret; if (!entry) return skb; if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } qdisc_skb_cb(skb)->pkt_len = skb->len; tcx_set_ingress(skb, true); if (static_branch_unlikely(&tcx_needed_key)) { sch_ret = tcx_run(entry, skb, true); if (sch_ret != TC_ACT_UNSPEC) goto ingress_verdict; } sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); ingress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: /* skb_mac_header check was done by BPF, so we can safely * push the L2 header back before redirecting to another * netdev. */ __skb_push(skb, skb->mac_len); if (skb_do_redirect(skb) == -EAGAIN) { __skb_pull(skb, skb->mac_len); *another = true; break; } *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_SHOT: kfree_skb_reason(skb, drop_reason); *ret = NET_RX_DROP; return NULL; /* used by tc_run */ case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: consume_skb(skb); fallthrough; case TC_ACT_CONSUMED: *ret = NET_RX_SUCCESS; return NULL; } return skb; } static __always_inline struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS; int sch_ret; if (!entry) return skb; /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was * already set by the caller. */ if (static_branch_unlikely(&tcx_needed_key)) { sch_ret = tcx_run(entry, skb, false); if (sch_ret != TC_ACT_UNSPEC) goto egress_verdict; } sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); egress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: /* No need to push/pop skb's mac_header here on egress! 
*/ skb_do_redirect(skb); *ret = NET_XMIT_SUCCESS; return NULL; case TC_ACT_SHOT: kfree_skb_reason(skb, drop_reason); *ret = NET_XMIT_DROP; return NULL; /* used by tc_run */ case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: consume_skb(skb); fallthrough; case TC_ACT_CONSUMED: *ret = NET_XMIT_SUCCESS; return NULL; } return skb; } #else static __always_inline struct sk_buff * sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev, bool *another) { return skb; } static __always_inline struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { return skb; } #endif /* CONFIG_NET_XGRESS */ #ifdef CONFIG_XPS static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb, struct xps_dev_maps *dev_maps, unsigned int tci) { int tc = netdev_get_prio_tc_map(dev, skb->priority); struct xps_map *map; int queue_index = -1; if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids) return queue_index; tci *= dev_maps->num_tc; tci += tc; map = rcu_dereference(dev_maps->attr_map[tci]); if (map) { if (map->len == 1) queue_index = map->queues[0]; else queue_index = map->queues[reciprocal_scale( skb_get_hash(skb), map->len)]; if (unlikely(queue_index >= dev->real_num_tx_queues)) queue_index = -1; } return queue_index; } #endif static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev, struct sk_buff *skb) { #ifdef CONFIG_XPS struct xps_dev_maps *dev_maps; struct sock *sk = skb->sk; int queue_index = -1; if (!static_key_false(&xps_needed)) return -1; rcu_read_lock(); if (!static_key_false(&xps_rxqs_needed)) goto get_cpus_map; dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]); if (dev_maps) { int tci = sk_rx_queue_get(sk); if (tci >= 0) queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci); } get_cpus_map: if (queue_index < 0) { dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]); if (dev_maps) { unsigned int tci = skb->sender_cpu - 1; queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci); } } rcu_read_unlock(); return queue_index; #else return -1; #endif } u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { return 0; } EXPORT_SYMBOL(dev_pick_tx_zero); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { return (u16)raw_smp_processor_id() % dev->real_num_tx_queues; } EXPORT_SYMBOL(dev_pick_tx_cpu_id); u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { struct sock *sk = skb->sk; int queue_index = sk_tx_queue_get(sk); sb_dev = sb_dev ? 
: dev; if (queue_index < 0 || skb->ooo_okay || queue_index >= dev->real_num_tx_queues) { int new_index = get_xps_queue(dev, sb_dev, skb); if (new_index < 0) new_index = skb_tx_hash(dev, sb_dev, skb); if (queue_index != new_index && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); queue_index = new_index; } return queue_index; } EXPORT_SYMBOL(netdev_pick_tx); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { int queue_index = 0; #ifdef CONFIG_XPS u32 sender_cpu = skb->sender_cpu - 1; if (sender_cpu >= (u32)NR_CPUS) skb->sender_cpu = raw_smp_processor_id() + 1; #endif if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) queue_index = ops->ndo_select_queue(dev, skb, sb_dev); else queue_index = netdev_pick_tx(dev, skb, sb_dev); queue_index = netdev_cap_txqueue(dev, queue_index); } skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); } /** * __dev_queue_xmit() - transmit a buffer * @skb: buffer to transmit * @sb_dev: suboordinate device used for L2 forwarding offload * * Queue a buffer for transmission to a network device. The caller must * have set the device and priority and built the buffer before calling * this function. The function can be called from an interrupt. * * When calling this method, interrupts MUST be enabled. This is because * the BH enable code must have IRQs enabled so that it will not deadlock. * * Regardless of the return value, the skb is consumed, so it is currently * difficult to retry a send to this method. (You can bump the ref count * before sending to hold a reference for retry if you are careful.) * * Return: * * 0 - buffer successfully transmitted * * positive qdisc return code - NET_XMIT_DROP etc. * * negative errno - other errors */ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) { struct net_device *dev = skb->dev; struct netdev_queue *txq = NULL; struct Qdisc *q; int rc = -ENOMEM; bool again = false; skb_reset_mac_header(skb); skb_assert_len(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); skb_update_prio(skb); qdisc_pkt_len_init(skb); tcx_set_ingress(skb, false); #ifdef CONFIG_NET_EGRESS if (static_branch_unlikely(&egress_needed_key)) { if (nf_hook_egress_active()) { skb = nf_hook_egress(skb, &rc, dev); if (!skb) goto out; } netdev_xmit_skip_txqueue(false); nf_skip_egress(skb, true); skb = sch_handle_egress(skb, &rc, dev); if (!skb) goto out; nf_skip_egress(skb, false); if (netdev_xmit_txqueue_skipped()) txq = netdev_tx_queue_mapping(dev, skb); } #endif /* If device/qdisc don't need skb->dst, release it right now while * its hot in this cpu cache. */ if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); else skb_dst_force(skb); if (!txq) txq = netdev_core_pick_tx(dev, skb, sb_dev); q = rcu_dereference_bh(txq->qdisc); trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); goto out; } /* The device has no queue. Common case for software devices: * loopback, all the sorts of tunnels... * Really, it is unlikely that netif_tx_lock protection is necessary * here. (f.e. loopback and IP tunnels are clean ignoring statistics * counters.) * However, it is possible, that they rely on protection * made by us here. 
* Check this and shot the lock. It is not prone from deadlocks. *Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ /* Other cpus might concurrently change txq->xmit_lock_owner * to -1 or to their cpu id, but not to our id. */ if (READ_ONCE(txq->xmit_lock_owner) != cpu) { if (dev_xmit_recursion()) goto recursion_alert; skb = validate_xmit_skb(skb, dev, &again); if (!skb) goto out; HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { dev_xmit_recursion_inc(); skb = dev_hard_start_xmit(skb, dev, txq, &rc); dev_xmit_recursion_dec(); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; } } HARD_TX_UNLOCK(dev, txq); net_crit_ratelimited("Virtual device %s asks to queue packet!\n", dev->name); } else { /* Recursion is detected! It is possible, * unfortunately */ recursion_alert: net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", dev->name); } } rc = -ENETDOWN; rcu_read_unlock_bh(); dev_core_stats_tx_dropped_inc(dev); kfree_skb_list(skb); return rc; out: rcu_read_unlock_bh(); return rc; } EXPORT_SYMBOL(__dev_queue_xmit); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { struct net_device *dev = skb->dev; struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; bool again = false; if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) goto drop; skb = validate_xmit_skb_list(skb, dev, &again); if (skb != orig_skb) goto drop; skb_set_queue_mapping(skb, queue_id); txq = skb_get_tx_queue(dev, skb); local_bh_disable(); dev_xmit_recursion_inc(); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); dev_xmit_recursion_dec(); local_bh_enable(); return ret; drop: dev_core_stats_tx_dropped_inc(dev); kfree_skb_list(skb); return NET_XMIT_DROP; } EXPORT_SYMBOL(__dev_direct_xmit); /************************************************************************* * Receiver routines *************************************************************************/ unsigned int sysctl_skb_defer_max __read_mostly = 64; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ /* Called with irq disabled */ static inline void ____napi_schedule(struct softnet_data *sd, struct napi_struct *napi) { struct task_struct *thread; lockdep_assert_irqs_disabled(); if (test_bit(NAPI_STATE_THREADED, &napi->state)) { /* Paired with smp_mb__before_atomic() in * napi_enable()/dev_set_threaded(). * Use READ_ONCE() to guarantee a complete * read on napi->thread. Only call * wake_up_process() when it's not NULL. */ thread = READ_ONCE(napi->thread); if (thread) { /* Avoid doing set_bit() if the thread is in * INTERRUPTIBLE state, cause napi_thread_wait() * makes sure to proceed with napi polling * if the thread is explicitly woken from here. */ if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE) set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); wake_up_process(thread); return; } } list_add_tail(&napi->poll_list, &sd->poll_list); WRITE_ONCE(napi->list_owner, smp_processor_id()); /* If not called from net_rx_action() * we have to raise NET_RX_SOFTIRQ. 
*/ if (!sd->in_net_rx_action) __raise_softirq_irqoff(NET_RX_SOFTIRQ); } #ifdef CONFIG_RPS struct static_key_false rps_needed __read_mostly; EXPORT_SYMBOL(rps_needed); struct static_key_false rfs_needed __read_mostly; EXPORT_SYMBOL(rfs_needed); static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { if (next_cpu < nr_cpu_ids) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; struct rps_dev_flow *old_rflow; u32 flow_id; u16 rxq_index; int rc; /* Should we steer this flow to a different hardware queue? */ if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || !(dev->features & NETIF_F_NTUPLE)) goto out; rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); if (rxq_index == skb_get_rx_queue(skb)) goto out; rxqueue = dev->_rx + rxq_index; flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; flow_id = skb_get_hash(skb) & flow_table->mask; rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) goto out; old_rflow = rflow; rflow = &flow_table->flows[flow_id]; rflow->filter = rc; if (old_rflow->filter == rflow->filter) old_rflow->filter = RPS_NO_FILTER; out: #endif rflow->last_qtail = per_cpu(softnet_data, next_cpu).input_queue_head; } rflow->cpu = next_cpu; return rflow; } /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. * rcu_read_lock must be held on entry. */ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { const struct rps_sock_flow_table *sock_flow_table; struct netdev_rx_queue *rxqueue = dev->_rx; struct rps_dev_flow_table *flow_table; struct rps_map *map; int cpu = -1; u32 tcpu; u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); if (unlikely(index >= dev->real_num_rx_queues)) { WARN_ONCE(dev->real_num_rx_queues > 1, "%s received packet on queue %u, but number " "of RX queues is %u\n", dev->name, index, dev->real_num_rx_queues); goto done; } rxqueue += index; } /* Avoid computing hash if RFS/RPS is not active for this rxqueue */ flow_table = rcu_dereference(rxqueue->rps_flow_table); map = rcu_dereference(rxqueue->rps_map); if (!flow_table && !map) goto done; skb_reset_network_header(skb); hash = skb_get_hash(skb); if (!hash) goto done; sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (flow_table && sock_flow_table) { struct rps_dev_flow *rflow; u32 next_cpu; u32 ident; /* First check into global flow table if there is a match. * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow(). */ ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]); if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask) goto try_rps; next_cpu = ident & net_hotdata.rps_cpu_mask; /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; /* * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow * table entry), switch if one of the following holds: * - Current CPU is unset (>= nr_cpu_ids). * - Current CPU is offline. * - The current CPU's queue tail has advanced beyond the * last packet that was enqueued using this table entry. * This guarantees that all previous packets for the flow * have been dequeued, thus preserving in order delivery. 
*/ if (unlikely(tcpu != next_cpu) && (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; rflow = set_rps_cpu(dev, skb, rflow, next_cpu); } if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; goto done; } } try_rps: if (map) { tcpu = map->cpus[reciprocal_scale(hash, map->len)]; if (cpu_online(tcpu)) { cpu = tcpu; goto done; } } done: return cpu; } #ifdef CONFIG_RFS_ACCEL /** * rps_may_expire_flow - check whether an RFS hardware filter may be removed * @dev: Device on which the filter was set * @rxq_index: RX queue index * @flow_id: Flow ID passed to ndo_rx_flow_steer() * @filter_id: Filter ID returned by ndo_rx_flow_steer() * * Drivers that implement ndo_rx_flow_steer() should periodically call * this function for each installed filter and remove the filters for * which it returns %true. */ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id) { struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; struct rps_dev_flow_table *flow_table; struct rps_dev_flow *rflow; bool expire = true; unsigned int cpu; rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; cpu = READ_ONCE(rflow->cpu); if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < (int)(10 * flow_table->mask))) expire = false; } rcu_read_unlock(); return expire; } EXPORT_SYMBOL(rps_may_expire_flow); #endif /* CONFIG_RFS_ACCEL */ /* Called from hardirq (IPI) context */ static void rps_trigger_softirq(void *data) { struct softnet_data *sd = data; ____napi_schedule(sd, &sd->backlog); sd->received_rps++; } #endif /* CONFIG_RPS */ /* Called from hardirq (IPI) context */ static void trigger_rx_softirq(void *data) { struct softnet_data *sd = data; __raise_softirq_irqoff(NET_RX_SOFTIRQ); smp_store_release(&sd->defer_ipi_scheduled, 0); } /* * After we queued a packet into sd->input_pkt_queue, * we need to make sure this queue is serviced soon. * * - If this is another cpu queue, link it to our rps_ipi_list, * and make sure we will process rps_ipi_list from net_rx_action(). * * - If this is our own queue, NAPI schedule our backlog. * Note that this also raises NET_RX_SOFTIRQ. */ static void napi_schedule_rps(struct softnet_data *sd) { struct softnet_data *mysd = this_cpu_ptr(&softnet_data); #ifdef CONFIG_RPS if (sd != mysd) { sd->rps_ipi_next = mysd->rps_ipi_list; mysd->rps_ipi_list = sd; /* If not called from net_rx_action() or napi_threaded_poll() * we have to raise NET_RX_SOFTIRQ. 
*/ if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll) __raise_softirq_irqoff(NET_RX_SOFTIRQ); return; } #endif /* CONFIG_RPS */ __napi_schedule_irqoff(&mysd->backlog); } #ifdef CONFIG_NET_FLOW_LIMIT int netdev_flow_limit_table_len __read_mostly = (1 << 12); #endif static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) { #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit *fl; struct softnet_data *sd; unsigned int old_flow, new_flow; if (qlen < (READ_ONCE(net_hotdata.max_backlog) >> 1)) return false; sd = this_cpu_ptr(&softnet_data); rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); if (fl) { new_flow = skb_get_hash(skb) & (fl->num_buckets - 1); old_flow = fl->history[fl->history_head]; fl->history[fl->history_head] = new_flow; fl->history_head++; fl->history_head &= FLOW_LIMIT_HISTORY - 1; if (likely(fl->buckets[old_flow])) fl->buckets[old_flow]--; if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { fl->count++; rcu_read_unlock(); return true; } } rcu_read_unlock(); #endif return false; } /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog * queue (may be a remote CPU queue). */ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, unsigned int *qtail) { enum skb_drop_reason reason; struct softnet_data *sd; unsigned long flags; unsigned int qlen; reason = SKB_DROP_REASON_NOT_SPECIFIED; sd = &per_cpu(softnet_data, cpu); rps_lock_irqsave(sd, &flags); if (!netif_running(skb->dev)) goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= READ_ONCE(net_hotdata.max_backlog) && !skb_flow_limit(skb, qlen)) { if (qlen) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); input_queue_tail_incr_save(sd, qtail); rps_unlock_irq_restore(sd, &flags); return NET_RX_SUCCESS; } /* Schedule NAPI for backlog device * We can use non atomic operation since we own the queue lock */ if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) napi_schedule_rps(sd); goto enqueue; } reason = SKB_DROP_REASON_CPU_BACKLOG; drop: sd->dropped++; rps_unlock_irq_restore(sd, &flags); dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb_reason(skb, reason); return NET_RX_DROP; } static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct netdev_rx_queue *rxqueue; rxqueue = dev->_rx; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); if (unlikely(index >= dev->real_num_rx_queues)) { WARN_ONCE(dev->real_num_rx_queues > 1, "%s received packet on queue %u, but number " "of RX queues is %u\n", dev->name, index, dev->real_num_rx_queues); return rxqueue; /* Return first rxqueue */ } rxqueue += index; } return rxqueue; } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { void *orig_data, *orig_data_end, *hard_start; struct netdev_rx_queue *rxqueue; bool orig_bcast, orig_host; u32 mac_len, frame_sz; __be16 orig_eth_type; struct ethhdr *eth; u32 metalen, act; int off; /* The XDP program wants to see the packet starting at the MAC * header. 
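	 * The xdp_buff below is therefore built so that xdp->data points at
	 * skb_mac_header(skb), with the remaining skb headroom available to
	 * bpf_xdp_adjust_head().
	 */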
*/ mac_len = skb->data - skb_mac_header(skb); hard_start = skb->data - skb_headroom(skb); /* SKB "head" area always have tailroom for skb_shared_info */ frame_sz = (void *)skb_end_pointer(skb) - hard_start; frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); rxqueue = netif_get_rxqueue(skb); xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq); xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len, skb_headlen(skb) + mac_len, true); if (skb_is_nonlinear(skb)) { skb_shinfo(skb)->xdp_frags_size = skb->data_len; xdp_buff_set_frags_flag(xdp); } else { xdp_buff_clear_frags_flag(xdp); } orig_data_end = xdp->data_end; orig_data = xdp->data; eth = (struct ethhdr *)xdp->data; orig_host = ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr); orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest); orig_eth_type = eth->h_proto; act = bpf_prog_run_xdp(xdp_prog, xdp); /* check if bpf_xdp_adjust_head was used */ off = xdp->data - orig_data; if (off) { if (off > 0) __skb_pull(skb, off); else if (off < 0) __skb_push(skb, -off); skb->mac_header += off; skb_reset_network_header(skb); } /* check if bpf_xdp_adjust_tail was used */ off = xdp->data_end - orig_data_end; if (off != 0) { skb_set_tail_pointer(skb, xdp->data_end - xdp->data); skb->len += off; /* positive on grow, negative on shrink */ } /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers * (e.g. bpf_xdp_adjust_tail), we need to update data_len here. */ if (xdp_buff_has_frags(xdp)) skb->data_len = skb_shinfo(skb)->xdp_frags_size; else skb->data_len = 0; /* check if XDP changed eth hdr such SKB needs update */ eth = (struct ethhdr *)xdp->data; if ((orig_eth_type != eth->h_proto) || (orig_host != ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr)) || (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) { __skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); } /* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull * before calling us again on redirect path. We do not call do_redirect * as we leave that up to the caller. * * Caller is responsible for managing lifetime of skb (i.e. calling * kfree_skb in response to actions it cannot handle/XDP_DROP). */ switch (act) { case XDP_REDIRECT: case XDP_TX: __skb_push(skb, mac_len); break; case XDP_PASS: metalen = xdp->data - xdp->data_meta; if (metalen) skb_metadata_set(skb, metalen); break; } return act; } static int netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog) { struct sk_buff *skb = *pskb; int err, hroom, troom; if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog)) return 0; /* In case we have to go down the path and also linearize, * then lets do the pskb_expand_head() work just once here. */ hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); troom = skb->tail + skb->data_len - skb->end; err = pskb_expand_head(skb, hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, troom > 0 ? troom + 128 : 0, GFP_ATOMIC); if (err) return err; return skb_linearize(skb); } static u32 netif_receive_generic_xdp(struct sk_buff **pskb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct sk_buff *skb = *pskb; u32 mac_len, act = XDP_DROP; /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. */ if (skb_is_redirected(skb)) return XDP_PASS; /* XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM * bytes. This is the guarantee that also native XDP provides, * thus we need to do it here as well. 
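	 * When the skb is cloned, nonlinear, or short on headroom,
	 * netif_skb_check_for_xdp() below expands (and, if needed,
	 * linearizes) it first.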
*/ mac_len = skb->data - skb_mac_header(skb); __skb_push(skb, mac_len); if (skb_cloned(skb) || skb_is_nonlinear(skb) || skb_headroom(skb) < XDP_PACKET_HEADROOM) { if (netif_skb_check_for_xdp(pskb, xdp_prog)) goto do_drop; } __skb_pull(*pskb, mac_len); act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog); switch (act) { case XDP_REDIRECT: case XDP_TX: case XDP_PASS: break; default: bpf_warn_invalid_xdp_action((*pskb)->dev, xdp_prog, act); fallthrough; case XDP_ABORTED: trace_xdp_exception((*pskb)->dev, xdp_prog, act); fallthrough; case XDP_DROP: do_drop: kfree_skb(*pskb); break; } return act; } /* When doing generic XDP we have to bypass the qdisc layer and the * network taps in order to match in-driver-XDP behavior. This also means * that XDP packets are able to starve other packets going through a qdisc, * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX * queues, so they do not have this starvation issue. */ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) { struct net_device *dev = skb->dev; struct netdev_queue *txq; bool free_skb = true; int cpu, rc; txq = netdev_core_pick_tx(dev, skb, NULL); cpu = smp_processor_id(); HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_frozen_or_drv_stopped(txq)) { rc = netdev_start_xmit(skb, dev, txq, 0); if (dev_xmit_complete(rc)) free_skb = false; } HARD_TX_UNLOCK(dev, txq); if (free_skb) { trace_xdp_exception(dev, xdp_prog, XDP_TX); dev_core_stats_tx_dropped_inc(dev); kfree_skb(skb); } } static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) { if (xdp_prog) { struct xdp_buff xdp; u32 act; int err; act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect((*pskb)->dev, *pskb, &xdp, xdp_prog); if (err) goto out_redir; break; case XDP_TX: generic_xdp_tx(*pskb, xdp_prog); break; } return XDP_DROP; } } return XDP_PASS; out_redir: kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP); return XDP_DROP; } EXPORT_SYMBOL_GPL(do_xdp_generic); static int netif_rx_internal(struct sk_buff *skb) { int ret; net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb); trace_netif_rx(skb); #ifdef CONFIG_RPS if (static_branch_unlikely(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu < 0) cpu = smp_processor_id(); ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); } else #endif { unsigned int qtail; ret = enqueue_to_backlog(skb, smp_processor_id(), &qtail); } return ret; } /** * __netif_rx - Slightly optimized version of netif_rx * @skb: buffer to post * * This behaves as netif_rx except that it does not disable bottom halves. * As a result this function may only be invoked from the interrupt context * (either hard or soft interrupt). */ int __netif_rx(struct sk_buff *skb) { int ret; lockdep_assert_once(hardirq_count() | softirq_count()); trace_netif_rx_entry(skb); ret = netif_rx_internal(skb); trace_netif_rx_exit(ret); return ret; } EXPORT_SYMBOL(__netif_rx); /** * netif_rx - post buffer to the network code * @skb: buffer to post * * This function receives a packet from a device driver and queues it for * the upper (protocol) levels to process via the backlog NAPI device. It * always succeeds. The buffer may be dropped during processing for * congestion control or by the protocol layers. * The network buffer is passed via the backlog NAPI device. 
 * Modern NIC drivers should use NAPI and GRO.
 * This function can be used from interrupt and from process context. The
 * caller from process context must not disable interrupts before invoking
 * this function.
 *
 * return values:
 * NET_RX_SUCCESS	(no congestion)
 * NET_RX_DROP		(packet was dropped)
 *
 */
int netif_rx(struct sk_buff *skb)
{
	bool need_bh_off = !(hardirq_count() | softirq_count());
	int ret;

	if (need_bh_off)
		local_bh_disable();
	trace_netif_rx_entry(skb);
	ret = netif_rx_internal(skb);
	trace_netif_rx_exit(ret);
	if (need_bh_off)
		local_bh_enable();
	return ret;
}
EXPORT_SYMBOL(netif_rx);

static __latent_entropy void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = this_cpu_ptr(&softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;

			clist = clist->next;

			WARN_ON(refcount_read(&skb->users));
			if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED))
				trace_consume_skb(skb, net_tx_action);
			else
				trace_kfree_skb(skb, net_tx_action,
						get_kfree_skb_cb(skb)->reason);

			if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
				__kfree_skb(skb);
			else
				__napi_kfree_skb(skb,
						 get_kfree_skb_cb(skb)->reason);
		}
	}

	if (sd->output_queue) {
		struct Qdisc *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		sd->output_queue_tailp = &sd->output_queue;
		local_irq_enable();

		rcu_read_lock();

		while (head) {
			struct Qdisc *q = head;
			spinlock_t *root_lock = NULL;

			head = head->next_sched;

			/* We need to make sure head->next_sched is read
			 * before clearing __QDISC_STATE_SCHED
			 */
			smp_mb__before_atomic();

			if (!(q->flags & TCQ_F_NOLOCK)) {
				root_lock = qdisc_lock(q);
				spin_lock(root_lock);
			} else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
						     &q->state))) {
				/* There is a synchronize_net() between
				 * STATE_DEACTIVATED flag being set and
				 * qdisc_reset()/some_qdisc_is_busy() in
				 * dev_deactivate(), so we can safely bail out
				 * early here to avoid data race between
				 * qdisc_deactivate() and some_qdisc_is_busy()
				 * for lockless qdisc.
				 */
				clear_bit(__QDISC_STATE_SCHED, &q->state);
				continue;
			}

			clear_bit(__QDISC_STATE_SCHED, &q->state);
			qdisc_run(q);
			if (root_lock)
				spin_unlock(root_lock);
		}

		rcu_read_unlock();
	}

	xfrm_dev_backlog(sd);
}

#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
/* This hook is defined here for ATM LANE */
int (*br_fdb_test_addr_hook)(struct net_device *dev,
			     unsigned char *addr) __read_mostly;
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif

/**
 * netdev_is_rx_handler_busy - check if receive handler is registered
 * @dev: device to check
 *
 * Check if a receive handler is already registered for a given device.
 * Return true if there is one.
 *
 * The caller must hold the rtnl_mutex.
 */
bool netdev_is_rx_handler_busy(struct net_device *dev)
{
	ASSERT_RTNL();
	return dev && rtnl_dereference(dev->rx_handler);
}
EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);

/**
 * netdev_rx_handler_register - register receive handler
 * @dev: device to register a handler for
 * @rx_handler: receive handler to register
 * @rx_handler_data: data pointer that is used by rx handler
 *
 * Register a receive handler for a device. This handler will then be
 * called from __netif_receive_skb. A negative errno code is returned
 * on a failure.
 *
 * The caller must hold the rtnl_mutex.
 *
 * For a general description of rx_handler, see enum rx_handler_result.
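 *
 * A minimal usage sketch (hypothetical code in the style of an upper-device
 * driver; "struct example_port" and the "example_" names are invented):
 *
 *	static rx_handler_result_t example_handle_frame(struct sk_buff **pskb)
 *	{
 *		struct example_port *port =
 *			rcu_dereference((*pskb)->dev->rx_handler_data);
 *
 *		port->rx_packets++;
 *		return RX_HANDLER_PASS;
 *	}
 *
 *	err = netdev_rx_handler_register(port_dev, example_handle_frame, port);
 *	if (err)
 *		return err;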
*/ int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, void *rx_handler_data) { if (netdev_is_rx_handler_busy(dev)) return -EBUSY; if (dev->priv_flags & IFF_NO_RX_HANDLER) return -EINVAL; /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); return 0; } EXPORT_SYMBOL_GPL(netdev_rx_handler_register); /** * netdev_rx_handler_unregister - unregister receive handler * @dev: device to unregister a handler from * * Unregister a receive handler from a device. * * The caller must hold the rtnl_mutex. */ void netdev_rx_handler_unregister(struct net_device *dev) { ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); /* a reader seeing a non NULL rx_handler in a rcu_read_lock() * section has a guarantee to see a non NULL rx_handler_data * as well. */ synchronize_net(); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); /* * Limit the use of PFMEMALLOC reserves to those protocols that implement * the special handling of PFMEMALLOC skbs. */ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) { switch (skb->protocol) { case htons(ETH_P_ARP): case htons(ETH_P_IP): case htons(ETH_P_IPV6): case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): return true; default: return false; } } static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { if (nf_hook_ingress_active(skb)) { int ingress_retval; if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } rcu_read_lock(); ingress_retval = nf_hook_ingress(skb); rcu_read_unlock(); return ingress_retval; } return 0; } static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, struct packet_type **ppt_prev) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct sk_buff *skb = *pskb; struct net_device *orig_dev; bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb); trace_netif_receive_skb(skb); orig_dev = skb->dev; skb_reset_network_header(skb); if (!skb_transport_header_was_set(skb)) skb_reset_transport_header(skb); skb_reset_mac_len(skb); pt_prev = NULL; another_round: skb->skb_iif = skb->dev->ifindex; __this_cpu_inc(softnet_data.processed); if (static_branch_unlikely(&generic_xdp_needed_key)) { int ret2; migrate_disable(); ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), &skb); migrate_enable(); if (ret2 != XDP_PASS) { ret = NET_RX_DROP; goto out; } } if (eth_type_vlan(skb->protocol)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto out; } if (skb_skip_tc_classify(skb)) goto skip_classify; if (pfmemalloc) goto skip_taps; list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } skip_taps: #ifdef CONFIG_NET_INGRESS if (static_branch_unlikely(&ingress_needed_key)) { bool another = false; nf_skip_egress(skb, true); skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, &another); if (another) goto another_round; if (!skb) goto out; nf_skip_egress(skb, false); if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) goto out; } #endif skb_reset_redirect(skb); skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; if 
(skb_vlan_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: deliver_exact = true; break; case RX_HANDLER_PASS: break; default: BUG(); } } if (unlikely(skb_vlan_tag_present(skb)) && !netdev_uses_dsa(skb->dev)) { check_vlan_id: if (skb_vlan_tag_get_id(skb)) { /* Vlan id is non 0 and vlan_do_receive() above couldn't * find vlan device. */ skb->pkt_type = PACKET_OTHERHOST; } else if (eth_type_vlan(skb->protocol)) { /* Outer header is 802.1P with vlan 0, inner header is * 802.1Q or 802.1AD and vlan_do_receive() above could * not find vlan dev for vlan id 0. */ __vlan_hwaccel_clear_tag(skb); skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto out; if (vlan_do_receive(&skb)) /* After stripping off 802.1P header with vlan 0 * vlan dev is found for inner header. */ goto another_round; else if (unlikely(!skb)) goto out; else /* We have stripped outer 802.1P vlan 0 header. * But could not find vlan dev. * check again for vlan id to set OTHERHOST. */ goto check_vlan_id; } /* Note: we might in the future use prio bits * and set skb->priority like in vlan_do_receive() * For the time being, just ignore Priority Code Point */ __vlan_hwaccel_clear_tag(skb); } type = skb->protocol; /* deliver only exact match when indicated */ if (likely(!deliver_exact)) { deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, &ptype_base[ntohs(type) & PTYPE_HASH_MASK]); } deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, &orig_dev->ptype_specific); if (unlikely(skb->dev != orig_dev)) { deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, &skb->dev->ptype_specific); } if (pt_prev) { if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) goto drop; *ppt_prev = pt_prev; } else { drop: if (!deliver_exact) dev_core_stats_rx_dropped_inc(skb->dev); else dev_core_stats_rx_nohandler_inc(skb->dev); kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) */ ret = NET_RX_DROP; } out: /* The invariant here is that if *ppt_prev is not NULL * then skb should also be non-NULL. * * Apparently *ppt_prev assignment above holds this invariant due to * skb dereferencing near it. */ *pskb = skb; return ret; } static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc) { struct net_device *orig_dev = skb->dev; struct packet_type *pt_prev = NULL; int ret; ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); if (pt_prev) ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, skb->dev, pt_prev, orig_dev); return ret; } /** * netif_receive_skb_core - special purpose version of netif_receive_skb * @skb: buffer to process * * More direct receive version of netif_receive_skb(). It should * only be used by callers that have a need to skip RPS and Generic XDP. * Caller must also take care of handling if ``(page_is_)pfmemalloc``. * * This function may only be called from softirq context and interrupts * should be enabled. 
* * Return values (usually ignored): * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ int netif_receive_skb_core(struct sk_buff *skb) { int ret; rcu_read_lock(); ret = __netif_receive_skb_one_core(skb, false); rcu_read_unlock(); return ret; } EXPORT_SYMBOL(netif_receive_skb_core); static inline void __netif_receive_skb_list_ptype(struct list_head *head, struct packet_type *pt_prev, struct net_device *orig_dev) { struct sk_buff *skb, *next; if (!pt_prev) return; if (list_empty(head)) return; if (pt_prev->list_func != NULL) INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv, ip_list_rcv, head, pt_prev, orig_dev); else list_for_each_entry_safe(skb, next, head, list) { skb_list_del_init(skb); pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } } static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc) { /* Fast-path assumptions: * - There is no RX handler. * - Only one packet_type matches. * If either of these fails, we will end up doing some per-packet * processing in-line, then handling the 'last ptype' for the whole * sublist. This can't cause out-of-order delivery to any single ptype, * because the 'last ptype' must be constant across the sublist, and all * other ptypes are handled per-packet. */ /* Current (common) ptype of sublist */ struct packet_type *pt_curr = NULL; /* Current (common) orig_dev of sublist */ struct net_device *od_curr = NULL; struct list_head sublist; struct sk_buff *skb, *next; INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { struct net_device *orig_dev = skb->dev; struct packet_type *pt_prev = NULL; skb_list_del_init(skb); __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); if (!pt_prev) continue; if (pt_curr != pt_prev || od_curr != orig_dev) { /* dispatch old sublist */ __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr); /* start new sublist */ INIT_LIST_HEAD(&sublist); pt_curr = pt_prev; od_curr = orig_dev; } list_add_tail(&skb->list, &sublist); } /* dispatch final sublist */ __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr); } static int __netif_receive_skb(struct sk_buff *skb) { int ret; if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { unsigned int noreclaim_flag; /* * PFMEMALLOC skbs are special, they should * - be delivered to SOCK_MEMALLOC sockets only * - stay away from userspace * - have bounded memory usage * * Use PF_MEMALLOC as this saves us from propagating the allocation * context down to all allocation sites. */ noreclaim_flag = memalloc_noreclaim_save(); ret = __netif_receive_skb_one_core(skb, true); memalloc_noreclaim_restore(noreclaim_flag); } else ret = __netif_receive_skb_one_core(skb, false); return ret; } static void __netif_receive_skb_list(struct list_head *head) { unsigned long noreclaim_flag = 0; struct sk_buff *skb, *next; bool pfmemalloc = false; /* Is current sublist PF_MEMALLOC? 
*/ list_for_each_entry_safe(skb, next, head, list) { if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) { struct list_head sublist; /* Handle the previous sublist */ list_cut_before(&sublist, head, &skb->list); if (!list_empty(&sublist)) __netif_receive_skb_list_core(&sublist, pfmemalloc); pfmemalloc = !pfmemalloc; /* See comments in __netif_receive_skb */ if (pfmemalloc) noreclaim_flag = memalloc_noreclaim_save(); else memalloc_noreclaim_restore(noreclaim_flag); } } /* Handle the remaining sublist */ if (!list_empty(head)) __netif_receive_skb_list_core(head, pfmemalloc); /* Restore pflags */ if (pfmemalloc) memalloc_noreclaim_restore(noreclaim_flag); } static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) { struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); struct bpf_prog *new = xdp->prog; int ret = 0; switch (xdp->command) { case XDP_SETUP_PROG: rcu_assign_pointer(dev->xdp_prog, new); if (old) bpf_prog_put(old); if (old && !new) { static_branch_dec(&generic_xdp_needed_key); } else if (new && !old) { static_branch_inc(&generic_xdp_needed_key); dev_disable_lro(dev); dev_disable_gro_hw(dev); } break; default: ret = -EINVAL; break; } return ret; } static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; rcu_read_lock(); #ifdef CONFIG_RPS if (static_branch_unlikely(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } } #endif ret = __netif_receive_skb(skb); rcu_read_unlock(); return ret; } void netif_receive_skb_list_internal(struct list_head *head) { struct sk_buff *skb, *next; struct list_head sublist; INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb); skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist); } list_splice_init(&sublist, head); rcu_read_lock(); #ifdef CONFIG_RPS if (static_branch_unlikely(&rps_needed)) { list_for_each_entry_safe(skb, next, head, list) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { /* Will be handled, remove from list */ skb_list_del_init(skb); enqueue_to_backlog(skb, cpu, &rflow->last_qtail); } } } #endif __netif_receive_skb_list(head); rcu_read_unlock(); } /** * netif_receive_skb - process receive buffer from network * @skb: buffer to process * * netif_receive_skb() is the main receive data processing function. * It always succeeds. The buffer may be dropped during processing * for congestion control or by the protocol layers. * * This function may only be called from softirq context and interrupts * should be enabled. * * Return values (usually ignored): * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ int netif_receive_skb(struct sk_buff *skb) { int ret; trace_netif_receive_skb_entry(skb); ret = netif_receive_skb_internal(skb); trace_netif_receive_skb_exit(ret); return ret; } EXPORT_SYMBOL(netif_receive_skb); /** * netif_receive_skb_list - process many receive buffers from network * @head: list of skbs to process. * * Since return value of netif_receive_skb() is normally ignored, and * wouldn't be meaningful for a list, this function returns void. 
* * This function may only be called from softirq context and interrupts * should be enabled. */ void netif_receive_skb_list(struct list_head *head) { struct sk_buff *skb; if (list_empty(head)) return; if (trace_netif_receive_skb_list_entry_enabled()) { list_for_each_entry(skb, head, list) trace_netif_receive_skb_list_entry(skb); } netif_receive_skb_list_internal(head); trace_netif_receive_skb_list_exit(0); } EXPORT_SYMBOL(netif_receive_skb_list); static DEFINE_PER_CPU(struct work_struct, flush_works); /* Network device is going away, flush any packets still pending */ static void flush_backlog(struct work_struct *work) { struct sk_buff *skb, *tmp; struct softnet_data *sd; local_bh_disable(); sd = this_cpu_ptr(&softnet_data); rps_lock_irq_disable(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); dev_kfree_skb_irq(skb); input_queue_head_incr(sd); } } rps_unlock_irq_enable(sd); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); kfree_skb(skb); input_queue_head_incr(sd); } } local_bh_enable(); } static bool flush_required(int cpu) { #if IS_ENABLED(CONFIG_RPS) struct softnet_data *sd = &per_cpu(softnet_data, cpu); bool do_flush; rps_lock_irq_disable(sd); /* as insertion into process_queue happens with the rps lock held, * process_queue access may race only with dequeue */ do_flush = !skb_queue_empty(&sd->input_pkt_queue) || !skb_queue_empty_lockless(&sd->process_queue); rps_unlock_irq_enable(sd); return do_flush; #endif /* without RPS we can't safely check input_pkt_queue: during a * concurrent remote skb_queue_splice() we can detect as empty both * input_pkt_queue and process_queue even if the latter could end-up * containing a lot of packets. */ return true; } static void flush_all_backlogs(void) { static cpumask_t flush_cpus; unsigned int cpu; /* since we are under rtnl lock protection we can use static data * for the cpumask and avoid allocating on stack the possibly * large mask */ ASSERT_RTNL(); cpus_read_lock(); cpumask_clear(&flush_cpus); for_each_online_cpu(cpu) { if (flush_required(cpu)) { queue_work_on(cpu, system_highpri_wq, per_cpu_ptr(&flush_works, cpu)); cpumask_set_cpu(cpu, &flush_cpus); } } /* we can have in flight packet[s] on the cpus we are not flushing, * synchronize_net() in unregister_netdevice_many() will take care of * them */ for_each_cpu(cpu, &flush_cpus) flush_work(per_cpu_ptr(&flush_works, cpu)); cpus_read_unlock(); } static void net_rps_send_ipi(struct softnet_data *remsd) { #ifdef CONFIG_RPS while (remsd) { struct softnet_data *next = remsd->rps_ipi_next; if (cpu_online(remsd->cpu)) smp_call_function_single_async(remsd->cpu, &remsd->csd); remsd = next; } #endif } /* * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. */ static void net_rps_action_and_irq_enable(struct softnet_data *sd) { #ifdef CONFIG_RPS struct softnet_data *remsd = sd->rps_ipi_list; if (remsd) { sd->rps_ipi_list = NULL; local_irq_enable(); /* Send pending IPI's to kick RPS processing on remote cpus. 
*/ net_rps_send_ipi(remsd); } else #endif local_irq_enable(); } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) { #ifdef CONFIG_RPS return sd->rps_ipi_list != NULL; #else return false; #endif } static int process_backlog(struct napi_struct *napi, int quota) { struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); bool again = true; int work = 0; /* Check if we have pending IPIs, it's better to send them now, * not waiting for net_rx_action() to end. */ if (sd_has_rps_ipi_waiting(sd)) { local_irq_disable(); net_rps_action_and_irq_enable(sd); } napi->weight = READ_ONCE(net_hotdata.dev_rx_weight); while (again) { struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { rcu_read_lock(); __netif_receive_skb(skb); rcu_read_unlock(); input_queue_head_incr(sd); if (++work >= quota) return work; } rps_lock_irq_disable(sd); if (skb_queue_empty(&sd->input_pkt_queue)) { /* * Inline a custom version of __napi_complete(). * Only the current cpu owns and manipulates this napi, * and NAPI_STATE_SCHED is the only possible flag set * on backlog. * We can use a plain write instead of clear_bit(), * and we don't need an smp_mb() memory barrier. */ napi->state = 0; again = false; } else { skb_queue_splice_tail_init(&sd->input_pkt_queue, &sd->process_queue); } rps_unlock_irq_enable(sd); } return work; } /** * __napi_schedule - schedule for receive * @n: entry to schedule * * The entry's receive function will be scheduled to run. * Consider using __napi_schedule_irqoff() if hard irqs are masked. */ void __napi_schedule(struct napi_struct *n) { unsigned long flags; local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); } EXPORT_SYMBOL(__napi_schedule); /** * napi_schedule_prep - check if napi can be scheduled * @n: napi context * * Test if NAPI routine is already running, and if not mark * it as running. This is used as a condition variable to * ensure only one NAPI poll instance runs. We also make * sure there is no pending NAPI disable. */ bool napi_schedule_prep(struct napi_struct *n) { unsigned long new, val = READ_ONCE(n->state); do { if (unlikely(val & NAPIF_STATE_DISABLE)) return false; new = val | NAPIF_STATE_SCHED; /* Sets STATE_MISSED bit if STATE_SCHED was already set * This was suggested by Alexander Duyck, as compiler * emits better code than: * if (val & NAPIF_STATE_SCHED) * new |= NAPIF_STATE_MISSED; */ new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED * NAPIF_STATE_MISSED; } while (!try_cmpxchg(&n->state, &val, new)); return !(val & NAPIF_STATE_SCHED); } EXPORT_SYMBOL(napi_schedule_prep); /** * __napi_schedule_irqoff - schedule for receive * @n: entry to schedule * * Variant of __napi_schedule() assuming hard irqs are masked. * * On PREEMPT_RT enabled kernels this maps to __napi_schedule() * because the interrupt disabled assumption might not be true * due to force-threaded interrupts and spinlock substitution. */ void __napi_schedule_irqoff(struct napi_struct *n) { if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ____napi_schedule(this_cpu_ptr(&softnet_data), n); else __napi_schedule(n); } EXPORT_SYMBOL(__napi_schedule_irqoff); bool napi_complete_done(struct napi_struct *n, int work_done) { unsigned long flags, val, new, timeout = 0; bool ret = true; /* * 1) Don't let napi dequeue from the cpu poll list * just in case it's running on a different cpu. * 2) If we are busy polling, do nothing here, we have * the guarantee we will be called later.
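 *
 * For reference, the usual caller is a driver poll routine reporting how
 * much of its budget it used; an illustrative sketch (foo_clean_rx() and
 * foo_enable_irqs() are hypothetical driver helpers):
 *
 *	static int foo_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = foo_clean_rx(napi, budget);
 *
 *		if (work < budget && napi_complete_done(napi, work))
 *			foo_enable_irqs(napi);
 *		return work;
 *	}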
*/ if (unlikely(n->state & (NAPIF_STATE_NPSVC | NAPIF_STATE_IN_BUSY_POLL))) return false; if (work_done) { if (n->gro_bitmask) timeout = READ_ONCE(n->dev->gro_flush_timeout); n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs); } if (n->defer_hard_irqs_count > 0) { n->defer_hard_irqs_count--; timeout = READ_ONCE(n->dev->gro_flush_timeout); if (timeout) ret = false; } if (n->gro_bitmask) { /* When the NAPI instance uses a timeout and keeps postponing * it, we need to bound somehow the time packets are kept in * the GRO layer */ napi_gro_flush(n, !!timeout); } gro_normal_list(n); if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ local_irq_save(flags); list_del_init(&n->poll_list); local_irq_restore(flags); } WRITE_ONCE(n->list_owner, -1); val = READ_ONCE(n->state); do { WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | NAPIF_STATE_SCHED_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); /* If STATE_MISSED was set, leave STATE_SCHED set, * because we will call napi->poll() one more time. * This C code was suggested by Alexander Duyck to help gcc. */ new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED * NAPIF_STATE_SCHED; } while (!try_cmpxchg(&n->state, &val, new)); if (unlikely(val & NAPIF_STATE_MISSED)) { __napi_schedule(n); return false; } if (timeout) hrtimer_start(&n->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); return ret; } EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ struct napi_struct *napi_by_id(unsigned int napi_id) { unsigned int hash = napi_id % HASH_SIZE(napi_hash); struct napi_struct *napi; hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) if (napi->napi_id == napi_id) return napi; return NULL; } static void skb_defer_free_flush(struct softnet_data *sd) { struct sk_buff *skb, *next; /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ if (!READ_ONCE(sd->defer_list)) return; spin_lock(&sd->defer_lock); skb = sd->defer_list; sd->defer_list = NULL; sd->defer_count = 0; spin_unlock(&sd->defer_lock); while (skb != NULL) { next = skb->next; napi_consume_skb(skb, 1); skb = next; } } #if defined(CONFIG_NET_RX_BUSY_POLL) static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) { if (!skip_schedule) { gro_normal_list(napi); __napi_schedule(napi); return; } if (napi->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. */ napi_gro_flush(napi, HZ >= 1000); } gro_normal_list(napi); clear_bit(NAPI_STATE_SCHED, &napi->state); } enum { NAPI_F_PREFER_BUSY_POLL = 1, NAPI_F_END_ON_RESCHED = 2, }; static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, unsigned flags, u16 budget) { bool skip_schedule = false; unsigned long timeout; int rc; /* Busy polling means there is a high chance device driver hard irq * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was * set in napi_schedule_prep(). * Since we are about to call napi->poll() once more, we can safely * clear NAPI_STATE_MISSED. * * Note: x86 could use a single "lock and ..." 
instruction * to perform these two clear_bit() */ clear_bit(NAPI_STATE_MISSED, &napi->state); clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state); local_bh_disable(); if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs); timeout = READ_ONCE(napi->dev->gro_flush_timeout); if (napi->defer_hard_irqs_count && timeout) { hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); skip_schedule = true; } } /* All we really want here is to re-enable device interrupts. * Ideally, a new ndo_busy_poll_stop() could avoid another round. */ rc = napi->poll(napi, budget); /* We can't gro_normal_list() here, because napi->poll() might have * rearmed the napi (napi_complete_done()) in which case it could * already be running on another CPU. */ trace_napi_poll(napi, rc, budget); netpoll_poll_unlock(have_poll_lock); if (rc == budget) __busy_poll_stop(napi, skip_schedule); local_bh_enable(); } static void __napi_busy_loop(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), void *loop_end_arg, unsigned flags, u16 budget) { unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); void *have_poll_lock = NULL; struct napi_struct *napi; WARN_ON_ONCE(!rcu_read_lock_held()); restart: napi_poll = NULL; napi = napi_by_id(napi_id); if (!napi) return; if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_disable(); for (;;) { int work = 0; local_bh_disable(); if (!napi_poll) { unsigned long val = READ_ONCE(napi->state); /* If multiple threads are competing for this napi, * we avoid dirtying napi->state as much as we can. */ if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED | NAPIF_STATE_IN_BUSY_POLL)) { if (flags & NAPI_F_PREFER_BUSY_POLL) set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); goto count; } if (cmpxchg(&napi->state, val, val | NAPIF_STATE_IN_BUSY_POLL | NAPIF_STATE_SCHED) != val) { if (flags & NAPI_F_PREFER_BUSY_POLL) set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); goto count; } have_poll_lock = netpoll_poll_lock(napi); napi_poll = napi->poll; } work = napi_poll(napi, budget); trace_napi_poll(napi, work, budget); gro_normal_list(napi); count: if (work > 0) __NET_ADD_STATS(dev_net(napi->dev), LINUX_MIB_BUSYPOLLRXPACKETS, work); skb_defer_free_flush(this_cpu_ptr(&softnet_data)); local_bh_enable(); if (!loop_end || loop_end(loop_end_arg, start_time)) break; if (unlikely(need_resched())) { if (flags & NAPI_F_END_ON_RESCHED) break; if (napi_poll) busy_poll_stop(napi, have_poll_lock, flags, budget); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_enable(); rcu_read_unlock(); cond_resched(); rcu_read_lock(); if (loop_end(loop_end_arg, start_time)) return; goto restart; } cpu_relax(); } if (napi_poll) busy_poll_stop(napi, have_poll_lock, flags, budget); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_enable(); } void napi_busy_loop_rcu(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), void *loop_end_arg, bool prefer_busy_poll, u16 budget) { unsigned flags = NAPI_F_END_ON_RESCHED; if (prefer_busy_poll) flags |= NAPI_F_PREFER_BUSY_POLL; __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget); } void napi_busy_loop(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), void *loop_end_arg, bool prefer_busy_poll, u16 budget) { unsigned flags = prefer_busy_poll ? 
NAPI_F_PREFER_BUSY_POLL : 0; rcu_read_lock(); __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget); rcu_read_unlock(); } EXPORT_SYMBOL(napi_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ static void napi_hash_add(struct napi_struct *napi) { if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state)) return; spin_lock(&napi_hash_lock); /* 0..NR_CPUS range is reserved for sender_cpu use */ do { if (unlikely(++napi_gen_id < MIN_NAPI_ID)) napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); napi->napi_id = napi_gen_id; hlist_add_head_rcu(&napi->napi_hash_node, &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); spin_unlock(&napi_hash_lock); } /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi */ static void napi_hash_del(struct napi_struct *napi) { spin_lock(&napi_hash_lock); hlist_del_init_rcu(&napi->napi_hash_node); spin_unlock(&napi_hash_lock); } static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) { struct napi_struct *napi; napi = container_of(timer, struct napi_struct, timer); /* Note : we use a relaxed variant of napi_schedule_prep() not setting * NAPI_STATE_MISSED, since we do not react to a device IRQ. */ if (!napi_disable_pending(napi) && !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) { clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); __napi_schedule_irqoff(napi); } return HRTIMER_NORESTART; } static void init_gro_hash(struct napi_struct *napi) { int i; for (i = 0; i < GRO_HASH_BUCKETS; i++) { INIT_LIST_HEAD(&napi->gro_hash[i].list); napi->gro_hash[i].count = 0; } napi->gro_bitmask = 0; } int dev_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; int err = 0; if (dev->threaded == threaded) return 0; if (threaded) { list_for_each_entry(napi, &dev->napi_list, dev_list) { if (!napi->thread) { err = napi_kthread_create(napi); if (err) { threaded = false; break; } } } } dev->threaded = threaded; /* Make sure kthread is created before THREADED bit * is set. */ smp_mb__before_atomic(); /* Setting/unsetting threaded mode on a napi might not immediately * take effect, if the current napi instance is actively being * polled. In this case, the switch between threaded mode and * softirq mode will happen in the next round of napi_schedule(). * This should not cause hiccups/stalls to the live traffic. */ list_for_each_entry(napi, &dev->napi_list, dev_list) assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); return err; } EXPORT_SYMBOL(dev_set_threaded); /** * netif_queue_set_napi - Associate queue with the napi * @dev: device to which NAPI and queue belong * @queue_index: Index of queue * @type: queue type as RX or TX * @napi: NAPI context, pass NULL to clear previously set NAPI * * Set queue with its corresponding napi context. This should be done after * registering the NAPI handler for the queue-vector and the queues have been * mapped to the corresponding interrupt vector. 
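 *
 * An illustrative sketch (ring and foo_poll() are hypothetical driver
 * names), assuming one NAPI instance per RX ring:
 *
 *	netif_napi_add(dev, &ring->napi, foo_poll);
 *	netif_queue_set_napi(dev, ring->index, NETDEV_QUEUE_TYPE_RX,
 *			     &ring->napi);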
*/ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, enum netdev_queue_type type, struct napi_struct *napi) { struct netdev_rx_queue *rxq; struct netdev_queue *txq; if (WARN_ON_ONCE(napi && !napi->dev)) return; if (dev->reg_state >= NETREG_REGISTERED) ASSERT_RTNL(); switch (type) { case NETDEV_QUEUE_TYPE_RX: rxq = __netif_get_rx_queue(dev, queue_index); rxq->napi = napi; return; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(dev, queue_index); txq->napi = napi; return; default: return; } } EXPORT_SYMBOL(netif_queue_set_napi); void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state))) return; INIT_LIST_HEAD(&napi->poll_list); INIT_HLIST_NODE(&napi->napi_hash_node); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); napi->rx_count = 0; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) netdev_err_once(dev, "%s() called with weight %d\n", __func__, weight); napi->weight = weight; napi->dev = dev; #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif napi->list_owner = -1; set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); napi_hash_add(napi); napi_get_frags_check(napi); /* Create kthread for this napi if dev->threaded is set. * Clear dev->threaded if kthread creation failed so that * threaded mode will not be enabled in napi_enable(). */ if (dev->threaded && napi_kthread_create(napi)) dev->threaded = 0; netif_napi_set_irq(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight); void napi_disable(struct napi_struct *n) { unsigned long val, new; might_sleep(); set_bit(NAPI_STATE_DISABLE, &n->state); val = READ_ONCE(n->state); do { while (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { usleep_range(20, 200); val = READ_ONCE(n->state); } new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC; new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); } while (!try_cmpxchg(&n->state, &val, new)); hrtimer_cancel(&n->timer); clear_bit(NAPI_STATE_DISABLE, &n->state); } EXPORT_SYMBOL(napi_disable); /** * napi_enable - enable NAPI scheduling * @n: NAPI context * * Resume NAPI from being scheduled on this context. * Must be paired with napi_disable. 
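 *
 * An illustrative sketch of the usual pairing in a driver's open/stop
 * callbacks (foo_priv and the foo_* callbacks are hypothetical):
 *
 *	static int foo_open(struct net_device *dev)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		napi_enable(&priv->napi);
 *		return 0;
 *	}
 *
 *	static int foo_stop(struct net_device *dev)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		napi_disable(&priv->napi);
 *		return 0;
 *	}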
*/ void napi_enable(struct napi_struct *n) { unsigned long new, val = READ_ONCE(n->state); do { BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC); if (n->dev->threaded && n->thread) new |= NAPIF_STATE_THREADED; } while (!try_cmpxchg(&n->state, &val, new)); } EXPORT_SYMBOL(napi_enable); static void flush_gro_hash(struct napi_struct *napi) { int i; for (i = 0; i < GRO_HASH_BUCKETS; i++) { struct sk_buff *skb, *n; list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list) kfree_skb(skb); napi->gro_hash[i].count = 0; } } /* Must be called in process context */ void __netif_napi_del(struct napi_struct *napi) { if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; napi_hash_del(napi); list_del_rcu(&napi->dev_list); napi_free_frags(napi); flush_gro_hash(napi); napi->gro_bitmask = 0; if (napi->thread) { kthread_stop(napi->thread); napi->thread = NULL; } } EXPORT_SYMBOL(__netif_napi_del); static int __napi_poll(struct napi_struct *n, bool *repoll) { int work, weight; weight = n->weight; /* This NAPI_STATE_SCHED test is for avoiding a race * with netpoll's poll_napi(). Only the entity which * obtains the lock and sees NAPI_STATE_SCHED set will * actually make the ->poll() call. Therefore we avoid * accidentally calling ->poll() when NAPI is not scheduled. */ work = 0; if (napi_is_scheduled(n)) { work = n->poll(n, weight); trace_napi_poll(n, work, weight); xdp_do_check_flushed(n); } if (unlikely(work > weight)) netdev_err_once(n->dev, "NAPI poll function %pS returned %d, exceeding its budget of %d.\n", n->poll, work, weight); if (likely(work < weight)) return work; /* Drivers must not modify the NAPI state if they * consume the entire weight. In such cases this code * still "owns" the NAPI instance and therefore can * move the instance around on the list at-will. */ if (unlikely(napi_disable_pending(n))) { napi_complete(n); return work; } /* The NAPI context has more processing work, but busy-polling * is preferred. Exit early. */ if (napi_prefer_busy_poll(n)) { if (napi_complete_done(n, work)) { /* If timeout is not set, we need to make sure * that the NAPI is re-scheduled. */ napi_schedule(n); } return work; } if (n->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. */ napi_gro_flush(n, HZ >= 1000); } gro_normal_list(n); /* Some drivers may have called napi_schedule * prior to exhausting their budget. */ if (unlikely(!list_empty(&n->poll_list))) { pr_warn_once("%s: Budget exhausted after napi rescheduled\n", n->dev ? n->dev->name : "backlog"); return work; } *repoll = true; return work; } static int napi_poll(struct napi_struct *n, struct list_head *repoll) { bool do_repoll = false; void *have; int work; list_del_init(&n->poll_list); have = netpoll_poll_lock(n); work = __napi_poll(n, &do_repoll); if (do_repoll) list_add_tail(&n->poll_list, repoll); netpoll_poll_unlock(have); return work; } static int napi_thread_wait(struct napi_struct *napi) { bool woken = false; set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { /* Testing SCHED_THREADED bit here to make sure the current * kthread owns this napi and could poll on this napi. * Testing SCHED bit is not enough because SCHED bit might be * set by some other busy poll thread or by napi_disable(). */ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) { WARN_ON(!list_empty(&napi->poll_list)); __set_current_state(TASK_RUNNING); return 0; } schedule(); /* woken being true indicates this thread owns this napi. 
*/ woken = true; set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); return -1; } static int napi_threaded_poll(void *data) { struct napi_struct *napi = data; struct softnet_data *sd; void *have; while (!napi_thread_wait(napi)) { unsigned long last_qs = jiffies; for (;;) { bool repoll = false; local_bh_disable(); sd = this_cpu_ptr(&softnet_data); sd->in_napi_threaded_poll = true; have = netpoll_poll_lock(napi); __napi_poll(napi, &repoll); netpoll_poll_unlock(have); sd->in_napi_threaded_poll = false; barrier(); if (sd_has_rps_ipi_waiting(sd)) { local_irq_disable(); net_rps_action_and_irq_enable(sd); } skb_defer_free_flush(sd); local_bh_enable(); if (!repoll) break; rcu_softirq_qs_periodic(last_qs); cond_resched(); } } return 0; } static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs)); int budget = READ_ONCE(net_hotdata.netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll); start: sd->in_net_rx_action = true; local_irq_disable(); list_splice_init(&sd->poll_list, &list); local_irq_enable(); for (;;) { struct napi_struct *n; skb_defer_free_flush(sd); if (list_empty(&list)) { if (list_empty(&repoll)) { sd->in_net_rx_action = false; barrier(); /* We need to check if ____napi_schedule() * had refilled poll_list while * sd->in_net_rx_action was true. */ if (!list_empty(&sd->poll_list)) goto start; if (!sd_has_rps_ipi_waiting(sd)) goto end; } break; } n = list_first_entry(&list, struct napi_struct, poll_list); budget -= napi_poll(n, &repoll); /* If softirq window is exhausted then punt. * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. */ if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) { sd->time_squeeze++; break; } } local_irq_disable(); list_splice_tail_init(&sd->poll_list, &list); list_splice_tail(&repoll, &list); list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) __raise_softirq_irqoff(NET_RX_SOFTIRQ); else sd->in_net_rx_action = false; net_rps_action_and_irq_enable(sd); end:; } struct netdev_adjacent { struct net_device *dev; netdevice_tracker dev_tracker; /* upper master flag, there can only be one master device per list */ bool master; /* lookup ignore flag */ bool ignore; /* counter for the number of times this device was added to us */ u16 ref_nr; /* private field for the users */ void *private; struct list_head list; struct rcu_head rcu; }; static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, struct list_head *adj_list) { struct netdev_adjacent *adj; list_for_each_entry(adj, adj_list, list) { if (adj->dev == adj_dev) return adj; } return NULL; } static int ____netdev_has_upper_dev(struct net_device *upper_dev, struct netdev_nested_priv *priv) { struct net_device *dev = (struct net_device *)priv->data; return upper_dev == dev; } /** * netdev_has_upper_dev - Check if device is linked to an upper device * @dev: device * @upper_dev: upper device to check * * Find out if a device is linked to specified upper device and return true * in case it is. Note that this checks only immediate upper device, * not through a complete stack of devices. The caller must hold the RTNL lock. 
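 *
 * An illustrative sketch (bond_dev and slave_dev are hypothetical): a
 * driver about to stack bond_dev on top of slave_dev could refuse a
 * duplicate link with:
 *
 *	ASSERT_RTNL();
 *	if (netdev_has_upper_dev(slave_dev, bond_dev))
 *		return -EBUSY;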
*/ bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { struct netdev_nested_priv priv = { .data = (void *)upper_dev, }; ASSERT_RTNL(); return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, &priv); } EXPORT_SYMBOL(netdev_has_upper_dev); /** * netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device * @dev: device * @upper_dev: upper device to check * * Find out if a device is linked to specified upper device and return true * in case it is. Note that this checks the entire upper device chain. * The caller must hold rcu lock. */ bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { struct netdev_nested_priv priv = { .data = (void *)upper_dev, }; return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, &priv); } EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); /** * netdev_has_any_upper_dev - Check if device is linked to some device * @dev: device * * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->adj_list.upper); } EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device * @dev: device * * Find a master upper device and return pointer to it or NULL in case * it's not there. The caller must hold the RTNL lock. */ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) { struct netdev_adjacent *upper; ASSERT_RTNL(); if (list_empty(&dev->adj_list.upper)) return NULL; upper = list_first_entry(&dev->adj_list.upper, struct netdev_adjacent, list); if (likely(upper->master)) return upper->dev; return NULL; } EXPORT_SYMBOL(netdev_master_upper_dev_get); static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev) { struct netdev_adjacent *upper; ASSERT_RTNL(); if (list_empty(&dev->adj_list.upper)) return NULL; upper = list_first_entry(&dev->adj_list.upper, struct netdev_adjacent, list); if (likely(upper->master) && !upper->ignore) return upper->dev; return NULL; } /** * netdev_has_any_lower_dev - Check if device is linked to some device * @dev: device * * Find out if a device is linked to a lower device and return true in case * it is. The caller must hold the RTNL lock. */ static bool netdev_has_any_lower_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->adj_list.lower); } void *netdev_adjacent_get_private(struct list_head *adj_list) { struct netdev_adjacent *adj; adj = list_entry(adj_list, struct netdev_adjacent, list); return adj->private; } EXPORT_SYMBOL(netdev_adjacent_get_private); /** * netdev_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device * @iter: list_head ** of the current position * * Gets the next device from the dev's upper list, starting from iter * position. The caller must hold RCU read lock. 
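 *
 * Open-coded iteration looks roughly like the sketch below; the
 * netdev_for_each_upper_dev_rcu() helper in netdevice.h wraps the same
 * pattern:
 *
 *	struct net_device *upper;
 *	struct list_head *iter;
 *
 *	rcu_read_lock();
 *	for (iter = &dev->adj_list.upper,
 *	     upper = netdev_upper_get_next_dev_rcu(dev, &iter);
 *	     upper;
 *	     upper = netdev_upper_get_next_dev_rcu(dev, &iter))
 *		pr_debug("upper: %s\n", upper->name);
 *	rcu_read_unlock();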
*/ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter) { struct netdev_adjacent *upper; WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); if (&upper->list == &dev->adj_list.upper) return NULL; *iter = &upper->list; return upper->dev; } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); static struct net_device *__netdev_next_upper_dev(struct net_device *dev, struct list_head **iter, bool *ignore) { struct netdev_adjacent *upper; upper = list_entry((*iter)->next, struct netdev_adjacent, list); if (&upper->list == &dev->adj_list.upper) return NULL; *iter = &upper->list; *ignore = upper->ignore; return upper->dev; } static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, struct list_head **iter) { struct netdev_adjacent *upper; WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); if (&upper->list == &dev->adj_list.upper) return NULL; *iter = &upper->list; return upper->dev; } static int __netdev_walk_all_upper_dev(struct net_device *dev, int (*fn)(struct net_device *dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; int ret, cur = 0; bool ignore; now = dev; iter = &dev->adj_list.upper; while (1) { if (now != dev) { ret = fn(now, priv); if (ret) return ret; } next = NULL; while (1) { udev = __netdev_next_upper_dev(now, &iter, &ignore); if (!udev) break; if (ignore) continue; next = udev; niter = &udev->adj_list.upper; dev_stack[cur] = now; iter_stack[cur++] = iter; break; } if (!next) { if (!cur) return 0; next = dev_stack[--cur]; niter = iter_stack[cur]; } now = next; iter = niter; } return 0; } int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; int ret, cur = 0; now = dev; iter = &dev->adj_list.upper; while (1) { if (now != dev) { ret = fn(now, priv); if (ret) return ret; } next = NULL; while (1) { udev = netdev_next_upper_dev_rcu(now, &iter); if (!udev) break; next = udev; niter = &udev->adj_list.upper; dev_stack[cur] = now; iter_stack[cur++] = iter; break; } if (!next) { if (!cur) return 0; next = dev_stack[--cur]; niter = iter_stack[cur]; } now = next; iter = niter; } return 0; } EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); static bool __netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { struct netdev_nested_priv priv = { .flags = 0, .data = (void *)upper_dev, }; ASSERT_RTNL(); return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, &priv); } /** * netdev_lower_get_next_private - Get the next ->private from the * lower neighbour list * @dev: device * @iter: list_head ** of the current position * * Gets the next netdev_adjacent->private from the dev's lower neighbour * list, starting from iter position. The caller must hold either hold the * RTNL lock or its own locking that guarantees that the neighbour lower * list will remain unchanged. 
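 *
 * Open-coded iteration looks roughly like the sketch below (note that the
 * iterator starts at the first list entry, not at the list head);
 * netdev_for_each_lower_private() in netdevice.h wraps the same pattern.
 * foo_handle_port() is a hypothetical per-port callback:
 *
 *	void *priv;
 *	struct list_head *iter;
 *
 *	for (iter = dev->adj_list.lower.next,
 *	     priv = netdev_lower_get_next_private(dev, &iter);
 *	     priv;
 *	     priv = netdev_lower_get_next_private(dev, &iter))
 *		foo_handle_port(priv);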
*/ void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter) { struct netdev_adjacent *lower; lower = list_entry(*iter, struct netdev_adjacent, list); if (&lower->list == &dev->adj_list.lower) return NULL; *iter = lower->list.next; return lower->private; } EXPORT_SYMBOL(netdev_lower_get_next_private); /** * netdev_lower_get_next_private_rcu - Get the next ->private from the * lower neighbour list, RCU * variant * @dev: device * @iter: list_head ** of the current position * * Gets the next netdev_adjacent->private from the dev's lower neighbour * list, starting from iter position. The caller must hold RCU read lock. */ void *netdev_lower_get_next_private_rcu(struct net_device *dev, struct list_head **iter) { struct netdev_adjacent *lower; WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); if (&lower->list == &dev->adj_list.lower) return NULL; *iter = &lower->list; return lower->private; } EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); /** * netdev_lower_get_next - Get the next device from the lower neighbour * list * @dev: device * @iter: list_head ** of the current position * * Gets the next netdev_adjacent from the dev's lower neighbour * list, starting from iter position. The caller must hold RTNL lock or * its own locking that guarantees that the neighbour lower * list will remain unchanged. */ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) { struct netdev_adjacent *lower; lower = list_entry(*iter, struct netdev_adjacent, list); if (&lower->list == &dev->adj_list.lower) return NULL; *iter = lower->list.next; return lower->dev; } EXPORT_SYMBOL(netdev_lower_get_next); static struct net_device *netdev_next_lower_dev(struct net_device *dev, struct list_head **iter) { struct netdev_adjacent *lower; lower = list_entry((*iter)->next, struct netdev_adjacent, list); if (&lower->list == &dev->adj_list.lower) return NULL; *iter = &lower->list; return lower->dev; } static struct net_device *__netdev_next_lower_dev(struct net_device *dev, struct list_head **iter, bool *ignore) { struct netdev_adjacent *lower; lower = list_entry((*iter)->next, struct netdev_adjacent, list); if (&lower->list == &dev->adj_list.lower) return NULL; *iter = &lower->list; *ignore = lower->ignore; return lower->dev; } int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; int ret, cur = 0; now = dev; iter = &dev->adj_list.lower; while (1) { if (now != dev) { ret = fn(now, priv); if (ret) return ret; } next = NULL; while (1) { ldev = netdev_next_lower_dev(now, &iter); if (!ldev) break; next = ldev; niter = &ldev->adj_list.lower; dev_stack[cur] = now; iter_stack[cur++] = iter; break; } if (!next) { if (!cur) return 0; next = dev_stack[--cur]; niter = iter_stack[cur]; } now = next; iter = niter; } return 0; } EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); static int __netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; int ret, cur = 0; bool ignore; now = dev; iter = &dev->adj_list.lower; while (1) { if (now != dev) { ret = fn(now, 
priv); if (ret) return ret; } next = NULL; while (1) { ldev = __netdev_next_lower_dev(now, &iter, &ignore); if (!ldev) break; if (ignore) continue; next = ldev; niter = &ldev->adj_list.lower; dev_stack[cur] = now; iter_stack[cur++] = iter; break; } if (!next) { if (!cur) return 0; next = dev_stack[--cur]; niter = iter_stack[cur]; } now = next; iter = niter; } return 0; } struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter) { struct netdev_adjacent *lower; lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); if (&lower->list == &dev->adj_list.lower) return NULL; *iter = &lower->list; return lower->dev; } EXPORT_SYMBOL(netdev_next_lower_dev_rcu); static u8 __netdev_upper_depth(struct net_device *dev) { struct net_device *udev; struct list_head *iter; u8 max_depth = 0; bool ignore; for (iter = &dev->adj_list.upper, udev = __netdev_next_upper_dev(dev, &iter, &ignore); udev; udev = __netdev_next_upper_dev(dev, &iter, &ignore)) { if (ignore) continue; if (max_depth < udev->upper_level) max_depth = udev->upper_level; } return max_depth; } static u8 __netdev_lower_depth(struct net_device *dev) { struct net_device *ldev; struct list_head *iter; u8 max_depth = 0; bool ignore; for (iter = &dev->adj_list.lower, ldev = __netdev_next_lower_dev(dev, &iter, &ignore); ldev; ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) { if (ignore) continue; if (max_depth < ldev->lower_level) max_depth = ldev->lower_level; } return max_depth; } static int __netdev_update_upper_level(struct net_device *dev, struct netdev_nested_priv *__unused) { dev->upper_level = __netdev_upper_depth(dev) + 1; return 0; } #ifdef CONFIG_LOCKDEP static LIST_HEAD(net_unlink_list); static void net_unlink_todo(struct net_device *dev) { if (list_empty(&dev->unlink_list)) list_add_tail(&dev->unlink_list, &net_unlink_list); } #endif static int __netdev_update_lower_level(struct net_device *dev, struct netdev_nested_priv *priv) { dev->lower_level = __netdev_lower_depth(dev) + 1; #ifdef CONFIG_LOCKDEP if (!priv) return 0; if (priv->flags & NESTED_SYNC_IMM) dev->nested_level = dev->lower_level - 1; if (priv->flags & NESTED_SYNC_TODO) net_unlink_todo(dev); #endif return 0; } int netdev_walk_all_lower_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; int ret, cur = 0; now = dev; iter = &dev->adj_list.lower; while (1) { if (now != dev) { ret = fn(now, priv); if (ret) return ret; } next = NULL; while (1) { ldev = netdev_next_lower_dev_rcu(now, &iter); if (!ldev) break; next = ldev; niter = &ldev->adj_list.lower; dev_stack[cur] = now; iter_stack[cur++] = iter; break; } if (!next) { if (!cur) return 0; next = dev_stack[--cur]; niter = iter_stack[cur]; } now = next; iter = niter; } return 0; } EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu); /** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU * variant * @dev: device * * Gets the first netdev_adjacent->private from the dev's lower neighbour * list. The caller must hold RCU read lock. 
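 *
 * An illustrative sketch (master_dev and foo_use_port() are hypothetical):
 *
 *	rcu_read_lock();
 *	port = netdev_lower_get_first_private_rcu(master_dev);
 *	if (port)
 *		foo_use_port(port);
 *	rcu_read_unlock();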
*/ void *netdev_lower_get_first_private_rcu(struct net_device *dev) { struct netdev_adjacent *lower; lower = list_first_or_null_rcu(&dev->adj_list.lower, struct netdev_adjacent, list); if (lower) return lower->private; return NULL; } EXPORT_SYMBOL(netdev_lower_get_first_private_rcu); /** * netdev_master_upper_dev_get_rcu - Get master upper device * @dev: device * * Find a master upper device and return pointer to it or NULL in case * it's not there. The caller must hold the RCU read lock. */ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) { struct netdev_adjacent *upper; upper = list_first_or_null_rcu(&dev->adj_list.upper, struct netdev_adjacent, list); if (upper && likely(upper->master)) return upper->dev; return NULL; } EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); static int netdev_adjacent_sysfs_add(struct net_device *dev, struct net_device *adj_dev, struct list_head *dev_list) { char linkname[IFNAMSIZ+7]; sprintf(linkname, dev_list == &dev->adj_list.upper ? "upper_%s" : "lower_%s", adj_dev->name); return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), linkname); } static void netdev_adjacent_sysfs_del(struct net_device *dev, char *name, struct list_head *dev_list) { char linkname[IFNAMSIZ+7]; sprintf(linkname, dev_list == &dev->adj_list.upper ? "upper_%s" : "lower_%s", name); sysfs_remove_link(&(dev->dev.kobj), linkname); } static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, struct net_device *adj_dev, struct list_head *dev_list) { return (dev_list == &dev->adj_list.upper || dev_list == &dev->adj_list.lower) && net_eq(dev_net(dev), dev_net(adj_dev)); } static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, struct list_head *dev_list, void *private, bool master) { struct netdev_adjacent *adj; int ret; adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { adj->ref_nr += 1; pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n", dev->name, adj_dev->name, adj->ref_nr); return 0; } adj = kmalloc(sizeof(*adj), GFP_KERNEL); if (!adj) return -ENOMEM; adj->dev = adj_dev; adj->master = master; adj->ref_nr = 1; adj->private = private; adj->ignore = false; netdev_hold(adj_dev, &adj->dev_tracker, GFP_KERNEL); pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) { ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); if (ret) goto free_adj; } /* Ensure that master link is always the first item in list. 
*/ if (master) { ret = sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), "master"); if (ret) goto remove_symlinks; list_add_rcu(&adj->list, dev_list); } else { list_add_tail_rcu(&adj->list, dev_list); } return 0; remove_symlinks: if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: netdev_put(adj_dev, &adj->dev_tracker); kfree(adj); return ret; } static void __netdev_adjacent_dev_remove(struct net_device *dev, struct net_device *adj_dev, u16 ref_nr, struct list_head *dev_list) { struct netdev_adjacent *adj; pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n", dev->name, adj_dev->name, ref_nr); adj = __netdev_find_adj(adj_dev, dev_list); if (!adj) { pr_err("Adjacency does not exist for device %s from %s\n", dev->name, adj_dev->name); WARN_ON(1); return; } if (adj->ref_nr > ref_nr) { pr_debug("adjacency: %s to %s ref_nr - %d = %d\n", dev->name, adj_dev->name, ref_nr, adj->ref_nr - ref_nr); adj->ref_nr -= ref_nr; return; } if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); netdev_put(adj_dev, &adj->dev_tracker); kfree_rcu(adj, rcu); } static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, master); if (ret) return ret; ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, false); if (ret) { __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list); return ret; } return 0; } static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, u16 ref_nr, struct list_head *up_list, struct list_head *down_list) { __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { return __netdev_adjacent_dev_link_lists(dev, upper_dev, &dev->adj_list.upper, &upper_dev->adj_list.lower, private, master); } static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); } static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, struct netdev_nested_priv *priv, struct netlink_ext_ack *extack) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { .dev = dev, .extack = extack, }, .upper_dev = upper_dev, .master = master, .linking = true, .upper_info = upper_info, }; struct net_device *master_dev; int ret = 0; ASSERT_RTNL(); if (dev == upper_dev) return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ if (__netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) return -EMLINK; if (!master) { if (__netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; } else { master_dev = __netdev_master_upper_dev_get(dev); if (master_dev) return master_dev == upper_dev ? 
-EEXIST : -EBUSY; } ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) return ret; ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv, master); if (ret) return ret; ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) goto rollback; __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, priv); return 0; rollback: __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); return ret; } /** * netdev_upper_dev_link - Add a link to the upper device * @dev: device * @upper_dev: new upper device * @extack: netlink extended ack * * Adds a link to device which is upper to this one. The caller must hold * the RTNL lock. On a failure a negative errno code is returned. * On success the reference counts are adjusted and the function * returns zero. */ int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, struct netlink_ext_ack *extack) { struct netdev_nested_priv priv = { .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, .data = NULL, }; return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL, &priv, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); /** * netdev_master_upper_dev_link - Add a master link to the upper device * @dev: device * @upper_dev: new upper device * @upper_priv: upper device private * @upper_info: upper info to be passed down via notifier * @extack: netlink extended ack * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices * might be linked as well. The caller must hold the RTNL lock. * On a failure a negative errno code is returned. On success the reference * counts are adjusted and the function returns zero. */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, void *upper_priv, void *upper_info, struct netlink_ext_ack *extack) { struct netdev_nested_priv priv = { .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, .data = NULL, }; return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv, upper_info, &priv, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); static void __netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev, struct netdev_nested_priv *priv) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { .dev = dev, }, .upper_dev = upper_dev, .linking = false, }; ASSERT_RTNL(); changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, priv); } /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device * @upper_dev: new upper device * * Removes a link to device which is upper to this one. The caller must hold * the RTNL lock. 
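 *
 * Editor's illustrative sketch (not part of the original source): a
 * hypothetical stacking driver undoing a link it created earlier, called
 * under rtnl_lock(), where "slave" and "master" are assumed variables:
 *
 *	if (netdev_master_upper_dev_get(slave) == master)
 *		netdev_upper_dev_unlink(slave, master);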
*/ void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { struct netdev_nested_priv priv = { .flags = NESTED_SYNC_TODO, .data = NULL, }; __netdev_upper_dev_unlink(dev, upper_dev, &priv); } EXPORT_SYMBOL(netdev_upper_dev_unlink); static void __netdev_adjacent_dev_set(struct net_device *upper_dev, struct net_device *lower_dev, bool val) { struct netdev_adjacent *adj; adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower); if (adj) adj->ignore = val; adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper); if (adj) adj->ignore = val; } static void netdev_adjacent_dev_disable(struct net_device *upper_dev, struct net_device *lower_dev) { __netdev_adjacent_dev_set(upper_dev, lower_dev, true); } static void netdev_adjacent_dev_enable(struct net_device *upper_dev, struct net_device *lower_dev) { __netdev_adjacent_dev_set(upper_dev, lower_dev, false); } int netdev_adjacent_change_prepare(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev, struct netlink_ext_ack *extack) { struct netdev_nested_priv priv = { .flags = 0, .data = NULL, }; int err; if (!new_dev) return 0; if (old_dev && new_dev != old_dev) netdev_adjacent_dev_disable(dev, old_dev); err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv, extack); if (err) { if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); return err; } return 0; } EXPORT_SYMBOL(netdev_adjacent_change_prepare); void netdev_adjacent_change_commit(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { struct netdev_nested_priv priv = { .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, .data = NULL, }; if (!new_dev || !old_dev) return; if (new_dev == old_dev) return; netdev_adjacent_dev_enable(dev, old_dev); __netdev_upper_dev_unlink(old_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_commit); void netdev_adjacent_change_abort(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { struct netdev_nested_priv priv = { .flags = 0, .data = NULL, }; if (!new_dev) return; if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); __netdev_upper_dev_unlink(new_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_abort); /** * netdev_bonding_info_change - Dispatch event about slave change * @dev: device * @bonding_info: info to dispatch * * Send NETDEV_BONDING_INFO to netdev notifiers with info. * The caller must hold the RTNL lock. 
*/ void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info) { struct netdev_notifier_bonding_info info = { .info.dev = dev, }; memcpy(&info.bonding_info, bonding_info, sizeof(struct netdev_bonding_info)); call_netdevice_notifiers_info(NETDEV_BONDING_INFO, &info.info); } EXPORT_SYMBOL(netdev_bonding_info_change); static int netdev_offload_xstats_enable_l3(struct net_device *dev, struct netlink_ext_ack *extack) { struct netdev_notifier_offload_xstats_info info = { .info.dev = dev, .info.extack = extack, .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3, }; int err; int rc; dev->offload_xstats_l3 = kzalloc(sizeof(*dev->offload_xstats_l3), GFP_KERNEL); if (!dev->offload_xstats_l3) return -ENOMEM; rc = call_netdevice_notifiers_info_robust(NETDEV_OFFLOAD_XSTATS_ENABLE, NETDEV_OFFLOAD_XSTATS_DISABLE, &info.info); err = notifier_to_errno(rc); if (err) goto free_stats; return 0; free_stats: kfree(dev->offload_xstats_l3); dev->offload_xstats_l3 = NULL; return err; } int netdev_offload_xstats_enable(struct net_device *dev, enum netdev_offload_xstats_type type, struct netlink_ext_ack *extack) { ASSERT_RTNL(); if (netdev_offload_xstats_enabled(dev, type)) return -EALREADY; switch (type) { case NETDEV_OFFLOAD_XSTATS_TYPE_L3: return netdev_offload_xstats_enable_l3(dev, extack); } WARN_ON(1); return -EINVAL; } EXPORT_SYMBOL(netdev_offload_xstats_enable); static void netdev_offload_xstats_disable_l3(struct net_device *dev) { struct netdev_notifier_offload_xstats_info info = { .info.dev = dev, .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3, }; call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_DISABLE, &info.info); kfree(dev->offload_xstats_l3); dev->offload_xstats_l3 = NULL; } int netdev_offload_xstats_disable(struct net_device *dev, enum netdev_offload_xstats_type type) { ASSERT_RTNL(); if (!netdev_offload_xstats_enabled(dev, type)) return -EALREADY; switch (type) { case NETDEV_OFFLOAD_XSTATS_TYPE_L3: netdev_offload_xstats_disable_l3(dev); return 0; } WARN_ON(1); return -EINVAL; } EXPORT_SYMBOL(netdev_offload_xstats_disable); static void netdev_offload_xstats_disable_all(struct net_device *dev) { netdev_offload_xstats_disable(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3); } static struct rtnl_hw_stats64 * netdev_offload_xstats_get_ptr(const struct net_device *dev, enum netdev_offload_xstats_type type) { switch (type) { case NETDEV_OFFLOAD_XSTATS_TYPE_L3: return dev->offload_xstats_l3; } WARN_ON(1); return NULL; } bool netdev_offload_xstats_enabled(const struct net_device *dev, enum netdev_offload_xstats_type type) { ASSERT_RTNL(); return netdev_offload_xstats_get_ptr(dev, type); } EXPORT_SYMBOL(netdev_offload_xstats_enabled); struct netdev_notifier_offload_xstats_ru { bool used; }; struct netdev_notifier_offload_xstats_rd { struct rtnl_hw_stats64 stats; bool used; }; static void netdev_hw_stats64_add(struct rtnl_hw_stats64 *dest, const struct rtnl_hw_stats64 *src) { dest->rx_packets += src->rx_packets; dest->tx_packets += src->tx_packets; dest->rx_bytes += src->rx_bytes; dest->tx_bytes += src->tx_bytes; dest->rx_errors += src->rx_errors; dest->tx_errors += src->tx_errors; dest->rx_dropped += src->rx_dropped; dest->tx_dropped += src->tx_dropped; dest->multicast += src->multicast; } static int netdev_offload_xstats_get_used(struct net_device *dev, enum netdev_offload_xstats_type type, bool *p_used, struct netlink_ext_ack *extack) { struct netdev_notifier_offload_xstats_ru report_used = {}; struct netdev_notifier_offload_xstats_info info = { .info.dev = dev, .info.extack = extack, .type = type, 
.report_used = &report_used, }; int rc; WARN_ON(!netdev_offload_xstats_enabled(dev, type)); rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_USED, &info.info); *p_used = report_used.used; return notifier_to_errno(rc); } static int netdev_offload_xstats_get_stats(struct net_device *dev, enum netdev_offload_xstats_type type, struct rtnl_hw_stats64 *p_stats, bool *p_used, struct netlink_ext_ack *extack) { struct netdev_notifier_offload_xstats_rd report_delta = {}; struct netdev_notifier_offload_xstats_info info = { .info.dev = dev, .info.extack = extack, .type = type, .report_delta = &report_delta, }; struct rtnl_hw_stats64 *stats; int rc; stats = netdev_offload_xstats_get_ptr(dev, type); if (WARN_ON(!stats)) return -EINVAL; rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, &info.info); /* Cache whatever we got, even if there was an error, otherwise the * successful stats retrievals would get lost. */ netdev_hw_stats64_add(stats, &report_delta.stats); if (p_stats) *p_stats = *stats; *p_used = report_delta.used; return notifier_to_errno(rc); } int netdev_offload_xstats_get(struct net_device *dev, enum netdev_offload_xstats_type type, struct rtnl_hw_stats64 *p_stats, bool *p_used, struct netlink_ext_ack *extack) { ASSERT_RTNL(); if (p_stats) return netdev_offload_xstats_get_stats(dev, type, p_stats, p_used, extack); else return netdev_offload_xstats_get_used(dev, type, p_used, extack); } EXPORT_SYMBOL(netdev_offload_xstats_get); void netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *report_delta, const struct rtnl_hw_stats64 *stats) { report_delta->used = true; netdev_hw_stats64_add(&report_delta->stats, stats); } EXPORT_SYMBOL(netdev_offload_xstats_report_delta); void netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *report_used) { report_used->used = true; } EXPORT_SYMBOL(netdev_offload_xstats_report_used); void netdev_offload_xstats_push_delta(struct net_device *dev, enum netdev_offload_xstats_type type, const struct rtnl_hw_stats64 *p_stats) { struct rtnl_hw_stats64 *stats; ASSERT_RTNL(); stats = netdev_offload_xstats_get_ptr(dev, type); if (WARN_ON(!stats)) return; netdev_hw_stats64_add(stats, p_stats); } EXPORT_SYMBOL(netdev_offload_xstats_push_delta); /** * netdev_get_xmit_slave - Get the xmit slave of master device * @dev: device * @skb: The packet * @all_slaves: assume all the slaves are active * * The reference counters are not incremented so the caller must be * careful with locks. The caller must hold RCU lock. * %NULL is returned if no slave is found. */ struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, bool all_slaves) { const struct net_device_ops *ops = dev->netdev_ops; if (!ops->ndo_get_xmit_slave) return NULL; return ops->ndo_get_xmit_slave(dev, skb, all_slaves); } EXPORT_SYMBOL(netdev_get_xmit_slave); static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev, struct sock *sk) { const struct net_device_ops *ops = dev->netdev_ops; if (!ops->ndo_sk_get_lower_dev) return NULL; return ops->ndo_sk_get_lower_dev(dev, sk); } /** * netdev_sk_get_lowest_dev - Get the lowest device in chain given device and socket * @dev: device * @sk: the socket * * %NULL is returned if no lower device is found. 
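 *
 * Editor's illustrative sketch (not part of the original source): a
 * TLS-device-offload style caller resolving which lower device will
 * actually carry a socket's traffic, with "bond_dev" and "sk" as assumed
 * variables:
 *
 *	struct net_device *lowest;
 *
 *	lowest = netdev_sk_get_lowest_dev(bond_dev, sk);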
*/ struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev, struct sock *sk) { struct net_device *lower; lower = netdev_sk_get_lower_dev(dev, sk); while (lower) { dev = lower; lower = netdev_sk_get_lower_dev(dev, sk); } return dev; } EXPORT_SYMBOL(netdev_sk_get_lowest_dev); static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; struct net *net = dev_net(dev); list_for_each_entry(iter, &dev->adj_list.upper, list) { if (!net_eq(net, dev_net(iter->dev))) continue; netdev_adjacent_sysfs_add(iter->dev, dev, &iter->dev->adj_list.lower); netdev_adjacent_sysfs_add(dev, iter->dev, &dev->adj_list.upper); } list_for_each_entry(iter, &dev->adj_list.lower, list) { if (!net_eq(net, dev_net(iter->dev))) continue; netdev_adjacent_sysfs_add(iter->dev, dev, &iter->dev->adj_list.upper); netdev_adjacent_sysfs_add(dev, iter->dev, &dev->adj_list.lower); } } static void netdev_adjacent_del_links(struct net_device *dev) { struct netdev_adjacent *iter; struct net *net = dev_net(dev); list_for_each_entry(iter, &dev->adj_list.upper, list) { if (!net_eq(net, dev_net(iter->dev))) continue; netdev_adjacent_sysfs_del(iter->dev, dev->name, &iter->dev->adj_list.lower); netdev_adjacent_sysfs_del(dev, iter->dev->name, &dev->adj_list.upper); } list_for_each_entry(iter, &dev->adj_list.lower, list) { if (!net_eq(net, dev_net(iter->dev))) continue; netdev_adjacent_sysfs_del(iter->dev, dev->name, &iter->dev->adj_list.upper); netdev_adjacent_sysfs_del(dev, iter->dev->name, &dev->adj_list.lower); } } void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) { struct netdev_adjacent *iter; struct net *net = dev_net(dev); list_for_each_entry(iter, &dev->adj_list.upper, list) { if (!net_eq(net, dev_net(iter->dev))) continue; netdev_adjacent_sysfs_del(iter->dev, oldname, &iter->dev->adj_list.lower); netdev_adjacent_sysfs_add(iter->dev, dev, &iter->dev->adj_list.lower); } list_for_each_entry(iter, &dev->adj_list.lower, list) { if (!net_eq(net, dev_net(iter->dev))) continue; netdev_adjacent_sysfs_del(iter->dev, oldname, &iter->dev->adj_list.upper); netdev_adjacent_sysfs_add(iter->dev, dev, &iter->dev->adj_list.upper); } } void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev) { struct netdev_adjacent *lower; if (!lower_dev) return NULL; lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower); if (!lower) return NULL; return lower->private; } EXPORT_SYMBOL(netdev_lower_dev_get_private); /** * netdev_lower_state_changed - Dispatch event about lower device state change * @lower_dev: device * @lower_state_info: state to dispatch * * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info. * The caller must hold the RTNL lock. 
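 *
 * Editor's illustrative sketch (not part of the original source): a
 * LAG/bonding-style driver reporting the state of one of its lower devices,
 * assuming the usual struct netdev_lag_lower_state_info layout and assumed
 * "slave" and "link_ok" variables:
 *
 *	struct netdev_lag_lower_state_info info = {
 *		.link_up = link_ok,
 *		.tx_enabled = link_ok,
 *	};
 *
 *	netdev_lower_state_changed(slave, &info);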
*/ void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info) { struct netdev_notifier_changelowerstate_info changelowerstate_info = { .info.dev = lower_dev, }; ASSERT_RTNL(); changelowerstate_info.lower_state_info = lower_state_info; call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, &changelowerstate_info.info); } EXPORT_SYMBOL(netdev_lower_state_changed); static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags; kuid_t uid; kgid_t gid; ASSERT_RTNL(); dev->flags |= IFF_PROMISC; dev->promiscuity += inc; if (dev->promiscuity == 0) { /* * Avoid overflow. * If inc causes overflow, untouch promisc and return error. */ if (inc < 0) dev->flags &= ~IFF_PROMISC; else { dev->promiscuity -= inc; netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n"); return -EOVERFLOW; } } if (dev->flags != old_flags) { netdev_info(dev, "%s promiscuous mode\n", dev->flags & IFF_PROMISC ? "entered" : "left"); if (audit_enabled) { current_uid_gid(&uid, &gid); audit_log(audit_context(), GFP_ATOMIC, AUDIT_ANOM_PROMISCUOUS, "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), from_kuid(&init_user_ns, audit_get_loginuid(current)), from_kuid(&init_user_ns, uid), from_kgid(&init_user_ns, gid), audit_get_sessionid(current)); } dev_change_rx_flags(dev, IFF_PROMISC); } if (notify) __dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL); return 0; } /** * dev_set_promiscuity - update promiscuity count on a device * @dev: device * @inc: modifier * * Add or remove promiscuity from a device. While the count in the device * remains above zero the interface remains promiscuous. Once it hits zero * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. * Return 0 if successful or a negative errno code on error. */ int dev_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; int err; err = __dev_set_promiscuity(dev, inc, true); if (err < 0) return err; if (dev->flags != old_flags) dev_set_rx_mode(dev); return err; } EXPORT_SYMBOL(dev_set_promiscuity); static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags, old_gflags = dev->gflags; ASSERT_RTNL(); dev->flags |= IFF_ALLMULTI; dev->allmulti += inc; if (dev->allmulti == 0) { /* * Avoid overflow. * If inc causes overflow, untouch allmulti and return error. */ if (inc < 0) dev->flags &= ~IFF_ALLMULTI; else { dev->allmulti -= inc; netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n"); return -EOVERFLOW; } } if (dev->flags ^ old_flags) { netdev_info(dev, "%s allmulticast mode\n", dev->flags & IFF_ALLMULTI ? "entered" : "left"); dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); if (notify) __dev_notify_flags(dev, old_flags, dev->gflags ^ old_gflags, 0, NULL); } return 0; } /** * dev_set_allmulti - update allmulti count on a device * @dev: device * @inc: modifier * * Add or remove reception of all multicast frames to a device. While the * count in the device remains above zero the interface remains listening * to all interfaces. 
Once it hits zero the device reverts back to normal * filtering operation. A negative @inc value is used to drop the counter * when releasing a resource needing all multicasts. * Return 0 if successful or a negative errno code on error. */ int dev_set_allmulti(struct net_device *dev, int inc) { return __dev_set_allmulti(dev, inc, true); } EXPORT_SYMBOL(dev_set_allmulti); /* * Upload unicast and multicast address lists to device and * configure RX filtering. When the device doesn't support unicast * filtering it is put in promiscuous mode while unicast addresses * are present. */ void __dev_set_rx_mode(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; /* dev_open will call this function so the list will stay sane. */ if (!(dev->flags&IFF_UP)) return; if (!netif_device_present(dev)) return; if (!(dev->priv_flags & IFF_UNICAST_FLT)) { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1, false); dev->uc_promisc = true; } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1, false); dev->uc_promisc = false; } } if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); } void dev_set_rx_mode(struct net_device *dev) { netif_addr_lock_bh(dev); __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); } /** * dev_get_flags - get flags reported to userspace * @dev: device * * Get the combination of flag bits exported through APIs to userspace. */ unsigned int dev_get_flags(const struct net_device *dev) { unsigned int flags; flags = (READ_ONCE(dev->flags) & ~(IFF_PROMISC | IFF_ALLMULTI | IFF_RUNNING | IFF_LOWER_UP | IFF_DORMANT)) | (READ_ONCE(dev->gflags) & (IFF_PROMISC | IFF_ALLMULTI)); if (netif_running(dev)) { if (netif_oper_up(dev)) flags |= IFF_RUNNING; if (netif_carrier_ok(dev)) flags |= IFF_LOWER_UP; if (netif_dormant(dev)) flags |= IFF_DORMANT; } return flags; } EXPORT_SYMBOL(dev_get_flags); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack) { unsigned int old_flags = dev->flags; int ret; ASSERT_RTNL(); /* * Set the flags on our device. */ dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | IFF_AUTOMEDIA)) | (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | IFF_ALLMULTI)); /* * Load in the correct multicast list now the flags have changed. */ if ((old_flags ^ flags) & IFF_MULTICAST) dev_change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); /* * Have we downed the interface. We handle IFF_UP ourselves * according to user attempts to set it, rather than blindly * setting it. */ ret = 0; if ((old_flags ^ flags) & IFF_UP) { if (old_flags & IFF_UP) __dev_close(dev); else ret = __dev_open(dev, extack); } if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; unsigned int old_flags = dev->flags; dev->gflags ^= IFF_PROMISC; if (__dev_set_promiscuity(dev, inc, false) >= 0) if (dev->flags != old_flags) dev_set_rx_mode(dev); } /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI * is important. Some (broken) drivers set IFF_PROMISC, when * IFF_ALLMULTI is requested not asking us and not reporting. */ if ((flags ^ dev->gflags) & IFF_ALLMULTI) { int inc = (flags & IFF_ALLMULTI) ? 
1 : -1; dev->gflags ^= IFF_ALLMULTI; __dev_set_allmulti(dev, inc, false); } return ret; } void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, unsigned int gchanges, u32 portid, const struct nlmsghdr *nlh) { unsigned int changes = dev->flags ^ old_flags; if (gchanges) rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC, portid, nlh); if (changes & IFF_UP) { if (dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_UP, dev); else call_netdevice_notifiers(NETDEV_DOWN, dev); } if (dev->flags & IFF_UP && (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { struct netdev_notifier_change_info change_info = { .info = { .dev = dev, }, .flags_changed = changes, }; call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); } } /** * dev_change_flags - change device settings * @dev: device * @flags: device state flags * @extack: netlink extended ack * * Change settings on device based state flags. The flags are * in the userspace exported format. */ int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack) { int ret; unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; ret = __dev_change_flags(dev, flags, extack); if (ret < 0) return ret; changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); __dev_notify_flags(dev, old_flags, changes, 0, NULL); return ret; } EXPORT_SYMBOL(dev_change_flags); int __dev_set_mtu(struct net_device *dev, int new_mtu) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_change_mtu) return ops->ndo_change_mtu(dev, new_mtu); /* Pairs with all the lockless reads of dev->mtu in the stack */ WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL(__dev_set_mtu); int dev_validate_mtu(struct net_device *dev, int new_mtu, struct netlink_ext_ack *extack) { /* MTU must be positive, and in range */ if (new_mtu < 0 || new_mtu < dev->min_mtu) { NL_SET_ERR_MSG(extack, "mtu less than device minimum"); return -EINVAL; } if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); return -EINVAL; } return 0; } /** * dev_set_mtu_ext - Change maximum transfer unit * @dev: device * @new_mtu: new transfer unit * @extack: netlink extended ack * * Change the maximum transfer size of the network device. */ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, struct netlink_ext_ack *extack) { int err, orig_mtu; if (new_mtu == dev->mtu) return 0; err = dev_validate_mtu(dev, new_mtu, extack); if (err) return err; if (!netif_device_present(dev)) return -ENODEV; err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev); err = notifier_to_errno(err); if (err) return err; orig_mtu = dev->mtu; err = __dev_set_mtu(dev, new_mtu); if (!err) { err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, orig_mtu); err = notifier_to_errno(err); if (err) { /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. 
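 * The return value of this second notification is not checked; the error
 * from the first NETDEV_CHANGEMTU round is what propagates to the caller.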
*/ __dev_set_mtu(dev, orig_mtu); call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, new_mtu); } } return err; } int dev_set_mtu(struct net_device *dev, int new_mtu) { struct netlink_ext_ack extack; int err; memset(&extack, 0, sizeof(extack)); err = dev_set_mtu_ext(dev, new_mtu, &extack); if (err && extack._msg) net_err_ratelimited("%s: %s\n", dev->name, extack._msg); return err; } EXPORT_SYMBOL(dev_set_mtu); /** * dev_change_tx_queue_len - Change TX queue length of a netdevice * @dev: device * @new_len: new tx queue length */ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) { unsigned int orig_len = dev->tx_queue_len; int res; if (new_len != (unsigned int)new_len) return -ERANGE; if (new_len != orig_len) { dev->tx_queue_len = new_len; res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); res = notifier_to_errno(res); if (res) goto err_rollback; res = dev_qdisc_change_tx_queue_len(dev); if (res) goto err_rollback; } return 0; err_rollback: netdev_err(dev, "refused to change device tx_queue_len\n"); dev->tx_queue_len = orig_len; return res; } /** * dev_set_group - Change group this device belongs to * @dev: device * @new_group: group this device should belong to */ void dev_set_group(struct net_device *dev, int new_group) { dev->group = new_group; } /** * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR. * @dev: device * @addr: new address * @extack: netlink extended ack */ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack) { struct netdev_notifier_pre_changeaddr_info info = { .info.dev = dev, .info.extack = extack, .dev_addr = addr, }; int rc; rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info); return notifier_to_errno(rc); } EXPORT_SYMBOL(dev_pre_changeaddr_notify); /** * dev_set_mac_address - Change Media Access Control Address * @dev: device * @sa: new address * @extack: netlink extended ack * * Change the hardware (MAC) address of the device */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; int err; if (!ops->ndo_set_mac_address) return -EOPNOTSUPP; if (sa->sa_family != dev->type) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack); if (err) return err; if (memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) { err = ops->ndo_set_mac_address(dev, sa); if (err) return err; } dev->addr_assign_type = NET_ADDR_SET; call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); return 0; } EXPORT_SYMBOL(dev_set_mac_address); DECLARE_RWSEM(dev_addr_sem); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack) { int ret; down_write(&dev_addr_sem); ret = dev_set_mac_address(dev, sa, extack); up_write(&dev_addr_sem); return ret; } EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { size_t size = sizeof(sa->sa_data_min); struct net_device *dev; int ret = 0; down_read(&dev_addr_sem); rcu_read_lock(); dev = dev_get_by_name_rcu(net, dev_name); if (!dev) { ret = -ENODEV; goto unlock; } if (!dev->addr_len) memset(sa->sa_data, 0, size); else memcpy(sa->sa_data, dev->dev_addr, min_t(size_t, size, dev->addr_len)); sa->sa_family = dev->type; unlock: rcu_read_unlock(); up_read(&dev_addr_sem); return ret; } EXPORT_SYMBOL(dev_get_mac_address); /** * 
dev_change_carrier - Change device carrier * @dev: device * @new_carrier: new value * * Change device carrier */ int dev_change_carrier(struct net_device *dev, bool new_carrier) { const struct net_device_ops *ops = dev->netdev_ops; if (!ops->ndo_change_carrier) return -EOPNOTSUPP; if (!netif_device_present(dev)) return -ENODEV; return ops->ndo_change_carrier(dev, new_carrier); } /** * dev_get_phys_port_id - Get device physical port ID * @dev: device * @ppid: port ID * * Get device physical port ID */ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid) { const struct net_device_ops *ops = dev->netdev_ops; if (!ops->ndo_get_phys_port_id) return -EOPNOTSUPP; return ops->ndo_get_phys_port_id(dev, ppid); } /** * dev_get_phys_port_name - Get device physical port name * @dev: device * @name: port name * @len: limit of bytes to copy to name * * Get device physical port name */ int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len) { const struct net_device_ops *ops = dev->netdev_ops; int err; if (ops->ndo_get_phys_port_name) { err = ops->ndo_get_phys_port_name(dev, name, len); if (err != -EOPNOTSUPP) return err; } return devlink_compat_phys_port_name_get(dev, name, len); } /** * dev_get_port_parent_id - Get the device's port parent identifier * @dev: network device * @ppid: pointer to a storage for the port's parent identifier * @recurse: allow/disallow recursion to lower devices * * Get the devices's port parent identifier */ int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse) { const struct net_device_ops *ops = dev->netdev_ops; struct netdev_phys_item_id first = { }; struct net_device *lower_dev; struct list_head *iter; int err; if (ops->ndo_get_port_parent_id) { err = ops->ndo_get_port_parent_id(dev, ppid); if (err != -EOPNOTSUPP) return err; } err = devlink_compat_switch_id_get(dev, ppid); if (!recurse || err != -EOPNOTSUPP) return err; netdev_for_each_lower_dev(dev, lower_dev, iter) { err = dev_get_port_parent_id(lower_dev, ppid, true); if (err) break; if (!first.id_len) first = *ppid; else if (memcmp(&first, ppid, sizeof(*ppid))) return -EOPNOTSUPP; } return err; } EXPORT_SYMBOL(dev_get_port_parent_id); /** * netdev_port_same_parent_id - Indicate if two network devices have * the same port parent identifier * @a: first network device * @b: second network device */ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) { struct netdev_phys_item_id a_id = { }; struct netdev_phys_item_id b_id = { }; if (dev_get_port_parent_id(a, &a_id, true) || dev_get_port_parent_id(b, &b_id, true)) return false; return netdev_phys_item_id_same(&a_id, &b_id); } EXPORT_SYMBOL(netdev_port_same_parent_id); /** * dev_change_proto_down - set carrier according to proto_down. 
* * @dev: device * @proto_down: new value */ int dev_change_proto_down(struct net_device *dev, bool proto_down) { if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) return -EOPNOTSUPP; if (!netif_device_present(dev)) return -ENODEV; if (proto_down) netif_carrier_off(dev); else netif_carrier_on(dev); dev->proto_down = proto_down; return 0; } /** * dev_change_proto_down_reason - proto down reason * * @dev: device * @mask: proto down mask * @value: proto down value */ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, u32 value) { int b; if (!mask) { dev->proto_down_reason = value; } else { for_each_set_bit(b, &mask, 32) { if (value & (1 << b)) dev->proto_down_reason |= BIT(b); else dev->proto_down_reason &= ~BIT(b); } } } struct bpf_xdp_link { struct bpf_link link; struct net_device *dev; /* protected by rtnl_lock, no refcnt held */ int flags; }; static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags) { if (flags & XDP_FLAGS_HW_MODE) return XDP_MODE_HW; if (flags & XDP_FLAGS_DRV_MODE) return XDP_MODE_DRV; if (flags & XDP_FLAGS_SKB_MODE) return XDP_MODE_SKB; return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB; } static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) { switch (mode) { case XDP_MODE_SKB: return generic_xdp_install; case XDP_MODE_DRV: case XDP_MODE_HW: return dev->netdev_ops->ndo_bpf; default: return NULL; } } static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev, enum bpf_xdp_mode mode) { return dev->xdp_state[mode].link; } static struct bpf_prog *dev_xdp_prog(struct net_device *dev, enum bpf_xdp_mode mode) { struct bpf_xdp_link *link = dev_xdp_link(dev, mode); if (link) return link->link.prog; return dev->xdp_state[mode].prog; } u8 dev_xdp_prog_count(struct net_device *dev) { u8 count = 0; int i; for (i = 0; i < __MAX_XDP_MODE; i++) if (dev->xdp_state[i].prog || dev->xdp_state[i].link) count++; return count; } EXPORT_SYMBOL_GPL(dev_xdp_prog_count); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { struct bpf_prog *prog = dev_xdp_prog(dev, mode); return prog ? prog->aux->id : 0; } static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode, struct bpf_xdp_link *link) { dev->xdp_state[mode].link = link; dev->xdp_state[mode].prog = NULL; } static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode, struct bpf_prog *prog) { dev->xdp_state[mode].link = NULL; dev->xdp_state[mode].prog = prog; } static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, bpf_op_t bpf_op, struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { struct netdev_bpf xdp; int err; memset(&xdp, 0, sizeof(xdp)); xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG; xdp.extack = extack; xdp.flags = flags; xdp.prog = prog; /* Drivers assume refcnt is already incremented (i.e, prog pointer is * "moved" into driver), so they don't increment it on their own, but * they do decrement refcnt when program is detached or replaced. * Given net_device also owns link/prog, we need to bump refcnt here * to prevent drivers from underflowing it. 
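 * If the driver rejects the program, the reference taken here is dropped
 * again on the error path below, so a failed install leaves the refcount
 * unchanged overall.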
*/ if (prog) bpf_prog_inc(prog); err = bpf_op(dev, &xdp); if (err) { if (prog) bpf_prog_put(prog); return err; } if (mode != XDP_MODE_HW) bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog); return 0; } static void dev_xdp_uninstall(struct net_device *dev) { struct bpf_xdp_link *link; struct bpf_prog *prog; enum bpf_xdp_mode mode; bpf_op_t bpf_op; ASSERT_RTNL(); for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) { prog = dev_xdp_prog(dev, mode); if (!prog) continue; bpf_op = dev_xdp_bpf_op(dev, mode); if (!bpf_op) continue; WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); /* auto-detach link from net device */ link = dev_xdp_link(dev, mode); if (link) link->dev = NULL; else bpf_prog_put(prog); dev_xdp_set_link(dev, mode, NULL); } } static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack, struct bpf_xdp_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog, u32 flags) { unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; struct net_device *upper; struct list_head *iter; enum bpf_xdp_mode mode; bpf_op_t bpf_op; int err; ASSERT_RTNL(); /* either link or prog attachment, never both */ if (link && (new_prog || old_prog)) return -EINVAL; /* link supports only XDP mode flags */ if (link && (flags & ~XDP_FLAGS_MODES)) { NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); return -EINVAL; } /* just one XDP mode bit should be set, zero defaults to drv/skb mode */ if (num_modes > 1) { NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); return -EINVAL; } /* avoid ambiguity if offload + drv/skb mode progs are both loaded */ if (!num_modes && dev_xdp_prog_count(dev) > 1) { NL_SET_ERR_MSG(extack, "More than one program loaded, unset mode is ambiguous"); return -EINVAL; } /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); return -EINVAL; } mode = dev_xdp_mode(dev, flags); /* can't replace attached link */ if (dev_xdp_link(dev, mode)) { NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); return -EBUSY; } /* don't allow if an upper device already has a program */ netdev_for_each_upper_dev_rcu(dev, upper, iter) { if (dev_xdp_prog_count(upper) > 0) { NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program"); return -EEXIST; } } cur_prog = dev_xdp_prog(dev, mode); /* can't replace attached prog with link */ if (link && cur_prog) { NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link"); return -EBUSY; } if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) { NL_SET_ERR_MSG(extack, "Active program does not match expected"); return -EEXIST; } /* put effective new program into new_prog */ if (link) new_prog = link->link.prog; if (new_prog) { bool offload = mode == XDP_MODE_HW; enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB ? 
XDP_MODE_DRV : XDP_MODE_SKB; if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { NL_SET_ERR_MSG(extack, "XDP program already attached"); return -EBUSY; } if (!offload && dev_xdp_prog(dev, other_mode)) { NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); return -EEXIST; } if (!offload && bpf_prog_is_offloaded(new_prog->aux)) { NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported"); return -EINVAL; } if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) { NL_SET_ERR_MSG(extack, "Program bound to different device"); return -EINVAL; } if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); return -EINVAL; } if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device"); return -EINVAL; } } /* don't call drivers if the effective program didn't change */ if (new_prog != cur_prog) { bpf_op = dev_xdp_bpf_op(dev, mode); if (!bpf_op) { NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode"); return -EOPNOTSUPP; } err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog); if (err) return err; } if (link) dev_xdp_set_link(dev, mode, link); else dev_xdp_set_prog(dev, mode, new_prog); if (cur_prog) bpf_prog_put(cur_prog); return 0; } static int dev_xdp_attach_link(struct net_device *dev, struct netlink_ext_ack *extack, struct bpf_xdp_link *link) { return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags); } static int dev_xdp_detach_link(struct net_device *dev, struct netlink_ext_ack *extack, struct bpf_xdp_link *link) { enum bpf_xdp_mode mode; bpf_op_t bpf_op; ASSERT_RTNL(); mode = dev_xdp_mode(dev, link->flags); if (dev_xdp_link(dev, mode) != link) return -EINVAL; bpf_op = dev_xdp_bpf_op(dev, mode); WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); dev_xdp_set_link(dev, mode, NULL); return 0; } static void bpf_xdp_link_release(struct bpf_link *link) { struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); rtnl_lock(); /* if racing with net_device's tear down, xdp_link->dev might be * already NULL, in which case link was already auto-detached */ if (xdp_link->dev) { WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); xdp_link->dev = NULL; } rtnl_unlock(); } static int bpf_xdp_link_detach(struct bpf_link *link) { bpf_xdp_link_release(link); return 0; } static void bpf_xdp_link_dealloc(struct bpf_link *link) { struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); kfree(xdp_link); } static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); u32 ifindex = 0; rtnl_lock(); if (xdp_link->dev) ifindex = xdp_link->dev->ifindex; rtnl_unlock(); seq_printf(seq, "ifindex:\t%u\n", ifindex); } static int bpf_xdp_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) { struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); u32 ifindex = 0; rtnl_lock(); if (xdp_link->dev) ifindex = xdp_link->dev->ifindex; rtnl_unlock(); info->xdp.ifindex = ifindex; return 0; } static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog) { struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); enum bpf_xdp_mode mode; bpf_op_t bpf_op; int err = 0; 
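	/* Swap the program behind an existing XDP link: check that the link
	 * is still attached and that any expected old program matches, push
	 * the new program to the driver, then publish it in link->prog.
	 */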
rtnl_lock(); /* link might have been auto-released already, so fail */ if (!xdp_link->dev) { err = -ENOLINK; goto out_unlock; } if (old_prog && link->prog != old_prog) { err = -EPERM; goto out_unlock; } old_prog = link->prog; if (old_prog->type != new_prog->type || old_prog->expected_attach_type != new_prog->expected_attach_type) { err = -EINVAL; goto out_unlock; } if (old_prog == new_prog) { /* no-op, don't disturb drivers */ bpf_prog_put(new_prog); goto out_unlock; } mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, xdp_link->flags, new_prog); if (err) goto out_unlock; old_prog = xchg(&link->prog, new_prog); bpf_prog_put(old_prog); out_unlock: rtnl_unlock(); return err; } static const struct bpf_link_ops bpf_xdp_link_lops = { .release = bpf_xdp_link_release, .dealloc = bpf_xdp_link_dealloc, .detach = bpf_xdp_link_detach, .show_fdinfo = bpf_xdp_link_show_fdinfo, .fill_link_info = bpf_xdp_link_fill_link_info, .update_prog = bpf_xdp_link_update, }; int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct net *net = current->nsproxy->net_ns; struct bpf_link_primer link_primer; struct netlink_ext_ack extack = {}; struct bpf_xdp_link *link; struct net_device *dev; int err, fd; rtnl_lock(); dev = dev_get_by_index(net, attr->link_create.target_ifindex); if (!dev) { rtnl_unlock(); return -EINVAL; } link = kzalloc(sizeof(*link), GFP_USER); if (!link) { err = -ENOMEM; goto unlock; } bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); link->dev = dev; link->flags = attr->link_create.flags; err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); goto unlock; } err = dev_xdp_attach_link(dev, &extack, link); rtnl_unlock(); if (err) { link->dev = NULL; bpf_link_cleanup(&link_primer); trace_bpf_xdp_link_attach_failed(extack._msg); goto out_put_dev; } fd = bpf_link_settle(&link_primer); /* link itself doesn't hold dev's refcnt to not complicate shutdown */ dev_put(dev); return fd; unlock: rtnl_unlock(); out_put_dev: dev_put(dev); return err; } /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device * @extack: netlink extended ack * @fd: new program fd or negative value to clear * @expected_fd: old program fd that userspace expects to replace or clear * @flags: xdp-related flags * * Set or clear a bpf program for a device */ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags) { enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags); struct bpf_prog *new_prog = NULL, *old_prog = NULL; int err; ASSERT_RTNL(); if (fd >= 0) { new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, mode != XDP_MODE_SKB); if (IS_ERR(new_prog)) return PTR_ERR(new_prog); } if (expected_fd >= 0) { old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP, mode != XDP_MODE_SKB); if (IS_ERR(old_prog)) { err = PTR_ERR(old_prog); old_prog = NULL; goto err_out; } } err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags); err_out: if (err && new_prog) bpf_prog_put(new_prog); if (old_prog) bpf_prog_put(old_prog); return err; } /** * dev_index_reserve() - allocate an ifindex in a namespace * @net: the applicable net namespace * @ifindex: requested ifindex, pass %0 to get one allocated * * Allocate a ifindex for a new device. Caller must either use the ifindex * to store the device (via list_netdevice()) or call dev_index_release() * to give the index up. 
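 *
 * Editor's illustrative note (not part of the original source), mirroring
 * how register_netdevice() uses this later in the file:
 *
 *	ret = dev_index_reserve(net, dev->ifindex);
 *	if (ret < 0)
 *		goto err_free_pcpu;
 *	dev->ifindex = ret;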
* * Return: a suitable unique value for a new device interface number or -errno. */ static int dev_index_reserve(struct net *net, u32 ifindex) { int err; if (ifindex > INT_MAX) { DEBUG_NET_WARN_ON_ONCE(1); return -EINVAL; } if (!ifindex) err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL, xa_limit_31b, &net->ifindex, GFP_KERNEL); else err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL); if (err < 0) return err; return ifindex; } static void dev_index_release(struct net *net, int ifindex) { /* Expect only unused indexes, unlist_netdevice() removes the used */ WARN_ON(xa_erase(&net->dev_by_index, ifindex)); } /* Delayed registration/unregisteration */ LIST_HEAD(net_todo_list); DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); atomic_t dev_unreg_count = ATOMIC_INIT(0); static void net_set_todo(struct net_device *dev) { list_add_tail(&dev->todo_list, &net_todo_list); } static netdev_features_t netdev_sync_upper_features(struct net_device *lower, struct net_device *upper, netdev_features_t features) { netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; netdev_features_t feature; int feature_bit; for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(upper->wanted_features & feature) && (features & feature)) { netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n", &feature, upper->name); features &= ~feature; } } return features; } static void netdev_sync_lower_features(struct net_device *upper, struct net_device *lower, netdev_features_t features) { netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; netdev_features_t feature; int feature_bit; for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", &feature, lower->name); lower->wanted_features &= ~feature; __netdev_update_features(lower); if (unlikely(lower->features & feature)) netdev_WARN(upper, "failed to disable %pNF on %s!\n", &feature, lower->name); else netdev_features_change(lower); } } } static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { netdev_warn(dev, "mixed HW and IP checksum settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && !(features & NETIF_F_IP_CSUM)) { netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); features &= ~NETIF_F_TSO; features &= ~NETIF_F_TSO_ECN; } if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && !(features & NETIF_F_IPV6_CSUM)) { netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); features &= ~NETIF_F_TSO6; } /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */ if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO)) features &= ~NETIF_F_TSO_MANGLEID; /* TSO ECN requires that TSO is present as well. */ if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) features &= ~NETIF_F_TSO_ECN; /* Software GSO depends on SG. 
*/ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); features &= ~NETIF_F_GSO; } /* GSO partial features require GSO partial be set */ if ((features & dev->gso_partial_features) && !(features & NETIF_F_GSO_PARTIAL)) { netdev_dbg(dev, "Dropping partially supported GSO features since no GSO partial.\n"); features &= ~dev->gso_partial_features; } if (!(features & NETIF_F_RXCSUM)) { /* NETIF_F_GRO_HW implies doing RXCSUM since every packet * successfully merged by hardware must also have the * checksum verified by hardware. If the user does not * want to enable RXCSUM, logically, we should disable GRO_HW. */ if (features & NETIF_F_GRO_HW) { netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n"); features &= ~NETIF_F_GRO_HW; } } /* LRO/HW-GRO features cannot be combined with RX-FCS */ if (features & NETIF_F_RXFCS) { if (features & NETIF_F_LRO) { netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n"); features &= ~NETIF_F_LRO; } if (features & NETIF_F_GRO_HW) { netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n"); features &= ~NETIF_F_GRO_HW; } } if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) { netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n"); features &= ~NETIF_F_LRO; } if (features & NETIF_F_HW_TLS_TX) { bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); bool hw_csum = features & NETIF_F_HW_CSUM; if (!ip_csum && !hw_csum) { netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); features &= ~NETIF_F_HW_TLS_TX; } } if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) { netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n"); features &= ~NETIF_F_HW_TLS_RX; } return features; } int __netdev_update_features(struct net_device *dev) { struct net_device *upper, *lower; netdev_features_t features; struct list_head *iter; int err = -1; ASSERT_RTNL(); features = netdev_get_wanted_features(dev); if (dev->netdev_ops->ndo_fix_features) features = dev->netdev_ops->ndo_fix_features(dev, features); /* driver might be less strict about feature dependencies */ features = netdev_fix_features(dev, features); /* some features can't be enabled if they're off on an upper device */ netdev_for_each_upper_dev_rcu(dev, upper, iter) features = netdev_sync_upper_features(dev, upper, features); if (dev->features == features) goto sync_lower; netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", &dev->features, &features); if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); else err = 0; if (unlikely(err < 0)) { netdev_err(dev, "set_features() failed (%d); wanted %pNF, left %pNF\n", err, &features, &dev->features); /* return non-0 since some features might have changed and * it's better to fire a spurious notification than miss it */ return -1; } sync_lower: /* some features must be disabled on lower devices when disabled * on an upper device (think: bonding master or bridge) */ netdev_for_each_lower_dev(dev, lower, iter) netdev_sync_lower_features(dev, lower, features); if (!err) { netdev_features_t diff = features ^ dev->features; if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) { /* udp_tunnel_{get,drop}_rx_info both need * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the * device, or they won't do anything. 
* Thus we need to update dev->features * *before* calling udp_tunnel_get_rx_info, * but *after* calling udp_tunnel_drop_rx_info. */ if (features & NETIF_F_RX_UDP_TUNNEL_PORT) { dev->features = features; udp_tunnel_get_rx_info(dev); } else { udp_tunnel_drop_rx_info(dev); } } if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) { if (features & NETIF_F_HW_VLAN_CTAG_FILTER) { dev->features = features; err |= vlan_get_rx_ctag_filter_info(dev); } else { vlan_drop_rx_ctag_filter_info(dev); } } if (diff & NETIF_F_HW_VLAN_STAG_FILTER) { if (features & NETIF_F_HW_VLAN_STAG_FILTER) { dev->features = features; err |= vlan_get_rx_stag_filter_info(dev); } else { vlan_drop_rx_stag_filter_info(dev); } } dev->features = features; } return err < 0 ? 0 : 1; } /** * netdev_update_features - recalculate device features * @dev: the device to check * * Recalculate dev->features set and send notifications if it * has changed. Should be called after driver or hardware dependent * conditions might have changed that influence the features. */ void netdev_update_features(struct net_device *dev) { if (__netdev_update_features(dev)) netdev_features_change(dev); } EXPORT_SYMBOL(netdev_update_features); /** * netdev_change_features - recalculate device features * @dev: the device to check * * Recalculate dev->features set and send notifications even * if they have not changed. Should be called instead of * netdev_update_features() if also dev->vlan_features might * have changed to allow the changes to be propagated to stacked * VLAN devices. */ void netdev_change_features(struct net_device *dev) { __netdev_update_features(dev); netdev_features_change(dev); } EXPORT_SYMBOL(netdev_change_features); /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from * @dev: the device to transfer operstate to * * Transfer operational state from root to device. This is normally * called when a stacking relationship exists between the root * device and the device(a leaf device). 
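 *
 * Editor's illustrative sketch (not part of the original source): a
 * VLAN/macvlan-style driver would typically propagate state from its real
 * device right after linking, with "real_dev" and "vlan_dev" as assumed
 * variables:
 *
 *	netif_stacked_transfer_operstate(real_dev, vlan_dev);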
*/ void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev) { if (rootdev->operstate == IF_OPER_DORMANT) netif_dormant_on(dev); else netif_dormant_off(dev); if (rootdev->operstate == IF_OPER_TESTING) netif_testing_on(dev); else netif_testing_off(dev); if (netif_carrier_ok(rootdev)) netif_carrier_on(dev); else netif_carrier_off(dev); } EXPORT_SYMBOL(netif_stacked_transfer_operstate); static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; size_t sz = count * sizeof(*rx); int err = 0; BUG_ON(count < 1); rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!rx) return -ENOMEM; dev->_rx = rx; for (i = 0; i < count; i++) { rx[i].dev = dev; /* XDP RX-queue setup */ err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i, 0); if (err < 0) goto err_rxq_info; } return 0; err_rxq_info: /* Rollback successful reg's and free other resources */ while (i--) xdp_rxq_info_unreg(&rx[i].xdp_rxq); kvfree(dev->_rx); dev->_rx = NULL; return err; } static void netif_free_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */ if (!dev->_rx) return; for (i = 0; i < count; i++) xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq); kvfree(dev->_rx); } static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, void *_unused) { /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); queue->xmit_lock_owner = -1; netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; #ifdef CONFIG_BQL dql_init(&queue->dql, HZ); #endif } static void netif_free_tx_queues(struct net_device *dev) { kvfree(dev->_tx); } static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; size_t sz = count * sizeof(*tx); if (count < 1 || count > 0xffff) return -EINVAL; tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!tx) return -ENOMEM; dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); spin_lock_init(&dev->tx_global_lock); return 0; } void netif_tx_stop_all_queues(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); netif_tx_stop_queue(txq); } } EXPORT_SYMBOL(netif_tx_stop_all_queues); static int netdev_do_alloc_pcpu_stats(struct net_device *dev) { void __percpu *v; /* Drivers implementing ndo_get_peer_dev must support tstat * accounting, so that skb_do_redirect() can bump the dev's * RX stats upon network namespace switch. */ if (dev->netdev_ops->ndo_get_peer_dev && dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS) return -EOPNOTSUPP; switch (dev->pcpu_stat_type) { case NETDEV_PCPU_STAT_NONE: return 0; case NETDEV_PCPU_STAT_LSTATS: v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); break; case NETDEV_PCPU_STAT_TSTATS: v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); break; case NETDEV_PCPU_STAT_DSTATS: v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); break; default: return -EINVAL; } return v ? 
0 : -ENOMEM; } static void netdev_do_free_pcpu_stats(struct net_device *dev) { switch (dev->pcpu_stat_type) { case NETDEV_PCPU_STAT_NONE: return; case NETDEV_PCPU_STAT_LSTATS: free_percpu(dev->lstats); break; case NETDEV_PCPU_STAT_TSTATS: free_percpu(dev->tstats); break; case NETDEV_PCPU_STAT_DSTATS: free_percpu(dev->dstats); break; } } /** * register_netdevice() - register a network device * @dev: device to register * * Take a prepared network device structure and make it externally accessible. * A %NETDEV_REGISTER message is sent to the netdev notifier chain. * Callers must hold the rtnl lock - you may want register_netdev() * instead of this. */ int register_netdevice(struct net_device *dev) { int ret; struct net *net = dev_net(dev); BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE < NETDEV_FEATURE_COUNT); BUG_ON(dev_boot_phase); ASSERT_RTNL(); might_sleep(); /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); BUG_ON(!net); ret = ethtool_check_ops(dev->ethtool_ops); if (ret) return ret; spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) goto out; ret = -ENOMEM; dev->name_node = netdev_name_node_head_alloc(dev); if (!dev->name_node) goto out; /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); if (ret) { if (ret > 0) ret = -EIO; goto err_free_name; } } if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_CTAG_FILTER) && (!dev->netdev_ops->ndo_vlan_rx_add_vid || !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); ret = -EINVAL; goto err_uninit; } ret = netdev_do_alloc_pcpu_stats(dev); if (ret) goto err_uninit; ret = dev_index_reserve(net, dev->ifindex); if (ret < 0) goto err_free_pcpu; dev->ifindex = ret; /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). */ dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF); dev->features |= NETIF_F_SOFT_FEATURES; if (dev->udp_tunnel_nic_info) { dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT; dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT; } dev->wanted_features = dev->features & dev->hw_features; if (!(dev->flags & IFF_LOOPBACK)) dev->hw_features |= NETIF_F_NOCACHE_COPY; /* If IPv4 TCP segmentation offload is supported we should also * allow the device to enable segmenting the frame with the option * of ignoring a static IP ID value. This doesn't enable the * feature itself but allows the user to enable it later. */ if (dev->hw_features & NETIF_F_TSO) dev->hw_features |= NETIF_F_TSO_MANGLEID; if (dev->vlan_features & NETIF_F_TSO) dev->vlan_features |= NETIF_F_TSO_MANGLEID; if (dev->mpls_features & NETIF_F_TSO) dev->mpls_features |= NETIF_F_TSO_MANGLEID; if (dev->hw_enc_features & NETIF_F_TSO) dev->hw_enc_features |= NETIF_F_TSO_MANGLEID; /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. */ dev->vlan_features |= NETIF_F_HIGHDMA; /* Make NETIF_F_SG inheritable to tunnel devices. */ dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL; /* Make NETIF_F_SG inheritable to MPLS. */ dev->mpls_features |= NETIF_F_SG; ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) goto err_ifindex_release; ret = netdev_register_kobject(dev); WRITE_ONCE(dev->reg_state, ret ? 
NETREG_UNREGISTERED : NETREG_REGISTERED); if (ret) goto err_uninit_notify; __netdev_update_features(dev); /* * Default initial state at registry is that the * device is present. */ set_bit(__LINK_STATE_PRESENT, &dev->state); linkwatch_init_dev(dev); dev_init_scheduler(dev); netdev_hold(dev, &dev->dev_registered_tracker, GFP_KERNEL); list_netdevice(dev); add_device_randomness(dev->dev_addr, dev->addr_len); /* If the device has permanent device address, driver should * set dev_addr and also addr_assign_type should be set to * NET_ADDR_PERM (default value). */ if (dev->addr_assign_type == NET_ADDR_PERM) memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. */ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); if (ret) { /* Expect explicit free_netdev() on failure */ dev->needs_free_netdev = false; unregister_netdevice_queue(dev, NULL); goto out; } /* * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. */ if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL); out: return ret; err_uninit_notify: call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); err_ifindex_release: dev_index_release(net, dev->ifindex); err_free_pcpu: netdev_do_free_pcpu_stats(dev); err_uninit: if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); if (dev->priv_destructor) dev->priv_destructor(dev); err_free_name: netdev_name_node_free(dev->name_node); goto out; } EXPORT_SYMBOL(register_netdevice); /** * init_dummy_netdev - init a dummy network device for NAPI * @dev: device to init * * This takes a network device structure and initializes the minimum * amount of fields so it can be used to schedule NAPI polls without * registering a full-blown interface. This is to be used by drivers * that need to tie several hardware interfaces to a single NAPI * poll scheduler due to HW limitations. */ void init_dummy_netdev(struct net_device *dev) { /* Clear everything. Note we don't initialize spinlocks * as they aren't supposed to be taken by any of the * NAPI code and this dummy netdev is supposed to be * only ever used for NAPI polls */ memset(dev, 0, sizeof(struct net_device)); /* make sure we BUG if trying to hit standard * register/unregister code path */ dev->reg_state = NETREG_DUMMY; /* NAPI wants this */ INIT_LIST_HEAD(&dev->napi_list); /* a dummy interface is started by default */ set_bit(__LINK_STATE_PRESENT, &dev->state); set_bit(__LINK_STATE_START, &dev->state); /* napi_busy_loop stats accounting wants this */ dev_net_set(dev, &init_net); /* Note: We don't allocate pcpu_refcnt for dummy devices, * because users of this 'device' don't need to change * its refcount. */ } EXPORT_SYMBOL_GPL(init_dummy_netdev); /** * register_netdev - register a network device * @dev: device to register * * Take a completed network device structure and add it to the kernel * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier * chain. 0 is returned on success. A negative errno code is returned * on a failure to set up the device, or if the name is a duplicate. * * This is a wrapper around register_netdevice that takes the rtnl semaphore * and expands the device name if you passed a format string to * alloc_netdev.
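 *
 * A minimal sketch of the usual calling pattern (hypothetical driver code;
 * my_priv and my_netdev_ops are placeholders, not part of this file):
 *
 *	dev = alloc_etherdev(sizeof(struct my_priv));
 *	if (!dev)
 *		return -ENOMEM;
 *	dev->netdev_ops = &my_netdev_ops;
 *	err = register_netdev(dev);
 *	if (err)
 *		free_netdev(dev);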
*/ int register_netdev(struct net_device *dev) { int err; if (rtnl_lock_killable()) return -EINTR; err = register_netdevice(dev); rtnl_unlock(); return err; } EXPORT_SYMBOL(register_netdev); int netdev_refcnt_read(const struct net_device *dev) { #ifdef CONFIG_PCPU_DEV_REFCNT int i, refcnt = 0; for_each_possible_cpu(i) refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); return refcnt; #else return refcount_read(&dev->dev_refcnt); #endif } EXPORT_SYMBOL(netdev_refcnt_read); int netdev_unregister_timeout_secs __read_mostly = 10; #define WAIT_REFS_MIN_MSECS 1 #define WAIT_REFS_MAX_MSECS 250 /** * netdev_wait_allrefs_any - wait until all references are gone. * @list: list of net_devices to wait on * * This is called when unregistering network devices. * * Any protocol or device that holds a reference should register * for netdevice notification, and cleanup and put back the * reference if they receive an UNREGISTER event. * We can get stuck here if buggy protocols don't correctly * call dev_put. */ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) { unsigned long rebroadcast_time, warning_time; struct net_device *dev; int wait = 0; rebroadcast_time = warning_time = jiffies; list_for_each_entry(dev, list, todo_list) if (netdev_refcnt_read(dev) == 1) return dev; while (true) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { rtnl_lock(); /* Rebroadcast unregister notification */ list_for_each_entry(dev, list, todo_list) call_netdevice_notifiers(NETDEV_UNREGISTER, dev); __rtnl_unlock(); rcu_barrier(); rtnl_lock(); list_for_each_entry(dev, list, todo_list) if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { /* We must not have linkwatch events * pending on unregister. If this * happens, we simply run the queue * unscheduled, resulting in a noop * for this device. */ linkwatch_run_queue(); break; } __rtnl_unlock(); rebroadcast_time = jiffies; } if (!wait) { rcu_barrier(); wait = WAIT_REFS_MIN_MSECS; } else { msleep(wait); wait = min(wait << 1, WAIT_REFS_MAX_MSECS); } list_for_each_entry(dev, list, todo_list) if (netdev_refcnt_read(dev) == 1) return dev; if (time_after(jiffies, warning_time + READ_ONCE(netdev_unregister_timeout_secs) * HZ)) { list_for_each_entry(dev, list, todo_list) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", dev->name, netdev_refcnt_read(dev)); ref_tracker_dir_print(&dev->refcnt_tracker, 10); } warning_time = jiffies; } } } /* The sequence is: * * rtnl_lock(); * ... * register_netdevice(x1); * register_netdevice(x2); * ... * unregister_netdevice(y1); * unregister_netdevice(y2); * ... * rtnl_unlock(); * free_netdev(y1); * free_netdev(y2); * * We are invoked by rtnl_unlock(). * This allows us to deal with problems: * 1) We can delete sysfs objects which invoke hotplug * without deadlocking with linkwatch via keventd. * 2) Since we run with the RTNL semaphore not held, we can sleep * safely in order to wait for the netdev refcnt to drop to zero. * * We must not return until all unregister events added during * the interval the lock was held have been completed. 
*/ void netdev_run_todo(void) { struct net_device *dev, *tmp; struct list_head list; int cnt; #ifdef CONFIG_LOCKDEP struct list_head unlink_list; list_replace_init(&net_unlink_list, &unlink_list); while (!list_empty(&unlink_list)) { struct net_device *dev = list_first_entry(&unlink_list, struct net_device, unlink_list); list_del_init(&dev->unlink_list); dev->nested_level = dev->lower_level - 1; } #endif /* Snapshot list, allow later requests */ list_replace_init(&net_todo_list, &list); __rtnl_unlock(); /* Wait for rcu callbacks to finish before next phase */ if (!list_empty(&list)) rcu_barrier(); list_for_each_entry_safe(dev, tmp, &list, todo_list) { if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { netdev_WARN(dev, "run_todo but not unregistering\n"); list_del(&dev->todo_list); continue; } WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED); linkwatch_sync_dev(dev); } cnt = 0; while (!list_empty(&list)) { dev = netdev_wait_allrefs_any(&list); list_del(&dev->todo_list); /* paranoia */ BUG_ON(netdev_refcnt_read(dev) != 1); BUG_ON(!list_empty(&dev->ptype_all)); BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); netdev_do_free_pcpu_stats(dev); if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) free_netdev(dev); cnt++; /* Free network device */ kobject_put(&dev->dev.kobj); } if (cnt && atomic_sub_and_test(cnt, &dev_unreg_count)) wake_up(&netdev_unregistering_wq); } /* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has * all the same fields in the same order as net_device_stats, with only * the type differing, but rtnl_link_stats64 may have additional fields * at the end for newer counters. */ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats) { size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t); const atomic_long_t *src = (atomic_long_t *)netdev_stats; u64 *dst = (u64 *)stats64; BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) dst[i] = (unsigned long)atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); } EXPORT_SYMBOL(netdev_stats_to_stats64); static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc( struct net_device *dev) { struct net_device_core_stats __percpu *p; p = alloc_percpu_gfp(struct net_device_core_stats, GFP_ATOMIC | __GFP_NOWARN); if (p && cmpxchg(&dev->core_stats, NULL, p)) free_percpu(p); /* This READ_ONCE() pairs with the cmpxchg() above */ return READ_ONCE(dev->core_stats); } noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset) { /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); unsigned long __percpu *field; if (unlikely(!p)) { p = netdev_core_stats_alloc(dev); if (!p) return; } field = (__force unsigned long __percpu *)((__force void *)p + offset); this_cpu_inc(*field); } EXPORT_SYMBOL_GPL(netdev_core_stats_inc); /** * dev_get_stats - get network device statistics * @dev: device to get statistics from * @storage: place to store stats * * Get network statistics from device. Return @storage. * The device driver may provide its own method by setting * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; * otherwise the internal statistics structure is used. 
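 * In addition, the per-cpu core drop counters (rx_dropped, tx_dropped,
 * rx_nohandler and rx_otherhost_dropped, bumped via netdev_core_stats_inc())
 * are folded into @storage on top of the driver-provided values.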
*/ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage) { const struct net_device_ops *ops = dev->netdev_ops; const struct net_device_core_stats __percpu *p; if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); ops->ndo_get_stats64(dev, storage); } else if (ops->ndo_get_stats) { netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); } else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) { dev_get_tstats64(dev, storage); } else { netdev_stats_to_stats64(storage, &dev->stats); } /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ p = READ_ONCE(dev->core_stats); if (p) { const struct net_device_core_stats *core_stats; int i; for_each_possible_cpu(i) { core_stats = per_cpu_ptr(p, i); storage->rx_dropped += READ_ONCE(core_stats->rx_dropped); storage->tx_dropped += READ_ONCE(core_stats->tx_dropped); storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler); storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped); } } return storage; } EXPORT_SYMBOL(dev_get_stats); /** * dev_fetch_sw_netstats - get per-cpu network device statistics * @s: place to store stats * @netstats: per-cpu network stats to read from * * Read per-cpu network statistics and populate the related fields in @s. */ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats) { int cpu; for_each_possible_cpu(cpu) { u64 rx_packets, rx_bytes, tx_packets, tx_bytes; const struct pcpu_sw_netstats *stats; unsigned int start; stats = per_cpu_ptr(netstats, cpu); do { start = u64_stats_fetch_begin(&stats->syncp); rx_packets = u64_stats_read(&stats->rx_packets); rx_bytes = u64_stats_read(&stats->rx_bytes); tx_packets = u64_stats_read(&stats->tx_packets); tx_bytes = u64_stats_read(&stats->tx_bytes); } while (u64_stats_fetch_retry(&stats->syncp, start)); s->rx_packets += rx_packets; s->rx_bytes += rx_bytes; s->tx_packets += tx_packets; s->tx_bytes += tx_bytes; } } EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats); /** * dev_get_tstats64 - ndo_get_stats64 implementation * @dev: device to get statistics from * @s: place to store stats * * Populate @s from dev->stats and dev->tstats. Can be used as * ndo_get_stats64() callback. */ void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s) { netdev_stats_to_stats64(s, &dev->stats); dev_fetch_sw_netstats(s, dev->tstats); } EXPORT_SYMBOL_GPL(dev_get_tstats64); struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) { struct netdev_queue *queue = dev_ingress_queue(dev); #ifdef CONFIG_NET_CLS_ACT if (queue) return queue; queue = kzalloc(sizeof(*queue), GFP_KERNEL); if (!queue) return NULL; netdev_init_one_queue(dev, queue, NULL); RCU_INIT_POINTER(queue->qdisc, &noop_qdisc); RCU_INIT_POINTER(queue->qdisc_sleeping, &noop_qdisc); rcu_assign_pointer(dev->ingress_queue, queue); #endif return queue; } static const struct ethtool_ops default_ethtool_ops; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops) { if (dev->ethtool_ops == &default_ethtool_ops) dev->ethtool_ops = ops; } EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); /** * netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default * @dev: netdev to enable the IRQ coalescing on * * Sets a conservative default for SW IRQ coalescing. Users can use * sysfs attributes to override the default values. 
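 * (As implemented below, the default is a gro_flush_timeout of 20000 ns and
 * napi_defer_hard_irqs of 1, and nothing is changed on PREEMPT_RT kernels.)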
*/ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) { WARN_ON(dev->reg_state == NETREG_REGISTERED); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { dev->gro_flush_timeout = 20000; dev->napi_defer_hard_irqs = 1; } } EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); void netdev_freemem(struct net_device *dev) { char *addr = (char *)dev - dev->padded; kvfree(addr); } /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @name_assign_type: origin of device name * @setup: callback to initialize device * @txqs: the number of TX subqueues to allocate * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use * and performs basic initialization. Also allocates subqueue structs * for each queue on the device. */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs) { struct net_device *dev; unsigned int alloc_size; struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); if (txqs < 1) { pr_err("alloc_netdev: Unable to allocate device with zero queues\n"); return NULL; } if (rxqs < 1) { pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); return NULL; } alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); alloc_size += sizeof_priv; } /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!p) return NULL; dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) goto free_dev; __dev_hold(dev); #else refcount_set(&dev->dev_refcnt, 1); #endif if (dev_addr_init(dev)) goto free_pcpu; dev_mc_init(dev); dev_uc_init(dev); dev_net_set(dev, &init_net); dev->gso_max_size = GSO_LEGACY_MAX_SIZE; dev->xdp_zc_max_segs = 1; dev->gso_max_segs = GSO_MAX_SEGS; dev->gro_max_size = GRO_LEGACY_MAX_SIZE; dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE; dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE; dev->tso_max_size = TSO_LEGACY_MAX_SIZE; dev->tso_max_segs = TSO_MAX_SEGS; dev->upper_level = 1; dev->lower_level = 1; #ifdef CONFIG_LOCKDEP dev->nested_level = 0; INIT_LIST_HEAD(&dev->unlink_list); #endif INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->close_list); INIT_LIST_HEAD(&dev->link_watch_list); INIT_LIST_HEAD(&dev->adj_list.upper); INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); INIT_LIST_HEAD(&dev->net_notifier_list); #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); if (!dev->tx_queue_len) { dev->priv_flags |= IFF_NO_QUEUE; dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; } dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) goto free_all; dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_all; strcpy(dev->name, name); dev->name_assign_type = name_assign_type; dev->group = INIT_NETDEV_GROUP; if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; nf_hook_netdev_init(dev); return dev; 
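/* Error unwind for alloc_netdev_mqs(): once TX/RX queue allocation has been
 * attempted, free_netdev() below releases everything, including the queues;
 * the earlier failure paths only need to drop the percpu refcount (when
 * CONFIG_PCPU_DEV_REFCNT is set) and the base allocation.
 */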
free_all: free_netdev(dev); return NULL; free_pcpu: #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); free_dev: #endif netdev_freemem(dev); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); /** * free_netdev - free network device * @dev: device * * This function does the last stage of destroying an allocated device * interface. The reference to the device object is released. If this * is the last reference then it will be freed. Must be called in process * context. */ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; might_sleep(); /* When called immediately after register_netdevice() failed, the unwind * handling may still be dismantling the device. Handle that case by * deferring the free. */ if (dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); dev->needs_free_netdev = true; return; } netif_free_tx_queues(dev); netif_free_rx_queues(dev); kfree(rcu_dereference_protected(dev->ingress_queue, 1)); /* Flush device addresses */ dev_addr_flush(dev); list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); ref_tracker_dir_exit(&dev->refcnt_tracker); #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; #endif free_percpu(dev->core_stats); dev->core_stats = NULL; free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); return; } BUG_ON(dev->reg_state != NETREG_UNREGISTERED); WRITE_ONCE(dev->reg_state, NETREG_RELEASED); /* will free via device release */ put_device(&dev->dev); } EXPORT_SYMBOL(free_netdev); /** * synchronize_net - Synchronize with packet receive processing * * Wait for packets currently being received to be done. * Does not block later packets from starting. */ void synchronize_net(void) { might_sleep(); if (rtnl_is_locked()) synchronize_rcu_expedited(); else synchronize_rcu(); } EXPORT_SYMBOL(synchronize_net); /** * unregister_netdevice_queue - remove device from the kernel * @dev: device * @head: list * * This function shuts down a device interface and removes it * from the kernel tables. * If @head is not NULL, the device is queued to be unregistered later. * * Callers must hold the rtnl semaphore. You may want * unregister_netdev() instead of this. */ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) { ASSERT_RTNL(); if (head) { list_move_tail(&dev->unreg_list, head); } else { LIST_HEAD(single); list_add(&dev->unreg_list, &single); unregister_netdevice_many(&single); } } EXPORT_SYMBOL(unregister_netdevice_queue); void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { struct net_device *dev, *tmp; LIST_HEAD(close_head); int cnt = 0; BUG_ON(dev_boot_phase); ASSERT_RTNL(); if (list_empty(head)) return; list_for_each_entry_safe(dev, tmp, head, unreg_list) { /* Some devices call without registering * for initialization unwind. Remove those * devices and proceed with the remaining. */ if (dev->reg_state == NETREG_UNINITIALIZED) { pr_debug("unregister_netdevice: device %s/%p never was registered\n", dev->name, dev); WARN_ON(1); list_del(&dev->unreg_list); continue; } dev->dismantle = true; BUG_ON(dev->reg_state != NETREG_REGISTERED); } /* If device is running, close it first. */ list_for_each_entry(dev, head, unreg_list) list_add_tail(&dev->close_list, &close_head); dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain.
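 * After unlist_netdevice() the device is gone from the per-netns name and
 * ifindex lookups and its reg_state is set to NETREG_UNREGISTERING.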
*/ unlist_netdevice(dev); WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING); } flush_all_backlogs(); synchronize_net(); list_for_each_entry(dev, head, unreg_list) { struct sk_buff *skb = NULL; /* Shutdown queueing discipline. */ dev_shutdown(dev); dev_tcx_uninstall(dev); dev_xdp_uninstall(dev); bpf_dev_bound_netdev_unregister(dev); netdev_offload_xstats_disable_all(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, GFP_KERNEL, NULL, 0, portid, nlh); /* * Flush the unicast and multicast chains */ dev_uc_flush(dev); dev_mc_flush(dev); netdev_name_node_alt_flush(dev); netdev_name_node_free(dev->name_node); call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); /* Notifier chain MUST detach all our upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); WARN_ON(netdev_has_any_lower_dev(dev)); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); #ifdef CONFIG_XPS /* Remove XPS queueing entries */ netif_reset_xps_queues_gt(dev, 0); #endif } synchronize_net(); list_for_each_entry(dev, head, unreg_list) { netdev_put(dev, &dev->dev_registered_tracker); net_set_todo(dev); cnt++; } atomic_add(cnt, &dev_unreg_count); list_del(head); } /** * unregister_netdevice_many - unregister many devices * @head: list of devices * * Note: As most callers use a stack-allocated list_head, * we force a list_del() to make sure stack won't be corrupted later. */ void unregister_netdevice_many(struct list_head *head) { unregister_netdevice_many_notify(head, 0, NULL); } EXPORT_SYMBOL(unregister_netdevice_many); /** * unregister_netdev - remove device from the kernel * @dev: device * * This function shuts down a device interface and removes it * from the kernel tables. * * This is just a wrapper for unregister_netdevice that takes * the rtnl semaphore. In general you want to use this and not * unregister_netdevice. */ void unregister_netdev(struct net_device *dev) { rtnl_lock(); unregister_netdevice(dev); rtnl_unlock(); } EXPORT_SYMBOL(unregister_netdev); /** * __dev_change_net_namespace - move device to different network namespace * @dev: device * @net: network namespace * @pat: If not NULL, name pattern to try if the current device name * is already taken in the destination network namespace. * @new_ifindex: If not zero, specifies device index in the target * namespace. * * This function shuts down a device interface and moves it * to a new network namespace. On success 0 is returned; on * failure a negative errno code is returned. * * Callers must hold the rtnl semaphore. */ int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex) { struct netdev_name_node *name_node; struct net *net_old = dev_net(dev); char new_name[IFNAMSIZ] = {}; int err, new_nsid; ASSERT_RTNL(); /* Don't allow namespace local devices to be moved. */ err = -EINVAL; if (dev->features & NETIF_F_NETNS_LOCAL) goto out; /* Ensure the device has been registered */ if (dev->reg_state != NETREG_REGISTERED) goto out; /* Get out if there is nothing to do */ err = 0; if (net_eq(net_old, net)) goto out; /* Pick the destination device name, and ensure * we can use it in the destination network namespace.
*/ err = -EEXIST; if (netdev_name_in_use(net, dev->name)) { /* We get here if we can't use the current device name */ if (!pat) goto out; err = dev_prep_valid_name(net, dev, pat, new_name, EEXIST); if (err < 0) goto out; } /* Check that none of the altnames conflicts. */ err = -EEXIST; netdev_for_each_altname(dev, name_node) if (netdev_name_in_use(net, name_node->name)) goto out; /* Check that new_ifindex isn't used yet. */ if (new_ifindex) { err = dev_index_reserve(net, new_ifindex); if (err < 0) goto out; } else { /* If there is an ifindex conflict assign a new one */ err = dev_index_reserve(net, dev->ifindex); if (err == -EBUSY) err = dev_index_reserve(net, 0); if (err < 0) goto out; new_ifindex = err; } /* * And now a mini version of register_netdevice unregister_netdevice. */ /* If device is running close it first. */ dev_close(dev); /* And unlink it from device chain */ unlist_netdevice(dev); synchronize_net(); /* Shutdown queueing discipline. */ dev_shutdown(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. * * Note that dev->reg_state stays at NETREG_REGISTERED. * This is wanted because this way 8021q and macvlan know * the device is just moving and can keep their slaves up. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid, new_ifindex); /* * Flush the unicast and multicast chains */ dev_uc_flush(dev); dev_mc_flush(dev); /* Send a netdev-removed uevent to the old namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); netdev_adjacent_del_links(dev); /* Move per-net netdevice notifiers that are following the netdevice */ move_netdevice_notifiers_dev_net(dev, net); /* Actually switch the network namespace */ dev_net_set(dev, net); dev->ifindex = new_ifindex; if (new_name[0]) /* Rename the netdev to prepared name */ strscpy(dev->name, new_name, IFNAMSIZ); /* Fixup kobjects */ dev_set_uevent_suppress(&dev->dev, 1); err = device_rename(&dev->dev, dev->name); dev_set_uevent_suppress(&dev->dev, 0); WARN_ON(err); /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); netdev_adjacent_add_links(dev); /* Adapt owner in case owning user namespace of target network * namespace is different from the original one. */ err = netdev_change_owner(dev, net_old, net); WARN_ON(err); /* Add the device back in the hashes */ list_netdevice(dev); /* Notify protocols, that a new device appeared. */ call_netdevice_notifiers(NETDEV_REGISTER, dev); /* * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. */ rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL); synchronize_net(); err = 0; out: return err; } EXPORT_SYMBOL_GPL(__dev_change_net_namespace); static int dev_cpu_dead(unsigned int oldcpu) { struct sk_buff **list_skb; struct sk_buff *skb; unsigned int cpu; struct softnet_data *sd, *oldsd, *remsd = NULL; local_irq_disable(); cpu = smp_processor_id(); sd = &per_cpu(softnet_data, cpu); oldsd = &per_cpu(softnet_data, oldcpu); /* Find end of our completion_queue. */ list_skb = &sd->completion_queue; while (*list_skb) list_skb = &(*list_skb)->next; /* Append completion queue from offline CPU. */ *list_skb = oldsd->completion_queue; oldsd->completion_queue = NULL; /* Append output queue from offline CPU. 
*/ if (oldsd->output_queue) { *sd->output_queue_tailp = oldsd->output_queue; sd->output_queue_tailp = oldsd->output_queue_tailp; oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } /* Append NAPI poll list from offline CPU, with one exception : * process_backlog() must be called by cpu owning percpu backlog. * We properly handle process_queue & input_pkt_queue later. */ while (!list_empty(&oldsd->poll_list)) { struct napi_struct *napi = list_first_entry(&oldsd->poll_list, struct napi_struct, poll_list); list_del_init(&napi->poll_list); if (napi->poll == process_backlog) napi->state = 0; else ____napi_schedule(sd, napi); } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); #ifdef CONFIG_RPS remsd = oldsd->rps_ipi_list; oldsd->rps_ipi_list = NULL; #endif /* send out pending IPI's on offline CPU */ net_rps_send_ipi(remsd); /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { netif_rx(skb); input_queue_head_incr(oldsd); } while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); input_queue_head_incr(oldsd); } return 0; } /** * netdev_increment_features - increment feature set by one * @all: current feature set * @one: new feature set * @mask: mask feature set * * Computes a new feature set after adding a device with feature set * @one to the master device with current feature set @all. Will not * enable anything that is off in @mask. Returns the new feature set. */ netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask) { if (mask & NETIF_F_HW_CSUM) mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask; all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. */ if (all & NETIF_F_HW_CSUM) all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM); return all; } EXPORT_SYMBOL(netdev_increment_features); static struct hlist_head * __net_init netdev_create_hash(void) { int i; struct hlist_head *hash; hash = kmalloc_array(NETDEV_HASHENTRIES, sizeof(*hash), GFP_KERNEL); if (hash != NULL) for (i = 0; i < NETDEV_HASHENTRIES; i++) INIT_HLIST_HEAD(&hash[i]); return hash; } /* Initialize per network namespace state */ static int __net_init netdev_init(struct net *net) { BUILD_BUG_ON(GRO_HASH_BUCKETS > 8 * sizeof_field(struct napi_struct, gro_bitmask)); INIT_LIST_HEAD(&net->dev_base_head); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) goto err_name; net->dev_index_head = netdev_create_hash(); if (net->dev_index_head == NULL) goto err_idx; xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC1); RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); return 0; err_idx: kfree(net->dev_name_head); err_name: return -ENOMEM; } /** * netdev_drivername - network driver for the device * @dev: network device * * Determine network driver for device. 
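 * Returns the name of the driver bound to the parent device, or an empty
 * string if the netdev has no parent device or no driver is bound.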
*/ const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; const char *empty = ""; parent = dev->dev.parent; if (!parent) return empty; driver = parent->driver; if (driver && driver->name) return driver->name; return empty; } static void __netdev_printk(const char *level, const struct net_device *dev, struct va_format *vaf) { if (dev && dev->dev.parent) { dev_printk_emit(level[1] - '0', dev->dev.parent, "%s %s %s%s: %pV", dev_driver_string(dev->dev.parent), dev_name(dev->dev.parent), netdev_name(dev), netdev_reg_state(dev), vaf); } else if (dev) { printk("%s%s%s: %pV", level, netdev_name(dev), netdev_reg_state(dev), vaf); } else { printk("%s(NULL net_device): %pV", level, vaf); } } void netdev_printk(const char *level, const struct net_device *dev, const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; __netdev_printk(level, dev, &vaf); va_end(args); } EXPORT_SYMBOL(netdev_printk); #define define_netdev_printk_level(func, level) \ void func(const struct net_device *dev, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ \ va_start(args, fmt); \ \ vaf.fmt = fmt; \ vaf.va = &args; \ \ __netdev_printk(level, dev, &vaf); \ \ va_end(args); \ } \ EXPORT_SYMBOL(func); define_netdev_printk_level(netdev_emerg, KERN_EMERG); define_netdev_printk_level(netdev_alert, KERN_ALERT); define_netdev_printk_level(netdev_crit, KERN_CRIT); define_netdev_printk_level(netdev_err, KERN_ERR); define_netdev_printk_level(netdev_warn, KERN_WARNING); define_netdev_printk_level(netdev_notice, KERN_NOTICE); define_netdev_printk_level(netdev_info, KERN_INFO); static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); kfree(net->dev_index_head); xa_destroy(&net->dev_by_index); if (net != &init_net) WARN_ON_ONCE(!list_empty(&net->dev_base_head)); } static struct pernet_operations __net_initdata netdev_net_ops = { .init = netdev_init, .exit = netdev_exit, }; static void __net_exit default_device_exit_net(struct net *net) { struct netdev_name_node *name_node, *tmp; struct net_device *dev, *aux; /* * Push all migratable network devices back to the * initial network namespace */ ASSERT_RTNL(); for_each_netdev_safe(net, dev, aux) { int err; char fb_name[IFNAMSIZ]; /* Ignore unmoveable devices (i.e. loopback) */ if (dev->features & NETIF_F_NETNS_LOCAL) continue; /* Leave virtual devices for the generic cleanup */ if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund) continue; /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); if (netdev_name_in_use(&init_net, fb_name)) snprintf(fb_name, IFNAMSIZ, "dev%%d"); netdev_for_each_altname_safe(dev, name_node, tmp) if (netdev_name_in_use(&init_net, name_node->name)) __netdev_name_node_alt_destroy(name_node); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { pr_emerg("%s: failed to move %s to init_net: %d\n", __func__, dev->name, err); BUG(); } } } static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices must be removed from a network * namespace. Do this in the reverse order of registration. * Do this across as many network namespaces as possible to * improve batching efficiency.
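 * Devices whose rtnl_link_ops implement ->dellink are deleted through that
 * callback; all others are queued with unregister_netdevice_queue() and the
 * whole batch is then torn down by one unregister_netdevice_many() call.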
*/ struct net_device *dev; struct net *net; LIST_HEAD(dev_kill_list); rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { default_device_exit_net(net); cond_resched(); } list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) dev->rtnl_link_ops->dellink(dev, &dev_kill_list); else unregister_netdevice_queue(dev, &dev_kill_list); } } unregister_netdevice_many(&dev_kill_list); rtnl_unlock(); } static struct pernet_operations __net_initdata default_device_ops = { .exit_batch = default_device_exit_batch, }; static void __init net_dev_struct_check(void) { /* TX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_partial_features); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq); #ifdef CONFIG_XPS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps); #endif #ifdef CONFIG_NETFILTER_EGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress); #endif #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress); #endif CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160); /* TXRX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, state); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr); CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 46); /* RX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, 
rx_handler); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net); #ifdef CONFIG_NETPOLL CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo); #endif #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104); } /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not * present) and leaves us with a valid list of present and active devices. * */ /* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */ #define SYSTEM_PERCPU_PAGE_POOL_SIZE ((1 << 20) / PAGE_SIZE) static int net_page_pool_create(int cpuid) { #if IS_ENABLED(CONFIG_PAGE_POOL) struct page_pool_params page_pool_params = { .pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE, .flags = PP_FLAG_SYSTEM_POOL, .nid = NUMA_NO_NODE, }; struct page_pool *pp_ptr; pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid); if (IS_ERR(pp_ptr)) return -ENOMEM; per_cpu(system_page_pool, cpuid) = pp_ptr; #endif return 0; } /* * This is called single-threaded during boot, so no need * to take the rtnl semaphore. */ static int __init net_dev_init(void) { int i, rc = -ENOMEM; BUG_ON(!dev_boot_phase); net_dev_struct_check(); if (dev_proc_init()) goto out; if (netdev_kobject_init()) goto out; for (i = 0; i < PTYPE_HASH_SIZE; i++) INIT_LIST_HEAD(&ptype_base[i]); if (register_pernet_subsys(&netdev_net_ops)) goto out; /* * Initialise the packet receive queues. */ for_each_possible_cpu(i) { struct work_struct *flush = per_cpu_ptr(&flush_works, i); struct softnet_data *sd = &per_cpu(softnet_data, i); INIT_WORK(flush, flush_backlog); skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); #ifdef CONFIG_XFRM_OFFLOAD skb_queue_head_init(&sd->xfrm_backlog); #endif INIT_LIST_HEAD(&sd->poll_list); sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS INIT_CSD(&sd->csd, rps_trigger_softirq, sd); sd->cpu = i; #endif INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); spin_lock_init(&sd->defer_lock); init_gro_hash(&sd->backlog); sd->backlog.poll = process_backlog; sd->backlog.weight = weight_p; if (net_page_pool_create(i)) goto out; } dev_boot_phase = 0; /* The loopback device is special: if any other network device * is present in a network namespace, the loopback device must * be present. Since we now dynamically allocate and free the * loopback device, ensure this invariant is maintained by * keeping the loopback device as the first device on the * list of network devices. Ensure the loopback device * is the first device that appears and the last network device * that disappears. */ if (register_pernet_device(&loopback_net_ops)) goto out; if (register_pernet_device(&default_device_ops)) goto out; open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead", NULL, dev_cpu_dead); WARN_ON(rc < 0); rc = 0; out: if (rc < 0) { for_each_possible_cpu(i) { struct page_pool *pp_ptr; pp_ptr = per_cpu(system_page_pool, i); if (!pp_ptr) continue; page_pool_destroy(pp_ptr); per_cpu(system_page_pool, i) = NULL; } } return rc; } subsys_initcall(net_dev_init);
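/*
 * Illustrative sketch (not part of the original file): how a simple driver
 * typically consumes the registration API implemented above. All "mydrv"
 * names and the no-op transmit handler are hypothetical placeholders; a real
 * driver fills in far more state before registering.
 */
#if 0	/* example only, never compiled */
#include <linux/etherdevice.h>
#include <linux/module.h>
#include <linux/netdevice.h>

static struct net_device *mydrv_dev;

static netdev_tx_t mydrv_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* A real driver would hand the skb to hardware here. */
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops mydrv_netdev_ops = {
	.ndo_start_xmit = mydrv_start_xmit,
};

static int __init mydrv_init(void)
{
	int err;

	/* Ethernet-style device, one TX and one RX queue, no private data. */
	mydrv_dev = alloc_etherdev_mqs(0, 1, 1);
	if (!mydrv_dev)
		return -ENOMEM;

	mydrv_dev->netdev_ops = &mydrv_netdev_ops;
	eth_hw_addr_random(mydrv_dev);

	err = register_netdev(mydrv_dev);	/* takes the rtnl lock itself */
	if (err)
		free_netdev(mydrv_dev);
	return err;
}

static void __exit mydrv_exit(void)
{
	unregister_netdev(mydrv_dev);		/* takes the rtnl lock itself */
	free_netdev(mydrv_dev);
}

module_init(mydrv_init);
module_exit(mydrv_exit);
MODULE_DESCRIPTION("Example net_device registration sketch");
MODULE_LICENSE("GPL");
#endif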
/* SPDX-License-Identifier: GPL-2.0 */ /* * Generic nexthop implementation * * Copyright (c) 2017-19 Cumulus Networks * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> */ #ifndef __LINUX_NEXTHOP_H #define __LINUX_NEXTHOP_H #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/route.h> #include <linux/types.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/netlink.h> #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK struct nexthop; struct nh_config { u32 nh_id; u8 nh_family; u8 nh_protocol; u8 nh_blackhole; u8 nh_fdb; u32 nh_flags; int nh_ifindex; struct net_device *dev; union { __be32 ipv4; struct in6_addr ipv6; } gw; struct nlattr *nh_grp; u16 nh_grp_type; u16 nh_grp_res_num_buckets; unsigned long nh_grp_res_idle_timer; unsigned long nh_grp_res_unbalanced_timer; bool nh_grp_res_has_num_buckets; bool nh_grp_res_has_idle_timer; bool nh_grp_res_has_unbalanced_timer; bool nh_hw_stats; struct nlattr *nh_encap; u16 nh_encap_type; u32 nlflags; struct nl_info nlinfo; }; struct nh_info { struct hlist_node dev_hash; /* entry on netns devhash */ struct nexthop *nh_parent; u8 family; bool reject_nh; bool fdb_nh; union { struct fib_nh_common fib_nhc; struct fib_nh fib_nh; struct fib6_nh fib6_nh; }; }; struct nh_res_bucket { struct nh_grp_entry __rcu *nh_entry;
atomic_long_t used_time; unsigned long migrated_time; bool occupied; u8 nh_flags; }; struct nh_res_table { struct net *net; u32 nhg_id; struct delayed_work upkeep_dw; /* List of NHGEs that have too few buckets ("uw" for underweight). * Reclaimed buckets will be given to entries in this list. */ struct list_head uw_nh_entries; unsigned long unbalanced_since; u32 idle_timer; u32 unbalanced_timer; u16 num_nh_buckets; struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); }; struct nh_grp_entry_stats { u64_stats_t packets; struct u64_stats_sync syncp; }; struct nh_grp_entry { struct nexthop *nh; struct nh_grp_entry_stats __percpu *stats; u8 weight; union { struct { atomic_t upper_bound; } hthr; struct { /* Member on uw_nh_entries. */ struct list_head uw_nh_entry; u16 count_buckets; u16 wants_buckets; } res; }; struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ u64 packets_hw; }; struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; bool is_multipath; bool hash_threshold; bool resilient; bool fdb_nh; bool has_v4; bool hw_stats; struct nh_res_table __rcu *res_table; struct nh_grp_entry nh_entries[] __counted_by(num_nh); }; struct nexthop { struct rb_node rb_node; /* entry on netns rbtree */ struct list_head fi_list; /* v4 entries using nh */ struct list_head f6i_list; /* v6 entries using nh */ struct list_head fdb_list; /* fdb entries using this nh */ struct list_head grp_list; /* nh group entries using this nh */ struct net *net; u32 id; u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; refcount_t refcnt; struct rcu_head rcu; union { struct nh_info __rcu *nh_info; struct nh_group __rcu *nh_grp; }; }; enum nexthop_event_type { NEXTHOP_EVENT_DEL, NEXTHOP_EVENT_REPLACE, NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, NEXTHOP_EVENT_BUCKET_REPLACE, NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, }; enum nh_notifier_info_type { NH_NOTIFIER_INFO_TYPE_SINGLE, NH_NOTIFIER_INFO_TYPE_GRP, NH_NOTIFIER_INFO_TYPE_RES_TABLE, NH_NOTIFIER_INFO_TYPE_RES_BUCKET, NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS, }; struct nh_notifier_single_info { struct net_device *dev; u8 gw_family; union { __be32 ipv4; struct in6_addr ipv6; }; u32 id; u8 is_reject:1, is_fdb:1, has_encap:1; }; struct nh_notifier_grp_entry_info { u8 weight; struct nh_notifier_single_info nh; }; struct nh_notifier_grp_info { u16 num_nh; bool is_fdb; bool hw_stats; struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); }; struct nh_notifier_res_bucket_info { u16 bucket_index; unsigned int idle_timer_ms; bool force; struct nh_notifier_single_info old_nh; struct nh_notifier_single_info new_nh; }; struct nh_notifier_res_table_info { u16 num_nh_buckets; bool hw_stats; struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); }; struct nh_notifier_grp_hw_stats_entry_info { u32 id; u64 packets; }; struct nh_notifier_grp_hw_stats_info { u16 num_nh; bool hw_stats_used; struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh); }; struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; u32 id; enum nh_notifier_info_type type; union { struct nh_notifier_single_info *nh; struct nh_notifier_grp_info *nh_grp; struct nh_notifier_res_table_info *nh_res_table; struct nh_notifier_res_bucket_info *nh_res_bucket; struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats; }; }; int register_nexthop_notifier(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); int __unregister_nexthop_notifier(struct net *net, struct notifier_block 
*nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, bool offload, bool trap); void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, unsigned long *activity); void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, unsigned int nh_idx, u64 delta_packets); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); void nexthop_free_rcu(struct rcu_head *head); static inline bool nexthop_get(struct nexthop *nh) { return refcount_inc_not_zero(&nh->refcnt); } static inline void nexthop_put(struct nexthop *nh) { if (refcount_dec_and_test(&nh->refcnt)) call_rcu(&nh->rcu, nexthop_free_rcu); } static inline bool nexthop_cmp(const struct nexthop *nh1, const struct nexthop *nh2) { return nh1 == nh2; } static inline bool nexthop_is_fdb(const struct nexthop *nh) { if (nh->is_group) { const struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->fdb_nh; } else { const struct nh_info *nhi; nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->fdb_nh; } } static inline bool nexthop_has_v4(const struct nexthop *nh) { if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->has_v4; } return false; } static inline bool nexthop_is_multipath(const struct nexthop *nh) { if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); return nh_grp->is_multipath; } return false; } struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); static inline unsigned int nexthop_num_path(const struct nexthop *nh) { unsigned int rc = 1; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->is_multipath) rc = nh_grp->num_nh; } return rc; } static inline struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) { /* for_nexthops macros in fib_semantics.c grabs a pointer to * the nexthop before checking nhsel */ if (nhsel >= nhg->num_nh) return NULL; return nhg->nh_entries[nhsel].nh; } static inline int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, u8 rt_family) { struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) return -EMSGSIZE; } return 0; } /* called with rcu lock */ static inline bool nexthop_is_blackhole(const struct nexthop *nh) { const struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->num_nh > 1) return false; nh = nh_grp->nh_entries[0].nh; } nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->reject_nh; } static inline void nexthop_path_fib_result(struct fib_result *res, int hash) { struct nh_info *nhi; struct nexthop *nh; nh = nexthop_select_path(res->fi->nh, hash); nhi = rcu_dereference(nh->nh_info); res->nhc = &nhi->fib_nhc; } /* called with rcu read lock or rtnl held */ static inline struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) { struct nh_info *nhi; BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) 
!= 0); if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); if (nh_grp->is_multipath) { nh = nexthop_mpath_select(nh_grp, nhsel); if (!nh) return NULL; } } nhi = rcu_dereference_rtnl(nh->nh_info); return &nhi->fib_nhc; } /* called from fib_table_lookup with rcu_lock */ static inline struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, int fib_flags, const struct flowi4 *flp, int *nhsel) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nhg = rcu_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; nhi = rcu_dereference(nhe->nh_info); if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { *nhsel = i; return &nhi->fib_nhc; } } } else { nhi = rcu_dereference(nh->nh_info); if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { *nhsel = 0; return &nhi->fib_nhc; } } return NULL; } static inline bool nexthop_uses_dev(const struct nexthop *nh, const struct net_device *dev) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nhg = rcu_dereference(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { struct nexthop *nhe = nhg->nh_entries[i].nh; nhi = rcu_dereference(nhe->nh_info); if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) return true; } } else { nhi = rcu_dereference(nh->nh_info); if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) return true; } return false; } static inline unsigned int fib_info_num_path(const struct fib_info *fi) { if (unlikely(fi->nh)) return nexthop_num_path(fi->nh); return fi->fib_nhs; } int fib_check_nexthop(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack); static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) { if (unlikely(fi->nh)) return nexthop_fib_nhc(fi->nh, nhsel); return &fi->fib_nh[nhsel].nh_common; } /* only used when fib_nh is built into fib_info */ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) { WARN_ON(fi->nh); return &fi->fib_nh[nhsel]; } /* * IPv6 variants */ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack); /* Caller should either hold rcu_read_lock(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) { struct nh_info *nhi; if (nh->is_group) { struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); nh = nexthop_mpath_select(nh_grp, 0); if (!nh) return NULL; } nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->family == AF_INET6) return &nhi->fib6_nh; return NULL; } static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) { struct fib6_nh *fib6_nh; fib6_nh = f6i->nh ? 
nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; return fib6_nh->fib_nh_dev; } static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) { struct nexthop *nh = res->f6i->nh; struct nh_info *nhi; nh = nexthop_select_path(nh, hash); nhi = rcu_dereference_rtnl(nh->nh_info); if (nhi->reject_nh) { res->fib6_type = RTN_BLACKHOLE; res->fib6_flags |= RTF_REJECT; res->nh = nexthop_fib6_nh(nh); } else { res->nh = &nhi->fib6_nh; } } int nexthop_for_each_fib6_nh(struct nexthop *nh, int (*cb)(struct fib6_nh *nh, void *arg), void *arg); static inline int nexthop_get_family(struct nexthop *nh) { struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->family; } static inline struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) { struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); return &nhi->fib_nhc; } static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, int hash) { struct nh_info *nhi; struct nexthop *nhp; nhp = nexthop_select_path(nh, hash); if (unlikely(!nhp)) return NULL; nhi = rcu_dereference(nhp->nh_info); return &nhi->fib_nhc; } #endif
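/*
 * A minimal usage sketch, not part of the original header: it shows how a
 * caller might combine nexthop_find_by_id(), nexthop_get() and nexthop_put()
 * given the comment above that the lookup takes no reference and must run
 * under RCU or RTNL. The function name example_hold_nexthop() is made up for
 * illustration only.
 */
static inline struct nexthop *example_hold_nexthop(struct net *net, u32 id)
{
	struct nexthop *nh;

	rcu_read_lock();
	nh = nexthop_find_by_id(net, id);	/* no reference taken by lookup */
	if (nh && !nexthop_get(nh))		/* lost a race with deletion */
		nh = NULL;
	rcu_read_unlock();

	return nh;	/* caller must balance with nexthop_put(nh) */
}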
// SPDX-License-Identifier: ISC /* * Copyright (c) 2007-2011 Atheros Communications Inc. * Copyright (c) 2011-2012,2017 Qualcomm Atheros, Inc. * Copyright (c) 2016-2017 Erik Stromdahl <erik.stromdahl@gmail.com> */ #include <linux/module.h> #include <linux/usb.h> #include "debug.h" #include "core.h" #include "bmi.h" #include "hif.h" #include "htc.h" #include "usb.h" static void ath10k_usb_post_recv_transfers(struct ath10k *ar, struct ath10k_usb_pipe *recv_pipe); /* inlined helper functions */ static inline enum ath10k_htc_ep_id eid_from_htc_hdr(struct ath10k_htc_hdr *htc_hdr) { return (enum ath10k_htc_ep_id)htc_hdr->eid; } static inline bool is_trailer_only_msg(struct ath10k_htc_hdr *htc_hdr) { return __le16_to_cpu(htc_hdr->len) == htc_hdr->trailer_len; } /* pipe/urb operations */ static struct ath10k_urb_context * ath10k_usb_alloc_urb_from_pipe(struct ath10k_usb_pipe *pipe) { struct ath10k_urb_context *urb_context = NULL; unsigned long flags; /* bail if this pipe is not initialized */ if (!pipe->ar_usb) return NULL; spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); if (!list_empty(&pipe->urb_list_head)) { urb_context = list_first_entry(&pipe->urb_list_head, struct ath10k_urb_context, link); list_del(&urb_context->link); pipe->urb_cnt--; } spin_unlock_irqrestore(&pipe->ar_usb->cs_lock, flags); return urb_context; } static void ath10k_usb_free_urb_to_pipe(struct ath10k_usb_pipe *pipe, struct ath10k_urb_context *urb_context) { unsigned long flags; /* bail if this pipe is not initialized */ if (!pipe->ar_usb) return; spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); pipe->urb_cnt++; list_add(&urb_context->link, &pipe->urb_list_head); spin_unlock_irqrestore(&pipe->ar_usb->cs_lock, flags); } static void ath10k_usb_cleanup_recv_urb(struct ath10k_urb_context *urb_context) { dev_kfree_skb(urb_context->skb); urb_context->skb = NULL; ath10k_usb_free_urb_to_pipe(urb_context->pipe, urb_context); } static void ath10k_usb_free_pipe_resources(struct ath10k *ar, struct ath10k_usb_pipe *pipe) { struct ath10k_urb_context *urb_context; if (!pipe->ar_usb) { /* nothing allocated for this pipe */ return; } ath10k_dbg(ar, ATH10K_DBG_USB, "usb free resources lpipe %d hpipe 0x%x urbs %d avail %d\n", pipe->logical_pipe_num, pipe->usb_pipe_handle, pipe->urb_alloc, pipe->urb_cnt); if (pipe->urb_alloc != pipe->urb_cnt) { ath10k_dbg(ar, ATH10K_DBG_USB, "usb urb leak lpipe %d hpipe 0x%x urbs %d avail %d\n", pipe->logical_pipe_num, pipe->usb_pipe_handle, pipe->urb_alloc, pipe->urb_cnt); } for (;;) { urb_context =
ath10k_usb_alloc_urb_from_pipe(pipe); if (!urb_context) break; kfree(urb_context); } } static void ath10k_usb_cleanup_pipe_resources(struct ath10k *ar) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); int i; for (i = 0; i < ATH10K_USB_PIPE_MAX; i++) ath10k_usb_free_pipe_resources(ar, &ar_usb->pipes[i]); } /* hif usb rx/tx completion functions */ static void ath10k_usb_recv_complete(struct urb *urb) { struct ath10k_urb_context *urb_context = urb->context; struct ath10k_usb_pipe *pipe = urb_context->pipe; struct ath10k *ar = pipe->ar_usb->ar; struct sk_buff *skb; int status = 0; ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "usb recv pipe %d stat %d len %d urb 0x%pK\n", pipe->logical_pipe_num, urb->status, urb->actual_length, urb); if (urb->status != 0) { status = -EIO; switch (urb->status) { case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* no need to spew these errors when device * removed or urb killed due to driver shutdown */ status = -ECANCELED; break; default: ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "usb recv pipe %d ep 0x%2.2x failed: %d\n", pipe->logical_pipe_num, pipe->ep_address, urb->status); break; } goto cleanup_recv_urb; } if (urb->actual_length == 0) goto cleanup_recv_urb; skb = urb_context->skb; /* we are going to pass it up */ urb_context->skb = NULL; skb_put(skb, urb->actual_length); /* note: queue implements a lock */ skb_queue_tail(&pipe->io_comp_queue, skb); schedule_work(&pipe->io_complete_work); cleanup_recv_urb: ath10k_usb_cleanup_recv_urb(urb_context); if (status == 0 && pipe->urb_cnt >= pipe->urb_cnt_thresh) { /* our free urbs are piling up, post more transfers */ ath10k_usb_post_recv_transfers(ar, pipe); } } static void ath10k_usb_transmit_complete(struct urb *urb) { struct ath10k_urb_context *urb_context = urb->context; struct ath10k_usb_pipe *pipe = urb_context->pipe; struct ath10k *ar = pipe->ar_usb->ar; struct sk_buff *skb; if (urb->status != 0) { ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "pipe: %d, failed:%d\n", pipe->logical_pipe_num, urb->status); } skb = urb_context->skb; urb_context->skb = NULL; ath10k_usb_free_urb_to_pipe(urb_context->pipe, urb_context); /* note: queue implements a lock */ skb_queue_tail(&pipe->io_comp_queue, skb); schedule_work(&pipe->io_complete_work); } /* pipe operations */ static void ath10k_usb_post_recv_transfers(struct ath10k *ar, struct ath10k_usb_pipe *recv_pipe) { struct ath10k_urb_context *urb_context; struct urb *urb; int usb_status; for (;;) { urb_context = ath10k_usb_alloc_urb_from_pipe(recv_pipe); if (!urb_context) break; urb_context->skb = dev_alloc_skb(ATH10K_USB_RX_BUFFER_SIZE); if (!urb_context->skb) goto err; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) goto err; usb_fill_bulk_urb(urb, recv_pipe->ar_usb->udev, recv_pipe->usb_pipe_handle, urb_context->skb->data, ATH10K_USB_RX_BUFFER_SIZE, ath10k_usb_recv_complete, urb_context); ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "usb bulk recv submit %d 0x%x ep 0x%2.2x len %d buf 0x%pK\n", recv_pipe->logical_pipe_num, recv_pipe->usb_pipe_handle, recv_pipe->ep_address, ATH10K_USB_RX_BUFFER_SIZE, urb_context->skb); usb_anchor_urb(urb, &recv_pipe->urb_submitted); usb_status = usb_submit_urb(urb, GFP_ATOMIC); if (usb_status) { ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "usb bulk recv failed: %d\n", usb_status); usb_unanchor_urb(urb); usb_free_urb(urb); goto err; } usb_free_urb(urb); } return; err: ath10k_usb_cleanup_recv_urb(urb_context); } static void ath10k_usb_flush_all(struct ath10k *ar) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); int i; for (i = 0; i < ATH10K_USB_PIPE_MAX; i++) { if 
(ar_usb->pipes[i].ar_usb) { usb_kill_anchored_urbs(&ar_usb->pipes[i].urb_submitted); cancel_work_sync(&ar_usb->pipes[i].io_complete_work); } } } static void ath10k_usb_start_recv_pipes(struct ath10k *ar) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); ar_usb->pipes[ATH10K_USB_PIPE_RX_DATA].urb_cnt_thresh = 1; ath10k_usb_post_recv_transfers(ar, &ar_usb->pipes[ATH10K_USB_PIPE_RX_DATA]); } static void ath10k_usb_tx_complete(struct ath10k *ar, struct sk_buff *skb) { struct ath10k_htc_hdr *htc_hdr; struct ath10k_htc_ep *ep; htc_hdr = (struct ath10k_htc_hdr *)skb->data; ep = &ar->htc.endpoint[htc_hdr->eid]; ath10k_htc_notify_tx_completion(ep, skb); /* The TX complete handler now owns the skb... */ } static void ath10k_usb_rx_complete(struct ath10k *ar, struct sk_buff *skb) { struct ath10k_htc *htc = &ar->htc; struct ath10k_htc_hdr *htc_hdr; enum ath10k_htc_ep_id eid; struct ath10k_htc_ep *ep; u16 payload_len; u8 *trailer; int ret; htc_hdr = (struct ath10k_htc_hdr *)skb->data; eid = eid_from_htc_hdr(htc_hdr); ep = &ar->htc.endpoint[eid]; if (ep->service_id == 0) { ath10k_warn(ar, "ep %d is not connected\n", eid); goto out_free_skb; } payload_len = le16_to_cpu(htc_hdr->len); if (!payload_len) { ath10k_warn(ar, "zero length frame received, firmware crashed?\n"); goto out_free_skb; } if (payload_len < htc_hdr->trailer_len) { ath10k_warn(ar, "malformed frame received, firmware crashed?\n"); goto out_free_skb; } if (htc_hdr->flags & ATH10K_HTC_FLAG_TRAILER_PRESENT) { trailer = skb->data + sizeof(*htc_hdr) + payload_len - htc_hdr->trailer_len; ret = ath10k_htc_process_trailer(htc, trailer, htc_hdr->trailer_len, eid, NULL, NULL); if (ret) goto out_free_skb; if (is_trailer_only_msg(htc_hdr)) goto out_free_skb; /* strip off the trailer from the skb since it should not * be passed on to upper layers */ skb_trim(skb, skb->len - htc_hdr->trailer_len); } skb_pull(skb, sizeof(*htc_hdr)); ep->ep_ops.ep_rx_complete(ar, skb); /* The RX complete handler now owns the skb... 
*/ if (test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags)) { local_bh_disable(); napi_schedule(&ar->napi); local_bh_enable(); } return; out_free_skb: dev_kfree_skb(skb); } static void ath10k_usb_io_comp_work(struct work_struct *work) { struct ath10k_usb_pipe *pipe = container_of(work, struct ath10k_usb_pipe, io_complete_work); struct ath10k *ar = pipe->ar_usb->ar; struct sk_buff *skb; while ((skb = skb_dequeue(&pipe->io_comp_queue))) { if (pipe->flags & ATH10K_USB_PIPE_FLAG_TX) ath10k_usb_tx_complete(ar, skb); else ath10k_usb_rx_complete(ar, skb); } } #define ATH10K_USB_MAX_DIAG_CMD (sizeof(struct ath10k_usb_ctrl_diag_cmd_write)) #define ATH10K_USB_MAX_DIAG_RESP (sizeof(struct ath10k_usb_ctrl_diag_resp_read)) static void ath10k_usb_destroy(struct ath10k *ar) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); ath10k_usb_flush_all(ar); ath10k_usb_cleanup_pipe_resources(ar); usb_set_intfdata(ar_usb->interface, NULL); kfree(ar_usb->diag_cmd_buffer); kfree(ar_usb->diag_resp_buffer); } static int ath10k_usb_hif_start(struct ath10k *ar) { int i; struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); ath10k_core_napi_enable(ar); ath10k_usb_start_recv_pipes(ar); /* set the TX resource avail threshold for each TX pipe */ for (i = ATH10K_USB_PIPE_TX_CTRL; i <= ATH10K_USB_PIPE_TX_DATA_HP; i++) { ar_usb->pipes[i].urb_cnt_thresh = ar_usb->pipes[i].urb_alloc / 2; } return 0; } static int ath10k_usb_hif_tx_sg(struct ath10k *ar, u8 pipe_id, struct ath10k_hif_sg_item *items, int n_items) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); struct ath10k_usb_pipe *pipe = &ar_usb->pipes[pipe_id]; struct ath10k_urb_context *urb_context; struct sk_buff *skb; struct urb *urb; int ret, i; for (i = 0; i < n_items; i++) { urb_context = ath10k_usb_alloc_urb_from_pipe(pipe); if (!urb_context) { ret = -ENOMEM; goto err; } skb = items[i].transfer_context; urb_context->skb = skb; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { ret = -ENOMEM; goto err_free_urb_to_pipe; } usb_fill_bulk_urb(urb, ar_usb->udev, pipe->usb_pipe_handle, skb->data, skb->len, ath10k_usb_transmit_complete, urb_context); if (!(skb->len % pipe->max_packet_size)) { /* hit a max packet boundary on this pipe */ urb->transfer_flags |= URB_ZERO_PACKET; } usb_anchor_urb(urb, &pipe->urb_submitted); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "usb bulk transmit failed: %d\n", ret); usb_unanchor_urb(urb); usb_free_urb(urb); ret = -EINVAL; goto err_free_urb_to_pipe; } usb_free_urb(urb); } return 0; err_free_urb_to_pipe: ath10k_usb_free_urb_to_pipe(urb_context->pipe, urb_context); err: return ret; } static void ath10k_usb_hif_stop(struct ath10k *ar) { ath10k_usb_flush_all(ar); ath10k_core_napi_sync_disable(ar); } static u16 ath10k_usb_hif_get_free_queue_number(struct ath10k *ar, u8 pipe_id) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); return ar_usb->pipes[pipe_id].urb_cnt; } static int ath10k_usb_submit_ctrl_out(struct ath10k *ar, u8 req, u16 value, u16 index, void *data, u32 size) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); u8 *buf = NULL; int ret; if (size > 0) { buf = kmemdup(data, size, GFP_KERNEL); if (!buf) return -ENOMEM; } /* note: if successful returns number of bytes transferred */ ret = usb_control_msg(ar_usb->udev, usb_sndctrlpipe(ar_usb->udev, 0), req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, buf, size, 1000); if (ret < 0) { ath10k_warn(ar, "Failed to submit usb control message: %d\n", ret); kfree(buf); return ret; } kfree(buf); return 0; } static int 
ath10k_usb_submit_ctrl_in(struct ath10k *ar, u8 req, u16 value, u16 index, void *data, u32 size) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); u8 *buf = NULL; int ret; if (size > 0) { buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; } /* note: if successful returns number of bytes transferred */ ret = usb_control_msg(ar_usb->udev, usb_rcvctrlpipe(ar_usb->udev, 0), req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, buf, size, 2000); if (ret < 0) { ath10k_warn(ar, "Failed to read usb control message: %d\n", ret); kfree(buf); return ret; } memcpy((u8 *)data, buf, size); kfree(buf); return 0; } static int ath10k_usb_ctrl_msg_exchange(struct ath10k *ar, u8 req_val, u8 *req_buf, u32 req_len, u8 resp_val, u8 *resp_buf, u32 *resp_len) { int ret; /* send command */ ret = ath10k_usb_submit_ctrl_out(ar, req_val, 0, 0, req_buf, req_len); if (ret) goto err; /* get response */ if (resp_buf) { ret = ath10k_usb_submit_ctrl_in(ar, resp_val, 0, 0, resp_buf, *resp_len); if (ret) goto err; } return 0; err: return ret; } static int ath10k_usb_hif_diag_read(struct ath10k *ar, u32 address, void *buf, size_t buf_len) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); struct ath10k_usb_ctrl_diag_cmd_read *cmd; u32 resp_len; int ret; if (buf_len < sizeof(struct ath10k_usb_ctrl_diag_resp_read)) return -EINVAL; cmd = (struct ath10k_usb_ctrl_diag_cmd_read *)ar_usb->diag_cmd_buffer; memset(cmd, 0, sizeof(*cmd)); cmd->cmd = ATH10K_USB_CTRL_DIAG_CC_READ; cmd->address = cpu_to_le32(address); resp_len = sizeof(struct ath10k_usb_ctrl_diag_resp_read); ret = ath10k_usb_ctrl_msg_exchange(ar, ATH10K_USB_CONTROL_REQ_DIAG_CMD, (u8 *)cmd, sizeof(*cmd), ATH10K_USB_CONTROL_REQ_DIAG_RESP, ar_usb->diag_resp_buffer, &resp_len); if (ret) return ret; if (resp_len != sizeof(struct ath10k_usb_ctrl_diag_resp_read)) return -EMSGSIZE; memcpy(buf, ar_usb->diag_resp_buffer, sizeof(struct ath10k_usb_ctrl_diag_resp_read)); return 0; } static int ath10k_usb_hif_diag_write(struct ath10k *ar, u32 address, const void *data, int nbytes) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); struct ath10k_usb_ctrl_diag_cmd_write *cmd; int ret; if (nbytes != sizeof(cmd->value)) return -EINVAL; cmd = (struct ath10k_usb_ctrl_diag_cmd_write *)ar_usb->diag_cmd_buffer; memset(cmd, 0, sizeof(*cmd)); cmd->cmd = cpu_to_le32(ATH10K_USB_CTRL_DIAG_CC_WRITE); cmd->address = cpu_to_le32(address); memcpy(&cmd->value, data, nbytes); ret = ath10k_usb_ctrl_msg_exchange(ar, ATH10K_USB_CONTROL_REQ_DIAG_CMD, (u8 *)cmd, sizeof(*cmd), 0, NULL, NULL); if (ret) return ret; return 0; } static int ath10k_usb_bmi_exchange_msg(struct ath10k *ar, void *req, u32 req_len, void *resp, u32 *resp_len) { int ret; if (req) { ret = ath10k_usb_submit_ctrl_out(ar, ATH10K_USB_CONTROL_REQ_SEND_BMI_CMD, 0, 0, req, req_len); if (ret) { ath10k_warn(ar, "unable to send the bmi data to the device: %d\n", ret); return ret; } } if (resp) { ret = ath10k_usb_submit_ctrl_in(ar, ATH10K_USB_CONTROL_REQ_RECV_BMI_RESP, 0, 0, resp, *resp_len); if (ret) { ath10k_warn(ar, "Unable to read the bmi data from the device: %d\n", ret); return ret; } } return 0; } static void ath10k_usb_hif_get_default_pipe(struct ath10k *ar, u8 *ul_pipe, u8 *dl_pipe) { *ul_pipe = ATH10K_USB_PIPE_TX_CTRL; *dl_pipe = ATH10K_USB_PIPE_RX_CTRL; } static int ath10k_usb_hif_map_service_to_pipe(struct ath10k *ar, u16 svc_id, u8 *ul_pipe, u8 *dl_pipe) { switch (svc_id) { case ATH10K_HTC_SVC_ID_RSVD_CTRL: case ATH10K_HTC_SVC_ID_WMI_CONTROL: *ul_pipe = ATH10K_USB_PIPE_TX_CTRL; /* due to large control packets, shift 
to data pipe */ *dl_pipe = ATH10K_USB_PIPE_RX_DATA; break; case ATH10K_HTC_SVC_ID_HTT_DATA_MSG: *ul_pipe = ATH10K_USB_PIPE_TX_DATA_LP; /* Disable rxdata2 directly, it will be enabled * if FW enable rxdata2 */ *dl_pipe = ATH10K_USB_PIPE_RX_DATA; break; default: return -EPERM; } return 0; } static int ath10k_usb_hif_power_up(struct ath10k *ar, enum ath10k_firmware_mode fw_mode) { return 0; } static void ath10k_usb_hif_power_down(struct ath10k *ar) { ath10k_usb_flush_all(ar); } #ifdef CONFIG_PM static int ath10k_usb_hif_suspend(struct ath10k *ar) { return -EOPNOTSUPP; } static int ath10k_usb_hif_resume(struct ath10k *ar) { return -EOPNOTSUPP; } #endif static const struct ath10k_hif_ops ath10k_usb_hif_ops = { .tx_sg = ath10k_usb_hif_tx_sg, .diag_read = ath10k_usb_hif_diag_read, .diag_write = ath10k_usb_hif_diag_write, .exchange_bmi_msg = ath10k_usb_bmi_exchange_msg, .start = ath10k_usb_hif_start, .stop = ath10k_usb_hif_stop, .map_service_to_pipe = ath10k_usb_hif_map_service_to_pipe, .get_default_pipe = ath10k_usb_hif_get_default_pipe, .get_free_queue_number = ath10k_usb_hif_get_free_queue_number, .power_up = ath10k_usb_hif_power_up, .power_down = ath10k_usb_hif_power_down, #ifdef CONFIG_PM .suspend = ath10k_usb_hif_suspend, .resume = ath10k_usb_hif_resume, #endif }; static u8 ath10k_usb_get_logical_pipe_num(u8 ep_address, int *urb_count) { u8 pipe_num = ATH10K_USB_PIPE_INVALID; switch (ep_address) { case ATH10K_USB_EP_ADDR_APP_CTRL_IN: pipe_num = ATH10K_USB_PIPE_RX_CTRL; *urb_count = RX_URB_COUNT; break; case ATH10K_USB_EP_ADDR_APP_DATA_IN: pipe_num = ATH10K_USB_PIPE_RX_DATA; *urb_count = RX_URB_COUNT; break; case ATH10K_USB_EP_ADDR_APP_INT_IN: pipe_num = ATH10K_USB_PIPE_RX_INT; *urb_count = RX_URB_COUNT; break; case ATH10K_USB_EP_ADDR_APP_DATA2_IN: pipe_num = ATH10K_USB_PIPE_RX_DATA2; *urb_count = RX_URB_COUNT; break; case ATH10K_USB_EP_ADDR_APP_CTRL_OUT: pipe_num = ATH10K_USB_PIPE_TX_CTRL; *urb_count = TX_URB_COUNT; break; case ATH10K_USB_EP_ADDR_APP_DATA_LP_OUT: pipe_num = ATH10K_USB_PIPE_TX_DATA_LP; *urb_count = TX_URB_COUNT; break; case ATH10K_USB_EP_ADDR_APP_DATA_MP_OUT: pipe_num = ATH10K_USB_PIPE_TX_DATA_MP; *urb_count = TX_URB_COUNT; break; case ATH10K_USB_EP_ADDR_APP_DATA_HP_OUT: pipe_num = ATH10K_USB_PIPE_TX_DATA_HP; *urb_count = TX_URB_COUNT; break; default: /* note: there may be endpoints not currently used */ break; } return pipe_num; } static int ath10k_usb_alloc_pipe_resources(struct ath10k *ar, struct ath10k_usb_pipe *pipe, int urb_cnt) { struct ath10k_urb_context *urb_context; int i; INIT_LIST_HEAD(&pipe->urb_list_head); init_usb_anchor(&pipe->urb_submitted); for (i = 0; i < urb_cnt; i++) { urb_context = kzalloc(sizeof(*urb_context), GFP_KERNEL); if (!urb_context) return -ENOMEM; urb_context->pipe = pipe; /* we are only allocate the urb contexts here, the actual URB * is allocated from the kernel as needed to do a transaction */ pipe->urb_alloc++; ath10k_usb_free_urb_to_pipe(pipe, urb_context); } ath10k_dbg(ar, ATH10K_DBG_USB, "usb alloc resources lpipe %d hpipe 0x%x urbs %d\n", pipe->logical_pipe_num, pipe->usb_pipe_handle, pipe->urb_alloc); return 0; } static int ath10k_usb_setup_pipe_resources(struct ath10k *ar, struct usb_interface *interface) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); struct usb_host_interface *iface_desc = interface->cur_altsetting; struct usb_endpoint_descriptor *endpoint; struct ath10k_usb_pipe *pipe; int ret, i, urbcount; u8 pipe_num; ath10k_dbg(ar, ATH10K_DBG_USB, "usb setting up pipes using interface\n"); /* walk descriptors and setup pipes 
*/ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; if (ATH10K_USB_IS_BULK_EP(endpoint->bmAttributes)) { ath10k_dbg(ar, ATH10K_DBG_USB, "usb %s bulk ep 0x%2.2x maxpktsz %d\n", ATH10K_USB_IS_DIR_IN (endpoint->bEndpointAddress) ? "rx" : "tx", endpoint->bEndpointAddress, le16_to_cpu(endpoint->wMaxPacketSize)); } else if (ATH10K_USB_IS_INT_EP(endpoint->bmAttributes)) { ath10k_dbg(ar, ATH10K_DBG_USB, "usb %s int ep 0x%2.2x maxpktsz %d interval %d\n", ATH10K_USB_IS_DIR_IN (endpoint->bEndpointAddress) ? "rx" : "tx", endpoint->bEndpointAddress, le16_to_cpu(endpoint->wMaxPacketSize), endpoint->bInterval); } else if (ATH10K_USB_IS_ISOC_EP(endpoint->bmAttributes)) { /* TODO for ISO */ ath10k_dbg(ar, ATH10K_DBG_USB, "usb %s isoc ep 0x%2.2x maxpktsz %d interval %d\n", ATH10K_USB_IS_DIR_IN (endpoint->bEndpointAddress) ? "rx" : "tx", endpoint->bEndpointAddress, le16_to_cpu(endpoint->wMaxPacketSize), endpoint->bInterval); } /* Ignore broken descriptors. */ if (usb_endpoint_maxp(endpoint) == 0) continue; urbcount = 0; pipe_num = ath10k_usb_get_logical_pipe_num(endpoint->bEndpointAddress, &urbcount); if (pipe_num == ATH10K_USB_PIPE_INVALID) continue; pipe = &ar_usb->pipes[pipe_num]; if (pipe->ar_usb) /* hmmm..pipe was already setup */ continue; pipe->ar_usb = ar_usb; pipe->logical_pipe_num = pipe_num; pipe->ep_address = endpoint->bEndpointAddress; pipe->max_packet_size = le16_to_cpu(endpoint->wMaxPacketSize); if (ATH10K_USB_IS_BULK_EP(endpoint->bmAttributes)) { if (ATH10K_USB_IS_DIR_IN(pipe->ep_address)) { pipe->usb_pipe_handle = usb_rcvbulkpipe(ar_usb->udev, pipe->ep_address); } else { pipe->usb_pipe_handle = usb_sndbulkpipe(ar_usb->udev, pipe->ep_address); } } else if (ATH10K_USB_IS_INT_EP(endpoint->bmAttributes)) { if (ATH10K_USB_IS_DIR_IN(pipe->ep_address)) { pipe->usb_pipe_handle = usb_rcvintpipe(ar_usb->udev, pipe->ep_address); } else { pipe->usb_pipe_handle = usb_sndintpipe(ar_usb->udev, pipe->ep_address); } } else if (ATH10K_USB_IS_ISOC_EP(endpoint->bmAttributes)) { /* TODO for ISO */ if (ATH10K_USB_IS_DIR_IN(pipe->ep_address)) { pipe->usb_pipe_handle = usb_rcvisocpipe(ar_usb->udev, pipe->ep_address); } else { pipe->usb_pipe_handle = usb_sndisocpipe(ar_usb->udev, pipe->ep_address); } } pipe->ep_desc = endpoint; if (!ATH10K_USB_IS_DIR_IN(pipe->ep_address)) pipe->flags |= ATH10K_USB_PIPE_FLAG_TX; ret = ath10k_usb_alloc_pipe_resources(ar, pipe, urbcount); if (ret) return ret; } return 0; } static int ath10k_usb_create(struct ath10k *ar, struct usb_interface *interface) { struct ath10k_usb *ar_usb = ath10k_usb_priv(ar); struct usb_device *dev = interface_to_usbdev(interface); struct ath10k_usb_pipe *pipe; int ret, i; usb_set_intfdata(interface, ar_usb); spin_lock_init(&ar_usb->cs_lock); ar_usb->udev = dev; ar_usb->interface = interface; for (i = 0; i < ATH10K_USB_PIPE_MAX; i++) { pipe = &ar_usb->pipes[i]; INIT_WORK(&pipe->io_complete_work, ath10k_usb_io_comp_work); skb_queue_head_init(&pipe->io_comp_queue); } ar_usb->diag_cmd_buffer = kzalloc(ATH10K_USB_MAX_DIAG_CMD, GFP_KERNEL); if (!ar_usb->diag_cmd_buffer) { ret = -ENOMEM; goto err; } ar_usb->diag_resp_buffer = kzalloc(ATH10K_USB_MAX_DIAG_RESP, GFP_KERNEL); if (!ar_usb->diag_resp_buffer) { ret = -ENOMEM; goto err; } ret = ath10k_usb_setup_pipe_resources(ar, interface); if (ret) goto err; return 0; err: ath10k_usb_destroy(ar); return ret; } static int ath10k_usb_napi_poll(struct napi_struct *ctx, int budget) { struct ath10k *ar = container_of(ctx, struct ath10k, napi); int done; done = 
ath10k_htt_rx_hl_indication(ar, budget); ath10k_dbg(ar, ATH10K_DBG_USB, "napi poll: done: %d, budget:%d\n", done, budget); if (done < budget) napi_complete_done(ctx, done); return done; } /* ath10k usb driver registered functions */ static int ath10k_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct ath10k *ar; struct ath10k_usb *ar_usb; struct usb_device *dev = interface_to_usbdev(interface); int ret, vendor_id, product_id; enum ath10k_hw_rev hw_rev; struct ath10k_bus_params bus_params = {}; /* Assumption: All USB based chipsets (so far) are QCA9377 based. * If there will be newer chipsets that does not use the hw reg * setup as defined in qca6174_regs and qca6174_values, this * assumption is no longer valid and hw_rev must be setup differently * depending on chipset. */ hw_rev = ATH10K_HW_QCA9377; ar = ath10k_core_create(sizeof(*ar_usb), &dev->dev, ATH10K_BUS_USB, hw_rev, &ath10k_usb_hif_ops); if (!ar) { dev_err(&dev->dev, "failed to allocate core\n"); return -ENOMEM; } netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_usb_napi_poll); usb_get_dev(dev); vendor_id = le16_to_cpu(dev->descriptor.idVendor); product_id = le16_to_cpu(dev->descriptor.idProduct); ath10k_dbg(ar, ATH10K_DBG_BOOT, "usb new func vendor 0x%04x product 0x%04x\n", vendor_id, product_id); ar_usb = ath10k_usb_priv(ar); ret = ath10k_usb_create(ar, interface); if (ret) goto err; ar_usb->ar = ar; ar->dev_id = product_id; ar->id.vendor = vendor_id; ar->id.device = product_id; bus_params.dev_type = ATH10K_DEV_TYPE_HL; /* TODO: don't know yet how to get chip_id with USB */ bus_params.chip_id = 0; bus_params.hl_msdu_ids = true; ret = ath10k_core_register(ar, &bus_params); if (ret) { ath10k_warn(ar, "failed to register driver core: %d\n", ret); goto err_usb_destroy; } /* TODO: remove this once USB support is fully implemented */ ath10k_warn(ar, "Warning: ath10k USB support is incomplete, don't expect anything to work!\n"); return 0; err_usb_destroy: ath10k_usb_destroy(ar); err: ath10k_core_destroy(ar); usb_put_dev(dev); return ret; } static void ath10k_usb_remove(struct usb_interface *interface) { struct ath10k_usb *ar_usb; ar_usb = usb_get_intfdata(interface); if (!ar_usb) return; ath10k_core_unregister(ar_usb->ar); netif_napi_del(&ar_usb->ar->napi); ath10k_usb_destroy(ar_usb->ar); usb_put_dev(interface_to_usbdev(interface)); ath10k_core_destroy(ar_usb->ar); } #ifdef CONFIG_PM static int ath10k_usb_pm_suspend(struct usb_interface *interface, pm_message_t message) { struct ath10k_usb *ar_usb = usb_get_intfdata(interface); ath10k_usb_flush_all(ar_usb->ar); return 0; } static int ath10k_usb_pm_resume(struct usb_interface *interface) { struct ath10k_usb *ar_usb = usb_get_intfdata(interface); struct ath10k *ar = ar_usb->ar; ath10k_usb_post_recv_transfers(ar, &ar_usb->pipes[ATH10K_USB_PIPE_RX_DATA]); return 0; } #else #define ath10k_usb_pm_suspend NULL #define ath10k_usb_pm_resume NULL #endif /* table of devices that work with this driver */ static struct usb_device_id ath10k_usb_ids[] = { {USB_DEVICE(0x13b1, 0x0042)}, /* Linksys WUSB6100M */ { /* Terminating entry */ }, }; MODULE_DEVICE_TABLE(usb, ath10k_usb_ids); static struct usb_driver ath10k_usb_driver = { .name = "ath10k_usb", .probe = ath10k_usb_probe, .suspend = ath10k_usb_pm_suspend, .resume = ath10k_usb_pm_resume, .disconnect = ath10k_usb_remove, .id_table = ath10k_usb_ids, .supports_autosuspend = true, .disable_hub_initiated_lpm = 1, }; module_usb_driver(ath10k_usb_driver); MODULE_AUTHOR("Atheros Communications, Inc."); 
MODULE_DESCRIPTION("Driver support for Qualcomm Atheros USB 802.11ac WLAN devices"); MODULE_LICENSE("Dual BSD/GPL");
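/*
 * Illustration only, not part of the driver: why ath10k_usb_hif_tx_sg() sets
 * URB_ZERO_PACKET. A bulk OUT transfer whose total length is an exact
 * multiple of the endpoint's max packet size does not terminate by itself;
 * the host must follow it with a zero-length packet so the device can tell
 * the transfer is complete. The helper name needs_zero_length_packet() is
 * made up for this sketch and simply mirrors the modulo check used above.
 */
static inline bool needs_zero_length_packet(unsigned int len,
					    unsigned int max_packet_size)
{
	/* e.g. a 1024-byte skb on a 512-byte bulk endpoint -> true */
	return max_packet_size && !(len % max_packet_size);
}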
// SPDX-License-Identifier: GPL-2.0 /* * Workingset detection * * Copyright (C) 2013 Red Hat, Inc., Johannes Weiner */ #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/writeback.h> #include <linux/shmem_fs.h> #include <linux/pagemap.h> #include <linux/atomic.h> #include
<linux/module.h> #include <linux/swap.h> #include <linux/dax.h> #include <linux/fs.h> #include <linux/mm.h> #include "internal.h" /* * Double CLOCK lists * * Per node, two clock lists are maintained for file pages: the * inactive and the active list. Freshly faulted pages start out at * the head of the inactive list and page reclaim scans pages from the * tail. Pages that are accessed multiple times on the inactive list * are promoted to the active list, to protect them from reclaim, * whereas active pages are demoted to the inactive list when the * active list grows too big. * * fault ------------------------+ * | * +--------------+ | +-------------+ * reclaim <- | inactive | <-+-- demotion | active | <--+ * +--------------+ +-------------+ | * | | * +-------------- promotion ------------------+ * * * Access frequency and refault distance * * A workload is thrashing when its pages are frequently used but they * are evicted from the inactive list every time before another access * would have promoted them to the active list. * * In cases where the average access distance between thrashing pages * is bigger than the size of memory there is nothing that can be * done - the thrashing set could never fit into memory under any * circumstance. * * However, the average access distance could be bigger than the * inactive list, yet smaller than the size of memory. In this case, * the set could fit into memory if it weren't for the currently * active pages - which may be used more, hopefully less frequently: * * +-memory available to cache-+ * | | * +-inactive------+-active----+ * a b | c d e f g h i | J K L M N | * +---------------+-----------+ * * It is prohibitively expensive to accurately track access frequency * of pages. But a reasonable approximation can be made to measure * thrashing on the inactive list, after which refaulting pages can be * activated optimistically to compete with the existing active pages. * * Approximating inactive page access frequency - Observations: * * 1. When a page is accessed for the first time, it is added to the * head of the inactive list, slides every existing inactive page * towards the tail by one slot, and pushes the current tail page * out of memory. * * 2. When a page is accessed for the second time, it is promoted to * the active list, shrinking the inactive list by one slot. This * also slides all inactive pages that were faulted into the cache * more recently than the activated page towards the tail of the * inactive list. * * Thus: * * 1. The sum of evictions and activations between any two points in * time indicate the minimum number of inactive pages accessed in * between. * * 2. Moving one inactive page N page slots towards the tail of the * list requires at least N inactive page accesses. * * Combining these: * * 1. When a page is finally evicted from memory, the number of * inactive pages accessed while the page was in cache is at least * the number of page slots on the inactive list. * * 2. In addition, measuring the sum of evictions and activations (E) * at the time of a page's eviction, and comparing it to another * reading (R) at the time the page faults back into memory tells * the minimum number of accesses while the page was not cached. * This is called the refault distance. 
* * Because the first access of the page was the fault and the second * access the refault, we combine the in-cache distance with the * out-of-cache distance to get the complete minimum access distance * of this page: * * NR_inactive + (R - E) * * And knowing the minimum access distance of a page, we can easily * tell if the page would be able to stay in cache assuming all page * slots in the cache were available: * * NR_inactive + (R - E) <= NR_inactive + NR_active * * If we have swap we should consider about NR_inactive_anon and * NR_active_anon, so for page cache and anonymous respectively: * * NR_inactive_file + (R - E) <= NR_inactive_file + NR_active_file * + NR_inactive_anon + NR_active_anon * * NR_inactive_anon + (R - E) <= NR_inactive_anon + NR_active_anon * + NR_inactive_file + NR_active_file * * Which can be further simplified to: * * (R - E) <= NR_active_file + NR_inactive_anon + NR_active_anon * * (R - E) <= NR_active_anon + NR_inactive_file + NR_active_file * * Put into words, the refault distance (out-of-cache) can be seen as * a deficit in inactive list space (in-cache). If the inactive list * had (R - E) more page slots, the page would not have been evicted * in between accesses, but activated instead. And on a full system, * the only thing eating into inactive list space is active pages. * * * Refaulting inactive pages * * All that is known about the active list is that the pages have been * accessed more than once in the past. This means that at any given * time there is actually a good chance that pages on the active list * are no longer in active use. * * So when a refault distance of (R - E) is observed and there are at * least (R - E) pages in the userspace workingset, the refaulting page * is activated optimistically in the hope that (R - E) pages are actually * used less frequently than the refaulting page - or even not used at * all anymore. * * That means if inactive cache is refaulting with a suitable refault * distance, we assume the cache workingset is transitioning and put * pressure on the current workingset. * * If this is wrong and demotion kicks in, the pages which are truly * used more frequently will be reactivated while the less frequently * used once will be evicted from memory. * * But if this is right, the stale pages will be pushed out of memory * and the used pages get to stay in cache. * * Refaulting active pages * * If on the other hand the refaulting pages have recently been * deactivated, it means that the active list is no longer protecting * actively used cache from reclaim. The cache is NOT transitioning to * a different workingset; the existing workingset is thrashing in the * space allocated to the page cache. * * * Implementation * * For each node's LRU lists, a counter for inactive evictions and * activations is maintained (node->nonresident_age). * * On eviction, a snapshot of this counter (along with some bits to * identify the node) is stored in the now empty page cache * slot of the evicted page. This is called a shadow entry. * * On cache misses for which there are shadow entries, an eligible * refault distance will immediately activate the refaulting page. */ #define WORKINGSET_SHIFT 1 #define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ WORKINGSET_SHIFT + NODES_SHIFT + \ MEM_CGROUP_ID_SHIFT) #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) /* * Eviction timestamps need to be able to cover the full range of * actionable refaults. 
However, bits are tight in the xarray * entry, and after storing the identifier for the lruvec there might * not be enough left to represent every single actionable refault. In * that case, we have to sacrifice granularity for distance, and group * evictions into coarser buckets by shaving off lower timestamp bits. */ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, bool workingset) { eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; eviction = (eviction << WORKINGSET_SHIFT) | workingset; return xa_mk_value(eviction); } static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, unsigned long *evictionp, bool *workingsetp) { unsigned long entry = xa_to_value(shadow); int memcgid, nid; bool workingset; workingset = entry & ((1UL << WORKINGSET_SHIFT) - 1); entry >>= WORKINGSET_SHIFT; nid = entry & ((1UL << NODES_SHIFT) - 1); entry >>= NODES_SHIFT; memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); entry >>= MEM_CGROUP_ID_SHIFT; *memcgidp = memcgid; *pgdat = NODE_DATA(nid); *evictionp = entry; *workingsetp = workingset; } #ifdef CONFIG_LRU_GEN static void *lru_gen_eviction(struct folio *folio) { int hist; unsigned long token; unsigned long min_seq; struct lruvec *lruvec; struct lru_gen_folio *lrugen; int type = folio_is_file_lru(folio); int delta = folio_nr_pages(folio); int refs = folio_lru_refs(folio); int tier = lru_tier_from_refs(refs); struct mem_cgroup *memcg = folio_memcg(folio); struct pglist_data *pgdat = folio_pgdat(folio); BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT); lruvec = mem_cgroup_lruvec(memcg, pgdat); lrugen = &lruvec->lrugen; min_seq = READ_ONCE(lrugen->min_seq[type]); token = (min_seq << LRU_REFS_WIDTH) | max(refs - 1, 0); hist = lru_hist_from_seq(min_seq); atomic_long_add(delta, &lrugen->evicted[hist][type][tier]); return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs); } /* * Tests if the shadow entry is for a folio that was recently evicted. * Fills in @lruvec, @token, @workingset with the values unpacked from shadow. 
*/ static bool lru_gen_test_recent(void *shadow, bool file, struct lruvec **lruvec, unsigned long *token, bool *workingset) { int memcg_id; unsigned long min_seq; struct mem_cgroup *memcg; struct pglist_data *pgdat; unpack_shadow(shadow, &memcg_id, &pgdat, token, workingset); memcg = mem_cgroup_from_id(memcg_id); *lruvec = mem_cgroup_lruvec(memcg, pgdat); min_seq = READ_ONCE((*lruvec)->lrugen.min_seq[file]); return (*token >> LRU_REFS_WIDTH) == (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)); } static void lru_gen_refault(struct folio *folio, void *shadow) { bool recent; int hist, tier, refs; bool workingset; unsigned long token; struct lruvec *lruvec; struct lru_gen_folio *lrugen; int type = folio_is_file_lru(folio); int delta = folio_nr_pages(folio); rcu_read_lock(); recent = lru_gen_test_recent(shadow, type, &lruvec, &token, &workingset); if (lruvec != folio_lruvec(folio)) goto unlock; mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta); if (!recent) goto unlock; lrugen = &lruvec->lrugen; hist = lru_hist_from_seq(READ_ONCE(lrugen->min_seq[type])); /* see the comment in folio_lru_refs() */ refs = (token & (BIT(LRU_REFS_WIDTH) - 1)) + workingset; tier = lru_tier_from_refs(refs); atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]); mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta); /* * Count the following two cases as stalls: * 1. For pages accessed through page tables, hotter pages pushed out * hot pages which refaulted immediately. * 2. For pages accessed multiple times through file descriptors, * they would have been protected by sort_folio(). */ if (lru_gen_in_fault() || refs >= BIT(LRU_REFS_WIDTH) - 1) { set_mask_bits(&folio->flags, 0, LRU_REFS_MASK | BIT(PG_workingset)); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); } unlock: rcu_read_unlock(); } #else /* !CONFIG_LRU_GEN */ static void *lru_gen_eviction(struct folio *folio) { return NULL; } static bool lru_gen_test_recent(void *shadow, bool file, struct lruvec **lruvec, unsigned long *token, bool *workingset) { return false; } static void lru_gen_refault(struct folio *folio, void *shadow) { } #endif /* CONFIG_LRU_GEN */ /** * workingset_age_nonresident - age non-resident entries as LRU ages * @lruvec: the lruvec that was aged * @nr_pages: the number of pages to count * * As in-memory pages are aged, non-resident pages need to be aged as * well, in order for the refault distances later on to be comparable * to the in-memory dimensions. This function allows reclaim and LRU * operations to drive the non-resident aging along in parallel. */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages) { /* * Reclaiming a cgroup means reclaiming all its children in a * round-robin fashion. That means that each cgroup has an LRU * order that is composed of the LRU orders of its child * cgroups; and every page has an LRU position not just in the * cgroup that owns it, but in all of that group's ancestors. * * So when the physical inactive list of a leaf cgroup ages, * the virtual inactive lists of all its parents, including * the root cgroup's, age as well. */ do { atomic_long_add(nr_pages, &lruvec->nonresident_age); } while ((lruvec = parent_lruvec(lruvec))); } /** * workingset_eviction - note the eviction of a folio from memory * @target_memcg: the cgroup that is causing the reclaim * @folio: the folio being evicted * * Return: a shadow entry to be stored in @folio->mapping->i_pages in place * of the evicted @folio so that a later refault can be detected. 
*/ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) { struct pglist_data *pgdat = folio_pgdat(folio); unsigned long eviction; struct lruvec *lruvec; int memcgid; /* Folio is fully exclusive and pins folio's memory cgroup pointer */ VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (lru_gen_enabled()) return lru_gen_eviction(folio); lruvec = mem_cgroup_lruvec(target_memcg, pgdat); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); eviction >>= bucket_order; workingset_age_nonresident(lruvec, folio_nr_pages(folio)); return pack_shadow(memcgid, pgdat, eviction, folio_test_workingset(folio)); } /** * workingset_test_recent - tests if the shadow entry is for a folio that was * recently evicted. Also fills in @workingset with the value unpacked from * shadow. * @shadow: the shadow entry to be tested. * @file: whether the corresponding folio is from the file lru. * @workingset: where the workingset value unpacked from shadow should * be stored. * * Return: true if the shadow is for a recently evicted folio; false otherwise. */ bool workingset_test_recent(void *shadow, bool file, bool *workingset) { struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; unsigned long workingset_size; unsigned long refault; int memcgid; struct pglist_data *pgdat; unsigned long eviction; rcu_read_lock(); if (lru_gen_enabled()) { bool recent = lru_gen_test_recent(shadow, file, &eviction_lruvec, &eviction, workingset); rcu_read_unlock(); return recent; } unpack_shadow(shadow, &memcgid, &pgdat, &eviction, workingset); eviction <<= bucket_order; /* * Look up the memcg associated with the stored ID. It might * have been deleted since the folio's eviction. * * Note that in rare events the ID could have been recycled * for a new cgroup that refaults a shared folio. This is * impossible to tell from the available data. However, this * should be a rare and limited disturbance, and activations * are always speculative anyway. Ultimately, it's the aging * algorithm's job to shake out the minimum access frequency * for the active cache. * * XXX: On !CONFIG_MEMCG, this will always return NULL; it * would be better if the root_mem_cgroup existed in all * configurations instead. */ eviction_memcg = mem_cgroup_from_id(memcgid); if (!mem_cgroup_disabled() && (!eviction_memcg || !mem_cgroup_tryget(eviction_memcg))) { rcu_read_unlock(); return false; } rcu_read_unlock(); /* * Flush stats (and potentially sleep) outside the RCU read section. * XXX: With per-memcg flushing and thresholding, is ratelimiting * still needed here? */ mem_cgroup_flush_stats_ratelimited(eviction_memcg); eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); /* * Calculate the refault distance * * The unsigned subtraction here gives an accurate distance * across nonresident_age overflows in most cases. There is a * special case: usually, shadow entries have a short lifetime * and are either refaulted or reclaimed along with the inode * before they get too old. But it is not impossible for the * nonresident_age to lap a shadow entry in the field, which * can then result in a false small refault distance, leading * to a false activation should this old entry actually * refault again. 
However, earlier kernels used to deactivate * unconditionally with *every* reclaim invocation for the * longest time, so the occasional inappropriate activation * leading to pressure on the active list is not a problem. */ refault_distance = (refault - eviction) & EVICTION_MASK; /* * Compare the distance to the existing workingset size. We * don't activate pages that couldn't stay resident even if * all the memory was available to the workingset. Whether * workingset competition needs to consider anon or not depends * on having free swap space. */ workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); if (!file) { workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE); } if (mem_cgroup_get_nr_swap_pages(eviction_memcg) > 0) { workingset_size += lruvec_page_state(eviction_lruvec, NR_ACTIVE_ANON); if (file) { workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_ANON); } } mem_cgroup_put(eviction_memcg); return refault_distance <= workingset_size; } /** * workingset_refault - Evaluate the refault of a previously evicted folio. * @folio: The freshly allocated replacement folio. * @shadow: Shadow entry of the evicted folio. * * Calculates and evaluates the refault distance of the previously * evicted folio in the context of the node and the memcg whose memory * pressure caused the eviction. */ void workingset_refault(struct folio *folio, void *shadow) { bool file = folio_is_file_lru(folio); struct pglist_data *pgdat; struct mem_cgroup *memcg; struct lruvec *lruvec; bool workingset; long nr; if (lru_gen_enabled()) { lru_gen_refault(folio, shadow); return; } /* * The activation decision for this folio is made at the level * where the eviction occurred, as that is where the LRU order * during folio reclaim is being determined. * * However, the cgroup that will own the folio is the one that * is actually experiencing the refault event. Make sure the folio is * locked to guarantee folio_memcg() stability throughout. */ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); nr = folio_nr_pages(folio); memcg = folio_memcg(folio); pgdat = folio_pgdat(folio); lruvec = mem_cgroup_lruvec(memcg, pgdat); mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); if (!workingset_test_recent(shadow, file, &workingset)) return; folio_set_active(folio); workingset_age_nonresident(lruvec, nr); mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr); /* Folio was active prior to eviction */ if (workingset) { folio_set_workingset(folio); /* * XXX: Move to folio_add_lru() when it supports new vs * putback */ lru_note_cost_refault(folio); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); } } /** * workingset_activation - note a page activation * @folio: Folio that is being activated. */ void workingset_activation(struct folio *folio) { struct mem_cgroup *memcg; rcu_read_lock(); /* * Filter non-memcg pages here, e.g. unmap can call * mark_page_accessed() on VDSO pages. * * XXX: See workingset_refault() - this should return * root_mem_cgroup even for !CONFIG_MEMCG. */ memcg = folio_memcg_rcu(folio); if (!mem_cgroup_disabled() && !memcg) goto out; workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); out: rcu_read_unlock(); } /* * Shadow entries reflect the share of the working set that does not * fit into memory, so their number depends on the access pattern of * the workload. 
In most cases, they will refault or get reclaimed * along with the inode, but a (malicious) workload that streams * through files with a total size several times that of available * memory, while preventing the inodes from being reclaimed, can * create excessive amounts of shadow nodes. To keep a lid on this, * track shadow nodes and reclaim them when they grow way past the * point where they would still be useful. */ struct list_lru shadow_nodes; void workingset_update_node(struct xa_node *node) { struct address_space *mapping; /* * Track non-empty nodes that contain only shadow entries; * unlink those that contain pages or are being freed. * * Avoid acquiring the list_lru lock when the nodes are * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ mapping = container_of(node->array, struct address_space, i_pages); lockdep_assert_held(&mapping->i_pages.xa_lock); if (node->count && node->count == node->nr_values) { if (list_empty(&node->private_list)) { list_lru_add_obj(&shadow_nodes, &node->private_list); __inc_lruvec_kmem_state(node, WORKINGSET_NODES); } } else { if (!list_empty(&node->private_list)) { list_lru_del_obj(&shadow_nodes, &node->private_list); __dec_lruvec_kmem_state(node, WORKINGSET_NODES); } } } static unsigned long count_shadow_nodes(struct shrinker *shrinker, struct shrink_control *sc) { unsigned long max_nodes; unsigned long nodes; unsigned long pages; nodes = list_lru_shrink_count(&shadow_nodes, sc); if (!nodes) return SHRINK_EMPTY; /* * Approximate a reasonable limit for the nodes * containing shadow entries. We don't need to keep more * shadow entries than possible pages on the active list, * since refault distances bigger than that are dismissed. * * The size of the active list converges toward 100% of * overall page cache as memory grows, with only a tiny * inactive list. Assume the total cache size for that. * * Nodes might be sparsely populated, with only one shadow * entry in the extreme case. Obviously, we cannot keep one * node for every eligible shadow entry, so compromise on a * worst-case density of 1/8th. Below that, not all eligible * refaults can be detected anymore. * * On 64-bit with 7 xa_nodes per page and 64 slots * each, this will reclaim shadow entries when they consume * ~1.8% of available memory: * * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE */ #ifdef CONFIG_MEMCG if (sc->memcg) { struct lruvec *lruvec; int i; mem_cgroup_flush_stats_ratelimited(sc->memcg); lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid)); for (pages = 0, i = 0; i < NR_LRU_LISTS; i++) pages += lruvec_page_state_local(lruvec, NR_LRU_BASE + i); pages += lruvec_page_state_local( lruvec, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT; pages += lruvec_page_state_local( lruvec, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT; } else #endif pages = node_present_pages(sc->nid); max_nodes = pages >> (XA_CHUNK_SHIFT - 3); if (nodes <= max_nodes) return 0; return nodes - max_nodes; } static enum lru_status shadow_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) __must_hold(lru_lock) { struct xa_node *node = container_of(item, struct xa_node, private_list); struct address_space *mapping; int ret; /* * Page cache insertions and deletions synchronously maintain * the shadow node LRU under the i_pages lock and the * lru_lock. Because the page cache tree is emptied before * the inode can be destroyed, holding the lru_lock pins any * address_space that has nodes on the LRU. 
* * We can then safely transition to the i_pages lock to * pin only the address_space of the particular node we want * to reclaim, take the node off-LRU, and drop the lru_lock. */ mapping = container_of(node->array, struct address_space, i_pages); /* Coming from the list, invert the lock order */ if (!xa_trylock(&mapping->i_pages)) { spin_unlock_irq(lru_lock); ret = LRU_RETRY; goto out; } /* For page cache we need to hold i_lock */ if (mapping->host != NULL) { if (!spin_trylock(&mapping->host->i_lock)) { xa_unlock(&mapping->i_pages); spin_unlock_irq(lru_lock); ret = LRU_RETRY; goto out; } } list_lru_isolate(lru, item); __dec_lruvec_kmem_state(node, WORKINGSET_NODES); spin_unlock(lru_lock); /* * The nodes should only contain one or more shadow entries, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. */ if (WARN_ON_ONCE(!node->nr_values)) goto out_invalid; if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; xa_delete_node(node, workingset_update_node); __inc_lruvec_kmem_state(node, WORKINGSET_NODERECLAIM); out_invalid: xa_unlock_irq(&mapping->i_pages); if (mapping->host != NULL) { if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); } ret = LRU_REMOVED_RETRY; out: cond_resched(); spin_lock_irq(lru_lock); return ret; } static unsigned long scan_shadow_nodes(struct shrinker *shrinker, struct shrink_control *sc) { /* list_lru lock nests inside the IRQ-safe i_pages lock */ return list_lru_shrink_walk_irq(&shadow_nodes, sc, shadow_lru_isolate, NULL); } /* * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe * i_pages lock. */ static struct lock_class_key shadow_nodes_key; static int __init workingset_init(void) { struct shrinker *workingset_shadow_shrinker; unsigned int timestamp_bits; unsigned int max_order; int ret = -ENOMEM; BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); /* * Calculate the eviction bucket size to cover the longest * actionable refault distance, which is currently half of * memory (totalram_pages/2). However, memory hotplug may add * some more pages at runtime, so keep working with up to * double the initial memory by using totalram_pages as-is. */ timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT; max_order = fls_long(totalram_pages() - 1); if (max_order > timestamp_bits) bucket_order = max_order - timestamp_bits; pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); workingset_shadow_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-shadow"); if (!workingset_shadow_shrinker) goto err; ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key, workingset_shadow_shrinker); if (ret) goto err_list_lru; workingset_shadow_shrinker->count_objects = count_shadow_nodes; workingset_shadow_shrinker->scan_objects = scan_shadow_nodes; /* ->count reports only fully expendable nodes */ workingset_shadow_shrinker->seeks = 0; shrinker_register(workingset_shadow_shrinker); return 0; err_list_lru: shrinker_free(workingset_shadow_shrinker); err: return ret; } module_init(workingset_init);
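The refault arithmetic above relies on the eviction "timestamp" being a wrapping counter: the distance is an unsigned subtraction followed by a mask, so a counter that wrapped between eviction and refault still yields a small, meaningful distance. Below is a minimal userspace sketch of just that arithmetic; it is not part of the kernel sources, and EVICTION_BITS, refault_distance() and the workingset_size value are made up for the example (the real mask is derived from EVICTION_SHIFT and bucket_order).

#include <stdbool.h>
#include <stdio.h>

/* Tiny made-up counter width so the wraparound is easy to see. */
#define EVICTION_BITS 8UL
#define EVICTION_MASK ((1UL << EVICTION_BITS) - 1)

/* How far the wrapping eviction counter advanced while the page was out. */
static unsigned long refault_distance(unsigned long eviction, unsigned long refault)
{
	return (refault - eviction) & EVICTION_MASK;
}

int main(void)
{
	unsigned long eviction = 250;
	unsigned long refault = 260 & EVICTION_MASK;	/* counter wrapped: 260 -> 4 */
	unsigned long workingset_size = 64;		/* stand-in for the LRU sizes */
	unsigned long distance = refault_distance(eviction, refault);

	printf("distance=%lu activate=%d\n", distance,
	       distance <= workingset_size);
	return 0;
}

With an eviction stamp of 250 and a counter that wrapped around to 4, the masked subtraction still reports a distance of 10, which is then compared against the workingset size in the same spirit as the check above.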
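count_shadow_nodes() above caps the number of tracked shadow nodes at pages >> (XA_CHUNK_SHIFT - 3), i.e. one xa_node per eight pages, which is where the assumed worst-case density of 1/8th comes from. Here is a small sketch of that arithmetic, again not part of the kernel sources and assuming a common 64-bit configuration (XA_CHUNK_SHIFT of 6, about seven xa_nodes per page) and a node with 4 GiB of pages:

#include <stdio.h>

#define XA_CHUNK_SHIFT 6UL	/* 64 slots per xarray node on a typical 64-bit build */

int main(void)
{
	unsigned long pages = (4UL << 30) >> 12;	/* 4 GiB of 4 KiB pages */
	/* allow one node per eight pages: the 1/8th worst-case density */
	unsigned long max_nodes = pages >> (XA_CHUNK_SHIFT - 3);

	printf("pages=%lu max_nodes=%lu\n", pages, max_nodes);
	return 0;
}

That permits about 131072 nodes for 1048576 pages; at the roughly 4096/7 ≈ 585 bytes per node implied by the "7 xa_nodes per page" comment, this is on the order of 73 MiB, in line with the "~1.8% of available memory" figure quoted above.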
// SPDX-License-Identifier: GPL-2.0+ /* * (C) Copyright Linus Torvalds 1999 * (C) Copyright Johannes Erdfelt 1999-2001 * (C) Copyright Andreas Gal 1999 * (C) Copyright Gregory P. Smith 1999 * (C) Copyright Deti Fliegl 1999 * (C) Copyright Randy Dunlap 2000 * (C) Copyright David Brownell 2000-2002 */ #include <linux/bcd.h> #include <linux/module.h> #include <linux/version.h> #include <linux/kernel.h> #include <linux/sched/task_stack.h> #include <linux/slab.h> #include <linux/completion.h> #include <linux/utsname.h> #include <linux/mm.h> #include <asm/io.h> #include <linux/device.h> #include <linux/dma-mapping.h> #include <linux/mutex.h> #include <asm/irq.h> #include <asm/byteorder.h> #include <asm/unaligned.h> #include <linux/platform_device.h> #include <linux/workqueue.h> #include <linux/pm_runtime.h> #include <linux/types.h> #include <linux/genalloc.h> #include <linux/io.h> #include <linux/kcov.h> #include <linux/phy/phy.h> #include <linux/usb.h> #include <linux/usb/hcd.h> #include <linux/usb/otg.h> #include "usb.h" #include "phy.h" /*-------------------------------------------------------------------------*/ /* * USB Host Controller Driver framework * * Plugs into usbcore (usb_bus) and lets HCDs share code, minimizing * HCD-specific behaviors/bugs. * * This does error checks, tracks devices and urbs, and delegates to a * "hc_driver" only for code (and data) that really needs to know about * hardware differences. That includes root hub registers, i/o queues, * and so on ... but as little else as possible. * * Shared code includes most of the "root hub" code (these are emulated, * though each HC's hardware works differently) and PCI glue, plus request * tracking overhead. The HCD code should only block on spinlocks or on * hardware handshaking; blocking on software events (such as other kernel * threads releasing resources, or completing actions) is all generic. * * Happens the USB 2.0 spec says this would be invisible inside the "USBD", * and includes mostly a "HCDI" (HCD Interface) along with some APIs used * only by the hub driver ... and that neither should be seen or used by * usb client device drivers. * * Contributors of ideas or unattributed patches include: David Brownell, * Roman Weissgaerber, Rory Bolt, Greg Kroah-Hartman, ... * * HISTORY: * 2002-02-21 Pull in most of the usb_bus support from usb.c; some * associated cleanup. "usb_hcd" still != "usb_bus". * 2001-12-12 Initial patch version for Linux 2.5.1 kernel.
*/ /*-------------------------------------------------------------------------*/ /* Keep track of which host controller drivers are loaded */ unsigned long usb_hcds_loaded; EXPORT_SYMBOL_GPL(usb_hcds_loaded); /* host controllers we manage */ DEFINE_IDR (usb_bus_idr); EXPORT_SYMBOL_GPL (usb_bus_idr); /* used when allocating bus numbers */ #define USB_MAXBUS 64 /* used when updating list of hcds */ DEFINE_MUTEX(usb_bus_idr_lock); /* exported only for usbfs */ EXPORT_SYMBOL_GPL (usb_bus_idr_lock); /* used for controlling access to virtual root hubs */ static DEFINE_SPINLOCK(hcd_root_hub_lock); /* used when updating an endpoint's URB list */ static DEFINE_SPINLOCK(hcd_urb_list_lock); /* used to protect against unlinking URBs after the device is gone */ static DEFINE_SPINLOCK(hcd_urb_unlink_lock); /* wait queue for synchronous unlinks */ DECLARE_WAIT_QUEUE_HEAD(usb_kill_urb_queue); /*-------------------------------------------------------------------------*/ /* * Sharable chunks of root hub code. */ /*-------------------------------------------------------------------------*/ #define KERNEL_REL bin2bcd(LINUX_VERSION_MAJOR) #define KERNEL_VER bin2bcd(LINUX_VERSION_PATCHLEVEL) /* usb 3.1 root hub device descriptor */ static const u8 usb31_rh_dev_descriptor[18] = { 0x12, /* __u8 bLength; */ USB_DT_DEVICE, /* __u8 bDescriptorType; Device */ 0x10, 0x03, /* __le16 bcdUSB; v3.1 */ 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */ 0x00, /* __u8 bDeviceSubClass; */ 0x03, /* __u8 bDeviceProtocol; USB 3 hub */ 0x09, /* __u8 bMaxPacketSize0; 2^9 = 512 Bytes */ 0x6b, 0x1d, /* __le16 idVendor; Linux Foundation 0x1d6b */ 0x03, 0x00, /* __le16 idProduct; device 0x0003 */ KERNEL_VER, KERNEL_REL, /* __le16 bcdDevice */ 0x03, /* __u8 iManufacturer; */ 0x02, /* __u8 iProduct; */ 0x01, /* __u8 iSerialNumber; */ 0x01 /* __u8 bNumConfigurations; */ }; /* usb 3.0 root hub device descriptor */ static const u8 usb3_rh_dev_descriptor[18] = { 0x12, /* __u8 bLength; */ USB_DT_DEVICE, /* __u8 bDescriptorType; Device */ 0x00, 0x03, /* __le16 bcdUSB; v3.0 */ 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */ 0x00, /* __u8 bDeviceSubClass; */ 0x03, /* __u8 bDeviceProtocol; USB 3.0 hub */ 0x09, /* __u8 bMaxPacketSize0; 2^9 = 512 Bytes */ 0x6b, 0x1d, /* __le16 idVendor; Linux Foundation 0x1d6b */ 0x03, 0x00, /* __le16 idProduct; device 0x0003 */ KERNEL_VER, KERNEL_REL, /* __le16 bcdDevice */ 0x03, /* __u8 iManufacturer; */ 0x02, /* __u8 iProduct; */ 0x01, /* __u8 iSerialNumber; */ 0x01 /* __u8 bNumConfigurations; */ }; /* usb 2.0 root hub device descriptor */ static const u8 usb2_rh_dev_descriptor[18] = { 0x12, /* __u8 bLength; */ USB_DT_DEVICE, /* __u8 bDescriptorType; Device */ 0x00, 0x02, /* __le16 bcdUSB; v2.0 */ 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */ 0x00, /* __u8 bDeviceSubClass; */ 0x00, /* __u8 bDeviceProtocol; [ usb 2.0 no TT ] */ 0x40, /* __u8 bMaxPacketSize0; 64 Bytes */ 0x6b, 0x1d, /* __le16 idVendor; Linux Foundation 0x1d6b */ 0x02, 0x00, /* __le16 idProduct; device 0x0002 */ KERNEL_VER, KERNEL_REL, /* __le16 bcdDevice */ 0x03, /* __u8 iManufacturer; */ 0x02, /* __u8 iProduct; */ 0x01, /* __u8 iSerialNumber; */ 0x01 /* __u8 bNumConfigurations; */ }; /* no usb 2.0 root hub "device qualifier" descriptor: one speed only */ /* usb 1.1 root hub device descriptor */ static const u8 usb11_rh_dev_descriptor[18] = { 0x12, /* __u8 bLength; */ USB_DT_DEVICE, /* __u8 bDescriptorType; Device */ 0x10, 0x01, /* __le16 bcdUSB; v1.1 */ 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */ 0x00, /* __u8 bDeviceSubClass; */ 0x00, /* __u8 
bDeviceProtocol; [ low/full speeds only ] */ 0x40, /* __u8 bMaxPacketSize0; 64 Bytes */ 0x6b, 0x1d, /* __le16 idVendor; Linux Foundation 0x1d6b */ 0x01, 0x00, /* __le16 idProduct; device 0x0001 */ KERNEL_VER, KERNEL_REL, /* __le16 bcdDevice */ 0x03, /* __u8 iManufacturer; */ 0x02, /* __u8 iProduct; */ 0x01, /* __u8 iSerialNumber; */ 0x01 /* __u8 bNumConfigurations; */ }; /*-------------------------------------------------------------------------*/ /* Configuration descriptors for our root hubs */ static const u8 fs_rh_config_descriptor[] = { /* one configuration */ 0x09, /* __u8 bLength; */ USB_DT_CONFIG, /* __u8 bDescriptorType; Configuration */ 0x19, 0x00, /* __le16 wTotalLength; */ 0x01, /* __u8 bNumInterfaces; (1) */ 0x01, /* __u8 bConfigurationValue; */ 0x00, /* __u8 iConfiguration; */ 0xc0, /* __u8 bmAttributes; Bit 7: must be set, 6: Self-powered, 5: Remote wakeup, 4..0: resvd */ 0x00, /* __u8 MaxPower; */ /* USB 1.1: * USB 2.0, single TT organization (mandatory): * one interface, protocol 0 * * USB 2.0, multiple TT organization (optional): * two interfaces, protocols 1 (like single TT) * and 2 (multiple TT mode) ... config is * sometimes settable * NOT IMPLEMENTED */ /* one interface */ 0x09, /* __u8 if_bLength; */ USB_DT_INTERFACE, /* __u8 if_bDescriptorType; Interface */ 0x00, /* __u8 if_bInterfaceNumber; */ 0x00, /* __u8 if_bAlternateSetting; */ 0x01, /* __u8 if_bNumEndpoints; */ 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */ 0x00, /* __u8 if_bInterfaceSubClass; */ 0x00, /* __u8 if_bInterfaceProtocol; [usb1.1 or single tt] */ 0x00, /* __u8 if_iInterface; */ /* one endpoint (status change endpoint) */ 0x07, /* __u8 ep_bLength; */ USB_DT_ENDPOINT, /* __u8 ep_bDescriptorType; Endpoint */ 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */ 0x03, /* __u8 ep_bmAttributes; Interrupt */ 0x02, 0x00, /* __le16 ep_wMaxPacketSize; 1 + (MAX_ROOT_PORTS / 8) */ 0xff /* __u8 ep_bInterval; (255ms -- usb 2.0 spec) */ }; static const u8 hs_rh_config_descriptor[] = { /* one configuration */ 0x09, /* __u8 bLength; */ USB_DT_CONFIG, /* __u8 bDescriptorType; Configuration */ 0x19, 0x00, /* __le16 wTotalLength; */ 0x01, /* __u8 bNumInterfaces; (1) */ 0x01, /* __u8 bConfigurationValue; */ 0x00, /* __u8 iConfiguration; */ 0xc0, /* __u8 bmAttributes; Bit 7: must be set, 6: Self-powered, 5: Remote wakeup, 4..0: resvd */ 0x00, /* __u8 MaxPower; */ /* USB 1.1: * USB 2.0, single TT organization (mandatory): * one interface, protocol 0 * * USB 2.0, multiple TT organization (optional): * two interfaces, protocols 1 (like single TT) * and 2 (multiple TT mode) ... config is * sometimes settable * NOT IMPLEMENTED */ /* one interface */ 0x09, /* __u8 if_bLength; */ USB_DT_INTERFACE, /* __u8 if_bDescriptorType; Interface */ 0x00, /* __u8 if_bInterfaceNumber; */ 0x00, /* __u8 if_bAlternateSetting; */ 0x01, /* __u8 if_bNumEndpoints; */ 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */ 0x00, /* __u8 if_bInterfaceSubClass; */ 0x00, /* __u8 if_bInterfaceProtocol; [usb1.1 or single tt] */ 0x00, /* __u8 if_iInterface; */ /* one endpoint (status change endpoint) */ 0x07, /* __u8 ep_bLength; */ USB_DT_ENDPOINT, /* __u8 ep_bDescriptorType; Endpoint */ 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */ 0x03, /* __u8 ep_bmAttributes; Interrupt */ /* __le16 ep_wMaxPacketSize; 1 + (MAX_ROOT_PORTS / 8) * see hub.c:hub_configure() for details. 
*/ (USB_MAXCHILDREN + 1 + 7) / 8, 0x00, 0x0c /* __u8 ep_bInterval; (256ms -- usb 2.0 spec) */ }; static const u8 ss_rh_config_descriptor[] = { /* one configuration */ 0x09, /* __u8 bLength; */ USB_DT_CONFIG, /* __u8 bDescriptorType; Configuration */ 0x1f, 0x00, /* __le16 wTotalLength; */ 0x01, /* __u8 bNumInterfaces; (1) */ 0x01, /* __u8 bConfigurationValue; */ 0x00, /* __u8 iConfiguration; */ 0xc0, /* __u8 bmAttributes; Bit 7: must be set, 6: Self-powered, 5: Remote wakeup, 4..0: resvd */ 0x00, /* __u8 MaxPower; */ /* one interface */ 0x09, /* __u8 if_bLength; */ USB_DT_INTERFACE, /* __u8 if_bDescriptorType; Interface */ 0x00, /* __u8 if_bInterfaceNumber; */ 0x00, /* __u8 if_bAlternateSetting; */ 0x01, /* __u8 if_bNumEndpoints; */ 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */ 0x00, /* __u8 if_bInterfaceSubClass; */ 0x00, /* __u8 if_bInterfaceProtocol; */ 0x00, /* __u8 if_iInterface; */ /* one endpoint (status change endpoint) */ 0x07, /* __u8 ep_bLength; */ USB_DT_ENDPOINT, /* __u8 ep_bDescriptorType; Endpoint */ 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */ 0x03, /* __u8 ep_bmAttributes; Interrupt */ /* __le16 ep_wMaxPacketSize; 1 + (MAX_ROOT_PORTS / 8) * see hub.c:hub_configure() for details. */ (USB_MAXCHILDREN + 1 + 7) / 8, 0x00, 0x0c, /* __u8 ep_bInterval; (256ms -- usb 2.0 spec) */ /* one SuperSpeed endpoint companion descriptor */ 0x06, /* __u8 ss_bLength */ USB_DT_SS_ENDPOINT_COMP, /* __u8 ss_bDescriptorType; SuperSpeed EP */ /* Companion */ 0x00, /* __u8 ss_bMaxBurst; allows 1 TX between ACKs */ 0x00, /* __u8 ss_bmAttributes; 1 packet per service interval */ 0x02, 0x00 /* __le16 ss_wBytesPerInterval; 15 bits for max 15 ports */ }; /* authorized_default behaviour: * -1 is authorized for all devices (leftover from wireless USB) * 0 is unauthorized for all devices * 1 is authorized for all devices * 2 is authorized for internal devices */ #define USB_AUTHORIZE_WIRED -1 #define USB_AUTHORIZE_NONE 0 #define USB_AUTHORIZE_ALL 1 #define USB_AUTHORIZE_INTERNAL 2 static int authorized_default = CONFIG_USB_DEFAULT_AUTHORIZATION_MODE; module_param(authorized_default, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(authorized_default, "Default USB device authorization: 0 is not authorized, 1 is authorized (default), 2 is authorized for internal devices, -1 is authorized (same as 1)"); /*-------------------------------------------------------------------------*/ /** * ascii2desc() - Helper routine for producing UTF-16LE string descriptors * @s: Null-terminated ASCII (actually ISO-8859-1) string * @buf: Buffer for USB string descriptor (header + UTF-16LE) * @len: Length (in bytes; may be odd) of descriptor buffer. * * Return: The number of bytes filled in: 2 + 2*strlen(s) or @len, * whichever is less. * * Note: * USB String descriptors can contain at most 126 characters; input * strings longer than that are truncated. 
*/ static unsigned ascii2desc(char const *s, u8 *buf, unsigned len) { unsigned n, t = 2 + 2*strlen(s); if (t > 254) t = 254; /* Longest possible UTF string descriptor */ if (len > t) len = t; t += USB_DT_STRING << 8; /* Now t is first 16 bits to store */ n = len; while (n--) { *buf++ = t; if (!n--) break; *buf++ = t >> 8; t = (unsigned char)*s++; } return len; } /** * rh_string() - provides string descriptors for root hub * @id: the string ID number (0: langids, 1: serial #, 2: product, 3: vendor) * @hcd: the host controller for this root hub * @data: buffer for output packet * @len: length of the provided buffer * * Produces either a manufacturer, product or serial number string for the * virtual root hub device. * * Return: The number of bytes filled in: the length of the descriptor or * of the provided buffer, whichever is less. */ static unsigned rh_string(int id, struct usb_hcd const *hcd, u8 *data, unsigned len) { char buf[100]; char const *s; static char const langids[4] = {4, USB_DT_STRING, 0x09, 0x04}; /* language ids */ switch (id) { case 0: /* Array of LANGID codes (0x0409 is MSFT-speak for "en-us") */ /* See http://www.usb.org/developers/docs/USB_LANGIDs.pdf */ if (len > 4) len = 4; memcpy(data, langids, len); return len; case 1: /* Serial number */ s = hcd->self.bus_name; break; case 2: /* Product name */ s = hcd->product_desc; break; case 3: /* Manufacturer */ snprintf (buf, sizeof buf, "%s %s %s", init_utsname()->sysname, init_utsname()->release, hcd->driver->description); s = buf; break; default: /* Can't happen; caller guarantees it */ return 0; } return ascii2desc(s, data, len); } /* Root hub control transfers execute synchronously */ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) { struct usb_ctrlrequest *cmd; u16 typeReq, wValue, wIndex, wLength; u8 *ubuf = urb->transfer_buffer; unsigned len = 0; int status; u8 patch_wakeup = 0; u8 patch_protocol = 0; u16 tbuf_size; u8 *tbuf = NULL; const u8 *bufp; might_sleep(); spin_lock_irq(&hcd_root_hub_lock); status = usb_hcd_link_urb_to_ep(hcd, urb); spin_unlock_irq(&hcd_root_hub_lock); if (status) return status; urb->hcpriv = hcd; /* Indicate it's queued */ cmd = (struct usb_ctrlrequest *) urb->setup_packet; typeReq = (cmd->bRequestType << 8) | cmd->bRequest; wValue = le16_to_cpu (cmd->wValue); wIndex = le16_to_cpu (cmd->wIndex); wLength = le16_to_cpu (cmd->wLength); if (wLength > urb->transfer_buffer_length) goto error; /* * tbuf should be at least as big as the * USB hub descriptor. */ tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength); tbuf = kzalloc(tbuf_size, GFP_KERNEL); if (!tbuf) { status = -ENOMEM; goto err_alloc; } bufp = tbuf; urb->actual_length = 0; switch (typeReq) { /* DEVICE REQUESTS */ /* The root hub's remote wakeup enable bit is implemented using * driver model wakeup flags. If this system supports wakeup * through USB, userspace may change the default "allow wakeup" * policy through sysfs or these calls. * * Most root hubs support wakeup from downstream devices, for * runtime power management (disabling USB clocks and reducing * VBUS power usage). However, not all of them do so; silicon, * board, and BIOS bugs here are not uncommon, so these can't * be treated quite like external hubs. * * Likewise, not all root hubs will pass wakeup events upstream, * to wake up the whole system. So don't assume root hub and * controller capabilities are identical. 
*/ case DeviceRequest | USB_REQ_GET_STATUS: tbuf[0] = (device_may_wakeup(&hcd->self.root_hub->dev) << USB_DEVICE_REMOTE_WAKEUP) | (1 << USB_DEVICE_SELF_POWERED); tbuf[1] = 0; len = 2; break; case DeviceOutRequest | USB_REQ_CLEAR_FEATURE: if (wValue == USB_DEVICE_REMOTE_WAKEUP) device_set_wakeup_enable(&hcd->self.root_hub->dev, 0); else goto error; break; case DeviceOutRequest | USB_REQ_SET_FEATURE: if (device_can_wakeup(&hcd->self.root_hub->dev) && wValue == USB_DEVICE_REMOTE_WAKEUP) device_set_wakeup_enable(&hcd->self.root_hub->dev, 1); else goto error; break; case DeviceRequest | USB_REQ_GET_CONFIGURATION: tbuf[0] = 1; len = 1; fallthrough; case DeviceOutRequest | USB_REQ_SET_CONFIGURATION: break; case DeviceRequest | USB_REQ_GET_DESCRIPTOR: switch (wValue & 0xff00) { case USB_DT_DEVICE << 8: switch (hcd->speed) { case HCD_USB32: case HCD_USB31: bufp = usb31_rh_dev_descriptor; break; case HCD_USB3: bufp = usb3_rh_dev_descriptor; break; case HCD_USB2: bufp = usb2_rh_dev_descriptor; break; case HCD_USB11: bufp = usb11_rh_dev_descriptor; break; default: goto error; } len = 18; if (hcd->has_tt) patch_protocol = 1; break; case USB_DT_CONFIG << 8: switch (hcd->speed) { case HCD_USB32: case HCD_USB31: case HCD_USB3: bufp = ss_rh_config_descriptor; len = sizeof ss_rh_config_descriptor; break; case HCD_USB2: bufp = hs_rh_config_descriptor; len = sizeof hs_rh_config_descriptor; break; case HCD_USB11: bufp = fs_rh_config_descriptor; len = sizeof fs_rh_config_descriptor; break; default: goto error; } if (device_can_wakeup(&hcd->self.root_hub->dev)) patch_wakeup = 1; break; case USB_DT_STRING << 8: if ((wValue & 0xff) < 4) urb->actual_length = rh_string(wValue & 0xff, hcd, ubuf, wLength); else /* unsupported IDs --> "protocol stall" */ goto error; break; case USB_DT_BOS << 8: goto nongeneric; default: goto error; } break; case DeviceRequest | USB_REQ_GET_INTERFACE: tbuf[0] = 0; len = 1; fallthrough; case DeviceOutRequest | USB_REQ_SET_INTERFACE: break; case DeviceOutRequest | USB_REQ_SET_ADDRESS: /* wValue == urb->dev->devaddr */ dev_dbg (hcd->self.controller, "root hub device address %d\n", wValue); break; /* INTERFACE REQUESTS (no defined feature/status flags) */ /* ENDPOINT REQUESTS */ case EndpointRequest | USB_REQ_GET_STATUS: /* ENDPOINT_HALT flag */ tbuf[0] = 0; tbuf[1] = 0; len = 2; fallthrough; case EndpointOutRequest | USB_REQ_CLEAR_FEATURE: case EndpointOutRequest | USB_REQ_SET_FEATURE: dev_dbg (hcd->self.controller, "no endpoint features yet\n"); break; /* CLASS REQUESTS (and errors) */ default: nongeneric: /* non-generic request */ switch (typeReq) { case GetHubStatus: len = 4; break; case GetPortStatus: if (wValue == HUB_PORT_STATUS) len = 4; else /* other port status types return 8 bytes */ len = 8; break; case GetHubDescriptor: len = sizeof (struct usb_hub_descriptor); break; case DeviceRequest | USB_REQ_GET_DESCRIPTOR: /* len is returned by hub_control */ break; } status = hcd->driver->hub_control (hcd, typeReq, wValue, wIndex, tbuf, wLength); if (typeReq == GetHubDescriptor) usb_hub_adjust_deviceremovable(hcd->self.root_hub, (struct usb_hub_descriptor *)tbuf); break; error: /* "protocol stall" on error */ status = -EPIPE; } if (status < 0) { len = 0; if (status != -EPIPE) { dev_dbg (hcd->self.controller, "CTRL: TypeReq=0x%x val=0x%x " "idx=0x%x len=%d ==> %d\n", typeReq, wValue, wIndex, wLength, status); } } else if (status > 0) { /* hub_control may return the length of data copied. 
*/ len = status; status = 0; } if (len) { if (urb->transfer_buffer_length < len) len = urb->transfer_buffer_length; urb->actual_length = len; /* always USB_DIR_IN, toward host */ memcpy (ubuf, bufp, len); /* report whether RH hardware supports remote wakeup */ if (patch_wakeup && len > offsetof (struct usb_config_descriptor, bmAttributes)) ((struct usb_config_descriptor *)ubuf)->bmAttributes |= USB_CONFIG_ATT_WAKEUP; /* report whether RH hardware has an integrated TT */ if (patch_protocol && len > offsetof(struct usb_device_descriptor, bDeviceProtocol)) ((struct usb_device_descriptor *) ubuf)-> bDeviceProtocol = USB_HUB_PR_HS_SINGLE_TT; } kfree(tbuf); err_alloc: /* any errors get returned through the urb completion */ spin_lock_irq(&hcd_root_hub_lock); usb_hcd_unlink_urb_from_ep(hcd, urb); usb_hcd_giveback_urb(hcd, urb, status); spin_unlock_irq(&hcd_root_hub_lock); return 0; } /*-------------------------------------------------------------------------*/ /* * Root Hub interrupt transfers are polled using a timer if the * driver requests it; otherwise the driver is responsible for * calling usb_hcd_poll_rh_status() when an event occurs. * * Completion handler may not sleep. See usb_hcd_giveback_urb() for details. */ void usb_hcd_poll_rh_status(struct usb_hcd *hcd) { struct urb *urb; int length; int status; unsigned long flags; char buffer[6]; /* Any root hubs with > 31 ports? */ if (unlikely(!hcd->rh_pollable)) return; if (!hcd->uses_new_polling && !hcd->status_urb) return; length = hcd->driver->hub_status_data(hcd, buffer); if (length > 0) { /* try to complete the status urb */ spin_lock_irqsave(&hcd_root_hub_lock, flags); urb = hcd->status_urb; if (urb) { clear_bit(HCD_FLAG_POLL_PENDING, &hcd->flags); hcd->status_urb = NULL; if (urb->transfer_buffer_length >= length) { status = 0; } else { status = -EOVERFLOW; length = urb->transfer_buffer_length; } urb->actual_length = length; memcpy(urb->transfer_buffer, buffer, length); usb_hcd_unlink_urb_from_ep(hcd, urb); usb_hcd_giveback_urb(hcd, urb, status); } else { length = 0; set_bit(HCD_FLAG_POLL_PENDING, &hcd->flags); } spin_unlock_irqrestore(&hcd_root_hub_lock, flags); } /* The USB 2.0 spec says 256 ms. This is close enough and won't * exceed that limit if HZ is 100. The math is more clunky than * maybe expected, this is to make sure that all timers for USB devices * fire at the same time to give the CPU a break in between */ if (hcd->uses_new_polling ? 
HCD_POLL_RH(hcd) : (length == 0 && hcd->status_urb != NULL)) mod_timer (&hcd->rh_timer, (jiffies/(HZ/4) + 1) * (HZ/4)); } EXPORT_SYMBOL_GPL(usb_hcd_poll_rh_status); /* timer callback */ static void rh_timer_func (struct timer_list *t) { struct usb_hcd *_hcd = from_timer(_hcd, t, rh_timer); usb_hcd_poll_rh_status(_hcd); } /*-------------------------------------------------------------------------*/ static int rh_queue_status (struct usb_hcd *hcd, struct urb *urb) { int retval; unsigned long flags; unsigned len = 1 + (urb->dev->maxchild / 8); spin_lock_irqsave (&hcd_root_hub_lock, flags); if (hcd->status_urb || urb->transfer_buffer_length < len) { dev_dbg (hcd->self.controller, "not queuing rh status urb\n"); retval = -EINVAL; goto done; } retval = usb_hcd_link_urb_to_ep(hcd, urb); if (retval) goto done; hcd->status_urb = urb; urb->hcpriv = hcd; /* indicate it's queued */ if (!hcd->uses_new_polling) mod_timer(&hcd->rh_timer, (jiffies/(HZ/4) + 1) * (HZ/4)); /* If a status change has already occurred, report it ASAP */ else if (HCD_POLL_PENDING(hcd)) mod_timer(&hcd->rh_timer, jiffies); retval = 0; done: spin_unlock_irqrestore (&hcd_root_hub_lock, flags); return retval; } static int rh_urb_enqueue (struct usb_hcd *hcd, struct urb *urb) { if (usb_endpoint_xfer_int(&urb->ep->desc)) return rh_queue_status (hcd, urb); if (usb_endpoint_xfer_control(&urb->ep->desc)) return rh_call_control (hcd, urb); return -EINVAL; } /*-------------------------------------------------------------------------*/ /* Unlinks of root-hub control URBs are legal, but they don't do anything * since these URBs always execute synchronously. */ static int usb_rh_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) { unsigned long flags; int rc; spin_lock_irqsave(&hcd_root_hub_lock, flags); rc = usb_hcd_check_unlink_urb(hcd, urb, status); if (rc) goto done; if (usb_endpoint_num(&urb->ep->desc) == 0) { /* Control URB */ ; /* Do nothing */ } else { /* Status URB */ if (!hcd->uses_new_polling) del_timer (&hcd->rh_timer); if (urb == hcd->status_urb) { hcd->status_urb = NULL; usb_hcd_unlink_urb_from_ep(hcd, urb); usb_hcd_giveback_urb(hcd, urb, status); } } done: spin_unlock_irqrestore(&hcd_root_hub_lock, flags); return rc; } /*-------------------------------------------------------------------------*/ /** * usb_bus_init - shared initialization code * @bus: the bus structure being initialized * * This code is used to initialize a usb_bus structure, memory for which is * separately managed. */ static void usb_bus_init (struct usb_bus *bus) { memset(&bus->devmap, 0, sizeof(bus->devmap)); bus->devnum_next = 1; bus->root_hub = NULL; bus->busnum = -1; bus->bandwidth_allocated = 0; bus->bandwidth_int_reqs = 0; bus->bandwidth_isoc_reqs = 0; mutex_init(&bus->devnum_next_mutex); } /*-------------------------------------------------------------------------*/ /** * usb_register_bus - registers the USB host controller with the usb core * @bus: pointer to the bus to register * * Context: task context, might sleep. * * Assigns a bus number, and links the controller into usbcore data * structures so that it can be seen by scanning the bus list. * * Return: 0 if successful. A negative error code otherwise. 
*/ static int usb_register_bus(struct usb_bus *bus) { int result = -E2BIG; int busnum; mutex_lock(&usb_bus_idr_lock); busnum = idr_alloc(&usb_bus_idr, bus, 1, USB_MAXBUS, GFP_KERNEL); if (busnum < 0) { pr_err("%s: failed to get bus number\n", usbcore_name); goto error_find_busnum; } bus->busnum = busnum; mutex_unlock(&usb_bus_idr_lock); usb_notify_add_bus(bus); dev_info (bus->controller, "new USB bus registered, assigned bus " "number %d\n", bus->busnum); return 0; error_find_busnum: mutex_unlock(&usb_bus_idr_lock); return result; } /** * usb_deregister_bus - deregisters the USB host controller * @bus: pointer to the bus to deregister * * Context: task context, might sleep. * * Recycles the bus number, and unlinks the controller from usbcore data * structures so that it won't be seen by scanning the bus list. */ static void usb_deregister_bus (struct usb_bus *bus) { dev_info (bus->controller, "USB bus %d deregistered\n", bus->busnum); /* * NOTE: make sure that all the devices are removed by the * controller code, as well as having it call this when cleaning * itself up */ mutex_lock(&usb_bus_idr_lock); idr_remove(&usb_bus_idr, bus->busnum); mutex_unlock(&usb_bus_idr_lock); usb_notify_remove_bus(bus); } /** * register_root_hub - called by usb_add_hcd() to register a root hub * @hcd: host controller for this root hub * * This function registers the root hub with the USB subsystem. It sets up * the device properly in the device tree and then calls usb_new_device() * to register the usb device. It also assigns the root hub's USB address * (always 1). * * Return: 0 if successful. A negative error code otherwise. */ static int register_root_hub(struct usb_hcd *hcd) { struct device *parent_dev = hcd->self.controller; struct usb_device *usb_dev = hcd->self.root_hub; struct usb_device_descriptor *descr; const int devnum = 1; int retval; usb_dev->devnum = devnum; usb_dev->bus->devnum_next = devnum + 1; set_bit(devnum, usb_dev->bus->devmap); usb_set_device_state(usb_dev, USB_STATE_ADDRESS); mutex_lock(&usb_bus_idr_lock); usb_dev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); descr = usb_get_device_descriptor(usb_dev); if (IS_ERR(descr)) { retval = PTR_ERR(descr); mutex_unlock(&usb_bus_idr_lock); dev_dbg (parent_dev, "can't read %s device descriptor %d\n", dev_name(&usb_dev->dev), retval); return retval; } usb_dev->descriptor = *descr; kfree(descr); if (le16_to_cpu(usb_dev->descriptor.bcdUSB) >= 0x0201) { retval = usb_get_bos_descriptor(usb_dev); if (!retval) { usb_dev->lpm_capable = usb_device_supports_lpm(usb_dev); } else if (usb_dev->speed >= USB_SPEED_SUPER) { mutex_unlock(&usb_bus_idr_lock); dev_dbg(parent_dev, "can't read %s bos descriptor %d\n", dev_name(&usb_dev->dev), retval); return retval; } } retval = usb_new_device (usb_dev); if (retval) { dev_err (parent_dev, "can't register root hub for %s, %d\n", dev_name(&usb_dev->dev), retval); } else { spin_lock_irq (&hcd_root_hub_lock); hcd->rh_registered = 1; spin_unlock_irq (&hcd_root_hub_lock); /* Did the HC die before the root hub was registered? */ if (HCD_DEAD(hcd)) usb_hc_died (hcd); /* This time clean up */ } mutex_unlock(&usb_bus_idr_lock); return retval; } /* * usb_hcd_start_port_resume - a root-hub port is sending a resume signal * @bus: the bus which the root hub belongs to * @portnum: the port which is being resumed * * HCDs should call this function when they know that a resume signal is * being sent to a root-hub port. The root hub will be prevented from * going into autosuspend until usb_hcd_end_port_resume() is called. 
* * The bus's private lock must be held by the caller. */ void usb_hcd_start_port_resume(struct usb_bus *bus, int portnum) { unsigned bit = 1 << portnum; if (!(bus->resuming_ports & bit)) { bus->resuming_ports |= bit; pm_runtime_get_noresume(&bus->root_hub->dev); } } EXPORT_SYMBOL_GPL(usb_hcd_start_port_resume); /* * usb_hcd_end_port_resume - a root-hub port has stopped sending a resume signal * @bus: the bus which the root hub belongs to * @portnum: the port which is being resumed * * HCDs should call this function when they know that a resume signal has * stopped being sent to a root-hub port. The root hub will be allowed to * autosuspend again. * * The bus's private lock must be held by the caller. */ void usb_hcd_end_port_resume(struct usb_bus *bus, int portnum) { unsigned bit = 1 << portnum; if (bus->resuming_ports & bit) { bus->resuming_ports &= ~bit; pm_runtime_put_noidle(&bus->root_hub->dev); } } EXPORT_SYMBOL_GPL(usb_hcd_end_port_resume); /*-------------------------------------------------------------------------*/ /** * usb_calc_bus_time - approximate periodic transaction time in nanoseconds * @speed: from dev->speed; USB_SPEED_{LOW,FULL,HIGH} * @is_input: true iff the transaction sends data to the host * @isoc: true for isochronous transactions, false for interrupt ones * @bytecount: how many bytes in the transaction. * * Return: Approximate bus time in nanoseconds for a periodic transaction. * * Note: * See USB 2.0 spec section 5.11.3; only periodic transfers need to be * scheduled in software, this function is only used for such scheduling. */ long usb_calc_bus_time (int speed, int is_input, int isoc, int bytecount) { unsigned long tmp; switch (speed) { case USB_SPEED_LOW: /* INTR only */ if (is_input) { tmp = (67667L * (31L + 10L * BitTime (bytecount))) / 1000L; return 64060L + (2 * BW_HUB_LS_SETUP) + BW_HOST_DELAY + tmp; } else { tmp = (66700L * (31L + 10L * BitTime (bytecount))) / 1000L; return 64107L + (2 * BW_HUB_LS_SETUP) + BW_HOST_DELAY + tmp; } case USB_SPEED_FULL: /* ISOC or INTR */ if (isoc) { tmp = (8354L * (31L + 10L * BitTime (bytecount))) / 1000L; return ((is_input) ? 7268L : 6265L) + BW_HOST_DELAY + tmp; } else { tmp = (8354L * (31L + 10L * BitTime (bytecount))) / 1000L; return 9107L + BW_HOST_DELAY + tmp; } case USB_SPEED_HIGH: /* ISOC or INTR */ /* FIXME adjust for input vs output */ if (isoc) tmp = HS_NSECS_ISO (bytecount); else tmp = HS_NSECS (bytecount); return tmp; default: pr_debug ("%s: bogus device speed!\n", usbcore_name); return -1; } } EXPORT_SYMBOL_GPL(usb_calc_bus_time); /*-------------------------------------------------------------------------*/ /* * Generic HC operations. */ /*-------------------------------------------------------------------------*/ /** * usb_hcd_link_urb_to_ep - add an URB to its endpoint queue * @hcd: host controller to which @urb was submitted * @urb: URB being submitted * * Host controller drivers should call this routine in their enqueue() * method. The HCD's private spinlock must be held and interrupts must * be disabled. The actions carried out here are required for URB * submission, as well as for endpoint shutdown and for usb_kill_urb. * * Return: 0 for no error, otherwise a negative error code (in which case * the enqueue() method must fail). If no error occurs but enqueue() fails * anyway, it must call usb_hcd_unlink_urb_from_ep() before releasing * the private spinlock and returning. 
*/ int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb) { int rc = 0; spin_lock(&hcd_urb_list_lock); /* Check that the URB isn't being killed */ if (unlikely(atomic_read(&urb->reject))) { rc = -EPERM; goto done; } if (unlikely(!urb->ep->enabled)) { rc = -ENOENT; goto done; } if (unlikely(!urb->dev->can_submit)) { rc = -EHOSTUNREACH; goto done; } /* * Check the host controller's state and add the URB to the * endpoint's queue. */ if (HCD_RH_RUNNING(hcd)) { urb->unlinked = 0; list_add_tail(&urb->urb_list, &urb->ep->urb_list); } else { rc = -ESHUTDOWN; goto done; } done: spin_unlock(&hcd_urb_list_lock); return rc; } EXPORT_SYMBOL_GPL(usb_hcd_link_urb_to_ep); /** * usb_hcd_check_unlink_urb - check whether an URB may be unlinked * @hcd: host controller to which @urb was submitted * @urb: URB being checked for unlinkability * @status: error code to store in @urb if the unlink succeeds * * Host controller drivers should call this routine in their dequeue() * method. The HCD's private spinlock must be held and interrupts must * be disabled. The actions carried out here are required for making * sure than an unlink is valid. * * Return: 0 for no error, otherwise a negative error code (in which case * the dequeue() method must fail). The possible error codes are: * * -EIDRM: @urb was not submitted or has already completed. * The completion function may not have been called yet. * * -EBUSY: @urb has already been unlinked. */ int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb, int status) { struct list_head *tmp; /* insist the urb is still queued */ list_for_each(tmp, &urb->ep->urb_list) { if (tmp == &urb->urb_list) break; } if (tmp != &urb->urb_list) return -EIDRM; /* Any status except -EINPROGRESS means something already started to * unlink this URB from the hardware. So there's no more work to do. */ if (urb->unlinked) return -EBUSY; urb->unlinked = status; return 0; } EXPORT_SYMBOL_GPL(usb_hcd_check_unlink_urb); /** * usb_hcd_unlink_urb_from_ep - remove an URB from its endpoint queue * @hcd: host controller to which @urb was submitted * @urb: URB being unlinked * * Host controller drivers should call this routine before calling * usb_hcd_giveback_urb(). The HCD's private spinlock must be held and * interrupts must be disabled. The actions carried out here are required * for URB completion. */ void usb_hcd_unlink_urb_from_ep(struct usb_hcd *hcd, struct urb *urb) { /* clear all state linking urb to this dev (and hcd) */ spin_lock(&hcd_urb_list_lock); list_del_init(&urb->urb_list); spin_unlock(&hcd_urb_list_lock); } EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep); /* * Some usb host controllers can only perform dma using a small SRAM area, * or have restrictions on addressable DRAM. * The usb core itself is however optimized for host controllers that can dma * using regular system memory - like pci devices doing bus mastering. * * To support host controllers with limited dma capabilities we provide dma * bounce buffers. This feature can be enabled by initializing * hcd->localmem_pool using usb_hcd_setup_local_mem(). * * The initialized hcd->localmem_pool then tells the usb code to allocate all * data for dma using the genalloc API. * * So, to summarize... * * - We need "local" memory, canonical example being * a small SRAM on a discrete controller being the * only memory that the controller can read ... 
* (a) "normal" kernel memory is no good, and * (b) there's not enough to share * * - So we use that, even though the primary requirement * is that the memory be "local" (hence addressable * by that device), not "coherent". * */ static int hcd_alloc_coherent(struct usb_bus *bus, gfp_t mem_flags, dma_addr_t *dma_handle, void **vaddr_handle, size_t size, enum dma_data_direction dir) { unsigned char *vaddr; if (*vaddr_handle == NULL) { WARN_ON_ONCE(1); return -EFAULT; } vaddr = hcd_buffer_alloc(bus, size + sizeof(unsigned long), mem_flags, dma_handle); if (!vaddr) return -ENOMEM; /* * Store the virtual address of the buffer at the end * of the allocated dma buffer. The size of the buffer * may be uneven so use unaligned functions instead * of just rounding up. It makes sense to optimize for * memory footprint over access speed since the amount * of memory available for dma may be limited. */ put_unaligned((unsigned long)*vaddr_handle, (unsigned long *)(vaddr + size)); if (dir == DMA_TO_DEVICE) memcpy(vaddr, *vaddr_handle, size); *vaddr_handle = vaddr; return 0; } static void hcd_free_coherent(struct usb_bus *bus, dma_addr_t *dma_handle, void **vaddr_handle, size_t size, enum dma_data_direction dir) { unsigned char *vaddr = *vaddr_handle; vaddr = (void *)get_unaligned((unsigned long *)(vaddr + size)); if (dir == DMA_FROM_DEVICE) memcpy(vaddr, *vaddr_handle, size); hcd_buffer_free(bus, size + sizeof(vaddr), *vaddr_handle, *dma_handle); *vaddr_handle = vaddr; *dma_handle = 0; } void usb_hcd_unmap_urb_setup_for_dma(struct usb_hcd *hcd, struct urb *urb) { if (IS_ENABLED(CONFIG_HAS_DMA) && (urb->transfer_flags & URB_SETUP_MAP_SINGLE)) dma_unmap_single(hcd->self.sysdev, urb->setup_dma, sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE); else if (urb->transfer_flags & URB_SETUP_MAP_LOCAL) hcd_free_coherent(urb->dev->bus, &urb->setup_dma, (void **) &urb->setup_packet, sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE); /* Make it safe to call this routine more than once */ urb->transfer_flags &= ~(URB_SETUP_MAP_SINGLE | URB_SETUP_MAP_LOCAL); } EXPORT_SYMBOL_GPL(usb_hcd_unmap_urb_setup_for_dma); static void unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb) { if (hcd->driver->unmap_urb_for_dma) hcd->driver->unmap_urb_for_dma(hcd, urb); else usb_hcd_unmap_urb_for_dma(hcd, urb); } void usb_hcd_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb) { enum dma_data_direction dir; usb_hcd_unmap_urb_setup_for_dma(hcd, urb); dir = usb_urb_dir_in(urb) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; if (IS_ENABLED(CONFIG_HAS_DMA) && (urb->transfer_flags & URB_DMA_MAP_SG)) dma_unmap_sg(hcd->self.sysdev, urb->sg, urb->num_sgs, dir); else if (IS_ENABLED(CONFIG_HAS_DMA) && (urb->transfer_flags & URB_DMA_MAP_PAGE)) dma_unmap_page(hcd->self.sysdev, urb->transfer_dma, urb->transfer_buffer_length, dir); else if (IS_ENABLED(CONFIG_HAS_DMA) && (urb->transfer_flags & URB_DMA_MAP_SINGLE)) dma_unmap_single(hcd->self.sysdev, urb->transfer_dma, urb->transfer_buffer_length, dir); else if (urb->transfer_flags & URB_MAP_LOCAL) hcd_free_coherent(urb->dev->bus, &urb->transfer_dma, &urb->transfer_buffer, urb->transfer_buffer_length, dir); /* Make it safe to call this routine more than once */ urb->transfer_flags &= ~(URB_DMA_MAP_SG | URB_DMA_MAP_PAGE | URB_DMA_MAP_SINGLE | URB_MAP_LOCAL); } EXPORT_SYMBOL_GPL(usb_hcd_unmap_urb_for_dma); static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) { if (hcd->driver->map_urb_for_dma) return hcd->driver->map_urb_for_dma(hcd, urb, mem_flags); else return usb_hcd_map_urb_for_dma(hcd, urb, mem_flags); } int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) { enum dma_data_direction dir; int ret = 0; /* Map the URB's buffers for DMA access. * Lower level HCD code should use *_dma exclusively, * unless it uses pio or talks to another transport, * or uses the provided scatter gather list for bulk. */ if (usb_endpoint_xfer_control(&urb->ep->desc)) { if (hcd->self.uses_pio_for_control) return ret; if (hcd->localmem_pool) { ret = hcd_alloc_coherent( urb->dev->bus, mem_flags, &urb->setup_dma, (void **)&urb->setup_packet, sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE); if (ret) return ret; urb->transfer_flags |= URB_SETUP_MAP_LOCAL; } else if (hcd_uses_dma(hcd)) { if (object_is_on_stack(urb->setup_packet)) { WARN_ONCE(1, "setup packet is on stack\n"); return -EAGAIN; } urb->setup_dma = dma_map_single( hcd->self.sysdev, urb->setup_packet, sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE); if (dma_mapping_error(hcd->self.sysdev, urb->setup_dma)) return -EAGAIN; urb->transfer_flags |= URB_SETUP_MAP_SINGLE; } } dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; if (urb->transfer_buffer_length != 0 && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) { if (hcd->localmem_pool) { ret = hcd_alloc_coherent( urb->dev->bus, mem_flags, &urb->transfer_dma, &urb->transfer_buffer, urb->transfer_buffer_length, dir); if (ret == 0) urb->transfer_flags |= URB_MAP_LOCAL; } else if (hcd_uses_dma(hcd)) { if (urb->num_sgs) { int n; /* We don't support sg for isoc transfers ! 
*/ if (usb_endpoint_xfer_isoc(&urb->ep->desc)) { WARN_ON(1); return -EINVAL; } n = dma_map_sg( hcd->self.sysdev, urb->sg, urb->num_sgs, dir); if (!n) ret = -EAGAIN; else urb->transfer_flags |= URB_DMA_MAP_SG; urb->num_mapped_sgs = n; if (n != urb->num_sgs) urb->transfer_flags |= URB_DMA_SG_COMBINED; } else if (urb->sg) { struct scatterlist *sg = urb->sg; urb->transfer_dma = dma_map_page( hcd->self.sysdev, sg_page(sg), sg->offset, urb->transfer_buffer_length, dir); if (dma_mapping_error(hcd->self.sysdev, urb->transfer_dma)) ret = -EAGAIN; else urb->transfer_flags |= URB_DMA_MAP_PAGE; } else if (object_is_on_stack(urb->transfer_buffer)) { WARN_ONCE(1, "transfer buffer is on stack\n"); ret = -EAGAIN; } else { urb->transfer_dma = dma_map_single( hcd->self.sysdev, urb->transfer_buffer, urb->transfer_buffer_length, dir); if (dma_mapping_error(hcd->self.sysdev, urb->transfer_dma)) ret = -EAGAIN; else urb->transfer_flags |= URB_DMA_MAP_SINGLE; } } if (ret && (urb->transfer_flags & (URB_SETUP_MAP_SINGLE | URB_SETUP_MAP_LOCAL))) usb_hcd_unmap_urb_for_dma(hcd, urb); } return ret; } EXPORT_SYMBOL_GPL(usb_hcd_map_urb_for_dma); /*-------------------------------------------------------------------------*/ /* may be called in any context with a valid urb->dev usecount * caller surrenders "ownership" of urb * expects usb_submit_urb() to have sanity checked and conditioned all * inputs in the urb */ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags) { int status; struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); /* increment urb's reference count as part of giving it to the HCD * (which will control it). HCD guarantees that it either returns * an error or calls giveback(), but not both. */ usb_get_urb(urb); atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); usbmon_urb_submit(&hcd->self, urb); /* NOTE requirements on root-hub callers (usbfs and the hub * driver, for now): URBs' urb->transfer_buffer must be * valid and usb_buffer_{sync,unmap}() not be needed, since * they could clobber root hub response data. Also, control * URBs must be submitted in process context with interrupts * enabled. */ if (is_root_hub(urb->dev)) { status = rh_urb_enqueue(hcd, urb); } else { status = map_urb_for_dma(hcd, urb, mem_flags); if (likely(status == 0)) { status = hcd->driver->urb_enqueue(hcd, urb, mem_flags); if (unlikely(status)) unmap_urb_for_dma(hcd, urb); } } if (unlikely(status)) { usbmon_urb_submit_error(&hcd->self, urb, status); urb->hcpriv = NULL; INIT_LIST_HEAD(&urb->urb_list); atomic_dec(&urb->use_count); /* * Order the write of urb->use_count above before the read * of urb->reject below. Pairs with the memory barriers in * usb_kill_urb() and usb_poison_urb(). */ smp_mb__after_atomic(); atomic_dec(&urb->dev->urbnum); if (atomic_read(&urb->reject)) wake_up(&usb_kill_urb_queue); usb_put_urb(urb); } return status; } /*-------------------------------------------------------------------------*/ /* this makes the hcd giveback() the urb more quickly, by kicking it * off hardware queues (which may take a while) and returning it as * soon as practical. we've already set up the urb's return status, * but we can't know if the callback completed already. */ static int unlink1(struct usb_hcd *hcd, struct urb *urb, int status) { int value; if (is_root_hub(urb->dev)) value = usb_rh_urb_dequeue(hcd, urb, status); else { /* The only reason an HCD might fail this call is if * it has not yet fully queued the urb to begin with. * Such failures should be harmless. 
*/ value = hcd->driver->urb_dequeue(hcd, urb, status); } return value; } /* * called in any context * * caller guarantees urb won't be recycled till both unlink() * and the urb's completion function return */ int usb_hcd_unlink_urb (struct urb *urb, int status) { struct usb_hcd *hcd; struct usb_device *udev = urb->dev; int retval = -EIDRM; unsigned long flags; /* Prevent the device and bus from going away while * the unlink is carried out. If they are already gone * then urb->use_count must be 0, since disconnected * devices can't have any active URBs. */ spin_lock_irqsave(&hcd_urb_unlink_lock, flags); if (atomic_read(&urb->use_count) > 0) { retval = 0; usb_get_dev(udev); } spin_unlock_irqrestore(&hcd_urb_unlink_lock, flags); if (retval == 0) { hcd = bus_to_hcd(urb->dev->bus); retval = unlink1(hcd, urb, status); if (retval == 0) retval = -EINPROGRESS; else if (retval != -EIDRM && retval != -EBUSY) dev_dbg(&udev->dev, "hcd_unlink_urb %pK fail %d\n", urb, retval); usb_put_dev(udev); } return retval; } /*-------------------------------------------------------------------------*/ static void __usb_hcd_giveback_urb(struct urb *urb) { struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); struct usb_anchor *anchor = urb->anchor; int status = urb->unlinked; urb->hcpriv = NULL; if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) && urb->actual_length < urb->transfer_buffer_length && !status)) status = -EREMOTEIO; unmap_urb_for_dma(hcd, urb); usbmon_urb_complete(&hcd->self, urb, status); usb_anchor_suspend_wakeups(anchor); usb_unanchor_urb(urb); if (likely(status == 0)) usb_led_activity(USB_LED_EVENT_HOST); /* pass ownership to the completion handler */ urb->status = status; /* * This function can be called in task context inside another remote * coverage collection section, but kcov doesn't support that kind of * recursion yet. Only collect coverage in softirq context for now. */ kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); kcov_remote_stop_softirq(); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); /* * Order the write of urb->use_count above before the read * of urb->reject below. Pairs with the memory barriers in * usb_kill_urb() and usb_poison_urb(). */ smp_mb__after_atomic(); if (unlikely(atomic_read(&urb->reject))) wake_up(&usb_kill_urb_queue); usb_put_urb(urb); } static void usb_giveback_urb_bh(struct work_struct *work) { struct giveback_urb_bh *bh = container_of(work, struct giveback_urb_bh, bh); struct list_head local_list; spin_lock_irq(&bh->lock); bh->running = true; list_replace_init(&bh->head, &local_list); spin_unlock_irq(&bh->lock); while (!list_empty(&local_list)) { struct urb *urb; urb = list_entry(local_list.next, struct urb, urb_list); list_del_init(&urb->urb_list); bh->completing_ep = urb->ep; __usb_hcd_giveback_urb(urb); bh->completing_ep = NULL; } /* * giveback new URBs next time to prevent this function * from not exiting for a long time. */ spin_lock_irq(&bh->lock); if (!list_empty(&bh->head)) { if (bh->high_prio) queue_work(system_bh_highpri_wq, &bh->bh); else queue_work(system_bh_wq, &bh->bh); } bh->running = false; spin_unlock_irq(&bh->lock); } /** * usb_hcd_giveback_urb - return URB from HCD to device driver * @hcd: host controller returning the URB * @urb: urb being returned to the USB device driver. * @status: completion status code for the URB. * * Context: atomic. The completion callback is invoked in caller's context. 
* For HCDs with HCD_BH flag set, the completion callback is invoked in BH * context (except for URBs submitted to the root hub which always complete in * caller's context). * * This hands the URB from HCD to its USB device driver, using its * completion function. The HCD has freed all per-urb resources * (and is done using urb->hcpriv). It also released all HCD locks; * the device driver won't cause problems if it frees, modifies, * or resubmits this URB. * * If @urb was unlinked, the value of @status will be overridden by * @urb->unlinked. Erroneous short transfers are detected in case * the HCD hasn't checked for them. */ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status) { struct giveback_urb_bh *bh; bool running; /* pass status to BH via unlinked */ if (likely(!urb->unlinked)) urb->unlinked = status; if (!hcd_giveback_urb_in_bh(hcd) && !is_root_hub(urb->dev)) { __usb_hcd_giveback_urb(urb); return; } if (usb_pipeisoc(urb->pipe) || usb_pipeint(urb->pipe)) bh = &hcd->high_prio_bh; else bh = &hcd->low_prio_bh; spin_lock(&bh->lock); list_add_tail(&urb->urb_list, &bh->head); running = bh->running; spin_unlock(&bh->lock); if (running) ; else if (bh->high_prio) queue_work(system_bh_highpri_wq, &bh->bh); else queue_work(system_bh_wq, &bh->bh); } EXPORT_SYMBOL_GPL(usb_hcd_giveback_urb); /*-------------------------------------------------------------------------*/ /* Cancel all URBs pending on this endpoint and wait for the endpoint's * queue to drain completely. The caller must first insure that no more * URBs can be submitted for this endpoint. */ void usb_hcd_flush_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep) { struct usb_hcd *hcd; struct urb *urb; if (!ep) return; might_sleep(); hcd = bus_to_hcd(udev->bus); /* No more submits can occur */ spin_lock_irq(&hcd_urb_list_lock); rescan: list_for_each_entry_reverse(urb, &ep->urb_list, urb_list) { int is_in; if (urb->unlinked) continue; usb_get_urb (urb); is_in = usb_urb_dir_in(urb); spin_unlock(&hcd_urb_list_lock); /* kick hcd */ unlink1(hcd, urb, -ESHUTDOWN); dev_dbg (hcd->self.controller, "shutdown urb %pK ep%d%s-%s\n", urb, usb_endpoint_num(&ep->desc), is_in ? "in" : "out", usb_ep_type_string(usb_endpoint_type(&ep->desc))); usb_put_urb (urb); /* list contents may have changed */ spin_lock(&hcd_urb_list_lock); goto rescan; } spin_unlock_irq(&hcd_urb_list_lock); /* Wait until the endpoint queue is completely empty */ while (!list_empty (&ep->urb_list)) { spin_lock_irq(&hcd_urb_list_lock); /* The list may have changed while we acquired the spinlock */ urb = NULL; if (!list_empty (&ep->urb_list)) { urb = list_entry (ep->urb_list.prev, struct urb, urb_list); usb_get_urb (urb); } spin_unlock_irq(&hcd_urb_list_lock); if (urb) { usb_kill_urb (urb); usb_put_urb (urb); } } } /** * usb_hcd_alloc_bandwidth - check whether a new bandwidth setting exceeds * the bus bandwidth * @udev: target &usb_device * @new_config: new configuration to install * @cur_alt: the current alternate interface setting * @new_alt: alternate interface setting that is being installed * * To change configurations, pass in the new configuration in new_config, * and pass NULL for cur_alt and new_alt. * * To reset a device's configuration (put the device in the ADDRESSED state), * pass in NULL for new_config, cur_alt, and new_alt. * * To change alternate interface settings, pass in NULL for new_config, * pass in the current alternate interface setting in cur_alt, * and pass in the new alternate interface setting in new_alt. 
* * Return: An error if the requested bandwidth change exceeds the * bus bandwidth or host controller internal resources. */ int usb_hcd_alloc_bandwidth(struct usb_device *udev, struct usb_host_config *new_config, struct usb_host_interface *cur_alt, struct usb_host_interface *new_alt) { int num_intfs, i, j; struct usb_host_interface *alt = NULL; int ret = 0; struct usb_hcd *hcd; struct usb_host_endpoint *ep; hcd = bus_to_hcd(udev->bus); if (!hcd->driver->check_bandwidth) return 0; /* Configuration is being removed - set configuration 0 */ if (!new_config && !cur_alt) { for (i = 1; i < 16; ++i) { ep = udev->ep_out[i]; if (ep) hcd->driver->drop_endpoint(hcd, udev, ep); ep = udev->ep_in[i]; if (ep) hcd->driver->drop_endpoint(hcd, udev, ep); } hcd->driver->check_bandwidth(hcd, udev); return 0; } /* Check if the HCD says there's enough bandwidth. Enable all endpoints * each interface's alt setting 0 and ask the HCD to check the bandwidth * of the bus. There will always be bandwidth for endpoint 0, so it's * ok to exclude it. */ if (new_config) { num_intfs = new_config->desc.bNumInterfaces; /* Remove endpoints (except endpoint 0, which is always on the * schedule) from the old config from the schedule */ for (i = 1; i < 16; ++i) { ep = udev->ep_out[i]; if (ep) { ret = hcd->driver->drop_endpoint(hcd, udev, ep); if (ret < 0) goto reset; } ep = udev->ep_in[i]; if (ep) { ret = hcd->driver->drop_endpoint(hcd, udev, ep); if (ret < 0) goto reset; } } for (i = 0; i < num_intfs; ++i) { struct usb_host_interface *first_alt; int iface_num; first_alt = &new_config->intf_cache[i]->altsetting[0]; iface_num = first_alt->desc.bInterfaceNumber; /* Set up endpoints for alternate interface setting 0 */ alt = usb_find_alt_setting(new_config, iface_num, 0); if (!alt) /* No alt setting 0? Pick the first setting. */ alt = first_alt; for (j = 0; j < alt->desc.bNumEndpoints; j++) { ret = hcd->driver->add_endpoint(hcd, udev, &alt->endpoint[j]); if (ret < 0) goto reset; } } } if (cur_alt && new_alt) { struct usb_interface *iface = usb_ifnum_to_if(udev, cur_alt->desc.bInterfaceNumber); if (!iface) return -EINVAL; if (iface->resetting_device) { /* * The USB core just reset the device, so the xHCI host * and the device will think alt setting 0 is installed. * However, the USB core will pass in the alternate * setting installed before the reset as cur_alt. Dig * out the alternate setting 0 structure, or the first * alternate setting if a broken device doesn't have alt * setting 0. */ cur_alt = usb_altnum_to_altsetting(iface, 0); if (!cur_alt) cur_alt = &iface->altsetting[0]; } /* Drop all the endpoints in the current alt setting */ for (i = 0; i < cur_alt->desc.bNumEndpoints; i++) { ret = hcd->driver->drop_endpoint(hcd, udev, &cur_alt->endpoint[i]); if (ret < 0) goto reset; } /* Add all the endpoints in the new alt setting */ for (i = 0; i < new_alt->desc.bNumEndpoints; i++) { ret = hcd->driver->add_endpoint(hcd, udev, &new_alt->endpoint[i]); if (ret < 0) goto reset; } } ret = hcd->driver->check_bandwidth(hcd, udev); reset: if (ret < 0) hcd->driver->reset_bandwidth(hcd, udev); return ret; } /* Disables the endpoint: synchronizes with the hcd to make sure all * endpoint state is gone from hardware. usb_hcd_flush_endpoint() must * have been called previously. Use for set_configuration, set_interface, * driver removal, physical disconnect. * * example: a qh stored in ep->hcpriv, holding state related to endpoint * type, maxpacket size, toggle, halt status, and scheduling. 
*/ void usb_hcd_disable_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep) { struct usb_hcd *hcd; might_sleep(); hcd = bus_to_hcd(udev->bus); if (hcd->driver->endpoint_disable) hcd->driver->endpoint_disable(hcd, ep); } /** * usb_hcd_reset_endpoint - reset host endpoint state * @udev: USB device. * @ep: the endpoint to reset. * * Resets any host endpoint state such as the toggle bit, sequence * number and current window. */ void usb_hcd_reset_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); if (hcd->driver->endpoint_reset) hcd->driver->endpoint_reset(hcd, ep); else { int epnum = usb_endpoint_num(&ep->desc); int is_out = usb_endpoint_dir_out(&ep->desc); int is_control = usb_endpoint_xfer_control(&ep->desc); usb_settoggle(udev, epnum, is_out, 0); if (is_control) usb_settoggle(udev, epnum, !is_out, 0); } } /** * usb_alloc_streams - allocate bulk endpoint stream IDs. * @interface: alternate setting that includes all endpoints. * @eps: array of endpoints that need streams. * @num_eps: number of endpoints in the array. * @num_streams: number of streams to allocate. * @mem_flags: flags hcd should use to allocate memory. * * Sets up a group of bulk endpoints to have @num_streams stream IDs available. * Drivers may queue multiple transfers to different stream IDs, which may * complete in a different order than they were queued. * * Return: On success, the number of allocated streams. On failure, a negative * error code. */ int usb_alloc_streams(struct usb_interface *interface, struct usb_host_endpoint **eps, unsigned int num_eps, unsigned int num_streams, gfp_t mem_flags) { struct usb_hcd *hcd; struct usb_device *dev; int i, ret; dev = interface_to_usbdev(interface); hcd = bus_to_hcd(dev->bus); if (!hcd->driver->alloc_streams || !hcd->driver->free_streams) return -EINVAL; if (dev->speed < USB_SPEED_SUPER) return -EINVAL; if (dev->state < USB_STATE_CONFIGURED) return -ENODEV; for (i = 0; i < num_eps; i++) { /* Streams only apply to bulk endpoints. */ if (!usb_endpoint_xfer_bulk(&eps[i]->desc)) return -EINVAL; /* Re-alloc is not allowed */ if (eps[i]->streams) return -EINVAL; } ret = hcd->driver->alloc_streams(hcd, dev, eps, num_eps, num_streams, mem_flags); if (ret < 0) return ret; for (i = 0; i < num_eps; i++) eps[i]->streams = ret; return ret; } EXPORT_SYMBOL_GPL(usb_alloc_streams); /** * usb_free_streams - free bulk endpoint stream IDs. * @interface: alternate setting that includes all endpoints. * @eps: array of endpoints to remove streams from. * @num_eps: number of endpoints in the array. * @mem_flags: flags hcd should use to allocate memory. * * Reverts a group of bulk endpoints back to not using stream IDs. * Can fail if we are given bad arguments, or HCD is broken. * * Return: 0 on success. On failure, a negative error code. 
*/ int usb_free_streams(struct usb_interface *interface, struct usb_host_endpoint **eps, unsigned int num_eps, gfp_t mem_flags) { struct usb_hcd *hcd; struct usb_device *dev; int i, ret; dev = interface_to_usbdev(interface); hcd = bus_to_hcd(dev->bus); if (dev->speed < USB_SPEED_SUPER) return -EINVAL; /* Double-free is not allowed */ for (i = 0; i < num_eps; i++) if (!eps[i] || !eps[i]->streams) return -EINVAL; ret = hcd->driver->free_streams(hcd, dev, eps, num_eps, mem_flags); if (ret < 0) return ret; for (i = 0; i < num_eps; i++) eps[i]->streams = 0; return ret; } EXPORT_SYMBOL_GPL(usb_free_streams); /* Protect against drivers that try to unlink URBs after the device * is gone, by waiting until all unlinks for @udev are finished. * Since we don't currently track URBs by device, simply wait until * nothing is running in the locked region of usb_hcd_unlink_urb(). */ void usb_hcd_synchronize_unlinks(struct usb_device *udev) { spin_lock_irq(&hcd_urb_unlink_lock); spin_unlock_irq(&hcd_urb_unlink_lock); } /*-------------------------------------------------------------------------*/ /* called in any context */ int usb_hcd_get_frame_number (struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); if (!HCD_RH_RUNNING(hcd)) return -ESHUTDOWN; return hcd->driver->get_frame_number (hcd); } /*-------------------------------------------------------------------------*/ #ifdef CONFIG_USB_HCD_TEST_MODE static void usb_ehset_completion(struct urb *urb) { struct completion *done = urb->context; complete(done); } /* * Allocate and initialize a control URB. This request will be used by the * EHSET SINGLE_STEP_SET_FEATURE test in which the DATA and STATUS stages * of the GetDescriptor request are sent 15 seconds after the SETUP stage. * Return NULL if failed. 
*/ static struct urb *request_single_step_set_feature_urb( struct usb_device *udev, void *dr, void *buf, struct completion *done) { struct urb *urb; struct usb_hcd *hcd = bus_to_hcd(udev->bus); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return NULL; urb->pipe = usb_rcvctrlpipe(udev, 0); urb->ep = &udev->ep0; urb->dev = udev; urb->setup_packet = (void *)dr; urb->transfer_buffer = buf; urb->transfer_buffer_length = USB_DT_DEVICE_SIZE; urb->complete = usb_ehset_completion; urb->status = -EINPROGRESS; urb->actual_length = 0; urb->transfer_flags = URB_DIR_IN; usb_get_urb(urb); atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); if (map_urb_for_dma(hcd, urb, GFP_KERNEL)) { usb_put_urb(urb); usb_free_urb(urb); return NULL; } urb->context = done; return urb; } int ehset_single_step_set_feature(struct usb_hcd *hcd, int port) { int retval = -ENOMEM; struct usb_ctrlrequest *dr; struct urb *urb; struct usb_device *udev; struct usb_device_descriptor *buf; DECLARE_COMPLETION_ONSTACK(done); /* Obtain udev of the rhub's child port */ udev = usb_hub_find_child(hcd->self.root_hub, port); if (!udev) { dev_err(hcd->self.controller, "No device attached to the RootHub\n"); return -ENODEV; } buf = kmalloc(USB_DT_DEVICE_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!dr) { kfree(buf); return -ENOMEM; } /* Fill Setup packet for GetDescriptor */ dr->bRequestType = USB_DIR_IN; dr->bRequest = USB_REQ_GET_DESCRIPTOR; dr->wValue = cpu_to_le16(USB_DT_DEVICE << 8); dr->wIndex = 0; dr->wLength = cpu_to_le16(USB_DT_DEVICE_SIZE); urb = request_single_step_set_feature_urb(udev, dr, buf, &done); if (!urb) goto cleanup; /* Submit just the SETUP stage */ retval = hcd->driver->submit_single_step_set_feature(hcd, urb, 1); if (retval) goto out1; if (!wait_for_completion_timeout(&done, msecs_to_jiffies(2000))) { usb_kill_urb(urb); retval = -ETIMEDOUT; dev_err(hcd->self.controller, "%s SETUP stage timed out on ep0\n", __func__); goto out1; } msleep(15 * 1000); /* Complete remaining DATA and STATUS stages using the same URB */ urb->status = -EINPROGRESS; usb_get_urb(urb); atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); retval = hcd->driver->submit_single_step_set_feature(hcd, urb, 0); if (!retval && !wait_for_completion_timeout(&done, msecs_to_jiffies(2000))) { usb_kill_urb(urb); retval = -ETIMEDOUT; dev_err(hcd->self.controller, "%s IN stage timed out on ep0\n", __func__); } out1: usb_free_urb(urb); cleanup: kfree(dr); kfree(buf); return retval; } EXPORT_SYMBOL_GPL(ehset_single_step_set_feature); #endif /* CONFIG_USB_HCD_TEST_MODE */ /*-------------------------------------------------------------------------*/ #ifdef CONFIG_PM int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg) { struct usb_hcd *hcd = bus_to_hcd(rhdev->bus); int status; int old_state = hcd->state; dev_dbg(&rhdev->dev, "bus %ssuspend, wakeup %d\n", (PMSG_IS_AUTO(msg) ? "auto-" : ""), rhdev->do_remote_wakeup); if (HCD_DEAD(hcd)) { dev_dbg(&rhdev->dev, "skipped %s of dead bus\n", "suspend"); return 0; } if (!hcd->driver->bus_suspend) { status = -ENOENT; } else { clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); hcd->state = HC_STATE_QUIESCING; status = hcd->driver->bus_suspend(hcd); } if (status == 0) { usb_set_device_state(rhdev, USB_STATE_SUSPENDED); hcd->state = HC_STATE_SUSPENDED; if (!PMSG_IS_AUTO(msg)) usb_phy_roothub_suspend(hcd->self.sysdev, hcd->phy_roothub); /* Did we race with a root-hub wakeup event? 
*/ if (rhdev->do_remote_wakeup) { char buffer[6]; status = hcd->driver->hub_status_data(hcd, buffer); if (status != 0) { dev_dbg(&rhdev->dev, "suspend raced with wakeup event\n"); hcd_bus_resume(rhdev, PMSG_AUTO_RESUME); status = -EBUSY; } } } else { spin_lock_irq(&hcd_root_hub_lock); if (!HCD_DEAD(hcd)) { set_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); hcd->state = old_state; } spin_unlock_irq(&hcd_root_hub_lock); dev_dbg(&rhdev->dev, "bus %s fail, err %d\n", "suspend", status); } return status; } int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg) { struct usb_hcd *hcd = bus_to_hcd(rhdev->bus); int status; int old_state = hcd->state; dev_dbg(&rhdev->dev, "usb %sresume\n", (PMSG_IS_AUTO(msg) ? "auto-" : "")); if (HCD_DEAD(hcd)) { dev_dbg(&rhdev->dev, "skipped %s of dead bus\n", "resume"); return 0; } if (!PMSG_IS_AUTO(msg)) { status = usb_phy_roothub_resume(hcd->self.sysdev, hcd->phy_roothub); if (status) return status; } if (!hcd->driver->bus_resume) return -ENOENT; if (HCD_RH_RUNNING(hcd)) return 0; hcd->state = HC_STATE_RESUMING; status = hcd->driver->bus_resume(hcd); clear_bit(HCD_FLAG_WAKEUP_PENDING, &hcd->flags); if (status == 0) status = usb_phy_roothub_calibrate(hcd->phy_roothub); if (status == 0) { struct usb_device *udev; int port1; spin_lock_irq(&hcd_root_hub_lock); if (!HCD_DEAD(hcd)) { usb_set_device_state(rhdev, rhdev->actconfig ? USB_STATE_CONFIGURED : USB_STATE_ADDRESS); set_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); hcd->state = HC_STATE_RUNNING; } spin_unlock_irq(&hcd_root_hub_lock); /* * Check whether any of the enabled ports on the root hub are * unsuspended. If they are then a TRSMRCY delay is needed * (this is what the USB-2 spec calls a "global resume"). * Otherwise we can skip the delay. */ usb_hub_for_each_child(rhdev, port1, udev) { if (udev->state != USB_STATE_NOTATTACHED && !udev->port_is_suspended) { usleep_range(10000, 11000); /* TRSMRCY */ break; } } } else { hcd->state = old_state; usb_phy_roothub_suspend(hcd->self.sysdev, hcd->phy_roothub); dev_dbg(&rhdev->dev, "bus %s fail, err %d\n", "resume", status); if (status != -ESHUTDOWN) usb_hc_died(hcd); } return status; } /* Workqueue routine for root-hub remote wakeup */ static void hcd_resume_work(struct work_struct *work) { struct usb_hcd *hcd = container_of(work, struct usb_hcd, wakeup_work); struct usb_device *udev = hcd->self.root_hub; usb_remote_wakeup(udev); } /** * usb_hcd_resume_root_hub - called by HCD to resume its root hub * @hcd: host controller for this root hub * * The USB host controller calls this function when its root hub is * suspended (with the remote wakeup feature enabled) and a remote * wakeup request is received. The routine submits a workqueue request * to resume the root hub (that is, manage its downstream ports again). 
*/ void usb_hcd_resume_root_hub (struct usb_hcd *hcd) { unsigned long flags; spin_lock_irqsave (&hcd_root_hub_lock, flags); if (hcd->rh_registered) { pm_wakeup_event(&hcd->self.root_hub->dev, 0); set_bit(HCD_FLAG_WAKEUP_PENDING, &hcd->flags); queue_work(pm_wq, &hcd->wakeup_work); } spin_unlock_irqrestore (&hcd_root_hub_lock, flags); } EXPORT_SYMBOL_GPL(usb_hcd_resume_root_hub); #endif /* CONFIG_PM */ /*-------------------------------------------------------------------------*/ #ifdef CONFIG_USB_OTG /** * usb_bus_start_enum - start immediate enumeration (for OTG) * @bus: the bus (must use hcd framework) * @port_num: 1-based number of port; usually bus->otg_port * Context: atomic * * Starts enumeration, with an immediate reset followed later by * hub_wq identifying and possibly configuring the device. * This is needed by OTG controller drivers, where it helps meet * HNP protocol timing requirements for starting a port reset. * * Return: 0 if successful. */ int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num) { struct usb_hcd *hcd; int status = -EOPNOTSUPP; /* NOTE: since HNP can't start by grabbing the bus's address0_sem, * boards with root hubs hooked up to internal devices (instead of * just the OTG port) may need more attention to resetting... */ hcd = bus_to_hcd(bus); if (port_num && hcd->driver->start_port_reset) status = hcd->driver->start_port_reset(hcd, port_num); /* allocate hub_wq shortly after (first) root port reset finishes; * it may issue others, until at least 50 msecs have passed. */ if (status == 0) mod_timer(&hcd->rh_timer, jiffies + msecs_to_jiffies(10)); return status; } EXPORT_SYMBOL_GPL(usb_bus_start_enum); #endif /*-------------------------------------------------------------------------*/ /** * usb_hcd_irq - hook IRQs to HCD framework (bus glue) * @irq: the IRQ being raised * @__hcd: pointer to the HCD whose IRQ is being signaled * * If the controller isn't HALTed, calls the driver's irq handler. * Checks whether the controller is now dead. * * Return: %IRQ_HANDLED if the IRQ was handled. %IRQ_NONE otherwise. */ irqreturn_t usb_hcd_irq (int irq, void *__hcd) { struct usb_hcd *hcd = __hcd; irqreturn_t rc; if (unlikely(HCD_DEAD(hcd) || !HCD_HW_ACCESSIBLE(hcd))) rc = IRQ_NONE; else if (hcd->driver->irq(hcd) == IRQ_NONE) rc = IRQ_NONE; else rc = IRQ_HANDLED; return rc; } EXPORT_SYMBOL_GPL(usb_hcd_irq); /*-------------------------------------------------------------------------*/ /* Workqueue routine for when the root-hub has died. */ static void hcd_died_work(struct work_struct *work) { struct usb_hcd *hcd = container_of(work, struct usb_hcd, died_work); static char *env[] = { "ERROR=DEAD", NULL }; /* Notify user space that the host controller has died */ kobject_uevent_env(&hcd->self.root_hub->dev.kobj, KOBJ_OFFLINE, env); } /** * usb_hc_died - report abnormal shutdown of a host controller (bus glue) * @hcd: pointer to the HCD representing the controller * * This is called by bus glue to report a USB host controller that died * while operations may still have been pending. It's called automatically * by the PCI glue, so only glue for non-PCI busses should need to call it. * * Only call this function with the primary HCD. 
*/ void usb_hc_died (struct usb_hcd *hcd) { unsigned long flags; dev_err (hcd->self.controller, "HC died; cleaning up\n"); spin_lock_irqsave (&hcd_root_hub_lock, flags); clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); set_bit(HCD_FLAG_DEAD, &hcd->flags); if (hcd->rh_registered) { clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); /* make hub_wq clean up old urbs and devices */ usb_set_device_state (hcd->self.root_hub, USB_STATE_NOTATTACHED); usb_kick_hub_wq(hcd->self.root_hub); } if (usb_hcd_is_primary_hcd(hcd) && hcd->shared_hcd) { hcd = hcd->shared_hcd; clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); set_bit(HCD_FLAG_DEAD, &hcd->flags); if (hcd->rh_registered) { clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); /* make hub_wq clean up old urbs and devices */ usb_set_device_state(hcd->self.root_hub, USB_STATE_NOTATTACHED); usb_kick_hub_wq(hcd->self.root_hub); } } /* Handle the case where this function gets called with a shared HCD */ if (usb_hcd_is_primary_hcd(hcd)) schedule_work(&hcd->died_work); else schedule_work(&hcd->primary_hcd->died_work); spin_unlock_irqrestore (&hcd_root_hub_lock, flags); /* Make sure that the other roothub is also deallocated. */ } EXPORT_SYMBOL_GPL (usb_hc_died); /*-------------------------------------------------------------------------*/ static void init_giveback_urb_bh(struct giveback_urb_bh *bh) { spin_lock_init(&bh->lock); INIT_LIST_HEAD(&bh->head); INIT_WORK(&bh->bh, usb_giveback_urb_bh); } struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, struct device *sysdev, struct device *dev, const char *bus_name, struct usb_hcd *primary_hcd) { struct usb_hcd *hcd; hcd = kzalloc(sizeof(*hcd) + driver->hcd_priv_size, GFP_KERNEL); if (!hcd) return NULL; if (primary_hcd == NULL) { hcd->address0_mutex = kmalloc(sizeof(*hcd->address0_mutex), GFP_KERNEL); if (!hcd->address0_mutex) { kfree(hcd); dev_dbg(dev, "hcd address0 mutex alloc failed\n"); return NULL; } mutex_init(hcd->address0_mutex); hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex), GFP_KERNEL); if (!hcd->bandwidth_mutex) { kfree(hcd->address0_mutex); kfree(hcd); dev_dbg(dev, "hcd bandwidth mutex alloc failed\n"); return NULL; } mutex_init(hcd->bandwidth_mutex); dev_set_drvdata(dev, hcd); } else { mutex_lock(&usb_port_peer_mutex); hcd->address0_mutex = primary_hcd->address0_mutex; hcd->bandwidth_mutex = primary_hcd->bandwidth_mutex; hcd->primary_hcd = primary_hcd; primary_hcd->primary_hcd = primary_hcd; hcd->shared_hcd = primary_hcd; primary_hcd->shared_hcd = hcd; mutex_unlock(&usb_port_peer_mutex); } kref_init(&hcd->kref); usb_bus_init(&hcd->self); hcd->self.controller = dev; hcd->self.sysdev = sysdev; hcd->self.bus_name = bus_name; timer_setup(&hcd->rh_timer, rh_timer_func, 0); #ifdef CONFIG_PM INIT_WORK(&hcd->wakeup_work, hcd_resume_work); #endif INIT_WORK(&hcd->died_work, hcd_died_work); hcd->driver = driver; hcd->speed = driver->flags & HCD_MASK; hcd->product_desc = (driver->product_desc) ? driver->product_desc : "USB Host Controller"; return hcd; } EXPORT_SYMBOL_GPL(__usb_create_hcd); /** * usb_create_shared_hcd - create and initialize an HCD structure * @driver: HC driver that will use this hcd * @dev: device for this HC, stored in hcd->self.controller * @bus_name: value to store in hcd->self.bus_name * @primary_hcd: a pointer to the usb_hcd structure that is sharing the * PCI device. Only allocate certain resources for the primary HCD * * Context: task context, might sleep. * * Allocate a struct usb_hcd, with extra space at the end for the * HC driver's private data. 
Initialize the generic members of the * hcd structure. * * Return: On success, a pointer to the created and initialized HCD structure. * On failure (e.g. if memory is unavailable), %NULL. */ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, struct device *dev, const char *bus_name, struct usb_hcd *primary_hcd) { return __usb_create_hcd(driver, dev, dev, bus_name, primary_hcd); } EXPORT_SYMBOL_GPL(usb_create_shared_hcd); /** * usb_create_hcd - create and initialize an HCD structure * @driver: HC driver that will use this hcd * @dev: device for this HC, stored in hcd->self.controller * @bus_name: value to store in hcd->self.bus_name * * Context: task context, might sleep. * * Allocate a struct usb_hcd, with extra space at the end for the * HC driver's private data. Initialize the generic members of the * hcd structure. * * Return: On success, a pointer to the created and initialized HCD * structure. On failure (e.g. if memory is unavailable), %NULL. */ struct usb_hcd *usb_create_hcd(const struct hc_driver *driver, struct device *dev, const char *bus_name) { return __usb_create_hcd(driver, dev, dev, bus_name, NULL); } EXPORT_SYMBOL_GPL(usb_create_hcd); /* * Roothubs that share one PCI device must also share the bandwidth mutex. * Don't deallocate the bandwidth_mutex until the last shared usb_hcd is * deallocated. * * Make sure to deallocate the bandwidth_mutex only when the last HCD is * freed. When hcd_release() is called for either hcd in a peer set, * invalidate the peer's ->shared_hcd and ->primary_hcd pointers. */ static void hcd_release(struct kref *kref) { struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref); mutex_lock(&usb_port_peer_mutex); if (hcd->shared_hcd) { struct usb_hcd *peer = hcd->shared_hcd; peer->shared_hcd = NULL; peer->primary_hcd = NULL; } else { kfree(hcd->address0_mutex); kfree(hcd->bandwidth_mutex); } mutex_unlock(&usb_port_peer_mutex); kfree(hcd); } struct usb_hcd *usb_get_hcd (struct usb_hcd *hcd) { if (hcd) kref_get (&hcd->kref); return hcd; } EXPORT_SYMBOL_GPL(usb_get_hcd); void usb_put_hcd (struct usb_hcd *hcd) { if (hcd) kref_put (&hcd->kref, hcd_release); } EXPORT_SYMBOL_GPL(usb_put_hcd); int usb_hcd_is_primary_hcd(struct usb_hcd *hcd) { if (!hcd->primary_hcd) return 1; return hcd == hcd->primary_hcd; } EXPORT_SYMBOL_GPL(usb_hcd_is_primary_hcd); int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1) { if (!hcd->driver->find_raw_port_number) return port1; return hcd->driver->find_raw_port_number(hcd, port1); } static int usb_hcd_request_irqs(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags) { int retval; if (hcd->driver->irq) { snprintf(hcd->irq_descr, sizeof(hcd->irq_descr), "%s:usb%d", hcd->driver->description, hcd->self.busnum); retval = request_irq(irqnum, &usb_hcd_irq, irqflags, hcd->irq_descr, hcd); if (retval != 0) { dev_err(hcd->self.controller, "request interrupt %d failed\n", irqnum); return retval; } hcd->irq = irqnum; dev_info(hcd->self.controller, "irq %d, %s 0x%08llx\n", irqnum, (hcd->driver->flags & HCD_MEMORY) ? "io mem" : "io port", (unsigned long long)hcd->rsrc_start); } else { hcd->irq = 0; if (hcd->rsrc_start) dev_info(hcd->self.controller, "%s 0x%08llx\n", (hcd->driver->flags & HCD_MEMORY) ? 
"io mem" : "io port", (unsigned long long)hcd->rsrc_start); } return 0; } /* * Before we free this root hub, flush in-flight peering attempts * and disable peer lookups */ static void usb_put_invalidate_rhdev(struct usb_hcd *hcd) { struct usb_device *rhdev; mutex_lock(&usb_port_peer_mutex); rhdev = hcd->self.root_hub; hcd->self.root_hub = NULL; mutex_unlock(&usb_port_peer_mutex); usb_put_dev(rhdev); } /** * usb_stop_hcd - Halt the HCD * @hcd: the usb_hcd that has to be halted * * Stop the root-hub polling timer and invoke the HCD's ->stop callback. */ static void usb_stop_hcd(struct usb_hcd *hcd) { hcd->rh_pollable = 0; clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); del_timer_sync(&hcd->rh_timer); hcd->driver->stop(hcd); hcd->state = HC_STATE_HALT; /* In case the HCD restarted the timer, stop it again. */ clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); del_timer_sync(&hcd->rh_timer); } /** * usb_add_hcd - finish generic HCD structure initialization and register * @hcd: the usb_hcd structure to initialize * @irqnum: Interrupt line to allocate * @irqflags: Interrupt type flags * * Finish the remaining parts of generic HCD initialization: allocate the * buffers of consistent memory, register the bus, request the IRQ line, * and call the driver's reset() and start() routines. */ int usb_add_hcd(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags) { int retval; struct usb_device *rhdev; struct usb_hcd *shared_hcd; if (!hcd->skip_phy_initialization) { if (usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); if (IS_ERR(hcd->phy_roothub)) return PTR_ERR(hcd->phy_roothub); } else { hcd->phy_roothub = usb_phy_roothub_alloc_usb3_phy(hcd->self.sysdev); if (IS_ERR(hcd->phy_roothub)) return PTR_ERR(hcd->phy_roothub); } retval = usb_phy_roothub_init(hcd->phy_roothub); if (retval) return retval; retval = usb_phy_roothub_set_mode(hcd->phy_roothub, PHY_MODE_USB_HOST_SS); if (retval) retval = usb_phy_roothub_set_mode(hcd->phy_roothub, PHY_MODE_USB_HOST); if (retval) goto err_usb_phy_roothub_power_on; retval = usb_phy_roothub_power_on(hcd->phy_roothub); if (retval) goto err_usb_phy_roothub_power_on; } dev_info(hcd->self.controller, "%s\n", hcd->product_desc); switch (authorized_default) { case USB_AUTHORIZE_NONE: hcd->dev_policy = USB_DEVICE_AUTHORIZE_NONE; break; case USB_AUTHORIZE_INTERNAL: hcd->dev_policy = USB_DEVICE_AUTHORIZE_INTERNAL; break; case USB_AUTHORIZE_ALL: case USB_AUTHORIZE_WIRED: default: hcd->dev_policy = USB_DEVICE_AUTHORIZE_ALL; break; } set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); /* per default all interfaces are authorized */ set_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); /* HC is in reset state, but accessible. Now do the one-time init, * bottom up so that hcds can customize the root hubs before hub_wq * starts talking to them. (Note, bus id is assigned early too.) 
*/ retval = hcd_buffer_create(hcd); if (retval != 0) { dev_dbg(hcd->self.sysdev, "pool alloc failed\n"); goto err_create_buf; } retval = usb_register_bus(&hcd->self); if (retval < 0) goto err_register_bus; rhdev = usb_alloc_dev(NULL, &hcd->self, 0); if (rhdev == NULL) { dev_err(hcd->self.sysdev, "unable to allocate root hub\n"); retval = -ENOMEM; goto err_allocate_root_hub; } mutex_lock(&usb_port_peer_mutex); hcd->self.root_hub = rhdev; mutex_unlock(&usb_port_peer_mutex); rhdev->rx_lanes = 1; rhdev->tx_lanes = 1; rhdev->ssp_rate = USB_SSP_GEN_UNKNOWN; switch (hcd->speed) { case HCD_USB11: rhdev->speed = USB_SPEED_FULL; break; case HCD_USB2: rhdev->speed = USB_SPEED_HIGH; break; case HCD_USB3: rhdev->speed = USB_SPEED_SUPER; break; case HCD_USB32: rhdev->rx_lanes = 2; rhdev->tx_lanes = 2; rhdev->ssp_rate = USB_SSP_GEN_2x2; rhdev->speed = USB_SPEED_SUPER_PLUS; break; case HCD_USB31: rhdev->ssp_rate = USB_SSP_GEN_2x1; rhdev->speed = USB_SPEED_SUPER_PLUS; break; default: retval = -EINVAL; goto err_set_rh_speed; } /* wakeup flag init defaults to "everything works" for root hubs, * but drivers can override it in reset() if needed, along with * recording the overall controller's system wakeup capability. */ device_set_wakeup_capable(&rhdev->dev, 1); /* HCD_FLAG_RH_RUNNING doesn't matter until the root hub is * registered. But since the controller can die at any time, * let's initialize the flag before touching the hardware. */ set_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); /* "reset" is misnamed; its role is now one-time init. the controller * should already have been reset (and boot firmware kicked off etc). */ if (hcd->driver->reset) { retval = hcd->driver->reset(hcd); if (retval < 0) { dev_err(hcd->self.controller, "can't setup: %d\n", retval); goto err_hcd_driver_setup; } } hcd->rh_pollable = 1; retval = usb_phy_roothub_calibrate(hcd->phy_roothub); if (retval) goto err_hcd_driver_setup; /* NOTE: root hub and controller capabilities may not be the same */ if (device_can_wakeup(hcd->self.controller) && device_can_wakeup(&hcd->self.root_hub->dev)) dev_dbg(hcd->self.controller, "supports USB remote wakeup\n"); /* initialize BHs */ init_giveback_urb_bh(&hcd->high_prio_bh); hcd->high_prio_bh.high_prio = true; init_giveback_urb_bh(&hcd->low_prio_bh); /* enable irqs just before we start the controller, * if the BIOS provides legacy PCI irqs. 
*/ if (usb_hcd_is_primary_hcd(hcd) && irqnum) { retval = usb_hcd_request_irqs(hcd, irqnum, irqflags); if (retval) goto err_request_irq; } hcd->state = HC_STATE_RUNNING; retval = hcd->driver->start(hcd); if (retval < 0) { dev_err(hcd->self.controller, "startup error %d\n", retval); goto err_hcd_driver_start; } /* starting here, usbcore will pay attention to the shared HCD roothub */ shared_hcd = hcd->shared_hcd; if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) { retval = register_root_hub(shared_hcd); if (retval != 0) goto err_register_root_hub; if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd)) usb_hcd_poll_rh_status(shared_hcd); } /* starting here, usbcore will pay attention to this root hub */ if (!HCD_DEFER_RH_REGISTER(hcd)) { retval = register_root_hub(hcd); if (retval != 0) goto err_register_root_hub; if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) usb_hcd_poll_rh_status(hcd); } return retval; err_register_root_hub: usb_stop_hcd(hcd); err_hcd_driver_start: if (usb_hcd_is_primary_hcd(hcd) && hcd->irq > 0) free_irq(irqnum, hcd); err_request_irq: err_hcd_driver_setup: err_set_rh_speed: usb_put_invalidate_rhdev(hcd); err_allocate_root_hub: usb_deregister_bus(&hcd->self); err_register_bus: hcd_buffer_destroy(hcd); err_create_buf: usb_phy_roothub_power_off(hcd->phy_roothub); err_usb_phy_roothub_power_on: usb_phy_roothub_exit(hcd->phy_roothub); return retval; } EXPORT_SYMBOL_GPL(usb_add_hcd); /** * usb_remove_hcd - shutdown processing for generic HCDs * @hcd: the usb_hcd structure to remove * * Context: task context, might sleep. * * Disconnects the root hub, then reverses the effects of usb_add_hcd(), * invoking the HCD's stop() method. */ void usb_remove_hcd(struct usb_hcd *hcd) { struct usb_device *rhdev; bool rh_registered; if (!hcd) { pr_debug("%s: hcd is NULL\n", __func__); return; } rhdev = hcd->self.root_hub; dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); usb_get_dev(rhdev); clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); if (HC_IS_RUNNING (hcd->state)) hcd->state = HC_STATE_QUIESCING; dev_dbg(hcd->self.controller, "roothub graceful disconnect\n"); spin_lock_irq (&hcd_root_hub_lock); rh_registered = hcd->rh_registered; hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); #ifdef CONFIG_PM cancel_work_sync(&hcd->wakeup_work); #endif cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); if (rh_registered) usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); /* * flush_work() isn't needed here because: * - driver's disconnect() called from usb_disconnect() should * make sure its URBs are completed during the disconnect() * callback * * - it is too late to run complete() here since driver may have * been removed already now */ /* Prevent any more root-hub status calls from the timer. * The HCD might still restart the timer (if a port status change * interrupt occurs), but usb_hcd_poll_rh_status() won't invoke * the hub_status_data() callback. 
*/ usb_stop_hcd(hcd); if (usb_hcd_is_primary_hcd(hcd)) { if (hcd->irq > 0) free_irq(hcd->irq, hcd); } usb_deregister_bus(&hcd->self); hcd_buffer_destroy(hcd); usb_phy_roothub_power_off(hcd->phy_roothub); usb_phy_roothub_exit(hcd->phy_roothub); usb_put_invalidate_rhdev(hcd); hcd->flags = 0; } EXPORT_SYMBOL_GPL(usb_remove_hcd); void usb_hcd_platform_shutdown(struct platform_device *dev) { struct usb_hcd *hcd = platform_get_drvdata(dev); /* No need for pm_runtime_put(), we're shutting down */ pm_runtime_get_sync(&dev->dev); if (hcd->driver->shutdown) hcd->driver->shutdown(hcd); } EXPORT_SYMBOL_GPL(usb_hcd_platform_shutdown); int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, dma_addr_t dma, size_t size) { int err; void *local_mem; hcd->localmem_pool = devm_gen_pool_create(hcd->self.sysdev, 4, dev_to_node(hcd->self.sysdev), dev_name(hcd->self.sysdev)); if (IS_ERR(hcd->localmem_pool)) return PTR_ERR(hcd->localmem_pool); /* * if a physical SRAM address was passed, map it, otherwise * allocate system memory as a buffer. */ if (phys_addr) local_mem = devm_memremap(hcd->self.sysdev, phys_addr, size, MEMREMAP_WC); else local_mem = dmam_alloc_attrs(hcd->self.sysdev, size, &dma, GFP_KERNEL, DMA_ATTR_WRITE_COMBINE); if (IS_ERR_OR_NULL(local_mem)) { if (!local_mem) return -ENOMEM; return PTR_ERR(local_mem); } /* * Here we pass a dma_addr_t but the arg type is a phys_addr_t. * It's not backed by system memory and thus there's no kernel mapping * for it. */ err = gen_pool_add_virt(hcd->localmem_pool, (unsigned long)local_mem, dma, size, dev_to_node(hcd->self.sysdev)); if (err < 0) { dev_err(hcd->self.sysdev, "gen_pool_add_virt failed with %d\n", err); return err; } return 0; } EXPORT_SYMBOL_GPL(usb_hcd_setup_local_mem); /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_USB_MON) const struct usb_mon_operations *mon_ops; /* * The registration is unlocked. * We do it this way because we do not want to lock in hot paths. * * Notice that the code is minimally error-proof. Because usbmon needs * symbols from usbcore, usbcore gets referenced and cannot be unloaded first. */ int usb_mon_register(const struct usb_mon_operations *ops) { if (mon_ops) return -EBUSY; mon_ops = ops; mb(); return 0; } EXPORT_SYMBOL_GPL (usb_mon_register); void usb_mon_deregister (void) { if (mon_ops == NULL) { printk(KERN_ERR "USB: monitor was not registered\n"); return; } mon_ops = NULL; mb(); } EXPORT_SYMBOL_GPL (usb_mon_deregister); #endif /* CONFIG_USB_MON || CONFIG_USB_MON_MODULE */
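/*
 * Example: minimal bus-glue usage of the HCD core above. This is an
 * illustrative sketch for a hypothetical "foo" platform controller, not code
 * from this file; foo_hc_driver's operational callbacks are assumed to be
 * implemented elsewhere by a real driver.
 */
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/usb.h>
#include <linux/usb/hcd.h>

static const struct hc_driver foo_hc_driver = {
	.description	= "foo-hcd",
	.product_desc	= "FOO USB 2.0 Host Controller",
	.hcd_priv_size	= 0,	/* sizeof(struct foo_hcd) in a real driver */
	.flags		= HCD_MEMORY | HCD_USB2,
	/*
	 * .reset, .start, .stop, .urb_enqueue, .urb_dequeue,
	 * .hub_status_data, .hub_control, ... must be filled in by a
	 * real host controller driver.
	 */
};

static int foo_hcd_probe(struct platform_device *pdev)
{
	struct usb_hcd *hcd;
	struct resource *res;
	int irq, ret;

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;

	/* Allocates the usb_hcd and initializes hcd->self (the bus). */
	hcd = usb_create_hcd(&foo_hc_driver, &pdev->dev, dev_name(&pdev->dev));
	if (!hcd)
		return -ENOMEM;

	hcd->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(hcd->regs)) {
		ret = PTR_ERR(hcd->regs);
		goto err_put;
	}
	hcd->rsrc_start = res->start;
	hcd->rsrc_len = resource_size(res);

	/* Registers the bus and root hub, requests the IRQ, runs ->reset()/->start(). */
	ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
	if (ret)
		goto err_put;

	platform_set_drvdata(pdev, hcd);
	return 0;

err_put:
	usb_put_hcd(hcd);
	return ret;
}

static void foo_hcd_remove(struct platform_device *pdev)
{
	struct usb_hcd *hcd = platform_get_drvdata(pdev);

	usb_remove_hcd(hcd);	/* reverses usb_add_hcd(), invokes ->stop() */
	usb_put_hcd(hcd);	/* drops the last reference, frees the hcd */
}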
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * MSI GT683R led driver
 *
 * Copyright (c) 2014 Janne Kanniainen <janne.kanniainen@gmail.com>
 */

#include <linux/device.h>
#include <linux/hid.h>
#include <linux/kernel.h>
#include <linux/leds.h>
#include <linux/module.h>

#include "hid-ids.h"

#define GT683R_BUFFER_SIZE	8

/*
 * GT683R_LED_OFF: all LEDs are off
 * GT683R_LED_AUDIO: LEDs brightness depends on sound level
 * GT683R_LED_BREATHING: LEDs brightness varies at human breathing rate
 * GT683R_LED_NORMAL: LEDs are fully on when enabled
 */
enum gt683r_led_mode {
	GT683R_LED_OFF = 0,
	GT683R_LED_AUDIO = 2,
	GT683R_LED_BREATHING = 3,
	GT683R_LED_NORMAL = 5
};

enum gt683r_panels {
	GT683R_LED_BACK = 0,
	GT683R_LED_SIDE = 1,
	GT683R_LED_FRONT = 2,
	GT683R_LED_COUNT,
};

static const char * const gt683r_panel_names[] = {
	"back",
	"side",
	"front",
};

struct gt683r_led {
	struct hid_device *hdev;
	struct led_classdev led_devs[GT683R_LED_COUNT];
	struct mutex lock;
	struct work_struct work;
	enum led_brightness brightnesses[GT683R_LED_COUNT];
	enum gt683r_led_mode mode;
};

static const struct hid_device_id gt683r_led_id[] = {
	{ HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
	{ }
};
MODULE_DEVICE_TABLE(hid, gt683r_led_id);

static void gt683r_brightness_set(struct led_classdev *led_cdev,
				  enum led_brightness brightness)
{
	int i;
	struct device *dev = led_cdev->dev->parent;
	struct hid_device *hdev = to_hid_device(dev);
	struct gt683r_led *led = hid_get_drvdata(hdev);

	for (i = 0; i < GT683R_LED_COUNT; i++) {
		if (led_cdev == &led->led_devs[i])
			break;
	}

	if (i < GT683R_LED_COUNT) {
		led->brightnesses[i] = brightness;
		schedule_work(&led->work);
	}
}

static ssize_t mode_show(struct device *dev,
			 struct device_attribute *attr,
			 char *buf)
{
	u8 sysfs_mode;
	struct hid_device *hdev = to_hid_device(dev->parent);
	struct gt683r_led *led = hid_get_drvdata(hdev);

	if (led->mode == GT683R_LED_NORMAL)
		sysfs_mode = 0;
	else if (led->mode == GT683R_LED_AUDIO)
		sysfs_mode = 1;
	else
		sysfs_mode = 2;

	return scnprintf(buf, PAGE_SIZE, "%u\n", sysfs_mode);
}

static ssize_t mode_store(struct device *dev,
			  struct device_attribute *attr,
			  const char *buf, size_t count)
{
	u8 sysfs_mode;
	struct hid_device *hdev = to_hid_device(dev->parent);
	struct gt683r_led *led = hid_get_drvdata(hdev);

	if (kstrtou8(buf, 10, &sysfs_mode) || sysfs_mode > 2)
		return -EINVAL;

	mutex_lock(&led->lock);
	if (sysfs_mode == 0)
		led->mode =
GT683R_LED_NORMAL; else if (sysfs_mode == 1) led->mode = GT683R_LED_AUDIO; else led->mode = GT683R_LED_BREATHING; mutex_unlock(&led->lock); schedule_work(&led->work); return count; } static int gt683r_led_snd_msg(struct gt683r_led *led, u8 *msg) { int ret; ret = hid_hw_raw_request(led->hdev, msg[0], msg, GT683R_BUFFER_SIZE, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret != GT683R_BUFFER_SIZE) { hid_err(led->hdev, "failed to send set report request: %i\n", ret); if (ret < 0) return ret; return -EIO; } return 0; } static int gt683r_leds_set(struct gt683r_led *led, u8 leds) { int ret; u8 *buffer; buffer = kzalloc(GT683R_BUFFER_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; buffer[0] = 0x01; buffer[1] = 0x02; buffer[2] = 0x30; buffer[3] = leds; ret = gt683r_led_snd_msg(led, buffer); kfree(buffer); return ret; } static int gt683r_mode_set(struct gt683r_led *led, u8 mode) { int ret; u8 *buffer; buffer = kzalloc(GT683R_BUFFER_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; buffer[0] = 0x01; buffer[1] = 0x02; buffer[2] = 0x20; buffer[3] = mode; buffer[4] = 0x01; ret = gt683r_led_snd_msg(led, buffer); kfree(buffer); return ret; } static void gt683r_led_work(struct work_struct *work) { int i; u8 leds = 0; u8 mode; struct gt683r_led *led = container_of(work, struct gt683r_led, work); mutex_lock(&led->lock); for (i = 0; i < GT683R_LED_COUNT; i++) { if (led->brightnesses[i]) leds |= BIT(i); } if (gt683r_leds_set(led, leds)) goto fail; if (leds) mode = led->mode; else mode = GT683R_LED_OFF; gt683r_mode_set(led, mode); fail: mutex_unlock(&led->lock); } static DEVICE_ATTR_RW(mode); static struct attribute *gt683r_led_attrs[] = { &dev_attr_mode.attr, NULL }; static const struct attribute_group gt683r_led_group = { .name = "gt683r", .attrs = gt683r_led_attrs, }; static const struct attribute_group *gt683r_led_groups[] = { &gt683r_led_group, NULL }; static int gt683r_led_probe(struct hid_device *hdev, const struct hid_device_id *id) { int i; int ret; int name_sz; char *name; struct gt683r_led *led; led = devm_kzalloc(&hdev->dev, sizeof(*led), GFP_KERNEL); if (!led) return -ENOMEM; mutex_init(&led->lock); INIT_WORK(&led->work, gt683r_led_work); led->mode = GT683R_LED_NORMAL; led->hdev = hdev; hid_set_drvdata(hdev, led); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "hid parsing failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } for (i = 0; i < GT683R_LED_COUNT; i++) { name_sz = strlen(dev_name(&hdev->dev)) + strlen(gt683r_panel_names[i]) + 3; name = devm_kzalloc(&hdev->dev, name_sz, GFP_KERNEL); if (!name) { ret = -ENOMEM; goto fail; } snprintf(name, name_sz, "%s::%s", dev_name(&hdev->dev), gt683r_panel_names[i]); led->led_devs[i].name = name; led->led_devs[i].max_brightness = 1; led->led_devs[i].brightness_set = gt683r_brightness_set; led->led_devs[i].groups = gt683r_led_groups; ret = led_classdev_register(&hdev->dev, &led->led_devs[i]); if (ret) { hid_err(hdev, "could not register led device\n"); goto fail; } } return 0; fail: for (i = i - 1; i >= 0; i--) led_classdev_unregister(&led->led_devs[i]); hid_hw_stop(hdev); return ret; } static void gt683r_led_remove(struct hid_device *hdev) { int i; struct gt683r_led *led = hid_get_drvdata(hdev); for (i = 0; i < GT683R_LED_COUNT; i++) led_classdev_unregister(&led->led_devs[i]); flush_work(&led->work); hid_hw_stop(hdev); } static struct hid_driver gt683r_led_driver = { .probe = gt683r_led_probe, .remove = gt683r_led_remove, .name = "gt683r_led", .id_table = gt683r_led_id, }; 
module_hid_driver(gt683r_led_driver);

MODULE_AUTHOR("Janne Kanniainen");
MODULE_DESCRIPTION("MSI GT683R led driver");
MODULE_LICENSE("GPL");
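/*
 * Example: driving the "mode" attribute exported by gt683r_led_groups from
 * user space. A sketch only -- the LED class device name below is
 * hypothetical and depends on the HID bus id of the actual panel device.
 */
#include <stdio.h>

int main(void)
{
	/* 0 = normal, 1 = audio, 2 = breathing (see mode_store() above) */
	const char *path =
		"/sys/class/leds/0003:1462:1296.0001::back/gt683r/mode";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", f);	/* switch the panels to audio mode */
	fclose(f);
	return 0;
}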
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _LINUX_RCUREF_H
#define _LINUX_RCUREF_H

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/limits.h>
#include <linux/lockdep.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>

#define RCUREF_ONEREF		0x00000000U
#define RCUREF_MAXREF		0x7FFFFFFFU
#define RCUREF_SATURATED	0xA0000000U
#define RCUREF_RELEASED		0xC0000000U
#define RCUREF_DEAD		0xE0000000U
#define RCUREF_NOREF		0xFFFFFFFFU

/**
 * rcuref_init - Initialize a rcuref reference count with the given reference count
 * @ref:	Pointer to the reference count
 * @cnt:	The initial reference count typically '1'
 */
static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
{
	atomic_set(&ref->refcnt, cnt - 1);
}

/**
 * rcuref_read - Read the number of held reference counts of a rcuref
 * @ref:	Pointer to the reference count
 *
 * Return: The number of held references (0 ... N)
 */
static inline unsigned int rcuref_read(rcuref_t *ref)
{
	unsigned int c = atomic_read(&ref->refcnt);

	/* Return 0 if within the DEAD zone. */
	return c >= RCUREF_RELEASED ? 0 : c + 1;
}

extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);

/**
 * rcuref_get - Acquire one reference on a rcuref reference count
 * @ref:	Pointer to the reference count
 *
 * Similar to atomic_inc_not_zero() but saturates at RCUREF_MAXREF.
 *
 * Provides no memory ordering, it is assumed the caller has guaranteed the
 * object memory to be stable (RCU, etc.). It does provide a control dependency
 * and thereby orders future stores. See documentation in lib/rcuref.c
 *
 * Return:
 *	False if the attempt to acquire a reference failed. This happens
 *	when the last reference has been put already
 *
 *	True if a reference was successfully acquired
 */
static inline __must_check bool rcuref_get(rcuref_t *ref)
{
	/*
	 * Unconditionally increase the reference count. The saturation and
	 * dead zones provide enough tolerance for this.
	 */
	if (likely(!atomic_add_negative_relaxed(1, &ref->refcnt)))
		return true;

	/* Handle the cases inside the saturation and dead zones */
	return rcuref_get_slowpath(ref);
}

extern __must_check bool rcuref_put_slowpath(rcuref_t *ref);

/*
 * Internal helper. Do not invoke directly.
 */
static __always_inline __must_check bool __rcuref_put(rcuref_t *ref)
{
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(),
			 "suspicious rcuref_put_rcusafe() usage");
	/*
	 * Unconditionally decrease the reference count. The saturation and
	 * dead zones provide enough tolerance for this.
	 */
	if (likely(!atomic_add_negative_release(-1, &ref->refcnt)))
		return false;

	/*
	 * Handle the last reference drop and cases inside the saturation
	 * and dead zones.
	 */
	return rcuref_put_slowpath(ref);
}

/**
 * rcuref_put_rcusafe -- Release one reference for a rcuref reference count RCU safe
 * @ref:	Pointer to the reference count
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides an acquire ordering on success such that free()
 * must come after.
 *
 * Can be invoked from contexts, which guarantee that no grace period can
 * happen which would free the object concurrently if the decrement drops
 * the last reference and the slowpath races against a concurrent get() and
 * put() pair. rcu_read_lock()'ed and atomic contexts qualify.
 *
 * Return:
 *	True if this was the last reference with no future references
 *	possible. This signals the caller that it can safely release the
 *	object which is protected by the reference counter.
 *
 *	False if there are still active references or the put() raced
 *	with a concurrent get()/put() pair. Caller is not allowed to
 *	release the protected object.
 */
static inline __must_check bool rcuref_put_rcusafe(rcuref_t *ref)
{
	return __rcuref_put(ref);
}

/**
 * rcuref_put -- Release one reference for a rcuref reference count
 * @ref:	Pointer to the reference count
 *
 * Can be invoked from any context.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides an acquire ordering on success such that free()
 * must come after.
 *
 * Return:
 *
 *	True if this was the last reference with no future references
 *	possible. This signals the caller that it can safely schedule the
 *	object, which is protected by the reference counter, for
 *	deconstruction.
 *
 *	False if there are still active references or the put() raced
 *	with a concurrent get()/put() pair. Caller is not allowed to
 *	deconstruct the protected object.
 */
static inline __must_check bool rcuref_put(rcuref_t *ref)
{
	bool released;

	preempt_disable();
	released = __rcuref_put(ref);
	preempt_enable();
	return released;
}

#endif
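/*
 * Example: a typical rcuref usage pattern for an RCU-protected object. An
 * illustrative sketch only; "struct foo_entry" and its helpers are made up
 * and not part of the header above.
 */
#include <linux/rcuref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo_entry {
	rcuref_t	ref;
	struct rcu_head	rcu;
	/* ... payload ... */
};

static struct foo_entry *foo_alloc(void)
{
	struct foo_entry *e = kzalloc(sizeof(*e), GFP_KERNEL);

	if (e)
		rcuref_init(&e->ref, 1);	/* start with one reference */
	return e;
}

/*
 * Lookup side: runs under rcu_read_lock(), so rcuref_get() needs no extra
 * ordering -- it only has to fail once the last reference was dropped.
 */
static struct foo_entry *foo_get(struct foo_entry *e)
{
	if (e && rcuref_get(&e->ref))
		return e;
	return NULL;		/* object is already on its way out */
}

static void foo_put(struct foo_entry *e)
{
	/*
	 * True only for the final put; free after a grace period so
	 * concurrent RCU readers see either a valid object or a failing
	 * rcuref_get().
	 */
	if (rcuref_put(&e->ref))
		kfree_rcu(e, rcu);
}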
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Fallback per-CPU frame pointer holder
 *
 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#ifndef _ASM_GENERIC_IRQ_REGS_H
#define _ASM_GENERIC_IRQ_REGS_H

#include <linux/percpu.h>

/*
 * Per-cpu current frame pointer - the location of the last exception frame on
 * the stack
 */
DECLARE_PER_CPU(struct pt_regs *, __irq_regs);

static inline struct pt_regs *get_irq_regs(void)
{
	return __this_cpu_read(__irq_regs);
}

static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
{
	struct pt_regs *old_regs;

	old_regs = __this_cpu_read(__irq_regs);
	__this_cpu_write(__irq_regs, new_regs);

	return old_regs;
}

#endif /* _ASM_GENERIC_IRQ_REGS_H */
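/*
 * Usage sketch (not part of the header): the save/restore pattern that
 * architecture interrupt entry code typically wraps around its handler so
 * that profile_tick() and similar users can find the interrupted context
 * via get_irq_regs(). The handler name below is hypothetical.
 */
static void example_handle_irq(struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	/* ... dispatch the interrupt; get_irq_regs() now returns 'regs' ... */

	set_irq_regs(old_regs);		/* restore the previous (possibly nested) value */
}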
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Access to user system call parameters and results * * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * * See asm-generic/syscall.h for descriptions of what we must do here. */ #ifndef _ASM_X86_SYSCALL_H #define _ASM_X86_SYSCALL_H #include <uapi/linux/audit.h> #include <linux/sched.h> #include <linux/err.h> #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> /* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); extern long x32_sys_call(const struct pt_regs *, unsigned int nr); extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons * sign-extend the low 32 bits. */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { return regs->orig_ax; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { regs->ax = regs->orig_ax; } static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->ax; #ifdef CONFIG_IA32_EMULATION /* * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. */ error = (long) (int) error; #endif return IS_ERR_VALUE(error) ? error : 0; } static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { return regs->ax; } static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { regs->ax = (long) error ?: val; } #ifdef CONFIG_X86_32 static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { memcpy(args, &regs->bx, 6 * sizeof(args[0])); } static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; } #else /* CONFIG_X86_64 */ static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION if (task->thread_info.status & TS_COMPAT) { *args++ = regs->bx; *args++ = regs->cx; *args++ = regs->dx; *args++ = regs->si; *args++ = regs->di; *args = regs->bp; } else # endif { *args++ = regs->di; *args++ = regs->si; *args++ = regs->dx; *args++ = regs->r10; *args++ = regs->r8; *args = regs->r9; } } static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ return (IS_ENABLED(CONFIG_IA32_EMULATION) && task->thread_info.status & TS_COMPAT) ? 
AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } bool do_syscall_64(struct pt_regs *regs, int nr); void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ void do_int80_syscall_32(struct pt_regs *regs); bool do_fast_syscall_32(struct pt_regs *regs); bool do_SYSENTER_32(struct pt_regs *regs); #endif /* _ASM_X86_SYSCALL_H */
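/*
 * Usage sketch (not part of the header): how a ptrace/audit style consumer
 * might use the accessors above on a task stopped at syscall entry or exit.
 * The function below is hypothetical; the accessors and the six-argument
 * convention are the ones defined in this header, and -1 is the usual
 * "not inside a system call" value of orig_ax.
 */
static void example_dump_syscall(struct task_struct *task, struct pt_regs *regs)
{
	unsigned long args[6];
	int nr = syscall_get_nr(task, regs);

	if (nr == -1)		/* task is not inside a system call */
		return;

	syscall_get_arguments(task, regs, args);
	pr_info("arch %#x nr %d args %lx %lx %lx %lx %lx %lx ret %ld\n",
		syscall_get_arch(task), nr,
		args[0], args[1], args[2], args[3], args[4], args[5],
		syscall_get_return_value(task, regs));
}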
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/kernel/profile.c
 *  Simple profiling. Manages a direct-mapped profile hit count buffer,
 *  with configurable resolution, support for restricting the cpus on
 *  which profiling is done, and switching between cpu time and
 *  schedule() calls via kernel command line parameters passed at boot.
* * Scheduler profiling support, Arjan van de Ven and Ingo Molnar, * Red Hat, July 2004 * Consolidation of architecture support code for profiling, * Nadia Yvette Chambers, Oracle, July 2004 * Amortized hit count accounting via per-cpu open-addressed hashtables * to resolve timer interrupt livelocks, Nadia Yvette Chambers, * Oracle, 2004 */ #include <linux/export.h> #include <linux/profile.h> #include <linux/memblock.h> #include <linux/notifier.h> #include <linux/mm.h> #include <linux/cpumask.h> #include <linux/cpu.h> #include <linux/highmem.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/sched/stat.h> #include <asm/sections.h> #include <asm/irq_regs.h> #include <asm/ptrace.h> struct profile_hit { u32 pc, hits; }; #define PROFILE_GRPSHIFT 3 #define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT) #define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit)) #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) static atomic_t *prof_buffer; static unsigned long prof_len; static unsigned short int prof_shift; int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); static cpumask_var_t prof_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); static DEFINE_PER_CPU(int, cpu_profile_flip); static DEFINE_MUTEX(profile_flip_mutex); #endif /* CONFIG_SMP */ int profile_setup(char *str) { static const char schedstr[] = "schedule"; static const char sleepstr[] = "sleep"; static const char kvmstr[] = "kvm"; const char *select = NULL; int par; if (!strncmp(str, sleepstr, strlen(sleepstr))) { #ifdef CONFIG_SCHEDSTATS force_schedstat_enabled(); prof_on = SLEEP_PROFILING; select = sleepstr; #else pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); #endif /* CONFIG_SCHEDSTATS */ } else if (!strncmp(str, schedstr, strlen(schedstr))) { prof_on = SCHED_PROFILING; select = schedstr; } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { prof_on = KVM_PROFILING; select = kvmstr; } else if (get_option(&str, &par)) { prof_shift = clamp(par, 0, BITS_PER_LONG - 1); prof_on = CPU_PROFILING; pr_info("kernel profiling enabled (shift: %u)\n", prof_shift); } if (select) { if (str[strlen(select)] == ',') str += strlen(select) + 1; if (get_option(&str, &par)) prof_shift = clamp(par, 0, BITS_PER_LONG - 1); pr_info("kernel %s profiling enabled (shift: %u)\n", select, prof_shift); } return 1; } __setup("profile=", profile_setup); int __ref profile_init(void) { int buffer_bytes; if (!prof_on) return 0; /* only text is profiled */ prof_len = (_etext - _stext) >> prof_shift; if (!prof_len) { pr_warn("profiling shift: %u too large\n", prof_shift); prof_on = 0; return -EINVAL; } buffer_bytes = prof_len*sizeof(atomic_t); if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) return -ENOMEM; cpumask_copy(prof_cpu_mask, cpu_possible_mask); prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN); if (prof_buffer) return 0; prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN); if (prof_buffer) return 0; prof_buffer = vzalloc(buffer_bytes); if (prof_buffer) return 0; free_cpumask_var(prof_cpu_mask); return -ENOMEM; } #if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) /* * Each cpu has a pair of open-addressed hashtables for pending * profile hits. read_profile() IPI's all cpus to request them * to flip buffers and flushes their contents to prof_buffer itself. * Flip requests are serialized by the profile_flip_mutex. 
The sole * use of having a second hashtable is for avoiding cacheline * contention that would otherwise happen during flushes of pending * profile hits required for the accuracy of reported profile hits * and so resurrect the interrupt livelock issue. * * The open-addressed hashtables are indexed by profile buffer slot * and hold the number of pending hits to that profile buffer slot on * a cpu in an entry. When the hashtable overflows, all pending hits * are accounted to their corresponding profile buffer slots with * atomic_add() and the hashtable emptied. As numerous pending hits * may be accounted to a profile buffer slot in a hashtable entry, * this amortizes a number of atomic profile buffer increments likely * to be far larger than the number of entries in the hashtable, * particularly given that the number of distinct profile buffer * positions to which hits are accounted during short intervals (e.g. * several seconds) is usually very small. Exclusion from buffer * flipping is provided by interrupt disablement (note that for * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from * process context). * The hash function is meant to be lightweight as opposed to strong, * and was vaguely inspired by ppc64 firmware-supported inverted * pagetable hash functions, but uses a full hashtable full of finite * collision chains, not just pairs of them. * * -- nyc */ static void __profile_flip_buffers(void *unused) { int cpu = smp_processor_id(); per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu); } static void profile_flip_buffers(void) { int i, j, cpu; mutex_lock(&profile_flip_mutex); j = per_cpu(cpu_profile_flip, get_cpu()); put_cpu(); on_each_cpu(__profile_flip_buffers, NULL, 1); for_each_online_cpu(cpu) { struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; for (i = 0; i < NR_PROFILE_HIT; ++i) { if (!hits[i].hits) { if (hits[i].pc) hits[i].pc = 0; continue; } atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); hits[i].hits = hits[i].pc = 0; } } mutex_unlock(&profile_flip_mutex); } static void profile_discard_flip_buffers(void) { int i, cpu; mutex_lock(&profile_flip_mutex); i = per_cpu(cpu_profile_flip, get_cpu()); put_cpu(); on_each_cpu(__profile_flip_buffers, NULL, 1); for_each_online_cpu(cpu) { struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); } mutex_unlock(&profile_flip_mutex); } static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long primary, secondary, flags, pc = (unsigned long)__pc; int i, j, cpu; struct profile_hit *hits; pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; cpu = get_cpu(); hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)]; if (!hits) { put_cpu(); return; } /* * We buffer the global profiler buffer into a per-CPU * queue and thus reduce the number of global (and possibly * NUMA-alien) accesses. 
The write-queue is self-coalescing: */ local_irq_save(flags); do { for (j = 0; j < PROFILE_GRPSZ; ++j) { if (hits[i + j].pc == pc) { hits[i + j].hits += nr_hits; goto out; } else if (!hits[i + j].hits) { hits[i + j].pc = pc; hits[i + j].hits = nr_hits; goto out; } } i = (i + secondary) & (NR_PROFILE_HIT - 1); } while (i != primary); /* * Add the current hit(s) and flush the write-queue out * to the global buffer: */ atomic_add(nr_hits, &prof_buffer[pc]); for (i = 0; i < NR_PROFILE_HIT; ++i) { atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); hits[i].pc = hits[i].hits = 0; } out: local_irq_restore(flags); put_cpu(); } static int profile_dead_cpu(unsigned int cpu) { struct page *page; int i; if (cpumask_available(prof_cpu_mask)) cpumask_clear_cpu(cpu, prof_cpu_mask); for (i = 0; i < 2; i++) { if (per_cpu(cpu_profile_hits, cpu)[i]) { page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]); per_cpu(cpu_profile_hits, cpu)[i] = NULL; __free_page(page); } } return 0; } static int profile_prepare_cpu(unsigned int cpu) { int i, node = cpu_to_mem(cpu); struct page *page; per_cpu(cpu_profile_flip, cpu) = 0; for (i = 0; i < 2; i++) { if (per_cpu(cpu_profile_hits, cpu)[i]) continue; page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); if (!page) { profile_dead_cpu(cpu); return -ENOMEM; } per_cpu(cpu_profile_hits, cpu)[i] = page_address(page); } return 0; } static int profile_online_cpu(unsigned int cpu) { if (cpumask_available(prof_cpu_mask)) cpumask_set_cpu(cpu, prof_cpu_mask); return 0; } #else /* !CONFIG_SMP */ #define profile_flip_buffers() do { } while (0) #define profile_discard_flip_buffers() do { } while (0) static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long pc; pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); } #endif /* !CONFIG_SMP */ void profile_hits(int type, void *__pc, unsigned int nr_hits) { if (prof_on != type || !prof_buffer) return; do_profile_hits(type, __pc, nr_hits); } EXPORT_SYMBOL_GPL(profile_hits); void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) profile_hit(type, (void *)profile_pc(regs)); } #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/uaccess.h> /* * This function accesses profiling information. The returned data is * binary: the sampling step and the actual contents of the profile * buffer. Use of the program readprofile is recommended in order to * get meaningful info out of these data. 
*/ static ssize_t read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos; ssize_t read; char *pnt; unsigned long sample_step = 1UL << prof_shift; profile_flip_buffers(); if (p >= (prof_len+1)*sizeof(unsigned int)) return 0; if (count > (prof_len+1)*sizeof(unsigned int) - p) count = (prof_len+1)*sizeof(unsigned int) - p; read = 0; while (p < sizeof(unsigned int) && count > 0) { if (put_user(*((char *)(&sample_step)+p), buf)) return -EFAULT; buf++; p++; count--; read++; } pnt = (char *)prof_buffer + p - sizeof(atomic_t); if (copy_to_user(buf, (void *)pnt, count)) return -EFAULT; read += count; *ppos += read; return read; } /* default is to not implement this call */ int __weak setup_profiling_timer(unsigned mult) { return -EINVAL; } /* * Writing to /proc/profile resets the counters * * Writing a 'profiling multiplier' value into it also re-sets the profiling * interrupt frequency, on architectures that support this. */ static ssize_t write_profile(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { #ifdef CONFIG_SMP if (count == sizeof(int)) { unsigned int multiplier; if (copy_from_user(&multiplier, buf, sizeof(int))) return -EFAULT; if (setup_profiling_timer(multiplier)) return -EINVAL; } #endif profile_discard_flip_buffers(); memset(prof_buffer, 0, prof_len * sizeof(atomic_t)); return count; } static const struct proc_ops profile_proc_ops = { .proc_read = read_profile, .proc_write = write_profile, .proc_lseek = default_llseek, }; int __ref create_proc_profile(void) { struct proc_dir_entry *entry; #ifdef CONFIG_SMP enum cpuhp_state online_state; #endif int err = 0; if (!prof_on) return 0; #ifdef CONFIG_SMP err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE", profile_prepare_cpu, profile_dead_cpu); if (err) return err; err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE", profile_online_cpu, NULL); if (err < 0) goto err_state_prep; online_state = err; err = 0; #endif entry = proc_create("profile", S_IWUSR | S_IRUGO, NULL, &profile_proc_ops); if (!entry) goto err_state_onl; proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); return err; err_state_onl: #ifdef CONFIG_SMP cpuhp_remove_state(online_state); err_state_prep: cpuhp_remove_state(CPUHP_PROFILE_PREPARE); #endif return err; } subsys_initcall(create_proc_profile); #endif /* CONFIG_PROC_FS */
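/*
 * Userspace sketch (not kernel code): the record layout produced by
 * read_profile() above is a single 'unsigned int' holding the sample step
 * (1 << prof_shift) followed by the profile counters. readprofile(1) is the
 * real consumer; this minimal reader just prints the step and how many
 * slots have accumulated hits.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/profile", "rb");
	unsigned int step, slot, used = 0;

	if (!f || fread(&step, sizeof(step), 1, f) != 1) {
		perror("/proc/profile");
		return 1;
	}
	while (fread(&slot, sizeof(slot), 1, f) == 1)
		used += slot != 0;
	printf("sample step: %u bytes, slots with hits: %u\n", step, used);
	fclose(f);
	return 0;
}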
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SECCOMP_H #define _LINUX_SECCOMP_H #include <uapi/linux/seccomp.h> #include <linux/seccomp_types.h> #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ SECCOMP_FILTER_FLAG_SPEC_ALLOW | \ SECCOMP_FILTER_FLAG_NEW_LISTENER | \ SECCOMP_FILTER_FLAG_TSYNC_ESRCH | \ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) /* sizeof() the first published struct seccomp_notif_addfd */ #define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24 #define SECCOMP_NOTIFY_ADDFD_SIZE_LATEST SECCOMP_NOTIFY_ADDFD_SIZE_VER0 #ifdef CONFIG_SECCOMP #include <linux/thread_info.h> #include <linux/atomic.h> #include <asm/seccomp.h> #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) { if (unlikely(test_syscall_work(SECCOMP))) return __secure_computing(NULL); return 0; } #else extern void secure_computing_strict(int this_syscall); #endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, void __user *); static inline int seccomp_mode(struct seccomp *s) { return s->mode; } #else /* CONFIG_SECCOMP */ #include <linux/errno.h> struct seccomp_data; #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER static inline int secure_computing(void) { return 0; } static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } #else static inline void secure_computing_strict(int this_syscall) { return; } #endif static inline long prctl_get_seccomp(void) { return -EINVAL; } static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3) { return -EINVAL; } static inline int seccomp_mode(struct seccomp *s) { return SECCOMP_MODE_DISABLED; } #endif /* CONFIG_SECCOMP */ #ifdef CONFIG_SECCOMP_FILTER extern void seccomp_filter_release(struct task_struct *tsk); extern void get_seccomp_filter(struct task_struct *tsk); #else /* CONFIG_SECCOMP_FILTER */ static inline void seccomp_filter_release(struct task_struct *tsk) { return; } static inline void get_seccomp_filter(struct task_struct *tsk) { return; } #endif /* CONFIG_SECCOMP_FILTER */ #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE) extern long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, void __user *data); extern long seccomp_get_metadata(struct task_struct *task, unsigned long filter_off, void __user *data); #else static inline long seccomp_get_filter(struct task_struct *task, unsigned long n, void __user *data) { return -EINVAL; } static inline long seccomp_get_metadata(struct task_struct *task, unsigned long filter_off, void __user *data) { return -EINVAL; } #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ #ifdef CONFIG_SECCOMP_CACHE_DEBUG struct seq_file; struct pid_namespace; struct pid; int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); #endif #endif /* _LINUX_SECCOMP_H */
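/*
 * Userspace sketch (not part of the header): prctl_set_seccomp() declared
 * above backs the PR_SET_SECCOMP prctl. A minimal strict-mode user looks
 * like this; after the call only read(), write(), _exit() and sigreturn()
 * remain permitted, and any other syscall kills the task.
 */
#include <linux/seccomp.h>
#include <sys/prctl.h>
#include <unistd.h>

int main(void)
{
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0))
		return 1;	/* kernel built without CONFIG_SECCOMP, or error */

	write(1, "sandboxed\n", 10);	/* still allowed */
	_exit(0);
}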
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Queued spinlock
 *
 * A 'generic' spinlock implementation that is based on MCS locks. For an
 * architecture that's looking for a 'generic' spinlock, please first consider
 * ticket-lock.h and only come looking here when you've considered all the
 * constraints below and can show your hardware does actually perform better
 * with qspinlock.
 *
 * qspinlock relies on atomic_*_release()/atomic_*_acquire() to be RCsc (or no
 * weaker than RCtso if you're power), where regular code only expects atomic_t
 * to be RCpc.
 *
 * qspinlock relies on a far greater (compared to asm-generic/spinlock.h) set
 * of atomic operations to behave well together, please audit them carefully to
 * ensure they all have forward progress. Many atomic operations may default to
 * cmpxchg() loops which will not have good forward progress properties on
 * LL/SC architectures.
 *
 * One notable example is atomic_fetch_or_acquire(), which x86 cannot (cheaply)
 * do. Carefully read the patches that introduced
 * queued_fetch_set_pending_acquire().
 *
 * qspinlock also heavily relies on mixed size atomic operations, in specific
 * it requires architectures to have xchg16; something which many LL/SC
 * architectures need to implement as a 32bit and+or in order to satisfy the
 * forward progress guarantees mentioned above.
 *
 * Further reading on mixed size atomics that might be relevant:
 *
 *   http://www.cl.cam.ac.uk/~pes20/popl17/mixed-size.pdf
 *
 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
 * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
 *
 * Authors: Waiman Long <waiman.long@hpe.com>
 */
#ifndef __ASM_GENERIC_QSPINLOCK_H
#define __ASM_GENERIC_QSPINLOCK_H

#include <asm-generic/qspinlock_types.h>
#include <linux/atomic.h>

#ifndef queued_spin_is_locked
/**
 * queued_spin_is_locked - is the spinlock locked?
 * @lock: Pointer to queued spinlock structure
 * Return: 1 if it is locked, 0 otherwise
 */
static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
{
	/*
	 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
	 * isn't immediately observable.
	 */
	return atomic_read(&lock->val);
}
#endif

/**
 * queued_spin_value_unlocked - is the spinlock structure unlocked?
 * @lock: queued spinlock structure
 * Return: 1 if it is unlocked, 0 otherwise
 *
 * N.B. Whenever there are tasks waiting for the lock, it is considered
 *      locked wrt the lockref code to avoid lock stealing by the lockref
 *      code and change things underneath the lock. This also allows some
 *      optimizations to be applied without conflict with lockref.
 */
static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
{
	return !lock.val.counter;
}

/**
 * queued_spin_is_contended - check if the lock is contended
 * @lock : Pointer to queued spinlock structure
 * Return: 1 if lock contended, 0 otherwise
 */
static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
{
	return atomic_read(&lock->val) & ~_Q_LOCKED_MASK;
}

/**
 * queued_spin_trylock - try to acquire the queued spinlock
 * @lock : Pointer to queued spinlock structure
 * Return: 1 if lock acquired, 0 if failed
 */
static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
	int val = atomic_read(&lock->val);

	if (unlikely(val))
		return 0;

	return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
}

extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);

#ifndef queued_spin_lock
/**
 * queued_spin_lock - acquire a queued spinlock
 * @lock: Pointer to queued spinlock structure
 */
static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
	int val = 0;

	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
		return;

	queued_spin_lock_slowpath(lock, val);
}
#endif

#ifndef queued_spin_unlock
/**
 * queued_spin_unlock - release a queued spinlock
 * @lock : Pointer to queued spinlock structure
 */
static __always_inline void queued_spin_unlock(struct qspinlock *lock)
{
	/*
	 * unlock() needs release semantics:
	 */
	smp_store_release(&lock->locked, 0);
}
#endif

#ifndef virt_spin_lock
static __always_inline bool virt_spin_lock(struct qspinlock *lock)
{
	return false;
}
#endif

/*
 * Remapping spinlock architecture specific functions to the corresponding
 * queued spinlock functions.
 */
#define arch_spin_is_locked(l)		queued_spin_is_locked(l)
#define arch_spin_is_contended(l)	queued_spin_is_contended(l)
#define arch_spin_value_unlocked(l)	queued_spin_value_unlocked(l)
#define arch_spin_lock(l)		queued_spin_lock(l)
#define arch_spin_trylock(l)		queued_spin_trylock(l)
#define arch_spin_unlock(l)		queued_spin_unlock(l)

#endif /* __ASM_GENERIC_QSPINLOCK_H */
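/*
 * Usage sketch (not part of the header): kernel code does not call these
 * helpers directly; it takes a spinlock_t via spin_lock()/spin_unlock(),
 * which reach the queued_spin_*() functions through the arch_spin_*()
 * defines above. The illustrative function below only walks through the
 * fast-path semantics of this API.
 */
static void example_qspinlock_roundtrip(struct qspinlock *lock)
{
	/* Uncontended acquire: one cmpxchg_acquire turning 0 into _Q_LOCKED_VAL. */
	if (!queued_spin_trylock(lock))
		queued_spin_lock(lock);		/* contended: queue up in the slowpath */

	/* ... critical section; queued_spin_is_locked(lock) is non-zero here ... */

	queued_spin_unlock(lock);	/* single release store clearing the locked byte */
}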
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PGTABLE_H
#define _LINUX_PGTABLE_H

#include <linux/pfn.h>
#include <asm/pgtable.h>

#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)
#define PUD_ORDER	(PUD_SHIFT - PAGE_SHIFT)

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <asm-generic/pgtable_uffd.h>
#include <linux/page_table_check.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE. However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif

/*
 * This defines the first usable user address. Platforms
 * can override its value with custom FIRST_USER_ADDRESS
 * defined in their respective <asm/pgtable.h>.
 */
#ifndef FIRST_USER_ADDRESS
#define FIRST_USER_ADDRESS	0UL
#endif

/*
 * This defines the generic helper for accessing PMD page
 * table page. Although platforms can still override this
 * via their respective <asm/pgtable.h>.
 */
#ifndef pmd_pgtable
#define pmd_pgtable(pmd) pmd_page(pmd)
#endif

/*
 * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD]
 *
 * The pXx_index() functions return the index of the entry in the page
 * table page which would control the given virtual address
 *
 * As these functions may be used by the same code for different levels of
 * the page table folding, they are always available, regardless of
 * CONFIG_PGTABLE_LEVELS value. For the folded levels they simply return 0
 * because in such cases PTRS_PER_PxD equals 1.
*/ static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } #ifndef pmd_index static inline unsigned long pmd_index(unsigned long address) { return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } #define pmd_index pmd_index #endif #ifndef pud_index static inline unsigned long pud_index(unsigned long address) { return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); } #define pud_index pud_index #endif #ifndef pgd_index /* Must be a compile-time constant, so implement it as a macro */ #define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #endif #ifndef pte_offset_kernel static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) { return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); } #define pte_offset_kernel pte_offset_kernel #endif #ifdef CONFIG_HIGHPTE #define __pte_map(pmd, address) \ ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address))) #define pte_unmap(pte) do { \ kunmap_local((pte)); \ rcu_read_unlock(); \ } while (0) #else static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address) { return pte_offset_kernel(pmd, address); } static inline void pte_unmap(pte_t *pte) { rcu_read_unlock(); } #endif void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable); /* Find an entry in the second-level page table.. */ #ifndef pmd_offset static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { return pud_pgtable(*pud) + pmd_index(address); } #define pmd_offset pmd_offset #endif #ifndef pud_offset static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { return p4d_pgtable(*p4d) + pud_index(address); } #define pud_offset pud_offset #endif static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address) { return (pgd + pgd_index(address)); }; /* * a shortcut to get a pgd_t in a given mm */ #ifndef pgd_offset #define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) #endif /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's */ #ifndef pgd_offset_k #define pgd_offset_k(address) pgd_offset(&init_mm, (address)) #endif /* * In many cases it is known that a virtual address is mapped at PMD or PTE * level, so instead of traversing all the page table levels, we can get a * pointer to the PMD entry in user or kernel page table or translate a virtual * address to the pointer in the PTE in the kernel page tables with simple * helpers. */ static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va) { return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), va); } static inline pmd_t *pmd_off_k(unsigned long va) { return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); } static inline pte_t *virt_to_kpte(unsigned long vaddr) { pmd_t *pmd = pmd_off_k(vaddr); return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } #ifndef pmd_young static inline int pmd_young(pmd_t pmd) { return 0; } #endif #ifndef pmd_dirty static inline int pmd_dirty(pmd_t pmd) { return 0; } #endif /* * A facility to provide lazy MMU batching. This allows PTE updates and * page invalidations to be delayed until a call to leave lazy MMU mode * is issued. Some architectures may benefit from doing this, and it is * beneficial for both shadow and direct mode hypervisors, which may batch * the PTE updates which happen during this window. Note that using this * interface requires that read hazards be removed from the code. 
A read * hazard could result in the direct mode hypervisor case, since the actual * write to the page tables may not yet have taken place, so reads though * a raw PTE pointer after it has been modified are not guaranteed to be * up to date. This mode can only be entered and left under the protection of * the page table locks for all page tables which may be modified. In the UP * case, this is required so that preemption is disabled, and in the SMP case, * it must synchronize the delayed page table writes properly on other CPUs. */ #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) #define arch_flush_lazy_mmu_mode() do {} while (0) #endif #ifndef pte_batch_hint /** * pte_batch_hint - Number of pages that can be added to batch without scanning. * @ptep: Page table pointer for the entry. * @pte: Page table entry. * * Some architectures know that a set of contiguous ptes all map the same * contiguous memory with the same permissions. In this case, it can provide a * hint to aid pte batching without the core code needing to scan every pte. * * An architecture implementation may ignore the PTE accessed state. Further, * the dirty state must apply atomically to all the PTEs described by the hint. * * May be overridden by the architecture, else pte_batch_hint is always 1. */ static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte) { return 1; } #endif #ifndef pte_advance_pfn static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } #endif #define pte_next_pfn(pte) pte_advance_pfn(pte, 1) #ifndef set_ptes /** * set_ptes - Map consecutive pages to a contiguous range of addresses. * @mm: Address space to map the pages into. * @addr: Address to map the first page at. * @ptep: Page table pointer for the first entry. * @pte: Page table entry for the first page. * @nr: Number of pages to map. * * When nr==1, initial state of pte may be present or not present, and new state * may be present or not present. When nr>1, initial state of all ptes must be * not present, and new state must be present. * * May be overridden by the architecture, or the architecture can define * set_pte() and PFN_PTE_SHIFT. * * Context: The caller holds the page table lock. The pages all belong * to the same folio. The PTEs are all in the same PMD. 
*/ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned int nr) { page_table_check_ptes_set(mm, ptep, pte, nr); arch_enter_lazy_mmu_mode(); for (;;) { set_pte(ptep, pte); if (--nr == 0) break; ptep++; pte = pte_next_pfn(pte); } arch_leave_lazy_mmu_mode(); } #endif #define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); #endif #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); extern int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty); #else static inline int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { BUILD_BUG(); return 0; } static inline int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty) { BUILD_BUG(); return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef ptep_get static inline pte_t ptep_get(pte_t *ptep) { return READ_ONCE(*ptep); } #endif #ifndef pmdp_get static inline pmd_t pmdp_get(pmd_t *pmdp) { return READ_ONCE(*pmdp); } #endif #ifndef pudp_get static inline pud_t pudp_get(pud_t *pudp) { return READ_ONCE(*pudp); } #endif #ifndef p4dp_get static inline p4d_t p4dp_get(p4d_t *p4dp) { return READ_ONCE(*p4dp); } #endif #ifndef pgdp_get static inline pgd_t pgdp_get(pgd_t *pgdp) { return READ_ONCE(*pgdp); } #endif #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { pte_t pte = ptep_get(ptep); int r = 1; if (!pte_young(pte)) r = 0; else set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte)); return r; } #endif #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_t pmd = *pmdp; int r = 1; if (!pmd_young(pmd)) r = 0; else set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd)); return r; } #else static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { BUILD_BUG(); return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); #endif #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #else /* * Despite relevant to THP only, this API is called from generic rmap code * under PageTransHuge(), hence needs a dummy implementation for !THP */ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { BUILD_BUG(); return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef arch_has_hw_nonleaf_pmd_young /* * Return whether the accessed bit in non-leaf PMD entries is supported on the * local CPU. 
*/ static inline bool arch_has_hw_nonleaf_pmd_young(void) { return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); } #endif #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. * * This stub assumes accessing through an old PTE triggers a page fault. * Architectures that automatically set the access bit should overwrite it. */ static inline bool arch_has_hw_pte_young(void) { return IS_ENABLED(CONFIG_ARCH_HAS_HW_PTE_YOUNG); } #endif #ifndef arch_check_zapped_pte static inline void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte) { } #endif #ifndef arch_check_zapped_pmd static inline void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd) { } #endif #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t pte = ptep_get(ptep); pte_clear(mm, address, ptep); page_table_check_pte_clear(mm, pte); return pte; } #endif static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { ptep_get_and_clear(mm, addr, ptep); } #ifdef CONFIG_GUP_GET_PXX_LOW_HIGH /* * For walking the pagetables without holding any locks. Some architectures * (eg x86-32 PAE) cannot load the entries atomically without using expensive * instructions. We are guaranteed that a PTE will only either go from not * present to present, or present to not present -- it will not switch to a * completely different present page without a TLB flush inbetween; which we * are blocking by holding interrupts off. * * Setting ptes from not present to present goes: * * ptep->pte_high = h; * smp_wmb(); * ptep->pte_low = l; * * And present to not present goes: * * ptep->pte_low = 0; * smp_wmb(); * ptep->pte_high = 0; * * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'. * We load pte_high *after* loading pte_low, which ensures we don't see an older * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't * picked up a changed pte high. We might have gotten rubbish values from * pte_low and pte_high, but we are guaranteed that pte_low will not have the * present bit set *unless* it is 'l'. Because get_user_pages_fast() only * operates on present ptes we're safe. */ static inline pte_t ptep_get_lockless(pte_t *ptep) { pte_t pte; do { pte.pte_low = ptep->pte_low; smp_rmb(); pte.pte_high = ptep->pte_high; smp_rmb(); } while (unlikely(pte.pte_low != ptep->pte_low)); return pte; } #define ptep_get_lockless ptep_get_lockless #if CONFIG_PGTABLE_LEVELS > 2 static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) { pmd_t pmd; do { pmd.pmd_low = pmdp->pmd_low; smp_rmb(); pmd.pmd_high = pmdp->pmd_high; smp_rmb(); } while (unlikely(pmd.pmd_low != pmdp->pmd_low)); return pmd; } #define pmdp_get_lockless pmdp_get_lockless #define pmdp_get_lockless_sync() tlb_remove_table_sync_one() #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */ /* * We require that the PTE can be read atomically. 
*/ #ifndef ptep_get_lockless static inline pte_t ptep_get_lockless(pte_t *ptep) { return ptep_get(ptep); } #endif #ifndef pmdp_get_lockless static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) { return pmdp_get(pmdp); } static inline void pmdp_get_lockless_sync(void) { } #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { pmd_t pmd = *pmdp; pmd_clear(pmdp); page_table_check_pmd_clear(mm, pmd); return pmd; } #endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */ #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pud_t *pudp) { pud_t pud = *pudp; pud_clear(pudp); page_table_check_pud_clear(mm, pud); return pud; } #endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, int full) { return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); } #endif #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, int full) { return pudp_huge_get_and_clear(vma->vm_mm, address, pudp); } #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long address, pte_t *ptep, int full) { return ptep_get_and_clear(mm, address, ptep); } #endif #ifndef get_and_clear_full_ptes /** * get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of * the same folio, collecting dirty/accessed bits. * @mm: Address space the pages are mapped into. * @addr: Address the first page is mapped at. * @ptep: Page table pointer for the first entry. * @nr: Number of entries to clear. * @full: Whether we are clearing a full mm. * * May be overridden by the architecture; otherwise, implemented as a simple * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the * returned PTE. * * Note that PTE bits in the PTE range besides the PFN can differ. For example, * some PTEs might be write-protected. * * Context: The caller holds the page table lock. The PTEs map consecutive * pages that belong to the same folio. The PTEs are all in the same PMD. */ static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned int nr, int full) { pte_t pte, tmp_pte; pte = ptep_get_and_clear_full(mm, addr, ptep, full); while (--nr) { ptep++; addr += PAGE_SIZE; tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full); if (pte_dirty(tmp_pte)) pte = pte_mkdirty(pte); if (pte_young(tmp_pte)) pte = pte_mkyoung(pte); } return pte; } #endif #ifndef clear_full_ptes /** * clear_full_ptes - Clear present PTEs that map consecutive pages of the same * folio. * @mm: Address space the pages are mapped into. * @addr: Address the first page is mapped at. * @ptep: Page table pointer for the first entry. * @nr: Number of entries to clear. * @full: Whether we are clearing a full mm. * * May be overridden by the architecture; otherwise, implemented as a simple * loop over ptep_get_and_clear_full(). * * Note that PTE bits in the PTE range besides the PFN can differ. For example, * some PTEs might be write-protected. 
* * Context: The caller holds the page table lock. The PTEs map consecutive * pages that belong to the same folio. The PTEs are all in the same PMD. */ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned int nr, int full) { for (;;) { ptep_get_and_clear_full(mm, addr, ptep, full); if (--nr == 0) break; ptep++; addr += PAGE_SIZE; } } #endif /* * If two threads concurrently fault at the same page, the thread that * won the race updates the PTE and its local TLB/Cache. The other thread * gives up, simply does nothing, and continues; on architectures where * software can update TLB, local TLB can be updated here to avoid next page * fault. This function updates TLB only, do nothing with cache or others. * It is the difference with function update_mmu_cache. */ #ifndef __HAVE_ARCH_UPDATE_MMU_TLB static inline void update_mmu_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { } #define __HAVE_ARCH_UPDATE_MMU_TLB #endif /* * Some architectures may be able to avoid expensive synchronization * primitives when modifications are made to PTE's which are already * not present, or in the process of an address space destruction. */ #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL static inline void pte_clear_not_present_full(struct mm_struct *mm, unsigned long address, pte_t *ptep, int full) { pte_clear(mm, address, ptep); } #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH extern pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); #endif #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pud_t *pudp); #endif #ifndef pte_mkwrite static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma) { return pte_mkwrite_novma(pte); } #endif #if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite) static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { return pmd_mkwrite_novma(pmd); } #endif #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT struct mm_struct; static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t old_pte = ptep_get(ptep); set_pte_at(mm, address, ptep, pte_wrprotect(old_pte)); } #endif #ifndef wrprotect_ptes /** * wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same * folio. * @mm: Address space the pages are mapped into. * @addr: Address the first page is mapped at. * @ptep: Page table pointer for the first entry. * @nr: Number of entries to write-protect. * * May be overridden by the architecture; otherwise, implemented as a simple * loop over ptep_set_wrprotect(). * * Note that PTE bits in the PTE range besides the PFN can differ. For example, * some PTEs might be write-protected. * * Context: The caller holds the page table lock. The PTEs map consecutive * pages that belong to the same folio. The PTEs are all in the same PMD. */ static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned int nr) { for (;;) { ptep_set_wrprotect(mm, addr, ptep); if (--nr == 0) break; ptep++; addr += PAGE_SIZE; } } #endif /* * On some architectures hardware does not set page access bit when accessing * memory page, it is responsibility of software setting this bit. It brings * out extra page fault penalty to track page access bit. For optimization page * access bit can be set during all page fault flow on these arches. 
* To be differentiate with macro pte_mkyoung, this macro is used on platforms * where software maintains page access bit. */ #ifndef pte_sw_mkyoung static inline pte_t pte_sw_mkyoung(pte_t pte) { return pte; } #define pte_sw_mkyoung pte_sw_mkyoung #endif #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { pmd_t old_pmd = *pmdp; set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd)); } #else static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { BUILD_BUG(); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long address, pud_t *pudp) { pud_t old_pud = *pudp; set_pud_at(mm, address, pudp, pud_wrprotect(old_pud)); } #else static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long address, pud_t *pudp) { BUILD_BUG(); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #endif #ifndef pmdp_collapse_flush #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #else static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { BUILD_BUG(); return *pmdp; } #define pmdp_collapse_flush pmdp_collapse_flush #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); #endif #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #endif #ifndef arch_needs_pgtable_deposit #define arch_needs_pgtable_deposit() (false) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * This is an implementation of pmdp_establish() that is only suitable for an * architecture that doesn't have hardware dirty/accessed bits. In this case we * can't race with CPU which sets these bits and non-atomic approach is fine. */ static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { pmd_t old_pmd = *pmdp; set_pmd_at(vma->vm_mm, address, pmdp, pmd); return old_pmd; } #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD /* * pmdp_invalidate_ad() invalidates the PMD while changing a transparent * hugepage mapping in the page tables. This function is similar to * pmdp_invalidate(), but should only be used if the access and dirty bits would * not be cleared by the software in the new PMD value. The function ensures * that hardware changes of the access and dirty bits updates would not be lost. * * Doing so can allow in certain architectures to avoid a TLB flush in most * cases. Yet, another TLB flush might be necessary later if the PMD update * itself requires such flush (e.g., if protection was set to be stricter). Yet, * even when a TLB flush is needed because of the update, the caller may be able * to batch these TLB flushing operations, so fewer TLB flush operations are * needed. 
*/ extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #endif #ifndef __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { return pte_val(pte_a) == pte_val(pte_b); } #endif #ifndef __HAVE_ARCH_PTE_UNUSED /* * Some architectures provide facilities to virtualization guests * so that they can flag allocated pages as unused. This allows the * host to transparently reclaim unused pages. This function returns * whether the pte's page is unused. */ static inline int pte_unused(pte_t pte) { return 0; } #endif #ifndef pte_access_permitted #define pte_access_permitted(pte, write) \ (pte_present(pte) && (!(write) || pte_write(pte))) #endif #ifndef pmd_access_permitted #define pmd_access_permitted(pmd, write) \ (pmd_present(pmd) && (!(write) || pmd_write(pmd))) #endif #ifndef pud_access_permitted #define pud_access_permitted(pud, write) \ (pud_present(pud) && (!(write) || pud_write(pud))) #endif #ifndef p4d_access_permitted #define p4d_access_permitted(p4d, write) \ (p4d_present(p4d) && (!(write) || p4d_write(p4d))) #endif #ifndef pgd_access_permitted #define pgd_access_permitted(pgd, write) \ (pgd_present(pgd) && (!(write) || pgd_write(pgd))) #endif #ifndef __HAVE_ARCH_PMD_SAME static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { return pmd_val(pmd_a) == pmd_val(pmd_b); } #endif #ifndef pud_same static inline int pud_same(pud_t pud_a, pud_t pud_b) { return pud_val(pud_a) == pud_val(pud_b); } #define pud_same pud_same #endif #ifndef __HAVE_ARCH_P4D_SAME static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b) { return p4d_val(p4d_a) == p4d_val(p4d_b); } #endif #ifndef __HAVE_ARCH_PGD_SAME static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) { return pgd_val(pgd_a) == pgd_val(pgd_b); } #endif /* * Use set_p*_safe(), and elide TLB flushing, when confident that *no* * TLB flush will be required as a result of the "set". For example, use * in scenarios where it is known ahead of time that the routine is * setting non-present entries, or re-setting an existing entry to the * same value. Otherwise, use the typical "set" helpers and flush the * TLB. */ #define set_pte_safe(ptep, pte) \ ({ \ WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ set_pte(ptep, pte); \ }) #define set_pmd_safe(pmdp, pmd) \ ({ \ WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ set_pmd(pmdp, pmd); \ }) #define set_pud_safe(pudp, pud) \ ({ \ WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ set_pud(pudp, pud); \ }) #define set_p4d_safe(p4dp, p4d) \ ({ \ WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ set_p4d(p4dp, p4d); \ }) #define set_pgd_safe(pgdp, pgd) \ ({ \ WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ set_pgd(pgdp, pgd); \ }) #ifndef __HAVE_ARCH_DO_SWAP_PAGE /* * Some architectures support metadata associated with a page. When a * page is being swapped out, this metadata must be saved so it can be * restored when the page is swapped back in. SPARC M7 and newer * processors support an ADI (Application Data Integrity) tag for the * page as metadata for the page. arch_do_swap_page() can restore this * metadata when a page is swapped back in. */ static inline void arch_do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t pte, pte_t oldpte) { } #endif #ifndef __HAVE_ARCH_UNMAP_ONE /* * Some architectures support metadata associated with a page. When a * page is being swapped out, this metadata must be saved so it can be * restored when the page is swapped back in. 
SPARC M7 and newer * processors support an ADI (Application Data Integrity) tag for the * page as metadata for the page. arch_unmap_one() can save this * metadata on a swap-out of a page. */ static inline int arch_unmap_one(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t orig_pte) { return 0; } #endif /* * Allow architectures to preserve additional metadata associated with * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function * prototypes must be defined in the arch-specific asm/pgtable.h file. */ #ifndef __HAVE_ARCH_PREPARE_TO_SWAP static inline int arch_prepare_to_swap(struct page *page) { return 0; } #endif #ifndef __HAVE_ARCH_SWAP_INVALIDATE static inline void arch_swap_invalidate_page(int type, pgoff_t offset) { } static inline void arch_swap_invalidate_area(int type) { } #endif #ifndef __HAVE_ARCH_SWAP_RESTORE static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { } #endif #ifndef __HAVE_ARCH_PGD_OFFSET_GATE #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) #endif #ifndef __HAVE_ARCH_MOVE_PTE #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif #ifndef pte_accessible # define pte_accessible(mm, pte) ((void)(pte), 1) #endif #ifndef flush_tlb_fix_spurious_fault #define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address) #endif /* * When walking page tables, get the address of the next boundary, * or the end address of the range if that comes earlier. Although no * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout. */ #define pgd_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) #ifndef p4d_addr_end #define p4d_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \ (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) #endif #ifndef pud_addr_end #define pud_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) #endif #ifndef pmd_addr_end #define pmd_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) #endif /* * When walking page tables, we usually want to skip any p?d_none entries; * and any p?d_bad entries - reporting the error before resetting to none. * Do the tests inline, but report and clear the bad entry in mm/memory.c. 
*/ void pgd_clear_bad(pgd_t *); #ifndef __PAGETABLE_P4D_FOLDED void p4d_clear_bad(p4d_t *); #else #define p4d_clear_bad(p4d) do { } while (0) #endif #ifndef __PAGETABLE_PUD_FOLDED void pud_clear_bad(pud_t *); #else #define pud_clear_bad(p4d) do { } while (0) #endif void pmd_clear_bad(pmd_t *); static inline int pgd_none_or_clear_bad(pgd_t *pgd) { if (pgd_none(*pgd)) return 1; if (unlikely(pgd_bad(*pgd))) { pgd_clear_bad(pgd); return 1; } return 0; } static inline int p4d_none_or_clear_bad(p4d_t *p4d) { if (p4d_none(*p4d)) return 1; if (unlikely(p4d_bad(*p4d))) { p4d_clear_bad(p4d); return 1; } return 0; } static inline int pud_none_or_clear_bad(pud_t *pud) { if (pud_none(*pud)) return 1; if (unlikely(pud_bad(*pud))) { pud_clear_bad(pud); return 1; } return 0; } static inline int pmd_none_or_clear_bad(pmd_t *pmd) { if (pmd_none(*pmd)) return 1; if (unlikely(pmd_bad(*pmd))) { pmd_clear_bad(pmd); return 1; } return 0; } static inline pte_t __ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { /* * Get the current pte state, but zero it out to make it * non-present, preventing the hardware from asynchronously * updating it. */ return ptep_get_and_clear(vma->vm_mm, addr, ptep); } static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte) { /* * The pte is non-present, so there's no hardware state to * preserve. */ set_pte_at(vma->vm_mm, addr, ptep, pte); } #ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION /* * Start a pte protection read-modify-write transaction, which * protects against asynchronous hardware modifications to the pte. * The intention is not to prevent the hardware from making pte * updates, but to prevent any updates it may make from being lost. * * This does not protect against other software modifications of the * pte; the appropriate pte lock must be held over the transaction. * * Note that this interface is intended to be batchable, meaning that * ptep_modify_prot_commit may not actually update the pte, but merely * queue the update to be done at some later time. The update must be * actually committed before the pte lock is released, however. */ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return __ptep_modify_prot_start(vma, addr, ptep); } /* * Commit an update to a pte, leaving any hardware-controlled bits in * the PTE unmodified. */ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { __ptep_modify_prot_commit(vma, addr, ptep, pte); } #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ #endif /* CONFIG_MMU */ /* * No-op macros that just return the current protection value. Defined here * because these macros can be used even if CONFIG_MMU is not defined. 
*/ #ifndef pgprot_nx #define pgprot_nx(prot) (prot) #endif #ifndef pgprot_noncached #define pgprot_noncached(prot) (prot) #endif #ifndef pgprot_writecombine #define pgprot_writecombine pgprot_noncached #endif #ifndef pgprot_writethrough #define pgprot_writethrough pgprot_noncached #endif #ifndef pgprot_device #define pgprot_device pgprot_noncached #endif #ifndef pgprot_mhp #define pgprot_mhp(prot) (prot) #endif #ifdef CONFIG_MMU #ifndef pgprot_modify #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot))) newprot = pgprot_noncached(newprot); if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot))) newprot = pgprot_writecombine(newprot); if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot))) newprot = pgprot_device(newprot); return newprot; } #endif #endif /* CONFIG_MMU */ #ifndef pgprot_encrypted #define pgprot_encrypted(prot) (prot) #endif #ifndef pgprot_decrypted #define pgprot_decrypted(prot) (prot) #endif /* * A facility to provide batching of the reload of page tables and * other process state with the actual context switch code for * paravirtualized guests. By convention, only one of the batched * update (lazy) modes (CPU, MMU) should be active at any given time, * entry should never be nested, and entry and exits should always be * paired. This is for sanity of maintaining and reasoning about the * kernel code. In this case, the exit (end of the context switch) is * in architecture-specific code, and so doesn't need a generic * definition. */ #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH #define arch_start_context_switch(prev) do {} while (0) #endif #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) { return pmd; } static inline int pmd_swp_soft_dirty(pmd_t pmd) { return 0; } static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) { return pmd; } #endif #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */ static inline int pte_soft_dirty(pte_t pte) { return 0; } static inline int pmd_soft_dirty(pmd_t pmd) { return 0; } static inline pte_t pte_mksoft_dirty(pte_t pte) { return pte; } static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { return pmd; } static inline pte_t pte_clear_soft_dirty(pte_t pte) { return pte; } static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) { return pmd; } static inline pte_t pte_swp_mksoft_dirty(pte_t pte) { return pte; } static inline int pte_swp_soft_dirty(pte_t pte) { return 0; } static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte; } static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) { return pmd; } static inline int pmd_swp_soft_dirty(pmd_t pmd) { return 0; } static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) { return pmd; } #endif #ifndef __HAVE_PFNMAP_TRACKING /* * Interfaces that can be used by architecture code to keep track of * memory type of pfn mappings specified by the remap_pfn_range, * vmf_insert_pfn. */ /* * track_pfn_remap is called when a _new_ pfn mapping is being established * by remap_pfn_range() for physical range indicated by pfn and size. */ static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long addr, unsigned long size) { return 0; } /* * track_pfn_insert is called when a _new_ single pfn is established * by vmf_insert_pfn(). 
*/ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn) { } /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). */ static inline int track_pfn_copy(struct vm_area_struct *vma) { return 0; } /* * untrack_pfn is called while unmapping a pfnmap for a region. * untrack can be called for a specific region indicated by pfn and size or * can be for the entire vma (in which case pfn, size are zero). */ static inline void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { } /* * untrack_pfn_clear is called while mremapping a pfnmap for a new region * or fails to copy pgtable during duplicate vm area. */ static inline void untrack_pfn_clear(struct vm_area_struct *vma) { } #else extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long addr, unsigned long size); extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn); extern int track_pfn_copy(struct vm_area_struct *vma); extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked); extern void untrack_pfn_clear(struct vm_area_struct *vma); #endif #ifdef CONFIG_MMU #ifdef __HAVE_COLOR_ZERO_PAGE static inline int is_zero_pfn(unsigned long pfn) { extern unsigned long zero_pfn; unsigned long offset_from_zero_pfn = pfn - zero_pfn; return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); } #define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) #else static inline int is_zero_pfn(unsigned long pfn) { extern unsigned long zero_pfn; return pfn == zero_pfn; } static inline unsigned long my_zero_pfn(unsigned long addr) { extern unsigned long zero_pfn; return zero_pfn; } #endif #else static inline int is_zero_pfn(unsigned long pfn) { return 0; } static inline unsigned long my_zero_pfn(unsigned long addr) { return 0; } #endif /* CONFIG_MMU */ #ifdef CONFIG_MMU #ifndef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_trans_huge(pmd_t pmd) { return 0; } #ifndef pmd_write static inline int pmd_write(pmd_t pmd) { BUG(); return 0; } #endif /* pmd_write */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifndef pud_write static inline int pud_write(pud_t pud) { BUG(); return 0; } #endif /* pud_write */ #if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline int pmd_devmap(pmd_t pmd) { return 0; } static inline int pud_devmap(pud_t pud) { return 0; } static inline int pgd_devmap(pgd_t pgd) { return 0; } #endif #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) static inline int pud_trans_huge(pud_t pud) { return 0; } #endif static inline int pud_trans_unstable(pud_t *pud) { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) pud_t pudval = READ_ONCE(*pud); if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval)) return 1; if (unlikely(pud_bad(pudval))) { pud_clear_bad(pud); return 1; } #endif return 0; } #ifndef CONFIG_NUMA_BALANCING /* * In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is * perfectly valid to indicate "no" in that case, which is why our default * implementation defaults to "always no". * * In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE * page protection due to NUMA hinting. NUMA hinting faults only apply in * accessible VMAs. 
* * So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault, * looking at the VMA accessibility is sufficient. */ static inline int pte_protnone(pte_t pte) { return 0; } static inline int pmd_protnone(pmd_t pmd) { return 0; } #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_MMU */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #ifndef __PAGETABLE_P4D_FOLDED int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot); void p4d_clear_huge(p4d_t *p4d); #else static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { return 0; } static inline void p4d_clear_huge(p4d_t *p4d) { } #endif /* !__PAGETABLE_P4D_FOLDED */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); int p4d_free_pud_page(p4d_t *p4d, unsigned long addr); int pud_free_pmd_page(pud_t *pud, unsigned long addr); int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { return 0; } static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { return 0; } static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { return 0; } static inline void p4d_clear_huge(p4d_t *p4d) { } static inline int pud_clear_huge(pud_t *pud) { return 0; } static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) { return 0; } static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return 0; } static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return 0; } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * ARCHes with special requirements for evicting THP backing TLB entries can * implement this. Otherwise also, it can help optimize normal TLB flush in * THP regime. Stock flush_tlb_range() typically has optimization to nuke the * entire TLB if flush span is greater than a threshold, which will * likely be true for a single huge page. Thus a single THP flush will * invalidate the entire TLB which is not desirable. * e.g. see arch/arc: flush_pmd_tlb_range */ #define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) #define flush_pud_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) #else #define flush_pmd_tlb_range(vma, addr, end) BUILD_BUG() #define flush_pud_tlb_range(vma, addr, end) BUILD_BUG() #endif #endif struct file; int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot); #ifndef CONFIG_X86_ESPFIX64 static inline void init_espfix_bsp(void) { } #endif extern void __init pgtable_cache_init(void); #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) { return true; } static inline bool arch_has_pfn_modify_check(void) { return false; } #endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */ /* * Architecture PAGE_KERNEL_* fallbacks * * Some architectures don't define certain PAGE_KERNEL_* flags. This is either * because they really don't support them, or the port needs to be updated to * reflect the required functionality. Below are a set of relatively safe * fallbacks, as best effort, which we can count on in lieu of the architectures * not defining them on their own yet. 
*/ #ifndef PAGE_KERNEL_RO # define PAGE_KERNEL_RO PAGE_KERNEL #endif #ifndef PAGE_KERNEL_EXEC # define PAGE_KERNEL_EXEC PAGE_KERNEL #endif /* * Page Table Modification bits for pgtbl_mod_mask. * * These are used by the p?d_alloc_track*() set of functions an in the generic * vmalloc/ioremap code to track at which page-table levels entries have been * modified. Based on that the code can better decide when vmalloc and ioremap * mapping changes need to be synchronized to other page-tables in the system. */ #define __PGTBL_PGD_MODIFIED 0 #define __PGTBL_P4D_MODIFIED 1 #define __PGTBL_PUD_MODIFIED 2 #define __PGTBL_PMD_MODIFIED 3 #define __PGTBL_PTE_MODIFIED 4 #define PGTBL_PGD_MODIFIED BIT(__PGTBL_PGD_MODIFIED) #define PGTBL_P4D_MODIFIED BIT(__PGTBL_P4D_MODIFIED) #define PGTBL_PUD_MODIFIED BIT(__PGTBL_PUD_MODIFIED) #define PGTBL_PMD_MODIFIED BIT(__PGTBL_PMD_MODIFIED) #define PGTBL_PTE_MODIFIED BIT(__PGTBL_PTE_MODIFIED) /* Page-Table Modification Mask */ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) #ifdef CONFIG_PHYS_ADDR_T_64BIT /* * ZSMALLOC needs to know the highest PFN on 32-bit architectures * with physical address space extension, but falls back to * BITS_PER_LONG otherwise. */ #error Missing MAX_POSSIBLE_PHYSMEM_BITS definition #else #define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif #endif #ifndef has_transparent_hugepage #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE) #endif #ifndef has_transparent_pud_hugepage #define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) #endif /* * On some architectures it depends on the mm if the p4d/pud or pmd * layer of the page table hierarchy is folded or not. */ #ifndef mm_p4d_folded #define mm_p4d_folded(mm) __is_defined(__PAGETABLE_P4D_FOLDED) #endif #ifndef mm_pud_folded #define mm_pud_folded(mm) __is_defined(__PAGETABLE_PUD_FOLDED) #endif #ifndef mm_pmd_folded #define mm_pmd_folded(mm) __is_defined(__PAGETABLE_PMD_FOLDED) #endif #ifndef p4d_offset_lockless #define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address) #endif #ifndef pud_offset_lockless #define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address) #endif #ifndef pmd_offset_lockless #define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address) #endif /* * p?d_leaf() - true if this entry is a final mapping to a physical address. * This differs from p?d_huge() by the fact that they are always available (if * the architecture supports large pages at the appropriate level) even * if CONFIG_HUGETLB_PAGE is not defined. * Only meaningful when called on a valid entry. */ #ifndef pgd_leaf #define pgd_leaf(x) false #endif #ifndef p4d_leaf #define p4d_leaf(x) false #endif #ifndef pud_leaf #define pud_leaf(x) false #endif #ifndef pmd_leaf #define pmd_leaf(x) false #endif #ifndef pgd_leaf_size #define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT) #endif #ifndef p4d_leaf_size #define p4d_leaf_size(x) P4D_SIZE #endif #ifndef pud_leaf_size #define pud_leaf_size(x) PUD_SIZE #endif #ifndef pmd_leaf_size #define pmd_leaf_size(x) PMD_SIZE #endif #ifndef pte_leaf_size #define pte_leaf_size(x) PAGE_SIZE #endif /* * Some architectures have MMUs that are configurable or selectable at boot * time. These lead to variable PTRS_PER_x. For statically allocated arrays it * helps to have a static maximum value. 
*/
#ifndef MAX_PTRS_PER_PTE
#define MAX_PTRS_PER_PTE PTRS_PER_PTE
#endif

#ifndef MAX_PTRS_PER_PMD
#define MAX_PTRS_PER_PMD PTRS_PER_PMD
#endif

#ifndef MAX_PTRS_PER_PUD
#define MAX_PTRS_PER_PUD PTRS_PER_PUD
#endif

#ifndef MAX_PTRS_PER_P4D
#define MAX_PTRS_PER_P4D PTRS_PER_P4D
#endif

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware. The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
 * MAP_PRIVATE (with Enhanced PAN supported):
 *								r: (no) no
 *								w: (no) no
 *								x: (yes) yes
 */
#define DECLARE_VM_GET_PAGE_PROT					\
pgprot_t vm_get_page_prot(unsigned long vm_flags)			\
{									\
	return protection_map[vm_flags &				\
		(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];		\
}									\
EXPORT_SYMBOL(vm_get_page_prot);

#endif /* _LINUX_PGTABLE_H */
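The p?d_addr_end() and p?d_none_or_clear_bad() helpers above are designed to be combined into the usual page-table walking loop. The following is a minimal sketch of that pattern, for illustration only; the function name example_walk_pgd_range is hypothetical, and a real walker would also descend through the lower levels.

static void example_walk_pgd_range(struct mm_struct *mm, unsigned long addr,
				   unsigned long end)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	unsigned long next;

	do {
		/* Next PGDIR boundary, or 'end' if that comes first. */
		next = pgd_addr_end(addr, end);
		/* Skip empty entries; report and clear bad ones. */
		if (pgd_none_or_clear_bad(pgd))
			continue;
		/* A real walker would use p4d_offset()/p4d_addr_end() here. */
	} while (pgd++, addr = next, addr != end);
}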
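Likewise, ptep_modify_prot_start() and ptep_modify_prot_commit() above are meant to bracket a read-modify-write of a single PTE under the PTE lock. A hedged sketch of that usage follows; the helper name example_wrprotect_one_pte is made up for illustration, with mprotect-style code being the kind of real user.

static void example_wrprotect_one_pte(struct vm_area_struct *vma,
				      unsigned long addr, pte_t *ptep)
{
	pte_t old_pte, new_pte;

	/* The caller must hold the page table lock covering ptep. */
	old_pte = ptep_modify_prot_start(vma, addr, ptep);
	new_pte = pte_wrprotect(old_pte);
	/*
	 * Must be committed before the PTE lock is released; the
	 * architecture may batch the actual update.
	 */
	ptep_modify_prot_commit(vma, addr, ptep, old_pte, new_pte);
}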
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 */

#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/srcu.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

/*
 * Clear all of the marks on an inode when it is being evicted from core
 */
void __fsnotify_inode_delete(struct inode *inode)
{
	fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);

void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
	fsnotify_clear_marks_by_mount(mnt);
}

/**
 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
 * @sb: superblock being unmounted.
 *
 * Called during unmount with no locks held, so needs to be safe against
 * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
*/ static void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *iput_inode = NULL; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point * the inode cannot have any associated watches. */ spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { spin_unlock(&inode->i_lock); continue; } /* * If i_count is zero, the inode cannot have any watches and * doing an __iget/iput with SB_ACTIVE clear would actually * evict all inodes with zero i_count from icache which is * unnecessarily violent and may in fact be illegal to do. * However, we should have been called /after/ evict_inodes * removed all zero refcount inodes, in any case. Test to * be sure. */ if (!atomic_read(&inode->i_count)) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify_inode(inode, FS_UNMOUNT); fsnotify_inode_delete(inode); iput_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(iput_inode); } void fsnotify_sb_delete(struct super_block *sb) { fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ wait_var_event(&sb->s_fsnotify_connectors, !atomic_long_read(&sb->s_fsnotify_connectors)); } /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the * parent cares. Thus when an event happens on a child it can quickly tell * if there is a need to find a parent and send the event to the parent. */ void __fsnotify_update_child_dentry_flags(struct inode *inode) { struct dentry *alias; int watched; if (!S_ISDIR(inode->i_mode)) return; /* determine if the children should tell inode about their events */ watched = fsnotify_inode_watches_children(inode); spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ spin_lock(&alias->d_lock); hlist_for_each_entry(child, &alias->d_children, d_sib) { if (!child->d_inode) continue; spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (watched) child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } spin_unlock(&alias->d_lock); } spin_unlock(&inode->i_lock); } /* Are inode/sb/mount interested in parent and name info with this event? */ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, __u32 mask) { __u32 marks_mask = 0; /* We only send parent/name to inode/sb/mount for events on non-dir */ if (mask & FS_ISDIR) return false; /* * All events that are possible on child can also may be reported with * parent/name info to inode/sb/mount. Otherwise, a watching parent * could result in events reported with unexpected name info to sb/mount. 
*/ BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); /* Did either inode/sb/mount subscribe for events with parent/name? */ marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask); marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask); marks_mask |= fsnotify_parent_needed_mask(mnt_mask); /* Did they subscribe for this event with parent/name info? */ return mask & marks_mask; } /* Are there any inode/mount/sb objects that are interested in this event? */ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, __u32 mask) { __u32 marks_mask = inode->i_fsnotify_mask | mnt_mask | inode->i_sb->s_fsnotify_mask; return mask & marks_mask & ALL_FSNOTIFY_EVENTS; } /* * Notify this dentry's parent about a child's events with child name info * if parent is watching or if inode/sb/mount are interested in events with * parent and name info. * * Notify only the child without name info if parent is not watching and * inode/sb/mount are not interested in events with parent and name info. */ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { const struct path *path = fsnotify_data_path(data, data_type); __u32 mnt_mask = path ? real_mount(path->mnt)->mnt_fsnotify_mask : 0; struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; bool parent_needed, parent_interested; __u32 p_mask; struct inode *p_inode = NULL; struct name_snapshot name; struct qstr *file_name = NULL; int ret = 0; /* Optimize the likely case of nobody watching this path */ if (likely(!parent_watched && !fsnotify_object_watched(inode, mnt_mask, mask))) return 0; parent = NULL; parent_needed = fsnotify_event_needs_parent(inode, mnt_mask, mask); if (!parent_watched && !parent_needed) goto notify; /* Does parent inode care about events on children? */ parent = dget_parent(dentry); p_inode = parent->d_inode; p_mask = fsnotify_inode_watches_children(p_inode); if (unlikely(parent_watched && !p_mask)) __fsnotify_update_child_dentry_flags(p_inode); /* * Include parent/name in notification either if some notification * groups require parent info or the parent is interested in this event. 
*/ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); /* Notify both parent and child with child name info */ take_dentry_name_snapshot(&name, dentry); file_name = &name.name; if (parent_interested) mask |= FS_EVENT_ON_CHILD; } notify: ret = fsnotify(mask, data, data_type, p_inode, file_name, inode, 0); if (file_name) release_dentry_name_snapshot(&name); dput(parent); return ret; } EXPORT_SYMBOL_GPL(__fsnotify_parent); static int fsnotify_handle_inode_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); struct inode *inode = fsnotify_data_inode(data, data_type); const struct fsnotify_ops *ops = group->ops; if (WARN_ON_ONCE(!ops->handle_inode_event)) return 0; if (WARN_ON_ONCE(!inode && !dir)) return 0; if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) && path && d_unlinked(path->dentry)) return 0; /* Check interest of this mark in case event was sent with two marks */ if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS)) return 0; return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie); } static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info); int ret; if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) || WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) return 0; /* * For FS_RENAME, 'dir' is old dir and 'data' is new dentry. * The only ->handle_inode_event() backend that supports FS_RENAME is * dnotify, where it means file was renamed within same parent. */ if (mask & FS_RENAME) { struct dentry *moved = fsnotify_data_dentry(data, data_type); if (dir != moved->d_parent->d_inode) return 0; } if (parent_mark) { ret = fsnotify_handle_inode_event(group, parent_mark, mask, data, data_type, dir, name, 0); if (ret) return ret; } if (!inode_mark) return 0; if (mask & FS_EVENT_ON_CHILD) { /* * Some events can be sent on both parent dir and child marks * (e.g. FS_ATTRIB). If both parent dir and child are * watching, report the event once to parent dir with name (if * interested) and once to child without name (if interested). * The child watcher is expecting an event without a file name * and without the FS_EVENT_ON_CHILD flag. 
*/ mask &= ~FS_EVENT_ON_CHILD; dir = NULL; name = NULL; } return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type, dir, name, cookie); } static int send_to_group(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignore_mask = 0; bool is_dir = mask & FS_ISDIR; struct fsnotify_mark *mark; int type; if (!iter_info->report_mask) return 0; /* clear ignored on inode modification */ if (mask & FS_MODIFY) { fsnotify_foreach_iter_mark_type(iter_info, mark, type) { if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mark->ignore_mask = 0; } } /* Are any of the group marks interested in this event? */ fsnotify_foreach_iter_mark_type(iter_info, mark, type) { group = mark->group; marks_mask |= mark->mask; marks_ignore_mask |= fsnotify_effective_ignore_mask(mark, is_dir, type); } pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", __func__, group, mask, marks_mask, marks_ignore_mask, data, data_type, dir, cookie); if (!(test_mask & marks_mask & ~marks_ignore_mask)) return 0; if (group->ops->handle_event) { return group->ops->handle_event(group, mask, data, data_type, dir, file_name, cookie, iter_info); } return fsnotify_handle_event(group, mask, data, data_type, dir, file_name, cookie, iter_info); } static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) { struct fsnotify_mark_connector *conn; struct hlist_node *node = NULL; conn = srcu_dereference(*connp, &fsnotify_mark_srcu); if (conn) node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu); return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) { struct hlist_node *node = NULL; if (mark) node = srcu_dereference(mark->obj_list.next, &fsnotify_mark_srcu); return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } /* * iter_info is a multi head priority queue of marks. * Pick a subset of marks from queue heads, all with the same group * and set the report_mask to a subset of the selected marks. * Returns false if there are no more groups to iterate. */ static bool fsnotify_iter_select_report_types( struct fsnotify_iter_info *iter_info) { struct fsnotify_group *max_prio_group = NULL; struct fsnotify_mark *mark; int type; /* Choose max prio group among groups of all queue heads */ fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && fsnotify_compare_groups(max_prio_group, mark->group) > 0) max_prio_group = mark->group; } if (!max_prio_group) return false; /* Set the report mask for marks from same group as max prio group */ iter_info->current_group = max_prio_group; iter_info->report_mask = 0; fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && mark->group == iter_info->current_group) { /* * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode * is watching children and interested in this event, * which is an event possible on child. * But is *this mark* watching children? */ if (type == FSNOTIFY_ITER_TYPE_PARENT && !(mark->mask & FS_EVENT_ON_CHILD) && !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD)) continue; fsnotify_iter_set_report_type(iter_info, type); } } return true; } /* * Pop from iter_info multi head queue, the marks that belong to the group of * current iteration step. 
*/ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *mark; int type; /* * We cannot use fsnotify_foreach_iter_mark_type() here because we * may need to advance a mark of type X that belongs to current_group * but was not selected for reporting. */ fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && mark->group == iter_info->current_group) iter_info->marks[type] = fsnotify_next_mark(iter_info->marks[type]); } } /* * fsnotify - This is the main call to fsnotify. * * The VFS calls into hook specific functions in linux/fsnotify.h. * Those functions then in turn call here. Here will call out to all of the * registered fsnotify_group. Those groups can then use the notification event * in whatever means they feel necessary. * * @mask: event type and flags * @data: object that event happened on * @data_type: type of object for fanotify_data_XXX() accessors * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to * @file_name: optional file name associated with event * @inode: optional inode associated with event - * If @dir and @inode are both non-NULL, event may be * reported to both. * @cookie: inotify rename cookie */ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, struct inode *inode, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; struct dentry *moved; int inode2_type; int ret = 0; __u32 test_mask, marks_mask; if (path) mnt = real_mount(path->mnt); if (!inode) { /* Dirent event - report on TYPE_INODE to dir */ inode = dir; /* For FS_RENAME, inode is old_dir and inode2 is new_dir */ if (mask & FS_RENAME) { moved = fsnotify_data_dentry(data, data_type); inode2 = moved->d_parent->d_inode; inode2_type = FSNOTIFY_ITER_TYPE_INODE2; } } else if (mask & FS_EVENT_ON_CHILD) { /* * Event on child - report on TYPE_PARENT to dir if it is * watching children and on TYPE_INODE to child. */ inode2 = dir; inode2_type = FSNOTIFY_ITER_TYPE_PARENT; } /* * Optimization: srcu_read_lock() has a memory barrier which can * be expensive. It protects walking the *_fsnotify_marks lists. * However, if we do not walk the lists, we do not have to do * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ if (!sb->s_fsnotify_marks && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && (!inode2 || !inode2->i_fsnotify_marks)) return 0; marks_mask = sb->s_fsnotify_mask; if (mnt) marks_mask |= mnt->mnt_fsnotify_mask; if (inode) marks_mask |= inode->i_fsnotify_mask; if (inode2) marks_mask |= inode2->i_fsnotify_mask; /* * If this is a modify event we may need to clear some ignore masks. * In that case, the object with ignore masks will have the FS_MODIFY * event in its mask. * Otherwise, return if none of the marks care about this type of event. 
*/ test_mask = (mask & ALL_FSNOTIFY_EVENTS); if (!(test_mask & marks_mask)) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = fsnotify_first_mark(&sb->s_fsnotify_marks); if (mnt) { iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } if (inode) { iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] = fsnotify_first_mark(&inode->i_fsnotify_marks); } if (inode2) { iter_info.marks[inode2_type] = fsnotify_first_mark(&inode2->i_fsnotify_marks); } /* * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark * ignore masks are properly reflected for mount/sb mark notifications. * That's why this traversal is so complicated... */ while (fsnotify_iter_select_report_types(&iter_info)) { ret = send_to_group(mask, data, data_type, dir, file_name, cookie, &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; fsnotify_iter_next(&iter_info); } ret = 0; out: srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); return ret; } EXPORT_SYMBOL_GPL(fsnotify); static __init int fsnotify_init(void) { int ret; BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) panic("initializing fsnotify_mark_srcu"); fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, SLAB_PANIC); return 0; } core_initcall(fsnotify_init);
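For reference, the parent/name reporting implemented by __fsnotify_parent() above is what allows a userspace watch on a directory to receive events for its children together with the child name. Below is a small, self-contained userspace sketch using the inotify API; the watched path /tmp and the chosen flags are arbitrary illustration and not part of this file.

#include <stdio.h>
#include <sys/inotify.h>
#include <unistd.h>

int main(void)
{
	char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
	int fd = inotify_init1(0);
	ssize_t len;

	if (fd < 0 || inotify_add_watch(fd, "/tmp", IN_CREATE | IN_MODIFY) < 0)
		return 1;

	/* Blocks until something is created or modified under /tmp. */
	len = read(fd, buf, sizeof(buf));
	for (char *p = buf; len > 0 && p < buf + len; ) {
		struct inotify_event *ev = (struct inotify_event *)p;

		/* ev->name is the child's name relative to the watched directory. */
		printf("mask=0x%x name=%s\n", ev->mask, ev->len ? ev->name : "");
		p += sizeof(*ev) + ev->len;
	}
	close(fd);
	return 0;
}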
// SPDX-License-Identifier: GPL-2.0-or-later
/* Virtio ring implementation.
 *
 * Copyright 2007 Rusty Russell IBM Corporation
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/kmsan.h>
#include <linux/spinlock.h>
#include <xen/xen.h>

#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				      (_vq)->last_add_time)) > 100); \
		}						\
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&_vq->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif

struct vring_desc_state_split {
	void *data;			/* Data for callback. */
	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
};

struct vring_desc_state_packed {
	void *data;			/* Data for callback. */
	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
	u16 num;			/* Descriptor list length. */
	u16 last;			/* The last desc state in a list. */
};

struct vring_desc_extra {
	dma_addr_t addr;		/* Descriptor DMA addr. */
	u32 len;			/* Descriptor length. */
	u16 flags;			/* Descriptor flags. */
	u16 next;			/* The next desc state in a list. */
};

struct vring_virtqueue_split {
	/* Actual memory layout for this queue. */
	struct vring vring;

	/* Last written value to avail->flags */
	u16 avail_flags_shadow;

	/*
	 * Last written value to avail->idx in
	 * guest byte order.
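	 *
	 * The avail_flags_shadow and avail_idx_shadow fields cache the
	 * values last written by the driver, so they never have to be read
	 * back from the shared ring that the device is also accessing.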
*/ u16 avail_idx_shadow; /* Per-descriptor state. */ struct vring_desc_state_split *desc_state; struct vring_desc_extra *desc_extra; /* DMA address and size information */ dma_addr_t queue_dma_addr; size_t queue_size_in_bytes; /* * The parameters for creating vrings are reserved for creating new * vring. */ u32 vring_align; bool may_reduce_num; }; struct vring_virtqueue_packed { /* Actual memory layout for this queue. */ struct { unsigned int num; struct vring_packed_desc *desc; struct vring_packed_desc_event *driver; struct vring_packed_desc_event *device; } vring; /* Driver ring wrap counter. */ bool avail_wrap_counter; /* Avail used flags. */ u16 avail_used_flags; /* Index of the next avail descriptor. */ u16 next_avail_idx; /* * Last written value to driver->flags in * guest byte order. */ u16 event_flags_shadow; /* Per-descriptor state. */ struct vring_desc_state_packed *desc_state; struct vring_desc_extra *desc_extra; /* DMA address and size information */ dma_addr_t ring_dma_addr; dma_addr_t driver_event_dma_addr; dma_addr_t device_event_dma_addr; size_t ring_size_in_bytes; size_t event_size_in_bytes; }; struct vring_virtqueue { struct virtqueue vq; /* Is this a packed ring? */ bool packed_ring; /* Is DMA API used? */ bool use_dma_api; /* Can we use weak barriers? */ bool weak_barriers; /* Other side has made a mess, don't try any more. */ bool broken; /* Host supports indirect buffers */ bool indirect; /* Host publishes avail event idx */ bool event; /* Do DMA mapping by driver */ bool premapped; /* Do unmap or not for desc. Just when premapped is False and * use_dma_api is true, this is true. */ bool do_unmap; /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ unsigned int num_added; /* Last used index we've seen. * for split ring, it just contains last used index * for packed ring: * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. */ u16 last_used_idx; /* Hint for event idx: already triggered no need to disable. */ bool event_triggered; union { /* Available for split ring */ struct vring_virtqueue_split split; /* Available for packed ring */ struct vring_virtqueue_packed packed; }; /* How to notify other side. FIXME: commonalize hcalls! */ bool (*notify)(struct virtqueue *vq); /* DMA, allocation, and size information */ bool we_own_ring; /* Device used for doing DMA */ struct device *dma_dev; #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; /* Figure out if their kicks are too delayed. */ bool last_add_time_valid; ktime_t last_add_time; #endif }; static struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, bool weak_barriers, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev); static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); static void vring_free(struct virtqueue *_vq); /* * Helpers. */ #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, unsigned int total_sg) { /* * If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. 
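	 *
	 * An indirect submission occupies a single slot in the main ring no
	 * matter how many scatter-gather entries it carries, at the cost of
	 * an extra allocation and an extra DMA mapping for the table.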
FIXME: tune this threshold */ return (vq->indirect && total_sg > 1 && vq->vq.num_free); } /* * Modern virtio devices have feature bits to specify whether they need a * quirk and bypass the IOMMU. If not there, just use the DMA API. * * If there, the interaction between virtio and DMA API is messy. * * On most systems with virtio, physical addresses match bus addresses, * and it doesn't particularly matter whether we use the DMA API. * * On some systems, including Xen and any system with a physical device * that speaks virtio behind a physical IOMMU, we must use the DMA API * for virtio DMA to work at all. * * On other systems, including SPARC and PPC64, virtio-pci devices are * enumerated as though they are behind an IOMMU, but the virtio host * ignores the IOMMU, so we must either pretend that the IOMMU isn't * there or somehow map everything as the identity. * * For the time being, we preserve historic behavior and bypass the DMA * API. * * TODO: install a per-device DMA ops structure that does the right thing * taking into account all the above quirks, and use the DMA API * unconditionally on data path. */ static bool vring_use_dma_api(const struct virtio_device *vdev) { if (!virtio_has_dma_quirk(vdev)) return true; /* Otherwise, we are left to guess. */ /* * In theory, it's possible to have a buggy QEMU-supposed * emulated Q35 IOMMU and Xen enabled at the same time. On * such a configuration, virtio has never worked and will * not work without an even larger kludge. Instead, enable * the DMA API if we're a Xen guest, which at least allows * all of the sensible Xen configurations to work correctly. */ if (xen_domain()) return true; return false; } size_t virtio_max_dma_size(const struct virtio_device *vdev) { size_t max_segment_size = SIZE_MAX; if (vring_use_dma_api(vdev)) max_segment_size = dma_max_mapping_size(vdev->dev.parent); return max_segment_size; } EXPORT_SYMBOL_GPL(virtio_max_dma_size); static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, dma_addr_t *dma_handle, gfp_t flag, struct device *dma_dev) { if (vring_use_dma_api(vdev)) { return dma_alloc_coherent(dma_dev, size, dma_handle, flag); } else { void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); if (queue) { phys_addr_t phys_addr = virt_to_phys(queue); *dma_handle = (dma_addr_t)phys_addr; /* * Sanity check: make sure we dind't truncate * the address. The only arches I can find that * have 64-bit phys_addr_t but 32-bit dma_addr_t * are certain non-highmem MIPS and x86 * configurations, but these configurations * should never allocate physical pages above 32 * bits, so this is fine. Just in case, throw a * warning and abort if we end up with an * unrepresentable address. */ if (WARN_ON_ONCE(*dma_handle != phys_addr)) { free_pages_exact(queue, PAGE_ALIGN(size)); return NULL; } } return queue; } } static void vring_free_queue(struct virtio_device *vdev, size_t size, void *queue, dma_addr_t dma_handle, struct device *dma_dev) { if (vring_use_dma_api(vdev)) dma_free_coherent(dma_dev, size, queue, dma_handle); else free_pages_exact(queue, PAGE_ALIGN(size)); } /* * The DMA ops on various arches are rather gnarly right now, and * making all of the arch DMA ops work on the vring device itself * is a mess. */ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) { return vq->dma_dev; } /* Map one sg entry. 
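 *
 * Three cases follow: a premapped queue, where the caller has already
 * stored a DMA address in the scatterlist; the non-DMA-API case, where
 * the physical address is used directly; and the normal case, where the
 * entry is mapped with dma_map_page().  Returns 0 or -ENOMEM.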
*/ static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, enum dma_data_direction direction, dma_addr_t *addr) { if (vq->premapped) { *addr = sg_dma_address(sg); return 0; } if (!vq->use_dma_api) { /* * If DMA is not used, KMSAN doesn't know that the scatterlist * is initialized by the hardware. Explicitly check/unpoison it * depending on the direction. */ kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction); *addr = (dma_addr_t)sg_phys(sg); return 0; } /* * We can't use dma_map_sg, because we don't use scatterlists in * the way it expects (we don't guarantee that the scatterlist * will exist for the lifetime of the mapping). */ *addr = dma_map_page(vring_dma_dev(vq), sg_page(sg), sg->offset, sg->length, direction); if (dma_mapping_error(vring_dma_dev(vq), *addr)) return -ENOMEM; return 0; } static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, void *cpu_addr, size_t size, enum dma_data_direction direction) { if (!vq->use_dma_api) return (dma_addr_t)virt_to_phys(cpu_addr); return dma_map_single(vring_dma_dev(vq), cpu_addr, size, direction); } static int vring_mapping_error(const struct vring_virtqueue *vq, dma_addr_t addr) { if (!vq->use_dma_api) return 0; return dma_mapping_error(vring_dma_dev(vq), addr); } static void virtqueue_init(struct vring_virtqueue *vq, u32 num) { vq->vq.num_free = num; if (vq->packed_ring) vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); else vq->last_used_idx = 0; vq->event_triggered = false; vq->num_added = 0; #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; #endif } /* * Split ring specific functions - *_split(). */ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, const struct vring_desc *desc) { u16 flags; if (!vq->do_unmap) return; flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); dma_unmap_page(vring_dma_dev(vq), virtio64_to_cpu(vq->vq.vdev, desc->addr), virtio32_to_cpu(vq->vq.vdev, desc->len), (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, unsigned int i) { struct vring_desc_extra *extra = vq->split.desc_extra; u16 flags; flags = extra[i].flags; if (flags & VRING_DESC_F_INDIRECT) { if (!vq->use_dma_api) goto out; dma_unmap_single(vring_dma_dev(vq), extra[i].addr, extra[i].len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { if (!vq->do_unmap) goto out; dma_unmap_page(vring_dma_dev(vq), extra[i].addr, extra[i].len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } out: return extra[i].next; } static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, unsigned int total_sg, gfp_t gfp) { struct vring_desc *desc; unsigned int i; /* * We require lowmem mappings for the descriptors because * otherwise virt_to_phys will give us bogus addresses in the * virtqueue. 
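	 *
	 * Highmem pages need not have a permanent kernel mapping, so
	 * virt_to_phys() is only meaningful for lowmem; __GFP_HIGHMEM is
	 * therefore masked off below.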
*/ gfp &= ~__GFP_HIGHMEM; desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); if (!desc) return NULL; for (i = 0; i < total_sg; i++) desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); return desc; } static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, struct vring_desc *desc, unsigned int i, dma_addr_t addr, unsigned int len, u16 flags, bool indirect) { struct vring_virtqueue *vring = to_vvq(vq); struct vring_desc_extra *extra = vring->split.desc_extra; u16 next; desc[i].flags = cpu_to_virtio16(vq->vdev, flags); desc[i].addr = cpu_to_virtio64(vq->vdev, addr); desc[i].len = cpu_to_virtio32(vq->vdev, len); if (!indirect) { next = extra[i].next; desc[i].next = cpu_to_virtio16(vq->vdev, next); extra[i].addr = addr; extra[i].len = len; extra[i].flags = flags; } else next = virtio16_to_cpu(vq->vdev, desc[i].next); return next; } static inline int virtqueue_add_split(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, void *ctx, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; struct vring_desc *desc; unsigned int i, n, avail, descs_used, prev, err_idx; int head; bool indirect; START_USE(vq); BUG_ON(data == NULL); BUG_ON(ctx && vq->indirect); if (unlikely(vq->broken)) { END_USE(vq); return -EIO; } LAST_ADD_TIME_UPDATE(vq); BUG_ON(total_sg == 0); head = vq->free_head; if (virtqueue_use_indirect(vq, total_sg)) desc = alloc_indirect_split(_vq, total_sg, gfp); else { desc = NULL; WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); } if (desc) { /* Use a single buffer which doesn't continue */ indirect = true; /* Set up rest to use this indirect table. */ i = 0; descs_used = 1; } else { indirect = false; desc = vq->split.vring.desc; i = head; descs_used = total_sg; } if (unlikely(vq->vq.num_free < descs_used)) { pr_debug("Can't add buf len %i - avail = %i\n", descs_used, vq->vq.num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ if (out_sgs) vq->notify(&vq->vq); if (indirect) kfree(desc); END_USE(vq); return -ENOSPC; } for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { dma_addr_t addr; if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) goto unmap_release; prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, VRING_DESC_F_NEXT, indirect); } } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { dma_addr_t addr; if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) goto unmap_release; prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, indirect); } } /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); if (!indirect && vq->do_unmap) vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; if (indirect) { /* Now that the indirect table is filled in, map it. 
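		 *
		 * The table is mapped DMA_TO_DEVICE, since the device only
		 * reads it, and the descriptor at 'head' is then pointed at
		 * it with VRING_DESC_F_INDIRECT set.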
*/ dma_addr_t addr = vring_map_single( vq, desc, total_sg * sizeof(struct vring_desc), DMA_TO_DEVICE); if (vring_mapping_error(vq, addr)) { if (vq->premapped) goto free_indirect; goto unmap_release; } virtqueue_add_desc_split(_vq, vq->split.vring.desc, head, addr, total_sg * sizeof(struct vring_desc), VRING_DESC_F_INDIRECT, false); } /* We're using some buffers from the free list. */ vq->vq.num_free -= descs_used; /* Update free pointer */ if (indirect) vq->free_head = vq->split.desc_extra[head].next; else vq->free_head = i; /* Store token and indirect buffer state. */ vq->split.desc_state[head].data = data; if (indirect) vq->split.desc_state[head].indir_desc = desc; else vq->split.desc_state[head].indir_desc = ctx; /* Put entry in available array (but don't update avail->idx until they * do sync). */ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); /* Descriptors and available array need to be set before we expose the * new available array entries. */ virtio_wmb(vq->weak_barriers); vq->split.avail_idx_shadow++; vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->split.avail_idx_shadow); vq->num_added++; pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); /* This is very unlikely, but theoretically possible. Kick * just in case. */ if (unlikely(vq->num_added == (1 << 16) - 1)) virtqueue_kick(_vq); return 0; unmap_release: err_idx = i; if (indirect) i = 0; else i = head; for (n = 0; n < total_sg; n++) { if (i == err_idx) break; if (indirect) { vring_unmap_one_split_indirect(vq, &desc[i]); i = virtio16_to_cpu(_vq->vdev, desc[i].next); } else i = vring_unmap_one_split(vq, i); } free_indirect: if (indirect) kfree(desc); END_USE(vq); return -ENOMEM; } static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 new, old; bool needs_kick; START_USE(vq); /* We need to expose available array entries before checking avail * event. */ virtio_mb(vq->weak_barriers); old = vq->split.avail_idx_shadow - vq->num_added; new = vq->split.avail_idx_shadow; vq->num_added = 0; LAST_ADD_TIME_CHECK(vq); LAST_ADD_TIME_INVALID(vq); if (vq->event) { needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->split.vring)), new, old); } else { needs_kick = !(vq->split.vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY)); } END_USE(vq); return needs_kick; } static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, void **ctx) { unsigned int i, j; __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); /* Clear data ptr. */ vq->split.desc_state[head].data = NULL; /* Put back on free list: unmap first-level descriptors and find end */ i = head; while (vq->split.vring.desc[i].flags & nextflag) { vring_unmap_one_split(vq, i); i = vq->split.desc_extra[i].next; vq->vq.num_free++; } vring_unmap_one_split(vq, i); vq->split.desc_extra[i].next = vq->free_head; vq->free_head = head; /* Plus final descriptor */ vq->vq.num_free++; if (vq->indirect) { struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc; u32 len; /* Free the indirect table, if any, now that it's unmapped. 
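		 *
		 * desc_extra[head].len recorded the table size in bytes, so
		 * the loop below knows how many entries to unmap.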
*/ if (!indir_desc) return; len = vq->split.desc_extra[head].len; BUG_ON(!(vq->split.desc_extra[head].flags & VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); if (vq->do_unmap) { for (j = 0; j < len / sizeof(struct vring_desc); j++) vring_unmap_one_split_indirect(vq, &indir_desc[j]); } kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; } else if (ctx) { *ctx = vq->split.desc_state[head].indir_desc; } } static bool more_used_split(const struct vring_virtqueue *vq) { return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->split.vring.used->idx); } static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, unsigned int *len, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); void *ret; unsigned int i; u16 last_used; START_USE(vq); if (unlikely(vq->broken)) { END_USE(vq); return NULL; } if (!more_used_split(vq)) { pr_debug("No more buffers in queue\n"); END_USE(vq); return NULL; } /* Only get used array entries after they have been exposed by host. */ virtio_rmb(vq->weak_barriers); last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); i = virtio32_to_cpu(_vq->vdev, vq->split.vring.used->ring[last_used].id); *len = virtio32_to_cpu(_vq->vdev, vq->split.vring.used->ring[last_used].len); if (unlikely(i >= vq->split.vring.num)) { BAD_RING(vq, "id %u out of range\n", i); return NULL; } if (unlikely(!vq->split.desc_state[i].data)) { BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } /* detach_buf_split clears data, so grab it now. */ ret = vq->split.desc_state[i].data; detach_buf_split(vq, i, ctx); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before * the read in the next get_buf call. */ if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) virtio_store_mb(vq->weak_barriers, &vring_used_event(&vq->split.vring), cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); LAST_ADD_TIME_INVALID(vq); END_USE(vq); return ret; } static void virtqueue_disable_cb_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; /* * If device triggered an event already it won't trigger one again: * no need to disable. */ if (vq->event_triggered) return; if (vq->event) /* TODO: this is a hack. Figure out a cleaner value to write. */ vring_used_event(&vq->split.vring) = 0x0; else vq->split.vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->split.avail_flags_shadow); } } static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 last_used_idx; START_USE(vq); /* We optimistically turn back on interrupts, then check if there was * more to do. */ /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next * entry. Always do both to keep code simple. 
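	 *
	 * A typical caller pairs this with virtqueue_poll(); roughly (a
	 * sketch of the calling convention, not code from this file):
	 *
	 *	opaque = virtqueue_enable_cb_prepare(vq);
	 *	if (virtqueue_poll(vq, opaque)) {
	 *		virtqueue_disable_cb(vq);
	 *		... more buffers arrived, keep processing ...
	 *	}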
*/ if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) vq->split.vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->split.avail_flags_shadow); } vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx); END_USE(vq); return last_used_idx; } static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx); } static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 bufs; START_USE(vq); /* We optimistically turn back on interrupts, then check if there was * more to do. */ /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next * entry. Always update the event index to keep code simple. */ if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) vq->split.vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->split.avail_flags_shadow); } /* TODO: tune this threshold */ bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; virtio_store_mb(vq->weak_barriers, &vring_used_event(&vq->split.vring), cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) - vq->last_used_idx) > bufs)) { END_USE(vq); return false; } END_USE(vq); return true; } static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i; void *buf; START_USE(vq); for (i = 0; i < vq->split.vring.num; i++) { if (!vq->split.desc_state[i].data) continue; /* detach_buf_split clears data, so grab it now. */ buf = vq->split.desc_state[i].data; detach_buf_split(vq, i, NULL); vq->split.avail_idx_shadow--; vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->split.avail_idx_shadow); END_USE(vq); return buf; } /* That should have freed everything. */ BUG_ON(vq->vq.num_free != vq->split.vring.num); END_USE(vq); return NULL; } static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split, struct vring_virtqueue *vq) { struct virtio_device *vdev; vdev = vq->vq.vdev; vring_split->avail_flags_shadow = 0; vring_split->avail_idx_shadow = 0; /* No callback? Tell other side not to bother us. */ if (!vq->vq.callback) { vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) vring_split->vring.avail->flags = cpu_to_virtio16(vdev, vring_split->avail_flags_shadow); } } static void virtqueue_reinit_split(struct vring_virtqueue *vq) { int num; num = vq->split.vring.num; vq->split.vring.avail->flags = 0; vq->split.vring.avail->idx = 0; /* reset avail event */ vq->split.vring.avail->ring[num] = 0; vq->split.vring.used->flags = 0; vq->split.vring.used->idx = 0; /* reset used event */ *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0; virtqueue_init(vq, num); virtqueue_vring_init_split(&vq->split, vq); } static void virtqueue_vring_attach_split(struct vring_virtqueue *vq, struct vring_virtqueue_split *vring_split) { vq->split = *vring_split; /* Put everything in free lists. 
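	 *
	 * vring_alloc_desc_extra() already chained desc_extra[i].next to
	 * i + 1, so only the head of the free list needs to be set here.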
*/ vq->free_head = 0; } static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) { struct vring_desc_state_split *state; struct vring_desc_extra *extra; u32 num = vring_split->vring.num; state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL); if (!state) goto err_state; extra = vring_alloc_desc_extra(num); if (!extra) goto err_extra; memset(state, 0, num * sizeof(struct vring_desc_state_split)); vring_split->desc_state = state; vring_split->desc_extra = extra; return 0; err_extra: kfree(state); err_state: return -ENOMEM; } static void vring_free_split(struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, struct device *dma_dev) { vring_free_queue(vdev, vring_split->queue_size_in_bytes, vring_split->vring.desc, vring_split->queue_dma_addr, dma_dev); kfree(vring_split->desc_state); kfree(vring_split->desc_extra); } static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, u32 num, unsigned int vring_align, bool may_reduce_num, struct device *dma_dev) { void *queue = NULL; dma_addr_t dma_addr; /* We assume num is a power of 2. */ if (!is_power_of_2(num)) { dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); return -EINVAL; } /* TODO: allocate each queue chunk individually */ for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, dma_dev); if (queue) break; if (!may_reduce_num) return -ENOMEM; } if (!num) return -ENOMEM; if (!queue) { /* Try to get a single page. You are my only hope! */ queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, GFP_KERNEL | __GFP_ZERO, dma_dev); } if (!queue) return -ENOMEM; vring_init(&vring_split->vring, num, queue, vring_align); vring_split->queue_dma_addr = dma_addr; vring_split->queue_size_in_bytes = vring_size(num, vring_align); vring_split->vring_align = vring_align; vring_split->may_reduce_num = may_reduce_num; return 0; } static struct virtqueue *vring_create_virtqueue_split( unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev) { struct vring_virtqueue_split vring_split = {}; struct virtqueue *vq; int err; err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, may_reduce_num, dma_dev); if (err) return NULL; vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, context, notify, callback, name, dma_dev); if (!vq) { vring_free_split(&vring_split, vdev, dma_dev); return NULL; } to_vvq(vq)->we_own_ring = true; return vq; } static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) { struct vring_virtqueue_split vring_split = {}; struct vring_virtqueue *vq = to_vvq(_vq); struct virtio_device *vdev = _vq->vdev; int err; err = vring_alloc_queue_split(&vring_split, vdev, num, vq->split.vring_align, vq->split.may_reduce_num, vring_dma_dev(vq)); if (err) goto err; err = vring_alloc_state_extra_split(&vring_split); if (err) goto err_state_extra; vring_free(&vq->vq); virtqueue_vring_init_split(&vring_split, vq); virtqueue_init(vq, vring_split.vring.num); virtqueue_vring_attach_split(vq, &vring_split); return 0; err_state_extra: vring_free_split(&vring_split, vdev, vring_dma_dev(vq)); err: virtqueue_reinit_split(vq); return -ENOMEM; } /* * Packed ring specific functions 
- *_packed(). */ static bool packed_used_wrap_counter(u16 last_used_idx) { return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); } static u16 packed_last_used(u16 last_used_idx) { return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); } static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, const struct vring_desc_extra *extra) { u16 flags; flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { if (!vq->use_dma_api) return; dma_unmap_single(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { if (!vq->do_unmap) return; dma_unmap_page(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } } static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, const struct vring_packed_desc *desc) { u16 flags; if (!vq->do_unmap) return; flags = le16_to_cpu(desc->flags); dma_unmap_page(vring_dma_dev(vq), le64_to_cpu(desc->addr), le32_to_cpu(desc->len), (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, gfp_t gfp) { struct vring_packed_desc *desc; /* * We require lowmem mappings for the descriptors because * otherwise virt_to_phys will give us bogus addresses in the * virtqueue. */ gfp &= ~__GFP_HIGHMEM; desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); return desc; } static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, struct scatterlist *sgs[], unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, gfp_t gfp) { struct vring_packed_desc *desc; struct scatterlist *sg; unsigned int i, n, err_idx; u16 head, id; dma_addr_t addr; head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); if (!desc) return -ENOMEM; if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); kfree(desc); END_USE(vq); return -ENOSPC; } i = 0; id = vq->free_head; BUG_ON(id == vq->packed.vring.num); for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { if (vring_map_one_sg(vq, sg, n < out_sgs ? DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; desc[i].flags = cpu_to_le16(n < out_sgs ? 0 : VRING_DESC_F_WRITE); desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); i++; } } /* Now that the indirect table is filled in, map it. */ addr = vring_map_single(vq, desc, total_sg * sizeof(struct vring_packed_desc), DMA_TO_DEVICE); if (vring_mapping_error(vq, addr)) { if (vq->premapped) goto free_desc; goto unmap_release; } vq->packed.vring.desc[head].addr = cpu_to_le64(addr); vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * sizeof(struct vring_packed_desc)); vq->packed.vring.desc[head].id = cpu_to_le16(id); if (vq->use_dma_api) { vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | vq->packed.avail_used_flags; } /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising * the list are made available. */ virtio_wmb(vq->weak_barriers); vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | vq->packed.avail_used_flags); /* We're using some buffers from the free list. 
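	 *
	 * An indirect submission takes exactly one slot in the packed ring
	 * regardless of total_sg, so num_free drops by one.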
*/ vq->vq.num_free -= 1; /* Update free pointer */ n = head + 1; if (n >= vq->packed.vring.num) { n = 0; vq->packed.avail_wrap_counter ^= 1; vq->packed.avail_used_flags ^= 1 << VRING_PACKED_DESC_F_AVAIL | 1 << VRING_PACKED_DESC_F_USED; } vq->packed.next_avail_idx = n; vq->free_head = vq->packed.desc_extra[id].next; /* Store token and indirect buffer state. */ vq->packed.desc_state[id].num = 1; vq->packed.desc_state[id].data = data; vq->packed.desc_state[id].indir_desc = desc; vq->packed.desc_state[id].last = id; vq->num_added += 1; pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); return 0; unmap_release: err_idx = i; for (i = 0; i < err_idx; i++) vring_unmap_desc_packed(vq, &desc[i]); free_desc: kfree(desc); END_USE(vq); return -ENOMEM; } static inline int virtqueue_add_packed(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, void *ctx, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); struct vring_packed_desc *desc; struct scatterlist *sg; unsigned int i, n, c, descs_used, err_idx; __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; int err; START_USE(vq); BUG_ON(data == NULL); BUG_ON(ctx && vq->indirect); if (unlikely(vq->broken)) { END_USE(vq); return -EIO; } LAST_ADD_TIME_UPDATE(vq); BUG_ON(total_sg == 0); if (virtqueue_use_indirect(vq, total_sg)) { err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); if (err != -ENOMEM) { END_USE(vq); return err; } /* fall back on direct */ } head = vq->packed.next_avail_idx; avail_used_flags = vq->packed.avail_used_flags; WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); desc = vq->packed.vring.desc; i = head; descs_used = total_sg; if (unlikely(vq->vq.num_free < descs_used)) { pr_debug("Can't add buf len %i - avail = %i\n", descs_used, vq->vq.num_free); END_USE(vq); return -ENOSPC; } id = vq->free_head; BUG_ON(id == vq->packed.vring.num); curr = id; c = 0; for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { dma_addr_t addr; if (vring_map_one_sg(vq, sg, n < out_sgs ? DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; flags = cpu_to_le16(vq->packed.avail_used_flags | (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); if (i == head) head_flags = flags; else desc[i].flags = flags; desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); desc[i].id = cpu_to_le16(id); if (unlikely(vq->use_dma_api)) { vq->packed.desc_extra[curr].addr = addr; vq->packed.desc_extra[curr].len = sg->length; vq->packed.desc_extra[curr].flags = le16_to_cpu(flags); } prev = curr; curr = vq->packed.desc_extra[curr].next; if ((unlikely(++i >= vq->packed.vring.num))) { i = 0; vq->packed.avail_used_flags ^= 1 << VRING_PACKED_DESC_F_AVAIL | 1 << VRING_PACKED_DESC_F_USED; } } } if (i <= head) vq->packed.avail_wrap_counter ^= 1; /* We're using some buffers from the free list. */ vq->vq.num_free -= descs_used; /* Update free pointer */ vq->packed.next_avail_idx = i; vq->free_head = curr; /* Store token. */ vq->packed.desc_state[id].num = descs_used; vq->packed.desc_state[id].data = data; vq->packed.desc_state[id].indir_desc = ctx; vq->packed.desc_state[id].last = prev; /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising * the list are made available. 
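	 *
	 * This is why head_flags was held back above and is only written
	 * after the write barrier below.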
*/ virtio_wmb(vq->weak_barriers); vq->packed.vring.desc[head].flags = head_flags; vq->num_added += descs_used; pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); return 0; unmap_release: err_idx = i; i = head; curr = vq->free_head; vq->packed.avail_used_flags = avail_used_flags; for (n = 0; n < total_sg; n++) { if (i == err_idx) break; vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; i++; if (i >= vq->packed.vring.num) i = 0; } END_USE(vq); return -EIO; } static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 new, old, off_wrap, flags, wrap_counter, event_idx; bool needs_kick; union { struct { __le16 off_wrap; __le16 flags; }; u32 u32; } snapshot; START_USE(vq); /* * We need to expose the new flags value before checking notification * suppressions. */ virtio_mb(vq->weak_barriers); old = vq->packed.next_avail_idx - vq->num_added; new = vq->packed.next_avail_idx; vq->num_added = 0; snapshot.u32 = *(u32 *)vq->packed.vring.device; flags = le16_to_cpu(snapshot.flags); LAST_ADD_TIME_CHECK(vq); LAST_ADD_TIME_INVALID(vq); if (flags != VRING_PACKED_EVENT_FLAG_DESC) { needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); goto out; } off_wrap = le16_to_cpu(snapshot.off_wrap); wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); if (wrap_counter != vq->packed.avail_wrap_counter) event_idx -= vq->packed.vring.num; needs_kick = vring_need_event(event_idx, new, old); out: END_USE(vq); return needs_kick; } static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int id, void **ctx) { struct vring_desc_state_packed *state = NULL; struct vring_packed_desc *desc; unsigned int i, curr; state = &vq->packed.desc_state[id]; /* Clear data ptr. */ state->data = NULL; vq->packed.desc_extra[state->last].next = vq->free_head; vq->free_head = id; vq->vq.num_free += state->num; if (unlikely(vq->use_dma_api)) { curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; } } if (vq->indirect) { u32 len; /* Free the indirect table, if any, now that it's unmapped. 
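		 *
		 * Here desc_extra[id].len holds the table size in bytes, and
		 * the entries are only unmapped when do_unmap is set.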
*/ desc = state->indir_desc; if (!desc) return; if (vq->do_unmap) { len = vq->packed.desc_extra[id].len; for (i = 0; i < len / sizeof(struct vring_packed_desc); i++) vring_unmap_desc_packed(vq, &desc[i]); } kfree(desc); state->indir_desc = NULL; } else if (ctx) { *ctx = state->indir_desc; } } static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, u16 idx, bool used_wrap_counter) { bool avail, used; u16 flags; flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); return avail == used && used == used_wrap_counter; } static bool more_used_packed(const struct vring_virtqueue *vq) { u16 last_used; u16 last_used_idx; bool used_wrap_counter; last_used_idx = READ_ONCE(vq->last_used_idx); last_used = packed_last_used(last_used_idx); used_wrap_counter = packed_used_wrap_counter(last_used_idx); return is_used_desc_packed(vq, last_used, used_wrap_counter); } static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, unsigned int *len, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); u16 last_used, id, last_used_idx; bool used_wrap_counter; void *ret; START_USE(vq); if (unlikely(vq->broken)) { END_USE(vq); return NULL; } if (!more_used_packed(vq)) { pr_debug("No more buffers in queue\n"); END_USE(vq); return NULL; } /* Only get used elements after they have been exposed by host. */ virtio_rmb(vq->weak_barriers); last_used_idx = READ_ONCE(vq->last_used_idx); used_wrap_counter = packed_used_wrap_counter(last_used_idx); last_used = packed_last_used(last_used_idx); id = le16_to_cpu(vq->packed.vring.desc[last_used].id); *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); if (unlikely(id >= vq->packed.vring.num)) { BAD_RING(vq, "id %u out of range\n", id); return NULL; } if (unlikely(!vq->packed.desc_state[id].data)) { BAD_RING(vq, "id %u is not a head!\n", id); return NULL; } /* detach_buf_packed clears data, so grab it now. */ ret = vq->packed.desc_state[id].data; detach_buf_packed(vq, id, ctx); last_used += vq->packed.desc_state[id].num; if (unlikely(last_used >= vq->packed.vring.num)) { last_used -= vq->packed.vring.num; used_wrap_counter ^= 1; } last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); WRITE_ONCE(vq->last_used_idx, last_used); /* * If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before * the read in the next get_buf call. */ if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) virtio_store_mb(vq->weak_barriers, &vq->packed.vring.driver->off_wrap, cpu_to_le16(vq->last_used_idx)); LAST_ADD_TIME_INVALID(vq); END_USE(vq); return ret; } static void virtqueue_disable_cb_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; /* * If device triggered an event already it won't trigger one again: * no need to disable. */ if (vq->event_triggered) return; vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } } static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); START_USE(vq); /* * We optimistically turn back on interrupts, then check if there was * more to do. 
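	 *
	 * With VIRTIO_RING_F_EVENT_IDX the driver publishes an event offset
	 * and wrap counter and uses VRING_PACKED_EVENT_FLAG_DESC; otherwise
	 * the plain VRING_PACKED_EVENT_FLAG_ENABLE value is written.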
*/ if (vq->event) { vq->packed.vring.driver->off_wrap = cpu_to_le16(vq->last_used_idx); /* * We need to update event offset and event wrap * counter first before updating event flags. */ virtio_wmb(vq->weak_barriers); } if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { vq->packed.event_flags_shadow = vq->event ? VRING_PACKED_EVENT_FLAG_DESC : VRING_PACKED_EVENT_FLAG_ENABLE; vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } END_USE(vq); return vq->last_used_idx; } static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) { struct vring_virtqueue *vq = to_vvq(_vq); bool wrap_counter; u16 used_idx; wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); return is_used_desc_packed(vq, used_idx, wrap_counter); } static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 used_idx, wrap_counter, last_used_idx; u16 bufs; START_USE(vq); /* * We optimistically turn back on interrupts, then check if there was * more to do. */ if (vq->event) { /* TODO: tune this threshold */ bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; last_used_idx = READ_ONCE(vq->last_used_idx); wrap_counter = packed_used_wrap_counter(last_used_idx); used_idx = packed_last_used(last_used_idx) + bufs; if (used_idx >= vq->packed.vring.num) { used_idx -= vq->packed.vring.num; wrap_counter ^= 1; } vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); /* * We need to update event offset and event wrap * counter first before updating event flags. */ virtio_wmb(vq->weak_barriers); } if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { vq->packed.event_flags_shadow = vq->event ? VRING_PACKED_EVENT_FLAG_DESC : VRING_PACKED_EVENT_FLAG_ENABLE; vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } /* * We need to update event suppression structure first * before re-checking for more used buffers. */ virtio_mb(vq->weak_barriers); last_used_idx = READ_ONCE(vq->last_used_idx); wrap_counter = packed_used_wrap_counter(last_used_idx); used_idx = packed_last_used(last_used_idx); if (is_used_desc_packed(vq, used_idx, wrap_counter)) { END_USE(vq); return false; } END_USE(vq); return true; } static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i; void *buf; START_USE(vq); for (i = 0; i < vq->packed.vring.num; i++) { if (!vq->packed.desc_state[i].data) continue; /* detach_buf clears data, so grab it now. */ buf = vq->packed.desc_state[i].data; detach_buf_packed(vq, i, NULL); END_USE(vq); return buf; } /* That should have freed everything. 
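	 *
	 * This path is only legal on an inactive queue (device shutdown or
	 * queue reset), so every descriptor should be back on the free list.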
*/ BUG_ON(vq->vq.num_free != vq->packed.vring.num); END_USE(vq); return NULL; } static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) { struct vring_desc_extra *desc_extra; unsigned int i; desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), GFP_KERNEL); if (!desc_extra) return NULL; memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); for (i = 0; i < num - 1; i++) desc_extra[i].next = i + 1; return desc_extra; } static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, struct device *dma_dev) { if (vring_packed->vring.desc) vring_free_queue(vdev, vring_packed->ring_size_in_bytes, vring_packed->vring.desc, vring_packed->ring_dma_addr, dma_dev); if (vring_packed->vring.driver) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.driver, vring_packed->driver_event_dma_addr, dma_dev); if (vring_packed->vring.device) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.device, vring_packed->device_event_dma_addr, dma_dev); kfree(vring_packed->desc_state); kfree(vring_packed->desc_extra); } static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, u32 num, struct device *dma_dev) { struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; size_t ring_size_in_bytes, event_size_in_bytes; ring_size_in_bytes = num * sizeof(struct vring_packed_desc); ring = vring_alloc_queue(vdev, ring_size_in_bytes, &ring_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, dma_dev); if (!ring) goto err; vring_packed->vring.desc = ring; vring_packed->ring_dma_addr = ring_dma_addr; vring_packed->ring_size_in_bytes = ring_size_in_bytes; event_size_in_bytes = sizeof(struct vring_packed_desc_event); driver = vring_alloc_queue(vdev, event_size_in_bytes, &driver_event_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, dma_dev); if (!driver) goto err; vring_packed->vring.driver = driver; vring_packed->event_size_in_bytes = event_size_in_bytes; vring_packed->driver_event_dma_addr = driver_event_dma_addr; device = vring_alloc_queue(vdev, event_size_in_bytes, &device_event_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, dma_dev); if (!device) goto err; vring_packed->vring.device = device; vring_packed->device_event_dma_addr = device_event_dma_addr; vring_packed->vring.num = num; return 0; err: vring_free_packed(vring_packed, vdev, dma_dev); return -ENOMEM; } static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed) { struct vring_desc_state_packed *state; struct vring_desc_extra *extra; u32 num = vring_packed->vring.num; state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL); if (!state) goto err_desc_state; memset(state, 0, num * sizeof(struct vring_desc_state_packed)); extra = vring_alloc_desc_extra(num); if (!extra) goto err_desc_extra; vring_packed->desc_state = state; vring_packed->desc_extra = extra; return 0; err_desc_extra: kfree(state); err_desc_state: return -ENOMEM; } static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed, bool callback) { vring_packed->next_avail_idx = 0; vring_packed->avail_wrap_counter = 1; vring_packed->event_flags_shadow = 0; vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; /* No callback? Tell other side not to bother us. 
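	 *
	 * VRING_PACKED_EVENT_FLAG_DISABLE in driver->flags asks the device
	 * not to send used-buffer notifications for this queue.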
*/ if (!callback) { vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; vring_packed->vring.driver->flags = cpu_to_le16(vring_packed->event_flags_shadow); } } static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq, struct vring_virtqueue_packed *vring_packed) { vq->packed = *vring_packed; /* Put everything in free lists. */ vq->free_head = 0; } static void virtqueue_reinit_packed(struct vring_virtqueue *vq) { memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes); memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes); /* we need to reset the desc.flags. For more, see is_used_desc_packed() */ memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes); virtqueue_init(vq, vq->packed.vring.num); virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); } static struct virtqueue *vring_create_virtqueue_packed( unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev) { struct vring_virtqueue_packed vring_packed = {}; struct vring_virtqueue *vq; int err; if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev)) goto err_ring; vq = kmalloc(sizeof(*vq), GFP_KERNEL); if (!vq) goto err_vq; vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; vq->vq.index = index; vq->vq.reset = false; vq->we_own_ring = true; vq->notify = notify; vq->weak_barriers = weak_barriers; #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION vq->broken = true; #else vq->broken = false; #endif vq->packed_ring = true; vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); vq->premapped = false; vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) vq->weak_barriers = false; err = vring_alloc_state_extra_packed(&vring_packed); if (err) goto err_state_extra; virtqueue_vring_init_packed(&vring_packed, !!callback); virtqueue_init(vq, num); virtqueue_vring_attach_packed(vq, &vring_packed); spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); spin_unlock(&vdev->vqs_list_lock); return &vq->vq; err_state_extra: kfree(vq); err_vq: vring_free_packed(&vring_packed, vdev, dma_dev); err_ring: return NULL; } static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) { struct vring_virtqueue_packed vring_packed = {}; struct vring_virtqueue *vq = to_vvq(_vq); struct virtio_device *vdev = _vq->vdev; int err; if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) goto err_ring; err = vring_alloc_state_extra_packed(&vring_packed); if (err) goto err_state_extra; vring_free(&vq->vq); virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); virtqueue_init(vq, vring_packed.vring.num); virtqueue_vring_attach_packed(vq, &vring_packed); return 0; err_state_extra: vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); err_ring: virtqueue_reinit_packed(vq); return -ENOMEM; } static int virtqueue_disable_and_recycle(struct virtqueue *_vq, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); struct virtio_device *vdev = vq->vq.vdev; void *buf; int err; if (!vq->we_own_ring) return -EPERM; if (!vdev->config->disable_vq_and_reset) return -ENOENT; if (!vdev->config->enable_vq_after_reset) return -ENOENT; 
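	/*
	 * The transport resets the queue and any buffers still queued are
	 * handed back one by one through recycle().  A recycle() callback
	 * usually just releases the driver's per-buffer state; a minimal
	 * sketch (illustrative only, not code from this file):
	 *
	 *	static void my_recycle(struct virtqueue *vq, void *buf)
	 *	{
	 *		kfree(buf);
	 *	}
	 */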
err = vdev->config->disable_vq_and_reset(_vq); if (err) return err; while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) recycle(_vq, buf); return 0; } static int virtqueue_enable_after_reset(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); struct virtio_device *vdev = vq->vq.vdev; if (vdev->config->enable_vq_after_reset(_vq)) return -EBUSY; return 0; } /* * Generic functions and exported symbols. */ static inline int virtqueue_add(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, void *ctx, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, out_sgs, in_sgs, data, ctx, gfp) : virtqueue_add_split(_vq, sgs, total_sg, out_sgs, in_sgs, data, ctx, gfp); } /** * virtqueue_add_sgs - expose buffers to other end * @_vq: the struct virtqueue we're talking about. * @sgs: array of terminated scatterlists. * @out_sgs: the number of scatterlists readable by other side * @in_sgs: the number of scatterlists which are writable (after readable ones) * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_sgs(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int out_sgs, unsigned int in_sgs, void *data, gfp_t gfp) { unsigned int i, total_sg = 0; /* Count them first. */ for (i = 0; i < out_sgs + in_sgs; i++) { struct scatterlist *sg; for (sg = sgs[i]; sg; sg = sg_next(sg)) total_sg++; } return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); /** * virtqueue_add_outbuf - expose output buffers to other end * @vq: the struct virtqueue we're talking about. * @sg: scatterlist (must be well-formed and terminated!) * @num: the number of entries in @sg readable by other side * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_outbuf(struct virtqueue *vq, struct scatterlist *sg, unsigned int num, void *data, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); /** * virtqueue_add_inbuf - expose input buffers to other end * @vq: the struct virtqueue we're talking about. * @sg: scatterlist (must be well-formed and terminated!) * @num: the number of entries in @sg writable by other side * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_inbuf(struct virtqueue *vq, struct scatterlist *sg, unsigned int num, void *data, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); /** * virtqueue_add_inbuf_ctx - expose input buffers to other end * @vq: the struct virtqueue we're talking about. * @sg: scatterlist (must be well-formed and terminated!) * @num: the number of entries in @sg writable by other side * @data: the token identifying the buffer. 
* @ctx: extra context for the token * @gfp: how to do memory allocations (if necessary). * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, struct scatterlist *sg, unsigned int num, void *data, void *ctx, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); /** * virtqueue_dma_dev - get the dma dev * @_vq: the struct virtqueue we're talking about. * * Returns the dma dev. That can been used for dma api. */ struct device *virtqueue_dma_dev(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->use_dma_api) return vring_dma_dev(vq); else return NULL; } EXPORT_SYMBOL_GPL(virtqueue_dma_dev); /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @_vq: the struct virtqueue * * Instead of virtqueue_kick(), you can do: * if (virtqueue_kick_prepare(vq)) * virtqueue_notify(vq); * * This is sometimes useful because the virtqueue_kick_prepare() needs * to be serialized, but the actual virtqueue_notify() call does not. */ bool virtqueue_kick_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : virtqueue_kick_prepare_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); /** * virtqueue_notify - second half of split virtqueue_kick call. * @_vq: the struct virtqueue * * This does not need to be serialized. * * Returns false if host notify failed or queue is broken, otherwise true. */ bool virtqueue_notify(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (unlikely(vq->broken)) return false; /* Prod other side to tell it about changes. */ if (!vq->notify(_vq)) { vq->broken = true; return false; } return true; } EXPORT_SYMBOL_GPL(virtqueue_notify); /** * virtqueue_kick - update after add_buf * @vq: the struct virtqueue * * After one or more virtqueue_add_* calls, invoke this to kick * the other side. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). * * Returns false if kick failed, otherwise true. */ bool virtqueue_kick(struct virtqueue *vq) { if (virtqueue_kick_prepare(vq)) return virtqueue_notify(vq); return true; } EXPORT_SYMBOL_GPL(virtqueue_kick); /** * virtqueue_get_buf_ctx - get the next used buffer * @_vq: the struct virtqueue we're talking about. * @len: the length written into the buffer * @ctx: extra context for the token * * If the device wrote data into the buffer, @len will be set to the * amount written. This means you don't need to clear the buffer * beforehand to ensure there's no data leakage in the case of short * writes. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). * * Returns NULL if there are no used buffers, or the "data" token * handed to virtqueue_add_*(). */ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? 
virtqueue_get_buf_ctx_packed(_vq, len, ctx) : virtqueue_get_buf_ctx_split(_vq, len, ctx); } EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) { return virtqueue_get_buf_ctx(_vq, len, NULL); } EXPORT_SYMBOL_GPL(virtqueue_get_buf); /** * virtqueue_disable_cb - disable callbacks * @_vq: the struct virtqueue we're talking about. * * Note that this is not necessarily synchronous, hence unreliable and only * useful as an optimization. * * Unlike other operations, this need not be serialized. */ void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->packed_ring) virtqueue_disable_cb_packed(_vq); else virtqueue_disable_cb_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); /** * virtqueue_enable_cb_prepare - restart callbacks after disable_cb * @_vq: the struct virtqueue we're talking about. * * This re-enables callbacks; it returns current queue state * in an opaque unsigned value. This value should be later tested by * virtqueue_poll, to detect a possible race between the driver checking for * more work, and enabling callbacks. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). */ unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->event_triggered) vq->event_triggered = false; return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : virtqueue_enable_cb_prepare_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); /** * virtqueue_poll - query pending used buffers * @_vq: the struct virtqueue we're talking about. * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). * * Returns "true" if there are pending used buffers in the queue. * * This does not need to be serialized. */ bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); if (unlikely(vq->broken)) return false; virtio_mb(vq->weak_barriers); return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : virtqueue_poll_split(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_poll); /** * virtqueue_enable_cb - restart callbacks after disable_cb. * @_vq: the struct virtqueue we're talking about. * * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). */ bool virtqueue_enable_cb(struct virtqueue *_vq) { unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); return !virtqueue_poll(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); /** * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. * @_vq: the struct virtqueue we're talking about. * * This re-enables callbacks but hints to the other side to delay * interrupts until most of the available buffers have been processed; * it returns "false" if there are many pending buffers in the queue, * to detect a possible race between the driver checking for more work, * and enabling callbacks. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). */ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->event_triggered) vq->event_triggered = false; return vq->packed_ring ? 
virtqueue_enable_cb_delayed_packed(_vq) : virtqueue_enable_cb_delayed_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); /** * virtqueue_detach_unused_buf - detach first unused buffer * @_vq: the struct virtqueue we're talking about. * * Returns NULL or the "data" token handed to virtqueue_add_*(). * This is not valid on an active queue; it is useful for device * shutdown or the reset queue. */ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : virtqueue_detach_unused_buf_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); static inline bool more_used(const struct vring_virtqueue *vq) { return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); } /** * vring_interrupt - notify a virtqueue on an interrupt * @irq: the IRQ number (ignored) * @_vq: the struct virtqueue to notify * * Calls the callback function of @_vq to process the virtqueue * notification. */ irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (!more_used(vq)) { pr_debug("virtqueue interrupt with no work for %p\n", vq); return IRQ_NONE; } if (unlikely(vq->broken)) { #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION dev_warn_once(&vq->vq.vdev->dev, "virtio vring IRQ raised before DRIVER_OK"); return IRQ_NONE; #else return IRQ_HANDLED; #endif } /* Just a hint for performance: so it's ok that this can be racy! */ if (vq->event) vq->event_triggered = true; pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); if (vq->vq.callback) vq->vq.callback(&vq->vq); return IRQ_HANDLED; } EXPORT_SYMBOL_GPL(vring_interrupt); /* Only available for split ring */ static struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, bool weak_barriers, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev) { struct vring_virtqueue *vq; int err; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return NULL; vq = kmalloc(sizeof(*vq), GFP_KERNEL); if (!vq) return NULL; vq->packed_ring = false; vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; vq->vq.index = index; vq->vq.reset = false; vq->we_own_ring = false; vq->notify = notify; vq->weak_barriers = weak_barriers; #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION vq->broken = true; #else vq->broken = false; #endif vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); vq->premapped = false; vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) vq->weak_barriers = false; err = vring_alloc_state_extra_split(vring_split); if (err) { kfree(vq); return NULL; } virtqueue_vring_init_split(vring_split, vq); virtqueue_init(vq, vring_split->vring.num); virtqueue_vring_attach_split(vq, vring_split); spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); spin_unlock(&vdev->vqs_list_lock); return &vq->vq; } struct virtqueue *vring_create_virtqueue( unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name) { if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return 
vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name, vdev->dev.parent); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name, vdev->dev.parent); } EXPORT_SYMBOL_GPL(vring_create_virtqueue); struct virtqueue *vring_create_virtqueue_dma( unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev) { if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name, dma_dev); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name, dma_dev); } EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); /** * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. * @num: new ring num * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. Then call the passed callback to recycle the buffer * that is no longer used. Only after the new vring is successfully created, the * old vring will be released. * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error. * 0: success. * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size. * vq can still work normally * -EBUSY: Failed to sync with device, vq may not work properly * -ENOENT: Transport or device not supported * -E2BIG/-EINVAL: num error * -EPERM: Operation not permitted * */ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); int err; if (num > vq->vq.num_max) return -E2BIG; if (!num) return -EINVAL; if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num) return 0; err = virtqueue_disable_and_recycle(_vq, recycle); if (err) return err; if (vq->packed_ring) err = virtqueue_resize_packed(_vq, num); else err = virtqueue_resize_split(_vq, num); return virtqueue_enable_after_reset(_vq); } EXPORT_SYMBOL_GPL(virtqueue_resize); /** * virtqueue_set_dma_premapped - set the vring premapped mode * @_vq: the struct virtqueue we're talking about. * * Enable the premapped mode of the vq. * * The vring in premapped mode does not do dma internally, so the driver must * do dma mapping in advance. The driver must pass the dma_address through * dma_address of scatterlist. When the driver got a used buffer from * the vring, it has to unmap the dma address. * * This function must be called immediately after creating the vq, or after vq * reset, and before adding any buffers to it. * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error. * 0: success. * -EINVAL: vring does not use the dma api, so we can not enable premapped mode. */ int virtqueue_set_dma_premapped(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u32 num; START_USE(vq); num = vq->packed_ring ? 
vq->packed.vring.num : vq->split.vring.num; if (num != vq->vq.num_free) { END_USE(vq); return -EINVAL; } if (!vq->use_dma_api) { END_USE(vq); return -EINVAL; } vq->premapped = true; vq->do_unmap = false; END_USE(vq); return 0; } EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped); /** * virtqueue_reset - detach and recycle all unused buffers * @_vq: the struct virtqueue we're talking about. * @recycle: callback to recycle unused buffers * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error. * 0: success. * -EBUSY: Failed to sync with device, vq may not work properly * -ENOENT: Transport or device not supported * -EPERM: Operation not permitted */ int virtqueue_reset(struct virtqueue *_vq, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); int err; err = virtqueue_disable_and_recycle(_vq, recycle); if (err) return err; if (vq->packed_ring) virtqueue_reinit_packed(vq); else virtqueue_reinit_split(vq); return virtqueue_enable_after_reset(_vq); } EXPORT_SYMBOL_GPL(virtqueue_reset); /* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool context, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name) { struct vring_virtqueue_split vring_split = {}; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return NULL; vring_init(&vring_split.vring, num, pages, vring_align); return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, context, notify, callback, name, vdev->dev.parent); } EXPORT_SYMBOL_GPL(vring_new_virtqueue); static void vring_free(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->we_own_ring) { if (vq->packed_ring) { vring_free_queue(vq->vq.vdev, vq->packed.ring_size_in_bytes, vq->packed.vring.desc, vq->packed.ring_dma_addr, vring_dma_dev(vq)); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.driver, vq->packed.driver_event_dma_addr, vring_dma_dev(vq)); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.device, vq->packed.device_event_dma_addr, vring_dma_dev(vq)); kfree(vq->packed.desc_state); kfree(vq->packed.desc_extra); } else { vring_free_queue(vq->vq.vdev, vq->split.queue_size_in_bytes, vq->split.vring.desc, vq->split.queue_dma_addr, vring_dma_dev(vq)); } } if (!vq->packed_ring) { kfree(vq->split.desc_state); kfree(vq->split.desc_extra); } } void vring_del_virtqueue(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); spin_lock(&vq->vq.vdev->vqs_list_lock); list_del(&_vq->list); spin_unlock(&vq->vq.vdev->vqs_list_lock); vring_free(_vq); kfree(vq); } EXPORT_SYMBOL_GPL(vring_del_virtqueue); u32 vring_notification_data(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 next; if (vq->packed_ring) next = (vq->packed.next_avail_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | vq->packed.avail_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR; else next = vq->split.avail_idx_shadow; return next << 16 | _vq->index; } EXPORT_SYMBOL_GPL(vring_notification_data); /* Manipulates transport-specific feature bits. 
*/ void vring_transport_features(struct virtio_device *vdev) { unsigned int i; for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { switch (i) { case VIRTIO_RING_F_INDIRECT_DESC: break; case VIRTIO_RING_F_EVENT_IDX: break; case VIRTIO_F_VERSION_1: break; case VIRTIO_F_ACCESS_PLATFORM: break; case VIRTIO_F_RING_PACKED: break; case VIRTIO_F_ORDER_PLATFORM: break; case VIRTIO_F_NOTIFICATION_DATA: break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); } } } EXPORT_SYMBOL_GPL(vring_transport_features); /** * virtqueue_get_vring_size - return the size of the virtqueue's vring * @_vq: the struct virtqueue containing the vring of interest. * * Returns the size of the vring. This is mainly used for boasting to * userspace. Unlike other operations, this need not be serialized. */ unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; } EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); /* * This function should only be called by the core, not directly by the driver. */ void __virtqueue_break(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ WRITE_ONCE(vq->broken, true); } EXPORT_SYMBOL_GPL(__virtqueue_break); /* * This function should only be called by the core, not directly by the driver. */ void __virtqueue_unbreak(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ WRITE_ONCE(vq->broken, false); } EXPORT_SYMBOL_GPL(__virtqueue_unbreak); bool virtqueue_is_broken(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); return READ_ONCE(vq->broken); } EXPORT_SYMBOL_GPL(virtqueue_is_broken); /* * This should prevent the device from being used, allowing drivers to * recover. You may need to grab appropriate locks to flush. */ void virtio_break_device(struct virtio_device *dev) { struct virtqueue *_vq; spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ WRITE_ONCE(vq->broken, true); } spin_unlock(&dev->vqs_list_lock); } EXPORT_SYMBOL_GPL(virtio_break_device); /* * This should allow the device to be used by the driver. You may * need to grab appropriate locks to flush the write to * vq->broken. This should only be used in some specific case e.g * (probing and restoring). This function should only be called by the * core, not directly by the driver. */ void __virtio_unbreak_device(struct virtio_device *dev) { struct virtqueue *_vq; spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
*/ WRITE_ONCE(vq->broken, false); } spin_unlock(&dev->vqs_list_lock); } EXPORT_SYMBOL_GPL(__virtio_unbreak_device); dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); if (vq->packed_ring) return vq->packed.ring_dma_addr; return vq->split.queue_dma_addr; } EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); if (vq->packed_ring) return vq->packed.driver_event_dma_addr; return vq->split.queue_dma_addr + ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); if (vq->packed_ring) return vq->packed.device_event_dma_addr; return vq->split.queue_dma_addr + ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); /* Only available for split ring */ const struct vring *virtqueue_get_vring(const struct virtqueue *vq) { return &to_vvq(vq)->split.vring; } EXPORT_SYMBOL_GPL(virtqueue_get_vring); /** * virtqueue_dma_map_single_attrs - map DMA for _vq * @_vq: the struct virtqueue we're talking about. * @ptr: the pointer of the buffer to do dma * @size: the size of the buffer to do dma * @dir: DMA direction * @attrs: DMA Attrs * * The caller calls this to do dma mapping in advance. The DMA address can be * passed to this _vq when it is in pre-mapped mode. * * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). */ dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return (dma_addr_t)virt_to_phys(ptr); return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs); } EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs); /** * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq * @_vq: the struct virtqueue we're talking about. * @addr: the dma address to unmap * @size: the size of the buffer * @dir: DMA direction * @attrs: DMA Attrs * * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. * */ void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return; dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs); } EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); /** * virtqueue_dma_mapping_error - check dma address * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * * Returns 0 means dma valid. Other means invalid dma address. */ int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) { struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return 0; return dma_mapping_error(vring_dma_dev(vq), addr); } EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); /** * virtqueue_dma_need_sync - check a dma address needs sync * @_vq: the struct virtqueue we're talking about. 
* @addr: DMA address * * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be * synchronized * * return bool */ bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr) { struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return false; return dma_need_sync(vring_dma_dev(vq), addr); } EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); /** * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * @offset: DMA address offset * @size: buf size for sync * @dir: DMA direction * * Before calling this function, use virtqueue_dma_need_sync() to confirm that * the DMA address really needs to be synchronized * */ void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { struct vring_virtqueue *vq = to_vvq(_vq); struct device *dev = vring_dma_dev(vq); if (!vq->use_dma_api) return; dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); } EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); /** * virtqueue_dma_sync_single_range_for_device - dma sync for device * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * @offset: DMA address offset * @size: buf size for sync * @dir: DMA direction * * Before calling this function, use virtqueue_dma_need_sync() to confirm that * the DMA address really needs to be synchronized */ void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { struct vring_virtqueue *vq = to_vvq(_vq); struct device *dev = vring_dma_dev(vq); if (!vq->use_dma_api) return; dma_sync_single_range_for_device(dev, addr, offset, size, dir); } EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); MODULE_LICENSE("GPL");
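A brief usage illustration (an editorial sketch, not part of virtio_ring.c): the exported API above is normally consumed in an add/kick/reclaim pattern. The example_* helper names, the GFP_ATOMIC flag and the kfree()-based reclaim below are assumptions chosen for the sketch; only the virtqueue_* and scatterlist calls come from the interfaces documented above.

#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/virtio.h>

/* Hypothetical driver helper: expose one readable buffer and kick the device. */
static int example_send(struct virtqueue *vq, void *buf, unsigned int len)
{
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, buf, len);

	/* "buf" doubles as the token later returned by virtqueue_get_buf(). */
	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	if (err)
		return err;

	/*
	 * Split kick: virtqueue_kick_prepare() must be serialized with other
	 * virtqueue operations, the notify itself need not be.
	 */
	if (virtqueue_kick_prepare(vq))
		virtqueue_notify(vq);

	return 0;
}

/* Hypothetical completion path: reclaim the tokens of used buffers. */
static void example_reclaim(struct virtqueue *vq)
{
	unsigned int len;
	void *buf;

	while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
		kfree(buf);	/* assumes the buffers were kmalloc()ed */
}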
// SPDX-License-Identifier: GPL-2.0-or-later /* * Hardware dependent layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/major.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/mutex.h> #include <linux/module.h> #include <linux/sched/signal.h> #include <sound/core.h> #include <sound/control.h> #include <sound/minors.h> #include <sound/hwdep.h> #include <sound/info.h> MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("Hardware dependent layer"); MODULE_LICENSE("GPL"); static LIST_HEAD(snd_hwdep_devices); static DEFINE_MUTEX(register_mutex); static int snd_hwdep_dev_free(struct snd_device *device); static int snd_hwdep_dev_register(struct snd_device *device); static int snd_hwdep_dev_disconnect(struct snd_device *device); static struct snd_hwdep *snd_hwdep_search(struct snd_card *card, int device) { struct snd_hwdep *hwdep; list_for_each_entry(hwdep, &snd_hwdep_devices, list) if (hwdep->card == card && hwdep->device == device) return hwdep; return NULL; } static loff_t snd_hwdep_llseek(struct file * file, loff_t offset, int orig) { struct snd_hwdep *hw = file->private_data; if (hw->ops.llseek) return hw->ops.llseek(hw, file, offset, orig); return -ENXIO; } static ssize_t snd_hwdep_read(struct file * file, char __user *buf, size_t count, loff_t *offset) { struct snd_hwdep *hw = file->private_data; if (hw->ops.read) return hw->ops.read(hw, buf, count, offset); return -ENXIO; }
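/*
 * Editorial sketch, not part of hwdep.c: snd_hwdep_llseek()/read()/write()
 * above merely trampoline into hw->ops and return -ENXIO when the driver
 * left a callback unset.  The hypothetical callback below shows the
 * driver-side counterpart, assuming the ops.read prototype declared in
 * <sound/hwdep.h>; mychip_hwdep_read is an invented name.
 */
static long mychip_hwdep_read(struct snd_hwdep *hw, char __user *buf,
			      long count, loff_t *offset)
{
	/* A real driver would copy device-specific data to user space. */
	if (clear_user(buf, count))
		return -EFAULT;
	return count;
}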
static ssize_t snd_hwdep_write(struct file * file, const char __user *buf, size_t count, loff_t *offset) { struct snd_hwdep *hw = file->private_data; if (hw->ops.write) return hw->ops.write(hw, buf, count, offset); return -ENXIO; } static int snd_hwdep_open(struct inode *inode, struct file * file) { int major = imajor(inode); struct snd_hwdep *hw; int err; wait_queue_entry_t wait; if (major == snd_major) { hw = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_HWDEP); #ifdef CONFIG_SND_OSSEMUL } else if (major == SOUND_MAJOR) { hw = snd_lookup_oss_minor_data(iminor(inode), SNDRV_OSS_DEVICE_TYPE_DMFM); #endif } else return -ENXIO; if (hw == NULL) return -ENODEV; if (!try_module_get(hw->card->module)) { snd_card_unref(hw->card); return -EFAULT; } init_waitqueue_entry(&wait, current); add_wait_queue(&hw->open_wait, &wait); mutex_lock(&hw->open_mutex); while (1) { if (hw->exclusive && hw->used > 0) { err = -EBUSY; break; } if (!hw->ops.open) { err = 0; break; } err = hw->ops.open(hw, file); if (err >= 0) break; if (err == -EAGAIN) { if (file->f_flags & O_NONBLOCK) { err = -EBUSY; break; } } else break; set_current_state(TASK_INTERRUPTIBLE); mutex_unlock(&hw->open_mutex); schedule(); mutex_lock(&hw->open_mutex); if (hw->card->shutdown) { err = -ENODEV; break; } if (signal_pending(current)) { err = -ERESTARTSYS; break; } } remove_wait_queue(&hw->open_wait, &wait); if (err >= 0) { err = snd_card_file_add(hw->card, file); if (err >= 0) { file->private_data = hw; hw->used++; } else { if (hw->ops.release) hw->ops.release(hw, file); } } mutex_unlock(&hw->open_mutex); if (err < 0) module_put(hw->card->module); snd_card_unref(hw->card); return err; } static int snd_hwdep_release(struct inode *inode, struct file * file) { int err = 0; struct snd_hwdep *hw = file->private_data; struct module *mod = hw->card->module; scoped_guard(mutex, &hw->open_mutex) { if (hw->ops.release) err = hw->ops.release(hw, file); if (hw->used > 0) hw->used--; } wake_up(&hw->open_wait); snd_card_file_remove(hw->card, file); module_put(mod); return err; } static __poll_t snd_hwdep_poll(struct file * file, poll_table * wait) { struct snd_hwdep *hw = file->private_data; if (hw->ops.poll) return hw->ops.poll(hw, file, wait); return 0; } static int snd_hwdep_info(struct snd_hwdep *hw, struct snd_hwdep_info __user *_info) { struct snd_hwdep_info info; memset(&info, 0, sizeof(info)); info.card = hw->card->number; strscpy(info.id, hw->id, sizeof(info.id)); strscpy(info.name, hw->name, sizeof(info.name)); info.iface = hw->iface; if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_hwdep_dsp_status(struct snd_hwdep *hw, struct snd_hwdep_dsp_status __user *_info) { struct snd_hwdep_dsp_status info; int err; if (! hw->ops.dsp_status) return -ENXIO; memset(&info, 0, sizeof(info)); info.dsp_loaded = hw->dsp_loaded; err = hw->ops.dsp_status(hw, &info); if (err < 0) return err; if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_hwdep_dsp_load(struct snd_hwdep *hw, struct snd_hwdep_dsp_image *info) { int err; if (! 
hw->ops.dsp_load) return -ENXIO; if (info->index >= 32) return -EINVAL; /* check whether the dsp was already loaded */ if (hw->dsp_loaded & (1u << info->index)) return -EBUSY; err = hw->ops.dsp_load(hw, info); if (err < 0) return err; hw->dsp_loaded |= (1u << info->index); return 0; } static int snd_hwdep_dsp_load_user(struct snd_hwdep *hw, struct snd_hwdep_dsp_image __user *_info) { struct snd_hwdep_dsp_image info = {}; if (copy_from_user(&info, _info, sizeof(info))) return -EFAULT; return snd_hwdep_dsp_load(hw, &info); } static long snd_hwdep_ioctl(struct file * file, unsigned int cmd, unsigned long arg) { struct snd_hwdep *hw = file->private_data; void __user *argp = (void __user *)arg; switch (cmd) { case SNDRV_HWDEP_IOCTL_PVERSION: return put_user(SNDRV_HWDEP_VERSION, (int __user *)argp); case SNDRV_HWDEP_IOCTL_INFO: return snd_hwdep_info(hw, argp); case SNDRV_HWDEP_IOCTL_DSP_STATUS: return snd_hwdep_dsp_status(hw, argp); case SNDRV_HWDEP_IOCTL_DSP_LOAD: return snd_hwdep_dsp_load_user(hw, argp); } if (hw->ops.ioctl) return hw->ops.ioctl(hw, file, cmd, arg); return -ENOTTY; } static int snd_hwdep_mmap(struct file * file, struct vm_area_struct * vma) { struct snd_hwdep *hw = file->private_data; if (hw->ops.mmap) return hw->ops.mmap(hw, file, vma); return -ENXIO; } static int snd_hwdep_control_ioctl(struct snd_card *card, struct snd_ctl_file * control, unsigned int cmd, unsigned long arg) { switch (cmd) { case SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE: { int device; if (get_user(device, (int __user *)arg)) return -EFAULT; scoped_guard(mutex, &register_mutex) { if (device < 0) device = 0; else if (device < SNDRV_MINOR_HWDEPS) device++; else device = SNDRV_MINOR_HWDEPS; while (device < SNDRV_MINOR_HWDEPS) { if (snd_hwdep_search(card, device)) break; device++; } if (device >= SNDRV_MINOR_HWDEPS) device = -1; } if (put_user(device, (int __user *)arg)) return -EFAULT; return 0; } case SNDRV_CTL_IOCTL_HWDEP_INFO: { struct snd_hwdep_info __user *info = (struct snd_hwdep_info __user *)arg; int device; struct snd_hwdep *hwdep; if (get_user(device, &info->device)) return -EFAULT; scoped_guard(mutex, &register_mutex) { hwdep = snd_hwdep_search(card, device); if (!hwdep) return -ENXIO; return snd_hwdep_info(hwdep, info); } break; } } return -ENOIOCTLCMD; } #ifdef CONFIG_COMPAT #include "hwdep_compat.c" #else #define snd_hwdep_ioctl_compat NULL #endif /* */ static const struct file_operations snd_hwdep_f_ops = { .owner = THIS_MODULE, .llseek = snd_hwdep_llseek, .read = snd_hwdep_read, .write = snd_hwdep_write, .open = snd_hwdep_open, .release = snd_hwdep_release, .poll = snd_hwdep_poll, .unlocked_ioctl = snd_hwdep_ioctl, .compat_ioctl = snd_hwdep_ioctl_compat, .mmap = snd_hwdep_mmap, }; static void snd_hwdep_free(struct snd_hwdep *hwdep) { if (!hwdep) return; if (hwdep->private_free) hwdep->private_free(hwdep); put_device(hwdep->dev); kfree(hwdep); } /** * snd_hwdep_new - create a new hwdep instance * @card: the card instance * @id: the id string * @device: the device index (zero-based) * @rhwdep: the pointer to store the new hwdep instance * * Creates a new hwdep instance with the given index on the card. * The callbacks (hwdep->ops) must be set on the returned instance * after this call manually by the caller. * * Return: Zero if successful, or a negative error code on failure. 
*/ int snd_hwdep_new(struct snd_card *card, char *id, int device, struct snd_hwdep **rhwdep) { struct snd_hwdep *hwdep; int err; static const struct snd_device_ops ops = { .dev_free = snd_hwdep_dev_free, .dev_register = snd_hwdep_dev_register, .dev_disconnect = snd_hwdep_dev_disconnect, }; if (snd_BUG_ON(!card)) return -ENXIO; if (rhwdep) *rhwdep = NULL; hwdep = kzalloc(sizeof(*hwdep), GFP_KERNEL); if (!hwdep) return -ENOMEM; init_waitqueue_head(&hwdep->open_wait); mutex_init(&hwdep->open_mutex); hwdep->card = card; hwdep->device = device; if (id) strscpy(hwdep->id, id, sizeof(hwdep->id)); err = snd_device_alloc(&hwdep->dev, card); if (err < 0) { snd_hwdep_free(hwdep); return err; } dev_set_name(hwdep->dev, "hwC%iD%i", card->number, device); #ifdef CONFIG_SND_OSSEMUL hwdep->oss_type = -1; #endif err = snd_device_new(card, SNDRV_DEV_HWDEP, hwdep, &ops); if (err < 0) { snd_hwdep_free(hwdep); return err; } if (rhwdep) *rhwdep = hwdep; return 0; } EXPORT_SYMBOL(snd_hwdep_new); static int snd_hwdep_dev_free(struct snd_device *device) { snd_hwdep_free(device->device_data); return 0; } static int snd_hwdep_dev_register(struct snd_device *device) { struct snd_hwdep *hwdep = device->device_data; struct snd_card *card = hwdep->card; int err; guard(mutex)(&register_mutex); if (snd_hwdep_search(card, hwdep->device)) return -EBUSY; list_add_tail(&hwdep->list, &snd_hwdep_devices); err = snd_register_device(SNDRV_DEVICE_TYPE_HWDEP, hwdep->card, hwdep->device, &snd_hwdep_f_ops, hwdep, hwdep->dev); if (err < 0) { dev_err(hwdep->dev, "unable to register\n"); list_del(&hwdep->list); return err; } #ifdef CONFIG_SND_OSSEMUL hwdep->ossreg = 0; if (hwdep->oss_type >= 0) { if (hwdep->oss_type == SNDRV_OSS_DEVICE_TYPE_DMFM && hwdep->device) dev_warn(hwdep->dev, "only hwdep device 0 can be registered as OSS direct FM device!\n"); else if (snd_register_oss_device(hwdep->oss_type, card, hwdep->device, &snd_hwdep_f_ops, hwdep) < 0) dev_warn(hwdep->dev, "unable to register OSS compatibility device\n"); else hwdep->ossreg = 1; } #endif return 0; } static int snd_hwdep_dev_disconnect(struct snd_device *device) { struct snd_hwdep *hwdep = device->device_data; if (snd_BUG_ON(!hwdep)) return -ENXIO; guard(mutex)(&register_mutex); if (snd_hwdep_search(hwdep->card, hwdep->device) != hwdep) return -EINVAL; guard(mutex)(&hwdep->open_mutex); wake_up(&hwdep->open_wait); #ifdef CONFIG_SND_OSSEMUL if (hwdep->ossreg) snd_unregister_oss_device(hwdep->oss_type, hwdep->card, hwdep->device); #endif snd_unregister_device(hwdep->dev); list_del_init(&hwdep->list); return 0; } #ifdef CONFIG_SND_PROC_FS /* * Info interface */ static void snd_hwdep_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_hwdep *hwdep; guard(mutex)(&register_mutex); list_for_each_entry(hwdep, &snd_hwdep_devices, list) snd_iprintf(buffer, "%02i-%02i: %s\n", hwdep->card->number, hwdep->device, hwdep->name); } static struct snd_info_entry *snd_hwdep_proc_entry; static void __init snd_hwdep_proc_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "hwdep", NULL); if (entry) { entry->c.text.read = snd_hwdep_proc_read; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } snd_hwdep_proc_entry = entry; } static void __exit snd_hwdep_proc_done(void) { snd_info_free_entry(snd_hwdep_proc_entry); } #else /* !CONFIG_SND_PROC_FS */ #define snd_hwdep_proc_init() #define snd_hwdep_proc_done() #endif /* CONFIG_SND_PROC_FS */ /* * ENTRY functions */ static int __init 
alsa_hwdep_init(void) { snd_hwdep_proc_init(); snd_ctl_register_ioctl(snd_hwdep_control_ioctl); snd_ctl_register_ioctl_compat(snd_hwdep_control_ioctl); return 0; } static void __exit alsa_hwdep_exit(void) { snd_ctl_unregister_ioctl(snd_hwdep_control_ioctl); snd_ctl_unregister_ioctl_compat(snd_hwdep_control_ioctl); snd_hwdep_proc_done(); } module_init(alsa_hwdep_init) module_exit(alsa_hwdep_exit)
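To complement the snd_hwdep_new() kernel-doc above, here is a minimal creation sequence as seen from a card driver (an editorial sketch: the mychip_* names, the "MyChip" id string and the trivial open callback are assumptions; only snd_hwdep_new() and the hwdep fields documented in this file are taken from the source).

#include <sound/core.h>
#include <sound/hwdep.h>

/* Hypothetical open callback; hwdep->ops must be filled in by the caller. */
static int mychip_hwdep_open(struct snd_hwdep *hw, struct file *file)
{
	return 0;
}

static int mychip_create_hwdep(struct snd_card *card, void *chip)
{
	struct snd_hwdep *hw;
	int err;

	err = snd_hwdep_new(card, "MyChip", 0, &hw);
	if (err < 0)
		return err;

	hw->private_data = chip;
	hw->ops.open = mychip_hwdep_open;
	/* Further ops (read/write/ioctl/release) would be assigned here. */

	/* The char device itself is registered later via snd_card_register(). */
	return 0;
}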
/* * videobuf2-core.c - video buffer 2 core framework * * Copyright (C) 2010 Samsung Electronics * * Author: Pawel Osciak <pawel@osciak.com> * Marek Szyprowski <m.szyprowski@samsung.com> * * The vb2_thread implementation was based on code from videobuf-dvb.c: * (c) 2004 Gerd Knorr <kraxel@bytesex.org> [SUSE Labs] * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <media/videobuf2-core.h> #include <media/v4l2-mc.h> #include <trace/events/vb2.h> #define PLANE_INDEX_BITS 3 #define PLANE_INDEX_SHIFT (PAGE_SHIFT + PLANE_INDEX_BITS) #define PLANE_INDEX_MASK (BIT_MASK(PLANE_INDEX_BITS) - 1) #define MAX_BUFFER_INDEX BIT_MASK(30 - PLANE_INDEX_SHIFT) #define BUFFER_INDEX_MASK (MAX_BUFFER_INDEX - 1) #if BIT(PLANE_INDEX_BITS) != VIDEO_MAX_PLANES #error PLANE_INDEX_BITS order must be equal to VIDEO_MAX_PLANES #endif static int debug; module_param(debug, int, 0644); #define dprintk(q, level, fmt, arg...) \ do { \ if (debug >= level) \ pr_info("[%s] %s: " fmt, (q)->name, __func__, \ ## arg); \ } while (0) #ifdef CONFIG_VIDEO_ADV_DEBUG /* * If advanced debugging is on, then count how often each op is called * successfully, which can either be per-buffer or per-queue. * * This makes it easy to check that the 'init' and 'cleanup' * (and variations thereof) stay balanced. */ #define log_memop(vb, op) \ dprintk((vb)->vb2_queue, 2, "call_memop(%d, %s)%s\n", \ (vb)->index, #op, \ (vb)->vb2_queue->mem_ops->op ? "" : " (nop)") #define call_memop(vb, op, args...) \ ({ \ struct vb2_queue *_q = (vb)->vb2_queue; \ int err; \ \ log_memop(vb, op); \ err = _q->mem_ops->op ? _q->mem_ops->op(args) : 0; \ if (!err) \ (vb)->cnt_mem_ ## op++; \ err; \ }) #define call_ptr_memop(op, vb, args...) \ ({ \ struct vb2_queue *_q = (vb)->vb2_queue; \ void *ptr; \ \ log_memop(vb, op); \ ptr = _q->mem_ops->op ? _q->mem_ops->op(vb, args) : NULL; \ if (!IS_ERR_OR_NULL(ptr)) \ (vb)->cnt_mem_ ## op++; \ ptr; \ }) #define call_void_memop(vb, op, args...) \ ({ \ struct vb2_queue *_q = (vb)->vb2_queue; \ \ log_memop(vb, op); \ if (_q->mem_ops->op) \ _q->mem_ops->op(args); \ (vb)->cnt_mem_ ## op++; \ }) #define log_qop(q, op) \ dprintk(q, 2, "call_qop(%s)%s\n", #op, \ (q)->ops->op ? "" : " (nop)") #define call_qop(q, op, args...) \ ({ \ int err; \ \ log_qop(q, op); \ err = (q)->ops->op ? (q)->ops->op(args) : 0; \ if (!err) \ (q)->cnt_ ## op++; \ err; \ }) #define call_void_qop(q, op, args...) \ ({ \ log_qop(q, op); \ if ((q)->ops->op) \ (q)->ops->op(args); \ (q)->cnt_ ## op++; \ }) #define log_vb_qop(vb, op, args...)
\ dprintk((vb)->vb2_queue, 2, "call_vb_qop(%d, %s)%s\n", \ (vb)->index, #op, \ (vb)->vb2_queue->ops->op ? "" : " (nop)") #define call_vb_qop(vb, op, args...) \ ({ \ int err; \ \ log_vb_qop(vb, op); \ err = (vb)->vb2_queue->ops->op ? \ (vb)->vb2_queue->ops->op(args) : 0; \ if (!err) \ (vb)->cnt_ ## op++; \ err; \ }) #define call_void_vb_qop(vb, op, args...) \ ({ \ log_vb_qop(vb, op); \ if ((vb)->vb2_queue->ops->op) \ (vb)->vb2_queue->ops->op(args); \ (vb)->cnt_ ## op++; \ }) #else #define call_memop(vb, op, args...) \ ((vb)->vb2_queue->mem_ops->op ? \ (vb)->vb2_queue->mem_ops->op(args) : 0) #define call_ptr_memop(op, vb, args...) \ ((vb)->vb2_queue->mem_ops->op ? \ (vb)->vb2_queue->mem_ops->op(vb, args) : NULL) #define call_void_memop(vb, op, args...) \ do { \ if ((vb)->vb2_queue->mem_ops->op) \ (vb)->vb2_queue->mem_ops->op(args); \ } while (0) #define call_qop(q, op, args...) \ ((q)->ops->op ? (q)->ops->op(args) : 0) #define call_void_qop(q, op, args...) \ do { \ if ((q)->ops->op) \ (q)->ops->op(args); \ } while (0) #define call_vb_qop(vb, op, args...) \ ((vb)->vb2_queue->ops->op ? (vb)->vb2_queue->ops->op(args) : 0) #define call_void_vb_qop(vb, op, args...) \ do { \ if ((vb)->vb2_queue->ops->op) \ (vb)->vb2_queue->ops->op(args); \ } while (0) #endif #define call_bufop(q, op, args...) \ ({ \ int ret = 0; \ if (q && q->buf_ops && q->buf_ops->op) \ ret = q->buf_ops->op(args); \ ret; \ }) #define call_void_bufop(q, op, args...) \ ({ \ if (q && q->buf_ops && q->buf_ops->op) \ q->buf_ops->op(args); \ }) static void __vb2_queue_cancel(struct vb2_queue *q); static void __enqueue_in_driver(struct vb2_buffer *vb); static const char *vb2_state_name(enum vb2_buffer_state s) { static const char * const state_names[] = { [VB2_BUF_STATE_DEQUEUED] = "dequeued", [VB2_BUF_STATE_IN_REQUEST] = "in request", [VB2_BUF_STATE_PREPARING] = "preparing", [VB2_BUF_STATE_QUEUED] = "queued", [VB2_BUF_STATE_ACTIVE] = "active", [VB2_BUF_STATE_DONE] = "done", [VB2_BUF_STATE_ERROR] = "error", }; if ((unsigned int)(s) < ARRAY_SIZE(state_names)) return state_names[s]; return "unknown"; } /* * __vb2_buf_mem_alloc() - allocate video memory for the given buffer */ static int __vb2_buf_mem_alloc(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; void *mem_priv; int plane; int ret = -ENOMEM; /* * Allocate memory for all planes in this buffer * NOTE: mmapped areas should be page aligned */ for (plane = 0; plane < vb->num_planes; ++plane) { /* Memops alloc requires size to be page aligned. */ unsigned long size = PAGE_ALIGN(vb->planes[plane].length); /* Did it wrap around? */ if (size < vb->planes[plane].length) goto free; mem_priv = call_ptr_memop(alloc, vb, q->alloc_devs[plane] ? 
: q->dev, size); if (IS_ERR_OR_NULL(mem_priv)) { if (mem_priv) ret = PTR_ERR(mem_priv); goto free; } /* Associate allocator private data with this plane */ vb->planes[plane].mem_priv = mem_priv; } return 0; free: /* Free already allocated memory if one of the allocations failed */ for (; plane > 0; --plane) { call_void_memop(vb, put, vb->planes[plane - 1].mem_priv); vb->planes[plane - 1].mem_priv = NULL; } return ret; } /* * __vb2_buf_mem_free() - free memory of the given buffer */ static void __vb2_buf_mem_free(struct vb2_buffer *vb) { unsigned int plane; for (plane = 0; plane < vb->num_planes; ++plane) { call_void_memop(vb, put, vb->planes[plane].mem_priv); vb->planes[plane].mem_priv = NULL; dprintk(vb->vb2_queue, 3, "freed plane %d of buffer %d\n", plane, vb->index); } } /* * __vb2_buf_userptr_put() - release userspace memory associated with * a USERPTR buffer */ static void __vb2_buf_userptr_put(struct vb2_buffer *vb) { unsigned int plane; for (plane = 0; plane < vb->num_planes; ++plane) { if (vb->planes[plane].mem_priv) call_void_memop(vb, put_userptr, vb->planes[plane].mem_priv); vb->planes[plane].mem_priv = NULL; } } /* * __vb2_plane_dmabuf_put() - release memory associated with * a DMABUF shared plane */ static void __vb2_plane_dmabuf_put(struct vb2_buffer *vb, struct vb2_plane *p) { if (!p->mem_priv) return; if (p->dbuf_mapped) call_void_memop(vb, unmap_dmabuf, p->mem_priv); call_void_memop(vb, detach_dmabuf, p->mem_priv); dma_buf_put(p->dbuf); p->mem_priv = NULL; p->dbuf = NULL; p->dbuf_mapped = 0; } /* * __vb2_buf_dmabuf_put() - release memory associated with * a DMABUF shared buffer */ static void __vb2_buf_dmabuf_put(struct vb2_buffer *vb) { unsigned int plane; for (plane = 0; plane < vb->num_planes; ++plane) __vb2_plane_dmabuf_put(vb, &vb->planes[plane]); } /* * __vb2_buf_mem_prepare() - call ->prepare() on buffer's private memory * to sync caches */ static void __vb2_buf_mem_prepare(struct vb2_buffer *vb) { unsigned int plane; if (vb->synced) return; vb->synced = 1; for (plane = 0; plane < vb->num_planes; ++plane) call_void_memop(vb, prepare, vb->planes[plane].mem_priv); } /* * __vb2_buf_mem_finish() - call ->finish on buffer's private memory * to sync caches */ static void __vb2_buf_mem_finish(struct vb2_buffer *vb) { unsigned int plane; if (!vb->synced) return; vb->synced = 0; for (plane = 0; plane < vb->num_planes; ++plane) call_void_memop(vb, finish, vb->planes[plane].mem_priv); } /* * __setup_offsets() - setup unique offsets ("cookies") for every plane in * the buffer. */ static void __setup_offsets(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; unsigned int plane; unsigned long offset = 0; /* * The offset "cookie" value has the following constraints: * - a buffer can have up to 8 planes. * - v4l2 mem2mem uses bit 30 to distinguish between * OUTPUT (aka "source", bit 30 is 0) and * CAPTURE (aka "destination", bit 30 is 1) buffers. * - must be page aligned * That led to this bit mapping when PAGE_SHIFT = 12: * |30 |29 15|14 12|11 0| * |DST_QUEUE_OFF_BASE|buffer index|plane index| 0 | * where there are 15 bits to store the buffer index. * Depending on PAGE_SHIFT value we can have fewer bits * to store the buffer index. 
*/ offset = vb->index << PLANE_INDEX_SHIFT; for (plane = 0; plane < vb->num_planes; ++plane) { vb->planes[plane].m.offset = offset + (plane << PAGE_SHIFT); dprintk(q, 3, "buffer %d, plane %d offset 0x%08lx\n", vb->index, plane, offset); } } static void init_buffer_cache_hints(struct vb2_queue *q, struct vb2_buffer *vb) { /* * DMA exporter should take care of cache syncs, so we can avoid * explicit ->prepare()/->finish() syncs. For other ->memory types * we always need ->prepare() or/and ->finish() cache sync. */ if (q->memory == VB2_MEMORY_DMABUF) { vb->skip_cache_sync_on_finish = 1; vb->skip_cache_sync_on_prepare = 1; return; } /* * ->finish() cache sync can be avoided when queue direction is * TO_DEVICE. */ if (q->dma_dir == DMA_TO_DEVICE) vb->skip_cache_sync_on_finish = 1; } /** * vb2_queue_add_buffer() - add a buffer to a queue * @q: pointer to &struct vb2_queue with videobuf2 queue. * @vb: pointer to &struct vb2_buffer to be added to the queue. * @index: index where add vb2_buffer in the queue */ static void vb2_queue_add_buffer(struct vb2_queue *q, struct vb2_buffer *vb, unsigned int index) { WARN_ON(index >= q->max_num_buffers || q->bufs[index] || vb->vb2_queue); q->bufs[index] = vb; vb->index = index; vb->vb2_queue = q; } /** * vb2_queue_remove_buffer() - remove a buffer from a queue * @vb: pointer to &struct vb2_buffer to be removed from the queue. */ static void vb2_queue_remove_buffer(struct vb2_buffer *vb) { vb->vb2_queue->bufs[vb->index] = NULL; vb->vb2_queue = NULL; } /* * __vb2_queue_alloc() - allocate vb2 buffer structures and (for MMAP type) * video buffer memory for all buffers/planes on the queue and initializes the * queue * * Returns the number of buffers successfully allocated. */ static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory, unsigned int num_buffers, unsigned int num_planes, const unsigned plane_sizes[VB2_MAX_PLANES]) { unsigned int q_num_buffers = vb2_get_num_buffers(q); unsigned int buffer, plane; struct vb2_buffer *vb; int ret; /* * Ensure that the number of already queue + the number of buffers already * in the queue is below q->max_num_buffers */ num_buffers = min_t(unsigned int, num_buffers, q->max_num_buffers - q_num_buffers); for (buffer = 0; buffer < num_buffers; ++buffer) { /* Allocate vb2 buffer structures */ vb = kzalloc(q->buf_struct_size, GFP_KERNEL); if (!vb) { dprintk(q, 1, "memory alloc for buffer struct failed\n"); break; } vb->state = VB2_BUF_STATE_DEQUEUED; vb->num_planes = num_planes; vb->type = q->type; vb->memory = memory; init_buffer_cache_hints(q, vb); for (plane = 0; plane < num_planes; ++plane) { vb->planes[plane].length = plane_sizes[plane]; vb->planes[plane].min_length = plane_sizes[plane]; } vb2_queue_add_buffer(q, vb, q_num_buffers + buffer); call_void_bufop(q, init_buffer, vb); /* Allocate video buffer memory for the MMAP type */ if (memory == VB2_MEMORY_MMAP) { ret = __vb2_buf_mem_alloc(vb); if (ret) { dprintk(q, 1, "failed allocating memory for buffer %d\n", buffer); vb2_queue_remove_buffer(vb); kfree(vb); break; } __setup_offsets(vb); /* * Call the driver-provided buffer initialization * callback, if given. An error in initialization * results in queue setup failure. 
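 *
 * A failure here is handled per buffer: the loop below undoes only the
 * failing buffer (frees its plane memory, removes it from q->bufs and
 * kfrees the vb2_buffer) and then stops, so the function still returns
 * the number of buffers that were fully set up and the callers decide
 * whether that smaller count is acceptable.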
*/ ret = call_vb_qop(vb, buf_init, vb); if (ret) { dprintk(q, 1, "buffer %d %p initialization failed\n", buffer, vb); __vb2_buf_mem_free(vb); vb2_queue_remove_buffer(vb); kfree(vb); break; } } } dprintk(q, 3, "allocated %d buffers, %d plane(s) each\n", buffer, num_planes); return buffer; } /* * __vb2_free_mem() - release all video buffer memory for a given queue */ static void __vb2_free_mem(struct vb2_queue *q, unsigned int buffers) { unsigned int buffer; struct vb2_buffer *vb; unsigned int q_num_buffers = vb2_get_num_buffers(q); for (buffer = q_num_buffers - buffers; buffer < q_num_buffers; ++buffer) { vb = vb2_get_buffer(q, buffer); if (!vb) continue; /* Free MMAP buffers or release USERPTR buffers */ if (q->memory == VB2_MEMORY_MMAP) __vb2_buf_mem_free(vb); else if (q->memory == VB2_MEMORY_DMABUF) __vb2_buf_dmabuf_put(vb); else __vb2_buf_userptr_put(vb); } } /* * __vb2_queue_free() - free buffers at the end of the queue - video memory and * related information, if no buffers are left return the queue to an * uninitialized state. Might be called even if the queue has already been freed. */ static void __vb2_queue_free(struct vb2_queue *q, unsigned int buffers) { unsigned int buffer; unsigned int q_num_buffers = vb2_get_num_buffers(q); lockdep_assert_held(&q->mmap_lock); /* Call driver-provided cleanup function for each buffer, if provided */ for (buffer = q_num_buffers - buffers; buffer < q_num_buffers; ++buffer) { struct vb2_buffer *vb = vb2_get_buffer(q, buffer); if (vb && vb->planes[0].mem_priv) call_void_vb_qop(vb, buf_cleanup, vb); } /* Release video buffer memory */ __vb2_free_mem(q, buffers); #ifdef CONFIG_VIDEO_ADV_DEBUG /* * Check that all the calls were balanced during the life-time of this * queue. If not then dump the counters to the kernel log. 
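 *
 * Every "acquire"-style callback is expected to have been matched by
 * its "release" counterpart over the queue's lifetime, e.g.
 * start_streaming/stop_streaming, buf_init/buf_cleanup, alloc/put and
 * map_dmabuf/unmap_dmabuf. The queue-level counters are cleared after
 * reporting so that a later allocation on the same queue starts from a
 * clean slate.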
*/ if (q_num_buffers) { bool unbalanced = q->cnt_start_streaming != q->cnt_stop_streaming || q->cnt_prepare_streaming != q->cnt_unprepare_streaming || q->cnt_wait_prepare != q->cnt_wait_finish; if (unbalanced) { pr_info("unbalanced counters for queue %p:\n", q); if (q->cnt_start_streaming != q->cnt_stop_streaming) pr_info(" setup: %u start_streaming: %u stop_streaming: %u\n", q->cnt_queue_setup, q->cnt_start_streaming, q->cnt_stop_streaming); if (q->cnt_prepare_streaming != q->cnt_unprepare_streaming) pr_info(" prepare_streaming: %u unprepare_streaming: %u\n", q->cnt_prepare_streaming, q->cnt_unprepare_streaming); if (q->cnt_wait_prepare != q->cnt_wait_finish) pr_info(" wait_prepare: %u wait_finish: %u\n", q->cnt_wait_prepare, q->cnt_wait_finish); } q->cnt_queue_setup = 0; q->cnt_wait_prepare = 0; q->cnt_wait_finish = 0; q->cnt_prepare_streaming = 0; q->cnt_start_streaming = 0; q->cnt_stop_streaming = 0; q->cnt_unprepare_streaming = 0; } for (buffer = 0; buffer < vb2_get_num_buffers(q); buffer++) { struct vb2_buffer *vb = vb2_get_buffer(q, buffer); bool unbalanced; if (!vb) continue; unbalanced = vb->cnt_mem_alloc != vb->cnt_mem_put || vb->cnt_mem_prepare != vb->cnt_mem_finish || vb->cnt_mem_get_userptr != vb->cnt_mem_put_userptr || vb->cnt_mem_attach_dmabuf != vb->cnt_mem_detach_dmabuf || vb->cnt_mem_map_dmabuf != vb->cnt_mem_unmap_dmabuf || vb->cnt_buf_queue != vb->cnt_buf_done || vb->cnt_buf_prepare != vb->cnt_buf_finish || vb->cnt_buf_init != vb->cnt_buf_cleanup; if (unbalanced) { pr_info("unbalanced counters for queue %p, buffer %d:\n", q, buffer); if (vb->cnt_buf_init != vb->cnt_buf_cleanup) pr_info(" buf_init: %u buf_cleanup: %u\n", vb->cnt_buf_init, vb->cnt_buf_cleanup); if (vb->cnt_buf_prepare != vb->cnt_buf_finish) pr_info(" buf_prepare: %u buf_finish: %u\n", vb->cnt_buf_prepare, vb->cnt_buf_finish); if (vb->cnt_buf_queue != vb->cnt_buf_done) pr_info(" buf_out_validate: %u buf_queue: %u buf_done: %u buf_request_complete: %u\n", vb->cnt_buf_out_validate, vb->cnt_buf_queue, vb->cnt_buf_done, vb->cnt_buf_request_complete); if (vb->cnt_mem_alloc != vb->cnt_mem_put) pr_info(" alloc: %u put: %u\n", vb->cnt_mem_alloc, vb->cnt_mem_put); if (vb->cnt_mem_prepare != vb->cnt_mem_finish) pr_info(" prepare: %u finish: %u\n", vb->cnt_mem_prepare, vb->cnt_mem_finish); if (vb->cnt_mem_get_userptr != vb->cnt_mem_put_userptr) pr_info(" get_userptr: %u put_userptr: %u\n", vb->cnt_mem_get_userptr, vb->cnt_mem_put_userptr); if (vb->cnt_mem_attach_dmabuf != vb->cnt_mem_detach_dmabuf) pr_info(" attach_dmabuf: %u detach_dmabuf: %u\n", vb->cnt_mem_attach_dmabuf, vb->cnt_mem_detach_dmabuf); if (vb->cnt_mem_map_dmabuf != vb->cnt_mem_unmap_dmabuf) pr_info(" map_dmabuf: %u unmap_dmabuf: %u\n", vb->cnt_mem_map_dmabuf, vb->cnt_mem_unmap_dmabuf); pr_info(" get_dmabuf: %u num_users: %u\n", vb->cnt_mem_get_dmabuf, vb->cnt_mem_num_users); } } #endif /* Free vb2 buffers */ for (buffer = q_num_buffers - buffers; buffer < q_num_buffers; ++buffer) { struct vb2_buffer *vb = vb2_get_buffer(q, buffer); if (!vb) continue; vb2_queue_remove_buffer(vb); kfree(vb); } q->num_buffers -= buffers; if (!vb2_get_num_buffers(q)) { q->memory = VB2_MEMORY_UNKNOWN; INIT_LIST_HEAD(&q->queued_list); } } bool vb2_buffer_in_use(struct vb2_queue *q, struct vb2_buffer *vb) { unsigned int plane; for (plane = 0; plane < vb->num_planes; ++plane) { void *mem_priv = vb->planes[plane].mem_priv; /* * If num_users() has not been provided, call_memop * will return 0, apparently nobody cares about this * case anyway. 
If num_users() returns more than 1, * we are not the only user of the plane's memory. */ if (mem_priv && call_memop(vb, num_users, mem_priv) > 1) return true; } return false; } EXPORT_SYMBOL(vb2_buffer_in_use); /* * __buffers_in_use() - return true if any buffers on the queue are in use and * the queue cannot be freed (by the means of REQBUFS(0)) call */ static bool __buffers_in_use(struct vb2_queue *q) { unsigned int buffer; for (buffer = 0; buffer < vb2_get_num_buffers(q); ++buffer) { struct vb2_buffer *vb = vb2_get_buffer(q, buffer); if (!vb) continue; if (vb2_buffer_in_use(q, vb)) return true; } return false; } void vb2_core_querybuf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb) { call_void_bufop(q, fill_user_buffer, vb, pb); } EXPORT_SYMBOL_GPL(vb2_core_querybuf); /* * __verify_userptr_ops() - verify that all memory operations required for * USERPTR queue type have been provided */ static int __verify_userptr_ops(struct vb2_queue *q) { if (!(q->io_modes & VB2_USERPTR) || !q->mem_ops->get_userptr || !q->mem_ops->put_userptr) return -EINVAL; return 0; } /* * __verify_mmap_ops() - verify that all memory operations required for * MMAP queue type have been provided */ static int __verify_mmap_ops(struct vb2_queue *q) { if (!(q->io_modes & VB2_MMAP) || !q->mem_ops->alloc || !q->mem_ops->put || !q->mem_ops->mmap) return -EINVAL; return 0; } /* * __verify_dmabuf_ops() - verify that all memory operations required for * DMABUF queue type have been provided */ static int __verify_dmabuf_ops(struct vb2_queue *q) { if (!(q->io_modes & VB2_DMABUF) || !q->mem_ops->attach_dmabuf || !q->mem_ops->detach_dmabuf || !q->mem_ops->map_dmabuf || !q->mem_ops->unmap_dmabuf) return -EINVAL; return 0; } int vb2_verify_memory_type(struct vb2_queue *q, enum vb2_memory memory, unsigned int type) { if (memory != VB2_MEMORY_MMAP && memory != VB2_MEMORY_USERPTR && memory != VB2_MEMORY_DMABUF) { dprintk(q, 1, "unsupported memory type\n"); return -EINVAL; } if (type != q->type) { dprintk(q, 1, "requested type is incorrect\n"); return -EINVAL; } /* * Make sure all the required memory ops for given memory type * are available. */ if (memory == VB2_MEMORY_MMAP && __verify_mmap_ops(q)) { dprintk(q, 1, "MMAP for current setup unsupported\n"); return -EINVAL; } if (memory == VB2_MEMORY_USERPTR && __verify_userptr_ops(q)) { dprintk(q, 1, "USERPTR for current setup unsupported\n"); return -EINVAL; } if (memory == VB2_MEMORY_DMABUF && __verify_dmabuf_ops(q)) { dprintk(q, 1, "DMABUF for current setup unsupported\n"); return -EINVAL; } /* * Place the busy tests at the end: -EBUSY can be ignored when * create_bufs is called with count == 0, but count == 0 should still * do the memory and type validation. 
*/ if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } return 0; } EXPORT_SYMBOL(vb2_verify_memory_type); static void set_queue_coherency(struct vb2_queue *q, bool non_coherent_mem) { q->non_coherent_mem = 0; if (!vb2_queue_allows_cache_hints(q)) return; q->non_coherent_mem = non_coherent_mem; } static bool verify_coherency_flags(struct vb2_queue *q, bool non_coherent_mem) { if (non_coherent_mem != q->non_coherent_mem) { dprintk(q, 1, "memory coherency model mismatch\n"); return false; } return true; } int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int flags, unsigned int *count) { unsigned int num_buffers, allocated_buffers, num_planes = 0; unsigned int q_num_bufs = vb2_get_num_buffers(q); unsigned plane_sizes[VB2_MAX_PLANES] = { }; bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT; unsigned int i; int ret = 0; if (q->streaming) { dprintk(q, 1, "streaming active\n"); return -EBUSY; } if (q->waiting_in_dqbuf && *count) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } if (*count == 0 || q_num_bufs != 0 || (q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory) || !verify_coherency_flags(q, non_coherent_mem)) { /* * We already have buffers allocated, so first check if they * are not in use and can be freed. */ mutex_lock(&q->mmap_lock); if (debug && q->memory == VB2_MEMORY_MMAP && __buffers_in_use(q)) dprintk(q, 1, "memory in use, orphaning buffers\n"); /* * Call queue_cancel to clean up any buffers in the * QUEUED state which is possible if buffers were prepared or * queued without ever calling STREAMON. */ __vb2_queue_cancel(q); __vb2_queue_free(q, q_num_bufs); mutex_unlock(&q->mmap_lock); /* * In case of REQBUFS(0) return immediately without calling * driver's queue_setup() callback and allocating resources. */ if (*count == 0) return 0; } /* * Make sure the requested values and current defaults are sane. */ num_buffers = max_t(unsigned int, *count, q->min_queued_buffers); num_buffers = min_t(unsigned int, num_buffers, q->max_num_buffers); memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); /* * Set this now to ensure that drivers see the correct q->memory value * in the queue_setup op. */ mutex_lock(&q->mmap_lock); if (!q->bufs) q->bufs = kcalloc(q->max_num_buffers, sizeof(*q->bufs), GFP_KERNEL); if (!q->bufs) ret = -ENOMEM; q->memory = memory; mutex_unlock(&q->mmap_lock); if (ret) return ret; set_queue_coherency(q, non_coherent_mem); /* * Ask the driver how many buffers and planes per buffer it requires. * Driver also sets the size and allocator context for each plane. */ ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) goto error; /* Check that driver has set sane values */ if (WARN_ON(!num_planes)) { ret = -EINVAL; goto error; } for (i = 0; i < num_planes; i++) if (WARN_ON(!plane_sizes[i])) { ret = -EINVAL; goto error; } /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); ret = -ENOMEM; goto error; } /* * There is no point in continuing if we can't allocate the minimum * number of buffers needed by this vb2_queue. */ if (allocated_buffers < q->min_queued_buffers) ret = -ENOMEM; /* * Check if driver can handle the allocated number of buffers. 
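 *
 * For example, if userspace requested 8 buffers but only 5 could be
 * allocated, queue_setup() is called a second time with num_buffers
 * set to 5, giving the driver a chance to accept (or reject) the
 * smaller count. If the driver then demands more buffers than the 5
 * that actually exist, the whole request fails with -ENOMEM.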
*/ if (!ret && allocated_buffers < num_buffers) { num_buffers = allocated_buffers; /* * num_planes is set by the previous queue_setup(), but since it * signals to queue_setup() whether it is called from create_bufs() * vs reqbufs() we zero it here to signal that queue_setup() is * called for the reqbufs() case. */ num_planes = 0; ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (!ret && allocated_buffers < num_buffers) ret = -ENOMEM; /* * Either the driver has accepted a smaller number of buffers, * or .queue_setup() returned an error */ } mutex_lock(&q->mmap_lock); q->num_buffers = allocated_buffers; if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' * from already queued buffers and it will reset q->memory to * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); return ret; } mutex_unlock(&q->mmap_lock); /* * Return the number of successfully allocated buffers * to the userspace. */ *count = allocated_buffers; q->waiting_for_buffers = !q->is_output; return 0; error: mutex_lock(&q->mmap_lock); q->memory = VB2_MEMORY_UNKNOWN; mutex_unlock(&q->mmap_lock); return ret; } EXPORT_SYMBOL_GPL(vb2_core_reqbufs); int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int flags, unsigned int *count, unsigned int requested_planes, const unsigned int requested_sizes[]) { unsigned int num_planes = 0, num_buffers, allocated_buffers; unsigned plane_sizes[VB2_MAX_PLANES] = { }; bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT; unsigned int q_num_bufs = vb2_get_num_buffers(q); bool no_previous_buffers = !q_num_bufs; int ret = 0; if (q_num_bufs == q->max_num_buffers) { dprintk(q, 1, "maximum number of buffers already allocated\n"); return -ENOBUFS; } if (no_previous_buffers) { if (q->waiting_in_dqbuf && *count) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); /* * Set this now to ensure that drivers see the correct q->memory * value in the queue_setup op. */ mutex_lock(&q->mmap_lock); q->memory = memory; if (!q->bufs) q->bufs = kcalloc(q->max_num_buffers, sizeof(*q->bufs), GFP_KERNEL); if (!q->bufs) ret = -ENOMEM; mutex_unlock(&q->mmap_lock); if (ret) return ret; q->waiting_for_buffers = !q->is_output; set_queue_coherency(q, non_coherent_mem); } else { if (q->memory != memory) { dprintk(q, 1, "memory model mismatch\n"); return -EINVAL; } if (!verify_coherency_flags(q, non_coherent_mem)) return -EINVAL; } num_buffers = min(*count, q->max_num_buffers - q_num_bufs); if (requested_planes && requested_sizes) { num_planes = requested_planes; memcpy(plane_sizes, requested_sizes, sizeof(plane_sizes)); } /* * Ask the driver, whether the requested number of buffers, planes per * buffer and their sizes are acceptable */ ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) goto error; /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); ret = -ENOMEM; goto error; } /* * Check if driver can handle the so far allocated number of buffers. 
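 *
 * Note that, unlike the reqbufs() path, num_planes is not zeroed
 * before this second queue_setup() call: the non-zero plane count is
 * what lets the driver tell a create_bufs() invocation apart from a
 * reqbufs() one.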
*/ if (allocated_buffers < num_buffers) { num_buffers = allocated_buffers; /* * num_buffers contains the total number of buffers, that the * queue driver has set up */ ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (!ret && allocated_buffers < num_buffers) ret = -ENOMEM; /* * Either the driver has accepted a smaller number of buffers, * or .queue_setup() returned an error */ } mutex_lock(&q->mmap_lock); q->num_buffers += allocated_buffers; if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' * from already queued buffers and it will reset q->memory to * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); return -ENOMEM; } mutex_unlock(&q->mmap_lock); /* * Return the number of successfully allocated buffers * to the userspace. */ *count = allocated_buffers; return 0; error: if (no_previous_buffers) { mutex_lock(&q->mmap_lock); q->memory = VB2_MEMORY_UNKNOWN; mutex_unlock(&q->mmap_lock); } return ret; } EXPORT_SYMBOL_GPL(vb2_core_create_bufs); void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no) { if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv) return NULL; return call_ptr_memop(vaddr, vb, vb->planes[plane_no].mem_priv); } EXPORT_SYMBOL_GPL(vb2_plane_vaddr); void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no) { if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv) return NULL; return call_ptr_memop(cookie, vb, vb->planes[plane_no].mem_priv); } EXPORT_SYMBOL_GPL(vb2_plane_cookie); void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state) { struct vb2_queue *q = vb->vb2_queue; unsigned long flags; if (WARN_ON(vb->state != VB2_BUF_STATE_ACTIVE)) return; if (WARN_ON(state != VB2_BUF_STATE_DONE && state != VB2_BUF_STATE_ERROR && state != VB2_BUF_STATE_QUEUED)) state = VB2_BUF_STATE_ERROR; #ifdef CONFIG_VIDEO_ADV_DEBUG /* * Although this is not a callback, it still does have to balance * with the buf_queue op. So update this counter manually. */ vb->cnt_buf_done++; #endif dprintk(q, 4, "done processing on buffer %d, state: %s\n", vb->index, vb2_state_name(state)); if (state != VB2_BUF_STATE_QUEUED) __vb2_buf_mem_finish(vb); spin_lock_irqsave(&q->done_lock, flags); if (state == VB2_BUF_STATE_QUEUED) { vb->state = VB2_BUF_STATE_QUEUED; } else { /* Add the buffer to the done buffers list */ list_add_tail(&vb->done_entry, &q->done_list); vb->state = state; } atomic_dec(&q->owned_by_drv_count); if (state != VB2_BUF_STATE_QUEUED && vb->req_obj.req) { media_request_object_unbind(&vb->req_obj); media_request_object_put(&vb->req_obj); } spin_unlock_irqrestore(&q->done_lock, flags); trace_vb2_buf_done(q, vb); switch (state) { case VB2_BUF_STATE_QUEUED: return; default: /* Inform any processes that may be waiting for buffers */ wake_up(&q->done_wq); break; } } EXPORT_SYMBOL_GPL(vb2_buffer_done); void vb2_discard_done(struct vb2_queue *q) { struct vb2_buffer *vb; unsigned long flags; spin_lock_irqsave(&q->done_lock, flags); list_for_each_entry(vb, &q->done_list, done_entry) vb->state = VB2_BUF_STATE_ERROR; spin_unlock_irqrestore(&q->done_lock, flags); } EXPORT_SYMBOL_GPL(vb2_discard_done); /* * __prepare_mmap() - prepare an MMAP buffer */ static int __prepare_mmap(struct vb2_buffer *vb) { int ret = 0; ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, vb->planes); return ret ? 
ret : call_vb_qop(vb, buf_prepare, vb); } /* * __prepare_userptr() - prepare a USERPTR buffer */ static int __prepare_userptr(struct vb2_buffer *vb) { struct vb2_plane planes[VB2_MAX_PLANES]; struct vb2_queue *q = vb->vb2_queue; void *mem_priv; unsigned int plane; int ret = 0; bool reacquired = vb->planes[0].mem_priv == NULL; memset(planes, 0, sizeof(planes[0]) * vb->num_planes); /* Copy relevant information provided by the userspace */ ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, planes); if (ret) return ret; for (plane = 0; plane < vb->num_planes; ++plane) { /* Skip the plane if already verified */ if (vb->planes[plane].m.userptr && vb->planes[plane].m.userptr == planes[plane].m.userptr && vb->planes[plane].length == planes[plane].length) continue; dprintk(q, 3, "userspace address for plane %d changed, reacquiring memory\n", plane); /* Check if the provided plane buffer is large enough */ if (planes[plane].length < vb->planes[plane].min_length) { dprintk(q, 1, "provided buffer size %u is less than setup size %u for plane %d\n", planes[plane].length, vb->planes[plane].min_length, plane); ret = -EINVAL; goto err; } /* Release previously acquired memory if present */ if (vb->planes[plane].mem_priv) { if (!reacquired) { reacquired = true; vb->copied_timestamp = 0; call_void_vb_qop(vb, buf_cleanup, vb); } call_void_memop(vb, put_userptr, vb->planes[plane].mem_priv); } vb->planes[plane].mem_priv = NULL; vb->planes[plane].bytesused = 0; vb->planes[plane].length = 0; vb->planes[plane].m.userptr = 0; vb->planes[plane].data_offset = 0; /* Acquire each plane's memory */ mem_priv = call_ptr_memop(get_userptr, vb, q->alloc_devs[plane] ? : q->dev, planes[plane].m.userptr, planes[plane].length); if (IS_ERR(mem_priv)) { dprintk(q, 1, "failed acquiring userspace memory for plane %d\n", plane); ret = PTR_ERR(mem_priv); goto err; } vb->planes[plane].mem_priv = mem_priv; } /* * Now that everything is in order, copy relevant information * provided by userspace. */ for (plane = 0; plane < vb->num_planes; ++plane) { vb->planes[plane].bytesused = planes[plane].bytesused; vb->planes[plane].length = planes[plane].length; vb->planes[plane].m.userptr = planes[plane].m.userptr; vb->planes[plane].data_offset = planes[plane].data_offset; } if (reacquired) { /* * One or more planes changed, so we must call buf_init to do * the driver-specific initialization on the newly acquired * buffer, if provided. 
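 *
 * If this buffer had been acquired before, the matching buf_cleanup
 * was already issued above, at the moment the first changed plane was
 * detected; on a first-time acquisition there is nothing to clean up,
 * so the buf_init/buf_cleanup pairing stays balanced either way.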
*/ ret = call_vb_qop(vb, buf_init, vb); if (ret) { dprintk(q, 1, "buffer initialization failed\n"); goto err; } } ret = call_vb_qop(vb, buf_prepare, vb); if (ret) { dprintk(q, 1, "buffer preparation failed\n"); call_void_vb_qop(vb, buf_cleanup, vb); goto err; } return 0; err: /* In case of errors, release planes that were already acquired */ for (plane = 0; plane < vb->num_planes; ++plane) { if (vb->planes[plane].mem_priv) call_void_memop(vb, put_userptr, vb->planes[plane].mem_priv); vb->planes[plane].mem_priv = NULL; vb->planes[plane].m.userptr = 0; vb->planes[plane].length = 0; } return ret; } /* * __prepare_dmabuf() - prepare a DMABUF buffer */ static int __prepare_dmabuf(struct vb2_buffer *vb) { struct vb2_plane planes[VB2_MAX_PLANES]; struct vb2_queue *q = vb->vb2_queue; void *mem_priv; unsigned int plane; int ret = 0; bool reacquired = vb->planes[0].mem_priv == NULL; memset(planes, 0, sizeof(planes[0]) * vb->num_planes); /* Copy relevant information provided by the userspace */ ret = call_bufop(vb->vb2_queue, fill_vb2_buffer, vb, planes); if (ret) return ret; for (plane = 0; plane < vb->num_planes; ++plane) { struct dma_buf *dbuf = dma_buf_get(planes[plane].m.fd); if (IS_ERR_OR_NULL(dbuf)) { dprintk(q, 1, "invalid dmabuf fd for plane %d\n", plane); ret = -EINVAL; goto err; } /* use DMABUF size if length is not provided */ if (planes[plane].length == 0) planes[plane].length = dbuf->size; if (planes[plane].length < vb->planes[plane].min_length) { dprintk(q, 1, "invalid dmabuf length %u for plane %d, minimum length %u\n", planes[plane].length, plane, vb->planes[plane].min_length); dma_buf_put(dbuf); ret = -EINVAL; goto err; } /* Skip the plane if already verified */ if (dbuf == vb->planes[plane].dbuf && vb->planes[plane].length == planes[plane].length) { dma_buf_put(dbuf); continue; } dprintk(q, 3, "buffer for plane %d changed\n", plane); if (!reacquired) { reacquired = true; vb->copied_timestamp = 0; call_void_vb_qop(vb, buf_cleanup, vb); } /* Release previously acquired memory if present */ __vb2_plane_dmabuf_put(vb, &vb->planes[plane]); vb->planes[plane].bytesused = 0; vb->planes[plane].length = 0; vb->planes[plane].m.fd = 0; vb->planes[plane].data_offset = 0; /* Acquire each plane's memory */ mem_priv = call_ptr_memop(attach_dmabuf, vb, q->alloc_devs[plane] ? : q->dev, dbuf, planes[plane].length); if (IS_ERR(mem_priv)) { dprintk(q, 1, "failed to attach dmabuf\n"); ret = PTR_ERR(mem_priv); dma_buf_put(dbuf); goto err; } vb->planes[plane].dbuf = dbuf; vb->planes[plane].mem_priv = mem_priv; } /* * This pins the buffer(s) with dma_buf_map_attachment()). It's done * here instead just before the DMA, while queueing the buffer(s) so * userspace knows sooner rather than later if the dma-buf map fails. */ for (plane = 0; plane < vb->num_planes; ++plane) { if (vb->planes[plane].dbuf_mapped) continue; ret = call_memop(vb, map_dmabuf, vb->planes[plane].mem_priv); if (ret) { dprintk(q, 1, "failed to map dmabuf for plane %d\n", plane); goto err; } vb->planes[plane].dbuf_mapped = 1; } /* * Now that everything is in order, copy relevant information * provided by userspace. */ for (plane = 0; plane < vb->num_planes; ++plane) { vb->planes[plane].bytesused = planes[plane].bytesused; vb->planes[plane].length = planes[plane].length; vb->planes[plane].m.fd = planes[plane].m.fd; vb->planes[plane].data_offset = planes[plane].data_offset; } if (reacquired) { /* * Call driver-specific initialization on the newly acquired buffer, * if provided. 
*/ ret = call_vb_qop(vb, buf_init, vb); if (ret) { dprintk(q, 1, "buffer initialization failed\n"); goto err; } } ret = call_vb_qop(vb, buf_prepare, vb); if (ret) { dprintk(q, 1, "buffer preparation failed\n"); call_void_vb_qop(vb, buf_cleanup, vb); goto err; } return 0; err: /* In case of errors, release planes that were already acquired */ __vb2_buf_dmabuf_put(vb); return ret; } /* * __enqueue_in_driver() - enqueue a vb2_buffer in driver for processing */ static void __enqueue_in_driver(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; vb->state = VB2_BUF_STATE_ACTIVE; atomic_inc(&q->owned_by_drv_count); trace_vb2_buf_queue(q, vb); call_void_vb_qop(vb, buf_queue, vb); } static int __buf_prepare(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; enum vb2_buffer_state orig_state = vb->state; int ret; if (q->error) { dprintk(q, 1, "fatal error occurred on queue\n"); return -EIO; } if (vb->prepared) return 0; WARN_ON(vb->synced); if (q->is_output) { ret = call_vb_qop(vb, buf_out_validate, vb); if (ret) { dprintk(q, 1, "buffer validation failed\n"); return ret; } } vb->state = VB2_BUF_STATE_PREPARING; switch (q->memory) { case VB2_MEMORY_MMAP: ret = __prepare_mmap(vb); break; case VB2_MEMORY_USERPTR: ret = __prepare_userptr(vb); break; case VB2_MEMORY_DMABUF: ret = __prepare_dmabuf(vb); break; default: WARN(1, "Invalid queue type\n"); ret = -EINVAL; break; } if (ret) { dprintk(q, 1, "buffer preparation failed: %d\n", ret); vb->state = orig_state; return ret; } __vb2_buf_mem_prepare(vb); vb->prepared = 1; vb->state = orig_state; return 0; } static int vb2_req_prepare(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); int ret; if (WARN_ON(vb->state != VB2_BUF_STATE_IN_REQUEST)) return -EINVAL; mutex_lock(vb->vb2_queue->lock); ret = __buf_prepare(vb); mutex_unlock(vb->vb2_queue->lock); return ret; } static void __vb2_dqbuf(struct vb2_buffer *vb); static void vb2_req_unprepare(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); mutex_lock(vb->vb2_queue->lock); __vb2_dqbuf(vb); vb->state = VB2_BUF_STATE_IN_REQUEST; mutex_unlock(vb->vb2_queue->lock); WARN_ON(!vb->req_obj.req); } static void vb2_req_queue(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); int err; mutex_lock(vb->vb2_queue->lock); /* * There is no method to propagate an error from vb2_core_qbuf(), * so if this returns a non-0 value, then WARN. * * The only exception is -EIO which is returned if q->error is * set. We just ignore that, and expect this will be caught the * next time vb2_req_prepare() is called. 
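 *
 * In practice this means that if vb2_queue_error() has been called,
 * the buffer silently stays unqueued here and userspace sees the -EIO
 * on its next prepare or dequeue attempt instead.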
*/ err = vb2_core_qbuf(vb->vb2_queue, vb, NULL, NULL); WARN_ON_ONCE(err && err != -EIO); mutex_unlock(vb->vb2_queue->lock); } static void vb2_req_unbind(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); if (vb->state == VB2_BUF_STATE_IN_REQUEST) call_void_bufop(vb->vb2_queue, init_buffer, vb); } static void vb2_req_release(struct media_request_object *obj) { struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj); if (vb->state == VB2_BUF_STATE_IN_REQUEST) { vb->state = VB2_BUF_STATE_DEQUEUED; if (vb->request) media_request_put(vb->request); vb->request = NULL; } } static const struct media_request_object_ops vb2_core_req_ops = { .prepare = vb2_req_prepare, .unprepare = vb2_req_unprepare, .queue = vb2_req_queue, .unbind = vb2_req_unbind, .release = vb2_req_release, }; bool vb2_request_object_is_buffer(struct media_request_object *obj) { return obj->ops == &vb2_core_req_ops; } EXPORT_SYMBOL_GPL(vb2_request_object_is_buffer); unsigned int vb2_request_buffer_cnt(struct media_request *req) { struct media_request_object *obj; unsigned long flags; unsigned int buffer_cnt = 0; spin_lock_irqsave(&req->lock, flags); list_for_each_entry(obj, &req->objects, list) if (vb2_request_object_is_buffer(obj)) buffer_cnt++; spin_unlock_irqrestore(&req->lock, flags); return buffer_cnt; } EXPORT_SYMBOL_GPL(vb2_request_buffer_cnt); int vb2_core_prepare_buf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb) { int ret; if (vb->state != VB2_BUF_STATE_DEQUEUED) { dprintk(q, 1, "invalid buffer state %s\n", vb2_state_name(vb->state)); return -EINVAL; } if (vb->prepared) { dprintk(q, 1, "buffer already prepared\n"); return -EINVAL; } ret = __buf_prepare(vb); if (ret) return ret; /* Fill buffer information for the userspace */ call_void_bufop(q, fill_user_buffer, vb, pb); dprintk(q, 2, "prepare of buffer %d succeeded\n", vb->index); return 0; } EXPORT_SYMBOL_GPL(vb2_core_prepare_buf); /* * vb2_start_streaming() - Attempt to start streaming. * @q: videobuf2 queue * * Attempt to start streaming. When this function is called there must be * at least q->min_queued_buffers queued up (i.e. the minimum * number of buffers required for the DMA engine to function). If the * @start_streaming op fails it is supposed to return all the driver-owned * buffers back to vb2 in state QUEUED. Check if that happened and if * not warn and reclaim them forcefully. */ static int vb2_start_streaming(struct vb2_queue *q) { struct vb2_buffer *vb; int ret; /* * If any buffers were queued before streamon, * we can now pass them to driver for processing. */ list_for_each_entry(vb, &q->queued_list, queued_entry) __enqueue_in_driver(vb); /* Tell the driver to start streaming */ q->start_streaming_called = 1; ret = call_qop(q, start_streaming, q, atomic_read(&q->owned_by_drv_count)); if (!ret) return 0; q->start_streaming_called = 0; dprintk(q, 1, "driver refused to start streaming\n"); /* * If you see this warning, then the driver isn't cleaning up properly * after a failed start_streaming(). See the start_streaming() * documentation in videobuf2-core.h for more information how buffers * should be returned to vb2 in start_streaming(). */ if (WARN_ON(atomic_read(&q->owned_by_drv_count))) { unsigned i; /* * Forcefully reclaim buffers if the driver did not * correctly return them to vb2. 
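 *
 * Reclaiming them with VB2_BUF_STATE_QUEUED (rather than DONE or
 * ERROR) keeps the buffers on the queued_list and off the done_list,
 * so userspace never sees buffers "completed" by a stream that failed
 * to start.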
*/ for (i = 0; i < vb2_get_num_buffers(q); ++i) { vb = vb2_get_buffer(q, i); if (!vb) continue; if (vb->state == VB2_BUF_STATE_ACTIVE) vb2_buffer_done(vb, VB2_BUF_STATE_QUEUED); } /* Must be zero now */ WARN_ON(atomic_read(&q->owned_by_drv_count)); } /* * If done_list is not empty, then start_streaming() didn't call * vb2_buffer_done(vb, VB2_BUF_STATE_QUEUED) but STATE_ERROR or * STATE_DONE. */ WARN_ON(!list_empty(&q->done_list)); return ret; } int vb2_core_qbuf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb, struct media_request *req) { enum vb2_buffer_state orig_state; int ret; if (q->error) { dprintk(q, 1, "fatal error occurred on queue\n"); return -EIO; } if (!req && vb->state != VB2_BUF_STATE_IN_REQUEST && q->requires_requests) { dprintk(q, 1, "qbuf requires a request\n"); return -EBADR; } if ((req && q->uses_qbuf) || (!req && vb->state != VB2_BUF_STATE_IN_REQUEST && q->uses_requests)) { dprintk(q, 1, "queue in wrong mode (qbuf vs requests)\n"); return -EBUSY; } if (req) { int ret; q->uses_requests = 1; if (vb->state != VB2_BUF_STATE_DEQUEUED) { dprintk(q, 1, "buffer %d not in dequeued state\n", vb->index); return -EINVAL; } if (q->is_output && !vb->prepared) { ret = call_vb_qop(vb, buf_out_validate, vb); if (ret) { dprintk(q, 1, "buffer validation failed\n"); return ret; } } media_request_object_init(&vb->req_obj); /* Make sure the request is in a safe state for updating. */ ret = media_request_lock_for_update(req); if (ret) return ret; ret = media_request_object_bind(req, &vb2_core_req_ops, q, true, &vb->req_obj); media_request_unlock_for_update(req); if (ret) return ret; vb->state = VB2_BUF_STATE_IN_REQUEST; /* * Increment the refcount and store the request. * The request refcount is decremented again when the * buffer is dequeued. This is to prevent vb2_buffer_done() * from freeing the request from interrupt context, which can * happen if the application closed the request fd after * queueing the request. */ media_request_get(req); vb->request = req; /* Fill buffer information for the userspace */ if (pb) { call_void_bufop(q, copy_timestamp, vb, pb); call_void_bufop(q, fill_user_buffer, vb, pb); } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); return 0; } if (vb->state != VB2_BUF_STATE_IN_REQUEST) q->uses_qbuf = 1; switch (vb->state) { case VB2_BUF_STATE_DEQUEUED: case VB2_BUF_STATE_IN_REQUEST: if (!vb->prepared) { ret = __buf_prepare(vb); if (ret) return ret; } break; case VB2_BUF_STATE_PREPARING: dprintk(q, 1, "buffer still being prepared\n"); return -EINVAL; default: dprintk(q, 1, "invalid buffer state %s\n", vb2_state_name(vb->state)); return -EINVAL; } /* * Add to the queued buffers list, a buffer will stay on it until * dequeued in dqbuf. */ orig_state = vb->state; list_add_tail(&vb->queued_entry, &q->queued_list); q->queued_count++; q->waiting_for_buffers = false; vb->state = VB2_BUF_STATE_QUEUED; if (pb) call_void_bufop(q, copy_timestamp, vb, pb); trace_vb2_qbuf(q, vb); /* * If already streaming, give the buffer to driver for processing. * If not, the buffer will be given to driver on next streamon. */ if (q->start_streaming_called) __enqueue_in_driver(vb); /* Fill buffer information for the userspace */ if (pb) call_void_bufop(q, fill_user_buffer, vb, pb); /* * If streamon has been called, and we haven't yet called * start_streaming() since not enough buffers were queued, and * we now have reached the minimum number of queued buffers, * then we can finally call start_streaming(). 
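 *
 * For example, with min_queued_buffers == 3 and STREAMON already
 * issued, the first two QBUF calls only add buffers to queued_list;
 * the third one reaches the threshold and triggers the
 * vb2_start_streaming() call below.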
*/ if (q->streaming && !q->start_streaming_called && q->queued_count >= q->min_queued_buffers) { ret = vb2_start_streaming(q); if (ret) { /* * Since vb2_core_qbuf will return with an error, * we should return it to state DEQUEUED since * the error indicates that the buffer wasn't queued. */ list_del(&vb->queued_entry); q->queued_count--; vb->state = orig_state; return ret; } } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); return 0; } EXPORT_SYMBOL_GPL(vb2_core_qbuf); /* * __vb2_wait_for_done_vb() - wait for a buffer to become available * for dequeuing * * Will sleep if required for nonblocking == false. */ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking) { /* * All operations on vb_done_list are performed under done_lock * spinlock protection. However, buffers may be removed from * it and returned to userspace only while holding both driver's * lock and the done_lock spinlock. Thus we can be sure that as * long as we hold the driver's lock, the list will remain not * empty if list_empty() check succeeds. */ for (;;) { int ret; if (q->waiting_in_dqbuf) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } if (!q->streaming) { dprintk(q, 1, "streaming off, will not wait for buffers\n"); return -EINVAL; } if (q->error) { dprintk(q, 1, "Queue in error state, will not wait for buffers\n"); return -EIO; } if (q->last_buffer_dequeued) { dprintk(q, 3, "last buffer dequeued already, will not wait for buffers\n"); return -EPIPE; } if (!list_empty(&q->done_list)) { /* * Found a buffer that we were waiting for. */ break; } if (nonblocking) { dprintk(q, 3, "nonblocking and no buffers to dequeue, will not wait\n"); return -EAGAIN; } q->waiting_in_dqbuf = 1; /* * We are streaming and blocking, wait for another buffer to * become ready or for streamoff. Driver's lock is released to * allow streamoff or qbuf to be called while waiting. */ call_void_qop(q, wait_prepare, q); /* * All locks have been released, it is safe to sleep now. */ dprintk(q, 3, "will sleep waiting for buffers\n"); ret = wait_event_interruptible(q->done_wq, !list_empty(&q->done_list) || !q->streaming || q->error); /* * We need to reevaluate both conditions again after reacquiring * the locks or return an error if one occurred. */ call_void_qop(q, wait_finish, q); q->waiting_in_dqbuf = 0; if (ret) { dprintk(q, 1, "sleep was interrupted\n"); return ret; } } return 0; } /* * __vb2_get_done_vb() - get a buffer ready for dequeuing * * Will sleep if required for nonblocking == false. */ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb, void *pb, int nonblocking) { unsigned long flags; int ret = 0; /* * Wait for at least one buffer to become available on the done_list. */ ret = __vb2_wait_for_done_vb(q, nonblocking); if (ret) return ret; /* * Driver's lock has been held since we last verified that done_list * is not empty, so no need for another list_empty(done_list) check. */ spin_lock_irqsave(&q->done_lock, flags); *vb = list_first_entry(&q->done_list, struct vb2_buffer, done_entry); /* * Only remove the buffer from done_list if all planes can be * handled. Some cases such as V4L2 file I/O and DVB have pb * == NULL; skip the check then as there's nothing to verify. 
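 *
 * If the verification fails, the buffer is left on the done_list, so
 * a later DQBUF with a properly sized planes array can still dequeue
 * it.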
*/ if (pb) ret = call_bufop(q, verify_planes_array, *vb, pb); if (!ret) list_del(&(*vb)->done_entry); spin_unlock_irqrestore(&q->done_lock, flags); return ret; } int vb2_wait_for_all_buffers(struct vb2_queue *q) { if (!q->streaming) { dprintk(q, 1, "streaming off, will not wait for buffers\n"); return -EINVAL; } if (q->start_streaming_called) wait_event(q->done_wq, !atomic_read(&q->owned_by_drv_count)); return 0; } EXPORT_SYMBOL_GPL(vb2_wait_for_all_buffers); /* * __vb2_dqbuf() - bring back the buffer to the DEQUEUED state */ static void __vb2_dqbuf(struct vb2_buffer *vb) { struct vb2_queue *q = vb->vb2_queue; /* nothing to do if the buffer is already dequeued */ if (vb->state == VB2_BUF_STATE_DEQUEUED) return; vb->state = VB2_BUF_STATE_DEQUEUED; call_void_bufop(q, init_buffer, vb); } int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb, bool nonblocking) { struct vb2_buffer *vb = NULL; int ret; ret = __vb2_get_done_vb(q, &vb, pb, nonblocking); if (ret < 0) return ret; switch (vb->state) { case VB2_BUF_STATE_DONE: dprintk(q, 3, "returning done buffer\n"); break; case VB2_BUF_STATE_ERROR: dprintk(q, 3, "returning done buffer with errors\n"); break; default: dprintk(q, 1, "invalid buffer state %s\n", vb2_state_name(vb->state)); return -EINVAL; } call_void_vb_qop(vb, buf_finish, vb); vb->prepared = 0; if (pindex) *pindex = vb->index; /* Fill buffer information for the userspace */ if (pb) call_void_bufop(q, fill_user_buffer, vb, pb); /* Remove from vb2 queue */ list_del(&vb->queued_entry); q->queued_count--; trace_vb2_dqbuf(q, vb); /* go back to dequeued state */ __vb2_dqbuf(vb); if (WARN_ON(vb->req_obj.req)) { media_request_object_unbind(&vb->req_obj); media_request_object_put(&vb->req_obj); } if (vb->request) media_request_put(vb->request); vb->request = NULL; dprintk(q, 2, "dqbuf of buffer %d, state: %s\n", vb->index, vb2_state_name(vb->state)); return 0; } EXPORT_SYMBOL_GPL(vb2_core_dqbuf); /* * __vb2_queue_cancel() - cancel and stop (pause) streaming * * Removes all queued buffers from driver's queue and all buffers queued by * userspace from vb2's queue. Returns to state after reqbufs. */ static void __vb2_queue_cancel(struct vb2_queue *q) { unsigned int i; /* * Tell driver to stop all transactions and release all queued * buffers. */ if (q->start_streaming_called) call_void_qop(q, stop_streaming, q); if (q->streaming) call_void_qop(q, unprepare_streaming, q); /* * If you see this warning, then the driver isn't cleaning up properly * in stop_streaming(). See the stop_streaming() documentation in * videobuf2-core.h for more information how buffers should be returned * to vb2 in stop_streaming(). */ if (WARN_ON(atomic_read(&q->owned_by_drv_count))) { for (i = 0; i < vb2_get_num_buffers(q); i++) { struct vb2_buffer *vb = vb2_get_buffer(q, i); if (!vb) continue; if (vb->state == VB2_BUF_STATE_ACTIVE) { pr_warn("driver bug: stop_streaming operation is leaving buffer %u in active state\n", vb->index); vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); } } /* Must be zero now */ WARN_ON(atomic_read(&q->owned_by_drv_count)); } q->streaming = 0; q->start_streaming_called = 0; q->queued_count = 0; q->error = 0; q->uses_requests = 0; q->uses_qbuf = 0; /* * Remove all buffers from vb2's list... */ INIT_LIST_HEAD(&q->queued_list); /* * ...and done list; userspace will not receive any buffers it * has not already dequeued before initiating cancel. */ INIT_LIST_HEAD(&q->done_list); atomic_set(&q->owned_by_drv_count, 0); wake_up_all(&q->done_wq); /* * Reinitialize all buffers for next use. 
* Make sure to call buf_finish for any queued buffers. Normally * that's done in dqbuf, but that's not going to happen when we * cancel the whole queue. Note: this code belongs here, not in * __vb2_dqbuf() since in vb2_core_dqbuf() there is a critical * call to __fill_user_buffer() after buf_finish(). That order can't * be changed, so we can't move the buf_finish() to __vb2_dqbuf(). */ for (i = 0; i < vb2_get_num_buffers(q); i++) { struct vb2_buffer *vb; struct media_request *req; vb = vb2_get_buffer(q, i); if (!vb) continue; req = vb->req_obj.req; /* * If a request is associated with this buffer, then * call buf_request_cancel() to give the driver to complete() * related request objects. Otherwise those objects would * never complete. */ if (req) { enum media_request_state state; unsigned long flags; spin_lock_irqsave(&req->lock, flags); state = req->state; spin_unlock_irqrestore(&req->lock, flags); if (state == MEDIA_REQUEST_STATE_QUEUED) call_void_vb_qop(vb, buf_request_complete, vb); } __vb2_buf_mem_finish(vb); if (vb->prepared) { call_void_vb_qop(vb, buf_finish, vb); vb->prepared = 0; } __vb2_dqbuf(vb); if (vb->req_obj.req) { media_request_object_unbind(&vb->req_obj); media_request_object_put(&vb->req_obj); } if (vb->request) media_request_put(vb->request); vb->request = NULL; vb->copied_timestamp = 0; } } int vb2_core_streamon(struct vb2_queue *q, unsigned int type) { unsigned int q_num_bufs = vb2_get_num_buffers(q); int ret; if (type != q->type) { dprintk(q, 1, "invalid stream type\n"); return -EINVAL; } if (q->streaming) { dprintk(q, 3, "already streaming\n"); return 0; } if (!q_num_bufs) { dprintk(q, 1, "no buffers have been allocated\n"); return -EINVAL; } if (q_num_bufs < q->min_queued_buffers) { dprintk(q, 1, "need at least %u queued buffers\n", q->min_queued_buffers); return -EINVAL; } ret = call_qop(q, prepare_streaming, q); if (ret) return ret; /* * Tell driver to start streaming provided sufficient buffers * are available. */ if (q->queued_count >= q->min_queued_buffers) { ret = vb2_start_streaming(q); if (ret) goto unprepare; } q->streaming = 1; dprintk(q, 3, "successful\n"); return 0; unprepare: call_void_qop(q, unprepare_streaming, q); return ret; } EXPORT_SYMBOL_GPL(vb2_core_streamon); void vb2_queue_error(struct vb2_queue *q) { q->error = 1; wake_up_all(&q->done_wq); } EXPORT_SYMBOL_GPL(vb2_queue_error); int vb2_core_streamoff(struct vb2_queue *q, unsigned int type) { if (type != q->type) { dprintk(q, 1, "invalid stream type\n"); return -EINVAL; } /* * Cancel will pause streaming and remove all buffers from the driver * and vb2, effectively returning control over them to userspace. * * Note that we do this even if q->streaming == 0: if you prepare or * queue buffers, and then call streamoff without ever having called * streamon, you would still expect those buffers to be returned to * their normal dequeued state. */ __vb2_queue_cancel(q); q->waiting_for_buffers = !q->is_output; q->last_buffer_dequeued = false; dprintk(q, 3, "successful\n"); return 0; } EXPORT_SYMBOL_GPL(vb2_core_streamoff); /* * __find_plane_by_offset() - find plane associated with the given offset */ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long offset, struct vb2_buffer **vb, unsigned int *plane) { unsigned int buffer; /* * Sanity checks to ensure the lock is held, MEMORY_MMAP is * used and fileio isn't active. 
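 *
 * The decode below mirrors __setup_offsets(). Assuming PAGE_SHIFT == 12
 * and PLANE_INDEX_SHIFT == 15, an offset of 0x19000 yields:
 *
 *   buffer = (0x19000 >> 15) & BUFFER_INDEX_MASK = 3
 *   plane  = (0x19000 >> 12) & PLANE_INDEX_MASK  = 1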
*/ lockdep_assert_held(&q->mmap_lock); if (q->memory != VB2_MEMORY_MMAP) { dprintk(q, 1, "queue is not currently set up for mmap\n"); return -EINVAL; } if (vb2_fileio_is_active(q)) { dprintk(q, 1, "file io in progress\n"); return -EBUSY; } /* Get buffer and plane from the offset */ buffer = (offset >> PLANE_INDEX_SHIFT) & BUFFER_INDEX_MASK; *plane = (offset >> PAGE_SHIFT) & PLANE_INDEX_MASK; *vb = vb2_get_buffer(q, buffer); if (!*vb) return -EINVAL; if (*plane >= (*vb)->num_planes) return -EINVAL; return 0; } int vb2_core_expbuf(struct vb2_queue *q, int *fd, unsigned int type, struct vb2_buffer *vb, unsigned int plane, unsigned int flags) { struct vb2_plane *vb_plane; int ret; struct dma_buf *dbuf; if (q->memory != VB2_MEMORY_MMAP) { dprintk(q, 1, "queue is not currently set up for mmap\n"); return -EINVAL; } if (!q->mem_ops->get_dmabuf) { dprintk(q, 1, "queue does not support DMA buffer exporting\n"); return -EINVAL; } if (flags & ~(O_CLOEXEC | O_ACCMODE)) { dprintk(q, 1, "queue does support only O_CLOEXEC and access mode flags\n"); return -EINVAL; } if (type != q->type) { dprintk(q, 1, "invalid buffer type\n"); return -EINVAL; } if (plane >= vb->num_planes) { dprintk(q, 1, "buffer plane out of range\n"); return -EINVAL; } if (vb2_fileio_is_active(q)) { dprintk(q, 1, "expbuf: file io in progress\n"); return -EBUSY; } vb_plane = &vb->planes[plane]; dbuf = call_ptr_memop(get_dmabuf, vb, vb_plane->mem_priv, flags & O_ACCMODE); if (IS_ERR_OR_NULL(dbuf)) { dprintk(q, 1, "failed to export buffer %d, plane %d\n", vb->index, plane); return -EINVAL; } ret = dma_buf_fd(dbuf, flags & ~O_ACCMODE); if (ret < 0) { dprintk(q, 3, "buffer %d, plane %d failed to export (%d)\n", vb->index, plane, ret); dma_buf_put(dbuf); return ret; } dprintk(q, 3, "buffer %d, plane %d exported as %d descriptor\n", vb->index, plane, ret); *fd = ret; return 0; } EXPORT_SYMBOL_GPL(vb2_core_expbuf); int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) { unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; struct vb2_buffer *vb; unsigned int plane = 0; int ret; unsigned long length; /* * Check memory area access mode. */ if (!(vma->vm_flags & VM_SHARED)) { dprintk(q, 1, "invalid vma flags, VM_SHARED needed\n"); return -EINVAL; } if (q->is_output) { if (!(vma->vm_flags & VM_WRITE)) { dprintk(q, 1, "invalid vma flags, VM_WRITE needed\n"); return -EINVAL; } } else { if (!(vma->vm_flags & VM_READ)) { dprintk(q, 1, "invalid vma flags, VM_READ needed\n"); return -EINVAL; } } mutex_lock(&q->mmap_lock); /* * Find the plane corresponding to the offset passed by userspace. This * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, offset, &vb, &plane); if (ret) goto unlock; /* * MMAP requires page_aligned buffers. * The buffer length was page_aligned at __vb2_buf_mem_alloc(), * so, we need to do the same here. */ length = PAGE_ALIGN(vb->planes[plane].length); if (length < (vma->vm_end - vma->vm_start)) { dprintk(q, 1, "MMAP invalid, as it would overflow buffer length\n"); ret = -EINVAL; goto unlock; } /* * vm_pgoff is treated in V4L2 API as a 'cookie' to select a buffer, * not as a in-buffer offset. We always want to mmap a whole buffer * from its beginning. 
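 *
 * From userspace this is the usual V4L2 MMAP pattern (single-planar
 * case shown, roughly):
 *
 *   struct v4l2_buffer b = { .type = type, .index = i,
 *                            .memory = V4L2_MEMORY_MMAP };
 *   ioctl(fd, VIDIOC_QUERYBUF, &b);
 *   p = mmap(NULL, b.length, PROT_READ | PROT_WRITE, MAP_SHARED,
 *            fd, b.m.offset);
 *
 * where b.m.offset is the plane cookie generated by __setup_offsets().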
*/ vma->vm_pgoff = 0; ret = call_memop(vb, mmap, vb->planes[plane].mem_priv, vma); unlock: mutex_unlock(&q->mmap_lock); if (ret) return ret; dprintk(q, 3, "buffer %u, plane %d successfully mapped\n", vb->index, plane); return 0; } EXPORT_SYMBOL_GPL(vb2_mmap); #ifndef CONFIG_MMU unsigned long vb2_get_unmapped_area(struct vb2_queue *q, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { unsigned long offset = pgoff << PAGE_SHIFT; struct vb2_buffer *vb; unsigned int plane; void *vaddr; int ret; mutex_lock(&q->mmap_lock); /* * Find the plane corresponding to the offset passed by userspace. This * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, offset, &vb, &plane); if (ret) goto unlock; vaddr = vb2_plane_vaddr(vb, plane); mutex_unlock(&q->mmap_lock); return vaddr ? (unsigned long)vaddr : -EINVAL; unlock: mutex_unlock(&q->mmap_lock); return ret; } EXPORT_SYMBOL_GPL(vb2_get_unmapped_area); #endif int vb2_core_queue_init(struct vb2_queue *q) { /* * Sanity check */ /* * For drivers who don't support max_num_buffers ensure * a backward compatibility. */ if (!q->max_num_buffers) q->max_num_buffers = VB2_MAX_FRAME; /* The maximum is limited by offset cookie encoding pattern */ q->max_num_buffers = min_t(unsigned int, q->max_num_buffers, MAX_BUFFER_INDEX); if (WARN_ON(!q) || WARN_ON(!q->ops) || WARN_ON(!q->mem_ops) || WARN_ON(!q->type) || WARN_ON(!q->io_modes) || WARN_ON(!q->ops->queue_setup) || WARN_ON(!q->ops->buf_queue)) return -EINVAL; if (WARN_ON(q->max_num_buffers > MAX_BUFFER_INDEX) || WARN_ON(q->min_queued_buffers > q->max_num_buffers)) return -EINVAL; if (WARN_ON(q->requires_requests && !q->supports_requests)) return -EINVAL; /* * This combination is not allowed since a non-zero value of * q->min_queued_buffers can cause vb2_core_qbuf() to fail if * it has to call start_streaming(), and the Request API expects * that queueing a request (and thus queueing a buffer contained * in that request) will always succeed. There is no method of * propagating an error back to userspace. */ if (WARN_ON(q->supports_requests && q->min_queued_buffers)) return -EINVAL; INIT_LIST_HEAD(&q->queued_list); INIT_LIST_HEAD(&q->done_list); spin_lock_init(&q->done_lock); mutex_init(&q->mmap_lock); init_waitqueue_head(&q->done_wq); q->memory = VB2_MEMORY_UNKNOWN; if (q->buf_struct_size == 0) q->buf_struct_size = sizeof(struct vb2_buffer); if (q->bidirectional) q->dma_dir = DMA_BIDIRECTIONAL; else q->dma_dir = q->is_output ? DMA_TO_DEVICE : DMA_FROM_DEVICE; if (q->name[0] == '\0') snprintf(q->name, sizeof(q->name), "%s-%p", q->is_output ? "out" : "cap", q); return 0; } EXPORT_SYMBOL_GPL(vb2_core_queue_init); static int __vb2_init_fileio(struct vb2_queue *q, int read); static int __vb2_cleanup_fileio(struct vb2_queue *q); void vb2_core_queue_release(struct vb2_queue *q) { __vb2_cleanup_fileio(q); __vb2_queue_cancel(q); mutex_lock(&q->mmap_lock); __vb2_queue_free(q, vb2_get_num_buffers(q)); kfree(q->bufs); q->bufs = NULL; mutex_unlock(&q->mmap_lock); } EXPORT_SYMBOL_GPL(vb2_core_queue_release); __poll_t vb2_core_poll(struct vb2_queue *q, struct file *file, poll_table *wait) { __poll_t req_events = poll_requested_events(wait); struct vb2_buffer *vb = NULL; unsigned long flags; /* * poll_wait() MUST be called on the first invocation on all the * potential queues of interest, even if we are not interested in their * events during this first call. 
Failure to do so will result in * queue's events to be ignored because the poll_table won't be capable * of adding new wait queues thereafter. */ poll_wait(file, &q->done_wq, wait); if (!q->is_output && !(req_events & (EPOLLIN | EPOLLRDNORM))) return 0; if (q->is_output && !(req_events & (EPOLLOUT | EPOLLWRNORM))) return 0; /* * Start file I/O emulator only if streaming API has not been used yet. */ if (vb2_get_num_buffers(q) == 0 && !vb2_fileio_is_active(q)) { if (!q->is_output && (q->io_modes & VB2_READ) && (req_events & (EPOLLIN | EPOLLRDNORM))) { if (__vb2_init_fileio(q, 1)) return EPOLLERR; } if (q->is_output && (q->io_modes & VB2_WRITE) && (req_events & (EPOLLOUT | EPOLLWRNORM))) { if (__vb2_init_fileio(q, 0)) return EPOLLERR; /* * Write to OUTPUT queue can be done immediately. */ return EPOLLOUT | EPOLLWRNORM; } } /* * There is nothing to wait for if the queue isn't streaming, or if the * error flag is set. */ if (!vb2_is_streaming(q) || q->error) return EPOLLERR; /* * If this quirk is set and QBUF hasn't been called yet then * return EPOLLERR as well. This only affects capture queues, output * queues will always initialize waiting_for_buffers to false. * This quirk is set by V4L2 for backwards compatibility reasons. */ if (q->quirk_poll_must_check_waiting_for_buffers && q->waiting_for_buffers && (req_events & (EPOLLIN | EPOLLRDNORM))) return EPOLLERR; /* * For output streams you can call write() as long as there are fewer * buffers queued than there are buffers available. */ if (q->is_output && q->fileio && q->queued_count < vb2_get_num_buffers(q)) return EPOLLOUT | EPOLLWRNORM; if (list_empty(&q->done_list)) { /* * If the last buffer was dequeued from a capture queue, * return immediately. DQBUF will return -EPIPE. */ if (q->last_buffer_dequeued) return EPOLLIN | EPOLLRDNORM; } /* * Take first buffer available for dequeuing. */ spin_lock_irqsave(&q->done_lock, flags); if (!list_empty(&q->done_list)) vb = list_first_entry(&q->done_list, struct vb2_buffer, done_entry); spin_unlock_irqrestore(&q->done_lock, flags); if (vb && (vb->state == VB2_BUF_STATE_DONE || vb->state == VB2_BUF_STATE_ERROR)) { return (q->is_output) ? EPOLLOUT | EPOLLWRNORM : EPOLLIN | EPOLLRDNORM; } return 0; } EXPORT_SYMBOL_GPL(vb2_core_poll); /* * struct vb2_fileio_buf - buffer context used by file io emulator * * vb2 provides a compatibility layer and emulator of file io (read and * write) calls on top of streaming API. This structure is used for * tracking context related to the buffers. */ struct vb2_fileio_buf { void *vaddr; unsigned int size; unsigned int pos; unsigned int queued:1; }; /* * struct vb2_fileio_data - queue context used by file io emulator * * @cur_index: the index of the buffer currently being read from or * written to. If equal to number of buffers in the vb2_queue * then a new buffer must be dequeued. * @initial_index: in the read() case all buffers are queued up immediately * in __vb2_init_fileio() and __vb2_perform_fileio() just cycles * buffers. However, in the write() case no buffers are initially * queued, instead whenever a buffer is full it is queued up by * __vb2_perform_fileio(). Only once all available buffers have * been queued up will __vb2_perform_fileio() start to dequeue * buffers. This means that initially __vb2_perform_fileio() * needs to know what buffer index to use when it is queuing up * the buffers for the first time. That initial index is stored * in this field. 
Once it is equal to number of buffers in the * vb2_queue all available buffers have been queued and * __vb2_perform_fileio() should start the normal dequeue/queue cycle. * * vb2 provides a compatibility layer and emulator of file io (read and * write) calls on top of streaming API. For proper operation it required * this structure to save the driver state between each call of the read * or write function. */ struct vb2_fileio_data { unsigned int count; unsigned int type; unsigned int memory; struct vb2_fileio_buf bufs[VB2_MAX_FRAME]; unsigned int cur_index; unsigned int initial_index; unsigned int q_count; unsigned int dq_count; unsigned read_once:1; unsigned write_immediately:1; }; /* * __vb2_init_fileio() - initialize file io emulator * @q: videobuf2 queue * @read: mode selector (1 means read, 0 means write) */ static int __vb2_init_fileio(struct vb2_queue *q, int read) { struct vb2_fileio_data *fileio; struct vb2_buffer *vb; int i, ret; unsigned int count = 0; /* * Sanity check */ if (WARN_ON((read && !(q->io_modes & VB2_READ)) || (!read && !(q->io_modes & VB2_WRITE)))) return -EINVAL; /* * Check if device supports mapping buffers to kernel virtual space. */ if (!q->mem_ops->vaddr) return -EBUSY; /* * Check if streaming api has not been already activated. */ if (q->streaming || vb2_get_num_buffers(q) > 0) return -EBUSY; /* * Start with q->min_queued_buffers + 1, driver can increase it in * queue_setup() * * 'min_queued_buffers' buffers need to be queued up before you * can start streaming, plus 1 for userspace (or in this case, * kernelspace) processing. */ count = max(2, q->min_queued_buffers + 1); dprintk(q, 3, "setting up file io: mode %s, count %d, read_once %d, write_immediately %d\n", (read) ? "read" : "write", count, q->fileio_read_once, q->fileio_write_immediately); fileio = kzalloc(sizeof(*fileio), GFP_KERNEL); if (fileio == NULL) return -ENOMEM; fileio->read_once = q->fileio_read_once; fileio->write_immediately = q->fileio_write_immediately; /* * Request buffers and use MMAP type to force driver * to allocate buffers by itself. */ fileio->count = count; fileio->memory = VB2_MEMORY_MMAP; fileio->type = q->type; q->fileio = fileio; ret = vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); if (ret) goto err_kfree; /* * Userspace can never add or delete buffers later, so there * will never be holes. It is safe to assume that vb2_get_buffer(q, 0) * will always return a valid vb pointer */ vb = vb2_get_buffer(q, 0); /* * Check if plane_count is correct * (multiplane buffers are not supported). */ if (vb->num_planes != 1) { ret = -EBUSY; goto err_reqbufs; } /* * Get kernel address of each buffer. */ for (i = 0; i < vb2_get_num_buffers(q); i++) { /* vb can never be NULL when using fileio. */ vb = vb2_get_buffer(q, i); fileio->bufs[i].vaddr = vb2_plane_vaddr(vb, 0); if (fileio->bufs[i].vaddr == NULL) { ret = -EINVAL; goto err_reqbufs; } fileio->bufs[i].size = vb2_plane_size(vb, 0); } /* * Read mode requires pre queuing of all buffers. */ if (read) { /* * Queue all buffers. */ for (i = 0; i < vb2_get_num_buffers(q); i++) { struct vb2_buffer *vb2 = vb2_get_buffer(q, i); if (!vb2) continue; ret = vb2_core_qbuf(q, vb2, NULL, NULL); if (ret) goto err_reqbufs; fileio->bufs[i].queued = 1; } /* * All buffers have been queued, so mark that by setting * initial_index to the number of buffers in the vb2_queue */ fileio->initial_index = vb2_get_num_buffers(q); fileio->cur_index = fileio->initial_index; } /* * Start streaming. 
*/ ret = vb2_core_streamon(q, q->type); if (ret) goto err_reqbufs; return ret; err_reqbufs: fileio->count = 0; vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); err_kfree: q->fileio = NULL; kfree(fileio); return ret; } /* * __vb2_cleanup_fileio() - free resources used by the file io emulator * @q: videobuf2 queue */ static int __vb2_cleanup_fileio(struct vb2_queue *q) { struct vb2_fileio_data *fileio = q->fileio; if (fileio) { vb2_core_streamoff(q, q->type); q->fileio = NULL; fileio->count = 0; vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); kfree(fileio); dprintk(q, 3, "file io emulator closed\n"); } return 0; } /* * __vb2_perform_fileio() - perform a single file io (read or write) operation * @q: videobuf2 queue * @data: pointer to the target userspace buffer * @count: number of bytes to read or write * @ppos: file handle position tracking pointer * @nonblock: mode selector (1 means nonblocking, 0 means blocking) * @read: access mode selector (1 means read, 0 means write) */ static size_t __vb2_perform_fileio(struct vb2_queue *q, char __user *data, size_t count, loff_t *ppos, int nonblock, int read) { struct vb2_fileio_data *fileio; struct vb2_fileio_buf *buf; bool is_multiplanar = q->is_multiplanar; /* * When using write() to write data to an output video node the vb2 core * should copy timestamps if V4L2_BUF_FLAG_TIMESTAMP_COPY is set. Nobody * else is able to provide this information with the write() operation. */ bool copy_timestamp = !read && q->copy_timestamp; unsigned index; int ret; dprintk(q, 3, "mode %s, offset %ld, count %zd, %sblocking\n", read ? "read" : "write", (long)*ppos, count, nonblock ? "non" : ""); if (!data) return -EINVAL; if (q->waiting_in_dqbuf) { dprintk(q, 3, "another dup()ped fd is %s\n", read ? "reading" : "writing"); return -EBUSY; } /* * Initialize emulator on first call. */ if (!vb2_fileio_is_active(q)) { ret = __vb2_init_fileio(q, read); dprintk(q, 3, "vb2_init_fileio result: %d\n", ret); if (ret) return ret; } fileio = q->fileio; /* * Check if we need to dequeue the buffer. */ index = fileio->cur_index; if (index >= vb2_get_num_buffers(q)) { struct vb2_buffer *b; /* * Call vb2_dqbuf to get buffer back. */ ret = vb2_core_dqbuf(q, &index, NULL, nonblock); dprintk(q, 5, "vb2_dqbuf result: %d\n", ret); if (ret) return ret; fileio->dq_count += 1; fileio->cur_index = index; buf = &fileio->bufs[index]; /* b can never be NULL when using fileio. */ b = vb2_get_buffer(q, index); /* * Get number of bytes filled by the driver */ buf->pos = 0; buf->queued = 0; buf->size = read ? vb2_get_plane_payload(b, 0) : vb2_plane_size(b, 0); /* Compensate for data_offset on read in the multiplanar case. */ if (is_multiplanar && read && b->planes[0].data_offset < buf->size) { buf->pos = b->planes[0].data_offset; buf->size -= buf->pos; } } else { buf = &fileio->bufs[index]; } /* * Limit count on last few bytes of the buffer. */ if (buf->pos + count > buf->size) { count = buf->size - buf->pos; dprintk(q, 5, "reducing read count: %zd\n", count); } /* * Transfer data to userspace. */ dprintk(q, 3, "copying %zd bytes - buffer %d, offset %u\n", count, index, buf->pos); if (read) ret = copy_to_user(data, buf->vaddr + buf->pos, count); else ret = copy_from_user(buf->vaddr + buf->pos, data, count); if (ret) { dprintk(q, 3, "error copying data\n"); return -EFAULT; } /* * Update counters. */ buf->pos += count; *ppos += count; /* * Queue next buffer if required. */ if (buf->pos == buf->size || (!read && fileio->write_immediately)) { /* b can never be NULL when using fileio.
*/ struct vb2_buffer *b = vb2_get_buffer(q, index); /* * Check if this is the last buffer to read. */ if (read && fileio->read_once && fileio->dq_count == 1) { dprintk(q, 3, "read limit reached\n"); return __vb2_cleanup_fileio(q); } /* * Call vb2_qbuf and give buffer to the driver. */ b->planes[0].bytesused = buf->pos; if (copy_timestamp) b->timestamp = ktime_get_ns(); ret = vb2_core_qbuf(q, b, NULL, NULL); dprintk(q, 5, "vb2_qbuf result: %d\n", ret); if (ret) return ret; /* * Buffer has been queued, update the status */ buf->pos = 0; buf->queued = 1; buf->size = vb2_plane_size(b, 0); fileio->q_count += 1; /* * If we are queuing up buffers for the first time, then * increase initial_index by one. */ if (fileio->initial_index < vb2_get_num_buffers(q)) fileio->initial_index++; /* * The next buffer to use is either a buffer that's going to be * queued for the first time (initial_index < number of buffers in the vb2_queue) * or it is equal to the number of buffers in the vb2_queue, * meaning that the next time we need to dequeue a buffer since * we've now queued up all the 'first time' buffers. */ fileio->cur_index = fileio->initial_index; } /* * Return proper number of bytes processed. */ if (ret == 0) ret = count; return ret; } size_t vb2_read(struct vb2_queue *q, char __user *data, size_t count, loff_t *ppos, int nonblocking) { return __vb2_perform_fileio(q, data, count, ppos, nonblocking, 1); } EXPORT_SYMBOL_GPL(vb2_read); size_t vb2_write(struct vb2_queue *q, const char __user *data, size_t count, loff_t *ppos, int nonblocking) { return __vb2_perform_fileio(q, (char __user *) data, count, ppos, nonblocking, 0); } EXPORT_SYMBOL_GPL(vb2_write); struct vb2_threadio_data { struct task_struct *thread; vb2_thread_fnc fnc; void *priv; bool stop; }; static int vb2_thread(void *data) { struct vb2_queue *q = data; struct vb2_threadio_data *threadio = q->threadio; bool copy_timestamp = false; unsigned prequeue = 0; unsigned index = 0; int ret = 0; if (q->is_output) { prequeue = vb2_get_num_buffers(q); copy_timestamp = q->copy_timestamp; } set_freezable(); for (;;) { struct vb2_buffer *vb; /* * Call vb2_dqbuf to get buffer back. */ if (prequeue) { vb = vb2_get_buffer(q, index++); if (!vb) continue; prequeue--; } else { call_void_qop(q, wait_finish, q); if (!threadio->stop) ret = vb2_core_dqbuf(q, &index, NULL, 0); call_void_qop(q, wait_prepare, q); dprintk(q, 5, "file io: vb2_dqbuf result: %d\n", ret); if (!ret) vb = vb2_get_buffer(q, index); } if (ret || threadio->stop) break; try_to_freeze(); if (vb->state != VB2_BUF_STATE_ERROR) if (threadio->fnc(vb, threadio->priv)) break; call_void_qop(q, wait_finish, q); if (copy_timestamp) vb->timestamp = ktime_get_ns(); if (!threadio->stop) ret = vb2_core_qbuf(q, vb, NULL, NULL); call_void_qop(q, wait_prepare, q); if (ret || threadio->stop) break; } /* Hmm, linux becomes *very* unhappy without this ... */ while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); schedule(); } return 0; } /* * This function should not be used for anything else but the videobuf2-dvb * support. If you think you have another good use-case for this, then please * contact the linux-media mailinglist first. 
*/ int vb2_thread_start(struct vb2_queue *q, vb2_thread_fnc fnc, void *priv, const char *thread_name) { struct vb2_threadio_data *threadio; int ret = 0; if (q->threadio) return -EBUSY; if (vb2_is_busy(q)) return -EBUSY; if (WARN_ON(q->fileio)) return -EBUSY; threadio = kzalloc(sizeof(*threadio), GFP_KERNEL); if (threadio == NULL) return -ENOMEM; threadio->fnc = fnc; threadio->priv = priv; ret = __vb2_init_fileio(q, !q->is_output); dprintk(q, 3, "file io: vb2_init_fileio result: %d\n", ret); if (ret) goto nomem; q->threadio = threadio; threadio->thread = kthread_run(vb2_thread, q, "vb2-%s", thread_name); if (IS_ERR(threadio->thread)) { ret = PTR_ERR(threadio->thread); threadio->thread = NULL; goto nothread; } return 0; nothread: __vb2_cleanup_fileio(q); nomem: kfree(threadio); return ret; } EXPORT_SYMBOL_GPL(vb2_thread_start); int vb2_thread_stop(struct vb2_queue *q) { struct vb2_threadio_data *threadio = q->threadio; int err; if (threadio == NULL) return 0; threadio->stop = true; /* Wake up all pending sleeps in the thread */ vb2_queue_error(q); err = kthread_stop(threadio->thread); __vb2_cleanup_fileio(q); threadio->thread = NULL; kfree(threadio); q->threadio = NULL; return err; } EXPORT_SYMBOL_GPL(vb2_thread_stop); MODULE_DESCRIPTION("Media buffer core framework"); MODULE_AUTHOR("Pawel Osciak <pawel@osciak.com>, Marek Szyprowski"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(DMA_BUF);
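/*
 * Illustrative sketch (not part of videobuf2-core.c above): a driver that
 * relies on the vb2 file I/O emulator typically just forwards its read()
 * file operation to vb2_read(), whose signature is exported above.
 * "struct my_video_dev", "my_video_read" and the use of file->private_data
 * are hypothetical assumptions for the sketch; only vb2_read() itself is
 * taken from the code above.
 */
#include <linux/fs.h>
#include <media/videobuf2-core.h>

struct my_video_dev {				/* hypothetical driver state */
	struct vb2_queue queue;
};

static ssize_t my_video_read(struct file *file, char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct my_video_dev *dev = file->private_data;

	/* The emulator allocates, queues and dequeues buffers internally. */
	return vb2_read(&dev->queue, buf, count, ppos,
			file->f_flags & O_NONBLOCK);
}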
// SPDX-License-Identifier: GPL-2.0 /* * device.h - generic, centralized driver model * * Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org> * Copyright (c) 2004-2009 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2008-2009 Novell Inc. * * See Documentation/driver-api/driver-model/ for more information. */ #ifndef _DEVICE_H_ #define _DEVICE_H_ #include <linux/dev_printk.h> #include <linux/energy_model.h> #include <linux/ioport.h> #include <linux/kobject.h> #include <linux/klist.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/compiler.h> #include <linux/types.h> #include <linux/mutex.h> #include <linux/pm.h> #include <linux/atomic.h> #include <linux/uidgid.h> #include <linux/gfp.h> #include <linux/overflow.h> #include <linux/device/bus.h> #include <linux/device/class.h> #include <linux/device/driver.h> #include <linux/cleanup.h> #include <asm/device.h> struct device; struct device_private; struct device_driver; struct driver_private; struct module; struct class; struct subsys_private; struct device_node; struct fwnode_handle; struct iommu_group; struct dev_pin_info; struct dev_iommu; struct msi_device_data; /** * struct subsys_interface - interfaces to device functions * @name: name of the device function * @subsys: subsystem of the devices to attach to * @node: the list of functions registered at the subsystem * @add_dev: device hookup to device function handler * @remove_dev: device hookup to device function handler * * Simple interfaces attached to a subsystem. Multiple interfaces can * attach to a subsystem and its devices. Unlike drivers, they do not * exclusively claim or control devices. Interfaces usually represent * a specific functionality of a subsystem/class of devices.
*/ struct subsys_interface { const char *name; const struct bus_type *subsys; struct list_head node; int (*add_dev)(struct device *dev, struct subsys_interface *sif); void (*remove_dev)(struct device *dev, struct subsys_interface *sif); }; int subsys_interface_register(struct subsys_interface *sif); void subsys_interface_unregister(struct subsys_interface *sif); int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups); int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups); /* * The type of device, "struct device" is embedded in. A class * or bus can contain devices of different types * like "partitions" and "disks", "mouse" and "event". * This identifies the device type and carries type-specific * information, equivalent to the kobj_type of a kobject. * If "name" is specified, the uevent will contain it in * the DEVTYPE variable. */ struct device_type { const char *name; const struct attribute_group **groups; int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid); void (*release)(struct device *dev); const struct dev_pm_ops *pm; }; /** * struct device_attribute - Interface for exporting device attributes. * @attr: sysfs attribute definition. * @show: Show handler. * @store: Store handler. */ struct device_attribute { struct attribute attr; ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf); ssize_t (*store)(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); }; /** * struct dev_ext_attribute - Exported device attribute with extra context. * @attr: Exported device attribute. * @var: Pointer to context. */ struct dev_ext_attribute { struct device_attribute attr; void *var; }; ssize_t device_show_ulong(struct device *dev, struct device_attribute *attr, char *buf); ssize_t device_store_ulong(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t device_show_int(struct device *dev, struct device_attribute *attr, char *buf); ssize_t device_store_int(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, char *buf); ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); /** * DEVICE_ATTR - Define a device attribute. * @_name: Attribute name. * @_mode: File mode. * @_show: Show handler. Optional, but mandatory if attribute is readable. * @_store: Store handler. Optional, but mandatory if attribute is writable. * * Convenience macro for defining a struct device_attribute. * * For example, ``DEVICE_ATTR(foo, 0644, foo_show, foo_store);`` expands to: * * .. code-block:: c * * struct device_attribute dev_attr_foo = { * .attr = { .name = "foo", .mode = 0644 }, * .show = foo_show, * .store = foo_store, * }; */ #define DEVICE_ATTR(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) /** * DEVICE_ATTR_PREALLOC - Define a preallocated device attribute. * @_name: Attribute name. * @_mode: File mode. * @_show: Show handler. Optional, but mandatory if attribute is readable. * @_store: Store handler. Optional, but mandatory if attribute is writable. * * Like DEVICE_ATTR(), but ``SYSFS_PREALLOC`` is set on @_mode. 
*/ #define DEVICE_ATTR_PREALLOC(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = \ __ATTR_PREALLOC(_name, _mode, _show, _store) /** * DEVICE_ATTR_RW - Define a read-write device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR(), but @_mode is 0644, @_show is <_name>_show, * and @_store is <_name>_store. */ #define DEVICE_ATTR_RW(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RW(_name) /** * DEVICE_ATTR_ADMIN_RW - Define an admin-only read-write device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR_RW(), but @_mode is 0600. */ #define DEVICE_ATTR_ADMIN_RW(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RW_MODE(_name, 0600) /** * DEVICE_ATTR_RO - Define a readable device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR(), but @_mode is 0444 and @_show is <_name>_show. */ #define DEVICE_ATTR_RO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RO(_name) /** * DEVICE_ATTR_ADMIN_RO - Define an admin-only readable device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR_RO(), but @_mode is 0400. */ #define DEVICE_ATTR_ADMIN_RO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RO_MODE(_name, 0400) /** * DEVICE_ATTR_WO - Define an admin-only writable device attribute. * @_name: Attribute name. * * Like DEVICE_ATTR(), but @_mode is 0200 and @_store is <_name>_store. */ #define DEVICE_ATTR_WO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_WO(_name) /** * DEVICE_ULONG_ATTR - Define a device attribute backed by an unsigned long. * @_name: Attribute name. * @_mode: File mode. * @_var: Identifier of unsigned long. * * Like DEVICE_ATTR(), but @_show and @_store are automatically provided * such that reads and writes to the attribute from userspace affect @_var. */ #define DEVICE_ULONG_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) } /** * DEVICE_INT_ATTR - Define a device attribute backed by an int. * @_name: Attribute name. * @_mode: File mode. * @_var: Identifier of int. * * Like DEVICE_ULONG_ATTR(), but @_var is an int. */ #define DEVICE_INT_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_int, device_store_int), &(_var) } /** * DEVICE_BOOL_ATTR - Define a device attribute backed by a bool. * @_name: Attribute name. * @_mode: File mode. * @_var: Identifier of bool. * * Like DEVICE_ULONG_ATTR(), but @_var is a bool. 
*/ #define DEVICE_BOOL_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_bool, device_store_bool), &(_var) } #define DEVICE_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = \ __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) int device_create_file(struct device *device, const struct device_attribute *entry); void device_remove_file(struct device *dev, const struct device_attribute *attr); bool device_remove_file_self(struct device *dev, const struct device_attribute *attr); int __must_check device_create_bin_file(struct device *dev, const struct bin_attribute *attr); void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); /* device resource management */ typedef void (*dr_release_t)(struct device *dev, void *res); typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid, const char *name) __malloc; #define devres_alloc(release, size, gfp) \ __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) #define devres_alloc_node(release, size, gfp, nid) \ __devres_alloc_node(release, size, gfp, nid, #release) void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), void *data); void devres_free(void *res); void devres_add(struct device *dev, void *res); void *devres_find(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); void *devres_get(struct device *dev, void *new_res, dr_match_t match, void *match_data); void *devres_remove(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); int devres_destroy(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); int devres_release(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); /* devres group */ void * __must_check devres_open_group(struct device *dev, void *id, gfp_t gfp); void devres_close_group(struct device *dev, void *id); void devres_remove_group(struct device *dev, void *id); int devres_release_group(struct device *dev, void *id); /* managed devm_k.alloc/kfree for device drivers */ void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2); void *devm_krealloc(struct device *dev, void *ptr, size_t size, gfp_t gfp) __must_check __realloc_size(3); __printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap) __malloc; __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...) 
__malloc; static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) { return devm_kmalloc(dev, size, gfp | __GFP_ZERO); } static inline void *devm_kmalloc_array(struct device *dev, size_t n, size_t size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; return devm_kmalloc(dev, bytes, flags); } static inline void *devm_kcalloc(struct device *dev, size_t n, size_t size, gfp_t flags) { return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO); } static inline __realloc_size(3, 4) void * __must_check devm_krealloc_array(struct device *dev, void *p, size_t new_n, size_t new_size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) return NULL; return devm_krealloc(dev, p, bytes, flags); } void devm_kfree(struct device *dev, const void *p); char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc; const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp); void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) __realloc_size(3); unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); void devm_free_pages(struct device *dev, unsigned long addr); #ifdef CONFIG_HAS_IOMEM void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res); void __iomem *devm_ioremap_resource_wc(struct device *dev, const struct resource *res); void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, resource_size_t *size); #else static inline void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res) { return ERR_PTR(-EINVAL); } static inline void __iomem *devm_ioremap_resource_wc(struct device *dev, const struct resource *res) { return ERR_PTR(-EINVAL); } static inline void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, resource_size_t *size) { return ERR_PTR(-EINVAL); } #endif /* allows to add/remove a custom action to devres stack */ void devm_remove_action(struct device *dev, void (*action)(void *), void *data); void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); #define devm_add_action(dev, action, data) \ __devm_add_action(dev, action, data, #action) static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), void *data, const char *name) { int ret; ret = __devm_add_action(dev, action, data, name); if (ret) action(data); return ret; } #define devm_add_action_or_reset(dev, action, data) \ __devm_add_action_or_reset(dev, action, data, #action) /** * devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for * @type: Type to allocate per-cpu memory for * * Managed alloc_percpu. Per-cpu memory allocated with this function is * automatically freed on driver detach. * * RETURNS: * Pointer to allocated memory on success, NULL on failure. */ #define devm_alloc_percpu(dev, type) \ ((typeof(type) __percpu *)__devm_alloc_percpu((dev), sizeof(type), \ __alignof__(type))) void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, size_t align); void devm_free_percpu(struct device *dev, void __percpu *pdata); struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about * sg limitations. 
*/ unsigned int max_segment_size; unsigned int min_align_mask; unsigned long segment_boundary_mask; }; /** * enum device_link_state - Device link states. * @DL_STATE_NONE: The presence of the drivers is not being tracked. * @DL_STATE_DORMANT: None of the supplier/consumer drivers is present. * @DL_STATE_AVAILABLE: The supplier driver is present, but the consumer is not. * @DL_STATE_CONSUMER_PROBE: The consumer is probing (supplier driver present). * @DL_STATE_ACTIVE: Both the supplier and consumer drivers are present. * @DL_STATE_SUPPLIER_UNBIND: The supplier driver is unbinding. */ enum device_link_state { DL_STATE_NONE = -1, DL_STATE_DORMANT = 0, DL_STATE_AVAILABLE, DL_STATE_CONSUMER_PROBE, DL_STATE_ACTIVE, DL_STATE_SUPPLIER_UNBIND, }; /* * Device link flags. * * STATELESS: The core will not remove this link automatically. * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind. * PM_RUNTIME: If set, the runtime PM framework will use this link. * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind. * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds. * MANAGED: The core tracks presence of supplier/consumer drivers (internal). * SYNC_STATE_ONLY: Link only affects sync_state() behavior. * INFERRED: Inferred from data (eg: firmware) and not from driver actions. */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) #define DL_FLAG_PM_RUNTIME BIT(2) #define DL_FLAG_RPM_ACTIVE BIT(3) #define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4) #define DL_FLAG_AUTOPROBE_CONSUMER BIT(5) #define DL_FLAG_MANAGED BIT(6) #define DL_FLAG_SYNC_STATE_ONLY BIT(7) #define DL_FLAG_INFERRED BIT(8) #define DL_FLAG_CYCLE BIT(9) /** * enum dl_dev_state - Device driver presence tracking information. * @DL_DEV_NO_DRIVER: There is no driver attached to the device. * @DL_DEV_PROBING: A driver is probing. * @DL_DEV_DRIVER_BOUND: The driver has been bound to the device. * @DL_DEV_UNBINDING: The driver is unbinding from the device. */ enum dl_dev_state { DL_DEV_NO_DRIVER = 0, DL_DEV_PROBING, DL_DEV_DRIVER_BOUND, DL_DEV_UNBINDING, }; /** * enum device_removable - Whether the device is removable. The criteria for a * device to be classified as removable is determined by its subsystem or bus. * @DEVICE_REMOVABLE_NOT_SUPPORTED: This attribute is not supported for this * device (default). * @DEVICE_REMOVABLE_UNKNOWN: Device location is Unknown. * @DEVICE_FIXED: Device is not removable by the user. * @DEVICE_REMOVABLE: Device is removable by the user. */ enum device_removable { DEVICE_REMOVABLE_NOT_SUPPORTED = 0, /* must be 0 */ DEVICE_REMOVABLE_UNKNOWN, DEVICE_FIXED, DEVICE_REMOVABLE, }; /** * struct dev_links_info - Device data related to device links. * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @defer_sync: Hook to global list of devices that have deferred sync_state. * @status: Driver status information. 
*/ struct dev_links_info { struct list_head suppliers; struct list_head consumers; struct list_head defer_sync; enum dl_dev_state status; }; /** * struct dev_msi_info - Device data related to MSI * @domain: The MSI interrupt domain associated to the device * @data: Pointer to MSI device data */ struct dev_msi_info { #ifdef CONFIG_GENERIC_MSI_IRQ struct irq_domain *domain; struct msi_device_data *data; #endif }; /** * enum device_physical_location_panel - Describes which panel surface of the * system's housing the device connection point resides on. * @DEVICE_PANEL_TOP: Device connection point is on the top panel. * @DEVICE_PANEL_BOTTOM: Device connection point is on the bottom panel. * @DEVICE_PANEL_LEFT: Device connection point is on the left panel. * @DEVICE_PANEL_RIGHT: Device connection point is on the right panel. * @DEVICE_PANEL_FRONT: Device connection point is on the front panel. * @DEVICE_PANEL_BACK: Device connection point is on the back panel. * @DEVICE_PANEL_UNKNOWN: The panel with device connection point is unknown. */ enum device_physical_location_panel { DEVICE_PANEL_TOP, DEVICE_PANEL_BOTTOM, DEVICE_PANEL_LEFT, DEVICE_PANEL_RIGHT, DEVICE_PANEL_FRONT, DEVICE_PANEL_BACK, DEVICE_PANEL_UNKNOWN, }; /** * enum device_physical_location_vertical_position - Describes vertical * position of the device connection point on the panel surface. * @DEVICE_VERT_POS_UPPER: Device connection point is at upper part of panel. * @DEVICE_VERT_POS_CENTER: Device connection point is at center part of panel. * @DEVICE_VERT_POS_LOWER: Device connection point is at lower part of panel. */ enum device_physical_location_vertical_position { DEVICE_VERT_POS_UPPER, DEVICE_VERT_POS_CENTER, DEVICE_VERT_POS_LOWER, }; /** * enum device_physical_location_horizontal_position - Describes horizontal * position of the device connection point on the panel surface. * @DEVICE_HORI_POS_LEFT: Device connection point is at left part of panel. * @DEVICE_HORI_POS_CENTER: Device connection point is at center part of panel. * @DEVICE_HORI_POS_RIGHT: Device connection point is at right part of panel. */ enum device_physical_location_horizontal_position { DEVICE_HORI_POS_LEFT, DEVICE_HORI_POS_CENTER, DEVICE_HORI_POS_RIGHT, }; /** * struct device_physical_location - Device data related to physical location * of the device connection point. * @panel: Panel surface of the system's housing that the device connection * point resides on. * @vertical_position: Vertical position of the device connection point within * the panel. * @horizontal_position: Horizontal position of the device connection point * within the panel. * @dock: Set if the device connection point resides in a docking station or * port replicator. * @lid: Set if this device connection point resides on the lid of laptop * system. */ struct device_physical_location { enum device_physical_location_panel panel; enum device_physical_location_vertical_position vertical_position; enum device_physical_location_horizontal_position horizontal_position; bool dock; bool lid; }; /** * struct device - The basic device structure * @parent: The device's "parent" device, the device to which it is attached. * In most cases, a parent device is some sort of bus or host * controller. If parent is NULL, the device, is a top-level device, * which is not usually what you want. * @p: Holds the private data of the driver core portions of the device. * See the comment of the struct device_private for detail. * @kobj: A top-level, abstract class from which other classes are derived. 
* @init_name: Initial name of the device. * @type: The type of device. * This identifies the device type and carries type-specific * information. * @mutex: Mutex to synchronize calls to its driver. * @bus: Type of bus device is on. * @driver: Which driver has allocated this * @platform_data: Platform data specific to the device. * Example: For devices on custom boards, as typical of embedded * and SOC based hardware, Linux often uses platform_data to point * to board-specific structures describing devices and how they * are wired. That can include what ports are available, chip * variants, which GPIO pins act in what additional roles, and so * on. This shrinks the "Board Support Packages" (BSPs) and * minimizes board-specific #ifdefs in drivers. * @driver_data: Private pointer for driver specific info. * @links: Links to suppliers and consumers of this device. * @power: For device power management. * See Documentation/driver-api/pm/devices.rst for details. * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pin-control.rst for details. * @msi: MSI related data * @numa_node: NUMA node this device is close to. * @dma_ops: DMA mapping operations for this device. * @dma_mask: Dma mask (if dma'ble device). * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all * hardware supports 64-bit addresses for consistent allocations * such descriptors. * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller * DMA limit than the device itself supports. * @dma_range_map: map for DMA memory ranges relative to that of RAM * @dma_parms: A low level driver may set these to teach IOMMU code about * segment limitations. * @dma_pools: Dma pools (if dma'ble device). * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. * @dma_io_tlb_pools: List of transient swiotlb memory pools. * @dma_io_tlb_lock: Protects changes to the list of active pools. * @dma_uses_io_tlb: %true if device has used the software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. * @devt: For creating the sysfs "dev". * @id: device instance * @devres_lock: Spinlock to protect the resource of the device. * @devres_head: The resources list of the device. * @class: The class of the device. * @groups: Optional attribute groups. * @release: Callback to free the device after all references have * gone away. This should be set by the allocator of the * device (i.e. the bus driver that discovered the device). * @iommu_group: IOMMU group the device belongs to. * @iommu: Per device generic IOMMU runtime data * @physical_location: Describes physical location of the device connection * point in the system housing. * @removable: Whether the device can be removed from the system. This * should be set by the subsystem / bus driver that discovered * the device. * * @offline_disabled: If set, the device is permanently online. * @offline: Set after successful invocation of bus type's .offline(). * @of_node_reused: Set if the device-tree node is shared with an ancestor * device. 
* @state_synced: The hardware state of this device has been synced to match * the software state of this device by calling the driver/bus * sync_state() callback. * @can_match: The device has matched with a driver at least once or it is in * a bus (like AMBA) which can't check for matching drivers until * other devices probe successfully. * @dma_coherent: this particular device is dma coherent, even if the * architecture supports non-coherent devices. * @dma_ops_bypass: If set to %true then the dma_ops are bypassed for the * streaming DMA operations (->map_* / ->unmap_* / ->sync_*), * and optionall (if the coherent mask is large enough) also * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information * that the device model core needs to model the system. Most subsystems, * however, track additional information about the devices they host. As a * result, it is rare for devices to be represented by bare device structures; * instead, that structure, like kobject structures, is usually embedded within * a higher-level representation of the device. */ struct device { struct kobject kobj; struct device *parent; struct device_private *p; const char *init_name; /* initial name of the device */ const struct device_type *type; const struct bus_type *bus; /* type of bus device is on */ struct device_driver *driver; /* which driver has allocated this device */ void *platform_data; /* Platform specific data, device core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ struct mutex mutex; /* mutex to synchronize calls to * its driver. */ struct dev_links_info links; struct dev_pm_info power; struct dev_pm_domain *pm_domain; #ifdef CONFIG_ENERGY_MODEL struct em_perf_domain *em_pd; #endif #ifdef CONFIG_PINCTRL struct dev_pin_info *pins; #endif struct dev_msi_info msi; #ifdef CONFIG_DMA_OPS const struct dma_map_ops *dma_ops; #endif u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for alloc_coherent mappings as not all hardware supports 64 bit addresses for consistent allocations such descriptors. 
*/ u64 bus_dma_limit; /* upstream dma constraint */ const struct bus_dma_region *dma_range_map; struct device_dma_parameters *dma_parms; struct list_head dma_pools; /* dma pools (if dma'ble) */ #ifdef CONFIG_DMA_DECLARE_COHERENT struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ #endif #ifdef CONFIG_DMA_CMA struct cma *cma_area; /* contiguous memory area for dma allocations */ #endif #ifdef CONFIG_SWIOTLB struct io_tlb_mem *dma_io_tlb_mem; #endif #ifdef CONFIG_SWIOTLB_DYNAMIC struct list_head dma_io_tlb_pools; spinlock_t dma_io_tlb_lock; bool dma_uses_io_tlb; #endif /* arch specific additions */ struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ struct fwnode_handle *fwnode; /* firmware device node */ #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ #endif dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ spinlock_t devres_lock; struct list_head devres_head; const struct class *class; const struct attribute_group **groups; /* optional groups */ void (*release)(struct device *dev); struct iommu_group *iommu_group; struct dev_iommu *iommu; struct device_physical_location *physical_location; enum device_removable removable; bool offline_disabled:1; bool offline:1; bool of_node_reused:1; bool state_synced:1; bool can_match:1; #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) bool dma_coherent:1; #endif #ifdef CONFIG_DMA_OPS_BYPASS bool dma_ops_bypass : 1; #endif }; /** * struct device_link - Device link representation. * @supplier: The device on the supplier end of the link. * @s_node: Hook to the supplier device's list of links to consumers. * @consumer: The device on the consumer end of the link. * @c_node: Hook to the consumer device's list of links to suppliers. * @link_dev: device used to expose link details in sysfs * @status: The state of the link (with respect to the presence of drivers). * @flags: Link flags. * @rpm_active: Whether or not the consumer device is runtime-PM-active. * @kref: Count repeated addition of the same link. * @rm_work: Work structure used for removing the link. * @supplier_preactivated: Supplier has been made active before consumer probe. */ struct device_link { struct device *supplier; struct list_head s_node; struct device *consumer; struct list_head c_node; struct device link_dev; enum device_link_state status; u32 flags; refcount_t rpm_active; struct kref kref; struct work_struct rm_work; bool supplier_preactivated; /* Owned by consumer probe. */ }; #define kobj_to_dev(__kobj) container_of_const(__kobj, struct device, kobj) /** * device_iommu_mapped - Returns true when the device DMA is translated * by an IOMMU * @dev: Device to perform the check on */ static inline bool device_iommu_mapped(struct device *dev) { return (dev->iommu_group != NULL); } /* Get the wakeup routines, which depend on struct device */ #include <linux/pm_wakeup.h> /** * dev_name - Return a device's name. * @dev: Device with name to get. * Return: The kobject name of the device, or its initial name if unavailable. 
*/ static inline const char *dev_name(const struct device *dev) { /* Use the init name until the kobject becomes available */ if (dev->init_name) return dev->init_name; return kobject_name(&dev->kobj); } /** * dev_bus_name - Return a device's bus/class name, if at all possible * @dev: struct device to get the bus/class name of * * Will return the name of the bus/class the device is attached to. If it is * not attached to a bus/class, an empty string will be returned. */ static inline const char *dev_bus_name(const struct device *dev) { return dev->bus ? dev->bus->name : (dev->class ? dev->class->name : ""); } __printf(2, 3) int dev_set_name(struct device *dev, const char *name, ...); #ifdef CONFIG_NUMA static inline int dev_to_node(struct device *dev) { return dev->numa_node; } static inline void set_dev_node(struct device *dev, int node) { dev->numa_node = node; } #else static inline int dev_to_node(struct device *dev) { return NUMA_NO_NODE; } static inline void set_dev_node(struct device *dev, int node) { } #endif static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) { #ifdef CONFIG_GENERIC_MSI_IRQ return dev->msi.domain; #else return NULL; #endif } static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d) { #ifdef CONFIG_GENERIC_MSI_IRQ dev->msi.domain = d; #endif } static inline void *dev_get_drvdata(const struct device *dev) { return dev->driver_data; } static inline void dev_set_drvdata(struct device *dev, void *data) { dev->driver_data = data; } static inline struct pm_subsys_data *dev_to_psd(struct device *dev) { return dev ? dev->power.subsys_data : NULL; } static inline unsigned int dev_get_uevent_suppress(const struct device *dev) { return dev->kobj.uevent_suppress; } static inline void dev_set_uevent_suppress(struct device *dev, int val) { dev->kobj.uevent_suppress = val; } static inline int device_is_registered(struct device *dev) { return dev->kobj.state_in_sysfs; } static inline void device_enable_async_suspend(struct device *dev) { if (!dev->power.is_prepared) dev->power.async_suspend = true; } static inline void device_disable_async_suspend(struct device *dev) { if (!dev->power.is_prepared) dev->power.async_suspend = false; } static inline bool device_async_suspend_enabled(struct device *dev) { return !!dev->power.async_suspend; } static inline bool device_pm_not_required(struct device *dev) { return dev->power.no_pm; } static inline void device_set_pm_not_required(struct device *dev) { dev->power.no_pm = true; } static inline void dev_pm_syscore_device(struct device *dev, bool val) { #ifdef CONFIG_PM_SLEEP dev->power.syscore = val; #endif } static inline void dev_pm_set_driver_flags(struct device *dev, u32 flags) { dev->power.driver_flags = flags; } static inline bool dev_pm_test_driver_flags(struct device *dev, u32 flags) { return !!(dev->power.driver_flags & flags); } static inline void device_lock(struct device *dev) { mutex_lock(&dev->mutex); } static inline int device_lock_interruptible(struct device *dev) { return mutex_lock_interruptible(&dev->mutex); } static inline int device_trylock(struct device *dev) { return mutex_trylock(&dev->mutex); } static inline void device_unlock(struct device *dev) { mutex_unlock(&dev->mutex); } DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T)) static inline void device_lock_assert(struct device *dev) { lockdep_assert_held(&dev->mutex); } static inline struct device_node *dev_of_node(struct device *dev) { if (!IS_ENABLED(CONFIG_OF) || !dev) return NULL; return 
dev->of_node; } static inline bool dev_has_sync_state(struct device *dev) { if (!dev) return false; if (dev->driver && dev->driver->sync_state) return true; if (dev->bus && dev->bus->sync_state) return true; return false; } static inline void dev_set_removable(struct device *dev, enum device_removable removable) { dev->removable = removable; } static inline bool dev_is_removable(struct device *dev) { return dev->removable == DEVICE_REMOVABLE; } static inline bool dev_removable_is_valid(struct device *dev) { return dev->removable != DEVICE_REMOVABLE_NOT_SUPPORTED; } /* * High level routines for use by the bus drivers */ int __must_check device_register(struct device *dev); void device_unregister(struct device *dev); void device_initialize(struct device *dev); int __must_check device_add(struct device *dev); void device_del(struct device *dev); DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) int device_for_each_child(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); int device_for_each_child_reverse(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); struct device *device_find_child(struct device *dev, void *data, int (*match)(struct device *dev, void *data)); struct device *device_find_child_by_name(struct device *parent, const char *name); struct device *device_find_any_child(struct device *parent); int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); static inline bool device_supports_offline(struct device *dev) { return dev->bus && dev->bus->offline && dev->bus->online; } #define __device_lock_set_class(dev, name, key) \ do { \ struct device *__d2 __maybe_unused = dev; \ lock_set_class(&__d2->mutex.dep_map, name, key, 0, _THIS_IP_); \ } while (0) /** * device_lock_set_class - Specify a temporary lock class while a device * is attached to a driver * @dev: device to modify * @key: lock class key data * * This must be called with the device_lock() already held, for example * from driver ->probe(). Take care to only override the default * lockdep_no_validate class. */ #ifdef CONFIG_LOCKDEP #define device_lock_set_class(dev, key) \ do { \ struct device *__d = dev; \ dev_WARN_ONCE(__d, !lockdep_match_class(&__d->mutex, \ &__lockdep_no_validate__), \ "overriding existing custom lock class\n"); \ __device_lock_set_class(__d, #key, key); \ } while (0) #else #define device_lock_set_class(dev, key) __device_lock_set_class(dev, #key, key) #endif /** * device_lock_reset_class - Return a device to the default lockdep novalidate state * @dev: device to modify * * This must be called with the device_lock() already held, for example * from driver ->remove(). 
*/ #define device_lock_reset_class(dev) \ do { \ struct device *__d __maybe_unused = dev; \ lock_set_novalidate_class(&__d->mutex.dep_map, "&dev->mutex", \ _THIS_IP_); \ } while (0) void lock_device_hotplug(void); void unlock_device_hotplug(void); int lock_device_hotplug_sysfs(void); int device_offline(struct device *dev); int device_online(struct device *dev); void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); void device_set_node(struct device *dev, struct fwnode_handle *fwnode); static inline int dev_num_vf(struct device *dev) { if (dev->bus && dev->bus->num_vf) return dev->bus->num_vf(dev); return 0; } /* * Root device objects for grouping under /sys/devices */ struct device *__root_device_register(const char *name, struct module *owner); /* This is a macro to avoid include problems with THIS_MODULE */ #define root_device_register(name) \ __root_device_register(name, THIS_MODULE) void root_device_unregister(struct device *root); static inline void *dev_get_platdata(const struct device *dev) { return dev->platform_data; } /* * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. */ int __must_check device_driver_attach(struct device_driver *drv, struct device *dev); int __must_check device_bind_driver(struct device *dev); void device_release_driver(struct device *dev); int __must_check device_attach(struct device *dev); int __must_check driver_attach(struct device_driver *drv); void device_initial_probe(struct device *dev); int __must_check device_reprobe(struct device *dev); bool device_is_bound(struct device *dev); /* * Easy functions for dynamically creating devices on the fly */ __printf(5, 6) struct device * device_create(const struct class *cls, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...); __printf(6, 7) struct device * device_create_with_groups(const struct class *cls, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); void device_destroy(const struct class *cls, dev_t devt); int __must_check device_add_groups(struct device *dev, const struct attribute_group **groups); void device_remove_groups(struct device *dev, const struct attribute_group **groups); static inline int __must_check device_add_group(struct device *dev, const struct attribute_group *grp) { const struct attribute_group *groups[] = { grp, NULL }; return device_add_groups(dev, groups); } static inline void device_remove_group(struct device *dev, const struct attribute_group *grp) { const struct attribute_group *groups[] = { grp, NULL }; return device_remove_groups(dev, groups); } int __must_check devm_device_add_groups(struct device *dev, const struct attribute_group **groups); int __must_check devm_device_add_group(struct device *dev, const struct attribute_group *grp); /* * Platform "fixup" functions - allow the platform to have their say * about devices and actions that the general device layer doesn't * know about. */ /* Notify platform of device discovery */ extern int (*platform_notify)(struct device *dev); extern int (*platform_notify_remove)(struct device *dev); /* * get_device - atomically increment the reference count for the device. 
* */ struct device *get_device(struct device *dev); void put_device(struct device *dev); DEFINE_FREE(put_device, struct device *, if (_T) put_device(_T)) bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS int devtmpfs_mount(void); #else static inline int devtmpfs_mount(void) { return 0; } #endif /* drivers/base/power/shutdown.c */ void device_shutdown(void); /* debugging and troubleshooting/diagnostic helpers. */ const char *dev_driver_string(const struct device *dev); /* Device links interface. */ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags); void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); void device_link_wait_removal(void); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_CHARDEV_MAJOR(major) \ MODULE_ALIAS("char-major-" __stringify(major) "-*") #endif /* _DEVICE_H_ */
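/*
 * Illustrative sketch (not part of device.h above): one way a driver could
 * combine DEVICE_ATTR_RW(), dev_get_drvdata() and device_create_file() from
 * the declarations above.  "struct my_data", the "level" attribute and
 * "my_add_level_attr" are hypothetical; sysfs_emit() and kstrtoint() come
 * from other headers.
 */
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sysfs.h>

struct my_data {
	int level;
};

static ssize_t level_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct my_data *data = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%d\n", data->level);
}

static ssize_t level_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	struct my_data *data = dev_get_drvdata(dev);
	int ret = kstrtoint(buf, 0, &data->level);

	return ret ? ret : count;
}

/* Expands to a struct device_attribute named dev_attr_level, mode 0644. */
static DEVICE_ATTR_RW(level);

static int my_add_level_attr(struct device *dev)	/* hypothetical helper */
{
	return device_create_file(dev, &dev_attr_level);
}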
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM workqueue

#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_WORKQUEUE_H

#include <linux/tracepoint.h>
#include <linux/workqueue.h>

struct pool_workqueue;

/**
 * workqueue_queue_work - called when a work gets queued
 * @req_cpu:	the requested cpu
 * @pwq:	pointer to struct pool_workqueue
 * @work:	pointer to struct work_struct
 *
 * This event occurs when a work is queued immediately or once a
 * delayed work is actually queued on a workqueue (ie: once the delay
 * has been reached).
 */
TRACE_EVENT(workqueue_queue_work,

	TP_PROTO(int req_cpu, struct pool_workqueue *pwq,
		 struct work_struct *work),

	TP_ARGS(req_cpu, pwq, work),

	TP_STRUCT__entry(
		__field(void *,	work)
		__field(void *,	function)
		__string(workqueue, pwq->wq->name)
		__field(int,	req_cpu)
		__field(int,	cpu)
	),

	TP_fast_assign(
		__entry->work		= work;
		__entry->function	= work->func;
		__assign_str(workqueue, pwq->wq->name);
		__entry->req_cpu	= req_cpu;
		__entry->cpu		= pwq->pool->cpu;
	),

	TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%d cpu=%d",
		  __entry->work, __entry->function, __get_str(workqueue),
		  __entry->req_cpu, __entry->cpu)
);

/**
 * workqueue_activate_work - called when a work gets activated
 * @work:	pointer to struct work_struct
 *
 * This event occurs when a queued work is put on the active queue,
 * which happens immediately after queueing unless @max_active limit
 * is reached.
 */
TRACE_EVENT(workqueue_activate_work,

	TP_PROTO(struct work_struct *work),

	TP_ARGS(work),

	TP_STRUCT__entry(
		__field(void *,	work)
	),

	TP_fast_assign(
		__entry->work = work;
	),

	TP_printk("work struct %p", __entry->work)
);

/**
 * workqueue_execute_start - called immediately before the workqueue callback
 * @work:	pointer to struct work_struct
 *
 * Allows tracking of workqueue execution.
 */
TRACE_EVENT(workqueue_execute_start,

	TP_PROTO(struct work_struct *work),

	TP_ARGS(work),

	TP_STRUCT__entry(
		__field(void *,	work)
		__field(void *,	function)
	),

	TP_fast_assign(
		__entry->work		= work;
		__entry->function	= work->func;
	),

	TP_printk("work struct %p: function %ps", __entry->work, __entry->function)
);

/**
 * workqueue_execute_end - called immediately after the workqueue callback
 * @work:	pointer to struct work_struct
 * @function:	pointer to worker function
 *
 * Allows tracking of workqueue execution.
 */
TRACE_EVENT(workqueue_execute_end,

	TP_PROTO(struct work_struct *work, work_func_t function),

	TP_ARGS(work, function),

	TP_STRUCT__entry(
		__field(void *,	work)
		__field(void *,	function)
	),

	TP_fast_assign(
		__entry->work		= work;
		__entry->function	= function;
	),

	TP_printk("work struct %p: function %ps", __entry->work, __entry->function)
);

#endif /* _TRACE_WORKQUEUE_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
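/*
 * Hedged illustration (not part of the original header): the trace_*()
 * helpers generated from the TRACE_EVENT() definitions above are emitted
 * from kernel/workqueue.c. The function below is a simplified sketch of
 * that sequence, with assumed variable names, not the real call sites.
 */
#include <trace/events/workqueue.h>

static void example_trace_work_lifetime(int req_cpu, struct pool_workqueue *pwq,
					struct work_struct *work)
{
	trace_workqueue_queue_work(req_cpu, pwq, work);	/* work queued */
	trace_workqueue_activate_work(work);		/* put on the active list */

	trace_workqueue_execute_start(work);		/* just before the callback */
	work->func(work);				/* the work callback itself */
	trace_workqueue_execute_end(work, work->func);	/* just after the callback */
}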
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/fault-inject.h>
#include <linux/fault-inject-usercopy.h>

static struct {
	struct fault_attr attr;
} fail_usercopy = {
	.attr = FAULT_ATTR_INITIALIZER,
};

static int __init setup_fail_usercopy(char *str)
{
	return setup_fault_attr(&fail_usercopy.attr, str);
}
__setup("fail_usercopy=", setup_fail_usercopy);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_usercopy_debugfs(void)
{
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_usercopy", NULL,
					&fail_usercopy.attr);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	return 0;
}

late_initcall(fail_usercopy_debugfs);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */

bool should_fail_usercopy(void)
{
	return should_fail(&fail_usercopy.attr, 1);
}
EXPORT_SYMBOL_GPL(should_fail_usercopy);
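/*
 * Hedged usage sketch (not part of the original file): should_fail_usercopy()
 * is meant to be consulted on the user-copy path so a copy can be forced to
 * fail for testing. The caller below is a simplified, assumed example; the
 * real hook lives in the kernel's copy_from_user()/copy_to_user() plumbing.
 * The fault-injection behaviour is configured either via the "fail_usercopy="
 * boot parameter parsed above or via the debugfs directory created by
 * fail_usercopy_debugfs().
 */
#include <linux/uaccess.h>
#include <linux/fault-inject-usercopy.h>

static unsigned long example_copy_in(void *dst, const void __user *src,
				     unsigned long n)
{
	/* Pretend the whole copy failed when fault injection triggers. */
	if (should_fail_usercopy())
		return n;

	return copy_from_user(dst, src, n);	/* returns bytes NOT copied */
}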
// SPDX-License-Identifier: GPL-2.0-or-later
/* Linux driver for Philips webcam
   USB and Video4Linux interface part.
   (C) 1999-2004 Nemosoft Unv.
   (C) 2004-2006 Luc Saillard (luc@saillard.org)
   (C) 2011 Hans de Goede <hdegoede@redhat.com>

   NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx
   driver and thus may have bugs that are not present in the original version.
   Please send bug reports and support requests to <luc@saillard.org>.

   The decompression routines have been implemented by reverse-engineering the
   Nemosoft binary pwcx module. Caveat emptor.
*/

/*
   This code forms the interface between the USB layers and the Philips
   specific stuff. Some advanced stuff of the driver falls under an
   NDA, signed between me and Philips B.V., Eindhoven, the Netherlands, and
   is thus not distributed in source form. The binary pwcx.o module
   contains the code that falls under the NDA.

   In case you're wondering: 'pwc' stands for "Philips WebCam", but
   I really didn't want to type 'philips_web_cam' every time (I'm lazy as
   any Linux kernel hacker, but I don't like incomprehensible abbreviations
   without explanation).

   Oh yes, convention: to distinguish between all the various pointers to
   device-structures, I use these names for the pointer variables:
   udev: struct usb_device *
   vdev: struct video_device (member of pwc_dev)
   pdev: struct pwc_device *
*/

/* Contributors:
   - Alvarado: adding whitebalance code
   - Alistar Moire: QuickCam 3000 Pro device/product ID
   - Tony Hoyle: Creative Labs Webcam 5 device/product ID
   - Mark Burazin: solving hang in VIDIOCSYNC when camera gets unplugged
   - Jk Fang: Sotec Afina Eye ID
   - Xavier Roche: QuickCam Pro 4000 ID
   - Jens Knudsen: QuickCam Zoom ID
   - J.
Debert: QuickCam for Notebooks ID - Pham Thanh Nam: webcam snapshot button as an event input device */ #include <linux/errno.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/slab.h> #ifdef CONFIG_USB_PWC_INPUT_EVDEV #include <linux/usb/input.h> #endif #include <linux/vmalloc.h> #include <asm/io.h> #include <linux/kernel.h> /* simple_strtol() */ #include "pwc.h" #include "pwc-kiara.h" #include "pwc-timon.h" #include "pwc-dec23.h" #include "pwc-dec1.h" #define CREATE_TRACE_POINTS #include <trace/events/pwc.h> /* Function prototypes and driver templates */ /* hotplug device table support */ static const struct usb_device_id pwc_device_table [] = { { USB_DEVICE(0x041E, 0x400C) }, /* Creative Webcam 5 */ { USB_DEVICE(0x041E, 0x4011) }, /* Creative Webcam Pro Ex */ { USB_DEVICE(0x046D, 0x08B0) }, /* Logitech QuickCam 3000 Pro */ { USB_DEVICE(0x046D, 0x08B1) }, /* Logitech QuickCam Notebook Pro */ { USB_DEVICE(0x046D, 0x08B2) }, /* Logitech QuickCam 4000 Pro */ { USB_DEVICE(0x046D, 0x08B3) }, /* Logitech QuickCam Zoom (old model) */ { USB_DEVICE(0x046D, 0x08B4) }, /* Logitech QuickCam Zoom (new model) */ { USB_DEVICE(0x046D, 0x08B5) }, /* Logitech QuickCam Orbit/Sphere */ { USB_DEVICE(0x046D, 0x08B6) }, /* Logitech/Cisco VT Camera */ { USB_DEVICE(0x046D, 0x08B7) }, /* Logitech ViewPort AV 100 */ { USB_DEVICE(0x046D, 0x08B8) }, /* Logitech QuickCam */ { USB_DEVICE(0x0471, 0x0302) }, /* Philips PCA645VC */ { USB_DEVICE(0x0471, 0x0303) }, /* Philips PCA646VC */ { USB_DEVICE(0x0471, 0x0304) }, /* Askey VC010 type 2 */ { USB_DEVICE(0x0471, 0x0307) }, /* Philips PCVC675K (Vesta) */ { USB_DEVICE(0x0471, 0x0308) }, /* Philips PCVC680K (Vesta Pro) */ { USB_DEVICE(0x0471, 0x030C) }, /* Philips PCVC690K (Vesta Pro Scan) */ { USB_DEVICE(0x0471, 0x0310) }, /* Philips PCVC730K (ToUCam Fun)/PCVC830 (ToUCam II) */ { USB_DEVICE(0x0471, 0x0311) }, /* Philips PCVC740K (ToUCam Pro)/PCVC840 (ToUCam II) */ { USB_DEVICE(0x0471, 0x0312) }, /* Philips PCVC750K (ToUCam Pro Scan) */ { USB_DEVICE(0x0471, 0x0313) }, /* Philips PCVC720K/40 (ToUCam XS) */ { USB_DEVICE(0x0471, 0x0329) }, /* Philips SPC 900NC webcam */ { USB_DEVICE(0x0471, 0x032C) }, /* Philips SPC 880NC webcam */ { USB_DEVICE(0x04CC, 0x8116) }, /* Sotec Afina Eye */ { USB_DEVICE(0x055D, 0x9000) }, /* Samsung MPC-C10 */ { USB_DEVICE(0x055D, 0x9001) }, /* Samsung MPC-C30 */ { USB_DEVICE(0x055D, 0x9002) }, /* Samsung SNC-35E (Ver3.0) */ { USB_DEVICE(0x069A, 0x0001) }, /* Askey VC010 type 1 */ { USB_DEVICE(0x06BE, 0x8116) }, /* AME Co. 
Afina Eye */ { USB_DEVICE(0x0d81, 0x1900) }, /* Visionite VCS-UC300 */ { USB_DEVICE(0x0d81, 0x1910) }, /* Visionite VCS-UM100 */ { } }; MODULE_DEVICE_TABLE(usb, pwc_device_table); static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id *id); static void usb_pwc_disconnect(struct usb_interface *intf); static void pwc_isoc_cleanup(struct pwc_device *pdev); static struct usb_driver pwc_driver = { .name = "Philips webcam", /* name */ .id_table = pwc_device_table, .probe = usb_pwc_probe, /* probe() */ .disconnect = usb_pwc_disconnect, /* disconnect() */ }; #define MAX_DEV_HINTS 20 #define MAX_ISOC_ERRORS 20 #ifdef CONFIG_USB_PWC_DEBUG int pwc_trace = PWC_DEBUG_LEVEL; #endif static int power_save = -1; static int leds[2] = { 100, 0 }; /***/ static const struct v4l2_file_operations pwc_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .mmap = vb2_fop_mmap, .unlocked_ioctl = video_ioctl2, }; static const struct video_device pwc_template = { .name = "Philips Webcam", /* Filled in later */ .release = video_device_release_empty, .fops = &pwc_fops, .ioctl_ops = &pwc_ioctl_ops, }; /***************************************************************************/ /* Private functions */ static void *pwc_alloc_urb_buffer(struct usb_device *dev, size_t size, dma_addr_t *dma_handle) { struct device *dmadev = dev->bus->sysdev; void *buffer = kmalloc(size, GFP_KERNEL); if (!buffer) return NULL; *dma_handle = dma_map_single(dmadev, buffer, size, DMA_FROM_DEVICE); if (dma_mapping_error(dmadev, *dma_handle)) { kfree(buffer); return NULL; } return buffer; } static void pwc_free_urb_buffer(struct usb_device *dev, size_t size, void *buffer, dma_addr_t dma_handle) { struct device *dmadev = dev->bus->sysdev; dma_unmap_single(dmadev, dma_handle, size, DMA_FROM_DEVICE); kfree(buffer); } static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev) { unsigned long flags = 0; struct pwc_frame_buf *buf = NULL; spin_lock_irqsave(&pdev->queued_bufs_lock, flags); if (list_empty(&pdev->queued_bufs)) goto leave; buf = list_entry(pdev->queued_bufs.next, struct pwc_frame_buf, list); list_del(&buf->list); leave: spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); return buf; } static void pwc_snapshot_button(struct pwc_device *pdev, int down) { if (down) { PWC_TRACE("Snapshot button pressed.\n"); } else { PWC_TRACE("Snapshot button released.\n"); } #ifdef CONFIG_USB_PWC_INPUT_EVDEV if (pdev->button_dev) { input_report_key(pdev->button_dev, KEY_CAMERA, down); input_sync(pdev->button_dev); } #endif } static void pwc_frame_complete(struct pwc_device *pdev) { struct pwc_frame_buf *fbuf = pdev->fill_buf; /* The ToUCam Fun CMOS sensor causes the firmware to send 2 or 3 bogus frames on the USB wire after an exposure change. This conditition is however detected in the cam and a bit is set in the header. */ if (pdev->type == 730) { unsigned char *ptr = (unsigned char *)fbuf->data; if (ptr[1] == 1 && ptr[0] & 0x10) { PWC_TRACE("Hyundai CMOS sensor bug. Dropping frame.\n"); pdev->drop_frames += 2; } if ((ptr[0] ^ pdev->vmirror) & 0x01) { pwc_snapshot_button(pdev, ptr[0] & 0x01); } if ((ptr[0] ^ pdev->vmirror) & 0x02) { if (ptr[0] & 0x02) PWC_TRACE("Image is mirrored.\n"); else PWC_TRACE("Image is normal.\n"); } pdev->vmirror = ptr[0] & 0x03; /* Sometimes the trailer of the 730 is still sent as a 4 byte packet after a short frame; this condition is filtered out specifically. A 4 byte frame doesn't make sense anyway. 
So we get either this sequence: drop_bit set -> 4 byte frame -> short frame -> good frame Or this one: drop_bit set -> short frame -> good frame So we drop either 3 or 2 frames in all! */ if (fbuf->filled == 4) pdev->drop_frames++; } else if (pdev->type == 740 || pdev->type == 720) { unsigned char *ptr = (unsigned char *)fbuf->data; if ((ptr[0] ^ pdev->vmirror) & 0x01) { pwc_snapshot_button(pdev, ptr[0] & 0x01); } pdev->vmirror = ptr[0] & 0x03; } /* In case we were instructed to drop the frame, do so silently. */ if (pdev->drop_frames > 0) { pdev->drop_frames--; } else { /* Check for underflow first */ if (fbuf->filled < pdev->frame_total_size) { PWC_DEBUG_FLOW("Frame buffer underflow (%d bytes); discarded.\n", fbuf->filled); } else { fbuf->vb.field = V4L2_FIELD_NONE; fbuf->vb.sequence = pdev->vframe_count; vb2_buffer_done(&fbuf->vb.vb2_buf, VB2_BUF_STATE_DONE); pdev->fill_buf = NULL; pdev->vsync = 0; } } /* !drop_frames */ pdev->vframe_count++; } /* This gets called for the Isochronous pipe (video). This is done in * interrupt time, so it has to be fast, not crash, and not stall. Neat. */ static void pwc_isoc_handler(struct urb *urb) { struct pwc_device *pdev = (struct pwc_device *)urb->context; struct device *dmadev = urb->dev->bus->sysdev; int i, fst, flen; unsigned char *iso_buf = NULL; trace_pwc_handler_enter(urb, pdev); if (urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN) { PWC_DEBUG_OPEN("URB (%p) unlinked %ssynchronously.\n", urb, urb->status == -ENOENT ? "" : "a"); return; } if (pdev->fill_buf == NULL) pdev->fill_buf = pwc_get_next_fill_buf(pdev); if (urb->status != 0) { const char *errmsg; errmsg = "Unknown"; switch(urb->status) { case -ENOSR: errmsg = "Buffer error (overrun)"; break; case -EPIPE: errmsg = "Stalled (device not responding)"; break; case -EOVERFLOW: errmsg = "Babble (bad cable?)"; break; case -EPROTO: errmsg = "Bit-stuff error (bad cable?)"; break; case -EILSEQ: errmsg = "CRC/Timeout (could be anything)"; break; case -ETIME: errmsg = "Device does not respond"; break; } PWC_ERROR("pwc_isoc_handler() called with status %d [%s].\n", urb->status, errmsg); /* Give up after a number of contiguous errors */ if (++pdev->visoc_errors > MAX_ISOC_ERRORS) { PWC_ERROR("Too many ISOC errors, bailing out.\n"); if (pdev->fill_buf) { vb2_buffer_done(&pdev->fill_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); pdev->fill_buf = NULL; } } pdev->vsync = 0; /* Drop the current frame */ goto handler_end; } /* Reset ISOC error counter. We did get here, after all. */ pdev->visoc_errors = 0; dma_sync_single_for_cpu(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); /* vsync: 0 = don't copy data 1 = sync-hunt 2 = synched */ /* Compact data */ for (i = 0; i < urb->number_of_packets; i++) { fst = urb->iso_frame_desc[i].status; flen = urb->iso_frame_desc[i].actual_length; iso_buf = urb->transfer_buffer + urb->iso_frame_desc[i].offset; if (fst != 0) { PWC_ERROR("Iso frame %d has error %d\n", i, fst); continue; } if (flen > 0 && pdev->vsync) { struct pwc_frame_buf *fbuf = pdev->fill_buf; if (pdev->vsync == 1) { fbuf->vb.vb2_buf.timestamp = ktime_get_ns(); pdev->vsync = 2; } if (flen + fbuf->filled > pdev->frame_total_size) { PWC_ERROR("Frame overflow (%d > %d)\n", flen + fbuf->filled, pdev->frame_total_size); pdev->vsync = 0; /* Let's wait for an EOF */ } else { memcpy(fbuf->data + fbuf->filled, iso_buf, flen); fbuf->filled += flen; } } if (flen < pdev->vlast_packet_size) { /* Shorter packet... 
end of frame */ if (pdev->vsync == 2) pwc_frame_complete(pdev); if (pdev->fill_buf == NULL) pdev->fill_buf = pwc_get_next_fill_buf(pdev); if (pdev->fill_buf) { pdev->fill_buf->filled = 0; pdev->vsync = 1; } } pdev->vlast_packet_size = flen; } dma_sync_single_for_device(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); handler_end: trace_pwc_handler_exit(urb, pdev); i = usb_submit_urb(urb, GFP_ATOMIC); if (i != 0) PWC_ERROR("Error (%d) re-submitting urb in pwc_isoc_handler.\n", i); } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static int pwc_isoc_init(struct pwc_device *pdev) { struct usb_device *udev; struct urb *urb; int i, j, ret; struct usb_interface *intf; struct usb_host_interface *idesc = NULL; int compression = 0; /* 0..3 = uncompressed..high */ pdev->vsync = 0; pdev->vlast_packet_size = 0; pdev->fill_buf = NULL; pdev->vframe_count = 0; pdev->visoc_errors = 0; udev = pdev->udev; retry: /* We first try with low compression and then retry with a higher compression setting if there is not enough bandwidth. */ ret = pwc_set_video_mode(pdev, pdev->width, pdev->height, pdev->pixfmt, pdev->vframes, &compression, 1); /* Get the current alternate interface, adjust packet size */ intf = usb_ifnum_to_if(udev, 0); if (intf) idesc = usb_altnum_to_altsetting(intf, pdev->valternate); if (!idesc) return -EIO; /* Search video endpoint */ pdev->vmax_packet_size = -1; for (i = 0; i < idesc->desc.bNumEndpoints; i++) { if ((idesc->endpoint[i].desc.bEndpointAddress & 0xF) == pdev->vendpoint) { pdev->vmax_packet_size = le16_to_cpu(idesc->endpoint[i].desc.wMaxPacketSize); break; } } if (pdev->vmax_packet_size < 0 || pdev->vmax_packet_size > ISO_MAX_FRAME_SIZE) { PWC_ERROR("Failed to find packet size for video endpoint in current alternate setting.\n"); return -ENFILE; /* Odd error, that should be noticeable */ } /* Set alternate interface */ PWC_DEBUG_OPEN("Setting alternate interface %d\n", pdev->valternate); ret = usb_set_interface(pdev->udev, 0, pdev->valternate); if (ret == -ENOSPC && compression < 3) { compression++; goto retry; } if (ret < 0) return ret; /* Allocate and init Isochronuous urbs */ for (i = 0; i < MAX_ISO_BUFS; i++) { urb = usb_alloc_urb(ISO_FRAMES_PER_DESC, GFP_KERNEL); if (urb == NULL) { pwc_isoc_cleanup(pdev); return -ENOMEM; } pdev->urbs[i] = urb; PWC_DEBUG_MEMORY("Allocated URB at 0x%p\n", urb); urb->interval = 1; // devik urb->dev = udev; urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->transfer_buffer_length = ISO_BUFFER_SIZE; urb->transfer_buffer = pwc_alloc_urb_buffer(udev, urb->transfer_buffer_length, &urb->transfer_dma); if (urb->transfer_buffer == NULL) { PWC_ERROR("Failed to allocate urb buffer %d\n", i); pwc_isoc_cleanup(pdev); return -ENOMEM; } urb->complete = pwc_isoc_handler; urb->context = pdev; urb->start_frame = 0; urb->number_of_packets = ISO_FRAMES_PER_DESC; for (j = 0; j < ISO_FRAMES_PER_DESC; j++) { urb->iso_frame_desc[j].offset = j * ISO_MAX_FRAME_SIZE; urb->iso_frame_desc[j].length = pdev->vmax_packet_size; } } /* link */ for (i = 0; i < MAX_ISO_BUFS; i++) { ret = usb_submit_urb(pdev->urbs[i], GFP_KERNEL); if (ret == -ENOSPC && compression < 3) { compression++; pwc_isoc_cleanup(pdev); goto retry; } if (ret) { PWC_ERROR("isoc_init() submit_urb %d failed with error %d\n", i, ret); pwc_isoc_cleanup(pdev); return ret; } PWC_DEBUG_MEMORY("URB 0x%p submitted.\n", pdev->urbs[i]); } /* All is done... 
*/ PWC_DEBUG_OPEN("<< pwc_isoc_init()\n"); return 0; } static void pwc_iso_stop(struct pwc_device *pdev) { int i; /* Unlinking ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { if (pdev->urbs[i]) { PWC_DEBUG_MEMORY("Unlinking URB %p\n", pdev->urbs[i]); usb_kill_urb(pdev->urbs[i]); } } } static void pwc_iso_free(struct pwc_device *pdev) { int i; /* Freeing ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { struct urb *urb = pdev->urbs[i]; if (urb) { PWC_DEBUG_MEMORY("Freeing URB\n"); if (urb->transfer_buffer) pwc_free_urb_buffer(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); pdev->urbs[i] = NULL; } } } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static void pwc_isoc_cleanup(struct pwc_device *pdev) { PWC_DEBUG_OPEN(">> pwc_isoc_cleanup()\n"); pwc_iso_stop(pdev); pwc_iso_free(pdev); usb_set_interface(pdev->udev, 0, 0); PWC_DEBUG_OPEN("<< pwc_isoc_cleanup()\n"); } /* Must be called with vb_queue_lock hold */ static void pwc_cleanup_queued_bufs(struct pwc_device *pdev, enum vb2_buffer_state state) { unsigned long flags = 0; spin_lock_irqsave(&pdev->queued_bufs_lock, flags); while (!list_empty(&pdev->queued_bufs)) { struct pwc_frame_buf *buf; buf = list_entry(pdev->queued_bufs.next, struct pwc_frame_buf, list); list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, state); } spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); } #ifdef CONFIG_USB_PWC_DEBUG static const char *pwc_sensor_type_to_string(unsigned int sensor_type) { switch(sensor_type) { case 0x00: return "Hyundai CMOS sensor"; case 0x20: return "Sony CCD sensor + TDA8787"; case 0x2E: return "Sony CCD sensor + Exas 98L59"; case 0x2F: return "Sony CCD sensor + ADI 9804"; case 0x30: return "Sharp CCD sensor + TDA8787"; case 0x3E: return "Sharp CCD sensor + Exas 98L59"; case 0x3F: return "Sharp CCD sensor + ADI 9804"; case 0x40: return "UPA 1021 sensor"; case 0x100: return "VGA sensor"; case 0x101: return "PAL MR sensor"; default: return "unknown type of sensor"; } } #endif /***************************************************************************/ /* Video4Linux functions */ static void pwc_video_release(struct v4l2_device *v) { struct pwc_device *pdev = container_of(v, struct pwc_device, v4l2_dev); v4l2_ctrl_handler_free(&pdev->ctrl_handler); v4l2_device_unregister(&pdev->v4l2_dev); kfree(pdev->ctrl_buf); kfree(pdev); } /***************************************************************************/ /* Videobuf2 operations */ static int queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct pwc_device *pdev = vb2_get_drv_priv(vq); int size; if (*nbuffers < MIN_FRAMES) *nbuffers = MIN_FRAMES; else if (*nbuffers > MAX_FRAMES) *nbuffers = MAX_FRAMES; *nplanes = 1; size = pwc_get_size(pdev, MAX_WIDTH, MAX_HEIGHT); sizes[0] = PAGE_ALIGN(pwc_image_sizes[size][0] * pwc_image_sizes[size][1] * 3 / 2); return 0; } static int buffer_init(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); /* need vmalloc since frame buffer > 128K */ buf->data = vzalloc(PWC_FRAME_SIZE); if (buf->data == NULL) return -ENOMEM; return 0; } static int buffer_prepare(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); /* Don't allow queueing new buffers after device disconnection */ if (!pdev->udev) return -ENODEV; return 0; } static void 
buffer_finish(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); if (vb->state == VB2_BUF_STATE_DONE) { /* * Application has called dqbuf and is getting back a buffer * we've filled, take the pwc data we've stored in buf->data * and decompress it into a usable format, storing the result * in the vb2_buffer. */ pwc_decompress(pdev, buf); } } static void buffer_cleanup(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); vfree(buf->data); } static void buffer_queue(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); unsigned long flags = 0; /* Check the device has not disconnected between prep and queuing */ if (!pdev->udev) { vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); return; } spin_lock_irqsave(&pdev->queued_bufs_lock, flags); list_add_tail(&buf->list, &pdev->queued_bufs); spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); } static int start_streaming(struct vb2_queue *vq, unsigned int count) { struct pwc_device *pdev = vb2_get_drv_priv(vq); int r; if (!pdev->udev) return -ENODEV; if (mutex_lock_interruptible(&pdev->v4l2_lock)) return -ERESTARTSYS; /* Turn on camera and set LEDS on */ pwc_camera_power(pdev, 1); pwc_set_leds(pdev, leds[0], leds[1]); r = pwc_isoc_init(pdev); if (r) { /* If we failed turn camera and LEDS back off */ pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); /* And cleanup any queued bufs!! */ pwc_cleanup_queued_bufs(pdev, VB2_BUF_STATE_QUEUED); } mutex_unlock(&pdev->v4l2_lock); return r; } static void stop_streaming(struct vb2_queue *vq) { struct pwc_device *pdev = vb2_get_drv_priv(vq); mutex_lock(&pdev->v4l2_lock); if (pdev->udev) { pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); pwc_isoc_cleanup(pdev); } pwc_cleanup_queued_bufs(pdev, VB2_BUF_STATE_ERROR); if (pdev->fill_buf) vb2_buffer_done(&pdev->fill_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); mutex_unlock(&pdev->v4l2_lock); } static const struct vb2_ops pwc_vb_queue_ops = { .queue_setup = queue_setup, .buf_init = buffer_init, .buf_prepare = buffer_prepare, .buf_finish = buffer_finish, .buf_cleanup = buffer_cleanup, .buf_queue = buffer_queue, .start_streaming = start_streaming, .stop_streaming = stop_streaming, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; /***************************************************************************/ /* USB functions */ /* This function gets called when a new device is plugged in or the usb core * is loaded. */ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct pwc_device *pdev = NULL; int vendor_id, product_id, type_id; int rc; int features = 0; int compression = 0; int my_power_save = power_save; char serial_number[30], *name; vendor_id = le16_to_cpu(udev->descriptor.idVendor); product_id = le16_to_cpu(udev->descriptor.idProduct); /* Check if we can handle this device */ PWC_DEBUG_PROBE("probe() called [%04X %04X], if %d\n", vendor_id, product_id, intf->altsetting->desc.bInterfaceNumber); /* the interfaces are probed one by one. We are only interested in the video interface (0) now. 
Interface 1 is the Audio Control, and interface 2 Audio itself. */ if (intf->altsetting->desc.bInterfaceNumber > 0) return -ENODEV; if (vendor_id == 0x0471) { switch (product_id) { case 0x0302: PWC_INFO("Philips PCA645VC USB webcam detected.\n"); name = "Philips 645 webcam"; type_id = 645; break; case 0x0303: PWC_INFO("Philips PCA646VC USB webcam detected.\n"); name = "Philips 646 webcam"; type_id = 646; break; case 0x0304: PWC_INFO("Askey VC010 type 2 USB webcam detected.\n"); name = "Askey VC010 webcam"; type_id = 646; break; case 0x0307: PWC_INFO("Philips PCVC675K (Vesta) USB webcam detected.\n"); name = "Philips 675 webcam"; type_id = 675; break; case 0x0308: PWC_INFO("Philips PCVC680K (Vesta Pro) USB webcam detected.\n"); name = "Philips 680 webcam"; type_id = 680; break; case 0x030C: PWC_INFO("Philips PCVC690K (Vesta Pro Scan) USB webcam detected.\n"); name = "Philips 690 webcam"; type_id = 690; break; case 0x0310: PWC_INFO("Philips PCVC730K (ToUCam Fun)/PCVC830 (ToUCam II) USB webcam detected.\n"); name = "Philips 730 webcam"; type_id = 730; break; case 0x0311: PWC_INFO("Philips PCVC740K (ToUCam Pro)/PCVC840 (ToUCam II) USB webcam detected.\n"); name = "Philips 740 webcam"; type_id = 740; break; case 0x0312: PWC_INFO("Philips PCVC750K (ToUCam Pro Scan) USB webcam detected.\n"); name = "Philips 750 webcam"; type_id = 750; break; case 0x0313: PWC_INFO("Philips PCVC720K/40 (ToUCam XS) USB webcam detected.\n"); name = "Philips 720K/40 webcam"; type_id = 720; break; case 0x0329: PWC_INFO("Philips SPC 900NC USB webcam detected.\n"); name = "Philips SPC 900NC webcam"; type_id = 740; break; case 0x032C: PWC_INFO("Philips SPC 880NC USB webcam detected.\n"); name = "Philips SPC 880NC webcam"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x069A) { switch(product_id) { case 0x0001: PWC_INFO("Askey VC010 type 1 USB webcam detected.\n"); name = "Askey VC010 webcam"; type_id = 645; break; default: return -ENODEV; } } else if (vendor_id == 0x046d) { switch(product_id) { case 0x08b0: PWC_INFO("Logitech QuickCam Pro 3000 USB webcam detected.\n"); name = "Logitech QuickCam Pro 3000"; type_id = 740; /* CCD sensor */ break; case 0x08b1: PWC_INFO("Logitech QuickCam Notebook Pro USB webcam detected.\n"); name = "Logitech QuickCam Notebook Pro"; type_id = 740; /* CCD sensor */ break; case 0x08b2: PWC_INFO("Logitech QuickCam 4000 Pro USB webcam detected.\n"); name = "Logitech QuickCam Pro 4000"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; break; case 0x08b3: PWC_INFO("Logitech QuickCam Zoom USB webcam detected.\n"); name = "Logitech QuickCam Zoom"; type_id = 740; /* CCD sensor */ break; case 0x08B4: PWC_INFO("Logitech QuickCam Zoom (new model) USB webcam detected.\n"); name = "Logitech QuickCam Zoom"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; break; case 0x08b5: PWC_INFO("Logitech QuickCam Orbit/Sphere USB webcam detected.\n"); name = "Logitech QuickCam Orbit"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; features |= FEATURE_MOTOR_PANTILT; break; case 0x08b6: PWC_INFO("Logitech/Cisco VT Camera webcam detected.\n"); name = "Cisco VT Camera"; type_id = 740; /* CCD sensor */ break; case 0x08b7: PWC_INFO("Logitech ViewPort AV 100 webcam detected.\n"); name = "Logitech ViewPort AV 100"; type_id = 740; /* CCD sensor */ break; case 0x08b8: /* Where this released? 
*/ PWC_INFO("Logitech QuickCam detected (reserved ID).\n"); name = "Logitech QuickCam (res.)"; type_id = 730; /* Assuming CMOS */ break; default: return -ENODEV; } } else if (vendor_id == 0x055d) { /* I don't know the difference between the C10 and the C30; I suppose the difference is the sensor, but both cameras work equally well with a type_id of 675 */ switch(product_id) { case 0x9000: PWC_INFO("Samsung MPC-C10 USB webcam detected.\n"); name = "Samsung MPC-C10"; type_id = 675; break; case 0x9001: PWC_INFO("Samsung MPC-C30 USB webcam detected.\n"); name = "Samsung MPC-C30"; type_id = 675; break; case 0x9002: PWC_INFO("Samsung SNC-35E (v3.0) USB webcam detected.\n"); name = "Samsung MPC-C30"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x041e) { switch(product_id) { case 0x400c: PWC_INFO("Creative Labs Webcam 5 detected.\n"); name = "Creative Labs Webcam 5"; type_id = 730; if (my_power_save == -1) my_power_save = 1; break; case 0x4011: PWC_INFO("Creative Labs Webcam Pro Ex detected.\n"); name = "Creative Labs Webcam Pro Ex"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x04cc) { switch(product_id) { case 0x8116: PWC_INFO("Sotec Afina Eye USB webcam detected.\n"); name = "Sotec Afina Eye"; type_id = 730; break; default: return -ENODEV; } } else if (vendor_id == 0x06be) { switch(product_id) { case 0x8116: /* This is essentially the same cam as the Sotec Afina Eye */ PWC_INFO("AME Co. Afina Eye USB webcam detected.\n"); name = "AME Co. Afina Eye"; type_id = 750; break; default: return -ENODEV; } } else if (vendor_id == 0x0d81) { switch(product_id) { case 0x1900: PWC_INFO("Visionite VCS-UC300 USB webcam detected.\n"); name = "Visionite VCS-UC300"; type_id = 740; /* CCD sensor */ break; case 0x1910: PWC_INFO("Visionite VCS-UM100 USB webcam detected.\n"); name = "Visionite VCS-UM100"; type_id = 730; /* CMOS sensor */ break; default: return -ENODEV; } } else return -ENODEV; /* Not any of the know types; but the list keeps growing. */ if (my_power_save == -1) my_power_save = 0; memset(serial_number, 0, 30); usb_string(udev, udev->descriptor.iSerialNumber, serial_number, 29); PWC_DEBUG_PROBE("Device serial number is %s\n", serial_number); if (udev->descriptor.bNumConfigurations > 1) PWC_WARNING("Warning: more than 1 configuration available.\n"); /* Allocate structure, initialize pointers, mutexes, etc. 
and link it to the usb_device */ pdev = kzalloc(sizeof(struct pwc_device), GFP_KERNEL); if (pdev == NULL) { PWC_ERROR("Oops, could not allocate memory for pwc_device.\n"); return -ENOMEM; } pdev->type = type_id; pdev->features = features; pwc_construct(pdev); /* set min/max sizes correct */ mutex_init(&pdev->v4l2_lock); mutex_init(&pdev->vb_queue_lock); spin_lock_init(&pdev->queued_bufs_lock); INIT_LIST_HEAD(&pdev->queued_bufs); pdev->udev = udev; pdev->power_save = my_power_save; /* Init videobuf2 queue structure */ pdev->vb_queue.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; pdev->vb_queue.io_modes = VB2_MMAP | VB2_USERPTR | VB2_READ; pdev->vb_queue.drv_priv = pdev; pdev->vb_queue.buf_struct_size = sizeof(struct pwc_frame_buf); pdev->vb_queue.ops = &pwc_vb_queue_ops; pdev->vb_queue.mem_ops = &vb2_vmalloc_memops; pdev->vb_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; rc = vb2_queue_init(&pdev->vb_queue); if (rc < 0) { PWC_ERROR("Oops, could not initialize vb2 queue.\n"); goto err_free_mem; } /* Init video_device structure */ pdev->vdev = pwc_template; strscpy(pdev->vdev.name, name, sizeof(pdev->vdev.name)); pdev->vdev.queue = &pdev->vb_queue; pdev->vdev.queue->lock = &pdev->vb_queue_lock; video_set_drvdata(&pdev->vdev, pdev); pdev->release = le16_to_cpu(udev->descriptor.bcdDevice); PWC_DEBUG_PROBE("Release: %04x\n", pdev->release); /* Allocate USB command buffers */ pdev->ctrl_buf = kmalloc(sizeof(pdev->cmd_buf), GFP_KERNEL); if (!pdev->ctrl_buf) { PWC_ERROR("Oops, could not allocate memory for pwc_device.\n"); rc = -ENOMEM; goto err_free_mem; } #ifdef CONFIG_USB_PWC_DEBUG /* Query sensor type */ if (pwc_get_cmos_sensor(pdev, &rc) >= 0) { PWC_DEBUG_OPEN("This %s camera is equipped with a %s (%d).\n", pdev->vdev.name, pwc_sensor_type_to_string(rc), rc); } #endif /* Set the leds off */ pwc_set_leds(pdev, 0, 0); /* Setup initial videomode */ rc = pwc_set_video_mode(pdev, MAX_WIDTH, MAX_HEIGHT, V4L2_PIX_FMT_YUV420, 30, &compression, 1); if (rc) goto err_free_mem; /* Register controls (and read default values from camera */ rc = pwc_init_controls(pdev); if (rc) { PWC_ERROR("Failed to register v4l2 controls (%d).\n", rc); goto err_free_mem; } /* And powerdown the camera until streaming starts */ pwc_camera_power(pdev, 0); /* Register the v4l2_device structure */ pdev->v4l2_dev.release = pwc_video_release; rc = v4l2_device_register(&intf->dev, &pdev->v4l2_dev); if (rc) { PWC_ERROR("Failed to register v4l2-device (%d).\n", rc); goto err_free_controls; } pdev->v4l2_dev.ctrl_handler = &pdev->ctrl_handler; pdev->vdev.v4l2_dev = &pdev->v4l2_dev; pdev->vdev.lock = &pdev->v4l2_lock; pdev->vdev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; rc = video_register_device(&pdev->vdev, VFL_TYPE_VIDEO, -1); if (rc < 0) { PWC_ERROR("Failed to register as video device (%d).\n", rc); goto err_unregister_v4l2_dev; } PWC_INFO("Registered as %s.\n", video_device_node_name(&pdev->vdev)); #ifdef CONFIG_USB_PWC_INPUT_EVDEV /* register webcam snapshot button input device */ pdev->button_dev = input_allocate_device(); if (!pdev->button_dev) { rc = -ENOMEM; goto err_video_unreg; } usb_make_path(udev, pdev->button_phys, sizeof(pdev->button_phys)); strlcat(pdev->button_phys, "/input0", sizeof(pdev->button_phys)); pdev->button_dev->name = "PWC snapshot button"; pdev->button_dev->phys = pdev->button_phys; usb_to_input_id(pdev->udev, &pdev->button_dev->id); pdev->button_dev->dev.parent = &pdev->udev->dev; pdev->button_dev->evbit[0] = BIT_MASK(EV_KEY); 
pdev->button_dev->keybit[BIT_WORD(KEY_CAMERA)] = BIT_MASK(KEY_CAMERA); rc = input_register_device(pdev->button_dev); if (rc) { input_free_device(pdev->button_dev); pdev->button_dev = NULL; goto err_video_unreg; } #endif return 0; #ifdef CONFIG_USB_PWC_INPUT_EVDEV err_video_unreg: video_unregister_device(&pdev->vdev); #endif err_unregister_v4l2_dev: v4l2_device_unregister(&pdev->v4l2_dev); err_free_controls: v4l2_ctrl_handler_free(&pdev->ctrl_handler); err_free_mem: kfree(pdev->ctrl_buf); kfree(pdev); return rc; } /* The user yanked out the cable... */ static void usb_pwc_disconnect(struct usb_interface *intf) { struct v4l2_device *v = usb_get_intfdata(intf); struct pwc_device *pdev = container_of(v, struct pwc_device, v4l2_dev); mutex_lock(&pdev->vb_queue_lock); mutex_lock(&pdev->v4l2_lock); /* No need to keep the urbs around after disconnection */ if (pdev->vb_queue.streaming) pwc_isoc_cleanup(pdev); pdev->udev = NULL; v4l2_device_disconnect(&pdev->v4l2_dev); video_unregister_device(&pdev->vdev); mutex_unlock(&pdev->v4l2_lock); mutex_unlock(&pdev->vb_queue_lock); #ifdef CONFIG_USB_PWC_INPUT_EVDEV if (pdev->button_dev) input_unregister_device(pdev->button_dev); #endif v4l2_device_put(&pdev->v4l2_dev); } /* * Initialization code & module stuff */ static unsigned int leds_nargs; #ifdef CONFIG_USB_PWC_DEBUG module_param_named(trace, pwc_trace, int, 0644); #endif module_param(power_save, int, 0644); module_param_array(leds, int, &leds_nargs, 0444); #ifdef CONFIG_USB_PWC_DEBUG MODULE_PARM_DESC(trace, "For debugging purposes"); #endif MODULE_PARM_DESC(power_save, "Turn power saving for new cameras on or off"); MODULE_PARM_DESC(leds, "LED on,off time in milliseconds"); MODULE_DESCRIPTION("Philips & OEM USB webcam driver"); MODULE_AUTHOR("Luc Saillard <luc@saillard.org>"); MODULE_LICENSE("GPL"); MODULE_ALIAS("pwcx"); MODULE_VERSION( PWC_VERSION ); module_usb_driver(pwc_driver);
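/*
 * Hedged usage note (not part of the original driver): the module parameters
 * declared above can be set when loading the pwc module built from this file,
 * for example:
 *
 *   modprobe pwc power_save=1 leds=500,500
 *
 * "leds" takes the LED on,off times in milliseconds, "power_save" selects
 * power saving (1 = on, 0 = off, -1 = keep the per-camera default chosen in
 * usb_pwc_probe()), and, with CONFIG_USB_PWC_DEBUG, "trace" sets the debug
 * trace level.
 */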
230 230 233 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/pm_qos.h> static inline void device_pm_init_common(struct device *dev) { if (!dev->power.early_init) { spin_lock_init(&dev->power.lock); dev->power.qos = NULL; dev->power.early_init = true; } } #ifdef CONFIG_PM static inline void pm_runtime_early_init(struct device *dev) { dev->power.disable_depth = 1; device_pm_init_common(dev); } extern void pm_runtime_init(struct device *dev); extern void pm_runtime_reinit(struct device *dev); extern void pm_runtime_remove(struct device *dev); extern u64 pm_runtime_active_time(struct device *dev); #define WAKE_IRQ_DEDICATED_ALLOCATED BIT(0) #define WAKE_IRQ_DEDICATED_MANAGED BIT(1) #define WAKE_IRQ_DEDICATED_REVERSE BIT(2) #define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \ WAKE_IRQ_DEDICATED_MANAGED | \ WAKE_IRQ_DEDICATED_REVERSE) #define WAKE_IRQ_DEDICATED_ENABLED BIT(3) struct wake_irq { struct device *dev; unsigned int status; int irq; const char *name; }; extern void dev_pm_arm_wake_irq(struct wake_irq *wirq); extern void dev_pm_disarm_wake_irq(struct wake_irq *wirq); extern void dev_pm_enable_wake_irq_check(struct device *dev, bool can_change_status); extern void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable); extern void dev_pm_enable_wake_irq_complete(struct device *dev); #ifdef CONFIG_PM_SLEEP extern void device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq); extern void device_wakeup_detach_irq(struct device *dev); extern void device_wakeup_arm_wake_irqs(void); extern void device_wakeup_disarm_wake_irqs(void); #else static inline void device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) {} static inline void device_wakeup_detach_irq(struct device *dev) { } #endif /* CONFIG_PM_SLEEP */ /* * sysfs.c */ extern int dpm_sysfs_add(struct device *dev); extern void dpm_sysfs_remove(struct device *dev); extern void rpm_sysfs_remove(struct device *dev); extern int wakeup_sysfs_add(struct device *dev); extern void wakeup_sysfs_remove(struct device *dev); extern int pm_qos_sysfs_add_resume_latency(struct device *dev); extern void pm_qos_sysfs_remove_resume_latency(struct device *dev); extern int pm_qos_sysfs_add_flags(struct device *dev); extern void pm_qos_sysfs_remove_flags(struct device *dev); extern int pm_qos_sysfs_add_latency_tolerance(struct device *dev); extern void pm_qos_sysfs_remove_latency_tolerance(struct device *dev); extern int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); #else /* CONFIG_PM */ static inline void pm_runtime_early_init(struct device *dev) { device_pm_init_common(dev); } static inline void pm_runtime_init(struct device *dev) {} static inline void pm_runtime_reinit(struct device *dev) {} static inline void pm_runtime_remove(struct device *dev) {} static inline int dpm_sysfs_add(struct device *dev) { return 0; } static inline void dpm_sysfs_remove(struct device *dev) {} static inline int 
dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { return 0; } #endif #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ extern int pm_async_enabled; /* drivers/base/power/main.c */ extern struct list_head dpm_list; /* The active device list */ static inline struct device *to_device(struct list_head *entry) { return container_of(entry, struct device, power.entry); } extern void device_pm_sleep_init(struct device *dev); extern void device_pm_add(struct device *); extern void device_pm_remove(struct device *); extern void device_pm_move_before(struct device *, struct device *); extern void device_pm_move_after(struct device *, struct device *); extern void device_pm_move_last(struct device *); extern void device_pm_check_callbacks(struct device *dev); static inline bool device_pm_initialized(struct device *dev) { return dev->power.in_dpm_list; } /* drivers/base/power/wakeup_stats.c */ extern int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws); extern void wakeup_source_sysfs_remove(struct wakeup_source *ws); extern int pm_wakeup_source_sysfs_add(struct device *parent); #else /* !CONFIG_PM_SLEEP */ static inline void device_pm_sleep_init(struct device *dev) {} static inline void device_pm_add(struct device *dev) {} static inline void device_pm_remove(struct device *dev) { pm_runtime_remove(dev); } static inline void device_pm_move_before(struct device *deva, struct device *devb) {} static inline void device_pm_move_after(struct device *deva, struct device *devb) {} static inline void device_pm_move_last(struct device *dev) {} static inline void device_pm_check_callbacks(struct device *dev) {} static inline bool device_pm_initialized(struct device *dev) { return device_is_registered(dev); } static inline int pm_wakeup_source_sysfs_add(struct device *parent) { return 0; } #endif /* !CONFIG_PM_SLEEP */ static inline void device_pm_init(struct device *dev) { device_pm_init_common(dev); device_pm_sleep_init(dev); pm_runtime_init(dev); }
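/*
 * Hedged illustration (not part of the original header): device_pm_init()
 * above bundles the common, sleep and runtime-PM initialisation for a device.
 * The sketch below shows the kind of early driver-core path that consumes it
 * (device_initialize() in drivers/base/core.c is the real caller); the
 * function name here is an assumption for illustration only.
 */
#include "power.h"	/* this private header, drivers/base/power/power.h */

static void example_initialize_device_pm(struct device *dev)
{
	/* Must run before the device is added so its PM state starts out sane. */
	device_pm_init(dev);
}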
909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 // SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include <linux/gfp.h> #include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/fixmap.h> #include <asm/mtrr.h> #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; EXPORT_SYMBOL(physical_mask); #endif #ifdef CONFIG_HIGHPTE #define PGTABLE_HIGHMEM __GFP_HIGHMEM #else #define PGTABLE_HIGHMEM 0 #endif #ifndef CONFIG_PARAVIRT static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) { tlb_remove_page(tlb, table); } #endif gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; pgtable_t pte_alloc_one(struct mm_struct *mm) { return __pte_alloc_one(mm, __userpte_alloc_gfp); } static int __init setup_userpte(char *arg) { if (!arg) return -EINVAL; /* * "userpte=nohigh" disables allocation of user pagetables in * high memory. */ if (strcmp(arg, "nohigh") == 0) __userpte_alloc_gfp &= ~__GFP_HIGHMEM; else return -EINVAL; return 0; } early_param("userpte", setup_userpte); void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { pagetable_pte_dtor(page_ptdesc(pte)); paravirt_release_pte(page_to_pfn(pte)); paravirt_tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { struct ptdesc *ptdesc = virt_to_ptdesc(pmd); paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); /* * NOTE! For PAE, any changes to the top page-directory-pointer-table * entries need a full cr3 reload to flush. */ #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif pagetable_pmd_dtor(ptdesc); paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc)); } #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { struct ptdesc *ptdesc = virt_to_ptdesc(pud); pagetable_pud_dtor(ptdesc); paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); paravirt_tlb_remove_table(tlb, virt_to_page(pud)); } #if CONFIG_PGTABLE_LEVELS > 4 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) { paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); paravirt_tlb_remove_table(tlb, virt_to_page(p4d)); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #endif /* CONFIG_PGTABLE_LEVELS > 2 */ static inline void pgd_list_add(pgd_t *pgd) { struct ptdesc *ptdesc = virt_to_ptdesc(pgd); list_add(&ptdesc->pt_list, &pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct ptdesc *ptdesc = virt_to_ptdesc(pgd); list_del(&ptdesc->pt_list); } #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) #define MAX_UNSHARED_PTRS_PER_PGD \ max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { virt_to_ptdesc(pgd)->pt_mm = mm; } struct mm_struct *pgd_page_get_mm(struct page *page) { return page_ptdesc(page)->pt_mm; } static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) { /* If the pgd points to a shared pagetable level (either the ptes in non-PAE, or shared PMD in PAE), then just copy the references from swapper_pg_dir. 
*/ if (CONFIG_PGTABLE_LEVELS == 2 || (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || CONFIG_PGTABLE_LEVELS >= 4) { clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); } /* list required to sync kernel mapping updates */ if (!SHARED_KERNEL_PMD) { pgd_set_mm(pgd, mm); pgd_list_add(pgd); } } static void pgd_dtor(pgd_t *pgd) { if (SHARED_KERNEL_PMD) return; spin_lock(&pgd_lock); pgd_list_del(pgd); spin_unlock(&pgd_lock); } /* * List of all pgd's needed for non-PAE so it can invalidate entries * in both cached and uncached pgd's; not needed for PAE since the * kernel pmd is shared. If PAE were not to share the pmd a similar * tactic would be needed. This is essentially codepath-based locking * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. * -- nyc */ #ifdef CONFIG_X86_PAE /* * In PAE mode, we need to do a cr3 reload (=tlb flush) when * updating the top-level pagetable entries to guarantee the * processor notices the update. Since this is expensive, and * all 4 top-level entries are used almost immediately in a * new process's life, we just pre-populate them here. * * Also, if we're in a paravirt environment where the kernel pmd is * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate * and initialize the kernel pmds here. */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD #define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD /* * We allocate separate PMDs for the kernel part of the user page-table * when PTI is enabled. We need them to map the per-process LDT into the * user-space page-table. */ #define PREALLOCATED_USER_PMDS (boot_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) #define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); /* Note: almost everything apart from _PAGE_PRESENT is reserved at the pmd (PDPT) level. */ set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); /* * According to Intel App note "TLBs, Paging-Structure Caches, * and Their Invalidation", April 2007, document 317080-001, * section 8.1: in PAE mode we explicitly have to flush the * TLB via cr3 if the top-level pgd is changed... */ flush_tlb_mm(mm); } #else /* !CONFIG_X86_PAE */ /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 #define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 #define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; struct ptdesc *ptdesc; for (i = 0; i < count; i++) if (pmds[i]) { ptdesc = virt_to_ptdesc(pmds[i]); pagetable_pmd_dtor(ptdesc); pagetable_free(ptdesc); mm_dec_nr_pmds(mm); } } static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; bool failed = false; gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; gfp &= ~__GFP_HIGHMEM; for (i = 0; i < count; i++) { pmd_t *pmd = NULL; struct ptdesc *ptdesc = pagetable_alloc(gfp, 0); if (!ptdesc) failed = true; if (ptdesc && !pagetable_pmd_ctor(ptdesc)) { pagetable_free(ptdesc); ptdesc = NULL; failed = true; } if (ptdesc) { mm_inc_nr_pmds(mm); pmd = ptdesc_address(ptdesc); } pmds[i] = pmd; } if (failed) { free_pmds(mm, pmds, count); return -ENOMEM; } return 0; } /* * Mop up any pmd pages which may still be attached to the pgd. 
* Normally they will be freed by munmap/exit_mmap, but any pmd we * preallocate which never got a corresponding vma will need to be * freed manually. */ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp) { pgd_t pgd = *pgdp; if (pgd_val(pgd) != 0) { pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); pgd_clear(pgdp); paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); pmd_free(mm, pmd); mm_dec_nr_pmds(mm); } } static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) { int i; for (i = 0; i < PREALLOCATED_PMDS; i++) mop_up_one_pmd(mm, &pgdp[i]); #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!boot_cpu_has(X86_FEATURE_PTI)) return; pgdp = kernel_to_user_pgdp(pgdp); for (i = 0; i < PREALLOCATED_USER_PMDS; i++) mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]); #endif } static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) { p4d_t *p4d; pud_t *pud; int i; p4d = p4d_offset(pgd, 0); pud = pud_offset(p4d, 0); for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) { pmd_t *pmd = pmds[i]; if (i >= KERNEL_PGD_BOUNDARY) memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), sizeof(pmd_t) * PTRS_PER_PMD); pud_populate(mm, pud, pmd); } } #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION static void pgd_prepopulate_user_pmd(struct mm_struct *mm, pgd_t *k_pgd, pmd_t *pmds[]) { pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir); pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); p4d_t *u_p4d; pud_t *u_pud; int i; u_p4d = p4d_offset(u_pgd, 0); u_pud = pud_offset(u_p4d, 0); s_pgd += KERNEL_PGD_BOUNDARY; u_pud += KERNEL_PGD_BOUNDARY; for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) { pmd_t *pmd = pmds[i]; memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd), sizeof(pmd_t) * PTRS_PER_PMD); pud_populate(mm, u_pud, pmd); } } #else static void pgd_prepopulate_user_pmd(struct mm_struct *mm, pgd_t *k_pgd, pmd_t *pmds[]) { } #endif /* * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also * assumes that pgd should be in one page. * * But kernel with PAE paging that is not running as a Xen domain * only needs to allocate 32 bytes for pgd instead of one page. */ #ifdef CONFIG_X86_PAE #include <linux/slab.h> #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #define PGD_ALIGN 32 static struct kmem_cache *pgd_cache; void __init pgtable_cache_init(void) { /* * When PAE kernel is running as a Xen domain, it does not use * shared kernel pmd. And this requires a whole page for pgd. */ if (!SHARED_KERNEL_PMD) return; /* * when PAE kernel is not running as a Xen domain, it uses * shared kernel pmd. Shared kernel pmd does not require a whole * page for pgd. We are able to just allocate a 32-byte for pgd. * During boot time, we create a 32-byte slab for pgd table allocation. */ pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, SLAB_PANIC, NULL); } static inline pgd_t *_pgd_alloc(void) { /* * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain. * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, PGD_ALLOCATION_ORDER); /* * Now PAE kernel is not running as a Xen domain. We can allocate * a 32-byte slab for pgd to save memory space. 
*/ return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER); } static inline void _pgd_free(pgd_t *pgd) { if (!SHARED_KERNEL_PMD) free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); else kmem_cache_free(pgd_cache, pgd); } #else static inline pgd_t *_pgd_alloc(void) { return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) { free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); } #endif /* CONFIG_X86_PAE */ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pgd = _pgd_alloc(); if (pgd == NULL) goto out; mm->pgd = pgd; if (sizeof(pmds) != 0 && preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0) goto out_free_pgd; if (sizeof(u_pmds) != 0 && preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) goto out_free_pmds; if (paravirt_pgd_alloc(mm) != 0) goto out_free_user_pmds; /* * Make sure that pre-populating the pmds is atomic with * respect to anything walking the pgd_list, so that they * never see a partially populated pgd. */ spin_lock(&pgd_lock); pgd_ctor(mm, pgd); if (sizeof(pmds) != 0) pgd_prepopulate_pmd(mm, pgd, pmds); if (sizeof(u_pmds) != 0) pgd_prepopulate_user_pmd(mm, pgd, u_pmds); spin_unlock(&pgd_lock); return pgd; out_free_user_pmds: if (sizeof(u_pmds) != 0) free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS); out_free_pmds: if (sizeof(pmds) != 0) free_pmds(mm, pmds, PREALLOCATED_PMDS); out_free_pgd: _pgd_free(pgd); out: return NULL; } void pgd_free(struct mm_struct *mm, pgd_t *pgd) { pgd_mop_up_pmds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); _pgd_free(pgd); } /* * Used to set accessed or dirty bits in the page table entries * on other architectures. On x86, the accessed and dirty bits * are tracked by hardware. However, do_wp_page calls this function * to also make the pte writeable at the same time the dirty bit is * set. In that case we do actually need to write the PTE. */ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty) { int changed = !pte_same(*ptep, entry); if (changed && dirty) set_pte(ptep, entry); return changed; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { int changed = !pmd_same(*pmdp, entry); VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed && dirty) { set_pmd(pmdp, entry); /* * We had a write-protection fault here and changed the pmd * to to more permissive. No need to flush the TLB for that, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault. */ } return changed; } int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty) { int changed = !pud_same(*pudp, entry); VM_BUG_ON(address & ~HPAGE_PUD_MASK); if (changed && dirty) { set_pud(pudp, entry); /* * We had a write-protection fault here and changed the pud * to to more permissive. No need to flush the TLB for that, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault. 
*/ } return changed; } #endif int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { int ret = 0; if (pte_young(*ptep)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *) &ptep->pte); return ret; } #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { int ret = 0; if (pmd_young(*pmdp)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pmdp); return ret; } #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { int ret = 0; if (pud_young(*pudp)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pudp); return ret; } #endif int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { /* * On x86 CPUs, clearing the accessed bit without a TLB flush * doesn't cause data corruption. [ It could cause incorrect * page aging and the (mistaken) reclaim of hot pages, but the * chance of that should be relatively low. ] * * So as a performance optimization don't flush the TLB when * clearing the accessed bit, it will eventually be flushed by * a context switch or a VM operation anyway. [ In the rare * event of it not getting flushed for a long time the delay * shouldn't really matter because there's no real memory * pressure for swapout to react to. ] */ return ptep_test_and_clear_young(vma, address, ptep); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { int young; VM_BUG_ON(address & ~HPAGE_PMD_MASK); young = pmdp_test_and_clear_young(vma, address, pmdp); if (young) flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return young; } pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { /* * No flush is necessary. Once an invalid PTE is established, the PTE's * access and dirty bits cannot be updated. */ return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); } #endif /** * reserve_top_address - reserves a hole in the top of kernel address space * @reserve - size of hole to reserve * * Can be used to relocate the fixmap area and poke a hole in the top * of kernel address space to make room for a hypervisor. */ void __init reserve_top_address(unsigned long reserve) { #ifdef CONFIG_X86_32 BUG_ON(fixmaps_set > 0); __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE; printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n", -reserve, __FIXADDR_TOP + PAGE_SIZE); #endif } int fixmaps_set; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) { unsigned long address = __fix_to_virt(idx); #ifdef CONFIG_X86_64 /* * Ensure that the static initial page tables are covering the * fixmap completely. 
*/ BUILD_BUG_ON(__end_of_permanent_fixed_addresses > (FIXMAP_PMD_NUM * PTRS_PER_PTE)); #endif if (idx >= __end_of_fixed_addresses) { BUG(); return; } set_pte_vaddr(address, pte); fixmaps_set++; } void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) { /* Sanitize 'prot' against any unsupported bits: */ pgprot_val(flags) &= __default_kernel_pte_mask; __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); } #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #ifdef CONFIG_X86_5LEVEL /** * p4d_set_huge - setup kernel P4D mapping * * No 512GB pages yet -- always return 0 */ int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { return 0; } /** * p4d_clear_huge - clear kernel P4D mapping when it is set * * No 512GB pages yet -- always return 0 */ void p4d_clear_huge(p4d_t *p4d) { } #endif /** * pud_set_huge - setup kernel PUD mapping * * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this * function sets up a huge page only if the complete range has the same MTRR * caching mode. * * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger * page mapping attempt fails. * * Returns 1 on success and 0 on failure. */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { u8 uniform; mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); if (!uniform) return 0; /* Bail out if we are we on a populated non-leaf entry: */ if (pud_present(*pud) && !pud_huge(*pud)) return 0; set_pte((pte_t *)pud, pfn_pte( (u64)addr >> PAGE_SHIFT, __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE))); return 1; } /** * pmd_set_huge - setup kernel PMD mapping * * See text over pud_set_huge() above. * * Returns 1 on success and 0 on failure. */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { u8 uniform; mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); if (!uniform) { pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n", __func__, addr, addr + PMD_SIZE); return 0; } /* Bail out if we are we on a populated non-leaf entry: */ if (pmd_present(*pmd) && !pmd_huge(*pmd)) return 0; set_pte((pte_t *)pmd, pfn_pte( (u64)addr >> PAGE_SHIFT, __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE))); return 1; } /** * pud_clear_huge - clear kernel PUD mapping when it is set * * Returns 1 on success and 0 on failure (no PUD map is found). */ int pud_clear_huge(pud_t *pud) { if (pud_leaf(*pud)) { pud_clear(pud); return 1; } return 0; } /** * pmd_clear_huge - clear kernel PMD mapping when it is set * * Returns 1 on success and 0 on failure (no PMD map is found). */ int pmd_clear_huge(pmd_t *pmd) { if (pmd_leaf(*pmd)) { pmd_clear(pmd); return 1; } return 0; } #ifdef CONFIG_X86_64 /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. * @addr: Virtual address associated with pud. * * Context: The pud range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. * * NOTE: Callers must allow a single page allocation. 
*/ int pud_free_pmd_page(pud_t *pud, unsigned long addr) { pmd_t *pmd, *pmd_sv; pte_t *pte; int i; pmd = pud_pgtable(*pud); pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL); if (!pmd_sv) return 0; for (i = 0; i < PTRS_PER_PMD; i++) { pmd_sv[i] = pmd[i]; if (!pmd_none(pmd[i])) pmd_clear(&pmd[i]); } pud_clear(pud); /* INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); for (i = 0; i < PTRS_PER_PMD; i++) { if (!pmd_none(pmd_sv[i])) { pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); free_page((unsigned long)pte); } } free_page((unsigned long)pmd_sv); pagetable_pmd_dtor(virt_to_ptdesc(pmd)); free_page((unsigned long)pmd); return 1; } /** * pmd_free_pte_page - Clear pmd entry and free pte page. * @pmd: Pointer to a PMD. * @addr: Virtual address associated with pmd. * * Context: The pmd range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { pte_t *pte; pte = (pte_t *)pmd_page_vaddr(*pmd); pmd_clear(pmd); /* INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); free_page((unsigned long)pte); return 1; } #else /* !CONFIG_X86_64 */ /* * Disable free page handling on x86-PAE. This assures that ioremap() * does not update sync'd pmd entries. See vmalloc_sync_one(). */ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } #endif /* CONFIG_X86_64 */ #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma) { if (vma->vm_flags & VM_SHADOW_STACK) return pte_mkwrite_shstk(pte); pte = pte_mkwrite_novma(pte); return pte_clear_saveddirty(pte); } pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { if (vma->vm_flags & VM_SHADOW_STACK) return pmd_mkwrite_shstk(pmd); pmd = pmd_mkwrite_novma(pmd); return pmd_clear_saveddirty(pmd); } void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte) { /* * Hardware before shadow stack can (rarely) set Dirty=1 * on a Write=0 PTE. So the below condition * only indicates a software bug when shadow stack is * supported by the HW. This checking is covered in * pte_shstk(). */ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pte_shstk(pte)); } void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd) { /* See note in arch_check_zapped_pte() */ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pmd_shstk(pmd)); }
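/*
 * Editor's illustrative sketch (not part of pgtable.c): how generic mm code
 * is expected to use the vma-aware pte_mkwrite() above when making a PTE
 * writable in the fault path. The helper name example_make_writable() is
 * hypothetical; pte_mkdirty() followed by pte_mkwrite() roughly mirrors what
 * the generic fault path does. For a VM_SHADOW_STACK vma this yields a
 * shadow-stack PTE (Write=0, Dirty=1); otherwise a normally writable PTE
 * with any SavedDirty bit cleared, as implemented above.
 */
static pte_t example_make_writable(pte_t pte, struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_WRITE)
		pte = pte_mkwrite(pte_mkdirty(pte), vma);
	return pte;
}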
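/*
 * Editor's illustrative sketch (not part of pgtable.c): the documented
 * contract of pud_set_huge()/pmd_set_huge() above is that callers retry with
 * a smaller page size whenever a mapping attempt returns 0 (non-uniform MTRR
 * type or a populated non-leaf entry). example_map_huge() is hypothetical;
 * in the real vmap/ioremap path the pmd is obtained by walking (and, if
 * necessary, populating) the pud before the 2MB attempt.
 */
static int example_map_huge(pud_t *pud, pmd_t *pmd, phys_addr_t phys,
			    pgprot_t prot)
{
	/* Try a 1GB mapping first ... */
	if (pud_set_huge(pud, phys, prot))
		return 1;

	/* ... then a 2MB mapping with the same return convention ... */
	if (pmd_set_huge(pmd, phys, prot))
		return 1;

	/* ... and finally the caller falls back to 4K PTEs. */
	return 0;
}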
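/*
 * Editor's illustrative sketch (not part of pgtable.c): the pgd_alloc()/
 * pgd_free() pair above is driven by core mm code when an mm_struct is
 * created and destroyed, roughly as below (cf. mm_alloc_pgd()/mm_free_pgd()
 * in kernel/fork.c). The example_* names are hypothetical.
 */
static int example_mm_alloc_pgd(struct mm_struct *mm)
{
	/* pgd_alloc() also preallocates the PAE/PTI pmds and runs pgd_ctor() */
	mm->pgd = pgd_alloc(mm);
	if (unlikely(!mm->pgd))
		return -ENOMEM;
	return 0;
}

static void example_mm_free_pgd(struct mm_struct *mm)
{
	/* pgd_free() mops up any preallocated pmds before freeing the pgd */
	pgd_free(mm, mm->pgd);
}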
/* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #include <linux/slab.h> #include <linux/uaccess.h> #include <drm/drm_plane.h> #include <drm/drm_drv.h> #include <drm/drm_print.h> #include <drm/drm_framebuffer.h> #include <drm/drm_file.h> #include <drm/drm_crtc.h> #include <drm/drm_fourcc.h> #include <drm/drm_managed.h> #include <drm/drm_vblank.h> #include "drm_crtc_internal.h" /** * DOC: overview * * A plane represents an image source that can be blended with or overlaid on * top of a CRTC during the scanout process. Planes take their input data from a * &drm_framebuffer object. The plane itself specifies the cropping and scaling * of that image, and where it is placed on the visible area of a display * pipeline, represented by &drm_crtc. A plane can also have additional * properties that specify how the pixels are positioned and blended, like * rotation or Z-position. All these properties are stored in &drm_plane_state. * * Unless explicitly specified (via CRTC property or otherwise), the active area * of a CRTC will be black by default. This means portions of the active area * which are not covered by a plane will be black, and alpha blending of any * planes with the CRTC background will blend with black at the lowest zpos. * * To create a plane, a KMS driver allocates and zeroes an instance of * &struct drm_plane (possibly as part of a larger structure) and registers it * with a call to drm_universal_plane_init(). * * Each plane has a type, see enum drm_plane_type. A plane can be compatible * with multiple CRTCs, see &drm_plane.possible_crtcs. * * Each CRTC must have a unique primary plane userspace can attach to enable * the CRTC. In other words, userspace must be able to attach a different * primary plane to each CRTC at the same time. Primary planes can still be * compatible with multiple CRTCs. There must be exactly as many primary planes * as there are CRTCs. * * Legacy uAPI doesn't expose the primary and cursor planes directly.
DRM core * relies on the driver to set the primary and optionally the cursor plane used * for legacy IOCTLs. This is done by calling drm_crtc_init_with_planes(). All * drivers must provide one primary plane per CRTC to avoid surprising legacy * userspace too much. */ /** * DOC: standard plane properties * * DRM planes have a few standardized properties: * * type: * Immutable property describing the type of the plane. * * For user-space which has enabled the &DRM_CLIENT_CAP_ATOMIC capability, * the plane type is just a hint and is mostly superseded by atomic * test-only commits. The type hint can still be used to come up more * easily with a plane configuration accepted by the driver. * * The value of this property can be one of the following: * * "Primary": * To light up a CRTC, attaching a primary plane is the most likely to * work if it covers the whole CRTC and doesn't have scaling or * cropping set up. * * Drivers may support more features for the primary plane, user-space * can find out with test-only atomic commits. * * Some primary planes are implicitly used by the kernel in the legacy * IOCTLs &DRM_IOCTL_MODE_SETCRTC and &DRM_IOCTL_MODE_PAGE_FLIP. * Therefore user-space must not mix explicit usage of any primary * plane (e.g. through an atomic commit) with these legacy IOCTLs. * * "Cursor": * To enable this plane, using a framebuffer configured without scaling * or cropping and with the following properties is the most likely to * work: * * - If the driver provides the capabilities &DRM_CAP_CURSOR_WIDTH and * &DRM_CAP_CURSOR_HEIGHT, create the framebuffer with this size. * Otherwise, create a framebuffer with the size 64x64. * - If the driver doesn't support modifiers, create a framebuffer with * a linear layout. Otherwise, use the IN_FORMATS plane property. * * Drivers may support more features for the cursor plane, user-space * can find out with test-only atomic commits. * * Some cursor planes are implicitly used by the kernel in the legacy * IOCTLs &DRM_IOCTL_MODE_CURSOR and &DRM_IOCTL_MODE_CURSOR2. * Therefore user-space must not mix explicit usage of any cursor * plane (e.g. through an atomic commit) with these legacy IOCTLs. * * Some drivers may support cursors even if no cursor plane is exposed. * In this case, the legacy cursor IOCTLs can be used to configure the * cursor. * * "Overlay": * Neither primary nor cursor. * * Overlay planes are the only planes exposed when the * &DRM_CLIENT_CAP_UNIVERSAL_PLANES capability is disabled. * * IN_FORMATS: * Blob property which contains the set of buffer format and modifier * pairs supported by this plane. The blob is a struct * drm_format_modifier_blob. Without this property the plane doesn't * support buffers with modifiers. Userspace cannot change this property. * * Note that userspace can check the &DRM_CAP_ADDFB2_MODIFIERS driver * capability for general modifier support. If this flag is set then every * plane will have the IN_FORMATS property, even when it only supports * DRM_FORMAT_MOD_LINEAR. Before linux kernel release v5.1 there have been * various bugs in this area with inconsistencies between the capability * flag and per-plane properties. 
*/ static unsigned int drm_num_planes(struct drm_device *dev) { unsigned int num = 0; struct drm_plane *tmp; drm_for_each_plane(tmp, dev) { num++; } return num; } static inline u32 * formats_ptr(struct drm_format_modifier_blob *blob) { return (u32 *)(((char *)blob) + blob->formats_offset); } static inline struct drm_format_modifier * modifiers_ptr(struct drm_format_modifier_blob *blob) { return (struct drm_format_modifier *)(((char *)blob) + blob->modifiers_offset); } static int create_in_format_blob(struct drm_device *dev, struct drm_plane *plane) { const struct drm_mode_config *config = &dev->mode_config; struct drm_property_blob *blob; struct drm_format_modifier *mod; size_t blob_size, formats_size, modifiers_size; struct drm_format_modifier_blob *blob_data; unsigned int i, j; formats_size = sizeof(__u32) * plane->format_count; if (WARN_ON(!formats_size)) { /* 0 formats are never expected */ return 0; } modifiers_size = sizeof(struct drm_format_modifier) * plane->modifier_count; blob_size = sizeof(struct drm_format_modifier_blob); /* Modifiers offset is a pointer to a struct with a 64 bit field so it * should be naturally aligned to 8B. */ BUILD_BUG_ON(sizeof(struct drm_format_modifier_blob) % 8); blob_size += ALIGN(formats_size, 8); blob_size += modifiers_size; blob = drm_property_create_blob(dev, blob_size, NULL); if (IS_ERR(blob)) return -1; blob_data = blob->data; blob_data->version = FORMAT_BLOB_CURRENT; blob_data->count_formats = plane->format_count; blob_data->formats_offset = sizeof(struct drm_format_modifier_blob); blob_data->count_modifiers = plane->modifier_count; blob_data->modifiers_offset = ALIGN(blob_data->formats_offset + formats_size, 8); memcpy(formats_ptr(blob_data), plane->format_types, formats_size); mod = modifiers_ptr(blob_data); for (i = 0; i < plane->modifier_count; i++) { for (j = 0; j < plane->format_count; j++) { if (!plane->funcs->format_mod_supported || plane->funcs->format_mod_supported(plane, plane->format_types[j], plane->modifiers[i])) { mod->formats |= 1ULL << j; } } mod->modifier = plane->modifiers[i]; mod->offset = 0; mod->pad = 0; mod++; } drm_object_attach_property(&plane->base, config->modifiers_property, blob->base.id); return 0; } /** * DOC: hotspot properties * * HOTSPOT_X: property to set mouse hotspot x offset. * HOTSPOT_Y: property to set mouse hotspot y offset. * * When the plane is being used as a cursor image to display a mouse pointer, * the "hotspot" is the offset within the cursor image where mouse events * are expected to go. * * Positive values move the hotspot from the top-left corner of the cursor * plane towards the right and bottom. * * Most display drivers do not need this information because the * hotspot is not actually connected to anything visible on screen. * However, this is necessary for display drivers like the para-virtualized * drivers (eg qxl, vbox, virtio, vmwgfx), that are attached to a user console * with a mouse pointer. Since these consoles are often being remoted over a * network, they would otherwise have to wait to display the pointer movement to * the user until a full network round-trip has occurred. New mouse events have * to be sent from the user's console, over the network to the virtual input * devices, forwarded to the desktop for processing, and then the cursor plane's * position can be updated and sent back to the user's console over the network. * Instead, with the hotspot information, the console can anticipate the new * location, and draw the mouse cursor there before the confirmation comes in. 
* To do that correctly, the user's console must be able predict how the * desktop will process mouse events, which normally requires the desktop's * mouse topology information, ie where each CRTC sits in the mouse coordinate * space. This is typically sent to the para-virtualized drivers using some * driver-specific method, and the driver then forwards it to the console by * way of the virtual display device or hypervisor. * * The assumption is generally made that there is only one cursor plane being * used this way at a time, and that the desktop is feeding all mouse devices * into the same global pointer. Para-virtualized drivers that require this * should only be exposing a single cursor plane, or find some other way * to coordinate with a userspace desktop that supports multiple pointers. * If the hotspot properties are set, the cursor plane is therefore assumed to be * used only for displaying a mouse cursor image, and the position of the combined * cursor plane + offset can therefore be used for coordinating with input from a * mouse device. * * The cursor will then be drawn either at the location of the plane in the CRTC * console, or as a free-floating cursor plane on the user's console * corresponding to their desktop mouse position. * * DRM clients which would like to work correctly on drivers which expose * hotspot properties should advertise DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT. * Setting this property on drivers which do not special case * cursor planes will return EOPNOTSUPP, which can be used by userspace to * gauge requirements of the hardware/drivers they're running on. Advertising * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT implies that the userspace client will be * correctly setting the hotspot properties. */ /** * drm_plane_create_hotspot_properties - creates the mouse hotspot * properties and attaches them to the given cursor plane * * @plane: drm cursor plane * * This function enables the mouse hotspot property on a given * cursor plane. Look at the documentation for hotspot properties * to get a better understanding for what they're used for. * * RETURNS: * Zero for success or -errno */ static int drm_plane_create_hotspot_properties(struct drm_plane *plane) { struct drm_property *prop_x; struct drm_property *prop_y; drm_WARN_ON(plane->dev, !drm_core_check_feature(plane->dev, DRIVER_CURSOR_HOTSPOT)); prop_x = drm_property_create_signed_range(plane->dev, 0, "HOTSPOT_X", INT_MIN, INT_MAX); if (IS_ERR(prop_x)) return PTR_ERR(prop_x); prop_y = drm_property_create_signed_range(plane->dev, 0, "HOTSPOT_Y", INT_MIN, INT_MAX); if (IS_ERR(prop_y)) { drm_property_destroy(plane->dev, prop_x); return PTR_ERR(prop_y); } drm_object_attach_property(&plane->base, prop_x, 0); drm_object_attach_property(&plane->base, prop_y, 0); plane->hotspot_x_property = prop_x; plane->hotspot_y_property = prop_y; return 0; } __printf(9, 0) static int __drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, va_list ap) { struct drm_mode_config *config = &dev->mode_config; static const uint64_t default_modifiers[] = { DRM_FORMAT_MOD_LINEAR, }; unsigned int format_modifier_count = 0; int ret; /* plane index is used with 32bit bitmasks */ if (WARN_ON(config->num_total_plane >= 32)) return -EINVAL; /* * First driver to need more than 64 formats needs to fix this. 
Each * format is encoded as a bit and the current code only supports a u64. */ if (WARN_ON(format_count > 64)) return -EINVAL; WARN_ON(drm_drv_uses_atomic_modeset(dev) && (!funcs->atomic_destroy_state || !funcs->atomic_duplicate_state)); ret = drm_mode_object_add(dev, &plane->base, DRM_MODE_OBJECT_PLANE); if (ret) return ret; drm_modeset_lock_init(&plane->mutex); plane->base.properties = &plane->properties; plane->dev = dev; plane->funcs = funcs; plane->format_types = kmalloc_array(format_count, sizeof(uint32_t), GFP_KERNEL); if (!plane->format_types) { DRM_DEBUG_KMS("out of memory when allocating plane\n"); drm_mode_object_unregister(dev, &plane->base); return -ENOMEM; } if (format_modifiers) { const uint64_t *temp_modifiers = format_modifiers; while (*temp_modifiers++ != DRM_FORMAT_MOD_INVALID) format_modifier_count++; } else { if (!dev->mode_config.fb_modifiers_not_supported) { format_modifiers = default_modifiers; format_modifier_count = ARRAY_SIZE(default_modifiers); } } /* autoset the cap and check for consistency across all planes */ drm_WARN_ON(dev, config->fb_modifiers_not_supported && format_modifier_count); plane->modifier_count = format_modifier_count; plane->modifiers = kmalloc_array(format_modifier_count, sizeof(format_modifiers[0]), GFP_KERNEL); if (format_modifier_count && !plane->modifiers) { DRM_DEBUG_KMS("out of memory when allocating plane\n"); kfree(plane->format_types); drm_mode_object_unregister(dev, &plane->base); return -ENOMEM; } if (name) { plane->name = kvasprintf(GFP_KERNEL, name, ap); } else { plane->name = kasprintf(GFP_KERNEL, "plane-%d", drm_num_planes(dev)); } if (!plane->name) { kfree(plane->format_types); kfree(plane->modifiers); drm_mode_object_unregister(dev, &plane->base); return -ENOMEM; } memcpy(plane->format_types, formats, format_count * sizeof(uint32_t)); plane->format_count = format_count; memcpy(plane->modifiers, format_modifiers, format_modifier_count * sizeof(format_modifiers[0])); plane->possible_crtcs = possible_crtcs; plane->type = type; list_add_tail(&plane->head, &config->plane_list); plane->index = config->num_total_plane++; drm_object_attach_property(&plane->base, config->plane_type_property, plane->type); if (drm_core_check_feature(dev, DRIVER_ATOMIC)) { drm_object_attach_property(&plane->base, config->prop_fb_id, 0); drm_object_attach_property(&plane->base, config->prop_in_fence_fd, -1); drm_object_attach_property(&plane->base, config->prop_crtc_id, 0); drm_object_attach_property(&plane->base, config->prop_crtc_x, 0); drm_object_attach_property(&plane->base, config->prop_crtc_y, 0); drm_object_attach_property(&plane->base, config->prop_crtc_w, 0); drm_object_attach_property(&plane->base, config->prop_crtc_h, 0); drm_object_attach_property(&plane->base, config->prop_src_x, 0); drm_object_attach_property(&plane->base, config->prop_src_y, 0); drm_object_attach_property(&plane->base, config->prop_src_w, 0); drm_object_attach_property(&plane->base, config->prop_src_h, 0); } if (drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) && type == DRM_PLANE_TYPE_CURSOR) { drm_plane_create_hotspot_properties(plane); } if (format_modifier_count) create_in_format_blob(dev, plane); return 0; } /** * drm_universal_plane_init - Initialize a new universal plane object * @dev: DRM device * @plane: plane object to init * @possible_crtcs: bitmask of possible CRTCs * @funcs: callbacks for the new plane * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats * @format_modifiers: array of struct drm_format 
modifiers terminated by * DRM_FORMAT_MOD_INVALID * @type: type of plane (overlay, primary, cursor) * @name: printf style format string for the plane name, or NULL for default name * * Initializes a plane object of type @type. The &drm_plane_funcs.destroy hook * should call drm_plane_cleanup() and kfree() the plane structure. The plane * structure should not be allocated with devm_kzalloc(). * * Note: consider using drmm_universal_plane_alloc() instead of * drm_universal_plane_init() to let the DRM managed resource infrastructure * take care of cleanup and deallocation. * * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set * @format_modifiers to NULL. The plane will advertise the linear modifier. * * Returns: * Zero on success, error code on failure. */ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, ...) { va_list ap; int ret; WARN_ON(!funcs->destroy); va_start(ap, name); ret = __drm_universal_plane_init(dev, plane, possible_crtcs, funcs, formats, format_count, format_modifiers, type, name, ap); va_end(ap); return ret; } EXPORT_SYMBOL(drm_universal_plane_init); static void drmm_universal_plane_alloc_release(struct drm_device *dev, void *ptr) { struct drm_plane *plane = ptr; if (WARN_ON(!plane->dev)) return; drm_plane_cleanup(plane); } void *__drmm_universal_plane_alloc(struct drm_device *dev, size_t size, size_t offset, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, ...) { void *container; struct drm_plane *plane; va_list ap; int ret; if (WARN_ON(!funcs || funcs->destroy)) return ERR_PTR(-EINVAL); container = drmm_kzalloc(dev, size, GFP_KERNEL); if (!container) return ERR_PTR(-ENOMEM); plane = container + offset; va_start(ap, name); ret = __drm_universal_plane_init(dev, plane, possible_crtcs, funcs, formats, format_count, format_modifiers, type, name, ap); va_end(ap); if (ret) return ERR_PTR(ret); ret = drmm_add_action_or_reset(dev, drmm_universal_plane_alloc_release, plane); if (ret) return ERR_PTR(ret); return container; } EXPORT_SYMBOL(__drmm_universal_plane_alloc); void *__drm_universal_plane_alloc(struct drm_device *dev, size_t size, size_t offset, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, ...) 
{ void *container; struct drm_plane *plane; va_list ap; int ret; if (drm_WARN_ON(dev, !funcs)) return ERR_PTR(-EINVAL); container = kzalloc(size, GFP_KERNEL); if (!container) return ERR_PTR(-ENOMEM); plane = container + offset; va_start(ap, name); ret = __drm_universal_plane_init(dev, plane, possible_crtcs, funcs, formats, format_count, format_modifiers, type, name, ap); va_end(ap); if (ret) goto err_kfree; return container; err_kfree: kfree(container); return ERR_PTR(ret); } EXPORT_SYMBOL(__drm_universal_plane_alloc); int drm_plane_register_all(struct drm_device *dev) { unsigned int num_planes = 0; unsigned int num_zpos = 0; struct drm_plane *plane; int ret = 0; drm_for_each_plane(plane, dev) { if (plane->funcs->late_register) ret = plane->funcs->late_register(plane); if (ret) return ret; if (plane->zpos_property) num_zpos++; num_planes++; } drm_WARN(dev, num_zpos && num_planes != num_zpos, "Mixing planes with and without zpos property is invalid\n"); return 0; } void drm_plane_unregister_all(struct drm_device *dev) { struct drm_plane *plane; drm_for_each_plane(plane, dev) { if (plane->funcs->early_unregister) plane->funcs->early_unregister(plane); } } /** * drm_plane_cleanup - Clean up the core plane usage * @plane: plane to cleanup * * This function cleans up @plane and removes it from the DRM mode setting * core. Note that the function does *not* free the plane structure itself, * this is the responsibility of the caller. */ void drm_plane_cleanup(struct drm_plane *plane) { struct drm_device *dev = plane->dev; drm_modeset_lock_fini(&plane->mutex); kfree(plane->format_types); kfree(plane->modifiers); drm_mode_object_unregister(dev, &plane->base); BUG_ON(list_empty(&plane->head)); /* Note that the plane_list is considered to be static; should we * remove the drm_plane at runtime we would have to decrement all * the indices on the drm_plane after us in the plane_list. */ list_del(&plane->head); dev->mode_config.num_total_plane--; WARN_ON(plane->state && !plane->funcs->atomic_destroy_state); if (plane->state && plane->funcs->atomic_destroy_state) plane->funcs->atomic_destroy_state(plane, plane->state); kfree(plane->name); memset(plane, 0, sizeof(*plane)); } EXPORT_SYMBOL(drm_plane_cleanup); /** * drm_plane_from_index - find the registered plane at an index * @dev: DRM device * @idx: index of registered plane to find for * * Given a plane index, return the registered plane from DRM device's * list of planes with matching index. This is the inverse of drm_plane_index(). */ struct drm_plane * drm_plane_from_index(struct drm_device *dev, int idx) { struct drm_plane *plane; drm_for_each_plane(plane, dev) if (idx == plane->index) return plane; return NULL; } EXPORT_SYMBOL(drm_plane_from_index); /** * drm_plane_force_disable - Forcibly disable a plane * @plane: plane to disable * * Forces the plane to be disabled. * * Used when the plane's current framebuffer is destroyed, * and when restoring fbdev mode. * * Note that this function is not suitable for atomic drivers, since it doesn't * wire through the lock acquisition context properly and hence can't handle * retries or driver private locks. You probably want to use * drm_atomic_helper_disable_plane() or * drm_atomic_helper_disable_planes_on_crtc() instead. 
*/ void drm_plane_force_disable(struct drm_plane *plane) { int ret; if (!plane->fb) return; WARN_ON(drm_drv_uses_atomic_modeset(plane->dev)); plane->old_fb = plane->fb; ret = plane->funcs->disable_plane(plane, NULL); if (ret) { DRM_ERROR("failed to disable plane with busy fb\n"); plane->old_fb = NULL; return; } /* disconnect the plane from the fb and crtc: */ drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; plane->fb = NULL; plane->crtc = NULL; } EXPORT_SYMBOL(drm_plane_force_disable); /** * drm_mode_plane_set_obj_prop - set the value of a property * @plane: drm plane object to set property value for * @property: property to set * @value: value the property should be set to * * This functions sets a given property on a given plane object. This function * calls the driver's ->set_property callback and changes the software state of * the property if the callback succeeds. * * Returns: * Zero on success, error code on failure. */ int drm_mode_plane_set_obj_prop(struct drm_plane *plane, struct drm_property *property, uint64_t value) { int ret = -EINVAL; struct drm_mode_object *obj = &plane->base; if (plane->funcs->set_property) ret = plane->funcs->set_property(plane, property, value); if (!ret) drm_object_property_set_value(obj, property, value); return ret; } EXPORT_SYMBOL(drm_mode_plane_set_obj_prop); int drm_mode_getplane_res(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_plane_res *plane_resp = data; struct drm_plane *plane; uint32_t __user *plane_ptr; int count = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; plane_ptr = u64_to_user_ptr(plane_resp->plane_id_ptr); /* * This ioctl is called twice, once to determine how much space is * needed, and the 2nd time to fill it. */ drm_for_each_plane(plane, dev) { /* * Unless userspace set the 'universal planes' * capability bit, only advertise overlays. */ if (plane->type != DRM_PLANE_TYPE_OVERLAY && !file_priv->universal_planes) continue; /* * If we're running on a virtualized driver then, * unless userspace advertizes support for the * virtualized cursor plane, disable cursor planes * because they'll be broken due to missing cursor * hotspot info. 
*/ if (plane->type == DRM_PLANE_TYPE_CURSOR && drm_core_check_feature(dev, DRIVER_CURSOR_HOTSPOT) && file_priv->atomic && !file_priv->supports_virtualized_cursor_plane) continue; if (drm_lease_held(file_priv, plane->base.id)) { if (count < plane_resp->count_planes && put_user(plane->base.id, plane_ptr + count)) return -EFAULT; count++; } } plane_resp->count_planes = count; return 0; } int drm_mode_getplane(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_plane *plane_resp = data; struct drm_plane *plane; uint32_t __user *format_ptr; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; plane = drm_plane_find(dev, file_priv, plane_resp->plane_id); if (!plane) return -ENOENT; drm_modeset_lock(&plane->mutex, NULL); if (plane->state && plane->state->crtc && drm_lease_held(file_priv, plane->state->crtc->base.id)) plane_resp->crtc_id = plane->state->crtc->base.id; else if (!plane->state && plane->crtc && drm_lease_held(file_priv, plane->crtc->base.id)) plane_resp->crtc_id = plane->crtc->base.id; else plane_resp->crtc_id = 0; if (plane->state && plane->state->fb) plane_resp->fb_id = plane->state->fb->base.id; else if (!plane->state && plane->fb) plane_resp->fb_id = plane->fb->base.id; else plane_resp->fb_id = 0; drm_modeset_unlock(&plane->mutex); plane_resp->plane_id = plane->base.id; plane_resp->possible_crtcs = drm_lease_filter_crtcs(file_priv, plane->possible_crtcs); plane_resp->gamma_size = 0; /* * This ioctl is called twice, once to determine how much space is * needed, and the 2nd time to fill it. */ if (plane->format_count && (plane_resp->count_format_types >= plane->format_count)) { format_ptr = (uint32_t __user *)(unsigned long)plane_resp->format_type_ptr; if (copy_to_user(format_ptr, plane->format_types, sizeof(uint32_t) * plane->format_count)) { return -EFAULT; } } plane_resp->count_format_types = plane->format_count; return 0; } int drm_plane_check_pixel_format(struct drm_plane *plane, u32 format, u64 modifier) { unsigned int i; for (i = 0; i < plane->format_count; i++) { if (format == plane->format_types[i]) break; } if (i == plane->format_count) return -EINVAL; if (plane->funcs->format_mod_supported) { if (!plane->funcs->format_mod_supported(plane, format, modifier)) return -EINVAL; } else { if (!plane->modifier_count) return 0; for (i = 0; i < plane->modifier_count; i++) { if (modifier == plane->modifiers[i]) break; } if (i == plane->modifier_count) return -EINVAL; } return 0; } static int __setplane_check(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, int32_t crtc_x, int32_t crtc_y, uint32_t crtc_w, uint32_t crtc_h, uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h) { int ret; /* Check whether this plane is usable on this CRTC */ if (!(plane->possible_crtcs & drm_crtc_mask(crtc))) { DRM_DEBUG_KMS("Invalid crtc for plane\n"); return -EINVAL; } /* Check whether this plane supports the fb pixel format. 
*/ ret = drm_plane_check_pixel_format(plane, fb->format->format, fb->modifier); if (ret) { DRM_DEBUG_KMS("Invalid pixel format %p4cc, modifier 0x%llx\n", &fb->format->format, fb->modifier); return ret; } /* Give drivers some help against integer overflows */ if (crtc_w > INT_MAX || crtc_x > INT_MAX - (int32_t) crtc_w || crtc_h > INT_MAX || crtc_y > INT_MAX - (int32_t) crtc_h) { DRM_DEBUG_KMS("Invalid CRTC coordinates %ux%u+%d+%d\n", crtc_w, crtc_h, crtc_x, crtc_y); return -ERANGE; } ret = drm_framebuffer_check_src_coords(src_x, src_y, src_w, src_h, fb); if (ret) return ret; return 0; } /** * drm_any_plane_has_format - Check whether any plane supports this format and modifier combination * @dev: DRM device * @format: pixel format (DRM_FORMAT_*) * @modifier: data layout modifier * * Returns: * Whether at least one plane supports the specified format and modifier combination. */ bool drm_any_plane_has_format(struct drm_device *dev, u32 format, u64 modifier) { struct drm_plane *plane; drm_for_each_plane(plane, dev) { if (drm_plane_check_pixel_format(plane, format, modifier) == 0) return true; } return false; } EXPORT_SYMBOL(drm_any_plane_has_format); /* * __setplane_internal - setplane handler for internal callers * * This function will take a reference on the new fb for the plane * on success. * * src_{x,y,w,h} are provided in 16.16 fixed point format */ static int __setplane_internal(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, int32_t crtc_x, int32_t crtc_y, uint32_t crtc_w, uint32_t crtc_h, /* src_{x,y,w,h} values are 16.16 fixed point */ uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, struct drm_modeset_acquire_ctx *ctx) { int ret = 0; WARN_ON(drm_drv_uses_atomic_modeset(plane->dev)); /* No fb means shut it down */ if (!fb) { plane->old_fb = plane->fb; ret = plane->funcs->disable_plane(plane, ctx); if (!ret) { plane->crtc = NULL; plane->fb = NULL; } else { plane->old_fb = NULL; } goto out; } ret = __setplane_check(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h); if (ret) goto out; plane->old_fb = plane->fb; ret = plane->funcs->update_plane(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h, ctx); if (!ret) { plane->crtc = crtc; plane->fb = fb; drm_framebuffer_get(plane->fb); } else { plane->old_fb = NULL; } out: if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; return ret; } static int __setplane_atomic(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, int32_t crtc_x, int32_t crtc_y, uint32_t crtc_w, uint32_t crtc_h, uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, struct drm_modeset_acquire_ctx *ctx) { int ret; WARN_ON(!drm_drv_uses_atomic_modeset(plane->dev)); /* No fb means shut it down */ if (!fb) return plane->funcs->disable_plane(plane, ctx); /* * FIXME: This is redundant with drm_atomic_plane_check(), * but the legacy cursor/"async" .update_plane() tricks * don't call that so we still need this here. Should remove * this when all .update_plane() implementations have been * fixed to call drm_atomic_plane_check(). 
*/ ret = __setplane_check(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h); if (ret) return ret; return plane->funcs->update_plane(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h, ctx); } static int setplane_internal(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, int32_t crtc_x, int32_t crtc_y, uint32_t crtc_w, uint32_t crtc_h, /* src_{x,y,w,h} values are 16.16 fixed point */ uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h) { struct drm_modeset_acquire_ctx ctx; int ret; DRM_MODESET_LOCK_ALL_BEGIN(plane->dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE, ret); if (drm_drv_uses_atomic_modeset(plane->dev)) ret = __setplane_atomic(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h, &ctx); else ret = __setplane_internal(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h, &ctx); DRM_MODESET_LOCK_ALL_END(plane->dev, ctx, ret); return ret; } int drm_mode_setplane(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_set_plane *plane_req = data; struct drm_plane *plane; struct drm_crtc *crtc = NULL; struct drm_framebuffer *fb = NULL; int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; /* * First, find the plane, crtc, and fb objects. If not available, * we don't bother to call the driver. */ plane = drm_plane_find(dev, file_priv, plane_req->plane_id); if (!plane) { DRM_DEBUG_KMS("Unknown plane ID %d\n", plane_req->plane_id); return -ENOENT; } if (plane_req->fb_id) { fb = drm_framebuffer_lookup(dev, file_priv, plane_req->fb_id); if (!fb) { DRM_DEBUG_KMS("Unknown framebuffer ID %d\n", plane_req->fb_id); return -ENOENT; } crtc = drm_crtc_find(dev, file_priv, plane_req->crtc_id); if (!crtc) { drm_framebuffer_put(fb); DRM_DEBUG_KMS("Unknown crtc ID %d\n", plane_req->crtc_id); return -ENOENT; } } ret = setplane_internal(plane, crtc, fb, plane_req->crtc_x, plane_req->crtc_y, plane_req->crtc_w, plane_req->crtc_h, plane_req->src_x, plane_req->src_y, plane_req->src_w, plane_req->src_h); if (fb) drm_framebuffer_put(fb); return ret; } static int drm_mode_cursor_universal(struct drm_crtc *crtc, struct drm_mode_cursor2 *req, struct drm_file *file_priv, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->dev; struct drm_plane *plane = crtc->cursor; struct drm_framebuffer *fb = NULL; struct drm_mode_fb_cmd2 fbreq = { .width = req->width, .height = req->height, .pixel_format = DRM_FORMAT_ARGB8888, .pitches = { req->width * 4 }, .handles = { req->handle }, }; int32_t crtc_x, crtc_y; uint32_t crtc_w = 0, crtc_h = 0; uint32_t src_w = 0, src_h = 0; int ret = 0; BUG_ON(!plane); WARN_ON(plane->crtc != crtc && plane->crtc != NULL); /* * Obtain fb we'll be using (either new or existing) and take an extra * reference to it if fb != null. setplane will take care of dropping * the reference if the plane update fails. 
*/ if (req->flags & DRM_MODE_CURSOR_BO) { if (req->handle) { fb = drm_internal_framebuffer_create(dev, &fbreq, file_priv); if (IS_ERR(fb)) { DRM_DEBUG_KMS("failed to wrap cursor buffer in drm framebuffer\n"); return PTR_ERR(fb); } if (plane->hotspot_x_property && plane->state) plane->state->hotspot_x = req->hot_x; if (plane->hotspot_y_property && plane->state) plane->state->hotspot_y = req->hot_y; } else { fb = NULL; } } else { if (plane->state) fb = plane->state->fb; else fb = plane->fb; if (fb) drm_framebuffer_get(fb); } if (req->flags & DRM_MODE_CURSOR_MOVE) { crtc_x = req->x; crtc_y = req->y; } else { crtc_x = crtc->cursor_x; crtc_y = crtc->cursor_y; } if (fb) { crtc_w = fb->width; crtc_h = fb->height; src_w = fb->width << 16; src_h = fb->height << 16; } if (drm_drv_uses_atomic_modeset(dev)) ret = __setplane_atomic(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, 0, 0, src_w, src_h, ctx); else ret = __setplane_internal(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, 0, 0, src_w, src_h, ctx); if (fb) drm_framebuffer_put(fb); /* Update successful; save new cursor position, if necessary */ if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) { crtc->cursor_x = req->x; crtc->cursor_y = req->y; } return ret; } static int drm_mode_cursor_common(struct drm_device *dev, struct drm_mode_cursor2 *req, struct drm_file *file_priv) { struct drm_crtc *crtc; struct drm_modeset_acquire_ctx ctx; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; if (!req->flags || (~DRM_MODE_CURSOR_FLAGS & req->flags)) return -EINVAL; crtc = drm_crtc_find(dev, file_priv, req->crtc_id); if (!crtc) { DRM_DEBUG_KMS("Unknown CRTC ID %d\n", req->crtc_id); return -ENOENT; } drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); retry: ret = drm_modeset_lock(&crtc->mutex, &ctx); if (ret) goto out; /* * If this crtc has a universal cursor plane, call that plane's update * handler rather than using legacy cursor handlers. */ if (crtc->cursor) { ret = drm_modeset_lock(&crtc->cursor->mutex, &ctx); if (ret) goto out; if (!drm_lease_held(file_priv, crtc->cursor->base.id)) { ret = -EACCES; goto out; } ret = drm_mode_cursor_universal(crtc, req, file_priv, &ctx); goto out; } if (req->flags & DRM_MODE_CURSOR_BO) { if (!crtc->funcs->cursor_set && !crtc->funcs->cursor_set2) { ret = -ENXIO; goto out; } /* Turns off the cursor if handle is 0 */ if (crtc->funcs->cursor_set2) ret = crtc->funcs->cursor_set2(crtc, file_priv, req->handle, req->width, req->height, req->hot_x, req->hot_y); else ret = crtc->funcs->cursor_set(crtc, file_priv, req->handle, req->width, req->height); } if (req->flags & DRM_MODE_CURSOR_MOVE) { if (crtc->funcs->cursor_move) { ret = crtc->funcs->cursor_move(crtc, req->x, req->y); } else { ret = -EFAULT; goto out; } } out: if (ret == -EDEADLK) { ret = drm_modeset_backoff(&ctx); if (!ret) goto retry; } drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; } int drm_mode_cursor_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_cursor *req = data; struct drm_mode_cursor2 new_req; memcpy(&new_req, req, sizeof(struct drm_mode_cursor)); new_req.hot_x = new_req.hot_y = 0; return drm_mode_cursor_common(dev, &new_req, file_priv); } /* * Set the cursor configuration based on user request. This implements the 2nd * version of the cursor ioctl, which allows userspace to additionally specify * the hotspot of the pointer. 
*/ int drm_mode_cursor2_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_cursor2 *req = data; return drm_mode_cursor_common(dev, req, file_priv); } int drm_mode_page_flip_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc_page_flip_target *page_flip = data; struct drm_crtc *crtc; struct drm_plane *plane; struct drm_framebuffer *fb = NULL, *old_fb; struct drm_pending_vblank_event *e = NULL; u32 target_vblank = page_flip->sequence; struct drm_modeset_acquire_ctx ctx; int ret = -EINVAL; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS) return -EINVAL; if (page_flip->sequence != 0 && !(page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET)) return -EINVAL; /* Only one of the DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags * can be specified */ if ((page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET) == DRM_MODE_PAGE_FLIP_TARGET) return -EINVAL; if ((page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC) && !dev->mode_config.async_page_flip) return -EINVAL; crtc = drm_crtc_find(dev, file_priv, page_flip->crtc_id); if (!crtc) return -ENOENT; plane = crtc->primary; if (!drm_lease_held(file_priv, plane->base.id)) return -EACCES; if (crtc->funcs->page_flip_target) { u32 current_vblank; int r; r = drm_crtc_vblank_get(crtc); if (r) return r; current_vblank = (u32)drm_crtc_vblank_count(crtc); switch (page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET) { case DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE: if ((int)(target_vblank - current_vblank) > 1) { DRM_DEBUG("Invalid absolute flip target %u, " "must be <= %u\n", target_vblank, current_vblank + 1); drm_crtc_vblank_put(crtc); return -EINVAL; } break; case DRM_MODE_PAGE_FLIP_TARGET_RELATIVE: if (target_vblank != 0 && target_vblank != 1) { DRM_DEBUG("Invalid relative flip target %u, " "must be 0 or 1\n", target_vblank); drm_crtc_vblank_put(crtc); return -EINVAL; } target_vblank += current_vblank; break; default: target_vblank = current_vblank + !(page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC); break; } } else if (crtc->funcs->page_flip == NULL || (page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET)) { return -EINVAL; } drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); retry: ret = drm_modeset_lock(&crtc->mutex, &ctx); if (ret) goto out; ret = drm_modeset_lock(&plane->mutex, &ctx); if (ret) goto out; if (plane->state) old_fb = plane->state->fb; else old_fb = plane->fb; if (old_fb == NULL) { /* The framebuffer is currently unbound, presumably * due to a hotplug event, that userspace has not * yet discovered. */ ret = -EBUSY; goto out; } fb = drm_framebuffer_lookup(dev, file_priv, page_flip->fb_id); if (!fb) { ret = -ENOENT; goto out; } if (plane->state) { const struct drm_plane_state *state = plane->state; ret = drm_framebuffer_check_src_coords(state->src_x, state->src_y, state->src_w, state->src_h, fb); } else { ret = drm_crtc_check_viewport(crtc, crtc->x, crtc->y, &crtc->mode, fb); } if (ret) goto out; /* * Only check the FOURCC format code, excluding modifiers. This is * enough for all legacy drivers. Atomic drivers have their own * checks in their ->atomic_check implementation, which will * return -EINVAL if any hw or driver constraint is violated due * to modifier changes. 
*/ if (old_fb->format->format != fb->format->format) { DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer format.\n"); ret = -EINVAL; goto out; } if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) { e = kzalloc(sizeof *e, GFP_KERNEL); if (!e) { ret = -ENOMEM; goto out; } e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); e->event.vbl.user_data = page_flip->user_data; e->event.vbl.crtc_id = crtc->base.id; ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base); if (ret) { kfree(e); e = NULL; goto out; } } plane->old_fb = plane->fb; if (crtc->funcs->page_flip_target) ret = crtc->funcs->page_flip_target(crtc, fb, e, page_flip->flags, target_vblank, &ctx); else ret = crtc->funcs->page_flip(crtc, fb, e, page_flip->flags, &ctx); if (ret) { if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) drm_event_cancel_free(dev, &e->base); /* Keep the old fb, don't unref it. */ plane->old_fb = NULL; } else { if (!plane->state) { plane->fb = fb; drm_framebuffer_get(fb); } } out: if (fb) drm_framebuffer_put(fb); fb = NULL; if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; if (ret == -EDEADLK) { ret = drm_modeset_backoff(&ctx); if (!ret) goto retry; } drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); if (ret && crtc->funcs->page_flip_target) drm_crtc_vblank_put(crtc); return ret; } /** * DOC: damage tracking * * FB_DAMAGE_CLIPS is an optional plane property which provides a means to * specify a list of damage rectangles on a plane in framebuffer coordinates of * the framebuffer attached to the plane. In this context, damage is the area of * the plane framebuffer that has changed since the last plane update (also * called page-flip), irrespective of whether the currently attached framebuffer * is the same as the framebuffer attached during the last plane update or not. * * FB_DAMAGE_CLIPS is a hint to the kernel which can help some drivers optimize * internally, especially virtual devices where each framebuffer change needs to * be transmitted over network, usb, etc. * * Since FB_DAMAGE_CLIPS is only a hint, it is an optional property. User-space * can ignore the damage clips property, in which case the driver will do a full * plane update. If damage clips are provided, it is guaranteed that the area * inside the damage clips will be updated on the plane. For efficiency the * driver can do a full update or update more than what is specified in the * damage clips. Since the driver is free to read more, user-space must always * render the entire visible framebuffer; otherwise there can be corruption. * Likewise, if user-space provides damage clips which do not encompass the * actual damage to the framebuffer (since the last plane update), the result * can be incorrect rendering. * * FB_DAMAGE_CLIPS is a blob property whose data layout is simply an array of * &drm_mode_rect. Unlike plane &drm_plane_state.src coordinates, damage clips * are not in 16.16 fixed point. Similar to plane src in framebuffer, damage * clips cannot be negative. In a damage clip, x1/y1 are inclusive and x2/y2 are * exclusive. While the kernel does not return an error for overlapping damage * clips, they are strongly discouraged. * * Drivers that are interested in the damage interface for a plane should enable * the FB_DAMAGE_CLIPS property by calling drm_plane_enable_fb_damage_clips(). * Drivers implementing damage can use the drm_atomic_helper_damage_iter_init() * and drm_atomic_helper_damage_iter_next() helper iterator functions to get * damage rectangles clipped to &drm_plane_state.src.
* * Note that there are two types of damage handling: frame damage and buffer * damage; the type of damage handling implemented depends on a driver's upload * target. Drivers implementing a per-plane or per-CRTC upload target need to * handle frame damage, while drivers implementing a per-buffer upload target * need to handle buffer damage. * * The existing damage helpers only support the frame damage type; there is no * buffer age support or similar damage accumulation algorithm implemented yet. * * Only drivers handling frame damage can use the mentioned damage helpers to * iterate over the damaged regions. Drivers that handle buffer damage must set * &drm_plane_state.ignore_damage_clips for drm_atomic_helper_damage_iter_init() * to know that damage clips should be ignored and return &drm_plane_state.src * as the damage rectangle, to force a full plane update. * * Drivers with a per-buffer upload target could compare the &drm_plane_state.fb * of the old and new plane states to determine if the framebuffer attached to a * plane has changed or not since the last plane update. If &drm_plane_state.fb * has changed, then &drm_plane_state.ignore_damage_clips must be set to true. * * That is because drivers with a per-plane upload target expect the backing * storage buffer not to change for a given plane. If the upload buffer changes * between page flips, the new upload buffer has to be updated as a whole. This * can be improved in the future if support for frame damage is added to the DRM * damage helpers, similarly to how user-space already handles this case, as * explained in the following documents: * * https://registry.khronos.org/EGL/extensions/KHR/EGL_KHR_swap_buffers_with_damage.txt * https://emersion.fr/blog/2019/intro-to-damage-tracking/ * * A short illustrative sketch of a driver consuming these clips is included * after drm_plane_create_scaling_filter_property() at the end of this file. */ /** * drm_plane_enable_fb_damage_clips - Enables plane fb damage clips property. * @plane: Plane on which to enable damage clips property. * * This function lets drivers enable the damage clips property on a plane. */ void drm_plane_enable_fb_damage_clips(struct drm_plane *plane) { struct drm_device *dev = plane->dev; struct drm_mode_config *config = &dev->mode_config; drm_object_attach_property(&plane->base, config->prop_fb_damage_clips, 0); } EXPORT_SYMBOL(drm_plane_enable_fb_damage_clips); /** * drm_plane_get_damage_clips_count - Returns damage clips count. * @state: Plane state. * * Simple helper to get the number of &drm_mode_rect clips set by user-space * during a plane update. * * Return: Number of clips in plane fb_damage_clips blob property. */ unsigned int drm_plane_get_damage_clips_count(const struct drm_plane_state *state) { return (state && state->fb_damage_clips) ? state->fb_damage_clips->length/sizeof(struct drm_mode_rect) : 0; } EXPORT_SYMBOL(drm_plane_get_damage_clips_count); struct drm_mode_rect * __drm_plane_get_damage_clips(const struct drm_plane_state *state) { return (struct drm_mode_rect *)((state && state->fb_damage_clips) ? state->fb_damage_clips->data : NULL); } /** * drm_plane_get_damage_clips - Returns damage clips. * @state: Plane state. * * Note that this function returns uapi type &drm_mode_rect. Drivers might want * to use the helper functions drm_atomic_helper_damage_iter_init() and * drm_atomic_helper_damage_iter_next() or drm_atomic_helper_damage_merged() if * the driver can only handle a single damage region at most. * * Return: Damage clips in plane fb_damage_clips blob property.
*/ struct drm_mode_rect * drm_plane_get_damage_clips(const struct drm_plane_state *state) { struct drm_device *dev = state->plane->dev; struct drm_mode_config *config = &dev->mode_config; /* check that drm_plane_enable_fb_damage_clips() was called */ if (!drm_mode_obj_find_prop_id(&state->plane->base, config->prop_fb_damage_clips->base.id)) drm_warn_once(dev, "drm_plane_enable_fb_damage_clips() not called\n"); return __drm_plane_get_damage_clips(state); } EXPORT_SYMBOL(drm_plane_get_damage_clips); struct drm_property * drm_create_scaling_filter_prop(struct drm_device *dev, unsigned int supported_filters) { struct drm_property *prop; static const struct drm_prop_enum_list props[] = { { DRM_SCALING_FILTER_DEFAULT, "Default" }, { DRM_SCALING_FILTER_NEAREST_NEIGHBOR, "Nearest Neighbor" }, }; unsigned int valid_mode_mask = BIT(DRM_SCALING_FILTER_DEFAULT) | BIT(DRM_SCALING_FILTER_NEAREST_NEIGHBOR); int i; if (WARN_ON((supported_filters & ~valid_mode_mask) || ((supported_filters & BIT(DRM_SCALING_FILTER_DEFAULT)) == 0))) return ERR_PTR(-EINVAL); prop = drm_property_create(dev, DRM_MODE_PROP_ENUM, "SCALING_FILTER", hweight32(supported_filters)); if (!prop) return ERR_PTR(-ENOMEM); for (i = 0; i < ARRAY_SIZE(props); i++) { int ret; if (!(BIT(props[i].type) & supported_filters)) continue; ret = drm_property_add_enum(prop, props[i].type, props[i].name); if (ret) { drm_property_destroy(dev, prop); return ERR_PTR(ret); } } return prop; } /** * drm_plane_create_scaling_filter_property - create a new scaling filter * property * * @plane: drm plane * @supported_filters: bitmask of supported scaling filters, must include * BIT(DRM_SCALING_FILTER_DEFAULT). * * This function lets driver to enable the scaling filter property on a given * plane. * * RETURNS: * Zero for success or -errno */ int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters) { struct drm_property *prop = drm_create_scaling_filter_prop(plane->dev, supported_filters); if (IS_ERR(prop)) return PTR_ERR(prop); drm_object_attach_property(&plane->base, prop, DRM_SCALING_FILTER_DEFAULT); plane->scaling_filter_property = prop; return 0; } EXPORT_SYMBOL(drm_plane_create_scaling_filter_property);
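/*
 * Illustrative sketch only: one way a driver with a per-plane upload target
 * might consume the FB_DAMAGE_CLIPS property described in the "damage
 * tracking" section above. The foo_* names are hypothetical stand-ins for
 * driver-specific code; drm_atomic_helper_damage_iter_init() and
 * drm_atomic_helper_damage_iter_next() are the helpers referenced in that
 * section and are assumed to behave as documented there.
 */
#include <drm/drm_atomic.h>
#include <drm/drm_damage_helper.h>
#include <drm/drm_framebuffer.h>
#include <drm/drm_plane.h>
#include <drm/drm_rect.h>

/* Hypothetical, device-specific upload of a single damaged rectangle. */
static void foo_hw_flush_rect(struct drm_plane *plane,
			      struct drm_framebuffer *fb,
			      const struct drm_rect *clip)
{
	/* e.g. copy the pixels of @fb that lie inside @clip out to the device */
}

/* Hypothetical &drm_plane_helper_funcs.atomic_update implementation. */
static void foo_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
	struct drm_plane_state *old_state =
		drm_atomic_get_old_plane_state(state, plane);
	struct drm_plane_state *new_state =
		drm_atomic_get_new_plane_state(state, plane);
	struct drm_atomic_helper_damage_iter iter;
	struct drm_rect clip;

	/*
	 * Iterate over the damage rectangles clipped to the new
	 * &drm_plane_state.src. If user-space supplied no damage clips, the
	 * iterator hands back the full plane source as a single rectangle.
	 */
	drm_atomic_helper_damage_iter_init(&iter, old_state, new_state);
	while (drm_atomic_helper_damage_iter_next(&iter, &clip))
		foo_hw_flush_rect(plane, new_state->fb, &clip);
}

/*
 * At init time such a driver would opt in to receiving damage clips, e.g. by
 * calling drm_plane_enable_fb_damage_clips(plane) right after the plane is
 * initialized.
 */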
// SPDX-License-Identifier: GPL-2.0 /* * usb port device code * * Copyright (C) 2012 Intel Corp * * Author: Lan Tianyu <tianyu.lan@intel.com> */ #include <linux/kstrtox.h> #include <linux/slab.h> #include <linux/pm_qos.h> #include <linux/component.h>
#include <linux/usb/of.h> #include "hub.h" static int usb_port_block_power_off; static const struct attribute_group *port_dev_group[]; static ssize_t early_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sysfs_emit(buf, "%s\n", port_dev->early_stop ? "yes" : "no"); } static ssize_t early_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); bool value; if (kstrtobool(buf, &value)) return -EINVAL; if (value) port_dev->early_stop = 1; else port_dev->early_stop = 0; return count; } static DEVICE_ATTR_RW(early_stop); static ssize_t disable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_interface *intf = to_usb_interface(dev->parent); int port1 = port_dev->portnum; u16 portstatus, unused; bool disabled; int rc; struct kernfs_node *kn; if (!hub) return -ENODEV; hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) goto out_hub_get; /* * Prevent deadlock if another process is concurrently * trying to unregister hdev. */ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); if (!kn) { rc = -ENODEV; goto out_autopm; } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; goto out_hdev_lock; } usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); out_hdev_lock: usb_unlock_device(hdev); sysfs_unbreak_active_protection(kn); out_autopm: usb_autopm_put_interface(intf); out_hub_get: hub_put(hub); if (rc) return rc; return sysfs_emit(buf, "%s\n", disabled ? "1" : "0"); } static ssize_t disable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_interface *intf = to_usb_interface(dev->parent); int port1 = port_dev->portnum; bool disabled; int rc; struct kernfs_node *kn; if (!hub) return -ENODEV; rc = kstrtobool(buf, &disabled); if (rc) return rc; hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) goto out_hub_get; /* * Prevent deadlock if another process is concurrently * trying to unregister hdev. 
*/ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); if (!kn) { rc = -ENODEV; goto out_autopm; } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; goto out_hdev_lock; } if (disabled && port_dev->child) usb_disconnect(&port_dev->child); rc = usb_hub_set_port_power(hdev, hub, port1, !disabled); if (disabled) { usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); } if (!rc) rc = count; out_hdev_lock: usb_unlock_device(hdev); sysfs_unbreak_active_protection(kn); out_autopm: usb_autopm_put_interface(intf); out_hub_get: hub_put(hub); return rc; } static DEVICE_ATTR_RW(disable); static ssize_t location_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sprintf(buf, "0x%08x\n", port_dev->location); } static DEVICE_ATTR_RO(location); static ssize_t connect_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); char *result; switch (port_dev->connect_type) { case USB_PORT_CONNECT_TYPE_HOT_PLUG: result = "hotplug"; break; case USB_PORT_CONNECT_TYPE_HARD_WIRED: result = "hardwired"; break; case USB_PORT_NOT_USED: result = "not used"; break; default: result = "unknown"; break; } return sprintf(buf, "%s\n", result); } static DEVICE_ATTR_RO(connect_type); static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); enum usb_device_state state = READ_ONCE(port_dev->state); return sysfs_emit(buf, "%s\n", usb_state_string(state)); } static DEVICE_ATTR_RO(state); static ssize_t over_current_count_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sprintf(buf, "%u\n", port_dev->over_current_count); } static DEVICE_ATTR_RO(over_current_count); static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); return sprintf(buf, "%08x\n", port_dev->quirks); } static ssize_t quirks_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); u32 value; if (kstrtou32(buf, 16, &value)) return -EINVAL; port_dev->quirks = value; return count; } static DEVICE_ATTR_RW(quirks); static ssize_t usb3_lpm_permit_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_port *port_dev = to_usb_port(dev); const char *p; if (port_dev->usb3_lpm_u1_permit) { if (port_dev->usb3_lpm_u2_permit) p = "u1_u2"; else p = "u1"; } else { if (port_dev->usb3_lpm_u2_permit) p = "u2"; else p = "0"; } return sprintf(buf, "%s\n", p); } static ssize_t usb3_lpm_permit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *udev = port_dev->child; struct usb_hcd *hcd; if (!strncmp(buf, "u1_u2", 5)) { port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 1; } else if (!strncmp(buf, "u1", 2)) { port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 0; } else if (!strncmp(buf, "u2", 2)) { port_dev->usb3_lpm_u1_permit = 0; port_dev->usb3_lpm_u2_permit = 1; } else if (!strncmp(buf, "0", 1)) { port_dev->usb3_lpm_u1_permit = 0; port_dev->usb3_lpm_u2_permit = 0; } else return -EINVAL; /* If device is connected to the port, disable or enable lpm * to make new u1 u2 setting 
take effect immediately. */ if (udev) { hcd = bus_to_hcd(udev->bus); if (!hcd) return -EINVAL; usb_lock_device(udev); mutex_lock(hcd->bandwidth_mutex); if (!usb_disable_lpm(udev)) usb_enable_lpm(udev); mutex_unlock(hcd->bandwidth_mutex); usb_unlock_device(udev); } return count; } static DEVICE_ATTR_RW(usb3_lpm_permit); static struct attribute *port_dev_attrs[] = { &dev_attr_connect_type.attr, &dev_attr_state.attr, &dev_attr_location.attr, &dev_attr_quirks.attr, &dev_attr_over_current_count.attr, &dev_attr_disable.attr, &dev_attr_early_stop.attr, NULL, }; static const struct attribute_group port_dev_attr_grp = { .attrs = port_dev_attrs, }; static const struct attribute_group *port_dev_group[] = { &port_dev_attr_grp, NULL, }; static struct attribute *port_dev_usb3_attrs[] = { &dev_attr_usb3_lpm_permit.attr, NULL, }; static const struct attribute_group port_dev_usb3_attr_grp = { .attrs = port_dev_usb3_attrs, }; static const struct attribute_group *port_dev_usb3_group[] = { &port_dev_attr_grp, &port_dev_usb3_attr_grp, NULL, }; static void usb_port_device_release(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); kfree(port_dev->req); kfree(port_dev); } #ifdef CONFIG_PM static int usb_port_runtime_resume(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_interface *intf = to_usb_interface(dev->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_device *udev = port_dev->child; struct usb_port *peer = port_dev->peer; int port1 = port_dev->portnum; int retval; if (!hub) return -EINVAL; if (hub->in_reset) { set_bit(port1, hub->power_bits); return 0; } /* * Power on our usb3 peer before this usb2 port to prevent a usb3 * device from degrading to its usb2 connection */ if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); retval = usb_autopm_get_interface(intf); if (retval < 0) return retval; retval = usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { /* * Our preference is to simply wait for the port to reconnect, * as that is the lowest latency method to restart the port. * However, there are cases where toggling port power results in * the host port and the device port getting out of sync causing * a link training live lock. Upon timeout, flag the port as * needing warm reset recovery (to be performed later by * usb_port_resume() as requested via usb_wakeup_notification()) */ if (hub_port_debounce_be_connected(hub, port1) < 0) { dev_dbg(&port_dev->dev, "reconnect timeout\n"); if (hub_is_superspeed(hdev)) set_bit(port1, hub->warm_reset_bits); } /* Force the child awake to revalidate after the power loss. 
*/ if (!test_and_set_bit(port1, hub->child_usage_bits)) { pm_runtime_get_noresume(&port_dev->dev); pm_request_resume(&udev->dev); } } usb_autopm_put_interface(intf); return retval; } static int usb_port_runtime_suspend(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_interface *intf = to_usb_interface(dev->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_port *peer = port_dev->peer; int port1 = port_dev->portnum; int retval; if (!hub) return -EINVAL; if (hub->in_reset) return -EBUSY; if (dev_pm_qos_flags(&port_dev->dev, PM_QOS_FLAG_NO_POWER_OFF) == PM_QOS_FLAGS_ALL) return -EAGAIN; if (usb_port_block_power_off) return -EBUSY; retval = usb_autopm_get_interface(intf); if (retval < 0) return retval; retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); usb_autopm_put_interface(intf); /* * Our peer usb3 port may now be able to suspend, so * asynchronously queue a suspend request to observe that this * usb2 port is now off. */ if (!port_dev->is_superspeed && peer) pm_runtime_put(&peer->dev); return retval; } #endif static void usb_port_shutdown(struct device *dev) { struct usb_port *port_dev = to_usb_port(dev); if (port_dev->child) { usb_disable_usb2_hardware_lpm(port_dev->child); usb_unlocked_disable_lpm(port_dev->child); } } static const struct dev_pm_ops usb_port_pm_ops = { #ifdef CONFIG_PM .runtime_suspend = usb_port_runtime_suspend, .runtime_resume = usb_port_runtime_resume, #endif }; const struct device_type usb_port_device_type = { .name = "usb_port", .release = usb_port_device_release, .pm = &usb_port_pm_ops, }; static struct device_driver usb_port_driver = { .name = "usb", .owner = THIS_MODULE, .shutdown = usb_port_shutdown, }; static int link_peers(struct usb_port *left, struct usb_port *right) { struct usb_port *ss_port, *hs_port; int rc; if (left->peer == right && right->peer == left) return 0; if (left->peer || right->peer) { struct usb_port *lpeer = left->peer; struct usb_port *rpeer = right->peer; char *method; if (left->location && left->location == right->location) method = "location"; else method = "default"; pr_debug("usb: failed to peer %s and %s by %s (%s:%s) (%s:%s)\n", dev_name(&left->dev), dev_name(&right->dev), method, dev_name(&left->dev), lpeer ? dev_name(&lpeer->dev) : "none", dev_name(&right->dev), rpeer ? dev_name(&rpeer->dev) : "none"); return -EBUSY; } rc = sysfs_create_link(&left->dev.kobj, &right->dev.kobj, "peer"); if (rc) return rc; rc = sysfs_create_link(&right->dev.kobj, &left->dev.kobj, "peer"); if (rc) { sysfs_remove_link(&left->dev.kobj, "peer"); return rc; } /* * We need to wake the HiSpeed port to make sure we don't race * setting ->peer with usb_port_runtime_suspend(). Otherwise we * may miss a suspend event for the SuperSpeed port. */ if (left->is_superspeed) { ss_port = left; WARN_ON(right->is_superspeed); hs_port = right; } else { ss_port = right; WARN_ON(!right->is_superspeed); hs_port = left; } pm_runtime_get_sync(&hs_port->dev); left->peer = right; right->peer = left; /* * The SuperSpeed reference is dropped when the HiSpeed port in * this relationship suspends, i.e. when it is safe to allow a * SuperSpeed connection to drop since there is no risk of a * device degrading to its powered-off HiSpeed connection. * * Also, drop the HiSpeed ref taken above. 
*/ pm_runtime_get_sync(&ss_port->dev); pm_runtime_put(&hs_port->dev); return 0; } static void link_peers_report(struct usb_port *left, struct usb_port *right) { int rc; rc = link_peers(left, right); if (rc == 0) { dev_dbg(&left->dev, "peered to %s\n", dev_name(&right->dev)); } else { dev_dbg(&left->dev, "failed to peer to %s (%d)\n", dev_name(&right->dev), rc); pr_warn_once("usb: port power management may be unreliable\n"); usb_port_block_power_off = 1; } } static void unlink_peers(struct usb_port *left, struct usb_port *right) { struct usb_port *ss_port, *hs_port; WARN(right->peer != left || left->peer != right, "%s and %s are not peers?\n", dev_name(&left->dev), dev_name(&right->dev)); /* * We wake the HiSpeed port to make sure we don't race its * usb_port_runtime_resume() event which takes a SuperSpeed ref * when ->peer is !NULL. */ if (left->is_superspeed) { ss_port = left; hs_port = right; } else { ss_port = right; hs_port = left; } pm_runtime_get_sync(&hs_port->dev); sysfs_remove_link(&left->dev.kobj, "peer"); right->peer = NULL; sysfs_remove_link(&right->dev.kobj, "peer"); left->peer = NULL; /* Drop the SuperSpeed ref held on behalf of the active HiSpeed port */ pm_runtime_put(&ss_port->dev); /* Drop the ref taken above */ pm_runtime_put(&hs_port->dev); } /* * For each usb hub device in the system check to see if it is in the * peer domain of the given port_dev, and if it is check to see if it * has a port that matches the given port by location */ static int match_location(struct usb_device *peer_hdev, void *p) { int port1; struct usb_hcd *hcd, *peer_hcd; struct usb_port *port_dev = p, *peer; struct usb_hub *peer_hub = usb_hub_to_struct_hub(peer_hdev); struct usb_device *hdev = to_usb_device(port_dev->dev.parent->parent); if (!peer_hub || port_dev->connect_type == USB_PORT_NOT_USED) return 0; hcd = bus_to_hcd(hdev->bus); peer_hcd = bus_to_hcd(peer_hdev->bus); /* peer_hcd is provisional until we verify it against the known peer */ if (peer_hcd != hcd->shared_hcd) return 0; for (port1 = 1; port1 <= peer_hdev->maxchild; port1++) { peer = peer_hub->ports[port1 - 1]; if (peer && peer->connect_type != USB_PORT_NOT_USED && peer->location == port_dev->location) { link_peers_report(port_dev, peer); return 1; /* done */ } } return 0; } /* * Find the peer port either via explicit platform firmware "location" * data, the peer hcd for root hubs, or the upstream peer relationship * for all other hubs. 
*/ static void find_and_link_peer(struct usb_hub *hub, int port1) { struct usb_port *port_dev = hub->ports[port1 - 1], *peer; struct usb_device *hdev = hub->hdev; struct usb_device *peer_hdev; struct usb_hub *peer_hub; /* * If location data is available then we can only peer this port * by a location match, not the default peer (lest we create a * situation where we need to go back and undo a default peering * when the port is later peered by location data) */ if (port_dev->location) { /* we link the peer in match_location() if found */ usb_for_each_dev(port_dev, match_location); return; } else if (!hdev->parent) { struct usb_hcd *hcd = bus_to_hcd(hdev->bus); struct usb_hcd *peer_hcd = hcd->shared_hcd; if (!peer_hcd) return; peer_hdev = peer_hcd->self.root_hub; } else { struct usb_port *upstream; struct usb_device *parent = hdev->parent; struct usb_hub *parent_hub = usb_hub_to_struct_hub(parent); if (!parent_hub) return; upstream = parent_hub->ports[hdev->portnum - 1]; if (!upstream || !upstream->peer) return; peer_hdev = upstream->peer->child; } peer_hub = usb_hub_to_struct_hub(peer_hdev); if (!peer_hub || port1 > peer_hdev->maxchild) return; /* * we found a valid default peer, last check is to make sure it * does not have location data */ peer = peer_hub->ports[port1 - 1]; if (peer && peer->location == 0) link_peers_report(port_dev, peer); } static int connector_bind(struct device *dev, struct device *connector, void *data) { struct usb_port *port_dev = to_usb_port(dev); int ret; ret = sysfs_create_link(&dev->kobj, &connector->kobj, "connector"); if (ret) return ret; ret = sysfs_create_link(&connector->kobj, &dev->kobj, dev_name(dev)); if (ret) { sysfs_remove_link(&dev->kobj, "connector"); return ret; } port_dev->connector = data; /* * If there is already USB device connected to the port, letting the * Type-C connector know about it immediately. 
*/ if (port_dev->child) typec_attach(port_dev->connector, &port_dev->child->dev); return 0; } static void connector_unbind(struct device *dev, struct device *connector, void *data) { struct usb_port *port_dev = to_usb_port(dev); sysfs_remove_link(&connector->kobj, dev_name(dev)); sysfs_remove_link(&dev->kobj, "connector"); port_dev->connector = NULL; } static const struct component_ops connector_ops = { .bind = connector_bind, .unbind = connector_unbind, }; int usb_hub_create_port_device(struct usb_hub *hub, int port1) { struct usb_port *port_dev; struct usb_device *hdev = hub->hdev; int retval; port_dev = kzalloc(sizeof(*port_dev), GFP_KERNEL); if (!port_dev) return -ENOMEM; port_dev->req = kzalloc(sizeof(*(port_dev->req)), GFP_KERNEL); if (!port_dev->req) { kfree(port_dev); return -ENOMEM; } port_dev->connect_type = usb_of_get_connect_type(hdev, port1); hub->ports[port1 - 1] = port_dev; port_dev->portnum = port1; set_bit(port1, hub->power_bits); port_dev->dev.parent = hub->intfdev; if (hub_is_superspeed(hdev)) { port_dev->is_superspeed = 1; port_dev->usb3_lpm_u1_permit = 1; port_dev->usb3_lpm_u2_permit = 1; port_dev->dev.groups = port_dev_usb3_group; } else port_dev->dev.groups = port_dev_group; port_dev->dev.type = &usb_port_device_type; port_dev->dev.driver = &usb_port_driver; dev_set_name(&port_dev->dev, "%s-port%d", dev_name(&hub->hdev->dev), port1); mutex_init(&port_dev->status_lock); retval = device_register(&port_dev->dev); if (retval) { put_device(&port_dev->dev); return retval; } port_dev->state_kn = sysfs_get_dirent(port_dev->dev.kobj.sd, "state"); if (!port_dev->state_kn) { dev_err(&port_dev->dev, "failed to sysfs_get_dirent 'state'\n"); retval = -ENODEV; goto err_unregister; } /* Set default policy of port-poweroff disabled. */ retval = dev_pm_qos_add_request(&port_dev->dev, port_dev->req, DEV_PM_QOS_FLAGS, PM_QOS_FLAG_NO_POWER_OFF); if (retval < 0) { goto err_put_kn; } retval = component_add(&port_dev->dev, &connector_ops); if (retval) { dev_warn(&port_dev->dev, "failed to add component\n"); goto err_put_kn; } find_and_link_peer(hub, port1); /* * Enable runtime pm and hold a refernce that hub_configure() * will drop once the PM_QOS_NO_POWER_OFF flag state has been set * and the hub has been fully registered (hdev->maxchild set). */ pm_runtime_set_active(&port_dev->dev); pm_runtime_get_noresume(&port_dev->dev); pm_runtime_enable(&port_dev->dev); device_enable_async_suspend(&port_dev->dev); /* * Keep hidden the ability to enable port-poweroff if the hub * does not support power switching. */ if (!hub_is_port_power_switchable(hub)) return 0; /* Attempt to let userspace take over the policy. */ retval = dev_pm_qos_expose_flags(&port_dev->dev, PM_QOS_FLAG_NO_POWER_OFF); if (retval < 0) { dev_warn(&port_dev->dev, "failed to expose pm_qos_no_poweroff\n"); return 0; } /* Userspace owns the policy, drop the kernel 'no_poweroff' request. */ retval = dev_pm_qos_remove_request(port_dev->req); if (retval >= 0) { kfree(port_dev->req); port_dev->req = NULL; } return 0; err_put_kn: sysfs_put(port_dev->state_kn); err_unregister: device_unregister(&port_dev->dev); return retval; } void usb_hub_remove_port_device(struct usb_hub *hub, int port1) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_port *peer; peer = port_dev->peer; if (peer) unlink_peers(port_dev, peer); component_del(&port_dev->dev, &connector_ops); sysfs_put(port_dev->state_kn); device_unregister(&port_dev->dev); }
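/*
 * Illustrative sketch only: the sysfs_break_active_protection() pattern used
 * by disable_show()/disable_store() above, in a condensed, generic form. The
 * foo_* names are hypothetical; the point is the ordering: pin the underlying
 * object, drop sysfs active protection so a concurrent unregister of the
 * device cannot deadlock against this attribute, then take the device lock
 * and re-check that the object is still alive before touching it.
 */
#include <linux/device.h>
#include <linux/kernfs.h>
#include <linux/kstrtox.h>
#include <linux/sysfs.h>

struct foo_port {
	struct device dev;
	bool gone;		/* hypothetical flag set by the unregister path */
	unsigned int value;
};
#define to_foo_port(d) container_of(d, struct foo_port, dev)

static ssize_t foo_value_store(struct device *dev, struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct foo_port *port = to_foo_port(dev);
	struct kernfs_node *kn;
	unsigned int val;
	int rc;

	rc = kstrtouint(buf, 0, &val);
	if (rc)
		return rc;

	get_device(dev);				/* pin the object */
	kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
	if (!kn) {
		rc = -ENODEV;		/* attribute is already being removed */
		goto out_put;
	}

	device_lock(dev);
	if (port->gone) {			/* re-check under the lock */
		rc = -ENODEV;
		goto out_unlock;
	}
	port->value = val;
	rc = count;
out_unlock:
	device_unlock(dev);
	sysfs_unbreak_active_protection(kn);
out_put:
	put_device(dev);
	return rc;
}
static DEVICE_ATTR_WO(foo_value);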
// SPDX-License-Identifier: GPL-2.0-or-later /* * NetLabel Unlabeled Support * * This file defines functions for dealing with unlabeled packets for the * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO.
* * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008 */ #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/string.h> #include <linux/skbuff.h> #include <linux/audit.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/security.h> #include <linux/slab.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/net_namespace.h> #include <net/netlabel.h> #include <asm/bug.h> #include <linux/atomic.h> #include "netlabel_user.h" #include "netlabel_addrlist.h" #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" #include "netlabel_mgmt.h" /* NOTE: at present we always use init's network namespace since we don't * presently support different namespaces even though the majority of * the functions in this file are "namespace safe" */ /* The unlabeled connection hash table which we use to map network interfaces * and addresses of unlabeled packets to a user specified secid value for the * LSM. The hash table is used to lookup the network interface entry * (struct netlbl_unlhsh_iface) and then the interface entry is used to * lookup an IP address match from an ordered list. If a network interface * match can not be found in the hash table then the default entry * (netlbl_unlhsh_def) is used. The IP address entry list * (struct netlbl_unlhsh_addr) is ordered such that the entries with a * larger netmask come first. */ struct netlbl_unlhsh_tbl { struct list_head *tbl; u32 size; }; #define netlbl_unlhsh_addr4_entry(iter) \ container_of(iter, struct netlbl_unlhsh_addr4, list) struct netlbl_unlhsh_addr4 { u32 secid; struct netlbl_af4list list; struct rcu_head rcu; }; #define netlbl_unlhsh_addr6_entry(iter) \ container_of(iter, struct netlbl_unlhsh_addr6, list) struct netlbl_unlhsh_addr6 { u32 secid; struct netlbl_af6list list; struct rcu_head rcu; }; struct netlbl_unlhsh_iface { int ifindex; struct list_head addr4_list; struct list_head addr6_list; u32 valid; struct list_head list; struct rcu_head rcu; }; /* Argument struct for netlbl_unlhsh_walk() */ struct netlbl_unlhsh_walk_arg { struct netlink_callback *nl_cb; struct sk_buff *skb; u32 seq; }; /* Unlabeled connection hash table */ /* updates should be so rare that having one spinlock for the entire * hash table should be okay */ static DEFINE_SPINLOCK(netlbl_unlhsh_lock); #define netlbl_unlhsh_rcu_deref(p) \ rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock)) static struct netlbl_unlhsh_tbl __rcu *netlbl_unlhsh; static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def; /* Accept unlabeled packets flag */ static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ static struct genl_family netlbl_unlabel_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, [NLBL_UNLABEL_A_IPV6ADDR] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr) }, [NLBL_UNLABEL_A_IPV6MASK] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr) }, [NLBL_UNLABEL_A_IPV4ADDR] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [NLBL_UNLABEL_A_IPV4MASK] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [NLBL_UNLABEL_A_IFACE] = { .type = 
NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [NLBL_UNLABEL_A_SECCTX] = { .type = NLA_BINARY } }; /* * Unlabeled Connection Hash Table Functions */ /** * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table * @entry: the entry's RCU field * * Description: * This function is designed to be used as a callback to the call_rcu() * function so that memory allocated to a hash table interface entry can be * released safely. It is important to note that this function does not free * the IPv4 and IPv6 address lists contained as part of an interface entry. It * is up to the rest of the code to make sure an interface entry is only freed * once it's address lists are empty. * */ static void netlbl_unlhsh_free_iface(struct rcu_head *entry) { struct netlbl_unlhsh_iface *iface; struct netlbl_af4list *iter4; struct netlbl_af4list *tmp4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; struct netlbl_af6list *tmp6; #endif /* IPv6 */ iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); /* no need for locks here since we are the only one with access to this * structure */ netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) { netlbl_af4list_remove_entry(iter4); kfree(netlbl_unlhsh_addr4_entry(iter4)); } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) { netlbl_af6list_remove_entry(iter6); kfree(netlbl_unlhsh_addr6_entry(iter6)); } #endif /* IPv6 */ kfree(iface); } /** * netlbl_unlhsh_hash - Hashing function for the hash table * @ifindex: the network interface/device to hash * * Description: * This is the hashing function for the unlabeled hash table, it returns the * bucket number for the given device/interface. The caller is responsible for * ensuring that the hash table is protected with either a RCU read lock or * the hash table lock. * */ static u32 netlbl_unlhsh_hash(int ifindex) { return ifindex & (netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->size - 1); } /** * netlbl_unlhsh_search_iface - Search for a matching interface entry * @ifindex: the network interface * * Description: * Searches the unlabeled connection hash table and returns a pointer to the * interface entry which matches @ifindex, otherwise NULL is returned. The * caller is responsible for ensuring that the hash table is protected with * either a RCU read lock or the hash table lock. * */ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) { u32 bkt; struct list_head *bkt_list; struct netlbl_unlhsh_iface *iter; bkt = netlbl_unlhsh_hash(ifindex); bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]; list_for_each_entry_rcu(iter, bkt_list, list, lockdep_is_held(&netlbl_unlhsh_lock)) if (iter->valid && iter->ifindex == ifindex) return iter; return NULL; } /** * netlbl_unlhsh_add_addr4 - Add a new IPv4 address entry to the hash table * @iface: the associated interface entry * @addr: IPv4 address in network byte order * @mask: IPv4 address mask in network byte order * @secid: LSM secid value for entry * * Description: * Add a new address entry into the unlabeled connection hash table using the * interface entry specified by @iface. On success zero is returned, otherwise * a negative value is returned. 
* */ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, const struct in_addr *addr, const struct in_addr *mask, u32 secid) { int ret_val; struct netlbl_unlhsh_addr4 *entry; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; entry->list.addr = addr->s_addr & mask->s_addr; entry->list.mask = mask->s_addr; entry->list.valid = 1; entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); if (ret_val != 0) kfree(entry); return ret_val; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table * @iface: the associated interface entry * @addr: IPv6 address in network byte order * @mask: IPv6 address mask in network byte order * @secid: LSM secid value for entry * * Description: * Add a new address entry into the unlabeled connection hash table using the * interface entry specified by @iface. On success zero is returned, otherwise * a negative value is returned. * */ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, const struct in6_addr *addr, const struct in6_addr *mask, u32 secid) { int ret_val; struct netlbl_unlhsh_addr6 *entry; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; entry->list.addr = *addr; entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; entry->list.mask = *mask; entry->list.valid = 1; entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); if (ret_val != 0) kfree(entry); return 0; } #endif /* IPv6 */ /** * netlbl_unlhsh_add_iface - Adds a new interface entry to the hash table * @ifindex: network interface * * Description: * Add a new, empty, interface entry into the unlabeled connection hash table. * On success a pointer to the new interface entry is returned, on failure NULL * is returned. * */ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) { u32 bkt; struct netlbl_unlhsh_iface *iface; iface = kzalloc(sizeof(*iface), GFP_ATOMIC); if (iface == NULL) return NULL; iface->ifindex = ifindex; INIT_LIST_HEAD(&iface->addr4_list); INIT_LIST_HEAD(&iface->addr6_list); iface->valid = 1; spin_lock(&netlbl_unlhsh_lock); if (ifindex > 0) { bkt = netlbl_unlhsh_hash(ifindex); if (netlbl_unlhsh_search_iface(ifindex) != NULL) goto add_iface_failure; list_add_tail_rcu(&iface->list, &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]); } else { INIT_LIST_HEAD(&iface->list); if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL) goto add_iface_failure; rcu_assign_pointer(netlbl_unlhsh_def, iface); } spin_unlock(&netlbl_unlhsh_lock); return iface; add_iface_failure: spin_unlock(&netlbl_unlhsh_lock); kfree(iface); return NULL; } /** * netlbl_unlhsh_add - Adds a new entry to the unlabeled connection hash table * @net: network namespace * @dev_name: interface name * @addr: IP address in network byte order * @mask: address mask in network byte order * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6) * @secid: LSM secid value for the entry * @audit_info: NetLabel audit information * * Description: * Adds a new entry to the unlabeled connection hash table. Returns zero on * success, negative values on failure. 
* */ int netlbl_unlhsh_add(struct net *net, const char *dev_name, const void *addr, const void *mask, u32 addr_len, u32 secid, struct netlbl_audit *audit_info) { int ret_val; int ifindex; struct net_device *dev; struct netlbl_unlhsh_iface *iface; struct audit_buffer *audit_buf = NULL; char *secctx = NULL; u32 secctx_len; if (addr_len != sizeof(struct in_addr) && addr_len != sizeof(struct in6_addr)) return -EINVAL; rcu_read_lock(); if (dev_name != NULL) { dev = dev_get_by_name_rcu(net, dev_name); if (dev == NULL) { ret_val = -ENODEV; goto unlhsh_add_return; } ifindex = dev->ifindex; iface = netlbl_unlhsh_search_iface(ifindex); } else { ifindex = 0; iface = rcu_dereference(netlbl_unlhsh_def); } if (iface == NULL) { iface = netlbl_unlhsh_add_iface(ifindex); if (iface == NULL) { ret_val = -ENOMEM; goto unlhsh_add_return; } } audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCADD, audit_info); switch (addr_len) { case sizeof(struct in_addr): { const struct in_addr *addr4 = addr; const struct in_addr *mask4 = mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) netlbl_af4list_audit_addr(audit_buf, 1, dev_name, addr4->s_addr, mask4->s_addr); break; } #if IS_ENABLED(CONFIG_IPV6) case sizeof(struct in6_addr): { const struct in6_addr *addr6 = addr; const struct in6_addr *mask6 = mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) netlbl_af6list_audit_addr(audit_buf, 1, dev_name, addr6, mask6); break; } #endif /* IPv6 */ default: ret_val = -EINVAL; } if (ret_val == 0) atomic_inc(&netlabel_mgmt_protocount); unlhsh_add_return: rcu_read_unlock(); if (audit_buf != NULL) { if (security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * netlbl_unlhsh_remove_addr4 - Remove an IPv4 address entry * @net: network namespace * @iface: interface entry * @addr: IP address * @mask: IP address mask * @audit_info: NetLabel audit information * * Description: * Remove an IP address entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. * */ static int netlbl_unlhsh_remove_addr4(struct net *net, struct netlbl_unlhsh_iface *iface, const struct in_addr *addr, const struct in_addr *mask, struct netlbl_audit *audit_info) { struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; struct audit_buffer *audit_buf; struct net_device *dev; char *secctx; u32 secctx_len; spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); if (list_entry != NULL) entry = netlbl_unlhsh_addr4_entry(list_entry); else entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); netlbl_af4list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr->s_addr, mask->s_addr); dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } audit_log_format(audit_buf, " res=%u", entry != NULL ? 
1 : 0); audit_log_end(audit_buf); } if (entry == NULL) return -ENOENT; kfree_rcu(entry, rcu); return 0; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry * @net: network namespace * @iface: interface entry * @addr: IP address * @mask: IP address mask * @audit_info: NetLabel audit information * * Description: * Remove an IP address entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. * */ static int netlbl_unlhsh_remove_addr6(struct net *net, struct netlbl_unlhsh_iface *iface, const struct in6_addr *addr, const struct in6_addr *mask, struct netlbl_audit *audit_info) { struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; struct audit_buffer *audit_buf; struct net_device *dev; char *secctx; u32 secctx_len; spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); if (list_entry != NULL) entry = netlbl_unlhsh_addr6_entry(list_entry); else entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); netlbl_af6list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr, mask); dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } if (entry == NULL) return -ENOENT; kfree_rcu(entry, rcu); return 0; } #endif /* IPv6 */ /** * netlbl_unlhsh_condremove_iface - Remove an interface entry * @iface: the interface entry * * Description: * Remove an interface entry from the unlabeled connection hash table if it is * empty. An interface entry is considered to be empty if there are no * address entries assigned to it. * */ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) { struct netlbl_af4list *iter4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; #endif /* IPv6 */ spin_lock(&netlbl_unlhsh_lock); netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list) goto unlhsh_condremove_failure; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list) goto unlhsh_condremove_failure; #endif /* IPv6 */ iface->valid = 0; if (iface->ifindex > 0) list_del_rcu(&iface->list); else RCU_INIT_POINTER(netlbl_unlhsh_def, NULL); spin_unlock(&netlbl_unlhsh_lock); call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); return; unlhsh_condremove_failure: spin_unlock(&netlbl_unlhsh_lock); } /** * netlbl_unlhsh_remove - Remove an entry from the unlabeled hash table * @net: network namespace * @dev_name: interface name * @addr: IP address in network byte order * @mask: address mask in network byte order * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6) * @audit_info: NetLabel audit information * * Description: * Removes and existing entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. 
* */ int netlbl_unlhsh_remove(struct net *net, const char *dev_name, const void *addr, const void *mask, u32 addr_len, struct netlbl_audit *audit_info) { int ret_val; struct net_device *dev; struct netlbl_unlhsh_iface *iface; if (addr_len != sizeof(struct in_addr) && addr_len != sizeof(struct in6_addr)) return -EINVAL; rcu_read_lock(); if (dev_name != NULL) { dev = dev_get_by_name_rcu(net, dev_name); if (dev == NULL) { ret_val = -ENODEV; goto unlhsh_remove_return; } iface = netlbl_unlhsh_search_iface(dev->ifindex); } else iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL) { ret_val = -ENOENT; goto unlhsh_remove_return; } switch (addr_len) { case sizeof(struct in_addr): ret_val = netlbl_unlhsh_remove_addr4(net, iface, addr, mask, audit_info); break; #if IS_ENABLED(CONFIG_IPV6) case sizeof(struct in6_addr): ret_val = netlbl_unlhsh_remove_addr6(net, iface, addr, mask, audit_info); break; #endif /* IPv6 */ default: ret_val = -EINVAL; } if (ret_val == 0) { netlbl_unlhsh_condremove_iface(iface); atomic_dec(&netlabel_mgmt_protocount); } unlhsh_remove_return: rcu_read_unlock(); return ret_val; } /* * General Helper Functions */ /** * netlbl_unlhsh_netdev_handler - Network device notification handler * @this: notifier block * @event: the event * @ptr: the netdevice notifier info (cast to void) * * Description: * Handle network device events, although at present all we care about is a * network device going away. In the case of a device going away we clear any * related entries from the unlabeled connection hash table. * */ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netlbl_unlhsh_iface *iface = NULL; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? */ if (event == NETDEV_DOWN) { spin_lock(&netlbl_unlhsh_lock); iface = netlbl_unlhsh_search_iface(dev->ifindex); if (iface != NULL && iface->valid) { iface->valid = 0; list_del_rcu(&iface->list); } else iface = NULL; spin_unlock(&netlbl_unlhsh_lock); } if (iface != NULL) call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); return NOTIFY_DONE; } /** * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag * @value: desired value * @audit_info: NetLabel audit information * * Description: * Set the value of the unlabeled accept flag to @value. * */ static void netlbl_unlabel_acceptflg_set(u8 value, struct netlbl_audit *audit_info) { struct audit_buffer *audit_buf; u8 old_val; old_val = netlabel_unlabel_acceptflg; netlabel_unlabel_acceptflg = value; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, audit_info); if (audit_buf != NULL) { audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val); audit_log_end(audit_buf); } } /** * netlbl_unlabel_addrinfo_get - Get the IPv4/6 address information * @info: the Generic NETLINK info block * @addr: the IP address * @mask: the IP address mask * @len: the address length * * Description: * Examine the Generic NETLINK message and extract the IP address information. * Returns zero on success, negative values on failure. 
* */ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, void **addr, void **mask, u32 *len) { u32 addr_len; if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); if (addr_len != sizeof(struct in_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) return -EINVAL; *len = addr_len; *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4MASK]); return 0; } else if (info->attrs[NLBL_UNLABEL_A_IPV6ADDR]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); if (addr_len != sizeof(struct in6_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV6MASK])) return -EINVAL; *len = addr_len; *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6MASK]); return 0; } return -EINVAL; } /* * NetLabel Command Handlers */ /** * netlbl_unlabel_accept - Handle an ACCEPT message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated ACCEPT message and set the accept flag accordingly. * Returns zero on success, negative values on failure. * */ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { u8 value; struct netlbl_audit audit_info; if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { netlbl_netlink_auditinfo(&audit_info); netlbl_unlabel_acceptflg_set(value, &audit_info); return 0; } } return -EINVAL; } /** * netlbl_unlabel_list - Handle a LIST message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated LIST message and respond with the current status. * Returns zero on success, negative values on failure. * */ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; struct sk_buff *ans_skb; void *data; ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) goto list_failure; data = genlmsg_put_reply(ans_skb, info, &netlbl_unlabel_gnl_family, 0, NLBL_UNLABEL_C_LIST); if (data == NULL) { ret_val = -ENOMEM; goto list_failure; } ret_val = nla_put_u8(ans_skb, NLBL_UNLABEL_A_ACPTFLG, netlabel_unlabel_acceptflg); if (ret_val != 0) goto list_failure; genlmsg_end(ans_skb, data); return genlmsg_reply(ans_skb, info); list_failure: kfree_skb(ans_skb); return ret_val; } /** * netlbl_unlabel_staticadd - Handle a STATICADD message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICADD message and add a new unlabeled * connection entry to the hash table. Returns zero on success, negative * values on failure. * */ static int netlbl_unlabel_staticadd(struct sk_buff *skb, struct genl_info *info) { int ret_val; char *dev_name; void *addr; void *mask; u32 addr_len; u32 secid; struct netlbl_audit audit_info; /* Don't allow users to add both IPv4 and IPv6 addresses for a * single entry. However, allow users to create two entries, one each * for IPv4 and IPv6, with the same LSM security context which should * achieve the same result. 
*/ if (!info->attrs[NLBL_UNLABEL_A_SECCTX] || !info->attrs[NLBL_UNLABEL_A_IFACE] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]); ret_val = security_secctx_to_secid( nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]), nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]), &secid); if (ret_val != 0) return ret_val; return netlbl_unlhsh_add(&init_net, dev_name, addr, mask, addr_len, secid, &audit_info); } /** * netlbl_unlabel_staticadddef - Handle a STATICADDDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICADDDEF message and add a new default * unlabeled connection entry. Returns zero on success, negative values on * failure. * */ static int netlbl_unlabel_staticadddef(struct sk_buff *skb, struct genl_info *info) { int ret_val; void *addr; void *mask; u32 addr_len; u32 secid; struct netlbl_audit audit_info; /* Don't allow users to add both IPv4 and IPv6 addresses for a * single entry. However, allow users to create two entries, one each * for IPv4 and IPv6, with the same LSM security context which should * achieve the same result. */ if (!info->attrs[NLBL_UNLABEL_A_SECCTX] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; ret_val = security_secctx_to_secid( nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]), nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]), &secid); if (ret_val != 0) return ret_val; return netlbl_unlhsh_add(&init_net, NULL, addr, mask, addr_len, secid, &audit_info); } /** * netlbl_unlabel_staticremove - Handle a STATICREMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICREMOVE message and remove the specified * unlabeled connection entry. Returns zero on success, negative values on * failure. * */ static int netlbl_unlabel_staticremove(struct sk_buff *skb, struct genl_info *info) { int ret_val; char *dev_name; void *addr; void *mask; u32 addr_len; struct netlbl_audit audit_info; /* See the note in netlbl_unlabel_staticadd() about not allowing both * IPv4 and IPv6 in the same entry. */ if (!info->attrs[NLBL_UNLABEL_A_IFACE] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]); return netlbl_unlhsh_remove(&init_net, dev_name, addr, mask, addr_len, &audit_info); } /** * netlbl_unlabel_staticremovedef - Handle a STATICREMOVEDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICREMOVEDEF message and remove the default * unlabeled connection entry. Returns zero on success, negative values on * failure. 
* */ static int netlbl_unlabel_staticremovedef(struct sk_buff *skb, struct genl_info *info) { int ret_val; void *addr; void *mask; u32 addr_len; struct netlbl_audit audit_info; /* See the note in netlbl_unlabel_staticadd() about not allowing both * IPv4 and IPv6 in the same entry. */ if (!((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; return netlbl_unlhsh_remove(&init_net, NULL, addr, mask, addr_len, &audit_info); } /** * netlbl_unlabel_staticlist_gen - Generate messages for STATICLIST[DEF] * @cmd: command/message * @iface: the interface entry * @addr4: the IPv4 address entry * @addr6: the IPv6 address entry * @arg: the netlbl_unlhsh_walk_arg structure * * Description: * This function is designed to be used to generate a response for a * STATICLIST or STATICLISTDEF message. When called either @addr4 or @addr6 * can be specified, not both, the other unspecified entry should be set to * NULL by the caller. Returns the size of the message on success, negative * values on failure. * */ static int netlbl_unlabel_staticlist_gen(u32 cmd, const struct netlbl_unlhsh_iface *iface, const struct netlbl_unlhsh_addr4 *addr4, const struct netlbl_unlhsh_addr6 *addr6, void *arg) { int ret_val = -ENOMEM; struct netlbl_unlhsh_walk_arg *cb_arg = arg; struct net_device *dev; void *data; u32 secid; char *secctx; u32 secctx_len; data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, NLM_F_MULTI, cmd); if (data == NULL) goto list_cb_failure; if (iface->ifindex > 0) { dev = dev_get_by_index(&init_net, iface->ifindex); if (!dev) { ret_val = -ENODEV; goto list_cb_failure; } ret_val = nla_put_string(cb_arg->skb, NLBL_UNLABEL_A_IFACE, dev->name); dev_put(dev); if (ret_val != 0) goto list_cb_failure; } if (addr4) { struct in_addr addr_struct; addr_struct.s_addr = addr4->list.addr; ret_val = nla_put_in_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV4ADDR, addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; addr_struct.s_addr = addr4->list.mask; ret_val = nla_put_in_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV4MASK, addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; secid = addr4->secid; } else { ret_val = nla_put_in6_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV6ADDR, &addr6->list.addr); if (ret_val != 0) goto list_cb_failure; ret_val = nla_put_in6_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV6MASK, &addr6->list.mask); if (ret_val != 0) goto list_cb_failure; secid = addr6->secid; } ret_val = security_secid_to_secctx(secid, &secctx, &secctx_len); if (ret_val != 0) goto list_cb_failure; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_SECCTX, secctx_len, secctx); security_release_secctx(secctx, secctx_len); if (ret_val != 0) goto list_cb_failure; cb_arg->seq++; genlmsg_end(cb_arg->skb, data); return 0; list_cb_failure: genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** * netlbl_unlabel_staticlist - Handle a STATICLIST message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated STATICLIST message and dump the unlabeled * connection hash table in a form suitable for use in a kernel generated * STATICLIST message. Returns the length of @skb. 
* */ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; u32 skip_addr4 = cb->args[2]; u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) u32 skip_addr6 = cb->args[3]; struct netlbl_af6list *addr6; #endif cb_arg.nl_cb = cb; cb_arg.skb = skb; cb_arg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; iter_bkt++) { iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || iter_chain++ < skip_chain) continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, iface, netlbl_unlhsh_addr4_entry(addr4), NULL, &cb_arg) < 0) { iter_addr4--; iter_chain--; goto unlabel_staticlist_return; } } iter_addr4 = 0; skip_addr4 = 0; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, iface, NULL, netlbl_unlhsh_addr6_entry(addr6), &cb_arg) < 0) { iter_addr6--; iter_chain--; goto unlabel_staticlist_return; } } iter_addr6 = 0; skip_addr6 = 0; #endif /* IPv6 */ } iter_chain = 0; skip_chain = 0; } unlabel_staticlist_return: rcu_read_unlock(); cb->args[0] = iter_bkt; cb->args[1] = iter_chain; cb->args[2] = iter_addr4; cb->args[3] = iter_addr6; return skb->len; } /** * netlbl_unlabel_staticlistdef - Handle a STATICLISTDEF message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated STATICLISTDEF message and dump the default * unlabeled connection entry in a form suitable for use in a kernel generated * STATICLISTDEF message. Returns the length of @skb. 
* */ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_unlhsh_walk_arg cb_arg; struct netlbl_unlhsh_iface *iface; u32 iter_addr4 = 0, iter_addr6 = 0; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *addr6; #endif cb_arg.nl_cb = cb; cb_arg.skb = skb; cb_arg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL || !iface->valid) goto unlabel_staticlistdef_return; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { if (iter_addr4++ < cb->args[0]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, netlbl_unlhsh_addr4_entry(addr4), NULL, &cb_arg) < 0) { iter_addr4--; goto unlabel_staticlistdef_return; } } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { if (iter_addr6++ < cb->args[1]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, NULL, netlbl_unlhsh_addr6_entry(addr6), &cb_arg) < 0) { iter_addr6--; goto unlabel_staticlistdef_return; } } #endif /* IPv6 */ unlabel_staticlistdef_return: rcu_read_unlock(); cb->args[0] = iter_addr4; cb->args[1] = iter_addr6; return skb->len; } /* * NetLabel Generic NETLINK Command Definitions */ static const struct genl_small_ops netlbl_unlabel_genl_ops[] = { { .cmd = NLBL_UNLABEL_C_STATICADD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticadd, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticremove, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLIST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_unlabel_staticlist, }, { .cmd = NLBL_UNLABEL_C_STATICADDDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticadddef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVEDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticremovedef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLISTDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_unlabel_staticlistdef, }, { .cmd = NLBL_UNLABEL_C_ACCEPT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_accept, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_LIST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_unlabel_list, .dumpit = NULL, }, }; static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_UNLABEL_A_MAX, .policy = netlbl_unlabel_genl_policy, .module = THIS_MODULE, .small_ops = netlbl_unlabel_genl_ops, .n_small_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), .resv_start_op = NLBL_UNLABEL_C_STATICLISTDEF + 1, }; /* * NetLabel Generic NETLINK Protocol Functions */ /** * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component * * Description: * Register the unlabeled packet NetLabel component with the Generic NETLINK * mechanism. Returns zero on success, negative values on failure. 
* */ int __init netlbl_unlabel_genl_init(void) { return genl_register_family(&netlbl_unlabel_gnl_family); } /* * NetLabel KAPI Hooks */ static struct notifier_block netlbl_unlhsh_netdev_notifier = { .notifier_call = netlbl_unlhsh_netdev_handler, }; /** * netlbl_unlabel_init - Initialize the unlabeled connection hash table * @size: the number of bits to use for the hash buckets * * Description: * Initializes the unlabeled connection hash table and registers a network * device notification handler. This function should only be called by the * NetLabel subsystem itself during initialization. Returns zero on success, * non-zero values on error. * */ int __init netlbl_unlabel_init(u32 size) { u32 iter; struct netlbl_unlhsh_tbl *hsh_tbl; if (size == 0) return -EINVAL; hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL); if (hsh_tbl == NULL) return -ENOMEM; hsh_tbl->size = 1 << size; hsh_tbl->tbl = kcalloc(hsh_tbl->size, sizeof(struct list_head), GFP_KERNEL); if (hsh_tbl->tbl == NULL) { kfree(hsh_tbl); return -ENOMEM; } for (iter = 0; iter < hsh_tbl->size; iter++) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); spin_lock(&netlbl_unlhsh_lock); rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); spin_unlock(&netlbl_unlhsh_lock); register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier); return 0; } /** * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet * @skb: the packet * @family: protocol family * @secattr: the security attributes * * Description: * Determine the security attributes, if any, for an unlabled packet and return * them in @secattr. Returns zero on success and negative values on failure. * */ int netlbl_unlabel_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { struct netlbl_unlhsh_iface *iface; rcu_read_lock(); iface = netlbl_unlhsh_search_iface(skb->skb_iif); if (iface == NULL) iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL || !iface->valid) goto unlabel_getattr_nolabel; #if IS_ENABLED(CONFIG_IPV6) /* When resolving a fallback label, check the sk_buff version as * it is possible (e.g. SCTP) to have family = PF_INET6 while * receiving ip_hdr(skb)->version = 4. */ if (family == PF_INET6 && ip_hdr(skb)->version == 4) family = PF_INET; #endif /* IPv6 */ switch (family) { case PF_INET: { struct iphdr *hdr4; struct netlbl_af4list *addr4; hdr4 = ip_hdr(skb); addr4 = netlbl_af4list_search(hdr4->saddr, &iface->addr4_list); if (addr4 == NULL) goto unlabel_getattr_nolabel; secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid; break; } #if IS_ENABLED(CONFIG_IPV6) case PF_INET6: { struct ipv6hdr *hdr6; struct netlbl_af6list *addr6; hdr6 = ipv6_hdr(skb); addr6 = netlbl_af6list_search(&hdr6->saddr, &iface->addr6_list); if (addr6 == NULL) goto unlabel_getattr_nolabel; secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid; break; } #endif /* IPv6 */ default: goto unlabel_getattr_nolabel; } rcu_read_unlock(); secattr->flags |= NETLBL_SECATTR_SECID; secattr->type = NETLBL_NLTYPE_UNLABELED; return 0; unlabel_getattr_nolabel: rcu_read_unlock(); if (netlabel_unlabel_acceptflg == 0) return -ENOMSG; secattr->type = NETLBL_NLTYPE_UNLABELED; return 0; } /** * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets * * Description: * Set the default NetLabel configuration to allow incoming unlabeled packets * and to send unlabeled network traffic by default. 
* */ int __init netlbl_unlabel_defconf(void) { int ret_val; struct netlbl_dom_map *entry; struct netlbl_audit audit_info; /* Only the kernel is allowed to call this function and the only time * it is called is at bootup before the audit subsystem is reporting * messages so don't worry too much about these values. */ security_current_getsecid_subj(&audit_info.secid); audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; entry->family = AF_UNSPEC; entry->def.type = NETLBL_NLTYPE_UNLABELED; ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; netlbl_unlabel_acceptflg_set(1, &audit_info); return 0; }
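/*
 * A minimal, standalone userspace sketch (not part of the kernel sources
 * above) of the attribute check used by netlbl_unlabel_staticadd() and the
 * other STATIC* handlers: a message must carry exactly one complete
 * address/mask pair, either IPv4 or IPv6, never both. The helper name
 * one_family_present() is made up for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

/* Mirrors the XOR test in the handlers: "IPv4 pair incomplete" XOR
 * "IPv6 pair incomplete" must be true for the message to be accepted.
 */
static bool one_family_present(bool v4addr, bool v4mask,
			       bool v6addr, bool v6mask)
{
	return (!v4addr || !v4mask) ^ (!v6addr || !v6mask);
}

int main(void)
{
	/* IPv4 pair only: accepted */
	printf("v4 only : %d\n", one_family_present(true, true, false, false));
	/* IPv6 pair only: accepted */
	printf("v6 only : %d\n", one_family_present(false, false, true, true));
	/* both families in one message: rejected */
	printf("both    : %d\n", one_family_present(true, true, true, true));
	/* IPv4 address without its mask: rejected */
	printf("v4 half : %d\n", one_family_present(true, false, false, false));
	return 0;
}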
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2012 Siemens AG * * Written by: * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <net/netlink.h> #include <net/nl802154.h> #include <net/mac802154.h> #include <net/ieee802154_netdev.h> #include <net/route.h> #include <net/cfg802154.h> #include "ieee802154_i.h" #include "cfg.h" static void ieee802154_tasklet_handler(struct tasklet_struct *t) { struct ieee802154_local *local = from_tasklet(local, t, tasklet); struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue))) { switch (skb->pkt_type) { case IEEE802154_RX_MSG: /* Clear skb->pkt_type in order to not confuse kernel * netstack. */ skb->pkt_type = 0; ieee802154_rx(local, skb); break; default: WARN(1, "mac802154: Packet is of unknown type %d\n", skb->pkt_type); kfree_skb(skb); break; } } } struct ieee802154_hw * ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) { struct wpan_phy *phy; struct ieee802154_local *local; size_t priv_size; if (WARN_ON(!ops || !(ops->xmit_async || ops->xmit_sync) || !ops->ed || !ops->start || !ops->stop || !ops->set_channel)) return NULL; /* Ensure 32-byte alignment of our private data and hw private data. * We use the wpan_phy priv data for both our ieee802154_local and for * the driver's private data * * in memory it'll be like this: * * +-------------------------+ * | struct wpan_phy | * +-------------------------+ * | struct ieee802154_local | * +-------------------------+ * | driver's private data | * +-------------------------+ * * The ieee802154 layer isn't aware of the driver and MAC structures, * so let's align them here.
*/ priv_size = ALIGN(sizeof(*local), NETDEV_ALIGN) + priv_data_len; phy = wpan_phy_new(&mac802154_config_ops, priv_size); if (!phy) { pr_err("failure to allocate master IEEE802.15.4 device\n"); return NULL; } phy->privid = mac802154_wpan_phy_privid; local = wpan_phy_priv(phy); local->phy = phy; local->hw.phy = local->phy; local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); local->ops = ops; INIT_LIST_HEAD(&local->interfaces); INIT_LIST_HEAD(&local->rx_beacon_list); INIT_LIST_HEAD(&local->rx_mac_cmd_list); mutex_init(&local->iflist_mtx); tasklet_setup(&local->tasklet, ieee802154_tasklet_handler); skb_queue_head_init(&local->skb_queue); INIT_WORK(&local->sync_tx_work, ieee802154_xmit_sync_worker); INIT_DELAYED_WORK(&local->scan_work, mac802154_scan_worker); INIT_WORK(&local->rx_beacon_work, mac802154_rx_beacon_worker); INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker); INIT_WORK(&local->rx_mac_cmd_work, mac802154_rx_mac_cmd_worker); init_completion(&local->assoc_done); /* init supported flags with 802.15.4 default ranges */ phy->supported.max_minbe = 8; phy->supported.min_maxbe = 3; phy->supported.max_maxbe = 8; phy->supported.min_frame_retries = 0; phy->supported.max_frame_retries = 7; phy->supported.max_csma_backoffs = 5; phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE; /* always supported */ phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE) | BIT(NL802154_IFTYPE_COORD); return &local->hw; } EXPORT_SYMBOL(ieee802154_alloc_hw); void ieee802154_configure_durations(struct wpan_phy *phy, unsigned int page, unsigned int channel) { u32 duration = 0; switch (page) { case 0: if (BIT(channel) & 0x1) /* 868 MHz BPSK 802.15.4-2003: 20 ksym/s */ duration = 50 * NSEC_PER_USEC; else if (BIT(channel) & 0x7FE) /* 915 MHz BPSK 802.15.4-2003: 40 ksym/s */ duration = 25 * NSEC_PER_USEC; else if (BIT(channel) & 0x7FFF800) /* 2400 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */ duration = 16 * NSEC_PER_USEC; break; case 2: if (BIT(channel) & 0x1) /* 868 MHz O-QPSK 802.15.4-2006: 25 ksym/s */ duration = 40 * NSEC_PER_USEC; else if (BIT(channel) & 0x7FE) /* 915 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */ duration = 16 * NSEC_PER_USEC; break; case 3: if (BIT(channel) & 0x3FFF) /* 2.4 GHz CSS 802.15.4a-2007: 1/6 Msym/s */ duration = 6 * NSEC_PER_USEC; break; default: break; } if (!duration) { pr_debug("Unknown PHY symbol duration\n"); return; } phy->symbol_duration = duration; phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; } EXPORT_SYMBOL(ieee802154_configure_durations); void ieee802154_free_hw(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); BUG_ON(!list_empty(&local->interfaces)); mutex_destroy(&local->iflist_mtx); wpan_phy_free(local->phy); } EXPORT_SYMBOL(ieee802154_free_hw); static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy) { /* TODO warn on empty symbol_duration * Should be done when all drivers sets this value. 
*/ wpan_phy->lifs_period = (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000; wpan_phy->sifs_period = (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000; } int ieee802154_register_hw(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); char mac_wq_name[IFNAMSIZ + 10] = {}; struct net_device *dev; int rc = -ENOSYS; local->workqueue = create_singlethread_workqueue(wpan_phy_name(local->phy)); if (!local->workqueue) { rc = -ENOMEM; goto out; } snprintf(mac_wq_name, IFNAMSIZ + 10, "%s-mac-cmds", wpan_phy_name(local->phy)); local->mac_wq = create_singlethread_workqueue(mac_wq_name); if (!local->mac_wq) { rc = -ENOMEM; goto out_wq; } hrtimer_init(&local->ifs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); local->ifs_timer.function = ieee802154_xmit_ifs_timer; wpan_phy_set_dev(local->phy, local->hw.parent); ieee802154_setup_wpan_phy_pib(local->phy); ieee802154_configure_durations(local->phy, local->phy->current_page, local->phy->current_channel); if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) { local->phy->supported.min_csma_backoffs = 4; local->phy->supported.max_csma_backoffs = 4; local->phy->supported.min_maxbe = 5; local->phy->supported.max_maxbe = 5; local->phy->supported.min_minbe = 3; local->phy->supported.max_minbe = 3; } if (!(hw->flags & IEEE802154_HW_FRAME_RETRIES)) { local->phy->supported.min_frame_retries = 3; local->phy->supported.max_frame_retries = 3; } if (hw->flags & IEEE802154_HW_PROMISCUOUS) local->phy->supported.iftypes |= BIT(NL802154_IFTYPE_MONITOR); rc = wpan_phy_register(local->phy); if (rc < 0) goto out_mac_wq; rtnl_lock(); dev = ieee802154_if_add(local, "wpan%d", NET_NAME_ENUM, NL802154_IFTYPE_NODE, cpu_to_le64(0x0000000000000000ULL)); if (IS_ERR(dev)) { rtnl_unlock(); rc = PTR_ERR(dev); goto out_phy; } rtnl_unlock(); return 0; out_phy: wpan_phy_unregister(local->phy); out_mac_wq: destroy_workqueue(local->mac_wq); out_wq: destroy_workqueue(local->workqueue); out: return rc; } EXPORT_SYMBOL(ieee802154_register_hw); void ieee802154_unregister_hw(struct ieee802154_hw *hw) { struct ieee802154_local *local = hw_to_local(hw); tasklet_kill(&local->tasklet); flush_workqueue(local->workqueue); rtnl_lock(); ieee802154_remove_interfaces(local); rtnl_unlock(); destroy_workqueue(local->mac_wq); destroy_workqueue(local->workqueue); wpan_phy_unregister(local->phy); } EXPORT_SYMBOL(ieee802154_unregister_hw); static int __init ieee802154_init(void) { return ieee802154_iface_init(); } static void __exit ieee802154_exit(void) { ieee802154_iface_exit(); rcu_barrier(); } subsys_initcall(ieee802154_init); module_exit(ieee802154_exit); MODULE_DESCRIPTION("IEEE 802.15.4 subsystem"); MODULE_LICENSE("GPL v2");
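/*
 * A minimal, standalone sketch (not kernel code) of the page-0 branch of
 * ieee802154_configure_durations() above: each channel bit selects a PHY
 * and therefore a symbol duration (20 ksym/s -> 50 us, 40 ksym/s -> 25 us,
 * 62.5 ksym/s -> 16 us). The helper name page0_symbol_duration_ns() is
 * made up for illustration.
 */
#include <stdio.h>

#define NSEC_PER_USEC 1000U
#define BIT(n) (1UL << (n))

/* Map a page-0 channel number to its symbol duration in nanoseconds,
 * returning 0 for an unknown channel, as the kernel helper does.
 */
static unsigned int page0_symbol_duration_ns(unsigned int channel)
{
	if (BIT(channel) & 0x1)		/* 868 MHz BPSK, 20 ksym/s */
		return 50 * NSEC_PER_USEC;
	if (BIT(channel) & 0x7FE)	/* 915 MHz BPSK, 40 ksym/s */
		return 25 * NSEC_PER_USEC;
	if (BIT(channel) & 0x7FFF800)	/* 2.4 GHz O-QPSK, 62.5 ksym/s */
		return 16 * NSEC_PER_USEC;
	return 0;
}

int main(void)
{
	/* Channel 11 is the first 2.4 GHz channel: expect 16000 ns. */
	printf("channel 0  -> %u ns\n", page0_symbol_duration_ns(0));
	printf("channel 1  -> %u ns\n", page0_symbol_duration_ns(1));
	printf("channel 11 -> %u ns\n", page0_symbol_duration_ns(11));
	return 0;
}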
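/*
 * A minimal, standalone sketch (not from the DVB sources) of the MPEG-TS
 * header fields that the dvb_demux code below extracts with ts_pid(),
 * payload() and its continuity-counter checks: sync byte 0x47, transport
 * error indicator (TEI), payload unit start indicator (PUSI), 13-bit PID
 * and 4-bit continuity counter. The struct and function names here are
 * made up for illustration.
 */
#include <stdint.h>
#include <stdio.h>

struct ts_header {
	uint8_t  sync;	/* 0x47 for a valid packet */
	int	 tei;	/* buf[1] & 0x80 */
	int	 pusi;	/* buf[1] & 0x40 */
	uint16_t pid;	/* 13 bits spread over buf[1] and buf[2] */
	uint8_t  cc;	/* 4-bit continuity counter, buf[3] & 0x0f */
};

static void ts_parse_header(const uint8_t *buf, struct ts_header *h)
{
	h->sync = buf[0];
	h->tei  = !!(buf[1] & 0x80);
	h->pusi = !!(buf[1] & 0x40);
	h->pid  = ((buf[1] & 0x1f) << 8) | buf[2];
	h->cc   = buf[3] & 0x0f;
}

int main(void)
{
	/* Example header bytes: sync, PUSI set, PID 0x0100, CC 5 */
	const uint8_t pkt[4] = { 0x47, 0x41, 0x00, 0x15 };
	struct ts_header h;

	ts_parse_header(pkt, &h);
	printf("sync=0x%02x tei=%d pusi=%d pid=0x%04x cc=%u\n",
	       h.sync, h.tei, h.pusi, h.pid, (unsigned)h.cc);
	return 0;
}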
// SPDX-License-Identifier: LGPL-2.1-or-later /* * dvb_demux.c - DVB kernel demux API * * Copyright (C) 2000-2001 Ralph Metzler <ralph@convergence.de> * & Marcus Metzler <marcus@convergence.de> * for convergence integrated media GmbH */ #define pr_fmt(fmt) "dvb_demux: " fmt #include <linux/sched/signal.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/string.h> #include <linux/crc32.h> #include <linux/uaccess.h> #include <asm/div64.h> #include <media/dvb_demux.h> static int dvb_demux_tscheck; module_param(dvb_demux_tscheck, int, 0644); MODULE_PARM_DESC(dvb_demux_tscheck, "enable transport stream continuity and TEI check"); static int dvb_demux_speedcheck; module_param(dvb_demux_speedcheck, int, 0644); MODULE_PARM_DESC(dvb_demux_speedcheck, "enable transport stream speed check"); static int dvb_demux_feed_err_pkts = 1; module_param(dvb_demux_feed_err_pkts, int, 0644); MODULE_PARM_DESC(dvb_demux_feed_err_pkts, "when set to 0, drop packets with the TEI bit set (1 by default)"); #define dprintk(fmt, arg...) \ printk(KERN_DEBUG pr_fmt("%s: " fmt), __func__, ##arg) #define dprintk_tscheck(x...) do { \ if (dvb_demux_tscheck && printk_ratelimit()) \ dprintk(x); \ } while (0) #ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG # define dprintk_sect_loss(x...) dprintk(x) #else # define dprintk_sect_loss(x...)
#endif #define set_buf_flags(__feed, __flag) \ do { \ (__feed)->buffer_flags |= (__flag); \ } while (0) /****************************************************************************** * static inlined helper functions ******************************************************************************/ static inline u16 section_length(const u8 *buf) { return 3 + ((buf[1] & 0x0f) << 8) + buf[2]; } static inline u16 ts_pid(const u8 *buf) { return ((buf[1] & 0x1f) << 8) + buf[2]; } static inline u8 payload(const u8 *tsp) { if (!(tsp[3] & 0x10)) // no payload? return 0; if (tsp[3] & 0x20) { // adaptation field? if (tsp[4] > 183) // corrupted data? return 0; else return 184 - 1 - tsp[4]; } return 184; } static u32 dvb_dmx_crc32(struct dvb_demux_feed *f, const u8 *src, size_t len) { return (f->feed.sec.crc_val = crc32_be(f->feed.sec.crc_val, src, len)); } static void dvb_dmx_memcopy(struct dvb_demux_feed *f, u8 *d, const u8 *s, size_t len) { memcpy(d, s, len); } /****************************************************************************** * Software filter functions ******************************************************************************/ static inline int dvb_dmx_swfilter_payload(struct dvb_demux_feed *feed, const u8 *buf) { int count = payload(buf); int p; int ccok; u8 cc; if (count == 0) return -1; p = 188 - count; cc = buf[3] & 0x0f; ccok = ((feed->cc + 1) & 0x0f) == cc; if (!ccok) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("missed packet: %d instead of %d!\n", cc, (feed->cc + 1) & 0x0f); } feed->cc = cc; if (buf[1] & 0x40) // PUSI ? feed->peslen = 0xfffa; feed->peslen += count; return feed->cb.ts(&buf[p], count, NULL, 0, &feed->feed.ts, &feed->buffer_flags); } static int dvb_dmx_swfilter_sectionfilter(struct dvb_demux_feed *feed, struct dvb_demux_filter *f) { u8 neq = 0; int i; for (i = 0; i < DVB_DEMUX_MASK_MAX; i++) { u8 xor = f->filter.filter_value[i] ^ feed->feed.sec.secbuf[i]; if (f->maskandmode[i] & xor) return 0; neq |= f->maskandnotmode[i] & xor; } if (f->doneq && !neq) return 0; return feed->cb.sec(feed->feed.sec.secbuf, feed->feed.sec.seclen, NULL, 0, &f->filter, &feed->buffer_flags); } static inline int dvb_dmx_swfilter_section_feed(struct dvb_demux_feed *feed) { struct dvb_demux *demux = feed->demux; struct dvb_demux_filter *f = feed->filter; struct dmx_section_feed *sec = &feed->feed.sec; int section_syntax_indicator; if (!sec->is_filtering) return 0; if (!f) return 0; if (sec->check_crc) { section_syntax_indicator = ((sec->secbuf[1] & 0x80) != 0); if (section_syntax_indicator && demux->check_crc32(feed, sec->secbuf, sec->seclen)) { set_buf_flags(feed, DMX_BUFFER_FLAG_HAD_CRC32_DISCARD); return -1; } } do { if (dvb_dmx_swfilter_sectionfilter(feed, f) < 0) return -1; } while ((f = f->next) && sec->is_filtering); sec->seclen = 0; return 0; } static void dvb_dmx_swfilter_section_new(struct dvb_demux_feed *feed) { struct dmx_section_feed *sec = &feed->feed.sec; if (sec->secbufp < sec->tsfeedp) { int n = sec->tsfeedp - sec->secbufp; /* * Section padding is done with 0xff bytes entirely. * Due to speed reasons, we won't check all of them * but just first and last. 
*/ if (sec->secbuf[0] != 0xff || sec->secbuf[n - 1] != 0xff) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("section ts padding loss: %d/%d\n", n, sec->tsfeedp); dprintk_sect_loss("pad data: %*ph\n", n, sec->secbuf); } } sec->tsfeedp = sec->secbufp = sec->seclen = 0; sec->secbuf = sec->secbuf_base; } /* * Losless Section Demux 1.4.1 by Emard * Valsecchi Patrick: * - middle of section A (no PUSI) * - end of section A and start of section B * (with PUSI pointing to the start of the second section) * * In this case, without feed->pusi_seen you'll receive a garbage section * consisting of the end of section A. Basically because tsfeedp * is incemented and the use=0 condition is not raised * when the second packet arrives. * * Fix: * when demux is started, let feed->pusi_seen = false to * prevent initial feeding of garbage from the end of * previous section. When you for the first time see PUSI=1 * then set feed->pusi_seen = true */ static int dvb_dmx_swfilter_section_copy_dump(struct dvb_demux_feed *feed, const u8 *buf, u8 len) { struct dvb_demux *demux = feed->demux; struct dmx_section_feed *sec = &feed->feed.sec; u16 limit, seclen; if (sec->tsfeedp >= DMX_MAX_SECFEED_SIZE) return 0; if (sec->tsfeedp + len > DMX_MAX_SECFEED_SIZE) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("section buffer full loss: %d/%d\n", sec->tsfeedp + len - DMX_MAX_SECFEED_SIZE, DMX_MAX_SECFEED_SIZE); len = DMX_MAX_SECFEED_SIZE - sec->tsfeedp; } if (len <= 0) return 0; demux->memcopy(feed, sec->secbuf_base + sec->tsfeedp, buf, len); sec->tsfeedp += len; /* * Dump all the sections we can find in the data (Emard) */ limit = sec->tsfeedp; if (limit > DMX_MAX_SECFEED_SIZE) return -1; /* internal error should never happen */ /* to be sure always set secbuf */ sec->secbuf = sec->secbuf_base + sec->secbufp; while (sec->secbufp + 2 < limit) { seclen = section_length(sec->secbuf); if (seclen <= 0 || seclen > DMX_MAX_SECTION_SIZE || seclen + sec->secbufp > limit) return 0; sec->seclen = seclen; sec->crc_val = ~0; /* dump [secbuf .. secbuf+seclen) */ if (feed->pusi_seen) { dvb_dmx_swfilter_section_feed(feed); } else { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("pusi not seen, discarding section data\n"); } sec->secbufp += seclen; /* secbufp and secbuf moving together is */ sec->secbuf += seclen; /* redundant but saves pointer arithmetic */ } return 0; } static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed, const u8 *buf) { u8 p, count; int ccok, dc_i = 0; u8 cc; count = payload(buf); if (count == 0) /* count == 0 if no payload or out of range */ return -1; p = 188 - count; /* payload start */ cc = buf[3] & 0x0f; ccok = ((feed->cc + 1) & 0x0f) == cc; if (buf[3] & 0x20) { /* adaption field present, check for discontinuity_indicator */ if ((buf[4] > 0) && (buf[5] & 0x80)) dc_i = 1; } if (!ccok || dc_i) { if (dc_i) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR); dprintk_sect_loss("%d frame with disconnect indicator\n", cc); } else { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("discontinuity: %d instead of %d. %d bytes lost\n", cc, (feed->cc + 1) & 0x0f, count + 4); } /* * those bytes under some circumstances will again be reported * in the following dvb_dmx_swfilter_section_new */ /* * Discontinuity detected. 
Reset pusi_seen to * stop feeding of suspicious data until next PUSI=1 arrives * * FIXME: does it make sense if the MPEG-TS is the one * reporting discontinuity? */ feed->pusi_seen = false; dvb_dmx_swfilter_section_new(feed); } feed->cc = cc; if (buf[1] & 0x40) { /* PUSI=1 (is set), section boundary is here */ if (count > 1 && buf[p] < count) { const u8 *before = &buf[p + 1]; u8 before_len = buf[p]; const u8 *after = &before[before_len]; u8 after_len = count - 1 - before_len; dvb_dmx_swfilter_section_copy_dump(feed, before, before_len); /* before start of new section, set pusi_seen */ feed->pusi_seen = true; dvb_dmx_swfilter_section_new(feed); dvb_dmx_swfilter_section_copy_dump(feed, after, after_len); } else if (count > 0) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("PUSI=1 but %d bytes lost\n", count); } } else { /* PUSI=0 (is not set), no section boundary */ dvb_dmx_swfilter_section_copy_dump(feed, &buf[p], count); } return 0; } static inline void dvb_dmx_swfilter_packet_type(struct dvb_demux_feed *feed, const u8 *buf) { switch (feed->type) { case DMX_TYPE_TS: if (!feed->feed.ts.is_filtering) break; if (feed->ts_type & TS_PACKET) { if (feed->ts_type & TS_PAYLOAD_ONLY) dvb_dmx_swfilter_payload(feed, buf); else feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts, &feed->buffer_flags); } /* Used only on full-featured devices */ if (feed->ts_type & TS_DECODER) if (feed->demux->write_to_decoder) feed->demux->write_to_decoder(feed, buf, 188); break; case DMX_TYPE_SEC: if (!feed->feed.sec.is_filtering) break; if (dvb_dmx_swfilter_section_packet(feed, buf) < 0) feed->feed.sec.seclen = feed->feed.sec.secbufp = 0; break; default: break; } } #define DVR_FEED(f) \ (((f)->type == DMX_TYPE_TS) && \ ((f)->feed.ts.is_filtering) && \ (((f)->ts_type & (TS_PACKET | TS_DEMUX)) == TS_PACKET)) static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) { struct dvb_demux_feed *feed; u16 pid = ts_pid(buf); int dvr_done = 0; if (dvb_demux_speedcheck) { ktime_t cur_time; u64 speed_bytes, speed_timedelta; demux->speed_pkts_cnt++; /* show speed every SPEED_PKTS_INTERVAL packets */ if (!(demux->speed_pkts_cnt % SPEED_PKTS_INTERVAL)) { cur_time = ktime_get(); if (ktime_to_ns(demux->speed_last_time) != 0) { speed_bytes = (u64)demux->speed_pkts_cnt * 188 * 8; /* convert to 1024 basis */ speed_bytes = 1000 * div64_u64(speed_bytes, 1024); speed_timedelta = ktime_ms_delta(cur_time, demux->speed_last_time); if (speed_timedelta) dprintk("TS speed %llu Kbits/sec \n", div64_u64(speed_bytes, speed_timedelta)); } demux->speed_last_time = cur_time; demux->speed_pkts_cnt = 0; } } if (buf[1] & 0x80) { list_for_each_entry(feed, &demux->feed_list, list_head) { if ((feed->pid != pid) && (feed->pid != 0x2000)) continue; set_buf_flags(feed, DMX_BUFFER_FLAG_TEI); } dprintk_tscheck("TEI detected. PID=0x%x data1=0x%x\n", pid, buf[1]); /* data in this packet can't be trusted - drop it unless * module option dvb_demux_feed_err_pkts is set */ if (!dvb_demux_feed_err_pkts) return; } else /* if TEI bit is set, pid may be wrong- skip pkt counter */ if (demux->cnt_storage && dvb_demux_tscheck) { /* check pkt counter */ if (pid < MAX_PID) { if (buf[3] & 0x10) demux->cnt_storage[pid] = (demux->cnt_storage[pid] + 1) & 0xf; if ((buf[3] & 0xf) != demux->cnt_storage[pid]) { list_for_each_entry(feed, &demux->feed_list, list_head) { if ((feed->pid != pid) && (feed->pid != 0x2000)) continue; set_buf_flags(feed, DMX_BUFFER_PKT_COUNTER_MISMATCH); } dprintk_tscheck("TS packet counter mismatch. 
PID=0x%x expected 0x%x got 0x%x\n", pid, demux->cnt_storage[pid], buf[3] & 0xf); demux->cnt_storage[pid] = buf[3] & 0xf; } } /* end check */ } list_for_each_entry(feed, &demux->feed_list, list_head) { if ((feed->pid != pid) && (feed->pid != 0x2000)) continue; /* copy each packet only once to the dvr device, even * if a PID is in multiple filters (e.g. video + PCR) */ if ((DVR_FEED(feed)) && (dvr_done++)) continue; if (feed->pid == pid) dvb_dmx_swfilter_packet_type(feed, buf); else if (feed->pid == 0x2000) feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts, &feed->buffer_flags); } } void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf, size_t count) { unsigned long flags; spin_lock_irqsave(&demux->lock, flags); while (count--) { if (buf[0] == 0x47) dvb_dmx_swfilter_packet(demux, buf); buf += 188; } spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter_packets); static inline int find_next_packet(const u8 *buf, int pos, size_t count, const int pktsize) { int start = pos, lost; while (pos < count) { if (buf[pos] == 0x47 || (pktsize == 204 && buf[pos] == 0xB8)) break; pos++; } lost = pos - start; if (lost) { /* This garbage is part of a valid packet? */ int backtrack = pos - pktsize; if (backtrack >= 0 && (buf[backtrack] == 0x47 || (pktsize == 204 && buf[backtrack] == 0xB8))) return backtrack; } return pos; } /* Filter all pktsize= 188 or 204 sized packets and skip garbage. */ static inline void _dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count, const int pktsize) { int p = 0, i, j; const u8 *q; unsigned long flags; spin_lock_irqsave(&demux->lock, flags); if (demux->tsbufp) { /* tsbuf[0] is now 0x47. */ i = demux->tsbufp; j = pktsize - i; if (count < j) { memcpy(&demux->tsbuf[i], buf, count); demux->tsbufp += count; goto bailout; } memcpy(&demux->tsbuf[i], buf, j); if (demux->tsbuf[0] == 0x47) /* double check */ dvb_dmx_swfilter_packet(demux, demux->tsbuf); demux->tsbufp = 0; p += j; } while (1) { p = find_next_packet(buf, p, count, pktsize); if (p >= count) break; if (count - p < pktsize) break; q = &buf[p]; if (pktsize == 204 && (*q == 0xB8)) { memcpy(demux->tsbuf, q, 188); demux->tsbuf[0] = 0x47; q = demux->tsbuf; } dvb_dmx_swfilter_packet(demux, q); p += pktsize; } i = count - p; if (i) { memcpy(demux->tsbuf, &buf[p], i); demux->tsbufp = i; if (pktsize == 204 && demux->tsbuf[0] == 0xB8) demux->tsbuf[0] = 0x47; } bailout: spin_unlock_irqrestore(&demux->lock, flags); } void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count) { _dvb_dmx_swfilter(demux, buf, count, 188); } EXPORT_SYMBOL(dvb_dmx_swfilter); void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count) { _dvb_dmx_swfilter(demux, buf, count, 204); } EXPORT_SYMBOL(dvb_dmx_swfilter_204); void dvb_dmx_swfilter_raw(struct dvb_demux *demux, const u8 *buf, size_t count) { unsigned long flags; spin_lock_irqsave(&demux->lock, flags); demux->feed->cb.ts(buf, count, NULL, 0, &demux->feed->feed.ts, &demux->feed->buffer_flags); spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter_raw); static struct dvb_demux_filter *dvb_dmx_filter_alloc(struct dvb_demux *demux) { int i; for (i = 0; i < demux->filternum; i++) if (demux->filter[i].state == DMX_STATE_FREE) break; if (i == demux->filternum) return NULL; demux->filter[i].state = DMX_STATE_ALLOCATED; return &demux->filter[i]; } static struct dvb_demux_feed *dvb_dmx_feed_alloc(struct dvb_demux *demux) { int i; for (i = 0; i < demux->feednum; i++) if (demux->feed[i].state == 
DMX_STATE_FREE) break; if (i == demux->feednum) return NULL; demux->feed[i].state = DMX_STATE_ALLOCATED; return &demux->feed[i]; } static int dvb_demux_feed_find(struct dvb_demux_feed *feed) { struct dvb_demux_feed *entry; list_for_each_entry(entry, &feed->demux->feed_list, list_head) if (entry == feed) return 1; return 0; } static void dvb_demux_feed_add(struct dvb_demux_feed *feed) { spin_lock_irq(&feed->demux->lock); if (dvb_demux_feed_find(feed)) { pr_err("%s: feed already in list (type=%x state=%x pid=%x)\n", __func__, feed->type, feed->state, feed->pid); goto out; } list_add(&feed->list_head, &feed->demux->feed_list); out: spin_unlock_irq(&feed->demux->lock); } static void dvb_demux_feed_del(struct dvb_demux_feed *feed) { spin_lock_irq(&feed->demux->lock); if (!(dvb_demux_feed_find(feed))) { pr_err("%s: feed not in list (type=%x state=%x pid=%x)\n", __func__, feed->type, feed->state, feed->pid); goto out; } list_del(&feed->list_head); out: spin_unlock_irq(&feed->demux->lock); } static int dmx_ts_feed_set(struct dmx_ts_feed *ts_feed, u16 pid, int ts_type, enum dmx_ts_pes pes_type, ktime_t timeout) { struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; struct dvb_demux *demux = feed->demux; if (pid > DMX_MAX_PID) return -EINVAL; if (mutex_lock_interruptible(&demux->mutex)) return -ERESTARTSYS; if (ts_type & TS_DECODER) { if (pes_type >= DMX_PES_OTHER) { mutex_unlock(&demux->mutex); return -EINVAL; } if (demux->pesfilter[pes_type] && demux->pesfilter[pes_type] != feed) { mutex_unlock(&demux->mutex); return -EINVAL; } demux->pesfilter[pes_type] = feed; demux->pids[pes_type] = pid; } dvb_demux_feed_add(feed); feed->pid = pid; feed->timeout = timeout; feed->ts_type = ts_type; feed->pes_type = pes_type; feed->state = DMX_STATE_READY; mutex_unlock(&demux->mutex); return 0; } static int dmx_ts_feed_start_filtering(struct dmx_ts_feed *ts_feed) { struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; struct dvb_demux *demux = feed->demux; int ret; if (mutex_lock_interruptible(&demux->mutex)) return -ERESTARTSYS; if (feed->state != DMX_STATE_READY || feed->type != DMX_TYPE_TS) { mutex_unlock(&demux->mutex); return -EINVAL; } if (!demux->start_feed) { mutex_unlock(&demux->mutex); return -ENODEV; } if ((ret = demux->start_feed(feed)) < 0) { mutex_unlock(&demux->mutex); return ret; } spin_lock_irq(&demux->lock); ts_feed->is_filtering = 1; feed->state = DMX_STATE_GO; spin_unlock_irq(&demux->lock); mutex_unlock(&demux->mutex); return 0; } static int dmx_ts_feed_stop_filtering(struct dmx_ts_feed *ts_feed) { struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; struct dvb_demux *demux = feed->demux; int ret; mutex_lock(&demux->mutex); if (feed->state < DMX_STATE_GO) { mutex_unlock(&demux->mutex); return -EINVAL; } if (!demux->stop_feed) { mutex_unlock(&demux->mutex); return -ENODEV; } ret = demux->stop_feed(feed); spin_lock_irq(&demux->lock); ts_feed->is_filtering = 0; feed->state = DMX_STATE_ALLOCATED; spin_unlock_irq(&demux->lock); mutex_unlock(&demux->mutex); return ret; } static int dvbdmx_allocate_ts_feed(struct dmx_demux *dmx, struct dmx_ts_feed **ts_feed, dmx_ts_cb callback) { struct dvb_demux *demux = (struct dvb_demux *)dmx; struct dvb_demux_feed *feed; if (mutex_lock_interruptible(&demux->mutex)) return -ERESTARTSYS; if (!(feed = dvb_dmx_feed_alloc(demux))) { mutex_unlock(&demux->mutex); return -EBUSY; } feed->type = DMX_TYPE_TS; feed->cb.ts = callback; feed->demux = demux; feed->pid = 0xffff; feed->peslen = 0xfffa; feed->buffer_flags = 0; (*ts_feed) = 
&feed->feed.ts; (*ts_feed)->parent = dmx; (*ts_feed)->priv = NULL; (*ts_feed)->is_filtering = 0; (*ts_feed)->start_filtering = dmx_ts_feed_start_filtering; (*ts_feed)->stop_filtering = dmx_ts_feed_stop_filtering; (*ts_feed)->set = dmx_ts_feed_set; if (!(feed->filter = dvb_dmx_filter_alloc(demux))) { feed->state = DMX_STATE_FREE; mutex_unlock(&demux->mutex); return -EBUSY; } feed->filter->type = DMX_TYPE_TS; feed->filter->feed = feed; feed->filter->state = DMX_STATE_READY; mutex_unlock(&demux->mutex); return 0; } static int dvbdmx_release_ts_feed(struct dmx_demux *dmx, struct dmx_ts_feed *ts_feed) { struct dvb_demux *demux = (struct dvb_demux *)dmx; struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; mutex_lock(&demux->mutex); if (feed->state == DMX_STATE_FREE) { mutex_unlock(&demux->mutex); return -EINVAL; } feed->state = DMX_STATE_FREE; feed->filter->state = DMX_STATE_FREE; dvb_demux_feed_del(feed); feed->pid = 0xffff; if (feed->ts_type & TS_DECODER && feed->pes_type < DMX_PES_OTHER) demux->pesfilter[feed->pes_type] = NULL; mutex_unlock(&demux->mutex); return 0; } /****************************************************************************** * dmx_section_feed API calls ******************************************************************************/ static int dmx_section_feed_allocate_filter(struct dmx_section_feed *feed, struct dmx_section_filter **filter) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdemux = dvbdmxfeed->demux; struct dvb_demux_filter *dvbdmxfilter; if (mutex_lock_interruptible(&dvbdemux->mutex)) return -ERESTARTSYS; dvbdmxfilter = dvb_dmx_filter_alloc(dvbdemux); if (!dvbdmxfilter) { mutex_unlock(&dvbdemux->mutex); return -EBUSY; } spin_lock_irq(&dvbdemux->lock); *filter = &dvbdmxfilter->filter; (*filter)->parent = feed; (*filter)->priv = NULL; dvbdmxfilter->feed = dvbdmxfeed; dvbdmxfilter->type = DMX_TYPE_SEC; dvbdmxfilter->state = DMX_STATE_READY; dvbdmxfilter->next = dvbdmxfeed->filter; dvbdmxfeed->filter = dvbdmxfilter; spin_unlock_irq(&dvbdemux->lock); mutex_unlock(&dvbdemux->mutex); return 0; } static int dmx_section_feed_set(struct dmx_section_feed *feed, u16 pid, int check_crc) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = dvbdmxfeed->demux; if (pid > 0x1fff) return -EINVAL; if (mutex_lock_interruptible(&dvbdmx->mutex)) return -ERESTARTSYS; dvb_demux_feed_add(dvbdmxfeed); dvbdmxfeed->pid = pid; dvbdmxfeed->feed.sec.check_crc = check_crc; dvbdmxfeed->state = DMX_STATE_READY; mutex_unlock(&dvbdmx->mutex); return 0; } static void prepare_secfilters(struct dvb_demux_feed *dvbdmxfeed) { int i; struct dvb_demux_filter *f; struct dmx_section_filter *sf; u8 mask, mode, doneq; if (!(f = dvbdmxfeed->filter)) return; do { sf = &f->filter; doneq = false; for (i = 0; i < DVB_DEMUX_MASK_MAX; i++) { mode = sf->filter_mode[i]; mask = sf->filter_mask[i]; f->maskandmode[i] = mask & mode; doneq |= f->maskandnotmode[i] = mask & ~mode; } f->doneq = doneq ? 
true : false; } while ((f = f->next)); } static int dmx_section_feed_start_filtering(struct dmx_section_feed *feed) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = dvbdmxfeed->demux; int ret; if (mutex_lock_interruptible(&dvbdmx->mutex)) return -ERESTARTSYS; if (feed->is_filtering) { mutex_unlock(&dvbdmx->mutex); return -EBUSY; } if (!dvbdmxfeed->filter) { mutex_unlock(&dvbdmx->mutex); return -EINVAL; } dvbdmxfeed->feed.sec.tsfeedp = 0; dvbdmxfeed->feed.sec.secbuf = dvbdmxfeed->feed.sec.secbuf_base; dvbdmxfeed->feed.sec.secbufp = 0; dvbdmxfeed->feed.sec.seclen = 0; dvbdmxfeed->pusi_seen = false; if (!dvbdmx->start_feed) { mutex_unlock(&dvbdmx->mutex); return -ENODEV; } prepare_secfilters(dvbdmxfeed); if ((ret = dvbdmx->start_feed(dvbdmxfeed)) < 0) { mutex_unlock(&dvbdmx->mutex); return ret; } spin_lock_irq(&dvbdmx->lock); feed->is_filtering = 1; dvbdmxfeed->state = DMX_STATE_GO; spin_unlock_irq(&dvbdmx->lock); mutex_unlock(&dvbdmx->mutex); return 0; } static int dmx_section_feed_stop_filtering(struct dmx_section_feed *feed) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = dvbdmxfeed->demux; int ret; mutex_lock(&dvbdmx->mutex); if (!dvbdmx->stop_feed) { mutex_unlock(&dvbdmx->mutex); return -ENODEV; } ret = dvbdmx->stop_feed(dvbdmxfeed); spin_lock_irq(&dvbdmx->lock); dvbdmxfeed->state = DMX_STATE_READY; feed->is_filtering = 0; spin_unlock_irq(&dvbdmx->lock); mutex_unlock(&dvbdmx->mutex); return ret; } static int dmx_section_feed_release_filter(struct dmx_section_feed *feed, struct dmx_section_filter *filter) { struct dvb_demux_filter *dvbdmxfilter = (struct dvb_demux_filter *)filter, *f; struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = dvbdmxfeed->demux; mutex_lock(&dvbdmx->mutex); if (dvbdmxfilter->feed != dvbdmxfeed) { mutex_unlock(&dvbdmx->mutex); return -EINVAL; } if (feed->is_filtering) { /* release dvbdmx->mutex as far as it is acquired by stop_filtering() itself */ mutex_unlock(&dvbdmx->mutex); feed->stop_filtering(feed); mutex_lock(&dvbdmx->mutex); } spin_lock_irq(&dvbdmx->lock); f = dvbdmxfeed->filter; if (f == dvbdmxfilter) { dvbdmxfeed->filter = dvbdmxfilter->next; } else { while (f->next != dvbdmxfilter) f = f->next; f->next = f->next->next; } dvbdmxfilter->state = DMX_STATE_FREE; spin_unlock_irq(&dvbdmx->lock); mutex_unlock(&dvbdmx->mutex); return 0; } static int dvbdmx_allocate_section_feed(struct dmx_demux *demux, struct dmx_section_feed **feed, dmx_section_cb callback) { struct dvb_demux *dvbdmx = (struct dvb_demux *)demux; struct dvb_demux_feed *dvbdmxfeed; if (mutex_lock_interruptible(&dvbdmx->mutex)) return -ERESTARTSYS; if (!(dvbdmxfeed = dvb_dmx_feed_alloc(dvbdmx))) { mutex_unlock(&dvbdmx->mutex); return -EBUSY; } dvbdmxfeed->type = DMX_TYPE_SEC; dvbdmxfeed->cb.sec = callback; dvbdmxfeed->demux = dvbdmx; dvbdmxfeed->pid = 0xffff; dvbdmxfeed->buffer_flags = 0; dvbdmxfeed->feed.sec.secbuf = dvbdmxfeed->feed.sec.secbuf_base; dvbdmxfeed->feed.sec.secbufp = dvbdmxfeed->feed.sec.seclen = 0; dvbdmxfeed->feed.sec.tsfeedp = 0; dvbdmxfeed->filter = NULL; (*feed) = &dvbdmxfeed->feed.sec; (*feed)->is_filtering = 0; (*feed)->parent = demux; (*feed)->priv = NULL; (*feed)->set = dmx_section_feed_set; (*feed)->allocate_filter = dmx_section_feed_allocate_filter; (*feed)->start_filtering = dmx_section_feed_start_filtering; (*feed)->stop_filtering = dmx_section_feed_stop_filtering; (*feed)->release_filter = dmx_section_feed_release_filter; 
mutex_unlock(&dvbdmx->mutex); return 0; } static int dvbdmx_release_section_feed(struct dmx_demux *demux, struct dmx_section_feed *feed) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = (struct dvb_demux *)demux; mutex_lock(&dvbdmx->mutex); if (dvbdmxfeed->state == DMX_STATE_FREE) { mutex_unlock(&dvbdmx->mutex); return -EINVAL; } dvbdmxfeed->state = DMX_STATE_FREE; dvb_demux_feed_del(dvbdmxfeed); dvbdmxfeed->pid = 0xffff; mutex_unlock(&dvbdmx->mutex); return 0; } /****************************************************************************** * dvb_demux kernel data API calls ******************************************************************************/ static int dvbdmx_open(struct dmx_demux *demux) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; if (dvbdemux->users >= MAX_DVB_DEMUX_USERS) return -EUSERS; dvbdemux->users++; return 0; } static int dvbdmx_close(struct dmx_demux *demux) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; if (dvbdemux->users == 0) return -ENODEV; dvbdemux->users--; //FIXME: release any unneeded resources if users==0 return 0; } static int dvbdmx_write(struct dmx_demux *demux, const char __user *buf, size_t count) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; void *p; if ((!demux->frontend) || (demux->frontend->source != DMX_MEMORY_FE)) return -EINVAL; p = memdup_user(buf, count); if (IS_ERR(p)) return PTR_ERR(p); if (mutex_lock_interruptible(&dvbdemux->mutex)) { kfree(p); return -ERESTARTSYS; } dvb_dmx_swfilter(dvbdemux, p, count); kfree(p); mutex_unlock(&dvbdemux->mutex); if (signal_pending(current)) return -EINTR; return count; } static int dvbdmx_add_frontend(struct dmx_demux *demux, struct dmx_frontend *frontend) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; struct list_head *head = &dvbdemux->frontend_list; list_add(&(frontend->connectivity_list), head); return 0; } static int dvbdmx_remove_frontend(struct dmx_demux *demux, struct dmx_frontend *frontend) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; struct list_head *pos, *n, *head = &dvbdemux->frontend_list; list_for_each_safe(pos, n, head) { if (DMX_FE_ENTRY(pos) == frontend) { list_del(pos); return 0; } } return -ENODEV; } static struct list_head *dvbdmx_get_frontends(struct dmx_demux *demux) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; if (list_empty(&dvbdemux->frontend_list)) return NULL; return &dvbdemux->frontend_list; } static int dvbdmx_connect_frontend(struct dmx_demux *demux, struct dmx_frontend *frontend) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; if (demux->frontend) return -EINVAL; mutex_lock(&dvbdemux->mutex); demux->frontend = frontend; mutex_unlock(&dvbdemux->mutex); return 0; } static int dvbdmx_disconnect_frontend(struct dmx_demux *demux) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; mutex_lock(&dvbdemux->mutex); demux->frontend = NULL; mutex_unlock(&dvbdemux->mutex); return 0; } static int dvbdmx_get_pes_pids(struct dmx_demux *demux, u16 * pids) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; memcpy(pids, dvbdemux->pids, 5 * sizeof(u16)); return 0; } int dvb_dmx_init(struct dvb_demux *dvbdemux) { int i; struct dmx_demux *dmx = &dvbdemux->dmx; dvbdemux->cnt_storage = NULL; dvbdemux->users = 0; dvbdemux->filter = vmalloc(array_size(sizeof(struct dvb_demux_filter), dvbdemux->filternum)); if (!dvbdemux->filter) return -ENOMEM; dvbdemux->feed = vmalloc(array_size(sizeof(struct dvb_demux_feed), dvbdemux->feednum)); if (!dvbdemux->feed) { 
vfree(dvbdemux->filter); dvbdemux->filter = NULL; return -ENOMEM; } for (i = 0; i < dvbdemux->filternum; i++) { dvbdemux->filter[i].state = DMX_STATE_FREE; dvbdemux->filter[i].index = i; } for (i = 0; i < dvbdemux->feednum; i++) { dvbdemux->feed[i].state = DMX_STATE_FREE; dvbdemux->feed[i].index = i; } dvbdemux->cnt_storage = vmalloc(MAX_PID + 1); if (!dvbdemux->cnt_storage) pr_warn("Couldn't allocate memory for TS/TEI check. Disabling it\n"); INIT_LIST_HEAD(&dvbdemux->frontend_list); for (i = 0; i < DMX_PES_OTHER; i++) { dvbdemux->pesfilter[i] = NULL; dvbdemux->pids[i] = 0xffff; } INIT_LIST_HEAD(&dvbdemux->feed_list); dvbdemux->playing = 0; dvbdemux->recording = 0; dvbdemux->tsbufp = 0; if (!dvbdemux->check_crc32) dvbdemux->check_crc32 = dvb_dmx_crc32; if (!dvbdemux->memcopy) dvbdemux->memcopy = dvb_dmx_memcopy; dmx->frontend = NULL; dmx->priv = dvbdemux; dmx->open = dvbdmx_open; dmx->close = dvbdmx_close; dmx->write = dvbdmx_write; dmx->allocate_ts_feed = dvbdmx_allocate_ts_feed; dmx->release_ts_feed = dvbdmx_release_ts_feed; dmx->allocate_section_feed = dvbdmx_allocate_section_feed; dmx->release_section_feed = dvbdmx_release_section_feed; dmx->add_frontend = dvbdmx_add_frontend; dmx->remove_frontend = dvbdmx_remove_frontend; dmx->get_frontends = dvbdmx_get_frontends; dmx->connect_frontend = dvbdmx_connect_frontend; dmx->disconnect_frontend = dvbdmx_disconnect_frontend; dmx->get_pes_pids = dvbdmx_get_pes_pids; mutex_init(&dvbdemux->mutex); spin_lock_init(&dvbdemux->lock); return 0; } EXPORT_SYMBOL(dvb_dmx_init); void dvb_dmx_release(struct dvb_demux *dvbdemux) { vfree(dvbdemux->cnt_storage); vfree(dvbdemux->filter); vfree(dvbdemux->feed); } EXPORT_SYMBOL(dvb_dmx_release);
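/*
 * Usage sketch (not from the original file): how a bridge driver would
 * typically hook its hardware PID filter into this software demux before
 * calling dvb_dmx_init().  struct my_adapter and the my_* callbacks are
 * hypothetical stubs; only the dvb_demux fields and the
 * dvb_dmx_init()/dvb_dmx_release() pairing come from the code above.
 * Builds against the declarations in dvb_demux.h, which this file already
 * includes.
 */
struct my_adapter {
	struct dvb_demux demux;
	/* ... device specific state ... */
};

static int my_start_feed(struct dvb_demux_feed *feed)
{
	/* called by dmx_*_feed_start_filtering(); enable the HW filter for feed->pid */
	return 0;
}

static int my_stop_feed(struct dvb_demux_feed *feed)
{
	/* called by dmx_*_feed_stop_filtering(); disable the HW filter for feed->pid */
	return 0;
}

static int my_demux_setup(struct my_adapter *adap)
{
	struct dvb_demux *demux = &adap->demux;

	demux->priv       = adap;
	demux->filternum  = 256;	/* sizes the vmalloc()ed filter table */
	demux->feednum    = 256;	/* sizes the vmalloc()ed feed table */
	demux->start_feed = my_start_feed;
	demux->stop_feed  = my_stop_feed;
	demux->dmx.capabilities = DMX_TS_FILTERING | DMX_SECTION_FILTERING;

	return dvb_dmx_init(demux);	/* undo with dvb_dmx_release(demux) on teardown */
}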
// SPDX-License-Identifier: GPL-2.0-only /* * LED Class Core * * Copyright (C) 2005 John Lenz <lenz@cs.wisc.edu> * Copyright (C) 2005-2007 Richard Purdie <rpurdie@openedhand.com> */ #include <linux/ctype.h> #include <linux/device.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/leds.h> #include <linux/list.h> #include <linux/module.h> #include <linux/property.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <uapi/linux/uleds.h> #include <linux/of.h> #include "leds.h" static DEFINE_MUTEX(leds_lookup_lock); static LIST_HEAD(leds_lookup_list); static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); /* no lock needed for this */ led_update_brightness(led_cdev); return sprintf(buf, "%u\n",
led_cdev->brightness); } static ssize_t brightness_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct led_classdev *led_cdev = dev_get_drvdata(dev); unsigned long state; ssize_t ret; mutex_lock(&led_cdev->led_access); if (led_sysfs_is_disabled(led_cdev)) { ret = -EBUSY; goto unlock; } ret = kstrtoul(buf, 10, &state); if (ret) goto unlock; if (state == LED_OFF) led_trigger_remove(led_cdev); led_set_brightness(led_cdev, state); flush_work(&led_cdev->set_brightness_work); ret = size; unlock: mutex_unlock(&led_cdev->led_access); return ret; } static DEVICE_ATTR_RW(brightness); static ssize_t max_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); return sprintf(buf, "%u\n", led_cdev->max_brightness); } static DEVICE_ATTR_RO(max_brightness); #ifdef CONFIG_LEDS_TRIGGERS static BIN_ATTR(trigger, 0644, led_trigger_read, led_trigger_write, 0); static struct bin_attribute *led_trigger_bin_attrs[] = { &bin_attr_trigger, NULL, }; static const struct attribute_group led_trigger_group = { .bin_attrs = led_trigger_bin_attrs, }; #endif static struct attribute *led_class_attrs[] = { &dev_attr_brightness.attr, &dev_attr_max_brightness.attr, NULL, }; static const struct attribute_group led_group = { .attrs = led_class_attrs, }; static const struct attribute_group *led_groups[] = { &led_group, #ifdef CONFIG_LEDS_TRIGGERS &led_trigger_group, #endif NULL, }; #ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED static ssize_t brightness_hw_changed_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); if (led_cdev->brightness_hw_changed == -1) return -ENODATA; return sprintf(buf, "%u\n", led_cdev->brightness_hw_changed); } static DEVICE_ATTR_RO(brightness_hw_changed); static int led_add_brightness_hw_changed(struct led_classdev *led_cdev) { struct device *dev = led_cdev->dev; int ret; ret = device_create_file(dev, &dev_attr_brightness_hw_changed); if (ret) { dev_err(dev, "Error creating brightness_hw_changed\n"); return ret; } led_cdev->brightness_hw_changed_kn = sysfs_get_dirent(dev->kobj.sd, "brightness_hw_changed"); if (!led_cdev->brightness_hw_changed_kn) { dev_err(dev, "Error getting brightness_hw_changed kn\n"); device_remove_file(dev, &dev_attr_brightness_hw_changed); return -ENXIO; } return 0; } static void led_remove_brightness_hw_changed(struct led_classdev *led_cdev) { sysfs_put(led_cdev->brightness_hw_changed_kn); device_remove_file(led_cdev->dev, &dev_attr_brightness_hw_changed); } void led_classdev_notify_brightness_hw_changed(struct led_classdev *led_cdev, unsigned int brightness) { if (WARN_ON(!led_cdev->brightness_hw_changed_kn)) return; led_cdev->brightness_hw_changed = brightness; sysfs_notify_dirent(led_cdev->brightness_hw_changed_kn); } EXPORT_SYMBOL_GPL(led_classdev_notify_brightness_hw_changed); #else static int led_add_brightness_hw_changed(struct led_classdev *led_cdev) { return 0; } static void led_remove_brightness_hw_changed(struct led_classdev *led_cdev) { } #endif /** * led_classdev_suspend - suspend an led_classdev. * @led_cdev: the led_classdev to suspend. */ void led_classdev_suspend(struct led_classdev *led_cdev) { led_cdev->flags |= LED_SUSPENDED; led_set_brightness_nopm(led_cdev, 0); flush_work(&led_cdev->set_brightness_work); } EXPORT_SYMBOL_GPL(led_classdev_suspend); /** * led_classdev_resume - resume an led_classdev. * @led_cdev: the led_classdev to resume. 
*/ void led_classdev_resume(struct led_classdev *led_cdev) { led_set_brightness_nopm(led_cdev, led_cdev->brightness); if (led_cdev->flash_resume) led_cdev->flash_resume(led_cdev); led_cdev->flags &= ~LED_SUSPENDED; } EXPORT_SYMBOL_GPL(led_classdev_resume); #ifdef CONFIG_PM_SLEEP static int led_suspend(struct device *dev) { struct led_classdev *led_cdev = dev_get_drvdata(dev); if (led_cdev->flags & LED_CORE_SUSPENDRESUME) led_classdev_suspend(led_cdev); return 0; } static int led_resume(struct device *dev) { struct led_classdev *led_cdev = dev_get_drvdata(dev); if (led_cdev->flags & LED_CORE_SUSPENDRESUME) led_classdev_resume(led_cdev); return 0; } #endif static SIMPLE_DEV_PM_OPS(leds_class_dev_pm_ops, led_suspend, led_resume); static struct led_classdev *led_module_get(struct device *led_dev) { struct led_classdev *led_cdev; if (!led_dev) return ERR_PTR(-EPROBE_DEFER); led_cdev = dev_get_drvdata(led_dev); if (!try_module_get(led_cdev->dev->parent->driver->owner)) { put_device(led_cdev->dev); return ERR_PTR(-ENODEV); } return led_cdev; } static const struct class leds_class = { .name = "leds", .dev_groups = led_groups, .pm = &leds_class_dev_pm_ops, }; /** * of_led_get() - request a LED device via the LED framework * @np: device node to get the LED device from * @index: the index of the LED * * Returns the LED device parsed from the phandle specified in the "leds" * property of a device tree node or a negative error-code on failure. */ struct led_classdev *of_led_get(struct device_node *np, int index) { struct device *led_dev; struct device_node *led_node; led_node = of_parse_phandle(np, "leds", index); if (!led_node) return ERR_PTR(-ENOENT); led_dev = class_find_device_by_of_node(&leds_class, led_node); of_node_put(led_node); put_device(led_dev); return led_module_get(led_dev); } EXPORT_SYMBOL_GPL(of_led_get); /** * led_put() - release a LED device * @led_cdev: LED device */ void led_put(struct led_classdev *led_cdev) { module_put(led_cdev->dev->parent->driver->owner); put_device(led_cdev->dev); } EXPORT_SYMBOL_GPL(led_put); static void devm_led_release(struct device *dev, void *res) { struct led_classdev **p = res; led_put(*p); } static struct led_classdev *__devm_led_get(struct device *dev, struct led_classdev *led) { struct led_classdev **dr; dr = devres_alloc(devm_led_release, sizeof(struct led_classdev *), GFP_KERNEL); if (!dr) { led_put(led); return ERR_PTR(-ENOMEM); } *dr = led; devres_add(dev, dr); return led; } /** * devm_of_led_get - Resource-managed request of a LED device * @dev: LED consumer * @index: index of the LED to obtain in the consumer * * The device node of the device is parse to find the request LED device. * The LED device returned from this function is automatically released * on driver detach. * * @return a pointer to a LED device or ERR_PTR(errno) on failure. */ struct led_classdev *__must_check devm_of_led_get(struct device *dev, int index) { struct led_classdev *led; if (!dev) return ERR_PTR(-EINVAL); led = of_led_get(dev->of_node, index); if (IS_ERR(led)) return led; return __devm_led_get(dev, led); } EXPORT_SYMBOL_GPL(devm_of_led_get); /** * led_get() - request a LED device via the LED framework * @dev: device for which to get the LED device * @con_id: name of the LED from the device's point of view * * @return a pointer to a LED device or ERR_PTR(errno) on failure. 
*/ struct led_classdev *led_get(struct device *dev, char *con_id) { struct led_lookup_data *lookup; const char *provider = NULL; struct device *led_dev; mutex_lock(&leds_lookup_lock); list_for_each_entry(lookup, &leds_lookup_list, list) { if (!strcmp(lookup->dev_id, dev_name(dev)) && !strcmp(lookup->con_id, con_id)) { provider = kstrdup_const(lookup->provider, GFP_KERNEL); break; } } mutex_unlock(&leds_lookup_lock); if (!provider) return ERR_PTR(-ENOENT); led_dev = class_find_device_by_name(&leds_class, provider); kfree_const(provider); return led_module_get(led_dev); } EXPORT_SYMBOL_GPL(led_get); /** * devm_led_get() - request a LED device via the LED framework * @dev: device for which to get the LED device * @con_id: name of the LED from the device's point of view * * The LED device returned from this function is automatically released * on driver detach. * * @return a pointer to a LED device or ERR_PTR(errno) on failure. */ struct led_classdev *devm_led_get(struct device *dev, char *con_id) { struct led_classdev *led; led = led_get(dev, con_id); if (IS_ERR(led)) return led; return __devm_led_get(dev, led); } EXPORT_SYMBOL_GPL(devm_led_get); /** * led_add_lookup() - Add a LED lookup table entry * @led_lookup: the lookup table entry to add * * Add a LED lookup table entry. On systems without devicetree the lookup table * is used by led_get() to find LEDs. */ void led_add_lookup(struct led_lookup_data *led_lookup) { mutex_lock(&leds_lookup_lock); list_add_tail(&led_lookup->list, &leds_lookup_list); mutex_unlock(&leds_lookup_lock); } EXPORT_SYMBOL_GPL(led_add_lookup); /** * led_remove_lookup() - Remove a LED lookup table entry * @led_lookup: the lookup table entry to remove */ void led_remove_lookup(struct led_lookup_data *led_lookup) { mutex_lock(&leds_lookup_lock); list_del(&led_lookup->list); mutex_unlock(&leds_lookup_lock); } EXPORT_SYMBOL_GPL(led_remove_lookup); /** * devm_of_led_get_optional - Resource-managed request of an optional LED device * @dev: LED consumer * @index: index of the LED to obtain in the consumer * * The device node of the device is parsed to find the requested LED device. * The LED device returned from this function is automatically released * on driver detach. * * @return a pointer to a LED device, ERR_PTR(errno) on failure and NULL if the * led was not found. */ struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev, int index) { struct led_classdev *led; led = devm_of_led_get(dev, index); if (IS_ERR(led) && PTR_ERR(led) == -ENOENT) return NULL; return led; } EXPORT_SYMBOL_GPL(devm_of_led_get_optional); static int led_classdev_next_name(const char *init_name, char *name, size_t len) { unsigned int i = 0; int ret = 0; struct device *dev; strscpy(name, init_name, len); while ((ret < len) && (dev = class_find_device_by_name(&leds_class, name))) { put_device(dev); ret = snprintf(name, len, "%s_%u", init_name, ++i); } if (ret >= len) return -ENOMEM; return i; } /** * led_classdev_register_ext - register a new object of led_classdev class * with init data. * * @parent: parent of LED device * @led_cdev: the led_classdev structure for this device. 
* @init_data: LED class device initialization data */ int led_classdev_register_ext(struct device *parent, struct led_classdev *led_cdev, struct led_init_data *init_data) { char composed_name[LED_MAX_NAME_SIZE]; char final_name[LED_MAX_NAME_SIZE]; const char *proposed_name = composed_name; int ret; if (init_data) { if (init_data->devname_mandatory && !init_data->devicename) { dev_err(parent, "Mandatory device name is missing"); return -EINVAL; } ret = led_compose_name(parent, init_data, composed_name); if (ret < 0) return ret; if (init_data->fwnode) { fwnode_property_read_string(init_data->fwnode, "linux,default-trigger", &led_cdev->default_trigger); if (fwnode_property_present(init_data->fwnode, "retain-state-shutdown")) led_cdev->flags |= LED_RETAIN_AT_SHUTDOWN; fwnode_property_read_u32(init_data->fwnode, "max-brightness", &led_cdev->max_brightness); if (fwnode_property_present(init_data->fwnode, "color")) fwnode_property_read_u32(init_data->fwnode, "color", &led_cdev->color); } } else { proposed_name = led_cdev->name; } ret = led_classdev_next_name(proposed_name, final_name, sizeof(final_name)); if (ret < 0) return ret; if (led_cdev->color >= LED_COLOR_ID_MAX) dev_warn(parent, "LED %s color identifier out of range\n", final_name); mutex_init(&led_cdev->led_access); mutex_lock(&led_cdev->led_access); led_cdev->dev = device_create_with_groups(&leds_class, parent, 0, led_cdev, led_cdev->groups, "%s", final_name); if (IS_ERR(led_cdev->dev)) { mutex_unlock(&led_cdev->led_access); return PTR_ERR(led_cdev->dev); } if (init_data && init_data->fwnode) device_set_node(led_cdev->dev, init_data->fwnode); if (ret) dev_warn(parent, "Led %s renamed to %s due to name collision", proposed_name, dev_name(led_cdev->dev)); if (led_cdev->flags & LED_BRIGHT_HW_CHANGED) { ret = led_add_brightness_hw_changed(led_cdev); if (ret) { device_unregister(led_cdev->dev); led_cdev->dev = NULL; mutex_unlock(&led_cdev->led_access); return ret; } } led_cdev->work_flags = 0; #ifdef CONFIG_LEDS_TRIGGERS init_rwsem(&led_cdev->trigger_lock); #endif #ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED led_cdev->brightness_hw_changed = -1; #endif /* add to the list of leds */ down_write(&leds_list_lock); list_add_tail(&led_cdev->node, &leds_list); up_write(&leds_list_lock); if (!led_cdev->max_brightness) led_cdev->max_brightness = LED_FULL; led_update_brightness(led_cdev); led_init_core(led_cdev); #ifdef CONFIG_LEDS_TRIGGERS /* * If no default trigger was given and hw_control_trigger is set, * make it the default trigger. */ if (!led_cdev->default_trigger && led_cdev->hw_control_trigger) led_cdev->default_trigger = led_cdev->hw_control_trigger; led_trigger_set_default(led_cdev); #endif mutex_unlock(&led_cdev->led_access); dev_dbg(parent, "Registered led device: %s\n", led_cdev->name); return 0; } EXPORT_SYMBOL_GPL(led_classdev_register_ext); /** * led_classdev_unregister - unregisters a object of led_properties class. * @led_cdev: the led device to unregister * * Unregisters a previously registered via led_classdev_register object. 
*/ void led_classdev_unregister(struct led_classdev *led_cdev) { if (IS_ERR_OR_NULL(led_cdev->dev)) return; #ifdef CONFIG_LEDS_TRIGGERS down_write(&led_cdev->trigger_lock); if (led_cdev->trigger) led_trigger_set(led_cdev, NULL); up_write(&led_cdev->trigger_lock); #endif led_cdev->flags |= LED_UNREGISTERING; /* Stop blinking */ led_stop_software_blink(led_cdev); if (!(led_cdev->flags & LED_RETAIN_AT_SHUTDOWN)) led_set_brightness(led_cdev, LED_OFF); flush_work(&led_cdev->set_brightness_work); if (led_cdev->flags & LED_BRIGHT_HW_CHANGED) led_remove_brightness_hw_changed(led_cdev); device_unregister(led_cdev->dev); down_write(&leds_list_lock); list_del(&led_cdev->node); up_write(&leds_list_lock); mutex_destroy(&led_cdev->led_access); } EXPORT_SYMBOL_GPL(led_classdev_unregister); static void devm_led_classdev_release(struct device *dev, void *res) { led_classdev_unregister(*(struct led_classdev **)res); } /** * devm_led_classdev_register_ext - resource managed led_classdev_register_ext() * * @parent: parent of LED device * @led_cdev: the led_classdev structure for this device. * @init_data: LED class device initialization data */ int devm_led_classdev_register_ext(struct device *parent, struct led_classdev *led_cdev, struct led_init_data *init_data) { struct led_classdev **dr; int rc; dr = devres_alloc(devm_led_classdev_release, sizeof(*dr), GFP_KERNEL); if (!dr) return -ENOMEM; rc = led_classdev_register_ext(parent, led_cdev, init_data); if (rc) { devres_free(dr); return rc; } *dr = led_cdev; devres_add(parent, dr); return 0; } EXPORT_SYMBOL_GPL(devm_led_classdev_register_ext); static int devm_led_classdev_match(struct device *dev, void *res, void *data) { struct led_classdev **p = res; if (WARN_ON(!p || !*p)) return 0; return *p == data; } /** * devm_led_classdev_unregister() - resource managed led_classdev_unregister() * @dev: The device to unregister. * @led_cdev: the led_classdev structure for this device. */ void devm_led_classdev_unregister(struct device *dev, struct led_classdev *led_cdev) { WARN_ON(devres_release(dev, devm_led_classdev_release, devm_led_classdev_match, led_cdev)); } EXPORT_SYMBOL_GPL(devm_led_classdev_unregister); static int __init leds_init(void) { return class_register(&leds_class); } static void __exit leds_exit(void) { class_unregister(&leds_class); } subsys_initcall(leds_init); module_exit(leds_exit); MODULE_AUTHOR("John Lenz, Richard Purdie"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LED Class Interface");
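/*
 * Usage sketch (not part of this file): the consumer side of
 * led_classdev_register_ext().  The my_* names are hypothetical; the
 * led_classdev/led_init_data fields and the devm_ helper are the API
 * exported above.  Assumes the platform device carries DT/ACPI data so
 * that led_compose_name() can derive the LED name from init_data.fwnode.
 */
#include <linux/platform_device.h>

static void my_led_set(struct led_classdev *cdev, enum led_brightness b)
{
	/* write 'b' (0..max_brightness) to the hardware here */
}

static struct led_classdev my_led = {
	.name		= "my-board:green:status",
	.max_brightness	= 255,
	.brightness_set	= my_led_set,
};

static int my_led_probe(struct platform_device *pdev)
{
	struct led_init_data init_data = {
		.fwnode = dev_fwnode(&pdev->dev),	/* used by led_compose_name() */
	};

	/* the class device is unregistered automatically on driver detach */
	return devm_led_classdev_register_ext(&pdev->dev, &my_led, &init_data);
}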
// SPDX-License-Identifier: GPL-2.0 /* * kobject.h - generic kernel object infrastructure. * * Copyright (c) 2002-2003 Patrick Mochel * Copyright (c) 2002-2003 Open Source Development Labs * Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2006-2008 Novell Inc. * * Please read Documentation/core-api/kobject.rst before using the kobject * interface, ESPECIALLY the parts about reference counts and object * destructors. */ #ifndef _KOBJECT_H_ #define _KOBJECT_H_ #include <linux/types.h> #include <linux/list.h> #include <linux/sysfs.h> #include <linux/compiler.h> #include <linux/container_of.h> #include <linux/spinlock.h> #include <linux/kref.h> #include <linux/kobject_ns.h> #include <linux/wait.h> #include <linux/atomic.h> #include <linux/workqueue.h> #include <linux/uidgid.h> #define UEVENT_HELPER_PATH_LEN 256 #define UEVENT_NUM_ENVP 64 /* number of env pointers */ #define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */ #ifdef CONFIG_UEVENT_HELPER /* path to the userspace helper executed on an event */ extern char uevent_helper[]; #endif /* counter to tag the uevent, read only except for the kobject core */ extern atomic64_t uevent_seqnum; /* * The actions here must match the index to the string array * in lib/kobject_uevent.c * * Do not add new actions here without checking with the driver-core * maintainers. Action strings are not meant to express subsystem * or device specific properties. In most cases you want to send a * kobject_uevent_env(kobj, KOBJ_CHANGE, env) with additional event * specific variables added to the event environment.
*/ enum kobject_action { KOBJ_ADD, KOBJ_REMOVE, KOBJ_CHANGE, KOBJ_MOVE, KOBJ_ONLINE, KOBJ_OFFLINE, KOBJ_BIND, KOBJ_UNBIND, }; struct kobject { const char *name; struct list_head entry; struct kobject *parent; struct kset *kset; const struct kobj_type *ktype; struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; unsigned int state_initialized:1; unsigned int state_in_sysfs:1; unsigned int state_add_uevent_sent:1; unsigned int state_remove_uevent_sent:1; unsigned int uevent_suppress:1; #ifdef CONFIG_DEBUG_KOBJECT_RELEASE struct delayed_work release; #endif }; __printf(2, 3) int kobject_set_name(struct kobject *kobj, const char *name, ...); __printf(2, 0) int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs); static inline const char *kobject_name(const struct kobject *kobj) { return kobj->name; } void kobject_init(struct kobject *kobj, const struct kobj_type *ktype); __printf(3, 4) __must_check int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...); __printf(4, 5) __must_check int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...); void kobject_del(struct kobject *kobj); struct kobject * __must_check kobject_create_and_add(const char *name, struct kobject *parent); int __must_check kobject_rename(struct kobject *, const char *new_name); int __must_check kobject_move(struct kobject *, struct kobject *); struct kobject *kobject_get(struct kobject *kobj); struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj); void kobject_put(struct kobject *kobj); const void *kobject_namespace(const struct kobject *kobj); void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); char *kobject_get_path(const struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(const struct kobject *kobj); const void *(*namespace)(const struct kobject *kobj); void (*get_ownership)(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; struct kobj_uevent_env { char *argv[3]; char *envp[UEVENT_NUM_ENVP]; int envp_idx; char buf[UEVENT_BUFFER_SIZE]; int buflen; }; struct kset_uevent_ops { int (* const filter)(const struct kobject *kobj); const char *(* const name)(const struct kobject *kobj); int (* const uevent)(const struct kobject *kobj, struct kobj_uevent_env *env); }; struct kobj_attribute { struct attribute attr; ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, char *buf); ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count); }; extern const struct sysfs_ops kobj_sysfs_ops; struct sock; /** * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem. * * A kset defines a group of kobjects. They can be individually * different "types" but overall these kobjects all want to be grouped * together and operated on in the same manner. ksets are used to * define the attribute callbacks and other common events that happen to * a kobject. * * @list: the list of all kobjects for this kset * @list_lock: a lock for iterating over the kobjects * @kobj: the embedded kobject for this kset (recursion, isn't it fun...) * @uevent_ops: the set of uevent operations for this kset. 
These are * called whenever a kobject has something happen to it so that the kset * can add new environment variables, or filter out the uevents if so * desired. */ struct kset { struct list_head list; spinlock_t list_lock; struct kobject kobj; const struct kset_uevent_ops *uevent_ops; } __randomize_layout; void kset_init(struct kset *kset); int __must_check kset_register(struct kset *kset); void kset_unregister(struct kset *kset); struct kset * __must_check kset_create_and_add(const char *name, const struct kset_uevent_ops *u, struct kobject *parent_kobj); static inline struct kset *to_kset(struct kobject *kobj) { return kobj ? container_of(kobj, struct kset, kobj) : NULL; } static inline struct kset *kset_get(struct kset *k) { return k ? to_kset(kobject_get(&k->kobj)) : NULL; } static inline void kset_put(struct kset *k) { kobject_put(&k->kobj); } static inline const struct kobj_type *get_ktype(const struct kobject *kobj) { return kobj->ktype; } struct kobject *kset_find_obj(struct kset *, const char *); /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; /* The global /sys/kernel/mm/ kobject for people to chain off of */ extern struct kobject *mm_kobj; /* The global /sys/hypervisor/ kobject for people to chain off of */ extern struct kobject *hypervisor_kobj; /* The global /sys/power/ kobject for people to chain off of */ extern struct kobject *power_kobj; /* The global /sys/firmware/ kobject for people to chain off of */ extern struct kobject *firmware_kobj; int kobject_uevent(struct kobject *kobj, enum kobject_action action); int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, char *envp[]); int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count); __printf(2, 3) int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); #endif /* _KOBJECT_H_ */
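/*
 * Usage sketch (not part of this header), modelled on samples/kobject/:
 * a minimal consumer of the API above that creates /sys/kernel/my_kobj
 * with one read/write "value" attribute.  The my_* names are hypothetical.
 */
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/kernel.h>
#include <linux/module.h>

static int my_value;
static struct kobject *my_kobj;

static ssize_t value_show(struct kobject *kobj, struct kobj_attribute *attr,
			  char *buf)
{
	return sysfs_emit(buf, "%d\n", my_value);
}

static ssize_t value_store(struct kobject *kobj, struct kobj_attribute *attr,
			   const char *buf, size_t count)
{
	if (kstrtoint(buf, 10, &my_value))
		return -EINVAL;
	return count;
}

static struct kobj_attribute value_attr = __ATTR_RW(value);

static int __init my_kobj_init(void)
{
	my_kobj = kobject_create_and_add("my_kobj", kernel_kobj);
	if (!my_kobj)
		return -ENOMEM;
	return sysfs_create_file(my_kobj, &value_attr.attr);
}

static void __exit my_kobj_exit(void)
{
	kobject_put(my_kobj);	/* drops the reference taken at creation */
}
module_init(my_kobj_init);
module_exit(my_kobj_exit);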
// SPDX-License-Identifier: GPL-2.0-only /* * HID driver for CMedia CM6533 audio jack controls * and HS100B mute buttons * * Copyright (C) 2015 Ben Chen <ben_chen@bizlinktech.com> * Copyright (C) 2021 Thomas Weißschuh <linux@weissschuh.net> */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" MODULE_AUTHOR("Ben Chen"); MODULE_AUTHOR("Thomas Weißschuh"); MODULE_DESCRIPTION("CM6533 HID jack controls and HS100B mute button"); MODULE_LICENSE("GPL"); #define CM6533_JD_TYPE_COUNT 1 #define CM6533_JD_RAWEV_LEN 16 #define CM6533_JD_SFX_OFFSET 8 #define HS100B_RDESC_ORIG_SIZE 60 /* Fixed report descriptor of HS-100B audio chip * Bit 4 is an absolute Microphone mute usage instead of being unassigned. */ static __u8 hs100b_rdesc_fixed[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x09, 0xE9, /* Usage (Volume Inc), */ 0x09, 0xEA, /* Usage (Volume Dec), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x02, /* Report Count (2), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0xE2, /* Usage (Mute), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x06, /* Input (Variable, Relative), */ 0x05, 0x0B, /* Usage Page (Telephony), */ 0x09, 0x2F, /* Usage (2Fh), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x20, /* Usage (20h), */ 0x81, 0x06, /* Input (Variable, Relative), */ 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x00, /* Usage (00h), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x02, /* Input (Variable), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x09, 0x00, /* Usage (00h), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x00, /* Usage (00h), */ 0x95, 0x04, /* Report Count (4), */ 0x91, 0x02, /* Output (Variable), */ 0xC0 /* End Collection */ }; /* * *CM6533 audio jack HID raw events: * *Plug in: *01000600 002083xx 080008c0 10000000 *about 3 seconds later...
*01000a00 002083xx 08000380 10000000 *01000600 002083xx 08000380 10000000 * *Plug out: *01000400 002083xx 080008c0 x0000000 */ static const u8 ji_sfx[] = { 0x08, 0x00, 0x08, 0xc0 }; static const u8 ji_in[] = { 0x01, 0x00, 0x06, 0x00 }; static const u8 ji_out[] = { 0x01, 0x00, 0x04, 0x00 }; static int jack_switch_types[CM6533_JD_TYPE_COUNT] = { SW_HEADPHONE_INSERT, }; struct cmhid { struct input_dev *input_dev; struct hid_device *hid; unsigned short switch_map[CM6533_JD_TYPE_COUNT]; }; static void hp_ev(struct hid_device *hid, struct cmhid *cm, int value) { input_report_switch(cm->input_dev, SW_HEADPHONE_INSERT, value); input_sync(cm->input_dev); } static int cmhid_raw_event(struct hid_device *hid, struct hid_report *report, u8 *data, int len) { struct cmhid *cm = hid_get_drvdata(hid); if (len != CM6533_JD_RAWEV_LEN) goto out; if (memcmp(data+CM6533_JD_SFX_OFFSET, ji_sfx, sizeof(ji_sfx))) goto out; if (!memcmp(data, ji_out, sizeof(ji_out))) { hp_ev(hid, cm, 0); goto out; } if (!memcmp(data, ji_in, sizeof(ji_in))) { hp_ev(hid, cm, 1); goto out; } out: return 0; } static int cmhid_input_configured(struct hid_device *hid, struct hid_input *hidinput) { struct input_dev *input_dev = hidinput->input; struct cmhid *cm = hid_get_drvdata(hid); int i; cm->input_dev = input_dev; memcpy(cm->switch_map, jack_switch_types, sizeof(cm->switch_map)); input_dev->evbit[0] = BIT(EV_SW); for (i = 0; i < CM6533_JD_TYPE_COUNT; i++) input_set_capability(cm->input_dev, EV_SW, jack_switch_types[i]); return 0; } static int cmhid_input_mapping(struct hid_device *hid, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { return -1; } static int cmhid_probe(struct hid_device *hid, const struct hid_device_id *id) { int ret; struct cmhid *cm; cm = kzalloc(sizeof(struct cmhid), GFP_KERNEL); if (!cm) { ret = -ENOMEM; goto allocfail; } cm->hid = hid; hid->quirks |= HID_QUIRK_HIDINPUT_FORCE; hid_set_drvdata(hid, cm); ret = hid_parse(hid); if (ret) { hid_err(hid, "parse failed\n"); goto fail; } ret = hid_hw_start(hid, HID_CONNECT_DEFAULT | HID_CONNECT_HIDDEV_FORCE); if (ret) { hid_err(hid, "hw start failed\n"); goto fail; } return 0; fail: kfree(cm); allocfail: return ret; } static void cmhid_remove(struct hid_device *hid) { struct cmhid *cm = hid_get_drvdata(hid); hid_hw_stop(hid); kfree(cm); } static const struct hid_device_id cmhid_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) }, { } }; MODULE_DEVICE_TABLE(hid, cmhid_devices); static struct hid_driver cmhid_driver = { .name = "cm6533_jd", .id_table = cmhid_devices, .raw_event = cmhid_raw_event, .input_configured = cmhid_input_configured, .probe = cmhid_probe, .remove = cmhid_remove, .input_mapping = cmhid_input_mapping, }; static __u8 *cmhid_hs100b_report_fixup(struct hid_device *hid, __u8 *rdesc, unsigned int *rsize) { if (*rsize == HS100B_RDESC_ORIG_SIZE) { hid_info(hid, "Fixing CMedia HS-100B report descriptor\n"); rdesc = hs100b_rdesc_fixed; *rsize = sizeof(hs100b_rdesc_fixed); } return rdesc; } static const struct hid_device_id cmhid_hs100b_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CMEDIA_HS100B) }, { } }; MODULE_DEVICE_TABLE(hid, cmhid_hs100b_devices); static struct hid_driver cmhid_hs100b_driver = { .name = "cmedia_hs100b", .id_table = cmhid_hs100b_devices, .report_fixup = cmhid_hs100b_report_fixup, }; static int cmedia_init(void) { int ret; ret = hid_register_driver(&cmhid_driver); if (ret) return ret; ret = hid_register_driver(&cmhid_hs100b_driver); if (ret) 
hid_unregister_driver(&cmhid_driver); return ret; } module_init(cmedia_init); static void cmedia_exit(void) { hid_unregister_driver(&cmhid_driver); hid_unregister_driver(&cmhid_hs100b_driver); } module_exit(cmedia_exit);
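/*
 * Illustration only (not in the original driver): how cmhid_raw_event()
 * classifies the "plug in" report quoted in the comment block above.  The
 * suffix at offset 8 is checked first; bytes 4-7 ("002083xx") and 12-15
 * are never compared.  cmhid_classify_example() is a hypothetical helper.
 */
static int __maybe_unused cmhid_classify_example(void)
{
	static const u8 plug_in[CM6533_JD_RAWEV_LEN] = {
		0x01, 0x00, 0x06, 0x00,		/* matches ji_in */
		0x00, 0x20, 0x83, 0x00,		/* "002083xx", not compared */
		0x08, 0x00, 0x08, 0xc0,		/* matches ji_sfx at CM6533_JD_SFX_OFFSET */
		0x10, 0x00, 0x00, 0x00,
	};

	if (memcmp(plug_in + CM6533_JD_SFX_OFFSET, ji_sfx, sizeof(ji_sfx)))
		return -1;	/* not a jack-detect report at all */

	/* 1: headphone inserted, 0: removed -- mirrors the hp_ev() calls above */
	return !memcmp(plug_in, ji_in, sizeof(ji_in));
}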
/* * Copyright (C) 2017-2018 Netronome Systems, Inc.
* * This software is licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this * source tree. * * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/bug.h> #include <linux/kdev_t.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/proc_ns.h> #include <linux/rhashtable.h> #include <linux/rtnetlink.h> #include <linux/rwsem.h> #include <net/xdp.h> /* Protects offdevs, members of bpf_offload_netdev and offload members * of all progs. * RTNL lock cannot be taken when holding this lock. */ static DECLARE_RWSEM(bpf_devs_lock); struct bpf_offload_dev { const struct bpf_prog_offload_ops *ops; struct list_head netdevs; void *priv; }; struct bpf_offload_netdev { struct rhash_head l; struct net_device *netdev; struct bpf_offload_dev *offdev; /* NULL when bound-only */ struct list_head progs; struct list_head maps; struct list_head offdev_netdevs; }; static const struct rhashtable_params offdevs_params = { .nelem_hint = 4, .key_len = sizeof(struct net_device *), .key_offset = offsetof(struct bpf_offload_netdev, netdev), .head_offset = offsetof(struct bpf_offload_netdev, l), .automatic_shrinking = true, }; static struct rhashtable offdevs; static int bpf_dev_offload_check(struct net_device *netdev) { if (!netdev) return -EINVAL; if (!netdev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; return 0; } static struct bpf_offload_netdev * bpf_offload_find_netdev(struct net_device *netdev) { lockdep_assert_held(&bpf_devs_lock); return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params); } static int __bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev) { struct bpf_offload_netdev *ondev; int err; ondev = kzalloc(sizeof(*ondev), GFP_KERNEL); if (!ondev) return -ENOMEM; ondev->netdev = netdev; ondev->offdev = offdev; INIT_LIST_HEAD(&ondev->progs); INIT_LIST_HEAD(&ondev->maps); err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params); if (err) { netdev_warn(netdev, "failed to register for BPF offload\n"); goto err_free; } if (offdev) list_add(&ondev->offdev_netdevs, &offdev->netdevs); return 0; err_free: kfree(ondev); return err; } static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; if (offload->dev_state) offload->offdev->ops->destroy(prog); list_del_init(&offload->offloads); kfree(offload); prog->aux->offload = NULL; } static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap, enum bpf_netdev_command cmd) { struct netdev_bpf data = {}; struct net_device *netdev; ASSERT_RTNL(); data.command = cmd; data.offmap = offmap; /* Caller must make sure netdev is valid */ netdev = offmap->netdev; return netdev->netdev_ops->ndo_bpf(netdev, &data); } static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap) { WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE)); /* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */ bpf_map_free_id(&offmap->map); list_del_init(&offmap->offloads); 
offmap->netdev = NULL; } static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev) { struct bpf_offload_netdev *ondev, *altdev = NULL; struct bpf_offloaded_map *offmap, *mtmp; struct bpf_prog_offload *offload, *ptmp; ASSERT_RTNL(); ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params); if (WARN_ON(!ondev)) return; WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params)); /* Try to move the objects to another netdev of the device */ if (offdev) { list_del(&ondev->offdev_netdevs); altdev = list_first_entry_or_null(&offdev->netdevs, struct bpf_offload_netdev, offdev_netdevs); } if (altdev) { list_for_each_entry(offload, &ondev->progs, offloads) offload->netdev = altdev->netdev; list_splice_init(&ondev->progs, &altdev->progs); list_for_each_entry(offmap, &ondev->maps, offloads) offmap->netdev = altdev->netdev; list_splice_init(&ondev->maps, &altdev->maps); } else { list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads) __bpf_prog_offload_destroy(offload->prog); list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads) __bpf_map_offload_destroy(offmap); } WARN_ON(!list_empty(&ondev->progs)); WARN_ON(!list_empty(&ondev->maps)); kfree(ondev); } static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *netdev) { struct bpf_offload_netdev *ondev; struct bpf_prog_offload *offload; int err; offload = kzalloc(sizeof(*offload), GFP_USER); if (!offload) return -ENOMEM; offload->prog = prog; offload->netdev = netdev; ondev = bpf_offload_find_netdev(offload->netdev); /* When program is offloaded require presence of "true" * bpf_offload_netdev, avoid the one created for !ondev case below. */ if (bpf_prog_is_offloaded(prog->aux) && (!ondev || !ondev->offdev)) { err = -EINVAL; goto err_free; } if (!ondev) { /* When only binding to the device, explicitly * create an entry in the hashtable. */ err = __bpf_offload_dev_netdev_register(NULL, offload->netdev); if (err) goto err_free; ondev = bpf_offload_find_netdev(offload->netdev); } offload->offdev = ondev->offdev; prog->aux->offload = offload; list_add_tail(&offload->offloads, &ondev->progs); return 0; err_free: kfree(offload); return err; } int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr) { struct net_device *netdev; int err; if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS && attr->prog_type != BPF_PROG_TYPE_XDP) return -EINVAL; if (attr->prog_flags & ~(BPF_F_XDP_DEV_BOUND_ONLY | BPF_F_XDP_HAS_FRAGS)) return -EINVAL; /* Frags are allowed only if program is dev-bound-only, but not * if it is requesting bpf offload. 
*/ if (attr->prog_flags & BPF_F_XDP_HAS_FRAGS && !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)) return -EINVAL; if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS && attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY) return -EINVAL; netdev = dev_get_by_index(current->nsproxy->net_ns, attr->prog_ifindex); if (!netdev) return -EINVAL; err = bpf_dev_offload_check(netdev); if (err) goto out; prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY); down_write(&bpf_devs_lock); err = __bpf_prog_dev_bound_init(prog, netdev); up_write(&bpf_devs_lock); out: dev_put(netdev); return err; } int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog) { int err; if (!bpf_prog_is_dev_bound(old_prog->aux)) return 0; if (bpf_prog_is_offloaded(old_prog->aux)) return -EINVAL; new_prog->aux->dev_bound = old_prog->aux->dev_bound; new_prog->aux->offload_requested = old_prog->aux->offload_requested; down_write(&bpf_devs_lock); if (!old_prog->aux->offload) { err = -EINVAL; goto out; } err = __bpf_prog_dev_bound_init(new_prog, old_prog->aux->offload->netdev); out: up_write(&bpf_devs_lock); return err; } int bpf_prog_offload_verifier_prep(struct bpf_prog *prog) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); offload = prog->aux->offload; if (offload) { ret = offload->offdev->ops->prepare(prog); offload->dev_state = !ret; } up_read(&bpf_devs_lock); return ret; } int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) ret = offload->offdev->ops->insn_hook(env, insn_idx, prev_insn_idx); up_read(&bpf_devs_lock); return ret; } int bpf_prog_offload_finalize(struct bpf_verifier_env *env) { struct bpf_prog_offload *offload; int ret = -ENODEV; down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) { if (offload->offdev->ops->finalize) ret = offload->offdev->ops->finalize(env); else ret = 0; } up_read(&bpf_devs_lock); return ret; } void bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, struct bpf_insn *insn) { const struct bpf_prog_offload_ops *ops; struct bpf_prog_offload *offload; int ret = -EOPNOTSUPP; down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) { ops = offload->offdev->ops; if (!offload->opt_failed && ops->replace_insn) ret = ops->replace_insn(env, off, insn); offload->opt_failed |= ret; } up_read(&bpf_devs_lock); } void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) { struct bpf_prog_offload *offload; int ret = -EOPNOTSUPP; down_read(&bpf_devs_lock); offload = env->prog->aux->offload; if (offload) { if (!offload->opt_failed && offload->offdev->ops->remove_insns) ret = offload->offdev->ops->remove_insns(env, off, cnt); offload->opt_failed |= ret; } up_read(&bpf_devs_lock); } void bpf_prog_dev_bound_destroy(struct bpf_prog *prog) { struct bpf_offload_netdev *ondev; struct net_device *netdev; rtnl_lock(); down_write(&bpf_devs_lock); if (prog->aux->offload) { list_del_init(&prog->aux->offload->offloads); netdev = prog->aux->offload->netdev; __bpf_prog_offload_destroy(prog); ondev = bpf_offload_find_netdev(netdev); if (!ondev->offdev && list_empty(&ondev->progs)) __bpf_offload_dev_netdev_unregister(NULL, netdev); } up_write(&bpf_devs_lock); rtnl_unlock(); } static int bpf_prog_offload_translate(struct bpf_prog *prog) { struct bpf_prog_offload *offload; int ret = -ENODEV; 
down_read(&bpf_devs_lock); offload = prog->aux->offload; if (offload) ret = offload->offdev->ops->translate(prog); up_read(&bpf_devs_lock); return ret; } static unsigned int bpf_prog_warn_on_exec(const void *ctx, const struct bpf_insn *insn) { WARN(1, "attempt to execute device eBPF program on the host!"); return 0; } int bpf_prog_offload_compile(struct bpf_prog *prog) { prog->bpf_func = bpf_prog_warn_on_exec; return bpf_prog_offload_translate(prog); } struct ns_get_path_bpf_prog_args { struct bpf_prog *prog; struct bpf_prog_info *info; }; static struct ns_common *bpf_prog_offload_info_fill_ns(void *private_data) { struct ns_get_path_bpf_prog_args *args = private_data; struct bpf_prog_aux *aux = args->prog->aux; struct ns_common *ns; struct net *net; rtnl_lock(); down_read(&bpf_devs_lock); if (aux->offload) { args->info->ifindex = aux->offload->netdev->ifindex; net = dev_net(aux->offload->netdev); get_net(net); ns = &net->ns; } else { args->info->ifindex = 0; ns = NULL; } up_read(&bpf_devs_lock); rtnl_unlock(); return ns; } int bpf_prog_offload_info_fill(struct bpf_prog_info *info, struct bpf_prog *prog) { struct ns_get_path_bpf_prog_args args = { .prog = prog, .info = info, }; struct bpf_prog_aux *aux = prog->aux; struct inode *ns_inode; struct path ns_path; char __user *uinsns; int res; u32 ulen; res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args); if (res) { if (!info->ifindex) return -ENODEV; return res; } down_read(&bpf_devs_lock); if (!aux->offload) { up_read(&bpf_devs_lock); return -ENODEV; } ulen = info->jited_prog_len; info->jited_prog_len = aux->offload->jited_len; if (info->jited_prog_len && ulen) { uinsns = u64_to_user_ptr(info->jited_prog_insns); ulen = min_t(u32, info->jited_prog_len, ulen); if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) { up_read(&bpf_devs_lock); return -EFAULT; } } up_read(&bpf_devs_lock); ns_inode = ns_path.dentry->d_inode; info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); info->netns_ino = ns_inode->i_ino; path_put(&ns_path); return 0; } const struct bpf_prog_ops bpf_offload_prog_ops = { }; struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) { struct net *net = current->nsproxy->net_ns; struct bpf_offload_netdev *ondev; struct bpf_offloaded_map *offmap; int err; if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); if (attr->map_type != BPF_MAP_TYPE_ARRAY && attr->map_type != BPF_MAP_TYPE_HASH) return ERR_PTR(-EINVAL); offmap = bpf_map_area_alloc(sizeof(*offmap), NUMA_NO_NODE); if (!offmap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&offmap->map, attr); rtnl_lock(); down_write(&bpf_devs_lock); offmap->netdev = __dev_get_by_index(net, attr->map_ifindex); err = bpf_dev_offload_check(offmap->netdev); if (err) goto err_unlock; ondev = bpf_offload_find_netdev(offmap->netdev); if (!ondev) { err = -EINVAL; goto err_unlock; } err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC); if (err) goto err_unlock; list_add_tail(&offmap->offloads, &ondev->maps); up_write(&bpf_devs_lock); rtnl_unlock(); return &offmap->map; err_unlock: up_write(&bpf_devs_lock); rtnl_unlock(); bpf_map_area_free(offmap); return ERR_PTR(err); } void bpf_map_offload_map_free(struct bpf_map *map) { struct bpf_offloaded_map *offmap = map_to_offmap(map); rtnl_lock(); down_write(&bpf_devs_lock); if (offmap->netdev) __bpf_map_offload_destroy(offmap); up_write(&bpf_devs_lock); rtnl_unlock(); bpf_map_area_free(offmap); } u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map) { /* The memory dynamically allocated in netdev dev_ops is not 
counted */ return sizeof(struct bpf_offloaded_map); } int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value) { struct bpf_offloaded_map *offmap = map_to_offmap(map); int ret = -ENODEV; down_read(&bpf_devs_lock); if (offmap->netdev) ret = offmap->dev_ops->map_lookup_elem(offmap, key, value); up_read(&bpf_devs_lock); return ret; } int bpf_map_offload_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { struct bpf_offloaded_map *offmap = map_to_offmap(map); int ret = -ENODEV; if (unlikely(flags > BPF_EXIST)) return -EINVAL; down_read(&bpf_devs_lock); if (offmap->netdev) ret = offmap->dev_ops->map_update_elem(offmap, key, value, flags); up_read(&bpf_devs_lock); return ret; } int bpf_map_offload_delete_elem(struct bpf_map *map, void *key) { struct bpf_offloaded_map *offmap = map_to_offmap(map); int ret = -ENODEV; down_read(&bpf_devs_lock); if (offmap->netdev) ret = offmap->dev_ops->map_delete_elem(offmap, key); up_read(&bpf_devs_lock); return ret; } int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_offloaded_map *offmap = map_to_offmap(map); int ret = -ENODEV; down_read(&bpf_devs_lock); if (offmap->netdev) ret = offmap->dev_ops->map_get_next_key(offmap, key, next_key); up_read(&bpf_devs_lock); return ret; } struct ns_get_path_bpf_map_args { struct bpf_offloaded_map *offmap; struct bpf_map_info *info; }; static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data) { struct ns_get_path_bpf_map_args *args = private_data; struct ns_common *ns; struct net *net; rtnl_lock(); down_read(&bpf_devs_lock); if (args->offmap->netdev) { args->info->ifindex = args->offmap->netdev->ifindex; net = dev_net(args->offmap->netdev); get_net(net); ns = &net->ns; } else { args->info->ifindex = 0; ns = NULL; } up_read(&bpf_devs_lock); rtnl_unlock(); return ns; } int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map) { struct ns_get_path_bpf_map_args args = { .offmap = map_to_offmap(map), .info = info, }; struct inode *ns_inode; struct path ns_path; int res; res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args); if (res) { if (!info->ifindex) return -ENODEV; return res; } ns_inode = ns_path.dentry->d_inode; info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); info->netns_ino = ns_inode->i_ino; path_put(&ns_path); return 0; } static bool __bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev) { struct bpf_offload_netdev *ondev1, *ondev2; struct bpf_prog_offload *offload; if (!bpf_prog_is_dev_bound(prog->aux)) return false; offload = prog->aux->offload; if (!offload) return false; if (offload->netdev == netdev) return true; ondev1 = bpf_offload_find_netdev(offload->netdev); ondev2 = bpf_offload_find_netdev(netdev); return ondev1 && ondev2 && ondev1->offdev == ondev2->offdev; } bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev) { bool ret; down_read(&bpf_devs_lock); ret = __bpf_offload_dev_match(prog, netdev); up_read(&bpf_devs_lock); return ret; } EXPORT_SYMBOL_GPL(bpf_offload_dev_match); bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs) { bool ret; if (bpf_prog_is_offloaded(lhs->aux) != bpf_prog_is_offloaded(rhs->aux)) return false; down_read(&bpf_devs_lock); ret = lhs->aux->offload && rhs->aux->offload && lhs->aux->offload->netdev && lhs->aux->offload->netdev == rhs->aux->offload->netdev; up_read(&bpf_devs_lock); return ret; } bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map) { 
struct bpf_offloaded_map *offmap; bool ret; if (!bpf_map_is_offloaded(map)) return bpf_map_offload_neutral(map); offmap = map_to_offmap(map); down_read(&bpf_devs_lock); ret = __bpf_offload_dev_match(prog, offmap->netdev); up_read(&bpf_devs_lock); return ret; } int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev) { int err; down_write(&bpf_devs_lock); err = __bpf_offload_dev_netdev_register(offdev, netdev); up_write(&bpf_devs_lock); return err; } EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register); void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev) { down_write(&bpf_devs_lock); __bpf_offload_dev_netdev_unregister(offdev, netdev); up_write(&bpf_devs_lock); } EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); struct bpf_offload_dev * bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv) { struct bpf_offload_dev *offdev; offdev = kzalloc(sizeof(*offdev), GFP_KERNEL); if (!offdev) return ERR_PTR(-ENOMEM); offdev->ops = ops; offdev->priv = priv; INIT_LIST_HEAD(&offdev->netdevs); return offdev; } EXPORT_SYMBOL_GPL(bpf_offload_dev_create); void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev) { WARN_ON(!list_empty(&offdev->netdevs)); kfree(offdev); } EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy); void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev) { return offdev->priv; } EXPORT_SYMBOL_GPL(bpf_offload_dev_priv); void bpf_dev_bound_netdev_unregister(struct net_device *dev) { struct bpf_offload_netdev *ondev; ASSERT_RTNL(); down_write(&bpf_devs_lock); ondev = bpf_offload_find_netdev(dev); if (ondev && !ondev->offdev) __bpf_offload_dev_netdev_unregister(NULL, ondev->netdev); up_write(&bpf_devs_lock); } int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log, struct bpf_prog_aux *prog_aux) { if (!bpf_prog_is_dev_bound(prog_aux)) { bpf_log(log, "metadata kfuncs require device-bound program\n"); return -EINVAL; } if (bpf_prog_is_offloaded(prog_aux)) { bpf_log(log, "metadata kfuncs can't be offloaded\n"); return -EINVAL; } return 0; } void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id) { const struct xdp_metadata_ops *ops; void *p = NULL; /* We don't hold bpf_devs_lock while resolving several * kfuncs and can race with the unregister_netdevice(). * We rely on bpf_dev_bound_match() check at attach * to render this program unusable. */ down_read(&bpf_devs_lock); if (!prog->aux->offload) goto out; ops = prog->aux->offload->netdev->xdp_metadata_ops; if (!ops) goto out; #define XDP_METADATA_KFUNC(name, _, __, xmo) \ if (func_id == bpf_xdp_metadata_kfunc_id(name)) p = ops->xmo; XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC out: up_read(&bpf_devs_lock); return p; } static int __init bpf_offload_init(void) { return rhashtable_init(&offdevs, &offdevs_params); } core_initcall(bpf_offload_init);
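/*
 * Editor's sketch (not part of offload.c): a minimal illustration of how a
 * driver might consume the bpf_offload_dev_* API exported above. All foo_*
 * names and the priv layout are hypothetical; only bpf_offload_dev_create(),
 * bpf_offload_dev_netdev_register()/_unregister(), bpf_offload_dev_destroy()
 * and the bpf_prog_offload_ops callbacks dispatched by the verifier hooks in
 * this file are real kernel symbols. Error handling and locking are
 * abbreviated; consult an in-tree user (e.g. netdevsim) for the full pattern.
 */
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/err.h>
#include <linux/netdevice.h>

struct foo_bpf_priv {				/* hypothetical driver state */
	struct bpf_offload_dev *offdev;
};

static int foo_bpf_verifier_prep(struct bpf_prog *prog)
{
	/* Reached via bpf_prog_offload_verifier_prep() above. */
	return 0;
}

static int foo_bpf_insn_hook(struct bpf_verifier_env *env,
			     int insn_idx, int prev_insn_idx)
{
	/* Reached via bpf_prog_offload_verify_insn() above, once per insn. */
	return 0;
}

static int foo_bpf_translate(struct bpf_prog *prog)
{
	/* Reached via bpf_prog_offload_compile() -> ->translate() above. */
	return 0;
}

static const struct bpf_prog_offload_ops foo_bpf_dev_ops = {
	.prepare	= foo_bpf_verifier_prep,
	.insn_hook	= foo_bpf_insn_hook,
	.translate	= foo_bpf_translate,
	/* remaining callbacks (finalize, replace_insn, remove_insns, destroy)
	 * omitted for brevity
	 */
};

static int foo_bpf_offload_init(struct foo_bpf_priv *priv,
				struct net_device *netdev)
{
	int err;

	priv->offdev = bpf_offload_dev_create(&foo_bpf_dev_ops, priv);
	if (IS_ERR(priv->offdev))
		return PTR_ERR(priv->offdev);

	err = bpf_offload_dev_netdev_register(priv->offdev, netdev);
	if (err)
		bpf_offload_dev_destroy(priv->offdev);
	return err;
}

static void foo_bpf_offload_fini(struct foo_bpf_priv *priv,
				 struct net_device *netdev)
{
	bpf_offload_dev_netdev_unregister(priv->offdev, netdev);
	bpf_offload_dev_destroy(priv->offdev);
}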
// SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/memory.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ /* * demand-loading started 01.12.91 - seems it is high on the list of * things wanted, and it should be easy to implement. - Linus */ /* * Ok, demand-loading was easy, shared pages a little bit tricker. 
Shared * pages started 02.12.91, seems to work. - Linus. * * Tested sharing by executing about 30 /bin/sh: under the old kernel it * would have taken more than the 6M I have free, but it worked well as * far as I could see. * * Also corrected some "invalidate()"s - I wasn't doing enough of them. */ /* * Real VM (paging to/from disk) started 18.12.91. Much more work and * thought has to go into this. Oh, well.. * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why. * Found it. Everything seems to work now. * 20.12.91 - Ok, making the swap-device changeable like the root. */ /* * 05.04.94 - Multi-page memory management added for v1.1. * Idea by Alex Bligh (alex@cconcepts.co.uk) * * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG * (Gerhard.Wichert@pdb.siemens.de) * * Aug/Sep 2004 Changed to four level page tables (Andi Kleen) */ #include <linux/kernel_stat.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/task.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/swap.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/memremap.h> #include <linux/kmsan.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/export.h> #include <linux/delayacct.h> #include <linux/init.h> #include <linux/pfn_t.h> #include <linux/writeback.h> #include <linux/memcontrol.h> #include <linux/mmu_notifier.h> #include <linux/swapops.h> #include <linux/elf.h> #include <linux/gfp.h> #include <linux/migrate.h> #include <linux/string.h> #include <linux/memory-tiers.h> #include <linux/debugfs.h> #include <linux/userfaultfd_k.h> #include <linux/dax.h> #include <linux/oom.h> #include <linux/numa.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/vmalloc.h> #include <linux/sched/sysctl.h> #include <trace/events/kmem.h> #include <asm/io.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <linux/uaccess.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include "pgalloc-track.h" #include "internal.h" #include "swap.h" #if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. #endif #ifndef CONFIG_NUMA unsigned long max_mapnr; EXPORT_SYMBOL(max_mapnr); struct page *mem_map; EXPORT_SYMBOL(mem_map); #endif static vm_fault_t do_fault(struct vm_fault *vmf); static vm_fault_t do_anonymous_page(struct vm_fault *vmf); static bool vmf_pte_changed(struct vm_fault *vmf); /* * Return true if the original pte was a uffd-wp pte marker (so the pte was * wr-protected). */ static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) { if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) return false; return pte_marker_uffd_wp(vmf->orig_pte); } /* * A number of key systems in x86 including ioremap() rely on the assumption * that high_memory defines the upper bound on direct map memory, then end * of ZONE_NORMAL. */ void *high_memory; EXPORT_SYMBOL(high_memory); /* * Randomize the address space (stacks, mmaps, brk, etc.). * * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization, * as ancient (libc5 based) binaries can segfault. 
) */ int randomize_va_space __read_mostly = #ifdef CONFIG_COMPAT_BRK 1; #else 2; #endif #ifndef arch_wants_old_prefaulted_pte static inline bool arch_wants_old_prefaulted_pte(void) { /* * Transitioning a PTE from 'old' to 'young' can be expensive on * some architectures, even if it's performed in hardware. By * default, "false" means prefaulted entries will be 'young'. */ return false; } #endif static int __init disable_randmaps(char *s) { randomize_va_space = 0; return 1; } __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; EXPORT_SYMBOL(zero_pfn); unsigned long highest_memmap_pfn __read_mostly; /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() */ static int __init init_zero_pfn(void) { zero_pfn = page_to_pfn(ZERO_PAGE(0)); return 0; } early_initcall(init_zero_pfn); void mm_trace_rss_stat(struct mm_struct *mm, int member) { trace_rss_stat(mm, member); } /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. */ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { pgtable_t token = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free_tlb(tlb, token, addr); mm_dec_nr_ptes(tlb->mm); } static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pmd_t *pmd; unsigned long next; unsigned long start; start = addr; pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; free_pte_range(tlb, pmd, addr); } while (pmd++, addr = next, addr != end); start &= PUD_MASK; if (start < floor) return; if (ceiling) { ceiling &= PUD_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; pmd = pmd_offset(pud, start); pud_clear(pud); pmd_free_tlb(tlb, pmd, start); mm_dec_nr_pmds(tlb->mm); } static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pud_t *pud; unsigned long next; unsigned long start; start = addr; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; free_pmd_range(tlb, pud, addr, next, floor, ceiling); } while (pud++, addr = next, addr != end); start &= P4D_MASK; if (start < floor) return; if (ceiling) { ceiling &= P4D_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; pud = pud_offset(p4d, start); p4d_clear(p4d); pud_free_tlb(tlb, pud, start); mm_dec_nr_puds(tlb->mm); } static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { p4d_t *p4d; unsigned long next; unsigned long start; start = addr; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; free_pud_range(tlb, p4d, addr, next, floor, ceiling); } while (p4d++, addr = next, addr != end); start &= PGDIR_MASK; if (start < floor) return; if (ceiling) { ceiling &= PGDIR_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; p4d = p4d_offset(pgd, start); pgd_clear(pgd); p4d_free_tlb(tlb, p4d, start); } /* * This function frees user-level page tables of a process. */ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; unsigned long next; /* * The next few lines have given us lots of grief... * * Why are we testing PMD* at this top level? 
Because often * there will be no work to do at all, and we'd prefer not to * go all the way down to the bottom just to discover that. * * Why all these "- 1"s? Because 0 represents both the bottom * of the address space and the top of it (using -1 for the * top wouldn't help much: the masks would do the wrong thing). * The rule is that addr 0 and floor 0 refer to the bottom of * the address space, but end 0 and ceiling 0 refer to the top * Comparisons need to use "end - 1" and "ceiling - 1" (though * that end 0 case should be mythical). * * Wherever addr is brought up or ceiling brought down, we must * be careful to reject "the opposite 0" before it confuses the * subsequent tests. But what about where end is brought down * by PMD_SIZE below? no, end can't go down to 0 there. * * Whereas we round start (addr) and ceiling down, by different * masks at different levels, in order to test whether a table * now has no other vmas using it, so can be freed, we don't * bother to round floor or end up - the tests don't need that. */ addr &= PMD_MASK; if (addr < floor) { addr += PMD_SIZE; if (!addr) return; } if (ceiling) { ceiling &= PMD_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) end -= PMD_SIZE; if (addr > end - 1) return; /* * We add page table cache pages with PAGE_SIZE, * (see pte_free_tlb()), flush the tlb if we need */ tlb_change_page_size(tlb, PAGE_SIZE); pgd = pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; free_p4d_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling, bool mm_wr_locked) { do { unsigned long addr = vma->vm_start; struct vm_area_struct *next; /* * Note: USER_PGTABLES_CEILING may be passed as ceiling and may * be 0. This will underflow and is okay. */ next = mas_find(mas, ceiling - 1); if (unlikely(xa_is_zero(next))) next = NULL; /* * Hide vma from rmap and truncate_pagecache before freeing * pgtables */ if (mm_wr_locked) vma_start_write(vma); unlink_anon_vmas(vma); unlink_file_vma(vma); if (is_vm_hugetlb_page(vma)) { hugetlb_free_pgd_range(tlb, addr, vma->vm_end, floor, next ? next->vm_start : ceiling); } else { /* * Optimization: gather nearby vmas into one call down */ while (next && next->vm_start <= vma->vm_end + PMD_SIZE && !is_vm_hugetlb_page(next)) { vma = next; next = mas_find(mas, ceiling - 1); if (unlikely(xa_is_zero(next))) next = NULL; if (mm_wr_locked) vma_start_write(vma); unlink_anon_vmas(vma); unlink_file_vma(vma); } free_pgd_range(tlb, addr, vma->vm_end, floor, next ? next->vm_start : ceiling); } vma = next; } while (vma); } void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte) { spinlock_t *ptl = pmd_lock(mm, pmd); if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ mm_inc_nr_ptes(mm); /* * Ensure all pte setup (eg. pte page lock and page clearing) are * visible before the pte is made visible to other CPUs by being * put into page tables. * * The other side of the story is the pointer chasing in the page * table walking code (when walking the page table without locking; * ie. most of the time). Fortunately, these data accesses consist * of a chain of data-dependent loads, meaning most CPUs (alpha * being the notable exception) will already guarantee loads are * seen in-order. See the alpha page table accessors for the * smp_rmb() barriers in page table walking code. 
*/ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ pmd_populate(mm, pmd, *pte); *pte = NULL; } spin_unlock(ptl); } int __pte_alloc(struct mm_struct *mm, pmd_t *pmd) { pgtable_t new = pte_alloc_one(mm); if (!new) return -ENOMEM; pmd_install(mm, pmd, &new); if (new) pte_free(mm, new); return 0; } int __pte_alloc_kernel(pmd_t *pmd) { pte_t *new = pte_alloc_one_kernel(&init_mm); if (!new) return -ENOMEM; spin_lock(&init_mm.page_table_lock); if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ smp_wmb(); /* See comment in pmd_install() */ pmd_populate_kernel(&init_mm, pmd, new); new = NULL; } spin_unlock(&init_mm.page_table_lock); if (new) pte_free_kernel(&init_mm, new); return 0; } static inline void init_rss_vec(int *rss) { memset(rss, 0, sizeof(int) * NR_MM_COUNTERS); } static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) { int i; for (i = 0; i < NR_MM_COUNTERS; i++) if (rss[i]) add_mm_counter(mm, i, rss[i]); } /* * This function is called to print an error when a bad pte * is found. For example, we might have a PFN-mapped pte in * a region that doesn't allow it. * * The calling function must still handle the error. */ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, pte_t pte, struct page *page) { pgd_t *pgd = pgd_offset(vma->vm_mm, addr); p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr); pmd_t *pmd = pmd_offset(pud, addr); struct address_space *mapping; pgoff_t index; static unsigned long resume; static unsigned long nr_shown; static unsigned long nr_unshown; /* * Allow a burst of 60 reports, then keep quiet for that minute; * or allow a steady drip of one report per second. */ if (nr_shown == 60) { if (time_before(jiffies, resume)) { nr_unshown++; return; } if (nr_unshown) { pr_alert("BUG: Bad page map: %lu messages suppressed\n", nr_unshown); nr_unshown = 0; } nr_shown = 0; } if (nr_shown++ == 0) resume = jiffies + 60 * HZ; mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; index = linear_page_index(vma, addr); pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", current->comm, (long long)pte_val(pte), (long long)pmd_val(*pmd)); if (page) dump_page(page, "bad pte"); pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx\n", (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); pr_alert("file:%pD fault:%ps mmap:%ps read_folio:%ps\n", vma->vm_file, vma->vm_ops ? vma->vm_ops->fault : NULL, vma->vm_file ? vma->vm_file->f_op->mmap : NULL, mapping ? mapping->a_ops->read_folio : NULL); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } /* * vm_normal_page -- This function gets the "struct page" associated with a pte. * * "Special" mappings do not wish to be associated with a "struct page" (either * it doesn't exist, or it exists but they don't want to touch it). In this * case, NULL is returned here. "Normal" mappings do have a struct page. * * There are 2 broad cases. Firstly, an architecture may define a pte_special() * pte bit, in which case this function is trivial. Secondly, an architecture * may not have a spare pte bit, which requires a more complicated scheme, * described below. * * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a * special mapping (even if there are underlying and valid "struct pages"). * COWed pages of a VM_PFNMAP are always normal. 
* * The way we recognize COWed pages within VM_PFNMAP mappings is through the * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit * set, and the vm_pgoff will point to the first PFN mapped: thus every special * mapping will always honor the rule * * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) * * And for normal mappings this is false. * * This restricts such mappings to be a linear translation from virtual address * to pfn. To get around this restriction, we allow arbitrary mappings so long * as the vma is not a COW mapping; in that case, we know that all ptes are * special (because none can have been COWed). * * * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP. * * VM_MIXEDMAP mappings can likewise contain memory with or without "struct * page" backing, however the difference is that _all_ pages with a struct * page (that is, those where pfn_valid is true) are refcounted and considered * normal pages by the VM. The disadvantage is that pages are refcounted * (which can be slower and simply not an option for some PFNMAP users). The * advantage is that we don't have to follow the strict linearity rule of * PFNMAP mappings in order to support COWable mappings. * */ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { unsigned long pfn = pte_pfn(pte); if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { if (likely(!pte_special(pte))) goto check_pfn; if (vma->vm_ops && vma->vm_ops->find_special_page) return vma->vm_ops->find_special_page(vma, addr); if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return NULL; if (is_zero_pfn(pfn)) return NULL; if (pte_devmap(pte)) /* * NOTE: New users of ZONE_DEVICE will not set pte_devmap() * and will have refcounts incremented on their struct pages * when they are inserted into PTEs, thus they are safe to * return here. Legacy ZONE_DEVICE pages that set pte_devmap() * do not have refcounts. Example of legacy ZONE_DEVICE is * MEMORY_DEVICE_FS_DAX type in pmem or virtio_fs drivers. */ return NULL; print_bad_pte(vma, addr, pte, NULL); return NULL; } /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { if (vma->vm_flags & VM_MIXEDMAP) { if (!pfn_valid(pfn)) return NULL; goto out; } else { unsigned long off; off = (addr - vma->vm_start) >> PAGE_SHIFT; if (pfn == vma->vm_pgoff + off) return NULL; if (!is_cow_mapping(vma->vm_flags)) return NULL; } } if (is_zero_pfn(pfn)) return NULL; check_pfn: if (unlikely(pfn > highest_memmap_pfn)) { print_bad_pte(vma, addr, pte, NULL); return NULL; } /* * NOTE! We still have PageReserved() pages in the page tables. * eg. VDSO mappings can cause them to exist. */ out: return pfn_to_page(pfn); } struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { struct page *page = vm_normal_page(vma, addr, pte); if (page) return page_folio(page); return NULL; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd) { unsigned long pfn = pmd_pfn(pmd); /* * There is no pmd_special() but there may be special pmds, e.g. * in a direct-access (dax) mapping, so let's just replicate the * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here. 
*/ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { if (vma->vm_flags & VM_MIXEDMAP) { if (!pfn_valid(pfn)) return NULL; goto out; } else { unsigned long off; off = (addr - vma->vm_start) >> PAGE_SHIFT; if (pfn == vma->vm_pgoff + off) return NULL; if (!is_cow_mapping(vma->vm_flags)) return NULL; } } if (pmd_devmap(pmd)) return NULL; if (is_huge_zero_pmd(pmd)) return NULL; if (unlikely(pfn > highest_memmap_pfn)) return NULL; /* * NOTE! We still have PageReserved() pages in the page tables. * eg. VDSO mappings can cause them to exist. */ out: return pfn_to_page(pfn); } struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd) { struct page *page = vm_normal_page_pmd(vma, addr, pmd); if (page) return page_folio(page); return NULL; } #endif static void restore_exclusive_pte(struct vm_area_struct *vma, struct page *page, unsigned long address, pte_t *ptep) { struct folio *folio = page_folio(page); pte_t orig_pte; pte_t pte; swp_entry_t entry; orig_pte = ptep_get(ptep); pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); if (pte_swp_soft_dirty(orig_pte)) pte = pte_mksoft_dirty(pte); entry = pte_to_swp_entry(orig_pte); if (pte_swp_uffd_wp(orig_pte)) pte = pte_mkuffd_wp(pte); else if (is_writable_device_exclusive_entry(entry)) pte = maybe_mkwrite(pte_mkdirty(pte), vma); VM_BUG_ON_FOLIO(pte_write(pte) && (!folio_test_anon(folio) && PageAnonExclusive(page)), folio); /* * No need to take a page reference as one was already * created when the swap entry was made. */ if (folio_test_anon(folio)) folio_add_anon_rmap_pte(folio, page, vma, address, RMAP_NONE); else /* * Currently device exclusive access only supports anonymous * memory so the entry shouldn't point to a filebacked page. */ WARN_ON_ONCE(1); set_pte_at(vma->vm_mm, address, ptep, pte); /* * No need to invalidate - it was non-present before. However * secondary CPUs may have mappings that need invalidating. */ update_mmu_cache(vma, address, ptep); } /* * Tries to restore an exclusive pte if the page lock can be acquired without * sleeping. */ static int try_restore_exclusive_pte(pte_t *src_pte, struct vm_area_struct *vma, unsigned long addr) { swp_entry_t entry = pte_to_swp_entry(ptep_get(src_pte)); struct page *page = pfn_swap_entry_to_page(entry); if (trylock_page(page)) { restore_exclusive_pte(vma, page, addr, src_pte); unlock_page(page); return 0; } return -EBUSY; } /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range * covered by this vma. */ static unsigned long copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long addr, int *rss) { unsigned long vm_flags = dst_vma->vm_flags; pte_t orig_pte = ptep_get(src_pte); pte_t pte = orig_pte; struct folio *folio; struct page *page; swp_entry_t entry = pte_to_swp_entry(orig_pte); if (likely(!non_swap_entry(entry))) { if (swap_duplicate(entry) < 0) return -EIO; /* make sure dst_mm is on swapoff's mmlist. */ if (unlikely(list_empty(&dst_mm->mmlist))) { spin_lock(&mmlist_lock); if (list_empty(&dst_mm->mmlist)) list_add(&dst_mm->mmlist, &src_mm->mmlist); spin_unlock(&mmlist_lock); } /* Mark the swap entry as shared. 
*/ if (pte_swp_exclusive(orig_pte)) { pte = pte_swp_clear_exclusive(orig_pte); set_pte_at(src_mm, addr, src_pte, pte); } rss[MM_SWAPENTS]++; } else if (is_migration_entry(entry)) { folio = pfn_swap_entry_folio(entry); rss[mm_counter(folio)]++; if (!is_readable_migration_entry(entry) && is_cow_mapping(vm_flags)) { /* * COW mappings require pages in both parent and child * to be set to read. A previously exclusive entry is * now shared. */ entry = make_readable_migration_entry( swp_offset(entry)); pte = swp_entry_to_pte(entry); if (pte_swp_soft_dirty(orig_pte)) pte = pte_swp_mksoft_dirty(pte); if (pte_swp_uffd_wp(orig_pte)) pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } } else if (is_device_private_entry(entry)) { page = pfn_swap_entry_to_page(entry); folio = page_folio(page); /* * Update rss count even for unaddressable pages, as * they should treated just like normal pages in this * respect. * * We will likely want to have some new rss counters * for unaddressable pages, at some point. But for now * keep things as they are. */ folio_get(folio); rss[mm_counter(folio)]++; /* Cannot fail as these pages cannot get pinned. */ folio_try_dup_anon_rmap_pte(folio, page, src_vma); /* * We do not preserve soft-dirty information, because so * far, checkpoint/restore is the only feature that * requires that. And checkpoint/restore does not work * when a device driver is involved (you cannot easily * save and restore device driver state). */ if (is_writable_device_private_entry(entry) && is_cow_mapping(vm_flags)) { entry = make_readable_device_private_entry( swp_offset(entry)); pte = swp_entry_to_pte(entry); if (pte_swp_uffd_wp(orig_pte)) pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } } else if (is_device_exclusive_entry(entry)) { /* * Make device exclusive entries present by restoring the * original entry then copying as for a present pte. Device * exclusive entries currently only support private writable * (ie. COW) mappings. */ VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); if (try_restore_exclusive_pte(src_pte, src_vma, addr)) return -EBUSY; return -ENOENT; } else if (is_pte_marker_entry(entry)) { pte_marker marker = copy_pte_marker(entry, dst_vma); if (marker) set_pte_at(dst_mm, addr, dst_pte, make_pte_marker(marker)); return 0; } if (!userfaultfd_wp(dst_vma)) pte = pte_swp_clear_uffd_wp(pte); set_pte_at(dst_mm, addr, dst_pte, pte); return 0; } /* * Copy a present and normal page. * * NOTE! The usual case is that this isn't required; * instead, the caller can just increase the page refcount * and re-use the pte the traditional way. * * And if we need a pre-allocated page but don't yet have * one, return a negative error to let the preallocation * code know so that it can do so outside the page table * lock. */ static inline int copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss, struct folio **prealloc, struct page *page) { struct folio *new_folio; pte_t pte; new_folio = *prealloc; if (!new_folio) return -EAGAIN; /* * We have a prealloc page, all good! Take it * over and copy the page & arm it. 
*/ *prealloc = NULL; copy_user_highpage(&new_folio->page, page, addr, src_vma); __folio_mark_uptodate(new_folio); folio_add_new_anon_rmap(new_folio, dst_vma, addr); folio_add_lru_vma(new_folio, dst_vma); rss[MM_ANONPAGES]++; /* All done, just insert the new page copy in the child */ pte = mk_pte(&new_folio->page, dst_vma->vm_page_prot); pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma); if (userfaultfd_pte_wp(dst_vma, ptep_get(src_pte))) /* Uffd-wp needs to be delivered to dest pte as well */ pte = pte_mkuffd_wp(pte); set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); return 0; } static __always_inline void __copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pte_t *dst_pte, pte_t *src_pte, pte_t pte, unsigned long addr, int nr) { struct mm_struct *src_mm = src_vma->vm_mm; /* If it's a COW mapping, write protect it both processes. */ if (is_cow_mapping(src_vma->vm_flags) && pte_write(pte)) { wrprotect_ptes(src_mm, addr, src_pte, nr); pte = pte_wrprotect(pte); } /* If it's a shared mapping, mark it clean in the child. */ if (src_vma->vm_flags & VM_SHARED) pte = pte_mkclean(pte); pte = pte_mkold(pte); if (!userfaultfd_wp(dst_vma)) pte = pte_clear_uffd_wp(pte); set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr); } /* * Copy one present PTE, trying to batch-process subsequent PTEs that map * consecutive pages of the same folio by copying them as well. * * Returns -EAGAIN if one preallocated page is required to copy the next PTE. * Otherwise, returns the number of copied PTEs (at least 1). */ static inline int copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pte_t *dst_pte, pte_t *src_pte, pte_t pte, unsigned long addr, int max_nr, int *rss, struct folio **prealloc) { struct page *page; struct folio *folio; bool any_writable; fpb_t flags = 0; int err, nr; page = vm_normal_page(src_vma, addr, pte); if (unlikely(!page)) goto copy_pte; folio = page_folio(page); /* * If we likely have to copy, just don't bother with batching. Make * sure that the common "small folio" case is as fast as possible * by keeping the batching logic separate. */ if (unlikely(!*prealloc && folio_test_large(folio) && max_nr != 1)) { if (src_vma->vm_flags & VM_SHARED) flags |= FPB_IGNORE_DIRTY; if (!vma_soft_dirty_enabled(src_vma)) flags |= FPB_IGNORE_SOFT_DIRTY; nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags, &any_writable); folio_ref_add(folio, nr); if (folio_test_anon(folio)) { if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page, nr, src_vma))) { folio_ref_sub(folio, nr); return -EAGAIN; } rss[MM_ANONPAGES] += nr; VM_WARN_ON_FOLIO(PageAnonExclusive(page), folio); } else { folio_dup_file_rmap_ptes(folio, page, nr); rss[mm_counter_file(folio)] += nr; } if (any_writable) pte = pte_mkwrite(pte, src_vma); __copy_present_ptes(dst_vma, src_vma, dst_pte, src_pte, pte, addr, nr); return nr; } folio_get(folio); if (folio_test_anon(folio)) { /* * If this page may have been pinned by the parent process, * copy the page immediately for the child so that we'll always * guarantee the pinned page won't be randomly replaced in the * future. */ if (unlikely(folio_try_dup_anon_rmap_pte(folio, page, src_vma))) { /* Page may be pinned, we have to copy. */ folio_put(folio); err = copy_present_page(dst_vma, src_vma, dst_pte, src_pte, addr, rss, prealloc, page); return err ? 
err : 1; } rss[MM_ANONPAGES]++; VM_WARN_ON_FOLIO(PageAnonExclusive(page), folio); } else { folio_dup_file_rmap_pte(folio, page); rss[mm_counter_file(folio)]++; } copy_pte: __copy_present_ptes(dst_vma, src_vma, dst_pte, src_pte, pte, addr, 1); return 1; } static inline struct folio *folio_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma, unsigned long addr, bool need_zero) { struct folio *new_folio; if (need_zero) new_folio = vma_alloc_zeroed_movable_folio(vma, addr); else new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, addr, false); if (!new_folio) return NULL; if (mem_cgroup_charge(new_folio, src_mm, GFP_KERNEL)) { folio_put(new_folio); return NULL; } folio_throttle_swaprate(new_folio, GFP_KERNEL); return new_folio; } static int copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, unsigned long end) { struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; pte_t *orig_src_pte, *orig_dst_pte; pte_t *src_pte, *dst_pte; pte_t ptent; spinlock_t *src_ptl, *dst_ptl; int progress, max_nr, ret = 0; int rss[NR_MM_COUNTERS]; swp_entry_t entry = (swp_entry_t){0}; struct folio *prealloc = NULL; int nr; again: progress = 0; init_rss_vec(rss); /* * copy_pmd_range()'s prior pmd_none_or_clear_bad(src_pmd), and the * error handling here, assume that exclusive mmap_lock on dst and src * protects anon from unexpected THP transitions; with shmem and file * protected by mmap_lock-less collapse skipping areas with anon_vma * (whereas vma_needs_copy() skips areas without anon_vma). A rework * can remove such assumptions later, but this is good enough for now. */ dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); if (!dst_pte) { ret = -ENOMEM; goto out; } src_pte = pte_offset_map_nolock(src_mm, src_pmd, addr, &src_ptl); if (!src_pte) { pte_unmap_unlock(dst_pte, dst_ptl); /* ret == 0 */ goto out; } spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); orig_src_pte = src_pte; orig_dst_pte = dst_pte; arch_enter_lazy_mmu_mode(); do { nr = 1; /* * We are holding two locks at this point - either of them * could generate latencies in another task on another CPU. */ if (progress >= 32) { progress = 0; if (need_resched() || spin_needbreak(src_ptl) || spin_needbreak(dst_ptl)) break; } ptent = ptep_get(src_pte); if (pte_none(ptent)) { progress++; continue; } if (unlikely(!pte_present(ptent))) { ret = copy_nonpresent_pte(dst_mm, src_mm, dst_pte, src_pte, dst_vma, src_vma, addr, rss); if (ret == -EIO) { entry = pte_to_swp_entry(ptep_get(src_pte)); break; } else if (ret == -EBUSY) { break; } else if (!ret) { progress += 8; continue; } ptent = ptep_get(src_pte); VM_WARN_ON_ONCE(!pte_present(ptent)); /* * Device exclusive entry restored, continue by copying * the now present pte. */ WARN_ON_ONCE(ret != -ENOENT); } /* copy_present_ptes() will clear `*prealloc' if consumed */ max_nr = (end - addr) / PAGE_SIZE; ret = copy_present_ptes(dst_vma, src_vma, dst_pte, src_pte, ptent, addr, max_nr, rss, &prealloc); /* * If we need a pre-allocated page for this pte, drop the * locks, allocate, and try again. */ if (unlikely(ret == -EAGAIN)) break; if (unlikely(prealloc)) { /* * pre-alloc page cannot be reused by next time so as * to strictly follow mempolicy (e.g., alloc_page_vma() * will allocate page according to address). This * could only happen if one pinned pte changed. 
*/ folio_put(prealloc); prealloc = NULL; } nr = ret; progress += 8 * nr; } while (dst_pte += nr, src_pte += nr, addr += PAGE_SIZE * nr, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(orig_src_pte, src_ptl); add_mm_rss_vec(dst_mm, rss); pte_unmap_unlock(orig_dst_pte, dst_ptl); cond_resched(); if (ret == -EIO) { VM_WARN_ON_ONCE(!entry.val); if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) { ret = -ENOMEM; goto out; } entry.val = 0; } else if (ret == -EBUSY) { goto out; } else if (ret == -EAGAIN) { prealloc = folio_prealloc(src_mm, src_vma, addr, false); if (!prealloc) return -ENOMEM; } else if (ret < 0) { VM_WARN_ON_ONCE(1); } /* We've captured and resolved the error. Reset, try again. */ ret = 0; if (addr != end) goto again; out: if (unlikely(prealloc)) folio_put(prealloc); return ret; } static inline int copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pud_t *dst_pud, pud_t *src_pud, unsigned long addr, unsigned long end) { struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; pmd_t *src_pmd, *dst_pmd; unsigned long next; dst_pmd = pmd_alloc(dst_mm, dst_pud, addr); if (!dst_pmd) return -ENOMEM; src_pmd = pmd_offset(src_pud, addr); do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd, addr, dst_vma, src_vma); if (err == -ENOMEM) return -ENOMEM; if (!err) continue; /* fall through */ } if (pmd_none_or_clear_bad(src_pmd)) continue; if (copy_pte_range(dst_vma, src_vma, dst_pmd, src_pmd, addr, next)) return -ENOMEM; } while (dst_pmd++, src_pmd++, addr = next, addr != end); return 0; } static inline int copy_pud_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, p4d_t *dst_p4d, p4d_t *src_p4d, unsigned long addr, unsigned long end) { struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; pud_t *src_pud, *dst_pud; unsigned long next; dst_pud = pud_alloc(dst_mm, dst_p4d, addr); if (!dst_pud) return -ENOMEM; src_pud = pud_offset(src_p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); err = copy_huge_pud(dst_mm, src_mm, dst_pud, src_pud, addr, src_vma); if (err == -ENOMEM) return -ENOMEM; if (!err) continue; /* fall through */ } if (pud_none_or_clear_bad(src_pud)) continue; if (copy_pmd_range(dst_vma, src_vma, dst_pud, src_pud, addr, next)) return -ENOMEM; } while (dst_pud++, src_pud++, addr = next, addr != end); return 0; } static inline int copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long addr, unsigned long end) { struct mm_struct *dst_mm = dst_vma->vm_mm; p4d_t *src_p4d, *dst_p4d; unsigned long next; dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); if (!dst_p4d) return -ENOMEM; src_p4d = p4d_offset(src_pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(src_p4d)) continue; if (copy_pud_range(dst_vma, src_vma, dst_p4d, src_p4d, addr, next)) return -ENOMEM; } while (dst_p4d++, src_p4d++, addr = next, addr != end); return 0; } /* * Return true if the vma needs to copy the pgtable during this fork(). Return * false when we can speed up fork() by allowing lazy page faults later until * when the child accesses the memory range. 
*/ static bool vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) { /* * Always copy pgtables when dst_vma has uffd-wp enabled even if it's * file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable * contains uffd-wp protection information, that's something we can't * retrieve from page cache, and skip copying will lose those info. */ if (userfaultfd_wp(dst_vma)) return true; if (src_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return true; if (src_vma->anon_vma) return true; /* * Don't copy ptes where a page fault will fill them correctly. Fork * becomes much lighter when there are big shared or private readonly * mappings. The tradeoff is that copy_page_range is more efficient * than faulting. */ return false; } int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) { pgd_t *src_pgd, *dst_pgd; unsigned long next; unsigned long addr = src_vma->vm_start; unsigned long end = src_vma->vm_end; struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; struct mmu_notifier_range range; bool is_cow; int ret; if (!vma_needs_copy(dst_vma, src_vma)) return 0; if (is_vm_hugetlb_page(src_vma)) return copy_hugetlb_page_range(dst_mm, src_mm, dst_vma, src_vma); if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { /* * We do not free on error cases below as remove_vma * gets called on error from higher level routine */ ret = track_pfn_copy(src_vma); if (ret) return ret; } /* * We need to invalidate the secondary MMU mappings only when * there could be a permission downgrade on the ptes of the * parent mm. And a permission downgrade will only happen if * is_cow_mapping() returns true. */ is_cow = is_cow_mapping(src_vma->vm_flags); if (is_cow) { mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, src_mm, addr, end); mmu_notifier_invalidate_range_start(&range); /* * Disabling preemption is not needed for the write side, as * the read side doesn't spin, but goes to the mmap_lock. * * Use the raw variant of the seqcount_t write API to avoid * lockdep complaining about preemptibility. */ vma_assert_write_locked(src_vma); raw_write_seqcount_begin(&src_mm->write_protect_seq); } ret = 0; dst_pgd = pgd_offset(dst_mm, addr); src_pgd = pgd_offset(src_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; if (unlikely(copy_p4d_range(dst_vma, src_vma, dst_pgd, src_pgd, addr, next))) { untrack_pfn_clear(dst_vma); ret = -ENOMEM; break; } } while (dst_pgd++, src_pgd++, addr = next, addr != end); if (is_cow) { raw_write_seqcount_end(&src_mm->write_protect_seq); mmu_notifier_invalidate_range_end(&range); } return ret; } /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { /* By default, zap all pages */ if (!details) return true; /* Or, we zap COWed pages only if the caller wants to */ return details->even_cows; } /* Decides whether we should zap this folio with the folio pointer specified */ static inline bool should_zap_folio(struct zap_details *details, struct folio *folio) { /* If we can make a decision without *folio.. */ if (should_zap_cows(details)) return true; /* Otherwise we should only zap non-anon folios */ return !folio_test_anon(folio); } static inline bool zap_drop_file_uffd_wp(struct zap_details *details) { if (!details) return false; return details->zap_flags & ZAP_FLAG_DROP_MARKER; } /* * This function makes sure that we'll replace the none pte with an uffd-wp * swap special pte marker when necessary. 
Must be with the pgtable lock held. */ static inline void zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, int nr, struct zap_details *details, pte_t pteval) { /* Zap on anonymous always means dropping everything */ if (vma_is_anonymous(vma)) return; if (zap_drop_file_uffd_wp(details)) return; for (;;) { /* the PFN in the PTE is irrelevant. */ pte_install_uffd_wp_if_needed(vma, addr, pte, pteval); if (--nr == 0) break; pte++; addr += PAGE_SIZE; } } static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb, struct vm_area_struct *vma, struct folio *folio, struct page *page, pte_t *pte, pte_t ptent, unsigned int nr, unsigned long addr, struct zap_details *details, int *rss, bool *force_flush, bool *force_break) { struct mm_struct *mm = tlb->mm; bool delay_rmap = false; if (!folio_test_anon(folio)) { ptent = get_and_clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); if (pte_dirty(ptent)) { folio_mark_dirty(folio); if (tlb_delay_rmap(tlb)) { delay_rmap = true; *force_flush = true; } } if (pte_young(ptent) && likely(vma_has_recency(vma))) folio_mark_accessed(folio); rss[mm_counter(folio)] -= nr; } else { /* We don't need up-to-date accessed/dirty bits. */ clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); rss[MM_ANONPAGES] -= nr; } /* Checking a single PTE in a batch is sufficient. */ arch_check_zapped_pte(vma, ptent); tlb_remove_tlb_entries(tlb, pte, nr, addr); if (unlikely(userfaultfd_pte_wp(vma, ptent))) zap_install_uffd_wp_if_needed(vma, addr, pte, nr, details, ptent); if (!delay_rmap) { folio_remove_rmap_ptes(folio, page, nr, vma); /* Only sanity-check the first page in a batch. */ if (unlikely(page_mapcount(page) < 0)) print_bad_pte(vma, addr, ptent, page); } if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) { *force_flush = true; *force_break = true; } } /* * Zap or skip at least one present PTE, trying to batch-process subsequent * PTEs that map consecutive pages of the same folio. * * Returns the number of processed (skipped or zapped) PTEs (at least 1). */ static inline int zap_present_ptes(struct mmu_gather *tlb, struct vm_area_struct *vma, pte_t *pte, pte_t ptent, unsigned int max_nr, unsigned long addr, struct zap_details *details, int *rss, bool *force_flush, bool *force_break) { const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; struct mm_struct *mm = tlb->mm; struct folio *folio; struct page *page; int nr; page = vm_normal_page(vma, addr, ptent); if (!page) { /* We don't need up-to-date accessed/dirty bits. */ ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); arch_check_zapped_pte(vma, ptent); tlb_remove_tlb_entry(tlb, pte, addr); if (userfaultfd_pte_wp(vma, ptent)) zap_install_uffd_wp_if_needed(vma, addr, pte, 1, details, ptent); ksm_might_unmap_zero_page(mm, ptent); return 1; } folio = page_folio(page); if (unlikely(!should_zap_folio(details, folio))) return 1; /* * Make sure that the common "small folio" case is as fast as possible * by keeping the batching logic separate. 
*/ if (unlikely(folio_test_large(folio) && max_nr != 1)) { nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags, NULL); zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr, addr, details, rss, force_flush, force_break); return nr; } zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, 1, addr, details, rss, force_flush, force_break); return 1; } static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, struct zap_details *details) { bool force_flush = false, force_break = false; struct mm_struct *mm = tlb->mm; int rss[NR_MM_COUNTERS]; spinlock_t *ptl; pte_t *start_pte; pte_t *pte; swp_entry_t entry; int nr; tlb_change_page_size(tlb, PAGE_SIZE); init_rss_vec(rss); start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); if (!pte) return addr; flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = ptep_get(pte); struct folio *folio; struct page *page; int max_nr; nr = 1; if (pte_none(ptent)) continue; if (need_resched()) break; if (pte_present(ptent)) { max_nr = (end - addr) / PAGE_SIZE; nr = zap_present_ptes(tlb, vma, pte, ptent, max_nr, addr, details, rss, &force_flush, &force_break); if (unlikely(force_break)) { addr += nr * PAGE_SIZE; break; } continue; } entry = pte_to_swp_entry(ptent); if (is_device_private_entry(entry) || is_device_exclusive_entry(entry)) { page = pfn_swap_entry_to_page(entry); folio = page_folio(page); if (unlikely(!should_zap_folio(details, folio))) continue; /* * Both device private/exclusive mappings should only * work with anonymous page so far, so we don't need to * consider uffd-wp bit when zap. For more information, * see zap_install_uffd_wp_if_needed(). */ WARN_ON_ONCE(!vma_is_anonymous(vma)); rss[mm_counter(folio)]--; if (is_device_private_entry(entry)) folio_remove_rmap_pte(folio, page, vma); folio_put(folio); } else if (!non_swap_entry(entry)) { /* Genuine swap entry, hence a private anon page */ if (!should_zap_cows(details)) continue; rss[MM_SWAPENTS]--; if (unlikely(!free_swap_and_cache(entry))) print_bad_pte(vma, addr, ptent, NULL); } else if (is_migration_entry(entry)) { folio = pfn_swap_entry_folio(entry); if (!should_zap_folio(details, folio)) continue; rss[mm_counter(folio)]--; } else if (pte_marker_entry_uffd_wp(entry)) { /* * For anon: always drop the marker; for file: only * drop the marker if explicitly requested. */ if (!vma_is_anonymous(vma) && !zap_drop_file_uffd_wp(details)) continue; } else if (is_hwpoison_entry(entry) || is_poisoned_swp_entry(entry)) { if (!should_zap_cows(details)) continue; } else { /* We should have covered all the swap entry types */ pr_alert("unrecognized swap entry 0x%lx\n", entry.val); WARN_ON_ONCE(1); } pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); zap_install_uffd_wp_if_needed(vma, addr, pte, 1, details, ptent); } while (pte += nr, addr += PAGE_SIZE * nr, addr != end); add_mm_rss_vec(mm, rss); arch_leave_lazy_mmu_mode(); /* Do the actual TLB flush before dropping ptl */ if (force_flush) { tlb_flush_mmu_tlbonly(tlb); tlb_flush_rmaps(tlb, vma); } pte_unmap_unlock(start_pte, ptl); /* * If we forced a TLB flush (either due to running out of * batch buffers or because we needed to flush dirty TLB * entries before releasing the ptl), free the batched * memory too. Come back again if we didn't do everything. 
*/ if (force_flush) tlb_flush_mmu(tlb); return addr; } static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, struct zap_details *details) { pmd_t *pmd; unsigned long next; pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) __split_huge_pmd(vma, pmd, addr, false, NULL); else if (zap_huge_pmd(tlb, vma, pmd, addr)) { addr = next; continue; } /* fall through */ } else if (details && details->single_folio && folio_test_pmd_mappable(details->single_folio) && next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { spinlock_t *ptl = pmd_lock(tlb->mm, pmd); /* * Take and drop THP pmd lock so that we cannot return * prematurely, while zap_huge_pmd() has cleared *pmd, * but not yet decremented compound_mapcount(). */ spin_unlock(ptl); } if (pmd_none(*pmd)) { addr = next; continue; } addr = zap_pte_range(tlb, vma, pmd, addr, next, details); if (addr != next) pmd--; } while (pmd++, cond_resched(), addr != end); return addr; } static inline unsigned long zap_pud_range(struct mmu_gather *tlb, struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, struct zap_details *details) { pud_t *pud; unsigned long next; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*pud) || pud_devmap(*pud)) { if (next - addr != HPAGE_PUD_SIZE) { mmap_assert_locked(tlb->mm); split_huge_pud(vma, pud, addr); } else if (zap_huge_pud(tlb, vma, pud, addr)) goto next; /* fall through */ } if (pud_none_or_clear_bad(pud)) continue; next = zap_pmd_range(tlb, vma, pud, addr, next, details); next: cond_resched(); } while (pud++, addr = next, addr != end); return addr; } static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, struct zap_details *details) { p4d_t *p4d; unsigned long next; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; next = zap_pud_range(tlb, vma, p4d, addr, next, details); } while (p4d++, addr = next, addr != end); return addr; } void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details) { pgd_t *pgd; unsigned long next; BUG_ON(addr >= end); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); } static void unmap_single_vma(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details, bool mm_wr_locked) { unsigned long start = max(vma->vm_start, start_addr); unsigned long end; if (start >= vma->vm_end) return; end = min(vma->vm_end, end_addr); if (end <= vma->vm_start) return; if (vma->vm_file) uprobe_munmap(vma, start, end); if (unlikely(vma->vm_flags & VM_PFNMAP)) untrack_pfn(vma, 0, 0, mm_wr_locked); if (start != end) { if (unlikely(is_vm_hugetlb_page(vma))) { /* * It is undesirable to test vma->vm_file as it * should be non-null for valid hugetlb area. * However, vm_file will be NULL in the error * cleanup path of mmap_region. When * hugetlbfs ->mmap method fails, * mmap_region() nullifies vma->vm_file * before calling this function to clean up. 
* Since no pte has actually been setup, it is * safe to do nothing in this case. */ if (vma->vm_file) { zap_flags_t zap_flags = details ? details->zap_flags : 0; __unmap_hugepage_range(tlb, vma, start, end, NULL, zap_flags); } } else unmap_page_range(tlb, vma, start, end, details); } } /** * unmap_vmas - unmap a range of memory covered by a list of vma's * @tlb: address of the caller's struct mmu_gather * @mas: the maple state * @vma: the starting vma * @start_addr: virtual address at which to start unmapping * @end_addr: virtual address at which to end unmapping * @tree_end: The maximum index to check * @mm_wr_locked: lock flag * * Unmap all pages in the vma list. * * Only addresses between `start' and `end' will be unmapped. * * The VMA list must be sorted in ascending virtual address order. * * unmap_vmas() assumes that the caller will flush the whole unmapped address * range after unmap_vmas() returns. So the only responsibility here is to * ensure that any thus-far unmapped pages are flushed before unmap_vmas() * drops the lock and schedules. */ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, unsigned long tree_end, bool mm_wr_locked) { struct mmu_notifier_range range; struct zap_details details = { .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, /* Careful - we need to zap private pages too! */ .even_cows = true, }; mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, start_addr, end_addr); mmu_notifier_invalidate_range_start(&range); do { unsigned long start = start_addr; unsigned long end = end_addr; hugetlb_zap_begin(vma, &start, &end); unmap_single_vma(tlb, vma, start, end, &details, mm_wr_locked); hugetlb_zap_end(vma, &details); vma = mas_find(mas, tree_end - 1); } while (vma && likely(!xa_is_zero(vma))); mmu_notifier_invalidate_range_end(&range); } /** * zap_page_range_single - remove user pages in a given range * @vma: vm_area_struct holding the applicable pages * @address: starting address of pages to zap * @size: number of bytes to zap * @details: details of shared cache invalidation * * The range must fit into one VMA. */ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { const unsigned long end = address + size; struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, address, end); hugetlb_zap_begin(vma, &range.start, &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); /* * unmap 'address-end' not 'range.start-range.end' as range * could have been expanded for hugetlb pmd sharing. */ unmap_single_vma(&tlb, vma, address, end, details, false); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); hugetlb_zap_end(vma, details); } /** * zap_vma_ptes - remove ptes mapping the vma * @vma: vm_area_struct holding ptes to be zapped * @address: starting address of pages to zap * @size: number of bytes to zap * * This function only unmaps ptes assigned to VM_PFNMAP vmas. * * The entire address range must be fully contained within the vma. 
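 *
 * Illustrative sketch only (hypothetical driver, not part of this file): a
 * driver revoking a mapping it previously established with remap_pfn_range()
 * would typically call
 *
 *	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 *
 * so that later user accesses fault instead of reaching the stale pfns.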
* */ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size) { if (!range_in_vma(vma, address, address + size) || !(vma->vm_flags & VM_PFNMAP)) return; zap_page_range_single(vma, address, size, NULL); } EXPORT_SYMBOL_GPL(zap_vma_ptes); static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return NULL; pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); return pmd; } pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) { pmd_t *pmd = walk_to_pmd(mm, addr); if (!pmd) return NULL; return pte_alloc_map_lock(mm, pmd, addr, ptl); } static int validate_page_before_insert(struct page *page) { struct folio *folio = page_folio(page); if (folio_test_anon(folio) || folio_test_slab(folio) || page_has_type(page)) return -EINVAL; flush_dcache_folio(folio); return 0; } static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte, unsigned long addr, struct page *page, pgprot_t prot) { struct folio *folio = page_folio(page); if (!pte_none(ptep_get(pte))) return -EBUSY; /* Ok, finally just insert the thing.. */ folio_get(folio); inc_mm_counter(vma->vm_mm, mm_counter_file(folio)); folio_add_file_rmap_pte(folio, page, vma); set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot)); return 0; } /* * This is the old fallback for page remapping. * * For historical reasons, it only allows reserved pages. Only * old drivers should use this, and they needed to mark their * pages reserved for the old functions anyway. */ static int insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot) { int retval; pte_t *pte; spinlock_t *ptl; retval = validate_page_before_insert(page); if (retval) goto out; retval = -ENOMEM; pte = get_locked_pte(vma->vm_mm, addr, &ptl); if (!pte) goto out; retval = insert_page_into_pte_locked(vma, pte, addr, page, prot); pte_unmap_unlock(pte, ptl); out: return retval; } static int insert_page_in_batch_locked(struct vm_area_struct *vma, pte_t *pte, unsigned long addr, struct page *page, pgprot_t prot) { int err; if (!page_count(page)) return -EINVAL; err = validate_page_before_insert(page); if (err) return err; return insert_page_into_pte_locked(vma, pte, addr, page, prot); } /* insert_pages() amortizes the cost of spinlock operations * when inserting pages in a loop. */ static int insert_pages(struct vm_area_struct *vma, unsigned long addr, struct page **pages, unsigned long *num, pgprot_t prot) { pmd_t *pmd = NULL; pte_t *start_pte, *pte; spinlock_t *pte_lock; struct mm_struct *const mm = vma->vm_mm; unsigned long curr_page_idx = 0; unsigned long remaining_pages_total = *num; unsigned long pages_to_write_in_pmd; int ret; more: ret = -EFAULT; pmd = walk_to_pmd(mm, addr); if (!pmd) goto out; pages_to_write_in_pmd = min_t(unsigned long, remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); /* Allocate the PTE if necessary; takes PMD lock once only. 
*/ ret = -ENOMEM; if (pte_alloc(mm, pmd)) goto out; while (pages_to_write_in_pmd) { int pte_idx = 0; const int batch_size = min_t(int, pages_to_write_in_pmd, 8); start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock); if (!start_pte) { ret = -EFAULT; goto out; } for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) { int err = insert_page_in_batch_locked(vma, pte, addr, pages[curr_page_idx], prot); if (unlikely(err)) { pte_unmap_unlock(start_pte, pte_lock); ret = err; remaining_pages_total -= pte_idx; goto out; } addr += PAGE_SIZE; ++curr_page_idx; } pte_unmap_unlock(start_pte, pte_lock); pages_to_write_in_pmd -= batch_size; remaining_pages_total -= batch_size; } if (remaining_pages_total) goto more; ret = 0; out: *num = remaining_pages_total; return ret; } /** * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock. * @vma: user vma to map to * @addr: target start user address of these pages * @pages: source kernel pages * @num: in: number of pages to map. out: number of pages that were *not* * mapped. (0 means all pages were successfully mapped). * * Preferred over vm_insert_page() when inserting multiple pages. * * In case of error, we may have mapped a subset of the provided * pages. It is the caller's responsibility to account for this case. * * The same restrictions apply as in vm_insert_page(). */ int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, struct page **pages, unsigned long *num) { const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; if (addr < vma->vm_start || end_addr >= vma->vm_end) return -EFAULT; if (!(vma->vm_flags & VM_MIXEDMAP)) { BUG_ON(mmap_read_trylock(vma->vm_mm)); BUG_ON(vma->vm_flags & VM_PFNMAP); vm_flags_set(vma, VM_MIXEDMAP); } /* Defer page refcount checking till we're about to map that page. */ return insert_pages(vma, addr, pages, num, vma->vm_page_prot); } EXPORT_SYMBOL(vm_insert_pages); /** * vm_insert_page - insert single page into user vma * @vma: user vma to map to * @addr: target user address of this page * @page: source kernel page * * This allows drivers to insert individual pages they've allocated * into a user vma. * * The page has to be a nice clean _individual_ kernel allocation. * If you allocate a compound page, you need to have marked it as * such (__GFP_COMP), or manually just split the page up yourself * (see split_page()). * * NOTE! Traditionally this was done with "remap_pfn_range()" which * took an arbitrary page protection parameter. This doesn't allow * that. Your vma protection will have to be set up correctly, which * means that if you want a shared writable mapping, you'd better * ask for a shared writable mapping! * * The page does not need to be reserved. * * Usually this function is called from f_op->mmap() handler * under mm->mmap_lock write-lock, so it can change vma->vm_flags. * Caller must set VM_MIXEDMAP on vma if it wants to call this * function from other places, for example from page-fault handler. * * Return: %0 on success, negative error code otherwise. 
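 *
 * Illustrative sketch only (hypothetical "foo" names, not part of this file):
 * a driver that allocated a single kernel page could map it from its ->mmap
 * handler roughly as
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct foo_dev *foo = file->private_data;
 *
 *		return vm_insert_page(vma, vma->vm_start, foo->page);
 *	}
 *
 * relying on the mmap path holding the mmap_lock for write so that
 * VM_MIXEDMAP can be set on the vma here.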
*/ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page) { if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; if (!page_count(page)) return -EINVAL; if (!(vma->vm_flags & VM_MIXEDMAP)) { BUG_ON(mmap_read_trylock(vma->vm_mm)); BUG_ON(vma->vm_flags & VM_PFNMAP); vm_flags_set(vma, VM_MIXEDMAP); } return insert_page(vma, addr, page, vma->vm_page_prot); } EXPORT_SYMBOL(vm_insert_page); /* * __vm_map_pages - maps range of kernel pages into user vma * @vma: user vma to map to * @pages: pointer to array of source kernel pages * @num: number of pages in page array * @offset: user's requested vm_pgoff * * This allows drivers to map range of kernel pages into a user vma. * * Return: 0 on success and error code otherwise. */ static int __vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num, unsigned long offset) { unsigned long count = vma_pages(vma); unsigned long uaddr = vma->vm_start; int ret, i; /* Fail if the user requested offset is beyond the end of the object */ if (offset >= num) return -ENXIO; /* Fail if the user requested size exceeds available object size */ if (count > num - offset) return -ENXIO; for (i = 0; i < count; i++) { ret = vm_insert_page(vma, uaddr, pages[offset + i]); if (ret < 0) return ret; uaddr += PAGE_SIZE; } return 0; } /** * vm_map_pages - maps range of kernel pages starts with non zero offset * @vma: user vma to map to * @pages: pointer to array of source kernel pages * @num: number of pages in page array * * Maps an object consisting of @num pages, catering for the user's * requested vm_pgoff * * If we fail to insert any page into the vma, the function will return * immediately leaving any previously inserted pages present. Callers * from the mmap handler may immediately return the error as their caller * will destroy the vma, removing any successfully inserted pages. Other * callers should make their own arrangements for calling unmap_region(). * * Context: Process context. Called by mmap handlers. * Return: 0 on success and error code otherwise. */ int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num) { return __vm_map_pages(vma, pages, num, vma->vm_pgoff); } EXPORT_SYMBOL(vm_map_pages); /** * vm_map_pages_zero - map range of kernel pages starts with zero offset * @vma: user vma to map to * @pages: pointer to array of source kernel pages * @num: number of pages in page array * * Similar to vm_map_pages(), except that it explicitly sets the offset * to 0. This function is intended for the drivers that did not consider * vm_pgoff. * * Context: Process context. Called by mmap handlers. * Return: 0 on success and error code otherwise. */ int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, unsigned long num) { return __vm_map_pages(vma, pages, num, 0); } EXPORT_SYMBOL(vm_map_pages_zero); static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, pgprot_t prot, bool mkwrite) { struct mm_struct *mm = vma->vm_mm; pte_t *pte, entry; spinlock_t *ptl; pte = get_locked_pte(mm, addr, &ptl); if (!pte) return VM_FAULT_OOM; entry = ptep_get(pte); if (!pte_none(entry)) { if (mkwrite) { /* * For read faults on private mappings the PFN passed * in may not match the PFN we have mapped if the * mapped PFN is a writeable COW page. In the mkwrite * case we are creating a writable PTE for a shared * mapping and we expect the PFNs to match. 
If they * don't match, we are likely racing with block * allocation and mapping invalidation so just skip the * update. */ if (pte_pfn(entry) != pfn_t_to_pfn(pfn)) { WARN_ON_ONCE(!is_zero_pfn(pte_pfn(entry))); goto out_unlock; } entry = pte_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (ptep_set_access_flags(vma, addr, pte, entry, 1)) update_mmu_cache(vma, addr, pte); } goto out_unlock; } /* Ok, finally just insert the thing.. */ if (pfn_t_devmap(pfn)) entry = pte_mkdevmap(pfn_t_pte(pfn, prot)); else entry = pte_mkspecial(pfn_t_pte(pfn, prot)); if (mkwrite) { entry = pte_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); } set_pte_at(mm, addr, pte, entry); update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */ out_unlock: pte_unmap_unlock(pte, ptl); return VM_FAULT_NOPAGE; } /** * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * @pgprot: pgprot flags for the inserted page * * This is exactly like vmf_insert_pfn(), except that it allows drivers * to override pgprot on a per-page basis. * * This only makes sense for IO mappings, and it makes no sense for * COW mappings. In general, using multiple vmas is preferable; * vmf_insert_pfn_prot should only be used if using multiple VMAs is * impractical. * * pgprot typically only differs from @vma->vm_page_prot when drivers set * caching- and encryption bits different than those of @vma->vm_page_prot, * because the caching- or encryption mode may not be known at mmap() time. * * This is ok as long as @vma->vm_page_prot is not used by the core vm * to set caching and encryption bits for those vmas (except for COW pages). * This is ensured by core vm only modifying these page table entries using * functions that don't touch caching- or encryption bits, using pte_modify() * if needed. (See for example mprotect()). * * Also when new page-table entries are created, this is only done using the * fault() callback, and never using the value of vma->vm_page_prot, * except for page-table entries that point to anonymous pages as the result * of COW. * * Context: Process context. May allocate using %GFP_KERNEL. * Return: vm_fault_t value. */ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot) { /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like * consistency in testing and feature parity among all, so we should * try to keep these invariants in place for everybody. */ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == (VM_PFNMAP|VM_MIXEDMAP)); BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return VM_FAULT_SIGBUS; if (!pfn_modify_allowed(pfn, pgprot)) return VM_FAULT_SIGBUS; track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); return insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, false); } EXPORT_SYMBOL(vmf_insert_pfn_prot); /** * vmf_insert_pfn - insert single pfn into user vma * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * * Similar to vm_insert_page, this allows drivers to insert individual pages * they've allocated into a user vma. Same comments apply. 
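 *
 * Illustrative sketch only (hypothetical "foo" names, not part of this file):
 * a VM_PFNMAP fault handler typically returns this function's result
 * directly, e.g.
 *
 *	static vm_fault_t foo_fault(struct vm_fault *vmf)
 *	{
 *		struct foo_dev *foo = vmf->vma->vm_private_data;
 *
 *		return vmf_insert_pfn(vmf->vma, vmf->address,
 *				      foo->base_pfn + vmf->pgoff);
 *	}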
* * This function should only be called from a vm_ops->fault handler, and * in that case the handler should return the result of this function. * * vma cannot be a COW mapping. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. * * Context: Process context. May allocate using %GFP_KERNEL. * Return: vm_fault_t value. */ vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); } EXPORT_SYMBOL(vmf_insert_pfn); static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) { /* these checks mirror the abort conditions in vm_normal_page */ if (vma->vm_flags & VM_MIXEDMAP) return true; if (pfn_t_devmap(pfn)) return true; if (pfn_t_special(pfn)) return true; if (is_zero_pfn(pfn_t_to_pfn(pfn))) return true; return false; } static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, bool mkwrite) { pgprot_t pgprot = vma->vm_page_prot; int err; BUG_ON(!vm_mixed_ok(vma, pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return VM_FAULT_SIGBUS; track_pfn_insert(vma, &pgprot, pfn); if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot)) return VM_FAULT_SIGBUS; /* * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* * refcount the page if pfn_valid is true (hence insert_page rather * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP * without pte special, it would there be refcounted as a normal page. */ if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { struct page *page; /* * At this point we are committed to insert_page() * regardless of whether the caller specified flags that * result in pfn_t_has_page() == false. */ page = pfn_to_page(pfn_t_to_pfn(pfn)); err = insert_page(vma, addr, page, pgprot); } else { return insert_pfn(vma, addr, pfn, pgprot, mkwrite); } if (err == -ENOMEM) return VM_FAULT_OOM; if (err < 0 && err != -EBUSY) return VM_FAULT_SIGBUS; return VM_FAULT_NOPAGE; } vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { return __vm_insert_mixed(vma, addr, pfn, false); } EXPORT_SYMBOL(vmf_insert_mixed); /* * If the insertion of PTE failed because someone else already added a * different entry in the mean time, we treat that as success as we assume * the same entry was actually inserted. */ vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { return __vm_insert_mixed(vma, addr, pfn, true); } EXPORT_SYMBOL(vmf_insert_mixed_mkwrite); /* * maps a range of physical memory into the requested pages. the old * mappings are removed. 
any references to nonexistent pages results * in null mappings (currently treated as "copy-on-access") */ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pte_t *pte, *mapped_pte; spinlock_t *ptl; int err = 0; mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; arch_enter_lazy_mmu_mode(); do { BUG_ON(!pte_none(ptep_get(pte))); if (!pfn_modify_allowed(pfn, prot)) { err = -EACCES; break; } set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(mapped_pte, ptl); return err; } static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pmd_t *pmd; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; VM_BUG_ON(pmd_trans_huge(*pmd)); do { next = pmd_addr_end(addr, end); err = remap_pte_range(mm, pmd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (pmd++, addr = next, addr != end); return 0; } static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pud_t *pud; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); err = remap_pmd_range(mm, pud, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (pud++, addr = next, addr != end); return 0; } static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { p4d_t *p4d; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return -ENOMEM; do { next = p4d_addr_end(addr, end); err = remap_pud_range(mm, p4d, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (p4d++, addr = next, addr != end); return 0; } /* * Variant of remap_pfn_range that does not call track_pfn_remap. The caller * must have pre-validated the caching bits of the pgprot_t. */ int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { pgd_t *pgd; unsigned long next; unsigned long end = addr + PAGE_ALIGN(size); struct mm_struct *mm = vma->vm_mm; int err; if (WARN_ON_ONCE(!PAGE_ALIGNED(addr))) return -EINVAL; /* * Physically remapped pages are special. Tell the * rest of the world about it: * VM_IO tells people not to look at these pages * (accesses can have side effects). * VM_PFNMAP tells the core MM that the base pages are just * raw PFN mappings, and do not have a "struct page" associated * with them. * VM_DONTEXPAND * Disable vma merging and expanding with mremap(). * VM_DONTDUMP * Omit vma from core dump, even when VM_IO turned off. * * There's a horrible special case to handle copy-on-write * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". * See vm_normal_page() for details. 
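 *
 * Illustrative sketch only (hypothetical "foo" names, not part of this file):
 * the exported remap_pfn_range() wrapper below is what a driver ->mmap
 * method normally calls to hand out such a mapping, roughly
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		unsigned long pfn = foo_phys_base(file) >> PAGE_SHIFT;
 *
 *		return remap_pfn_range(vma, vma->vm_start, pfn,
 *				       vma->vm_end - vma->vm_start,
 *				       vma->vm_page_prot);
 *	}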
*/ if (is_cow_mapping(vma->vm_flags)) { if (addr != vma->vm_start || end != vma->vm_end) return -EINVAL; vma->vm_pgoff = pfn; } vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); err = remap_p4d_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (pgd++, addr = next, addr != end); return 0; } /** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to * @addr: target page aligned user address to start at * @pfn: page frame number of kernel physical memory address * @size: size of mapping area * @prot: page protection flags for this mapping * * Note: this is only safe if the mm semaphore is held when called. * * Return: %0 on success, negative error code otherwise. */ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { int err; err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size)); if (err) return -EINVAL; err = remap_pfn_range_notrack(vma, addr, pfn, size, prot); if (err) untrack_pfn(vma, pfn, PAGE_ALIGN(size), true); return err; } EXPORT_SYMBOL(remap_pfn_range); /** * vm_iomap_memory - remap memory to userspace * @vma: user vma to map to * @start: start of the physical memory to be mapped * @len: size of area * * This is a simplified io_remap_pfn_range() for common driver use. The * driver just needs to give us the physical memory range to be mapped, * we'll figure out the rest from the vma information. * * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get * whatever write-combining details or similar. * * Return: %0 on success, negative error code otherwise. */ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) { unsigned long vm_len, pfn, pages; /* Check that the physical memory area passed in looks valid */ if (start + len < start) return -EINVAL; /* * You *really* shouldn't map things that aren't page-aligned, * but we've historically allowed it because IO memory might * just have smaller alignment. */ len += start & ~PAGE_MASK; pfn = start >> PAGE_SHIFT; pages = (len + ~PAGE_MASK) >> PAGE_SHIFT; if (pfn + pages < pfn) return -EINVAL; /* We start the mapping 'vm_pgoff' pages into the area */ if (vma->vm_pgoff > pages) return -EINVAL; pfn += vma->vm_pgoff; pages -= vma->vm_pgoff; /* Can we fit all of the mapping? */ vm_len = vma->vm_end - vma->vm_start; if (vm_len >> PAGE_SHIFT > pages) return -EINVAL; /* Ok, let it rip */ return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); } EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { pte_t *pte, *mapped_pte; int err = 0; spinlock_t *ptl; if (create) { mapped_pte = pte = (mm == &init_mm) ? pte_alloc_kernel_track(pmd, addr, mask) : pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; } else { mapped_pte = pte = (mm == &init_mm) ? 
pte_offset_kernel(pmd, addr) : pte_offset_map_lock(mm, pmd, addr, &ptl); if (!pte) return -EINVAL; } arch_enter_lazy_mmu_mode(); if (fn) { do { if (create || !pte_none(ptep_get(pte))) { err = fn(pte++, addr, data); if (err) break; } } while (addr += PAGE_SIZE, addr != end); } *mask |= PGTBL_PTE_MODIFIED; arch_leave_lazy_mmu_mode(); if (mm != &init_mm) pte_unmap_unlock(mapped_pte, ptl); return err; } static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { pmd_t *pmd; unsigned long next; int err = 0; BUG_ON(pud_huge(*pud)); if (create) { pmd = pmd_alloc_track(mm, pud, addr, mask); if (!pmd) return -ENOMEM; } else { pmd = pmd_offset(pud, addr); } do { next = pmd_addr_end(addr, end); if (pmd_none(*pmd) && !create) continue; if (WARN_ON_ONCE(pmd_leaf(*pmd))) return -EINVAL; if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd))) { if (!create) continue; pmd_clear_bad(pmd); } err = apply_to_pte_range(mm, pmd, addr, next, fn, data, create, mask); if (err) break; } while (pmd++, addr = next, addr != end); return err; } static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { pud_t *pud; unsigned long next; int err = 0; if (create) { pud = pud_alloc_track(mm, p4d, addr, mask); if (!pud) return -ENOMEM; } else { pud = pud_offset(p4d, addr); } do { next = pud_addr_end(addr, end); if (pud_none(*pud) && !create) continue; if (WARN_ON_ONCE(pud_leaf(*pud))) return -EINVAL; if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud))) { if (!create) continue; pud_clear_bad(pud); } err = apply_to_pmd_range(mm, pud, addr, next, fn, data, create, mask); if (err) break; } while (pud++, addr = next, addr != end); return err; } static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { p4d_t *p4d; unsigned long next; int err = 0; if (create) { p4d = p4d_alloc_track(mm, pgd, addr, mask); if (!p4d) return -ENOMEM; } else { p4d = p4d_offset(pgd, addr); } do { next = p4d_addr_end(addr, end); if (p4d_none(*p4d) && !create) continue; if (WARN_ON_ONCE(p4d_leaf(*p4d))) return -EINVAL; if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d))) { if (!create) continue; p4d_clear_bad(p4d); } err = apply_to_pud_range(mm, p4d, addr, next, fn, data, create, mask); if (err) break; } while (p4d++, addr = next, addr != end); return err; } static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, unsigned long size, pte_fn_t fn, void *data, bool create) { pgd_t *pgd; unsigned long start = addr, next; unsigned long end = addr + size; pgtbl_mod_mask mask = 0; int err = 0; if (WARN_ON(addr >= end)) return -EINVAL; pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none(*pgd) && !create) continue; if (WARN_ON_ONCE(pgd_leaf(*pgd))) return -EINVAL; if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { if (!create) continue; pgd_clear_bad(pgd); } err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask); if (err) break; } while (pgd++, addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, start + size); return err; } /* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. 
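 *
 * Illustrative sketch only (hypothetical callback, not part of this file):
 * the pte_fn_t callback receives the pte pointer, the virtual address and
 * the opaque cookie, e.g. counting populated entries together with the
 * non-filling variant apply_to_existing_page_range() below:
 *
 *	static int foo_count_present(pte_t *ptep, unsigned long addr, void *data)
 *	{
 *		unsigned long *nr = data;
 *
 *		if (!pte_none(ptep_get(ptep)))
 *			(*nr)++;
 *		return 0;
 *	}
 *
 *	apply_to_existing_page_range(mm, start, size, foo_count_present, &nr);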
*/ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, unsigned long size, pte_fn_t fn, void *data) { return __apply_to_page_range(mm, addr, size, fn, data, true); } EXPORT_SYMBOL_GPL(apply_to_page_range); /* * Scan a region of virtual memory, calling a provided function on * each leaf page table where it exists. * * Unlike apply_to_page_range, this does _not_ fill in page tables * where they are absent. */ int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr, unsigned long size, pte_fn_t fn, void *data) { return __apply_to_page_range(mm, addr, size, fn, data, false); } EXPORT_SYMBOL_GPL(apply_to_existing_page_range); /* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. Before making any commitment, on those architectures * or configurations (e.g. i386 with PAE) which might give a mix of unmatched * parts, do_swap_page must check under lock before unmapping the pte and * proceeding (but do_wp_page is only called after already making such a check; * and do_anonymous_page can safely check later on). */ static inline int pte_unmap_same(struct vm_fault *vmf) { int same = 1; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION) if (sizeof(pte_t) > sizeof(unsigned long)) { spin_lock(vmf->ptl); same = pte_same(ptep_get(vmf->pte), vmf->orig_pte); spin_unlock(vmf->ptl); } #endif pte_unmap(vmf->pte); vmf->pte = NULL; return same; } /* * Return: * 0: copied succeeded * -EHWPOISON: copy failed due to hwpoison in source page * -EAGAIN: copied failed (some other reason) */ static inline int __wp_page_copy_user(struct page *dst, struct page *src, struct vm_fault *vmf) { int ret; void *kaddr; void __user *uaddr; struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; unsigned long addr = vmf->address; if (likely(src)) { if (copy_mc_user_highpage(dst, src, addr, vma)) { memory_failure_queue(page_to_pfn(src), 0); return -EHWPOISON; } return 0; } /* * If the source page was a PFN mapping, we don't have * a "struct page" for it. We do a best-effort copy by * just copying from the original user address. If that * fails, we just zero-fill it. Live with it. */ kaddr = kmap_local_page(dst); pagefault_disable(); uaddr = (void __user *)(addr & PAGE_MASK); /* * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ vmf->pte = NULL; if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { /* * Other thread has already handled the fault * and update local tlb only */ if (vmf->pte) update_mmu_tlb(vma, addr, vmf->pte); ret = -EAGAIN; goto pte_unlock; } entry = pte_mkyoung(vmf->orig_pte); if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1); } /* * This really shouldn't fail, because the page is there * in the page tables. But it might just be unreadable, * in which case we just give up and fill the result with * zeroes. 
*/ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { if (vmf->pte) goto warn; /* Re-validate under PTL if the page is still mapped */ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { /* The PTE changed under us, update local tlb */ if (vmf->pte) update_mmu_tlb(vma, addr, vmf->pte); ret = -EAGAIN; goto pte_unlock; } /* * The same page can be mapped back since last copy attempt. * Try to copy again under PTL. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { /* * Give a warn in case there can be some obscure * use-case */ warn: WARN_ON_ONCE(1); clear_page(kaddr); } } ret = 0; pte_unlock: if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); pagefault_enable(); kunmap_local(kaddr); flush_dcache_page(dst); return ret; } static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) { struct file *vm_file = vma->vm_file; if (vm_file) return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; /* * Special mappings (e.g. VDSO) do not have any file so fake * a default GFP_KERNEL for them. */ return GFP_KERNEL; } /* * Notify the address space that the page is about to become writable so that * it can prohibit this or wait for the page to get into an appropriate state. * * We do this without the lock held, so that it can sleep if it needs to. */ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf, struct folio *folio) { vm_fault_t ret; unsigned int old_flags = vmf->flags; vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; if (vmf->vma->vm_file && IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) return VM_FAULT_SIGBUS; ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { folio_lock(folio); if (!folio->mapping) { folio_unlock(folio); return 0; /* retry */ } ret |= VM_FAULT_LOCKED; } else VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); return ret; } /* * Handle dirtying of a page in shared file mapping on a write fault. * * The function expects the page to be locked and unlocks it. */ static vm_fault_t fault_dirty_shared_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct address_space *mapping; struct folio *folio = page_folio(vmf->page); bool dirtied; bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; dirtied = folio_mark_dirty(folio); VM_BUG_ON_FOLIO(folio_test_anon(folio), folio); /* * Take a local copy of the address_space - folio.mapping may be zeroed * by truncate after folio_unlock(). The address_space itself remains * pinned by vma->vm_file's reference. We rely on folio_unlock()'s * release semantics to prevent the compiler from undoing this copying. */ mapping = folio_raw_mapping(folio); folio_unlock(folio); if (!page_mkwrite) file_update_time(vma->vm_file); /* * Throttle page dirtying rate down to writeback speed. * * mapping may be NULL here because some device drivers do not * set page.mapping but still dirty their pages * * Drop the mmap_lock before waiting on IO, if we can. The file * is pinning the mapping, as per above. 
*/ if ((dirtied || page_mkwrite) && mapping) { struct file *fpin; fpin = maybe_unlock_mmap_for_io(vmf, NULL); balance_dirty_pages_ratelimited(mapping); if (fpin) { fput(fpin); return VM_FAULT_COMPLETED; } } return 0; } /* * Handle write page faults for pages that can be reused in the current vma * * This can happen either due to the mapping being with the VM_SHARED flag, * or due to us being the last reference standing to the page. In either * case, all we need to do here is to mark the page as writable and update * any related book-keeping. */ static inline void wp_page_reuse(struct vm_fault *vmf, struct folio *folio) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; pte_t entry; VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); if (folio) { VM_BUG_ON(folio_test_anon(folio) && !PageAnonExclusive(vmf->page)); /* * Clear the folio's cpupid information as the existing * information potentially belongs to a now completely * unrelated process. */ folio_xchg_last_cpupid(folio, (1 << LAST_CPUPID_SHIFT) - 1); } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = pte_mkyoung(vmf->orig_pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); pte_unmap_unlock(vmf->pte, vmf->ptl); count_vm_event(PGREUSE); } /* * We could add a bitflag somewhere, but for now, we know that all * vm_ops that have a ->map_pages have been audited and don't need * the mmap_lock to be held. */ static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK)) return 0; vma_end_read(vma); return VM_FAULT_RETRY; } vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; if (likely(vma->anon_vma)) return 0; if (vmf->flags & FAULT_FLAG_VMA_LOCK) { vma_end_read(vma); return VM_FAULT_RETRY; } if (__anon_vma_prepare(vma)) return VM_FAULT_OOM; return 0; } /* * Handle the case of a page which we actually need to copy to a new page, * either due to COW or unsharing. * * Called with mmap_lock locked and the old page referenced, but * without the ptl held. * * High level logic flow: * * - Allocate a page, copy the content of the old page to the new one. * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc. * - Take the PTL. If the pte changed, bail out and release the allocated page * - If the pte is still the way we remember it, update the page table and all * relevant references. This includes dropping the reference the page-table * held to the old page, as well as updating the rmap. * - In any case, unlock the PTL and drop the reference we took to the old page. 
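 *
 * (As a rough map from this list to the code below: the allocation and copy
 * are folio_prealloc() plus __wp_page_copy_user(), the "pte changed" check
 * is the pte_same() test after re-taking the lock with pte_offset_map_lock(),
 * and the reference/rmap hand-over happens around ptep_clear_flush() and
 * folio_remove_rmap_pte().)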
*/ static vm_fault_t wp_page_copy(struct vm_fault *vmf) { const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; struct folio *old_folio = NULL; struct folio *new_folio = NULL; pte_t entry; int page_copied = 0; struct mmu_notifier_range range; vm_fault_t ret; bool pfn_is_zero; delayacct_wpcopy_start(); if (vmf->page) old_folio = page_folio(vmf->page); ret = vmf_anon_prepare(vmf); if (unlikely(ret)) goto out; pfn_is_zero = is_zero_pfn(pte_pfn(vmf->orig_pte)); new_folio = folio_prealloc(mm, vma, vmf->address, pfn_is_zero); if (!new_folio) goto oom; if (!pfn_is_zero) { int err; err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); if (err) { /* * COW failed, if the fault was solved by other, * it's fine. If not, userspace would re-fault on * the same address and we will handle the fault * from the second attempt. * The -EHWPOISON case will not be retried. */ folio_put(new_folio); if (old_folio) folio_put(old_folio); delayacct_wpcopy_end(); return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; } kmsan_copy_page_meta(&new_folio->page, vmf->page); } __folio_mark_uptodate(new_folio); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, vmf->address & PAGE_MASK, (vmf->address & PAGE_MASK) + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); /* * Re-check the pte - we dropped the lock */ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { if (old_folio) { if (!folio_test_anon(old_folio)) { dec_mm_counter(mm, mm_counter_file(old_folio)); inc_mm_counter(mm, MM_ANONPAGES); } } else { ksm_might_unmap_zero_page(mm, vmf->orig_pte); inc_mm_counter(mm, MM_ANONPAGES); } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(&new_folio->page, vma->vm_page_prot); entry = pte_sw_mkyoung(entry); if (unlikely(unshare)) { if (pte_soft_dirty(vmf->orig_pte)) entry = pte_mksoft_dirty(entry); if (pte_uffd_wp(vmf->orig_pte)) entry = pte_mkuffd_wp(entry); } else { entry = maybe_mkwrite(pte_mkdirty(entry), vma); } /* * Clear the pte entry and flush it first, before updating the * pte with the new entry, to keep TLBs on different CPUs in * sync. This code used to set the new PTE then flush TLBs, but * that left a window where the new PTE could be loaded into * some TLBs while the old PTE remains in others. */ ptep_clear_flush(vma, vmf->address, vmf->pte); folio_add_new_anon_rmap(new_folio, vma, vmf->address); folio_add_lru_vma(new_folio, vma); /* * We call the notify macro here because, when using secondary * mmu page tables (such as kvm shadow page tables), we want the * new page to be mapped directly into the secondary page table. */ BUG_ON(unshare && pte_write(entry)); set_pte_at_notify(mm, vmf->address, vmf->pte, entry); update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); if (old_folio) { /* * Only after switching the pte to the new page may * we remove the mapcount here. Otherwise another * process may come and find the rmap count decremented * before the pte is switched to the new page, and * "reuse" the old page writing into it while our pte * here still points into it and can be read by other * threads. * * The critical issue is to order this * folio_remove_rmap_pte() with the ptp_clear_flush * above. 
Those stores are ordered by (if nothing else,) * the barrier present in the atomic_add_negative * in folio_remove_rmap_pte(); * * Then the TLB flush in ptep_clear_flush ensures that * no process can access the old page before the * decremented mapcount is visible. And the old page * cannot be reused until after the decremented * mapcount is visible. So transitively, TLBs to * old page will be flushed before it can be reused. */ folio_remove_rmap_pte(old_folio, vmf->page, vma); } /* Free the old page.. */ new_folio = old_folio; page_copied = 1; pte_unmap_unlock(vmf->pte, vmf->ptl); } else if (vmf->pte) { update_mmu_tlb(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); } mmu_notifier_invalidate_range_end(&range); if (new_folio) folio_put(new_folio); if (old_folio) { if (page_copied) free_swap_cache(old_folio); folio_put(old_folio); } delayacct_wpcopy_end(); return 0; oom: ret = VM_FAULT_OOM; out: if (old_folio) folio_put(old_folio); delayacct_wpcopy_end(); return ret; } /** * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE * writeable once the page is prepared * * @vmf: structure describing the fault * @folio: the folio of vmf->page * * This function handles all that is needed to finish a write page fault in a * shared mapping due to PTE being read-only once the mapped page is prepared. * It handles locking of PTE and modifying it. * * The function expects the page to be locked or other protection against * concurrent faults / writeback (such as DAX radix tree locks). * * Return: %0 on success, %VM_FAULT_NOPAGE when PTE got changed before * we acquired PTE lock. */ static vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf, struct folio *folio) { WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!vmf->pte) return VM_FAULT_NOPAGE; /* * We might have raced with another page fault while we released the * pte_offset_map_lock. 
*/ if (!pte_same(ptep_get(vmf->pte), vmf->orig_pte)) { update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); return VM_FAULT_NOPAGE; } wp_page_reuse(vmf, folio); return 0; } /* * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping */ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { vm_fault_t ret; pte_unmap_unlock(vmf->pte, vmf->ptl); ret = vmf_can_call_fault(vmf); if (ret) return ret; vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) return ret; return finish_mkwrite_fault(vmf, NULL); } wp_page_reuse(vmf, NULL); return 0; } static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret = 0; folio_get(folio); if (vma->vm_ops && vma->vm_ops->page_mkwrite) { vm_fault_t tmp; pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = vmf_can_call_fault(vmf); if (tmp) { folio_put(folio); return tmp; } tmp = do_page_mkwrite(vmf, folio); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { folio_put(folio); return tmp; } tmp = finish_mkwrite_fault(vmf, folio); if (unlikely(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { folio_unlock(folio); folio_put(folio); return tmp; } } else { wp_page_reuse(vmf, folio); folio_lock(folio); } ret |= fault_dirty_shared_page(vmf); folio_put(folio); return ret; } static bool wp_can_reuse_anon_folio(struct folio *folio, struct vm_area_struct *vma) { /* * We could currently only reuse a subpage of a large folio if no * other subpages of the large folios are still mapped. However, * let's just consistently not reuse subpages even if we could * reuse in that scenario, and give back a large folio a bit * sooner. */ if (folio_test_large(folio)) return false; /* * We have to verify under folio lock: these early checks are * just an optimization to avoid locking the folio and freeing * the swapcache if there is little hope that we can reuse. * * KSM doesn't necessarily raise the folio refcount. */ if (folio_test_ksm(folio) || folio_ref_count(folio) > 3) return false; if (!folio_test_lru(folio)) /* * We cannot easily detect+handle references from * remote LRU caches or references to LRU folios. */ lru_add_drain(); if (folio_ref_count(folio) > 1 + folio_test_swapcache(folio)) return false; if (!folio_trylock(folio)) return false; if (folio_test_swapcache(folio)) folio_free_swap(folio); if (folio_test_ksm(folio) || folio_ref_count(folio) != 1) { folio_unlock(folio); return false; } /* * Ok, we've got the only folio reference from our mapping * and the folio is locked, it's dark out, and we're wearing * sunglasses. Hit it. */ folio_move_anon_rmap(folio, vma); folio_unlock(folio); return true; } /* * This routine handles present pages, when * * users try to write to a shared page (FAULT_FLAG_WRITE) * * GUP wants to take a R/O pin on a possibly shared anonymous page * (FAULT_FLAG_UNSHARE) * * It is done by copying the page to a new address and decrementing the * shared-page counter for the old page. * * Note that this routine assumes that the protection checks have been * done by the caller (the low-level page fault routine in most cases). * Thus, with FAULT_FLAG_WRITE, we can safely just mark it writable once we've * done any necessary COW. 
* * In case of FAULT_FLAG_WRITE, we also mark the page dirty at this point even * though the page will change only once the write actually happens. This * avoids a few races, and potentially makes it more efficient. * * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), with pte both mapped and locked. * We return with mmap_lock still held, but pte unmapped and unlocked. */ static vm_fault_t do_wp_page(struct vm_fault *vmf) __releases(vmf->ptl) { const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; struct vm_area_struct *vma = vmf->vma; struct folio *folio = NULL; pte_t pte; if (likely(!unshare)) { if (userfaultfd_pte_wp(vma, ptep_get(vmf->pte))) { if (!userfaultfd_wp_async(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return handle_userfault(vmf, VM_UFFD_WP); } /* * Nothing needed (cache flush, TLB invalidations, * etc.) because we're only removing the uffd-wp bit, * which is completely invisible to the user. */ pte = pte_clear_uffd_wp(ptep_get(vmf->pte)); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); /* * Update this to be prepared for following up CoW * handling */ vmf->orig_pte = pte; } /* * Userfaultfd write-protect can defer flushes. Ensure the TLB * is flushed in this case before copying. */ if (unlikely(userfaultfd_wp(vmf->vma) && mm_tlb_flush_pending(vmf->vma->vm_mm))) flush_tlb_page(vmf->vma, vmf->address); } vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); if (vmf->page) folio = page_folio(vmf->page); /* * Shared mapping: we are guaranteed to have VM_WRITE and * FAULT_FLAG_WRITE set at this point. */ if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a * VM_PFNMAP VMA. * * We should not cow pages in a shared writeable mapping. * Just mark the pages writable and/or call ops->pfn_mkwrite. */ if (!vmf->page) return wp_pfn_shared(vmf); return wp_page_shared(vmf, folio); } /* * Private mapping: create an exclusive anonymous page copy if reuse * is impossible. We might miss VM_WRITE for FOLL_FORCE handling. * * If we encounter a page that is marked exclusive, we must reuse * the page without further checks. */ if (folio && folio_test_anon(folio) && (PageAnonExclusive(vmf->page) || wp_can_reuse_anon_folio(folio, vma))) { if (!PageAnonExclusive(vmf->page)) SetPageAnonExclusive(vmf->page); if (unlikely(unshare)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } wp_page_reuse(vmf, folio); return 0; } /* * Ok, we need to copy. Oh, well.. */ if (folio) folio_get(folio); pte_unmap_unlock(vmf->pte, vmf->ptl); #ifdef CONFIG_KSM if (folio && folio_test_ksm(folio)) count_vm_event(COW_KSM); #endif return wp_page_copy(vmf); } static void unmap_mapping_range_vma(struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details) { zap_page_range_single(vma, start_addr, end_addr - start_addr, details); } static inline void unmap_mapping_range_tree(struct rb_root_cached *root, pgoff_t first_index, pgoff_t last_index, struct zap_details *details) { struct vm_area_struct *vma; pgoff_t vba, vea, zba, zea; vma_interval_tree_foreach(vma, root, first_index, last_index) { vba = vma->vm_pgoff; vea = vba + vma_pages(vma) - 1; zba = max(first_index, vba); zea = min(last_index, vea); unmap_mapping_range_vma(vma, ((zba - vba) << PAGE_SHIFT) + vma->vm_start, ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, details); } } /** * unmap_mapping_folio() - Unmap single folio from processes. * @folio: The locked folio to be unmapped. 
* * Unmap this folio from any userspace process which still has it mmaped. * Typically, for efficiency, the range of nearby pages has already been * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once * truncation or invalidation holds the lock on a folio, it may find that * the page has been remapped again: and then uses unmap_mapping_folio() * to unmap it finally. */ void unmap_mapping_folio(struct folio *folio) { struct address_space *mapping = folio->mapping; struct zap_details details = { }; pgoff_t first_index; pgoff_t last_index; VM_BUG_ON(!folio_test_locked(folio)); first_index = folio->index; last_index = folio_next_index(folio) - 1; details.even_cows = false; details.single_folio = folio; details.zap_flags = ZAP_FLAG_DROP_MARKER; i_mmap_lock_read(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) unmap_mapping_range_tree(&mapping->i_mmap, first_index, last_index, &details); i_mmap_unlock_read(mapping); } /** * unmap_mapping_pages() - Unmap pages from processes. * @mapping: The address space containing pages to be unmapped. * @start: Index of first page to be unmapped. * @nr: Number of pages to be unmapped. 0 to unmap to end of file. * @even_cows: Whether to unmap even private COWed pages. * * Unmap the pages in this address space from any userspace process which * has them mmaped. Generally, you want to remove COWed pages as well when * a file is being truncated, but not when invalidating pages from the page * cache. */ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { struct zap_details details = { }; pgoff_t first_index = start; pgoff_t last_index = start + nr - 1; details.even_cows = even_cows; if (last_index < first_index) last_index = ULONG_MAX; i_mmap_lock_read(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) unmap_mapping_range_tree(&mapping->i_mmap, first_index, last_index, &details); i_mmap_unlock_read(mapping); } EXPORT_SYMBOL_GPL(unmap_mapping_pages); /** * unmap_mapping_range - unmap the portion of all mmaps in the specified * address_space corresponding to the specified byte range in the underlying * file. * * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE * boundary. Note that this is different from truncate_pagecache(), which * must keep the partial page. In contrast, we must get rid of * partial pages. * @holelen: size of prospective hole in bytes. This will be rounded * up to a PAGE_SIZE boundary. A holelen of zero truncates to the * end of the file. * @even_cows: 1 when truncating a file, unmap even private COWed pages; * but 0 when invalidating pagecache, don't throw away private data. */ void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows) { pgoff_t hba = (pgoff_t)(holebegin) >> PAGE_SHIFT; pgoff_t hlen = ((pgoff_t)(holelen) + PAGE_SIZE - 1) >> PAGE_SHIFT; /* Check for overflow. 
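 * This matters when pgoff_t is narrower than loff_t (for instance a 32-bit
 * kernel with 64-bit file offsets): holebegin + holelen may then describe a
 * final page index that cannot be represented in hlen. In that case hlen is
 * clamped so that first_index + hlen - 1 in unmap_mapping_pages() becomes
 * ULONG_MAX, i.e. "everything from hba to the end of the file".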
*/ if (sizeof(holelen) > sizeof(hlen)) { long long holeend = (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; if (holeend & ~(long long)ULONG_MAX) hlen = ULONG_MAX - hba + 1; } unmap_mapping_pages(mapping, hba, hlen, even_cows); } EXPORT_SYMBOL(unmap_mapping_range); /* * Restore a potential device exclusive pte to a working pte entry */ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf) { struct folio *folio = page_folio(vmf->page); struct vm_area_struct *vma = vmf->vma; struct mmu_notifier_range range; vm_fault_t ret; /* * We need a reference to lock the folio because we don't hold * the PTL so a racing thread can remove the device-exclusive * entry and unmap it. If the folio is free the entry must * have been removed already. If it happens to have already * been re-allocated after being freed all we do is lock and * unlock it. */ if (!folio_try_get(folio)) return 0; ret = folio_lock_or_retry(folio, vmf); if (ret) { folio_put(folio); return ret; } mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma->vm_mm, vmf->address & PAGE_MASK, (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); mmu_notifier_invalidate_range_start(&range); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); folio_unlock(folio); folio_put(folio); mmu_notifier_invalidate_range_end(&range); return 0; } static inline bool should_try_to_free_swap(struct folio *folio, struct vm_area_struct *vma, unsigned int fault_flags) { if (!folio_test_swapcache(folio)) return false; if (mem_cgroup_swap_full(folio) || (vma->vm_flags & VM_LOCKED) || folio_test_mlocked(folio)) return true; /* * If we want to map a page that's in the swapcache writable, we * have to detect via the refcount if we're really the exclusive * user. Try freeing the swapcache to get rid of the swapcache * reference only in case it's likely that we'll be the exlusive user. */ return (fault_flags & FAULT_FLAG_WRITE) && !folio_test_ksm(folio) && folio_ref_count(folio) == 2; } static vm_fault_t pte_marker_clear(struct vm_fault *vmf) { vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!vmf->pte) return 0; /* * Be careful so that we will only recover a special uffd-wp pte into a * none pte. Otherwise it means the pte could have changed, so retry. * * This should also cover the case where e.g. the pte changed * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_POISONED. * So is_pte_marker() check is not enough to safely drop the pte. */ if (pte_same(vmf->orig_pte, ptep_get(vmf->pte))) pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } static vm_fault_t do_pte_missing(struct vm_fault *vmf) { if (vma_is_anonymous(vmf->vma)) return do_anonymous_page(vmf); else return do_fault(vmf); } /* * This is actually a page-missing access, but with uffd-wp special pte * installed. It means this pte was wr-protected before being unmapped. */ static vm_fault_t pte_marker_handle_uffd_wp(struct vm_fault *vmf) { /* * Just in case there're leftover special ptes even after the region * got unregistered - we can simply clear them. 
*/ if (unlikely(!userfaultfd_wp(vmf->vma))) return pte_marker_clear(vmf); return do_pte_missing(vmf); } static vm_fault_t handle_pte_marker(struct vm_fault *vmf) { swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte); unsigned long marker = pte_marker_get(entry); /* * PTE markers should never be empty. If anything weird happened, * the best thing to do is to kill the process along with its mm. */ if (WARN_ON_ONCE(!marker)) return VM_FAULT_SIGBUS; /* Higher priority than uffd-wp when data corrupted */ if (marker & PTE_MARKER_POISONED) return VM_FAULT_HWPOISON; if (pte_marker_entry_uffd_wp(entry)) return pte_marker_handle_uffd_wp(vmf); /* This is an unknown pte marker */ return VM_FAULT_SIGBUS; } /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. * We return with pte unmapped and unlocked. * * We return with the mmap_lock locked or unlocked in the same cases * as does filemap_fault(). */ vm_fault_t do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct folio *swapcache, *folio = NULL; struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; vm_fault_t ret = 0; void *shadow = NULL; if (!pte_unmap_same(vmf)) goto out; entry = pte_to_swp_entry(vmf->orig_pte); if (unlikely(non_swap_entry(entry))) { if (is_migration_entry(entry)) { migration_entry_wait(vma->vm_mm, vmf->pmd, vmf->address); } else if (is_device_exclusive_entry(entry)) { vmf->page = pfn_swap_entry_to_page(entry); ret = remove_device_exclusive_entry(vmf); } else if (is_device_private_entry(entry)) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) { /* * migrate_to_ram is not yet ready to operate * under VMA lock. */ vma_end_read(vma); ret = VM_FAULT_RETRY; goto out; } vmf->page = pfn_swap_entry_to_page(entry); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) goto unlock; /* * Get a page reference while we know the page can't be * freed. */ get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else if (is_pte_marker_entry(entry)) { ret = handle_pte_marker(vmf); } else { print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); ret = VM_FAULT_SIGBUS; } goto out; } /* Prevent swapoff from happening to us. */ si = get_swap_device(entry); if (unlikely(!si)) goto out; folio = swap_cache_get_folio(entry, vma, vmf->address); if (folio) page = folio_file_page(folio, swp_offset(entry)); swapcache = folio; if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { /* * Prevent parallel swapin from proceeding with * the cache flag. Otherwise, another thread may * finish swapin first, free the entry, and swapout * reusing the same entry. It's undetectable as * pte_same() returns true due to entry reuse. 
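 *
 * One possible interleaving (an illustrative sketch of the race described
 * above):
 *
 *	CPU A				CPU B
 *	reads swap pte, entry E
 *					faults, finishes swapin first,
 *					swap_free(E)
 *					page swapped out again and
 *					happens to reuse entry E
 *	reads slot E into a new folio
 *	pte_same() check still passes
 *	maps the stale copy
 *
 * swapcache_prepare() below makes the late faulter back off and retry the
 * fault instead.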
*/ if (swapcache_prepare(entry)) { /* Relax a bit to prevent rapid repeated page faults */ schedule_timeout_uninterruptible(1); goto out; } need_clear_cache = true; /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); page = &folio->page; if (folio) { __folio_set_locked(folio); __folio_set_swapbacked(folio); if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, GFP_KERNEL, entry)) { ret = VM_FAULT_OOM; goto out_page; } mem_cgroup_swapin_uncharge_swap(entry); shadow = get_shadow_from_swap_cache(entry); if (shadow) workingset_refault(folio, shadow); folio_add_lru(folio); /* To provide entry to swap_read_folio() */ folio->swap = entry; swap_read_folio(folio, true, NULL); folio->private = NULL; } } else { page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf); if (page) folio = page_folio(page); swapcache = folio; } if (!folio) { /* * Back out if somebody else faulted in this pte * while we released the pte lock. */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) ret = VM_FAULT_OOM; goto unlock; } /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); } else if (PageHWPoison(page)) { /* * hwpoisoned dirty swapcache pages are kept for killing * owner processes (which may be unknown at hwpoison time) */ ret = VM_FAULT_HWPOISON; goto out_release; } ret |= folio_lock_or_retry(folio, vmf); if (ret & VM_FAULT_RETRY) goto out_release; if (swapcache) { /* * Make sure folio_free_swap() or swapoff did not release the * swapcache from under us. The page pin, and pte_same test * below, are not enough to exclude that. Even if it is still * swapcache, we need to check that the page's swap has not * changed. */ if (unlikely(!folio_test_swapcache(folio) || page_swap_entry(page).val != entry.val)) goto out_page; /* * KSM sometimes has to copy on read faults, for example, if * page->index of !PageKSM() pages would be nonlinear inside the * anon VMA -- PageKSM() is lost on actual swapout. */ folio = ksm_might_need_to_copy(folio, vma, vmf->address); if (unlikely(!folio)) { ret = VM_FAULT_OOM; folio = swapcache; goto out_page; } else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { ret = VM_FAULT_HWPOISON; folio = swapcache; goto out_page; } if (folio != swapcache) page = folio_page(folio, 0); /* * If we want to map a page that's in the swapcache writable, we * have to detect via the refcount if we're really the exclusive * owner. Try removing the extra reference from the local LRU * caches if required. */ if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache && !folio_test_ksm(folio) && !folio_test_lru(folio)) lru_add_drain(); } folio_throttle_swaprate(folio, GFP_KERNEL); /* * Back out if somebody else already faulted in this pte. */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) goto out_nomap; if (unlikely(!folio_test_uptodate(folio))) { ret = VM_FAULT_SIGBUS; goto out_nomap; } /* * PG_anon_exclusive reuses PG_mappedtodisk for anon pages. A swap pte * must never point at an anonymous page in the swapcache that is * PG_anon_exclusive. Sanity check that this holds and especially, that * no filesystem set PG_mappedtodisk on a page in the swapcache. 
Sanity * check after taking the PT lock and making sure that nobody * concurrently faulted in this page and set PG_anon_exclusive. */ BUG_ON(!folio_test_anon(folio) && folio_test_mappedtodisk(folio)); BUG_ON(folio_test_anon(folio) && PageAnonExclusive(page)); /* * Check under PT lock (to protect against concurrent fork() sharing * the swap entry concurrently) for certainly exclusive pages. */ if (!folio_test_ksm(folio)) { exclusive = pte_swp_exclusive(vmf->orig_pte); if (folio != swapcache) { /* * We have a fresh page that is not exposed to the * swapcache -> certainly exclusive. */ exclusive = true; } else if (exclusive && folio_test_writeback(folio) && data_race(si->flags & SWP_STABLE_WRITES)) { /* * This is tricky: not all swap backends support * concurrent page modifications while under writeback. * * So if we stumble over such a page in the swapcache * we must not set the page exclusive, otherwise we can * map it writable without further checks and modify it * while still under writeback. * * For these problematic swap backends, simply drop the * exclusive marker: this is perfectly fine as we start * writeback only if we fully unmapped the page and * there are no unexpected references on the page after * unmapping succeeded. After fully unmapped, no * further GUP references (FOLL_GET and FOLL_PIN) can * appear, so dropping the exclusive marker and mapping * it only R/O is fine. */ exclusive = false; } } /* * Some architectures may have to restore extra metadata to the page * when reading from swap. This metadata may be indexed by swap entry * so this must be called before swap_free(). */ arch_swap_restore(entry, folio); /* * Remove the swap entry and conditionally try to free up the swapcache. * We're already holding a reference on the page but haven't mapped it * yet. */ swap_free(entry); if (should_try_to_free_swap(folio, vma, vmf->flags)) folio_free_swap(folio); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); dec_mm_counter(vma->vm_mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); /* * Same logic as in do_wp_page(); however, optimize for pages that are * certainly not shared either because we just allocated them without * exposing them to the swapcache or because the swap entry indicates * exclusivity. */ if (!folio_test_ksm(folio) && (exclusive || folio_ref_count(folio) == 1)) { if (vmf->flags & FAULT_FLAG_WRITE) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); vmf->flags &= ~FAULT_FLAG_WRITE; } rmap_flags |= RMAP_EXCLUSIVE; } flush_icache_page(vma, page); if (pte_swp_soft_dirty(vmf->orig_pte)) pte = pte_mksoft_dirty(pte); if (pte_swp_uffd_wp(vmf->orig_pte)) pte = pte_mkuffd_wp(pte); vmf->orig_pte = pte; /* ksm created a completely new copy */ if (unlikely(folio != swapcache && swapcache)) { folio_add_new_anon_rmap(folio, vma, vmf->address); folio_add_lru_vma(folio, vma); } else { folio_add_anon_rmap_pte(folio, page, vma, vmf->address, rmap_flags); } VM_BUG_ON(!folio_test_anon(folio) || (pte_write(pte) && !PageAnonExclusive(page))); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); folio_unlock(folio); if (folio != swapcache && swapcache) { /* * Hold the lock to avoid the swap entry to be reused * until we take the PT lock for the pte_same() check * (to avoid false positives from pte_same). For * further safety release the lock after the swap_free * so that the swap count won't change under a * parallel locked swapcache. 
*/ folio_unlock(swapcache); folio_put(swapcache); } if (vmf->flags & FAULT_FLAG_WRITE) { ret |= do_wp_page(vmf); if (ret & VM_FAULT_ERROR) ret &= VM_FAULT_ERROR; goto out; } /* No need to invalidate - it was non-present before */ update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); unlock: if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); out: /* Clear the swap cache pin for direct swapin after PTL unlock */ if (need_clear_cache) swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; out_nomap: if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); out_page: folio_unlock(folio); out_release: folio_put(folio); if (folio != swapcache && swapcache) { folio_unlock(swapcache); folio_put(swapcache); } if (need_clear_cache) swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; } static bool pte_range_none(pte_t *pte, int nr_pages) { int i; for (i = 0; i < nr_pages; i++) { if (!pte_none(ptep_get_lockless(pte + i))) return false; } return true; } static struct folio *alloc_anon_folio(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; #ifdef CONFIG_TRANSPARENT_HUGEPAGE unsigned long orders; struct folio *folio; unsigned long addr; pte_t *pte; gfp_t gfp; int order; /* * If uffd is active for the vma we need per-page fault fidelity to * maintain the uffd semantics. */ if (unlikely(userfaultfd_armed(vma))) goto fallback; /* * Get a list of all the (large) orders below PMD_ORDER that are enabled * for this vma. Then filter out the orders that can't be allocated over * the faulting address and still be fully contained in the vma. */ orders = thp_vma_allowable_orders(vma, vma->vm_flags, false, true, true, BIT(PMD_ORDER) - 1); orders = thp_vma_suitable_orders(vma, vmf->address, orders); if (!orders) goto fallback; pte = pte_offset_map(vmf->pmd, vmf->address & PMD_MASK); if (!pte) return ERR_PTR(-EAGAIN); /* * Find the highest order where the aligned range is completely * pte_none(). Note that all remaining orders will be completely * pte_none(). */ order = highest_order(orders); while (orders) { addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); if (pte_range_none(pte + pte_index(addr), 1 << order)) break; order = next_order(&orders, order); } pte_unmap(pte); /* Try allocating the highest of the remaining orders. */ gfp = vma_thp_gfp_mask(vma); while (orders) { addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); folio = vma_alloc_folio(gfp, order, vma, addr, true); if (folio) { if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { folio_put(folio); goto next; } folio_throttle_swaprate(folio, gfp); clear_huge_page(&folio->page, vmf->address, 1 << order); return folio; } next: order = next_order(&orders, order); } fallback: #endif return folio_prealloc(vma->vm_mm, vma, vmf->address, true); } /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_lock still held, but pte unmapped and unlocked. */ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) { bool uffd_wp = vmf_orig_pte_uffd_wp(vmf); struct vm_area_struct *vma = vmf->vma; unsigned long addr = vmf->address; struct folio *folio; vm_fault_t ret = 0; int nr_pages = 1; pte_t entry; int i; /* File mapping without ->vm_ops ? */ if (vma->vm_flags & VM_SHARED) return VM_FAULT_SIGBUS; /* * Use pte_alloc() instead of pte_alloc_map(), so that OOM can * be distinguished from a transient failure of pte_offset_map(). 
*/ if (pte_alloc(vma->vm_mm, vmf->pmd)) return VM_FAULT_OOM; /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), vma->vm_page_prot)); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!vmf->pte) goto unlock; if (vmf_pte_changed(vmf)) { update_mmu_tlb(vma, vmf->address, vmf->pte); goto unlock; } ret = check_stable_address_space(vma->vm_mm); if (ret) goto unlock; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return handle_userfault(vmf, VM_UFFD_MISSING); } goto setpte; } /* Allocate our own private page. */ if (unlikely(anon_vma_prepare(vma))) goto oom; /* Returns NULL on OOM or ERR_PTR(-EAGAIN) if we must retry the fault */ folio = alloc_anon_folio(vmf); if (IS_ERR(folio)) return 0; if (!folio) goto oom; nr_pages = folio_nr_pages(folio); addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); /* * The memory barrier inside __folio_mark_uptodate makes sure that * preceding stores to the page contents become visible before * the set_pte_at() write. */ __folio_mark_uptodate(folio); entry = mk_pte(&folio->page, vma->vm_page_prot); entry = pte_sw_mkyoung(entry); if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry), vma); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); if (!vmf->pte) goto release; if (nr_pages == 1 && vmf_pte_changed(vmf)) { update_mmu_tlb(vma, addr, vmf->pte); goto release; } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { for (i = 0; i < nr_pages; i++) update_mmu_tlb(vma, addr + PAGE_SIZE * i, vmf->pte + i); goto release; } ret = check_stable_address_space(vma->vm_mm); if (ret) goto release; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); folio_put(folio); return handle_userfault(vmf, VM_UFFD_MISSING); } folio_ref_add(folio, nr_pages - 1); add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); folio_add_new_anon_rmap(folio, vma, addr); folio_add_lru_vma(folio, vma); setpte: if (uffd_wp) entry = pte_mkuffd_wp(entry); set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages); /* No need to invalidate - it was non-present before */ update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr_pages); unlock: if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; release: folio_put(folio); goto unlock; oom: return VM_FAULT_OOM; } /* * The mmap_lock must have been held on entry, and may have been * released depending on flags and vma->vm_ops->fault() return value. * See filemap_fault() and __lock_page_retry(). 
*/ static vm_fault_t __do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct folio *folio; vm_fault_t ret; /* * Preallocate pte before we take page_lock because this might lead to * deadlocks for memcg reclaim which waits for pages under writeback: * lock_page(A) * SetPageWriteback(A) * unlock_page(A) * lock_page(B) * lock_page(B) * pte_alloc_one * shrink_page_list * wait_on_page_writeback(A) * SetPageWriteback(B) * unlock_page(B) * # flush A, B to clear the writeback */ if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; } ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) return ret; folio = page_folio(vmf->page); if (unlikely(PageHWPoison(vmf->page))) { vm_fault_t poisonret = VM_FAULT_HWPOISON; if (ret & VM_FAULT_LOCKED) { if (page_mapped(vmf->page)) unmap_mapping_folio(folio); /* Retry if a clean folio was removed from the cache. */ if (mapping_evict_folio(folio->mapping, folio)) poisonret = VM_FAULT_NOPAGE; folio_unlock(folio); } folio_put(folio); vmf->page = NULL; return poisonret; } if (unlikely(!(ret & VM_FAULT_LOCKED))) folio_lock(folio); else VM_BUG_ON_PAGE(!folio_test_locked(folio), vmf->page); return ret; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void deposit_prealloc_pte(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); /* * We are going to consume the prealloc table, * count that as nr_ptes. */ mm_inc_nr_ptes(vma->vm_mm); vmf->prealloc_pte = NULL; } vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { struct folio *folio = page_folio(page); struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; pmd_t entry; vm_fault_t ret = VM_FAULT_FALLBACK; if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER)) return ret; if (page != &folio->page || folio_order(folio) != HPAGE_PMD_ORDER) return ret; /* * Just backoff if any subpage of a THP is corrupted otherwise * the corrupted page may mapped by PMD silently to escape the * check. This kind of THP just can be PTE mapped. Access to * the corrupted subpage should trigger SIGBUS as expected. */ if (unlikely(folio_test_has_hwpoisoned(folio))) return ret; /* * Archs like ppc64 need additional space to store information * related to pte entry. Use the preallocated table for that. */ if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_none(*vmf->pmd))) goto out; flush_icache_pages(vma, page, HPAGE_PMD_NR); entry = mk_huge_pmd(page, vma->vm_page_prot); if (write) entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); add_mm_counter(vma->vm_mm, mm_counter_file(folio), HPAGE_PMD_NR); folio_add_file_rmap_pmd(folio, page, vma); /* * deposit and withdraw with pmd lock held */ if (arch_needs_pgtable_deposit()) deposit_prealloc_pte(vmf); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); update_mmu_cache_pmd(vma, haddr, vmf->pmd); /* fault is handled */ ret = 0; count_vm_event(THP_FILE_MAPPED); out: spin_unlock(vmf->ptl); return ret; } #else vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { return VM_FAULT_FALLBACK; } #endif /** * set_pte_range - Set a range of PTEs to point to pages in a folio. * @vmf: Fault decription. 
* @folio: The folio that contains @page. * @page: The first page to create a PTE for. * @nr: The number of PTEs to create. * @addr: The first address to create a PTE for. */ void set_pte_range(struct vm_fault *vmf, struct folio *folio, struct page *page, unsigned int nr, unsigned long addr) { struct vm_area_struct *vma = vmf->vma; bool uffd_wp = vmf_orig_pte_uffd_wp(vmf); bool write = vmf->flags & FAULT_FLAG_WRITE; bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE); pte_t entry; flush_icache_pages(vma, page, nr); entry = mk_pte(page, vma->vm_page_prot); if (prefault && arch_wants_old_prefaulted_pte()) entry = pte_mkold(entry); else entry = pte_sw_mkyoung(entry); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (unlikely(uffd_wp)) entry = pte_mkuffd_wp(entry); /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr); VM_BUG_ON_FOLIO(nr != 1, folio); folio_add_new_anon_rmap(folio, vma, addr); folio_add_lru_vma(folio, vma); } else { add_mm_counter(vma->vm_mm, mm_counter_file(folio), nr); folio_add_file_rmap_ptes(folio, page, nr, vma); } set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr); /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr); } static bool vmf_pte_changed(struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID) return !pte_same(ptep_get(vmf->pte), vmf->orig_pte); return !pte_none(ptep_get(vmf->pte)); } /** * finish_fault - finish page fault once we have prepared the page to fault * * @vmf: structure describing the fault * * This function handles all that is needed to finish a page fault once the * page to fault in is prepared. It handles locking of PTEs, inserts PTE for * given page, adds reverse page mapping, handles memcg charges and LRU * addition. * * The function expects the page to be locked and on success it consumes a * reference of a page being mapped (for the PTE which maps it). * * Return: %0 on success, %VM_FAULT_ code in case of error. */ vm_fault_t finish_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page; vm_fault_t ret; /* Did we COW the page? */ if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) page = vmf->cow_page; else page = vmf->page; /* * check even for read faults because we might have lost our CoWed * page */ if (!(vma->vm_flags & VM_SHARED)) { ret = check_stable_address_space(vma->vm_mm); if (ret) return ret; } if (pmd_none(*vmf->pmd)) { if (PageTransCompound(page)) { ret = do_set_pmd(vmf, page); if (ret != VM_FAULT_FALLBACK) return ret; } if (vmf->prealloc_pte) pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) return VM_FAULT_OOM; } vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!vmf->pte) return VM_FAULT_NOPAGE; /* Re-check under ptl */ if (likely(!vmf_pte_changed(vmf))) { struct folio *folio = page_folio(page); set_pte_range(vmf, folio, page, 1, vmf->address); ret = 0; } else { update_mmu_tlb(vma, vmf->address, vmf->pte); ret = VM_FAULT_NOPAGE; } pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; } static unsigned long fault_around_pages __read_mostly = 65536 >> PAGE_SHIFT; #ifdef CONFIG_DEBUG_FS static int fault_around_bytes_get(void *data, u64 *val) { *val = fault_around_pages << PAGE_SHIFT; return 0; } /* * fault_around_bytes must be rounded down to the nearest page order as it's * what do_fault_around() expects to see. 
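 *
 * For example, with 4KiB pages and PTRS_PER_PTE == 512 (illustrative
 * numbers): writing 200000 rounds down to 131072 bytes, i.e.
 * fault_around_pages == 32; writing 100 is first raised to PAGE_SIZE and
 * becomes a single page, which disables fault-around (see
 * should_fault_around()); a value whose whole-page count exceeds
 * PTRS_PER_PTE is rejected with -EINVAL.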
*/ static int fault_around_bytes_set(void *data, u64 val) { if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; /* * The minimum value is 1 page, however this results in no fault-around * at all. See should_fault_around(). */ val = max(val, PAGE_SIZE); fault_around_pages = rounddown_pow_of_two(val) >> PAGE_SHIFT; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops, fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL, &fault_around_bytes_fops); return 0; } late_initcall(fault_around_debugfs); #endif /* * do_fault_around() tries to map few pages around the fault address. The hope * is that the pages will be needed soon and this will lower the number of * faults to handle. * * It uses vm_ops->map_pages() to map the pages, which skips the page if it's * not ready to be mapped: not up-to-date, locked, etc. * * This function doesn't cross VMA or page table boundaries, in order to call * map_pages() and acquire a PTE lock only once. * * fault_around_pages defines how many pages we'll try to map. * do_fault_around() expects it to be set to a power of two less than or equal * to PTRS_PER_PTE. * * The virtual address of the area that we map is naturally aligned to * fault_around_pages * PAGE_SIZE rounded down to the machine page size * (and therefore to page order). This way it's easier to guarantee * that we don't cross page table boundaries. */ static vm_fault_t do_fault_around(struct vm_fault *vmf) { pgoff_t nr_pages = READ_ONCE(fault_around_pages); pgoff_t pte_off = pte_index(vmf->address); /* The page offset of vmf->address within the VMA. */ pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; pgoff_t from_pte, to_pte; vm_fault_t ret; /* The PTE offset of the start address, clamped to the VMA. */ from_pte = max(ALIGN_DOWN(pte_off, nr_pages), pte_off - min(pte_off, vma_off)); /* The PTE offset of the end address, clamped to the VMA and PTE. */ to_pte = min3(from_pte + nr_pages, (pgoff_t)PTRS_PER_PTE, pte_off + vma_pages(vmf->vma) - vma_off) - 1; if (pmd_none(*vmf->pmd)) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; } rcu_read_lock(); ret = vmf->vma->vm_ops->map_pages(vmf, vmf->pgoff + from_pte - pte_off, vmf->pgoff + to_pte - pte_off); rcu_read_unlock(); return ret; } /* Return true if we should do read fault-around, false otherwise */ static inline bool should_fault_around(struct vm_fault *vmf) { /* No ->map_pages? No way to fault around... */ if (!vmf->vma->vm_ops->map_pages) return false; if (uffd_disable_fault_around(vmf->vma)) return false; /* A single page implies no faulting 'around' at all. */ return fault_around_pages > 1; } static vm_fault_t do_read_fault(struct vm_fault *vmf) { vm_fault_t ret = 0; struct folio *folio; /* * Let's call ->map_pages() first and use ->fault() as fallback * if page by the offset is not ready to be mapped (cold cache or * something). 
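 *
 * To make the fault-around window concrete (illustrative numbers: 4KiB
 * pages, PTRS_PER_PTE == 512, fault_around_pages == 16): for a fault at
 * pte_off == 100 in a VMA that starts 3 pages before the fault address and
 * extends well beyond it, do_fault_around() above computes
 * from_pte == max(96, 97) == 97 and to_pte == 112, so ->map_pages() is
 * asked to map 16 PTEs that stay inside both the VMA and the current page
 * table.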
*/ if (should_fault_around(vmf)) { ret = do_fault_around(vmf); if (ret) return ret; } ret = vmf_can_call_fault(vmf); if (ret) return ret; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; ret |= finish_fault(vmf); folio = page_folio(vmf->page); folio_unlock(folio); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) folio_put(folio); return ret; } static vm_fault_t do_cow_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct folio *folio; vm_fault_t ret; ret = vmf_can_call_fault(vmf); if (!ret) ret = vmf_anon_prepare(vmf); if (ret) return ret; folio = folio_prealloc(vma->vm_mm, vma, vmf->address, false); if (!folio) return VM_FAULT_OOM; vmf->cow_page = &folio->page; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; if (ret & VM_FAULT_DONE_COW) return ret; copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); __folio_mark_uptodate(folio); ret |= finish_fault(vmf); unlock_page(vmf->page); put_page(vmf->page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; return ret; uncharge_out: folio_put(folio); return ret; } static vm_fault_t do_shared_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret, tmp; struct folio *folio; ret = vmf_can_call_fault(vmf); if (ret) return ret; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; folio = page_folio(vmf->page); /* * Check if the backing address space wants to know that the page is * about to become writable */ if (vma->vm_ops->page_mkwrite) { folio_unlock(folio); tmp = do_page_mkwrite(vmf, folio); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { folio_put(folio); return tmp; } } ret |= finish_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) { folio_unlock(folio); folio_put(folio); return ret; } ret |= fault_dirty_shared_page(vmf); return ret; } /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults). * The mmap_lock may have been released depending on flags and our * return value. See filemap_fault() and __folio_lock_or_retry(). * If mmap_lock is released, vma may become invalid (for example * by other thread calling munmap()). */ static vm_fault_t do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *vm_mm = vma->vm_mm; vm_fault_t ret; /* * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) { vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte)) ret = VM_FAULT_SIGBUS; else { /* * Make sure this is not a temporary clearing of pte * by holding ptl and checking again. A R/M/W update * of pte involves: take ptl, clearing the pte so that * we don't have concurrent modification by hardware * followed by an update. 
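 * One example of such an updater is the ptep_modify_prot_start() /
 * ptep_modify_prot_commit() pair used by do_numa_page() below, which can
 * leave the pte transiently cleared while the ptl is held.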
*/ if (unlikely(pte_none(ptep_get(vmf->pte)))) ret = VM_FAULT_SIGBUS; else ret = VM_FAULT_NOPAGE; pte_unmap_unlock(vmf->pte, vmf->ptl); } } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ret = do_read_fault(vmf); else if (!(vma->vm_flags & VM_SHARED)) ret = do_cow_fault(vmf); else ret = do_shared_fault(vmf); /* preallocated pagetable is unused: free it */ if (vmf->prealloc_pte) { pte_free(vm_mm, vmf->prealloc_pte); vmf->prealloc_pte = NULL; } return ret; } int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags) { folio_get(folio); /* Record the current PID acceesing VMA */ vma_set_access_pid_bit(vma); count_vm_numa_event(NUMA_HINT_FAULTS); if (page_nid == numa_node_id()) { count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); *flags |= TNF_FAULT_LOCAL; } return mpol_misplaced(folio, vma, addr); } static vm_fault_t do_numa_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct folio *folio = NULL; int nid = NUMA_NO_NODE; bool writable = false; int last_cpupid; int target_nid; pte_t pte, old_pte; int flags = 0; /* * The pte cannot be used safely until we verify, while holding the page * table lock, that its contents have not changed during fault handling. */ spin_lock(vmf->ptl); /* Read the live PTE from the page tables: */ old_pte = ptep_get(vmf->pte); if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); goto out; } pte = pte_modify(old_pte, vma->vm_page_prot); /* * Detect now whether the PTE could be writable; this information * is only valid while holding the PT lock. */ writable = pte_write(pte); if (!writable && vma_wants_manual_pte_write_upgrade(vma) && can_change_pte_writable(vma, vmf->address, pte)) writable = true; folio = vm_normal_folio(vma, vmf->address, pte); if (!folio || folio_is_zone_device(folio)) goto out_map; /* TODO: handle PTE-mapped THP */ if (folio_test_large(folio)) goto out_map; /* * Avoid grouping on RO pages in general. RO pages shouldn't hurt as * much anyway since they can be in shared cache state. This misses * the case where a mapping is writable but the process never writes * to it but pte_write gets cleared during protection updates and * pte_dirty has unpredictable behaviour between PTE scan updates, * background writeback, dirty balancing and application behaviour. */ if (!writable) flags |= TNF_NO_GROUP; /* * Flag if the folio is shared between multiple address spaces. This * is later used when determining whether to group tasks together */ if (folio_estimated_sharers(folio) > 1 && (vma->vm_flags & VM_SHARED)) flags |= TNF_SHARED; nid = folio_nid(folio); /* * For memory tiering mode, cpupid of slow memory page is used * to record page access time. So use default value. 
*/ if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) && !node_is_toptier(nid)) last_cpupid = (-1 & LAST_CPUPID_MASK); else last_cpupid = folio_last_cpupid(folio); target_nid = numa_migrate_prep(folio, vma, vmf->address, nid, &flags); if (target_nid == NUMA_NO_NODE) { folio_put(folio); goto out_map; } pte_unmap_unlock(vmf->pte, vmf->ptl); writable = false; /* Migrate to the requested node */ if (migrate_misplaced_folio(folio, vma, target_nid)) { nid = target_nid; flags |= TNF_MIGRATED; } else { flags |= TNF_MIGRATE_FAIL; vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte)) goto out; if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); goto out; } goto out_map; } out: if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, 1, flags); return 0; out_map: /* * Make it present again, depending on how arch implements * non-accessible ptes, some can allow access by kernel mode. */ old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); pte = pte_modify(old_pte, vma->vm_page_prot); pte = pte_mkyoung(pte); if (writable) pte = pte_mkwrite(pte, vma); ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); pte_unmap_unlock(vmf->pte, vmf->ptl); goto out; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; if (vma_is_anonymous(vma)) return do_huge_pmd_anonymous_page(vmf); if (vma->vm_ops->huge_fault) return vma->vm_ops->huge_fault(vmf, PMD_ORDER); return VM_FAULT_FALLBACK; } /* `inline' is required to avoid gcc 4.1.2 build error */ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; vm_fault_t ret; if (vma_is_anonymous(vma)) { if (likely(!unshare) && userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) { if (userfaultfd_wp_async(vmf->vma)) goto split; return handle_userfault(vmf, VM_UFFD_WP); } return do_huge_pmd_wp_page(vmf); } if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { if (vma->vm_ops->huge_fault) { ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER); if (!(ret & VM_FAULT_FALLBACK)) return ret; } } split: /* COW or write-notify handled on pte level: split pmd. 
*/ __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL); return VM_FAULT_FALLBACK; } static vm_fault_t create_huge_pud(struct vm_fault *vmf) { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) struct vm_area_struct *vma = vmf->vma; /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vma)) return VM_FAULT_FALLBACK; if (vma->vm_ops->huge_fault) return vma->vm_ops->huge_fault(vmf, PUD_ORDER); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ return VM_FAULT_FALLBACK; } static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vma)) goto split; if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { if (vma->vm_ops->huge_fault) { ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER); if (!(ret & VM_FAULT_FALLBACK)) return ret; } } split: /* COW or write-notify not handled on PUD level: split pud.*/ __split_huge_pud(vma, vmf->pud, vmf->address); #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ return VM_FAULT_FALLBACK; } /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most * RISC architectures). The early dirtying is also good on the i386. * * There is also a hook called "update_mmu_cache()" that architectures * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). * * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow * concurrent faults). * * The mmap_lock may have been released depending on flags and our return value. * See filemap_fault() and __folio_lock_or_retry(). */ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { pte_t entry; if (unlikely(pmd_none(*vmf->pmd))) { /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table * for an instant, it will be difficult to retract from * concurrent faults and from rmap lookups. */ vmf->pte = NULL; vmf->flags &= ~FAULT_FLAG_ORIG_PTE_VALID; } else { /* * A regular pmd is established and it can't morph into a huge * pmd by anon khugepaged, since that takes mmap_lock in write * mode; but shmem or file collapse to THP could still morph * it into a huge pmd: just retry later if so. 
*/ vmf->pte = pte_offset_map_nolock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte)) return 0; vmf->orig_pte = ptep_get_lockless(vmf->pte); vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID; if (pte_none(vmf->orig_pte)) { pte_unmap(vmf->pte); vmf->pte = NULL; } } if (!vmf->pte) return do_pte_missing(vmf); if (!pte_present(vmf->orig_pte)) return do_swap_page(vmf); if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) return do_numa_page(vmf); spin_lock(vmf->ptl); entry = vmf->orig_pte; if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) { update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); goto unlock; } if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { if (!pte_write(entry)) return do_wp_page(vmf); else if (likely(vmf->flags & FAULT_FLAG_WRITE)) entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, vmf->flags & FAULT_FLAG_WRITE)) { update_mmu_cache_range(vmf, vmf->vma, vmf->address, vmf->pte, 1); } else { /* Skip spurious TLB flush for retried page fault */ if (vmf->flags & FAULT_FLAG_TRIED) goto unlock; /* * This is needed only for protection faults but the arch code * is not yet telling us if this is a protection fault or not. * This still avoids useless tlb flushes for .text page faults * with threads. */ if (vmf->flags & FAULT_FLAG_WRITE) flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, vmf->pte); } unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } /* * On entry, we hold either the VMA lock or the mmap_lock * (FAULT_FLAG_VMA_LOCK tells you which). If VM_FAULT_RETRY is set in * the result, the mmap_lock is not held on exit. See filemap_fault() * and __folio_lock_or_retry(). */ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { struct vm_fault vmf = { .vma = vma, .address = address & PAGE_MASK, .real_address = address, .flags = flags, .pgoff = linear_page_index(vma, address), .gfp_mask = __get_fault_gfp_mask(vma), }; struct mm_struct *mm = vma->vm_mm; unsigned long vm_flags = vma->vm_flags; pgd_t *pgd; p4d_t *p4d; vm_fault_t ret; pgd = pgd_offset(mm, address); p4d = p4d_alloc(mm, pgd, address); if (!p4d) return VM_FAULT_OOM; vmf.pud = pud_alloc(mm, p4d, address); if (!vmf.pud) return VM_FAULT_OOM; retry_pud: if (pud_none(*vmf.pud) && thp_vma_allowable_order(vma, vm_flags, false, true, true, PUD_ORDER)) { ret = create_huge_pud(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { pud_t orig_pud = *vmf.pud; barrier(); if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { /* * TODO once we support anonymous PUDs: NUMA case and * FAULT_FLAG_UNSHARE handling. */ if ((flags & FAULT_FLAG_WRITE) && !pud_write(orig_pud)) { ret = wp_huge_pud(&vmf, orig_pud); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { huge_pud_set_accessed(&vmf, orig_pud); return 0; } } } vmf.pmd = pmd_alloc(mm, vmf.pud, address); if (!vmf.pmd) return VM_FAULT_OOM; /* Huge pud page fault raced with pmd_alloc? 
*/ if (pud_trans_unstable(vmf.pud)) goto retry_pud; if (pmd_none(*vmf.pmd) && thp_vma_allowable_order(vma, vm_flags, false, true, true, PMD_ORDER)) { ret = create_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { vmf.orig_pmd = pmdp_get_lockless(vmf.pmd); if (unlikely(is_swap_pmd(vmf.orig_pmd))) { VM_BUG_ON(thp_migration_supported() && !is_pmd_migration_entry(vmf.orig_pmd)); if (is_pmd_migration_entry(vmf.orig_pmd)) pmd_migration_entry_wait(mm, vmf.pmd); return 0; } if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) { if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&vmf); if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) && !pmd_write(vmf.orig_pmd)) { ret = wp_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { huge_pmd_set_accessed(&vmf); return 0; } } } return handle_pte_fault(&vmf); } /** * mm_account_fault - Do page fault accounting * @mm: mm from which memcg should be extracted. It can be NULL. * @regs: the pt_regs struct pointer. When set to NULL, will skip accounting * of perf event counters, but we'll still do the per-task accounting to * the task who triggered this page fault. * @address: the faulted address. * @flags: the fault flags. * @ret: the fault retcode. * * This will take care of most of the page fault accounting. Meanwhile, it * will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter * updates. However, note that the handling of PERF_COUNT_SW_PAGE_FAULTS should * still be in per-arch page fault handlers at the entry of page fault. */ static inline void mm_account_fault(struct mm_struct *mm, struct pt_regs *regs, unsigned long address, unsigned int flags, vm_fault_t ret) { bool major; /* Incomplete faults will be accounted upon completion. */ if (ret & VM_FAULT_RETRY) return; /* * To preserve the behavior of older kernels, PGFAULT counters record * both successful and failed faults, as opposed to perf counters, * which ignore failed cases. */ count_vm_event(PGFAULT); count_memcg_event_mm(mm, PGFAULT); /* * Do not account for unsuccessful faults (e.g. when the address wasn't * valid). That includes arch_vma_access_permitted() failing before * reaching here. So this is not a "this many hardware page faults" * counter. We should use the hw profiling for that. */ if (ret & VM_FAULT_ERROR) return; /* * We define the fault as a major fault when the final successful fault * is VM_FAULT_MAJOR, or if it retried (which implies that we couldn't * handle it immediately previously). */ major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED); if (major) current->maj_flt++; else current->min_flt++; /* * If the fault is done for GUP, regs will be NULL. We only do the * accounting for the per thread fault counters who triggered the * fault, and we skip the perf event updates. 
*/ if (!regs) return; if (major) perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); else perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } #ifdef CONFIG_LRU_GEN static void lru_gen_enter_fault(struct vm_area_struct *vma) { /* the LRU algorithm only applies to accesses with recency */ current->in_lru_fault = vma_has_recency(vma); } static void lru_gen_exit_fault(void) { current->in_lru_fault = false; } #else static void lru_gen_enter_fault(struct vm_area_struct *vma) { } static void lru_gen_exit_fault(void) { } #endif /* CONFIG_LRU_GEN */ static vm_fault_t sanitize_fault_flags(struct vm_area_struct *vma, unsigned int *flags) { if (unlikely(*flags & FAULT_FLAG_UNSHARE)) { if (WARN_ON_ONCE(*flags & FAULT_FLAG_WRITE)) return VM_FAULT_SIGSEGV; /* * FAULT_FLAG_UNSHARE only applies to COW mappings. Let's * just treat it like an ordinary read-fault otherwise. */ if (!is_cow_mapping(vma->vm_flags)) *flags &= ~FAULT_FLAG_UNSHARE; } else if (*flags & FAULT_FLAG_WRITE) { /* Write faults on read-only mappings are impossible ... */ if (WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE))) return VM_FAULT_SIGSEGV; /* ... and FOLL_FORCE only applies to COW mappings. */ if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE) && !is_cow_mapping(vma->vm_flags))) return VM_FAULT_SIGSEGV; } #ifdef CONFIG_PER_VMA_LOCK /* * Per-VMA locks can't be used with FAULT_FLAG_RETRY_NOWAIT because of * the assumption that lock is dropped on VM_FAULT_RETRY. */ if (WARN_ON_ONCE((*flags & (FAULT_FLAG_VMA_LOCK | FAULT_FLAG_RETRY_NOWAIT)) == (FAULT_FLAG_VMA_LOCK | FAULT_FLAG_RETRY_NOWAIT))) return VM_FAULT_SIGSEGV; #endif return 0; } /* * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our * return value. See filemap_fault() and __folio_lock_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs) { /* If the fault handler drops the mmap_lock, vma may be freed */ struct mm_struct *mm = vma->vm_mm; vm_fault_t ret; __set_current_state(TASK_RUNNING); ret = sanitize_fault_flags(vma, &flags); if (ret) goto out; if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) { ret = VM_FAULT_SIGSEGV; goto out; } /* * Enable the memcg OOM handling for faults triggered in user * space. Kernel faults are handled more gracefully. */ if (flags & FAULT_FLAG_USER) mem_cgroup_enter_user_fault(); lru_gen_enter_fault(vma); if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else ret = __handle_mm_fault(vma, address, flags); lru_gen_exit_fault(); if (flags & FAULT_FLAG_USER) { mem_cgroup_exit_user_fault(); /* * The task may have entered a memcg OOM situation but * if the allocation error was handled gracefully (no * VM_FAULT_OOM), there is no need to kill anything. * Just clean up the OOM state peacefully. 
*/ if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) mem_cgroup_oom_synchronize(false); } out: mm_account_fault(mm, regs, address, flags, ret); return ret; } EXPORT_SYMBOL_GPL(handle_mm_fault); #ifdef CONFIG_LOCK_MM_AND_FIND_VMA #include <linux/extable.h> static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) { if (likely(mmap_read_trylock(mm))) return true; if (regs && !user_mode(regs)) { unsigned long ip = exception_ip(regs); if (!search_exception_tables(ip)) return false; } return !mmap_read_lock_killable(mm); } static inline bool mmap_upgrade_trylock(struct mm_struct *mm) { /* * We don't have this operation yet. * * It should be easy enough to do: it's basically an * atomic_long_try_cmpxchg_acquire() * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but * it also needs the proper lockdep magic etc. */ return false; } static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) { mmap_read_unlock(mm); if (regs && !user_mode(regs)) { unsigned long ip = exception_ip(regs); if (!search_exception_tables(ip)) return false; } return !mmap_write_lock_killable(mm); } /* * Helper for page fault handling. * * This is kind of equivalent to "mmap_read_lock()" followed * by "find_extend_vma()", except it's a lot more careful about * the locking (and will drop the lock on failure). * * For example, if we have a kernel bug that causes a page * fault, we don't want to just use mmap_read_lock() to get * the mm lock, because that would deadlock if the bug were * to happen while we're holding the mm lock for writing. * * So this checks the exception tables on kernel faults in * order to only do this all for instructions that are actually * expected to fault. * * We can also actually take the mm lock for writing if we * need to extend the vma, which helps the VM layer a lot. */ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, unsigned long addr, struct pt_regs *regs) { struct vm_area_struct *vma; if (!get_mmap_lock_carefully(mm, regs)) return NULL; vma = find_vma(mm, addr); if (likely(vma && (vma->vm_start <= addr))) return vma; /* * Well, dang. We might still be successful, but only * if we can extend a vma to do so. */ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { mmap_read_unlock(mm); return NULL; } /* * We can try to upgrade the mmap lock atomically, * in which case we can continue to use the vma * we already looked up. * * Otherwise we'll have to drop the mmap lock and * re-take it, and also look up the vma again, * re-checking it. */ if (!mmap_upgrade_trylock(mm)) { if (!upgrade_mmap_lock_carefully(mm, regs)) return NULL; vma = find_vma(mm, addr); if (!vma) goto fail; if (vma->vm_start <= addr) goto success; if (!(vma->vm_flags & VM_GROWSDOWN)) goto fail; } if (expand_stack_locked(vma, addr)) goto fail; success: mmap_write_downgrade(mm); return vma; fail: mmap_write_unlock(mm); return NULL; } #endif #ifdef CONFIG_PER_VMA_LOCK /* * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be * stable and not isolated. If the VMA is not found or is being modified the * function returns NULL. */ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address) { MA_STATE(mas, &mm->mm_mt, address, address); struct vm_area_struct *vma; rcu_read_lock(); retry: vma = mas_walk(&mas); if (!vma) goto inval; if (!vma_start_read(vma)) goto inval; /* * find_mergeable_anon_vma uses adjacent vmas which are not locked.
* This check must happen after vma_start_read(); otherwise, a * concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA * from its anon_vma. */ if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) goto inval_end_read; /* Check since vm_start/vm_end might change before we lock the VMA */ if (unlikely(address < vma->vm_start || address >= vma->vm_end)) goto inval_end_read; /* Check if the VMA got isolated after we found it */ if (vma->detached) { vma_end_read(vma); count_vm_vma_lock_event(VMA_LOCK_MISS); /* The area was replaced with another one */ goto retry; } rcu_read_unlock(); return vma; inval_end_read: vma_end_read(vma); inval: rcu_read_unlock(); count_vm_vma_lock_event(VMA_LOCK_ABORT); return NULL; } #endif /* CONFIG_PER_VMA_LOCK */ #ifndef __PAGETABLE_P4D_FOLDED /* * Allocate p4d page table. * We've already handled the fast-path in-line. */ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { p4d_t *new = p4d_alloc_one(mm, address); if (!new) return -ENOMEM; spin_lock(&mm->page_table_lock); if (pgd_present(*pgd)) { /* Another has populated it */ p4d_free(mm, new); } else { smp_wmb(); /* See comment in pmd_install() */ pgd_populate(mm, pgd, new); } spin_unlock(&mm->page_table_lock); return 0; } #endif /* __PAGETABLE_P4D_FOLDED */ #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. * We've already handled the fast-path in-line. */ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { pud_t *new = pud_alloc_one(mm, address); if (!new) return -ENOMEM; spin_lock(&mm->page_table_lock); if (!p4d_present(*p4d)) { mm_inc_nr_puds(mm); smp_wmb(); /* See comment in pmd_install() */ p4d_populate(mm, p4d, new); } else /* Another has populated it */ pud_free(mm, new); spin_unlock(&mm->page_table_lock); return 0; } #endif /* __PAGETABLE_PUD_FOLDED */ #ifndef __PAGETABLE_PMD_FOLDED /* * Allocate page middle directory. * We've already handled the fast-path in-line. */ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { spinlock_t *ptl; pmd_t *new = pmd_alloc_one(mm, address); if (!new) return -ENOMEM; ptl = pud_lock(mm, pud); if (!pud_present(*pud)) { mm_inc_nr_pmds(mm); smp_wmb(); /* See comment in pmd_install() */ pud_populate(mm, pud, new); } else { /* Another has populated it */ pmd_free(mm, new); } spin_unlock(ptl); return 0; } #endif /* __PAGETABLE_PMD_FOLDED */ /** * follow_pte - look up PTE at a user virtual address * @mm: the mm_struct of the target address space * @address: user virtual address * @ptepp: location to store found PTE * @ptlp: location to store the lock for the PTE * * On a successful return, the pointer to the PTE is stored in @ptepp; * the corresponding lock is taken and its location is stored in @ptlp. * The contents of the PTE are only stable until @ptlp is released; * any further use, if any, must be protected against invalidation * with MMU notifiers. * * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore * should be taken for read. * * KVM uses this function. While it is arguably less bad than ``follow_pfn``, * it is not a good general-purpose API. * * Return: zero on success, -ve otherwise. 
*/ int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, spinlock_t **ptlp) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep; pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) goto out; p4d = p4d_offset(pgd, address); if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) goto out; pud = pud_offset(p4d, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) goto out; pmd = pmd_offset(pud, address); VM_BUG_ON(pmd_trans_huge(*pmd)); ptep = pte_offset_map_lock(mm, pmd, address, ptlp); if (!ptep) goto out; if (!pte_present(ptep_get(ptep))) goto unlock; *ptepp = ptep; return 0; unlock: pte_unmap_unlock(ptep, *ptlp); out: return -EINVAL; } EXPORT_SYMBOL_GPL(follow_pte); /** * follow_pfn - look up PFN at a user virtual address * @vma: memory mapping * @address: user virtual address * @pfn: location to store found PFN * * Only IO mappings and raw PFN mappings are allowed. * * This function does not allow the caller to read the permissions * of the PTE. Do not use it. * * Return: zero and the pfn at @pfn on success, -ve otherwise. */ int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn) { int ret = -EINVAL; spinlock_t *ptl; pte_t *ptep; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) return ret; ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); if (ret) return ret; *pfn = pte_pfn(ptep_get(ptep)); pte_unmap_unlock(ptep, ptl); return 0; } EXPORT_SYMBOL(follow_pfn); #ifdef CONFIG_HAVE_IOREMAP_PROT int follow_phys(struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned long *prot, resource_size_t *phys) { int ret = -EINVAL; pte_t *ptep, pte; spinlock_t *ptl; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) goto out; if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) goto out; pte = ptep_get(ptep); /* Never return PFNs of anon folios in COW mappings. */ if (vm_normal_folio(vma, address, pte)) goto unlock; if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; *prot = pgprot_val(pte_pgprot(pte)); *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; ret = 0; unlock: pte_unmap_unlock(ptep, ptl); out: return ret; } /** * generic_access_phys - generic implementation for iomem mmap access * @vma: the vma to access * @addr: userspace address, not relative offset within @vma * @buf: buffer to read/write * @len: length of transfer * @write: set to FOLL_WRITE when writing, otherwise reading * * This is a generic implementation for &vm_operations_struct.access for an * iomem mapping. This callback is used by access_process_vm() when the @vma is * not page based. 
*/ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { resource_size_t phys_addr; unsigned long prot = 0; void __iomem *maddr; pte_t *ptep, pte; spinlock_t *ptl; int offset = offset_in_page(addr); int ret = -EINVAL; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) return -EINVAL; retry: if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) return -EINVAL; pte = ptep_get(ptep); pte_unmap_unlock(ptep, ptl); prot = pgprot_val(pte_pgprot(pte)); phys_addr = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; if ((write & FOLL_WRITE) && !pte_write(pte)) return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); if (!maddr) return -ENOMEM; if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) goto out_unmap; if (!pte_same(pte, ptep_get(ptep))) { pte_unmap_unlock(ptep, ptl); iounmap(maddr); goto retry; } if (write) memcpy_toio(maddr + offset, buf, len); else memcpy_fromio(buf, maddr + offset, len); ret = len; pte_unmap_unlock(ptep, ptl); out_unmap: iounmap(maddr); return ret; } EXPORT_SYMBOL_GPL(generic_access_phys); #endif /* * Access another process' address space as given in mm. */ static int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { void *old_buf = buf; int write = gup_flags & FOLL_WRITE; if (mmap_read_lock_killable(mm)) return 0; /* Untag the address before looking up the VMA */ addr = untagged_addr_remote(mm, addr); /* Avoid triggering the temporary warning in __get_user_pages */ if (!vma_lookup(mm, addr) && !expand_stack(mm, addr)) return 0; /* ignore errors, just check how much was successfully transferred */ while (len) { int bytes, offset; void *maddr; struct vm_area_struct *vma = NULL; struct page *page = get_user_page_vma_remote(mm, addr, gup_flags, &vma); if (IS_ERR(page)) { /* We might need to expand the stack to access it */ vma = vma_lookup(mm, addr); if (!vma) { vma = expand_stack(mm, addr); /* mmap_lock was dropped on failure */ if (!vma) return buf - old_buf; /* Try again if stack expansion worked */ continue; } /* * Check if this is a VM_IO | VM_PFNMAP VMA, which * we can access using slightly different code. */ bytes = 0; #ifdef CONFIG_HAVE_IOREMAP_PROT if (vma->vm_ops && vma->vm_ops->access) bytes = vma->vm_ops->access(vma, addr, buf, len, write); #endif if (bytes <= 0) break; } else { bytes = len; offset = addr & (PAGE_SIZE-1); if (bytes > PAGE_SIZE-offset) bytes = PAGE_SIZE-offset; maddr = kmap_local_page(page); if (write) { copy_to_user_page(vma, page, addr, maddr + offset, buf, bytes); set_page_dirty_lock(page); } else { copy_from_user_page(vma, page, addr, buf, maddr + offset, bytes); } unmap_and_put_page(page, maddr); } len -= bytes; buf += bytes; addr += bytes; } mmap_read_unlock(mm); return buf - old_buf; } /** * access_remote_vm - access another process' address space * @mm: the mm_struct of the target address space * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. * * Return: number of bytes copied from source to destination. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { return __access_remote_vm(mm, addr, buf, len, gup_flags); } /* * Access another process' address space. 
* Source/target buffer must be kernel space, * Do not walk the page table directly, use get_user_pages */ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct mm_struct *mm; int ret; mm = get_task_mm(tsk); if (!mm) return 0; ret = __access_remote_vm(mm, addr, buf, len, gup_flags); mmput(mm); return ret; } EXPORT_SYMBOL_GPL(access_process_vm); /* * Print the name of a VMA. */ void print_vma_addr(char *prefix, unsigned long ip) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; /* * we might be running from an atomic context so we cannot sleep */ if (!mmap_read_trylock(mm)) return; vma = find_vma(mm, ip); if (vma && vma->vm_file) { struct file *f = vma->vm_file; char *buf = (char *)__get_free_page(GFP_NOWAIT); if (buf) { char *p; p = file_path(f, buf, PAGE_SIZE); if (IS_ERR(p)) p = "?"; printk("%s%s[%lx+%lx]", prefix, kbasename(p), vma->vm_start, vma->vm_end - vma->vm_start); free_page((unsigned long)buf); } } mmap_read_unlock(mm); } #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP) void __might_fault(const char *file, int line) { if (pagefault_disabled()) return; __might_sleep(file, line); #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) if (current->mm) might_lock_read(&current->mm->mmap_lock); #endif } EXPORT_SYMBOL(__might_fault); #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) /* * Process all subpages of the specified huge page with the specified * operation. The target subpage will be processed last to keep its * cache lines hot. */ static inline int process_huge_page( unsigned long addr_hint, unsigned int pages_per_huge_page, int (*process_subpage)(unsigned long addr, int idx, void *arg), void *arg) { int i, n, base, l, ret; unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); /* Process target subpage last to keep its cache lines hot */ might_sleep(); n = (addr_hint - addr) / PAGE_SIZE; if (2 * n <= pages_per_huge_page) { /* If target subpage in first half of huge page */ base = 0; l = n; /* Process subpages at the end of huge page */ for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { cond_resched(); ret = process_subpage(addr + i * PAGE_SIZE, i, arg); if (ret) return ret; } } else { /* If target subpage in second half of huge page */ base = pages_per_huge_page - 2 * (pages_per_huge_page - n); l = pages_per_huge_page - n; /* Process subpages at the begin of huge page */ for (i = 0; i < base; i++) { cond_resched(); ret = process_subpage(addr + i * PAGE_SIZE, i, arg); if (ret) return ret; } } /* * Process remaining subpages in left-right-left-right pattern * towards the target subpage */ for (i = 0; i < l; i++) { int left_idx = base + i; int right_idx = base + 2 * l - 1 - i; cond_resched(); ret = process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg); if (ret) return ret; cond_resched(); ret = process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg); if (ret) return ret; } return 0; } static void clear_gigantic_page(struct page *page, unsigned long addr, unsigned int pages_per_huge_page) { int i; struct page *p; might_sleep(); for (i = 0; i < pages_per_huge_page; i++) { p = nth_page(page, i); cond_resched(); clear_user_highpage(p, addr + i * PAGE_SIZE); } } static int clear_subpage(unsigned long addr, int idx, void *arg) { struct page *page = arg; clear_user_highpage(nth_page(page, idx), addr); return 0; } void clear_huge_page(struct page *page, unsigned long addr_hint, unsigned int pages_per_huge_page) { unsigned 
long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { clear_gigantic_page(page, addr, pages_per_huge_page); return; } process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page); } static int copy_user_gigantic_page(struct folio *dst, struct folio *src, unsigned long addr, struct vm_area_struct *vma, unsigned int pages_per_huge_page) { int i; struct page *dst_page; struct page *src_page; for (i = 0; i < pages_per_huge_page; i++) { dst_page = folio_page(dst, i); src_page = folio_page(src, i); cond_resched(); if (copy_mc_user_highpage(dst_page, src_page, addr + i*PAGE_SIZE, vma)) { memory_failure_queue(page_to_pfn(src_page), 0); return -EHWPOISON; } } return 0; } struct copy_subpage_arg { struct page *dst; struct page *src; struct vm_area_struct *vma; }; static int copy_subpage(unsigned long addr, int idx, void *arg) { struct copy_subpage_arg *copy_arg = arg; struct page *dst = nth_page(copy_arg->dst, idx); struct page *src = nth_page(copy_arg->src, idx); if (copy_mc_user_highpage(dst, src, addr, copy_arg->vma)) { memory_failure_queue(page_to_pfn(src), 0); return -EHWPOISON; } return 0; } int copy_user_large_folio(struct folio *dst, struct folio *src, unsigned long addr_hint, struct vm_area_struct *vma) { unsigned int pages_per_huge_page = folio_nr_pages(dst); unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); struct copy_subpage_arg arg = { .dst = &dst->page, .src = &src->page, .vma = vma, }; if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) return copy_user_gigantic_page(dst, src, addr, vma, pages_per_huge_page); return process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg); } long copy_folio_from_user(struct folio *dst_folio, const void __user *usr_src, bool allow_pagefault) { void *kaddr; unsigned long i, rc = 0; unsigned int nr_pages = folio_nr_pages(dst_folio); unsigned long ret_val = nr_pages * PAGE_SIZE; struct page *subpage; for (i = 0; i < nr_pages; i++) { subpage = folio_page(dst_folio, i); kaddr = kmap_local_page(subpage); if (!allow_pagefault) pagefault_disable(); rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE); if (!allow_pagefault) pagefault_enable(); kunmap_local(kaddr); ret_val -= (PAGE_SIZE - rc); if (rc) break; flush_dcache_page(subpage); cond_resched(); } return ret_val; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS static struct kmem_cache *page_ptl_cachep; void __init ptlock_cache_init(void) { page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, SLAB_PANIC, NULL); } bool ptlock_alloc(struct ptdesc *ptdesc) { spinlock_t *ptl; ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL); if (!ptl) return false; ptdesc->ptl = ptl; return true; } void ptlock_free(struct ptdesc *ptdesc) { kmem_cache_free(page_ptl_cachep, ptdesc->ptl); } #endif
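/*
 * Illustrative sketch, not part of mm/memory.c: the fault-handling entry
 * points above (lock_mm_and_find_vma() and handle_mm_fault()) are normally
 * driven from an architecture's page-fault exception handler. The function
 * below is a hypothetical, simplified caller showing that flow; the name
 * example_do_user_fault() and the fault_is_write parameter are invented for
 * illustration, and the sketch assumes CONFIG_LOCK_MM_AND_FIND_VMA plus the
 * usual <linux/mm.h> and <linux/sched.h> declarations. Real arch handlers
 * also check for fatal signals and translate VM_FAULT_ERROR bits into
 * signals, which is omitted here for brevity.
 */
static vm_fault_t example_do_user_fault(struct pt_regs *regs,
					unsigned long addr,
					bool fault_is_write)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned int flags = FAULT_FLAG_DEFAULT | FAULT_FLAG_USER;
	vm_fault_t fault;

	if (fault_is_write)
		flags |= FAULT_FLAG_WRITE;

retry:
	/* Takes mmap_lock for read (or briefly for write to grow the stack). */
	vma = lock_mm_and_find_vma(mm, addr, regs);
	if (!vma)
		return VM_FAULT_SIGSEGV;

	/* May drop mmap_lock and report VM_FAULT_RETRY, as described above. */
	fault = handle_mm_fault(vma, addr, flags, regs);

	if (fault & VM_FAULT_RETRY) {
		/* mmap_lock was already released for us; try exactly once more. */
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}

	mmap_read_unlock(mm);
	return fault;
}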
/* * Copyright (c) 2010-2011 Atheros Communications Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include <asm/unaligned.h> #include "htc.h" MODULE_FIRMWARE(HTC_7010_MODULE_FW); MODULE_FIRMWARE(HTC_9271_MODULE_FW); static const struct usb_device_id ath9k_hif_usb_ids[] = { { USB_DEVICE(0x0cf3, 0x9271) }, /* Atheros */ { USB_DEVICE(0x0cf3, 0x1006) }, /* Atheros */ { USB_DEVICE(0x0846, 0x9030) }, /* Netgear N150 */ { USB_DEVICE(0x07b8, 0x9271) }, /* Altai WA1011N-GU */ { USB_DEVICE(0x07D1, 0x3A10) }, /* Dlink Wireless 150 */ { USB_DEVICE(0x13D3, 0x3327) }, /* Azurewave */ { USB_DEVICE(0x13D3, 0x3328) }, /* Azurewave */ { USB_DEVICE(0x13D3, 0x3346) }, /* IMC Networks */ { USB_DEVICE(0x13D3, 0x3348) }, /* Azurewave */ { USB_DEVICE(0x13D3, 0x3349) }, /* Azurewave */ { USB_DEVICE(0x13D3, 0x3350) }, /* Azurewave */ { USB_DEVICE(0x04CA, 0x4605) }, /* Liteon */ { USB_DEVICE(0x040D, 0x3801) }, /* VIA */ { USB_DEVICE(0x0cf3, 0xb003) }, /* Ubiquiti WifiStation Ext */ { USB_DEVICE(0x0cf3, 0xb002) }, /* Ubiquiti WifiStation */ { USB_DEVICE(0x057c, 0x8403) }, /* AVM FRITZ!WLAN 11N v2 USB */ { USB_DEVICE(0x0471, 0x209e) }, /* Philips (or NXP) PTA01 */ { USB_DEVICE(0x1eda, 0x2315) }, /* AirTies */ { USB_DEVICE(0x0cf3, 0x7015), .driver_info = AR9287_USB }, /* Atheros */ { USB_DEVICE(0x0cf3, 0x7010), .driver_info = AR9280_USB }, /* Atheros */ { USB_DEVICE(0x0846, 0x9018), .driver_info = AR9280_USB }, /* Netgear WNDA3200 */ { USB_DEVICE(0x083A, 0xA704), .driver_info = AR9280_USB }, /* SMC Networks */ { USB_DEVICE(0x0411, 0x017f), .driver_info = AR9280_USB }, /* Sony UWA-BR100 */ { USB_DEVICE(0x0411, 0x0197), .driver_info = AR9280_USB }, /* Buffalo WLI-UV-AG300P */ { USB_DEVICE(0x04da, 0x3904), .driver_info = AR9280_USB }, { USB_DEVICE(0x0930, 0x0a08), .driver_info = AR9280_USB }, /* Toshiba WLM-20U2 and GN-1080 */ { USB_DEVICE(0x0cf3, 0x20ff), .driver_info = STORAGE_DEVICE }, { }, }; MODULE_DEVICE_TABLE(usb, ath9k_hif_usb_ids); static int __hif_usb_tx(struct hif_device_usb *hif_dev); static void hif_usb_regout_cb(struct urb *urb) { struct cmd_buf *cmd = urb->context; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: goto free; default: break; } if (cmd) { ath9k_htc_txcompletion_cb(cmd->hif_dev->htc_handle, cmd->skb, true); kfree(cmd); } return; free: kfree_skb(cmd->skb); kfree(cmd); } static int hif_usb_send_regout(struct hif_device_usb *hif_dev, struct sk_buff *skb) { struct urb *urb; struct cmd_buf *cmd; int ret = 0; urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) return -ENOMEM; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (cmd == NULL) { usb_free_urb(urb); return -ENOMEM; } cmd->skb = skb; cmd->hif_dev = hif_dev; usb_fill_int_urb(urb, hif_dev->udev, usb_sndintpipe(hif_dev->udev, USB_REG_OUT_PIPE), skb->data, skb->len, hif_usb_regout_cb, cmd, 1); usb_anchor_urb(urb, &hif_dev->regout_submitted); ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); kfree(cmd); } usb_free_urb(urb); return ret; } static void hif_usb_mgmt_cb(struct urb *urb) { struct cmd_buf *cmd = urb->context; struct hif_device_usb *hif_dev; unsigned long flags; bool txok = true; if (!cmd || !cmd->skb || !cmd->hif_dev) return; hif_dev = cmd->hif_dev; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case 
-ESHUTDOWN: txok = false; /* * If the URBs are being flushed, no need to complete * this packet. */ spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); if (hif_dev->tx.flags & HIF_USB_TX_FLUSH) { spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); dev_kfree_skb_any(cmd->skb); kfree(cmd); return; } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); break; default: txok = false; break; } skb_pull(cmd->skb, 4); ath9k_htc_txcompletion_cb(cmd->hif_dev->htc_handle, cmd->skb, txok); kfree(cmd); } static int hif_usb_send_mgmt(struct hif_device_usb *hif_dev, struct sk_buff *skb) { struct urb *urb; struct cmd_buf *cmd; int ret = 0; __le16 *hdr; urb = usb_alloc_urb(0, GFP_ATOMIC); if (urb == NULL) return -ENOMEM; cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC); if (cmd == NULL) { usb_free_urb(urb); return -ENOMEM; } cmd->skb = skb; cmd->hif_dev = hif_dev; hdr = skb_push(skb, 4); *hdr++ = cpu_to_le16(skb->len - 4); *hdr++ = cpu_to_le16(ATH_USB_TX_STREAM_MODE_TAG); usb_fill_bulk_urb(urb, hif_dev->udev, usb_sndbulkpipe(hif_dev->udev, USB_WLAN_TX_PIPE), skb->data, skb->len, hif_usb_mgmt_cb, cmd); usb_anchor_urb(urb, &hif_dev->mgmt_submitted); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { usb_unanchor_urb(urb); kfree(cmd); } usb_free_urb(urb); return ret; } static inline void ath9k_skb_queue_purge(struct hif_device_usb *hif_dev, struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) { dev_kfree_skb_any(skb); } } static inline void ath9k_skb_queue_complete(struct hif_device_usb *hif_dev, struct sk_buff_head *queue, bool txok) { struct sk_buff *skb; while ((skb = __skb_dequeue(queue)) != NULL) { #ifdef CONFIG_ATH9K_HTC_DEBUGFS int ln = skb->len; #endif ath9k_htc_txcompletion_cb(hif_dev->htc_handle, skb, txok); if (txok) { TX_STAT_INC(hif_dev, skb_success); TX_STAT_ADD(hif_dev, skb_success_bytes, ln); } else TX_STAT_INC(hif_dev, skb_failed); } } static void hif_usb_tx_cb(struct urb *urb) { struct tx_buf *tx_buf = urb->context; struct hif_device_usb *hif_dev; bool txok = true; if (!tx_buf || !tx_buf->hif_dev) return; hif_dev = tx_buf->hif_dev; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: txok = false; /* * If the URBs are being flushed, no need to add this * URB to the free list. 
*/ spin_lock(&hif_dev->tx.tx_lock); if (hif_dev->tx.flags & HIF_USB_TX_FLUSH) { spin_unlock(&hif_dev->tx.tx_lock); ath9k_skb_queue_purge(hif_dev, &tx_buf->skb_queue); return; } spin_unlock(&hif_dev->tx.tx_lock); break; default: txok = false; break; } ath9k_skb_queue_complete(hif_dev, &tx_buf->skb_queue, txok); /* Re-initialize the SKB queue */ tx_buf->len = tx_buf->offset = 0; __skb_queue_head_init(&tx_buf->skb_queue); /* Add this TX buffer to the free list */ spin_lock(&hif_dev->tx.tx_lock); list_move_tail(&tx_buf->list, &hif_dev->tx.tx_buf); hif_dev->tx.tx_buf_cnt++; if (!(hif_dev->tx.flags & HIF_USB_TX_STOP)) __hif_usb_tx(hif_dev); /* Check for pending SKBs */ TX_STAT_INC(hif_dev, buf_completed); spin_unlock(&hif_dev->tx.tx_lock); } /* TX lock has to be taken */ static int __hif_usb_tx(struct hif_device_usb *hif_dev) { struct tx_buf *tx_buf = NULL; struct sk_buff *nskb = NULL; int ret = 0, i; u16 tx_skb_cnt = 0; u8 *buf; __le16 *hdr; if (hif_dev->tx.tx_skb_cnt == 0) return 0; /* Check if a free TX buffer is available */ if (list_empty(&hif_dev->tx.tx_buf)) return 0; tx_buf = list_first_entry(&hif_dev->tx.tx_buf, struct tx_buf, list); list_move_tail(&tx_buf->list, &hif_dev->tx.tx_pending); hif_dev->tx.tx_buf_cnt--; tx_skb_cnt = min_t(u16, hif_dev->tx.tx_skb_cnt, MAX_TX_AGGR_NUM); for (i = 0; i < tx_skb_cnt; i++) { nskb = __skb_dequeue(&hif_dev->tx.tx_skb_queue); /* Should never be NULL */ BUG_ON(!nskb); hif_dev->tx.tx_skb_cnt--; buf = tx_buf->buf; buf += tx_buf->offset; hdr = (__le16 *)buf; *hdr++ = cpu_to_le16(nskb->len); *hdr++ = cpu_to_le16(ATH_USB_TX_STREAM_MODE_TAG); buf += 4; memcpy(buf, nskb->data, nskb->len); tx_buf->len = nskb->len + 4; if (i < (tx_skb_cnt - 1)) tx_buf->offset += (((tx_buf->len - 1) / 4) + 1) * 4; if (i == (tx_skb_cnt - 1)) tx_buf->len += tx_buf->offset; __skb_queue_tail(&tx_buf->skb_queue, nskb); TX_STAT_INC(hif_dev, skb_queued); } usb_fill_bulk_urb(tx_buf->urb, hif_dev->udev, usb_sndbulkpipe(hif_dev->udev, USB_WLAN_TX_PIPE), tx_buf->buf, tx_buf->len, hif_usb_tx_cb, tx_buf); ret = usb_submit_urb(tx_buf->urb, GFP_ATOMIC); if (ret) { tx_buf->len = tx_buf->offset = 0; ath9k_skb_queue_complete(hif_dev, &tx_buf->skb_queue, false); __skb_queue_head_init(&tx_buf->skb_queue); list_move_tail(&tx_buf->list, &hif_dev->tx.tx_buf); hif_dev->tx.tx_buf_cnt++; } else { TX_STAT_INC(hif_dev, buf_queued); } return ret; } static int hif_usb_send_tx(struct hif_device_usb *hif_dev, struct sk_buff *skb) { struct ath9k_htc_tx_ctl *tx_ctl; unsigned long flags; int ret = 0; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); if (hif_dev->tx.flags & HIF_USB_TX_STOP) { spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); return -ENODEV; } /* Check if the max queue count has been reached */ if (hif_dev->tx.tx_skb_cnt > MAX_TX_BUF_NUM) { spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); return -ENOMEM; } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); tx_ctl = HTC_SKB_CB(skb); /* Mgmt/Beacon frames don't use the TX buffer pool */ if ((tx_ctl->type == ATH9K_HTC_MGMT) || (tx_ctl->type == ATH9K_HTC_BEACON)) { ret = hif_usb_send_mgmt(hif_dev, skb); } spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); if ((tx_ctl->type == ATH9K_HTC_NORMAL) || (tx_ctl->type == ATH9K_HTC_AMPDU)) { __skb_queue_tail(&hif_dev->tx.tx_skb_queue, skb); hif_dev->tx.tx_skb_cnt++; } /* Check if AMPDUs have to be sent immediately */ if ((hif_dev->tx.tx_buf_cnt == MAX_TX_URB_NUM) && (hif_dev->tx.tx_skb_cnt < 2)) { __hif_usb_tx(hif_dev); } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); return ret; } static void 
hif_usb_start(void *hif_handle) { struct hif_device_usb *hif_dev = hif_handle; unsigned long flags; hif_dev->flags |= HIF_USB_START; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); hif_dev->tx.flags &= ~HIF_USB_TX_STOP; spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); } static void hif_usb_stop(void *hif_handle) { struct hif_device_usb *hif_dev = hif_handle; struct tx_buf *tx_buf = NULL, *tx_buf_tmp = NULL; unsigned long flags; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); ath9k_skb_queue_complete(hif_dev, &hif_dev->tx.tx_skb_queue, false); hif_dev->tx.tx_skb_cnt = 0; hif_dev->tx.flags |= HIF_USB_TX_STOP; spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); /* The pending URBs have to be canceled. */ spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); list_for_each_entry_safe(tx_buf, tx_buf_tmp, &hif_dev->tx.tx_pending, list) { usb_get_urb(tx_buf->urb); spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); usb_kill_urb(tx_buf->urb); list_del(&tx_buf->list); usb_free_urb(tx_buf->urb); kfree(tx_buf->buf); kfree(tx_buf); spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); usb_kill_anchored_urbs(&hif_dev->mgmt_submitted); } static int hif_usb_send(void *hif_handle, u8 pipe_id, struct sk_buff *skb) { struct hif_device_usb *hif_dev = hif_handle; int ret = 0; switch (pipe_id) { case USB_WLAN_TX_PIPE: ret = hif_usb_send_tx(hif_dev, skb); break; case USB_REG_OUT_PIPE: ret = hif_usb_send_regout(hif_dev, skb); break; default: dev_err(&hif_dev->udev->dev, "ath9k_htc: Invalid TX pipe: %d\n", pipe_id); ret = -EINVAL; break; } return ret; } static inline bool check_index(struct sk_buff *skb, u8 idx) { struct ath9k_htc_tx_ctl *tx_ctl; tx_ctl = HTC_SKB_CB(skb); if ((tx_ctl->type == ATH9K_HTC_AMPDU) && (tx_ctl->sta_idx == idx)) return true; return false; } static void hif_usb_sta_drain(void *hif_handle, u8 idx) { struct hif_device_usb *hif_dev = hif_handle; struct sk_buff *skb, *tmp; unsigned long flags; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); skb_queue_walk_safe(&hif_dev->tx.tx_skb_queue, skb, tmp) { if (check_index(skb, idx)) { __skb_unlink(skb, &hif_dev->tx.tx_skb_queue); ath9k_htc_txcompletion_cb(hif_dev->htc_handle, skb, false); hif_dev->tx.tx_skb_cnt--; TX_STAT_INC(hif_dev, skb_failed); } } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); } static struct ath9k_htc_hif hif_usb = { .transport = ATH9K_HIF_USB, .name = "ath9k_hif_usb", .control_ul_pipe = USB_REG_OUT_PIPE, .control_dl_pipe = USB_REG_IN_PIPE, .start = hif_usb_start, .stop = hif_usb_stop, .sta_drain = hif_usb_sta_drain, .send = hif_usb_send, }; /* Need to free remain_skb allocated in ath9k_hif_usb_rx_stream * in case ath9k_hif_usb_rx_stream wasn't called next time to * process the buffer and subsequently free it. 
*/ static void ath9k_hif_usb_free_rx_remain_skb(struct hif_device_usb *hif_dev) { unsigned long flags; spin_lock_irqsave(&hif_dev->rx_lock, flags); if (hif_dev->remain_skb) { dev_kfree_skb_any(hif_dev->remain_skb); hif_dev->remain_skb = NULL; hif_dev->rx_remain_len = 0; RX_STAT_INC(hif_dev, skb_dropped); } spin_unlock_irqrestore(&hif_dev->rx_lock, flags); } static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev, struct sk_buff *skb) { struct sk_buff *nskb, *skb_pool[MAX_PKT_NUM_IN_TRANSFER]; int index = 0, i, len = skb->len; int rx_remain_len, rx_pkt_len; u16 pool_index = 0; u8 *ptr; spin_lock(&hif_dev->rx_lock); rx_remain_len = hif_dev->rx_remain_len; rx_pkt_len = hif_dev->rx_transfer_len; if (rx_remain_len != 0) { struct sk_buff *remain_skb = hif_dev->remain_skb; if (remain_skb) { ptr = (u8 *) remain_skb->data; index = rx_remain_len; rx_remain_len -= hif_dev->rx_pad_len; ptr += rx_pkt_len; memcpy(ptr, skb->data, rx_remain_len); rx_pkt_len += rx_remain_len; skb_put(remain_skb, rx_pkt_len); skb_pool[pool_index++] = remain_skb; hif_dev->remain_skb = NULL; hif_dev->rx_remain_len = 0; } else { index = rx_remain_len; } } spin_unlock(&hif_dev->rx_lock); while (index < len) { u16 pkt_len; u16 pkt_tag; u16 pad_len; int chk_idx; ptr = (u8 *) skb->data; pkt_len = get_unaligned_le16(ptr + index); pkt_tag = get_unaligned_le16(ptr + index + 2); /* It is supposed that if we have an invalid pkt_tag or * pkt_len then the whole input SKB is considered invalid * and dropped; the associated packets already in skb_pool * are dropped, too. */ if (pkt_tag != ATH_USB_RX_STREAM_MODE_TAG) { RX_STAT_INC(hif_dev, skb_dropped); goto invalid_pkt; } if (pkt_len > 2 * MAX_RX_BUF_SIZE) { dev_err(&hif_dev->udev->dev, "ath9k_htc: invalid pkt_len (%x)\n", pkt_len); RX_STAT_INC(hif_dev, skb_dropped); goto invalid_pkt; } pad_len = 4 - (pkt_len & 0x3); if (pad_len == 4) pad_len = 0; chk_idx = index; index = index + 4 + pkt_len + pad_len; if (index > MAX_RX_BUF_SIZE) { spin_lock(&hif_dev->rx_lock); nskb = __dev_alloc_skb(pkt_len + 32, GFP_ATOMIC); if (!nskb) { dev_err(&hif_dev->udev->dev, "ath9k_htc: RX memory allocation error\n"); spin_unlock(&hif_dev->rx_lock); goto err; } hif_dev->rx_remain_len = index - MAX_RX_BUF_SIZE; hif_dev->rx_transfer_len = MAX_RX_BUF_SIZE - chk_idx - 4; hif_dev->rx_pad_len = pad_len; skb_reserve(nskb, 32); RX_STAT_INC(hif_dev, skb_allocated); memcpy(nskb->data, &(skb->data[chk_idx+4]), hif_dev->rx_transfer_len); /* Record the buffer pointer */ hif_dev->remain_skb = nskb; spin_unlock(&hif_dev->rx_lock); } else { if (pool_index == MAX_PKT_NUM_IN_TRANSFER) { dev_err(&hif_dev->udev->dev, "ath9k_htc: over RX MAX_PKT_NUM\n"); goto err; } nskb = __dev_alloc_skb(pkt_len + 32, GFP_ATOMIC); if (!nskb) { dev_err(&hif_dev->udev->dev, "ath9k_htc: RX memory allocation error\n"); goto err; } skb_reserve(nskb, 32); RX_STAT_INC(hif_dev, skb_allocated); memcpy(nskb->data, &(skb->data[chk_idx+4]), pkt_len); skb_put(nskb, pkt_len); skb_pool[pool_index++] = nskb; } } err: for (i = 0; i < pool_index; i++) { RX_STAT_ADD(hif_dev, skb_completed_bytes, skb_pool[i]->len); ath9k_htc_rx_msg(hif_dev->htc_handle, skb_pool[i], skb_pool[i]->len, USB_WLAN_RX_PIPE); RX_STAT_INC(hif_dev, skb_completed); } return; invalid_pkt: for (i = 0; i < pool_index; i++) { dev_kfree_skb_any(skb_pool[i]); RX_STAT_INC(hif_dev, skb_dropped); } return; } static void ath9k_hif_usb_rx_cb(struct urb *urb) { struct rx_buf *rx_buf = urb->context; struct hif_device_usb *hif_dev = rx_buf->hif_dev; struct sk_buff *skb = rx_buf->skb; int ret; if 
(!skb) return; if (!hif_dev) goto free; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: goto free; default: goto resubmit; } if (likely(urb->actual_length != 0)) { skb_put(skb, urb->actual_length); ath9k_hif_usb_rx_stream(hif_dev, skb); } resubmit: skb_reset_tail_pointer(skb); skb_trim(skb, 0); usb_anchor_urb(urb, &hif_dev->rx_submitted); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { usb_unanchor_urb(urb); goto free; } return; free: kfree_skb(skb); kfree(rx_buf); } static void ath9k_hif_usb_reg_in_cb(struct urb *urb) { struct rx_buf *rx_buf = urb->context; struct hif_device_usb *hif_dev = rx_buf->hif_dev; struct sk_buff *skb = rx_buf->skb; int ret; if (!skb) return; if (!hif_dev) goto free_skb; switch (urb->status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: goto free_skb; default: skb_reset_tail_pointer(skb); skb_trim(skb, 0); goto resubmit; } if (likely(urb->actual_length != 0)) { skb_put(skb, urb->actual_length); /* * Process the command first. * skb is either freed here or passed to be * managed to another callback function. */ ath9k_htc_rx_msg(hif_dev->htc_handle, skb, skb->len, USB_REG_IN_PIPE); skb = alloc_skb(MAX_REG_IN_BUF_SIZE, GFP_ATOMIC); if (!skb) { dev_err(&hif_dev->udev->dev, "ath9k_htc: REG_IN memory allocation failure\n"); goto free_rx_buf; } rx_buf->skb = skb; usb_fill_int_urb(urb, hif_dev->udev, usb_rcvintpipe(hif_dev->udev, USB_REG_IN_PIPE), skb->data, MAX_REG_IN_BUF_SIZE, ath9k_hif_usb_reg_in_cb, rx_buf, 1); } resubmit: usb_anchor_urb(urb, &hif_dev->reg_in_submitted); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { usb_unanchor_urb(urb); goto free_skb; } return; free_skb: kfree_skb(skb); free_rx_buf: kfree(rx_buf); urb->context = NULL; } static void ath9k_hif_usb_dealloc_tx_urbs(struct hif_device_usb *hif_dev) { struct tx_buf *tx_buf = NULL, *tx_buf_tmp = NULL; unsigned long flags; spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); list_for_each_entry_safe(tx_buf, tx_buf_tmp, &hif_dev->tx.tx_buf, list) { list_del(&tx_buf->list); usb_free_urb(tx_buf->urb); kfree(tx_buf->buf); kfree(tx_buf); } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); hif_dev->tx.flags |= HIF_USB_TX_FLUSH; spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); list_for_each_entry_safe(tx_buf, tx_buf_tmp, &hif_dev->tx.tx_pending, list) { usb_get_urb(tx_buf->urb); spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); usb_kill_urb(tx_buf->urb); list_del(&tx_buf->list); usb_free_urb(tx_buf->urb); kfree(tx_buf->buf); kfree(tx_buf); spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); } spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); usb_kill_anchored_urbs(&hif_dev->mgmt_submitted); } static int ath9k_hif_usb_alloc_tx_urbs(struct hif_device_usb *hif_dev) { struct tx_buf *tx_buf; int i; INIT_LIST_HEAD(&hif_dev->tx.tx_buf); INIT_LIST_HEAD(&hif_dev->tx.tx_pending); spin_lock_init(&hif_dev->tx.tx_lock); __skb_queue_head_init(&hif_dev->tx.tx_skb_queue); init_usb_anchor(&hif_dev->mgmt_submitted); for (i = 0; i < MAX_TX_URB_NUM; i++) { tx_buf = kzalloc(sizeof(*tx_buf), GFP_KERNEL); if (!tx_buf) goto err; tx_buf->buf = kzalloc(MAX_TX_BUF_SIZE, GFP_KERNEL); if (!tx_buf->buf) goto err; tx_buf->urb = usb_alloc_urb(0, GFP_KERNEL); if (!tx_buf->urb) goto err; tx_buf->hif_dev = hif_dev; __skb_queue_head_init(&tx_buf->skb_queue); list_add_tail(&tx_buf->list, &hif_dev->tx.tx_buf); } hif_dev->tx.tx_buf_cnt = MAX_TX_URB_NUM; return 0; 
err: if (tx_buf) { kfree(tx_buf->buf); kfree(tx_buf); } ath9k_hif_usb_dealloc_tx_urbs(hif_dev); return -ENOMEM; } static void ath9k_hif_usb_dealloc_rx_urbs(struct hif_device_usb *hif_dev) { usb_kill_anchored_urbs(&hif_dev->rx_submitted); ath9k_hif_usb_free_rx_remain_skb(hif_dev); } static int ath9k_hif_usb_alloc_rx_urbs(struct hif_device_usb *hif_dev) { struct rx_buf *rx_buf = NULL; struct sk_buff *skb = NULL; struct urb *urb = NULL; int i, ret; init_usb_anchor(&hif_dev->rx_submitted); spin_lock_init(&hif_dev->rx_lock); for (i = 0; i < MAX_RX_URB_NUM; i++) { rx_buf = kzalloc(sizeof(*rx_buf), GFP_KERNEL); if (!rx_buf) { ret = -ENOMEM; goto err_rxb; } /* Allocate URB */ urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) { ret = -ENOMEM; goto err_urb; } /* Allocate buffer */ skb = alloc_skb(MAX_RX_BUF_SIZE, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto err_skb; } rx_buf->hif_dev = hif_dev; rx_buf->skb = skb; usb_fill_bulk_urb(urb, hif_dev->udev, usb_rcvbulkpipe(hif_dev->udev, USB_WLAN_RX_PIPE), skb->data, MAX_RX_BUF_SIZE, ath9k_hif_usb_rx_cb, rx_buf); /* Anchor URB */ usb_anchor_urb(urb, &hif_dev->rx_submitted); /* Submit URB */ ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); goto err_submit; } /* * Drop reference count. * This ensures that the URB is freed when killing them. */ usb_free_urb(urb); } return 0; err_submit: kfree_skb(skb); err_skb: usb_free_urb(urb); err_urb: kfree(rx_buf); err_rxb: ath9k_hif_usb_dealloc_rx_urbs(hif_dev); return ret; } static void ath9k_hif_usb_dealloc_reg_in_urbs(struct hif_device_usb *hif_dev) { usb_kill_anchored_urbs(&hif_dev->reg_in_submitted); } static int ath9k_hif_usb_alloc_reg_in_urbs(struct hif_device_usb *hif_dev) { struct rx_buf *rx_buf = NULL; struct sk_buff *skb = NULL; struct urb *urb = NULL; int i, ret; init_usb_anchor(&hif_dev->reg_in_submitted); for (i = 0; i < MAX_REG_IN_URB_NUM; i++) { rx_buf = kzalloc(sizeof(*rx_buf), GFP_KERNEL); if (!rx_buf) { ret = -ENOMEM; goto err_rxb; } /* Allocate URB */ urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) { ret = -ENOMEM; goto err_urb; } /* Allocate buffer */ skb = alloc_skb(MAX_REG_IN_BUF_SIZE, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto err_skb; } rx_buf->hif_dev = hif_dev; rx_buf->skb = skb; usb_fill_int_urb(urb, hif_dev->udev, usb_rcvintpipe(hif_dev->udev, USB_REG_IN_PIPE), skb->data, MAX_REG_IN_BUF_SIZE, ath9k_hif_usb_reg_in_cb, rx_buf, 1); /* Anchor URB */ usb_anchor_urb(urb, &hif_dev->reg_in_submitted); /* Submit URB */ ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); goto err_submit; } /* * Drop reference count. * This ensures that the URB is freed when killing them. 
*/ usb_free_urb(urb); } return 0; err_submit: kfree_skb(skb); err_skb: usb_free_urb(urb); err_urb: kfree(rx_buf); err_rxb: ath9k_hif_usb_dealloc_reg_in_urbs(hif_dev); return ret; } static int ath9k_hif_usb_alloc_urbs(struct hif_device_usb *hif_dev) { /* Register Write */ init_usb_anchor(&hif_dev->regout_submitted); /* TX */ if (ath9k_hif_usb_alloc_tx_urbs(hif_dev) < 0) goto err; /* RX */ if (ath9k_hif_usb_alloc_rx_urbs(hif_dev) < 0) goto err_rx; /* Register Read */ if (ath9k_hif_usb_alloc_reg_in_urbs(hif_dev) < 0) goto err_reg; return 0; err_reg: ath9k_hif_usb_dealloc_rx_urbs(hif_dev); err_rx: ath9k_hif_usb_dealloc_tx_urbs(hif_dev); err: return -ENOMEM; } void ath9k_hif_usb_dealloc_urbs(struct hif_device_usb *hif_dev) { usb_kill_anchored_urbs(&hif_dev->regout_submitted); ath9k_hif_usb_dealloc_reg_in_urbs(hif_dev); ath9k_hif_usb_dealloc_tx_urbs(hif_dev); ath9k_hif_usb_dealloc_rx_urbs(hif_dev); } static int ath9k_hif_usb_download_fw(struct hif_device_usb *hif_dev) { int transfer, err; const void *data = hif_dev->fw_data; size_t len = hif_dev->fw_size; u32 addr = AR9271_FIRMWARE; u8 *buf = kzalloc(4096, GFP_KERNEL); u32 firm_offset; if (!buf) return -ENOMEM; while (len) { transfer = min_t(size_t, len, 4096); memcpy(buf, data, transfer); err = usb_control_msg(hif_dev->udev, usb_sndctrlpipe(hif_dev->udev, 0), FIRMWARE_DOWNLOAD, 0x40 | USB_DIR_OUT, addr >> 8, 0, buf, transfer, USB_MSG_TIMEOUT); if (err < 0) { kfree(buf); return err; } len -= transfer; data += transfer; addr += transfer; } kfree(buf); if (IS_AR7010_DEVICE(hif_dev->usb_device_id->driver_info)) firm_offset = AR7010_FIRMWARE_TEXT; else firm_offset = AR9271_FIRMWARE_TEXT; /* * Issue FW download complete command to firmware. */ err = usb_control_msg(hif_dev->udev, usb_sndctrlpipe(hif_dev->udev, 0), FIRMWARE_DOWNLOAD_COMP, 0x40 | USB_DIR_OUT, firm_offset >> 8, 0, NULL, 0, USB_MSG_TIMEOUT); if (err) return -EIO; dev_info(&hif_dev->udev->dev, "ath9k_htc: Transferred FW: %s, size: %ld\n", hif_dev->fw_name, (unsigned long) hif_dev->fw_size); return 0; } static int ath9k_hif_usb_dev_init(struct hif_device_usb *hif_dev) { int ret; ret = ath9k_hif_usb_download_fw(hif_dev); if (ret) { dev_err(&hif_dev->udev->dev, "ath9k_htc: Firmware - %s download failed\n", hif_dev->fw_name); return ret; } /* Alloc URBs */ ret = ath9k_hif_usb_alloc_urbs(hif_dev); if (ret) { dev_err(&hif_dev->udev->dev, "ath9k_htc: Unable to allocate URBs\n"); return ret; } return 0; } static void ath9k_hif_usb_dev_deinit(struct hif_device_usb *hif_dev) { ath9k_hif_usb_dealloc_urbs(hif_dev); } /* * If initialization fails or the FW cannot be retrieved, * detach the device. 
*/ static void ath9k_hif_usb_firmware_fail(struct hif_device_usb *hif_dev) { struct device *dev = &hif_dev->udev->dev; struct device *parent = dev->parent; complete_all(&hif_dev->fw_done); if (parent) device_lock(parent); device_release_driver(dev); if (parent) device_unlock(parent); } static void ath9k_hif_usb_firmware_cb(const struct firmware *fw, void *context); /* taken from iwlwifi */ static int ath9k_hif_request_firmware(struct hif_device_usb *hif_dev, bool first) { char index[8], *chip; int ret; if (first) { if (htc_use_dev_fw) { hif_dev->fw_minor_index = FIRMWARE_MINOR_IDX_MAX + 1; sprintf(index, "%s", "dev"); } else { hif_dev->fw_minor_index = FIRMWARE_MINOR_IDX_MAX; sprintf(index, "%d", hif_dev->fw_minor_index); } } else { hif_dev->fw_minor_index--; sprintf(index, "%d", hif_dev->fw_minor_index); } /* test for FW 1.3 */ if (MAJOR_VERSION_REQ == 1 && hif_dev->fw_minor_index == 3) { const char *filename; if (IS_AR7010_DEVICE(hif_dev->usb_device_id->driver_info)) filename = FIRMWARE_AR7010_1_1; else filename = FIRMWARE_AR9271; /* expected fw locations: * - htc_9271.fw (stable version 1.3, deprecated) */ snprintf(hif_dev->fw_name, sizeof(hif_dev->fw_name), "%s", filename); } else if (hif_dev->fw_minor_index < FIRMWARE_MINOR_IDX_MIN) { dev_err(&hif_dev->udev->dev, "no suitable firmware found!\n"); return -ENOENT; } else { if (IS_AR7010_DEVICE(hif_dev->usb_device_id->driver_info)) chip = "7010"; else chip = "9271"; /* expected fw locations: * - ath9k_htc/htc_9271-1.dev.0.fw (development version) * - ath9k_htc/htc_9271-1.4.0.fw (stable version) */ snprintf(hif_dev->fw_name, sizeof(hif_dev->fw_name), "%s/htc_%s-%d.%s.0.fw", HTC_FW_PATH, chip, MAJOR_VERSION_REQ, index); } ret = request_firmware_nowait(THIS_MODULE, true, hif_dev->fw_name, &hif_dev->udev->dev, GFP_KERNEL, hif_dev, ath9k_hif_usb_firmware_cb); if (ret) { dev_err(&hif_dev->udev->dev, "ath9k_htc: Async request for firmware %s failed\n", hif_dev->fw_name); return ret; } dev_info(&hif_dev->udev->dev, "ath9k_htc: Firmware %s requested\n", hif_dev->fw_name); return ret; } static void ath9k_hif_usb_firmware_cb(const struct firmware *fw, void *context) { struct hif_device_usb *hif_dev = context; int ret; if (!fw) { ret = ath9k_hif_request_firmware(hif_dev, false); if (!ret) return; dev_err(&hif_dev->udev->dev, "ath9k_htc: Failed to get firmware %s\n", hif_dev->fw_name); goto err_fw; } hif_dev->htc_handle = ath9k_htc_hw_alloc(hif_dev, &hif_usb, &hif_dev->udev->dev); if (hif_dev->htc_handle == NULL) goto err_dev_alloc; hif_dev->fw_data = fw->data; hif_dev->fw_size = fw->size; /* Proceed with initialization */ ret = ath9k_hif_usb_dev_init(hif_dev); if (ret) goto err_dev_init; ret = ath9k_htc_hw_init(hif_dev->htc_handle, &hif_dev->interface->dev, hif_dev->usb_device_id->idProduct, hif_dev->udev->product, hif_dev->usb_device_id->driver_info); if (ret) { ret = -EINVAL; goto err_htc_hw_init; } release_firmware(fw); hif_dev->flags |= HIF_USB_READY; complete_all(&hif_dev->fw_done); return; err_htc_hw_init: ath9k_hif_usb_dev_deinit(hif_dev); err_dev_init: ath9k_htc_hw_free(hif_dev->htc_handle); err_dev_alloc: release_firmware(fw); err_fw: ath9k_hif_usb_firmware_fail(hif_dev); } /* * An exact copy of the function from zd1211rw.
*/ static int send_eject_command(struct usb_interface *interface) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_host_interface *iface_desc = interface->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep; int r; if (iface_desc->desc.bNumEndpoints < 2) return -ENODEV; /* Find bulk out endpoint */ for (r = 1; r >= 0; r--) { endpoint = &iface_desc->endpoint[r].desc; if (usb_endpoint_dir_out(endpoint) && usb_endpoint_xfer_bulk(endpoint)) { bulk_out_ep = endpoint->bEndpointAddress; break; } } if (r == -1) { dev_err(&udev->dev, "ath9k_htc: Could not find bulk out endpoint\n"); return -ENODEV; } cmd = kzalloc(31, GFP_KERNEL); if (cmd == NULL) return -ENODEV; /* USB bulk command block */ cmd[0] = 0x55; /* bulk command signature */ cmd[1] = 0x53; /* bulk command signature */ cmd[2] = 0x42; /* bulk command signature */ cmd[3] = 0x43; /* bulk command signature */ cmd[14] = 6; /* command length */ cmd[15] = 0x1b; /* SCSI command: START STOP UNIT */ cmd[19] = 0x2; /* eject disc */ dev_info(&udev->dev, "Ejecting storage device...\n"); r = usb_bulk_msg(udev, usb_sndbulkpipe(udev, bulk_out_ep), cmd, 31, NULL, 2 * USB_MSG_TIMEOUT); kfree(cmd); if (r) return r; /* At this point, the device disconnects and reconnects with the real * ID numbers. */ usb_set_intfdata(interface, NULL); return 0; } static int ath9k_hif_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in, *int_out; struct usb_device *udev = interface_to_usbdev(interface); struct usb_host_interface *alt; struct hif_device_usb *hif_dev; int ret = 0; /* Verify the expected endpoints are present */ alt = interface->cur_altsetting; if (usb_find_common_endpoints(alt, &bulk_in, &bulk_out, &int_in, &int_out) < 0 || usb_endpoint_num(bulk_in) != USB_WLAN_RX_PIPE || usb_endpoint_num(bulk_out) != USB_WLAN_TX_PIPE || usb_endpoint_num(int_in) != USB_REG_IN_PIPE || usb_endpoint_num(int_out) != USB_REG_OUT_PIPE) { dev_err(&udev->dev, "ath9k_htc: Device endpoint numbers are not the expected ones\n"); return -ENODEV; } if (id->driver_info == STORAGE_DEVICE) return send_eject_command(interface); hif_dev = kzalloc(sizeof(struct hif_device_usb), GFP_KERNEL); if (!hif_dev) { ret = -ENOMEM; goto err_alloc; } usb_get_dev(udev); hif_dev->udev = udev; hif_dev->interface = interface; hif_dev->usb_device_id = id; #ifdef CONFIG_PM udev->reset_resume = 1; #endif usb_set_intfdata(interface, hif_dev); init_completion(&hif_dev->fw_done); ret = ath9k_hif_request_firmware(hif_dev, true); if (ret) goto err_fw_req; return ret; err_fw_req: usb_set_intfdata(interface, NULL); kfree(hif_dev); usb_put_dev(udev); err_alloc: return ret; } static void ath9k_hif_usb_reboot(struct usb_device *udev) { u32 reboot_cmd = 0xffffffff; void *buf; int ret; buf = kmemdup(&reboot_cmd, 4, GFP_KERNEL); if (!buf) return; ret = usb_interrupt_msg(udev, usb_sndintpipe(udev, USB_REG_OUT_PIPE), buf, 4, NULL, USB_MSG_TIMEOUT); if (ret) dev_err(&udev->dev, "ath9k_htc: USB reboot failed\n"); kfree(buf); } static void ath9k_hif_usb_disconnect(struct usb_interface *interface) { struct usb_device *udev = interface_to_usbdev(interface); struct hif_device_usb *hif_dev = usb_get_intfdata(interface); bool unplugged = udev->state == USB_STATE_NOTATTACHED; if (!hif_dev) return; wait_for_completion(&hif_dev->fw_done); if (hif_dev->flags & HIF_USB_READY) { ath9k_htc_hw_deinit(hif_dev->htc_handle, unplugged); ath9k_htc_hw_free(hif_dev->htc_handle); } usb_set_intfdata(interface, 
NULL); /* If the firmware was loaded, we should drop it and * go back to the first-stage bootloader. */ if (!unplugged && (hif_dev->flags & HIF_USB_READY)) ath9k_hif_usb_reboot(udev); kfree(hif_dev); dev_info(&udev->dev, "ath9k_htc: USB layer deinitialized\n"); usb_put_dev(udev); } #ifdef CONFIG_PM static int ath9k_hif_usb_suspend(struct usb_interface *interface, pm_message_t message) { struct hif_device_usb *hif_dev = usb_get_intfdata(interface); /* * The device has to be set to FULLSLEEP mode in case no * interface is up. */ if (!(hif_dev->flags & HIF_USB_START)) ath9k_htc_suspend(hif_dev->htc_handle); wait_for_completion(&hif_dev->fw_done); if (hif_dev->flags & HIF_USB_READY) ath9k_hif_usb_dealloc_urbs(hif_dev); return 0; } static int ath9k_hif_usb_resume(struct usb_interface *interface) { struct hif_device_usb *hif_dev = usb_get_intfdata(interface); struct htc_target *htc_handle = hif_dev->htc_handle; const struct firmware *fw; int ret; ret = ath9k_hif_usb_alloc_urbs(hif_dev); if (ret) return ret; if (!(hif_dev->flags & HIF_USB_READY)) { ret = -EIO; goto fail_resume; } /* request cached firmware during suspend/resume cycle */ ret = request_firmware(&fw, hif_dev->fw_name, &hif_dev->udev->dev); if (ret) goto fail_resume; hif_dev->fw_data = fw->data; hif_dev->fw_size = fw->size; ret = ath9k_hif_usb_download_fw(hif_dev); release_firmware(fw); if (ret) goto fail_resume; mdelay(100); ret = ath9k_htc_resume(htc_handle); if (ret) goto fail_resume; return 0; fail_resume: ath9k_hif_usb_dealloc_urbs(hif_dev); return ret; } #endif static struct usb_driver ath9k_hif_usb_driver = { .name = KBUILD_MODNAME, .probe = ath9k_hif_usb_probe, .disconnect = ath9k_hif_usb_disconnect, #ifdef CONFIG_PM .suspend = ath9k_hif_usb_suspend, .resume = ath9k_hif_usb_resume, .reset_resume = ath9k_hif_usb_resume, #endif .id_table = ath9k_hif_usb_ids, .soft_unbind = 1, .disable_hub_initiated_lpm = 1, }; int ath9k_hif_usb_init(void) { return usb_register(&ath9k_hif_usb_driver); } void ath9k_hif_usb_exit(void) { usb_deregister(&ath9k_hif_usb_driver); }
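/*
 * Illustrative sketch, not part of the driver above: the anchored-URB receive
 * pattern that ath9k_hif_usb_alloc_rx_urbs() and its completion callback rely
 * on, reduced to one hypothetical helper. The function name
 * example_submit_anchored_rx() and its parameters are invented for
 * illustration; only the USB core calls are the real API.
 */
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/usb.h>

static int example_submit_anchored_rx(struct usb_device *udev,
				      struct usb_anchor *anchor,
				      unsigned int ep, void *buf, int len,
				      usb_complete_t complete_fn, void *context)
{
	struct urb *urb;
	int ret;

	urb = usb_alloc_urb(0, GFP_KERNEL);
	if (!urb)
		return -ENOMEM;

	usb_fill_bulk_urb(urb, udev, usb_rcvbulkpipe(udev, ep),
			  buf, len, complete_fn, context);

	/* Anchor before submitting so teardown via usb_kill_anchored_urbs()
	 * can always find the URB, just as the RX path above does. */
	usb_anchor_urb(urb, anchor);

	ret = usb_submit_urb(urb, GFP_KERNEL);
	if (ret)
		usb_unanchor_urb(urb);

	/* Drop the local reference; the anchor (and the USB core while the
	 * transfer is in flight) keeps the URB alive until completion. */
	usb_free_urb(urb);

	return ret;
}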
// SPDX-License-Identifier: GPL-2.0-only /* * Power Management Quality of Service (PM QoS) support base. * * Copyright (C) 2020 Intel Corporation * * Authors: * Mark Gross <mgross@linux.intel.com> * Rafael J. Wysocki <rafael.j.wysocki@intel.com> * * Provided here is an interface for specifying PM QoS dependencies. It allows * entities depending on QoS constraints to register their requests which are * aggregated as appropriate to produce effective constraints (target values) * that can be monitored by entities needing to respect them, either by polling * or through a built-in notification mechanism. * * In addition to the basic functionality, more specific interfaces for managing * global CPU latency QoS requests and frequency QoS requests are provided.
*/ /*#define DEBUG*/ #include <linux/pm_qos.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/miscdevice.h> #include <linux/string.h> #include <linux/platform_device.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/export.h> #include <trace/events/power.h> /* * locking rule: all changes to constraints or notifiers lists * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ static DEFINE_SPINLOCK(pm_qos_lock); /** * pm_qos_read_value - Return the current effective constraint value. * @c: List of PM QoS constraint requests. */ s32 pm_qos_read_value(struct pm_qos_constraints *c) { return READ_ONCE(c->target_value); } static int pm_qos_get_value(struct pm_qos_constraints *c) { if (plist_head_empty(&c->list)) return c->no_constraint_value; switch (c->type) { case PM_QOS_MIN: return plist_first(&c->list)->prio; case PM_QOS_MAX: return plist_last(&c->list)->prio; default: WARN(1, "Unknown PM QoS type in %s\n", __func__); return PM_QOS_DEFAULT_VALUE; } } static void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) { WRITE_ONCE(c->target_value, value); } /** * pm_qos_update_target - Update a list of PM QoS constraint requests. * @c: List of PM QoS requests. * @node: Target list entry. * @action: Action to carry out (add, update or remove). * @value: New request value for the target list entry. * * Update the given list of PM QoS constraint requests, @c, by carrying an * @action involving the @node list entry and @value on it. * * The recognized values of @action are PM_QOS_ADD_REQ (store @value in @node * and add it to the list), PM_QOS_UPDATE_REQ (remove @node from the list, store * @value in it and add it to the list again), and PM_QOS_REMOVE_REQ (remove * @node from the list, ignore @value). * * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value) { int prev_value, curr_value, new_value; unsigned long flags; spin_lock_irqsave(&pm_qos_lock, flags); prev_value = pm_qos_get_value(c); if (value == PM_QOS_DEFAULT_VALUE) new_value = c->default_value; else new_value = value; switch (action) { case PM_QOS_REMOVE_REQ: plist_del(node, &c->list); break; case PM_QOS_UPDATE_REQ: /* * To change the list, atomically remove, reinit with new value * and add, then see if the aggregate has changed. */ plist_del(node, &c->list); fallthrough; case PM_QOS_ADD_REQ: plist_node_init(node, new_value); plist_add(node, &c->list); break; default: /* no action */ ; } curr_value = pm_qos_get_value(c); pm_qos_set_value(c, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); trace_pm_qos_update_target(action, prev_value, curr_value); if (prev_value == curr_value) return 0; if (c->notifiers) blocking_notifier_call_chain(c->notifiers, curr_value, NULL); return 1; } /** * pm_qos_flags_remove_req - Remove device PM QoS flags request. * @pqf: Device PM QoS flags set to remove the request from. * @req: Request to remove from the set. 
*/ static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req) { s32 val = 0; list_del(&req->node); list_for_each_entry(req, &pqf->list, node) val |= req->flags; pqf->effective_flags = val; } /** * pm_qos_update_flags - Update a set of PM QoS flags. * @pqf: Set of PM QoS flags to update. * @req: Request to add to the set, to modify, or to remove from the set. * @action: Action to take on the set. * @val: Value of the request to add or modify. * * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val) { unsigned long irqflags; s32 prev_value, curr_value; spin_lock_irqsave(&pm_qos_lock, irqflags); prev_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; switch (action) { case PM_QOS_REMOVE_REQ: pm_qos_flags_remove_req(pqf, req); break; case PM_QOS_UPDATE_REQ: pm_qos_flags_remove_req(pqf, req); fallthrough; case PM_QOS_ADD_REQ: req->flags = val; INIT_LIST_HEAD(&req->node); list_add_tail(&req->node, &pqf->list); pqf->effective_flags |= val; break; default: /* no action */ ; } curr_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; spin_unlock_irqrestore(&pm_qos_lock, irqflags); trace_pm_qos_update_flags(action, prev_value, curr_value); return prev_value != curr_value; } #ifdef CONFIG_CPU_IDLE /* Definitions related to the CPU latency QoS. */ static struct pm_qos_constraints cpu_latency_constraints = { .list = PLIST_HEAD_INIT(cpu_latency_constraints.list), .target_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .default_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .no_constraint_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .type = PM_QOS_MIN, }; static inline bool cpu_latency_qos_value_invalid(s32 value) { return value < 0 && value != PM_QOS_DEFAULT_VALUE; } /** * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit. */ s32 cpu_latency_qos_limit(void) { return pm_qos_read_value(&cpu_latency_constraints); } /** * cpu_latency_qos_request_active - Check the given PM QoS request. * @req: PM QoS request to check. * * Return: 'true' if @req has been added to the CPU latency QoS list, 'false' * otherwise. */ bool cpu_latency_qos_request_active(struct pm_qos_request *req) { return req->qos == &cpu_latency_constraints; } EXPORT_SYMBOL_GPL(cpu_latency_qos_request_active); static void cpu_latency_qos_apply(struct pm_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret = pm_qos_update_target(req->qos, &req->node, action, value); if (ret > 0) wake_up_all_idle_cpus(); } /** * cpu_latency_qos_add_request - Add new CPU latency QoS request. * @req: Pointer to a preallocated handle. * @value: Requested constraint value. * * Use @value to initialize the request handle pointed to by @req, insert it as * a new entry to the CPU latency QoS list and recompute the effective QoS * constraint for that list. * * Callers need to save the handle for later use in updates and removal of the * QoS request represented by it. 
*/ void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value) { if (!req || cpu_latency_qos_value_invalid(value)) return; if (cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for already added request\n", __func__); return; } trace_pm_qos_add_request(value); req->qos = &cpu_latency_constraints; cpu_latency_qos_apply(req, PM_QOS_ADD_REQ, value); } EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request); /** * cpu_latency_qos_update_request - Modify existing CPU latency QoS request. * @req : QoS request to update. * @new_value: New requested constraint value. * * Use @new_value to update the QoS request represented by @req in the CPU * latency QoS list along with updating the effective constraint value for that * list. */ void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value) { if (!req || cpu_latency_qos_value_invalid(new_value)) return; if (!cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } trace_pm_qos_update_request(new_value); if (new_value == req->node.prio) return; cpu_latency_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(cpu_latency_qos_update_request); /** * cpu_latency_qos_remove_request - Remove existing CPU latency QoS request. * @req: QoS request to remove. * * Remove the CPU latency QoS request represented by @req from the CPU latency * QoS list along with updating the effective constraint value for that list. */ void cpu_latency_qos_remove_request(struct pm_qos_request *req) { if (!req) return; if (!cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } trace_pm_qos_remove_request(PM_QOS_DEFAULT_VALUE); cpu_latency_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } EXPORT_SYMBOL_GPL(cpu_latency_qos_remove_request); /* User space interface to the CPU latency QoS via misc device. 
*/ static int cpu_latency_qos_open(struct inode *inode, struct file *filp) { struct pm_qos_request *req; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; cpu_latency_qos_add_request(req, PM_QOS_DEFAULT_VALUE); filp->private_data = req; return 0; } static int cpu_latency_qos_release(struct inode *inode, struct file *filp) { struct pm_qos_request *req = filp->private_data; filp->private_data = NULL; cpu_latency_qos_remove_request(req); kfree(req); return 0; } static ssize_t cpu_latency_qos_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct pm_qos_request *req = filp->private_data; unsigned long flags; s32 value; if (!req || !cpu_latency_qos_request_active(req)) return -EINVAL; spin_lock_irqsave(&pm_qos_lock, flags); value = pm_qos_get_value(&cpu_latency_constraints); spin_unlock_irqrestore(&pm_qos_lock, flags); return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); } static ssize_t cpu_latency_qos_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { s32 value; if (count == sizeof(s32)) { if (copy_from_user(&value, buf, sizeof(s32))) return -EFAULT; } else { int ret; ret = kstrtos32_from_user(buf, count, 16, &value); if (ret) return ret; } cpu_latency_qos_update_request(filp->private_data, value); return count; } static const struct file_operations cpu_latency_qos_fops = { .write = cpu_latency_qos_write, .read = cpu_latency_qos_read, .open = cpu_latency_qos_open, .release = cpu_latency_qos_release, .llseek = noop_llseek, }; static struct miscdevice cpu_latency_qos_miscdev = { .minor = MISC_DYNAMIC_MINOR, .name = "cpu_dma_latency", .fops = &cpu_latency_qos_fops, }; static int __init cpu_latency_qos_init(void) { int ret; ret = misc_register(&cpu_latency_qos_miscdev); if (ret < 0) pr_err("%s: %s setup failed\n", __func__, cpu_latency_qos_miscdev.name); return ret; } late_initcall(cpu_latency_qos_init); #endif /* CONFIG_CPU_IDLE */ /* Definitions related to the frequency QoS below. */ static inline bool freq_qos_value_invalid(s32 value) { return value < 0 && value != PM_QOS_DEFAULT_VALUE; } /** * freq_constraints_init - Initialize frequency QoS constraints. * @qos: Frequency QoS constraints to initialize. */ void freq_constraints_init(struct freq_constraints *qos) { struct pm_qos_constraints *c; c = &qos->min_freq; plist_head_init(&c->list); c->target_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->default_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->no_constraint_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->type = PM_QOS_MAX; c->notifiers = &qos->min_freq_notifiers; BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); c = &qos->max_freq; plist_head_init(&c->list); c->target_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->default_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->no_constraint_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->type = PM_QOS_MIN; c->notifiers = &qos->max_freq_notifiers; BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); } /** * freq_qos_read_value - Get frequency QoS constraint for a given list. * @qos: Constraints to evaluate. * @type: QoS request type. */ s32 freq_qos_read_value(struct freq_constraints *qos, enum freq_qos_req_type type) { s32 ret; switch (type) { case FREQ_QOS_MIN: ret = IS_ERR_OR_NULL(qos) ? FREQ_QOS_MIN_DEFAULT_VALUE : pm_qos_read_value(&qos->min_freq); break; case FREQ_QOS_MAX: ret = IS_ERR_OR_NULL(qos) ? FREQ_QOS_MAX_DEFAULT_VALUE : pm_qos_read_value(&qos->max_freq); break; default: WARN_ON(1); ret = 0; } return ret; } /** * freq_qos_apply - Add/modify/remove frequency QoS request. * @req: Constraint request to apply. 
* @action: Action to perform (add/update/remove). * @value: Value to assign to the QoS request. * * This is only meant to be called from inside pm_qos, not drivers. */ int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret; switch(req->type) { case FREQ_QOS_MIN: ret = pm_qos_update_target(&req->qos->min_freq, &req->pnode, action, value); break; case FREQ_QOS_MAX: ret = pm_qos_update_target(&req->qos->max_freq, &req->pnode, action, value); break; default: ret = -EINVAL; } return ret; } /** * freq_qos_add_request - Insert new frequency QoS request into a given list. * @qos: Constraints to update. * @req: Preallocated request object. * @type: Request type. * @value: Request value. * * Insert a new entry into the @qos list of requests, recompute the effective * QoS constraint value for that list and initialize the @req object. The * caller needs to save that object for later use in updates and removal. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_add_request(struct freq_constraints *qos, struct freq_qos_request *req, enum freq_qos_req_type type, s32 value) { int ret; if (IS_ERR_OR_NULL(qos) || !req || freq_qos_value_invalid(value)) return -EINVAL; if (WARN(freq_qos_request_active(req), "%s() called for active request\n", __func__)) return -EINVAL; req->qos = qos; req->type = type; ret = freq_qos_apply(req, PM_QOS_ADD_REQ, value); if (ret < 0) { req->qos = NULL; req->type = 0; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_add_request); /** * freq_qos_update_request - Modify existing frequency QoS request. * @req: Request to modify. * @new_value: New request value. * * Update an existing frequency QoS request along with the effective constraint * value for the list of requests it belongs to. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_update_request(struct freq_qos_request *req, s32 new_value) { if (!req || freq_qos_value_invalid(new_value)) return -EINVAL; if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; if (req->pnode.prio == new_value) return 0; return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(freq_qos_update_request); /** * freq_qos_remove_request - Remove frequency QoS request from its list. * @req: Request to remove. * * Remove the given frequency QoS request from the list of constraints it * belongs to and recompute the effective constraint value for that list. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_remove_request(struct freq_qos_request *req) { int ret; if (!req) return -EINVAL; if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; ret = freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); req->qos = NULL; req->type = 0; return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_request); /** * freq_qos_add_notifier - Add frequency QoS change notifier. * @qos: List of requests to add the notifier to. * @type: Request type. * @notifier: Notifier block to add. 
*/ int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier) { int ret; if (IS_ERR_OR_NULL(qos) || !notifier) return -EINVAL; switch (type) { case FREQ_QOS_MIN: ret = blocking_notifier_chain_register(qos->min_freq.notifiers, notifier); break; case FREQ_QOS_MAX: ret = blocking_notifier_chain_register(qos->max_freq.notifiers, notifier); break; default: WARN_ON(1); ret = -EINVAL; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_add_notifier); /** * freq_qos_remove_notifier - Remove frequency QoS change notifier. * @qos: List of requests to remove the notifier from. * @type: Request type. * @notifier: Notifier block to remove. */ int freq_qos_remove_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier) { int ret; if (IS_ERR_OR_NULL(qos) || !notifier) return -EINVAL; switch (type) { case FREQ_QOS_MIN: ret = blocking_notifier_chain_unregister(qos->min_freq.notifiers, notifier); break; case FREQ_QOS_MAX: ret = blocking_notifier_chain_unregister(qos->max_freq.notifiers, notifier); break; default: WARN_ON(1); ret = -EINVAL; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_notifier);
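/*
 * Illustrative sketch, not part of the file above: how a driver might use the
 * CPU latency QoS interface defined above (available when CONFIG_CPU_IDLE is
 * enabled, as in that section). The request object, the example_* function
 * names and the 20 us bound are made up for illustration; only the
 * cpu_latency_qos_*() calls are the real API.
 */
#include <linux/pm_qos.h>

static struct pm_qos_request example_latency_req;

static void example_enter_low_latency_mode(void)
{
	/* Add a request capping CPU wakeup latency at 20 us; the aggregate
	 * (PM_QOS_MIN) constraint for the list is recomputed. */
	cpu_latency_qos_add_request(&example_latency_req, 20);
}

static void example_update_low_latency_mode(s32 new_limit_us)
{
	/* Move the existing request to a new value; idle CPUs are woken
	 * only if the effective constraint actually changes. */
	cpu_latency_qos_update_request(&example_latency_req, new_limit_us);
}

static void example_leave_low_latency_mode(void)
{
	/* Drop the request; the handle is cleared and may be reused. */
	cpu_latency_qos_remove_request(&example_latency_req);
}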
// SPDX-License-Identifier: GPL-2.0+ /* * cdc-acm.c * * Copyright (c) 1999 Armin Fuerst <fuerst@in.tum.de> * Copyright (c) 1999 Pavel Machek <pavel@ucw.cz> * Copyright (c) 1999 Johannes Erdfelt <johannes@erdfelt.com> * Copyright (c) 2000 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2004 Oliver Neukum <oliver@neukum.name> * Copyright (c) 2005 David Kubicek <dave@awk.cz> * Copyright (c) 2011 Johan Hovold <jhovold@gmail.com> * * USB Abstract Control Model driver for USB modems and ISDN adapters * * Sponsored by SuSE */ #undef DEBUG #undef VERBOSE_DEBUG #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/log2.h> #include <linux/tty.h> #include <linux/serial.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/tty_ldisc.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <asm/byteorder.h> #include <asm/unaligned.h> #include <linux/idr.h> #include <linux/list.h> #include "cdc-acm.h" #define DRIVER_AUTHOR "Armin Fuerst, Pavel Machek,
Johannes Erdfelt, Vojtech Pavlik, David Kubicek, Johan Hovold" #define DRIVER_DESC "USB Abstract Control Model driver for USB modems and ISDN adapters" static struct usb_driver acm_driver; static struct tty_driver *acm_tty_driver; static DEFINE_IDR(acm_minors); static DEFINE_MUTEX(acm_minors_lock); static void acm_tty_set_termios(struct tty_struct *tty, const struct ktermios *termios_old); /* * acm_minors accessors */ /* * Look up an ACM structure by minor. If found and not disconnected, increment * its refcount and return it with its mutex held. */ static struct acm *acm_get_by_minor(unsigned int minor) { struct acm *acm; mutex_lock(&acm_minors_lock); acm = idr_find(&acm_minors, minor); if (acm) { mutex_lock(&acm->mutex); if (acm->disconnected) { mutex_unlock(&acm->mutex); acm = NULL; } else { tty_port_get(&acm->port); mutex_unlock(&acm->mutex); } } mutex_unlock(&acm_minors_lock); return acm; } /* * Try to find an available minor number and if found, associate it with 'acm'. */ static int acm_alloc_minor(struct acm *acm) { int minor; mutex_lock(&acm_minors_lock); minor = idr_alloc(&acm_minors, acm, 0, ACM_TTY_MINORS, GFP_KERNEL); mutex_unlock(&acm_minors_lock); return minor; } /* Release the minor number associated with 'acm'. */ static void acm_release_minor(struct acm *acm) { mutex_lock(&acm_minors_lock); idr_remove(&acm_minors, acm->minor); mutex_unlock(&acm_minors_lock); } /* * Functions for ACM control messages. */ static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len) { int retval; retval = usb_autopm_get_interface(acm->control); if (retval) return retval; retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0), request, USB_RT_ACM, value, acm->control->altsetting[0].desc.bInterfaceNumber, buf, len, USB_CTRL_SET_TIMEOUT); dev_dbg(&acm->control->dev, "%s - rq 0x%02x, val %#x, len %#x, result %d\n", __func__, request, value, len, retval); usb_autopm_put_interface(acm->control); return retval < 0 ? retval : 0; } /* devices aren't required to support these requests. * the cdc acm descriptor tells whether they do... */ static inline int acm_set_control(struct acm *acm, int control) { if (acm->quirks & QUIRK_CONTROL_LINE_STATE) return -EOPNOTSUPP; return acm_ctrl_msg(acm, USB_CDC_REQ_SET_CONTROL_LINE_STATE, control, NULL, 0); } #define acm_set_line(acm, line) \ acm_ctrl_msg(acm, USB_CDC_REQ_SET_LINE_CODING, 0, line, sizeof *(line)) #define acm_send_break(acm, ms) \ acm_ctrl_msg(acm, USB_CDC_REQ_SEND_BREAK, ms, NULL, 0) static void acm_poison_urbs(struct acm *acm) { int i; usb_poison_urb(acm->ctrlurb); for (i = 0; i < ACM_NW; i++) usb_poison_urb(acm->wb[i].urb); for (i = 0; i < acm->rx_buflimit; i++) usb_poison_urb(acm->read_urbs[i]); } static void acm_unpoison_urbs(struct acm *acm) { int i; for (i = 0; i < acm->rx_buflimit; i++) usb_unpoison_urb(acm->read_urbs[i]); for (i = 0; i < ACM_NW; i++) usb_unpoison_urb(acm->wb[i].urb); usb_unpoison_urb(acm->ctrlurb); } /* * Write buffer management. * All of these assume proper locks taken by the caller. */ static int acm_wb_alloc(struct acm *acm) { int i, wbn; struct acm_wb *wb; wbn = 0; i = 0; for (;;) { wb = &acm->wb[wbn]; if (!wb->use) { wb->use = true; wb->len = 0; return wbn; } wbn = (wbn + 1) % ACM_NW; if (++i >= ACM_NW) return -1; } } static int acm_wb_is_avail(struct acm *acm) { int i, n; unsigned long flags; n = ACM_NW; spin_lock_irqsave(&acm->write_lock, flags); for (i = 0; i < ACM_NW; i++) if(acm->wb[i].use) n--; spin_unlock_irqrestore(&acm->write_lock, flags); return n; } /* * Finish write. 
Caller must hold acm->write_lock */ static void acm_write_done(struct acm *acm, struct acm_wb *wb) { wb->use = false; acm->transmitting--; usb_autopm_put_interface_async(acm->control); } /* * Poke write. * * the caller is responsible for locking */ static int acm_start_wb(struct acm *acm, struct acm_wb *wb) { int rc; acm->transmitting++; wb->urb->transfer_buffer = wb->buf; wb->urb->transfer_dma = wb->dmah; wb->urb->transfer_buffer_length = wb->len; wb->urb->dev = acm->dev; rc = usb_submit_urb(wb->urb, GFP_ATOMIC); if (rc < 0) { if (rc != -EPERM) dev_err(&acm->data->dev, "%s - usb_submit_urb(write bulk) failed: %d\n", __func__, rc); acm_write_done(acm, wb); } return rc; } /* * attributes exported through sysfs */ static ssize_t bmCapabilities_show (struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct acm *acm = usb_get_intfdata(intf); return sprintf(buf, "%d", acm->ctrl_caps); } static DEVICE_ATTR_RO(bmCapabilities); static ssize_t wCountryCodes_show (struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct acm *acm = usb_get_intfdata(intf); memcpy(buf, acm->country_codes, acm->country_code_size); return acm->country_code_size; } static DEVICE_ATTR_RO(wCountryCodes); static ssize_t iCountryCodeRelDate_show (struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct acm *acm = usb_get_intfdata(intf); return sprintf(buf, "%d", acm->country_rel_date); } static DEVICE_ATTR_RO(iCountryCodeRelDate); /* * Interrupt handlers for various ACM device responses */ static void acm_process_notification(struct acm *acm, unsigned char *buf) { int newctrl; int difference; unsigned long flags; struct usb_cdc_notification *dr = (struct usb_cdc_notification *)buf; unsigned char *data = buf + sizeof(struct usb_cdc_notification); switch (dr->bNotificationType) { case USB_CDC_NOTIFY_NETWORK_CONNECTION: dev_dbg(&acm->control->dev, "%s - network connection: %d\n", __func__, dr->wValue); break; case USB_CDC_NOTIFY_SERIAL_STATE: if (le16_to_cpu(dr->wLength) != 2) { dev_dbg(&acm->control->dev, "%s - malformed serial state\n", __func__); break; } newctrl = get_unaligned_le16(data); dev_dbg(&acm->control->dev, "%s - serial state: 0x%x\n", __func__, newctrl); if (!acm->clocal && (acm->ctrlin & ~newctrl & USB_CDC_SERIAL_STATE_DCD)) { dev_dbg(&acm->control->dev, "%s - calling hangup\n", __func__); tty_port_tty_hangup(&acm->port, false); } difference = acm->ctrlin ^ newctrl; if ((difference & USB_CDC_SERIAL_STATE_DCD) && acm->port.tty) { struct tty_ldisc *ld = tty_ldisc_ref(acm->port.tty); if (ld) { if (ld->ops->dcd_change) ld->ops->dcd_change(acm->port.tty, newctrl & USB_CDC_SERIAL_STATE_DCD); tty_ldisc_deref(ld); } } spin_lock_irqsave(&acm->read_lock, flags); acm->ctrlin = newctrl; acm->oldcount = acm->iocount; if (difference & USB_CDC_SERIAL_STATE_DSR) acm->iocount.dsr++; if (difference & USB_CDC_SERIAL_STATE_DCD) acm->iocount.dcd++; if (newctrl & USB_CDC_SERIAL_STATE_BREAK) { acm->iocount.brk++; tty_insert_flip_char(&acm->port, 0, TTY_BREAK); } if (newctrl & USB_CDC_SERIAL_STATE_RING_SIGNAL) acm->iocount.rng++; if (newctrl & USB_CDC_SERIAL_STATE_FRAMING) acm->iocount.frame++; if (newctrl & USB_CDC_SERIAL_STATE_PARITY) acm->iocount.parity++; if (newctrl & USB_CDC_SERIAL_STATE_OVERRUN) acm->iocount.overrun++; spin_unlock_irqrestore(&acm->read_lock, flags); if (newctrl & USB_CDC_SERIAL_STATE_BREAK) 
tty_flip_buffer_push(&acm->port); if (difference) wake_up_all(&acm->wioctl); break; default: dev_dbg(&acm->control->dev, "%s - unknown notification %d received: index %d len %d\n", __func__, dr->bNotificationType, dr->wIndex, dr->wLength); } } /* control interface reports status changes with "interrupt" transfers */ static void acm_ctrl_irq(struct urb *urb) { struct acm *acm = urb->context; struct usb_cdc_notification *dr = urb->transfer_buffer; unsigned int current_size = urb->actual_length; unsigned int expected_size, copy_size, alloc_size; int retval; int status = urb->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&acm->control->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&acm->control->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_mark_last_busy(acm->dev); if (acm->nb_index) dr = (struct usb_cdc_notification *)acm->notification_buffer; /* size = notification-header + (optional) data */ expected_size = sizeof(struct usb_cdc_notification) + le16_to_cpu(dr->wLength); if (current_size < expected_size) { /* notification is transmitted fragmented, reassemble */ if (acm->nb_size < expected_size) { u8 *new_buffer; alloc_size = roundup_pow_of_two(expected_size); /* Final freeing is done on disconnect. */ new_buffer = krealloc(acm->notification_buffer, alloc_size, GFP_ATOMIC); if (!new_buffer) { acm->nb_index = 0; goto exit; } acm->notification_buffer = new_buffer; acm->nb_size = alloc_size; dr = (struct usb_cdc_notification *)acm->notification_buffer; } copy_size = min(current_size, expected_size - acm->nb_index); memcpy(&acm->notification_buffer[acm->nb_index], urb->transfer_buffer, copy_size); acm->nb_index += copy_size; current_size = acm->nb_index; } if (current_size >= expected_size) { /* notification complete */ acm_process_notification(acm, (unsigned char *)dr); acm->nb_index = 0; } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval && retval != -EPERM && retval != -ENODEV) dev_err(&acm->control->dev, "%s - usb_submit_urb failed: %d\n", __func__, retval); else dev_vdbg(&acm->control->dev, "control resubmission terminated %d\n", retval); } static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags) { int res; if (!test_and_clear_bit(index, &acm->read_urbs_free)) return 0; res = usb_submit_urb(acm->read_urbs[index], mem_flags); if (res) { if (res != -EPERM && res != -ENODEV) { dev_err(&acm->data->dev, "urb %d failed submission with %d\n", index, res); } else { dev_vdbg(&acm->data->dev, "intended failure %d\n", res); } set_bit(index, &acm->read_urbs_free); return res; } else { dev_vdbg(&acm->data->dev, "submitted urb %d\n", index); } return 0; } static int acm_submit_read_urbs(struct acm *acm, gfp_t mem_flags) { int res; int i; for (i = 0; i < acm->rx_buflimit; ++i) { res = acm_submit_read_urb(acm, i, mem_flags); if (res) return res; } return 0; } static void acm_process_read_urb(struct acm *acm, struct urb *urb) { unsigned long flags; if (!urb->actual_length) return; spin_lock_irqsave(&acm->read_lock, flags); tty_insert_flip_string(&acm->port, urb->transfer_buffer, urb->actual_length); spin_unlock_irqrestore(&acm->read_lock, flags); tty_flip_buffer_push(&acm->port); } static void acm_read_bulk_callback(struct urb *urb) { struct acm_rb *rb = urb->context; struct acm *acm = rb->instance; int status = urb->status; bool stopped = false; bool stalled = false; bool cooldown = false; 
dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n", rb->index, urb->actual_length, status); switch (status) { case 0: usb_mark_last_busy(acm->dev); acm_process_read_urb(acm, urb); break; case -EPIPE: set_bit(EVENT_RX_STALL, &acm->flags); stalled = true; break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&acm->data->dev, "%s - urb shutting down with status: %d\n", __func__, status); stopped = true; break; case -EOVERFLOW: case -EPROTO: dev_dbg(&acm->data->dev, "%s - cooling babbling device\n", __func__); usb_mark_last_busy(acm->dev); set_bit(rb->index, &acm->urbs_in_error_delay); set_bit(ACM_ERROR_DELAY, &acm->flags); cooldown = true; break; default: dev_dbg(&acm->data->dev, "%s - nonzero urb status received: %d\n", __func__, status); break; } /* * Make sure URB processing is done before marking as free to avoid * racing with unthrottle() on another CPU. Matches the barriers * implied by the test_and_clear_bit() in acm_submit_read_urb(). */ smp_mb__before_atomic(); set_bit(rb->index, &acm->read_urbs_free); /* * Make sure URB is marked as free before checking the throttled flag * to avoid racing with unthrottle() on another CPU. Matches the * smp_mb() in unthrottle(). */ smp_mb__after_atomic(); if (stopped || stalled || cooldown) { if (stalled) schedule_delayed_work(&acm->dwork, 0); else if (cooldown) schedule_delayed_work(&acm->dwork, HZ / 2); return; } if (test_bit(ACM_THROTTLED, &acm->flags)) return; acm_submit_read_urb(acm, rb->index, GFP_ATOMIC); } /* data interface wrote those outgoing bytes */ static void acm_write_bulk(struct urb *urb) { struct acm_wb *wb = urb->context; struct acm *acm = wb->instance; unsigned long flags; int status = urb->status; if (status || (urb->actual_length != urb->transfer_buffer_length)) dev_vdbg(&acm->data->dev, "wrote len %d/%d, status %d\n", urb->actual_length, urb->transfer_buffer_length, status); spin_lock_irqsave(&acm->write_lock, flags); acm_write_done(acm, wb); spin_unlock_irqrestore(&acm->write_lock, flags); set_bit(EVENT_TTY_WAKEUP, &acm->flags); schedule_delayed_work(&acm->dwork, 0); } static void acm_softint(struct work_struct *work) { int i; struct acm *acm = container_of(work, struct acm, dwork.work); if (test_bit(EVENT_RX_STALL, &acm->flags)) { smp_mb(); /* against acm_suspend() */ if (!acm->susp_count) { for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->read_urbs[i]); usb_clear_halt(acm->dev, acm->in); acm_submit_read_urbs(acm, GFP_KERNEL); clear_bit(EVENT_RX_STALL, &acm->flags); } } if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) { for (i = 0; i < acm->rx_buflimit; i++) if (test_and_clear_bit(i, &acm->urbs_in_error_delay)) acm_submit_read_urb(acm, i, GFP_KERNEL); } if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) tty_port_tty_wakeup(&acm->port); } /* * TTY handlers */ static int acm_tty_install(struct tty_driver *driver, struct tty_struct *tty) { struct acm *acm; int retval; acm = acm_get_by_minor(tty->index); if (!acm) return -ENODEV; retval = tty_standard_install(driver, tty); if (retval) goto error_init_termios; /* * Suppress initial echoing for some devices which might send data * immediately after acm driver has been installed. 
*/ if (acm->quirks & DISABLE_ECHO) tty->termios.c_lflag &= ~ECHO; tty->driver_data = acm; return 0; error_init_termios: tty_port_put(&acm->port); return retval; } static int acm_tty_open(struct tty_struct *tty, struct file *filp) { struct acm *acm = tty->driver_data; return tty_port_open(&acm->port, tty, filp); } static void acm_port_dtr_rts(struct tty_port *port, bool active) { struct acm *acm = container_of(port, struct acm, port); int val; int res; if (active) val = USB_CDC_CTRL_DTR | USB_CDC_CTRL_RTS; else val = 0; /* FIXME: add missing ctrlout locking throughout driver */ acm->ctrlout = val; res = acm_set_control(acm, val); if (res && (acm->ctrl_caps & USB_CDC_CAP_LINE)) /* This is broken in too many devices to spam the logs */ dev_dbg(&acm->control->dev, "failed to set dtr/rts\n"); } static int acm_port_activate(struct tty_port *port, struct tty_struct *tty) { struct acm *acm = container_of(port, struct acm, port); int retval = -ENODEV; int i; mutex_lock(&acm->mutex); if (acm->disconnected) goto disconnected; retval = usb_autopm_get_interface(acm->control); if (retval) goto error_get_interface; set_bit(TTY_NO_WRITE_SPLIT, &tty->flags); acm->control->needs_remote_wakeup = 1; acm->ctrlurb->dev = acm->dev; retval = usb_submit_urb(acm->ctrlurb, GFP_KERNEL); if (retval) { dev_err(&acm->control->dev, "%s - usb_submit_urb(ctrl irq) failed\n", __func__); goto error_submit_urb; } acm_tty_set_termios(tty, NULL); /* * Unthrottle device in case the TTY was closed while throttled. */ clear_bit(ACM_THROTTLED, &acm->flags); retval = acm_submit_read_urbs(acm, GFP_KERNEL); if (retval) goto error_submit_read_urbs; usb_autopm_put_interface(acm->control); mutex_unlock(&acm->mutex); return 0; error_submit_read_urbs: for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->read_urbs[i]); usb_kill_urb(acm->ctrlurb); error_submit_urb: usb_autopm_put_interface(acm->control); error_get_interface: disconnected: mutex_unlock(&acm->mutex); return usb_translate_errors(retval); } static void acm_port_destruct(struct tty_port *port) { struct acm *acm = container_of(port, struct acm, port); if (acm->minor != ACM_MINOR_INVALID) acm_release_minor(acm); usb_put_intf(acm->control); kfree(acm->country_codes); kfree(acm); } static void acm_port_shutdown(struct tty_port *port) { struct acm *acm = container_of(port, struct acm, port); struct urb *urb; struct acm_wb *wb; /* * Need to grab write_lock to prevent race with resume, but no need to * hold it due to the tty-port initialised flag. 
*/ acm_poison_urbs(acm); spin_lock_irq(&acm->write_lock); spin_unlock_irq(&acm->write_lock); usb_autopm_get_interface_no_resume(acm->control); acm->control->needs_remote_wakeup = 0; usb_autopm_put_interface(acm->control); for (;;) { urb = usb_get_from_anchor(&acm->delayed); if (!urb) break; wb = urb->context; wb->use = false; usb_autopm_put_interface_async(acm->control); } acm_unpoison_urbs(acm); } static void acm_tty_cleanup(struct tty_struct *tty) { struct acm *acm = tty->driver_data; tty_port_put(&acm->port); } static void acm_tty_hangup(struct tty_struct *tty) { struct acm *acm = tty->driver_data; tty_port_hangup(&acm->port); } static void acm_tty_close(struct tty_struct *tty, struct file *filp) { struct acm *acm = tty->driver_data; tty_port_close(&acm->port, tty, filp); } static ssize_t acm_tty_write(struct tty_struct *tty, const u8 *buf, size_t count) { struct acm *acm = tty->driver_data; int stat; unsigned long flags; int wbn; struct acm_wb *wb; if (!count) return 0; dev_vdbg(&acm->data->dev, "%zu bytes from tty layer\n", count); spin_lock_irqsave(&acm->write_lock, flags); wbn = acm_wb_alloc(acm); if (wbn < 0) { spin_unlock_irqrestore(&acm->write_lock, flags); return 0; } wb = &acm->wb[wbn]; if (!acm->dev) { wb->use = false; spin_unlock_irqrestore(&acm->write_lock, flags); return -ENODEV; } count = (count > acm->writesize) ? acm->writesize : count; dev_vdbg(&acm->data->dev, "writing %zu bytes\n", count); memcpy(wb->buf, buf, count); wb->len = count; stat = usb_autopm_get_interface_async(acm->control); if (stat) { wb->use = false; spin_unlock_irqrestore(&acm->write_lock, flags); return stat; } if (acm->susp_count) { usb_anchor_urb(wb->urb, &acm->delayed); spin_unlock_irqrestore(&acm->write_lock, flags); return count; } stat = acm_start_wb(acm, wb); spin_unlock_irqrestore(&acm->write_lock, flags); if (stat < 0) return stat; return count; } static unsigned int acm_tty_write_room(struct tty_struct *tty) { struct acm *acm = tty->driver_data; /* * Do not let the line discipline to know that we have a reserve, * or it might get too enthusiastic. */ return acm_wb_is_avail(acm) ? acm->writesize : 0; } static void acm_tty_flush_buffer(struct tty_struct *tty) { struct acm *acm = tty->driver_data; unsigned long flags; int i; spin_lock_irqsave(&acm->write_lock, flags); for (i = 0; i < ACM_NW; i++) if (acm->wb[i].use) usb_unlink_urb(acm->wb[i].urb); spin_unlock_irqrestore(&acm->write_lock, flags); } static unsigned int acm_tty_chars_in_buffer(struct tty_struct *tty) { struct acm *acm = tty->driver_data; /* * if the device was unplugged then any remaining characters fell out * of the connector ;) */ if (acm->disconnected) return 0; /* * This is inaccurate (overcounts), but it works. */ return (ACM_NW - acm_wb_is_avail(acm)) * acm->writesize; } static void acm_tty_throttle(struct tty_struct *tty) { struct acm *acm = tty->driver_data; set_bit(ACM_THROTTLED, &acm->flags); } static void acm_tty_unthrottle(struct tty_struct *tty) { struct acm *acm = tty->driver_data; clear_bit(ACM_THROTTLED, &acm->flags); /* Matches the smp_mb__after_atomic() in acm_read_bulk_callback(). */ smp_mb(); acm_submit_read_urbs(acm, GFP_KERNEL); } static int acm_tty_break_ctl(struct tty_struct *tty, int state) { struct acm *acm = tty->driver_data; int retval; if (!(acm->ctrl_caps & USB_CDC_CAP_BRK)) return -EOPNOTSUPP; retval = acm_send_break(acm, state ? 
0xffff : 0); if (retval < 0) dev_dbg(&acm->control->dev, "%s - send break failed\n", __func__); return retval; } static int acm_tty_tiocmget(struct tty_struct *tty) { struct acm *acm = tty->driver_data; return (acm->ctrlout & USB_CDC_CTRL_DTR ? TIOCM_DTR : 0) | (acm->ctrlout & USB_CDC_CTRL_RTS ? TIOCM_RTS : 0) | (acm->ctrlin & USB_CDC_SERIAL_STATE_DSR ? TIOCM_DSR : 0) | (acm->ctrlin & USB_CDC_SERIAL_STATE_RING_SIGNAL ? TIOCM_RI : 0) | (acm->ctrlin & USB_CDC_SERIAL_STATE_DCD ? TIOCM_CD : 0) | TIOCM_CTS; } static int acm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct acm *acm = tty->driver_data; unsigned int newctrl; newctrl = acm->ctrlout; set = (set & TIOCM_DTR ? USB_CDC_CTRL_DTR : 0) | (set & TIOCM_RTS ? USB_CDC_CTRL_RTS : 0); clear = (clear & TIOCM_DTR ? USB_CDC_CTRL_DTR : 0) | (clear & TIOCM_RTS ? USB_CDC_CTRL_RTS : 0); newctrl = (newctrl & ~clear) | set; if (acm->ctrlout == newctrl) return 0; return acm_set_control(acm, acm->ctrlout = newctrl); } static int get_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct acm *acm = tty->driver_data; ss->line = acm->minor; ss->close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; ss->closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : jiffies_to_msecs(acm->port.closing_wait) / 10; return 0; } static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct acm *acm = tty->driver_data; unsigned int closing_wait, close_delay; int retval = 0; close_delay = msecs_to_jiffies(ss->close_delay * 10); closing_wait = ss->closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : msecs_to_jiffies(ss->closing_wait * 10); mutex_lock(&acm->port.mutex); if (!capable(CAP_SYS_ADMIN)) { if ((close_delay != acm->port.close_delay) || (closing_wait != acm->port.closing_wait)) retval = -EPERM; } else { acm->port.close_delay = close_delay; acm->port.closing_wait = closing_wait; } mutex_unlock(&acm->port.mutex); return retval; } static int wait_serial_change(struct acm *acm, unsigned long arg) { int rv = 0; DECLARE_WAITQUEUE(wait, current); struct async_icount old, new; do { spin_lock_irq(&acm->read_lock); old = acm->oldcount; new = acm->iocount; acm->oldcount = new; spin_unlock_irq(&acm->read_lock); if ((arg & TIOCM_DSR) && old.dsr != new.dsr) break; if ((arg & TIOCM_CD) && old.dcd != new.dcd) break; if ((arg & TIOCM_RI) && old.rng != new.rng) break; add_wait_queue(&acm->wioctl, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(&acm->wioctl, &wait); if (acm->disconnected) { if (arg & TIOCM_CD) break; else rv = -ENODEV; } else { if (signal_pending(current)) rv = -ERESTARTSYS; } } while (!rv); return rv; } static int acm_tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { struct acm *acm = tty->driver_data; icount->dsr = acm->iocount.dsr; icount->rng = acm->iocount.rng; icount->dcd = acm->iocount.dcd; icount->frame = acm->iocount.frame; icount->overrun = acm->iocount.overrun; icount->parity = acm->iocount.parity; icount->brk = acm->iocount.brk; return 0; } static int acm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct acm *acm = tty->driver_data; int rv = -ENOIOCTLCMD; switch (cmd) { case TIOCMIWAIT: rv = usb_autopm_get_interface(acm->control); if (rv < 0) { rv = -EIO; break; } rv = wait_serial_change(acm, arg); usb_autopm_put_interface(acm->control); break; } return rv; } static void acm_tty_set_termios(struct tty_struct *tty, const struct ktermios 
*termios_old) { struct acm *acm = tty->driver_data; struct ktermios *termios = &tty->termios; struct usb_cdc_line_coding newline; int newctrl = acm->ctrlout; newline.dwDTERate = cpu_to_le32(tty_get_baud_rate(tty)); newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0; newline.bParityType = termios->c_cflag & PARENB ? (termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0; newline.bDataBits = tty_get_char_size(termios->c_cflag); /* FIXME: Needs to clear unsupported bits in the termios */ acm->clocal = ((termios->c_cflag & CLOCAL) != 0); if (C_BAUD(tty) == B0) { newline.dwDTERate = acm->line.dwDTERate; newctrl &= ~USB_CDC_CTRL_DTR; } else if (termios_old && (termios_old->c_cflag & CBAUD) == B0) { newctrl |= USB_CDC_CTRL_DTR; } if (newctrl != acm->ctrlout) acm_set_control(acm, acm->ctrlout = newctrl); if (memcmp(&acm->line, &newline, sizeof newline)) { memcpy(&acm->line, &newline, sizeof newline); dev_dbg(&acm->control->dev, "%s - set line: %d %d %d %d\n", __func__, le32_to_cpu(newline.dwDTERate), newline.bCharFormat, newline.bParityType, newline.bDataBits); acm_set_line(acm, &acm->line); } } static const struct tty_port_operations acm_port_ops = { .dtr_rts = acm_port_dtr_rts, .shutdown = acm_port_shutdown, .activate = acm_port_activate, .destruct = acm_port_destruct, }; /* * USB probe and disconnect routines. */ /* Little helpers: write/read buffers free */ static void acm_write_buffers_free(struct acm *acm) { int i; struct acm_wb *wb; for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) usb_free_coherent(acm->dev, acm->writesize, wb->buf, wb->dmah); } static void acm_read_buffers_free(struct acm *acm) { int i; for (i = 0; i < acm->rx_buflimit; i++) usb_free_coherent(acm->dev, acm->readsize, acm->read_buffers[i].base, acm->read_buffers[i].dma); } /* Little helper: write buffers allocate */ static int acm_write_buffers_alloc(struct acm *acm) { int i; struct acm_wb *wb; for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) { wb->buf = usb_alloc_coherent(acm->dev, acm->writesize, GFP_KERNEL, &wb->dmah); if (!wb->buf) { while (i != 0) { --i; --wb; usb_free_coherent(acm->dev, acm->writesize, wb->buf, wb->dmah); } return -ENOMEM; } } return 0; } static int acm_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_cdc_union_desc *union_header = NULL; struct usb_cdc_call_mgmt_descriptor *cmgmd = NULL; unsigned char *buffer = intf->altsetting->extra; int buflen = intf->altsetting->extralen; struct usb_interface *control_interface; struct usb_interface *data_interface; struct usb_endpoint_descriptor *epctrl = NULL; struct usb_endpoint_descriptor *epread = NULL; struct usb_endpoint_descriptor *epwrite = NULL; struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_cdc_parsed_header h; struct acm *acm; int minor; int ctrlsize, readsize; u8 *buf; int call_intf_num = -1; int data_intf_num = -1; unsigned long quirks; int num_rx_buf; int i; int combined_interfaces = 0; struct device *tty_dev; int rv = -ENOMEM; int res; /* normal quirks */ quirks = (unsigned long)id->driver_info; if (quirks == IGNORE_DEVICE) return -ENODEV; memset(&h, 0x00, sizeof(struct usb_cdc_parsed_header)); num_rx_buf = (quirks == SINGLE_RX_URB) ? 
1 : ACM_NR; /* handle quirks deadly to normal probing*/ if (quirks == NO_UNION_NORMAL) { data_interface = usb_ifnum_to_if(usb_dev, 1); control_interface = usb_ifnum_to_if(usb_dev, 0); /* we would crash */ if (!data_interface || !control_interface) return -ENODEV; goto skip_normal_probe; } /* normal probing*/ if (!buffer) { dev_err(&intf->dev, "Weird descriptor references\n"); return -EINVAL; } if (!buflen) { if (intf->cur_altsetting->endpoint && intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) { dev_dbg(&intf->dev, "Seeking extra descriptors on endpoint\n"); buflen = intf->cur_altsetting->endpoint->extralen; buffer = intf->cur_altsetting->endpoint->extra; } else { dev_err(&intf->dev, "Zero length descriptor references\n"); return -EINVAL; } } cdc_parse_cdc_header(&h, intf, buffer, buflen); union_header = h.usb_cdc_union_desc; cmgmd = h.usb_cdc_call_mgmt_descriptor; if (cmgmd) call_intf_num = cmgmd->bDataInterface; if (!union_header) { if (intf->cur_altsetting->desc.bNumEndpoints == 3) { dev_dbg(&intf->dev, "No union descriptor, assuming single interface\n"); combined_interfaces = 1; control_interface = data_interface = intf; goto look_for_collapsed_interface; } else if (call_intf_num > 0) { dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n"); data_intf_num = call_intf_num; data_interface = usb_ifnum_to_if(usb_dev, data_intf_num); control_interface = intf; } else { dev_dbg(&intf->dev, "No union descriptor, giving up\n"); return -ENODEV; } } else { int class = -1; data_intf_num = union_header->bSlaveInterface0; control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0); data_interface = usb_ifnum_to_if(usb_dev, data_intf_num); if (control_interface) class = control_interface->cur_altsetting->desc.bInterfaceClass; if (class != USB_CLASS_COMM && class != USB_CLASS_CDC_DATA) { dev_dbg(&intf->dev, "Broken union descriptor, assuming single interface\n"); combined_interfaces = 1; control_interface = data_interface = intf; goto look_for_collapsed_interface; } } if (!control_interface || !data_interface) { dev_dbg(&intf->dev, "no interfaces\n"); return -ENODEV; } if (data_intf_num != call_intf_num) dev_dbg(&intf->dev, "Separate call control interface. 
That is not fully supported.\n"); if (control_interface == data_interface) { /* some broken devices designed for windows work this way */ dev_warn(&intf->dev,"Control and data interfaces are not separated!\n"); combined_interfaces = 1; /* a popular other OS doesn't use it */ quirks |= NO_CAP_LINE; if (data_interface->cur_altsetting->desc.bNumEndpoints != 3) { dev_err(&intf->dev, "This needs exactly 3 endpoints\n"); return -EINVAL; } look_for_collapsed_interface: res = usb_find_common_endpoints(data_interface->cur_altsetting, &epread, &epwrite, &epctrl, NULL); if (res) return res; goto made_compressed_probe; } skip_normal_probe: /*workaround for switched interfaces */ if (data_interface->cur_altsetting->desc.bInterfaceClass != USB_CLASS_CDC_DATA) { if (control_interface->cur_altsetting->desc.bInterfaceClass == USB_CLASS_CDC_DATA) { dev_dbg(&intf->dev, "Your device has switched interfaces.\n"); swap(control_interface, data_interface); } else { return -EINVAL; } } /* Accept probe requests only for the control interface */ if (!combined_interfaces && intf != control_interface) return -ENODEV; if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 || control_interface->cur_altsetting->desc.bNumEndpoints == 0) return -EINVAL; epctrl = &control_interface->cur_altsetting->endpoint[0].desc; epread = &data_interface->cur_altsetting->endpoint[0].desc; epwrite = &data_interface->cur_altsetting->endpoint[1].desc; /* workaround for switched endpoints */ if (!usb_endpoint_dir_in(epread)) { /* descriptors are swapped */ dev_dbg(&intf->dev, "The data interface has switched endpoints\n"); swap(epread, epwrite); } made_compressed_probe: dev_dbg(&intf->dev, "interfaces are valid\n"); acm = kzalloc(sizeof(struct acm), GFP_KERNEL); if (!acm) return -ENOMEM; tty_port_init(&acm->port); acm->port.ops = &acm_port_ops; ctrlsize = usb_endpoint_maxp(epctrl); readsize = usb_endpoint_maxp(epread) * (quirks == SINGLE_RX_URB ? 
1 : 2); acm->combined_interfaces = combined_interfaces; acm->writesize = usb_endpoint_maxp(epwrite) * 20; acm->control = control_interface; acm->data = data_interface; usb_get_intf(acm->control); /* undone in destruct() */ minor = acm_alloc_minor(acm); if (minor < 0) { acm->minor = ACM_MINOR_INVALID; goto err_put_port; } acm->minor = minor; acm->dev = usb_dev; if (h.usb_cdc_acm_descriptor) acm->ctrl_caps = h.usb_cdc_acm_descriptor->bmCapabilities; if (quirks & NO_CAP_LINE) acm->ctrl_caps &= ~USB_CDC_CAP_LINE; acm->ctrlsize = ctrlsize; acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; INIT_DELAYED_WORK(&acm->dwork, acm_softint); init_waitqueue_head(&acm->wioctl); spin_lock_init(&acm->write_lock); spin_lock_init(&acm->read_lock); mutex_init(&acm->mutex); if (usb_endpoint_xfer_int(epread)) { acm->bInterval = epread->bInterval; acm->in = usb_rcvintpipe(usb_dev, epread->bEndpointAddress); } else { acm->in = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress); } if (usb_endpoint_xfer_int(epwrite)) acm->out = usb_sndintpipe(usb_dev, epwrite->bEndpointAddress); else acm->out = usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress); init_usb_anchor(&acm->delayed); acm->quirks = quirks; buf = usb_alloc_coherent(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma); if (!buf) goto err_put_port; acm->ctrl_buffer = buf; if (acm_write_buffers_alloc(acm) < 0) goto err_free_ctrl_buffer; acm->ctrlurb = usb_alloc_urb(0, GFP_KERNEL); if (!acm->ctrlurb) goto err_free_write_buffers; for (i = 0; i < num_rx_buf; i++) { struct acm_rb *rb = &(acm->read_buffers[i]); struct urb *urb; rb->base = usb_alloc_coherent(acm->dev, readsize, GFP_KERNEL, &rb->dma); if (!rb->base) goto err_free_read_urbs; rb->index = i; rb->instance = acm; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) goto err_free_read_urbs; urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; urb->transfer_dma = rb->dma; if (usb_endpoint_xfer_int(epread)) usb_fill_int_urb(urb, acm->dev, acm->in, rb->base, acm->readsize, acm_read_bulk_callback, rb, acm->bInterval); else usb_fill_bulk_urb(urb, acm->dev, acm->in, rb->base, acm->readsize, acm_read_bulk_callback, rb); acm->read_urbs[i] = urb; __set_bit(i, &acm->read_urbs_free); } for (i = 0; i < ACM_NW; i++) { struct acm_wb *snd = &(acm->wb[i]); snd->urb = usb_alloc_urb(0, GFP_KERNEL); if (!snd->urb) goto err_free_write_urbs; if (usb_endpoint_xfer_int(epwrite)) usb_fill_int_urb(snd->urb, usb_dev, acm->out, NULL, acm->writesize, acm_write_bulk, snd, epwrite->bInterval); else usb_fill_bulk_urb(snd->urb, usb_dev, acm->out, NULL, acm->writesize, acm_write_bulk, snd); snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (quirks & SEND_ZERO_PACKET) snd->urb->transfer_flags |= URB_ZERO_PACKET; snd->instance = acm; } usb_set_intfdata(intf, acm); i = device_create_file(&intf->dev, &dev_attr_bmCapabilities); if (i < 0) goto err_free_write_urbs; if (h.usb_cdc_country_functional_desc) { /* export the country data */ struct usb_cdc_country_functional_desc * cfd = h.usb_cdc_country_functional_desc; acm->country_codes = kmalloc(cfd->bLength - 4, GFP_KERNEL); if (!acm->country_codes) goto skip_countries; acm->country_code_size = cfd->bLength - 4; memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4); acm->country_rel_date = cfd->iCountryCodeRelDate; i = device_create_file(&intf->dev, &dev_attr_wCountryCodes); if (i < 0) { kfree(acm->country_codes); acm->country_codes = NULL; acm->country_code_size = 0; goto skip_countries; } i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate); if (i < 0) { 
device_remove_file(&intf->dev, &dev_attr_wCountryCodes); kfree(acm->country_codes); acm->country_codes = NULL; acm->country_code_size = 0; goto skip_countries; } } skip_countries: usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress), acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, /* works around buggy devices */ epctrl->bInterval ? epctrl->bInterval : 16); acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; acm->ctrlurb->transfer_dma = acm->ctrl_dma; acm->notification_buffer = NULL; acm->nb_index = 0; acm->nb_size = 0; acm->line.dwDTERate = cpu_to_le32(9600); acm->line.bDataBits = 8; acm_set_line(acm, &acm->line); if (!acm->combined_interfaces) { rv = usb_driver_claim_interface(&acm_driver, data_interface, acm); if (rv) goto err_remove_files; } tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor, &control_interface->dev); if (IS_ERR(tty_dev)) { rv = PTR_ERR(tty_dev); goto err_release_data_interface; } if (quirks & CLEAR_HALT_CONDITIONS) { usb_clear_halt(usb_dev, acm->in); usb_clear_halt(usb_dev, acm->out); } dev_info(&intf->dev, "ttyACM%d: USB ACM device\n", minor); return 0; err_release_data_interface: if (!acm->combined_interfaces) { /* Clear driver data so that disconnect() returns early. */ usb_set_intfdata(data_interface, NULL); usb_driver_release_interface(&acm_driver, data_interface); } err_remove_files: if (acm->country_codes) { device_remove_file(&acm->control->dev, &dev_attr_wCountryCodes); device_remove_file(&acm->control->dev, &dev_attr_iCountryCodeRelDate); } device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities); err_free_write_urbs: for (i = 0; i < ACM_NW; i++) usb_free_urb(acm->wb[i].urb); err_free_read_urbs: for (i = 0; i < num_rx_buf; i++) usb_free_urb(acm->read_urbs[i]); acm_read_buffers_free(acm); usb_free_urb(acm->ctrlurb); err_free_write_buffers: acm_write_buffers_free(acm); err_free_ctrl_buffer: usb_free_coherent(usb_dev, ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); err_put_port: tty_port_put(&acm->port); return rv; } static void acm_disconnect(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct tty_struct *tty; int i; /* sibling interface is already cleaning up */ if (!acm) return; acm->disconnected = true; /* * there is a circular dependency. acm_softint() can resubmit * the URBs in error handling so we need to block any * submission right away */ acm_poison_urbs(acm); mutex_lock(&acm->mutex); if (acm->country_codes) { device_remove_file(&acm->control->dev, &dev_attr_wCountryCodes); device_remove_file(&acm->control->dev, &dev_attr_iCountryCodeRelDate); } wake_up_all(&acm->wioctl); device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities); usb_set_intfdata(acm->control, NULL); usb_set_intfdata(acm->data, NULL); mutex_unlock(&acm->mutex); tty = tty_port_tty_get(&acm->port); if (tty) { tty_vhangup(tty); tty_kref_put(tty); } cancel_delayed_work_sync(&acm->dwork); tty_unregister_device(acm_tty_driver, acm->minor); usb_free_urb(acm->ctrlurb); for (i = 0; i < ACM_NW; i++) usb_free_urb(acm->wb[i].urb); for (i = 0; i < acm->rx_buflimit; i++) usb_free_urb(acm->read_urbs[i]); acm_write_buffers_free(acm); usb_free_coherent(acm->dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); acm_read_buffers_free(acm); kfree(acm->notification_buffer); if (!acm->combined_interfaces) usb_driver_release_interface(&acm_driver, intf == acm->control ? 
acm->data : acm->control); tty_port_put(&acm->port); } #ifdef CONFIG_PM static int acm_suspend(struct usb_interface *intf, pm_message_t message) { struct acm *acm = usb_get_intfdata(intf); int cnt; spin_lock_irq(&acm->write_lock); if (PMSG_IS_AUTO(message)) { if (acm->transmitting) { spin_unlock_irq(&acm->write_lock); return -EBUSY; } } cnt = acm->susp_count++; spin_unlock_irq(&acm->write_lock); if (cnt) return 0; acm_poison_urbs(acm); cancel_delayed_work_sync(&acm->dwork); acm->urbs_in_error_delay = 0; return 0; } static int acm_resume(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct urb *urb; int rv = 0; spin_lock_irq(&acm->write_lock); if (--acm->susp_count) goto out; acm_unpoison_urbs(acm); if (tty_port_initialized(&acm->port)) { rv = usb_submit_urb(acm->ctrlurb, GFP_ATOMIC); for (;;) { urb = usb_get_from_anchor(&acm->delayed); if (!urb) break; acm_start_wb(acm, urb->context); } /* * delayed error checking because we must * do the write path at all cost */ if (rv < 0) goto out; rv = acm_submit_read_urbs(acm, GFP_ATOMIC); } out: spin_unlock_irq(&acm->write_lock); return rv; } static int acm_reset_resume(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); if (tty_port_initialized(&acm->port)) tty_port_tty_hangup(&acm->port, false); return acm_resume(intf); } #endif /* CONFIG_PM */ static int acm_pre_reset(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); clear_bit(EVENT_RX_STALL, &acm->flags); acm->nb_index = 0; /* pending control transfers are lost */ return 0; } #define NOKIA_PCSUITE_ACM_INFO(x) \ USB_DEVICE_AND_INTERFACE_INFO(0x0421, x, \ USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \ USB_CDC_ACM_PROTO_VENDOR) #define SAMSUNG_PCSUITE_ACM_INFO(x) \ USB_DEVICE_AND_INTERFACE_INFO(0x04e7, x, \ USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \ USB_CDC_ACM_PROTO_VENDOR) /* * USB driver structure. */ static const struct usb_device_id acm_ids[] = { /* quirky and broken devices */ { USB_DEVICE(0x0424, 0x274e), /* Microchip Technology, Inc. 
(formerly SMSC) */ .driver_info = DISABLE_ECHO, }, /* DISABLE ECHO in termios flag */ { USB_DEVICE(0x076d, 0x0006), /* Denso Cradle CU-321 */ .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */ { USB_DEVICE(0x17ef, 0x7000), /* Lenovo USB modem */ .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */ { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0e8d, 0x2000), /* MediaTek Inc Preloader */ .driver_info = DISABLE_ECHO, /* DISABLE ECHO in termios flag */ }, { USB_DEVICE(0x0e8d, 0x3329), /* MediaTek Inc GPS */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0482, 0x0203), /* KYOCERA AH-K3001V */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x079b, 0x000f), /* BT On-Air USB MODEM */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0ace, 0x1602), /* ZyDAS 56K USB MODEM */ .driver_info = SINGLE_RX_URB, }, { USB_DEVICE(0x0ace, 0x1608), /* ZyDAS 56K USB MODEM */ .driver_info = SINGLE_RX_URB, /* firmware bug */ }, { USB_DEVICE(0x0ace, 0x1611), /* ZyDAS 56K USB MODEM - new version */ .driver_info = SINGLE_RX_URB, /* firmware bug */ }, { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ .driver_info = SINGLE_RX_URB, }, { USB_DEVICE(0x1965, 0x0018), /* Uniden UBC125XLT */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0803, 0x3095), /* Zoom Telephonics Model 3095F USB MODEM */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0572, 0x1321), /* Conexant USB MODEM CX93010 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0572, 0x1324), /* Conexant USB MODEM RD02-D400 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0572, 0x1349), /* Hiro (Conexant) USB MODEM H50228 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x20df, 0x0001), /* Simtec Electronics Entropy Key */ .driver_info = QUIRK_CONTROL_LINE_STATE, }, { USB_DEVICE(0x2184, 0x001c) }, /* GW Instek AFG-2225 */ { USB_DEVICE(0x2184, 0x0036) }, /* GW Instek AFG-125 */ { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ }, /* Motorola H24 HSPA module: */ { USB_DEVICE(0x22b8, 0x2d91) }, /* modem */ { USB_DEVICE(0x22b8, 0x2d92), /* modem + diagnostics */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d93), /* modem + AT port */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d95), /* modem + AT port + diagnostics */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d96), /* modem + NMEA */ .driver_info 
= NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d97), /* modem + diagnostics + NMEA */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d99), /* modem + AT port + NMEA */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d9a), /* modem + AT port + diagnostics + NMEA */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x0572, 0x1329), /* Hummingbird huc56s (Conexant) */ .driver_info = NO_UNION_NORMAL, /* union descriptor misplaced on data interface instead of communications interface. Maybe we should define a new quirk for this. */ }, { USB_DEVICE(0x0572, 0x1340), /* Conexant CX93010-2x UCMxx */ .driver_info = NO_UNION_NORMAL, }, { USB_DEVICE(0x05f9, 0x4002), /* PSC Scanning, Magellan 800i */ .driver_info = NO_UNION_NORMAL, }, { USB_DEVICE(0x1bbb, 0x0003), /* Alcatel OT-I650 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0x1576, 0x03b1), /* Maretron USB100 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ }, { USB_DEVICE(0x0c26, 0x0020), /* Icom ICF3400 Serie */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, }, /* Nokia S60 phones expose two ACM channels. The first is * a modem and is picked up by the standard AT-command * information below. The second is 'vendor-specific' but * is treated as a serial device at the S60 end, so we want * to expose it on Linux too. 
*/ { NOKIA_PCSUITE_ACM_INFO(0x042D), }, /* Nokia 3250 */ { NOKIA_PCSUITE_ACM_INFO(0x04D8), }, /* Nokia 5500 Sport */ { NOKIA_PCSUITE_ACM_INFO(0x04C9), }, /* Nokia E50 */ { NOKIA_PCSUITE_ACM_INFO(0x0419), }, /* Nokia E60 */ { NOKIA_PCSUITE_ACM_INFO(0x044D), }, /* Nokia E61 */ { NOKIA_PCSUITE_ACM_INFO(0x0001), }, /* Nokia E61i */ { NOKIA_PCSUITE_ACM_INFO(0x0475), }, /* Nokia E62 */ { NOKIA_PCSUITE_ACM_INFO(0x0508), }, /* Nokia E65 */ { NOKIA_PCSUITE_ACM_INFO(0x0418), }, /* Nokia E70 */ { NOKIA_PCSUITE_ACM_INFO(0x0425), }, /* Nokia N71 */ { NOKIA_PCSUITE_ACM_INFO(0x0486), }, /* Nokia N73 */ { NOKIA_PCSUITE_ACM_INFO(0x04DF), }, /* Nokia N75 */ { NOKIA_PCSUITE_ACM_INFO(0x000e), }, /* Nokia N77 */ { NOKIA_PCSUITE_ACM_INFO(0x0445), }, /* Nokia N80 */ { NOKIA_PCSUITE_ACM_INFO(0x042F), }, /* Nokia N91 & N91 8GB */ { NOKIA_PCSUITE_ACM_INFO(0x048E), }, /* Nokia N92 */ { NOKIA_PCSUITE_ACM_INFO(0x0420), }, /* Nokia N93 */ { NOKIA_PCSUITE_ACM_INFO(0x04E6), }, /* Nokia N93i */ { NOKIA_PCSUITE_ACM_INFO(0x04B2), }, /* Nokia 5700 XpressMusic */ { NOKIA_PCSUITE_ACM_INFO(0x0134), }, /* Nokia 6110 Navigator (China) */ { NOKIA_PCSUITE_ACM_INFO(0x046E), }, /* Nokia 6110 Navigator */ { NOKIA_PCSUITE_ACM_INFO(0x002f), }, /* Nokia 6120 classic & */ { NOKIA_PCSUITE_ACM_INFO(0x0088), }, /* Nokia 6121 classic */ { NOKIA_PCSUITE_ACM_INFO(0x00fc), }, /* Nokia 6124 classic */ { NOKIA_PCSUITE_ACM_INFO(0x0042), }, /* Nokia E51 */ { NOKIA_PCSUITE_ACM_INFO(0x00b0), }, /* Nokia E66 */ { NOKIA_PCSUITE_ACM_INFO(0x00ab), }, /* Nokia E71 */ { NOKIA_PCSUITE_ACM_INFO(0x0481), }, /* Nokia N76 */ { NOKIA_PCSUITE_ACM_INFO(0x0007), }, /* Nokia N81 & N81 8GB */ { NOKIA_PCSUITE_ACM_INFO(0x0071), }, /* Nokia N82 */ { NOKIA_PCSUITE_ACM_INFO(0x04F0), }, /* Nokia N95 & N95-3 NAM */ { NOKIA_PCSUITE_ACM_INFO(0x0070), }, /* Nokia N95 8GB */ { NOKIA_PCSUITE_ACM_INFO(0x0099), }, /* Nokia 6210 Navigator, RM-367 */ { NOKIA_PCSUITE_ACM_INFO(0x0128), }, /* Nokia 6210 Navigator, RM-419 */ { NOKIA_PCSUITE_ACM_INFO(0x008f), }, /* Nokia 6220 Classic */ { NOKIA_PCSUITE_ACM_INFO(0x00a0), }, /* Nokia 6650 */ { NOKIA_PCSUITE_ACM_INFO(0x007b), }, /* Nokia N78 */ { NOKIA_PCSUITE_ACM_INFO(0x0094), }, /* Nokia N85 */ { NOKIA_PCSUITE_ACM_INFO(0x003a), }, /* Nokia N96 & N96-3 */ { NOKIA_PCSUITE_ACM_INFO(0x00e9), }, /* Nokia 5320 XpressMusic */ { NOKIA_PCSUITE_ACM_INFO(0x0108), }, /* Nokia 5320 XpressMusic 2G */ { NOKIA_PCSUITE_ACM_INFO(0x01f5), }, /* Nokia N97, RM-505 */ { NOKIA_PCSUITE_ACM_INFO(0x02e3), }, /* Nokia 5230, RM-588 */ { NOKIA_PCSUITE_ACM_INFO(0x0178), }, /* Nokia E63 */ { NOKIA_PCSUITE_ACM_INFO(0x010e), }, /* Nokia E75 */ { NOKIA_PCSUITE_ACM_INFO(0x02d9), }, /* Nokia 6760 Slide */ { NOKIA_PCSUITE_ACM_INFO(0x01d0), }, /* Nokia E52 */ { NOKIA_PCSUITE_ACM_INFO(0x0223), }, /* Nokia E72 */ { NOKIA_PCSUITE_ACM_INFO(0x0275), }, /* Nokia X6 */ { NOKIA_PCSUITE_ACM_INFO(0x026c), }, /* Nokia N97 Mini */ { NOKIA_PCSUITE_ACM_INFO(0x0154), }, /* Nokia 5800 XpressMusic */ { NOKIA_PCSUITE_ACM_INFO(0x04ce), }, /* Nokia E90 */ { NOKIA_PCSUITE_ACM_INFO(0x01d4), }, /* Nokia E55 */ { NOKIA_PCSUITE_ACM_INFO(0x0302), }, /* Nokia N8 */ { NOKIA_PCSUITE_ACM_INFO(0x0335), }, /* Nokia E7 */ { NOKIA_PCSUITE_ACM_INFO(0x03cd), }, /* Nokia C7 */ { SAMSUNG_PCSUITE_ACM_INFO(0x6651), }, /* Samsung GTi8510 (INNOV8) */ /* Support for Owen devices */ { USB_DEVICE(0x03eb, 0x0030), }, /* Owen SI30 */ /* NOTE: non-Nokia COMM/ACM/0xff is likely MSFT RNDIS... NOT a modem! 
*/ #if IS_ENABLED(CONFIG_INPUT_IMS_PCU) { USB_DEVICE(0x04d8, 0x0082), /* Application mode */ .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x04d8, 0x0083), /* Bootloader mode */ .driver_info = IGNORE_DEVICE, }, #endif #if IS_ENABLED(CONFIG_IR_TOY) { USB_DEVICE(0x04d8, 0xfd08), .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x04d8, 0xf58b), .driver_info = IGNORE_DEVICE, }, #endif #if IS_ENABLED(CONFIG_USB_SERIAL_XR) { USB_DEVICE(0x04e2, 0x1400), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1401), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1402), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1403), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1410), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1411), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1412), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1414), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1420), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1422), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1424), .driver_info = IGNORE_DEVICE }, #endif /*Samsung phone in firmware update mode */ { USB_DEVICE(0x04e8, 0x685d), .driver_info = IGNORE_DEVICE, }, /* Exclude Infineon Flash Loader utility */ { USB_DEVICE(0x058b, 0x0041), .driver_info = IGNORE_DEVICE, }, /* Exclude ETAS ES58x */ { USB_DEVICE(0x108c, 0x0159), /* ES581.4 */ .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x108c, 0x0168), /* ES582.1 */ .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x108c, 0x0169), /* ES584.1 */ .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x1bc7, 0x0021), /* Telit 3G ACM only composition */ .driver_info = SEND_ZERO_PACKET, }, { USB_DEVICE(0x1bc7, 0x0023), /* Telit 3G ACM + ECM composition */ .driver_info = SEND_ZERO_PACKET, }, /* Exclude Goodix Fingerprint Reader */ { USB_DEVICE(0x27c6, 0x5395), .driver_info = IGNORE_DEVICE, }, /* Exclude Heimann Sensor GmbH USB appset demo */ { USB_DEVICE(0x32a7, 0x0000), .driver_info = IGNORE_DEVICE, }, /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, /* control interfaces with various AT-command sets */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_V25TER) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_PCCA101) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_PCCA101_WAKE) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_GSM) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_3G) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_CDMA) }, { USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */ .driver_info = SEND_ZERO_PACKET, }, { } }; MODULE_DEVICE_TABLE(usb, acm_ids); static struct usb_driver acm_driver = { .name = "cdc_acm", .probe = acm_probe, .disconnect = acm_disconnect, #ifdef CONFIG_PM .suspend = acm_suspend, .resume = acm_resume, .reset_resume = acm_reset_resume, #endif .pre_reset = acm_pre_reset, .id_table = acm_ids, #ifdef CONFIG_PM .supports_autosuspend = 1, #endif .disable_hub_initiated_lpm = 1, }; /* * TTY driver structures. 
*/ static const struct tty_operations acm_ops = { .install = acm_tty_install, .open = acm_tty_open, .close = acm_tty_close, .cleanup = acm_tty_cleanup, .hangup = acm_tty_hangup, .write = acm_tty_write, .write_room = acm_tty_write_room, .flush_buffer = acm_tty_flush_buffer, .ioctl = acm_tty_ioctl, .throttle = acm_tty_throttle, .unthrottle = acm_tty_unthrottle, .chars_in_buffer = acm_tty_chars_in_buffer, .break_ctl = acm_tty_break_ctl, .set_termios = acm_tty_set_termios, .tiocmget = acm_tty_tiocmget, .tiocmset = acm_tty_tiocmset, .get_serial = get_serial_info, .set_serial = set_serial_info, .get_icount = acm_tty_get_icount, }; /* * Init / exit. */ static int __init acm_init(void) { int retval; acm_tty_driver = tty_alloc_driver(ACM_TTY_MINORS, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV); if (IS_ERR(acm_tty_driver)) return PTR_ERR(acm_tty_driver); acm_tty_driver->driver_name = "acm", acm_tty_driver->name = "ttyACM", acm_tty_driver->major = ACM_TTY_MAJOR, acm_tty_driver->minor_start = 0, acm_tty_driver->type = TTY_DRIVER_TYPE_SERIAL, acm_tty_driver->subtype = SERIAL_TYPE_NORMAL, acm_tty_driver->init_termios = tty_std_termios; acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; tty_set_operations(acm_tty_driver, &acm_ops); retval = tty_register_driver(acm_tty_driver); if (retval) { tty_driver_kref_put(acm_tty_driver); return retval; } retval = usb_register(&acm_driver); if (retval) { tty_unregister_driver(acm_tty_driver); tty_driver_kref_put(acm_tty_driver); return retval; } printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_DESC "\n"); return 0; } static void __exit acm_exit(void) { usb_deregister(&acm_driver); tty_unregister_driver(acm_tty_driver); tty_driver_kref_put(acm_tty_driver); idr_destroy(&acm_minors); } module_init(acm_init); module_exit(acm_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR);
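/*
 * Editor's illustration, not part of the driver source: a minimal user-space
 * sketch of the TIOCMIWAIT path serviced above by acm_tty_ioctl() and
 * wait_serial_change(). The device node name below is an assumption (any
 * ttyACM node registered by this driver would do); the program simply blocks
 * until the driver reports a DCD or DSR transition on the modem-control lines.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical device node created by cdc_acm for the first minor. */
	int fd = open("/dev/ttyACM0", O_RDWR | O_NOCTTY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Sleeps in the driver's wait_serial_change() until DCD or DSR changes. */
	if (ioctl(fd, TIOCMIWAIT, TIOCM_CD | TIOCM_DSR) < 0)
		perror("TIOCMIWAIT");

	close(fd);
	return 0;
}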
// SPDX-License-Identifier: GPL-2.0-only /* * Media entity * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> */ #include <linux/bitmap.h> #include <linux/list.h> #include <linux/property.h> #include <linux/slab.h> #include <media/media-entity.h> #include <media/media-device.h> static inline const char *intf_type(struct media_interface *intf) { switch (intf->type) { case MEDIA_INTF_T_DVB_FE: return "dvb-frontend"; case MEDIA_INTF_T_DVB_DEMUX: return "dvb-demux"; case MEDIA_INTF_T_DVB_DVR: return "dvb-dvr"; case MEDIA_INTF_T_DVB_CA: return "dvb-ca"; case MEDIA_INTF_T_DVB_NET: return "dvb-net"; case MEDIA_INTF_T_V4L_VIDEO: return "v4l-video"; case MEDIA_INTF_T_V4L_VBI: return "v4l-vbi"; case MEDIA_INTF_T_V4L_RADIO: return "v4l-radio"; case MEDIA_INTF_T_V4L_SUBDEV: return "v4l-subdev"; case MEDIA_INTF_T_V4L_SWRADIO: return "v4l-swradio"; case MEDIA_INTF_T_V4L_TOUCH: return "v4l-touch"; default: return "unknown-intf"; } }; static inline const char *link_type_name(struct media_link *link) { switch (link->flags & MEDIA_LNK_FL_LINK_TYPE) { case MEDIA_LNK_FL_DATA_LINK: return "data"; case MEDIA_LNK_FL_INTERFACE_LINK: return "interface"; case MEDIA_LNK_FL_ANCILLARY_LINK: return "ancillary"; default: return "unknown"; } } __must_check int media_entity_enum_init(struct media_entity_enum *ent_enum, struct media_device *mdev) { int idx_max; idx_max = ALIGN(mdev->entity_internal_idx_max + 1, BITS_PER_LONG); ent_enum->bmap = bitmap_zalloc(idx_max, GFP_KERNEL); if (!ent_enum->bmap) return -ENOMEM; ent_enum->idx_max = idx_max; return 0; } EXPORT_SYMBOL_GPL(media_entity_enum_init); void media_entity_enum_cleanup(struct media_entity_enum *ent_enum) { bitmap_free(ent_enum->bmap); } EXPORT_SYMBOL_GPL(media_entity_enum_cleanup); /** * dev_dbg_obj - Prints in debug mode a change on some object * * @event_name: Name of the event to report. Could be __func__ * @gobj: Pointer to the object * * Enabled only if DEBUG or CONFIG_DYNAMIC_DEBUG. Otherwise, it * won't produce any code. */ static void dev_dbg_obj(const char *event_name, struct media_gobj *gobj) { #if defined(DEBUG) || defined (CONFIG_DYNAMIC_DEBUG) switch (media_type(gobj)) { case MEDIA_GRAPH_ENTITY: dev_dbg(gobj->mdev->dev, "%s id %u: entity '%s'\n", event_name, media_id(gobj), gobj_to_entity(gobj)->name); break; case MEDIA_GRAPH_LINK: { struct media_link *link = gobj_to_link(gobj); dev_dbg(gobj->mdev->dev, "%s id %u: %s link id %u ==> id %u\n", event_name, media_id(gobj), link_type_name(link), media_id(link->gobj0), media_id(link->gobj1)); break; } case MEDIA_GRAPH_PAD: { struct media_pad *pad = gobj_to_pad(gobj); dev_dbg(gobj->mdev->dev, "%s id %u: %s%spad '%s':%d\n", event_name, media_id(gobj), pad->flags & MEDIA_PAD_FL_SINK ? "sink " : "", pad->flags & MEDIA_PAD_FL_SOURCE ?
"source " : "", pad->entity->name, pad->index); break; } case MEDIA_GRAPH_INTF_DEVNODE: { struct media_interface *intf = gobj_to_intf(gobj); struct media_intf_devnode *devnode = intf_to_devnode(intf); dev_dbg(gobj->mdev->dev, "%s id %u: intf_devnode %s - major: %d, minor: %d\n", event_name, media_id(gobj), intf_type(intf), devnode->major, devnode->minor); break; } } #endif } void media_gobj_create(struct media_device *mdev, enum media_gobj_type type, struct media_gobj *gobj) { BUG_ON(!mdev); gobj->mdev = mdev; /* Create a per-type unique object ID */ gobj->id = media_gobj_gen_id(type, ++mdev->id); switch (type) { case MEDIA_GRAPH_ENTITY: list_add_tail(&gobj->list, &mdev->entities); break; case MEDIA_GRAPH_PAD: list_add_tail(&gobj->list, &mdev->pads); break; case MEDIA_GRAPH_LINK: list_add_tail(&gobj->list, &mdev->links); break; case MEDIA_GRAPH_INTF_DEVNODE: list_add_tail(&gobj->list, &mdev->interfaces); break; } mdev->topology_version++; dev_dbg_obj(__func__, gobj); } void media_gobj_destroy(struct media_gobj *gobj) { /* Do nothing if the object is not linked. */ if (gobj->mdev == NULL) return; dev_dbg_obj(__func__, gobj); gobj->mdev->topology_version++; /* Remove the object from mdev list */ list_del(&gobj->list); gobj->mdev = NULL; } /* * TODO: Get rid of this. */ #define MEDIA_ENTITY_MAX_PADS 512 int media_entity_pads_init(struct media_entity *entity, u16 num_pads, struct media_pad *pads) { struct media_device *mdev = entity->graph_obj.mdev; struct media_pad *iter; unsigned int i = 0; int ret = 0; if (num_pads >= MEDIA_ENTITY_MAX_PADS) return -E2BIG; entity->num_pads = num_pads; entity->pads = pads; if (mdev) mutex_lock(&mdev->graph_mutex); media_entity_for_each_pad(entity, iter) { iter->entity = entity; iter->index = i++; if (hweight32(iter->flags & (MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_SOURCE)) != 1) { ret = -EINVAL; break; } if (mdev) media_gobj_create(mdev, MEDIA_GRAPH_PAD, &iter->graph_obj); } if (ret && mdev) { media_entity_for_each_pad(entity, iter) media_gobj_destroy(&iter->graph_obj); } if (mdev) mutex_unlock(&mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_entity_pads_init); /* ----------------------------------------------------------------------------- * Graph traversal */ /** * media_entity_has_pad_interdep - Check interdependency between two pads * * @entity: The entity * @pad0: The first pad index * @pad1: The second pad index * * This function checks the interdependency inside the entity between @pad0 * and @pad1. If two pads are interdependent they are part of the same pipeline * and enabling one of the pads means that the other pad will become "locked" * and doesn't allow configuration changes. * * This function uses the &media_entity_operations.has_pad_interdep() operation * to check the dependency inside the entity between @pad0 and @pad1. If the * has_pad_interdep operation is not implemented, all pads of the entity are * considered to be interdependent. * * One of @pad0 and @pad1 must be a sink pad and the other one a source pad. * The function returns false if both pads are sinks or sources. * * The caller must hold entity->graph_obj.mdev->mutex. * * Return: true if the pads are connected internally and false otherwise. 
*/ static bool media_entity_has_pad_interdep(struct media_entity *entity, unsigned int pad0, unsigned int pad1) { if (pad0 >= entity->num_pads || pad1 >= entity->num_pads) return false; if (entity->pads[pad0].flags & entity->pads[pad1].flags & (MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_SOURCE)) return false; if (!entity->ops || !entity->ops->has_pad_interdep) return true; return entity->ops->has_pad_interdep(entity, pad0, pad1); } static struct media_entity * media_entity_other(struct media_entity *entity, struct media_link *link) { if (link->source->entity == entity) return link->sink->entity; else return link->source->entity; } /* push an entity to traversal stack */ static void stack_push(struct media_graph *graph, struct media_entity *entity) { if (graph->top == MEDIA_ENTITY_ENUM_MAX_DEPTH - 1) { WARN_ON(1); return; } graph->top++; graph->stack[graph->top].link = entity->links.next; graph->stack[graph->top].entity = entity; } static struct media_entity *stack_pop(struct media_graph *graph) { struct media_entity *entity; entity = graph->stack[graph->top].entity; graph->top--; return entity; } #define link_top(en) ((en)->stack[(en)->top].link) #define stack_top(en) ((en)->stack[(en)->top].entity) /** * media_graph_walk_init - Allocate resources for graph walk * @graph: Media graph structure that will be used to walk the graph * @mdev: Media device * * Reserve resources for graph walk in media device's current * state. The memory must be released using * media_graph_walk_cleanup(). * * Returns error on failure, zero on success. */ __must_check int media_graph_walk_init( struct media_graph *graph, struct media_device *mdev) { return media_entity_enum_init(&graph->ent_enum, mdev); } EXPORT_SYMBOL_GPL(media_graph_walk_init); /** * media_graph_walk_cleanup - Release resources related to graph walking * @graph: Media graph structure that was used to walk the graph */ void media_graph_walk_cleanup(struct media_graph *graph) { media_entity_enum_cleanup(&graph->ent_enum); } EXPORT_SYMBOL_GPL(media_graph_walk_cleanup); void media_graph_walk_start(struct media_graph *graph, struct media_entity *entity) { media_entity_enum_zero(&graph->ent_enum); media_entity_enum_set(&graph->ent_enum, entity); graph->top = 0; graph->stack[graph->top].entity = NULL; stack_push(graph, entity); dev_dbg(entity->graph_obj.mdev->dev, "begin graph walk at '%s'\n", entity->name); } EXPORT_SYMBOL_GPL(media_graph_walk_start); static void media_graph_walk_iter(struct media_graph *graph) { struct media_entity *entity = stack_top(graph); struct media_link *link; struct media_entity *next; link = list_entry(link_top(graph), typeof(*link), list); /* If the link is not a data link, don't follow it */ if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) != MEDIA_LNK_FL_DATA_LINK) { link_top(graph) = link_top(graph)->next; return; } /* The link is not enabled so we do not follow. */ if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { link_top(graph) = link_top(graph)->next; dev_dbg(entity->graph_obj.mdev->dev, "walk: skipping disabled link '%s':%u -> '%s':%u\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); return; } /* Get the entity at the other end of the link. */ next = media_entity_other(entity, link); /* Has the entity already been visited? */ if (media_entity_enum_test_and_set(&graph->ent_enum, next)) { link_top(graph) = link_top(graph)->next; dev_dbg(entity->graph_obj.mdev->dev, "walk: skipping entity '%s' (already seen)\n", next->name); return; } /* Push the new entity to stack and start over. 
*/ link_top(graph) = link_top(graph)->next; stack_push(graph, next); dev_dbg(entity->graph_obj.mdev->dev, "walk: pushing '%s' on stack\n", next->name); lockdep_assert_held(&entity->graph_obj.mdev->graph_mutex); } struct media_entity *media_graph_walk_next(struct media_graph *graph) { struct media_entity *entity; if (stack_top(graph) == NULL) return NULL; /* * Depth first search. Push entity to stack and continue from * top of the stack until no more entities on the level can be * found. */ while (link_top(graph) != &stack_top(graph)->links) media_graph_walk_iter(graph); entity = stack_pop(graph); dev_dbg(entity->graph_obj.mdev->dev, "walk: returning entity '%s'\n", entity->name); return entity; } EXPORT_SYMBOL_GPL(media_graph_walk_next); /* ----------------------------------------------------------------------------- * Pipeline management */ /* * The pipeline traversal stack stores pads that are reached during graph * traversal, with a list of links to be visited to continue the traversal. * When a new pad is reached, an entry is pushed on the top of the stack and * points to the incoming pad and the first link of the entity. * * To find further pads in the pipeline, the traversal algorithm follows * internal pad dependencies in the entity, and then links in the graph. It * does so by iterating over all links of the entity, and following enabled * links that originate from a pad that is internally connected to the incoming * pad, as reported by the media_entity_has_pad_interdep() function. */ /** * struct media_pipeline_walk_entry - Entry in the pipeline traversal stack * * @pad: The media pad being visited * @links: Links left to be visited */ struct media_pipeline_walk_entry { struct media_pad *pad; struct list_head *links; }; /** * struct media_pipeline_walk - State used by the media pipeline traversal * algorithm * * @mdev: The media device * @stack: Depth-first search stack * @stack.size: Number of allocated entries in @stack.entries * @stack.top: Index of the top stack entry (-1 if the stack is empty) * @stack.entries: Stack entries */ struct media_pipeline_walk { struct media_device *mdev; struct { unsigned int size; int top; struct media_pipeline_walk_entry *entries; } stack; }; #define MEDIA_PIPELINE_STACK_GROW_STEP 16 static struct media_pipeline_walk_entry * media_pipeline_walk_top(struct media_pipeline_walk *walk) { return &walk->stack.entries[walk->stack.top]; } static bool media_pipeline_walk_empty(struct media_pipeline_walk *walk) { return walk->stack.top == -1; } /* Increase the stack size by MEDIA_PIPELINE_STACK_GROW_STEP elements. */ static int media_pipeline_walk_resize(struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entries; unsigned int new_size; /* Safety check, to avoid stack overflows in case of bugs. */ if (walk->stack.size >= 256) return -E2BIG; new_size = walk->stack.size + MEDIA_PIPELINE_STACK_GROW_STEP; entries = krealloc(walk->stack.entries, new_size * sizeof(*walk->stack.entries), GFP_KERNEL); if (!entries) return -ENOMEM; walk->stack.entries = entries; walk->stack.size = new_size; return 0; } /* Push a new entry on the stack. 
*/ static int media_pipeline_walk_push(struct media_pipeline_walk *walk, struct media_pad *pad) { struct media_pipeline_walk_entry *entry; int ret; if (walk->stack.top + 1 >= walk->stack.size) { ret = media_pipeline_walk_resize(walk); if (ret) return ret; } walk->stack.top++; entry = media_pipeline_walk_top(walk); entry->pad = pad; entry->links = pad->entity->links.next; dev_dbg(walk->mdev->dev, "media pipeline: pushed entry %u: '%s':%u\n", walk->stack.top, pad->entity->name, pad->index); return 0; } /* * Move the top entry link cursor to the next link. If all links of the entry * have been visited, pop the entry itself. Return true if the entry has been * popped. */ static bool media_pipeline_walk_pop(struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry; if (WARN_ON(walk->stack.top < 0)) return false; entry = media_pipeline_walk_top(walk); if (entry->links->next == &entry->pad->entity->links) { dev_dbg(walk->mdev->dev, "media pipeline: entry %u has no more links, popping\n", walk->stack.top); walk->stack.top--; return true; } entry->links = entry->links->next; dev_dbg(walk->mdev->dev, "media pipeline: moved entry %u to next link\n", walk->stack.top); return false; } /* Free all memory allocated while walking the pipeline. */ static void media_pipeline_walk_destroy(struct media_pipeline_walk *walk) { kfree(walk->stack.entries); } /* Add a pad to the pipeline and push it to the stack. */ static int media_pipeline_add_pad(struct media_pipeline *pipe, struct media_pipeline_walk *walk, struct media_pad *pad) { struct media_pipeline_pad *ppad; list_for_each_entry(ppad, &pipe->pads, list) { if (ppad->pad == pad) { dev_dbg(pad->graph_obj.mdev->dev, "media pipeline: already contains pad '%s':%u\n", pad->entity->name, pad->index); return 0; } } ppad = kzalloc(sizeof(*ppad), GFP_KERNEL); if (!ppad) return -ENOMEM; ppad->pipe = pipe; ppad->pad = pad; list_add_tail(&ppad->list, &pipe->pads); dev_dbg(pad->graph_obj.mdev->dev, "media pipeline: added pad '%s':%u\n", pad->entity->name, pad->index); return media_pipeline_walk_push(walk, pad); } /* Explore the next link of the entity at the top of the stack. */ static int media_pipeline_explore_next_link(struct media_pipeline *pipe, struct media_pipeline_walk *walk) { struct media_pipeline_walk_entry *entry = media_pipeline_walk_top(walk); struct media_pad *origin; struct media_link *link; struct media_pad *local; struct media_pad *remote; bool last_link; int ret; origin = entry->pad; link = list_entry(entry->links, typeof(*link), list); last_link = media_pipeline_walk_pop(walk); dev_dbg(walk->mdev->dev, "media pipeline: exploring link '%s':%u -> '%s':%u\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); /* Get the local pad and remote pad. */ if (link->source->entity == origin->entity) { local = link->source; remote = link->sink; } else { local = link->sink; remote = link->source; } /* * Skip links that originate from a different pad than the incoming pad * that is not connected internally in the entity to the incoming pad. */ if (origin != local && !media_entity_has_pad_interdep(origin->entity, origin->index, local->index)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (no route)\n"); goto done; } /* * Add the local pad of the link to the pipeline and push it to the * stack, if not already present. */ ret = media_pipeline_add_pad(pipe, walk, local); if (ret) return ret; /* Similarly, add the remote pad, but only if the link is enabled. 
*/ if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { dev_dbg(walk->mdev->dev, "media pipeline: skipping link (disabled)\n"); goto done; } ret = media_pipeline_add_pad(pipe, walk, remote); if (ret) return ret; done: /* * If we're done iterating over links, iterate over pads of the entity. * This is necessary to discover pads that are not connected with any * link. Those are dead ends from a pipeline exploration point of view, * but are still part of the pipeline and need to be added to enable * proper validation. */ if (!last_link) return 0; dev_dbg(walk->mdev->dev, "media pipeline: adding unconnected pads of '%s'\n", local->entity->name); media_entity_for_each_pad(origin->entity, local) { /* * Skip the origin pad (already handled), pad that have links * (already discovered through iterating over links) and pads * not internally connected. */ if (origin == local || !local->num_links || !media_entity_has_pad_interdep(origin->entity, origin->index, local->index)) continue; ret = media_pipeline_add_pad(pipe, walk, local); if (ret) return ret; } return 0; } static void media_pipeline_cleanup(struct media_pipeline *pipe) { while (!list_empty(&pipe->pads)) { struct media_pipeline_pad *ppad; ppad = list_first_entry(&pipe->pads, typeof(*ppad), list); list_del(&ppad->list); kfree(ppad); } } static int media_pipeline_populate(struct media_pipeline *pipe, struct media_pad *pad) { struct media_pipeline_walk walk = { }; struct media_pipeline_pad *ppad; int ret; /* * Populate the media pipeline by walking the media graph, starting * from @pad. */ INIT_LIST_HEAD(&pipe->pads); pipe->mdev = pad->graph_obj.mdev; walk.mdev = pipe->mdev; walk.stack.top = -1; ret = media_pipeline_add_pad(pipe, &walk, pad); if (ret) goto done; /* * Use a depth-first search algorithm: as long as the stack is not * empty, explore the next link of the top entry. The * media_pipeline_explore_next_link() function will either move to the * next link, pop the entry if fully visited, or add new entries on * top. */ while (!media_pipeline_walk_empty(&walk)) { ret = media_pipeline_explore_next_link(pipe, &walk); if (ret) goto done; } dev_dbg(pad->graph_obj.mdev->dev, "media pipeline populated, found pads:\n"); list_for_each_entry(ppad, &pipe->pads, list) dev_dbg(pad->graph_obj.mdev->dev, "- '%s':%u\n", ppad->pad->entity->name, ppad->pad->index); WARN_ON(walk.stack.top != -1); ret = 0; done: media_pipeline_walk_destroy(&walk); if (ret) media_pipeline_cleanup(pipe); return ret; } __must_check int __media_pipeline_start(struct media_pad *pad, struct media_pipeline *pipe) { struct media_device *mdev = pad->graph_obj.mdev; struct media_pipeline_pad *err_ppad; struct media_pipeline_pad *ppad; int ret; lockdep_assert_held(&mdev->graph_mutex); /* * If the pad is already part of a pipeline, that pipeline must be the * same as the pipe given to media_pipeline_start(). */ if (WARN_ON(pad->pipe && pad->pipe != pipe)) return -EINVAL; /* * If the pipeline has already been started, it is guaranteed to be * valid, so just increase the start count. */ if (pipe->start_count) { pipe->start_count++; return 0; } /* * Populate the pipeline. This populates the media_pipeline pads list * with media_pipeline_pad instances for each pad found during graph * walk. */ ret = media_pipeline_populate(pipe, pad); if (ret) return ret; /* * Now that all the pads in the pipeline have been gathered, perform * the validation steps. 
*/ list_for_each_entry(ppad, &pipe->pads, list) { struct media_pad *pad = ppad->pad; struct media_entity *entity = pad->entity; bool has_enabled_link = false; struct media_link *link; dev_dbg(mdev->dev, "Validating pad '%s':%u\n", pad->entity->name, pad->index); /* * 1. Ensure that the pad doesn't already belong to a different * pipeline. */ if (pad->pipe) { dev_dbg(mdev->dev, "Failed to start pipeline: pad '%s':%u busy\n", pad->entity->name, pad->index); ret = -EBUSY; goto error; } /* * 2. Validate all active links whose sink is the current pad. * Validation of the source pads is performed in the context of * the connected sink pad to avoid duplicating checks. */ for_each_media_entity_data_link(entity, link) { /* Skip links unrelated to the current pad. */ if (link->sink != pad && link->source != pad) continue; /* Record if the pad has links and enabled links. */ if (link->flags & MEDIA_LNK_FL_ENABLED) has_enabled_link = true; /* * Validate the link if it's enabled and has the * current pad as its sink. */ if (!(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (link->sink != pad) continue; if (!entity->ops || !entity->ops->link_validate) continue; ret = entity->ops->link_validate(link); if (ret) { dev_dbg(mdev->dev, "Link '%s':%u -> '%s':%u failed validation: %d\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index, ret); goto error; } dev_dbg(mdev->dev, "Link '%s':%u -> '%s':%u is valid\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); } /* * 3. If the pad has the MEDIA_PAD_FL_MUST_CONNECT flag set, * ensure that it has either no link or an enabled link. */ if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && !has_enabled_link) { dev_dbg(mdev->dev, "Pad '%s':%u must be connected by an enabled link\n", pad->entity->name, pad->index); ret = -ENOLINK; goto error; } /* Validation passed, store the pipe pointer in the pad. */ pad->pipe = pipe; } pipe->start_count++; return 0; error: /* * Link validation on graph failed. We revert what we did and * return the error. 
*/ list_for_each_entry(err_ppad, &pipe->pads, list) { if (err_ppad == ppad) break; err_ppad->pad->pipe = NULL; } media_pipeline_cleanup(pipe); return ret; } EXPORT_SYMBOL_GPL(__media_pipeline_start); __must_check int media_pipeline_start(struct media_pad *pad, struct media_pipeline *pipe) { struct media_device *mdev = pad->graph_obj.mdev; int ret; mutex_lock(&mdev->graph_mutex); ret = __media_pipeline_start(pad, pipe); mutex_unlock(&mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_pipeline_start); void __media_pipeline_stop(struct media_pad *pad) { struct media_pipeline *pipe = pad->pipe; struct media_pipeline_pad *ppad; /* * If the following check fails, the driver has performed an * unbalanced call to media_pipeline_stop() */ if (WARN_ON(!pipe)) return; if (--pipe->start_count) return; list_for_each_entry(ppad, &pipe->pads, list) ppad->pad->pipe = NULL; media_pipeline_cleanup(pipe); if (pipe->allocated) kfree(pipe); } EXPORT_SYMBOL_GPL(__media_pipeline_stop); void media_pipeline_stop(struct media_pad *pad) { struct media_device *mdev = pad->graph_obj.mdev; mutex_lock(&mdev->graph_mutex); __media_pipeline_stop(pad); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_pipeline_stop); __must_check int media_pipeline_alloc_start(struct media_pad *pad) { struct media_device *mdev = pad->graph_obj.mdev; struct media_pipeline *new_pipe = NULL; struct media_pipeline *pipe; int ret; mutex_lock(&mdev->graph_mutex); /* * Is the pad already part of a pipeline? If not, we need to allocate * a pipe. */ pipe = media_pad_pipeline(pad); if (!pipe) { new_pipe = kzalloc(sizeof(*new_pipe), GFP_KERNEL); if (!new_pipe) { ret = -ENOMEM; goto out; } pipe = new_pipe; pipe->allocated = true; } ret = __media_pipeline_start(pad, pipe); if (ret) kfree(new_pipe); out: mutex_unlock(&mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_pipeline_alloc_start); struct media_pad * __media_pipeline_pad_iter_next(struct media_pipeline *pipe, struct media_pipeline_pad_iter *iter, struct media_pad *pad) { if (!pad) iter->cursor = pipe->pads.next; if (iter->cursor == &pipe->pads) return NULL; pad = list_entry(iter->cursor, struct media_pipeline_pad, list)->pad; iter->cursor = iter->cursor->next; return pad; } EXPORT_SYMBOL_GPL(__media_pipeline_pad_iter_next); int media_pipeline_entity_iter_init(struct media_pipeline *pipe, struct media_pipeline_entity_iter *iter) { return media_entity_enum_init(&iter->ent_enum, pipe->mdev); } EXPORT_SYMBOL_GPL(media_pipeline_entity_iter_init); void media_pipeline_entity_iter_cleanup(struct media_pipeline_entity_iter *iter) { media_entity_enum_cleanup(&iter->ent_enum); } EXPORT_SYMBOL_GPL(media_pipeline_entity_iter_cleanup); struct media_entity * __media_pipeline_entity_iter_next(struct media_pipeline *pipe, struct media_pipeline_entity_iter *iter, struct media_entity *entity) { if (!entity) iter->cursor = pipe->pads.next; while (iter->cursor != &pipe->pads) { struct media_pipeline_pad *ppad; struct media_entity *entity; ppad = list_entry(iter->cursor, struct media_pipeline_pad, list); entity = ppad->pad->entity; iter->cursor = iter->cursor->next; if (!media_entity_enum_test_and_set(&iter->ent_enum, entity)) return entity; } return NULL; } EXPORT_SYMBOL_GPL(__media_pipeline_entity_iter_next); /* ----------------------------------------------------------------------------- * Links management */ static struct media_link *media_add_link(struct list_head *head) { struct media_link *link; link = kzalloc(sizeof(*link), GFP_KERNEL); if (link == NULL) return NULL; 
list_add_tail(&link->list, head); return link; } static void __media_entity_remove_link(struct media_entity *entity, struct media_link *link) { struct media_link *rlink, *tmp; struct media_entity *remote; /* Remove the reverse links for a data link. */ if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == MEDIA_LNK_FL_DATA_LINK) { link->source->num_links--; link->sink->num_links--; if (link->source->entity == entity) remote = link->sink->entity; else remote = link->source->entity; list_for_each_entry_safe(rlink, tmp, &remote->links, list) { if (rlink != link->reverse) continue; if (link->source->entity == entity) remote->num_backlinks--; /* Remove the remote link */ list_del(&rlink->list); media_gobj_destroy(&rlink->graph_obj); kfree(rlink); if (--remote->num_links == 0) break; } } list_del(&link->list); media_gobj_destroy(&link->graph_obj); kfree(link); } int media_get_pad_index(struct media_entity *entity, u32 pad_type, enum media_pad_signal_type sig_type) { unsigned int i; if (!entity) return -EINVAL; for (i = 0; i < entity->num_pads; i++) { if ((entity->pads[i].flags & (MEDIA_PAD_FL_SINK | MEDIA_PAD_FL_SOURCE)) != pad_type) continue; if (entity->pads[i].sig_type == sig_type) return i; } return -EINVAL; } EXPORT_SYMBOL_GPL(media_get_pad_index); int media_create_pad_link(struct media_entity *source, u16 source_pad, struct media_entity *sink, u16 sink_pad, u32 flags) { struct media_link *link; struct media_link *backlink; if (flags & MEDIA_LNK_FL_LINK_TYPE) return -EINVAL; flags |= MEDIA_LNK_FL_DATA_LINK; if (WARN_ON(!source || !sink) || WARN_ON(source_pad >= source->num_pads) || WARN_ON(sink_pad >= sink->num_pads)) return -EINVAL; if (WARN_ON(!(source->pads[source_pad].flags & MEDIA_PAD_FL_SOURCE))) return -EINVAL; if (WARN_ON(!(sink->pads[sink_pad].flags & MEDIA_PAD_FL_SINK))) return -EINVAL; link = media_add_link(&source->links); if (link == NULL) return -ENOMEM; link->source = &source->pads[source_pad]; link->sink = &sink->pads[sink_pad]; link->flags = flags; /* Initialize graph object embedded at the new link */ media_gobj_create(source->graph_obj.mdev, MEDIA_GRAPH_LINK, &link->graph_obj); /* Create the backlink. Backlinks are used to help graph traversal and * are not reported to userspace. 
*/ backlink = media_add_link(&sink->links); if (backlink == NULL) { __media_entity_remove_link(source, link); return -ENOMEM; } backlink->source = &source->pads[source_pad]; backlink->sink = &sink->pads[sink_pad]; backlink->flags = flags; backlink->is_backlink = true; /* Initialize graph object embedded at the new link */ media_gobj_create(sink->graph_obj.mdev, MEDIA_GRAPH_LINK, &backlink->graph_obj); link->reverse = backlink; backlink->reverse = link; sink->num_backlinks++; sink->num_links++; source->num_links++; link->source->num_links++; link->sink->num_links++; return 0; } EXPORT_SYMBOL_GPL(media_create_pad_link); int media_create_pad_links(const struct media_device *mdev, const u32 source_function, struct media_entity *source, const u16 source_pad, const u32 sink_function, struct media_entity *sink, const u16 sink_pad, u32 flags, const bool allow_both_undefined) { struct media_entity *entity; unsigned function; int ret; /* Trivial case: 1:1 relation */ if (source && sink) return media_create_pad_link(source, source_pad, sink, sink_pad, flags); /* Worse case scenario: n:n relation */ if (!source && !sink) { if (!allow_both_undefined) return 0; media_device_for_each_entity(source, mdev) { if (source->function != source_function) continue; media_device_for_each_entity(sink, mdev) { if (sink->function != sink_function) continue; ret = media_create_pad_link(source, source_pad, sink, sink_pad, flags); if (ret) return ret; flags &= ~(MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); } } return 0; } /* Handle 1:n and n:1 cases */ if (source) function = sink_function; else function = source_function; media_device_for_each_entity(entity, mdev) { if (entity->function != function) continue; if (source) ret = media_create_pad_link(source, source_pad, entity, sink_pad, flags); else ret = media_create_pad_link(entity, source_pad, sink, sink_pad, flags); if (ret) return ret; flags &= ~(MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); } return 0; } EXPORT_SYMBOL_GPL(media_create_pad_links); void __media_entity_remove_links(struct media_entity *entity) { struct media_link *link, *tmp; list_for_each_entry_safe(link, tmp, &entity->links, list) __media_entity_remove_link(entity, link); entity->num_links = 0; entity->num_backlinks = 0; } EXPORT_SYMBOL_GPL(__media_entity_remove_links); void media_entity_remove_links(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; /* Do nothing if the entity is not registered. */ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_entity_remove_links(entity); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_entity_remove_links); static int __media_entity_setup_link_notify(struct media_link *link, u32 flags) { int ret; /* Notify both entities. */ ret = media_entity_call(link->source->entity, link_setup, link->source, link->sink, flags); if (ret < 0 && ret != -ENOIOCTLCMD) return ret; ret = media_entity_call(link->sink->entity, link_setup, link->sink, link->source, flags); if (ret < 0 && ret != -ENOIOCTLCMD) { media_entity_call(link->source->entity, link_setup, link->source, link->sink, link->flags); return ret; } link->flags = flags; link->reverse->flags = link->flags; return 0; } int __media_entity_setup_link(struct media_link *link, u32 flags) { const u32 mask = MEDIA_LNK_FL_ENABLED; struct media_device *mdev; struct media_pad *source, *sink; int ret = -EBUSY; if (link == NULL) return -EINVAL; /* The non-modifiable link flags must not be modified. 
*/ if ((link->flags & ~mask) != (flags & ~mask)) return -EINVAL; if (link->flags & MEDIA_LNK_FL_IMMUTABLE) return link->flags == flags ? 0 : -EINVAL; if (link->flags == flags) return 0; source = link->source; sink = link->sink; if (!(link->flags & MEDIA_LNK_FL_DYNAMIC) && (media_pad_is_streaming(source) || media_pad_is_streaming(sink))) return -EBUSY; mdev = source->graph_obj.mdev; if (mdev->ops && mdev->ops->link_notify) { ret = mdev->ops->link_notify(link, flags, MEDIA_DEV_NOTIFY_PRE_LINK_CH); if (ret < 0) return ret; } ret = __media_entity_setup_link_notify(link, flags); if (mdev->ops && mdev->ops->link_notify) mdev->ops->link_notify(link, flags, MEDIA_DEV_NOTIFY_POST_LINK_CH); return ret; } EXPORT_SYMBOL_GPL(__media_entity_setup_link); int media_entity_setup_link(struct media_link *link, u32 flags) { int ret; mutex_lock(&link->graph_obj.mdev->graph_mutex); ret = __media_entity_setup_link(link, flags); mutex_unlock(&link->graph_obj.mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_entity_setup_link); struct media_link * media_entity_find_link(struct media_pad *source, struct media_pad *sink) { struct media_link *link; for_each_media_entity_data_link(source->entity, link) { if (link->source->entity == source->entity && link->source->index == source->index && link->sink->entity == sink->entity && link->sink->index == sink->index) return link; } return NULL; } EXPORT_SYMBOL_GPL(media_entity_find_link); struct media_pad *media_pad_remote_pad_first(const struct media_pad *pad) { struct media_link *link; for_each_media_entity_data_link(pad->entity, link) { if (!(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (link->source == pad) return link->sink; if (link->sink == pad) return link->source; } return NULL; } EXPORT_SYMBOL_GPL(media_pad_remote_pad_first); struct media_pad * media_entity_remote_pad_unique(const struct media_entity *entity, unsigned int type) { struct media_pad *pad = NULL; struct media_link *link; list_for_each_entry(link, &entity->links, list) { struct media_pad *local_pad; struct media_pad *remote_pad; if (((link->flags & MEDIA_LNK_FL_LINK_TYPE) != MEDIA_LNK_FL_DATA_LINK) || !(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (type == MEDIA_PAD_FL_SOURCE) { local_pad = link->sink; remote_pad = link->source; } else { local_pad = link->source; remote_pad = link->sink; } if (local_pad->entity == entity) { if (pad) return ERR_PTR(-ENOTUNIQ); pad = remote_pad; } } if (!pad) return ERR_PTR(-ENOLINK); return pad; } EXPORT_SYMBOL_GPL(media_entity_remote_pad_unique); struct media_pad *media_pad_remote_pad_unique(const struct media_pad *pad) { struct media_pad *found_pad = NULL; struct media_link *link; list_for_each_entry(link, &pad->entity->links, list) { struct media_pad *remote_pad; if (!(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (link->sink == pad) remote_pad = link->source; else if (link->source == pad) remote_pad = link->sink; else continue; if (found_pad) return ERR_PTR(-ENOTUNIQ); found_pad = remote_pad; } if (!found_pad) return ERR_PTR(-ENOLINK); return found_pad; } EXPORT_SYMBOL_GPL(media_pad_remote_pad_unique); int media_entity_get_fwnode_pad(struct media_entity *entity, const struct fwnode_handle *fwnode, unsigned long direction_flags) { struct fwnode_endpoint endpoint; unsigned int i; int ret; if (!entity->ops || !entity->ops->get_fwnode_pad) { for (i = 0; i < entity->num_pads; i++) { if (entity->pads[i].flags & direction_flags) return i; } return -ENXIO; } ret = fwnode_graph_parse_endpoint(fwnode, &endpoint); if (ret) return ret; ret = 
entity->ops->get_fwnode_pad(entity, &endpoint); if (ret < 0) return ret; if (ret >= entity->num_pads) return -ENXIO; if (!(entity->pads[ret].flags & direction_flags)) return -ENXIO; return ret; } EXPORT_SYMBOL_GPL(media_entity_get_fwnode_pad); struct media_pipeline *media_entity_pipeline(struct media_entity *entity) { struct media_pad *pad; media_entity_for_each_pad(entity, pad) { if (pad->pipe) return pad->pipe; } return NULL; } EXPORT_SYMBOL_GPL(media_entity_pipeline); struct media_pipeline *media_pad_pipeline(struct media_pad *pad) { return pad->pipe; } EXPORT_SYMBOL_GPL(media_pad_pipeline); static void media_interface_init(struct media_device *mdev, struct media_interface *intf, u32 gobj_type, u32 intf_type, u32 flags) { intf->type = intf_type; intf->flags = flags; INIT_LIST_HEAD(&intf->links); media_gobj_create(mdev, gobj_type, &intf->graph_obj); } /* Functions related to the media interface via device nodes */ struct media_intf_devnode *media_devnode_create(struct media_device *mdev, u32 type, u32 flags, u32 major, u32 minor) { struct media_intf_devnode *devnode; devnode = kzalloc(sizeof(*devnode), GFP_KERNEL); if (!devnode) return NULL; devnode->major = major; devnode->minor = minor; media_interface_init(mdev, &devnode->intf, MEDIA_GRAPH_INTF_DEVNODE, type, flags); return devnode; } EXPORT_SYMBOL_GPL(media_devnode_create); void media_devnode_remove(struct media_intf_devnode *devnode) { media_remove_intf_links(&devnode->intf); media_gobj_destroy(&devnode->intf.graph_obj); kfree(devnode); } EXPORT_SYMBOL_GPL(media_devnode_remove); struct media_link *media_create_intf_link(struct media_entity *entity, struct media_interface *intf, u32 flags) { struct media_link *link; link = media_add_link(&intf->links); if (link == NULL) return NULL; link->intf = intf; link->entity = entity; link->flags = flags | MEDIA_LNK_FL_INTERFACE_LINK; /* Initialize graph object embedded at the new link */ media_gobj_create(intf->graph_obj.mdev, MEDIA_GRAPH_LINK, &link->graph_obj); return link; } EXPORT_SYMBOL_GPL(media_create_intf_link); void __media_remove_intf_link(struct media_link *link) { list_del(&link->list); media_gobj_destroy(&link->graph_obj); kfree(link); } EXPORT_SYMBOL_GPL(__media_remove_intf_link); void media_remove_intf_link(struct media_link *link) { struct media_device *mdev = link->graph_obj.mdev; /* Do nothing if the intf is not registered. */ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_remove_intf_link(link); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_remove_intf_link); void __media_remove_intf_links(struct media_interface *intf) { struct media_link *link, *tmp; list_for_each_entry_safe(link, tmp, &intf->links, list) __media_remove_intf_link(link); } EXPORT_SYMBOL_GPL(__media_remove_intf_links); void media_remove_intf_links(struct media_interface *intf) { struct media_device *mdev = intf->graph_obj.mdev; /* Do nothing if the intf is not registered. 
*/ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_remove_intf_links(intf); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_remove_intf_links); struct media_link *media_create_ancillary_link(struct media_entity *primary, struct media_entity *ancillary) { struct media_link *link; link = media_add_link(&primary->links); if (!link) return ERR_PTR(-ENOMEM); link->gobj0 = &primary->graph_obj; link->gobj1 = &ancillary->graph_obj; link->flags = MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_ANCILLARY_LINK; /* Initialize graph object embedded in the new link */ media_gobj_create(primary->graph_obj.mdev, MEDIA_GRAPH_LINK, &link->graph_obj); return link; } EXPORT_SYMBOL_GPL(media_create_ancillary_link); struct media_link *__media_entity_next_link(struct media_entity *entity, struct media_link *link, unsigned long link_type) { link = link ? list_next_entry(link, list) : list_first_entry(&entity->links, typeof(*link), list); list_for_each_entry_from(link, &entity->links, list) if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == link_type) return link; return NULL; } EXPORT_SYMBOL_GPL(__media_entity_next_link);
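/*
 * Example (editorial sketch, not part of the original file): a minimal
 * illustration of how a driver might use the pipeline API implemented above.
 * The names my_dev, my_start_streaming() and my_stop_streaming() are invented
 * for this sketch; media_pipeline_start() and media_pipeline_stop() are the
 * interfaces exported above.
 */
#if 0 /* documentation sketch only, not compiled */
#include <media/media-entity.h>

struct my_dev {
	struct media_pad pad;		/* pad from which the pipeline is started */
	struct media_pipeline pipe;	/* pipeline state owned by the driver */
};

static int my_start_streaming(struct my_dev *dev)
{
	int ret;

	/*
	 * Populate and validate the pipeline reachable from the pad. On
	 * success, every pad in the pipeline has its pipe pointer set and all
	 * enabled sink links have been validated via link_validate().
	 */
	ret = media_pipeline_start(&dev->pad, &dev->pipe);
	if (ret)
		return ret;

	/* ... program the hardware and start streaming ... */

	return 0;
}

static void my_stop_streaming(struct my_dev *dev)
{
	/* ... stop the hardware ... */

	/*
	 * Balances media_pipeline_start(); the pipeline is torn down when the
	 * start count drops to zero.
	 */
	media_pipeline_stop(&dev->pad);
}
#endif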
/* SPDX-License-Identifier: GPL-2.0-only */ /* * AppArmor security module * * This file contains AppArmor contexts used to associate "labels" to objects. * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd. */ #ifndef __AA_CONTEXT_H #define __AA_CONTEXT_H #include <linux/cred.h> #include <linux/slab.h> #include <linux/sched.h> #include "label.h" #include "policy_ns.h" #include "task.h" static inline struct aa_label *cred_label(const struct cred *cred) { struct aa_label **blob = cred->security + apparmor_blob_sizes.lbs_cred; AA_BUG(!blob); return *blob; } static inline void set_cred_label(const struct cred *cred, struct aa_label *label) { struct aa_label **blob = cred->security + apparmor_blob_sizes.lbs_cred; AA_BUG(!blob); *blob = label; } /** * aa_cred_raw_label - obtain cred's label * @cred: cred to obtain label from (NOT NULL) * * Returns: confining label * * does NOT increment reference count */ static inline struct aa_label *aa_cred_raw_label(const struct cred *cred) { struct aa_label *label = cred_label(cred); AA_BUG(!label); return label; } /** * aa_get_newest_cred_label - obtain the newest label on a cred * @cred: cred to obtain label from (NOT NULL) * * Returns: newest version of confining label */ static inline struct aa_label *aa_get_newest_cred_label(const struct cred *cred) { return aa_get_newest_label(aa_cred_raw_label(cred)); } /** * aa_current_raw_label - find the current tasks confining label * * Returns: up to date confining label or the ns unconfined label (NOT NULL) * * This fn will not update the tasks cred to the most up to date version * of the label so it is safe to call when inside of locks. */ static inline struct aa_label *aa_current_raw_label(void) { return aa_cred_raw_label(current_cred()); } /** * aa_get_current_label - get the newest version of the current tasks label * * Returns: newest version of confining label (NOT NULL) * * This fn will not update the tasks cred, so it is safe inside of locks * * The returned reference must be put with aa_put_label() */ static inline struct aa_label *aa_get_current_label(void) { struct aa_label *l = aa_current_raw_label(); if (label_is_stale(l)) return aa_get_newest_label(l); return aa_get_label(l); } #define __end_current_label_crit_section(X) end_current_label_crit_section(X) /** * end_label_crit_section - put a reference found with begin_current_label..
* @label: label reference to put * * Should only be used with a reference obtained with * begin_current_label_crit_section and never used in situations where the * task cred may be updated */ static inline void end_current_label_crit_section(struct aa_label *label) { if (label != aa_current_raw_label()) aa_put_label(label); } /** * __begin_current_label_crit_section - current's confining label * * Returns: up to date confining label or the ns unconfined label (NOT NULL) * * safe to call inside locks * * The returned reference must be put with __end_current_label_crit_section() * This must NOT be used if the task cred could be updated within the * critical section between __begin_current_label_crit_section() .. * __end_current_label_crit_section() */ static inline struct aa_label *__begin_current_label_crit_section(void) { struct aa_label *label = aa_current_raw_label(); if (label_is_stale(label)) label = aa_get_newest_label(label); return label; } /** * begin_current_label_crit_section - current's confining label and update it * * Returns: up to date confining label or the ns unconfined label (NOT NULL) * * Not safe to call inside locks * * The returned reference must be put with end_current_label_crit_section() * This must NOT be used if the task cred could be updated within the * critical section between begin_current_label_crit_section() .. * end_current_label_crit_section() */ static inline struct aa_label *begin_current_label_crit_section(void) { struct aa_label *label = aa_current_raw_label(); might_sleep(); if (label_is_stale(label)) { label = aa_get_newest_label(label); if (aa_replace_current_label(label) == 0) /* task cred will keep the reference */ aa_put_label(label); } return label; } static inline struct aa_ns *aa_get_current_ns(void) { struct aa_label *label; struct aa_ns *ns; label = __begin_current_label_crit_section(); ns = aa_get_ns(labels_ns(label)); __end_current_label_crit_section(label); return ns; } #endif /* __AA_CONTEXT_H */
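/*
 * Example (editorial sketch, not part of the original header): the usual
 * pattern for the critical-section helpers defined above. The hook name
 * my_hook() and the my_check_perm() call are invented for illustration;
 * begin_current_label_crit_section() and end_current_label_crit_section()
 * are the helpers from this header.
 */
#if 0 /* documentation sketch only, not compiled */
static int my_hook(void)
{
	struct aa_label *label;
	int error = 0;

	/*
	 * Returns the up-to-date confining label, updating the task cred if
	 * the current label is stale (may sleep, so not usable under locks;
	 * use the __begin/__end variants there).
	 */
	label = begin_current_label_crit_section();

	/* error = my_check_perm(label, ...);  hypothetical permission check */

	/* Must balance begin_current_label_crit_section(). */
	end_current_label_crit_section(label);

	return error;
}
#endif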
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _KERNEL_PRINTK_RINGBUFFER_H #define _KERNEL_PRINTK_RINGBUFFER_H #include <linux/atomic.h> #include <linux/dev_printk.h> /* * Meta information about each stored message. * * All fields are set by the printk code except for @seq, which is * set by the ringbuffer code. */ struct printk_info { u64 seq; /* sequence number */ u64 ts_nsec; /* timestamp in nanoseconds */ u16 text_len; /* length of text message */ u8 facility; /* syslog facility */ u8 flags:5; /* internal record flags */ u8 level:3; /* syslog level */ u32 caller_id; /* thread id or processor id */ struct dev_printk_info dev_info; }; /* * A structure providing the buffers, used by writers and readers. * * Writers: * Using prb_rec_init_wr(), a writer sets @text_buf_size before calling * prb_reserve(). On success, prb_reserve() sets @info and @text_buf to * buffers reserved for that writer. * * Readers: * Using prb_rec_init_rd(), a reader sets all fields before calling * prb_read_valid(). Note that the reader provides the @info and @text_buf, * buffers. On success, the struct pointed to by @info will be filled and * the char array pointed to by @text_buf will be filled with text data. */ struct printk_record { struct printk_info *info; char *text_buf; unsigned int text_buf_size; }; /* Specifies the logical position and span of a data block. */ struct prb_data_blk_lpos { unsigned long begin; unsigned long next; }; /* * A descriptor: the complete meta-data for a record. * * @state_var: A bitwise combination of descriptor ID and descriptor state. */ struct prb_desc { atomic_long_t state_var; struct prb_data_blk_lpos text_blk_lpos; }; /* A ringbuffer of "ID + data" elements. */ struct prb_data_ring { unsigned int size_bits; char *data; atomic_long_t head_lpos; atomic_long_t tail_lpos; }; /* A ringbuffer of "struct prb_desc" elements.
*/ struct prb_desc_ring { unsigned int count_bits; struct prb_desc *descs; struct printk_info *infos; atomic_long_t head_id; atomic_long_t tail_id; atomic_long_t last_finalized_seq; }; /* * The high level structure representing the printk ringbuffer. * * @fail: Count of failed prb_reserve() calls where not even a data-less * record was created. */ struct printk_ringbuffer { struct prb_desc_ring desc_ring; struct prb_data_ring text_data_ring; atomic_long_t fail; }; /* * Used by writers as a reserve/commit handle. * * @rb: Ringbuffer where the entry is reserved. * @irqflags: Saved irq flags to restore on entry commit. * @id: ID of the reserved descriptor. * @text_space: Total occupied buffer space in the text data ring, including * ID, alignment padding, and wrapping data blocks. * * This structure is an opaque handle for writers. Its contents are only * to be used by the ringbuffer implementation. */ struct prb_reserved_entry { struct printk_ringbuffer *rb; unsigned long irqflags; unsigned long id; unsigned int text_space; }; /* The possible responses of a descriptor state-query. */ enum desc_state { desc_miss = -1, /* ID mismatch (pseudo state) */ desc_reserved = 0x0, /* reserved, in use by writer */ desc_committed = 0x1, /* committed by writer, could get reopened */ desc_finalized = 0x2, /* committed, no further modification allowed */ desc_reusable = 0x3, /* free, not yet used by any writer */ }; #define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) #define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) #define DESC_SV_BITS (sizeof(unsigned long) * 8) #define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2) #define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT) #define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT)) #define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) #define DESC_ID_MASK (~DESC_FLAGS_MASK) #define DESC_ID(sv) ((sv) & DESC_ID_MASK) /* * Special data block logical position values (for fields of * @prb_desc.text_blk_lpos). * * - Bit0 is used to identify if the record has no data block. (Implemented in * the LPOS_DATALESS() macro.) * * - Bit1 specifies the reason for not having a data block. * * These special values could never be real lpos values because of the * meta data and alignment padding of data blocks. (See to_blk_size() for * details.) */ #define FAILED_LPOS 0x1 #define EMPTY_LINE_LPOS 0x3 #define FAILED_BLK_LPOS \ { \ .begin = FAILED_LPOS, \ .next = FAILED_LPOS, \ } /* * Descriptor Bootstrap * * The descriptor array is minimally initialized to allow immediate usage * by readers and writers. The requirements that the descriptor array * initialization must satisfy: * * Req1 * The tail must point to an existing (committed or reusable) descriptor. * This is required by the implementation of prb_first_seq(). * * Req2 * Readers must see that the ringbuffer is initially empty. * * Req3 * The first record reserved by a writer is assigned sequence number 0. * * To satisfy Req1, the tail initially points to a descriptor that is * minimally initialized (having no data block, i.e. data-less with the * data block's lpos @begin and @next values set to FAILED_LPOS). * * To satisfy Req2, the initial tail descriptor is initialized to the * reusable state. Readers recognize reusable descriptors as existing * records, but skip over them. * * To satisfy Req3, the last descriptor in the array is used as the initial * head (and tail) descriptor. This allows the first record reserved by a * writer (head + 1) to be the first descriptor in the array. 
(Only the first * descriptor in the array could have a valid sequence number of 0.) * * The first time a descriptor is reserved, it is assigned a sequence number * with the value of the array index. A "first time reserved" descriptor can * be recognized because it has a sequence number of 0 but does not have an * index of 0. (Only the first descriptor in the array could have a valid * sequence number of 0.) After the first reservation, all future reservations * (recycling) simply involve incrementing the sequence number by the array * count. * * Hack #1 * Only the first descriptor in the array is allowed to have the sequence * number 0. In this case it is not possible to recognize if it is being * reserved the first time (set to index value) or has been reserved * previously (increment by the array count). This is handled by _always_ * incrementing the sequence number by the array count when reserving the * first descriptor in the array. In order to satisfy Req3, the sequence * number of the first descriptor in the array is initialized to minus * the array count. Then, upon the first reservation, it is incremented * to 0, thus satisfying Req3. * * Hack #2 * prb_first_seq() can be called at any time by readers to retrieve the * sequence number of the tail descriptor. However, due to Req2 and Req3, * initially there are no records to report the sequence number of * (sequence numbers are u64 and there is nothing less than 0). To handle * this, the sequence number of the initial tail descriptor is initialized * to 0. Technically this is incorrect, because there is no record with * sequence number 0 (yet) and the tail descriptor is not the first * descriptor in the array. But it allows prb_read_valid() to correctly * report the existence of a record for _any_ given sequence number at all * times. Bootstrapping is complete when the tail is pushed the first * time, thus finally pointing to the first descriptor reserved by a * writer, which has the assigned sequence number 0. */ /* * Initiating Logical Value Overflows * * Both logical position (lpos) and ID values can be mapped to array indexes * but may experience overflows during the lifetime of the system. To ensure * that printk_ringbuffer can handle the overflows for these types, initial * values are chosen that map to the correct initial array indexes, but will * result in overflows soon. * * BLK0_LPOS * The initial @head_lpos and @tail_lpos for data rings. It is at index * 0 and the lpos value is such that it will overflow on the first wrap. * * DESC0_ID * The initial @head_id and @tail_id for the desc ring. It is at the last * index of the descriptor array (see Req3 above) and the ID value is such * that it will overflow on the second wrap. */ #define BLK0_LPOS(sz_bits) (-(_DATA_SIZE(sz_bits))) #define DESC0_ID(ct_bits) DESC_ID(-(_DESCS_COUNT(ct_bits) + 1)) #define DESC0_SV(ct_bits) DESC_SV(DESC0_ID(ct_bits), desc_reusable) /* * Define a ringbuffer with an external text data buffer. The same as * DEFINE_PRINTKRB() but requires specifying an external buffer for the * text data. 
* * Note: The specified external buffer must be of the size: * 2 ^ (descbits + avgtextbits) */ #define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf) \ static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \ /* the initial head and tail */ \ [_DESCS_COUNT(descbits) - 1] = { \ /* reusable */ \ .state_var = ATOMIC_INIT(DESC0_SV(descbits)), \ /* no associated data block */ \ .text_blk_lpos = FAILED_BLK_LPOS, \ }, \ }; \ static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = { \ /* this will be the first record reserved by a writer */ \ [0] = { \ /* will be incremented to 0 on the first reservation */ \ .seq = -(u64)_DESCS_COUNT(descbits), \ }, \ /* the initial head and tail */ \ [_DESCS_COUNT(descbits) - 1] = { \ /* reports the first seq value during the bootstrap phase */ \ .seq = 0, \ }, \ }; \ static struct printk_ringbuffer name = { \ .desc_ring = { \ .count_bits = descbits, \ .descs = &_##name##_descs[0], \ .infos = &_##name##_infos[0], \ .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ .last_finalized_seq = ATOMIC_INIT(0), \ }, \ .text_data_ring = { \ .size_bits = (avgtextbits) + (descbits), \ .data = text_buf, \ .head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ .tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ }, \ .fail = ATOMIC_LONG_INIT(0), \ } /** * DEFINE_PRINTKRB() - Define a ringbuffer. * * @name: The name of the ringbuffer variable. * @descbits: The number of descriptors as a power-of-2 value. * @avgtextbits: The average text data size per record as a power-of-2 value. * * This is a macro for defining a ringbuffer and all internal structures * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a * variant where the text data buffer can be specified externally. */ #define DEFINE_PRINTKRB(name, descbits, avgtextbits) \ static char _##name##_text[1U << ((avgtextbits) + (descbits))] \ __aligned(__alignof__(unsigned long)); \ _DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) /* Writer Interface */ /** * prb_rec_init_wr() - Initialize a buffer for writing records. * * @r: The record to initialize. * @text_buf_size: The needed text buffer size. */ static inline void prb_rec_init_wr(struct printk_record *r, unsigned int text_buf_size) { r->info = NULL; r->text_buf = NULL; r->text_buf_size = text_buf_size; } bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r); bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r, u32 caller_id, unsigned int max_size); void prb_commit(struct prb_reserved_entry *e); void prb_final_commit(struct prb_reserved_entry *e); void prb_init(struct printk_ringbuffer *rb, char *text_buf, unsigned int text_buf_size, struct prb_desc *descs, unsigned int descs_count_bits, struct printk_info *infos); unsigned int prb_record_text_space(struct prb_reserved_entry *e); /* Reader Interface */ /** * prb_rec_init_rd() - Initialize a buffer for reading records. * * @r: The record to initialize. * @info: A buffer to store record meta-data. * @text_buf: A buffer to store text data. * @text_buf_size: The size of @text_buf. * * Initialize all the fields that a reader is interested in. All arguments * (except @r) are optional. Only record data for arguments that are * non-NULL or non-zero will be read. 
*/ static inline void prb_rec_init_rd(struct printk_record *r, struct printk_info *info, char *text_buf, unsigned int text_buf_size) { r->info = info; r->text_buf = text_buf; r->text_buf_size = text_buf_size; } /** * prb_for_each_record() - Iterate over the records of a ringbuffer. * * @from: The sequence number to begin with. * @rb: The ringbuffer to iterate over. * @s: A u64 to store the sequence number on each iteration. * @r: A printk_record to store the record on each iteration. * * This is a macro for conveniently iterating over a ringbuffer. * Note that @s may not be the sequence number of the record on each * iteration. For the sequence number, @r->info->seq should be checked. * * Context: Any context. */ #define prb_for_each_record(from, rb, s, r) \ for ((s) = from; prb_read_valid(rb, s, r); (s) = (r)->info->seq + 1) /** * prb_for_each_info() - Iterate over the meta data of a ringbuffer. * * @from: The sequence number to begin with. * @rb: The ringbuffer to iterate over. * @s: A u64 to store the sequence number on each iteration. * @i: A printk_info to store the record meta data on each iteration. * @lc: An unsigned int to store the text line count of each record. * * This is a macro for conveniently iterating over a ringbuffer. * Note that @s may not be the sequence number of the record on each * iteration. For the sequence number, @r->info->seq should be checked. * * Context: Any context. */ #define prb_for_each_info(from, rb, s, i, lc) \ for ((s) = from; prb_read_valid_info(rb, s, i, lc); (s) = (i)->seq + 1) bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r); bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, struct printk_info *info, unsigned int *line_count); u64 prb_first_seq(struct printk_ringbuffer *rb); u64 prb_first_valid_seq(struct printk_ringbuffer *rb); u64 prb_next_seq(struct printk_ringbuffer *rb); u64 prb_next_reserve_seq(struct printk_ringbuffer *rb); #ifdef CONFIG_64BIT #define __u64seq_to_ulseq(u64seq) (u64seq) #define __ulseq_to_u64seq(rb, ulseq) (ulseq) #else /* CONFIG_64BIT */ #define __u64seq_to_ulseq(u64seq) ((u32)u64seq) static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) { u64 rb_first_seq = prb_first_seq(rb); u64 seq; /* * The provided sequence is only the lower 32 bits of the ringbuffer * sequence. It needs to be expanded to 64bit. Get the first sequence * number from the ringbuffer and fold it. * * Having a 32bit representation in the console is sufficient. * If a console ever gets more than 2^31 records behind * the ringbuffer then this is the least of the problems. * * Also the access to the ring buffer is always safe. */ seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq); return seq; } #endif /* CONFIG_64BIT */ #endif /* _KERNEL_PRINTK_RINGBUFFER_H */
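/*
 * Example (editorial sketch, not part of the original header): minimal writer
 * and reader usage of the interfaces declared above. The ringbuffer name
 * my_rb, the chosen sizes and the helper function names are invented for this
 * sketch; DEFINE_PRINTKRB() and the prb_* calls are declared above.
 */
#if 0 /* documentation sketch only, not compiled */
/* 2^5 = 32 descriptors, 2^(5+7) = 4 KiB of text data. */
DEFINE_PRINTKRB(my_rb, 5, 7);

static void my_writer(void)
{
	struct prb_reserved_entry e;
	struct printk_record r;

	prb_rec_init_wr(&r, 32);	/* request up to 32 bytes of text space */
	if (prb_reserve(&e, &my_rb, &r)) {
		snprintf(r.text_buf, r.text_buf_size, "hello");
		r.info->text_len = strlen(r.text_buf);
		prb_commit(&e);		/* readers see the record once it is finalized */
	}
}

static void my_reader(void)
{
	struct printk_info info;
	struct printk_record r;
	char text[128];
	u64 seq;

	prb_rec_init_rd(&r, &info, text, sizeof(text));

	/* Iterate all readable records, starting at sequence number 0. */
	prb_for_each_record(0, &my_rb, seq, &r) {
		/* info.seq may be larger than seq if records were lost. */
		/* ... consume info and r.text_buf (info.text_len bytes) ... */
	}
}
#endif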
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2011 Instituto Nokia de Tecnologia * * Authors: * Lauro Ramos Venancio <lauro.venancio@openbossa.org> * Aloisio Almeida Jr <aloisio.almeida@openbossa.org> * * Vendor commands implementation based on net/wireless/nl80211.c * which is: * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ #include <net/genetlink.h> #include <linux/nfc.h> #include <linux/slab.h> #include "nfc.h" #include "llcp.h" static const struct genl_multicast_group nfc_genl_mcgrps[] = { { .name = NFC_GENL_MCAST_EVENT_NAME, }, }; static struct genl_family nfc_genl_family; static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, .len = NFC_DEVICE_NAME_MAXSIZE }, [NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 }, [NFC_ATTR_TARGET_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_COMM_MODE] = { .type = NLA_U8 }, [NFC_ATTR_RF_MODE] = { .type = NLA_U8 }, [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, [NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 }, [NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 }, [NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { [NFC_SDP_ATTR_URI] = { .type = NLA_STRING, .len = U8_MAX - 4 }, [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 }, }; static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, struct netlink_callback *cb, int flags) { void *hdr; hdr =
genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfc_genl_family, flags, NFC_CMD_GET_TARGET); if (!hdr) return -EMSGSIZE; genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) || nla_put_u16(msg, NFC_ATTR_TARGET_SENS_RES, target->sens_res) || nla_put_u8(msg, NFC_ATTR_TARGET_SEL_RES, target->sel_res)) goto nla_put_failure; if (target->nfcid1_len > 0 && nla_put(msg, NFC_ATTR_TARGET_NFCID1, target->nfcid1_len, target->nfcid1)) goto nla_put_failure; if (target->sensb_res_len > 0 && nla_put(msg, NFC_ATTR_TARGET_SENSB_RES, target->sensb_res_len, target->sensb_res)) goto nla_put_failure; if (target->sensf_res_len > 0 && nla_put(msg, NFC_ATTR_TARGET_SENSF_RES, target->sensf_res_len, target->sensf_res)) goto nla_put_failure; if (target->is_iso15693) { if (nla_put_u8(msg, NFC_ATTR_TARGET_ISO15693_DSFID, target->iso15693_dsfid) || nla_put(msg, NFC_ATTR_TARGET_ISO15693_UID, sizeof(target->iso15693_uid), target->iso15693_uid)) goto nla_put_failure; } genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct nfc_dev *dev; u32 idx; if (!info->info.attrs[NFC_ATTR_DEVICE_INDEX]) return ERR_PTR(-EINVAL); idx = nla_get_u32(info->info.attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return ERR_PTR(-ENODEV); return dev; } static int nfc_genl_dump_targets(struct sk_buff *skb, struct netlink_callback *cb) { int i = cb->args[0]; struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; int rc; if (!dev) { dev = __get_device_from_cb(cb); if (IS_ERR(dev)) return PTR_ERR(dev); cb->args[1] = (long) dev; } device_lock(&dev->dev); cb->seq = dev->targets_generation; while (i < dev->n_targets) { rc = nfc_genl_send_target(skb, &dev->targets[i], cb, NLM_F_MULTI); if (rc < 0) break; i++; } device_unlock(&dev->dev); cb->args[0] = i; return skb->len; } static int nfc_genl_dump_targets_done(struct netlink_callback *cb) { struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; if (dev) nfc_put_device(dev); return 0; } int nfc_genl_targets_found(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; dev->genl_data.poll_req_portid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_TARGETS_FOUND); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_TARGET_LOST); if (!hdr) goto free_msg; if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target_idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, 
NFC_EVENT_TM_ACTIVATED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; if (nla_put_u32(msg, NFC_ATTR_TM_PROTOCOLS, protocol)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_tm_deactivated(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_TM_DEACTIVATED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_setup_device_added(struct nfc_dev *dev, struct sk_buff *msg) { if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) return -1; return 0; } int nfc_genl_device_added(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_DEVICE_ADDED); if (!hdr) goto free_msg; if (nfc_genl_setup_device_added(dev, msg)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_device_removed(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_DEVICE_REMOVED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) { struct sk_buff *msg; struct nlattr *sdp_attr, *uri_attr; struct nfc_llcp_sdp_tlv *sdres; struct hlist_node *n; void *hdr; int rc = -EMSGSIZE; int i; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_LLC_SDRES); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; sdp_attr = nla_nest_start_noflag(msg, NFC_ATTR_LLC_SDP); if (sdp_attr == NULL) { rc = -ENOMEM; goto nla_put_failure; } i = 1; hlist_for_each_entry_safe(sdres, n, sdres_list, node) { pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap); uri_attr = nla_nest_start_noflag(msg, i++); if (uri_attr == NULL) { rc = -ENOMEM; goto nla_put_failure; } if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap)) goto nla_put_failure; if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri)) goto nla_put_failure; nla_nest_end(msg, uri_attr); hlist_del(&sdres->node); nfc_llcp_free_sdp_tlv(sdres); } nla_nest_end(msg, sdp_attr); genlmsg_end(msg, hdr); return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); nla_put_failure: free_msg: nlmsg_free(msg); nfc_llcp_free_sdp_tlv_list(sdres_list); return rc; } int nfc_genl_se_added(struct nfc_dev *dev, u32 
se_idx, u16 type) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_SE_ADDED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || nla_put_u8(msg, NFC_ATTR_SE_TYPE, type)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_SE_REMOVED); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction) { struct nfc_se *se; struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_SE_TRANSACTION); if (!hdr) goto free_msg; se = nfc_find_se(dev, se_idx); if (!se) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type) || nla_put(msg, NFC_ATTR_SE_AID, evt_transaction->aid_len, evt_transaction->aid) || nla_put(msg, NFC_ATTR_SE_PARAMS, evt_transaction->params_len, evt_transaction->params)) goto nla_put_failure; /* evt_transaction is no more used */ devm_kfree(&dev->dev, evt_transaction); genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: /* evt_transaction is no more used */ devm_kfree(&dev->dev, evt_transaction); nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx) { const struct nfc_se *se; struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_EVENT_SE_CONNECTIVITY); if (!hdr) goto free_msg; se = nfc_find_se(dev, se_idx); if (!se) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_DEVICE); if (!hdr) return -EMSGSIZE; if (cb) genl_dump_check_consistent(cb, hdr); if (nfc_genl_setup_device_added(dev, msg)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nfc_genl_dump_devices(struct sk_buff *skb, struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; bool first_call = false; if (!iter) { first_call = true; iter = 
kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long) iter; } mutex_lock(&nfc_devlist_mutex); cb->seq = nfc_devlist_generation; if (first_call) { nfc_device_iter_init(iter); dev = nfc_device_iter_next(iter); } while (dev) { int rc; rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; dev = nfc_device_iter_next(iter); } mutex_unlock(&nfc_devlist_mutex); cb->args[1] = (long) dev; return skb->len; } static int nfc_genl_dump_devices_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; if (iter) { nfc_device_iter_exit(iter); kfree(iter); } return 0; } int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode) { struct sk_buff *msg; void *hdr; pr_debug("DEP link is up\n"); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_DEP_LINK_UP); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; if (rf_mode == NFC_RF_INITIATOR && nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target_idx)) goto nla_put_failure; if (nla_put_u8(msg, NFC_ATTR_COMM_MODE, comm_mode) || nla_put_u8(msg, NFC_ATTR_RF_MODE, rf_mode)) goto nla_put_failure; genlmsg_end(msg, hdr); dev->dep_link_up = true; genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } int nfc_genl_dep_link_down_event(struct nfc_dev *dev) { struct sk_buff *msg; void *hdr; pr_debug("DEP link is down\n"); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_DEP_LINK_DOWN); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct nfc_dev *dev; u32 idx; int rc = -ENOBUFS; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; goto out_putdev; } rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq, NULL, 0); if (rc < 0) goto out_free; nfc_put_device(dev); return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); out_putdev: nfc_put_device(dev); return rc; } static int nfc_genl_dev_up(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dev_up(dev); nfc_put_device(dev); return rc; } static int nfc_genl_dev_down(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dev_down(dev); nfc_put_device(dev); return rc; } static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; u32 im_protocols = 0, tm_protocols = 0; pr_debug("Poll 
start\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || ((!info->attrs[NFC_ATTR_IM_PROTOCOLS] && !info->attrs[NFC_ATTR_PROTOCOLS]) && !info->attrs[NFC_ATTR_TM_PROTOCOLS])) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); if (info->attrs[NFC_ATTR_TM_PROTOCOLS]) tm_protocols = nla_get_u32(info->attrs[NFC_ATTR_TM_PROTOCOLS]); if (info->attrs[NFC_ATTR_IM_PROTOCOLS]) im_protocols = nla_get_u32(info->attrs[NFC_ATTR_IM_PROTOCOLS]); else if (info->attrs[NFC_ATTR_PROTOCOLS]) im_protocols = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; mutex_lock(&dev->genl_data.genl_data_mutex); rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) dev->genl_data.poll_req_portid = info->snd_portid; mutex_unlock(&dev->genl_data.genl_data_mutex); nfc_put_device(dev); return rc; } static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); if (!dev->polling) { device_unlock(&dev->dev); nfc_put_device(dev); return -EINVAL; } device_unlock(&dev->dev); mutex_lock(&dev->genl_data.genl_data_mutex); if (dev->genl_data.poll_req_portid != info->snd_portid) { rc = -EBUSY; goto out; } rc = nfc_stop_poll(dev); dev->genl_data.poll_req_portid = 0; out: mutex_unlock(&dev->genl_data.genl_data_mutex); nfc_put_device(dev); return rc; } static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; u32 device_idx, target_idx, protocol; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_TARGET_INDEX] || !info->attrs[NFC_ATTR_PROTOCOLS]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(device_idx); if (!dev) return -ENODEV; target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); rc = nfc_activate_target(dev, target_idx, protocol); nfc_put_device(dev); return rc; } static int nfc_genl_deactivate_target(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; u32 device_idx, target_idx; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_TARGET_INDEX]) return -EINVAL; device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(device_idx); if (!dev) return -ENODEV; target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); nfc_put_device(dev); return rc; } static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc, tgt_idx; u32 idx; u8 comm; pr_debug("DEP link up\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_COMM_MODE]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); if (!info->attrs[NFC_ATTR_TARGET_INDEX]) tgt_idx = NFC_TARGET_IDX_ANY; else tgt_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); comm = nla_get_u8(info->attrs[NFC_ATTR_COMM_MODE]); if (comm != NFC_COMM_ACTIVE && comm != NFC_COMM_PASSIVE) return -EINVAL; dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dep_link_up(dev, tgt_idx, comm); nfc_put_device(dev); return rc; } static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; if 
(!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_TARGET_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_dep_link_down(dev); nfc_put_device(dev); return rc; } static int nfc_genl_send_params(struct sk_buff *msg, struct nfc_llcp_local *local, u32 portid, u32 seq) { void *hdr; hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, 0, NFC_CMD_LLC_GET_PARAMS); if (!hdr) return -EMSGSIZE; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, local->dev->idx) || nla_put_u8(msg, NFC_ATTR_LLC_PARAM_LTO, local->lto) || nla_put_u8(msg, NFC_ATTR_LLC_PARAM_RW, local->rw) || nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; int rc = 0; struct sk_buff *msg = NULL; u32 idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); local = nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; goto put_local; } rc = nfc_genl_send_params(msg, local, info->snd_portid, info->snd_seq); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); if (rc < 0) { if (msg) nlmsg_free(msg); return rc; } return genlmsg_reply(msg, info); } static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; u8 rw = 0; u16 miux = 0; u32 idx; int rc = 0; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || (!info->attrs[NFC_ATTR_LLC_PARAM_LTO] && !info->attrs[NFC_ATTR_LLC_PARAM_RW] && !info->attrs[NFC_ATTR_LLC_PARAM_MIUX])) return -EINVAL; if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) { rw = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_RW]); if (rw > LLCP_MAX_RW) return -EINVAL; } if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) { miux = nla_get_u16(info->attrs[NFC_ATTR_LLC_PARAM_MIUX]); if (miux > LLCP_MAX_MIUX) return -EINVAL; } idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); local = nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } if (info->attrs[NFC_ATTR_LLC_PARAM_LTO]) { if (dev->dep_link_up) { rc = -EINPROGRESS; goto put_local; } local->lto = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_LTO]); } if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) local->rw = rw; if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) local->miux = cpu_to_be16(miux); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); return rc; } static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct nfc_llcp_local *local; struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1]; u32 idx; u8 tid; char *uri; int rc = 0, rem; size_t uri_len, tlvs_len; struct hlist_head sdreq_list; struct nfc_llcp_sdp_tlv *sdreq; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_LLC_SDP]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; device_lock(&dev->dev); if (dev->dep_link_up == false) { rc = -ENOLINK; goto exit; } local = 
nfc_llcp_find_local(dev); if (!local) { rc = -ENODEV; goto exit; } INIT_HLIST_HEAD(&sdreq_list); tlvs_len = 0; nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { rc = nla_parse_nested_deprecated(sdp_attrs, NFC_SDP_ATTR_MAX, attr, nfc_sdp_genl_policy, info->extack); if (rc != 0) { rc = -EINVAL; goto put_local; } if (!sdp_attrs[NFC_SDP_ATTR_URI]) continue; uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]); if (uri_len == 0) continue; uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]); if (uri == NULL || *uri == 0) continue; tid = local->sdreq_next_tid++; sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); if (sdreq == NULL) { rc = -ENOMEM; goto put_local; } tlvs_len += sdreq->tlv_len; hlist_add_head(&sdreq->node, &sdreq_list); } if (hlist_empty(&sdreq_list)) { rc = -EINVAL; goto put_local; } rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); put_local: nfc_llcp_local_put(local); exit: device_unlock(&dev->dev); nfc_put_device(dev); return rc; } static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx; char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; nla_strscpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], sizeof(firmware_name)); rc = nfc_fw_download(dev, firmware_name); nfc_put_device(dev); return rc; } int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, u32 result) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_FW_DOWNLOAD); if (!hdr) goto free_msg; if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) || nla_put_u32(msg, NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS, result) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; nla_put_failure: free_msg: nlmsg_free(msg); return -EMSGSIZE; } static int nfc_genl_enable_se(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx, se_idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_enable_se(dev, se_idx); nfc_put_device(dev); return rc; } static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; u32 idx, se_idx; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(idx); if (!dev) return -ENODEV; rc = nfc_disable_se(dev, se_idx); nfc_put_device(dev); return rc; } static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; struct nfc_se *se, *n; list_for_each_entry_safe(se, n, &dev->secure_elements, list) { hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_SE); if (!hdr) goto nla_put_failure; if (cb) genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) || 
nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; genlmsg_end(msg, hdr); } return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nfc_genl_dump_ses(struct sk_buff *skb, struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; bool first_call = false; if (!iter) { first_call = true; iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long) iter; } mutex_lock(&nfc_devlist_mutex); cb->seq = nfc_devlist_generation; if (first_call) { nfc_device_iter_init(iter); dev = nfc_device_iter_next(iter); } while (dev) { int rc; rc = nfc_genl_send_se(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; dev = nfc_device_iter_next(iter); } mutex_unlock(&nfc_devlist_mutex); cb->args[1] = (long) dev; return skb->len; } static int nfc_genl_dump_ses_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; if (iter) { nfc_device_iter_exit(iter); kfree(iter); } return 0; } static int nfc_se_io(struct nfc_dev *dev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context) { struct nfc_se *se; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); device_lock(&dev->dev); if (!device_is_registered(&dev->dev)) { rc = -ENODEV; goto error; } if (!dev->dev_up) { rc = -ENODEV; goto error; } if (!dev->ops->se_io) { rc = -EOPNOTSUPP; goto error; } se = nfc_find_se(dev, se_idx); if (!se) { rc = -EINVAL; goto error; } if (se->state != NFC_SE_ENABLED) { rc = -ENODEV; goto error; } rc = dev->ops->se_io(dev, se_idx, apdu, apdu_length, cb, cb_context); device_unlock(&dev->dev); return rc; error: device_unlock(&dev->dev); kfree(cb_context); return rc; } struct se_io_ctx { u32 dev_idx; u32 se_idx; }; static void se_io_cb(void *context, u8 *apdu, size_t apdu_len, int err) { struct se_io_ctx *ctx = context; struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { kfree(ctx); return; } hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, NFC_CMD_SE_IO); if (!hdr) goto free_msg; if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, ctx->dev_idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, ctx->se_idx) || nla_put(msg, NFC_ATTR_SE_APDU, apdu_len, apdu)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); kfree(ctx); return; nla_put_failure: free_msg: nlmsg_free(msg); kfree(ctx); return; } static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; struct se_io_ctx *ctx; u32 dev_idx, se_idx; u8 *apdu; size_t apdu_len; int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX] || !info->attrs[NFC_ATTR_SE_APDU]) return -EINVAL; dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); dev = nfc_get_device(dev_idx); if (!dev) return -ENODEV; if (!dev->ops || !dev->ops->se_io) { rc = -EOPNOTSUPP; goto put_dev; } apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); if (apdu_len == 0) { rc = -EINVAL; goto put_dev; } apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); if (!apdu) { rc = -EINVAL; goto put_dev; } ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); if (!ctx) { rc = -ENOMEM; goto put_dev; } ctx->dev_idx = dev_idx; ctx->se_idx = se_idx; rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); put_dev: nfc_put_device(dev); return rc; } static int 
nfc_genl_vendor_cmd(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; const struct nfc_vendor_cmd *cmd; u32 dev_idx, vid, subcmd; u8 *data; size_t data_len; int i, err; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_VENDOR_ID] || !info->attrs[NFC_ATTR_VENDOR_SUBCMD]) return -EINVAL; dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); vid = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_ID]); subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); dev = nfc_get_device(dev_idx); if (!dev) return -ENODEV; if (!dev->vendor_cmds || !dev->n_vendor_cmds) { err = -ENODEV; goto put_dev; } if (info->attrs[NFC_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); if (data_len == 0) { err = -EINVAL; goto put_dev; } } else { data = NULL; data_len = 0; } for (i = 0; i < dev->n_vendor_cmds; i++) { cmd = &dev->vendor_cmds[i]; if (cmd->vendor_id != vid || cmd->subcmd != subcmd) continue; dev->cur_cmd_info = info; err = cmd->doit(dev, data, data_len); dev->cur_cmd_info = NULL; goto put_dev; } err = -EOPNOTSUPP; put_dev: nfc_put_device(dev); return err; } /* message building helper */ static inline void *nfc_hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ return genlmsg_put(skb, portid, seq, &nfc_genl_family, flags, cmd); } static struct sk_buff * __nfc_alloc_vendor_cmd_skb(struct nfc_dev *dev, int approxlen, u32 portid, u32 seq, enum nfc_attrs attr, u32 oui, u32 subcmd, gfp_t gfp) { struct sk_buff *skb; void *hdr; skb = nlmsg_new(approxlen + 100, gfp); if (!skb) return NULL; hdr = nfc_hdr_put(skb, portid, seq, 0, NFC_CMD_VENDOR); if (!hdr) { kfree_skb(skb); return NULL; } if (nla_put_u32(skb, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; if (nla_put_u32(skb, NFC_ATTR_VENDOR_ID, oui)) goto nla_put_failure; if (nla_put_u32(skb, NFC_ATTR_VENDOR_SUBCMD, subcmd)) goto nla_put_failure; ((void **)skb->cb)[0] = dev; ((void **)skb->cb)[1] = hdr; return skb; nla_put_failure: kfree_skb(skb); return NULL; } struct sk_buff *__nfc_alloc_vendor_cmd_reply_skb(struct nfc_dev *dev, enum nfc_attrs attr, u32 oui, u32 subcmd, int approxlen) { if (WARN_ON(!dev->cur_cmd_info)) return NULL; return __nfc_alloc_vendor_cmd_skb(dev, approxlen, dev->cur_cmd_info->snd_portid, dev->cur_cmd_info->snd_seq, attr, oui, subcmd, GFP_KERNEL); } EXPORT_SYMBOL(__nfc_alloc_vendor_cmd_reply_skb); int nfc_vendor_cmd_reply(struct sk_buff *skb) { struct nfc_dev *dev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; /* clear CB data for netlink core to own from now on */ memset(skb->cb, 0, sizeof(skb->cb)); if (WARN_ON(!dev->cur_cmd_info)) { kfree_skb(skb); return -EINVAL; } genlmsg_end(skb, hdr); return genlmsg_reply(skb, dev->cur_cmd_info); } EXPORT_SYMBOL(nfc_vendor_cmd_reply); static const struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_get_device, .dumpit = nfc_genl_dump_devices, .done = nfc_genl_dump_devices_done, }, { .cmd = NFC_CMD_DEV_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_up, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEV_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_down, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_START_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_start_poll, .flags = 
GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_STOP_POLL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_stop_poll, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_UP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_up, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_down, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, }, { .cmd = NFC_CMD_LLC_GET_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_get_params, }, { .cmd = NFC_CMD_LLC_SET_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_set_params, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_LLC_SDREQ, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_sdreq, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_FW_DOWNLOAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_fw_download, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ENABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_enable_se, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DISABLE_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_disable_se, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_GET_SE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nfc_genl_dump_ses, .done = nfc_genl_dump_ses_done, }, { .cmd = NFC_CMD_SE_IO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_se_io, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_activate_target, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_VENDOR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_vendor_cmd, .flags = GENL_ADMIN_PERM, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_deactivate_target, .flags = GENL_ADMIN_PERM, }, }; static struct genl_family nfc_genl_family __ro_after_init = { .hdrsize = 0, .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, .maxattr = NFC_ATTR_MAX, .policy = nfc_genl_policy, .module = THIS_MODULE, .ops = nfc_genl_ops, .n_ops = ARRAY_SIZE(nfc_genl_ops), .resv_start_op = NFC_CMD_DEACTIVATE_TARGET + 1, .mcgrps = nfc_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps), }; struct urelease_work { struct work_struct w; u32 portid; }; static void nfc_urelease_event_work(struct work_struct *work) { struct urelease_work *w = container_of(work, struct urelease_work, w); struct class_dev_iter iter; struct nfc_dev *dev; pr_debug("portid %d\n", w->portid); mutex_lock(&nfc_devlist_mutex); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { mutex_lock(&dev->genl_data.genl_data_mutex); if (dev->genl_data.poll_req_portid == w->portid) { nfc_stop_poll(dev); dev->genl_data.poll_req_portid = 0; } mutex_unlock(&dev->genl_data.genl_data_mutex); dev = nfc_device_iter_next(&iter); } nfc_device_iter_exit(&iter); mutex_unlock(&nfc_devlist_mutex); kfree(w); } static int nfc_genl_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) { struct netlink_notify *n = ptr; struct 
urelease_work *w; if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) goto out; pr_debug("NETLINK_URELEASE event from id %d\n", n->portid); w = kmalloc(sizeof(*w), GFP_ATOMIC); if (w) { INIT_WORK(&w->w, nfc_urelease_event_work); w->portid = n->portid; schedule_work(&w->w); } out: return NOTIFY_DONE; } void nfc_genl_data_init(struct nfc_genl_data *genl_data) { genl_data->poll_req_portid = 0; mutex_init(&genl_data->genl_data_mutex); } void nfc_genl_data_exit(struct nfc_genl_data *genl_data) { mutex_destroy(&genl_data->genl_data_mutex); } static struct notifier_block nl_notifier = { .notifier_call = nfc_genl_rcv_nl_event, }; /** * nfc_genl_init() - Initialize netlink interface * * This initialization function registers the nfc netlink family. */ int __init nfc_genl_init(void) { int rc; rc = genl_register_family(&nfc_genl_family); if (rc) return rc; netlink_register_notifier(&nl_notifier); return 0; } /** * nfc_genl_exit() - Deinitialize netlink interface * * This exit function unregisters the nfc netlink family. */ void nfc_genl_exit(void) { netlink_unregister_notifier(&nl_notifier); genl_unregister_family(&nfc_genl_family); }
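
The netlink.c code above is only the kernel half of the "nfc" generic netlink family; user space reaches handlers such as nfc_genl_dev_up() by sending the corresponding commands over a generic netlink socket. The following is a minimal, hedged sketch of that interaction using libnl-genl-3; the helper name nfc_dev_up_example() and the hard-coded device index are illustrative assumptions, not part of the sources above (real code would first discover devices via NFC_CMD_GET_DEVICE).

#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/attr.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nfc.h>

/* Illustrative helper; dev_idx would normally come from a prior
 * NFC_CMD_GET_DEVICE dump handled by nfc_genl_dump_devices() above. */
static int nfc_dev_up_example(uint32_t dev_idx)
{
	struct nl_sock *sk;
	struct nl_msg *msg;
	int family, err = -1;

	sk = nl_socket_alloc();
	if (!sk)
		return -1;
	if (genl_connect(sk))
		goto out_sock;

	family = genl_ctrl_resolve(sk, NFC_GENL_NAME);	/* "nfc" */
	if (family < 0) {
		err = family;
		goto out_sock;
	}

	msg = nlmsg_alloc();
	if (!msg)
		goto out_sock;

	/* NFC_CMD_DEV_UP is dispatched to nfc_genl_dev_up() in the file above */
	if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
			 NFC_CMD_DEV_UP, NFC_GENL_VERSION)) {
		nlmsg_free(msg);
		goto out_sock;
	}
	nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev_idx);

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
out_sock:
	nl_socket_free(sk);
	return err < 0 ? err : 0;
}
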
// SPDX-License-Identifier: GPL-2.0-or-later
/*
    V4L2 device support.

    Copyright (C) 2008  Hans Verkuil <hverkuil@xs4all.nl>
 */

#include <linux/types.h>
#include <linux/ioctl.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/videodev2.h>
#include <media/v4l2-device.h>
#include <media/v4l2-ctrls.h>

int v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev)
{
	if (v4l2_dev == NULL)
		return -EINVAL;

	INIT_LIST_HEAD(&v4l2_dev->subdevs);
	spin_lock_init(&v4l2_dev->lock);
	v4l2_prio_init(&v4l2_dev->prio);
	kref_init(&v4l2_dev->ref);
	get_device(dev);
	v4l2_dev->dev = dev;
	if (dev == NULL) {
		/* If dev == NULL, then name must be filled in by the caller */
		if (WARN_ON(!v4l2_dev->name[0]))
			return -EINVAL;
		return 0;
	}

	/* Set name to driver name + device name if it is empty. */
	if (!v4l2_dev->name[0])
		snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s %s",
			 dev->driver->name, dev_name(dev));
	if (!dev_get_drvdata(dev))
		dev_set_drvdata(dev, v4l2_dev);
	return 0;
}
EXPORT_SYMBOL_GPL(v4l2_device_register);

static void v4l2_device_release(struct kref *ref)
{
	struct v4l2_device *v4l2_dev =
		container_of(ref, struct v4l2_device, ref);

	if (v4l2_dev->release)
		v4l2_dev->release(v4l2_dev);
}

int v4l2_device_put(struct v4l2_device *v4l2_dev)
{
	return kref_put(&v4l2_dev->ref, v4l2_device_release);
}
EXPORT_SYMBOL_GPL(v4l2_device_put);

int v4l2_device_set_name(struct v4l2_device *v4l2_dev, const char *basename,
			 atomic_t *instance)
{
	int num = atomic_inc_return(instance) - 1;
	int len = strlen(basename);

	if (basename[len - 1] >= '0' && basename[len - 1] <= '9')
		snprintf(v4l2_dev->name, sizeof(v4l2_dev->name),
			 "%s-%d", basename, num);
	else
		snprintf(v4l2_dev->name, sizeof(v4l2_dev->name),
			 "%s%d", basename, num);
	return num;
}
EXPORT_SYMBOL_GPL(v4l2_device_set_name);

void v4l2_device_disconnect(struct v4l2_device *v4l2_dev)
{
	if (v4l2_dev->dev == NULL)
		return;

	if (dev_get_drvdata(v4l2_dev->dev) == v4l2_dev)
		dev_set_drvdata(v4l2_dev->dev, NULL);

	put_device(v4l2_dev->dev);
	v4l2_dev->dev = NULL;
}
EXPORT_SYMBOL_GPL(v4l2_device_disconnect);

void v4l2_device_unregister(struct v4l2_device *v4l2_dev)
{
	struct v4l2_subdev *sd, *next;

	/* Just return if v4l2_dev is NULL or if it was already
	 * unregistered before.
*/ if (v4l2_dev == NULL || !v4l2_dev->name[0]) return; v4l2_device_disconnect(v4l2_dev); /* Unregister subdevs */ list_for_each_entry_safe(sd, next, &v4l2_dev->subdevs, list) { v4l2_device_unregister_subdev(sd); if (sd->flags & V4L2_SUBDEV_FL_IS_I2C) v4l2_i2c_subdev_unregister(sd); else if (sd->flags & V4L2_SUBDEV_FL_IS_SPI) v4l2_spi_subdev_unregister(sd); } /* Mark as unregistered, thus preventing duplicate unregistrations */ v4l2_dev->name[0] = '\0'; } EXPORT_SYMBOL_GPL(v4l2_device_unregister); int v4l2_device_register_subdev(struct v4l2_device *v4l2_dev, struct v4l2_subdev *sd) { int err; /* Check for valid input */ if (!v4l2_dev || !sd || sd->v4l2_dev || !sd->name[0]) return -EINVAL; /* * The reason to acquire the module here is to avoid unloading * a module of sub-device which is registered to a media * device. To make it possible to unload modules for media * devices that also register sub-devices, do not * try_module_get() such sub-device owners. */ sd->owner_v4l2_dev = v4l2_dev->dev && v4l2_dev->dev->driver && sd->owner == v4l2_dev->dev->driver->owner; if (!sd->owner_v4l2_dev && !try_module_get(sd->owner)) return -ENODEV; sd->v4l2_dev = v4l2_dev; /* This just returns 0 if either of the two args is NULL */ err = v4l2_ctrl_add_handler(v4l2_dev->ctrl_handler, sd->ctrl_handler, NULL, true); if (err) goto error_module; #if defined(CONFIG_MEDIA_CONTROLLER) /* Register the entity. */ if (v4l2_dev->mdev) { err = media_device_register_entity(v4l2_dev->mdev, &sd->entity); if (err < 0) goto error_module; } #endif if (sd->internal_ops && sd->internal_ops->registered) { err = sd->internal_ops->registered(sd); if (err) goto error_unregister; } spin_lock(&v4l2_dev->lock); list_add_tail(&sd->list, &v4l2_dev->subdevs); spin_unlock(&v4l2_dev->lock); return 0; error_unregister: #if defined(CONFIG_MEDIA_CONTROLLER) media_device_unregister_entity(&sd->entity); #endif error_module: if (!sd->owner_v4l2_dev) module_put(sd->owner); sd->v4l2_dev = NULL; return err; } EXPORT_SYMBOL_GPL(v4l2_device_register_subdev); static void v4l2_subdev_release(struct v4l2_subdev *sd) { struct module *owner = !sd->owner_v4l2_dev ? sd->owner : NULL; if (sd->internal_ops && sd->internal_ops->release) sd->internal_ops->release(sd); sd->devnode = NULL; module_put(owner); } static void v4l2_device_release_subdev_node(struct video_device *vdev) { v4l2_subdev_release(video_get_drvdata(vdev)); kfree(vdev); } int __v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev, bool read_only) { struct video_device *vdev; struct v4l2_subdev *sd; int err; /* Register a device node for every subdev marked with the * V4L2_SUBDEV_FL_HAS_DEVNODE flag. 
*/ list_for_each_entry(sd, &v4l2_dev->subdevs, list) { if (!(sd->flags & V4L2_SUBDEV_FL_HAS_DEVNODE)) continue; if (sd->devnode) continue; vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) { err = -ENOMEM; goto clean_up; } video_set_drvdata(vdev, sd); strscpy(vdev->name, sd->name, sizeof(vdev->name)); vdev->dev_parent = sd->dev; vdev->v4l2_dev = v4l2_dev; vdev->fops = &v4l2_subdev_fops; vdev->release = v4l2_device_release_subdev_node; vdev->ctrl_handler = sd->ctrl_handler; if (read_only) set_bit(V4L2_FL_SUBDEV_RO_DEVNODE, &vdev->flags); sd->devnode = vdev; err = __video_register_device(vdev, VFL_TYPE_SUBDEV, -1, 1, sd->owner); if (err < 0) { sd->devnode = NULL; kfree(vdev); goto clean_up; } #if defined(CONFIG_MEDIA_CONTROLLER) sd->entity.info.dev.major = VIDEO_MAJOR; sd->entity.info.dev.minor = vdev->minor; /* Interface is created by __video_register_device() */ if (vdev->v4l2_dev->mdev) { struct media_link *link; link = media_create_intf_link(&sd->entity, &vdev->intf_devnode->intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) { err = -ENOMEM; goto clean_up; } } #endif } return 0; clean_up: list_for_each_entry(sd, &v4l2_dev->subdevs, list) { if (!sd->devnode) break; video_unregister_device(sd->devnode); } return err; } EXPORT_SYMBOL_GPL(__v4l2_device_register_subdev_nodes); void v4l2_device_unregister_subdev(struct v4l2_subdev *sd) { struct v4l2_device *v4l2_dev; /* return if it isn't registered */ if (sd == NULL || sd->v4l2_dev == NULL) return; v4l2_dev = sd->v4l2_dev; spin_lock(&v4l2_dev->lock); list_del(&sd->list); spin_unlock(&v4l2_dev->lock); if (sd->internal_ops && sd->internal_ops->unregistered) sd->internal_ops->unregistered(sd); sd->v4l2_dev = NULL; #if defined(CONFIG_MEDIA_CONTROLLER) if (v4l2_dev->mdev) { /* * No need to explicitly remove links, as both pads and * links are removed by the function below, in the right order */ media_device_unregister_entity(&sd->entity); } #endif if (sd->devnode) video_unregister_device(sd->devnode); else v4l2_subdev_release(sd); } EXPORT_SYMBOL_GPL(v4l2_device_unregister_subdev);
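
As a usage note for the v4l2-device.c core above, a bridge driver normally embeds a struct v4l2_device in its private state, registers it in probe() and unregisters it in remove(). The sketch below is a hedged illustration of that pattern; struct example_drv and the example_probe()/example_remove() names are invented for this example and rely on the drvdata behaviour implemented in v4l2_device_register()/v4l2_device_unregister() above.

#include <linux/platform_device.h>
#include <linux/slab.h>
#include <media/v4l2-device.h>

/* Hypothetical driver state; only the embedded v4l2_device matters here. */
struct example_drv {
	struct v4l2_device v4l2_dev;
};

static int example_probe(struct platform_device *pdev)
{
	struct example_drv *drv;
	int ret;

	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
	if (!drv)
		return -ENOMEM;

	/*
	 * Fills in the name as "<driver> <device>" and, because pdev's
	 * drvdata is still unset, stores the v4l2_device pointer there
	 * (see v4l2_device_register() above).
	 */
	ret = v4l2_device_register(&pdev->dev, &drv->v4l2_dev);
	if (ret)
		return ret;

	/* Sub-devices would be added here with v4l2_device_register_subdev(). */
	return 0;
}

static void example_remove(struct platform_device *pdev)
{
	struct v4l2_device *v4l2_dev = dev_get_drvdata(&pdev->dev);

	/* Unregisters any remaining subdevs and clears the drvdata again. */
	v4l2_device_unregister(v4l2_dev);
}
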
// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>

static DEFINE_SPINLOCK(cgroup_rstat_lock);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);

static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
}

/**
 * cgroup_rstat_updated - keep track of updated rstat_cpu
 * @cgrp: target cgroup
 * @cpu: cpu on which rstat_cpu was updated
 *
 * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
 * rstat_cpu->updated_children list. See the comment on top of
 * cgroup_rstat_cpu definition for details.
 */
__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
	unsigned long flags;

	/*
	 * Speculative already-on-list test. This may race leading to
	 * temporary inaccuracies, which is fine.
	 *
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
*/ if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next)) return; raw_spin_lock_irqsave(cpu_lock, flags); /* put @cgrp and all ancestors on the corresponding updated lists */ while (true) { struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_rstat_cpu *prstatc; /* * Both additions and removals are bottom-up. If a cgroup * is already in the tree, all ancestors are. */ if (rstatc->updated_next) break; /* Root has no parent to link it to, but mark it busy */ if (!parent) { rstatc->updated_next = cgrp; break; } prstatc = cgroup_rstat_cpu(parent, cpu); rstatc->updated_next = prstatc->updated_children; prstatc->updated_children = cgrp; cgrp = parent; } raw_spin_unlock_irqrestore(cpu_lock, flags); } /** * cgroup_rstat_push_children - push children cgroups into the given list * @head: current head of the list (= subtree root) * @child: first child of the root * @cpu: target cpu * Return: A new singly linked list of cgroups to be flush * * Iteratively traverse down the cgroup_rstat_cpu updated tree level by * level and push all the parents first before their next level children * into a singly linked list built from the tail backward like "pushing" * cgroups into a stack. The root is pushed by the caller. */ static struct cgroup *cgroup_rstat_push_children(struct cgroup *head, struct cgroup *child, int cpu) { struct cgroup *chead = child; /* Head of child cgroup level */ struct cgroup *ghead = NULL; /* Head of grandchild cgroup level */ struct cgroup *parent, *grandchild; struct cgroup_rstat_cpu *crstatc; child->rstat_flush_next = NULL; next_level: while (chead) { child = chead; chead = child->rstat_flush_next; parent = cgroup_parent(child); /* updated_next is parent cgroup terminated */ while (child != parent) { child->rstat_flush_next = head; head = child; crstatc = cgroup_rstat_cpu(child, cpu); grandchild = crstatc->updated_children; if (grandchild != child) { /* Push the grand child to the next level */ crstatc->updated_children = child; grandchild->rstat_flush_next = ghead; ghead = grandchild; } child = crstatc->updated_next; crstatc->updated_next = NULL; } } if (ghead) { chead = ghead; ghead = NULL; goto next_level; } return head; } /** * cgroup_rstat_updated_list - return a list of updated cgroups to be flushed * @root: root of the cgroup subtree to traverse * @cpu: target cpu * Return: A singly linked list of cgroups to be flushed * * Walks the updated rstat_cpu tree on @cpu from @root. During traversal, * each returned cgroup is unlinked from the updated tree. * * The only ordering guarantee is that, for a parent and a child pair * covered by a given traversal, the child is before its parent in * the list. * * Note that updated_children is self terminated and points to a list of * child cgroups if not empty. Whereas updated_next is like a sibling link * within the children list and terminated by the parent cgroup. An exception * here is the cgroup root whose updated_next can be self terminated. */ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu) { raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(root, cpu); struct cgroup *head = NULL, *parent, *child; unsigned long flags; /* * The _irqsave() is needed because cgroup_rstat_lock is * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring * this lock with the _irq() suffix only disables interrupts on * a non-PREEMPT_RT kernel. 
The raw_spinlock_t below disables * interrupts on both configurations. The _irqsave() ensures * that interrupts are always disabled and later restored. */ raw_spin_lock_irqsave(cpu_lock, flags); /* Return NULL if this subtree is not on-list */ if (!rstatc->updated_next) goto unlock_ret; /* * Unlink @root from its parent. As the updated_children list is * singly linked, we have to walk it to find the removal point. */ parent = cgroup_parent(root); if (parent) { struct cgroup_rstat_cpu *prstatc; struct cgroup **nextp; prstatc = cgroup_rstat_cpu(parent, cpu); nextp = &prstatc->updated_children; while (*nextp != root) { struct cgroup_rstat_cpu *nrstatc; nrstatc = cgroup_rstat_cpu(*nextp, cpu); WARN_ON_ONCE(*nextp == parent); nextp = &nrstatc->updated_next; } *nextp = rstatc->updated_next; } rstatc->updated_next = NULL; /* Push @root to the list first before pushing the children */ head = root; root->rstat_flush_next = NULL; child = rstatc->updated_children; rstatc->updated_children = root; if (child != root) head = cgroup_rstat_push_children(head, child, cpu); unlock_ret: raw_spin_unlock_irqrestore(cpu_lock, flags); return head; } /* * A hook for bpf stat collectors to attach to and flush their stats. * Together with providing bpf kfuncs for cgroup_rstat_updated() and * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that * collect cgroup stats can integrate with rstat for efficient flushing. * * A static noinline declaration here could cause the compiler to optimize away * the function. A global noinline declaration will keep the definition, but may * optimize away the callsite. Therefore, __weak is needed to ensure that the * call is still emitted, by telling the compiler that we don't know what the * function might eventually be. */ __bpf_hook_start(); __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu) { } __bpf_hook_end(); /* see cgroup_rstat_flush() */ static void cgroup_rstat_flush_locked(struct cgroup *cgrp) __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock) { int cpu; lockdep_assert_held(&cgroup_rstat_lock); for_each_possible_cpu(cpu) { struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu); for (; pos; pos = pos->rstat_flush_next) { struct cgroup_subsys_state *css; cgroup_base_stat_flush(pos, cpu); bpf_rstat_flush(pos, cgroup_parent(pos), cpu); rcu_read_lock(); list_for_each_entry_rcu(css, &pos->rstat_css_list, rstat_css_node) css->ss->css_rstat_flush(css, cpu); rcu_read_unlock(); } /* play nice and yield if necessary */ if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) { spin_unlock_irq(&cgroup_rstat_lock); if (!cond_resched()) cpu_relax(); spin_lock_irq(&cgroup_rstat_lock); } } } /** * cgroup_rstat_flush - flush stats in @cgrp's subtree * @cgrp: target cgroup * * Collect all per-cpu stats in @cgrp's subtree into the global counters * and propagate them upwards. After this function returns, all cgroups in * the subtree have up-to-date ->stat. * * This also gets all cgroups in the subtree including @cgrp off the * ->updated_children lists. * * This function may block. */ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp) { might_sleep(); spin_lock_irq(&cgroup_rstat_lock); cgroup_rstat_flush_locked(cgrp); spin_unlock_irq(&cgroup_rstat_lock); } /** * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold * @cgrp: target cgroup * * Flush stats in @cgrp's subtree and prevent further flushes. Must be * paired with cgroup_rstat_flush_release(). * * This function may block. 
*/ void cgroup_rstat_flush_hold(struct cgroup *cgrp) __acquires(&cgroup_rstat_lock) { might_sleep(); spin_lock_irq(&cgroup_rstat_lock); cgroup_rstat_flush_locked(cgrp); } /** * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold() */ void cgroup_rstat_flush_release(void) __releases(&cgroup_rstat_lock) { spin_unlock_irq(&cgroup_rstat_lock); } int cgroup_rstat_init(struct cgroup *cgrp) { int cpu; /* the root cgrp has rstat_cpu preallocated */ if (!cgrp->rstat_cpu) { cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu); if (!cgrp->rstat_cpu) return -ENOMEM; } /* ->updated_children list is self terminated */ for_each_possible_cpu(cpu) { struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); rstatc->updated_children = cgrp; u64_stats_init(&rstatc->bsync); } return 0; } void cgroup_rstat_exit(struct cgroup *cgrp) { int cpu; cgroup_rstat_flush(cgrp); /* sanity check */ for_each_possible_cpu(cpu) { struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); if (WARN_ON_ONCE(rstatc->updated_children != cgrp) || WARN_ON_ONCE(rstatc->updated_next)) return; } free_percpu(cgrp->rstat_cpu); cgrp->rstat_cpu = NULL; } void __init cgroup_rstat_boot(void) { int cpu; for_each_possible_cpu(cpu) raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu)); } /* * Functions for cgroup basic resource statistics implemented on top of * rstat. */ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat, struct cgroup_base_stat *src_bstat) { dst_bstat->cputime.utime += src_bstat->cputime.utime; dst_bstat->cputime.stime += src_bstat->cputime.stime; dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime; #ifdef CONFIG_SCHED_CORE dst_bstat->forceidle_sum += src_bstat->forceidle_sum; #endif } static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat, struct cgroup_base_stat *src_bstat) { dst_bstat->cputime.utime -= src_bstat->cputime.utime; dst_bstat->cputime.stime -= src_bstat->cputime.stime; dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime; #ifdef CONFIG_SCHED_CORE dst_bstat->forceidle_sum -= src_bstat->forceidle_sum; #endif } static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) { struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_rstat_cpu *prstatc; struct cgroup_base_stat delta; unsigned seq; /* Root-level stats are sourced from system-wide CPU stats */ if (!parent) return; /* fetch the current per-cpu values */ do { seq = __u64_stats_fetch_begin(&rstatc->bsync); delta = rstatc->bstat; } while (__u64_stats_fetch_retry(&rstatc->bsync, seq)); /* propagate per-cpu delta to cgroup and per-cpu global statistics */ cgroup_base_stat_sub(&delta, &rstatc->last_bstat); cgroup_base_stat_add(&cgrp->bstat, &delta); cgroup_base_stat_add(&rstatc->last_bstat, &delta); cgroup_base_stat_add(&rstatc->subtree_bstat, &delta); /* propagate cgroup and per-cpu global delta to parent (unless that's root) */ if (cgroup_parent(parent)) { delta = cgrp->bstat; cgroup_base_stat_sub(&delta, &cgrp->last_bstat); cgroup_base_stat_add(&parent->bstat, &delta); cgroup_base_stat_add(&cgrp->last_bstat, &delta); delta = rstatc->subtree_bstat; prstatc = cgroup_rstat_cpu(parent, cpu); cgroup_base_stat_sub(&delta, &rstatc->last_subtree_bstat); cgroup_base_stat_add(&prstatc->subtree_bstat, &delta); cgroup_base_stat_add(&rstatc->last_subtree_bstat, &delta); } } static struct cgroup_rstat_cpu * cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags) { struct 
cgroup_rstat_cpu *rstatc; rstatc = get_cpu_ptr(cgrp->rstat_cpu); *flags = u64_stats_update_begin_irqsave(&rstatc->bsync); return rstatc; } static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, struct cgroup_rstat_cpu *rstatc, unsigned long flags) { u64_stats_update_end_irqrestore(&rstatc->bsync, flags); cgroup_rstat_updated(cgrp, smp_processor_id()); put_cpu_ptr(rstatc); } void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) { struct cgroup_rstat_cpu *rstatc; unsigned long flags; rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); rstatc->bstat.cputime.sum_exec_runtime += delta_exec; cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags); } void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec) { struct cgroup_rstat_cpu *rstatc; unsigned long flags; rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); switch (index) { case CPUTIME_USER: case CPUTIME_NICE: rstatc->bstat.cputime.utime += delta_exec; break; case CPUTIME_SYSTEM: case CPUTIME_IRQ: case CPUTIME_SOFTIRQ: rstatc->bstat.cputime.stime += delta_exec; break; #ifdef CONFIG_SCHED_CORE case CPUTIME_FORCEIDLE: rstatc->bstat.forceidle_sum += delta_exec; break; #endif default: break; } cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags); } /* * compute the cputime for the root cgroup by getting the per cpu data * at a global level, then categorizing the fields in a manner consistent * with how it is done by __cgroup_account_cputime_field for each bit of * cpu time attributed to a cgroup. */ static void root_cgroup_cputime(struct cgroup_base_stat *bstat) { struct task_cputime *cputime = &bstat->cputime; int i; memset(bstat, 0, sizeof(*bstat)); for_each_possible_cpu(i) { struct kernel_cpustat kcpustat; u64 *cpustat = kcpustat.cpustat; u64 user = 0; u64 sys = 0; kcpustat_cpu_fetch(&kcpustat, i); user += cpustat[CPUTIME_USER]; user += cpustat[CPUTIME_NICE]; cputime->utime += user; sys += cpustat[CPUTIME_SYSTEM]; sys += cpustat[CPUTIME_IRQ]; sys += cpustat[CPUTIME_SOFTIRQ]; cputime->stime += sys; cputime->sum_exec_runtime += user; cputime->sum_exec_runtime += sys; cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL]; #ifdef CONFIG_SCHED_CORE bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE]; #endif } } void cgroup_base_stat_cputime_show(struct seq_file *seq) { struct cgroup *cgrp = seq_css(seq)->cgroup; u64 usage, utime, stime; struct cgroup_base_stat bstat; #ifdef CONFIG_SCHED_CORE u64 forceidle_time; #endif if (cgroup_parent(cgrp)) { cgroup_rstat_flush_hold(cgrp); usage = cgrp->bstat.cputime.sum_exec_runtime; cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime); #ifdef CONFIG_SCHED_CORE forceidle_time = cgrp->bstat.forceidle_sum; #endif cgroup_rstat_flush_release(); } else { root_cgroup_cputime(&bstat); usage = bstat.cputime.sum_exec_runtime; utime = bstat.cputime.utime; stime = bstat.cputime.stime; #ifdef CONFIG_SCHED_CORE forceidle_time = bstat.forceidle_sum; #endif } do_div(usage, NSEC_PER_USEC); do_div(utime, NSEC_PER_USEC); do_div(stime, NSEC_PER_USEC); #ifdef CONFIG_SCHED_CORE do_div(forceidle_time, NSEC_PER_USEC); #endif seq_printf(seq, "usage_usec %llu\n" "user_usec %llu\n" "system_usec %llu\n", usage, utime, stime); #ifdef CONFIG_SCHED_CORE seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time); #endif } /* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */ BTF_KFUNCS_START(bpf_rstat_kfunc_ids) BTF_ID_FLAGS(func, cgroup_rstat_updated) BTF_ID_FLAGS(func, cgroup_rstat_flush, 
KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_rstat_kfunc_ids)

static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
	.owner	= THIS_MODULE,
	.set	= &bpf_rstat_kfunc_ids,
};

static int __init bpf_rstat_kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);
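/*
 * Illustrative sketch, not part of the file above: one way a BPF stat
 * collector might plug into the pieces exported here, namely the __weak
 * bpf_rstat_flush() hook and the cgroup_rstat_updated()/cgroup_rstat_flush()
 * kfuncs registered for BPF_PROG_TYPE_TRACING. The program names, the
 * cgroup_attach_task attach point and the header paths are assumptions made
 * for the example; see the in-tree BPF selftests for a complete, tested
 * version.
 */
// SPDX-License-Identifier: GPL-2.0
/* hypothetical BPF object, built against a bpftool-generated vmlinux.h */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* kfuncs made available by bpf_rstat_kfunc_init() above */
extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;

/* update side: mark @dst_cgrp dirty whenever the tracked event happens
 * (the attach point is a hypothetical choice for this sketch) */
SEC("fentry/cgroup_attach_task")
int BPF_PROG(example_update, struct cgroup *dst_cgrp, struct task_struct *task,
	     bool threadgroup)
{
	/* bump per-cpu counters for dst_cgrp here, then put it on the
	 * per-cpu updated list so the next flush visits it */
	cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
	return 0;
}

/* flush side: called once per (cgroup, cpu) pair during cgroup_rstat_flush() */
SEC("fentry/bpf_rstat_flush")
int BPF_PROG(example_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu)
{
	/* fold this collector's per-cpu deltas for @cgrp into its totals
	 * and propagate them to @parent here */
	return 0;
}

/* cgroup_rstat_flush() is registered KF_SLEEPABLE, so a reader (for example
 * a sleepable cgroup iterator program) would call it before dumping totals. */

char _license[] SEC("license") = "GPL";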
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _INPUT_MT_H #define _INPUT_MT_H /* * Input Multitouch Library * * Copyright (c) 2010 Henrik Rydberg */ #include <linux/input.h> #define TRKID_MAX 0xffff #define INPUT_MT_POINTER 0x0001 /* pointer device, e.g. trackpad */ #define INPUT_MT_DIRECT 0x0002 /* direct device, e.g. touchscreen */ #define INPUT_MT_DROP_UNUSED 0x0004 /* drop contacts not seen in frame */ #define INPUT_MT_TRACK 0x0008 /* use in-kernel tracking */ #define INPUT_MT_SEMI_MT 0x0010 /* semi-mt device, finger count handled manually */ /** * struct input_mt_slot - represents the state of an input MT slot * @abs: holds current values of ABS_MT axes for this slot * @frame: last frame at which input_mt_report_slot_state() was called * @key: optional driver designation of this slot */ struct input_mt_slot { int abs[ABS_MT_LAST - ABS_MT_FIRST + 1]; unsigned int frame; unsigned int key; }; /** * struct input_mt - state of tracked contacts * @trkid: stores MT tracking ID for the next contact * @num_slots: number of MT slots the device uses * @slot: MT slot currently being transmitted * @flags: input_mt operation flags * @frame: increases every time input_mt_sync_frame() is called * @red: reduced cost matrix for in-kernel tracking * @slots: array of slots holding current values of tracked contacts */ struct input_mt { int trkid; int num_slots; int slot; unsigned int flags; unsigned int frame; int *red; struct input_mt_slot slots[] __counted_by(num_slots); }; static inline void input_mt_set_value(struct input_mt_slot *slot, unsigned code, int value) { slot->abs[code - ABS_MT_FIRST] = value; } static inline int input_mt_get_value(const struct input_mt_slot *slot, unsigned code) { return slot->abs[code - ABS_MT_FIRST]; } static inline bool input_mt_is_active(const struct input_mt_slot *slot) { return input_mt_get_value(slot, ABS_MT_TRACKING_ID) >= 0; } static inline bool input_mt_is_used(const struct input_mt *mt, const struct input_mt_slot *slot) { return slot->frame == mt->frame; } int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, unsigned int flags); void input_mt_destroy_slots(struct input_dev *dev); static inline int input_mt_new_trkid(struct input_mt *mt) { return mt->trkid++ & TRKID_MAX; } static inline void input_mt_slot(struct input_dev *dev, int slot) { input_event(dev, EV_ABS, ABS_MT_SLOT, slot); } static inline bool input_is_mt_value(int axis) { return axis >= ABS_MT_FIRST && axis <= ABS_MT_LAST; } static inline bool input_is_mt_axis(int axis) { return axis == ABS_MT_SLOT || input_is_mt_value(axis); } bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); static inline void input_mt_report_slot_inactive(struct input_dev *dev) { input_mt_report_slot_state(dev, 0, false); } void input_mt_report_finger_count(struct input_dev *dev, int count); void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count); void input_mt_drop_unused(struct input_dev *dev); void input_mt_sync_frame(struct input_dev *dev); /** * struct input_mt_pos - contact position * @x: horizontal coordinate * @y: vertical coordinate */ 
struct input_mt_pos {
	s16 x, y;
};

int input_mt_assign_slots(struct input_dev *dev, int *slots,
			  const struct input_mt_pos *pos, int num_pos,
			  int dmax);

int input_mt_get_slot_by_key(struct input_dev *dev, int key);

#endif
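/*
 * Illustrative sketch, not part of this header: how a touchscreen driver
 * might use the MT slots API declared above. The example_* names, the
 * contact structure and the axis ranges are assumptions made up for the
 * example; real drivers also report per-slot pressure, tool types, etc.
 */
#include <linux/input.h>
#include <linux/input/mt.h>

#define EXAMPLE_MAX_CONTACTS	5	/* hypothetical hardware limit */

struct example_contact {		/* hypothetical decoded touch point */
	int slot;
	int x, y;
};

static int example_setup(struct input_dev *input, int max_x, int max_y)
{
	input_set_abs_params(input, ABS_MT_POSITION_X, 0, max_x, 0, 0);
	input_set_abs_params(input, ABS_MT_POSITION_Y, 0, max_y, 0, 0);

	/* direct device (touchscreen); unused slots are dropped per frame */
	return input_mt_init_slots(input, EXAMPLE_MAX_CONTACTS,
				   INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
}

static void example_report_frame(struct input_dev *input,
				 const struct example_contact *contacts,
				 int count)
{
	int i;

	for (i = 0; i < count; i++) {
		input_mt_slot(input, contacts[i].slot);
		input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
		input_report_abs(input, ABS_MT_POSITION_X, contacts[i].x);
		input_report_abs(input, ABS_MT_POSITION_Y, contacts[i].y);
	}
	/* INPUT_MT_DROP_UNUSED: slots not touched above become inactive */
	input_mt_sync_frame(input);
	input_sync(input);
}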
/*
 * This file implements the Wireless Extensions core API.
 *
 * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
 * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
 * Copyright	2009 Johannes Berg <johannes@sipsolutions.net>
 * Copyright (C) 2024 Intel Corporation
 *
 * (As with all parts of the Linux kernel, this file is GPL)
 */
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/wireless.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <net/cfg80211.h>
#include <net/iw_handler.h>
#include <net/netlink.h>
#include <net/wext.h>
#include <net/net_namespace.h>

typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *,
			       unsigned int, struct iw_request_info *,
			       iw_handler);

/*
 * Meta-data about all the standard Wireless Extension requests we
 * know about.
*/ static const struct iw_ioctl_description standard_ioctl[] = { [IW_IOCTL_IDX(SIOCSIWCOMMIT)] = { .header_type = IW_HEADER_TYPE_NULL, }, [IW_IOCTL_IDX(SIOCGIWNAME)] = { .header_type = IW_HEADER_TYPE_CHAR, .flags = IW_DESCR_FLAG_DUMP, }, [IW_IOCTL_IDX(SIOCSIWNWID)] = { .header_type = IW_HEADER_TYPE_PARAM, .flags = IW_DESCR_FLAG_EVENT, }, [IW_IOCTL_IDX(SIOCGIWNWID)] = { .header_type = IW_HEADER_TYPE_PARAM, .flags = IW_DESCR_FLAG_DUMP, }, [IW_IOCTL_IDX(SIOCSIWFREQ)] = { .header_type = IW_HEADER_TYPE_FREQ, .flags = IW_DESCR_FLAG_EVENT, }, [IW_IOCTL_IDX(SIOCGIWFREQ)] = { .header_type = IW_HEADER_TYPE_FREQ, .flags = IW_DESCR_FLAG_DUMP, }, [IW_IOCTL_IDX(SIOCSIWMODE)] = { .header_type = IW_HEADER_TYPE_UINT, .flags = IW_DESCR_FLAG_EVENT, }, [IW_IOCTL_IDX(SIOCGIWMODE)] = { .header_type = IW_HEADER_TYPE_UINT, .flags = IW_DESCR_FLAG_DUMP, }, [IW_IOCTL_IDX(SIOCSIWSENS)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCGIWSENS)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCSIWRANGE)] = { .header_type = IW_HEADER_TYPE_NULL, }, [IW_IOCTL_IDX(SIOCGIWRANGE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = sizeof(struct iw_range), .flags = IW_DESCR_FLAG_DUMP, }, [IW_IOCTL_IDX(SIOCSIWPRIV)] = { .header_type = IW_HEADER_TYPE_NULL, }, [IW_IOCTL_IDX(SIOCGIWPRIV)] = { /* (handled directly by us) */ .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct iw_priv_args), .max_tokens = 16, .flags = IW_DESCR_FLAG_NOMAX, }, [IW_IOCTL_IDX(SIOCSIWSTATS)] = { .header_type = IW_HEADER_TYPE_NULL, }, [IW_IOCTL_IDX(SIOCGIWSTATS)] = { /* (handled directly by us) */ .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = sizeof(struct iw_statistics), .flags = IW_DESCR_FLAG_DUMP, }, [IW_IOCTL_IDX(SIOCSIWSPY)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct sockaddr), .max_tokens = IW_MAX_SPY, }, [IW_IOCTL_IDX(SIOCGIWSPY)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct sockaddr) + sizeof(struct iw_quality), .max_tokens = IW_MAX_SPY, }, [IW_IOCTL_IDX(SIOCSIWTHRSPY)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct iw_thrspy), .min_tokens = 1, .max_tokens = 1, }, [IW_IOCTL_IDX(SIOCGIWTHRSPY)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct iw_thrspy), .min_tokens = 1, .max_tokens = 1, }, [IW_IOCTL_IDX(SIOCSIWAP)] = { .header_type = IW_HEADER_TYPE_ADDR, }, [IW_IOCTL_IDX(SIOCGIWAP)] = { .header_type = IW_HEADER_TYPE_ADDR, .flags = IW_DESCR_FLAG_DUMP, }, [IW_IOCTL_IDX(SIOCSIWMLME)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = sizeof(struct iw_mlme), .max_tokens = sizeof(struct iw_mlme), }, [IW_IOCTL_IDX(SIOCGIWAPLIST)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = sizeof(struct sockaddr) + sizeof(struct iw_quality), .max_tokens = IW_MAX_AP, .flags = IW_DESCR_FLAG_NOMAX, }, [IW_IOCTL_IDX(SIOCSIWSCAN)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = 0, .max_tokens = sizeof(struct iw_scan_req), }, [IW_IOCTL_IDX(SIOCGIWSCAN)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_SCAN_MAX_DATA, .flags = IW_DESCR_FLAG_NOMAX, }, [IW_IOCTL_IDX(SIOCSIWESSID)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ESSID_MAX_SIZE, .flags = IW_DESCR_FLAG_EVENT, }, [IW_IOCTL_IDX(SIOCGIWESSID)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ESSID_MAX_SIZE, .flags = IW_DESCR_FLAG_DUMP, }, [IW_IOCTL_IDX(SIOCSIWNICKN)] = { .header_type = IW_HEADER_TYPE_POINT, 
.token_size = 1, .max_tokens = IW_ESSID_MAX_SIZE, }, [IW_IOCTL_IDX(SIOCGIWNICKN)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ESSID_MAX_SIZE, }, [IW_IOCTL_IDX(SIOCSIWRATE)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCGIWRATE)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCSIWRTS)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCGIWRTS)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCSIWFRAG)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCGIWFRAG)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCSIWTXPOW)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCGIWTXPOW)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCSIWRETRY)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCGIWRETRY)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCSIWENCODE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ENCODING_TOKEN_MAX, .flags = IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT, }, [IW_IOCTL_IDX(SIOCGIWENCODE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_ENCODING_TOKEN_MAX, .flags = IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT, }, [IW_IOCTL_IDX(SIOCSIWPOWER)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCGIWPOWER)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCSIWGENIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, [IW_IOCTL_IDX(SIOCGIWGENIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, [IW_IOCTL_IDX(SIOCSIWAUTH)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCGIWAUTH)] = { .header_type = IW_HEADER_TYPE_PARAM, }, [IW_IOCTL_IDX(SIOCSIWENCODEEXT)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = sizeof(struct iw_encode_ext), .max_tokens = sizeof(struct iw_encode_ext) + IW_ENCODING_TOKEN_MAX, }, [IW_IOCTL_IDX(SIOCGIWENCODEEXT)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = sizeof(struct iw_encode_ext), .max_tokens = sizeof(struct iw_encode_ext) + IW_ENCODING_TOKEN_MAX, }, [IW_IOCTL_IDX(SIOCSIWPMKSA)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .min_tokens = sizeof(struct iw_pmksa), .max_tokens = sizeof(struct iw_pmksa), }, }; static const unsigned int standard_ioctl_num = ARRAY_SIZE(standard_ioctl); /* * Meta-data about all the additional standard Wireless Extension events * we know about. 
*/ static const struct iw_ioctl_description standard_event[] = { [IW_EVENT_IDX(IWEVTXDROP)] = { .header_type = IW_HEADER_TYPE_ADDR, }, [IW_EVENT_IDX(IWEVQUAL)] = { .header_type = IW_HEADER_TYPE_QUAL, }, [IW_EVENT_IDX(IWEVCUSTOM)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_CUSTOM_MAX, }, [IW_EVENT_IDX(IWEVREGISTERED)] = { .header_type = IW_HEADER_TYPE_ADDR, }, [IW_EVENT_IDX(IWEVEXPIRED)] = { .header_type = IW_HEADER_TYPE_ADDR, }, [IW_EVENT_IDX(IWEVGENIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, [IW_EVENT_IDX(IWEVMICHAELMICFAILURE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = sizeof(struct iw_michaelmicfailure), }, [IW_EVENT_IDX(IWEVASSOCREQIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, [IW_EVENT_IDX(IWEVASSOCRESPIE)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = IW_GENERIC_IE_MAX, }, [IW_EVENT_IDX(IWEVPMKIDCAND)] = { .header_type = IW_HEADER_TYPE_POINT, .token_size = 1, .max_tokens = sizeof(struct iw_pmkid_cand), }, }; static const unsigned int standard_event_num = ARRAY_SIZE(standard_event); /* Size (in bytes) of various events */ static const int event_type_size[] = { IW_EV_LCP_LEN, /* IW_HEADER_TYPE_NULL */ 0, IW_EV_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ 0, IW_EV_UINT_LEN, /* IW_HEADER_TYPE_UINT */ IW_EV_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ IW_EV_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ 0, IW_EV_POINT_LEN, /* Without variable payload */ IW_EV_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ }; #ifdef CONFIG_COMPAT static const int compat_event_type_size[] = { IW_EV_COMPAT_LCP_LEN, /* IW_HEADER_TYPE_NULL */ 0, IW_EV_COMPAT_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ 0, IW_EV_COMPAT_UINT_LEN, /* IW_HEADER_TYPE_UINT */ IW_EV_COMPAT_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ IW_EV_COMPAT_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ 0, IW_EV_COMPAT_POINT_LEN, /* Without variable payload */ IW_EV_COMPAT_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ IW_EV_COMPAT_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ }; #endif /* IW event code */ void wireless_nlevent_flush(void) { struct sk_buff *skb; struct net *net; down_read(&net_rwsem); for_each_net(net) { while ((skb = skb_dequeue(&net->wext_nlevents))) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); } up_read(&net_rwsem); } EXPORT_SYMBOL_GPL(wireless_nlevent_flush); static int wext_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { /* * When a netdev changes state in any way, flush all pending messages * to avoid them going out in a strange order, e.g. RTM_NEWLINK after * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close() * or similar - all of which could otherwise happen due to delays from * schedule_work(). 
*/ wireless_nlevent_flush(); return NOTIFY_OK; } static struct notifier_block wext_netdev_notifier = { .notifier_call = wext_netdev_notifier_call, }; static int __net_init wext_pernet_init(struct net *net) { skb_queue_head_init(&net->wext_nlevents); return 0; } static void __net_exit wext_pernet_exit(struct net *net) { skb_queue_purge(&net->wext_nlevents); } static struct pernet_operations wext_pernet_ops = { .init = wext_pernet_init, .exit = wext_pernet_exit, }; static int __init wireless_nlevent_init(void) { int err = register_pernet_subsys(&wext_pernet_ops); if (err) return err; err = register_netdevice_notifier(&wext_netdev_notifier); if (err) unregister_pernet_subsys(&wext_pernet_ops); return err; } subsys_initcall(wireless_nlevent_init); /* Process events generated by the wireless layer or the driver. */ static void wireless_nlevent_process(struct work_struct *work) { wireless_nlevent_flush(); } static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); static struct nlmsghdr *rtnetlink_ifinfo_prep(struct net_device *dev, struct sk_buff *skb) { struct ifinfomsg *r; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, 0, 0, RTM_NEWLINK, sizeof(*r), 0); if (!nlh) return NULL; r = nlmsg_data(nlh); r->ifi_family = AF_UNSPEC; r->__ifi_pad = 0; r->ifi_type = dev->type; r->ifi_index = dev->ifindex; r->ifi_flags = dev_get_flags(dev); r->ifi_change = 0; /* Wireless changes don't affect those flags */ if (nla_put_string(skb, IFLA_IFNAME, dev->name)) goto nla_put_failure; return nlh; nla_put_failure: nlmsg_cancel(skb, nlh); return NULL; } /* * Main event dispatcher. Called from other parts and drivers. * Send the event on the appropriate channels. * May be called from interrupt context. */ void wireless_send_event(struct net_device * dev, unsigned int cmd, union iwreq_data * wrqu, const char * extra) { const struct iw_ioctl_description * descr = NULL; int extra_len = 0; struct iw_event *event; /* Mallocated whole event */ int event_len; /* Its size */ int hdr_len; /* Size of the event header */ int wrqu_off = 0; /* Offset in wrqu */ /* Don't "optimise" the following variable, it will crash */ unsigned int cmd_index; /* *MUST* be unsigned */ struct sk_buff *skb; struct nlmsghdr *nlh; struct nlattr *nla; #ifdef CONFIG_COMPAT struct __compat_iw_event *compat_event; struct compat_iw_point compat_wrqu; struct sk_buff *compskb; int ptr_len; #endif /* * Nothing in the kernel sends scan events with data, be safe. * This is necessary because we cannot fix up scan event data * for compat, due to being contained in 'extra', but normally * applications are required to retrieve the scan data anyway * and no data is included in the event, this codifies that * practice. */ if (WARN_ON(cmd == SIOCGIWSCAN && extra)) extra = NULL; /* Get the description of the Event */ if (cmd <= SIOCIWLAST) { cmd_index = IW_IOCTL_IDX(cmd); if (cmd_index < standard_ioctl_num) descr = &(standard_ioctl[cmd_index]); } else { cmd_index = IW_EVENT_IDX(cmd); if (cmd_index < standard_event_num) descr = &(standard_event[cmd_index]); } /* Don't accept unknown events */ if (descr == NULL) { /* Note : we don't return an error to the driver, because * the driver would not know what to do about it. It can't * return an error to the user, because the event is not * initiated by a user request. * The best the driver could do is to log an error message. * We will do it ourselves instead... 
*/ netdev_err(dev, "(WE) : Invalid/Unknown Wireless Event (0x%04X)\n", cmd); return; } /* Check extra parameters and set extra_len */ if (descr->header_type == IW_HEADER_TYPE_POINT) { /* Check if number of token fits within bounds */ if (wrqu->data.length > descr->max_tokens) { netdev_err(dev, "(WE) : Wireless Event (cmd=0x%04X) too big (%d)\n", cmd, wrqu->data.length); return; } if (wrqu->data.length < descr->min_tokens) { netdev_err(dev, "(WE) : Wireless Event (cmd=0x%04X) too small (%d)\n", cmd, wrqu->data.length); return; } /* Calculate extra_len - extra is NULL for restricted events */ if (extra != NULL) extra_len = wrqu->data.length * descr->token_size; /* Always at an offset in wrqu */ wrqu_off = IW_EV_POINT_OFF; } /* Total length of the event */ hdr_len = event_type_size[descr->header_type]; event_len = hdr_len + extra_len; /* * The problem for 64/32 bit. * * On 64-bit, a regular event is laid out as follows: * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * | event.len | event.cmd | p a d d i n g | * | wrqu data ... (with the correct size) | * * This padding exists because we manipulate event->u, * and 'event' is not packed. * * An iw_point event is laid out like this instead: * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * | event.len | event.cmd | p a d d i n g | * | iwpnt.len | iwpnt.flg | p a d d i n g | * | extra data ... * * The second padding exists because struct iw_point is extended, * but this depends on the platform... * * On 32-bit, all the padding shouldn't be there. */ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; /* Send via the RtNetlink event channel */ nlh = rtnetlink_ifinfo_prep(dev, skb); if (WARN_ON(!nlh)) { kfree_skb(skb); return; } /* Add the wireless events in the netlink packet */ nla = nla_reserve(skb, IFLA_WIRELESS, event_len); if (!nla) { kfree_skb(skb); return; } event = nla_data(nla); /* Fill event - first clear to avoid data leaking */ memset(event, 0, hdr_len); event->len = event_len; event->cmd = cmd; memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); if (extra_len) memcpy(((char *) event) + hdr_len, extra, extra_len); nlmsg_end(skb, nlh); #ifdef CONFIG_COMPAT hdr_len = compat_event_type_size[descr->header_type]; /* ptr_len is remaining size in event header apart from LCP */ ptr_len = hdr_len - IW_EV_COMPAT_LCP_LEN; event_len = hdr_len + extra_len; compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!compskb) { kfree_skb(skb); return; } /* Send via the RtNetlink event channel */ nlh = rtnetlink_ifinfo_prep(dev, compskb); if (WARN_ON(!nlh)) { kfree_skb(skb); kfree_skb(compskb); return; } /* Add the wireless events in the netlink packet */ nla = nla_reserve(compskb, IFLA_WIRELESS, event_len); if (!nla) { kfree_skb(skb); kfree_skb(compskb); return; } compat_event = nla_data(nla); compat_event->len = event_len; compat_event->cmd = cmd; if (descr->header_type == IW_HEADER_TYPE_POINT) { compat_wrqu.length = wrqu->data.length; compat_wrqu.flags = wrqu->data.flags; memcpy(compat_event->ptr_bytes, ((char *)&compat_wrqu) + IW_EV_COMPAT_POINT_OFF, ptr_len); if (extra_len) memcpy(&compat_event->ptr_bytes[ptr_len], extra, extra_len); } else { /* extra_len must be zero, so no if (extra) needed */ memcpy(compat_event->ptr_bytes, wrqu, ptr_len); } nlmsg_end(compskb, nlh); skb_shinfo(skb)->frag_list = compskb; #endif skb_queue_tail(&dev_net(dev)->wext_nlevents, skb); schedule_work(&wireless_nlevent_work); } EXPORT_SYMBOL(wireless_send_event); #ifdef CONFIG_CFG80211_WEXT static void wireless_warn_cfg80211_wext(void) { char 
name[sizeof(current->comm)]; pr_warn_once("warning: `%s' uses wireless extensions which will stop working for Wi-Fi 7 hardware; use nl80211\n", get_task_comm(name, current)); } #endif /* IW handlers */ struct iw_statistics *get_wireless_stats(struct net_device *dev) { #ifdef CONFIG_WIRELESS_EXT if ((dev->wireless_handlers != NULL) && (dev->wireless_handlers->get_wireless_stats != NULL)) return dev->wireless_handlers->get_wireless_stats(dev); #endif #ifdef CONFIG_CFG80211_WEXT if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy && dev->ieee80211_ptr->wiphy->wext && dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) { wireless_warn_cfg80211_wext(); if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO | WIPHY_FLAG_DISABLE_WEXT)) return NULL; return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev); } #endif /* not found */ return NULL; } /* noinline to avoid a bogus warning with -O3 */ static noinline int iw_handler_get_iwstats(struct net_device * dev, struct iw_request_info * info, union iwreq_data * wrqu, char * extra) { /* Get stats from the driver */ struct iw_statistics *stats; stats = get_wireless_stats(dev); if (stats) { /* Copy statistics to extra */ memcpy(extra, stats, sizeof(struct iw_statistics)); wrqu->data.length = sizeof(struct iw_statistics); /* Check if we need to clear the updated flag */ if (wrqu->data.flags != 0) stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; return 0; } else return -EOPNOTSUPP; } static iw_handler get_handler(struct net_device *dev, unsigned int cmd) { /* Don't "optimise" the following variable, it will crash */ unsigned int index; /* *MUST* be unsigned */ const struct iw_handler_def *handlers = NULL; #ifdef CONFIG_CFG80211_WEXT if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) { wireless_warn_cfg80211_wext(); if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO | WIPHY_FLAG_DISABLE_WEXT)) return NULL; handlers = dev->ieee80211_ptr->wiphy->wext; } #endif #ifdef CONFIG_WIRELESS_EXT if (dev->wireless_handlers) handlers = dev->wireless_handlers; #endif if (!handlers) return NULL; /* Try as a standard command */ index = IW_IOCTL_IDX(cmd); if (index < handlers->num_standard) return handlers->standard[index]; #ifdef CONFIG_WEXT_PRIV /* Try as a private command */ index = cmd - SIOCIWFIRSTPRIV; if (index < handlers->num_private) return handlers->private[index]; #endif /* Not found */ return NULL; } static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, const struct iw_ioctl_description *descr, iw_handler handler, struct net_device *dev, struct iw_request_info *info) { int err, extra_size, user_length = 0, essid_compat = 0; char *extra; /* Calculate space needed by arguments. Always allocate * for max space. 
*/ extra_size = descr->max_tokens * descr->token_size; /* Check need for ESSID compatibility for WE < 21 */ switch (cmd) { case SIOCSIWESSID: case SIOCGIWESSID: case SIOCSIWNICKN: case SIOCGIWNICKN: if (iwp->length == descr->max_tokens + 1) essid_compat = 1; else if (IW_IS_SET(cmd) && (iwp->length != 0)) { char essid[IW_ESSID_MAX_SIZE + 1]; unsigned int len; len = iwp->length * descr->token_size; if (len > IW_ESSID_MAX_SIZE) return -EFAULT; err = copy_from_user(essid, iwp->pointer, len); if (err) return -EFAULT; if (essid[iwp->length - 1] == '\0') essid_compat = 1; } break; default: break; } iwp->length -= essid_compat; /* Check what user space is giving us */ if (IW_IS_SET(cmd)) { /* Check NULL pointer */ if (!iwp->pointer && iwp->length != 0) return -EFAULT; /* Check if number of token fits within bounds */ if (iwp->length > descr->max_tokens) return -E2BIG; if (iwp->length < descr->min_tokens) return -EINVAL; } else { /* Check NULL pointer */ if (!iwp->pointer) return -EFAULT; /* Save user space buffer size for checking */ user_length = iwp->length; /* Don't check if user_length > max to allow forward * compatibility. The test user_length < min is * implied by the test at the end. */ /* Support for very large requests */ if ((descr->flags & IW_DESCR_FLAG_NOMAX) && (user_length > descr->max_tokens)) { /* Allow userspace to GET more than max so * we can support any size GET requests. * There is still a limit : -ENOMEM. */ extra_size = user_length * descr->token_size; /* Note : user_length is originally a __u16, * and token_size is controlled by us, * so extra_size won't get negative and * won't overflow... */ } } /* Sanity-check to ensure we never end up _allocating_ zero * bytes of data for extra. */ if (extra_size <= 0) return -EFAULT; /* kzalloc() ensures NULL-termination for essid_compat. */ extra = kzalloc(extra_size, GFP_KERNEL); if (!extra) return -ENOMEM; /* If it is a SET, get all the extra data in here */ if (IW_IS_SET(cmd) && (iwp->length != 0)) { if (copy_from_user(extra, iwp->pointer, iwp->length * descr->token_size)) { err = -EFAULT; goto out; } if (cmd == SIOCSIWENCODEEXT) { struct iw_encode_ext *ee = (void *) extra; if (iwp->length < sizeof(*ee) + ee->key_len) { err = -EFAULT; goto out; } } } if (IW_IS_GET(cmd) && !(descr->flags & IW_DESCR_FLAG_NOMAX)) { /* * If this is a GET, but not NOMAX, it means that the extra * data is not bounded by userspace, but by max_tokens. Thus * set the length to max_tokens. This matches the extra data * allocation. * The driver should fill it with the number of tokens it * provided, and it may check iwp->length rather than having * knowledge of max_tokens. If the driver doesn't change the * iwp->length, this ioctl just copies back max_token tokens * filled with zeroes. Hopefully the driver isn't claiming * them to be valid data. */ iwp->length = descr->max_tokens; } err = handler(dev, info, (union iwreq_data *) iwp, extra); iwp->length += essid_compat; /* If we have something to return to the user */ if (!err && IW_IS_GET(cmd)) { /* Check if there is enough buffer up there */ if (user_length < iwp->length) { err = -E2BIG; goto out; } if (copy_to_user(iwp->pointer, extra, iwp->length * descr->token_size)) { err = -EFAULT; goto out; } } /* Generate an event to notify listeners of the change */ if ((descr->flags & IW_DESCR_FLAG_EVENT) && ((err == 0) || (err == -EIWCOMMIT))) { union iwreq_data *data = (union iwreq_data *) iwp; if (descr->flags & IW_DESCR_FLAG_RESTRICT) /* If the event is restricted, don't * export the payload. 
*/ wireless_send_event(dev, cmd, data, NULL); else wireless_send_event(dev, cmd, data, extra); } out: kfree(extra); return err; } /* * Call the commit handler in the driver * (if exist and if conditions are right) * * Note : our current commit strategy is currently pretty dumb, * but we will be able to improve on that... * The goal is to try to agreagate as many changes as possible * before doing the commit. Drivers that will define a commit handler * are usually those that need a reset after changing parameters, so * we want to minimise the number of reset. * A cool idea is to use a timer : at each "set" command, we re-set the * timer, when the timer eventually fires, we call the driver. * Hopefully, more on that later. * * Also, I'm waiting to see how many people will complain about the * netif_running(dev) test. I'm open on that one... * Hopefully, the driver will remember to do a commit in "open()" ;-) */ int call_commit_handler(struct net_device *dev) { #ifdef CONFIG_WIRELESS_EXT if (netif_running(dev) && dev->wireless_handlers && dev->wireless_handlers->standard[0]) /* Call the commit handler on the driver */ return dev->wireless_handlers->standard[0](dev, NULL, NULL, NULL); else return 0; /* Command completed successfully */ #else /* cfg80211 has no commit */ return 0; #endif } /* * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... */ static int wireless_process_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, struct iw_request_info *info, wext_ioctl_func standard, wext_ioctl_func private) { struct net_device *dev; iw_handler handler; /* Permissions are already checked in dev_ioctl() before calling us. * The copy_to/from_user() of ifr is also dealt with in there */ /* Make sure the device exist */ if ((dev = __dev_get_by_name(net, iwr->ifr_name)) == NULL) return -ENODEV; /* A bunch of special cases, then the generic case... * Note that 'cmd' is already filtered in dev_ioctl() with * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */ if (cmd == SIOCGIWSTATS) return standard(dev, iwr, cmd, info, &iw_handler_get_iwstats); #ifdef CONFIG_WEXT_PRIV if (cmd == SIOCGIWPRIV && dev->wireless_handlers) return standard(dev, iwr, cmd, info, iw_handler_get_private); #endif /* Basic check */ if (!netif_device_present(dev)) return -ENODEV; /* New driver API : try to find the handler */ handler = get_handler(dev, cmd); if (handler) { /* Standard and private are not the same */ if (cmd < SIOCIWFIRSTPRIV) return standard(dev, iwr, cmd, info, handler); else if (private) return private(dev, iwr, cmd, info, handler); } return -EOPNOTSUPP; } /* If command is `set a parameter', or `get the encoding parameters', * check if the user has the right to do it. */ static int wext_permission_check(unsigned int cmd) { if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT) && !capable(CAP_NET_ADMIN)) return -EPERM; return 0; } /* entry point from dev ioctl */ static int wext_ioctl_dispatch(struct net *net, struct iwreq *iwr, unsigned int cmd, struct iw_request_info *info, wext_ioctl_func standard, wext_ioctl_func private) { int ret = wext_permission_check(cmd); if (ret) return ret; dev_load(net, iwr->ifr_name); rtnl_lock(); ret = wireless_process_ioctl(net, iwr, cmd, info, standard, private); rtnl_unlock(); return ret; } /* * Wrapper to call a standard Wireless Extension handler. * We do various checks and also take care of moving data between * user space and kernel space. 
*/ static int ioctl_standard_call(struct net_device * dev, struct iwreq *iwr, unsigned int cmd, struct iw_request_info *info, iw_handler handler) { const struct iw_ioctl_description * descr; int ret = -EINVAL; /* Get the description of the IOCTL */ if (IW_IOCTL_IDX(cmd) >= standard_ioctl_num) return -EOPNOTSUPP; descr = &(standard_ioctl[IW_IOCTL_IDX(cmd)]); /* Check if we have a pointer to user space data or not */ if (descr->header_type != IW_HEADER_TYPE_POINT) { /* No extra arguments. Trivial to handle */ ret = handler(dev, info, &(iwr->u), NULL); /* Generate an event to notify listeners of the change */ if ((descr->flags & IW_DESCR_FLAG_EVENT) && ((ret == 0) || (ret == -EIWCOMMIT))) wireless_send_event(dev, cmd, &(iwr->u), NULL); } else { ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr, handler, dev, info); } /* Call commit handler if needed and defined */ if (ret == -EIWCOMMIT) ret = call_commit_handler(dev); /* Here, we will generate the appropriate event if needed */ return ret; } int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct iw_request_info info = { .cmd = cmd, .flags = 0 }; struct iwreq iwr; int ret; if (copy_from_user(&iwr, arg, sizeof(iwr))) return -EFAULT; iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0; ret = wext_ioctl_dispatch(net, &iwr, cmd, &info, ioctl_standard_call, ioctl_private_call); if (ret >= 0 && IW_IS_GET(cmd) && copy_to_user(arg, &iwr, sizeof(struct iwreq))) return -EFAULT; return ret; } #ifdef CONFIG_COMPAT static int compat_standard_call(struct net_device *dev, struct iwreq *iwr, unsigned int cmd, struct iw_request_info *info, iw_handler handler) { const struct iw_ioctl_description *descr; struct compat_iw_point *iwp_compat; struct iw_point iwp; int err; descr = standard_ioctl + IW_IOCTL_IDX(cmd); if (descr->header_type != IW_HEADER_TYPE_POINT) return ioctl_standard_call(dev, iwr, cmd, info, handler); iwp_compat = (struct compat_iw_point *) &iwr->u.data; iwp.pointer = compat_ptr(iwp_compat->pointer); iwp.length = iwp_compat->length; iwp.flags = iwp_compat->flags; err = ioctl_standard_iw_point(&iwp, cmd, descr, handler, dev, info); iwp_compat->pointer = ptr_to_compat(iwp.pointer); iwp_compat->length = iwp.length; iwp_compat->flags = iwp.flags; return err; } int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct iw_request_info info; struct iwreq iwr; char *colon; int ret; if (copy_from_user(&iwr, argp, sizeof(struct iwreq))) return -EFAULT; iwr.ifr_name[IFNAMSIZ-1] = 0; colon = strchr(iwr.ifr_name, ':'); if (colon) *colon = 0; info.cmd = cmd; info.flags = IW_REQUEST_FLAG_COMPAT; ret = wext_ioctl_dispatch(net, &iwr, cmd, &info, compat_standard_call, compat_private_call); if (ret >= 0 && IW_IS_GET(cmd) && copy_to_user(argp, &iwr, sizeof(struct iwreq))) return -EFAULT; return ret; } #endif char *iwe_stream_add_event(struct iw_request_info *info, char *stream, char *ends, struct iw_event *iwe, int event_len) { int lcp_len = iwe_stream_lcp_len(info); event_len = iwe_stream_event_len_adjust(info, event_len); /* Check if it's possible */ if (likely((stream + event_len) < ends)) { iwe->len = event_len; /* Beware of alignement issues on 64 bits */ memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); memcpy(stream + lcp_len, &iwe->u, event_len - lcp_len); stream += event_len; } return stream; } EXPORT_SYMBOL(iwe_stream_add_event); char *iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends, struct iw_event *iwe, char *extra) { int event_len 
= iwe_stream_point_len(info) + iwe->u.data.length; int point_len = iwe_stream_point_len(info); int lcp_len = iwe_stream_lcp_len(info); /* Check if it's possible */ if (likely((stream + event_len) < ends)) { iwe->len = event_len; memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN); memcpy(stream + lcp_len, ((char *) &iwe->u) + IW_EV_POINT_OFF, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN); if (iwe->u.data.length && extra) memcpy(stream + point_len, extra, iwe->u.data.length); stream += event_len; } return stream; } EXPORT_SYMBOL(iwe_stream_add_point); char *iwe_stream_add_value(struct iw_request_info *info, char *event, char *value, char *ends, struct iw_event *iwe, int event_len) { int lcp_len = iwe_stream_lcp_len(info); /* Don't duplicate LCP */ event_len -= IW_EV_LCP_LEN; /* Check if it's possible */ if (likely((value + event_len) < ends)) { /* Add new value */ memcpy(value, &iwe->u, event_len); value += event_len; /* Patch LCP */ iwe->len = value - event; memcpy(event, (char *) iwe, lcp_len); } return value; } EXPORT_SYMBOL(iwe_stream_add_value);
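/*
 * Illustrative sketch, not part of this file: how a driver's SIOCGIWSCAN
 * handler might use the iwe_stream helpers exported above to append one
 * scan result to the event stream. example_add_bss() and its bssid/ssid
 * parameters are hypothetical; handling of buffer exhaustion (returning
 * -E2BIG once the stream stops advancing) is left to the caller, as in
 * real drivers.
 */
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/string.h>
#include <net/iw_handler.h>

static char *example_add_bss(struct iw_request_info *info, char *current_ev,
			     char *end_buf, const u8 *bssid,
			     const u8 *ssid, size_t ssid_len)
{
	struct iw_event iwe;

	memset(&iwe, 0, sizeof(iwe));

	/* fixed-size event: the BSSID */
	iwe.cmd = SIOCGIWAP;
	iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
	memcpy(iwe.u.ap_addr.sa_data, bssid, ETH_ALEN);
	current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
					  IW_EV_ADDR_LEN);

	/* variable-size event: the ESSID travels in the "extra" payload */
	iwe.cmd = SIOCGIWESSID;
	iwe.u.data.length = ssid_len;
	iwe.u.data.flags = 1;
	current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe,
					  (char *)ssid);

	return current_ev;	/* unchanged pointer means the buffer is full */
}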
19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 /* SPDX-License-Identifier: GPL-2.0 */ /* * Mutexes: blocking mutual exclusion locks * * started by Ingo Molnar: * * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> * * This file contains the main data structure and API definitions. */ #ifndef __LINUX_MUTEX_H #define __LINUX_MUTEX_H #include <asm/current.h> #include <linux/list.h> #include <linux/spinlock_types.h> #include <linux/lockdep.h> #include <linux/atomic.h> #include <asm/processor.h> #include <linux/osq_lock.h> #include <linux/debug_locks.h> #include <linux/cleanup.h> #include <linux/mutex_types.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ , .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_SLEEP, \ } #else # define __DEP_MAP_MUTEX_INITIALIZER(lockname) #endif #ifdef CONFIG_DEBUG_MUTEXES # define __DEBUG_MUTEX_INITIALIZER(lockname) \ , .magic = &lockname extern void mutex_destroy(struct mutex *lock); #else # define __DEBUG_MUTEX_INITIALIZER(lockname) static inline void mutex_destroy(struct mutex *lock) {} #endif #ifndef CONFIG_PREEMPT_RT /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized * * Initialize the mutex to unlocked state. * * It is not allowed to initialize an already locked mutex. */ #define mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ \ __mutex_init((mutex), #mutex, &__key); \ } while (0) #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ __DEBUG_MUTEX_INITIALIZER(lockname) \ __DEP_MAP_MUTEX_INITIALIZER(lockname) } #define DEFINE_MUTEX(mutexname) \ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) extern void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); /** * mutex_is_locked - is the mutex locked * @lock: the mutex to be queried * * Returns true if the mutex is locked, false if unlocked. */ extern bool mutex_is_locked(struct mutex *lock); #else /* !CONFIG_PREEMPT_RT */ /* * Preempt-RT variant based on rtmutexes. */ #define __MUTEX_INITIALIZER(mutexname) \ { \ .rtmutex = __RT_MUTEX_BASE_INITIALIZER(mutexname.rtmutex) \ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ } #define DEFINE_MUTEX(mutexname) \ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) extern void __mutex_rt_init(struct mutex *lock, const char *name, struct lock_class_key *key); #define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex) #define __mutex_init(mutex, name, key) \ do { \ rt_mutex_base_init(&(mutex)->rtmutex); \ __mutex_rt_init((mutex), name, key); \ } while (0) #define mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ \ __mutex_init((mutex), #mutex, &__key); \ } while (0) #endif /* CONFIG_PREEMPT_RT */ /* * See kernel/locking/mutex.c for detailed documentation of these APIs. * Also see Documentation/locking/mutex-design.rst. 
*/ #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); extern int __must_check mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass); extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass); #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) #define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0) #define mutex_lock_nest_lock(lock, nest_lock) \ do { \ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) #else extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); extern int __must_check mutex_lock_killable(struct mutex *lock); extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) # define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) #endif /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! * * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) #endif /* __LINUX_MUTEX_H */
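/*
 * Illustrative sketch, not part of this header: basic use of the API above.
 * example_lock, example_list, struct example_dev and the two helpers are
 * made-up names; this only shows the locking calls, including the
 * guard(mutex) scope-based form enabled by the DEFINE_GUARD() lines above.
 */
#include <linux/mutex.h>
#include <linux/cleanup.h>
#include <linux/list.h>

static DEFINE_MUTEX(example_lock);	/* statically initialized mutex */
static LIST_HEAD(example_list);		/* protected by example_lock */

struct example_dev {
	struct list_head node;
};

static int example_add(struct example_dev *dev)
{
	int ret;

	/* sleep until the lock is taken; fail with -EINTR on a signal */
	ret = mutex_lock_interruptible(&example_lock);
	if (ret)
		return ret;
	list_add_tail(&dev->node, &example_list);
	mutex_unlock(&example_lock);
	return 0;
}

static void example_del(struct example_dev *dev)
{
	/* scoped lock: released automatically when the function returns */
	guard(mutex)(&example_lock);
	list_del(&dev->node);
}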
// SPDX-License-Identifier: GPL-2.0
/*
 * Native support for the I/O-Warrior USB devices
 *
 * Copyright (c) 2003-2005, 2020 Code Mercenaries GmbH
 * written by Christian Lucht <lucht@codemercs.com> and
 * Christoph Jung <jung@codemercs.com>
 *
 * based on
 * usb-skeleton.c by Greg Kroah-Hartman <greg@kroah.com>
 * brlvger.c by Stephane Dalton <sdalton@videotron.ca>
 * and Stephane Doyon <s.doyon@videotron.ca>
 *
 * Released under the GPLv2.
 */

#include <linux/module.h>
#include <linux/usb.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/usb/iowarrior.h>

#define DRIVER_AUTHOR "Christian Lucht <lucht@codemercs.com>"
#define DRIVER_DESC "USB IO-Warrior driver"

#define USB_VENDOR_ID_CODEMERCS		1984
/* low speed iowarrior */
#define USB_DEVICE_ID_CODEMERCS_IOW40	0x1500
#define USB_DEVICE_ID_CODEMERCS_IOW24	0x1501
#define USB_DEVICE_ID_CODEMERCS_IOWPV1	0x1511
#define USB_DEVICE_ID_CODEMERCS_IOWPV2	0x1512
/* full speed iowarrior */
#define USB_DEVICE_ID_CODEMERCS_IOW56	0x1503
/* fuller speed iowarrior */
#define USB_DEVICE_ID_CODEMERCS_IOW28	0x1504
#define USB_DEVICE_ID_CODEMERCS_IOW28L	0x1505
#define USB_DEVICE_ID_CODEMERCS_IOW100	0x1506

/* OEMed devices */
#define USB_DEVICE_ID_CODEMERCS_IOW24SAG	0x158a
#define USB_DEVICE_ID_CODEMERCS_IOW56AM		0x158b

/* Get a minor range for your devices from the usb maintainer */
#ifdef CONFIG_USB_DYNAMIC_MINORS
#define IOWARRIOR_MINOR_BASE	0
#else
#define IOWARRIOR_MINOR_BASE	208	// SKELETON_MINOR_BASE 192 + 16, not official yet
#endif

/* interrupt input queue size */
#define MAX_INTERRUPT_BUFFER 16

/*
 * maximum number of urbs that are submitted for writes at the same time,
 * this applies to the IOWarrior56 only!
 * IOWarrior24 and IOWarrior40 use synchronous usb_control_msg calls.
*/ #define MAX_WRITES_IN_FLIGHT 4 MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static struct usb_driver iowarrior_driver; /*--------------*/ /* data */ /*--------------*/ /* Structure to hold all of our device specific stuff */ struct iowarrior { struct mutex mutex; /* locks this structure */ struct usb_device *udev; /* save off the usb device pointer */ struct usb_interface *interface; /* the interface for this device */ unsigned char minor; /* the starting minor number for this device */ struct usb_endpoint_descriptor *int_out_endpoint; /* endpoint for reading (needed for IOW56 only) */ struct usb_endpoint_descriptor *int_in_endpoint; /* endpoint for reading */ struct urb *int_in_urb; /* the urb for reading data */ unsigned char *int_in_buffer; /* buffer for data to be read */ unsigned char serial_number; /* to detect lost packages */ unsigned char *read_queue; /* size is MAX_INTERRUPT_BUFFER * packet size */ wait_queue_head_t read_wait; wait_queue_head_t write_wait; /* wait-queue for writing to the device */ atomic_t write_busy; /* number of write-urbs submitted */ atomic_t read_idx; atomic_t intr_idx; atomic_t overflow_flag; /* signals an index 'rollover' */ int present; /* this is 1 as long as the device is connected */ int opened; /* this is 1 if the device is currently open */ char chip_serial[9]; /* the serial number string of the chip connected */ int report_size; /* number of bytes in a report */ u16 product_id; struct usb_anchor submitted; }; /*--------------*/ /* globals */ /*--------------*/ #define USB_REQ_GET_REPORT 0x01 //#if 0 static int usb_get_report(struct usb_device *dev, struct usb_host_interface *inter, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_REPORT, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, inter->desc.bInterfaceNumber, buf, size, USB_CTRL_GET_TIMEOUT); } //#endif #define USB_REQ_SET_REPORT 0x09 static int usb_set_report(struct usb_interface *intf, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), USB_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, intf->cur_altsetting->desc.bInterfaceNumber, buf, size, 1000); } /*---------------------*/ /* driver registration */ /*---------------------*/ /* table of devices that work with this driver */ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW40)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); /* * USB callback handler for reading data */ static void iowarrior_callback(struct urb *urb) { struct iowarrior *dev = urb->context; int intr_idx; int read_idx; int aux_idx; int offset; int status = urb->status; int retval; switch 
(status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: goto exit; } intr_idx = atomic_read(&dev->intr_idx); /* aux_idx become previous intr_idx */ aux_idx = (intr_idx == 0) ? (MAX_INTERRUPT_BUFFER - 1) : (intr_idx - 1); read_idx = atomic_read(&dev->read_idx); /* queue is not empty and it's interface 0 */ if ((intr_idx != read_idx) && (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0)) { /* + 1 for serial number */ offset = aux_idx * (dev->report_size + 1); if (!memcmp (dev->read_queue + offset, urb->transfer_buffer, dev->report_size)) { /* equal values on interface 0 will be ignored */ goto exit; } } /* aux_idx become next intr_idx */ aux_idx = (intr_idx == (MAX_INTERRUPT_BUFFER - 1)) ? 0 : (intr_idx + 1); if (read_idx == aux_idx) { /* queue full, dropping oldest input */ read_idx = (++read_idx == MAX_INTERRUPT_BUFFER) ? 0 : read_idx; atomic_set(&dev->read_idx, read_idx); atomic_set(&dev->overflow_flag, 1); } /* +1 for serial number */ offset = intr_idx * (dev->report_size + 1); memcpy(dev->read_queue + offset, urb->transfer_buffer, dev->report_size); *(dev->read_queue + offset + (dev->report_size)) = dev->serial_number++; atomic_set(&dev->intr_idx, aux_idx); /* tell the blocking read about the new data */ wake_up_interruptible(&dev->read_wait); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&dev->interface->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* * USB Callback handler for write-ops */ static void iowarrior_write_callback(struct urb *urb) { struct iowarrior *dev; int status = urb->status; dev = urb->context; /* sync/async unlink faults aren't errors */ if (status && !(status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN)) { dev_dbg(&dev->interface->dev, "nonzero write bulk status received: %d\n", status); } /* free up our allocated buffer */ usb_free_coherent(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); /* tell a waiting writer the interrupt-out-pipe is available again */ atomic_dec(&dev->write_busy); wake_up_interruptible(&dev->write_wait); } /* * iowarrior_delete */ static inline void iowarrior_delete(struct iowarrior *dev) { dev_dbg(&dev->interface->dev, "minor %d\n", dev->minor); kfree(dev->int_in_buffer); usb_free_urb(dev->int_in_urb); kfree(dev->read_queue); usb_put_intf(dev->interface); kfree(dev); } /*---------------------*/ /* fops implementation */ /*---------------------*/ static int read_index(struct iowarrior *dev) { int intr_idx, read_idx; read_idx = atomic_read(&dev->read_idx); intr_idx = atomic_read(&dev->intr_idx); return (read_idx == intr_idx ? 
-1 : read_idx); } /* * iowarrior_read */ static ssize_t iowarrior_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct iowarrior *dev; int read_idx; int offset; dev = file->private_data; /* verify that the device wasn't unplugged */ if (!dev || !dev->present) return -ENODEV; dev_dbg(&dev->interface->dev, "minor %d, count = %zd\n", dev->minor, count); /* read count must be packet size (+ time stamp) */ if ((count != dev->report_size) && (count != (dev->report_size + 1))) return -EINVAL; /* repeat until no buffer overrun in callback handler occur */ do { atomic_set(&dev->overflow_flag, 0); if ((read_idx = read_index(dev)) == -1) { /* queue empty */ if (file->f_flags & O_NONBLOCK) return -EAGAIN; else { //next line will return when there is either new data, or the device is unplugged int r = wait_event_interruptible(dev->read_wait, (!dev->present || (read_idx = read_index (dev)) != -1)); if (r) { //we were interrupted by a signal return -ERESTART; } if (!dev->present) { //The device was unplugged return -ENODEV; } if (read_idx == -1) { // Can this happen ??? return 0; } } } offset = read_idx * (dev->report_size + 1); if (copy_to_user(buffer, dev->read_queue + offset, count)) { return -EFAULT; } } while (atomic_read(&dev->overflow_flag)); read_idx = ++read_idx == MAX_INTERRUPT_BUFFER ? 0 : read_idx; atomic_set(&dev->read_idx, read_idx); return count; } /* * iowarrior_write */ static ssize_t iowarrior_write(struct file *file, const char __user *user_buffer, size_t count, loff_t *ppos) { struct iowarrior *dev; int retval = 0; char *buf = NULL; /* for IOW24 and IOW56 we need a buffer */ struct urb *int_out_urb = NULL; dev = file->private_data; mutex_lock(&dev->mutex); /* verify that the device wasn't unplugged */ if (!dev->present) { retval = -ENODEV; goto exit; } dev_dbg(&dev->interface->dev, "minor %d, count = %zd\n", dev->minor, count); /* if count is 0 we're already done */ if (count == 0) { retval = 0; goto exit; } /* We only accept full reports */ if (count != dev->report_size) { retval = -EINVAL; goto exit; } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: /* IOW24 and IOW40 use a synchronous call */ buf = memdup_user(user_buffer, count); if (IS_ERR(buf)) { retval = PTR_ERR(buf); goto exit; } retval = usb_set_report(dev->interface, 2, 0, buf, count); kfree(buf); goto exit; case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto exit; } else { retval = wait_event_interruptible(dev->write_wait, (!dev->present || (atomic_read (&dev-> write_busy) < MAX_WRITES_IN_FLIGHT))); if (retval) { /* we were interrupted by a signal */ retval = -ERESTART; goto exit; } if (!dev->present) { /* The device was unplugged */ retval = -ENODEV; goto exit; } if (!dev->opened) { /* We were closed while waiting for an URB */ retval = -ENODEV; goto exit; } } } atomic_inc(&dev->write_busy); int_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!int_out_urb) { retval = -ENOMEM; goto error_no_urb; } buf = usb_alloc_coherent(dev->udev, dev->report_size, GFP_KERNEL, 
&int_out_urb->transfer_dma); if (!buf) { retval = -ENOMEM; dev_dbg(&dev->interface->dev, "Unable to allocate buffer\n"); goto error_no_buffer; } usb_fill_int_urb(int_out_urb, dev->udev, usb_sndintpipe(dev->udev, dev->int_out_endpoint->bEndpointAddress), buf, dev->report_size, iowarrior_write_callback, dev, dev->int_out_endpoint->bInterval); int_out_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (copy_from_user(buf, user_buffer, count)) { retval = -EFAULT; goto error; } usb_anchor_urb(int_out_urb, &dev->submitted); retval = usb_submit_urb(int_out_urb, GFP_KERNEL); if (retval) { dev_dbg(&dev->interface->dev, "submit error %d for urb nr.%d\n", retval, atomic_read(&dev->write_busy)); usb_unanchor_urb(int_out_urb); goto error; } /* submit was ok */ retval = count; usb_free_urb(int_out_urb); goto exit; default: /* what do we have here ? An unsupported Product-ID ? */ dev_err(&dev->interface->dev, "%s - not supported for product=0x%x\n", __func__, dev->product_id); retval = -EFAULT; goto exit; } error: usb_free_coherent(dev->udev, dev->report_size, buf, int_out_urb->transfer_dma); error_no_buffer: usb_free_urb(int_out_urb); error_no_urb: atomic_dec(&dev->write_busy); wake_up_interruptible(&dev->write_wait); exit: mutex_unlock(&dev->mutex); return retval; } /* * iowarrior_ioctl */ static long iowarrior_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct iowarrior *dev = NULL; __u8 *buffer; __u8 __user *user_buffer; int retval; int io_res; /* checks for bytes read/written and copy_to/from_user results */ dev = file->private_data; if (!dev) return -ENODEV; buffer = kzalloc(dev->report_size, GFP_KERNEL); if (!buffer) return -ENOMEM; mutex_lock(&dev->mutex); /* verify that the device wasn't unplugged */ if (!dev->present) { retval = -ENODEV; goto error_out; } dev_dbg(&dev->interface->dev, "minor %d, cmd 0x%.4x, arg %ld\n", dev->minor, cmd, arg); retval = 0; switch (cmd) { case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { user_buffer = (__u8 __user *)arg; io_res = copy_from_user(buffer, user_buffer, dev->report_size); if (io_res) { retval = -EFAULT; } else { io_res = usb_set_report(dev->interface, 2, 0, buffer, dev->report_size); if (io_res < 0) retval = io_res; } } else { retval = -EINVAL; dev_err(&dev->interface->dev, "ioctl 'IOW_WRITE' is not supported for product=0x%x.\n", dev->product_id); } break; case IOW_READ: user_buffer = (__u8 __user *)arg; io_res = usb_get_report(dev->udev, dev->interface->cur_altsetting, 1, 0, buffer, dev->report_size); if (io_res < 0) retval = io_res; else { io_res = copy_to_user(user_buffer, buffer, dev->report_size); if (io_res) retval = -EFAULT; } break; case IOW_GETINFO: { /* Report available information for the device */ struct iowarrior_info info; /* needed for power consumption */ struct usb_config_descriptor *cfg_descriptor = &dev->udev->actconfig->desc; memset(&info, 0, sizeof(info)); /* directly from the descriptor */ info.vendor = le16_to_cpu(dev->udev->descriptor.idVendor); info.product = dev->product_id; info.revision = le16_to_cpu(dev->udev->descriptor.bcdDevice); /* 0==UNKNOWN, 1==LOW(usb1.1) ,2=FULL(usb1.1), 3=HIGH(usb2.0) */ info.speed = dev->udev->speed; info.if_num = dev->interface->cur_altsetting->desc.bInterfaceNumber; info.report_size = dev->report_size; /* serial number string has been read 
earlier 8 chars or empty string */ memcpy(info.serial, dev->chip_serial, sizeof(dev->chip_serial)); if (cfg_descriptor == NULL) { info.power = -1; /* no information available */ } else { /* the MaxPower is stored in units of 2mA to make it fit into a byte-value */ info.power = cfg_descriptor->bMaxPower * 2; } io_res = copy_to_user((struct iowarrior_info __user *)arg, &info, sizeof(struct iowarrior_info)); if (io_res) retval = -EFAULT; break; } default: /* return that we did not understand this ioctl call */ retval = -ENOTTY; break; } error_out: /* unlock the device */ mutex_unlock(&dev->mutex); kfree(buffer); return retval; } /* * iowarrior_open */ static int iowarrior_open(struct inode *inode, struct file *file) { struct iowarrior *dev = NULL; struct usb_interface *interface; int subminor; int retval = 0; subminor = iminor(inode); interface = usb_find_interface(&iowarrior_driver, subminor); if (!interface) { pr_err("%s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } dev = usb_get_intfdata(interface); if (!dev) return -ENODEV; mutex_lock(&dev->mutex); /* Only one process can open each device, no sharing. */ if (dev->opened) { retval = -EBUSY; goto out; } /* setup interrupt handler for receiving values */ if ((retval = usb_submit_urb(dev->int_in_urb, GFP_KERNEL)) < 0) { dev_err(&interface->dev, "Error %d while submitting URB\n", retval); retval = -EFAULT; goto out; } /* increment our usage count for the driver */ ++dev->opened; /* save our object in the file's private structure */ file->private_data = dev; retval = 0; out: mutex_unlock(&dev->mutex); return retval; } /* * iowarrior_release */ static int iowarrior_release(struct inode *inode, struct file *file) { struct iowarrior *dev; int retval = 0; dev = file->private_data; if (!dev) return -ENODEV; dev_dbg(&dev->interface->dev, "minor %d\n", dev->minor); /* lock our device */ mutex_lock(&dev->mutex); if (dev->opened <= 0) { retval = -ENODEV; /* close called more than once */ mutex_unlock(&dev->mutex); } else { dev->opened = 0; /* we're closing now */ retval = 0; if (dev->present) { /* The device is still connected so we only shutdown pending read-/write-ops. */ usb_kill_urb(dev->int_in_urb); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); mutex_unlock(&dev->mutex); } else { /* The device was unplugged, cleanup resources */ mutex_unlock(&dev->mutex); iowarrior_delete(dev); } } return retval; } static __poll_t iowarrior_poll(struct file *file, poll_table * wait) { struct iowarrior *dev = file->private_data; __poll_t mask = 0; if (!dev->present) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); poll_wait(file, &dev->write_wait, wait); if (!dev->present) return EPOLLERR | EPOLLHUP; if (read_index(dev) != -1) mask |= EPOLLIN | EPOLLRDNORM; if (atomic_read(&dev->write_busy) < MAX_WRITES_IN_FLIGHT) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } /* * File operations needed when we register this driver. * This assumes that this driver NEEDS file operations, * of course, which means that the driver is expected * to have a node in the /dev directory. If the USB * device were for a network interface then the driver * would use "struct net_driver" instead, and a serial * device would use "struct tty_driver". 
*/ static const struct file_operations iowarrior_fops = { .owner = THIS_MODULE, .write = iowarrior_write, .read = iowarrior_read, .unlocked_ioctl = iowarrior_ioctl, .open = iowarrior_open, .release = iowarrior_release, .poll = iowarrior_poll, .llseek = noop_llseek, }; static char *iowarrior_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with devfs and the driver core */ static struct usb_class_driver iowarrior_class = { .name = "iowarrior%d", .devnode = iowarrior_devnode, .fops = &iowarrior_fops, .minor_base = IOWARRIOR_MINOR_BASE, }; /*---------------------------------*/ /* probe and disconnect functions */ /*---------------------------------*/ /* * iowarrior_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. */ static int iowarrior_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct iowarrior *dev = NULL; struct usb_host_interface *iface_desc; int retval = -ENOMEM; int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct iowarrior), GFP_KERNEL); if (!dev) return retval; mutex_init(&dev->mutex); atomic_set(&dev->intr_idx, 0); atomic_set(&dev->read_idx, 0); atomic_set(&dev->overflow_flag, 0); init_waitqueue_head(&dev->read_wait); atomic_set(&dev->write_busy, 0); init_waitqueue_head(&dev->write_wait); dev->udev = udev; dev->interface = usb_get_intf(interface); iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); init_usb_anchor(&dev->submitted); res = usb_find_last_int_in_endpoint(iface_desc, &dev->int_in_endpoint); if (res) { dev_err(&interface->dev, "no interrupt-in endpoint found\n"); retval = res; goto error; } if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { dev_err(&interface->dev, "no interrupt-out endpoint found\n"); retval = res; goto error; } } /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); /* * Some devices need the report size to be different than the * endpoint size. 
*/ if (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) { switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: dev->report_size = 7; break; case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: dev->report_size = 4; break; case USB_DEVICE_ID_CODEMERCS_IOW100: dev->report_size = 12; break; } } /* create the urb and buffer for reading */ dev->int_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->int_in_urb) goto error; dev->int_in_buffer = kmalloc(dev->report_size, GFP_KERNEL); if (!dev->int_in_buffer) goto error; usb_fill_int_urb(dev->int_in_urb, dev->udev, usb_rcvintpipe(dev->udev, dev->int_in_endpoint->bEndpointAddress), dev->int_in_buffer, dev->report_size, iowarrior_callback, dev, dev->int_in_endpoint->bInterval); /* create an internal buffer for interrupt data from the device */ dev->read_queue = kmalloc_array(dev->report_size + 1, MAX_INTERRUPT_BUFFER, GFP_KERNEL); if (!dev->read_queue) goto error; /* Get the serial-number of the chip */ memset(dev->chip_serial, 0x00, sizeof(dev->chip_serial)); usb_string(udev, udev->descriptor.iSerialNumber, dev->chip_serial, sizeof(dev->chip_serial)); if (strlen(dev->chip_serial) != 8) memset(dev->chip_serial, 0x00, sizeof(dev->chip_serial)); /* Set the idle timeout to 0, if this is interface 0 */ if (dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) { usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x0A, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } /* allow device read and ioctl */ dev->present = 1; /* we can register the device now, as it is ready */ usb_set_intfdata(interface, dev); retval = usb_register_dev(interface, &iowarrior_class); if (retval) { /* something prevented us from registering this driver */ dev_err(&interface->dev, "Not able to get a minor for this device.\n"); goto error; } dev->minor = interface->minor; /* let the user know what node this device is now attached to */ dev_info(&interface->dev, "IOWarrior product=0x%x, serial=%s interface=%d " "now attached to iowarrior%d\n", dev->product_id, dev->chip_serial, iface_desc->desc.bInterfaceNumber, dev->minor - IOWARRIOR_MINOR_BASE); return retval; error: iowarrior_delete(dev); return retval; } /* * iowarrior_disconnect * * Called by the usb core when the device is removed from the system. */ static void iowarrior_disconnect(struct usb_interface *interface) { struct iowarrior *dev = usb_get_intfdata(interface); int minor = dev->minor; usb_deregister_dev(interface, &iowarrior_class); mutex_lock(&dev->mutex); /* prevent device read, write and ioctl */ dev->present = 0; if (dev->opened) { /* There is a process that holds a file descriptor to the device, so we only shutdown read-/write-ops going on. Deleting the device is postponed until close() was called. */ usb_kill_urb(dev->int_in_urb); usb_kill_anchored_urbs(&dev->submitted); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); mutex_unlock(&dev->mutex); } else { /* no process is using the device, cleanup now */ mutex_unlock(&dev->mutex); iowarrior_delete(dev); } dev_info(&interface->dev, "I/O-Warrior #%d now disconnected\n", minor - IOWARRIOR_MINOR_BASE); } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver iowarrior_driver = { .name = "iowarrior", .probe = iowarrior_probe, .disconnect = iowarrior_disconnect, .id_table = iowarrior_ids, }; module_usb_driver(iowarrior_driver);
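For readers who want to exercise the character-device interface implemented above from user space, the following standalone sketch opens the device node, queries IOW_GETINFO, and blocks in read() for a single report. It is not part of the driver: the node path /dev/usb/iowarrior0 is an assumption that depends on the local udev setup, while IOW_GETINFO and struct iowarrior_info come from the same <linux/usb/iowarrior.h> header the driver includes. Note that read() must request exactly report_size (or report_size + 1) bytes, as enforced in iowarrior_read().

/* Hypothetical userspace test tool for the iowarrior character device. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usb/iowarrior.h>

int main(void)
{
	struct iowarrior_info info;
	unsigned char report[64];	/* larger than any report_size used above */
	ssize_t n;
	int fd, i;

	fd = open("/dev/usb/iowarrior0", O_RDONLY);	/* assumed device node */
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* IOW_GETINFO fills in vendor/product ids, chip serial and report size */
	if (ioctl(fd, IOW_GETINFO, &info) < 0) {
		perror("IOW_GETINFO");
		close(fd);
		return EXIT_FAILURE;
	}
	printf("vendor=0x%04x product=0x%04x serial=%s report_size=%u\n",
	       (unsigned int)info.vendor, (unsigned int)info.product,
	       (const char *)info.serial, (unsigned int)info.report_size);

	if ((size_t)info.report_size > sizeof(report)) {
		fprintf(stderr, "unexpected report size\n");
		close(fd);
		return EXIT_FAILURE;
	}

	/* Blocks until the device delivers a report; count must be report_size. */
	n = read(fd, report, info.report_size);
	if (n < 0) {
		perror("read");
	} else {
		for (i = 0; i < n; i++)
			printf("%02x ", report[i]);
		printf("\n");
	}

	close(fd);
	return 0;
}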
// SPDX-License-Identifier: GPL-2.0-or-later /* Common capabilities, needed by capability.o.
*/ #include <linux/capability.h> #include <linux/audit.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/lsm_hooks.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/ptrace.h> #include <linux/xattr.h> #include <linux/hugetlb.h> #include <linux/mount.h> #include <linux/sched.h> #include <linux/prctl.h> #include <linux/securebits.h> #include <linux/user_namespace.h> #include <linux/binfmts.h> #include <linux/personality.h> #include <linux/mnt_idmapping.h> #include <uapi/linux/lsm.h> /* * If a non-root user executes a setuid-root binary in * !secure(SECURE_NOROOT) mode, then we raise capabilities. * However if fE is also set, then the intent is for only * the file capabilities to be applied, and the setuid-root * bit is left on either to change the uid (plausible) or * to get full privilege on a kernel without file capabilities * support. So in that case we do not raise capabilities. * * Warn if that happens, once per boot. */ static void warn_setuid_and_fcaps_mixed(const char *fname) { static int warned; if (!warned) { printk(KERN_INFO "warning: `%s' has both setuid-root and" " effective capabilities. Therefore not raising all" " capabilities.\n", fname); warned = 1; } } /** * cap_capable - Determine whether a task has a particular effective capability * @cred: The credentials to use * @targ_ns: The user namespace in which we need the capability * @cap: The capability to check for * @opts: Bitmask of options defined in include/linux/security.h * * Determine whether the nominated task has the specified capability amongst * its effective set, returning 0 if it does, -ve if it does not. * * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable() * and has_capability() functions. That is, it has the reverse semantics: * cap_has_capability() returns 0 when a task has a capability, but the * kernel's capable() and has_capability() returns 1 for this case. */ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, int cap, unsigned int opts) { struct user_namespace *ns = targ_ns; /* See if cred has the capability in the target user namespace * by examining the target user namespace and all of the target * user namespace's parents. */ for (;;) { /* Do we have the necessary capabilities? */ if (ns == cred->user_ns) return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; /* * If we're already at a lower level than we're looking for, * we're done searching. */ if (ns->level <= cred->user_ns->level) return -EPERM; /* * The owner of the user namespace in the parent of the * user namespace has all caps. */ if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid)) return 0; /* * If you have a capability in a parent user ns, then you have * it over all children user namespaces as well. */ ns = ns->parent; } /* We never get here */ } /** * cap_settime - Determine whether the current process may set the system clock * @ts: The time to set * @tz: The timezone to set * * Determine whether the current process may set the system clock and timezone * information, returning 0 if permission granted, -ve if denied. */ int cap_settime(const struct timespec64 *ts, const struct timezone *tz) { if (!capable(CAP_SYS_TIME)) return -EPERM; return 0; } /** * cap_ptrace_access_check - Determine whether the current process may access * another * @child: The process to be accessed * @mode: The mode of attachment. 
* * If we are in the same or an ancestor user_ns and have all the target * task's capabilities, then ptrace access is allowed. * If we have the ptrace capability to the target user_ns, then ptrace * access is allowed. * Else denied. * * Determine whether a process may access another, returning 0 if permission * granted, -ve if denied. */ int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) { int ret = 0; const struct cred *cred, *child_cred; const kernel_cap_t *caller_caps; rcu_read_lock(); cred = current_cred(); child_cred = __task_cred(child); if (mode & PTRACE_MODE_FSCREDS) caller_caps = &cred->cap_effective; else caller_caps = &cred->cap_permitted; if (cred->user_ns == child_cred->user_ns && cap_issubset(child_cred->cap_permitted, *caller_caps)) goto out; if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE)) goto out; ret = -EPERM; out: rcu_read_unlock(); return ret; } /** * cap_ptrace_traceme - Determine whether another process may trace the current * @parent: The task proposed to be the tracer * * If parent is in the same or an ancestor user_ns and has all current's * capabilities, then ptrace access is allowed. * If parent has the ptrace capability to current's user_ns, then ptrace * access is allowed. * Else denied. * * Determine whether the nominated task is permitted to trace the current * process, returning 0 if permission is granted, -ve if denied. */ int cap_ptrace_traceme(struct task_struct *parent) { int ret = 0; const struct cred *cred, *child_cred; rcu_read_lock(); cred = __task_cred(parent); child_cred = current_cred(); if (cred->user_ns == child_cred->user_ns && cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) goto out; if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE)) goto out; ret = -EPERM; out: rcu_read_unlock(); return ret; } /** * cap_capget - Retrieve a task's capability sets * @target: The task from which to retrieve the capability sets * @effective: The place to record the effective set * @inheritable: The place to record the inheritable set * @permitted: The place to record the permitted set * * This function retrieves the capabilities of the nominated task and returns * them to the caller. */ int cap_capget(const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { const struct cred *cred; /* Derived from kernel/capability.c:sys_capget. */ rcu_read_lock(); cred = __task_cred(target); *effective = cred->cap_effective; *inheritable = cred->cap_inheritable; *permitted = cred->cap_permitted; rcu_read_unlock(); return 0; } /* * Determine whether the inheritable capabilities are limited to the old * permitted set. Returns 1 if they are limited, 0 if they are not. */ static inline int cap_inh_is_capped(void) { /* they are so limited unless the current task has the CAP_SETPCAP * capability */ if (cap_capable(current_cred(), current_cred()->user_ns, CAP_SETPCAP, CAP_OPT_NONE) == 0) return 0; return 1; } /** * cap_capset - Validate and apply proposed changes to current's capabilities * @new: The proposed new credentials; alterations should be made here * @old: The current task's current credentials * @effective: A pointer to the proposed new effective capabilities set * @inheritable: A pointer to the proposed new inheritable capabilities set * @permitted: A pointer to the proposed new permitted capabilities set * * This function validates and applies a proposed mass change to the current * process's capability sets. 
The changes are made to the proposed new * credentials, and assuming no error, will be committed by the caller of LSM. */ int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { if (cap_inh_is_capped() && !cap_issubset(*inheritable, cap_combine(old->cap_inheritable, old->cap_permitted))) /* incapable of using this inheritable set */ return -EPERM; if (!cap_issubset(*inheritable, cap_combine(old->cap_inheritable, old->cap_bset))) /* no new pI capabilities outside bounding set */ return -EPERM; /* verify restrictions on target's new Permitted set */ if (!cap_issubset(*permitted, old->cap_permitted)) return -EPERM; /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ if (!cap_issubset(*effective, *permitted)) return -EPERM; new->cap_effective = *effective; new->cap_inheritable = *inheritable; new->cap_permitted = *permitted; /* * Mask off ambient bits that are no longer both permitted and * inheritable. */ new->cap_ambient = cap_intersect(new->cap_ambient, cap_intersect(*permitted, *inheritable)); if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EINVAL; return 0; } /** * cap_inode_need_killpriv - Determine if inode change affects privileges * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV * * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV * affects the security markings on that inode, and if it is, should * inode_killpriv() be invoked or the change rejected. * * Return: 1 if security.capability has a value, meaning inode_killpriv() * is required, 0 otherwise, meaning inode_killpriv() is not required. */ int cap_inode_need_killpriv(struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); int error; error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0); return error > 0; } /** * cap_inode_killpriv - Erase the security markings on an inode * * @idmap: idmap of the mount the inode was found from * @dentry: The inode/dentry to alter * * Erase the privilege-enhancing security markings on an inode. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Return: 0 if successful, -ve on error. 
*/ int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry) { int error; error = __vfs_removexattr(idmap, dentry, XATTR_NAME_CAPS); if (error == -EOPNOTSUPP) error = 0; return error; } static bool rootid_owns_currentns(vfsuid_t rootvfsuid) { struct user_namespace *ns; kuid_t kroot; if (!vfsuid_valid(rootvfsuid)) return false; kroot = vfsuid_into_kuid(rootvfsuid); for (ns = current_user_ns();; ns = ns->parent) { if (from_kuid(ns, kroot) == 0) return true; if (ns == &init_user_ns) break; } return false; } static __u32 sansflags(__u32 m) { return m & ~VFS_CAP_FLAGS_EFFECTIVE; } static bool is_v2header(int size, const struct vfs_cap_data *cap) { if (size != XATTR_CAPS_SZ_2) return false; return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2; } static bool is_v3header(int size, const struct vfs_cap_data *cap) { if (size != XATTR_CAPS_SZ_3) return false; return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3; } /* * getsecurity: We are called for security.* before any attempt to read the * xattr from the inode itself. * * This gives us a chance to read the on-disk value and convert it. If we * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler. * * Note we are not called by vfs_getxattr_alloc(), but that is only called * by the integrity subsystem, which really wants the unconverted values - * so that's good. */ int cap_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc) { int size; kuid_t kroot; vfsuid_t vfsroot; u32 nsmagic, magic; uid_t root, mappedroot; char *tmpbuf = NULL; struct vfs_cap_data *cap; struct vfs_ns_cap_data *nscap = NULL; struct dentry *dentry; struct user_namespace *fs_ns; if (strcmp(name, "capability") != 0) return -EOPNOTSUPP; dentry = d_find_any_alias(inode); if (!dentry) return -EINVAL; size = vfs_getxattr_alloc(idmap, dentry, XATTR_NAME_CAPS, &tmpbuf, sizeof(struct vfs_ns_cap_data), GFP_NOFS); dput(dentry); /* gcc11 complains if we don't check for !tmpbuf */ if (size < 0 || !tmpbuf) goto out_free; fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; if (is_v2header(size, cap)) { root = 0; } else if (is_v3header(size, cap)) { nscap = (struct vfs_ns_cap_data *) tmpbuf; root = le32_to_cpu(nscap->rootid); } else { size = -EINVAL; goto out_free; } kroot = make_kuid(fs_ns, root); /* If this is an idmapped mount shift the kuid. */ vfsroot = make_vfsuid(idmap, fs_ns, kroot); /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ mappedroot = from_kuid(current_user_ns(), vfsuid_into_kuid(vfsroot)); if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) { size = sizeof(struct vfs_ns_cap_data); if (alloc) { if (!nscap) { /* v2 -> v3 conversion */ nscap = kzalloc(size, GFP_ATOMIC); if (!nscap) { size = -ENOMEM; goto out_free; } nsmagic = VFS_CAP_REVISION_3; magic = le32_to_cpu(cap->magic_etc); if (magic & VFS_CAP_FLAGS_EFFECTIVE) nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); nscap->magic_etc = cpu_to_le32(nsmagic); } else { /* use allocated v3 buffer */ tmpbuf = NULL; } nscap->rootid = cpu_to_le32(mappedroot); *buffer = nscap; } goto out_free; } if (!rootid_owns_currentns(vfsroot)) { size = -EOVERFLOW; goto out_free; } /* This comes from a parent namespace. 
Return as a v2 capability */ size = sizeof(struct vfs_cap_data); if (alloc) { if (nscap) { /* v3 -> v2 conversion */ cap = kzalloc(size, GFP_ATOMIC); if (!cap) { size = -ENOMEM; goto out_free; } magic = VFS_CAP_REVISION_2; nsmagic = le32_to_cpu(nscap->magic_etc); if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE) magic |= VFS_CAP_FLAGS_EFFECTIVE; memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); cap->magic_etc = cpu_to_le32(magic); } else { /* use unconverted v2 */ tmpbuf = NULL; } *buffer = cap; } out_free: kfree(tmpbuf); return size; } /** * rootid_from_xattr - translate root uid of vfs caps * * @value: vfs caps value which may be modified by this function * @size: size of @ivalue * @task_ns: user namespace of the caller */ static vfsuid_t rootid_from_xattr(const void *value, size_t size, struct user_namespace *task_ns) { const struct vfs_ns_cap_data *nscap = value; uid_t rootid = 0; if (size == XATTR_CAPS_SZ_3) rootid = le32_to_cpu(nscap->rootid); return VFSUIDT_INIT(make_kuid(task_ns, rootid)); } static bool validheader(size_t size, const struct vfs_cap_data *cap) { return is_v2header(size, cap) || is_v3header(size, cap); } /** * cap_convert_nscap - check vfs caps * * @idmap: idmap of the mount the inode was found from * @dentry: used to retrieve inode to check permissions on * @ivalue: vfs caps value which may be modified by this function * @size: size of @ivalue * * User requested a write of security.capability. If needed, update the * xattr to change from v2 to v3, or to fixup the v3 rootid. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Return: On success, return the new size; on error, return < 0. */ int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry, const void **ivalue, size_t size) { struct vfs_ns_cap_data *nscap; uid_t nsrootid; const struct vfs_cap_data *cap = *ivalue; __u32 magic, nsmagic; struct inode *inode = d_backing_inode(dentry); struct user_namespace *task_ns = current_user_ns(), *fs_ns = inode->i_sb->s_user_ns; kuid_t rootid; vfsuid_t vfsrootid; size_t newsize; if (!*ivalue) return -EINVAL; if (!validheader(size, cap)) return -EINVAL; if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP)) return -EPERM; if (size == XATTR_CAPS_SZ_2 && (idmap == &nop_mnt_idmap)) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ return size; vfsrootid = rootid_from_xattr(*ivalue, size, task_ns); if (!vfsuid_valid(vfsrootid)) return -EINVAL; rootid = from_vfsuid(idmap, fs_ns, vfsrootid); if (!uid_valid(rootid)) return -EINVAL; nsrootid = from_kuid(fs_ns, rootid); if (nsrootid == -1) return -EINVAL; newsize = sizeof(struct vfs_ns_cap_data); nscap = kmalloc(newsize, GFP_ATOMIC); if (!nscap) return -ENOMEM; nscap->rootid = cpu_to_le32(nsrootid); nsmagic = VFS_CAP_REVISION_3; magic = le32_to_cpu(cap->magic_etc); if (magic & VFS_CAP_FLAGS_EFFECTIVE) nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; nscap->magic_etc = cpu_to_le32(nsmagic); memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); *ivalue = nscap; return newsize; } /* * Calculate the new process capability sets from the capability sets attached * to a file. 
*/ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, struct linux_binprm *bprm, bool *effective, bool *has_fcap) { struct cred *new = bprm->cred; int ret = 0; if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE) *effective = true; if (caps->magic_etc & VFS_CAP_REVISION_MASK) *has_fcap = true; /* * pP' = (X & fP) | (pI & fI) * The addition of pA' is handled later. */ new->cap_permitted.val = (new->cap_bset.val & caps->permitted.val) | (new->cap_inheritable.val & caps->inheritable.val); if (caps->permitted.val & ~new->cap_permitted.val) /* insufficient to execute correctly */ ret = -EPERM; /* * For legacy apps, with no internal support for recognizing they * do not have enough capabilities, we return an error if they are * missing some "forced" (aka file-permitted) capabilities. */ return *effective ? ret : 0; } /** * get_vfs_caps_from_disk - retrieve vfs caps from disk * * @idmap: idmap of the mount the inode was found from * @dentry: dentry from which @inode is retrieved * @cpu_caps: vfs capabilities * * Extract the on-exec-apply capability sets for an executable file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) { struct inode *inode = d_backing_inode(dentry); __u32 magic_etc; int size; struct vfs_ns_cap_data data, *nscaps = &data; struct vfs_cap_data *caps = (struct vfs_cap_data *) &data; kuid_t rootkuid; vfsuid_t rootvfsuid; struct user_namespace *fs_ns; memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); if (!inode) return -ENODATA; fs_ns = inode->i_sb->s_user_ns; size = __vfs_getxattr((struct dentry *)dentry, inode, XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ); if (size == -ENODATA || size == -EOPNOTSUPP) /* no data, that's ok */ return -ENODATA; if (size < 0) return size; if (size < sizeof(magic_etc)) return -EINVAL; cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc); rootkuid = make_kuid(fs_ns, 0); switch (magic_etc & VFS_CAP_REVISION_MASK) { case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) return -EINVAL; break; case VFS_CAP_REVISION_2: if (size != XATTR_CAPS_SZ_2) return -EINVAL; break; case VFS_CAP_REVISION_3: if (size != XATTR_CAPS_SZ_3) return -EINVAL; rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid)); break; default: return -EINVAL; } rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid); if (!vfsuid_valid(rootvfsuid)) return -ENODATA; /* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. 
*/ if (!rootid_owns_currentns(rootvfsuid)) return -ENODATA; cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted); cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable); /* * Rev1 had just a single 32-bit word, later expanded * to a second one for the high bits */ if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) { cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32; cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32; } cpu_caps->permitted.val &= CAP_VALID_MASK; cpu_caps->inheritable.val &= CAP_VALID_MASK; cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid); return 0; } /* * Attempt to get the on-exec apply capability sets for an executable file from * its xattrs and, if present, apply them to the proposed credentials being * constructed by execve(). */ static int get_file_caps(struct linux_binprm *bprm, const struct file *file, bool *effective, bool *has_fcap) { int rc = 0; struct cpu_vfs_cap_data vcaps; cap_clear(bprm->cred->cap_permitted); if (!file_caps_enabled) return 0; if (!mnt_may_suid(file->f_path.mnt)) return 0; /* * This check is redundant with mnt_may_suid() but is kept to make * explicit that capability bits are limited to s_user_ns and its * descendants. */ if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns)) return 0; rc = get_vfs_caps_from_disk(file_mnt_idmap(file), file->f_path.dentry, &vcaps); if (rc < 0) { if (rc == -EINVAL) printk(KERN_NOTICE "Invalid argument reading file caps for %s\n", bprm->filename); else if (rc == -ENODATA) rc = 0; goto out; } rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap); out: if (rc) cap_clear(bprm->cred->cap_permitted); return rc; } static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); } static inline bool __is_real(kuid_t uid, struct cred *cred) { return uid_eq(cred->uid, uid); } static inline bool __is_eff(kuid_t uid, struct cred *cred) { return uid_eq(cred->euid, uid); } static inline bool __is_suid(kuid_t uid, struct cred *cred) { return !__is_real(uid, cred) && __is_eff(uid, cred); } /* * handle_privileged_root - Handle case of privileged root * @bprm: The execution parameters, including the proposed creds * @has_fcap: Are any file capabilities set? * @effective: Do we have effective root privilege? * @root_uid: This namespace' root UID WRT initial USER namespace * * Handle the case where root is privileged and hasn't been neutered by * SECURE_NOROOT. If file capabilities are set, they won't be combined with * set UID root and nothing is changed. If we are root, cap_permitted is * updated. If we have become set UID root, the effective bit is set. */ static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap, bool *effective, kuid_t root_uid) { const struct cred *old = current_cred(); struct cred *new = bprm->cred; if (!root_privileged()) return; /* * If the legacy file capability is set, then don't set privs * for a setuid root binary run by a non-root user. Do set it * for a root user just to cause least surprise to an admin. */ if (has_fcap && __is_suid(root_uid, new)) { warn_setuid_and_fcaps_mixed(bprm->filename); return; } /* * To support inheritance of root-permissions and suid-root * executables under compatibility mode, we override the * capability sets for the file. 
*/ if (__is_eff(root_uid, new) || __is_real(root_uid, new)) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ new->cap_permitted = cap_combine(old->cap_bset, old->cap_inheritable); } /* * If only the real uid is 0, we do not set the effective bit. */ if (__is_eff(root_uid, new)) *effective = true; } #define __cap_gained(field, target, source) \ !cap_issubset(target->cap_##field, source->cap_##field) #define __cap_grew(target, source, cred) \ !cap_issubset(cred->cap_##target, cred->cap_##source) #define __cap_full(field, cred) \ cap_issubset(CAP_FULL_SET, cred->cap_##field) static inline bool __is_setuid(struct cred *new, const struct cred *old) { return !uid_eq(new->euid, old->uid); } static inline bool __is_setgid(struct cred *new, const struct cred *old) { return !gid_eq(new->egid, old->gid); } /* * 1) Audit candidate if current->cap_effective is set * * We do not bother to audit if 3 things are true: * 1) cap_effective has all caps * 2) we became root *OR* are were already root * 3) root is supposed to have all caps (SECURE_NOROOT) * Since this is just a normal root execing a process. * * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. * * A number of other conditions require logging: * 2) something prevented setuid root getting all caps * 3) non-setuid root gets fcaps * 4) non-setuid root gets ambient */ static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old, kuid_t root, bool has_fcap) { bool ret = false; if ((__cap_grew(effective, ambient, new) && !(__cap_full(effective, new) && (__is_eff(root, new) || __is_real(root, new)) && root_privileged())) || (root_privileged() && __is_suid(root, new) && !__cap_full(effective, new)) || (!__is_setuid(new, old) && ((has_fcap && __cap_gained(permitted, new, old)) || __cap_gained(ambient, new, old)))) ret = true; return ret; } /** * cap_bprm_creds_from_file - Set up the proposed credentials for execve(). * @bprm: The execution parameters, including the proposed creds * @file: The file to pull the credentials from * * Set up the proposed credentials for a new execution context being * constructed by execve(). The proposed creds in @bprm->cred is altered, * which won't take effect immediately. * * Return: 0 if successful, -ve on error. */ int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file) { /* Process setpcap binaries and capabilities for uid 0 */ const struct cred *old = current_cred(); struct cred *new = bprm->cred; bool effective = false, has_fcap = false, is_setid; int ret; kuid_t root_uid; if (WARN_ON(!cap_ambient_invariant_ok(old))) return -EPERM; ret = get_file_caps(bprm, file, &effective, &has_fcap); if (ret < 0) return ret; root_uid = make_kuid(new->user_ns, 0); handle_privileged_root(bprm, has_fcap, &effective, root_uid); /* if we have fs caps, clear dangerous personality flags */ if (__cap_gained(permitted, new, old)) bprm->per_clear |= PER_CLEAR_ON_SETID; /* Don't let someone trace a set[ug]id/setpcap binary with the revised * credentials unless they have the appropriate permit. * * In addition, if NO_NEW_PRIVS, then ensure we get no new privs. 
*/ is_setid = __is_setuid(new, old) || __is_setgid(new, old); if ((is_setid || __cap_gained(permitted, new, old)) && ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) || !ptracer_capable(current, new->user_ns))) { /* downgrade; they get no more than they had, and maybe less */ if (!ns_capable(new->user_ns, CAP_SETUID) || (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) { new->euid = new->uid; new->egid = new->gid; } new->cap_permitted = cap_intersect(new->cap_permitted, old->cap_permitted); } new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; /* File caps or setid cancels ambient. */ if (has_fcap || is_setid) cap_clear(new->cap_ambient); /* * Now that we've computed pA', update pP' to give: * pP' = (X & fP) | (pI & fI) | pA' */ new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient); /* * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set, * this is the same as pE' = (fE ? pP' : 0) | pA'. */ if (effective) new->cap_effective = new->cap_permitted; else new->cap_effective = new->cap_ambient; if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; if (nonroot_raised_pE(new, old, root_uid, has_fcap)) { ret = audit_log_bprm_fcaps(bprm, new, old); if (ret < 0) return ret; } new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; /* Check for privilege-elevated exec. */ if (is_setid || (!__is_real(root_uid, new) && (effective || __cap_grew(permitted, ambient, new)))) bprm->secureexec = 1; return 0; } /** * cap_inode_setxattr - Determine whether an xattr may be altered * @dentry: The inode/dentry being altered * @name: The name of the xattr to be changed * @value: The value that the xattr will be changed to * @size: The size of value * @flags: The replacement flag * * Determine whether an xattr may be altered or set on an inode, returning 0 if * permission is granted, -ve if denied. * * This is used to make sure security xattrs don't get updated or set by those * who aren't privileged to do so. */ int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct user_namespace *user_ns = dentry->d_sb->s_user_ns; /* Ignore non-security xattrs */ if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) != 0) return 0; /* * For XATTR_NAME_CAPS the check will be done in * cap_convert_nscap(), called by setxattr() */ if (strcmp(name, XATTR_NAME_CAPS) == 0) return 0; if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; return 0; } /** * cap_inode_removexattr - Determine whether an xattr may be removed * * @idmap: idmap of the mount the inode was found from * @dentry: The inode/dentry being altered * @name: The name of the xattr to be changed * * Determine whether an xattr may be removed from an inode, returning 0 if * permission is granted, -ve if denied. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * This is used to make sure security xattrs don't get removed by those who * aren't privileged to remove them. 
*/ int cap_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { struct user_namespace *user_ns = dentry->d_sb->s_user_ns; /* Ignore non-security xattrs */ if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) != 0) return 0; if (strcmp(name, XATTR_NAME_CAPS) == 0) { /* security.capability gets namespaced */ struct inode *inode = d_backing_inode(dentry); if (!inode) return -EINVAL; if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP)) return -EPERM; return 0; } if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; return 0; } /* * cap_emulate_setxuid() fixes the effective / permitted capabilities of * a process after a call to setuid, setreuid, or setresuid. * * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of * {r,e,s}uid != 0, the permitted and effective capabilities are * cleared. * * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective * capabilities of the process are cleared. * * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective * capabilities are set to the permitted capabilities. * * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should * never happen. * * -astor * * cevans - New behaviour, Oct '99 * A process may, via prctl(), elect to keep its capabilities when it * calls setuid() and switches away from uid==0. Both permitted and * effective sets will be retained. * Without this change, it was impossible for a daemon to drop only some * of its privilege. The call to setuid(!=0) would drop all privileges! * Keeping uid 0 is not an option because uid 0 owns too many vital * files.. * Thanks to Olaf Kirch and Peter Benie for spotting this. */ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) { kuid_t root_uid = make_kuid(old->user_ns, 0); if ((uid_eq(old->uid, root_uid) || uid_eq(old->euid, root_uid) || uid_eq(old->suid, root_uid)) && (!uid_eq(new->uid, root_uid) && !uid_eq(new->euid, root_uid) && !uid_eq(new->suid, root_uid))) { if (!issecure(SECURE_KEEP_CAPS)) { cap_clear(new->cap_permitted); cap_clear(new->cap_effective); } /* * Pre-ambient programs expect setresuid to nonroot followed * by exec to drop capabilities. We should make sure that * this remains the case. */ cap_clear(new->cap_ambient); } if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid)) cap_clear(new->cap_effective); if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid)) new->cap_effective = new->cap_permitted; } /** * cap_task_fix_setuid - Fix up the results of setuid() call * @new: The proposed credentials * @old: The current task's current credentials * @flags: Indications of what has changed * * Fix up the results of setuid() call before the credential changes are * actually applied. * * Return: 0 to grant the changes, -ve to deny them. */ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: case LSM_SETID_RES: /* juggle the capabilities to follow [RES]UID changes unless * otherwise suppressed */ if (!issecure(SECURE_NO_SETUID_FIXUP)) cap_emulate_setxuid(new, old); break; case LSM_SETID_FS: /* juggle the capabilities to follow FSUID changes, unless * otherwise suppressed * * FIXME - is fsuser used for all CAP_FS_MASK capabilities? * if not, we might be a bit too harsh here. 
*/ if (!issecure(SECURE_NO_SETUID_FIXUP)) { kuid_t root_uid = make_kuid(old->user_ns, 0); if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid)) new->cap_effective = cap_drop_fs_set(new->cap_effective); if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid)) new->cap_effective = cap_raise_fs_set(new->cap_effective, new->cap_permitted); } break; default: return -EINVAL; } return 0; } /* * Rationale: code calling task_setscheduler, task_setioprio, and * task_setnice, assumes that * . if capable(cap_sys_nice), then those actions should be allowed * . if not capable(cap_sys_nice), but acting on your own processes, * then those actions should be allowed * This is insufficient now since you can call code without suid, but * yet with increased caps. * So we check for increased caps on the target process. */ static int cap_safe_nice(struct task_struct *p) { int is_subset, ret = 0; rcu_read_lock(); is_subset = cap_issubset(__task_cred(p)->cap_permitted, current_cred()->cap_permitted); if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) ret = -EPERM; rcu_read_unlock(); return ret; } /** * cap_task_setscheduler - Determine if scheduler policy change is permitted * @p: The task to affect * * Determine if the requested scheduler policy change is permitted for the * specified task. * * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setscheduler(struct task_struct *p) { return cap_safe_nice(p); } /** * cap_task_setioprio - Determine if I/O priority change is permitted * @p: The task to affect * @ioprio: The I/O priority to set * * Determine if the requested I/O priority change is permitted for the specified * task. * * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setioprio(struct task_struct *p, int ioprio) { return cap_safe_nice(p); } /** * cap_task_setnice - Determine if task priority change is permitted * @p: The task to affect * @nice: The nice value to set * * Determine if the requested task priority change is permitted for the * specified task. * * Return: 0 if permission is granted, -ve if denied. */ int cap_task_setnice(struct task_struct *p, int nice) { return cap_safe_nice(p); } /* * Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from * the current task's bounding set. Returns 0 on success, -ve on error. */ static int cap_prctl_drop(unsigned long cap) { struct cred *new; if (!ns_capable(current_user_ns(), CAP_SETPCAP)) return -EPERM; if (!cap_valid(cap)) return -EINVAL; new = prepare_creds(); if (!new) return -ENOMEM; cap_lower(new->cap_bset, cap); return commit_creds(new); } /** * cap_task_prctl - Implement process control functions for this security module * @option: The process control function requested * @arg2: The argument data for this function * @arg3: The argument data for this function * @arg4: The argument data for this function * @arg5: The argument data for this function * * Allow process control functions (sys_prctl()) to alter capabilities; may * also deny access to other functions not otherwise implemented here. * * Return: 0 or +ve on success, -ENOSYS if this function is not implemented * here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM * modules will consider performing the function. 
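 *
 * For illustration only (an editor's hedged sketch, not part of this file):
 * PR_CAP_AMBIENT_RAISE succeeds only if the capability is already in both
 * the permitted and inheritable sets and SECURE_NO_CAP_AMBIENT_RAISE is
 * not set, so a userspace caller might do:
 *
 *	#include <sys/prctl.h>
 *	#include <linux/capability.h>
 *
 *	static int raise_ambient_net_raw(void)
 *	{
 *		if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET,
 *			  CAP_NET_RAW, 0, 0) == 1)
 *			return 0;
 *		return prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE,
 *			     CAP_NET_RAW, 0, 0);
 *	}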
*/ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { const struct cred *old = current_cred(); struct cred *new; switch (option) { case PR_CAPBSET_READ: if (!cap_valid(arg2)) return -EINVAL; return !!cap_raised(old->cap_bset, arg2); case PR_CAPBSET_DROP: return cap_prctl_drop(arg2); /* * The next four prctl's remain to assist with transitioning a * system from legacy UID=0 based privilege (when filesystem * capabilities are not in use) to a system using filesystem * capabilities only - as the POSIX.1e draft intended. * * Note: * * PR_SET_SECUREBITS = * issecure_mask(SECURE_KEEP_CAPS_LOCKED) * | issecure_mask(SECURE_NOROOT) * | issecure_mask(SECURE_NOROOT_LOCKED) * | issecure_mask(SECURE_NO_SETUID_FIXUP) * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED) * * will ensure that the current process and all of its * children will be locked into a pure * capability-based-privilege environment. */ case PR_SET_SECUREBITS: if ((((old->securebits & SECURE_ALL_LOCKS) >> 1) & (old->securebits ^ arg2)) /*[1]*/ || ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ || (cap_capable(current_cred(), current_cred()->user_ns, CAP_SETPCAP, CAP_OPT_NONE) != 0) /*[4]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks * [3] no setting of unsupported bits * [4] doing anything requires privilege (go read about * the "sendmail capabilities bug") */ ) /* cannot change a locked bit */ return -EPERM; new = prepare_creds(); if (!new) return -ENOMEM; new->securebits = arg2; return commit_creds(new); case PR_GET_SECUREBITS: return old->securebits; case PR_GET_KEEPCAPS: return !!issecure(SECURE_KEEP_CAPS); case PR_SET_KEEPCAPS: if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ return -EINVAL; if (issecure(SECURE_KEEP_CAPS_LOCKED)) return -EPERM; new = prepare_creds(); if (!new) return -ENOMEM; if (arg2) new->securebits |= issecure_mask(SECURE_KEEP_CAPS); else new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); return commit_creds(new); case PR_CAP_AMBIENT: if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) { if (arg3 | arg4 | arg5) return -EINVAL; new = prepare_creds(); if (!new) return -ENOMEM; cap_clear(new->cap_ambient); return commit_creds(new); } if (((!cap_valid(arg3)) | arg4 | arg5)) return -EINVAL; if (arg2 == PR_CAP_AMBIENT_IS_SET) { return !!cap_raised(current_cred()->cap_ambient, arg3); } else if (arg2 != PR_CAP_AMBIENT_RAISE && arg2 != PR_CAP_AMBIENT_LOWER) { return -EINVAL; } else { if (arg2 == PR_CAP_AMBIENT_RAISE && (!cap_raised(current_cred()->cap_permitted, arg3) || !cap_raised(current_cred()->cap_inheritable, arg3) || issecure(SECURE_NO_CAP_AMBIENT_RAISE))) return -EPERM; new = prepare_creds(); if (!new) return -ENOMEM; if (arg2 == PR_CAP_AMBIENT_RAISE) cap_raise(new->cap_ambient, arg3); else cap_lower(new->cap_ambient, arg3); return commit_creds(new); } default: /* No functionality available - continue with default */ return -ENOSYS; } } /** * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted * @mm: The VM space in which the new mapping is to be made * @pages: The size of the mapping * * Determine whether the allocation of a new virtual mapping by the current * task is permitted. * * Return: 1 if permission is granted, 0 if not. 
*/ int cap_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0) cap_sys_admin = 1; return cap_sys_admin; } /** * cap_mmap_addr - check if able to map given addr * @addr: address attempting to be mapped * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the * capability security module. * * Return: 0 if this mapping should be allowed or -EPERM if not. */ int cap_mmap_addr(unsigned long addr) { int ret = 0; if (addr < dac_mmap_min_addr) { ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO, CAP_OPT_NONE); /* set PF_SUPERPRIV if it turns out we allow the low mmap */ if (ret == 0) current->flags |= PF_SUPERPRIV; } return ret; } int cap_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) { return 0; } #ifdef CONFIG_SECURITY static const struct lsm_id capability_lsmid = { .name = "capability", .id = LSM_ID_CAPABILITY, }; static struct security_hook_list capability_hooks[] __ro_after_init = { LSM_HOOK_INIT(capable, cap_capable), LSM_HOOK_INIT(settime, cap_settime), LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme), LSM_HOOK_INIT(capget, cap_capget), LSM_HOOK_INIT(capset, cap_capset), LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file), LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv), LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv), LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity), LSM_HOOK_INIT(mmap_addr, cap_mmap_addr), LSM_HOOK_INIT(mmap_file, cap_mmap_file), LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid), LSM_HOOK_INIT(task_prctl, cap_task_prctl), LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler), LSM_HOOK_INIT(task_setioprio, cap_task_setioprio), LSM_HOOK_INIT(task_setnice, cap_task_setnice), LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory), }; static int __init capability_init(void) { security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks), &capability_lsmid); return 0; } DEFINE_LSM(capability) = { .name = "capability", .order = LSM_ORDER_FIRST, .init = capability_init, }; #endif /* CONFIG_SECURITY */
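/*
 * Editor's illustration (a hedged sketch, not part of the kernel sources
 * above or below): the exec-time transition implemented in
 * cap_bprm_creds_from_file() can be modelled with plain 64-bit masks. The
 * names X (bounding set), fP, fI, fE (file caps), pI and pA follow the
 * comments in that function.
 */
#include <stdbool.h>
#include <stdint.h>

struct caps { uint64_t permitted, inheritable, effective, ambient; };

static struct caps exec_transition(struct caps old, uint64_t X,
				   uint64_t fP, uint64_t fI, bool fE,
				   bool is_setid, bool has_fcap)
{
	struct caps new = { 0 };

	/* File caps or setid cancels ambient. */
	new.ambient = (is_setid || has_fcap) ? 0 : old.ambient;
	/* pI' = pI (unchanged by exec) */
	new.inheritable = old.inheritable;
	/* pP' = (X & fP) | (pI & fI) | pA' */
	new.permitted = (X & fP) | (old.inheritable & fI) | new.ambient;
	/* pE' = (fE ? pP' : pA') */
	new.effective = fE ? new.permitted : new.ambient;
	return new;
}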
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1999 Eric Youngdale * Copyright (C) 2014 Christoph Hellwig * * SCSI queueing library. * Initial versions: Eric Youngdale (eric@andante.org). * Based upon conversations with large numbers * of people at Linux Expo. */ #include <linux/bio.h> #include <linux/bitops.h> #include <linux/blkdev.h> #include <linux/completion.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/init.h> #include <linux/pci.h> #include <linux/delay.h> #include <linux/hardirq.h> #include <linux/scatterlist.h> #include <linux/blk-mq.h> #include <linux/blk-integrity.h> #include <linux/ratelimit.h> #include <asm/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> /* __scsi_init_queue() */ #include <scsi/scsi_dh.h> #include <trace/events/scsi.h> #include "scsi_debugfs.h" #include "scsi_priv.h" #include "scsi_logging.h" /* * Size of integrity metadata is usually small, 1 inline sg should * cover normal cases.
*/ #ifdef CONFIG_ARCH_NO_SG_CHAIN #define SCSI_INLINE_PROT_SG_CNT 0 #define SCSI_INLINE_SG_CNT 0 #else #define SCSI_INLINE_PROT_SG_CNT 1 #define SCSI_INLINE_SG_CNT 2 #endif static struct kmem_cache *scsi_sense_cache; static DEFINE_MUTEX(scsi_sense_cache_mutex); static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd); int scsi_init_sense_cache(struct Scsi_Host *shost) { int ret = 0; mutex_lock(&scsi_sense_cache_mutex); if (!scsi_sense_cache) { scsi_sense_cache = kmem_cache_create_usercopy("scsi_sense_cache", SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, 0, SCSI_SENSE_BUFFERSIZE, NULL); if (!scsi_sense_cache) ret = -ENOMEM; } mutex_unlock(&scsi_sense_cache_mutex); return ret; } static void scsi_set_blocked(struct scsi_cmnd *cmd, int reason) { struct Scsi_Host *host = cmd->device->host; struct scsi_device *device = cmd->device; struct scsi_target *starget = scsi_target(device); /* * Set the appropriate busy bit for the device/host. * * If the host/device isn't busy, assume that something actually * completed, and that we should be able to queue a command now. * * Note that the prior mid-layer assumption that any host could * always queue at least one command is now broken. The mid-layer * will implement a user specifiable stall (see * scsi_host.max_host_blocked and scsi_device.max_device_blocked) * if a command is requeued with no other commands outstanding * either for the device or for the host. */ switch (reason) { case SCSI_MLQUEUE_HOST_BUSY: atomic_set(&host->host_blocked, host->max_host_blocked); break; case SCSI_MLQUEUE_DEVICE_BUSY: case SCSI_MLQUEUE_EH_RETRY: atomic_set(&device->device_blocked, device->max_device_blocked); break; case SCSI_MLQUEUE_TARGET_BUSY: atomic_set(&starget->target_blocked, starget->max_target_blocked); break; } } static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs) { struct request *rq = scsi_cmd_to_rq(cmd); if (rq->rq_flags & RQF_DONTPREP) { rq->rq_flags &= ~RQF_DONTPREP; scsi_mq_uninit_cmd(cmd); } else { WARN_ON_ONCE(true); } blk_mq_requeue_request(rq, false); if (!scsi_host_in_recovery(cmd->device->host)) blk_mq_delay_kick_requeue_list(rq->q, msecs); } /** * __scsi_queue_insert - private queue insertion * @cmd: The SCSI command being requeued * @reason: The reason for the requeue * @unbusy: Whether the queue should be unbusied * * This is a private queue insertion. The public interface * scsi_queue_insert() always assumes the queue should be unbusied * because it's always called before the completion. This function is * for a requeue after completion, which should only occur in this * file. */ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) { struct scsi_device *device = cmd->device; SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd, "Inserting command %p into mlqueue\n", cmd)); scsi_set_blocked(cmd, reason); /* * Decrement the counters, since these commands are no longer * active on the host/device. */ if (unbusy) scsi_device_unbusy(device, cmd); /* * Requeue this command. It will go before all other commands * that are already in the queue. Schedule requeue work under * lock such that the kblockd_schedule_work() call happens * before blk_mq_destroy_queue() finishes. */ cmd->result = 0; blk_mq_requeue_request(scsi_cmd_to_rq(cmd), !scsi_host_in_recovery(cmd->device->host)); } /** * scsi_queue_insert - Reinsert a command in the queue. * @cmd: command that we are adding to queue. * @reason: why we are inserting command to queue. * * We do this for one of two cases. 
Either the host is busy and it cannot accept * any more commands for the time being, or the device returned QUEUE_FULL and * can accept no more commands. * * Context: This could be called either from an interrupt context or a normal * process context. */ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) { __scsi_queue_insert(cmd, reason, true); } void scsi_failures_reset_retries(struct scsi_failures *failures) { struct scsi_failure *failure; failures->total_retries = 0; for (failure = failures->failure_definitions; failure->result; failure++) failure->retries = 0; } EXPORT_SYMBOL_GPL(scsi_failures_reset_retries); /** * scsi_check_passthrough - Determine if passthrough scsi_cmnd needs a retry. * @scmd: scsi_cmnd to check. * @failures: scsi_failures struct that lists failures to check for. * * Returns -EAGAIN if the caller should retry else 0. */ static int scsi_check_passthrough(struct scsi_cmnd *scmd, struct scsi_failures *failures) { struct scsi_failure *failure; struct scsi_sense_hdr sshdr; enum sam_status status; if (!failures) return 0; for (failure = failures->failure_definitions; failure->result; failure++) { if (failure->result == SCMD_FAILURE_RESULT_ANY) goto maybe_retry; if (host_byte(scmd->result) && host_byte(scmd->result) == host_byte(failure->result)) goto maybe_retry; status = status_byte(scmd->result); if (!status) continue; if (failure->result == SCMD_FAILURE_STAT_ANY && !scsi_status_is_good(scmd->result)) goto maybe_retry; if (status != status_byte(failure->result)) continue; if (status_byte(failure->result) != SAM_STAT_CHECK_CONDITION || failure->sense == SCMD_FAILURE_SENSE_ANY) goto maybe_retry; if (!scsi_command_normalize_sense(scmd, &sshdr)) return 0; if (failure->sense != sshdr.sense_key) continue; if (failure->asc == SCMD_FAILURE_ASC_ANY) goto maybe_retry; if (failure->asc != sshdr.asc) continue; if (failure->ascq == SCMD_FAILURE_ASCQ_ANY || failure->ascq == sshdr.ascq) goto maybe_retry; } return 0; maybe_retry: if (failure->allowed) { if (failure->allowed == SCMD_FAILURE_NO_LIMIT || ++failure->retries <= failure->allowed) return -EAGAIN; } else { if (failures->total_allowed == SCMD_FAILURE_NO_LIMIT || ++failures->total_retries <= failures->total_allowed) return -EAGAIN; } return 0; } /** * scsi_execute_cmd - insert request and wait for the result * @sdev: scsi_device * @cmd: scsi command * @opf: block layer request cmd_flags * @buffer: data buffer * @bufflen: len of buffer * @timeout: request timeout in HZ * @ml_retries: number of times SCSI midlayer will retry request * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. 
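 *
 * For illustration (an editor's hedged sketch, not taken from a real driver):
 * issuing a TEST UNIT READY and decoding any sense data could look like:
 *
 *	unsigned char cdb[6] = { TEST_UNIT_READY };
 *	struct scsi_sense_hdr sshdr;
 *	const struct scsi_exec_args exec_args = { .sshdr = &sshdr };
 *	int ret;
 *
 *	ret = scsi_execute_cmd(sdev, cdb, REQ_OP_DRV_IN, NULL, 0,
 *			       10 * HZ, 3, &exec_args);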
*/ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, blk_opf_t opf, void *buffer, unsigned int bufflen, int timeout, int ml_retries, const struct scsi_exec_args *args) { static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; if (!args) args = &default_args; else if (WARN_ON_ONCE(args->sense && args->sense_len != SCSI_SENSE_BUFFERSIZE)) return -EINVAL; retry: req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); if (bufflen) { ret = blk_rq_map_kern(sdev->request_queue, req, buffer, bufflen, GFP_NOIO); if (ret) goto out; } scmd = blk_mq_rq_to_pdu(req); scmd->cmd_len = COMMAND_SIZE(cmd[0]); memcpy(scmd->cmnd, cmd, scmd->cmd_len); scmd->allowed = ml_retries; scmd->flags |= args->scmd_flags; req->timeout = timeout; req->rq_flags |= RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work */ blk_execute_rq(req, true); if (scsi_check_passthrough(scmd, args->failures) == -EAGAIN) { blk_mq_free_request(req); goto retry; } /* * Some devices (USB mass-storage in particular) may transfer * garbage data together with a residue indicating that the data * is invalid. Prevent the garbage from being misinterpreted * and prevent security leaks by zeroing out the excess data. */ if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); if (args->resid) *args->resid = scmd->resid_len; if (args->sense) memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); if (args->sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, args->sshdr); ret = scmd->result; out: blk_mq_free_request(req); return ret; } EXPORT_SYMBOL(scsi_execute_cmd); /* * Wake up the error handler if necessary. Avoid as follows that the error * handler is not woken up if host in-flight requests number == * shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination * with an RCU read lock in this function to ensure that this function in * its entirety either finishes before scsi_eh_scmd_add() increases the * host_failed counter or that it notices the shost state change made by * scsi_eh_scmd_add(). */ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { unsigned long flags; rcu_read_lock(); __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state); if (unlikely(scsi_host_in_recovery(shost))) { unsigned int busy = scsi_host_busy(shost); spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); } void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd) { struct Scsi_Host *shost = sdev->host; struct scsi_target *starget = scsi_target(sdev); scsi_dec_host_busy(shost, cmd); if (starget->can_queue > 0) atomic_dec(&starget->target_busy); sbitmap_put(&sdev->budget_map, cmd->budget_token); cmd->budget_token = -1; } /* * Kick the queue of SCSI device @sdev if @sdev != current_sdev. Called with * interrupts disabled. */ static void scsi_kick_sdev_queue(struct scsi_device *sdev, void *data) { struct scsi_device *current_sdev = data; if (sdev != current_sdev) blk_mq_run_hw_queues(sdev->request_queue, true); } /* * Called for single_lun devices on IO completion. Clear starget_sdev_user, * and call blk_run_queue for all the scsi_devices on the target - * including current_sdev first. * * Called with *no* scsi locks held. 
*/ static void scsi_single_lun_run(struct scsi_device *current_sdev) { struct Scsi_Host *shost = current_sdev->host; struct scsi_target *starget = scsi_target(current_sdev); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); starget->starget_sdev_user = NULL; spin_unlock_irqrestore(shost->host_lock, flags); /* * Call blk_run_queue for all LUNs on the target, starting with * current_sdev. We race with others (to set starget_sdev_user), * but in most cases, we will be first. Ideally, each LU on the * target would get some limited time or requests on the target. */ blk_mq_run_hw_queues(current_sdev->request_queue, shost->queuecommand_may_block); spin_lock_irqsave(shost->host_lock, flags); if (!starget->starget_sdev_user) __starget_for_each_device(starget, current_sdev, scsi_kick_sdev_queue); spin_unlock_irqrestore(shost->host_lock, flags); } static inline bool scsi_device_is_busy(struct scsi_device *sdev) { if (scsi_device_busy(sdev) >= sdev->queue_depth) return true; if (atomic_read(&sdev->device_blocked) > 0) return true; return false; } static inline bool scsi_target_is_busy(struct scsi_target *starget) { if (starget->can_queue > 0) { if (atomic_read(&starget->target_busy) >= starget->can_queue) return true; if (atomic_read(&starget->target_blocked) > 0) return true; } return false; } static inline bool scsi_host_is_busy(struct Scsi_Host *shost) { if (atomic_read(&shost->host_blocked) > 0) return true; if (shost->host_self_blocked) return true; return false; } static void scsi_starved_list_run(struct Scsi_Host *shost) { LIST_HEAD(starved_list); struct scsi_device *sdev; unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); list_splice_init(&shost->starved_list, &starved_list); while (!list_empty(&starved_list)) { struct request_queue *slq; /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. scsi_request_fn * drops the queue_lock and can add us back to the * starved_list. * * host_lock protects the starved_list and starved_entry. * scsi_request_fn must get the host_lock before checking * or modifying starved_list or starved_entry. */ if (scsi_host_is_busy(shost)) break; sdev = list_entry(starved_list.next, struct scsi_device, starved_entry); list_del_init(&sdev->starved_entry); if (scsi_target_is_busy(scsi_target(sdev))) { list_move_tail(&sdev->starved_entry, &shost->starved_list); continue; } /* * Once we drop the host lock, a racing scsi_remove_device() * call may remove the sdev from the starved list and destroy * it and the queue. Mitigate by taking a reference to the * queue and never touching the sdev again after we drop the * host lock. Note: if __scsi_remove_device() invokes * blk_mq_destroy_queue() before the queue is run from this * function then blk_run_queue() will return immediately since * blk_mq_destroy_queue() marks the queue with QUEUE_FLAG_DYING. */ slq = sdev->request_queue; if (!blk_get_queue(slq)) continue; spin_unlock_irqrestore(shost->host_lock, flags); blk_mq_run_hw_queues(slq, false); blk_put_queue(slq); spin_lock_irqsave(shost->host_lock, flags); } /* put any unprocessed entries back */ list_splice(&starved_list, &shost->starved_list); spin_unlock_irqrestore(shost->host_lock, flags); } /** * scsi_run_queue - Select a proper request queue to serve next. * @q: last request's queue * * The previous command was completely finished, start a new one if possible. 
*/ static void scsi_run_queue(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; if (scsi_target(sdev)->single_lun) scsi_single_lun_run(sdev); if (!list_empty(&sdev->host->starved_list)) scsi_starved_list_run(sdev->host); /* Note: blk_mq_kick_requeue_list() runs the queue asynchronously. */ blk_mq_kick_requeue_list(q); } void scsi_requeue_run_queue(struct work_struct *work) { struct scsi_device *sdev; struct request_queue *q; sdev = container_of(work, struct scsi_device, requeue_work); q = sdev->request_queue; scsi_run_queue(q); } void scsi_run_host_queues(struct Scsi_Host *shost) { struct scsi_device *sdev; shost_for_each_device(sdev, shost) scsi_run_queue(sdev->request_queue); } static void scsi_uninit_cmd(struct scsi_cmnd *cmd) { if (!blk_rq_is_passthrough(scsi_cmd_to_rq(cmd))) { struct scsi_driver *drv = scsi_cmd_to_driver(cmd); if (drv->uninit_command) drv->uninit_command(cmd); } } void scsi_free_sgtables(struct scsi_cmnd *cmd) { if (cmd->sdb.table.nents) sg_free_table_chained(&cmd->sdb.table, SCSI_INLINE_SG_CNT); if (scsi_prot_sg_count(cmd)) sg_free_table_chained(&cmd->prot_sdb->table, SCSI_INLINE_PROT_SG_CNT); } EXPORT_SYMBOL_GPL(scsi_free_sgtables); static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd) { scsi_free_sgtables(cmd); scsi_uninit_cmd(cmd); } static void scsi_run_queue_async(struct scsi_device *sdev) { if (scsi_host_in_recovery(sdev->host)) return; if (scsi_target(sdev)->single_lun || !list_empty(&sdev->host->starved_list)) { kblockd_schedule_work(&sdev->requeue_work); } else { /* * smp_mb() present in sbitmap_queue_clear() or implied in * .end_io is for ordering writing .device_busy in * scsi_device_unbusy() and reading sdev->restarts. */ int old = atomic_read(&sdev->restarts); /* * ->restarts has to be kept as non-zero if new budget * contention occurs. * * No need to run queue when either another re-run * queue wins in updating ->restarts or a new budget * contention occurs. */ if (old && atomic_cmpxchg(&sdev->restarts, old, 0) == old) blk_mq_run_hw_queues(sdev->request_queue, true); } } /* Returns false when no more bytes to process, true if there are more */ static bool scsi_end_request(struct request *req, blk_status_t error, unsigned int bytes) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); struct scsi_device *sdev = cmd->device; struct request_queue *q = sdev->request_queue; if (blk_update_request(req, error, bytes)) return true; // XXX: if (blk_queue_add_random(q)) add_disk_randomness(req->q->disk); WARN_ON_ONCE(!blk_rq_is_passthrough(req) && !(cmd->flags & SCMD_INITIALIZED)); cmd->flags = 0; /* * Calling rcu_barrier() is not necessary here because the * SCSI error handler guarantees that the function called by * call_rcu() has been called before scsi_end_request() is * called. */ destroy_rcu_head(&cmd->rcu); /* * In the MQ case the command gets freed by __blk_mq_end_request, * so we have to do all cleanup that depends on it earlier. * * We also can't kick the queues from irq context, so we * will have to defer it to a workqueue. */ scsi_mq_uninit_cmd(cmd); /* * queue is still alive, so grab the ref for preventing it * from being cleaned up during running queue. */ percpu_ref_get(&q->q_usage_counter); __blk_mq_end_request(req, error); scsi_run_queue_async(sdev); percpu_ref_put(&q->q_usage_counter); return false; } /** * scsi_result_to_blk_status - translate a SCSI result code into blk_status_t * @result: scsi error code * * Translate a SCSI result code into a blk_status_t value. 
*/ static blk_status_t scsi_result_to_blk_status(int result) { /* * Check the scsi-ml byte first in case we converted a host or status * byte. */ switch (scsi_ml_byte(result)) { case SCSIML_STAT_OK: break; case SCSIML_STAT_RESV_CONFLICT: return BLK_STS_RESV_CONFLICT; case SCSIML_STAT_NOSPC: return BLK_STS_NOSPC; case SCSIML_STAT_MED_ERROR: return BLK_STS_MEDIUM; case SCSIML_STAT_TGT_FAILURE: return BLK_STS_TARGET; case SCSIML_STAT_DL_TIMEOUT: return BLK_STS_DURATION_LIMIT; } switch (host_byte(result)) { case DID_OK: if (scsi_status_is_good(result)) return BLK_STS_OK; return BLK_STS_IOERR; case DID_TRANSPORT_FAILFAST: case DID_TRANSPORT_MARGINAL: return BLK_STS_TRANSPORT; default: return BLK_STS_IOERR; } } /** * scsi_rq_err_bytes - determine number of bytes till the next failure boundary * @rq: request to examine * * Description: * A request could be merge of IOs which require different failure * handling. This function determines the number of bytes which * can be failed from the beginning of the request without * crossing into area which need to be retried further. * * Return: * The number of bytes to fail. */ static unsigned int scsi_rq_err_bytes(const struct request *rq) { blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK; unsigned int bytes = 0; struct bio *bio; if (!(rq->rq_flags & RQF_MIXED_MERGE)) return blk_rq_bytes(rq); /* * Currently the only 'mixing' which can happen is between * different fastfail types. We can safely fail portions * which have all the failfast bits that the first one has - * the ones which are at least as eager to fail as the first * one. */ for (bio = rq->bio; bio; bio = bio->bi_next) { if ((bio->bi_opf & ff) != ff) break; bytes += bio->bi_iter.bi_size; } /* this could lead to infinite loop */ BUG_ON(blk_rq_bytes(rq) && !bytes); return bytes; } static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) { struct request *req = scsi_cmd_to_rq(cmd); unsigned long wait_for; if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT) return false; wait_for = (cmd->allowed + 1) * req->timeout; if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n", wait_for/HZ); return true; } return false; } /* * When ALUA transition state is returned, reprep the cmd to * use the ALUA handler's transition timeout. Delay the reprep * 1 sec to avoid aggressive retries of the target in that * state. */ #define ALUA_TRANSITION_REPREP_DELAY 1000 /* Helper for scsi_io_completion() when special action required. */ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) { struct request *req = scsi_cmd_to_rq(cmd); int level = 0; enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP, ACTION_RETRY, ACTION_DELAYED_RETRY} action; struct scsi_sense_hdr sshdr; bool sense_valid; bool sense_current = true; /* false implies "deferred sense" */ blk_status_t blk_stat; sense_valid = scsi_command_normalize_sense(cmd, &sshdr); if (sense_valid) sense_current = !scsi_sense_is_deferred(&sshdr); blk_stat = scsi_result_to_blk_status(result); if (host_byte(result) == DID_RESET) { /* Third party bus reset or reset for error recovery * reasons. Just retry the command and see what * happens. */ action = ACTION_RETRY; } else if (sense_valid && sense_current) { switch (sshdr.sense_key) { case UNIT_ATTENTION: if (cmd->device->removable) { /* Detected disc change. Set a bit * and quietly refuse further access. */ cmd->device->changed = 1; action = ACTION_FAIL; } else { /* Must have been a power glitch, or a * bus reset. 
Could not have been a * media change, so we just retry the * command and see what happens. */ action = ACTION_RETRY; } break; case ILLEGAL_REQUEST: /* If we had an ILLEGAL REQUEST returned, then * we may have performed an unsupported * command. The only thing this should be * would be a ten byte read where only a six * byte read was supported. Also, on a system * where READ CAPACITY failed, we may have * read past the end of the disk. */ if ((cmd->device->use_10_for_rw && sshdr.asc == 0x20 && sshdr.ascq == 0x00) && (cmd->cmnd[0] == READ_10 || cmd->cmnd[0] == WRITE_10)) { /* This will issue a new 6-byte command. */ cmd->device->use_10_for_rw = 0; action = ACTION_REPREP; } else if (sshdr.asc == 0x10) /* DIX */ { action = ACTION_FAIL; blk_stat = BLK_STS_PROTECTION; /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { action = ACTION_FAIL; blk_stat = BLK_STS_TARGET; } else action = ACTION_FAIL; break; case ABORTED_COMMAND: action = ACTION_FAIL; if (sshdr.asc == 0x10) /* DIF */ blk_stat = BLK_STS_PROTECTION; break; case NOT_READY: /* If the device is in the process of becoming * ready, or has a temporary blockage, retry. */ if (sshdr.asc == 0x04) { switch (sshdr.ascq) { case 0x01: /* becoming ready */ case 0x04: /* format in progress */ case 0x05: /* rebuild in progress */ case 0x06: /* recalculation in progress */ case 0x07: /* operation in progress */ case 0x08: /* Long write in progress */ case 0x09: /* self test in progress */ case 0x11: /* notify (enable spinup) required */ case 0x14: /* space allocation in progress */ case 0x1a: /* start stop unit in progress */ case 0x1b: /* sanitize in progress */ case 0x1d: /* configuration in progress */ case 0x24: /* depopulation in progress */ case 0x25: /* depopulation restore in progress */ action = ACTION_DELAYED_RETRY; break; case 0x0a: /* ALUA state transition */ action = ACTION_DELAYED_REPREP; break; default: action = ACTION_FAIL; break; } } else action = ACTION_FAIL; break; case VOLUME_OVERFLOW: /* See SSC3rXX or current. 
*/ action = ACTION_FAIL; break; case DATA_PROTECT: action = ACTION_FAIL; if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) || (sshdr.asc == 0x55 && (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) { /* Insufficient zone resources */ blk_stat = BLK_STS_ZONE_OPEN_RESOURCE; } break; case COMPLETED: fallthrough; default: action = ACTION_FAIL; break; } } else action = ACTION_FAIL; if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd)) action = ACTION_FAIL; switch (action) { case ACTION_FAIL: /* Give up and fail the remainder of the request */ if (!(req->rq_flags & RQF_QUIET)) { static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); if (unlikely(scsi_logging_level)) level = SCSI_LOG_LEVEL(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS); /* * if logging is enabled the failure will be printed * in scsi_log_completion(), so avoid duplicate messages */ if (!level && __ratelimit(&_rs)) { scsi_print_result(cmd, NULL, FAILED); if (sense_valid) scsi_print_sense(cmd); scsi_print_command(cmd); } } if (!scsi_end_request(req, blk_stat, scsi_rq_err_bytes(req))) return; fallthrough; case ACTION_REPREP: scsi_mq_requeue_cmd(cmd, 0); break; case ACTION_DELAYED_REPREP: scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY); break; case ACTION_RETRY: /* Retry the same command immediately */ __scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY, false); break; case ACTION_DELAYED_RETRY: /* Retry the same command after a delay */ __scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, false); break; } } /* * Helper for scsi_io_completion() when cmd->result is non-zero. Returns a * new result that may suppress further error checking. Also modifies * *blk_statp in some cases. */ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, blk_status_t *blk_statp) { bool sense_valid; bool sense_current = true; /* false implies "deferred sense" */ struct request *req = scsi_cmd_to_rq(cmd); struct scsi_sense_hdr sshdr; sense_valid = scsi_command_normalize_sense(cmd, &sshdr); if (sense_valid) sense_current = !scsi_sense_is_deferred(&sshdr); if (blk_rq_is_passthrough(req)) { if (sense_valid) { /* * SG_IO wants current and deferred errors */ cmd->sense_len = min(8 + cmd->sense_buffer[7], SCSI_SENSE_BUFFERSIZE); } if (sense_current) *blk_statp = scsi_result_to_blk_status(result); } else if (blk_rq_bytes(req) == 0 && sense_current) { /* * Flush commands do not transfers any data, and thus cannot use * good_bytes != blk_rq_bytes(req) as the signal for an error. * This sets *blk_statp explicitly for the problem case. */ *blk_statp = scsi_result_to_blk_status(result); } /* * Recovered errors need reporting, but they're always treated as * success, so fiddle the result code here. For passthrough requests * we already took a copy of the original into sreq->result which * is what gets returned to the user */ if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) { bool do_print = true; /* * if ATA PASS-THROUGH INFORMATION AVAILABLE [0x0, 0x1d] * skip print since caller wants ATA registers. Only occurs * on SCSI ATA PASS_THROUGH commands when CK_COND=1 */ if ((sshdr.asc == 0x0) && (sshdr.ascq == 0x1d)) do_print = false; else if (req->rq_flags & RQF_QUIET) do_print = false; if (do_print) scsi_print_sense(cmd); result = 0; /* for passthrough, *blk_statp may be set */ *blk_statp = BLK_STS_OK; } /* * Another corner case: the SCSI status byte is non-zero but 'good'. 
* Example: PRE-FETCH command returns SAM_STAT_CONDITION_MET when * it is able to fit nominated LBs in its cache (and SAM_STAT_GOOD * if it can't fit). Treat SAM_STAT_CONDITION_MET and the related * intermediate statuses (both obsolete in SAM-4) as good. */ if ((result & 0xff) && scsi_status_is_good(result)) { result = 0; *blk_statp = BLK_STS_OK; } return result; } /** * scsi_io_completion - Completion processing for SCSI commands. * @cmd: command that is finished. * @good_bytes: number of processed bytes. * * We will finish off the specified number of sectors. If we are done, the * command block will be released and the queue function will be goosed. If we * are not done then we have to figure out what to do next: * * a) We can call scsi_mq_requeue_cmd(). The request will be * unprepared and put back on the queue. Then a new command will * be created for it. This should be used if we made forward * progress, or if we want to switch from READ(10) to READ(6) for * example. * * b) We can call scsi_io_completion_action(). The request will be * put back on the queue and retried using the same command as * before, possibly after a delay. * * c) We can call scsi_end_request() with blk_stat other than * BLK_STS_OK, to fail the remainder of the request. */ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; struct request *req = scsi_cmd_to_rq(cmd); blk_status_t blk_stat = BLK_STS_OK; if (unlikely(result)) /* a nz result may or may not be an error */ result = scsi_io_completion_nz_result(cmd, result, &blk_stat); /* * Next deal with any sectors which we were able to correctly * handle. */ SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, cmd, "%u sectors total, %d bytes done.\n", blk_rq_sectors(req), good_bytes)); /* * Failed, zero length commands always need to drop down * to retry code. Fast path should return in this block. */ if (likely(blk_rq_bytes(req) > 0 || blk_stat == BLK_STS_OK)) { if (likely(!scsi_end_request(req, blk_stat, good_bytes))) return; /* no bytes remaining */ } /* Kill remainder if no retries. */ if (unlikely(blk_stat && scsi_noretry_cmd(cmd))) { if (scsi_end_request(req, blk_stat, blk_rq_bytes(req))) WARN_ONCE(true, "Bytes remaining after failed, no-retry command"); return; } /* * If there had been no error, but we have leftover bytes in the * request just queue the command up again. */ if (likely(result == 0)) scsi_mq_requeue_cmd(cmd, 0); else scsi_io_completion_action(cmd, result); } static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev, struct request *rq) { return sdev->dma_drain_len && blk_rq_is_passthrough(rq) && !op_is_write(req_op(rq)) && sdev->host->hostt->dma_need_drain(rq); } /** * scsi_alloc_sgtables - Allocate and initialize data and integrity scatterlists * @cmd: SCSI command data structure to initialize. * * Initializes @cmd->sdb and also @cmd->prot_sdb if data integrity is enabled * for @cmd. * * Returns: * * BLK_STS_OK - on success * * BLK_STS_RESOURCE - if the failure is retryable * * BLK_STS_IOERR - if the failure is fatal */ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd) { struct scsi_device *sdev = cmd->device; struct request *rq = scsi_cmd_to_rq(cmd); unsigned short nr_segs = blk_rq_nr_phys_segments(rq); struct scatterlist *last_sg = NULL; blk_status_t ret; bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq); int count; if (WARN_ON_ONCE(!nr_segs)) return BLK_STS_IOERR; /* * Make sure there is space for the drain. The driver must adjust * max_hw_segments to be prepared for this. 
*/ if (need_drain) nr_segs++; /* * If sg table allocation fails, requeue request later. */ if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs, cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT))) return BLK_STS_RESOURCE; /* * Next, walk the list, and fill in the addresses and sizes of * each segment. */ count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg); if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) { unsigned int pad_len = (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; last_sg->length += pad_len; cmd->extra_len += pad_len; } if (need_drain) { sg_unmark_end(last_sg); last_sg = sg_next(last_sg); sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len); sg_mark_end(last_sg); cmd->extra_len += sdev->dma_drain_len; count++; } BUG_ON(count > cmd->sdb.table.nents); cmd->sdb.table.nents = count; cmd->sdb.length = blk_rq_payload_bytes(rq); if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; int ivecs; if (WARN_ON_ONCE(!prot_sdb)) { /* * This can happen if someone (e.g. multipath) * queues a command to a device on an adapter * that does not support DIX. */ ret = BLK_STS_IOERR; goto out_free_sgtables; } ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio); if (sg_alloc_table_chained(&prot_sdb->table, ivecs, prot_sdb->table.sgl, SCSI_INLINE_PROT_SG_CNT)) { ret = BLK_STS_RESOURCE; goto out_free_sgtables; } count = blk_rq_map_integrity_sg(rq->q, rq->bio, prot_sdb->table.sgl); BUG_ON(count > ivecs); BUG_ON(count > queue_max_integrity_segments(rq->q)); cmd->prot_sdb = prot_sdb; cmd->prot_sdb->table.nents = count; } return BLK_STS_OK; out_free_sgtables: scsi_free_sgtables(cmd); return ret; } EXPORT_SYMBOL(scsi_alloc_sgtables); /** * scsi_initialize_rq - initialize struct scsi_cmnd partially * @rq: Request associated with the SCSI command to be initialized. * * This function initializes the members of struct scsi_cmnd that must be * initialized before request processing starts and that won't be * reinitialized if a SCSI command is requeued. */ static void scsi_initialize_rq(struct request *rq) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); memset(cmd->cmnd, 0, sizeof(cmd->cmnd)); cmd->cmd_len = MAX_COMMAND_SIZE; cmd->sense_len = 0; init_rcu_head(&cmd->rcu); cmd->jiffies_at_alloc = jiffies; cmd->retries = 0; } struct request *scsi_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags) { struct request *rq; rq = blk_mq_alloc_request(q, opf, flags); if (!IS_ERR(rq)) scsi_initialize_rq(rq); return rq; } EXPORT_SYMBOL_GPL(scsi_alloc_request); /* * Only called when the request isn't completed by SCSI, and not freed by * SCSI */ static void scsi_cleanup_rq(struct request *rq) { if (rq->rq_flags & RQF_DONTPREP) { scsi_mq_uninit_cmd(blk_mq_rq_to_pdu(rq)); rq->rq_flags &= ~RQF_DONTPREP; } } /* Called before a request is prepared. See also scsi_mq_prep_fn(). */ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); if (!blk_rq_is_passthrough(rq) && !(cmd->flags & SCMD_INITIALIZED)) { cmd->flags |= SCMD_INITIALIZED; scsi_initialize_rq(rq); } cmd->device = dev; INIT_LIST_HEAD(&cmd->eh_entry); INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); } static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); /* * Passthrough requests may transfer data, in which case they must * a bio attached to them. 
Or they might contain a SCSI command * that does not transfer data, in which case they may optionally * submit a request without an attached bio. */ if (req->bio) { blk_status_t ret = scsi_alloc_sgtables(cmd); if (unlikely(ret != BLK_STS_OK)) return ret; } else { BUG_ON(blk_rq_bytes(req)); memset(&cmd->sdb, 0, sizeof(cmd->sdb)); } cmd->transfersize = blk_rq_bytes(req); return BLK_STS_OK; } static blk_status_t scsi_device_state_check(struct scsi_device *sdev, struct request *req) { switch (sdev->sdev_state) { case SDEV_CREATED: return BLK_STS_OK; case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: /* * If the device is offline we refuse to process any * commands. The device must be brought online * before trying any recovery commands. */ if (!sdev->offline_already) { sdev->offline_already = true; sdev_printk(KERN_ERR, sdev, "rejecting I/O to offline device\n"); } return BLK_STS_IOERR; case SDEV_DEL: /* * If the device is fully deleted, we refuse to * process any commands as well. */ sdev_printk(KERN_ERR, sdev, "rejecting I/O to dead device\n"); return BLK_STS_IOERR; case SDEV_BLOCK: case SDEV_CREATED_BLOCK: return BLK_STS_RESOURCE; case SDEV_QUIESCE: /* * If the device is blocked we only accept power management * commands. */ if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM))) return BLK_STS_RESOURCE; return BLK_STS_OK; default: /* * For any other not fully online state we only allow * power management commands. */ if (req && !(req->rq_flags & RQF_PM)) return BLK_STS_OFFLINE; return BLK_STS_OK; } } /* * scsi_dev_queue_ready: if we can send requests to sdev, assign one token * and return the token else return -1. */ static inline int scsi_dev_queue_ready(struct request_queue *q, struct scsi_device *sdev) { int token; token = sbitmap_get(&sdev->budget_map); if (token < 0) return -1; if (!atomic_read(&sdev->device_blocked)) return token; /* * Only unblock if no other commands are pending and * if device_blocked has decreased to zero */ if (scsi_device_busy(sdev) > 1 || atomic_dec_return(&sdev->device_blocked) > 0) { sbitmap_put(&sdev->budget_map, token); return -1; } SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev, "unblocking device at zero depth\n")); return token; } /* * scsi_target_queue_ready: checks if there we can send commands to target * @sdev: scsi device on starget to check. */ static inline int scsi_target_queue_ready(struct Scsi_Host *shost, struct scsi_device *sdev) { struct scsi_target *starget = scsi_target(sdev); unsigned int busy; if (starget->single_lun) { spin_lock_irq(shost->host_lock); if (starget->starget_sdev_user && starget->starget_sdev_user != sdev) { spin_unlock_irq(shost->host_lock); return 0; } starget->starget_sdev_user = sdev; spin_unlock_irq(shost->host_lock); } if (starget->can_queue <= 0) return 1; busy = atomic_inc_return(&starget->target_busy) - 1; if (atomic_read(&starget->target_blocked) > 0) { if (busy) goto starved; /* * unblock after target_blocked iterates to zero */ if (atomic_dec_return(&starget->target_blocked) > 0) goto out_dec; SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget, "unblocking target at zero depth\n")); } if (busy >= starget->can_queue) goto starved; return 1; starved: spin_lock_irq(shost->host_lock); list_move_tail(&sdev->starved_entry, &shost->starved_list); spin_unlock_irq(shost->host_lock); out_dec: if (starget->can_queue > 0) atomic_dec(&starget->target_busy); return 0; } /* * scsi_host_queue_ready: if we can send requests to shost, return 1 else * return 0. 
We must end up running the queue again whenever 0 is * returned, else IO can hang. */ static inline int scsi_host_queue_ready(struct request_queue *q, struct Scsi_Host *shost, struct scsi_device *sdev, struct scsi_cmnd *cmd) { if (atomic_read(&shost->host_blocked) > 0) { if (scsi_host_busy(shost) > 0) goto starved; /* * unblock after host_blocked iterates to zero */ if (atomic_dec_return(&shost->host_blocked) > 0) goto out_dec; SCSI_LOG_MLQUEUE(3, shost_printk(KERN_INFO, shost, "unblocking host at zero depth\n")); } if (shost->host_self_blocked) goto starved; /* We're OK to process the command, so we can't be starved */ if (!list_empty(&sdev->starved_entry)) { spin_lock_irq(shost->host_lock); if (!list_empty(&sdev->starved_entry)) list_del_init(&sdev->starved_entry); spin_unlock_irq(shost->host_lock); } __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); return 1; starved: spin_lock_irq(shost->host_lock); if (list_empty(&sdev->starved_entry)) list_add_tail(&sdev->starved_entry, &shost->starved_list); spin_unlock_irq(shost->host_lock); out_dec: scsi_dec_host_busy(shost, cmd); return 0; } /* * Busy state exporting function for request stacking drivers. * * For efficiency, no lock is taken to check the busy state of * shost/starget/sdev, since the returned value is not guaranteed and * may be changed after request stacking drivers call the function, * regardless of taking lock or not. * * When scsi can't dispatch I/Os anymore and needs to kill I/Os scsi * needs to return 'not busy'. Otherwise, request stacking drivers * may hold requests forever. */ static bool scsi_mq_lld_busy(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost; if (blk_queue_dying(q)) return false; shost = sdev->host; /* * Ignore host/starget busy state. * Since block layer does not have a concept of fairness across * multiple queues, congestion of host/starget needs to be handled * in SCSI layer. */ if (scsi_host_in_recovery(shost) || scsi_device_is_busy(sdev)) return true; return false; } /* * Block layer request completion callback. May be called from interrupt * context. */ static void scsi_complete(struct request *rq) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); enum scsi_disposition disposition; INIT_LIST_HEAD(&cmd->eh_entry); atomic_inc(&cmd->device->iodone_cnt); if (cmd->result) atomic_inc(&cmd->device->ioerr_cnt); disposition = scsi_decide_disposition(cmd); if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd)) disposition = SUCCESS; scsi_log_completion(cmd, disposition); switch (disposition) { case SUCCESS: scsi_finish_command(cmd); break; case NEEDS_RETRY: scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY); break; case ADD_TO_MLQUEUE: scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); break; default: scsi_eh_scmd_add(cmd); break; } } /** * scsi_dispatch_cmd - Dispatch a command to the low-level driver. * @cmd: command block we are dispatching. * * Return: nonzero return request was rejected and device's queue needs to be * plugged. */ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; int rtn = 0; atomic_inc(&cmd->device->iorequest_cnt); /* check if the device is still usable */ if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { /* in SDEV_DEL we error all commands. DID_NO_CONNECT * returns an immediate error upwards, and signals * that the device is no longer present */ cmd->result = DID_NO_CONNECT << 16; goto done; } /* Check to see if the scsi lld made this device blocked. 
*/ if (unlikely(scsi_device_blocked(cmd->device))) { /* * in blocked state, the command is just put back on * the device queue. The suspend state has already * blocked the queue so future requests should not * occur until the device transitions out of the * suspend state. */ SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd, "queuecommand : device blocked\n")); atomic_dec(&cmd->device->iorequest_cnt); return SCSI_MLQUEUE_DEVICE_BUSY; } /* Store the LUN value in cmnd, if needed. */ if (cmd->device->lun_in_cdb) cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) | (cmd->device->lun << 5 & 0xe0); scsi_log_send(cmd); /* * Before we queue this command, check if the command * length exceeds what the host adapter can handle. */ if (cmd->cmd_len > cmd->device->host->max_cmd_len) { SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd, "queuecommand : command too long. " "cdb_size=%d host->max_cmd_len=%d\n", cmd->cmd_len, cmd->device->host->max_cmd_len)); cmd->result = (DID_ABORT << 16); goto done; } if (unlikely(host->shost_state == SHOST_DEL)) { cmd->result = (DID_NO_CONNECT << 16); goto done; } trace_scsi_dispatch_cmd_start(cmd); rtn = host->hostt->queuecommand(host, cmd); if (rtn) { atomic_dec(&cmd->device->iorequest_cnt); trace_scsi_dispatch_cmd_error(cmd, rtn); if (rtn != SCSI_MLQUEUE_DEVICE_BUSY && rtn != SCSI_MLQUEUE_TARGET_BUSY) rtn = SCSI_MLQUEUE_HOST_BUSY; SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd, "queuecommand : request rejected\n")); } return rtn; done: scsi_done(cmd); return 0; } /* Size in bytes of the sg-list stored in the scsi-mq command-private data. */ static unsigned int scsi_mq_inline_sgl_size(struct Scsi_Host *shost) { return min_t(unsigned int, shost->sg_tablesize, SCSI_INLINE_SG_CNT) * sizeof(struct scatterlist); } static blk_status_t scsi_prepare_cmd(struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); struct scsi_device *sdev = req->q->queuedata; struct Scsi_Host *shost = sdev->host; bool in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state); struct scatterlist *sg; scsi_init_command(sdev, cmd); cmd->eh_eflags = 0; cmd->prot_type = 0; cmd->prot_flags = 0; cmd->submitter = 0; memset(&cmd->sdb, 0, sizeof(cmd->sdb)); cmd->underflow = 0; cmd->transfersize = 0; cmd->host_scribble = NULL; cmd->result = 0; cmd->extra_len = 0; cmd->state = 0; if (in_flight) __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); /* * Only clear the driver-private command data if the LLD does not supply * a function to initialize that data. 
*/ if (!shost->hostt->init_cmd_priv) memset(cmd + 1, 0, shost->hostt->cmd_size); cmd->prot_op = SCSI_PROT_NORMAL; if (blk_rq_bytes(req)) cmd->sc_data_direction = rq_dma_dir(req); else cmd->sc_data_direction = DMA_NONE; sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; cmd->sdb.table.sgl = sg; if (scsi_host_get_prot(shost)) { memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer)); cmd->prot_sdb->table.sgl = (struct scatterlist *)(cmd->prot_sdb + 1); } /* * Special handling for passthrough commands, which don't go to the ULP * at all: */ if (blk_rq_is_passthrough(req)) return scsi_setup_scsi_cmnd(sdev, req); if (sdev->handler && sdev->handler->prep_fn) { blk_status_t ret = sdev->handler->prep_fn(sdev, req); if (ret != BLK_STS_OK) return ret; } /* Usually overridden by the ULP */ cmd->allowed = 0; memset(cmd->cmnd, 0, sizeof(cmd->cmnd)); return scsi_cmd_to_driver(cmd)->init_command(cmd); } static void scsi_done_internal(struct scsi_cmnd *cmd, bool complete_directly) { struct request *req = scsi_cmd_to_rq(cmd); switch (cmd->submitter) { case SUBMITTED_BY_BLOCK_LAYER: break; case SUBMITTED_BY_SCSI_ERROR_HANDLER: return scsi_eh_done(cmd); case SUBMITTED_BY_SCSI_RESET_IOCTL: return; } if (unlikely(blk_should_fake_timeout(scsi_cmd_to_rq(cmd)->q))) return; if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state))) return; trace_scsi_dispatch_cmd_done(cmd); if (complete_directly) blk_mq_complete_request_direct(req, scsi_complete); else blk_mq_complete_request(req); } void scsi_done(struct scsi_cmnd *cmd) { scsi_done_internal(cmd, false); } EXPORT_SYMBOL(scsi_done); void scsi_done_direct(struct scsi_cmnd *cmd) { scsi_done_internal(cmd, true); } EXPORT_SYMBOL(scsi_done_direct); static void scsi_mq_put_budget(struct request_queue *q, int budget_token) { struct scsi_device *sdev = q->queuedata; sbitmap_put(&sdev->budget_map, budget_token); } /* * When to reinvoke queueing after a resource shortage. It's 3 msecs to * not change behaviour from the previous unplug mechanism, experimentation * may prove this needs changing. */ #define SCSI_QUEUE_DELAY 3 static int scsi_mq_get_budget(struct request_queue *q) { struct scsi_device *sdev = q->queuedata; int token = scsi_dev_queue_ready(q, sdev); if (token >= 0) return token; atomic_inc(&sdev->restarts); /* * Orders atomic_inc(&sdev->restarts) and atomic_read(&sdev->device_busy). * .restarts must be incremented before .device_busy is read because the * code in scsi_run_queue_async() depends on the order of these operations. */ smp_mb__after_atomic(); /* * If all in-flight requests originated from this LUN are completed * before reading .device_busy, sdev->device_busy will be observed as * zero, then blk_mq_delay_run_hw_queues() will dispatch this request * soon. Otherwise, completion of one of these requests will observe * the .restarts flag, and the request queue will be run for handling * this request, see scsi_end_request(). 
*/ if (unlikely(scsi_device_busy(sdev) == 0 && !scsi_device_blocked(sdev))) blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY); return -1; } static void scsi_mq_set_rq_budget_token(struct request *req, int token) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); cmd->budget_token = token; } static int scsi_mq_get_rq_budget_token(struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); return cmd->budget_token; } static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *req = bd->rq; struct request_queue *q = req->q; struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost = sdev->host; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); blk_status_t ret; int reason; WARN_ON_ONCE(cmd->budget_token < 0); /* * If the device is not in running state we will reject some or all * commands. */ if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { ret = scsi_device_state_check(sdev, req); if (ret != BLK_STS_OK) goto out_put_budget; } ret = BLK_STS_RESOURCE; if (!scsi_target_queue_ready(shost, sdev)) goto out_put_budget; if (unlikely(scsi_host_in_recovery(shost))) { if (cmd->flags & SCMD_FAIL_IF_RECOVERING) ret = BLK_STS_OFFLINE; goto out_dec_target_busy; } if (!scsi_host_queue_ready(q, shost, sdev, cmd)) goto out_dec_target_busy; if (!(req->rq_flags & RQF_DONTPREP)) { ret = scsi_prepare_cmd(req); if (ret != BLK_STS_OK) goto out_dec_host_busy; req->rq_flags |= RQF_DONTPREP; } else { clear_bit(SCMD_STATE_COMPLETE, &cmd->state); } cmd->flags &= SCMD_PRESERVED_FLAGS; if (sdev->simple_tags) cmd->flags |= SCMD_TAGGED; if (bd->last) cmd->flags |= SCMD_LAST; scsi_set_resid(cmd, 0); memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); cmd->submitter = SUBMITTED_BY_BLOCK_LAYER; blk_mq_start_request(req); reason = scsi_dispatch_cmd(cmd); if (reason) { scsi_set_blocked(cmd, reason); ret = BLK_STS_RESOURCE; goto out_dec_host_busy; } return BLK_STS_OK; out_dec_host_busy: scsi_dec_host_busy(shost, cmd); out_dec_target_busy: if (scsi_target(sdev)->can_queue > 0) atomic_dec(&scsi_target(sdev)->target_busy); out_put_budget: scsi_mq_put_budget(q, cmd->budget_token); cmd->budget_token = -1; switch (ret) { case BLK_STS_OK: break; case BLK_STS_RESOURCE: case BLK_STS_ZONE_RESOURCE: if (scsi_device_blocked(sdev)) ret = BLK_STS_DEV_RESOURCE; break; case BLK_STS_AGAIN: cmd->result = DID_BUS_BUSY << 16; if (req->rq_flags & RQF_DONTPREP) scsi_mq_uninit_cmd(cmd); break; default: if (unlikely(!scsi_device_online(sdev))) cmd->result = DID_NO_CONNECT << 16; else cmd->result = DID_ERROR << 16; /* * Make sure to release all allocated resources when * we hit an error, as we will never see this command * again. 
*/ if (req->rq_flags & RQF_DONTPREP) scsi_mq_uninit_cmd(cmd); scsi_run_queue_async(sdev); break; } return ret; } static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); struct scatterlist *sg; int ret = 0; cmd->sense_buffer = kmem_cache_alloc_node(scsi_sense_cache, GFP_KERNEL, numa_node); if (!cmd->sense_buffer) return -ENOMEM; if (scsi_host_get_prot(shost)) { sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; cmd->prot_sdb = (void *)sg + scsi_mq_inline_sgl_size(shost); } if (shost->hostt->init_cmd_priv) { ret = shost->hostt->init_cmd_priv(shost, cmd); if (ret < 0) kmem_cache_free(scsi_sense_cache, cmd->sense_buffer); } return ret; } static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx) { struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); if (shost->hostt->exit_cmd_priv) shost->hostt->exit_cmd_priv(shost, cmd); kmem_cache_free(scsi_sense_cache, cmd->sense_buffer); } static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct Scsi_Host *shost = hctx->driver_data; if (shost->hostt->mq_poll) return shost->hostt->mq_poll(shost, hctx->queue_num); return 0; } static int scsi_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct Scsi_Host *shost = data; hctx->driver_data = shost; return 0; } static void scsi_map_queues(struct blk_mq_tag_set *set) { struct Scsi_Host *shost = container_of(set, struct Scsi_Host, tag_set); if (shost->hostt->map_queues) return shost->hostt->map_queues(shost); blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); } void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) { struct device *dev = shost->dma_dev; /* * this limit is imposed by hardware restrictions */ blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize, SG_MAX_SEGMENTS)); if (scsi_host_prot_dma(shost)) { shost->sg_prot_tablesize = min_not_zero(shost->sg_prot_tablesize, (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS); BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize); blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); } blk_queue_max_hw_sectors(q, shost->max_sectors); blk_queue_segment_boundary(q, shost->dma_boundary); dma_set_seg_boundary(dev, shost->dma_boundary); blk_queue_max_segment_size(q, shost->max_segment_size); blk_queue_virt_boundary(q, shost->virt_boundary_mask); dma_set_max_seg_size(dev, queue_max_segment_size(q)); /* * Set a reasonable default alignment: The larger of 32-byte (dword), * which is a common minimum for HBAs, and the minimum DMA alignment, * which is set by the platform. * * Devices that require a bigger alignment can increase it later. 
*/ blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1); } EXPORT_SYMBOL_GPL(__scsi_init_queue); static const struct blk_mq_ops scsi_mq_ops_no_commit = { .get_budget = scsi_mq_get_budget, .put_budget = scsi_mq_put_budget, .queue_rq = scsi_queue_rq, .complete = scsi_complete, .timeout = scsi_timeout, #ifdef CONFIG_BLK_DEBUG_FS .show_rq = scsi_show_rq, #endif .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, .init_hctx = scsi_init_hctx, .poll = scsi_mq_poll, .set_rq_budget_token = scsi_mq_set_rq_budget_token, .get_rq_budget_token = scsi_mq_get_rq_budget_token, }; static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx) { struct Scsi_Host *shost = hctx->driver_data; shost->hostt->commit_rqs(shost, hctx->queue_num); } static const struct blk_mq_ops scsi_mq_ops = { .get_budget = scsi_mq_get_budget, .put_budget = scsi_mq_put_budget, .queue_rq = scsi_queue_rq, .commit_rqs = scsi_commit_rqs, .complete = scsi_complete, .timeout = scsi_timeout, #ifdef CONFIG_BLK_DEBUG_FS .show_rq = scsi_show_rq, #endif .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, .init_hctx = scsi_init_hctx, .poll = scsi_mq_poll, .set_rq_budget_token = scsi_mq_set_rq_budget_token, .get_rq_budget_token = scsi_mq_get_rq_budget_token, }; int scsi_mq_setup_tags(struct Scsi_Host *shost) { unsigned int cmd_size, sgl_size; struct blk_mq_tag_set *tag_set = &shost->tag_set; sgl_size = max_t(unsigned int, sizeof(struct scatterlist), scsi_mq_inline_sgl_size(shost)); cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size; if (scsi_host_get_prot(shost)) cmd_size += sizeof(struct scsi_data_buffer) + sizeof(struct scatterlist) * SCSI_INLINE_PROT_SG_CNT; memset(tag_set, 0, sizeof(*tag_set)); if (shost->hostt->commit_rqs) tag_set->ops = &scsi_mq_ops; else tag_set->ops = &scsi_mq_ops_no_commit; tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1; tag_set->nr_maps = shost->nr_maps ? : 1; tag_set->queue_depth = shost->can_queue; tag_set->cmd_size = cmd_size; tag_set->numa_node = dev_to_node(shost->dma_dev); tag_set->flags = BLK_MQ_F_SHOULD_MERGE; tag_set->flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy); if (shost->queuecommand_may_block) tag_set->flags |= BLK_MQ_F_BLOCKING; tag_set->driver_data = shost; if (shost->host_tagset) tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; return blk_mq_alloc_tag_set(tag_set); } void scsi_mq_free_tags(struct kref *kref) { struct Scsi_Host *shost = container_of(kref, typeof(*shost), tagset_refcnt); blk_mq_free_tag_set(&shost->tag_set); complete(&shost->tagset_freed); } /** * scsi_device_from_queue - return sdev associated with a request_queue * @q: The request queue to return the sdev from * * Return the sdev associated with a request queue or NULL if the * request_queue does not reference a SCSI device. */ struct scsi_device *scsi_device_from_queue(struct request_queue *q) { struct scsi_device *sdev = NULL; if (q->mq_ops == &scsi_mq_ops_no_commit || q->mq_ops == &scsi_mq_ops) sdev = q->queuedata; if (!sdev || !get_device(&sdev->sdev_gendev)) sdev = NULL; return sdev; } /* * pktcdvd should have been integrated into the SCSI layers, but for historical * reasons like the old IDE driver it isn't. This export allows it to safely * probe if a given device is a SCSI one and only attach to that. 
*/ #ifdef CONFIG_CDROM_PKTCDVD_MODULE EXPORT_SYMBOL_GPL(scsi_device_from_queue); #endif /** * scsi_block_requests - Utility function used by low-level drivers to prevent * further commands from being queued to the device. * @shost: host in question * * There is no timer nor any other means by which the requests get unblocked * other than the low-level driver calling scsi_unblock_requests(). */ void scsi_block_requests(struct Scsi_Host *shost) { shost->host_self_blocked = 1; } EXPORT_SYMBOL(scsi_block_requests); /** * scsi_unblock_requests - Utility function used by low-level drivers to allow * further commands to be queued to the device. * @shost: host in question * * There is no timer nor any other means by which the requests get unblocked * other than the low-level driver calling scsi_unblock_requests(). This is done * as an API function so that changes to the internals of the scsi mid-layer * won't require wholesale changes to drivers that use this feature. */ void scsi_unblock_requests(struct Scsi_Host *shost) { shost->host_self_blocked = 0; scsi_run_host_queues(shost); } EXPORT_SYMBOL(scsi_unblock_requests); void scsi_exit_queue(void) { kmem_cache_destroy(scsi_sense_cache); } /** * scsi_mode_select - issue a mode select * @sdev: SCSI device to be queried * @pf: Page format bit (1 == standard, 0 == vendor specific) * @sp: Save page bit (0 == don't save, 1 == save) * @buffer: request buffer (may not be smaller than eight bytes) * @len: length of request buffer. * @timeout: command timeout * @retries: number of retries before failing * @data: returns a structure abstracting the mode header data * @sshdr: place to put sense data (or NULL if no sense to be collected). * must be SCSI_SENSE_BUFFERSIZE big. * * Returns zero if successful; negative error number or scsi * status on error * */ int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, unsigned char *buffer, int len, int timeout, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { unsigned char cmd[10]; unsigned char *real_buffer; const struct scsi_exec_args exec_args = { .sshdr = sshdr, }; int ret; memset(cmd, 0, sizeof(cmd)); cmd[1] = (pf ? 0x10 : 0) | (sp ? 0x01 : 0); /* * Use MODE SELECT(10) if the device asked for it or if the mode page * and the mode select header cannot fit within the maximumm 255 bytes * of the MODE SELECT(6) command. */ if (sdev->use_10_for_ms || len + 4 > 255 || data->block_descriptor_length > 255) { if (len > 65535 - 8) return -EINVAL; real_buffer = kmalloc(8 + len, GFP_KERNEL); if (!real_buffer) return -ENOMEM; memcpy(real_buffer + 8, buffer, len); len += 8; real_buffer[0] = 0; real_buffer[1] = 0; real_buffer[2] = data->medium_type; real_buffer[3] = data->device_specific; real_buffer[4] = data->longlba ? 0x01 : 0; real_buffer[5] = 0; put_unaligned_be16(data->block_descriptor_length, &real_buffer[6]); cmd[0] = MODE_SELECT_10; put_unaligned_be16(len, &cmd[7]); } else { if (data->longlba) return -EINVAL; real_buffer = kmalloc(4 + len, GFP_KERNEL); if (!real_buffer) return -ENOMEM; memcpy(real_buffer + 4, buffer, len); len += 4; real_buffer[0] = 0; real_buffer[1] = data->medium_type; real_buffer[2] = data->device_specific; real_buffer[3] = data->block_descriptor_length; cmd[0] = MODE_SELECT; cmd[4] = len; } ret = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, real_buffer, len, timeout, retries, &exec_args); kfree(real_buffer); return ret; } EXPORT_SYMBOL_GPL(scsi_mode_select); /** * scsi_mode_sense - issue a mode sense, falling back from 10 to six bytes if necessary. 
* @sdev: SCSI device to be queried * @dbd: set to prevent mode sense from returning block descriptors * @modepage: mode page being requested * @subpage: sub-page of the mode page being requested * @buffer: request buffer (may not be smaller than eight bytes) * @len: length of request buffer. * @timeout: command timeout * @retries: number of retries before failing * @data: returns a structure abstracting the mode header data * @sshdr: place to put sense data (or NULL if no sense to be collected). * must be SCSI_SENSE_BUFFERSIZE big. * * Returns zero if successful, or a negative error number on failure */ int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int subpage, unsigned char *buffer, int len, int timeout, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { unsigned char cmd[12]; int use_10_for_ms; int header_length; int result; struct scsi_sense_hdr my_sshdr; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = retries, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { /* caller might not be interested in sense, but we need it */ .sshdr = sshdr ? : &my_sshdr, .failures = &failures, }; memset(data, 0, sizeof(*data)); memset(&cmd[0], 0, 12); dbd = sdev->set_dbd_for_ms ? 8 : dbd; cmd[1] = dbd & 0x18; /* allows DBD and LLBA bits */ cmd[2] = modepage; cmd[3] = subpage; sshdr = exec_args.sshdr; retry: use_10_for_ms = sdev->use_10_for_ms || len > 255; if (use_10_for_ms) { if (len < 8 || len > 65535) return -EINVAL; cmd[0] = MODE_SENSE_10; put_unaligned_be16(len, &cmd[7]); header_length = 8; } else { if (len < 4) return -EINVAL; cmd[0] = MODE_SENSE; cmd[4] = len; header_length = 4; } memset(buffer, 0, len); result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buffer, len, timeout, retries, &exec_args); if (result < 0) return result; /* This code looks awful: what it's doing is making sure an * ILLEGAL REQUEST sense return identifies the actual command * byte as the problem. MODE_SENSE commands can return * ILLEGAL REQUEST if the code page isn't supported */ if (!scsi_status_is_good(result)) { if (scsi_sense_valid(sshdr)) { if ((sshdr->sense_key == ILLEGAL_REQUEST) && (sshdr->asc == 0x20) && (sshdr->ascq == 0)) { /* * Invalid command operation code: retry using * MODE SENSE(6) if this was a MODE SENSE(10) * request, except if the request mode page is * too large for MODE SENSE single byte * allocation length field. */ if (use_10_for_ms) { if (len > 255) return -EIO; sdev->use_10_for_ms = 0; goto retry; } } } return -EIO; } if (unlikely(buffer[0] == 0x86 && buffer[1] == 0x0b && (modepage == 6 || modepage == 8))) { /* Initio breakage? */ header_length = 0; data->length = 13; data->medium_type = 0; data->device_specific = 0; data->longlba = 0; data->block_descriptor_length = 0; } else if (use_10_for_ms) { data->length = get_unaligned_be16(&buffer[0]) + 2; data->medium_type = buffer[2]; data->device_specific = buffer[3]; data->longlba = buffer[4] & 0x01; data->block_descriptor_length = get_unaligned_be16(&buffer[6]); } else { data->length = buffer[0] + 1; data->medium_type = buffer[1]; data->device_specific = buffer[2]; data->block_descriptor_length = buffer[3]; } data->header_length = header_length; return 0; } EXPORT_SYMBOL(scsi_mode_sense); /** * scsi_test_unit_ready - test if unit is ready * @sdev: scsi device to change the state of. 
 * @timeout: command timeout
 * @retries: number of retries before failing
 * @sshdr: output pointer for decoded sense information.
 *
 * Returns zero if successful, or an error if the TUR failed.  For
 * removable media, UNIT_ATTENTION sets ->changed flag.
 **/
int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
			 struct scsi_sense_hdr *sshdr)
{
	char cmd[] = {
		TEST_UNIT_READY, 0, 0, 0, 0, 0,
	};
	const struct scsi_exec_args exec_args = {
		.sshdr = sshdr,
	};
	int result;

	/* try to eat the UNIT_ATTENTION if there are enough retries */
	do {
		result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, NULL, 0,
					  timeout, 1, &exec_args);
		if (sdev->removable && result > 0 && scsi_sense_valid(sshdr) &&
		    sshdr->sense_key == UNIT_ATTENTION)
			sdev->changed = 1;
	} while (result > 0 && scsi_sense_valid(sshdr) &&
		 sshdr->sense_key == UNIT_ATTENTION && --retries);

	return result;
}
EXPORT_SYMBOL(scsi_test_unit_ready);

/**
 * scsi_device_set_state - Take the given device through the device state model.
 * @sdev: scsi device to change the state of.
 * @state: state to change to.
 *
 * Returns zero if successful or an error if the requested
 * transition is illegal.
 */
int scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
{
	enum scsi_device_state oldstate = sdev->sdev_state;

	if (state == oldstate)
		return 0;

	switch (state) {
	case SDEV_CREATED:
		switch (oldstate) {
		case SDEV_CREATED_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_RUNNING:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_QUIESCE:
		switch (oldstate) {
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_OFFLINE:
	case SDEV_TRANSPORT_OFFLINE:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_BLOCK:
		switch (oldstate) {
		case SDEV_RUNNING:
		case SDEV_CREATED_BLOCK:
		case SDEV_QUIESCE:
		case SDEV_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_CREATED_BLOCK:
		switch (oldstate) {
		case SDEV_CREATED:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_CANCEL:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_QUIESCE:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_DEL:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
		case SDEV_CANCEL:
		case SDEV_BLOCK:
		case SDEV_CREATED_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	}
	sdev->offline_already = false;
	sdev->sdev_state = state;
	return 0;

 illegal:
	SCSI_LOG_ERROR_RECOVERY(1,
				sdev_printk(KERN_ERR, sdev,
					    "Illegal state transition %s->%s",
					    scsi_device_state_name(oldstate),
					    scsi_device_state_name(state))
				);
	return -EINVAL;
}
EXPORT_SYMBOL(scsi_device_set_state);

/**
 * scsi_evt_emit - emit a single SCSI device uevent
 * @sdev: associated SCSI device
 * @evt: event to emit
 *
 * Send a single uevent (scsi_event) to the associated scsi_device.
*/ static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt) { int idx = 0; char *envp[3]; switch (evt->evt_type) { case SDEV_EVT_MEDIA_CHANGE: envp[idx++] = "SDEV_MEDIA_CHANGE=1"; break; case SDEV_EVT_INQUIRY_CHANGE_REPORTED: scsi_rescan_device(sdev); envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED"; break; case SDEV_EVT_CAPACITY_CHANGE_REPORTED: envp[idx++] = "SDEV_UA=CAPACITY_DATA_HAS_CHANGED"; break; case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED: envp[idx++] = "SDEV_UA=THIN_PROVISIONING_SOFT_THRESHOLD_REACHED"; break; case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED: envp[idx++] = "SDEV_UA=MODE_PARAMETERS_CHANGED"; break; case SDEV_EVT_LUN_CHANGE_REPORTED: envp[idx++] = "SDEV_UA=REPORTED_LUNS_DATA_HAS_CHANGED"; break; case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED: envp[idx++] = "SDEV_UA=ASYMMETRIC_ACCESS_STATE_CHANGED"; break; case SDEV_EVT_POWER_ON_RESET_OCCURRED: envp[idx++] = "SDEV_UA=POWER_ON_RESET_OCCURRED"; break; default: /* do nothing */ break; } envp[idx++] = NULL; kobject_uevent_env(&sdev->sdev_gendev.kobj, KOBJ_CHANGE, envp); } /** * scsi_evt_thread - send a uevent for each scsi event * @work: work struct for scsi_device * * Dispatch queued events to their associated scsi_device kobjects * as uevents. */ void scsi_evt_thread(struct work_struct *work) { struct scsi_device *sdev; enum scsi_device_event evt_type; LIST_HEAD(event_list); sdev = container_of(work, struct scsi_device, event_work); for (evt_type = SDEV_EVT_FIRST; evt_type <= SDEV_EVT_LAST; evt_type++) if (test_and_clear_bit(evt_type, sdev->pending_events)) sdev_evt_send_simple(sdev, evt_type, GFP_KERNEL); while (1) { struct scsi_event *evt; struct list_head *this, *tmp; unsigned long flags; spin_lock_irqsave(&sdev->list_lock, flags); list_splice_init(&sdev->event_list, &event_list); spin_unlock_irqrestore(&sdev->list_lock, flags); if (list_empty(&event_list)) break; list_for_each_safe(this, tmp, &event_list) { evt = list_entry(this, struct scsi_event, node); list_del(&evt->node); scsi_evt_emit(sdev, evt); kfree(evt); } } } /** * sdev_evt_send - send asserted event to uevent thread * @sdev: scsi_device event occurred on * @evt: event to send * * Assert scsi device event asynchronously. */ void sdev_evt_send(struct scsi_device *sdev, struct scsi_event *evt) { unsigned long flags; #if 0 /* FIXME: currently this check eliminates all media change events * for polled devices. Need to update to discriminate between AN * and polled events */ if (!test_bit(evt->evt_type, sdev->supported_events)) { kfree(evt); return; } #endif spin_lock_irqsave(&sdev->list_lock, flags); list_add_tail(&evt->node, &sdev->event_list); schedule_work(&sdev->event_work); spin_unlock_irqrestore(&sdev->list_lock, flags); } EXPORT_SYMBOL_GPL(sdev_evt_send); /** * sdev_evt_alloc - allocate a new scsi event * @evt_type: type of event to allocate * @gfpflags: GFP flags for allocation * * Allocates and returns a new scsi_event. 
 */
struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type,
				  gfp_t gfpflags)
{
	struct scsi_event *evt = kzalloc(sizeof(struct scsi_event), gfpflags);
	if (!evt)
		return NULL;

	evt->evt_type = evt_type;
	INIT_LIST_HEAD(&evt->node);

	/* evt_type-specific initialization, if any */
	switch (evt_type) {
	case SDEV_EVT_MEDIA_CHANGE:
	case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
	case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
	case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED:
	case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED:
	case SDEV_EVT_LUN_CHANGE_REPORTED:
	case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED:
	case SDEV_EVT_POWER_ON_RESET_OCCURRED:
	default:
		/* do nothing */
		break;
	}

	return evt;
}
EXPORT_SYMBOL_GPL(sdev_evt_alloc);

/**
 * sdev_evt_send_simple - send asserted event to uevent thread
 * @sdev: scsi_device event occurred on
 * @evt_type: type of event to send
 * @gfpflags: GFP flags for allocation
 *
 * Assert scsi device event asynchronously, given an event type.
 */
void sdev_evt_send_simple(struct scsi_device *sdev,
			  enum scsi_device_event evt_type, gfp_t gfpflags)
{
	struct scsi_event *evt = sdev_evt_alloc(evt_type, gfpflags);
	if (!evt) {
		sdev_printk(KERN_ERR, sdev, "event %d eaten due to OOM\n",
			    evt_type);
		return;
	}

	sdev_evt_send(sdev, evt);
}
EXPORT_SYMBOL_GPL(sdev_evt_send_simple);

/**
 * scsi_device_quiesce - Block all commands except power management.
 * @sdev: scsi device to quiesce.
 *
 * This works by trying to transition to the SDEV_QUIESCE state
 * (which must be a legal transition).  When the device is in this
 * state, only power management requests will be accepted, all others will
 * be deferred.
 *
 * Must be called with user context, may sleep.
 *
 * Returns zero if successful or an error if not.
 */
int scsi_device_quiesce(struct scsi_device *sdev)
{
	struct request_queue *q = sdev->request_queue;
	int err;

	/*
	 * It is allowed to call scsi_device_quiesce() multiple times from
	 * the same context but concurrent scsi_device_quiesce() calls are
	 * not allowed.
	 */
	WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);

	if (sdev->quiesced_by == current)
		return 0;

	blk_set_pm_only(q);

	blk_mq_freeze_queue(q);
	/*
	 * Ensure that the effect of blk_set_pm_only() will be visible
	 * for percpu_ref_tryget() callers that occur after the queue
	 * unfreeze even if the queue was already frozen before this function
	 * was called. See also https://lwn.net/Articles/573497/.
	 */
	synchronize_rcu();
	blk_mq_unfreeze_queue(q);

	mutex_lock(&sdev->state_mutex);
	err = scsi_device_set_state(sdev, SDEV_QUIESCE);
	if (err == 0)
		sdev->quiesced_by = current;
	else
		blk_clear_pm_only(q);
	mutex_unlock(&sdev->state_mutex);

	return err;
}
EXPORT_SYMBOL(scsi_device_quiesce);

/**
 * scsi_device_resume - Restart user issued commands to a quiesced device.
 * @sdev: scsi device to resume.
 *
 * Moves the device from quiesced back to running and restarts the
 * queues.
 *
 * Must be called with user context, may sleep.
*/ void scsi_device_resume(struct scsi_device *sdev) { /* check if the device state was mutated prior to resume, and if * so assume the state is being managed elsewhere (for example * device deleted during suspend) */ mutex_lock(&sdev->state_mutex); if (sdev->sdev_state == SDEV_QUIESCE) scsi_device_set_state(sdev, SDEV_RUNNING); if (sdev->quiesced_by) { sdev->quiesced_by = NULL; blk_clear_pm_only(sdev->request_queue); } mutex_unlock(&sdev->state_mutex); } EXPORT_SYMBOL(scsi_device_resume); static void device_quiesce_fn(struct scsi_device *sdev, void *data) { scsi_device_quiesce(sdev); } void scsi_target_quiesce(struct scsi_target *starget) { starget_for_each_device(starget, NULL, device_quiesce_fn); } EXPORT_SYMBOL(scsi_target_quiesce); static void device_resume_fn(struct scsi_device *sdev, void *data) { scsi_device_resume(sdev); } void scsi_target_resume(struct scsi_target *starget) { starget_for_each_device(starget, NULL, device_resume_fn); } EXPORT_SYMBOL(scsi_target_resume); static int __scsi_internal_device_block_nowait(struct scsi_device *sdev) { if (scsi_device_set_state(sdev, SDEV_BLOCK)) return scsi_device_set_state(sdev, SDEV_CREATED_BLOCK); return 0; } void scsi_start_queue(struct scsi_device *sdev) { if (cmpxchg(&sdev->queue_stopped, 1, 0)) blk_mq_unquiesce_queue(sdev->request_queue); } static void scsi_stop_queue(struct scsi_device *sdev) { /* * The atomic variable of ->queue_stopped covers that * blk_mq_quiesce_queue* is balanced with blk_mq_unquiesce_queue. * * The caller needs to wait until quiesce is done. */ if (!cmpxchg(&sdev->queue_stopped, 0, 1)) blk_mq_quiesce_queue_nowait(sdev->request_queue); } /** * scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state * @sdev: device to block * * Pause SCSI command processing on the specified device. Does not sleep. * * Returns zero if successful or a negative error code upon failure. * * Notes: * This routine transitions the device to the SDEV_BLOCK state (which must be * a legal transition). When the device is in this state, command processing * is paused until the device leaves the SDEV_BLOCK state. See also * scsi_internal_device_unblock_nowait(). */ int scsi_internal_device_block_nowait(struct scsi_device *sdev) { int ret = __scsi_internal_device_block_nowait(sdev); /* * The device has transitioned to SDEV_BLOCK. Stop the * block layer from calling the midlayer with this device's * request queue. */ if (!ret) scsi_stop_queue(sdev); return ret; } EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait); /** * scsi_device_block - try to transition to the SDEV_BLOCK state * @sdev: device to block * @data: dummy argument, ignored * * Pause SCSI command processing on the specified device. Callers must wait * until all ongoing scsi_queue_rq() calls have finished after this function * returns. * * Note: * This routine transitions the device to the SDEV_BLOCK state (which must be * a legal transition). When the device is in this state, command processing * is paused until the device leaves the SDEV_BLOCK state. See also * scsi_internal_device_unblock(). */ static void scsi_device_block(struct scsi_device *sdev, void *data) { int err; enum scsi_device_state state; mutex_lock(&sdev->state_mutex); err = __scsi_internal_device_block_nowait(sdev); state = sdev->sdev_state; if (err == 0) /* * scsi_stop_queue() must be called with the state_mutex * held. Otherwise a simultaneous scsi_start_queue() call * might unquiesce the queue before we quiesce it. 
*/ scsi_stop_queue(sdev); mutex_unlock(&sdev->state_mutex); WARN_ONCE(err, "%s: failed to block %s in state %d\n", __func__, dev_name(&sdev->sdev_gendev), state); } /** * scsi_internal_device_unblock_nowait - resume a device after a block request * @sdev: device to resume * @new_state: state to set the device to after unblocking * * Restart the device queue for a previously suspended SCSI device. Does not * sleep. * * Returns zero if successful or a negative error code upon failure. * * Notes: * This routine transitions the device to the SDEV_RUNNING state or to one of * the offline states (which must be a legal transition) allowing the midlayer * to goose the queue for this device. */ int scsi_internal_device_unblock_nowait(struct scsi_device *sdev, enum scsi_device_state new_state) { switch (new_state) { case SDEV_RUNNING: case SDEV_TRANSPORT_OFFLINE: break; default: return -EINVAL; } /* * Try to transition the scsi device to SDEV_RUNNING or one of the * offlined states and goose the device queue if successful. */ switch (sdev->sdev_state) { case SDEV_BLOCK: case SDEV_TRANSPORT_OFFLINE: sdev->sdev_state = new_state; break; case SDEV_CREATED_BLOCK: if (new_state == SDEV_TRANSPORT_OFFLINE || new_state == SDEV_OFFLINE) sdev->sdev_state = new_state; else sdev->sdev_state = SDEV_CREATED; break; case SDEV_CANCEL: case SDEV_OFFLINE: break; default: return -EINVAL; } scsi_start_queue(sdev); return 0; } EXPORT_SYMBOL_GPL(scsi_internal_device_unblock_nowait); /** * scsi_internal_device_unblock - resume a device after a block request * @sdev: device to resume * @new_state: state to set the device to after unblocking * * Restart the device queue for a previously suspended SCSI device. May sleep. * * Returns zero if successful or a negative error code upon failure. * * Notes: * This routine transitions the device to the SDEV_RUNNING state or to one of * the offline states (which must be a legal transition) allowing the midlayer * to goose the queue for this device. */ static int scsi_internal_device_unblock(struct scsi_device *sdev, enum scsi_device_state new_state) { int ret; mutex_lock(&sdev->state_mutex); ret = scsi_internal_device_unblock_nowait(sdev, new_state); mutex_unlock(&sdev->state_mutex); return ret; } static int target_block(struct device *dev, void *data) { if (scsi_is_target_device(dev)) starget_for_each_device(to_scsi_target(dev), NULL, scsi_device_block); return 0; } /** * scsi_block_targets - transition all SCSI child devices to SDEV_BLOCK state * @dev: a parent device of one or more scsi_target devices * @shost: the Scsi_Host to which this device belongs * * Iterate over all children of @dev, which should be scsi_target devices, * and switch all subordinate scsi devices to SDEV_BLOCK state. Wait for * ongoing scsi_queue_rq() calls to finish. May sleep. * * Note: * @dev must not itself be a scsi_target device. 
*/ void scsi_block_targets(struct Scsi_Host *shost, struct device *dev) { WARN_ON_ONCE(scsi_is_target_device(dev)); device_for_each_child(dev, NULL, target_block); blk_mq_wait_quiesce_done(&shost->tag_set); } EXPORT_SYMBOL_GPL(scsi_block_targets); static void device_unblock(struct scsi_device *sdev, void *data) { scsi_internal_device_unblock(sdev, *(enum scsi_device_state *)data); } static int target_unblock(struct device *dev, void *data) { if (scsi_is_target_device(dev)) starget_for_each_device(to_scsi_target(dev), data, device_unblock); return 0; } void scsi_target_unblock(struct device *dev, enum scsi_device_state new_state) { if (scsi_is_target_device(dev)) starget_for_each_device(to_scsi_target(dev), &new_state, device_unblock); else device_for_each_child(dev, &new_state, target_unblock); } EXPORT_SYMBOL_GPL(scsi_target_unblock); /** * scsi_host_block - Try to transition all logical units to the SDEV_BLOCK state * @shost: device to block * * Pause SCSI command processing for all logical units associated with the SCSI * host and wait until pending scsi_queue_rq() calls have finished. * * Returns zero if successful or a negative error code upon failure. */ int scsi_host_block(struct Scsi_Host *shost) { struct scsi_device *sdev; int ret; /* * Call scsi_internal_device_block_nowait so we can avoid * calling synchronize_rcu() for each LUN. */ shost_for_each_device(sdev, shost) { mutex_lock(&sdev->state_mutex); ret = scsi_internal_device_block_nowait(sdev); mutex_unlock(&sdev->state_mutex); if (ret) { scsi_device_put(sdev); return ret; } } /* Wait for ongoing scsi_queue_rq() calls to finish. */ blk_mq_wait_quiesce_done(&shost->tag_set); return 0; } EXPORT_SYMBOL_GPL(scsi_host_block); int scsi_host_unblock(struct Scsi_Host *shost, int new_state) { struct scsi_device *sdev; int ret = 0; shost_for_each_device(sdev, shost) { ret = scsi_internal_device_unblock(sdev, new_state); if (ret) { scsi_device_put(sdev); break; } } return ret; } EXPORT_SYMBOL_GPL(scsi_host_unblock); /** * scsi_kmap_atomic_sg - find and atomically map an sg-elemnt * @sgl: scatter-gather list * @sg_count: number of segments in sg * @offset: offset in bytes into sg, on return offset into the mapped area * @len: bytes to map, on return number of bytes mapped * * Returns virtual address of the start of the mapped page */ void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count, size_t *offset, size_t *len) { int i; size_t sg_len = 0, len_complete = 0; struct scatterlist *sg; struct page *page; WARN_ON(!irqs_disabled()); for_each_sg(sgl, sg, sg_count, i) { len_complete = sg_len; /* Complete sg-entries */ sg_len += sg->length; if (sg_len > *offset) break; } if (unlikely(i == sg_count)) { printk(KERN_ERR "%s: Bytes in sg: %zu, requested offset %zu, " "elements %d\n", __func__, sg_len, *offset, sg_count); WARN_ON(1); return NULL; } /* Offset starting from the beginning of first page in this sg-entry */ *offset = *offset - len_complete + sg->offset; /* Assumption: contiguous pages can be accessed as "page + i" */ page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT)); *offset &= ~PAGE_MASK; /* Bytes in this sg-entry from *offset to the end of the page */ sg_len = PAGE_SIZE - *offset; if (*len > sg_len) *len = sg_len; return kmap_atomic(page); } EXPORT_SYMBOL(scsi_kmap_atomic_sg); /** * scsi_kunmap_atomic_sg - atomically unmap a virtual address, previously mapped with scsi_kmap_atomic_sg * @virt: virtual address to be unmapped */ void scsi_kunmap_atomic_sg(void *virt) { kunmap_atomic(virt); } 
EXPORT_SYMBOL(scsi_kunmap_atomic_sg); void sdev_disable_disk_events(struct scsi_device *sdev) { atomic_inc(&sdev->disk_events_disable_depth); } EXPORT_SYMBOL(sdev_disable_disk_events); void sdev_enable_disk_events(struct scsi_device *sdev) { if (WARN_ON_ONCE(atomic_read(&sdev->disk_events_disable_depth) <= 0)) return; atomic_dec(&sdev->disk_events_disable_depth); } EXPORT_SYMBOL(sdev_enable_disk_events); static unsigned char designator_prio(const unsigned char *d) { if (d[1] & 0x30) /* not associated with LUN */ return 0; if (d[3] == 0) /* invalid length */ return 0; /* * Order of preference for lun descriptor: * - SCSI name string * - NAA IEEE Registered Extended * - EUI-64 based 16-byte * - EUI-64 based 12-byte * - NAA IEEE Registered * - NAA IEEE Extended * - EUI-64 based 8-byte * - SCSI name string (truncated) * - T10 Vendor ID * as longer descriptors reduce the likelyhood * of identification clashes. */ switch (d[1] & 0xf) { case 8: /* SCSI name string, variable-length UTF-8 */ return 9; case 3: switch (d[4] >> 4) { case 6: /* NAA registered extended */ return 8; case 5: /* NAA registered */ return 5; case 4: /* NAA extended */ return 4; case 3: /* NAA locally assigned */ return 1; default: break; } break; case 2: switch (d[3]) { case 16: /* EUI64-based, 16 byte */ return 7; case 12: /* EUI64-based, 12 byte */ return 6; case 8: /* EUI64-based, 8 byte */ return 3; default: break; } break; case 1: /* T10 vendor ID */ return 1; default: break; } return 0; } /** * scsi_vpd_lun_id - return a unique device identification * @sdev: SCSI device * @id: buffer for the identification * @id_len: length of the buffer * * Copies a unique device identification into @id based * on the information in the VPD page 0x83 of the device. * The string will be formatted as a SCSI name string. * * Returns the length of the identification or error on failure. * If the identifier is longer than the supplied buffer the actual * identifier length is returned and the buffer is not zero-padded. 
*/ int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len) { u8 cur_id_prio = 0; u8 cur_id_size = 0; const unsigned char *d, *cur_id_str; const struct scsi_vpd *vpd_pg83; int id_size = -EINVAL; rcu_read_lock(); vpd_pg83 = rcu_dereference(sdev->vpd_pg83); if (!vpd_pg83) { rcu_read_unlock(); return -ENXIO; } /* The id string must be at least 20 bytes + terminating NULL byte */ if (id_len < 21) { rcu_read_unlock(); return -EINVAL; } memset(id, 0, id_len); for (d = vpd_pg83->data + 4; d < vpd_pg83->data + vpd_pg83->len; d += d[3] + 4) { u8 prio = designator_prio(d); if (prio == 0 || cur_id_prio > prio) continue; switch (d[1] & 0xf) { case 0x1: /* T10 Vendor ID */ if (cur_id_size > d[3]) break; cur_id_prio = prio; cur_id_size = d[3]; if (cur_id_size + 4 > id_len) cur_id_size = id_len - 4; cur_id_str = d + 4; id_size = snprintf(id, id_len, "t10.%*pE", cur_id_size, cur_id_str); break; case 0x2: /* EUI-64 */ cur_id_prio = prio; cur_id_size = d[3]; cur_id_str = d + 4; switch (cur_id_size) { case 8: id_size = snprintf(id, id_len, "eui.%8phN", cur_id_str); break; case 12: id_size = snprintf(id, id_len, "eui.%12phN", cur_id_str); break; case 16: id_size = snprintf(id, id_len, "eui.%16phN", cur_id_str); break; default: break; } break; case 0x3: /* NAA */ cur_id_prio = prio; cur_id_size = d[3]; cur_id_str = d + 4; switch (cur_id_size) { case 8: id_size = snprintf(id, id_len, "naa.%8phN", cur_id_str); break; case 16: id_size = snprintf(id, id_len, "naa.%16phN", cur_id_str); break; default: break; } break; case 0x8: /* SCSI name string */ if (cur_id_size > d[3]) break; /* Prefer others for truncated descriptor */ if (d[3] > id_len) { prio = 2; if (cur_id_prio > prio) break; } cur_id_prio = prio; cur_id_size = id_size = d[3]; cur_id_str = d + 4; if (cur_id_size >= id_len) cur_id_size = id_len - 1; memcpy(id, cur_id_str, cur_id_size); break; default: break; } } rcu_read_unlock(); return id_size; } EXPORT_SYMBOL(scsi_vpd_lun_id); /* * scsi_vpd_tpg_id - return a target port group identifier * @sdev: SCSI device * * Returns the Target Port Group identifier from the information * froom VPD page 0x83 of the device. * * Returns the identifier or error on failure. */ int scsi_vpd_tpg_id(struct scsi_device *sdev, int *rel_id) { const unsigned char *d; const struct scsi_vpd *vpd_pg83; int group_id = -EAGAIN, rel_port = -1; rcu_read_lock(); vpd_pg83 = rcu_dereference(sdev->vpd_pg83); if (!vpd_pg83) { rcu_read_unlock(); return -ENXIO; } d = vpd_pg83->data + 4; while (d < vpd_pg83->data + vpd_pg83->len) { switch (d[1] & 0xf) { case 0x4: /* Relative target port */ rel_port = get_unaligned_be16(&d[6]); break; case 0x5: /* Target port group */ group_id = get_unaligned_be16(&d[6]); break; default: break; } d += d[3] + 4; } rcu_read_unlock(); if (group_id >= 0 && rel_id && rel_port != -1) *rel_id = rel_port; return group_id; } EXPORT_SYMBOL(scsi_vpd_tpg_id); /** * scsi_build_sense - build sense data for a command * @scmd: scsi command for which the sense should be formatted * @desc: Sense format (non-zero == descriptor format, * 0 == fixed format) * @key: Sense key * @asc: Additional sense code * @ascq: Additional sense code qualifier * **/ void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq) { scsi_build_sense_buffer(desc, scmd->sense_buffer, key, asc, ascq); scmd->result = SAM_STAT_CHECK_CONDITION; } EXPORT_SYMBOL_GPL(scsi_build_sense); #ifdef CONFIG_SCSI_LIB_KUNIT_TEST #include "scsi_lib_test.c" #endif
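/*
 * Illustrative sketch, not part of scsi_lib.c: one way an upper-level driver
 * could combine scsi_test_unit_ready() and scsi_mode_sense() (both exported
 * above) during probing.  The timeout/retry values, the buffer size and the
 * use of the caching mode page (0x08) are assumptions made for this example
 * only; it assumes the usual <scsi/scsi_device.h> and <scsi/scsi_cmnd.h>
 * declarations are in scope.
 */
static int __maybe_unused example_read_caching_page(struct scsi_device *sdev)
{
	struct scsi_sense_hdr sshdr;
	struct scsi_mode_data data;
	unsigned char buf[192];
	unsigned int off;
	int res;

	/* Ready check; UNIT ATTENTIONs are retried inside the helper. */
	res = scsi_test_unit_ready(sdev, 30 * HZ, 3, &sshdr);
	if (res)
		return res < 0 ? res : -ENODEV;

	/* MODE SENSE for page 0x08, allowing block descriptors (dbd == 0). */
	res = scsi_mode_sense(sdev, 0, 0x08, 0, buf, sizeof(buf),
			      30 * HZ, 3, &data, &sshdr);
	if (res)
		return res;

	/* The mode page follows the header and any block descriptors. */
	off = data.header_length + data.block_descriptor_length;
	if (off >= sizeof(buf) || (buf[off] & 0x3f) != 0x08)
		return -EIO;

	return 0;
}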
// SPDX-License-Identifier: GPL-2.0-or-later

#define pr_fmt(fmt) "ref_tracker: " fmt

#include <linux/export.h>
#include <linux/list_sort.h>
#include <linux/ref_tracker.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>

#define REF_TRACKER_STACK_ENTRIES 16
#define STACK_BUF_SIZE 1024

struct ref_tracker {
	struct list_head	head;   /* anchor into dir->list or dir->quarantine */
	bool			dead;
	depot_stack_handle_t	alloc_stack_handle;
	depot_stack_handle_t	free_stack_handle;
};

struct ref_tracker_dir_stats {
	int total;
	int count;
	struct {
		depot_stack_handle_t stack_handle;
		unsigned int count;
	} stacks[];
};

static struct ref_tracker_dir_stats *
ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
{
	struct ref_tracker_dir_stats *stats;
	struct ref_tracker *tracker;

	stats = kmalloc(struct_size(stats, stacks, limit),
			GFP_NOWAIT | __GFP_NOWARN);
	if (!stats)
		return ERR_PTR(-ENOMEM);
	stats->total = 0;
	stats->count = 0;

	list_for_each_entry(tracker, &dir->list, head) {
		depot_stack_handle_t stack = tracker->alloc_stack_handle;
		int i;

		++stats->total;
		for (i = 0; i < stats->count; ++i)
			if (stats->stacks[i].stack_handle == stack)
				break;
		if (i >= limit)
			continue;
		if (i >= stats->count) {
			stats->stacks[i].stack_handle = stack;
			stats->stacks[i].count = 0;
			++stats->count;
		}
		++stats->stacks[i].count;
	}

	return stats;
}

struct ostream {
	char *buf;
	int size, used;
};

#define pr_ostream(stream, fmt, args...)
\ ({ \ struct ostream *_s = (stream); \ \ if (!_s->buf) { \ pr_err(fmt, ##args); \ } else { \ int ret, len = _s->size - _s->used; \ ret = snprintf(_s->buf + _s->used, len, pr_fmt(fmt), ##args); \ _s->used += min(ret, len); \ } \ }) static void __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir, unsigned int display_limit, struct ostream *s) { struct ref_tracker_dir_stats *stats; unsigned int i = 0, skipped; depot_stack_handle_t stack; char *sbuf; lockdep_assert_held(&dir->lock); if (list_empty(&dir->list)) return; stats = ref_tracker_get_stats(dir, display_limit); if (IS_ERR(stats)) { pr_ostream(s, "%s@%pK: couldn't get stats, error %pe\n", dir->name, dir, stats); return; } sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN); for (i = 0, skipped = stats->total; i < stats->count; ++i) { stack = stats->stacks[i].stack_handle; if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4)) sbuf[0] = 0; pr_ostream(s, "%s@%pK has %d/%d users at\n%s\n", dir->name, dir, stats->stacks[i].count, stats->total, sbuf); skipped -= stats->stacks[i].count; } if (skipped) pr_ostream(s, "%s@%pK skipped reports about %d/%d users.\n", dir->name, dir, skipped, stats->total); kfree(sbuf); kfree(stats); } void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir, unsigned int display_limit) { struct ostream os = {}; __ref_tracker_dir_pr_ostream(dir, display_limit, &os); } EXPORT_SYMBOL(ref_tracker_dir_print_locked); void ref_tracker_dir_print(struct ref_tracker_dir *dir, unsigned int display_limit) { unsigned long flags; spin_lock_irqsave(&dir->lock, flags); ref_tracker_dir_print_locked(dir, display_limit); spin_unlock_irqrestore(&dir->lock, flags); } EXPORT_SYMBOL(ref_tracker_dir_print); int ref_tracker_dir_snprint(struct ref_tracker_dir *dir, char *buf, size_t size) { struct ostream os = { .buf = buf, .size = size }; unsigned long flags; spin_lock_irqsave(&dir->lock, flags); __ref_tracker_dir_pr_ostream(dir, 16, &os); spin_unlock_irqrestore(&dir->lock, flags); return os.used; } EXPORT_SYMBOL(ref_tracker_dir_snprint); void ref_tracker_dir_exit(struct ref_tracker_dir *dir) { struct ref_tracker *tracker, *n; unsigned long flags; bool leak = false; dir->dead = true; spin_lock_irqsave(&dir->lock, flags); list_for_each_entry_safe(tracker, n, &dir->quarantine, head) { list_del(&tracker->head); kfree(tracker); dir->quarantine_avail++; } if (!list_empty(&dir->list)) { ref_tracker_dir_print_locked(dir, 16); leak = true; list_for_each_entry_safe(tracker, n, &dir->list, head) { list_del(&tracker->head); kfree(tracker); } } spin_unlock_irqrestore(&dir->lock, flags); WARN_ON_ONCE(leak); WARN_ON_ONCE(refcount_read(&dir->untracked) != 1); WARN_ON_ONCE(refcount_read(&dir->no_tracker) != 1); } EXPORT_SYMBOL(ref_tracker_dir_exit); int ref_tracker_alloc(struct ref_tracker_dir *dir, struct ref_tracker **trackerp, gfp_t gfp) { unsigned long entries[REF_TRACKER_STACK_ENTRIES]; struct ref_tracker *tracker; unsigned int nr_entries; gfp_t gfp_mask = gfp | __GFP_NOWARN; unsigned long flags; WARN_ON_ONCE(dir->dead); if (!trackerp) { refcount_inc(&dir->no_tracker); return 0; } if (gfp & __GFP_DIRECT_RECLAIM) gfp_mask |= __GFP_NOFAIL; *trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask); if (unlikely(!tracker)) { pr_err_once("memory allocation failure, unreliable refcount tracker.\n"); refcount_inc(&dir->untracked); return -ENOMEM; } nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1); tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp); spin_lock_irqsave(&dir->lock, flags); 
list_add(&tracker->head, &dir->list); spin_unlock_irqrestore(&dir->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ref_tracker_alloc); int ref_tracker_free(struct ref_tracker_dir *dir, struct ref_tracker **trackerp) { unsigned long entries[REF_TRACKER_STACK_ENTRIES]; depot_stack_handle_t stack_handle; struct ref_tracker *tracker; unsigned int nr_entries; unsigned long flags; WARN_ON_ONCE(dir->dead); if (!trackerp) { refcount_dec(&dir->no_tracker); return 0; } tracker = *trackerp; if (!tracker) { refcount_dec(&dir->untracked); return -EEXIST; } nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1); stack_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT | __GFP_NOWARN); spin_lock_irqsave(&dir->lock, flags); if (tracker->dead) { pr_err("reference already released.\n"); if (tracker->alloc_stack_handle) { pr_err("allocated in:\n"); stack_depot_print(tracker->alloc_stack_handle); } if (tracker->free_stack_handle) { pr_err("freed in:\n"); stack_depot_print(tracker->free_stack_handle); } spin_unlock_irqrestore(&dir->lock, flags); WARN_ON_ONCE(1); return -EINVAL; } tracker->dead = true; tracker->free_stack_handle = stack_handle; list_move_tail(&tracker->head, &dir->quarantine); if (!dir->quarantine_avail) { tracker = list_first_entry(&dir->quarantine, struct ref_tracker, head); list_del(&tracker->head); } else { dir->quarantine_avail--; tracker = NULL; } spin_unlock_irqrestore(&dir->lock, flags); kfree(tracker); return 0; } EXPORT_SYMBOL_GPL(ref_tracker_free);
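/*
 * Illustrative sketch (not part of lib/ref_tracker.c): how a subsystem might
 * consume the API above. It assumes the usual helpers declared in
 * <linux/ref_tracker.h> (ref_tracker_dir_init()/ref_tracker_dir_exit() and a
 * caller-owned struct ref_tracker_dir embedded in some object); the "foo"
 * names below are hypothetical and only used for illustration.
 */
#if 0	/* example only, never compiled */
struct foo {
	struct ref_tracker_dir refs;	/* tracks who holds references to this object */
};

static int foo_hold(struct foo *f, struct ref_tracker **trackerp)
{
	/* Records the acquirer's call stack; the tracker handle stays with the caller. */
	return ref_tracker_alloc(&f->refs, trackerp, GFP_KERNEL);
}

static int foo_put(struct foo *f, struct ref_tracker **trackerp)
{
	/* Pairs with foo_hold(); a double release is reported with both stacks. */
	return ref_tracker_free(&f->refs, trackerp);
}

static void foo_destroy(struct foo *f)
{
	/* Prints any still-outstanding acquirers before tearing the directory down. */
	ref_tracker_dir_exit(&f->refs);
}
#endif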
// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	netns_tracker ns_tracker;
	union nf_inet_addr addr;
	int ifindex;
	int (*iter)(struct nf_conn *i, void *data);
};

#define MAX_MASQ_WORKER_COUNT	16

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto   = range->min_proto;
	newrange.max_proto   = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

static void iterate_cleanup_work(struct work_struct *work)
{
	struct nf_ct_iter_data iter_data = {};
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	iter_data.net = w->net;
	iter_data.data = (void *)w;
	nf_ct_iterate_cleanup_net(w->iter, &iter_data);

	put_net_track(w->net, &w->ns_tracker);
	kfree(w);
	atomic_dec(&masq_worker_count);
	module_put(THIS_MODULE);
}

/* Iterate conntrack table in the background and remove conntrack entries
 * that use the device/address being removed.
 *
 * In case too many work items have been queued already or memory allocation
 * fails iteration is skipped, conntrack entries will time out eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
				 int ifindex,
				 int (*iter)(struct nf_conn *i, void *data),
				 gfp_t gfp_flags)
{
	struct masq_dev_work *w;

	if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
		return;

	net = maybe_get_net(net);
	if (!net)
		return;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kzalloc(sizeof(*w), gfp_flags);
	if (w) {
		/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
		atomic_inc(&masq_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = ifindex;
		w->net = net;
		netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
		w->iter = iter;
		if (addr)
			w->addr = *addr;
		schedule_work(&w->work);
		return;
	}

	module_put(THIS_MODULE);
err_module:
	put_net(net);
}

static int device_cmp(struct nf_conn *i, void *arg)
{
	const struct nf_conn_nat *nat = nfct_nat(i);
	const struct masq_dev_work *w = arg;

	if (!nat)
		return 0;
	return nat->masq_index == w->ifindex;
}

static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed.  Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */

		nf_nat_masq_schedule(net, NULL, dev->ifindex,
				     device_cmp, GFP_KERNEL);
	}

	return NOTIFY_DONE;
}

static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct nf_conntrack_tuple *tuple;
	struct masq_dev_work *w = ptr;

	if (!device_cmp(ct, ptr))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	const struct in_ifaddr *ifa = ptr;
	const struct in_device *idev;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	idev = ifa->ifa_dev;
	if (idev->dead)
		return NOTIFY_DONE;

	memset(&addr, 0, sizeof(addr));

	addr.ip = ifa->ifa_address;

	dev = idev->dev;
	nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
			     inet_cmp, GFP_KERNEL);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6	= src;
	newrange.max_addr.in6	= src;
	newrange.min_proto	= range->min_proto;
	newrange.max_proto	= range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
 *
 * Defer it to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on amount of ipv6
 * addresses being deleted), we also need to limit work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;

	memset(&addr, 0, sizeof(addr));

	addr.in6 = ifa->addr;

	nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
			     GFP_ATOMIC);

	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

void nf_nat_masquerade_inet_unregister_notifiers(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifiers still have clients */
	if (--masq_refcnt > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);
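/*
 * Illustrative sketch (not part of this file): the intended calling pattern
 * for the exported helpers above, loosely modelled on how a masquerade
 * target/expression module would use them. The "demo_*" names are
 * hypothetical and error handling is reduced to the essentials.
 */
#if 0	/* example only, never compiled */
static int __init demo_masq_module_init(void)
{
	/* Take a reference on the shared netdev/inetaddr notifiers. */
	return nf_nat_masquerade_inet_register_notifiers();
}

static void __exit demo_masq_module_exit(void)
{
	/* Drop the reference; the notifiers go away with the last user. */
	nf_nat_masquerade_inet_unregister_notifiers();
}

/* From the packet path, evaluated at an NF_INET_POST_ROUTING hook: */
static unsigned int demo_masq_eval(struct sk_buff *skb,
				   const struct nf_nat_range2 *range,
				   const struct net_device *out)
{
	return nf_nat_masquerade_ipv4(skb, NF_INET_POST_ROUTING, range, out);
}
#endif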
5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/core.c - core driver model code (device registration, etc) * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2006 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2006 Novell, Inc. */ #include <linux/acpi.h> #include <linux/cpufreq.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fwnode.h> #include <linux/init.h> #include <linux/kstrtox.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/kdev_t.h> #include <linux/notifier.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/blkdev.h> #include <linux/mutex.h> #include <linux/pm_runtime.h> #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/string_helpers.h> #include <linux/swiotlb.h> #include <linux/sysfs.h> #include <linux/dma-map-ops.h> /* for dma_default_coherent */ #include "base.h" #include "physical_location.h" #include "power/power.h" /* Device links support. */ static LIST_HEAD(deferred_sync); static unsigned int defer_sync_state_count = 1; static DEFINE_MUTEX(fwnode_link_lock); static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; static struct workqueue_struct *device_link_wq; /** * __fwnode_link_add - Create a link between two fwnode_handles. * @con: Consumer end of the link. * @sup: Supplier end of the link. * @flags: Link flags. * * Create a fwnode link between fwnode handles @con and @sup. The fwnode link * represents the detail that the firmware lists @sup fwnode as supplying a * resource to @con. * * The driver core will use the fwnode link to create a device link between the * two device objects corresponding to @con and @sup when they are created. The * driver core will automatically delete the fwnode link between @con and @sup * after doing that. * * Attempts to create duplicate links between the same pair of fwnode handles * are ignored and there is no reference counting. 
*/ static int __fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, u8 flags) { struct fwnode_link *link; list_for_each_entry(link, &sup->consumers, s_hook) if (link->consumer == con) { link->flags |= flags; return 0; } link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; link->supplier = sup; INIT_LIST_HEAD(&link->s_hook); link->consumer = con; INIT_LIST_HEAD(&link->c_hook); link->flags = flags; list_add(&link->s_hook, &sup->consumers); list_add(&link->c_hook, &con->suppliers); pr_debug("%pfwf Linked as a fwnode consumer to %pfwf\n", con, sup); return 0; } int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, u8 flags) { int ret; mutex_lock(&fwnode_link_lock); ret = __fwnode_link_add(con, sup, flags); mutex_unlock(&fwnode_link_lock); return ret; } /** * __fwnode_link_del - Delete a link between two fwnode_handles. * @link: the fwnode_link to be deleted * * The fwnode_link_lock needs to be held when this function is called. */ static void __fwnode_link_del(struct fwnode_link *link) { pr_debug("%pfwf Dropping the fwnode link to %pfwf\n", link->consumer, link->supplier); list_del(&link->s_hook); list_del(&link->c_hook); kfree(link); } /** * __fwnode_link_cycle - Mark a fwnode link as being part of a cycle. * @link: the fwnode_link to be marked * * The fwnode_link_lock needs to be held when this function is called. */ static void __fwnode_link_cycle(struct fwnode_link *link) { pr_debug("%pfwf: cycle: depends on %pfwf\n", link->consumer, link->supplier); link->flags |= FWLINK_FLAG_CYCLE; } /** * fwnode_links_purge_suppliers - Delete all supplier links of fwnode_handle. * @fwnode: fwnode whose supplier links need to be deleted * * Deletes all supplier links connecting directly to @fwnode. */ static void fwnode_links_purge_suppliers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; mutex_lock(&fwnode_link_lock); list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) __fwnode_link_del(link); mutex_unlock(&fwnode_link_lock); } /** * fwnode_links_purge_consumers - Delete all consumer links of fwnode_handle. * @fwnode: fwnode whose consumer links need to be deleted * * Deletes all consumer links connecting directly to @fwnode. */ static void fwnode_links_purge_consumers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; mutex_lock(&fwnode_link_lock); list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) __fwnode_link_del(link); mutex_unlock(&fwnode_link_lock); } /** * fwnode_links_purge - Delete all links connected to a fwnode_handle. * @fwnode: fwnode whose links needs to be deleted * * Deletes all links connecting directly to a fwnode. */ void fwnode_links_purge(struct fwnode_handle *fwnode) { fwnode_links_purge_suppliers(fwnode); fwnode_links_purge_consumers(fwnode); } void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) { struct fwnode_handle *child; /* Don't purge consumer links of an added child */ if (fwnode->dev) return; fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; fwnode_links_purge_consumers(fwnode); fwnode_for_each_available_child_node(fwnode, child) fw_devlink_purge_absent_suppliers(child); } EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers); /** * __fwnode_links_move_consumers - Move consumer from @from to @to fwnode_handle * @from: move consumers away from this fwnode * @to: move consumers to this fwnode * * Move all consumer links from @from fwnode to @to fwnode. 
*/ static void __fwnode_links_move_consumers(struct fwnode_handle *from, struct fwnode_handle *to) { struct fwnode_link *link, *tmp; list_for_each_entry_safe(link, tmp, &from->consumers, s_hook) { __fwnode_link_add(link->consumer, to, link->flags); __fwnode_link_del(link); } } /** * __fw_devlink_pickup_dangling_consumers - Pick up dangling consumers * @fwnode: fwnode from which to pick up dangling consumers * @new_sup: fwnode of new supplier * * If the @fwnode has a corresponding struct device and the device supports * probing (that is, added to a bus), then we want to let fw_devlink create * MANAGED device links to this device, so leave @fwnode and its descendant's * fwnode links alone. * * Otherwise, move its consumers to the new supplier @new_sup. */ static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode, struct fwnode_handle *new_sup) { struct fwnode_handle *child; if (fwnode->dev && fwnode->dev->bus) return; fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; __fwnode_links_move_consumers(fwnode, new_sup); fwnode_for_each_available_child_node(fwnode, child) __fw_devlink_pickup_dangling_consumers(child, new_sup); } static DEFINE_MUTEX(device_links_lock); DEFINE_STATIC_SRCU(device_links_srcu); static inline void device_links_write_lock(void) { mutex_lock(&device_links_lock); } static inline void device_links_write_unlock(void) { mutex_unlock(&device_links_lock); } int device_links_read_lock(void) __acquires(&device_links_srcu) { return srcu_read_lock(&device_links_srcu); } void device_links_read_unlock(int idx) __releases(&device_links_srcu) { srcu_read_unlock(&device_links_srcu, idx); } int device_links_read_lock_held(void) { return srcu_read_lock_held(&device_links_srcu); } static void device_link_synchronize_removal(void) { synchronize_srcu(&device_links_srcu); } static void device_link_remove_from_lists(struct device_link *link) { list_del_rcu(&link->s_node); list_del_rcu(&link->c_node); } static bool device_is_ancestor(struct device *dev, struct device *target) { while (target->parent) { target = target->parent; if (dev == target) return true; } return false; } #define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ DL_FLAG_CYCLE | \ DL_FLAG_MANAGED) static inline bool device_link_flag_is_sync_state_only(u32 flags) { return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** * device_is_dependent - Check if one device depends on another one * @dev: Device to check dependencies for. * @target: Device to check against. * * Check if @target depends on @dev or any device dependent on it (its child or * its consumer etc). Return 1 if that is the case or 0 otherwise. */ static int device_is_dependent(struct device *dev, void *target) { struct device_link *link; int ret; /* * The "ancestors" check is needed to catch the case when the target * device has not been completely initialized yet and it is still * missing from the list of children of its parent device. 
*/ if (dev == target || device_is_ancestor(dev, target)) return 1; ret = device_for_each_child(dev, target, device_is_dependent); if (ret) return ret; list_for_each_entry(link, &dev->links.consumers, s_node) { if (device_link_flag_is_sync_state_only(link->flags)) continue; if (link->consumer == target) return 1; ret = device_is_dependent(link->consumer, target); if (ret) break; } return ret; } static void device_link_init_status(struct device_link *link, struct device *consumer, struct device *supplier) { switch (supplier->links.status) { case DL_DEV_PROBING: switch (consumer->links.status) { case DL_DEV_PROBING: /* * A consumer driver can create a link to a supplier * that has not completed its probing yet as long as it * knows that the supplier is already functional (for * example, it has just acquired some resources from the * supplier). */ link->status = DL_STATE_CONSUMER_PROBE; break; default: link->status = DL_STATE_DORMANT; break; } break; case DL_DEV_DRIVER_BOUND: switch (consumer->links.status) { case DL_DEV_PROBING: link->status = DL_STATE_CONSUMER_PROBE; break; case DL_DEV_DRIVER_BOUND: link->status = DL_STATE_ACTIVE; break; default: link->status = DL_STATE_AVAILABLE; break; } break; case DL_DEV_UNBINDING: link->status = DL_STATE_SUPPLIER_UNBIND; break; default: link->status = DL_STATE_DORMANT; break; } } static int device_reorder_to_tail(struct device *dev, void *not_used) { struct device_link *link; /* * Devices that have not been registered yet will be put to the ends * of the lists during the registration, so skip them here. */ if (device_is_registered(dev)) devices_kset_move_last(dev); if (device_pm_initialized(dev)) device_pm_move_last(dev); device_for_each_child(dev, NULL, device_reorder_to_tail); list_for_each_entry(link, &dev->links.consumers, s_node) { if (device_link_flag_is_sync_state_only(link->flags)) continue; device_reorder_to_tail(link->consumer, NULL); } return 0; } /** * device_pm_move_to_tail - Move set of devices to the end of device lists * @dev: Device to move * * This is a device_reorder_to_tail() wrapper taking the requisite locks. * * It moves the @dev along with all of its children and all of its consumers * to the ends of the device_kset and dpm_list, recursively. 
*/ void device_pm_move_to_tail(struct device *dev) { int idx; idx = device_links_read_lock(); device_pm_lock(); device_reorder_to_tail(dev, NULL); device_pm_unlock(); device_links_read_unlock(idx); } #define to_devlink(dev) container_of((dev), struct device_link, link_dev) static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; switch (to_devlink(dev)->status) { case DL_STATE_NONE: output = "not tracked"; break; case DL_STATE_DORMANT: output = "dormant"; break; case DL_STATE_AVAILABLE: output = "available"; break; case DL_STATE_CONSUMER_PROBE: output = "consumer probing"; break; case DL_STATE_ACTIVE: output = "active"; break; case DL_STATE_SUPPLIER_UNBIND: output = "supplier unbinding"; break; default: output = "unknown"; break; } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(status); static ssize_t auto_remove_on_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); const char *output; if (link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) output = "supplier unbind"; else if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) output = "consumer unbind"; else output = "never"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(auto_remove_on); static ssize_t runtime_pm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_PM_RUNTIME)); } static DEVICE_ATTR_RO(runtime_pm); static ssize_t sync_state_only_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); } static DEVICE_ATTR_RO(sync_state_only); static struct attribute *devlink_attrs[] = { &dev_attr_status.attr, &dev_attr_auto_remove_on.attr, &dev_attr_runtime_pm.attr, &dev_attr_sync_state_only.attr, NULL, }; ATTRIBUTE_GROUPS(devlink); static void device_link_release_fn(struct work_struct *work) { struct device_link *link = container_of(work, struct device_link, rm_work); /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); pm_runtime_release_supplier(link); /* * If supplier_preactivated is set, the link has been dropped between * the pm_runtime_get_suppliers() and pm_runtime_put_suppliers() calls * in __driver_probe_device(). In that case, drop the supplier's * PM-runtime usage counter to remove the reference taken by * pm_runtime_get_suppliers(). */ if (link->supplier_preactivated) pm_runtime_put_noidle(link->supplier); pm_request_idle(link->supplier); put_device(link->consumer); put_device(link->supplier); kfree(link); } static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev); INIT_WORK(&link->rm_work, device_link_release_fn); /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or * supplier devices get deleted when it runs, so put it into the * dedicated workqueue. */ queue_work(device_link_wq, &link->rm_work); } /** * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate */ void device_link_wait_removal(void) { /* * devlink removal jobs are queued in the dedicated work queue. * To be sure that all removal jobs are terminated, ensure that any * scheduled work has run to completion. 
*/ flush_workqueue(device_link_wq); } EXPORT_SYMBOL_GPL(device_link_wait_removal); static struct class devlink_class = { .name = "devlink", .dev_groups = devlink_groups, .dev_release = devlink_dev_release, }; static int devlink_add_symlinks(struct device *dev) { int ret; size_t len; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; char *buf; len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), strlen(dev_bus_name(con)) + strlen(dev_name(con))); len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier"); if (ret) goto out; ret = sysfs_create_link(&link->link_dev.kobj, &con->kobj, "consumer"); if (ret) goto err_con; snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf); if (ret) goto err_con_dev; snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf); if (ret) goto err_sup_dev; goto out; err_sup_dev: snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); err_con: sysfs_remove_link(&link->link_dev.kobj, "supplier"); out: kfree(buf); return ret; } static void devlink_remove_symlinks(struct device *dev) { struct device_link *link = to_devlink(dev); size_t len; struct device *sup = link->supplier; struct device *con = link->consumer; char *buf; sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), strlen(dev_bus_name(con)) + strlen(dev_name(con))); len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) { WARN(1, "Unable to properly free device link symlinks!\n"); return; } if (device_is_registered(con)) { snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); sysfs_remove_link(&con->kobj, buf); } snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); kfree(buf); } static struct class_interface devlink_class_intf = { .class = &devlink_class, .add_dev = devlink_add_symlinks, .remove_dev = devlink_remove_symlinks, }; static int __init devlink_class_init(void) { int ret; ret = class_register(&devlink_class); if (ret) return ret; ret = class_interface_register(&devlink_class_intf); if (ret) class_unregister(&devlink_class); return ret; } postcore_initcall(devlink_class_init); #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ DL_FLAG_AUTOREMOVE_SUPPLIER | \ DL_FLAG_AUTOPROBE_CONSUMER | \ DL_FLAG_SYNC_STATE_ONLY | \ DL_FLAG_INFERRED | \ DL_FLAG_CYCLE) #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * @flags: Link flags. * * The caller is responsible for the proper synchronization of the link creation * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the * runtime PM framework to take the link into account. 
Second, if the * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will * be forced into the active meta state and reference-counted upon the creation * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be * ignored. * * If DL_FLAG_STATELESS is set in @flags, the caller of this function is * expected to release the link returned by it directly with the help of either * device_link_del() or device_link_remove(). * * If that flag is not set, however, the caller of this function is handing the * management of the link over to the driver core entirely and its return value * can only be used to check whether or not the link is present. In that case, * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link * flags can be used to indicate to the driver core when the link can be safely * deleted. Namely, setting one of them in @flags indicates to the driver core * that the link is not going to be used (by the given caller of this function) * after unbinding the consumer or supplier driver, respectively, from its * device, so the link can be deleted at that point. If none of them is set, * the link will be maintained until one of the devices pointed to by it (either * the consumer or the supplier) is unregistered. * * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can * be used to request the driver core to automatically probe for a consumer * driver after successfully binding a driver to the supplier device. * * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER, * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at * the same time is invalid and will cause NULL to be returned upfront. * However, if a device link between the given @consumer and @supplier pair * exists already when this function is called for them, the existing link will * be returned regardless of its current type and status (the link's flags may * be modified then). The caller of this function is then expected to treat * the link as though it has just been created, so (in particular) if * DL_FLAG_STATELESS was passed in @flags, the link needs to be released * explicitly when not needed any more (as stated above). * * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). * * The supplier device is required to be registered when this function is called * and NULL will be returned if that is not the case. The consumer device need * not be registered, however. 
*/ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; if (!consumer || !supplier || consumer == supplier || flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) return NULL; if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { if (pm_runtime_get_sync(supplier) < 0) { pm_runtime_put_noidle(supplier); return NULL; } } if (!(flags & DL_FLAG_STATELESS)) flags |= DL_FLAG_MANAGED; if (flags & DL_FLAG_SYNC_STATE_ONLY && !device_link_flag_is_sync_state_only(flags)) return NULL; device_links_write_lock(); device_pm_lock(); /* * If the supplier has not been fully registered yet or there is a * reverse (non-SYNC_STATE_ONLY) dependency between the consumer and * the supplier already in the graph, return NULL. If the link is a * SYNC_STATE_ONLY link, we don't check for reverse dependencies * because it only affects sync_state() callbacks. */ if (!device_pm_initialized(supplier) || (!(flags & DL_FLAG_SYNC_STATE_ONLY) && device_is_dependent(consumer, supplier))) { link = NULL; goto out; } /* * SYNC_STATE_ONLY links are useless once a consumer device has probed. * So, only create it if the consumer hasn't probed yet. */ if (flags & DL_FLAG_SYNC_STATE_ONLY && consumer->links.status != DL_DEV_NO_DRIVER && consumer->links.status != DL_DEV_PROBING) { link = NULL; goto out; } /* * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer != consumer) continue; if (link->flags & DL_FLAG_INFERRED && !(flags & DL_FLAG_INFERRED)) link->flags &= ~DL_FLAG_INFERRED; if (flags & DL_FLAG_PM_RUNTIME) { if (!(link->flags & DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); link->flags |= DL_FLAG_PM_RUNTIME; } if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); } if (flags & DL_FLAG_STATELESS) { kref_get(&link->kref); if (link->flags & DL_FLAG_SYNC_STATE_ONLY && !(link->flags & DL_FLAG_STATELESS)) { link->flags |= DL_FLAG_STATELESS; goto reorder; } else { link->flags |= DL_FLAG_STATELESS; goto out; } } /* * If the life time of the link following from the new flags is * longer than indicated by the flags of the existing link, * update the existing link to stay around longer. 
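 *
 * For example, an existing DL_FLAG_AUTOREMOVE_CONSUMER link that is
 * requested again with DL_FLAG_AUTOREMOVE_SUPPLIER is converted to the
 * supplier-lifetime variant below, and one requested again with no
 * autoremove flags at all is made persistent by clearing both autoremove
 * flags.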
*/ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; } } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) { link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER); } if (!(link->flags & DL_FLAG_MANAGED)) { kref_get(&link->kref); link->flags |= DL_FLAG_MANAGED; device_link_init_status(link, consumer, supplier); } if (link->flags & DL_FLAG_SYNC_STATE_ONLY && !(flags & DL_FLAG_SYNC_STATE_ONLY)) { link->flags &= ~DL_FLAG_SYNC_STATE_ONLY; goto reorder; } goto out; } link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) goto out; refcount_set(&link->rpm_active, 1); get_device(supplier); link->supplier = supplier; INIT_LIST_HEAD(&link->s_node); get_device(consumer); link->consumer = consumer; INIT_LIST_HEAD(&link->c_node); link->flags = flags; kref_init(&link->kref); link->link_dev.class = &devlink_class; device_set_pm_not_required(&link->link_dev); dev_set_name(&link->link_dev, "%s:%s--%s:%s", dev_bus_name(supplier), dev_name(supplier), dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { put_device(&link->link_dev); link = NULL; goto out; } if (flags & DL_FLAG_PM_RUNTIME) { if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); pm_runtime_new_link(consumer); } /* Determine the initial link state. */ if (flags & DL_FLAG_STATELESS) link->status = DL_STATE_NONE; else device_link_init_status(link, consumer, supplier); /* * Some callers expect the link creation during consumer driver probe to * resume the supplier even without DL_FLAG_RPM_ACTIVE. */ if (link->status == DL_STATE_CONSUMER_PROBE && flags & DL_FLAG_PM_RUNTIME) pm_runtime_resume(supplier); list_add_tail_rcu(&link->s_node, &supplier->links.consumers); list_add_tail_rcu(&link->c_node, &consumer->links.suppliers); if (flags & DL_FLAG_SYNC_STATE_ONLY) { dev_dbg(consumer, "Linked as a sync state only consumer to %s\n", dev_name(supplier)); goto out; } reorder: /* * Move the consumer and all of the devices depending on it to the end * of dpm_list and the devices_kset list. * * It is necessary to hold dpm_list locked throughout all that or else * we may end up suspending with a wrong ordering of it. */ device_reorder_to_tail(consumer, NULL); dev_dbg(consumer, "Linked as a consumer to %s\n", dev_name(supplier)); out: device_pm_unlock(); device_links_write_unlock(); if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) pm_runtime_put(supplier); return link; } EXPORT_SYMBOL_GPL(device_link_add); static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); dev_dbg(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); pm_runtime_drop_link(link); device_link_remove_from_lists(link); device_unregister(&link->link_dev); } static void device_link_put_kref(struct device_link *link) { if (link->flags & DL_FLAG_STATELESS) kref_put(&link->kref, __device_link_del); else if (!device_is_registered(link->consumer)) __device_link_del(&link->kref); else WARN(1, "Unable to drop a managed device link reference\n"); } /** * device_link_del - Delete a stateless link between two devices. * @link: Device link to delete. * * The caller must ensure proper synchronization of this function with runtime * PM. If the link was added multiple times, it needs to be deleted as often. 
* Care is required for hotplugged devices: Their links are purged on removal * and calling device_link_del() is then no longer allowed. */ void device_link_del(struct device_link *link) { device_links_write_lock(); device_link_put_kref(link); device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_del); /** * device_link_remove - Delete a stateless link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * * The caller must ensure proper synchronization of this function with runtime * PM. */ void device_link_remove(void *consumer, struct device *supplier) { struct device_link *link; if (WARN_ON(consumer == supplier)) return; device_links_write_lock(); list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer == consumer) { device_link_put_kref(link); break; } } device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_remove); static void device_links_missing_supplier(struct device *dev) { struct device_link *link; list_for_each_entry(link, &dev->links.suppliers, c_node) { if (link->status != DL_STATE_CONSUMER_PROBE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } } static bool dev_is_best_effort(struct device *dev) { return (fw_devlink_best_effort && dev->can_match) || (dev->fwnode && (dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT)); } static struct fwnode_handle *fwnode_links_check_suppliers( struct fwnode_handle *fwnode) { struct fwnode_link *link; if (!fwnode || fw_devlink_is_permissive()) return NULL; list_for_each_entry(link, &fwnode->suppliers, c_hook) if (!(link->flags & (FWLINK_FLAG_CYCLE | FWLINK_FLAG_IGNORE))) return link->supplier; return NULL; } /** * device_links_check_suppliers - Check presence of supplier drivers. * @dev: Consumer device. * * Check links from this device to any suppliers. Walk the list of the device's * links to suppliers and see if all of them are available. If not, simply * return -EPROBE_DEFER. * * We need to guarantee that the supplier will not go away after the check has * been positive here. It only can go away in __device_release_driver() and * that function checks the device's links to consumers. This means we need to * mark the link as "consumer probe in progress" to make the supplier removal * wait for us to complete (or bad things may happen). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ int device_links_check_suppliers(struct device *dev) { struct device_link *link; int ret = 0, fwnode_ret = 0; struct fwnode_handle *sup_fw; /* * Device waiting for supplier to become available is not allowed to * probe. 
*/ mutex_lock(&fwnode_link_lock); sup_fw = fwnode_links_check_suppliers(dev->fwnode); if (sup_fw) { if (!dev_is_best_effort(dev)) { fwnode_ret = -EPROBE_DEFER; dev_err_probe(dev, -EPROBE_DEFER, "wait for supplier %pfwf\n", sup_fw); } else { fwnode_ret = -EAGAIN; } } mutex_unlock(&fwnode_link_lock); if (fwnode_ret == -EPROBE_DEFER) return fwnode_ret; device_links_write_lock(); list_for_each_entry(link, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE && !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) { if (dev_is_best_effort(dev) && link->flags & DL_FLAG_INFERRED && !link->supplier->can_match) { ret = -EAGAIN; continue; } device_links_missing_supplier(dev); dev_err_probe(dev, -EPROBE_DEFER, "supplier %s not ready\n", dev_name(link->supplier)); ret = -EPROBE_DEFER; break; } WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); } dev->links.status = DL_DEV_PROBING; device_links_write_unlock(); return ret ? ret : fwnode_ret; } /** * __device_links_queue_sync_state - Queue a device for sync_state() callback * @dev: Device to call sync_state() on * @list: List head to queue the @dev on * * Queues a device for a sync_state() callback when the device links write lock * isn't held. This allows the sync_state() execution flow to use device links * APIs. The caller must ensure this function is called with * device_links_write_lock() held. * * This function does a get_device() to make sure the device is not freed while * on this list. * * So the caller must also ensure that device_links_flush_sync_list() is called * as soon as the caller releases device_links_write_lock(). This is necessary * to make sure the sync_state() is called in a timely fashion and the * put_device() is called on this device. */ static void __device_links_queue_sync_state(struct device *dev, struct list_head *list) { struct device_link *link; if (!dev_has_sync_state(dev)) return; if (dev->state_synced) return; list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_ACTIVE) return; } /* * Set the flag here to avoid adding the same device to a list more * than once. This can happen if new consumers get added to the device * and probed before the list is flushed. */ dev->state_synced = true; if (WARN_ON(!list_empty(&dev->links.defer_sync))) return; get_device(dev); list_add_tail(&dev->links.defer_sync, list); } /** * device_links_flush_sync_list - Call sync_state() on a list of devices * @list: List of devices to call sync_state() on * @dont_lock_dev: Device for which lock is already held by the caller * * Calls sync_state() on all the devices that have been queued for it. This * function is used in conjunction with __device_links_queue_sync_state(). The * @dont_lock_dev parameter is useful when this function is called from a * context where a device lock is already held. 
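 *
 * The call pattern used by the callers in this file (shown here only as a
 * sketch, these are not exported interfaces) is roughly:
 *
 *	LIST_HEAD(sync_list);
 *
 *	device_links_write_lock();
 *	__device_links_queue_sync_state(dev, &sync_list);
 *	device_links_write_unlock();
 *	device_links_flush_sync_list(&sync_list, NULL);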
*/ static void device_links_flush_sync_list(struct list_head *list, struct device *dont_lock_dev) { struct device *dev, *tmp; list_for_each_entry_safe(dev, tmp, list, links.defer_sync) { list_del_init(&dev->links.defer_sync); if (dev != dont_lock_dev) device_lock(dev); dev_sync_state(dev); if (dev != dont_lock_dev) device_unlock(dev); put_device(dev); } } void device_links_supplier_sync_state_pause(void) { device_links_write_lock(); defer_sync_state_count++; device_links_write_unlock(); } void device_links_supplier_sync_state_resume(void) { struct device *dev, *tmp; LIST_HEAD(sync_list); device_links_write_lock(); if (!defer_sync_state_count) { WARN(true, "Unmatched sync_state pause/resume!"); goto out; } defer_sync_state_count--; if (defer_sync_state_count) goto out; list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_sync) { /* * Delete from deferred_sync list before queuing it to * sync_list because defer_sync is used for both lists. */ list_del_init(&dev->links.defer_sync); __device_links_queue_sync_state(dev, &sync_list); } out: device_links_write_unlock(); device_links_flush_sync_list(&sync_list, NULL); } static int sync_state_resume_initcall(void) { device_links_supplier_sync_state_resume(); return 0; } late_initcall(sync_state_resume_initcall); static void __device_links_supplier_defer_sync(struct device *sup) { if (list_empty(&sup->links.defer_sync) && dev_has_sync_state(sup)) list_add_tail(&sup->links.defer_sync, &deferred_sync); } static void device_link_drop_managed(struct device_link *link) { link->flags &= ~DL_FLAG_MANAGED; WRITE_ONCE(link->status, DL_STATE_NONE); kref_put(&link->kref, __device_link_del); } static ssize_t waiting_for_supplier_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); mutex_lock(&fwnode_link_lock); val = !!fwnode_links_check_suppliers(dev->fwnode); mutex_unlock(&fwnode_link_lock); device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static DEVICE_ATTR_RO(waiting_for_supplier); /** * device_links_force_bind - Prepares device to be force bound * @dev: Consumer device. * * device_bind_driver() force binds a device to a driver without calling any * driver probe functions. So the consumer really isn't going to wait for any * supplier before it's bound to the driver. We still want the device link * states to be sensible when this happens. * * In preparation for device_bind_driver(), this function goes through each * supplier device links and checks if the supplier is bound. If it is, then * the device link status is set to CONSUMER_PROBE. Otherwise, the device link * is dropped. Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_force_bind(struct device *dev) { struct device_link *link, *ln; device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE) { device_link_drop_managed(link); continue; } WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); } dev->links.status = DL_DEV_PROBING; device_links_write_unlock(); } /** * device_links_driver_bound - Update device links after probing its driver. * @dev: Device to update the links for. * * The probe has been successful, so update links from this device to any * consumers by changing their status to "available". * * Also change the status of @dev's links to suppliers to "active". * * Links without the DL_FLAG_MANAGED flag set are ignored. 
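 *
 * For example, a consumer link that is still DL_STATE_DORMANT because its
 * consumer has not probed yet is moved to DL_STATE_AVAILABLE here, while a
 * supplier link that was in DL_STATE_CONSUMER_PROBE becomes DL_STATE_ACTIVE.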
*/ void device_links_driver_bound(struct device *dev) { struct device_link *link, *ln; LIST_HEAD(sync_list); /* * If a device binds successfully, it's expected to have created all * the device links it needs to or make new device links as it needs * them. So, fw_devlink no longer needs to create device links to any * of the device's suppliers. * * Also, if a child firmware node of this bound device is not added as a * device by now, assume it is never going to be added. Make this bound * device the fallback supplier to the dangling consumers of the child * firmware node because this bound device is probably implementing the * child firmware node functionality and we don't want the dangling * consumers to defer probe indefinitely waiting for a device for the * child firmware node. */ if (dev->fwnode && dev->fwnode->dev == dev) { struct fwnode_handle *child; fwnode_links_purge_suppliers(dev->fwnode); mutex_lock(&fwnode_link_lock); fwnode_for_each_available_child_node(dev->fwnode, child) __fw_devlink_pickup_dangling_consumers(child, dev->fwnode); __fw_devlink_link_to_consumers(dev); mutex_unlock(&fwnode_link_lock); } device_remove_file(dev, &dev_attr_waiting_for_supplier); device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; /* * Links created during consumer probe may be in the "consumer * probe" state to start with if the supplier is still probing * when they are created and they may become "active" if the * consumer probe returns first. Skip them here. */ if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) continue; WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) driver_deferred_probe_add(link->consumer); } if (defer_sync_state_count) __device_links_supplier_defer_sync(dev); else __device_links_queue_sync_state(dev, &sync_list); list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { struct device *supplier; if (!(link->flags & DL_FLAG_MANAGED)) continue; supplier = link->supplier; if (link->flags & DL_FLAG_SYNC_STATE_ONLY) { /* * When DL_FLAG_SYNC_STATE_ONLY is set, it means no * other DL_MANAGED_LINK_FLAGS have been set. So, it's * safe to drop the managed link completely. */ device_link_drop_managed(link); } else if (dev_is_best_effort(dev) && link->flags & DL_FLAG_INFERRED && link->status != DL_STATE_CONSUMER_PROBE && !link->supplier->can_match) { /* * When dev_is_best_effort() is true, we ignore device * links to suppliers that don't have a driver. If the * consumer device still managed to probe, there's no * point in maintaining a device link in a weird state * (consumer probed before supplier). So delete it. */ device_link_drop_managed(link); } else { WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); WRITE_ONCE(link->status, DL_STATE_ACTIVE); } /* * This needs to be done even for the deleted * DL_FLAG_SYNC_STATE_ONLY device link in case it was the last * device link that was preventing the supplier from getting a * sync_state() call. */ if (defer_sync_state_count) __device_links_supplier_defer_sync(supplier); else __device_links_queue_sync_state(supplier, &sync_list); } dev->links.status = DL_DEV_DRIVER_BOUND; device_links_write_unlock(); device_links_flush_sync_list(&sync_list, dev); } /** * __device_links_no_driver - Update links of a device without a driver. * @dev: Device without a driver. * * Delete all non-persistent links from this device to any suppliers.
* * Persistent links stay around, but their status is changed to "available", * unless they already are in the "supplier unbind in progress" state in which * case they need not be updated. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ static void __device_links_no_driver(struct device *dev) { struct device_link *link, *ln; list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { device_link_drop_managed(link); continue; } if (link->status != DL_STATE_CONSUMER_PROBE && link->status != DL_STATE_ACTIVE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } dev->links.status = DL_DEV_NO_DRIVER; } /** * device_links_no_driver - Update links after failing driver probe. * @dev: Device whose driver has just failed to probe. * * Clean up leftover links to consumers for @dev and invoke * %__device_links_no_driver() to update links to suppliers for it as * appropriate. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_no_driver(struct device *dev) { struct device_link *link; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; /* * The probe has failed, so if the status of the link is * "consumer probe" or "active", it must have been added by * a probing consumer while this device was still probing. * Change its state to "dormant", as it represents a valid * relationship, but it is not functionally meaningful. */ if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) WRITE_ONCE(link->status, DL_STATE_DORMANT); } __device_links_no_driver(dev); device_links_write_unlock(); } /** * device_links_driver_cleanup - Update links after driver removal. * @dev: Device whose driver has just gone away. * * Update links to consumers for @dev by changing their status to "dormant" and * invoke %__device_links_no_driver() to update links to suppliers for it as * appropriate. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_cleanup(struct device *dev) { struct device_link *link, *ln; device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); /* * autoremove the links between this @dev and its consumer * devices that are not active, i.e. where the link state * has moved to DL_STATE_SUPPLIER_UNBIND. */ if (link->status == DL_STATE_SUPPLIER_UNBIND && link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) device_link_drop_managed(link); WRITE_ONCE(link->status, DL_STATE_DORMANT); } list_del_init(&dev->links.defer_sync); __device_links_no_driver(dev); device_links_write_unlock(); } /** * device_links_busy - Check if there are any busy links to consumers. * @dev: Device to check. * * Check each consumer of the device and return 'true' if its link's status * is one of "consumer probe" or "active" (meaning that the given consumer is * probing right now or its driver is present). Otherwise, change the link * state to "supplier unbind" to prevent the consumer from being probed * successfully going forward. * * Return 'false' if there are no probing or active consumers. 
* * Links without the DL_FLAG_MANAGED flag set are ignored. */ bool device_links_busy(struct device *dev) { struct device_link *link; bool ret = false; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) { ret = true; break; } WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); } dev->links.status = DL_DEV_UNBINDING; device_links_write_unlock(); return ret; } /** * device_links_unbind_consumers - Force unbind consumers of the given device. * @dev: Device to unbind the consumers of. * * Walk the list of links to consumers for @dev and if any of them is in the * "consumer probe" state, wait for all device probes in progress to complete * and start over. * * If that's not the case, change the status of the link to "supplier unbind" * and check if the link was in the "active" state. If so, force the consumer * driver to unbind and start over (the consumer will not re-probe as we have * changed the state of the link already). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_unbind_consumers(struct device *dev) { struct device_link *link; start: device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { enum device_link_state status; if (!(link->flags & DL_FLAG_MANAGED) || link->flags & DL_FLAG_SYNC_STATE_ONLY) continue; status = link->status; if (status == DL_STATE_CONSUMER_PROBE) { device_links_write_unlock(); wait_for_device_probe(); goto start; } WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); if (status == DL_STATE_ACTIVE) { struct device *consumer = link->consumer; get_device(consumer); device_links_write_unlock(); device_release_driver_internal(consumer, NULL, consumer->parent); put_device(consumer); goto start; } } device_links_write_unlock(); } /** * device_links_purge - Delete existing links to other devices. * @dev: Target device. */ static void device_links_purge(struct device *dev) { struct device_link *link, *ln; if (dev->class == &devlink_class) return; /* * Delete all of the remaining links from this device to any other * devices (either consumers or suppliers). 
*/ device_links_write_lock(); list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { WARN_ON(link->status == DL_STATE_ACTIVE); __device_link_del(&link->kref); } list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) { WARN_ON(link->status != DL_STATE_DORMANT && link->status != DL_STATE_NONE); __device_link_del(&link->kref); } device_links_write_unlock(); } #define FW_DEVLINK_FLAGS_PERMISSIVE (DL_FLAG_INFERRED | \ DL_FLAG_SYNC_STATE_ONLY) #define FW_DEVLINK_FLAGS_ON (DL_FLAG_INFERRED | \ DL_FLAG_AUTOPROBE_CONSUMER) #define FW_DEVLINK_FLAGS_RPM (FW_DEVLINK_FLAGS_ON | \ DL_FLAG_PM_RUNTIME) static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_RPM; static int __init fw_devlink_setup(char *arg) { if (!arg) return -EINVAL; if (strcmp(arg, "off") == 0) { fw_devlink_flags = 0; } else if (strcmp(arg, "permissive") == 0) { fw_devlink_flags = FW_DEVLINK_FLAGS_PERMISSIVE; } else if (strcmp(arg, "on") == 0) { fw_devlink_flags = FW_DEVLINK_FLAGS_ON; } else if (strcmp(arg, "rpm") == 0) { fw_devlink_flags = FW_DEVLINK_FLAGS_RPM; } return 0; } early_param("fw_devlink", fw_devlink_setup); static bool fw_devlink_strict; static int __init fw_devlink_strict_setup(char *arg) { return kstrtobool(arg, &fw_devlink_strict); } early_param("fw_devlink.strict", fw_devlink_strict_setup); #define FW_DEVLINK_SYNC_STATE_STRICT 0 #define FW_DEVLINK_SYNC_STATE_TIMEOUT 1 #ifndef CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT static int fw_devlink_sync_state; #else static int fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_TIMEOUT; #endif static int __init fw_devlink_sync_state_setup(char *arg) { if (!arg) return -EINVAL; if (strcmp(arg, "strict") == 0) { fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_STRICT; return 0; } else if (strcmp(arg, "timeout") == 0) { fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_TIMEOUT; return 0; } return -EINVAL; } early_param("fw_devlink.sync_state", fw_devlink_sync_state_setup); static inline u32 fw_devlink_get_flags(u8 fwlink_flags) { if (fwlink_flags & FWLINK_FLAG_CYCLE) return FW_DEVLINK_FLAGS_PERMISSIVE | DL_FLAG_CYCLE; return fw_devlink_flags; } static bool fw_devlink_is_permissive(void) { return fw_devlink_flags == FW_DEVLINK_FLAGS_PERMISSIVE; } bool fw_devlink_is_strict(void) { return fw_devlink_strict && !fw_devlink_is_permissive(); } static void fw_devlink_parse_fwnode(struct fwnode_handle *fwnode) { if (fwnode->flags & FWNODE_FLAG_LINKS_ADDED) return; fwnode_call_int_op(fwnode, add_links); fwnode->flags |= FWNODE_FLAG_LINKS_ADDED; } static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode) { struct fwnode_handle *child = NULL; fw_devlink_parse_fwnode(fwnode); while ((child = fwnode_get_next_available_child_node(fwnode, child))) fw_devlink_parse_fwtree(child); } static void fw_devlink_relax_link(struct device_link *link) { if (!(link->flags & DL_FLAG_INFERRED)) return; if (device_link_flag_is_sync_state_only(link->flags)) return; pm_runtime_drop_link(link); link->flags = DL_FLAG_MANAGED | FW_DEVLINK_FLAGS_PERMISSIVE; dev_dbg(link->consumer, "Relaxing link with %s\n", dev_name(link->supplier)); } static int fw_devlink_no_driver(struct device *dev, void *data) { struct device_link *link = to_devlink(dev); if (!link->supplier->can_match) fw_devlink_relax_link(link); return 0; } void fw_devlink_drivers_done(void) { fw_devlink_drv_reg_done = true; device_links_write_lock(); class_for_each_device(&devlink_class, NULL, NULL, fw_devlink_no_driver); device_links_write_unlock(); } static int fw_devlink_dev_sync_state(struct device *dev, void *data) { struct device_link 
*link = to_devlink(dev); struct device *sup = link->supplier; if (!(link->flags & DL_FLAG_MANAGED) || link->status == DL_STATE_ACTIVE || sup->state_synced || !dev_has_sync_state(sup)) return 0; if (fw_devlink_sync_state == FW_DEVLINK_SYNC_STATE_STRICT) { dev_warn(sup, "sync_state() pending due to %s\n", dev_name(link->consumer)); return 0; } if (!list_empty(&sup->links.defer_sync)) return 0; dev_warn(sup, "Timed out. Forcing sync_state()\n"); sup->state_synced = true; get_device(sup); list_add_tail(&sup->links.defer_sync, data); return 0; } void fw_devlink_probing_done(void) { LIST_HEAD(sync_list); device_links_write_lock(); class_for_each_device(&devlink_class, NULL, &sync_list, fw_devlink_dev_sync_state); device_links_write_unlock(); device_links_flush_sync_list(&sync_list, NULL); } /** * wait_for_init_devices_probe - Try to probe any device needed for init * * Some devices might need to be probed and bound successfully before the kernel * boot sequence can finish and move on to init/userspace. For example, a * network interface might need to be bound to be able to mount an NFS rootfs. * * With fw_devlink=on by default, some of these devices might be blocked from * probing because they are waiting on an optional supplier that doesn't have a * driver. While fw_devlink will eventually identify such devices and unblock * the probing automatically, it might be too late by the time it unblocks the * probing of devices. For example, the IP4 autoconfig might time out before * fw_devlink unblocks probing of the network interface. * * This function is available to temporarily try and probe all devices that have * a driver even if some of their suppliers haven't been added or don't have * drivers. * * The drivers can then decide which of the suppliers are optional vs mandatory * and probe the device if possible. By the time this function returns, all such * "best effort" probes are guaranteed to be completed. If a device successfully * probes in this mode, we delete all fw_devlink discovered dependencies of that * device where the supplier hasn't yet probed successfully because they have to * be optional dependencies. * * Any devices that didn't successfully probe go back to being treated as if * this function was never called. * * This also means that some devices that aren't needed for init and could have * waited for their optional supplier to probe (when the supplier's module is * loaded later on) would end up probing prematurely with limited functionality. * So call this function only when boot would fail without it. */ void __init wait_for_init_devices_probe(void) { if (!fw_devlink_flags || fw_devlink_is_permissive()) return; /* * Wait for all ongoing probes to finish so that the "best effort" is * only applied to devices that can't probe otherwise. */ wait_for_device_probe(); pr_info("Trying to probe devices needed for running init ...\n"); fw_devlink_best_effort = true; driver_deferred_probe_trigger(); /* * Wait for all "best effort" probes to finish before going back to * normal enforcement.
*/ wait_for_device_probe(); fw_devlink_best_effort = false; } static void fw_devlink_unblock_consumers(struct device *dev) { struct device_link *link; if (!fw_devlink_flags || fw_devlink_is_permissive()) return; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) fw_devlink_relax_link(link); device_links_write_unlock(); } #define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) static bool fwnode_init_without_drv(struct fwnode_handle *fwnode) { struct device *dev; bool ret; if (!(fwnode->flags & FWNODE_FLAG_INITIALIZED)) return false; dev = get_dev_from_fwnode(fwnode); ret = !dev || dev->links.status == DL_DEV_NO_DRIVER; put_device(dev); return ret; } static bool fwnode_ancestor_init_without_drv(struct fwnode_handle *fwnode) { struct fwnode_handle *parent; fwnode_for_each_parent_node(fwnode, parent) { if (fwnode_init_without_drv(parent)) { fwnode_handle_put(parent); return true; } } return false; } /** * fwnode_is_ancestor_of - Test if @ancestor is ancestor of @child * @ancestor: Firmware which is tested for being an ancestor * @child: Firmware which is tested for being the child * * A node is considered an ancestor of itself too. * * Return: true if @ancestor is an ancestor of @child. Otherwise, returns false. */ static bool fwnode_is_ancestor_of(const struct fwnode_handle *ancestor, const struct fwnode_handle *child) { struct fwnode_handle *parent; if (IS_ERR_OR_NULL(ancestor)) return false; if (child == ancestor) return true; fwnode_for_each_parent_node(child, parent) { if (parent == ancestor) { fwnode_handle_put(parent); return true; } } return false; } /** * fwnode_get_next_parent_dev - Find device of closest ancestor fwnode * @fwnode: firmware node * * Given a firmware node (@fwnode), this function finds its closest ancestor * firmware node that has a corresponding struct device and returns that struct * device. * * The caller is responsible for calling put_device() on the returned device * pointer. * * Return: a pointer to the device of the @fwnode's closest ancestor. */ static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwnode) { struct fwnode_handle *parent; struct device *dev; fwnode_for_each_parent_node(fwnode, parent) { dev = get_dev_from_fwnode(parent); if (dev) { fwnode_handle_put(parent); return dev; } } return NULL; } /** * __fw_devlink_relax_cycles - Relax and mark dependency cycles. * @con: Potential consumer device. * @sup_handle: Potential supplier's fwnode. * * Needs to be called with fwnode_lock and device link lock held. * * Check if @sup_handle or any of its ancestors or suppliers directly/indirectly * depend on @con. This function can detect multiple cycles between @sup_handle * and @con. When such dependency cycles are found, convert all device links * created solely by fw_devlink into SYNC_STATE_ONLY device links. Also, mark * all fwnode links in the cycle with FWLINK_FLAG_CYCLE so that when they are * converted into a device link in the future, they are created as * SYNC_STATE_ONLY device links. This is the equivalent of doing * fw_devlink=permissive just between the devices in the cycle. We need to do * this because, at this point, fw_devlink can't tell which of these * dependencies is not a real dependency. * * Return true if one or more cycles were found. Otherwise, return false.
*/ static bool __fw_devlink_relax_cycles(struct device *con, struct fwnode_handle *sup_handle) { struct device *sup_dev = NULL, *par_dev = NULL; struct fwnode_link *link; struct device_link *dev_link; bool ret = false; if (!sup_handle) return false; /* * We aren't trying to find all cycles. Just a cycle between con and * sup_handle. */ if (sup_handle->flags & FWNODE_FLAG_VISITED) return false; sup_handle->flags |= FWNODE_FLAG_VISITED; sup_dev = get_dev_from_fwnode(sup_handle); /* Termination condition. */ if (sup_dev == con) { pr_debug("----- cycle: start -----\n"); ret = true; goto out; } /* * If sup_dev is bound to a driver and @con hasn't started binding to a * driver, sup_dev can't be a consumer of @con. So, no need to check * further. */ if (sup_dev && sup_dev->links.status == DL_DEV_DRIVER_BOUND && con->links.status == DL_DEV_NO_DRIVER) { ret = false; goto out; } list_for_each_entry(link, &sup_handle->suppliers, c_hook) { if (link->flags & FWLINK_FLAG_IGNORE) continue; if (__fw_devlink_relax_cycles(con, link->supplier)) { __fwnode_link_cycle(link); ret = true; } } /* * Give priority to device parent over fwnode parent to account for any * quirks in how fwnodes are converted to devices. */ if (sup_dev) par_dev = get_device(sup_dev->parent); else par_dev = fwnode_get_next_parent_dev(sup_handle); if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) { pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle, par_dev->fwnode); ret = true; } if (!sup_dev) goto out; list_for_each_entry(dev_link, &sup_dev->links.suppliers, c_node) { /* * Ignore a SYNC_STATE_ONLY flag only if it wasn't marked as * such due to a cycle. */ if (device_link_flag_is_sync_state_only(dev_link->flags) && !(dev_link->flags & DL_FLAG_CYCLE)) continue; if (__fw_devlink_relax_cycles(con, dev_link->supplier->fwnode)) { pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle, dev_link->supplier->fwnode); fw_devlink_relax_link(dev_link); dev_link->flags |= DL_FLAG_CYCLE; ret = true; } } out: sup_handle->flags &= ~FWNODE_FLAG_VISITED; put_device(sup_dev); put_device(par_dev); return ret; } /** * fw_devlink_create_devlink - Create a device link from a consumer to fwnode * @con: consumer device for the device link * @sup_handle: fwnode handle of supplier * @link: fwnode link that's being converted to a device link * * This function will try to create a device link between the consumer device * @con and the supplier device represented by @sup_handle. * * The supplier has to be provided as a fwnode because incorrect cycles in * fwnode links can sometimes cause the supplier device to never be created. * This function detects such cases and returns an error if it cannot create a * device link from the consumer to a missing supplier. * * Returns, * 0 on successfully creating a device link * -EINVAL if the device link cannot be created as expected * -EAGAIN if the device link cannot be created right now, but it may be * possible to do that in the future */ static int fw_devlink_create_devlink(struct device *con, struct fwnode_handle *sup_handle, struct fwnode_link *link) { struct device *sup_dev; int ret = 0; u32 flags; if (link->flags & FWLINK_FLAG_IGNORE) return 0; if (con->fwnode == link->consumer) flags = fw_devlink_get_flags(link->flags); else flags = FW_DEVLINK_FLAGS_PERMISSIVE; /* * In some cases, a device P might also be a supplier to its child node * C. However, this would defer the probe of C until the probe of P * completes successfully. This is perfectly fine in the device driver * model. 
device_add() doesn't guarantee probe completion of the device * by the time it returns. * * However, there are a few drivers that assume C will finish probing * as soon as it's added and before P finishes probing. So, we provide * a flag to let fw_devlink know not to delay the probe of C until the * probe of P completes successfully. * * When such a flag is set, we can't create device links where P is the * supplier of C as that would delay the probe of C. */ if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD && fwnode_is_ancestor_of(sup_handle, con->fwnode)) return -EINVAL; /* * SYNC_STATE_ONLY device links don't block probing and support cycles. * So, one might expect that cycle detection isn't necessary for them. * However, if the device link was marked as SYNC_STATE_ONLY because * it's part of a cycle, then we still need to do cycle detection. This * is because the consumer and supplier might be part of multiple cycles * and we need to detect all those cycles. */ if (!device_link_flag_is_sync_state_only(flags) || flags & DL_FLAG_CYCLE) { device_links_write_lock(); if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); flags = fw_devlink_get_flags(link->flags); pr_debug("----- cycle: end -----\n"); dev_info(con, "Fixed dependency cycle(s) with %pfwf\n", sup_handle); } device_links_write_unlock(); } if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE) sup_dev = fwnode_get_next_parent_dev(sup_handle); else sup_dev = get_dev_from_fwnode(sup_handle); if (sup_dev) { /* * If it's one of those drivers that don't actually bind to * their device using driver core, then don't wait on this * supplier device indefinitely. */ if (sup_dev->links.status == DL_DEV_NO_DRIVER && sup_handle->flags & FWNODE_FLAG_INITIALIZED) { dev_dbg(con, "Not linking %pfwf - dev might never probe\n", sup_handle); ret = -EINVAL; goto out; } if (con != sup_dev && !device_link_add(con, sup_dev, flags)) { dev_err(con, "Failed to create device link (0x%x) with %s\n", flags, dev_name(sup_dev)); ret = -EINVAL; } goto out; } /* * Supplier or supplier's ancestor already initialized without a struct * device or being probed by a driver. */ if (fwnode_init_without_drv(sup_handle) || fwnode_ancestor_init_without_drv(sup_handle)) { dev_dbg(con, "Not linking %pfwf - might never become dev\n", sup_handle); return -EINVAL; } ret = -EAGAIN; out: put_device(sup_dev); return ret; } /** * __fw_devlink_link_to_consumers - Create device links to consumers of a device * @dev: Device that needs to be linked to its consumers * * This function looks at all the consumer fwnodes of @dev and creates device * links between the consumer device and @dev (supplier). * * If the consumer device has not been added yet, then this function creates a * SYNC_STATE_ONLY link between @dev (supplier) and the closest ancestor device * of the consumer fwnode. This is necessary to make sure @dev doesn't get a * sync_state() callback before the real consumer device gets to be added and * then probed. * * Once device links are created from the real consumer to @dev (supplier), the * fwnode links are deleted.
*/ static void __fw_devlink_link_to_consumers(struct device *dev) { struct fwnode_handle *fwnode = dev->fwnode; struct fwnode_link *link, *tmp; list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) { struct device *con_dev; bool own_link = true; int ret; con_dev = get_dev_from_fwnode(link->consumer); /* * If consumer device is not available yet, make a "proxy" * SYNC_STATE_ONLY link from the consumer's parent device to * the supplier device. This is necessary to make sure the * supplier doesn't get a sync_state() callback before the real * consumer can create a device link to the supplier. * * This proxy link step is needed to handle the case where the * consumer's parent device is added before the supplier. */ if (!con_dev) { con_dev = fwnode_get_next_parent_dev(link->consumer); /* * However, if the consumer's parent device is also the * parent of the supplier, don't create a * consumer-supplier link from the parent to its child * device. Such a dependency is impossible. */ if (con_dev && fwnode_is_ancestor_of(con_dev->fwnode, fwnode)) { put_device(con_dev); con_dev = NULL; } else { own_link = false; } } if (!con_dev) continue; ret = fw_devlink_create_devlink(con_dev, fwnode, link); put_device(con_dev); if (!own_link || ret == -EAGAIN) continue; __fwnode_link_del(link); } } /** * __fw_devlink_link_to_suppliers - Create device links to suppliers of a device * @dev: The consumer device that needs to be linked to its suppliers * @fwnode: Root of the fwnode tree that is used to create device links * * This function looks at all the supplier fwnodes of fwnode tree rooted at * @fwnode and creates device links between @dev (consumer) and all the * supplier devices of the entire fwnode tree at @fwnode. * * The function creates normal (non-SYNC_STATE_ONLY) device links between @dev * and the real suppliers of @dev. Once these device links are created, the * fwnode links are deleted. * * In addition, it also looks at all the suppliers of the entire fwnode tree * because some of the child devices of @dev that have not been added yet * (because @dev hasn't probed) might already have their suppliers added to * driver core. So, this function creates SYNC_STATE_ONLY device links between * @dev (consumer) and these suppliers to make sure they don't execute their * sync_state() callbacks before these child devices have a chance to create * their device links. The fwnode links that correspond to the child devices * aren't deleted because they are needed later to create the device links * between the real consumer and supplier devices. */ static void __fw_devlink_link_to_suppliers(struct device *dev, struct fwnode_handle *fwnode) { bool own_link = (dev->fwnode == fwnode); struct fwnode_link *link, *tmp; struct fwnode_handle *child = NULL; list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) { int ret; struct fwnode_handle *sup = link->supplier; ret = fw_devlink_create_devlink(dev, sup, link); if (!own_link || ret == -EAGAIN) continue; __fwnode_link_del(link); } /* * Make "proxy" SYNC_STATE_ONLY device links to represent the needs of * all the descendants. This proxy link step is needed to handle the * case where the supplier is added before the consumer's parent device * (@dev).
*/ while ((child = fwnode_get_next_available_child_node(fwnode, child))) __fw_devlink_link_to_suppliers(dev, child); } static void fw_devlink_link_device(struct device *dev) { struct fwnode_handle *fwnode = dev->fwnode; if (!fw_devlink_flags) return; fw_devlink_parse_fwtree(fwnode); mutex_lock(&fwnode_link_lock); __fw_devlink_link_to_consumers(dev); __fw_devlink_link_to_suppliers(dev, fwnode); mutex_unlock(&fwnode_link_lock); } /* Device links support end. */ int (*platform_notify)(struct device *dev) = NULL; int (*platform_notify_remove)(struct device *dev) = NULL; static struct kobject *dev_kobj; /* /sys/dev/char */ static struct kobject *sysfs_dev_char_kobj; /* /sys/dev/block */ static struct kobject *sysfs_dev_block_kobj; static DEFINE_MUTEX(device_hotplug_lock); void lock_device_hotplug(void) { mutex_lock(&device_hotplug_lock); } void unlock_device_hotplug(void) { mutex_unlock(&device_hotplug_lock); } int lock_device_hotplug_sysfs(void) { if (mutex_trylock(&device_hotplug_lock)) return 0; /* Avoid busy looping (5 ms of sleep should do). */ msleep(5); return restart_syscall(); } #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { return !(dev->type == &part_type); } #else static inline int device_is_not_partition(struct device *dev) { return 1; } #endif static void device_platform_notify(struct device *dev) { acpi_device_notify(dev); software_node_notify(dev); if (platform_notify) platform_notify(dev); } static void device_platform_notify_remove(struct device *dev) { if (platform_notify_remove) platform_notify_remove(dev); software_node_notify_remove(dev); acpi_device_notify_remove(dev); } /** * dev_driver_string - Return a device's driver name, if at all possible * @dev: struct device to get the name of * * Will return the device's driver's name if it is bound to a device. If * the device is not bound to a driver, it will return the name of the bus * it is attached to. If it is not attached to a bus either, an empty * string will be returned. */ const char *dev_driver_string(const struct device *dev) { struct device_driver *drv; /* dev->driver can change to NULL underneath us because of unbinding, * so be careful about accessing it. dev->bus and dev->class should * never change once they are set, so they don't need special care. */ drv = READ_ONCE(dev->driver); return drv ? 
drv->name : dev_bus_name(dev); } EXPORT_SYMBOL(dev_driver_string); #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct device_attribute *dev_attr = to_dev_attr(attr); struct device *dev = kobj_to_dev(kobj); ssize_t ret = -EIO; if (dev_attr->show) ret = dev_attr->show(dev, dev_attr, buf); if (ret >= (ssize_t)PAGE_SIZE) { printk("dev_attr_show: %pS returned bad count\n", dev_attr->show); } return ret; } static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct device_attribute *dev_attr = to_dev_attr(attr); struct device *dev = kobj_to_dev(kobj); ssize_t ret = -EIO; if (dev_attr->store) ret = dev_attr->store(dev, dev_attr, buf, count); return ret; } static const struct sysfs_ops dev_sysfs_ops = { .show = dev_attr_show, .store = dev_attr_store, }; #define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr) ssize_t device_store_ulong(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); int ret; unsigned long new; ret = kstrtoul(buf, 0, &new); if (ret) return ret; *(unsigned long *)(ea->var) = new; /* Always return full write size even if we didn't consume all */ return size; } EXPORT_SYMBOL_GPL(device_store_ulong); ssize_t device_show_ulong(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%lx\n", *(unsigned long *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_ulong); ssize_t device_store_int(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); int ret; long new; ret = kstrtol(buf, 0, &new); if (ret) return ret; if (new > INT_MAX || new < INT_MIN) return -EINVAL; *(int *)(ea->var) = new; /* Always return full write size even if we didn't consume all */ return size; } EXPORT_SYMBOL_GPL(device_store_int); ssize_t device_show_int(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%d\n", *(int *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_int); ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); if (kstrtobool(buf, ea->var) < 0) return -EINVAL; return size; } EXPORT_SYMBOL_GPL(device_store_bool); ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%d\n", *(bool *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_bool); /** * device_release - free device structure. * @kobj: device's kobject. * * This is called once the reference count for the object * reaches 0. We forward the call to the device's release * method, which should handle actually freeing the structure. */ static void device_release(struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); struct device_private *p = dev->p; /* * Some platform devices are driven without driver attached * and managed resources may have been acquired. Make sure * all resources are released. * * Drivers still can add resources into device after device * is deleted but alive, so release devres here to avoid * possible memory leak. 
*/ devres_release_all(dev); kfree(dev->dma_range_map); if (dev->release) dev->release(dev); else if (dev->type && dev->type->release) dev->type->release(dev); else if (dev->class && dev->class->dev_release) dev->class->dev_release(dev); else WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", dev_name(dev)); kfree(p); } static const void *device_namespace(const struct kobject *kobj) { const struct device *dev = kobj_to_dev(kobj); const void *ns = NULL; if (dev->class && dev->class->ns_type) ns = dev->class->namespace(dev); return ns; } static void device_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { const struct device *dev = kobj_to_dev(kobj); if (dev->class && dev->class->get_ownership) dev->class->get_ownership(dev, uid, gid); } static const struct kobj_type device_ktype = { .release = device_release, .sysfs_ops = &dev_sysfs_ops, .namespace = device_namespace, .get_ownership = device_get_ownership, }; static int dev_uevent_filter(const struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &device_ktype) { const struct device *dev = kobj_to_dev(kobj); if (dev->bus) return 1; if (dev->class) return 1; } return 0; } static const char *dev_uevent_name(const struct kobject *kobj) { const struct device *dev = kobj_to_dev(kobj); if (dev->bus) return dev->bus->name; if (dev->class) return dev->class->name; return NULL; } static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); int retval = 0; /* add device node properties if present */ if (MAJOR(dev->devt)) { const char *tmp; const char *name; umode_t mode = 0; kuid_t uid = GLOBAL_ROOT_UID; kgid_t gid = GLOBAL_ROOT_GID; add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt)); add_uevent_var(env, "MINOR=%u", MINOR(dev->devt)); name = device_get_devnode(dev, &mode, &uid, &gid, &tmp); if (name) { add_uevent_var(env, "DEVNAME=%s", name); if (mode) add_uevent_var(env, "DEVMODE=%#o", mode & 0777); if (!uid_eq(uid, GLOBAL_ROOT_UID)) add_uevent_var(env, "DEVUID=%u", from_kuid(&init_user_ns, uid)); if (!gid_eq(gid, GLOBAL_ROOT_GID)) add_uevent_var(env, "DEVGID=%u", from_kgid(&init_user_ns, gid)); kfree(tmp); } } if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); if (dev->driver) add_uevent_var(env, "DRIVER=%s", dev->driver->name); /* Add common DT information about the device */ of_device_uevent(dev, env); /* have the bus specific function add its stuff */ if (dev->bus && dev->bus->uevent) { retval = dev->bus->uevent(dev, env); if (retval) pr_debug("device: '%s': %s: bus uevent() returned %d\n", dev_name(dev), __func__, retval); } /* have the class specific function add its stuff */ if (dev->class && dev->class->dev_uevent) { retval = dev->class->dev_uevent(dev, env); if (retval) pr_debug("device: '%s': %s: class uevent() " "returned %d\n", dev_name(dev), __func__, retval); } /* have the device type specific function add its stuff */ if (dev->type && dev->type->uevent) { retval = dev->type->uevent(dev, env); if (retval) pr_debug("device: '%s': %s: dev_type uevent() " "returned %d\n", dev_name(dev), __func__, retval); } return retval; } static const struct kset_uevent_ops device_uevent_ops = { .filter = dev_uevent_filter, .name = dev_uevent_name, .uevent = dev_uevent, }; static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, char *buf) { struct kobject *top_kobj; struct kset *kset; struct 
kobj_uevent_env *env = NULL; int i; int len = 0; int retval; /* search the kset, the device belongs to */ top_kobj = &dev->kobj; while (!top_kobj->kset && top_kobj->parent) top_kobj = top_kobj->parent; if (!top_kobj->kset) goto out; kset = top_kobj->kset; if (!kset->uevent_ops || !kset->uevent_ops->uevent) goto out; /* respect filter */ if (kset->uevent_ops && kset->uevent_ops->filter) if (!kset->uevent_ops->filter(&dev->kobj)) goto out; env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); if (!env) return -ENOMEM; /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); if (retval) goto out; /* copy keys to file */ for (i = 0; i < env->envp_idx; i++) len += sysfs_emit_at(buf, len, "%s\n", env->envp[i]); out: kfree(env); return len; } static ssize_t uevent_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; rc = kobject_synth_uevent(&dev->kobj, buf, count); if (rc) { dev_err(dev, "uevent: failed to send synthetic uevent: %d\n", rc); return rc; } return count; } static DEVICE_ATTR_RW(uevent); static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); val = !dev->offline; device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { bool val; int ret; ret = kstrtobool(buf, &val); if (ret < 0) return ret; ret = lock_device_hotplug_sysfs(); if (ret) return ret; ret = val ? device_online(dev) : device_offline(dev); unlock_device_hotplug(); return ret < 0 ? ret : count; } static DEVICE_ATTR_RW(online); static ssize_t removable_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *loc; switch (dev->removable) { case DEVICE_REMOVABLE: loc = "removable"; break; case DEVICE_FIXED: loc = "fixed"; break; default: loc = "unknown"; } return sysfs_emit(buf, "%s\n", loc); } static DEVICE_ATTR_RO(removable); int device_add_groups(struct device *dev, const struct attribute_group **groups) { return sysfs_create_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_add_groups); void device_remove_groups(struct device *dev, const struct attribute_group **groups) { sysfs_remove_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_remove_groups); union device_attr_group_devres { const struct attribute_group *group; const struct attribute_group **groups; }; static void devm_attr_group_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; const struct attribute_group *group = devres->group; dev_dbg(dev, "%s: removing group %p\n", __func__, group); sysfs_remove_group(&dev->kobj, group); } static void devm_attr_groups_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; const struct attribute_group **groups = devres->groups; dev_dbg(dev, "%s: removing groups %p\n", __func__, groups); sysfs_remove_groups(&dev->kobj, groups); } /** * devm_device_add_group - given a device, create a managed attribute group * @dev: The device to create the group for * @grp: The attribute group to create * * This function creates a group for the first time. It will explicitly * warn and error if any of the attribute files being created already exist. * * Returns 0 on success or error code on failure. 
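 *
 * A minimal usage sketch, assuming the caller has defined an attribute
 * group named foo_group (for instance by hand or with ATTRIBUTE_GROUPS()):
 *
 *	static int foo_probe(struct platform_device *pdev)
 *	{
 *		return devm_device_add_group(&pdev->dev, &foo_group);
 *	}
 *
 * The group is torn down automatically when the device is unbound, so no
 * explicit removal call is required.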
*/ int devm_device_add_group(struct device *dev, const struct attribute_group *grp) { union device_attr_group_devres *devres; int error; devres = devres_alloc(devm_attr_group_remove, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; error = sysfs_create_group(&dev->kobj, grp); if (error) { devres_free(devres); return error; } devres->group = grp; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(devm_device_add_group); /** * devm_device_add_groups - create a bunch of managed attribute groups * @dev: The device to create the group for * @groups: The attribute groups to create, NULL terminated * * This function creates a bunch of managed attribute groups. If an error * occurs when creating a group, all previously created groups will be * removed, unwinding everything back to the original state when this * function was called. It will explicitly warn and error if any of the * attribute files being created already exist. * * Returns 0 on success or error code from sysfs_create_group on failure. */ int devm_device_add_groups(struct device *dev, const struct attribute_group **groups) { union device_attr_group_devres *devres; int error; devres = devres_alloc(devm_attr_groups_remove, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; error = sysfs_create_groups(&dev->kobj, groups); if (error) { devres_free(devres); return error; } devres->groups = groups; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(devm_device_add_groups); static int device_add_attrs(struct device *dev) { const struct class *class = dev->class; const struct device_type *type = dev->type; int error; if (class) { error = device_add_groups(dev, class->dev_groups); if (error) return error; } if (type) { error = device_add_groups(dev, type->groups); if (error) goto err_remove_class_groups; } error = device_add_groups(dev, dev->groups); if (error) goto err_remove_type_groups; if (device_supports_offline(dev) && !dev->offline_disabled) { error = device_create_file(dev, &dev_attr_online); if (error) goto err_remove_dev_groups; } if (fw_devlink_flags && !fw_devlink_is_permissive() && dev->fwnode) { error = device_create_file(dev, &dev_attr_waiting_for_supplier); if (error) goto err_remove_dev_online; } if (dev_removable_is_valid(dev)) { error = device_create_file(dev, &dev_attr_removable); if (error) goto err_remove_dev_waiting_for_supplier; } if (dev_add_physical_location(dev)) { error = device_add_group(dev, &dev_attr_physical_location_group); if (error) goto err_remove_dev_removable; } return 0; err_remove_dev_removable: device_remove_file(dev, &dev_attr_removable); err_remove_dev_waiting_for_supplier: device_remove_file(dev, &dev_attr_waiting_for_supplier); err_remove_dev_online: device_remove_file(dev, &dev_attr_online); err_remove_dev_groups: device_remove_groups(dev, dev->groups); err_remove_type_groups: if (type) device_remove_groups(dev, type->groups); err_remove_class_groups: if (class) device_remove_groups(dev, class->dev_groups); return error; } static void device_remove_attrs(struct device *dev) { const struct class *class = dev->class; const struct device_type *type = dev->type; if (dev->physical_location) { device_remove_group(dev, &dev_attr_physical_location_group); kfree(dev->physical_location); } device_remove_file(dev, &dev_attr_removable); device_remove_file(dev, &dev_attr_waiting_for_supplier); device_remove_file(dev, &dev_attr_online); device_remove_groups(dev, dev->groups); if (type) device_remove_groups(dev, type->groups); if (class) device_remove_groups(dev, class->dev_groups); } 
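/*
 * Illustrative sketch (not part of this file): how a driver might use the
 * managed attribute-group helper above from its probe routine, so that the
 * group is removed automatically by devres when the driver is unbound.
 * The foo_* names and the foo_mode attribute are hypothetical.
 *
 *	static ssize_t foo_mode_show(struct device *dev,
 *				     struct device_attribute *attr, char *buf)
 *	{
 *		return sysfs_emit(buf, "default\n");
 *	}
 *	static DEVICE_ATTR_RO(foo_mode);
 *
 *	static struct attribute *foo_attrs[] = {
 *		&dev_attr_foo_mode.attr,
 *		NULL
 *	};
 *	ATTRIBUTE_GROUPS(foo);
 *
 *	static int foo_probe(struct platform_device *pdev)
 *	{
 *		return devm_device_add_group(&pdev->dev, &foo_group);
 *	}
 */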
static ssize_t dev_show(struct device *dev, struct device_attribute *attr, char *buf) { return print_dev_t(buf, dev->devt); } static DEVICE_ATTR_RO(dev); /* /sys/devices/ */ struct kset *devices_kset; /** * devices_kset_move_before - Move device in the devices_kset's list. * @deva: Device to move. * @devb: Device @deva should come before. */ static void devices_kset_move_before(struct device *deva, struct device *devb) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s before %s\n", dev_name(deva), dev_name(devb)); spin_lock(&devices_kset->list_lock); list_move_tail(&deva->kobj.entry, &devb->kobj.entry); spin_unlock(&devices_kset->list_lock); } /** * devices_kset_move_after - Move device in the devices_kset's list. * @deva: Device to move * @devb: Device @deva should come after. */ static void devices_kset_move_after(struct device *deva, struct device *devb) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s after %s\n", dev_name(deva), dev_name(devb)); spin_lock(&devices_kset->list_lock); list_move(&deva->kobj.entry, &devb->kobj.entry); spin_unlock(&devices_kset->list_lock); } /** * devices_kset_move_last - move the device to the end of devices_kset's list. * @dev: device to move */ void devices_kset_move_last(struct device *dev) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s to end of list\n", dev_name(dev)); spin_lock(&devices_kset->list_lock); list_move_tail(&dev->kobj.entry, &devices_kset->list); spin_unlock(&devices_kset->list_lock); } /** * device_create_file - create sysfs attribute file for device. * @dev: device. * @attr: device attribute descriptor. */ int device_create_file(struct device *dev, const struct device_attribute *attr) { int error = 0; if (dev) { WARN(((attr->attr.mode & S_IWUGO) && !attr->store), "Attribute %s: write permission without 'store'\n", attr->attr.name); WARN(((attr->attr.mode & S_IRUGO) && !attr->show), "Attribute %s: read permission without 'show'\n", attr->attr.name); error = sysfs_create_file(&dev->kobj, &attr->attr); } return error; } EXPORT_SYMBOL_GPL(device_create_file); /** * device_remove_file - remove sysfs attribute file. * @dev: device. * @attr: device attribute descriptor. */ void device_remove_file(struct device *dev, const struct device_attribute *attr) { if (dev) sysfs_remove_file(&dev->kobj, &attr->attr); } EXPORT_SYMBOL_GPL(device_remove_file); /** * device_remove_file_self - remove sysfs attribute file from its own method. * @dev: device. * @attr: device attribute descriptor. * * See kernfs_remove_self() for details. */ bool device_remove_file_self(struct device *dev, const struct device_attribute *attr) { if (dev) return sysfs_remove_file_self(&dev->kobj, &attr->attr); else return false; } EXPORT_SYMBOL_GPL(device_remove_file_self); /** * device_create_bin_file - create sysfs binary attribute file for device. * @dev: device. * @attr: device binary attribute descriptor. */ int device_create_bin_file(struct device *dev, const struct bin_attribute *attr) { int error = -EINVAL; if (dev) error = sysfs_create_bin_file(&dev->kobj, attr); return error; } EXPORT_SYMBOL_GPL(device_create_bin_file); /** * device_remove_bin_file - remove sysfs binary attribute file * @dev: device. * @attr: device binary attribute descriptor. 
*/ void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr) { if (dev) sysfs_remove_bin_file(&dev->kobj, attr); } EXPORT_SYMBOL_GPL(device_remove_bin_file); static void klist_children_get(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); struct device *dev = p->device; get_device(dev); } static void klist_children_put(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); struct device *dev = p->device; put_device(dev); } /** * device_initialize - init device structure. * @dev: device. * * This prepares the device for use by other layers by initializing * its fields. * It is the first half of device_register(), if called by * that function, though it can also be called separately, so one * may use @dev's fields. In particular, get_device()/put_device() * may be used for reference counting of @dev after calling this * function. * * All fields in @dev must be initialized by the caller to 0, except * for those explicitly set to some other value. The simplest * approach is to use kzalloc() to allocate the structure containing * @dev. * * NOTE: Use put_device() to give up your reference instead of freeing * @dev directly once you have called this function. */ void device_initialize(struct device *dev) { dev->kobj.kset = devices_kset; kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); device_pm_init(dev); set_dev_node(dev, NUMA_NO_NODE); INIT_LIST_HEAD(&dev->links.consumers); INIT_LIST_HEAD(&dev->links.suppliers); INIT_LIST_HEAD(&dev->links.defer_sync); dev->links.status = DL_DEV_NO_DRIVER; #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) dev->dma_coherent = dma_default_coherent; #endif swiotlb_dev_init(dev); } EXPORT_SYMBOL_GPL(device_initialize); struct kobject *virtual_device_parent(struct device *dev) { static struct kobject *virtual_dir = NULL; if (!virtual_dir) virtual_dir = kobject_create_and_add("virtual", &devices_kset->kobj); return virtual_dir; } struct class_dir { struct kobject kobj; const struct class *class; }; #define to_class_dir(obj) container_of(obj, struct class_dir, kobj) static void class_dir_release(struct kobject *kobj) { struct class_dir *dir = to_class_dir(kobj); kfree(dir); } static const struct kobj_ns_type_operations *class_dir_child_ns_type(const struct kobject *kobj) { const struct class_dir *dir = to_class_dir(kobj); return dir->class->ns_type; } static const struct kobj_type class_dir_ktype = { .release = class_dir_release, .sysfs_ops = &kobj_sysfs_ops, .child_ns_type = class_dir_child_ns_type }; static struct kobject *class_dir_create_and_add(struct subsys_private *sp, struct kobject *parent_kobj) { struct class_dir *dir; int retval; dir = kzalloc(sizeof(*dir), GFP_KERNEL); if (!dir) return ERR_PTR(-ENOMEM); dir->class = sp->class; kobject_init(&dir->kobj, &class_dir_ktype); dir->kobj.kset = &sp->glue_dirs; retval = kobject_add(&dir->kobj, parent_kobj, "%s", sp->class->name); if (retval < 0) { kobject_put(&dir->kobj); return ERR_PTR(retval); } return &dir->kobj; } static DEFINE_MUTEX(gdp_mutex); static struct kobject *get_device_parent(struct device *dev, struct device *parent) { struct subsys_private *sp = class_to_subsys(dev->class); struct kobject *kobj = NULL; if (sp) { struct kobject *parent_kobj; struct kobject *k; /* * If 
we have no parent, we live in "virtual". * Class-devices with a non class-device as parent, live * in a "glue" directory to prevent namespace collisions. */ if (parent == NULL) parent_kobj = virtual_device_parent(dev); else if (parent->class && !dev->class->ns_type) { subsys_put(sp); return &parent->kobj; } else { parent_kobj = &parent->kobj; } mutex_lock(&gdp_mutex); /* find our class-directory at the parent and reference it */ spin_lock(&sp->glue_dirs.list_lock); list_for_each_entry(k, &sp->glue_dirs.list, entry) if (k->parent == parent_kobj) { kobj = kobject_get(k); break; } spin_unlock(&sp->glue_dirs.list_lock); if (kobj) { mutex_unlock(&gdp_mutex); subsys_put(sp); return kobj; } /* or create a new class-directory at the parent device */ k = class_dir_create_and_add(sp, parent_kobj); /* do not emit an uevent for this simple "glue" directory */ mutex_unlock(&gdp_mutex); subsys_put(sp); return k; } /* subsystems can specify a default root directory for their devices */ if (!parent && dev->bus) { struct device *dev_root = bus_get_dev_root(dev->bus); if (dev_root) { kobj = &dev_root->kobj; put_device(dev_root); return kobj; } } if (parent) return &parent->kobj; return NULL; } static inline bool live_in_glue_dir(struct kobject *kobj, struct device *dev) { struct subsys_private *sp; bool retval; if (!kobj || !dev->class) return false; sp = class_to_subsys(dev->class); if (!sp) return false; if (kobj->kset == &sp->glue_dirs) retval = true; else retval = false; subsys_put(sp); return retval; } static inline struct kobject *get_glue_dir(struct device *dev) { return dev->kobj.parent; } /** * kobject_has_children - Returns whether a kobject has children. * @kobj: the object to test * * This will return whether a kobject has other kobjects as children. * * It does NOT account for the presence of attribute files, only sub * directories. It also assumes there is no concurrent addition or * removal of such children, and thus relies on external locking. */ static inline bool kobject_has_children(struct kobject *kobj) { WARN_ON_ONCE(kref_read(&kobj->kref) == 0); return kobj->sd && kobj->sd->dir.subdirs; } /* * make sure cleaning up dir as the last step, we need to make * sure .release handler of kobject is run with holding the * global lock */ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) { unsigned int ref; /* see if we live in a "glue" directory */ if (!live_in_glue_dir(glue_dir, dev)) return; mutex_lock(&gdp_mutex); /** * There is a race condition between removing glue directory * and adding a new device under the glue directory. * * CPU1: CPU2: * * device_add() * get_device_parent() * class_dir_create_and_add() * kobject_add_internal() * create_dir() // create glue_dir * * device_add() * get_device_parent() * kobject_get() // get glue_dir * * device_del() * cleanup_glue_dir() * kobject_del(glue_dir) * * kobject_add() * kobject_add_internal() * create_dir() // in glue_dir * sysfs_create_dir_ns() * kernfs_create_dir_ns(sd) * * sysfs_remove_dir() // glue_dir->sd=NULL * sysfs_put() // free glue_dir->sd * * // sd is freed * kernfs_new_node(sd) * kernfs_get(glue_dir) * kernfs_add_one() * kernfs_put() * * Before CPU1 remove last child device under glue dir, if CPU2 add * a new device under glue dir, the glue_dir kobject reference count * will be increase to 2 in kobject_get(k). And CPU2 has been called * kernfs_create_dir_ns(). Meanwhile, CPU1 call sysfs_remove_dir() * and sysfs_put(). This result in glue_dir->sd is freed. 
* * Then the CPU2 will see a stale "empty" but still potentially used * glue dir around in kernfs_new_node(). * * In order to avoid this happening, we also should make sure that * kernfs_node for glue_dir is released in CPU1 only when refcount * for glue_dir kobj is 1. */ ref = kref_read(&glue_dir->kref); if (!kobject_has_children(glue_dir) && !--ref) kobject_del(glue_dir); kobject_put(glue_dir); mutex_unlock(&gdp_mutex); } static int device_add_class_symlinks(struct device *dev) { struct device_node *of_node = dev_of_node(dev); struct subsys_private *sp; int error; if (of_node) { error = sysfs_create_link(&dev->kobj, of_node_kobj(of_node), "of_node"); if (error) dev_warn(dev, "Error %d creating of_node link\n",error); /* An error here doesn't warrant bringing down the device */ } sp = class_to_subsys(dev->class); if (!sp) return 0; error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) goto out_devnode; if (dev->parent && device_is_not_partition(dev)) { error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, "device"); if (error) goto out_subsys; } /* link in the class directory pointing to the device */ error = sysfs_create_link(&sp->subsys.kobj, &dev->kobj, dev_name(dev)); if (error) goto out_device; goto exit; out_device: sysfs_remove_link(&dev->kobj, "device"); out_subsys: sysfs_remove_link(&dev->kobj, "subsystem"); out_devnode: sysfs_remove_link(&dev->kobj, "of_node"); exit: subsys_put(sp); return error; } static void device_remove_class_symlinks(struct device *dev) { struct subsys_private *sp = class_to_subsys(dev->class); if (dev_of_node(dev)) sysfs_remove_link(&dev->kobj, "of_node"); if (!sp) return; if (dev->parent && device_is_not_partition(dev)) sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_delete_link(&sp->subsys.kobj, &dev->kobj, dev_name(dev)); subsys_put(sp); } /** * dev_set_name - set a device name * @dev: device * @fmt: format string for the device's name */ int dev_set_name(struct device *dev, const char *fmt, ...) { va_list vargs; int err; va_start(vargs, fmt); err = kobject_set_name_vargs(&dev->kobj, fmt, vargs); va_end(vargs); return err; } EXPORT_SYMBOL_GPL(dev_set_name); /* select a /sys/dev/ directory for the device */ static struct kobject *device_to_dev_kobj(struct device *dev) { if (is_blockdev(dev)) return sysfs_dev_block_kobj; else return sysfs_dev_char_kobj; } static int device_create_sys_dev_entry(struct device *dev) { struct kobject *kobj = device_to_dev_kobj(dev); int error = 0; char devt_str[15]; if (kobj) { format_dev_t(devt_str, dev->devt); error = sysfs_create_link(kobj, &dev->kobj, devt_str); } return error; } static void device_remove_sys_dev_entry(struct device *dev) { struct kobject *kobj = device_to_dev_kobj(dev); char devt_str[15]; if (kobj) { format_dev_t(devt_str, dev->devt); sysfs_remove_link(kobj, devt_str); } } static int device_private_init(struct device *dev) { dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL); if (!dev->p) return -ENOMEM; dev->p->device = dev; klist_init(&dev->p->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->p->deferred_probe); return 0; } /** * device_add - add device to device hierarchy. * @dev: device. * * This is part 2 of device_register(), though may be called * separately _iff_ device_initialize() has been called separately. * * This adds @dev to the kobject hierarchy via kobject_add(), adds it * to the global and sibling lists for the device, then * adds it to the other relevant subsystems of the driver model. 
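 *
 * A minimal, illustrative sketch of the usual calling sequence, assuming a
 * caller-defined struct foo that embeds a struct device and provides a
 * foo_release() callback (the foo_* names are hypothetical, not part of
 * this API):
 *
 *	device_initialize(&foo->dev);
 *	foo->dev.parent = parent;
 *	foo->dev.release = foo_release;
 *	error = dev_set_name(&foo->dev, "foo%d", foo->id);
 *	if (!error)
 *		error = device_add(&foo->dev);
 *	if (error)
 *		put_device(&foo->dev);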
* * Do not call this routine or device_register() more than once for * any device structure. The driver model core is not designed to work * with devices that get unregistered and then spring back to life. * (Among other things, it's very hard to guarantee that all references * to the previous incarnation of @dev have been dropped.) Allocate * and register a fresh new struct device instead. * * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up your * reference instead. * * Rule of thumb is: if device_add() succeeds, you should call * device_del() when you want to get rid of it. If device_add() has * *not* succeeded, use *only* put_device() to drop the reference * count. */ int device_add(struct device *dev) { struct subsys_private *sp; struct device *parent; struct kobject *kobj; struct class_interface *class_intf; int error = -EINVAL; struct kobject *glue_dir = NULL; dev = get_device(dev); if (!dev) goto done; if (!dev->p) { error = device_private_init(dev); if (error) goto done; } /* * for statically allocated devices, which should all be converted * some day, we need to initialize the name. We prevent reading back * the name, and force the use of dev_name() */ if (dev->init_name) { error = dev_set_name(dev, "%s", dev->init_name); dev->init_name = NULL; } if (dev_name(dev)) error = 0; /* subsystems can specify simple device enumeration */ else if (dev->bus && dev->bus->dev_name) error = dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); else error = -EINVAL; if (error) goto name_error; pr_debug("device: '%s': %s\n", dev_name(dev), __func__); parent = get_device(dev->parent); kobj = get_device_parent(dev, parent); if (IS_ERR(kobj)) { error = PTR_ERR(kobj); goto parent_error; } if (kobj) dev->kobj.parent = kobj; /* use parent numa_node */ if (parent && (dev_to_node(dev) == NUMA_NO_NODE)) set_dev_node(dev, dev_to_node(parent)); /* first, register with generic layer. */ /* we require the name to be set before, and pass NULL */ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); if (error) { glue_dir = kobj; goto Error; } /* notify platform of device entry */ device_platform_notify(dev); error = device_create_file(dev, &dev_attr_uevent); if (error) goto attrError; error = device_add_class_symlinks(dev); if (error) goto SymlinkError; error = device_add_attrs(dev); if (error) goto AttrsError; error = bus_add_device(dev); if (error) goto BusError; error = dpm_sysfs_add(dev); if (error) goto DPMError; device_pm_add(dev); if (MAJOR(dev->devt)) { error = device_create_file(dev, &dev_attr_dev); if (error) goto DevAttrError; error = device_create_sys_dev_entry(dev); if (error) goto SysEntryError; devtmpfs_create_node(dev); } /* Notify clients of device addition. This call must come * after dpm_sysfs_add() and before kobject_uevent(). */ bus_notify(dev, BUS_NOTIFY_ADD_DEVICE); kobject_uevent(&dev->kobj, KOBJ_ADD); /* * Check if any of the other devices (consumers) have been waiting for * this device (supplier) to be added so that they can create a device * link to it. * * This needs to happen after device_pm_add() because device_link_add() * requires the supplier be registered before it's called. * * But this also needs to happen before bus_probe_device() to make sure * waiting consumers can link to it before the driver is bound to the * device and the driver sync_state callback is called for this device. 
*/ if (dev->fwnode && !dev->fwnode->dev) { dev->fwnode->dev = dev; fw_devlink_link_device(dev); } bus_probe_device(dev); /* * If all driver registration is done and a newly added device doesn't * match with any driver, don't block its consumers from probing in * case the consumer device is able to operate without this supplier. */ if (dev->fwnode && fw_devlink_drv_reg_done && !dev->can_match) fw_devlink_unblock_consumers(dev); if (parent) klist_add_tail(&dev->p->knode_parent, &parent->p->klist_children); sp = class_to_subsys(dev->class); if (sp) { mutex_lock(&sp->mutex); /* tie the class to the device */ klist_add_tail(&dev->p->knode_class, &sp->klist_devices); /* notify any interfaces that the device is here */ list_for_each_entry(class_intf, &sp->interfaces, node) if (class_intf->add_dev) class_intf->add_dev(dev); mutex_unlock(&sp->mutex); subsys_put(sp); } done: put_device(dev); return error; SysEntryError: if (MAJOR(dev->devt)) device_remove_file(dev, &dev_attr_dev); DevAttrError: device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: dev->driver = NULL; bus_remove_device(dev); BusError: device_remove_attrs(dev); AttrsError: device_remove_class_symlinks(dev); SymlinkError: device_remove_file(dev, &dev_attr_uevent); attrError: device_platform_notify_remove(dev); kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); Error: cleanup_glue_dir(dev, glue_dir); parent_error: put_device(parent); name_error: kfree(dev->p); dev->p = NULL; goto done; } EXPORT_SYMBOL_GPL(device_add); /** * device_register - register a device with the system. * @dev: pointer to the device structure * * This happens in two clean steps - initialize the device * and add it to the system. The two steps can be called * separately, but this is the easiest and most common. * I.e. you should only call the two helpers separately if * have a clearly defined need to use and refcount the device * before it is added to the hierarchy. * * For more information, see the kerneldoc for device_initialize() * and device_add(). * * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up the * reference initialized in this function instead. */ int device_register(struct device *dev) { device_initialize(dev); return device_add(dev); } EXPORT_SYMBOL_GPL(device_register); /** * get_device - increment reference count for device. * @dev: device. * * This simply forwards the call to kobject_get(), though * we do take care to provide for the case that we get a NULL * pointer passed in. */ struct device *get_device(struct device *dev) { return dev ? kobj_to_dev(kobject_get(&dev->kobj)) : NULL; } EXPORT_SYMBOL_GPL(get_device); /** * put_device - decrement reference count. * @dev: device in question. */ void put_device(struct device *dev) { /* might_sleep(); */ if (dev) kobject_put(&dev->kobj); } EXPORT_SYMBOL_GPL(put_device); bool kill_device(struct device *dev) { /* * Require the device lock and set the "dead" flag to guarantee that * the update behavior is consistent with the other bitfields near * it and that we cannot have an asynchronous probe routine trying * to run while we are tearing out the bus/class/sysfs from * underneath the device. */ device_lock_assert(dev); if (dev->p->dead) return false; dev->p->dead = true; return true; } EXPORT_SYMBOL_GPL(kill_device); /** * device_del - delete device from system. * @dev: device. * * This is the first part of the device unregistration * sequence. 
This removes the device from the lists we control * from here, has it removed from the other driver model * subsystems it was added to in device_add(), and removes it * from the kobject hierarchy. * * NOTE: this should be called manually _iff_ device_add() was * also called manually. */ void device_del(struct device *dev) { struct subsys_private *sp; struct device *parent = dev->parent; struct kobject *glue_dir = NULL; struct class_interface *class_intf; unsigned int noio_flag; device_lock(dev); kill_device(dev); device_unlock(dev); if (dev->fwnode && dev->fwnode->dev == dev) dev->fwnode->dev = NULL; /* Notify clients of device removal. This call must come * before dpm_sysfs_remove(). */ noio_flag = memalloc_noio_save(); bus_notify(dev, BUS_NOTIFY_DEL_DEVICE); dpm_sysfs_remove(dev); if (parent) klist_del(&dev->p->knode_parent); if (MAJOR(dev->devt)) { devtmpfs_delete_node(dev); device_remove_sys_dev_entry(dev); device_remove_file(dev, &dev_attr_dev); } sp = class_to_subsys(dev->class); if (sp) { device_remove_class_symlinks(dev); mutex_lock(&sp->mutex); /* notify any interfaces that the device is now gone */ list_for_each_entry(class_intf, &sp->interfaces, node) if (class_intf->remove_dev) class_intf->remove_dev(dev); /* remove the device from the class list */ klist_del(&dev->p->knode_class); mutex_unlock(&sp->mutex); subsys_put(sp); } device_remove_file(dev, &dev_attr_uevent); device_remove_attrs(dev); bus_remove_device(dev); device_pm_remove(dev); driver_deferred_probe_del(dev); device_platform_notify_remove(dev); device_links_purge(dev); /* * If a device does not have a driver attached, we need to clean * up any managed resources. We do this in device_release(), but * it's never called (and we leak the device) if a managed * resource holds a reference to the device. So release all * managed resources here, like we do in driver_detach(). We * still need to do so again in device_release() in case someone * adds a new resource after this point, though. */ devres_release_all(dev); bus_notify(dev, BUS_NOTIFY_REMOVED_DEVICE); kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); cleanup_glue_dir(dev, glue_dir); memalloc_noio_restore(noio_flag); put_device(parent); } EXPORT_SYMBOL_GPL(device_del); /** * device_unregister - unregister device from system. * @dev: device going away. * * We do this in two parts, like we do device_register(). First, * we remove it from all the subsystems with device_del(), then * we decrement the reference count via put_device(). If that * is the final reference count, the device will be cleaned up * via device_release() above. Otherwise, the structure will * stick around until the final reference to the device is dropped. 
*/ void device_unregister(struct device *dev) { pr_debug("device: '%s': %s\n", dev_name(dev), __func__); device_del(dev); put_device(dev); } EXPORT_SYMBOL_GPL(device_unregister); static struct device *prev_device(struct klist_iter *i) { struct klist_node *n = klist_prev(i); struct device *dev = NULL; struct device_private *p; if (n) { p = to_device_private_parent(n); dev = p->device; } return dev; } static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *p; if (n) { p = to_device_private_parent(n); dev = p->device; } return dev; } /** * device_get_devnode - path of device node file * @dev: device * @mode: returned file access mode * @uid: returned file owner * @gid: returned file group * @tmp: possibly allocated string * * Return the relative path of a possible device node. * Non-default names may need to allocate a memory to compose * a name. This memory is returned in tmp and needs to be * freed by the caller. */ const char *device_get_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp) { char *s; *tmp = NULL; /* the device type may provide a specific name */ if (dev->type && dev->type->devnode) *tmp = dev->type->devnode(dev, mode, uid, gid); if (*tmp) return *tmp; /* the class may provide a specific name */ if (dev->class && dev->class->devnode) *tmp = dev->class->devnode(dev, mode); if (*tmp) return *tmp; /* return name without allocation, tmp == NULL */ if (strchr(dev_name(dev), '!') == NULL) return dev_name(dev); /* replace '!' in the name with '/' */ s = kstrdup_and_replace(dev_name(dev), '!', '/', GFP_KERNEL); if (!s) return NULL; return *tmp = s; } /** * device_for_each_child - device child iterator. * @parent: parent struct device. * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. */ int device_for_each_child(struct device *parent, void *data, int (*fn)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; int error = 0; if (!parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); while (!error && (child = next_device(&i))) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child); /** * device_for_each_child_reverse - device child iterator in reversed order. * @parent: parent struct device. * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. */ int device_for_each_child_reverse(struct device *parent, void *data, int (*fn)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; int error = 0; if (!parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); while ((child = prev_device(&i)) && !error) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child_reverse); /** * device_find_child - device iterator for locating a particular device. 
* @parent: parent struct device * @match: Callback function to check device * @data: Data to pass to match function * * This is similar to the device_for_each_child() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero and a reference to the * current device can be obtained, this function will return to the caller * and not iterate over any more devices. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_child(struct device *parent, void *data, int (*match)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; if (!parent) return NULL; klist_iter_init(&parent->p->klist_children, &i); while ((child = next_device(&i))) if (match(child, data) && get_device(child)) break; klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child); /** * device_find_child_by_name - device iterator for locating a child device. * @parent: parent struct device * @name: name of the child device * * This is similar to the device_find_child() function above, but it * returns a reference to a device that has the name @name. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_child_by_name(struct device *parent, const char *name) { struct klist_iter i; struct device *child; if (!parent) return NULL; klist_iter_init(&parent->p->klist_children, &i); while ((child = next_device(&i))) if (sysfs_streq(dev_name(child), name) && get_device(child)) break; klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child_by_name); static int match_any(struct device *dev, void *unused) { return 1; } /** * device_find_any_child - device iterator for locating a child device, if any. * @parent: parent struct device * * This is similar to the device_find_child() function above, but it * returns a reference to a child device, if any. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_any_child(struct device *parent) { return device_find_child(parent, NULL, match_any); } EXPORT_SYMBOL_GPL(device_find_any_child); int __init devices_init(void) { devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); if (!devices_kset) return -ENOMEM; dev_kobj = kobject_create_and_add("dev", NULL); if (!dev_kobj) goto dev_kobj_err; sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj); if (!sysfs_dev_block_kobj) goto block_kobj_err; sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; device_link_wq = alloc_workqueue("device_link_wq", 0, 0); if (!device_link_wq) goto wq_err; return 0; wq_err: kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: kobject_put(dev_kobj); dev_kobj_err: kset_unregister(devices_kset); return -ENOMEM; } static int device_check_offline(struct device *dev, void *not_used) { int ret; ret = device_for_each_child(dev, NULL, device_check_offline); if (ret) return ret; return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0; } /** * device_offline - Prepare the device for hot-removal. * @dev: Device to be put offline. * * Execute the device bus type's .offline() callback, if present, to prepare * the device for a subsequent hot-removal. 
If that succeeds, the device must * not be used until either it is removed or its bus type's .online() callback * is executed. * * Call under device_hotplug_lock. */ int device_offline(struct device *dev) { int ret; if (dev->offline_disabled) return -EPERM; ret = device_for_each_child(dev, NULL, device_check_offline); if (ret) return ret; device_lock(dev); if (device_supports_offline(dev)) { if (dev->offline) { ret = 1; } else { ret = dev->bus->offline(dev); if (!ret) { kobject_uevent(&dev->kobj, KOBJ_OFFLINE); dev->offline = true; } } } device_unlock(dev); return ret; } /** * device_online - Put the device back online after successful device_offline(). * @dev: Device to be put back online. * * If device_offline() has been successfully executed for @dev, but the device * has not been removed subsequently, execute its bus type's .online() callback * to indicate that the device can be used again. * * Call under device_hotplug_lock. */ int device_online(struct device *dev) { int ret = 0; device_lock(dev); if (device_supports_offline(dev)) { if (dev->offline) { ret = dev->bus->online(dev); if (!ret) { kobject_uevent(&dev->kobj, KOBJ_ONLINE); dev->offline = false; } } else { ret = 1; } } device_unlock(dev); return ret; } struct root_device { struct device dev; struct module *owner; }; static inline struct root_device *to_root_device(struct device *d) { return container_of(d, struct root_device, dev); } static void root_device_release(struct device *dev) { kfree(to_root_device(dev)); } /** * __root_device_register - allocate and register a root device * @name: root device name * @owner: owner module of the root device, usually THIS_MODULE * * This function allocates a root device and registers it * using device_register(). In order to free the returned * device, use root_device_unregister(). * * Root devices are dummy devices which allow other devices * to be grouped under /sys/devices. Use this function to * allocate a root device and then use it as the parent of * any device which should appear under /sys/devices/{name} * * The /sys/devices/{name} directory will also contain a * 'module' symlink which points to the @owner directory * in sysfs. * * Returns &struct device pointer on success, or ERR_PTR() on error. * * Note: You probably want to use root_device_register(). */ struct device *__root_device_register(const char *name, struct module *owner) { struct root_device *root; int err = -ENOMEM; root = kzalloc(sizeof(struct root_device), GFP_KERNEL); if (!root) return ERR_PTR(err); err = dev_set_name(&root->dev, "%s", name); if (err) { kfree(root); return ERR_PTR(err); } root->dev.release = root_device_release; err = device_register(&root->dev); if (err) { put_device(&root->dev); return ERR_PTR(err); } #ifdef CONFIG_MODULES /* gotta find a "cleaner" way to do this */ if (owner) { struct module_kobject *mk = &owner->mkobj; err = sysfs_create_link(&root->dev.kobj, &mk->kobj, "module"); if (err) { device_unregister(&root->dev); return ERR_PTR(err); } root->owner = owner; } #endif return &root->dev; } EXPORT_SYMBOL_GPL(__root_device_register); /** * root_device_unregister - unregister and free a root device * @dev: device going away * * This function unregisters and cleans up a device that was created by * root_device_register(). 
*/ void root_device_unregister(struct device *dev) { struct root_device *root = to_root_device(dev); if (root->owner) sysfs_remove_link(&root->dev.kobj, "module"); device_unregister(dev); } EXPORT_SYMBOL_GPL(root_device_unregister); static void device_create_release(struct device *dev) { pr_debug("device: '%s': %s\n", dev_name(dev), __func__); kfree(dev); } static __printf(6, 0) struct device * device_create_groups_vargs(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, va_list args) { struct device *dev = NULL; int retval = -ENODEV; if (IS_ERR_OR_NULL(class)) goto error; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto error; } device_initialize(dev); dev->devt = devt; dev->class = class; dev->parent = parent; dev->groups = groups; dev->release = device_create_release; dev_set_drvdata(dev, drvdata); retval = kobject_set_name_vargs(&dev->kobj, fmt, args); if (retval) goto error; retval = device_add(dev); if (retval) goto error; return dev; error: put_device(dev); return ERR_PTR(retval); } /** * device_create - creates a device and registers it with sysfs * @class: pointer to the struct class that this device should be registered to * @parent: pointer to the parent struct device of this new device, if any * @devt: the dev_t for the char device to be added * @drvdata: the data to be added to the device for callbacks * @fmt: string for the device's name * * This function can be used by char device classes. A struct device * will be created in sysfs, registered to the specified class. * * A "dev" file will be created, showing the dev_t for the device, if * the dev_t is not 0,0. * If a pointer to a parent struct device is passed in, the newly created * struct device will be a child of that device in sysfs. * The pointer to the struct device will be returned from the call. * Any further sysfs files that might be required can be created using this * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. */ struct device *device_create(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) { va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL, fmt, vargs); va_end(vargs); return dev; } EXPORT_SYMBOL_GPL(device_create); /** * device_create_with_groups - creates a device and registers it with sysfs * @class: pointer to the struct class that this device should be registered to * @parent: pointer to the parent struct device of this new device, if any * @devt: the dev_t for the char device to be added * @drvdata: the data to be added to the device for callbacks * @groups: NULL-terminated list of attribute groups to be created * @fmt: string for the device's name * * This function can be used by char device classes. A struct device * will be created in sysfs, registered to the specified class. * Additional attributes specified in the groups parameter will also * be created automatically. * * A "dev" file will be created, showing the dev_t for the device, if * the dev_t is not 0,0. * If a pointer to a parent struct device is passed in, the newly created * struct device will be a child of that device in sysfs. * The pointer to the struct device will be returned from the call. * Any further sysfs files that might be required can be created using this * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. 
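 *
 * A hedged usage sketch (foo_class, foo_devt, foo_groups and the foo drvdata
 * pointer are hypothetical names, not defined in this file):
 *
 *	dev = device_create_with_groups(foo_class, parent, foo_devt, foo,
 *					foo_groups, "foo%d", foo->id);
 *	if (IS_ERR(dev))
 *		return PTR_ERR(dev);
 *
 * and on teardown, once the dev_t is no longer in use:
 *
 *	device_destroy(foo_class, foo_devt);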
*/ struct device *device_create_with_groups(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, ...) { va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, groups, fmt, vargs); va_end(vargs); return dev; } EXPORT_SYMBOL_GPL(device_create_with_groups); /** * device_destroy - removes a device that was created with device_create() * @class: pointer to the struct class that this device was registered with * @devt: the dev_t of the device that was previously registered * * This call unregisters and cleans up a device that was created with a * call to device_create(). */ void device_destroy(const struct class *class, dev_t devt) { struct device *dev; dev = class_find_device_by_devt(class, devt); if (dev) { put_device(dev); device_unregister(dev); } } EXPORT_SYMBOL_GPL(device_destroy); /** * device_rename - renames a device * @dev: the pointer to the struct device to be renamed * @new_name: the new name of the device * * It is the responsibility of the caller to provide mutual * exclusion between two different calls of device_rename * on the same device to ensure that new_name is valid and * won't conflict with other devices. * * Note: given that some subsystems (networking and infiniband) use this * function, with no immediate plans for this to change, we cannot assume or * require that this function not be called at all. * * However, if you're writing new code, do not call this function. The following * text from Kay Sievers offers some insight: * * Renaming devices is racy at many levels, symlinks and other stuff are not * replaced atomically, and you get a "move" uevent, but it's not easy to * connect the event to the old and new device. Device nodes are not renamed at * all, there isn't even support for that in the kernel now. * * In the meantime, during renaming, your target name might be taken by another * driver, creating conflicts. Or the old name is taken directly after you * renamed it -- then you get events for the same DEVPATH, before you even see * the "move" event. It's just a mess, and nothing new should ever rely on * kernel device renaming. Besides that, it's not even implemented now for * other things than (driver-core wise very simple) network devices. * * Make up a "real" name in the driver before you register anything, or add * some other attributes for userspace to find the device, or use udev to add * symlinks -- but never rename kernel devices later, it's a complete mess. We * don't even want to get into that and try to implement the missing pieces in * the core. We really have other pieces to fix in the driver core mess. 
:) */ int device_rename(struct device *dev, const char *new_name) { struct kobject *kobj = &dev->kobj; char *old_device_name = NULL; int error; dev = get_device(dev); if (!dev) return -EINVAL; dev_dbg(dev, "renaming to %s\n", new_name); old_device_name = kstrdup(dev_name(dev), GFP_KERNEL); if (!old_device_name) { error = -ENOMEM; goto out; } if (dev->class) { struct subsys_private *sp = class_to_subsys(dev->class); if (!sp) { error = -EINVAL; goto out; } error = sysfs_rename_link_ns(&sp->subsys.kobj, kobj, old_device_name, new_name, kobject_namespace(kobj)); subsys_put(sp); if (error) goto out; } error = kobject_rename(kobj, new_name); if (error) goto out; out: put_device(dev); kfree(old_device_name); return error; } EXPORT_SYMBOL_GPL(device_rename); static int device_move_class_links(struct device *dev, struct device *old_parent, struct device *new_parent) { int error = 0; if (old_parent) sysfs_remove_link(&dev->kobj, "device"); if (new_parent) error = sysfs_create_link(&dev->kobj, &new_parent->kobj, "device"); return error; } /** * device_move - moves a device to a new parent * @dev: the pointer to the struct device to be moved * @new_parent: the new parent of the device (can be NULL) * @dpm_order: how to reorder the dpm_list */ int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order) { int error; struct device *old_parent; struct kobject *new_parent_kobj; dev = get_device(dev); if (!dev) return -EINVAL; device_pm_lock(); new_parent = get_device(new_parent); new_parent_kobj = get_device_parent(dev, new_parent); if (IS_ERR(new_parent_kobj)) { error = PTR_ERR(new_parent_kobj); put_device(new_parent); goto out; } pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev), __func__, new_parent ? dev_name(new_parent) : "<NULL>"); error = kobject_move(&dev->kobj, new_parent_kobj); if (error) { cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } old_parent = dev->parent; dev->parent = new_parent; if (old_parent) klist_remove(&dev->p->knode_parent); if (new_parent) { klist_add_tail(&dev->p->knode_parent, &new_parent->p->klist_children); set_dev_node(dev, dev_to_node(new_parent)); } if (dev->class) { error = device_move_class_links(dev, old_parent, new_parent); if (error) { /* We ignore errors on cleanup since we're hosed anyway... */ device_move_class_links(dev, new_parent, old_parent); if (!kobject_move(&dev->kobj, &old_parent->kobj)) { if (new_parent) klist_remove(&dev->p->knode_parent); dev->parent = old_parent; if (old_parent) { klist_add_tail(&dev->p->knode_parent, &old_parent->p->klist_children); set_dev_node(dev, dev_to_node(old_parent)); } } cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } } switch (dpm_order) { case DPM_ORDER_NONE: break; case DPM_ORDER_DEV_AFTER_PARENT: device_pm_move_after(dev, new_parent); devices_kset_move_after(dev, new_parent); break; case DPM_ORDER_PARENT_BEFORE_DEV: device_pm_move_before(new_parent, dev); devices_kset_move_before(new_parent, dev); break; case DPM_ORDER_DEV_LAST: device_pm_move_last(dev); devices_kset_move_last(dev); break; } put_device(old_parent); out: device_pm_unlock(); put_device(dev); return error; } EXPORT_SYMBOL_GPL(device_move); static int device_attrs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { struct kobject *kobj = &dev->kobj; const struct class *class = dev->class; const struct device_type *type = dev->type; int error; if (class) { /* * Change the device groups of the device class for @dev to * @kuid/@kgid. 
*/ error = sysfs_groups_change_owner(kobj, class->dev_groups, kuid, kgid); if (error) return error; } if (type) { /* * Change the device groups of the device type for @dev to * @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, type->groups, kuid, kgid); if (error) return error; } /* Change the device groups of @dev to @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, dev->groups, kuid, kgid); if (error) return error; if (device_supports_offline(dev) && !dev->offline_disabled) { /* Change online device attributes of @dev to @kuid/@kgid. */ error = sysfs_file_change_owner(kobj, dev_attr_online.attr.name, kuid, kgid); if (error) return error; } return 0; } /** * device_change_owner - change the owner of an existing device. * @dev: device. * @kuid: new owner's kuid * @kgid: new owner's kgid * * This changes the owner of @dev and its corresponding sysfs entries to * @kuid/@kgid. This function closely mirrors how @dev was added via driver * core. * * Returns 0 on success or error code on failure. */ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int error; struct kobject *kobj = &dev->kobj; struct subsys_private *sp; dev = get_device(dev); if (!dev) return -EINVAL; /* * Change the kobject and the default attributes and groups of the * ktype associated with it to @kuid/@kgid. */ error = sysfs_change_owner(kobj, kuid, kgid); if (error) goto out; /* * Change the uevent file for @dev to the new owner. The uevent file * was created in a separate step when @dev got added and we mirror * that step here. */ error = sysfs_file_change_owner(kobj, dev_attr_uevent.attr.name, kuid, kgid); if (error) goto out; /* * Change the device groups, the device groups associated with the * device class, and the groups associated with the device type of @dev * to @kuid/@kgid. */ error = device_attrs_change_owner(dev, kuid, kgid); if (error) goto out; error = dpm_sysfs_change_owner(dev, kuid, kgid); if (error) goto out; /* * Change the owner of the symlink located in the class directory of * the device class associated with @dev which points to the actual * directory entry for @dev to @kuid/@kgid. This ensures that the * symlink shows the same permissions as its target. */ sp = class_to_subsys(dev->class); if (!sp) { error = -EINVAL; goto out; } error = sysfs_link_change_owner(&sp->subsys.kobj, &dev->kobj, dev_name(dev), kuid, kgid); subsys_put(sp); out: put_device(dev); return error; } EXPORT_SYMBOL_GPL(device_change_owner); /** * device_shutdown - call ->shutdown() on each device to shutdown. */ void device_shutdown(void) { struct device *dev, *parent; wait_for_device_probe(); device_block_probing(); cpufreq_suspend(); spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. * Beware that device unplug events may also start pulling * devices offline, even as the system is shutting down. */ while (!list_empty(&devices_kset->list)) { dev = list_entry(devices_kset->list.prev, struct device, kobj.entry); /* * hold reference count of device's parent to * prevent it from being freed because parent's * lock is to be held */ parent = get_device(dev->parent); get_device(dev); /* * Make sure the device is off the kset list, in the * event that dev->*->shutdown() doesn't remove it. 
*/ list_del_init(&dev->kobj.entry); spin_unlock(&devices_kset->list_lock); /* hold lock to avoid race with probe/release */ if (parent) device_lock(parent); device_lock(dev); /* Don't allow any more runtime suspends */ pm_runtime_get_noresume(dev); pm_runtime_barrier(dev); if (dev->class && dev->class->shutdown_pre) { if (initcall_debug) dev_info(dev, "shutdown_pre\n"); dev->class->shutdown_pre(dev); } if (dev->bus && dev->bus->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->bus->shutdown(dev); } else if (dev->driver && dev->driver->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->driver->shutdown(dev); } device_unlock(dev); if (parent) device_unlock(parent); put_device(dev); put_device(parent); spin_lock(&devices_kset->list_lock); } spin_unlock(&devices_kset->list_lock); } /* * Device logging functions */ #ifdef CONFIG_PRINTK static void set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) { const char *subsys; memset(dev_info, 0, sizeof(*dev_info)); if (dev->class) subsys = dev->class->name; else if (dev->bus) subsys = dev->bus->name; else return; strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); /* * Add device identifier DEVICE=: * b12:8 block dev_t * c127:3 char dev_t * n8 netdev ifindex * +sound:card0 subsystem:devname */ if (MAJOR(dev->devt)) { char c; if (strcmp(subsys, "block") == 0) c = 'b'; else c = 'c'; snprintf(dev_info->device, sizeof(dev_info->device), "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt)); } else if (strcmp(subsys, "net") == 0) { struct net_device *net = to_net_dev(dev); snprintf(dev_info->device, sizeof(dev_info->device), "n%u", net->ifindex); } else { snprintf(dev_info->device, sizeof(dev_info->device), "+%s:%s", subsys, dev_name(dev)); } } int dev_vprintk_emit(int level, const struct device *dev, const char *fmt, va_list args) { struct dev_printk_info dev_info; set_dev_info(dev, &dev_info); return vprintk_emit(0, level, &dev_info, fmt, args); } EXPORT_SYMBOL(dev_vprintk_emit); int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...) { va_list args; int r; va_start(args, fmt); r = dev_vprintk_emit(level, dev, fmt, args); va_end(args); return r; } EXPORT_SYMBOL(dev_printk_emit); static void __dev_printk(const char *level, const struct device *dev, struct va_format *vaf) { if (dev) dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", dev_driver_string(dev), dev_name(dev), vaf); else printk("%s(NULL device *): %pV", level, vaf); } void _dev_printk(const char *level, const struct device *dev, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; __dev_printk(level, dev, &vaf); va_end(args); } EXPORT_SYMBOL(_dev_printk); #define define_dev_printk_level(func, kern_level) \ void func(const struct device *dev, const char *fmt, ...) 
\ { \ struct va_format vaf; \ va_list args; \ \ va_start(args, fmt); \ \ vaf.fmt = fmt; \ vaf.va = &args; \ \ __dev_printk(kern_level, dev, &vaf); \ \ va_end(args); \ } \ EXPORT_SYMBOL(func); define_dev_printk_level(_dev_emerg, KERN_EMERG); define_dev_printk_level(_dev_alert, KERN_ALERT); define_dev_printk_level(_dev_crit, KERN_CRIT); define_dev_printk_level(_dev_err, KERN_ERR); define_dev_printk_level(_dev_warn, KERN_WARNING); define_dev_printk_level(_dev_notice, KERN_NOTICE); define_dev_printk_level(_dev_info, KERN_INFO); #endif /** * dev_err_probe - probe error check and log helper * @dev: the pointer to the struct device * @err: error value to test * @fmt: printf-style format string * @...: arguments as specified in the format string * * This helper implements common pattern present in probe functions for error * checking: print debug or error message depending if the error value is * -EPROBE_DEFER and propagate error upwards. * In case of -EPROBE_DEFER it sets also defer probe reason, which can be * checked later by reading devices_deferred debugfs attribute. * It replaces code sequence:: * * if (err != -EPROBE_DEFER) * dev_err(dev, ...); * else * dev_dbg(dev, ...); * return err; * * with:: * * return dev_err_probe(dev, err, ...); * * Using this helper in your probe function is totally fine even if @err is * known to never be -EPROBE_DEFER. * The benefit compared to a normal dev_err() is the standardized format * of the error code, it being emitted symbolically (i.e. you get "EAGAIN" * instead of "-35") and the fact that the error code is returned which allows * more compact error paths. * * Returns @err. */ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; if (err != -EPROBE_DEFER) { dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); } else { device_set_deferred_probe_reason(dev, &vaf); dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); } va_end(args); return err; } EXPORT_SYMBOL_GPL(dev_err_probe); static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) { return fwnode && !IS_ERR(fwnode->secondary); } /** * set_primary_fwnode - Change the primary firmware node of a given device. * @dev: Device to handle. * @fwnode: New primary firmware node of the device. * * Set the device's firmware node pointer to @fwnode, but if a secondary * firmware node of the device is present, preserve it. * * Valid fwnode cases are: * - primary --> secondary --> -ENODEV * - primary --> NULL * - secondary --> -ENODEV * - NULL */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { struct device *parent = dev->parent; struct fwnode_handle *fn = dev->fwnode; if (fwnode) { if (fwnode_is_primary(fn)) fn = fn->secondary; if (fn) { WARN_ON(fwnode->secondary); fwnode->secondary = fn; } dev->fwnode = fwnode; } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; /* Skip nullifying fn->secondary if the primary is shared */ if (parent && fn == parent->fwnode) return; /* Set fn->secondary = NULL, so fn remains the primary fwnode */ fn->secondary = NULL; } else { dev->fwnode = NULL; } } } EXPORT_SYMBOL_GPL(set_primary_fwnode); /** * set_secondary_fwnode - Change the secondary firmware node of a given device. * @dev: Device to handle. * @fwnode: New secondary firmware node of the device. * * If a primary firmware node of the device is present, set its secondary * pointer to @fwnode. Otherwise, set the device's firmware node pointer to * @fwnode. 
*/ void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { if (fwnode) fwnode->secondary = ERR_PTR(-ENODEV); if (fwnode_is_primary(dev->fwnode)) dev->fwnode->secondary = fwnode; else dev->fwnode = fwnode; } EXPORT_SYMBOL_GPL(set_secondary_fwnode); /** * device_set_of_node_from_dev - reuse device-tree node of another device * @dev: device whose device-tree node is being set * @dev2: device whose device-tree node is being reused * * Takes another reference to the new device-tree node after first dropping * any reference held to the old node. */ void device_set_of_node_from_dev(struct device *dev, const struct device *dev2) { of_node_put(dev->of_node); dev->of_node = of_node_get(dev2->of_node); dev->of_node_reused = true; } EXPORT_SYMBOL_GPL(device_set_of_node_from_dev); void device_set_node(struct device *dev, struct fwnode_handle *fwnode) { dev->fwnode = fwnode; dev->of_node = to_of_node(fwnode); } EXPORT_SYMBOL_GPL(device_set_node); int device_match_name(struct device *dev, const void *name) { return sysfs_streq(dev_name(dev), name); } EXPORT_SYMBOL_GPL(device_match_name); int device_match_of_node(struct device *dev, const void *np) { return dev->of_node == np; } EXPORT_SYMBOL_GPL(device_match_of_node); int device_match_fwnode(struct device *dev, const void *fwnode) { return dev_fwnode(dev) == fwnode; } EXPORT_SYMBOL_GPL(device_match_fwnode); int device_match_devt(struct device *dev, const void *pdevt) { return dev->devt == *(dev_t *)pdevt; } EXPORT_SYMBOL_GPL(device_match_devt); int device_match_acpi_dev(struct device *dev, const void *adev) { return ACPI_COMPANION(dev) == adev; } EXPORT_SYMBOL(device_match_acpi_dev); int device_match_acpi_handle(struct device *dev, const void *handle) { return ACPI_HANDLE(dev) == handle; } EXPORT_SYMBOL(device_match_acpi_handle); int device_match_any(struct device *dev, const void *unused) { return 1; } EXPORT_SYMBOL_GPL(device_match_any);
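/*
 * Illustrative sketch (not part of this file): combining the child-iterator
 * and lookup helpers above. A hypothetical caller counts its children and
 * looks one up by name; foo_count_one(), foo_scan() and the "foo0" name are
 * examples only.
 *
 *	static int foo_count_one(struct device *dev, void *data)
 *	{
 *		unsigned int *count = data;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	static void foo_scan(struct device *parent)
 *	{
 *		unsigned int count = 0;
 *		struct device *child;
 *
 *		device_for_each_child(parent, &count, foo_count_one);
 *
 *		child = device_find_child_by_name(parent, "foo0");
 *		if (child) {
 *			dev_info(child, "found among %u children\n", count);
 *			put_device(child);
 *		}
 *	}
 */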
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM filemap #if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_FILEMAP_H #include <linux/types.h> #include <linux/tracepoint.h> #include <linux/mm.h> #include <linux/memcontrol.h> #include <linux/device.h> #include <linux/kdev_t.h> #include <linux/errseq.h> DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, TP_PROTO(struct folio *folio), TP_ARGS(folio), TP_STRUCT__entry( __field(unsigned long, pfn) __field(unsigned long, i_ino) __field(unsigned long, index) __field(dev_t, s_dev) __field(unsigned char, order) ), TP_fast_assign( __entry->pfn = folio_pfn(folio); __entry->i_ino = folio->mapping->host->i_ino; __entry->index = folio->index; if (folio->mapping->host->i_sb) __entry->s_dev = folio->mapping->host->i_sb->s_dev; else __entry->s_dev = folio->mapping->host->i_rdev; __entry->order = folio_order(folio); ), TP_printk("dev %d:%d ino %lx pfn=0x%lx ofs=%lu order=%u", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, __entry->pfn, __entry->index << PAGE_SHIFT, __entry->order) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache, TP_PROTO(struct folio *folio), TP_ARGS(folio) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, TP_PROTO(struct folio *folio), TP_ARGS(folio) ); TRACE_EVENT(filemap_set_wb_err, TP_PROTO(struct address_space *mapping, errseq_t eseq), TP_ARGS(mapping, eseq), TP_STRUCT__entry( __field(unsigned long, i_ino) __field(dev_t, s_dev) __field(errseq_t, errseq) ), TP_fast_assign( __entry->i_ino = mapping->host->i_ino; __entry->errseq = eseq; if (mapping->host->i_sb) __entry->s_dev = mapping->host->i_sb->s_dev; else __entry->s_dev = mapping->host->i_rdev; ), TP_printk("dev=%d:%d ino=0x%lx errseq=0x%x", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, __entry->errseq) ); TRACE_EVENT(file_check_and_advance_wb_err, TP_PROTO(struct file *file, errseq_t old), TP_ARGS(file, old), TP_STRUCT__entry( __field(struct file *, file) __field(unsigned long, i_ino) __field(dev_t, s_dev) __field(errseq_t, old) __field(errseq_t, new) ), TP_fast_assign( __entry->file = file; __entry->i_ino = file->f_mapping->host->i_ino; if (file->f_mapping->host->i_sb) __entry->s_dev = file->f_mapping->host->i_sb->s_dev; else __entry->s_dev = file->f_mapping->host->i_rdev; __entry->old = old; __entry->new = file->f_wb_err; ), TP_printk("file=%p dev=%d:%d ino=0x%lx old=0x%x new=0x%x", __entry->file, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, __entry->old, __entry->new) ); #endif /* _TRACE_FILEMAP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
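/*
 * Hypothetical caller sketch (not part of this header): one .c file
 * instantiates the tracepoints by defining CREATE_TRACE_POINTS before
 * including the header, after which the generated trace_*() calls can be
 * used.  The wrapper function below is invented for illustration only.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/filemap.h>

static void demo_trace_removal(struct folio *folio)
{
	/* Fires the mm_filemap_delete_from_page_cache event defined above. */
	trace_mm_filemap_delete_from_page_cache(folio);
}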
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_TIMEX_H
#define _ASM_X86_TIMEX_H

#include <asm/processor.h>
#include <asm/tsc.h>

static inline unsigned long random_get_entropy(void)
{
	if (!IS_ENABLED(CONFIG_X86_TSC) &&
	    !cpu_feature_enabled(X86_FEATURE_TSC))
		return random_get_entropy_fallback();
	return rdtsc();
}
#define random_get_entropy random_get_entropy

/* Assume we use the PIT time source for the clock tick */
#define CLOCK_TICK_RATE		PIT_TICK_RATE

#define ARCH_HAS_READ_CURRENT_TIMER

#endif /* _ASM_X86_TIMEX_H */
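/*
 * Hypothetical consumer sketch (not part of this header): generic code
 * samples the cycle counter through random_get_entropy(); with the
 * override above this resolves to rdtsc() when a TSC is available and to
 * random_get_entropy_fallback() otherwise.
 */
#include <linux/timex.h>

static unsigned long demo_sample_cycles(void)
{
	return random_get_entropy();
}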
4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> * Copyright (C) 2002 Andi Kleen * * This handles calls from both 32bit and 64bit mode. * * Lock order: * context.ldt_usr_sem * mmap_lock * context.lock */ #include <linux/errno.h> #include <linux/gfp.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> #include <asm/ldt.h> #include <asm/tlb.h> #include <asm/desc.h> #include <asm/mmu_context.h> #include <asm/pgtable_areas.h> #include <xen/xen.h> /* This is a multiple of PAGE_SIZE. 
*/ #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) static inline void *ldt_slot_va(int slot) { return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); } void load_mm_ldt(struct mm_struct *mm) { struct ldt_struct *ldt; /* READ_ONCE synchronizes with smp_store_release */ ldt = READ_ONCE(mm->context.ldt); /* * Any change to mm->context.ldt is followed by an IPI to all * CPUs with the mm active. The LDT will not be freed until * after the IPI is handled by all such CPUs. This means that * if the ldt_struct changes before we return, the values we see * will be safe, and the new values will be loaded before we run * any user code. * * NB: don't try to convert this to use RCU without extreme care. * We would still need IRQs off, because we don't want to change * the local LDT after an IPI loaded a newer value than the one * that we can see. */ if (unlikely(ldt)) { if (static_cpu_has(X86_FEATURE_PTI)) { if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { /* * Whoops -- either the new LDT isn't mapped * (if slot == -1) or is mapped into a bogus * slot (if slot > 1). */ clear_LDT(); return; } /* * If page table isolation is enabled, ldt->entries * will not be mapped in the userspace pagetables. * Tell the CPU to access the LDT through the alias * at ldt_slot_va(ldt->slot). */ set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); } else { set_ldt(ldt->entries, ldt->nr_entries); } } else { clear_LDT(); } } void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { /* * Load the LDT if either the old or new mm had an LDT. * * An mm will never go from having an LDT to not having an LDT. Two * mms never share an LDT, so we don't gain anything by checking to * see whether the LDT changed. There's also no guarantee that * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, * then prev->context.ldt will also be non-NULL. * * If we really cared, we could optimize the case where prev == next * and we're exiting lazy mode. Most of the time, if this happens, * we don't actually need to reload LDTR, but modify_ldt() is mostly * used by legacy code and emulators where we don't need this level of * performance. * * This uses | instead of || because it generates better code. */ if (unlikely((unsigned long)prev->context.ldt | (unsigned long)next->context.ldt)) load_mm_ldt(next); DEBUG_LOCKS_WARN_ON(preemptible()); } static void refresh_ldt_segments(void) { #ifdef CONFIG_X86_64 unsigned short sel; /* * Make sure that the cached DS and ES descriptors match the updated * LDT. */ savesegment(ds, sel); if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) loadsegment(ds, sel); savesegment(es, sel); if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) loadsegment(es, sel); #endif } /* context.lock is held by the task which issued the smp function call */ static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) return; load_mm_ldt(mm); refresh_ldt_segments(); } /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) { struct ldt_struct *new_ldt; unsigned int alloc_size; if (num_entries > LDT_ENTRIES) return NULL; new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT); if (!new_ldt) return NULL; BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); alloc_size = num_entries * LDT_ENTRY_SIZE; /* * Xen is very picky: it requires a page-aligned LDT that has no * trailing nonzero bytes in any page that contains LDT descriptors. 
* Keep it simple: zero the whole allocation and never allocate less * than PAGE_SIZE. */ if (alloc_size > PAGE_SIZE) new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); else new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!new_ldt->entries) { kfree(new_ldt); return NULL; } /* The new LDT isn't aliased for PTI yet. */ new_ldt->slot = -1; new_ldt->nr_entries = num_entries; return new_ldt; } #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION static void do_sanity_check(struct mm_struct *mm, bool had_kernel_mapping, bool had_user_mapping) { if (mm->context.ldt) { /* * We already had an LDT. The top-level entry should already * have been allocated and synchronized with the usermode * tables. */ WARN_ON(!had_kernel_mapping); if (boot_cpu_has(X86_FEATURE_PTI)) WARN_ON(!had_user_mapping); } else { /* * This is the first time we're mapping an LDT for this process. * Sync the pgd to the usermode tables. */ WARN_ON(had_kernel_mapping); if (boot_cpu_has(X86_FEATURE_PTI)) WARN_ON(had_user_mapping); } } #ifdef CONFIG_X86_PAE static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va) { p4d_t *p4d; pud_t *pud; if (pgd->pgd == 0) return NULL; p4d = p4d_offset(pgd, va); if (p4d_none(*p4d)) return NULL; pud = pud_offset(p4d, va); if (pud_none(*pud)) return NULL; return pmd_offset(pud, va); } static void map_ldt_struct_to_user(struct mm_struct *mm) { pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR); pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); pmd_t *k_pmd, *u_pmd; k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR); u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR); if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) set_pmd(u_pmd, *k_pmd); } static void sanity_check_ldt_mapping(struct mm_struct *mm) { pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR); pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); bool had_kernel, had_user; pmd_t *k_pmd, *u_pmd; k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR); u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR); had_kernel = (k_pmd->pmd != 0); had_user = (u_pmd->pmd != 0); do_sanity_check(mm, had_kernel, had_user); } #else /* !CONFIG_X86_PAE */ static void map_ldt_struct_to_user(struct mm_struct *mm) { pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR); if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) set_pgd(kernel_to_user_pgdp(pgd), *pgd); } static void sanity_check_ldt_mapping(struct mm_struct *mm) { pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR); bool had_kernel = (pgd->pgd != 0); bool had_user = (kernel_to_user_pgdp(pgd)->pgd != 0); do_sanity_check(mm, had_kernel, had_user); } #endif /* CONFIG_X86_PAE */ /* * If PTI is enabled, this maps the LDT into the kernelmode and * usermode tables for the given mm. */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) { unsigned long va; bool is_vmalloc; spinlock_t *ptl; int i, nr_pages; if (!boot_cpu_has(X86_FEATURE_PTI)) return 0; /* * Any given ldt_struct should have map_ldt_struct() called at most * once. */ WARN_ON(ldt->slot != -1); /* Check if the current mappings are sane */ sanity_check_ldt_mapping(mm); is_vmalloc = is_vmalloc_addr(ldt->entries); nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); for (i = 0; i < nr_pages; i++) { unsigned long offset = i << PAGE_SHIFT; const void *src = (char *)ldt->entries + offset; unsigned long pfn; pgprot_t pte_prot; pte_t pte, *ptep; va = (unsigned long)ldt_slot_va(slot) + offset; pfn = is_vmalloc ? vmalloc_to_pfn(src) : page_to_pfn(virt_to_page(src)); /* * Treat the PTI LDT range as a *userspace* range. 
* get_locked_pte() will allocate all needed pagetables * and account for them in this mm. */ ptep = get_locked_pte(mm, va, &ptl); if (!ptep) return -ENOMEM; /* * Map it RO so the easy to find address is not a primary * target via some kernel interface which misses a * permission check. */ pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL); /* Filter out unsuppored __PAGE_KERNEL* bits: */ pgprot_val(pte_prot) &= __supported_pte_mask; pte = pfn_pte(pfn, pte_prot); set_pte_at(mm, va, ptep, pte); pte_unmap_unlock(ptep, ptl); } /* Propagate LDT mapping to the user page-table */ map_ldt_struct_to_user(mm); ldt->slot = slot; return 0; } static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) { unsigned long va; int i, nr_pages; if (!ldt) return; /* LDT map/unmap is only required for PTI */ if (!boot_cpu_has(X86_FEATURE_PTI)) return; nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); for (i = 0; i < nr_pages; i++) { unsigned long offset = i << PAGE_SHIFT; spinlock_t *ptl; pte_t *ptep; va = (unsigned long)ldt_slot_va(ldt->slot) + offset; ptep = get_locked_pte(mm, va, &ptl); if (!WARN_ON_ONCE(!ptep)) { pte_clear(mm, va, ptep); pte_unmap_unlock(ptep, ptl); } } va = (unsigned long)ldt_slot_va(ldt->slot); flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); } #else /* !CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) { return 0; } static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) { } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static void free_ldt_pgtables(struct mm_struct *mm) { #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION struct mmu_gather tlb; unsigned long start = LDT_BASE_ADDR; unsigned long end = LDT_END_ADDR; if (!boot_cpu_has(X86_FEATURE_PTI)) return; /* * Although free_pgd_range() is intended for freeing user * page-tables, it also works out for kernel mappings on x86. * We use tlb_gather_mmu_fullmm() to avoid confusing the * range-tracking logic in __tlb_adjust_range(). */ tlb_gather_mmu_fullmm(&tlb, mm); free_pgd_range(&tlb, start, end, start, end); tlb_finish_mmu(&tlb); #endif } /* After calling this, the LDT is immutable. */ static void finalize_ldt_struct(struct ldt_struct *ldt) { paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); } static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt) { mutex_lock(&mm->context.lock); /* Synchronizes with READ_ONCE in load_mm_ldt. */ smp_store_release(&mm->context.ldt, ldt); /* Activate the LDT for all CPUs using currents mm. */ on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true); mutex_unlock(&mm->context.lock); } static void free_ldt_struct(struct ldt_struct *ldt) { if (likely(!ldt)) return; paravirt_free_ldt(ldt->entries, ldt->nr_entries); if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE) vfree_atomic(ldt->entries); else free_page((unsigned long)ldt->entries); kfree(ldt); } /* * Called on fork from arch_dup_mmap(). Just copy the current LDT state, * the new task is not running, so nothing can be installed. 
*/ int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ldt_struct *new_ldt; int retval = 0; if (!old_mm) return 0; mutex_lock(&old_mm->context.lock); if (!old_mm->context.ldt) goto out_unlock; new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); if (!new_ldt) { retval = -ENOMEM; goto out_unlock; } memcpy(new_ldt->entries, old_mm->context.ldt->entries, new_ldt->nr_entries * LDT_ENTRY_SIZE); finalize_ldt_struct(new_ldt); retval = map_ldt_struct(mm, new_ldt, 0); if (retval) { free_ldt_pgtables(mm); free_ldt_struct(new_ldt); goto out_unlock; } mm->context.ldt = new_ldt; out_unlock: mutex_unlock(&old_mm->context.lock); return retval; } /* * No need to lock the MM as we are the last user * * 64bit: Don't touch the LDT register - we're already in the next thread. */ void destroy_context_ldt(struct mm_struct *mm) { free_ldt_struct(mm->context.ldt); mm->context.ldt = NULL; } void ldt_arch_exit_mmap(struct mm_struct *mm) { free_ldt_pgtables(mm); } static int read_ldt(void __user *ptr, unsigned long bytecount) { struct mm_struct *mm = current->mm; unsigned long entries_size; int retval; down_read(&mm->context.ldt_usr_sem); if (!mm->context.ldt) { retval = 0; goto out_unlock; } if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE; if (entries_size > bytecount) entries_size = bytecount; if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) { retval = -EFAULT; goto out_unlock; } if (entries_size != bytecount) { /* Zero-fill the rest and pretend we read bytecount bytes. */ if (clear_user(ptr + entries_size, bytecount - entries_size)) { retval = -EFAULT; goto out_unlock; } } retval = bytecount; out_unlock: up_read(&mm->context.ldt_usr_sem); return retval; } static int read_default_ldt(void __user *ptr, unsigned long bytecount) { /* CHECKME: Can we use _one_ random number ? */ #ifdef CONFIG_X86_32 unsigned long size = 5 * sizeof(struct desc_struct); #else unsigned long size = 128; #endif if (bytecount > size) bytecount = size; if (clear_user(ptr, bytecount)) return -EFAULT; return bytecount; } static bool allow_16bit_segments(void) { if (!IS_ENABLED(CONFIG_X86_16BIT)) return false; #ifdef CONFIG_XEN_PV /* * Xen PV does not implement ESPFIX64, which means that 16-bit * segments will not work correctly. Until either Xen PV implements * ESPFIX64 and can signal this fact to the guest or unless someone * provides compelling evidence that allowing broken 16-bit segments * is worthwhile, disallow 16-bit segments under Xen PV. */ if (xen_pv_domain()) { pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n"); return false; } #endif return true; } static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) { struct mm_struct *mm = current->mm; struct ldt_struct *new_ldt, *old_ldt; unsigned int old_nr_entries, new_nr_entries; struct user_desc ldt_info; struct desc_struct ldt; int error; error = -EINVAL; if (bytecount != sizeof(ldt_info)) goto out; error = -EFAULT; if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) goto out; error = -EINVAL; if (ldt_info.entry_number >= LDT_ENTRIES) goto out; if (ldt_info.contents == 3) { if (oldmode) goto out; if (ldt_info.seg_not_present == 0) goto out; } if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || LDT_empty(&ldt_info)) { /* The user wants to clear the entry. 
*/ memset(&ldt, 0, sizeof(ldt)); } else { if (!ldt_info.seg_32bit && !allow_16bit_segments()) { error = -EINVAL; goto out; } fill_ldt(&ldt, &ldt_info); if (oldmode) ldt.avl = 0; } if (down_write_killable(&mm->context.ldt_usr_sem)) return -EINTR; old_ldt = mm->context.ldt; old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries); error = -ENOMEM; new_ldt = alloc_ldt_struct(new_nr_entries); if (!new_ldt) goto out_unlock; if (old_ldt) memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE); new_ldt->entries[ldt_info.entry_number] = ldt; finalize_ldt_struct(new_ldt); /* * If we are using PTI, map the new LDT into the userspace pagetables. * If there is already an LDT, use the other slot so that other CPUs * will continue to use the old LDT until install_ldt() switches * them over to the new LDT. */ error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0); if (error) { /* * This only can fail for the first LDT setup. If an LDT is * already installed then the PTE page is already * populated. Mop up a half populated page table. */ if (!WARN_ON_ONCE(old_ldt)) free_ldt_pgtables(mm); free_ldt_struct(new_ldt); goto out_unlock; } install_ldt(mm, new_ldt); unmap_ldt_struct(mm, old_ldt); free_ldt_struct(old_ldt); error = 0; out_unlock: up_write(&mm->context.ldt_usr_sem); out: return error; } SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , unsigned long , bytecount) { int ret = -ENOSYS; switch (func) { case 0: ret = read_ldt(ptr, bytecount); break; case 1: ret = write_ldt(ptr, bytecount, 1); break; case 2: ret = read_default_ldt(ptr, bytecount); break; case 0x11: ret = write_ldt(ptr, bytecount, 0); break; } /* * The SYSCALL_DEFINE() macros give us an 'unsigned long' * return type, but the ABI for sys_modify_ldt() expects * 'int'. This cast gives us an int-sized value in %rax * for the return code. The 'unsigned' is necessary so * the compiler does not try to sign-extend the negative * return codes into the high half of the register when * taking the value from int->long. */ return (unsigned int)ret; }
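/*
 * Hypothetical user-space sketch (not kernel code): exercises the
 * func == 0x11 "new mode" path dispatched to write_ldt() above.  The
 * descriptor values are arbitrary and chosen only for illustration.
 */
#include <asm/ldt.h>		/* struct user_desc */
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct user_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number   = 0;
	desc.base_addr      = 0;
	desc.limit          = 0xfffff;
	desc.seg_32bit      = 1;	/* avoids the 16-bit segment checks */
	desc.limit_in_pages = 1;

	if (syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc)) != 0)
		perror("modify_ldt");
	else
		printf("installed LDT entry %u\n", desc.entry_number);

	return 0;
}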
9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __X86_KERNEL_FPU_XSTATE_H #define __X86_KERNEL_FPU_XSTATE_H #include <asm/cpufeature.h> #include <asm/fpu/xstate.h> #include <asm/fpu/xcr.h> #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u64, xfd_state); #endif static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) { /* * XRSTORS requires these bits set in xcomp_bv, or it will * trigger #GP: */ if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; } static inline u64 xstate_get_group_perm(bool guest) { struct fpu *fpu = &current->group_leader->thread.fpu; struct fpu_state_perm *perm; /* Pairs with WRITE_ONCE() in xstate_request_perm() */ perm = guest ? &fpu->guest_perm : &fpu->perm; return READ_ONCE(perm->__state_perm); } static inline u64 xstate_get_host_group_perm(void) { return xstate_get_group_perm(false); } enum xstate_copy_mode { XSTATE_COPY_FP, XSTATE_COPY_FX, XSTATE_COPY_XSAVE, }; struct membuf; extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, u64 xfeatures, u32 pkru_val, enum xstate_copy_mode copy_mode); extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf); extern void fpu__init_cpu_xstate(void); extern void fpu__init_system_xstate(unsigned int legacy_size); extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); static inline u64 xfeatures_mask_supervisor(void) { return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; } static inline u64 xfeatures_mask_independent(void) { if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; return XFEATURE_MASK_INDEPENDENT; } /* XSAVE/XRSTOR wrapper functions */ #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " #else #define REX_PREFIX #endif /* These macros all use (%edi)/(%rdi) as the single memory argument. 
*/ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" #define XSAVEC ".byte " REX_PREFIX "0x0f,0xc7,0x27" #define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" /* * After this @err contains 0 on success or the trap number when the * operation raises an exception. */ #define XSTATE_OP(op, st, lmask, hmask, err) \ asm volatile("1:" op "\n\t" \ "xor %[err], %[err]\n" \ "2:\n\t" \ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ : [err] "=a" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") /* * If XSAVES is enabled, it replaces XSAVEC because it supports supervisor * states in addition to XSAVEC. * * Otherwise if XSAVEC is enabled, it replaces XSAVEOPT because it supports * compacted storage format in addition to XSAVEOPT. * * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT * supports modified optimization which is not supported by XSAVE. * * We use XSAVE as a fallback. * * The 661 label is defined in the ALTERNATIVE* macros as the address of the * original instruction which gets replaced. We need to use it here as the * address of the instruction where we might get an exception at. */ #define XSTATE_XSAVE(st, lmask, hmask, err) \ asm volatile(ALTERNATIVE_3(XSAVE, \ XSAVEOPT, X86_FEATURE_XSAVEOPT, \ XSAVEC, X86_FEATURE_XSAVEC, \ XSAVES, X86_FEATURE_XSAVES) \ "\n" \ "xor %[err], %[err]\n" \ "3:\n" \ _ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \ : [err] "=r" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") /* * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact * XSAVE area format. */ #define XSTATE_XRESTORE(st, lmask, hmask) \ asm volatile(ALTERNATIVE(XRSTOR, \ XRSTORS, X86_FEATURE_XSAVES) \ "\n" \ "3:\n" \ _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \ : \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") #if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU) extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor); #else static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { } #endif #ifdef CONFIG_X86_64 static inline void xfd_set_state(u64 xfd) { wrmsrl(MSR_IA32_XFD, xfd); __this_cpu_write(xfd_state, xfd); } static inline void xfd_update_state(struct fpstate *fpstate) { if (fpu_state_size_dynamic()) { u64 xfd = fpstate->xfd; if (__this_cpu_read(xfd_state) != xfd) xfd_set_state(xfd); } } extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu); #else static inline void xfd_set_state(u64 xfd) { } static inline void xfd_update_state(struct fpstate *fpstate) { } static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) { return -EPERM; } #endif /* * Save processor xstate to xsave area. * * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features * and command line options. The choice is permanent until the next reboot. */ static inline void os_xsave(struct fpstate *fpstate) { u64 mask = fpstate->xfeatures; u32 lmask = mask; u32 hmask = mask >> 32; int err; WARN_ON_FPU(!alternatives_patched); xfd_validate_state(fpstate, mask, false); XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err); /* We should never fault when copying to a kernel buffer: */ WARN_ON_FPU(err); } /* * Restore processor xstate from xsave area. * * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. 
*/ static inline void os_xrstor(struct fpstate *fpstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; xfd_validate_state(fpstate, mask, true); XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask); } /* Restore of supervisor state. Does not require XFD */ static inline void os_xrstor_supervisor(struct fpstate *fpstate) { u64 mask = xfeatures_mask_supervisor(); u32 lmask = mask; u32 hmask = mask >> 32; XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask); } /* * XSAVE itself always writes all requested xfeatures. Removing features * from the request bitmap reduces the features which are written. * Generate a mask of features which must be written to a sigframe. The * unset features can be optimized away and not written. * * This optimization is user-visible. Only use for states where * uninitialized sigframe contents are tolerable, like dynamic features. * * Users of buffers produced with this optimization must check XSTATE_BV * to determine which features have been optimized out. */ static inline u64 xfeatures_need_sigframe_write(void) { u64 xfeaures_to_write; /* In-use features must be written: */ xfeaures_to_write = xfeatures_in_use(); /* Also write all non-optimizable sigframe features: */ xfeaures_to_write |= XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_SIGFRAME_INITOPT; return xfeaures_to_write; } /* * Save xstate to user space xsave area. * * We don't use modified optimization because xrstor/xrstors might track * a different application. * * We don't use compacted format xsave area for backward compatibility for * old applications which don't understand the compacted format of the * xsave area. * * The caller has to zero buf::header before calling this because XSAVE* * does not touch the reserved fields in the header. */ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) { /* * Include the features which are not xsaved/rstored by the kernel * internally, e.g. PKRU. That's user space ABI and also required * to allow the signal handler to modify PKRU. */ struct fpstate *fpstate = current->thread.fpu.fpstate; u64 mask = fpstate->user_xfeatures; u32 lmask; u32 hmask; int err; /* Optimize away writing unnecessary xfeatures: */ if (fpu_state_size_dynamic()) mask &= xfeatures_need_sigframe_write(); lmask = mask; hmask = mask >> 32; xfd_validate_state(fpstate, mask, false); stac(); XSTATE_OP(XSAVE, buf, lmask, hmask, err); clac(); return err; } /* * Restore xstate from user space xsave area. */ static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask) { struct xregs_state *xstate = ((__force struct xregs_state *)buf); u32 lmask = mask; u32 hmask = mask >> 32; int err; xfd_validate_state(current->thread.fpu.fpstate, mask, true); stac(); XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); clac(); return err; } /* * Restore xstate from kernel space xsave area, return an error code instead of * an exception. */ static inline int os_xrstor_safe(struct fpstate *fpstate, u64 mask) { struct xregs_state *xstate = &fpstate->regs.xsave; u32 lmask = mask; u32 hmask = mask >> 32; int err; /* Ensure that XFD is up to date */ xfd_update_state(fpstate); if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); return err; } #endif
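/*
 * Hypothetical pairing sketch (not part of this header): how the
 * save/restore helpers above are typically used together.  The function
 * and the choice of XFEATURE_MASK_FPSTATE as the restore mask are
 * assumptions for illustration, not code taken from this file.
 */
static void demo_swap_xstate(struct fpstate *prev, struct fpstate *next)
{
	/* XSAVE/XSAVEOPT/XSAVEC/XSAVES the outgoing task's state. */
	os_xsave(prev);

	/* XRSTOR/XRSTORS the incoming state for the requested features. */
	os_xrstor(next, XFEATURE_MASK_FPSTATE);
}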
327 9 253 325 19 20 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UACCESS_64_H #define _ASM_X86_UACCESS_64_H /* * User space memory access functions */ #include <linux/compiler.h> #include <linux/lockdep.h> #include <linux/kasan-checks.h> #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/page.h> #include <asm/percpu.h> #ifdef CONFIG_ADDRESS_MASKING /* * Mask out tag bits from the address. */ static inline unsigned long __untagged_addr(unsigned long addr) { asm (ALTERNATIVE("", "and " __percpu_arg([mask]) ", %[addr]", X86_FEATURE_LAM) : [addr] "+r" (addr) : [mask] "m" (__my_cpu_var(tlbstate_untag_mask))); return addr; } #define untagged_addr(addr) ({ \ unsigned long __addr = (__force unsigned long)(addr); \ (__force __typeof__(addr))__untagged_addr(__addr); \ }) static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigned long addr) { mmap_assert_locked(mm); return addr & (mm)->context.untag_mask; } #define untagged_addr_remote(mm, addr) ({ \ unsigned long __addr = (__force unsigned long)(addr); \ (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \ }) #endif /* * The virtual address space space is logically divided into a kernel * half and a user half. When cast to a signed type, user pointers * are positive and kernel pointers are negative. */ #define valid_user_address(x) ((__force long)(x) >= 0) /* * User pointers can have tag bits on x86-64. This scheme tolerates * arbitrary values in those bits rather then masking them off. * * Enforce two rules: * 1. 'ptr' must be in the user half of the address space * 2. 'ptr+size' must not overflow into kernel addresses * * Note that addresses around the sign change are not valid addresses, * and will GP-fault even with LAM enabled if the sign bit is set (see * "CR3.LAM_SUP" that can narrow the canonicality check if we ever * enable it, but not remove it entirely). * * So the "overflow into kernel addresses" does not imply some sudden * exact boundary at the sign bit, and we can allow a lot of slop on the * size check. * * In fact, we could probably remove the size check entirely, since * any kernel accesses will be in increasing address order starting * at 'ptr', and even if the end might be in kernel space, we'll * hit the GP faults for non-canonical accesses before we ever get * there. * * That's a separate optimization, for now just handle the small * constant case. 
*/ static inline bool __access_ok(const void __user *ptr, unsigned long size) { if (__builtin_constant_p(size <= PAGE_SIZE) && size <= PAGE_SIZE) { return valid_user_address(ptr); } else { unsigned long sum = size + (__force unsigned long)ptr; return valid_user_address(sum) && sum >= (__force unsigned long)ptr; } } #define __access_ok __access_ok /* * Copy To/From Userspace */ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long rep_movs_alternative(void *to, const void *from, unsigned len); static __always_inline __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned long len) { stac(); /* * If CPU has FSRM feature, use 'rep movs'. * Otherwise, use rep_movs_alternative. */ asm volatile( "1:\n\t" ALTERNATIVE("rep movsb", "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM)) "2:\n" _ASM_EXTABLE_UA(1b, 2b) :"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT : : "memory", "rax"); clac(); return len; } static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { return copy_user_generic(dst, (__force void *)src, size); } static __always_inline __must_check unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long size) { return copy_user_generic((__force void *)dst, src, size); } extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size); extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { long ret; kasan_check_write(dst, size); stac(); ret = __copy_user_nocache(dst, src, size); clac(); return ret; } static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) { kasan_check_write(dst, size); return __copy_user_flushcache(dst, src, size); } /* * Zero Userspace. */ __must_check unsigned long rep_stos_alternative(void __user *addr, unsigned long len); static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size) { might_fault(); stac(); /* * No memory constraint because it doesn't change any memory gcc * knows about. */ asm volatile( "1:\n\t" ALTERNATIVE("rep stosb", "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS)) "2:\n" _ASM_EXTABLE_UA(1b, 2b) : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT : "a" (0)); clac(); return size; } static __always_inline unsigned long clear_user(void __user *to, unsigned long n) { if (__access_ok(to, n)) return __clear_user(to, n); return n; } #endif /* _ASM_X86_UACCESS_64_H */
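/*
 * Hypothetical driver-style sketch (not part of this header): copying a
 * small structure from user space goes through the generic
 * copy_from_user(), which in turn relies on the __access_ok() and
 * raw_copy_from_user() definitions above.  The struct and helper names
 * are invented for illustration.
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct demo_args {
	u32 flags;
	u64 addr;
};

static long demo_copy_args(struct demo_args *dst, const void __user *uptr)
{
	if (copy_from_user(dst, uptr, sizeof(*dst)))
		return -EFAULT;
	return 0;
}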
20 23 23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 // SPDX-License-Identifier: GPL-2.0 /* * Tty port functions */ #include <linux/types.h> #include <linux/errno.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/serdev.h> #include 
"tty.h" static size_t tty_port_default_receive_buf(struct tty_port *port, const u8 *p, const u8 *f, size_t count) { struct tty_struct *tty; struct tty_ldisc *ld; tty = READ_ONCE(port->itty); if (!tty) return 0; ld = tty_ldisc_ref(tty); if (!ld) return 0; count = tty_ldisc_receive_buf(ld, p, f, count); tty_ldisc_deref(ld); return count; } static void tty_port_default_lookahead_buf(struct tty_port *port, const u8 *p, const u8 *f, size_t count) { struct tty_struct *tty; struct tty_ldisc *ld; tty = READ_ONCE(port->itty); if (!tty) return; ld = tty_ldisc_ref(tty); if (!ld) return; if (ld->ops->lookahead_buf) ld->ops->lookahead_buf(ld->tty, p, f, count); tty_ldisc_deref(ld); } static void tty_port_default_wakeup(struct tty_port *port) { struct tty_struct *tty = tty_port_tty_get(port); if (tty) { tty_wakeup(tty); tty_kref_put(tty); } } const struct tty_port_client_operations tty_port_default_client_ops = { .receive_buf = tty_port_default_receive_buf, .lookahead_buf = tty_port_default_lookahead_buf, .write_wakeup = tty_port_default_wakeup, }; EXPORT_SYMBOL_GPL(tty_port_default_client_ops); /** * tty_port_init - initialize tty_port * @port: tty_port to initialize * * Initializes the state of struct tty_port. When a port was initialized using * this function, one has to destroy the port by tty_port_destroy(). Either * indirectly by using &tty_port refcounting (tty_port_put()) or directly if * refcounting is not used. */ void tty_port_init(struct tty_port *port) { memset(port, 0, sizeof(*port)); tty_buffer_init(port); init_waitqueue_head(&port->open_wait); init_waitqueue_head(&port->delta_msr_wait); mutex_init(&port->mutex); mutex_init(&port->buf_mutex); spin_lock_init(&port->lock); port->close_delay = (50 * HZ) / 100; port->closing_wait = (3000 * HZ) / 100; port->client_ops = &tty_port_default_client_ops; kref_init(&port->kref); } EXPORT_SYMBOL(tty_port_init); /** * tty_port_link_device - link tty and tty_port * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * * Provide the tty layer with a link from a tty (specified by @index) to a * tty_port (@port). Use this only if neither tty_port_register_device() nor * tty_port_install() is used in the driver. If used, this has to be called * before tty_register_driver(). */ void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index) { if (WARN_ON(index >= driver->num)) return; driver->ports[index] = port; } EXPORT_SYMBOL_GPL(tty_port_link_device); /** * tty_port_register_device - register tty device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * @device: parent if exists, otherwise NULL * * It is the same as tty_register_device() except the provided @port is linked * to a concrete tty specified by @index. Use this or tty_port_install() (or * both). Call tty_port_link_device() as a last resort. */ struct device *tty_port_register_device(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device) { return tty_port_register_device_attr(port, driver, index, device, NULL, NULL); } EXPORT_SYMBOL_GPL(tty_port_register_device); /** * tty_port_register_device_attr - register tty device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * @device: parent if exists, otherwise NULL * @drvdata: Driver data to be set to device. * @attr_grp: Attribute group to be set on device. 
* * It is the same as tty_register_device_attr() except the provided @port is * linked to a concrete tty specified by @index. Use this or tty_port_install() * (or both). Call tty_port_link_device() as a last resort. */ struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp) { tty_port_link_device(port, driver, index); return tty_register_device_attr(driver, index, device, drvdata, attr_grp); } EXPORT_SYMBOL_GPL(tty_port_register_device_attr); /** * tty_port_register_device_attr_serdev - register tty or serdev device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * @host: serial port hardware device * @parent: parent if exists, otherwise NULL * @drvdata: driver data for the device * @attr_grp: attribute group for the device * * Register a serdev or tty device depending on if the parent device has any * defined serdev clients or not. */ struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent, void *drvdata, const struct attribute_group **attr_grp) { struct device *dev; tty_port_link_device(port, driver, index); dev = serdev_tty_port_register(port, host, parent, driver, index); if (PTR_ERR(dev) != -ENODEV) { /* Skip creating cdev if we registered a serdev device */ return dev; } return tty_register_device_attr(driver, index, parent, drvdata, attr_grp); } EXPORT_SYMBOL_GPL(tty_port_register_device_attr_serdev); /** * tty_port_register_device_serdev - register tty or serdev device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * @host: serial port hardware controller device * @parent: parent if exists, otherwise NULL * * Register a serdev or tty device depending on if the parent device has any * defined serdev clients or not. */ struct device *tty_port_register_device_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent) { return tty_port_register_device_attr_serdev(port, driver, index, host, parent, NULL, NULL); } EXPORT_SYMBOL_GPL(tty_port_register_device_serdev); /** * tty_port_unregister_device - deregister a tty or serdev device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * * If a tty or serdev device is registered with a call to * tty_port_register_device_serdev() then this function must be called when * the device is gone. 
*/ void tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index) { int ret; ret = serdev_tty_port_unregister(port); if (ret == 0) return; tty_unregister_device(driver, index); } EXPORT_SYMBOL_GPL(tty_port_unregister_device); int tty_port_alloc_xmit_buf(struct tty_port *port) { /* We may sleep in get_zeroed_page() */ mutex_lock(&port->buf_mutex); if (port->xmit_buf == NULL) { port->xmit_buf = (u8 *)get_zeroed_page(GFP_KERNEL); if (port->xmit_buf) kfifo_init(&port->xmit_fifo, port->xmit_buf, PAGE_SIZE); } mutex_unlock(&port->buf_mutex); if (port->xmit_buf == NULL) return -ENOMEM; return 0; } EXPORT_SYMBOL(tty_port_alloc_xmit_buf); void tty_port_free_xmit_buf(struct tty_port *port) { mutex_lock(&port->buf_mutex); free_page((unsigned long)port->xmit_buf); port->xmit_buf = NULL; INIT_KFIFO(port->xmit_fifo); mutex_unlock(&port->buf_mutex); } EXPORT_SYMBOL(tty_port_free_xmit_buf); /** * tty_port_destroy - destroy inited port * @port: tty port to be destroyed * * When a port was initialized using tty_port_init(), one has to destroy the * port by this function. Either indirectly by using &tty_port refcounting * (tty_port_put()) or directly if refcounting is not used. */ void tty_port_destroy(struct tty_port *port) { tty_buffer_cancel_work(port); tty_buffer_free_all(port); } EXPORT_SYMBOL(tty_port_destroy); static void tty_port_destructor(struct kref *kref) { struct tty_port *port = container_of(kref, struct tty_port, kref); /* check if last port ref was dropped before tty release */ if (WARN_ON(port->itty)) return; free_page((unsigned long)port->xmit_buf); tty_port_destroy(port); if (port->ops && port->ops->destruct) port->ops->destruct(port); else kfree(port); } /** * tty_port_put - drop a reference to tty_port * @port: port to drop a reference of (can be NULL) * * The final put will destroy and free up the @port using * @port->ops->destruct() hook, or using kfree() if not provided. */ void tty_port_put(struct tty_port *port) { if (port) kref_put(&port->kref, tty_port_destructor); } EXPORT_SYMBOL(tty_port_put); /** * tty_port_tty_get - get a tty reference * @port: tty port * * Return a refcount protected tty instance or %NULL if the port is not * associated with a tty (eg due to close or hangup). */ struct tty_struct *tty_port_tty_get(struct tty_port *port) { unsigned long flags; struct tty_struct *tty; spin_lock_irqsave(&port->lock, flags); tty = tty_kref_get(port->tty); spin_unlock_irqrestore(&port->lock, flags); return tty; } EXPORT_SYMBOL(tty_port_tty_get); /** * tty_port_tty_set - set the tty of a port * @port: tty port * @tty: the tty * * Associate the port and tty pair. Manages any internal refcounts. Pass %NULL * to deassociate a port. */ void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&port->lock, flags); tty_kref_put(port->tty); port->tty = tty_kref_get(tty); spin_unlock_irqrestore(&port->lock, flags); } EXPORT_SYMBOL(tty_port_tty_set); /** * tty_port_shutdown - internal helper to shutdown the device * @port: tty port to be shut down * @tty: the associated tty * * It is used by tty_port_hangup() and tty_port_close(). Its task is to * shutdown the device if it was initialized (note consoles remain * functioning). It lowers DTR/RTS (if @tty has HUPCL set) and invokes * @port->ops->shutdown(). 
*/ static void tty_port_shutdown(struct tty_port *port, struct tty_struct *tty) { mutex_lock(&port->mutex); if (port->console) goto out; if (tty_port_initialized(port)) { tty_port_set_initialized(port, false); /* * Drop DTR/RTS if HUPCL is set. This causes any attached * modem to hang up the line. */ if (tty && C_HUPCL(tty)) tty_port_lower_dtr_rts(port); if (port->ops->shutdown) port->ops->shutdown(port); } out: mutex_unlock(&port->mutex); } /** * tty_port_hangup - hangup helper * @port: tty port * * Perform port level tty hangup flag and count changes. Drop the tty * reference. * * Caller holds tty lock. */ void tty_port_hangup(struct tty_port *port) { struct tty_struct *tty; unsigned long flags; spin_lock_irqsave(&port->lock, flags); port->count = 0; tty = port->tty; if (tty) set_bit(TTY_IO_ERROR, &tty->flags); port->tty = NULL; spin_unlock_irqrestore(&port->lock, flags); tty_port_set_active(port, false); tty_port_shutdown(port, tty); tty_kref_put(tty); wake_up_interruptible(&port->open_wait); wake_up_interruptible(&port->delta_msr_wait); } EXPORT_SYMBOL(tty_port_hangup); /** * tty_port_tty_hangup - helper to hang up a tty * @port: tty port * @check_clocal: hang only ttys with %CLOCAL unset? */ void tty_port_tty_hangup(struct tty_port *port, bool check_clocal) { struct tty_struct *tty = tty_port_tty_get(port); if (tty && (!check_clocal || !C_CLOCAL(tty))) tty_hangup(tty); tty_kref_put(tty); } EXPORT_SYMBOL_GPL(tty_port_tty_hangup); /** * tty_port_tty_wakeup - helper to wake up a tty * @port: tty port */ void tty_port_tty_wakeup(struct tty_port *port) { port->client_ops->write_wakeup(port); } EXPORT_SYMBOL_GPL(tty_port_tty_wakeup); /** * tty_port_carrier_raised - carrier raised check * @port: tty port * * Wrapper for the carrier detect logic. For the moment this is used * to hide some internal details. This will eventually become entirely * internal to the tty port. */ bool tty_port_carrier_raised(struct tty_port *port) { if (port->ops->carrier_raised == NULL) return true; return port->ops->carrier_raised(port); } EXPORT_SYMBOL(tty_port_carrier_raised); /** * tty_port_raise_dtr_rts - Raise DTR/RTS * @port: tty port * * Wrapper for the DTR/RTS raise logic. For the moment this is used to hide * some internal details. This will eventually become entirely internal to the * tty port. */ void tty_port_raise_dtr_rts(struct tty_port *port) { if (port->ops->dtr_rts) port->ops->dtr_rts(port, true); } EXPORT_SYMBOL(tty_port_raise_dtr_rts); /** * tty_port_lower_dtr_rts - Lower DTR/RTS * @port: tty port * * Wrapper for the DTR/RTS raise logic. For the moment this is used to hide * some internal details. This will eventually become entirely internal to the * tty port. */ void tty_port_lower_dtr_rts(struct tty_port *port) { if (port->ops->dtr_rts) port->ops->dtr_rts(port, false); } EXPORT_SYMBOL(tty_port_lower_dtr_rts); /** * tty_port_block_til_ready - Waiting logic for tty open * @port: the tty port being opened * @tty: the tty device being bound * @filp: the file pointer of the opener or %NULL * * Implement the core POSIX/SuS tty behaviour when opening a tty device. * Handles: * * - hangup (both before and during) * - non blocking open * - rts/dtr/dcd * - signals * - port flags and counts * * The passed @port must implement the @port->ops->carrier_raised method if it * can do carrier detect and the @port->ops->dtr_rts method if it supports * software management of these lines. Note that the dtr/rts raise is done each * iteration as a hangup may have previously dropped them while we wait. 
* * Caller holds tty lock. * * Note: May drop and reacquire tty lock when blocking, so @tty and @port may * have changed state (eg., may have been hung up). */ int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp) { int do_clocal = 0, retval; unsigned long flags; DEFINE_WAIT(wait); /* if non-blocking mode is set we can pass directly to open unless * the port has just hung up or is in another error state. */ if (tty_io_error(tty)) { tty_port_set_active(port, true); return 0; } if (filp == NULL || (filp->f_flags & O_NONBLOCK)) { /* Indicate we are open */ if (C_BAUD(tty)) tty_port_raise_dtr_rts(port); tty_port_set_active(port, true); return 0; } if (C_CLOCAL(tty)) do_clocal = 1; /* Block waiting until we can proceed. We may need to wait for the * carrier, but we must also wait for any close that is in progress * before the next open may complete. */ retval = 0; /* The port lock protects the port counts */ spin_lock_irqsave(&port->lock, flags); port->count--; port->blocked_open++; spin_unlock_irqrestore(&port->lock, flags); while (1) { /* Indicate we are open */ if (C_BAUD(tty) && tty_port_initialized(port)) tty_port_raise_dtr_rts(port); prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE); /* Check for a hangup or uninitialised port. * Return accordingly. */ if (tty_hung_up_p(filp) || !tty_port_initialized(port)) { if (port->flags & ASYNC_HUP_NOTIFY) retval = -EAGAIN; else retval = -ERESTARTSYS; break; } /* * Probe the carrier. For devices with no carrier detect * tty_port_carrier_raised will always return true. * Never ask drivers if CLOCAL is set, this causes troubles * on some hardware. */ if (do_clocal || tty_port_carrier_raised(port)) break; if (signal_pending(current)) { retval = -ERESTARTSYS; break; } tty_unlock(tty); schedule(); tty_lock(tty); } finish_wait(&port->open_wait, &wait); /* Update counts. A parallel hangup will have set count to zero and * we must not mess that up further. */ spin_lock_irqsave(&port->lock, flags); if (!tty_hung_up_p(filp)) port->count++; port->blocked_open--; spin_unlock_irqrestore(&port->lock, flags); if (retval == 0) tty_port_set_active(port, true); return retval; } EXPORT_SYMBOL(tty_port_block_til_ready); static void tty_port_drain_delay(struct tty_port *port, struct tty_struct *tty) { unsigned int bps = tty_get_baud_rate(tty); long timeout; if (bps > 1200) { timeout = (HZ * 10 * port->drain_delay) / bps; timeout = max_t(long, timeout, HZ / 10); } else { timeout = 2 * HZ; } schedule_timeout_interruptible(timeout); } /** * tty_port_close_start - helper for tty->ops->close, part 1/2 * @port: tty_port of the device * @tty: tty being closed * @filp: passed file pointer * * Decrements and checks open count. Flushes the port if this is the last * close. That means, dropping the data from the outpu buffer on the device and * waiting for sending logic to finish. The rest of close handling is performed * in tty_port_close_end(). * * Locking: Caller holds tty lock. 
* * Return: 1 if this is the last close, otherwise 0 */ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp) { unsigned long flags; if (tty_hung_up_p(filp)) return 0; spin_lock_irqsave(&port->lock, flags); if (tty->count == 1 && port->count != 1) { tty_warn(tty, "%s: tty->count = 1 port count = %d\n", __func__, port->count); port->count = 1; } if (--port->count < 0) { tty_warn(tty, "%s: bad port count (%d)\n", __func__, port->count); port->count = 0; } if (port->count) { spin_unlock_irqrestore(&port->lock, flags); return 0; } spin_unlock_irqrestore(&port->lock, flags); tty->closing = 1; if (tty_port_initialized(port)) { /* Don't block on a stalled port, just pull the chain */ if (tty->flow.tco_stopped) tty_driver_flush_buffer(tty); if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE) tty_wait_until_sent(tty, port->closing_wait); if (port->drain_delay) tty_port_drain_delay(port, tty); } /* Flush the ldisc buffering */ tty_ldisc_flush(tty); /* Report to caller this is the last port reference */ return 1; } EXPORT_SYMBOL(tty_port_close_start); /** * tty_port_close_end - helper for tty->ops->close, part 2/2 * @port: tty_port of the device * @tty: tty being closed * * This is a continuation of the first part: tty_port_close_start(). This * should be called after turning off the device. It flushes the data from the * line discipline and delays the close by @port->close_delay. * * Locking: Caller holds tty lock. */ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty) { unsigned long flags; tty_ldisc_flush(tty); tty->closing = 0; spin_lock_irqsave(&port->lock, flags); if (port->blocked_open) { spin_unlock_irqrestore(&port->lock, flags); if (port->close_delay) msleep_interruptible(jiffies_to_msecs(port->close_delay)); spin_lock_irqsave(&port->lock, flags); wake_up_interruptible(&port->open_wait); } spin_unlock_irqrestore(&port->lock, flags); tty_port_set_active(port, false); } EXPORT_SYMBOL(tty_port_close_end); /** * tty_port_close - generic tty->ops->close handler * @port: tty_port of the device * @tty: tty being closed * @filp: passed file pointer * * It is a generic helper to be used in driver's @tty->ops->close. It wraps a * sequence of tty_port_close_start(), tty_port_shutdown(), and * tty_port_close_end(). The latter two are called only if this is the last * close. See the respective functions for the details. * * Locking: Caller holds tty lock */ void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp) { if (tty_port_close_start(port, tty, filp) == 0) return; tty_port_shutdown(port, tty); if (!port->console) set_bit(TTY_IO_ERROR, &tty->flags); tty_port_close_end(port, tty); tty_port_tty_set(port, NULL); } EXPORT_SYMBOL(tty_port_close); /** * tty_port_install - generic tty->ops->install handler * @port: tty_port of the device * @driver: tty_driver for this device * @tty: tty to be installed * * It is the same as tty_standard_install() except the provided @port is linked * to a concrete tty specified by @tty. Use this or tty_port_register_device() * (or both). Call tty_port_link_device() as a last resort. */ int tty_port_install(struct tty_port *port, struct tty_driver *driver, struct tty_struct *tty) { tty->port = port; return tty_standard_install(driver, tty); } EXPORT_SYMBOL_GPL(tty_port_install); /** * tty_port_open - generic tty->ops->open handler * @port: tty_port of the device * @tty: tty to be opened * @filp: passed file pointer * * It is a generic helper to be used in driver's @tty->ops->open. 
It activates * the device using @port->ops->activate if not active already and waits for * the device to be ready using tty_port_block_til_ready() (e.g. raises * DTR/RTS and waits for carrier). * * Note that @port->ops->shutdown is not called when @port->ops->activate * returns an error (on the contrary, @tty->ops->close is). * * Locking: Caller holds tty lock. * * Note: may drop and reacquire tty lock (in tty_port_block_til_ready()) so * @tty and @port may have changed state (e.g., may be hung up now). */ int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp) { spin_lock_irq(&port->lock); ++port->count; spin_unlock_irq(&port->lock); tty_port_tty_set(port, tty); /* * Do the device-specific open only if the hardware isn't * already initialized. Serialize open and shutdown using the * port mutex. */ mutex_lock(&port->mutex); if (!tty_port_initialized(port)) { clear_bit(TTY_IO_ERROR, &tty->flags); if (port->ops->activate) { int retval = port->ops->activate(port, tty); if (retval) { mutex_unlock(&port->mutex); return retval; } } tty_port_set_initialized(port, true); } mutex_unlock(&port->mutex); return tty_port_block_til_ready(port, tty, filp); } EXPORT_SYMBOL(tty_port_open);
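/*
 * Hedged usage sketch (not part of tty_port.c): the helpers above are meant
 * to be wired into a driver's struct tty_operations, with the hardware
 * specifics supplied through struct tty_port_operations. Every foo_*
 * identifier below is a made-up illustration; only the tty_port_*() calls
 * and the two ops structures come from the code above.
 */
#include <linux/tty.h>

static struct tty_port foo_port;	/* tty_port_init(&foo_port) and
					 * foo_port.ops = &foo_port_ops
					 * would run at probe time. */

static int foo_activate(struct tty_port *port, struct tty_struct *tty)
{
	/* Power up the UART, request the IRQ, enable the receiver, ... */
	return 0;
}

static void foo_shutdown(struct tty_port *port)
{
	/* Quiesce the hardware; called once on final close or hangup. */
}

static void foo_dtr_rts(struct tty_port *port, bool active)
{
	/* Drive the DTR and RTS modem-control outputs according to @active. */
}

static bool foo_carrier_raised(struct tty_port *port)
{
	return true;	/* or sample DCD from the modem status register */
}

static const struct tty_port_operations foo_port_ops = {
	.activate	= foo_activate,
	.shutdown	= foo_shutdown,
	.dtr_rts	= foo_dtr_rts,
	.carrier_raised	= foo_carrier_raised,
};

/* The tty_operations methods then simply delegate to the helpers above. */
static int foo_install(struct tty_driver *driver, struct tty_struct *tty)
{
	return tty_port_install(&foo_port, driver, tty);
}

static int foo_open(struct tty_struct *tty, struct file *filp)
{
	return tty_port_open(tty->port, tty, filp);
}

static void foo_close(struct tty_struct *tty, struct file *filp)
{
	tty_port_close(tty->port, tty, filp);
}

static void foo_hangup(struct tty_struct *tty)
{
	tty_port_hangup(tty->port);
}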
// SPDX-License-Identifier: GPL-2.0-only #include <net/sock.h> #include <linux/ethtool_netlink.h> #include <linux/pm_runtime.h> #include "netlink.h" static struct genl_family ethtool_genl_family; static bool ethnl_ok __read_mostly; static u32 ethnl_bcast_seq; #define ETHTOOL_FLAGS_BASIC (ETHTOOL_FLAG_COMPACT_BITSETS | \ ETHTOOL_FLAG_OMIT_REPLY) #define ETHTOOL_FLAGS_STATS (ETHTOOL_FLAGS_BASIC | ETHTOOL_FLAG_STATS) const struct nla_policy ethnl_header_policy[] = { [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, .len = ALTIFNAMSIZ - 1 }, [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, ETHTOOL_FLAGS_BASIC), }; const struct nla_policy ethnl_header_policy_stats[] = { [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, .len = ALTIFNAMSIZ - 1 }, [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, ETHTOOL_FLAGS_STATS), }; int ethnl_ops_begin(struct net_device *dev) { int ret; if (!dev) return -ENODEV; if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); if (!netif_device_present(dev) || dev->reg_state == NETREG_UNREGISTERING) { ret = -ENODEV; goto err; } if (dev->ethtool_ops->begin) { ret = dev->ethtool_ops->begin(dev); if (ret) goto err; } return 0; err: if (dev->dev.parent) pm_runtime_put(dev->dev.parent); return ret; } void ethnl_ops_complete(struct net_device *dev) { if (dev->ethtool_ops->complete) dev->ethtool_ops->complete(dev); if (dev->dev.parent) pm_runtime_put(dev->dev.parent); } /** * ethnl_parse_header_dev_get() - parse request header * @req_info: structure to put results into * @header: nest attribute with request header * @net: request netns * @extack: netlink extack for error reporting * @require_dev: fail if no device identified in header * * Parse request header in nested attribute @header and put results into * the structure pointed to by @req_info. @extack is used for error * reporting. If req_info->dev is not null on return, a reference to it has * been taken. If an error is returned, *req_info is left zero-initialized and no * reference is held. 
* * Return: 0 on success or negative error code */ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, const struct nlattr *header, struct net *net, struct netlink_ext_ack *extack, bool require_dev) { struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)]; const struct nlattr *devname_attr; struct net_device *dev = NULL; u32 flags = 0; int ret; if (!header) { if (!require_dev) return 0; NL_SET_ERR_MSG(extack, "request header missing"); return -EINVAL; } /* No validation here, command policy should have a nested policy set * for the header, therefore validation should have already been done. */ ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header, NULL, extack); if (ret < 0) return ret; if (tb[ETHTOOL_A_HEADER_FLAGS]) flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]); devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME]; if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) { u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]); dev = netdev_get_by_index(net, ifindex, &req_info->dev_tracker, GFP_KERNEL); if (!dev) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_HEADER_DEV_INDEX], "no device matches ifindex"); return -ENODEV; } /* if both ifindex and ifname are passed, they must match */ if (devname_attr && strncmp(dev->name, nla_data(devname_attr), IFNAMSIZ)) { netdev_put(dev, &req_info->dev_tracker); NL_SET_ERR_MSG_ATTR(extack, header, "ifindex and name do not match"); return -ENODEV; } } else if (devname_attr) { dev = netdev_get_by_name(net, nla_data(devname_attr), &req_info->dev_tracker, GFP_KERNEL); if (!dev) { NL_SET_ERR_MSG_ATTR(extack, devname_attr, "no device matches name"); return -ENODEV; } } else if (require_dev) { NL_SET_ERR_MSG_ATTR(extack, header, "neither ifindex nor name specified"); return -EINVAL; } req_info->dev = dev; req_info->flags = flags; return 0; } /** * ethnl_fill_reply_header() - Put common header into a reply message * @skb: skb with the message * @dev: network device to describe in header * @attrtype: attribute type to use for the nest * * Create a nested attribute with attributes describing given network device. * * Return: 0 on success, error value (-EMSGSIZE only) on error */ int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev, u16 attrtype) { struct nlattr *nest; if (!dev) return 0; nest = nla_nest_start(skb, attrtype); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_HEADER_DEV_INDEX, (u32)dev->ifindex) || nla_put_string(skb, ETHTOOL_A_HEADER_DEV_NAME, dev->name)) goto nla_put_failure; /* If more attributes are put into reply header, ethnl_header_size() * must be updated to account for them. 
*/ nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } /** * ethnl_reply_init() - Create skb for a reply and fill device identification * @payload: payload length (without netlink and genetlink header) * @dev: device the reply is about (may be null) * @cmd: ETHTOOL_MSG_* message type for reply * @hdr_attrtype: attribute type for common header * @info: genetlink info of the received packet we respond to * @ehdrp: place to store payload pointer returned by genlmsg_new() * * Return: pointer to allocated skb on success, NULL on error */ struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, u16 hdr_attrtype, struct genl_info *info, void **ehdrp) { struct sk_buff *skb; skb = genlmsg_new(payload, GFP_KERNEL); if (!skb) goto err; *ehdrp = genlmsg_put_reply(skb, info, &ethtool_genl_family, 0, cmd); if (!*ehdrp) goto err_free; if (dev) { int ret; ret = ethnl_fill_reply_header(skb, dev, hdr_attrtype); if (ret < 0) goto err_free; } return skb; err_free: nlmsg_free(skb); err: if (info) GENL_SET_ERR_MSG(info, "failed to setup reply message"); return NULL; } void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd) { return genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ethtool_genl_family, 0, cmd); } void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd) { return genlmsg_put(skb, 0, ++ethnl_bcast_seq, &ethtool_genl_family, 0, cmd); } int ethnl_multicast(struct sk_buff *skb, struct net_device *dev) { return genlmsg_multicast_netns(&ethtool_genl_family, dev_net(dev), skb, 0, ETHNL_MCGRP_MONITOR, GFP_KERNEL); } /* GET request helpers */ /** * struct ethnl_dump_ctx - context structure for generic dumpit() callback * @ops: request ops of currently processed message type * @req_info: parsed request header of processed request * @reply_data: data needed to compose the reply * @pos_ifindex: saved iteration position - ifindex * * These parameters are kept in struct netlink_callback as context preserved * between iterations. They are initialized by ethnl_default_start() and used * in ethnl_default_dumpit() and ethnl_default_done(). 
*/ struct ethnl_dump_ctx { const struct ethnl_request_ops *ops; struct ethnl_req_info *req_info; struct ethnl_reply_data *reply_data; unsigned long pos_ifindex; }; static const struct ethnl_request_ops * ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_STRSET_GET] = &ethnl_strset_request_ops, [ETHTOOL_MSG_LINKINFO_GET] = &ethnl_linkinfo_request_ops, [ETHTOOL_MSG_LINKINFO_SET] = &ethnl_linkinfo_request_ops, [ETHTOOL_MSG_LINKMODES_GET] = &ethnl_linkmodes_request_ops, [ETHTOOL_MSG_LINKMODES_SET] = &ethnl_linkmodes_request_ops, [ETHTOOL_MSG_LINKSTATE_GET] = &ethnl_linkstate_request_ops, [ETHTOOL_MSG_DEBUG_GET] = &ethnl_debug_request_ops, [ETHTOOL_MSG_DEBUG_SET] = &ethnl_debug_request_ops, [ETHTOOL_MSG_WOL_GET] = &ethnl_wol_request_ops, [ETHTOOL_MSG_WOL_SET] = &ethnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_GET] = &ethnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_GET] = &ethnl_privflags_request_ops, [ETHTOOL_MSG_PRIVFLAGS_SET] = &ethnl_privflags_request_ops, [ETHTOOL_MSG_RINGS_GET] = &ethnl_rings_request_ops, [ETHTOOL_MSG_RINGS_SET] = &ethnl_rings_request_ops, [ETHTOOL_MSG_CHANNELS_GET] = &ethnl_channels_request_ops, [ETHTOOL_MSG_CHANNELS_SET] = &ethnl_channels_request_ops, [ETHTOOL_MSG_COALESCE_GET] = &ethnl_coalesce_request_ops, [ETHTOOL_MSG_COALESCE_SET] = &ethnl_coalesce_request_ops, [ETHTOOL_MSG_PAUSE_GET] = &ethnl_pause_request_ops, [ETHTOOL_MSG_PAUSE_SET] = &ethnl_pause_request_ops, [ETHTOOL_MSG_EEE_GET] = &ethnl_eee_request_ops, [ETHTOOL_MSG_EEE_SET] = &ethnl_eee_request_ops, [ETHTOOL_MSG_FEC_GET] = &ethnl_fec_request_ops, [ETHTOOL_MSG_FEC_SET] = &ethnl_fec_request_ops, [ETHTOOL_MSG_TSINFO_GET] = &ethnl_tsinfo_request_ops, [ETHTOOL_MSG_MODULE_EEPROM_GET] = &ethnl_module_eeprom_request_ops, [ETHTOOL_MSG_STATS_GET] = &ethnl_stats_request_ops, [ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops, [ETHTOOL_MSG_MODULE_GET] = &ethnl_module_request_ops, [ETHTOOL_MSG_MODULE_SET] = &ethnl_module_request_ops, [ETHTOOL_MSG_PSE_GET] = &ethnl_pse_request_ops, [ETHTOOL_MSG_PSE_SET] = &ethnl_pse_request_ops, [ETHTOOL_MSG_RSS_GET] = &ethnl_rss_request_ops, [ETHTOOL_MSG_PLCA_GET_CFG] = &ethnl_plca_cfg_request_ops, [ETHTOOL_MSG_PLCA_SET_CFG] = &ethnl_plca_cfg_request_ops, [ETHTOOL_MSG_PLCA_GET_STATUS] = &ethnl_plca_status_request_ops, [ETHTOOL_MSG_MM_GET] = &ethnl_mm_request_ops, [ETHTOOL_MSG_MM_SET] = &ethnl_mm_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) { return (struct ethnl_dump_ctx *)cb->ctx; } /** * ethnl_default_parse() - Parse request message * @req_info: pointer to structure to put data into * @info: genl_info from the request * @request_ops: struct request_ops for request type * @require_dev: fail if no device identified in header * * Parse universal request header and call request specific ->parse_request() * callback (if defined) to parse the rest of the message. 
* * Return: 0 on success or negative error code */ static int ethnl_default_parse(struct ethnl_req_info *req_info, const struct genl_info *info, const struct ethnl_request_ops *request_ops, bool require_dev) { struct nlattr **tb = info->attrs; int ret; ret = ethnl_parse_header_dev_get(req_info, tb[request_ops->hdr_attr], genl_info_net(info), info->extack, require_dev); if (ret < 0) return ret; if (request_ops->parse_request) { ret = request_ops->parse_request(req_info, tb, info->extack); if (ret < 0) return ret; } return 0; } /** * ethnl_init_reply_data() - Initialize reply data for GET request * @reply_data: pointer to embedded struct ethnl_reply_data * @ops: instance of struct ethnl_request_ops describing the layout * @dev: network device to initialize the reply for * * Fills the reply data part with zeros and sets the dev member. Must be called * before calling the ->fill_reply() callback (for each iteration when handling * dump requests). */ static void ethnl_init_reply_data(struct ethnl_reply_data *reply_data, const struct ethnl_request_ops *ops, struct net_device *dev) { memset(reply_data, 0, ops->reply_data_size); reply_data->dev = dev; } /* default ->doit() handler for GET type requests */ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) { struct ethnl_reply_data *reply_data = NULL; struct ethnl_req_info *req_info = NULL; const u8 cmd = info->genlhdr->cmd; const struct ethnl_request_ops *ops; int hdr_len, reply_len; struct sk_buff *rskb; void *reply_payload; int ret; ops = ethnl_default_requests[cmd]; if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd)) return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr)) return -EINVAL; req_info = kzalloc(ops->req_info_size, GFP_KERNEL); if (!req_info) return -ENOMEM; reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL); if (!reply_data) { kfree(req_info); return -ENOMEM; } ret = ethnl_default_parse(req_info, info, ops, !ops->allow_nodev_do); if (ret < 0) goto err_dev; ethnl_init_reply_data(reply_data, ops, req_info->dev); rtnl_lock(); ret = ops->prepare_data(req_info, reply_data, info); rtnl_unlock(); if (ret < 0) goto err_cleanup; ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; reply_len = ret; ret = -ENOMEM; rskb = ethnl_reply_init(reply_len + ethnl_reply_header_size(), req_info->dev, ops->reply_cmd, ops->hdr_attr, info, &reply_payload); if (!rskb) goto err_cleanup; hdr_len = rskb->len; ret = ops->fill_reply(rskb, req_info, reply_data); if (ret < 0) goto err_msg; WARN_ONCE(rskb->len - hdr_len > reply_len, "ethnl cmd %d: calculated reply length %d, but consumed %d\n", cmd, reply_len, rskb->len - hdr_len); if (ops->cleanup_data) ops->cleanup_data(reply_data); genlmsg_end(rskb, reply_payload); netdev_put(req_info->dev, &req_info->dev_tracker); kfree(reply_data); kfree(req_info); return genlmsg_reply(rskb, info); err_msg: WARN_ONCE(ret == -EMSGSIZE, "calculated message payload length (%d) not sufficient\n", reply_len); nlmsg_free(rskb); err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); err_dev: netdev_put(req_info->dev, &req_info->dev_tracker); kfree(reply_data); kfree(req_info); return ret; } static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, const struct ethnl_dump_ctx *ctx, const struct genl_info *info) { void *ehdr; int ret; ehdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &ethtool_genl_family, NLM_F_MULTI, ctx->ops->reply_cmd); if (!ehdr) return -EMSGSIZE; ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); 
rtnl_lock(); ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info); rtnl_unlock(); if (ret < 0) goto out; ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr); if (ret < 0) goto out; ret = ctx->ops->fill_reply(skb, ctx->req_info, ctx->reply_data); out: if (ctx->ops->cleanup_data) ctx->ops->cleanup_data(ctx->reply_data); ctx->reply_data->dev = NULL; if (ret < 0) genlmsg_cancel(skb, ehdr); else genlmsg_end(skb, ehdr); return ret; } /* Default ->dumpit() handler for GET requests. */ static int ethnl_default_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); struct net *net = sock_net(skb->sk); struct net_device *dev; int ret = 0; rcu_read_lock(); for_each_netdev_dump(net, dev, ctx->pos_ifindex) { dev_hold(dev); rcu_read_unlock(); ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb)); rcu_read_lock(); dev_put(dev); if (ret < 0 && ret != -EOPNOTSUPP) { if (likely(skb->len)) ret = skb->len; break; } ret = 0; } rcu_read_unlock(); return ret; } /* generic ->start() handler for GET requests */ static int ethnl_default_start(struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); struct ethnl_reply_data *reply_data; const struct ethnl_request_ops *ops; struct ethnl_req_info *req_info; struct genlmsghdr *ghdr; int ret; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); ghdr = nlmsg_data(cb->nlh); ops = ethnl_default_requests[ghdr->cmd]; if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", ghdr->cmd)) return -EOPNOTSUPP; req_info = kzalloc(ops->req_info_size, GFP_KERNEL); if (!req_info) return -ENOMEM; reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL); if (!reply_data) { ret = -ENOMEM; goto free_req_info; } ret = ethnl_default_parse(req_info, &info->info, ops, false); if (req_info->dev) { /* We ignore device specification in dump requests but as the * same parser as for non-dump (doit) requests is used, it * would take reference to the device if it finds one */ netdev_put(req_info->dev, &req_info->dev_tracker); req_info->dev = NULL; } if (ret < 0) goto free_reply_data; ctx->ops = ops; ctx->req_info = req_info; ctx->reply_data = reply_data; ctx->pos_ifindex = 0; return 0; free_reply_data: kfree(reply_data); free_req_info: kfree(req_info); return ret; } /* default ->done() handler for GET requests */ static int ethnl_default_done(struct netlink_callback *cb) { struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); kfree(ctx->reply_data); kfree(ctx->req_info); return 0; } static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) { const struct ethnl_request_ops *ops; struct ethnl_req_info req_info = {}; const u8 cmd = info->genlhdr->cmd; int ret; ops = ethnl_default_requests[cmd]; if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd)) return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr)) return -EINVAL; ret = ethnl_parse_header_dev_get(&req_info, info->attrs[ops->hdr_attr], genl_info_net(info), info->extack, true); if (ret < 0) return ret; if (ops->set_validate) { ret = ops->set_validate(&req_info, info); /* 0 means nothing to do */ if (ret <= 0) goto out_dev; } rtnl_lock(); ret = ethnl_ops_begin(req_info.dev); if (ret < 0) goto out_rtnl; ret = ops->set(&req_info, info); if (ret <= 0) goto out_ops; ethtool_notify(req_info.dev, ops->set_ntf_cmd, NULL); ret = 0; out_ops: ethnl_ops_complete(req_info.dev); out_rtnl: rtnl_unlock(); out_dev: ethnl_parse_header_dev_put(&req_info); return ret; 
} static const struct ethnl_request_ops * ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_LINKINFO_NTF] = &ethnl_linkinfo_request_ops, [ETHTOOL_MSG_LINKMODES_NTF] = &ethnl_linkmodes_request_ops, [ETHTOOL_MSG_DEBUG_NTF] = &ethnl_debug_request_ops, [ETHTOOL_MSG_WOL_NTF] = &ethnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_NTF] = &ethnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_NTF] = &ethnl_privflags_request_ops, [ETHTOOL_MSG_RINGS_NTF] = &ethnl_rings_request_ops, [ETHTOOL_MSG_CHANNELS_NTF] = &ethnl_channels_request_ops, [ETHTOOL_MSG_COALESCE_NTF] = &ethnl_coalesce_request_ops, [ETHTOOL_MSG_PAUSE_NTF] = &ethnl_pause_request_ops, [ETHTOOL_MSG_EEE_NTF] = &ethnl_eee_request_ops, [ETHTOOL_MSG_FEC_NTF] = &ethnl_fec_request_ops, [ETHTOOL_MSG_MODULE_NTF] = &ethnl_module_request_ops, [ETHTOOL_MSG_PLCA_NTF] = &ethnl_plca_cfg_request_ops, [ETHTOOL_MSG_MM_NTF] = &ethnl_mm_request_ops, }; /* default notification handler */ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, const void *data) { struct ethnl_reply_data *reply_data; const struct ethnl_request_ops *ops; struct ethnl_req_info *req_info; struct genl_info info; struct sk_buff *skb; void *reply_payload; int reply_len; int ret; genl_info_init_ntf(&info, &ethtool_genl_family, cmd); if (WARN_ONCE(cmd > ETHTOOL_MSG_KERNEL_MAX || !ethnl_default_notify_ops[cmd], "unexpected notification type %u\n", cmd)) return; ops = ethnl_default_notify_ops[cmd]; req_info = kzalloc(ops->req_info_size, GFP_KERNEL); if (!req_info) return; reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL); if (!reply_data) { kfree(req_info); return; } req_info->dev = dev; req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS; ethnl_init_reply_data(reply_data, ops, dev); ret = ops->prepare_data(req_info, reply_data, &info); if (ret < 0) goto err_cleanup; ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; reply_len = ret + ethnl_reply_header_size(); skb = genlmsg_new(reply_len, GFP_KERNEL); if (!skb) goto err_cleanup; reply_payload = ethnl_bcastmsg_put(skb, cmd); if (!reply_payload) goto err_skb; ret = ethnl_fill_reply_header(skb, dev, ops->hdr_attr); if (ret < 0) goto err_msg; ret = ops->fill_reply(skb, req_info, reply_data); if (ret < 0) goto err_msg; if (ops->cleanup_data) ops->cleanup_data(reply_data); genlmsg_end(skb, reply_payload); kfree(reply_data); kfree(req_info); ethnl_multicast(skb, dev); return; err_msg: WARN_ONCE(ret == -EMSGSIZE, "calculated message payload length (%d) not sufficient\n", reply_len); err_skb: nlmsg_free(skb); err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); kfree(reply_data); kfree(req_info); return; } /* notifications */ typedef void (*ethnl_notify_handler_t)(struct net_device *dev, unsigned int cmd, const void *data); static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_LINKINFO_NTF] = ethnl_default_notify, [ETHTOOL_MSG_LINKMODES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_DEBUG_NTF] = ethnl_default_notify, [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify, [ETHTOOL_MSG_RINGS_NTF] = ethnl_default_notify, [ETHTOOL_MSG_CHANNELS_NTF] = ethnl_default_notify, [ETHTOOL_MSG_COALESCE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PAUSE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify, [ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PLCA_NTF] = ethnl_default_notify, 
[ETHTOOL_MSG_MM_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) { if (unlikely(!ethnl_ok)) return; ASSERT_RTNL(); if (likely(cmd < ARRAY_SIZE(ethnl_notify_handlers) && ethnl_notify_handlers[cmd])) ethnl_notify_handlers[cmd](dev, cmd, data); else WARN_ONCE(1, "notification %u not implemented (dev=%s)\n", cmd, netdev_name(dev)); } EXPORT_SYMBOL(ethtool_notify); static void ethnl_notify_features(struct netdev_notifier_info *info) { struct net_device *dev = netdev_notifier_info_to_dev(info); ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); } static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { switch (event) { case NETDEV_FEAT_CHANGE: ethnl_notify_features(ptr); break; } return NOTIFY_DONE; } static struct notifier_block ethnl_netdev_notifier = { .notifier_call = ethnl_netdev_event, }; /* genetlink setup */ static const struct genl_ops ethtool_genl_ops[] = { { .cmd = ETHTOOL_MSG_STRSET_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_strset_get_policy, .maxattr = ARRAY_SIZE(ethnl_strset_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_LINKINFO_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_linkinfo_get_policy, .maxattr = ARRAY_SIZE(ethnl_linkinfo_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_LINKINFO_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_linkinfo_set_policy, .maxattr = ARRAY_SIZE(ethnl_linkinfo_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_LINKMODES_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_linkmodes_get_policy, .maxattr = ARRAY_SIZE(ethnl_linkmodes_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_LINKMODES_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_linkmodes_set_policy, .maxattr = ARRAY_SIZE(ethnl_linkmodes_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_LINKSTATE_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_linkstate_get_policy, .maxattr = ARRAY_SIZE(ethnl_linkstate_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_DEBUG_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_debug_get_policy, .maxattr = ARRAY_SIZE(ethnl_debug_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_DEBUG_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_debug_set_policy, .maxattr = ARRAY_SIZE(ethnl_debug_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_WOL_GET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_wol_get_policy, .maxattr = ARRAY_SIZE(ethnl_wol_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_WOL_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_wol_set_policy, .maxattr = ARRAY_SIZE(ethnl_wol_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_FEATURES_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_features_get_policy, .maxattr = ARRAY_SIZE(ethnl_features_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_FEATURES_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_features, .policy 
= ethnl_features_set_policy, .maxattr = ARRAY_SIZE(ethnl_features_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_PRIVFLAGS_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_privflags_get_policy, .maxattr = ARRAY_SIZE(ethnl_privflags_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_PRIVFLAGS_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_privflags_set_policy, .maxattr = ARRAY_SIZE(ethnl_privflags_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_RINGS_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_rings_get_policy, .maxattr = ARRAY_SIZE(ethnl_rings_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_RINGS_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_rings_set_policy, .maxattr = ARRAY_SIZE(ethnl_rings_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_CHANNELS_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_channels_get_policy, .maxattr = ARRAY_SIZE(ethnl_channels_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_CHANNELS_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_channels_set_policy, .maxattr = ARRAY_SIZE(ethnl_channels_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_COALESCE_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_coalesce_get_policy, .maxattr = ARRAY_SIZE(ethnl_coalesce_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_COALESCE_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_coalesce_set_policy, .maxattr = ARRAY_SIZE(ethnl_coalesce_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_PAUSE_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_pause_get_policy, .maxattr = ARRAY_SIZE(ethnl_pause_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_PAUSE_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_pause_set_policy, .maxattr = ARRAY_SIZE(ethnl_pause_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_EEE_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_eee_get_policy, .maxattr = ARRAY_SIZE(ethnl_eee_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_EEE_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_eee_set_policy, .maxattr = ARRAY_SIZE(ethnl_eee_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_TSINFO_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_tsinfo_get_policy, .maxattr = ARRAY_SIZE(ethnl_tsinfo_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_CABLE_TEST_ACT, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_act_cable_test, .policy = ethnl_cable_test_act_policy, .maxattr = ARRAY_SIZE(ethnl_cable_test_act_policy) - 1, }, { .cmd = ETHTOOL_MSG_CABLE_TEST_TDR_ACT, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_act_cable_test_tdr, .policy = ethnl_cable_test_tdr_act_policy, .maxattr = ARRAY_SIZE(ethnl_cable_test_tdr_act_policy) - 1, }, { .cmd = ETHTOOL_MSG_TUNNEL_INFO_GET, .doit = ethnl_tunnel_info_doit, .start = ethnl_tunnel_info_start, .dumpit = ethnl_tunnel_info_dumpit, .policy = ethnl_tunnel_info_get_policy, .maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1, }, { .cmd = 
ETHTOOL_MSG_FEC_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_fec_get_policy, .maxattr = ARRAY_SIZE(ethnl_fec_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_FEC_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_fec_set_policy, .maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_MODULE_EEPROM_GET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_module_eeprom_get_policy, .maxattr = ARRAY_SIZE(ethnl_module_eeprom_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_STATS_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_stats_get_policy, .maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_phc_vclocks_get_policy, .maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_MODULE_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_module_get_policy, .maxattr = ARRAY_SIZE(ethnl_module_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_MODULE_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_module_set_policy, .maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_PSE_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_pse_get_policy, .maxattr = ARRAY_SIZE(ethnl_pse_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_PSE_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_pse_set_policy, .maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1, }, { .cmd = ETHTOOL_MSG_RSS_GET, .doit = ethnl_default_doit, .policy = ethnl_rss_get_policy, .maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_PLCA_GET_CFG, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_plca_get_cfg_policy, .maxattr = ARRAY_SIZE(ethnl_plca_get_cfg_policy) - 1, }, { .cmd = ETHTOOL_MSG_PLCA_SET_CFG, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_plca_set_cfg_policy, .maxattr = ARRAY_SIZE(ethnl_plca_set_cfg_policy) - 1, }, { .cmd = ETHTOOL_MSG_PLCA_GET_STATUS, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_plca_get_status_policy, .maxattr = ARRAY_SIZE(ethnl_plca_get_status_policy) - 1, }, { .cmd = ETHTOOL_MSG_MM_GET, .doit = ethnl_default_doit, .start = ethnl_default_start, .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, .policy = ethnl_mm_get_policy, .maxattr = ARRAY_SIZE(ethnl_mm_get_policy) - 1, }, { .cmd = ETHTOOL_MSG_MM_SET, .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_default_set_doit, .policy = ethnl_mm_set_policy, .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1, }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { [ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME }, }; static struct genl_family ethtool_genl_family __ro_after_init = { .name = ETHTOOL_GENL_NAME, .version = ETHTOOL_GENL_VERSION, .netnsok = true, .parallel_ops = 
true, .ops = ethtool_genl_ops, .n_ops = ARRAY_SIZE(ethtool_genl_ops), .resv_start_op = ETHTOOL_MSG_MODULE_GET + 1, .mcgrps = ethtool_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps), }; /* module setup */ static int __init ethnl_init(void) { int ret; ret = genl_register_family(&ethtool_genl_family); if (WARN(ret < 0, "ethtool: genetlink family registration failed")) return ret; ethnl_ok = true; ret = register_netdevice_notifier(&ethnl_netdev_notifier); WARN(ret < 0, "ethtool: net device notifier registration failed"); return ret; } subsys_initcall(ethnl_init);
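/*
 * Hedged sketch (not part of netlink.c): the shape of a GET-type request-ops
 * instance that the generic ethnl_default_doit()/ethnl_default_dumpit()
 * handlers above dispatch to. Only the hooks exercised by those handlers are
 * shown; "foo", ETHTOOL_A_FOO_VAL, ETHTOOL_A_FOO_HEADER and
 * ETHTOOL_MSG_FOO_GET_REPLY are hypothetical names, and the const qualifiers
 * follow the callback usage visible above rather than a verified header.
 */
#include "netlink.h"

struct foo_reply_data {
	struct ethnl_reply_data	base;	/* must be first: the generic code
					 * allocates reply_data_size bytes and
					 * treats the buffer as this member */
	u32			val;
};

static int foo_prepare_data(const struct ethnl_req_info *req_info,
			    struct ethnl_reply_data *reply_data,
			    const struct genl_info *info)
{
	struct foo_reply_data *data =
		container_of(reply_data, struct foo_reply_data, base);
	int ret;

	ret = ethnl_ops_begin(reply_data->dev);
	if (ret < 0)
		return ret;
	data->val = 42;		/* query the driver under ethtool ops here */
	ethnl_ops_complete(reply_data->dev);
	return 0;
}

static int foo_reply_size(const struct ethnl_req_info *req_info,
			  const struct ethnl_reply_data *reply_data)
{
	return nla_total_size(sizeof(u32));	/* ETHTOOL_A_FOO_VAL */
}

static int foo_fill_reply(struct sk_buff *skb,
			  const struct ethnl_req_info *req_info,
			  const struct ethnl_reply_data *reply_data)
{
	const struct foo_reply_data *data =
		container_of(reply_data, struct foo_reply_data, base);

	return nla_put_u32(skb, ETHTOOL_A_FOO_VAL, data->val);
}

/* Such an instance is listed in ethnl_default_requests[] and wired to the
 * default doit/start/dumpit/done callbacks in ethtool_genl_ops[] above.
 */
const struct ethnl_request_ops ethnl_foo_request_ops = {
	.reply_cmd		= ETHTOOL_MSG_FOO_GET_REPLY,
	.hdr_attr		= ETHTOOL_A_FOO_HEADER,
	.req_info_size		= sizeof(struct ethnl_req_info),
	.reply_data_size	= sizeof(struct foo_reply_data),
	.prepare_data		= foo_prepare_data,
	.reply_size		= foo_reply_size,
	.fill_reply		= foo_fill_reply,
};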
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H #define HAVE_JUMP_LABEL_BATCH #include <asm/asm.h> #include <asm/nops.h> #ifndef __ASSEMBLY__ #include <linux/stringify.h> #include <linux/types.h> #define JUMP_TABLE_ENTRY \ ".pushsection __jump_table, \"aw\" \n\t" \ _ASM_ALIGN "\n\t" \ ".long 1b - . \n\t" \ ".long %l[l_yes] - . \n\t" \ _ASM_PTR "%c0 + %c1 - .\n\t" \ ".popsection \n\t" #ifdef CONFIG_HAVE_JUMP_LABEL_HACK static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm goto("1:" "jmp %l[l_yes] # objtool NOPs this \n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (2 | branch) : : l_yes); return false; l_yes: return true; } #else /* !CONFIG_HAVE_JUMP_LABEL_HACK */ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { asm goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; l_yes: return true; } #endif /* CONFIG_HAVE_JUMP_LABEL_HACK */ static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { asm goto("1:" "jmp %l[l_yes]\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; l_yes: return true; } extern int arch_jump_entry_size(struct jump_entry *entry); #endif /* __ASSEMBLY__ */ #endif
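/*
 * Hedged usage sketch (not part of this header): the asm-goto stubs above are
 * what the generic static-branch API in <linux/jump_label.h> compiles down
 * to. Callers never invoke arch_static_branch() directly; they define a key
 * and test it. foo_feature_key and the pr_info() message are made up for
 * illustration.
 */
#include <linux/jump_label.h>
#include <linux/printk.h>

static DEFINE_STATIC_KEY_FALSE(foo_feature_key);

static inline void foo_hot_path(void)
{
	/*
	 * While the key is false this runs as the NOP placed at the
	 * arch_static_branch() site; enabling the key patches it to a jump.
	 */
	if (static_branch_unlikely(&foo_feature_key))
		pr_info("foo feature path taken\n");
}

static void foo_enable_feature(void)
{
	static_branch_enable(&foo_feature_key);	/* live-patches every site */
}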
// SPDX-License-Identifier: GPL-2.0-only /* * HID driver for Logitech receivers * * Copyright (c) 2011 Logitech */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/kfifo.h> #include <linux/delay.h> #include <linux/usb.h> /* For to_usb_interface for kvm extra intf check */ #include <asm/unaligned.h> #include "hid-ids.h" #define DJ_MAX_PAIRED_DEVICES 7 #define DJ_MAX_NUMBER_NOTIFS 8 #define DJ_RECEIVER_INDEX 0 #define DJ_DEVICE_INDEX_MIN 1 #define DJ_DEVICE_INDEX_MAX 7 #define DJREPORT_SHORT_LENGTH 15 #define DJREPORT_LONG_LENGTH 32 #define REPORT_ID_DJ_SHORT 0x20 #define REPORT_ID_DJ_LONG 0x21 #define REPORT_ID_HIDPP_SHORT 0x10 #define REPORT_ID_HIDPP_LONG 0x11 #define REPORT_ID_HIDPP_VERY_LONG 0x12 #define HIDPP_REPORT_SHORT_LENGTH 7 #define HIDPP_REPORT_LONG_LENGTH 20 #define HIDPP_RECEIVER_INDEX 0xff #define REPORT_TYPE_RFREPORT_FIRST 0x01 #define REPORT_TYPE_RFREPORT_LAST 0x1F /* Command Switch to DJ mode */ #define REPORT_TYPE_CMD_SWITCH 0x80 #define CMD_SWITCH_PARAM_DEVBITFIELD 0x00 #define CMD_SWITCH_PARAM_TIMEOUT_SECONDS 0x01 #define TIMEOUT_NO_KEEPALIVE 0x00 /* Command to Get the list of Paired devices */ #define REPORT_TYPE_CMD_GET_PAIRED_DEVICES 0x81 /* Device Paired Notification */ #define REPORT_TYPE_NOTIF_DEVICE_PAIRED 0x41 #define SPFUNCTION_MORE_NOTIF_EXPECTED 0x01 #define SPFUNCTION_DEVICE_LIST_EMPTY 0x02 #define DEVICE_PAIRED_PARAM_SPFUNCTION 0x00 #define DEVICE_PAIRED_PARAM_EQUAD_ID_LSB 0x01 #define 
DEVICE_PAIRED_PARAM_EQUAD_ID_MSB 0x02 #define DEVICE_PAIRED_RF_REPORT_TYPE 0x03 /* Device Un-Paired Notification */ #define REPORT_TYPE_NOTIF_DEVICE_UNPAIRED 0x40 /* Connection Status Notification */ #define REPORT_TYPE_NOTIF_CONNECTION_STATUS 0x42 #define CONNECTION_STATUS_PARAM_STATUS 0x00 #define STATUS_LINKLOSS 0x01 /* Error Notification */ #define REPORT_TYPE_NOTIF_ERROR 0x7F #define NOTIF_ERROR_PARAM_ETYPE 0x00 #define ETYPE_KEEPALIVE_TIMEOUT 0x01 /* supported DJ HID && RF report types */ #define REPORT_TYPE_KEYBOARD 0x01 #define REPORT_TYPE_MOUSE 0x02 #define REPORT_TYPE_CONSUMER_CONTROL 0x03 #define REPORT_TYPE_SYSTEM_CONTROL 0x04 #define REPORT_TYPE_MEDIA_CENTER 0x08 #define REPORT_TYPE_LEDS 0x0E /* RF Report types bitfield */ #define STD_KEYBOARD BIT(1) #define STD_MOUSE BIT(2) #define MULTIMEDIA BIT(3) #define POWER_KEYS BIT(4) #define KBD_MOUSE BIT(5) #define MEDIA_CENTER BIT(8) #define KBD_LEDS BIT(14) /* Fake (bitnr > NUMBER_OF_HID_REPORTS) bit to track HID++ capability */ #define HIDPP BIT_ULL(63) /* HID++ Device Connected Notification */ #define REPORT_TYPE_NOTIF_DEVICE_CONNECTED 0x41 #define HIDPP_PARAM_PROTO_TYPE 0x00 #define HIDPP_PARAM_DEVICE_INFO 0x01 #define HIDPP_PARAM_EQUAD_LSB 0x02 #define HIDPP_PARAM_EQUAD_MSB 0x03 #define HIDPP_PARAM_27MHZ_DEVID 0x03 #define HIDPP_DEVICE_TYPE_MASK GENMASK(3, 0) #define HIDPP_LINK_STATUS_MASK BIT(6) #define HIDPP_MANUFACTURER_MASK BIT(7) #define HIDPP_27MHZ_SECURE_MASK BIT(7) #define HIDPP_DEVICE_TYPE_KEYBOARD 1 #define HIDPP_DEVICE_TYPE_MOUSE 2 #define HIDPP_SET_REGISTER 0x80 #define HIDPP_GET_LONG_REGISTER 0x83 #define HIDPP_REG_CONNECTION_STATE 0x02 #define HIDPP_REG_PAIRING_INFORMATION 0xB5 #define HIDPP_PAIRING_INFORMATION 0x20 #define HIDPP_FAKE_DEVICE_ARRIVAL 0x02 enum recvr_type { recvr_type_dj, recvr_type_hidpp, recvr_type_gaming_hidpp, recvr_type_mouse_only, recvr_type_27mhz, recvr_type_bluetooth, recvr_type_dinovo, }; struct dj_report { u8 report_id; u8 device_index; u8 report_type; u8 report_params[DJREPORT_SHORT_LENGTH - 3]; }; struct hidpp_event { u8 report_id; u8 device_index; u8 sub_id; u8 params[HIDPP_REPORT_LONG_LENGTH - 3U]; } __packed; struct dj_receiver_dev { struct hid_device *mouse; struct hid_device *keyboard; struct hid_device *hidpp; struct dj_device *paired_dj_devices[DJ_MAX_PAIRED_DEVICES + DJ_DEVICE_INDEX_MIN]; struct list_head list; struct kref kref; struct work_struct work; struct kfifo notif_fifo; unsigned long last_query; /* in jiffies */ bool ready; enum recvr_type type; unsigned int unnumbered_application; spinlock_t lock; }; struct dj_device { struct hid_device *hdev; struct dj_receiver_dev *dj_receiver_dev; u64 reports_supported; u8 device_index; }; #define WORKITEM_TYPE_EMPTY 0 #define WORKITEM_TYPE_PAIRED 1 #define WORKITEM_TYPE_UNPAIRED 2 #define WORKITEM_TYPE_UNKNOWN 255 struct dj_workitem { u8 type; /* WORKITEM_TYPE_* */ u8 device_index; u8 device_type; u8 quad_id_msb; u8 quad_id_lsb; u64 reports_supported; }; /* Keyboard descriptor (1) */ static const char kbd_descriptor[] = { 0x05, 0x01, /* USAGE_PAGE (generic Desktop) */ 0x09, 0x06, /* USAGE (Keyboard) */ 0xA1, 0x01, /* COLLECTION (Application) */ 0x85, 0x01, /* REPORT_ID (1) */ 0x95, 0x08, /* REPORT_COUNT (8) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ 0x05, 0x07, /* USAGE_PAGE (Keyboard) */ 0x19, 0xE0, /* USAGE_MINIMUM (Left Control) */ 0x29, 0xE7, /* USAGE_MAXIMUM (Right GUI) */ 0x81, 0x02, /* INPUT (Data,Var,Abs) */ 0x95, 0x06, /* REPORT_COUNT (6) */ 0x75, 0x08, /* 
REPORT_SIZE (8) */ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ 0x26, 0xFF, 0x00, /* LOGICAL_MAXIMUM (255) */ 0x05, 0x07, /* USAGE_PAGE (Keyboard) */ 0x19, 0x00, /* USAGE_MINIMUM (no event) */ 0x2A, 0xFF, 0x00, /* USAGE_MAXIMUM (reserved) */ 0x81, 0x00, /* INPUT (Data,Ary,Abs) */ 0x85, 0x0e, /* REPORT_ID (14) */ 0x05, 0x08, /* USAGE PAGE (LED page) */ 0x95, 0x05, /* REPORT COUNT (5) */ 0x75, 0x01, /* REPORT SIZE (1) */ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ 0x19, 0x01, /* USAGE MINIMUM (1) */ 0x29, 0x05, /* USAGE MAXIMUM (5) */ 0x91, 0x02, /* OUTPUT (Data, Variable, Absolute) */ 0x95, 0x01, /* REPORT COUNT (1) */ 0x75, 0x03, /* REPORT SIZE (3) */ 0x91, 0x01, /* OUTPUT (Constant) */ 0xC0 }; /* Mouse descriptor (2) */ static const char mse_descriptor[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x02, /* USAGE (Mouse) */ 0xA1, 0x01, /* COLLECTION (Application) */ 0x85, 0x02, /* REPORT_ID = 2 */ 0x09, 0x01, /* USAGE (pointer) */ 0xA1, 0x00, /* COLLECTION (physical) */ 0x05, 0x09, /* USAGE_PAGE (buttons) */ 0x19, 0x01, /* USAGE_MIN (1) */ 0x29, 0x10, /* USAGE_MAX (16) */ 0x15, 0x00, /* LOGICAL_MIN (0) */ 0x25, 0x01, /* LOGICAL_MAX (1) */ 0x95, 0x10, /* REPORT_COUNT (16) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x81, 0x02, /* INPUT (data var abs) */ 0x05, 0x01, /* USAGE_PAGE (generic desktop) */ 0x16, 0x01, 0xF8, /* LOGICAL_MIN (-2047) */ 0x26, 0xFF, 0x07, /* LOGICAL_MAX (2047) */ 0x75, 0x0C, /* REPORT_SIZE (12) */ 0x95, 0x02, /* REPORT_COUNT (2) */ 0x09, 0x30, /* USAGE (X) */ 0x09, 0x31, /* USAGE (Y) */ 0x81, 0x06, /* INPUT */ 0x15, 0x81, /* LOGICAL_MIN (-127) */ 0x25, 0x7F, /* LOGICAL_MAX (127) */ 0x75, 0x08, /* REPORT_SIZE (8) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x09, 0x38, /* USAGE (wheel) */ 0x81, 0x06, /* INPUT */ 0x05, 0x0C, /* USAGE_PAGE(consumer) */ 0x0A, 0x38, 0x02, /* USAGE(AC Pan) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x81, 0x06, /* INPUT */ 0xC0, /* END_COLLECTION */ 0xC0, /* END_COLLECTION */ }; /* Mouse descriptor (2) for 27 MHz receiver, only 8 buttons */ static const char mse_27mhz_descriptor[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x02, /* USAGE (Mouse) */ 0xA1, 0x01, /* COLLECTION (Application) */ 0x85, 0x02, /* REPORT_ID = 2 */ 0x09, 0x01, /* USAGE (pointer) */ 0xA1, 0x00, /* COLLECTION (physical) */ 0x05, 0x09, /* USAGE_PAGE (buttons) */ 0x19, 0x01, /* USAGE_MIN (1) */ 0x29, 0x08, /* USAGE_MAX (8) */ 0x15, 0x00, /* LOGICAL_MIN (0) */ 0x25, 0x01, /* LOGICAL_MAX (1) */ 0x95, 0x08, /* REPORT_COUNT (8) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x81, 0x02, /* INPUT (data var abs) */ 0x05, 0x01, /* USAGE_PAGE (generic desktop) */ 0x16, 0x01, 0xF8, /* LOGICAL_MIN (-2047) */ 0x26, 0xFF, 0x07, /* LOGICAL_MAX (2047) */ 0x75, 0x0C, /* REPORT_SIZE (12) */ 0x95, 0x02, /* REPORT_COUNT (2) */ 0x09, 0x30, /* USAGE (X) */ 0x09, 0x31, /* USAGE (Y) */ 0x81, 0x06, /* INPUT */ 0x15, 0x81, /* LOGICAL_MIN (-127) */ 0x25, 0x7F, /* LOGICAL_MAX (127) */ 0x75, 0x08, /* REPORT_SIZE (8) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x09, 0x38, /* USAGE (wheel) */ 0x81, 0x06, /* INPUT */ 0x05, 0x0C, /* USAGE_PAGE(consumer) */ 0x0A, 0x38, 0x02, /* USAGE(AC Pan) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x81, 0x06, /* INPUT */ 0xC0, /* END_COLLECTION */ 0xC0, /* END_COLLECTION */ }; /* Mouse descriptor (2) for Bluetooth receiver, low-res hwheel, 12 buttons */ static const char mse_bluetooth_descriptor[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x02, /* USAGE (Mouse) */ 0xA1, 0x01, /* COLLECTION (Application) */ 0x85, 0x02, /* REPORT_ID = 2 */ 
0x09, 0x01, /* USAGE (pointer) */ 0xA1, 0x00, /* COLLECTION (physical) */ 0x05, 0x09, /* USAGE_PAGE (buttons) */ 0x19, 0x01, /* USAGE_MIN (1) */ 0x29, 0x08, /* USAGE_MAX (8) */ 0x15, 0x00, /* LOGICAL_MIN (0) */ 0x25, 0x01, /* LOGICAL_MAX (1) */ 0x95, 0x08, /* REPORT_COUNT (8) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x81, 0x02, /* INPUT (data var abs) */ 0x05, 0x01, /* USAGE_PAGE (generic desktop) */ 0x16, 0x01, 0xF8, /* LOGICAL_MIN (-2047) */ 0x26, 0xFF, 0x07, /* LOGICAL_MAX (2047) */ 0x75, 0x0C, /* REPORT_SIZE (12) */ 0x95, 0x02, /* REPORT_COUNT (2) */ 0x09, 0x30, /* USAGE (X) */ 0x09, 0x31, /* USAGE (Y) */ 0x81, 0x06, /* INPUT */ 0x15, 0x81, /* LOGICAL_MIN (-127) */ 0x25, 0x7F, /* LOGICAL_MAX (127) */ 0x75, 0x08, /* REPORT_SIZE (8) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x09, 0x38, /* USAGE (wheel) */ 0x81, 0x06, /* INPUT */ 0x05, 0x0C, /* USAGE_PAGE(consumer) */ 0x0A, 0x38, 0x02, /* USAGE(AC Pan) */ 0x15, 0xF9, /* LOGICAL_MIN (-7) */ 0x25, 0x07, /* LOGICAL_MAX (7) */ 0x75, 0x04, /* REPORT_SIZE (4) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x81, 0x06, /* INPUT */ 0x05, 0x09, /* USAGE_PAGE (buttons) */ 0x19, 0x09, /* USAGE_MIN (9) */ 0x29, 0x0C, /* USAGE_MAX (12) */ 0x15, 0x00, /* LOGICAL_MIN (0) */ 0x25, 0x01, /* LOGICAL_MAX (1) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x95, 0x04, /* REPORT_COUNT (4) */ 0x81, 0x02, /* INPUT (Data,Var,Abs) */ 0xC0, /* END_COLLECTION */ 0xC0, /* END_COLLECTION */ }; /* Mouse descriptor (5) for Bluetooth receiver, normal-res hwheel, 8 buttons */ static const char mse5_bluetooth_descriptor[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x02, /* Usage (Mouse) */ 0xa1, 0x01, /* Collection (Application) */ 0x85, 0x05, /* Report ID (5) */ 0x09, 0x01, /* Usage (Pointer) */ 0xa1, 0x00, /* Collection (Physical) */ 0x05, 0x09, /* Usage Page (Button) */ 0x19, 0x01, /* Usage Minimum (1) */ 0x29, 0x08, /* Usage Maximum (8) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x95, 0x08, /* Report Count (8) */ 0x75, 0x01, /* Report Size (1) */ 0x81, 0x02, /* Input (Data,Var,Abs) */ 0x05, 0x01, /* Usage Page (Generic Desktop) */ 0x16, 0x01, 0xf8, /* Logical Minimum (-2047) */ 0x26, 0xff, 0x07, /* Logical Maximum (2047) */ 0x75, 0x0c, /* Report Size (12) */ 0x95, 0x02, /* Report Count (2) */ 0x09, 0x30, /* Usage (X) */ 0x09, 0x31, /* Usage (Y) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x15, 0x81, /* Logical Minimum (-127) */ 0x25, 0x7f, /* Logical Maximum (127) */ 0x75, 0x08, /* Report Size (8) */ 0x95, 0x01, /* Report Count (1) */ 0x09, 0x38, /* Usage (Wheel) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0x05, 0x0c, /* Usage Page (Consumer Devices) */ 0x0a, 0x38, 0x02, /* Usage (AC Pan) */ 0x15, 0x81, /* Logical Minimum (-127) */ 0x25, 0x7f, /* Logical Maximum (127) */ 0x75, 0x08, /* Report Size (8) */ 0x95, 0x01, /* Report Count (1) */ 0x81, 0x06, /* Input (Data,Var,Rel) */ 0xc0, /* End Collection */ 0xc0, /* End Collection */ }; /* Gaming Mouse descriptor (2) */ static const char mse_high_res_descriptor[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x02, /* USAGE (Mouse) */ 0xA1, 0x01, /* COLLECTION (Application) */ 0x85, 0x02, /* REPORT_ID = 2 */ 0x09, 0x01, /* USAGE (pointer) */ 0xA1, 0x00, /* COLLECTION (physical) */ 0x05, 0x09, /* USAGE_PAGE (buttons) */ 0x19, 0x01, /* USAGE_MIN (1) */ 0x29, 0x10, /* USAGE_MAX (16) */ 0x15, 0x00, /* LOGICAL_MIN (0) */ 0x25, 0x01, /* LOGICAL_MAX (1) */ 0x95, 0x10, /* REPORT_COUNT (16) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x81, 0x02, /* INPUT (data var abs) */ 0x05, 0x01, /* USAGE_PAGE (generic 
desktop) */ 0x16, 0x01, 0x80, /* LOGICAL_MIN (-32767) */ 0x26, 0xFF, 0x7F, /* LOGICAL_MAX (32767) */ 0x75, 0x10, /* REPORT_SIZE (16) */ 0x95, 0x02, /* REPORT_COUNT (2) */ 0x09, 0x30, /* USAGE (X) */ 0x09, 0x31, /* USAGE (Y) */ 0x81, 0x06, /* INPUT */ 0x15, 0x81, /* LOGICAL_MIN (-127) */ 0x25, 0x7F, /* LOGICAL_MAX (127) */ 0x75, 0x08, /* REPORT_SIZE (8) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x09, 0x38, /* USAGE (wheel) */ 0x81, 0x06, /* INPUT */ 0x05, 0x0C, /* USAGE_PAGE(consumer) */ 0x0A, 0x38, 0x02, /* USAGE(AC Pan) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x81, 0x06, /* INPUT */ 0xC0, /* END_COLLECTION */ 0xC0, /* END_COLLECTION */ }; /* Consumer Control descriptor (3) */ static const char consumer_descriptor[] = { 0x05, 0x0C, /* USAGE_PAGE (Consumer Devices) */ 0x09, 0x01, /* USAGE (Consumer Control) */ 0xA1, 0x01, /* COLLECTION (Application) */ 0x85, 0x03, /* REPORT_ID = 3 */ 0x75, 0x10, /* REPORT_SIZE (16) */ 0x95, 0x02, /* REPORT_COUNT (2) */ 0x15, 0x01, /* LOGICAL_MIN (1) */ 0x26, 0xFF, 0x02, /* LOGICAL_MAX (767) */ 0x19, 0x01, /* USAGE_MIN (1) */ 0x2A, 0xFF, 0x02, /* USAGE_MAX (767) */ 0x81, 0x00, /* INPUT (Data Ary Abs) */ 0xC0, /* END_COLLECTION */ }; /* */ /* System control descriptor (4) */ static const char syscontrol_descriptor[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x80, /* USAGE (System Control) */ 0xA1, 0x01, /* COLLECTION (Application) */ 0x85, 0x04, /* REPORT_ID = 4 */ 0x75, 0x02, /* REPORT_SIZE (2) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x15, 0x01, /* LOGICAL_MIN (1) */ 0x25, 0x03, /* LOGICAL_MAX (3) */ 0x09, 0x82, /* USAGE (System Sleep) */ 0x09, 0x81, /* USAGE (System Power Down) */ 0x09, 0x83, /* USAGE (System Wake Up) */ 0x81, 0x60, /* INPUT (Data Ary Abs NPrf Null) */ 0x75, 0x06, /* REPORT_SIZE (6) */ 0x81, 0x03, /* INPUT (Cnst Var Abs) */ 0xC0, /* END_COLLECTION */ }; /* Media descriptor (8) */ static const char media_descriptor[] = { 0x06, 0xbc, 0xff, /* Usage Page 0xffbc */ 0x09, 0x88, /* Usage 0x0088 */ 0xa1, 0x01, /* BeginCollection */ 0x85, 0x08, /* Report ID 8 */ 0x19, 0x01, /* Usage Min 0x0001 */ 0x29, 0xff, /* Usage Max 0x00ff */ 0x15, 0x01, /* Logical Min 1 */ 0x26, 0xff, 0x00, /* Logical Max 255 */ 0x75, 0x08, /* Report Size 8 */ 0x95, 0x01, /* Report Count 1 */ 0x81, 0x00, /* Input */ 0xc0, /* EndCollection */ }; /* */ /* HIDPP descriptor */ static const char hidpp_descriptor[] = { 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ 0x09, 0x01, /* Usage (Vendor Usage 1) */ 0xa1, 0x01, /* Collection (Application) */ 0x85, 0x10, /* Report ID (16) */ 0x75, 0x08, /* Report Size (8) */ 0x95, 0x06, /* Report Count (6) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x26, 0xff, 0x00, /* Logical Maximum (255) */ 0x09, 0x01, /* Usage (Vendor Usage 1) */ 0x81, 0x00, /* Input (Data,Arr,Abs) */ 0x09, 0x01, /* Usage (Vendor Usage 1) */ 0x91, 0x00, /* Output (Data,Arr,Abs) */ 0xc0, /* End Collection */ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ 0x09, 0x02, /* Usage (Vendor Usage 2) */ 0xa1, 0x01, /* Collection (Application) */ 0x85, 0x11, /* Report ID (17) */ 0x75, 0x08, /* Report Size (8) */ 0x95, 0x13, /* Report Count (19) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x26, 0xff, 0x00, /* Logical Maximum (255) */ 0x09, 0x02, /* Usage (Vendor Usage 2) */ 0x81, 0x00, /* Input (Data,Arr,Abs) */ 0x09, 0x02, /* Usage (Vendor Usage 2) */ 0x91, 0x00, /* Output (Data,Arr,Abs) */ 0xc0, /* End Collection */ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ 0x09, 0x04, /* Usage (Vendor Usage 0x04) */ 0xa1, 0x01, /* Collection (Application) 
*/ 0x85, 0x20, /* Report ID (32) */ 0x75, 0x08, /* Report Size (8) */ 0x95, 0x0e, /* Report Count (14) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x26, 0xff, 0x00, /* Logical Maximum (255) */ 0x09, 0x41, /* Usage (Vendor Usage 0x41) */ 0x81, 0x00, /* Input (Data,Arr,Abs) */ 0x09, 0x41, /* Usage (Vendor Usage 0x41) */ 0x91, 0x00, /* Output (Data,Arr,Abs) */ 0x85, 0x21, /* Report ID (33) */ 0x95, 0x1f, /* Report Count (31) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x26, 0xff, 0x00, /* Logical Maximum (255) */ 0x09, 0x42, /* Usage (Vendor Usage 0x42) */ 0x81, 0x00, /* Input (Data,Arr,Abs) */ 0x09, 0x42, /* Usage (Vendor Usage 0x42) */ 0x91, 0x00, /* Output (Data,Arr,Abs) */ 0xc0, /* End Collection */ }; /* Maximum size of all defined hid reports in bytes (including report id) */ #define MAX_REPORT_SIZE 8 /* Make sure all descriptors are present here */ #define MAX_RDESC_SIZE \ (sizeof(kbd_descriptor) + \ sizeof(mse_bluetooth_descriptor) + \ sizeof(mse5_bluetooth_descriptor) + \ sizeof(consumer_descriptor) + \ sizeof(syscontrol_descriptor) + \ sizeof(media_descriptor) + \ sizeof(hidpp_descriptor)) /* Number of possible hid report types that can be created by this driver. * * Right now, RF report types have the same report types (or report ids) * as the hid reports created from those RF reports. In the future * this doesn't have to be true. * * For instance, RF report type 0x01, which has a size of 8 bytes, corresponds * to hid report id 0x01, the standard keyboard. The same applies to mouse * reports, consumer control, etc. If a new RF report is created, it doesn't * have to have the same report id as its corresponding hid report, so a * translation may have to take place for future report types. */ #define NUMBER_OF_HID_REPORTS 32 static const u8 hid_reportid_size_map[NUMBER_OF_HID_REPORTS] = { [1] = 8, /* Standard keyboard */ [2] = 8, /* Standard mouse */ [3] = 5, /* Consumer control */ [4] = 2, /* System control */ [8] = 2, /* Media Center */ }; #define LOGITECH_DJ_INTERFACE_NUMBER 0x02 static const struct hid_ll_driver logi_dj_ll_driver; static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev); static void delayedwork_callback(struct work_struct *work); static LIST_HEAD(dj_hdev_list); static DEFINE_MUTEX(dj_hdev_list_lock); static bool recvr_type_is_bluetooth(enum recvr_type type) { return type == recvr_type_bluetooth || type == recvr_type_dinovo; } /* * dj/HID++ receivers are really a single logical entity, but for BIOS/Windows * compatibility they have multiple USB interfaces. On HID++ receivers we need * to listen for input reports on both interfaces. The functions below are used * to create a single struct dj_receiver_dev for all interfaces belonging to * a single USB-device / receiver. */ static struct dj_receiver_dev *dj_find_receiver_dev(struct hid_device *hdev, enum recvr_type type) { struct dj_receiver_dev *djrcv_dev; char sep; /* * The bluetooth receiver contains a built-in hub and has separate * USB-devices for the keyboard and mouse interfaces. */ sep = recvr_type_is_bluetooth(type) ? '.'
: '/'; /* Try to find an already-probed interface from the same device */ list_for_each_entry(djrcv_dev, &dj_hdev_list, list) { if (djrcv_dev->mouse && hid_compare_device_paths(hdev, djrcv_dev->mouse, sep)) { kref_get(&djrcv_dev->kref); return djrcv_dev; } if (djrcv_dev->keyboard && hid_compare_device_paths(hdev, djrcv_dev->keyboard, sep)) { kref_get(&djrcv_dev->kref); return djrcv_dev; } if (djrcv_dev->hidpp && hid_compare_device_paths(hdev, djrcv_dev->hidpp, sep)) { kref_get(&djrcv_dev->kref); return djrcv_dev; } } return NULL; } static void dj_release_receiver_dev(struct kref *kref) { struct dj_receiver_dev *djrcv_dev = container_of(kref, struct dj_receiver_dev, kref); list_del(&djrcv_dev->list); kfifo_free(&djrcv_dev->notif_fifo); kfree(djrcv_dev); } static void dj_put_receiver_dev(struct hid_device *hdev) { struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); mutex_lock(&dj_hdev_list_lock); if (djrcv_dev->mouse == hdev) djrcv_dev->mouse = NULL; if (djrcv_dev->keyboard == hdev) djrcv_dev->keyboard = NULL; if (djrcv_dev->hidpp == hdev) djrcv_dev->hidpp = NULL; kref_put(&djrcv_dev->kref, dj_release_receiver_dev); mutex_unlock(&dj_hdev_list_lock); } static struct dj_receiver_dev *dj_get_receiver_dev(struct hid_device *hdev, enum recvr_type type, unsigned int application, bool is_hidpp) { struct dj_receiver_dev *djrcv_dev; mutex_lock(&dj_hdev_list_lock); djrcv_dev = dj_find_receiver_dev(hdev, type); if (!djrcv_dev) { djrcv_dev = kzalloc(sizeof(*djrcv_dev), GFP_KERNEL); if (!djrcv_dev) goto out; INIT_WORK(&djrcv_dev->work, delayedwork_callback); spin_lock_init(&djrcv_dev->lock); if (kfifo_alloc(&djrcv_dev->notif_fifo, DJ_MAX_NUMBER_NOTIFS * sizeof(struct dj_workitem), GFP_KERNEL)) { kfree(djrcv_dev); djrcv_dev = NULL; goto out; } kref_init(&djrcv_dev->kref); list_add_tail(&djrcv_dev->list, &dj_hdev_list); djrcv_dev->last_query = jiffies; djrcv_dev->type = type; } if (application == HID_GD_KEYBOARD) djrcv_dev->keyboard = hdev; if (application == HID_GD_MOUSE) djrcv_dev->mouse = hdev; if (is_hidpp) djrcv_dev->hidpp = hdev; hid_set_drvdata(hdev, djrcv_dev); out: mutex_unlock(&dj_hdev_list_lock); return djrcv_dev; } static void logi_dj_recv_destroy_djhid_device(struct dj_receiver_dev *djrcv_dev, struct dj_workitem *workitem) { /* Called in delayed work context */ struct dj_device *dj_dev; unsigned long flags; spin_lock_irqsave(&djrcv_dev->lock, flags); dj_dev = djrcv_dev->paired_dj_devices[workitem->device_index]; djrcv_dev->paired_dj_devices[workitem->device_index] = NULL; spin_unlock_irqrestore(&djrcv_dev->lock, flags); if (dj_dev != NULL) { hid_destroy_device(dj_dev->hdev); kfree(dj_dev); } else { hid_err(djrcv_dev->hidpp, "%s: can't destroy a NULL device\n", __func__); } } static void logi_dj_recv_add_djhid_device(struct dj_receiver_dev *djrcv_dev, struct dj_workitem *workitem) { /* Called in delayed work context */ struct hid_device *djrcv_hdev = djrcv_dev->hidpp; struct hid_device *dj_hiddev; struct dj_device *dj_dev; u8 device_index = workitem->device_index; unsigned long flags; /* Device index goes from 1 to 6, we need 3 bytes to store the * semicolon, the index, and a null terminator */ unsigned char tmpstr[3]; /* We are the only one ever adding a device, no need to lock */ if (djrcv_dev->paired_dj_devices[device_index]) { /* The device is already known. No need to reallocate it. 
*/ dbg_hid("%s: device is already known\n", __func__); return; } dj_hiddev = hid_allocate_device(); if (IS_ERR(dj_hiddev)) { hid_err(djrcv_hdev, "%s: hid_allocate_dev failed\n", __func__); return; } dj_hiddev->ll_driver = &logi_dj_ll_driver; dj_hiddev->dev.parent = &djrcv_hdev->dev; dj_hiddev->bus = BUS_USB; dj_hiddev->vendor = djrcv_hdev->vendor; dj_hiddev->product = (workitem->quad_id_msb << 8) | workitem->quad_id_lsb; if (workitem->device_type) { const char *type_str = "Device"; switch (workitem->device_type) { case 0x01: type_str = "Keyboard"; break; case 0x02: type_str = "Mouse"; break; case 0x03: type_str = "Numpad"; break; case 0x04: type_str = "Presenter"; break; case 0x07: type_str = "Remote Control"; break; case 0x08: type_str = "Trackball"; break; case 0x09: type_str = "Touchpad"; break; } snprintf(dj_hiddev->name, sizeof(dj_hiddev->name), "Logitech Wireless %s PID:%04x", type_str, dj_hiddev->product); } else { snprintf(dj_hiddev->name, sizeof(dj_hiddev->name), "Logitech Wireless Device PID:%04x", dj_hiddev->product); } if (djrcv_dev->type == recvr_type_27mhz) dj_hiddev->group = HID_GROUP_LOGITECH_27MHZ_DEVICE; else dj_hiddev->group = HID_GROUP_LOGITECH_DJ_DEVICE; memcpy(dj_hiddev->phys, djrcv_hdev->phys, sizeof(djrcv_hdev->phys)); snprintf(tmpstr, sizeof(tmpstr), ":%d", device_index); strlcat(dj_hiddev->phys, tmpstr, sizeof(dj_hiddev->phys)); dj_dev = kzalloc(sizeof(struct dj_device), GFP_KERNEL); if (!dj_dev) { hid_err(djrcv_hdev, "%s: failed allocating dj_dev\n", __func__); goto dj_device_allocate_fail; } dj_dev->reports_supported = workitem->reports_supported; dj_dev->hdev = dj_hiddev; dj_dev->dj_receiver_dev = djrcv_dev; dj_dev->device_index = device_index; dj_hiddev->driver_data = dj_dev; spin_lock_irqsave(&djrcv_dev->lock, flags); djrcv_dev->paired_dj_devices[device_index] = dj_dev; spin_unlock_irqrestore(&djrcv_dev->lock, flags); if (hid_add_device(dj_hiddev)) { hid_err(djrcv_hdev, "%s: failed adding dj_device\n", __func__); goto hid_add_device_fail; } return; hid_add_device_fail: spin_lock_irqsave(&djrcv_dev->lock, flags); djrcv_dev->paired_dj_devices[device_index] = NULL; spin_unlock_irqrestore(&djrcv_dev->lock, flags); kfree(dj_dev); dj_device_allocate_fail: hid_destroy_device(dj_hiddev); } static void delayedwork_callback(struct work_struct *work) { struct dj_receiver_dev *djrcv_dev = container_of(work, struct dj_receiver_dev, work); struct dj_workitem workitem; unsigned long flags; int count; int retval; dbg_hid("%s\n", __func__); spin_lock_irqsave(&djrcv_dev->lock, flags); /* * Since we attach to multiple interfaces, we may get scheduled before * we are bound to the HID++ interface, catch this. 
*/ if (!djrcv_dev->ready) { pr_warn("%s: delayedwork queued before hidpp interface was enumerated\n", __func__); spin_unlock_irqrestore(&djrcv_dev->lock, flags); return; } count = kfifo_out(&djrcv_dev->notif_fifo, &workitem, sizeof(workitem)); if (count != sizeof(workitem)) { spin_unlock_irqrestore(&djrcv_dev->lock, flags); return; } if (!kfifo_is_empty(&djrcv_dev->notif_fifo)) schedule_work(&djrcv_dev->work); spin_unlock_irqrestore(&djrcv_dev->lock, flags); switch (workitem.type) { case WORKITEM_TYPE_PAIRED: logi_dj_recv_add_djhid_device(djrcv_dev, &workitem); break; case WORKITEM_TYPE_UNPAIRED: logi_dj_recv_destroy_djhid_device(djrcv_dev, &workitem); break; case WORKITEM_TYPE_UNKNOWN: retval = logi_dj_recv_query_paired_devices(djrcv_dev); if (retval) { hid_err(djrcv_dev->hidpp, "%s: logi_dj_recv_query_paired_devices error: %d\n", __func__, retval); } break; case WORKITEM_TYPE_EMPTY: dbg_hid("%s: device list is empty\n", __func__); break; } } /* * Sometimes we receive reports for which we do not have a paired dj_device * associated with the device_index or report-type to forward the report to. * This means that the original "device paired" notification corresponding * to the dj_device never arrived to this driver. Possible reasons for this are: * 1) hid-core discards all packets coming from a device during probe(). * 2) if the receiver is plugged into a KVM switch then the pairing reports * are only forwarded to it if the focus is on this PC. * This function deals with this by re-asking the receiver for the list of * connected devices in the delayed work callback. * This function MUST be called with djrcv->lock held. */ static void logi_dj_recv_queue_unknown_work(struct dj_receiver_dev *djrcv_dev) { struct dj_workitem workitem = { .type = WORKITEM_TYPE_UNKNOWN }; /* Rate limit queries done because of unhandled reports to 2/sec */ if (time_before(jiffies, djrcv_dev->last_query + HZ / 2)) return; kfifo_in(&djrcv_dev->notif_fifo, &workitem, sizeof(workitem)); schedule_work(&djrcv_dev->work); } static void logi_dj_recv_queue_notification(struct dj_receiver_dev *djrcv_dev, struct dj_report *dj_report) { /* We are called from atomic context (tasklet && djrcv->lock held) */ struct dj_workitem workitem = { .device_index = dj_report->device_index, }; switch (dj_report->report_type) { case REPORT_TYPE_NOTIF_DEVICE_PAIRED: workitem.type = WORKITEM_TYPE_PAIRED; if (dj_report->report_params[DEVICE_PAIRED_PARAM_SPFUNCTION] & SPFUNCTION_DEVICE_LIST_EMPTY) { workitem.type = WORKITEM_TYPE_EMPTY; break; } fallthrough; case REPORT_TYPE_NOTIF_DEVICE_UNPAIRED: workitem.quad_id_msb = dj_report->report_params[DEVICE_PAIRED_PARAM_EQUAD_ID_MSB]; workitem.quad_id_lsb = dj_report->report_params[DEVICE_PAIRED_PARAM_EQUAD_ID_LSB]; workitem.reports_supported = get_unaligned_le32( dj_report->report_params + DEVICE_PAIRED_RF_REPORT_TYPE); workitem.reports_supported |= HIDPP; if (dj_report->report_type == REPORT_TYPE_NOTIF_DEVICE_UNPAIRED) workitem.type = WORKITEM_TYPE_UNPAIRED; break; default: logi_dj_recv_queue_unknown_work(djrcv_dev); return; } kfifo_in(&djrcv_dev->notif_fifo, &workitem, sizeof(workitem)); schedule_work(&djrcv_dev->work); } /* * Some quad/bluetooth keyboards have a builtin touchpad in this case we see * only 1 paired device with a device_type of REPORT_TYPE_KEYBOARD. For the * touchpad to work we must also forward mouse input reports to the dj_hiddev * created for the keyboard (instead of forwarding them to a second paired * device with a device_type of REPORT_TYPE_MOUSE as we normally would). 
* * On Dinovo receivers the keyboard's touchpad and an optional paired actual * mouse send separate input reports, INPUT(2) aka STD_MOUSE for the mouse * and INPUT(5) aka KBD_MOUSE for the keyboard's touchpad. * * On MX5x00 receivers (which can also be paired with a Dinovo keyboard) * INPUT(2) is used for both an optional paired actual mouse and for the * keyboard's touchpad. */ static const u16 kbd_builtin_touchpad_ids[] = { 0xb309, /* Dinovo Edge */ 0xb30c, /* Dinovo Mini */ }; static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, struct hidpp_event *hidpp_report, struct dj_workitem *workitem) { struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); int i, id; workitem->type = WORKITEM_TYPE_PAIRED; workitem->device_type = hidpp_report->params[HIDPP_PARAM_DEVICE_INFO] & HIDPP_DEVICE_TYPE_MASK; workitem->quad_id_msb = hidpp_report->params[HIDPP_PARAM_EQUAD_MSB]; workitem->quad_id_lsb = hidpp_report->params[HIDPP_PARAM_EQUAD_LSB]; switch (workitem->device_type) { case REPORT_TYPE_KEYBOARD: workitem->reports_supported |= STD_KEYBOARD | MULTIMEDIA | POWER_KEYS | MEDIA_CENTER | HIDPP; id = (workitem->quad_id_msb << 8) | workitem->quad_id_lsb; for (i = 0; i < ARRAY_SIZE(kbd_builtin_touchpad_ids); i++) { if (id == kbd_builtin_touchpad_ids[i]) { if (djrcv_dev->type == recvr_type_dinovo) workitem->reports_supported |= KBD_MOUSE; else workitem->reports_supported |= STD_MOUSE; break; } } break; case REPORT_TYPE_MOUSE: workitem->reports_supported |= STD_MOUSE | HIDPP | MULTIMEDIA; break; } } static void logi_hidpp_dev_conn_notif_27mhz(struct hid_device *hdev, struct hidpp_event *hidpp_report, struct dj_workitem *workitem) { workitem->type = WORKITEM_TYPE_PAIRED; workitem->quad_id_lsb = hidpp_report->params[HIDPP_PARAM_27MHZ_DEVID]; switch (hidpp_report->device_index) { case 1: /* Index 1 is always a mouse */ case 2: /* Index 2 is always a mouse */ workitem->device_type = HIDPP_DEVICE_TYPE_MOUSE; workitem->reports_supported |= STD_MOUSE | HIDPP; break; case 3: /* Index 3 is always the keyboard */ if (hidpp_report->params[HIDPP_PARAM_DEVICE_INFO] & HIDPP_27MHZ_SECURE_MASK) { hid_info(hdev, "Keyboard connection is encrypted\n"); } else { hid_warn(hdev, "Keyboard events are sent over the air in plain-text / unencrypted\n"); hid_warn(hdev, "See: https://gitlab.freedesktop.org/jwrdegoede/logitech-27mhz-keyboard-encryption-setup/\n"); } fallthrough; case 4: /* Index 4 is used for an optional separate numpad */ workitem->device_type = HIDPP_DEVICE_TYPE_KEYBOARD; workitem->reports_supported |= STD_KEYBOARD | MULTIMEDIA | POWER_KEYS | HIDPP; break; default: hid_warn(hdev, "%s: unexpected device-index %d", __func__, hidpp_report->device_index); } } static void logi_hidpp_recv_queue_notif(struct hid_device *hdev, struct hidpp_event *hidpp_report) { /* We are called from atomic context (tasklet && djrcv->lock held) */ struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); const char *device_type = "UNKNOWN"; struct dj_workitem workitem = { .type = WORKITEM_TYPE_EMPTY, .device_index = hidpp_report->device_index, }; switch (hidpp_report->params[HIDPP_PARAM_PROTO_TYPE]) { case 0x01: device_type = "Bluetooth"; /* Bluetooth connect packet content is the same as (e)QUAD */ logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); if (!(hidpp_report->params[HIDPP_PARAM_DEVICE_INFO] & HIDPP_MANUFACTURER_MASK)) { hid_info(hdev, "Non Logitech device connected on slot %d\n", hidpp_report->device_index); workitem.reports_supported &= ~HIDPP; } break; case 0x02: device_type = "27 Mhz";
logi_hidpp_dev_conn_notif_27mhz(hdev, hidpp_report, &workitem); break; case 0x03: device_type = "QUAD or eQUAD"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); break; case 0x04: device_type = "eQUAD step 4 DJ"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); break; case 0x05: device_type = "DFU Lite"; break; case 0x06: device_type = "eQUAD step 4 Lite"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); break; case 0x07: device_type = "eQUAD step 4 Gaming"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); workitem.reports_supported |= STD_KEYBOARD; break; case 0x08: device_type = "eQUAD step 4 for gamepads"; break; case 0x0a: device_type = "eQUAD nano Lite"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); break; case 0x0c: device_type = "eQUAD Lightspeed 1"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); workitem.reports_supported |= STD_KEYBOARD; break; case 0x0d: device_type = "eQUAD Lightspeed 1.1"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); workitem.reports_supported |= STD_KEYBOARD; break; case 0x0f: case 0x11: device_type = "eQUAD Lightspeed 1.2"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); workitem.reports_supported |= STD_KEYBOARD; break; } /* custom receiver device (eg. powerplay) */ if (hidpp_report->device_index == 7) { workitem.reports_supported |= HIDPP; } if (workitem.type == WORKITEM_TYPE_EMPTY) { hid_warn(hdev, "unusable device of type %s (0x%02x) connected on slot %d", device_type, hidpp_report->params[HIDPP_PARAM_PROTO_TYPE], hidpp_report->device_index); return; } hid_info(hdev, "device of type %s (0x%02x) connected on slot %d", device_type, hidpp_report->params[HIDPP_PARAM_PROTO_TYPE], hidpp_report->device_index); kfifo_in(&djrcv_dev->notif_fifo, &workitem, sizeof(workitem)); schedule_work(&djrcv_dev->work); } static void logi_dj_recv_forward_null_report(struct dj_receiver_dev *djrcv_dev, struct dj_report *dj_report) { /* We are called from atomic context (tasklet && djrcv->lock held) */ unsigned int i; u8 reportbuffer[MAX_REPORT_SIZE]; struct dj_device *djdev; djdev = djrcv_dev->paired_dj_devices[dj_report->device_index]; memset(reportbuffer, 0, sizeof(reportbuffer)); for (i = 0; i < NUMBER_OF_HID_REPORTS; i++) { if (djdev->reports_supported & (1 << i)) { reportbuffer[0] = i; if (hid_input_report(djdev->hdev, HID_INPUT_REPORT, reportbuffer, hid_reportid_size_map[i], 1)) { dbg_hid("hid_input_report error sending null " "report\n"); } } } } static void logi_dj_recv_forward_dj(struct dj_receiver_dev *djrcv_dev, struct dj_report *dj_report) { /* We are called from atomic context (tasklet && djrcv->lock held) */ struct dj_device *dj_device; dj_device = djrcv_dev->paired_dj_devices[dj_report->device_index]; if ((dj_report->report_type > ARRAY_SIZE(hid_reportid_size_map) - 1) || (hid_reportid_size_map[dj_report->report_type] == 0)) { dbg_hid("invalid report type:%x\n", dj_report->report_type); return; } if (hid_input_report(dj_device->hdev, HID_INPUT_REPORT, &dj_report->report_type, hid_reportid_size_map[dj_report->report_type], 1)) { dbg_hid("hid_input_report error\n"); } } static void logi_dj_recv_forward_report(struct dj_device *dj_dev, u8 *data, int size) { /* We are called from atomic context (tasklet && djrcv->lock held) */ if (hid_input_report(dj_dev->hdev, HID_INPUT_REPORT, data, size, 1)) dbg_hid("hid_input_report error\n"); } static void logi_dj_recv_forward_input_report(struct hid_device *hdev, u8 *data, int size) { struct 
dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); struct dj_device *dj_dev; unsigned long flags; u8 report = data[0]; int i; if (report > REPORT_TYPE_RFREPORT_LAST) { hid_err(hdev, "Unexpected input report number %d\n", report); return; } spin_lock_irqsave(&djrcv_dev->lock, flags); for (i = 0; i < (DJ_MAX_PAIRED_DEVICES + DJ_DEVICE_INDEX_MIN); i++) { dj_dev = djrcv_dev->paired_dj_devices[i]; if (dj_dev && (dj_dev->reports_supported & BIT(report))) { logi_dj_recv_forward_report(dj_dev, data, size); spin_unlock_irqrestore(&djrcv_dev->lock, flags); return; } } logi_dj_recv_queue_unknown_work(djrcv_dev); spin_unlock_irqrestore(&djrcv_dev->lock, flags); dbg_hid("No dj-devs handling input report number %d\n", report); } static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev, struct dj_report *dj_report) { struct hid_device *hdev = djrcv_dev->hidpp; struct hid_report *report; struct hid_report_enum *output_report_enum; u8 *data = (u8 *)(&dj_report->device_index); unsigned int i; output_report_enum = &hdev->report_enum[HID_OUTPUT_REPORT]; report = output_report_enum->report_id_hash[REPORT_ID_DJ_SHORT]; if (!report) { hid_err(hdev, "%s: unable to find dj report\n", __func__); return -ENODEV; } for (i = 0; i < DJREPORT_SHORT_LENGTH - 1; i++) report->field[0]->value[i] = data[i]; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); return 0; } static int logi_dj_recv_query_hidpp_devices(struct dj_receiver_dev *djrcv_dev) { static const u8 template[] = { REPORT_ID_HIDPP_SHORT, HIDPP_RECEIVER_INDEX, HIDPP_SET_REGISTER, HIDPP_REG_CONNECTION_STATE, HIDPP_FAKE_DEVICE_ARRIVAL, 0x00, 0x00 }; u8 *hidpp_report; int retval; hidpp_report = kmemdup(template, sizeof(template), GFP_KERNEL); if (!hidpp_report) return -ENOMEM; retval = hid_hw_raw_request(djrcv_dev->hidpp, REPORT_ID_HIDPP_SHORT, hidpp_report, sizeof(template), HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); kfree(hidpp_report); return (retval < 0) ? retval : 0; } static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev) { struct dj_report *dj_report; int retval; djrcv_dev->last_query = jiffies; if (djrcv_dev->type != recvr_type_dj) return logi_dj_recv_query_hidpp_devices(djrcv_dev); dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL); if (!dj_report) return -ENOMEM; dj_report->report_id = REPORT_ID_DJ_SHORT; dj_report->device_index = HIDPP_RECEIVER_INDEX; dj_report->report_type = REPORT_TYPE_CMD_GET_PAIRED_DEVICES; retval = logi_dj_recv_send_report(djrcv_dev, dj_report); kfree(dj_report); return retval; } static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev, unsigned timeout) { struct hid_device *hdev = djrcv_dev->hidpp; struct dj_report *dj_report; u8 *buf; int retval = 0; dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL); if (!dj_report) return -ENOMEM; if (djrcv_dev->type == recvr_type_dj) { dj_report->report_id = REPORT_ID_DJ_SHORT; dj_report->device_index = HIDPP_RECEIVER_INDEX; dj_report->report_type = REPORT_TYPE_CMD_SWITCH; dj_report->report_params[CMD_SWITCH_PARAM_DEVBITFIELD] = 0x3F; dj_report->report_params[CMD_SWITCH_PARAM_TIMEOUT_SECONDS] = (u8)timeout; retval = logi_dj_recv_send_report(djrcv_dev, dj_report); /* * Ugly sleep to work around a USB 3.0 bug when the receiver is * still processing the "switch-to-dj" command while we send an * other command. * 50 msec should gives enough time to the receiver to be ready. */ msleep(50); if (retval) return retval; } /* * Magical bits to set up hidpp notifications when the dj devices * are connected/disconnected. 
* * We can reuse dj_report because HIDPP_REPORT_SHORT_LENGTH is smaller * than DJREPORT_SHORT_LENGTH. */ buf = (u8 *)dj_report; memset(buf, 0, HIDPP_REPORT_SHORT_LENGTH); buf[0] = REPORT_ID_HIDPP_SHORT; buf[1] = HIDPP_RECEIVER_INDEX; buf[2] = 0x80; buf[3] = 0x00; buf[4] = 0x00; buf[5] = 0x09; buf[6] = 0x00; retval = hid_hw_raw_request(hdev, REPORT_ID_HIDPP_SHORT, buf, HIDPP_REPORT_SHORT_LENGTH, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); kfree(dj_report); return retval; } static int logi_dj_ll_open(struct hid_device *hid) { dbg_hid("%s: %s\n", __func__, hid->phys); return 0; } static void logi_dj_ll_close(struct hid_device *hid) { dbg_hid("%s: %s\n", __func__, hid->phys); } /* * Register 0xB5 is "pairing information". It is solely intended for the * receiver, so do not overwrite the device index. */ static u8 unifying_pairing_query[] = { REPORT_ID_HIDPP_SHORT, HIDPP_RECEIVER_INDEX, HIDPP_GET_LONG_REGISTER, HIDPP_REG_PAIRING_INFORMATION }; static u8 unifying_pairing_answer[] = { REPORT_ID_HIDPP_LONG, HIDPP_RECEIVER_INDEX, HIDPP_GET_LONG_REGISTER, HIDPP_REG_PAIRING_INFORMATION }; static int logi_dj_ll_raw_request(struct hid_device *hid, unsigned char reportnum, __u8 *buf, size_t count, unsigned char report_type, int reqtype) { struct dj_device *djdev = hid->driver_data; struct dj_receiver_dev *djrcv_dev = djdev->dj_receiver_dev; u8 *out_buf; int ret; if ((buf[0] == REPORT_ID_HIDPP_SHORT) || (buf[0] == REPORT_ID_HIDPP_LONG) || (buf[0] == REPORT_ID_HIDPP_VERY_LONG)) { if (count < 2) return -EINVAL; /* special case where we should not overwrite * the device_index */ if (count == 7 && !memcmp(buf, unifying_pairing_query, sizeof(unifying_pairing_query))) buf[4] = (buf[4] & 0xf0) | (djdev->device_index - 1); else buf[1] = djdev->device_index; return hid_hw_raw_request(djrcv_dev->hidpp, reportnum, buf, count, report_type, reqtype); } if (buf[0] != REPORT_TYPE_LEDS) return -EINVAL; if (djrcv_dev->type != recvr_type_dj && count >= 2) { if (!djrcv_dev->keyboard) { hid_warn(hid, "Received REPORT_TYPE_LEDS request before the keyboard interface was enumerated\n"); return 0; } /* usbhid overrides the report ID and ignores the first byte */ return hid_hw_raw_request(djrcv_dev->keyboard, 0, buf, count, report_type, reqtype); } out_buf = kzalloc(DJREPORT_SHORT_LENGTH, GFP_ATOMIC); if (!out_buf) return -ENOMEM; if (count > DJREPORT_SHORT_LENGTH - 2) count = DJREPORT_SHORT_LENGTH - 2; out_buf[0] = REPORT_ID_DJ_SHORT; out_buf[1] = djdev->device_index; memcpy(out_buf + 2, buf, count); ret = hid_hw_raw_request(djrcv_dev->hidpp, out_buf[0], out_buf, DJREPORT_SHORT_LENGTH, report_type, reqtype); kfree(out_buf); return ret; } static void rdcat(char *rdesc, unsigned int *rsize, const char *data, unsigned int size) { memcpy(rdesc + *rsize, data, size); *rsize += size; } static int logi_dj_ll_parse(struct hid_device *hid) { struct dj_device *djdev = hid->driver_data; unsigned int rsize = 0; char *rdesc; int retval; dbg_hid("%s\n", __func__); djdev->hdev->version = 0x0111; djdev->hdev->country = 0x00; rdesc = kmalloc(MAX_RDESC_SIZE, GFP_KERNEL); if (!rdesc) return -ENOMEM; if (djdev->reports_supported & STD_KEYBOARD) { dbg_hid("%s: sending a kbd descriptor, reports_supported: %llx\n", __func__, djdev->reports_supported); rdcat(rdesc, &rsize, kbd_descriptor, sizeof(kbd_descriptor)); } if (djdev->reports_supported & STD_MOUSE) { dbg_hid("%s: sending a mouse descriptor, reports_supported: %llx\n", __func__, djdev->reports_supported); if (djdev->dj_receiver_dev->type == recvr_type_gaming_hidpp || djdev->dj_receiver_dev->type == 
recvr_type_mouse_only) rdcat(rdesc, &rsize, mse_high_res_descriptor, sizeof(mse_high_res_descriptor)); else if (djdev->dj_receiver_dev->type == recvr_type_27mhz) rdcat(rdesc, &rsize, mse_27mhz_descriptor, sizeof(mse_27mhz_descriptor)); else if (recvr_type_is_bluetooth(djdev->dj_receiver_dev->type)) rdcat(rdesc, &rsize, mse_bluetooth_descriptor, sizeof(mse_bluetooth_descriptor)); else rdcat(rdesc, &rsize, mse_descriptor, sizeof(mse_descriptor)); } if (djdev->reports_supported & KBD_MOUSE) { dbg_hid("%s: sending a kbd-mouse descriptor, reports_supported: %llx\n", __func__, djdev->reports_supported); rdcat(rdesc, &rsize, mse5_bluetooth_descriptor, sizeof(mse5_bluetooth_descriptor)); } if (djdev->reports_supported & MULTIMEDIA) { dbg_hid("%s: sending a multimedia report descriptor: %llx\n", __func__, djdev->reports_supported); rdcat(rdesc, &rsize, consumer_descriptor, sizeof(consumer_descriptor)); } if (djdev->reports_supported & POWER_KEYS) { dbg_hid("%s: sending a power keys report descriptor: %llx\n", __func__, djdev->reports_supported); rdcat(rdesc, &rsize, syscontrol_descriptor, sizeof(syscontrol_descriptor)); } if (djdev->reports_supported & MEDIA_CENTER) { dbg_hid("%s: sending a media center report descriptor: %llx\n", __func__, djdev->reports_supported); rdcat(rdesc, &rsize, media_descriptor, sizeof(media_descriptor)); } if (djdev->reports_supported & KBD_LEDS) { dbg_hid("%s: need to send kbd leds report descriptor: %llx\n", __func__, djdev->reports_supported); } if (djdev->reports_supported & HIDPP) { dbg_hid("%s: sending a HID++ descriptor, reports_supported: %llx\n", __func__, djdev->reports_supported); rdcat(rdesc, &rsize, hidpp_descriptor, sizeof(hidpp_descriptor)); } retval = hid_parse_report(hid, rdesc, rsize); kfree(rdesc); return retval; } static int logi_dj_ll_start(struct hid_device *hid) { dbg_hid("%s\n", __func__); return 0; } static void logi_dj_ll_stop(struct hid_device *hid) { dbg_hid("%s\n", __func__); } static bool logi_dj_ll_may_wakeup(struct hid_device *hid) { struct dj_device *djdev = hid->driver_data; struct dj_receiver_dev *djrcv_dev = djdev->dj_receiver_dev; return hid_hw_may_wakeup(djrcv_dev->hidpp); } static const struct hid_ll_driver logi_dj_ll_driver = { .parse = logi_dj_ll_parse, .start = logi_dj_ll_start, .stop = logi_dj_ll_stop, .open = logi_dj_ll_open, .close = logi_dj_ll_close, .raw_request = logi_dj_ll_raw_request, .may_wakeup = logi_dj_ll_may_wakeup, }; static int logi_dj_dj_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); struct dj_report *dj_report = (struct dj_report *) data; unsigned long flags; /* * Here we receive all data coming from iface 2, there are 3 cases: * * 1) Data is intended for this driver i. e. data contains arrival, * departure, etc notifications, in which case we queue them for delayed * processing by the work queue. We return 1 to hid-core as no further * processing is required from it. * * 2) Data informs a connection change, if the change means rf link * loss, then we must send a null report to the upper layer to discard * potentially pressed keys that may be repeated forever by the input * layer. Return 1 to hid-core as no further processing is required. * * 3) Data is an actual input event from a paired DJ device in which * case we forward it to the correct hid device (via hid_input_report() * ) and return 1 so hid-core does not anything else with it. 
*/ if ((dj_report->device_index < DJ_DEVICE_INDEX_MIN) || (dj_report->device_index > DJ_DEVICE_INDEX_MAX)) { /* * Device index is wrong, bail out. * This driver can safely ignore the receiver notifications, * so ignore those reports too. */ if (dj_report->device_index != DJ_RECEIVER_INDEX) hid_err(hdev, "%s: invalid device index:%d\n", __func__, dj_report->device_index); return false; } spin_lock_irqsave(&djrcv_dev->lock, flags); if (!djrcv_dev->paired_dj_devices[dj_report->device_index]) { /* received an event for an unknown device, bail out */ logi_dj_recv_queue_notification(djrcv_dev, dj_report); goto out; } switch (dj_report->report_type) { case REPORT_TYPE_NOTIF_DEVICE_PAIRED: /* pairing notifications are handled above the switch */ break; case REPORT_TYPE_NOTIF_DEVICE_UNPAIRED: logi_dj_recv_queue_notification(djrcv_dev, dj_report); break; case REPORT_TYPE_NOTIF_CONNECTION_STATUS: if (dj_report->report_params[CONNECTION_STATUS_PARAM_STATUS] == STATUS_LINKLOSS) { logi_dj_recv_forward_null_report(djrcv_dev, dj_report); } break; default: logi_dj_recv_forward_dj(djrcv_dev, dj_report); } out: spin_unlock_irqrestore(&djrcv_dev->lock, flags); return true; } static int logi_dj_hidpp_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); struct hidpp_event *hidpp_report = (struct hidpp_event *) data; struct dj_device *dj_dev; unsigned long flags; u8 device_index = hidpp_report->device_index; if (device_index == HIDPP_RECEIVER_INDEX) { /* special case where the device wants to know its unifying * name */ if (size == HIDPP_REPORT_LONG_LENGTH && !memcmp(data, unifying_pairing_answer, sizeof(unifying_pairing_answer))) device_index = (data[4] & 0x0F) + 1; else return false; } /* * Data is from the HID++ collection; in this case, we forward the * data to the corresponding child dj device and return 0 to hid-core * so the data also goes to the hidraw device of the receiver. This * allows a user space application to implement the full HID++ routing * via the receiver. */ if ((device_index < DJ_DEVICE_INDEX_MIN) || (device_index > DJ_DEVICE_INDEX_MAX)) { /* * Device index is wrong, bail out. * This driver can safely ignore the receiver notifications, * so ignore those reports too. */ hid_err(hdev, "%s: invalid device index:%d\n", __func__, hidpp_report->device_index); return false; } spin_lock_irqsave(&djrcv_dev->lock, flags); dj_dev = djrcv_dev->paired_dj_devices[device_index]; /* * With 27 MHz receivers, we do not get an explicit unpair event, so * remove the old device if the user has paired a *different* device.
*/ if (djrcv_dev->type == recvr_type_27mhz && dj_dev && hidpp_report->sub_id == REPORT_TYPE_NOTIF_DEVICE_CONNECTED && hidpp_report->params[HIDPP_PARAM_PROTO_TYPE] == 0x02 && hidpp_report->params[HIDPP_PARAM_27MHZ_DEVID] != dj_dev->hdev->product) { struct dj_workitem workitem = { .device_index = hidpp_report->device_index, .type = WORKITEM_TYPE_UNPAIRED, }; kfifo_in(&djrcv_dev->notif_fifo, &workitem, sizeof(workitem)); /* logi_hidpp_recv_queue_notif will queue the work */ dj_dev = NULL; } if (dj_dev) { logi_dj_recv_forward_report(dj_dev, data, size); } else { if (hidpp_report->sub_id == REPORT_TYPE_NOTIF_DEVICE_CONNECTED) logi_hidpp_recv_queue_notif(hdev, hidpp_report); else logi_dj_recv_queue_unknown_work(djrcv_dev); } spin_unlock_irqrestore(&djrcv_dev->lock, flags); return false; } static int logi_dj_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); dbg_hid("%s, size:%d\n", __func__, size); if (!djrcv_dev) return 0; if (!hdev->report_enum[HID_INPUT_REPORT].numbered) { if (djrcv_dev->unnumbered_application == HID_GD_KEYBOARD) { /* * For the keyboard, we can reuse the same report by * using the second byte which is constant in the USB * HID report descriptor. */ data[1] = data[0]; data[0] = REPORT_TYPE_KEYBOARD; logi_dj_recv_forward_input_report(hdev, data, size); /* restore previous state */ data[0] = data[1]; data[1] = 0; } /* * Mouse-only receivers send unnumbered mouse data. The 27 MHz * receiver uses 6 byte packets, the nano receiver 8 bytes. */ if (djrcv_dev->unnumbered_application == HID_GD_MOUSE && size <= 8) { u8 mouse_report[9]; /* Prepend report id */ mouse_report[0] = REPORT_TYPE_MOUSE; memcpy(mouse_report + 1, data, size); logi_dj_recv_forward_input_report(hdev, mouse_report, size + 1); } return false; } switch (data[0]) { case REPORT_ID_DJ_SHORT: if (size != DJREPORT_SHORT_LENGTH) { hid_err(hdev, "Short DJ report bad size (%d)", size); return false; } return logi_dj_dj_event(hdev, report, data, size); case REPORT_ID_DJ_LONG: if (size != DJREPORT_LONG_LENGTH) { hid_err(hdev, "Long DJ report bad size (%d)", size); return false; } return logi_dj_dj_event(hdev, report, data, size); case REPORT_ID_HIDPP_SHORT: if (size != HIDPP_REPORT_SHORT_LENGTH) { hid_err(hdev, "Short HID++ report bad size (%d)", size); return false; } return logi_dj_hidpp_event(hdev, report, data, size); case REPORT_ID_HIDPP_LONG: if (size != HIDPP_REPORT_LONG_LENGTH) { hid_err(hdev, "Long HID++ report bad size (%d)", size); return false; } return logi_dj_hidpp_event(hdev, report, data, size); } logi_dj_recv_forward_input_report(hdev, data, size); return false; } static int logi_dj_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct hid_report_enum *rep_enum; struct hid_report *rep; struct dj_receiver_dev *djrcv_dev; struct usb_interface *intf; unsigned int no_dj_interfaces = 0; bool has_hidpp = false; unsigned long flags; int retval; /* * Call to usbhid to fetch the HID descriptors of the current * interface subsequently call to the hid/hid-core to parse the * fetched descriptors. */ retval = hid_parse(hdev); if (retval) { hid_err(hdev, "%s: parse failed\n", __func__); return retval; } /* * Some KVMs add an extra interface for e.g. mouse emulation. If we * treat these as logitech-dj interfaces then this causes input events * reported through this extra interface to not be reported correctly. * To avoid this, we treat these as generic-hid devices. 
*/ switch (id->driver_data) { case recvr_type_dj: no_dj_interfaces = 3; break; case recvr_type_hidpp: no_dj_interfaces = 2; break; case recvr_type_gaming_hidpp: no_dj_interfaces = 3; break; case recvr_type_mouse_only: no_dj_interfaces = 2; break; case recvr_type_27mhz: no_dj_interfaces = 2; break; case recvr_type_bluetooth: no_dj_interfaces = 2; break; case recvr_type_dinovo: no_dj_interfaces = 2; break; } if (hid_is_usb(hdev)) { intf = to_usb_interface(hdev->dev.parent); if (intf && intf->altsetting->desc.bInterfaceNumber >= no_dj_interfaces) { hdev->quirks |= HID_QUIRK_INPUT_PER_APP; return hid_hw_start(hdev, HID_CONNECT_DEFAULT); } } rep_enum = &hdev->report_enum[HID_INPUT_REPORT]; /* no input reports, bail out */ if (list_empty(&rep_enum->report_list)) return -ENODEV; /* * Check for the HID++ application. * Note: we should theoretically check for HID++ and DJ * collections, but this will do. */ list_for_each_entry(rep, &rep_enum->report_list, list) { if (rep->application == 0xff000001) has_hidpp = true; } /* * Ignore interfaces without DJ/HID++ collection, they will not carry * any data, dont create any hid_device for them. */ if (!has_hidpp && id->driver_data == recvr_type_dj) return -ENODEV; /* get the current application attached to the node */ rep = list_first_entry(&rep_enum->report_list, struct hid_report, list); djrcv_dev = dj_get_receiver_dev(hdev, id->driver_data, rep->application, has_hidpp); if (!djrcv_dev) { hid_err(hdev, "%s: dj_get_receiver_dev failed\n", __func__); return -ENOMEM; } if (!rep_enum->numbered) djrcv_dev->unnumbered_application = rep->application; /* Starts the usb device and connects to upper interfaces hiddev and * hidraw */ retval = hid_hw_start(hdev, HID_CONNECT_HIDRAW|HID_CONNECT_HIDDEV); if (retval) { hid_err(hdev, "%s: hid_hw_start returned error\n", __func__); goto hid_hw_start_fail; } if (has_hidpp) { retval = logi_dj_recv_switch_to_dj_mode(djrcv_dev, 0); if (retval < 0) { hid_err(hdev, "%s: logi_dj_recv_switch_to_dj_mode returned error:%d\n", __func__, retval); goto switch_to_dj_mode_fail; } } /* This is enabling the polling urb on the IN endpoint */ retval = hid_hw_open(hdev); if (retval < 0) { hid_err(hdev, "%s: hid_hw_open returned error:%d\n", __func__, retval); goto llopen_failed; } /* Allow incoming packets to arrive: */ hid_device_io_start(hdev); if (has_hidpp) { spin_lock_irqsave(&djrcv_dev->lock, flags); djrcv_dev->ready = true; spin_unlock_irqrestore(&djrcv_dev->lock, flags); retval = logi_dj_recv_query_paired_devices(djrcv_dev); if (retval < 0) { hid_err(hdev, "%s: logi_dj_recv_query_paired_devices error:%d\n", __func__, retval); /* * This can happen with a KVM, let the probe succeed, * logi_dj_recv_queue_unknown_work will retry later. 
*/ } } return 0; llopen_failed: switch_to_dj_mode_fail: hid_hw_stop(hdev); hid_hw_start_fail: dj_put_receiver_dev(hdev); return retval; } #ifdef CONFIG_PM static int logi_dj_reset_resume(struct hid_device *hdev) { int retval; struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); if (!djrcv_dev || djrcv_dev->hidpp != hdev) return 0; retval = logi_dj_recv_switch_to_dj_mode(djrcv_dev, 0); if (retval < 0) { hid_err(hdev, "%s: logi_dj_recv_switch_to_dj_mode returned error:%d\n", __func__, retval); } return 0; } #endif static void logi_dj_remove(struct hid_device *hdev) { struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); struct dj_device *dj_dev; unsigned long flags; int i; dbg_hid("%s\n", __func__); if (!djrcv_dev) return hid_hw_stop(hdev); /* * This ensures that if the work gets requeued from another * interface of the same receiver it will be a no-op. */ spin_lock_irqsave(&djrcv_dev->lock, flags); djrcv_dev->ready = false; spin_unlock_irqrestore(&djrcv_dev->lock, flags); cancel_work_sync(&djrcv_dev->work); hid_hw_close(hdev); hid_hw_stop(hdev); /* * For proper operation we need access to all interfaces, so we destroy * the paired devices when we're unbound from any interface. * * Note we may still be bound to other interfaces, sharing the same * djrcv_dev, so we need locking here. */ for (i = 0; i < (DJ_MAX_PAIRED_DEVICES + DJ_DEVICE_INDEX_MIN); i++) { spin_lock_irqsave(&djrcv_dev->lock, flags); dj_dev = djrcv_dev->paired_dj_devices[i]; djrcv_dev->paired_dj_devices[i] = NULL; spin_unlock_irqrestore(&djrcv_dev->lock, flags); if (dj_dev != NULL) { hid_destroy_device(dj_dev->hdev); kfree(dj_dev); } } dj_put_receiver_dev(hdev); } static const struct hid_device_id logi_dj_receivers[] = { { /* Logitech unifying receiver (0xc52b) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER), .driver_data = recvr_type_dj}, { /* Logitech unifying receiver (0xc532) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2), .driver_data = recvr_type_dj}, { /* Logitech Nano mouse only receiver (0xc52f) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER), .driver_data = recvr_type_mouse_only}, { /* Logitech Nano (non DJ) receiver (0xc534) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2), .driver_data = recvr_type_hidpp}, { /* Logitech G700(s) receiver (0xc531) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G700_RECEIVER), .driver_data = recvr_type_gaming_hidpp}, { /* Logitech G602 receiver (0xc537) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xc537), .driver_data = recvr_type_gaming_hidpp}, { /* Logitech lightspeed receiver (0xc539) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1), .driver_data = recvr_type_gaming_hidpp}, { /* Logitech powerplay receiver (0xc53a) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY), .driver_data = recvr_type_gaming_hidpp}, { /* Logitech lightspeed receiver (0xc53f) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1), .driver_data = recvr_type_gaming_hidpp}, { /* Logitech 27 MHz HID++ 1.0 receiver (0xc513) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER), .driver_data = recvr_type_27mhz}, { /* Logitech 27 MHz HID++ 1.0 receiver (0xc517) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2), .driver_data = recvr_type_27mhz}, { /* Logitech 27 MHz HID++ 1.0 mouse-only 
receiver (0xc51b) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_27MHZ_MOUSE_RECEIVER), .driver_data = recvr_type_27mhz}, { /* Logitech MX5000 HID++ / bluetooth receiver keyboard intf. (0xc70e) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX5000_RECEIVER_KBD_DEV), .driver_data = recvr_type_bluetooth}, { /* Logitech MX5000 HID++ / bluetooth receiver mouse intf. (0xc70a) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX5000_RECEIVER_MOUSE_DEV), .driver_data = recvr_type_bluetooth}, { /* Logitech MX5500 HID++ / bluetooth receiver keyboard intf. (0xc71b) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX5500_RECEIVER_KBD_DEV), .driver_data = recvr_type_bluetooth}, { /* Logitech MX5500 HID++ / bluetooth receiver mouse intf. (0xc71c) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX5500_RECEIVER_MOUSE_DEV), .driver_data = recvr_type_bluetooth}, { /* Logitech Dinovo Edge HID++ / bluetooth receiver keyboard intf. (0xc713) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE_RECEIVER_KBD_DEV), .driver_data = recvr_type_dinovo}, { /* Logitech Dinovo Edge HID++ / bluetooth receiver mouse intf. (0xc714) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE_RECEIVER_MOUSE_DEV), .driver_data = recvr_type_dinovo}, { /* Logitech DiNovo Mini HID++ / bluetooth receiver mouse intf. (0xc71e) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI_RECEIVER_KBD_DEV), .driver_data = recvr_type_dinovo}, { /* Logitech DiNovo Mini HID++ / bluetooth receiver keyboard intf. (0xc71f) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI_RECEIVER_MOUSE_DEV), .driver_data = recvr_type_dinovo}, {} }; MODULE_DEVICE_TABLE(hid, logi_dj_receivers); static struct hid_driver logi_djreceiver_driver = { .name = "logitech-djreceiver", .id_table = logi_dj_receivers, .probe = logi_dj_probe, .remove = logi_dj_remove, .raw_event = logi_dj_raw_event, #ifdef CONFIG_PM .reset_resume = logi_dj_reset_resume, #endif }; module_hid_driver(logi_djreceiver_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Logitech"); MODULE_AUTHOR("Nestor Lopez Casado"); MODULE_AUTHOR("nlopezcasad@logitech.com");
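/*
 * Illustrative sketch, not part of the driver: the "fake device arrival"
 * query that logi_dj_recv_query_hidpp_devices() builds (HID++ short report,
 * SET_REGISTER of CONNECTION_STATE with the fake-arrival bit) can also be
 * issued from user space through the receiver's hidraw node, which is one
 * way to observe the 0x41 device-connected notifications this driver
 * consumes. The hidraw path, the use of plain write()/read(), and the 0xff
 * receiver index are assumptions made for the example only.
 *
 * #include <fcntl.h>
 * #include <stdio.h>
 * #include <unistd.h>
 *
 * int main(void)
 * {
 *	// HID++ short report: id 0x10, receiver index 0xff, SET_REGISTER
 *	// (0x80) of CONNECTION_STATE (0x02) with "fake device arrival"
 *	// (0x02), padded to the 7-byte short-report length.
 *	unsigned char req[7] = { 0x10, 0xff, 0x80, 0x02, 0x02, 0x00, 0x00 };
 *	unsigned char buf[32];
 *	int fd = open("/dev/hidraw0", O_RDWR);	// hypothetical node
 *
 *	if (fd < 0 || write(fd, req, sizeof(req)) != sizeof(req)) {
 *		perror("hidraw");
 *		return 1;
 *	}
 *	// Each read() returns one report; sub-id 0x41 in byte 2 marks a
 *	// device-connected notification for the paired slot in byte 1.
 *	// Interrupt with Ctrl-C when done.
 *	while (read(fd, buf, sizeof(buf)) > 0)
 *		printf("report id 0x%02x sub-id 0x%02x slot %d\n",
 *		       buf[0], buf[2], buf[1]);
 *	close(fd);
 *	return 0;
 * }
 */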
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_NETLINK_H
#define __NET_NETLINK_H

#include <linux/types.h>
#include <linux/netlink.h>
#include <linux/jiffies.h>
#include <linux/in6.h>

/* ========================================================================
 *         Netlink Messages and Attributes Interface (As Seen On TV)
 * ------------------------------------------------------------------------
 *                          Messages Interface
 * ------------------------------------------------------------------------
 *
 * Message Format:
 *    <--- nlmsg_total_size(payload) --->
 *    <-- nlmsg_msg_size(payload) ->
 *   +----------+- - -+-------------+- - -+-------- - -
 *   | nlmsghdr | Pad |   Payload   | Pad | nlmsghdr
 *   +----------+- - -+-------------+- - -+-------- - -
 *   nlmsg_data(nlh)---^                   ^
 *   nlmsg_next(nlh)-----------------------+
 *
 * Payload Format:
 *    <---------------------- nlmsg_len(nlh) --------------------->
 *    <------ hdrlen ------>       <- nlmsg_attrlen(nlh, hdrlen) ->
 *   +----------------------+- - -+--------------------------------+
 *   |     Family Header    | Pad |           Attributes           |
 *   +----------------------+- - -+--------------------------------+
 *
nlmsg_attrdata(nlh, hdrlen)---^ * * Data Structures: * struct nlmsghdr netlink message header * * Message Construction: * nlmsg_new() create a new netlink message * nlmsg_put() add a netlink message to an skb * nlmsg_put_answer() callback based nlmsg_put() * nlmsg_end() finalize netlink message * nlmsg_get_pos() return current position in message * nlmsg_trim() trim part of message * nlmsg_cancel() cancel message construction * nlmsg_free() free a netlink message * * Message Sending: * nlmsg_multicast() multicast message to several groups * nlmsg_unicast() unicast a message to a single socket * nlmsg_notify() send notification message * * Message Length Calculations: * nlmsg_msg_size(payload) length of message w/o padding * nlmsg_total_size(payload) length of message w/ padding * nlmsg_padlen(payload) length of padding at tail * * Message Payload Access: * nlmsg_data(nlh) head of message payload * nlmsg_len(nlh) length of message payload * nlmsg_attrdata(nlh, hdrlen) head of attributes data * nlmsg_attrlen(nlh, hdrlen) length of attributes data * * Message Parsing: * nlmsg_ok(nlh, remaining) does nlh fit into remaining bytes? * nlmsg_next(nlh, remaining) get next netlink message * nlmsg_parse() parse attributes of a message * nlmsg_find_attr() find an attribute in a message * nlmsg_for_each_msg() loop over all messages * nlmsg_validate() validate netlink message incl. attrs * nlmsg_for_each_attr() loop over all attributes * * Misc: * nlmsg_report() report back to application? * * ------------------------------------------------------------------------ * Attributes Interface * ------------------------------------------------------------------------ * * Attribute Format: * <------- nla_total_size(payload) -------> * <---- nla_attr_size(payload) -----> * +----------+- - -+- - - - - - - - - +- - -+-------- - - * | Header | Pad | Payload | Pad | Header * +----------+- - -+- - - - - - - - - +- - -+-------- - - * <- nla_len(nla) -> ^ * nla_data(nla)----^ | * nla_next(nla)-----------------------------' * * Data Structures: * struct nlattr netlink attribute header * * Attribute Construction: * nla_reserve(skb, type, len) reserve room for an attribute * nla_reserve_nohdr(skb, len) reserve room for an attribute w/o hdr * nla_put(skb, type, len, data) add attribute to skb * nla_put_nohdr(skb, len, data) add attribute w/o hdr * nla_append(skb, len, data) append data to skb * * Attribute Construction for Basic Types: * nla_put_u8(skb, type, value) add u8 attribute to skb * nla_put_u16(skb, type, value) add u16 attribute to skb * nla_put_u32(skb, type, value) add u32 attribute to skb * nla_put_u64_64bit(skb, type, * value, padattr) add u64 attribute to skb * nla_put_s8(skb, type, value) add s8 attribute to skb * nla_put_s16(skb, type, value) add s16 attribute to skb * nla_put_s32(skb, type, value) add s32 attribute to skb * nla_put_s64(skb, type, value, * padattr) add s64 attribute to skb * nla_put_string(skb, type, str) add string attribute to skb * nla_put_flag(skb, type) add flag attribute to skb * nla_put_msecs(skb, type, jiffies, * padattr) add msecs attribute to skb * nla_put_in_addr(skb, type, addr) add IPv4 address attribute to skb * nla_put_in6_addr(skb, type, addr) add IPv6 address attribute to skb * * Nested Attributes Construction: * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction * * Attribute Length Calculations: * nla_attr_size(payload) length of attribute w/o padding * 
nla_total_size(payload) length of attribute w/ padding * nla_padlen(payload) length of padding * * Attribute Payload Access: * nla_data(nla) head of attribute payload * nla_len(nla) length of attribute payload * * Attribute Payload Access for Basic Types: * nla_get_uint(nla) get payload for a uint attribute * nla_get_sint(nla) get payload for a sint attribute * nla_get_u8(nla) get payload for a u8 attribute * nla_get_u16(nla) get payload for a u16 attribute * nla_get_u32(nla) get payload for a u32 attribute * nla_get_u64(nla) get payload for a u64 attribute * nla_get_s8(nla) get payload for a s8 attribute * nla_get_s16(nla) get payload for a s16 attribute * nla_get_s32(nla) get payload for a s32 attribute * nla_get_s64(nla) get payload for a s64 attribute * nla_get_flag(nla) return 1 if flag is true * nla_get_msecs(nla) get payload for a msecs attribute * * Attribute Misc: * nla_memcpy(dest, nla, count) copy attribute into memory * nla_memcmp(nla, data, size) compare attribute with memory area * nla_strscpy(dst, nla, size) copy attribute to a sized string * nla_strcmp(nla, str) compare attribute with string * * Attribute Parsing: * nla_ok(nla, remaining) does nla fit into remaining bytes? * nla_next(nla, remaining) get next netlink attribute * nla_validate() validate a stream of attributes * nla_validate_nested() validate a stream of nested attributes * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= */ /** * Standard attribute types to specify validation policy */ enum { NLA_UNSPEC, NLA_U8, NLA_U16, NLA_U32, NLA_U64, NLA_STRING, NLA_FLAG, NLA_MSECS, NLA_NESTED, NLA_NESTED_ARRAY, NLA_NUL_STRING, NLA_BINARY, NLA_S8, NLA_S16, NLA_S32, NLA_S64, NLA_BITFIELD32, NLA_REJECT, NLA_BE16, NLA_BE32, NLA_SINT, NLA_UINT, __NLA_TYPE_MAX, }; #define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) struct netlink_range_validation { u64 min, max; }; struct netlink_range_validation_signed { s64 min, max; }; enum nla_policy_validation { NLA_VALIDATE_NONE, NLA_VALIDATE_RANGE, NLA_VALIDATE_RANGE_WARN_TOO_LONG, NLA_VALIDATE_MIN, NLA_VALIDATE_MAX, NLA_VALIDATE_MASK, NLA_VALIDATE_RANGE_PTR, NLA_VALIDATE_FUNCTION, }; /** * struct nla_policy - attribute validation policy * @type: Type of attribute or NLA_UNSPEC * @validation_type: type of attribute validation done in addition to * type-specific validation (e.g. range, function call), see * &enum nla_policy_validation * @len: Type specific length of payload * * Policies are defined as arrays of this struct, the array must be * accessible by attribute type up to the highest identifier to be expected. * * Meaning of `len' field: * NLA_STRING Maximum length of string * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload * (but see also below with the validation type) * NLA_NESTED, * NLA_NESTED_ARRAY Length verification is done by checking len of * nested header (or empty); len field is used if * nested_policy is also used, for the max attr * number in the nested policy. 
* NLA_SINT, NLA_UINT, * NLA_U8, NLA_U16, * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, * NLA_S32, NLA_S64, * NLA_BE16, NLA_BE32, * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" * NLA_BITFIELD32 Unused * NLA_REJECT Unused * All other Minimum length of attribute payload * * Meaning of validation union: * NLA_BITFIELD32 This is a 32-bit bitmap/bitselector attribute and * `bitfield32_valid' is the u32 value of valid flags * NLA_REJECT This attribute is always rejected and `reject_message' * may point to a string to report as the error instead * of the generic one in extended ACK. * NLA_NESTED `nested_policy' to a nested policy to validate, must * also set `len' to the max attribute number. Use the * provided NLA_POLICY_NESTED() macro. * Note that nla_parse() will validate, but of course not * parse, the nested sub-policies. * NLA_NESTED_ARRAY `nested_policy' points to a nested policy to validate, * must also set `len' to the max attribute number. Use * the provided NLA_POLICY_NESTED_ARRAY() macro. * The difference to NLA_NESTED is the structure: * NLA_NESTED has the nested attributes directly inside * while an array has the nested attributes at another * level down and the attribute types directly in the * nesting don't matter. * NLA_UINT, * NLA_U8, * NLA_U16, * NLA_U32, * NLA_U64, * NLA_BE16, * NLA_BE32, * NLA_SINT, * NLA_S8, * NLA_S16, * NLA_S32, * NLA_S64 The `min' and `max' fields are used depending on the * validation_type field, if that is min/max/range then * the min, max or both are used (respectively) to check * the value of the integer attribute. * Note that in the interest of code simplicity and * struct size both limits are s16, so you cannot * enforce a range that doesn't fall within the range * of s16 - do that using the NLA_POLICY_FULL_RANGE() * or NLA_POLICY_FULL_RANGE_SIGNED() macros instead. * Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and * NLA_POLICY_RANGE() macros. * NLA_UINT, * NLA_U8, * NLA_U16, * NLA_U32, * NLA_U64 If the validation_type field instead is set to * NLA_VALIDATE_RANGE_PTR, `range' must be a pointer * to a struct netlink_range_validation that indicates * the min/max values. * Use NLA_POLICY_FULL_RANGE(). * NLA_SINT, * NLA_S8, * NLA_S16, * NLA_S32, * NLA_S64 If the validation_type field instead is set to * NLA_VALIDATE_RANGE_PTR, `range_signed' must be a * pointer to a struct netlink_range_validation_signed * that indicates the min/max values. * Use NLA_POLICY_FULL_RANGE_SIGNED(). * * NLA_BINARY If the validation type is like the ones for integers * above, then the min/max length (not value like for * integers) of the attribute is enforced. * * All other Unused - but note that it's a union * * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: * NLA_BINARY Validation function called for the attribute. 
* All other Unused - but note that it's a union * * Example: * * static const u32 myvalidflags = 0xff231023; * * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, * [ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)), * [ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags), * }; */ struct nla_policy { u8 type; u8 validation_type; u16 len; union { /** * @strict_start_type: first attribute to validate strictly * * This entry is special, and used for the attribute at index 0 * only, and specifies special data about the policy, namely it * specifies the "boundary type" where strict length validation * starts for any attribute types >= this value, also, strict * nesting validation starts here. * * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to * get the previous pure { .len = xyz } behaviour. The advantage * of this is that types not specified in the policy will be * rejected. * * For completely new families it should be set to 1 so that the * validation is enforced for all attributes. For existing ones * it should be set at least when new attributes are added to * the enum used by the policy, and be set to the new value that * was added to enforce strict validation from thereon. */ u16 strict_start_type; /* private: use NLA_POLICY_*() to set */ const u32 bitfield32_valid; const u32 mask; const char *reject_message; const struct nla_policy *nested_policy; const struct netlink_range_validation *range; const struct netlink_range_validation_signed *range_signed; struct { s16 min, max; }; int (*validate)(const struct nlattr *attr, struct netlink_ext_ack *extack); }; }; #define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) #define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) #define _NLA_POLICY_NESTED(maxattr, policy) \ { .type = NLA_NESTED, .nested_policy = policy, .len = maxattr } #define _NLA_POLICY_NESTED_ARRAY(maxattr, policy) \ { .type = NLA_NESTED_ARRAY, .nested_policy = policy, .len = maxattr } #define NLA_POLICY_NESTED(policy) \ _NLA_POLICY_NESTED(ARRAY_SIZE(policy) - 1, policy) #define NLA_POLICY_NESTED_ARRAY(policy) \ _NLA_POLICY_NESTED_ARRAY(ARRAY_SIZE(policy) - 1, policy) #define NLA_POLICY_BITFIELD32(valid) \ { .type = NLA_BITFIELD32, .bitfield32_valid = valid } #define __NLA_IS_UINT_TYPE(tp) \ (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || \ tp == NLA_U64 || tp == NLA_UINT || \ tp == NLA_BE16 || tp == NLA_BE32) #define __NLA_IS_SINT_TYPE(tp) \ (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64 || \ tp == NLA_SINT) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp) #define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ tp == NLA_MSECS || \ tp == NLA_BINARY) + tp) #define NLA_ENSURE_SINT_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_SINT_TYPE(tp)) + tp) #define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ __NLA_IS_SINT_TYPE(tp) || \ tp == NLA_MSECS || \ tp == NLA_BINARY) + tp) #define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ (__NLA_ENSURE(tp != NLA_BITFIELD32 && \ tp != NLA_REJECT && \ tp != NLA_NESTED && \ tp != NLA_NESTED_ARRAY) + tp) #define NLA_POLICY_RANGE(tp, _min, _max) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE, \ .min = _min, \ .max = _max \ } #define NLA_POLICY_FULL_RANGE(tp, 
_range) { \ .type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE_PTR, \ .range = _range, \ } #define NLA_POLICY_FULL_RANGE_SIGNED(tp, _range) { \ .type = NLA_ENSURE_SINT_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE_PTR, \ .range_signed = _range, \ } #define NLA_POLICY_MIN(tp, _min) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MIN, \ .min = _min, \ } #define NLA_POLICY_MAX(tp, _max) { \ .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MAX, \ .max = _max, \ } #define NLA_POLICY_MASK(tp, _mask) { \ .type = NLA_ENSURE_UINT_TYPE(tp), \ .validation_type = NLA_VALIDATE_MASK, \ .mask = _mask, \ } #define NLA_POLICY_VALIDATE_FN(tp, fn, ...) { \ .type = NLA_ENSURE_NO_VALIDATION_PTR(tp), \ .validation_type = NLA_VALIDATE_FUNCTION, \ .validate = fn, \ .len = __VA_ARGS__ + 0, \ } #define NLA_POLICY_EXACT_LEN(_len) NLA_POLICY_RANGE(NLA_BINARY, _len, _len) #define NLA_POLICY_EXACT_LEN_WARN(_len) { \ .type = NLA_BINARY, \ .validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG, \ .min = _len, \ .max = _len \ } #define NLA_POLICY_MIN_LEN(_len) NLA_POLICY_MIN(NLA_BINARY, _len) /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request * @nl_net: Network namespace * @portid: Netlink PORTID of requesting application * @skip_notify: Skip netlink notifications to user space * @skip_notify_kernel: Skip selected in-kernel notifications */ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; u32 portid; u8 skip_notify:1, skip_notify_kernel:1; }; /** * enum netlink_validation - netlink message/attribute validation levels * @NL_VALIDATE_LIBERAL: Old-style "be liberal" validation, not caring about * extra data at the end of the message, attributes being longer than * they should be, or unknown attributes being present. * @NL_VALIDATE_TRAILING: Reject junk data encountered after attribute parsing. * @NL_VALIDATE_MAXTYPE: Reject attributes > max type; Together with _TRAILING * this is equivalent to the old nla_parse_strict()/nlmsg_parse_strict(). * @NL_VALIDATE_UNSPEC: Reject attributes with NLA_UNSPEC in the policy. * This can safely be set by the kernel when the given policy has no * NLA_UNSPEC anymore, and can thus be used to ensure policy entries * are enforced going forward. * @NL_VALIDATE_STRICT_ATTRS: strict attribute policy parsing (e.g. * U8, U16, U32 must have exact size, etc.) * @NL_VALIDATE_NESTED: Check that NLA_F_NESTED is set for NLA_NESTED(_ARRAY) * and unset for other policies. 
*/ enum netlink_validation { NL_VALIDATE_LIBERAL = 0, NL_VALIDATE_TRAILING = BIT(0), NL_VALIDATE_MAXTYPE = BIT(1), NL_VALIDATE_UNSPEC = BIT(2), NL_VALIDATE_STRICT_ATTRS = BIT(3), NL_VALIDATE_NESTED = BIT(4), }; #define NL_VALIDATE_DEPRECATED_STRICT (NL_VALIDATE_TRAILING |\ NL_VALIDATE_MAXTYPE) #define NL_VALIDATE_STRICT (NL_VALIDATE_TRAILING |\ NL_VALIDATE_MAXTYPE |\ NL_VALIDATE_UNSPEC |\ NL_VALIDATE_STRICT_ATTRS |\ NL_VALIDATE_NESTED) int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *)); int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags); int __nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack); int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); ssize_t nla_strscpy(char *dst, const struct nlattr *nla, size_t dstsize); char *nla_strdup(const struct nlattr *nla, gfp_t flags); int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); int nla_strcmp(const struct nlattr *nla, const char *str); struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr); void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen); struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr); void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen); void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr); void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data); int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr); int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data); int nla_append(struct sk_buff *skb, int attrlen, const void *data); /************************************************************************** * Netlink Messages **************************************************************************/ /** * nlmsg_msg_size - length of netlink message not including padding * @payload: length of message payload */ static inline int nlmsg_msg_size(int payload) { return NLMSG_HDRLEN + payload; } /** * nlmsg_total_size - length of netlink message including padding * @payload: length of message payload */ static inline int nlmsg_total_size(int payload) { return NLMSG_ALIGN(nlmsg_msg_size(payload)); } /** * nlmsg_padlen - length of padding at the message's tail * @payload: length of message payload */ static inline int nlmsg_padlen(int payload) { return nlmsg_total_size(payload) - nlmsg_msg_size(payload); } /** * nlmsg_data - head of message payload * @nlh: netlink message header */ static inline void *nlmsg_data(const struct nlmsghdr *nlh) { return (unsigned char *) nlh + NLMSG_HDRLEN; } /** * nlmsg_len - length of message payload * @nlh: netlink message header */ static inline 
int nlmsg_len(const struct nlmsghdr *nlh) { return nlh->nlmsg_len - NLMSG_HDRLEN; } /** * nlmsg_attrdata - head of attributes data * @nlh: netlink message header * @hdrlen: length of family specific header */ static inline struct nlattr *nlmsg_attrdata(const struct nlmsghdr *nlh, int hdrlen) { unsigned char *data = nlmsg_data(nlh); return (struct nlattr *) (data + NLMSG_ALIGN(hdrlen)); } /** * nlmsg_attrlen - length of attributes data * @nlh: netlink message header * @hdrlen: length of family specific header */ static inline int nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen) { return nlmsg_len(nlh) - NLMSG_ALIGN(hdrlen); } /** * nlmsg_ok - check if the netlink message fits into the remaining bytes * @nlh: netlink message header * @remaining: number of bytes remaining in message stream */ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) { return (remaining >= (int) sizeof(struct nlmsghdr) && nlh->nlmsg_len >= sizeof(struct nlmsghdr) && nlh->nlmsg_len <= remaining); } /** * nlmsg_next - next netlink message in message stream * @nlh: netlink message header * @remaining: number of bytes remaining in message stream * * Returns the next netlink message in the message stream and * decrements remaining by the size of the current message. */ static inline struct nlmsghdr * nlmsg_next(const struct nlmsghdr *nlh, int *remaining) { int totlen = NLMSG_ALIGN(nlh->nlmsg_len); *remaining -= totlen; return (struct nlmsghdr *) ((unsigned char *) nlh + totlen); } /** * nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. Attributes with a type * exceeding maxtype will be rejected, policy must be specified, attributes * will be validated in the strictest way possible. * * Returns 0 on success or a negative error code. */ static inline int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_parse(tb, maxtype, head, len, policy, NL_VALIDATE_STRICT, extack); } /** * nla_parse_deprecated - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. Attributes with a type * exceeding maxtype will be ignored and attributes from the policy are not * always strictly validated (only for new attributes). * * Returns 0 on success or a negative error code. 
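 *
 * A minimal usage sketch (MY_ATTR_MAX, MY_ATTR_MTU, my_policy and the
 * surrounding locals are placeholders, not defined in this header):
 *
 *	struct nlattr *tb[MY_ATTR_MAX + 1];
 *	int err;
 *
 *	err = nla_parse_deprecated(tb, MY_ATTR_MAX, head, len,
 *				   my_policy, extack);
 *	if (err < 0)
 *		return err;
 *	if (tb[MY_ATTR_MTU])
 *		mtu = nla_get_u32(tb[MY_ATTR_MTU]);
 *
 * New code should normally use nla_parse() instead, which applies
 * NL_VALIDATE_STRICT rather than NL_VALIDATE_LIBERAL.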
*/ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_parse(tb, maxtype, head, len, policy, NL_VALIDATE_LIBERAL, extack); } /** * nla_parse_deprecated_strict - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. Attributes with a type * exceeding maxtype will be rejected as well as trailing data, but the * policy is not completely strictly validated (only for new attributes). * * Returns 0 on success or a negative error code. */ static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_parse(tb, maxtype, head, len, policy, NL_VALIDATE_DEPRECATED_STRICT, extack); } /** * __nlmsg_parse - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @validate: validation strictness * @extack: extended ACK report struct * * See nla_parse() */ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { NL_SET_ERR_MSG(extack, "Invalid header length"); return -EINVAL; } return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), policy, validate, extack); } /** * nlmsg_parse - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse() */ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, NL_VALIDATE_STRICT, extack); } /** * nlmsg_parse_deprecated - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse_deprecated() */ static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, NL_VALIDATE_LIBERAL, extack); } /** * nlmsg_parse_deprecated_strict - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse_deprecated_strict() */ static inline int 
nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen,
			      struct nlattr *tb[], int maxtype,
			      const struct nla_policy *policy,
			      struct netlink_ext_ack *extack)
{
	return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy,
			     NL_VALIDATE_DEPRECATED_STRICT, extack);
}

/**
 * nlmsg_find_attr - find a specific attribute in a netlink message
 * @nlh: netlink message header
 * @hdrlen: length of family specific header
 * @attrtype: type of attribute to look for
 *
 * Returns the first attribute which matches the specified type.
 */
static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
					     int hdrlen, int attrtype)
{
	return nla_find(nlmsg_attrdata(nlh, hdrlen),
			nlmsg_attrlen(nlh, hdrlen), attrtype);
}

/**
 * nla_validate_deprecated - Validate a stream of attributes
 * @head: head of attribute stream
 * @len: length of attribute stream
 * @maxtype: maximum attribute type to be expected
 * @policy: validation policy
 * @extack: extended ACK report struct
 *
 * Validates all attributes in the specified attribute stream against the
 * specified policy. Validation is done in liberal mode.
 * See documentation of struct nla_policy for more details.
 *
 * Returns 0 on success or a negative error code.
 */
static inline int nla_validate_deprecated(const struct nlattr *head, int len,
					  int maxtype,
					  const struct nla_policy *policy,
					  struct netlink_ext_ack *extack)
{
	return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_LIBERAL,
			      extack);
}

/**
 * nla_validate - Validate a stream of attributes
 * @head: head of attribute stream
 * @len: length of attribute stream
 * @maxtype: maximum attribute type to be expected
 * @policy: validation policy
 * @extack: extended ACK report struct
 *
 * Validates all attributes in the specified attribute stream against the
 * specified policy. Validation is done in strict mode.
 * See documentation of struct nla_policy for more details.
 *
 * Returns 0 on success or a negative error code.
 */
static inline int nla_validate(const struct nlattr *head, int len, int maxtype,
			       const struct nla_policy *policy,
			       struct netlink_ext_ack *extack)
{
	return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_STRICT,
			      extack);
}

/**
 * nlmsg_validate_deprecated - validate a netlink message including attributes
 * @nlh: netlink message header
 * @hdrlen: length of family specific header
 * @maxtype: maximum attribute type to be expected
 * @policy: validation policy
 * @extack: extended ACK report struct
 */
static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh,
					    int hdrlen, int maxtype,
					    const struct nla_policy *policy,
					    struct netlink_ext_ack *extack)
{
	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
		return -EINVAL;

	return __nla_validate(nlmsg_attrdata(nlh, hdrlen),
			      nlmsg_attrlen(nlh, hdrlen), maxtype,
			      policy, NL_VALIDATE_LIBERAL, extack);
}

/**
 * nlmsg_report - need to report back to application?
 * @nlh: netlink message header
 *
 * Returns 1 if a report back to the application is requested.
 */
static inline int nlmsg_report(const struct nlmsghdr *nlh)
{
	return nlh ? !!(nlh->nlmsg_flags & NLM_F_ECHO) : 0;
}

/**
 * nlmsg_seq - return the seq number of netlink message
 * @nlh: netlink message header
 *
 * Returns 0 if netlink message is NULL
 */
static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
{
	return nlh ?
	       nlh->nlmsg_seq : 0;
}

/**
 * nlmsg_for_each_attr - iterate over a stream of attributes
 * @pos: loop counter, set to current attribute
 * @nlh: netlink message header
 * @hdrlen: length of family specific header
 * @rem: initialized to len, holds bytes currently remaining in stream
 */
#define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \
	nla_for_each_attr(pos, nlmsg_attrdata(nlh, hdrlen), \
			  nlmsg_attrlen(nlh, hdrlen), rem)

/**
 * nlmsg_put - Add a new netlink message to an skb
 * @skb: socket buffer to store message in
 * @portid: netlink PORTID of requesting application
 * @seq: sequence number of message
 * @type: message type
 * @payload: length of message payload
 * @flags: message flags
 *
 * Returns NULL if the tailroom of the skb is insufficient to store
 * the message header and payload.
 */
static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid,
					 u32 seq, int type, int payload,
					 int flags)
{
	if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload)))
		return NULL;

	return __nlmsg_put(skb, portid, seq, type, payload, flags);
}

/**
 * nlmsg_append - Add more data to a nlmsg in a skb
 * @skb: socket buffer to store message in
 * @size: length of message payload
 *
 * Append data to an existing nlmsg, used when constructing a message
 * with multiple fixed-format headers (which is rare).
 * Returns NULL if the tailroom of the skb is insufficient to store
 * the extra payload.
 */
static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
{
	if (unlikely(skb_tailroom(skb) < NLMSG_ALIGN(size)))
		return NULL;

	if (NLMSG_ALIGN(size) - size)
		memset(skb_tail_pointer(skb) + size, 0,
		       NLMSG_ALIGN(size) - size);
	return __skb_put(skb, NLMSG_ALIGN(size));
}

/**
 * nlmsg_put_answer - Add a new callback based netlink message to an skb
 * @skb: socket buffer to store message in
 * @cb: netlink callback
 * @type: message type
 * @payload: length of message payload
 * @flags: message flags
 *
 * Returns NULL if the tailroom of the skb is insufficient to store
 * the message header and payload.
 */
static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
						struct netlink_callback *cb,
						int type, int payload,
						int flags)
{
	return nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			 type, payload, flags);
}

/**
 * nlmsg_new - Allocate a new netlink message
 * @payload: size of the message payload
 * @flags: the type of memory to allocate.
 *
 * Use NLMSG_DEFAULT_SIZE if the size of the payload isn't known
 * and a good default is needed.
 */
static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
{
	return alloc_skb(nlmsg_total_size(payload), flags);
}

/**
 * nlmsg_new_large - Allocate a new netlink message with non-contiguous
 * physical memory
 * @payload: size of the message payload
 *
 * The allocated skb is unable to have frag page for shinfo->frags*,
 * as the NULL setting for skb->head in netlink_skb_destructor() will
 * bypass most of the handling in skb_release_data()
 */
static inline struct sk_buff *nlmsg_new_large(size_t payload)
{
	return netlink_alloc_large_skb(nlmsg_total_size(payload), 0);
}

/**
 * nlmsg_end - Finalize a netlink message
 * @skb: socket buffer the message is stored in
 * @nlh: netlink message header
 *
 * Corrects the netlink message header to include the appended
 * attributes. Only necessary if attributes have been added to
 * the message.
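 *
 * A minimal construction sketch (MY_MSG_TYPE, MY_ATTR_MTU and the
 * surrounding locals such as sk, portid and seq are placeholders, not
 * defined in this header):
 *
 *	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 *	if (!skb)
 *		return -ENOMEM;
 *	nlh = nlmsg_put(skb, portid, seq, MY_MSG_TYPE, 0, 0);
 *	if (!nlh)
 *		goto nla_put_failure;
 *	if (nla_put_u32(skb, MY_ATTR_MTU, mtu))
 *		goto nla_put_failure;
 *	nlmsg_end(skb, nlh);
 *	return nlmsg_unicast(sk, skb, portid);
 *
 * nla_put_failure:
 *	nlmsg_free(skb);
 *	return -EMSGSIZE;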
*/ static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) { nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; } /** * nlmsg_get_pos - return current position in netlink message * @skb: socket buffer the message is stored in * * Returns a pointer to the current tail of the message. */ static inline void *nlmsg_get_pos(struct sk_buff *skb) { return skb_tail_pointer(skb); } /** * nlmsg_trim - Trim message to a mark * @skb: socket buffer the message is stored in * @mark: mark to trim to * * Trims the message to the provided mark. */ static inline void nlmsg_trim(struct sk_buff *skb, const void *mark) { if (mark) { WARN_ON((unsigned char *) mark < skb->data); skb_trim(skb, (unsigned char *) mark - skb->data); } } /** * nlmsg_cancel - Cancel construction of a netlink message * @skb: socket buffer the message is stored in * @nlh: netlink message header * * Removes the complete netlink message including all * attributes from the socket buffer again. */ static inline void nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) { nlmsg_trim(skb, nlh); } /** * nlmsg_free - free a netlink message * @skb: socket buffer of netlink message */ static inline void nlmsg_free(struct sk_buff *skb) { kfree_skb(skb); } /** * nlmsg_multicast_filtered - multicast a netlink message with filter function * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags * @filter: filter function * @filter_data: filter function private data * * Return: 0 on success, negative error code for failure. */ static inline int nlmsg_multicast_filtered(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags, netlink_filter_fn filter, void *filter_data) { int err; NETLINK_CB(skb).dst_group = group; err = netlink_broadcast_filtered(sk, skb, portid, group, flags, filter, filter_data); if (err > 0) err = 0; return err; } /** * nlmsg_multicast - multicast a netlink message * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { return nlmsg_multicast_filtered(sk, skb, portid, group, flags, NULL, NULL); } /** * nlmsg_unicast - unicast a netlink message * @sk: netlink socket to spread message to * @skb: netlink message as socket buffer * @portid: netlink portid of the destination socket */ static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 portid) { int err; err = netlink_unicast(sk, skb, portid, MSG_DONTWAIT); if (err > 0) err = 0; return err; } /** * nlmsg_for_each_msg - iterate over a stream of messages * @pos: loop counter, set to current message * @head: head of message stream * @len: length of message stream * @rem: initialized to len, holds bytes currently remaining in stream */ #define nlmsg_for_each_msg(pos, head, len, rem) \ for (pos = head, rem = len; \ nlmsg_ok(pos, rem); \ pos = nlmsg_next(pos, &(rem))) /** * nl_dump_check_consistent - check if sequence is consistent and advertise if not * @cb: netlink callback structure that stores the sequence number * @nlh: netlink message header to write the flag to * * This function checks if the sequence (generation) number changed during dump * and if it did, advertises it in the netlink message header. 
* * The correct way to use it is to set cb->seq to the generation counter when * all locks for dumping have been acquired, and then call this function for * each message that is generated. * * Note that due to initialisation concerns, 0 is an invalid sequence number * and must not be used by code that uses this functionality. */ static inline void nl_dump_check_consistent(struct netlink_callback *cb, struct nlmsghdr *nlh) { if (cb->prev_seq && cb->seq != cb->prev_seq) nlh->nlmsg_flags |= NLM_F_DUMP_INTR; cb->prev_seq = cb->seq; } /************************************************************************** * Netlink Attributes **************************************************************************/ /** * nla_attr_size - length of attribute not including padding * @payload: length of payload */ static inline int nla_attr_size(int payload) { return NLA_HDRLEN + payload; } /** * nla_total_size - total length of attribute including padding * @payload: length of payload */ static inline int nla_total_size(int payload) { return NLA_ALIGN(nla_attr_size(payload)); } /** * nla_padlen - length of padding at the tail of attribute * @payload: length of payload */ static inline int nla_padlen(int payload) { return nla_total_size(payload) - nla_attr_size(payload); } /** * nla_type - attribute type * @nla: netlink attribute */ static inline int nla_type(const struct nlattr *nla) { return nla->nla_type & NLA_TYPE_MASK; } /** * nla_data - head of payload * @nla: netlink attribute */ static inline void *nla_data(const struct nlattr *nla) { return (char *) nla + NLA_HDRLEN; } /** * nla_len - length of payload * @nla: netlink attribute */ static inline u16 nla_len(const struct nlattr *nla) { return nla->nla_len - NLA_HDRLEN; } /** * nla_ok - check if the netlink attribute fits into the remaining bytes * @nla: netlink attribute * @remaining: number of bytes remaining in attribute stream */ static inline int nla_ok(const struct nlattr *nla, int remaining) { return remaining >= (int) sizeof(*nla) && nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } /** * nla_next - next netlink attribute in attribute stream * @nla: netlink attribute * @remaining: number of bytes remaining in attribute stream * * Returns the next netlink attribute in the attribute stream and * decrements remaining by the size of the current attribute. */ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) { unsigned int totlen = NLA_ALIGN(nla->nla_len); *remaining -= totlen; return (struct nlattr *) ((char *) nla + totlen); } /** * nla_find_nested - find attribute in a set of nested attributes * @nla: attribute containing the nested attributes * @attrtype: type of attribute to look for * * Returns the first attribute which matches the specified type. 
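 *
 * A short lookup sketch (MY_ATTR_OPTS and MY_ATTR_OPT_MTU are
 * placeholders, not defined in this header):
 *
 *	struct nlattr *opt;
 *
 *	opt = nla_find_nested(tb[MY_ATTR_OPTS], MY_ATTR_OPT_MTU);
 *	if (opt)
 *		mtu = nla_get_u32(opt);
 *
 * To walk every nested attribute rather than look up a single one,
 * use nla_for_each_nested() or nla_parse_nested().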
*/ static inline struct nlattr * nla_find_nested(const struct nlattr *nla, int attrtype) { return nla_find(nla_data(nla), nla_len(nla), attrtype); } /** * nla_parse_nested - parse nested attributes * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @nla: attribute containing the nested attributes * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse() */ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, const struct nlattr *nla, const struct nla_policy *policy, struct netlink_ext_ack *extack) { if (!(nla->nla_type & NLA_F_NESTED)) { NL_SET_ERR_MSG_ATTR(extack, nla, "NLA_F_NESTED is missing"); return -EINVAL; } return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, NL_VALIDATE_STRICT, extack); } /** * nla_parse_nested_deprecated - parse nested attributes * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @nla: attribute containing the nested attributes * @policy: validation policy * @extack: extended ACK report struct * * See nla_parse_deprecated() */ static inline int nla_parse_nested_deprecated(struct nlattr *tb[], int maxtype, const struct nlattr *nla, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, NL_VALIDATE_LIBERAL, extack); } /** * nla_put_u8 - Add a u8 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) { /* temporary variables to work around GCC PR81715 with asan-stack=1 */ u8 tmp = value; return nla_put(skb, attrtype, sizeof(u8), &tmp); } /** * nla_put_u16 - Add a u16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) { u16 tmp = value; return nla_put(skb, attrtype, sizeof(u16), &tmp); } /** * nla_put_be16 - Add a __be16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) { __be16 tmp = value; return nla_put(skb, attrtype, sizeof(__be16), &tmp); } /** * nla_put_net16 - Add 16-bit network byte order netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) { __be16 tmp = value; return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** * nla_put_le16 - Add a __le16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) { __le16 tmp = value; return nla_put(skb, attrtype, sizeof(__le16), &tmp); } /** * nla_put_u32 - Add a u32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) { u32 tmp = value; return nla_put(skb, attrtype, sizeof(u32), &tmp); } /** * nla_put_uint - Add a variable-size unsigned int to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * 
@value: numeric value */ static inline int nla_put_uint(struct sk_buff *skb, int attrtype, u64 value) { u64 tmp64 = value; u32 tmp32 = value; if (tmp64 == tmp32) return nla_put_u32(skb, attrtype, tmp32); return nla_put(skb, attrtype, sizeof(u64), &tmp64); } /** * nla_put_be32 - Add a __be32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) { __be32 tmp = value; return nla_put(skb, attrtype, sizeof(__be32), &tmp); } /** * nla_put_net32 - Add 32-bit network byte order netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) { __be32 tmp = value; return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** * nla_put_le32 - Add a __le32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) { __le32 tmp = value; return nla_put(skb, attrtype, sizeof(__le32), &tmp); } /** * nla_put_u64_64bit - Add a u64 netlink attribute to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, u64 value, int padattr) { u64 tmp = value; return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { __be64 tmp = value; return nla_put_64bit(skb, attrtype, sizeof(__be64), &tmp, padattr); } /** * nla_put_net64 - Add 64-bit network byte order nlattr to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { __be64 tmp = value; return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, tmp, padattr); } /** * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, int padattr) { __le64 tmp = value; return nla_put_64bit(skb, attrtype, sizeof(__le64), &tmp, padattr); } /** * nla_put_s8 - Add a s8 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) { s8 tmp = value; return nla_put(skb, attrtype, sizeof(s8), &tmp); } /** * nla_put_s16 - Add a s16 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) { s16 tmp = value; return nla_put(skb, attrtype, sizeof(s16), &tmp); } /** * nla_put_s32 - Add a s32 netlink attribute to 
a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) { s32 tmp = value; return nla_put(skb, attrtype, sizeof(s32), &tmp); } /** * nla_put_s64 - Add a s64 netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value * @padattr: attribute type for the padding */ static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, int padattr) { s64 tmp = value; return nla_put_64bit(skb, attrtype, sizeof(s64), &tmp, padattr); } /** * nla_put_sint - Add a variable-size signed int to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: numeric value */ static inline int nla_put_sint(struct sk_buff *skb, int attrtype, s64 value) { s64 tmp64 = value; s32 tmp32 = value; if (tmp64 == tmp32) return nla_put_s32(skb, attrtype, tmp32); return nla_put(skb, attrtype, sizeof(s64), &tmp64); } /** * nla_put_string - Add a string netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @str: NUL terminated string */ static inline int nla_put_string(struct sk_buff *skb, int attrtype, const char *str) { return nla_put(skb, attrtype, strlen(str) + 1, str); } /** * nla_put_flag - Add a flag netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type */ static inline int nla_put_flag(struct sk_buff *skb, int attrtype) { return nla_put(skb, attrtype, 0, NULL); } /** * nla_put_msecs - Add a msecs netlink attribute to a skb and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @njiffies: number of jiffies to convert to msecs * @padattr: attribute type for the padding */ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, unsigned long njiffies, int padattr) { u64 tmp = jiffies_to_msecs(njiffies); return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** * nla_put_in_addr - Add an IPv4 address netlink attribute to a socket * buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @addr: IPv4 address */ static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype, __be32 addr) { __be32 tmp = addr; return nla_put_be32(skb, attrtype, tmp); } /** * nla_put_in6_addr - Add an IPv6 address netlink attribute to a socket * buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @addr: IPv6 address */ static inline int nla_put_in6_addr(struct sk_buff *skb, int attrtype, const struct in6_addr *addr) { return nla_put(skb, attrtype, sizeof(*addr), addr); } /** * nla_put_bitfield32 - Add a bitfield32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @value: value carrying bits * @selector: selector of valid bits */ static inline int nla_put_bitfield32(struct sk_buff *skb, int attrtype, __u32 value, __u32 selector) { struct nla_bitfield32 tmp = { value, selector, }; return nla_put(skb, attrtype, sizeof(tmp), &tmp); } /** * nla_get_u32 - return payload of u32 attribute * @nla: u32 netlink attribute */ static inline u32 nla_get_u32(const struct nlattr *nla) { return *(u32 *) nla_data(nla); } /** * nla_get_be32 - return payload of __be32 attribute * @nla: __be32 netlink attribute */ static inline __be32 nla_get_be32(const struct nlattr *nla) { return *(__be32 *) nla_data(nla); } /** * nla_get_le32 - return 
payload of __le32 attribute * @nla: __le32 netlink attribute */ static inline __le32 nla_get_le32(const struct nlattr *nla) { return *(__le32 *) nla_data(nla); } /** * nla_get_u16 - return payload of u16 attribute * @nla: u16 netlink attribute */ static inline u16 nla_get_u16(const struct nlattr *nla) { return *(u16 *) nla_data(nla); } /** * nla_get_be16 - return payload of __be16 attribute * @nla: __be16 netlink attribute */ static inline __be16 nla_get_be16(const struct nlattr *nla) { return *(__be16 *) nla_data(nla); } /** * nla_get_le16 - return payload of __le16 attribute * @nla: __le16 netlink attribute */ static inline __le16 nla_get_le16(const struct nlattr *nla) { return *(__le16 *) nla_data(nla); } /** * nla_get_u8 - return payload of u8 attribute * @nla: u8 netlink attribute */ static inline u8 nla_get_u8(const struct nlattr *nla) { return *(u8 *) nla_data(nla); } /** * nla_get_u64 - return payload of u64 attribute * @nla: u64 netlink attribute */ static inline u64 nla_get_u64(const struct nlattr *nla) { u64 tmp; nla_memcpy(&tmp, nla, sizeof(tmp)); return tmp; } /** * nla_get_uint - return payload of uint attribute * @nla: uint netlink attribute */ static inline u64 nla_get_uint(const struct nlattr *nla) { if (nla_len(nla) == sizeof(u32)) return nla_get_u32(nla); return nla_get_u64(nla); } /** * nla_get_be64 - return payload of __be64 attribute * @nla: __be64 netlink attribute */ static inline __be64 nla_get_be64(const struct nlattr *nla) { __be64 tmp; nla_memcpy(&tmp, nla, sizeof(tmp)); return tmp; } /** * nla_get_le64 - return payload of __le64 attribute * @nla: __le64 netlink attribute */ static inline __le64 nla_get_le64(const struct nlattr *nla) { return *(__le64 *) nla_data(nla); } /** * nla_get_s32 - return payload of s32 attribute * @nla: s32 netlink attribute */ static inline s32 nla_get_s32(const struct nlattr *nla) { return *(s32 *) nla_data(nla); } /** * nla_get_s16 - return payload of s16 attribute * @nla: s16 netlink attribute */ static inline s16 nla_get_s16(const struct nlattr *nla) { return *(s16 *) nla_data(nla); } /** * nla_get_s8 - return payload of s8 attribute * @nla: s8 netlink attribute */ static inline s8 nla_get_s8(const struct nlattr *nla) { return *(s8 *) nla_data(nla); } /** * nla_get_s64 - return payload of s64 attribute * @nla: s64 netlink attribute */ static inline s64 nla_get_s64(const struct nlattr *nla) { s64 tmp; nla_memcpy(&tmp, nla, sizeof(tmp)); return tmp; } /** * nla_get_sint - return payload of uint attribute * @nla: uint netlink attribute */ static inline s64 nla_get_sint(const struct nlattr *nla) { if (nla_len(nla) == sizeof(s32)) return nla_get_s32(nla); return nla_get_s64(nla); } /** * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute */ static inline int nla_get_flag(const struct nlattr *nla) { return !!nla; } /** * nla_get_msecs - return payload of msecs attribute * @nla: msecs netlink attribute * * Returns the number of milliseconds in jiffies. 
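 *
 * A round-trip sketch (MY_ATTR_TIMEOUT and MY_ATTR_PAD are placeholders,
 * not defined in this header); the attribute carries milliseconds on the
 * wire while both sides work in jiffies:
 *
 *	if (nla_put_msecs(skb, MY_ATTR_TIMEOUT, timeout_jiffies, MY_ATTR_PAD))
 *		goto nla_put_failure;
 *
 * and on the receiving side, after parsing into tb[]:
 *
 *	timeout_jiffies = nla_get_msecs(tb[MY_ATTR_TIMEOUT]);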
*/ static inline unsigned long nla_get_msecs(const struct nlattr *nla) { u64 msecs = nla_get_u64(nla); return msecs_to_jiffies((unsigned long) msecs); } /** * nla_get_in_addr - return payload of IPv4 address attribute * @nla: IPv4 address netlink attribute */ static inline __be32 nla_get_in_addr(const struct nlattr *nla) { return *(__be32 *) nla_data(nla); } /** * nla_get_in6_addr - return payload of IPv6 address attribute * @nla: IPv6 address netlink attribute */ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) { struct in6_addr tmp; nla_memcpy(&tmp, nla, sizeof(tmp)); return tmp; } /** * nla_get_bitfield32 - return payload of 32-bit bitfield attribute * @nla: nla_bitfield32 attribute */ static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla) { struct nla_bitfield32 tmp; nla_memcpy(&tmp, nla, sizeof(tmp)); return tmp; } /** * nla_memdup - duplicate attribute memory (kmemdup) * @src: netlink attribute to duplicate from * @gfp: GFP mask */ static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) { return kmemdup(nla_data(src), nla_len(src), gfp); } /** * nla_nest_start_noflag - Start a new level of nested attributes * @skb: socket buffer to add attributes to * @attrtype: attribute type of container * * This function exists for backward compatibility to use in APIs which never * marked their nest attributes with the NLA_F_NESTED flag. New APIs should use * nla_nest_start() which sets the flag. * * Returns the container attribute or NULL on error */ static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb, int attrtype) { struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); if (nla_put(skb, attrtype, 0, NULL) < 0) return NULL; return start; } /** * nla_nest_start - Start a new level of nested attributes, with NLA_F_NESTED * @skb: socket buffer to add attributes to * @attrtype: attribute type of container * * Unlike nla_nest_start_noflag(), mark the nest attribute with the NLA_F_NESTED * flag. This is the preferred function to use in new code. * * Returns the container attribute or NULL on error */ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) { return nla_nest_start_noflag(skb, attrtype | NLA_F_NESTED); } /** * nla_nest_end - Finalize nesting of attributes * @skb: socket buffer the attributes are stored in * @start: container attribute * * Corrects the container attribute header to include all * appended attributes. * * Returns the total data length of the skb. */ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) { start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; return skb->len; } /** * nla_nest_cancel - Cancel nesting of attributes * @skb: socket buffer the message is stored in * @start: container attribute * * Removes the container attribute and all nested * attributes. */ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) { nlmsg_trim(skb, start); } /** * __nla_validate_nested - Validate a stream of nested attributes * @start: container attribute * @maxtype: maximum attribute type to be expected * @policy: validation policy * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the nested attribute stream against the * specified policy. Attributes with a type exceeding maxtype will be * ignored. See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code.
*/ static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { return __nla_validate(nla_data(start), nla_len(start), maxtype, policy, validate, extack); } static inline int nla_validate_nested(const struct nlattr *start, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_validate_nested(start, maxtype, policy, NL_VALIDATE_STRICT, extack); } static inline int nla_validate_nested_deprecated(const struct nlattr *start, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nla_validate_nested(start, maxtype, policy, NL_VALIDATE_LIBERAL, extack); } /** * nla_need_padding_for_64bit - test 64-bit alignment of the next attribute * @skb: socket buffer the message is stored in * * Return true if padding is needed to align the next attribute (nla_data()) to * a 64-bit aligned area. */ static inline bool nla_need_padding_for_64bit(struct sk_buff *skb) { #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS /* The nlattr header is 4 bytes in size, that's why we test * if the skb->data _is_ aligned. A NOP attribute, plus * nlattr header for next attribute, will make nla_data() * 8-byte aligned. */ if (IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8)) return true; #endif return false; } /** * nla_align_64bit - 64-bit align the nla_data() of next attribute * @skb: socket buffer the message is stored in * @padattr: attribute type for the padding * * Conditionally emit a padding netlink attribute in order to make * the next attribute we emit have a 64-bit aligned nla_data() area. * This will only be done in architectures which do not have * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined. * * Returns zero on success or a negative error code. 
*/ static inline int nla_align_64bit(struct sk_buff *skb, int padattr) { if (nla_need_padding_for_64bit(skb) && !nla_reserve(skb, padattr, 0)) return -EMSGSIZE; return 0; } /** * nla_total_size_64bit - total length of attribute including padding * @payload: length of payload */ static inline int nla_total_size_64bit(int payload) { return NLA_ALIGN(nla_attr_size(payload)) #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + NLA_ALIGN(nla_attr_size(0)) #endif ; } /** * nla_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @head: head of attribute stream * @len: length of attribute stream * @rem: initialized to len, holds bytes currently remaining in stream */ #define nla_for_each_attr(pos, head, len, rem) \ for (pos = head, rem = len; \ nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) /** * nla_for_each_nested - iterate over nested attributes * @pos: loop counter, set to current attribute * @nla: attribute containing the nested attributes * @rem: initialized to len, holds bytes currently remaining in stream */ #define nla_for_each_nested(pos, nla, rem) \ nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) /** * nla_is_last - Test if attribute is last in stream * @nla: attribute to test * @rem: bytes remaining in stream */ static inline bool nla_is_last(const struct nlattr *nla, int rem) { return nla->nla_len == rem; } void nla_get_range_unsigned(const struct nla_policy *pt, struct netlink_range_validation *range); void nla_get_range_signed(const struct nla_policy *pt, struct netlink_range_validation_signed *range); struct netlink_policy_dump_state; int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate, const struct nla_policy *policy, unsigned int maxtype); int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state, const struct nla_policy *policy, unsigned int maxtype); bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state); int netlink_policy_dump_write(struct sk_buff *skb, struct netlink_policy_dump_state *state); int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt); int netlink_policy_dump_write_attr(struct sk_buff *skb, const struct nla_policy *pt, int nestattr); void netlink_policy_dump_free(struct netlink_policy_dump_state *state); #endif
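/*
 * Editor's note: a minimal usage sketch (not part of netlink.h) showing how
 * the nla_put_*() and nla_nest_*() helpers above are typically combined when
 * building a message. The attribute numbers MY_ATTR_NEST, MY_ATTR_PORT and
 * MY_ATTR_NAME are hypothetical placeholders, not real kernel definitions.
 */
#include <net/netlink.h>

enum { MY_ATTR_UNSPEC, MY_ATTR_NEST, MY_ATTR_PORT, MY_ATTR_NAME };

static int my_fill_example(struct sk_buff *skb)
{
	struct nlattr *nest;

	/* Open a nested container; nla_nest_start() sets NLA_F_NESTED. */
	nest = nla_nest_start(skb, MY_ATTR_NEST);
	if (!nest)
		return -EMSGSIZE;

	/* Scalar and string attributes inside the nest. */
	if (nla_put_u32(skb, MY_ATTR_PORT, 8080) ||
	    nla_put_string(skb, MY_ATTR_NAME, "example")) {
		/* On failure, drop the partially written nest. */
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}

	/* Patch the container length to cover the appended attributes. */
	nla_nest_end(skb, nest);
	return 0;
}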
/* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty.
* * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #include <linux/uaccess.h> #include <drm/drm_drv.h> #include <drm/drm_encoder.h> #include <drm/drm_file.h> #include <drm/drm_framebuffer.h> #include <drm/drm_managed.h> #include <drm/drm_mode_config.h> #include <drm/drm_print.h> #include <linux/dma-resv.h> #include "drm_crtc_internal.h" #include "drm_internal.h" int drm_modeset_register_all(struct drm_device *dev) { int ret; ret = drm_plane_register_all(dev); if (ret) goto err_plane; ret = drm_crtc_register_all(dev); if (ret) goto err_crtc; ret = drm_encoder_register_all(dev); if (ret) goto err_encoder; ret = drm_connector_register_all(dev); if (ret) goto err_connector; return 0; err_connector: drm_encoder_unregister_all(dev); err_encoder: drm_crtc_unregister_all(dev); err_crtc: drm_plane_unregister_all(dev); err_plane: return ret; } void drm_modeset_unregister_all(struct drm_device *dev) { drm_connector_unregister_all(dev); drm_encoder_unregister_all(dev); drm_crtc_unregister_all(dev); drm_plane_unregister_all(dev); } /** * drm_mode_getresources - get graphics configuration * @dev: drm device for the ioctl * @data: data pointer for the ioctl * @file_priv: drm file for the ioctl call * * Construct a set of configuration description structures and return * them to the user, including CRTC, connector and framebuffer configuration. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. 
*/ int drm_mode_getresources(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_card_res *card_res = data; struct drm_framebuffer *fb; struct drm_connector *connector; struct drm_crtc *crtc; struct drm_encoder *encoder; int count, ret = 0; uint32_t __user *fb_id; uint32_t __user *crtc_id; uint32_t __user *connector_id; uint32_t __user *encoder_id; struct drm_connector_list_iter conn_iter; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; mutex_lock(&file_priv->fbs_lock); count = 0; fb_id = u64_to_user_ptr(card_res->fb_id_ptr); list_for_each_entry(fb, &file_priv->fbs, filp_head) { if (count < card_res->count_fbs && put_user(fb->base.id, fb_id + count)) { mutex_unlock(&file_priv->fbs_lock); return -EFAULT; } count++; } card_res->count_fbs = count; mutex_unlock(&file_priv->fbs_lock); card_res->max_height = dev->mode_config.max_height; card_res->min_height = dev->mode_config.min_height; card_res->max_width = dev->mode_config.max_width; card_res->min_width = dev->mode_config.min_width; count = 0; crtc_id = u64_to_user_ptr(card_res->crtc_id_ptr); drm_for_each_crtc(crtc, dev) { if (drm_lease_held(file_priv, crtc->base.id)) { if (count < card_res->count_crtcs && put_user(crtc->base.id, crtc_id + count)) return -EFAULT; count++; } } card_res->count_crtcs = count; count = 0; encoder_id = u64_to_user_ptr(card_res->encoder_id_ptr); drm_for_each_encoder(encoder, dev) { if (count < card_res->count_encoders && put_user(encoder->base.id, encoder_id + count)) return -EFAULT; count++; } card_res->count_encoders = count; drm_connector_list_iter_begin(dev, &conn_iter); count = 0; connector_id = u64_to_user_ptr(card_res->connector_id_ptr); drm_for_each_connector_iter(connector, &conn_iter) { /* only expose writeback connectors if userspace understands them */ if (!file_priv->writeback_connectors && (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)) continue; if (drm_lease_held(file_priv, connector->base.id)) { if (count < card_res->count_connectors && put_user(connector->base.id, connector_id + count)) { drm_connector_list_iter_end(&conn_iter); return -EFAULT; } count++; } } card_res->count_connectors = count; drm_connector_list_iter_end(&conn_iter); return ret; } /** * drm_mode_config_reset - call ->reset callbacks * @dev: drm device * * This function calls all the crtc's, encoder's and connector's ->reset * callbacks. Drivers can use this in e.g. their driver load or resume code to * reset hardware and software state.
*/ void drm_mode_config_reset(struct drm_device *dev) { struct drm_crtc *crtc; struct drm_plane *plane; struct drm_encoder *encoder; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; drm_for_each_plane(plane, dev) if (plane->funcs->reset) plane->funcs->reset(plane); drm_for_each_crtc(crtc, dev) if (crtc->funcs->reset) crtc->funcs->reset(crtc); drm_for_each_encoder(encoder, dev) if (encoder->funcs && encoder->funcs->reset) encoder->funcs->reset(encoder); drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) if (connector->funcs->reset) connector->funcs->reset(connector); drm_connector_list_iter_end(&conn_iter); } EXPORT_SYMBOL(drm_mode_config_reset); /* * Global properties */ static const struct drm_prop_enum_list drm_plane_type_enum_list[] = { { DRM_PLANE_TYPE_OVERLAY, "Overlay" }, { DRM_PLANE_TYPE_PRIMARY, "Primary" }, { DRM_PLANE_TYPE_CURSOR, "Cursor" }, }; static int drm_mode_create_standard_properties(struct drm_device *dev) { struct drm_property *prop; int ret; ret = drm_connector_create_standard_properties(dev); if (ret) return ret; prop = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE, "type", drm_plane_type_enum_list, ARRAY_SIZE(drm_plane_type_enum_list)); if (!prop) return -ENOMEM; dev->mode_config.plane_type_property = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SRC_X", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_src_x = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SRC_Y", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_src_y = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SRC_W", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_src_w = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SRC_H", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_src_h = prop; prop = drm_property_create_signed_range(dev, DRM_MODE_PROP_ATOMIC, "CRTC_X", INT_MIN, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_x = prop; prop = drm_property_create_signed_range(dev, DRM_MODE_PROP_ATOMIC, "CRTC_Y", INT_MIN, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_y = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "CRTC_W", 0, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_w = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "CRTC_H", 0, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_h = prop; prop = drm_property_create_object(dev, DRM_MODE_PROP_ATOMIC, "FB_ID", DRM_MODE_OBJECT_FB); if (!prop) return -ENOMEM; dev->mode_config.prop_fb_id = prop; prop = drm_property_create_signed_range(dev, DRM_MODE_PROP_ATOMIC, "IN_FENCE_FD", -1, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_in_fence_fd = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "OUT_FENCE_PTR", 0, U64_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_out_fence_ptr = prop; prop = drm_property_create_object(dev, DRM_MODE_PROP_ATOMIC, "CRTC_ID", DRM_MODE_OBJECT_CRTC); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_id = prop; prop = drm_property_create(dev, DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, "FB_DAMAGE_CLIPS", 0); if (!prop) return -ENOMEM; dev->mode_config.prop_fb_damage_clips = prop; prop = drm_property_create_bool(dev, DRM_MODE_PROP_ATOMIC, "ACTIVE"); if (!prop) return -ENOMEM; dev->mode_config.prop_active = prop; prop = drm_property_create(dev, DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, 
"MODE_ID", 0); if (!prop) return -ENOMEM; dev->mode_config.prop_mode_id = prop; prop = drm_property_create_bool(dev, 0, "VRR_ENABLED"); if (!prop) return -ENOMEM; dev->mode_config.prop_vrr_enabled = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "DEGAMMA_LUT", 0); if (!prop) return -ENOMEM; dev->mode_config.degamma_lut_property = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE, "DEGAMMA_LUT_SIZE", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.degamma_lut_size_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "CTM", 0); if (!prop) return -ENOMEM; dev->mode_config.ctm_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "GAMMA_LUT", 0); if (!prop) return -ENOMEM; dev->mode_config.gamma_lut_property = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE, "GAMMA_LUT_SIZE", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.gamma_lut_size_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_BLOB, "IN_FORMATS", 0); if (!prop) return -ENOMEM; dev->mode_config.modifiers_property = prop; return 0; } static void drm_mode_config_init_release(struct drm_device *dev, void *ptr) { drm_mode_config_cleanup(dev); } /** * drmm_mode_config_init - managed DRM mode_configuration structure * initialization * @dev: DRM device * * Initialize @dev's mode_config structure, used for tracking the graphics * configuration of @dev. * * Since this initializes the modeset locks, no locking is possible. Which is no * problem, since this should happen single threaded at init time. It is the * driver's problem to ensure this guarantee. * * Cleanup is automatically handled through registering drm_mode_config_cleanup * with drmm_add_action(). * * Returns: 0 on success, negative error value on failure. 
*/ int drmm_mode_config_init(struct drm_device *dev) { int ret; mutex_init(&dev->mode_config.mutex); drm_modeset_lock_init(&dev->mode_config.connection_mutex); mutex_init(&dev->mode_config.idr_mutex); mutex_init(&dev->mode_config.fb_lock); mutex_init(&dev->mode_config.blob_lock); INIT_LIST_HEAD(&dev->mode_config.fb_list); INIT_LIST_HEAD(&dev->mode_config.crtc_list); INIT_LIST_HEAD(&dev->mode_config.connector_list); INIT_LIST_HEAD(&dev->mode_config.encoder_list); INIT_LIST_HEAD(&dev->mode_config.property_list); INIT_LIST_HEAD(&dev->mode_config.property_blob_list); INIT_LIST_HEAD(&dev->mode_config.plane_list); INIT_LIST_HEAD(&dev->mode_config.privobj_list); idr_init_base(&dev->mode_config.object_idr, 1); idr_init_base(&dev->mode_config.tile_idr, 1); ida_init(&dev->mode_config.connector_ida); spin_lock_init(&dev->mode_config.connector_list_lock); init_llist_head(&dev->mode_config.connector_free_list); INIT_WORK(&dev->mode_config.connector_free_work, drm_connector_free_work_fn); ret = drm_mode_create_standard_properties(dev); if (ret) { drm_mode_config_cleanup(dev); return ret; } /* Just to be sure */ dev->mode_config.num_fb = 0; dev->mode_config.num_connector = 0; dev->mode_config.num_crtc = 0; dev->mode_config.num_encoder = 0; dev->mode_config.num_total_plane = 0; if (IS_ENABLED(CONFIG_LOCKDEP)) { struct drm_modeset_acquire_ctx modeset_ctx; struct ww_acquire_ctx resv_ctx; struct dma_resv resv; int ret; dma_resv_init(&resv); drm_modeset_acquire_init(&modeset_ctx, 0); ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &modeset_ctx); if (ret == -EDEADLK) ret = drm_modeset_backoff(&modeset_ctx); ww_acquire_init(&resv_ctx, &reservation_ww_class); ret = dma_resv_lock(&resv, &resv_ctx); if (ret == -EDEADLK) dma_resv_lock_slow(&resv, &resv_ctx); dma_resv_unlock(&resv); ww_acquire_fini(&resv_ctx); drm_modeset_drop_locks(&modeset_ctx); drm_modeset_acquire_fini(&modeset_ctx); dma_resv_fini(&resv); } return drmm_add_action_or_reset(dev, drm_mode_config_init_release, NULL); } EXPORT_SYMBOL(drmm_mode_config_init); /** * drm_mode_config_cleanup - free up DRM mode_config info * @dev: DRM device * * Free up all the connectors and CRTCs associated with this DRM device, then * free up the framebuffers and associated buffer objects. * * Note that since this /should/ happen single-threaded at driver/device * teardown time, no locking is required. It's the driver's job to ensure that * this guarantee actually holds true. * * FIXME: With the managed drmm_mode_config_init() it is no longer necessary for * drivers to explicitly call this function. */ void drm_mode_config_cleanup(struct drm_device *dev) { struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_crtc *crtc, *ct; struct drm_encoder *encoder, *enct; struct drm_framebuffer *fb, *fbt; struct drm_property *property, *pt; struct drm_property_blob *blob, *bt; struct drm_plane *plane, *plt; list_for_each_entry_safe(encoder, enct, &dev->mode_config.encoder_list, head) { encoder->funcs->destroy(encoder); } drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { /* drm_connector_list_iter holds a full reference to the * current connector itself, which means it is inherently safe * against unreferencing the current connector - but not against * deleting it right away. */ drm_connector_put(connector); } drm_connector_list_iter_end(&conn_iter); /* connector_iter drops references in a work item.
*/ flush_work(&dev->mode_config.connector_free_work); if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) { drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) DRM_ERROR("connector %s leaked!\n", connector->name); drm_connector_list_iter_end(&conn_iter); } list_for_each_entry_safe(property, pt, &dev->mode_config.property_list, head) { drm_property_destroy(dev, property); } list_for_each_entry_safe(plane, plt, &dev->mode_config.plane_list, head) { plane->funcs->destroy(plane); } list_for_each_entry_safe(crtc, ct, &dev->mode_config.crtc_list, head) { crtc->funcs->destroy(crtc); } list_for_each_entry_safe(blob, bt, &dev->mode_config.property_blob_list, head_global) { drm_property_blob_put(blob); } /* * Single-threaded teardown context, so it's not required to grab the * fb_lock to protect against concurrent fb_list access. Contrary, it * would actually deadlock with the drm_framebuffer_cleanup function. * * Also, if there are any framebuffers left, that's a driver leak now, * so politely WARN about this. */ WARN_ON(!list_empty(&dev->mode_config.fb_list)); list_for_each_entry_safe(fb, fbt, &dev->mode_config.fb_list, head) { struct drm_printer p = drm_dbg_printer(dev, DRM_UT_KMS, "[leaked fb]"); drm_printf(&p, "framebuffer[%u]:\n", fb->base.id); drm_framebuffer_print_info(&p, 1, fb); drm_framebuffer_free(&fb->base.refcount); } ida_destroy(&dev->mode_config.connector_ida); idr_destroy(&dev->mode_config.tile_idr); idr_destroy(&dev->mode_config.object_idr); drm_modeset_lock_fini(&dev->mode_config.connection_mutex); } EXPORT_SYMBOL(drm_mode_config_cleanup); static u32 full_encoder_mask(struct drm_device *dev) { struct drm_encoder *encoder; u32 encoder_mask = 0; drm_for_each_encoder(encoder, dev) encoder_mask |= drm_encoder_mask(encoder); return encoder_mask; } /* * For some reason we want the encoder itself included in * possible_clones. Make life easy for drivers by allowing them * to leave possible_clones unset if no cloning is possible. */ static void fixup_encoder_possible_clones(struct drm_encoder *encoder) { if (encoder->possible_clones == 0) encoder->possible_clones = drm_encoder_mask(encoder); } static void validate_encoder_possible_clones(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; u32 encoder_mask = full_encoder_mask(dev); struct drm_encoder *other; drm_for_each_encoder(other, dev) { WARN(!!(encoder->possible_clones & drm_encoder_mask(other)) != !!(other->possible_clones & drm_encoder_mask(encoder)), "possible_clones mismatch: " "[ENCODER:%d:%s] mask=0x%x possible_clones=0x%x vs. 
" "[ENCODER:%d:%s] mask=0x%x possible_clones=0x%x\n", encoder->base.id, encoder->name, drm_encoder_mask(encoder), encoder->possible_clones, other->base.id, other->name, drm_encoder_mask(other), other->possible_clones); } WARN((encoder->possible_clones & drm_encoder_mask(encoder)) == 0 || (encoder->possible_clones & ~encoder_mask) != 0, "Bogus possible_clones: " "[ENCODER:%d:%s] possible_clones=0x%x (full encoder mask=0x%x)\n", encoder->base.id, encoder->name, encoder->possible_clones, encoder_mask); } static u32 full_crtc_mask(struct drm_device *dev) { struct drm_crtc *crtc; u32 crtc_mask = 0; drm_for_each_crtc(crtc, dev) crtc_mask |= drm_crtc_mask(crtc); return crtc_mask; } static void validate_encoder_possible_crtcs(struct drm_encoder *encoder) { u32 crtc_mask = full_crtc_mask(encoder->dev); WARN((encoder->possible_crtcs & crtc_mask) == 0 || (encoder->possible_crtcs & ~crtc_mask) != 0, "Bogus possible_crtcs: " "[ENCODER:%d:%s] possible_crtcs=0x%x (full crtc mask=0x%x)\n", encoder->base.id, encoder->name, encoder->possible_crtcs, crtc_mask); } void drm_mode_config_validate(struct drm_device *dev) { struct drm_encoder *encoder; struct drm_crtc *crtc; struct drm_plane *plane; u32 primary_with_crtc = 0, cursor_with_crtc = 0; unsigned int num_primary = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; drm_for_each_encoder(encoder, dev) fixup_encoder_possible_clones(encoder); drm_for_each_encoder(encoder, dev) { validate_encoder_possible_clones(encoder); validate_encoder_possible_crtcs(encoder); } drm_for_each_crtc(crtc, dev) { WARN(!crtc->primary, "Missing primary plane on [CRTC:%d:%s]\n", crtc->base.id, crtc->name); WARN(crtc->cursor && crtc->funcs->cursor_set, "[CRTC:%d:%s] must not have both a cursor plane and a cursor_set func", crtc->base.id, crtc->name); WARN(crtc->cursor && crtc->funcs->cursor_set2, "[CRTC:%d:%s] must not have both a cursor plane and a cursor_set2 func", crtc->base.id, crtc->name); WARN(crtc->cursor && crtc->funcs->cursor_move, "[CRTC:%d:%s] must not have both a cursor plane and a cursor_move func", crtc->base.id, crtc->name); if (crtc->primary) { WARN(!(crtc->primary->possible_crtcs & drm_crtc_mask(crtc)), "Bogus primary plane possible_crtcs: [PLANE:%d:%s] must be compatible with [CRTC:%d:%s]\n", crtc->primary->base.id, crtc->primary->name, crtc->base.id, crtc->name); WARN(primary_with_crtc & drm_plane_mask(crtc->primary), "Primary plane [PLANE:%d:%s] used for multiple CRTCs", crtc->primary->base.id, crtc->primary->name); primary_with_crtc |= drm_plane_mask(crtc->primary); } if (crtc->cursor) { WARN(!(crtc->cursor->possible_crtcs & drm_crtc_mask(crtc)), "Bogus cursor plane possible_crtcs: [PLANE:%d:%s] must be compatible with [CRTC:%d:%s]\n", crtc->cursor->base.id, crtc->cursor->name, crtc->base.id, crtc->name); WARN(cursor_with_crtc & drm_plane_mask(crtc->cursor), "Cursor plane [PLANE:%d:%s] used for multiple CRTCs", crtc->cursor->base.id, crtc->cursor->name); cursor_with_crtc |= drm_plane_mask(crtc->cursor); } } drm_for_each_plane(plane, dev) { if (plane->type == DRM_PLANE_TYPE_PRIMARY) num_primary++; } WARN(num_primary != dev->mode_config.num_crtc, "Must have as many primary planes as there are CRTCs, but have %u primary planes and %u CRTCs", num_primary, dev->mode_config.num_crtc); }
// SPDX-License-Identifier: GPL-2.0 /* * mm/pgtable-generic.c * * Generic pgtable methods declared in linux/pgtable.h * * Copyright (C) 2010 Linus Torvalds */ #include <linux/pagemap.h> #include <linux/hugetlb.h> #include <linux/pgtable.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/mm_inline.h> #include <asm/pgalloc.h> #include <asm/tlb.h> /* * If a p?d_bad entry is found while walking page tables, report * the error, before resetting entry to p?d_none. Usually (but * very seldom) called out from the p?d_none_or_clear_bad macros. */ void pgd_clear_bad(pgd_t *pgd) { pgd_ERROR(*pgd); pgd_clear(pgd); } #ifndef __PAGETABLE_P4D_FOLDED void p4d_clear_bad(p4d_t *p4d) { p4d_ERROR(*p4d); p4d_clear(p4d); } #endif #ifndef __PAGETABLE_PUD_FOLDED void pud_clear_bad(pud_t *pud) { pud_ERROR(*pud); pud_clear(pud); } #endif /* * Note that the pmd variant below can't be stub'ed out just as for p4d/pud * above. pmd folding is special and typically pmd_* macros refer to upper * level even when folded */ void pmd_clear_bad(pmd_t *pmd) { pmd_ERROR(*pmd); pmd_clear(pmd); } #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* * Only sets the access flags (dirty, accessed), as well as write * permission. Furthermore, we know it always gets set to a "more * permissive" setting, which allows most architectures to optimize * this. We return whether the PTE actually changed, which in turn * instructs the caller to do things like update_mmu_cache().
This * used to be done in the caller, but sparc needs minor faults to * force that call on sun4c so we changed this macro slightly */ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty) { int changed = !pte_same(ptep_get(ptep), entry); if (changed) { set_pte_at(vma->vm_mm, address, ptep, entry); flush_tlb_fix_spurious_fault(vma, address, ptep); } return changed; } #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { int young; young = ptep_test_and_clear_young(vma, address, ptep); if (young) flush_tlb_page(vma, address); return young; } #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { struct mm_struct *mm = (vma)->vm_mm; pte_t pte; pte = ptep_get_and_clear(mm, address, ptep); if (pte_accessible(mm, pte)) flush_tlb_page(vma, address); return pte; } #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { int changed = !pmd_same(*pmdp, entry); VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed) { set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; } #endif #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { int young; VM_BUG_ON(address & ~HPAGE_PMD_MASK); young = pmdp_test_and_clear_young(vma, address, pmdp); if (young) flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return young; } #endif #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pud_t *pudp) { pud_t pud; VM_BUG_ON(address & ~HPAGE_PUD_MASK); VM_BUG_ON(!pud_trans_huge(*pudp) && !pud_devmap(*pudp)); pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp); flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); return pud; } #endif #endif #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ if (!pmd_huge_pte(mm, pmdp)) INIT_LIST_HEAD(&pgtable->lru); else list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru); pmd_huge_pte(mm, pmdp) = pgtable; } #endif #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW /* no "address" argument so destroys page coloring of some arch */ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { pgtable_t pgtable; assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ pgtable = pmd_huge_pte(mm, pmdp); pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru, struct page, lru); if (pmd_huge_pte(mm, pmdp)) list_del(&pgtable->lru); return pgtable; } #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); 
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return old; } #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { return pmdp_invalidate(vma, address, pmdp); } #endif #ifndef pmdp_collapse_flush pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { /* * pmd and hugepage pte format are same. So we could * use the same function. */ pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_trans_huge(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); /* collapse entails shooting down ptes not pmd */ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } #endif /* arch define pte_free_defer in asm/pgalloc.h for its own implementation */ #ifndef pte_free_defer static void pte_free_now(struct rcu_head *head) { struct page *page; page = container_of(head, struct page, rcu_head); pte_free(NULL /* mm not passed and not used */, (pgtable_t)page); } void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) { struct page *page; page = pgtable; call_rcu(&page->rcu_head, pte_free_now); } #endif /* pte_free_defer */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #if defined(CONFIG_GUP_GET_PXX_LOW_HIGH) && \ (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RCU)) /* * See the comment above ptep_get_lockless() in include/linux/pgtable.h: * the barriers in pmdp_get_lockless() cannot guarantee that the value in * pmd_high actually belongs with the value in pmd_low; but holding interrupts * off blocks the TLB flush between present updates, which guarantees that a * successful __pte_offset_map() points to a page from matched halves. */ static unsigned long pmdp_get_lockless_start(void) { unsigned long irqflags; local_irq_save(irqflags); return irqflags; } static void pmdp_get_lockless_end(unsigned long irqflags) { local_irq_restore(irqflags); } #else static unsigned long pmdp_get_lockless_start(void) { return 0; } static void pmdp_get_lockless_end(unsigned long irqflags) { } #endif pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) { unsigned long irqflags; pmd_t pmdval; rcu_read_lock(); irqflags = pmdp_get_lockless_start(); pmdval = pmdp_get_lockless(pmd); pmdp_get_lockless_end(irqflags); if (pmdvalp) *pmdvalp = pmdval; if (unlikely(pmd_none(pmdval) || is_pmd_migration_entry(pmdval))) goto nomap; if (unlikely(pmd_trans_huge(pmdval) || pmd_devmap(pmdval))) goto nomap; if (unlikely(pmd_bad(pmdval))) { pmd_clear_bad(pmd); goto nomap; } return __pte_map(&pmdval, addr); nomap: rcu_read_unlock(); return NULL; } pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp) { pmd_t pmdval; pte_t *pte; pte = __pte_offset_map(pmd, addr, &pmdval); if (likely(pte)) *ptlp = pte_lockptr(mm, &pmdval); return pte; } /* * pte_offset_map_lock(mm, pmd, addr, ptlp), and its internal implementation * __pte_offset_map_lock() below, is usually called with the pmd pointer for * addr, reached by walking down the mm's pgd, p4d, pud for addr: either while * holding mmap_lock or vma lock for read or for write; or in truncate or rmap * context, while holding file's i_mmap_lock or anon_vma lock for read (or for * write). In a few cases, it may be used with pmd pointing to a pmd_t already * copied to or constructed on the stack. 
* * When successful, it returns the pte pointer for addr, with its page table * kmapped if necessary (when CONFIG_HIGHPTE), and locked against concurrent * modification by software, with a pointer to that spinlock in ptlp (in some * configs mm->page_table_lock, in SPLIT_PTLOCK configs a spinlock in table's * struct page). pte_unmap_unlock(pte, ptl) to unlock and unmap afterwards. * * But it is unsuccessful, returning NULL with *ptlp unchanged, if there is no * page table at *pmd: if, for example, the page table has just been removed, * or replaced by the huge pmd of a THP. (When successful, *pmd is rechecked * after acquiring the ptlock, and retried internally if it changed: so that a * page table can be safely removed or replaced by THP while holding its lock.) * * pte_offset_map(pmd, addr), and its internal helper __pte_offset_map() above, * just returns the pte pointer for addr, its page table kmapped if necessary; * or NULL if there is no page table at *pmd. It does not attempt to lock the * page table, so cannot normally be used when the page table is to be updated, * or when entries read must be stable. But it does take rcu_read_lock(): so * that even when page table is racily removed, it remains a valid though empty * and disconnected table. Until pte_unmap(pte) unmaps and rcu_read_unlock()s * afterwards. * * pte_offset_map_nolock(mm, pmd, addr, ptlp), above, is like pte_offset_map(); * but when successful, it also outputs a pointer to the spinlock in ptlp - as * pte_offset_map_lock() does, but in this case without locking it. This helps * the caller to avoid a later pte_lockptr(mm, *pmd), which might by that time * act on a changed *pmd: pte_offset_map_nolock() provides the correct spinlock * pointer for the page table that it returns. In principle, the caller should * recheck *pmd once the lock is taken; in practice, no callsite needs that - * either the mmap_lock for write, or pte_same() check on contents, is enough. * * Note that free_pgtables(), used after unmapping detached vmas, or when * exiting the whole mm, does not take page table lock before freeing a page * table, and may not use RCU at all: "outsiders" like khugepaged should avoid * pte_offset_map() and co once the vma is detached from mm or mm_users is zero. */ pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp) { spinlock_t *ptl; pmd_t pmdval; pte_t *pte; again: pte = __pte_offset_map(pmd, addr, &pmdval); if (unlikely(!pte)) return pte; ptl = pte_lockptr(mm, &pmdval); spin_lock(ptl); if (likely(pmd_same(pmdval, pmdp_get_lockless(pmd)))) { *ptlp = ptl; return pte; } pte_unmap_unlock(pte, ptl); goto again; }
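/*
 * Editor's note: a minimal sketch (not part of pgtable-generic.c) of the
 * locking pattern described in the comment above: map and lock the page
 * table for an address, read one entry under the lock, then unlock and
 * unmap. The helper name walk_one_pte() is hypothetical; pte_offset_map_lock()
 * and pte_unmap_unlock() are the interfaces documented above. The caller is
 * assumed to hold the mmap_lock (or an equivalent) so the pmd is valid to walk.
 */
#include <linux/mm.h>
#include <linux/pgtable.h>

static bool walk_one_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *pte;
	bool present;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte)	/* no page table here (removed, or replaced by a huge pmd) */
		return false;

	/* The entry is stable while ptl is held. */
	present = pte_present(ptep_get(pte));

	pte_unmap_unlock(pte, ptl);
	return present;
}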
// SPDX-License-Identifier: GPL-2.0-only #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/xdp.h> #include <net/xdp_sock.h> #include <net/netdev_rx_queue.h> #include <net/netdev_queues.h> #include <net/busy_poll.h> #include "netdev-genl-gen.h" #include "dev.h" struct netdev_nl_dump_ctx { unsigned long ifindex; unsigned int rxq_idx; unsigned int txq_idx; unsigned int napi_id; }; static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb) { NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx); return (struct netdev_nl_dump_ctx *)cb->ctx; } static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info
*info) { u64 xsk_features = 0; u64 xdp_rx_meta = 0; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; #define XDP_METADATA_KFUNC(_, flag, __, xmo) \ if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \ xdp_rx_meta |= flag; XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC if (netdev->xsk_tx_metadata_ops) { if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; } if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, netdev->xdp_features, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, xdp_rx_meta, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, xsk_features, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) { if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, netdev->xdp_zc_max_segs)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } } genlmsg_end(rsp, hdr); return 0; } static void netdev_genl_dev_notify(struct net_device *netdev, int cmd) { struct genl_info info; struct sk_buff *ntf; if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev), NETDEV_NLGRP_MGMT)) return; genl_info_init_ntf(&info, &netdev_nl_family, cmd); ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!ntf) return; if (netdev_nl_dev_fill(netdev, ntf, &info)) { nlmsg_free(ntf); return; } genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf, 0, NETDEV_NLGRP_MGMT, GFP_KERNEL); } int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info) { struct net_device *netdev; struct sk_buff *rsp; u32 ifindex; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX)) return -EINVAL; ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; rtnl_lock(); netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev) err = netdev_nl_dev_fill(netdev, rsp, info); else err = -ENODEV; rtnl_unlock(); if (err) goto err_free_msg; return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; int err = 0; rtnl_lock(); for_each_netdev_dump(net, netdev, ctx->ifindex) { err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb)); if (err < 0) break; } rtnl_unlock(); return err; } static int netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, const struct genl_info *info) { void *hdr; pid_t pid; if (WARN_ON_ONCE(!napi->dev)) return -EINVAL; if (!(napi->dev->flags & IFF_UP)) return 0; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (napi->napi_id >= MIN_NAPI_ID && nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id)) goto nla_put_failure; if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex)) goto nla_put_failure; if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) goto nla_put_failure; if (napi->thread) { pid = task_pid_nr(napi->thread); if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) goto nla_put_failure; } genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) { struct napi_struct *napi; struct 
sk_buff *rsp; u32 napi_id; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) return -EINVAL; napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; rtnl_lock(); napi = napi_by_id(napi_id); if (napi) err = netdev_nl_napi_fill_one(rsp, napi, info); else err = -EINVAL; rtnl_unlock(); if (err) goto err_free_msg; return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } static int netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { struct napi_struct *napi; int err = 0; if (!(netdev->flags & IFF_UP)) return err; list_for_each_entry(napi, &netdev->napi_list, dev_list) { if (ctx->napi_id && napi->napi_id >= ctx->napi_id) continue; err = netdev_nl_napi_fill_one(rsp, napi, info); if (err) return err; ctx->napi_id = napi->napi_id; } return err; } int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; u32 ifindex = 0; int err = 0; if (info->attrs[NETDEV_A_NAPI_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]); rtnl_lock(); if (ifindex) { netdev = __dev_get_by_index(net, ifindex); if (netdev) err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); else err = -ENODEV; } else { for_each_netdev_dump(net, netdev, ctx->ifindex) { err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); if (err < 0) break; ctx->napi_id = 0; } } rtnl_unlock(); return err; } static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) || nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) || nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex)) goto nla_put_failure; switch (q_type) { case NETDEV_QUEUE_TYPE_RX: rxq = __netif_get_rx_queue(netdev, q_idx); if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, rxq->napi->napi_id)) goto nla_put_failure; break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, txq->napi->napi_id)) goto nla_put_failure; } genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id, u32 q_type) { switch (q_type) { case NETDEV_QUEUE_TYPE_RX: if (q_id >= netdev->real_num_rx_queues) return -EINVAL; return 0; case NETDEV_QUEUE_TYPE_TX: if (q_id >= netdev->real_num_tx_queues) return -EINVAL; } return 0; } static int netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { int err = 0; if (!(netdev->flags & IFF_UP)) return err; err = netdev_nl_queue_validate(netdev, q_idx, q_type); if (err) return err; return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info); } int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) { u32 q_id, q_type, ifindex; struct net_device *netdev; struct sk_buff *rsp; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX)) return -EINVAL; q_id = 
nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]); q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]); ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; rtnl_lock(); netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev) err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info); else err = -ENODEV; rtnl_unlock(); if (err) goto err_free_msg; return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } static int netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { int err = 0; int i; if (!(netdev->flags & IFF_UP)) return err; for (i = ctx->rxq_idx; i < netdev->real_num_rx_queues;) { err = netdev_nl_queue_fill_one(rsp, netdev, i, NETDEV_QUEUE_TYPE_RX, info); if (err) return err; ctx->rxq_idx = i++; } for (i = ctx->txq_idx; i < netdev->real_num_tx_queues;) { err = netdev_nl_queue_fill_one(rsp, netdev, i, NETDEV_QUEUE_TYPE_TX, info); if (err) return err; ctx->txq_idx = i++; } return err; } int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; u32 ifindex = 0; int err = 0; if (info->attrs[NETDEV_A_QUEUE_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); rtnl_lock(); if (ifindex) { netdev = __dev_get_by_index(net, ifindex); if (netdev) err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); else err = -ENODEV; } else { for_each_netdev_dump(net, netdev, ctx->ifindex) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); if (err < 0) break; ctx->rxq_idx = 0; ctx->txq_idx = 0; } } rtnl_unlock(); return err; } #define NETDEV_STAT_NOT_SET (~0ULL) static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size) { const u64 *add = _add; u64 *sum = _sum; while (size) { if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET) *sum += *add; sum++; add++; size -= 8; } } static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value) { if (value == NETDEV_STAT_NOT_SET) return 0; return nla_put_uint(rsp, attr_id, value); } static int netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail)) return -EMSGSIZE; return 0; } static int netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes)) return -EMSGSIZE; return 0; } static int netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp, u32 q_type, int i, const struct genl_info *info) { const struct netdev_stat_ops *ops = netdev->stat_ops; struct netdev_queue_stats_rx rx; struct netdev_queue_stats_tx tx; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) || nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) || nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i)) goto nla_put_failure; switch (q_type) { case NETDEV_QUEUE_TYPE_RX: memset(&rx, 0xff, sizeof(rx)); ops->get_queue_stats_rx(netdev, i, &rx); if (!memchr_inv(&rx, 0xff, sizeof(rx))) goto 
nla_cancel; if (netdev_nl_stats_write_rx(rsp, &rx)) goto nla_put_failure; break; case NETDEV_QUEUE_TYPE_TX: memset(&tx, 0xff, sizeof(tx)); ops->get_queue_stats_tx(netdev, i, &tx); if (!memchr_inv(&tx, 0xff, sizeof(tx))) goto nla_cancel; if (netdev_nl_stats_write_tx(rsp, &tx)) goto nla_put_failure; break; } genlmsg_end(rsp, hdr); return 0; nla_cancel: genlmsg_cancel(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static int netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { const struct netdev_stat_ops *ops = netdev->stat_ops; int i, err; if (!(netdev->flags & IFF_UP)) return 0; i = ctx->rxq_idx; while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) { err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX, i, info); if (err) return err; ctx->rxq_idx = i++; } i = ctx->txq_idx; while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) { err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX, i, info); if (err) return err; ctx->txq_idx = i++; } ctx->rxq_idx = 0; ctx->txq_idx = 0; return 0; } static int netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { struct netdev_queue_stats_rx rx_sum, rx; struct netdev_queue_stats_tx tx_sum, tx; const struct netdev_stat_ops *ops; void *hdr; int i; ops = netdev->stat_ops; /* Netdev can't guarantee any complete counters */ if (!ops->get_base_stats) return 0; memset(&rx_sum, 0xff, sizeof(rx_sum)); memset(&tx_sum, 0xff, sizeof(tx_sum)); ops->get_base_stats(netdev, &rx_sum, &tx_sum); /* The op was there, but nothing reported, don't bother */ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum))) return 0; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) goto nla_put_failure; for (i = 0; i < netdev->real_num_rx_queues; i++) { memset(&rx, 0xff, sizeof(rx)); if (ops->get_queue_stats_rx) ops->get_queue_stats_rx(netdev, i, &rx); netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx)); } for (i = 0; i < netdev->real_num_tx_queues; i++) { memset(&tx, 0xff, sizeof(tx)); if (ops->get_queue_stats_tx) ops->get_queue_stats_tx(netdev, i, &tx); netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx)); } if (netdev_nl_stats_write_rx(rsp, &rx_sum) || netdev_nl_stats_write_tx(rsp, &tx_sum)) goto nla_put_failure; genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; unsigned int scope; int err = 0; scope = 0; if (info->attrs[NETDEV_A_QSTATS_SCOPE]) scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]); rtnl_lock(); for_each_netdev_dump(net, netdev, ctx->ifindex) { if (!netdev->stat_ops) continue; switch (scope) { case 0: err = netdev_nl_stats_by_netdev(netdev, skb, info); break; case NETDEV_QSTATS_SCOPE_QUEUE: err = netdev_nl_stats_by_queue(netdev, skb, info, ctx); break; } if (err < 0) break; } rtnl_unlock(); return err; } static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REGISTER: netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF); break; case 
NETDEV_UNREGISTER: netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF); break; case NETDEV_XDP_FEAT_CHANGE: netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF); break; } return NOTIFY_OK; } static struct notifier_block netdev_genl_nb = { .notifier_call = netdev_genl_netdevice_event, }; static int __init netdev_genl_init(void) { int err; err = register_netdevice_notifier(&netdev_genl_nb); if (err) return err; err = genl_register_family(&netdev_nl_family); if (err) goto err_unreg_ntf; return 0; err_unreg_ntf: unregister_netdevice_notifier(&netdev_genl_nb); return err; } subsys_initcall(netdev_genl_init);
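/*
 * Illustrative sketch, not part of netdev-genl.c: roughly how a driver could
 * wire up struct netdev_stat_ops so the qstats dump code above has something
 * to report.  The callback prototypes are inferred from the call sites in
 * netdev_nl_stats_queue()/netdev_nl_stats_by_netdev() (the authoritative
 * definitions live in include/net/netdev_queues.h), and the foo_* names are
 * hypothetical.  Any counter the driver does not touch keeps the 0xff
 * pre-fill, i.e. NETDEV_STAT_NOT_SET, and netdev_stat_put() silently omits
 * it from the netlink reply.
 */
static void foo_get_queue_stats_rx(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *rx)
{
	/* A real driver would read its per-ring counters for queue @idx. */
	rx->packets = 0;
	rx->bytes = 0;
	/* rx->alloc_fail deliberately left as NETDEV_STAT_NOT_SET. */
}

static void foo_get_base_stats(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx)
{
	/* Device-wide base counters; per-queue values are summed on top of
	 * these by netdev_nl_stats_by_netdev(). */
	rx->packets = 0;
	rx->bytes = 0;
	tx->packets = 0;
	tx->bytes = 0;
}

/* Assigned to netdev->stat_ops at probe time in this sketch. */
static const struct netdev_stat_ops foo_stat_ops = {
	.get_queue_stats_rx	= foo_get_queue_stats_rx,
	.get_base_stats		= foo_get_base_stats,
};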
// SPDX-License-Identifier: GPL-2.0
/*
 * security/tomoyo/tomoyo.c
 *
 * Copyright (C) 2005-2011 NTT DATA CORPORATION
 */

#include <linux/lsm_hooks.h>
#include <uapi/linux/lsm.h>
#include "common.h"

/**
 * tomoyo_domain - Get "struct tomoyo_domain_info" for current thread.
 *
 * Returns pointer to "struct tomoyo_domain_info" for current thread.
 */
struct tomoyo_domain_info *tomoyo_domain(void)
{
	struct tomoyo_task *s = tomoyo_task(current);

	if (s->old_domain_info && !current->in_execve) {
		atomic_dec(&s->old_domain_info->users);
		s->old_domain_info = NULL;
	}
	return s->domain_info;
}

/**
 * tomoyo_cred_prepare - Target for security_prepare_creds().
 *
 * @new: Pointer to "struct cred".
 * @old: Pointer to "struct cred".
 * @gfp: Memory allocation flags.
 *
 * Returns 0.
 */
static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
			       gfp_t gfp)
{
	/* Restore old_domain_info saved by previous execve() request.
*/ struct tomoyo_task *s = tomoyo_task(current); if (s->old_domain_info && !current->in_execve) { atomic_dec(&s->domain_info->users); s->domain_info = s->old_domain_info; s->old_domain_info = NULL; } return 0; } /** * tomoyo_bprm_committed_creds - Target for security_bprm_committed_creds(). * * @bprm: Pointer to "struct linux_binprm". */ static void tomoyo_bprm_committed_creds(const struct linux_binprm *bprm) { /* Clear old_domain_info saved by execve() request. */ struct tomoyo_task *s = tomoyo_task(current); atomic_dec(&s->old_domain_info->users); s->old_domain_info = NULL; } #ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER /** * tomoyo_bprm_creds_for_exec - Target for security_bprm_creds_for_exec(). * * @bprm: Pointer to "struct linux_binprm". * * Returns 0. */ static int tomoyo_bprm_creds_for_exec(struct linux_binprm *bprm) { /* * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested * for the first time. */ if (!tomoyo_policy_loaded) tomoyo_load_policy(bprm->filename); return 0; } #endif /** * tomoyo_bprm_check_security - Target for security_bprm_check(). * * @bprm: Pointer to "struct linux_binprm". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_bprm_check_security(struct linux_binprm *bprm) { struct tomoyo_task *s = tomoyo_task(current); /* * Execute permission is checked against pathname passed to execve() * using current domain. */ if (!s->old_domain_info) { const int idx = tomoyo_read_lock(); const int err = tomoyo_find_next_domain(bprm); tomoyo_read_unlock(idx); return err; } /* * Read permission is checked against interpreters using next domain. */ return tomoyo_check_open_permission(s->domain_info, &bprm->file->f_path, O_RDONLY); } /** * tomoyo_inode_getattr - Target for security_inode_getattr(). * * @path: Pointer to "struct path". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_inode_getattr(const struct path *path) { return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, path, NULL); } /** * tomoyo_path_truncate - Target for security_path_truncate(). * * @path: Pointer to "struct path". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_truncate(const struct path *path) { return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path, NULL); } /** * tomoyo_file_truncate - Target for security_file_truncate(). * * @file: Pointer to "struct file". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_file_truncate(struct file *file) { return tomoyo_path_truncate(&file->f_path); } /** * tomoyo_path_unlink - Target for security_path_unlink(). * * @parent: Pointer to "struct path". * @dentry: Pointer to "struct dentry". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_unlink(const struct path *parent, struct dentry *dentry) { struct path path = { .mnt = parent->mnt, .dentry = dentry }; return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path, NULL); } /** * tomoyo_path_mkdir - Target for security_path_mkdir(). * * @parent: Pointer to "struct path". * @dentry: Pointer to "struct dentry". * @mode: DAC permission mode. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_mkdir(const struct path *parent, struct dentry *dentry, umode_t mode) { struct path path = { .mnt = parent->mnt, .dentry = dentry }; return tomoyo_path_number_perm(TOMOYO_TYPE_MKDIR, &path, mode & S_IALLUGO); } /** * tomoyo_path_rmdir - Target for security_path_rmdir(). * * @parent: Pointer to "struct path". * @dentry: Pointer to "struct dentry". 
* * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_rmdir(const struct path *parent, struct dentry *dentry) { struct path path = { .mnt = parent->mnt, .dentry = dentry }; return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path, NULL); } /** * tomoyo_path_symlink - Target for security_path_symlink(). * * @parent: Pointer to "struct path". * @dentry: Pointer to "struct dentry". * @old_name: Symlink's content. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_symlink(const struct path *parent, struct dentry *dentry, const char *old_name) { struct path path = { .mnt = parent->mnt, .dentry = dentry }; return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path, old_name); } /** * tomoyo_path_mknod - Target for security_path_mknod(). * * @parent: Pointer to "struct path". * @dentry: Pointer to "struct dentry". * @mode: DAC permission mode. * @dev: Device attributes. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_mknod(const struct path *parent, struct dentry *dentry, umode_t mode, unsigned int dev) { struct path path = { .mnt = parent->mnt, .dentry = dentry }; int type = TOMOYO_TYPE_CREATE; const unsigned int perm = mode & S_IALLUGO; switch (mode & S_IFMT) { case S_IFCHR: type = TOMOYO_TYPE_MKCHAR; break; case S_IFBLK: type = TOMOYO_TYPE_MKBLOCK; break; default: goto no_dev; } return tomoyo_mkdev_perm(type, &path, perm, dev); no_dev: switch (mode & S_IFMT) { case S_IFIFO: type = TOMOYO_TYPE_MKFIFO; break; case S_IFSOCK: type = TOMOYO_TYPE_MKSOCK; break; } return tomoyo_path_number_perm(type, &path, perm); } /** * tomoyo_path_link - Target for security_path_link(). * * @old_dentry: Pointer to "struct dentry". * @new_dir: Pointer to "struct path". * @new_dentry: Pointer to "struct dentry". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { struct path path1 = { .mnt = new_dir->mnt, .dentry = old_dentry }; struct path path2 = { .mnt = new_dir->mnt, .dentry = new_dentry }; return tomoyo_path2_perm(TOMOYO_TYPE_LINK, &path1, &path2); } /** * tomoyo_path_rename - Target for security_path_rename(). * * @old_parent: Pointer to "struct path". * @old_dentry: Pointer to "struct dentry". * @new_parent: Pointer to "struct path". * @new_dentry: Pointer to "struct dentry". * @flags: Rename options. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_rename(const struct path *old_parent, struct dentry *old_dentry, const struct path *new_parent, struct dentry *new_dentry, const unsigned int flags) { struct path path1 = { .mnt = old_parent->mnt, .dentry = old_dentry }; struct path path2 = { .mnt = new_parent->mnt, .dentry = new_dentry }; if (flags & RENAME_EXCHANGE) { const int err = tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path2, &path1); if (err) return err; } return tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path1, &path2); } /** * tomoyo_file_fcntl - Target for security_file_fcntl(). * * @file: Pointer to "struct file". * @cmd: Command for fcntl(). * @arg: Argument for @cmd. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { if (!(cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND))) return 0; return tomoyo_check_open_permission(tomoyo_domain(), &file->f_path, O_WRONLY | (arg & O_APPEND)); } /** * tomoyo_file_open - Target for security_file_open(). * * @f: Pointer to "struct file". 
* * Returns 0 on success, negative value otherwise. */ static int tomoyo_file_open(struct file *f) { /* Don't check read permission here if called from execve(). */ /* Illogically, FMODE_EXEC is in f_flags, not f_mode. */ if (f->f_flags & __FMODE_EXEC) return 0; return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, f->f_flags); } /** * tomoyo_file_ioctl - Target for security_file_ioctl(). * * @file: Pointer to "struct file". * @cmd: Command for ioctl(). * @arg: Argument for @cmd. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return tomoyo_path_number_perm(TOMOYO_TYPE_IOCTL, &file->f_path, cmd); } /** * tomoyo_path_chmod - Target for security_path_chmod(). * * @path: Pointer to "struct path". * @mode: DAC permission mode. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_chmod(const struct path *path, umode_t mode) { return tomoyo_path_number_perm(TOMOYO_TYPE_CHMOD, path, mode & S_IALLUGO); } /** * tomoyo_path_chown - Target for security_path_chown(). * * @path: Pointer to "struct path". * @uid: Owner ID. * @gid: Group ID. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { int error = 0; if (uid_valid(uid)) error = tomoyo_path_number_perm(TOMOYO_TYPE_CHOWN, path, from_kuid(&init_user_ns, uid)); if (!error && gid_valid(gid)) error = tomoyo_path_number_perm(TOMOYO_TYPE_CHGRP, path, from_kgid(&init_user_ns, gid)); return error; } /** * tomoyo_path_chroot - Target for security_path_chroot(). * * @path: Pointer to "struct path". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_path_chroot(const struct path *path) { return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path, NULL); } /** * tomoyo_sb_mount - Target for security_sb_mount(). * * @dev_name: Name of device file. Maybe NULL. * @path: Pointer to "struct path". * @type: Name of filesystem type. Maybe NULL. * @flags: Mount options. * @data: Optional data. Maybe NULL. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) { return tomoyo_mount_permission(dev_name, path, type, flags, data); } /** * tomoyo_sb_umount - Target for security_sb_umount(). * * @mnt: Pointer to "struct vfsmount". * @flags: Unmount options. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_sb_umount(struct vfsmount *mnt, int flags) { struct path path = { .mnt = mnt, .dentry = mnt->mnt_root }; return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path, NULL); } /** * tomoyo_sb_pivotroot - Target for security_sb_pivotroot(). * * @old_path: Pointer to "struct path". * @new_path: Pointer to "struct path". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_sb_pivotroot(const struct path *old_path, const struct path *new_path) { return tomoyo_path2_perm(TOMOYO_TYPE_PIVOT_ROOT, new_path, old_path); } /** * tomoyo_socket_listen - Check permission for listen(). * * @sock: Pointer to "struct socket". * @backlog: Backlog parameter. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_socket_listen(struct socket *sock, int backlog) { return tomoyo_socket_listen_permission(sock); } /** * tomoyo_socket_connect - Check permission for connect(). * * @sock: Pointer to "struct socket". * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. 
* * Returns 0 on success, negative value otherwise. */ static int tomoyo_socket_connect(struct socket *sock, struct sockaddr *addr, int addr_len) { return tomoyo_socket_connect_permission(sock, addr, addr_len); } /** * tomoyo_socket_bind - Check permission for bind(). * * @sock: Pointer to "struct socket". * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_socket_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { return tomoyo_socket_bind_permission(sock, addr, addr_len); } /** * tomoyo_socket_sendmsg - Check permission for sendmsg(). * * @sock: Pointer to "struct socket". * @msg: Pointer to "struct msghdr". * @size: Size of message. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { return tomoyo_socket_sendmsg_permission(sock, msg, size); } struct lsm_blob_sizes tomoyo_blob_sizes __ro_after_init = { .lbs_task = sizeof(struct tomoyo_task), }; /** * tomoyo_task_alloc - Target for security_task_alloc(). * * @task: Pointer to "struct task_struct". * @clone_flags: clone() flags. * * Returns 0. */ static int tomoyo_task_alloc(struct task_struct *task, unsigned long clone_flags) { struct tomoyo_task *old = tomoyo_task(current); struct tomoyo_task *new = tomoyo_task(task); new->domain_info = old->domain_info; atomic_inc(&new->domain_info->users); new->old_domain_info = NULL; return 0; } /** * tomoyo_task_free - Target for security_task_free(). * * @task: Pointer to "struct task_struct". */ static void tomoyo_task_free(struct task_struct *task) { struct tomoyo_task *s = tomoyo_task(task); if (s->domain_info) { atomic_dec(&s->domain_info->users); s->domain_info = NULL; } if (s->old_domain_info) { atomic_dec(&s->old_domain_info->users); s->old_domain_info = NULL; } } static const struct lsm_id tomoyo_lsmid = { .name = "tomoyo", .id = LSM_ID_TOMOYO, }; /* * tomoyo_security_ops is a "struct security_operations" which is used for * registering TOMOYO. 
*/ static struct security_hook_list tomoyo_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds), LSM_HOOK_INIT(task_alloc, tomoyo_task_alloc), LSM_HOOK_INIT(task_free, tomoyo_task_free), #ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER LSM_HOOK_INIT(bprm_creds_for_exec, tomoyo_bprm_creds_for_exec), #endif LSM_HOOK_INIT(bprm_check_security, tomoyo_bprm_check_security), LSM_HOOK_INIT(file_fcntl, tomoyo_file_fcntl), LSM_HOOK_INIT(file_open, tomoyo_file_open), LSM_HOOK_INIT(file_truncate, tomoyo_file_truncate), LSM_HOOK_INIT(path_truncate, tomoyo_path_truncate), LSM_HOOK_INIT(path_unlink, tomoyo_path_unlink), LSM_HOOK_INIT(path_mkdir, tomoyo_path_mkdir), LSM_HOOK_INIT(path_rmdir, tomoyo_path_rmdir), LSM_HOOK_INIT(path_symlink, tomoyo_path_symlink), LSM_HOOK_INIT(path_mknod, tomoyo_path_mknod), LSM_HOOK_INIT(path_link, tomoyo_path_link), LSM_HOOK_INIT(path_rename, tomoyo_path_rename), LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl), LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), LSM_HOOK_INIT(path_chown, tomoyo_path_chown), LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount), LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount), LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot), LSM_HOOK_INIT(socket_bind, tomoyo_socket_bind), LSM_HOOK_INIT(socket_connect, tomoyo_socket_connect), LSM_HOOK_INIT(socket_listen, tomoyo_socket_listen), LSM_HOOK_INIT(socket_sendmsg, tomoyo_socket_sendmsg), }; /* Lock for GC. */ DEFINE_SRCU(tomoyo_ss); int tomoyo_enabled __ro_after_init = 1; /** * tomoyo_init - Register TOMOYO Linux as a LSM module. * * Returns 0. */ static int __init tomoyo_init(void) { struct tomoyo_task *s = tomoyo_task(current); /* register ourselves with the security framework */ security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), &tomoyo_lsmid); pr_info("TOMOYO Linux initialized\n"); s->domain_info = &tomoyo_kernel_domain; atomic_inc(&tomoyo_kernel_domain.users); s->old_domain_info = NULL; tomoyo_mm_init(); return 0; } DEFINE_LSM(tomoyo) = { .name = "tomoyo", .enabled = &tomoyo_enabled, .flags = LSM_FLAG_LEGACY_MAJOR, .blobs = &tomoyo_blob_sizes, .init = tomoyo_init, };
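/*
 * Sketch for context, not part of tomoyo.c: how the per-task blob sized by
 * tomoyo_blob_sizes.lbs_task above is reached.  In the kernel tree the real
 * helper is tomoyo_task() in security/tomoyo/common.h; the body below is a
 * best-effort reconstruction and should be treated as an assumption.  The
 * LSM core converts the requested lbs_task size into an offset inside
 * task->security at boot, so TOMOYO's slice is a simple pointer add.
 */
static inline struct tomoyo_task *tomoyo_task_sketch(struct task_struct *task)
{
	return task->security + tomoyo_blob_sizes.lbs_task;
}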
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/objtool.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#include <asm/orc_types.h>
#include <asm/orc_lookup.h>
#include <asm/orc_header.h>

ORC_HEADER;

#define orc_warn(fmt, ...)
\ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) #define orc_warn_current(args...) \ ({ \ static bool dumped_before; \ if (state->task == current && !state->error) { \ orc_warn(args); \ if (unwind_debug && !dumped_before) { \ dumped_before = true; \ unwind_dump(state); \ } \ } \ }) extern int __start_orc_unwind_ip[]; extern int __stop_orc_unwind_ip[]; extern struct orc_entry __start_orc_unwind[]; extern struct orc_entry __stop_orc_unwind[]; static bool orc_init __ro_after_init; static bool unwind_debug __ro_after_init; static unsigned int lookup_num_blocks __ro_after_init; static int __init unwind_debug_cmdline(char *str) { unwind_debug = true; return 0; } early_param("unwind_debug", unwind_debug_cmdline); static void unwind_dump(struct unwind_state *state) { static bool dumped_before; unsigned long word, *sp; struct stack_info stack_info = {0}; unsigned long visit_mask = 0; if (dumped_before) return; dumped_before = true; printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n", state->stack_info.type, state->stack_info.next_sp, state->stack_mask, state->graph_idx); for (sp = __builtin_frame_address(0); sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { if (get_stack_info(sp, state->task, &stack_info, &visit_mask)) break; for (; sp < stack_info.end; sp++) { word = READ_ONCE_NOCHECK(*sp); printk_deferred("%0*lx: %0*lx (%pB)\n", BITS_PER_LONG/4, (unsigned long)sp, BITS_PER_LONG/4, word, (void *)word); } } } static inline unsigned long orc_ip(const int *ip) { return (unsigned long)ip + *ip; } static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table, unsigned int num_entries, unsigned long ip) { int *first = ip_table; int *last = ip_table + num_entries - 1; int *mid, *found = first; if (!num_entries) return NULL; /* * Do a binary range search to find the rightmost duplicate of a given * starting address. Some entries are section terminators which are * "weak" entries for ensuring there are no gaps. They should be * ignored when they conflict with a real entry. */ while (first <= last) { mid = first + ((last - first) / 2); if (orc_ip(mid) <= ip) { found = mid; first = mid + 1; } else last = mid - 1; } return u_table + (found - ip_table); } #ifdef CONFIG_MODULES static struct orc_entry *orc_module_find(unsigned long ip) { struct module *mod; mod = __module_address(ip); if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip) return NULL; return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, mod->arch.num_orcs, ip); } #else static struct orc_entry *orc_module_find(unsigned long ip) { return NULL; } #endif #ifdef CONFIG_DYNAMIC_FTRACE static struct orc_entry *orc_find(unsigned long ip); /* * Ftrace dynamic trampolines do not have orc entries of their own. * But they are copies of the ftrace entries that are static and * defined in ftrace_*.S, which do have orc entries. * * If the unwinder comes across a ftrace trampoline, then find the * ftrace function that was used to create it, and use that ftrace * function's orc entry, as the placement of the return code in * the stack will be identical. 
*/ static struct orc_entry *orc_ftrace_find(unsigned long ip) { struct ftrace_ops *ops; unsigned long tramp_addr, offset; ops = ftrace_ops_trampoline(ip); if (!ops) return NULL; /* Set tramp_addr to the start of the code copied by the trampoline */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) tramp_addr = (unsigned long)ftrace_regs_caller; else tramp_addr = (unsigned long)ftrace_caller; /* Now place tramp_addr to the location within the trampoline ip is at */ offset = ip - ops->trampoline; tramp_addr += offset; /* Prevent unlikely recursion */ if (ip == tramp_addr) return NULL; return orc_find(tramp_addr); } #else static struct orc_entry *orc_ftrace_find(unsigned long ip) { return NULL; } #endif /* * If we crash with IP==0, the last successfully executed instruction * was probably an indirect function call with a NULL function pointer, * and we don't have unwind information for NULL. * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function * pointer into its parent and then continue normally from there. */ static struct orc_entry null_orc_entry = { .sp_offset = sizeof(long), .sp_reg = ORC_REG_SP, .bp_reg = ORC_REG_UNDEFINED, .type = ORC_TYPE_CALL }; /* Fake frame pointer entry -- used as a fallback for generated code */ static struct orc_entry orc_fp_entry = { .type = ORC_TYPE_CALL, .sp_reg = ORC_REG_BP, .sp_offset = 16, .bp_reg = ORC_REG_PREV_SP, .bp_offset = -16, }; static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; if (ip == 0) return &null_orc_entry; /* For non-init vmlinux addresses, use the fast lookup table: */ if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { unsigned int idx, start, stop; idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE; if (unlikely((idx >= lookup_num_blocks-1))) { orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n", idx, lookup_num_blocks, (void *)ip); return NULL; } start = orc_lookup[idx]; stop = orc_lookup[idx + 1] + 1; if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) || (__start_orc_unwind + stop > __stop_orc_unwind))) { orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n", idx, lookup_num_blocks, start, stop, (void *)ip); return NULL; } return __orc_find(__start_orc_unwind_ip + start, __start_orc_unwind + start, stop - start, ip); } /* vmlinux .init slow lookup: */ if (is_kernel_inittext(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); /* Module lookup: */ orc = orc_module_find(ip); if (orc) return orc; return orc_ftrace_find(ip); } #ifdef CONFIG_MODULES static DEFINE_MUTEX(sort_mutex); static int *cur_orc_ip_table = __start_orc_unwind_ip; static struct orc_entry *cur_orc_table = __start_orc_unwind; static void orc_sort_swap(void *_a, void *_b, int size) { struct orc_entry *orc_a, *orc_b; int *a = _a, *b = _b, tmp; int delta = _b - _a; /* Swap the .orc_unwind_ip entries: */ tmp = *a; *a = *b + delta; *b = tmp - delta; /* Swap the corresponding .orc_unwind entries: */ orc_a = cur_orc_table + (a - cur_orc_ip_table); orc_b = cur_orc_table + (b - cur_orc_ip_table); swap(*orc_a, *orc_b); } static int orc_sort_cmp(const void *_a, const void *_b) { struct orc_entry *orc_a; const int *a = _a, *b = _b; unsigned long a_val = orc_ip(a); unsigned long b_val = orc_ip(b); if (a_val > b_val) return 1; if (a_val < b_val) return -1; /* * The "weak" section terminator entries need to always be first * to ensure the lookup code skips them in favor of real entries. 
* These terminator entries exist to handle any gaps created by * whitelisted .o files which didn't get objtool generation. */ orc_a = cur_orc_table + (a - cur_orc_ip_table); return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1; } void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, void *_orc, size_t orc_size) { int *orc_ip = _orc_ip; struct orc_entry *orc = _orc; unsigned int num_entries = orc_ip_size / sizeof(int); WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 || orc_size % sizeof(*orc) != 0 || num_entries != orc_size / sizeof(*orc)); /* * The 'cur_orc_*' globals allow the orc_sort_swap() callback to * associate an .orc_unwind_ip table entry with its corresponding * .orc_unwind entry so they can both be swapped. */ mutex_lock(&sort_mutex); cur_orc_ip_table = orc_ip; cur_orc_table = orc; sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap); mutex_unlock(&sort_mutex); mod->arch.orc_unwind_ip = orc_ip; mod->arch.orc_unwind = orc; mod->arch.num_orcs = num_entries; } #endif void __init unwind_init(void) { size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip; size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind; size_t num_entries = orc_ip_size / sizeof(int); struct orc_entry *orc; int i; if (!num_entries || orc_ip_size % sizeof(int) != 0 || orc_size % sizeof(struct orc_entry) != 0 || num_entries != orc_size / sizeof(struct orc_entry)) { orc_warn("WARNING: Bad or missing .orc_unwind table. Disabling unwinder.\n"); return; } /* * Note, the orc_unwind and orc_unwind_ip tables were already * sorted at build time via the 'sorttable' tool. * It's ready for binary search straight away, no need to sort it. */ /* Initialize the fast lookup table: */ lookup_num_blocks = orc_lookup_end - orc_lookup; for (i = 0; i < lookup_num_blocks-1; i++) { orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i)); if (!orc) { orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); return; } orc_lookup[i] = orc - __start_orc_unwind; } /* Initialize the ending block: */ orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_STOP_IP); if (!orc) { orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); return; } orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind; orc_init = true; } unsigned long unwind_get_return_address(struct unwind_state *state) { if (unwind_done(state)) return 0; return __kernel_text_address(state->ip) ? 
state->ip : 0; } EXPORT_SYMBOL_GPL(unwind_get_return_address); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) { if (unwind_done(state)) return NULL; if (state->regs) return &state->regs->ip; if (state->sp) return (unsigned long *)state->sp - 1; return NULL; } static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, size_t len) { struct stack_info *info = &state->stack_info; void *addr = (void *)_addr; if (on_stack(info, addr, len)) return true; return !get_stack_info(addr, state->task, info, &state->stack_mask) && on_stack(info, addr, len); } static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, unsigned long *val) { if (!stack_access_ok(state, addr, sizeof(long))) return false; *val = READ_ONCE_NOCHECK(*(unsigned long *)addr); return true; } static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, unsigned long *ip, unsigned long *sp) { struct pt_regs *regs = (struct pt_regs *)addr; /* x86-32 support will be more complicated due to the &regs->sp hack */ BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32)); if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) return false; *ip = READ_ONCE_NOCHECK(regs->ip); *sp = READ_ONCE_NOCHECK(regs->sp); return true; } static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr, unsigned long *ip, unsigned long *sp) { struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET; if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) return false; *ip = READ_ONCE_NOCHECK(regs->ip); *sp = READ_ONCE_NOCHECK(regs->sp); return true; } /* * If state->regs is non-NULL, and points to a full pt_regs, just get the reg * value from state->regs. * * Otherwise, if state->regs just points to IRET regs, and the previous frame * had full regs, it's safe to get the value from the previous regs. This can * happen when early/late IRQ entry code gets interrupted by an NMI. */ static bool get_reg(struct unwind_state *state, unsigned int reg_off, unsigned long *val) { unsigned int reg = reg_off/8; if (!state->regs) return false; if (state->full_regs) { *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]); return true; } if (state->prev_regs) { *val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]); return true; } return false; } bool unwind_next_frame(struct unwind_state *state) { unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp; enum stack_type prev_type = state->stack_info.type; struct orc_entry *orc; bool indirect = false; if (unwind_done(state)) return false; /* Don't let modules unload while we're reading their ORC data. */ preempt_disable(); /* End-of-stack check for user tasks: */ if (state->regs && user_mode(state->regs)) goto the_end; /* * Find the orc_entry associated with the text address. * * For a call frame (as opposed to a signal frame), state->ip points to * the instruction after the call. That instruction's stack layout * could be different from the call instruction's layout, for example * if the call was to a noreturn function. So get the ORC data for the * call instruction itself. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); if (!orc) { /* * As a fallback, try to assume this code uses a frame pointer. * This is useful for generated code, like BPF, which ORC * doesn't know about. This is just a guess, so the rest of * the unwind is no longer considered reliable. 
*/ orc = &orc_fp_entry; state->error = true; } else { if (orc->type == ORC_TYPE_UNDEFINED) goto err; if (orc->type == ORC_TYPE_END_OF_STACK) goto the_end; } state->signal = orc->signal; /* Find the previous frame's stack: */ switch (orc->sp_reg) { case ORC_REG_SP: sp = state->sp + orc->sp_offset; break; case ORC_REG_BP: sp = state->bp + orc->sp_offset; break; case ORC_REG_SP_INDIRECT: sp = state->sp; indirect = true; break; case ORC_REG_BP_INDIRECT: sp = state->bp + orc->sp_offset; indirect = true; break; case ORC_REG_R10: if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) { orc_warn_current("missing R10 value at %pB\n", (void *)state->ip); goto err; } break; case ORC_REG_R13: if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) { orc_warn_current("missing R13 value at %pB\n", (void *)state->ip); goto err; } break; case ORC_REG_DI: if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) { orc_warn_current("missing RDI value at %pB\n", (void *)state->ip); goto err; } break; case ORC_REG_DX: if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) { orc_warn_current("missing DX value at %pB\n", (void *)state->ip); goto err; } break; default: orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->ip); goto err; } if (indirect) { if (!deref_stack_reg(state, sp, &sp)) goto err; if (orc->sp_reg == ORC_REG_SP_INDIRECT) sp += orc->sp_offset; } /* Find IP, SP and possibly regs: */ switch (orc->type) { case ORC_TYPE_CALL: ip_p = sp - sizeof(long); if (!deref_stack_reg(state, ip_p, &state->ip)) goto err; state->ip = unwind_recover_ret_addr(state, state->ip, (unsigned long *)ip_p); state->sp = sp; state->regs = NULL; state->prev_regs = NULL; break; case ORC_TYPE_REGS: if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { orc_warn_current("can't access registers at %pB\n", (void *)orig_ip); goto err; } /* * There is a small chance to interrupt at the entry of * arch_rethook_trampoline() where the ORC info doesn't exist. * That point is right after the RET to arch_rethook_trampoline() * which was modified return address. * At that point, the @addr_p of the unwind_recover_rethook() * (this has to point the address of the stack entry storing * the modified return address) must be "SP - (a stack entry)" * because SP is incremented by the RET. */ state->ip = unwind_recover_rethook(state, state->ip, (unsigned long *)(state->sp - sizeof(long))); state->regs = (struct pt_regs *)sp; state->prev_regs = NULL; state->full_regs = true; break; case ORC_TYPE_REGS_PARTIAL: if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { orc_warn_current("can't access iret registers at %pB\n", (void *)orig_ip); goto err; } /* See ORC_TYPE_REGS case comment. 
*/ state->ip = unwind_recover_rethook(state, state->ip, (unsigned long *)(state->sp - sizeof(long))); if (state->full_regs) state->prev_regs = state->regs; state->regs = (void *)sp - IRET_FRAME_OFFSET; state->full_regs = false; break; default: orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)orig_ip); goto err; } /* Find BP: */ switch (orc->bp_reg) { case ORC_REG_UNDEFINED: if (get_reg(state, offsetof(struct pt_regs, bp), &tmp)) state->bp = tmp; break; case ORC_REG_PREV_SP: if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp)) goto err; break; case ORC_REG_BP: if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp)) goto err; break; default: orc_warn("unknown BP base reg %d for ip %pB\n", orc->bp_reg, (void *)orig_ip); goto err; } /* Prevent a recursive loop due to bad ORC data: */ if (state->stack_info.type == prev_type && on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) && state->sp <= prev_sp) { orc_warn_current("stack going in the wrong direction? at %pB\n", (void *)orig_ip); goto err; } preempt_enable(); return true; err: state->error = true; the_end: preempt_enable(); state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } EXPORT_SYMBOL_GPL(unwind_next_frame); void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame) { memset(state, 0, sizeof(*state)); state->task = task; if (!orc_init) goto err; /* * Refuse to unwind the stack of a task while it's executing on another * CPU. This check is racy, but that's ok: the unwinder has other * checks to prevent it from going off the rails. */ if (task_on_another_cpu(task)) goto err; if (regs) { if (user_mode(regs)) goto the_end; state->ip = regs->ip; state->sp = regs->sp; state->bp = regs->bp; state->regs = regs; state->full_regs = true; state->signal = true; } else if (task == current) { asm volatile("lea (%%rip), %0\n\t" "mov %%rsp, %1\n\t" "mov %%rbp, %2\n\t" : "=r" (state->ip), "=r" (state->sp), "=r" (state->bp)); } else { struct inactive_task_frame *frame = (void *)task->thread.sp; state->sp = task->thread.sp + sizeof(*frame); state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); state->signal = (void *)state->ip == ret_from_fork; } if (get_stack_info((unsigned long *)state->sp, state->task, &state->stack_info, &state->stack_mask)) { /* * We weren't on a valid stack. It's possible that * we overflowed a valid stack into a guard page. * See if the next page up is valid so that we can * generate some kind of backtrace if this happens. */ void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); state->error = true; if (get_stack_info(next_page, state->task, &state->stack_info, &state->stack_mask)) return; } /* * The caller can provide the address of the first frame directly * (first_frame) or indirectly (regs->sp) to indicate which stack frame * to start unwinding at. Skip ahead until we reach it. */ /* When starting from regs, skip the regs frame: */ if (regs) { unwind_next_frame(state); return; } /* Otherwise, skip ahead to the user-specified starting frame: */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || state->sp <= (unsigned long)first_frame)) unwind_next_frame(state); return; err: state->error = true; the_end: state->stack_info.type = STACK_TYPE_UNKNOWN; } EXPORT_SYMBOL_GPL(__unwind_start);
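/*
 * Usage sketch, not part of unwind_orc.c: the typical caller-side loop for
 * the interfaces exported above (see e.g. arch_stack_walk() for a real
 * caller).  unwind_start() is the arch/x86 inline wrapper around
 * __unwind_start(); the example_* name is hypothetical.
 */
static void example_print_stack(struct task_struct *task, struct pt_regs *regs)
{
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr)
			break;
		/* %pS resolves the return address to a symbol name. */
		printk("%pS\n", (void *)addr);
	}
}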
// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/base/power/runtime.c - Helper functions for device runtime PM
 *
 * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
* Copyright (C) 2010 Alan Stern <stern@rowland.harvard.edu> */ #include <linux/sched/mm.h> #include <linux/ktime.h> #include <linux/hrtimer.h> #include <linux/export.h> #include <linux/pm_runtime.h> #include <linux/pm_wakeirq.h> #include <linux/rculist.h> #include <trace/events/rpm.h> #include "../base.h" #include "power.h" typedef int (*pm_callback_t)(struct device *); static pm_callback_t __rpm_get_callback(struct device *dev, size_t cb_offset) { pm_callback_t cb; const struct dev_pm_ops *ops; if (dev->pm_domain) ops = &dev->pm_domain->ops; else if (dev->type && dev->type->pm) ops = dev->type->pm; else if (dev->class && dev->class->pm) ops = dev->class->pm; else if (dev->bus && dev->bus->pm) ops = dev->bus->pm; else ops = NULL; if (ops) cb = *(pm_callback_t *)((void *)ops + cb_offset); else cb = NULL; if (!cb && dev->driver && dev->driver->pm) cb = *(pm_callback_t *)((void *)dev->driver->pm + cb_offset); return cb; } #define RPM_GET_CALLBACK(dev, callback) \ __rpm_get_callback(dev, offsetof(struct dev_pm_ops, callback)) static int rpm_resume(struct device *dev, int rpmflags); static int rpm_suspend(struct device *dev, int rpmflags); /** * update_pm_runtime_accounting - Update the time accounting of power states * @dev: Device to update the accounting for * * In order to be able to have time accounting of the various power states * (as used by programs such as PowerTOP to show the effectiveness of runtime * PM), we need to track the time spent in each state. * update_pm_runtime_accounting must be called each time before the * runtime_status field is updated, to account the time in the old state * correctly. */ static void update_pm_runtime_accounting(struct device *dev) { u64 now, last, delta; if (dev->power.disable_depth > 0) return; last = dev->power.accounting_timestamp; now = ktime_get_mono_fast_ns(); dev->power.accounting_timestamp = now; /* * Because ktime_get_mono_fast_ns() is not monotonic during * timekeeping updates, ensure that 'now' is after the last saved * timesptamp. */ if (now < last) return; delta = now - last; if (dev->power.runtime_status == RPM_SUSPENDED) dev->power.suspended_time += delta; else dev->power.active_time += delta; } static void __update_runtime_status(struct device *dev, enum rpm_status status) { update_pm_runtime_accounting(dev); trace_rpm_status(dev, status); dev->power.runtime_status = status; } static u64 rpm_get_accounted_time(struct device *dev, bool suspended) { u64 time; unsigned long flags; spin_lock_irqsave(&dev->power.lock, flags); update_pm_runtime_accounting(dev); time = suspended ? dev->power.suspended_time : dev->power.active_time; spin_unlock_irqrestore(&dev->power.lock, flags); return time; } u64 pm_runtime_active_time(struct device *dev) { return rpm_get_accounted_time(dev, false); } u64 pm_runtime_suspended_time(struct device *dev) { return rpm_get_accounted_time(dev, true); } EXPORT_SYMBOL_GPL(pm_runtime_suspended_time); /** * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. * @dev: Device to handle. */ static void pm_runtime_deactivate_timer(struct device *dev) { if (dev->power.timer_expires > 0) { hrtimer_try_to_cancel(&dev->power.suspend_timer); dev->power.timer_expires = 0; } } /** * pm_runtime_cancel_pending - Deactivate suspend timer and cancel requests. * @dev: Device to handle. */ static void pm_runtime_cancel_pending(struct device *dev) { pm_runtime_deactivate_timer(dev); /* * In case there's a request pending, make sure its work function will * return without doing anything. 
*/ dev->power.request = RPM_REQ_NONE; } /* * pm_runtime_autosuspend_expiration - Get a device's autosuspend-delay expiration time. * @dev: Device to handle. * * Compute the autosuspend-delay expiration time based on the device's * power.last_busy time. If the delay has already expired or is disabled * (negative) or the power.use_autosuspend flag isn't set, return 0. * Otherwise return the expiration time in nanoseconds (adjusted to be nonzero). * * This function may be called either with or without dev->power.lock held. * Either way it can be racy, since power.last_busy may be updated at any time. */ u64 pm_runtime_autosuspend_expiration(struct device *dev) { int autosuspend_delay; u64 expires; if (!dev->power.use_autosuspend) return 0; autosuspend_delay = READ_ONCE(dev->power.autosuspend_delay); if (autosuspend_delay < 0) return 0; expires = READ_ONCE(dev->power.last_busy); expires += (u64)autosuspend_delay * NSEC_PER_MSEC; if (expires > ktime_get_mono_fast_ns()) return expires; /* Expires in the future */ return 0; } EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration); static int dev_memalloc_noio(struct device *dev, void *data) { return dev->power.memalloc_noio; } /* * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag. * @dev: Device to handle. * @enable: True for setting the flag and False for clearing the flag. * * Set the flag for all devices in the path from the device to the * root device in the device tree if @enable is true, otherwise clear * the flag for devices in the path whose siblings don't set the flag. * * The function should only be called by block device, or network * device driver for solving the deadlock problem during runtime * resume/suspend: * * If memory allocation with GFP_KERNEL is called inside runtime * resume/suspend callback of any one of its ancestors(or the * block device itself), the deadlock may be triggered inside the * memory allocation since it might not complete until the block * device becomes active and the involed page I/O finishes. The * situation is pointed out first by Alan Stern. Network device * are involved in iSCSI kind of situation. * * The lock of dev_hotplug_mutex is held in the function for handling * hotplug race because pm_runtime_set_memalloc_noio() may be called * in async probe(). * * The function should be called between device_add() and device_del() * on the affected device(block/network device). */ void pm_runtime_set_memalloc_noio(struct device *dev, bool enable) { static DEFINE_MUTEX(dev_hotplug_mutex); mutex_lock(&dev_hotplug_mutex); for (;;) { bool enabled; /* hold power lock since bitfield is not SMP-safe. */ spin_lock_irq(&dev->power.lock); enabled = dev->power.memalloc_noio; dev->power.memalloc_noio = enable; spin_unlock_irq(&dev->power.lock); /* * not need to enable ancestors any more if the device * has been enabled. */ if (enabled && enable) break; dev = dev->parent; /* * clear flag of the parent device only if all the * children don't set the flag because ancestor's * flag was set by any one of the descendants. */ if (!dev || (!enable && device_for_each_child(dev, NULL, dev_memalloc_noio))) break; } mutex_unlock(&dev_hotplug_mutex); } EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio); /** * rpm_check_suspend_allowed - Test whether a device may be suspended. * @dev: Device to test. 
*/ static int rpm_check_suspend_allowed(struct device *dev) { int retval = 0; if (dev->power.runtime_error) retval = -EINVAL; else if (dev->power.disable_depth > 0) retval = -EACCES; else if (atomic_read(&dev->power.usage_count)) retval = -EAGAIN; else if (!dev->power.ignore_children && atomic_read(&dev->power.child_count)) retval = -EBUSY; /* Pending resume requests take precedence over suspends. */ else if ((dev->power.deferred_resume && dev->power.runtime_status == RPM_SUSPENDING) || (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME)) retval = -EAGAIN; else if (__dev_pm_qos_resume_latency(dev) == 0) retval = -EPERM; else if (dev->power.runtime_status == RPM_SUSPENDED) retval = 1; return retval; } static int rpm_get_suppliers(struct device *dev) { struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) { int retval; if (!(link->flags & DL_FLAG_PM_RUNTIME)) continue; retval = pm_runtime_get_sync(link->supplier); /* Ignore suppliers with disabled runtime PM. */ if (retval < 0 && retval != -EACCES) { pm_runtime_put_noidle(link->supplier); return retval; } refcount_inc(&link->rpm_active); } return 0; } /** * pm_runtime_release_supplier - Drop references to device link's supplier. * @link: Target device link. * * Drop all runtime PM references associated with @link to its supplier device. */ void pm_runtime_release_supplier(struct device_link *link) { struct device *supplier = link->supplier; /* * The additional power.usage_count check is a safety net in case * the rpm_active refcount becomes saturated, in which case * refcount_dec_not_one() would return true forever, but it is not * strictly necessary. */ while (refcount_dec_not_one(&link->rpm_active) && atomic_read(&supplier->power.usage_count) > 0) pm_runtime_put_noidle(supplier); } static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) { struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) { pm_runtime_release_supplier(link); if (try_to_suspend) pm_request_idle(link->supplier); } } static void rpm_put_suppliers(struct device *dev) { __rpm_put_suppliers(dev, true); } static void rpm_suspend_suppliers(struct device *dev) { struct device_link *link; int idx = device_links_read_lock(); list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) pm_request_idle(link->supplier); device_links_read_unlock(idx); } /** * __rpm_callback - Run a given runtime PM callback for a given device. * @cb: Runtime PM callback to run. * @dev: Device to run the callback for. */ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) __releases(&dev->power.lock) __acquires(&dev->power.lock) { int retval = 0, idx; bool use_links = dev->power.links_count > 0; if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); } else { spin_unlock_irq(&dev->power.lock); /* * Resume suppliers if necessary. * * The device's runtime PM status cannot change until this * routine returns, so it is safe to read the status outside of * the lock. 
*/ if (use_links && dev->power.runtime_status == RPM_RESUMING) { idx = device_links_read_lock(); retval = rpm_get_suppliers(dev); if (retval) { rpm_put_suppliers(dev); goto fail; } device_links_read_unlock(idx); } } if (cb) retval = cb(dev); if (dev->power.irq_safe) { spin_lock(&dev->power.lock); } else { /* * If the device is suspending and the callback has returned * success, drop the usage counters of the suppliers that have * been reference counted on its resume. * * Do that if resume fails too. */ if (use_links && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) || (dev->power.runtime_status == RPM_RESUMING && retval))) { idx = device_links_read_lock(); __rpm_put_suppliers(dev, false); fail: device_links_read_unlock(idx); } spin_lock_irq(&dev->power.lock); } return retval; } /** * rpm_callback - Run a given runtime PM callback for a given device. * @cb: Runtime PM callback to run. * @dev: Device to run the callback for. */ static int rpm_callback(int (*cb)(struct device *), struct device *dev) { int retval; if (dev->power.memalloc_noio) { unsigned int noio_flag; /* * Deadlock might be caused if memory allocation with * GFP_KERNEL happens inside runtime_suspend and * runtime_resume callbacks of one block device's * ancestor or the block device itself. Network * device might be thought as part of iSCSI block * device, so network device and its ancestor should * be marked as memalloc_noio too. */ noio_flag = memalloc_noio_save(); retval = __rpm_callback(cb, dev); memalloc_noio_restore(noio_flag); } else { retval = __rpm_callback(cb, dev); } dev->power.runtime_error = retval; return retval != -EACCES ? retval : -EIO; } /** * rpm_idle - Notify device bus type if the device can be suspended. * @dev: Device to notify the bus type about. * @rpmflags: Flag bits. * * Check if the device's runtime PM status allows it to be suspended. If * another idle notification has been started earlier, return immediately. If * the RPM_ASYNC flag is set then queue an idle-notification request; otherwise * run the ->runtime_idle() callback directly. If the ->runtime_idle callback * doesn't exist or if it returns 0, call rpm_suspend with the RPM_AUTO flag. * * This function must be called under dev->power.lock with interrupts disabled. */ static int rpm_idle(struct device *dev, int rpmflags) { int (*callback)(struct device *); int retval; trace_rpm_idle(dev, rpmflags); retval = rpm_check_suspend_allowed(dev); if (retval < 0) ; /* Conditions are wrong. */ /* Idle notifications are allowed only in the RPM_ACTIVE state. */ else if (dev->power.runtime_status != RPM_ACTIVE) retval = -EAGAIN; /* * Any pending request other than an idle notification takes * precedence over us, except that the timer may be running. */ else if (dev->power.request_pending && dev->power.request > RPM_REQ_IDLE) retval = -EAGAIN; /* Act as though RPM_NOWAIT is always set. */ else if (dev->power.idle_notification) retval = -EINPROGRESS; if (retval) goto out; /* Pending requests need to be canceled. */ dev->power.request = RPM_REQ_NONE; callback = RPM_GET_CALLBACK(dev, runtime_idle); /* If no callback assume success. */ if (!callback || dev->power.no_callbacks) goto out; /* Carry out an asynchronous or a synchronous idle notification. 
*/ if (rpmflags & RPM_ASYNC) { dev->power.request = RPM_REQ_IDLE; if (!dev->power.request_pending) { dev->power.request_pending = true; queue_work(pm_wq, &dev->power.work); } trace_rpm_return_int(dev, _THIS_IP_, 0); return 0; } dev->power.idle_notification = true; if (dev->power.irq_safe) spin_unlock(&dev->power.lock); else spin_unlock_irq(&dev->power.lock); retval = callback(dev); if (dev->power.irq_safe) spin_lock(&dev->power.lock); else spin_lock_irq(&dev->power.lock); dev->power.idle_notification = false; wake_up_all(&dev->power.wait_queue); out: trace_rpm_return_int(dev, _THIS_IP_, retval); return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO); } /** * rpm_suspend - Carry out runtime suspend of given device. * @dev: Device to suspend. * @rpmflags: Flag bits. * * Check if the device's runtime PM status allows it to be suspended. * Cancel a pending idle notification, autosuspend or suspend. If * another suspend has been started earlier, either return immediately * or wait for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC * flags. If the RPM_ASYNC flag is set then queue a suspend request; * otherwise run the ->runtime_suspend() callback directly. When * ->runtime_suspend succeeded, if a deferred resume was requested while * the callback was running then carry it out, otherwise send an idle * notification for its parent (if the suspend succeeded and both * ignore_children of parent->power and irq_safe of dev->power are not set). * If ->runtime_suspend failed with -EAGAIN or -EBUSY, and if the RPM_AUTO * flag is set and the next autosuspend-delay expiration time is in the * future, schedule another autosuspend attempt. * * This function must be called under dev->power.lock with interrupts disabled. */ static int rpm_suspend(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { int (*callback)(struct device *); struct device *parent = NULL; int retval; trace_rpm_suspend(dev, rpmflags); repeat: retval = rpm_check_suspend_allowed(dev); if (retval < 0) goto out; /* Conditions are wrong. */ /* Synchronous suspends are not allowed in the RPM_RESUMING state. */ if (dev->power.runtime_status == RPM_RESUMING && !(rpmflags & RPM_ASYNC)) retval = -EAGAIN; if (retval) goto out; /* If the autosuspend_delay time hasn't expired yet, reschedule. */ if ((rpmflags & RPM_AUTO) && dev->power.runtime_status != RPM_SUSPENDING) { u64 expires = pm_runtime_autosuspend_expiration(dev); if (expires != 0) { /* Pending requests need to be canceled. */ dev->power.request = RPM_REQ_NONE; /* * Optimization: If the timer is already running and is * set to expire at or before the autosuspend delay, * avoid the overhead of resetting it. Just let it * expire; pm_suspend_timer_fn() will take care of the * rest. */ if (!(dev->power.timer_expires && dev->power.timer_expires <= expires)) { /* * We add a slack of 25% to gather wakeups * without sacrificing the granularity. */ u64 slack = (u64)READ_ONCE(dev->power.autosuspend_delay) * (NSEC_PER_MSEC >> 2); dev->power.timer_expires = expires; hrtimer_start_range_ns(&dev->power.suspend_timer, ns_to_ktime(expires), slack, HRTIMER_MODE_ABS); } dev->power.timer_autosuspends = 1; goto out; } } /* Other scheduled or pending requests need to be canceled. 
*/ pm_runtime_cancel_pending(dev); if (dev->power.runtime_status == RPM_SUSPENDING) { DEFINE_WAIT(wait); if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) { retval = -EINPROGRESS; goto out; } if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); cpu_relax(); spin_lock(&dev->power.lock); goto repeat; } /* Wait for the other suspend running in parallel with us. */ for (;;) { prepare_to_wait(&dev->power.wait_queue, &wait, TASK_UNINTERRUPTIBLE); if (dev->power.runtime_status != RPM_SUSPENDING) break; spin_unlock_irq(&dev->power.lock); schedule(); spin_lock_irq(&dev->power.lock); } finish_wait(&dev->power.wait_queue, &wait); goto repeat; } if (dev->power.no_callbacks) goto no_callback; /* Assume success. */ /* Carry out an asynchronous or a synchronous suspend. */ if (rpmflags & RPM_ASYNC) { dev->power.request = (rpmflags & RPM_AUTO) ? RPM_REQ_AUTOSUSPEND : RPM_REQ_SUSPEND; if (!dev->power.request_pending) { dev->power.request_pending = true; queue_work(pm_wq, &dev->power.work); } goto out; } __update_runtime_status(dev, RPM_SUSPENDING); callback = RPM_GET_CALLBACK(dev, runtime_suspend); dev_pm_enable_wake_irq_check(dev, true); retval = rpm_callback(callback, dev); if (retval) goto fail; dev_pm_enable_wake_irq_complete(dev); no_callback: __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_deactivate_timer(dev); if (dev->parent) { parent = dev->parent; atomic_add_unless(&parent->power.child_count, -1, 0); } wake_up_all(&dev->power.wait_queue); if (dev->power.deferred_resume) { dev->power.deferred_resume = false; rpm_resume(dev, 0); retval = -EAGAIN; goto out; } if (dev->power.irq_safe) goto out; /* Maybe the parent is now able to suspend. */ if (parent && !parent->power.ignore_children) { spin_unlock(&dev->power.lock); spin_lock(&parent->power.lock); rpm_idle(parent, RPM_ASYNC); spin_unlock(&parent->power.lock); spin_lock(&dev->power.lock); } /* Maybe the suppliers are now able to suspend. */ if (dev->power.links_count > 0) { spin_unlock_irq(&dev->power.lock); rpm_suspend_suppliers(dev); spin_lock_irq(&dev->power.lock); } out: trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; fail: dev_pm_disable_wake_irq_check(dev, true); __update_runtime_status(dev, RPM_ACTIVE); dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); if (retval == -EAGAIN || retval == -EBUSY) { dev->power.runtime_error = 0; /* * If the callback routine failed an autosuspend, and * if the last_busy time has been updated so that there * is a new autosuspend expiration time, automatically * reschedule another autosuspend. */ if ((rpmflags & RPM_AUTO) && pm_runtime_autosuspend_expiration(dev) != 0) goto repeat; } else { pm_runtime_cancel_pending(dev); } goto out; } /** * rpm_resume - Carry out runtime resume of given device. * @dev: Device to resume. * @rpmflags: Flag bits. * * Check if the device's runtime PM status allows it to be resumed. Cancel * any scheduled or pending requests. If another resume has been started * earlier, either return immediately or wait for it to finish, depending on the * RPM_NOWAIT and RPM_ASYNC flags. Similarly, if there's a suspend running in * parallel with this function, either tell the other process to resume after * suspending (deferred_resume) or wait for it to finish. If the RPM_ASYNC * flag is set then queue a resume request; otherwise run the * ->runtime_resume() callback directly. Queue an idle notification for the * device if the resume succeeded. * * This function must be called under dev->power.lock with interrupts disabled. 
*/ static int rpm_resume(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { int (*callback)(struct device *); struct device *parent = NULL; int retval = 0; trace_rpm_resume(dev, rpmflags); repeat: if (dev->power.runtime_error) { retval = -EINVAL; } else if (dev->power.disable_depth > 0) { if (dev->power.runtime_status == RPM_ACTIVE && dev->power.last_status == RPM_ACTIVE) retval = 1; else retval = -EACCES; } if (retval) goto out; /* * Other scheduled or pending requests need to be canceled. Small * optimization: If an autosuspend timer is running, leave it running * rather than cancelling it now only to restart it again in the near * future. */ dev->power.request = RPM_REQ_NONE; if (!dev->power.timer_autosuspends) pm_runtime_deactivate_timer(dev); if (dev->power.runtime_status == RPM_ACTIVE) { retval = 1; goto out; } if (dev->power.runtime_status == RPM_RESUMING || dev->power.runtime_status == RPM_SUSPENDING) { DEFINE_WAIT(wait); if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) { if (dev->power.runtime_status == RPM_SUSPENDING) { dev->power.deferred_resume = true; if (rpmflags & RPM_NOWAIT) retval = -EINPROGRESS; } else { retval = -EINPROGRESS; } goto out; } if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); cpu_relax(); spin_lock(&dev->power.lock); goto repeat; } /* Wait for the operation carried out in parallel with us. */ for (;;) { prepare_to_wait(&dev->power.wait_queue, &wait, TASK_UNINTERRUPTIBLE); if (dev->power.runtime_status != RPM_RESUMING && dev->power.runtime_status != RPM_SUSPENDING) break; spin_unlock_irq(&dev->power.lock); schedule(); spin_lock_irq(&dev->power.lock); } finish_wait(&dev->power.wait_queue, &wait); goto repeat; } /* * See if we can skip waking up the parent. This is safe only if * power.no_callbacks is set, because otherwise we don't know whether * the resume will actually succeed. */ if (dev->power.no_callbacks && !parent && dev->parent) { spin_lock_nested(&dev->parent->power.lock, SINGLE_DEPTH_NESTING); if (dev->parent->power.disable_depth > 0 || dev->parent->power.ignore_children || dev->parent->power.runtime_status == RPM_ACTIVE) { atomic_inc(&dev->parent->power.child_count); spin_unlock(&dev->parent->power.lock); retval = 1; goto no_callback; /* Assume success. */ } spin_unlock(&dev->parent->power.lock); } /* Carry out an asynchronous or a synchronous resume. */ if (rpmflags & RPM_ASYNC) { dev->power.request = RPM_REQ_RESUME; if (!dev->power.request_pending) { dev->power.request_pending = true; queue_work(pm_wq, &dev->power.work); } retval = 0; goto out; } if (!parent && dev->parent) { /* * Increment the parent's usage counter and resume it if * necessary. Not needed if dev is irq-safe; then the * parent is permanently resumed. */ parent = dev->parent; if (dev->power.irq_safe) goto skip_parent; spin_unlock(&dev->power.lock); pm_runtime_get_noresume(parent); spin_lock(&parent->power.lock); /* * Resume the parent if it has runtime PM enabled and not been * set to ignore its children. */ if (!parent->power.disable_depth && !parent->power.ignore_children) { rpm_resume(parent, 0); if (parent->power.runtime_status != RPM_ACTIVE) retval = -EBUSY; } spin_unlock(&parent->power.lock); spin_lock(&dev->power.lock); if (retval) goto out; goto repeat; } skip_parent: if (dev->power.no_callbacks) goto no_callback; /* Assume success. 
*/ __update_runtime_status(dev, RPM_RESUMING); callback = RPM_GET_CALLBACK(dev, runtime_resume); dev_pm_disable_wake_irq_check(dev, false); retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_cancel_pending(dev); dev_pm_enable_wake_irq_check(dev, false); } else { no_callback: __update_runtime_status(dev, RPM_ACTIVE); pm_runtime_mark_last_busy(dev); if (parent) atomic_inc(&parent->power.child_count); } wake_up_all(&dev->power.wait_queue); if (retval >= 0) rpm_idle(dev, RPM_ASYNC); out: if (parent && !dev->power.irq_safe) { spin_unlock_irq(&dev->power.lock); pm_runtime_put(parent); spin_lock_irq(&dev->power.lock); } trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; } /** * pm_runtime_work - Universal runtime PM work function. * @work: Work structure used for scheduling the execution of this function. * * Use @work to get the device object the work is to be done for, determine what * is to be done and execute the appropriate runtime PM function. */ static void pm_runtime_work(struct work_struct *work) { struct device *dev = container_of(work, struct device, power.work); enum rpm_request req; spin_lock_irq(&dev->power.lock); if (!dev->power.request_pending) goto out; req = dev->power.request; dev->power.request = RPM_REQ_NONE; dev->power.request_pending = false; switch (req) { case RPM_REQ_NONE: break; case RPM_REQ_IDLE: rpm_idle(dev, RPM_NOWAIT); break; case RPM_REQ_SUSPEND: rpm_suspend(dev, RPM_NOWAIT); break; case RPM_REQ_AUTOSUSPEND: rpm_suspend(dev, RPM_NOWAIT | RPM_AUTO); break; case RPM_REQ_RESUME: rpm_resume(dev, RPM_NOWAIT); break; } out: spin_unlock_irq(&dev->power.lock); } /** * pm_suspend_timer_fn - Timer function for pm_schedule_suspend(). * @timer: hrtimer used by pm_schedule_suspend(). * * Check if the time is right and queue a suspend request. */ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) { struct device *dev = container_of(timer, struct device, power.suspend_timer); unsigned long flags; u64 expires; spin_lock_irqsave(&dev->power.lock, flags); expires = dev->power.timer_expires; /* * If 'expires' is after the current time, we've been called * too early. */ if (expires > 0 && expires < ktime_get_mono_fast_ns()) { dev->power.timer_expires = 0; rpm_suspend(dev, dev->power.timer_autosuspends ? (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC); } spin_unlock_irqrestore(&dev->power.lock, flags); return HRTIMER_NORESTART; } /** * pm_schedule_suspend - Set up a timer to submit a suspend request in future. * @dev: Device to suspend. * @delay: Time to wait before submitting a suspend request, in milliseconds. */ int pm_schedule_suspend(struct device *dev, unsigned int delay) { unsigned long flags; u64 expires; int retval; spin_lock_irqsave(&dev->power.lock, flags); if (!delay) { retval = rpm_suspend(dev, RPM_ASYNC); goto out; } retval = rpm_check_suspend_allowed(dev); if (retval) goto out; /* Other scheduled or pending requests need to be canceled. 
*/ pm_runtime_cancel_pending(dev); expires = ktime_get_mono_fast_ns() + (u64)delay * NSEC_PER_MSEC; dev->power.timer_expires = expires; dev->power.timer_autosuspends = 0; hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS); out: spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(pm_schedule_suspend); static int rpm_drop_usage_count(struct device *dev) { int ret; ret = atomic_sub_return(1, &dev->power.usage_count); if (ret >= 0) return ret; /* * Because rpm_resume() does not check the usage counter, it will resume * the device even if the usage counter is 0 or negative, so it is * sufficient to increment the usage counter here to reverse the change * made above. */ atomic_inc(&dev->power.usage_count); dev_warn(dev, "Runtime PM usage count underflow!\n"); return -EINVAL; } /** * __pm_runtime_idle - Entry point for runtime idle operations. * @dev: Device to send idle notification for. * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, decrement the device's usage count and * return immediately if it is larger than zero (if it becomes negative, log a * warning, increment it, and return an error). Then carry out an idle * notification, either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, * or if pm_runtime_irq_safe() has been called. */ int __pm_runtime_idle(struct device *dev, int rpmflags) { unsigned long flags; int retval; if (rpmflags & RPM_GET_PUT) { retval = rpm_drop_usage_count(dev); if (retval < 0) { return retval; } else if (retval > 0) { trace_rpm_usage(dev, rpmflags); return 0; } } might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_idle(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(__pm_runtime_idle); /** * __pm_runtime_suspend - Entry point for runtime put/suspend operations. * @dev: Device to suspend. * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, decrement the device's usage count and * return immediately if it is larger than zero (if it becomes negative, log a * warning, increment it, and return an error). Then carry out a suspend, * either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, * or if pm_runtime_irq_safe() has been called. */ int __pm_runtime_suspend(struct device *dev, int rpmflags) { unsigned long flags; int retval; if (rpmflags & RPM_GET_PUT) { retval = rpm_drop_usage_count(dev); if (retval < 0) { return retval; } else if (retval > 0) { trace_rpm_usage(dev, rpmflags); return 0; } } might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_suspend(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(__pm_runtime_suspend); /** * __pm_runtime_resume - Entry point for runtime resume operations. * @dev: Device to resume. * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, increment the device's usage count. Then * carry out a resume, either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, * or if pm_runtime_irq_safe() has been called. 
*/ int __pm_runtime_resume(struct device *dev, int rpmflags) { unsigned long flags; int retval; might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe && dev->power.runtime_status != RPM_ACTIVE); if (rpmflags & RPM_GET_PUT) atomic_inc(&dev->power.usage_count); spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_resume(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(__pm_runtime_resume); /** * pm_runtime_get_conditional - Conditionally bump up device usage counter. * @dev: Device to handle. * @ign_usage_count: Whether or not to look at the current usage counter value. * * Return -EINVAL if runtime PM is disabled for @dev. * * Otherwise, if the runtime PM status of @dev is %RPM_ACTIVE and either * @ign_usage_count is %true or the runtime PM usage counter of @dev is not * zero, increment the usage counter of @dev and return 1. Otherwise, return 0 * without changing the usage counter. * * If @ign_usage_count is %true, this function can be used to prevent suspending * the device when its runtime PM status is %RPM_ACTIVE. * * If @ign_usage_count is %false, this function can be used to prevent * suspending the device when both its runtime PM status is %RPM_ACTIVE and its * runtime PM usage counter is not zero. * * The caller is responsible for decrementing the runtime PM usage counter of * @dev after this function has returned a positive value for it. */ static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count) { unsigned long flags; int retval; spin_lock_irqsave(&dev->power.lock, flags); if (dev->power.disable_depth > 0) { retval = -EINVAL; } else if (dev->power.runtime_status != RPM_ACTIVE) { retval = 0; } else if (ign_usage_count) { retval = 1; atomic_inc(&dev->power.usage_count); } else { retval = atomic_inc_not_zero(&dev->power.usage_count); } trace_rpm_usage(dev, 0); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } /** * pm_runtime_get_if_active - Bump up runtime PM usage counter if the device is * in active state * @dev: Target device. * * Increment the runtime PM usage counter of @dev if its runtime PM status is * %RPM_ACTIVE, in which case it returns 1. If the device is in a different * state, 0 is returned. -EINVAL is returned if runtime PM is disabled for the * device, in which case also the usage_count will remain unmodified. */ int pm_runtime_get_if_active(struct device *dev) { return pm_runtime_get_conditional(dev, true); } EXPORT_SYMBOL_GPL(pm_runtime_get_if_active); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. * @dev: Target device. * * Increment the runtime PM usage counter of @dev if its runtime PM status is * %RPM_ACTIVE and its runtime PM usage counter is greater than 0, in which case * it returns 1. If the device is in a different state or its usage_count is 0, * 0 is returned. -EINVAL is returned if runtime PM is disabled for the device, * in which case also the usage_count will remain unmodified. */ int pm_runtime_get_if_in_use(struct device *dev) { return pm_runtime_get_conditional(dev, false); } EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_use); /** * __pm_runtime_set_status - Set runtime PM status of a device. * @dev: Device to handle. * @status: New runtime PM status of the device. * * If runtime PM of the device is disabled or its power.runtime_error field is * different from zero, the status may be changed either to RPM_ACTIVE, or to * RPM_SUSPENDED, as long as that reflects the actual state of the device. 
* However, if the device has a parent and the parent is not active, and the * parent's power.ignore_children flag is unset, the device's status cannot be * set to RPM_ACTIVE, so -EBUSY is returned in that case. * * If successful, __pm_runtime_set_status() clears the power.runtime_error field * and the device parent's counter of unsuspended children is modified to * reflect the new status. If the new status is RPM_SUSPENDED, an idle * notification request for the parent is submitted. * * If @dev has any suppliers (as reflected by device links to them), and @status * is RPM_ACTIVE, they will be activated upfront and if the activation of one * of them fails, the status of @dev will be changed to RPM_SUSPENDED (instead * of the @status value) and the suppliers will be deacticated on exit. The * error returned by the failing supplier activation will be returned in that * case. */ int __pm_runtime_set_status(struct device *dev, unsigned int status) { struct device *parent = dev->parent; bool notify_parent = false; unsigned long flags; int error = 0; if (status != RPM_ACTIVE && status != RPM_SUSPENDED) return -EINVAL; spin_lock_irqsave(&dev->power.lock, flags); /* * Prevent PM-runtime from being enabled for the device or return an * error if it is enabled already and working. */ if (dev->power.runtime_error || dev->power.disable_depth) dev->power.disable_depth++; else error = -EAGAIN; spin_unlock_irqrestore(&dev->power.lock, flags); if (error) return error; /* * If the new status is RPM_ACTIVE, the suppliers can be activated * upfront regardless of the current status, because next time * rpm_put_suppliers() runs, the rpm_active refcounts of the links * involved will be dropped down to one anyway. */ if (status == RPM_ACTIVE) { int idx = device_links_read_lock(); error = rpm_get_suppliers(dev); if (error) status = RPM_SUSPENDED; device_links_read_unlock(idx); } spin_lock_irqsave(&dev->power.lock, flags); if (dev->power.runtime_status == status || !parent) goto out_set; if (status == RPM_SUSPENDED) { atomic_add_unless(&parent->power.child_count, -1, 0); notify_parent = !parent->power.ignore_children; } else { spin_lock_nested(&parent->power.lock, SINGLE_DEPTH_NESTING); /* * It is invalid to put an active child under a parent that is * not active, has runtime PM enabled and the * 'power.ignore_children' flag unset. */ if (!parent->power.disable_depth && !parent->power.ignore_children && parent->power.runtime_status != RPM_ACTIVE) { dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n", dev_name(dev), dev_name(parent)); error = -EBUSY; } else if (dev->power.runtime_status == RPM_SUSPENDED) { atomic_inc(&parent->power.child_count); } spin_unlock(&parent->power.lock); if (error) { status = RPM_SUSPENDED; goto out; } } out_set: __update_runtime_status(dev, status); if (!error) dev->power.runtime_error = 0; out: spin_unlock_irqrestore(&dev->power.lock, flags); if (notify_parent) pm_request_idle(parent); if (status == RPM_SUSPENDED) { int idx = device_links_read_lock(); rpm_put_suppliers(dev); device_links_read_unlock(idx); } pm_runtime_enable(dev); return error; } EXPORT_SYMBOL_GPL(__pm_runtime_set_status); /** * __pm_runtime_barrier - Cancel pending requests and wait for completions. * @dev: Device to handle. * * Flush all pending requests for the device from pm_wq and wait for all * runtime PM operations involving the device in progress to complete. * * Should be called under dev->power.lock with interrupts disabled. 
*/ static void __pm_runtime_barrier(struct device *dev) { pm_runtime_deactivate_timer(dev); if (dev->power.request_pending) { dev->power.request = RPM_REQ_NONE; spin_unlock_irq(&dev->power.lock); cancel_work_sync(&dev->power.work); spin_lock_irq(&dev->power.lock); dev->power.request_pending = false; } if (dev->power.runtime_status == RPM_SUSPENDING || dev->power.runtime_status == RPM_RESUMING || dev->power.idle_notification) { DEFINE_WAIT(wait); /* Suspend, wake-up or idle notification in progress. */ for (;;) { prepare_to_wait(&dev->power.wait_queue, &wait, TASK_UNINTERRUPTIBLE); if (dev->power.runtime_status != RPM_SUSPENDING && dev->power.runtime_status != RPM_RESUMING && !dev->power.idle_notification) break; spin_unlock_irq(&dev->power.lock); schedule(); spin_lock_irq(&dev->power.lock); } finish_wait(&dev->power.wait_queue, &wait); } } /** * pm_runtime_barrier - Flush pending requests and wait for completions. * @dev: Device to handle. * * Prevent the device from being suspended by incrementing its usage counter and * if there's a pending resume request for the device, wake the device up. * Next, make sure that all pending requests for the device have been flushed * from pm_wq and wait for all runtime PM operations involving the device in * progress to complete. * * Return value: * 1, if there was a resume request pending and the device had to be woken up, * 0, otherwise */ int pm_runtime_barrier(struct device *dev) { int retval = 0; pm_runtime_get_noresume(dev); spin_lock_irq(&dev->power.lock); if (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME) { rpm_resume(dev, 0); retval = 1; } __pm_runtime_barrier(dev); spin_unlock_irq(&dev->power.lock); pm_runtime_put_noidle(dev); return retval; } EXPORT_SYMBOL_GPL(pm_runtime_barrier); /** * __pm_runtime_disable - Disable runtime PM of a device. * @dev: Device to handle. * @check_resume: If set, check if there's a resume request for the device. * * Increment power.disable_depth for the device and if it was zero previously, * cancel all pending runtime PM requests for the device and wait for all * operations in progress to complete. The device can be either active or * suspended after its runtime PM has been disabled. * * If @check_resume is set and there's a resume request pending when * __pm_runtime_disable() is called and power.disable_depth is zero, the * function will wake up the device before disabling its runtime PM. */ void __pm_runtime_disable(struct device *dev, bool check_resume) { spin_lock_irq(&dev->power.lock); if (dev->power.disable_depth > 0) { dev->power.disable_depth++; goto out; } /* * Wake up the device if there's a resume request pending, because that * means there probably is some I/O to process and disabling runtime PM * shouldn't prevent the device from processing the I/O. */ if (check_resume && dev->power.request_pending && dev->power.request == RPM_REQ_RESUME) { /* * Prevent suspends and idle notifications from being carried * out after we have woken up the device. */ pm_runtime_get_noresume(dev); rpm_resume(dev, 0); pm_runtime_put_noidle(dev); } /* Update time accounting before disabling PM-runtime. */ update_pm_runtime_accounting(dev); if (!dev->power.disable_depth++) { __pm_runtime_barrier(dev); dev->power.last_status = dev->power.runtime_status; } out: spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(__pm_runtime_disable); /** * pm_runtime_enable - Enable runtime PM of a device. * @dev: Device to handle. 
*/ void pm_runtime_enable(struct device *dev) { unsigned long flags; spin_lock_irqsave(&dev->power.lock, flags); if (!dev->power.disable_depth) { dev_warn(dev, "Unbalanced %s!\n", __func__); goto out; } if (--dev->power.disable_depth > 0) goto out; dev->power.last_status = RPM_INVALID; dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); if (dev->power.runtime_status == RPM_SUSPENDED && !dev->power.ignore_children && atomic_read(&dev->power.child_count) > 0) dev_warn(dev, "Enabling runtime PM for inactive device with active children\n"); out: spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_runtime_enable); static void pm_runtime_disable_action(void *data) { pm_runtime_dont_use_autosuspend(data); pm_runtime_disable(data); } /** * devm_pm_runtime_enable - devres-enabled version of pm_runtime_enable. * * NOTE: this will also handle calling pm_runtime_dont_use_autosuspend() for * you at driver exit time if needed. * * @dev: Device to handle. */ int devm_pm_runtime_enable(struct device *dev) { pm_runtime_enable(dev); return devm_add_action_or_reset(dev, pm_runtime_disable_action, dev); } EXPORT_SYMBOL_GPL(devm_pm_runtime_enable); /** * pm_runtime_forbid - Block runtime PM of a device. * @dev: Device to handle. * * Increase the device's usage count and clear its power.runtime_auto flag, * so that it cannot be suspended at run time until pm_runtime_allow() is called * for it. */ void pm_runtime_forbid(struct device *dev) { spin_lock_irq(&dev->power.lock); if (!dev->power.runtime_auto) goto out; dev->power.runtime_auto = false; atomic_inc(&dev->power.usage_count); rpm_resume(dev, 0); out: spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(pm_runtime_forbid); /** * pm_runtime_allow - Unblock runtime PM of a device. * @dev: Device to handle. * * Decrease the device's usage count and set its power.runtime_auto flag. */ void pm_runtime_allow(struct device *dev) { int ret; spin_lock_irq(&dev->power.lock); if (dev->power.runtime_auto) goto out; dev->power.runtime_auto = true; ret = rpm_drop_usage_count(dev); if (ret == 0) rpm_idle(dev, RPM_AUTO | RPM_ASYNC); else if (ret > 0) trace_rpm_usage(dev, RPM_AUTO | RPM_ASYNC); out: spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(pm_runtime_allow); /** * pm_runtime_no_callbacks - Ignore runtime PM callbacks for a device. * @dev: Device to handle. * * Set the power.no_callbacks flag, which tells the PM core that this * device is power-managed through its parent and has no runtime PM * callbacks of its own. The runtime sysfs attributes will be removed. */ void pm_runtime_no_callbacks(struct device *dev) { spin_lock_irq(&dev->power.lock); dev->power.no_callbacks = 1; spin_unlock_irq(&dev->power.lock); if (device_is_registered(dev)) rpm_sysfs_remove(dev); } EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks); /** * pm_runtime_irq_safe - Leave interrupts disabled during callbacks. * @dev: Device to handle * * Set the power.irq_safe flag, which tells the PM core that the * ->runtime_suspend() and ->runtime_resume() callbacks for this device should * always be invoked with the spinlock held and interrupts disabled. It also * causes the parent's usage counter to be permanently incremented, preventing * the parent from runtime suspending -- otherwise an irq-safe child might have * to wait for a non-irq-safe parent. 
*/ void pm_runtime_irq_safe(struct device *dev) { if (dev->parent) pm_runtime_get_sync(dev->parent); spin_lock_irq(&dev->power.lock); dev->power.irq_safe = 1; spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(pm_runtime_irq_safe); /** * update_autosuspend - Handle a change to a device's autosuspend settings. * @dev: Device to handle. * @old_delay: The former autosuspend_delay value. * @old_use: The former use_autosuspend value. * * Prevent runtime suspend if the new delay is negative and use_autosuspend is * set; otherwise allow it. Send an idle notification if suspends are allowed. * * This function must be called under dev->power.lock with interrupts disabled. */ static void update_autosuspend(struct device *dev, int old_delay, int old_use) { int delay = dev->power.autosuspend_delay; /* Should runtime suspend be prevented now? */ if (dev->power.use_autosuspend && delay < 0) { /* If it used to be allowed then prevent it. */ if (!old_use || old_delay >= 0) { atomic_inc(&dev->power.usage_count); rpm_resume(dev, 0); } else { trace_rpm_usage(dev, 0); } } /* Runtime suspend should be allowed now. */ else { /* If it used to be prevented then allow it. */ if (old_use && old_delay < 0) atomic_dec(&dev->power.usage_count); /* Maybe we can autosuspend now. */ rpm_idle(dev, RPM_AUTO); } } /** * pm_runtime_set_autosuspend_delay - Set a device's autosuspend_delay value. * @dev: Device to handle. * @delay: Value of the new delay in milliseconds. * * Set the device's power.autosuspend_delay value. If it changes to negative * and the power.use_autosuspend flag is set, prevent runtime suspends. If it * changes the other way, allow runtime suspends. */ void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) { int old_delay, old_use; spin_lock_irq(&dev->power.lock); old_delay = dev->power.autosuspend_delay; old_use = dev->power.use_autosuspend; dev->power.autosuspend_delay = delay; update_autosuspend(dev, old_delay, old_use); spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(pm_runtime_set_autosuspend_delay); /** * __pm_runtime_use_autosuspend - Set a device's use_autosuspend flag. * @dev: Device to handle. * @use: New value for use_autosuspend. * * Set the device's power.use_autosuspend flag, and allow or prevent runtime * suspends as needed. */ void __pm_runtime_use_autosuspend(struct device *dev, bool use) { int old_delay, old_use; spin_lock_irq(&dev->power.lock); old_delay = dev->power.autosuspend_delay; old_use = dev->power.use_autosuspend; dev->power.use_autosuspend = use; update_autosuspend(dev, old_delay, old_use); spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend); /** * pm_runtime_init - Initialize runtime PM fields in given device object. * @dev: Device object to initialize. 
*/ void pm_runtime_init(struct device *dev) { dev->power.runtime_status = RPM_SUSPENDED; dev->power.last_status = RPM_INVALID; dev->power.idle_notification = false; dev->power.disable_depth = 1; atomic_set(&dev->power.usage_count, 0); dev->power.runtime_error = 0; atomic_set(&dev->power.child_count, 0); pm_suspend_ignore_children(dev, false); dev->power.runtime_auto = true; dev->power.request_pending = false; dev->power.request = RPM_REQ_NONE; dev->power.deferred_resume = false; dev->power.needs_force_resume = 0; INIT_WORK(&dev->power.work, pm_runtime_work); dev->power.timer_expires = 0; hrtimer_init(&dev->power.suspend_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); dev->power.suspend_timer.function = pm_suspend_timer_fn; init_waitqueue_head(&dev->power.wait_queue); } /** * pm_runtime_reinit - Re-initialize runtime PM fields in given device object. * @dev: Device object to re-initialize. */ void pm_runtime_reinit(struct device *dev) { if (!pm_runtime_enabled(dev)) { if (dev->power.runtime_status == RPM_ACTIVE) pm_runtime_set_suspended(dev); if (dev->power.irq_safe) { spin_lock_irq(&dev->power.lock); dev->power.irq_safe = 0; spin_unlock_irq(&dev->power.lock); if (dev->parent) pm_runtime_put(dev->parent); } } } /** * pm_runtime_remove - Prepare for removing a device from device hierarchy. * @dev: Device object being removed from device hierarchy. */ void pm_runtime_remove(struct device *dev) { __pm_runtime_disable(dev, false); pm_runtime_reinit(dev); } /** * pm_runtime_get_suppliers - Resume and reference-count supplier devices. * @dev: Consumer device. */ void pm_runtime_get_suppliers(struct device *dev) { struct device_link *link; int idx; idx = device_links_read_lock(); list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) if (link->flags & DL_FLAG_PM_RUNTIME) { link->supplier_preactivated = true; pm_runtime_get_sync(link->supplier); } device_links_read_unlock(idx); } /** * pm_runtime_put_suppliers - Drop references to supplier devices. * @dev: Consumer device. */ void pm_runtime_put_suppliers(struct device *dev) { struct device_link *link; int idx; idx = device_links_read_lock(); list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) if (link->supplier_preactivated) { link->supplier_preactivated = false; pm_runtime_put(link->supplier); } device_links_read_unlock(idx); } void pm_runtime_new_link(struct device *dev) { spin_lock_irq(&dev->power.lock); dev->power.links_count++; spin_unlock_irq(&dev->power.lock); } static void pm_runtime_drop_link_count(struct device *dev) { spin_lock_irq(&dev->power.lock); WARN_ON(dev->power.links_count == 0); dev->power.links_count--; spin_unlock_irq(&dev->power.lock); } /** * pm_runtime_drop_link - Prepare for device link removal. * @link: Device link going away. * * Drop the link count of the consumer end of @link and decrement the supplier * device's runtime PM usage counter as many times as needed to drop all of the * PM runtime reference to it from the consumer. */ void pm_runtime_drop_link(struct device_link *link) { if (!(link->flags & DL_FLAG_PM_RUNTIME)) return; pm_runtime_drop_link_count(link->consumer); pm_runtime_release_supplier(link); pm_request_idle(link->supplier); } static bool pm_runtime_need_not_resume(struct device *dev) { return atomic_read(&dev->power.usage_count) <= 1 && (atomic_read(&dev->power.child_count) == 0 || dev->power.ignore_children); } /** * pm_runtime_force_suspend - Force a device into suspend state if needed. * @dev: Device to suspend. 
* * Disable runtime PM so we safely can check the device's runtime PM status and * if it is active, invoke its ->runtime_suspend callback to suspend it and * change its runtime PM status field to RPM_SUSPENDED. Also, if the device's * usage and children counters don't indicate that the device was in use before * the system-wide transition under way, decrement its parent's children counter * (if there is a parent). Keep runtime PM disabled to preserve the state * unless we encounter errors. * * Typically this function may be invoked from a system suspend callback to make * sure the device is put into low power state and it should only be used during * system-wide PM transitions to sleep states. It assumes that the analogous * pm_runtime_force_resume() will be used to resume the device. * * Do not use with DPM_FLAG_SMART_SUSPEND as this can lead to an inconsistent * state where this function has called the ->runtime_suspend callback but the * PM core marks the driver as runtime active. */ int pm_runtime_force_suspend(struct device *dev) { int (*callback)(struct device *); int ret; pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev)) return 0; callback = RPM_GET_CALLBACK(dev, runtime_suspend); dev_pm_enable_wake_irq_check(dev, true); ret = callback ? callback(dev) : 0; if (ret) goto err; dev_pm_enable_wake_irq_complete(dev); /* * If the device can stay in suspend after the system-wide transition * to the working state that will follow, drop the children counter of * its parent, but set its status to RPM_SUSPENDED anyway in case this * function will be called again for it in the meantime. */ if (pm_runtime_need_not_resume(dev)) { pm_runtime_set_suspended(dev); } else { __update_runtime_status(dev, RPM_SUSPENDED); dev->power.needs_force_resume = 1; } return 0; err: dev_pm_disable_wake_irq_check(dev, true); pm_runtime_enable(dev); return ret; } EXPORT_SYMBOL_GPL(pm_runtime_force_suspend); /** * pm_runtime_force_resume - Force a device into resume state if needed. * @dev: Device to resume. * * Prior invoking this function we expect the user to have brought the device * into low power state by a call to pm_runtime_force_suspend(). Here we reverse * those actions and bring the device into full power, if it is expected to be * used on system resume. In the other case, we defer the resume to be managed * via runtime PM. * * Typically this function may be invoked from a system resume callback. */ int pm_runtime_force_resume(struct device *dev) { int (*callback)(struct device *); int ret = 0; if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume) goto out; /* * The value of the parent's children counter is correct already, so * just update the status of the device. */ __update_runtime_status(dev, RPM_ACTIVE); callback = RPM_GET_CALLBACK(dev, runtime_resume); dev_pm_disable_wake_irq_check(dev, false); ret = callback ? callback(dev) : 0; if (ret) { pm_runtime_set_suspended(dev); dev_pm_enable_wake_irq_check(dev, false); goto out; } pm_runtime_mark_last_busy(dev); out: dev->power.needs_force_resume = 0; pm_runtime_enable(dev); return ret; } EXPORT_SYMBOL_GPL(pm_runtime_force_resume);
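/*
 * Illustrative usage sketch -- not part of the original file.  A minimal,
 * hypothetical consumer of the runtime PM core implemented above, showing
 * the usual enable/get/put pattern with autosuspend.  The foo_* names and
 * the 2000 ms delay are assumptions made up for this example; the
 * pm_runtime_*() calls themselves are the interfaces provided by this file
 * and declared in <linux/pm_runtime.h>.
 */
#include <linux/device.h>
#include <linux/pm_runtime.h>

static int foo_runtime_setup(struct device *dev)
{
	int ret;

	/* The hardware is powered up at probe time, so mark it active first. */
	ret = pm_runtime_set_active(dev);
	if (ret)
		return ret;

	/* Let the core suspend the device after ~2 s of inactivity. */
	pm_runtime_set_autosuspend_delay(dev, 2000);
	pm_runtime_use_autosuspend(dev);
	pm_runtime_enable(dev);
	return 0;
}

static int foo_do_io(struct device *dev)
{
	int ret;

	/* Resume the device (and, if necessary, its parent) before I/O. */
	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;

	/* ... hardware access would go here ... */

	/* Drop the reference; the autosuspend timer set up above takes over. */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
	return 0;
}

static void foo_runtime_teardown(struct device *dev)
{
	pm_runtime_dont_use_autosuspend(dev);
	pm_runtime_disable(dev);
}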
// SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer device management * Copyright (c) 1999 by Takashi Iwai <tiwai@suse.de> * *---------------------------------------------------------------- * * This device handler separates the card driver module from sequencer * stuff (sequencer core, synth drivers, etc), so that a user can avoid * spending unnecessary resources, e.g. if they only need to listen to * MP3s. * * The card (or lowlevel) driver creates a sequencer device entry * via snd_seq_device_new(). This entry is a handle used to communicate * with the sequencer device "driver", which implements the actual * communication with the sequencer core. * Each sequencer device entry has an id string, and the corresponding * driver with the same id is loaded when required. For example, the * lowlevel code to access the emu8000 chip on the sbawe card is included * in the emu8000-synth module. To activate this module, the hardware * resources such as the i/o port are passed via the snd_seq_device argument. */ #include <linux/device.h> #include <linux/init.h> #include <linux/module.h> #include <sound/core.h> #include <sound/info.h> #include <sound/seq_device.h> #include <sound/seq_kernel.h> #include <sound/initval.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/mutex.h> MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("ALSA sequencer device management"); MODULE_LICENSE("GPL"); /* * bus definition */ static int snd_seq_bus_match(struct device *dev, struct device_driver *drv) { struct snd_seq_device *sdev = to_seq_dev(dev); struct snd_seq_driver *sdrv = to_seq_drv(drv); return strcmp(sdrv->id, sdev->id) == 0 && sdrv->argsize == sdev->argsize; } static const struct bus_type snd_seq_bus_type = { .name = "snd_seq", .match = snd_seq_bus_match, }; /* * proc interface -- just for compatibility */ #ifdef CONFIG_SND_PROC_FS static struct snd_info_entry *info_entry; static int print_dev_info(struct device *dev, void *data) { struct snd_seq_device *sdev = to_seq_dev(dev); struct snd_info_buffer *buffer = data; snd_iprintf(buffer, "snd-%s,%s,%d\n", sdev->id, dev->driver ? "loaded" : "empty", dev->driver ?
1 : 0); return 0; } static void snd_seq_device_info(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { bus_for_each_dev(&snd_seq_bus_type, NULL, buffer, print_dev_info); } #endif /* * load all registered drivers (called from seq_clientmgr.c) */ #ifdef CONFIG_MODULES /* flag to block auto-loading */ static atomic_t snd_seq_in_init = ATOMIC_INIT(1); /* blocked as default */ static int request_seq_drv(struct device *dev, void *data) { struct snd_seq_device *sdev = to_seq_dev(dev); if (!dev->driver) request_module("snd-%s", sdev->id); return 0; } static void autoload_drivers(struct work_struct *work) { /* avoid reentrance */ if (atomic_inc_return(&snd_seq_in_init) == 1) bus_for_each_dev(&snd_seq_bus_type, NULL, NULL, request_seq_drv); atomic_dec(&snd_seq_in_init); } static DECLARE_WORK(autoload_work, autoload_drivers); static void queue_autoload_drivers(void) { schedule_work(&autoload_work); } void snd_seq_autoload_init(void) { atomic_dec(&snd_seq_in_init); #ifdef CONFIG_SND_SEQUENCER_MODULE /* initial autoload only when snd-seq is a module */ queue_autoload_drivers(); #endif } EXPORT_SYMBOL(snd_seq_autoload_init); void snd_seq_autoload_exit(void) { atomic_inc(&snd_seq_in_init); } EXPORT_SYMBOL(snd_seq_autoload_exit); void snd_seq_device_load_drivers(void) { queue_autoload_drivers(); flush_work(&autoload_work); } EXPORT_SYMBOL(snd_seq_device_load_drivers); static inline void cancel_autoload_drivers(void) { cancel_work_sync(&autoload_work); } #else static inline void queue_autoload_drivers(void) { } static inline void cancel_autoload_drivers(void) { } #endif /* * device management */ static int snd_seq_device_dev_free(struct snd_device *device) { struct snd_seq_device *dev = device->device_data; cancel_autoload_drivers(); if (dev->private_free) dev->private_free(dev); put_device(&dev->dev); return 0; } static int snd_seq_device_dev_register(struct snd_device *device) { struct snd_seq_device *dev = device->device_data; int err; err = device_add(&dev->dev); if (err < 0) return err; if (!dev->dev.driver) queue_autoload_drivers(); return 0; } static int snd_seq_device_dev_disconnect(struct snd_device *device) { struct snd_seq_device *dev = device->device_data; device_del(&dev->dev); return 0; } static void snd_seq_dev_release(struct device *dev) { kfree(to_seq_dev(dev)); } /* * register a sequencer device * card = card info * device = device number (if any) * id = id of driver * result = return pointer (NULL allowed if unnecessary) */ int snd_seq_device_new(struct snd_card *card, int device, const char *id, int argsize, struct snd_seq_device **result) { struct snd_seq_device *dev; int err; static const struct snd_device_ops dops = { .dev_free = snd_seq_device_dev_free, .dev_register = snd_seq_device_dev_register, .dev_disconnect = snd_seq_device_dev_disconnect, }; if (result) *result = NULL; if (snd_BUG_ON(!id)) return -EINVAL; dev = kzalloc(sizeof(*dev) + argsize, GFP_KERNEL); if (!dev) return -ENOMEM; /* set up device info */ dev->card = card; dev->device = device; dev->id = id; dev->argsize = argsize; device_initialize(&dev->dev); dev->dev.parent = &card->card_dev; dev->dev.bus = &snd_seq_bus_type; dev->dev.release = snd_seq_dev_release; dev_set_name(&dev->dev, "%s-%d-%d", dev->id, card->number, device); /* add this device to the list */ err = snd_device_new(card, SNDRV_DEV_SEQUENCER, dev, &dops); if (err < 0) { put_device(&dev->dev); return err; } if (result) *result = dev; return 0; } EXPORT_SYMBOL(snd_seq_device_new); /* * driver registration */ int 
__snd_seq_driver_register(struct snd_seq_driver *drv, struct module *mod) { if (WARN_ON(!drv->driver.name || !drv->id)) return -EINVAL; drv->driver.bus = &snd_seq_bus_type; drv->driver.owner = mod; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(__snd_seq_driver_register); void snd_seq_driver_unregister(struct snd_seq_driver *drv) { driver_unregister(&drv->driver); } EXPORT_SYMBOL_GPL(snd_seq_driver_unregister); /* * module part */ static int __init seq_dev_proc_init(void) { #ifdef CONFIG_SND_PROC_FS info_entry = snd_info_create_module_entry(THIS_MODULE, "drivers", snd_seq_root); if (info_entry == NULL) return -ENOMEM; info_entry->content = SNDRV_INFO_CONTENT_TEXT; info_entry->c.text.read = snd_seq_device_info; if (snd_info_register(info_entry) < 0) { snd_info_free_entry(info_entry); return -ENOMEM; } #endif return 0; } static int __init alsa_seq_device_init(void) { int err; err = bus_register(&snd_seq_bus_type); if (err < 0) return err; err = seq_dev_proc_init(); if (err < 0) bus_unregister(&snd_seq_bus_type); return err; } static void __exit alsa_seq_device_exit(void) { #ifdef CONFIG_MODULES cancel_work_sync(&autoload_work); #endif #ifdef CONFIG_SND_PROC_FS snd_info_free_entry(info_entry); #endif bus_unregister(&snd_seq_bus_type); } subsys_initcall(alsa_seq_device_init) module_exit(alsa_seq_device_exit)
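/*
 * Illustrative usage sketch -- not part of the original file.  Under stated
 * assumptions it shows the two halves described in the header comment of
 * this file: a card driver creating a sequencer device entry with
 * snd_seq_device_new(), and the separate sequencer-driver module that binds
 * to it.  The "foo-synth" id, the foo_* names and struct foo_args are made
 * up for illustration; snd_seq_device_new(), SNDRV_SEQ_DEVICE_ARGPTR(),
 * to_seq_dev(), struct snd_seq_driver and module_snd_seq_driver() are the
 * interfaces declared in <sound/seq_device.h>.
 */
#include <linux/module.h>
#include <sound/core.h>
#include <sound/seq_device.h>

#define SNDRV_SEQ_DEV_ID_FOO	"foo-synth"	/* hypothetical id string */

struct foo_args {				/* data passed via the argsize area */
	unsigned long port;
};

/*
 * Card-driver side: create the entry.  The matching "snd-foo-synth" module
 * is request_module()'d later, when the sequencer core loads its drivers.
 */
static int foo_create_seq_dev(struct snd_card *card, unsigned long port)
{
	struct snd_seq_device *sdev;
	int err;

	err = snd_seq_device_new(card, 0, SNDRV_SEQ_DEV_ID_FOO,
				 sizeof(struct foo_args), &sdev);
	if (err < 0)
		return err;

	((struct foo_args *)SNDRV_SEQ_DEVICE_ARGPTR(sdev))->port = port;
	return 0;
}

/* Sequencer-driver side, normally living in the snd-foo-synth module. */
static int foo_seq_probe(struct device *dev)
{
	struct snd_seq_device *sdev = to_seq_dev(dev);
	struct foo_args *args = SNDRV_SEQ_DEVICE_ARGPTR(sdev);

	/* ... register clients/ports with the sequencer core here ... */
	(void)args;
	return 0;
}

static int foo_seq_remove(struct device *dev)
{
	/* ... tear down whatever probe set up ... */
	return 0;
}

static struct snd_seq_driver foo_seq_driver = {
	.driver = {
		.name = KBUILD_MODNAME,
		.probe = foo_seq_probe,
		.remove = foo_seq_remove,
	},
	.id = SNDRV_SEQ_DEV_ID_FOO,
	/* argsize must match what the card driver passed above. */
	.argsize = sizeof(struct foo_args),
};
module_snd_seq_driver(foo_seq_driver);

MODULE_ALIAS("snd-foo-synth");	/* what request_module("snd-%s", id) asks for */
MODULE_LICENSE("GPL");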
/* SPDX-License-Identifier: GPL-2.0-or-later
 *
 * Copyright (C) 2005 David Brownell
 */

#ifndef __LINUX_SPI_H
#define __LINUX_SPI_H

#include <linux/acpi.h>
#include <linux/bits.h>
#include <linux/completion.h>
#include <linux/device.h>
#include <linux/gpio/consumer.h>
#include <linux/kthread.h>
#include <linux/mod_devicetable.h>
#include <linux/overflow.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/u64_stats_sync.h>

#include <uapi/linux/spi/spi.h>

/* Max no. of CS supported per spi device */
#define SPI_CS_CNT_MAX 16

struct dma_chan;
struct software_node;
struct ptp_system_timestamp;
struct spi_controller;
struct spi_transfer;
struct spi_controller_mem_ops;
struct spi_controller_mem_caps;
struct spi_message;

/*
 * INTERFACES between SPI master-side drivers and SPI slave protocol handlers,
 * and SPI infrastructure.
 */
extern const struct bus_type spi_bus_type;

/**
 * struct spi_statistics - statistics for spi transfers
 * @syncp: seqcount to protect members in this struct for per-cpu update
 *	on 32-bit systems
 *
 * @messages: number of spi-messages handled
 * @transfers: number of spi_transfers handled
 * @errors: number of errors during spi_transfer
 * @timedout: number of timeouts during spi_transfer
 *
 * @spi_sync: number of times spi_sync is used
 * @spi_sync_immediate:
 *	number of times spi_sync is executed immediately
 *	in calling context without queuing and scheduling
 * @spi_async: number of times spi_async is used
 *
 * @bytes: number of bytes transferred to/from device
 * @bytes_tx: number of bytes sent to device
 * @bytes_rx: number of bytes received from device
 *
 * @transfer_bytes_histo:
 *	transfer bytes histogram
 *
 * @transfers_split_maxsize:
 *	number of transfers that have been split because of
 *	maxsize limit
 */
struct spi_statistics {
	struct u64_stats_sync	syncp;

	u64_stats_t		messages;
	u64_stats_t		transfers;
	u64_stats_t		errors;
	u64_stats_t		timedout;

	u64_stats_t		spi_sync;
	u64_stats_t		spi_sync_immediate;
	u64_stats_t		spi_async;

	u64_stats_t		bytes;
	u64_stats_t		bytes_rx;
	u64_stats_t		bytes_tx;

#define SPI_STATISTICS_HISTO_SIZE 17
	u64_stats_t	transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE];

	u64_stats_t	transfers_split_maxsize;
};

#define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count)		\
	do {								\
		struct spi_statistics *__lstats;			\
		get_cpu();						\
		__lstats = this_cpu_ptr(pcpu_stats);			\
		u64_stats_update_begin(&__lstats->syncp);		\
		u64_stats_add(&__lstats->field, count);			\
		u64_stats_update_end(&__lstats->syncp);			\
		put_cpu();						\
	} while (0)

#define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field)		\
	do {								\
		struct spi_statistics *__lstats;			\
		get_cpu();						\
		__lstats = this_cpu_ptr(pcpu_stats);			\
		u64_stats_update_begin(&__lstats->syncp);		\
		u64_stats_inc(&__lstats->field);			\
		u64_stats_update_end(&__lstats->syncp);			\
		put_cpu();						\
	} while (0)

/**
 * struct spi_delay - SPI delay information
 * @value: Value for the delay
 * @unit: Unit for the delay
 */
struct spi_delay {
#define SPI_DELAY_UNIT_USECS	0
#define SPI_DELAY_UNIT_NSECS	1
#define SPI_DELAY_UNIT_SCK	2
	u16	value;
	u8	unit;
};

extern int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer);
extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer);
extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
					      struct spi_transfer *xfer);

/**
 * struct spi_device -
Controller side proxy for an SPI slave device * @dev: Driver model representation of the device. * @controller: SPI controller used with the device. * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. * @chip_select: Array of physical chipselect, spi->chipselect[i] gives * the corresponding physical CS for logical CS i. * @mode: The spi mode defines how data is clocked out and in. * This may be changed by the device's driver. * The "active low" default for chipselect mode can be overridden * (by specifying SPI_CS_HIGH) as can the "MSB first" default for * each word in a transfer (by specifying SPI_LSB_FIRST). * @bits_per_word: Data transfers involve one or more words; word sizes * like eight or 12 bits are common. In-memory wordsizes are * powers of two bytes (e.g. 20 bit samples use 32 bits). * This may be changed by the device's driver, or left at the * default (0) indicating protocol words are eight bit bytes. * The spi_transfer.bits_per_word can override this for each transfer. * @rt: Make the pump thread real time priority. * @irq: Negative, or the number passed to request_irq() to receive * interrupts from this device. * @controller_state: Controller's runtime state * @controller_data: Board-specific definitions for controller, such as * FIFO initialization parameters; from board_info.controller_data * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging * @driver_override: If the name of a driver is written to this attribute, then * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines * (optional, NULL when not using a GPIO line) * @word_delay: delay to be inserted between consecutive * words of a transfer * @cs_setup: delay to be introduced by the controller after CS is asserted * @cs_hold: delay to be introduced by the controller before CS is deasserted * @cs_inactive: delay to be introduced by the controller after CS is * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. * @pcpu_statistics: statistics for the spi_device * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array * * A @spi_device is used to interchange data between an SPI slave * (usually a discrete chip) and CPU memory. * * In @dev, the platform_data is used to hold information about this * device that's meaningful to the device's protocol driver, but not * to its controller. One example might be an identifier for a chip * variant with slightly different functionality; another might be * information about how this particular board wires the chip's pins. */ struct spi_device { struct device dev; struct spi_controller *controller; u32 max_speed_hz; u8 chip_select[SPI_CS_CNT_MAX]; u8 bits_per_word; bool rt; #define SPI_NO_TX BIT(31) /* No transmit wire */ #define SPI_NO_RX BIT(30) /* No receive wire */ /* * TPM specification defines flow control over SPI. Client device * can insert a wait state on MISO when address is transmitted by * controller on MOSI. Detecting the wait state in software is only * possible for full duplex controllers. 
For controllers that support * only half-duplex, the wait state detection needs to be implemented * in hardware. TPM devices would set this flag when hardware flow * control is expected from SPI controller. */ #define SPI_TPM_HW_FLOW BIT(29) /* TPM HW flow control */ /* * All bits defined above should be covered by SPI_MODE_KERNEL_MASK. * The SPI_MODE_KERNEL_MASK has the SPI_MODE_USER_MASK counterpart, * which is defined in 'include/uapi/linux/spi/spi.h'. * The bits defined here are from bit 31 downwards, while in * SPI_MODE_USER_MASK are from 0 upwards. * These bits must not overlap. A static assert check should make sure of that. * If adding extra bits, make sure to decrease the bit index below as well. */ #define SPI_MODE_KERNEL_MASK (~(BIT(29) - 1)) u32 mode; int irq; void *controller_state; void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ struct spi_delay word_delay; /* Inter-word delay */ /* CS delays */ struct spi_delay cs_setup; struct spi_delay cs_hold; struct spi_delay cs_inactive; /* The statistics */ struct spi_statistics __percpu *pcpu_statistics; /* Bit mask of the chipselect(s) that the driver need to use from * the chipselect array.When the controller is capable to handle * multiple chip selects & memories are connected in parallel * then more than one bit need to be set in cs_index_mask. */ u32 cs_index_mask : SPI_CS_CNT_MAX; /* * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: * - memory packing (12 bit samples into low bits, others zeroed) * - priority * - chipselect delays * - ... */ }; /* Make sure that SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK don't overlap */ static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); static inline struct spi_device *to_spi_device(const struct device *dev) { return dev ? container_of(dev, struct spi_device, dev) : NULL; } /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) { return (spi && get_device(&spi->dev)) ? 
spi : NULL; } static inline void spi_dev_put(struct spi_device *spi) { if (spi) put_device(&spi->dev); } /* ctldata is for the bus_controller driver's runtime state */ static inline void *spi_get_ctldata(const struct spi_device *spi) { return spi->controller_state; } static inline void spi_set_ctldata(struct spi_device *spi, void *state) { spi->controller_state = state; } /* Device driver data */ static inline void spi_set_drvdata(struct spi_device *spi, void *data) { dev_set_drvdata(&spi->dev, data); } static inline void *spi_get_drvdata(const struct spi_device *spi) { return dev_get_drvdata(&spi->dev); } static inline u8 spi_get_chipselect(const struct spi_device *spi, u8 idx) { return spi->chip_select[idx]; } static inline void spi_set_chipselect(struct spi_device *spi, u8 idx, u8 chipselect) { spi->chip_select[idx] = chipselect; } static inline struct gpio_desc *spi_get_csgpiod(const struct spi_device *spi, u8 idx) { return spi->cs_gpiod[idx]; } static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_desc *csgpiod) { spi->cs_gpiod[idx] = csgpiod; } static inline bool spi_is_csgpiod(struct spi_device *spi) { u8 idx; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { if (spi_get_csgpiod(spi, idx)) return true; } return false; } /** * struct spi_driver - Host side "protocol" driver * @id_table: List of SPI devices supported by this driver * @probe: Binds this driver to the SPI device. Drivers can verify * that the device is actually present, and may need to configure * characteristics (such as bits_per_word) which weren't needed for * the initial configuration done during system setup. * @remove: Unbinds this driver from the SPI device * @shutdown: Standard shutdown callback used during system state * transitions such as powerdown/halt and kexec * @driver: SPI device drivers should initialize the name and owner * field of this structure. * * This represents the kind of device driver that uses SPI messages to * interact with the hardware at the other end of a SPI link. It's called * a "protocol" driver because it works through messages rather than talking * directly to SPI hardware (which is what the underlying SPI controller * driver does to pass those messages). These protocols are defined in the * specification for the device(s) supported by the driver. * * As a rule, those device protocols represent the lowest level interface * supported by a driver, and it will support upper level interfaces too. * Examples of such upper levels include frameworks like MTD, networking, * MMC, RTC, filesystem character device nodes, and hardware monitoring. */ struct spi_driver { const struct spi_device_id *id_table; int (*probe)(struct spi_device *spi); void (*remove)(struct spi_device *spi); void (*shutdown)(struct spi_device *spi); struct device_driver driver; }; static inline struct spi_driver *to_spi_driver(struct device_driver *drv) { return drv ? 
container_of(drv, struct spi_driver, driver) : NULL; } extern int __spi_register_driver(struct module *owner, struct spi_driver *sdrv); /** * spi_unregister_driver - reverse effect of spi_register_driver * @sdrv: the driver to unregister * Context: can sleep */ static inline void spi_unregister_driver(struct spi_driver *sdrv) { if (sdrv) driver_unregister(&sdrv->driver); } extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select); /* Use a define to avoid include chaining to get THIS_MODULE */ #define spi_register_driver(driver) \ __spi_register_driver(THIS_MODULE, driver) /** * module_spi_driver() - Helper macro for registering a SPI driver * @__spi_driver: spi_driver struct * * Helper macro for SPI drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_spi_driver(__spi_driver) \ module_driver(__spi_driver, spi_register_driver, \ spi_unregister_driver) /** * struct spi_controller - interface to SPI master or slave controller * @dev: device interface to this driver * @list: link with the global spi_controller list * @bus_num: board-specific (and often SOC-specific) identifier for a * given SPI controller. * @num_chipselect: chipselects are used to distinguish individual * SPI slaves, and are numbered from zero to num_chipselects. * each slave has a chipselect signal, but it's common that not * every chipselect is connected to a slave. * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @buswidth_override_bits: flags to override for this controller driver * @bits_per_word_mask: A mask indicating which values of bits_per_word are * supported by the driver. Bit n indicates that a bits_per_word n+1 is * supported. If set, the SPI core will reject any transfer with an * unsupported bits_per_word. If not set, this value is simply ignored, * and it's up to the individual driver to perform any validation. * @min_speed_hz: Lowest supported transfer speed * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver * @slave: indicates that this is an SPI slave controller * @target: indicates that this is an SPI target controller * @devm_allocated: whether the allocation of this struct is devres-managed * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @max_message_size: function that returns the max message size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @io_mutex: mutex for physical bus access * @add_lock: mutex to avoid adding devices to the same chipselect * @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for exclusion of multiple callers * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. * It's always safe to call this unless transfers are pending on * the device whose settings are being modified. 
* @set_cs_timing: optional hook for SPI devices to request SPI master * controller for configuring specific CS setup time, hold time and inactive * delay interms of clock counts * @transfer: adds a message to the controller's transfer queue. * @cleanup: frees controller-specific state * @can_dma: determine whether this controller supports DMA * @dma_map_dev: device which can be used for DMA mapping * @cur_rx_dma_dev: device which is currently used for RX DMA mapping * @cur_tx_dma_dev: device which is currently used for TX DMA mapping * @queued: whether this controller is providing an internal message queue * @kworker: pointer to thread struct for message pump * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to synchronise access to message queue * @queue: message queue * @cur_msg: the currently in-flight message * @cur_msg_completion: a completion for the current in-flight message * @cur_msg_incomplete: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to check if the driver has * already called spi_finalize_current_message(). * @cur_msg_need_completion: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() * @cur_msg_mapped: message has been mapped for DMA * @fallback: fallback to PIO if DMA transfer return failure with * SPI_TRANS_FAIL_NO_START. * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running * @rt: whether this queue is set to run as a realtime task * @auto_runtime_pm: the core should ensure a runtime PM reference is held * while the hardware is prepared, using the parent * device for the spidev * @max_dma_len: Maximum length of a DMA transfer for the device. * @prepare_transfer_hardware: a message will soon arrive from the queue * so the subsystem requests the driver to prepare the transfer hardware * by issuing this call * @transfer_one_message: the subsystem calls the driver to transfer a single * message while queuing transfers that arrive in the meantime. When the * driver is finished with this message, it must call * spi_finalize_current_message() so the subsystem can issue the next * message * @unprepare_transfer_hardware: there are currently no more messages on the * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call * * @set_cs: set the logic level of the chip select line. May be called * from interrupt context. * @optimize_message: optimize the message for reuse * @unoptimize_message: release resources allocated by optimize_message * @prepare_message: set up the controller to transfer a single message, * for example doing DMA mapping. Called from threaded * context. * @transfer_one: transfer a single spi_transfer. * * - return 0 if the transfer is finished, * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must * call spi_finalize_current_transfer() so the subsystem * can issue the next transfer. If the transfer fails, the * driver must set the flag SPI_TRANS_FAIL_IO to * spi_transfer->error first, before calling * spi_finalize_current_transfer(). 
* Note: transfer_one and transfer_one_message are mutually * exclusive; when both are set, the generic subsystem does * not call your transfer_one callback. * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @mem_ops: optimized/dedicated operations for interactions with SPI memory. * This field is optional and should only be implemented if the * controller has native support for memory like operations. * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab * GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have * the cs_gpiod assigned if a GPIO line is found for the chipselect. * @unused_native_cs: When cs_gpiods is used, spi_register_controller() will * fill in this field with the first unused native CS, to be used by SPI * controller drivers that need to drive a native CS when using GPIO CS. * @max_native_cs: When cs_gpiods is used, and this field is filled in, * spi_register_controller() will validate all native CS (including the * unused native CS) against this value. * @pcpu_statistics: statistics for the spi_controller * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices * @dummy_tx: dummy transmit buffer for full-duplex devices * @fw_translate_cs: If the boot firmware uses different numbering scheme * what Linux expects, this optional hook can be used to translate * between the two. * @ptp_sts_supported: If the driver sets this to true, it must provide a * time snapshot in @spi_transfer->ptp_sts as close as possible to the * moment in time when @spi_transfer->ptp_sts_word_pre and * @spi_transfer->ptp_sts_word_post were transmitted. * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. * @irq_flags: Interrupt enable state during PTP system timestamping * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. * @must_async: disable all fast paths in the core * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals * but not chip select signals. Each device may be configured to use a * different clock rate, since those shared signals are ignored unless * the chip is selected. * * The driver for an SPI controller manages access to those devices through * a queue of spi_message transactions, copying data between CPU memory and * an SPI slave device. For each such message it queues, it calls the * message's completion function when the transaction completes. */ struct spi_controller { struct device dev; struct list_head list; /* * Other than negative (== assign one dynamically), bus_num is fully * board-specific. Usually that simplifies to being SoC-specific. * example: one SoC has three SPI controllers, numbered 0..2, * and one board's schematics might show it using SPI-2. 
Software * would normally use bus_num=2 for that controller. */ s16 bus_num; /* * Chipselects will be integral to many controllers; some others * might use board-specific GPIOs. */ u16 num_chipselect; /* Some SPI controllers pose alignment requirements on DMAable * buffers; let protocol drivers know about these requirements. */ u16 dma_alignment; /* spi_device.mode flags understood by this controller driver */ u32 mode_bits; /* spi_device.mode flags override flags for this controller */ u32 buswidth_override_bits; /* Bitmask of supported bits_per_word for transfers */ u32 bits_per_word_mask; #define SPI_BPW_MASK(bits) BIT((bits) - 1) #define SPI_BPW_RANGE_MASK(min, max) GENMASK((max) - 1, (min) - 1) /* Limits on transfer speed */ u32 min_speed_hz; u32 max_speed_hz; /* Other constraints relevant to this driver */ u16 flags; #define SPI_CONTROLLER_HALF_DUPLEX BIT(0) /* Can't do full duplex */ #define SPI_CONTROLLER_NO_RX BIT(1) /* Can't do buffer read */ #define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ #define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* * The spi-controller has multi chip select capability and can * assert/de-assert more than one chip select at once. */ #define SPI_CONTROLLER_MULTI_CS BIT(7) /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; union { /* Flag indicating this is an SPI slave controller */ bool slave; /* Flag indicating this is an SPI target controller */ bool target; }; /* * On some hardware transfer / message size may be constrained * the limit may depend on device transfer settings. */ size_t (*max_transfer_size)(struct spi_device *spi); size_t (*max_message_size)(struct spi_device *spi); /* I/O mutex */ struct mutex io_mutex; /* Used to avoid adding the same CS twice */ struct mutex add_lock; /* Lock and mutex for SPI bus locking */ spinlock_t bus_lock_spinlock; struct mutex bus_lock_mutex; /* Flag indicating that the SPI bus is locked for exclusive use */ bool bus_lock_flag; /* * Setup mode and clock, etc (SPI driver may call many times). * * IMPORTANT: this may be called when transfers to another * device are active. DO NOT UPDATE SHARED REGISTERS in ways * which could break those transfers. */ int (*setup)(struct spi_device *spi); /* * set_cs_timing() method is for SPI controllers that supports * configuring CS timing. * * This hook allows SPI client drivers to request SPI controllers * to configure specific CS timing through spi_set_cs_timing() after * spi_setup(). */ int (*set_cs_timing)(struct spi_device *spi); /* * Bidirectional bulk transfers * * + The transfer() method may not sleep; its main role is * just to add the message to the queue. * + For now there's no remove-from-queue operation, or * any other request management * + To a given spi_device, message queueing is pure FIFO * * + The controller's main job is to process its message queue, * selecting a chip (for masters), then transferring data * + If there are multiple spi_device children, the i/o queue * arbitration algorithm is unspecified (round robin, FIFO, * priority, reservations, preemption, etc) * * + Chipselect stays active during the entire message * (unless modified by spi_transfer.cs_change != 0). 
* + The message transfers use clock and SPI mode parameters * previously established by setup() for this device */ int (*transfer)(struct spi_device *spi, struct spi_message *mesg); /* Called on release() to free memory provided by spi_controller */ void (*cleanup)(struct spi_device *spi); /* * Used to enable core support for DMA handling, if can_dma() * exists and returns true then the transfer will be mapped * prior to transfer_one() being called. The driver should * not modify or store xfer and dma_tx and dma_rx must be set * while the device is prepared. */ bool (*can_dma)(struct spi_controller *ctlr, struct spi_device *spi, struct spi_transfer *xfer); struct device *dma_map_dev; struct device *cur_rx_dma_dev; struct device *cur_tx_dma_dev; /* * These hooks are for drivers that want to use the generic * controller transfer queueing mechanism. If these are used, the * transfer() function above must NOT be specified by the driver. * Over time we expect SPI drivers to be phased over to this API. */ bool queued; struct kthread_worker *kworker; struct kthread_work pump_messages; spinlock_t queue_lock; struct list_head queue; struct spi_message *cur_msg; struct completion cur_msg_completion; bool cur_msg_incomplete; bool cur_msg_need_completion; bool busy; bool running; bool rt; bool auto_runtime_pm; bool cur_msg_mapped; bool fallback; bool last_cs_mode_high; s8 last_cs[SPI_CS_CNT_MAX]; u32 last_cs_index_mask : SPI_CS_CNT_MAX; struct completion xfer_completion; size_t max_dma_len; int (*optimize_message)(struct spi_message *msg); int (*unoptimize_message)(struct spi_message *msg); int (*prepare_transfer_hardware)(struct spi_controller *ctlr); int (*transfer_one_message)(struct spi_controller *ctlr, struct spi_message *mesg); int (*unprepare_transfer_hardware)(struct spi_controller *ctlr); int (*prepare_message)(struct spi_controller *ctlr, struct spi_message *message); int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); union { int (*slave_abort)(struct spi_controller *ctlr); int (*target_abort)(struct spi_controller *ctlr); }; /* * These hooks are for drivers that use a generic implementation * of transfer_one_message() provided by the core. */ void (*set_cs)(struct spi_device *spi, bool enable); int (*transfer_one)(struct spi_controller *ctlr, struct spi_device *spi, struct spi_transfer *transfer); void (*handle_err)(struct spi_controller *ctlr, struct spi_message *message); /* Optimized handlers for SPI memory-like operations. */ const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; /* GPIO chip select */ struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; s8 unused_native_cs; s8 max_native_cs; /* Statistics */ struct spi_statistics __percpu *pcpu_statistics; /* DMA channels for use with core dmaengine helpers */ struct dma_chan *dma_tx; struct dma_chan *dma_rx; /* Dummy data for full duplex devices */ void *dummy_rx; void *dummy_tx; int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs); /* * Driver sets this field to indicate it is able to snapshot SPI * transfers (needed e.g. 
for reading the time of POSIX clocks) */ bool ptp_sts_supported; /* Interrupt enable state during PTP system timestamping */ unsigned long irq_flags; /* Flag for enabling opportunistic skipping of the queue in spi_sync */ bool queue_empty; bool must_async; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) { return dev_get_drvdata(&ctlr->dev); } static inline void spi_controller_set_devdata(struct spi_controller *ctlr, void *data) { dev_set_drvdata(&ctlr->dev, data); } static inline struct spi_controller *spi_controller_get(struct spi_controller *ctlr) { if (!ctlr || !get_device(&ctlr->dev)) return NULL; return ctlr; } static inline void spi_controller_put(struct spi_controller *ctlr) { if (ctlr) put_device(&ctlr->dev); } static inline bool spi_controller_is_slave(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; } static inline bool spi_controller_is_target(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target; } /* PM calls that need to be issued by the driver */ extern int spi_controller_suspend(struct spi_controller *ctlr); extern int spi_controller_resume(struct spi_controller *ctlr); /* Calls the driver make to interact with the message queue */ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr); extern void spi_finalize_current_message(struct spi_controller *ctlr); extern void spi_finalize_current_transfer(struct spi_controller *ctlr); /* Helper calls for driver to timestamp transfer */ void spi_take_timestamp_pre(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); /* The SPI driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); static inline struct spi_controller *spi_alloc_master(struct device *host, unsigned int size) { return __spi_alloc_controller(host, size, false); } static inline struct spi_controller *spi_alloc_slave(struct device *host, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __spi_alloc_controller(host, size, true); } static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) { return __spi_alloc_controller(dev, size, false); } static inline struct spi_controller *spi_alloc_target(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __spi_alloc_controller(dev, size, true); } struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave); static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, unsigned int size) { return __devm_spi_alloc_controller(dev, size, false); } static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __devm_spi_alloc_controller(dev, size, true); } static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) { return __devm_spi_alloc_controller(dev, size, false); } static inline struct spi_controller *devm_spi_alloc_target(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __devm_spi_alloc_controller(dev, size, true); } extern int spi_register_controller(struct spi_controller *ctlr); extern int 
devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); #endif /* * SPI resource management while processing a SPI message */ typedef void (*spi_res_release_t)(struct spi_controller *ctlr, struct spi_message *msg, void *res); /** * struct spi_res - SPI resource management structure * @entry: list entry * @release: release code called prior to freeing this resource * @data: extra data allocated for the specific use-case * * This is based on ideas from devres, but focused on life-cycle * management during spi_message processing. */ struct spi_res { struct list_head entry; spi_res_release_t release; unsigned long long data[]; /* Guarantee ull alignment */ }; /*---------------------------------------------------------------------------*/ /* * I/O INTERFACE between SPI controller and protocol drivers * * Protocol drivers use a queue of spi_messages, each transferring data * between the controller and memory buffers. * * The spi_messages themselves consist of a series of read+write transfer * segments. Those segments always read the same number of bits as they * write; but one or the other is easily ignored by passing a NULL buffer * pointer. (This is unlike most types of I/O API, because SPI hardware * is full duplex.) * * NOTE: Allocation of spi_transfer and spi_message memory is entirely * up to the protocol driver, which guarantees the integrity of both (as * well as the data buffers) for as long as the message is queued. */ /** * struct spi_transfer - a read/write buffer pair * @tx_buf: data to be written (DMA-safe memory), or NULL * @rx_buf: data to be read (DMA-safe memory), or NULL * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped * @tx_nbits: number of bits used for writing. If 0 the default * (SPI_NBITS_SINGLE) is used. * @rx_nbits: number of bits used for reading. If 0 the default * (SPI_NBITS_SINGLE) is used. * @len: size of rx and tx buffers (in bytes) * @speed_hz: Select a speed other than the device default for this * transfer. If 0 the default (from @spi_device) is used. * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @dummy_data: indicates transfer is dummy bytes transfer. * @cs_off: performs the transfer with chipselect off. * @cs_change: affects chipselect after this transfer completes * @cs_change_delay: delay between cs deassert and assert when * @cs_change is set and @spi_transfer is not the last in @spi_message * @delay: delay to be introduced after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. * @word_delay: inter word delay to be introduced after each word size * (set by bits_per_word) transmission. * @effective_speed_hz: the effective SCK-speed that was used to * transfer this transfer. Set to 0 if the SPI bus driver does * not support it. 
* @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset * within @tx_buf for which the SPI device is requesting that the time * snapshot for this transfer begins. Upon completing the SPI transfer, * this value may have changed compared to what was requested, depending * on the available snapshotting resolution (DMA transfer, * @ptp_sts_supported is false, etc). * @ptp_sts_word_post: See @ptp_sts_word_post. The two can be equal (meaning * that a single byte should be snapshotted). * If the core takes care of the timestamp (if @ptp_sts_supported is false * for this controller), it will set @ptp_sts_word_pre to 0, and * @ptp_sts_word_post to the length of the transfer. This is done * purposefully (instead of setting to spi_transfer->len - 1) to denote * that a transfer-level snapshot taken from within the driver may still * be of higher quality. * @ptp_sts: Pointer to a memory location held by the SPI slave device where a * PTP system timestamp structure may lie. If drivers use PIO or their * hardware has some sort of assist for retrieving exact transfer timing, * they can (and should) assert @ptp_sts_supported and populate this * structure using the ptp_read_system_*ts helper functions. * The timestamp must represent the time at which the SPI slave device has * processed the word, i.e. the "pre" timestamp should be taken before * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. * @timestamped: true if the transfer has been timestamped * @error: Error status logged by SPI controller driver. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. * In some cases, they may also want to provide DMA addresses for * the data being transferred; that may reduce overhead, when the * underlying driver uses DMA. * * If the transmit buffer is NULL, zeroes will be shifted out * while filling @rx_buf. If the receive buffer is NULL, the data * shifted in will be discarded. Only "len" bytes shift out (or in). * It's an error to try to shift out a partial word. (For example, by * shifting out three bytes with word size of sixteen or twenty bits; * the former uses two bytes per word, the latter uses four bytes.) * * In-memory data values are always in native CPU byte order, translated * from the wire byte order (big-endian except with SPI_LSB_FIRST). So * for example when bits_per_word is sixteen, buffers are 2N bytes long * (@len = 2N) and hold N sixteen bit words in CPU byte order. * * When the word size of the SPI transfer is not a power-of-two multiple * of eight bits, those in-memory words include extra bits. In-memory * words are always seen by protocol drivers as right-justified, so the * undefined (rx) or unused (tx) bits are always the most significant bits. * * All SPI transfers start with the relevant chipselect active. Normally * it stays selected until after the last transfer in a message. Drivers * can affect the chipselect signal using cs_change. * * (i) If the transfer isn't the last one in the message, this flag is * used to make the chipselect briefly go inactive in the middle of the * message. 
Toggling chipselect in this way may be needed to terminate * a chip command, letting a single spi_message perform all of group of * chip transactions together. * * (ii) When the transfer is the last one in the message, the chip may * stay selected until the next transfer. On multi-device SPI busses * with nothing blocking messages going to other devices, this is just * a performance hint; starting a message to another device deselects * this one. But in other cases, this can be used to ensure correctness. * Some devices need protocol transactions to be built from a series of * spi_message submissions, where the content of one message is determined * by the results of previous messages and where the whole transaction * ends when the chipselect goes inactive. * * When SPI can transfer in 1x,2x or 4x. It can get this transfer information * from device through @tx_nbits and @rx_nbits. In Bi-direction, these * two should both be set. User can set transfer mode with SPI_NBITS_SINGLE(1x) * SPI_NBITS_DUAL(2x) and SPI_NBITS_QUAD(4x) to support these three transfer. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to * insulate against future API updates. After you submit a message * and its transfers, ignore them until its completion callback. */ struct spi_transfer { /* * It's okay if tx_buf == rx_buf (right?). * For MicroWire, one buffer must be NULL. * Buffers must work with dma_*map_single() calls, unless * spi_message.is_dma_mapped reports a pre-existing mapping. */ const void *tx_buf; void *rx_buf; unsigned len; #define SPI_TRANS_FAIL_NO_START BIT(0) #define SPI_TRANS_FAIL_IO BIT(1) u16 error; dma_addr_t tx_dma; dma_addr_t rx_dma; struct sg_table tx_sg; struct sg_table rx_sg; unsigned dummy_data:1; unsigned cs_off:1; unsigned cs_change:1; unsigned tx_nbits:3; unsigned rx_nbits:3; unsigned timestamped:1; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; struct spi_delay word_delay; u32 speed_hz; u32 effective_speed_hz; unsigned int ptp_sts_word_pre; unsigned int ptp_sts_word_post; struct ptp_system_timestamp *ptp_sts; struct list_head transfer_list; }; /** * struct spi_message - one multi-segment SPI transaction * @transfers: list of transfer segments in this transaction * @spi: SPI device to which the transaction is queued * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual * addresses for each transfer buffer * @pre_optimized: peripheral driver pre-optimized the message * @optimized: the message is in the optimized state * @prepared: spi_prepare_message was called for the this message * @status: zero for success, else negative errno * @complete: called to report transaction completions * @context: the argument to complete() when it's called * @frame_length: the total number of bytes in the message * @actual_length: the total number of bytes that were transferred in all * successful segments * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message * @opt_state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct 
spi_transfer. The sequence is "atomic" * in the sense that no other spi_message may use that SPI bus until that * sequence completes. On some systems, many such sequences can execute as * a single programmed DMA transfer. On all systems, these messages are * queued, and might complete after transactions to other devices. Messages * sent to a given spi_device are always executed in FIFO order. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to * insulate against future API updates. After you submit a message * and its transfers, ignore them until its completion callback. */ struct spi_message { struct list_head transfers; struct spi_device *spi; unsigned is_dma_mapped:1; /* spi_optimize_message() was called for this message */ bool pre_optimized; /* __spi_optimize_message() was called for this message */ bool optimized; /* spi_prepare_message() was called for this message */ bool prepared; /* * REVISIT: we might want a flag affecting the behavior of the * last transfer ... allowing things like "read 16 bit length L" * immediately followed by "read L bytes". Basically imposing * a specific message scheduling algorithm. * * Some controller drivers (message-at-a-time queue processing) * could provide that as their default scheduling algorithm. But * others (with multi-message pipelines) could need a flag to * tell them about such special cases. */ /* Completion is reported through a callback */ int status; void (*complete)(void *context); void *context; unsigned frame_length; unsigned actual_length; /* * For optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later * complete(), that's the spi_controller controller driver. */ struct list_head queue; void *state; /* * Optional state for use by controller driver between calls to * __spi_optimize_message() and __spi_unoptimize_message(). */ void *opt_state; /* List of spi_res resources when the SPI message is processed */ struct list_head resources; }; static inline void spi_message_init_no_memset(struct spi_message *m) { INIT_LIST_HEAD(&m->transfers); INIT_LIST_HEAD(&m->resources); } static inline void spi_message_init(struct spi_message *m) { memset(m, 0, sizeof *m); spi_message_init_no_memset(m); } static inline void spi_message_add_tail(struct spi_transfer *t, struct spi_message *m) { list_add_tail(&t->transfer_list, &m->transfers); } static inline void spi_transfer_del(struct spi_transfer *t) { list_del(&t->transfer_list); } static inline int spi_transfer_delay_exec(struct spi_transfer *t) { return spi_delay_exec(&t->delay, t); } /** * spi_message_init_with_transfers - Initialize spi_message and append transfers * @m: spi_message to be initialized * @xfers: An array of SPI transfers * @num_xfers: Number of items in the xfer array * * This function initializes the given spi_message and adds each spi_transfer in * the given array to the message. */ static inline void spi_message_init_with_transfers(struct spi_message *m, struct spi_transfer *xfers, unsigned int num_xfers) { unsigned int i; spi_message_init(m); for (i = 0; i < num_xfers; ++i) spi_message_add_tail(&xfers[i], m); } /* * It's fine to embed message and transaction structures in other data * structures so long as you don't free them while they're in use. 
*/ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { struct spi_message_with_transfers { struct spi_message m; struct spi_transfer t[]; } *mwt; unsigned i; mwt = kzalloc(struct_size(mwt, t, ntrans), flags); if (!mwt) return NULL; spi_message_init_no_memset(&mwt->m); for (i = 0; i < ntrans; i++) spi_message_add_tail(&mwt->t[i], &mwt->m); return &mwt->m; } static inline void spi_message_free(struct spi_message *m) { kfree(m); } extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg); extern void spi_unoptimize_message(struct spi_message *msg); extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); extern int spi_target_abort(struct spi_device *spi); static inline size_t spi_max_message_size(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; if (!ctlr->max_message_size) return SIZE_MAX; return ctlr->max_message_size(spi); } static inline size_t spi_max_transfer_size(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; size_t tr_max = SIZE_MAX; size_t msg_max = spi_max_message_size(spi); if (ctlr->max_transfer_size) tr_max = ctlr->max_transfer_size(spi); /* Transfer size limit must not be greater than message size limit */ return min(tr_max, msg_max); } /** * spi_is_bpw_supported - Check if bits per word is supported * @spi: SPI device * @bpw: Bits per word * * This function checks to see if the SPI controller supports @bpw. * * Returns: * True if @bpw is supported, false otherwise. */ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) { u32 bpw_mask = spi->controller->bits_per_word_mask; if (bpw == 8 || (bpw <= 32 && bpw_mask & SPI_BPW_MASK(bpw))) return true; return false; } /** * spi_controller_xfer_timeout - Compute a suitable timeout value * @ctlr: SPI device * @xfer: Transfer descriptor * * Compute a relevant timeout value for the given transfer. We derive the time * that it would take on a single data line and take twice this amount of time * with a minimum of 500ms to avoid false positives on loaded systems. * * Returns: Transfer timeout value in milliseconds. */ static inline unsigned int spi_controller_xfer_timeout(struct spi_controller *ctlr, struct spi_transfer *xfer) { return max(xfer->len * 8 * 2 / (xfer->speed_hz / 1000), 500U); } /*---------------------------------------------------------------------------*/ /* SPI transfer replacement methods which make use of spi_res */ struct spi_replaced_transfers; typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr, struct spi_message *msg, struct spi_replaced_transfers *res); /** * struct spi_replaced_transfers - structure describing the spi_transfer * replacements that have occurred * so that they can get reverted * @release: some extra release code to get executed prior to * releasing this structure * @extradata: pointer to some extra data if requested or NULL * @replaced_transfers: transfers that have been replaced and which need * to get restored * @replaced_after: the transfer after which the @replaced_transfers * are to get re-inserted * @inserted: number of transfers inserted * @inserted_transfers: array of spi_transfers of array-size @inserted, * that have been replacing replaced_transfers * * Note: that @extradata will point to @inserted_transfers[@inserted] * if some extra allocation is requested, so alignment will be the same * as for spi_transfers. 
*/ struct spi_replaced_transfers { spi_replaced_release_t release; void *extradata; struct list_head replaced_transfers; struct list_head *replaced_after; size_t inserted; struct spi_transfer inserted_transfers[]; }; /*---------------------------------------------------------------------------*/ /* SPI transfer transformation methods */ extern int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, size_t maxsize); extern int spi_split_transfers_maxwords(struct spi_controller *ctlr, struct spi_message *msg, size_t maxwords); /*---------------------------------------------------------------------------*/ /* * All these synchronous SPI transfer routines are utilities layered * over the core async transfer primitive. Here, "synchronous" means * they will sleep uninterruptibly until the async transfer completes. */ extern int spi_sync(struct spi_device *spi, struct spi_message *message); extern int spi_sync_locked(struct spi_device *spi, struct spi_message *message); extern int spi_bus_lock(struct spi_controller *ctlr); extern int spi_bus_unlock(struct spi_controller *ctlr); /** * spi_sync_transfer - synchronous SPI data transfer * @spi: device with which data will be exchanged * @xfers: An array of spi_transfers * @num_xfers: Number of items in the xfer array * Context: can sleep * * Does a synchronous SPI data transfer of the given spi_transfer array. * * For more specific semantics see spi_sync(). * * Return: zero on success, else a negative error code. */ static inline int spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, unsigned int num_xfers) { struct spi_message msg; spi_message_init_with_transfers(&msg, xfers, num_xfers); return spi_sync(spi, &msg); } /** * spi_write - SPI synchronous write * @spi: device to which data will be written * @buf: data buffer * @len: data buffer size * Context: can sleep * * This function writes the buffer @buf. * Callable only from contexts that can sleep. * * Return: zero on success, else a negative error code. */ static inline int spi_write(struct spi_device *spi, const void *buf, size_t len) { struct spi_transfer t = { .tx_buf = buf, .len = len, }; return spi_sync_transfer(spi, &t, 1); } /** * spi_read - SPI synchronous read * @spi: device from which data will be read * @buf: data buffer * @len: data buffer size * Context: can sleep * * This function reads the buffer @buf. * Callable only from contexts that can sleep. * * Return: zero on success, else a negative error code. */ static inline int spi_read(struct spi_device *spi, void *buf, size_t len) { struct spi_transfer t = { .rx_buf = buf, .len = len, }; return spi_sync_transfer(spi, &t, 1); } /* This copies txbuf and rxbuf data; for small transfers only! */ extern int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx); /** * spi_w8r8 - SPI synchronous 8 bit write followed by 8 bit read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * Callable only from contexts that can sleep. * * Return: the (unsigned) eight bit number returned by the * device, or else a negative error code. */ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) { ssize_t status; u8 result; status = spi_write_then_read(spi, &cmd, 1, &result, 1); /* Return negative errno or unsigned value */ return (status < 0) ? 
status : result; } /** * spi_w8r16 - SPI synchronous 8 bit write followed by 16 bit read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * The number is returned in wire-order, which is at least sometimes * big-endian. * * Callable only from contexts that can sleep. * * Return: the (unsigned) sixteen bit number returned by the * device, or else a negative error code. */ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) { ssize_t status; u16 result; status = spi_write_then_read(spi, &cmd, 1, &result, 2); /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } /** * spi_w8r16be - SPI synchronous 8 bit write followed by 16 bit big-endian read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * This function is similar to spi_w8r16, with the exception that it will * convert the read 16 bit data word from big-endian to native endianness. * * Callable only from contexts that can sleep. * * Return: the (unsigned) sixteen bit number returned by the device in CPU * endianness, or else a negative error code. */ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) { ssize_t status; __be16 result; status = spi_write_then_read(spi, &cmd, 1, &result, 2); if (status < 0) return status; return be16_to_cpu(result); } /*---------------------------------------------------------------------------*/ /* * INTERFACE between board init code and SPI infrastructure. * * No SPI driver ever sees these SPI device table segments, but * it's how the SPI core (or adapters that get hotplugged) grows * the driver model tree. * * As a rule, SPI devices can't be probed. Instead, board init code * provides a table listing the devices which are present, with enough * information to bind and set up the device's driver. There's basic * support for non-static configurations too; enough to handle adding * parport adapters, or microcontrollers acting as USB-to-SPI bridges. */ /** * struct spi_board_info - board-specific template for a SPI device * @modalias: Initializes spi_device.modalias; identifies the driver. * @platform_data: Initializes spi_device.platform_data; the particular * data stored there is driver-specific. * @swnode: Software node for the device. * @controller_data: Initializes spi_device.controller_data; some * controllers need hints about hardware setup, e.g. for DMA. * @irq: Initializes spi_device.irq; depends on how the board is wired. * @max_speed_hz: Initializes spi_device.max_speed_hz; based on limits * from the chip datasheet and board-specific signal quality issues. * @bus_num: Identifies which spi_controller parents the spi_device; unused * by spi_new_device(), and otherwise depends on board wiring. * @chip_select: Initializes spi_device.chip_select; depends on how * the board is wired. * @mode: Initializes spi_device.mode; based on the chip datasheet, board * wiring (some devices support both 3WIRE and standard modes), and * possibly presence of an inverter in the chipselect path. * * When adding new SPI devices to the device tree, these structures serve * as a partial device template. They hold information which can't always * be determined by drivers. Information that probe() can establish (such * as the default transfer wordsize) is not included here. * * These structures are used in two places. 
Their primary role is to * be stored in tables of board-specific device descriptors, which are * declared early in board initialization and then used (much later) to * populate a controller's device tree after that controller's driver * initializes. A secondary (and atypical) role is as a parameter to * a spi_new_device() call, which happens after those controller drivers * are active in some dynamic board configuration models. */ struct spi_board_info { /* * The device name and module name are coupled, like platform_bus; * "modalias" is normally the driver name. * * platform_data goes to spi_device.dev.platform_data, * controller_data goes to spi_device.controller_data, * IRQ is copied too. */ char modalias[SPI_NAME_SIZE]; const void *platform_data; const struct software_node *swnode; void *controller_data; int irq; /* Slower signaling on noisy or low voltage boards */ u32 max_speed_hz; /* * bus_num is board specific and matches the bus_num of some * spi_controller that will probably be registered later. * * chip_select reflects how this chip is wired to that master; * it's less than num_chipselect. */ u16 bus_num; u16 chip_select; /* * mode becomes spi_device.mode, and is essential for chips * where the default of SPI_CS_HIGH = 0 is wrong. */ u32 mode; /* * ... may need additional spi_device chip config data here. * avoid stuff protocol drivers can set; but include stuff * needed to behave without being bound to a driver: * - quirks like clock rate mattering when not selected */ }; #ifdef CONFIG_SPI extern int spi_register_board_info(struct spi_board_info const *info, unsigned n); #else /* Board init code may ignore whether SPI is configured or not */ static inline int spi_register_board_info(struct spi_board_info const *info, unsigned n) { return 0; } #endif /* * If you're hotplugging an adapter with devices (parport, USB, etc) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_controller(). * * You can also use spi_alloc_device() and spi_add_device() to use a two * stage registration sequence for each spi_device. This gives the caller * some more control over the spi_device structure before it is registered, * but requires the caller to initialize fields that would otherwise * be defined using the board info. */ extern struct spi_device * spi_alloc_device(struct spi_controller *ctlr); extern int spi_add_device(struct spi_device *spi); extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); extern void spi_unregister_device(struct spi_device *spi); extern const struct spi_device_id * spi_get_device_id(const struct spi_device *sdev); extern const void * spi_get_device_match_data(const struct spi_device *sdev); static inline bool spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) { return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } #endif /* __LINUX_SPI_H */
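To make the synchronous helper family above concrete, here is a minimal usage sketch (not part of the header): a protocol driver reading an ID register with spi_w8r8() and pushing a configuration buffer with spi_sync_transfer(). The device, the command byte 0x00 and the buffer layout are hypothetical.

/* Hedged example: the "read ID" command and config buffer are made up. */
static int example_read_chip_id(struct spi_device *spi, u8 *id)
{
	ssize_t ret;

	/* 8-bit command out, 8-bit reply in; sleeps until the transfer ends */
	ret = spi_w8r8(spi, 0x00);
	if (ret < 0)
		return ret;

	*id = ret;
	return 0;
}

static int example_write_config(struct spi_device *spi, const void *cfg, size_t len)
{
	struct spi_transfer xfer = {
		.tx_buf	= cfg,
		.len	= len,
	};

	/* Single-transfer message built on the stack and run synchronously */
	return spi_sync_transfer(spi, &xfer, 1);
}

Both helpers sleep until completion, so they may only be called from process context, matching the "can sleep" notes in the kernel-doc above.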
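A companion sketch of the board-init side just described, declaring one spi_board_info entry and handing it to the core. The modalias, bus number, chip select and clock rate are purely illustrative, not taken from any real board.

/* Hedged example: "example-sensor" and the bus/chip-select numbers are hypothetical. */
static struct spi_board_info example_board_info[] __initdata = {
	{
		.modalias	= "example-sensor",
		.max_speed_hz	= 1000000,
		.bus_num	= 0,	/* must match a controller registered later */
		.chip_select	= 1,
		.mode		= SPI_MODE_3,
	},
};

static int __init example_board_spi_init(void)
{
	/*
	 * Registered early from board init code; the spi_device itself is
	 * only created once the spi_controller for bus_num 0 shows up.
	 */
	return spi_register_board_info(example_board_info,
				       ARRAY_SIZE(example_board_info));
}

In a real board file this function would be called from the machine init path; it is shown here only to tie the struct fields to the registration call.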
// SPDX-License-Identifier: GPL-2.0-only /* * atusb.c - Driver for the ATUSB IEEE 802.15.4 dongle * * Written 2013 by Werner Almesberger <werner@almesberger.net> * * Copyright (c) 2015 - 2016 Stefan Schmidt <stefan@datenfreihafen.org> * * Based on at86rf230.c and spi_atusb.c. * at86rf230.c is * Copyright (C) 2009 Siemens AG * Written by: Dmitry Eremin-Solenikov <dmitry.baryshkov@siemens.com> * * spi_atusb.c is * Copyright (c) 2011 Richard Sharpe <realrichardsharpe@gmail.com> * Copyright (c) 2011 Stefan Schmidt <stefan@datenfreihafen.org> * Copyright (c) 2011 Werner Almesberger <werner@almesberger.net> * * USB initialization is * Copyright (c) 2013 Alexander Aring <alex.aring@gmail.com> * * Busware HUL support is * Copyright (c) 2017 Josef Filzmaier <j.filzmaier@gmx.at> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/jiffies.h> #include <linux/usb.h> #include <linux/skbuff.h> #include <net/cfg802154.h> #include <net/mac802154.h> #include "at86rf230.h" #include "atusb.h" #define ATUSB_JEDEC_ATMEL 0x1f /* JEDEC manufacturer ID */ #define ATUSB_NUM_RX_URBS 4 /* allow for a bit of local latency */ #define ATUSB_ALLOC_DELAY_MS 100 /* delay after failed allocation */ #define ATUSB_TX_TIMEOUT_MS 200 /* on the air timeout */ struct atusb { struct ieee802154_hw *hw; struct usb_device *usb_dev; struct atusb_chip_data *data; int shutdown; /* non-zero if shutting down */ int err; /* set by first error */ /* RX variables */ struct delayed_work work; /* memory allocations */ struct usb_anchor idle_urbs; /* URBs waiting to be submitted */ struct usb_anchor rx_urbs; /* URBs waiting for reception */ /* TX variables */ struct usb_ctrlrequest tx_dr; struct urb *tx_urb; struct sk_buff *tx_skb; u8 tx_ack_seq; /* current TX ACK sequence number */ /* Firmware variable */ unsigned char fw_ver_maj; /* Firmware major version number */ unsigned char fw_ver_min; /* Firmware minor version number */ unsigned char fw_hw_type; /* Firmware hardware type */ }; struct atusb_chip_data { u16 t_channel_switch; int rssi_base_val; int (*set_channel)(struct ieee802154_hw*, u8, u8); int (*set_txpower)(struct ieee802154_hw*, s32); }; static int atusb_write_subreg(struct atusb *atusb, u8 reg, u8 mask, u8 shift, u8 value) { struct usb_device *usb_dev = atusb->usb_dev; u8 orig, tmp; int ret = 0; dev_dbg(&usb_dev->dev, "%s: 0x%02x <- 0x%02x\n", __func__, reg, value); ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, 0, reg, &orig, 1, 1000, GFP_KERNEL); if (ret < 0) return ret; /* Write the value only into that part of the register which is allowed *
by the mask. All other bits stay as before. */ tmp = orig & ~mask; tmp |= (value << shift) & mask; if (tmp != orig) ret = usb_control_msg_send(usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, tmp, reg, NULL, 0, 1000, GFP_KERNEL); return ret; } static int atusb_read_subreg(struct atusb *lp, unsigned int addr, unsigned int mask, unsigned int shift) { int reg, ret; ret = usb_control_msg_recv(lp->usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, 0, addr, &reg, 1, 1000, GFP_KERNEL); if (ret < 0) return ret; reg = (reg & mask) >> shift; return reg; } static int atusb_get_and_clear_error(struct atusb *atusb) { int err = atusb->err; atusb->err = 0; return err; } /* ----- skb allocation ---------------------------------------------------- */ #define MAX_PSDU 127 #define MAX_RX_XFER (1 + MAX_PSDU + 2 + 1) /* PHR+PSDU+CRC+LQI */ #define SKB_ATUSB(skb) (*(struct atusb **)(skb)->cb) static void atusb_in(struct urb *urb); static int atusb_submit_rx_urb(struct atusb *atusb, struct urb *urb) { struct usb_device *usb_dev = atusb->usb_dev; struct sk_buff *skb = urb->context; int ret; if (!skb) { skb = alloc_skb(MAX_RX_XFER, GFP_KERNEL); if (!skb) { dev_warn_ratelimited(&usb_dev->dev, "atusb_in: can't allocate skb\n"); return -ENOMEM; } skb_put(skb, MAX_RX_XFER); SKB_ATUSB(skb) = atusb; } usb_fill_bulk_urb(urb, usb_dev, usb_rcvbulkpipe(usb_dev, 1), skb->data, MAX_RX_XFER, atusb_in, skb); usb_anchor_urb(urb, &atusb->rx_urbs); ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); kfree_skb(skb); urb->context = NULL; } return ret; } static void atusb_work_urbs(struct work_struct *work) { struct atusb *atusb = container_of(to_delayed_work(work), struct atusb, work); struct usb_device *usb_dev = atusb->usb_dev; struct urb *urb; int ret; if (atusb->shutdown) return; do { urb = usb_get_from_anchor(&atusb->idle_urbs); if (!urb) return; ret = atusb_submit_rx_urb(atusb, urb); } while (!ret); usb_anchor_urb(urb, &atusb->idle_urbs); dev_warn_ratelimited(&usb_dev->dev, "atusb_in: can't allocate/submit URB (%d)\n", ret); schedule_delayed_work(&atusb->work, msecs_to_jiffies(ATUSB_ALLOC_DELAY_MS) + 1); } /* ----- Asynchronous USB -------------------------------------------------- */ static void atusb_tx_done(struct atusb *atusb, u8 seq, int reason) { struct usb_device *usb_dev = atusb->usb_dev; u8 expect = atusb->tx_ack_seq; dev_dbg(&usb_dev->dev, "%s (0x%02x/0x%02x)\n", __func__, seq, expect); if (seq == expect) { /* TODO check for ifs handling in firmware */ if (reason == IEEE802154_SUCCESS) ieee802154_xmit_complete(atusb->hw, atusb->tx_skb, false); else ieee802154_xmit_error(atusb->hw, atusb->tx_skb, reason); } else { /* TODO I experience this case when atusb has a tx complete * irq before probing, we should fix the firmware it's an * unlikely case now that seq == expect is then true, but can * happen and fail with a tx_skb = NULL; */ ieee802154_xmit_hw_error(atusb->hw, atusb->tx_skb); } } static void atusb_in_good(struct urb *urb) { struct usb_device *usb_dev = urb->dev; struct sk_buff *skb = urb->context; struct atusb *atusb = SKB_ATUSB(skb); int result = IEEE802154_SUCCESS; u8 len, lqi, trac; if (!urb->actual_length) { dev_dbg(&usb_dev->dev, "atusb_in: zero-sized URB ?\n"); return; } len = *skb->data; switch (urb->actual_length) { case 2: trac = TRAC_MASK(*(skb->data + 1)); switch (trac) { case TRAC_SUCCESS: case TRAC_SUCCESS_DATA_PENDING: /* already IEEE802154_SUCCESS */ break; case TRAC_CHANNEL_ACCESS_FAILURE: result = IEEE802154_CHANNEL_ACCESS_FAILURE; break; case TRAC_NO_ACK: result = 
IEEE802154_NO_ACK; break; default: result = IEEE802154_SYSTEM_ERROR; } fallthrough; case 1: atusb_tx_done(atusb, len, result); return; } if (len + 1 > urb->actual_length - 1) { dev_dbg(&usb_dev->dev, "atusb_in: frame len %d+1 > URB %u-1\n", len, urb->actual_length); return; } if (!ieee802154_is_valid_psdu_len(len)) { dev_dbg(&usb_dev->dev, "atusb_in: frame corrupted\n"); return; } lqi = skb->data[len + 1]; dev_dbg(&usb_dev->dev, "atusb_in: rx len %d lqi 0x%02x\n", len, lqi); skb_pull(skb, 1); /* remove PHR */ skb_trim(skb, len); /* get payload only */ ieee802154_rx_irqsafe(atusb->hw, skb, lqi); urb->context = NULL; /* skb is gone */ } static void atusb_in(struct urb *urb) { struct usb_device *usb_dev = urb->dev; struct sk_buff *skb = urb->context; struct atusb *atusb = SKB_ATUSB(skb); dev_dbg(&usb_dev->dev, "%s: status %d len %d\n", __func__, urb->status, urb->actual_length); if (urb->status) { if (urb->status == -ENOENT) { /* being killed */ kfree_skb(skb); urb->context = NULL; return; } dev_dbg(&usb_dev->dev, "%s: URB error %d\n", __func__, urb->status); } else { atusb_in_good(urb); } usb_anchor_urb(urb, &atusb->idle_urbs); if (!atusb->shutdown) schedule_delayed_work(&atusb->work, 0); } /* ----- URB allocation/deallocation --------------------------------------- */ static void atusb_free_urbs(struct atusb *atusb) { struct urb *urb; while (1) { urb = usb_get_from_anchor(&atusb->idle_urbs); if (!urb) break; kfree_skb(urb->context); usb_free_urb(urb); } } static int atusb_alloc_urbs(struct atusb *atusb, int n) { struct urb *urb; while (n) { urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { atusb_free_urbs(atusb); return -ENOMEM; } usb_anchor_urb(urb, &atusb->idle_urbs); usb_free_urb(urb); n--; } return 0; } /* ----- IEEE 802.15.4 interface operations -------------------------------- */ static void atusb_xmit_complete(struct urb *urb) { dev_dbg(&urb->dev->dev, "atusb_xmit urb completed"); } static int atusb_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) { struct atusb *atusb = hw->priv; struct usb_device *usb_dev = atusb->usb_dev; int ret; dev_dbg(&usb_dev->dev, "%s (%d)\n", __func__, skb->len); atusb->tx_skb = skb; atusb->tx_ack_seq++; atusb->tx_dr.wIndex = cpu_to_le16(atusb->tx_ack_seq); atusb->tx_dr.wLength = cpu_to_le16(skb->len); usb_fill_control_urb(atusb->tx_urb, usb_dev, usb_sndctrlpipe(usb_dev, 0), (unsigned char *)&atusb->tx_dr, skb->data, skb->len, atusb_xmit_complete, NULL); ret = usb_submit_urb(atusb->tx_urb, GFP_ATOMIC); dev_dbg(&usb_dev->dev, "%s done (%d)\n", __func__, ret); return ret; } static int atusb_ed(struct ieee802154_hw *hw, u8 *level) { WARN_ON(!level); *level = 0xbe; return 0; } static int atusb_set_hw_addr_filt(struct ieee802154_hw *hw, struct ieee802154_hw_addr_filt *filt, unsigned long changed) { struct atusb *atusb = hw->priv; struct device *dev = &atusb->usb_dev->dev; if (changed & IEEE802154_AFILT_SADDR_CHANGED) { u16 addr = le16_to_cpu(filt->short_addr); dev_vdbg(dev, "%s called for saddr\n", __func__); usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, addr, RG_SHORT_ADDR_0, NULL, 0, 1000, GFP_KERNEL); usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, addr >> 8, RG_SHORT_ADDR_1, NULL, 0, 1000, GFP_KERNEL); } if (changed & IEEE802154_AFILT_PANID_CHANGED) { u16 pan = le16_to_cpu(filt->pan_id); dev_vdbg(dev, "%s called for pan id\n", __func__); usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, pan, RG_PAN_ID_0, NULL, 0, 1000, GFP_KERNEL); usb_control_msg_send(atusb->usb_dev, 0, 
ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, pan >> 8, RG_PAN_ID_1, NULL, 0, 1000, GFP_KERNEL); } if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) { u8 i, addr[IEEE802154_EXTENDED_ADDR_LEN]; memcpy(addr, &filt->ieee_addr, IEEE802154_EXTENDED_ADDR_LEN); dev_vdbg(dev, "%s called for IEEE addr\n", __func__); for (i = 0; i < 8; i++) usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, addr[i], RG_IEEE_ADDR_0 + i, NULL, 0, 1000, GFP_KERNEL); } if (changed & IEEE802154_AFILT_PANC_CHANGED) { dev_vdbg(dev, "%s called for panc change\n", __func__); if (filt->pan_coord) atusb_write_subreg(atusb, SR_AACK_I_AM_COORD, 1); else atusb_write_subreg(atusb, SR_AACK_I_AM_COORD, 0); } return atusb_get_and_clear_error(atusb); } static int atusb_start(struct ieee802154_hw *hw) { struct atusb *atusb = hw->priv; struct usb_device *usb_dev = atusb->usb_dev; int ret; dev_dbg(&usb_dev->dev, "%s\n", __func__); schedule_delayed_work(&atusb->work, 0); usb_control_msg_send(atusb->usb_dev, 0, ATUSB_RX_MODE, ATUSB_REQ_TO_DEV, 1, 0, NULL, 0, 1000, GFP_KERNEL); ret = atusb_get_and_clear_error(atusb); if (ret < 0) usb_kill_anchored_urbs(&atusb->idle_urbs); return ret; } static void atusb_stop(struct ieee802154_hw *hw) { struct atusb *atusb = hw->priv; struct usb_device *usb_dev = atusb->usb_dev; dev_dbg(&usb_dev->dev, "%s\n", __func__); usb_kill_anchored_urbs(&atusb->idle_urbs); usb_control_msg_send(atusb->usb_dev, 0, ATUSB_RX_MODE, ATUSB_REQ_TO_DEV, 0, 0, NULL, 0, 1000, GFP_KERNEL); atusb_get_and_clear_error(atusb); } #define ATUSB_MAX_TX_POWERS 0xF static const s32 atusb_powers[ATUSB_MAX_TX_POWERS + 1] = { 300, 280, 230, 180, 130, 70, 0, -100, -200, -300, -400, -500, -700, -900, -1200, -1700, }; static int atusb_txpower(struct ieee802154_hw *hw, s32 mbm) { struct atusb *atusb = hw->priv; if (atusb->data) return atusb->data->set_txpower(hw, mbm); else return -ENOTSUPP; } static int atusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) { struct atusb *atusb = hw->priv; u32 i; for (i = 0; i < hw->phy->supported.tx_powers_size; i++) { if (hw->phy->supported.tx_powers[i] == mbm) return atusb_write_subreg(atusb, SR_TX_PWR_23X, i); } return -EINVAL; } static int hulusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) { u32 i; for (i = 0; i < hw->phy->supported.tx_powers_size; i++) { if (hw->phy->supported.tx_powers[i] == mbm) return atusb_write_subreg(hw->priv, SR_TX_PWR_212, i); } return -EINVAL; } #define ATUSB_MAX_ED_LEVELS 0xF static const s32 atusb_ed_levels[ATUSB_MAX_ED_LEVELS + 1] = { -9100, -8900, -8700, -8500, -8300, -8100, -7900, -7700, -7500, -7300, -7100, -6900, -6700, -6500, -6300, -6100, }; #define AT86RF212_MAX_TX_POWERS 0x1F static const s32 at86rf212_powers[AT86RF212_MAX_TX_POWERS + 1] = { 500, 400, 300, 200, 100, 0, -100, -200, -300, -400, -500, -600, -700, -800, -900, -1000, -1100, -1200, -1300, -1400, -1500, -1600, -1700, -1800, -1900, -2000, -2100, -2200, -2300, -2400, -2500, -2600, }; #define AT86RF2XX_MAX_ED_LEVELS 0xF static const s32 at86rf212_ed_levels_100[AT86RF2XX_MAX_ED_LEVELS + 1] = { -10000, -9800, -9600, -9400, -9200, -9000, -8800, -8600, -8400, -8200, -8000, -7800, -7600, -7400, -7200, -7000, }; static const s32 at86rf212_ed_levels_98[AT86RF2XX_MAX_ED_LEVELS + 1] = { -9800, -9600, -9400, -9200, -9000, -8800, -8600, -8400, -8200, -8000, -7800, -7600, -7400, -7200, -7000, -6800, }; static int atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca) { struct atusb *atusb = hw->priv; u8 val; /* mapping 802.15.4 to driver spec */ switch (cca->mode) { case 
NL802154_CCA_ENERGY: val = 1; break; case NL802154_CCA_CARRIER: val = 2; break; case NL802154_CCA_ENERGY_CARRIER: switch (cca->opt) { case NL802154_CCA_OPT_ENERGY_CARRIER_AND: val = 3; break; case NL802154_CCA_OPT_ENERGY_CARRIER_OR: val = 0; break; default: return -EINVAL; } break; default: return -EINVAL; } return atusb_write_subreg(atusb, SR_CCA_MODE, val); } static int hulusb_set_cca_ed_level(struct atusb *lp, int rssi_base_val) { int cca_ed_thres; cca_ed_thres = atusb_read_subreg(lp, SR_CCA_ED_THRES); if (cca_ed_thres < 0) return cca_ed_thres; switch (rssi_base_val) { case -98: lp->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_98; lp->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_98); lp->hw->phy->cca_ed_level = at86rf212_ed_levels_98[cca_ed_thres]; break; case -100: lp->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_100; lp->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_100); lp->hw->phy->cca_ed_level = at86rf212_ed_levels_100[cca_ed_thres]; break; default: WARN_ON(1); } return 0; } static int atusb_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) { struct atusb *atusb = hw->priv; u32 i; for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) { if (hw->phy->supported.cca_ed_levels[i] == mbm) return atusb_write_subreg(atusb, SR_CCA_ED_THRES, i); } return -EINVAL; } static int atusb_channel(struct ieee802154_hw *hw, u8 page, u8 channel) { struct atusb *atusb = hw->priv; int ret = -ENOTSUPP; if (atusb->data) { ret = atusb->data->set_channel(hw, page, channel); /* @@@ ugly synchronization */ msleep(atusb->data->t_channel_switch); } return ret; } static int atusb_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) { struct atusb *atusb = hw->priv; int ret; ret = atusb_write_subreg(atusb, SR_CHANNEL, channel); if (ret < 0) return ret; return 0; } static int hulusb_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) { int rc; int rssi_base_val; struct atusb *lp = hw->priv; if (channel == 0) rc = atusb_write_subreg(lp, SR_SUB_MODE, 0); else rc = atusb_write_subreg(lp, SR_SUB_MODE, 1); if (rc < 0) return rc; if (page == 0) { rc = atusb_write_subreg(lp, SR_BPSK_QPSK, 0); rssi_base_val = -100; } else { rc = atusb_write_subreg(lp, SR_BPSK_QPSK, 1); rssi_base_val = -98; } if (rc < 0) return rc; rc = hulusb_set_cca_ed_level(lp, rssi_base_val); if (rc < 0) return rc; return atusb_write_subreg(lp, SR_CHANNEL, channel); } static int atusb_set_csma_params(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries) { struct atusb *atusb = hw->priv; int ret; ret = atusb_write_subreg(atusb, SR_MIN_BE, min_be); if (ret) return ret; ret = atusb_write_subreg(atusb, SR_MAX_BE, max_be); if (ret) return ret; return atusb_write_subreg(atusb, SR_MAX_CSMA_RETRIES, retries); } static int hulusb_set_lbt(struct ieee802154_hw *hw, bool on) { struct atusb *atusb = hw->priv; return atusb_write_subreg(atusb, SR_CSMA_LBT_MODE, on); } static int atusb_set_frame_retries(struct ieee802154_hw *hw, s8 retries) { struct atusb *atusb = hw->priv; return atusb_write_subreg(atusb, SR_MAX_FRAME_RETRIES, retries); } static int atusb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) { struct atusb *atusb = hw->priv; int ret; if (on) { ret = atusb_write_subreg(atusb, SR_AACK_DIS_ACK, 1); if (ret < 0) return ret; ret = atusb_write_subreg(atusb, SR_AACK_PROM_MODE, 1); if (ret < 0) return ret; } else { ret = atusb_write_subreg(atusb, SR_AACK_PROM_MODE, 0); if (ret < 0) return ret; ret = atusb_write_subreg(atusb, SR_AACK_DIS_ACK, 0); if (ret 
< 0) return ret; } return 0; } static struct atusb_chip_data atusb_chip_data = { .t_channel_switch = 1, .rssi_base_val = -91, .set_txpower = atusb_set_txpower, .set_channel = atusb_set_channel, }; static struct atusb_chip_data hulusb_chip_data = { .t_channel_switch = 11, .rssi_base_val = -100, .set_txpower = hulusb_set_txpower, .set_channel = hulusb_set_channel, }; static const struct ieee802154_ops atusb_ops = { .owner = THIS_MODULE, .xmit_async = atusb_xmit, .ed = atusb_ed, .set_channel = atusb_channel, .start = atusb_start, .stop = atusb_stop, .set_hw_addr_filt = atusb_set_hw_addr_filt, .set_txpower = atusb_txpower, .set_lbt = hulusb_set_lbt, .set_cca_mode = atusb_set_cca_mode, .set_cca_ed_level = atusb_set_cca_ed_level, .set_csma_params = atusb_set_csma_params, .set_frame_retries = atusb_set_frame_retries, .set_promiscuous_mode = atusb_set_promiscuous_mode, }; /* ----- Firmware and chip version information ----------------------------- */ static int atusb_get_and_show_revision(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; char *hw_name; unsigned char buffer[3]; int ret; /* Get a couple of the ATMega Firmware values */ ret = usb_control_msg_recv(atusb->usb_dev, 0, ATUSB_ID, ATUSB_REQ_FROM_DEV, 0, 0, buffer, 3, 1000, GFP_KERNEL); if (!ret) { atusb->fw_ver_maj = buffer[0]; atusb->fw_ver_min = buffer[1]; atusb->fw_hw_type = buffer[2]; switch (atusb->fw_hw_type) { case ATUSB_HW_TYPE_100813: case ATUSB_HW_TYPE_101216: case ATUSB_HW_TYPE_110131: hw_name = "ATUSB"; atusb->data = &atusb_chip_data; break; case ATUSB_HW_TYPE_RZUSB: hw_name = "RZUSB"; atusb->data = &atusb_chip_data; break; case ATUSB_HW_TYPE_HULUSB: hw_name = "HULUSB"; atusb->data = &hulusb_chip_data; break; default: hw_name = "UNKNOWN"; atusb->err = -ENOTSUPP; ret = -ENOTSUPP; break; } dev_info(&usb_dev->dev, "Firmware: major: %u, minor: %u, hardware type: %s (%d)\n", atusb->fw_ver_maj, atusb->fw_ver_min, hw_name, atusb->fw_hw_type); } if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 2) { dev_info(&usb_dev->dev, "Firmware version (%u.%u) predates our first public release.", atusb->fw_ver_maj, atusb->fw_ver_min); dev_info(&usb_dev->dev, "Please update to version 0.2 or newer"); } return ret; } static int atusb_get_and_show_build(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; char *build; int ret; build = kmalloc(ATUSB_BUILD_SIZE + 1, GFP_KERNEL); if (!build) return -ENOMEM; ret = usb_control_msg(atusb->usb_dev, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000); if (ret >= 0) { build[ret] = 0; dev_info(&usb_dev->dev, "Firmware: build %s\n", build); } kfree(build); return ret; } static int atusb_get_and_conf_chip(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; u8 man_id_0, man_id_1, part_num, version_num; const char *chip; struct ieee802154_hw *hw = atusb->hw; int ret; ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, 0, RG_MAN_ID_0, &man_id_0, 1, 1000, GFP_KERNEL); if (ret < 0) return ret; ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, 0, RG_MAN_ID_1, &man_id_1, 1, 1000, GFP_KERNEL); if (ret < 0) return ret; ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, 0, RG_PART_NUM, &part_num, 1, 1000, GFP_KERNEL); if (ret < 0) return ret; ret = usb_control_msg_recv(usb_dev, 0, ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, 0, RG_VERSION_NUM, &version_num, 1, 1000, GFP_KERNEL); if (ret < 0) return ret; hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | 
IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS; hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | WPAN_PHY_FLAG_CCA_MODE; hw->phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) | BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER); hw->phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) | BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR); hw->phy->cca.mode = NL802154_CCA_ENERGY; hw->phy->current_page = 0; if ((man_id_1 << 8 | man_id_0) != ATUSB_JEDEC_ATMEL) { dev_err(&usb_dev->dev, "non-Atmel transceiver xxxx%02x%02x\n", man_id_1, man_id_0); goto fail; } switch (part_num) { case 2: chip = "AT86RF230"; atusb->hw->phy->supported.channels[0] = 0x7FFF800; atusb->hw->phy->current_channel = 11; /* reset default */ atusb->hw->phy->supported.tx_powers = atusb_powers; atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); hw->phy->supported.cca_ed_levels = atusb_ed_levels; hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels); break; case 3: chip = "AT86RF231"; atusb->hw->phy->supported.channels[0] = 0x7FFF800; atusb->hw->phy->current_channel = 11; /* reset default */ atusb->hw->phy->supported.tx_powers = atusb_powers; atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); hw->phy->supported.cca_ed_levels = atusb_ed_levels; hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels); break; case 7: chip = "AT86RF212"; atusb->hw->flags |= IEEE802154_HW_LBT; atusb->hw->phy->supported.channels[0] = 0x00007FF; atusb->hw->phy->supported.channels[2] = 0x00007FF; atusb->hw->phy->current_channel = 5; atusb->hw->phy->supported.lbt = NL802154_SUPPORTED_BOOL_BOTH; atusb->hw->phy->supported.tx_powers = at86rf212_powers; atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(at86rf212_powers); atusb->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_100; atusb->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_100); break; default: dev_err(&usb_dev->dev, "unexpected transceiver, part 0x%02x version 0x%02x\n", part_num, version_num); goto fail; } hw->phy->transmit_power = hw->phy->supported.tx_powers[0]; hw->phy->cca_ed_level = hw->phy->supported.cca_ed_levels[7]; dev_info(&usb_dev->dev, "ATUSB: %s version %d\n", chip, version_num); return 0; fail: atusb->err = -ENODEV; return -ENODEV; } static int atusb_set_extended_addr(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; unsigned char buffer[IEEE802154_EXTENDED_ADDR_LEN]; __le64 extended_addr; u64 addr; int ret; /* Firmware versions before 0.3 do not support the EUI64_READ command. * Just use a random address and be done. 
*/ if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 3) { ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr); return 0; } /* Firmware is new enough so we fetch the address from EEPROM */ ret = usb_control_msg_recv(atusb->usb_dev, 0, ATUSB_EUI64_READ, ATUSB_REQ_FROM_DEV, 0, 0, buffer, IEEE802154_EXTENDED_ADDR_LEN, 1000, GFP_KERNEL); if (ret < 0) { dev_err(&usb_dev->dev, "failed to fetch extended address, random address set\n"); ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr); return ret; } memcpy(&extended_addr, buffer, IEEE802154_EXTENDED_ADDR_LEN); /* Check if read address is not empty and the unicast bit is set correctly */ if (!ieee802154_is_valid_extended_unicast_addr(extended_addr)) { dev_info(&usb_dev->dev, "no permanent extended address found, random address set\n"); ieee802154_random_extended_addr(&atusb->hw->phy->perm_extended_addr); } else { atusb->hw->phy->perm_extended_addr = extended_addr; addr = swab64((__force u64)atusb->hw->phy->perm_extended_addr); dev_info(&usb_dev->dev, "Read permanent extended address %8phC from device\n", &addr); } return ret; } /* ----- Setup ------------------------------------------------------------- */ static int atusb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *usb_dev = interface_to_usbdev(interface); struct ieee802154_hw *hw; struct atusb *atusb = NULL; int ret = -ENOMEM; hw = ieee802154_alloc_hw(sizeof(struct atusb), &atusb_ops); if (!hw) return -ENOMEM; atusb = hw->priv; atusb->hw = hw; atusb->usb_dev = usb_get_dev(usb_dev); usb_set_intfdata(interface, atusb); atusb->shutdown = 0; atusb->err = 0; INIT_DELAYED_WORK(&atusb->work, atusb_work_urbs); init_usb_anchor(&atusb->idle_urbs); init_usb_anchor(&atusb->rx_urbs); if (atusb_alloc_urbs(atusb, ATUSB_NUM_RX_URBS)) goto fail; atusb->tx_dr.bRequestType = ATUSB_REQ_TO_DEV; atusb->tx_dr.bRequest = ATUSB_TX; atusb->tx_dr.wValue = cpu_to_le16(0); atusb->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!atusb->tx_urb) goto fail; hw->parent = &usb_dev->dev; usb_control_msg_send(atusb->usb_dev, 0, ATUSB_RF_RESET, ATUSB_REQ_TO_DEV, 0, 0, NULL, 0, 1000, GFP_KERNEL); atusb_get_and_conf_chip(atusb); atusb_get_and_show_revision(atusb); atusb_get_and_show_build(atusb); atusb_set_extended_addr(atusb); if ((atusb->fw_ver_maj == 0 && atusb->fw_ver_min >= 3) || atusb->fw_ver_maj > 0) hw->flags |= IEEE802154_HW_FRAME_RETRIES; ret = atusb_get_and_clear_error(atusb); if (ret) { dev_err(&atusb->usb_dev->dev, "%s: initialization failed, error = %d\n", __func__, ret); goto fail; } ret = ieee802154_register_hw(hw); if (ret) goto fail; /* If we just powered on, we're now in P_ON and need to enter TRX_OFF * explicitly. Any resets after that will send us straight to TRX_OFF, * making the command below redundant. */ usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, STATE_FORCE_TRX_OFF, RG_TRX_STATE, NULL, 0, 1000, GFP_KERNEL); msleep(1); /* reset => TRX_OFF, tTR13 = 37 us */ #if 0 /* Calculating the maximum time available to empty the frame buffer * on reception: * * According to [1], the inter-frame gap is * R * 20 * 16 us + 128 us * where R is a random number from 0 to 7. Furthermore, we have 20 bit * times (80 us at 250 kbps) of SHR of the next frame before the * transceiver begins storing data in the frame buffer. * * This yields a minimum time of 208 us between the last data of a * frame and the first data of the next frame. This time is further * reduced by interrupt latency in the atusb firmware. 
* * atusb currently needs about 500 us to retrieve a maximum-sized * frame. We therefore have to allow reception of a new frame to begin * while we retrieve the previous frame. * * [1] "JN-AN-1035 Calculating data rates in an IEEE 802.15.4-based * network", Jennic 2006. * http://www.jennic.com/download_file.php?supportFile=JN-AN-1035%20Calculating%20802-15-4%20Data%20Rates-1v0.pdf */ atusb_write_subreg(atusb, SR_RX_SAFE_MODE, 1); #endif usb_control_msg_send(atusb->usb_dev, 0, ATUSB_REG_WRITE, ATUSB_REQ_TO_DEV, 0xff, RG_IRQ_MASK, NULL, 0, 1000, GFP_KERNEL); ret = atusb_get_and_clear_error(atusb); if (!ret) return 0; dev_err(&atusb->usb_dev->dev, "%s: setup failed, error = %d\n", __func__, ret); ieee802154_unregister_hw(hw); fail: atusb_free_urbs(atusb); usb_kill_urb(atusb->tx_urb); usb_free_urb(atusb->tx_urb); usb_put_dev(usb_dev); ieee802154_free_hw(hw); return ret; } static void atusb_disconnect(struct usb_interface *interface) { struct atusb *atusb = usb_get_intfdata(interface); dev_dbg(&atusb->usb_dev->dev, "%s\n", __func__); atusb->shutdown = 1; cancel_delayed_work_sync(&atusb->work); usb_kill_anchored_urbs(&atusb->rx_urbs); atusb_free_urbs(atusb); usb_kill_urb(atusb->tx_urb); usb_free_urb(atusb->tx_urb); ieee802154_unregister_hw(atusb->hw); usb_put_dev(atusb->usb_dev); ieee802154_free_hw(atusb->hw); usb_set_intfdata(interface, NULL); pr_debug("%s done\n", __func__); } /* The devices we work with */ static const struct usb_device_id atusb_device_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = ATUSB_VENDOR_ID, .idProduct = ATUSB_PRODUCT_ID, .bInterfaceClass = USB_CLASS_VENDOR_SPEC }, /* end with null element */ {} }; MODULE_DEVICE_TABLE(usb, atusb_device_table); static struct usb_driver atusb_driver = { .name = "atusb", .probe = atusb_probe, .disconnect = atusb_disconnect, .id_table = atusb_device_table, }; module_usb_driver(atusb_driver); MODULE_AUTHOR("Alexander Aring <alex.aring@gmail.com>"); MODULE_AUTHOR("Richard Sharpe <realrichardsharpe@gmail.com>"); MODULE_AUTHOR("Stefan Schmidt <stefan@datenfreihafen.org>"); MODULE_AUTHOR("Werner Almesberger <werner@almesberger.net>"); MODULE_AUTHOR("Josef Filzmaier <j.filzmaier@gmx.at>"); MODULE_DESCRIPTION("ATUSB IEEE 802.15.4 Driver"); MODULE_LICENSE("GPL");
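A footnote on the register helpers above: atusb_write_subreg() performs a read-modify-write over USB control transfers, and the call sites suggest the SR_* constants expand to a register/mask/shift triple. The standalone helper below is a minimal sketch of that mask/shift arithmetic only; it is illustrative and not part of the driver.

/* Hedged example: mirrors the subfield update done inside atusb_write_subreg(). */
static u8 example_apply_subreg(u8 orig, u8 mask, u8 shift, u8 value)
{
	u8 tmp;

	tmp = orig & ~mask;			/* clear the subfield */
	tmp |= (value << shift) & mask;		/* insert the new value, bounded by the mask */
	return tmp;				/* the driver writes this back only if it changed */
}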
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for some logitech "special" devices * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2010 Hendrik Iben */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/usb.h> #include <linux/wait.h> #include "usbhid/usbhid.h" #include "hid-ids.h" #include "hid-lg.h" #include "hid-lg4ff.h" #define LG_RDESC 0x001 #define LG_BAD_RELATIVE_KEYS 0x002 #define LG_DUPLICATE_USAGES 0x004 #define LG_EXPANDED_KEYMAP 0x010 #define LG_IGNORE_DOUBLED_WHEEL 0x020 #define LG_WIRELESS 0x040 #define LG_INVERT_HWHEEL 0x080 #define LG_NOGET 0x100 #define LG_FF 0x200 #define LG_FF2 0x400 #define LG_RDESC_REL_ABS 0x800 #define LG_FF3 0x1000 #define LG_FF4 0x2000 /* Size of the original descriptors of the Driving Force (and Pro) wheels */ #define DF_RDESC_ORIG_SIZE 130 #define DFP_RDESC_ORIG_SIZE 97 #define FV_RDESC_ORIG_SIZE 130 #define MOMO_RDESC_ORIG_SIZE 87 #define MOMO2_RDESC_ORIG_SIZE 87 #define FFG_RDESC_ORIG_SIZE 85 #define FG_RDESC_ORIG_SIZE 82 /* Fixed report descriptors for Logitech Driving Force (and Pro) * wheel controllers * * The original descriptors hide the separate throttle and brake axes in * a custom vendor usage page, providing only a combined value as * GenericDesktop.Y. * These descriptors remove the combined Y axis and instead report * separate throttle (Y) and brake (RZ). */ static __u8 df_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystick), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x0A, /* Report Size (10), */ 0x14, /* Logical Minimum (0), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x34, /* Physical Minimum (0), */ 0x46, 0xFF, 0x03, /* Physical Maximum (1023), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x0C, /* Report Count (12), */ 0x75, 0x01, /* Report Size (1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x05, 0x09, /* Usage (Buttons), */ 0x19, 0x01, /* Usage Minimum (1), */ 0x29, 0x0c, /* Usage Maximum (12), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x02, /* Report Count (2), */ 0x06, 0x00, 0xFF, /* Usage Page (Vendor: 65280), */ 0x09, 0x01, /* Usage (?: 1), */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x08, /* Report Size (8), */ 0x81, 0x02, /* Input (Variable), */ 0x25, 0x07, /* Logical Maximum (7), */ 0x46, 0x3B, 0x01, /* Physical Maximum (315), */ 0x75, 0x04, /* Report Size (4), */ 0x65, 0x14, /* Unit (Degrees), */ 0x09, 0x39, /* Usage (Hat Switch), */ 0x81, 0x42, /* Input (Variable, Null State), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x65, 0x00, /* Unit (none), */ 0x06, 0x00, 0xFF, /* Usage Page (Vendor: 65280), */ 0x09, 0x01, /* Usage (?: 1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 
0x95, 0x01, /* Report Count (1), */ 0x75, 0x08, /* Report Size (8), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x35, /* Usage (Rz), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x95, 0x07, /* Report Count (7), */ 0x75, 0x08, /* Report Size (8), */ 0x09, 0x03, /* Usage (?: 3), */ 0x91, 0x02, /* Output (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static __u8 dfp_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystick), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x0E, /* Report Size (14), */ 0x14, /* Logical Minimum (0), */ 0x26, 0xFF, 0x3F, /* Logical Maximum (16383), */ 0x34, /* Physical Minimum (0), */ 0x46, 0xFF, 0x3F, /* Physical Maximum (16383), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x0E, /* Report Count (14), */ 0x75, 0x01, /* Report Size (1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x0E, /* Usage Maximum (0Eh), */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x04, /* Report Size (4), */ 0x25, 0x07, /* Logical Maximum (7), */ 0x46, 0x3B, 0x01, /* Physical Maximum (315), */ 0x65, 0x14, /* Unit (Degrees), */ 0x09, 0x39, /* Usage (Hat Switch), */ 0x81, 0x42, /* Input (Variable, Nullstate), */ 0x65, 0x00, /* Unit, */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x75, 0x08, /* Report Size (8), */ 0x81, 0x01, /* Input (Constant), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x35, /* Usage (Rz), */ 0x81, 0x02, /* Input (Variable), */ 0x81, 0x01, /* Input (Constant), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x09, 0x02, /* Usage (02h), */ 0x95, 0x07, /* Report Count (7), */ 0x91, 0x02, /* Output (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static __u8 fv_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystick), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x0A, /* Report Size (10), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x46, 0xFF, 0x03, /* Physical Maximum (1023), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x0C, /* Report Count (12), */ 0x75, 0x01, /* Report Size (1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x0C, /* Usage Maximum (0Ch), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x02, /* Report Count (2), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x09, 0x01, /* Usage (01h), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x02, /* Usage (02h), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x08, /* Report Size (8), */ 0x81, 0x02, /* Input (Variable), */ 
0x05, 0x01, /* Usage Page (Desktop), */ 0x25, 0x07, /* Logical Maximum (7), */ 0x46, 0x3B, 0x01, /* Physical Maximum (315), */ 0x75, 0x04, /* Report Size (4), */ 0x65, 0x14, /* Unit (Degrees), */ 0x09, 0x39, /* Usage (Hat Switch), */ 0x81, 0x42, /* Input (Variable, Null State), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x65, 0x00, /* Unit, */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x09, 0x01, /* Usage (01h), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x08, /* Report Size (8), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x32, /* Usage (Z), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x95, 0x07, /* Report Count (7), */ 0x75, 0x08, /* Report Size (8), */ 0x09, 0x03, /* Usage (03h), */ 0x91, 0x02, /* Output (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static __u8 momo_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystick), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x0A, /* Report Size (10), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x46, 0xFF, 0x03, /* Physical Maximum (1023), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x08, /* Report Count (8), */ 0x75, 0x01, /* Report Size (1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x08, /* Usage Maximum (08h), */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x75, 0x0E, /* Report Size (14), */ 0x95, 0x01, /* Report Count (1), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x09, 0x00, /* Usage (00h), */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x75, 0x08, /* Report Size (8), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x32, /* Usage (Z), */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x09, 0x01, /* Usage (01h), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x09, 0x02, /* Usage (02h), */ 0x95, 0x07, /* Report Count (7), */ 0x91, 0x02, /* Output (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static __u8 momo2_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystick), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x0A, /* Report Size (10), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x46, 0xFF, 0x03, /* Physical Maximum (1023), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x0A, /* Report Count (10), */ 0x75, 0x01, /* Report Size (1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), 
*/ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x0A, /* Usage Maximum (0Ah), */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x09, 0x00, /* Usage (00h), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x08, /* Report Size (8), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x09, 0x01, /* Usage (01h), */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x32, /* Usage (Z), */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x09, 0x00, /* Usage (00h), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x09, 0x02, /* Usage (02h), */ 0x95, 0x07, /* Report Count (7), */ 0x91, 0x02, /* Output (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static __u8 ffg_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystik), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x0A, /* Report Size (10), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x46, 0xFF, 0x03, /* Physical Maximum (1023), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x06, /* Report Count (6), */ 0x75, 0x01, /* Report Size (1), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x06, /* Usage Maximum (06h), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x08, /* Report Size (8), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x09, 0x01, /* Usage (01h), */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x81, 0x01, /* Input (Constant), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x32, /* Usage (Z), */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x09, 0x01, /* Usage (01h), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x09, 0x02, /* Usage (02h), */ 0x95, 0x07, /* Report Count (7), */ 0x91, 0x02, /* Output (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static __u8 fg_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystik), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x01, /* Report Count (1), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), */ 0xA4, /* Push, */ 0x25, 0x01, /* Logical Maximum (1), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x02, /* Report Count (2), */ 0x81, 0x01, /* Input (Constant), */ 0x95, 0x06, /* Report Count (6), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x06, /* Usage Maximum (06h), */ 0x81, 0x02, /* 
Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0xB4, /* Pop, */ 0x81, 0x02, /* Input (Constant), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x32, /* Usage (Z), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x04, /* Report Count (4), */ 0x09, 0x02, /* Usage (02h), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection, */ }; /* * Certain Logitech keyboards send in report #3 keys which are far * above the logical maximum described in descriptor. This extends * the original value of 0x28c of logical maximum to 0x104d */ static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if ((drv_data->quirks & LG_RDESC) && *rsize >= 91 && rdesc[83] == 0x26 && rdesc[84] == 0x8c && rdesc[85] == 0x02) { hid_info(hdev, "fixing up Logitech keyboard report descriptor\n"); rdesc[84] = rdesc[89] = 0x4d; rdesc[85] = rdesc[90] = 0x10; } if ((drv_data->quirks & LG_RDESC_REL_ABS) && *rsize >= 51 && rdesc[32] == 0x81 && rdesc[33] == 0x06 && rdesc[49] == 0x81 && rdesc[50] == 0x06) { hid_info(hdev, "fixing up rel/abs in Logitech report descriptor\n"); rdesc[33] = rdesc[50] = 0x02; } switch (hdev->product) { case USB_DEVICE_ID_LOGITECH_WINGMAN_FG: if (*rsize == FG_RDESC_ORIG_SIZE) { hid_info(hdev, "fixing up Logitech Wingman Formula GP report descriptor\n"); rdesc = fg_rdesc_fixed; *rsize = sizeof(fg_rdesc_fixed); } else { hid_info(hdev, "rdesc size test failed for formula gp\n"); } break; case USB_DEVICE_ID_LOGITECH_WINGMAN_FFG: if (*rsize == FFG_RDESC_ORIG_SIZE) { hid_info(hdev, "fixing up Logitech Wingman Formula Force GP report descriptor\n"); rdesc = ffg_rdesc_fixed; *rsize = sizeof(ffg_rdesc_fixed); } break; /* Several wheels report as this id when operating in emulation mode. 
*/ case USB_DEVICE_ID_LOGITECH_WHEEL: if (*rsize == DF_RDESC_ORIG_SIZE) { hid_info(hdev, "fixing up Logitech Driving Force report descriptor\n"); rdesc = df_rdesc_fixed; *rsize = sizeof(df_rdesc_fixed); } break; case USB_DEVICE_ID_LOGITECH_MOMO_WHEEL: if (*rsize == MOMO_RDESC_ORIG_SIZE) { hid_info(hdev, "fixing up Logitech Momo Force (Red) report descriptor\n"); rdesc = momo_rdesc_fixed; *rsize = sizeof(momo_rdesc_fixed); } break; case USB_DEVICE_ID_LOGITECH_MOMO_WHEEL2: if (*rsize == MOMO2_RDESC_ORIG_SIZE) { hid_info(hdev, "fixing up Logitech Momo Racing Force (Black) report descriptor\n"); rdesc = momo2_rdesc_fixed; *rsize = sizeof(momo2_rdesc_fixed); } break; case USB_DEVICE_ID_LOGITECH_VIBRATION_WHEEL: if (*rsize == FV_RDESC_ORIG_SIZE) { hid_info(hdev, "fixing up Logitech Formula Vibration report descriptor\n"); rdesc = fv_rdesc_fixed; *rsize = sizeof(fv_rdesc_fixed); } break; case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: if (*rsize == DFP_RDESC_ORIG_SIZE) { hid_info(hdev, "fixing up Logitech Driving Force Pro report descriptor\n"); rdesc = dfp_rdesc_fixed; *rsize = sizeof(dfp_rdesc_fixed); } break; case USB_DEVICE_ID_LOGITECH_WII_WHEEL: if (*rsize >= 101 && rdesc[41] == 0x95 && rdesc[42] == 0x0B && rdesc[47] == 0x05 && rdesc[48] == 0x09) { hid_info(hdev, "fixing up Logitech Speed Force Wireless report descriptor\n"); rdesc[41] = 0x05; rdesc[42] = 0x09; rdesc[47] = 0x95; rdesc[48] = 0x0B; } break; } return rdesc; } #define lg_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ EV_KEY, (c)) static int lg_ultrax_remote_mapping(struct hid_input *hi, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_LOGIVENDOR) return 0; set_bit(EV_REP, hi->input->evbit); switch (usage->hid & HID_USAGE) { /* Reported on Logitech Ultra X Media Remote */ case 0x004: lg_map_key_clear(KEY_AGAIN); break; case 0x00d: lg_map_key_clear(KEY_HOME); break; case 0x024: lg_map_key_clear(KEY_SHUFFLE); break; case 0x025: lg_map_key_clear(KEY_TV); break; case 0x026: lg_map_key_clear(KEY_MENU); break; case 0x031: lg_map_key_clear(KEY_AUDIO); break; case 0x032: lg_map_key_clear(KEY_TEXT); break; case 0x033: lg_map_key_clear(KEY_LAST); break; case 0x047: lg_map_key_clear(KEY_MP3); break; case 0x048: lg_map_key_clear(KEY_DVD); break; case 0x049: lg_map_key_clear(KEY_MEDIA); break; case 0x04a: lg_map_key_clear(KEY_VIDEO); break; case 0x04b: lg_map_key_clear(KEY_ANGLE); break; case 0x04c: lg_map_key_clear(KEY_LANGUAGE); break; case 0x04d: lg_map_key_clear(KEY_SUBTITLE); break; case 0x051: lg_map_key_clear(KEY_RED); break; case 0x052: lg_map_key_clear(KEY_CLOSE); break; default: return 0; } return 1; } static int lg_wireless_mapping(struct hid_input *hi, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_CONSUMER) return 0; switch (usage->hid & HID_USAGE) { case 0x1001: lg_map_key_clear(KEY_MESSENGER); break; case 0x1003: lg_map_key_clear(KEY_SOUND); break; case 0x1004: lg_map_key_clear(KEY_VIDEO); break; case 0x1005: lg_map_key_clear(KEY_AUDIO); break; case 0x100a: lg_map_key_clear(KEY_DOCUMENTS); break; /* The following two entries are Playlist 1 and 2 on the MX3200 */ case 0x100f: lg_map_key_clear(KEY_FN_1); break; case 0x1010: lg_map_key_clear(KEY_FN_2); break; case 0x1011: lg_map_key_clear(KEY_PREVIOUSSONG); break; case 0x1012: lg_map_key_clear(KEY_NEXTSONG); break; case 0x1013: lg_map_key_clear(KEY_CAMERA); break; case 0x1014: lg_map_key_clear(KEY_MESSENGER); break; case 0x1015: lg_map_key_clear(KEY_RECORD); break; case 
0x1016: lg_map_key_clear(KEY_PLAYER); break; case 0x1017: lg_map_key_clear(KEY_EJECTCD); break; case 0x1018: lg_map_key_clear(KEY_MEDIA); break; case 0x1019: lg_map_key_clear(KEY_PROG1); break; case 0x101a: lg_map_key_clear(KEY_PROG2); break; case 0x101b: lg_map_key_clear(KEY_PROG3); break; case 0x101c: lg_map_key_clear(KEY_CYCLEWINDOWS); break; case 0x101f: lg_map_key_clear(KEY_ZOOMIN); break; case 0x1020: lg_map_key_clear(KEY_ZOOMOUT); break; case 0x1021: lg_map_key_clear(KEY_ZOOMRESET); break; case 0x1023: lg_map_key_clear(KEY_CLOSE); break; case 0x1027: lg_map_key_clear(KEY_MENU); break; /* this one is marked as 'Rotate' */ case 0x1028: lg_map_key_clear(KEY_ANGLE); break; case 0x1029: lg_map_key_clear(KEY_SHUFFLE); break; case 0x102a: lg_map_key_clear(KEY_BACK); break; case 0x102b: lg_map_key_clear(KEY_CYCLEWINDOWS); break; case 0x102d: lg_map_key_clear(KEY_WWW); break; /* The following two are 'Start/answer call' and 'End/reject call' on the MX3200 */ case 0x1031: lg_map_key_clear(KEY_OK); break; case 0x1032: lg_map_key_clear(KEY_CANCEL); break; case 0x1041: lg_map_key_clear(KEY_BATTERY); break; case 0x1042: lg_map_key_clear(KEY_WORDPROCESSOR); break; case 0x1043: lg_map_key_clear(KEY_SPREADSHEET); break; case 0x1044: lg_map_key_clear(KEY_PRESENTATION); break; case 0x1045: lg_map_key_clear(KEY_UNDO); break; case 0x1046: lg_map_key_clear(KEY_REDO); break; case 0x1047: lg_map_key_clear(KEY_PRINT); break; case 0x1048: lg_map_key_clear(KEY_SAVE); break; case 0x1049: lg_map_key_clear(KEY_PROG1); break; case 0x104a: lg_map_key_clear(KEY_PROG2); break; case 0x104b: lg_map_key_clear(KEY_PROG3); break; case 0x104c: lg_map_key_clear(KEY_PROG4); break; default: return 0; } return 1; } static int lg_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { /* extended mapping for certain Logitech hardware (Logitech cordless desktop LX500) */ static const u8 e_keymap[] = { 0,216, 0,213,175,156, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0,212, 174,167,152,161,112, 0, 0, 0,154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,183,184,185,186,187, 188,189,190,191,192,193,194, 0, 0, 0 }; struct lg_drv_data *drv_data = hid_get_drvdata(hdev); unsigned int hid = usage->hid; if (hdev->product == USB_DEVICE_ID_LOGITECH_RECEIVER && lg_ultrax_remote_mapping(hi, usage, bit, max)) return 1; if ((drv_data->quirks & LG_WIRELESS) && lg_wireless_mapping(hi, usage, bit, max)) return 1; if ((hid & HID_USAGE_PAGE) != HID_UP_BUTTON) return 0; hid &= HID_USAGE; /* Special handling for Logitech Cordless Desktop */ if (field->application == HID_GD_MOUSE) { if ((drv_data->quirks & LG_IGNORE_DOUBLED_WHEEL) && (hid == 7 || hid == 8)) return -1; } else { if ((drv_data->quirks & LG_EXPANDED_KEYMAP) && hid < ARRAY_SIZE(e_keymap) && e_keymap[hid] != 0) { hid_map_usage(hi, usage, bit, max, EV_KEY, e_keymap[hid]); return 1; } } return 0; } static int lg_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if ((drv_data->quirks & LG_BAD_RELATIVE_KEYS) && usage->type == EV_KEY && (field->flags & HID_MAIN_ITEM_RELATIVE)) field->flags &= ~HID_MAIN_ITEM_RELATIVE; if ((drv_data->quirks & LG_DUPLICATE_USAGES) && (usage->type == EV_KEY || usage->type == EV_REL || usage->type == EV_ABS)) clear_bit(usage->code, *bit); /* Ensure that Logitech wheels are not 
given a default fuzz/flat value */ if (usage->type == EV_ABS && (usage->code == ABS_X || usage->code == ABS_Y || usage->code == ABS_Z || usage->code == ABS_RZ)) { switch (hdev->product) { case USB_DEVICE_ID_LOGITECH_G29_WHEEL: case USB_DEVICE_ID_LOGITECH_WINGMAN_FG: case USB_DEVICE_ID_LOGITECH_WINGMAN_FFG: case USB_DEVICE_ID_LOGITECH_WHEEL: case USB_DEVICE_ID_LOGITECH_MOMO_WHEEL: case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: case USB_DEVICE_ID_LOGITECH_G25_WHEEL: case USB_DEVICE_ID_LOGITECH_DFGT_WHEEL: case USB_DEVICE_ID_LOGITECH_G27_WHEEL: case USB_DEVICE_ID_LOGITECH_WII_WHEEL: case USB_DEVICE_ID_LOGITECH_MOMO_WHEEL2: case USB_DEVICE_ID_LOGITECH_VIBRATION_WHEEL: field->application = HID_GD_MULTIAXIS; break; default: break; } } return 0; } static int lg_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if ((drv_data->quirks & LG_INVERT_HWHEEL) && usage->code == REL_HWHEEL) { input_event(field->hidinput->input, usage->type, usage->code, -value); return 1; } if (drv_data->quirks & LG_FF4) { return lg4ff_adjust_input_event(hdev, field, usage, value, drv_data); } return 0; } static int lg_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *rd, int size) { struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if (drv_data->quirks & LG_FF4) return lg4ff_raw_event(hdev, report, rd, size, drv_data); return 0; } static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct usb_interface *iface; __u8 iface_num; unsigned int connect_mask = HID_CONNECT_DEFAULT; struct lg_drv_data *drv_data; int ret; if (!hid_is_usb(hdev)) return -EINVAL; iface = to_usb_interface(hdev->dev.parent); iface_num = iface->cur_altsetting->desc.bInterfaceNumber; /* G29 only work with the 1st interface */ if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) && (iface_num != 0)) { dbg_hid("%s: ignoring ifnum %d\n", __func__, iface_num); return -ENODEV; } drv_data = kzalloc(sizeof(struct lg_drv_data), GFP_KERNEL); if (!drv_data) { hid_err(hdev, "Insufficient memory, cannot allocate driver data\n"); return -ENOMEM; } drv_data->quirks = id->driver_data; hid_set_drvdata(hdev, (void *)drv_data); if (drv_data->quirks & LG_NOGET) hdev->quirks |= HID_QUIRK_NOGET; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } if (drv_data->quirks & (LG_FF | LG_FF2 | LG_FF3 | LG_FF4)) connect_mask &= ~HID_CONNECT_FF; ret = hid_hw_start(hdev, connect_mask); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } /* Setup wireless link with Logitech Wii wheel */ if (hdev->product == USB_DEVICE_ID_LOGITECH_WII_WHEEL) { static const unsigned char cbuf[] = { 0x00, 0xAF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; u8 *buf = kmemdup(cbuf, sizeof(cbuf), GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto err_stop; } ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(cbuf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret >= 0) { /* insert a little delay of 10 jiffies ~ 40ms */ wait_queue_head_t wait; init_waitqueue_head (&wait); wait_event_interruptible_timeout(wait, 0, msecs_to_jiffies(40)); /* Select random Address */ buf[1] = 0xB2; get_random_bytes(&buf[2], 2); ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(cbuf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); } kfree(buf); } if (drv_data->quirks & LG_FF) ret = lgff_init(hdev); else if (drv_data->quirks & LG_FF2) ret = lg2ff_init(hdev); else if (drv_data->quirks & LG_FF3) ret = lg3ff_init(hdev); else if (drv_data->quirks & LG_FF4) ret = 
lg4ff_init(hdev); if (ret) goto err_stop; return 0; err_stop: hid_hw_stop(hdev); err_free: kfree(drv_data); return ret; } static void lg_remove(struct hid_device *hdev) { struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if (drv_data->quirks & LG_FF4) lg4ff_deinit(hdev); hid_hw_stop(hdev); kfree(drv_data); } static const struct hid_device_id lg_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER), .driver_data = LG_RDESC | LG_WIRELESS }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RECEIVER), .driver_data = LG_BAD_RELATIVE_KEYS }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP), .driver_data = LG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_ELITE_KBD), .driver_data = LG_IGNORE_DOUBLED_WHEEL | LG_EXPANDED_KEYMAP }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_CORDLESS_DESKTOP_LX500), .driver_data = LG_IGNORE_DOUBLED_WHEEL | LG_EXPANDED_KEYMAP }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_EXTREME_3D), .driver_data = LG_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DUAL_ACTION), .driver_data = LG_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WHEEL), .driver_data = LG_NOGET | LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD), .driver_data = LG_FF2 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD), .driver_data = LG_FF }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2), .driver_data = LG_FF }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G29_WHEEL), .driver_data = LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_F3D), .driver_data = LG_FF }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FORCE3D_PRO), .driver_data = LG_FF }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOMO_WHEEL), .driver_data = LG_NOGET | LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOMO_WHEEL2), .driver_data = LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_VIBRATION_WHEEL), .driver_data = LG_FF2 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G25_WHEEL), .driver_data = LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DFGT_WHEEL), .driver_data = LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G27_WHEEL), .driver_data = LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DFP_WHEEL), .driver_data = LG_NOGET | LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL), .driver_data = LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FG), .driver_data = LG_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG), .driver_data = LG_NOGET | LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2), .driver_data = LG_NOGET | LG_FF2 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FLIGHT_SYSTEM_G940), .driver_data = LG_FF3 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR), .driver_data = LG_RDESC_REL_ABS }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACETRAVELLER), .driver_data = LG_RDESC_REL_ABS }, { } }; MODULE_DEVICE_TABLE(hid, lg_devices); static struct hid_driver lg_driver = { .name = "logitech", .id_table = lg_devices, 
	.report_fixup = lg_report_fixup,
	.input_mapping = lg_input_mapping,
	.input_mapped = lg_input_mapped,
	.event = lg_event,
	.raw_event = lg_raw_event,
	.probe = lg_probe,
	.remove = lg_remove,
};
module_hid_driver(lg_driver);

#ifdef CONFIG_LOGIWHEELS_FF
int lg4ff_no_autoswitch = 0;
module_param_named(lg4ff_no_autoswitch, lg4ff_no_autoswitch, int, S_IRUGO);
MODULE_PARM_DESC(lg4ff_no_autoswitch,
		 "Do not switch multimode wheels to their native mode automatically");
#endif

MODULE_LICENSE("GPL");
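/*
 * Editorial note (not part of hid-lg.c): the report_fixup cases above all
 * follow the same contract -- return the descriptor that HID core should
 * parse, and update *rsize whenever a static replacement descriptor is
 * substituted for the broken one reported by the hardware.  Below is a
 * minimal, hypothetical sketch of that pattern only; the vendor/product IDs,
 * descriptor bytes and sizes are placeholders, and the non-const
 * report_fixup prototype is assumed to match the one used by the driver
 * above (newer kernels use a const-qualified variant).
 */
#include <linux/hid.h>
#include <linux/module.h>

#define EXAMPLE_RDESC_ORIG_SIZE	137	/* placeholder size of the broken descriptor */

static __u8 example_rdesc_fixed[] = {
	0x05, 0x01,	/* placeholder bytes, not a real descriptor */
};

static __u8 *example_report_fixup(struct hid_device *hdev, __u8 *rdesc,
				  unsigned int *rsize)
{
	/* Swap in the corrected descriptor only when the known-bad one is seen. */
	if (*rsize == EXAMPLE_RDESC_ORIG_SIZE) {
		hid_info(hdev, "fixing up example report descriptor\n");
		rdesc = example_rdesc_fixed;
		*rsize = sizeof(example_rdesc_fixed);
	}
	return rdesc;
}

static const struct hid_device_id example_devices[] = {
	{ HID_USB_DEVICE(0x1234, 0x5678) },	/* placeholder vendor/product IDs */
	{ }
};
MODULE_DEVICE_TABLE(hid, example_devices);

static struct hid_driver example_fixup_driver = {
	.name		= "example-fixup",
	.id_table	= example_devices,
	.report_fixup	= example_report_fixup,
};
module_hid_driver(example_fixup_driver);

MODULE_LICENSE("GPL");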
// SPDX-License-Identifier: GPL-2.0-only
/*
 * The input core
 *
 * Copyright (c) 1999-2002 Vojtech Pavlik
 */

#define pr_fmt(fmt) KBUILD_BASENAME ": " fmt

#include <linux/init.h>
#include <linux/types.h>
#include <linux/idr.h>
#include <linux/input/mt.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/pm.h>
#include <linux/poll.h>
#include <linux/device.h>
#include <linux/kstrtox.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include "input-compat.h"
#include "input-core-private.h"
#include "input-poller.h"

MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>");
MODULE_DESCRIPTION("Input core");
MODULE_LICENSE("GPL");

#define INPUT_MAX_CHAR_DEVICES	1024
#define INPUT_FIRST_DYNAMIC_DEV	256

static DEFINE_IDA(input_ida);

static LIST_HEAD(input_dev_list);
static LIST_HEAD(input_handler_list);

/*
 * input_mutex protects access to both input_dev_list and input_handler_list.
 * This also causes input_[un]register_device and input_[un]register_handler
 * be mutually exclusive which simplifies locking in drivers implementing
 * input handlers.
*/ static DEFINE_MUTEX(input_mutex); static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; static const unsigned int input_max_code[EV_CNT] = { [EV_KEY] = KEY_MAX, [EV_REL] = REL_MAX, [EV_ABS] = ABS_MAX, [EV_MSC] = MSC_MAX, [EV_SW] = SW_MAX, [EV_LED] = LED_MAX, [EV_SND] = SND_MAX, [EV_FF] = FF_MAX, }; static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { return code <= max && test_bit(code, bm); } static int input_defuzz_abs_event(int value, int old_val, int fuzz) { if (fuzz) { if (value > old_val - fuzz / 2 && value < old_val + fuzz / 2) return old_val; if (value > old_val - fuzz && value < old_val + fuzz) return (old_val * 3 + value) / 4; if (value > old_val - fuzz * 2 && value < old_val + fuzz * 2) return (old_val + value) / 2; } return value; } static void input_start_autorepeat(struct input_dev *dev, int code) { if (test_bit(EV_REP, dev->evbit) && dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && dev->timer.function) { dev->repeat_key = code; mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); } } static void input_stop_autorepeat(struct input_dev *dev) { del_timer(&dev->timer); } /* * Pass event first through all filters and then, if event has not been * filtered out, through all open handles. This function is called with * dev->event_lock held and interrupts disabled. */ static unsigned int input_to_handler(struct input_handle *handle, struct input_value *vals, unsigned int count) { struct input_handler *handler = handle->handler; struct input_value *end = vals; struct input_value *v; if (handler->filter) { for (v = vals; v != vals + count; v++) { if (handler->filter(handle, v->type, v->code, v->value)) continue; if (end != v) *end = *v; end++; } count = end - vals; } if (!count) return 0; if (handler->events) handler->events(handle, vals, count); else if (handler->event) for (v = vals; v != vals + count; v++) handler->event(handle, v->type, v->code, v->value); return count; } /* * Pass values first through all filters and then, if event has not been * filtered out, through all open handles. This function is called with * dev->event_lock held and interrupts disabled. */ static void input_pass_values(struct input_dev *dev, struct input_value *vals, unsigned int count) { struct input_handle *handle; struct input_value *v; lockdep_assert_held(&dev->event_lock); if (!count) return; rcu_read_lock(); handle = rcu_dereference(dev->grab); if (handle) { count = input_to_handler(handle, vals, count); } else { list_for_each_entry_rcu(handle, &dev->h_list, d_node) if (handle->open) { count = input_to_handler(handle, vals, count); if (!count) break; } } rcu_read_unlock(); /* trigger auto repeat for key events */ if (test_bit(EV_REP, dev->evbit) && test_bit(EV_KEY, dev->evbit)) { for (v = vals; v != vals + count; v++) { if (v->type == EV_KEY && v->value != 2) { if (v->value) input_start_autorepeat(dev, v->code); else input_stop_autorepeat(dev); } } } } #define INPUT_IGNORE_EVENT 0 #define INPUT_PASS_TO_HANDLERS 1 #define INPUT_PASS_TO_DEVICE 2 #define INPUT_SLOT 4 #define INPUT_FLUSH 8 #define INPUT_PASS_TO_ALL (INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE) static int input_handle_abs_event(struct input_dev *dev, unsigned int code, int *pval) { struct input_mt *mt = dev->mt; bool is_new_slot = false; bool is_mt_event; int *pold; if (code == ABS_MT_SLOT) { /* * "Stage" the event; we'll flush it later, when we * get actual touch data. 
*/ if (mt && *pval >= 0 && *pval < mt->num_slots) mt->slot = *pval; return INPUT_IGNORE_EVENT; } is_mt_event = input_is_mt_value(code); if (!is_mt_event) { pold = &dev->absinfo[code].value; } else if (mt) { pold = &mt->slots[mt->slot].abs[code - ABS_MT_FIRST]; is_new_slot = mt->slot != dev->absinfo[ABS_MT_SLOT].value; } else { /* * Bypass filtering for multi-touch events when * not employing slots. */ pold = NULL; } if (pold) { *pval = input_defuzz_abs_event(*pval, *pold, dev->absinfo[code].fuzz); if (*pold == *pval) return INPUT_IGNORE_EVENT; *pold = *pval; } /* Flush pending "slot" event */ if (is_new_slot) { dev->absinfo[ABS_MT_SLOT].value = mt->slot; return INPUT_PASS_TO_HANDLERS | INPUT_SLOT; } return INPUT_PASS_TO_HANDLERS; } static int input_get_disposition(struct input_dev *dev, unsigned int type, unsigned int code, int *pval) { int disposition = INPUT_IGNORE_EVENT; int value = *pval; /* filter-out events from inhibited devices */ if (dev->inhibited) return INPUT_IGNORE_EVENT; switch (type) { case EV_SYN: switch (code) { case SYN_CONFIG: disposition = INPUT_PASS_TO_ALL; break; case SYN_REPORT: disposition = INPUT_PASS_TO_HANDLERS | INPUT_FLUSH; break; case SYN_MT_REPORT: disposition = INPUT_PASS_TO_HANDLERS; break; } break; case EV_KEY: if (is_event_supported(code, dev->keybit, KEY_MAX)) { /* auto-repeat bypasses state updates */ if (value == 2) { disposition = INPUT_PASS_TO_HANDLERS; break; } if (!!test_bit(code, dev->key) != !!value) { __change_bit(code, dev->key); disposition = INPUT_PASS_TO_HANDLERS; } } break; case EV_SW: if (is_event_supported(code, dev->swbit, SW_MAX) && !!test_bit(code, dev->sw) != !!value) { __change_bit(code, dev->sw); disposition = INPUT_PASS_TO_HANDLERS; } break; case EV_ABS: if (is_event_supported(code, dev->absbit, ABS_MAX)) disposition = input_handle_abs_event(dev, code, &value); break; case EV_REL: if (is_event_supported(code, dev->relbit, REL_MAX) && value) disposition = INPUT_PASS_TO_HANDLERS; break; case EV_MSC: if (is_event_supported(code, dev->mscbit, MSC_MAX)) disposition = INPUT_PASS_TO_ALL; break; case EV_LED: if (is_event_supported(code, dev->ledbit, LED_MAX) && !!test_bit(code, dev->led) != !!value) { __change_bit(code, dev->led); disposition = INPUT_PASS_TO_ALL; } break; case EV_SND: if (is_event_supported(code, dev->sndbit, SND_MAX)) { if (!!test_bit(code, dev->snd) != !!value) __change_bit(code, dev->snd); disposition = INPUT_PASS_TO_ALL; } break; case EV_REP: if (code <= REP_MAX && value >= 0 && dev->rep[code] != value) { dev->rep[code] = value; disposition = INPUT_PASS_TO_ALL; } break; case EV_FF: if (value >= 0) disposition = INPUT_PASS_TO_ALL; break; case EV_PWR: disposition = INPUT_PASS_TO_ALL; break; } *pval = value; return disposition; } static void input_event_dispose(struct input_dev *dev, int disposition, unsigned int type, unsigned int code, int value) { if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) dev->event(dev, type, code, value); if (!dev->vals) return; if (disposition & INPUT_PASS_TO_HANDLERS) { struct input_value *v; if (disposition & INPUT_SLOT) { v = &dev->vals[dev->num_vals++]; v->type = EV_ABS; v->code = ABS_MT_SLOT; v->value = dev->mt->slot; } v = &dev->vals[dev->num_vals++]; v->type = type; v->code = code; v->value = value; } if (disposition & INPUT_FLUSH) { if (dev->num_vals >= 2) input_pass_values(dev, dev->vals, dev->num_vals); dev->num_vals = 0; /* * Reset the timestamp on flush so we won't end up * with a stale one. 
Note we only need to reset the * monolithic one as we use its presence when deciding * whether to generate a synthetic timestamp. */ dev->timestamp[INPUT_CLK_MONO] = ktime_set(0, 0); } else if (dev->num_vals >= dev->max_vals - 2) { dev->vals[dev->num_vals++] = input_value_sync; input_pass_values(dev, dev->vals, dev->num_vals); dev->num_vals = 0; } } void input_handle_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { int disposition; lockdep_assert_held(&dev->event_lock); disposition = input_get_disposition(dev, type, code, &value); if (disposition != INPUT_IGNORE_EVENT) { if (type != EV_SYN) add_input_randomness(type, code, value); input_event_dispose(dev, disposition, type, code, value); } } /** * input_event() - report new input event * @dev: device that generated the event * @type: type of the event * @code: event code * @value: value of the event * * This function should be used by drivers implementing various input * devices to report input events. See also input_inject_event(). * * NOTE: input_event() may be safely used right after input device was * allocated with input_allocate_device(), even before it is registered * with input_register_device(), but the event will not reach any of the * input handlers. Such early invocation of input_event() may be used * to 'seed' initial state of a switch or initial position of absolute * axis, etc. */ void input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { unsigned long flags; if (is_event_supported(type, dev->evbit, EV_MAX)) { spin_lock_irqsave(&dev->event_lock, flags); input_handle_event(dev, type, code, value); spin_unlock_irqrestore(&dev->event_lock, flags); } } EXPORT_SYMBOL(input_event); /** * input_inject_event() - send input event from input handler * @handle: input handle to send event through * @type: type of the event * @code: event code * @value: value of the event * * Similar to input_event() but will ignore event if device is * "grabbed" and handle injecting event is not the one that owns * the device. */ void input_inject_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { struct input_dev *dev = handle->dev; struct input_handle *grab; unsigned long flags; if (is_event_supported(type, dev->evbit, EV_MAX)) { spin_lock_irqsave(&dev->event_lock, flags); rcu_read_lock(); grab = rcu_dereference(dev->grab); if (!grab || grab == handle) input_handle_event(dev, type, code, value); rcu_read_unlock(); spin_unlock_irqrestore(&dev->event_lock, flags); } } EXPORT_SYMBOL(input_inject_event); /** * input_alloc_absinfo - allocates array of input_absinfo structs * @dev: the input device emitting absolute events * * If the absinfo struct the caller asked for is already allocated, this * functions will not do anything. */ void input_alloc_absinfo(struct input_dev *dev) { if (dev->absinfo) return; dev->absinfo = kcalloc(ABS_CNT, sizeof(*dev->absinfo), GFP_KERNEL); if (!dev->absinfo) { dev_err(dev->dev.parent ?: &dev->dev, "%s: unable to allocate memory\n", __func__); /* * We will handle this allocation failure in * input_register_device() when we refuse to register input * device with ABS bits but without absinfo. 
*/ } } EXPORT_SYMBOL(input_alloc_absinfo); void input_set_abs_params(struct input_dev *dev, unsigned int axis, int min, int max, int fuzz, int flat) { struct input_absinfo *absinfo; __set_bit(EV_ABS, dev->evbit); __set_bit(axis, dev->absbit); input_alloc_absinfo(dev); if (!dev->absinfo) return; absinfo = &dev->absinfo[axis]; absinfo->minimum = min; absinfo->maximum = max; absinfo->fuzz = fuzz; absinfo->flat = flat; } EXPORT_SYMBOL(input_set_abs_params); /** * input_copy_abs - Copy absinfo from one input_dev to another * @dst: Destination input device to copy the abs settings to * @dst_axis: ABS_* value selecting the destination axis * @src: Source input device to copy the abs settings from * @src_axis: ABS_* value selecting the source axis * * Set absinfo for the selected destination axis by copying it from * the specified source input device's source axis. * This is useful to e.g. setup a pen/stylus input-device for combined * touchscreen/pen hardware where the pen uses the same coordinates as * the touchscreen. */ void input_copy_abs(struct input_dev *dst, unsigned int dst_axis, const struct input_dev *src, unsigned int src_axis) { /* src must have EV_ABS and src_axis set */ if (WARN_ON(!(test_bit(EV_ABS, src->evbit) && test_bit(src_axis, src->absbit)))) return; /* * input_alloc_absinfo() may have failed for the source. Our caller is * expected to catch this when registering the input devices, which may * happen after the input_copy_abs() call. */ if (!src->absinfo) return; input_set_capability(dst, EV_ABS, dst_axis); if (!dst->absinfo) return; dst->absinfo[dst_axis] = src->absinfo[src_axis]; } EXPORT_SYMBOL(input_copy_abs); /** * input_grab_device - grabs device for exclusive use * @handle: input handle that wants to own the device * * When a device is grabbed by an input handle all events generated by * the device are delivered only to this handle. Also events injected * by other input handles are ignored while device is grabbed. */ int input_grab_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; int retval; retval = mutex_lock_interruptible(&dev->mutex); if (retval) return retval; if (dev->grab) { retval = -EBUSY; goto out; } rcu_assign_pointer(dev->grab, handle); out: mutex_unlock(&dev->mutex); return retval; } EXPORT_SYMBOL(input_grab_device); static void __input_release_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; struct input_handle *grabber; grabber = rcu_dereference_protected(dev->grab, lockdep_is_held(&dev->mutex)); if (grabber == handle) { rcu_assign_pointer(dev->grab, NULL); /* Make sure input_pass_values() notices that grab is gone */ synchronize_rcu(); list_for_each_entry(handle, &dev->h_list, d_node) if (handle->open && handle->handler->start) handle->handler->start(handle); } } /** * input_release_device - release previously grabbed device * @handle: input handle that owns the device * * Releases previously grabbed device so that other input handles can * start receiving input events. Upon release all handlers attached * to the device have their start() method called so they have a change * to synchronize device state with the rest of the system. 
*/ void input_release_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; mutex_lock(&dev->mutex); __input_release_device(handle); mutex_unlock(&dev->mutex); } EXPORT_SYMBOL(input_release_device); /** * input_open_device - open input device * @handle: handle through which device is being accessed * * This function should be called by input handlers when they * want to start receive events from given input device. */ int input_open_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; int retval; retval = mutex_lock_interruptible(&dev->mutex); if (retval) return retval; if (dev->going_away) { retval = -ENODEV; goto out; } handle->open++; if (dev->users++ || dev->inhibited) { /* * Device is already opened and/or inhibited, * so we can exit immediately and report success. */ goto out; } if (dev->open) { retval = dev->open(dev); if (retval) { dev->users--; handle->open--; /* * Make sure we are not delivering any more events * through this handle */ synchronize_rcu(); goto out; } } if (dev->poller) input_dev_poller_start(dev->poller); out: mutex_unlock(&dev->mutex); return retval; } EXPORT_SYMBOL(input_open_device); int input_flush_device(struct input_handle *handle, struct file *file) { struct input_dev *dev = handle->dev; int retval; retval = mutex_lock_interruptible(&dev->mutex); if (retval) return retval; if (dev->flush) retval = dev->flush(dev, file); mutex_unlock(&dev->mutex); return retval; } EXPORT_SYMBOL(input_flush_device); /** * input_close_device - close input device * @handle: handle through which device is being accessed * * This function should be called by input handlers when they * want to stop receive events from given input device. */ void input_close_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; mutex_lock(&dev->mutex); __input_release_device(handle); if (!--dev->users && !dev->inhibited) { if (dev->poller) input_dev_poller_stop(dev->poller); if (dev->close) dev->close(dev); } if (!--handle->open) { /* * synchronize_rcu() makes sure that input_pass_values() * completed and that no more input events are delivered * through this handle */ synchronize_rcu(); } mutex_unlock(&dev->mutex); } EXPORT_SYMBOL(input_close_device); /* * Simulate keyup events for all keys that are marked as pressed. * The function must be called with dev->event_lock held. */ static bool input_dev_release_keys(struct input_dev *dev) { bool need_sync = false; int code; lockdep_assert_held(&dev->event_lock); if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) { for_each_set_bit(code, dev->key, KEY_CNT) { input_handle_event(dev, EV_KEY, code, 0); need_sync = true; } } return need_sync; } /* * Prepare device for unregistering */ static void input_disconnect_device(struct input_dev *dev) { struct input_handle *handle; /* * Mark device as going away. Note that we take dev->mutex here * not to protect access to dev->going_away but rather to ensure * that there are no threads in the middle of input_open_device() */ mutex_lock(&dev->mutex); dev->going_away = true; mutex_unlock(&dev->mutex); spin_lock_irq(&dev->event_lock); /* * Simulate keyup events for all pressed keys so that handlers * are not left with "stuck" keys. The driver may continue * generate events even after we done here but they will not * reach any handlers. 
*/ if (input_dev_release_keys(dev)) input_handle_event(dev, EV_SYN, SYN_REPORT, 1); list_for_each_entry(handle, &dev->h_list, d_node) handle->open = 0; spin_unlock_irq(&dev->event_lock); } /** * input_scancode_to_scalar() - converts scancode in &struct input_keymap_entry * @ke: keymap entry containing scancode to be converted. * @scancode: pointer to the location where converted scancode should * be stored. * * This function is used to convert scancode stored in &struct keymap_entry * into scalar form understood by legacy keymap handling methods. These * methods expect scancodes to be represented as 'unsigned int'. */ int input_scancode_to_scalar(const struct input_keymap_entry *ke, unsigned int *scancode) { switch (ke->len) { case 1: *scancode = *((u8 *)ke->scancode); break; case 2: *scancode = *((u16 *)ke->scancode); break; case 4: *scancode = *((u32 *)ke->scancode); break; default: return -EINVAL; } return 0; } EXPORT_SYMBOL(input_scancode_to_scalar); /* * Those routines handle the default case where no [gs]etkeycode() is * defined. In this case, an array indexed by the scancode is used. */ static unsigned int input_fetch_keycode(struct input_dev *dev, unsigned int index) { switch (dev->keycodesize) { case 1: return ((u8 *)dev->keycode)[index]; case 2: return ((u16 *)dev->keycode)[index]; default: return ((u32 *)dev->keycode)[index]; } } static int input_default_getkeycode(struct input_dev *dev, struct input_keymap_entry *ke) { unsigned int index; int error; if (!dev->keycodesize) return -EINVAL; if (ke->flags & INPUT_KEYMAP_BY_INDEX) index = ke->index; else { error = input_scancode_to_scalar(ke, &index); if (error) return error; } if (index >= dev->keycodemax) return -EINVAL; ke->keycode = input_fetch_keycode(dev, index); ke->index = index; ke->len = sizeof(index); memcpy(ke->scancode, &index, sizeof(index)); return 0; } static int input_default_setkeycode(struct input_dev *dev, const struct input_keymap_entry *ke, unsigned int *old_keycode) { unsigned int index; int error; int i; if (!dev->keycodesize) return -EINVAL; if (ke->flags & INPUT_KEYMAP_BY_INDEX) { index = ke->index; } else { error = input_scancode_to_scalar(ke, &index); if (error) return error; } if (index >= dev->keycodemax) return -EINVAL; if (dev->keycodesize < sizeof(ke->keycode) && (ke->keycode >> (dev->keycodesize * 8))) return -EINVAL; switch (dev->keycodesize) { case 1: { u8 *k = (u8 *)dev->keycode; *old_keycode = k[index]; k[index] = ke->keycode; break; } case 2: { u16 *k = (u16 *)dev->keycode; *old_keycode = k[index]; k[index] = ke->keycode; break; } default: { u32 *k = (u32 *)dev->keycode; *old_keycode = k[index]; k[index] = ke->keycode; break; } } if (*old_keycode <= KEY_MAX) { __clear_bit(*old_keycode, dev->keybit); for (i = 0; i < dev->keycodemax; i++) { if (input_fetch_keycode(dev, i) == *old_keycode) { __set_bit(*old_keycode, dev->keybit); /* Setting the bit twice is useless, so break */ break; } } } __set_bit(ke->keycode, dev->keybit); return 0; } /** * input_get_keycode - retrieve keycode currently mapped to a given scancode * @dev: input device which keymap is being queried * @ke: keymap entry * * This function should be called by anyone interested in retrieving current * keymap. Presently evdev handlers use it. 
*/ int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke) { unsigned long flags; int retval; spin_lock_irqsave(&dev->event_lock, flags); retval = dev->getkeycode(dev, ke); spin_unlock_irqrestore(&dev->event_lock, flags); return retval; } EXPORT_SYMBOL(input_get_keycode); /** * input_set_keycode - attribute a keycode to a given scancode * @dev: input device which keymap is being updated * @ke: new keymap entry * * This function should be called by anyone needing to update current * keymap. Presently keyboard and evdev handlers use it. */ int input_set_keycode(struct input_dev *dev, const struct input_keymap_entry *ke) { unsigned long flags; unsigned int old_keycode; int retval; if (ke->keycode > KEY_MAX) return -EINVAL; spin_lock_irqsave(&dev->event_lock, flags); retval = dev->setkeycode(dev, ke, &old_keycode); if (retval) goto out; /* Make sure KEY_RESERVED did not get enabled. */ __clear_bit(KEY_RESERVED, dev->keybit); /* * Simulate keyup event if keycode is not present * in the keymap anymore */ if (old_keycode > KEY_MAX) { dev_warn(dev->dev.parent ?: &dev->dev, "%s: got too big old keycode %#x\n", __func__, old_keycode); } else if (test_bit(EV_KEY, dev->evbit) && !is_event_supported(old_keycode, dev->keybit, KEY_MAX) && __test_and_clear_bit(old_keycode, dev->key)) { /* * We have to use input_event_dispose() here directly instead * of input_handle_event() because the key we want to release * here is considered no longer supported by the device and * input_handle_event() will ignore it. */ input_event_dispose(dev, INPUT_PASS_TO_HANDLERS, EV_KEY, old_keycode, 0); input_event_dispose(dev, INPUT_PASS_TO_HANDLERS | INPUT_FLUSH, EV_SYN, SYN_REPORT, 1); } out: spin_unlock_irqrestore(&dev->event_lock, flags); return retval; } EXPORT_SYMBOL(input_set_keycode); bool input_match_device_id(const struct input_dev *dev, const struct input_device_id *id) { if (id->flags & INPUT_DEVICE_ID_MATCH_BUS) if (id->bustype != dev->id.bustype) return false; if (id->flags & INPUT_DEVICE_ID_MATCH_VENDOR) if (id->vendor != dev->id.vendor) return false; if (id->flags & INPUT_DEVICE_ID_MATCH_PRODUCT) if (id->product != dev->id.product) return false; if (id->flags & INPUT_DEVICE_ID_MATCH_VERSION) if (id->version != dev->id.version) return false; if (!bitmap_subset(id->evbit, dev->evbit, EV_MAX) || !bitmap_subset(id->keybit, dev->keybit, KEY_MAX) || !bitmap_subset(id->relbit, dev->relbit, REL_MAX) || !bitmap_subset(id->absbit, dev->absbit, ABS_MAX) || !bitmap_subset(id->mscbit, dev->mscbit, MSC_MAX) || !bitmap_subset(id->ledbit, dev->ledbit, LED_MAX) || !bitmap_subset(id->sndbit, dev->sndbit, SND_MAX) || !bitmap_subset(id->ffbit, dev->ffbit, FF_MAX) || !bitmap_subset(id->swbit, dev->swbit, SW_MAX) || !bitmap_subset(id->propbit, dev->propbit, INPUT_PROP_MAX)) { return false; } return true; } EXPORT_SYMBOL(input_match_device_id); static const struct input_device_id *input_match_device(struct input_handler *handler, struct input_dev *dev) { const struct input_device_id *id; for (id = handler->id_table; id->flags || id->driver_info; id++) { if (input_match_device_id(dev, id) && (!handler->match || handler->match(handler, dev))) { return id; } } return NULL; } static int input_attach_handler(struct input_dev *dev, struct input_handler *handler) { const struct input_device_id *id; int error; id = input_match_device(handler, dev); if (!id) return -ENODEV; error = handler->connect(handler, dev, id); if (error && error != -ENODEV) pr_err("failed to attach handler %s to device %s, error: %d\n", 
handler->name, kobject_name(&dev->dev.kobj), error); return error; } #ifdef CONFIG_COMPAT static int input_bits_to_string(char *buf, int buf_size, unsigned long bits, bool skip_empty) { int len = 0; if (in_compat_syscall()) { u32 dword = bits >> 32; if (dword || !skip_empty) len += snprintf(buf, buf_size, "%x ", dword); dword = bits & 0xffffffffUL; if (dword || !skip_empty || len) len += snprintf(buf + len, max(buf_size - len, 0), "%x", dword); } else { if (bits || !skip_empty) len += snprintf(buf, buf_size, "%lx", bits); } return len; } #else /* !CONFIG_COMPAT */ static int input_bits_to_string(char *buf, int buf_size, unsigned long bits, bool skip_empty) { return bits || !skip_empty ? snprintf(buf, buf_size, "%lx", bits) : 0; } #endif #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_bus_input_dir; static DECLARE_WAIT_QUEUE_HEAD(input_devices_poll_wait); static int input_devices_state; static inline void input_wakeup_procfs_readers(void) { input_devices_state++; wake_up(&input_devices_poll_wait); } static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait) { poll_wait(file, &input_devices_poll_wait, wait); if (file->f_version != input_devices_state) { file->f_version = input_devices_state; return EPOLLIN | EPOLLRDNORM; } return 0; } union input_seq_state { struct { unsigned short pos; bool mutex_acquired; }; void *p; }; static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos) { union input_seq_state *state = (union input_seq_state *)&seq->private; int error; /* We need to fit into seq->private pointer */ BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private)); error = mutex_lock_interruptible(&input_mutex); if (error) { state->mutex_acquired = false; return ERR_PTR(error); } state->mutex_acquired = true; return seq_list_start(&input_dev_list, *pos); } static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_list_next(v, &input_dev_list, pos); } static void input_seq_stop(struct seq_file *seq, void *v) { union input_seq_state *state = (union input_seq_state *)&seq->private; if (state->mutex_acquired) mutex_unlock(&input_mutex); } static void input_seq_print_bitmap(struct seq_file *seq, const char *name, unsigned long *bitmap, int max) { int i; bool skip_empty = true; char buf[18]; seq_printf(seq, "B: %s=", name); for (i = BITS_TO_LONGS(max) - 1; i >= 0; i--) { if (input_bits_to_string(buf, sizeof(buf), bitmap[i], skip_empty)) { skip_empty = false; seq_printf(seq, "%s%s", buf, i > 0 ? " " : ""); } } /* * If no output was produced print a single 0. */ if (skip_empty) seq_putc(seq, '0'); seq_putc(seq, '\n'); } static int input_devices_seq_show(struct seq_file *seq, void *v) { struct input_dev *dev = container_of(v, struct input_dev, node); const char *path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); struct input_handle *handle; seq_printf(seq, "I: Bus=%04x Vendor=%04x Product=%04x Version=%04x\n", dev->id.bustype, dev->id.vendor, dev->id.product, dev->id.version); seq_printf(seq, "N: Name=\"%s\"\n", dev->name ? dev->name : ""); seq_printf(seq, "P: Phys=%s\n", dev->phys ? dev->phys : ""); seq_printf(seq, "S: Sysfs=%s\n", path ? path : ""); seq_printf(seq, "U: Uniq=%s\n", dev->uniq ? 
dev->uniq : ""); seq_puts(seq, "H: Handlers="); list_for_each_entry(handle, &dev->h_list, d_node) seq_printf(seq, "%s ", handle->name); seq_putc(seq, '\n'); input_seq_print_bitmap(seq, "PROP", dev->propbit, INPUT_PROP_MAX); input_seq_print_bitmap(seq, "EV", dev->evbit, EV_MAX); if (test_bit(EV_KEY, dev->evbit)) input_seq_print_bitmap(seq, "KEY", dev->keybit, KEY_MAX); if (test_bit(EV_REL, dev->evbit)) input_seq_print_bitmap(seq, "REL", dev->relbit, REL_MAX); if (test_bit(EV_ABS, dev->evbit)) input_seq_print_bitmap(seq, "ABS", dev->absbit, ABS_MAX); if (test_bit(EV_MSC, dev->evbit)) input_seq_print_bitmap(seq, "MSC", dev->mscbit, MSC_MAX); if (test_bit(EV_LED, dev->evbit)) input_seq_print_bitmap(seq, "LED", dev->ledbit, LED_MAX); if (test_bit(EV_SND, dev->evbit)) input_seq_print_bitmap(seq, "SND", dev->sndbit, SND_MAX); if (test_bit(EV_FF, dev->evbit)) input_seq_print_bitmap(seq, "FF", dev->ffbit, FF_MAX); if (test_bit(EV_SW, dev->evbit)) input_seq_print_bitmap(seq, "SW", dev->swbit, SW_MAX); seq_putc(seq, '\n'); kfree(path); return 0; } static const struct seq_operations input_devices_seq_ops = { .start = input_devices_seq_start, .next = input_devices_seq_next, .stop = input_seq_stop, .show = input_devices_seq_show, }; static int input_proc_devices_open(struct inode *inode, struct file *file) { return seq_open(file, &input_devices_seq_ops); } static const struct proc_ops input_devices_proc_ops = { .proc_open = input_proc_devices_open, .proc_poll = input_proc_devices_poll, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, }; static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos) { union input_seq_state *state = (union input_seq_state *)&seq->private; int error; /* We need to fit into seq->private pointer */ BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private)); error = mutex_lock_interruptible(&input_mutex); if (error) { state->mutex_acquired = false; return ERR_PTR(error); } state->mutex_acquired = true; state->pos = *pos; return seq_list_start(&input_handler_list, *pos); } static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos) { union input_seq_state *state = (union input_seq_state *)&seq->private; state->pos = *pos + 1; return seq_list_next(v, &input_handler_list, pos); } static int input_handlers_seq_show(struct seq_file *seq, void *v) { struct input_handler *handler = container_of(v, struct input_handler, node); union input_seq_state *state = (union input_seq_state *)&seq->private; seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name); if (handler->filter) seq_puts(seq, " (filter)"); if (handler->legacy_minors) seq_printf(seq, " Minor=%d", handler->minor); seq_putc(seq, '\n'); return 0; } static const struct seq_operations input_handlers_seq_ops = { .start = input_handlers_seq_start, .next = input_handlers_seq_next, .stop = input_seq_stop, .show = input_handlers_seq_show, }; static int input_proc_handlers_open(struct inode *inode, struct file *file) { return seq_open(file, &input_handlers_seq_ops); } static const struct proc_ops input_handlers_proc_ops = { .proc_open = input_proc_handlers_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, }; static int __init input_proc_init(void) { struct proc_dir_entry *entry; proc_bus_input_dir = proc_mkdir("bus/input", NULL); if (!proc_bus_input_dir) return -ENOMEM; entry = proc_create("devices", 0, proc_bus_input_dir, &input_devices_proc_ops); if (!entry) goto fail1; entry = proc_create("handlers", 0, 
proc_bus_input_dir, &input_handlers_proc_ops); if (!entry) goto fail2; return 0; fail2: remove_proc_entry("devices", proc_bus_input_dir); fail1: remove_proc_entry("bus/input", NULL); return -ENOMEM; } static void input_proc_exit(void) { remove_proc_entry("devices", proc_bus_input_dir); remove_proc_entry("handlers", proc_bus_input_dir); remove_proc_entry("bus/input", NULL); } #else /* !CONFIG_PROC_FS */ static inline void input_wakeup_procfs_readers(void) { } static inline int input_proc_init(void) { return 0; } static inline void input_proc_exit(void) { } #endif #define INPUT_DEV_STRING_ATTR_SHOW(name) \ static ssize_t input_dev_show_##name(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct input_dev *input_dev = to_input_dev(dev); \ \ return sysfs_emit(buf, "%s\n", \ input_dev->name ? input_dev->name : ""); \ } \ static DEVICE_ATTR(name, S_IRUGO, input_dev_show_##name, NULL) INPUT_DEV_STRING_ATTR_SHOW(name); INPUT_DEV_STRING_ATTR_SHOW(phys); INPUT_DEV_STRING_ATTR_SHOW(uniq); static int input_print_modalias_bits(char *buf, int size, char name, const unsigned long *bm, unsigned int min_bit, unsigned int max_bit) { int len = 0, i; len += snprintf(buf, max(size, 0), "%c", name); for (i = min_bit; i < max_bit; i++) if (bm[BIT_WORD(i)] & BIT_MASK(i)) len += snprintf(buf + len, max(size - len, 0), "%X,", i); return len; } static int input_print_modalias(char *buf, int size, const struct input_dev *id, int add_cr) { int len; len = snprintf(buf, max(size, 0), "input:b%04Xv%04Xp%04Xe%04X-", id->id.bustype, id->id.vendor, id->id.product, id->id.version); len += input_print_modalias_bits(buf + len, size - len, 'e', id->evbit, 0, EV_MAX); len += input_print_modalias_bits(buf + len, size - len, 'k', id->keybit, KEY_MIN_INTERESTING, KEY_MAX); len += input_print_modalias_bits(buf + len, size - len, 'r', id->relbit, 0, REL_MAX); len += input_print_modalias_bits(buf + len, size - len, 'a', id->absbit, 0, ABS_MAX); len += input_print_modalias_bits(buf + len, size - len, 'm', id->mscbit, 0, MSC_MAX); len += input_print_modalias_bits(buf + len, size - len, 'l', id->ledbit, 0, LED_MAX); len += input_print_modalias_bits(buf + len, size - len, 's', id->sndbit, 0, SND_MAX); len += input_print_modalias_bits(buf + len, size - len, 'f', id->ffbit, 0, FF_MAX); len += input_print_modalias_bits(buf + len, size - len, 'w', id->swbit, 0, SW_MAX); if (add_cr) len += snprintf(buf + len, max(size - len, 0), "\n"); return len; } static ssize_t input_dev_show_modalias(struct device *dev, struct device_attribute *attr, char *buf) { struct input_dev *id = to_input_dev(dev); ssize_t len; len = input_print_modalias(buf, PAGE_SIZE, id, 1); return min_t(int, len, PAGE_SIZE); } static DEVICE_ATTR(modalias, S_IRUGO, input_dev_show_modalias, NULL); static int input_print_bitmap(char *buf, int buf_size, const unsigned long *bitmap, int max, int add_cr); static ssize_t input_dev_show_properties(struct device *dev, struct device_attribute *attr, char *buf) { struct input_dev *input_dev = to_input_dev(dev); int len = input_print_bitmap(buf, PAGE_SIZE, input_dev->propbit, INPUT_PROP_MAX, true); return min_t(int, len, PAGE_SIZE); } static DEVICE_ATTR(properties, S_IRUGO, input_dev_show_properties, NULL); static int input_inhibit_device(struct input_dev *dev); static int input_uninhibit_device(struct input_dev *dev); static ssize_t inhibited_show(struct device *dev, struct device_attribute *attr, char *buf) { struct input_dev *input_dev = to_input_dev(dev); return sysfs_emit(buf, "%d\n", input_dev->inhibited); } 
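/*
 * Editorial note (not part of input.c): the "inhibited" attribute implemented
 * by inhibited_show() above and inhibited_store() below is exposed through
 * sysfs, so userspace can pause and resume event delivery for a device
 * without closing it.  A minimal usage sketch (the input5 instance number is
 * a placeholder that depends on the system):
 *
 *	echo 1 > /sys/class/input/input5/inhibited	(inhibit the device)
 *	echo 0 > /sys/class/input/input5/inhibited	(uninhibit it again)
 *
 * Writes are parsed with kstrtobool(), so "1"/"0" and "y"/"n" style values
 * are accepted; a failed inhibit/uninhibit call propagates its error code
 * back to the writer.
 */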
static ssize_t inhibited_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct input_dev *input_dev = to_input_dev(dev); ssize_t rv; bool inhibited; if (kstrtobool(buf, &inhibited)) return -EINVAL; if (inhibited) rv = input_inhibit_device(input_dev); else rv = input_uninhibit_device(input_dev); if (rv != 0) return rv; return len; } static DEVICE_ATTR_RW(inhibited); static struct attribute *input_dev_attrs[] = { &dev_attr_name.attr, &dev_attr_phys.attr, &dev_attr_uniq.attr, &dev_attr_modalias.attr, &dev_attr_properties.attr, &dev_attr_inhibited.attr, NULL }; static const struct attribute_group input_dev_attr_group = { .attrs = input_dev_attrs, }; #define INPUT_DEV_ID_ATTR(name) \ static ssize_t input_dev_show_id_##name(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct input_dev *input_dev = to_input_dev(dev); \ return sysfs_emit(buf, "%04x\n", input_dev->id.name); \ } \ static DEVICE_ATTR(name, S_IRUGO, input_dev_show_id_##name, NULL) INPUT_DEV_ID_ATTR(bustype); INPUT_DEV_ID_ATTR(vendor); INPUT_DEV_ID_ATTR(product); INPUT_DEV_ID_ATTR(version); static struct attribute *input_dev_id_attrs[] = { &dev_attr_bustype.attr, &dev_attr_vendor.attr, &dev_attr_product.attr, &dev_attr_version.attr, NULL }; static const struct attribute_group input_dev_id_attr_group = { .name = "id", .attrs = input_dev_id_attrs, }; static int input_print_bitmap(char *buf, int buf_size, const unsigned long *bitmap, int max, int add_cr) { int i; int len = 0; bool skip_empty = true; for (i = BITS_TO_LONGS(max) - 1; i >= 0; i--) { len += input_bits_to_string(buf + len, max(buf_size - len, 0), bitmap[i], skip_empty); if (len) { skip_empty = false; if (i > 0) len += snprintf(buf + len, max(buf_size - len, 0), " "); } } /* * If no output was produced print a single 0. */ if (len == 0) len = snprintf(buf, buf_size, "%d", 0); if (add_cr) len += snprintf(buf + len, max(buf_size - len, 0), "\n"); return len; } #define INPUT_DEV_CAP_ATTR(ev, bm) \ static ssize_t input_dev_show_cap_##bm(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct input_dev *input_dev = to_input_dev(dev); \ int len = input_print_bitmap(buf, PAGE_SIZE, \ input_dev->bm##bit, ev##_MAX, \ true); \ return min_t(int, len, PAGE_SIZE); \ } \ static DEVICE_ATTR(bm, S_IRUGO, input_dev_show_cap_##bm, NULL) INPUT_DEV_CAP_ATTR(EV, ev); INPUT_DEV_CAP_ATTR(KEY, key); INPUT_DEV_CAP_ATTR(REL, rel); INPUT_DEV_CAP_ATTR(ABS, abs); INPUT_DEV_CAP_ATTR(MSC, msc); INPUT_DEV_CAP_ATTR(LED, led); INPUT_DEV_CAP_ATTR(SND, snd); INPUT_DEV_CAP_ATTR(FF, ff); INPUT_DEV_CAP_ATTR(SW, sw); static struct attribute *input_dev_caps_attrs[] = { &dev_attr_ev.attr, &dev_attr_key.attr, &dev_attr_rel.attr, &dev_attr_abs.attr, &dev_attr_msc.attr, &dev_attr_led.attr, &dev_attr_snd.attr, &dev_attr_ff.attr, &dev_attr_sw.attr, NULL }; static const struct attribute_group input_dev_caps_attr_group = { .name = "capabilities", .attrs = input_dev_caps_attrs, }; static const struct attribute_group *input_dev_attr_groups[] = { &input_dev_attr_group, &input_dev_id_attr_group, &input_dev_caps_attr_group, &input_poller_attribute_group, NULL }; static void input_dev_release(struct device *device) { struct input_dev *dev = to_input_dev(device); input_ff_destroy(dev); input_mt_destroy_slots(dev); kfree(dev->poller); kfree(dev->absinfo); kfree(dev->vals); kfree(dev); module_put(THIS_MODULE); } /* * Input uevent interface - loading event handlers based on * device bitfields. 
*/ static int input_add_uevent_bm_var(struct kobj_uevent_env *env, const char *name, const unsigned long *bitmap, int max) { int len; if (add_uevent_var(env, "%s", name)) return -ENOMEM; len = input_print_bitmap(&env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen, bitmap, max, false); if (len >= (sizeof(env->buf) - env->buflen)) return -ENOMEM; env->buflen += len; return 0; } static int input_add_uevent_modalias_var(struct kobj_uevent_env *env, const struct input_dev *dev) { int len; if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; len = input_print_modalias(&env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen, dev, 0); if (len >= (sizeof(env->buf) - env->buflen)) return -ENOMEM; env->buflen += len; return 0; } #define INPUT_ADD_HOTPLUG_VAR(fmt, val...) \ do { \ int err = add_uevent_var(env, fmt, val); \ if (err) \ return err; \ } while (0) #define INPUT_ADD_HOTPLUG_BM_VAR(name, bm, max) \ do { \ int err = input_add_uevent_bm_var(env, name, bm, max); \ if (err) \ return err; \ } while (0) #define INPUT_ADD_HOTPLUG_MODALIAS_VAR(dev) \ do { \ int err = input_add_uevent_modalias_var(env, dev); \ if (err) \ return err; \ } while (0) static int input_dev_uevent(const struct device *device, struct kobj_uevent_env *env) { const struct input_dev *dev = to_input_dev(device); INPUT_ADD_HOTPLUG_VAR("PRODUCT=%x/%x/%x/%x", dev->id.bustype, dev->id.vendor, dev->id.product, dev->id.version); if (dev->name) INPUT_ADD_HOTPLUG_VAR("NAME=\"%s\"", dev->name); if (dev->phys) INPUT_ADD_HOTPLUG_VAR("PHYS=\"%s\"", dev->phys); if (dev->uniq) INPUT_ADD_HOTPLUG_VAR("UNIQ=\"%s\"", dev->uniq); INPUT_ADD_HOTPLUG_BM_VAR("PROP=", dev->propbit, INPUT_PROP_MAX); INPUT_ADD_HOTPLUG_BM_VAR("EV=", dev->evbit, EV_MAX); if (test_bit(EV_KEY, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("KEY=", dev->keybit, KEY_MAX); if (test_bit(EV_REL, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("REL=", dev->relbit, REL_MAX); if (test_bit(EV_ABS, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("ABS=", dev->absbit, ABS_MAX); if (test_bit(EV_MSC, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("MSC=", dev->mscbit, MSC_MAX); if (test_bit(EV_LED, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("LED=", dev->ledbit, LED_MAX); if (test_bit(EV_SND, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("SND=", dev->sndbit, SND_MAX); if (test_bit(EV_FF, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("FF=", dev->ffbit, FF_MAX); if (test_bit(EV_SW, dev->evbit)) INPUT_ADD_HOTPLUG_BM_VAR("SW=", dev->swbit, SW_MAX); INPUT_ADD_HOTPLUG_MODALIAS_VAR(dev); return 0; } #define INPUT_DO_TOGGLE(dev, type, bits, on) \ do { \ int i; \ bool active; \ \ if (!test_bit(EV_##type, dev->evbit)) \ break; \ \ for_each_set_bit(i, dev->bits##bit, type##_CNT) { \ active = test_bit(i, dev->bits); \ if (!active && !on) \ continue; \ \ dev->event(dev, EV_##type, i, on ? active : 0); \ } \ } while (0) static void input_dev_toggle(struct input_dev *dev, bool activate) { if (!dev->event) return; INPUT_DO_TOGGLE(dev, LED, led, activate); INPUT_DO_TOGGLE(dev, SND, snd, activate); if (activate && test_bit(EV_REP, dev->evbit)) { dev->event(dev, EV_REP, REP_PERIOD, dev->rep[REP_PERIOD]); dev->event(dev, EV_REP, REP_DELAY, dev->rep[REP_DELAY]); } } /** * input_reset_device() - reset/restore the state of input device * @dev: input device whose state needs to be reset * * This function tries to reset the state of an opened input device and * bring internal state and state if the hardware in sync with each other. * We mark all keys as released, restore LED state, repeat rate, etc. 
*/ void input_reset_device(struct input_dev *dev) { unsigned long flags; mutex_lock(&dev->mutex); spin_lock_irqsave(&dev->event_lock, flags); input_dev_toggle(dev, true); if (input_dev_release_keys(dev)) input_handle_event(dev, EV_SYN, SYN_REPORT, 1); spin_unlock_irqrestore(&dev->event_lock, flags); mutex_unlock(&dev->mutex); } EXPORT_SYMBOL(input_reset_device); static int input_inhibit_device(struct input_dev *dev) { mutex_lock(&dev->mutex); if (dev->inhibited) goto out; if (dev->users) { if (dev->close) dev->close(dev); if (dev->poller) input_dev_poller_stop(dev->poller); } spin_lock_irq(&dev->event_lock); input_mt_release_slots(dev); input_dev_release_keys(dev); input_handle_event(dev, EV_SYN, SYN_REPORT, 1); input_dev_toggle(dev, false); spin_unlock_irq(&dev->event_lock); dev->inhibited = true; out: mutex_unlock(&dev->mutex); return 0; } static int input_uninhibit_device(struct input_dev *dev) { int ret = 0; mutex_lock(&dev->mutex); if (!dev->inhibited) goto out; if (dev->users) { if (dev->open) { ret = dev->open(dev); if (ret) goto out; } if (dev->poller) input_dev_poller_start(dev->poller); } dev->inhibited = false; spin_lock_irq(&dev->event_lock); input_dev_toggle(dev, true); spin_unlock_irq(&dev->event_lock); out: mutex_unlock(&dev->mutex); return ret; } static int input_dev_suspend(struct device *dev) { struct input_dev *input_dev = to_input_dev(dev); spin_lock_irq(&input_dev->event_lock); /* * Keys that are pressed now are unlikely to be * still pressed when we resume. */ if (input_dev_release_keys(input_dev)) input_handle_event(input_dev, EV_SYN, SYN_REPORT, 1); /* Turn off LEDs and sounds, if any are active. */ input_dev_toggle(input_dev, false); spin_unlock_irq(&input_dev->event_lock); return 0; } static int input_dev_resume(struct device *dev) { struct input_dev *input_dev = to_input_dev(dev); spin_lock_irq(&input_dev->event_lock); /* Restore state of LEDs and sounds, if any were active. */ input_dev_toggle(input_dev, true); spin_unlock_irq(&input_dev->event_lock); return 0; } static int input_dev_freeze(struct device *dev) { struct input_dev *input_dev = to_input_dev(dev); spin_lock_irq(&input_dev->event_lock); /* * Keys that are pressed now are unlikely to be * still pressed when we resume. */ if (input_dev_release_keys(input_dev)) input_handle_event(input_dev, EV_SYN, SYN_REPORT, 1); spin_unlock_irq(&input_dev->event_lock); return 0; } static int input_dev_poweroff(struct device *dev) { struct input_dev *input_dev = to_input_dev(dev); spin_lock_irq(&input_dev->event_lock); /* Turn off LEDs and sounds, if any are active. */ input_dev_toggle(input_dev, false); spin_unlock_irq(&input_dev->event_lock); return 0; } static const struct dev_pm_ops input_dev_pm_ops = { .suspend = input_dev_suspend, .resume = input_dev_resume, .freeze = input_dev_freeze, .poweroff = input_dev_poweroff, .restore = input_dev_resume, }; static const struct device_type input_dev_type = { .groups = input_dev_attr_groups, .release = input_dev_release, .uevent = input_dev_uevent, .pm = pm_sleep_ptr(&input_dev_pm_ops), }; static char *input_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "input/%s", dev_name(dev)); } const struct class input_class = { .name = "input", .devnode = input_devnode, }; EXPORT_SYMBOL_GPL(input_class); /** * input_allocate_device - allocate memory for new input device * * Returns prepared struct input_dev or %NULL. 
* * NOTE: Use input_free_device() to free devices that have not been * registered; input_unregister_device() should be used for already * registered devices. */ struct input_dev *input_allocate_device(void) { static atomic_t input_no = ATOMIC_INIT(-1); struct input_dev *dev; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev) { dev->dev.type = &input_dev_type; dev->dev.class = &input_class; device_initialize(&dev->dev); mutex_init(&dev->mutex); spin_lock_init(&dev->event_lock); timer_setup(&dev->timer, NULL, 0); INIT_LIST_HEAD(&dev->h_list); INIT_LIST_HEAD(&dev->node); dev_set_name(&dev->dev, "input%lu", (unsigned long)atomic_inc_return(&input_no)); __module_get(THIS_MODULE); } return dev; } EXPORT_SYMBOL(input_allocate_device); struct input_devres { struct input_dev *input; }; static int devm_input_device_match(struct device *dev, void *res, void *data) { struct input_devres *devres = res; return devres->input == data; } static void devm_input_device_release(struct device *dev, void *res) { struct input_devres *devres = res; struct input_dev *input = devres->input; dev_dbg(dev, "%s: dropping reference to %s\n", __func__, dev_name(&input->dev)); input_put_device(input); } /** * devm_input_allocate_device - allocate managed input device * @dev: device owning the input device being created * * Returns prepared struct input_dev or %NULL. * * Managed input devices do not need to be explicitly unregistered or * freed as it will be done automatically when owner device unbinds from * its driver (or binding fails). Once managed input device is allocated, * it is ready to be set up and registered in the same fashion as regular * input device. There are no special devm_input_device_[un]register() * variants, regular ones work with both managed and unmanaged devices, * should you need them. In most cases however, managed input device need * not be explicitly unregistered or freed. * * NOTE: the owner device is set up as parent of input device and users * should not override it. */ struct input_dev *devm_input_allocate_device(struct device *dev) { struct input_dev *input; struct input_devres *devres; devres = devres_alloc(devm_input_device_release, sizeof(*devres), GFP_KERNEL); if (!devres) return NULL; input = input_allocate_device(); if (!input) { devres_free(devres); return NULL; } input->dev.parent = dev; input->devres_managed = true; devres->input = input; devres_add(dev, devres); return input; } EXPORT_SYMBOL(devm_input_allocate_device); /** * input_free_device - free memory occupied by input_dev structure * @dev: input device to free * * This function should only be used if input_register_device() * was not called yet or if it failed. Once device was registered * use input_unregister_device() and memory will be freed once last * reference to the device is dropped. * * Device should be allocated by input_allocate_device(). * * NOTE: If there are references to the input device then memory * will not be freed until last reference is dropped. */ void input_free_device(struct input_dev *dev) { if (dev) { if (dev->devres_managed) WARN_ON(devres_destroy(dev->dev.parent, devm_input_device_release, devm_input_device_match, dev)); input_put_device(dev); } } EXPORT_SYMBOL(input_free_device); /** * input_set_timestamp - set timestamp for input events * @dev: input device to set timestamp for * @timestamp: the time at which the event has occurred * in CLOCK_MONOTONIC * * This function is intended to provide to the input system a more * accurate time of when an event actually occurred. 
The driver should * call this function as soon as a timestamp is acquired ensuring * clock conversions in input_set_timestamp are done correctly. * * The system entering suspend state between timestamp acquisition and * calling input_set_timestamp can result in inaccurate conversions. */ void input_set_timestamp(struct input_dev *dev, ktime_t timestamp) { dev->timestamp[INPUT_CLK_MONO] = timestamp; dev->timestamp[INPUT_CLK_REAL] = ktime_mono_to_real(timestamp); dev->timestamp[INPUT_CLK_BOOT] = ktime_mono_to_any(timestamp, TK_OFFS_BOOT); } EXPORT_SYMBOL(input_set_timestamp); /** * input_get_timestamp - get timestamp for input events * @dev: input device to get timestamp from * * A valid timestamp is a timestamp of non-zero value. */ ktime_t *input_get_timestamp(struct input_dev *dev) { const ktime_t invalid_timestamp = ktime_set(0, 0); if (!ktime_compare(dev->timestamp[INPUT_CLK_MONO], invalid_timestamp)) input_set_timestamp(dev, ktime_get()); return dev->timestamp; } EXPORT_SYMBOL(input_get_timestamp); /** * input_set_capability - mark device as capable of a certain event * @dev: device that is capable of emitting or accepting event * @type: type of the event (EV_KEY, EV_REL, etc...) * @code: event code * * In addition to setting up corresponding bit in appropriate capability * bitmap the function also adjusts dev->evbit. */ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code) { if (type < EV_CNT && input_max_code[type] && code > input_max_code[type]) { pr_err("%s: invalid code %u for type %u\n", __func__, code, type); dump_stack(); return; } switch (type) { case EV_KEY: __set_bit(code, dev->keybit); break; case EV_REL: __set_bit(code, dev->relbit); break; case EV_ABS: input_alloc_absinfo(dev); __set_bit(code, dev->absbit); break; case EV_MSC: __set_bit(code, dev->mscbit); break; case EV_SW: __set_bit(code, dev->swbit); break; case EV_LED: __set_bit(code, dev->ledbit); break; case EV_SND: __set_bit(code, dev->sndbit); break; case EV_FF: __set_bit(code, dev->ffbit); break; case EV_PWR: /* do nothing */ break; default: pr_err("%s: unknown type %u (code %u)\n", __func__, type, code); dump_stack(); return; } __set_bit(type, dev->evbit); } EXPORT_SYMBOL(input_set_capability); static unsigned int input_estimate_events_per_packet(struct input_dev *dev) { int mt_slots; int i; unsigned int events; if (dev->mt) { mt_slots = dev->mt->num_slots; } else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) { mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum - dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1, mt_slots = clamp(mt_slots, 2, 32); } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { mt_slots = 2; } else { mt_slots = 0; } events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */ if (test_bit(EV_ABS, dev->evbit)) for_each_set_bit(i, dev->absbit, ABS_CNT) events += input_is_mt_axis(i) ? 
mt_slots : 1; if (test_bit(EV_REL, dev->evbit)) events += bitmap_weight(dev->relbit, REL_CNT); /* Make room for KEY and MSC events */ events += 7; return events; } #define INPUT_CLEANSE_BITMASK(dev, type, bits) \ do { \ if (!test_bit(EV_##type, dev->evbit)) \ memset(dev->bits##bit, 0, \ sizeof(dev->bits##bit)); \ } while (0) static void input_cleanse_bitmasks(struct input_dev *dev) { INPUT_CLEANSE_BITMASK(dev, KEY, key); INPUT_CLEANSE_BITMASK(dev, REL, rel); INPUT_CLEANSE_BITMASK(dev, ABS, abs); INPUT_CLEANSE_BITMASK(dev, MSC, msc); INPUT_CLEANSE_BITMASK(dev, LED, led); INPUT_CLEANSE_BITMASK(dev, SND, snd); INPUT_CLEANSE_BITMASK(dev, FF, ff); INPUT_CLEANSE_BITMASK(dev, SW, sw); } static void __input_unregister_device(struct input_dev *dev) { struct input_handle *handle, *next; input_disconnect_device(dev); mutex_lock(&input_mutex); list_for_each_entry_safe(handle, next, &dev->h_list, d_node) handle->handler->disconnect(handle); WARN_ON(!list_empty(&dev->h_list)); del_timer_sync(&dev->timer); list_del_init(&dev->node); input_wakeup_procfs_readers(); mutex_unlock(&input_mutex); device_del(&dev->dev); } static void devm_input_device_unregister(struct device *dev, void *res) { struct input_devres *devres = res; struct input_dev *input = devres->input; dev_dbg(dev, "%s: unregistering device %s\n", __func__, dev_name(&input->dev)); __input_unregister_device(input); } /* * Generate software autorepeat event. Note that we take * dev->event_lock here to avoid racing with input_event * which may cause keys get "stuck". */ static void input_repeat_key(struct timer_list *t) { struct input_dev *dev = from_timer(dev, t, timer); unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); if (!dev->inhibited && test_bit(dev->repeat_key, dev->key) && is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) { input_set_timestamp(dev, ktime_get()); input_handle_event(dev, EV_KEY, dev->repeat_key, 2); input_handle_event(dev, EV_SYN, SYN_REPORT, 1); if (dev->rep[REP_PERIOD]) mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_PERIOD])); } spin_unlock_irqrestore(&dev->event_lock, flags); } /** * input_enable_softrepeat - enable software autorepeat * @dev: input device * @delay: repeat delay * @period: repeat period * * Enable software autorepeat on the input device. */ void input_enable_softrepeat(struct input_dev *dev, int delay, int period) { dev->timer.function = input_repeat_key; dev->rep[REP_DELAY] = delay; dev->rep[REP_PERIOD] = period; } EXPORT_SYMBOL(input_enable_softrepeat); bool input_device_enabled(struct input_dev *dev) { lockdep_assert_held(&dev->mutex); return !dev->inhibited && dev->users > 0; } EXPORT_SYMBOL_GPL(input_device_enabled); /** * input_register_device - register device with input core * @dev: device to be registered * * This function registers device with input core. The device must be * allocated with input_allocate_device() and all it's capabilities * set up before registering. * If function fails the device must be freed with input_free_device(). * Once device has been successfully registered it can be unregistered * with input_unregister_device(); input_free_device() should not be * called in this case. * * Note that this function is also used to register managed input devices * (ones allocated with devm_input_allocate_device()). Such managed input * devices need not be explicitly unregistered or freed, their tear down * is controlled by the devres infrastructure. 
It is also worth noting * that tear down of managed input devices is internally a 2-step process: * registered managed input device is first unregistered, but stays in * memory and can still handle input_event() calls (although events will * not be delivered anywhere). The freeing of managed input device will * happen later, when devres stack is unwound to the point where device * allocation was made. */ int input_register_device(struct input_dev *dev) { struct input_devres *devres = NULL; struct input_handler *handler; unsigned int packet_size; const char *path; int error; if (test_bit(EV_ABS, dev->evbit) && !dev->absinfo) { dev_err(&dev->dev, "Absolute device without dev->absinfo, refusing to register\n"); return -EINVAL; } if (dev->devres_managed) { devres = devres_alloc(devm_input_device_unregister, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; devres->input = dev; } /* Every input device generates EV_SYN/SYN_REPORT events. */ __set_bit(EV_SYN, dev->evbit); /* KEY_RESERVED is not supposed to be transmitted to userspace. */ __clear_bit(KEY_RESERVED, dev->keybit); /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); packet_size = input_estimate_events_per_packet(dev); if (dev->hint_events_per_packet < packet_size) dev->hint_events_per_packet = packet_size; dev->max_vals = dev->hint_events_per_packet + 2; dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL); if (!dev->vals) { error = -ENOMEM; goto err_devres_free; } /* * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. */ if (!dev->rep[REP_DELAY] && !dev->rep[REP_PERIOD]) input_enable_softrepeat(dev, 250, 33); if (!dev->getkeycode) dev->getkeycode = input_default_getkeycode; if (!dev->setkeycode) dev->setkeycode = input_default_setkeycode; if (dev->poller) input_dev_poller_finalize(dev->poller); error = device_add(&dev->dev); if (error) goto err_free_vals; path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); pr_info("%s as %s\n", dev->name ? dev->name : "Unspecified device", path ? path : "N/A"); kfree(path); error = mutex_lock_interruptible(&input_mutex); if (error) goto err_device_del; list_add_tail(&dev->node, &input_dev_list); list_for_each_entry(handler, &input_handler_list, node) input_attach_handler(dev, handler); input_wakeup_procfs_readers(); mutex_unlock(&input_mutex); if (dev->devres_managed) { dev_dbg(dev->dev.parent, "%s: registering %s with devres.\n", __func__, dev_name(&dev->dev)); devres_add(dev->dev.parent, devres); } return 0; err_device_del: device_del(&dev->dev); err_free_vals: kfree(dev->vals); dev->vals = NULL; err_devres_free: devres_free(devres); return error; } EXPORT_SYMBOL(input_register_device); /** * input_unregister_device - unregister previously registered device * @dev: device to be unregistered * * This function unregisters an input device. Once device is unregistered * the caller should not try to access it as it may get freed at any moment. */ void input_unregister_device(struct input_dev *dev) { if (dev->devres_managed) { WARN_ON(devres_destroy(dev->dev.parent, devm_input_device_unregister, devm_input_device_match, dev)); __input_unregister_device(dev); /* * We do not do input_put_device() here because it will be done * when 2nd devres fires up. 
*/ } else { __input_unregister_device(dev); input_put_device(dev); } } EXPORT_SYMBOL(input_unregister_device); /** * input_register_handler - register a new input handler * @handler: handler to be registered * * This function registers a new input handler (interface) for input * devices in the system and attaches it to all input devices that * are compatible with the handler. */ int input_register_handler(struct input_handler *handler) { struct input_dev *dev; int error; error = mutex_lock_interruptible(&input_mutex); if (error) return error; INIT_LIST_HEAD(&handler->h_list); list_add_tail(&handler->node, &input_handler_list); list_for_each_entry(dev, &input_dev_list, node) input_attach_handler(dev, handler); input_wakeup_procfs_readers(); mutex_unlock(&input_mutex); return 0; } EXPORT_SYMBOL(input_register_handler); /** * input_unregister_handler - unregisters an input handler * @handler: handler to be unregistered * * This function disconnects a handler from its input devices and * removes it from lists of known handlers. */ void input_unregister_handler(struct input_handler *handler) { struct input_handle *handle, *next; mutex_lock(&input_mutex); list_for_each_entry_safe(handle, next, &handler->h_list, h_node) handler->disconnect(handle); WARN_ON(!list_empty(&handler->h_list)); list_del_init(&handler->node); input_wakeup_procfs_readers(); mutex_unlock(&input_mutex); } EXPORT_SYMBOL(input_unregister_handler); /** * input_handler_for_each_handle - handle iterator * @handler: input handler to iterate * @data: data for the callback * @fn: function to be called for each handle * * Iterate over @bus's list of devices, and call @fn for each, passing * it @data and stop when @fn returns a non-zero value. The function is * using RCU to traverse the list and therefore may be using in atomic * contexts. The @fn callback is invoked from RCU critical section and * thus must not sleep. */ int input_handler_for_each_handle(struct input_handler *handler, void *data, int (*fn)(struct input_handle *, void *)) { struct input_handle *handle; int retval = 0; rcu_read_lock(); list_for_each_entry_rcu(handle, &handler->h_list, h_node) { retval = fn(handle, data); if (retval) break; } rcu_read_unlock(); return retval; } EXPORT_SYMBOL(input_handler_for_each_handle); /** * input_register_handle - register a new input handle * @handle: handle to register * * This function puts a new input handle onto device's * and handler's lists so that events can flow through * it once it is opened using input_open_device(). * * This function is supposed to be called from handler's * connect() method. */ int input_register_handle(struct input_handle *handle) { struct input_handler *handler = handle->handler; struct input_dev *dev = handle->dev; int error; /* * We take dev->mutex here to prevent race with * input_release_device(). */ error = mutex_lock_interruptible(&dev->mutex); if (error) return error; /* * Filters go to the head of the list, normal handlers * to the tail. */ if (handler->filter) list_add_rcu(&handle->d_node, &dev->h_list); else list_add_tail_rcu(&handle->d_node, &dev->h_list); mutex_unlock(&dev->mutex); /* * Since we are supposed to be called from ->connect() * which is mutually exclusive with ->disconnect() * we can't be racing with input_unregister_handle() * and so separate lock is not needed here. 
*/ list_add_tail_rcu(&handle->h_node, &handler->h_list); if (handler->start) handler->start(handle); return 0; } EXPORT_SYMBOL(input_register_handle); /** * input_unregister_handle - unregister an input handle * @handle: handle to unregister * * This function removes input handle from device's * and handler's lists. * * This function is supposed to be called from handler's * disconnect() method. */ void input_unregister_handle(struct input_handle *handle) { struct input_dev *dev = handle->dev; list_del_rcu(&handle->h_node); /* * Take dev->mutex to prevent race with input_release_device(). */ mutex_lock(&dev->mutex); list_del_rcu(&handle->d_node); mutex_unlock(&dev->mutex); synchronize_rcu(); } EXPORT_SYMBOL(input_unregister_handle); /** * input_get_new_minor - allocates a new input minor number * @legacy_base: beginning of the legacy range to be searched * @legacy_num: size of legacy range * @allow_dynamic: whether we can also take ID from the dynamic range * * This function allocates a new device minor from the input major namespace. * Caller can request legacy minor by specifying @legacy_base and @legacy_num * parameters and whether ID can be allocated from dynamic range if there are * no free IDs in legacy range. */ int input_get_new_minor(int legacy_base, unsigned int legacy_num, bool allow_dynamic) { /* * This function should be called from input handler's ->connect() * methods, which are serialized with input_mutex, so no additional * locking is needed here. */ if (legacy_base >= 0) { int minor = ida_alloc_range(&input_ida, legacy_base, legacy_base + legacy_num - 1, GFP_KERNEL); if (minor >= 0 || !allow_dynamic) return minor; } return ida_alloc_range(&input_ida, INPUT_FIRST_DYNAMIC_DEV, INPUT_MAX_CHAR_DEVICES - 1, GFP_KERNEL); } EXPORT_SYMBOL(input_get_new_minor); /** * input_free_minor - release previously allocated minor * @minor: minor to be released * * This function releases previously allocated input minor so that it can be * reused later. */ void input_free_minor(unsigned int minor) { ida_free(&input_ida, minor); } EXPORT_SYMBOL(input_free_minor); static int __init input_init(void) { int err; err = class_register(&input_class); if (err) { pr_err("unable to register input_dev class\n"); return err; } err = input_proc_init(); if (err) goto fail1; err = register_chrdev_region(MKDEV(INPUT_MAJOR, 0), INPUT_MAX_CHAR_DEVICES, "input"); if (err) { pr_err("unable to register char major %d", INPUT_MAJOR); goto fail2; } return 0; fail2: input_proc_exit(); fail1: class_unregister(&input_class); return err; } static void __exit input_exit(void) { input_proc_exit(); unregister_chrdev_region(MKDEV(INPUT_MAJOR, 0), INPUT_MAX_CHAR_DEVICES); class_unregister(&input_class); } subsys_initcall(input_init); module_exit(input_exit);
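/*
 * Illustrative sketch (not part of input.c): the driver-side view of the
 * registration API implemented above.  "example_button" and the use of
 * KEY_ENTER are hypothetical; a real driver would also wire up an interrupt
 * or polling routine that calls input_report_key() followed by input_sync().
 */
#include <linux/input.h>
#include <linux/module.h>

static struct input_dev *example_button;

static int __init example_button_init(void)
{
	int error;

	example_button = input_allocate_device();
	if (!example_button)
		return -ENOMEM;

	example_button->name = "Example Button";
	example_button->id.bustype = BUS_HOST;

	/* Declares an EV_KEY/KEY_ENTER capability and sets EV_KEY in evbit. */
	input_set_capability(example_button, EV_KEY, KEY_ENTER);

	error = input_register_device(example_button);
	if (error) {
		/* Registration failed, so the device must be freed directly. */
		input_free_device(example_button);
		return error;
	}

	return 0;
}

static void __exit example_button_exit(void)
{
	/* After this the last reference drop releases the memory. */
	input_unregister_device(example_button);
}

module_init(example_button_init);
module_exit(example_button_exit);
MODULE_LICENSE("GPL");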
// SPDX-License-Identifier: GPL-2.0-or-later /* * Abilis Systems Single DVB-T Receiver * Copyright (C) 2008 Pierrick Hascoet <pierrick.hascoet@abilis.com> * Copyright (C) 2010 Devin Heitmueller <dheitmueller@kernellabs.com> */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/kref.h> #include <linux/uaccess.h> #include <linux/usb.h> /* header file for usb device driver*/ #include "as102_drv.h" #include "as10x_cmd.h" #include "as102_fe.h" #include "as102_fw.h" #include <media/dvbdev.h> int dual_tuner; module_param_named(dual_tuner, dual_tuner, int, 0644); MODULE_PARM_DESC(dual_tuner, "Activate Dual-Tuner config (default: off)"); static int fw_upload = 1; module_param_named(fw_upload, fw_upload, int, 0644); MODULE_PARM_DESC(fw_upload, "Turn on/off default FW upload (default: on)"); static int pid_filtering; module_param_named(pid_filtering, pid_filtering, int, 0644); MODULE_PARM_DESC(pid_filtering, "Activate HW PID filtering (default: off)"); static int ts_auto_disable; module_param_named(ts_auto_disable, ts_auto_disable, int, 0644); MODULE_PARM_DESC(ts_auto_disable, "Stream Auto Enable on FW (default: off)"); int elna_enable = 1; module_param_named(elna_enable, elna_enable, int, 0644); MODULE_PARM_DESC(elna_enable, "Activate eLNA (default: on)"); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); static void as102_stop_stream(struct as102_dev_t *dev) { struct as10x_bus_adapter_t *bus_adap; if (dev != NULL) bus_adap = &dev->bus_adap; else return; if (bus_adap->ops->stop_stream != NULL) bus_adap->ops->stop_stream(dev); if (ts_auto_disable) { if (mutex_lock_interruptible(&dev->bus_adap.lock)) return; if (as10x_cmd_stop_streaming(bus_adap) < 0) dev_dbg(&dev->bus_adap.usb_dev->dev, "as10x_cmd_stop_streaming failed\n"); mutex_unlock(&dev->bus_adap.lock); } } static int as102_start_stream(struct as102_dev_t *dev) { struct as10x_bus_adapter_t *bus_adap; int ret = -EFAULT; if (dev != NULL) bus_adap = &dev->bus_adap; else return ret; if (bus_adap->ops->start_stream != NULL) ret = 
bus_adap->ops->start_stream(dev); if (ts_auto_disable) { if (mutex_lock_interruptible(&dev->bus_adap.lock)) return -EFAULT; ret = as10x_cmd_start_streaming(bus_adap); mutex_unlock(&dev->bus_adap.lock); } return ret; } static int as10x_pid_filter(struct as102_dev_t *dev, int index, u16 pid, int onoff) { struct as10x_bus_adapter_t *bus_adap = &dev->bus_adap; int ret = -EFAULT; if (mutex_lock_interruptible(&dev->bus_adap.lock)) { dev_dbg(&dev->bus_adap.usb_dev->dev, "amutex_lock_interruptible(lock) failed !\n"); return -EBUSY; } switch (onoff) { case 0: ret = as10x_cmd_del_PID_filter(bus_adap, (uint16_t) pid); dev_dbg(&dev->bus_adap.usb_dev->dev, "DEL_PID_FILTER([%02d] 0x%04x) ret = %d\n", index, pid, ret); break; case 1: { struct as10x_ts_filter filter; filter.type = TS_PID_TYPE_TS; filter.idx = 0xFF; filter.pid = pid; ret = as10x_cmd_add_PID_filter(bus_adap, &filter); dev_dbg(&dev->bus_adap.usb_dev->dev, "ADD_PID_FILTER([%02d -> %02d], 0x%04x) ret = %d\n", index, filter.idx, filter.pid, ret); break; } } mutex_unlock(&dev->bus_adap.lock); return ret; } static int as102_dvb_dmx_start_feed(struct dvb_demux_feed *dvbdmxfeed) { int ret = 0; struct dvb_demux *demux = dvbdmxfeed->demux; struct as102_dev_t *as102_dev = demux->priv; if (mutex_lock_interruptible(&as102_dev->sem)) return -ERESTARTSYS; if (pid_filtering) as10x_pid_filter(as102_dev, dvbdmxfeed->index, dvbdmxfeed->pid, 1); if (as102_dev->streaming++ == 0) ret = as102_start_stream(as102_dev); mutex_unlock(&as102_dev->sem); return ret; } static int as102_dvb_dmx_stop_feed(struct dvb_demux_feed *dvbdmxfeed) { struct dvb_demux *demux = dvbdmxfeed->demux; struct as102_dev_t *as102_dev = demux->priv; if (mutex_lock_interruptible(&as102_dev->sem)) return -ERESTARTSYS; if (--as102_dev->streaming == 0) as102_stop_stream(as102_dev); if (pid_filtering) as10x_pid_filter(as102_dev, dvbdmxfeed->index, dvbdmxfeed->pid, 0); mutex_unlock(&as102_dev->sem); return 0; } static int as102_set_tune(void *priv, struct as10x_tune_args *tune_args) { struct as10x_bus_adapter_t *bus_adap = priv; int ret; /* Set frontend arguments */ if (mutex_lock_interruptible(&bus_adap->lock)) return -EBUSY; ret = as10x_cmd_set_tune(bus_adap, tune_args); if (ret != 0) dev_dbg(&bus_adap->usb_dev->dev, "as10x_cmd_set_tune failed. 
(err = %d)\n", ret); mutex_unlock(&bus_adap->lock); return ret; } static int as102_get_tps(void *priv, struct as10x_tps *tps) { struct as10x_bus_adapter_t *bus_adap = priv; int ret; if (mutex_lock_interruptible(&bus_adap->lock)) return -EBUSY; /* send abilis command: GET_TPS */ ret = as10x_cmd_get_tps(bus_adap, tps); mutex_unlock(&bus_adap->lock); return ret; } static int as102_get_status(void *priv, struct as10x_tune_status *tstate) { struct as10x_bus_adapter_t *bus_adap = priv; int ret; if (mutex_lock_interruptible(&bus_adap->lock)) return -EBUSY; /* send abilis command: GET_TUNE_STATUS */ ret = as10x_cmd_get_tune_status(bus_adap, tstate); if (ret < 0) { dev_dbg(&bus_adap->usb_dev->dev, "as10x_cmd_get_tune_status failed (err = %d)\n", ret); } mutex_unlock(&bus_adap->lock); return ret; } static int as102_get_stats(void *priv, struct as10x_demod_stats *demod_stats) { struct as10x_bus_adapter_t *bus_adap = priv; int ret; if (mutex_lock_interruptible(&bus_adap->lock)) return -EBUSY; /* send abilis command: GET_TUNE_STATUS */ ret = as10x_cmd_get_demod_stats(bus_adap, demod_stats); if (ret < 0) { dev_dbg(&bus_adap->usb_dev->dev, "as10x_cmd_get_demod_stats failed (probably not tuned)\n"); } else { dev_dbg(&bus_adap->usb_dev->dev, "demod status: fc: 0x%08x, bad fc: 0x%08x, bytes corrected: 0x%08x , MER: 0x%04x\n", demod_stats->frame_count, demod_stats->bad_frame_count, demod_stats->bytes_fixed_by_rs, demod_stats->mer); } mutex_unlock(&bus_adap->lock); return ret; } static int as102_stream_ctrl(void *priv, int acquire, uint32_t elna_cfg) { struct as10x_bus_adapter_t *bus_adap = priv; int ret; if (mutex_lock_interruptible(&bus_adap->lock)) return -EBUSY; if (acquire) { if (elna_enable) as10x_cmd_set_context(bus_adap, CONTEXT_LNA, elna_cfg); ret = as10x_cmd_turn_on(bus_adap); } else { ret = as10x_cmd_turn_off(bus_adap); } mutex_unlock(&bus_adap->lock); return ret; } static const struct as102_fe_ops as102_fe_ops = { .set_tune = as102_set_tune, .get_tps = as102_get_tps, .get_status = as102_get_status, .get_stats = as102_get_stats, .stream_ctrl = as102_stream_ctrl, }; int as102_dvb_register(struct as102_dev_t *as102_dev) { struct device *dev = &as102_dev->bus_adap.usb_dev->dev; int ret; ret = dvb_register_adapter(&as102_dev->dvb_adap, as102_dev->name, THIS_MODULE, dev, adapter_nr); if (ret < 0) { dev_err(dev, "%s: dvb_register_adapter() failed: %d\n", __func__, ret); return ret; } as102_dev->dvb_dmx.priv = as102_dev; as102_dev->dvb_dmx.filternum = pid_filtering ? 
16 : 256; as102_dev->dvb_dmx.feednum = 256; as102_dev->dvb_dmx.start_feed = as102_dvb_dmx_start_feed; as102_dev->dvb_dmx.stop_feed = as102_dvb_dmx_stop_feed; as102_dev->dvb_dmx.dmx.capabilities = DMX_TS_FILTERING | DMX_SECTION_FILTERING; as102_dev->dvb_dmxdev.filternum = as102_dev->dvb_dmx.filternum; as102_dev->dvb_dmxdev.demux = &as102_dev->dvb_dmx.dmx; as102_dev->dvb_dmxdev.capabilities = 0; ret = dvb_dmx_init(&as102_dev->dvb_dmx); if (ret < 0) { dev_err(dev, "%s: dvb_dmx_init() failed: %d\n", __func__, ret); goto edmxinit; } ret = dvb_dmxdev_init(&as102_dev->dvb_dmxdev, &as102_dev->dvb_adap); if (ret < 0) { dev_err(dev, "%s: dvb_dmxdev_init() failed: %d\n", __func__, ret); goto edmxdinit; } /* Attach the frontend */ as102_dev->dvb_fe = dvb_attach(as102_attach, as102_dev->name, &as102_fe_ops, &as102_dev->bus_adap, as102_dev->elna_cfg); if (!as102_dev->dvb_fe) { ret = -ENODEV; dev_err(dev, "%s: as102_attach() failed: %d", __func__, ret); goto efereg; } ret = dvb_register_frontend(&as102_dev->dvb_adap, as102_dev->dvb_fe); if (ret < 0) { dev_err(dev, "%s: as102_dvb_register_frontend() failed: %d", __func__, ret); goto efereg; } /* init bus mutex for token locking */ mutex_init(&as102_dev->bus_adap.lock); /* init start / stop stream mutex */ mutex_init(&as102_dev->sem); /* * try to load as102 firmware. If firmware upload failed, we'll be * able to upload it later. */ if (fw_upload) try_then_request_module(as102_fw_upload(&as102_dev->bus_adap), "firmware_class"); pr_info("Registered device %s", as102_dev->name); return 0; efereg: dvb_dmxdev_release(&as102_dev->dvb_dmxdev); edmxdinit: dvb_dmx_release(&as102_dev->dvb_dmx); edmxinit: dvb_unregister_adapter(&as102_dev->dvb_adap); return ret; } void as102_dvb_unregister(struct as102_dev_t *as102_dev) { /* unregister as102 frontend */ dvb_unregister_frontend(as102_dev->dvb_fe); /* detach frontend */ dvb_frontend_detach(as102_dev->dvb_fe); /* unregister demux device */ dvb_dmxdev_release(&as102_dev->dvb_dmxdev); dvb_dmx_release(&as102_dev->dvb_dmx); /* unregister dvb adapter */ dvb_unregister_adapter(&as102_dev->dvb_adap); pr_info("Unregistered device %s", as102_dev->name); } module_usb_driver(as102_usb_driver); /* modinfo details */ MODULE_DESCRIPTION(DRIVER_FULL_NAME); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pierrick Hascoet <pierrick.hascoet@abilis.com>");
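/*
 * Illustrative sketch (not part of this file): how the USB glue in
 * as102_usb_drv.c is expected to drive the two entry points above.  The
 * example_* helpers and the pre-populated "dev" argument are hypothetical;
 * only as102_dvb_register()/as102_dvb_unregister() are taken from this file.
 */
static int example_as102_bind(struct as102_dev_t *dev)
{
	/*
	 * dev->name, dev->bus_adap and dev->elna_cfg must already be filled
	 * in by the bus glue.  as102_dvb_register() then registers the DVB
	 * adapter, demux, dmxdev and frontend, and (with fw_upload=1) tries
	 * to upload the firmware.
	 */
	return as102_dvb_register(dev);
}

static void example_as102_unbind(struct as102_dev_t *dev)
{
	/* Tear-down mirrors registration: frontend, dmxdev, demux, adapter. */
	as102_dvb_unregister(dev);
}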
//
SPDX-License-Identifier: GPL-2.0-or-later /* * Parallel SCSI (SPI) transport specific attributes exported to sysfs. * * Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved. * Copyright (c) 2004, 2005 James Bottomley <James.Bottomley@SteelEye.com> */ #include <linux/ctype.h> #include <linux/init.h> #include <linux/module.h> #include <linux/workqueue.h> #include <linux/blkdev.h> #include <linux/mutex.h> #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/suspend.h> #include <scsi/scsi.h> #include "scsi_priv.h" #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_tcq.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_transport_spi.h> #define SPI_NUM_ATTRS 14 /* increase this if you add attributes */ #define SPI_OTHER_ATTRS 1 /* Increase this if you add "always * on" attributes */ #define SPI_HOST_ATTRS 1 #define SPI_MAX_ECHO_BUFFER_SIZE 4096 #define DV_LOOPS 3 #define DV_TIMEOUT (10*HZ) #define DV_RETRIES 3 /* should only need at most * two cc/ua clears */ /* Our blacklist flags */ enum { SPI_BLIST_NOIUS = (__force blist_flags_t)0x1, }; /* blacklist table, modelled on scsi_devinfo.c */ static struct { char *vendor; char *model; blist_flags_t flags; } spi_static_device_list[] __initdata = { {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS }, {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS }, {NULL, NULL, 0} }; /* Private data accessors (keep these out of the header file) */ #define spi_dv_in_progress(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_in_progress) #define spi_dv_mutex(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_mutex) struct spi_internal { struct scsi_transport_template t; struct spi_function_template *f; }; #define to_spi_internal(tmpl) container_of(tmpl, struct spi_internal, t) static const int ppr_to_ps[] = { /* The PPR values 0-6 are reserved, fill them in when * the committee defines them */ -1, /* 0x00 */ -1, /* 0x01 */ -1, /* 0x02 */ -1, /* 0x03 */ -1, /* 0x04 */ -1, /* 0x05 */ -1, /* 0x06 */ 3125, /* 0x07 */ 6250, /* 0x08 */ 12500, /* 0x09 */ 25000, /* 0x0a */ 30300, /* 0x0b */ 50000, /* 0x0c */ }; /* The PPR values at which you calculate the period in ns by multiplying * by 4 */ #define SPI_STATIC_PPR 0x0c static int sprint_frac(char *dest, int value, int denom) { int frac = value % denom; int result = sprintf(dest, "%d", value / denom); if (frac == 0) return result; dest[result++] = '.'; do { denom /= 10; sprintf(dest + result, "%d", frac / denom); result++; frac %= denom; } while (frac); dest[result++] = '\0'; return result; } static int spi_execute(struct scsi_device *sdev, const void *cmd, enum req_op op, void *buffer, unsigned int bufflen, struct scsi_sense_hdr *sshdr) { blk_opf_t opf = op | REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = DV_RETRIES, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { /* bypass the SDEV_QUIESCE state with BLK_MQ_REQ_PM */ .req_flags = BLK_MQ_REQ_PM, .sshdr = sshdr, .failures = &failures, }; return scsi_execute_cmd(sdev, cmd, opf, buffer, bufflen, DV_TIMEOUT, 1, &exec_args); } static struct { enum spi_signal_type value; char *name; } signal_types[] = { { SPI_SIGNAL_UNKNOWN, "unknown" }, { SPI_SIGNAL_SE, "SE" }, { SPI_SIGNAL_LVD, "LVD" }, { SPI_SIGNAL_HVD, "HVD" 
}, }; static inline const char *spi_signal_to_string(enum spi_signal_type type) { int i; for (i = 0; i < ARRAY_SIZE(signal_types); i++) { if (type == signal_types[i].value) return signal_types[i].name; } return NULL; } static inline enum spi_signal_type spi_signal_to_value(const char *name) { int i, len; for (i = 0; i < ARRAY_SIZE(signal_types); i++) { len = strlen(signal_types[i].name); if (strncmp(name, signal_types[i].name, len) == 0 && (name[len] == '\n' || name[len] == '\0')) return signal_types[i].value; } return SPI_SIGNAL_UNKNOWN; } static int spi_host_setup(struct transport_container *tc, struct device *dev, struct device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); spi_signalling(shost) = SPI_SIGNAL_UNKNOWN; return 0; } static int spi_host_configure(struct transport_container *tc, struct device *dev, struct device *cdev); static DECLARE_TRANSPORT_CLASS(spi_host_class, "spi_host", spi_host_setup, NULL, spi_host_configure); static int spi_host_match(struct attribute_container *cont, struct device *dev) { struct Scsi_Host *shost; if (!scsi_is_host_device(dev)) return 0; shost = dev_to_shost(dev); if (!shost->transportt || shost->transportt->host_attrs.ac.class != &spi_host_class.class) return 0; return &shost->transportt->host_attrs.ac == cont; } static int spi_target_configure(struct transport_container *tc, struct device *dev, struct device *cdev); static int spi_device_configure(struct transport_container *tc, struct device *dev, struct device *cdev) { struct scsi_device *sdev = to_scsi_device(dev); struct scsi_target *starget = sdev->sdev_target; blist_flags_t bflags; bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], &sdev->inquiry[16], SCSI_DEVINFO_SPI); /* Populate the target capability fields with the values * gleaned from the device inquiry */ spi_support_sync(starget) = scsi_device_sync(sdev); spi_support_wide(starget) = scsi_device_wide(sdev); spi_support_dt(starget) = scsi_device_dt(sdev); spi_support_dt_only(starget) = scsi_device_dt_only(sdev); spi_support_ius(starget) = scsi_device_ius(sdev); if (bflags & SPI_BLIST_NOIUS) { dev_info(dev, "Information Units disabled by blacklist\n"); spi_support_ius(starget) = 0; } spi_support_qas(starget) = scsi_device_qas(sdev); return 0; } static int spi_setup_transport_attrs(struct transport_container *tc, struct device *dev, struct device *cdev) { struct scsi_target *starget = to_scsi_target(dev); spi_period(starget) = -1; /* illegal value */ spi_min_period(starget) = 0; spi_offset(starget) = 0; /* async */ spi_max_offset(starget) = 255; spi_width(starget) = 0; /* narrow */ spi_max_width(starget) = 1; spi_iu(starget) = 0; /* no IU */ spi_max_iu(starget) = 1; spi_dt(starget) = 0; /* ST */ spi_qas(starget) = 0; spi_max_qas(starget) = 1; spi_wr_flow(starget) = 0; spi_rd_strm(starget) = 0; spi_rti(starget) = 0; spi_pcomp_en(starget) = 0; spi_hold_mcs(starget) = 0; spi_dv_pending(starget) = 0; spi_dv_in_progress(starget) = 0; spi_initial_dv(starget) = 0; mutex_init(&spi_dv_mutex(starget)); return 0; } #define spi_transport_show_simple(field, format_string) \ \ static ssize_t \ show_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct spi_transport_attrs *tp; \ \ tp = (struct spi_transport_attrs *)&starget->starget_data; \ return snprintf(buf, 20, format_string, tp->field); \ } #define spi_transport_store_simple(field, format_string) \ \ static ssize_t \ store_spi_transport_##field(struct device *dev, \ 
struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int val; \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct spi_transport_attrs *tp; \ \ tp = (struct spi_transport_attrs *)&starget->starget_data; \ val = simple_strtoul(buf, NULL, 0); \ tp->field = val; \ return count; \ } #define spi_transport_show_function(field, format_string) \ \ static ssize_t \ show_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \ struct spi_transport_attrs *tp; \ struct spi_internal *i = to_spi_internal(shost->transportt); \ tp = (struct spi_transport_attrs *)&starget->starget_data; \ if (i->f->get_##field) \ i->f->get_##field(starget); \ return snprintf(buf, 20, format_string, tp->field); \ } #define spi_transport_store_function(field, format_string) \ static ssize_t \ store_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int val; \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \ struct spi_internal *i = to_spi_internal(shost->transportt); \ \ if (!i->f->set_##field) \ return -EINVAL; \ val = simple_strtoul(buf, NULL, 0); \ i->f->set_##field(starget, val); \ return count; \ } #define spi_transport_store_max(field, format_string) \ static ssize_t \ store_spi_transport_##field(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int val; \ struct scsi_target *starget = transport_class_to_starget(dev); \ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); \ struct spi_internal *i = to_spi_internal(shost->transportt); \ struct spi_transport_attrs *tp \ = (struct spi_transport_attrs *)&starget->starget_data; \ \ if (!i->f->set_##field) \ return -EINVAL; \ val = simple_strtoul(buf, NULL, 0); \ if (val > tp->max_##field) \ val = tp->max_##field; \ i->f->set_##field(starget, val); \ return count; \ } #define spi_transport_rd_attr(field, format_string) \ spi_transport_show_function(field, format_string) \ spi_transport_store_function(field, format_string) \ static DEVICE_ATTR(field, S_IRUGO, \ show_spi_transport_##field, \ store_spi_transport_##field); #define spi_transport_simple_attr(field, format_string) \ spi_transport_show_simple(field, format_string) \ spi_transport_store_simple(field, format_string) \ static DEVICE_ATTR(field, S_IRUGO, \ show_spi_transport_##field, \ store_spi_transport_##field); #define spi_transport_max_attr(field, format_string) \ spi_transport_show_function(field, format_string) \ spi_transport_store_max(field, format_string) \ spi_transport_simple_attr(max_##field, format_string) \ static DEVICE_ATTR(field, S_IRUGO, \ show_spi_transport_##field, \ store_spi_transport_##field); /* The Parallel SCSI Tranport Attributes: */ spi_transport_max_attr(offset, "%d\n"); spi_transport_max_attr(width, "%d\n"); spi_transport_max_attr(iu, "%d\n"); spi_transport_rd_attr(dt, "%d\n"); spi_transport_max_attr(qas, "%d\n"); spi_transport_rd_attr(wr_flow, "%d\n"); spi_transport_rd_attr(rd_strm, "%d\n"); spi_transport_rd_attr(rti, "%d\n"); spi_transport_rd_attr(pcomp_en, "%d\n"); spi_transport_rd_attr(hold_mcs, "%d\n"); /* we only care about the first child device that's a real SCSI device * so we return 1 to terminate the iteration when we find it */ static int child_iter(struct device *dev, void *data) { if 
(!scsi_is_sdev_device(dev)) return 0; spi_dv_device(to_scsi_device(dev)); return 1; } static ssize_t store_spi_revalidate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_target *starget = transport_class_to_starget(dev); device_for_each_child(&starget->dev, NULL, child_iter); return count; } static DEVICE_ATTR(revalidate, S_IWUSR, NULL, store_spi_revalidate); /* Translate the period into ns according to the current spec * for SDTR/PPR messages */ static int period_to_str(char *buf, int period) { int len, picosec; if (period < 0 || period > 0xff) { picosec = -1; } else if (period <= SPI_STATIC_PPR) { picosec = ppr_to_ps[period]; } else { picosec = period * 4000; } if (picosec == -1) { len = sprintf(buf, "reserved"); } else { len = sprint_frac(buf, picosec, 1000); } return len; } static ssize_t show_spi_transport_period_helper(char *buf, int period) { int len = period_to_str(buf, period); buf[len++] = '\n'; buf[len] = '\0'; return len; } static ssize_t store_spi_transport_period_helper(struct device *dev, const char *buf, size_t count, int *periodp) { int j, picosec, period = -1; char *endp; picosec = simple_strtoul(buf, &endp, 10) * 1000; if (*endp == '.') { int mult = 100; do { endp++; if (!isdigit(*endp)) break; picosec += (*endp - '0') * mult; mult /= 10; } while (mult > 0); } for (j = 0; j <= SPI_STATIC_PPR; j++) { if (ppr_to_ps[j] < picosec) continue; period = j; break; } if (period == -1) period = picosec / 4000; if (period > 0xff) period = 0xff; *periodp = period; return count; } static ssize_t show_spi_transport_period(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_target *starget = transport_class_to_starget(dev); struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct spi_internal *i = to_spi_internal(shost->transportt); struct spi_transport_attrs *tp = (struct spi_transport_attrs *)&starget->starget_data; if (i->f->get_period) i->f->get_period(starget); return show_spi_transport_period_helper(buf, tp->period); } static ssize_t store_spi_transport_period(struct device *cdev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_target *starget = transport_class_to_starget(cdev); struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct spi_internal *i = to_spi_internal(shost->transportt); struct spi_transport_attrs *tp = (struct spi_transport_attrs *)&starget->starget_data; int period, retval; if (!i->f->set_period) return -EINVAL; retval = store_spi_transport_period_helper(cdev, buf, count, &period); if (period < tp->min_period) period = tp->min_period; i->f->set_period(starget, period); return retval; } static DEVICE_ATTR(period, S_IRUGO, show_spi_transport_period, store_spi_transport_period); static ssize_t show_spi_transport_min_period(struct device *cdev, struct device_attribute *attr, char *buf) { struct scsi_target *starget = transport_class_to_starget(cdev); struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct spi_internal *i = to_spi_internal(shost->transportt); struct spi_transport_attrs *tp = (struct spi_transport_attrs *)&starget->starget_data; if (!i->f->set_period) return -EINVAL; return show_spi_transport_period_helper(buf, tp->min_period); } static ssize_t store_spi_transport_min_period(struct device *cdev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_target *starget = transport_class_to_starget(cdev); struct spi_transport_attrs *tp = (struct spi_transport_attrs *)&starget->starget_data; return 
store_spi_transport_period_helper(cdev, buf, count, &tp->min_period); } static DEVICE_ATTR(min_period, S_IRUGO, show_spi_transport_min_period, store_spi_transport_min_period); static ssize_t show_spi_host_signalling(struct device *cdev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = transport_class_to_shost(cdev); struct spi_internal *i = to_spi_internal(shost->transportt); if (i->f->get_signalling) i->f->get_signalling(shost); return sprintf(buf, "%s\n", spi_signal_to_string(spi_signalling(shost))); } static ssize_t store_spi_host_signalling(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = transport_class_to_shost(dev); struct spi_internal *i = to_spi_internal(shost->transportt); enum spi_signal_type type = spi_signal_to_value(buf); if (!i->f->set_signalling) return -EINVAL; if (type != SPI_SIGNAL_UNKNOWN) i->f->set_signalling(shost, type); return count; } static DEVICE_ATTR(signalling, S_IRUGO, show_spi_host_signalling, store_spi_host_signalling); static ssize_t show_spi_host_width(struct device *cdev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = transport_class_to_shost(cdev); return sprintf(buf, "%s\n", shost->max_id == 16 ? "wide" : "narrow"); } static DEVICE_ATTR(host_width, S_IRUGO, show_spi_host_width, NULL); static ssize_t show_spi_host_hba_id(struct device *cdev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = transport_class_to_shost(cdev); return sprintf(buf, "%d\n", shost->this_id); } static DEVICE_ATTR(hba_id, S_IRUGO, show_spi_host_hba_id, NULL); #define DV_SET(x, y) \ if(i->f->set_##x) \ i->f->set_##x(sdev->sdev_target, y) enum spi_compare_returns { SPI_COMPARE_SUCCESS, SPI_COMPARE_FAILURE, SPI_COMPARE_SKIP_TEST, }; /* This is for read/write Domain Validation: If the device supports * an echo buffer, we do read/write tests to it */ static enum spi_compare_returns spi_dv_device_echo_buffer(struct scsi_device *sdev, u8 *buffer, u8 *ptr, const int retries) { int len = ptr - buffer; int j, k, r, result; unsigned int pattern = 0x0000ffff; struct scsi_sense_hdr sshdr; const char spi_write_buffer[] = { WRITE_BUFFER, 0x0a, 0, 0, 0, 0, 0, len >> 8, len & 0xff, 0 }; const char spi_read_buffer[] = { READ_BUFFER, 0x0a, 0, 0, 0, 0, 0, len >> 8, len & 0xff, 0 }; /* set up the pattern buffer. Doesn't matter if we spill * slightly beyond since that's where the read buffer is */ for (j = 0; j < len; ) { /* fill the buffer with counting (test a) */ for ( ; j < min(len, 32); j++) buffer[j] = j; k = j; /* fill the buffer with alternating words of 0x0 and * 0xffff (test b) */ for ( ; j < min(len, k + 32); j += 2) { u16 *word = (u16 *)&buffer[j]; *word = (j & 0x02) ? 0x0000 : 0xffff; } k = j; /* fill with crosstalk (alternating 0x5555 0xaaa) * (test c) */ for ( ; j < min(len, k + 32); j += 2) { u16 *word = (u16 *)&buffer[j]; *word = (j & 0x02) ? 0x5555 : 0xaaaa; } k = j; /* fill with shifting bits (test d) */ for ( ; j < min(len, k + 32); j += 4) { u32 *word = (unsigned int *)&buffer[j]; u32 roll = (pattern & 0x80000000) ? 
1 : 0; *word = pattern; pattern = (pattern << 1) | roll; } /* don't bother with random data (test e) */ } for (r = 0; r < retries; r++) { result = spi_execute(sdev, spi_write_buffer, REQ_OP_DRV_OUT, buffer, len, &sshdr); if (result || !scsi_device_online(sdev)) { scsi_device_set_state(sdev, SDEV_QUIESCE); if (result > 0 && scsi_sense_valid(&sshdr) && sshdr.sense_key == ILLEGAL_REQUEST /* INVALID FIELD IN CDB */ && sshdr.asc == 0x24 && sshdr.ascq == 0x00) /* This would mean that the drive lied * to us about supporting an echo * buffer (unfortunately some Western * Digital drives do precisely this) */ return SPI_COMPARE_SKIP_TEST; sdev_printk(KERN_ERR, sdev, "Write Buffer failure %x\n", result); return SPI_COMPARE_FAILURE; } memset(ptr, 0, len); spi_execute(sdev, spi_read_buffer, REQ_OP_DRV_IN, ptr, len, NULL); scsi_device_set_state(sdev, SDEV_QUIESCE); if (memcmp(buffer, ptr, len) != 0) return SPI_COMPARE_FAILURE; } return SPI_COMPARE_SUCCESS; } /* This is for the simplest form of Domain Validation: a read test * on the inquiry data from the device */ static enum spi_compare_returns spi_dv_device_compare_inquiry(struct scsi_device *sdev, u8 *buffer, u8 *ptr, const int retries) { int r, result; const int len = sdev->inquiry_len; const char spi_inquiry[] = { INQUIRY, 0, 0, 0, len, 0 }; for (r = 0; r < retries; r++) { memset(ptr, 0, len); result = spi_execute(sdev, spi_inquiry, REQ_OP_DRV_IN, ptr, len, NULL); if(result || !scsi_device_online(sdev)) { scsi_device_set_state(sdev, SDEV_QUIESCE); return SPI_COMPARE_FAILURE; } /* If we don't have the inquiry data already, the * first read gets it */ if (ptr == buffer) { ptr += len; --r; continue; } if (memcmp(buffer, ptr, len) != 0) /* failure */ return SPI_COMPARE_FAILURE; } return SPI_COMPARE_SUCCESS; } static enum spi_compare_returns spi_dv_retrain(struct scsi_device *sdev, u8 *buffer, u8 *ptr, enum spi_compare_returns (*compare_fn)(struct scsi_device *, u8 *, u8 *, int)) { struct spi_internal *i = to_spi_internal(sdev->host->transportt); struct scsi_target *starget = sdev->sdev_target; int period = 0, prevperiod = 0; enum spi_compare_returns retval; for (;;) { int newperiod; retval = compare_fn(sdev, buffer, ptr, DV_LOOPS); if (retval == SPI_COMPARE_SUCCESS || retval == SPI_COMPARE_SKIP_TEST) break; /* OK, retrain, fallback */ if (i->f->get_iu) i->f->get_iu(starget); if (i->f->get_qas) i->f->get_qas(starget); if (i->f->get_period) i->f->get_period(sdev->sdev_target); /* Here's the fallback sequence; first try turning off * IU, then QAS (if we can control them), then finally * fall down the periods */ if (i->f->set_iu && spi_iu(starget)) { starget_printk(KERN_ERR, starget, "Domain Validation Disabling Information Units\n"); DV_SET(iu, 0); } else if (i->f->set_qas && spi_qas(starget)) { starget_printk(KERN_ERR, starget, "Domain Validation Disabling Quick Arbitration and Selection\n"); DV_SET(qas, 0); } else { newperiod = spi_period(starget); period = newperiod > period ? 
newperiod : period; if (period < 0x0d) period++; else period += period >> 1; if (unlikely(period > 0xff || period == prevperiod)) { /* Total failure; set to async and return */ starget_printk(KERN_ERR, starget, "Domain Validation Failure, dropping back to Asynchronous\n"); DV_SET(offset, 0); return SPI_COMPARE_FAILURE; } starget_printk(KERN_ERR, starget, "Domain Validation detected failure, dropping back\n"); DV_SET(period, period); prevperiod = period; } } return retval; } static int spi_dv_device_get_echo_buffer(struct scsi_device *sdev, u8 *buffer) { int l, result; /* first off do a test unit ready. This can error out * because of reservations or some other reason. If it * fails, the device won't let us write to the echo buffer * so just return failure */ static const char spi_test_unit_ready[] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; static const char spi_read_buffer_descriptor[] = { READ_BUFFER, 0x0b, 0, 0, 0, 0, 0, 0, 4, 0 }; /* We send a set of three TURs to clear any outstanding * unit attention conditions if they exist (Otherwise the * buffer tests won't be happy). If the TUR still fails * (reservation conflict, device not ready, etc) just * skip the write tests */ for (l = 0; ; l++) { result = spi_execute(sdev, spi_test_unit_ready, REQ_OP_DRV_IN, NULL, 0, NULL); if(result) { if(l >= 3) return 0; } else { /* TUR succeeded */ break; } } result = spi_execute(sdev, spi_read_buffer_descriptor, REQ_OP_DRV_IN, buffer, 4, NULL); if (result) /* Device has no echo buffer */ return 0; return buffer[3] + ((buffer[2] & 0x1f) << 8); } static void spi_dv_device_internal(struct scsi_device *sdev, u8 *buffer) { struct spi_internal *i = to_spi_internal(sdev->host->transportt); struct scsi_target *starget = sdev->sdev_target; struct Scsi_Host *shost = sdev->host; int len = sdev->inquiry_len; int min_period = spi_min_period(starget); int max_width = spi_max_width(starget); /* first set us up for narrow async */ DV_SET(offset, 0); DV_SET(width, 0); if (spi_dv_device_compare_inquiry(sdev, buffer, buffer, DV_LOOPS) != SPI_COMPARE_SUCCESS) { starget_printk(KERN_ERR, starget, "Domain Validation Initial Inquiry Failed\n"); /* FIXME: should probably offline the device here? */ return; } if (!spi_support_wide(starget)) { spi_max_width(starget) = 0; max_width = 0; } /* test width */ if (i->f->set_width && max_width) { i->f->set_width(starget, 1); if (spi_dv_device_compare_inquiry(sdev, buffer, buffer + len, DV_LOOPS) != SPI_COMPARE_SUCCESS) { starget_printk(KERN_ERR, starget, "Wide Transfers Fail\n"); i->f->set_width(starget, 0); /* Make sure we don't force wide back on by asking * for a transfer period that requires it */ max_width = 0; if (min_period < 10) min_period = 10; } } if (!i->f->set_period) return; /* device can't handle synchronous */ if (!spi_support_sync(starget) && !spi_support_dt(starget)) return; /* len == -1 is the signal that we need to ascertain the * presence of an echo buffer before trying to use it. len == * 0 means we don't have an echo buffer */ len = -1; retry: /* now set up to the maximum */ DV_SET(offset, spi_max_offset(starget)); DV_SET(period, min_period); /* try QAS requests; this should be harmless to set if the * target supports it */ if (spi_support_qas(starget) && spi_max_qas(starget)) { DV_SET(qas, 1); } else { DV_SET(qas, 0); } if (spi_support_ius(starget) && spi_max_iu(starget) && min_period < 9) { /* This u320 (or u640). 
Set IU transfers */ DV_SET(iu, 1); /* Then set the optional parameters */ DV_SET(rd_strm, 1); DV_SET(wr_flow, 1); DV_SET(rti, 1); if (min_period == 8) DV_SET(pcomp_en, 1); } else { DV_SET(iu, 0); } /* now that we've done all this, actually check the bus * signal type (if known). Some devices are stupid on * a SE bus and still claim they can try LVD only settings */ if (i->f->get_signalling) i->f->get_signalling(shost); if (spi_signalling(shost) == SPI_SIGNAL_SE || spi_signalling(shost) == SPI_SIGNAL_HVD || !spi_support_dt(starget)) { DV_SET(dt, 0); } else { DV_SET(dt, 1); } /* set width last because it will pull all the other * parameters down to required values */ DV_SET(width, max_width); /* Do the read only INQUIRY tests */ spi_dv_retrain(sdev, buffer, buffer + sdev->inquiry_len, spi_dv_device_compare_inquiry); /* See if we actually managed to negotiate and sustain DT */ if (i->f->get_dt) i->f->get_dt(starget); /* see if the device has an echo buffer. If it does we can do * the SPI pattern write tests. Because of some broken * devices, we *only* try this on a device that has actually * negotiated DT */ if (len == -1 && spi_dt(starget)) len = spi_dv_device_get_echo_buffer(sdev, buffer); if (len <= 0) { starget_printk(KERN_INFO, starget, "Domain Validation skipping write tests\n"); return; } if (len > SPI_MAX_ECHO_BUFFER_SIZE) { starget_printk(KERN_WARNING, starget, "Echo buffer size %d is too big, trimming to %d\n", len, SPI_MAX_ECHO_BUFFER_SIZE); len = SPI_MAX_ECHO_BUFFER_SIZE; } if (spi_dv_retrain(sdev, buffer, buffer + len, spi_dv_device_echo_buffer) == SPI_COMPARE_SKIP_TEST) { /* OK, the stupid drive can't do a write echo buffer * test after all, fall back to the read tests */ len = 0; goto retry; } } /** spi_dv_device - Do Domain Validation on the device * @sdev: scsi device to validate * * Performs the domain validation on the given device in the * current execution thread. Since DV operations may sleep, * the current thread must have user context. Also no SCSI * related locks that would deadlock I/O issued by the DV may * be held. */ void spi_dv_device(struct scsi_device *sdev) { struct scsi_target *starget = sdev->sdev_target; const int len = SPI_MAX_ECHO_BUFFER_SIZE*2; unsigned int sleep_flags; u8 *buffer; /* * Because this function and the power management code both call * scsi_device_quiesce(), it is not safe to perform domain validation * while suspend or resume is in progress. Hence the * lock/unlock_system_sleep() calls. 
*/ sleep_flags = lock_system_sleep(); if (scsi_autopm_get_device(sdev)) goto unlock_system_sleep; if (unlikely(spi_dv_in_progress(starget))) goto put_autopm; if (unlikely(scsi_device_get(sdev))) goto put_autopm; spi_dv_in_progress(starget) = 1; buffer = kzalloc(len, GFP_KERNEL); if (unlikely(!buffer)) goto put_sdev; /* We need to verify that the actual device will quiesce; the * later target quiesce is just a nice to have */ if (unlikely(scsi_device_quiesce(sdev))) goto free_buffer; scsi_target_quiesce(starget); spi_dv_pending(starget) = 1; mutex_lock(&spi_dv_mutex(starget)); starget_printk(KERN_INFO, starget, "Beginning Domain Validation\n"); spi_dv_device_internal(sdev, buffer); starget_printk(KERN_INFO, starget, "Ending Domain Validation\n"); mutex_unlock(&spi_dv_mutex(starget)); spi_dv_pending(starget) = 0; scsi_target_resume(starget); spi_initial_dv(starget) = 1; free_buffer: kfree(buffer); put_sdev: spi_dv_in_progress(starget) = 0; scsi_device_put(sdev); put_autopm: scsi_autopm_put_device(sdev); unlock_system_sleep: unlock_system_sleep(sleep_flags); } EXPORT_SYMBOL(spi_dv_device); struct work_queue_wrapper { struct work_struct work; struct scsi_device *sdev; }; static void spi_dv_device_work_wrapper(struct work_struct *work) { struct work_queue_wrapper *wqw = container_of(work, struct work_queue_wrapper, work); struct scsi_device *sdev = wqw->sdev; kfree(wqw); spi_dv_device(sdev); spi_dv_pending(sdev->sdev_target) = 0; scsi_device_put(sdev); } /** * spi_schedule_dv_device - schedule domain validation to occur on the device * @sdev: The device to validate * * Identical to spi_dv_device() above, except that the DV will be * scheduled to occur in a workqueue later. All memory allocations * are atomic, so may be called from any context including those holding * SCSI locks. */ void spi_schedule_dv_device(struct scsi_device *sdev) { struct work_queue_wrapper *wqw = kmalloc(sizeof(struct work_queue_wrapper), GFP_ATOMIC); if (unlikely(!wqw)) return; if (unlikely(spi_dv_pending(sdev->sdev_target))) { kfree(wqw); return; } /* Set pending early (dv_device doesn't check it, only sets it) */ spi_dv_pending(sdev->sdev_target) = 1; if (unlikely(scsi_device_get(sdev))) { kfree(wqw); spi_dv_pending(sdev->sdev_target) = 0; return; } INIT_WORK(&wqw->work, spi_dv_device_work_wrapper); wqw->sdev = sdev; schedule_work(&wqw->work); } EXPORT_SYMBOL(spi_schedule_dv_device); /** * spi_display_xfer_agreement - Print the current target transfer agreement * @starget: The target for which to display the agreement * * Each SPI port is required to maintain a transfer agreement for each * other port on the bus. This function prints a one-line summary of * the current agreement; more detailed information is available in sysfs. 
*/ void spi_display_xfer_agreement(struct scsi_target *starget) { struct spi_transport_attrs *tp; tp = (struct spi_transport_attrs *)&starget->starget_data; if (tp->offset > 0 && tp->period > 0) { unsigned int picosec, kb100; char *scsi = "FAST-?"; char tmp[8]; if (tp->period <= SPI_STATIC_PPR) { picosec = ppr_to_ps[tp->period]; switch (tp->period) { case 7: scsi = "FAST-320"; break; case 8: scsi = "FAST-160"; break; case 9: scsi = "FAST-80"; break; case 10: case 11: scsi = "FAST-40"; break; case 12: scsi = "FAST-20"; break; } } else { picosec = tp->period * 4000; if (tp->period < 25) scsi = "FAST-20"; else if (tp->period < 50) scsi = "FAST-10"; else scsi = "FAST-5"; } kb100 = (10000000 + picosec / 2) / picosec; if (tp->width) kb100 *= 2; sprint_frac(tmp, picosec, 1000); dev_info(&starget->dev, "%s %sSCSI %d.%d MB/s %s%s%s%s%s%s%s%s (%s ns, offset %d)\n", scsi, tp->width ? "WIDE " : "", kb100/10, kb100 % 10, tp->dt ? "DT" : "ST", tp->iu ? " IU" : "", tp->qas ? " QAS" : "", tp->rd_strm ? " RDSTRM" : "", tp->rti ? " RTI" : "", tp->wr_flow ? " WRFLOW" : "", tp->pcomp_en ? " PCOMP" : "", tp->hold_mcs ? " HMCS" : "", tmp, tp->offset); } else { dev_info(&starget->dev, "%sasynchronous\n", tp->width ? "wide " : ""); } } EXPORT_SYMBOL(spi_display_xfer_agreement); int spi_populate_width_msg(unsigned char *msg, int width) { msg[0] = EXTENDED_MESSAGE; msg[1] = 2; msg[2] = EXTENDED_WDTR; msg[3] = width; return 4; } EXPORT_SYMBOL_GPL(spi_populate_width_msg); int spi_populate_sync_msg(unsigned char *msg, int period, int offset) { msg[0] = EXTENDED_MESSAGE; msg[1] = 3; msg[2] = EXTENDED_SDTR; msg[3] = period; msg[4] = offset; return 5; } EXPORT_SYMBOL_GPL(spi_populate_sync_msg); int spi_populate_ppr_msg(unsigned char *msg, int period, int offset, int width, int options) { msg[0] = EXTENDED_MESSAGE; msg[1] = 6; msg[2] = EXTENDED_PPR; msg[3] = period; msg[4] = 0; msg[5] = offset; msg[6] = width; msg[7] = options; return 8; } EXPORT_SYMBOL_GPL(spi_populate_ppr_msg); /** * spi_populate_tag_msg - place a tag message in a buffer * @msg: pointer to the area to place the tag * @cmd: pointer to the scsi command for the tag * * Notes: * designed to create the correct type of tag message for the * particular request. Returns the size of the tag message. * May return 0 if TCQ is disabled for this device. 
**/ int spi_populate_tag_msg(unsigned char *msg, struct scsi_cmnd *cmd) { if (cmd->flags & SCMD_TAGGED) { *msg++ = SIMPLE_QUEUE_TAG; *msg++ = scsi_cmd_to_rq(cmd)->tag; return 2; } return 0; } EXPORT_SYMBOL_GPL(spi_populate_tag_msg); #ifdef CONFIG_SCSI_CONSTANTS static const char * const one_byte_msgs[] = { /* 0x00 */ "Task Complete", NULL /* Extended Message */, "Save Pointers", /* 0x03 */ "Restore Pointers", "Disconnect", "Initiator Error", /* 0x06 */ "Abort Task Set", "Message Reject", "Nop", "Message Parity Error", /* 0x0a */ "Linked Command Complete", "Linked Command Complete w/flag", /* 0x0c */ "Target Reset", "Abort Task", "Clear Task Set", /* 0x0f */ "Initiate Recovery", "Release Recovery", /* 0x11 */ "Terminate Process", "Continue Task", "Target Transfer Disable", /* 0x14 */ NULL, NULL, "Clear ACA", "LUN Reset" }; static const char * const two_byte_msgs[] = { /* 0x20 */ "Simple Queue Tag", "Head of Queue Tag", "Ordered Queue Tag", /* 0x23 */ "Ignore Wide Residue", "ACA" }; static const char * const extended_msgs[] = { /* 0x00 */ "Modify Data Pointer", "Synchronous Data Transfer Request", /* 0x02 */ "SCSI-I Extended Identify", "Wide Data Transfer Request", /* 0x04 */ "Parallel Protocol Request", "Modify Bidirectional Data Pointer" }; static void print_nego(const unsigned char *msg, int per, int off, int width) { if (per) { char buf[20]; period_to_str(buf, msg[per]); printk("period = %s ns ", buf); } if (off) printk("offset = %d ", msg[off]); if (width) printk("width = %d ", 8 << msg[width]); } static void print_ptr(const unsigned char *msg, int msb, const char *desc) { int ptr = (msg[msb] << 24) | (msg[msb+1] << 16) | (msg[msb+2] << 8) | msg[msb+3]; printk("%s = %d ", desc, ptr); } int spi_print_msg(const unsigned char *msg) { int len = 1, i; if (msg[0] == EXTENDED_MESSAGE) { len = 2 + msg[1]; if (len == 2) len += 256; if (msg[2] < ARRAY_SIZE(extended_msgs)) printk ("%s ", extended_msgs[msg[2]]); else printk ("Extended Message, reserved code (0x%02x) ", (int) msg[2]); switch (msg[2]) { case EXTENDED_MODIFY_DATA_POINTER: print_ptr(msg, 3, "pointer"); break; case EXTENDED_SDTR: print_nego(msg, 3, 4, 0); break; case EXTENDED_WDTR: print_nego(msg, 0, 0, 3); break; case EXTENDED_PPR: print_nego(msg, 3, 5, 6); break; case EXTENDED_MODIFY_BIDI_DATA_PTR: print_ptr(msg, 3, "out"); print_ptr(msg, 7, "in"); break; default: for (i = 2; i < len; ++i) printk("%02x ", msg[i]); } /* Identify */ } else if (msg[0] & 0x80) { printk("Identify disconnect %sallowed %s %d ", (msg[0] & 0x40) ? "" : "not ", (msg[0] & 0x20) ? 
"target routine" : "lun", msg[0] & 0x7); /* Normal One byte */ } else if (msg[0] < 0x1f) { if (msg[0] < ARRAY_SIZE(one_byte_msgs) && one_byte_msgs[msg[0]]) printk("%s ", one_byte_msgs[msg[0]]); else printk("reserved (%02x) ", msg[0]); } else if (msg[0] == 0x55) { printk("QAS Request "); /* Two byte */ } else if (msg[0] <= 0x2f) { if ((msg[0] - 0x20) < ARRAY_SIZE(two_byte_msgs)) printk("%s %02x ", two_byte_msgs[msg[0] - 0x20], msg[1]); else printk("reserved two byte (%02x %02x) ", msg[0], msg[1]); len = 2; } else printk("reserved "); return len; } EXPORT_SYMBOL(spi_print_msg); #else /* ifndef CONFIG_SCSI_CONSTANTS */ int spi_print_msg(const unsigned char *msg) { int len = 1, i; if (msg[0] == EXTENDED_MESSAGE) { len = 2 + msg[1]; if (len == 2) len += 256; for (i = 0; i < len; ++i) printk("%02x ", msg[i]); /* Identify */ } else if (msg[0] & 0x80) { printk("%02x ", msg[0]); /* Normal One byte */ } else if ((msg[0] < 0x1f) || (msg[0] == 0x55)) { printk("%02x ", msg[0]); /* Two byte */ } else if (msg[0] <= 0x2f) { printk("%02x %02x", msg[0], msg[1]); len = 2; } else printk("%02x ", msg[0]); return len; } EXPORT_SYMBOL(spi_print_msg); #endif /* ! CONFIG_SCSI_CONSTANTS */ static int spi_device_match(struct attribute_container *cont, struct device *dev) { struct scsi_device *sdev; struct Scsi_Host *shost; struct spi_internal *i; if (!scsi_is_sdev_device(dev)) return 0; sdev = to_scsi_device(dev); shost = sdev->host; if (!shost->transportt || shost->transportt->host_attrs.ac.class != &spi_host_class.class) return 0; /* Note: this class has no device attributes, so it has * no per-HBA allocation and thus we don't need to distinguish * the attribute containers for the device */ i = to_spi_internal(shost->transportt); if (i->f->deny_binding && i->f->deny_binding(sdev->sdev_target)) return 0; return 1; } static int spi_target_match(struct attribute_container *cont, struct device *dev) { struct Scsi_Host *shost; struct scsi_target *starget; struct spi_internal *i; if (!scsi_is_target_device(dev)) return 0; shost = dev_to_shost(dev->parent); if (!shost->transportt || shost->transportt->host_attrs.ac.class != &spi_host_class.class) return 0; i = to_spi_internal(shost->transportt); starget = to_scsi_target(dev); if (i->f->deny_binding && i->f->deny_binding(starget)) return 0; return &i->t.target_attrs.ac == cont; } static DECLARE_TRANSPORT_CLASS(spi_transport_class, "spi_transport", spi_setup_transport_attrs, NULL, spi_target_configure); static DECLARE_ANON_TRANSPORT_CLASS(spi_device_class, spi_device_match, spi_device_configure); static struct attribute *host_attributes[] = { &dev_attr_signalling.attr, &dev_attr_host_width.attr, &dev_attr_hba_id.attr, NULL }; static struct attribute_group host_attribute_group = { .attrs = host_attributes, }; static int spi_host_configure(struct transport_container *tc, struct device *dev, struct device *cdev) { struct kobject *kobj = &cdev->kobj; struct Scsi_Host *shost = transport_class_to_shost(cdev); struct spi_internal *si = to_spi_internal(shost->transportt); struct attribute *attr = &dev_attr_signalling.attr; int rc = 0; if (si->f->set_signalling) rc = sysfs_chmod_file(kobj, attr, attr->mode | S_IWUSR); return rc; } /* returns true if we should be showing the variable. Also * overloads the return by setting 1<<1 if the attribute should * be writeable */ #define TARGET_ATTRIBUTE_HELPER(name) \ (si->f->show_##name ? S_IRUGO : 0) | \ (si->f->set_##name ? 
S_IWUSR : 0) static umode_t target_attribute_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *cdev = container_of(kobj, struct device, kobj); struct scsi_target *starget = transport_class_to_starget(cdev); struct Scsi_Host *shost = transport_class_to_shost(cdev); struct spi_internal *si = to_spi_internal(shost->transportt); if (attr == &dev_attr_period.attr && spi_support_sync(starget)) return TARGET_ATTRIBUTE_HELPER(period); else if (attr == &dev_attr_min_period.attr && spi_support_sync(starget)) return TARGET_ATTRIBUTE_HELPER(period); else if (attr == &dev_attr_offset.attr && spi_support_sync(starget)) return TARGET_ATTRIBUTE_HELPER(offset); else if (attr == &dev_attr_max_offset.attr && spi_support_sync(starget)) return TARGET_ATTRIBUTE_HELPER(offset); else if (attr == &dev_attr_width.attr && spi_support_wide(starget)) return TARGET_ATTRIBUTE_HELPER(width); else if (attr == &dev_attr_max_width.attr && spi_support_wide(starget)) return TARGET_ATTRIBUTE_HELPER(width); else if (attr == &dev_attr_iu.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(iu); else if (attr == &dev_attr_max_iu.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(iu); else if (attr == &dev_attr_dt.attr && spi_support_dt(starget)) return TARGET_ATTRIBUTE_HELPER(dt); else if (attr == &dev_attr_qas.attr && spi_support_qas(starget)) return TARGET_ATTRIBUTE_HELPER(qas); else if (attr == &dev_attr_max_qas.attr && spi_support_qas(starget)) return TARGET_ATTRIBUTE_HELPER(qas); else if (attr == &dev_attr_wr_flow.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(wr_flow); else if (attr == &dev_attr_rd_strm.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(rd_strm); else if (attr == &dev_attr_rti.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(rti); else if (attr == &dev_attr_pcomp_en.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(pcomp_en); else if (attr == &dev_attr_hold_mcs.attr && spi_support_ius(starget)) return TARGET_ATTRIBUTE_HELPER(hold_mcs); else if (attr == &dev_attr_revalidate.attr) return S_IWUSR; return 0; } static struct attribute *target_attributes[] = { &dev_attr_period.attr, &dev_attr_min_period.attr, &dev_attr_offset.attr, &dev_attr_max_offset.attr, &dev_attr_width.attr, &dev_attr_max_width.attr, &dev_attr_iu.attr, &dev_attr_max_iu.attr, &dev_attr_dt.attr, &dev_attr_qas.attr, &dev_attr_max_qas.attr, &dev_attr_wr_flow.attr, &dev_attr_rd_strm.attr, &dev_attr_rti.attr, &dev_attr_pcomp_en.attr, &dev_attr_hold_mcs.attr, &dev_attr_revalidate.attr, NULL }; static struct attribute_group target_attribute_group = { .attrs = target_attributes, .is_visible = target_attribute_is_visible, }; static int spi_target_configure(struct transport_container *tc, struct device *dev, struct device *cdev) { struct kobject *kobj = &cdev->kobj; /* force an update based on parameters read from the device */ sysfs_update_group(kobj, &target_attribute_group); return 0; } struct scsi_transport_template * spi_attach_transport(struct spi_function_template *ft) { struct spi_internal *i = kzalloc(sizeof(struct spi_internal), GFP_KERNEL); if (unlikely(!i)) return NULL; i->t.target_attrs.ac.class = &spi_transport_class.class; i->t.target_attrs.ac.grp = &target_attribute_group; i->t.target_attrs.ac.match = spi_target_match; transport_container_register(&i->t.target_attrs); i->t.target_size = sizeof(struct spi_transport_attrs); i->t.host_attrs.ac.class = &spi_host_class.class; i->t.host_attrs.ac.grp = &host_attribute_group; 
i->t.host_attrs.ac.match = spi_host_match; transport_container_register(&i->t.host_attrs); i->t.host_size = sizeof(struct spi_host_attrs); i->f = ft; return &i->t; } EXPORT_SYMBOL(spi_attach_transport); void spi_release_transport(struct scsi_transport_template *t) { struct spi_internal *i = to_spi_internal(t); transport_container_unregister(&i->t.target_attrs); transport_container_unregister(&i->t.host_attrs); kfree(i); } EXPORT_SYMBOL(spi_release_transport); static __init int spi_transport_init(void) { int error = scsi_dev_info_add_list(SCSI_DEVINFO_SPI, "SCSI Parallel Transport Class"); if (!error) { int i; for (i = 0; spi_static_device_list[i].vendor; i++) scsi_dev_info_list_add_keyed(1, /* compatible */ spi_static_device_list[i].vendor, spi_static_device_list[i].model, NULL, spi_static_device_list[i].flags, SCSI_DEVINFO_SPI); } error = transport_class_register(&spi_transport_class); if (error) return error; error = anon_transport_class_register(&spi_device_class); return transport_class_register(&spi_host_class); } static void __exit spi_transport_exit(void) { transport_class_unregister(&spi_transport_class); anon_transport_class_unregister(&spi_device_class); transport_class_unregister(&spi_host_class); scsi_dev_info_remove_list(SCSI_DEVINFO_SPI); } MODULE_AUTHOR("Martin Hicks"); MODULE_DESCRIPTION("SPI Transport Attributes"); MODULE_LICENSE("GPL"); module_init(spi_transport_init); module_exit(spi_transport_exit);
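/*
 * Example (illustrative sketch only, not part of the file above): how a SPI
 * HBA low-level driver typically hooks into this transport class.  It fills a
 * struct spi_function_template with get_/set_ callbacks plus show_ flags,
 * attaches it at module init, points each Scsi_Host at the returned template
 * before scsi_add_host(), and releases it at module exit.  All my_hba_*
 * names, including the my_hba_read_period()/my_hba_write_period() helpers,
 * are hypothetical.
 */
#if 0
static struct scsi_transport_template *my_hba_transport_template;

static void my_hba_get_period(struct scsi_target *starget)
{
	/* read the currently negotiated period from the chip and cache it */
	spi_period(starget) = my_hba_read_period(starget);
}

static void my_hba_set_period(struct scsi_target *starget, int period)
{
	/* program the requested transfer period and remember it */
	my_hba_write_period(starget, period);
	spi_period(starget) = period;
}

static struct spi_function_template my_hba_spi_functions = {
	.get_period	= my_hba_get_period,
	.set_period	= my_hba_set_period,
	.show_period	= 1,
};

static int __init my_hba_init(void)
{
	my_hba_transport_template = spi_attach_transport(&my_hba_spi_functions);
	if (!my_hba_transport_template)
		return -ENOMEM;
	/*
	 * Each host allocated later does
	 *	shost->transportt = my_hba_transport_template;
	 * before scsi_add_host(), which is what lets spi_host_match() and
	 * spi_target_match() above bind the sysfs attributes.
	 */
	return 0;
}

static void __exit my_hba_exit(void)
{
	spi_release_transport(my_hba_transport_template);
}
#endif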
// SPDX-License-Identifier: GPL-2.0-only /* * mm/percpu-vm.c - vmalloc area based chunk allocation * * Copyright (C) 2010 SUSE Linux Products GmbH * Copyright (C) 2010 Tejun Heo <tj@kernel.org> * * Chunks are mapped into vmalloc areas and populated page by page. * This is the default chunk allocator. */ #include "internal.h" static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, unsigned int cpu, int page_idx) { /* must not be used on pre-mapped chunk */ WARN_ON(chunk->immutable); return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); } /** * pcpu_get_pages - get temp pages array * * Returns pointer to array of pointers to struct page which can be indexed * with pcpu_page_idx(). Note that there is only one array and accesses * should be serialized by pcpu_alloc_mutex. * * RETURNS: * Pointer to temp pages array on success. */ static struct page **pcpu_get_pages(void) { static struct page **pages; size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); lockdep_assert_held(&pcpu_alloc_mutex); if (!pages) pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL); return pages; } /** * pcpu_free_pages - free pages which were allocated for @chunk * @chunk: chunk pages were allocated for * @pages: array of pages to be freed, indexed by pcpu_page_idx() * @page_start: page index of the first page to be freed * @page_end: page index of the last page to be freed + 1 * * Free pages [@page_start and @page_end) in @pages for all units. * The pages were allocated for @chunk. 
*/ static void pcpu_free_pages(struct pcpu_chunk *chunk, struct page **pages, int page_start, int page_end) { unsigned int cpu; int i; for_each_possible_cpu(cpu) { for (i = page_start; i < page_end; i++) { struct page *page = pages[pcpu_page_idx(cpu, i)]; if (page) __free_page(page); } } } /** * pcpu_alloc_pages - allocates pages for @chunk * @chunk: target chunk * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() * @page_start: page index of the first page to be allocated * @page_end: page index of the last page to be allocated + 1 * @gfp: allocation flags passed to the underlying allocator * * Allocate pages [@page_start,@page_end) into @pages for all units. * The allocation is for @chunk. Percpu core doesn't care about the * content of @pages and will pass it verbatim to pcpu_map_pages(). */ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, struct page **pages, int page_start, int page_end, gfp_t gfp) { unsigned int cpu, tcpu; int i; gfp |= __GFP_HIGHMEM; for_each_possible_cpu(cpu) { for (i = page_start; i < page_end; i++) { struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); if (!*pagep) goto err; } } return 0; err: while (--i >= page_start) __free_page(pages[pcpu_page_idx(cpu, i)]); for_each_possible_cpu(tcpu) { if (tcpu == cpu) break; for (i = page_start; i < page_end; i++) __free_page(pages[pcpu_page_idx(tcpu, i)]); } return -ENOMEM; } /** * pcpu_pre_unmap_flush - flush cache prior to unmapping * @chunk: chunk the regions to be flushed belongs to * @page_start: page index of the first page to be flushed * @page_end: page index of the last page to be flushed + 1 * * Pages in [@page_start,@page_end) of @chunk are about to be * unmapped. Flush cache. As each flushing trial can be very * expensive, issue flush on the whole region at once rather than * doing it for each cpu. This could be an overkill but is more * scalable. */ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { flush_cache_vunmap( pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); } static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) { vunmap_range_noflush(addr, addr + (nr_pages << PAGE_SHIFT)); } /** * pcpu_unmap_pages - unmap pages out of a pcpu_chunk * @chunk: chunk of interest * @pages: pages array which can be used to pass information to free * @page_start: page index of the first page to unmap * @page_end: page index of the last page to unmap + 1 * * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. * Corresponding elements in @pages were cleared by the caller and can * be used to carry information to pcpu_free_pages() which will be * called after all unmaps are finished. The caller should call * proper pre/post flush functions. 
*/ static void pcpu_unmap_pages(struct pcpu_chunk *chunk, struct page **pages, int page_start, int page_end) { unsigned int cpu; int i; for_each_possible_cpu(cpu) { for (i = page_start; i < page_end; i++) { struct page *page; page = pcpu_chunk_page(chunk, cpu, i); WARN_ON(!page); pages[pcpu_page_idx(cpu, i)] = page; } __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), page_end - page_start); } } /** * pcpu_post_unmap_tlb_flush - flush TLB after unmapping * @chunk: pcpu_chunk the regions to be flushed belong to * @page_start: page index of the first page to be flushed * @page_end: page index of the last page to be flushed + 1 * * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush * TLB for the regions. This can be skipped if the area is to be * returned to vmalloc as vmalloc will handle TLB flushing lazily. * * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once * for the whole region. */ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { flush_tlb_kernel_range( pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, int nr_pages) { return vmap_pages_range_noflush(addr, addr + (nr_pages << PAGE_SHIFT), PAGE_KERNEL, pages, PAGE_SHIFT); } /** * pcpu_map_pages - map pages into a pcpu_chunk * @chunk: chunk of interest * @pages: pages array containing pages to be mapped * @page_start: page index of the first page to map * @page_end: page index of the last page to map + 1 * * For each cpu, map pages [@page_start,@page_end) into @chunk. The * caller is responsible for calling pcpu_post_map_flush() after all * mappings are complete. * * This function is responsible for setting up whatever is necessary for * reverse lookup (addr -> chunk). */ static int pcpu_map_pages(struct pcpu_chunk *chunk, struct page **pages, int page_start, int page_end) { unsigned int cpu, tcpu; int i, err; for_each_possible_cpu(cpu) { err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), &pages[pcpu_page_idx(cpu, page_start)], page_end - page_start); if (err < 0) goto err; for (i = page_start; i < page_end; i++) pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], chunk); } return 0; err: for_each_possible_cpu(tcpu) { if (tcpu == cpu) break; __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), page_end - page_start); } pcpu_post_unmap_tlb_flush(chunk, page_start, page_end); return err; } /** * pcpu_post_map_flush - flush cache after mapping * @chunk: pcpu_chunk the regions to be flushed belong to * @page_start: page index of the first page to be flushed * @page_end: page index of the last page to be flushed + 1 * * Pages [@page_start,@page_end) of @chunk have been mapped. Flush * cache. * * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once * for the whole region. */ static void pcpu_post_map_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { flush_cache_vmap( pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); } /** * pcpu_populate_chunk - populate and map an area of a pcpu_chunk * @chunk: chunk of interest * @page_start: the start page * @page_end: the end page * @gfp: allocation flags passed to the underlying memory allocator * * For each cpu, populate and map pages [@page_start,@page_end) into * @chunk. * * CONTEXT: * pcpu_alloc_mutex, does GFP_KERNEL allocation. 
*/ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end, gfp_t gfp) { struct page **pages; pages = pcpu_get_pages(); if (!pages) return -ENOMEM; if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp)) return -ENOMEM; if (pcpu_map_pages(chunk, pages, page_start, page_end)) { pcpu_free_pages(chunk, pages, page_start, page_end); return -ENOMEM; } pcpu_post_map_flush(chunk, page_start, page_end); return 0; } /** * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk * @chunk: chunk to depopulate * @page_start: the start page * @page_end: the end page * * For each cpu, depopulate and unmap pages [@page_start,@page_end) * from @chunk. * * Caller is required to call pcpu_post_unmap_tlb_flush() if not returning the * region back to vmalloc() which will lazily flush the tlb. * * CONTEXT: * pcpu_alloc_mutex. */ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end) { struct page **pages; /* * If control reaches here, there must have been at least one * successful population attempt so the temp pages array must * be available now. */ pages = pcpu_get_pages(); BUG_ON(!pages); /* unmap and free */ pcpu_pre_unmap_flush(chunk, page_start, page_end); pcpu_unmap_pages(chunk, pages, page_start, page_end); pcpu_free_pages(chunk, pages, page_start, page_end); } static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) { struct pcpu_chunk *chunk; struct vm_struct **vms; chunk = pcpu_alloc_chunk(gfp); if (!chunk) return NULL; vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, pcpu_nr_groups, pcpu_atom_size); if (!vms) { pcpu_free_chunk(chunk); return NULL; } chunk->data = vms; chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; pcpu_stats_chunk_alloc(); trace_percpu_create_chunk(chunk->base_addr); return chunk; } static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; pcpu_stats_chunk_dealloc(); trace_percpu_destroy_chunk(chunk->base_addr); if (chunk->data) pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); pcpu_free_chunk(chunk); } static struct page *pcpu_addr_to_page(void *addr) { return vmalloc_to_page(addr); } static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) { /* no extra restriction */ return 0; } /** * pcpu_should_reclaim_chunk - determine if a chunk should go into reclaim * @chunk: chunk of interest * * This is the entry point for percpu reclaim. If a chunk qualifies, it is then * isolated and managed in separate lists at the back of pcpu_slot: sidelined * and to_depopulate respectively. The to_depopulate list holds chunks slated * for depopulation. They no longer contribute to pcpu_nr_empty_pop_pages once * they are on this list. Once depopulated, they are moved onto the sidelined * list which enables them to be pulled back in for allocation if no other chunk * can suffice the allocation. */ static bool pcpu_should_reclaim_chunk(struct pcpu_chunk *chunk) { /* do not reclaim either the first chunk or reserved chunk */ if (chunk == pcpu_first_chunk || chunk == pcpu_reserved_chunk) return false; /* * If it is isolated, it may be on the sidelined list so move it back to * the to_depopulate list. If we hit at least 1/4 pages empty pages AND * there is no system-wide shortage of empty pages aside from this * chunk, move it to the to_depopulate list. */ return ((chunk->isolated && chunk->nr_empty_pop_pages) || (pcpu_nr_empty_pop_pages > (PCPU_EMPTY_POP_PAGES_HIGH + chunk->nr_empty_pop_pages) && chunk->nr_empty_pop_pages >= chunk->nr_pages / 4)); }
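/*
 * Example (illustrative sketch only, not part of the file above): this chunk
 * allocator is never called directly.  It is driven through the generic
 * percpu API; when no existing chunk can satisfy an allocation, the percpu
 * core ends up in pcpu_create_chunk()/pcpu_populate_chunk() defined here,
 * and reclaim may later depopulate those pages again.  The my_stats names
 * are hypothetical; alloc_percpu(), per_cpu_ptr(), free_percpu() and
 * for_each_possible_cpu() are the real interfaces.
 */
#if 0
#include <linux/percpu.h>

struct my_stats {
	u64 packets;
};

static struct my_stats __percpu *my_stats;

static int my_stats_init(void)
{
	/* may create and populate a new vmalloc-backed chunk */
	my_stats = alloc_percpu(struct my_stats);
	if (!my_stats)
		return -ENOMEM;
	return 0;
}

static u64 my_stats_sum(void)
{
	unsigned int cpu;
	u64 sum = 0;

	/* each CPU's copy lives in its own unit of some pcpu_chunk */
	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(my_stats, cpu)->packets;

	return sum;
}

static void my_stats_exit(void)
{
	free_percpu(my_stats);
}
#endif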
// SPDX-License-Identifier: GPL-2.0+ /* * KOBIL USB Smart Card Terminal Driver * * Copyright (C) 2002 KOBIL Systems GmbH * Author: Thomas Wahrenbruch * * Contact: linuxusb@kobil.de * * This program is largely derived from work by the linux-usb group * and associated source files. Please see the usb/serial files for * individual credits and copyrights. * * Thanks to Greg Kroah-Hartman (greg@kroah.com) for his help and * patience. 
* * Supported readers: USB TWIN, KAAN Standard Plus and SecOVID Reader Plus * (Adapter K), B1 Professional and KAAN Professional (Adapter B) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/ioctl.h> #include "kobil_sct.h" #define DRIVER_AUTHOR "KOBIL Systems GmbH - http://www.kobil.com" #define DRIVER_DESC "KOBIL USB Smart Card Terminal Driver (experimental)" #define KOBIL_VENDOR_ID 0x0D46 #define KOBIL_ADAPTER_B_PRODUCT_ID 0x2011 #define KOBIL_ADAPTER_K_PRODUCT_ID 0x2012 #define KOBIL_USBTWIN_PRODUCT_ID 0x0078 #define KOBIL_KAAN_SIM_PRODUCT_ID 0x0081 #define KOBIL_TIMEOUT 500 #define KOBIL_BUF_LENGTH 300 /* Function prototypes */ static int kobil_port_probe(struct usb_serial_port *probe); static void kobil_port_remove(struct usb_serial_port *probe); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port); static void kobil_close(struct usb_serial_port *port); static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static unsigned int kobil_write_room(struct tty_struct *tty); static int kobil_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); static int kobil_tiocmget(struct tty_struct *tty); static int kobil_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); static void kobil_read_int_callback(struct urb *urb); static void kobil_write_int_callback(struct urb *urb); static void kobil_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old); static void kobil_init_termios(struct tty_struct *tty); static const struct usb_device_id id_table[] = { { USB_DEVICE(KOBIL_VENDOR_ID, KOBIL_ADAPTER_B_PRODUCT_ID) }, { USB_DEVICE(KOBIL_VENDOR_ID, KOBIL_ADAPTER_K_PRODUCT_ID) }, { USB_DEVICE(KOBIL_VENDOR_ID, KOBIL_USBTWIN_PRODUCT_ID) }, { USB_DEVICE(KOBIL_VENDOR_ID, KOBIL_KAAN_SIM_PRODUCT_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_serial_driver kobil_device = { .driver = { .owner = THIS_MODULE, .name = "kobil", }, .description = "KOBIL USB smart card terminal", .id_table = id_table, .num_ports = 1, .num_interrupt_out = 1, .port_probe = kobil_port_probe, .port_remove = kobil_port_remove, .ioctl = kobil_ioctl, .set_termios = kobil_set_termios, .init_termios = kobil_init_termios, .tiocmget = kobil_tiocmget, .tiocmset = kobil_tiocmset, .open = kobil_open, .close = kobil_close, .write = kobil_write, .write_room = kobil_write_room, .read_int_callback = kobil_read_int_callback, .write_int_callback = kobil_write_int_callback, }; static struct usb_serial_driver * const serial_drivers[] = { &kobil_device, NULL }; struct kobil_private { unsigned char buf[KOBIL_BUF_LENGTH]; /* buffer for the APDU to send */ int filled; /* index of the last char in buf */ int cur_pos; /* index of the next char to send in buf */ __u16 device_type; }; static int kobil_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct kobil_private *priv; priv = kmalloc(sizeof(struct kobil_private), GFP_KERNEL); if (!priv) return -ENOMEM; priv->filled = 0; priv->cur_pos = 0; priv->device_type = le16_to_cpu(serial->dev->descriptor.idProduct); switch (priv->device_type) { case KOBIL_ADAPTER_B_PRODUCT_ID: dev_dbg(&serial->dev->dev, "KOBIL B1 PRO / KAAN PRO 
detected\n"); break; case KOBIL_ADAPTER_K_PRODUCT_ID: dev_dbg(&serial->dev->dev, "KOBIL KAAN Standard Plus / SecOVID Reader Plus detected\n"); break; case KOBIL_USBTWIN_PRODUCT_ID: dev_dbg(&serial->dev->dev, "KOBIL USBTWIN detected\n"); break; case KOBIL_KAAN_SIM_PRODUCT_ID: dev_dbg(&serial->dev->dev, "KOBIL KAAN SIM detected\n"); break; } usb_set_serial_port_data(port, priv); return 0; } static void kobil_port_remove(struct usb_serial_port *port) { struct kobil_private *priv; priv = usb_get_serial_port_data(port); kfree(priv); } static void kobil_init_termios(struct tty_struct *tty) { /* Default to echo off and other sane device settings */ tty->termios.c_lflag = 0; tty->termios.c_iflag &= ~(ISIG | ICANON | ECHO | IEXTEN | XCASE); tty->termios.c_iflag |= IGNBRK | IGNPAR | IXOFF; /* do NOT translate CR to CR-NL (0x0A -> 0x0A 0x0D) */ tty->termios.c_oflag &= ~ONLCR; } static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port) { struct device *dev = &port->dev; int result = 0; struct kobil_private *priv; unsigned char *transfer_buffer; int transfer_buffer_length = 8; priv = usb_get_serial_port_data(port); /* allocate memory for transfer buffer */ transfer_buffer = kzalloc(transfer_buffer_length, GFP_KERNEL); if (!transfer_buffer) return -ENOMEM; /* get hardware version */ result = usb_control_msg(port->serial->dev, usb_rcvctrlpipe(port->serial->dev, 0), SUSBCRequest_GetMisc, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_IN, SUSBCR_MSC_GetHWVersion, 0, transfer_buffer, transfer_buffer_length, KOBIL_TIMEOUT ); dev_dbg(dev, "%s - Send get_HW_version URB returns: %i\n", __func__, result); if (result >= 3) { dev_dbg(dev, "Hardware version: %i.%i.%i\n", transfer_buffer[0], transfer_buffer[1], transfer_buffer[2]); } /* get firmware version */ result = usb_control_msg(port->serial->dev, usb_rcvctrlpipe(port->serial->dev, 0), SUSBCRequest_GetMisc, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_IN, SUSBCR_MSC_GetFWVersion, 0, transfer_buffer, transfer_buffer_length, KOBIL_TIMEOUT ); dev_dbg(dev, "%s - Send get_FW_version URB returns: %i\n", __func__, result); if (result >= 3) { dev_dbg(dev, "Firmware version: %i.%i.%i\n", transfer_buffer[0], transfer_buffer[1], transfer_buffer[2]); } if (priv->device_type == KOBIL_ADAPTER_B_PRODUCT_ID || priv->device_type == KOBIL_ADAPTER_K_PRODUCT_ID) { /* Setting Baudrate, Parity and Stopbits */ result = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), SUSBCRequest_SetBaudRateParityAndStopBits, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, SUSBCR_SBR_9600 | SUSBCR_SPASB_EvenParity | SUSBCR_SPASB_1StopBit, 0, NULL, 0, KOBIL_TIMEOUT ); dev_dbg(dev, "%s - Send set_baudrate URB returns: %i\n", __func__, result); /* reset all queues */ result = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), SUSBCRequest_Misc, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, SUSBCR_MSC_ResetAllQueues, 0, NULL, 0, KOBIL_TIMEOUT ); dev_dbg(dev, "%s - Send reset_all_queues URB returns: %i\n", __func__, result); } if (priv->device_type == KOBIL_USBTWIN_PRODUCT_ID || priv->device_type == KOBIL_ADAPTER_B_PRODUCT_ID || priv->device_type == KOBIL_KAAN_SIM_PRODUCT_ID) { /* start reading (Adapter B 'cause PNP string) */ result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); dev_dbg(dev, "%s - Send read URB returns: %i\n", __func__, result); } kfree(transfer_buffer); return 0; } static void kobil_close(struct usb_serial_port *port) { /* FIXME: Add rts/dtr methods */ usb_kill_urb(port->interrupt_out_urb); 
usb_kill_urb(port->interrupt_in_urb); } static void kobil_read_int_callback(struct urb *urb) { int result; struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; int status = urb->status; if (status) { dev_dbg(&port->dev, "%s - Read int status not zero: %d\n", __func__, status); return; } if (urb->actual_length) { usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } result = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC); dev_dbg(&port->dev, "%s - Send read URB returns: %i\n", __func__, result); } static void kobil_write_int_callback(struct urb *urb) { } static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { int length = 0; int result = 0; int todo = 0; struct kobil_private *priv; if (count == 0) { dev_dbg(&port->dev, "%s - write request of 0 bytes\n", __func__); return 0; } priv = usb_get_serial_port_data(port); if (count > (KOBIL_BUF_LENGTH - priv->filled)) { dev_dbg(&port->dev, "%s - Error: write request bigger than buffer size\n", __func__); return -ENOMEM; } /* Copy data to buffer */ memcpy(priv->buf + priv->filled, buf, count); usb_serial_debug_data(&port->dev, __func__, count, priv->buf + priv->filled); priv->filled = priv->filled + count; /* only send complete block. TWIN, KAAN SIM and adapter K use the same protocol. */ if (((priv->device_type != KOBIL_ADAPTER_B_PRODUCT_ID) && (priv->filled > 2) && (priv->filled >= (priv->buf[1] + 3))) || ((priv->device_type == KOBIL_ADAPTER_B_PRODUCT_ID) && (priv->filled > 3) && (priv->filled >= (priv->buf[2] + 4)))) { /* stop reading (except TWIN and KAAN SIM) */ if ((priv->device_type == KOBIL_ADAPTER_B_PRODUCT_ID) || (priv->device_type == KOBIL_ADAPTER_K_PRODUCT_ID)) usb_kill_urb(port->interrupt_in_urb); todo = priv->filled - priv->cur_pos; while (todo > 0) { /* max 8 byte in one urb (endpoint size) */ length = min(todo, port->interrupt_out_size); /* copy data to transfer buffer */ memcpy(port->interrupt_out_buffer, priv->buf + priv->cur_pos, length); port->interrupt_out_urb->transfer_buffer_length = length; priv->cur_pos = priv->cur_pos + length; result = usb_submit_urb(port->interrupt_out_urb, GFP_ATOMIC); dev_dbg(&port->dev, "%s - Send write URB returns: %i\n", __func__, result); todo = priv->filled - priv->cur_pos; if (todo > 0) msleep(24); } priv->filled = 0; priv->cur_pos = 0; /* start reading (except TWIN and KAAN SIM) */ if (priv->device_type == KOBIL_ADAPTER_B_PRODUCT_ID || priv->device_type == KOBIL_ADAPTER_K_PRODUCT_ID) { result = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC); dev_dbg(&port->dev, "%s - Send read URB returns: %i\n", __func__, result); } } return count; } static unsigned int kobil_write_room(struct tty_struct *tty) { /* FIXME */ return 8; } static int kobil_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct kobil_private *priv; int result; unsigned char *transfer_buffer; int transfer_buffer_length = 8; priv = usb_get_serial_port_data(port); if (priv->device_type == KOBIL_USBTWIN_PRODUCT_ID || priv->device_type == KOBIL_KAAN_SIM_PRODUCT_ID) { /* This device doesn't support ioctl calls */ return -EINVAL; } /* allocate memory for transfer buffer */ transfer_buffer = kzalloc(transfer_buffer_length, GFP_KERNEL); if (!transfer_buffer) return -ENOMEM; result = usb_control_msg(port->serial->dev, usb_rcvctrlpipe(port->serial->dev, 0), SUSBCRequest_GetStatusLineState, 
USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_IN, 0, 0, transfer_buffer, transfer_buffer_length, KOBIL_TIMEOUT); dev_dbg(&port->dev, "Send get_status_line_state URB returns: %i\n", result); if (result < 1) { if (result >= 0) result = -EIO; goto out_free; } dev_dbg(&port->dev, "Statusline: %02x\n", transfer_buffer[0]); result = 0; if ((transfer_buffer[0] & SUSBCR_GSL_DSR) != 0) result = TIOCM_DSR; out_free: kfree(transfer_buffer); return result; } static int kobil_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct device *dev = &port->dev; struct kobil_private *priv; int result; int dtr = 0; int rts = 0; /* FIXME: locking ? */ priv = usb_get_serial_port_data(port); if (priv->device_type == KOBIL_USBTWIN_PRODUCT_ID || priv->device_type == KOBIL_KAAN_SIM_PRODUCT_ID) { /* This device doesn't support ioctl calls */ return -EINVAL; } if (set & TIOCM_RTS) rts = 1; if (set & TIOCM_DTR) dtr = 1; if (clear & TIOCM_RTS) rts = 0; if (clear & TIOCM_DTR) dtr = 0; if (priv->device_type == KOBIL_ADAPTER_B_PRODUCT_ID) { if (dtr != 0) dev_dbg(dev, "%s - Setting DTR\n", __func__); else dev_dbg(dev, "%s - Clearing DTR\n", __func__); result = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), SUSBCRequest_SetStatusLinesOrQueues, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, ((dtr != 0) ? SUSBCR_SSL_SETDTR : SUSBCR_SSL_CLRDTR), 0, NULL, 0, KOBIL_TIMEOUT); } else { if (rts != 0) dev_dbg(dev, "%s - Setting RTS\n", __func__); else dev_dbg(dev, "%s - Clearing RTS\n", __func__); result = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), SUSBCRequest_SetStatusLinesOrQueues, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, ((rts != 0) ? SUSBCR_SSL_SETRTS : SUSBCR_SSL_CLRRTS), 0, NULL, 0, KOBIL_TIMEOUT); } dev_dbg(dev, "%s - Send set_status_line URB returns: %i\n", __func__, result); return (result < 0) ? result : 0; } static void kobil_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old) { struct kobil_private *priv; int result; unsigned short urb_val = 0; int c_cflag = tty->termios.c_cflag; speed_t speed; priv = usb_get_serial_port_data(port); if (priv->device_type == KOBIL_USBTWIN_PRODUCT_ID || priv->device_type == KOBIL_KAAN_SIM_PRODUCT_ID) { /* This device doesn't support ioctl calls */ tty_termios_copy_hw(&tty->termios, old); return; } speed = tty_get_baud_rate(tty); switch (speed) { case 1200: urb_val = SUSBCR_SBR_1200; break; default: speed = 9600; fallthrough; case 9600: urb_val = SUSBCR_SBR_9600; break; } urb_val |= (c_cflag & CSTOPB) ? 
SUSBCR_SPASB_2StopBits : SUSBCR_SPASB_1StopBit; if (c_cflag & PARENB) { if (c_cflag & PARODD) urb_val |= SUSBCR_SPASB_OddParity; else urb_val |= SUSBCR_SPASB_EvenParity; } else urb_val |= SUSBCR_SPASB_NoParity; tty->termios.c_cflag &= ~CMSPAR; tty_encode_baud_rate(tty, speed, speed); result = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), SUSBCRequest_SetBaudRateParityAndStopBits, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, urb_val, 0, NULL, 0, KOBIL_TIMEOUT ); if (result) { dev_err(&port->dev, "failed to update line settings: %d\n", result); } } static int kobil_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; struct kobil_private *priv = usb_get_serial_port_data(port); int result; if (priv->device_type == KOBIL_USBTWIN_PRODUCT_ID || priv->device_type == KOBIL_KAAN_SIM_PRODUCT_ID) /* This device doesn't support ioctl calls */ return -ENOIOCTLCMD; switch (cmd) { case TCFLSH: result = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), SUSBCRequest_Misc, USB_TYPE_VENDOR | USB_RECIP_ENDPOINT | USB_DIR_OUT, SUSBCR_MSC_ResetAllQueues, 0, NULL, 0, KOBIL_TIMEOUT ); dev_dbg(&port->dev, "%s - Send reset_all_queues (FLUSH) URB returns: %i\n", __func__, result); return (result < 0) ? -EIO: 0; default: return -ENOIOCTLCMD; } } module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
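/*
 * A minimal user-space sketch (not part of the kobil_sct driver above) of how
 * the tty interface it implements is typically exercised: termios settings
 * reach kobil_set_termios(), tcflush() issues the TCFLSH ioctl handled by
 * kobil_ioctl(), and TIOCMGET ends up in kobil_tiocmget(). The device node
 * "/dev/ttyUSB0" and the function name are assumptions for illustration only;
 * the driver honours just 1200/9600 baud and reports only the DSR line.
 */
#include <fcntl.h>
#include <stdio.h>
#include <termios.h>
#include <unistd.h>
#include <sys/ioctl.h>

static int kobil_demo(void)
{
	struct termios tio;
	int mbits = 0;
	int fd = open("/dev/ttyUSB0", O_RDWR | O_NOCTTY);	/* hypothetical node */

	if (fd < 0)
		return -1;

	tcgetattr(fd, &tio);
	cfsetispeed(&tio, B9600);		/* maps to SUSBCR_SBR_9600 */
	cfsetospeed(&tio, B9600);
	tio.c_cflag |= PARENB;			/* even parity, as in kobil_open() */
	tio.c_cflag &= ~(PARODD | CSTOPB);	/* 1 stop bit */
	tcsetattr(fd, TCSANOW, &tio);		/* handled by kobil_set_termios() */

	tcflush(fd, TCIOFLUSH);			/* TCFLSH -> SUSBCR_MSC_ResetAllQueues */

	if (ioctl(fd, TIOCMGET, &mbits) == 0)	/* kobil_tiocmget(): DSR only */
		printf("DSR %s\n", (mbits & TIOCM_DSR) ? "set" : "clear");

	close(fd);
	return 0;
}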
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Runtime locking correctness validator
 *
 * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * see Documentation/locking/lockdep-design.rst for more details.
 */
#ifndef __LINUX_LOCKDEP_H
#define __LINUX_LOCKDEP_H

#include <linux/lockdep_types.h>
#include <linux/smp.h>
#include <asm/percpu.h>

struct task_struct;

#ifdef CONFIG_LOCKDEP

#include <linux/linkage.h>
#include <linux/list.h>
#include <linux/debug_locks.h>
#include <linux/stacktrace.h>

static inline void lockdep_copy_map(struct lockdep_map *to,
				    struct lockdep_map *from)
{
	int i;

	*to = *from;
	/*
	 * Since the class cache can be modified concurrently we could observe
	 * half pointers (64bit arch using 32bit copy insns). Therefore clear
	 * the caches and take the performance hit.
	 *
	 * XXX it doesn't work well with lockdep_set_class_and_subclass(), since
	 * that relies on cache abuse.
*/ for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) to->class_cache[i] = NULL; } /* * Every lock has a list of other locks that were taken after it. * We only grow the list, never remove from it: */ struct lock_list { struct list_head entry; struct lock_class *class; struct lock_class *links_to; const struct lock_trace *trace; u16 distance; /* bitmap of different dependencies from head to this */ u8 dep; /* used by BFS to record whether "prev -> this" only has -(*R)-> */ u8 only_xr; /* * The parent field is used to implement breadth-first search, and the * bit 0 is reused to indicate if the lock has been accessed in BFS. */ struct lock_list *parent; }; /** * struct lock_chain - lock dependency chain record * * @irq_context: the same as irq_context in held_lock below * @depth: the number of held locks in this chain * @base: the index in chain_hlocks for this chain * @entry: the collided lock chains in lock_chain hash list * @chain_key: the hash key of this lock_chain */ struct lock_chain { /* see BUILD_BUG_ON()s in add_chain_cache() */ unsigned int irq_context : 2, depth : 6, base : 24; /* 4 byte hole */ struct hlist_node entry; u64 chain_key; }; /* * Initialization, self-test and debugging-output methods: */ extern void lockdep_init(void); extern void lockdep_reset(void); extern void lockdep_reset_lock(struct lockdep_map *lock); extern void lockdep_free_key_range(void *start, unsigned long size); extern asmlinkage void lockdep_sys_exit(void); extern void lockdep_set_selftest_task(struct task_struct *task); extern void lockdep_init_task(struct task_struct *task); /* * Split the recursion counter in two to readily detect 'off' vs recursion. */ #define LOCKDEP_RECURSION_BITS 16 #define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS) #define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1) /* * lockdep_{off,on}() are macros to avoid tracing and kprobes; not inlines due * to header dependencies. 
*/ #define lockdep_off() \ do { \ current->lockdep_recursion += LOCKDEP_OFF; \ } while (0) #define lockdep_on() \ do { \ current->lockdep_recursion -= LOCKDEP_OFF; \ } while (0) extern void lockdep_register_key(struct lock_class_key *key); extern void lockdep_unregister_key(struct lock_class_key *key); /* * These methods are used by specific locking variants (spinlocks, * rwlocks, mutexes and rwsems) to pass init/acquire/release events * to lockdep: */ extern void lockdep_init_map_type(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass, u8 inner, u8 outer, u8 lock_type); static inline void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass, u8 inner, u8 outer) { lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL); } static inline void lockdep_init_map_wait(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass, u8 inner) { lockdep_init_map_waits(lock, name, key, subclass, inner, LD_WAIT_INV); } static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { lockdep_init_map_wait(lock, name, key, subclass, LD_WAIT_INV); } /* * Reinitialize a lock key - for cases where there is special locking or * special initialization of locks so that the validator gets the scope * of dependencies wrong: they are either too broad (they need a class-split) * or they are too narrow (they suffer from a false class-split): */ #define lockdep_set_class(lock, key) \ lockdep_init_map_type(&(lock)->dep_map, #key, key, 0, \ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer, \ (lock)->dep_map.lock_type) #define lockdep_set_class_and_name(lock, key, name) \ lockdep_init_map_type(&(lock)->dep_map, name, key, 0, \ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer, \ (lock)->dep_map.lock_type) #define lockdep_set_class_and_subclass(lock, key, sub) \ lockdep_init_map_type(&(lock)->dep_map, #key, key, sub, \ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer, \ (lock)->dep_map.lock_type) #define lockdep_set_subclass(lock, sub) \ lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer, \ (lock)->dep_map.lock_type) #define lockdep_set_novalidate_class(lock) \ lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) /* * Compare locking classes */ #define lockdep_match_class(lock, key) lockdep_match_key(&(lock)->dep_map, key) static inline int lockdep_match_key(struct lockdep_map *lock, struct lock_class_key *key) { return lock->key == key; } /* * Acquire a lock. * * Values for "read": * * 0: exclusive (write) acquire * 1: read-acquire (no recursion allowed) * 2: read-acquire with same-instance recursion allowed * * Values for check: * * 0: simple checks (freeing, held-at-exit-time, etc.) * 1: full validation */ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *nest_lock, unsigned long ip); extern void lock_release(struct lockdep_map *lock, unsigned long ip); extern void lock_sync(struct lockdep_map *lock, unsigned int subclass, int read, int check, struct lockdep_map *nest_lock, unsigned long ip); /* lock_is_held_type() returns */ #define LOCK_STATE_UNKNOWN -1 #define LOCK_STATE_NOT_HELD 0 #define LOCK_STATE_HELD 1 /* * Same "read" as for lock_acquire(), except -1 means any. 
*/ extern int lock_is_held_type(const struct lockdep_map *lock, int read); static inline int lock_is_held(const struct lockdep_map *lock) { return lock_is_held_type(lock, -1); } #define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) #define lockdep_is_held_type(lock, r) lock_is_held_type(&(lock)->dep_map, (r)) extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); #define lock_set_novalidate_class(l, n, i) \ lock_set_class(l, n, &__lockdep_no_validate__, 0, i) static inline void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, unsigned long ip) { lock_set_class(lock, lock->name, lock->key, subclass, ip); } extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) #define lockdep_assert(cond) \ do { WARN_ON(debug_locks && !(cond)); } while (0) #define lockdep_assert_once(cond) \ do { WARN_ON_ONCE(debug_locks && !(cond)); } while (0) #define lockdep_assert_held(l) \ lockdep_assert(lockdep_is_held(l) != LOCK_STATE_NOT_HELD) #define lockdep_assert_not_held(l) \ lockdep_assert(lockdep_is_held(l) != LOCK_STATE_HELD) #define lockdep_assert_held_write(l) \ lockdep_assert(lockdep_is_held_type(l, 0)) #define lockdep_assert_held_read(l) \ lockdep_assert(lockdep_is_held_type(l, 1)) #define lockdep_assert_held_once(l) \ lockdep_assert_once(lockdep_is_held(l) != LOCK_STATE_NOT_HELD) #define lockdep_assert_none_held_once() \ lockdep_assert_once(!current->lockdep_depth) #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) #define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) #define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c)) #define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c)) /* * Must use lock_map_aquire_try() with override maps to avoid * lockdep thinking they participate in the block chain. 
*/ #define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ struct lockdep_map _name = { \ .name = #_name "-wait-type-override", \ .wait_type_inner = _wait_type, \ .lock_type = LD_LOCK_WAIT_OVERRIDE, } #else /* !CONFIG_LOCKDEP */ static inline void lockdep_init_task(struct task_struct *task) { } static inline void lockdep_off(void) { } static inline void lockdep_on(void) { } static inline void lockdep_set_selftest_task(struct task_struct *task) { } # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0) # define lock_set_class(l, n, key, s, i) do { (void)(key); } while (0) # define lock_set_novalidate_class(l, n, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \ do { (void)(name); (void)(key); } while (0) # define lockdep_init_map_waits(lock, name, key, sub, inner, outer) \ do { (void)(name); (void)(key); } while (0) # define lockdep_init_map_wait(lock, name, key, sub, inner) \ do { (void)(name); (void)(key); } while (0) # define lockdep_init_map(lock, name, key, sub) \ do { (void)(name); (void)(key); } while (0) # define lockdep_set_class(lock, key) do { (void)(key); } while (0) # define lockdep_set_class_and_name(lock, key, name) \ do { (void)(key); (void)(name); } while (0) #define lockdep_set_class_and_subclass(lock, key, sub) \ do { (void)(key); } while (0) #define lockdep_set_subclass(lock, sub) do { } while (0) #define lockdep_set_novalidate_class(lock) do { } while (0) /* * We don't define lockdep_match_class() and lockdep_match_key() for !LOCKDEP * case since the result is not well defined and the caller should rather * #ifdef the call himself. */ # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) # define lockdep_sys_exit() do { } while (0) static inline void lockdep_register_key(struct lock_class_key *key) { } static inline void lockdep_unregister_key(struct lock_class_key *key) { } #define lockdep_depth(tsk) (0) /* * Dummy forward declarations, allow users to write less ifdef-y code * and depend on dead code elimination. */ extern int lock_is_held(const void *); extern int lockdep_is_held(const void *); #define lockdep_is_held_type(l, r) (1) #define lockdep_assert(c) do { } while (0) #define lockdep_assert_once(c) do { } while (0) #define lockdep_assert_held(l) do { (void)(l); } while (0) #define lockdep_assert_not_held(l) do { (void)(l); } while (0) #define lockdep_assert_held_write(l) do { (void)(l); } while (0) #define lockdep_assert_held_read(l) do { (void)(l); } while (0) #define lockdep_assert_held_once(l) do { (void)(l); } while (0) #define lockdep_assert_none_held_once() do { } while (0) #define lockdep_recursing(tsk) (0) #define NIL_COOKIE (struct pin_cookie){ } #define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; }) #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) #define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ struct lockdep_map __maybe_unused _name = {} #endif /* !LOCKDEP */ #ifdef CONFIG_PROVE_LOCKING void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn); #define lock_set_cmp_fn(lock, ...) lockdep_set_lock_cmp_fn(&(lock)->dep_map, __VA_ARGS__) #else #define lock_set_cmp_fn(lock, ...) 
do { } while (0) #endif enum xhlock_context_t { XHLOCK_HARD, XHLOCK_SOFT, XHLOCK_CTX_NR, }; /* * To initialize a lockdep_map statically use this macro. * Note that _name must not be NULL. */ #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), } static inline void lockdep_invariant_state(bool force) {} static inline void lockdep_free_task(struct task_struct *task) {} #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ if (!try(_lock)) { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) #define LOCK_CONTENDED_RETURN(_lock, try, lock) \ ({ \ int ____err = 0; \ if (!try(_lock)) { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ ____err = lock(_lock); \ } \ if (!____err) \ lock_acquired(&(_lock)->dep_map, _RET_IP_); \ ____err; \ }) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) #define lock_acquired(lockdep_map, ip) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) #define LOCK_CONTENDED_RETURN(_lock, try, lock) \ lock(_lock) #endif /* CONFIG_LOCK_STAT */ #ifdef CONFIG_PROVE_LOCKING extern void print_irqtrace_events(struct task_struct *curr); #else static inline void print_irqtrace_events(struct task_struct *curr) { } #endif /* Variable used to make lockdep treat read_lock() as recursive in selftests */ #ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS extern unsigned int force_read_lock_recursive; #else /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */ #define force_read_lock_recursive 0 #endif /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */ #ifdef CONFIG_LOCKDEP extern bool read_lock_is_recursive(void); #else /* CONFIG_LOCKDEP */ /* If !LOCKDEP, the value is meaningless */ #define read_lock_is_recursive() 0 #endif /* * For trivial one-depth nesting of a lock-class, the following * global define can be used. (Subsystems with multiple levels * of nesting should define their own lock-nesting subclasses.) 
*/ #define SINGLE_DEPTH_NESTING 1 /* * Map the dependency ops to NOP or to real lockdep ops, depending * on the per lock-class debug mode: */ #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define spin_release(l, i) lock_release(l, i) #define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwlock_acquire_read(l, s, t, i) \ do { \ if (read_lock_is_recursive()) \ lock_acquire_shared_recursive(l, s, t, NULL, i); \ else \ lock_acquire_shared(l, s, t, NULL, i); \ } while (0) #define rwlock_release(l, i) lock_release(l, i) #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) #define seqcount_release(l, i) lock_release(l, i) #define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define mutex_release(l, i) lock_release(l, i) #define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) #define rwsem_release(l, i) lock_release(l, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_try(l) lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_) #define lock_map_release(l) lock_release(l, _THIS_IP_) #define lock_map_sync(l) lock_sync(l, 0, 0, 1, NULL, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) # define might_lock_read(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) # define might_lock_nested(lock, subclass) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, subclass, 0, 1, 1, NULL, \ _THIS_IP_); \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); DECLARE_PER_CPU(unsigned int, lockdep_recursion); #define __lockdep_enabled (debug_locks && !this_cpu_read(lockdep_recursion)) #define lockdep_assert_irqs_enabled() \ do { \ WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_irqs_disabled() \ do { \ WARN_ON_ONCE(__lockdep_enabled && this_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_in_irq() \ do { \ WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \ } while (0) #define lockdep_assert_no_hardirq() \ do { \ WARN_ON_ONCE(__lockdep_enabled && (this_cpu_read(hardirq_context) || \ !this_cpu_read(hardirqs_enabled))); \ } while (0) #define lockdep_assert_preemption_enabled() \ do { \ 
WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ __lockdep_enabled && \ (preempt_count() != 0 || \ !this_cpu_read(hardirqs_enabled))); \ } while (0) #define lockdep_assert_preemption_disabled() \ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ __lockdep_enabled && \ (preempt_count() == 0 && \ this_cpu_read(hardirqs_enabled))); \ } while (0) /* * Acceptable for protecting per-CPU resources accessed from BH. * Much like in_softirq() - semantics are ambiguous, use carefully. */ #define lockdep_assert_in_softirq() \ do { \ WARN_ON_ONCE(__lockdep_enabled && \ (!in_softirq() || in_irq() || in_nmi())); \ } while (0) #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) # define might_lock_nested(lock, subclass) do { } while (0) # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do { } while (0) # define lockdep_assert_no_hardirq() do { } while (0) # define lockdep_assert_preemption_enabled() do { } while (0) # define lockdep_assert_preemption_disabled() do { } while (0) # define lockdep_assert_in_softirq() do { } while (0) #endif #ifdef CONFIG_PROVE_RAW_LOCK_NESTING # define lockdep_assert_RT_in_threaded_ctx() do { \ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ lockdep_hardirq_context() && \ !(current->hardirq_threaded || current->irq_config), \ "Not in threaded context on PREEMPT_RT as expected\n"); \ } while (0) #else # define lockdep_assert_RT_in_threaded_ctx() do { } while (0) #endif #ifdef CONFIG_LOCKDEP void lockdep_rcu_suspicious(const char *file, const int line, const char *s); #else static inline void lockdep_rcu_suspicious(const char *file, const int line, const char *s) { } #endif #endif /* __LINUX_LOCKDEP_H */
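/*
 * A minimal annotation sketch (not part of the header above) showing the usual
 * way a subsystem uses the lockdep API declared here: give its locks a distinct
 * class with a static lock_class_key and document locking rules with
 * lockdep_assert_held(). "struct demo_dev", "demo_lock_key" and the helper
 * names are hypothetical, used only for illustration.
 */
#include <linux/lockdep.h>
#include <linux/spinlock.h>

struct demo_dev {
	spinlock_t	lock;
	int		state;
};

/* One class key shared by all demo_dev locks, so reports name them clearly */
static struct lock_class_key demo_lock_key;

static inline void demo_dev_init(struct demo_dev *d)
{
	spin_lock_init(&d->lock);
	lockdep_set_class(&d->lock, &demo_lock_key);
}

/* Caller must hold d->lock; with CONFIG_LOCKDEP=y this is actually verified */
static inline void __demo_set_state(struct demo_dev *d, int state)
{
	lockdep_assert_held(&d->lock);
	d->state = state;
}

static inline void demo_set_state(struct demo_dev *d, int state)
{
	spin_lock(&d->lock);
	__demo_set_state(d, state);
	spin_unlock(&d->lock);
}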
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Robert Elz at The University of Melbourne.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _LINUX_QUOTA_ #define _LINUX_QUOTA_ #include <linux/list.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/spinlock.h> #include <linux/wait.h> #include <linux/percpu_counter.h> #include <linux/dqblk_xfs.h> #include <linux/dqblk_v1.h> #include <linux/dqblk_v2.h> #include <linux/atomic.h> #include <linux/uidgid.h> #include <linux/projid.h> #include <uapi/linux/quota.h> #undef USRQUOTA #undef GRPQUOTA #undef PRJQUOTA enum quota_type { USRQUOTA = 0, /* element used for user quotas */ GRPQUOTA = 1, /* element used for group quotas */ PRJQUOTA = 2, /* element used for project quotas */ }; /* Masks for quota types when used as a bitmask */ #define QTYPE_MASK_USR (1 << USRQUOTA) #define QTYPE_MASK_GRP (1 << GRPQUOTA) #define QTYPE_MASK_PRJ (1 << PRJQUOTA) typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ typedef long long qsize_t; /* Type in which we store sizes */ struct kqid { /* Type in which we store the quota identifier */ union { kuid_t uid; kgid_t gid; kprojid_t projid; }; enum quota_type type; /* USRQUOTA (uid) or GRPQUOTA (gid) or PRJQUOTA (projid) */ }; extern bool qid_eq(struct kqid left, struct kqid right); extern bool qid_lt(struct kqid left, struct kqid right); extern qid_t from_kqid(struct user_namespace *to, struct kqid qid); extern qid_t from_kqid_munged(struct user_namespace *to, struct kqid qid); extern bool qid_valid(struct kqid qid); /** * make_kqid - Map a user-namespace, type, qid tuple into a kqid. * @from: User namespace that the qid is in * @type: The type of quota * @qid: Quota identifier * * Maps a user-namespace, type qid tuple into a kernel internal * kqid, and returns that kqid. * * When there is no mapping defined for the user-namespace, type, * qid tuple an invalid kqid is returned. Callers are expected to * test for and handle invalid kqids being returned. * Invalid kqids may be tested for using qid_valid(). */ static inline struct kqid make_kqid(struct user_namespace *from, enum quota_type type, qid_t qid) { struct kqid kqid; kqid.type = type; switch (type) { case USRQUOTA: kqid.uid = make_kuid(from, qid); break; case GRPQUOTA: kqid.gid = make_kgid(from, qid); break; case PRJQUOTA: kqid.projid = make_kprojid(from, qid); break; default: BUG(); } return kqid; } /** * make_kqid_invalid - Explicitly make an invalid kqid * @type: The type of quota identifier * * Returns an invalid kqid with the specified type. 
*/ static inline struct kqid make_kqid_invalid(enum quota_type type) { struct kqid kqid; kqid.type = type; switch (type) { case USRQUOTA: kqid.uid = INVALID_UID; break; case GRPQUOTA: kqid.gid = INVALID_GID; break; case PRJQUOTA: kqid.projid = INVALID_PROJID; break; default: BUG(); } return kqid; } /** * make_kqid_uid - Make a kqid from a kuid * @uid: The kuid to make the quota identifier from */ static inline struct kqid make_kqid_uid(kuid_t uid) { struct kqid kqid; kqid.type = USRQUOTA; kqid.uid = uid; return kqid; } /** * make_kqid_gid - Make a kqid from a kgid * @gid: The kgid to make the quota identifier from */ static inline struct kqid make_kqid_gid(kgid_t gid) { struct kqid kqid; kqid.type = GRPQUOTA; kqid.gid = gid; return kqid; } /** * make_kqid_projid - Make a kqid from a projid * @projid: The kprojid to make the quota identifier from */ static inline struct kqid make_kqid_projid(kprojid_t projid) { struct kqid kqid; kqid.type = PRJQUOTA; kqid.projid = projid; return kqid; } /** * qid_has_mapping - Report if a qid maps into a user namespace. * @ns: The user namespace to see if a value maps into. * @qid: The kernel internal quota identifier to test. */ static inline bool qid_has_mapping(struct user_namespace *ns, struct kqid qid) { return from_kqid(ns, qid) != (qid_t) -1; } extern spinlock_t dq_data_lock; /* Maximal numbers of writes for quota operation (insert/delete/update) * (over VFS all formats) */ #define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, V2_INIT_ALLOC) #define DQUOT_INIT_REWRITE max(V1_INIT_REWRITE, V2_INIT_REWRITE) #define DQUOT_DEL_ALLOC max(V1_DEL_ALLOC, V2_DEL_ALLOC) #define DQUOT_DEL_REWRITE max(V1_DEL_REWRITE, V2_DEL_REWRITE) /* * Data for one user/group kept in memory */ struct mem_dqblk { qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */ qsize_t dqb_curspace; /* current used space */ qsize_t dqb_rsvspace; /* current reserved space for delalloc*/ qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */ qsize_t dqb_isoftlimit; /* preferred inode limit */ qsize_t dqb_curinodes; /* current # allocated inodes */ time64_t dqb_btime; /* time limit for excessive disk use */ time64_t dqb_itime; /* time limit for excessive inode use */ }; /* * Data for one quotafile kept in memory */ struct quota_format_type; struct mem_dqinfo { struct quota_format_type *dqi_format; int dqi_fmt_id; /* Id of the dqi_format - used when turning * quotas on after remount RW */ struct list_head dqi_dirty_list; /* List of dirty dquots [dq_list_lock] */ unsigned long dqi_flags; /* DFQ_ flags [dq_data_lock] */ unsigned int dqi_bgrace; /* Space grace time [dq_data_lock] */ unsigned int dqi_igrace; /* Inode grace time [dq_data_lock] */ qsize_t dqi_max_spc_limit; /* Maximum space limit [static] */ qsize_t dqi_max_ino_limit; /* Maximum inode limit [static] */ void *dqi_priv; }; struct super_block; /* Mask for flags passed to userspace */ #define DQF_GETINFO_MASK (DQF_ROOT_SQUASH | DQF_SYS_FILE) /* Mask for flags modifiable from userspace */ #define DQF_SETINFO_MASK DQF_ROOT_SQUASH enum { DQF_INFO_DIRTY_B = DQF_PRIVATE, }; #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? 
*/ extern void mark_info_dirty(struct super_block *sb, int type); static inline int info_dirty(struct mem_dqinfo *info) { return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); } enum { DQST_LOOKUPS, DQST_DROPS, DQST_READS, DQST_WRITES, DQST_CACHE_HITS, DQST_ALLOC_DQUOTS, DQST_FREE_DQUOTS, DQST_SYNCS, _DQST_DQSTAT_LAST }; struct dqstats { unsigned long stat[_DQST_DQSTAT_LAST]; struct percpu_counter counter[_DQST_DQSTAT_LAST]; }; extern struct dqstats dqstats; static inline void dqstats_inc(unsigned int type) { percpu_counter_inc(&dqstats.counter[type]); } static inline void dqstats_dec(unsigned int type) { percpu_counter_dec(&dqstats.counter[type]); } #define DQ_MOD_B 0 /* dquot modified since read */ #define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */ #define DQ_INODES_B 2 /* uid/gid has been warned about inode limit */ #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ #define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting * to be cleaned up */ #define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ * for the mask of entries set via SETQUOTA\ * quotactl. They are set under dq_data_lock\ * and the quota format handling dquot can\ * clear them when it sees fit. */ struct dquot { struct hlist_node dq_hash; /* Hash list in memory [dq_list_lock] */ struct list_head dq_inuse; /* List of all quotas [dq_list_lock] */ struct list_head dq_free; /* Free list element [dq_list_lock] */ struct list_head dq_dirty; /* List of dirty dquots [dq_list_lock] */ struct mutex dq_lock; /* dquot IO lock */ spinlock_t dq_dqb_lock; /* Lock protecting dq_dqb changes */ atomic_t dq_count; /* Use count */ struct super_block *dq_sb; /* superblock this applies to */ struct kqid dq_id; /* ID this applies to (uid, gid, projid) */ loff_t dq_off; /* Offset of dquot on disk [dq_lock, stable once set] */ unsigned long dq_flags; /* See DQ_* */ struct mem_dqblk dq_dqb; /* Diskquota usage [dq_dqb_lock] */ }; /* Operations which must be implemented by each quota format */ struct quota_format_ops { int (*check_quota_file)(struct super_block *sb, int type); /* Detect whether file is in our format */ int (*read_file_info)(struct super_block *sb, int type); /* Read main info about file - called on quotaon() */ int (*write_file_info)(struct super_block *sb, int type); /* Write main info about file */ int (*free_file_info)(struct super_block *sb, int type); /* Called on quotaoff() */ int (*read_dqblk)(struct dquot *dquot); /* Read structure for one user */ int (*commit_dqblk)(struct dquot *dquot); /* Write structure for one user */ int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */ int (*get_next_id)(struct super_block *sb, struct kqid *qid); /* Get next ID with existing structure in the quota file */ }; /* Operations working with dquots */ struct dquot_operations { int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */ int (*acquire_dquot) (struct dquot *); /* Quota is going to be created on disk */ int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ /* get reserved quota for delayed 
alloc, value returned is managed by * quota code only */ qsize_t *(*get_reserved_space) (struct inode *); int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */ /* Get number of inodes that were charged for a given inode */ int (*get_inode_usage) (struct inode *, qsize_t *); /* Get next ID with active quota structure */ int (*get_next_id) (struct super_block *sb, struct kqid *qid); }; struct path; /* Structure for communicating via ->get_dqblk() & ->set_dqblk() */ struct qc_dqblk { int d_fieldmask; /* mask of fields to change in ->set_dqblk() */ u64 d_spc_hardlimit; /* absolute limit on used space */ u64 d_spc_softlimit; /* preferred limit on used space */ u64 d_ino_hardlimit; /* maximum # allocated inodes */ u64 d_ino_softlimit; /* preferred inode limit */ u64 d_space; /* Space owned by the user */ u64 d_ino_count; /* # inodes owned by the user */ s64 d_ino_timer; /* zero if within inode limits */ /* if not, we refuse service */ s64 d_spc_timer; /* similar to above; for space */ int d_ino_warns; /* # warnings issued wrt num inodes */ int d_spc_warns; /* # warnings issued wrt used space */ u64 d_rt_spc_hardlimit; /* absolute limit on realtime space */ u64 d_rt_spc_softlimit; /* preferred limit on RT space */ u64 d_rt_space; /* realtime space owned */ s64 d_rt_spc_timer; /* similar to above; for RT space */ int d_rt_spc_warns; /* # warnings issued wrt RT space */ }; /* * Field specifiers for ->set_dqblk() in struct qc_dqblk and also for * ->set_info() in struct qc_info */ #define QC_INO_SOFT (1<<0) #define QC_INO_HARD (1<<1) #define QC_SPC_SOFT (1<<2) #define QC_SPC_HARD (1<<3) #define QC_RT_SPC_SOFT (1<<4) #define QC_RT_SPC_HARD (1<<5) #define QC_LIMIT_MASK (QC_INO_SOFT | QC_INO_HARD | QC_SPC_SOFT | QC_SPC_HARD | \ QC_RT_SPC_SOFT | QC_RT_SPC_HARD) #define QC_SPC_TIMER (1<<6) #define QC_INO_TIMER (1<<7) #define QC_RT_SPC_TIMER (1<<8) #define QC_TIMER_MASK (QC_SPC_TIMER | QC_INO_TIMER | QC_RT_SPC_TIMER) #define QC_SPC_WARNS (1<<9) #define QC_INO_WARNS (1<<10) #define QC_RT_SPC_WARNS (1<<11) #define QC_WARNS_MASK (QC_SPC_WARNS | QC_INO_WARNS | QC_RT_SPC_WARNS) #define QC_SPACE (1<<12) #define QC_INO_COUNT (1<<13) #define QC_RT_SPACE (1<<14) #define QC_ACCT_MASK (QC_SPACE | QC_INO_COUNT | QC_RT_SPACE) #define QC_FLAGS (1<<15) #define QCI_SYSFILE (1 << 0) /* Quota file is hidden from userspace */ #define QCI_ROOT_SQUASH (1 << 1) /* Root squash turned on */ #define QCI_ACCT_ENABLED (1 << 2) /* Quota accounting enabled */ #define QCI_LIMITS_ENFORCED (1 << 3) /* Quota limits enforced */ /* Structures for communicating via ->get_state */ struct qc_type_state { unsigned int flags; /* Flags QCI_* */ unsigned int spc_timelimit; /* Time after which space softlimit is * enforced */ unsigned int ino_timelimit; /* Ditto for inode softlimit */ unsigned int rt_spc_timelimit; /* Ditto for real-time space */ unsigned int spc_warnlimit; /* Limit for number of space warnings */ unsigned int ino_warnlimit; /* Ditto for inodes */ unsigned int rt_spc_warnlimit; /* Ditto for real-time space */ unsigned long long ino; /* Inode number of quota file */ blkcnt_t blocks; /* Number of 512-byte blocks in the file */ blkcnt_t nextents; /* Number of extents in the file */ }; struct qc_state { unsigned int s_incoredqs; /* Number of dquots in core */ struct qc_type_state s_state[MAXQUOTAS]; /* Per quota type information */ }; /* Structure for communicating via ->set_info */ struct qc_info { int i_fieldmask; /* mask of fields to change in ->set_info() */ unsigned int i_flags; /* Flags QCI_* */ unsigned int 
i_spc_timelimit; /* Time after which space softlimit is * enforced */ unsigned int i_ino_timelimit; /* Ditto for inode softlimit */ unsigned int i_rt_spc_timelimit;/* Ditto for real-time space */ unsigned int i_spc_warnlimit; /* Limit for number of space warnings */ unsigned int i_ino_warnlimit; /* Limit for number of inode warnings */ unsigned int i_rt_spc_warnlimit; /* Ditto for real-time space */ }; /* Operations handling requests from userspace */ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, const struct path *); int (*quota_off)(struct super_block *, int); int (*quota_enable)(struct super_block *, unsigned int); int (*quota_disable)(struct super_block *, unsigned int); int (*quota_sync)(struct super_block *, int); int (*set_info)(struct super_block *, int, struct qc_info *); int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_nextdqblk)(struct super_block *, struct kqid *, struct qc_dqblk *); int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_state)(struct super_block *, struct qc_state *); int (*rm_xquota)(struct super_block *, unsigned int); }; struct quota_format_type { int qf_fmt_id; /* Quota format id */ const struct quota_format_ops *qf_ops; /* Operations of format */ struct module *qf_owner; /* Module implementing quota format */ struct quota_format_type *qf_next; }; /** * Quota state flags - they come in three flavors - for users, groups and projects. * * Actual typed flags layout: * USRQUOTA GRPQUOTA PRJQUOTA * DQUOT_USAGE_ENABLED 0x0001 0x0002 0x0004 * DQUOT_LIMITS_ENABLED 0x0008 0x0010 0x0020 * DQUOT_SUSPENDED 0x0040 0x0080 0x0100 * * Following bits are used for non-typed flags: * DQUOT_QUOTA_SYS_FILE 0x0200 * DQUOT_NEGATIVE_USAGE 0x0400 * DQUOT_NOLIST_DIRTY 0x0800 */ enum { _DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */ _DQUOT_LIMITS_ENABLED, /* Enforce quota limits for users */ _DQUOT_SUSPENDED, /* User diskquotas are off, but * we have necessary info in * memory to turn them on */ _DQUOT_STATE_FLAGS }; #define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED * MAXQUOTAS) #define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED * MAXQUOTAS) #define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED * MAXQUOTAS) #define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \ DQUOT_SUSPENDED) /* Other quota flags */ #define DQUOT_STATE_LAST (_DQUOT_STATE_FLAGS * MAXQUOTAS) #define DQUOT_QUOTA_SYS_FILE (1 << DQUOT_STATE_LAST) /* Quota file is a special * system file and user cannot * touch it. 
Filesystem is * responsible for setting * S_NOQUOTA, S_NOATIME flags */ #define DQUOT_NEGATIVE_USAGE (1 << (DQUOT_STATE_LAST + 1)) /* Allow negative quota usage */ /* Do not track dirty dquots in a list */ #define DQUOT_NOLIST_DIRTY (1 << (DQUOT_STATE_LAST + 2)) static inline unsigned int dquot_state_flag(unsigned int flags, int type) { return flags << type; } static inline unsigned int dquot_generic_flag(unsigned int flags, int type) { return (flags >> type) & DQUOT_STATE_FLAGS; } /* Bitmap of quota types where flag is set in flags */ static __always_inline unsigned dquot_state_types(unsigned flags, unsigned flag) { BUILD_BUG_ON_NOT_POWER_OF_2(flag); return (flags / flag) & ((1 << MAXQUOTAS) - 1); } #ifdef CONFIG_QUOTA_NETLINK_INTERFACE extern void quota_send_warning(struct kqid qid, dev_t dev, const char warntype); #else static inline void quota_send_warning(struct kqid qid, dev_t dev, const char warntype) { return; } #endif /* CONFIG_QUOTA_NETLINK_INTERFACE */ struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct rw_semaphore dqio_sem; /* Lock quota file while I/O in progress */ struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); struct quota_module_name { int qm_fmt_id; char *qm_mod_name; }; #define INIT_QUOTA_MODULE_NAMES {\ {QFMT_VFS_OLD, "quota_v1"},\ {QFMT_VFS_V0, "quota_v2"},\ {QFMT_VFS_V1, "quota_v2"},\ {0, NULL}} #endif /* _QUOTA_ */
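/*
 * A small illustrative sketch (not part of the header above): constructing and
 * comparing kernel-internal quota ids with the helpers declared here. The
 * function name and the uid value 1000 are arbitrary, and init_user_ns from
 * <linux/user_namespace.h> is assumed as the mapping namespace.
 */
#include <linux/bug.h>
#include <linux/printk.h>
#include <linux/quota.h>
#include <linux/user_namespace.h>

static inline void kqid_demo(void)
{
	kuid_t uid = make_kuid(&init_user_ns, 1000);
	struct kqid a = make_kqid(&init_user_ns, USRQUOTA, 1000);
	struct kqid b = make_kqid_uid(uid);

	/* Both constructions name the same user, so the ids compare equal */
	WARN_ON(!qid_eq(a, b));

	/* An id maps back to a plain qid_t only if the namespace covers it */
	if (qid_has_mapping(&init_user_ns, a))
		pr_info("quota id %u\n", from_kqid(&init_user_ns, a));
}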
// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which manage high resolution tick
 * related events.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>

#include "tick-internal.h"

/**
 * tick_program_event - program the CPU local timer device for the next event
 */
int tick_program_event(ktime_t expires, int force)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);

	if (unlikely(expires == KTIME_MAX)) {
		/*
		 * We don't need the clock event device any more, stop it.
		 */
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		dev->next_event = KTIME_MAX;
		return 0;
	}

	if (unlikely(clockevent_state_oneshot_stopped(dev))) {
		/*
		 * We need the clock event again, configure it in ONESHOT mode
		 * before using it.
		 */
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
	}

	return clockevents_program_event(dev, expires, force);
}

/**
 * tick_resume_oneshot - resume oneshot mode
 */
void tick_resume_oneshot(void)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);

	clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
	clockevents_program_event(dev, ktime_get(), true);
}

/**
 * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz)
 */
void tick_setup_oneshot(struct clock_event_device *newdev,
			void (*handler)(struct clock_event_device *),
			ktime_t next_event)
{
	newdev->event_handler = handler;
	clockevents_switch_state(newdev, CLOCK_EVT_STATE_ONESHOT);
	clockevents_program_event(newdev, next_event, true);
}

/**
 * tick_switch_to_oneshot - switch to oneshot mode
 */
int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *dev = td->evtdev;

	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||
		    !tick_device_is_functional(dev)) {

		pr_info("Clockevents: could not switch to one-shot mode:");
		if (!dev) {
			pr_cont(" no tick device\n");
		} else {
			if (!tick_device_is_functional(dev))
				pr_cont(" %s is not functional.\n", dev->name);
			else
				pr_cont(" %s does not support one-shot mode.\n",
					dev->name);
		}
		return -EINVAL;
	}

	td->mode = TICKDEV_MODE_ONESHOT;
	dev->event_handler = handler;
	clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
	tick_broadcast_switch_to_oneshot();
	return 0;
}

/**
 * tick_oneshot_mode_active - check whether the system is in oneshot mode
 *
 * returns 1 when either nohz or highres are enabled. otherwise 0.
 */
int tick_oneshot_mode_active(void)
{
	unsigned long flags;
	int ret;

	local_irq_save(flags);
	ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT;
	local_irq_restore(flags);

	return ret;
}

#ifdef CONFIG_HIGH_RES_TIMERS
/**
 * tick_init_highres - switch to high resolution mode
 *
 * Called with interrupts disabled.
 */
int tick_init_highres(void)
{
	return tick_switch_to_oneshot(hrtimer_interrupt);
}
#endif
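/*
 * An illustrative sketch (not part of the file above) of the contract that
 * tick_program_event() implements: callers pass an absolute expiry time for
 * the CPU-local device, and KTIME_MAX is the conventional "no event needed"
 * value that parks the device in ONESHOT_STOPPED state. The helper name and
 * the 1 ms delta are made up for illustration.
 */
#include <linux/ktime.h>
#include <linux/timekeeping.h>
#include <linux/types.h>

static inline void tick_program_event_demo(bool need_tick)
{
	if (need_tick)
		tick_program_event(ktime_add_ns(ktime_get(), NSEC_PER_MSEC), 0);
	else
		tick_program_event(KTIME_MAX, 0);	/* stop the clockevent device */
}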
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TIME64_H
#define _LINUX_TIME64_H

#include <linux/math64.h>
#include <vdso/time64.h>

typedef __s64 time64_t;
typedef __u64 timeu64_t;

#include <uapi/linux/time.h>

struct timespec64 {
	time64_t	tv_sec;			/* seconds */
	long		tv_nsec;		/* nanoseconds */
};

struct itimerspec64 {
	struct timespec64 it_interval;
	struct timespec64 it_value;
};

/* Parameters used to convert the timespec values: */
#define PSEC_PER_NSEC			1000L

/* Located here for timespec[64]_valid_strict */
#define TIME64_MAX			((s64)~((u64)1 << 63))
#define TIME64_MIN			(-TIME64_MAX - 1)

#define KTIME_MAX			((s64)~((u64)1 << 63))
#define KTIME_MIN			(-KTIME_MAX - 1)
#define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
#define KTIME_SEC_MIN			(KTIME_MIN / NSEC_PER_SEC)

/*
 * Limits for settimeofday():
 *
 * To prevent setting the time close to the wraparound point time setting
 * is limited so a reasonable uptime can be accommodated. An uptime of 30
 * years should be really sufficient, which means the cutoff is 2232. At
 * that point the cutoff is just a small part of the larger problem.
 */
#define TIME_UPTIME_SEC_MAX		(30LL * 365 * 24 * 3600)
#define TIME_SETTOD_SEC_MAX		(KTIME_SEC_MAX - TIME_UPTIME_SEC_MAX)

static inline int timespec64_equal(const struct timespec64 *a,
				   const struct timespec64 *b)
{
	return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
}

/*
 * lhs < rhs:  return <0
 * lhs == rhs: return 0
 * lhs > rhs:  return >0
 */
static inline int timespec64_compare(const struct timespec64 *lhs,
				     const struct timespec64 *rhs)
{
	if (lhs->tv_sec < rhs->tv_sec)
		return -1;
	if (lhs->tv_sec > rhs->tv_sec)
		return 1;
	return lhs->tv_nsec - rhs->tv_nsec;
}

extern void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec);

static inline struct timespec64 timespec64_add(struct timespec64 lhs,
					       struct timespec64 rhs)
{
	struct timespec64 ts_delta;

	set_normalized_timespec64(&ts_delta, lhs.tv_sec + rhs.tv_sec,
				  lhs.tv_nsec + rhs.tv_nsec);
	return ts_delta;
}

/*
 * sub = lhs - rhs, in normalized form
 */
static inline struct timespec64 timespec64_sub(struct timespec64 lhs,
					       struct timespec64 rhs)
{
	struct timespec64 ts_delta;

	set_normalized_timespec64(&ts_delta, lhs.tv_sec - rhs.tv_sec,
				  lhs.tv_nsec - rhs.tv_nsec);
	return ts_delta;
}

/*
 * Returns true if the timespec64 is normalized, false if denormalized:
 */
static inline bool timespec64_valid(const struct timespec64 *ts)
{
	/* Dates before 1970 are bogus */
	if (ts->tv_sec < 0)
		return false;
	/* Can't have more nanoseconds than a second */
	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
		return false;
	return true;
}

static inline bool timespec64_valid_strict(const struct timespec64 *ts)
{
	if (!timespec64_valid(ts))
		return false;
	/* Disallow values that could overflow ktime_t */
	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
		return false;
	return true;
}

static inline bool timespec64_valid_settod(const struct timespec64 *ts)
{
	if (!timespec64_valid(ts))
		return false;
	/* Disallow values which cause overflow issues vs. CLOCK_REALTIME */
	if ((unsigned long long)ts->tv_sec >= TIME_SETTOD_SEC_MAX)
		return false;
	return true;
}

/**
 * timespec64_to_ns - Convert timespec64 to nanoseconds
 * @ts:		pointer to the timespec64 variable to be converted
 *
 * Returns the scalar nanosecond representation of the timespec64
 * parameter.
 */
static inline s64 timespec64_to_ns(const struct timespec64 *ts)
{
	/* Prevent multiplication overflow / underflow */
	if (ts->tv_sec >= KTIME_SEC_MAX)
		return KTIME_MAX;

	if (ts->tv_sec <= KTIME_SEC_MIN)
		return KTIME_MIN;

	return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}

/**
 * ns_to_timespec64 - Convert nanoseconds to timespec64
 * @nsec:	the nanoseconds value to be converted
 *
 * Returns the timespec64 representation of the nsec parameter.
 */
extern struct timespec64 ns_to_timespec64(s64 nsec);

/**
 * timespec64_add_ns - Adds nanoseconds to a timespec64
 * @a:		pointer to timespec64 to be incremented
 * @ns:		unsigned nanoseconds value to be added
 *
 * This must always be inlined because it's used from the x86-64 vdso,
 * which cannot call other kernel functions.
 */
static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns)
{
	a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
	a->tv_nsec = ns;
}

/*
 * timespec64_add_safe assumes both values are positive and checks for
 * overflow. It will return TIME64_MAX in case of overflow.
 */
extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs,
					     const struct timespec64 rhs);

#endif /* _LINUX_TIME64_H */
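/*
 * A short illustrative sketch (not part of the header above) of how these
 * helpers behave: timespec64_add_ns() carries overflowing nanoseconds into
 * tv_sec, and timespec64_to_ns() saturates at KTIME_MAX instead of wrapping.
 * The function name and the example values are made up for illustration.
 */
#include <linux/bug.h>
#include <linux/time64.h>

static inline void timespec64_demo(void)
{
	struct timespec64 ts = { .tv_sec = 1, .tv_nsec = 800 * NSEC_PER_MSEC };
	struct timespec64 huge = { .tv_sec = KTIME_SEC_MAX + 1, .tv_nsec = 0 };

	/* 1.8 s + 0.7 s = 2.5 s: the carry goes into tv_sec, tv_nsec stays normalized */
	timespec64_add_ns(&ts, 700 * NSEC_PER_MSEC);
	WARN_ON(ts.tv_sec != 2 || ts.tv_nsec != 500 * NSEC_PER_MSEC);

	/* Out-of-range seconds are clamped rather than allowed to overflow */
	WARN_ON(timespec64_to_ns(&huge) != KTIME_MAX);
}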
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCATTERLIST_H
#define _LINUX_SCATTERLIST_H

#include <linux/string.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <asm/io.h>

struct scatterlist {
	unsigned long	page_link;
	unsigned int	offset;
	unsigned int	length;
	dma_addr_t	dma_address;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
	unsigned int	dma_length;
#endif
#ifdef CONFIG_NEED_SG_DMA_FLAGS
	unsigned int	dma_flags;
#endif
};

/*
 * These macros should be used after a dma_map_sg call has been done
 * to get bus addresses of each of the SG entries and their lengths.
 * You should only work with the number of sg entries dma_map_sg
 * returns, or alternatively stop on the first sg_dma_len(sg) which
 * is 0.
*/ #define sg_dma_address(sg) ((sg)->dma_address) #ifdef CONFIG_NEED_SG_DMA_LENGTH #define sg_dma_len(sg) ((sg)->dma_length) #else #define sg_dma_len(sg) ((sg)->length) #endif struct sg_table { struct scatterlist *sgl; /* the list */ unsigned int nents; /* number of mapped entries */ unsigned int orig_nents; /* original size of list */ }; struct sg_append_table { struct sg_table sgt; /* The scatter list table */ struct scatterlist *prv; /* last populated sge in the table */ unsigned int total_nents; /* Total entries in the table */ }; /* * Notes on SG table design. * * We use the unsigned long page_link field in the scatterlist struct to place * the page pointer AND encode information about the sg table as well. The two * lower bits are reserved for this information. * * If bit 0 is set, then the page_link contains a pointer to the next sg * table list. Otherwise the next entry is at sg + 1. * * If bit 1 is set, then this sg entry is the last element in a list. * * See sg_next(). * */ #define SG_CHAIN 0x01UL #define SG_END 0x02UL /* * We overload the LSB of the page pointer to indicate whether it's * a valid sg entry, or whether it points to the start of a new scatterlist. * Those low bits are there for everyone! (thanks mason :-) */ #define SG_PAGE_LINK_MASK (SG_CHAIN | SG_END) static inline unsigned int __sg_flags(struct scatterlist *sg) { return sg->page_link & SG_PAGE_LINK_MASK; } static inline struct scatterlist *sg_chain_ptr(struct scatterlist *sg) { return (struct scatterlist *)(sg->page_link & ~SG_PAGE_LINK_MASK); } static inline bool sg_is_chain(struct scatterlist *sg) { return __sg_flags(sg) & SG_CHAIN; } static inline bool sg_is_last(struct scatterlist *sg) { return __sg_flags(sg) & SG_END; } /** * sg_assign_page - Assign a given page to an SG entry * @sg: SG entry * @page: The page * * Description: * Assign page to sg entry. Also see sg_set_page(), the most commonly used * variant. * **/ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) { unsigned long page_link = sg->page_link & (SG_CHAIN | SG_END); /* * In order for the low bit stealing approach to work, pages * must be aligned at a 32-bit boundary as a minimum. */ BUG_ON((unsigned long)page & SG_PAGE_LINK_MASK); #ifdef CONFIG_DEBUG_SG BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; } /** * sg_set_page - Set sg entry to point at given page * @sg: SG entry * @page: The page * @len: Length of data * @offset: Offset into page * * Description: * Use this function to set an sg entry pointing at a page, never assign * the page directly. We encode sg table information in the lower bits * of the page pointer. See sg_page() for looking up the page belonging * to an sg entry. * **/ static inline void sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, unsigned int offset) { sg_assign_page(sg, page); sg->offset = offset; sg->length = len; } /** * sg_set_folio - Set sg entry to point at given folio * @sg: SG entry * @folio: The folio * @len: Length of data * @offset: Offset into folio * * Description: * Use this function to set an sg entry pointing at a folio, never assign * the folio directly. We encode sg table information in the lower bits * of the folio pointer. See sg_page() for looking up the page belonging * to an sg entry. 
* **/ static inline void sg_set_folio(struct scatterlist *sg, struct folio *folio, size_t len, size_t offset) { WARN_ON_ONCE(len > UINT_MAX); WARN_ON_ONCE(offset > UINT_MAX); sg_assign_page(sg, &folio->page); sg->offset = offset; sg->length = len; } static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~SG_PAGE_LINK_MASK); } /** * sg_set_buf - Set sg entry to point at given data * @sg: SG entry * @buf: Data * @buflen: Data length * **/ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { #ifdef CONFIG_DEBUG_SG BUG_ON(!virt_addr_valid(buf)); #endif sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); } /* * Loop over each sg element, following the pointer to a new list if necessary */ #define for_each_sg(sglist, sg, nr, __i) \ for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg)) /* * Loop over each sg element in the given sg_table object. */ #define for_each_sgtable_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->orig_nents, i) /* * Loop over each sg element in the given *DMA mapped* sg_table object. * Please use sg_dma_address(sg) and sg_dma_len(sg) to extract DMA addresses * of each element. */ #define for_each_sgtable_dma_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) static inline void __sg_chain(struct scatterlist *chain_sg, struct scatterlist *sgl) { /* * offset and length are unused for chain entry. Clear them. */ chain_sg->offset = 0; chain_sg->length = 0; /* * Set lowest bit to indicate a link pointer, and make sure to clear * the termination bit if it happens to be set. */ chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END; } /** * sg_chain - Chain two sglists together * @prv: First scatterlist * @prv_nents: Number of entries in prv * @sgl: Second scatterlist * * Description: * Links @prv@ and @sgl@ together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { __sg_chain(&prv[prv_nents - 1], sgl); } /** * sg_mark_end - Mark the end of the scatterlist * @sg: SG entry * * Description: * Marks the passed in sg entry as the termination point for the sg * table. A call to sg_next() on this entry will return NULL. * **/ static inline void sg_mark_end(struct scatterlist *sg) { /* * Set termination bit, clear potential chain bit */ sg->page_link |= SG_END; sg->page_link &= ~SG_CHAIN; } /** * sg_unmark_end - Undo setting the end of the scatterlist * @sg: SG entry * * Description: * Removes the termination marker from the given entry of the scatterlist. * **/ static inline void sg_unmark_end(struct scatterlist *sg) { sg->page_link &= ~SG_END; } /* * On 64-bit architectures there is a 4-byte padding in struct scatterlist * (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). Use this padding for DMA * flags bits to indicate when a specific dma address is a bus address or the * buffer may have been bounced via SWIOTLB. */ #ifdef CONFIG_NEED_SG_DMA_FLAGS #define SG_DMA_BUS_ADDRESS (1 << 0) #define SG_DMA_SWIOTLB (1 << 1) /** * sg_dma_is_bus_address - Return whether a given segment was marked * as a bus address * @sg: SG entry * * Description: * Returns true if sg_dma_mark_bus_address() has been called on * this segment. 
**/ static inline bool sg_dma_is_bus_address(struct scatterlist *sg) { return sg->dma_flags & SG_DMA_BUS_ADDRESS; } /** * sg_dma_mark_bus_address - Mark the scatterlist entry as a bus address * @sg: SG entry * * Description: * Marks the passed in sg entry to indicate that the dma_address is * a bus address and doesn't need to be unmapped. This should only be * used by dma_map_sg() implementations to mark bus addresses * so they can be properly cleaned up in dma_unmap_sg(). **/ static inline void sg_dma_mark_bus_address(struct scatterlist *sg) { sg->dma_flags |= SG_DMA_BUS_ADDRESS; } /** * sg_dma_unmark_bus_address - Unmark the scatterlist entry as a bus address * @sg: SG entry * * Description: * Clears the bus address mark. **/ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) { sg->dma_flags &= ~SG_DMA_BUS_ADDRESS; } /** * sg_dma_is_swiotlb - Return whether the scatterlist was marked for SWIOTLB * bouncing * @sg: SG entry * * Description: * Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all * elements may have been bounced, so the caller would have to check * individual SG entries with is_swiotlb_buffer(). */ static inline bool sg_dma_is_swiotlb(struct scatterlist *sg) { return sg->dma_flags & SG_DMA_SWIOTLB; } /** * sg_dma_mark_swiotlb - Mark the scatterlist for SWIOTLB bouncing * @sg: SG entry * * Description: * Marks a scatterlist for SWIOTLB bouncing. Not all SG entries may be * bounced. */ static inline void sg_dma_mark_swiotlb(struct scatterlist *sg) { sg->dma_flags |= SG_DMA_SWIOTLB; } #else static inline bool sg_dma_is_bus_address(struct scatterlist *sg) { return false; } static inline void sg_dma_mark_bus_address(struct scatterlist *sg) { } static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) { } static inline bool sg_dma_is_swiotlb(struct scatterlist *sg) { return false; } static inline void sg_dma_mark_swiotlb(struct scatterlist *sg) { } #endif /* CONFIG_NEED_SG_DMA_FLAGS */ /** * sg_phys - Return physical address of an sg entry * @sg: SG entry * * Description: * This calls page_to_phys() on the page in this sg entry, and adds the * sg offset. The caller must know that it is legal to call page_to_phys() * on the sg page. * **/ static inline dma_addr_t sg_phys(struct scatterlist *sg) { return page_to_phys(sg_page(sg)) + sg->offset; } /** * sg_virt - Return virtual address of an sg entry * @sg: SG entry * * Description: * This calls page_address() on the page in this sg entry, and adds the * sg offset. The caller must know that the sg page has a valid virtual * mapping. 
* **/ static inline void *sg_virt(struct scatterlist *sg) { return page_address(sg_page(sg)) + sg->offset; } /** * sg_init_marker - Initialize markers in sg table * @sgl: The SG table * @nents: Number of entries in table * **/ static inline void sg_init_marker(struct scatterlist *sgl, unsigned int nents) { sg_mark_end(&sgl[nents - 1]); } int sg_nents(struct scatterlist *sg); int sg_nents_for_len(struct scatterlist *sg, u64 len); struct scatterlist *sg_next(struct scatterlist *); struct scatterlist *sg_last(struct scatterlist *s, unsigned int); void sg_init_table(struct scatterlist *, unsigned int); void sg_init_one(struct scatterlist *, const void *, unsigned int); int sg_split(struct scatterlist *in, const int in_mapped_nents, const off_t skip, const int nb_splits, const size_t *split_sizes, struct scatterlist **out, int *out_mapped_nents, gfp_t gfp_mask); typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t); typedef void (sg_free_fn)(struct scatterlist *, unsigned int); void __sg_free_table(struct sg_table *, unsigned int, unsigned int, sg_free_fn *, unsigned int); void sg_free_table(struct sg_table *); void sg_free_append_table(struct sg_append_table *sgt); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); int sg_alloc_append_table_from_pages(struct sg_append_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, unsigned int max_segment, unsigned int left_pages, gfp_t gfp_mask); int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, unsigned int max_segment, gfp_t gfp_mask); /** * sg_alloc_table_from_pages - Allocate and initialize an sg table from * an array of pages * @sgt: The sg table header to use * @pages: Pointer to an array of page pointers * @n_pages: Number of pages in the pages array * @offset: Offset from start of the first page to the start of a buffer * @size: Number of valid bytes in the buffer (after offset) * @gfp_mask: GFP allocation mask * * Description: * Allocate and initialize an sg table from a list of pages. Contiguous * ranges of the pages are squashed into a single scatterlist node. A user * may provide an offset at a start and a size of valid data in a buffer * specified by the page array. The returned sg table is released by * sg_free_table. 
* * Returns: * 0 on success, negative error on failure */ static inline int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask) { return sg_alloc_table_from_pages_segment(sgt, pages, n_pages, offset, size, UINT_MAX, gfp_mask); } #ifdef CONFIG_SGL_ALLOC struct scatterlist *sgl_alloc_order(unsigned long long length, unsigned int order, bool chainable, gfp_t gfp, unsigned int *nent_p); struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, unsigned int *nent_p); void sgl_free_n_order(struct scatterlist *sgl, int nents, int order); void sgl_free_order(struct scatterlist *sgl, int order); void sgl_free(struct scatterlist *sgl); #endif /* CONFIG_SGL_ALLOC */ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip, bool to_buffer); size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, const void *buf, size_t buflen); size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen); size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents, const void *buf, size_t buflen, off_t skip); size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip); size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, size_t buflen, off_t skip); /* * Maximum number of entries that will be allocated in one piece, if * a list larger than this is required then chaining will be utilized. */ #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) /* * The maximum number of SG segments that we will put inside a * scatterlist (unless chaining is used). Should ideally fit inside a * single page, to avoid a higher order allocation. We could define this * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The * minimum value is 32 */ #define SG_CHUNK_SIZE 128 /* * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. */ #ifdef CONFIG_ARCH_NO_SG_CHAIN #define SG_MAX_SEGMENTS SG_CHUNK_SIZE #else #define SG_MAX_SEGMENTS 2048 #endif #ifdef CONFIG_SG_POOL void sg_free_table_chained(struct sg_table *table, unsigned nents_first_chunk); int sg_alloc_table_chained(struct sg_table *table, int nents, struct scatterlist *first_chunk, unsigned nents_first_chunk); #endif /* * sg page iterator * * Iterates over sg entries page-by-page. On each successful iteration, you * can call sg_page_iter_page(@piter) to get the current page. * @piter->sg will point to the sg holding this page and @piter->sg_pgoffset to * the page's page offset within the sg. The iteration will stop either when a * maximum number of sg entries was reached or a terminating sg * (sg_last(sg) == true) was reached. */ struct sg_page_iter { struct scatterlist *sg; /* sg holding the page */ unsigned int sg_pgoffset; /* page offset within the sg */ /* these are internal states, keep away */ unsigned int __nents; /* remaining sg entries */ int __pg_advance; /* nr pages to advance at the * next step */ }; /* * sg page iterator for DMA addresses * * This is the same as sg_page_iter however you can call * sg_page_iter_dma_address(@dma_iter) to get the page's DMA * address. sg_page_iter_page() cannot be called on this iterator. 
*/ struct sg_dma_page_iter { struct sg_page_iter base; }; bool __sg_page_iter_next(struct sg_page_iter *piter); bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter); void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset); /** * sg_page_iter_page - get the current page held by the page iterator * @piter: page iterator holding the page */ static inline struct page *sg_page_iter_page(struct sg_page_iter *piter) { return nth_page(sg_page(piter->sg), piter->sg_pgoffset); } /** * sg_page_iter_dma_address - get the dma address of the current page held by * the page iterator. * @dma_iter: page iterator holding the page */ static inline dma_addr_t sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) { return sg_dma_address(dma_iter->base.sg) + (dma_iter->base.sg_pgoffset << PAGE_SHIFT); } /** * for_each_sg_page - iterate over the pages of the given sg list * @sglist: sglist to iterate over * @piter: page iterator to hold current page, sg, sg_pgoffset * @nents: maximum number of sg entries to iterate over * @pgoffset: starting page offset (in pages) * * Callers may use sg_page_iter_page() to get each page pointer. * In each loop it operates on PAGE_SIZE unit. */ #define for_each_sg_page(sglist, piter, nents, pgoffset) \ for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \ __sg_page_iter_next(piter);) /** * for_each_sg_dma_page - iterate over the pages of the given sg list * @sglist: sglist to iterate over * @dma_iter: DMA page iterator to hold current page * @dma_nents: maximum number of sg entries to iterate over, this is the value * returned from dma_map_sg * @pgoffset: starting page offset (in pages) * * Callers may use sg_page_iter_dma_address() to get each page's DMA address. * In each loop it operates on PAGE_SIZE unit. */ #define for_each_sg_dma_page(sglist, dma_iter, dma_nents, pgoffset) \ for (__sg_page_iter_start(&(dma_iter)->base, sglist, dma_nents, \ pgoffset); \ __sg_page_iter_dma_next(dma_iter);) /** * for_each_sgtable_page - iterate over all pages in the sg_table object * @sgt: sg_table object to iterate over * @piter: page iterator to hold current page * @pgoffset: starting page offset (in pages) * * Iterates over the all memory pages in the buffer described by * a scatterlist stored in the given sg_table object. * See also for_each_sg_page(). In each loop it operates on PAGE_SIZE unit. */ #define for_each_sgtable_page(sgt, piter, pgoffset) \ for_each_sg_page((sgt)->sgl, piter, (sgt)->orig_nents, pgoffset) /** * for_each_sgtable_dma_page - iterate over the DMA mapped sg_table object * @sgt: sg_table object to iterate over * @dma_iter: DMA page iterator to hold current page * @pgoffset: starting page offset (in pages) * * Iterates over the all DMA mapped pages in the buffer described by * a scatterlist stored in the given sg_table object. * See also for_each_sg_dma_page(). In each loop it operates on PAGE_SIZE * unit. */ #define for_each_sgtable_dma_page(sgt, dma_iter, pgoffset) \ for_each_sg_dma_page((sgt)->sgl, dma_iter, (sgt)->nents, pgoffset) /* * Mapping sg iterator * * Iterates over sg entries mapping page-by-page. On each successful * iteration, @miter->page points to the mapped page and * @miter->length bytes of data can be accessed at @miter->addr. As * long as an iteration is enclosed between start and stop, the user * is free to choose control structure and when to stop. * * @miter->consumed is set to @miter->length on each iteration. 
It * can be adjusted if the user can't consume all the bytes in one go. * Also, a stopped iteration can be resumed by calling next on it. * This is useful when iteration needs to release all resources and * continue later (e.g. at the next interrupt). */ #define SG_MITER_ATOMIC (1 << 0) /* use kmap_atomic */ #define SG_MITER_TO_SG (1 << 1) /* flush back to phys on unmap */ #define SG_MITER_FROM_SG (1 << 2) /* nop */ struct sg_mapping_iter { /* the following three fields can be accessed directly */ struct page *page; /* currently mapped page */ void *addr; /* pointer to the mapped area */ size_t length; /* length of the mapped area */ size_t consumed; /* number of consumed bytes */ struct sg_page_iter piter; /* page iterator */ /* these are internal states, keep away */ unsigned int __offset; /* offset within page */ unsigned int __remaining; /* remaining bytes on page */ unsigned int __flags; }; void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, unsigned int nents, unsigned int flags); bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset); bool sg_miter_next(struct sg_mapping_iter *miter); void sg_miter_stop(struct sg_mapping_iter *miter); #endif /* _LINUX_SCATTERLIST_H */
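To make the scatterlist API above more concrete, here is a small, self-contained sketch (not part of scatterlist.h) that builds a two-entry CPU-side scatterlist over kmalloc'ed buffers and walks it with for_each_sg(). The buffer sizes and module name are arbitrary assumptions, and DMA mapping is intentionally left out.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int __init sg_demo_init(void)
{
	struct scatterlist sgl[2], *sg;
	void *buf0, *buf1;
	int i;

	buf0 = kmalloc(512, GFP_KERNEL);
	buf1 = kmalloc(1024, GFP_KERNEL);
	if (!buf0 || !buf1) {
		kfree(buf0);
		kfree(buf1);
		return -ENOMEM;
	}

	/* Zeroes all entries and marks the last one with SG_END. */
	sg_init_table(sgl, ARRAY_SIZE(sgl));
	sg_set_buf(&sgl[0], buf0, 512);
	sg_set_buf(&sgl[1], buf1, 1024);

	/* Walk the CPU-side entries; sg_virt() is valid for lowmem buffers. */
	for_each_sg(sgl, sg, ARRAY_SIZE(sgl), i)
		pr_info("sg[%d]: length=%u virt=%p\n", i, sg->length, sg_virt(sg));

	kfree(buf0);
	kfree(buf1);
	return 0;
}

static void __exit sg_demo_exit(void)
{
}

module_init(sg_demo_init);
module_exit(sg_demo_exit);
MODULE_LICENSE("GPL");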
// SPDX-License-Identifier: GPL-2.0-only /* * Landlock LSM - Filesystem management and hooks * * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI * Copyright © 2021-2022 Microsoft Corporation */ #include <kunit/test.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bits.h> #include <linux/compiler_types.h> #include <linux/dcache.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/lsm_hooks.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/path.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/stat.h> #include <linux/types.h> #include <linux/wait_bit.h> #include <linux/workqueue.h> #include <uapi/linux/landlock.h> #include "common.h" #include 
"cred.h" #include "fs.h" #include "limits.h" #include "object.h" #include "ruleset.h" #include "setup.h" /* Underlying object management */ static void release_inode(struct landlock_object *const object) __releases(object->lock) { struct inode *const inode = object->underobj; struct super_block *sb; if (!inode) { spin_unlock(&object->lock); return; } /* * Protects against concurrent use by hook_sb_delete() of the reference * to the underlying inode. */ object->underobj = NULL; /* * Makes sure that if the filesystem is concurrently unmounted, * hook_sb_delete() will wait for us to finish iput(). */ sb = inode->i_sb; atomic_long_inc(&landlock_superblock(sb)->inode_refs); spin_unlock(&object->lock); /* * Because object->underobj was not NULL, hook_sb_delete() and * get_inode_object() guarantee that it is safe to reset * landlock_inode(inode)->object while it is not NULL. It is therefore * not necessary to lock inode->i_lock. */ rcu_assign_pointer(landlock_inode(inode)->object, NULL); /* * Now, new rules can safely be tied to @inode with get_inode_object(). */ iput(inode); if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs)) wake_up_var(&landlock_superblock(sb)->inode_refs); } static const struct landlock_object_underops landlock_fs_underops = { .release = release_inode }; /* Ruleset management */ static struct landlock_object *get_inode_object(struct inode *const inode) { struct landlock_object *object, *new_object; struct landlock_inode_security *inode_sec = landlock_inode(inode); rcu_read_lock(); retry: object = rcu_dereference(inode_sec->object); if (object) { if (likely(refcount_inc_not_zero(&object->usage))) { rcu_read_unlock(); return object; } /* * We are racing with release_inode(), the object is going * away. Wait for release_inode(), then retry. */ spin_lock(&object->lock); spin_unlock(&object->lock); goto retry; } rcu_read_unlock(); /* * If there is no object tied to @inode, then create a new one (without * holding any locks). */ new_object = landlock_create_object(&landlock_fs_underops, inode); if (IS_ERR(new_object)) return new_object; /* * Protects against concurrent calls to get_inode_object() or * hook_sb_delete(). */ spin_lock(&inode->i_lock); if (unlikely(rcu_access_pointer(inode_sec->object))) { /* Someone else just created the object, bail out and retry. */ spin_unlock(&inode->i_lock); kfree(new_object); rcu_read_lock(); goto retry; } /* * @inode will be released by hook_sb_delete() on its superblock * shutdown, or by release_inode() when no more ruleset references the * related object. */ ihold(inode); rcu_assign_pointer(inode_sec->object, new_object); spin_unlock(&inode->i_lock); return new_object; } /* All access rights that can be tied to files. */ /* clang-format off */ #define ACCESS_FILE ( \ LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ LANDLOCK_ACCESS_FS_READ_FILE | \ LANDLOCK_ACCESS_FS_TRUNCATE) /* clang-format on */ /* * @path: Should have been checked by get_path_from_fd(). */ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, const struct path *const path, access_mask_t access_rights) { int err; struct landlock_id id = { .type = LANDLOCK_KEY_INODE, }; /* Files only get access rights that make sense. */ if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) != ACCESS_FILE) return -EINVAL; if (WARN_ON_ONCE(ruleset->num_layers != 1)) return -EINVAL; /* Transforms relative access rights to absolute ones. 
*/ access_rights |= LANDLOCK_MASK_ACCESS_FS & ~landlock_get_fs_access_mask(ruleset, 0); id.key.object = get_inode_object(d_backing_inode(path->dentry)); if (IS_ERR(id.key.object)) return PTR_ERR(id.key.object); mutex_lock(&ruleset->lock); err = landlock_insert_rule(ruleset, id, access_rights); mutex_unlock(&ruleset->lock); /* * No need to check for an error because landlock_insert_rule() * increments the refcount for the new object if needed. */ landlock_put_object(id.key.object); return err; } /* Access-control management */ /* * The lifetime of the returned rule is tied to @domain. * * Returns NULL if no rule is found or if @dentry is negative. */ static const struct landlock_rule * find_rule(const struct landlock_ruleset *const domain, const struct dentry *const dentry) { const struct landlock_rule *rule; const struct inode *inode; struct landlock_id id = { .type = LANDLOCK_KEY_INODE, }; /* Ignores nonexistent leafs. */ if (d_is_negative(dentry)) return NULL; inode = d_backing_inode(dentry); rcu_read_lock(); id.key.object = rcu_dereference(landlock_inode(inode)->object); rule = landlock_find_rule(domain, id); rcu_read_unlock(); return rule; } /* * Allows access to pseudo filesystems that will never be mountable (e.g. * sockfs, pipefs), but can still be reachable through * /proc/<pid>/fd/<file-descriptor> */ static bool is_nouser_or_private(const struct dentry *dentry) { return (dentry->d_sb->s_flags & SB_NOUSER) || (d_is_positive(dentry) && unlikely(IS_PRIVATE(d_backing_inode(dentry)))); } static access_mask_t get_raw_handled_fs_accesses(const struct landlock_ruleset *const domain) { access_mask_t access_dom = 0; size_t layer_level; for (layer_level = 0; layer_level < domain->num_layers; layer_level++) access_dom |= landlock_get_raw_fs_access_mask(domain, layer_level); return access_dom; } static access_mask_t get_handled_fs_accesses(const struct landlock_ruleset *const domain) { /* Handles all initially denied by default access rights. */ return get_raw_handled_fs_accesses(domain) | LANDLOCK_ACCESS_FS_INITIALLY_DENIED; } static const struct landlock_ruleset * get_fs_domain(const struct landlock_ruleset *const domain) { if (!domain || !get_raw_handled_fs_accesses(domain)) return NULL; return domain; } static const struct landlock_ruleset *get_current_fs_domain(void) { return get_fs_domain(landlock_get_current_domain()); } /* * Check that a destination file hierarchy has more restrictions than a source * file hierarchy. This is only used for link and rename actions. * * @layer_masks_child2: Optional child masks. */ static bool no_more_access( const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], const bool child1_is_directory, const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], const bool child2_is_directory) { unsigned long access_bit; for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2); access_bit++) { /* Ignores accesses that only make sense for directories. */ const bool is_file_access = !!(BIT_ULL(access_bit) & ACCESS_FILE); if (child1_is_directory || is_file_access) { /* * Checks if the destination restrictions are a * superset of the source ones (i.e. 
inherited access * rights without child exceptions): * restrictions(parent2) >= restrictions(child1) */ if ((((*layer_masks_parent1)[access_bit] & (*layer_masks_child1)[access_bit]) | (*layer_masks_parent2)[access_bit]) != (*layer_masks_parent2)[access_bit]) return false; } if (!layer_masks_child2) continue; if (child2_is_directory || is_file_access) { /* * Checks inverted restrictions for RENAME_EXCHANGE: * restrictions(parent1) >= restrictions(child2) */ if ((((*layer_masks_parent2)[access_bit] & (*layer_masks_child2)[access_bit]) | (*layer_masks_parent1)[access_bit]) != (*layer_masks_parent1)[access_bit]) return false; } } return true; } #define NMA_TRUE(...) KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__)) #define NMA_FALSE(...) KUNIT_EXPECT_FALSE(test, no_more_access(__VA_ARGS__)) #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_no_more_access(struct kunit *const test) { const layer_mask_t rx0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT_ULL(0), }; const layer_mask_t mx0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_MAKE_REG)] = BIT_ULL(0), }; const layer_mask_t x0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), }; const layer_mask_t x1[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(1), }; const layer_mask_t x01[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) | BIT_ULL(1), }; const layer_mask_t allows_all[LANDLOCK_NUM_ACCESS_FS] = {}; /* Checks without restriction. */ NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false); NMA_TRUE(&allows_all, &x0, false, &allows_all, NULL, false); NMA_FALSE(&x0, &x0, false, &allows_all, NULL, false); /* * Checks that we can only refer a file if no more access could be * inherited. */ NMA_TRUE(&x0, &x0, false, &rx0, NULL, false); NMA_TRUE(&rx0, &rx0, false, &rx0, NULL, false); NMA_FALSE(&rx0, &rx0, false, &x0, NULL, false); NMA_FALSE(&rx0, &rx0, false, &x1, NULL, false); /* Checks allowed referring with different nested domains. */ NMA_TRUE(&x0, &x1, false, &x0, NULL, false); NMA_TRUE(&x1, &x0, false, &x0, NULL, false); NMA_TRUE(&x0, &x01, false, &x0, NULL, false); NMA_TRUE(&x0, &x01, false, &rx0, NULL, false); NMA_TRUE(&x01, &x0, false, &x0, NULL, false); NMA_TRUE(&x01, &x0, false, &rx0, NULL, false); NMA_FALSE(&x01, &x01, false, &x0, NULL, false); /* Checks that file access rights are also enforced for a directory. */ NMA_FALSE(&rx0, &rx0, true, &x0, NULL, false); /* Checks that directory access rights don't impact file referring... */ NMA_TRUE(&mx0, &mx0, false, &x0, NULL, false); /* ...but only directory referring. */ NMA_FALSE(&mx0, &mx0, true, &x0, NULL, false); /* Checks directory exchange. */ NMA_TRUE(&mx0, &mx0, true, &mx0, &mx0, true); NMA_TRUE(&mx0, &mx0, true, &mx0, &x0, true); NMA_FALSE(&mx0, &mx0, true, &x0, &mx0, true); NMA_FALSE(&mx0, &mx0, true, &x0, &x0, true); NMA_FALSE(&mx0, &mx0, true, &x1, &x1, true); /* Checks file exchange with directory access rights... */ NMA_TRUE(&mx0, &mx0, false, &mx0, &mx0, false); NMA_TRUE(&mx0, &mx0, false, &mx0, &x0, false); NMA_TRUE(&mx0, &mx0, false, &x0, &mx0, false); NMA_TRUE(&mx0, &mx0, false, &x0, &x0, false); /* ...and with file access rights. 
*/ NMA_TRUE(&rx0, &rx0, false, &rx0, &rx0, false); NMA_TRUE(&rx0, &rx0, false, &rx0, &x0, false); NMA_FALSE(&rx0, &rx0, false, &x0, &rx0, false); NMA_FALSE(&rx0, &rx0, false, &x0, &x0, false); NMA_FALSE(&rx0, &rx0, false, &x1, &x1, false); /* * Allowing the following requests should not be a security risk * because domain 0 denies execute access, and domain 1 is always * nested with domain 0. However, adding an exception for this case * would mean to check all nested domains to make sure none can get * more privileges (e.g. processes only sandboxed by domain 0). * Moreover, this behavior (i.e. composition of N domains) could then * be inconsistent compared to domain 1's ruleset alone (e.g. it might * be denied to link/rename with domain 1's ruleset, whereas it would * be allowed if nested on top of domain 0). Another drawback would be * to create a covert channel that could enable sandboxed processes to * infer most of the filesystem restrictions from their domain. To * make it simple, efficient, safe, and more consistent, this case is * always denied. */ NMA_FALSE(&x1, &x1, false, &x0, NULL, false); NMA_FALSE(&x1, &x1, false, &rx0, NULL, false); NMA_FALSE(&x1, &x1, true, &x0, NULL, false); NMA_FALSE(&x1, &x1, true, &rx0, NULL, false); /* Checks the same case of exclusive domains with a file... */ NMA_TRUE(&x1, &x1, false, &x01, NULL, false); NMA_FALSE(&x1, &x1, false, &x01, &x0, false); NMA_FALSE(&x1, &x1, false, &x01, &x01, false); NMA_FALSE(&x1, &x1, false, &x0, &x0, false); /* ...and with a directory. */ NMA_FALSE(&x1, &x1, false, &x0, &x0, true); NMA_FALSE(&x1, &x1, true, &x0, &x0, false); NMA_FALSE(&x1, &x1, true, &x0, &x0, true); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ #undef NMA_TRUE #undef NMA_FALSE /* * Removes @layer_masks accesses that are not requested. * * Returns true if the request is allowed, false otherwise. */ static bool scope_to_request(const access_mask_t access_request, layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) { const unsigned long access_req = access_request; unsigned long access_bit; if (WARN_ON_ONCE(!layer_masks)) return true; for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks)) (*layer_masks)[access_bit] = 0; return !memchr_inv(layer_masks, 0, sizeof(*layer_masks)); } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_scope_to_request_with_exec_none(struct kunit *const test) { /* Allows everything. */ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; /* Checks and scopes with execute. */ KUNIT_EXPECT_TRUE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &layer_masks)); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } static void test_scope_to_request_with_exec_some(struct kunit *const test) { /* Denies execute and write. */ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), }; /* Checks and scopes with execute. */ KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &layer_masks)); KUNIT_EXPECT_EQ(test, BIT_ULL(0), layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } static void test_scope_to_request_without_access(struct kunit *const test) { /* Denies execute and write. 
*/ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), }; /* Checks and scopes without access request. */ KUNIT_EXPECT_TRUE(test, scope_to_request(0, &layer_masks)); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ /* * Returns true if there is at least one access right different than * LANDLOCK_ACCESS_FS_REFER. */ static bool is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], const access_mask_t access_request) { unsigned long access_bit; /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ const unsigned long access_check = access_request & ~LANDLOCK_ACCESS_FS_REFER; if (!layer_masks) return false; for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { if ((*layer_masks)[access_bit]) return true; } return false; } #define IE_TRUE(...) KUNIT_EXPECT_TRUE(test, is_eacces(__VA_ARGS__)) #define IE_FALSE(...) KUNIT_EXPECT_FALSE(test, is_eacces(__VA_ARGS__)) #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_is_eacces_with_none(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_refer(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = BIT_ULL(0), }; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_write(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(0), }; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_TRUE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ #undef IE_TRUE #undef IE_FALSE /** * is_access_to_paths_allowed - Check accesses for requests with a common path * * @domain: Domain to check against. * @path: File hierarchy to walk through. * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is * equal to @layer_masks_parent2 (if any). This is tied to the unique * requested path for most actions, or the source in case of a refer action * (i.e. rename or link), or the source and destination in case of * RENAME_EXCHANGE. * @layer_masks_parent1: Pointer to a matrix of layer masks per access * masks, identifying the layers that forbid a specific access. Bits from * this matrix can be unset according to the @path walk. An empty matrix * means that @domain allows all possible Landlock accesses (i.e. not only * those identified by @access_request_parent1). This matrix can * initially refer to domain layer masks and, when the accesses for the * destination and source are the same, to requested layer masks. * @dentry_child1: Dentry to the initial child of the parent1 path. This * pointer must be NULL for non-refer actions (i.e. not link nor rename). 
* @access_request_parent2: Similar to @access_request_parent1 but for a * request involving a source and a destination. This refers to the * destination, except in case of RENAME_EXCHANGE where it also refers to * the source. Must be set to 0 when using a simple path request. * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer * action. This must be NULL otherwise. * @dentry_child2: Dentry to the initial child of the parent2 path. This * pointer is only set for RENAME_EXCHANGE actions and must be NULL * otherwise. * * This helper first checks that the destination has a superset of restrictions * compared to the source (if any) for a common path. Because of * RENAME_EXCHANGE actions, source and destinations may be swapped. It then * checks that the collected accesses and the remaining ones are enough to * allow the request. * * Returns: * - true if the access request is granted; * - false otherwise. */ static bool is_access_to_paths_allowed( const struct landlock_ruleset *const domain, const struct path *const path, const access_mask_t access_request_parent1, layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], const struct dentry *const dentry_child1, const access_mask_t access_request_parent2, layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], const struct dentry *const dentry_child2) { bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, child1_is_directory = true, child2_is_directory = true; struct path walker_path; access_mask_t access_masked_parent1, access_masked_parent2; layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS], _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS]; layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL, (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL; if (!access_request_parent1 && !access_request_parent2) return true; if (WARN_ON_ONCE(!domain || !path)) return true; if (is_nouser_or_private(path->dentry)) return true; if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1)) return false; if (unlikely(layer_masks_parent2)) { if (WARN_ON_ONCE(!dentry_child1)) return false; /* * For a double request, first check for potential privilege * escalation by looking at domain handled accesses (which are * a superset of the meaningful requested accesses). */ access_masked_parent1 = access_masked_parent2 = get_handled_fs_accesses(domain); is_dom_check = true; } else { if (WARN_ON_ONCE(dentry_child1 || dentry_child2)) return false; /* For a simple request, only check for requested accesses. */ access_masked_parent1 = access_request_parent1; access_masked_parent2 = access_request_parent2; is_dom_check = false; } if (unlikely(dentry_child1)) { landlock_unmask_layers( find_rule(domain, dentry_child1), landlock_init_layer_masks( domain, LANDLOCK_MASK_ACCESS_FS, &_layer_masks_child1, LANDLOCK_KEY_INODE), &_layer_masks_child1, ARRAY_SIZE(_layer_masks_child1)); layer_masks_child1 = &_layer_masks_child1; child1_is_directory = d_is_dir(dentry_child1); } if (unlikely(dentry_child2)) { landlock_unmask_layers( find_rule(domain, dentry_child2), landlock_init_layer_masks( domain, LANDLOCK_MASK_ACCESS_FS, &_layer_masks_child2, LANDLOCK_KEY_INODE), &_layer_masks_child2, ARRAY_SIZE(_layer_masks_child2)); layer_masks_child2 = &_layer_masks_child2; child2_is_directory = d_is_dir(dentry_child2); } walker_path = *path; path_get(&walker_path); /* * We need to walk through all the hierarchy to not miss any relevant * restriction. 
*/ while (true) { struct dentry *parent_dentry; const struct landlock_rule *rule; /* * If at least all accesses allowed on the destination are * already allowed on the source, respectively if there is at * least as much as restrictions on the destination than on the * source, then we can safely refer files from the source to * the destination without risking a privilege escalation. * This also applies in the case of RENAME_EXCHANGE, which * implies checks on both direction. This is crucial for * standalone multilayered security policies. Furthermore, * this helps avoid policy writers to shoot themselves in the * foot. */ if (unlikely(is_dom_check && no_more_access( layer_masks_parent1, layer_masks_child1, child1_is_directory, layer_masks_parent2, layer_masks_child2, child2_is_directory))) { allowed_parent1 = scope_to_request( access_request_parent1, layer_masks_parent1); allowed_parent2 = scope_to_request( access_request_parent2, layer_masks_parent2); /* Stops when all accesses are granted. */ if (allowed_parent1 && allowed_parent2) break; /* * Now, downgrades the remaining checks from domain * handled accesses to requested accesses. */ is_dom_check = false; access_masked_parent1 = access_request_parent1; access_masked_parent2 = access_request_parent2; } rule = find_rule(domain, walker_path.dentry); allowed_parent1 = landlock_unmask_layers( rule, access_masked_parent1, layer_masks_parent1, ARRAY_SIZE(*layer_masks_parent1)); allowed_parent2 = landlock_unmask_layers( rule, access_masked_parent2, layer_masks_parent2, ARRAY_SIZE(*layer_masks_parent2)); /* Stops when a rule from each layer grants access. */ if (allowed_parent1 && allowed_parent2) break; jump_up: if (walker_path.dentry == walker_path.mnt->mnt_root) { if (follow_up(&walker_path)) { /* Ignores hidden mount points. */ goto jump_up; } else { /* * Stops at the real root. Denies access * because not all layers have granted access. */ break; } } if (unlikely(IS_ROOT(walker_path.dentry))) { /* * Stops at disconnected root directories. Only allows * access to internal filesystems (e.g. nsfs, which is * reachable through /proc/<pid>/ns/<namespace>). */ allowed_parent1 = allowed_parent2 = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL); break; } parent_dentry = dget_parent(walker_path.dentry); dput(walker_path.dentry); walker_path.dentry = parent_dentry; } path_put(&walker_path); return allowed_parent1 && allowed_parent2; } static int check_access_path(const struct landlock_ruleset *const domain, const struct path *const path, access_mask_t access_request) { layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; access_request = landlock_init_layer_masks( domain, access_request, &layer_masks, LANDLOCK_KEY_INODE); if (is_access_to_paths_allowed(domain, path, access_request, &layer_masks, NULL, 0, NULL, NULL)) return 0; return -EACCES; } static int current_check_access_path(const struct path *const path, const access_mask_t access_request) { const struct landlock_ruleset *const dom = get_current_fs_domain(); if (!dom) return 0; return check_access_path(dom, path, access_request); } static access_mask_t get_mode_access(const umode_t mode) { switch (mode & S_IFMT) { case S_IFLNK: return LANDLOCK_ACCESS_FS_MAKE_SYM; case 0: /* A zero mode translates to S_IFREG. 
*/ case S_IFREG: return LANDLOCK_ACCESS_FS_MAKE_REG; case S_IFDIR: return LANDLOCK_ACCESS_FS_MAKE_DIR; case S_IFCHR: return LANDLOCK_ACCESS_FS_MAKE_CHAR; case S_IFBLK: return LANDLOCK_ACCESS_FS_MAKE_BLOCK; case S_IFIFO: return LANDLOCK_ACCESS_FS_MAKE_FIFO; case S_IFSOCK: return LANDLOCK_ACCESS_FS_MAKE_SOCK; default: WARN_ON_ONCE(1); return 0; } } static access_mask_t maybe_remove(const struct dentry *const dentry) { if (d_is_negative(dentry)) return 0; return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : LANDLOCK_ACCESS_FS_REMOVE_FILE; } /** * collect_domain_accesses - Walk through a file path and collect accesses * * @domain: Domain to check against. * @mnt_root: Last directory to check. * @dir: Directory to start the walk from. * @layer_masks_dom: Where to store the collected accesses. * * This helper is useful to begin a path walk from the @dir directory to a * @mnt_root directory used as a mount point. This mount point is the common * ancestor between the source and the destination of a renamed and linked * file. While walking from @dir to @mnt_root, we record all the domain's * allowed accesses in @layer_masks_dom. * * This is similar to is_access_to_paths_allowed() but much simpler because it * only handles walking on the same mount point and only checks one set of * accesses. * * Returns: * - true if all the domain access rights are allowed for @dir; * - false if the walk reached @mnt_root. */ static bool collect_domain_accesses( const struct landlock_ruleset *const domain, const struct dentry *const mnt_root, struct dentry *dir, layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS]) { unsigned long access_dom; bool ret = false; if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) return true; if (is_nouser_or_private(dir)) return true; access_dom = landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, layer_masks_dom, LANDLOCK_KEY_INODE); dget(dir); while (true) { struct dentry *parent_dentry; /* Gets all layers allowing all domain accesses. */ if (landlock_unmask_layers(find_rule(domain, dir), access_dom, layer_masks_dom, ARRAY_SIZE(*layer_masks_dom))) { /* * Stops when all handled accesses are allowed by at * least one rule in each layer. */ ret = true; break; } /* We should not reach a root other than @mnt_root. */ if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir))) break; parent_dentry = dget_parent(dir); dput(dir); dir = parent_dentry; } dput(dir); return ret; } /** * current_check_refer_path - Check if a rename or link action is allowed * * @old_dentry: File or directory requested to be moved or linked. * @new_dir: Destination parent directory. * @new_dentry: Destination file or directory. * @removable: Sets to true if it is a rename operation. * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE. * * Because of its unprivileged constraints, Landlock relies on file hierarchies * (and not only inodes) to tie access rights to files. Being able to link or * rename a file hierarchy brings some challenges. Indeed, moving or linking a * file (i.e. creating a new reference to an inode) can have an impact on the * actions allowed for a set of files if it would change its parent directory * (i.e. reparenting). * * To avoid trivial access right bypasses, Landlock first checks if the file or * directory requested to be moved would gain new access rights inherited from * its new hierarchy. 
Before returning any error, Landlock then checks that * the parent source hierarchy and the destination hierarchy would allow the * link or rename action. If it is not the case, an error with EACCES is * returned to inform user space that there is no way to remove or create the * requested source file type. If it should be allowed but the new inherited * access rights would be greater than the source access rights, then the * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables * user space to abort the whole operation if there is no way to do it, or to * manually copy the source to the destination if this remains allowed, e.g. * because file creation is allowed on the destination directory but not direct * linking. * * To achieve this goal, the kernel needs to compare two file hierarchies: the * one identifying the source file or directory (including itself), and the * destination one. This can be seen as a multilayer partial ordering problem. * The kernel walks through these paths and collects in a matrix the access * rights that are denied per layer. These matrices are then compared to see * if the destination one has more (or the same) restrictions as the source * one. If this is the case, the requested action will not return EXDEV, which * doesn't mean the action is allowed. The parent hierarchy of the source * (i.e. parent directory), and the destination hierarchy must also be checked * to verify that they explicitly allow such action (i.e. referencing, * creation and potentially removal rights). The kernel implementation is then * required to rely on potentially four matrices of access rights: one for the * source file or directory (i.e. the child), a potentially other one for the * other source/destination (in case of RENAME_EXCHANGE), one for the source * parent hierarchy and a last one for the destination hierarchy. These * ephemeral matrices take some space on the stack, which limits the number of * layers to a deemed reasonable number: 16. * * Returns: * - 0 if access is allowed; * - -EXDEV if @old_dentry would inherit new access rights from @new_dir; * - -EACCES if file removal or creation is denied. */ static int current_check_refer_path(struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry, const bool removable, const bool exchange) { const struct landlock_ruleset *const dom = get_current_fs_domain(); bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; if (!dom) return 0; if (WARN_ON_ONCE(dom->num_layers < 1)) return -EACCES; if (unlikely(d_is_negative(old_dentry))) return -ENOENT; if (exchange) { if (unlikely(d_is_negative(new_dentry))) return -ENOENT; access_request_parent1 = get_mode_access(d_backing_inode(new_dentry)->i_mode); } else { access_request_parent1 = 0; } access_request_parent2 = get_mode_access(d_backing_inode(old_dentry)->i_mode); if (removable) { access_request_parent1 |= maybe_remove(old_dentry); access_request_parent2 |= maybe_remove(new_dentry); } /* The mount points are the same for old and new paths, cf. EXDEV. */ if (old_dentry->d_parent == new_dir->dentry) { /* * The LANDLOCK_ACCESS_FS_REFER access right is not required * for same-directory referer (i.e. no reparenting). 
*/ access_request_parent1 = landlock_init_layer_masks( dom, access_request_parent1 | access_request_parent2, &layer_masks_parent1, LANDLOCK_KEY_INODE); if (is_access_to_paths_allowed( dom, new_dir, access_request_parent1, &layer_masks_parent1, NULL, 0, NULL, NULL)) return 0; return -EACCES; } access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER; access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER; /* Saves the common mount point. */ mnt_dir.mnt = new_dir->mnt; mnt_dir.dentry = new_dir->mnt->mnt_root; /* new_dir->dentry is equal to new_dentry->d_parent */ allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, old_dentry->d_parent, &layer_masks_parent1); allow_parent2 = collect_domain_accesses( dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2); if (allow_parent1 && allow_parent2) return 0; /* * To be able to compare source and destination domain access rights, * take into account the @old_dentry access rights aggregated with its * parent access rights. This will be useful to compare with the * destination parent access rights. */ if (is_access_to_paths_allowed( dom, &mnt_dir, access_request_parent1, &layer_masks_parent1, old_dentry, access_request_parent2, &layer_masks_parent2, exchange ? new_dentry : NULL)) return 0; /* * This prioritizes EACCES over EXDEV for all actions, including * renames with RENAME_EXCHANGE. */ if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) || is_eacces(&layer_masks_parent2, access_request_parent2))) return -EACCES; /* * Gracefully forbids reparenting if the destination directory * hierarchy is not a superset of restrictions of the source directory * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the * source or the destination. */ return -EXDEV; } /* Inode hooks */ static void hook_inode_free_security(struct inode *const inode) { /* * All inodes must already have been untied from their object by * release_inode() or hook_sb_delete(). */ WARN_ON_ONCE(landlock_inode(inode)->object); } /* Super-block hooks */ /* * Release the inodes used in a security policy. * * Cf. fsnotify_unmount_inodes() and invalidate_inodes() */ static void hook_sb_delete(struct super_block *const sb) { struct inode *inode, *prev_inode = NULL; if (!landlock_initialized) return; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { struct landlock_object *object; /* Only handles referenced inodes. */ if (!atomic_read(&inode->i_count)) continue; /* * Protects against concurrent modification of inode (e.g. * from get_inode_object()). */ spin_lock(&inode->i_lock); /* * Checks I_FREEING and I_WILL_FREE to protect against a race * condition when release_inode() just called iput(), which * could lead to a NULL dereference of inode->security or a * second call to iput() for the same Landlock object. Also * checks I_NEW because such inode cannot be tied to an object. */ if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) { spin_unlock(&inode->i_lock); continue; } rcu_read_lock(); object = rcu_dereference(landlock_inode(inode)->object); if (!object) { rcu_read_unlock(); spin_unlock(&inode->i_lock); continue; } /* Keeps a reference to this inode until the next loop walk. */ __iget(inode); spin_unlock(&inode->i_lock); /* * If there is no concurrent release_inode() ongoing, then we * are in charge of calling iput() on this inode, otherwise we * will just wait for it to finish. 
*/ spin_lock(&object->lock); if (object->underobj == inode) { object->underobj = NULL; spin_unlock(&object->lock); rcu_read_unlock(); /* * Because object->underobj was not NULL, * release_inode() and get_inode_object() guarantee * that it is safe to reset * landlock_inode(inode)->object while it is not NULL. * It is therefore not necessary to lock inode->i_lock. */ rcu_assign_pointer(landlock_inode(inode)->object, NULL); /* * At this point, we own the ihold() reference that was * originally set up by get_inode_object() and the * __iget() reference that we just set in this loop * walk. Therefore the following call to iput() will * not sleep nor drop the inode because there is now at * least two references to it. */ iput(inode); } else { spin_unlock(&object->lock); rcu_read_unlock(); } if (prev_inode) { /* * At this point, we still own the __iget() reference * that we just set in this loop walk. Therefore we * can drop the list lock and know that the inode won't * disappear from under us until the next loop walk. */ spin_unlock(&sb->s_inode_list_lock); /* * We can now actually put the inode reference from the * previous loop walk, which is not needed anymore. */ iput(prev_inode); cond_resched(); spin_lock(&sb->s_inode_list_lock); } prev_inode = inode; } spin_unlock(&sb->s_inode_list_lock); /* Puts the inode reference from the last loop walk, if any. */ if (prev_inode) iput(prev_inode); /* Waits for pending iput() in release_inode(). */ wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read(&landlock_superblock(sb)->inode_refs)); } /* * Because a Landlock security policy is defined according to the filesystem * topology (i.e. the mount namespace), changing it may grant access to files * not previously allowed. * * To make it simple, deny any filesystem topology modification by landlocked * processes. Non-landlocked processes may still change the namespace of a * landlocked process, but this kind of threat must be handled by a system-wide * access-control security policy. * * This could be lifted in the future if Landlock can safely handle mount * namespace updates requested by a landlocked process. Indeed, we could * update the current domain (which is currently read-only) by taking into * account the accesses of the source and the destination of a new mount point. * However, it would also require to make all the child domains dynamically * inherit these new constraints. Anyway, for backward compatibility reasons, * a dedicated user space option would be required (e.g. as a ruleset flag). */ static int hook_sb_mount(const char *const dev_name, const struct path *const path, const char *const type, const unsigned long flags, void *const data) { if (!get_current_fs_domain()) return 0; return -EPERM; } static int hook_move_mount(const struct path *const from_path, const struct path *const to_path) { if (!get_current_fs_domain()) return 0; return -EPERM; } /* * Removing a mount point may reveal a previously hidden file hierarchy, which * may then grant access to files, which may have previously been forbidden. */ static int hook_sb_umount(struct vfsmount *const mnt, const int flags) { if (!get_current_fs_domain()) return 0; return -EPERM; } static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) { if (!get_current_fs_domain()) return 0; return -EPERM; } /* * pivot_root(2), like mount(2), changes the current mount namespace. It must * then be forbidden for a landlocked process. 
* * However, chroot(2) may be allowed because it only changes the relative root * directory of the current process. Moreover, it can be used to restrict the * view of the filesystem. */ static int hook_sb_pivotroot(const struct path *const old_path, const struct path *const new_path) { if (!get_current_fs_domain()) return 0; return -EPERM; } /* Path hooks */ static int hook_path_link(struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry) { return current_check_refer_path(old_dentry, new_dir, new_dentry, false, false); } static int hook_path_rename(const struct path *const old_dir, struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry, const unsigned int flags) { /* old_dir refers to old_dentry->d_parent and new_dir->mnt */ return current_check_refer_path(old_dentry, new_dir, new_dentry, true, !!(flags & RENAME_EXCHANGE)); } static int hook_path_mkdir(const struct path *const dir, struct dentry *const dentry, const umode_t mode) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR); } static int hook_path_mknod(const struct path *const dir, struct dentry *const dentry, const umode_t mode, const unsigned int dev) { const struct landlock_ruleset *const dom = get_current_fs_domain(); if (!dom) return 0; return check_access_path(dom, dir, get_mode_access(mode)); } static int hook_path_symlink(const struct path *const dir, struct dentry *const dentry, const char *const old_name) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM); } static int hook_path_unlink(const struct path *const dir, struct dentry *const dentry) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE); } static int hook_path_rmdir(const struct path *const dir, struct dentry *const dentry) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR); } static int hook_path_truncate(const struct path *const path) { return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE); } /* File hooks */ /** * get_required_file_open_access - Get access needed to open a file * * @file: File being opened. * * Returns the access rights that are required for opening the given file, * depending on the file type and open mode. */ static access_mask_t get_required_file_open_access(const struct file *const file) { access_mask_t access = 0; if (file->f_mode & FMODE_READ) { /* A directory can only be opened in read mode. */ if (S_ISDIR(file_inode(file)->i_mode)) return LANDLOCK_ACCESS_FS_READ_DIR; access = LANDLOCK_ACCESS_FS_READ_FILE; } if (file->f_mode & FMODE_WRITE) access |= LANDLOCK_ACCESS_FS_WRITE_FILE; /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */ if (file->f_flags & __FMODE_EXEC) access |= LANDLOCK_ACCESS_FS_EXECUTE; return access; } static int hook_file_alloc_security(struct file *const file) { /* * Grants all access rights, even if most of them are not checked later * on. It is more consistent. * * Notably, file descriptors for regular files can also be acquired * without going through the file_open hook, for example when using * memfd_create(2). 
*/ landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS; return 0; } static int hook_file_open(struct file *const file) { layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; access_mask_t open_access_request, full_access_request, allowed_access; const access_mask_t optional_access = LANDLOCK_ACCESS_FS_TRUNCATE; const struct landlock_ruleset *const dom = get_fs_domain(landlock_cred(file->f_cred)->domain); if (!dom) return 0; /* * Because a file may be opened with O_PATH, get_required_file_open_access() * may return 0. This case will be handled with a future Landlock * evolution. */ open_access_request = get_required_file_open_access(file); /* * We look up more access than what we immediately need for open(), so * that we can later authorize operations on opened files. */ full_access_request = open_access_request | optional_access; if (is_access_to_paths_allowed( dom, &file->f_path, landlock_init_layer_masks(dom, full_access_request, &layer_masks, LANDLOCK_KEY_INODE), &layer_masks, NULL, 0, NULL, NULL)) { allowed_access = full_access_request; } else { unsigned long access_bit; const unsigned long access_req = full_access_request; /* * Calculate the actual allowed access rights from layer_masks. * Add each access right to allowed_access which has not been * vetoed by any layer. */ allowed_access = 0; for_each_set_bit(access_bit, &access_req, ARRAY_SIZE(layer_masks)) { if (!layer_masks[access_bit]) allowed_access |= BIT_ULL(access_bit); } } /* * For operations on already opened files (i.e. ftruncate()), it is the * access rights at the time of open() which decide whether the * operation is permitted. Therefore, we record the relevant subset of * file access rights in the opened struct file. */ landlock_file(file)->allowed_access = allowed_access; if ((open_access_request & allowed_access) == open_access_request) return 0; return -EACCES; } static int hook_file_truncate(struct file *const file) { /* * Allows truncation if the truncate right was available at the time of * opening the file, to get a consistent access check as for read, write * and execute operations. * * Note: For checks done based on the file's Landlock allowed access, we * enforce them independently of whether the current thread is in a * Landlock domain, so that open files passed between independent * processes retain their behaviour. 
*/ if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE) return 0; return -EACCES; } static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_free_security, hook_inode_free_security), LSM_HOOK_INIT(sb_delete, hook_sb_delete), LSM_HOOK_INIT(sb_mount, hook_sb_mount), LSM_HOOK_INIT(move_mount, hook_move_mount), LSM_HOOK_INIT(sb_umount, hook_sb_umount), LSM_HOOK_INIT(sb_remount, hook_sb_remount), LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot), LSM_HOOK_INIT(path_link, hook_path_link), LSM_HOOK_INIT(path_rename, hook_path_rename), LSM_HOOK_INIT(path_mkdir, hook_path_mkdir), LSM_HOOK_INIT(path_mknod, hook_path_mknod), LSM_HOOK_INIT(path_symlink, hook_path_symlink), LSM_HOOK_INIT(path_unlink, hook_path_unlink), LSM_HOOK_INIT(path_rmdir, hook_path_rmdir), LSM_HOOK_INIT(path_truncate, hook_path_truncate), LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security), LSM_HOOK_INIT(file_open, hook_file_open), LSM_HOOK_INIT(file_truncate, hook_file_truncate), }; __init void landlock_add_fs_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), &landlock_lsmid); } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST /* clang-format off */ static struct kunit_case test_cases[] = { KUNIT_CASE(test_no_more_access), KUNIT_CASE(test_scope_to_request_with_exec_none), KUNIT_CASE(test_scope_to_request_with_exec_some), KUNIT_CASE(test_scope_to_request_without_access), KUNIT_CASE(test_is_eacces_with_none), KUNIT_CASE(test_is_eacces_with_refer), KUNIT_CASE(test_is_eacces_with_write), {} }; /* clang-format on */ static struct kunit_suite test_suite = { .name = "landlock_fs", .test_cases = test_cases, }; kunit_test_suite(test_suite); #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
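/*
 * Illustrative user-space sketch (not part of the Landlock code above): how a
 * sandboxed program might handle the EACCES/EXDEV distinction documented in
 * current_check_refer_path(). On EXDEV only the reparenting rename is refused,
 * so a manual copy may still be possible; on EACCES there is no way to perform
 * the operation at all. The paths below are hypothetical placeholders.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *src = "/sandbox/src/file";	/* hypothetical path */
	const char *dst = "/sandbox/dst/file";	/* hypothetical path */

	if (rename(src, dst) == 0)
		return EXIT_SUCCESS;

	if (errno == EXDEV) {
		/*
		 * Reparenting was refused, but creating a file in the
		 * destination directory may still be allowed: a manual
		 * copy + unlink of the source could be attempted here.
		 */
		fprintf(stderr, "rename refused (EXDEV): fall back to copy\n");
		return EXIT_FAILURE;
	}

	/* EACCES (or any other error): abort the whole operation. */
	perror("rename");
	return EXIT_FAILURE;
}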
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  linux/drivers/net/netconsole.c
 *
 *  Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 *
 *  This file contains the implementation of an IRQ-safe, crash-safe
 *  kernel console implementation that outputs kernel messages to the
 *  network.
 *
 * Modification history:
 *
 * 2001-09-17    started by Ingo Molnar.
* 2003-08-11 2.6 port by Matt Mackall * simplified options * generic card hooks * works non-modular * 2003-09-07 rewritten with netpoll api */ /**************************************************************** * ****************************************************************/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mm.h> #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/console.h> #include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/netpoll.h> #include <linux/inet.h> #include <linux/configfs.h> #include <linux/etherdevice.h> #include <linux/utsname.h> MODULE_AUTHOR("Maintainer: Matt Mackall <mpm@selenic.com>"); MODULE_DESCRIPTION("Console driver for network interfaces"); MODULE_LICENSE("GPL"); #define MAX_PARAM_LENGTH 256 #define MAX_USERDATA_ENTRY_LENGTH 256 #define MAX_USERDATA_VALUE_LENGTH 200 /* The number 3 comes from userdata entry format characters (' ', '=', '\n') */ #define MAX_USERDATA_NAME_LENGTH (MAX_USERDATA_ENTRY_LENGTH - \ MAX_USERDATA_VALUE_LENGTH - 3) #define MAX_USERDATA_ITEMS 16 #define MAX_PRINT_CHUNK 1000 static char config[MAX_PARAM_LENGTH]; module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0); MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]"); static bool oops_only; module_param(oops_only, bool, 0600); MODULE_PARM_DESC(oops_only, "Only log oops messages"); #define NETCONSOLE_PARAM_TARGET_PREFIX "cmdline" #ifndef MODULE static int __init option_setup(char *opt) { strscpy(config, opt, MAX_PARAM_LENGTH); return 1; } __setup("netconsole=", option_setup); #endif /* MODULE */ /* Linked list of all configured targets */ static LIST_HEAD(target_list); /* This needs to be a spinlock because write_msg() cannot sleep */ static DEFINE_SPINLOCK(target_list_lock); /* * Console driver for extended netconsoles. Registered on the first use to * avoid unnecessarily enabling ext message formatting. */ static struct console netconsole_ext; /** * struct netconsole_target - Represents a configured netconsole target. * @list: Links this target into the target_list. * @group: Links us into the configfs subsystem hierarchy. * @userdata_group: Links to the userdata configfs hierarchy * @userdata_complete: Cached, formatted string of append * @userdata_length: String length of userdata_complete * @enabled: On / off knob to enable / disable target. * Visible from userspace (read-write). * We maintain a strict 1:1 correspondence between this and * whether the corresponding netpoll is active or inactive. * Also, other parameters of a target may be modified at * runtime only when it is disabled (enabled == 0). * @extended: Denotes whether console is extended or not. * @release: Denotes whether kernel release version should be prepended * to the message. Depends on extended console. * @np: The netpoll structure for this target. 
* Contains the other userspace visible parameters: * dev_name (read-write) * local_port (read-write) * remote_port (read-write) * local_ip (read-write) * remote_ip (read-write) * local_mac (read-only) * remote_mac (read-write) */ struct netconsole_target { struct list_head list; #ifdef CONFIG_NETCONSOLE_DYNAMIC struct config_group group; struct config_group userdata_group; char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS]; size_t userdata_length; #endif bool enabled; bool extended; bool release; struct netpoll np; }; #ifdef CONFIG_NETCONSOLE_DYNAMIC static struct configfs_subsystem netconsole_subsys; static DEFINE_MUTEX(dynamic_netconsole_mutex); static int __init dynamic_netconsole_init(void) { config_group_init(&netconsole_subsys.su_group); mutex_init(&netconsole_subsys.su_mutex); return configfs_register_subsystem(&netconsole_subsys); } static void __exit dynamic_netconsole_exit(void) { configfs_unregister_subsystem(&netconsole_subsys); } /* * Targets that were created by parsing the boot/module option string * do not exist in the configfs hierarchy (and have NULL names) and will * never go away, so make these a no-op for them. */ static void netconsole_target_get(struct netconsole_target *nt) { if (config_item_name(&nt->group.cg_item)) config_group_get(&nt->group); } static void netconsole_target_put(struct netconsole_target *nt) { if (config_item_name(&nt->group.cg_item)) config_group_put(&nt->group); } #else /* !CONFIG_NETCONSOLE_DYNAMIC */ static int __init dynamic_netconsole_init(void) { return 0; } static void __exit dynamic_netconsole_exit(void) { } /* * No danger of targets going away from under us when dynamic * reconfigurability is off. */ static void netconsole_target_get(struct netconsole_target *nt) { } static void netconsole_target_put(struct netconsole_target *nt) { } static void populate_configfs_item(struct netconsole_target *nt, int cmdline_count) { } #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Allocate and initialize with defaults. * Note that these targets get their config_item fields zeroed-out. */ static struct netconsole_target *alloc_and_init(void) { struct netconsole_target *nt; nt = kzalloc(sizeof(*nt), GFP_KERNEL); if (!nt) return nt; if (IS_ENABLED(CONFIG_NETCONSOLE_EXTENDED_LOG)) nt->extended = true; if (IS_ENABLED(CONFIG_NETCONSOLE_PREPEND_RELEASE)) nt->release = true; nt->np.name = "netconsole"; strscpy(nt->np.dev_name, "eth0", IFNAMSIZ); nt->np.local_port = 6665; nt->np.remote_port = 6666; eth_broadcast_addr(nt->np.remote_mac); return nt; } #ifdef CONFIG_NETCONSOLE_DYNAMIC /* * Our subsystem hierarchy is: * * /sys/kernel/config/netconsole/ * | * <target>/ * | enabled * | release * | dev_name * | local_port * | remote_port * | local_ip * | remote_ip * | local_mac * | remote_mac * | userdata/ * | <key>/ * | value * | ... * | * <target>/... */ static struct netconsole_target *to_target(struct config_item *item) { struct config_group *cfg_group; cfg_group = to_config_group(item); if (!cfg_group) return NULL; return container_of(to_config_group(item), struct netconsole_target, group); } /* Get rid of possible trailing newline, returning the new length */ static void trim_newline(char *s, size_t maxlen) { size_t len; len = strnlen(s, maxlen); if (s[len - 1] == '\n') s[len - 1] = '\0'; } /* * Attribute operations for netconsole_target. 
*/ static ssize_t enabled_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->enabled); } static ssize_t extended_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->extended); } static ssize_t release_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->release); } static ssize_t dev_name_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%s\n", to_target(item)->np.dev_name); } static ssize_t local_port_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->np.local_port); } static ssize_t remote_port_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%d\n", to_target(item)->np.remote_port); } static ssize_t local_ip_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item); if (nt->np.ipv6) return sysfs_emit(buf, "%pI6c\n", &nt->np.local_ip.in6); else return sysfs_emit(buf, "%pI4\n", &nt->np.local_ip); } static ssize_t remote_ip_show(struct config_item *item, char *buf) { struct netconsole_target *nt = to_target(item); if (nt->np.ipv6) return sysfs_emit(buf, "%pI6c\n", &nt->np.remote_ip.in6); else return sysfs_emit(buf, "%pI4\n", &nt->np.remote_ip); } static ssize_t local_mac_show(struct config_item *item, char *buf) { struct net_device *dev = to_target(item)->np.dev; static const u8 bcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; return sysfs_emit(buf, "%pM\n", dev ? dev->dev_addr : bcast); } static ssize_t remote_mac_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%pM\n", to_target(item)->np.remote_mac); } /* * This one is special -- targets created through the configfs interface * are not enabled (and the corresponding netpoll activated) by default. * The user is expected to set the desired parameters first (which * would enable him to dynamically add new netpoll targets for new * network interfaces as and when they come up). */ static ssize_t enabled_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); unsigned long flags; bool enabled; int err; mutex_lock(&dynamic_netconsole_mutex); err = kstrtobool(buf, &enabled); if (err) goto out_unlock; err = -EINVAL; if ((bool)enabled == nt->enabled) { pr_info("network logging has already %s\n", nt->enabled ? "started" : "stopped"); goto out_unlock; } if (enabled) { /* true */ if (nt->release && !nt->extended) { pr_err("Not enabling netconsole. Release feature requires extended log message"); goto out_unlock; } if (nt->extended && !console_is_registered(&netconsole_ext)) register_console(&netconsole_ext); /* * Skip netpoll_parse_options() -- all the attributes are * already configured via configfs. Just print them out. 
*/ netpoll_print_options(&nt->np); err = netpoll_setup(&nt->np); if (err) goto out_unlock; pr_info("network logging started\n"); } else { /* false */ /* We need to disable the netconsole before cleaning it up * otherwise we might end up in write_msg() with * nt->np.dev == NULL and nt->enabled == true */ spin_lock_irqsave(&target_list_lock, flags); nt->enabled = false; spin_unlock_irqrestore(&target_list_lock, flags); netpoll_cleanup(&nt->np); } nt->enabled = enabled; mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return err; } static ssize_t release_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); bool release; int err; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); err = -EINVAL; goto out_unlock; } err = kstrtobool(buf, &release); if (err) goto out_unlock; nt->release = release; mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return err; } static ssize_t extended_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); bool extended; int err; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); err = -EINVAL; goto out_unlock; } err = kstrtobool(buf, &extended); if (err) goto out_unlock; nt->extended = extended; mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return err; } static ssize_t dev_name_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); mutex_unlock(&dynamic_netconsole_mutex); return -EINVAL; } strscpy(nt->np.dev_name, buf, IFNAMSIZ); trim_newline(nt->np.dev_name, IFNAMSIZ); mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); } static ssize_t local_port_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); int rv = -EINVAL; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } rv = kstrtou16(buf, 10, &nt->np.local_port); if (rv < 0) goto out_unlock; mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return rv; } static ssize_t remote_port_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); int rv = -EINVAL; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } rv = kstrtou16(buf, 10, &nt->np.remote_port); if (rv < 0) goto out_unlock; mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return rv; } static ssize_t local_ip_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); mutex_lock(&dynamic_netconsole_mutex); if 
(nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } if (strnchr(buf, count, ':')) { const char *end; if (in6_pton(buf, count, nt->np.local_ip.in6.s6_addr, -1, &end) > 0) { if (*end && *end != '\n') { pr_err("invalid IPv6 address at: <%c>\n", *end); goto out_unlock; } nt->np.ipv6 = true; } else goto out_unlock; } else { if (!nt->np.ipv6) nt->np.local_ip.ip = in_aton(buf); else goto out_unlock; } mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return -EINVAL; } static ssize_t remote_ip_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } if (strnchr(buf, count, ':')) { const char *end; if (in6_pton(buf, count, nt->np.remote_ip.in6.s6_addr, -1, &end) > 0) { if (*end && *end != '\n') { pr_err("invalid IPv6 address at: <%c>\n", *end); goto out_unlock; } nt->np.ipv6 = true; } else goto out_unlock; } else { if (!nt->np.ipv6) nt->np.remote_ip.ip = in_aton(buf); else goto out_unlock; } mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return -EINVAL; } static ssize_t remote_mac_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); u8 remote_mac[ETH_ALEN]; mutex_lock(&dynamic_netconsole_mutex); if (nt->enabled) { pr_err("target (%s) is enabled, disable to update parameters\n", config_item_name(&nt->group.cg_item)); goto out_unlock; } if (!mac_pton(buf, remote_mac)) goto out_unlock; if (buf[3 * ETH_ALEN - 1] && buf[3 * ETH_ALEN - 1] != '\n') goto out_unlock; memcpy(nt->np.remote_mac, remote_mac, ETH_ALEN); mutex_unlock(&dynamic_netconsole_mutex); return strnlen(buf, count); out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return -EINVAL; } struct userdatum { struct config_item item; char value[MAX_USERDATA_VALUE_LENGTH]; }; static struct userdatum *to_userdatum(struct config_item *item) { return container_of(item, struct userdatum, item); } struct userdata { struct config_group group; }; static struct userdata *to_userdata(struct config_item *item) { return container_of(to_config_group(item), struct userdata, group); } static struct netconsole_target *userdata_to_target(struct userdata *ud) { struct config_group *netconsole_group; netconsole_group = to_config_group(ud->group.cg_item.ci_parent); return to_target(&netconsole_group->cg_item); } static ssize_t userdatum_value_show(struct config_item *item, char *buf) { return sysfs_emit(buf, "%s\n", &(to_userdatum(item)->value[0])); } static void update_userdata(struct netconsole_target *nt) { int complete_idx = 0, child_count = 0; struct list_head *entry; /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; nt->userdata_complete[0] = 0; list_for_each(entry, &nt->userdata_group.cg_children) { struct userdatum *udm_item; struct config_item *item; if (child_count >= MAX_USERDATA_ITEMS) break; child_count++; item = container_of(entry, struct config_item, ci_entry); udm_item = to_userdatum(item); /* Skip userdata with no value set */ if (strnlen(udm_item->value, MAX_USERDATA_VALUE_LENGTH) == 0) continue; /* This doesn't overflow userdata_complete since it will write * one entry length 
(1/MAX_USERDATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ complete_idx += scnprintf(&nt->userdata_complete[complete_idx], MAX_USERDATA_ENTRY_LENGTH, " %s=%s\n", item->ci_name, udm_item->value); } nt->userdata_length = strnlen(nt->userdata_complete, sizeof(nt->userdata_complete)); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, size_t count) { struct userdatum *udm = to_userdatum(item); struct netconsole_target *nt; struct userdata *ud; int ret; if (count > MAX_USERDATA_VALUE_LENGTH) return -EMSGSIZE; mutex_lock(&dynamic_netconsole_mutex); ret = strscpy(udm->value, buf, sizeof(udm->value)); if (ret < 0) goto out_unlock; trim_newline(udm->value, sizeof(udm->value)); ud = to_userdata(item->ci_parent); nt = userdata_to_target(ud); update_userdata(nt); mutex_unlock(&dynamic_netconsole_mutex); return count; out_unlock: mutex_unlock(&dynamic_netconsole_mutex); return ret; } CONFIGFS_ATTR(userdatum_, value); static struct configfs_attribute *userdatum_attrs[] = { &userdatum_attr_value, NULL, }; static void userdatum_release(struct config_item *item) { kfree(to_userdatum(item)); } static struct configfs_item_operations userdatum_ops = { .release = userdatum_release, }; static const struct config_item_type userdatum_type = { .ct_item_ops = &userdatum_ops, .ct_attrs = userdatum_attrs, .ct_owner = THIS_MODULE, }; static struct config_item *userdatum_make_item(struct config_group *group, const char *name) { struct netconsole_target *nt; struct userdatum *udm; struct userdata *ud; size_t child_count; if (strlen(name) > MAX_USERDATA_NAME_LENGTH) return ERR_PTR(-ENAMETOOLONG); ud = to_userdata(&group->cg_item); nt = userdata_to_target(ud); child_count = list_count_nodes(&nt->userdata_group.cg_children); if (child_count >= MAX_USERDATA_ITEMS) return ERR_PTR(-ENOSPC); udm = kzalloc(sizeof(*udm), GFP_KERNEL); if (!udm) return ERR_PTR(-ENOMEM); config_item_init_type_name(&udm->item, name, &userdatum_type); return &udm->item; } static void userdatum_drop(struct config_group *group, struct config_item *item) { struct netconsole_target *nt; struct userdata *ud; ud = to_userdata(&group->cg_item); nt = userdata_to_target(ud); mutex_lock(&dynamic_netconsole_mutex); update_userdata(nt); config_item_put(item); mutex_unlock(&dynamic_netconsole_mutex); } static struct configfs_attribute *userdata_attrs[] = { NULL, }; static struct configfs_group_operations userdata_ops = { .make_item = userdatum_make_item, .drop_item = userdatum_drop, }; static struct config_item_type userdata_type = { .ct_item_ops = &userdatum_ops, .ct_group_ops = &userdata_ops, .ct_attrs = userdata_attrs, .ct_owner = THIS_MODULE, }; CONFIGFS_ATTR(, enabled); CONFIGFS_ATTR(, extended); CONFIGFS_ATTR(, dev_name); CONFIGFS_ATTR(, local_port); CONFIGFS_ATTR(, remote_port); CONFIGFS_ATTR(, local_ip); CONFIGFS_ATTR(, remote_ip); CONFIGFS_ATTR_RO(, local_mac); CONFIGFS_ATTR(, remote_mac); CONFIGFS_ATTR(, release); static struct configfs_attribute *netconsole_target_attrs[] = { &attr_enabled, &attr_extended, &attr_release, &attr_dev_name, &attr_local_port, &attr_remote_port, &attr_local_ip, &attr_remote_ip, &attr_local_mac, &attr_remote_mac, NULL, }; /* * Item operations and type for netconsole_target. 
*/ static void netconsole_target_release(struct config_item *item) { kfree(to_target(item)); } static struct configfs_item_operations netconsole_target_item_ops = { .release = netconsole_target_release, }; static const struct config_item_type netconsole_target_type = { .ct_attrs = netconsole_target_attrs, .ct_item_ops = &netconsole_target_item_ops, .ct_owner = THIS_MODULE, }; static void init_target_config_group(struct netconsole_target *nt, const char *name) { config_group_init_type_name(&nt->group, name, &netconsole_target_type); config_group_init_type_name(&nt->userdata_group, "userdata", &userdata_type); configfs_add_default_group(&nt->userdata_group, &nt->group); } static struct netconsole_target *find_cmdline_target(const char *name) { struct netconsole_target *nt, *ret = NULL; unsigned long flags; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { if (!strcmp(nt->group.cg_item.ci_name, name)) { ret = nt; break; } } spin_unlock_irqrestore(&target_list_lock, flags); return ret; } /* * Group operations and type for netconsole_subsys. */ static struct config_group *make_netconsole_target(struct config_group *group, const char *name) { struct netconsole_target *nt; unsigned long flags; /* Checking if a target by this name was created at boot time. If so, * attach a configfs entry to that target. This enables dynamic * control. */ if (!strncmp(name, NETCONSOLE_PARAM_TARGET_PREFIX, strlen(NETCONSOLE_PARAM_TARGET_PREFIX))) { nt = find_cmdline_target(name); if (nt) { init_target_config_group(nt, name); return &nt->group; } } nt = alloc_and_init(); if (!nt) return ERR_PTR(-ENOMEM); /* Initialize the config_group member */ init_target_config_group(nt, name); /* Adding, but it is disabled */ spin_lock_irqsave(&target_list_lock, flags); list_add(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); return &nt->group; } static void drop_netconsole_target(struct config_group *group, struct config_item *item) { unsigned long flags; struct netconsole_target *nt = to_target(item); spin_lock_irqsave(&target_list_lock, flags); list_del(&nt->list); spin_unlock_irqrestore(&target_list_lock, flags); /* * The target may have never been enabled, or was manually disabled * before being removed so netpoll may have already been cleaned up. 
*/ if (nt->enabled) netpoll_cleanup(&nt->np); config_item_put(&nt->group.cg_item); } static struct configfs_group_operations netconsole_subsys_group_ops = { .make_group = make_netconsole_target, .drop_item = drop_netconsole_target, }; static const struct config_item_type netconsole_subsys_type = { .ct_group_ops = &netconsole_subsys_group_ops, .ct_owner = THIS_MODULE, }; /* The netconsole configfs subsystem */ static struct configfs_subsystem netconsole_subsys = { .su_group = { .cg_item = { .ci_namebuf = "netconsole", .ci_type = &netconsole_subsys_type, }, }, }; static void populate_configfs_item(struct netconsole_target *nt, int cmdline_count) { char target_name[16]; snprintf(target_name, sizeof(target_name), "%s%d", NETCONSOLE_PARAM_TARGET_PREFIX, cmdline_count); init_target_config_group(nt, target_name); } #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Handle network interface device notifications */ static int netconsole_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { unsigned long flags; struct netconsole_target *nt; struct net_device *dev = netdev_notifier_info_to_dev(ptr); bool stopped = false; if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || event == NETDEV_RELEASE || event == NETDEV_JOIN)) goto done; spin_lock_irqsave(&target_list_lock, flags); restart: list_for_each_entry(nt, &target_list, list) { netconsole_target_get(nt); if (nt->np.dev == dev) { switch (event) { case NETDEV_CHANGENAME: strscpy(nt->np.dev_name, dev->name, IFNAMSIZ); break; case NETDEV_RELEASE: case NETDEV_JOIN: case NETDEV_UNREGISTER: /* rtnl_lock already held * we might sleep in __netpoll_cleanup() */ spin_unlock_irqrestore(&target_list_lock, flags); __netpoll_cleanup(&nt->np); spin_lock_irqsave(&target_list_lock, flags); netdev_put(nt->np.dev, &nt->np.dev_tracker); nt->np.dev = NULL; nt->enabled = false; stopped = true; netconsole_target_put(nt); goto restart; } } netconsole_target_put(nt); } spin_unlock_irqrestore(&target_list_lock, flags); if (stopped) { const char *msg = "had an event"; switch (event) { case NETDEV_UNREGISTER: msg = "unregistered"; break; case NETDEV_RELEASE: msg = "released slaves"; break; case NETDEV_JOIN: msg = "is joining a master device"; break; } pr_info("network logging stopped on interface %s as it %s\n", dev->name, msg); } done: return NOTIFY_DONE; } static struct notifier_block netconsole_netdev_notifier = { .notifier_call = netconsole_netdev_event, }; /** * send_ext_msg_udp - send extended log message to target * @nt: target to send message to * @msg: extended log message to send * @msg_len: length of message * * Transfer extended log @msg to @nt. If @msg is longer than * MAX_PRINT_CHUNK, it'll be split and transmitted in multiple chunks with * ncfrag header field added to identify them. 
*/ static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg, int msg_len) { static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */ const char *header, *body; int offset = 0; int header_len, body_len; const char *msg_ready = msg; const char *release; int release_len = 0; int userdata_len = 0; char *userdata = NULL; #ifdef CONFIG_NETCONSOLE_DYNAMIC userdata = nt->userdata_complete; userdata_len = nt->userdata_length; #endif if (nt->release) { release = init_utsname()->release; release_len = strlen(release) + 1; } if (msg_len + release_len + userdata_len <= MAX_PRINT_CHUNK) { /* No fragmentation needed */ if (nt->release) { scnprintf(buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); msg_len += release_len; } else { memcpy(buf, msg, msg_len); } if (userdata) msg_len += scnprintf(&buf[msg_len], MAX_PRINT_CHUNK - msg_len, "%s", userdata); msg_ready = buf; netpoll_send_udp(&nt->np, msg_ready, msg_len); return; } /* need to insert extra header fields, detect header and body */ header = msg; body = memchr(msg, ';', msg_len); if (WARN_ON_ONCE(!body)) return; header_len = body - header; body_len = msg_len - header_len - 1; body++; /* * Transfer multiple chunks with the following extra header. * "ncfrag=<byte-offset>/<total-bytes>" */ if (nt->release) scnprintf(buf, MAX_PRINT_CHUNK, "%s,", release); memcpy(buf + release_len, header, header_len); header_len += release_len; while (offset < body_len + userdata_len) { int this_header = header_len; int this_offset = 0; int this_chunk = 0; this_header += scnprintf(buf + this_header, sizeof(buf) - this_header, ",ncfrag=%d/%d;", offset, body_len + userdata_len); /* Not all body data has been written yet */ if (offset < body_len) { this_chunk = min(body_len - offset, MAX_PRINT_CHUNK - this_header); if (WARN_ON_ONCE(this_chunk <= 0)) return; memcpy(buf + this_header, body + offset, this_chunk); this_offset += this_chunk; } /* Body is fully written and there is pending userdata to write, * append userdata in this chunk */ if (offset + this_offset >= body_len && offset + this_offset < userdata_len + body_len) { int sent_userdata = (offset + this_offset) - body_len; int preceding_bytes = this_chunk + this_header; if (WARN_ON_ONCE(sent_userdata < 0)) return; this_chunk = min(userdata_len - sent_userdata, MAX_PRINT_CHUNK - preceding_bytes); if (WARN_ON_ONCE(this_chunk <= 0)) return; memcpy(buf + this_header + this_offset, userdata + sent_userdata, this_chunk); this_offset += this_chunk; } netpoll_send_udp(&nt->np, buf, this_header + this_offset); offset += this_offset; } } static void write_ext_msg(struct console *con, const char *msg, unsigned int len) { struct netconsole_target *nt; unsigned long flags; if ((oops_only && !oops_in_progress) || list_empty(&target_list)) return; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) if (nt->extended && nt->enabled && netif_running(nt->np.dev)) send_ext_msg_udp(nt, msg, len); spin_unlock_irqrestore(&target_list_lock, flags); } static void write_msg(struct console *con, const char *msg, unsigned int len) { int frag, left; unsigned long flags; struct netconsole_target *nt; const char *tmp; if (oops_only && !oops_in_progress) return; /* Avoid taking lock and disabling interrupts unnecessarily */ if (list_empty(&target_list)) return; spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { if (!nt->extended && nt->enabled && netif_running(nt->np.dev)) { /* * We nest this inside the for-each-target loop above * so that we're 
able to get as much logging out to * at least one target if we die inside here, instead * of unnecessarily keeping all targets in lock-step. */ tmp = msg; for (left = len; left;) { frag = min(left, MAX_PRINT_CHUNK); netpoll_send_udp(&nt->np, tmp, frag); tmp += frag; left -= frag; } } } spin_unlock_irqrestore(&target_list_lock, flags); } /* Allocate new target (from boot/module param) and setup netpoll for it */ static struct netconsole_target *alloc_param_target(char *target_config, int cmdline_count) { struct netconsole_target *nt; int err; nt = alloc_and_init(); if (!nt) { err = -ENOMEM; goto fail; } if (*target_config == '+') { nt->extended = true; target_config++; } if (*target_config == 'r') { if (!nt->extended) { pr_err("Netconsole configuration error. Release feature requires extended log message"); err = -EINVAL; goto fail; } nt->release = true; target_config++; } /* Parse parameters and setup netpoll */ err = netpoll_parse_options(&nt->np, target_config); if (err) goto fail; err = netpoll_setup(&nt->np); if (err) goto fail; populate_configfs_item(nt, cmdline_count); nt->enabled = true; return nt; fail: kfree(nt); return ERR_PTR(err); } /* Cleanup netpoll for given target (from boot/module param) and free it */ static void free_param_target(struct netconsole_target *nt) { netpoll_cleanup(&nt->np); kfree(nt); } static struct console netconsole_ext = { .name = "netcon_ext", .flags = CON_ENABLED | CON_EXTENDED, .write = write_ext_msg, }; static struct console netconsole = { .name = "netcon", .flags = CON_ENABLED, .write = write_msg, }; static int __init init_netconsole(void) { int err; struct netconsole_target *nt, *tmp; unsigned int count = 0; bool extended = false; unsigned long flags; char *target_config; char *input = config; if (strnlen(input, MAX_PARAM_LENGTH)) { while ((target_config = strsep(&input, ";"))) { nt = alloc_param_target(target_config, count); if (IS_ERR(nt)) { err = PTR_ERR(nt); goto fail; } /* Dump existing printks when we register */ if (nt->extended) { extended = true; netconsole_ext.flags |= CON_PRINTBUFFER; } else { netconsole.flags |= CON_PRINTBUFFER; } spin_lock_irqsave(&target_list_lock, flags); list_add(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); count++; } } err = register_netdevice_notifier(&netconsole_netdev_notifier); if (err) goto fail; err = dynamic_netconsole_init(); if (err) goto undonotifier; if (extended) register_console(&netconsole_ext); register_console(&netconsole); pr_info("network logging started\n"); return err; undonotifier: unregister_netdevice_notifier(&netconsole_netdev_notifier); fail: pr_err("cleaning up\n"); /* * Remove all targets and destroy them (only targets created * from the boot/module option exist here). Skipping the list * lock is safe here, and netpoll_cleanup() will sleep. */ list_for_each_entry_safe(nt, tmp, &target_list, list) { list_del(&nt->list); free_param_target(nt); } return err; } static void __exit cleanup_netconsole(void) { struct netconsole_target *nt, *tmp; if (console_is_registered(&netconsole_ext)) unregister_console(&netconsole_ext); unregister_console(&netconsole); dynamic_netconsole_exit(); unregister_netdevice_notifier(&netconsole_netdev_notifier); /* * Targets created via configfs pin references on our module * and would first be rmdir(2)'ed from userspace. We reach * here only when they are already destroyed, and only those * created from the boot/module option are left, so remove and * destroy them. 
Skipping the list lock is safe here, and * netpoll_cleanup() will sleep. */ list_for_each_entry_safe(nt, tmp, &target_list, list) { list_del(&nt->list); free_param_target(nt); } } /* * Use late_initcall to ensure netconsole is * initialized after network device driver if built-in. * * late_initcall() and module_init() are identical if built as module. */ late_initcall(init_netconsole); module_exit(cleanup_netconsole);
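/*
 * Illustrative user-space sketch (not part of the driver above): creating and
 * enabling a dynamic target through the configfs hierarchy handled by
 * make_netconsole_target() and the *_store() attribute handlers. The target
 * name, interface and addresses are placeholders, and configfs is assumed to
 * be mounted at /sys/kernel/config.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

static int write_attr(const char *dir, const char *attr, const char *val)
{
	char path[256];
	int fd;

	snprintf(path, sizeof(path), "%s/%s", dir, attr);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Placeholder target name; any name not prefixed "cmdline" creates a new target. */
	const char *tgt = "/sys/kernel/config/netconsole/target1";

	/* mkdir(2) triggers make_netconsole_target(); the target starts disabled. */
	if (mkdir(tgt, 0755) && errno != EEXIST) {
		perror("mkdir");
		return 1;
	}

	/* Parameters may only be changed while the target is disabled. */
	write_attr(tgt, "dev_name", "eth0\n");
	write_attr(tgt, "remote_ip", "192.0.2.1\n");	/* placeholder address */
	write_attr(tgt, "remote_port", "6666\n");

	/* Writing 1 reaches enabled_store(), which calls netpoll_setup(). */
	return write_attr(tgt, "enabled", "1\n") < 0 ? 1 : 0;
}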
/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * RCU-based infrastructure for lightweight reader-writer locking
 *
 * Copyright (c) 2015, Red Hat, Inc.
 *
 * Author: Oleg Nesterov <oleg@redhat.com>
 */

#ifndef _LINUX_RCU_SYNC_H_
#define _LINUX_RCU_SYNC_H_

#include <linux/wait.h>
#include <linux/rcupdate.h>

/* Structure to mediate between updaters and fastpath-using readers. */
struct rcu_sync {
	int			gp_state;
	int			gp_count;
	wait_queue_head_t	gp_wait;

	struct rcu_head		cb_head;
};

/**
 * rcu_sync_is_idle() - Are readers permitted to use their fastpaths?
 * @rsp: Pointer to rcu_sync structure to use for synchronization
 *
 * Returns true if readers are permitted to use their fastpaths.  Must be
 * invoked within some flavor of RCU read-side critical section.
 */
static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
{
	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
			 "suspicious rcu_sync_is_idle() usage");
	return !READ_ONCE(rsp->gp_state); /* GP_IDLE */
}

extern void rcu_sync_init(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *);

#define __RCU_SYNC_INITIALIZER(name) {					\
		.gp_state = 0,						\
		.gp_count = 0,						\
		.gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait),	\
	}

#define	DEFINE_RCU_SYNC(name)	\
	struct rcu_sync name = __RCU_SYNC_INITIALIZER(name)

#endif /* _LINUX_RCU_SYNC_H_ */
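/*
 * Minimal kernel-style sketch (not part of the header above) of the intended
 * usage pattern: a reader tests rcu_sync_is_idle() inside an RCU read-side
 * critical section to choose a fastpath, while an updater brackets its
 * slow-path phase with rcu_sync_enter()/rcu_sync_exit(). The example_*
 * identifiers are hypothetical; percpu-rwsem is an in-tree user of this API.
 */
#include <linux/rcupdate.h>
#include <linux/rcu_sync.h>

static DEFINE_RCU_SYNC(example_rss);

static void example_reader(void)
{
	rcu_read_lock();
	if (rcu_sync_is_idle(&example_rss)) {
		/*
		 * Fastpath: no updater is active.  Work done here is covered
		 * by the RCU read-side critical section that the updater's
		 * rcu_sync_enter() waits for.
		 */
	} else {
		/* Slowpath: fall back to heavier synchronization. */
	}
	rcu_read_unlock();
}

static void example_updater(void)
{
	rcu_sync_enter(&example_rss);	/* force readers onto the slowpath */
	/* ... perform the update that the fastpath must not race with ... */
	rcu_sync_exit(&example_rss);	/* fastpaths resume after a grace period */
}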
// SPDX-License-Identifier: GPL-2.0
/* dvb-usb-remote.c is part of the DVB USB library.
 *
 * Copyright (C) 2004-6 Patrick Boettcher (patrick.boettcher@posteo.de)
 * see dvb-usb-init.c for copyright information.
 *
 * This file contains functions for initializing the input-device and for
 * handling remote-control-queries.
 */
#include "dvb-usb-common.h"
#include <linux/usb/input.h>

static unsigned int
legacy_dvb_usb_get_keymap_index(const struct input_keymap_entry *ke,
				struct rc_map_table *keymap,
				unsigned int keymap_size)
{
	unsigned int index;
	unsigned int scancode;

	if (ke->flags & INPUT_KEYMAP_BY_INDEX) {
		index = ke->index;
	} else {
		if (input_scancode_to_scalar(ke, &scancode))
			return keymap_size;

		/* See if we can match the raw key code.
*/ for (index = 0; index < keymap_size; index++) if (keymap[index].scancode == scancode) break; /* See if there is an unused hole in the map */ if (index >= keymap_size) { for (index = 0; index < keymap_size; index++) { if (keymap[index].keycode == KEY_RESERVED || keymap[index].keycode == KEY_UNKNOWN) { break; } } } } return index; } static int legacy_dvb_usb_getkeycode(struct input_dev *dev, struct input_keymap_entry *ke) { struct dvb_usb_device *d = input_get_drvdata(dev); struct rc_map_table *keymap = d->props.rc.legacy.rc_map_table; unsigned int keymap_size = d->props.rc.legacy.rc_map_size; unsigned int index; index = legacy_dvb_usb_get_keymap_index(ke, keymap, keymap_size); if (index >= keymap_size) return -EINVAL; ke->keycode = keymap[index].keycode; if (ke->keycode == KEY_UNKNOWN) ke->keycode = KEY_RESERVED; ke->len = sizeof(keymap[index].scancode); memcpy(&ke->scancode, &keymap[index].scancode, ke->len); ke->index = index; return 0; } static int legacy_dvb_usb_setkeycode(struct input_dev *dev, const struct input_keymap_entry *ke, unsigned int *old_keycode) { struct dvb_usb_device *d = input_get_drvdata(dev); struct rc_map_table *keymap = d->props.rc.legacy.rc_map_table; unsigned int keymap_size = d->props.rc.legacy.rc_map_size; unsigned int index; index = legacy_dvb_usb_get_keymap_index(ke, keymap, keymap_size); /* * FIXME: Currently, it is not possible to increase the size of * scancode table. For it to happen, one possibility * would be to allocate a table with key_map_size + 1, * copying data, appending the new key on it, and freeing * the old one - or maybe just allocating some spare space */ if (index >= keymap_size) return -EINVAL; *old_keycode = keymap[index].keycode; keymap->keycode = ke->keycode; __set_bit(ke->keycode, dev->keybit); if (*old_keycode != KEY_RESERVED) { __clear_bit(*old_keycode, dev->keybit); for (index = 0; index < keymap_size; index++) { if (keymap[index].keycode == *old_keycode) { __set_bit(*old_keycode, dev->keybit); break; } } } return 0; } /* Remote-control poll function - called every dib->rc_query_interval ms to see * whether the remote control has received anything. * * TODO: Fix the repeat rate of the input device. */ static void legacy_dvb_usb_read_remote_control(struct work_struct *work) { struct dvb_usb_device *d = container_of(work, struct dvb_usb_device, rc_query_work.work); u32 event; int state; /* TODO: need a lock here. We can simply skip checking for the remote control if we're busy. */ /* when the parameter has been set to 1 via sysfs while the driver was running */ if (dvb_usb_disable_rc_polling) return; if (d->props.rc.legacy.rc_query(d,&event,&state)) { err("error while querying for an remote control event."); goto schedule; } switch (state) { case REMOTE_NO_KEY_PRESSED: break; case REMOTE_KEY_PRESSED: deb_rc("key pressed\n"); d->last_event = event; input_event(d->input_dev, EV_KEY, event, 1); input_sync(d->input_dev); input_event(d->input_dev, EV_KEY, d->last_event, 0); input_sync(d->input_dev); break; case REMOTE_KEY_REPEAT: deb_rc("key repeated\n"); input_event(d->input_dev, EV_KEY, event, 1); input_sync(d->input_dev); input_event(d->input_dev, EV_KEY, d->last_event, 0); input_sync(d->input_dev); break; default: break; } /* improved repeat handling ??? 
switch (state) { case REMOTE_NO_KEY_PRESSED: deb_rc("NO KEY PRESSED\n"); if (d->last_state != REMOTE_NO_KEY_PRESSED) { deb_rc("releasing event %d\n",d->last_event); input_event(d->rc_input_dev, EV_KEY, d->last_event, 0); input_sync(d->rc_input_dev); } d->last_state = REMOTE_NO_KEY_PRESSED; d->last_event = 0; break; case REMOTE_KEY_PRESSED: deb_rc("KEY PRESSED\n"); deb_rc("pressing event %d\n",event); input_event(d->rc_input_dev, EV_KEY, event, 1); input_sync(d->rc_input_dev); d->last_event = event; d->last_state = REMOTE_KEY_PRESSED; break; case REMOTE_KEY_REPEAT: deb_rc("KEY_REPEAT\n"); if (d->last_state != REMOTE_NO_KEY_PRESSED) { deb_rc("repeating event %d\n",d->last_event); input_event(d->rc_input_dev, EV_KEY, d->last_event, 2); input_sync(d->rc_input_dev); d->last_state = REMOTE_KEY_REPEAT; } default: break; } */ schedule: schedule_delayed_work(&d->rc_query_work,msecs_to_jiffies(d->props.rc.legacy.rc_interval)); } static int legacy_dvb_usb_remote_init(struct dvb_usb_device *d) { int i, err, rc_interval; struct input_dev *input_dev; input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; input_dev->evbit[0] = BIT_MASK(EV_KEY); input_dev->name = "IR-receiver inside an USB DVB receiver"; input_dev->phys = d->rc_phys; usb_to_input_id(d->udev, &input_dev->id); input_dev->dev.parent = &d->udev->dev; d->input_dev = input_dev; d->rc_dev = NULL; input_dev->getkeycode = legacy_dvb_usb_getkeycode; input_dev->setkeycode = legacy_dvb_usb_setkeycode; /* set the bits for the keys */ deb_rc("key map size: %d\n", d->props.rc.legacy.rc_map_size); for (i = 0; i < d->props.rc.legacy.rc_map_size; i++) { deb_rc("setting bit for event %d item %d\n", d->props.rc.legacy.rc_map_table[i].keycode, i); set_bit(d->props.rc.legacy.rc_map_table[i].keycode, input_dev->keybit); } /* setting these two values to non-zero, we have to manage key repeats */ input_dev->rep[REP_PERIOD] = d->props.rc.legacy.rc_interval; input_dev->rep[REP_DELAY] = d->props.rc.legacy.rc_interval + 150; input_set_drvdata(input_dev, d); err = input_register_device(input_dev); if (err) input_free_device(input_dev); rc_interval = d->props.rc.legacy.rc_interval; INIT_DELAYED_WORK(&d->rc_query_work, legacy_dvb_usb_read_remote_control); info("schedule remote query interval to %d msecs.", rc_interval); schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(rc_interval)); d->state |= DVB_USB_STATE_REMOTE; return err; } /* Remote-control poll function - called every dib->rc_query_interval ms to see * whether the remote control has received anything. * * TODO: Fix the repeat rate of the input device. */ static void dvb_usb_read_remote_control(struct work_struct *work) { struct dvb_usb_device *d = container_of(work, struct dvb_usb_device, rc_query_work.work); int err; /* TODO: need a lock here. We can simply skip checking for the remote control if we're busy. 
*/ /* when the parameter has been set to 1 via sysfs while the * driver was running, or when bulk mode is enabled after IR init */ if (dvb_usb_disable_rc_polling || d->props.rc.core.bulk_mode) return; err = d->props.rc.core.rc_query(d); if (err) err("error %d while querying for an remote control event.", err); schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(d->props.rc.core.rc_interval)); } static int rc_core_dvb_usb_remote_init(struct dvb_usb_device *d) { int err, rc_interval; struct rc_dev *dev; dev = rc_allocate_device(d->props.rc.core.driver_type); if (!dev) return -ENOMEM; dev->driver_name = d->props.rc.core.module_name; dev->map_name = d->props.rc.core.rc_codes; dev->change_protocol = d->props.rc.core.change_protocol; dev->allowed_protocols = d->props.rc.core.allowed_protos; usb_to_input_id(d->udev, &dev->input_id); dev->device_name = d->desc->name; dev->input_phys = d->rc_phys; dev->dev.parent = &d->udev->dev; dev->priv = d; dev->scancode_mask = d->props.rc.core.scancode_mask; err = rc_register_device(dev); if (err < 0) { rc_free_device(dev); return err; } d->input_dev = NULL; d->rc_dev = dev; if (!d->props.rc.core.rc_query || d->props.rc.core.bulk_mode) return 0; /* Polling mode - initialize a work queue for handling it */ INIT_DELAYED_WORK(&d->rc_query_work, dvb_usb_read_remote_control); rc_interval = d->props.rc.core.rc_interval; info("schedule remote query interval to %d msecs.", rc_interval); schedule_delayed_work(&d->rc_query_work, msecs_to_jiffies(rc_interval)); return 0; } int dvb_usb_remote_init(struct dvb_usb_device *d) { int err; if (dvb_usb_disable_rc_polling) return 0; if (d->props.rc.legacy.rc_map_table && d->props.rc.legacy.rc_query) d->props.rc.mode = DVB_RC_LEGACY; else if (d->props.rc.core.rc_codes) d->props.rc.mode = DVB_RC_CORE; else return 0; usb_make_path(d->udev, d->rc_phys, sizeof(d->rc_phys)); strlcat(d->rc_phys, "/ir0", sizeof(d->rc_phys)); /* Start the remote-control polling. */ if (d->props.rc.legacy.rc_interval < 40) d->props.rc.legacy.rc_interval = 100; /* default */ if (d->props.rc.mode == DVB_RC_LEGACY) err = legacy_dvb_usb_remote_init(d); else err = rc_core_dvb_usb_remote_init(d); if (err) return err; d->state |= DVB_USB_STATE_REMOTE; return 0; } int dvb_usb_remote_exit(struct dvb_usb_device *d) { if (d->state & DVB_USB_STATE_REMOTE) { cancel_delayed_work_sync(&d->rc_query_work); if (d->props.rc.mode == DVB_RC_LEGACY) input_unregister_device(d->input_dev); else rc_unregister_device(d->rc_dev); } d->state &= ~DVB_USB_STATE_REMOTE; return 0; } #define DVB_USB_RC_NEC_EMPTY 0x00 #define DVB_USB_RC_NEC_KEY_PRESSED 0x01 #define DVB_USB_RC_NEC_KEY_REPEATED 0x02 int dvb_usb_nec_rc_key_to_event(struct dvb_usb_device *d, u8 keybuf[5], u32 *event, int *state) { int i; struct rc_map_table *keymap = d->props.rc.legacy.rc_map_table; *event = 0; *state = REMOTE_NO_KEY_PRESSED; switch (keybuf[0]) { case DVB_USB_RC_NEC_EMPTY: break; case DVB_USB_RC_NEC_KEY_PRESSED: if ((u8) ~keybuf[1] != keybuf[2] || (u8) ~keybuf[3] != keybuf[4]) { deb_err("remote control checksum failed.\n"); break; } /* See if we can match the raw key code. 
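		 * The legacy map stores the NEC custom (address) byte in the
		 * high byte of the scancode and the command byte in the low
		 * byte, which is what rc5_custom()/rc5_data() extract below.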
		 */
		for (i = 0; i < d->props.rc.legacy.rc_map_size; i++)
			if (rc5_custom(&keymap[i]) == keybuf[1] &&
			    rc5_data(&keymap[i]) == keybuf[3]) {
				*event = keymap[i].keycode;
				*state = REMOTE_KEY_PRESSED;
				return 0;
			}
		deb_err("key mapping failed - no appropriate key found in keymapping\n");
		break;
	case DVB_USB_RC_NEC_KEY_REPEATED:
		*state = REMOTE_KEY_REPEAT;
		break;
	default:
		deb_err("unknown type of remote status: %d\n", keybuf[0]);
		break;
	}
	return 0;
}
EXPORT_SYMBOL(dvb_usb_nec_rc_key_to_event);
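/*
 * Illustrative sketch only: roughly how a dvb-usb driver could wire the
 * legacy remote handling above into its properties.  example_rc_map,
 * example_rc_query() and my_read_ir_bytes() are made-up names for this
 * sketch; only the .rc.legacy fields and dvb_usb_nec_rc_key_to_event()
 * come from the framework code above.  Kept inside #if 0 so it is not built.
 */
#if 0
static struct rc_map_table example_rc_map[] = {
	{ 0x0100, KEY_POWER },	/* scancode = (NEC custom << 8) | command */
	{ 0x0101, KEY_MUTE },
};

static int example_rc_query(struct dvb_usb_device *d, u32 *event, int *state)
{
	u8 keybuf[5];
	int ret;

	/* my_read_ir_bytes() stands in for a device-specific USB transfer
	 * that fetches the 5-byte NEC answer from the receiver. */
	ret = my_read_ir_bytes(d, keybuf, sizeof(keybuf));
	if (ret < 0)
		return ret;

	/* decode the NEC answer into an input event/state pair */
	return dvb_usb_nec_rc_key_to_event(d, keybuf, event, state);
}

static struct dvb_usb_device_properties example_properties = {
	/* ... other properties ... */
	.rc.legacy = {
		.rc_interval	= 150,
		.rc_map_table	= example_rc_map,
		.rc_map_size	= ARRAY_SIZE(example_rc_map),
		.rc_query	= example_rc_query,
	},
};
#endif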
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/scatterlist.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/usb.h>

#define SIMPLE_IO_TIMEOUT	10000	/* in milliseconds */

/*-------------------------------------------------------------------------*/

static int override_alt = -1;
module_param_named(alt, override_alt, int, 0644);
MODULE_PARM_DESC(alt, ">= 0 to override altsetting selection");
static void complicated_callback(struct urb *urb);

/*-------------------------------------------------------------------------*/

/* FIXME make these public somewhere; usbdevfs.h? */

/* Parameter for usbtest driver. */
struct usbtest_param_32 {
	/* inputs */
	__u32		test_num;	/* 0..(TEST_CASES-1) */
	__u32		iterations;
	__u32		length;
	__u32		vary;
	__u32		sglen;

	/* outputs */
	__s32		duration_sec;
	__s32		duration_usec;
};

/*
 * Compat parameter to the usbtest driver.
 * This supports older user space binaries compiled with 64 bit compiler.
 */
struct usbtest_param_64 {
	/* inputs */
	__u32		test_num;	/* 0..(TEST_CASES-1) */
	__u32		iterations;
	__u32		length;
	__u32		vary;
	__u32		sglen;

	/* outputs */
	__s64		duration_sec;
	__s64		duration_usec;
};

/* IOCTL interface to the driver. */
#define USBTEST_REQUEST_32	_IOWR('U', 100, struct usbtest_param_32)

/* COMPAT IOCTL interface to the driver. */
#define USBTEST_REQUEST_64	_IOWR('U', 100, struct usbtest_param_64)

/*-------------------------------------------------------------------------*/

#define	GENERIC		/* let probe() bind using module params */

/* Some devices that can be used for testing will have "real" drivers.
 * Entries for those need to be enabled here by hand, after disabling
 * that "real" driver.
 */
//#define	IBOT2		/* grab iBOT2 webcams */
//#define	KEYSPAN_19Qi	/* grab un-renumerated serial adapter */

/*-------------------------------------------------------------------------*/

struct usbtest_info {
	const char		*name;
	u8			ep_in;		/* bulk/intr source */
	u8			ep_out;		/* bulk/intr sink */
	unsigned		autoconf:1;
	unsigned		ctrl_out:1;
	unsigned		iso:1;		/* try iso in/out */
	unsigned		intr:1;		/* try interrupt in/out */
	int			alt;
};

/* this is accessed only through usbfs ioctl calls.
 * one ioctl to issue a test ... one lock per device.
 * tests create other threads if they need them.
 * urbs and buffers are allocated dynamically,
 * and data generated deterministically.
 */
struct usbtest_dev {
	struct usb_interface	*intf;
	struct usbtest_info	*info;
	int			in_pipe;
	int			out_pipe;
	int			in_iso_pipe;
	int			out_iso_pipe;
	int			in_int_pipe;
	int			out_int_pipe;
	struct usb_endpoint_descriptor	*iso_in, *iso_out;
	struct usb_endpoint_descriptor	*int_in, *int_out;
	struct mutex		lock;

#define TBUF_SIZE	256
	u8			*buf;
};

static struct usb_device *testdev_to_usbdev(struct usbtest_dev *test)
{
	return interface_to_usbdev(test->intf);
}

/* set up all urbs so they can be used with either bulk or interrupt */
#define INTERRUPT_RATE		1	/* msec/transfer */

#define ERROR(tdev, fmt, args...) \
	dev_err(&(tdev)->intf->dev , fmt , ## args)
#define WARNING(tdev, fmt, args...)
\ dev_warn(&(tdev)->intf->dev , fmt , ## args) #define GUARD_BYTE 0xA5 #define MAX_SGLEN 128 /*-------------------------------------------------------------------------*/ static inline void endpoint_update(int edi, struct usb_host_endpoint **in, struct usb_host_endpoint **out, struct usb_host_endpoint *e) { if (edi) { if (!*in) *in = e; } else { if (!*out) *out = e; } } static int get_endpoints(struct usbtest_dev *dev, struct usb_interface *intf) { int tmp; struct usb_host_interface *alt; struct usb_host_endpoint *in, *out; struct usb_host_endpoint *iso_in, *iso_out; struct usb_host_endpoint *int_in, *int_out; struct usb_device *udev; for (tmp = 0; tmp < intf->num_altsetting; tmp++) { unsigned ep; in = out = NULL; iso_in = iso_out = NULL; int_in = int_out = NULL; alt = intf->altsetting + tmp; if (override_alt >= 0 && override_alt != alt->desc.bAlternateSetting) continue; /* take the first altsetting with in-bulk + out-bulk; * ignore other endpoints and altsettings. */ for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { struct usb_host_endpoint *e; int edi; e = alt->endpoint + ep; edi = usb_endpoint_dir_in(&e->desc); switch (usb_endpoint_type(&e->desc)) { case USB_ENDPOINT_XFER_BULK: endpoint_update(edi, &in, &out, e); continue; case USB_ENDPOINT_XFER_INT: if (dev->info->intr) endpoint_update(edi, &int_in, &int_out, e); continue; case USB_ENDPOINT_XFER_ISOC: if (dev->info->iso) endpoint_update(edi, &iso_in, &iso_out, e); fallthrough; default: continue; } } if ((in && out) || iso_in || iso_out || int_in || int_out) goto found; } return -EINVAL; found: udev = testdev_to_usbdev(dev); dev->info->alt = alt->desc.bAlternateSetting; if (alt->desc.bAlternateSetting != 0) { tmp = usb_set_interface(udev, alt->desc.bInterfaceNumber, alt->desc.bAlternateSetting); if (tmp < 0) return tmp; } if (in) dev->in_pipe = usb_rcvbulkpipe(udev, in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); if (out) dev->out_pipe = usb_sndbulkpipe(udev, out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); if (iso_in) { dev->iso_in = &iso_in->desc; dev->in_iso_pipe = usb_rcvisocpipe(udev, iso_in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (iso_out) { dev->iso_out = &iso_out->desc; dev->out_iso_pipe = usb_sndisocpipe(udev, iso_out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (int_in) { dev->int_in = &int_in->desc; dev->in_int_pipe = usb_rcvintpipe(udev, int_in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } if (int_out) { dev->int_out = &int_out->desc; dev->out_int_pipe = usb_sndintpipe(udev, int_out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } return 0; } /*-------------------------------------------------------------------------*/ /* Support for testing basic non-queued I/O streams. * * These just package urbs as requests that can be easily canceled. * Each urb's data buffer is dynamically allocated; callers can fill * them with non-zero test data (or test for it) when appropriate. */ static void simple_callback(struct urb *urb) { complete(urb->context); } static struct urb *usbtest_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, unsigned transfer_flags, unsigned offset, u8 bInterval, usb_complete_t complete_fn) { struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return urb; if (bInterval) usb_fill_int_urb(urb, udev, pipe, NULL, bytes, complete_fn, NULL, bInterval); else usb_fill_bulk_urb(urb, udev, pipe, NULL, bytes, complete_fn, NULL); urb->interval = (udev->speed == USB_SPEED_HIGH) ? 
(INTERRUPT_RATE << 3) : INTERRUPT_RATE; urb->transfer_flags = transfer_flags; if (usb_pipein(pipe)) urb->transfer_flags |= URB_SHORT_NOT_OK; if ((bytes + offset) == 0) return urb; if (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) urb->transfer_buffer = usb_alloc_coherent(udev, bytes + offset, GFP_KERNEL, &urb->transfer_dma); else urb->transfer_buffer = kmalloc(bytes + offset, GFP_KERNEL); if (!urb->transfer_buffer) { usb_free_urb(urb); return NULL; } /* To test unaligned transfers add an offset and fill the unused memory with a guard value */ if (offset) { memset(urb->transfer_buffer, GUARD_BYTE, offset); urb->transfer_buffer += offset; if (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) urb->transfer_dma += offset; } /* For inbound transfers use guard byte so that test fails if data not correctly copied */ memset(urb->transfer_buffer, usb_pipein(urb->pipe) ? GUARD_BYTE : 0, bytes); return urb; } static struct urb *simple_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, u8 bInterval) { return usbtest_alloc_urb(udev, pipe, bytes, URB_NO_TRANSFER_DMA_MAP, 0, bInterval, simple_callback); } static struct urb *complicated_alloc_urb( struct usb_device *udev, int pipe, unsigned long bytes, u8 bInterval) { return usbtest_alloc_urb(udev, pipe, bytes, URB_NO_TRANSFER_DMA_MAP, 0, bInterval, complicated_callback); } static unsigned pattern; static unsigned mod_pattern; module_param_named(pattern, mod_pattern, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(mod_pattern, "i/o pattern (0 == zeroes)"); static unsigned get_maxpacket(struct usb_device *udev, int pipe) { struct usb_host_endpoint *ep; ep = usb_pipe_endpoint(udev, pipe); return le16_to_cpup(&ep->desc.wMaxPacketSize); } static int ss_isoc_get_packet_num(struct usb_device *udev, int pipe) { struct usb_host_endpoint *ep = usb_pipe_endpoint(udev, pipe); return USB_SS_MULT(ep->ss_ep_comp.bmAttributes) * (1 + ep->ss_ep_comp.bMaxBurst); } static void simple_fill_buf(struct urb *urb) { unsigned i; u8 *buf = urb->transfer_buffer; unsigned len = urb->transfer_buffer_length; unsigned maxpacket; switch (pattern) { default: fallthrough; case 0: memset(buf, 0, len); break; case 1: /* mod63 */ maxpacket = get_maxpacket(urb->dev, urb->pipe); for (i = 0; i < len; i++) *buf++ = (u8) ((i % maxpacket) % 63); break; } } static inline unsigned long buffer_offset(void *buf) { return (unsigned long)buf & (ARCH_KMALLOC_MINALIGN - 1); } static int check_guard_bytes(struct usbtest_dev *tdev, struct urb *urb) { u8 *buf = urb->transfer_buffer; u8 *guard = buf - buffer_offset(buf); unsigned i; for (i = 0; guard < buf; i++, guard++) { if (*guard != GUARD_BYTE) { ERROR(tdev, "guard byte[%d] %d (not %d)\n", i, *guard, GUARD_BYTE); return -EINVAL; } } return 0; } static int simple_check_buf(struct usbtest_dev *tdev, struct urb *urb) { unsigned i; u8 expected; u8 *buf = urb->transfer_buffer; unsigned len = urb->actual_length; unsigned maxpacket = get_maxpacket(urb->dev, urb->pipe); int ret = check_guard_bytes(tdev, urb); if (ret) return ret; for (i = 0; i < len; i++, buf++) { switch (pattern) { /* all-zeroes has no synchronization issues */ case 0: expected = 0; break; /* mod63 stays in sync with short-terminated transfers, * or otherwise when host and gadget agree on how large * each usb transfer request should be. resync is done * with set_interface or set_config. 
*/ case 1: /* mod63 */ expected = (i % maxpacket) % 63; break; /* always fail unsupported patterns */ default: expected = !*buf; break; } if (*buf == expected) continue; ERROR(tdev, "buf[%d] = %d (not %d)\n", i, *buf, expected); return -EINVAL; } return 0; } static void simple_free_urb(struct urb *urb) { unsigned long offset = buffer_offset(urb->transfer_buffer); if (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) usb_free_coherent( urb->dev, urb->transfer_buffer_length + offset, urb->transfer_buffer - offset, urb->transfer_dma - offset); else kfree(urb->transfer_buffer - offset); usb_free_urb(urb); } static int simple_io( struct usbtest_dev *tdev, struct urb *urb, int iterations, int vary, int expected, const char *label ) { struct usb_device *udev = urb->dev; int max = urb->transfer_buffer_length; struct completion completion; int retval = 0; unsigned long expire; urb->context = &completion; while (retval == 0 && iterations-- > 0) { init_completion(&completion); if (usb_pipeout(urb->pipe)) { simple_fill_buf(urb); urb->transfer_flags |= URB_ZERO_PACKET; } retval = usb_submit_urb(urb, GFP_KERNEL); if (retval != 0) break; expire = msecs_to_jiffies(SIMPLE_IO_TIMEOUT); if (!wait_for_completion_timeout(&completion, expire)) { usb_kill_urb(urb); retval = (urb->status == -ENOENT ? -ETIMEDOUT : urb->status); } else { retval = urb->status; } urb->dev = udev; if (retval == 0 && usb_pipein(urb->pipe)) retval = simple_check_buf(tdev, urb); if (vary) { int len = urb->transfer_buffer_length; len += vary; len %= max; if (len == 0) len = (vary < max) ? vary : max; urb->transfer_buffer_length = len; } /* FIXME if endpoint halted, clear halt (and log) */ } urb->transfer_buffer_length = max; if (expected != retval) dev_err(&udev->dev, "%s failed, iterations left %d, status %d (not %d)\n", label, iterations, retval, expected); return retval; } /*-------------------------------------------------------------------------*/ /* We use scatterlist primitives to test queued I/O. * Yes, this also tests the scatterlist primitives. */ static void free_sglist(struct scatterlist *sg, int nents) { unsigned i; if (!sg) return; for (i = 0; i < nents; i++) { if (!sg_page(&sg[i])) continue; kfree(sg_virt(&sg[i])); } kfree(sg); } static struct scatterlist * alloc_sglist(int nents, int max, int vary, struct usbtest_dev *dev, int pipe) { struct scatterlist *sg; unsigned int n_size = 0; unsigned i; unsigned size = max; unsigned maxpacket = get_maxpacket(interface_to_usbdev(dev->intf), pipe); if (max == 0) return NULL; sg = kmalloc_array(nents, sizeof(*sg), GFP_KERNEL); if (!sg) return NULL; sg_init_table(sg, nents); for (i = 0; i < nents; i++) { char *buf; unsigned j; buf = kzalloc(size, GFP_KERNEL); if (!buf) { free_sglist(sg, i); return NULL; } /* kmalloc pages are always physically contiguous! */ sg_set_buf(&sg[i], buf, size); switch (pattern) { case 0: /* already zeroed */ break; case 1: for (j = 0; j < size; j++) *buf++ = (u8) (((j + n_size) % maxpacket) % 63); n_size += size; break; } if (vary) { size += vary; size %= max; if (size == 0) size = (vary < max) ? 
vary : max; } } return sg; } struct sg_timeout { struct timer_list timer; struct usb_sg_request *req; }; static void sg_timeout(struct timer_list *t) { struct sg_timeout *timeout = from_timer(timeout, t, timer); usb_sg_cancel(timeout->req); } static int perform_sglist( struct usbtest_dev *tdev, unsigned iterations, int pipe, struct usb_sg_request *req, struct scatterlist *sg, int nents ) { struct usb_device *udev = testdev_to_usbdev(tdev); int retval = 0; struct sg_timeout timeout = { .req = req, }; timer_setup_on_stack(&timeout.timer, sg_timeout, 0); while (retval == 0 && iterations-- > 0) { retval = usb_sg_init(req, udev, pipe, (udev->speed == USB_SPEED_HIGH) ? (INTERRUPT_RATE << 3) : INTERRUPT_RATE, sg, nents, 0, GFP_KERNEL); if (retval) break; mod_timer(&timeout.timer, jiffies + msecs_to_jiffies(SIMPLE_IO_TIMEOUT)); usb_sg_wait(req); if (!del_timer_sync(&timeout.timer)) retval = -ETIMEDOUT; else retval = req->status; destroy_timer_on_stack(&timeout.timer); /* FIXME check resulting data pattern */ /* FIXME if endpoint halted, clear halt (and log) */ } /* FIXME for unlink or fault handling tests, don't report * failure if retval is as we expected ... */ if (retval) ERROR(tdev, "perform_sglist failed, " "iterations left %d, status %d\n", iterations, retval); return retval; } /*-------------------------------------------------------------------------*/ /* unqueued control message testing * * there's a nice set of device functional requirements in chapter 9 of the * usb 2.0 spec, which we can apply to ANY device, even ones that don't use * special test firmware. * * we know the device is configured (or suspended) by the time it's visible * through usbfs. we can't change that, so we won't test enumeration (which * worked 'well enough' to get here, this time), power management (ditto), * or remote wakeup (which needs human interaction). 
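 *
 * the "realworld" module parameter defined below defaults to 1; clearing it
 * makes these checks demand stricter chapter 9 compliance instead of
 * tolerating common real-world deviations.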
*/ static unsigned realworld = 1; module_param(realworld, uint, 0); MODULE_PARM_DESC(realworld, "clear to demand stricter spec compliance"); static int get_altsetting(struct usbtest_dev *dev) { struct usb_interface *iface = dev->intf; struct usb_device *udev = interface_to_usbdev(iface); int retval; retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_INTERFACE, USB_DIR_IN|USB_RECIP_INTERFACE, 0, iface->altsetting[0].desc.bInterfaceNumber, dev->buf, 1, USB_CTRL_GET_TIMEOUT); switch (retval) { case 1: return dev->buf[0]; case 0: retval = -ERANGE; fallthrough; default: return retval; } } static int set_altsetting(struct usbtest_dev *dev, int alternate) { struct usb_interface *iface = dev->intf; struct usb_device *udev; if (alternate < 0 || alternate >= 256) return -EINVAL; udev = interface_to_usbdev(iface); return usb_set_interface(udev, iface->altsetting[0].desc.bInterfaceNumber, alternate); } static int is_good_config(struct usbtest_dev *tdev, int len) { struct usb_config_descriptor *config; if (len < (int)sizeof(*config)) return 0; config = (struct usb_config_descriptor *) tdev->buf; switch (config->bDescriptorType) { case USB_DT_CONFIG: case USB_DT_OTHER_SPEED_CONFIG: if (config->bLength != 9) { ERROR(tdev, "bogus config descriptor length\n"); return 0; } /* this bit 'must be 1' but often isn't */ if (!realworld && !(config->bmAttributes & 0x80)) { ERROR(tdev, "high bit of config attributes not set\n"); return 0; } if (config->bmAttributes & 0x1f) { /* reserved == 0 */ ERROR(tdev, "reserved config bits set\n"); return 0; } break; default: return 0; } if (le16_to_cpu(config->wTotalLength) == len) /* read it all */ return 1; if (le16_to_cpu(config->wTotalLength) >= TBUF_SIZE) /* max partial read */ return 1; ERROR(tdev, "bogus config descriptor read size\n"); return 0; } static int is_good_ext(struct usbtest_dev *tdev, u8 *buf) { struct usb_ext_cap_descriptor *ext; u32 attr; ext = (struct usb_ext_cap_descriptor *) buf; if (ext->bLength != USB_DT_USB_EXT_CAP_SIZE) { ERROR(tdev, "bogus usb 2.0 extension descriptor length\n"); return 0; } attr = le32_to_cpu(ext->bmAttributes); /* bits[1:15] is used and others are reserved */ if (attr & ~0xfffe) { /* reserved == 0 */ ERROR(tdev, "reserved bits set\n"); return 0; } return 1; } static int is_good_ss_cap(struct usbtest_dev *tdev, u8 *buf) { struct usb_ss_cap_descriptor *ss; ss = (struct usb_ss_cap_descriptor *) buf; if (ss->bLength != USB_DT_USB_SS_CAP_SIZE) { ERROR(tdev, "bogus superspeed device capability descriptor length\n"); return 0; } /* * only bit[1] of bmAttributes is used for LTM and others are * reserved */ if (ss->bmAttributes & ~0x02) { /* reserved == 0 */ ERROR(tdev, "reserved bits set in bmAttributes\n"); return 0; } /* bits[0:3] of wSpeedSupported is used and others are reserved */ if (le16_to_cpu(ss->wSpeedSupported) & ~0x0f) { /* reserved == 0 */ ERROR(tdev, "reserved bits set in wSpeedSupported\n"); return 0; } return 1; } static int is_good_con_id(struct usbtest_dev *tdev, u8 *buf) { struct usb_ss_container_id_descriptor *con_id; con_id = (struct usb_ss_container_id_descriptor *) buf; if (con_id->bLength != USB_DT_USB_SS_CONTN_ID_SIZE) { ERROR(tdev, "bogus container id descriptor length\n"); return 0; } if (con_id->bReserved) { /* reserved == 0 */ ERROR(tdev, "reserved bits set\n"); return 0; } return 1; } /* sanity test for standard requests working with usb_control_mesg() and some * of the utility functions which use it. 
* * this doesn't test how endpoint halts behave or data toggles get set, since * we won't do I/O to bulk/interrupt endpoints here (which is how to change * halt or toggle). toggle testing is impractical without support from hcds. * * this avoids failing devices linux would normally work with, by not testing * config/altsetting operations for devices that only support their defaults. * such devices rarely support those needless operations. * * NOTE that since this is a sanity test, it's not examining boundary cases * to see if usbcore, hcd, and device all behave right. such testing would * involve varied read sizes and other operation sequences. */ static int ch9_postconfig(struct usbtest_dev *dev) { struct usb_interface *iface = dev->intf; struct usb_device *udev = interface_to_usbdev(iface); int i, alt, retval; /* [9.2.3] if there's more than one altsetting, we need to be able to * set and get each one. mostly trusts the descriptors from usbcore. */ for (i = 0; i < iface->num_altsetting; i++) { /* 9.2.3 constrains the range here */ alt = iface->altsetting[i].desc.bAlternateSetting; if (alt < 0 || alt >= iface->num_altsetting) { dev_err(&iface->dev, "invalid alt [%d].bAltSetting = %d\n", i, alt); } /* [real world] get/set unimplemented if there's only one */ if (realworld && iface->num_altsetting == 1) continue; /* [9.4.10] set_interface */ retval = set_altsetting(dev, alt); if (retval) { dev_err(&iface->dev, "can't set_interface = %d, %d\n", alt, retval); return retval; } /* [9.4.4] get_interface always works */ retval = get_altsetting(dev); if (retval != alt) { dev_err(&iface->dev, "get alt should be %d, was %d\n", alt, retval); return (retval < 0) ? retval : -EDOM; } } /* [real world] get_config unimplemented if there's only one */ if (!realworld || udev->descriptor.bNumConfigurations != 1) { int expected = udev->actconfig->desc.bConfigurationValue; /* [9.4.2] get_configuration always works * ... although some cheap devices (like one TI Hub I've got) * won't return config descriptors except before set_config. */ retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_CONFIGURATION, USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, dev->buf, 1, USB_CTRL_GET_TIMEOUT); if (retval != 1 || dev->buf[0] != expected) { dev_err(&iface->dev, "get config --> %d %d (1 %d)\n", retval, dev->buf[0], expected); return (retval < 0) ? retval : -EDOM; } } /* there's always [9.4.3] a device descriptor [9.6.1] */ retval = usb_get_descriptor(udev, USB_DT_DEVICE, 0, dev->buf, sizeof(udev->descriptor)); if (retval != sizeof(udev->descriptor)) { dev_err(&iface->dev, "dev descriptor --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } /* * there's always [9.4.3] a bos device descriptor [9.6.2] in USB * 3.0 spec */ if (le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0210) { struct usb_bos_descriptor *bos = NULL; struct usb_dev_cap_header *header = NULL; unsigned total, num, length; u8 *buf; retval = usb_get_descriptor(udev, USB_DT_BOS, 0, dev->buf, sizeof(*udev->bos->desc)); if (retval != sizeof(*udev->bos->desc)) { dev_err(&iface->dev, "bos descriptor --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } bos = (struct usb_bos_descriptor *)dev->buf; total = le16_to_cpu(bos->wTotalLength); num = bos->bNumDeviceCaps; if (total > TBUF_SIZE) total = TBUF_SIZE; /* * get generic device-level capability descriptors [9.6.2] * in USB 3.0 spec */ retval = usb_get_descriptor(udev, USB_DT_BOS, 0, dev->buf, total); if (retval != total) { dev_err(&iface->dev, "bos descriptor set --> %d\n", retval); return (retval < 0) ? 
retval : -EDOM; } length = sizeof(*udev->bos->desc); buf = dev->buf; for (i = 0; i < num; i++) { buf += length; if (buf + sizeof(struct usb_dev_cap_header) > dev->buf + total) break; header = (struct usb_dev_cap_header *)buf; length = header->bLength; if (header->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { dev_warn(&udev->dev, "not device capability descriptor, skip\n"); continue; } switch (header->bDevCapabilityType) { case USB_CAP_TYPE_EXT: if (buf + USB_DT_USB_EXT_CAP_SIZE > dev->buf + total || !is_good_ext(dev, buf)) { dev_err(&iface->dev, "bogus usb 2.0 extension descriptor\n"); return -EDOM; } break; case USB_SS_CAP_TYPE: if (buf + USB_DT_USB_SS_CAP_SIZE > dev->buf + total || !is_good_ss_cap(dev, buf)) { dev_err(&iface->dev, "bogus superspeed device capability descriptor\n"); return -EDOM; } break; case CONTAINER_ID_TYPE: if (buf + USB_DT_USB_SS_CONTN_ID_SIZE > dev->buf + total || !is_good_con_id(dev, buf)) { dev_err(&iface->dev, "bogus container id descriptor\n"); return -EDOM; } break; default: break; } } } /* there's always [9.4.3] at least one config descriptor [9.6.3] */ for (i = 0; i < udev->descriptor.bNumConfigurations; i++) { retval = usb_get_descriptor(udev, USB_DT_CONFIG, i, dev->buf, TBUF_SIZE); if (!is_good_config(dev, retval)) { dev_err(&iface->dev, "config [%d] descriptor --> %d\n", i, retval); return (retval < 0) ? retval : -EDOM; } /* FIXME cross-checking udev->config[i] to make sure usbcore * parsed it right (etc) would be good testing paranoia */ } /* and sometimes [9.2.6.6] speed dependent descriptors */ if (le16_to_cpu(udev->descriptor.bcdUSB) == 0x0200) { struct usb_qualifier_descriptor *d = NULL; /* device qualifier [9.6.2] */ retval = usb_get_descriptor(udev, USB_DT_DEVICE_QUALIFIER, 0, dev->buf, sizeof(struct usb_qualifier_descriptor)); if (retval == -EPIPE) { if (udev->speed == USB_SPEED_HIGH) { dev_err(&iface->dev, "hs dev qualifier --> %d\n", retval); return retval; } /* usb2.0 but not high-speed capable; fine */ } else if (retval != sizeof(struct usb_qualifier_descriptor)) { dev_err(&iface->dev, "dev qualifier --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } else d = (struct usb_qualifier_descriptor *) dev->buf; /* might not have [9.6.2] any other-speed configs [9.6.4] */ if (d) { unsigned max = d->bNumConfigurations; for (i = 0; i < max; i++) { retval = usb_get_descriptor(udev, USB_DT_OTHER_SPEED_CONFIG, i, dev->buf, TBUF_SIZE); if (!is_good_config(dev, retval)) { dev_err(&iface->dev, "other speed config --> %d\n", retval); return (retval < 0) ? retval : -EDOM; } } } } /* FIXME fetch strings from at least the device descriptor */ /* [9.4.5] get_status always works */ retval = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, dev->buf); if (retval) { dev_err(&iface->dev, "get dev status --> %d\n", retval); return retval; } /* FIXME configuration.bmAttributes says if we could try to set/clear * the device's remote wakeup feature ... if we can, test that here */ retval = usb_get_std_status(udev, USB_RECIP_INTERFACE, iface->altsetting[0].desc.bInterfaceNumber, dev->buf); if (retval) { dev_err(&iface->dev, "get interface status --> %d\n", retval); return retval; } /* FIXME get status for each endpoint in the interface */ return 0; } /*-------------------------------------------------------------------------*/ /* use ch9 requests to test whether: * (a) queues work for control, keeping N subtests queued and * active (auto-resubmit) for M loops through the queue. * (b) protocol stalls (control-only) will autorecover. 
* it's not like bulk/intr; no halt clearing. * (c) short control reads are reported and handled. * (d) queues are always processed in-order */ struct ctrl_ctx { spinlock_t lock; struct usbtest_dev *dev; struct completion complete; unsigned count; unsigned pending; int status; struct urb **urb; struct usbtest_param_32 *param; int last; }; #define NUM_SUBCASES 16 /* how many test subcases here? */ struct subcase { struct usb_ctrlrequest setup; int number; int expected; }; static void ctrl_complete(struct urb *urb) { struct ctrl_ctx *ctx = urb->context; struct usb_ctrlrequest *reqp; struct subcase *subcase; int status = urb->status; unsigned long flags; reqp = (struct usb_ctrlrequest *)urb->setup_packet; subcase = container_of(reqp, struct subcase, setup); spin_lock_irqsave(&ctx->lock, flags); ctx->count--; ctx->pending--; /* queue must transfer and complete in fifo order, unless * usb_unlink_urb() is used to unlink something not at the * physical queue head (not tested). */ if (subcase->number > 0) { if ((subcase->number - ctx->last) != 1) { ERROR(ctx->dev, "subcase %d completed out of order, last %d\n", subcase->number, ctx->last); status = -EDOM; ctx->last = subcase->number; goto error; } } ctx->last = subcase->number; /* succeed or fault in only one way? */ if (status == subcase->expected) status = 0; /* async unlink for cleanup? */ else if (status != -ECONNRESET) { /* some faults are allowed, not required */ if (subcase->expected > 0 && ( ((status == -subcase->expected /* happened */ || status == 0)))) /* didn't */ status = 0; /* sometimes more than one fault is allowed */ else if (subcase->number == 12 && status == -EPIPE) status = 0; else ERROR(ctx->dev, "subtest %d error, status %d\n", subcase->number, status); } /* unexpected status codes mean errors; ideally, in hardware */ if (status) { error: if (ctx->status == 0) { int i; ctx->status = status; ERROR(ctx->dev, "control queue %02x.%02x, err %d, " "%d left, subcase %d, len %d/%d\n", reqp->bRequestType, reqp->bRequest, status, ctx->count, subcase->number, urb->actual_length, urb->transfer_buffer_length); /* FIXME this "unlink everything" exit route should * be a separate test case. 
*/ /* unlink whatever's still pending */ for (i = 1; i < ctx->param->sglen; i++) { struct urb *u = ctx->urb[ (i + subcase->number) % ctx->param->sglen]; if (u == urb || !u->dev) continue; spin_unlock(&ctx->lock); status = usb_unlink_urb(u); spin_lock(&ctx->lock); switch (status) { case -EINPROGRESS: case -EBUSY: case -EIDRM: continue; default: ERROR(ctx->dev, "urb unlink --> %d\n", status); } } status = ctx->status; } } /* resubmit if we need to, else mark this as done */ if ((status == 0) && (ctx->pending < ctx->count)) { status = usb_submit_urb(urb, GFP_ATOMIC); if (status != 0) { ERROR(ctx->dev, "can't resubmit ctrl %02x.%02x, err %d\n", reqp->bRequestType, reqp->bRequest, status); urb->dev = NULL; } else ctx->pending++; } else urb->dev = NULL; /* signal completion when nothing's queued */ if (ctx->pending == 0) complete(&ctx->complete); spin_unlock_irqrestore(&ctx->lock, flags); } static int test_ctrl_queue(struct usbtest_dev *dev, struct usbtest_param_32 *param) { struct usb_device *udev = testdev_to_usbdev(dev); struct urb **urb; struct ctrl_ctx context; int i; if (param->sglen == 0 || param->iterations > UINT_MAX / param->sglen) return -EOPNOTSUPP; spin_lock_init(&context.lock); context.dev = dev; init_completion(&context.complete); context.count = param->sglen * param->iterations; context.pending = 0; context.status = -ENOMEM; context.param = param; context.last = -1; /* allocate and init the urbs we'll queue. * as with bulk/intr sglists, sglen is the queue depth; it also * controls which subtests run (more tests than sglen) or rerun. */ urb = kcalloc(param->sglen, sizeof(struct urb *), GFP_KERNEL); if (!urb) return -ENOMEM; for (i = 0; i < param->sglen; i++) { int pipe = usb_rcvctrlpipe(udev, 0); unsigned len; struct urb *u; struct usb_ctrlrequest req; struct subcase *reqp; /* sign of this variable means: * -: tested code must return this (negative) error code * +: tested code may return this (negative too) error code */ int expected = 0; /* requests here are mostly expected to succeed on any * device, but some are chosen to trigger protocol stalls * or short reads. 
*/ memset(&req, 0, sizeof(req)); req.bRequest = USB_REQ_GET_DESCRIPTOR; req.bRequestType = USB_DIR_IN|USB_RECIP_DEVICE; switch (i % NUM_SUBCASES) { case 0: /* get device descriptor */ req.wValue = cpu_to_le16(USB_DT_DEVICE << 8); len = sizeof(struct usb_device_descriptor); break; case 1: /* get first config descriptor (only) */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = sizeof(struct usb_config_descriptor); break; case 2: /* get altsetting (OFTEN STALLS) */ req.bRequest = USB_REQ_GET_INTERFACE; req.bRequestType = USB_DIR_IN|USB_RECIP_INTERFACE; /* index = 0 means first interface */ len = 1; expected = EPIPE; break; case 3: /* get interface status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_INTERFACE; /* interface 0 */ len = 2; break; case 4: /* get device status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_DEVICE; len = 2; break; case 5: /* get device qualifier (MAY STALL) */ req.wValue = cpu_to_le16 (USB_DT_DEVICE_QUALIFIER << 8); len = sizeof(struct usb_qualifier_descriptor); if (udev->speed != USB_SPEED_HIGH) expected = EPIPE; break; case 6: /* get first config descriptor, plus interface */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = sizeof(struct usb_config_descriptor); len += sizeof(struct usb_interface_descriptor); break; case 7: /* get interface descriptor (ALWAYS STALLS) */ req.wValue = cpu_to_le16 (USB_DT_INTERFACE << 8); /* interface == 0 */ len = sizeof(struct usb_interface_descriptor); expected = -EPIPE; break; /* NOTE: two consecutive stalls in the queue here. * that tests fault recovery a bit more aggressively. */ case 8: /* clear endpoint halt (MAY STALL) */ req.bRequest = USB_REQ_CLEAR_FEATURE; req.bRequestType = USB_RECIP_ENDPOINT; /* wValue 0 == ep halt */ /* wIndex 0 == ep0 (shouldn't halt!) */ len = 0; pipe = usb_sndctrlpipe(udev, 0); expected = EPIPE; break; case 9: /* get endpoint status */ req.bRequest = USB_REQ_GET_STATUS; req.bRequestType = USB_DIR_IN|USB_RECIP_ENDPOINT; /* endpoint 0 */ len = 2; break; case 10: /* trigger short read (EREMOTEIO) */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); len = 1024; expected = -EREMOTEIO; break; /* NOTE: two consecutive _different_ faults in the queue. */ case 11: /* get endpoint descriptor (ALWAYS STALLS) */ req.wValue = cpu_to_le16(USB_DT_ENDPOINT << 8); /* endpoint == 0 */ len = sizeof(struct usb_interface_descriptor); expected = EPIPE; break; /* NOTE: sometimes even a third fault in the queue! 
*/ case 12: /* get string 0 descriptor (MAY STALL) */ req.wValue = cpu_to_le16(USB_DT_STRING << 8); /* string == 0, for language IDs */ len = sizeof(struct usb_interface_descriptor); /* may succeed when > 4 languages */ expected = EREMOTEIO; /* or EPIPE, if no strings */ break; case 13: /* short read, resembling case 10 */ req.wValue = cpu_to_le16((USB_DT_CONFIG << 8) | 0); /* last data packet "should" be DATA1, not DATA0 */ if (udev->speed == USB_SPEED_SUPER) len = 1024 - 512; else len = 1024 - udev->descriptor.bMaxPacketSize0; expected = -EREMOTEIO; break; case 14: /* short read; try to fill the last packet */ req.wValue = cpu_to_le16((USB_DT_DEVICE << 8) | 0); /* device descriptor size == 18 bytes */ len = udev->descriptor.bMaxPacketSize0; if (udev->speed == USB_SPEED_SUPER) len = 512; switch (len) { case 8: len = 24; break; case 16: len = 32; break; } expected = -EREMOTEIO; break; case 15: req.wValue = cpu_to_le16(USB_DT_BOS << 8); if (udev->bos) len = le16_to_cpu(udev->bos->desc->wTotalLength); else len = sizeof(struct usb_bos_descriptor); if (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0201) expected = -EPIPE; break; default: ERROR(dev, "bogus number of ctrl queue testcases!\n"); context.status = -EINVAL; goto cleanup; } req.wLength = cpu_to_le16(len); urb[i] = u = simple_alloc_urb(udev, pipe, len, 0); if (!u) goto cleanup; reqp = kmalloc(sizeof(*reqp), GFP_KERNEL); if (!reqp) goto cleanup; reqp->setup = req; reqp->number = i % NUM_SUBCASES; reqp->expected = expected; u->setup_packet = (char *) &reqp->setup; u->context = &context; u->complete = ctrl_complete; } /* queue the urbs */ context.urb = urb; spin_lock_irq(&context.lock); for (i = 0; i < param->sglen; i++) { context.status = usb_submit_urb(urb[i], GFP_ATOMIC); if (context.status != 0) { ERROR(dev, "can't submit urb[%d], status %d\n", i, context.status); context.count = context.pending; break; } context.pending++; } spin_unlock_irq(&context.lock); /* FIXME set timer and time out; provide a disconnect hook */ /* wait for the last one to complete */ if (context.pending > 0) wait_for_completion(&context.complete); cleanup: for (i = 0; i < param->sglen; i++) { if (!urb[i]) continue; urb[i]->dev = udev; kfree(urb[i]->setup_packet); simple_free_urb(urb[i]); } kfree(urb); return context.status; } #undef NUM_SUBCASES /*-------------------------------------------------------------------------*/ static void unlink1_callback(struct urb *urb) { int status = urb->status; /* we "know" -EPIPE (stall) never happens */ if (!status) status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { urb->status = status; complete(urb->context); } } static int unlink1(struct usbtest_dev *dev, int pipe, int size, int async) { struct urb *urb; struct completion completion; int retval = 0; init_completion(&completion); urb = simple_alloc_urb(testdev_to_usbdev(dev), pipe, size, 0); if (!urb) return -ENOMEM; urb->context = &completion; urb->complete = unlink1_callback; if (usb_pipeout(urb->pipe)) { simple_fill_buf(urb); urb->transfer_flags |= URB_ZERO_PACKET; } /* keep the endpoint busy. there are lots of hc/hcd-internal * states, and testing should get to all of them over time. * * FIXME want additional tests for when endpoint is STALLing * due to errors, or is just NAKing requests. */ retval = usb_submit_urb(urb, GFP_KERNEL); if (retval != 0) { dev_err(&dev->intf->dev, "submit fail %d\n", retval); return retval; } /* unlinking that should always work. variable delay tests more * hcd states and code paths, even with little other system load. 
*/ msleep(jiffies % (2 * INTERRUPT_RATE)); if (async) { while (!completion_done(&completion)) { retval = usb_unlink_urb(urb); if (retval == 0 && usb_pipein(urb->pipe)) retval = simple_check_buf(dev, urb); switch (retval) { case -EBUSY: case -EIDRM: /* we can't unlink urbs while they're completing * or if they've completed, and we haven't * resubmitted. "normal" drivers would prevent * resubmission, but since we're testing unlink * paths, we can't. */ ERROR(dev, "unlink retry\n"); continue; case 0: case -EINPROGRESS: break; default: dev_err(&dev->intf->dev, "unlink fail %d\n", retval); return retval; } break; } } else usb_kill_urb(urb); wait_for_completion(&completion); retval = urb->status; simple_free_urb(urb); if (async) return (retval == -ECONNRESET) ? 0 : retval - 1000; else return (retval == -ENOENT || retval == -EPERM) ? 0 : retval - 2000; } static int unlink_simple(struct usbtest_dev *dev, int pipe, int len) { int retval = 0; /* test sync and async paths */ retval = unlink1(dev, pipe, len, 1); if (!retval) retval = unlink1(dev, pipe, len, 0); return retval; } /*-------------------------------------------------------------------------*/ struct queued_ctx { struct completion complete; atomic_t pending; unsigned num; int status; struct urb **urbs; }; static void unlink_queued_callback(struct urb *urb) { int status = urb->status; struct queued_ctx *ctx = urb->context; if (ctx->status) goto done; if (urb == ctx->urbs[ctx->num - 4] || urb == ctx->urbs[ctx->num - 2]) { if (status == -ECONNRESET) goto done; /* What error should we report if the URB completed normally? */ } if (status != 0) ctx->status = status; done: if (atomic_dec_and_test(&ctx->pending)) complete(&ctx->complete); } static int unlink_queued(struct usbtest_dev *dev, int pipe, unsigned num, unsigned size) { struct queued_ctx ctx; struct usb_device *udev = testdev_to_usbdev(dev); void *buf; dma_addr_t buf_dma; int i; int retval = -ENOMEM; init_completion(&ctx.complete); atomic_set(&ctx.pending, 1); /* One more than the actual value */ ctx.num = num; ctx.status = 0; buf = usb_alloc_coherent(udev, size, GFP_KERNEL, &buf_dma); if (!buf) return retval; memset(buf, 0, size); /* Allocate and init the urbs we'll queue */ ctx.urbs = kcalloc(num, sizeof(struct urb *), GFP_KERNEL); if (!ctx.urbs) goto free_buf; for (i = 0; i < num; i++) { ctx.urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!ctx.urbs[i]) goto free_urbs; usb_fill_bulk_urb(ctx.urbs[i], udev, pipe, buf, size, unlink_queued_callback, &ctx); ctx.urbs[i]->transfer_dma = buf_dma; ctx.urbs[i]->transfer_flags = URB_NO_TRANSFER_DMA_MAP; if (usb_pipeout(ctx.urbs[i]->pipe)) { simple_fill_buf(ctx.urbs[i]); ctx.urbs[i]->transfer_flags |= URB_ZERO_PACKET; } } /* Submit all the URBs and then unlink URBs num - 4 and num - 2. 
*/ for (i = 0; i < num; i++) { atomic_inc(&ctx.pending); retval = usb_submit_urb(ctx.urbs[i], GFP_KERNEL); if (retval != 0) { dev_err(&dev->intf->dev, "submit urbs[%d] fail %d\n", i, retval); atomic_dec(&ctx.pending); ctx.status = retval; break; } } if (i == num) { usb_unlink_urb(ctx.urbs[num - 4]); usb_unlink_urb(ctx.urbs[num - 2]); } else { while (--i >= 0) usb_unlink_urb(ctx.urbs[i]); } if (atomic_dec_and_test(&ctx.pending)) /* The extra count */ complete(&ctx.complete); wait_for_completion(&ctx.complete); retval = ctx.status; free_urbs: for (i = 0; i < num; i++) usb_free_urb(ctx.urbs[i]); kfree(ctx.urbs); free_buf: usb_free_coherent(udev, size, buf, buf_dma); return retval; } /*-------------------------------------------------------------------------*/ static int verify_not_halted(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; u16 status; /* shouldn't look or act halted */ retval = usb_get_std_status(urb->dev, USB_RECIP_ENDPOINT, ep, &status); if (retval < 0) { ERROR(tdev, "ep %02x couldn't get no-halt status, %d\n", ep, retval); return retval; } if (status != 0) { ERROR(tdev, "ep %02x bogus status: %04x != 0\n", ep, status); return -EINVAL; } retval = simple_io(tdev, urb, 1, 0, 0, __func__); if (retval != 0) return -EINVAL; return 0; } static int verify_halted(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; u16 status; /* should look and act halted */ retval = usb_get_std_status(urb->dev, USB_RECIP_ENDPOINT, ep, &status); if (retval < 0) { ERROR(tdev, "ep %02x couldn't get halt status, %d\n", ep, retval); return retval; } if (status != 1) { ERROR(tdev, "ep %02x bogus status: %04x != 1\n", ep, status); return -EINVAL; } retval = simple_io(tdev, urb, 1, 0, -EPIPE, __func__); if (retval != -EPIPE) return -EINVAL; retval = simple_io(tdev, urb, 1, 0, -EPIPE, "verify_still_halted"); if (retval != -EPIPE) return -EINVAL; return 0; } static int test_halt(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; /* shouldn't look or act halted now */ retval = verify_not_halted(tdev, ep, urb); if (retval < 0) return retval; /* set halt (protocol test only), verify it worked */ retval = usb_control_msg(urb->dev, usb_sndctrlpipe(urb->dev, 0), USB_REQ_SET_FEATURE, USB_RECIP_ENDPOINT, USB_ENDPOINT_HALT, ep, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval < 0) { ERROR(tdev, "ep %02x couldn't set halt, %d\n", ep, retval); return retval; } retval = verify_halted(tdev, ep, urb); if (retval < 0) { int ret; /* clear halt anyways, else further tests will fail */ ret = usb_clear_halt(urb->dev, urb->pipe); if (ret) ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, ret); return retval; } /* clear halt (tests API + protocol), verify it worked */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } retval = verify_not_halted(tdev, ep, urb); if (retval < 0) return retval; /* NOTE: could also verify SET_INTERFACE clear halts ... 
*/ return 0; } static int test_toggle_sync(struct usbtest_dev *tdev, int ep, struct urb *urb) { int retval; /* clear initial data toggle to DATA0 */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } /* transfer 3 data packets, should be DATA0, DATA1, DATA0 */ retval = simple_io(tdev, urb, 1, 0, 0, __func__); if (retval != 0) return -EINVAL; /* clear halt resets device side data toggle, host should react to it */ retval = usb_clear_halt(urb->dev, urb->pipe); if (retval < 0) { ERROR(tdev, "ep %02x couldn't clear halt, %d\n", ep, retval); return retval; } /* host should use DATA0 again after clear halt */ retval = simple_io(tdev, urb, 1, 0, 0, __func__); return retval; } static int halt_simple(struct usbtest_dev *dev) { int ep; int retval = 0; struct urb *urb; struct usb_device *udev = testdev_to_usbdev(dev); if (udev->speed == USB_SPEED_SUPER) urb = simple_alloc_urb(udev, 0, 1024, 0); else urb = simple_alloc_urb(udev, 0, 512, 0); if (urb == NULL) return -ENOMEM; if (dev->in_pipe) { ep = usb_pipeendpoint(dev->in_pipe) | USB_DIR_IN; urb->pipe = dev->in_pipe; retval = test_halt(dev, ep, urb); if (retval < 0) goto done; } if (dev->out_pipe) { ep = usb_pipeendpoint(dev->out_pipe); urb->pipe = dev->out_pipe; retval = test_halt(dev, ep, urb); } done: simple_free_urb(urb); return retval; } static int toggle_sync_simple(struct usbtest_dev *dev) { int ep; int retval = 0; struct urb *urb; struct usb_device *udev = testdev_to_usbdev(dev); unsigned maxp = get_maxpacket(udev, dev->out_pipe); /* * Create a URB that causes a transfer of uneven amount of data packets * This way the clear toggle has an impact on the data toggle sequence. * Use 2 maxpacket length packets and one zero packet. */ urb = simple_alloc_urb(udev, 0, 2 * maxp, 0); if (urb == NULL) return -ENOMEM; urb->transfer_flags |= URB_ZERO_PACKET; ep = usb_pipeendpoint(dev->out_pipe); urb->pipe = dev->out_pipe; retval = test_toggle_sync(dev, ep, urb); simple_free_urb(urb); return retval; } /*-------------------------------------------------------------------------*/ /* Control OUT tests use the vendor control requests from Intel's * USB 2.0 compliance test device: write a buffer, read it back. * * Intel's spec only _requires_ that it work for one packet, which * is pretty weak. Some HCDs place limits here; most devices will * need to be able to handle more than one OUT data packet. We'll * try whatever we're told to try. */ static int ctrl_out(struct usbtest_dev *dev, unsigned count, unsigned length, unsigned vary, unsigned offset) { unsigned i, j, len; int retval; u8 *buf; char *what = "?"; struct usb_device *udev; if (length < 1 || length > 0xffff || vary >= length) return -EINVAL; buf = kmalloc(length + offset, GFP_KERNEL); if (!buf) return -ENOMEM; buf += offset; udev = testdev_to_usbdev(dev); len = length; retval = 0; /* NOTE: hardware might well act differently if we pushed it * with lots back-to-back queued requests. */ for (i = 0; i < count; i++) { /* write patterned data */ for (j = 0; j < len; j++) buf[j] = (u8)(i + j); retval = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x5b, USB_DIR_OUT|USB_TYPE_VENDOR, 0, 0, buf, len, USB_CTRL_SET_TIMEOUT); if (retval != len) { what = "write"; if (retval >= 0) { ERROR(dev, "ctrl_out, wlen %d (expected %d)\n", retval, len); retval = -EBADMSG; } break; } /* read it back -- assuming nothing intervened!! 
*/ retval = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x5c, USB_DIR_IN|USB_TYPE_VENDOR, 0, 0, buf, len, USB_CTRL_GET_TIMEOUT); if (retval != len) { what = "read"; if (retval >= 0) { ERROR(dev, "ctrl_out, rlen %d (expected %d)\n", retval, len); retval = -EBADMSG; } break; } /* fail if we can't verify */ for (j = 0; j < len; j++) { if (buf[j] != (u8)(i + j)) { ERROR(dev, "ctrl_out, byte %d is %d not %d\n", j, buf[j], (u8)(i + j)); retval = -EBADMSG; break; } } if (retval < 0) { what = "verify"; break; } len += vary; /* [real world] the "zero bytes IN" case isn't really used. * hardware can easily trip up in this weird case, since its * status stage is IN, not OUT like other ep0in transfers. */ if (len > length) len = realworld ? 1 : 0; } if (retval < 0) ERROR(dev, "ctrl_out %s failed, code %d, count %d\n", what, retval, i); kfree(buf - offset); return retval; } /*-------------------------------------------------------------------------*/ /* ISO/BULK tests ... mimics common usage * - buffer length is split into N packets (mostly maxpacket sized) * - multi-buffers according to sglen */ struct transfer_context { unsigned count; unsigned pending; spinlock_t lock; struct completion done; int submit_error; unsigned long errors; unsigned long packet_count; struct usbtest_dev *dev; bool is_iso; }; static void complicated_callback(struct urb *urb) { struct transfer_context *ctx = urb->context; unsigned long flags; spin_lock_irqsave(&ctx->lock, flags); ctx->count--; ctx->packet_count += urb->number_of_packets; if (urb->error_count > 0) ctx->errors += urb->error_count; else if (urb->status != 0) ctx->errors += (ctx->is_iso ? urb->number_of_packets : 1); else if (urb->actual_length != urb->transfer_buffer_length) ctx->errors++; else if (check_guard_bytes(ctx->dev, urb) != 0) ctx->errors++; if (urb->status == 0 && ctx->count > (ctx->pending - 1) && !ctx->submit_error) { int status = usb_submit_urb(urb, GFP_ATOMIC); switch (status) { case 0: goto done; default: dev_err(&ctx->dev->intf->dev, "resubmit err %d\n", status); fallthrough; case -ENODEV: /* disconnected */ case -ESHUTDOWN: /* endpoint disabled */ ctx->submit_error = 1; break; } } ctx->pending--; if (ctx->pending == 0) { if (ctx->errors) dev_err(&ctx->dev->intf->dev, "during the test, %lu errors out of %lu\n", ctx->errors, ctx->packet_count); complete(&ctx->done); } done: spin_unlock_irqrestore(&ctx->lock, flags); } static struct urb *iso_alloc_urb( struct usb_device *udev, int pipe, struct usb_endpoint_descriptor *desc, long bytes, unsigned offset ) { struct urb *urb; unsigned i, maxp, packets; if (bytes < 0 || !desc) return NULL; maxp = usb_endpoint_maxp(desc); if (udev->speed >= USB_SPEED_SUPER) maxp *= ss_isoc_get_packet_num(udev, pipe); else maxp *= usb_endpoint_maxp_mult(desc); packets = DIV_ROUND_UP(bytes, maxp); urb = usb_alloc_urb(packets, GFP_KERNEL); if (!urb) return urb; urb->dev = udev; urb->pipe = pipe; urb->number_of_packets = packets; urb->transfer_buffer_length = bytes; urb->transfer_buffer = usb_alloc_coherent(udev, bytes + offset, GFP_KERNEL, &urb->transfer_dma); if (!urb->transfer_buffer) { usb_free_urb(urb); return NULL; } if (offset) { memset(urb->transfer_buffer, GUARD_BYTE, offset); urb->transfer_buffer += offset; urb->transfer_dma += offset; } /* For inbound transfers use guard byte so that test fails if data not correctly copied */ memset(urb->transfer_buffer, usb_pipein(urb->pipe) ? 
GUARD_BYTE : 0, bytes); for (i = 0; i < packets; i++) { /* here, only the last packet will be short */ urb->iso_frame_desc[i].length = min((unsigned) bytes, maxp); bytes -= urb->iso_frame_desc[i].length; urb->iso_frame_desc[i].offset = maxp * i; } urb->complete = complicated_callback; /* urb->context = SET BY CALLER */ urb->interval = 1 << (desc->bInterval - 1); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; return urb; } static int test_queue(struct usbtest_dev *dev, struct usbtest_param_32 *param, int pipe, struct usb_endpoint_descriptor *desc, unsigned offset) { struct transfer_context context; struct usb_device *udev; unsigned i; unsigned long packets = 0; int status = 0; struct urb **urbs; if (!param->sglen || param->iterations > UINT_MAX / param->sglen) return -EINVAL; if (param->sglen > MAX_SGLEN) return -EINVAL; urbs = kcalloc(param->sglen, sizeof(*urbs), GFP_KERNEL); if (!urbs) return -ENOMEM; memset(&context, 0, sizeof(context)); context.count = param->iterations * param->sglen; context.dev = dev; context.is_iso = !!desc; init_completion(&context.done); spin_lock_init(&context.lock); udev = testdev_to_usbdev(dev); for (i = 0; i < param->sglen; i++) { if (context.is_iso) urbs[i] = iso_alloc_urb(udev, pipe, desc, param->length, offset); else urbs[i] = complicated_alloc_urb(udev, pipe, param->length, 0); if (!urbs[i]) { status = -ENOMEM; goto fail; } packets += urbs[i]->number_of_packets; urbs[i]->context = &context; } packets *= param->iterations; if (context.is_iso) { int transaction_num; if (udev->speed >= USB_SPEED_SUPER) transaction_num = ss_isoc_get_packet_num(udev, pipe); else transaction_num = usb_endpoint_maxp_mult(desc); dev_info(&dev->intf->dev, "iso period %d %sframes, wMaxPacket %d, transactions: %d\n", 1 << (desc->bInterval - 1), (udev->speed >= USB_SPEED_HIGH) ? "micro" : "", usb_endpoint_maxp(desc), transaction_num); dev_info(&dev->intf->dev, "total %lu msec (%lu packets)\n", (packets * (1 << (desc->bInterval - 1))) / ((udev->speed >= USB_SPEED_HIGH) ? 8 : 1), packets); } spin_lock_irq(&context.lock); for (i = 0; i < param->sglen; i++) { ++context.pending; status = usb_submit_urb(urbs[i], GFP_ATOMIC); if (status < 0) { ERROR(dev, "submit iso[%d], error %d\n", i, status); if (i == 0) { spin_unlock_irq(&context.lock); goto fail; } simple_free_urb(urbs[i]); urbs[i] = NULL; context.pending--; context.submit_error = 1; break; } } spin_unlock_irq(&context.lock); wait_for_completion(&context.done); for (i = 0; i < param->sglen; i++) { if (urbs[i]) simple_free_urb(urbs[i]); } /* * Isochronous transfers are expected to fail sometimes. As an * arbitrary limit, we will report an error if any submissions * fail or if the transfer failure rate is > 10%. */ if (status != 0) ; else if (context.submit_error) status = -EACCES; else if (context.errors > (context.is_iso ? context.packet_count / 10 : 0)) status = -EIO; kfree(urbs); return status; fail: for (i = 0; i < param->sglen; i++) { if (urbs[i]) simple_free_urb(urbs[i]); } kfree(urbs); return status; } static int test_unaligned_bulk( struct usbtest_dev *tdev, int pipe, unsigned length, int iterations, unsigned transfer_flags, const char *label) { int retval; struct urb *urb = usbtest_alloc_urb(testdev_to_usbdev(tdev), pipe, length, transfer_flags, 1, 0, simple_callback); if (!urb) return -ENOMEM; retval = simple_io(tdev, urb, iterations, 0, 0, label); simple_free_urb(urb); return retval; } /* Run tests. 
*/ static int usbtest_do_ioctl(struct usb_interface *intf, struct usbtest_param_32 *param) { struct usbtest_dev *dev = usb_get_intfdata(intf); struct usb_device *udev = testdev_to_usbdev(dev); struct urb *urb; struct scatterlist *sg; struct usb_sg_request req; unsigned i; int retval = -EOPNOTSUPP; if (param->iterations <= 0) return -EINVAL; if (param->sglen > MAX_SGLEN) return -EINVAL; /* * Just a bunch of test cases that every HCD is expected to handle. * * Some may need specific firmware, though it'd be good to have * one firmware image to handle all the test cases. * * FIXME add more tests! cancel requests, verify the data, control * queueing, concurrent read+write threads, and so on. */ switch (param->test_num) { case 0: dev_info(&intf->dev, "TEST 0: NOP\n"); retval = 0; break; /* Simple non-queued bulk I/O tests */ case 1: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 1: write %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test1"); simple_free_urb(urb); break; case 2: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 2: read %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test2"); simple_free_urb(urb); break; case 3: if (dev->out_pipe == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 3: write/%d 0..%d bytes %u times\n", param->vary, param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, param->vary, 0, "test3"); simple_free_urb(urb); break; case 4: if (dev->in_pipe == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 4: read/%d 0..%d bytes %u times\n", param->vary, param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_pipe, param->length, 0); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, param->vary, 0, "test4"); simple_free_urb(urb); break; /* Queued bulk I/O tests */ case 5: if (dev->out_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 5: write %d sglists %d entries of %d bytes\n", param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, 0, dev, dev->out_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = perform_sglist(dev, param->iterations, dev->out_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 6: if (dev->in_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 6: read %d sglists %d entries of %d bytes\n", param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, 0, dev, dev->in_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = perform_sglist(dev, param->iterations, dev->in_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 7: if (dev->out_pipe == 0 || param->sglen == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 7: write/%d 
%d sglists %d entries 0..%d bytes\n", param->vary, param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, param->vary, dev, dev->out_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk sink (maybe accepts short writes) */ retval = perform_sglist(dev, param->iterations, dev->out_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; case 8: if (dev->in_pipe == 0 || param->sglen == 0 || param->vary == 0) break; dev_info(&intf->dev, "TEST 8: read/%d %d sglists %d entries 0..%d bytes\n", param->vary, param->iterations, param->sglen, param->length); sg = alloc_sglist(param->sglen, param->length, param->vary, dev, dev->in_pipe); if (!sg) { retval = -ENOMEM; break; } /* FIRMWARE: bulk source (maybe generates short writes) */ retval = perform_sglist(dev, param->iterations, dev->in_pipe, &req, sg, param->sglen); free_sglist(sg, param->sglen); break; /* non-queued sanity tests for control (chapter 9 subset) */ case 9: retval = 0; dev_info(&intf->dev, "TEST 9: ch9 (subset) control tests, %d times\n", param->iterations); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = ch9_postconfig(dev); if (retval) dev_err(&intf->dev, "ch9 subset failed, " "iterations left %d\n", i); break; /* queued control messaging */ case 10: retval = 0; dev_info(&intf->dev, "TEST 10: queue %d control calls, %d times\n", param->sglen, param->iterations); retval = test_ctrl_queue(dev, param); break; /* simple non-queued unlinks (ring with one urb) */ case 11: if (dev->in_pipe == 0 || !param->length) break; retval = 0; dev_info(&intf->dev, "TEST 11: unlink %d reads of %d\n", param->iterations, param->length); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = unlink_simple(dev, dev->in_pipe, param->length); if (retval) dev_err(&intf->dev, "unlink reads failed %d, " "iterations left %d\n", retval, i); break; case 12: if (dev->out_pipe == 0 || !param->length) break; retval = 0; dev_info(&intf->dev, "TEST 12: unlink %d writes of %d\n", param->iterations, param->length); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = unlink_simple(dev, dev->out_pipe, param->length); if (retval) dev_err(&intf->dev, "unlink writes failed %d, " "iterations left %d\n", retval, i); break; /* ep halt tests */ case 13: if (dev->out_pipe == 0 && dev->in_pipe == 0) break; retval = 0; dev_info(&intf->dev, "TEST 13: set/clear %d halts\n", param->iterations); for (i = param->iterations; retval == 0 && i--; /* NOP */) retval = halt_simple(dev); if (retval) ERROR(dev, "halts failed, iterations left %d\n", i); break; /* control write tests */ case 14: if (!dev->info->ctrl_out) break; dev_info(&intf->dev, "TEST 14: %d ep0out, %d..%d vary %d\n", param->iterations, realworld ? 
1 : 0, param->length, param->vary); retval = ctrl_out(dev, param->iterations, param->length, param->vary, 0); break; /* iso write tests */ case 15: if (dev->out_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 15: write %d iso, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); /* FIRMWARE: iso sink */ retval = test_queue(dev, param, dev->out_iso_pipe, dev->iso_out, 0); break; /* iso read tests */ case 16: if (dev->in_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 16: read %d iso, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); /* FIRMWARE: iso source */ retval = test_queue(dev, param, dev->in_iso_pipe, dev->iso_in, 0); break; /* FIXME scatterlist cancel (needs helper thread) */ /* Tests for bulk I/O using DMA mapping by core and odd address */ case 17: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 17: write odd addr %d bytes %u times core map\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->out_pipe, param->length, param->iterations, 0, "test17"); break; case 18: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 18: read odd addr %d bytes %u times core map\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->in_pipe, param->length, param->iterations, 0, "test18"); break; /* Tests for bulk I/O using premapped coherent buffer and odd address */ case 19: if (dev->out_pipe == 0) break; dev_info(&intf->dev, "TEST 19: write odd addr %d bytes %u times premapped\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->out_pipe, param->length, param->iterations, URB_NO_TRANSFER_DMA_MAP, "test19"); break; case 20: if (dev->in_pipe == 0) break; dev_info(&intf->dev, "TEST 20: read odd addr %d bytes %u times premapped\n", param->length, param->iterations); retval = test_unaligned_bulk( dev, dev->in_pipe, param->length, param->iterations, URB_NO_TRANSFER_DMA_MAP, "test20"); break; /* control write tests with unaligned buffer */ case 21: if (!dev->info->ctrl_out) break; dev_info(&intf->dev, "TEST 21: %d ep0out odd addr, %d..%d vary %d\n", param->iterations, realworld ? 
1 : 0, param->length, param->vary); retval = ctrl_out(dev, param->iterations, param->length, param->vary, 1); break; /* unaligned iso tests */ case 22: if (dev->out_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 22: write %d iso odd, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); retval = test_queue(dev, param, dev->out_iso_pipe, dev->iso_out, 1); break; case 23: if (dev->in_iso_pipe == 0 || param->sglen == 0) break; dev_info(&intf->dev, "TEST 23: read %d iso odd, %d entries of %d bytes\n", param->iterations, param->sglen, param->length); retval = test_queue(dev, param, dev->in_iso_pipe, dev->iso_in, 1); break; /* unlink URBs from a bulk-OUT queue */ case 24: if (dev->out_pipe == 0 || !param->length || param->sglen < 4) break; retval = 0; dev_info(&intf->dev, "TEST 24: unlink from %d queues of " "%d %d-byte writes\n", param->iterations, param->sglen, param->length); for (i = param->iterations; retval == 0 && i > 0; --i) { retval = unlink_queued(dev, dev->out_pipe, param->sglen, param->length); if (retval) { dev_err(&intf->dev, "unlink queued writes failed %d, " "iterations left %d\n", retval, i); break; } } break; /* Simple non-queued interrupt I/O tests */ case 25: if (dev->out_int_pipe == 0) break; dev_info(&intf->dev, "TEST 25: write %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->out_int_pipe, param->length, dev->int_out->bInterval); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: interrupt sink (maybe accepts short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test25"); simple_free_urb(urb); break; case 26: if (dev->in_int_pipe == 0) break; dev_info(&intf->dev, "TEST 26: read %d bytes %u times\n", param->length, param->iterations); urb = simple_alloc_urb(udev, dev->in_int_pipe, param->length, dev->int_in->bInterval); if (!urb) { retval = -ENOMEM; break; } /* FIRMWARE: interrupt source (maybe generates short writes) */ retval = simple_io(dev, urb, param->iterations, 0, 0, "test26"); simple_free_urb(urb); break; case 27: /* We do performance test, so ignore data compare */ if (dev->out_pipe == 0 || param->sglen == 0 || pattern != 0) break; dev_info(&intf->dev, "TEST 27: bulk write %dMbytes\n", (param->iterations * param->sglen * param->length) / (1024 * 1024)); retval = test_queue(dev, param, dev->out_pipe, NULL, 0); break; case 28: if (dev->in_pipe == 0 || param->sglen == 0 || pattern != 0) break; dev_info(&intf->dev, "TEST 28: bulk read %dMbytes\n", (param->iterations * param->sglen * param->length) / (1024 * 1024)); retval = test_queue(dev, param, dev->in_pipe, NULL, 0); break; /* Test data Toggle/seq_nr clear between bulk out transfers */ case 29: if (dev->out_pipe == 0) break; retval = 0; dev_info(&intf->dev, "TEST 29: Clear toggle between bulk writes %d times\n", param->iterations); for (i = param->iterations; retval == 0 && i > 0; --i) retval = toggle_sync_simple(dev); if (retval) ERROR(dev, "toggle sync failed, iterations left %d\n", i); break; } return retval; } /*-------------------------------------------------------------------------*/ /* We only have this one interface to user space, through usbfs. * User mode code can scan usbfs to find N different devices (maybe on * different busses) to use when testing, and allocate one thread per * test. So discovery is simplified, and we have no device naming issues. * * Don't use these only as stress/load tests. 
Use them along with * other USB bus activity: plugging, unplugging, mousing, mp3 playback, * video capture, and so on. Run different tests at different times, in * different sequences. Nothing here should interact with other devices, * except indirectly by consuming USB bandwidth and CPU resources for test * threads and request completion. But the only way to know that for sure * is to test when HC queues are in use by many devices. * * WARNING: Because usbfs grabs udev->dev.sem before calling this ioctl(), * it locks out usbcore in certain code paths. Notably, if you disconnect * the device-under-test, hub_wq will wait block forever waiting for the * ioctl to complete ... so that usb_disconnect() can abort the pending * urbs and then call usbtest_disconnect(). To abort a test, you're best * off just killing the userspace task and waiting for it to exit. */ static int usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf) { struct usbtest_dev *dev = usb_get_intfdata(intf); struct usbtest_param_64 *param_64 = buf; struct usbtest_param_32 temp; struct usbtest_param_32 *param_32 = buf; struct timespec64 start; struct timespec64 end; struct timespec64 duration; int retval = -EOPNOTSUPP; /* FIXME USBDEVFS_CONNECTINFO doesn't say how fast the device is. */ pattern = mod_pattern; if (mutex_lock_interruptible(&dev->lock)) return -ERESTARTSYS; /* FIXME: What if a system sleep starts while a test is running? */ /* some devices, like ez-usb default devices, need a non-default * altsetting to have any active endpoints. some tests change * altsettings; force a default so most tests don't need to check. */ if (dev->info->alt >= 0) { if (intf->altsetting->desc.bInterfaceNumber) { retval = -ENODEV; goto free_mutex; } retval = set_altsetting(dev, dev->info->alt); if (retval) { dev_err(&intf->dev, "set altsetting to %d failed, %d\n", dev->info->alt, retval); goto free_mutex; } } switch (code) { case USBTEST_REQUEST_64: temp.test_num = param_64->test_num; temp.iterations = param_64->iterations; temp.length = param_64->length; temp.sglen = param_64->sglen; temp.vary = param_64->vary; param_32 = &temp; break; case USBTEST_REQUEST_32: break; default: retval = -EOPNOTSUPP; goto free_mutex; } ktime_get_ts64(&start); retval = usbtest_do_ioctl(intf, param_32); if (retval < 0) goto free_mutex; ktime_get_ts64(&end); duration = timespec64_sub(end, start); temp.duration_sec = duration.tv_sec; temp.duration_usec = duration.tv_nsec/NSEC_PER_USEC; switch (code) { case USBTEST_REQUEST_32: param_32->duration_sec = temp.duration_sec; param_32->duration_usec = temp.duration_usec; break; case USBTEST_REQUEST_64: param_64->duration_sec = temp.duration_sec; param_64->duration_usec = temp.duration_usec; break; } free_mutex: mutex_unlock(&dev->lock); return retval; } /*-------------------------------------------------------------------------*/ static unsigned force_interrupt; module_param(force_interrupt, uint, 0); MODULE_PARM_DESC(force_interrupt, "0 = test default; else interrupt"); #ifdef GENERIC static unsigned short vendor; module_param(vendor, ushort, 0); MODULE_PARM_DESC(vendor, "vendor code (from usb-if)"); static unsigned short product; module_param(product, ushort, 0); MODULE_PARM_DESC(product, "product code (from vendor)"); #endif static int usbtest_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev; struct usbtest_dev *dev; struct usbtest_info *info; char *rtest, *wtest; char *irtest, *iwtest; char *intrtest, *intwtest; udev = interface_to_usbdev(intf); #ifdef 
GENERIC /* specify devices by module parameters? */ if (id->match_flags == 0) { /* vendor match required, product match optional */ if (!vendor || le16_to_cpu(udev->descriptor.idVendor) != (u16)vendor) return -ENODEV; if (product && le16_to_cpu(udev->descriptor.idProduct) != (u16)product) return -ENODEV; dev_info(&intf->dev, "matched module params, " "vend=0x%04x prod=0x%04x\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); } #endif dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; info = (struct usbtest_info *) id->driver_info; dev->info = info; mutex_init(&dev->lock); dev->intf = intf; /* cacheline-aligned scratch for i/o */ dev->buf = kmalloc(TBUF_SIZE, GFP_KERNEL); if (dev->buf == NULL) { kfree(dev); return -ENOMEM; } /* NOTE this doesn't yet test the handful of difference that are * visible with high speed interrupts: bigger maxpacket (1K) and * "high bandwidth" modes (up to 3 packets/uframe). */ rtest = wtest = ""; irtest = iwtest = ""; intrtest = intwtest = ""; if (force_interrupt || udev->speed == USB_SPEED_LOW) { if (info->ep_in) { dev->in_pipe = usb_rcvintpipe(udev, info->ep_in); rtest = " intr-in"; } if (info->ep_out) { dev->out_pipe = usb_sndintpipe(udev, info->ep_out); wtest = " intr-out"; } } else { if (override_alt >= 0 || info->autoconf) { int status; status = get_endpoints(dev, intf); if (status < 0) { WARNING(dev, "couldn't get endpoints, %d\n", status); kfree(dev->buf); kfree(dev); return status; } /* may find bulk or ISO pipes */ } else { if (info->ep_in) dev->in_pipe = usb_rcvbulkpipe(udev, info->ep_in); if (info->ep_out) dev->out_pipe = usb_sndbulkpipe(udev, info->ep_out); } if (dev->in_pipe) rtest = " bulk-in"; if (dev->out_pipe) wtest = " bulk-out"; if (dev->in_iso_pipe) irtest = " iso-in"; if (dev->out_iso_pipe) iwtest = " iso-out"; if (dev->in_int_pipe) intrtest = " int-in"; if (dev->out_int_pipe) intwtest = " int-out"; } usb_set_intfdata(intf, dev); dev_info(&intf->dev, "%s\n", info->name); dev_info(&intf->dev, "%s {control%s%s%s%s%s%s%s} tests%s\n", usb_speed_string(udev->speed), info->ctrl_out ? " in/out" : "", rtest, wtest, irtest, iwtest, intrtest, intwtest, info->alt >= 0 ? " (+alt)" : ""); return 0; } static int usbtest_suspend(struct usb_interface *intf, pm_message_t message) { return 0; } static int usbtest_resume(struct usb_interface *intf) { return 0; } static void usbtest_disconnect(struct usb_interface *intf) { struct usbtest_dev *dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); dev_dbg(&intf->dev, "disconnect\n"); kfree(dev->buf); kfree(dev); } /* Basic testing only needs a device that can source or sink bulk traffic. * Any device can test control transfers (default with GENERIC binding). * * Several entries work with the default EP0 implementation that's built * into EZ-USB chips. There's a default vendor ID which can be overridden * by (very) small config EEPROMS, but otherwise all these devices act * identically until firmware is loaded: only EP0 works. It turns out * to be easy to make other endpoints work, without modifying that EP0 * behavior. For now, we expect that kind of firmware. 
*/ /* an21xx or fx versions of ez-usb */ static struct usbtest_info ez1_info = { .name = "EZ-USB device", .ep_in = 2, .ep_out = 2, .alt = 1, }; /* fx2 version of ez-usb */ static struct usbtest_info ez2_info = { .name = "FX2 device", .ep_in = 6, .ep_out = 2, .alt = 1, }; /* ezusb family device with dedicated usb test firmware, */ static struct usbtest_info fw_info = { .name = "usb test device", .ep_in = 2, .ep_out = 2, .alt = 1, .autoconf = 1, /* iso and ctrl_out need autoconf */ .ctrl_out = 1, .iso = 1, /* iso_ep's are #8 in/out */ }; /* peripheral running Linux and 'zero.c' test firmware, or * its user-mode cousin. different versions of this use * different hardware with the same vendor/product codes. * host side MUST rely on the endpoint descriptors. */ static struct usbtest_info gz_info = { .name = "Linux gadget zero", .autoconf = 1, .ctrl_out = 1, .iso = 1, .intr = 1, .alt = 0, }; static struct usbtest_info um_info = { .name = "Linux user mode test driver", .autoconf = 1, .alt = -1, }; static struct usbtest_info um2_info = { .name = "Linux user mode ISO test driver", .autoconf = 1, .iso = 1, .alt = -1, }; #ifdef IBOT2 /* this is a nice source of high speed bulk data; * uses an FX2, with firmware provided in the device */ static struct usbtest_info ibot2_info = { .name = "iBOT2 webcam", .ep_in = 2, .alt = -1, }; #endif #ifdef GENERIC /* we can use any device to test control traffic */ static struct usbtest_info generic_info = { .name = "Generic USB device", .alt = -1, }; #endif static const struct usb_device_id id_table[] = { /*-------------------------------------------------------------*/ /* EZ-USB devices which download firmware to replace (or in our * case augment) the default device implementation. */ /* generic EZ-USB FX controller */ { USB_DEVICE(0x0547, 0x2235), .driver_info = (unsigned long) &ez1_info, }, /* CY3671 development board with EZ-USB FX */ { USB_DEVICE(0x0547, 0x0080), .driver_info = (unsigned long) &ez1_info, }, /* generic EZ-USB FX2 controller (or development board) */ { USB_DEVICE(0x04b4, 0x8613), .driver_info = (unsigned long) &ez2_info, }, /* re-enumerated usb test device firmware */ { USB_DEVICE(0xfff0, 0xfff0), .driver_info = (unsigned long) &fw_info, }, /* "Gadget Zero" firmware runs under Linux */ { USB_DEVICE(0x0525, 0xa4a0), .driver_info = (unsigned long) &gz_info, }, /* so does a user-mode variant */ { USB_DEVICE(0x0525, 0xa4a4), .driver_info = (unsigned long) &um_info, }, /* ... and a user-mode variant that talks iso */ { USB_DEVICE(0x0525, 0xa4a3), .driver_info = (unsigned long) &um2_info, }, #ifdef KEYSPAN_19Qi /* Keyspan 19qi uses an21xx (original EZ-USB) */ /* this does not coexist with the real Keyspan 19qi driver! */ { USB_DEVICE(0x06cd, 0x010b), .driver_info = (unsigned long) &ez1_info, }, #endif /*-------------------------------------------------------------*/ #ifdef IBOT2 /* iBOT2 makes a nice source of high speed bulk-in data */ /* this does not coexist with a real iBOT2 driver! 
*/ { USB_DEVICE(0x0b62, 0x0059), .driver_info = (unsigned long) &ibot2_info, }, #endif /*-------------------------------------------------------------*/ #ifdef GENERIC /* module params can specify devices to use for control tests */ { .driver_info = (unsigned long) &generic_info, }, #endif /*-------------------------------------------------------------*/ { } }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver usbtest_driver = { .name = "usbtest", .id_table = id_table, .probe = usbtest_probe, .unlocked_ioctl = usbtest_ioctl, .disconnect = usbtest_disconnect, .suspend = usbtest_suspend, .resume = usbtest_resume, }; /*-------------------------------------------------------------------------*/ static int __init usbtest_init(void) { #ifdef GENERIC if (vendor) pr_debug("params: vend=0x%04x prod=0x%04x\n", vendor, product); #endif return usb_register(&usbtest_driver); } module_init(usbtest_init); static void __exit usbtest_exit(void) { usb_deregister(&usbtest_driver); } module_exit(usbtest_exit); MODULE_DESCRIPTION("USB Core/HCD Testing Driver"); MODULE_LICENSE("GPL");
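/*
 * Illustrative sketch, not part of the driver: how user space typically
 * reaches usbtest_do_ioctl() through usbfs, in the style of the testusb
 * utility.  The param struct below must mirror the driver's
 * usbtest_param_32 layout, and the ioctl code is an assumption to be
 * checked against tools/usb/testusb.c in your tree; the usbdevfs_ioctl
 * wrapper and USBDEVFS_IOCTL path are standard usbfs.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/usbdevice_fs.h>

struct usbtest_param_32 {		/* mirror of the driver's 32-bit layout */
	__u32	test_num;		/* 0..(TEST_CASES-1) */
	__u32	iterations;
	__u32	length;
	__u32	vary;
	__u32	sglen;
	__u32	duration_sec;		/* outputs, filled in by the driver */
	__u32	duration_usec;
};
#define USBTEST_REQUEST_32	_IOWR('U', 100, struct usbtest_param_32)

static int run_usbtest(const char *devpath, int ifnum, unsigned test_num)
{
	struct usbtest_param_32 param = {
		.test_num	= test_num,
		.iterations	= 1000,
		.length		= 1024,
		.vary		= 1024,
		.sglen		= 32,
	};
	struct usbdevfs_ioctl wrapper = {
		.ifno		= ifnum,	/* interface bound to usbtest */
		.ioctl_code	= USBTEST_REQUEST_32,
		.data		= &param,
	};
	int fd = open(devpath, O_RDWR);		/* e.g. /dev/bus/usb/001/004 */
	int status;

	if (fd < 0)
		return -1;
	/* blocks until usbtest_do_ioctl() finishes the whole test */
	status = ioctl(fd, USBDEVFS_IOCTL, &wrapper);
	if (status == 0)
		printf("test %u: %u.%06u s\n", test_num,
		       param.duration_sec, param.duration_usec);
	close(fd);
	return status;
}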
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2006, Johannes Berg <johannes@sipsolutions.net> */ /* just for IFNAMSIZ */ #include <linux/if.h> #include <linux/slab.h> #include <linux/export.h> #include "led.h" void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { if (!atomic_read(&local->assoc_led_active)) return; if (associated) led_trigger_event(&local->assoc_led, LED_FULL); else led_trigger_event(&local->assoc_led, LED_OFF); } void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { if (!atomic_read(&local->radio_led_active)) return; if (enabled) led_trigger_event(&local->radio_led, LED_FULL); else led_trigger_event(&local->radio_led, LED_OFF); } void ieee80211_alloc_led_names(struct ieee80211_local *local) { local->rx_led.name = kasprintf(GFP_KERNEL, "%srx", wiphy_name(local->hw.wiphy)); local->tx_led.name = kasprintf(GFP_KERNEL, "%stx", wiphy_name(local->hw.wiphy)); local->assoc_led.name = kasprintf(GFP_KERNEL, "%sassoc", wiphy_name(local->hw.wiphy)); local->radio_led.name = kasprintf(GFP_KERNEL, "%sradio", wiphy_name(local->hw.wiphy)); } void ieee80211_free_led_names(struct ieee80211_local *local) { kfree(local->rx_led.name); kfree(local->tx_led.name); kfree(local->assoc_led.name); kfree(local->radio_led.name); } static int ieee80211_tx_led_activate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, tx_led); atomic_inc(&local->tx_led_active); return 0; } static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, tx_led); atomic_dec(&local->tx_led_active); } static int ieee80211_rx_led_activate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, rx_led); atomic_inc(&local->rx_led_active); return 0; } static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, rx_led); atomic_dec(&local->rx_led_active); } static int
ieee80211_assoc_led_activate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, assoc_led); atomic_inc(&local->assoc_led_active); return 0; } static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, assoc_led); atomic_dec(&local->assoc_led_active); } static int ieee80211_radio_led_activate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, radio_led); atomic_inc(&local->radio_led_active); return 0; } static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, radio_led); atomic_dec(&local->radio_led_active); } static int ieee80211_tpt_led_activate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, tpt_led); atomic_inc(&local->tpt_led_active); return 0; } static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev) { struct ieee80211_local *local = container_of(led_cdev->trigger, struct ieee80211_local, tpt_led); atomic_dec(&local->tpt_led_active); } void ieee80211_led_init(struct ieee80211_local *local) { atomic_set(&local->rx_led_active, 0); local->rx_led.activate = ieee80211_rx_led_activate; local->rx_led.deactivate = ieee80211_rx_led_deactivate; if (local->rx_led.name && led_trigger_register(&local->rx_led)) { kfree(local->rx_led.name); local->rx_led.name = NULL; } atomic_set(&local->tx_led_active, 0); local->tx_led.activate = ieee80211_tx_led_activate; local->tx_led.deactivate = ieee80211_tx_led_deactivate; if (local->tx_led.name && led_trigger_register(&local->tx_led)) { kfree(local->tx_led.name); local->tx_led.name = NULL; } atomic_set(&local->assoc_led_active, 0); local->assoc_led.activate = ieee80211_assoc_led_activate; local->assoc_led.deactivate = ieee80211_assoc_led_deactivate; if (local->assoc_led.name && led_trigger_register(&local->assoc_led)) { kfree(local->assoc_led.name); local->assoc_led.name = NULL; } atomic_set(&local->radio_led_active, 0); local->radio_led.activate = ieee80211_radio_led_activate; local->radio_led.deactivate = ieee80211_radio_led_deactivate; if (local->radio_led.name && led_trigger_register(&local->radio_led)) { kfree(local->radio_led.name); local->radio_led.name = NULL; } atomic_set(&local->tpt_led_active, 0); if (local->tpt_led_trigger) { local->tpt_led.activate = ieee80211_tpt_led_activate; local->tpt_led.deactivate = ieee80211_tpt_led_deactivate; if (led_trigger_register(&local->tpt_led)) { kfree(local->tpt_led_trigger); local->tpt_led_trigger = NULL; } } } void ieee80211_led_exit(struct ieee80211_local *local) { if (local->radio_led.name) led_trigger_unregister(&local->radio_led); if (local->assoc_led.name) led_trigger_unregister(&local->assoc_led); if (local->tx_led.name) led_trigger_unregister(&local->tx_led); if (local->rx_led.name) led_trigger_unregister(&local->rx_led); if (local->tpt_led_trigger) { led_trigger_unregister(&local->tpt_led); kfree(local->tpt_led_trigger); } } const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); return local->radio_led.name; } EXPORT_SYMBOL(__ieee80211_get_radio_led_name); const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); return local->assoc_led.name; } 
EXPORT_SYMBOL(__ieee80211_get_assoc_led_name); const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); return local->tx_led.name; } EXPORT_SYMBOL(__ieee80211_get_tx_led_name); const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); return local->rx_led.name; } EXPORT_SYMBOL(__ieee80211_get_rx_led_name); static unsigned long tpt_trig_traffic(struct ieee80211_local *local, struct tpt_led_trigger *tpt_trig) { unsigned long traffic, delta; traffic = tpt_trig->tx_bytes + tpt_trig->rx_bytes; delta = traffic - tpt_trig->prev_traffic; tpt_trig->prev_traffic = traffic; return DIV_ROUND_UP(delta, 1024 / 8); } static void tpt_trig_timer(struct timer_list *t) { struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer); struct ieee80211_local *local = tpt_trig->local; unsigned long on, off, tpt; int i; if (!tpt_trig->running) return; mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); tpt = tpt_trig_traffic(local, tpt_trig); /* default to just solid on */ on = 1; off = 0; for (i = tpt_trig->blink_table_len - 1; i >= 0; i--) { if (tpt_trig->blink_table[i].throughput < 0 || tpt > tpt_trig->blink_table[i].throughput) { off = tpt_trig->blink_table[i].blink_time / 2; on = tpt_trig->blink_table[i].blink_time - off; break; } } led_trigger_blink(&local->tpt_led, on, off); } const char * __ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len) { struct ieee80211_local *local = hw_to_local(hw); struct tpt_led_trigger *tpt_trig; if (WARN_ON(local->tpt_led_trigger)) return NULL; tpt_trig = kzalloc(sizeof(struct tpt_led_trigger), GFP_KERNEL); if (!tpt_trig) return NULL; snprintf(tpt_trig->name, sizeof(tpt_trig->name), "%stpt", wiphy_name(local->hw.wiphy)); local->tpt_led.name = tpt_trig->name; tpt_trig->blink_table = blink_table; tpt_trig->blink_table_len = blink_table_len; tpt_trig->want = flags; tpt_trig->local = local; timer_setup(&tpt_trig->timer, tpt_trig_timer, 0); local->tpt_led_trigger = tpt_trig; return tpt_trig->name; } EXPORT_SYMBOL(__ieee80211_create_tpt_led_trigger); static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; if (tpt_trig->running) return; /* reset traffic */ tpt_trig_traffic(local, tpt_trig); tpt_trig->running = true; tpt_trig_timer(&tpt_trig->timer); mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); } static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; if (!tpt_trig->running) return; tpt_trig->running = false; del_timer_sync(&tpt_trig->timer); led_trigger_event(&local->tpt_led, LED_OFF); } void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; bool allowed; WARN_ON(types_on & types_off); if (!tpt_trig) return; tpt_trig->active &= ~types_off; tpt_trig->active |= types_on; /* * Regardless of wanted state, we shouldn't blink when * the radio is disabled -- this can happen due to some * code ordering issues with __ieee80211_recalc_idle() * being called before the radio is started. */ allowed = tpt_trig->active & IEEE80211_TPT_LEDTRIG_FL_RADIO; if (!allowed || !(tpt_trig->active & tpt_trig->want)) ieee80211_stop_tpt_led_trig(local); else ieee80211_start_tpt_led_trig(local); }
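/*
 * Illustrative sketch, not part of this file: how a driver might wire up the
 * throughput trigger implemented above.  Thresholds are in kbit/s, matching
 * tpt_trig_traffic()'s DIV_ROUND_UP(delta, 1024 / 8) conversion of a
 * one-second byte delta; blink_time is the full on+off period in msecs,
 * which tpt_trig_timer() splits into halves.  Everything prefixed
 * "example_" is made up for illustration.
 */
#include <net/mac80211.h>

static const struct ieee80211_tpt_blink example_blink_table[] = {
	{ .throughput = 0,		.blink_time = 334 },	/* any traffic: slow blink */
	{ .throughput = 1 * 1024,	.blink_time = 260 },	/* > 1 Mbit/s */
	{ .throughput = 5 * 1024,	.blink_time = 220 },
	{ .throughput = 20 * 1024,	.blink_time = 190 },
	{ .throughput = 100 * 1024,	.blink_time = 130 },	/* busy: fast blink */
};

static void example_register_tpt_led(struct ieee80211_hw *hw)
{
	const char *trig_name;

	/* blink only while the radio is enabled; the returned trigger name
	 * can be used as a led_classdev's default_trigger */
	trig_name = ieee80211_create_tpt_led_trigger(hw,
			IEEE80211_TPT_LEDTRIG_FL_RADIO,
			example_blink_table,
			ARRAY_SIZE(example_blink_table));
	if (!trig_name)
		return;	/* NULL when CONFIG_MAC80211_LEDS is off */
	/* hand trig_name to the LED core, e.g. via a led_classdev */
}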
2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 
3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 
4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 
5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 
5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Routing netlink socket interface: protocol independent part.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov		RTA_OK arithmetic was wrong.
 */

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/mutex.h>
#include <linux/if_addr.h>
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/netdevice.h>

#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/devlink.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/addrconf.h>
#endif
#include <linux/dpll.h>

#include "dev.h"

#define RTNL_MAX_TYPE		50
#define RTNL_SLAVE_MAX_TYPE	44

struct rtnl_link {
	rtnl_doit_func		doit;
	rtnl_dumpit_func	dumpit;
	struct module		*owner;
	unsigned int		flags;
	struct rcu_head		rcu;
};

static DEFINE_MUTEX(rtnl_mutex);

void rtnl_lock(void)
{
	mutex_lock(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_lock);

int rtnl_lock_killable(void)
{
	return mutex_lock_killable(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_lock_killable);

static struct sk_buff *defer_kfree_skb_list;

void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
	if (head && tail) {
		tail->next = defer_kfree_skb_list;
		defer_kfree_skb_list = head;
	}
}
EXPORT_SYMBOL(rtnl_kfree_skbs);

void __rtnl_unlock(void)
{
	struct sk_buff *head = defer_kfree_skb_list;

	defer_kfree_skb_list = NULL;

	/* Ensure that we didn't actually add any TODO item when __rtnl_unlock()
	 * is used. In some places, e.g. in cfg80211, we have code that will do
	 * something like
	 *   rtnl_lock()
	 *   wiphy_lock()
	 *   ...
* rtnl_unlock() * * and because netdev_run_todo() acquires the RTNL for items on the list * we could cause a situation such as this: * Thread 1 Thread 2 * rtnl_lock() * unregister_netdevice() * __rtnl_unlock() * rtnl_lock() * wiphy_lock() * rtnl_unlock() * netdev_run_todo() * __rtnl_unlock() * * // list not empty now * // because of thread 2 * rtnl_lock() * while (!list_empty(...)) * rtnl_lock() * wiphy_lock() * **** DEADLOCK **** * * However, usage of __rtnl_unlock() is rare, and so we can ensure that * it's not used in cases where something is added to do the list. */ WARN_ON(!list_empty(&net_todo_list)); mutex_unlock(&rtnl_mutex); while (head) { struct sk_buff *next = head->next; kfree_skb(head); cond_resched(); head = next; } } void rtnl_unlock(void) { /* This fellow will unlock it for us. */ netdev_run_todo(); } EXPORT_SYMBOL(rtnl_unlock); int rtnl_trylock(void) { return mutex_trylock(&rtnl_mutex); } EXPORT_SYMBOL(rtnl_trylock); int rtnl_is_locked(void) { return mutex_is_locked(&rtnl_mutex); } EXPORT_SYMBOL(rtnl_is_locked); bool refcount_dec_and_rtnl_lock(refcount_t *r) { return refcount_dec_and_mutex_lock(r, &rtnl_mutex); } EXPORT_SYMBOL(refcount_dec_and_rtnl_lock); #ifdef CONFIG_PROVE_LOCKING bool lockdep_rtnl_is_held(void) { return lockdep_is_held(&rtnl_mutex); } EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { int msgindex = msgtype - RTM_BASE; /* * msgindex < 0 implies someone tried to register a netlink * control code. msgindex >= RTM_NR_MSGTYPES may indicate that * the message type has not been added to linux/rtnetlink.h */ BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES); return msgindex; } static struct rtnl_link *rtnl_get_link(int protocol, int msgtype) { struct rtnl_link __rcu **tab; if (protocol >= ARRAY_SIZE(rtnl_msg_handlers)) protocol = PF_UNSPEC; tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]); if (!tab) tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]); return rcu_dereference_rtnl(tab[msgtype]); } static int rtnl_register_internal(struct module *owner, int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, unsigned int flags) { struct rtnl_link *link, *old; struct rtnl_link __rcu **tab; int msgindex; int ret = -ENOBUFS; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); rtnl_lock(); tab = rtnl_dereference(rtnl_msg_handlers[protocol]); if (tab == NULL) { tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL); if (!tab) goto unlock; /* ensures we see the 0 stores */ rcu_assign_pointer(rtnl_msg_handlers[protocol], tab); } old = rtnl_dereference(tab[msgindex]); if (old) { link = kmemdup(old, sizeof(*old), GFP_KERNEL); if (!link) goto unlock; } else { link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) goto unlock; } WARN_ON(link->owner && link->owner != owner); link->owner = owner; WARN_ON(doit && link->doit && link->doit != doit); if (doit) link->doit = doit; WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit); if (dumpit) link->dumpit = dumpit; WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL && (flags & RTNL_FLAG_BULK_DEL_SUPPORTED)); link->flags |= flags; /* publish protocol:msgtype */ rcu_assign_pointer(tab[msgindex], link); ret = 0; if (old) kfree_rcu(old, rcu); unlock: rtnl_unlock(); return ret; } /** * rtnl_register_module - Register a rtnetlink message type * * @owner: module registering the hook (THIS_MODULE) * @protocol: 
Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions * * Like rtnl_register, but for use by removable modules. */ int rtnl_register_module(struct module *owner, int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, unsigned int flags) { return rtnl_register_internal(owner, protocol, msgtype, doit, dumpit, flags); } EXPORT_SYMBOL_GPL(rtnl_register_module); /** * rtnl_register - Register a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the * specified protocol family and message type is received. * * The special protocol family PF_UNSPEC may be used to define fallback * function pointers for the case when no entry for the specific protocol * family exists. */ void rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, unsigned int flags) { int err; err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit, flags); if (err) pr_err("Unable to register rtnetlink message handler, " "protocol = %d, message type = %d\n", protocol, msgtype); } /** * rtnl_unregister - Unregister a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * * Returns 0 on success or a negative error code. */ int rtnl_unregister(int protocol, int msgtype) { struct rtnl_link __rcu **tab; struct rtnl_link *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); rtnl_lock(); tab = rtnl_dereference(rtnl_msg_handlers[protocol]); if (!tab) { rtnl_unlock(); return -ENOENT; } link = rcu_replace_pointer_rtnl(tab[msgindex], NULL); rtnl_unlock(); kfree_rcu(link, rcu); return 0; } EXPORT_SYMBOL_GPL(rtnl_unregister); /** * rtnl_unregister_all - Unregister all rtnetlink message type of a protocol * @protocol : Protocol family or PF_UNSPEC * * Identical to calling rtnl_unregster() for all registered message types * of a certain protocol family. */ void rtnl_unregister_all(int protocol) { struct rtnl_link __rcu **tab; struct rtnl_link *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); rtnl_lock(); tab = rcu_replace_pointer_rtnl(rtnl_msg_handlers[protocol], NULL); if (!tab) { rtnl_unlock(); return; } for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) { link = rcu_replace_pointer_rtnl(tab[msgindex], NULL); kfree_rcu(link, rcu); } rtnl_unlock(); synchronize_net(); kfree(tab); } EXPORT_SYMBOL_GPL(rtnl_unregister_all); static LIST_HEAD(link_ops); static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) { const struct rtnl_link_ops *ops; list_for_each_entry(ops, &link_ops, list) { if (!strcmp(ops->kind, kind)) return ops; } return NULL; } /** * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. * @ops: struct rtnl_link_ops * to register * * The caller must hold the rtnl_mutex. This function should be used * by drivers that create devices during module initialization. It * must be called before registering the devices. 
* * Returns 0 on success or a negative error code. */ int __rtnl_link_register(struct rtnl_link_ops *ops) { if (rtnl_link_ops_get(ops->kind)) return -EEXIST; /* The check for alloc/setup is here because if ops * does not have that filled up, it is not possible * to use the ops for creating device. So do not * fill up dellink as well. That disables rtnl_dellink. */ if ((ops->alloc || ops->setup) && !ops->dellink) ops->dellink = unregister_netdevice_queue; list_add_tail(&ops->list, &link_ops); return 0; } EXPORT_SYMBOL_GPL(__rtnl_link_register); /** * rtnl_link_register - Register rtnl_link_ops with rtnetlink. * @ops: struct rtnl_link_ops * to register * * Returns 0 on success or a negative error code. */ int rtnl_link_register(struct rtnl_link_ops *ops) { int err; /* Sanity-check max sizes to avoid stack buffer overflow. */ if (WARN_ON(ops->maxtype > RTNL_MAX_TYPE || ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)) return -EINVAL; rtnl_lock(); err = __rtnl_link_register(ops); rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) { struct net_device *dev; LIST_HEAD(list_kill); for_each_netdev(net, dev) { if (dev->rtnl_link_ops == ops) ops->dellink(dev, &list_kill); } unregister_netdevice_many(&list_kill); } /** * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. * @ops: struct rtnl_link_ops * to unregister * * The caller must hold the rtnl_mutex and guarantee net_namespace_list * integrity (hold pernet_ops_rwsem for writing to close the race * with setup_net() and cleanup_net()). */ void __rtnl_link_unregister(struct rtnl_link_ops *ops) { struct net *net; for_each_net(net) { __rtnl_kill_links(net, ops); } list_del(&ops->list); } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); /* Return with the rtnl_lock held when there are no network * devices unregistering in any network namespace. */ static void rtnl_lock_unregistering_all(void) { DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { rtnl_lock(); /* We held write locked pernet_ops_rwsem, and parallel * setup_net() and cleanup_net() are not possible. */ if (!atomic_read(&dev_unreg_count)) break; __rtnl_unlock(); wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(&netdev_unregistering_wq, &wait); } /** * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. 
* @ops: struct rtnl_link_ops * to unregister */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { /* Close the race with setup_net() and cleanup_net() */ down_write(&pernet_ops_rwsem); rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) { struct net_device *master_dev; const struct rtnl_link_ops *ops; size_t size = 0; rcu_read_lock(); master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev); if (!master_dev) goto out; ops = master_dev->rtnl_link_ops; if (!ops || !ops->get_slave_size) goto out; /* IFLA_INFO_SLAVE_DATA + nested data */ size = nla_total_size(sizeof(struct nlattr)) + ops->get_slave_size(master_dev, dev); out: rcu_read_unlock(); return size; } static size_t rtnl_link_get_size(const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; size_t size; if (!ops) return 0; size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ if (ops->get_size) /* IFLA_INFO_DATA + nested data */ size += nla_total_size(sizeof(struct nlattr)) + ops->get_size(dev); if (ops->get_xstats_size) /* IFLA_INFO_XSTATS */ size += nla_total_size(ops->get_xstats_size(dev)); size += rtnl_link_get_slave_info_data_size(dev); return size; } static LIST_HEAD(rtnl_af_ops); static const struct rtnl_af_ops *rtnl_af_lookup(const int family) { const struct rtnl_af_ops *ops; ASSERT_RTNL(); list_for_each_entry(ops, &rtnl_af_ops, list) { if (ops->family == family) return ops; } return NULL; } /** * rtnl_af_register - Register rtnl_af_ops with rtnetlink. * @ops: struct rtnl_af_ops * to register * * Returns 0 on success or a negative error code. */ void rtnl_af_register(struct rtnl_af_ops *ops) { rtnl_lock(); list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); } EXPORT_SYMBOL_GPL(rtnl_af_register); /** * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. 
* @ops: struct rtnl_af_ops * to unregister */ void rtnl_af_unregister(struct rtnl_af_ops *ops) { rtnl_lock(); list_del_rcu(&ops->list); rtnl_unlock(); synchronize_rcu(); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); static size_t rtnl_link_get_af_size(const struct net_device *dev, u32 ext_filter_mask) { struct rtnl_af_ops *af_ops; size_t size; /* IFLA_AF_SPEC */ size = nla_total_size(sizeof(struct nlattr)); rcu_read_lock(); list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_link_af_size) { /* AF_* + nested data */ size += nla_total_size(sizeof(struct nlattr)) + af_ops->get_link_af_size(dev, ext_filter_mask); } } rcu_read_unlock(); return size; } static bool rtnl_have_link_slave_info(const struct net_device *dev) { struct net_device *master_dev; bool ret = false; rcu_read_lock(); master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev); if (master_dev && master_dev->rtnl_link_ops) ret = true; rcu_read_unlock(); return ret; } static int rtnl_link_slave_info_fill(struct sk_buff *skb, const struct net_device *dev) { struct net_device *master_dev; const struct rtnl_link_ops *ops; struct nlattr *slave_data; int err; master_dev = netdev_master_upper_dev_get((struct net_device *) dev); if (!master_dev) return 0; ops = master_dev->rtnl_link_ops; if (!ops) return 0; if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0) return -EMSGSIZE; if (ops->fill_slave_info) { slave_data = nla_nest_start_noflag(skb, IFLA_INFO_SLAVE_DATA); if (!slave_data) return -EMSGSIZE; err = ops->fill_slave_info(skb, master_dev, dev); if (err < 0) goto err_cancel_slave_data; nla_nest_end(skb, slave_data); } return 0; err_cancel_slave_data: nla_nest_cancel(skb, slave_data); return err; } static int rtnl_link_info_fill(struct sk_buff *skb, const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; struct nlattr *data; int err; if (!ops) return 0; if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) return -EMSGSIZE; if (ops->fill_xstats) { err = ops->fill_xstats(skb, dev); if (err < 0) return err; } if (ops->fill_info) { data = nla_nest_start_noflag(skb, IFLA_INFO_DATA); if (data == NULL) return -EMSGSIZE; err = ops->fill_info(skb, dev); if (err < 0) goto err_cancel_data; nla_nest_end(skb, data); } return 0; err_cancel_data: nla_nest_cancel(skb, data); return err; } static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *linkinfo; int err = -EMSGSIZE; linkinfo = nla_nest_start_noflag(skb, IFLA_LINKINFO); if (linkinfo == NULL) goto out; err = rtnl_link_info_fill(skb, dev); if (err < 0) goto err_cancel_link; err = rtnl_link_slave_info_fill(skb, dev); if (err < 0) goto err_cancel_link; nla_nest_end(skb, linkinfo); return 0; err_cancel_link: nla_nest_cancel(skb, linkinfo); out: return err; } int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL); } int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) { struct sock *rtnl = net->rtnl; return nlmsg_unicast(rtnl, skb, pid); } EXPORT_SYMBOL(rtnl_unicast); void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, const struct nlmsghdr *nlh, gfp_t flags) { struct sock *rtnl = net->rtnl; nlmsg_notify(rtnl, skb, pid, group, nlmsg_report(nlh), flags); } EXPORT_SYMBOL(rtnl_notify); void rtnl_set_sk_err(struct net *net, u32 group, int error) { struct sock *rtnl = net->rtnl; netlink_set_err(rtnl, 0, group, error); } 
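/*
 * Illustrative sketch, not part of the original file: how a protocol module
 * might hook into the IFLA_AF_SPEC machinery above. Everything below (the
 * "example_*" names, the attribute type, and the RTNETLINK_USAGE_EXAMPLE
 * guard) is hypothetical; real users such as the IPv4/IPv6 code follow the
 * same pattern of providing fill_link_af()/get_link_af_size() and calling
 * rtnl_af_register(), which takes the rtnl_mutex internally.
 */
#ifdef RTNETLINK_USAGE_EXAMPLE
static int example_fill_link_af(struct sk_buff *skb,
				const struct net_device *dev,
				u32 ext_filter_mask)
{
	/* Emit one made-up u32 attribute; returning -ENODATA instead would
	 * make rtnl_fill_link_af() trim the per-family nest entirely.
	 */
	if (nla_put_u32(skb, 1 /* hypothetical attribute type */, dev->mtu))
		return -EMSGSIZE;
	return 0;
}

static size_t example_get_link_af_size(const struct net_device *dev,
				       u32 ext_filter_mask)
{
	return nla_total_size(4);	/* worst case: one u32 attribute */
}

static struct rtnl_af_ops example_af_ops = {
	.family		  = AF_PACKET,	/* placeholder family for the sketch */
	.fill_link_af	  = example_fill_link_af,
	.get_link_af_size = example_get_link_af_size,
};

static int __init example_af_init(void)
{
	/* wired up via module_init() in a real module */
	rtnl_af_register(&example_af_ops);
	return 0;
}
#endif /* RTNETLINK_USAGE_EXAMPLE */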
EXPORT_SYMBOL(rtnl_set_sk_err); int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { struct nlattr *mx; int i, valid = 0; /* nothing is dumped for dst_default_metrics, so just skip the loop */ if (metrics == dst_default_metrics.metrics) return 0; mx = nla_nest_start_noflag(skb, RTA_METRICS); if (mx == NULL) return -ENOBUFS; for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { if (i == RTAX_CC_ALGO - 1) { char tmp[TCP_CA_NAME_MAX], *name; name = tcp_ca_get_name_by_key(metrics[i], tmp); if (!name) continue; if (nla_put_string(skb, i + 1, name)) goto nla_put_failure; } else if (i == RTAX_FEATURES - 1) { u32 user_features = metrics[i] & RTAX_FEATURE_MASK; if (!user_features) continue; BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK); if (nla_put_u32(skb, i + 1, user_features)) goto nla_put_failure; } else { if (nla_put_u32(skb, i + 1, metrics[i])) goto nla_put_failure; } valid++; } } if (!valid) { nla_nest_cancel(skb, mx); return 0; } return nla_nest_end(skb, mx); nla_put_failure: nla_nest_cancel(skb, mx); return -EMSGSIZE; } EXPORT_SYMBOL(rtnetlink_put_metrics); int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { .rta_error = error, .rta_id = id, }; if (dst) { ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); ci.rta_used = dst->__use; ci.rta_clntref = rcuref_read(&dst->__rcuref); } if (expires) { unsigned long clock; clock = jiffies_to_clock_t(abs(expires)); clock = min_t(unsigned long, clock, INT_MAX); ci.rta_expires = (expires > 0) ? clock : -clock; } return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); void netdev_set_operstate(struct net_device *dev, int newstate) { unsigned int old = READ_ONCE(dev->operstate); do { if (old == newstate) return; } while (!try_cmpxchg(&dev->operstate, &old, newstate)); netdev_state_change(dev); } EXPORT_SYMBOL(netdev_set_operstate); static void set_operstate(struct net_device *dev, unsigned char transition) { unsigned char operstate = READ_ONCE(dev->operstate); switch (transition) { case IF_OPER_UP: if ((operstate == IF_OPER_DORMANT || operstate == IF_OPER_TESTING || operstate == IF_OPER_UNKNOWN) && !netif_dormant(dev) && !netif_testing(dev)) operstate = IF_OPER_UP; break; case IF_OPER_TESTING: if (netif_oper_up(dev)) operstate = IF_OPER_TESTING; break; case IF_OPER_DORMANT: if (netif_oper_up(dev)) operstate = IF_OPER_DORMANT; break; } netdev_set_operstate(dev, operstate); } static unsigned int rtnl_dev_get_flags(const struct net_device *dev) { return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) | (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); } static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, const struct ifinfomsg *ifm) { unsigned int flags = ifm->ifi_flags; /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ if (ifm->ifi_change) flags = (flags & ifm->ifi_change) | (rtnl_dev_get_flags(dev) & ~ifm->ifi_change); return flags; } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, const struct rtnl_link_stats64 *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; a->rx_bytes = b->rx_bytes; a->tx_bytes = b->tx_bytes; a->rx_errors = b->rx_errors; a->tx_errors = b->tx_errors; a->rx_dropped = b->rx_dropped; a->tx_dropped = b->tx_dropped; a->multicast = b->multicast; a->collisions = b->collisions; a->rx_length_errors = b->rx_length_errors; a->rx_over_errors = b->rx_over_errors; a->rx_crc_errors = b->rx_crc_errors; a->rx_frame_errors = b->rx_frame_errors; 
a->rx_fifo_errors = b->rx_fifo_errors; a->rx_missed_errors = b->rx_missed_errors; a->tx_aborted_errors = b->tx_aborted_errors; a->tx_carrier_errors = b->tx_carrier_errors; a->tx_fifo_errors = b->tx_fifo_errors; a->tx_heartbeat_errors = b->tx_heartbeat_errors; a->tx_window_errors = b->tx_window_errors; a->rx_compressed = b->rx_compressed; a->tx_compressed = b->tx_compressed; a->rx_nohandler = b->rx_nohandler; } /* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev, u32 ext_filter_mask) { if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) { int num_vfs = dev_num_vf(dev->dev.parent); size_t size = nla_total_size(0); size += num_vfs * (nla_total_size(0) + nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_broadcast)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */ nla_total_size(MAX_VLAN_LIST_LEN * sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_tx_rate)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + nla_total_size(sizeof(struct ifla_vf_trust))); if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { size += num_vfs * (nla_total_size(0) + /* nest IFLA_VF_STATS */ /* IFLA_VF_STATS_RX_PACKETS */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_PACKETS */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_RX_BYTES */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_BYTES */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_BROADCAST */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_RX_DROPPED */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_DROPPED */ nla_total_size_64bit(sizeof(__u64))); } return size; } else return 0; } static size_t rtnl_port_size(const struct net_device *dev, u32 ext_filter_mask) { size_t port_size = nla_total_size(4) /* PORT_VF */ + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ + nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */ + nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */ + nla_total_size(1) /* PROT_VDP_REQUEST */ + nla_total_size(2); /* PORT_VDP_RESPONSE */ size_t vf_ports_size = nla_total_size(sizeof(struct nlattr)); size_t vf_port_size = nla_total_size(sizeof(struct nlattr)) + port_size; size_t port_self_size = nla_total_size(sizeof(struct nlattr)) + port_size; if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; if (dev_num_vf(dev->dev.parent)) return port_self_size + vf_ports_size + vf_port_size * dev_num_vf(dev->dev.parent); else return port_self_size; } static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ nla_total_size(1) + /* XDP_ATTACHED */ nla_total_size(4) + /* XDP_PROG_ID (or 1st mode) */ nla_total_size(4); /* XDP_<mode>_PROG_ID */ return xdp_size; } static size_t rtnl_prop_list_size(const struct net_device *dev) { struct netdev_name_node *name_node; unsigned int cnt = 0; rcu_read_lock(); list_for_each_entry_rcu(name_node, &dev->name_node->list, list) cnt++; rcu_read_unlock(); if (!cnt) return 0; return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ); } static size_t rtnl_proto_down_size(const struct net_device *dev) { size_t size = nla_total_size(1); if (dev->proto_down_reason) size += nla_total_size(0) + nla_total_size(4); return size; } 
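/*
 * Illustrative sketch, not part of the original file: the *_size() helpers
 * above (rtnl_xdp_size(), rtnl_proto_down_size(), ...) pre-compute worst-case
 * attribute space with nla_total_size() so that if_nlmsg_size() can size the
 * skb before any attribute is written. A hypothetical nested attribute would
 * pair a size helper with a fill helper like this; EXAMPLE_NEST/EXAMPLE_VALUE
 * and the RTNETLINK_USAGE_EXAMPLE guard are made up for the sketch.
 */
#ifdef RTNETLINK_USAGE_EXAMPLE
#define EXAMPLE_NEST	1	/* hypothetical nest attribute type */
#define EXAMPLE_VALUE	2	/* hypothetical u32 attribute type */

static size_t example_nest_size(void)
{
	return nla_total_size(0) +	/* EXAMPLE_NEST header */
	       nla_total_size(4);	/* EXAMPLE_VALUE payload */
}

static int example_nest_fill(struct sk_buff *skb, u32 value)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, EXAMPLE_NEST);
	if (!nest)
		return -EMSGSIZE;
	if (nla_put_u32(skb, EXAMPLE_VALUE, value)) {
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}
	nla_nest_end(skb, nest);
	return 0;
}
#endif /* RTNETLINK_USAGE_EXAMPLE */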
static size_t rtnl_devlink_port_size(const struct net_device *dev) { size_t size = nla_total_size(0); /* nest IFLA_DEVLINK_PORT */ if (dev->devlink_port) size += devlink_nl_port_handle_size(dev->devlink_port); return size; } static size_t rtnl_dpll_pin_size(const struct net_device *dev) { size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ size += dpll_netdev_pin_handle_size(dev); return size; } static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) + nla_total_size_64bit(sizeof(struct rtnl_link_stats64)) + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + nla_total_size(4) /* IFLA_TXQLEN */ + nla_total_size(4) /* IFLA_WEIGHT */ + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_ALLMULTI */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + nla_total_size(4) /* IFLA_GSO_IPV4_MAX_SIZE */ + nla_total_size(4) /* IFLA_GRO_IPV4_MAX_SIZE */ + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + nla_total_size(4) /* IFLA_LINK_NETNSID */ + nla_total_size(4) /* IFLA_GROUP */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 
4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(4) /* IFLA_NEW_IFINDEX */ + rtnl_proto_down_size(dev) /* proto down */ + nla_total_size(4) /* IFLA_TARGET_NETNSID */ + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */ + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */ + nla_total_size(4) /* IFLA_MIN_MTU */ + nla_total_size(4) /* IFLA_MAX_MTU */ + rtnl_prop_list_size(dev) + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + rtnl_devlink_port_size(dev) + rtnl_dpll_pin_size(dev) + 0; } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) { struct nlattr *vf_ports; struct nlattr *vf_port; int vf; int err; vf_ports = nla_nest_start_noflag(skb, IFLA_VF_PORTS); if (!vf_ports) return -EMSGSIZE; for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { vf_port = nla_nest_start_noflag(skb, IFLA_VF_PORT); if (!vf_port) goto nla_put_failure; if (nla_put_u32(skb, IFLA_PORT_VF, vf)) goto nla_put_failure; err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); if (err == -EMSGSIZE) goto nla_put_failure; if (err) { nla_nest_cancel(skb, vf_port); continue; } nla_nest_end(skb, vf_port); } nla_nest_end(skb, vf_ports); return 0; nla_put_failure: nla_nest_cancel(skb, vf_ports); return -EMSGSIZE; } static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) { struct nlattr *port_self; int err; port_self = nla_nest_start_noflag(skb, IFLA_PORT_SELF); if (!port_self) return -EMSGSIZE; err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb); if (err) { nla_nest_cancel(skb, port_self); return (err == -EMSGSIZE) ? 
err : 0; } nla_nest_end(skb, port_self); return 0; } static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev, u32 ext_filter_mask) { int err; if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; err = rtnl_port_self_fill(skb, dev); if (err) return err; if (dev_num_vf(dev->dev.parent)) { err = rtnl_vf_ports_fill(skb, dev); if (err) return err; } return 0; } static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; struct netdev_phys_item_id ppid; err = dev_get_phys_port_id(dev, &ppid); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } if (nla_put(skb, IFLA_PHYS_PORT_ID, ppid.id_len, ppid.id)) return -EMSGSIZE; return 0; } static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) { char name[IFNAMSIZ]; int err; err = dev_get_phys_port_name(dev, name, sizeof(name)); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name)) return -EMSGSIZE; return 0; } static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { struct netdev_phys_item_id ppid = { }; int err; err = dev_get_port_parent_id(dev, &ppid, false); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } if (nla_put(skb, IFLA_PHYS_SWITCH_ID, ppid.id_len, ppid.id)) return -EMSGSIZE; return 0; } static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb, struct net_device *dev) { struct rtnl_link_stats64 *sp; struct nlattr *attr; attr = nla_reserve_64bit(skb, IFLA_STATS64, sizeof(struct rtnl_link_stats64), IFLA_PAD); if (!attr) return -EMSGSIZE; sp = nla_data(attr); dev_get_stats(dev, sp); attr = nla_reserve(skb, IFLA_STATS, sizeof(struct rtnl_link_stats)); if (!attr) return -EMSGSIZE; copy_rtnl_link_stats(nla_data(attr), sp); return 0; } static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct net_device *dev, int vfs_num, u32 ext_filter_mask) { struct ifla_vf_rss_query_en vf_rss_query_en; struct nlattr *vf, *vfstats, *vfvlanlist; struct ifla_vf_link_state vf_linkstate; struct ifla_vf_vlan_info vf_vlan_info; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_stats vf_stats; struct ifla_vf_trust vf_trust; struct ifla_vf_vlan vf_vlan; struct ifla_vf_rate vf_rate; struct ifla_vf_mac vf_mac; struct ifla_vf_broadcast vf_broadcast; struct ifla_vf_info ivi; struct ifla_vf_guid node_guid; struct ifla_vf_guid port_guid; memset(&ivi, 0, sizeof(ivi)); /* Not all SR-IOV capable drivers support the * spoofcheck and "RSS query enable" query. Preset to * -1 so the user space tool can detect that the driver * didn't report anything. 
*/ ivi.spoofchk = -1; ivi.rss_query_en = -1; ivi.trusted = -1; /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero */ ivi.linkstate = 0; /* VLAN Protocol by default is 802.1Q */ ivi.vlan_proto = htons(ETH_P_8021Q); if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi)) return 0; memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); memset(&node_guid, 0, sizeof(node_guid)); memset(&port_guid, 0, sizeof(port_guid)); vf_mac.vf = vf_vlan.vf = vf_vlan_info.vf = vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = vf_rss_query_en.vf = vf_trust.vf = node_guid.vf = port_guid.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_vlan_info.vlan = ivi.vlan; vf_vlan_info.qos = ivi.qos; vf_vlan_info.vlan_proto = ivi.vlan_proto; vf_tx_rate.rate = ivi.max_tx_rate; vf_rate.min_tx_rate = ivi.min_tx_rate; vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = ivi.trusted; vf = nla_nest_start_noflag(skb, IFLA_VF_INFO); if (!vf) return -EMSGSIZE; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), &vf_rate) || nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), &vf_spoofchk) || nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), &vf_linkstate) || nla_put(skb, IFLA_VF_RSS_QUERY_EN, sizeof(vf_rss_query_en), &vf_rss_query_en) || nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; if (dev->netdev_ops->ndo_get_vf_guid && !dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid, &port_guid)) { if (nla_put(skb, IFLA_VF_IB_NODE_GUID, sizeof(node_guid), &node_guid) || nla_put(skb, IFLA_VF_IB_PORT_GUID, sizeof(port_guid), &port_guid)) goto nla_put_vf_failure; } vfvlanlist = nla_nest_start_noflag(skb, IFLA_VF_VLAN_LIST); if (!vfvlanlist) goto nla_put_vf_failure; if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), &vf_vlan_info)) { nla_nest_cancel(skb, vfvlanlist); goto nla_put_vf_failure; } nla_nest_end(skb, vfvlanlist); if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, &vf_stats); vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); if (!vfstats) goto nla_put_vf_failure; if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, vf_stats.rx_packets, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, vf_stats.tx_packets, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, vf_stats.broadcast, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, vf_stats.multicast, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { nla_nest_cancel(skb, vfstats); goto nla_put_vf_failure; } nla_nest_end(skb, vfstats); } nla_nest_end(skb, vf); return 0; nla_put_vf_failure: 
nla_nest_cancel(skb, vf); return -EMSGSIZE; } static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, struct net_device *dev, u32 ext_filter_mask) { struct nlattr *vfinfo; int i, num_vfs; if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0)) return 0; num_vfs = dev_num_vf(dev->dev.parent); if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs)) return -EMSGSIZE; if (!dev->netdev_ops->ndo_get_vf_config) return 0; vfinfo = nla_nest_start_noflag(skb, IFLA_VFINFO_LIST); if (!vfinfo) return -EMSGSIZE; for (i = 0; i < num_vfs; i++) { if (rtnl_fill_vfinfo(skb, dev, i, ext_filter_mask)) { nla_nest_cancel(skb, vfinfo); return -EMSGSIZE; } } nla_nest_end(skb, vfinfo); return 0; } static int rtnl_fill_link_ifmap(struct sk_buff *skb, const struct net_device *dev) { struct rtnl_link_ifmap map; memset(&map, 0, sizeof(map)); map.mem_start = READ_ONCE(dev->mem_start); map.mem_end = READ_ONCE(dev->mem_end); map.base_addr = READ_ONCE(dev->base_addr); map.irq = READ_ONCE(dev->irq); map.dma = READ_ONCE(dev->dma); map.port = READ_ONCE(dev->if_port); if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD)) return -EMSGSIZE; return 0; } static u32 rtnl_xdp_prog_skb(struct net_device *dev) { const struct bpf_prog *generic_xdp_prog; ASSERT_RTNL(); generic_xdp_prog = rtnl_dereference(dev->xdp_prog); if (!generic_xdp_prog) return 0; return generic_xdp_prog->aux->id; } static u32 rtnl_xdp_prog_drv(struct net_device *dev) { return dev_xdp_prog_id(dev, XDP_MODE_DRV); } static u32 rtnl_xdp_prog_hw(struct net_device *dev) { return dev_xdp_prog_id(dev, XDP_MODE_HW); } static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev, u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr, u32 (*get_prog_id)(struct net_device *dev)) { u32 curr_id; int err; curr_id = get_prog_id(dev); if (!curr_id) return 0; *prog_id = curr_id; err = nla_put_u32(skb, attr, curr_id); if (err) return err; if (*mode != XDP_ATTACHED_NONE) *mode = XDP_ATTACHED_MULTI; else *mode = tgt_mode; return 0; } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { struct nlattr *xdp; u32 prog_id; int err; u8 mode; xdp = nla_nest_start_noflag(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; prog_id = 0; mode = XDP_ATTACHED_NONE; err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB, IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb); if (err) goto err_cancel; err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV, IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv); if (err) goto err_cancel; err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW, IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw); if (err) goto err_cancel; err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode); if (err) goto err_cancel; if (prog_id && mode != XDP_ATTACHED_MULTI) { err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id); if (err) goto err_cancel; } nla_nest_end(skb, xdp); return 0; err_cancel: nla_nest_cancel(skb, xdp); return err; } static u32 rtnl_get_event(unsigned long event) { u32 rtnl_event_type = IFLA_EVENT_NONE; switch (event) { case NETDEV_REBOOT: rtnl_event_type = IFLA_EVENT_REBOOT; break; case NETDEV_FEAT_CHANGE: rtnl_event_type = IFLA_EVENT_FEATURES; break; case NETDEV_BONDING_FAILOVER: rtnl_event_type = IFLA_EVENT_BONDING_FAILOVER; break; case NETDEV_NOTIFY_PEERS: rtnl_event_type = IFLA_EVENT_NOTIFY_PEERS; break; case NETDEV_RESEND_IGMP: rtnl_event_type = IFLA_EVENT_IGMP_RESEND; break; case NETDEV_CHANGEINFODATA: rtnl_event_type = IFLA_EVENT_BONDING_OPTIONS; break; default: break; } return rtnl_event_type; } static int 
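/* Report IFLA_MASTER as the ifindex of the device's master (upper) device,
 * if it has one; the lookup is done under rcu_read_lock().
 */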
put_master_ifindex(struct sk_buff *skb, struct net_device *dev) { const struct net_device *upper_dev; int ret = 0; rcu_read_lock(); upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); rcu_read_unlock(); return ret; } static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev, bool force) { int iflink = dev_get_iflink(dev); if (force || READ_ONCE(dev->ifindex) != iflink) return nla_put_u32(skb, IFLA_LINK, iflink); return 0; } static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, struct net_device *dev) { char buf[IFALIASZ]; int ret; ret = dev_get_alias(dev, buf, sizeof(buf)); return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0; } static int rtnl_fill_link_netnsid(struct sk_buff *skb, const struct net_device *dev, struct net *src_net, gfp_t gfp) { bool put_iflink = false; if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { int id = peernet2id_alloc(src_net, link_net, gfp); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) return -EMSGSIZE; put_iflink = true; } } return nla_put_iflink(skb, dev, put_iflink); } static int rtnl_fill_link_af(struct sk_buff *skb, const struct net_device *dev, u32 ext_filter_mask) { const struct rtnl_af_ops *af_ops; struct nlattr *af_spec; af_spec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af_spec) return -EMSGSIZE; list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { struct nlattr *af; int err; if (!af_ops->fill_link_af) continue; af = nla_nest_start_noflag(skb, af_ops->family); if (!af) return -EMSGSIZE; err = af_ops->fill_link_af(skb, dev, ext_filter_mask); /* * Caller may return ENODATA to indicate that there * was no data to be dumped. This is not an error, it * means we should trim the attribute header and * continue. */ if (err == -ENODATA) nla_nest_cancel(skb, af); else if (err < 0) return -EMSGSIZE; nla_nest_end(skb, af); } nla_nest_end(skb, af_spec); return 0; } static int rtnl_fill_alt_ifnames(struct sk_buff *skb, const struct net_device *dev) { struct netdev_name_node *name_node; int count = 0; list_for_each_entry_rcu(name_node, &dev->name_node->list, list) { if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name)) return -EMSGSIZE; count++; } return count; } /* RCU protected. 
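 * rtnl_fill_alt_ifnames() walks dev->name_node->list with
 * list_for_each_entry_rcu(); rtnl_fill_ifinfo() takes rcu_read_lock()
 * around the rtnl_fill_prop_list() call that ends up here.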
*/ static int rtnl_fill_prop_list(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *prop_list; int ret; prop_list = nla_nest_start(skb, IFLA_PROP_LIST); if (!prop_list) return -EMSGSIZE; ret = rtnl_fill_alt_ifnames(skb, dev); if (ret <= 0) goto nest_cancel; nla_nest_end(skb, prop_list); return 0; nest_cancel: nla_nest_cancel(skb, prop_list); return ret; } static int rtnl_fill_proto_down(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *pr; u32 preason; if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) goto nla_put_failure; preason = dev->proto_down_reason; if (!preason) return 0; pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON); if (!pr) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) { nla_nest_cancel(skb, pr); goto nla_put_failure; } nla_nest_end(skb, pr); return 0; nla_put_failure: return -EMSGSIZE; } static int rtnl_fill_devlink_port(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *devlink_port_nest; int ret; devlink_port_nest = nla_nest_start(skb, IFLA_DEVLINK_PORT); if (!devlink_port_nest) return -EMSGSIZE; if (dev->devlink_port) { ret = devlink_nl_port_handle_fill(skb, dev->devlink_port); if (ret < 0) goto nest_cancel; } nla_nest_end(skb, devlink_port_nest); return 0; nest_cancel: nla_nest_cancel(skb, devlink_port_nest); return ret; } static int rtnl_fill_dpll_pin(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *dpll_pin_nest; int ret; dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN); if (!dpll_pin_nest) return -EMSGSIZE; ret = dpll_netdev_add_pin_handle(skb, dev); if (ret < 0) goto nest_cancel; nla_nest_end(skb, dpll_pin_nest); return 0; nest_cancel: nla_nest_cancel(skb, dpll_pin_nest); return ret; } static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, u32 event, int *new_nsid, int new_ifindex, int tgt_netnsid, gfp_t gfp) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; struct Qdisc *qdisc; ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifi_family = AF_UNSPEC; ifm->__ifi_pad = 0; ifm->ifi_type = dev->type; ifm->ifi_index = dev->ifindex; ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = change; if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) goto nla_put_failure; qdisc = rtnl_dereference(dev->qdisc); if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || nla_put_u8(skb, IFLA_OPERSTATE, netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN) || nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) || nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) || nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) || nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) || nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) || nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_up_count) + atomic_read(&dev->carrier_down_count)) || nla_put_u32(skb, IFLA_CARRIER_UP_COUNT, atomic_read(&dev->carrier_up_count)) || nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT, atomic_read(&dev->carrier_down_count))) goto nla_put_failure; if (rtnl_fill_proto_down(skb, dev)) goto nla_put_failure; if (event != IFLA_EVENT_NONE) { if (nla_put_u32(skb, IFLA_EVENT, event)) goto nla_put_failure; } if (dev->addr_len) { if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) || nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast)) goto nla_put_failure; } if (rtnl_phys_port_id_fill(skb, dev)) goto nla_put_failure; if (rtnl_phys_port_name_fill(skb, dev)) goto nla_put_failure; if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; if (rtnl_fill_stats(skb, dev)) goto nla_put_failure; if (rtnl_fill_vf(skb, dev, ext_filter_mask)) goto nla_put_failure; if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; if (rtnl_xdp_fill(skb, dev)) goto nla_put_failure; if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) { if (rtnl_link_fill(skb, dev) < 0) goto nla_put_failure; } if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp)) goto nla_put_failure; if (new_nsid && nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) goto nla_put_failure; if (new_ifindex && nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0) goto nla_put_failure; if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) && nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr)) goto nla_put_failure; rcu_read_lock(); if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) goto nla_put_failure_rcu; if (rtnl_fill_link_ifmap(skb, dev)) goto nla_put_failure_rcu; if (rtnl_fill_prop_list(skb, dev)) goto nla_put_failure_rcu; rcu_read_unlock(); if (dev->dev.parent && nla_put_string(skb, IFLA_PARENT_DEV_NAME, dev_name(dev->dev.parent))) goto nla_put_failure; if (dev->dev.parent && dev->dev.parent->bus && nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME, dev->dev.parent->bus->name)) goto nla_put_failure; if (rtnl_fill_devlink_port(skb, dev)) goto nla_put_failure; if (rtnl_fill_dpll_pin(skb, dev)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure_rcu: rcu_read_unlock(); nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, 
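	/* Incoming IFLA_* attributes on RTM_NEWLINK/RTM_SETLINK/RTM_DELLINK/
	 * RTM_GETLINK requests are validated against this table (see the
	 * nlmsg_parse*() calls below); e.g. an MTU change arrives as an
	 * IFLA_MTU attribute and must be an NLA_U32. NLA_REJECT entries mark
	 * values the kernel reports but does not accept from user space.
	 */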
	[IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
	[IFLA_MTU] = { .type = NLA_U32 },
	[IFLA_LINK] = { .type = NLA_U32 },
	[IFLA_MASTER] = { .type = NLA_U32 },
	[IFLA_CARRIER] = { .type = NLA_U8 },
	[IFLA_TXQLEN] = { .type = NLA_U32 },
	[IFLA_WEIGHT] = { .type = NLA_U32 },
	[IFLA_OPERSTATE] = { .type = NLA_U8 },
	[IFLA_LINKMODE] = { .type = NLA_U8 },
	[IFLA_LINKINFO] = { .type = NLA_NESTED },
	[IFLA_NET_NS_PID] = { .type = NLA_U32 },
	[IFLA_NET_NS_FD] = { .type = NLA_U32 },
	/* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
	 * allow 0-length string (needed to remove an alias).
	 */
	[IFLA_IFALIAS] = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
	[IFLA_VFINFO_LIST] = { .type = NLA_NESTED },
	[IFLA_VF_PORTS] = { .type = NLA_NESTED },
	[IFLA_PORT_SELF] = { .type = NLA_NESTED },
	[IFLA_AF_SPEC] = { .type = NLA_NESTED },
	[IFLA_EXT_MASK] = { .type = NLA_U32 },
	[IFLA_PROMISCUITY] = { .type = NLA_U32 },
	[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
	[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
	[IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 },
	[IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 },
	[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
	[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 },	/* ignored */
	[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
	[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
	[IFLA_PROTO_DOWN] = { .type = NLA_U8 },
	[IFLA_XDP] = { .type = NLA_NESTED },
	[IFLA_EVENT] = { .type = NLA_U32 },
	[IFLA_GROUP] = { .type = NLA_U32 },
	[IFLA_TARGET_NETNSID] = { .type = NLA_S32 },
	[IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
	[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
	[IFLA_MIN_MTU] = { .type = NLA_U32 },
	[IFLA_MAX_MTU] = { .type = NLA_U32 },
	[IFLA_PROP_LIST] = { .type = NLA_NESTED },
	[IFLA_ALT_IFNAME] = { .type = NLA_STRING, .len = ALTIFNAMSIZ - 1 },
	[IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
	[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
	[IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
	[IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING },
	[IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 },
	[IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT },
	[IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT },
	[IFLA_ALLMULTI] = { .type = NLA_REJECT },
	[IFLA_GSO_IPV4_MAX_SIZE] = { .type = NLA_U32 },
	[IFLA_GRO_IPV4_MAX_SIZE] = { .type = NLA_U32 },
};

static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
	[IFLA_INFO_KIND] = { .type = NLA_STRING },
	[IFLA_INFO_DATA] = { .type = NLA_NESTED },
	[IFLA_INFO_SLAVE_KIND] = { .type = NLA_STRING },
	[IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED },
};

static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
	[IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) },
	[IFLA_VF_BROADCAST] = { .type = NLA_REJECT },
	[IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) },
	[IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED },
	[IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) },
	[IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) },
	[IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) },
	[IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
	[IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
	[IFLA_VF_STATS] = { .type = NLA_NESTED },
	[IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) },
	[IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) },
	[IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) },
};

static const struct nla_policy
ifla_port_policy[IFLA_PORT_MAX+1] = { [IFLA_PORT_VF] = { .type = NLA_U32 }, [IFLA_PORT_PROFILE] = { .type = NLA_STRING, .len = PORT_PROFILE_MAX }, [IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY, .len = PORT_UUID_MAX }, [IFLA_PORT_HOST_UUID] = { .type = NLA_STRING, .len = PORT_UUID_MAX }, [IFLA_PORT_REQUEST] = { .type = NLA_U8, }, [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, /* Unused, but we need to keep it here since user space could * fill it. It's also broken with regard to NLA_BINARY use in * combination with structs. */ [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY, .len = sizeof(struct ifla_port_vsi) }, }; static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { [IFLA_XDP_UNSPEC] = { .strict_start_type = IFLA_XDP_EXPECTED_FD }, [IFLA_XDP_FD] = { .type = NLA_S32 }, [IFLA_XDP_EXPECTED_FD] = { .type = NLA_S32 }, [IFLA_XDP_ATTACHED] = { .type = NLA_U8 }, [IFLA_XDP_FLAGS] = { .type = NLA_U32 }, [IFLA_XDP_PROG_ID] = { .type = NLA_U32 }, }; static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) { const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0) return NULL; if (linfo[IFLA_INFO_KIND]) { char kind[MODULE_NAME_LEN]; nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); ops = rtnl_link_ops_get(kind); } return ops; } static bool link_master_filtered(struct net_device *dev, int master_idx) { struct net_device *master; if (!master_idx) return false; master = netdev_master_upper_dev_get(dev); /* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need * another invalid value for ifindex to denote "no master". */ if (master_idx == -1) return !!master; if (!master || master->ifindex != master_idx) return true; return false; } static bool link_kind_filtered(const struct net_device *dev, const struct rtnl_link_ops *kind_ops) { if (kind_ops && dev->rtnl_link_ops != kind_ops) return true; return false; } static bool link_dump_filtered(struct net_device *dev, int master_idx, const struct rtnl_link_ops *kind_ops) { if (link_master_filtered(dev, master_idx) || link_kind_filtered(dev, kind_ops)) return true; return false; } /** * rtnl_get_net_ns_capable - Get netns if sufficiently privileged. * @sk: netlink socket * @netnsid: network namespace identifier * * Returns the network namespace identified by netnsid on success or an error * pointer on failure. */ struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid) { struct net *net; net = get_net_ns_by_id(sock_net(sk), netnsid); if (!net) return ERR_PTR(-EINVAL); /* For now, the caller is required to have CAP_NET_ADMIN in * the user namespace owning the target net ns. 
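 * That is the sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN) check below;
 * if it fails, the namespace reference is dropped and -EACCES is returned.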
*/ if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) { put_net(net); return ERR_PTR(-EACCES); } return net; } EXPORT_SYMBOL_GPL(rtnl_get_net_ns_capable); static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, bool strict_check, struct nlattr **tb, struct netlink_ext_ack *extack) { int hdrlen; if (strict_check) { struct ifinfomsg *ifm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG(extack, "Invalid header for link dump"); return -EINVAL; } ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change) { NL_SET_ERR_MSG(extack, "Invalid values in header for link dump request"); return -EINVAL; } if (ifm->ifi_index) { NL_SET_ERR_MSG(extack, "Filter by device index not supported for link dumps"); return -EINVAL; } return nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); } /* A hack to preserve kernel<->userspace interface. * The correct header is ifinfomsg. It is consistent with rtnl_getlink. * However, before Linux v3.9 the code here assumed rtgenmsg and that's * what iproute2 < v3.9.0 used. * We can detect the old iproute2. Even including the IFLA_EXT_MASK * attribute, its netlink message is shorter than struct ifinfomsg. */ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); return nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, extack); } static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { const struct rtnl_link_ops *kind_ops = NULL; struct netlink_ext_ack *extack = cb->extack; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; struct nlattr *tb[IFLA_MAX+1]; struct { unsigned long ifindex; } *ctx = (void *)cb->ctx; struct net *tgt_net = net; u32 ext_filter_mask = 0; struct net_device *dev; int master_idx = 0; int netnsid = -1; int err, i; err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack); if (err < 0) { if (cb->strict_check) return err; goto walk_entries; } for (i = 0; i <= IFLA_MAX; ++i) { if (!tb[i]) continue; /* new attributes should only be added with strict checking */ switch (i) { case IFLA_TARGET_NETNSID: netnsid = nla_get_s32(tb[i]); tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid); if (IS_ERR(tgt_net)) { NL_SET_ERR_MSG(extack, "Invalid target network namespace id"); return PTR_ERR(tgt_net); } break; case IFLA_EXT_MASK: ext_filter_mask = nla_get_u32(tb[i]); break; case IFLA_MASTER: master_idx = nla_get_u32(tb[i]); break; case IFLA_LINKINFO: kind_ops = linkinfo_to_kind_ops(tb[i]); break; default: if (cb->strict_check) { NL_SET_ERR_MSG(extack, "Unsupported attribute in link dump request"); return -EINVAL; } } } if (master_idx || kind_ops) flags |= NLM_F_DUMP_FILTERED; walk_entries: err = 0; for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { if (link_dump_filtered(dev, master_idx, kind_ops)) continue; err = rtnl_fill_ifinfo(skb, dev, net, RTM_NEWLINK, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, 0, flags, ext_filter_mask, 0, NULL, 0, netnsid, GFP_KERNEL); if (err < 0) break; } cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); if (netnsid >= 0) put_net(tgt_net); return err; } int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, struct netlink_ext_ack *exterr) { const struct ifinfomsg *ifmp; const struct nlattr *attrs; size_t len; ifmp = nla_data(nla_peer); attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg); len = nla_len(nla_peer) - sizeof(struct 
ifinfomsg); if (ifmp->ifi_index < 0) { NL_SET_ERR_MSG_ATTR(exterr, nla_peer, "ifindex can't be negative"); return -EINVAL; } return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy, exterr); } EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg); struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; /* Examine the link attributes and figure out which * network namespace we are talking about. */ if (tb[IFLA_NET_NS_PID]) net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); else if (tb[IFLA_NET_NS_FD]) net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); else net = get_net(src_net); return net; } EXPORT_SYMBOL(rtnl_link_get_net); /* Figure out which network namespace we are talking about by * examining the link attributes in the following order: * * 1. IFLA_NET_NS_PID * 2. IFLA_NET_NS_FD * 3. IFLA_TARGET_NETNSID */ static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net, struct nlattr *tb[]) { struct net *net; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) return rtnl_link_get_net(src_net, tb); if (!tb[IFLA_TARGET_NETNSID]) return get_net(src_net); net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_TARGET_NETNSID])); if (!net) return ERR_PTR(-EINVAL); return net; } static struct net *rtnl_link_get_net_capable(const struct sk_buff *skb, struct net *src_net, struct nlattr *tb[], int cap) { struct net *net; net = rtnl_link_get_net_by_nlattr(src_net, tb); if (IS_ERR(net)) return net; if (!netlink_ns_capable(skb, net->user_ns, cap)) { put_net(net); return ERR_PTR(-EPERM); } return net; } /* Verify that rtnetlink requests do not pass additional properties * potentially referring to different network namespaces. */ static int rtnl_ensure_unique_netns(struct nlattr *tb[], struct netlink_ext_ack *extack, bool netns_id_only) { if (netns_id_only) { if (!tb[IFLA_NET_NS_PID] && !tb[IFLA_NET_NS_FD]) return 0; NL_SET_ERR_MSG(extack, "specified netns attribute not supported"); return -EOPNOTSUPP; } if (tb[IFLA_TARGET_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD])) goto invalid_attr; if (tb[IFLA_NET_NS_PID] && (tb[IFLA_TARGET_NETNSID] || tb[IFLA_NET_NS_FD])) goto invalid_attr; if (tb[IFLA_NET_NS_FD] && (tb[IFLA_TARGET_NETNSID] || tb[IFLA_NET_NS_PID])) goto invalid_attr; return 0; invalid_attr: NL_SET_ERR_MSG(extack, "multiple netns identifying attributes specified"); return -EINVAL; } static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate) { const struct net_device_ops *ops = dev->netdev_ops; if (!ops->ndo_set_vf_rate) return -EOPNOTSUPP; if (max_tx_rate && max_tx_rate < min_tx_rate) return -EINVAL; return ops->ndo_set_vf_rate(dev, vf, min_tx_rate, max_tx_rate); } static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS] && nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) return -EINVAL; if (tb[IFLA_BROADCAST] && nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) return -EINVAL; if (tb[IFLA_GSO_MAX_SIZE] && nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) { NL_SET_ERR_MSG(extack, "too big gso_max_size"); return -EINVAL; } if (tb[IFLA_GSO_MAX_SEGS] && (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS || nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) { NL_SET_ERR_MSG(extack, "too big gso_max_segs"); return -EINVAL; } if (tb[IFLA_GRO_MAX_SIZE] && nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) { NL_SET_ERR_MSG(extack, "too big gro_max_size"); return -EINVAL; } if (tb[IFLA_GSO_IPV4_MAX_SIZE] && nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) 
> dev->tso_max_size) { NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size"); return -EINVAL; } if (tb[IFLA_GRO_IPV4_MAX_SIZE] && nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) { NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size"); return -EINVAL; } if (tb[IFLA_AF_SPEC]) { struct nlattr *af; int rem, err; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops; af_ops = rtnl_af_lookup(nla_type(af)); if (!af_ops) return -EAFNOSUPPORT; if (!af_ops->set_link_af) return -EOPNOTSUPP; if (af_ops->validate_link_af) { err = af_ops->validate_link_af(dev, af, extack); if (err < 0) return err; } } } return 0; } static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type) { const struct net_device_ops *ops = dev->netdev_ops; return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type); } static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type) { if (dev->type != ARPHRD_INFINIBAND) return -EOPNOTSUPP; return handle_infiniband_guid(dev, ivt, guid_type); } static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { const struct net_device_ops *ops = dev->netdev_ops; int err = -EINVAL; if (tb[IFLA_VF_MAC]) { struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); if (ivm->vf >= INT_MAX) return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_mac) err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); if (err < 0) return err; } if (tb[IFLA_VF_VLAN]) { struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); if (ivv->vf >= INT_MAX) return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, ivv->qos, htons(ETH_P_8021Q)); if (err < 0) return err; } if (tb[IFLA_VF_VLAN_LIST]) { struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN]; struct nlattr *attr; int rem, len = 0; err = -EOPNOTSUPP; if (!ops->ndo_set_vf_vlan) return err; nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { if (nla_type(attr) != IFLA_VF_VLAN_INFO || nla_len(attr) < NLA_HDRLEN) { return -EINVAL; } if (len >= MAX_VLAN_LIST_LEN) return -EOPNOTSUPP; ivvl[len] = nla_data(attr); len++; } if (len == 0) return -EINVAL; if (ivvl[0]->vf >= INT_MAX) return -EINVAL; err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) return err; } if (tb[IFLA_VF_TX_RATE]) { struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); struct ifla_vf_info ivf; if (ivt->vf >= INT_MAX) return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_get_vf_config) err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); if (err < 0) return err; err = rtnl_set_vf_rate(dev, ivt->vf, ivf.min_tx_rate, ivt->rate); if (err < 0) return err; } if (tb[IFLA_VF_RATE]) { struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); if (ivt->vf >= INT_MAX) return -EINVAL; err = rtnl_set_vf_rate(dev, ivt->vf, ivt->min_tx_rate, ivt->max_tx_rate); if (err < 0) return err; } if (tb[IFLA_VF_SPOOFCHK]) { struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); if (ivs->vf >= INT_MAX) return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_spoofchk) err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, ivs->setting); if (err < 0) return err; } if (tb[IFLA_VF_LINK_STATE]) { struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); if (ivl->vf >= INT_MAX) return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_link_state) err = ops->ndo_set_vf_link_state(dev, ivl->vf, ivl->link_state); if (err < 0) return err; } if (tb[IFLA_VF_RSS_QUERY_EN]) { struct ifla_vf_rss_query_en *ivrssq_en; err = -EOPNOTSUPP; 
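		/* Same pattern as the other IFLA_VF_* attributes above: the VF
		 * index is range-checked and the request fails with
		 * -EOPNOTSUPP unless the driver implements the matching ndo op.
		 */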
ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); if (ivrssq_en->vf >= INT_MAX) return -EINVAL; if (ops->ndo_set_vf_rss_query_en) err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, ivrssq_en->setting); if (err < 0) return err; } if (tb[IFLA_VF_TRUST]) { struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); if (ivt->vf >= INT_MAX) return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_trust) err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); if (err < 0) return err; } if (tb[IFLA_VF_IB_NODE_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); if (ivt->vf >= INT_MAX) return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); } if (tb[IFLA_VF_IB_PORT_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); if (ivt->vf >= INT_MAX) return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID); } return err; } static int do_set_master(struct net_device *dev, int ifindex, struct netlink_ext_ack *extack) { struct net_device *upper_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops; int err; if (upper_dev) { if (upper_dev->ifindex == ifindex) return 0; ops = upper_dev->netdev_ops; if (ops->ndo_del_slave) { err = ops->ndo_del_slave(upper_dev, dev); if (err) return err; } else { return -EOPNOTSUPP; } } if (ifindex) { upper_dev = __dev_get_by_index(dev_net(dev), ifindex); if (!upper_dev) return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { err = ops->ndo_add_slave(upper_dev, dev, extack); if (err) return err; } else { return -EOPNOTSUPP; } } return 0; } static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = { [IFLA_PROTO_DOWN_REASON_MASK] = { .type = NLA_U32 }, [IFLA_PROTO_DOWN_REASON_VALUE] = { .type = NLA_U32 }, }; static int do_set_proto_down(struct net_device *dev, struct nlattr *nl_proto_down, struct nlattr *nl_proto_down_reason, struct netlink_ext_ack *extack) { struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1]; unsigned long mask = 0; u32 value; bool proto_down; int err; if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) { NL_SET_ERR_MSG(extack, "Protodown not supported by device"); return -EOPNOTSUPP; } if (nl_proto_down_reason) { err = nla_parse_nested_deprecated(pdreason, IFLA_PROTO_DOWN_REASON_MAX, nl_proto_down_reason, ifla_proto_down_reason_policy, NULL); if (err < 0) return err; if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) { NL_SET_ERR_MSG(extack, "Invalid protodown reason value"); return -EINVAL; } value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]); if (pdreason[IFLA_PROTO_DOWN_REASON_MASK]) mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]); dev_change_proto_down_reason(dev, mask, value); } if (nl_proto_down) { proto_down = nla_get_u8(nl_proto_down); /* Don't turn off protodown if there are active reasons */ if (!proto_down && dev->proto_down_reason) { NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); return -EBUSY; } err = dev_change_proto_down(dev, proto_down); if (err) return err; } return 0; } #define DO_SETLINK_MODIFIED 0x01 /* notify flag means notify + modified. 
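 * DO_SETLINK_MODIFIED only records that something changed; DO_SETLINK_NOTIFY
 * (0x03, i.e. modified plus the notify bit) additionally makes do_setlink()
 * call netdev_state_change() on its way out.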
*/ #define DO_SETLINK_NOTIFY 0x03 static int do_setlink(const struct sk_buff *skb, struct net_device *dev, struct ifinfomsg *ifm, struct netlink_ext_ack *extack, struct nlattr **tb, int status) { const struct net_device_ops *ops = dev->netdev_ops; char ifname[IFNAMSIZ]; int err; if (tb[IFLA_IFNAME]) nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else ifname[0] = '\0'; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { const char *pat = ifname[0] ? ifname : NULL; struct net *net; int new_ifindex; net = rtnl_link_get_net_capable(skb, dev_net(dev), tb, CAP_NET_ADMIN); if (IS_ERR(net)) { err = PTR_ERR(net); goto errout; } if (tb[IFLA_NEW_IFINDEX]) new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]); else new_ifindex = 0; err = __dev_change_net_namespace(dev, net, pat, new_ifindex); put_net(net); if (err) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; if (!ops->ndo_set_config) { err = -EOPNOTSUPP; goto errout; } if (!netif_device_present(dev)) { err = -ENODEV; goto errout; } u_map = nla_data(tb[IFLA_MAP]); k_map.mem_start = (unsigned long) u_map->mem_start; k_map.mem_end = (unsigned long) u_map->mem_end; k_map.base_addr = (unsigned short) u_map->base_addr; k_map.irq = (unsigned char) u_map->irq; k_map.dma = (unsigned char) u_map->dma; k_map.port = (unsigned char) u_map->port; err = ops->ndo_set_config(dev, &k_map); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; } if (tb[IFLA_ADDRESS]) { struct sockaddr *sa; int len; len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len, sizeof(*sa)); sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; goto errout; } sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); err = dev_set_mac_address_user(dev, sa, extack); kfree(sa); if (err) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_MTU]) { err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_GROUP]) { dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); status |= DO_SETLINK_NOTIFY; } /* * Interface selected by interface index but interface * name provided implies that a name change has been * requested. 
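 * (When the device was selected by IFLA_IFNAME instead, the name was the
 * lookup key rather than a rename request, hence the ifi_index > 0 check.)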
*/ if (ifm->ifi_index > 0 && ifname[0]) { err = dev_change_name(dev, ifname); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_IFALIAS]) { err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), nla_len(tb[IFLA_IFALIAS])); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; } if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); } if (ifm->ifi_flags || ifm->ifi_change) { err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), extack); if (err < 0) goto errout; } if (tb[IFLA_MASTER]) { err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_CARRIER]) { err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_TXQLEN]) { unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); err = dev_change_tx_queue_len(dev, value); if (err) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_GSO_MAX_SIZE]) { u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]); if (dev->gso_max_size ^ max_size) { netif_set_gso_max_size(dev, max_size); status |= DO_SETLINK_MODIFIED; } } if (tb[IFLA_GSO_MAX_SEGS]) { u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); if (dev->gso_max_segs ^ max_segs) { netif_set_gso_max_segs(dev, max_segs); status |= DO_SETLINK_MODIFIED; } } if (tb[IFLA_GRO_MAX_SIZE]) { u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]); if (dev->gro_max_size ^ gro_max_size) { netif_set_gro_max_size(dev, gro_max_size); status |= DO_SETLINK_MODIFIED; } } if (tb[IFLA_GSO_IPV4_MAX_SIZE]) { u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]); if (dev->gso_ipv4_max_size ^ max_size) { netif_set_gso_ipv4_max_size(dev, max_size); status |= DO_SETLINK_MODIFIED; } } if (tb[IFLA_GRO_IPV4_MAX_SIZE]) { u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]); if (dev->gro_ipv4_max_size ^ gro_max_size) { netif_set_gro_ipv4_max_size(dev, gro_max_size); status |= DO_SETLINK_MODIFIED; } } if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) { unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]); if (dev->link_mode ^ value) status |= DO_SETLINK_NOTIFY; WRITE_ONCE(dev->link_mode, value); } if (tb[IFLA_VFINFO_LIST]) { struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { if (nla_type(attr) != IFLA_VF_INFO || nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } err = nla_parse_nested_deprecated(vfinfo, IFLA_VF_MAX, attr, ifla_vf_policy, NULL); if (err < 0) goto errout; err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; } } err = 0; if (tb[IFLA_VF_PORTS]) { struct nlattr *port[IFLA_PORT_MAX+1]; struct nlattr *attr; int vf; int rem; err = -EOPNOTSUPP; if (!ops->ndo_set_vf_port) goto errout; nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) { if (nla_type(attr) != IFLA_VF_PORT || nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX, attr, ifla_port_policy, NULL); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { err = -EOPNOTSUPP; goto errout; } vf = nla_get_u32(port[IFLA_PORT_VF]); err = ops->ndo_set_vf_port(dev, vf, port); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; } } err = 0; if (tb[IFLA_PORT_SELF]) { struct nlattr *port[IFLA_PORT_MAX+1]; err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX, tb[IFLA_PORT_SELF], ifla_port_policy, NULL); 
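		/* IFLA_PORT_SELF describes the port attributes of this device
		 * itself (not one of its VFs); it is passed to the same
		 * ndo_set_vf_port() hook using the PORT_SELF_VF index.
		 */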
if (err < 0) goto errout; err = -EOPNOTSUPP; if (ops->ndo_set_vf_port) err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; } if (tb[IFLA_AF_SPEC]) { struct nlattr *af; int rem; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops; BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); err = af_ops->set_link_af(dev, af, extack); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; } } err = 0; if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) { err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN], tb[IFLA_PROTO_DOWN_REASON], extack); if (err) goto errout; status |= DO_SETLINK_NOTIFY; } if (tb[IFLA_XDP]) { struct nlattr *xdp[IFLA_XDP_MAX + 1]; u32 xdp_flags = 0; err = nla_parse_nested_deprecated(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], ifla_xdp_policy, NULL); if (err < 0) goto errout; if (xdp[IFLA_XDP_ATTACHED] || xdp[IFLA_XDP_PROG_ID]) { err = -EINVAL; goto errout; } if (xdp[IFLA_XDP_FLAGS]) { xdp_flags = nla_get_u32(xdp[IFLA_XDP_FLAGS]); if (xdp_flags & ~XDP_FLAGS_MASK) { err = -EINVAL; goto errout; } if (hweight32(xdp_flags & XDP_FLAGS_MODES) > 1) { err = -EINVAL; goto errout; } } if (xdp[IFLA_XDP_FD]) { int expected_fd = -1; if (xdp_flags & XDP_FLAGS_REPLACE) { if (!xdp[IFLA_XDP_EXPECTED_FD]) { err = -EINVAL; goto errout; } expected_fd = nla_get_s32(xdp[IFLA_XDP_EXPECTED_FD]); } err = dev_change_xdp_fd(dev, extack, nla_get_s32(xdp[IFLA_XDP_FD]), expected_fd, xdp_flags); if (err) goto errout; status |= DO_SETLINK_NOTIFY; } } errout: if (status & DO_SETLINK_MODIFIED) { if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) netdev_state_change(dev); if (err < 0) net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", dev->name); } return err; } static struct net_device *rtnl_dev_get(struct net *net, struct nlattr *tb[]) { char ifname[ALTIFNAMSIZ]; if (tb[IFLA_IFNAME]) nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else if (tb[IFLA_ALT_IFNAME]) nla_strscpy(ifname, tb[IFLA_ALT_IFNAME], ALTIFNAMSIZ); else return NULL; return __dev_get_by_name(net, ifname); } static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; int err; struct nlattr *tb[IFLA_MAX+1]; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) goto errout; err = rtnl_ensure_unique_netns(tb, extack, false); if (err < 0) goto errout; err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) dev = rtnl_dev_get(net, tb); else goto errout; if (dev == NULL) { err = -ENODEV; goto errout; } err = validate_linkmsg(dev, tb, extack); if (err < 0) goto errout; err = do_setlink(skb, dev, ifm, extack, tb, 0); errout: return err; } static int rtnl_group_dellink(const struct net *net, int group) { struct net_device *dev, *aux; LIST_HEAD(list_kill); bool found = false; if (!group) return -EPERM; for_each_netdev(net, dev) { if (dev->group == group) { const struct rtnl_link_ops *ops; found = true; ops = dev->rtnl_link_ops; if (!ops || !ops->dellink) return -EOPNOTSUPP; } } if (!found) return -ENODEV; for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { const struct rtnl_link_ops *ops; ops = dev->rtnl_link_ops; ops->dellink(dev, &list_kill); } } 
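	/* All group members were queued on list_kill by their ->dellink()
	 * above (the first pass already verified every member supports it),
	 * so they can be torn down in one batch here.
	 */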
unregister_netdevice_many(&list_kill); return 0; } int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *nlh) { const struct rtnl_link_ops *ops; LIST_HEAD(list_kill); ops = dev->rtnl_link_ops; if (!ops || !ops->dellink) return -EOPNOTSUPP; ops->dellink(dev, &list_kill); unregister_netdevice_many_notify(&list_kill, portid, nlh); return 0; } EXPORT_SYMBOL_GPL(rtnl_delete_link); static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); u32 portid = NETLINK_CB(skb).portid; struct net *tgt_net = net; struct net_device *dev = NULL; struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; int err; int netnsid = -1; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) return err; err = rtnl_ensure_unique_netns(tb, extack, true); if (err < 0) return err; if (tb[IFLA_TARGET_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]); tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid); if (IS_ERR(tgt_net)) return PTR_ERR(tgt_net); } err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) dev = rtnl_dev_get(net, tb); else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else goto out; if (!dev) { if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME] || ifm->ifi_index > 0) err = -ENODEV; goto out; } err = rtnl_delete_link(dev, portid, nlh); out: if (netnsid >= 0) put_net(tgt_net); return err; } int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, u32 portid, const struct nlmsghdr *nlh) { unsigned int old_flags; int err; old_flags = dev->flags; if (ifm && (ifm->ifi_flags || ifm->ifi_change)) { err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), NULL); if (err < 0) return err; } if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags), portid, nlh); } else { dev->rtnl_link_state = RTNL_LINK_INITIALIZED; __dev_notify_flags(dev, old_flags, ~0U, portid, nlh); } return 0; } EXPORT_SYMBOL(rtnl_configure_link); struct net_device *rtnl_create_link(struct net *net, const char *ifname, unsigned char name_assign_type, const struct rtnl_link_ops *ops, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct net_device *dev; unsigned int num_tx_queues = 1; unsigned int num_rx_queues = 1; int err; if (tb[IFLA_NUM_TX_QUEUES]) num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]); else if (ops->get_num_tx_queues) num_tx_queues = ops->get_num_tx_queues(); if (tb[IFLA_NUM_RX_QUEUES]) num_rx_queues = nla_get_u32(tb[IFLA_NUM_RX_QUEUES]); else if (ops->get_num_rx_queues) num_rx_queues = ops->get_num_rx_queues(); if (num_tx_queues < 1 || num_tx_queues > 4096) { NL_SET_ERR_MSG(extack, "Invalid number of transmit queues"); return ERR_PTR(-EINVAL); } if (num_rx_queues < 1 || num_rx_queues > 4096) { NL_SET_ERR_MSG(extack, "Invalid number of receive queues"); return ERR_PTR(-EINVAL); } if (ops->alloc) { dev = ops->alloc(tb, ifname, name_assign_type, num_tx_queues, num_rx_queues); if (IS_ERR(dev)) return dev; } else { dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, ops->setup, num_tx_queues, num_rx_queues); } if (!dev) return ERR_PTR(-ENOMEM); err = validate_linkmsg(dev, tb, extack); if (err < 0) { free_netdev(dev); return ERR_PTR(err); } dev_net_set(dev, net); dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; if 
(tb[IFLA_MTU]) { u32 mtu = nla_get_u32(tb[IFLA_MTU]); err = dev_validate_mtu(dev, mtu, extack); if (err) { free_netdev(dev); return ERR_PTR(err); } dev->mtu = mtu; } if (tb[IFLA_ADDRESS]) { __dev_addr_set(dev, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS])); dev->addr_assign_type = NET_ADDR_SET; } if (tb[IFLA_BROADCAST]) memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), nla_len(tb[IFLA_BROADCAST])); if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); if (tb[IFLA_GSO_MAX_SIZE]) netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE])); if (tb[IFLA_GSO_MAX_SEGS]) netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS])); if (tb[IFLA_GRO_MAX_SIZE]) netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE])); if (tb[IFLA_GSO_IPV4_MAX_SIZE]) netif_set_gso_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE])); if (tb[IFLA_GRO_IPV4_MAX_SIZE]) netif_set_gro_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE])); return dev; } EXPORT_SYMBOL(rtnl_create_link); static int rtnl_group_changelink(const struct sk_buff *skb, struct net *net, int group, struct ifinfomsg *ifm, struct netlink_ext_ack *extack, struct nlattr **tb) { struct net_device *dev, *aux; int err; for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; err = do_setlink(skb, dev, ifm, extack, tb, 0); if (err < 0) return err; } } return 0; } static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, const struct rtnl_link_ops *ops, const struct nlmsghdr *nlh, struct nlattr **tb, struct nlattr **data, struct netlink_ext_ack *extack) { unsigned char name_assign_type = NET_NAME_USER; struct net *net = sock_net(skb->sk); u32 portid = NETLINK_CB(skb).portid; struct net *dest_net, *link_net; struct net_device *dev; char ifname[IFNAMSIZ]; int err; if (!ops->alloc && !ops->setup) return -EOPNOTSUPP; if (tb[IFLA_IFNAME]) { nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); } else { snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); name_assign_type = NET_NAME_ENUM; } dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); if (IS_ERR(dest_net)) return PTR_ERR(dest_net); if (tb[IFLA_LINK_NETNSID]) { int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); link_net = get_net_ns_by_id(dest_net, id); if (!link_net) { NL_SET_ERR_MSG(extack, "Unknown network namespace id"); err = -EINVAL; goto out; } err = -EPERM; if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) goto out; } else { link_net = NULL; } dev = rtnl_create_link(link_net ? : dest_net, ifname, name_assign_type, ops, tb, extack); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; } dev->ifindex = ifm->ifi_index; if (ops->newlink) err = ops->newlink(link_net ? 
: net, dev, tb, data, extack); else err = register_netdevice(dev); if (err < 0) { free_netdev(dev); goto out; } err = rtnl_configure_link(dev, ifm, portid, nlh); if (err < 0) goto out_unregister; if (link_net) { err = dev_change_net_namespace(dev, dest_net, ifname); if (err < 0) goto out_unregister; } if (tb[IFLA_MASTER]) { err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) goto out_unregister; } out: if (link_net) put_net(link_net); put_net(dest_net); return err; out_unregister: if (ops->newlink) { LIST_HEAD(list_kill); ops->dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); } else { unregister_netdevice(dev); } goto out; } struct rtnl_newlink_tbs { struct nlattr *tb[IFLA_MAX + 1]; struct nlattr *attr[RTNL_MAX_TYPE + 1]; struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; }; static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct rtnl_newlink_tbs *tbs, struct netlink_ext_ack *extack) { struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; struct nlattr ** const tb = tbs->tb; const struct rtnl_link_ops *m_ops; struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct nlattr **slave_data; char kind[MODULE_NAME_LEN]; struct net_device *dev; struct ifinfomsg *ifm; struct nlattr **data; bool link_specified; int err; #ifdef CONFIG_MODULES replay: #endif err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) return err; err = rtnl_ensure_unique_netns(tb, extack, false); if (err < 0) return err; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { link_specified = true; dev = __dev_get_by_index(net, ifm->ifi_index); } else if (ifm->ifi_index < 0) { NL_SET_ERR_MSG(extack, "ifindex can't be negative"); return -EINVAL; } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) { link_specified = true; dev = rtnl_dev_get(net, tb); } else { link_specified = false; dev = NULL; } master_dev = NULL; m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) m_ops = master_dev->rtnl_link_ops; } if (tb[IFLA_LINKINFO]) { err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO], ifla_info_policy, NULL); if (err < 0) return err; } else memset(linkinfo, 0, sizeof(linkinfo)); if (linkinfo[IFLA_INFO_KIND]) { nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); ops = rtnl_link_ops_get(kind); } else { kind[0] = '\0'; ops = NULL; } data = NULL; if (ops) { if (ops->maxtype > RTNL_MAX_TYPE) return -EINVAL; if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype, linkinfo[IFLA_INFO_DATA], ops->policy, extack); if (err < 0) return err; data = tbs->attr; } if (ops->validate) { err = ops->validate(tb, data, extack); if (err < 0) return err; } } slave_data = NULL; if (m_ops) { if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE) return -EINVAL; if (m_ops->slave_maxtype && linkinfo[IFLA_INFO_SLAVE_DATA]) { err = nla_parse_nested_deprecated(tbs->slave_attr, m_ops->slave_maxtype, linkinfo[IFLA_INFO_SLAVE_DATA], m_ops->slave_policy, extack); if (err < 0) return err; slave_data = tbs->slave_attr; } } if (dev) { int status = 0; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; if (linkinfo[IFLA_INFO_DATA]) { if (!ops || ops != dev->rtnl_link_ops || !ops->changelink) return -EOPNOTSUPP; err = ops->changelink(dev, tb, data, extack); if (err < 0) return err; status |= DO_SETLINK_NOTIFY; 
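			/* The type-specific changelink() has been applied;
			 * slave data (if any) is handled next and the rest of
			 * the IFLA_* attributes go through do_setlink() with
			 * the accumulated status flags.
			 */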
} if (linkinfo[IFLA_INFO_SLAVE_DATA]) { if (!m_ops || !m_ops->slave_changelink) return -EOPNOTSUPP; err = m_ops->slave_changelink(master_dev, dev, tb, slave_data, extack); if (err < 0) return err; status |= DO_SETLINK_NOTIFY; } return do_setlink(skb, dev, ifm, extack, tb, status); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { /* No dev found and NLM_F_CREATE not set. Requested dev does not exist, * or it's for a group */ if (link_specified) return -ENODEV; if (tb[IFLA_GROUP]) return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), ifm, extack, tb); return -ENODEV; } if (tb[IFLA_MAP] || tb[IFLA_PROTINFO]) return -EOPNOTSUPP; if (!ops) { #ifdef CONFIG_MODULES if (kind[0]) { __rtnl_unlock(); request_module("rtnl-link-%s", kind); rtnl_lock(); ops = rtnl_link_ops_get(kind); if (ops) goto replay; } #endif NL_SET_ERR_MSG(extack, "Unknown device type"); return -EOPNOTSUPP; } return rtnl_newlink_create(skb, ifm, ops, nlh, tb, data, extack); } static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct rtnl_newlink_tbs *tbs; int ret; tbs = kmalloc(sizeof(*tbs), GFP_KERNEL); if (!tbs) return -ENOMEM; ret = __rtnl_newlink(skb, nlh, tbs, extack); kfree(tbs); return ret; } static int rtnl_valid_getlink_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct ifinfomsg *ifm; int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG(extack, "Invalid header for get link"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change) { NL_SET_ERR_MSG(extack, "Invalid values in header for get link request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err) return err; for (i = 0; i <= IFLA_MAX; i++) { if (!tb[i]) continue; switch (i) { case IFLA_IFNAME: case IFLA_ALT_IFNAME: case IFLA_EXT_MASK: case IFLA_TARGET_NETNSID: break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in get link request"); return -EINVAL; } } return 0; } static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct net *tgt_net = net; struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; int netnsid = -1; int err; u32 ext_filter_mask = 0; err = rtnl_valid_getlink_req(skb, nlh, tb, extack); if (err < 0) return err; err = rtnl_ensure_unique_netns(tb, extack, true); if (err < 0) return err; if (tb[IFLA_TARGET_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]); tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid); if (IS_ERR(tgt_net)) return PTR_ERR(tgt_net); } if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) dev = rtnl_dev_get(tgt_net, tb); else goto out; err = -ENODEV; if (dev == NULL) goto out; err = -ENOBUFS; nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask)); if (nskb == NULL) goto out; /* Synchronize the carrier state so we don't report a state * that we're not actually going to honour immediately; if * the driver just did a carrier off->on transition, we can * only TX if link watch work has run, but without 
this we'd * already report carrier on, even if it doesn't work yet. */ linkwatch_sync_dev(dev); err = rtnl_fill_ifinfo(nskb, dev, net, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL, 0, netnsid, GFP_KERNEL); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); out: if (netnsid >= 0) put_net(tgt_net); return err; } static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, bool *changed, struct netlink_ext_ack *extack) { char *alt_ifname; size_t size; int err; err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack); if (err) return err; if (cmd == RTM_NEWLINKPROP) { size = rtnl_prop_list_size(dev); size += nla_total_size(ALTIFNAMSIZ); if (size >= U16_MAX) { NL_SET_ERR_MSG(extack, "effective property list too long"); return -EINVAL; } } alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT); if (!alt_ifname) return -ENOMEM; if (cmd == RTM_NEWLINKPROP) { err = netdev_name_node_alt_create(dev, alt_ifname); if (!err) alt_ifname = NULL; } else if (cmd == RTM_DELLINKPROP) { err = netdev_name_node_alt_destroy(dev, alt_ifname); } else { WARN_ON_ONCE(1); err = -EINVAL; } kfree(alt_ifname); if (!err) *changed = true; return err; } static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; struct ifinfomsg *ifm; bool changed = false; struct nlattr *attr; int err, rem; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err) return err; err = rtnl_ensure_unique_netns(tb, extack, true); if (err) return err; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) dev = rtnl_dev_get(net, tb); else return -EINVAL; if (!dev) return -ENODEV; if (!tb[IFLA_PROP_LIST]) return 0; nla_for_each_nested(attr, tb[IFLA_PROP_LIST], rem) { switch (nla_type(attr)) { case IFLA_ALT_IFNAME: err = rtnl_alt_ifname(cmd, dev, attr, &changed, extack); if (err) return err; break; } } if (changed) netdev_state_change(dev); return 0; } static int rtnl_newlinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { return rtnl_linkprop(RTM_NEWLINKPROP, skb, nlh, extack); } static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack); } static u32 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); size_t min_ifinfo_dump_size = 0; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; struct net_device *dev; int hdrlen; /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); if (nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } if (!ext_filter_mask) return NLMSG_GOODSIZE; /* * traverse the list of net devices and compute the minimum * buffer size based upon the filter mask. 
*/ rcu_read_lock(); for_each_netdev_rcu(net, dev) { min_ifinfo_dump_size = max(min_ifinfo_dump_size, if_nlmsg_size(dev, ext_filter_mask)); } rcu_read_unlock(); return nlmsg_total_size(min_ifinfo_dump_size); } static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { int idx; int s_idx = cb->family; int type = cb->nlh->nlmsg_type - RTM_BASE; int ret = 0; if (s_idx == 0) s_idx = 1; for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { struct rtnl_link __rcu **tab; struct rtnl_link *link; rtnl_dumpit_func dumpit; if (idx < s_idx || idx == PF_PACKET) continue; if (type < 0 || type >= RTM_NR_MSGTYPES) continue; tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]); if (!tab) continue; link = rcu_dereference_rtnl(tab[type]); if (!link) continue; dumpit = link->dumpit; if (!dumpit) continue; if (idx > s_idx) { memset(&cb->args[0], 0, sizeof(cb->args)); cb->prev_seq = 0; cb->seq = 0; } ret = dumpit(skb, cb); if (ret) break; } cb->family = idx; return skb->len ? : ret; } struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, u32 event, gfp_t flags, int *new_nsid, int new_ifindex, u32 portid, const struct nlmsghdr *nlh) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; u32 seq = 0; skb = nlmsg_new(if_nlmsg_size(dev, 0), flags); if (skb == NULL) goto errout; if (nlmsg_report(nlh)) seq = nlmsg_seq(nlh); else portid = 0; err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), type, portid, seq, change, 0, 0, event, new_nsid, new_ifindex, -1, flags); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } return skb; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, err); return NULL; } void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags, u32 portid, const struct nlmsghdr *nlh) { struct net *net = dev_net(dev); rtnl_notify(skb, net, portid, RTNLGRP_LINK, nlh, flags); } static void rtmsg_ifinfo_event(int type, struct net_device *dev, unsigned int change, u32 event, gfp_t flags, int *new_nsid, int new_ifindex, u32 portid, const struct nlmsghdr *nlh) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid, new_ifindex, portid, nlh); if (skb) rtmsg_ifinfo_send(skb, dev, flags, portid, nlh); } void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, u32 portid, const struct nlmsghdr *nlh) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL, 0, portid, nlh); } void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, new_nsid, new_ifindex, 0, NULL); } static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u16 vid, u32 pid, u32 seq, int type, unsigned int flags, int nlflags, u16 ndm_state) { struct nlmsghdr *nlh; struct ndmsg *ndm; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags); if (!nlh) return -EMSGSIZE; ndm = nlmsg_data(nlh); ndm->ndm_family = AF_BRIDGE; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = flags; ndm->ndm_type = 0; ndm->ndm_ifindex = dev->ifindex; ndm->ndm_state = ndm_state; if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr)) goto nla_put_failure; if (vid) if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static 
inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(dev->addr_len) + /* NDA_LLADDR */ nla_total_size(sizeof(u16)) + /* NDA_VLAN */ 0; } static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, u16 ndm_state) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC); if (!skb) goto errout; err = nlmsg_populate_fdb_fill(skb, dev, addr, vid, 0, 0, type, NTF_SELF, 0, ndm_state); if (err < 0) { kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); return; errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } /* * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry */ int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags) { int err = -EINVAL; /* If aging addresses are supported device will need to * implement its own handler for this. */ if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { netdev_info(dev, "default FDB implementation only supports local addresses\n"); return err; } if (tb[NDA_FLAGS_EXT]) { netdev_info(dev, "invalid flags given to default FDB implementation\n"); return err; } if (vid) { netdev_info(dev, "vlans aren't supported yet for dev_uc|mc_add()\n"); return err; } if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) err = dev_uc_add_excl(dev, addr); else if (is_multicast_ether_addr(addr)) err = dev_mc_add_excl(dev, addr); /* Only return duplicate errors if NLM_F_EXCL is set */ if (err == -EEXIST && !(flags & NLM_F_EXCL)) err = 0; return err; } EXPORT_SYMBOL(ndo_dflt_fdb_add); static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid, struct netlink_ext_ack *extack) { u16 vid = 0; if (vlan_attr) { if (nla_len(vlan_attr) != sizeof(u16)) { NL_SET_ERR_MSG(extack, "invalid vlan attribute size"); return -EINVAL; } vid = nla_get_u16(vlan_attr); if (!vid || vid >= VLAN_VID_MASK) { NL_SET_ERR_MSG(extack, "invalid vlan id"); return -EINVAL; } } *p_vid = vid; return 0; } static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; u8 *addr; u16 vid; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); if (err < 0) return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } if (dev->type != ARPHRD_ETHER) { NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags, extack); if (err) goto out; else ndm->ndm_flags &= ~NTF_MASTER; } /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & 
NTF_SELF)) { if (dev->netdev_ops->ndo_fdb_add) err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags, extack); else err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags); if (!err) { rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } out: return err; } /* * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry */ int ndo_dflt_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid) { int err = -EINVAL; /* If aging addresses are supported device will need to * implement its own handler for this. */ if (!(ndm->ndm_state & NUD_PERMANENT)) { netdev_info(dev, "default FDB implementation only supports local addresses\n"); return err; } if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) err = dev_uc_del(dev, addr); else if (is_multicast_ether_addr(addr)) err = dev_mc_del(dev, addr); return err; } EXPORT_SYMBOL(ndo_dflt_fdb_del); static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct net *net = sock_net(skb->sk); const struct net_device_ops *ops; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; __u8 *addr = NULL; int err; u16 vid; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!del_bulk) { err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } else { /* For bulk delete, the drivers will parse the message with * policy. */ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } if (err < 0) return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!del_bulk) { if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; } if (dev->type != ARPHRD_ETHER) { NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices"); return -EINVAL; } err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); ops = br_dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); } else { if (ops->ndo_fdb_del_bulk) err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (err) goto out; else ndm->ndm_flags &= ~NTF_MASTER; } /* Embedded bridge, macvlan, and any other device support */ if (ndm->ndm_flags & NTF_SELF) { ops = dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); else err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); } else { /* in case err was cleared by NTF_MASTER call */ err = -EOPNOTSUPP; if (ops->ndo_fdb_del_bulk) err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (!err) { if (!del_bulk) rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } out: return err; } static int nlmsg_populate_fdb(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, int *idx, struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha; int err; u32 portid, seq; portid = 
NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { if (*idx < cb->args[2]) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, portid, seq, RTM_NEWNEIGH, NTF_SELF, NLM_F_MULTI, NUD_PERMANENT); if (err < 0) return err; skip: *idx += 1; } return 0; } /** * ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table. * @skb: socket buffer to store message in * @cb: netlink callback * @dev: netdevice * @filter_dev: ignored * @idx: the number of FDB table entries dumped is added to *@idx * * Default netdevice operation to dump the existing unicast address list. * Returns number of addresses from list put in skb. */ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx) { int err; if (dev->type != ARPHRD_ETHER) return -EINVAL; netif_addr_lock_bh(dev); err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) goto out; err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc); out: netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(ndo_dflt_fdb_dump); static int valid_fdb_dump_strict(const struct nlmsghdr *nlh, int *br_idx, int *brport_idx, struct netlink_ext_ack *extack) { struct nlattr *tb[NDA_MAX + 1]; struct ndmsg *ndm; int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { NL_SET_ERR_MSG(extack, "Invalid header for fdb dump request"); return -EINVAL; } ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, extack); if (err < 0) return err; *brport_idx = ndm->ndm_ifindex; for (i = 0; i <= NDA_MAX; ++i) { if (!tb[i]) continue; switch (i) { case NDA_IFINDEX: if (nla_len(tb[i]) != sizeof(u32)) { NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in fdb dump request"); return -EINVAL; } *brport_idx = nla_get_u32(tb[NDA_IFINDEX]); break; case NDA_MASTER: if (nla_len(tb[i]) != sizeof(u32)) { NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in fdb dump request"); return -EINVAL; } *br_idx = nla_get_u32(tb[NDA_MASTER]); break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb dump request"); return -EINVAL; } } return 0; } static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh, int *br_idx, int *brport_idx, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_MAX+1]; int err; /* A hack to preserve kernel<->userspace interface. * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. * So, check for ndmsg with an optional u32 attribute (not used here). * Fortunately these sizes don't conflict with the size of ifinfomsg * with an optional attribute. 
*/ if (nlmsg_len(nlh) != sizeof(struct ndmsg) && (nlmsg_len(nlh) != sizeof(struct ndmsg) + nla_attr_size(sizeof(u32)))) { struct ifinfomsg *ifm; err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) { return -EINVAL; } else if (err == 0) { if (tb[IFLA_MASTER]) *br_idx = nla_get_u32(tb[IFLA_MASTER]); } ifm = nlmsg_data(nlh); *brport_idx = ifm->ifi_index; } return 0; } static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net_device *dev; struct net_device *br_dev = NULL; const struct net_device_ops *ops = NULL; const struct net_device_ops *cops = NULL; struct net *net = sock_net(skb->sk); struct hlist_head *head; int brport_idx = 0; int br_idx = 0; int h, s_h; int idx = 0, s_idx; int err = 0; int fidx = 0; if (cb->strict_check) err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx, cb->extack); else err = valid_fdb_dump_legacy(cb->nlh, &br_idx, &brport_idx, cb->extack); if (err < 0) return err; if (br_idx) { br_dev = __dev_get_by_index(net, br_idx); if (!br_dev) return -ENODEV; ops = br_dev->netdev_ops; } s_h = cb->args[0]; s_idx = cb->args[1]; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (brport_idx && (dev->ifindex != brport_idx)) continue; if (!br_idx) { /* user did not specify a specific bridge */ if (netif_is_bridge_port(dev)) { br_dev = netdev_master_upper_dev_get(dev); cops = br_dev->netdev_ops; } } else { if (dev != br_dev && !netif_is_bridge_port(dev)) continue; if (br_dev != netdev_master_upper_dev_get(dev) && !netif_is_bridge_master(dev)) continue; cops = ops; } if (idx < s_idx) goto cont; if (netif_is_bridge_port(dev)) { if (cops && cops->ndo_fdb_dump) { err = cops->ndo_fdb_dump(skb, cb, br_dev, dev, &fidx); if (err == -EMSGSIZE) goto out; } } if (dev->netdev_ops->ndo_fdb_dump) err = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, &fidx); else err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, &fidx); if (err == -EMSGSIZE) goto out; cops = NULL; /* reset fdb offset to 0 for rest of the interfaces */ cb->args[2] = 0; fidx = 0; cont: idx++; } } out: cb->args[0] = h; cb->args[1] = idx; cb->args[2] = fidx; return skb->len; } static int valid_fdb_get_strict(const struct nlmsghdr *nlh, struct nlattr **tb, u8 *ndm_flags, int *br_idx, int *brport_idx, u8 **addr, u16 *vid, struct netlink_ext_ack *extack) { struct ndmsg *ndm; int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { NL_SET_ERR_MSG(extack, "Invalid header for fdb get request"); return -EINVAL; } ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request"); return -EINVAL; } if (ndm->ndm_flags & ~(NTF_MASTER | NTF_SELF)) { NL_SET_ERR_MSG(extack, "Invalid flags in header for fdb get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, nda_policy, extack); if (err < 0) return err; *ndm_flags = ndm->ndm_flags; *brport_idx = ndm->ndm_ifindex; for (i = 0; i <= NDA_MAX; ++i) { if (!tb[i]) continue; switch (i) { case NDA_MASTER: *br_idx = nla_get_u32(tb[i]); break; case NDA_LLADDR: if (nla_len(tb[i]) != ETH_ALEN) { NL_SET_ERR_MSG(extack, "Invalid address in fdb get request"); return -EINVAL; } *addr = nla_data(tb[i]); break; case NDA_VLAN: err = fdb_vid_parse(tb[i], vid, extack); if (err) return err; break; case NDA_VNI: break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in 
fdb get request"); return -EINVAL; } } return 0; } static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net_device *dev = NULL, *br_dev = NULL; const struct net_device_ops *ops = NULL; struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NDA_MAX + 1]; struct sk_buff *skb; int brport_idx = 0; u8 ndm_flags = 0; int br_idx = 0; u8 *addr = NULL; u16 vid = 0; int err; err = valid_fdb_get_strict(nlh, tb, &ndm_flags, &br_idx, &brport_idx, &addr, &vid, extack); if (err < 0) return err; if (!addr) { NL_SET_ERR_MSG(extack, "Missing lookup address for fdb get request"); return -EINVAL; } if (brport_idx) { dev = __dev_get_by_index(net, brport_idx); if (!dev) { NL_SET_ERR_MSG(extack, "Unknown device ifindex"); return -ENODEV; } } if (br_idx) { if (dev) { NL_SET_ERR_MSG(extack, "Master and device are mutually exclusive"); return -EINVAL; } br_dev = __dev_get_by_index(net, br_idx); if (!br_dev) { NL_SET_ERR_MSG(extack, "Invalid master ifindex"); return -EINVAL; } ops = br_dev->netdev_ops; } if (dev) { if (!ndm_flags || (ndm_flags & NTF_MASTER)) { if (!netif_is_bridge_port(dev)) { NL_SET_ERR_MSG(extack, "Device is not a bridge port"); return -EINVAL; } br_dev = netdev_master_upper_dev_get(dev); if (!br_dev) { NL_SET_ERR_MSG(extack, "Master of device not found"); return -EINVAL; } ops = br_dev->netdev_ops; } else { if (!(ndm_flags & NTF_SELF)) { NL_SET_ERR_MSG(extack, "Missing NTF_SELF"); return -EINVAL; } ops = dev->netdev_ops; } } if (!br_dev && !dev) { NL_SET_ERR_MSG(extack, "No device specified"); return -ENODEV; } if (!ops || !ops->ndo_fdb_get) { NL_SET_ERR_MSG(extack, "Fdb get operation not supported by device"); return -EOPNOTSUPP; } skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (br_dev) dev = br_dev; err = ops->ndo_fdb_get(skb, tb, dev, addr, vid, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, extack); if (err) goto out; return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); out: kfree_skb(skb); return err; } static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, unsigned int attrnum, unsigned int flag) { if (mask & flag) return nla_put_u8(skb, attrnum, !!(flags & flag)); return 0; } int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, u32 flags, u32 mask, int nlflags, u32 filter_mask, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)) { struct nlmsghdr *nlh; struct ifinfomsg *ifm; struct nlattr *br_afspec; struct nlattr *protinfo; u8 operstate = netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN; struct net_device *br_dev = netdev_master_upper_dev_get(dev); int err = 0; nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); if (nlh == NULL) return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifi_family = AF_BRIDGE; ifm->__ifi_pad = 0; ifm->ifi_type = dev->type; ifm->ifi_index = dev->ifindex; ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = 0; if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || nla_put_u8(skb, IFLA_OPERSTATE, operstate) || (br_dev && nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || (dev->ifindex != dev_get_iflink(dev) && nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; br_afspec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!br_afspec) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF)) { nla_nest_cancel(skb, br_afspec); goto nla_put_failure; } if (mode != BRIDGE_MODE_UNDEF) { if (nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) { nla_nest_cancel(skb, br_afspec); goto nla_put_failure; } } if (vlan_fill) { err = vlan_fill(skb, dev, filter_mask); if (err) { nla_nest_cancel(skb, br_afspec); goto nla_put_failure; } } nla_nest_end(skb, br_afspec); protinfo = nla_nest_start(skb, IFLA_PROTINFO); if (!protinfo) goto nla_put_failure; if (brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_GUARD, BR_BPDU_GUARD) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_LEARNING, BR_LEARNING) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_LEARNING_SYNC, BR_LEARNING_SYNC) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_PROXYARP, BR_PROXYARP) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD)) { nla_nest_cancel(skb, protinfo); goto nla_put_failure; } nla_nest_end(skb, protinfo); nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return err ? 
err : -EMSGSIZE; } EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink); static int valid_bridge_getlink_req(const struct nlmsghdr *nlh, bool strict_check, u32 *filter_mask, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_MAX+1]; int err, i; if (strict_check) { struct ifinfomsg *ifm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG(extack, "Invalid header for bridge link dump"); return -EINVAL; } ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG(extack, "Invalid values in header for bridge link dump request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy, extack); } else { err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy, extack); } if (err < 0) return err; /* new attributes should only be added with strict checking */ for (i = 0; i <= IFLA_MAX; ++i) { if (!tb[i]) continue; switch (i) { case IFLA_EXT_MASK: *filter_mask = nla_get_u32(tb[i]); break; default: if (strict_check) { NL_SET_ERR_MSG(extack, "Unsupported attribute in bridge link dump request"); return -EINVAL; } } } return 0; } static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct net_device *dev; int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = nlh->nlmsg_seq; u32 filter_mask = 0; int err; err = valid_bridge_getlink_req(nlh, cb->strict_check, &filter_mask, cb->extack); if (err < 0 && cb->strict_check) return err; rcu_read_lock(); for_each_netdev_rcu(net, dev) { const struct net_device_ops *ops = dev->netdev_ops; struct net_device *br_dev = netdev_master_upper_dev_get(dev); if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0]) { err = br_dev->netdev_ops->ndo_bridge_getlink( skb, portid, seq, dev, filter_mask, NLM_F_MULTI); if (err < 0 && err != -EOPNOTSUPP) { if (likely(skb->len)) break; goto out_err; } } idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0]) { err = ops->ndo_bridge_getlink(skb, portid, seq, dev, filter_mask, NLM_F_MULTI); if (err < 0 && err != -EOPNOTSUPP) { if (likely(skb->len)) break; goto out_err; } } idx++; } } err = skb->len; out_err: rcu_read_unlock(); cb->args[0] = idx; return err; } static inline size_t bridge_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ } static int rtnl_bridge_notify(struct net_device *dev) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; if (!dev->netdev_ops->ndo_bridge_getlink) return 0; skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); if (!skb) { err = -ENOMEM; goto errout; } err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0); if (err < 0) goto errout; /* Notification info is only filled for bridge ports, not the bridge * device itself. Therefore, a zero notification length is valid and * should not result in an error. 
*/ if (!skb->len) goto errout; rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; errout: WARN_ON(err == -EMSGSIZE); kfree_skb(skb); if (err) rtnl_set_sk_err(net, RTNLGRP_LINK, err); return err; } static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_family != AF_BRIDGE) return -EPFNOSUPPORT; dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; br_flags_attr = attr; flags = nla_get_u16(attr); } if (nla_type(attr) == IFLA_BRIDGE_MODE) { if (nla_len(attr) < sizeof(u16)) return -EINVAL; } } } if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) { err = -EOPNOTSUPP; goto out; } err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags, extack); if (err) goto out; flags &= ~BRIDGE_FLAGS_MASTER; } if ((flags & BRIDGE_FLAGS_SELF)) { if (!dev->netdev_ops->ndo_bridge_setlink) err = -EOPNOTSUPP; else err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags, extack); if (!err) { flags &= ~BRIDGE_FLAGS_SELF; /* Generate event to notify upper layer of bridge * change */ err = rtnl_bridge_notify(dev); } } if (br_flags_attr) memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_family != AF_BRIDGE) return -EPFNOSUPPORT; dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; have_flags = true; flags = nla_get_u16(attr); break; } } } if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { err = -EOPNOTSUPP; goto out; } err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (err) goto out; flags &= ~BRIDGE_FLAGS_MASTER; } if ((flags & BRIDGE_FLAGS_SELF)) { if (!dev->netdev_ops->ndo_bridge_dellink) err = -EOPNOTSUPP; else err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (!err) { flags &= ~BRIDGE_FLAGS_SELF; /* Generate event to notify upper layer of bridge * change */ err = rtnl_bridge_notify(dev); } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); out: return err; } static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) { return (mask & IFLA_STATS_FILTER_BIT(attrid)) && (!idxattr || idxattr == attrid); } static 
bool rtnl_offload_xstats_have_ndo(const struct net_device *dev, int attr_id) { return dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && dev->netdev_ops->ndo_get_offload_stats && dev->netdev_ops->ndo_has_offload_stats(dev, attr_id); } static unsigned int rtnl_offload_xstats_get_size_ndo(const struct net_device *dev, int attr_id) { return rtnl_offload_xstats_have_ndo(dev, attr_id) ? sizeof(struct rtnl_link_stats64) : 0; } static int rtnl_offload_xstats_fill_ndo(struct net_device *dev, int attr_id, struct sk_buff *skb) { unsigned int size = rtnl_offload_xstats_get_size_ndo(dev, attr_id); struct nlattr *attr = NULL; void *attr_data; int err; if (!size) return -ENODATA; attr = nla_reserve_64bit(skb, attr_id, size, IFLA_OFFLOAD_XSTATS_UNSPEC); if (!attr) return -EMSGSIZE; attr_data = nla_data(attr); memset(attr_data, 0, size); err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, attr_data); if (err) return err; return 0; } static unsigned int rtnl_offload_xstats_get_size_stats(const struct net_device *dev, enum netdev_offload_xstats_type type) { bool enabled = netdev_offload_xstats_enabled(dev, type); return enabled ? sizeof(struct rtnl_hw_stats64) : 0; } struct rtnl_offload_xstats_request_used { bool request; bool used; }; static int rtnl_offload_xstats_get_stats(struct net_device *dev, enum netdev_offload_xstats_type type, struct rtnl_offload_xstats_request_used *ru, struct rtnl_hw_stats64 *stats, struct netlink_ext_ack *extack) { bool request; bool used; int err; request = netdev_offload_xstats_enabled(dev, type); if (!request) { used = false; goto out; } err = netdev_offload_xstats_get(dev, type, stats, &used, extack); if (err) return err; out: if (ru) { ru->request = request; ru->used = used; } return 0; } static int rtnl_offload_xstats_fill_hw_s_info_one(struct sk_buff *skb, int attr_id, struct rtnl_offload_xstats_request_used *ru) { struct nlattr *nest; nest = nla_nest_start(skb, attr_id); if (!nest) return -EMSGSIZE; if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, ru->request)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, ru->used)) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int rtnl_offload_xstats_fill_hw_s_info(struct sk_buff *skb, struct net_device *dev, struct netlink_ext_ack *extack) { enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; struct rtnl_offload_xstats_request_used ru_l3; struct nlattr *nest; int err; err = rtnl_offload_xstats_get_stats(dev, t_l3, &ru_l3, NULL, extack); if (err) return err; nest = nla_nest_start(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO); if (!nest) return -EMSGSIZE; if (rtnl_offload_xstats_fill_hw_s_info_one(skb, IFLA_OFFLOAD_XSTATS_L3_STATS, &ru_l3)) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev, int *prividx, u32 off_filter_mask, struct netlink_ext_ack *extack) { enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; int attr_id_hw_s_info = IFLA_OFFLOAD_XSTATS_HW_S_INFO; int attr_id_l3_stats = IFLA_OFFLOAD_XSTATS_L3_STATS; int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT; bool have_data = false; int err; if (*prividx <= attr_id_cpu_hit && (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_cpu_hit))) { err = rtnl_offload_xstats_fill_ndo(dev, attr_id_cpu_hit, skb); if (!err) { have_data = true; } else if (err != -ENODATA) { *prividx 
= attr_id_cpu_hit; return err; } } if (*prividx <= attr_id_hw_s_info && (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_hw_s_info))) { *prividx = attr_id_hw_s_info; err = rtnl_offload_xstats_fill_hw_s_info(skb, dev, extack); if (err) return err; have_data = true; *prividx = 0; } if (*prividx <= attr_id_l3_stats && (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_l3_stats))) { unsigned int size_l3; struct nlattr *attr; *prividx = attr_id_l3_stats; size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3); if (!size_l3) goto skip_l3_stats; attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3, IFLA_OFFLOAD_XSTATS_UNSPEC); if (!attr) return -EMSGSIZE; err = rtnl_offload_xstats_get_stats(dev, t_l3, NULL, nla_data(attr), extack); if (err) return err; have_data = true; skip_l3_stats: *prividx = 0; } if (!have_data) return -ENODATA; *prividx = 0; return 0; } static unsigned int rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev, enum netdev_offload_xstats_type type) { return nla_total_size(0) + /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */ nla_total_size(sizeof(u8)) + /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */ nla_total_size(sizeof(u8)) + 0; } static unsigned int rtnl_offload_xstats_get_size_hw_s_info(const struct net_device *dev) { enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; return nla_total_size(0) + /* IFLA_OFFLOAD_XSTATS_L3_STATS */ rtnl_offload_xstats_get_size_hw_s_info_one(dev, t_l3) + 0; } static int rtnl_offload_xstats_get_size(const struct net_device *dev, u32 off_filter_mask) { enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT; int nla_size = 0; int size; if (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_cpu_hit)) { size = rtnl_offload_xstats_get_size_ndo(dev, attr_id_cpu_hit); nla_size += nla_total_size_64bit(size); } if (off_filter_mask & IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO)) nla_size += rtnl_offload_xstats_get_size_hw_s_info(dev); if (off_filter_mask & IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_L3_STATS)) { size = rtnl_offload_xstats_get_size_stats(dev, t_l3); nla_size += nla_total_size_64bit(size); } if (nla_size != 0) nla_size += nla_total_size(0); return nla_size; } struct rtnl_stats_dump_filters { /* mask[0] filters outer attributes. Then individual nests have their * filtering mask at the index of the nested attribute. 
*/ u32 mask[IFLA_STATS_MAX + 1]; }; static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, const struct rtnl_stats_dump_filters *filters, int *idxattr, int *prividx, struct netlink_ext_ack *extack) { unsigned int filter_mask = filters->mask[0]; struct if_stats_msg *ifsm; struct nlmsghdr *nlh; struct nlattr *attr; int s_prividx = *prividx; int err; ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags); if (!nlh) return -EMSGSIZE; ifsm = nlmsg_data(nlh); ifsm->family = PF_UNSPEC; ifsm->pad1 = 0; ifsm->pad2 = 0; ifsm->ifindex = dev->ifindex; ifsm->filter_mask = filter_mask; if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) { struct rtnl_link_stats64 *sp; attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64, sizeof(struct rtnl_link_stats64), IFLA_STATS_UNSPEC); if (!attr) { err = -EMSGSIZE; goto nla_put_failure; } sp = nla_data(attr); dev_get_stats(dev, sp); } if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; if (ops && ops->fill_linkxstats) { *idxattr = IFLA_STATS_LINK_XSTATS; attr = nla_nest_start_noflag(skb, IFLA_STATS_LINK_XSTATS); if (!attr) { err = -EMSGSIZE; goto nla_put_failure; } err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); if (err) goto nla_put_failure; *idxattr = 0; } } if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, *idxattr)) { const struct rtnl_link_ops *ops = NULL; const struct net_device *master; master = netdev_master_upper_dev_get(dev); if (master) ops = master->rtnl_link_ops; if (ops && ops->fill_linkxstats) { *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; attr = nla_nest_start_noflag(skb, IFLA_STATS_LINK_XSTATS_SLAVE); if (!attr) { err = -EMSGSIZE; goto nla_put_failure; } err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); if (err) goto nla_put_failure; *idxattr = 0; } } if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, *idxattr)) { u32 off_filter_mask; off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS]; *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; attr = nla_nest_start_noflag(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); if (!attr) { err = -EMSGSIZE; goto nla_put_failure; } err = rtnl_offload_xstats_fill(skb, dev, prividx, off_filter_mask, extack); if (err == -ENODATA) nla_nest_cancel(skb, attr); else nla_nest_end(skb, attr); if (err && err != -ENODATA) goto nla_put_failure; *idxattr = 0; } if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, *idxattr)) { struct rtnl_af_ops *af_ops; *idxattr = IFLA_STATS_AF_SPEC; attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC); if (!attr) { err = -EMSGSIZE; goto nla_put_failure; } rcu_read_lock(); list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->fill_stats_af) { struct nlattr *af; af = nla_nest_start_noflag(skb, af_ops->family); if (!af) { rcu_read_unlock(); err = -EMSGSIZE; goto nla_put_failure; } err = af_ops->fill_stats_af(skb, dev); if (err == -ENODATA) { nla_nest_cancel(skb, af); } else if (err < 0) { rcu_read_unlock(); goto nla_put_failure; } nla_nest_end(skb, af); } } rcu_read_unlock(); nla_nest_end(skb, attr); *idxattr = 0; } nlmsg_end(skb, nlh); return 0; nla_put_failure: /* not a multi message or no progress mean a real error */ if (!(flags & NLM_F_MULTI) || s_prividx == *prividx) nlmsg_cancel(skb, nlh); else nlmsg_end(skb, nlh); return err; } static size_t if_nlmsg_stats_size(const struct net_device *dev, const struct 
rtnl_stats_dump_filters *filters) { size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg)); unsigned int filter_mask = filters->mask[0]; if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0)) size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; int attr = IFLA_STATS_LINK_XSTATS; if (ops && ops->get_linkxstats_size) { size += nla_total_size(ops->get_linkxstats_size(dev, attr)); /* for IFLA_STATS_LINK_XSTATS */ size += nla_total_size(0); } } if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, 0)) { struct net_device *_dev = (struct net_device *)dev; const struct rtnl_link_ops *ops = NULL; const struct net_device *master; /* netdev_master_upper_dev_get can't take const */ master = netdev_master_upper_dev_get(_dev); if (master) ops = master->rtnl_link_ops; if (ops && ops->get_linkxstats_size) { int attr = IFLA_STATS_LINK_XSTATS_SLAVE; size += nla_total_size(ops->get_linkxstats_size(dev, attr)); /* for IFLA_STATS_LINK_XSTATS_SLAVE */ size += nla_total_size(0); } } if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) { u32 off_filter_mask; off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS]; size += rtnl_offload_xstats_get_size(dev, off_filter_mask); } if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) { struct rtnl_af_ops *af_ops; /* for IFLA_STATS_AF_SPEC */ size += nla_total_size(0); rcu_read_lock(); list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_stats_af_size) { size += nla_total_size( af_ops->get_stats_af_size(dev)); /* for AF_* */ size += nla_total_size(0); } } rcu_read_unlock(); } return size; } #define RTNL_STATS_OFFLOAD_XSTATS_VALID ((1 << __IFLA_OFFLOAD_XSTATS_MAX) - 1) static const struct nla_policy rtnl_stats_get_policy_filters[IFLA_STATS_MAX + 1] = { [IFLA_STATS_LINK_OFFLOAD_XSTATS] = NLA_POLICY_MASK(NLA_U32, RTNL_STATS_OFFLOAD_XSTATS_VALID), }; static const struct nla_policy rtnl_stats_get_policy[IFLA_STATS_GETSET_MAX + 1] = { [IFLA_STATS_GET_FILTERS] = NLA_POLICY_NESTED(rtnl_stats_get_policy_filters), }; static const struct nla_policy ifla_stats_set_policy[IFLA_STATS_GETSET_MAX + 1] = { [IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS] = NLA_POLICY_MAX(NLA_U8, 1), }; static int rtnl_stats_get_parse_filters(struct nlattr *ifla_filters, struct rtnl_stats_dump_filters *filters, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_STATS_MAX + 1]; int err; int at; err = nla_parse_nested(tb, IFLA_STATS_MAX, ifla_filters, rtnl_stats_get_policy_filters, extack); if (err < 0) return err; for (at = 1; at <= IFLA_STATS_MAX; at++) { if (tb[at]) { if (!(filters->mask[0] & IFLA_STATS_FILTER_BIT(at))) { NL_SET_ERR_MSG(extack, "Filtered attribute not enabled in filter_mask"); return -EINVAL; } filters->mask[at] = nla_get_u32(tb[at]); } } return 0; } static int rtnl_stats_get_parse(const struct nlmsghdr *nlh, u32 filter_mask, struct rtnl_stats_dump_filters *filters, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1]; int err; int i; filters->mask[0] = filter_mask; for (i = 1; i < ARRAY_SIZE(filters->mask); i++) filters->mask[i] = -1U; err = nlmsg_parse(nlh, sizeof(struct if_stats_msg), tb, IFLA_STATS_GETSET_MAX, rtnl_stats_get_policy, extack); if (err < 0) return err; if (tb[IFLA_STATS_GET_FILTERS]) { err = rtnl_stats_get_parse_filters(tb[IFLA_STATS_GET_FILTERS], filters, extack); if (err) return err; } return 0; } static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, 
bool strict_check, bool is_dump, struct netlink_ext_ack *extack) { struct if_stats_msg *ifsm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } if (!strict_check) return 0; ifsm = nlmsg_data(nlh); /* only requests using strict checks can pass data to influence * the dump. The legacy exception is filter_mask. */ if (ifsm->pad1 || ifsm->pad2 || (is_dump && ifsm->ifindex)) { NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); return -EINVAL; } if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) { NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask"); return -EINVAL; } return 0; } static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); struct net_device *dev = NULL; int idxattr = 0, prividx = 0; struct if_stats_msg *ifsm; struct sk_buff *nskb; int err; err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), false, extack); if (err) return err; ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); else return -EINVAL; if (!dev) return -ENODEV; if (!ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats get"); return -EINVAL; } err = rtnl_stats_get_parse(nlh, ifsm->filter_mask, &filters, extack); if (err) return err; nskb = nlmsg_new(if_nlmsg_stats_size(dev, &filters), GFP_KERNEL); if (!nskb) return -ENOBUFS; err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, &filters, &idxattr, &prividx, extack); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else { err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); } return err; } static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; int h, s_h, err, s_idx, s_idxattr, s_prividx; struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; struct if_stats_msg *ifsm; struct hlist_head *head; struct net_device *dev; int idx = 0; s_h = cb->args[0]; s_idx = cb->args[1]; s_idxattr = cb->args[2]; s_prividx = cb->args[3]; cb->seq = net->dev_base_seq; err = rtnl_valid_stats_req(cb->nlh, cb->strict_check, true, extack); if (err) return err; ifsm = nlmsg_data(cb->nlh); if (!ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump"); return -EINVAL; } err = rtnl_stats_get_parse(cb->nlh, ifsm->filter_mask, &filters, extack); if (err) return err; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (idx < s_idx) goto cont; err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, &filters, &s_idxattr, &s_prividx, extack); /* If we ran out of room on the first message, * we're in trouble */ WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); if (err < 0) goto out; s_prividx = 0; s_idxattr = 0; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } } out: cb->args[3] = s_prividx; cb->args[2] = s_idxattr; cb->args[1] = idx; cb->args[0] = h; return skb->len; } void rtnl_offload_xstats_notify(struct net_device *dev) { struct rtnl_stats_dump_filters response_filters = {}; struct net *net = dev_net(dev); int idxattr = 0, prividx = 0; struct sk_buff *skb; int 
err = -ENOBUFS; ASSERT_RTNL(); response_filters.mask[0] |= IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS); response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |= IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO); skb = nlmsg_new(if_nlmsg_stats_size(dev, &response_filters), GFP_KERNEL); if (!skb) goto errout; err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, 0, 0, 0, 0, &response_filters, &idxattr, &prividx, NULL); if (err < 0) { kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_STATS, NULL, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_STATS, err); } EXPORT_SYMBOL(rtnl_offload_xstats_notify); static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; struct rtnl_stats_dump_filters response_filters = {}; struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1]; struct net *net = sock_net(skb->sk); struct net_device *dev = NULL; struct if_stats_msg *ifsm; bool notify = false; int err; err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), false, extack); if (err) return err; ifsm = nlmsg_data(nlh); if (ifsm->family != AF_UNSPEC) { NL_SET_ERR_MSG(extack, "Address family should be AF_UNSPEC"); return -EINVAL; } if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); else return -EINVAL; if (!dev) return -ENODEV; if (ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be 0 for stats set"); return -EINVAL; } err = nlmsg_parse(nlh, sizeof(*ifsm), tb, IFLA_STATS_GETSET_MAX, ifla_stats_set_policy, extack); if (err < 0) return err; if (tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]) { u8 req = nla_get_u8(tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]); if (req) err = netdev_offload_xstats_enable(dev, t_l3, extack); else err = netdev_offload_xstats_disable(dev, t_l3); if (!err) notify = true; else if (err != -EALREADY) return err; response_filters.mask[0] |= IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS); response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |= IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO); } if (notify) rtnl_offload_xstats_notify(dev); return 0; } static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct br_port_msg *bpm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request"); return -EINVAL; } bpm = nlmsg_data(nlh); if (bpm->ifindex) { NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*bpm))) { NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); return -EINVAL; } return 0; } struct rtnl_mdb_dump_ctx { long idx; }; static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct rtnl_mdb_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); struct net_device *dev; int idx, s_idx; int err; NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx); if (cb->strict_check) { err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); if (err) return err; } s_idx = ctx->idx; idx = 0; for_each_netdev(net, dev) { if (idx < s_idx) goto skip; if (!dev->netdev_ops->ndo_mdb_dump) goto skip; err = dev->netdev_ops->ndo_mdb_dump(dev, skb, cb); if (err == -EMSGSIZE) goto out; /* Moving on to next device, reset markers and sequence * counters since they are all maintained per-device. 
*/ memset(cb->ctx, 0, sizeof(cb->ctx)); cb->prev_seq = 0; cb->seq = 0; skip: idx++; } out: ctx->idx = idx; return skb->len; } static int rtnl_validate_mdb_entry_get(const struct nlattr *attr, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(attr); if (nla_len(attr) != sizeof(struct br_mdb_entry)) { NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); return -EINVAL; } if (entry->ifindex) { NL_SET_ERR_MSG(extack, "Entry ifindex cannot be specified"); return -EINVAL; } if (entry->state) { NL_SET_ERR_MSG(extack, "Entry state cannot be specified"); return -EINVAL; } if (entry->flags) { NL_SET_ERR_MSG(extack, "Entry flags cannot be specified"); return -EINVAL; } if (entry->vid >= VLAN_VID_MASK) { NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); return -EINVAL; } if (entry->addr.proto != htons(ETH_P_IP) && entry->addr.proto != htons(ETH_P_IPV6) && entry->addr.proto != 0) { NL_SET_ERR_MSG(extack, "Unknown entry protocol"); return -EINVAL; } return 0; } static const struct nla_policy mdba_get_policy[MDBA_GET_ENTRY_MAX + 1] = { [MDBA_GET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, rtnl_validate_mdb_entry_get, sizeof(struct br_mdb_entry)), [MDBA_GET_ENTRY_ATTRS] = { .type = NLA_NESTED }, }; static int rtnl_mdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[MDBA_GET_ENTRY_MAX + 1]; struct net *net = sock_net(in_skb->sk); struct br_port_msg *bpm; struct net_device *dev; int err; err = nlmsg_parse(nlh, sizeof(struct br_port_msg), tb, MDBA_GET_ENTRY_MAX, mdba_get_policy, extack); if (err) return err; bpm = nlmsg_data(nlh); if (!bpm->ifindex) { NL_SET_ERR_MSG(extack, "Invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, bpm->ifindex); if (!dev) { NL_SET_ERR_MSG(extack, "Device doesn't exist"); return -ENODEV; } if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_GET_ENTRY)) { NL_SET_ERR_MSG(extack, "Missing MDBA_GET_ENTRY attribute"); return -EINVAL; } if (!dev->netdev_ops->ndo_mdb_get) { NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); return -EOPNOTSUPP; } return dev->netdev_ops->ndo_mdb_get(dev, tb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, extack); } static int rtnl_validate_mdb_entry(const struct nlattr *attr, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(attr); if (nla_len(attr) != sizeof(struct br_mdb_entry)) { NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); return -EINVAL; } if (entry->ifindex == 0) { NL_SET_ERR_MSG(extack, "Zero entry ifindex is not allowed"); return -EINVAL; } if (entry->addr.proto == htons(ETH_P_IP)) { if (!ipv4_is_multicast(entry->addr.u.ip4) && !ipv4_is_zeronet(entry->addr.u.ip4)) { NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast or 0.0.0.0"); return -EINVAL; } if (ipv4_is_local_multicast(entry->addr.u.ip4)) { NL_SET_ERR_MSG(extack, "IPv4 entry group address is local multicast"); return -EINVAL; } #if IS_ENABLED(CONFIG_IPV6) } else if (entry->addr.proto == htons(ETH_P_IPV6)) { if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) { NL_SET_ERR_MSG(extack, "IPv6 entry group address is link-local all nodes"); return -EINVAL; } #endif } else if (entry->addr.proto == 0) { /* L2 mdb */ if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) { NL_SET_ERR_MSG(extack, "L2 entry group is not multicast"); return -EINVAL; } } else { NL_SET_ERR_MSG(extack, "Unknown entry protocol"); return -EINVAL; } if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { NL_SET_ERR_MSG(extack, "Unknown entry state"); return 
-EINVAL; } if (entry->vid >= VLAN_VID_MASK) { NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); return -EINVAL; } return 0; } static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = { [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 }, [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, rtnl_validate_mdb_entry, sizeof(struct br_mdb_entry)), [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, }; static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; struct net *net = sock_net(skb->sk); struct br_port_msg *bpm; struct net_device *dev; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, mdba_policy, extack); if (err) return err; bpm = nlmsg_data(nlh); if (!bpm->ifindex) { NL_SET_ERR_MSG(extack, "Invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, bpm->ifindex); if (!dev) { NL_SET_ERR_MSG(extack, "Device doesn't exist"); return -ENODEV; } if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); return -EINVAL; } if (!dev->netdev_ops->ndo_mdb_add) { NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); return -EOPNOTSUPP; } return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack); } static int rtnl_validate_mdb_entry_del_bulk(const struct nlattr *attr, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(attr); struct br_mdb_entry zero_entry = {}; if (nla_len(attr) != sizeof(struct br_mdb_entry)) { NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); return -EINVAL; } if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { NL_SET_ERR_MSG(extack, "Unknown entry state"); return -EINVAL; } if (entry->flags) { NL_SET_ERR_MSG(extack, "Entry flags cannot be set"); return -EINVAL; } if (entry->vid >= VLAN_N_VID - 1) { NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); return -EINVAL; } if (memcmp(&entry->addr, &zero_entry.addr, sizeof(entry->addr))) { NL_SET_ERR_MSG(extack, "Entry address cannot be set"); return -EINVAL; } return 0; } static const struct nla_policy mdba_del_bulk_policy[MDBA_SET_ENTRY_MAX + 1] = { [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, rtnl_validate_mdb_entry_del_bulk, sizeof(struct br_mdb_entry)), [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, }; static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; struct net *net = sock_net(skb->sk); struct br_port_msg *bpm; struct net_device *dev; int err; if (!del_bulk) err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, mdba_policy, extack); else err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, mdba_del_bulk_policy, extack); if (err) return err; bpm = nlmsg_data(nlh); if (!bpm->ifindex) { NL_SET_ERR_MSG(extack, "Invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, bpm->ifindex); if (!dev) { NL_SET_ERR_MSG(extack, "Device doesn't exist"); return -ENODEV; } if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); return -EINVAL; } if (del_bulk) { if (!dev->netdev_ops->ndo_mdb_del_bulk) { NL_SET_ERR_MSG(extack, "Device does not support MDB bulk deletion"); return -EOPNOTSUPP; } return dev->netdev_ops->ndo_mdb_del_bulk(dev, tb, extack); } if (!dev->netdev_ops->ndo_mdb_del) { NL_SET_ERR_MSG(extack, 
"Device does not support MDB operations"); return -EOPNOTSUPP; } return dev->netdev_ops->ndo_mdb_del(dev, tb, extack); } /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct rtnl_link *link; enum rtnl_kinds kind; struct module *owner; int err = -EOPNOTSUPP; rtnl_doit_func doit; unsigned int flags; int family; int type; type = nlh->nlmsg_type; if (type > RTM_MAX) return -EOPNOTSUPP; type -= RTM_BASE; /* All the messages must have at least 1 byte length */ if (nlmsg_len(nlh) < sizeof(struct rtgenmsg)) return 0; family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; kind = rtnl_msgtype_kind(type); if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; rcu_read_lock(); if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) { struct sock *rtnl; rtnl_dumpit_func dumpit; u32 min_dump_alloc = 0; link = rtnl_get_link(family, type); if (!link || !link->dumpit) { family = PF_UNSPEC; link = rtnl_get_link(family, type); if (!link || !link->dumpit) goto err_unlock; } owner = link->owner; dumpit = link->dumpit; flags = link->flags; if (type == RTM_GETLINK - RTM_BASE) min_dump_alloc = rtnl_calcit(skb, nlh); err = 0; /* need to do this before rcu_read_unlock() */ if (!try_module_get(owner)) err = -EPROTONOSUPPORT; rcu_read_unlock(); rtnl = net->rtnl; if (err == 0) { struct netlink_dump_control c = { .dump = dumpit, .min_dump_alloc = min_dump_alloc, .module = owner, .flags = flags, }; err = netlink_dump_start(rtnl, skb, nlh, &c); /* netlink_dump_start() will keep a reference on * module if dump is still in progress. */ module_put(owner); } return err; } link = rtnl_get_link(family, type); if (!link || !link->doit) { family = PF_UNSPEC; link = rtnl_get_link(PF_UNSPEC, type); if (!link || !link->doit) goto out_unlock; } owner = link->owner; if (!try_module_get(owner)) { err = -EPROTONOSUPPORT; goto out_unlock; } flags = link->flags; if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) && !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) { NL_SET_ERR_MSG(extack, "Bulk delete is not supported"); module_put(owner); goto err_unlock; } if (flags & RTNL_FLAG_DOIT_UNLOCKED) { doit = link->doit; rcu_read_unlock(); if (doit) err = doit(skb, nlh, extack); module_put(owner); return err; } rcu_read_unlock(); rtnl_lock(); link = rtnl_get_link(family, type); if (link && link->doit) err = link->doit(skb, nlh, extack); rtnl_unlock(); module_put(owner); return err; out_unlock: rcu_read_unlock(); return err; err_unlock: rcu_read_unlock(); return -EOPNOTSUPP; } static void rtnetlink_rcv(struct sk_buff *skb) { netlink_rcv_skb(skb, &rtnetlink_rcv_msg); } static int rtnetlink_bind(struct net *net, int group) { switch (group) { case RTNLGRP_IPV4_MROUTE_R: case RTNLGRP_IPV6_MROUTE_R: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; break; } return 0; } static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REBOOT: case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: case NETDEV_BONDING_FAILOVER: case NETDEV_POST_TYPE_CHANGE: case NETDEV_NOTIFY_PEERS: case NETDEV_CHANGEUPPER: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: case NETDEV_CHANGELOWERSTATE: case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), GFP_KERNEL, NULL, 0, 0, NULL); 
break; default: break; } return NOTIFY_DONE; } static struct notifier_block rtnetlink_dev_notifier = { .notifier_call = rtnetlink_event, }; static int __net_init rtnetlink_net_init(struct net *net) { struct sock *sk; struct netlink_kernel_cfg cfg = { .groups = RTNLGRP_MAX, .input = rtnetlink_rcv, .cb_mutex = &rtnl_mutex, .flags = NL_CFG_F_NONROOT_RECV, .bind = rtnetlink_bind, }; sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg); if (!sk) return -ENOMEM; net->rtnl = sk; return 0; } static void __net_exit rtnetlink_net_exit(struct net *net) { netlink_kernel_release(net->rtnl); net->rtnl = NULL; } static struct pernet_operations rtnetlink_net_ops = { .init = rtnetlink_net_init, .exit = rtnetlink_net_exit, }; void __init rtnetlink_init(void) { if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo, 0); rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0); rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0); rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0); rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0); rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0); rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, RTNL_FLAG_BULK_DEL_SUPPORTED); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, 0); rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, RTNL_FLAG_BULK_DEL_SUPPORTED); }
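rtnetlink_init() above is the canonical user of rtnl_register(): it binds each (family, message type) pair to a doit and/or dumpit handler, and rtnetlink_rcv_msg() later dispatches requests to whatever sits in that table. A minimal, hypothetical sketch of a module plugging its own handlers into the same table follows; MY_PF, MY_RTM_TYPE, my_doit and my_dumpit are placeholders, not symbols from the file above.

#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/rtnetlink.h>

/* Placeholders for the sketch; a real module registers the family and
 * message type it actually owns. */
#define MY_PF		PF_UNSPEC
#define MY_RTM_TYPE	RTM_GETLINK

/* doit: handles one request; runs under rtnl_lock() unless the handler was
 * registered with RTNL_FLAG_DOIT_UNLOCKED (see rtnetlink_rcv_msg() above). */
static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	return 0;
}

/* dumpit: entered through netlink_dump_start() for NLM_F_DUMP GET requests. */
static int my_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;
}

static int __init my_family_init(void)
{
	rtnl_register(MY_PF, MY_RTM_TYPE, my_doit, my_dumpit, 0);
	return 0;
}
module_init(my_family_init);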
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_USB_TYPEC_H #define __LINUX_USB_TYPEC_H #include <linux/types.h> /* USB Type-C Specification releases */ #define USB_TYPEC_REV_1_0 0x100 /* 1.0 */ #define USB_TYPEC_REV_1_1 0x110 /* 1.1 */ #define USB_TYPEC_REV_1_2 0x120 /* 1.2 */ #define USB_TYPEC_REV_1_3 0x130 /* 1.3 */ #define USB_TYPEC_REV_1_4 0x140 /* 1.4 */ #define USB_TYPEC_REV_2_0 0x200 /* 2.0 */ struct typec_partner; struct typec_cable; struct typec_plug; struct typec_port; struct typec_altmode_ops; struct typec_cable_ops; struct fwnode_handle; struct device; struct usb_power_delivery; struct usb_power_delivery_desc; enum typec_port_type { TYPEC_PORT_SRC, TYPEC_PORT_SNK, TYPEC_PORT_DRP, }; enum typec_port_data { TYPEC_PORT_DFP, TYPEC_PORT_UFP, TYPEC_PORT_DRD, }; enum typec_plug_type { USB_PLUG_NONE, USB_PLUG_TYPE_A, USB_PLUG_TYPE_B, USB_PLUG_TYPE_C, USB_PLUG_CAPTIVE, }; enum typec_data_role { TYPEC_DEVICE, TYPEC_HOST, }; enum typec_role { TYPEC_SINK, TYPEC_SOURCE, }; static inline int is_sink(enum typec_role role) { return role == TYPEC_SINK; } static inline int is_source(enum typec_role role) { return role == TYPEC_SOURCE; } enum typec_pwr_opmode { TYPEC_PWR_MODE_USB, TYPEC_PWR_MODE_1_5A, TYPEC_PWR_MODE_3_0A, TYPEC_PWR_MODE_PD, }; enum typec_accessory { TYPEC_ACCESSORY_NONE, TYPEC_ACCESSORY_AUDIO, TYPEC_ACCESSORY_DEBUG, }; #define TYPEC_MAX_ACCESSORY 3 enum typec_orientation { TYPEC_ORIENTATION_NONE, TYPEC_ORIENTATION_NORMAL, TYPEC_ORIENTATION_REVERSE, }; /* * struct enter_usb_data - Enter_USB Message details * @eudo: Enter_USB Data Object * @active_link_training: Active Cable Plug Link Training * * @active_link_training is a flag that should be set with uni-directional SBRX * communication, and left 0 with passive cables and with bi-directional SBRX * communication. */ struct enter_usb_data { u32 eudo; unsigned char active_link_training:1; }; /* * struct usb_pd_identity - USB Power Delivery identity data * @id_header: ID Header VDO * @cert_stat: Cert Stat VDO * @product: Product VDO * @vdo: Product Type Specific VDOs * * USB power delivery Discover Identity command response data.
* * REVISIT: This is USB Power Delivery specific information, so this structure * probable belongs to USB Power Delivery header file once we have them. */ struct usb_pd_identity { u32 id_header; u32 cert_stat; u32 product; u32 vdo[3]; }; int typec_partner_set_identity(struct typec_partner *partner); int typec_cable_set_identity(struct typec_cable *cable); /* * struct typec_altmode_desc - USB Type-C Alternate Mode Descriptor * @svid: Standard or Vendor ID * @mode: Index of the Mode * @vdo: VDO returned by Discover Modes USB PD command * @roles: Only for ports. DRP if the mode is available in both roles * * Description of an Alternate Mode which a connector, cable plug or partner * supports. */ struct typec_altmode_desc { u16 svid; u8 mode; u32 vdo; /* Only used with ports */ enum typec_port_data roles; }; void typec_partner_set_pd_revision(struct typec_partner *partner, u16 pd_revision); int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmodes); struct typec_altmode *typec_partner_register_altmode(struct typec_partner *partner, const struct typec_altmode_desc *desc); int typec_plug_set_num_altmodes(struct typec_plug *plug, int num_altmodes); struct typec_altmode *typec_plug_register_altmode(struct typec_plug *plug, const struct typec_altmode_desc *desc); struct typec_altmode *typec_port_register_altmode(struct typec_port *port, const struct typec_altmode_desc *desc); void typec_port_register_altmodes(struct typec_port *port, const struct typec_altmode_ops *ops, void *drvdata, struct typec_altmode **altmodes, size_t n); void typec_port_register_cable_ops(struct typec_altmode **altmodes, int max_altmodes, const struct typec_cable_ops *ops); void typec_unregister_altmode(struct typec_altmode *altmode); struct typec_port *typec_altmode2port(struct typec_altmode *alt); void typec_altmode_update_active(struct typec_altmode *alt, bool active); enum typec_plug_index { TYPEC_PLUG_SOP_P, TYPEC_PLUG_SOP_PP, }; /* * struct typec_plug_desc - USB Type-C Cable Plug Descriptor * @index: SOP Prime for the plug connected to DFP and SOP Double Prime for the * plug connected to UFP * * Represents USB Type-C Cable Plug. */ struct typec_plug_desc { enum typec_plug_index index; }; /* * struct typec_cable_desc - USB Type-C Cable Descriptor * @type: The plug type from USB PD Cable VDO * @active: Is the cable active or passive * @identity: Result of Discover Identity command * @pd_revision: USB Power Delivery Specification revision if supported * * Represents USB Type-C Cable attached to USB Type-C port. */ struct typec_cable_desc { enum typec_plug_type type; unsigned int active:1; struct usb_pd_identity *identity; u16 pd_revision; /* 0300H = "3.0" */ }; /* * struct typec_partner_desc - USB Type-C Partner Descriptor * @usb_pd: USB Power Delivery support * @accessory: Audio, Debug or none. * @identity: Discover Identity command data * @pd_revision: USB Power Delivery Specification Revision if supported * @attach: Notification about attached USB device * @deattach: Notification about removed USB device * * Details about a partner that is attached to USB Type-C port. If @identity * member exists when partner is registered, a directory named "identity" is * created to sysfs for the partner device. * * @pd_revision is based on the setting of the "Specification Revision" field * in the message header on the initial "Source Capabilities" message received * from the partner, or a "Request" message received from the partner, depending * on whether our port is a Sink or a Source. 
*/ struct typec_partner_desc { unsigned int usb_pd:1; enum typec_accessory accessory; struct usb_pd_identity *identity; u16 pd_revision; /* 0300H = "3.0" */ void (*attach)(struct typec_partner *partner, struct device *dev); void (*deattach)(struct typec_partner *partner, struct device *dev); }; /** * struct typec_operations - USB Type-C Port Operations * @try_role: Set data role preference for DRP port * @dr_set: Set Data Role * @pr_set: Set Power Role * @vconn_set: Source VCONN * @port_type_set: Set port type * @pd_get: Get available USB Power Delivery Capabilities. * @pd_set: Set USB Power Delivery Capabilities. */ struct typec_operations { int (*try_role)(struct typec_port *port, int role); int (*dr_set)(struct typec_port *port, enum typec_data_role role); int (*pr_set)(struct typec_port *port, enum typec_role role); int (*vconn_set)(struct typec_port *port, enum typec_role role); int (*port_type_set)(struct typec_port *port, enum typec_port_type type); struct usb_power_delivery **(*pd_get)(struct typec_port *port); int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd); }; enum usb_pd_svdm_ver { SVDM_VER_1_0 = 0, SVDM_VER_2_0 = 1, SVDM_VER_MAX = SVDM_VER_2_0, }; /* * struct typec_capability - USB Type-C Port Capabilities * @type: Supported power role of the port * @data: Supported data role of the port * @revision: USB Type-C Specification release. Binary coded decimal * @pd_revision: USB Power Delivery Specification revision if supported * @svdm_version: USB PD Structured VDM version if supported * @prefer_role: Initial role preference (DRP ports). * @accessory: Supported Accessory Modes * @fwnode: Optional fwnode of the port * @driver_data: Private pointer for driver specific info * @pd: Optional USB Power Delivery Support * @ops: Port operations vector * * Static capabilities of a single USB Type-C port. */ struct typec_capability { enum typec_port_type type; enum typec_port_data data; u16 revision; /* 0120H = "1.2" */ u16 pd_revision; /* 0300H = "3.0" */ enum usb_pd_svdm_ver svdm_version; int prefer_role; enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; unsigned int orientation_aware:1; struct fwnode_handle *fwnode; void *driver_data; struct usb_power_delivery *pd; const struct typec_operations *ops; }; /* Specific to try_role(). Indicates the user want's to clear the preference. 
*/ #define TYPEC_NO_PREFERRED_ROLE (-1) struct typec_port *typec_register_port(struct device *parent, const struct typec_capability *cap); void typec_unregister_port(struct typec_port *port); struct typec_partner *typec_register_partner(struct typec_port *port, struct typec_partner_desc *desc); void typec_unregister_partner(struct typec_partner *partner); struct typec_cable *typec_register_cable(struct typec_port *port, struct typec_cable_desc *desc); void typec_unregister_cable(struct typec_cable *cable); struct typec_cable *typec_cable_get(struct typec_port *port); void typec_cable_put(struct typec_cable *cable); int typec_cable_is_active(struct typec_cable *cable); struct typec_plug *typec_register_plug(struct typec_cable *cable, struct typec_plug_desc *desc); void typec_unregister_plug(struct typec_plug *plug); void typec_set_data_role(struct typec_port *port, enum typec_data_role role); void typec_set_pwr_role(struct typec_port *port, enum typec_role role); void typec_set_vconn_role(struct typec_port *port, enum typec_role role); void typec_set_pwr_opmode(struct typec_port *port, enum typec_pwr_opmode mode); int typec_set_orientation(struct typec_port *port, enum typec_orientation orientation); enum typec_orientation typec_get_orientation(struct typec_port *port); int typec_set_mode(struct typec_port *port, int mode); void *typec_get_drvdata(struct typec_port *port); int typec_get_fw_cap(struct typec_capability *cap, struct fwnode_handle *fwnode); int typec_find_pwr_opmode(const char *name); int typec_find_orientation(const char *name); int typec_find_port_power_role(const char *name); int typec_find_power_role(const char *name); int typec_find_port_data_role(const char *name); void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); int typec_get_cable_svdm_version(struct typec_port *port); void typec_cable_set_svdm_version(struct typec_cable *cable, enum usb_pd_svdm_ver svdm_version); struct usb_power_delivery *typec_partner_usb_power_delivery_register(struct typec_partner *partner, struct usb_power_delivery_desc *desc); int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_delivery *pd); int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); /** * struct typec_connector - Representation of Type-C port for external drivers * @attach: notification about device removal * @deattach: notification about device removal * * Drivers that control the USB and other ports (DisplayPorts, etc.), that are * connected to the Type-C connectors, can use these callbacks to inform the * Type-C connector class about connections and disconnections. That information * can then be used by the typec-port drivers to power on or off parts that are * needed or not needed - as an example, in USB mode if USB2 device is * enumerated, USB3 components (retimers, phys, and what have you) do not need * to be powered on. * * The attached (enumerated) devices will be liked with the typec-partner device. 
*/ struct typec_connector { void (*attach)(struct typec_connector *con, struct device *dev); void (*deattach)(struct typec_connector *con, struct device *dev); }; static inline void typec_attach(struct typec_connector *con, struct device *dev) { if (con && con->attach) con->attach(con, dev); } static inline void typec_deattach(struct typec_connector *con, struct device *dev) { if (con && con->deattach) con->deattach(con, dev); } #endif /* __LINUX_USB_TYPEC_H */
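The header above is the whole driver-facing API of the Type-C connector class. Below is a minimal, hypothetical registration sketch built only from the declarations shown; the my_* names and the specific capability values are assumptions for illustration, not taken from the header.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/usb/typec.h>

static int my_dr_set(struct typec_port *port, enum typec_data_role role)
{
	/* A real driver would reprogram its mux/controller here. */
	return 0;
}

static const struct typec_operations my_ops = {
	.dr_set = my_dr_set,
};

static struct typec_port *my_register_port(struct device *parent, void *drvdata)
{
	struct typec_capability cap = {
		.type = TYPEC_PORT_DRP,
		.data = TYPEC_PORT_DRD,
		.revision = USB_TYPEC_REV_1_3,
		.prefer_role = TYPEC_NO_PREFERRED_ROLE,
		.accessory = { TYPEC_ACCESSORY_AUDIO, TYPEC_ACCESSORY_DEBUG },
		.driver_data = drvdata,
		.ops = &my_ops,
	};
	struct typec_port *port;

	port = typec_register_port(parent, &cap);
	if (IS_ERR(port))
		return port;

	/* Later: typec_set_data_role(port, ...), typec_register_partner(port, ...) */
	return port;
}

The returned struct typec_port is the handle used by the rest of the API (typec_set_data_role(), typec_register_partner(), typec_get_drvdata(), ...), so a driver would normally keep it for the lifetime of the port.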
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FILEATTR_H #define _LINUX_FILEATTR_H /* Flags shared between flags/xflags */ #define FS_COMMON_FL \ (FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NODUMP_FL | FS_NOATIME_FL | FS_DAX_FL | \ FS_PROJINHERIT_FL) #define FS_XFLAG_COMMON \ (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | \ FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \ FS_XFLAG_PROJINHERIT) /* * Merged interface for miscellaneous file attributes. 'flags' originates from * ext* and 'fsx_flags' from xfs. There's some overlap between the two, which * is handled by the VFS helpers, so filesystems are free to implement just one * or both of these sub-interfaces. */ struct fileattr { u32 flags; /* flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */ /* struct fsxattr: */ u32 fsx_xflags; /* xflags field value (get/set) */ u32 fsx_extsize; /* extsize field value (get/set) */ u32 fsx_nextents; /* nextents field value (get) */ u32 fsx_projid; /* project identifier (get/set) */ u32 fsx_cowextsize; /* CoW extsize field value (get/set) */ /* selectors: */ bool flags_valid:1; bool fsx_valid:1; }; int copy_fsxattr_to_user(const struct fileattr *fa, struct fsxattr __user *ufa); void fileattr_fill_xflags(struct fileattr *fa, u32 xflags); void fileattr_fill_flags(struct fileattr *fa, u32 flags); /** * fileattr_has_fsx - check for extended flags/attributes * @fa: fileattr pointer * * Return: true if any attributes are present that are not represented in * ->flags. */ static inline bool fileattr_has_fsx(const struct fileattr *fa) { return fa->fsx_valid && ((fa->fsx_xflags & ~FS_XFLAG_COMMON) || fa->fsx_extsize != 0 || fa->fsx_projid != 0 || fa->fsx_cowextsize != 0); } int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); #endif /* _LINUX_FILEATTR_H */
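fileattr_fill_flags() and fileattr_has_fsx() are enough for a filesystem that only implements the ext*-style 'flags' half of this interface. A hypothetical wiring of the two inode operations is sketched below; struct myfs_inode, MYFS_I() and the storage policy are invented for illustration.

#include <linux/fs.h>
#include <linux/fileattr.h>
#include <linux/container_of.h>

/* Invented in-memory inode for the sketch. */
struct myfs_inode {
	u32 i_flags;			/* FS_*_FL bits this fs persists */
	struct inode vfs_inode;
};

static inline struct myfs_inode *MYFS_I(struct inode *inode)
{
	return container_of(inode, struct myfs_inode, vfs_inode);
}

static int myfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	/* Publish only the ext*-style flags; fsx_* is left invalid. */
	fileattr_fill_flags(fa, MYFS_I(d_inode(dentry))->i_flags);
	return 0;
}

static int myfs_fileattr_set(struct mnt_idmap *idmap,
			     struct dentry *dentry, struct fileattr *fa)
{
	/* Anything that only fits in the fsx_* fields cannot be stored here. */
	if (fileattr_has_fsx(fa))
		return -EOPNOTSUPP;

	MYFS_I(d_inode(dentry))->i_flags = fa->flags;
	return 0;
}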
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_INTERNAL_H #define BLK_INTERNAL_H #include <linux/blk-crypto.h> #include <linux/memblock.h> /* for max_pfn/max_low_pfn */ #include <linux/sched/sysctl.h> #include <linux/timekeeping.h> #include <xen/xen.h> #include "blk-crypto-internal.h" struct elevator_type; /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) extern struct dentry *blk_debugfs_root; struct blk_flush_queue { spinlock_t mq_flush_lock; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; blk_status_t rq_status; unsigned long flush_pending_since; struct list_head flush_queue[2]; unsigned long flush_data_in_flight; struct request *flush_rq; }; bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); void blk_freeze_queue(struct request_queue *q); void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic); void blk_queue_start_drain(struct request_queue *q); int __bio_queue_enter(struct request_queue *q, struct bio *bio); void submit_bio_noacct_nocheck(struct bio *bio); static inline bool blk_try_enter_queue(struct request_queue *q, bool pm) {
rcu_read_lock(); if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter)) goto fail; /* * The code that increments the pm_only counter must ensure that the * counter is globally visible before the queue is unfrozen. */ if (blk_queue_pm_only(q) && (!pm || queue_rpm_status(q) == RPM_SUSPENDED)) goto fail_put; rcu_read_unlock(); return true; fail_put: blk_queue_exit(q); fail: rcu_read_unlock(); return false; } static inline int bio_queue_enter(struct bio *bio) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (blk_try_enter_queue(q, false)) return 0; return __bio_queue_enter(q, bio); } static inline void blk_wait_io(struct completion *done) { /* Prevent hang_check timer from firing at us during very long I/O */ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; if (timeout) while (!wait_for_completion_io_timeout(done, timeout)) ; else wait_for_completion_io(done); } #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs); bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, struct page *page, unsigned len, unsigned offset, bool *same_page); static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { unsigned long mask = queue_segment_boundary(q); phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset; phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset; /* * Merging adjacent physical pages may not work correctly under KMSAN * if their metadata pages aren't adjacent. Just disable merging. */ if (IS_ENABLED(CONFIG_KMSAN)) return false; if (addr1 + vec1->bv_len != addr2) return false; if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page)) return false; if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) return false; return true; } static inline bool __bvec_gap_to_prev(const struct queue_limits *lim, struct bio_vec *bprv, unsigned int offset) { return (offset & lim->virt_boundary_mask) || ((bprv->bv_offset + bprv->bv_len) & lim->virt_boundary_mask); } /* * Check if adding a bio_vec after bprv with offset would create a gap in * the SG list. Most drivers don't care about this, but some do. */ static inline bool bvec_gap_to_prev(const struct queue_limits *lim, struct bio_vec *bprv, unsigned int offset) { if (!lim->virt_boundary_mask) return false; return __bvec_gap_to_prev(lim, bprv, offset); } static inline bool rq_mergeable(struct request *rq) { if (blk_rq_is_passthrough(rq)) return false; if (req_op(rq) == REQ_OP_FLUSH) return false; if (req_op(rq) == REQ_OP_WRITE_ZEROES) return false; if (req_op(rq) == REQ_OP_ZONE_APPEND) return false; if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; if (rq->rq_flags & RQF_NOMERGE_FLAGS) return false; return true; } /* * There are two different ways to handle DISCARD merges: * 1) If max_discard_segments > 1, the driver treats every bio as a range and * send the bios to controller together. The ranges don't need to be * contiguous. * 2) Otherwise, the request will be normal read/write requests. The ranges * need to be contiguous. 
*/ static inline bool blk_discard_mergable(struct request *req) { if (req_op(req) == REQ_OP_DISCARD && queue_max_discard_segments(req->q) > 1) return true; return false; } static inline unsigned int blk_rq_get_max_segments(struct request *rq) { if (req_op(rq) == REQ_OP_DISCARD) return queue_max_discard_segments(rq->q); return queue_max_segments(rq->q); } static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, enum req_op op) { if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) return min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT); if (unlikely(op == REQ_OP_WRITE_ZEROES)) return q->limits.max_write_zeroes_sectors; return q->limits.max_sectors; } #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); void bio_integrity_free(struct bio *bio); static inline bool bio_integrity_endio(struct bio *bio) { if (bio_integrity(bio)) return __bio_integrity_endio(bio); return true; } bool blk_integrity_merge_rq(struct request_queue *, struct request *, struct request *); bool blk_integrity_merge_bio(struct request_queue *, struct request *, struct bio *); static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) { struct bio_integrity_payload *bip = bio_integrity(req->bio); struct bio_integrity_payload *bip_next = bio_integrity(next); return bvec_gap_to_prev(&req->q->limits, &bip->bip_vec[bip->bip_vcnt - 1], bip_next->bip_vec[0].bv_offset); } static inline bool integrity_req_gap_front_merge(struct request *req, struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_payload *bip_next = bio_integrity(req->bio); return bvec_gap_to_prev(&req->q->limits, &bip->bip_vec[bip->bip_vcnt - 1], bip_next->bip_vec[0].bv_offset); } extern const struct attribute_group blk_integrity_attr_group; #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline bool blk_integrity_merge_rq(struct request_queue *rq, struct request *r1, struct request *r2) { return true; } static inline bool blk_integrity_merge_bio(struct request_queue *rq, struct request *r, struct bio *b) { return true; } static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) { return false; } static inline bool integrity_req_gap_front_merge(struct request *req, struct bio *bio) { return false; } static inline void blk_flush_integrity(void) { } static inline bool bio_integrity_endio(struct bio *bio) { return true; } static inline void bio_integrity_free(struct bio *bio) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs); bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio, unsigned int nr_segs); /* * Plug flush limits */ #define BLK_MAX_REQUEST_COUNT 32 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) /* * Internal elevator interface */ #define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED) bool blk_insert_flush(struct request *rq); int elevator_switch(struct request_queue *q, struct elevator_type *new_e); void elevator_disable(struct request_queue *q); void elevator_exit(struct request_queue *q); int elv_register_queue(struct request_queue *q, bool uevent); void elv_unregister_queue(struct request_queue *q); ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char 
*buf); ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); ssize_t part_timeout_store(struct device *, struct device_attribute *, const char *, size_t); static inline bool bio_may_exceed_limits(struct bio *bio, const struct queue_limits *lim) { switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: return true; /* non-trivial splitting decisions */ default: break; } /* * All drivers must accept single-segments bios that are <= PAGE_SIZE. * This is a quick and dirty check that relies on the fact that * bi_io_vec[0] is always valid if a bio has data. The check might * lead to occasional false negatives when bios are cloned, but compared * to the performance impact of cloned bios themselves the loop below * doesn't matter anyway. */ return lim->chunk_sectors || bio->bi_vcnt != 1 || bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE; } struct bio *__bio_split_to_limits(struct bio *bio, const struct queue_limits *lim, unsigned int *nr_segs); int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs); bool blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next); unsigned int blk_recalc_rq_segments(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); int blk_set_default_limits(struct queue_limits *lim); int blk_dev_init(void); /* * Contribute to IO statistics IFF: * * a) it's attached to a gendisk, and * b) the queue had IO stats enabled when this request was started */ static inline bool blk_do_io_stat(struct request *rq) { return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq); } void update_io_ticks(struct block_device *part, unsigned long now, bool end); static inline void req_set_nomerge(struct request_queue *q, struct request *req) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; } /* * Internal io_context interface */ struct io_cq *ioc_find_get_icq(struct request_queue *q); struct io_cq *ioc_lookup_icq(struct request_queue *q); #ifdef CONFIG_BLK_ICQ void ioc_clear_queue(struct request_queue *q); #else static inline void ioc_clear_queue(struct request_queue *q) { } #endif /* CONFIG_BLK_ICQ */ #ifdef CONFIG_BLK_DEV_THROTTLING_LOW extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page); extern ssize_t blk_throtl_sample_time_store(struct request_queue *q, const char *page, size_t count); extern void blk_throtl_bio_endio(struct bio *bio); extern void blk_throtl_stat_add(struct request *rq, u64 time); #else static inline void blk_throtl_bio_endio(struct bio *bio) { } static inline void blk_throtl_stat_add(struct request *rq, u64 time) { } #endif struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q); static inline bool blk_queue_may_bounce(struct request_queue *q) { return IS_ENABLED(CONFIG_BOUNCE) && q->limits.bounce == BLK_BOUNCE_HIGH && max_low_pfn >= max_pfn; } static inline struct bio *blk_queue_bounce(struct bio *bio, struct request_queue *q) { if (unlikely(blk_queue_may_bounce(q) && bio_has_data(bio))) return __blk_queue_bounce(bio, q); return bio; } #ifdef CONFIG_BLK_DEV_ZONED void 
disk_free_zone_bitmaps(struct gendisk *disk); int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg); int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); #else /* CONFIG_BLK_DEV_ZONED */ static inline void disk_free_zone_bitmaps(struct gendisk *disk) {} static inline int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) { return -ENOTTY; } static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { return -ENOTTY; } #endif /* CONFIG_BLK_DEV_ZONED */ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); void bdev_add(struct block_device *bdev, dev_t dev); int blk_alloc_ext_minor(void); void blk_free_ext_minor(unsigned int minor); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, sector_t length); int bdev_del_partition(struct gendisk *disk, int partno); int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start, sector_t length); void drop_partition(struct block_device *part); void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors); struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass); int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page); /* * Clean up a page appropriately, where the page may be pinned, may have a * ref taken on it or neither. */ static inline void bio_release_page(struct bio *bio, struct page *page) { if (bio_flagged(bio, BIO_PAGE_PINNED)) unpin_user_page(page); } struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id); int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode); int disk_alloc_events(struct gendisk *disk); void disk_add_events(struct gendisk *disk); void disk_del_events(struct gendisk *disk); void disk_release_events(struct gendisk *disk); void disk_block_events(struct gendisk *disk); void disk_unblock_events(struct gendisk *disk); void disk_flush_events(struct gendisk *disk, unsigned int mask); extern struct device_attribute dev_attr_events; extern struct device_attribute dev_attr_events_async; extern struct device_attribute dev_attr_events_poll_msecs; extern struct attribute_group blk_trace_attr_group; blk_mode_t file_to_blk_mode(struct file *file); int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode, loff_t lstart, loff_t lend); long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); extern const struct address_space_operations def_blk_aops; int disk_register_independent_access_ranges(struct gendisk *disk); void disk_unregister_independent_access_ranges(struct gendisk *disk); #ifdef CONFIG_FAIL_MAKE_REQUEST bool should_fail_request(struct block_device *part, unsigned int bytes); #else /* CONFIG_FAIL_MAKE_REQUEST */ static inline bool should_fail_request(struct block_device *part, unsigned int bytes) { return false; } #endif /* CONFIG_FAIL_MAKE_REQUEST */ /* * Optimized request reference counting. Ideally we'd make timeouts be more * clever, as that's the only reason we need references at all... But until * this happens, this is faster than using refcount_t. 
Also see: * * abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count") */ #define req_ref_zero_or_close_to_overflow(req) \ ((unsigned int) atomic_read(&(req->ref)) + 127u <= 127u) static inline bool req_ref_inc_not_zero(struct request *req) { return atomic_inc_not_zero(&req->ref); } static inline bool req_ref_put_and_test(struct request *req) { WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); return atomic_dec_and_test(&req->ref); } static inline void req_ref_set(struct request *req, int value) { atomic_set(&req->ref, value); } static inline int req_ref_read(struct request *req) { return atomic_read(&req->ref); } static inline u64 blk_time_get_ns(void) { struct blk_plug *plug = current->plug; if (!plug || !in_task()) return ktime_get_ns(); /* * 0 could very well be a valid time, but rather than flag "this is * a valid timestamp" separately, just accept that we'll do an extra * ktime_get_ns() if we just happen to get 0 as the current time. */ if (!plug->cur_ktime) { plug->cur_ktime = ktime_get_ns(); current->flags |= PF_BLOCK_TS; } return plug->cur_ktime; } static inline ktime_t blk_time_get(void) { return ns_to_ktime(blk_time_get_ns()); } /* * From most significant bit: * 1 bit: reserved for other usage, see below * 12 bits: original size of bio * 51 bits: issue time of bio */ #define BIO_ISSUE_RES_BITS 1 #define BIO_ISSUE_SIZE_BITS 12 #define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) #define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) #define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) #define BIO_ISSUE_SIZE_MASK \ (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) #define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) /* Reserved bit for blk-throtl */ #define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) static inline u64 __bio_issue_time(u64 time) { return time & BIO_ISSUE_TIME_MASK; } static inline u64 bio_issue_time(struct bio_issue *issue) { return __bio_issue_time(issue->value); } static inline sector_t bio_issue_size(struct bio_issue *issue) { return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); } static inline void bio_issue_init(struct bio_issue *issue, sector_t size) { size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | (blk_time_get_ns() & BIO_ISSUE_TIME_MASK) | ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } void bdev_release(struct file *bdev_file); int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops, struct file *bdev_file); int bdev_permission(dev_t dev, blk_mode_t mode, void *holder); #endif /* BLK_INTERNAL_H */
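The BIO_ISSUE_* macros above pack one reserved bit, a 12-bit bio size and a 51-bit issue timestamp into a single 64-bit value. The standalone userspace snippet below mirrors that layout with local copies of the constants and made-up sample numbers, purely to illustrate the encoding performed by bio_issue_init() and unpacked by bio_issue_time()/bio_issue_size(); it is not the kernel code itself.

#include <stdint.h>
#include <stdio.h>

/* Local copies of the layout constants: 1 reserved bit, 12 size bits,
 * and the remaining 51 bits hold the issue time. */
#define RES_BITS	1
#define SIZE_BITS	12
#define RES_SHIFT	(64 - RES_BITS)
#define SIZE_SHIFT	(RES_SHIFT - SIZE_BITS)
#define TIME_MASK	((1ULL << SIZE_SHIFT) - 1)
#define SIZE_MASK	(((1ULL << SIZE_BITS) - 1) << SIZE_SHIFT)

int main(void)
{
	uint64_t now_ns = 123456789123ULL;	/* stand-in for blk_time_get_ns() */
	uint64_t size = 256;			/* sectors; truncated to 12 bits */
	uint64_t value = (now_ns & TIME_MASK) |
			 ((size & ((1ULL << SIZE_BITS) - 1)) << SIZE_SHIFT);

	printf("packed=%#llx time=%llu size=%llu\n",
	       (unsigned long long)value,
	       (unsigned long long)(value & TIME_MASK),
	       (unsigned long long)((value & SIZE_MASK) >> SIZE_SHIFT));
	return 0;
}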
/* SPDX-License-Identifier: GPL-2.0 */ /* * This file provides wrappers with sanitizer instrumentation for atomic bit * operations. * * To use this functionality, an arch's bitops.h file needs to define each of * the below bit operations with an arch_ prefix (e.g. arch_set_bit(), * arch___set_bit(), etc.). */ #ifndef _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H #define _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H #include <linux/instrumented.h> /** * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * This is a relaxed atomic operation (no implied memory barriers). * * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __always_inline void set_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_set_bit(nr, addr); } /** * clear_bit - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * This is a relaxed atomic operation (no implied memory barriers). */ static __always_inline void clear_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_clear_bit(nr, addr); } /** * change_bit - Toggle a bit in memory * @nr: Bit to change * @addr: Address to start counting from * * This is a relaxed atomic operation (no implied memory barriers). * * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __always_inline void change_bit(long nr, volatile unsigned long *addr) { instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); arch_change_bit(nr, addr); } /** * test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This is an atomic fully-ordered operation (implied full memory barrier). */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_set_bit(nr, addr); } /** * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * * This is an atomic fully-ordered operation (implied full memory barrier). */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_clear_bit(nr, addr); } /** * test_and_change_bit - Change a bit and return its old value * @nr: Bit to change * @addr: Address to count from * * This is an atomic fully-ordered operation (implied full memory barrier). */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_change_bit(nr, addr); } #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H */
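Because these wrappers keep the uninstrumented names, callers use them exactly as they would the raw architecture bit operations. A short, hypothetical usage sketch follows; the flag names and the 64-bit bitmap are made up.

#include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/types.h>

#define MY_FLAG_BUSY	0
#define MY_FLAG_DIRTY	33	/* deliberately past bit 31: lands in word 1 */

static DECLARE_BITMAP(my_flags, 64);

static void my_mark_dirty(void)
{
	/* Relaxed atomic RMW; the instrumentation above reports the access
	 * to the sanitizers via instrument_atomic_write(). */
	set_bit(MY_FLAG_DIRTY, my_flags);
}

static bool my_try_claim(void)
{
	/* Fully ordered test-and-set: returns true only for the winner. */
	return !test_and_set_bit(MY_FLAG_BUSY, my_flags);
}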
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_LIST_H #define _LINUX_LIST_H #include <linux/container_of.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/poison.h> #include <linux/const.h> #include <asm/barrier.h> /* * Circular doubly linked list implementation. * * Some of the internal functions ("__xxx") are useful when * manipulating whole lists rather than single entries, as * sometimes we already know the next/prev entries and we can * generate better code by using them directly rather than * using the generic single-entry routines. */ #define LIST_HEAD_INIT(name) { &(name), &(name) } #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) /** * INIT_LIST_HEAD - Initialize a list_head structure * @list: list_head structure to be initialized. * * Initializes the list_head to point to itself. If it is a list header, * the result is an empty list. */ static inline void INIT_LIST_HEAD(struct list_head *list) { WRITE_ONCE(list->next, list); WRITE_ONCE(list->prev, list); } #ifdef CONFIG_LIST_HARDENED #ifdef CONFIG_DEBUG_LIST # define __list_valid_slowpath #else # define __list_valid_slowpath __cold __preserve_most #endif /* * Performs the full set of list corruption checks before __list_add(). * On list corruption reports a warning, and returns false. */ extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, struct list_head *prev, struct list_head *next); /* * Performs list corruption checks before __list_add(). Returns false if a * corruption is detected, true otherwise. * * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking * inline to catch non-faulting corruptions, and only if a corruption is * detected calls the reporting function __list_add_valid_or_report().
*/ static __always_inline bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { bool ret = true; if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { /* * With the hardening version, elide checking if next and prev * are NULL, since the immediate dereference of them below would * result in a fault if NULL. * * With the reduced set of checks, we can afford to inline the * checks, which also gives the compiler a chance to elide some * of them completely if they can be proven at compile-time. If * one of the pre-conditions does not hold, the slow-path will * show a report which pre-condition failed. */ if (likely(next->prev == prev && prev->next == next && new != prev && new != next)) return true; ret = false; } ret &= __list_add_valid_or_report(new, prev, next); return ret; } /* * Performs the full set of list corruption checks before __list_del_entry(). * On list corruption reports a warning, and returns false. */ extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); /* * Performs list corruption checks before __list_del_entry(). Returns false if a * corruption is detected, true otherwise. * * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking * inline to catch non-faulting corruptions, and only if a corruption is * detected calls the reporting function __list_del_entry_valid_or_report(). */ static __always_inline bool __list_del_entry_valid(struct list_head *entry) { bool ret = true; if (!IS_ENABLED(CONFIG_DEBUG_LIST)) { struct list_head *prev = entry->prev; struct list_head *next = entry->next; /* * With the hardening version, elide checking if next and prev * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate * dereference of them below would result in a fault. */ if (likely(prev->next == entry && next->prev == entry)) return true; ret = false; } ret &= __list_del_entry_valid_or_report(entry); return ret; } #else static inline bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { return true; } static inline bool __list_del_entry_valid(struct list_head *entry) { return true; } #endif /* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { if (!__list_add_valid(new, prev, next)) return; next->prev = new; new->next = next; new->prev = prev; WRITE_ONCE(prev->next, new); } /** * list_add - add a new entry * @new: new entry to be added * @head: list head to add it after * * Insert a new entry after the specified head. * This is good for implementing stacks. */ static inline void list_add(struct list_head *new, struct list_head *head) { __list_add(new, head, head->next); } /** * list_add_tail - add a new entry * @new: new entry to be added * @head: list head to add it before * * Insert a new entry before the specified head. * This is useful for implementing queues. */ static inline void list_add_tail(struct list_head *new, struct list_head *head) { __list_add(new, head->prev, head); } /* * Delete a list entry by making the prev/next entries * point to each other. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_del(struct list_head * prev, struct list_head * next) { next->prev = prev; WRITE_ONCE(prev->next, next); } /* * Delete a list entry and clear the 'prev' pointer. 
* * This is a special-purpose list clearing method used in the networking code * for lists allocated as per-cpu, where we don't want to incur the extra * WRITE_ONCE() overhead of a regular list_del_init(). The code that uses this * needs to check the node 'prev' pointer instead of calling list_empty(). */ static inline void __list_del_clearprev(struct list_head *entry) { __list_del(entry->prev, entry->next); entry->prev = NULL; } static inline void __list_del_entry(struct list_head *entry) { if (!__list_del_entry_valid(entry)) return; __list_del(entry->prev, entry->next); } /** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. */ static inline void list_del(struct list_head *entry) { __list_del_entry(entry); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; } /** * list_replace - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert * * If @old was empty, it will be overwritten. */ static inline void list_replace(struct list_head *old, struct list_head *new) { new->next = old->next; new->next->prev = new; new->prev = old->prev; new->prev->next = new; } /** * list_replace_init - replace old entry by new one and initialize the old one * @old : the element to be replaced * @new : the new element to insert * * If @old was empty, it will be overwritten. */ static inline void list_replace_init(struct list_head *old, struct list_head *new) { list_replace(old, new); INIT_LIST_HEAD(old); } /** * list_swap - replace entry1 with entry2 and re-add entry1 at entry2's position * @entry1: the location to place entry2 * @entry2: the location to place entry1 */ static inline void list_swap(struct list_head *entry1, struct list_head *entry2) { struct list_head *pos = entry2->prev; list_del(entry2); list_replace(entry1, entry2); if (pos == entry1) pos = entry2; list_add(entry1, pos); } /** * list_del_init - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. */ static inline void list_del_init(struct list_head *entry) { __list_del_entry(entry); INIT_LIST_HEAD(entry); } /** * list_move - delete from one list and add as another's head * @list: the entry to move * @head: the head that will precede our entry */ static inline void list_move(struct list_head *list, struct list_head *head) { __list_del_entry(list); list_add(list, head); } /** * list_move_tail - delete from one list and add as another's tail * @list: the entry to move * @head: the head that will follow our entry */ static inline void list_move_tail(struct list_head *list, struct list_head *head) { __list_del_entry(list); list_add_tail(list, head); } /** * list_bulk_move_tail - move a subsection of a list to its tail * @head: the head that will follow our entry * @first: first entry to move * @last: last entry to move, can be the same as first * * Move all entries between @first and including @last before @head. * All three entries must belong to the same linked list. 
*/ static inline void list_bulk_move_tail(struct list_head *head, struct list_head *first, struct list_head *last) { first->prev->next = last->next; last->next->prev = first->prev; head->prev->next = first; first->prev = head->prev; last->next = head; head->prev = last; } /** * list_is_first -- tests whether @list is the first entry in list @head * @list: the entry to test * @head: the head of the list */ static inline int list_is_first(const struct list_head *list, const struct list_head *head) { return list->prev == head; } /** * list_is_last - tests whether @list is the last entry in list @head * @list: the entry to test * @head: the head of the list */ static inline int list_is_last(const struct list_head *list, const struct list_head *head) { return list->next == head; } /** * list_is_head - tests whether @list is the list @head * @list: the entry to test * @head: the head of the list */ static inline int list_is_head(const struct list_head *list, const struct list_head *head) { return list == head; } /** * list_empty - tests whether a list is empty * @head: the list to test. */ static inline int list_empty(const struct list_head *head) { return READ_ONCE(head->next) == head; } /** * list_del_init_careful - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. * * This is the same as list_del_init(), except designed to be used * together with list_empty_careful() in a way to guarantee ordering * of other memory operations. * * Any memory operations done before a list_del_init_careful() are * guaranteed to be visible after a list_empty_careful() test. */ static inline void list_del_init_careful(struct list_head *entry) { __list_del_entry(entry); WRITE_ONCE(entry->prev, entry); smp_store_release(&entry->next, entry); } /** * list_empty_careful - tests whether a list is empty and not being modified * @head: the list to test * * Description: * tests whether a list is empty _and_ checks that no other CPU might be * in the process of modifying either member (next or prev) * * NOTE: using list_empty_careful() without synchronization * can only be safe if the only activity that can happen * to the list entry is list_del_init(). Eg. it cannot be used * if another CPU could re-list_add() it. */ static inline int list_empty_careful(const struct list_head *head) { struct list_head *next = smp_load_acquire(&head->next); return list_is_head(next, head) && (next == READ_ONCE(head->prev)); } /** * list_rotate_left - rotate the list to the left * @head: the head of the list */ static inline void list_rotate_left(struct list_head *head) { struct list_head *first; if (!list_empty(head)) { first = head->next; list_move_tail(first, head); } } /** * list_rotate_to_front() - Rotate list to specific item. * @list: The desired new front of the list. * @head: The head of the list. * * Rotates list so that @list becomes the new front of the list. */ static inline void list_rotate_to_front(struct list_head *list, struct list_head *head) { /* * Deletes the list head from the list denoted by @head and * places it as the tail of @list, this effectively rotates the * list so that @list is at the front. */ list_move_tail(head, list); } /** * list_is_singular - tests whether a list has just one entry. * @head: the list to test. 
*/ static inline int list_is_singular(const struct list_head *head) { return !list_empty(head) && (head->next == head->prev); } static inline void __list_cut_position(struct list_head *list, struct list_head *head, struct list_head *entry) { struct list_head *new_first = entry->next; list->next = head->next; list->next->prev = list; list->prev = entry; entry->next = list; head->next = new_first; new_first->prev = head; } /** * list_cut_position - cut a list into two * @list: a new list to add all removed entries * @head: a list with entries * @entry: an entry within head, could be the head itself * and if so we won't cut the list * * This helper moves the initial part of @head, up to and * including @entry, from @head to @list. You should * pass on @entry an element you know is on @head. @list * should be an empty list or a list you do not care about * losing its data. * */ static inline void list_cut_position(struct list_head *list, struct list_head *head, struct list_head *entry) { if (list_empty(head)) return; if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next)) return; if (list_is_head(entry, head)) INIT_LIST_HEAD(list); else __list_cut_position(list, head, entry); } /** * list_cut_before - cut a list into two, before given entry * @list: a new list to add all removed entries * @head: a list with entries * @entry: an entry within head, could be the head itself * * This helper moves the initial part of @head, up to but * excluding @entry, from @head to @list. You should pass * in @entry an element you know is on @head. @list should * be an empty list or a list you do not care about losing * its data. * If @entry == @head, all entries on @head are moved to * @list. */ static inline void list_cut_before(struct list_head *list, struct list_head *head, struct list_head *entry) { if (head->next == entry) { INIT_LIST_HEAD(list); return; } list->next = head->next; list->next->prev = list; list->prev = entry->prev; list->prev->next = list; head->next = entry; entry->prev = head; } static inline void __list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) { struct list_head *first = list->next; struct list_head *last = list->prev; first->prev = prev; prev->next = first; last->next = next; next->prev = last; } /** * list_splice - join two lists, this is designed for stacks * @list: the new list to add. * @head: the place to add it in the first list. */ static inline void list_splice(const struct list_head *list, struct list_head *head) { if (!list_empty(list)) __list_splice(list, head, head->next); } /** * list_splice_tail - join two lists, each list being a queue * @list: the new list to add. * @head: the place to add it in the first list. */ static inline void list_splice_tail(struct list_head *list, struct list_head *head) { if (!list_empty(list)) __list_splice(list, head->prev, head); } /** * list_splice_init - join two lists and reinitialise the emptied list. * @list: the new list to add. * @head: the place to add it in the first list. * * The list at @list is reinitialised */ static inline void list_splice_init(struct list_head *list, struct list_head *head) { if (!list_empty(list)) { __list_splice(list, head, head->next); INIT_LIST_HEAD(list); } } /** * list_splice_tail_init - join two lists and reinitialise the emptied list * @list: the new list to add. * @head: the place to add it in the first list. * * Each of the lists is a queue. 
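/*
 * Illustrative sketch (hypothetical helper, not part of list.h): the cut
 * and splice primitives above combine to move a whole run of entries in
 * O(1).  @last is assumed to be an entry on @src (or @src itself, in
 * which case nothing moves); @batch is a private, freshly initialised
 * head that is not used again afterwards.
 */
static void move_batch(struct list_head *dst, struct list_head *src,
		       struct list_head *last)
{
	LIST_HEAD(batch);

	list_cut_position(&batch, src, last);	/* src: first..@last -> batch */
	list_splice_tail(&batch, dst);		/* append the run at dst's tail */
}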
* The list at @list is reinitialised */ static inline void list_splice_tail_init(struct list_head *list, struct list_head *head) { if (!list_empty(list)) { __list_splice(list, head->prev, head); INIT_LIST_HEAD(list); } } /** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. */ #define list_entry(ptr, type, member) \ container_of(ptr, type, member) /** * list_first_entry - get the first element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note, that list is expected to be not empty. */ #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) /** * list_last_entry - get the last element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note, that list is expected to be not empty. */ #define list_last_entry(ptr, type, member) \ list_entry((ptr)->prev, type, member) /** * list_first_entry_or_null - get the first element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note that if the list is empty, it returns NULL. */ #define list_first_entry_or_null(ptr, type, member) ({ \ struct list_head *head__ = (ptr); \ struct list_head *pos__ = READ_ONCE(head__->next); \ pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ }) /** * list_next_entry - get the next element in list * @pos: the type * to cursor * @member: the name of the list_head within the struct. */ #define list_next_entry(pos, member) \ list_entry((pos)->member.next, typeof(*(pos)), member) /** * list_next_entry_circular - get the next element in list * @pos: the type * to cursor. * @head: the list head to take the element from. * @member: the name of the list_head within the struct. * * Wraparound if pos is the last element (return the first element). * Note, that list is expected to be not empty. */ #define list_next_entry_circular(pos, head, member) \ (list_is_last(&(pos)->member, head) ? \ list_first_entry(head, typeof(*(pos)), member) : list_next_entry(pos, member)) /** * list_prev_entry - get the prev element in list * @pos: the type * to cursor * @member: the name of the list_head within the struct. */ #define list_prev_entry(pos, member) \ list_entry((pos)->member.prev, typeof(*(pos)), member) /** * list_prev_entry_circular - get the prev element in list * @pos: the type * to cursor. * @head: the list head to take the element from. * @member: the name of the list_head within the struct. * * Wraparound if pos is the first element (return the last element). * Note, that list is expected to be not empty. */ #define list_prev_entry_circular(pos, head, member) \ (list_is_first(&(pos)->member, head) ? \ list_last_entry(head, typeof(*(pos)), member) : list_prev_entry(pos, member)) /** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. */ #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** * list_for_each_reverse - iterate backwards over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. 
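/*
 * Illustrative sketch (struct request_stub and its fields are made up):
 * list_first_entry_or_null() is the usual way to pop from a queue that
 * may be empty, with list_entry()/container_of() recovering the
 * containing structure from its embedded list_head.
 */
struct request_stub {
	int tag;
	struct list_head queue_node;
};

static struct request_stub *pop_request(struct list_head *q)
{
	struct request_stub *rq;

	rq = list_first_entry_or_null(q, struct request_stub, queue_node);
	if (rq)
		list_del_init(&rq->queue_node);	/* unlink, keep it reusable */
	return rq;
}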
*/ #define list_for_each_reverse(pos, head) \ for (pos = (head)->prev; pos != (head); pos = pos->prev) /** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. */ #define list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ !list_is_head(pos, (head)); \ pos = rcu_dereference(pos->next)) /** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. * * Continue to iterate over a list, continuing after the current position. */ #define list_for_each_continue(pos, head) \ for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next) /** * list_for_each_prev - iterate over a list backwards * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. */ #define list_for_each_prev(pos, head) \ for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev) /** * list_for_each_safe - iterate over a list safe against removal of list entry * @pos: the &struct list_head to use as a loop cursor. * @n: another &struct list_head to use as temporary storage * @head: the head for your list. */ #define list_for_each_safe(pos, n, head) \ for (pos = (head)->next, n = pos->next; \ !list_is_head(pos, (head)); \ pos = n, n = pos->next) /** * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry * @pos: the &struct list_head to use as a loop cursor. * @n: another &struct list_head to use as temporary storage * @head: the head for your list. */ #define list_for_each_prev_safe(pos, n, head) \ for (pos = (head)->prev, n = pos->prev; \ !list_is_head(pos, (head)); \ pos = n, n = pos->prev) /** * list_count_nodes - count nodes in the list * @head: the head for your list. */ static inline size_t list_count_nodes(struct list_head *head) { struct list_head *pos; size_t count = 0; list_for_each(pos, head) count++; return count; } /** * list_entry_is_head - test if the entry points to the head of the list * @pos: the type * to cursor * @head: the head for your list. * @member: the name of the list_head within the struct. */ #define list_entry_is_head(pos, head, member) \ list_is_head(&pos->member, (head)) /** * list_for_each_entry - iterate over list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. */ #define list_for_each_entry(pos, head, member) \ for (pos = list_first_entry(head, typeof(*pos), member); \ !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** * list_for_each_entry_reverse - iterate backwards over list of given type. * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. */ #define list_for_each_entry_reverse(pos, head, member) \ for (pos = list_last_entry(head, typeof(*pos), member); \ !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue() * @pos: the type * to use as a start point * @head: the head of the list * @member: the name of the list_head within the struct. * * Prepares a pos entry for use as a start point in list_for_each_entry_continue(). */ #define list_prepare_entry(pos, head, member) \ ((pos) ? 
: list_entry(head, typeof(*pos), member)) /** * list_for_each_entry_continue - continue iteration over list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * * Continue to iterate over list of given type, continuing after * the current position. */ #define list_for_each_entry_continue(pos, head, member) \ for (pos = list_next_entry(pos, member); \ !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** * list_for_each_entry_continue_reverse - iterate backwards from the given point * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * * Start to iterate over list of given type backwards, continuing after * the current position. */ #define list_for_each_entry_continue_reverse(pos, head, member) \ for (pos = list_prev_entry(pos, member); \ !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** * list_for_each_entry_from - iterate over list of given type from the current point * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * * Iterate over list of given type, continuing from current position. */ #define list_for_each_entry_from(pos, head, member) \ for (; !list_entry_is_head(pos, head, member); \ pos = list_next_entry(pos, member)) /** * list_for_each_entry_from_reverse - iterate backwards over list of given type * from the current point * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * * Iterate backwards over list of given type, continuing from current position. */ #define list_for_each_entry_from_reverse(pos, head, member) \ for (; !list_entry_is_head(pos, head, member); \ pos = list_prev_entry(pos, member)) /** * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @head: the head for your list. * @member: the name of the list_head within the struct. */ #define list_for_each_entry_safe(pos, n, head, member) \ for (pos = list_first_entry(head, typeof(*pos), member), \ n = list_next_entry(pos, member); \ !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** * list_for_each_entry_safe_continue - continue list iteration safe against removal * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @head: the head for your list. * @member: the name of the list_head within the struct. * * Iterate over list of given type, continuing after current point, * safe against removal of list entry. */ #define list_for_each_entry_safe_continue(pos, n, head, member) \ for (pos = list_next_entry(pos, member), \ n = list_next_entry(pos, member); \ !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** * list_for_each_entry_safe_from - iterate over list from current point safe against removal * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @head: the head for your list. * @member: the name of the list_head within the struct. * * Iterate over list of given type from current point, safe against * removal of list entry. 
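/*
 * Illustrative sketch (struct item_stub is made up and assumed to be
 * kmalloc()ed): the _safe iterators keep the next element in @n, so the
 * loop body may delete and free the current entry without breaking the
 * walk.  Plain list_for_each_entry() must not be used for this.
 */
struct item_stub {
	int key;
	struct list_head node;
};

static void purge_key(struct list_head *head, int key)
{
	struct item_stub *it, *tmp;

	list_for_each_entry_safe(it, tmp, head, node) {
		if (it->key == key) {
			list_del(&it->node);
			kfree(it);		/* from <linux/slab.h> */
		}
	}
}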
*/ #define list_for_each_entry_safe_from(pos, n, head, member) \ for (n = list_next_entry(pos, member); \ !list_entry_is_head(pos, head, member); \ pos = n, n = list_next_entry(n, member)) /** * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @head: the head for your list. * @member: the name of the list_head within the struct. * * Iterate backwards over list of given type, safe against removal * of list entry. */ #define list_for_each_entry_safe_reverse(pos, n, head, member) \ for (pos = list_last_entry(head, typeof(*pos), member), \ n = list_prev_entry(pos, member); \ !list_entry_is_head(pos, head, member); \ pos = n, n = list_prev_entry(n, member)) /** * list_safe_reset_next - reset a stale list_for_each_entry_safe loop * @pos: the loop cursor used in the list_for_each_entry_safe loop * @n: temporary storage used in list_for_each_entry_safe * @member: the name of the list_head within the struct. * * list_safe_reset_next is not safe to use in general if the list may be * modified concurrently (eg. the lock is dropped in the loop body). An * exception to this is if the cursor element (pos) is pinned in the list, * and list_safe_reset_next is called after re-taking the lock and before * completing the current iteration of the loop body. */ #define list_safe_reset_next(pos, n, member) \ n = list_next_entry(pos, member) /* * Double linked lists with a single pointer list head. * Mostly useful for hash tables where the two pointer list head is * too wasteful. * You lose the ability to access the tail in O(1). */ #define HLIST_HEAD_INIT { .first = NULL } #define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } #define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) static inline void INIT_HLIST_NODE(struct hlist_node *h) { h->next = NULL; h->pprev = NULL; } /** * hlist_unhashed - Has node been removed from list and reinitialized? * @h: Node to be checked * * Not that not all removal functions will leave a node in unhashed * state. For example, hlist_nulls_del_init_rcu() does leave the * node in unhashed state, but hlist_nulls_del() does not. */ static inline int hlist_unhashed(const struct hlist_node *h) { return !h->pprev; } /** * hlist_unhashed_lockless - Version of hlist_unhashed for lockless use * @h: Node to be checked * * This variant of hlist_unhashed() must be used in lockless contexts * to avoid potential load-tearing. The READ_ONCE() is paired with the * various WRITE_ONCE() in hlist helpers that are defined below. */ static inline int hlist_unhashed_lockless(const struct hlist_node *h) { return !READ_ONCE(h->pprev); } /** * hlist_empty - Is the specified hlist_head structure an empty hlist? * @h: Structure to check. */ static inline int hlist_empty(const struct hlist_head *h) { return !READ_ONCE(h->first); } static inline void __hlist_del(struct hlist_node *n) { struct hlist_node *next = n->next; struct hlist_node **pprev = n->pprev; WRITE_ONCE(*pprev, next); if (next) WRITE_ONCE(next->pprev, pprev); } /** * hlist_del - Delete the specified hlist_node from its list * @n: Node to delete. * * Note that this function leaves the node in hashed state. Use * hlist_del_init() or similar instead to unhash @n. */ static inline void hlist_del(struct hlist_node *n) { __hlist_del(n); n->next = LIST_POISON1; n->pprev = LIST_POISON2; } /** * hlist_del_init - Delete the specified hlist_node from its list and initialize * @n: Node to delete. 
* * Note that this function leaves the node in unhashed state. */ static inline void hlist_del_init(struct hlist_node *n) { if (!hlist_unhashed(n)) { __hlist_del(n); INIT_HLIST_NODE(n); } } /** * hlist_add_head - add a new entry at the beginning of the hlist * @n: new entry to be added * @h: hlist head to add it after * * Insert a new entry after the specified head. * This is good for implementing stacks. */ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; WRITE_ONCE(n->next, first); if (first) WRITE_ONCE(first->pprev, &n->next); WRITE_ONCE(h->first, n); WRITE_ONCE(n->pprev, &h->first); } /** * hlist_add_before - add a new entry before the one specified * @n: new entry to be added * @next: hlist node to add it before, which must be non-NULL */ static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) { WRITE_ONCE(n->pprev, next->pprev); WRITE_ONCE(n->next, next); WRITE_ONCE(next->pprev, &n->next); WRITE_ONCE(*(n->pprev), n); } /** * hlist_add_behind - add a new entry after the one specified * @n: new entry to be added * @prev: hlist node to add it after, which must be non-NULL */ static inline void hlist_add_behind(struct hlist_node *n, struct hlist_node *prev) { WRITE_ONCE(n->next, prev->next); WRITE_ONCE(prev->next, n); WRITE_ONCE(n->pprev, &prev->next); if (n->next) WRITE_ONCE(n->next->pprev, &n->next); } /** * hlist_add_fake - create a fake hlist consisting of a single headless node * @n: Node to make a fake list out of * * This makes @n appear to be its own predecessor on a headless hlist. * The point of this is to allow things like hlist_del() to work correctly * in cases where there is no list. */ static inline void hlist_add_fake(struct hlist_node *n) { n->pprev = &n->next; } /** * hlist_fake: Is this node a fake hlist? * @h: Node to check for being a self-referential fake hlist. */ static inline bool hlist_fake(struct hlist_node *h) { return h->pprev == &h->next; } /** * hlist_is_singular_node - is node the only element of the specified hlist? * @n: Node to check for singularity. * @h: Header for potentially singular list. * * Check whether the node is the only node of the head without * accessing head, thus avoiding unnecessary cache misses. */ static inline bool hlist_is_singular_node(struct hlist_node *n, struct hlist_head *h) { return !n->next && n->pprev == &h->first; } /** * hlist_move_list - Move an hlist * @old: hlist_head for old list. * @new: hlist_head for new list. * * Move a list from one list head to another. Fixup the pprev * reference of the first entry if it exists. */ static inline void hlist_move_list(struct hlist_head *old, struct hlist_head *new) { new->first = old->first; if (new->first) new->first->pprev = &new->first; old->first = NULL; } /** * hlist_splice_init() - move all entries from one list to another * @from: hlist_head from which entries will be moved * @last: last entry on the @from list * @to: hlist_head to which entries will be moved * * @to can be empty, @from must contain at least @last. 
*/ static inline void hlist_splice_init(struct hlist_head *from, struct hlist_node *last, struct hlist_head *to) { if (to->first) to->first->pprev = &last->next; last->next = to->first; to->first = from->first; from->first->pprev = &to->first; from->first = NULL; } #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ for (pos = (head)->first; pos ; pos = pos->next) #define hlist_for_each_safe(pos, n, head) \ for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ pos = n) #define hlist_entry_safe(ptr, type, member) \ ({ typeof(ptr) ____ptr = (ptr); \ ____ptr ? hlist_entry(____ptr, type, member) : NULL; \ }) /** * hlist_for_each_entry - iterate over list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry(pos, head, member) \ for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\ pos; \ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_continue - iterate over a hlist continuing after current point * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue(pos, member) \ for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\ pos; \ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_from - iterate over a hlist continuing from current point * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_from(pos, member) \ for (; pos; \ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @pos: the type * to use as a loop cursor. * @n: a &struct hlist_node to use as temporary storage * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_safe(pos, n, head, member) \ for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\ pos && ({ n = pos->member.next; 1; }); \ pos = hlist_entry_safe(n, typeof(*pos), member)) /** * hlist_count_nodes - count nodes in the hlist * @head: the head for your hlist. */ static inline size_t hlist_count_nodes(struct hlist_head *head) { struct hlist_node *pos; size_t count = 0; hlist_for_each(pos, head) count++; return count; } #endif
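/*
 * Illustrative sketch of the intended use of hlist (all names and the
 * table size are made up): open-chained hash buckets, where paying a
 * single pointer per bucket head matters and O(1) tail access does not.
 * A static array of hlist_head is already zero-initialised, which is the
 * empty state (.first == NULL).
 */
#define EXAMPLE_HASH_SIZE	64

struct example_obj {
	unsigned long key;
	struct hlist_node hnode;
};

static struct hlist_head example_table[EXAMPLE_HASH_SIZE];

static void example_insert(struct example_obj *obj)
{
	hlist_add_head(&obj->hnode,
		       &example_table[obj->key % EXAMPLE_HASH_SIZE]);
}

static struct example_obj *example_lookup(unsigned long key)
{
	struct example_obj *obj;

	hlist_for_each_entry(obj, &example_table[key % EXAMPLE_HASH_SIZE], hnode)
		if (obj->key == key)
			return obj;
	return NULL;
}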
2354 2355 // SPDX-License-Identifier: GPL-2.0+ /* * User-space Probes (UProbes) * * Copyright (C) IBM Corporation, 2008-2012 * Authors: * Srikar Dronamraju * Jim Keniston * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra */ #include <linux/kernel.h> #include <linux/highmem.h> #include <linux/pagemap.h> /* read_mapping_page */ #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/export.h> #include <linux/rmap.h> /* anon_vma_prepare */ #include <linux/mmu_notifier.h> /* set_pte_at_notify */ #include <linux/swap.h> /* folio_free_swap */ #include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/kdebug.h> /* notifier mechanism */ #include <linux/percpu-rwsem.h> #include <linux/task_work.h> #include <linux/shmem_fs.h> #include <linux/khugepaged.h> #include <linux/uprobes.h> #define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE static struct rb_root uprobes_tree = RB_ROOT; /* * allows us to skip the uprobe_mmap if there are no uprobe events active * at this time. Probably a fine grained per inode count is better? */ #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ #define UPROBES_HASH_SZ 13 /* serialize uprobe->pending_list */ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 struct uprobe { struct rb_node rb_node; /* node in the rb tree */ refcount_t ref; struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; struct list_head pending_list; struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ loff_t offset; loff_t ref_ctr_offset; unsigned long flags; /* * The generic code assumes that it has two members of unknown type * owned by the arch-specific code: * * insn - copy_insn() saves the original instruction here for * arch_uprobe_analyze_insn(). * * ixol - potentially modified instruction to execute out of * line, copied to xol_area by xol_get_insn_slot(). */ struct arch_uprobe arch; }; struct delayed_uprobe { struct list_head list; struct uprobe *uprobe; struct mm_struct *mm; }; static DEFINE_MUTEX(delayed_uprobe_lock); static LIST_HEAD(delayed_uprobe_list); /* * Execute out of line area: anonymous executable mapping installed * by the probed task to execute the copy of the original instruction * mangled by set_swbp(). * * On a breakpoint hit, thread contests for a slot. It frees the * slot after singlestep. Currently a fixed number of slots are * allocated. */ struct xol_area { wait_queue_head_t wq; /* if all slots are busy */ atomic_t slot_count; /* number of in-use slots */ unsigned long *bitmap; /* 0 = free slot */ struct vm_special_mapping xol_mapping; struct page *pages[2]; /* * We keep the vma's vm_start rather than a pointer to the vma * itself. The probed process or a naughty kernel module could make * the vma go away, and we must handle that reasonably gracefully. */ unsigned long vaddr; /* Page(s) of instruction slots */ }; /* * valid_vma: Verify if the specified vma is an executable vma * Relax restrictions while unregistering: vm_flags might have * changed after breakpoint was inserted. * - is_register: indicates if we are in register context. 
* - Return 1 if the specified virtual address is in an * executable vma. */ static bool valid_vma(struct vm_area_struct *vma, bool is_register) { vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE; if (is_register) flags |= VM_WRITE; return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC; } static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) { return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT); } static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) { return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start); } /** * __replace_page - replace page in vma by new page. * based on replace_page in mm/ksm.c * * @vma: vma that holds the pte pointing to page * @addr: address the old @page is mapped at * @old_page: the page we are replacing by new_page * @new_page: the modified page we replace page by * * If @new_page is NULL, only unmap @old_page. * * Returns 0 on success, negative error code otherwise. */ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, struct page *old_page, struct page *new_page) { struct folio *old_folio = page_folio(old_page); struct folio *new_folio; struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, old_folio, vma, addr, 0); int err; struct mmu_notifier_range range; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, addr, addr + PAGE_SIZE); if (new_page) { new_folio = page_folio(new_page); err = mem_cgroup_charge(new_folio, vma->vm_mm, GFP_KERNEL); if (err) return err; } /* For folio_free_swap() below */ folio_lock(old_folio); mmu_notifier_invalidate_range_start(&range); err = -EAGAIN; if (!page_vma_mapped_walk(&pvmw)) goto unlock; VM_BUG_ON_PAGE(addr != pvmw.address, old_page); if (new_page) { folio_get(new_folio); folio_add_new_anon_rmap(new_folio, vma, addr); folio_add_lru_vma(new_folio, vma); } else /* no new page, just dec_mm_counter for old_page */ dec_mm_counter(mm, MM_ANONPAGES); if (!folio_test_anon(old_folio)) { dec_mm_counter(mm, mm_counter_file(old_folio)); inc_mm_counter(mm, MM_ANONPAGES); } flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte))); ptep_clear_flush(vma, addr, pvmw.pte); if (new_page) set_pte_at_notify(mm, addr, pvmw.pte, mk_pte(new_page, vma->vm_page_prot)); folio_remove_rmap_pte(old_folio, old_page, vma); if (!folio_mapped(old_folio)) folio_free_swap(old_folio); page_vma_mapped_walk_done(&pvmw); folio_put(old_folio); err = 0; unlock: mmu_notifier_invalidate_range_end(&range); folio_unlock(old_folio); return err; } /** * is_swbp_insn - check if instruction is breakpoint instruction. * @insn: instruction to be checked. * Default implementation of is_swbp_insn * Returns true if @insn is a breakpoint instruction. */ bool __weak is_swbp_insn(uprobe_opcode_t *insn) { return *insn == UPROBE_SWBP_INSN; } /** * is_trap_insn - check if instruction is breakpoint instruction. * @insn: instruction to be checked. * Default implementation of is_trap_insn * Returns true if @insn is a breakpoint instruction. * * This function is needed for the case where an architecture has multiple * trap instructions (like powerpc). 
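/*
 * Worked example of offset_to_vaddr()/vaddr_to_offset() above, using
 * made-up numbers: for a vma with vm_start == 0x400000 and vm_pgoff == 2
 * (so the mapping starts at file offset 2 << PAGE_SHIFT == 0x2000 with
 * 4K pages), a probe at file offset 0x2234 maps to
 *
 *	vaddr = 0x400000 + 0x2234 - 0x2000 = 0x400234
 *
 * and vaddr_to_offset() on that vaddr gives 0x2000 + 0x234 == 0x2234
 * back, so the two helpers are exact inverses for addresses inside the
 * vma.
 */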
*/ bool __weak is_trap_insn(uprobe_opcode_t *insn) { return is_swbp_insn(insn); } static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len) { void *kaddr = kmap_atomic(page); memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len); kunmap_atomic(kaddr); } static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len) { void *kaddr = kmap_atomic(page); memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len); kunmap_atomic(kaddr); } static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode) { uprobe_opcode_t old_opcode; bool is_swbp; /* * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here. * We do not check if it is any other 'trap variant' which could * be conditional trap instruction such as the one powerpc supports. * * The logic is that we do not care if the underlying instruction * is a trap variant; uprobes always wins over any other (gdb) * breakpoint. */ copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE); is_swbp = is_swbp_insn(&old_opcode); if (is_swbp_insn(new_opcode)) { if (is_swbp) /* register: already installed? */ return 0; } else { if (!is_swbp) /* unregister: was it changed by us? */ return 0; } return 1; } static struct delayed_uprobe * delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm) { struct delayed_uprobe *du; list_for_each_entry(du, &delayed_uprobe_list, list) if (du->uprobe == uprobe && du->mm == mm) return du; return NULL; } static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm) { struct delayed_uprobe *du; if (delayed_uprobe_check(uprobe, mm)) return 0; du = kzalloc(sizeof(*du), GFP_KERNEL); if (!du) return -ENOMEM; du->uprobe = uprobe; du->mm = mm; list_add(&du->list, &delayed_uprobe_list); return 0; } static void delayed_uprobe_delete(struct delayed_uprobe *du) { if (WARN_ON(!du)) return; list_del(&du->list); kfree(du); } static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm) { struct list_head *pos, *q; struct delayed_uprobe *du; if (!uprobe && !mm) return; list_for_each_safe(pos, q, &delayed_uprobe_list) { du = list_entry(pos, struct delayed_uprobe, list); if (uprobe && du->uprobe != uprobe) continue; if (mm && du->mm != mm) continue; delayed_uprobe_delete(du); } } static bool valid_ref_ctr_vma(struct uprobe *uprobe, struct vm_area_struct *vma) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset); return uprobe->ref_ctr_offset && vma->vm_file && file_inode(vma->vm_file) == uprobe->inode && (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && vma->vm_start <= vaddr && vma->vm_end > vaddr; } static struct vm_area_struct * find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *tmp; for_each_vma(vmi, tmp) if (valid_ref_ctr_vma(uprobe, tmp)) return tmp; return NULL; } static int __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) { void *kaddr; struct page *page; int ret; short *ptr; if (!vaddr || !d) return -EINVAL; ret = get_user_pages_remote(mm, vaddr, 1, FOLL_WRITE, &page, NULL); if (unlikely(ret <= 0)) { /* * We are asking for 1 page. If get_user_pages_remote() fails, * it may return 0, in that case we have to return error. */ return ret == 0 ? -EBUSY : ret; } kaddr = kmap_atomic(page); ptr = kaddr + (vaddr & ~PAGE_MASK); if (unlikely(*ptr + d < 0)) { pr_warn("ref_ctr going negative. 
vaddr: 0x%lx, " "curr val: %d, delta: %d\n", vaddr, *ptr, d); ret = -EINVAL; goto out; } *ptr += d; ret = 0; out: kunmap_atomic(kaddr); put_page(page); return ret; } static void update_ref_ctr_warn(struct uprobe *uprobe, struct mm_struct *mm, short d) { pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n", d > 0 ? "increment" : "decrement", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) uprobe->ref_ctr_offset, mm); } static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm, short d) { struct vm_area_struct *rc_vma; unsigned long rc_vaddr; int ret = 0; rc_vma = find_ref_ctr_vma(uprobe, mm); if (rc_vma) { rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset); ret = __update_ref_ctr(mm, rc_vaddr, d); if (ret) update_ref_ctr_warn(uprobe, mm, d); if (d > 0) return ret; } mutex_lock(&delayed_uprobe_lock); if (d > 0) ret = delayed_uprobe_add(uprobe, mm); else delayed_uprobe_remove(uprobe, mm); mutex_unlock(&delayed_uprobe_lock); return ret; } /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction * supported by that architecture then we need to modify is_trap_at_addr and * uprobe_write_opcode accordingly. This would never be a problem for archs * that have fixed length instructions. * * uprobe_write_opcode - write the opcode at a given virtual address. * @auprobe: arch specific probepoint information. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. * * Called with mm->mmap_lock held for write. * Return 0 (success) or a negative errno. */ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct uprobe *uprobe; struct page *old_page, *new_page; struct vm_area_struct *vma; int ret, is_register, ref_ctr_updated = 0; bool orig_page_huge = false; unsigned int gup_flags = FOLL_FORCE; is_register = is_swbp_insn(&opcode); uprobe = container_of(auprobe, struct uprobe, arch); retry: if (is_register) gup_flags |= FOLL_SPLIT_PMD; /* Read the page with vaddr into memory */ old_page = get_user_page_vma_remote(mm, vaddr, gup_flags, &vma); if (IS_ERR(old_page)) return PTR_ERR(old_page); ret = verify_opcode(old_page, vaddr, &opcode); if (ret <= 0) goto put_old; if (WARN(!is_register && PageCompound(old_page), "uprobe unregister should never work on compound page\n")) { ret = -EINVAL; goto put_old; } /* We are going to replace instruction, update ref_ctr. */ if (!ref_ctr_updated && uprobe->ref_ctr_offset) { ret = update_ref_ctr(uprobe, mm, is_register ? 
1 : -1); if (ret) goto put_old; ref_ctr_updated = 1; } ret = 0; if (!is_register && !PageAnon(old_page)) goto put_old; ret = anon_vma_prepare(vma); if (ret) goto put_old; ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) goto put_old; __SetPageUptodate(new_page); copy_highpage(new_page, old_page); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); if (!is_register) { struct page *orig_page; pgoff_t index; VM_BUG_ON_PAGE(!PageAnon(old_page), old_page); index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT; orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index); if (orig_page) { if (PageUptodate(orig_page) && pages_identical(new_page, orig_page)) { /* let go new_page */ put_page(new_page); new_page = NULL; if (PageCompound(orig_page)) orig_page_huge = true; } put_page(orig_page); } } ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page); if (new_page) put_page(new_page); put_old: put_page(old_page); if (unlikely(ret == -EAGAIN)) goto retry; /* Revert back reference counter if instruction update failed. */ if (ret && is_register && ref_ctr_updated) update_ref_ctr(uprobe, mm, -1); /* try collapse pmd for compound page */ if (!ret && orig_page_huge) collapse_pte_mapped_thp(mm, vaddr, false); return ret; } /** * set_swbp - store breakpoint at a given address. * @auprobe: arch specific probepoint information. * @mm: the probed process address space. * @vaddr: the virtual address to insert the opcode. * * For mm @mm, store the breakpoint instruction at @vaddr. * Return 0 (success) or a negative errno. */ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); } /** * set_orig_insn - Restore the original instruction. * @mm: the probed process address space. * @auprobe: arch specific probepoint information. * @vaddr: the virtual address to insert the opcode. * * For mm @mm, restore the original opcode (opcode) at @vaddr. * Return 0 (success) or a negative errno. */ int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { return uprobe_write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn); } static struct uprobe *get_uprobe(struct uprobe *uprobe) { refcount_inc(&uprobe->ref); return uprobe; } static void put_uprobe(struct uprobe *uprobe) { if (refcount_dec_and_test(&uprobe->ref)) { /* * If application munmap(exec_vma) before uprobe_unregister() * gets called, we don't get a chance to remove uprobe from * delayed_uprobe_list from remove_breakpoint(). Do it here. 
*/ mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); kfree(uprobe); } } static __always_inline int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset, const struct uprobe *r) { if (l_inode < r->inode) return -1; if (l_inode > r->inode) return 1; if (l_offset < r->offset) return -1; if (l_offset > r->offset) return 1; return 0; } #define __node_2_uprobe(node) \ rb_entry((node), struct uprobe, rb_node) struct __uprobe_key { struct inode *inode; loff_t offset; }; static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b) { const struct __uprobe_key *a = key; return uprobe_cmp(a->inode, a->offset, __node_2_uprobe(b)); } static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b) { struct uprobe *u = __node_2_uprobe(a); return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b)); } static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset) { struct __uprobe_key key = { .inode = inode, .offset = offset, }; struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key); if (node) return get_uprobe(__node_2_uprobe(node)); return NULL; } /* * Find a uprobe corresponding to a given inode:offset * Acquires uprobes_treelock */ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) { struct uprobe *uprobe; spin_lock(&uprobes_treelock); uprobe = __find_uprobe(inode, offset); spin_unlock(&uprobes_treelock); return uprobe; } static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { struct rb_node *node; node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); if (node) return get_uprobe(__node_2_uprobe(node)); /* get access + creation ref */ refcount_set(&uprobe->ref, 2); return NULL; } /* * Acquire uprobes_treelock. * Matching uprobe already exists in rbtree; * increment (access refcount) and return the matching uprobe. * * No matching uprobe; insert the uprobe in rb_tree; * get a double refcount (access + creation) and return NULL. */ static struct uprobe *insert_uprobe(struct uprobe *uprobe) { struct uprobe *u; spin_lock(&uprobes_treelock); u = __insert_uprobe(uprobe); spin_unlock(&uprobes_treelock); return u; } static void ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe) { pr_warn("ref_ctr_offset mismatch. 
inode: 0x%lx offset: 0x%llx " "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) cur_uprobe->ref_ctr_offset, (unsigned long long) uprobe->ref_ctr_offset); } static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, loff_t ref_ctr_offset) { struct uprobe *uprobe, *cur_uprobe; uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL); if (!uprobe) return NULL; uprobe->inode = inode; uprobe->offset = offset; uprobe->ref_ctr_offset = ref_ctr_offset; init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); /* a uprobe exists for this inode:offset combination */ if (cur_uprobe) { if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) { ref_ctr_mismatch_warn(cur_uprobe, uprobe); put_uprobe(cur_uprobe); kfree(uprobe); return ERR_PTR(-EINVAL); } kfree(uprobe); uprobe = cur_uprobe; } return uprobe; } static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); uc->next = uprobe->consumers; uprobe->consumers = uc; up_write(&uprobe->consumer_rwsem); } /* * For uprobe @uprobe, delete the consumer @uc. * Return true if the @uc is deleted successfully * or return false. */ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) { struct uprobe_consumer **con; bool ret = false; down_write(&uprobe->consumer_rwsem); for (con = &uprobe->consumers; *con; con = &(*con)->next) { if (*con == uc) { *con = uc->next; ret = true; break; } } up_write(&uprobe->consumer_rwsem); return ret; } static int __copy_insn(struct address_space *mapping, struct file *filp, void *insn, int nbytes, loff_t offset) { struct page *page; /* * Ensure that the page that has the original instruction is populated * and in page-cache. If ->read_folio == NULL it must be shmem_mapping(), * see uprobe_register(). */ if (mapping->a_ops->read_folio) page = read_mapping_page(mapping, offset >> PAGE_SHIFT, filp); else page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); copy_from_page(page, offset, insn, nbytes); put_page(page); return 0; } static int copy_insn(struct uprobe *uprobe, struct file *filp) { struct address_space *mapping = uprobe->inode->i_mapping; loff_t offs = uprobe->offset; void *insn = &uprobe->arch.insn; int size = sizeof(uprobe->arch.insn); int len, err = -EIO; /* Copy only available bytes, -EIO if nothing was read */ do { if (offs >= i_size_read(uprobe->inode)) break; len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK)); err = __copy_insn(mapping, filp, insn, len, offs); if (err) break; insn += len; offs += len; size -= len; } while (size); return err; } static int prepare_uprobe(struct uprobe *uprobe, struct file *file, struct mm_struct *mm, unsigned long vaddr) { int ret = 0; if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) return ret; /* TODO: move this into _register, until then we abuse this sem. 
*/ down_write(&uprobe->consumer_rwsem); if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) goto out; ret = copy_insn(uprobe, file); if (ret) goto out; ret = -ENOTSUPP; if (is_trap_insn((uprobe_opcode_t *)&uprobe->arch.insn)) goto out; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); if (ret) goto out; smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */ set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: up_write(&uprobe->consumer_rwsem); return ret; } static inline bool consumer_filter(struct uprobe_consumer *uc, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { return !uc->filter || uc->filter(uc, ctx, mm); } static bool filter_chain(struct uprobe *uprobe, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { struct uprobe_consumer *uc; bool ret = false; down_read(&uprobe->consumer_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { ret = consumer_filter(uc, ctx, mm); if (ret) break; } up_read(&uprobe->consumer_rwsem); return ret; } static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) { bool first_uprobe; int ret; ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); if (ret) return ret; /* * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), * the task can hit this breakpoint right after __replace_page(). */ first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags); if (first_uprobe) set_bit(MMF_HAS_UPROBES, &mm->flags); ret = set_swbp(&uprobe->arch, mm, vaddr); if (!ret) clear_bit(MMF_RECALC_UPROBES, &mm->flags); else if (first_uprobe) clear_bit(MMF_HAS_UPROBES, &mm->flags); return ret; } static int remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { set_bit(MMF_RECALC_UPROBES, &mm->flags); return set_orig_insn(&uprobe->arch, mm, vaddr); } static inline bool uprobe_is_active(struct uprobe *uprobe) { return !RB_EMPTY_NODE(&uprobe->rb_node); } /* * There could be threads that have already hit the breakpoint. They * will recheck the current insn and restart if find_uprobe() fails. * See find_active_uprobe(). */ static void delete_uprobe(struct uprobe *uprobe) { if (WARN_ON(!uprobe_is_active(uprobe))) return; spin_lock(&uprobes_treelock); rb_erase(&uprobe->rb_node, &uprobes_tree); spin_unlock(&uprobes_treelock); RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ put_uprobe(uprobe); } struct map_info { struct map_info *next; struct mm_struct *mm; unsigned long vaddr; }; static inline struct map_info *free_map_info(struct map_info *info) { struct map_info *next = info->next; kfree(info); return next; } static struct map_info * build_map_info(struct address_space *mapping, loff_t offset, bool is_register) { unsigned long pgoff = offset >> PAGE_SHIFT; struct vm_area_struct *vma; struct map_info *curr = NULL; struct map_info *prev = NULL; struct map_info *info; int more = 0; again: i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; if (!prev && !more) { /* * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through * reclaim. This is optimistic, no harm done if it fails. 
*/ prev = kmalloc(sizeof(struct map_info), GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); if (prev) prev->next = NULL; } if (!prev) { more++; continue; } if (!mmget_not_zero(vma->vm_mm)) continue; info = prev; prev = prev->next; info->next = curr; curr = info; info->mm = vma->vm_mm; info->vaddr = offset_to_vaddr(vma, offset); } i_mmap_unlock_read(mapping); if (!more) goto out; prev = curr; while (curr) { mmput(curr->mm); curr = curr->next; } do { info = kmalloc(sizeof(struct map_info), GFP_KERNEL); if (!info) { curr = ERR_PTR(-ENOMEM); goto out; } info->next = prev; prev = info; } while (--more); goto again; out: while (prev) prev = free_map_info(prev); return curr; } static int register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) { bool is_register = !!new; struct map_info *info; int err = 0; percpu_down_write(&dup_mmap_sem); info = build_map_info(uprobe->inode->i_mapping, uprobe->offset, is_register); if (IS_ERR(info)) { err = PTR_ERR(info); goto out; } while (info) { struct mm_struct *mm = info->mm; struct vm_area_struct *vma; if (err && is_register) goto free; mmap_write_lock(mm); vma = find_vma(mm, info->vaddr); if (!vma || !valid_vma(vma, is_register) || file_inode(vma->vm_file) != uprobe->inode) goto unlock; if (vma->vm_start > info->vaddr || vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; if (is_register) { /* consult only the "caller", new consumer. */ if (consumer_filter(new, UPROBE_FILTER_REGISTER, mm)) err = install_breakpoint(uprobe, mm, vma, info->vaddr); } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) { if (!filter_chain(uprobe, UPROBE_FILTER_UNREGISTER, mm)) err |= remove_breakpoint(uprobe, mm, info->vaddr); } unlock: mmap_write_unlock(mm); free: mmput(mm); info = free_map_info(info); } out: percpu_up_write(&dup_mmap_sem); return err; } static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { int err; if (WARN_ON(!consumer_del(uprobe, uc))) return; err = register_for_each_vma(uprobe, NULL); /* TODO : cant unregister? schedule a worker thread */ if (!uprobe->consumers && !err) delete_uprobe(uprobe); } /* * uprobe_unregister - unregister an already registered probe. * @inode: the file in which the probe has to be removed. * @offset: offset from the start of the file. * @uc: identify which probe if multiple probes are colocated. */ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; uprobe = find_uprobe(inode, offset); if (WARN_ON(!uprobe)) return; down_write(&uprobe->register_rwsem); __uprobe_unregister(uprobe, uc); up_write(&uprobe->register_rwsem); put_uprobe(uprobe); } EXPORT_SYMBOL_GPL(uprobe_unregister); /* * __uprobe_register - register a probe * @inode: the file in which the probe has to be placed. * @offset: offset from the start of the file. * @uc: information on howto handle the probe.. * * Apart from the access refcount, __uprobe_register() takes a creation * refcount (thro alloc_uprobe) if and only if this @uprobe is getting * inserted into the rbtree (i.e first consumer for a @inode:@offset * tuple). Creation refcount stops uprobe_unregister from freeing the * @uprobe even before the register operation is complete. Creation * refcount is released when the last @uc for the @uprobe * unregisters. Caller of __uprobe_register() is required to keep @inode * (and the containing mount) referenced. 
* * Return errno if it cannot successfully install probes * else return 0 (success) */ static int __uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; int ret; /* Uprobe must have at least one set consumer */ if (!uc->handler && !uc->ret_handler) return -EINVAL; /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */ if (!inode->i_mapping->a_ops->read_folio && !shmem_mapping(inode->i_mapping)) return -EIO; /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return -EINVAL; /* * This ensures that copy_from_page(), copy_to_page() and * __update_ref_ctr() can't cross page boundary. */ if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE)) return -EINVAL; if (!IS_ALIGNED(ref_ctr_offset, sizeof(short))) return -EINVAL; retry: uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; if (IS_ERR(uprobe)) return PTR_ERR(uprobe); /* * We can race with uprobe_unregister()->delete_uprobe(). * Check uprobe_is_active() and retry if it is false. */ down_write(&uprobe->register_rwsem); ret = -EAGAIN; if (likely(uprobe_is_active(uprobe))) { consumer_add(uprobe, uc); ret = register_for_each_vma(uprobe, uc); if (ret) __uprobe_unregister(uprobe, uc); } up_write(&uprobe->register_rwsem); put_uprobe(uprobe); if (unlikely(ret == -EAGAIN)) goto retry; return ret; } int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { return __uprobe_register(inode, offset, 0, uc); } EXPORT_SYMBOL_GPL(uprobe_register); int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { return __uprobe_register(inode, offset, ref_ctr_offset, uc); } EXPORT_SYMBOL_GPL(uprobe_register_refctr); /* * uprobe_apply - add or remove the breakpoints for an already registered probe. * @inode: the file in which the probe resides. * @offset: offset from the start of the file. * @uc: consumer which wants to add more or remove some breakpoints * @add: add or remove the breakpoints */ int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) { struct uprobe *uprobe; struct uprobe_consumer *con; int ret = -ENOENT; uprobe = find_uprobe(inode, offset); if (WARN_ON(!uprobe)) return ret; down_write(&uprobe->register_rwsem); for (con = uprobe->consumers; con && con != uc ; con = con->next) ; if (con) ret = register_for_each_vma(uprobe, add ?
uc : NULL); up_write(&uprobe->register_rwsem); put_uprobe(uprobe); return ret; } static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; int err = 0; mmap_read_lock(mm); for_each_vma(vmi, vma) { unsigned long vaddr; loff_t offset; if (!valid_vma(vma, false) || file_inode(vma->vm_file) != uprobe->inode) continue; offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; if (uprobe->offset < offset || uprobe->offset >= offset + vma->vm_end - vma->vm_start) continue; vaddr = offset_to_vaddr(vma, uprobe->offset); err |= remove_breakpoint(uprobe, mm, vaddr); } mmap_read_unlock(mm); return err; } static struct rb_node * find_node_in_range(struct inode *inode, loff_t min, loff_t max) { struct rb_node *n = uprobes_tree.rb_node; while (n) { struct uprobe *u = rb_entry(n, struct uprobe, rb_node); if (inode < u->inode) { n = n->rb_left; } else if (inode > u->inode) { n = n->rb_right; } else { if (max < u->offset) n = n->rb_left; else if (min > u->offset) n = n->rb_right; else break; } } return n; } /* * For a given range in vma, build a list of probes that need to be inserted. */ static void build_probe_list(struct inode *inode, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *head) { loff_t min, max; struct rb_node *n, *t; struct uprobe *u; INIT_LIST_HEAD(head); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; spin_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); if (n) { for (t = n; t; t = rb_prev(t)) { u = rb_entry(t, struct uprobe, rb_node); if (u->inode != inode || u->offset < min) break; list_add(&u->pending_list, head); get_uprobe(u); } for (t = n; (t = rb_next(t)); ) { u = rb_entry(t, struct uprobe, rb_node); if (u->inode != inode || u->offset > max) break; list_add(&u->pending_list, head); get_uprobe(u); } } spin_unlock(&uprobes_treelock); } /* @vma contains reference counter, not the probed instruction. */ static int delayed_ref_ctr_inc(struct vm_area_struct *vma) { struct list_head *pos, *q; struct delayed_uprobe *du; unsigned long vaddr; int ret = 0, err = 0; mutex_lock(&delayed_uprobe_lock); list_for_each_safe(pos, q, &delayed_uprobe_list) { du = list_entry(pos, struct delayed_uprobe, list); if (du->mm != vma->vm_mm || !valid_ref_ctr_vma(du->uprobe, vma)) continue; vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset); ret = __update_ref_ctr(vma->vm_mm, vaddr, 1); if (ret) { update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1); if (!err) err = ret; } delayed_uprobe_delete(du); } mutex_unlock(&delayed_uprobe_lock); return err; } /* * Called from mmap_region/vma_merge with mm->mmap_lock acquired. * * Currently we ignore all errors and always return 0, the callers * can't handle the failure anyway. */ int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; if (no_uprobe_events()) return 0; if (vma->vm_file && (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) delayed_ref_ctr_inc(vma); if (!valid_vma(vma, true)) return 0; inode = file_inode(vma->vm_file); if (!inode) return 0; mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); /* * We can race with uprobe_unregister(), this uprobe can be already * removed. But in this case filter_chain() must return false, all * consumers have gone away. 
*/ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!fatal_signal_pending(current) && filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); return 0; } static bool vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end) { loff_t min, max; struct inode *inode; struct rb_node *n; inode = file_inode(vma->vm_file); min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; spin_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); spin_unlock(&uprobes_treelock); return !!n; } /* * Called in context of a munmap of a vma. */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (no_uprobe_events() || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ return; if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) || test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags)) return; if (vma_has_uprobes(vma, start, end)) set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags); } /* Slot allocation for XOL */ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { struct vm_area_struct *vma; int ret; if (mmap_write_lock_killable(mm)) return -EINTR; if (mm->uprobes_state.xol_area) { ret = -EALREADY; goto fail; } if (!area->vaddr) { /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (IS_ERR_VALUE(area->vaddr)) { ret = area->vaddr; goto fail; } } vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE, VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->xol_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto fail; } ret = 0; /* pairs with get_xol_area() */ smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ fail: mmap_write_unlock(mm); return ret; } static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; uprobe_opcode_t insn = UPROBE_SWBP_INSN; struct xol_area *area; area = kmalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; area->bitmap = kcalloc(BITS_TO_LONGS(UINSNS_PER_PAGE), sizeof(long), GFP_KERNEL); if (!area->bitmap) goto free_area; area->xol_mapping.name = "[uprobes]"; area->xol_mapping.fault = NULL; area->xol_mapping.pages = area->pages; area->pages[0] = alloc_page(GFP_HIGHUSER); if (!area->pages[0]) goto free_bitmap; area->pages[1] = NULL; area->vaddr = vaddr; init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); atomic_set(&area->slot_count, 1); arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE); if (!xol_add_vma(mm, area)) return area; __free_page(area->pages[0]); free_bitmap: kfree(area->bitmap); free_area: kfree(area); out: return NULL; } /* * get_xol_area - Allocate process's xol_area if necessary. * This area will be used for storing instructions for execution out of line. * * Returns the allocated area or NULL. */ static struct xol_area *get_xol_area(void) { struct mm_struct *mm = current->mm; struct xol_area *area; if (!mm->uprobes_state.xol_area) __create_xol_area(0); /* Pairs with xol_add_vma() smp_store_release() */ area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */ return area; } /* * uprobe_clear_state - Free the area allocated for slots. 
*/ void uprobe_clear_state(struct mm_struct *mm) { struct xol_area *area = mm->uprobes_state.xol_area; mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(NULL, mm); mutex_unlock(&delayed_uprobe_lock); if (!area) return; put_page(area->pages[0]); kfree(area->bitmap); kfree(area); } void uprobe_start_dup_mmap(void) { percpu_down_read(&dup_mmap_sem); } void uprobe_end_dup_mmap(void) { percpu_up_read(&dup_mmap_sem); } void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) { set_bit(MMF_HAS_UPROBES, &newmm->flags); /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */ set_bit(MMF_RECALC_UPROBES, &newmm->flags); } } /* * - search for a free slot. */ static unsigned long xol_take_insn_slot(struct xol_area *area) { unsigned long slot_addr; int slot_nr; do { slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE); if (slot_nr < UINSNS_PER_PAGE) { if (!test_and_set_bit(slot_nr, area->bitmap)) break; slot_nr = UINSNS_PER_PAGE; continue; } wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE)); } while (slot_nr >= UINSNS_PER_PAGE); slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES); atomic_inc(&area->slot_count); return slot_addr; } /* * xol_get_insn_slot - allocate a slot for xol. * Returns the allocated slot address or 0. */ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) { struct xol_area *area; unsigned long xol_vaddr; area = get_xol_area(); if (!area) return 0; xol_vaddr = xol_take_insn_slot(area); if (unlikely(!xol_vaddr)) return 0; arch_uprobe_copy_ixol(area->pages[0], xol_vaddr, &uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); return xol_vaddr; } /* * xol_free_insn_slot - If slot was earlier allocated by * @xol_get_insn_slot(), make the slot available for * subsequent requests. */ static void xol_free_insn_slot(struct task_struct *tsk) { struct xol_area *area; unsigned long vma_end; unsigned long slot_addr; if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask) return; slot_addr = tsk->utask->xol_vaddr; if (unlikely(!slot_addr)) return; area = tsk->mm->uprobes_state.xol_area; vma_end = area->vaddr + PAGE_SIZE; if (area->vaddr <= slot_addr && slot_addr < vma_end) { unsigned long offset; int slot_nr; offset = slot_addr - area->vaddr; slot_nr = offset / UPROBE_XOL_SLOT_BYTES; if (slot_nr >= UINSNS_PER_PAGE) return; clear_bit(slot_nr, area->bitmap); atomic_dec(&area->slot_count); smp_mb__after_atomic(); /* pairs with prepare_to_wait() */ if (waitqueue_active(&area->wq)) wake_up(&area->wq); tsk->utask->xol_vaddr = 0; } } void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len) { /* Initialize the slot */ copy_to_page(page, vaddr, src, len); /* * We probably need flush_icache_user_page() but it needs vma. * This should work on most of architectures by default. If * architecture needs to do something different it can define * its own version of the function. */ flush_dcache_page(page); } /** * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs * @regs: Reflects the saved state of the task after it has hit a breakpoint * instruction. * Return the address of the breakpoint instruction. 
*/ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) { return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; } unsigned long uprobe_get_trap_addr(struct pt_regs *regs) { struct uprobe_task *utask = current->utask; if (unlikely(utask && utask->active_uprobe)) return utask->vaddr; return instruction_pointer(regs); } static struct return_instance *free_ret_instance(struct return_instance *ri) { struct return_instance *next = ri->next; put_uprobe(ri->uprobe); kfree(ri); return next; } /* * Called with no locks held. * Called in context of an exiting or an exec-ing thread. */ void uprobe_free_utask(struct task_struct *t) { struct uprobe_task *utask = t->utask; struct return_instance *ri; if (!utask) return; if (utask->active_uprobe) put_uprobe(utask->active_uprobe); ri = utask->return_instances; while (ri) ri = free_ret_instance(ri); xol_free_insn_slot(t); kfree(utask); t->utask = NULL; } /* * Allocate a uprobe_task object for the task if necessary. * Called when the thread hits a breakpoint. * * Returns: * - pointer to new uprobe_task on success * - NULL otherwise */ static struct uprobe_task *get_utask(void) { if (!current->utask) current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); return current->utask; } static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) { struct uprobe_task *n_utask; struct return_instance **p, *o, *n; n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); if (!n_utask) return -ENOMEM; t->utask = n_utask; p = &n_utask->return_instances; for (o = o_utask->return_instances; o; o = o->next) { n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); if (!n) return -ENOMEM; *n = *o; get_uprobe(n->uprobe); n->next = NULL; *p = n; p = &n->next; n_utask->depth++; } return 0; } static void uprobe_warn(struct task_struct *t, const char *msg) { pr_warn("uprobe: %s:%d failed to %s\n", current->comm, current->pid, msg); } static void dup_xol_work(struct callback_head *work) { if (current->flags & PF_EXITING) return; if (!__create_xol_area(current->utask->dup_xol_addr) && !fatal_signal_pending(current)) uprobe_warn(current, "dup xol area"); } /* * Called in context of a new clone/fork from copy_process. */ void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; struct xol_area *area; t->utask = NULL; if (!utask || !utask->return_instances) return; if (mm == t->mm && !(flags & CLONE_VFORK)) return; if (dup_utask(t, utask)) return uprobe_warn(t, "dup ret instances"); /* The task can fork() after dup_xol_work() fails */ area = mm->uprobes_state.xol_area; if (!area) return uprobe_warn(t, "dup xol area"); if (mm == t->mm) return; t->utask->dup_xol_addr = area->vaddr; init_task_work(&t->utask->dup_xol_work, dup_xol_work); task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME); } /* * Current area->vaddr notion assume the trampoline address is always * equal area->vaddr. * * Returns -1 in case the xol_area is not allocated. */ static unsigned long get_trampoline_vaddr(void) { struct xol_area *area; unsigned long trampoline_vaddr = -1; /* Pairs with xol_add_vma() smp_store_release() */ area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */ if (area) trampoline_vaddr = area->vaddr; return trampoline_vaddr; } static void cleanup_return_instances(struct uprobe_task *utask, bool chained, struct pt_regs *regs) { struct return_instance *ri = utask->return_instances; enum rp_check ctx = chained ? 
RP_CHECK_CHAIN_CALL : RP_CHECK_CALL; while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) { ri = free_ret_instance(ri); utask->depth--; } utask->return_instances = ri; } static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) { struct return_instance *ri; struct uprobe_task *utask; unsigned long orig_ret_vaddr, trampoline_vaddr; bool chained; if (!get_xol_area()) return; utask = get_utask(); if (!utask) return; if (utask->depth >= MAX_URETPROBE_DEPTH) { printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" " nestedness limit pid/tgid=%d/%d\n", current->pid, current->tgid); return; } ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL); if (!ri) return; trampoline_vaddr = get_trampoline_vaddr(); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); if (orig_ret_vaddr == -1) goto fail; /* drop the entries invalidated by longjmp() */ chained = (orig_ret_vaddr == trampoline_vaddr); cleanup_return_instances(utask, chained, regs); /* * We don't want to keep trampoline address in stack, rather keep the * original return address of first caller thru all the consequent * instances. This also makes breakpoint unwrapping easier. */ if (chained) { if (!utask->return_instances) { /* * This situation is not possible. Likely we have an * attack from user-space. */ uprobe_warn(current, "handle tail call"); goto fail; } orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; } ri->uprobe = get_uprobe(uprobe); ri->func = instruction_pointer(regs); ri->stack = user_stack_pointer(regs); ri->orig_ret_vaddr = orig_ret_vaddr; ri->chained = chained; utask->depth++; ri->next = utask->return_instances; utask->return_instances = ri; return; fail: kfree(ri); } /* Prepare to single-step probed instruction out of line. */ static int pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) { struct uprobe_task *utask; unsigned long xol_vaddr; int err; utask = get_utask(); if (!utask) return -ENOMEM; xol_vaddr = xol_get_insn_slot(uprobe); if (!xol_vaddr) return -ENOMEM; utask->xol_vaddr = xol_vaddr; utask->vaddr = bp_vaddr; err = arch_uprobe_pre_xol(&uprobe->arch, regs); if (unlikely(err)) { xol_free_insn_slot(current); return err; } utask->active_uprobe = uprobe; utask->state = UTASK_SSTEP; return 0; } /* * If we are singlestepping, then ensure this thread is not connected to * non-fatal signals until completion of singlestep. When xol insn itself * triggers the signal, restart the original insn even if the task is * already SIGKILL'ed (since coredump should report the correct ip). This * is even more important if the task has a handler for SIGSEGV/etc, The * _same_ instruction should be repeated again after return from the signal * handler, and SSTEP can never finish in this case. 
*/ bool uprobe_deny_signal(void) { struct task_struct *t = current; struct uprobe_task *utask = t->utask; if (likely(!utask || !utask->active_uprobe)) return false; WARN_ON_ONCE(utask->state != UTASK_SSTEP); if (task_sigpending(t)) { spin_lock_irq(&t->sighand->siglock); clear_tsk_thread_flag(t, TIF_SIGPENDING); spin_unlock_irq(&t->sighand->siglock); if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { utask->state = UTASK_SSTEP_TRAPPED; set_tsk_thread_flag(t, TIF_UPROBE); } } return true; } static void mmf_recalc_uprobes(struct mm_struct *mm) { VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; for_each_vma(vmi, vma) { if (!valid_vma(vma, false)) continue; /* * This is not strictly accurate, we can race with * uprobe_unregister() and see the already removed * uprobe if delete_uprobe() was not yet called. * Or this uprobe can be filtered out. */ if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) return; } clear_bit(MMF_HAS_UPROBES, &mm->flags); } static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) { struct page *page; uprobe_opcode_t opcode; int result; if (WARN_ON_ONCE(!IS_ALIGNED(vaddr, UPROBE_SWBP_INSN_SIZE))) return -EINVAL; pagefault_disable(); result = __get_user(opcode, (uprobe_opcode_t __user *)vaddr); pagefault_enable(); if (likely(result == 0)) goto out; /* * The NULL 'tsk' here ensures that any faults that occur here * will not be accounted to the task. 'mm' *is* current->mm, * but we treat this as a 'remote' access since it is * essentially a kernel access to the memory. */ result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page, NULL); if (result < 0) return result; copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); put_page(page); out: /* This needs to return true for any variant of the trap insn */ return is_trap_insn(&opcode); } static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) { struct mm_struct *mm = current->mm; struct uprobe *uprobe = NULL; struct vm_area_struct *vma; mmap_read_lock(mm); vma = vma_lookup(mm, bp_vaddr); if (vma) { if (valid_vma(vma, false)) { struct inode *inode = file_inode(vma->vm_file); loff_t offset = vaddr_to_offset(vma, bp_vaddr); uprobe = find_uprobe(inode, offset); } if (!uprobe) *is_swbp = is_trap_at_addr(mm, bp_vaddr); } else { *is_swbp = -EFAULT; } if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags)) mmf_recalc_uprobes(mm); mmap_read_unlock(mm); return uprobe; } static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; int remove = UPROBE_HANDLER_REMOVE; bool need_prep = false; /* prepare return uprobe, when needed */ down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { int rc = 0; if (uc->handler) { rc = uc->handler(uc, regs); WARN(rc & ~UPROBE_HANDLER_MASK, "bad rc=0x%x from %ps()\n", rc, uc->handler); } if (uc->ret_handler) need_prep = true; remove &= rc; } if (need_prep && !remove) prepare_uretprobe(uprobe, regs); /* put bp at return */ if (remove && uprobe->consumers) { WARN_ON(!uprobe_is_active(uprobe)); unapply_uprobe(uprobe, current->mm); } up_read(&uprobe->register_rwsem); } static void handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) { struct uprobe *uprobe = ri->uprobe; struct uprobe_consumer *uc; down_read(&uprobe->register_rwsem); for (uc = uprobe->consumers; uc; uc = uc->next) { if (uc->ret_handler) uc->ret_handler(uc, ri->func, regs); } up_read(&uprobe->register_rwsem); } static struct return_instance *find_next_ret_chain(struct 
return_instance *ri) { bool chained; do { chained = ri->chained; ri = ri->next; /* can't be NULL if chained */ } while (chained); return ri; } static void handle_trampoline(struct pt_regs *regs) { struct uprobe_task *utask; struct return_instance *ri, *next; bool valid; utask = current->utask; if (!utask) goto sigill; ri = utask->return_instances; if (!ri) goto sigill; do { /* * We should throw out the frames invalidated by longjmp(). * If this chain is valid, then the next one should be alive * or NULL; the latter case means that nobody but ri->func * could hit this trampoline on return. TODO: sigaltstack(). */ next = find_next_ret_chain(ri); valid = !next || arch_uretprobe_is_alive(next, RP_CHECK_RET, regs); instruction_pointer_set(regs, ri->orig_ret_vaddr); do { if (valid) handle_uretprobe_chain(ri, regs); ri = free_ret_instance(ri); utask->depth--; } while (ri != next); } while (!valid); utask->return_instances = ri; return; sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); force_sig(SIGILL); } bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs) { return false; } bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs) { return true; } /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. */ static void handle_swbp(struct pt_regs *regs) { struct uprobe *uprobe; unsigned long bp_vaddr; int is_swbp; bp_vaddr = uprobe_get_swbp_addr(regs); if (bp_vaddr == get_trampoline_vaddr()) return handle_trampoline(regs); uprobe = find_active_uprobe(bp_vaddr, &is_swbp); if (!uprobe) { if (is_swbp > 0) { /* No matching uprobe; signal SIGTRAP. */ force_sig(SIGTRAP); } else { /* * Either we raced with uprobe_unregister() or we can't * access this memory. The latter is only possible if * another thread plays with our ->mm. In both cases * we can simply restart. If this vma was unmapped we * can pretend this insn was not executed yet and get * the (correct) SIGSEGV after restart. */ instruction_pointer_set(regs, bp_vaddr); } return; } /* change it in advance for ->handler() and restart */ instruction_pointer_set(regs, bp_vaddr); /* * TODO: move copy_insn/etc into _register and remove this hack. * After we hit the bp, _unregister + _register can install the * new and not-yet-analyzed uprobe at the same address, restart. */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) goto out; /* * Pairs with the smp_wmb() in prepare_uprobe(). * * Guarantees that if we see the UPROBE_COPY_INSN bit set, then * we must also see the stores to &uprobe->arch performed by the * prepare_uprobe() call. */ smp_rmb(); /* Tracing handlers use ->utask to communicate with fetch methods */ if (!get_utask()) goto out; if (arch_uprobe_ignore(&uprobe->arch, regs)) goto out; handler_chain(uprobe, regs); if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; if (!pre_ssout(uprobe, regs, bp_vaddr)) return; /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ out: put_uprobe(uprobe); } /* * Perform required fix-ups and disable singlestep. * Allow pending signals to take effect. 
*/ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) { struct uprobe *uprobe; int err = 0; uprobe = utask->active_uprobe; if (utask->state == UTASK_SSTEP_ACK) err = arch_uprobe_post_xol(&uprobe->arch, regs); else if (utask->state == UTASK_SSTEP_TRAPPED) arch_uprobe_abort_xol(&uprobe->arch, regs); else WARN_ON_ONCE(1); put_uprobe(uprobe); utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; xol_free_insn_slot(current); spin_lock_irq(&current->sighand->siglock); recalc_sigpending(); /* see uprobe_deny_signal() */ spin_unlock_irq(&current->sighand->siglock); if (unlikely(err)) { uprobe_warn(current, "execute the probed insn, sending SIGILL."); force_sig(SIGILL); } } /* * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and * allows the thread to return from interrupt. After that handle_swbp() * sets utask->active_uprobe. * * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag * and allows the thread to return from interrupt. * * While returning to userspace, thread notices the TIF_UPROBE flag and calls * uprobe_notify_resume(). */ void uprobe_notify_resume(struct pt_regs *regs) { struct uprobe_task *utask; clear_thread_flag(TIF_UPROBE); utask = current->utask; if (utask && utask->active_uprobe) handle_singlestep(utask, regs); else handle_swbp(regs); } /* * uprobe_pre_sstep_notifier gets called from interrupt context as part of * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit. */ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { if (!current->mm) return 0; if (!test_bit(MMF_HAS_UPROBES, &current->mm->flags) && (!current->utask || !current->utask->return_instances)) return 0; set_thread_flag(TIF_UPROBE); return 1; } /* * uprobe_post_sstep_notifier gets called in interrupt context as part of notifier * mechanism. Set TIF_UPROBE flag and indicate completion of singlestep. */ int uprobe_post_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask = current->utask; if (!current->mm || !utask || !utask->active_uprobe) /* task is currently not uprobed */ return 0; utask->state = UTASK_SSTEP_ACK; set_thread_flag(TIF_UPROBE); return 1; } static struct notifier_block uprobe_exception_nb = { .notifier_call = arch_uprobe_exception_notify, .priority = INT_MAX-1, /* notified after kprobes, kgdb */ }; void __init uprobes_init(void) { int i; for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); BUG_ON(register_die_notifier(&uprobe_exception_nb)); }
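/*
 * Illustrative sketch, not part of kernel/events/uprobes.c: a minimal
 * in-kernel consumer of the uprobe_register()/uprobe_unregister() API
 * defined above. The target path "/usr/bin/example" and the probe offset
 * are hypothetical placeholders; a real caller would resolve the offset
 * of the probed instruction from the target ELF. Error handling is kept
 * to the essentials.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/uprobes.h>

static int demo_uprobe_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
        /* Runs in the context of the task that hit the breakpoint. */
        pr_info("uprobe hit: comm=%s ip=%lx\n",
                current->comm, instruction_pointer(regs));
        return 0;       /* 0 keeps the breakpoint, UPROBE_HANDLER_REMOVE drops it */
}

static struct uprobe_consumer demo_uprobe_consumer = {
        .handler = demo_uprobe_handler, /* no ->ret_handler, no ->filter */
};

static struct inode *demo_inode;
static const loff_t demo_offset = 0x1234;       /* hypothetical file offset */

static int __init demo_uprobe_init(void)
{
        struct path path;
        int ret;

        ret = kern_path("/usr/bin/example", LOOKUP_FOLLOW, &path);
        if (ret)
                return ret;

        /* uprobe_register() requires the caller to keep the inode referenced. */
        demo_inode = igrab(d_real_inode(path.dentry));
        path_put(&path);
        if (!demo_inode)
                return -ENOENT;

        ret = uprobe_register(demo_inode, demo_offset, &demo_uprobe_consumer);
        if (ret)
                iput(demo_inode);
        return ret;
}

static void __exit demo_uprobe_exit(void)
{
        uprobe_unregister(demo_inode, demo_offset, &demo_uprobe_consumer);
        iput(demo_inode);
}

module_init(demo_uprobe_init);
module_exit(demo_uprobe_exit);
MODULE_LICENSE("GPL");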
/* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/backing-dev.h * * low-level device information and state which is propagated up through * to high-level code. */ #ifndef _LINUX_BACKING_DEV_H #define _LINUX_BACKING_DEV_H #include <linux/kernel.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/device.h> #include <linux/writeback.h> #include <linux/backing-dev-defs.h> #include <linux/slab.h> static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); return bdi; } struct backing_dev_info *bdi_get_by_id(u64 id); void bdi_put(struct backing_dev_info *bdi); __printf(2, 3) int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...); __printf(2, 0) int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args); void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner); void bdi_unregister(struct backing_dev_info *bdi); struct backing_dev_info *bdi_alloc(int node_id); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); void wb_wait_for_completion(struct wb_completion *done); extern spinlock_t bdi_lock; extern struct list_head bdi_list; extern struct workqueue_struct *bdi_wq; static inline bool wb_has_dirty_io(struct bdi_writeback *wb) { return test_bit(WB_has_dirty_io, &wb->state); } static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) { /* * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are * any dirty wbs. See wb_update_write_bandwidth().
*/ return atomic_long_read(&bdi->tot_write_bandwidth); } static inline void wb_stat_mod(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { wb_stat_mod(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { wb_stat_mod(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { return percpu_counter_read_positive(&wb->stat[item]); } static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item) { return percpu_counter_sum_positive(&wb->stat[item]); } extern void wb_writeout_inc(struct bdi_writeback *wb); /* * maximal error of a stat counter. */ static inline unsigned long wb_stat_error(void) { #ifdef CONFIG_SMP return nr_cpu_ids * WB_STAT_BATCH; #else return 1; #endif } /* BDI ratio is expressed as part per 1000000 for finer granularity. */ #define BDI_RATIO_SCALE 10000 u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* * Flags in backing_dev_info::capability * * BDI_CAP_WRITEBACK: Supports dirty page writeback, and dirty pages * should contribute to accounting * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold */ #define BDI_CAP_WRITEBACK (1 << 0) #define BDI_CAP_WRITEBACK_ACCT (1 << 1) #define BDI_CAP_STRICTLIMIT (1 << 2) extern struct backing_dev_info noop_backing_dev_info; int bdi_init(struct backing_dev_info *bdi); /** * writeback_in_progress - determine whether there is writeback in progress * @wb: bdi_writeback of interest * * Determine whether there is writeback waiting to be handled against a * bdi_writeback. */ static inline bool writeback_in_progress(struct bdi_writeback *wb) { return test_bit(WB_writeback_running, &wb->state); } struct backing_dev_info *inode_to_bdi(struct inode *inode); static inline bool mapping_can_writeback(struct address_space *mapping) { return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; } #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css); struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp); void wb_memcg_offline(struct mem_cgroup *memcg); void wb_blkcg_offline(struct cgroup_subsys_state *css); /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode * @inode: inode of interest * * Cgroup writeback requires support from the filesystem. Also, both memcg and * iocg have to be on the default hierarchy. Test whether all conditions are * met. * * Note that the test result may change dynamically on the same inode * depending on how memcg and iocg are configured. 
*/ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); return cgroup_subsys_on_dfl(memory_cgrp_subsys) && cgroup_subsys_on_dfl(io_cgrp_subsys) && (bdi->capabilities & BDI_CAP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } /** * wb_find_current - find wb for %current on a bdi * @bdi: bdi of interest * * Find the wb of @bdi which matches both the memcg and blkcg of %current. * Must be called under rcu_read_lock() which protects the returend wb. * NULL if not found. */ static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) { struct cgroup_subsys_state *memcg_css; struct bdi_writeback *wb; memcg_css = task_css(current, memory_cgrp_id); if (!memcg_css->parent) return &bdi->wb; wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); /* * %current's blkcg equals the effective blkcg of its memcg. No * need to use the relatively expensive cgroup_get_e_css(). */ if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id))) return wb; return NULL; } /** * wb_get_create_current - get or create wb for %current on a bdi * @bdi: bdi of interest * @gfp: allocation mask * * Equivalent to wb_get_create() on %current's memcg. This function is * called from a relatively hot path and optimizes the common cases using * wb_find_current(). */ static inline struct bdi_writeback * wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) { struct bdi_writeback *wb; rcu_read_lock(); wb = wb_find_current(bdi); if (wb && unlikely(!wb_tryget(wb))) wb = NULL; rcu_read_unlock(); if (unlikely(!wb)) { struct cgroup_subsys_state *memcg_css; memcg_css = task_get_css(current, memory_cgrp_id); wb = wb_get_create(bdi, memcg_css, gfp); css_put(memcg_css); } return wb; } /** * inode_to_wb - determine the wb of an inode * @inode: inode of interest * * Returns the wb @inode is currently associated with. The caller must be * holding either @inode->i_lock, the i_pages lock, or the * associated wb's list_lock. */ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && (!lockdep_is_held(&inode->i_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); #endif return inode->i_wb; } static inline struct bdi_writeback *inode_to_wb_wbc( struct inode *inode, struct writeback_control *wbc) { /* * If wbc does not have inode attached, it means cgroup writeback was * disabled when wbc started. Just use the default wb in that case. */ return wbc->wb ? wbc->wb : &inode_to_bdi(inode)->wb; } /** * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction * @inode: target inode * @cookie: output param, to be passed to the end function * * The caller wants to access the wb associated with @inode but isn't * holding inode->i_lock, the i_pages lock or wb->list_lock. This * function determines the wb associated with @inode and ensures that the * association doesn't change until the transaction is finished with * unlocked_inode_to_wb_end(). * * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and * can't sleep during the transaction. IRQs may or may not be disabled on * return. */ static inline struct bdi_writeback * unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { rcu_read_lock(); /* * Paired with store_release in inode_switch_wbs_work_fn() and * ensures that we see the new wb if we see cleared I_WB_SWITCH. 
*/ cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; if (unlikely(cookie->locked)) xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags); /* * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages * lock. inode_to_wb() will bark. Deref directly. */ return inode->i_wb; } /** * unlocked_inode_to_wb_end - end inode wb access transaction * @inode: target inode * @cookie: @cookie from unlocked_inode_to_wb_begin() */ static inline void unlocked_inode_to_wb_end(struct inode *inode, struct wb_lock_cookie *cookie) { if (unlikely(cookie->locked)) xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags); rcu_read_unlock(); } #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool inode_cgwb_enabled(struct inode *inode) { return false; } static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) { return &bdi->wb; } static inline struct bdi_writeback * wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) { return &bdi->wb; } static inline struct bdi_writeback *inode_to_wb(struct inode *inode) { return &inode_to_bdi(inode)->wb; } static inline struct bdi_writeback *inode_to_wb_wbc( struct inode *inode, struct writeback_control *wbc) { return inode_to_wb(inode); } static inline struct bdi_writeback * unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { return inode_to_wb(inode); } static inline void unlocked_inode_to_wb_end(struct inode *inode, struct wb_lock_cookie *cookie) { } static inline void wb_memcg_offline(struct mem_cgroup *memcg) { } static inline void wb_blkcg_offline(struct cgroup_subsys_state *css) { } #endif /* CONFIG_CGROUP_WRITEBACK */ const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */
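/*
 * Illustrative sketch, not part of backing-dev.h: the intended calling
 * pattern for the unlocked_inode_to_wb_begin()/unlocked_inode_to_wb_end()
 * transaction documented above, roughly as the writeback accounting code
 * uses it. demo_account_reclaimable() is a hypothetical caller, and
 * WB_RECLAIMABLE comes from backing-dev-defs.h; the point is the begin/end
 * bracketing and the "no sleeping inside the transaction" rule.
 */
#include <linux/backing-dev.h>
#include <linux/fs.h>

static void demo_account_reclaimable(struct inode *inode)
{
        struct bdi_writeback *wb;
        struct wb_lock_cookie cookie = {};

        /* Pin the inode<->wb association without holding i_lock. */
        wb = unlocked_inode_to_wb_begin(inode, &cookie);

        /* @wb is stable here; the transaction must not sleep. */
        inc_wb_stat(wb, WB_RECLAIMABLE);

        unlocked_inode_to_wb_end(inode, &cookie);
}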
// SPDX-License-Identifier: GPL-2.0+ /* * Restartable sequences system call * * Copyright (C) 2015, Google, Inc., * Paul Turner <pjt@google.com> and Andrew Hunter <ahh@google.com> * Copyright (C) 2015-2018, EfficiOS Inc., * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> */ #include <linux/sched.h> #include <linux/uaccess.h> #include <linux/syscalls.h> #include <linux/rseq.h> #include <linux/types.h> #include <asm/ptrace.h> #define CREATE_TRACE_POINTS #include <trace/events/rseq.h> /* The original rseq structure size (including padding) is 32 bytes. */ #define ORIG_RSEQ_SIZE 32 #define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) /* * * Restartable sequences are a lightweight interface that allows * user-level code to be executed atomically relative to scheduler * preemption and signal delivery. Typically used for implementing * per-cpu operations. * * It allows user-space to perform update operations on per-cpu data * without requiring heavy-weight atomic operations. * * Detailed algorithm of rseq user-space assembly sequences: * * init(rseq_cs) * cpu = TLS->rseq::cpu_id_start * [1] TLS->rseq::rseq_cs = rseq_cs * [start_ip] ---------------------------- * [2] if (cpu != TLS->rseq::cpu_id) * goto abort_ip; * [3] <last_instruction_in_cs> * [post_commit_ip] ---------------------------- * * The address of jump target abort_ip must be outside the critical * region, i.e.: * * [abort_ip] < [start_ip] || [abort_ip] >= [post_commit_ip] * * Steps [2]-[3] (inclusive) need to be a sequence of instructions in * userspace that can handle being interrupted between any of those * instructions, and then resumed to the abort_ip. * * 1.
Userspace stores the address of the struct rseq_cs assembly * block descriptor into the rseq_cs field of the registered * struct rseq TLS area. This update is performed through a single * store within the inline assembly instruction sequence. * [start_ip] * * 2. Userspace tests to check whether the current cpu_id field match * the cpu number loaded before start_ip, branching to abort_ip * in case of a mismatch. * * If the sequence is preempted or interrupted by a signal * at or after start_ip and before post_commit_ip, then the kernel * clears TLS->__rseq_abi::rseq_cs, and sets the user-space return * ip to abort_ip before returning to user-space, so the preempted * execution resumes at abort_ip. * * 3. Userspace critical section final instruction before * post_commit_ip is the commit. The critical section is * self-terminating. * [post_commit_ip] * * 4. <success> * * On failure at [2], or if interrupted by preempt or signal delivery * between [1] and [3]: * * [abort_ip] * F1. <failure> */ static int rseq_update_cpu_node_id(struct task_struct *t) { struct rseq __user *rseq = t->rseq; u32 cpu_id = raw_smp_processor_id(); u32 node_id = cpu_to_node(cpu_id); u32 mm_cid = task_mm_cid(t); WARN_ON_ONCE((int) mm_cid < 0); if (!user_write_access_begin(rseq, t->rseq_len)) goto efault; unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end); unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end); unsafe_put_user(node_id, &rseq->node_id, efault_end); unsafe_put_user(mm_cid, &rseq->mm_cid, efault_end); /* * Additional feature fields added after ORIG_RSEQ_SIZE * need to be conditionally updated only if * t->rseq_len != ORIG_RSEQ_SIZE. */ user_write_access_end(); trace_rseq_update(t); return 0; efault_end: user_write_access_end(); efault: return -EFAULT; } static int rseq_reset_rseq_cpu_node_id(struct task_struct *t) { u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0, mm_cid = 0; /* * Reset cpu_id_start to its initial state (0). */ if (put_user(cpu_id_start, &t->rseq->cpu_id_start)) return -EFAULT; /* * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming * in after unregistration can figure out that rseq needs to be * registered again. */ if (put_user(cpu_id, &t->rseq->cpu_id)) return -EFAULT; /* * Reset node_id to its initial state (0). */ if (put_user(node_id, &t->rseq->node_id)) return -EFAULT; /* * Reset mm_cid to its initial state (0). */ if (put_user(mm_cid, &t->rseq->mm_cid)) return -EFAULT; /* * Additional feature fields added after ORIG_RSEQ_SIZE * need to be conditionally reset only if * t->rseq_len != ORIG_RSEQ_SIZE. */ return 0; } static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) { struct rseq_cs __user *urseq_cs; u64 ptr; u32 __user *usig; u32 sig; int ret; #ifdef CONFIG_64BIT if (get_user(ptr, &t->rseq->rseq_cs)) return -EFAULT; #else if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr))) return -EFAULT; #endif if (!ptr) { memset(rseq_cs, 0, sizeof(*rseq_cs)); return 0; } if (ptr >= TASK_SIZE) return -EINVAL; urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr; if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs))) return -EFAULT; if (rseq_cs->start_ip >= TASK_SIZE || rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE || rseq_cs->abort_ip >= TASK_SIZE || rseq_cs->version > 0) return -EINVAL; /* Check for overflow. */ if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip) return -EINVAL; /* Ensure that abort_ip is not in the critical section. 
*/ if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset) return -EINVAL; usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32)); ret = get_user(sig, usig); if (ret) return ret; if (current->rseq_sig != sig) { printk_ratelimited(KERN_WARNING "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n", sig, current->rseq_sig, current->pid, usig); return -EINVAL; } return 0; } static bool rseq_warn_flags(const char *str, u32 flags) { u32 test_flags; if (!flags) return false; test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS; if (test_flags) pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str); test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS; if (test_flags) pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str); return true; } static int rseq_need_restart(struct task_struct *t, u32 cs_flags) { u32 flags, event_mask; int ret; if (rseq_warn_flags("rseq_cs", cs_flags)) return -EINVAL; /* Get thread flags. */ ret = get_user(flags, &t->rseq->flags); if (ret) return ret; if (rseq_warn_flags("rseq", flags)) return -EINVAL; /* * Load and clear event mask atomically with respect to * scheduler preemption. */ preempt_disable(); event_mask = t->rseq_event_mask; t->rseq_event_mask = 0; preempt_enable(); return !!event_mask; } static int clear_rseq_cs(struct task_struct *t) { /* * The rseq_cs field is set to NULL on preemption or signal * delivery on top of rseq assembly block, as well as on top * of code outside of the rseq assembly block. This performs * a lazy clear of the rseq_cs field. * * Set rseq_cs to NULL. */ #ifdef CONFIG_64BIT return put_user(0UL, &t->rseq->rseq_cs); #else if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs))) return -EFAULT; return 0; #endif } /* * Unsigned comparison will be true when ip >= start_ip, and when * ip < start_ip + post_commit_offset. */ static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs) { return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset; } static int rseq_ip_fixup(struct pt_regs *regs) { unsigned long ip = instruction_pointer(regs); struct task_struct *t = current; struct rseq_cs rseq_cs; int ret; ret = rseq_get_rseq_cs(t, &rseq_cs); if (ret) return ret; /* * Handle potentially not being within a critical section. * If not nested over a rseq critical section, restart is useless. * Clear the rseq_cs pointer and return. */ if (!in_rseq_cs(ip, &rseq_cs)) return clear_rseq_cs(t); ret = rseq_need_restart(t, rseq_cs.flags); if (ret <= 0) return ret; ret = clear_rseq_cs(t); if (ret) return ret; trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset, rseq_cs.abort_ip); instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip); return 0; } /* * This resume handler must always be executed between any of: * - preemption, * - signal delivery, * and return to user-space. * * This is how we can ensure that the entire rseq critical section * will issue the commit instruction only if executed atomically with * respect to other threads scheduled on the same CPU, and with respect * to signal handlers. */ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { struct task_struct *t = current; int ret, sig; if (unlikely(t->flags & PF_EXITING)) return; /* * regs is NULL if and only if the caller is in a syscall path. Skip * fixup and leave rseq_cs as is so that rseq_sycall() will detect and * kill a misbehaving userspace on debug kernels. 
*/ if (regs) { ret = rseq_ip_fixup(regs); if (unlikely(ret < 0)) goto error; } if (unlikely(rseq_update_cpu_node_id(t))) goto error; return; error: sig = ksig ? ksig->sig : 0; force_sigsegv(sig); } #ifdef CONFIG_DEBUG_RSEQ /* * Terminate the process if a syscall is issued within a restartable * sequence. */ void rseq_syscall(struct pt_regs *regs) { unsigned long ip = instruction_pointer(regs); struct task_struct *t = current; struct rseq_cs rseq_cs; if (!t->rseq) return; if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs)) force_sig(SIGSEGV); } #endif /* * sys_rseq - setup restartable sequences for caller thread. */ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) { int ret; if (flags & RSEQ_FLAG_UNREGISTER) { if (flags & ~RSEQ_FLAG_UNREGISTER) return -EINVAL; /* Unregister rseq for current thread. */ if (current->rseq != rseq || !current->rseq) return -EINVAL; if (rseq_len != current->rseq_len) return -EINVAL; if (current->rseq_sig != sig) return -EPERM; ret = rseq_reset_rseq_cpu_node_id(current); if (ret) return ret; current->rseq = NULL; current->rseq_sig = 0; current->rseq_len = 0; return 0; } if (unlikely(flags)) return -EINVAL; if (current->rseq) { /* * If rseq is already registered, check whether * the provided address differs from the prior * one. */ if (current->rseq != rseq || rseq_len != current->rseq_len) return -EINVAL; if (current->rseq_sig != sig) return -EPERM; /* Already registered. */ return -EBUSY; } /* * If there was no rseq previously registered, ensure the provided rseq * is properly aligned, as communcated to user-space through the ELF * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq * size, the required alignment is the original struct rseq alignment. * * In order to be valid, rseq_len is either the original rseq size, or * large enough to contain all supported fields, as communicated to * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. */ if (rseq_len < ORIG_RSEQ_SIZE || (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || rseq_len < offsetof(struct rseq, end)))) return -EINVAL; if (!access_ok(rseq, rseq_len)) return -EFAULT; current->rseq = rseq; current->rseq_len = rseq_len; current->rseq_sig = sig; /* * If rseq was previously inactive, and has just been * registered, ensure the cpu_id_start and cpu_id fields * are updated before returning to user-space. */ rseq_set_notify_resume(current); return 0; }
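/*
 * Illustrative user-space sketch, not part of kernel/rseq.c: registering a
 * thread's struct rseq through sys_rseq() as implemented above. RSEQ_DEMO_SIG
 * is an arbitrary demo signature, and rseq_len of 32 matches ORIG_RSEQ_SIZE.
 * A libc that already registers rseq on thread start (e.g. glibc >= 2.35)
 * will make this call fail with EBUSY, so treat it purely as an example.
 */
#include <linux/rseq.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define RSEQ_DEMO_SIG 0x53053053        /* hypothetical abort signature */

static __thread volatile struct rseq rseq_abi;

static int rseq_register_current_thread(void)
{
        /* flags = 0 registers; rseq_len = 32 is the original ABI size. */
        return syscall(__NR_rseq, (void *)&rseq_abi, 32, 0, RSEQ_DEMO_SIG);
}

int main(void)
{
        if (rseq_register_current_thread()) {
                perror("rseq");
                return 1;
        }
        printf("rseq registered, cpu_id=%u\n", rseq_abi.cpu_id);
        return 0;
}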
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H #include <linux/build_bug.h> #include <linux/compiler.h> #ifndef __ASSEMBLY__ #include <linux/cache.h> #include <asm/percpu.h> struct task_struct; struct pcpu_hot { union { struct { struct task_struct *current_task; int preempt_count; int cpu_number; #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING u64 call_depth; #endif unsigned long top_of_stack; void *hardirq_stack_ptr; u16 softirq_pending; #ifdef CONFIG_X86_64 bool hardirq_stack_inuse; #else void *softirq_stack_ptr; #endif }; u8 pad[64]; }; }; static_assert(sizeof(struct pcpu_hot) == 64); DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); /* const-qualified alias to pcpu_hot, aliased by linker. */ DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override, const_pcpu_hot); static __always_inline struct task_struct *get_current(void) { if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) return this_cpu_read_const(const_pcpu_hot.current_task); return this_cpu_read_stable(pcpu_hot.current_task); } #define current get_current() #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_CURRENT_H */
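/*
 * Illustrative sketch, not part of asm/current.h: how kernel code consumes
 * the current macro defined above. demo_report_current() is a hypothetical
 * helper; every use of current compiles down to the per-CPU read of
 * pcpu_hot.current_task (or const_pcpu_hot with segment-qualified reads).
 */
#include <linux/printk.h>
#include <linux/sched.h>

static void demo_report_current(void)
{
        /* current->comm and the pid live in the task_struct just looked up. */
        pr_info("demo: running in %s (pid %d)\n",
                current->comm, task_pid_nr(current));
}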
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2001 Paul Stewart * Copyright (c) 2001 Vojtech Pavlik * * HID char devices, giving access to raw HID device events. */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to Paul Stewart <stewart@wetlogic.net> */ #include <linux/poll.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/module.h> #include <linux/init.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/hid.h> #include <linux/hiddev.h> #include <linux/compat.h> #include <linux/vmalloc.h> #include <linux/nospec.h> #include "usbhid.h" #ifdef CONFIG_USB_DYNAMIC_MINORS #define HIDDEV_MINOR_BASE 0 #define HIDDEV_MINORS 256 #else #define HIDDEV_MINOR_BASE 96 #define HIDDEV_MINORS 16 #endif #define HIDDEV_BUFFER_SIZE 2048 struct hiddev_list { struct hiddev_usage_ref buffer[HIDDEV_BUFFER_SIZE]; int head; int tail; unsigned flags; struct fasync_struct *fasync; struct hiddev *hiddev; struct list_head node; struct mutex thread_lock; }; /* * Find a report, given the report's type and ID. The ID can be specified * indirectly by REPORT_ID_FIRST (which returns the first report of the given * type) or by (REPORT_ID_NEXT | old_id), which returns the next report of the * given type which follows old_id. */ static struct hid_report * hiddev_lookup_report(struct hid_device *hid, struct hiddev_report_info *rinfo) { unsigned int flags = rinfo->report_id & ~HID_REPORT_ID_MASK; unsigned int rid = rinfo->report_id & HID_REPORT_ID_MASK; struct hid_report_enum *report_enum; struct hid_report *report; struct list_head *list; if (rinfo->report_type < HID_REPORT_TYPE_MIN || rinfo->report_type > HID_REPORT_TYPE_MAX) return NULL; report_enum = hid->report_enum + (rinfo->report_type - HID_REPORT_TYPE_MIN); switch (flags) { case 0: /* Nothing to do -- report_id is already set correctly */ break; case HID_REPORT_ID_FIRST: if (list_empty(&report_enum->report_list)) return NULL; list = report_enum->report_list.next; report = list_entry(list, struct hid_report, list); rinfo->report_id = report->id; break; case HID_REPORT_ID_NEXT: report = report_enum->report_id_hash[rid]; if (!report) return NULL; list = report->list.next; if (list == &report_enum->report_list) return NULL; report = list_entry(list, struct hid_report, list); rinfo->report_id = report->id; break; default: return NULL; } return report_enum->report_id_hash[rinfo->report_id]; } /* * Perform an exhaustive search of the report table for a usage, given its * type and usage id. 
*/ static struct hid_field * hiddev_lookup_usage(struct hid_device *hid, struct hiddev_usage_ref *uref) { int i, j; struct hid_report *report; struct hid_report_enum *report_enum; struct hid_field *field; if (uref->report_type < HID_REPORT_TYPE_MIN || uref->report_type > HID_REPORT_TYPE_MAX) return NULL; report_enum = hid->report_enum + (uref->report_type - HID_REPORT_TYPE_MIN); list_for_each_entry(report, &report_enum->report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) { if (field->usage[j].hid == uref->usage_code) { uref->report_id = report->id; uref->field_index = i; uref->usage_index = j; return field; } } } } return NULL; } static void hiddev_send_event(struct hid_device *hid, struct hiddev_usage_ref *uref) { struct hiddev *hiddev = hid->hiddev; struct hiddev_list *list; unsigned long flags; spin_lock_irqsave(&hiddev->list_lock, flags); list_for_each_entry(list, &hiddev->list, node) { if (uref->field_index != HID_FIELD_INDEX_NONE || (list->flags & HIDDEV_FLAG_REPORT) != 0) { list->buffer[list->head] = *uref; list->head = (list->head + 1) & (HIDDEV_BUFFER_SIZE - 1); kill_fasync(&list->fasync, SIGIO, POLL_IN); } } spin_unlock_irqrestore(&hiddev->list_lock, flags); wake_up_interruptible(&hiddev->wait); } /* * This is where hid.c calls into hiddev to pass an event that occurred over * the interrupt pipe */ void hiddev_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { unsigned type = field->report_type; struct hiddev_usage_ref uref; uref.report_type = (type == HID_INPUT_REPORT) ? HID_REPORT_TYPE_INPUT : ((type == HID_OUTPUT_REPORT) ? HID_REPORT_TYPE_OUTPUT : ((type == HID_FEATURE_REPORT) ? HID_REPORT_TYPE_FEATURE : 0)); uref.report_id = field->report->id; uref.field_index = field->index; uref.usage_index = (usage - field->usage); uref.usage_code = usage->hid; uref.value = value; hiddev_send_event(hid, &uref); } EXPORT_SYMBOL_GPL(hiddev_hid_event); void hiddev_report_event(struct hid_device *hid, struct hid_report *report) { unsigned type = report->type; struct hiddev_usage_ref uref; memset(&uref, 0, sizeof(uref)); uref.report_type = (type == HID_INPUT_REPORT) ? HID_REPORT_TYPE_INPUT : ((type == HID_OUTPUT_REPORT) ? HID_REPORT_TYPE_OUTPUT : ((type == HID_FEATURE_REPORT) ? 
HID_REPORT_TYPE_FEATURE : 0)); uref.report_id = report->id; uref.field_index = HID_FIELD_INDEX_NONE; hiddev_send_event(hid, &uref); } /* * fasync file op */ static int hiddev_fasync(int fd, struct file *file, int on) { struct hiddev_list *list = file->private_data; return fasync_helper(fd, file, on, &list->fasync); } /* * release file op */ static int hiddev_release(struct inode * inode, struct file * file) { struct hiddev_list *list = file->private_data; unsigned long flags; spin_lock_irqsave(&list->hiddev->list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hiddev->list_lock, flags); mutex_lock(&list->hiddev->existancelock); if (!--list->hiddev->open) { if (list->hiddev->exist) { hid_hw_close(list->hiddev->hid); hid_hw_power(list->hiddev->hid, PM_HINT_NORMAL); } else { mutex_unlock(&list->hiddev->existancelock); kfree(list->hiddev); vfree(list); return 0; } } mutex_unlock(&list->hiddev->existancelock); vfree(list); return 0; } static int __hiddev_open(struct hiddev *hiddev, struct file *file) { struct hiddev_list *list; int error; lockdep_assert_held(&hiddev->existancelock); list = vzalloc(sizeof(*list)); if (!list) return -ENOMEM; mutex_init(&list->thread_lock); list->hiddev = hiddev; if (!hiddev->open++) { error = hid_hw_power(hiddev->hid, PM_HINT_FULLON); if (error < 0) goto err_drop_count; error = hid_hw_open(hiddev->hid); if (error < 0) goto err_normal_power; } spin_lock_irq(&hiddev->list_lock); list_add_tail(&list->node, &hiddev->list); spin_unlock_irq(&hiddev->list_lock); file->private_data = list; return 0; err_normal_power: hid_hw_power(hiddev->hid, PM_HINT_NORMAL); err_drop_count: hiddev->open--; vfree(list); return error; } /* * open file op */ static int hiddev_open(struct inode *inode, struct file *file) { struct usb_interface *intf; struct hid_device *hid; struct hiddev *hiddev; int res; intf = usbhid_find_interface(iminor(inode)); if (!intf) return -ENODEV; hid = usb_get_intfdata(intf); hiddev = hid->hiddev; mutex_lock(&hiddev->existancelock); res = hiddev->exist ? __hiddev_open(hiddev, file) : -ENODEV; mutex_unlock(&hiddev->existancelock); return res; } /* * "write" file op */ static ssize_t hiddev_write(struct file * file, const char __user * buffer, size_t count, loff_t *ppos) { return -EINVAL; } /* * "read" file op */ static ssize_t hiddev_read(struct file * file, char __user * buffer, size_t count, loff_t *ppos) { DEFINE_WAIT(wait); struct hiddev_list *list = file->private_data; int event_size; int retval; event_size = ((list->flags & HIDDEV_FLAG_UREF) != 0) ? 
sizeof(struct hiddev_usage_ref) : sizeof(struct hiddev_event); if (count < event_size) return 0; /* lock against other threads */ retval = mutex_lock_interruptible(&list->thread_lock); if (retval) return -ERESTARTSYS; while (retval == 0) { if (list->head == list->tail) { prepare_to_wait(&list->hiddev->wait, &wait, TASK_INTERRUPTIBLE); while (list->head == list->tail) { if (signal_pending(current)) { retval = -ERESTARTSYS; break; } if (!list->hiddev->exist) { retval = -EIO; break; } if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; break; } /* let O_NONBLOCK tasks run */ mutex_unlock(&list->thread_lock); schedule(); if (mutex_lock_interruptible(&list->thread_lock)) { finish_wait(&list->hiddev->wait, &wait); return -EINTR; } set_current_state(TASK_INTERRUPTIBLE); } finish_wait(&list->hiddev->wait, &wait); } if (retval) { mutex_unlock(&list->thread_lock); return retval; } while (list->head != list->tail && retval + event_size <= count) { if ((list->flags & HIDDEV_FLAG_UREF) == 0) { if (list->buffer[list->tail].field_index != HID_FIELD_INDEX_NONE) { struct hiddev_event event; event.hid = list->buffer[list->tail].usage_code; event.value = list->buffer[list->tail].value; if (copy_to_user(buffer + retval, &event, sizeof(struct hiddev_event))) { mutex_unlock(&list->thread_lock); return -EFAULT; } retval += sizeof(struct hiddev_event); } } else { if (list->buffer[list->tail].field_index != HID_FIELD_INDEX_NONE || (list->flags & HIDDEV_FLAG_REPORT) != 0) { if (copy_to_user(buffer + retval, list->buffer + list->tail, sizeof(struct hiddev_usage_ref))) { mutex_unlock(&list->thread_lock); return -EFAULT; } retval += sizeof(struct hiddev_usage_ref); } } list->tail = (list->tail + 1) & (HIDDEV_BUFFER_SIZE - 1); } } mutex_unlock(&list->thread_lock); return retval; } /* * "poll" file op * No kernel lock - fine */ static __poll_t hiddev_poll(struct file *file, poll_table *wait) { struct hiddev_list *list = file->private_data; poll_wait(file, &list->hiddev->wait, wait); if (list->head != list->tail) return EPOLLIN | EPOLLRDNORM | EPOLLOUT; if (!list->hiddev->exist) return EPOLLERR | EPOLLHUP; return 0; } /* * "ioctl" file op */ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, void __user *user_arg) { struct hid_device *hid = hiddev->hid; struct hiddev_report_info rinfo; struct hiddev_usage_ref_multi *uref_multi = NULL; struct hiddev_usage_ref *uref; struct hid_report *report; struct hid_field *field; int i; uref_multi = kmalloc(sizeof(struct hiddev_usage_ref_multi), GFP_KERNEL); if (!uref_multi) return -ENOMEM; uref = &uref_multi->uref; if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) { if (copy_from_user(uref_multi, user_arg, sizeof(*uref_multi))) goto fault; } else { if (copy_from_user(uref, user_arg, sizeof(*uref))) goto fault; } switch (cmd) { case HIDIOCGUCODE: rinfo.report_type = uref->report_type; rinfo.report_id = uref->report_id; if ((report = hiddev_lookup_report(hid, &rinfo)) == NULL) goto inval; if (uref->field_index >= report->maxfield) goto inval; uref->field_index = array_index_nospec(uref->field_index, report->maxfield); field = report->field[uref->field_index]; if (uref->usage_index >= field->maxusage) goto inval; uref->usage_index = array_index_nospec(uref->usage_index, field->maxusage); uref->usage_code = field->usage[uref->usage_index].hid; if (copy_to_user(user_arg, uref, sizeof(*uref))) goto fault; goto goodreturn; default: if (cmd != HIDIOCGUSAGE && cmd != HIDIOCGUSAGES && uref->report_type == HID_REPORT_TYPE_INPUT) goto inval; if (uref->report_id == 
HID_REPORT_ID_UNKNOWN) { field = hiddev_lookup_usage(hid, uref); if (field == NULL) goto inval; } else { rinfo.report_type = uref->report_type; rinfo.report_id = uref->report_id; if ((report = hiddev_lookup_report(hid, &rinfo)) == NULL) goto inval; if (uref->field_index >= report->maxfield) goto inval; uref->field_index = array_index_nospec(uref->field_index, report->maxfield); field = report->field[uref->field_index]; if (cmd == HIDIOCGCOLLECTIONINDEX) { if (uref->usage_index >= field->maxusage) goto inval; uref->usage_index = array_index_nospec(uref->usage_index, field->maxusage); } else if (uref->usage_index >= field->report_count) goto inval; } if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) { if (uref_multi->num_values > HID_MAX_MULTI_USAGES || uref->usage_index + uref_multi->num_values > field->report_count) goto inval; uref->usage_index = array_index_nospec(uref->usage_index, field->report_count - uref_multi->num_values); } switch (cmd) { case HIDIOCGUSAGE: if (uref->usage_index >= field->report_count) goto inval; uref->value = field->value[uref->usage_index]; if (copy_to_user(user_arg, uref, sizeof(*uref))) goto fault; goto goodreturn; case HIDIOCSUSAGE: if (uref->usage_index >= field->report_count) goto inval; field->value[uref->usage_index] = uref->value; goto goodreturn; case HIDIOCGCOLLECTIONINDEX: i = field->usage[uref->usage_index].collection_index; kfree(uref_multi); return i; case HIDIOCGUSAGES: for (i = 0; i < uref_multi->num_values; i++) uref_multi->values[i] = field->value[uref->usage_index + i]; if (copy_to_user(user_arg, uref_multi, sizeof(*uref_multi))) goto fault; goto goodreturn; case HIDIOCSUSAGES: for (i = 0; i < uref_multi->num_values; i++) field->value[uref->usage_index + i] = uref_multi->values[i]; goto goodreturn; } goodreturn: kfree(uref_multi); return 0; fault: kfree(uref_multi); return -EFAULT; inval: kfree(uref_multi); return -EINVAL; } } static noinline int hiddev_ioctl_string(struct hiddev *hiddev, unsigned int cmd, void __user *user_arg) { struct hid_device *hid = hiddev->hid; struct usb_device *dev = hid_to_usb_dev(hid); int idx, len; char *buf; if (get_user(idx, (int __user *)user_arg)) return -EFAULT; if ((buf = kmalloc(HID_STRING_SIZE, GFP_KERNEL)) == NULL) return -ENOMEM; if ((len = usb_string(dev, idx, buf, HID_STRING_SIZE-1)) < 0) { kfree(buf); return -EINVAL; } if (copy_to_user(user_arg+sizeof(int), buf, len+1)) { kfree(buf); return -EFAULT; } kfree(buf); return len; } static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct hiddev_list *list = file->private_data; struct hiddev *hiddev = list->hiddev; struct hid_device *hid; struct hiddev_collection_info cinfo; struct hiddev_report_info rinfo; struct hiddev_field_info finfo; struct hiddev_devinfo dinfo; struct hid_report *report; struct hid_field *field; void __user *user_arg = (void __user *)arg; int i, r = -EINVAL; /* Called without BKL by compat methods so no BKL taken */ mutex_lock(&hiddev->existancelock); if (!hiddev->exist) { r = -ENODEV; goto ret_unlock; } hid = hiddev->hid; switch (cmd) { case HIDIOCGVERSION: r = put_user(HID_VERSION, (int __user *)arg) ? 
-EFAULT : 0; break; case HIDIOCAPPLICATION: if (arg >= hid->maxapplication) break; for (i = 0; i < hid->maxcollection; i++) if (hid->collection[i].type == HID_COLLECTION_APPLICATION && arg-- == 0) break; if (i < hid->maxcollection) r = hid->collection[i].usage; break; case HIDIOCGDEVINFO: { struct usb_device *dev = hid_to_usb_dev(hid); struct usbhid_device *usbhid = hid->driver_data; memset(&dinfo, 0, sizeof(dinfo)); dinfo.bustype = BUS_USB; dinfo.busnum = dev->bus->busnum; dinfo.devnum = dev->devnum; dinfo.ifnum = usbhid->ifnum; dinfo.vendor = le16_to_cpu(dev->descriptor.idVendor); dinfo.product = le16_to_cpu(dev->descriptor.idProduct); dinfo.version = le16_to_cpu(dev->descriptor.bcdDevice); dinfo.num_applications = hid->maxapplication; r = copy_to_user(user_arg, &dinfo, sizeof(dinfo)) ? -EFAULT : 0; break; } case HIDIOCGFLAG: r = put_user(list->flags, (int __user *)arg) ? -EFAULT : 0; break; case HIDIOCSFLAG: { int newflags; if (get_user(newflags, (int __user *)arg)) { r = -EFAULT; break; } if ((newflags & ~HIDDEV_FLAGS) != 0 || ((newflags & HIDDEV_FLAG_REPORT) != 0 && (newflags & HIDDEV_FLAG_UREF) == 0)) break; list->flags = newflags; r = 0; break; } case HIDIOCGSTRING: r = hiddev_ioctl_string(hiddev, cmd, user_arg); break; case HIDIOCINITREPORT: usbhid_init_reports(hid); hiddev->initialized = true; r = 0; break; case HIDIOCGREPORT: if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) { r = -EFAULT; break; } if (rinfo.report_type == HID_REPORT_TYPE_OUTPUT) break; report = hiddev_lookup_report(hid, &rinfo); if (report == NULL) break; hid_hw_request(hid, report, HID_REQ_GET_REPORT); hid_hw_wait(hid); r = 0; break; case HIDIOCSREPORT: if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) { r = -EFAULT; break; } if (rinfo.report_type == HID_REPORT_TYPE_INPUT) break; report = hiddev_lookup_report(hid, &rinfo); if (report == NULL) break; hid_hw_request(hid, report, HID_REQ_SET_REPORT); hid_hw_wait(hid); r = 0; break; case HIDIOCGREPORTINFO: if (copy_from_user(&rinfo, user_arg, sizeof(rinfo))) { r = -EFAULT; break; } report = hiddev_lookup_report(hid, &rinfo); if (report == NULL) break; rinfo.num_fields = report->maxfield; r = copy_to_user(user_arg, &rinfo, sizeof(rinfo)) ? -EFAULT : 0; break; case HIDIOCGFIELDINFO: if (copy_from_user(&finfo, user_arg, sizeof(finfo))) { r = -EFAULT; break; } rinfo.report_type = finfo.report_type; rinfo.report_id = finfo.report_id; report = hiddev_lookup_report(hid, &rinfo); if (report == NULL) break; if (finfo.field_index >= report->maxfield) break; finfo.field_index = array_index_nospec(finfo.field_index, report->maxfield); field = report->field[finfo.field_index]; memset(&finfo, 0, sizeof(finfo)); finfo.report_type = rinfo.report_type; finfo.report_id = rinfo.report_id; finfo.field_index = field->report_count - 1; finfo.maxusage = field->maxusage; finfo.flags = field->flags; finfo.physical = field->physical; finfo.logical = field->logical; finfo.application = field->application; finfo.logical_minimum = field->logical_minimum; finfo.logical_maximum = field->logical_maximum; finfo.physical_minimum = field->physical_minimum; finfo.physical_maximum = field->physical_maximum; finfo.unit_exponent = field->unit_exponent; finfo.unit = field->unit; r = copy_to_user(user_arg, &finfo, sizeof(finfo)) ? 
-EFAULT : 0; break; case HIDIOCGUCODE: case HIDIOCGUSAGE: case HIDIOCSUSAGE: case HIDIOCGUSAGES: case HIDIOCSUSAGES: case HIDIOCGCOLLECTIONINDEX: if (!hiddev->initialized) { usbhid_init_reports(hid); hiddev->initialized = true; } r = hiddev_ioctl_usage(hiddev, cmd, user_arg); break; case HIDIOCGCOLLECTIONINFO: if (copy_from_user(&cinfo, user_arg, sizeof(cinfo))) { r = -EFAULT; break; } if (cinfo.index >= hid->maxcollection) break; cinfo.index = array_index_nospec(cinfo.index, hid->maxcollection); cinfo.type = hid->collection[cinfo.index].type; cinfo.usage = hid->collection[cinfo.index].usage; cinfo.level = hid->collection[cinfo.index].level; r = copy_to_user(user_arg, &cinfo, sizeof(cinfo)) ? -EFAULT : 0; break; default: if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ) break; if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGNAME(0))) { int len = strlen(hid->name) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); r = copy_to_user(user_arg, hid->name, len) ? -EFAULT : len; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGPHYS(0))) { int len = strlen(hid->phys) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); r = copy_to_user(user_arg, hid->phys, len) ? -EFAULT : len; break; } } ret_unlock: mutex_unlock(&hiddev->existancelock); return r; } static const struct file_operations hiddev_fops = { .owner = THIS_MODULE, .read = hiddev_read, .write = hiddev_write, .poll = hiddev_poll, .open = hiddev_open, .release = hiddev_release, .unlocked_ioctl = hiddev_ioctl, .fasync = hiddev_fasync, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; static char *hiddev_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } static struct usb_class_driver hiddev_class = { .name = "hiddev%d", .devnode = hiddev_devnode, .fops = &hiddev_fops, .minor_base = HIDDEV_MINOR_BASE, }; /* * This is where hid.c calls us to connect a hid device to the hiddev driver */ int hiddev_connect(struct hid_device *hid, unsigned int force) { struct hiddev *hiddev; struct usbhid_device *usbhid = hid->driver_data; int retval; if (!force) { unsigned int i; for (i = 0; i < hid->maxcollection; i++) if (hid->collection[i].type == HID_COLLECTION_APPLICATION && !IS_INPUT_APPLICATION(hid->collection[i].usage)) break; if (i == hid->maxcollection) return -EINVAL; } if (!(hiddev = kzalloc(sizeof(struct hiddev), GFP_KERNEL))) return -ENOMEM; init_waitqueue_head(&hiddev->wait); INIT_LIST_HEAD(&hiddev->list); spin_lock_init(&hiddev->list_lock); mutex_init(&hiddev->existancelock); hid->hiddev = hiddev; hiddev->hid = hid; hiddev->exist = 1; retval = usb_register_dev(usbhid->intf, &hiddev_class); if (retval) { hid_err(hid, "Not able to get a minor for this device\n"); hid->hiddev = NULL; kfree(hiddev); return retval; } /* * If HID_QUIRK_NO_INIT_REPORTS is set, make sure we don't initialize * the reports. 
*/ hiddev->initialized = hid->quirks & HID_QUIRK_NO_INIT_REPORTS; hiddev->minor = usbhid->intf->minor; return 0; } /* * This is where hid.c calls us to disconnect a hiddev device from the * corresponding hid device (usually because the usb device has disconnected) */ static struct usb_class_driver hiddev_class; void hiddev_disconnect(struct hid_device *hid) { struct hiddev *hiddev = hid->hiddev; struct usbhid_device *usbhid = hid->driver_data; usb_deregister_dev(usbhid->intf, &hiddev_class); mutex_lock(&hiddev->existancelock); hiddev->exist = 0; if (hiddev->open) { hid_hw_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); mutex_unlock(&hiddev->existancelock); } else { mutex_unlock(&hiddev->existancelock); kfree(hiddev); } }
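A hedged user-space sketch of driving the ioctl interface implemented above: it opens a hiddev node and issues HIDIOCGDEVINFO and HIDIOCGNAME, both handled by hiddev_ioctl(). The device path is an example only, and the printed field widths follow the struct hiddev_devinfo layout in <linux/hiddev.h> as understood here.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hiddev.h>

int main(void)
{
	struct hiddev_devinfo dinfo;
	char name[256] = "unknown";
	int fd;

	fd = open("/dev/usb/hiddev0", O_RDONLY);	/* example path; nodes are named usb/hiddevN */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Handled by the HIDIOCGDEVINFO case in hiddev_ioctl() */
	if (ioctl(fd, HIDIOCGDEVINFO, &dinfo) == 0)
		printf("bus %u dev %u vendor %04x product %04x, %u applications\n",
		       dinfo.busnum, dinfo.devnum,
		       (unsigned int)(dinfo.vendor & 0xffff),
		       (unsigned int)(dinfo.product & 0xffff),
		       dinfo.num_applications);

	/* Handled by the default: HIDIOCGNAME branch in hiddev_ioctl() */
	if (ioctl(fd, HIDIOCGNAME(sizeof(name)), name) >= 0)
		printf("name: %s\n", name);

	close(fd);
	return 0;
}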
// SPDX-License-Identifier: GPL-2.0+
/*
 * PlayStation 2 Trance Vibrator driver
 *
 * Copyright (C) 2006 Sam Hocevar <sam@zoy.org>
 */

/* Standard include files */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/usb.h>

#define DRIVER_AUTHOR "Sam Hocevar, sam@zoy.org"
#define DRIVER_DESC "PlayStation 2 Trance Vibrator driver"

#define TRANCEVIBRATOR_VENDOR_ID	0x0b49	/* ASCII Corporation */
#define TRANCEVIBRATOR_PRODUCT_ID	0x064f	/* Trance Vibrator */

static const struct usb_device_id id_table[] = {
	{ USB_DEVICE(TRANCEVIBRATOR_VENDOR_ID, TRANCEVIBRATOR_PRODUCT_ID) },
	{ },
};
MODULE_DEVICE_TABLE (usb, id_table);

/* Driver-local specific stuff */
struct trancevibrator {
	struct usb_device *udev;
	unsigned int speed;
};

static ssize_t speed_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct usb_interface *intf = to_usb_interface(dev);
	struct trancevibrator *tv = usb_get_intfdata(intf);

	return sprintf(buf, "%d\n", tv->speed);
}

static ssize_t speed_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	struct usb_interface *intf = to_usb_interface(dev);
	struct trancevibrator *tv = usb_get_intfdata(intf);
	int temp, retval, old;

	retval = kstrtoint(buf, 10, &temp);
	if (retval)
		return retval;
	if (temp > 255)
		temp = 255;
	else if (temp < 0)
		temp = 0;
	old = tv->speed;
	tv->speed = temp;

	dev_dbg(&tv->udev->dev, "speed = %d\n", tv->speed);

	/* Set speed */
	retval = usb_control_msg(tv->udev, usb_sndctrlpipe(tv->udev, 0),
				 0x01, /* vendor request: set speed */
				 USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_OTHER,
				 tv->speed, /* speed value */
				 0, NULL, 0, USB_CTRL_SET_TIMEOUT);
	if (retval) {
		tv->speed = old;
		dev_dbg(&tv->udev->dev, "retval = %d\n", retval);
		return retval;
	}
	return count;
}
static DEVICE_ATTR_RW(speed);

static struct attribute *tv_attrs[] = {
	&dev_attr_speed.attr,
	NULL,
};
ATTRIBUTE_GROUPS(tv);

static int tv_probe(struct usb_interface *interface,
		    const struct usb_device_id *id)
{
	struct usb_device *udev = interface_to_usbdev(interface);
	struct trancevibrator *dev;
	int retval;

	dev = kzalloc(sizeof(struct trancevibrator), GFP_KERNEL);
	if (!dev) {
		retval = -ENOMEM;
		goto error;
	}

	dev->udev = usb_get_dev(udev);
	usb_set_intfdata(interface, dev);

	return 0;

error:
	kfree(dev);
	return retval;
}

static void tv_disconnect(struct usb_interface *interface)
{
	struct trancevibrator *dev;

	dev = usb_get_intfdata (interface);
	usb_set_intfdata(interface, NULL);
	usb_put_dev(dev->udev);
	kfree(dev);
}

/* USB subsystem object */
static struct usb_driver tv_driver = {
	.name =		"trancevibrator",
	.probe =	tv_probe,
	.disconnect =	tv_disconnect,
	.id_table =	id_table,
	.dev_groups =	tv_groups,
};

module_usb_driver(tv_driver);

MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
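A small user-space sketch showing how the speed attribute registered above via DEVICE_ATTR_RW(speed) can be written; the sysfs path and helper name are illustrative only and depend on where the USB interface is enumerated. Values outside 0-255 are clamped by speed_store().

#include <stdio.h>

/* Hypothetical helper for this example; attr_path is the interface's "speed" file. */
static int example_set_speed(const char *attr_path, int speed)
{
	FILE *f = fopen(attr_path, "w");

	if (!f) {
		perror("fopen");
		return -1;
	}
	fprintf(f, "%d\n", speed);
	if (fclose(f) != 0) {		/* a write error is reported on close */
		perror("fclose");
		return -1;
	}
	return 0;
}

int main(void)
{
	/* example path: interface 0 of the device enumerated as 1-2 */
	return example_set_speed("/sys/bus/usb/devices/1-2:1.0/speed", 127);
}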
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KERNEL_PRINTK__ #define __KERNEL_PRINTK__ #include <linux/stdarg.h> #include <linux/init.h> #include <linux/kern_levels.h> #include <linux/linkage.h> #include <linux/ratelimit_types.h> #include <linux/once_lite.h> extern const char linux_banner[]; extern const char linux_proc_banner[]; extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ #define PRINTK_MAX_SINGLE_HEADER_LEN 2 static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { switch (buffer[1]) { case 
'0' ... '7': case 'c': /* KERN_CONT */ return buffer[1]; } } return 0; } static inline const char *printk_skip_level(const char *buffer) { if (printk_get_level(buffer)) return buffer + 2; return buffer; } static inline const char *printk_skip_headers(const char *buffer) { while (printk_get_level(buffer)) buffer = printk_skip_level(buffer); return buffer; } /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ /* * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, * we're now allowing both to be set from kernel config. */ #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET extern int console_printk[]; #define console_loglevel (console_printk[0]) #define default_message_loglevel (console_printk[1]) #define minimum_console_loglevel (console_printk[2]) #define default_console_loglevel (console_printk[3]) extern void console_verbose(void); /* strlen("ratelimit") + 1 */ #define DEVKMSG_STR_MAX_SIZE 10 extern char devkmsg_log_str[]; struct ctl_table; extern int suppress_printk; struct va_format { const char *fmt; va_list *va; }; /* * FW_BUG * Add this to a message where you are sure the firmware is buggy or behaves * really stupid or out of spec. Be aware that the responsible BIOS developer * should be able to fix this issue or at least get a concrete idea of the * problem by reading your message without the need of looking at the kernel * code. * * Use it for definite and high priority BIOS bugs. * * FW_WARN * Use it for not that clear (e.g. could the kernel messed up things already?) * and medium priority BIOS bugs. * * FW_INFO * Use this one if you want to tell the user or vendor about something * suspicious, but generally harmless related to the firmware. * * Use it for information or very low priority BIOS bugs. */ #define FW_BUG "[Firmware Bug]: " #define FW_WARN "[Firmware Warn]: " #define FW_INFO "[Firmware Info]: " /* * HW_ERR * Add this to a message for hardware errors, so that user can report * it to hardware vendor instead of LKML or software vendor. */ #define HW_ERR "[Hardware Error]: " /* * DEPRECATED * Add this to a message whenever you want to warn user space about the use * of a deprecated aspect of an API so they can stop using it */ #define DEPRECATED "[Deprecated]: " /* * Dummy printk for disabled debugging statements to use whilst maintaining * gcc's format checking. */ #define no_printk(fmt, ...) \ ({ \ if (0) \ printk(fmt, ##__VA_ARGS__); \ 0; \ }) #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); #else static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif struct dev_printk_info; #ifdef CONFIG_PRINTK asmlinkage __printf(4, 0) int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args); asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); asmlinkage __printf(1, 2) __cold int _printk(const char *fmt, ...); /* * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! 
*/ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); extern void __printk_safe_enter(void); extern void __printk_safe_exit(void); /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts * must be disabled for the deferred duration. */ #define printk_deferred_enter __printk_safe_enter #define printk_deferred_exit __printk_safe_exit /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use * printk_ratelimited() or plain old __ratelimit(). */ extern int __printk_ratelimit(const char *func); #define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); extern int printk_delay_msec; extern int dmesg_restrict; extern void wake_up_klogd(void); char *log_buf_addr_get(void); u32 log_buf_len_get(void); void log_buf_vmcoreinfo_setup(void); void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) { return 0; } static inline __printf(1, 2) __cold int _printk(const char *s, ...) { return 0; } static inline __printf(1, 2) __cold int _printk_deferred(const char *s, ...) { return 0; } static inline void printk_deferred_enter(void) { } static inline void printk_deferred_exit(void) { } static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec) { return false; } static inline void wake_up_klogd(void) { } static inline char *log_buf_addr_get(void) { return NULL; } static inline u32 log_buf_len_get(void) { return 0; } static inline void log_buf_vmcoreinfo_setup(void) { } static inline void setup_log_buf(int early) { } static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) { } static inline void dump_stack_print_info(const char *log_lvl) { } static inline void show_regs_print_info(const char *log_lvl) { } static inline void dump_stack_lvl(const char *log_lvl) { } static inline void dump_stack(void) { } static inline void printk_trigger_flush(void) { } #endif bool this_cpu_in_panic(void); #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); extern void __printk_cpu_sync_put(void); #else #define __printk_cpu_sync_try_get() true #define __printk_cpu_sync_wait() #define __printk_cpu_sync_put() #endif /* CONFIG_SMP */ /** * printk_cpu_sync_get_irqsave() - Disable interrupts and acquire the printk * cpu-reentrant spinning lock. * @flags: Stack-allocated storage for saving local interrupt state, * to be passed to printk_cpu_sync_put_irqrestore(). * * If the lock is owned by another CPU, spin until it becomes available. * Interrupts are restored while spinning. * * CAUTION: This function must be used carefully. It does not behave like a * typical lock. Here are important things to watch out for... * * * This function is reentrant on the same CPU. Therefore the calling * code must not assume exclusive access to data if code accessing the * data can run reentrant or within NMI context on the same CPU. 
* * * If there exists usage of this function from NMI context, it becomes * unsafe to perform any type of locking or spinning to wait for other * CPUs after calling this function from any context. This includes * using spinlocks or any other busy-waiting synchronization methods. */ #define printk_cpu_sync_get_irqsave(flags) \ for (;;) { \ local_irq_save(flags); \ if (__printk_cpu_sync_try_get()) \ break; \ local_irq_restore(flags); \ __printk_cpu_sync_wait(); \ } /** * printk_cpu_sync_put_irqrestore() - Release the printk cpu-reentrant spinning * lock and restore interrupts. * @flags: Caller's saved interrupt state, from printk_cpu_sync_get_irqsave(). */ #define printk_cpu_sync_put_irqrestore(flags) \ do { \ __printk_cpu_sync_put(); \ local_irq_restore(flags); \ } while (0) extern int kptr_restrict; /** * pr_fmt - used by the pr_*() macros to generate the printk format string * @fmt: format string passed from a pr_*() macro * * This macro can be used to generate a unified format string for pr_*() * macros. A common use is to prefix all pr_*() messages in a file with a common * string. For example, defining this at the top of a source file: * * #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt * * would prefix all pr_info, pr_emerg... messages in the file with the module * name. */ #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif struct module; #ifdef CONFIG_PRINTK_INDEX struct pi_entry { const char *fmt; const char *func; const char *file; unsigned int line; /* * While printk and pr_* have the level stored in the string at compile * time, some subsystems dynamically add it at runtime through the * format string. For these dynamic cases, we allow the subsystem to * tell us the level at compile time. * * NULL indicates that the level, if any, is stored in fmt. */ const char *level; /* * The format string used by various subsystem specific printk() * wrappers to prefix the message. * * Note that the static prefix defined by the pr_fmt() macro is stored * directly in the message format (@fmt), not here. */ const char *subsys_fmt_prefix; } __packed; #define __printk_index_emit(_fmt, _level, _subsys_fmt_prefix) \ do { \ if (__builtin_constant_p(_fmt) && __builtin_constant_p(_level)) { \ /* * We check __builtin_constant_p multiple times here * for the same input because GCC will produce an error * if we try to assign a static variable to fmt if it * is not a constant, even with the outer if statement. */ \ static const struct pi_entry _entry \ __used = { \ .fmt = __builtin_constant_p(_fmt) ? (_fmt) : NULL, \ .func = __func__, \ .file = __FILE__, \ .line = __LINE__, \ .level = __builtin_constant_p(_level) ? (_level) : NULL, \ .subsys_fmt_prefix = _subsys_fmt_prefix,\ }; \ static const struct pi_entry *_entry_ptr \ __used __section(".printk_index") = &_entry; \ } \ } while (0) #else /* !CONFIG_PRINTK_INDEX */ #define __printk_index_emit(...) do {} while (0) #endif /* CONFIG_PRINTK_INDEX */ /* * Some subsystems have their own custom printk that applies a va_format to a * generic format, for example, to include a device number or other metadata * alongside the format supplied by the caller. * * In order to store these in the way they would be emitted by the printk * infrastructure, the subsystem provides us with the start, fixed string, and * any subsequent text in the format string. * * We take a variable argument list as pr_fmt/dev_fmt/etc are sometimes passed * as multiple arguments (eg: `"%s: ", "blah"`), and we must only take the * first one. 
* * subsys_fmt_prefix must be known at compile time, or compilation will fail * (since this is a mistake). If fmt or level is not known at compile time, no * index entry will be made (since this can legitimately happen). */ #define printk_index_subsys_emit(subsys_fmt_prefix, level, fmt, ...) \ __printk_index_emit(fmt, level, subsys_fmt_prefix) #define printk_index_wrap(_p_func, _fmt, ...) \ ({ \ __printk_index_emit(_fmt, NULL, NULL); \ _p_func(_fmt, ##__VA_ARGS__); \ }) /** * printk - print a kernel message * @fmt: format string * * This is printk(). It can be called from any context. We want it to work. * * If printk indexing is enabled, _printk() is called from printk_index_wrap. * Otherwise, printk is simply #defined to _printk. * * We try to grab the console_lock. If we succeed, it's easy - we log the * output and call the console drivers. If we fail to get the semaphore, we * place the output into the log buffer and return. The current holder of * the console_sem will notice the new output in console_unlock(); and will * send it to the consoles before releasing the lock. * * One effect of this deferred printing is that code which calls printk() and * then changes console_loglevel may break. This is because console_loglevel * is inspected when the actual printing occurs. * * See also: * printf(3) * * See the vsnprintf() documentation for format string extensions over C99. */ #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) #define printk_deferred(fmt, ...) \ printk_index_wrap(_printk_deferred, fmt, ##__VA_ARGS__) /** * pr_emerg - Print an emergency-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_EMERG loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_emerg(fmt, ...) \ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) /** * pr_alert - Print an alert-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ALERT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_alert(fmt, ...) \ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_crit - Print a critical-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CRIT loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_crit(fmt, ...) \ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) /** * pr_err - Print an error-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_ERR loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) /** * pr_warn - Print a warning-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_WARNING loglevel. It uses pr_fmt() * to generate the format string. */ #define pr_warn(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) /** * pr_notice - Print a notice-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_NOTICE loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) /** * pr_info - Print an info-level message * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_INFO loglevel. 
It uses pr_fmt() to * generate the format string. */ #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /** * pr_cont - Continues a previous log message in the same line. * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_CONT loglevel. It should only be * used when continuing a log message with no newline ('\n') enclosed. Otherwise * it defaults back to KERN_DEFAULT loglevel. */ #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) /** * pr_devel - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with KERN_DEBUG loglevel if DEBUG is * defined. Otherwise it does nothing. * * It uses pr_fmt() to generate the format string. */ #ifdef DEBUG #define pr_devel(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #include <linux/dynamic_debug.h> /** * pr_debug - Print a debug-level message conditionally * @fmt: format string * @...: arguments for the format string * * This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is * set. Otherwise, if DEBUG is defined, it's equivalent to a printk with * KERN_DEBUG loglevel. If DEBUG is not defined it does nothing. * * It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses * pr_fmt() internally). */ #define pr_debug(fmt, ...) \ dynamic_pr_debug(fmt, ##__VA_ARGS__) #elif defined(DEBUG) #define pr_debug(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #ifdef CONFIG_PRINTK #define printk_once(fmt, ...) \ DO_ONCE_LITE(printk, fmt, ##__VA_ARGS__) #define printk_deferred_once(fmt, ...) \ DO_ONCE_LITE(printk_deferred, fmt, ##__VA_ARGS__) #else #define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #define printk_deferred_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_once(fmt, ...) \ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_once(fmt, ...) \ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_once(fmt, ...) \ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_once(fmt, ...) \ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_once(fmt, ...) \ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_once(fmt, ...) \ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_once, don't do that... */ #if defined(DEBUG) #define pr_devel_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) #define pr_debug_once(fmt, ...) \ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_once(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* * ratelimited messages with local ratelimit_state, * no local ratelimit_state used in the !PRINTK case */ #ifdef CONFIG_PRINTK #define printk_ratelimited(fmt, ...) 
\ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ \ if (__ratelimit(&_rs)) \ printk(fmt, ##__VA_ARGS__); \ }) #else #define printk_ratelimited(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_ratelimited(fmt, ...) \ printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_ratelimited(fmt, ...) \ printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_ratelimited(fmt, ...) \ printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_ratelimited(fmt, ...) \ printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_ratelimited(fmt, ...) \ printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_ratelimited(fmt, ...) \ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) /* no pr_cont_ratelimited, don't do that... */ #if defined(DEBUG) #define pr_devel_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_devel_ratelimited(fmt, ...) \ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) /* descriptor check is first to prevent flooding with "callbacks suppressed" */ #define pr_debug_ratelimited(fmt, ...) \ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ if (DYNAMIC_DEBUG_BRANCH(descriptor) && \ __ratelimit(&_rs)) \ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define pr_debug_ratelimited(fmt, ...) \ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug_ratelimited(fmt, ...) 
\ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif extern const struct file_operations kmsg_fops; enum { DUMP_PREFIX_NONE, DUMP_PREFIX_ADDRESS, DUMP_PREFIX_OFFSET }; extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, char *linebuf, size_t linebuflen, bool ascii); #ifdef CONFIG_PRINTK extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); #else static inline void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, const void *buf, size_t len) { } #endif #if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #elif defined(DEBUG) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) #else static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { } #endif /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; * caller supplies trailing spaces for alignment if desired * @prefix_type: controls whether prefix of an offset, address, or none * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) * @buf: data blob to dump * @len: number of bytes in the @buf * * Calls print_hex_dump(), with log level of KERN_DEBUG, * rowsize of 16, groupsize of 1, and ASCII output included. */ #define print_hex_dump_bytes(prefix_str, prefix_type, buf, len) \ print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true) #endif
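To show how the macros declared above are normally used together, here is a minimal module sketch; the module and function names are invented for this example, and the pr_fmt() override must be defined before linux/printk.h is pulled in (here via linux/module.h).

/* Illustrative module sketch; not part of the header above. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/printk.h>

static int __init example_printk_init(void)
{
	pr_info("loaded\n");		/* expands to printk(KERN_INFO pr_fmt("loaded\n")) */
	pr_debug("debug details\n");	/* no_printk() unless DEBUG or dynamic debug enables it */
	pr_notice_ratelimited("rate-limited notice\n");	/* backed by printk_ratelimited() */
	return 0;
}

static void __exit example_printk_exit(void)
{
	pr_info("unloaded\n");
}

module_init(example_printk_init);
module_exit(example_printk_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("printk usage example");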
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	NET3	IP device support routines.
 *
 *	Derived from the IP parts of dev.c 1.0.19
 * 		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	Changes:
 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
 *					lists.
 *		Cyrus Durgin:		updated for kmod
 *		Matthias Andree:	in devinet_ioctl, compare label and
 *					address (4.4BSD alias style support),
 *					fall back to comparing just the label
 *					if no match found.
 */

#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/slab.h>
#include <linux/hash.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/kmod.h>
#include <linux/netconf.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/addrconf.h>

#define IPV6ONLY_FLAGS	\
	(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
	 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
	 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)

static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};

static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};

#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)

static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]		= { .type = NLA_U32 },
	[IFA_ADDRESS]		= { .type = NLA_U32 },
	[IFA_BROADCAST]		= { .type = NLA_U32 },
	[IFA_LABEL]		= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
	[IFA_PROTO]		= { .type = NLA_U8 },
};

struct inet_fill_args {
	u32 portid;
	u32 seq;
	int event;
	unsigned int flags;
	int netnsid;
	int ifindex;
};

#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];

static u32 inet_addr_hash(const struct net *net, __be32 addr)
{
	u32 val = (__force u32) addr ^ net_hash_mix(net);

	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
}

static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
	u32 hash = inet_addr_hash(net, ifa->ifa_local);

	ASSERT_RTNL();
	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
}

static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}

/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;

	rcu_read_lock();
	ifa = inet_lookup_ifaddr_rcu(net, addr);
	if (!ifa) {
		struct flowi4 fl4 = { .daddr = addr };
		struct fib_result res = { 0 };
		struct fib_table *local;

		/* Fallback to FIB local table so that communication
		 * over loopback subnets work.
		 */
		local = fib_get_table(net, RT_TABLE_LOCAL);
		if (local &&
		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
		    res.type == RTN_LOCAL)
			result = FIB_RES_DEV(res);
	} else {
		result = ifa->ifa_dev->dev;
	}
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);

/* called under RCU lock */
struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
{
	u32 hash = inet_addr_hash(net, addr);
	struct in_ifaddr *ifa;

	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
		if (ifa->ifa_local == addr &&
		    net_eq(dev_net(ifa->ifa_dev->dev), net))
			return ifa;

	return NULL;
}

static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif

/* Locks all the inet devices. */

static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
}

static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);

	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}

static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}

static void in_dev_free_rcu(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);

	kfree(rcu_dereference_protected(idev->mc_hash, 1));
	kfree(idev);
}

void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ?
dev->name : "NIL"); #endif netdev_put(dev, &idev->dev_tracker); if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else call_rcu(&idev->rcu_head, in_dev_free_rcu); } EXPORT_SYMBOL(in_dev_finish_destroy); static struct in_device *inetdev_init(struct net_device *dev) { struct in_device *in_dev; int err = -ENOMEM; ASSERT_RTNL(); in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); if (!in_dev) goto out; memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); if (!in_dev->arp_parms) goto out_kfree; if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) dev_disable_lro(dev); /* Reference in_dev->dev */ netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); err = devinet_sysctl_register(in_dev); if (err) { in_dev->dead = 1; neigh_parms_release(&arp_tbl, in_dev->arp_parms); in_dev_put(in_dev); in_dev = NULL; goto out; } ip_mc_init_dev(in_dev); if (dev->flags & IFF_UP) ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ rcu_assign_pointer(dev->ip_ptr, in_dev); out: return in_dev ?: ERR_PTR(err); out_kfree: kfree(in_dev); in_dev = NULL; goto out; } static void inetdev_destroy(struct in_device *in_dev) { struct net_device *dev; struct in_ifaddr *ifa; ASSERT_RTNL(); dev = in_dev->dev; in_dev->dead = 1; ip_mc_destroy_dev(in_dev); while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) { inet_del_ifa(in_dev, &in_dev->ifa_list, 0); inet_free_ifa(ifa); } RCU_INIT_POINTER(dev->ip_ptr, NULL); devinet_sysctl_unregister(in_dev); neigh_parms_release(&arp_tbl, in_dev->arp_parms); arp_ifdown(dev); in_dev_put(in_dev); } int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { const struct in_ifaddr *ifa; rcu_read_lock(); in_dev_for_each_ifa_rcu(ifa, in_dev) { if (inet_ifa_match(a, ifa)) { if (!b || inet_ifa_match(b, ifa)) { rcu_read_unlock(); return 1; } } } rcu_read_unlock(); return 0; } static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr __rcu **ifap, int destroy, struct nlmsghdr *nlh, u32 portid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1; struct in_ifaddr __rcu **last_prim; struct in_ifaddr *prev_prom = NULL; int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); ASSERT_RTNL(); ifa1 = rtnl_dereference(*ifap); last_prim = ifap; if (in_dev->dead) goto no_promotions; /* 1. Deleting primary ifaddr forces deletion all secondaries * unless alias promotion is set **/ if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next; while ((ifa = rtnl_dereference(*ifap1)) != NULL) { if (!(ifa->ifa_flags & IFA_F_SECONDARY) && ifa1->ifa_scope <= ifa->ifa_scope) last_prim = &ifa->ifa_next; if (!(ifa->ifa_flags & IFA_F_SECONDARY) || ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) { ifap1 = &ifa->ifa_next; prev_prom = ifa; continue; } if (!do_promote) { inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); } else { promote = ifa; break; } } } /* On promotion all secondaries from subnet are changing * the primary IP, we must remove all their routes silently * and later to add them back with new prefsrc. Do this * while all addresses are on the device list. 
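 * (fib_del_ifaddr() below drops the secondaries' routes while they are
 * still linked into the list; the matching fib_add_ifaddr() calls happen
 * further down, once the promoted address has taken over as primary.)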
*/ for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) fib_del_ifaddr(ifa, ifa1); } no_promotions: /* 2. Unlink it */ *ifap = ifa1->ifa_next; inet_hash_remove(ifa1); /* 3. Announce address deletion */ /* Send message first, then call notifier. At first sight, FIB update triggered by notifier will refer to already deleted ifaddr, that could confuse netlink listeners. It is not true: look, gated sees that route deleted and if it still thinks that ifaddr is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { struct in_ifaddr *next_sec; next_sec = rtnl_dereference(promote->ifa_next); if (prev_prom) { struct in_ifaddr *last_sec; rcu_assign_pointer(prev_prom->ifa_next, next_sec); last_sec = rtnl_dereference(*last_prim); rcu_assign_pointer(promote->ifa_next, last_sec); rcu_assign_pointer(*last_prim, promote); } promote->ifa_flags &= ~IFA_F_SECONDARY; rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = next_sec; ifa; ifa = rtnl_dereference(ifa->ifa_next)) { if (ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) continue; fib_add_ifaddr(ifa); } } if (destroy) inet_free_ifa(ifa1); } static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr __rcu **ifap, int destroy) { __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } static void check_lifetime(struct work_struct *work); static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid, struct netlink_ext_ack *extack) { struct in_ifaddr __rcu **last_primary, **ifap; struct in_device *in_dev = ifa->ifa_dev; struct in_validator_info ivi; struct in_ifaddr *ifa1; int ret; ASSERT_RTNL(); if (!ifa->ifa_local) { inet_free_ifa(ifa); return 0; } ifa->ifa_flags &= ~IFA_F_SECONDARY; last_primary = &in_dev->ifa_list; /* Don't set IPv6 only flags to IPv4 addresses */ ifa->ifa_flags &= ~IPV6ONLY_FLAGS; ifap = &in_dev->ifa_list; ifa1 = rtnl_dereference(*ifap); while (ifa1) { if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope) last_primary = &ifa1->ifa_next; if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) { if (ifa1->ifa_local == ifa->ifa_local) { inet_free_ifa(ifa); return -EEXIST; } if (ifa1->ifa_scope != ifa->ifa_scope) { NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value"); inet_free_ifa(ifa); return -EINVAL; } ifa->ifa_flags |= IFA_F_SECONDARY; } ifap = &ifa1->ifa_next; ifa1 = rtnl_dereference(*ifap); } /* Allow any devices that wish to register ifaddr validtors to weigh * in now, before changes are committed. The rntl lock is serializing * access here, so the state should not change between a validator call * and a final notify on commit. This isn't invoked on promotion under * the assumption that validators are checking the address itself, and * not the flags. 
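 * (A nonzero return from the validator chain aborts the insert: the value
 * is converted with notifier_to_errno() and the candidate ifaddr is freed.)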
*/ ivi.ivi_addr = ifa->ifa_address; ivi.ivi_dev = ifa->ifa_dev; ivi.extack = extack; ret = blocking_notifier_call_chain(&inetaddr_validator_chain, NETDEV_UP, &ivi); ret = notifier_to_errno(ret); if (ret) { inet_free_ifa(ifa); return ret; } if (!(ifa->ifa_flags & IFA_F_SECONDARY)) ifap = last_primary; rcu_assign_pointer(ifa->ifa_next, *ifap); rcu_assign_pointer(*ifap, ifa); inet_hash_insert(dev_net(in_dev->dev), ifa); cancel_delayed_work(&check_lifetime_work); queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; } static int inet_insert_ifa(struct in_ifaddr *ifa) { return __inet_insert_ifa(ifa, NULL, 0, NULL); } static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); if (!in_dev) { inet_free_ifa(ifa); return -ENOBUFS; } ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); if (ifa->ifa_dev != in_dev) { WARN_ON(ifa->ifa_dev); in_dev_hold(in_dev); ifa->ifa_dev = in_dev; } if (ipv4_is_loopback(ifa->ifa_local)) ifa->ifa_scope = RT_SCOPE_HOST; return inet_insert_ifa(ifa); } /* Caller must hold RCU or RTNL : * We dont take a reference on found in_device */ struct in_device *inetdev_by_index(struct net *net, int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) in_dev = rcu_dereference_rtnl(dev->ip_ptr); rcu_read_unlock(); return in_dev; } EXPORT_SYMBOL(inetdev_by_index); /* Called only from RTNL semaphored context. No locks. 
*/ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask) { struct in_ifaddr *ifa; ASSERT_RTNL(); in_dev_for_each_ifa_rtnl(ifa, in_dev) { if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) return ifa; } return NULL; } static int ip_mc_autojoin_config(struct net *net, bool join, const struct in_ifaddr *ifa) { #if defined(CONFIG_IP_MULTICAST) struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ifa->ifa_address, .imr_ifindex = ifa->ifa_dev->dev->ifindex, }; struct sock *sk = net->ipv4.mc_autojoin_sk; int ret; ASSERT_RTNL(); lock_sock(sk); if (join) ret = ip_mc_join_group(sk, &mreq); else ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); return ret; #else return -EOPNOTSUPP; #endif } static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct in_ifaddr __rcu **ifap; struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; struct ifaddrmsg *ifm; struct in_ifaddr *ifa; int err; ASSERT_RTNL(); err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) goto errout; ifm = nlmsg_data(nlh); in_dev = inetdev_by_index(net, ifm->ifa_index); if (!in_dev) { NL_SET_ERR_MSG(extack, "ipv4: Device not found"); err = -ENODEV; goto errout; } for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL; ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) continue; if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) continue; if (tb[IFA_ADDRESS] && (ifm->ifa_prefixlen != ifa->ifa_prefixlen || !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa))) continue; if (ipv4_is_multicast(ifa->ifa_address)) ip_mc_autojoin_config(net, false, ifa); __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } NL_SET_ERR_MSG(extack, "ipv4: Address not found"); err = -EADDRNOTAVAIL; errout: return err; } #define INFINITY_LIFE_TIME 0xFFFFFFFF static void check_lifetime(struct work_struct *work) { unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; struct hlist_node *n; int i; now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); for (i = 0; i < IN4_ADDR_HSIZE; i++) { bool change_needed = false; rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age, tstamp; u32 preferred_lft; u32 valid_lft; u32 flags; flags = READ_ONCE(ifa->ifa_flags); if (flags & IFA_F_PERMANENT) continue; preferred_lft = READ_ONCE(ifa->ifa_preferred_lft); valid_lft = READ_ONCE(ifa->ifa_valid_lft); tstamp = READ_ONCE(ifa->ifa_tstamp); /* We try to batch several events at once. */ age = (now - tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (valid_lft != INFINITY_LIFE_TIME && age >= valid_lft) { change_needed = true; } else if (preferred_lft == INFINITY_LIFE_TIME) { continue; } else if (age >= preferred_lft) { if (time_before(tstamp + valid_lft * HZ, next)) next = tstamp + valid_lft * HZ; if (!(flags & IFA_F_DEPRECATED)) change_needed = true; } else if (time_before(tstamp + preferred_lft * HZ, next)) { next = tstamp + preferred_lft * HZ; } } rcu_read_unlock(); if (!change_needed) continue; rtnl_lock(); hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { unsigned long age; if (ifa->ifa_flags & IFA_F_PERMANENT) continue; /* We try to batch several events at once. 
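 * (This is the second, RTNL-protected pass: the RCU walk above only
 * detects that some address needs attention; actual expiry and
 * deprecation are applied here, where the lists may be modified.)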
*/ age = (now - ifa->ifa_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_valid_lft) { struct in_ifaddr __rcu **ifap; struct in_ifaddr *tmp; ifap = &ifa->ifa_dev->ifa_list; tmp = rtnl_dereference(*ifap); while (tmp) { if (tmp == ifa) { inet_del_ifa(ifa->ifa_dev, ifap, 1); break; } ifap = &tmp->ifa_next; tmp = rtnl_dereference(*ifap); } } else if (ifa->ifa_preferred_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_preferred_lft && !(ifa->ifa_flags & IFA_F_DEPRECATED)) { ifa->ifa_flags |= IFA_F_DEPRECATED; rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } rtnl_unlock(); } next_sec = round_jiffies_up(next); next_sched = next; /* If rounded timeout is accurate enough, accept it. */ if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) next_sched = next_sec; now = jiffies; /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, next_sched - now); } static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, __u32 prefered_lft) { unsigned long timeout; u32 flags; flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) WRITE_ONCE(ifa->ifa_valid_lft, timeout); else flags |= IFA_F_PERMANENT; timeout = addrconf_timeout_fixup(prefered_lft, HZ); if (addrconf_finite_timeout(timeout)) { if (timeout == 0) flags |= IFA_F_DEPRECATED; WRITE_ONCE(ifa->ifa_preferred_lft, timeout); } WRITE_ONCE(ifa->ifa_flags, flags); WRITE_ONCE(ifa->ifa_tstamp, jiffies); if (!ifa->ifa_cstamp) WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); } static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, __u32 *pvalid_lft, __u32 *pprefered_lft, struct netlink_ext_ack *extack) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; struct ifaddrmsg *ifm; struct net_device *dev; struct in_device *in_dev; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) goto errout; ifm = nlmsg_data(nlh); err = -EINVAL; if (ifm->ifa_prefixlen > 32) { NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length"); goto errout; } if (!tb[IFA_LOCAL]) { NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied"); goto errout; } dev = __dev_get_by_index(net, ifm->ifa_index); err = -ENODEV; if (!dev) { NL_SET_ERR_MSG(extack, "ipv4: Device not found"); goto errout; } in_dev = __in_dev_get_rtnl(dev); err = -ENOBUFS; if (!in_dev) goto errout; ifa = inet_alloc_ifa(); if (!ifa) /* * A potential indev allocation can be left alive, it stays * assigned to its device and is destroy with it. */ goto errout; ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); in_dev_hold(in_dev); if (!tb[IFA_ADDRESS]) tb[IFA_ADDRESS] = tb[IFA_LOCAL]; INIT_HLIST_NODE(&ifa->hash); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); ifa->ifa_flags = tb[IFA_FLAGS] ? 
nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_dev = in_dev; ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]); ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]); if (tb[IFA_BROADCAST]) ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]); if (tb[IFA_LABEL]) nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); if (tb[IFA_RT_PRIORITY]) ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]); if (tb[IFA_PROTO]) ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]); if (tb[IFA_CACHEINFO]) { struct ifa_cacheinfo *ci; ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid"); err = -EINVAL; goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; } return ifa; errout_free: inet_free_ifa(ifa); errout: return ERR_PTR(err); } static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1; if (!ifa->ifa_local) return NULL; in_dev_for_each_ifa_rtnl(ifa1, in_dev) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa) && ifa1->ifa_local == ifa->ifa_local) return ifa1; } return NULL; } static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; struct in_ifaddr *ifa_existing; __u32 valid_lft = INFINITY_LIFE_TIME; __u32 prefered_lft = INFINITY_LIFE_TIME; ASSERT_RTNL(); ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack); if (IS_ERR(ifa)) return PTR_ERR(ifa); ifa_existing = find_matching_ifa(ifa); if (!ifa_existing) { /* It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { int ret = ip_mc_autojoin_config(net, true, ifa); if (ret < 0) { NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed"); inet_free_ifa(ifa); return ret; } } return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack); } else { u32 new_metric = ifa->ifa_rt_priority; u8 new_proto = ifa->ifa_proto; inet_free_ifa(ifa); if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) { NL_SET_ERR_MSG(extack, "ipv4: Address already assigned"); return -EEXIST; } ifa = ifa_existing; if (ifa->ifa_rt_priority != new_metric) { fib_modify_prefix_metric(ifa, new_metric); ifa->ifa_rt_priority = new_metric; } ifa->ifa_proto = new_proto; set_ifa_lifetime(ifa, valid_lft, prefered_lft); cancel_delayed_work(&check_lifetime_work); queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); } return 0; } /* * Determine a default network mask, based on the IP address. */ static int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. 
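 * (Addresses that fit no class keep rc = -1; callers such as the
 * SIOCSIFADDR path in devinet_ioctl() treat that as -EINVAL.)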
*/ if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) rc = 0; else { __u32 haddr = ntohl(addr); if (IN_CLASSA(haddr)) rc = 8; else if (IN_CLASSB(haddr)) rc = 16; else if (IN_CLASSC(haddr)) rc = 24; else if (IN_CLASSE(haddr)) rc = 32; } return rc; } int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) { struct sockaddr_in sin_orig; struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; struct in_ifaddr __rcu **ifap = NULL; struct in_device *in_dev; struct in_ifaddr *ifa = NULL; struct net_device *dev; char *colon; int ret = -EFAULT; int tryaddrmatch = 0; ifr->ifr_name[IFNAMSIZ - 1] = 0; /* save original address for comparison */ memcpy(&sin_orig, sin, sizeof(*sin)); colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; dev_load(net, ifr->ifr_name); switch (cmd) { case SIOCGIFADDR: /* Get interface address */ case SIOCGIFBRDADDR: /* Get the broadcast address */ case SIOCGIFDSTADDR: /* Get the destination address */ case SIOCGIFNETMASK: /* Get the netmask for the interface */ /* Note that these ioctls will not sleep, so that we do not impose a lock. One day we will be forced to put shlock here (I mean SMP) */ tryaddrmatch = (sin_orig.sin_family == AF_INET); memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; break; case SIOCSIFFLAGS: ret = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto out; break; case SIOCSIFADDR: /* Set interface address (and family) */ case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ ret = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto out; ret = -EINVAL; if (sin->sin_family != AF_INET) goto out; break; default: ret = -EINVAL; goto out; } rtnl_lock(); ret = -ENODEV; dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) goto done; if (colon) *colon = ':'; in_dev = __in_dev_get_rtnl(dev); if (in_dev) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ /* note: we only do this for a limited set of ioctls and only if the original address family was AF_INET. This is checked above. 
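 * (e.g. a SIOCGIFADDR for "eth0:1" with sin_addr filled in matches the
 * alias whose label is "eth0:1" and whose ifa_local equals that address;
 * the label-only loop below covers 4.3BSD-style callers that pass junk
 * in the address field.)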
*/ for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL; ifap = &ifa->ifa_next) { if (!strcmp(ifr->ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == ifa->ifa_local) { break; /* found */ } } } /* we didn't get a match, maybe the application is 4.3BSD-style and passed in junk so we fall back to comparing just the label */ if (!ifa) { for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL; ifap = &ifa->ifa_next) if (!strcmp(ifr->ifr_name, ifa->ifa_label)) break; } } ret = -EADDRNOTAVAIL; if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) goto done; switch (cmd) { case SIOCGIFADDR: /* Get interface address */ ret = 0; sin->sin_addr.s_addr = ifa->ifa_local; break; case SIOCGIFBRDADDR: /* Get the broadcast address */ ret = 0; sin->sin_addr.s_addr = ifa->ifa_broadcast; break; case SIOCGIFDSTADDR: /* Get the destination address */ ret = 0; sin->sin_addr.s_addr = ifa->ifa_address; break; case SIOCGIFNETMASK: /* Get the netmask for the interface */ ret = 0; sin->sin_addr.s_addr = ifa->ifa_mask; break; case SIOCSIFFLAGS: if (colon) { ret = -EADDRNOTAVAIL; if (!ifa) break; ret = 0; if (!(ifr->ifr_flags & IFF_UP)) inet_del_ifa(in_dev, ifap, 1); break; } ret = dev_change_flags(dev, ifr->ifr_flags, NULL); break; case SIOCSIFADDR: /* Set interface address (and family) */ ret = -EINVAL; if (inet_abc_len(sin->sin_addr.s_addr) < 0) break; if (!ifa) { ret = -ENOBUFS; ifa = inet_alloc_ifa(); if (!ifa) break; INIT_HLIST_NODE(&ifa->hash); if (colon) memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); } else { ret = 0; if (ifa->ifa_local == sin->sin_addr.s_addr) break; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = 0; ifa->ifa_scope = 0; } ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; if (!(dev->flags & IFF_POINTOPOINT)) { ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); if ((dev->flags & IFF_BROADCAST) && ifa->ifa_prefixlen < 31) ifa->ifa_broadcast = ifa->ifa_address | ~ifa->ifa_mask; } else { ifa->ifa_prefixlen = 32; ifa->ifa_mask = inet_make_mask(32); } set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ret = inet_set_ifa(dev, ifa); break; case SIOCSIFBRDADDR: /* Set the broadcast address */ ret = 0; if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = sin->sin_addr.s_addr; inet_insert_ifa(ifa); } break; case SIOCSIFDSTADDR: /* Set the destination address */ ret = 0; if (ifa->ifa_address == sin->sin_addr.s_addr) break; ret = -EINVAL; if (inet_abc_len(sin->sin_addr.s_addr) < 0) break; ret = 0; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_address = sin->sin_addr.s_addr; inet_insert_ifa(ifa); break; case SIOCSIFNETMASK: /* Set the netmask for the interface */ /* * The mask we set must be legal. */ ret = -EINVAL; if (bad_mask(sin->sin_addr.s_addr, 0)) break; ret = 0; if (ifa->ifa_mask != sin->sin_addr.s_addr) { __be32 old_mask = ifa->ifa_mask; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); /* See if current broadcast address matches * with current netmask, then recalculate * the broadcast address. Otherwise it's a * funny address, so don't touch it since * the user seems to know what (s)he's doing... 
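 * (e.g. local 192.168.1.5 with the old /24 mask has broadcast
 * 192.168.1.255, which equals local|~old_mask; changing the mask to
 * 255.255.0.0 therefore recomputes the broadcast to 192.168.255.255.)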
*/ if ((dev->flags & IFF_BROADCAST) && (ifa->ifa_prefixlen < 31) && (ifa->ifa_broadcast == (ifa->ifa_local|~old_mask))) { ifa->ifa_broadcast = (ifa->ifa_local | ~sin->sin_addr.s_addr); } inet_insert_ifa(ifa); } break; } done: rtnl_unlock(); out: return ret; } int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { struct in_device *in_dev = __in_dev_get_rtnl(dev); const struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; if (WARN_ON(size > sizeof(struct ifreq))) goto out; if (!in_dev) goto out; in_dev_for_each_ifa_rtnl(ifa, in_dev) { if (!buf) { done += size; continue; } if (len < size) break; memset(&ifr, 0, sizeof(struct ifreq)); strcpy(ifr.ifr_name, ifa->ifa_label); (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local; if (copy_to_user(buf + done, &ifr, size)) { done = -EFAULT; break; } len -= size; done += size; } out: return done; } static __be32 in_dev_select_addr(const struct in_device *in_dev, int scope) { const struct in_ifaddr *ifa; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) continue; if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) return ifa->ifa_local; } return 0; } __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { const struct in_ifaddr *ifa; __be32 addr = 0; unsigned char localnet_scope = RT_SCOPE_HOST; struct in_device *in_dev; struct net *net = dev_net(dev); int master_idx; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev))) localnet_scope = RT_SCOPE_LINK; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) continue; if (min(ifa->ifa_scope, localnet_scope) > scope) continue; if (!dst || inet_ifa_match(dst, ifa)) { addr = ifa->ifa_local; break; } if (!addr) addr = ifa->ifa_local; } if (addr) goto out_unlock; no_in_dev: master_idx = l3mdev_master_ifindex_rcu(dev); /* For VRFs, the VRF device takes the place of the loopback device, * with addresses on it being preferred. Note in such cases the * loopback device will be among the devices that fail the master_idx * equality check in the loop below. */ if (master_idx && (dev = dev_get_by_index_rcu(net, master_idx)) && (in_dev = __in_dev_get_rcu(dev))) { addr = in_dev_select_addr(in_dev, scope); if (addr) goto out_unlock; } /* Not loopback addresses on loopback should be preferred in this case. It is important that lo is the first interface in dev_base list. */ for_each_netdev_rcu(net, dev) { if (l3mdev_master_ifindex_rcu(dev) != master_idx) continue; in_dev = __in_dev_get_rcu(dev); if (!in_dev) continue; addr = in_dev_select_addr(in_dev, scope); if (addr) goto out_unlock; } out_unlock: rcu_read_unlock(); return addr; } EXPORT_SYMBOL(inet_select_addr); static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) { unsigned char localnet_scope = RT_SCOPE_HOST; const struct in_ifaddr *ifa; __be32 addr = 0; int same = 0; if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev))) localnet_scope = RT_SCOPE_LINK; in_dev_for_each_ifa_rcu(ifa, in_dev) { unsigned char min_scope = min(ifa->ifa_scope, localnet_scope); if (!addr && (local == ifa->ifa_local || !local) && min_scope <= scope) { addr = ifa->ifa_local; if (same) break; } if (!same) { same = (!local || inet_ifa_match(local, ifa)) && (!dst || inet_ifa_match(dst, ifa)); if (same && addr) { if (local || !dst) break; /* Is the selected addr into dst subnet? 
*/ if (inet_ifa_match(addr, ifa)) break; /* No, then can we use new local src? */ if (min_scope <= scope) { addr = ifa->ifa_local; break; } /* search for large dst subnet for addr */ same = 0; } } } return same ? addr : 0; } /* * Confirm that local IP address exists using wildcards: * - net: netns to check, cannot be NULL * - in_dev: only on this interface, NULL=any interface * - dst: only in the same subnet as dst, 0=any dst * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) { __be32 addr = 0; struct net_device *dev; if (in_dev) return confirm_addr_indev(in_dev, dst, local, scope); rcu_read_lock(); for_each_netdev_rcu(net, dev) { in_dev = __in_dev_get_rcu(dev); if (in_dev) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; } } rcu_read_unlock(); return addr; } EXPORT_SYMBOL(inet_confirm_addr); /* * Device notifier */ int register_inetaddr_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&inetaddr_chain, nb); } EXPORT_SYMBOL(register_inetaddr_notifier); int unregister_inetaddr_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&inetaddr_chain, nb); } EXPORT_SYMBOL(unregister_inetaddr_notifier); int register_inetaddr_validator_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&inetaddr_validator_chain, nb); } EXPORT_SYMBOL(register_inetaddr_validator_notifier); int unregister_inetaddr_validator_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&inetaddr_validator_chain, nb); } EXPORT_SYMBOL(unregister_inetaddr_validator_notifier); /* Rename ifa_labels for a device name change. Make some effort to preserve * existing alias numbering and to create unique labels if possible. 
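 * (e.g. renaming eth0 to eth1 turns the alias label "eth0:1" into
 * "eth1:1"; aliases whose old label had no ":" suffix get a generated
 * ":<n>" suffix so that labels stay unique.)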
*/ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) { struct in_ifaddr *ifa; int named = 0; in_dev_for_each_ifa_rtnl(ifa, in_dev) { char old[IFNAMSIZ], *dot; memcpy(old, ifa->ifa_label, IFNAMSIZ); memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); if (named++ == 0) goto skip; dot = strchr(old, ':'); if (!dot) { sprintf(old, ":%d", named); dot = old; } if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) strcat(ifa->ifa_label, dot); else strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); skip: rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } static void inetdev_send_gratuitous_arp(struct net_device *dev, struct in_device *in_dev) { const struct in_ifaddr *ifa; in_dev_for_each_ifa_rtnl(ifa, in_dev) { arp_send(ARPOP_REQUEST, ETH_P_ARP, ifa->ifa_local, dev, ifa->ifa_local, NULL, dev->dev_addr, NULL); } } /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); if (IS_ERR(in_dev)) return notifier_from_errno(PTR_ERR(in_dev)); if (dev->flags & IFF_LOOPBACK) { IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } } else if (event == NETDEV_CHANGEMTU) { /* Re-enabling IP */ if (inetdev_valid_mtu(dev->mtu)) in_dev = inetdev_init(dev); } goto out; } switch (event) { case NETDEV_REGISTER: pr_debug("%s: bug\n", __func__); RCU_INIT_POINTER(dev->ip_ptr, NULL); break; case NETDEV_UP: if (!inetdev_valid_mtu(dev->mtu)) break; if (dev->flags & IFF_LOOPBACK) { struct in_ifaddr *ifa = inet_alloc_ifa(); if (ifa) { INIT_HLIST_NODE(&ifa->hash); ifa->ifa_local = ifa->ifa_address = htonl(INADDR_LOOPBACK); ifa->ifa_prefixlen = 8; ifa->ifa_mask = inet_make_mask(8); in_dev_hold(in_dev); ifa->ifa_dev = in_dev; ifa->ifa_scope = RT_SCOPE_HOST; memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); inet_insert_ifa(ifa); } } ip_mc_up(in_dev); fallthrough; case NETDEV_CHANGEADDR: if (!IN_DEV_ARP_NOTIFY(in_dev)) break; fallthrough; case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ inetdev_send_gratuitous_arp(dev, in_dev); break; case NETDEV_DOWN: ip_mc_down(in_dev); break; case NETDEV_PRE_TYPE_CHANGE: ip_mc_unmap(in_dev); break; case NETDEV_POST_TYPE_CHANGE: ip_mc_remap(in_dev); break; case NETDEV_CHANGEMTU: if (inetdev_valid_mtu(dev->mtu)) break; /* disable IP when MTU is not enough */ fallthrough; case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; case NETDEV_CHANGENAME: /* Do not notify about label change, this event is * not interesting to applications using netlink. 
*/ inetdev_changename(dev, in_dev); devinet_sysctl_unregister(in_dev); devinet_sysctl_register(in_dev); break; } out: return NOTIFY_DONE; } static struct notifier_block ip_netdev_notifier = { .notifier_call = inetdev_event, }; static size_t inet_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(4) /* IFA_ADDRESS */ + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + nla_total_size(4) /* IFA_FLAGS */ + nla_total_size(1) /* IFA_PROTO */ + nla_total_size(4) /* IFA_RT_PRIORITY */ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ } static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, unsigned long tstamp, u32 preferred, u32 valid) { struct ifa_cacheinfo ci; ci.cstamp = cstamp_delta(cstamp); ci.tstamp = cstamp_delta(tstamp); ci.ifa_prefered = preferred; ci.ifa_valid = valid; return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); } static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, struct inet_fill_args *args) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; unsigned long tstamp; u32 preferred, valid; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm), args->flags); if (!nlh) return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; ifm->ifa_flags = READ_ONCE(ifa->ifa_flags); ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; if (args->netnsid >= 0 && nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto nla_put_failure; tstamp = READ_ONCE(ifa->ifa_tstamp); if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { preferred = READ_ONCE(ifa->ifa_preferred_lft); valid = READ_ONCE(ifa->ifa_valid_lft); if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - tstamp) / HZ; if (preferred > tval) preferred -= tval; else preferred = 0; if (valid != INFINITY_LIFE_TIME) { if (valid > tval) valid -= tval; else valid = 0; } } } else { preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } if ((ifa->ifa_address && nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) || (ifa->ifa_local && nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) || (ifa->ifa_broadcast && nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || (ifa->ifa_proto && nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, preferred, valid)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, struct inet_fill_args *fillargs, struct net **tgt_net, struct sock *sk, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[IFA_MAX+1]; struct ifaddrmsg *ifm; int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request"); return -EINVAL; } ifm = nlmsg_data(nlh); if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request"); return -EINVAL; } fillargs->ifindex = ifm->ifa_index; if (fillargs->ifindex) { cb->answer_flags |= 
NLM_F_DUMP_FILTERED; fillargs->flags |= NLM_F_DUMP_FILTERED; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) return err; for (i = 0; i <= IFA_MAX; ++i) { if (!tb[i]) continue; if (i == IFA_TARGET_NETNSID) { struct net *net; fillargs->netnsid = nla_get_s32(tb[i]); net = rtnl_get_net_ns_capable(sk, fillargs->netnsid); if (IS_ERR(net)) { fillargs->netnsid = -1; NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id"); return PTR_ERR(net); } *tgt_net = net; } else { NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request"); return -EINVAL; } } return 0; } static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, struct netlink_callback *cb, int *s_ip_idx, struct inet_fill_args *fillargs) { struct in_ifaddr *ifa; int ip_idx = 0; int err; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (ip_idx < *s_ip_idx) { ip_idx++; continue; } err = inet_fill_ifaddr(skb, ifa, fillargs); if (err < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ip_idx++; } err = 0; ip_idx = 0; done: *s_ip_idx = ip_idx; return err; } /* Combine dev_addr_genid and dev_base_seq to detect changes. */ static u32 inet_base_seq(const struct net *net) { u32 res = atomic_read(&net->ipv4.dev_addr_genid) + READ_ONCE(net->dev_base_seq); /* Must not return 0 (see nl_dump_check_consistent()). * Chose a value far away from 0. */ if (!res) res = 0x80000000; return res; } static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct inet_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = nlh->nlmsg_seq, .event = RTM_NEWADDR, .flags = NLM_F_MULTI, .netnsid = -1, }; struct net *net = sock_net(skb->sk); struct net *tgt_net = net; struct { unsigned long ifindex; int ip_idx; } *ctx = (void *)cb->ctx; struct in_device *in_dev; struct net_device *dev; int err = 0; rcu_read_lock(); if (cb->strict_check) { err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) goto done; if (fillargs.ifindex) { err = -ENODEV; dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); if (!dev) goto done; in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto done; err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, &fillargs); goto done; } } cb->seq = inet_base_seq(tgt_net); for_each_netdev_dump(net, dev, ctx->ifindex) { in_dev = __in_dev_get_rcu(dev); if (!in_dev) continue; err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, &fillargs); if (err < 0) goto done; } done: if (fillargs.netnsid >= 0) put_net(tgt_net); rcu_read_unlock(); return err; } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { struct inet_fill_args fillargs = { .portid = portid, .seq = nlh ? 
nlh->nlmsg_seq : 0, .event = event, .flags = 0, .netnsid = -1, }; struct sk_buff *skb; int err = -ENOBUFS; struct net *net; net = dev_net(ifa->ifa_dev->dev); skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); if (!skb) goto errout; err = inet_fill_ifaddr(skb, ifa, &fillargs); if (err < 0) { /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } static size_t inet_get_link_af_size(const struct net_device *dev, u32 ext_filter_mask) { struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); if (!in_dev) return 0; return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ } static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, u32 ext_filter_mask) { struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); struct nlattr *nla; int i; if (!in_dev) return -ENODATA; nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); if (!nla) return -EMSGSIZE; for (i = 0; i < IPV4_DEVCONF_MAX; i++) ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]); return 0; } static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { [IFLA_INET_CONF] = { .type = NLA_NESTED }, }; static int inet_validate_link_af(const struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT; err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, inet_af_policy, extack); if (err < 0) return err; if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { int cfgid = nla_type(a); if (nla_len(a) < 4) return -EINVAL; if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) return -EINVAL; } } return 0; } static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct in_device *in_dev = __in_dev_get_rtnl(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; int rem; if (!in_dev) return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) return -EINVAL; if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); } return 0; } static int inet_netconf_msgsize_devconf(int type) { int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + nla_total_size(4); /* NETCONFA_IFINDEX */ bool all = false; if (type == NETCONFA_ALL) all = true; if (all || type == NETCONFA_FORWARDING) size += nla_total_size(4); if (all || type == NETCONFA_RP_FILTER) size += nla_total_size(4); if (all || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); if (all || type == NETCONFA_BC_FORWARDING) size += nla_total_size(4); if (all || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) size += nla_total_size(4); return size; } static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, const struct ipv4_devconf *devconf, u32 portid, u32 seq, int event, unsigned int flags, int type) { struct nlmsghdr *nlh; struct netconfmsg *ncm; bool all = false; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); if (!nlh) return -EMSGSIZE; if (type == NETCONFA_ALL) all = true; ncm = nlmsg_data(nlh); ncm->ncm_family = AF_INET; if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; if (!devconf) goto out; if ((all || type == 
NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_RP_FILTER) && nla_put_s32(skb, NETCONFA_RP_FILTER, IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_BC_FORWARDING) && nla_put_s32(skb, NETCONFA_BC_FORWARDING, IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_PROXY_NEIGH) && nla_put_s32(skb, NETCONFA_PROXY_NEIGH, IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, IPV4_DEVCONF_RO(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) goto nla_put_failure; out: nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } void inet_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv4_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL); if (!skb) goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); } static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet_netconf_valid_get_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), tb, NETCONFA_MAX, devconf_ipv4_policy, extack); err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), tb, NETCONFA_MAX, devconf_ipv4_policy, extack); if (err) return err; for (i = 0; i <= NETCONFA_MAX; i++) { if (!tb[i]) continue; switch (i) { case NETCONFA_IFINDEX: break; default: NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request"); return -EINVAL; } } return 0; } static int inet_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX + 1]; const struct ipv4_devconf *devconf; struct in_device *in_dev = NULL; struct net_device *dev = NULL; struct sk_buff *skb; int ifindex; int err; err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err) return err; if (!tb[NETCONFA_IFINDEX]) return -EINVAL; ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); switch (ifindex) { case NETCONFA_IFINDEX_ALL: devconf = net->ipv4.devconf_all; break; case NETCONFA_IFINDEX_DEFAULT: devconf = net->ipv4.devconf_dflt; break; default: err = -ENODEV; dev = dev_get_by_index(net, ifindex); if (dev) in_dev = in_dev_get(dev); if (!in_dev) goto 
errout; devconf = &in_dev->cnf; break; } err = -ENOBUFS; skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); if (!skb) goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, 0, NETCONFA_ALL); if (err < 0) { /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: if (in_dev) in_dev_put(in_dev); dev_put(dev); return err; } static int inet_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct { unsigned long ifindex; unsigned int all_default; } *ctx = (void *)cb->ctx; const struct in_device *in_dev; struct net_device *dev; int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; struct netconfmsg *ncm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*ncm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request"); return -EINVAL; } } rcu_read_lock(); for_each_netdev_dump(net, dev, ctx->ifindex) { in_dev = __in_dev_get_rcu(dev); if (!in_dev) continue; err = inet_netconf_fill_devconf(skb, dev->ifindex, &in_dev->cnf, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; } if (ctx->all_default == 0) { err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; ctx->all_default++; } if (ctx->all_default == 1) { err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; ctx->all_default++; } done: rcu_read_unlock(); return err; } #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(net, dev) { struct in_device *in_dev; in_dev = __in_dev_get_rcu(dev); if (in_dev && !test_bit(i, in_dev->cnf.state)) in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; } rcu_read_unlock(); } /* called with RTNL locked */ static void inet_forward_change(struct net *net) { struct net_device *dev; int on = IPV4_DEVCONF_ALL(net, FORWARDING); IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); for_each_netdev(net, dev) { struct in_device *in_dev; if (on) dev_disable_lro(dev); in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, dev->ifindex, &in_dev->cnf); } } } static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) { if (cnf == net->ipv4.devconf_dflt) return NETCONFA_IFINDEX_DEFAULT; else if (cnf == net->ipv4.devconf_all) return NETCONFA_IFINDEX_ALL; else { struct in_device *idev = container_of(cnf, struct in_device, cnf); return idev->dev->ifindex; } } static int devinet_conf_proc(struct ctl_table *ctl, int write, void 
*buffer, size_t *lenp, loff_t *ppos) { int old_value = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); int new_value = *(int *)ctl->data; if (write) { struct ipv4_devconf *cnf = ctl->extra1; struct net *net = ctl->extra2; int i = (int *)ctl->data - cnf->data; int ifindex; set_bit(i, cnf->state); if (cnf == net->ipv4.devconf_dflt) devinet_copy_dflt_conf(net, i); if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); if (i == IPV4_DEVCONF_BC_FORWARDING - 1 && new_value != old_value) rt_cache_flush(net); if (i == IPV4_DEVCONF_RP_FILTER - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_RP_FILTER, ifindex, cnf); } if (i == IPV4_DEVCONF_PROXY_ARP - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_PROXY_NEIGH, ifindex, cnf); } if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, ifindex, cnf); } } return ret; } static int devinet_sysctl_forward(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; struct net *net = ctl->extra2; int ret; if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { if (!rtnl_trylock()) { /* Restore the original values before restarting */ *valp = val; *ppos = pos; return restart_syscall(); } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); } else { struct ipv4_devconf *cnf = ctl->extra1; struct in_device *idev = container_of(cnf, struct in_device, cnf); if (*valp) dev_disable_lro(idev->dev); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, idev->dev->ifindex, cnf); } rtnl_unlock(); rt_cache_flush(net); } else inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); } return ret; } static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); struct net *net = ctl->extra2; if (write && *valp != val) rt_cache_flush(net); return ret; } #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ { \ .procname = name, \ .data = ipv4_devconf.data + \ IPV4_DEVCONF_ ## attr - 1, \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ .extra1 = &ipv4_devconf, \ } #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", devinet_sysctl_forward), DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, 
"mc_forwarding"), DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER, "arp_evict_nocarrier"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, "force_igmp_version"), DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL, "igmpv2_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, "igmpv3_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, "ignore_routes_with_linkdown"), DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP, "drop_gratuitous_arp"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, "promote_secondaries"), DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, "route_localnet"), DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, "drop_unicast_in_l2_multicast"), }, }; static int __devinet_sysctl_register(struct net *net, char *dev_name, int ifindex, struct ipv4_devconf *p) { int i; struct devinet_sysctl_table *t; char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT); if (!t) goto out; for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; t->devinet_vars[i].extra1 = p; t->devinet_vars[i].extra2 = net; } snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); if (!t->sysctl_header) goto free; p->sysctl = t; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, ifindex, p); return 0; free: kfree(t); out: return -ENOMEM; } static void __devinet_sysctl_unregister(struct net *net, struct ipv4_devconf *cnf, int ifindex) { struct devinet_sysctl_table *t = cnf->sysctl; if (t) { cnf->sysctl = NULL; unregister_net_sysctl_table(t->sysctl_header); kfree(t); } inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int devinet_sysctl_register(struct in_device *idev) { int err; if (!sysctl_dev_name_is_allowed(idev->dev->name)) return -EINVAL; err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); if (err) return err; err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, &idev->cnf); if (err) neigh_sysctl_unregister(idev->arp_parms); return err; } static void devinet_sysctl_unregister(struct in_device *idev) { struct net *net = dev_net(idev->dev); __devinet_sysctl_unregister(net, &idev->cnf, 
idev->dev->ifindex); neigh_sysctl_unregister(idev->arp_parms); } static struct ctl_table ctl_forward_entry[] = { { .procname = "ip_forward", .data = &ipv4_devconf.data[ IPV4_DEVCONF_FORWARDING - 1], .maxlen = sizeof(int), .mode = 0644, .proc_handler = devinet_sysctl_forward, .extra1 = &ipv4_devconf, .extra2 = &init_net, }, { }, }; #endif static __net_init int devinet_init_net(struct net *net) { int err; struct ipv4_devconf *all, *dflt; #ifdef CONFIG_SYSCTL struct ctl_table *tbl; struct ctl_table_header *forw_hdr; #endif err = -ENOMEM; all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); if (!dflt) goto err_alloc_dflt; #ifdef CONFIG_SYSCTL tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL); if (!tbl) goto err_alloc_ctl; tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; tbl[0].extra1 = all; tbl[0].extra2 = net; #endif if (!net_eq(net, &init_net)) { switch (net_inherit_devconf()) { case 3: /* copy from the current netns */ memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all, sizeof(ipv4_devconf)); memcpy(dflt, current->nsproxy->net_ns->ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); break; case 0: case 1: /* copy from init_net */ memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf)); memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); break; case 2: /* use compiled values */ break; } } #ifdef CONFIG_SYSCTL err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all); if (err < 0) goto err_reg_all; err = __devinet_sysctl_register(net, "default", NETCONFA_IFINDEX_DEFAULT, dflt); if (err < 0) goto err_reg_dflt; err = -ENOMEM; forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl, ARRAY_SIZE(ctl_forward_entry)); if (!forw_hdr) goto err_reg_ctl; net->ipv4.forw_hdr = forw_hdr; #endif net->ipv4.devconf_all = all; net->ipv4.devconf_dflt = dflt; return 0; #ifdef CONFIG_SYSCTL err_reg_ctl: __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT); err_reg_dflt: __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: kfree(tbl); err_alloc_ctl: #endif kfree(dflt); err_alloc_dflt: kfree(all); err_alloc_all: return err; } static __net_exit void devinet_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL struct ctl_table *tbl; tbl = net->ipv4.forw_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.forw_hdr); __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, NETCONFA_IFINDEX_DEFAULT); __devinet_sysctl_unregister(net, net->ipv4.devconf_all, NETCONFA_IFINDEX_ALL); kfree(tbl); #endif kfree(net->ipv4.devconf_dflt); kfree(net->ipv4.devconf_all); } static __net_initdata struct pernet_operations devinet_ops = { .init = devinet_init_net, .exit = devinet_exit_net, }; static struct rtnl_af_ops inet_af_ops __read_mostly = { .family = AF_INET, .fill_link_af = inet_fill_link_af, .get_link_af_size = inet_get_link_af_size, .validate_link_af = inet_validate_link_af, .set_link_af = inet_set_link_af, }; void __init devinet_init(void) { int i; for (i = 0; i < IN4_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet_addr_lst[i]); register_pernet_subsys(&devinet_ops); register_netdevice_notifier(&ip_netdev_notifier); queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, RTNL_FLAG_DUMP_UNLOCKED); 
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, inet_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); }
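The RTM_GETNETCONF handler registered above can be exercised from userspace. Below is a minimal, hedged sketch (not part of the kernel sources) that sends an RTM_GETNETCONF request for the NETCONFA_IFINDEX_ALL pseudo-interface over a NETLINK_ROUTE socket; reply parsing and error handling are omitted, and query_ipv4_netconf_all() is an illustrative name.

#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
#include <sys/socket.h>
#include <string.h>
#include <unistd.h>

static int query_ipv4_netconf_all(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct netconfmsg ncm;
		char attrbuf[64];
	} req;
	int ifindex = NETCONFA_IFINDEX_ALL;
	struct nlattr *nla;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct netconfmsg));
	req.nlh.nlmsg_type = RTM_GETNETCONF;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.ncm.ncm_family = AF_INET;

	/* NETCONFA_IFINDEX is the one attribute inet_netconf_get_devconf()
	 * requires; NETCONFA_IFINDEX_ALL selects the "all" devconf. */
	nla = (struct nlattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	nla->nla_type = NETCONFA_IFINDEX;
	nla->nla_len = NLA_HDRLEN + sizeof(ifindex);
	memcpy((char *)nla + NLA_HDRLEN, &ifindex, sizeof(ifindex));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + NLA_ALIGN(nla->nla_len);

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0) {
		close(fd);
		return -1;
	}

	/* A real client would now recv() and parse the RTM_NEWNETCONF reply
	 * (NETCONFA_FORWARDING, NETCONFA_RP_FILTER, ...). */
	close(fd);
	return 0;
}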
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * inet6 interface/address list definitions * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> */ #ifndef _NET_IF_INET6_H #define _NET_IF_INET6_H #include <net/snmp.h> #include <linux/ipv6.h> #include <linux/refcount.h> /* inet6_dev.if_flags */ #define IF_RA_OTHERCONF 0x80 #define IF_RA_MANAGED 0x40 #define IF_RA_RCVD 0x20 #define IF_RS_SENT 0x10 #define IF_READY 0x80000000 enum { INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, INET6_IFADDR_STATE_POSTDAD, INET6_IFADDR_STATE_ERRDAD, INET6_IFADDR_STATE_DEAD, }; struct inet6_ifaddr { struct in6_addr addr; __u32 prefix_len; __u32 rt_priority; /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 valid_lft; __u32 prefered_lft; refcount_t refcnt; spinlock_t lock; int state; __u32 flags; __u8 dad_probes; __u8 stable_privacy_retry; __u16 scope; __u64 dad_nonce; unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ struct delayed_work dad_work; struct inet6_dev *idev; struct fib6_info *rt; struct hlist_node addr_lst; struct list_head if_list; /* * Used to safely traverse idev->addr_list in process context * if the idev->lock needed to protect idev->addr_list cannot be held. * In that case, add the items to this list temporarily and iterate * without holding idev->lock. * See addrconf_ifdown and dev_forward_change. */ struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; int regen_count; bool tokenized; u8 ifa_proto; struct rcu_head rcu; struct in6_addr peer_addr; }; struct ip6_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; struct in6_addr sl_addr[] __counted_by(sl_max); }; #define IP6_SFBLOCK 10 /* allocate this many at once */ struct ipv6_mc_socklist { struct in6_addr addr; int ifindex; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ipv6_mc_socklist __rcu *next; struct ip6_sf_socklist __rcu *sflist; struct rcu_head rcu; }; struct ip6_sf_list { struct ip6_sf_list __rcu *sf_next; struct in6_addr sf_addr; unsigned long sf_count[2]; /* include/exclude counts */ unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ unsigned char sf_crcount; /* retrans. 
left to send */ struct rcu_head rcu; }; #define MAF_TIMER_RUNNING 0x01 #define MAF_LAST_REPORTER 0x02 #define MAF_LOADED 0x04 #define MAF_NOREPORT 0x08 #define MAF_GSQUERY 0x10 struct ifmcaddr6 { struct in6_addr mca_addr; struct inet6_dev *idev; struct ifmcaddr6 __rcu *next; struct ip6_sf_list __rcu *mca_sources; struct ip6_sf_list __rcu *mca_tomb; unsigned int mca_sfmode; unsigned char mca_crcount; unsigned long mca_sfcount[2]; struct delayed_work mca_work; unsigned int mca_flags; int mca_users; refcount_t mca_refcnt; unsigned long mca_cstamp; unsigned long mca_tstamp; struct rcu_head rcu; }; /* Anycast stuff */ struct ipv6_ac_socklist { struct in6_addr acl_addr; int acl_ifindex; struct ipv6_ac_socklist *acl_next; }; struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; struct ifacaddr6 __rcu *aca_next; struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; struct rcu_head rcu; }; #define IFA_HOST IPV6_ADDR_LOOPBACK #define IFA_LINK IPV6_ADDR_LINKLOCAL #define IFA_SITE IPV6_ADDR_SITELOCAL struct ipv6_devstat { struct proc_dir_entry *proc_dir_entry; DEFINE_SNMP_STAT(struct ipstats_mib, ipv6); DEFINE_SNMP_STAT_ATOMIC(struct icmpv6_mib_device, icmpv6dev); DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib_device, icmpv6msgdev); }; struct inet6_dev { struct net_device *dev; netdevice_tracker dev_tracker; struct list_head addr_list; struct ifmcaddr6 __rcu *mc_list; struct ifmcaddr6 __rcu *mc_tomb; unsigned char mc_qrv; /* Query Robustness Variable */ unsigned char mc_gq_running; unsigned char mc_ifc_count; unsigned char mc_dad_count; unsigned long mc_v1_seen; /* Max time we stay in MLDv1 mode */ unsigned long mc_qi; /* Query Interval */ unsigned long mc_qri; /* Query Response Interval */ unsigned long mc_maxdelay; struct delayed_work mc_gq_work; /* general query work */ struct delayed_work mc_ifc_work; /* interface change work */ struct delayed_work mc_dad_work; /* dad complete mc work */ struct delayed_work mc_query_work; /* mld query work */ struct delayed_work mc_report_work; /* mld report work */ struct sk_buff_head mc_query_queue; /* mld query queue */ struct sk_buff_head mc_report_queue; /* mld report queue */ spinlock_t mc_query_lock; /* mld query queue lock */ spinlock_t mc_report_lock; /* mld query report lock */ struct mutex mc_lock; /* mld global lock */ struct ifacaddr6 __rcu *ac_list; rwlock_t lock; refcount_t refcnt; __u32 if_flags; int dead; u32 desync_factor; struct list_head tempaddr_list; struct in6_addr token; struct neigh_parms *nd_parms; struct ipv6_devconf cnf; struct ipv6_devstat stats; struct timer_list rs_timer; __s32 rs_interval; /* in jiffies */ __u8 rs_probes; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ struct rcu_head rcu; unsigned int ra_mtu; }; static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf) { /* * +-------+-------+-------+-------+-------+-------+ * | 33 | 33 | DST13 | DST14 | DST15 | DST16 | * +-------+-------+-------+-------+-------+-------+ */ buf[0]= 0x33; buf[1]= 0x33; memcpy(buf + 2, &addr->s6_addr32[3], sizeof(__u32)); } static inline void ipv6_arcnet_mc_map(const struct in6_addr *addr, char *buf) { buf[0] = 0x00; } static inline void ipv6_ib_mc_map(const struct in6_addr *addr, const unsigned char *broadcast, char *buf) { unsigned char scope = broadcast[5] & 0xF; buf[0] = 0; /* Reserved */ buf[1] = 0xff; /* Multicast QPN */ buf[2] = 0xff; buf[3] = 0xff; buf[4] = 0xff; buf[5] = 0x10 | scope; /* scope from broadcast address */ buf[6] = 
0x60; /* IPv6 signature */ buf[7] = 0x1b; buf[8] = broadcast[8]; /* P_Key */ buf[9] = broadcast[9]; memcpy(buf + 10, addr->s6_addr + 6, 10); } static inline int ipv6_ipgre_mc_map(const struct in6_addr *addr, const unsigned char *broadcast, char *buf) { if ((broadcast[0] | broadcast[1] | broadcast[2] | broadcast[3]) != 0) { memcpy(buf, broadcast, 4); } else { /* v4mapped? */ if ((addr->s6_addr32[0] | addr->s6_addr32[1] | (addr->s6_addr32[2] ^ htonl(0x0000ffff))) != 0) return -EINVAL; memcpy(buf, &addr->s6_addr32[3], 4); } return 0; } #endif
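As an illustration of the mapping helpers above, the following hedged kernel-context sketch uses ipv6_eth_mc_map() to derive the Ethernet multicast MAC for the all-nodes address ff02::1; example_all_nodes_mac() is an illustrative name.

#include <linux/in6.h>
#include <linux/if_ether.h>
#include <net/if_inet6.h>

static void example_all_nodes_mac(void)
{
	struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; /* ff02::1 */
	char mac[ETH_ALEN];

	/* 33:33 prefix followed by the low 32 bits of the address,
	 * so ff02::1 maps to 33:33:00:00:00:01. */
	ipv6_eth_mc_map(&all_nodes, mac);
}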
232 233 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 // SPDX-License-Identifier: GPL-2.0 /* * of.c The helpers for hcd device tree support * * Copyright (C) 2016 Freescale Semiconductor, Inc. * Author: Peter Chen <peter.chen@freescale.com> * Copyright (C) 2017 Johan Hovold <johan@kernel.org> */ #include <linux/of.h> #include <linux/of_graph.h> #include <linux/usb/of.h> /** * usb_of_get_device_node() - get a USB device node * @hub: hub to which device is connected * @port1: one-based index of port * * Look up the node of a USB device given its parent hub device and one-based * port number. * * Return: A pointer to the node with incremented refcount if found, or * %NULL otherwise. */ struct device_node *usb_of_get_device_node(struct usb_device *hub, int port1) { struct device_node *node; u32 reg; for_each_child_of_node(hub->dev.of_node, node) { if (of_property_read_u32(node, "reg", &reg)) continue; if (reg == port1) return node; } return NULL; } EXPORT_SYMBOL_GPL(usb_of_get_device_node); /** * usb_of_has_combined_node() - determine whether a device has a combined node * @udev: USB device * * Determine whether a USB device has a so called combined node which is * shared with its sole interface. This is the case if and only if the device * has a node and its descriptors report the following: * * 1) bDeviceClass is 0 or 9, and * 2) bNumConfigurations is 1, and * 3) bNumInterfaces is 1. * * Return: True iff the device has a device node and its descriptors match the * criteria for a combined node. */ bool usb_of_has_combined_node(struct usb_device *udev) { struct usb_device_descriptor *ddesc = &udev->descriptor; struct usb_config_descriptor *cdesc; if (!udev->dev.of_node) return false; switch (ddesc->bDeviceClass) { case USB_CLASS_PER_INTERFACE: case USB_CLASS_HUB: if (ddesc->bNumConfigurations == 1) { cdesc = &udev->config->desc; if (cdesc->bNumInterfaces == 1) return true; } } return false; } EXPORT_SYMBOL_GPL(usb_of_has_combined_node); static bool usb_of_has_devices_or_graph(const struct usb_device *hub) { const struct device_node *np = hub->dev.of_node; struct device_node *child; if (of_graph_is_present(np)) return true; for_each_child_of_node(np, child) if (of_property_present(child, "reg")) return true; return false; } /** * usb_of_get_connect_type() - get a USB hub's port connect_type * @hub: hub to which port is for @port1 * @port1: one-based index of port * * Get the connect_type of @port1 based on the device node for @hub. If the * port is described in the OF graph, the connect_type is "hotplug". If the * @hub has a child device has with a 'reg' property equal to @port1 the * connect_type is "hard-wired". If there isn't an OF graph or child node at * all then the connect_type is "unknown". Otherwise, the port is considered * "unused" because it isn't described at all. * * Return: A connect_type for @port1 based on the device node for @hub. 
*/ enum usb_port_connect_type usb_of_get_connect_type(struct usb_device *hub, int port1) { struct device_node *np, *child, *ep, *remote_np; enum usb_port_connect_type connect_type; /* Only set connect_type if binding has ports/hardwired devices. */ if (!usb_of_has_devices_or_graph(hub)) return USB_PORT_CONNECT_TYPE_UNKNOWN; /* Assume port is unused if there's a graph or a child node. */ connect_type = USB_PORT_NOT_USED; np = hub->dev.of_node; /* * Hotplug ports are connected to an available remote node, e.g. * usb-a-connector compatible node, in the OF graph. */ if (of_graph_is_present(np)) { ep = of_graph_get_endpoint_by_regs(np, port1, -1); if (ep) { remote_np = of_graph_get_remote_port_parent(ep); of_node_put(ep); if (of_device_is_available(remote_np)) connect_type = USB_PORT_CONNECT_TYPE_HOT_PLUG; of_node_put(remote_np); } } /* * Hard-wired ports are child nodes with a reg property corresponding * to the port number, i.e. a usb device. */ child = usb_of_get_device_node(hub, port1); if (of_device_is_available(child)) connect_type = USB_PORT_CONNECT_TYPE_HARD_WIRED; of_node_put(child); return connect_type; } EXPORT_SYMBOL_GPL(usb_of_get_connect_type); /** * usb_of_get_interface_node() - get a USB interface node * @udev: USB device of interface * @config: configuration value * @ifnum: interface number * * Look up the node of a USB interface given its USB device, configuration * value and interface number. * * Return: A pointer to the node with incremented refcount if found, or * %NULL otherwise. */ struct device_node * usb_of_get_interface_node(struct usb_device *udev, u8 config, u8 ifnum) { struct device_node *node; u32 reg[2]; for_each_child_of_node(udev->dev.of_node, node) { if (of_property_read_u32_array(node, "reg", reg, 2)) continue; if (reg[0] == ifnum && reg[1] == config) return node; } return NULL; } EXPORT_SYMBOL_GPL(usb_of_get_interface_node);
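A hedged sketch of how code holding valid usb_device pointers might combine the lookups above: resolve the child node for hub port 1, then the node for interface 0 of configuration 1. The function and variable names are illustrative only.

#include <linux/of.h>
#include <linux/printk.h>
#include <linux/usb.h>
#include <linux/usb/of.h>

static void example_usb_of_lookup(struct usb_device *hub,
				  struct usb_device *udev)
{
	struct device_node *port_np, *intf_np;

	/* Child of the hub's node with reg = <1>, i.e. the device on port 1. */
	port_np = usb_of_get_device_node(hub, 1);

	/* Interface node with reg = <0 1>: interface 0 of configuration 1. */
	intf_np = usb_of_get_interface_node(udev, 1, 0);

	if (port_np)
		pr_info("port 1 device node: %pOF\n", port_np);

	of_node_put(intf_np);
	of_node_put(port_np);
}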
2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer System services Client * Copyright (c) 1998-1999 by Frank van de Pol <fvdpol@coil.demon.nl> */ #include <linux/init.h> #include <linux/export.h> #include <linux/slab.h> #include <sound/core.h> #include "seq_system.h" #include "seq_timer.h" #include "seq_queue.h" /* internal client that provide system services, access to timer etc. */ /* * Port "Timer" * - send tempo /start/stop etc. events to this port to manipulate the * queue's timer. The queue address is specified in * data.queue.queue. * - this port supports subscription. The received timer events are * broadcasted to all subscribed clients. The modified tempo * value is stored on data.queue.value. * The modifier client/port is not send. * * Port "Announce" * - does not receive message * - supports supscription. For each client or port attaching to or * detaching from the system an announcement is send to the subscribed * clients. * * Idea: the subscription mechanism might also work handy for distributing * synchronisation and timing information. In this case we would ideally have * a list of subscribers for each type of sync (time, tick), for each timing * queue. * * NOTE: the queue to be started, stopped, etc. must be specified * in data.queue.addr.queue field. queue is used only for * scheduling, and no longer referred as affected queue. * They are used only for timer broadcast (see above). 
* -- iwai */ /* client id of our system client */ static int sysclient = -1; /* port id numbers for this client */ static int announce_port = -1; /* fill standard header data, source port & channel are filled in */ static int setheader(struct snd_seq_event * ev, int client, int port) { if (announce_port < 0) return -ENODEV; memset(ev, 0, sizeof(struct snd_seq_event)); ev->flags &= ~SNDRV_SEQ_EVENT_LENGTH_MASK; ev->flags |= SNDRV_SEQ_EVENT_LENGTH_FIXED; ev->source.client = sysclient; ev->source.port = announce_port; ev->dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; /* fill data */ /*ev->data.addr.queue = SNDRV_SEQ_ADDRESS_UNKNOWN;*/ ev->data.addr.client = client; ev->data.addr.port = port; return 0; } /* entry points for broadcasting system events */ void snd_seq_system_broadcast(int client, int port, int type) { struct snd_seq_event ev; if (setheader(&ev, client, port) < 0) return; ev.type = type; snd_seq_kernel_client_dispatch(sysclient, &ev, 0, 0); } EXPORT_SYMBOL_GPL(snd_seq_system_broadcast); /* entry points for broadcasting system events */ int snd_seq_system_notify(int client, int port, struct snd_seq_event *ev) { ev->flags = SNDRV_SEQ_EVENT_LENGTH_FIXED; ev->source.client = sysclient; ev->source.port = announce_port; ev->dest.client = client; ev->dest.port = port; return snd_seq_kernel_client_dispatch(sysclient, ev, 0, 0); } /* call-back handler for timer events */ static int event_input_timer(struct snd_seq_event * ev, int direct, void *private_data, int atomic, int hop) { return snd_seq_control_queue(ev, atomic, hop); } /* register our internal client */ int __init snd_seq_system_client_init(void) { struct snd_seq_port_callback pcallbacks; struct snd_seq_port_info *port; int err; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) return -ENOMEM; memset(&pcallbacks, 0, sizeof(pcallbacks)); pcallbacks.owner = THIS_MODULE; pcallbacks.event_input = event_input_timer; /* register client */ sysclient = snd_seq_create_kernel_client(NULL, 0, "System"); if (sysclient < 0) { kfree(port); return sysclient; } /* register timer */ strcpy(port->name, "Timer"); port->capability = SNDRV_SEQ_PORT_CAP_WRITE; /* accept queue control */ port->capability |= SNDRV_SEQ_PORT_CAP_READ|SNDRV_SEQ_PORT_CAP_SUBS_READ; /* for broadcast */ port->kernel = &pcallbacks; port->type = 0; port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; port->addr.client = sysclient; port->addr.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; err = snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, port); if (err < 0) goto error_port; /* register announcement port */ strcpy(port->name, "Announce"); port->capability = SNDRV_SEQ_PORT_CAP_READ|SNDRV_SEQ_PORT_CAP_SUBS_READ; /* for broadcast only */ port->kernel = NULL; port->type = 0; port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; port->addr.client = sysclient; port->addr.port = SNDRV_SEQ_PORT_SYSTEM_ANNOUNCE; err = snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, port); if (err < 0) goto error_port; announce_port = port->addr.port; kfree(port); return 0; error_port: snd_seq_system_client_done(); kfree(port); return err; } /* unregister our internal client */ void snd_seq_system_client_done(void) { int oldsysclient = sysclient; if (oldsysclient >= 0) { sysclient = -1; announce_port = -1; snd_seq_delete_kernel_client(oldsysclient); } }
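A hedged sketch of how sequencer core code could use the broadcast entry point registered above to announce a new port to subscribers of System:Announce; the client and port values are placeholders.

#include <sound/asequencer.h>
#include "seq_system.h"

static void example_announce_new_port(int client, int port)
{
	/* Sends a SNDRV_SEQ_EVENT_PORT_START event from System:Announce to
	 * every client subscribed to the announce port (e.g. aseqdump). */
	snd_seq_system_broadcast(client, port, SNDRV_SEQ_EVENT_PORT_START);
}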
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_H #define _LINUX_TTY_H #include <linux/fs.h> #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/tty_port.h> #include <linux/mutex.h> #include <linux/tty_flags.h> #include <uapi/linux/tty.h> #include <linux/rwsem.h> #include <linux/llist.h> /* * (Note: the *_driver.minor_start values 1, 64, 128, 192 are * hardcoded at present.) 
*/ #define NR_UNIX98_PTY_DEFAULT 4096 /* Default maximum for Unix98 ptys */ #define NR_UNIX98_PTY_RESERVE 1024 /* Default reserve for main devpts */ #define NR_UNIX98_PTY_MAX (1 << MINORBITS) /* Absolute limit */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character * isn't in use (eg VINTR has no character etc) */ #define __DISABLED_CHAR '\0' #define INTR_CHAR(tty) ((tty)->termios.c_cc[VINTR]) #define QUIT_CHAR(tty) ((tty)->termios.c_cc[VQUIT]) #define ERASE_CHAR(tty) ((tty)->termios.c_cc[VERASE]) #define KILL_CHAR(tty) ((tty)->termios.c_cc[VKILL]) #define EOF_CHAR(tty) ((tty)->termios.c_cc[VEOF]) #define TIME_CHAR(tty) ((tty)->termios.c_cc[VTIME]) #define MIN_CHAR(tty) ((tty)->termios.c_cc[VMIN]) #define SWTC_CHAR(tty) ((tty)->termios.c_cc[VSWTC]) #define START_CHAR(tty) ((tty)->termios.c_cc[VSTART]) #define STOP_CHAR(tty) ((tty)->termios.c_cc[VSTOP]) #define SUSP_CHAR(tty) ((tty)->termios.c_cc[VSUSP]) #define EOL_CHAR(tty) ((tty)->termios.c_cc[VEOL]) #define REPRINT_CHAR(tty) ((tty)->termios.c_cc[VREPRINT]) #define DISCARD_CHAR(tty) ((tty)->termios.c_cc[VDISCARD]) #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE]) #define LNEXT_CHAR(tty) ((tty)->termios.c_cc[VLNEXT]) #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2]) #define _I_FLAG(tty, f) ((tty)->termios.c_iflag & (f)) #define _O_FLAG(tty, f) ((tty)->termios.c_oflag & (f)) #define _C_FLAG(tty, f) ((tty)->termios.c_cflag & (f)) #define _L_FLAG(tty, f) ((tty)->termios.c_lflag & (f)) #define I_IGNBRK(tty) _I_FLAG((tty), IGNBRK) #define I_BRKINT(tty) _I_FLAG((tty), BRKINT) #define I_IGNPAR(tty) _I_FLAG((tty), IGNPAR) #define I_PARMRK(tty) _I_FLAG((tty), PARMRK) #define I_INPCK(tty) _I_FLAG((tty), INPCK) #define I_ISTRIP(tty) _I_FLAG((tty), ISTRIP) #define I_INLCR(tty) _I_FLAG((tty), INLCR) #define I_IGNCR(tty) _I_FLAG((tty), IGNCR) #define I_ICRNL(tty) _I_FLAG((tty), ICRNL) #define I_IUCLC(tty) _I_FLAG((tty), IUCLC) #define I_IXON(tty) _I_FLAG((tty), IXON) #define I_IXANY(tty) _I_FLAG((tty), IXANY) #define I_IXOFF(tty) _I_FLAG((tty), IXOFF) #define I_IMAXBEL(tty) _I_FLAG((tty), IMAXBEL) #define I_IUTF8(tty) _I_FLAG((tty), IUTF8) #define O_OPOST(tty) _O_FLAG((tty), OPOST) #define O_OLCUC(tty) _O_FLAG((tty), OLCUC) #define O_ONLCR(tty) _O_FLAG((tty), ONLCR) #define O_OCRNL(tty) _O_FLAG((tty), OCRNL) #define O_ONOCR(tty) _O_FLAG((tty), ONOCR) #define O_ONLRET(tty) _O_FLAG((tty), ONLRET) #define O_OFILL(tty) _O_FLAG((tty), OFILL) #define O_OFDEL(tty) _O_FLAG((tty), OFDEL) #define O_NLDLY(tty) _O_FLAG((tty), NLDLY) #define O_CRDLY(tty) _O_FLAG((tty), CRDLY) #define O_TABDLY(tty) _O_FLAG((tty), TABDLY) #define O_BSDLY(tty) _O_FLAG((tty), BSDLY) #define O_VTDLY(tty) _O_FLAG((tty), VTDLY) #define O_FFDLY(tty) _O_FLAG((tty), FFDLY) #define C_BAUD(tty) _C_FLAG((tty), CBAUD) #define C_CSIZE(tty) _C_FLAG((tty), CSIZE) #define C_CSTOPB(tty) _C_FLAG((tty), CSTOPB) #define C_CREAD(tty) _C_FLAG((tty), CREAD) #define C_PARENB(tty) _C_FLAG((tty), PARENB) #define C_PARODD(tty) _C_FLAG((tty), PARODD) #define C_HUPCL(tty) _C_FLAG((tty), HUPCL) #define C_CLOCAL(tty) _C_FLAG((tty), CLOCAL) #define C_CIBAUD(tty) _C_FLAG((tty), CIBAUD) #define C_CRTSCTS(tty) _C_FLAG((tty), CRTSCTS) #define C_CMSPAR(tty) _C_FLAG((tty), CMSPAR) #define L_ISIG(tty) _L_FLAG((tty), ISIG) #define L_ICANON(tty) _L_FLAG((tty), ICANON) #define L_XCASE(tty) _L_FLAG((tty), XCASE) #define L_ECHO(tty) _L_FLAG((tty), ECHO) #define L_ECHOE(tty) _L_FLAG((tty), ECHOE) #define L_ECHOK(tty) 
_L_FLAG((tty), ECHOK) #define L_ECHONL(tty) _L_FLAG((tty), ECHONL) #define L_NOFLSH(tty) _L_FLAG((tty), NOFLSH) #define L_TOSTOP(tty) _L_FLAG((tty), TOSTOP) #define L_ECHOCTL(tty) _L_FLAG((tty), ECHOCTL) #define L_ECHOPRT(tty) _L_FLAG((tty), ECHOPRT) #define L_ECHOKE(tty) _L_FLAG((tty), ECHOKE) #define L_FLUSHO(tty) _L_FLAG((tty), FLUSHO) #define L_PENDIN(tty) _L_FLAG((tty), PENDIN) #define L_IEXTEN(tty) _L_FLAG((tty), IEXTEN) #define L_EXTPROC(tty) _L_FLAG((tty), EXTPROC) struct device; struct signal_struct; struct tty_operations; /** * struct tty_struct - state associated with a tty while open * * @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero * frees the structure * @dev: class device or %NULL (e.g. ptys, serdev) * @driver: &struct tty_driver operating this tty * @ops: &struct tty_operations of @driver for this tty (open, close, etc.) * @index: index of this tty (e.g. to construct @name like tty12) * @ldisc_sem: protects line discipline changes (@ldisc) -- lock tty not pty * @ldisc: the current line discipline for this tty (n_tty by default) * @atomic_write_lock: protects against concurrent writers, i.e. locks * @write_cnt, @write_buf and similar * @legacy_mutex: leftover from history (BKL -> BTM -> @legacy_mutex), * protecting several operations on this tty * @throttle_mutex: protects against concurrent tty_throttle_safe() and * tty_unthrottle_safe() (but not tty_unthrottle()) * @termios_rwsem: protects @termios and @termios_locked * @winsize_mutex: protects @winsize * @termios: termios for the current tty, copied from/to @driver.termios * @termios_locked: locked termios (by %TIOCGLCKTRMIOS and %TIOCSLCKTRMIOS * ioctls) * @name: name of the tty constructed by tty_line_name() (e.g. ttyS3) * @flags: bitwise OR of %TTY_THROTTLED, %TTY_IO_ERROR, ... * @count: count of open processes, reaching zero cancels all the work for * this tty and drops a @kref too (but does not free this tty) * @winsize: size of the terminal "window" (cf. @winsize_mutex) * @flow: flow settings grouped together, see also @flow.unused * @flow.lock: lock for @flow members * @flow.stopped: tty stopped/started by stop_tty()/start_tty() * @flow.tco_stopped: tty stopped/started by %TCOOFF/%TCOON ioctls (it has * precedence over @flow.stopped) * @flow.unused: alignment for Alpha, so that no members other than @flow.* are * modified by the same 64b word store. The @flow's __aligned is * there for the very same reason. * @ctrl: control settings grouped together, see also @ctrl.unused * @ctrl.lock: lock for @ctrl members * @ctrl.pgrp: process group of this tty (setpgrp(2)) * @ctrl.session: session of this tty (setsid(2)). Writes are protected by both * @ctrl.lock and @legacy_mutex, readers must use at least one of * them. 
* @ctrl.pktstatus: packet mode status (bitwise OR of %TIOCPKT_ constants) * @ctrl.packet: packet mode enabled * @ctrl.unused: alignment for Alpha, see @flow.unused for explanation * @hw_stopped: not controlled by the tty layer, under @driver's control for CTS * handling * @receive_room: bytes permitted to feed to @ldisc without any being lost * @flow_change: controls behavior of throttling, see tty_throttle_safe() and * tty_unthrottle_safe() * @link: link to another pty (master -> slave and vice versa) * @fasync: state for %O_ASYNC (for %SIGIO); managed by fasync_helper() * @write_wait: concurrent writers are waiting in this queue until they are * allowed to write * @read_wait: readers wait for data in this queue * @hangup_work: normally a work to perform a hangup (do_tty_hangup()); while * freeing the tty, (re)used to release_one_tty() * @disc_data: pointer to @ldisc's private data (e.g. to &struct n_tty_data) * @driver_data: pointer to @driver's private data (e.g. &struct uart_state) * @files_lock: protects @tty_files list * @tty_files: list of (re)openers of this tty (i.e. linked &struct * tty_file_private) * @closing: when set during close, n_tty processes only START & STOP chars * @write_buf: temporary buffer used during tty_write() to copy user data to * @write_cnt: count of bytes written in tty_write() to @write_buf * @SAK_work: if the tty has a pending do_SAK, it is queued here * @port: persistent storage for this device (i.e. &struct tty_port) * * All of the state associated with a tty while the tty is open. Persistent * storage for tty devices is referenced here as @port and is documented in * &struct tty_port. */ struct tty_struct { struct kref kref; int index; struct device *dev; struct tty_driver *driver; struct tty_port *port; const struct tty_operations *ops; struct tty_ldisc *ldisc; struct ld_semaphore ldisc_sem; struct mutex atomic_write_lock; struct mutex legacy_mutex; struct mutex throttle_mutex; struct rw_semaphore termios_rwsem; struct mutex winsize_mutex; struct ktermios termios, termios_locked; char name[64]; unsigned long flags; int count; unsigned int receive_room; struct winsize winsize; struct { spinlock_t lock; bool stopped; bool tco_stopped; unsigned long unused[0]; } __aligned(sizeof(unsigned long)) flow; struct { struct pid *pgrp; struct pid *session; spinlock_t lock; unsigned char pktstatus; bool packet; unsigned long unused[0]; } __aligned(sizeof(unsigned long)) ctrl; bool hw_stopped; bool closing; int flow_change; struct tty_struct *link; struct fasync_struct *fasync; wait_queue_head_t write_wait; wait_queue_head_t read_wait; struct work_struct hangup_work; void *disc_data; void *driver_data; spinlock_t files_lock; int write_cnt; u8 *write_buf; struct list_head tty_files; #define N_TTY_BUF_SIZE 4096 struct work_struct SAK_work; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ struct tty_file_private { struct tty_struct *tty; struct file *file; struct list_head list; }; /** * DOC: TTY Struct Flags * * These bits are used in the :c:member:`tty_struct.flags` field. * * So that interrupts won't be able to mess up the queues, * copy_to_cooked must be atomic with respect to itself, as must * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. * * TTY_THROTTLED * Driver input is throttled. The ldisc should call * :c:member:`tty_driver.unthrottle()` in order to resume reception when * it is ready to process more data (at threshold min). 
* * TTY_IO_ERROR * If set, causes all subsequent userspace read/write calls on the tty to * fail, returning -%EIO. (May be no ldisc too.) * * TTY_OTHER_CLOSED * Device is a pty and the other side has closed. * * TTY_EXCLUSIVE * Exclusive open mode (a single opener). * * TTY_DO_WRITE_WAKEUP * If set, causes the driver to call the * :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume * transmission when it can accept more data to transmit. * * TTY_LDISC_OPEN * Indicates that a line discipline is open. For debugging purposes only. * * TTY_PTY_LOCK * A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic. * * TTY_NO_WRITE_SPLIT * Prevent driver from splitting up writes into smaller chunks (preserve * write boundaries to driver). * * TTY_HUPPED * The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`. * * TTY_HUPPING * The TTY is in the process of hanging up to abort potential readers. * * TTY_LDISC_CHANGING * Line discipline for this TTY is being changed. I/O should not block * when this is set. Use tty_io_nonblock() to check. * * TTY_LDISC_HALTED * Line discipline for this TTY was stopped. No work should be queued to * this ldisc. */ #define TTY_THROTTLED 0 #define TTY_IO_ERROR 1 #define TTY_OTHER_CLOSED 2 #define TTY_EXCLUSIVE 3 #define TTY_DO_WRITE_WAKEUP 5 #define TTY_LDISC_OPEN 11 #define TTY_PTY_LOCK 16 #define TTY_NO_WRITE_SPLIT 17 #define TTY_HUPPED 18 #define TTY_HUPPING 19 #define TTY_LDISC_CHANGING 20 #define TTY_LDISC_HALTED 22 static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { return file->f_flags & O_NONBLOCK || test_bit(TTY_LDISC_CHANGING, &tty->flags); } static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); } static inline bool tty_throttled(struct tty_struct *tty) { return test_bit(TTY_THROTTLED, &tty->flags); } #ifdef CONFIG_TTY void tty_kref_put(struct tty_struct *tty); struct pid *tty_get_pgrp(struct tty_struct *tty); void tty_vhangup_self(void); void disassociate_ctty(int priv); dev_t tty_devnum(struct tty_struct *tty); void proc_clear_tty(struct task_struct *p); struct tty_struct *get_current_tty(void); /* tty_io.c */ int __init tty_init(void); const char *tty_name(const struct tty_struct *tty); struct tty_struct *tty_kopen_exclusive(dev_t device); struct tty_struct *tty_kopen_shared(dev_t device); void tty_kclose(struct tty_struct *tty); int tty_dev_name_to_number(const char *name, dev_t *number); #else static inline void tty_kref_put(struct tty_struct *tty) { } static inline struct pid *tty_get_pgrp(struct tty_struct *tty) { return NULL; } static inline void tty_vhangup_self(void) { } static inline void disassociate_ctty(int priv) { } static inline dev_t tty_devnum(struct tty_struct *tty) { return 0; } static inline void proc_clear_tty(struct task_struct *p) { } static inline struct tty_struct *get_current_tty(void) { return NULL; } /* tty_io.c */ static inline int __init tty_init(void) { return 0; } static inline const char *tty_name(const struct tty_struct *tty) { return "(none)"; } static inline struct tty_struct *tty_kopen_exclusive(dev_t device) { return ERR_PTR(-ENODEV); } static inline void tty_kclose(struct tty_struct *tty) { } static inline int tty_dev_name_to_number(const char *name, dev_t *number) { return -ENOTSUPP; } #endif extern struct ktermios tty_std_termios; int vcs_init(void); extern const struct class tty_class; /** * tty_kref_get - get a tty reference * @tty: tty device * * Returns: a new reference to a tty object * * Locking: 
The caller must hold sufficient locks/counts to ensure that their * existing reference cannot go away. */ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); return tty; } const char *tty_driver_name(const struct tty_struct *tty); void tty_wait_until_sent(struct tty_struct *tty, long timeout); void stop_tty(struct tty_struct *tty); void start_tty(struct tty_struct *tty); void tty_write_message(struct tty_struct *tty, char *msg); int tty_send_xchar(struct tty_struct *tty, u8 ch); int tty_put_char(struct tty_struct *tty, u8 c); unsigned int tty_chars_in_buffer(struct tty_struct *tty); unsigned int tty_write_room(struct tty_struct *tty); void tty_driver_flush_buffer(struct tty_struct *tty); void tty_unthrottle(struct tty_struct *tty); bool tty_throttle_safe(struct tty_struct *tty); bool tty_unthrottle_safe(struct tty_struct *tty); int tty_do_resize(struct tty_struct *tty, struct winsize *ws); int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); int tty_get_tiocm(struct tty_struct *tty); int is_current_pgrp_orphaned(void); void tty_hangup(struct tty_struct *tty); void tty_vhangup(struct tty_struct *tty); int tty_hung_up_p(struct file *filp); void do_SAK(struct tty_struct *tty); void __do_SAK(struct tty_struct *tty); void no_tty(void); speed_t tty_termios_baud_rate(const struct ktermios *termios); void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** * tty_get_baud_rate - get tty bit rates * @tty: tty to query * * Returns: the baud rate as an integer for this terminal * * Locking: The termios lock must be held by the caller. */ static inline speed_t tty_get_baud_rate(const struct tty_struct *tty) { return tty_termios_baud_rate(&tty->termios); } unsigned char tty_get_char_size(unsigned int cflag); unsigned char tty_get_frame_size(unsigned int cflag); void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old); bool tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); void tty_wakeup(struct tty_struct *tty); int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); int tty_perform_flush(struct tty_struct *tty, unsigned long arg); struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); void tty_release_struct(struct tty_struct *tty, int idx); void tty_init_termios(struct tty_struct *tty); void tty_save_termios(struct tty_struct *tty); int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty); extern struct mutex tty_mutex; /* n_tty.c */ void n_tty_inherit_ops(struct tty_ldisc_ops *ops); #ifdef CONFIG_TTY void __init n_tty_init(void); #else static inline void n_tty_init(void) { } #endif /* tty_audit.c */ #ifdef CONFIG_AUDIT void tty_audit_exit(void); void tty_audit_fork(struct signal_struct *sig); int tty_audit_push(void); #else static inline void tty_audit_exit(void) { } static inline void tty_audit_fork(struct signal_struct *sig) { } static inline int tty_audit_push(void) { return 0; } #endif /* tty_ioctl.c */ int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* vt.c */ int vt_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); long vt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg); /* tty_mutex.c */ /* functions for preparation of BKL removal */ void tty_lock(struct 
tty_struct *tty); int tty_lock_interruptible(struct tty_struct *tty); void tty_unlock(struct tty_struct *tty); void tty_lock_slave(struct tty_struct *tty); void tty_unlock_slave(struct tty_struct *tty); void tty_set_lock_subclass(struct tty_struct *tty); #endif
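To illustrate the termios test macros and helpers declared above, here is a hedged, much-simplified sketch of a line-discipline receive path, loosely modelled on n_tty's CR/NL handling; it is not the real n_tty logic.

#include <linux/tty.h>

static void example_receive_char(struct tty_struct *tty, u8 c)
{
	/* ICRNL (unless IGNCR): map a received carriage return to newline. */
	if (c == '\r' && I_ICRNL(tty) && !I_IGNCR(tty))
		c = '\n';

	/* Simplified echo decision: canonical mode with ECHO enabled. */
	if (L_ICANON(tty) && L_ECHO(tty))
		tty_put_char(tty, c);
}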
73 61 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Landlock LSM - Filesystem management and hooks * * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI */ #ifndef _SECURITY_LANDLOCK_FS_H #define _SECURITY_LANDLOCK_FS_H #include <linux/fs.h> #include <linux/init.h> #include <linux/rcupdate.h> #include "ruleset.h" #include "setup.h" /** * struct landlock_inode_security - Inode security blob * * Enable to reference a &struct landlock_object tied to an inode (i.e. * underlying object). */ struct landlock_inode_security { /** * @object: Weak pointer to an allocated object. All assignments of a * new object are protected by the underlying inode->i_lock. However, * atomically disassociating @object from the inode is only protected * by @object->lock, from the time @object's usage refcount drops to * zero to the time this pointer is nulled out (cf. release_inode() and * hook_sb_delete()). Indeed, such disassociation doesn't require * inode->i_lock thanks to the careful rcu_access_pointer() check * performed by get_inode_object(). */ struct landlock_object __rcu *object; }; /** * struct landlock_file_security - File security blob * * This information is populated when opening a file in hook_file_open, and * tracks the relevant Landlock access rights that were available at the time * of opening the file. Other LSM hooks use these rights in order to authorize * operations on already opened files. */ struct landlock_file_security { /** * @allowed_access: Access rights that were available at the time of * opening the file. This is not necessarily the full set of access * rights available at that time, but it's the necessary subset as * needed to authorize later operations on the open file. */ access_mask_t allowed_access; }; /** * struct landlock_superblock_security - Superblock security blob * * Enable hook_sb_delete() to wait for concurrent calls to release_inode(). */ struct landlock_superblock_security { /** * @inode_refs: Number of pending inodes (from this superblock) that * are being released by release_inode(). * Cf. struct super_block->s_fsnotify_inode_refs . */ atomic_long_t inode_refs; }; static inline struct landlock_file_security * landlock_file(const struct file *const file) { return file->f_security + landlock_blob_sizes.lbs_file; } static inline struct landlock_inode_security * landlock_inode(const struct inode *const inode) { return inode->i_security + landlock_blob_sizes.lbs_inode; } static inline struct landlock_superblock_security * landlock_superblock(const struct super_block *const superblock) { return superblock->s_security + landlock_blob_sizes.lbs_superblock; } __init void landlock_add_fs_hooks(void); int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, const struct path *const path, access_mask_t access_hierarchy); #endif /* _SECURITY_LANDLOCK_FS_H */
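A hedged sketch of the kind of check a Landlock file hook could perform with the blob accessor above; LANDLOCK_ACCESS_FS_TRUNCATE is used only as an example right, and the real hooks live in security/landlock/fs.c.

#include <linux/errno.h>
#include <linux/fs.h>
#include <uapi/linux/landlock.h>

#include "fs.h"

static int example_check_truncate(const struct file *const file)
{
	const struct landlock_file_security *file_sec = landlock_file(file);

	/* Deny if truncation was not among the rights recorded at open time. */
	if (!(file_sec->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE))
		return -EACCES;
	return 0;
}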
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2019 ARM Ltd.
 *
 * Generic implementation of update_vsyscall and update_vsyscall_tz.
 *
 * Based on the x86 specific implementation.
 */

#include <linux/hrtimer.h>
#include <linux/timekeeper_internal.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>

#include "timekeeping_internal.h"

static inline void update_vdso_data(struct vdso_data *vdata,
				    struct timekeeper *tk)
{
	struct vdso_timestamp *vdso_ts;
	u64 nsec, sec;

	vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
	vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
	vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
	vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
	vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
	vdata[CS_RAW].mask = tk->tkr_raw.mask;
	vdata[CS_RAW].mult = tk->tkr_raw.mult;
	vdata[CS_RAW].shift = tk->tkr_raw.shift;

	/* CLOCK_MONOTONIC */
	vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
	vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;

	nsec = tk->tkr_mono.xtime_nsec;
	nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
	while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
		nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
		vdso_ts->sec++;
	}
	vdso_ts->nsec = nsec;

	/* Copy MONOTONIC time for BOOTTIME */
	sec = vdso_ts->sec;
	/* Add the boot offset */
	sec += tk->monotonic_to_boot.tv_sec;
	nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;

	/* CLOCK_BOOTTIME */
	vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
	vdso_ts->sec = sec;

	while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
		nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
		vdso_ts->sec++;
	}
	vdso_ts->nsec = nsec;

	/* CLOCK_MONOTONIC_RAW */
	vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
	vdso_ts->sec = tk->raw_sec;
	vdso_ts->nsec = tk->tkr_raw.xtime_nsec;

	/* CLOCK_TAI */
	vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
	vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
	vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
}

void update_vsyscall(struct timekeeper *tk)
{
	struct vdso_data *vdata = __arch_get_k_vdso_data();
	struct vdso_timestamp *vdso_ts;
	s32 clock_mode;
	u64 nsec;

	/* copy vsyscall data */
	vdso_write_begin(vdata);

	clock_mode = tk->tkr_mono.clock->vdso_clock_mode;
	vdata[CS_HRES_COARSE].clock_mode = clock_mode;
	vdata[CS_RAW].clock_mode = clock_mode;

	/* CLOCK_REALTIME also required for time() */
	vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
	vdso_ts->sec = tk->xtime_sec;
	vdso_ts->nsec = tk->tkr_mono.xtime_nsec;

	/* CLOCK_REALTIME_COARSE */
	vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
	vdso_ts->sec = tk->xtime_sec;
	vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;

	/* CLOCK_MONOTONIC_COARSE */
	vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
	vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
	nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
	nsec = nsec + tk->wall_to_monotonic.tv_nsec;
	vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);

	/*
	 * Read without the seqlock held by clock_getres().
	 * Note: No need to have a second copy.
	 */
	WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution);

	/*
	 * If the current clocksource is not VDSO capable, then spare the
	 * update of the high resolution parts.
	 */
	if (clock_mode != VDSO_CLOCKMODE_NONE)
		update_vdso_data(vdata, tk);

	__arch_update_vsyscall(vdata, tk);

	vdso_write_end(vdata);

	__arch_sync_vdso_data(vdata);
}

void update_vsyscall_tz(void)
{
	struct vdso_data *vdata = __arch_get_k_vdso_data();

	vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
	vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;

	__arch_sync_vdso_data(vdata);
}

/**
 * vdso_update_begin - Start of a VDSO update section
 *
 * Allows architecture code to safely update the architecture specific VDSO
 * data. Disables interrupts, acquires timekeeper lock to serialize against
 * concurrent updates from timekeeping and invalidates the VDSO data
 * sequence counter to prevent concurrent readers from accessing
 * inconsistent data.
 *
 * Returns: Saved interrupt flags which need to be handed in to
 * vdso_update_end().
 */
unsigned long vdso_update_begin(void)
{
	struct vdso_data *vdata = __arch_get_k_vdso_data();
	unsigned long flags;

	raw_spin_lock_irqsave(&timekeeper_lock, flags);
	vdso_write_begin(vdata);
	return flags;
}

/**
 * vdso_update_end - End of a VDSO update section
 * @flags: Interrupt flags as returned from vdso_update_begin()
 *
 * Pairs with vdso_update_begin(). Marks vdso data consistent, invokes data
 * synchronization if the architecture requires it, drops timekeeper lock
 * and restores interrupt flags.
 */
void vdso_update_end(unsigned long flags)
{
	struct vdso_data *vdata = __arch_get_k_vdso_data();

	vdso_write_end(vdata);
	__arch_sync_vdso_data(vdata);
	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
}
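/*
 * Illustrative sketch, not part of the kernel sources above: a hypothetical
 * architecture-side helper showing how the vdso_update_begin() and
 * vdso_update_end() calls documented above are meant to be paired. The
 * helper name and the "write arch fields" step are placeholders; only the
 * begin/end calls and the interrupt-flags hand-off come from the API above.
 */
static void example_arch_vdso_update(void)
{
	unsigned long flags;

	/* Hold off concurrent vDSO readers and serialize with timekeeping. */
	flags = vdso_update_begin();

	/* ... update architecture specific vDSO data here ... */

	/* Mark the data consistent again and restore the saved flags. */
	vdso_update_end(flags);
}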
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Definitions for the 'struct ptr_ring' datastructure.
 *
 * Author:
 *	Michael S. Tsirkin <mst@redhat.com>
 *
 * Copyright (C) 2016 Red Hat, Inc.
 *
 * This is a limited-size FIFO maintaining pointers in FIFO order, with
 * one CPU producing entries and another consuming entries from a FIFO.
 *
 * This implementation tries to minimize cache-contention when there is a
 * single producer and a single consumer CPU.
*/ #ifndef _LINUX_PTR_RING_H #define _LINUX_PTR_RING_H 1 #ifdef __KERNEL__ #include <linux/spinlock.h> #include <linux/cache.h> #include <linux/types.h> #include <linux/compiler.h> #include <linux/slab.h> #include <linux/mm.h> #include <asm/errno.h> #endif struct ptr_ring { int producer ____cacheline_aligned_in_smp; spinlock_t producer_lock; int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ int consumer_tail; /* next entry to invalidate */ spinlock_t consumer_lock; /* Shared consumer/producer data */ /* Read-only by both the producer and the consumer */ int size ____cacheline_aligned_in_smp; /* max entries in queue */ int batch; /* number of entries to consume in a batch */ void **queue; }; /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). * * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: * see e.g. ptr_ring_full. */ static inline bool __ptr_ring_full(struct ptr_ring *r) { return r->queue[r->producer]; } static inline bool ptr_ring_full(struct ptr_ring *r) { bool ret; spin_lock(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock(&r->producer_lock); return ret; } static inline bool ptr_ring_full_irq(struct ptr_ring *r) { bool ret; spin_lock_irq(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock_irq(&r->producer_lock); return ret; } static inline bool ptr_ring_full_any(struct ptr_ring *r) { unsigned long flags; bool ret; spin_lock_irqsave(&r->producer_lock, flags); ret = __ptr_ring_full(r); spin_unlock_irqrestore(&r->producer_lock, flags); return ret; } static inline bool ptr_ring_full_bh(struct ptr_ring *r) { bool ret; spin_lock_bh(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock_bh(&r->producer_lock); return ret; } /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. * Callers are responsible for making sure pointer that is being queued * points to a valid data. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; /* Make sure the pointer we are storing points to a valid data. */ /* Pairs with the dependency ordering in __ptr_ring_consume. */ smp_wmb(); WRITE_ONCE(r->queue[r->producer++], ptr); if (unlikely(r->producer >= r->size)) r->producer = 0; return 0; } /* * Note: resize (below) nests producer lock within consumer lock, so if you * consume in interrupt or BH context, you must disable interrupts/BH when * calling this. 
*/ static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) { int ret; spin_lock(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock(&r->producer_lock); return ret; } static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr) { int ret; spin_lock_irq(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock_irq(&r->producer_lock); return ret; } static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr) { unsigned long flags; int ret; spin_lock_irqsave(&r->producer_lock, flags); ret = __ptr_ring_produce(r, ptr); spin_unlock_irqrestore(&r->producer_lock, flags); return ret; } static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) { int ret; spin_lock_bh(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock_bh(&r->producer_lock); return ret; } static inline void *__ptr_ring_peek(struct ptr_ring *r) { if (likely(r->size)) return READ_ONCE(r->queue[r->consumer_head]); return NULL; } /* * Test ring empty status without taking any locks. * * NB: This is only safe to call if ring is never resized. * * However, if some other CPU consumes ring entries at the same time, the value * returned is not guaranteed to be correct. * * In this case - to avoid incorrectly detecting the ring * as empty - the CPU consuming the ring entries is responsible * for either consuming all ring entries until the ring is empty, * or synchronizing with some other CPU and causing it to * re-test __ptr_ring_empty and/or consume the ring enteries * after the synchronization point. * * Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). */ static inline bool __ptr_ring_empty(struct ptr_ring *r) { if (likely(r->size)) return !r->queue[READ_ONCE(r->consumer_head)]; return true; } static inline bool ptr_ring_empty(struct ptr_ring *r) { bool ret; spin_lock(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock(&r->consumer_lock); return ret; } static inline bool ptr_ring_empty_irq(struct ptr_ring *r) { bool ret; spin_lock_irq(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock_irq(&r->consumer_lock); return ret; } static inline bool ptr_ring_empty_any(struct ptr_ring *r) { unsigned long flags; bool ret; spin_lock_irqsave(&r->consumer_lock, flags); ret = __ptr_ring_empty(r); spin_unlock_irqrestore(&r->consumer_lock, flags); return ret; } static inline bool ptr_ring_empty_bh(struct ptr_ring *r) { bool ret; spin_lock_bh(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock_bh(&r->consumer_lock); return ret; } /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { /* Fundamentally, what we want to do is update consumer * index and zero out the entry so producer can reuse it. * Doing it naively at each consume would be as simple as: * consumer = r->consumer; * r->queue[consumer++] = NULL; * if (unlikely(consumer >= r->size)) * consumer = 0; * r->consumer = consumer; * but that is suboptimal when the ring is full as producer is writing * out new entries in the same cache line. Defer these updates until a * batch of entries has been consumed. */ /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty * to work correctly. */ int consumer_head = r->consumer_head; int head = consumer_head++; /* Once we have processed enough entries invalidate them in * the ring all at once so producer can reuse their space in the ring. 
* We also do this when we reach end of the ring - not mandatory * but helps keep the implementation simple. */ if (unlikely(consumer_head - r->consumer_tail >= r->batch || consumer_head >= r->size)) { /* Zero out entries in the reverse order: this way we touch the * cache line that producer might currently be reading the last; * producer won't make progress and touch other cache lines * besides the first one until we write out all entries. */ while (likely(head >= r->consumer_tail)) r->queue[head--] = NULL; r->consumer_tail = consumer_head; } if (unlikely(consumer_head >= r->size)) { consumer_head = 0; r->consumer_tail = 0; } /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ WRITE_ONCE(r->consumer_head, consumer_head); } static inline void *__ptr_ring_consume(struct ptr_ring *r) { void *ptr; /* The READ_ONCE in __ptr_ring_peek guarantees that anyone * accessing data through the pointer is up to date. Pairs * with smp_wmb in __ptr_ring_produce. */ ptr = __ptr_ring_peek(r); if (ptr) __ptr_ring_discard_one(r); return ptr; } static inline int __ptr_ring_consume_batched(struct ptr_ring *r, void **array, int n) { void *ptr; int i; for (i = 0; i < n; i++) { ptr = __ptr_ring_consume(r); if (!ptr) break; array[i] = ptr; } return i; } /* * Note: resize (below) nests producer lock within consumer lock, so if you * call this in interrupt or BH context, you must disable interrupts/BH when * producing. */ static inline void *ptr_ring_consume(struct ptr_ring *r) { void *ptr; spin_lock(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock(&r->consumer_lock); return ptr; } static inline void *ptr_ring_consume_irq(struct ptr_ring *r) { void *ptr; spin_lock_irq(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock_irq(&r->consumer_lock); return ptr; } static inline void *ptr_ring_consume_any(struct ptr_ring *r) { unsigned long flags; void *ptr; spin_lock_irqsave(&r->consumer_lock, flags); ptr = __ptr_ring_consume(r); spin_unlock_irqrestore(&r->consumer_lock, flags); return ptr; } static inline void *ptr_ring_consume_bh(struct ptr_ring *r) { void *ptr; spin_lock_bh(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock_bh(&r->consumer_lock); return ptr; } static inline int ptr_ring_consume_batched(struct ptr_ring *r, void **array, int n) { int ret; spin_lock(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock(&r->consumer_lock); return ret; } static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, void **array, int n) { int ret; spin_lock_irq(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_irq(&r->consumer_lock); return ret; } static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, void **array, int n) { unsigned long flags; int ret; spin_lock_irqsave(&r->consumer_lock, flags); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_irqrestore(&r->consumer_lock, flags); return ret; } static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, void **array, int n) { int ret; spin_lock_bh(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_bh(&r->consumer_lock); return ret; } /* Cast to structure type and call a function without discarding from FIFO. * Function must return a value. * Callers must take consumer_lock. 
*/ #define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r))) #define PTR_RING_PEEK_CALL(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock_irq(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_irq(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_BH(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock_bh(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_bh(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_ANY(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ unsigned long __PTR_RING_PEEK_CALL_f;\ \ spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ __PTR_RING_PEEK_CALL_v; \ }) /* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See * documentation for vmalloc for which of them are legal. */ static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { if (size > KMALLOC_MAX_SIZE / sizeof(void *)) return NULL; return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) { r->size = size; r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); /* We need to set batch at least to 1 to make logic * in __ptr_ring_discard_one work correctly. * Batching too much (because ring is small) would cause a lot of * burstiness. Needs tuning, for now disable batching. */ if (r->batch > r->size / 2 || !r->batch) r->batch = 1; } static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) { r->queue = __ptr_ring_init_queue_alloc(size, gfp); if (!r->queue) return -ENOMEM; __ptr_ring_set_size(r, size); r->producer = r->consumer_head = r->consumer_tail = 0; spin_lock_init(&r->producer_lock); spin_lock_init(&r->consumer_lock); return 0; } /* * Return entries into ring. Destroy entries that don't fit. * * Note: this is expected to be a rare slow path operation. * * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, void (*destroy)(void *)) { unsigned long flags; int head; spin_lock_irqsave(&r->consumer_lock, flags); spin_lock(&r->producer_lock); if (!r->size) goto done; /* * Clean out buffered entries (for simplicity). This way following code * can test entries for NULL and if not assume they are valid. */ head = r->consumer_head - 1; while (likely(head >= r->consumer_tail)) r->queue[head--] = NULL; r->consumer_tail = r->consumer_head; /* * Go over entries in batch, start moving head back and copy entries. * Stop when we run into previously unconsumed entries. */ while (n) { head = r->consumer_head - 1; if (head < 0) head = r->size - 1; if (r->queue[head]) { /* This batch entry will have to be destroyed. 
*/ goto done; } r->queue[head] = batch[--n]; r->consumer_tail = head; /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ WRITE_ONCE(r->consumer_head, head); } done: /* Destroy all entries left in the batch. */ while (n) destroy(batch[--n]); spin_unlock(&r->producer_lock); spin_unlock_irqrestore(&r->consumer_lock, flags); } static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, int size, gfp_t gfp, void (*destroy)(void *)) { int producer = 0; void **old; void *ptr; while ((ptr = __ptr_ring_consume(r))) if (producer < size) queue[producer++] = ptr; else if (destroy) destroy(ptr); if (producer >= size) producer = 0; __ptr_ring_set_size(r, size); r->producer = producer; r->consumer_head = 0; r->consumer_tail = 0; old = r->queue; r->queue = queue; return old; } /* * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; void **queue = __ptr_ring_init_queue_alloc(size, gfp); void **old; if (!queue) return -ENOMEM; spin_lock_irqsave(&(r)->consumer_lock, flags); spin_lock(&(r)->producer_lock); old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); spin_unlock(&(r)->producer_lock); spin_unlock_irqrestore(&(r)->consumer_lock, flags); kvfree(old); return 0; } /* * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, unsigned int nrings, int size, gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; void ***queues; int i; queues = kmalloc_array(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; for (i = 0; i < nrings; ++i) { queues[i] = __ptr_ring_init_queue_alloc(size, gfp); if (!queues[i]) goto nomem; } for (i = 0; i < nrings; ++i) { spin_lock_irqsave(&(rings[i])->consumer_lock, flags); spin_lock(&(rings[i])->producer_lock); queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], size, gfp, destroy); spin_unlock(&(rings[i])->producer_lock); spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags); } for (i = 0; i < nrings; ++i) kvfree(queues[i]); kfree(queues); return 0; nomem: while (--i >= 0) kvfree(queues[i]); kfree(queues); noqueues: return -ENOMEM; } static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { void *ptr; if (destroy) while ((ptr = ptr_ring_consume(r))) destroy(ptr); kvfree(r->queue); } #endif /* _LINUX_PTR_RING_H */
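/*
 * Illustrative usage sketch, not part of ptr_ring.h above: one producer and
 * one consumer exercising the locked API declared above. The
 * "struct example_item" type and the wrapper function are hypothetical;
 * ptr_ring_init(), ptr_ring_produce(), ptr_ring_consume() and
 * ptr_ring_cleanup() are the entry points documented above.
 */
struct example_item {
	int id;
};

static int example_ptr_ring_usage(void)
{
	struct ptr_ring ring;
	struct example_item *item, *out;
	int err;

	/* Allocate a 16-slot ring; the queue starts out zeroed (empty). */
	err = ptr_ring_init(&ring, 16, GFP_KERNEL);
	if (err)
		return err;

	item = kzalloc(sizeof(*item), GFP_KERNEL);
	if (!item) {
		ptr_ring_cleanup(&ring, NULL);
		return -ENOMEM;
	}
	item->id = 1;

	/* Producer side: returns -ENOSPC once the ring is full. */
	err = ptr_ring_produce(&ring, item);
	if (err) {
		kfree(item);
		ptr_ring_cleanup(&ring, NULL);
		return err;
	}

	/* Consumer side: returns NULL once the ring is empty. */
	out = ptr_ring_consume(&ring);
	kfree(out);

	/* Ring is already drained, so no destroy callback is needed here. */
	ptr_ring_cleanup(&ring, NULL);
	return 0;
}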
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H

#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>

#include <trace/events/tlb.h>

#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/gsseg.h>

extern atomic64_t last_mm_ctx_id;

#ifdef CONFIG_PERF_EVENTS
DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
void cr4_update_pce(void *ignored);
#endif

#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
 * ldt_structs can be allocated, used, and freed, but they are never
 * modified while live.
 */
struct ldt_struct {
	/*
	 * Xen requires page-aligned LDTs with special permissions. This is
	 * needed to prevent us from installing evil descriptors such as
	 * call gates. On native, we could merge the ldt_struct and LDT
	 * allocations, but it's not worth trying to optimize.
	 */
	struct desc_struct *entries;
	unsigned int nr_entries;

	/*
	 * If PTI is in use, then the entries array is not mapped while we're
	 * in user mode. The whole array will be aliased at the address
	 * given by ldt_slot_va(slot). We use two slots so that we can allocate
	 * and map, and enable a new LDT without invalidating the mapping
	 * of an older, still-in-use LDT.
	 *
	 * slot will be -1 if this LDT doesn't have an alias mapping.
	 */
	int slot;
};

/*
 * Used for LDT copy/destruction.
*/ static inline void init_new_context_ldt(struct mm_struct *mm) { mm->context.ldt = NULL; init_rwsem(&mm->context.ldt_usr_sem); } int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); void ldt_arch_exit_mmap(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ static inline void init_new_context_ldt(struct mm_struct *mm) { } static inline int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm) { return 0; } static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL extern void load_mm_ldt(struct mm_struct *mm); extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); #else static inline void load_mm_ldt(struct mm_struct *mm) { clear_LDT(); } static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { DEBUG_LOCKS_WARN_ON(preemptible()); } #endif #ifdef CONFIG_ADDRESS_MASKING static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) { return mm->context.lam_cr3_mask; } static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask; mm->context.untag_mask = oldmm->context.untag_mask; } #define mm_untag_mask mm_untag_mask static inline unsigned long mm_untag_mask(struct mm_struct *mm) { return mm->context.untag_mask; } static inline void mm_reset_untag_mask(struct mm_struct *mm) { mm->context.untag_mask = -1UL; } #define arch_pgtable_dma_compat arch_pgtable_dma_compat static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) { return !mm_lam_cr3_mask(mm) || test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags); } #else static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) { return 0; } static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { } static inline void mm_reset_untag_mask(struct mm_struct *mm) { } #endif #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). 
*/ #define init_new_context init_new_context static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { mutex_init(&mm->context.lock); mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } #endif mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; } #define destroy_context destroy_context static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); #define switch_mm_irqs_off switch_mm_irqs_off #define activate_mm(prev, next) \ do { \ paravirt_enter_mmap(next); \ switch_mm((prev), (next), NULL); \ } while (0); #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ loadsegment(gs, 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ do { \ shstk_free(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) #endif static inline void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; /* Duplicate the oldmm pkey state in mm: */ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; #endif } static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { arch_dup_pkeys(oldmm, mm); paravirt_enter_mmap(mm); dup_lam(oldmm, mm); return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) { paravirt_arch_exit_mmap(mm); ldt_arch_exit_mmap(mm); } #ifdef CONFIG_X86_64 static inline bool is_64bit_mm(struct mm_struct *mm) { return !IS_ENABLED(CONFIG_IA32_EMULATION) || !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags); } #else static inline bool is_64bit_mm(struct mm_struct *mm) { return false; } #endif static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { } /* * We only want to enforce protection keys on the current process * because we effectively have no access to PKRU for other * processes or any way to tell *which * PKRU in a threaded * process we could use. * * So do not enforce things if the VMA is not from the current * mm, or if we are in a kernel thread. */ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { /* pkeys never affect instruction fetches */ if (execute) return true; /* allow access if the VMA is not one from this process */ if (foreign || vma_is_foreign(vma)) return true; return __pkru_allows_pkey(vma_pkey(vma), write); } unsigned long __get_current_cr3_fast(void); #include <asm-generic/mmu_context.h> #endif /* _ASM_X86_MMU_CONTEXT_H */
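/*
 * Illustrative sketch, not part of the header above: the rough order in
 * which core mm code invokes the hooks defined in this header for an mm
 * created at fork() and torn down at exit. The wrapper function is
 * hypothetical; init_new_context(), arch_dup_mmap(), arch_exit_mmap() and
 * destroy_context() are the hooks defined above.
 */
static int example_mm_context_lifecycle(struct task_struct *tsk,
					struct mm_struct *oldmm,
					struct mm_struct *mm)
{
	int err;

	/* mm_init() path: set up the per-mm context (ctx_id, pkeys, LDT). */
	err = init_new_context(tsk, mm);
	if (err)
		return err;

	/* dup_mmap() path: copy pkeys, LAM state and the LDT from the parent. */
	err = arch_dup_mmap(oldmm, mm);
	if (err)
		goto out_destroy;

	/* exit_mmap() path: arch teardown while the mm is being emptied. */
	arch_exit_mmap(mm);
out_destroy:
	/* __mmdrop() path: final release of the per-mm context. */
	destroy_context(mm);
	return err;
}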
// SPDX-License-Identifier: GPL-2.0
/*
 * Released under the GPLv2 only.
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/log2.h>
#include <linux/kmsan.h>
#include <linux/usb.h>
#include <linux/wait.h>
#include <linux/usb/hcd.h>
#include <linux/scatterlist.h>

#define to_urb(d) container_of(d, struct urb, kref)


static void urb_destroy(struct kref *kref)
{
	struct urb *urb = to_urb(kref);

	if (urb->transfer_flags & URB_FREE_BUFFER)
		kfree(urb->transfer_buffer);

	kfree(urb);
}

/**
 * usb_init_urb - initializes a urb so that it can be used by a USB driver
 * @urb: pointer to the urb to initialize
 *
 * Initializes a urb so that the USB subsystem can use it properly.
 *
 * If a urb is created with a call to usb_alloc_urb() it is not
 * necessary to call this function. Only use this if you allocate the
 * space for a struct urb on your own. If you call this function, be
 * careful when freeing the memory for your urb that it is no longer in
 * use by the USB core.
 *
 * Only use this function if you _really_ understand what you are doing.
 */
void usb_init_urb(struct urb *urb)
{
	if (urb) {
		memset(urb, 0, sizeof(*urb));
		kref_init(&urb->kref);
		INIT_LIST_HEAD(&urb->urb_list);
		INIT_LIST_HEAD(&urb->anchor_list);
	}
}
EXPORT_SYMBOL_GPL(usb_init_urb);

/**
 * usb_alloc_urb - creates a new urb for a USB driver to use
 * @iso_packets: number of iso packets for this urb
 * @mem_flags: the type of memory to allocate, see kmalloc() for a list of
 *	valid options for this.
 *
 * Creates an urb for the USB driver to use, initializes a few internal
 * structures, increments the usage counter, and returns a pointer to it.
 *
 * If the driver wants to use this urb for interrupt, control, or bulk
 * endpoints, pass '0' as the number of iso packets.
 *
 * The driver must call usb_free_urb() when it is finished with the urb.
 *
 * Return: A pointer to the new urb, or %NULL if no memory is available.
 */
struct urb *usb_alloc_urb(int iso_packets, gfp_t mem_flags)
{
	struct urb *urb;

	urb = kmalloc(struct_size(urb, iso_frame_desc, iso_packets),
		      mem_flags);
	if (!urb)
		return NULL;
	usb_init_urb(urb);
	return urb;
}
EXPORT_SYMBOL_GPL(usb_alloc_urb);

/**
 * usb_free_urb - frees the memory used by a urb when all users of it are finished
 * @urb: pointer to the urb to free, may be NULL
 *
 * Must be called when a user of a urb is finished with it. When the last user
 * of the urb calls this function, the memory of the urb is freed.
 *
 * Note: The transfer buffer associated with the urb is not freed unless the
 * URB_FREE_BUFFER transfer flag is set.
*/ void usb_free_urb(struct urb *urb) { if (urb) kref_put(&urb->kref, urb_destroy); } EXPORT_SYMBOL_GPL(usb_free_urb); /** * usb_get_urb - increments the reference count of the urb * @urb: pointer to the urb to modify, may be NULL * * This must be called whenever a urb is transferred from a device driver to a * host controller driver. This allows proper reference counting to happen * for urbs. * * Return: A pointer to the urb with the incremented reference counter. */ struct urb *usb_get_urb(struct urb *urb) { if (urb) kref_get(&urb->kref); return urb; } EXPORT_SYMBOL_GPL(usb_get_urb); /** * usb_anchor_urb - anchors an URB while it is processed * @urb: pointer to the urb to anchor * @anchor: pointer to the anchor * * This can be called to have access to URBs which are to be executed * without bothering to track them */ void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor) { unsigned long flags; spin_lock_irqsave(&anchor->lock, flags); usb_get_urb(urb); list_add_tail(&urb->anchor_list, &anchor->urb_list); urb->anchor = anchor; if (unlikely(anchor->poisoned)) atomic_inc(&urb->reject); spin_unlock_irqrestore(&anchor->lock, flags); } EXPORT_SYMBOL_GPL(usb_anchor_urb); static int usb_anchor_check_wakeup(struct usb_anchor *anchor) { return atomic_read(&anchor->suspend_wakeups) == 0 && list_empty(&anchor->urb_list); } /* Callers must hold anchor->lock */ static void __usb_unanchor_urb(struct urb *urb, struct usb_anchor *anchor) { urb->anchor = NULL; list_del(&urb->anchor_list); usb_put_urb(urb); if (usb_anchor_check_wakeup(anchor)) wake_up(&anchor->wait); } /** * usb_unanchor_urb - unanchors an URB * @urb: pointer to the urb to anchor * * Call this to stop the system keeping track of this URB */ void usb_unanchor_urb(struct urb *urb) { unsigned long flags; struct usb_anchor *anchor; if (!urb) return; anchor = urb->anchor; if (!anchor) return; spin_lock_irqsave(&anchor->lock, flags); /* * At this point, we could be competing with another thread which * has the same intention. To protect the urb from being unanchored * twice, only the winner of the race gets the job. */ if (likely(anchor == urb->anchor)) __usb_unanchor_urb(urb, anchor); spin_unlock_irqrestore(&anchor->lock, flags); } EXPORT_SYMBOL_GPL(usb_unanchor_urb); /*-------------------------------------------------------------------*/ static const int pipetypes[4] = { PIPE_CONTROL, PIPE_ISOCHRONOUS, PIPE_BULK, PIPE_INTERRUPT }; /** * usb_pipe_type_check - sanity check of a specific pipe for a usb device * @dev: struct usb_device to be checked * @pipe: pipe to check * * This performs a light-weight sanity check for the endpoint in the * given usb device. It returns 0 if the pipe is valid for the specific usb * device, otherwise a negative error code. */ int usb_pipe_type_check(struct usb_device *dev, unsigned int pipe) { const struct usb_host_endpoint *ep; ep = usb_pipe_endpoint(dev, pipe); if (!ep) return -EINVAL; if (usb_pipetype(pipe) != pipetypes[usb_endpoint_type(&ep->desc)]) return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(usb_pipe_type_check); /** * usb_urb_ep_type_check - sanity check of endpoint in the given urb * @urb: urb to be checked * * This performs a light-weight sanity check for the endpoint in the * given urb. It returns 0 if the urb contains a valid endpoint, otherwise * a negative error code. 
*/ int usb_urb_ep_type_check(const struct urb *urb) { return usb_pipe_type_check(urb->dev, urb->pipe); } EXPORT_SYMBOL_GPL(usb_urb_ep_type_check); /** * usb_submit_urb - issue an asynchronous transfer request for an endpoint * @urb: pointer to the urb describing the request * @mem_flags: the type of memory to allocate, see kmalloc() for a list * of valid options for this. * * This submits a transfer request, and transfers control of the URB * describing that request to the USB subsystem. Request completion will * be indicated later, asynchronously, by calling the completion handler. * The three types of completion are success, error, and unlink * (a software-induced fault, also called "request cancellation"). * * URBs may be submitted in interrupt context. * * The caller must have correctly initialized the URB before submitting * it. Functions such as usb_fill_bulk_urb() and usb_fill_control_urb() are * available to ensure that most fields are correctly initialized, for * the particular kind of transfer, although they will not initialize * any transfer flags. * * If the submission is successful, the complete() callback from the URB * will be called exactly once, when the USB core and Host Controller Driver * (HCD) are finished with the URB. When the completion function is called, * control of the URB is returned to the device driver which issued the * request. The completion handler may then immediately free or reuse that * URB. * * With few exceptions, USB device drivers should never access URB fields * provided by usbcore or the HCD until its complete() is called. * The exceptions relate to periodic transfer scheduling. For both * interrupt and isochronous urbs, as part of successful URB submission * urb->interval is modified to reflect the actual transfer period used * (normally some power of two units). And for isochronous urbs, * urb->start_frame is modified to reflect when the URB's transfers were * scheduled to start. * * Not all isochronous transfer scheduling policies will work, but most * host controller drivers should easily handle ISO queues going from now * until 10-200 msec into the future. Drivers should try to keep at * least one or two msec of data in the queue; many controllers require * that new transfers start at least 1 msec in the future when they are * added. If the driver is unable to keep up and the queue empties out, * the behavior for new submissions is governed by the URB_ISO_ASAP flag. * If the flag is set, or if the queue is idle, then the URB is always * assigned to the first available (and not yet expired) slot in the * endpoint's schedule. If the flag is not set and the queue is active * then the URB is always assigned to the next slot in the schedule * following the end of the endpoint's previous URB, even if that slot is * in the past. When a packet is assigned in this way to a slot that has * already expired, the packet is not transmitted and the corresponding * usb_iso_packet_descriptor's status field will return -EXDEV. If this * would happen to all the packets in the URB, submission fails with a * -EXDEV error code. * * For control endpoints, the synchronous usb_control_msg() call is * often used (in non-interrupt context) instead of this call. * That is often used through convenience wrappers, for the requests * that are standardized in the USB 2.0 specification. For bulk * endpoints, a synchronous usb_bulk_msg() call is available. * * Return: * 0 on successful submissions. A negative error number otherwise. 
 *
 * Request Queuing:
 *
 * URBs may be submitted to endpoints before previous ones complete, to
 * minimize the impact of interrupt latencies and system overhead on data
 * throughput. With that queuing policy, an endpoint's queue would never
 * be empty. This is required for continuous isochronous data streams,
 * and may also be required for some kinds of interrupt transfers. Such
 * queuing also maximizes bandwidth utilization by letting USB controllers
 * start work on later requests before driver software has finished the
 * completion processing for earlier (successful) requests.
 *
 * As of Linux 2.6, all USB endpoint transfer queues support depths greater
 * than one. This was previously a HCD-specific behavior, except for ISO
 * transfers. Non-isochronous endpoint queues are inactive during cleanup
 * after faults (transfer errors or cancellation).
 *
 * Reserved Bandwidth Transfers:
 *
 * Periodic transfers (interrupt or isochronous) are performed repeatedly,
 * using the interval specified in the urb. Submitting the first urb to
 * the endpoint reserves the bandwidth necessary to make those transfers.
 * If the USB subsystem can't allocate sufficient bandwidth to perform
 * the periodic request, submitting such a periodic request should fail.
 *
 * For devices under xHCI, the bandwidth is reserved at configuration time, or
 * when the alt setting is selected. If there is not enough bus bandwidth, the
 * configuration/alt setting request will fail. Therefore, submissions to
 * periodic endpoints on devices under xHCI should never fail due to bandwidth
 * constraints.
 *
 * Device drivers must explicitly request that repetition, by ensuring that
 * some URB is always on the endpoint's queue (except possibly for short
 * periods during completion callbacks). When there is no longer an urb
 * queued, the endpoint's bandwidth reservation is canceled. This means
 * drivers can use their completion handlers to ensure they keep bandwidth
 * they need, by reinitializing and resubmitting the just-completed urb
 * until the driver no longer needs that periodic bandwidth.
 *
 * Memory Flags:
 *
 * The general rules for how to decide which mem_flags to use
 * are the same as for kmalloc. There are four
 * different possible values; GFP_KERNEL, GFP_NOFS, GFP_NOIO and
 * GFP_ATOMIC.
 *
 * GFP_NOFS is not ever used, as it has not been implemented yet.
 *
 * GFP_ATOMIC is used when
 *   (a) you are inside a completion handler, an interrupt, bottom half,
 *       tasklet or timer, or
 *   (b) you are holding a spinlock or rwlock (does not apply to
 *       semaphores), or
 *   (c) current->state != TASK_RUNNING, this is the case only after
 *       you've changed it.
 *
 * GFP_NOIO is used in the block io path and error handling of storage
 * devices.
 *
 * All other situations use GFP_KERNEL.
* * Some more specific rules for mem_flags can be inferred, such as * (1) start_xmit, timeout, and receive methods of network drivers must * use GFP_ATOMIC (they are called with a spinlock held); * (2) queuecommand methods of scsi drivers must use GFP_ATOMIC (also * called with a spinlock held); * (3) If you use a kernel thread with a network driver you must use * GFP_NOIO, unless (b) or (c) apply; * (4) after you have done a down() you can use GFP_KERNEL, unless (b) or (c) * apply or your are in a storage driver's block io path; * (5) USB probe and disconnect can use GFP_KERNEL unless (b) or (c) apply; and * (6) changing firmware on a running storage or net device uses * GFP_NOIO, unless b) or c) apply * */ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) { int xfertype, max; struct usb_device *dev; struct usb_host_endpoint *ep; int is_out; unsigned int allowed; if (!urb || !urb->complete) return -EINVAL; if (urb->hcpriv) { WARN_ONCE(1, "URB %pK submitted while active\n", urb); return -EBUSY; } dev = urb->dev; if ((!dev) || (dev->state < USB_STATE_UNAUTHENTICATED)) return -ENODEV; /* For now, get the endpoint from the pipe. Eventually drivers * will be required to set urb->ep directly and we will eliminate * urb->pipe. */ ep = usb_pipe_endpoint(dev, urb->pipe); if (!ep) return -ENOENT; urb->ep = ep; urb->status = -EINPROGRESS; urb->actual_length = 0; /* Lots of sanity checks, so HCDs can rely on clean data * and don't need to duplicate tests */ xfertype = usb_endpoint_type(&ep->desc); if (xfertype == USB_ENDPOINT_XFER_CONTROL) { struct usb_ctrlrequest *setup = (struct usb_ctrlrequest *) urb->setup_packet; if (!setup) return -ENOEXEC; is_out = !(setup->bRequestType & USB_DIR_IN) || !setup->wLength; dev_WARN_ONCE(&dev->dev, (usb_pipeout(urb->pipe) != is_out), "BOGUS control dir, pipe %x doesn't match bRequestType %x\n", urb->pipe, setup->bRequestType); if (le16_to_cpu(setup->wLength) != urb->transfer_buffer_length) { dev_dbg(&dev->dev, "BOGUS control len %d doesn't match transfer length %d\n", le16_to_cpu(setup->wLength), urb->transfer_buffer_length); return -EBADR; } } else { is_out = usb_endpoint_dir_out(&ep->desc); } /* Clear the internal flags and cache the direction for later use */ urb->transfer_flags &= ~(URB_DIR_MASK | URB_DMA_MAP_SINGLE | URB_DMA_MAP_PAGE | URB_DMA_MAP_SG | URB_MAP_LOCAL | URB_SETUP_MAP_SINGLE | URB_SETUP_MAP_LOCAL | URB_DMA_SG_COMBINED); urb->transfer_flags |= (is_out ? URB_DIR_OUT : URB_DIR_IN); kmsan_handle_urb(urb, is_out); if (xfertype != USB_ENDPOINT_XFER_CONTROL && dev->state < USB_STATE_CONFIGURED) return -ENODEV; max = usb_endpoint_maxp(&ep->desc); if (max <= 0) { dev_dbg(&dev->dev, "bogus endpoint ep%d%s in %s (bad maxpacket %d)\n", usb_endpoint_num(&ep->desc), is_out ? "out" : "in", __func__, max); return -EMSGSIZE; } /* periodic transfers limit size per frame/uframe, * but drivers only control those sizes for ISO. * while we're checking, initialize return status. */ if (xfertype == USB_ENDPOINT_XFER_ISOC) { int n, len; /* SuperSpeed isoc endpoints have up to 16 bursts of up to * 3 packets each */ if (dev->speed >= USB_SPEED_SUPER) { int burst = 1 + ep->ss_ep_comp.bMaxBurst; int mult = USB_SS_MULT(ep->ss_ep_comp.bmAttributes); max *= burst; max *= mult; } if (dev->speed == USB_SPEED_SUPER_PLUS && USB_SS_SSP_ISOC_COMP(ep->ss_ep_comp.bmAttributes)) { struct usb_ssp_isoc_ep_comp_descriptor *isoc_ep_comp; isoc_ep_comp = &ep->ssp_isoc_ep_comp; max = le32_to_cpu(isoc_ep_comp->dwBytesPerInterval); } /* "high bandwidth" mode, 1-3 packets/uframe? 
*/ if (dev->speed == USB_SPEED_HIGH) max *= usb_endpoint_maxp_mult(&ep->desc); if (urb->number_of_packets <= 0) return -EINVAL; for (n = 0; n < urb->number_of_packets; n++) { len = urb->iso_frame_desc[n].length; if (len < 0 || len > max) return -EMSGSIZE; urb->iso_frame_desc[n].status = -EXDEV; urb->iso_frame_desc[n].actual_length = 0; } } else if (urb->num_sgs && !urb->dev->bus->no_sg_constraint) { struct scatterlist *sg; int i; for_each_sg(urb->sg, sg, urb->num_sgs - 1, i) if (sg->length % max) return -EINVAL; } /* the I/O buffer must be mapped/unmapped, except when length=0 */ if (urb->transfer_buffer_length > INT_MAX) return -EMSGSIZE; /* * stuff that drivers shouldn't do, but which shouldn't * cause problems in HCDs if they get it wrong. */ /* Check that the pipe's type matches the endpoint's type */ if (usb_pipe_type_check(urb->dev, urb->pipe)) dev_WARN(&dev->dev, "BOGUS urb xfer, pipe %x != type %x\n", usb_pipetype(urb->pipe), pipetypes[xfertype]); /* Check against a simple/standard policy */ allowed = (URB_NO_TRANSFER_DMA_MAP | URB_NO_INTERRUPT | URB_DIR_MASK | URB_FREE_BUFFER); switch (xfertype) { case USB_ENDPOINT_XFER_BULK: case USB_ENDPOINT_XFER_INT: if (is_out) allowed |= URB_ZERO_PACKET; fallthrough; default: /* all non-iso endpoints */ if (!is_out) allowed |= URB_SHORT_NOT_OK; break; case USB_ENDPOINT_XFER_ISOC: allowed |= URB_ISO_ASAP; break; } allowed &= urb->transfer_flags; /* warn if submitter gave bogus flags */ if (allowed != urb->transfer_flags) dev_WARN(&dev->dev, "BOGUS urb flags, %x --> %x\n", urb->transfer_flags, allowed); /* * Force periodic transfer intervals to be legal values that are * a power of two (so HCDs don't need to). * * FIXME want bus->{intr,iso}_sched_horizon values here. Each HC * supports different values... this uses EHCI/UHCI defaults (and * EHCI can use smaller non-default values). */ switch (xfertype) { case USB_ENDPOINT_XFER_ISOC: case USB_ENDPOINT_XFER_INT: /* too small? */ if (urb->interval <= 0) return -EINVAL; /* too big? */ switch (dev->speed) { case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: /* units are 125us */ /* Handle up to 2^(16-1) microframes */ if (urb->interval > (1 << 15)) return -EINVAL; max = 1 << 15; break; case USB_SPEED_HIGH: /* units are microframes */ /* NOTE usb handles 2^15 */ if (urb->interval > (1024 * 8)) urb->interval = 1024 * 8; max = 1024 * 8; break; case USB_SPEED_FULL: /* units are frames/msec */ case USB_SPEED_LOW: if (xfertype == USB_ENDPOINT_XFER_INT) { if (urb->interval > 255) return -EINVAL; /* NOTE ohci only handles up to 32 */ max = 128; } else { if (urb->interval > 1024) urb->interval = 1024; /* NOTE usb and ohci handle up to 2^15 */ max = 1024; } break; default: return -EINVAL; } /* Round down to a power of 2, no more than max */ urb->interval = min(max, 1 << ilog2(urb->interval)); } return usb_hcd_submit_urb(urb, mem_flags); } EXPORT_SYMBOL_GPL(usb_submit_urb); /*-------------------------------------------------------------------*/ /** * usb_unlink_urb - abort/cancel a transfer request for an endpoint * @urb: pointer to urb describing a previously submitted request, * may be NULL * * This routine cancels an in-progress request. URBs complete only once * per submission, and may be canceled only once per submission. * Successful cancellation means termination of @urb will be expedited * and the completion handler will be called with a status code * indicating that the request has been canceled (rather than any other * code). 
* * Drivers should not call this routine or related routines, such as * usb_kill_urb() or usb_unlink_anchored_urbs(), after their disconnect * method has returned. The disconnect function should synchronize with * a driver's I/O routines to insure that all URB-related activity has * completed before it returns. * * This request is asynchronous, however the HCD might call the ->complete() * callback during unlink. Therefore when drivers call usb_unlink_urb(), they * must not hold any locks that may be taken by the completion function. * Success is indicated by returning -EINPROGRESS, at which time the URB will * probably not yet have been given back to the device driver. When it is * eventually called, the completion function will see @urb->status == * -ECONNRESET. * Failure is indicated by usb_unlink_urb() returning any other value. * Unlinking will fail when @urb is not currently "linked" (i.e., it was * never submitted, or it was unlinked before, or the hardware is already * finished with it), even if the completion handler has not yet run. * * The URB must not be deallocated while this routine is running. In * particular, when a driver calls this routine, it must insure that the * completion handler cannot deallocate the URB. * * Return: -EINPROGRESS on success. See description for other values on * failure. * * Unlinking and Endpoint Queues: * * [The behaviors and guarantees described below do not apply to virtual * root hubs but only to endpoint queues for physical USB devices.] * * Host Controller Drivers (HCDs) place all the URBs for a particular * endpoint in a queue. Normally the queue advances as the controller * hardware processes each request. But when an URB terminates with an * error its queue generally stops (see below), at least until that URB's * completion routine returns. It is guaranteed that a stopped queue * will not restart until all its unlinked URBs have been fully retired, * with their completion routines run, even if that's not until some time * after the original completion handler returns. The same behavior and * guarantee apply when an URB terminates because it was unlinked. * * Bulk and interrupt endpoint queues are guaranteed to stop whenever an * URB terminates with any sort of error, including -ECONNRESET, -ENOENT, * and -EREMOTEIO. Control endpoint queues behave the same way except * that they are not guaranteed to stop for -EREMOTEIO errors. Queues * for isochronous endpoints are treated differently, because they must * advance at fixed rates. Such queues do not stop when an URB * encounters an error or is unlinked. An unlinked isochronous URB may * leave a gap in the stream of packets; it is undefined whether such * gaps can be filled in. * * Note that early termination of an URB because a short packet was * received will generate a -EREMOTEIO error if and only if the * URB_SHORT_NOT_OK flag is set. By setting this flag, USB device * drivers can build deep queues for large or complex bulk transfers * and clean them up reliably after any sort of aborted transfer by * unlinking all pending URBs at the first fault. * * When a control URB terminates with an error other than -EREMOTEIO, it * is quite likely that the status stage of the transfer will not take * place. 
*/ int usb_unlink_urb(struct urb *urb) { if (!urb) return -EINVAL; if (!urb->dev) return -ENODEV; if (!urb->ep) return -EIDRM; return usb_hcd_unlink_urb(urb, -ECONNRESET); } EXPORT_SYMBOL_GPL(usb_unlink_urb); /** * usb_kill_urb - cancel a transfer request and wait for it to finish * @urb: pointer to URB describing a previously submitted request, * may be NULL * * This routine cancels an in-progress request. It is guaranteed that * upon return all completion handlers will have finished and the URB * will be totally idle and available for reuse. These features make * this an ideal way to stop I/O in a disconnect() callback or close() * function. If the request has not already finished or been unlinked * the completion handler will see urb->status == -ENOENT. * * While the routine is running, attempts to resubmit the URB will fail * with error -EPERM. Thus even if the URB's completion handler always * tries to resubmit, it will not succeed and the URB will become idle. * * The URB must not be deallocated while this routine is running. In * particular, when a driver calls this routine, it must insure that the * completion handler cannot deallocate the URB. * * This routine may not be used in an interrupt context (such as a bottom * half or a completion handler), or when holding a spinlock, or in other * situations where the caller can't schedule(). * * This routine should not be called by a driver after its disconnect * method has returned. */ void usb_kill_urb(struct urb *urb) { might_sleep(); if (!(urb && urb->dev && urb->ep)) return; atomic_inc(&urb->reject); /* * Order the write of urb->reject above before the read * of urb->use_count below. Pairs with the barriers in * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). */ smp_mb__after_atomic(); usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); atomic_dec(&urb->reject); } EXPORT_SYMBOL_GPL(usb_kill_urb); /** * usb_poison_urb - reliably kill a transfer and prevent further use of an URB * @urb: pointer to URB describing a previously submitted request, * may be NULL * * This routine cancels an in-progress request. It is guaranteed that * upon return all completion handlers will have finished and the URB * will be totally idle and cannot be reused. These features make * this an ideal way to stop I/O in a disconnect() callback. * If the request has not already finished or been unlinked * the completion handler will see urb->status == -ENOENT. * * After and while the routine runs, attempts to resubmit the URB will fail * with error -EPERM. Thus even if the URB's completion handler always * tries to resubmit, it will not succeed and the URB will become idle. * * The URB must not be deallocated while this routine is running. In * particular, when a driver calls this routine, it must insure that the * completion handler cannot deallocate the URB. * * This routine may not be used in an interrupt context (such as a bottom * half or a completion handler), or when holding a spinlock, or in other * situations where the caller can't schedule(). * * This routine should not be called by a driver after its disconnect * method has returned. */ void usb_poison_urb(struct urb *urb) { might_sleep(); if (!urb) return; atomic_inc(&urb->reject); /* * Order the write of urb->reject above before the read * of urb->use_count below. Pairs with the barriers in * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). 
*/ smp_mb__after_atomic(); if (!urb->dev || !urb->ep) return; usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); } EXPORT_SYMBOL_GPL(usb_poison_urb); void usb_unpoison_urb(struct urb *urb) { if (!urb) return; atomic_dec(&urb->reject); } EXPORT_SYMBOL_GPL(usb_unpoison_urb); /** * usb_block_urb - reliably prevent further use of an URB * @urb: pointer to URB to be blocked, may be NULL * * After the routine has run, attempts to resubmit the URB will fail * with error -EPERM. Thus even if the URB's completion handler always * tries to resubmit, it will not succeed and the URB will become idle. * * The URB must not be deallocated while this routine is running. In * particular, when a driver calls this routine, it must insure that the * completion handler cannot deallocate the URB. */ void usb_block_urb(struct urb *urb) { if (!urb) return; atomic_inc(&urb->reject); } EXPORT_SYMBOL_GPL(usb_block_urb); /** * usb_kill_anchored_urbs - kill all URBs associated with an anchor * @anchor: anchor the requests are bound to * * This kills all outstanding URBs starting from the back of the queue, * with guarantee that no completer callbacks will take place from the * anchor after this function returns. * * This routine should not be called by a driver after its disconnect * method has returned. */ void usb_kill_anchored_urbs(struct usb_anchor *anchor) { struct urb *victim; int surely_empty; do { spin_lock_irq(&anchor->lock); while (!list_empty(&anchor->urb_list)) { victim = list_entry(anchor->urb_list.prev, struct urb, anchor_list); /* make sure the URB isn't freed before we kill it */ usb_get_urb(victim); spin_unlock_irq(&anchor->lock); /* this will unanchor the URB */ usb_kill_urb(victim); usb_put_urb(victim); spin_lock_irq(&anchor->lock); } surely_empty = usb_anchor_check_wakeup(anchor); spin_unlock_irq(&anchor->lock); cpu_relax(); } while (!surely_empty); } EXPORT_SYMBOL_GPL(usb_kill_anchored_urbs); /** * usb_poison_anchored_urbs - cease all traffic from an anchor * @anchor: anchor the requests are bound to * * this allows all outstanding URBs to be poisoned starting * from the back of the queue. Newly added URBs will also be * poisoned * * This routine should not be called by a driver after its disconnect * method has returned. 
*/ void usb_poison_anchored_urbs(struct usb_anchor *anchor) { struct urb *victim; int surely_empty; do { spin_lock_irq(&anchor->lock); anchor->poisoned = 1; while (!list_empty(&anchor->urb_list)) { victim = list_entry(anchor->urb_list.prev, struct urb, anchor_list); /* make sure the URB isn't freed before we kill it */ usb_get_urb(victim); spin_unlock_irq(&anchor->lock); /* this will unanchor the URB */ usb_poison_urb(victim); usb_put_urb(victim); spin_lock_irq(&anchor->lock); } surely_empty = usb_anchor_check_wakeup(anchor); spin_unlock_irq(&anchor->lock); cpu_relax(); } while (!surely_empty); } EXPORT_SYMBOL_GPL(usb_poison_anchored_urbs); /** * usb_unpoison_anchored_urbs - let an anchor be used successfully again * @anchor: anchor the requests are bound to * * Reverses the effect of usb_poison_anchored_urbs * the anchor can be used normally after it returns */ void usb_unpoison_anchored_urbs(struct usb_anchor *anchor) { unsigned long flags; struct urb *lazarus; spin_lock_irqsave(&anchor->lock, flags); list_for_each_entry(lazarus, &anchor->urb_list, anchor_list) { usb_unpoison_urb(lazarus); } anchor->poisoned = 0; spin_unlock_irqrestore(&anchor->lock, flags); } EXPORT_SYMBOL_GPL(usb_unpoison_anchored_urbs); /** * usb_unlink_anchored_urbs - asynchronously cancel transfer requests en masse * @anchor: anchor the requests are bound to * * this allows all outstanding URBs to be unlinked starting * from the back of the queue. This function is asynchronous. * The unlinking is just triggered. It may happen after this * function has returned. * * This routine should not be called by a driver after its disconnect * method has returned. */ void usb_unlink_anchored_urbs(struct usb_anchor *anchor) { struct urb *victim; while ((victim = usb_get_from_anchor(anchor)) != NULL) { usb_unlink_urb(victim); usb_put_urb(victim); } } EXPORT_SYMBOL_GPL(usb_unlink_anchored_urbs); /** * usb_anchor_suspend_wakeups * @anchor: the anchor you want to suspend wakeups on * * Call this to stop the last urb being unanchored from waking up any * usb_wait_anchor_empty_timeout waiters. This is used in the hcd urb give- * back path to delay waking up until after the completion handler has run. */ void usb_anchor_suspend_wakeups(struct usb_anchor *anchor) { if (anchor) atomic_inc(&anchor->suspend_wakeups); } EXPORT_SYMBOL_GPL(usb_anchor_suspend_wakeups); /** * usb_anchor_resume_wakeups * @anchor: the anchor you want to resume wakeups on * * Allow usb_wait_anchor_empty_timeout waiters to be woken up again, and * wake up any current waiters if the anchor is empty. */ void usb_anchor_resume_wakeups(struct usb_anchor *anchor) { if (!anchor) return; atomic_dec(&anchor->suspend_wakeups); if (usb_anchor_check_wakeup(anchor)) wake_up(&anchor->wait); } EXPORT_SYMBOL_GPL(usb_anchor_resume_wakeups); /** * usb_wait_anchor_empty_timeout - wait for an anchor to be unused * @anchor: the anchor you want to become unused * @timeout: how long you are willing to wait in milliseconds * * Call this is you want to be sure all an anchor's * URBs have finished * * Return: Non-zero if the anchor became unused. Zero on timeout. 
*/ int usb_wait_anchor_empty_timeout(struct usb_anchor *anchor, unsigned int timeout) { return wait_event_timeout(anchor->wait, usb_anchor_check_wakeup(anchor), msecs_to_jiffies(timeout)); } EXPORT_SYMBOL_GPL(usb_wait_anchor_empty_timeout); /** * usb_get_from_anchor - get an anchor's oldest urb * @anchor: the anchor whose urb you want * * This will take the oldest urb from an anchor, * unanchor and return it * * Return: The oldest urb from @anchor, or %NULL if @anchor has no * urbs associated with it. */ struct urb *usb_get_from_anchor(struct usb_anchor *anchor) { struct urb *victim; unsigned long flags; spin_lock_irqsave(&anchor->lock, flags); if (!list_empty(&anchor->urb_list)) { victim = list_entry(anchor->urb_list.next, struct urb, anchor_list); usb_get_urb(victim); __usb_unanchor_urb(victim, anchor); } else { victim = NULL; } spin_unlock_irqrestore(&anchor->lock, flags); return victim; } EXPORT_SYMBOL_GPL(usb_get_from_anchor); /** * usb_scuttle_anchored_urbs - unanchor all an anchor's urbs * @anchor: the anchor whose urbs you want to unanchor * * use this to get rid of all an anchor's urbs */ void usb_scuttle_anchored_urbs(struct usb_anchor *anchor) { struct urb *victim; unsigned long flags; int surely_empty; do { spin_lock_irqsave(&anchor->lock, flags); while (!list_empty(&anchor->urb_list)) { victim = list_entry(anchor->urb_list.prev, struct urb, anchor_list); __usb_unanchor_urb(victim, anchor); } surely_empty = usb_anchor_check_wakeup(anchor); spin_unlock_irqrestore(&anchor->lock, flags); cpu_relax(); } while (!surely_empty); } EXPORT_SYMBOL_GPL(usb_scuttle_anchored_urbs); /** * usb_anchor_empty - is an anchor empty * @anchor: the anchor you want to query * * Return: 1 if the anchor has no urbs associated with it. */ int usb_anchor_empty(struct usb_anchor *anchor) { return list_empty(&anchor->urb_list); } EXPORT_SYMBOL_GPL(usb_anchor_empty);
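/*
 * A minimal usage sketch of the anchor helpers above, assuming a hypothetical
 * driver with a my_dev structure and a bulk OUT endpoint 1: every submitted
 * URB is anchored, and the disconnect() path calls usb_kill_anchored_urbs()
 * so no completion handler can run afterwards.  The my_* names and buffer
 * handling are illustrative only, not taken from any real driver.
 */
#include <linux/usb.h>
#include <linux/slab.h>

/* Hypothetical per-device state: one anchor for all in-flight URBs. */
struct my_dev {
	struct usb_device *udev;
	struct usb_anchor urbs;		/* init_usb_anchor() at probe time */
};

static void my_complete(struct urb *urb)
{
	/* The core has already unanchored the URB before giveback. */
	kfree(urb->transfer_buffer);	/* buffer was kmalloc()ed by the caller */
}

/* Submit one bulk OUT transfer, anchored so it can be cancelled en masse. */
static int my_submit(struct my_dev *dev, void *data, int len)
{
	struct urb *urb = usb_alloc_urb(0, GFP_KERNEL);
	int ret;

	if (!urb)
		return -ENOMEM;

	usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 1),
			  data, len, my_complete, dev);
	usb_anchor_urb(urb, &dev->urbs);
	ret = usb_submit_urb(urb, GFP_KERNEL);
	if (ret)
		usb_unanchor_urb(urb);
	usb_free_urb(urb);	/* drop our reference; anchor and HCD hold theirs */
	return ret;
}

/* disconnect() path: block until every anchored URB has been given back. */
static void my_stop_io(struct my_dev *dev)
{
	usb_kill_anchored_urbs(&dev->urbs);
}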
// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/usb/core/endpoint.c
 *
 * (C) Copyright 2002,2004,2006 Greg Kroah-Hartman
 * (C) Copyright 2002,2004 IBM Corp.
 * (C) Copyright 2006 Novell Inc.
 *
 * Released under the GPLv2 only.
 *
 * Endpoint sysfs stuff
 */

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include "usb.h"

struct ep_device {
	struct usb_endpoint_descriptor *desc;
	struct usb_device *udev;
	struct device dev;
};

#define to_ep_device(_dev) \
	container_of(_dev, struct ep_device, dev)

struct ep_attribute {
	struct attribute attr;
	ssize_t (*show)(struct usb_device *,
			struct usb_endpoint_descriptor *, char *);
};

#define to_ep_attribute(_attr) \
	container_of(_attr, struct ep_attribute, attr)

#define usb_ep_attr(field, format_string)			\
static ssize_t field##_show(struct device *dev,			\
			    struct device_attribute *attr,	\
			    char *buf)				\
{								\
	struct ep_device *ep = to_ep_device(dev);		\
	return sprintf(buf, format_string, ep->desc->field);	\
}								\
static DEVICE_ATTR_RO(field)

usb_ep_attr(bLength, "%02x\n");
usb_ep_attr(bEndpointAddress, "%02x\n");
usb_ep_attr(bmAttributes, "%02x\n");
usb_ep_attr(bInterval, "%02x\n");

static ssize_t wMaxPacketSize_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct ep_device *ep = to_ep_device(dev);

	return sprintf(buf, "%04x\n", usb_endpoint_maxp(ep->desc));
}
static DEVICE_ATTR_RO(wMaxPacketSize);

static ssize_t type_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct ep_device *ep = to_ep_device(dev);
	char *type = "unknown";

	switch (usb_endpoint_type(ep->desc)) {
	case USB_ENDPOINT_XFER_CONTROL:
		type = "Control";
		break;
	case USB_ENDPOINT_XFER_ISOC:
		type = "Isoc";
		break;
	case USB_ENDPOINT_XFER_BULK:
		type = "Bulk";
		break;
	case USB_ENDPOINT_XFER_INT:
		type = "Interrupt";
		break;
	}
	return sprintf(buf, "%s\n", type);
}
static DEVICE_ATTR_RO(type);

static ssize_t interval_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct ep_device *ep = to_ep_device(dev);
	unsigned int interval;
	char unit;

	interval = usb_decode_interval(ep->desc, ep->udev->speed);
	if (interval % 1000) {
		unit = 'u';
	} else {
		unit = 'm';
		interval /= 1000;
	}

	return sprintf(buf, "%d%cs\n", interval, unit);
}
static DEVICE_ATTR_RO(interval);

static ssize_t direction_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct ep_device *ep = to_ep_device(dev);
	char *direction;

	if (usb_endpoint_xfer_control(ep->desc))
		direction = "both";
	else if (usb_endpoint_dir_in(ep->desc))
		direction = "in";
	else
		direction = "out";
	return sprintf(buf, "%s\n", direction);
}
static DEVICE_ATTR_RO(direction);

static struct attribute *ep_dev_attrs[] = {
	&dev_attr_bLength.attr,
	&dev_attr_bEndpointAddress.attr,
	&dev_attr_bmAttributes.attr,
	&dev_attr_bInterval.attr,
	&dev_attr_wMaxPacketSize.attr,
	&dev_attr_interval.attr,
	&dev_attr_type.attr,
&dev_attr_direction.attr, NULL, }; static const struct attribute_group ep_dev_attr_grp = { .attrs = ep_dev_attrs, }; static const struct attribute_group *ep_dev_groups[] = { &ep_dev_attr_grp, NULL }; static void ep_device_release(struct device *dev) { struct ep_device *ep_dev = to_ep_device(dev); kfree(ep_dev); } const struct device_type usb_ep_device_type = { .name = "usb_endpoint", .release = ep_device_release, }; int usb_create_ep_devs(struct device *parent, struct usb_host_endpoint *endpoint, struct usb_device *udev) { struct ep_device *ep_dev; int retval; ep_dev = kzalloc(sizeof(*ep_dev), GFP_KERNEL); if (!ep_dev) { retval = -ENOMEM; goto exit; } ep_dev->desc = &endpoint->desc; ep_dev->udev = udev; ep_dev->dev.groups = ep_dev_groups; ep_dev->dev.type = &usb_ep_device_type; ep_dev->dev.parent = parent; dev_set_name(&ep_dev->dev, "ep_%02x", endpoint->desc.bEndpointAddress); retval = device_register(&ep_dev->dev); if (retval) goto error_register; device_enable_async_suspend(&ep_dev->dev); endpoint->ep_dev = ep_dev; return retval; error_register: put_device(&ep_dev->dev); exit: return retval; } void usb_remove_ep_devs(struct usb_host_endpoint *endpoint) { struct ep_device *ep_dev = endpoint->ep_dev; if (ep_dev) { device_unregister(&ep_dev->dev); endpoint->ep_dev = NULL; } }
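/*
 * The attributes registered above surface in sysfs as one ep_<addr> directory
 * per endpoint (see the dev_set_name(..., "ep_%02x", ...) call).  Below is a
 * minimal user-space sketch that dumps those files; the /sys path in the
 * usage comment is only an example and depends on the bus topology.
 */
/* Build: cc -o epdump epdump.c
 * Run:   ./epdump /sys/bus/usb/devices/1-1:1.0/ep_81   (example path only)
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	static const char *attrs[] = {
		"bLength", "bEndpointAddress", "bmAttributes", "bInterval",
		"wMaxPacketSize", "type", "direction", "interval",
	};
	char path[512], line[64];
	unsigned int i;
	FILE *f;

	if (argc != 2) {
		fprintf(stderr, "usage: %s /sys/.../ep_XX\n", argv[0]);
		return 1;
	}
	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++) {
		snprintf(path, sizeof(path), "%s/%s", argv[1], attrs[i]);
		f = fopen(path, "r");
		if (!f)
			continue;	/* attribute missing or no permission */
		if (fgets(line, sizeof(line), f))
			printf("%-16s %s", attrs[i], line); /* values end in \n */
		fclose(f);
	}
	return 0;
}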
// SPDX-License-Identifier: GPL-2.0-only
/*
 * v4l2-event.c
 *
 * V4L2 events.
 *
 * Copyright (C) 2009--2010 Nokia Corporation.
 *
 * Contact: Sakari Ailus <sakari.ailus@iki.fi>
 */

#include <media/v4l2-dev.h>
#include <media/v4l2-fh.h>
#include <media/v4l2-event.h>

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/export.h>

static unsigned int sev_pos(const struct v4l2_subscribed_event *sev, unsigned int idx)
{
	idx += sev->first;
	return idx >= sev->elems ? idx - sev->elems : idx;
}

static int __v4l2_event_dequeue(struct v4l2_fh *fh, struct v4l2_event *event)
{
	struct v4l2_kevent *kev;
	struct timespec64 ts;
	unsigned long flags;

	spin_lock_irqsave(&fh->vdev->fh_lock, flags);

	if (list_empty(&fh->available)) {
		spin_unlock_irqrestore(&fh->vdev->fh_lock, flags);
		return -ENOENT;
	}

	WARN_ON(fh->navailable == 0);

	kev = list_first_entry(&fh->available, struct v4l2_kevent, list);
	list_del(&kev->list);
	fh->navailable--;

	kev->event.pending = fh->navailable;
	*event = kev->event;
	ts = ns_to_timespec64(kev->ts);
	event->timestamp.tv_sec = ts.tv_sec;
	event->timestamp.tv_nsec = ts.tv_nsec;
	kev->sev->first = sev_pos(kev->sev, 1);
	kev->sev->in_use--;

	spin_unlock_irqrestore(&fh->vdev->fh_lock, flags);

	return 0;
}

int v4l2_event_dequeue(struct v4l2_fh *fh, struct v4l2_event *event,
		       int nonblocking)
{
	int ret;

	if (nonblocking)
		return __v4l2_event_dequeue(fh, event);

	/* Release the vdev lock while waiting */
	if (fh->vdev->lock)
		mutex_unlock(fh->vdev->lock);

	do {
		ret = wait_event_interruptible(fh->wait,
					       fh->navailable != 0);
		if (ret < 0)
			break;

		ret = __v4l2_event_dequeue(fh, event);
	} while (ret == -ENOENT);

	if (fh->vdev->lock)
		mutex_lock(fh->vdev->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(v4l2_event_dequeue);

/* Caller must hold fh->vdev->fh_lock!
*/ static struct v4l2_subscribed_event *v4l2_event_subscribed( struct v4l2_fh *fh, u32 type, u32 id) { struct v4l2_subscribed_event *sev; assert_spin_locked(&fh->vdev->fh_lock); list_for_each_entry(sev, &fh->subscribed, list) if (sev->type == type && sev->id == id) return sev; return NULL; } static void __v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *ev, u64 ts) { struct v4l2_subscribed_event *sev; struct v4l2_kevent *kev; bool copy_payload = true; /* Are we subscribed? */ sev = v4l2_event_subscribed(fh, ev->type, ev->id); if (sev == NULL) return; /* Increase event sequence number on fh. */ fh->sequence++; /* Do we have any free events? */ if (sev->in_use == sev->elems) { /* no, remove the oldest one */ kev = sev->events + sev_pos(sev, 0); list_del(&kev->list); sev->in_use--; sev->first = sev_pos(sev, 1); fh->navailable--; if (sev->elems == 1) { if (sev->ops && sev->ops->replace) { sev->ops->replace(&kev->event, ev); copy_payload = false; } } else if (sev->ops && sev->ops->merge) { struct v4l2_kevent *second_oldest = sev->events + sev_pos(sev, 0); sev->ops->merge(&kev->event, &second_oldest->event); } } /* Take one and fill it. */ kev = sev->events + sev_pos(sev, sev->in_use); kev->event.type = ev->type; if (copy_payload) kev->event.u = ev->u; kev->event.id = ev->id; kev->ts = ts; kev->event.sequence = fh->sequence; sev->in_use++; list_add_tail(&kev->list, &fh->available); fh->navailable++; wake_up_all(&fh->wait); } void v4l2_event_queue(struct video_device *vdev, const struct v4l2_event *ev) { struct v4l2_fh *fh; unsigned long flags; u64 ts; if (vdev == NULL) return; ts = ktime_get_ns(); spin_lock_irqsave(&vdev->fh_lock, flags); list_for_each_entry(fh, &vdev->fh_list, list) __v4l2_event_queue_fh(fh, ev, ts); spin_unlock_irqrestore(&vdev->fh_lock, flags); } EXPORT_SYMBOL_GPL(v4l2_event_queue); void v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *ev) { unsigned long flags; u64 ts = ktime_get_ns(); spin_lock_irqsave(&fh->vdev->fh_lock, flags); __v4l2_event_queue_fh(fh, ev, ts); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); } EXPORT_SYMBOL_GPL(v4l2_event_queue_fh); int v4l2_event_pending(struct v4l2_fh *fh) { return fh->navailable; } EXPORT_SYMBOL_GPL(v4l2_event_pending); void v4l2_event_wake_all(struct video_device *vdev) { struct v4l2_fh *fh; unsigned long flags; if (!vdev) return; spin_lock_irqsave(&vdev->fh_lock, flags); list_for_each_entry(fh, &vdev->fh_list, list) wake_up_all(&fh->wait); spin_unlock_irqrestore(&vdev->fh_lock, flags); } EXPORT_SYMBOL_GPL(v4l2_event_wake_all); static void __v4l2_event_unsubscribe(struct v4l2_subscribed_event *sev) { struct v4l2_fh *fh = sev->fh; unsigned int i; lockdep_assert_held(&fh->subscribe_lock); assert_spin_locked(&fh->vdev->fh_lock); /* Remove any pending events for this subscription */ for (i = 0; i < sev->in_use; i++) { list_del(&sev->events[sev_pos(sev, i)].list); fh->navailable--; } list_del(&sev->list); } int v4l2_event_subscribe(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub, unsigned int elems, const struct v4l2_subscribed_event_ops *ops) { struct v4l2_subscribed_event *sev, *found_ev; unsigned long flags; unsigned int i; int ret = 0; if (sub->type == V4L2_EVENT_ALL) return -EINVAL; if (elems < 1) elems = 1; sev = kvzalloc(struct_size(sev, events, elems), GFP_KERNEL); if (!sev) return -ENOMEM; sev->elems = elems; for (i = 0; i < elems; i++) sev->events[i].sev = sev; sev->type = sub->type; sev->id = sub->id; sev->flags = sub->flags; sev->fh = fh; sev->ops = ops; 
mutex_lock(&fh->subscribe_lock); spin_lock_irqsave(&fh->vdev->fh_lock, flags); found_ev = v4l2_event_subscribed(fh, sub->type, sub->id); if (!found_ev) list_add(&sev->list, &fh->subscribed); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (found_ev) { /* Already listening */ kvfree(sev); } else if (sev->ops && sev->ops->add) { ret = sev->ops->add(sev, elems); if (ret) { spin_lock_irqsave(&fh->vdev->fh_lock, flags); __v4l2_event_unsubscribe(sev); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); kvfree(sev); } } mutex_unlock(&fh->subscribe_lock); return ret; } EXPORT_SYMBOL_GPL(v4l2_event_subscribe); void v4l2_event_unsubscribe_all(struct v4l2_fh *fh) { struct v4l2_event_subscription sub; struct v4l2_subscribed_event *sev; unsigned long flags; do { sev = NULL; spin_lock_irqsave(&fh->vdev->fh_lock, flags); if (!list_empty(&fh->subscribed)) { sev = list_first_entry(&fh->subscribed, struct v4l2_subscribed_event, list); sub.type = sev->type; sub.id = sev->id; } spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (sev) v4l2_event_unsubscribe(fh, &sub); } while (sev); } EXPORT_SYMBOL_GPL(v4l2_event_unsubscribe_all); int v4l2_event_unsubscribe(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { struct v4l2_subscribed_event *sev; unsigned long flags; if (sub->type == V4L2_EVENT_ALL) { v4l2_event_unsubscribe_all(fh); return 0; } mutex_lock(&fh->subscribe_lock); spin_lock_irqsave(&fh->vdev->fh_lock, flags); sev = v4l2_event_subscribed(fh, sub->type, sub->id); if (sev != NULL) __v4l2_event_unsubscribe(sev); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (sev && sev->ops && sev->ops->del) sev->ops->del(sev); mutex_unlock(&fh->subscribe_lock); kvfree(sev); return 0; } EXPORT_SYMBOL_GPL(v4l2_event_unsubscribe); int v4l2_event_subdev_unsubscribe(struct v4l2_subdev *sd, struct v4l2_fh *fh, struct v4l2_event_subscription *sub) { return v4l2_event_unsubscribe(fh, sub); } EXPORT_SYMBOL_GPL(v4l2_event_subdev_unsubscribe); static void v4l2_event_src_replace(struct v4l2_event *old, const struct v4l2_event *new) { u32 old_changes = old->u.src_change.changes; old->u.src_change = new->u.src_change; old->u.src_change.changes |= old_changes; } static void v4l2_event_src_merge(const struct v4l2_event *old, struct v4l2_event *new) { new->u.src_change.changes |= old->u.src_change.changes; } static const struct v4l2_subscribed_event_ops v4l2_event_src_ch_ops = { .replace = v4l2_event_src_replace, .merge = v4l2_event_src_merge, }; int v4l2_src_change_event_subscribe(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { if (sub->type == V4L2_EVENT_SOURCE_CHANGE) return v4l2_event_subscribe(fh, sub, 0, &v4l2_event_src_ch_ops); return -EINVAL; } EXPORT_SYMBOL_GPL(v4l2_src_change_event_subscribe); int v4l2_src_change_event_subdev_subscribe(struct v4l2_subdev *sd, struct v4l2_fh *fh, struct v4l2_event_subscription *sub) { return v4l2_src_change_event_subscribe(fh, sub); } EXPORT_SYMBOL_GPL(v4l2_src_change_event_subdev_subscribe);
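/*
 * A minimal, hypothetical sketch of how a driver consumes the API above:
 * VIDIOC_SUBSCRIBE_EVENT is forwarded to v4l2_event_subscribe() (or the
 * source-change helper), and the driver later raises events with
 * v4l2_event_queue().  The my_* names and the use of V4L2_EVENT_EOS are
 * illustrative only, not part of this file.
 */
#include <linux/videodev2.h>
#include <media/v4l2-dev.h>
#include <media/v4l2-event.h>
#include <media/v4l2-fh.h>

/* Hypothetical .vidioc_subscribe_event handler. */
static int my_subscribe_event(struct v4l2_fh *fh,
			      const struct v4l2_event_subscription *sub)
{
	switch (sub->type) {
	case V4L2_EVENT_EOS:
		/* queue depth of 2, no replace/merge ops needed */
		return v4l2_event_subscribe(fh, sub, 2, NULL);
	case V4L2_EVENT_SOURCE_CHANGE:
		/* uses the replace/merge ops defined in this file */
		return v4l2_src_change_event_subscribe(fh, sub);
	default:
		return -EINVAL;
	}
}

/* Somewhere in the driver's streaming code, once the last buffer is done: */
static void my_signal_eos(struct video_device *vdev)
{
	static const struct v4l2_event ev = {
		.type = V4L2_EVENT_EOS,
	};

	/* delivered to every file handle subscribed to V4L2_EVENT_EOS */
	v4l2_event_queue(vdev, &ev);
}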
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * imon.c: input and display driver for SoundGraph iMON IR/VFD/LCD
 *
 * Copyright(C) 2010 Jarod Wilson <jarod@wilsonet.com>
 * Portions based on the original lirc_imon driver,
 * Copyright(C) 2004 Venky Raju(dev@venky.ws)
 *
 * Huge thanks to R. Geoff Newbury for invaluable debugging on the
 * 0xffdc iMON devices, and for sending me one to hack on, without
 * which the support for them wouldn't be nearly as good. Thanks
 * also to the numerous 0xffdc device owners that tested auto-config
 * support for me and provided debug dumps from their devices.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/ratelimit.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/usb/input.h> #include <media/rc-core.h> #include <linux/timer.h> #define MOD_AUTHOR "Jarod Wilson <jarod@wilsonet.com>" #define MOD_DESC "Driver for SoundGraph iMON MultiMedia IR/Display" #define MOD_NAME "imon" #define MOD_VERSION "0.9.4" #define DISPLAY_MINOR_BASE 144 #define DEVICE_NAME "lcd%d" #define BUF_CHUNK_SIZE 8 #define BUF_SIZE 128 #define BIT_DURATION 250 /* each bit received is 250us */ #define IMON_CLOCK_ENABLE_PACKETS 2 /*** P R O T O T Y P E S ***/ /* USB Callback prototypes */ static int imon_probe(struct usb_interface *interface, const struct usb_device_id *id); static void imon_disconnect(struct usb_interface *interface); static void usb_rx_callback_intf0(struct urb *urb); static void usb_rx_callback_intf1(struct urb *urb); static void usb_tx_callback(struct urb *urb); /* suspend/resume support */ static int imon_resume(struct usb_interface *intf); static int imon_suspend(struct usb_interface *intf, pm_message_t message); /* Display file_operations function prototypes */ static int display_open(struct inode *inode, struct file *file); static int display_close(struct inode *inode, struct file *file); /* VFD write operation */ static ssize_t vfd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos); /* LCD file_operations override function prototypes */ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos); /*** G L O B A L S ***/ struct imon_panel_key_table { u64 hw_code; u32 keycode; }; struct imon_usb_dev_descr { __u16 flags; #define IMON_NO_FLAGS 0 #define IMON_NEED_20MS_PKT_DELAY 1 #define IMON_SUPPRESS_REPEATED_KEYS 2 struct imon_panel_key_table key_table[]; }; struct imon_context { struct device *dev; /* Newer devices have two interfaces */ struct usb_device *usbdev_intf0; struct usb_device *usbdev_intf1; bool display_supported; /* not all controllers do */ bool display_isopen; /* display port has been opened */ bool rf_device; /* true if iMON 2.4G LT/DT RF device */ bool rf_isassociating; /* RF remote associating */ bool dev_present_intf0; /* USB device presence, interface 0 */ bool dev_present_intf1; /* USB device presence, interface 1 */ struct mutex lock; /* to lock this object */ wait_queue_head_t remove_ok; /* For unexpected USB disconnects */ struct usb_endpoint_descriptor *rx_endpoint_intf0; struct usb_endpoint_descriptor *rx_endpoint_intf1; struct usb_endpoint_descriptor *tx_endpoint; struct urb *rx_urb_intf0; struct urb *rx_urb_intf1; struct urb *tx_urb; bool tx_control; unsigned char usb_rx_buf[8]; unsigned char usb_tx_buf[8]; unsigned int send_packet_delay; struct tx_t { unsigned char data_buf[35]; /* user data buffer */ struct completion finished; /* wait for write to finish */ bool busy; /* write in progress */ int status; /* status of tx completion */ } tx; u16 vendor; /* usb vendor ID */ u16 product; /* usb product ID */ struct rc_dev *rdev; /* rc-core device for remote */ struct input_dev *idev; /* input device for panel & IR mouse */ struct input_dev *touch; /* input device for touchscreen */ spinlock_t kc_lock; /* make sure we get keycodes right */ u32 kc; /* current input keycode */ u32 last_keycode; /* last reported input keycode */ u32 rc_scancode; /* the 
computed remote scancode */ u8 rc_toggle; /* the computed remote toggle bit */ u64 rc_proto; /* iMON or MCE (RC6) IR protocol? */ bool release_code; /* some keys send a release code */ u8 display_type; /* store the display type */ bool pad_mouse; /* toggle kbd(0)/mouse(1) mode */ char name_rdev[128]; /* rc input device name */ char phys_rdev[64]; /* rc input device phys path */ char name_idev[128]; /* input device name */ char phys_idev[64]; /* input device phys path */ char name_touch[128]; /* touch screen name */ char phys_touch[64]; /* touch screen phys path */ struct timer_list ttimer; /* touch screen timer */ int touch_x; /* x coordinate on touchscreen */ int touch_y; /* y coordinate on touchscreen */ const struct imon_usb_dev_descr *dev_descr; /* device description with key */ /* table for front panels */ /* * Fields for deferring free_imon_context(). * * Since reference to "struct imon_context" is stored into * "struct file"->private_data, we need to remember * how many file descriptors might access this "struct imon_context". */ refcount_t users; /* * Use a flag for telling display_open()/vfd_write()/lcd_write() that * imon_disconnect() was already called. */ bool disconnected; /* * We need to wait for RCU grace period in order to allow * display_open() to safely check ->disconnected and increment ->users. */ struct rcu_head rcu; }; #define TOUCH_TIMEOUT (HZ/30) /* vfd character device file operations */ static const struct file_operations vfd_fops = { .owner = THIS_MODULE, .open = display_open, .write = vfd_write, .release = display_close, .llseek = noop_llseek, }; /* lcd character device file operations */ static const struct file_operations lcd_fops = { .owner = THIS_MODULE, .open = display_open, .write = lcd_write, .release = display_close, .llseek = noop_llseek, }; enum { IMON_DISPLAY_TYPE_AUTO = 0, IMON_DISPLAY_TYPE_VFD = 1, IMON_DISPLAY_TYPE_LCD = 2, IMON_DISPLAY_TYPE_VGA = 3, IMON_DISPLAY_TYPE_NONE = 4, }; enum { IMON_KEY_IMON = 0, IMON_KEY_MCE = 1, IMON_KEY_PANEL = 2, }; static struct usb_class_driver imon_vfd_class = { .name = DEVICE_NAME, .fops = &vfd_fops, .minor_base = DISPLAY_MINOR_BASE, }; static struct usb_class_driver imon_lcd_class = { .name = DEVICE_NAME, .fops = &lcd_fops, .minor_base = DISPLAY_MINOR_BASE, }; /* imon receiver front panel/knob key table */ static const struct imon_usb_dev_descr imon_default_table = { .flags = IMON_NO_FLAGS, .key_table = { { 0x000000000f00ffeell, KEY_MEDIA }, /* Go */ { 0x000000001200ffeell, KEY_UP }, { 0x000000001300ffeell, KEY_DOWN }, { 0x000000001400ffeell, KEY_LEFT }, { 0x000000001500ffeell, KEY_RIGHT }, { 0x000000001600ffeell, KEY_ENTER }, { 0x000000001700ffeell, KEY_ESC }, { 0x000000001f00ffeell, KEY_AUDIO }, { 0x000000002000ffeell, KEY_VIDEO }, { 0x000000002100ffeell, KEY_CAMERA }, { 0x000000002700ffeell, KEY_DVD }, { 0x000000002300ffeell, KEY_TV }, { 0x000000002b00ffeell, KEY_EXIT }, { 0x000000002c00ffeell, KEY_SELECT }, { 0x000000002d00ffeell, KEY_MENU }, { 0x000000000500ffeell, KEY_PREVIOUS }, { 0x000000000700ffeell, KEY_REWIND }, { 0x000000000400ffeell, KEY_STOP }, { 0x000000003c00ffeell, KEY_PLAYPAUSE }, { 0x000000000800ffeell, KEY_FASTFORWARD }, { 0x000000000600ffeell, KEY_NEXT }, { 0x000000010000ffeell, KEY_RIGHT }, { 0x000001000000ffeell, KEY_LEFT }, { 0x000000003d00ffeell, KEY_SELECT }, { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000000100ffeell, KEY_MUTE }, /* 0xffdc iMON MCE VFD */ { 0x00010000ffffffeell, KEY_VOLUMEUP }, { 0x01000000ffffffeell, KEY_VOLUMEDOWN }, { 
0x00000001ffffffeell, KEY_MUTE }, { 0x0000000fffffffeell, KEY_MEDIA }, { 0x00000012ffffffeell, KEY_UP }, { 0x00000013ffffffeell, KEY_DOWN }, { 0x00000014ffffffeell, KEY_LEFT }, { 0x00000015ffffffeell, KEY_RIGHT }, { 0x00000016ffffffeell, KEY_ENTER }, { 0x00000017ffffffeell, KEY_ESC }, /* iMON Knob values */ { 0x000100ffffffffeell, KEY_VOLUMEUP }, { 0x010000ffffffffeell, KEY_VOLUMEDOWN }, { 0x000008ffffffffeell, KEY_MUTE }, { 0, KEY_RESERVED }, } }; static const struct imon_usb_dev_descr imon_OEM_VFD = { .flags = IMON_NEED_20MS_PKT_DELAY, .key_table = { { 0x000000000f00ffeell, KEY_MEDIA }, /* Go */ { 0x000000001200ffeell, KEY_UP }, { 0x000000001300ffeell, KEY_DOWN }, { 0x000000001400ffeell, KEY_LEFT }, { 0x000000001500ffeell, KEY_RIGHT }, { 0x000000001600ffeell, KEY_ENTER }, { 0x000000001700ffeell, KEY_ESC }, { 0x000000001f00ffeell, KEY_AUDIO }, { 0x000000002b00ffeell, KEY_EXIT }, { 0x000000002c00ffeell, KEY_SELECT }, { 0x000000002d00ffeell, KEY_MENU }, { 0x000000000500ffeell, KEY_PREVIOUS }, { 0x000000000700ffeell, KEY_REWIND }, { 0x000000000400ffeell, KEY_STOP }, { 0x000000003c00ffeell, KEY_PLAYPAUSE }, { 0x000000000800ffeell, KEY_FASTFORWARD }, { 0x000000000600ffeell, KEY_NEXT }, { 0x000000010000ffeell, KEY_RIGHT }, { 0x000001000000ffeell, KEY_LEFT }, { 0x000000003d00ffeell, KEY_SELECT }, { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000000100ffeell, KEY_MUTE }, /* 0xffdc iMON MCE VFD */ { 0x00010000ffffffeell, KEY_VOLUMEUP }, { 0x01000000ffffffeell, KEY_VOLUMEDOWN }, { 0x00000001ffffffeell, KEY_MUTE }, { 0x0000000fffffffeell, KEY_MEDIA }, { 0x00000012ffffffeell, KEY_UP }, { 0x00000013ffffffeell, KEY_DOWN }, { 0x00000014ffffffeell, KEY_LEFT }, { 0x00000015ffffffeell, KEY_RIGHT }, { 0x00000016ffffffeell, KEY_ENTER }, { 0x00000017ffffffeell, KEY_ESC }, /* iMON Knob values */ { 0x000100ffffffffeell, KEY_VOLUMEUP }, { 0x010000ffffffffeell, KEY_VOLUMEDOWN }, { 0x000008ffffffffeell, KEY_MUTE }, { 0, KEY_RESERVED }, } }; /* imon receiver front panel/knob key table for DH102*/ static const struct imon_usb_dev_descr imon_DH102 = { .flags = IMON_NO_FLAGS, .key_table = { { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000010000ffeell, KEY_MUTE }, { 0x0000000f0000ffeell, KEY_MEDIA }, { 0x000000120000ffeell, KEY_UP }, { 0x000000130000ffeell, KEY_DOWN }, { 0x000000140000ffeell, KEY_LEFT }, { 0x000000150000ffeell, KEY_RIGHT }, { 0x000000160000ffeell, KEY_ENTER }, { 0x000000170000ffeell, KEY_ESC }, { 0x0000002b0000ffeell, KEY_EXIT }, { 0x0000002c0000ffeell, KEY_SELECT }, { 0x0000002d0000ffeell, KEY_MENU }, { 0, KEY_RESERVED } } }; /* imon ultrabay front panel key table */ static const struct imon_usb_dev_descr ultrabay_table = { .flags = IMON_SUPPRESS_REPEATED_KEYS, .key_table = { { 0x0000000f0000ffeell, KEY_MEDIA }, /* Go */ { 0x000000000100ffeell, KEY_UP }, { 0x000000000001ffeell, KEY_DOWN }, { 0x000000160000ffeell, KEY_ENTER }, { 0x0000001f0000ffeell, KEY_AUDIO }, /* Music */ { 0x000000200000ffeell, KEY_VIDEO }, /* Movie */ { 0x000000210000ffeell, KEY_CAMERA }, /* Photo */ { 0x000000270000ffeell, KEY_DVD }, /* DVD */ { 0x000000230000ffeell, KEY_TV }, /* TV */ { 0x000000050000ffeell, KEY_PREVIOUS }, /* Previous */ { 0x000000070000ffeell, KEY_REWIND }, { 0x000000040000ffeell, KEY_STOP }, { 0x000000020000ffeell, KEY_PLAYPAUSE }, { 0x000000080000ffeell, KEY_FASTFORWARD }, { 0x000000060000ffeell, KEY_NEXT }, /* Next */ { 0x000100000000ffeell, KEY_VOLUMEUP }, { 0x010000000000ffeell, KEY_VOLUMEDOWN }, { 0x000000010000ffeell, 
KEY_MUTE }, { 0, KEY_RESERVED }, } }; /* * USB Device ID for iMON USB Control Boards * * The Windows drivers contain 6 different inf files, more or less one for * each new device until the 0x0034-0x0046 devices, which all use the same * driver. Some of the devices in the 34-46 range haven't been definitively * identified yet. Early devices have either a TriGem Computer, Inc. or a * Samsung vendor ID (0x0aa8 and 0x04e8 respectively), while all later * devices use the SoundGraph vendor ID (0x15c2). This driver only supports * the ffdc and later devices, which do onboard decoding. */ static const struct usb_device_id imon_usb_id_table[] = { /* * Several devices with this same device ID, all use iMON_PAD.inf * SoundGraph iMON PAD (IR & VFD) * SoundGraph iMON PAD (IR & LCD) * SoundGraph iMON Knob (IR only) */ { USB_DEVICE(0x15c2, 0xffdc), .driver_info = (unsigned long)&imon_default_table }, /* * Newer devices, all driven by the latest iMON Windows driver, full * list of device IDs extracted via 'strings Setup/data1.hdr |grep 15c2' * Need user input to fill in details on unknown devices. */ /* SoundGraph iMON OEM Touch LCD (IR & 7" VGA LCD) */ { USB_DEVICE(0x15c2, 0x0034), .driver_info = (unsigned long)&imon_DH102 }, /* SoundGraph iMON OEM Touch LCD (IR & 4.3" VGA LCD) */ { USB_DEVICE(0x15c2, 0x0035), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM VFD (IR & VFD) */ { USB_DEVICE(0x15c2, 0x0036), .driver_info = (unsigned long)&imon_OEM_VFD }, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0037), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM LCD (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0038), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON UltraBay (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0039), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003a), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003b), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON OEM Inside (IR only) */ { USB_DEVICE(0x15c2, 0x003c), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003d), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003e), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x003f), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0040), .driver_info = (unsigned long)&imon_default_table}, /* SoundGraph iMON MINI (IR only) */ { USB_DEVICE(0x15c2, 0x0041), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station EZ External (IR only) */ { USB_DEVICE(0x15c2, 0x0042), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Basic Internal (IR only) */ { USB_DEVICE(0x15c2, 0x0043), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Elite (IR & VFD) */ { USB_DEVICE(0x15c2, 0x0044), .driver_info = (unsigned long)&imon_default_table}, /* Antec Veris Multimedia Station Premiere (IR & LCD) */ { USB_DEVICE(0x15c2, 0x0045), .driver_info = (unsigned long)&imon_default_table}, /* device specifics unknown */ { USB_DEVICE(0x15c2, 0x0046), .driver_info = (unsigned long)&imon_default_table}, {} }; /* USB Device data */ static struct usb_driver imon_driver = { .name = MOD_NAME, .probe = imon_probe, 
.disconnect = imon_disconnect, .suspend = imon_suspend, .resume = imon_resume, .id_table = imon_usb_id_table, }; /* Module bookkeeping bits */ MODULE_AUTHOR(MOD_AUTHOR); MODULE_DESCRIPTION(MOD_DESC); MODULE_VERSION(MOD_VERSION); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(usb, imon_usb_id_table); static bool debug; module_param(debug, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug, "Debug messages: 0=no, 1=yes (default: no)"); /* lcd, vfd, vga or none? should be auto-detected, but can be overridden... */ static int display_type; module_param(display_type, int, S_IRUGO); MODULE_PARM_DESC(display_type, "Type of attached display. 0=autodetect, 1=vfd, 2=lcd, 3=vga, 4=none (default: autodetect)"); static int pad_stabilize = 1; module_param(pad_stabilize, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(pad_stabilize, "Apply stabilization algorithm to iMON PAD presses in arrow key mode. 0=disable, 1=enable (default)."); /* * In certain use cases, mouse mode isn't really helpful, and could actually * cause confusion, so allow disabling it when the IR device is open. */ static bool nomouse; module_param(nomouse, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nomouse, "Disable mouse input device mode when IR device is open. 0=don't disable, 1=disable. (default: don't disable)"); /* threshold at which a pad push registers as an arrow key in kbd mode */ static int pad_thresh; module_param(pad_thresh, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(pad_thresh, "Threshold at which a pad push registers as an arrow key in kbd mode (default: 28)"); static void free_imon_context(struct imon_context *ictx) { struct device *dev = ictx->dev; usb_free_urb(ictx->tx_urb); WARN_ON(ictx->dev_present_intf0); usb_free_urb(ictx->rx_urb_intf0); WARN_ON(ictx->dev_present_intf1); usb_free_urb(ictx->rx_urb_intf1); kfree_rcu(ictx, rcu); dev_dbg(dev, "%s: iMON context freed\n", __func__); } /* * Called when the Display device (e.g. /dev/lcd0) * is opened by the application. */ static int display_open(struct inode *inode, struct file *file) { struct usb_interface *interface; struct imon_context *ictx = NULL; int subminor; int retval = 0; subminor = iminor(inode); interface = usb_find_interface(&imon_driver, subminor); if (!interface) { pr_err("could not find interface for minor %d\n", subminor); retval = -ENODEV; goto exit; } rcu_read_lock(); ictx = usb_get_intfdata(interface); if (!ictx || ictx->disconnected || !refcount_inc_not_zero(&ictx->users)) { rcu_read_unlock(); pr_err("no context found for minor %d\n", subminor); retval = -ENODEV; goto exit; } rcu_read_unlock(); mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err("display not supported by device\n"); retval = -ENODEV; } else if (ictx->display_isopen) { pr_err("display port is already open\n"); retval = -EBUSY; } else { ictx->display_isopen = true; file->private_data = ictx; dev_dbg(ictx->dev, "display port opened\n"); } mutex_unlock(&ictx->lock); if (retval && refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); exit: return retval; } /* * Called when the display device (e.g. /dev/lcd0) * is closed by the application. 
*/ static int display_close(struct inode *inode, struct file *file) { struct imon_context *ictx = file->private_data; int retval = 0; mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err("display not supported by device\n"); retval = -ENODEV; } else if (!ictx->display_isopen) { pr_err("display is not open\n"); retval = -EIO; } else { ictx->display_isopen = false; dev_dbg(ictx->dev, "display port closed\n"); } mutex_unlock(&ictx->lock); if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); return retval; } /* * Sends a packet to the device -- this function must be called with * ictx->lock held, or its unlock/lock sequence while waiting for tx * to complete can/will lead to a deadlock. */ static int send_packet(struct imon_context *ictx) { unsigned int pipe; unsigned long timeout; int interval = 0; int retval = 0; struct usb_ctrlrequest *control_req = NULL; /* Check if we need to use control or interrupt urb */ if (!ictx->tx_control) { pipe = usb_sndintpipe(ictx->usbdev_intf0, ictx->tx_endpoint->bEndpointAddress); interval = ictx->tx_endpoint->bInterval; usb_fill_int_urb(ictx->tx_urb, ictx->usbdev_intf0, pipe, ictx->usb_tx_buf, sizeof(ictx->usb_tx_buf), usb_tx_callback, ictx, interval); ictx->tx_urb->actual_length = 0; } else { /* fill request into kmalloc'ed space: */ control_req = kmalloc(sizeof(*control_req), GFP_KERNEL); if (control_req == NULL) return -ENOMEM; /* setup packet is '21 09 0200 0001 0008' */ control_req->bRequestType = 0x21; control_req->bRequest = 0x09; control_req->wValue = cpu_to_le16(0x0200); control_req->wIndex = cpu_to_le16(0x0001); control_req->wLength = cpu_to_le16(0x0008); /* control pipe is endpoint 0x00 */ pipe = usb_sndctrlpipe(ictx->usbdev_intf0, 0); /* build the control urb */ usb_fill_control_urb(ictx->tx_urb, ictx->usbdev_intf0, pipe, (unsigned char *)control_req, ictx->usb_tx_buf, sizeof(ictx->usb_tx_buf), usb_tx_callback, ictx); ictx->tx_urb->actual_length = 0; } reinit_completion(&ictx->tx.finished); ictx->tx.busy = true; smp_rmb(); /* ensure later readers know we're busy */ retval = usb_submit_urb(ictx->tx_urb, GFP_KERNEL); if (retval) { ictx->tx.busy = false; smp_rmb(); /* ensure later readers know we're not busy */ pr_err_ratelimited("error submitting urb(%d)\n", retval); } else { /* Wait for transmission to complete (or abort) */ retval = wait_for_completion_interruptible( &ictx->tx.finished); if (retval) { usb_kill_urb(ictx->tx_urb); pr_err_ratelimited("task interrupted\n"); } ictx->tx.busy = false; retval = ictx->tx.status; if (retval) pr_err_ratelimited("packet tx failed (%d)\n", retval); } kfree(control_req); /* * Induce a mandatory delay before returning, as otherwise, * send_packet can get called so rapidly as to overwhelm the device, * particularly on faster systems and/or those with quirky usb. */ timeout = msecs_to_jiffies(ictx->send_packet_delay); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(timeout); return retval; } /* * Sends an associate packet to the iMON 2.4G. * * This might not be such a good idea, since it has an id collision with * some versions of the "IR & VFD" combo. The only way to determine if it * is an RF version is to look at the product description string. (Which * we currently do not fetch). 
*/ static int send_associate_24g(struct imon_context *ictx) { const unsigned char packet[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20 }; if (!ictx) { pr_err("no context for device\n"); return -ENODEV; } if (!ictx->dev_present_intf0) { pr_err("no iMON device present\n"); return -ENODEV; } memcpy(ictx->usb_tx_buf, packet, sizeof(packet)); return send_packet(ictx); } /* * Sends packets to setup and show clock on iMON display * * Arguments: year - last 2 digits of year, month - 1..12, * day - 1..31, dow - day of the week (0-Sun...6-Sat), * hour - 0..23, minute - 0..59, second - 0..59 */ static int send_set_imon_clock(struct imon_context *ictx, unsigned int year, unsigned int month, unsigned int day, unsigned int dow, unsigned int hour, unsigned int minute, unsigned int second) { unsigned char clock_enable_pkt[IMON_CLOCK_ENABLE_PACKETS][8]; int retval = 0; int i; if (!ictx) { pr_err("no context for device\n"); return -ENODEV; } switch (ictx->display_type) { case IMON_DISPLAY_TYPE_LCD: clock_enable_pkt[0][0] = 0x80; clock_enable_pkt[0][1] = year; clock_enable_pkt[0][2] = month-1; clock_enable_pkt[0][3] = day; clock_enable_pkt[0][4] = hour; clock_enable_pkt[0][5] = minute; clock_enable_pkt[0][6] = second; clock_enable_pkt[1][0] = 0x80; clock_enable_pkt[1][1] = 0; clock_enable_pkt[1][2] = 0; clock_enable_pkt[1][3] = 0; clock_enable_pkt[1][4] = 0; clock_enable_pkt[1][5] = 0; clock_enable_pkt[1][6] = 0; if (ictx->product == 0xffdc) { clock_enable_pkt[0][7] = 0x50; clock_enable_pkt[1][7] = 0x51; } else { clock_enable_pkt[0][7] = 0x88; clock_enable_pkt[1][7] = 0x8a; } break; case IMON_DISPLAY_TYPE_VFD: clock_enable_pkt[0][0] = year; clock_enable_pkt[0][1] = month-1; clock_enable_pkt[0][2] = day; clock_enable_pkt[0][3] = dow; clock_enable_pkt[0][4] = hour; clock_enable_pkt[0][5] = minute; clock_enable_pkt[0][6] = second; clock_enable_pkt[0][7] = 0x40; clock_enable_pkt[1][0] = 0; clock_enable_pkt[1][1] = 0; clock_enable_pkt[1][2] = 1; clock_enable_pkt[1][3] = 0; clock_enable_pkt[1][4] = 0; clock_enable_pkt[1][5] = 0; clock_enable_pkt[1][6] = 0; clock_enable_pkt[1][7] = 0x42; break; default: return -ENODEV; } for (i = 0; i < IMON_CLOCK_ENABLE_PACKETS; i++) { memcpy(ictx->usb_tx_buf, clock_enable_pkt[i], 8); retval = send_packet(ictx); if (retval) { pr_err("send_packet failed for packet %d\n", i); break; } } return retval; } /* * These are the sysfs functions to handle the association on the iMON 2.4G LT. 
*/ static ssize_t associate_remote_show(struct device *d, struct device_attribute *attr, char *buf) { struct imon_context *ictx = dev_get_drvdata(d); if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (ictx->rf_isassociating) strscpy(buf, "associating\n", PAGE_SIZE); else strscpy(buf, "closed\n", PAGE_SIZE); dev_info(d, "Visit https://www.lirc.org/html/imon-24g.html for instructions on how to associate your iMON 2.4G DT/LT remote\n"); mutex_unlock(&ictx->lock); return strlen(buf); } static ssize_t associate_remote_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct imon_context *ictx; ictx = dev_get_drvdata(d); if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); ictx->rf_isassociating = true; send_associate_24g(ictx); mutex_unlock(&ictx->lock); return count; } /* * sysfs functions to control internal imon clock */ static ssize_t imon_clock_show(struct device *d, struct device_attribute *attr, char *buf) { struct imon_context *ictx = dev_get_drvdata(d); size_t len; if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { len = snprintf(buf, PAGE_SIZE, "Not supported."); } else { len = snprintf(buf, PAGE_SIZE, "To set the clock on your iMON display:\n" "# date \"+%%y %%m %%d %%w %%H %%M %%S\" > imon_clock\n" "%s", ictx->display_isopen ? "\nNOTE: imon device must be closed\n" : ""); } mutex_unlock(&ictx->lock); return len; } static ssize_t imon_clock_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct imon_context *ictx = dev_get_drvdata(d); ssize_t retval; unsigned int year, month, day, dow, hour, minute, second; if (!ictx) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { retval = -ENODEV; goto exit; } else if (ictx->display_isopen) { retval = -EBUSY; goto exit; } if (sscanf(buf, "%u %u %u %u %u %u %u", &year, &month, &day, &dow, &hour, &minute, &second) != 7) { retval = -EINVAL; goto exit; } if ((month < 1 || month > 12) || (day < 1 || day > 31) || (dow > 6) || (hour > 23) || (minute > 59) || (second > 59)) { retval = -EINVAL; goto exit; } retval = send_set_imon_clock(ictx, year, month, day, dow, hour, minute, second); if (retval) goto exit; retval = count; exit: mutex_unlock(&ictx->lock); return retval; } static DEVICE_ATTR_RW(imon_clock); static DEVICE_ATTR_RW(associate_remote); static struct attribute *imon_display_sysfs_entries[] = { &dev_attr_imon_clock.attr, NULL }; static const struct attribute_group imon_display_attr_group = { .attrs = imon_display_sysfs_entries }; static struct attribute *imon_rf_sysfs_entries[] = { &dev_attr_associate_remote.attr, NULL }; static const struct attribute_group imon_rf_attr_group = { .attrs = imon_rf_sysfs_entries }; /* * Writes data to the VFD. The iMON VFD is 2x16 characters * and requires data in 5 consecutive USB interrupt packets, * each packet but the last carrying 7 bytes. * * I don't know if the VFD board supports features such as * scrolling, clearing rows, blanking, etc. so at * the caller must provide a full screen of data. If fewer * than 32 bytes are provided spaces will be appended to * generate a full screen. 
*/ static ssize_t vfd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int i; int offset; int seq; int retval = 0; struct imon_context *ictx = file->private_data; static const unsigned char vfd_packet6[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF }; if (ictx->disconnected) return -ENODEV; if (mutex_lock_interruptible(&ictx->lock)) return -ERESTARTSYS; if (!ictx->dev_present_intf0) { pr_err_ratelimited("no iMON device present\n"); retval = -ENODEV; goto exit; } if (n_bytes <= 0 || n_bytes > 32) { pr_err_ratelimited("invalid payload size\n"); retval = -EINVAL; goto exit; } if (copy_from_user(ictx->tx.data_buf, buf, n_bytes)) { retval = -EFAULT; goto exit; } /* Pad with spaces */ for (i = n_bytes; i < 32; ++i) ictx->tx.data_buf[i] = ' '; for (i = 32; i < 35; ++i) ictx->tx.data_buf[i] = 0xFF; offset = 0; seq = 0; do { memcpy(ictx->usb_tx_buf, ictx->tx.data_buf + offset, 7); ictx->usb_tx_buf[7] = (unsigned char) seq; retval = send_packet(ictx); if (retval) { pr_err_ratelimited("send packet #%d failed\n", seq / 2); goto exit; } else { seq += 2; offset += 7; } } while (offset < 35); /* Send packet #6 */ memcpy(ictx->usb_tx_buf, &vfd_packet6, sizeof(vfd_packet6)); ictx->usb_tx_buf[7] = (unsigned char) seq; retval = send_packet(ictx); if (retval) pr_err_ratelimited("send packet #%d failed\n", seq / 2); exit: mutex_unlock(&ictx->lock); return (!retval) ? n_bytes : retval; } /* * Writes data to the LCD. The iMON OEM LCD screen expects 8-byte * packets. We accept data as 16 hexadecimal digits, followed by a * newline (to make it easy to drive the device from a command-line * -- even though the actual binary data is a bit complicated). * * The device itself is not a "traditional" text-mode display. It's * actually a 16x96 pixel bitmap display. That means if you want to * display text, you've got to have your own "font" and translate the * text into bitmaps for display. This is really flexible (you can * display whatever diacritics you need, and so on), but it's also * a lot more complicated than most LCDs... */ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int retval = 0; struct imon_context *ictx = file->private_data; if (ictx->disconnected) return -ENODEV; mutex_lock(&ictx->lock); if (!ictx->display_supported) { pr_err_ratelimited("no iMON display present\n"); retval = -ENODEV; goto exit; } if (n_bytes != 8) { pr_err_ratelimited("invalid payload size: %d (expected 8)\n", (int)n_bytes); retval = -EINVAL; goto exit; } if (copy_from_user(ictx->usb_tx_buf, buf, 8)) { retval = -EFAULT; goto exit; } retval = send_packet(ictx); if (retval) { pr_err_ratelimited("send packet failed!\n"); goto exit; } else { dev_dbg(ictx->dev, "%s: write %d bytes to LCD\n", __func__, (int) n_bytes); } exit: mutex_unlock(&ictx->lock); return (!retval) ? 
n_bytes : retval; } /* * Callback function for USB core API: transmit data */ static void usb_tx_callback(struct urb *urb) { struct imon_context *ictx; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; ictx->tx.status = urb->status; /* notify waiters that write has finished */ ictx->tx.busy = false; smp_rmb(); /* ensure later readers know we're not busy */ complete(&ictx->tx.finished); } /* * report touchscreen input */ static void imon_touch_display_timeout(struct timer_list *t) { struct imon_context *ictx = from_timer(ictx, t, ttimer); if (ictx->display_type != IMON_DISPLAY_TYPE_VGA) return; input_report_abs(ictx->touch, ABS_X, ictx->touch_x); input_report_abs(ictx->touch, ABS_Y, ictx->touch_y); input_report_key(ictx->touch, BTN_TOUCH, 0x00); input_sync(ictx->touch); } /* * iMON IR receivers support two different signal sets -- those used by * the iMON remotes, and those used by the Windows MCE remotes (which is * really just RC-6), but only one or the other at a time, as the signals * are decoded onboard the receiver. * * This function gets called two different ways, one way is from * rc_register_device, for initial protocol selection/setup, and the other is * via a userspace-initiated protocol change request, either by direct sysfs * prodding or by something like ir-keytable. In the rc_register_device case, * the imon context lock is already held, but when initiated from userspace, * it is not, so we must acquire it prior to calling send_packet, which * requires that the lock is held. */ static int imon_ir_change_protocol(struct rc_dev *rc, u64 *rc_proto) { int retval; struct imon_context *ictx = rc->priv; struct device *dev = ictx->dev; bool unlock = false; unsigned char ir_proto_packet[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86 }; if (*rc_proto && !(*rc_proto & rc->allowed_protocols)) dev_warn(dev, "Looks like you're trying to use an IR protocol this device does not support\n"); if (*rc_proto & RC_PROTO_BIT_RC6_MCE) { dev_dbg(dev, "Configuring IR receiver for MCE protocol\n"); ir_proto_packet[0] = 0x01; *rc_proto = RC_PROTO_BIT_RC6_MCE; } else if (*rc_proto & RC_PROTO_BIT_IMON) { dev_dbg(dev, "Configuring IR receiver for iMON protocol\n"); if (!pad_stabilize) dev_dbg(dev, "PAD stabilize functionality disabled\n"); /* ir_proto_packet[0] = 0x00; // already the default */ *rc_proto = RC_PROTO_BIT_IMON; } else { dev_warn(dev, "Unsupported IR protocol specified, overriding to iMON IR protocol\n"); if (!pad_stabilize) dev_dbg(dev, "PAD stabilize functionality disabled\n"); /* ir_proto_packet[0] = 0x00; // already the default */ *rc_proto = RC_PROTO_BIT_IMON; } memcpy(ictx->usb_tx_buf, &ir_proto_packet, sizeof(ir_proto_packet)); if (!mutex_is_locked(&ictx->lock)) { unlock = true; mutex_lock(&ictx->lock); } retval = send_packet(ictx); if (retval) goto out; ictx->rc_proto = *rc_proto; ictx->pad_mouse = false; out: if (unlock) mutex_unlock(&ictx->lock); return retval; } /* * The directional pad behaves a bit differently, depending on whether this is * one of the older ffdc devices or a newer device. Newer devices appear to * have a higher resolution matrix for more precise mouse movement, but it * makes things overly sensitive in keyboard mode, so we do some interesting * contortions to make it less touchy. Older devices run through the same * routine with shorter timeout and a smaller threshold. 
*/ static int stabilize(int a, int b, u16 timeout, u16 threshold) { ktime_t ct; static ktime_t prev_time; static ktime_t hit_time; static int x, y, prev_result, hits; int result = 0; long msec, msec_hit; ct = ktime_get(); msec = ktime_ms_delta(ct, prev_time); msec_hit = ktime_ms_delta(ct, hit_time); if (msec > 100) { x = 0; y = 0; hits = 0; } x += a; y += b; prev_time = ct; if (abs(x) > threshold || abs(y) > threshold) { if (abs(y) > abs(x)) result = (y > 0) ? 0x7F : 0x80; else result = (x > 0) ? 0x7F00 : 0x8000; x = 0; y = 0; if (result == prev_result) { hits++; if (hits > 3) { switch (result) { case 0x7F: y = 17 * threshold / 30; break; case 0x80: y -= 17 * threshold / 30; break; case 0x7F00: x = 17 * threshold / 30; break; case 0x8000: x -= 17 * threshold / 30; break; } } if (hits == 2 && msec_hit < timeout) { result = 0; hits = 1; } } else { prev_result = result; hits = 1; hit_time = ct; } } return result; } static u32 imon_remote_key_lookup(struct imon_context *ictx, u32 scancode) { u32 keycode; u32 release; bool is_release_code = false; /* Look for the initial press of a button */ keycode = rc_g_keycode_from_table(ictx->rdev, scancode); ictx->rc_toggle = 0x0; ictx->rc_scancode = scancode; /* Look for the release of a button */ if (keycode == KEY_RESERVED) { release = scancode & ~0x4000; keycode = rc_g_keycode_from_table(ictx->rdev, release); if (keycode != KEY_RESERVED) is_release_code = true; } ictx->release_code = is_release_code; return keycode; } static u32 imon_mce_key_lookup(struct imon_context *ictx, u32 scancode) { u32 keycode; #define MCE_KEY_MASK 0x7000 #define MCE_TOGGLE_BIT 0x8000 /* * On some receivers, mce keys decode to 0x8000f04xx and 0x8000f84xx * (the toggle bit flipping between alternating key presses), while * on other receivers, we see 0x8000f74xx and 0x8000ff4xx. To keep * the table trim, we always or in the bits to look up 0x8000ff4xx, * but we can't or them into all codes, as some keys are decoded in * a different way w/o the same use of the toggle bit... 
*/ if (scancode & 0x80000000) scancode = scancode | MCE_KEY_MASK | MCE_TOGGLE_BIT; ictx->rc_scancode = scancode; keycode = rc_g_keycode_from_table(ictx->rdev, scancode); /* not used in mce mode, but make sure we know its false */ ictx->release_code = false; return keycode; } static u32 imon_panel_key_lookup(struct imon_context *ictx, u64 code) { const struct imon_panel_key_table *key_table; u32 keycode = KEY_RESERVED; int i; key_table = ictx->dev_descr->key_table; for (i = 0; key_table[i].hw_code != 0; i++) { if (key_table[i].hw_code == (code | 0xffee)) { keycode = key_table[i].keycode; break; } } ictx->release_code = false; return keycode; } static bool imon_mouse_event(struct imon_context *ictx, unsigned char *buf, int len) { signed char rel_x = 0x00, rel_y = 0x00; u8 right_shift = 1; bool mouse_input = true; int dir = 0; unsigned long flags; spin_lock_irqsave(&ictx->kc_lock, flags); /* newer iMON device PAD or mouse button */ if (ictx->product != 0xffdc && (buf[0] & 0x01) && len == 5) { rel_x = buf[2]; rel_y = buf[3]; right_shift = 1; /* 0xffdc iMON PAD or mouse button input */ } else if (ictx->product == 0xffdc && (buf[0] & 0x40) && !((buf[1] & 0x01) || ((buf[1] >> 2) & 0x01))) { rel_x = (buf[1] & 0x08) | (buf[1] & 0x10) >> 2 | (buf[1] & 0x20) >> 4 | (buf[1] & 0x40) >> 6; if (buf[0] & 0x02) rel_x |= ~0x0f; rel_x = rel_x + rel_x / 2; rel_y = (buf[2] & 0x08) | (buf[2] & 0x10) >> 2 | (buf[2] & 0x20) >> 4 | (buf[2] & 0x40) >> 6; if (buf[0] & 0x01) rel_y |= ~0x0f; rel_y = rel_y + rel_y / 2; right_shift = 2; /* some ffdc devices decode mouse buttons differently... */ } else if (ictx->product == 0xffdc && (buf[0] == 0x68)) { right_shift = 2; /* ch+/- buttons, which we use for an emulated scroll wheel */ } else if (ictx->kc == KEY_CHANNELUP && (buf[2] & 0x40) != 0x40) { dir = 1; } else if (ictx->kc == KEY_CHANNELDOWN && (buf[2] & 0x40) != 0x40) { dir = -1; } else mouse_input = false; spin_unlock_irqrestore(&ictx->kc_lock, flags); if (mouse_input) { dev_dbg(ictx->dev, "sending mouse data via input subsystem\n"); if (dir) { input_report_rel(ictx->idev, REL_WHEEL, dir); } else if (rel_x || rel_y) { input_report_rel(ictx->idev, REL_X, rel_x); input_report_rel(ictx->idev, REL_Y, rel_y); } else { input_report_key(ictx->idev, BTN_LEFT, buf[1] & 0x1); input_report_key(ictx->idev, BTN_RIGHT, buf[1] >> right_shift & 0x1); } input_sync(ictx->idev); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); } return mouse_input; } static void imon_touch_event(struct imon_context *ictx, unsigned char *buf) { mod_timer(&ictx->ttimer, jiffies + TOUCH_TIMEOUT); ictx->touch_x = (buf[0] << 4) | (buf[1] >> 4); ictx->touch_y = 0xfff - ((buf[2] << 4) | (buf[1] & 0xf)); input_report_abs(ictx->touch, ABS_X, ictx->touch_x); input_report_abs(ictx->touch, ABS_Y, ictx->touch_y); input_report_key(ictx->touch, BTN_TOUCH, 0x01); input_sync(ictx->touch); } static void imon_pad_to_keys(struct imon_context *ictx, unsigned char *buf) { int dir = 0; signed char rel_x = 0x00, rel_y = 0x00; u16 timeout, threshold; u32 scancode = KEY_RESERVED; unsigned long flags; /* * The imon directional pad functions more like a touchpad. Bytes 3 & 4 * contain a position coordinate (x,y), with each component ranging * from -14 to 14. We want to down-sample this to only 4 discrete values * for up/down/left/right arrow keys. Also, when you get too close to * diagonals, it has a tendency to jump back and forth, so lets try to * ignore when they get too close. 
*/ if (ictx->product != 0xffdc) { /* first, pad to 8 bytes so it conforms with everything else */ buf[5] = buf[6] = buf[7] = 0; timeout = 500; /* in msecs */ /* (2*threshold) x (2*threshold) square */ threshold = pad_thresh ? pad_thresh : 28; rel_x = buf[2]; rel_y = buf[3]; if (ictx->rc_proto == RC_PROTO_BIT_IMON && pad_stabilize) { if ((buf[1] == 0) && ((rel_x != 0) || (rel_y != 0))) { dir = stabilize((int)rel_x, (int)rel_y, timeout, threshold); if (!dir) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = KEY_UNKNOWN; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } buf[2] = dir & 0xFF; buf[3] = (dir >> 8) & 0xFF; scancode = be32_to_cpu(*((__be32 *)buf)); } } else { /* * Hack alert: instead of using keycodes, we have * to use hard-coded scancodes here... */ if (abs(rel_y) > abs(rel_x)) { buf[2] = (rel_y > 0) ? 0x7F : 0x80; buf[3] = 0; if (rel_y > 0) scancode = 0x01007f00; /* KEY_DOWN */ else scancode = 0x01008000; /* KEY_UP */ } else { buf[2] = 0; buf[3] = (rel_x > 0) ? 0x7F : 0x80; if (rel_x > 0) scancode = 0x0100007f; /* KEY_RIGHT */ else scancode = 0x01000080; /* KEY_LEFT */ } } /* * Handle on-board decoded pad events for e.g. older VFD/iMON-Pad * device (15c2:ffdc). The remote generates various codes from * 0x68nnnnB7 to 0x6AnnnnB7, the left mouse button generates * 0x688301b7 and the right one 0x688481b7. All other keys generate * 0x2nnnnnnn. Position coordinate is encoded in buf[1] and buf[2] with * reversed endianness. Extract direction from buffer, rotate endianness, * adjust sign and feed the values into stabilize(). The resulting codes * will be 0x01008000, 0x01007F00, which match the newer devices. */ } else { timeout = 10; /* in msecs */ /* (2*threshold) x (2*threshold) square */ threshold = pad_thresh ? pad_thresh : 15; /* buf[1] is x */ rel_x = (buf[1] & 0x08) | (buf[1] & 0x10) >> 2 | (buf[1] & 0x20) >> 4 | (buf[1] & 0x40) >> 6; if (buf[0] & 0x02) rel_x |= ~0x10+1; /* buf[2] is y */ rel_y = (buf[2] & 0x08) | (buf[2] & 0x10) >> 2 | (buf[2] & 0x20) >> 4 | (buf[2] & 0x40) >> 6; if (buf[0] & 0x01) rel_y |= ~0x10+1; buf[0] = 0x01; buf[1] = buf[4] = buf[5] = buf[6] = buf[7] = 0; if (ictx->rc_proto == RC_PROTO_BIT_IMON && pad_stabilize) { dir = stabilize((int)rel_x, (int)rel_y, timeout, threshold); if (!dir) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = KEY_UNKNOWN; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } buf[2] = dir & 0xFF; buf[3] = (dir >> 8) & 0xFF; scancode = be32_to_cpu(*((__be32 *)buf)); } else { /* * Hack alert: instead of using keycodes, we have * to use hard-coded scancodes here... */ if (abs(rel_y) > abs(rel_x)) { buf[2] = (rel_y > 0) ? 0x7F : 0x80; buf[3] = 0; if (rel_y > 0) scancode = 0x01007f00; /* KEY_DOWN */ else scancode = 0x01008000; /* KEY_UP */ } else { buf[2] = 0; buf[3] = (rel_x > 0) ? 0x7F : 0x80; if (rel_x > 0) scancode = 0x0100007f; /* KEY_RIGHT */ else scancode = 0x01000080; /* KEY_LEFT */ } } } if (scancode) { spin_lock_irqsave(&ictx->kc_lock, flags); ictx->kc = imon_remote_key_lookup(ictx, scancode); spin_unlock_irqrestore(&ictx->kc_lock, flags); } } /* * figure out if these is a press or a release. We don't actually * care about repeats, as those will be auto-generated within the IR * subsystem for repeating scancodes. 
*/ static int imon_parse_press_type(struct imon_context *ictx, unsigned char *buf, u8 ktype) { int press_type = 0; unsigned long flags; spin_lock_irqsave(&ictx->kc_lock, flags); /* key release of 0x02XXXXXX key */ if (ictx->kc == KEY_RESERVED && buf[0] == 0x02 && buf[3] == 0x00) ictx->kc = ictx->last_keycode; /* mouse button release on (some) 0xffdc devices */ else if (ictx->kc == KEY_RESERVED && buf[0] == 0x68 && buf[1] == 0x82 && buf[2] == 0x81 && buf[3] == 0xb7) ictx->kc = ictx->last_keycode; /* mouse button release on (some other) 0xffdc devices */ else if (ictx->kc == KEY_RESERVED && buf[0] == 0x01 && buf[1] == 0x00 && buf[2] == 0x81 && buf[3] == 0xb7) ictx->kc = ictx->last_keycode; /* mce-specific button handling, no keyup events */ else if (ktype == IMON_KEY_MCE) { ictx->rc_toggle = buf[2]; press_type = 1; /* incoherent or irrelevant data */ } else if (ictx->kc == KEY_RESERVED) press_type = -EINVAL; /* key release of 0xXXXXXXb7 key */ else if (ictx->release_code) press_type = 0; /* this is a button press */ else press_type = 1; spin_unlock_irqrestore(&ictx->kc_lock, flags); return press_type; } /* * Process the incoming packet */ static void imon_incoming_packet(struct imon_context *ictx, struct urb *urb, int intf) { int len = urb->actual_length; unsigned char *buf = urb->transfer_buffer; struct device *dev = ictx->dev; unsigned long flags; u32 kc; u64 scancode; int press_type = 0; ktime_t t; static ktime_t prev_time; u8 ktype; /* filter out junk data on the older 0xffdc imon devices */ if ((buf[0] == 0xff) && (buf[1] == 0xff) && (buf[2] == 0xff)) return; /* Figure out what key was pressed */ if (len == 8 && buf[7] == 0xee) { scancode = be64_to_cpu(*((__be64 *)buf)); ktype = IMON_KEY_PANEL; kc = imon_panel_key_lookup(ictx, scancode); ictx->release_code = false; } else { scancode = be32_to_cpu(*((__be32 *)buf)); if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) { ktype = IMON_KEY_IMON; if (buf[0] == 0x80) ktype = IMON_KEY_MCE; kc = imon_mce_key_lookup(ictx, scancode); } else { ktype = IMON_KEY_IMON; kc = imon_remote_key_lookup(ictx, scancode); } } spin_lock_irqsave(&ictx->kc_lock, flags); /* keyboard/mouse mode toggle button */ if (kc == KEY_KEYBOARD && !ictx->release_code) { ictx->last_keycode = kc; if (!nomouse) { ictx->pad_mouse = !ictx->pad_mouse; dev_dbg(dev, "toggling to %s mode\n", ictx->pad_mouse ? 
"mouse" : "keyboard"); spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } else { ictx->pad_mouse = false; dev_dbg(dev, "mouse mode disabled, passing key value\n"); } } ictx->kc = kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); /* send touchscreen events through input subsystem if touchpad data */ if (ictx->touch && len == 8 && buf[7] == 0x86) { imon_touch_event(ictx, buf); return; /* look for mouse events with pad in mouse mode */ } else if (ictx->pad_mouse) { if (imon_mouse_event(ictx, buf, len)) return; } /* Now for some special handling to convert pad input to arrow keys */ if (((len == 5) && (buf[0] == 0x01) && (buf[4] == 0x00)) || ((len == 8) && (buf[0] & 0x40) && !(buf[1] & 0x1 || buf[1] >> 2 & 0x1))) { len = 8; imon_pad_to_keys(ictx, buf); } if (debug) { printk(KERN_INFO "intf%d decoded packet: %*ph\n", intf, len, buf); } press_type = imon_parse_press_type(ictx, buf, ktype); if (press_type < 0) goto not_input_data; if (ktype != IMON_KEY_PANEL) { if (press_type == 0) rc_keyup(ictx->rdev); else { enum rc_proto proto; if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) proto = RC_PROTO_RC6_MCE; else if (ictx->rc_proto == RC_PROTO_BIT_IMON) proto = RC_PROTO_IMON; else return; rc_keydown(ictx->rdev, proto, ictx->rc_scancode, ictx->rc_toggle); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); } return; } /* Only panel type events left to process now */ spin_lock_irqsave(&ictx->kc_lock, flags); t = ktime_get(); /* KEY repeats from knob and panel that need to be suppressed */ if (ictx->kc == KEY_MUTE || ictx->dev_descr->flags & IMON_SUPPRESS_REPEATED_KEYS) { if (ictx->kc == ictx->last_keycode && ktime_ms_delta(t, prev_time) < ictx->idev->rep[REP_DELAY]) { spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } } prev_time = t; kc = ictx->kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); input_report_key(ictx->idev, kc, press_type); input_sync(ictx->idev); /* panel keys don't generate a release */ input_report_key(ictx->idev, kc, 0); input_sync(ictx->idev); spin_lock_irqsave(&ictx->kc_lock, flags); ictx->last_keycode = kc; spin_unlock_irqrestore(&ictx->kc_lock, flags); return; not_input_data: if (len != 8) { dev_warn(dev, "imon %s: invalid incoming packet size (len = %d, intf%d)\n", __func__, len, intf); return; } /* iMON 2.4G associate frame */ if (buf[0] == 0x00 && buf[2] == 0xFF && /* REFID */ buf[3] == 0xFF && buf[4] == 0xFF && buf[5] == 0xFF && /* iMON 2.4G */ ((buf[6] == 0x4E && buf[7] == 0xDF) || /* LT */ (buf[6] == 0x5E && buf[7] == 0xDF))) { /* DT */ dev_warn(dev, "%s: remote associated refid=%02X\n", __func__, buf[1]); ictx->rf_isassociating = false; } } /* * Callback function for USB core API: receive data */ static void usb_rx_callback_intf0(struct urb *urb) { struct imon_context *ictx; int intfnum = 0; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; /* * if we get a callback before we're done configuring the hardware, we * can't yet process the data, as there's nowhere to send it, but we * still need to submit a new rx URB to avoid wedging the hardware */ if (!ictx->dev_present_intf0) goto out; switch (urb->status) { case -ENOENT: /* usbcore unlink successful! 
*/ return; case -ESHUTDOWN: /* transport endpoint was shut down */ break; case 0: imon_incoming_packet(ictx, urb, intfnum); break; default: dev_warn(ictx->dev, "imon %s: status(%d): ignored\n", __func__, urb->status); break; } out: usb_submit_urb(ictx->rx_urb_intf0, GFP_ATOMIC); } static void usb_rx_callback_intf1(struct urb *urb) { struct imon_context *ictx; int intfnum = 1; if (!urb) return; ictx = (struct imon_context *)urb->context; if (!ictx) return; /* * if we get a callback before we're done configuring the hardware, we * can't yet process the data, as there's nowhere to send it, but we * still need to submit a new rx URB to avoid wedging the hardware */ if (!ictx->dev_present_intf1) goto out; switch (urb->status) { case -ENOENT: /* usbcore unlink successful! */ return; case -ESHUTDOWN: /* transport endpoint was shut down */ break; case 0: imon_incoming_packet(ictx, urb, intfnum); break; default: dev_warn(ictx->dev, "imon %s: status(%d): ignored\n", __func__, urb->status); break; } out: usb_submit_urb(ictx->rx_urb_intf1, GFP_ATOMIC); } /* * The 0x15c2:0xffdc device ID was used for umpteen different imon * devices, and all of them constantly spew interrupts, even when there * is no actual data to report. However, byte 6 of this buffer looks like * its unique across device variants, so we're trying to key off that to * figure out which display type (if any) and what IR protocol the device * actually supports. These devices have their IR protocol hard-coded into * their firmware, they can't be changed on the fly like the newer hardware. */ static void imon_get_ffdc_type(struct imon_context *ictx) { u8 ffdc_cfg_byte = ictx->usb_rx_buf[6]; u8 detected_display_type = IMON_DISPLAY_TYPE_NONE; u64 allowed_protos = RC_PROTO_BIT_IMON; switch (ffdc_cfg_byte) { /* iMON Knob, no display, iMON IR + vol knob */ case 0x21: dev_info(ictx->dev, "0xffdc iMON Knob, iMON IR"); ictx->display_supported = false; break; /* iMON 2.4G LT (usb stick), no display, iMON RF */ case 0x4e: dev_info(ictx->dev, "0xffdc iMON 2.4G LT, iMON RF"); ictx->display_supported = false; ictx->rf_device = true; break; /* iMON VFD, no IR (does have vol knob tho) */ case 0x35: dev_info(ictx->dev, "0xffdc iMON VFD + knob, no IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; break; /* iMON VFD, iMON IR */ case 0x24: case 0x30: case 0x85: dev_info(ictx->dev, "0xffdc iMON VFD, iMON IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; break; /* iMON VFD, MCE IR */ case 0x46: case 0x9e: dev_info(ictx->dev, "0xffdc iMON VFD, MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; allowed_protos = RC_PROTO_BIT_RC6_MCE; break; /* iMON VFD, iMON or MCE IR */ case 0x7e: dev_info(ictx->dev, "0xffdc iMON VFD, iMON or MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; allowed_protos |= RC_PROTO_BIT_RC6_MCE; break; /* iMON LCD, MCE IR */ case 0x9f: dev_info(ictx->dev, "0xffdc iMON LCD, MCE IR"); detected_display_type = IMON_DISPLAY_TYPE_LCD; allowed_protos = RC_PROTO_BIT_RC6_MCE; break; /* no display, iMON IR */ case 0x26: dev_info(ictx->dev, "0xffdc iMON Inside, iMON IR"); ictx->display_supported = false; break; /* Soundgraph iMON UltraBay */ case 0x98: dev_info(ictx->dev, "0xffdc iMON UltraBay, LCD + IR"); detected_display_type = IMON_DISPLAY_TYPE_LCD; allowed_protos = RC_PROTO_BIT_IMON | RC_PROTO_BIT_RC6_MCE; ictx->dev_descr = &ultrabay_table; break; default: dev_info(ictx->dev, "Unknown 0xffdc device, defaulting to VFD and iMON IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; /* * We don't know which one it is, allow user to set 
the * RC6 one from userspace if IMON wasn't correct. */ allowed_protos |= RC_PROTO_BIT_RC6_MCE; break; } printk(KERN_CONT " (id 0x%02x)\n", ffdc_cfg_byte); ictx->display_type = detected_display_type; ictx->rc_proto = allowed_protos; } static void imon_set_display_type(struct imon_context *ictx) { u8 configured_display_type = IMON_DISPLAY_TYPE_VFD; /* * Try to auto-detect the type of display if the user hasn't set * it by hand via the display_type modparam. Default is VFD. */ if (display_type == IMON_DISPLAY_TYPE_AUTO) { switch (ictx->product) { case 0xffdc: /* set in imon_get_ffdc_type() */ configured_display_type = ictx->display_type; break; case 0x0034: case 0x0035: configured_display_type = IMON_DISPLAY_TYPE_VGA; break; case 0x0038: case 0x0039: case 0x0045: configured_display_type = IMON_DISPLAY_TYPE_LCD; break; case 0x003c: case 0x0041: case 0x0042: case 0x0043: configured_display_type = IMON_DISPLAY_TYPE_NONE; ictx->display_supported = false; break; case 0x0036: case 0x0044: default: configured_display_type = IMON_DISPLAY_TYPE_VFD; break; } } else { configured_display_type = display_type; if (display_type == IMON_DISPLAY_TYPE_NONE) ictx->display_supported = false; else ictx->display_supported = true; dev_info(ictx->dev, "%s: overriding display type to %d via modparam\n", __func__, display_type); } ictx->display_type = configured_display_type; } static struct rc_dev *imon_init_rdev(struct imon_context *ictx) { struct rc_dev *rdev; int ret; static const unsigned char fp_packet[] = { 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88 }; rdev = rc_allocate_device(RC_DRIVER_SCANCODE); if (!rdev) { dev_err(ictx->dev, "remote control dev allocation failed\n"); goto out; } snprintf(ictx->name_rdev, sizeof(ictx->name_rdev), "iMON Remote (%04x:%04x)", ictx->vendor, ictx->product); usb_make_path(ictx->usbdev_intf0, ictx->phys_rdev, sizeof(ictx->phys_rdev)); strlcat(ictx->phys_rdev, "/input0", sizeof(ictx->phys_rdev)); rdev->device_name = ictx->name_rdev; rdev->input_phys = ictx->phys_rdev; usb_to_input_id(ictx->usbdev_intf0, &rdev->input_id); rdev->dev.parent = ictx->dev; rdev->priv = ictx; /* iMON PAD or MCE */ rdev->allowed_protocols = RC_PROTO_BIT_IMON | RC_PROTO_BIT_RC6_MCE; rdev->change_protocol = imon_ir_change_protocol; rdev->driver_name = MOD_NAME; /* Enable front-panel buttons and/or knobs */ memcpy(ictx->usb_tx_buf, &fp_packet, sizeof(fp_packet)); ret = send_packet(ictx); /* Not fatal, but warn about it */ if (ret) dev_info(ictx->dev, "panel buttons/knobs setup failed\n"); if (ictx->product == 0xffdc) { imon_get_ffdc_type(ictx); rdev->allowed_protocols = ictx->rc_proto; } imon_set_display_type(ictx); if (ictx->rc_proto == RC_PROTO_BIT_RC6_MCE) rdev->map_name = RC_MAP_IMON_MCE; else rdev->map_name = RC_MAP_IMON_PAD; ret = rc_register_device(rdev); if (ret < 0) { dev_err(ictx->dev, "remote input dev register failed\n"); goto out; } return rdev; out: rc_free_device(rdev); return NULL; } static struct input_dev *imon_init_idev(struct imon_context *ictx) { const struct imon_panel_key_table *key_table; struct input_dev *idev; int ret, i; key_table = ictx->dev_descr->key_table; idev = input_allocate_device(); if (!idev) goto out; snprintf(ictx->name_idev, sizeof(ictx->name_idev), "iMON Panel, Knob and Mouse(%04x:%04x)", ictx->vendor, ictx->product); idev->name = ictx->name_idev; usb_make_path(ictx->usbdev_intf0, ictx->phys_idev, sizeof(ictx->phys_idev)); strlcat(ictx->phys_idev, "/input1", sizeof(ictx->phys_idev)); idev->phys = ictx->phys_idev; idev->evbit[0] = BIT_MASK(EV_KEY) | 
BIT_MASK(EV_REP) | BIT_MASK(EV_REL); idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_RIGHT); idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y) | BIT_MASK(REL_WHEEL); /* panel and/or knob code support */ for (i = 0; key_table[i].hw_code != 0; i++) { u32 kc = key_table[i].keycode; __set_bit(kc, idev->keybit); } usb_to_input_id(ictx->usbdev_intf0, &idev->id); idev->dev.parent = ictx->dev; input_set_drvdata(idev, ictx); ret = input_register_device(idev); if (ret < 0) { dev_err(ictx->dev, "input dev register failed\n"); goto out; } return idev; out: input_free_device(idev); return NULL; } static struct input_dev *imon_init_touch(struct imon_context *ictx) { struct input_dev *touch; int ret; touch = input_allocate_device(); if (!touch) goto touch_alloc_failed; snprintf(ictx->name_touch, sizeof(ictx->name_touch), "iMON USB Touchscreen (%04x:%04x)", ictx->vendor, ictx->product); touch->name = ictx->name_touch; usb_make_path(ictx->usbdev_intf1, ictx->phys_touch, sizeof(ictx->phys_touch)); strlcat(ictx->phys_touch, "/input2", sizeof(ictx->phys_touch)); touch->phys = ictx->phys_touch; touch->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); touch->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); input_set_abs_params(touch, ABS_X, 0x00, 0xfff, 0, 0); input_set_abs_params(touch, ABS_Y, 0x00, 0xfff, 0, 0); input_set_drvdata(touch, ictx); usb_to_input_id(ictx->usbdev_intf1, &touch->id); touch->dev.parent = ictx->dev; ret = input_register_device(touch); if (ret < 0) { dev_info(ictx->dev, "touchscreen input dev register failed\n"); goto touch_register_failed; } return touch; touch_register_failed: input_free_device(touch); touch_alloc_failed: return NULL; } static bool imon_find_endpoints(struct imon_context *ictx, struct usb_host_interface *iface_desc) { struct usb_endpoint_descriptor *ep; struct usb_endpoint_descriptor *rx_endpoint = NULL; struct usb_endpoint_descriptor *tx_endpoint = NULL; int ifnum = iface_desc->desc.bInterfaceNumber; int num_endpts = iface_desc->desc.bNumEndpoints; int i, ep_dir, ep_type; bool ir_ep_found = false; bool display_ep_found = false; bool tx_control = false; /* * Scan the endpoint list and set: * first input endpoint = IR endpoint * first output endpoint = display endpoint */ for (i = 0; i < num_endpts && !(ir_ep_found && display_ep_found); ++i) { ep = &iface_desc->endpoint[i].desc; ep_dir = ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK; ep_type = usb_endpoint_type(ep); if (!ir_ep_found && ep_dir == USB_DIR_IN && ep_type == USB_ENDPOINT_XFER_INT) { rx_endpoint = ep; ir_ep_found = true; dev_dbg(ictx->dev, "%s: found IR endpoint\n", __func__); } else if (!display_ep_found && ep_dir == USB_DIR_OUT && ep_type == USB_ENDPOINT_XFER_INT) { tx_endpoint = ep; display_ep_found = true; dev_dbg(ictx->dev, "%s: found display endpoint\n", __func__); } } if (ifnum == 0) { ictx->rx_endpoint_intf0 = rx_endpoint; /* * tx is used to send characters to lcd/vfd, associate RF * remotes, set IR protocol, and maybe more... */ ictx->tx_endpoint = tx_endpoint; } else { ictx->rx_endpoint_intf1 = rx_endpoint; } /* * If we didn't find a display endpoint, this is probably one of the * newer iMON devices that use control urb instead of interrupt */ if (!display_ep_found) { tx_control = true; display_ep_found = true; dev_dbg(ictx->dev, "%s: device uses control endpoint, not interface OUT endpoint\n", __func__); } /* * Some iMON receivers have no display. Unfortunately, it seems * that SoundGraph recycles device IDs between devices both with * and without... 
:\ */ if (ictx->display_type == IMON_DISPLAY_TYPE_NONE) { display_ep_found = false; dev_dbg(ictx->dev, "%s: device has no display\n", __func__); } /* * iMON Touch devices have a VGA touchscreen, but no "display", as * that refers to e.g. /dev/lcd0 (a character device LCD or VFD). */ if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { display_ep_found = false; dev_dbg(ictx->dev, "%s: iMON Touch device found\n", __func__); } /* Input endpoint is mandatory */ if (!ir_ep_found) pr_err("no valid input (IR) endpoint found\n"); ictx->tx_control = tx_control; if (display_ep_found) ictx->display_supported = true; return ir_ep_found; } static struct imon_context *imon_init_intf0(struct usb_interface *intf, const struct usb_device_id *id) { struct imon_context *ictx; struct urb *rx_urb; struct urb *tx_urb; struct device *dev = &intf->dev; struct usb_host_interface *iface_desc; int ret = -ENOMEM; ictx = kzalloc(sizeof(*ictx), GFP_KERNEL); if (!ictx) goto exit; rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!rx_urb) goto rx_urb_alloc_failed; tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!tx_urb) goto tx_urb_alloc_failed; mutex_init(&ictx->lock); spin_lock_init(&ictx->kc_lock); mutex_lock(&ictx->lock); ictx->dev = dev; ictx->usbdev_intf0 = usb_get_dev(interface_to_usbdev(intf)); ictx->rx_urb_intf0 = rx_urb; ictx->tx_urb = tx_urb; ictx->rf_device = false; init_completion(&ictx->tx.finished); ictx->vendor = le16_to_cpu(ictx->usbdev_intf0->descriptor.idVendor); ictx->product = le16_to_cpu(ictx->usbdev_intf0->descriptor.idProduct); /* save drive info for later accessing the panel/knob key table */ ictx->dev_descr = (struct imon_usb_dev_descr *)id->driver_info; /* default send_packet delay is 5ms but some devices need more */ ictx->send_packet_delay = ictx->dev_descr->flags & IMON_NEED_20MS_PKT_DELAY ? 
20 : 5; ret = -ENODEV; iface_desc = intf->cur_altsetting; if (!imon_find_endpoints(ictx, iface_desc)) { goto find_endpoint_failed; } usb_fill_int_urb(ictx->rx_urb_intf0, ictx->usbdev_intf0, usb_rcvintpipe(ictx->usbdev_intf0, ictx->rx_endpoint_intf0->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf0, ictx, ictx->rx_endpoint_intf0->bInterval); ret = usb_submit_urb(ictx->rx_urb_intf0, GFP_KERNEL); if (ret) { pr_err("usb_submit_urb failed for intf0 (%d)\n", ret); goto urb_submit_failed; } ictx->idev = imon_init_idev(ictx); if (!ictx->idev) { dev_err(dev, "%s: input device setup failed\n", __func__); goto idev_setup_failed; } ictx->rdev = imon_init_rdev(ictx); if (!ictx->rdev) { dev_err(dev, "%s: rc device setup failed\n", __func__); goto rdev_setup_failed; } ictx->dev_present_intf0 = true; mutex_unlock(&ictx->lock); return ictx; rdev_setup_failed: input_unregister_device(ictx->idev); idev_setup_failed: usb_kill_urb(ictx->rx_urb_intf0); urb_submit_failed: find_endpoint_failed: usb_put_dev(ictx->usbdev_intf0); mutex_unlock(&ictx->lock); usb_free_urb(tx_urb); tx_urb_alloc_failed: usb_free_urb(rx_urb); rx_urb_alloc_failed: kfree(ictx); exit: dev_err(dev, "unable to initialize intf0, err %d\n", ret); return NULL; } static struct imon_context *imon_init_intf1(struct usb_interface *intf, struct imon_context *ictx) { struct urb *rx_urb; struct usb_host_interface *iface_desc; int ret = -ENOMEM; rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!rx_urb) goto rx_urb_alloc_failed; mutex_lock(&ictx->lock); if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { timer_setup(&ictx->ttimer, imon_touch_display_timeout, 0); } ictx->usbdev_intf1 = usb_get_dev(interface_to_usbdev(intf)); ictx->rx_urb_intf1 = rx_urb; ret = -ENODEV; iface_desc = intf->cur_altsetting; if (!imon_find_endpoints(ictx, iface_desc)) goto find_endpoint_failed; if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { ictx->touch = imon_init_touch(ictx); if (!ictx->touch) goto touch_setup_failed; } else ictx->touch = NULL; usb_fill_int_urb(ictx->rx_urb_intf1, ictx->usbdev_intf1, usb_rcvintpipe(ictx->usbdev_intf1, ictx->rx_endpoint_intf1->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf1, ictx, ictx->rx_endpoint_intf1->bInterval); ret = usb_submit_urb(ictx->rx_urb_intf1, GFP_KERNEL); if (ret) { pr_err("usb_submit_urb failed for intf1 (%d)\n", ret); goto urb_submit_failed; } ictx->dev_present_intf1 = true; mutex_unlock(&ictx->lock); return ictx; urb_submit_failed: if (ictx->touch) input_unregister_device(ictx->touch); touch_setup_failed: find_endpoint_failed: usb_put_dev(ictx->usbdev_intf1); ictx->usbdev_intf1 = NULL; mutex_unlock(&ictx->lock); usb_free_urb(rx_urb); ictx->rx_urb_intf1 = NULL; rx_urb_alloc_failed: dev_err(ictx->dev, "unable to initialize intf1, err %d\n", ret); return NULL; } static void imon_init_display(struct imon_context *ictx, struct usb_interface *intf) { int ret; dev_dbg(ictx->dev, "Registering iMON display with sysfs\n"); /* set up sysfs entry for built-in clock */ ret = sysfs_create_group(&intf->dev.kobj, &imon_display_attr_group); if (ret) dev_err(ictx->dev, "Could not create display sysfs entries(%d)", ret); if (ictx->display_type == IMON_DISPLAY_TYPE_LCD) ret = usb_register_dev(intf, &imon_lcd_class); else ret = usb_register_dev(intf, &imon_vfd_class); if (ret) /* Not a fatal error, so ignore */ dev_info(ictx->dev, "could not get a minor number for display\n"); } /* * Callback function for USB core API: Probe */ static int imon_probe(struct usb_interface 
*interface, const struct usb_device_id *id) { struct usb_device *usbdev = NULL; struct usb_host_interface *iface_desc = NULL; struct usb_interface *first_if; struct device *dev = &interface->dev; int ifnum, sysfs_err; int ret = 0; struct imon_context *ictx = NULL; u16 vendor, product; usbdev = usb_get_dev(interface_to_usbdev(interface)); iface_desc = interface->cur_altsetting; ifnum = iface_desc->desc.bInterfaceNumber; vendor = le16_to_cpu(usbdev->descriptor.idVendor); product = le16_to_cpu(usbdev->descriptor.idProduct); dev_dbg(dev, "%s: found iMON device (%04x:%04x, intf%d)\n", __func__, vendor, product, ifnum); first_if = usb_ifnum_to_if(usbdev, 0); if (!first_if) { ret = -ENODEV; goto fail; } if (first_if->dev.driver != interface->dev.driver) { dev_err(&interface->dev, "inconsistent driver matching\n"); ret = -EINVAL; goto fail; } if (ifnum == 0) { ictx = imon_init_intf0(interface, id); if (!ictx) { pr_err("failed to initialize context!\n"); ret = -ENODEV; goto fail; } refcount_set(&ictx->users, 1); } else { /* this is the secondary interface on the device */ struct imon_context *first_if_ctx = usb_get_intfdata(first_if); /* fail early if first intf failed to register */ if (!first_if_ctx) { ret = -ENODEV; goto fail; } ictx = imon_init_intf1(interface, first_if_ctx); if (!ictx) { pr_err("failed to attach to context!\n"); ret = -ENODEV; goto fail; } refcount_inc(&ictx->users); } usb_set_intfdata(interface, ictx); if (ifnum == 0) { if (product == 0xffdc && ictx->rf_device) { sysfs_err = sysfs_create_group(&interface->dev.kobj, &imon_rf_attr_group); if (sysfs_err) pr_err("Could not create RF sysfs entries(%d)\n", sysfs_err); } if (ictx->display_supported) imon_init_display(ictx, interface); } dev_info(dev, "iMON device (%04x:%04x, intf%d) on usb<%d:%d> initialized\n", vendor, product, ifnum, usbdev->bus->busnum, usbdev->devnum); usb_put_dev(usbdev); return 0; fail: usb_put_dev(usbdev); dev_err(dev, "unable to register, err %d\n", ret); return ret; } /* * Callback function for USB core API: disconnect */ static void imon_disconnect(struct usb_interface *interface) { struct imon_context *ictx; struct device *dev; int ifnum; ictx = usb_get_intfdata(interface); ictx->disconnected = true; dev = ictx->dev; ifnum = interface->cur_altsetting->desc.bInterfaceNumber; /* * sysfs_remove_group is safe to call even if sysfs_create_group * hasn't been called */ sysfs_remove_group(&interface->dev.kobj, &imon_display_attr_group); sysfs_remove_group(&interface->dev.kobj, &imon_rf_attr_group); usb_set_intfdata(interface, NULL); /* Abort ongoing write */ if (ictx->tx.busy) { usb_kill_urb(ictx->tx_urb); complete(&ictx->tx.finished); } if (ifnum == 0) { ictx->dev_present_intf0 = false; usb_kill_urb(ictx->rx_urb_intf0); input_unregister_device(ictx->idev); rc_unregister_device(ictx->rdev); if (ictx->display_supported) { if (ictx->display_type == IMON_DISPLAY_TYPE_LCD) usb_deregister_dev(interface, &imon_lcd_class); else if (ictx->display_type == IMON_DISPLAY_TYPE_VFD) usb_deregister_dev(interface, &imon_vfd_class); } usb_put_dev(ictx->usbdev_intf0); } else { ictx->dev_present_intf1 = false; usb_kill_urb(ictx->rx_urb_intf1); if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { del_timer_sync(&ictx->ttimer); input_unregister_device(ictx->touch); } usb_put_dev(ictx->usbdev_intf1); } if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); dev_dbg(dev, "%s: iMON device (intf%d) disconnected\n", __func__, ifnum); } static int imon_suspend(struct usb_interface *intf, pm_message_t message) { struct imon_context 
*ictx = usb_get_intfdata(intf); int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum == 0) usb_kill_urb(ictx->rx_urb_intf0); else usb_kill_urb(ictx->rx_urb_intf1); return 0; } static int imon_resume(struct usb_interface *intf) { int rc = 0; struct imon_context *ictx = usb_get_intfdata(intf); int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum == 0) { usb_fill_int_urb(ictx->rx_urb_intf0, ictx->usbdev_intf0, usb_rcvintpipe(ictx->usbdev_intf0, ictx->rx_endpoint_intf0->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf0, ictx, ictx->rx_endpoint_intf0->bInterval); rc = usb_submit_urb(ictx->rx_urb_intf0, GFP_NOIO); } else { usb_fill_int_urb(ictx->rx_urb_intf1, ictx->usbdev_intf1, usb_rcvintpipe(ictx->usbdev_intf1, ictx->rx_endpoint_intf1->bEndpointAddress), ictx->usb_rx_buf, sizeof(ictx->usb_rx_buf), usb_rx_callback_intf1, ictx, ictx->rx_endpoint_intf1->bInterval); rc = usb_submit_urb(ictx->rx_urb_intf1, GFP_NOIO); } return rc; } module_usb_driver(imon_driver);
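Usage sketch for the imon_clock sysfs attribute handled by imon_clock_store() above: a small userspace program that writes the current time in the seven-field "%y %m %d %w %H %M %S" layout the driver parses with sscanf(). This is illustrative only; the sysfs path below is a hypothetical example and must be replaced with the attribute's real location under /sys for the device at hand, and the display char device must be closed when writing.

/* sketch: set the iMON display clock via the driver's imon_clock attribute */
#include <stdio.h>
#include <time.h>

int main(void)
{
	/* hypothetical path -- locate the real imon_clock attribute under /sys */
	const char *path = "/sys/bus/usb/devices/1-1:1.0/imon_clock";
	char line[64];
	time_t now = time(NULL);
	struct tm tm;
	FILE *f;

	localtime_r(&now, &tm);
	/* 2-digit year, month, day, day-of-week, hour, minute, second --
	 * matching the seven %u conversions in imon_clock_store() */
	strftime(line, sizeof(line), "%y %m %d %w %H %M %S\n", &tm);

	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fputs(line, f) == EOF || fclose(f) == EOF) {
		perror("imon_clock write");
		return 1;
	}
	return 0;
}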
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_FPU_INTERNAL_H
#define __X86_KERNEL_FPU_INTERNAL_H

extern struct fpstate init_fpstate;

/* CPU feature check wrappers */
static __always_inline __pure bool use_xsave(void)
{
	return cpu_feature_enabled(X86_FEATURE_XSAVE);
}

static __always_inline __pure bool use_fxsr(void)
{
	return cpu_feature_enabled(X86_FEATURE_FXSR);
}

#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ (void)(x); 0; })
#endif

/* Used in init.c */
extern void fpstate_init_user(struct fpstate *fpstate);
extern void fpstate_reset(struct fpu *fpu);

#endif
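As a userspace illustration of what the use_xsave()/use_fxsr() wrappers above test for, the sketch below reads the same CPUID feature bits directly (XSAVE is CPUID leaf 1, ECX bit 26; FXSR is leaf 1, EDX bit 24). It is a standalone x86 example, not kernel code.

/* sketch: probe the XSAVE and FXSR feature bits from userspace */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 1 unavailable\n");
		return 1;
	}
	printf("XSAVE: %s\n", (ecx & (1u << 26)) ? "yes" : "no");
	printf("FXSR:  %s\n", (edx & (1u << 24)) ? "yes" : "no");
	return 0;
}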
// SPDX-License-Identifier: GPL-2.0-only /*************************************************************************** * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * * * * Based on Logitech G13 driver (v0.4) * * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * * * ***************************************************************************/ #include <linux/hid.h> #include <linux/hid-debug.h> #include <linux/input.h> #include "hid-ids.h" #include <linux/fb.h> #include <linux/vmalloc.h> #include <linux/backlight.h> #include <linux/lcd.h> #include <linux/leds.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/completion.h> #include <linux/uaccess.h> #include <linux/module.h> #include <media/rc-core.h> #include "hid-picolcd.h" int picolcd_raw_cir(struct picolcd_data *data, struct hid_report *report, u8 *raw_data, int size) { unsigned long flags; int i, w, sz; struct ir_raw_event rawir = {}; /* ignore if rc_dev is NULL or status is shunned */ spin_lock_irqsave(&data->lock, flags); if (!data->rc_dev || (data->status & PICOLCD_CIR_SHUN)) { spin_unlock_irqrestore(&data->lock, flags); return 1; } spin_unlock_irqrestore(&data->lock, flags); /* PicoLCD USB packets contain 16-bit intervals in network order, * with value negated for pulse. Intervals are in microseconds. * * Note: some userspace LIRC code for PicoLCD says negated values * for space - is it a matter of IR chip? (pulse for my TSOP2236) * * In addition, the first interval seems to be around 15000 + base * interval for non-first report of IR data - thus the quirk below * to get RC_CODE to understand Sony and JVC remotes I have at hand */ sz = size > 0 ? min((int)raw_data[0], size-1) : 0; for (i = 0; i+1 < sz; i += 2) { w = (raw_data[i] << 8) | (raw_data[i+1]); rawir.pulse = !!(w & 0x8000); rawir.duration = rawir.pulse ? (65536 - w) : w; /* Quirk!!
- see above */ if (i == 0 && rawir.duration > 15000) rawir.duration -= 15000; ir_raw_event_store(data->rc_dev, &rawir); } ir_raw_event_handle(data->rc_dev); return 1; } static int picolcd_cir_open(struct rc_dev *dev) { struct picolcd_data *data = dev->priv; unsigned long flags; spin_lock_irqsave(&data->lock, flags); data->status &= ~PICOLCD_CIR_SHUN; spin_unlock_irqrestore(&data->lock, flags); return 0; } static void picolcd_cir_close(struct rc_dev *dev) { struct picolcd_data *data = dev->priv; unsigned long flags; spin_lock_irqsave(&data->lock, flags); data->status |= PICOLCD_CIR_SHUN; spin_unlock_irqrestore(&data->lock, flags); } /* initialize CIR input device */ int picolcd_init_cir(struct picolcd_data *data, struct hid_report *report) { struct rc_dev *rdev; int ret = 0; rdev = rc_allocate_device(RC_DRIVER_IR_RAW); if (!rdev) return -ENOMEM; rdev->priv = data; rdev->allowed_protocols = RC_PROTO_BIT_ALL_IR_DECODER; rdev->open = picolcd_cir_open; rdev->close = picolcd_cir_close; rdev->device_name = data->hdev->name; rdev->input_phys = data->hdev->phys; rdev->input_id.bustype = data->hdev->bus; rdev->input_id.vendor = data->hdev->vendor; rdev->input_id.product = data->hdev->product; rdev->input_id.version = data->hdev->version; rdev->dev.parent = &data->hdev->dev; rdev->driver_name = PICOLCD_NAME; rdev->map_name = RC_MAP_RC6_MCE; rdev->timeout = MS_TO_US(100); rdev->rx_resolution = 1; ret = rc_register_device(rdev); if (ret) goto err; data->rc_dev = rdev; return 0; err: rc_free_device(rdev); return ret; } void picolcd_exit_cir(struct picolcd_data *data) { struct rc_dev *rdev = data->rc_dev; data->rc_dev = NULL; rc_unregister_device(rdev); }
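Standalone sketch of the interval format decoded by picolcd_raw_cir() above: each report carries 16-bit big-endian words where a set top bit marks a pulse of (65536 - w) microseconds and a clear top bit a space of w microseconds. The sample words below are made up for illustration, not captured data.

/* sketch: decode PicoLCD-style 16-bit big-endian IR interval words */
#include <stdio.h>

int main(void)
{
	const unsigned char words[] = {
		0xfc, 0x18,	/* 0xfc18 -> pulse, 65536 - 0xfc18 = 1000 us */
		0x01, 0xf4,	/* 0x01f4 -> space, 500 us */
		0xfe, 0x0c,	/* 0xfe0c -> pulse, 500 us */
		0x03, 0xe8,	/* 0x03e8 -> space, 1000 us */
	};
	unsigned int i;

	for (i = 0; i + 1 < sizeof(words); i += 2) {
		unsigned int w = (words[i] << 8) | words[i + 1];
		int pulse = !!(w & 0x8000);
		unsigned int duration = pulse ? 65536 - w : w;

		printf("%s %u us\n", pulse ? "pulse" : "space", duration);
	}
	return 0;
}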
// SPDX-License-Identifier: GPL-2.0-or-later /* * Video capture interface for Linux version 2 * * A generic video device interface for the LINUX operating system * using a set of device structures/vectors for low level operations. * * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1) * Mauro Carvalho Chehab <mchehab@kernel.org> (version 2) * * Fixes: 20000516 Claudio Matsuoka <claudio@conectiva.com> * - Added procfs support */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/debugfs.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <media/v4l2-common.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-event.h> #define VIDEO_NUM_DEVICES 256 #define VIDEO_NAME "video4linux" #define dprintk(fmt, arg...)
do { \ printk(KERN_DEBUG pr_fmt("%s: " fmt), \ __func__, ##arg); \ } while (0) /* * sysfs stuff */ static ssize_t index_show(struct device *cd, struct device_attribute *attr, char *buf) { struct video_device *vdev = to_video_device(cd); return sprintf(buf, "%i\n", vdev->index); } static DEVICE_ATTR_RO(index); static ssize_t dev_debug_show(struct device *cd, struct device_attribute *attr, char *buf) { struct video_device *vdev = to_video_device(cd); return sprintf(buf, "%i\n", vdev->dev_debug); } static ssize_t dev_debug_store(struct device *cd, struct device_attribute *attr, const char *buf, size_t len) { struct video_device *vdev = to_video_device(cd); int res = 0; u16 value; res = kstrtou16(buf, 0, &value); if (res) return res; vdev->dev_debug = value; return len; } static DEVICE_ATTR_RW(dev_debug); static ssize_t name_show(struct device *cd, struct device_attribute *attr, char *buf) { struct video_device *vdev = to_video_device(cd); return sprintf(buf, "%.*s\n", (int)sizeof(vdev->name), vdev->name); } static DEVICE_ATTR_RO(name); static struct attribute *video_device_attrs[] = { &dev_attr_name.attr, &dev_attr_dev_debug.attr, &dev_attr_index.attr, NULL, }; ATTRIBUTE_GROUPS(video_device); /* * Active devices */ static struct video_device *video_devices[VIDEO_NUM_DEVICES]; static DEFINE_MUTEX(videodev_lock); static DECLARE_BITMAP(devnode_nums[VFL_TYPE_MAX], VIDEO_NUM_DEVICES); /* Device node utility functions */ /* Note: these utility functions all assume that vfl_type is in the range [0, VFL_TYPE_MAX-1]. */ #ifdef CONFIG_VIDEO_FIXED_MINOR_RANGES /* Return the bitmap corresponding to vfl_type. */ static inline unsigned long *devnode_bits(enum vfl_devnode_type vfl_type) { /* Any types not assigned to fixed minor ranges must be mapped to one single bitmap for the purposes of finding a free node number since all those unassigned types use the same minor range. */ int idx = (vfl_type > VFL_TYPE_RADIO) ? VFL_TYPE_MAX - 1 : vfl_type; return devnode_nums[idx]; } #else /* Return the bitmap corresponding to vfl_type. */ static inline unsigned long *devnode_bits(enum vfl_devnode_type vfl_type) { return devnode_nums[vfl_type]; } #endif /* Mark device node number vdev->num as used */ static inline void devnode_set(struct video_device *vdev) { set_bit(vdev->num, devnode_bits(vdev->vfl_type)); } /* Mark device node number vdev->num as unused */ static inline void devnode_clear(struct video_device *vdev) { clear_bit(vdev->num, devnode_bits(vdev->vfl_type)); } /* Try to find a free device node number in the range [from, to> */ static inline int devnode_find(struct video_device *vdev, int from, int to) { return find_next_zero_bit(devnode_bits(vdev->vfl_type), to, from); } struct video_device *video_device_alloc(void) { return kzalloc(sizeof(struct video_device), GFP_KERNEL); } EXPORT_SYMBOL(video_device_alloc); void video_device_release(struct video_device *vdev) { kfree(vdev); } EXPORT_SYMBOL(video_device_release); void video_device_release_empty(struct video_device *vdev) { /* Do nothing */ /* Only valid when the video_device struct is a static. */ } EXPORT_SYMBOL(video_device_release_empty); static inline void video_get(struct video_device *vdev) { get_device(&vdev->dev); } static inline void video_put(struct video_device *vdev) { put_device(&vdev->dev); } /* Called when the last user of the video device exits. 
*/ static void v4l2_device_release(struct device *cd) { struct video_device *vdev = to_video_device(cd); struct v4l2_device *v4l2_dev = vdev->v4l2_dev; mutex_lock(&videodev_lock); if (WARN_ON(video_devices[vdev->minor] != vdev)) { /* should not happen */ mutex_unlock(&videodev_lock); return; } /* Free up this device for reuse */ video_devices[vdev->minor] = NULL; /* Delete the cdev on this minor as well */ cdev_del(vdev->cdev); /* Just in case some driver tries to access this from the release() callback. */ vdev->cdev = NULL; /* Mark device node number as free */ devnode_clear(vdev); mutex_unlock(&videodev_lock); #if defined(CONFIG_MEDIA_CONTROLLER) if (v4l2_dev->mdev && vdev->vfl_dir != VFL_DIR_M2M) { /* Remove interfaces and interface links */ media_devnode_remove(vdev->intf_devnode); if (vdev->entity.function != MEDIA_ENT_F_UNKNOWN) media_device_unregister_entity(&vdev->entity); } #endif /* Do not call v4l2_device_put if there is no release callback set. * Drivers that have no v4l2_device release callback might free the * v4l2_dev instance in the video_device release callback below, so we * must perform this check here. * * TODO: In the long run all drivers that use v4l2_device should use the * v4l2_device release callback. This check will then be unnecessary. */ if (v4l2_dev->release == NULL) v4l2_dev = NULL; /* Release video_device and perform other cleanups as needed. */ vdev->release(vdev); /* Decrease v4l2_device refcount */ if (v4l2_dev) v4l2_device_put(v4l2_dev); } static struct class video_class = { .name = VIDEO_NAME, .dev_groups = video_device_groups, }; struct video_device *video_devdata(struct file *file) { return video_devices[iminor(file_inode(file))]; } EXPORT_SYMBOL(video_devdata); /* Priority handling */ static inline bool prio_is_valid(enum v4l2_priority prio) { return prio == V4L2_PRIORITY_BACKGROUND || prio == V4L2_PRIORITY_INTERACTIVE || prio == V4L2_PRIORITY_RECORD; } void v4l2_prio_init(struct v4l2_prio_state *global) { memset(global, 0, sizeof(*global)); } EXPORT_SYMBOL(v4l2_prio_init); int v4l2_prio_change(struct v4l2_prio_state *global, enum v4l2_priority *local, enum v4l2_priority new) { if (!prio_is_valid(new)) return -EINVAL; if (*local == new) return 0; atomic_inc(&global->prios[new]); if (prio_is_valid(*local)) atomic_dec(&global->prios[*local]); *local = new; return 0; } EXPORT_SYMBOL(v4l2_prio_change); void v4l2_prio_open(struct v4l2_prio_state *global, enum v4l2_priority *local) { v4l2_prio_change(global, local, V4L2_PRIORITY_DEFAULT); } EXPORT_SYMBOL(v4l2_prio_open); void v4l2_prio_close(struct v4l2_prio_state *global, enum v4l2_priority local) { if (prio_is_valid(local)) atomic_dec(&global->prios[local]); } EXPORT_SYMBOL(v4l2_prio_close); enum v4l2_priority v4l2_prio_max(struct v4l2_prio_state *global) { if (atomic_read(&global->prios[V4L2_PRIORITY_RECORD]) > 0) return V4L2_PRIORITY_RECORD; if (atomic_read(&global->prios[V4L2_PRIORITY_INTERACTIVE]) > 0) return V4L2_PRIORITY_INTERACTIVE; if (atomic_read(&global->prios[V4L2_PRIORITY_BACKGROUND]) > 0) return V4L2_PRIORITY_BACKGROUND; return V4L2_PRIORITY_UNSET; } EXPORT_SYMBOL(v4l2_prio_max); int v4l2_prio_check(struct v4l2_prio_state *global, enum v4l2_priority local) { return (local < v4l2_prio_max(global)) ? 
-EBUSY : 0; } EXPORT_SYMBOL(v4l2_prio_check); static ssize_t v4l2_read(struct file *filp, char __user *buf, size_t sz, loff_t *off) { struct video_device *vdev = video_devdata(filp); int ret = -ENODEV; if (!vdev->fops->read) return -EINVAL; if (video_is_registered(vdev)) ret = vdev->fops->read(filp, buf, sz, off); if ((vdev->dev_debug & V4L2_DEV_DEBUG_FOP) && (vdev->dev_debug & V4L2_DEV_DEBUG_STREAMING)) dprintk("%s: read: %zd (%d)\n", video_device_node_name(vdev), sz, ret); return ret; } static ssize_t v4l2_write(struct file *filp, const char __user *buf, size_t sz, loff_t *off) { struct video_device *vdev = video_devdata(filp); int ret = -ENODEV; if (!vdev->fops->write) return -EINVAL; if (video_is_registered(vdev)) ret = vdev->fops->write(filp, buf, sz, off); if ((vdev->dev_debug & V4L2_DEV_DEBUG_FOP) && (vdev->dev_debug & V4L2_DEV_DEBUG_STREAMING)) dprintk("%s: write: %zd (%d)\n", video_device_node_name(vdev), sz, ret); return ret; } static __poll_t v4l2_poll(struct file *filp, struct poll_table_struct *poll) { struct video_device *vdev = video_devdata(filp); __poll_t res = EPOLLERR | EPOLLHUP | EPOLLPRI; if (video_is_registered(vdev)) { if (!vdev->fops->poll) res = DEFAULT_POLLMASK; else res = vdev->fops->poll(filp, poll); } if (vdev->dev_debug & V4L2_DEV_DEBUG_POLL) dprintk("%s: poll: %08x %08x\n", video_device_node_name(vdev), res, poll_requested_events(poll)); return res; } static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct video_device *vdev = video_devdata(filp); int ret = -ENODEV; if (vdev->fops->unlocked_ioctl) { if (video_is_registered(vdev)) ret = vdev->fops->unlocked_ioctl(filp, cmd, arg); } else ret = -ENOTTY; return ret; } #ifdef CONFIG_MMU #define v4l2_get_unmapped_area NULL #else static unsigned long v4l2_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct video_device *vdev = video_devdata(filp); int ret; if (!vdev->fops->get_unmapped_area) return -ENOSYS; if (!video_is_registered(vdev)) return -ENODEV; ret = vdev->fops->get_unmapped_area(filp, addr, len, pgoff, flags); if (vdev->dev_debug & V4L2_DEV_DEBUG_FOP) dprintk("%s: get_unmapped_area (%d)\n", video_device_node_name(vdev), ret); return ret; } #endif static int v4l2_mmap(struct file *filp, struct vm_area_struct *vm) { struct video_device *vdev = video_devdata(filp); int ret = -ENODEV; if (!vdev->fops->mmap) return -ENODEV; if (video_is_registered(vdev)) ret = vdev->fops->mmap(filp, vm); if (vdev->dev_debug & V4L2_DEV_DEBUG_FOP) dprintk("%s: mmap (%d)\n", video_device_node_name(vdev), ret); return ret; } /* Override for the open function */ static int v4l2_open(struct inode *inode, struct file *filp) { struct video_device *vdev; int ret = 0; /* Check if the video device is available */ mutex_lock(&videodev_lock); vdev = video_devdata(filp); /* return ENODEV if the video device has already been removed. 
*/ if (vdev == NULL || !video_is_registered(vdev)) { mutex_unlock(&videodev_lock); return -ENODEV; } /* and increase the device refcount */ video_get(vdev); mutex_unlock(&videodev_lock); if (vdev->fops->open) { if (video_is_registered(vdev)) ret = vdev->fops->open(filp); else ret = -ENODEV; } if (vdev->dev_debug & V4L2_DEV_DEBUG_FOP) dprintk("%s: open (%d)\n", video_device_node_name(vdev), ret); /* decrease the refcount in case of an error */ if (ret) video_put(vdev); return ret; } /* Override for the release function */ static int v4l2_release(struct inode *inode, struct file *filp) { struct video_device *vdev = video_devdata(filp); int ret = 0; /* * We need to serialize the release() with queueing new requests. * The release() may trigger the cancellation of a streaming * operation, and that should not be mixed with queueing a new * request at the same time. */ if (vdev->fops->release) { if (v4l2_device_supports_requests(vdev->v4l2_dev)) { mutex_lock(&vdev->v4l2_dev->mdev->req_queue_mutex); ret = vdev->fops->release(filp); mutex_unlock(&vdev->v4l2_dev->mdev->req_queue_mutex); } else { ret = vdev->fops->release(filp); } } if (vdev->dev_debug & V4L2_DEV_DEBUG_FOP) dprintk("%s: release\n", video_device_node_name(vdev)); /* decrease the refcount unconditionally since the release() return value is ignored. */ video_put(vdev); return ret; } static const struct file_operations v4l2_fops = { .owner = THIS_MODULE, .read = v4l2_read, .write = v4l2_write, .open = v4l2_open, .get_unmapped_area = v4l2_get_unmapped_area, .mmap = v4l2_mmap, .unlocked_ioctl = v4l2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = v4l2_compat_ioctl32, #endif .release = v4l2_release, .poll = v4l2_poll, .llseek = no_llseek, }; /** * get_index - assign stream index number based on v4l2_dev * @vdev: video_device to assign index number to, vdev->v4l2_dev should be assigned * * Note that when this is called the new device has not yet been registered * in the video_device array, but it was able to obtain a minor number. * * This means that we can always obtain a free stream index number since * the worst case scenario is that there are VIDEO_NUM_DEVICES - 1 slots in * use of the video_device array. * * Returns a free index number. */ static int get_index(struct video_device *vdev) { /* This can be static since this function is called with the global videodev_lock held. */ static DECLARE_BITMAP(used, VIDEO_NUM_DEVICES); int i; bitmap_zero(used, VIDEO_NUM_DEVICES); for (i = 0; i < VIDEO_NUM_DEVICES; i++) { if (video_devices[i] != NULL && video_devices[i]->v4l2_dev == vdev->v4l2_dev) { __set_bit(video_devices[i]->index, used); } } return find_first_zero_bit(used, VIDEO_NUM_DEVICES); } #define SET_VALID_IOCTL(ops, cmd, op) \ do { if ((ops)->op) __set_bit(_IOC_NR(cmd), valid_ioctls); } while (0) /* This determines which ioctls are actually implemented in the driver. It's a one-time thing which simplifies video_ioctl2 as it can just do a bit test. Note that drivers can override this by setting bits to 1 in vdev->valid_ioctls. If an ioctl is marked as 1 when this function is called, then that ioctl will actually be marked as unimplemented. 
It does that by first setting up the local valid_ioctls bitmap, and at the end do a: vdev->valid_ioctls = valid_ioctls & ~(vdev->valid_ioctls) */ static void determine_valid_ioctls(struct video_device *vdev) { const u32 vid_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_CAPTURE_MPLANE | V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_VIDEO_OUTPUT_MPLANE | V4L2_CAP_VIDEO_M2M | V4L2_CAP_VIDEO_M2M_MPLANE; const u32 meta_caps = V4L2_CAP_META_CAPTURE | V4L2_CAP_META_OUTPUT; DECLARE_BITMAP(valid_ioctls, BASE_VIDIOC_PRIVATE); const struct v4l2_ioctl_ops *ops = vdev->ioctl_ops; bool is_vid = vdev->vfl_type == VFL_TYPE_VIDEO && (vdev->device_caps & vid_caps); bool is_vbi = vdev->vfl_type == VFL_TYPE_VBI; bool is_radio = vdev->vfl_type == VFL_TYPE_RADIO; bool is_sdr = vdev->vfl_type == VFL_TYPE_SDR; bool is_tch = vdev->vfl_type == VFL_TYPE_TOUCH; bool is_meta = vdev->vfl_type == VFL_TYPE_VIDEO && (vdev->device_caps & meta_caps); bool is_rx = vdev->vfl_dir != VFL_DIR_TX; bool is_tx = vdev->vfl_dir != VFL_DIR_RX; bool is_io_mc = vdev->device_caps & V4L2_CAP_IO_MC; bool has_streaming = vdev->device_caps & V4L2_CAP_STREAMING; bitmap_zero(valid_ioctls, BASE_VIDIOC_PRIVATE); /* vfl_type and vfl_dir independent ioctls */ SET_VALID_IOCTL(ops, VIDIOC_QUERYCAP, vidioc_querycap); __set_bit(_IOC_NR(VIDIOC_G_PRIORITY), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_S_PRIORITY), valid_ioctls); /* Note: the control handler can also be passed through the filehandle, and that can't be tested here. If the bit for these control ioctls is set, then the ioctl is valid. But if it is 0, then it can still be valid if the filehandle passed the control handler. */ if (vdev->ctrl_handler || ops->vidioc_queryctrl) __set_bit(_IOC_NR(VIDIOC_QUERYCTRL), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_query_ext_ctrl) __set_bit(_IOC_NR(VIDIOC_QUERY_EXT_CTRL), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_g_ctrl || ops->vidioc_g_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_G_CTRL), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_s_ctrl || ops->vidioc_s_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_S_CTRL), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_g_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_G_EXT_CTRLS), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_s_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_S_EXT_CTRLS), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_try_ext_ctrls) __set_bit(_IOC_NR(VIDIOC_TRY_EXT_CTRLS), valid_ioctls); if (vdev->ctrl_handler || ops->vidioc_querymenu) __set_bit(_IOC_NR(VIDIOC_QUERYMENU), valid_ioctls); if (!is_tch) { SET_VALID_IOCTL(ops, VIDIOC_G_FREQUENCY, vidioc_g_frequency); SET_VALID_IOCTL(ops, VIDIOC_S_FREQUENCY, vidioc_s_frequency); } SET_VALID_IOCTL(ops, VIDIOC_LOG_STATUS, vidioc_log_status); #ifdef CONFIG_VIDEO_ADV_DEBUG __set_bit(_IOC_NR(VIDIOC_DBG_G_CHIP_INFO), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_DBG_G_REGISTER), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_DBG_S_REGISTER), valid_ioctls); #endif /* yes, really vidioc_subscribe_event */ SET_VALID_IOCTL(ops, VIDIOC_DQEVENT, vidioc_subscribe_event); SET_VALID_IOCTL(ops, VIDIOC_SUBSCRIBE_EVENT, vidioc_subscribe_event); SET_VALID_IOCTL(ops, VIDIOC_UNSUBSCRIBE_EVENT, vidioc_unsubscribe_event); if (ops->vidioc_enum_freq_bands || ops->vidioc_g_tuner || ops->vidioc_g_modulator) __set_bit(_IOC_NR(VIDIOC_ENUM_FREQ_BANDS), valid_ioctls); if (is_vid) { /* video specific ioctls */ if ((is_rx && (ops->vidioc_enum_fmt_vid_cap || ops->vidioc_enum_fmt_vid_overlay)) || (is_tx && ops->vidioc_enum_fmt_vid_out)) __set_bit(_IOC_NR(VIDIOC_ENUM_FMT), valid_ioctls); if ((is_rx && 
(ops->vidioc_g_fmt_vid_cap || ops->vidioc_g_fmt_vid_cap_mplane || ops->vidioc_g_fmt_vid_overlay)) || (is_tx && (ops->vidioc_g_fmt_vid_out || ops->vidioc_g_fmt_vid_out_mplane || ops->vidioc_g_fmt_vid_out_overlay))) __set_bit(_IOC_NR(VIDIOC_G_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_s_fmt_vid_cap || ops->vidioc_s_fmt_vid_cap_mplane || ops->vidioc_s_fmt_vid_overlay)) || (is_tx && (ops->vidioc_s_fmt_vid_out || ops->vidioc_s_fmt_vid_out_mplane || ops->vidioc_s_fmt_vid_out_overlay))) __set_bit(_IOC_NR(VIDIOC_S_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_try_fmt_vid_cap || ops->vidioc_try_fmt_vid_cap_mplane || ops->vidioc_try_fmt_vid_overlay)) || (is_tx && (ops->vidioc_try_fmt_vid_out || ops->vidioc_try_fmt_vid_out_mplane || ops->vidioc_try_fmt_vid_out_overlay))) __set_bit(_IOC_NR(VIDIOC_TRY_FMT), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_OVERLAY, vidioc_overlay); SET_VALID_IOCTL(ops, VIDIOC_G_FBUF, vidioc_g_fbuf); SET_VALID_IOCTL(ops, VIDIOC_S_FBUF, vidioc_s_fbuf); SET_VALID_IOCTL(ops, VIDIOC_G_JPEGCOMP, vidioc_g_jpegcomp); SET_VALID_IOCTL(ops, VIDIOC_S_JPEGCOMP, vidioc_s_jpegcomp); SET_VALID_IOCTL(ops, VIDIOC_G_ENC_INDEX, vidioc_g_enc_index); SET_VALID_IOCTL(ops, VIDIOC_ENCODER_CMD, vidioc_encoder_cmd); SET_VALID_IOCTL(ops, VIDIOC_TRY_ENCODER_CMD, vidioc_try_encoder_cmd); SET_VALID_IOCTL(ops, VIDIOC_DECODER_CMD, vidioc_decoder_cmd); SET_VALID_IOCTL(ops, VIDIOC_TRY_DECODER_CMD, vidioc_try_decoder_cmd); SET_VALID_IOCTL(ops, VIDIOC_ENUM_FRAMESIZES, vidioc_enum_framesizes); SET_VALID_IOCTL(ops, VIDIOC_ENUM_FRAMEINTERVALS, vidioc_enum_frameintervals); if (ops->vidioc_g_selection && !test_bit(_IOC_NR(VIDIOC_G_SELECTION), vdev->valid_ioctls)) { __set_bit(_IOC_NR(VIDIOC_G_CROP), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_CROPCAP), valid_ioctls); } if (ops->vidioc_s_selection && !test_bit(_IOC_NR(VIDIOC_S_SELECTION), vdev->valid_ioctls)) __set_bit(_IOC_NR(VIDIOC_S_CROP), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_G_SELECTION, vidioc_g_selection); SET_VALID_IOCTL(ops, VIDIOC_S_SELECTION, vidioc_s_selection); } if (is_meta && is_rx) { /* metadata capture specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_meta_cap); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_meta_cap); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_meta_cap); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_meta_cap); } else if (is_meta && is_tx) { /* metadata output specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_meta_out); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_meta_out); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_meta_out); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_meta_out); } if (is_vbi) { /* vbi specific ioctls */ if ((is_rx && (ops->vidioc_g_fmt_vbi_cap || ops->vidioc_g_fmt_sliced_vbi_cap)) || (is_tx && (ops->vidioc_g_fmt_vbi_out || ops->vidioc_g_fmt_sliced_vbi_out))) __set_bit(_IOC_NR(VIDIOC_G_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_s_fmt_vbi_cap || ops->vidioc_s_fmt_sliced_vbi_cap)) || (is_tx && (ops->vidioc_s_fmt_vbi_out || ops->vidioc_s_fmt_sliced_vbi_out))) __set_bit(_IOC_NR(VIDIOC_S_FMT), valid_ioctls); if ((is_rx && (ops->vidioc_try_fmt_vbi_cap || ops->vidioc_try_fmt_sliced_vbi_cap)) || (is_tx && (ops->vidioc_try_fmt_vbi_out || ops->vidioc_try_fmt_sliced_vbi_out))) __set_bit(_IOC_NR(VIDIOC_TRY_FMT), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_G_SLICED_VBI_CAP, vidioc_g_sliced_vbi_cap); } else if (is_tch) { /* touch specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_vid_cap); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, 
vidioc_g_fmt_vid_cap); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_vid_cap); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_vid_cap); SET_VALID_IOCTL(ops, VIDIOC_ENUM_FRAMESIZES, vidioc_enum_framesizes); SET_VALID_IOCTL(ops, VIDIOC_ENUM_FRAMEINTERVALS, vidioc_enum_frameintervals); SET_VALID_IOCTL(ops, VIDIOC_ENUMINPUT, vidioc_enum_input); SET_VALID_IOCTL(ops, VIDIOC_G_INPUT, vidioc_g_input); SET_VALID_IOCTL(ops, VIDIOC_S_INPUT, vidioc_s_input); SET_VALID_IOCTL(ops, VIDIOC_G_PARM, vidioc_g_parm); SET_VALID_IOCTL(ops, VIDIOC_S_PARM, vidioc_s_parm); } else if (is_sdr && is_rx) { /* SDR receiver specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_sdr_cap); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_sdr_cap); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_sdr_cap); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_sdr_cap); } else if (is_sdr && is_tx) { /* SDR transmitter specific ioctls */ SET_VALID_IOCTL(ops, VIDIOC_ENUM_FMT, vidioc_enum_fmt_sdr_out); SET_VALID_IOCTL(ops, VIDIOC_G_FMT, vidioc_g_fmt_sdr_out); SET_VALID_IOCTL(ops, VIDIOC_S_FMT, vidioc_s_fmt_sdr_out); SET_VALID_IOCTL(ops, VIDIOC_TRY_FMT, vidioc_try_fmt_sdr_out); } if (has_streaming) { /* ioctls valid for streaming I/O */ SET_VALID_IOCTL(ops, VIDIOC_REQBUFS, vidioc_reqbufs); SET_VALID_IOCTL(ops, VIDIOC_QUERYBUF, vidioc_querybuf); SET_VALID_IOCTL(ops, VIDIOC_QBUF, vidioc_qbuf); SET_VALID_IOCTL(ops, VIDIOC_EXPBUF, vidioc_expbuf); SET_VALID_IOCTL(ops, VIDIOC_DQBUF, vidioc_dqbuf); SET_VALID_IOCTL(ops, VIDIOC_CREATE_BUFS, vidioc_create_bufs); SET_VALID_IOCTL(ops, VIDIOC_PREPARE_BUF, vidioc_prepare_buf); SET_VALID_IOCTL(ops, VIDIOC_STREAMON, vidioc_streamon); SET_VALID_IOCTL(ops, VIDIOC_STREAMOFF, vidioc_streamoff); } if (is_vid || is_vbi || is_meta) { /* ioctls valid for video, vbi and metadata */ if (ops->vidioc_s_std) __set_bit(_IOC_NR(VIDIOC_ENUMSTD), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_S_STD, vidioc_s_std); SET_VALID_IOCTL(ops, VIDIOC_G_STD, vidioc_g_std); if (is_rx) { SET_VALID_IOCTL(ops, VIDIOC_QUERYSTD, vidioc_querystd); if (is_io_mc) { __set_bit(_IOC_NR(VIDIOC_ENUMINPUT), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_G_INPUT), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_S_INPUT), valid_ioctls); } else { SET_VALID_IOCTL(ops, VIDIOC_ENUMINPUT, vidioc_enum_input); SET_VALID_IOCTL(ops, VIDIOC_G_INPUT, vidioc_g_input); SET_VALID_IOCTL(ops, VIDIOC_S_INPUT, vidioc_s_input); } SET_VALID_IOCTL(ops, VIDIOC_ENUMAUDIO, vidioc_enumaudio); SET_VALID_IOCTL(ops, VIDIOC_G_AUDIO, vidioc_g_audio); SET_VALID_IOCTL(ops, VIDIOC_S_AUDIO, vidioc_s_audio); SET_VALID_IOCTL(ops, VIDIOC_QUERY_DV_TIMINGS, vidioc_query_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_S_EDID, vidioc_s_edid); } if (is_tx) { if (is_io_mc) { __set_bit(_IOC_NR(VIDIOC_ENUMOUTPUT), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_G_OUTPUT), valid_ioctls); __set_bit(_IOC_NR(VIDIOC_S_OUTPUT), valid_ioctls); } else { SET_VALID_IOCTL(ops, VIDIOC_ENUMOUTPUT, vidioc_enum_output); SET_VALID_IOCTL(ops, VIDIOC_G_OUTPUT, vidioc_g_output); SET_VALID_IOCTL(ops, VIDIOC_S_OUTPUT, vidioc_s_output); } SET_VALID_IOCTL(ops, VIDIOC_ENUMAUDOUT, vidioc_enumaudout); SET_VALID_IOCTL(ops, VIDIOC_G_AUDOUT, vidioc_g_audout); SET_VALID_IOCTL(ops, VIDIOC_S_AUDOUT, vidioc_s_audout); } if (ops->vidioc_g_parm || ops->vidioc_g_std) __set_bit(_IOC_NR(VIDIOC_G_PARM), valid_ioctls); SET_VALID_IOCTL(ops, VIDIOC_S_PARM, vidioc_s_parm); SET_VALID_IOCTL(ops, VIDIOC_S_DV_TIMINGS, vidioc_s_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_G_DV_TIMINGS, vidioc_g_dv_timings); SET_VALID_IOCTL(ops, 
VIDIOC_ENUM_DV_TIMINGS, vidioc_enum_dv_timings); SET_VALID_IOCTL(ops, VIDIOC_DV_TIMINGS_CAP, vidioc_dv_timings_cap); SET_VALID_IOCTL(ops, VIDIOC_G_EDID, vidioc_g_edid); } if (is_tx && (is_radio || is_sdr)) { /* radio transmitter only ioctls */ SET_VALID_IOCTL(ops, VIDIOC_G_MODULATOR, vidioc_g_modulator); SET_VALID_IOCTL(ops, VIDIOC_S_MODULATOR, vidioc_s_modulator); } if (is_rx && !is_tch) { /* receiver only ioctls */ SET_VALID_IOCTL(ops, VIDIOC_G_TUNER, vidioc_g_tuner); SET_VALID_IOCTL(ops, VIDIOC_S_TUNER, vidioc_s_tuner); SET_VALID_IOCTL(ops, VIDIOC_S_HW_FREQ_SEEK, vidioc_s_hw_freq_seek); } bitmap_andnot(vdev->valid_ioctls, valid_ioctls, vdev->valid_ioctls, BASE_VIDIOC_PRIVATE); } static int video_register_media_controller(struct video_device *vdev) { #if defined(CONFIG_MEDIA_CONTROLLER) u32 intf_type; int ret; /* Memory-to-memory devices are more complex and use * their own function to register its mc entities. */ if (!vdev->v4l2_dev->mdev || vdev->vfl_dir == VFL_DIR_M2M) return 0; vdev->entity.obj_type = MEDIA_ENTITY_TYPE_VIDEO_DEVICE; vdev->entity.function = MEDIA_ENT_F_UNKNOWN; switch (vdev->vfl_type) { case VFL_TYPE_VIDEO: intf_type = MEDIA_INTF_T_V4L_VIDEO; vdev->entity.function = MEDIA_ENT_F_IO_V4L; break; case VFL_TYPE_VBI: intf_type = MEDIA_INTF_T_V4L_VBI; vdev->entity.function = MEDIA_ENT_F_IO_VBI; break; case VFL_TYPE_SDR: intf_type = MEDIA_INTF_T_V4L_SWRADIO; vdev->entity.function = MEDIA_ENT_F_IO_SWRADIO; break; case VFL_TYPE_TOUCH: intf_type = MEDIA_INTF_T_V4L_TOUCH; vdev->entity.function = MEDIA_ENT_F_IO_V4L; break; case VFL_TYPE_RADIO: intf_type = MEDIA_INTF_T_V4L_RADIO; /* * Radio doesn't have an entity at the V4L2 side to represent * radio input or output. Instead, the audio input/output goes * via either physical wires or ALSA. */ break; case VFL_TYPE_SUBDEV: intf_type = MEDIA_INTF_T_V4L_SUBDEV; /* Entity will be created via v4l2_device_register_subdev() */ break; default: return 0; } if (vdev->entity.function != MEDIA_ENT_F_UNKNOWN) { vdev->entity.name = vdev->name; /* Needed just for backward compatibility with legacy MC API */ vdev->entity.info.dev.major = VIDEO_MAJOR; vdev->entity.info.dev.minor = vdev->minor; ret = media_device_register_entity(vdev->v4l2_dev->mdev, &vdev->entity); if (ret < 0) { pr_warn("%s: media_device_register_entity failed\n", __func__); return ret; } } vdev->intf_devnode = media_devnode_create(vdev->v4l2_dev->mdev, intf_type, 0, VIDEO_MAJOR, vdev->minor); if (!vdev->intf_devnode) { media_device_unregister_entity(&vdev->entity); return -ENOMEM; } if (vdev->entity.function != MEDIA_ENT_F_UNKNOWN) { struct media_link *link; link = media_create_intf_link(&vdev->entity, &vdev->intf_devnode->intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) { media_devnode_remove(vdev->intf_devnode); media_device_unregister_entity(&vdev->entity); return -ENOMEM; } } /* FIXME: how to create the other interface links? 
*/ #endif return 0; } int __video_register_device(struct video_device *vdev, enum vfl_devnode_type type, int nr, int warn_if_nr_in_use, struct module *owner) { int i = 0; int ret; int minor_offset = 0; int minor_cnt = VIDEO_NUM_DEVICES; const char *name_base; /* A minor value of -1 marks this video device as never having been registered */ vdev->minor = -1; /* the release callback MUST be present */ if (WARN_ON(!vdev->release)) return -EINVAL; /* the v4l2_dev pointer MUST be present */ if (WARN_ON(!vdev->v4l2_dev)) return -EINVAL; /* the device_caps field MUST be set for all but subdevs */ if (WARN_ON(type != VFL_TYPE_SUBDEV && !vdev->device_caps)) return -EINVAL; /* v4l2_fh support */ spin_lock_init(&vdev->fh_lock); INIT_LIST_HEAD(&vdev->fh_list); /* Part 1: check device type */ switch (type) { case VFL_TYPE_VIDEO: name_base = "video"; break; case VFL_TYPE_VBI: name_base = "vbi"; break; case VFL_TYPE_RADIO: name_base = "radio"; break; case VFL_TYPE_SUBDEV: name_base = "v4l-subdev"; break; case VFL_TYPE_SDR: /* Use device name 'swradio' because 'sdr' was already taken. */ name_base = "swradio"; break; case VFL_TYPE_TOUCH: name_base = "v4l-touch"; break; default: pr_err("%s called with unknown type: %d\n", __func__, type); return -EINVAL; } vdev->vfl_type = type; vdev->cdev = NULL; if (vdev->dev_parent == NULL) vdev->dev_parent = vdev->v4l2_dev->dev; if (vdev->ctrl_handler == NULL) vdev->ctrl_handler = vdev->v4l2_dev->ctrl_handler; /* If the prio state pointer is NULL, then use the v4l2_device prio state. */ if (vdev->prio == NULL) vdev->prio = &vdev->v4l2_dev->prio; /* Part 2: find a free minor, device node number and device index. */ #ifdef CONFIG_VIDEO_FIXED_MINOR_RANGES /* Keep the ranges for the first four types for historical * reasons. * Newer devices (not yet in place) should use the range * of 128-191 and just pick the first free minor there * (new style). */ switch (type) { case VFL_TYPE_VIDEO: minor_offset = 0; minor_cnt = 64; break; case VFL_TYPE_RADIO: minor_offset = 64; minor_cnt = 64; break; case VFL_TYPE_VBI: minor_offset = 224; minor_cnt = 32; break; default: minor_offset = 128; minor_cnt = 64; break; } #endif /* Pick a device node number */ mutex_lock(&videodev_lock); nr = devnode_find(vdev, nr == -1 ? 0 : nr, minor_cnt); if (nr == minor_cnt) nr = devnode_find(vdev, 0, minor_cnt); if (nr == minor_cnt) { pr_err("could not get a free device node number\n"); mutex_unlock(&videodev_lock); return -ENFILE; } #ifdef CONFIG_VIDEO_FIXED_MINOR_RANGES /* 1-on-1 mapping of device node number to minor number */ i = nr; #else /* The device node number and minor numbers are independent, so we just find the first free minor number. 
*/ for (i = 0; i < VIDEO_NUM_DEVICES; i++) if (video_devices[i] == NULL) break; if (i == VIDEO_NUM_DEVICES) { mutex_unlock(&videodev_lock); pr_err("could not get a free minor\n"); return -ENFILE; } #endif vdev->minor = i + minor_offset; vdev->num = nr; /* Should not happen since we thought this minor was free */ if (WARN_ON(video_devices[vdev->minor])) { mutex_unlock(&videodev_lock); pr_err("video_device not empty!\n"); return -ENFILE; } devnode_set(vdev); vdev->index = get_index(vdev); video_devices[vdev->minor] = vdev; mutex_unlock(&videodev_lock); if (vdev->ioctl_ops) determine_valid_ioctls(vdev); /* Part 3: Initialize the character device */ vdev->cdev = cdev_alloc(); if (vdev->cdev == NULL) { ret = -ENOMEM; goto cleanup; } vdev->cdev->ops = &v4l2_fops; vdev->cdev->owner = owner; ret = cdev_add(vdev->cdev, MKDEV(VIDEO_MAJOR, vdev->minor), 1); if (ret < 0) { pr_err("%s: cdev_add failed\n", __func__); kfree(vdev->cdev); vdev->cdev = NULL; goto cleanup; } /* Part 4: register the device with sysfs */ vdev->dev.class = &video_class; vdev->dev.devt = MKDEV(VIDEO_MAJOR, vdev->minor); vdev->dev.parent = vdev->dev_parent; dev_set_name(&vdev->dev, "%s%d", name_base, vdev->num); ret = device_register(&vdev->dev); if (ret < 0) { pr_err("%s: device_register failed\n", __func__); goto cleanup; } /* Register the release callback that will be called when the last reference to the device goes away. */ vdev->dev.release = v4l2_device_release; if (nr != -1 && nr != vdev->num && warn_if_nr_in_use) pr_warn("%s: requested %s%d, got %s\n", __func__, name_base, nr, video_device_node_name(vdev)); /* Increase v4l2_device refcount */ v4l2_device_get(vdev->v4l2_dev); /* Part 5: Register the entity. */ ret = video_register_media_controller(vdev); /* Part 6: Activate this minor. The char device can now be used. */ set_bit(V4L2_FL_REGISTERED, &vdev->flags); return 0; cleanup: mutex_lock(&videodev_lock); if (vdev->cdev) cdev_del(vdev->cdev); video_devices[vdev->minor] = NULL; devnode_clear(vdev); mutex_unlock(&videodev_lock); /* Mark this video device as never having been registered. */ vdev->minor = -1; return ret; } EXPORT_SYMBOL(__video_register_device); /** * video_unregister_device - unregister a video4linux device * @vdev: the device to unregister * * This unregisters the passed device. Future open calls will * be met with errors. */ void video_unregister_device(struct video_device *vdev) { /* Check if vdev was ever registered at all */ if (!vdev || !video_is_registered(vdev)) return; mutex_lock(&videodev_lock); /* This must be in a critical section to prevent a race with v4l2_open. * Once this bit has been cleared video_get may never be called again. 
*/ clear_bit(V4L2_FL_REGISTERED, &vdev->flags); mutex_unlock(&videodev_lock); if (test_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags)) v4l2_event_wake_all(vdev); device_unregister(&vdev->dev); } EXPORT_SYMBOL(video_unregister_device); #if defined(CONFIG_MEDIA_CONTROLLER) __must_check int video_device_pipeline_start(struct video_device *vdev, struct media_pipeline *pipe) { struct media_entity *entity = &vdev->entity; if (entity->num_pads != 1) return -ENODEV; return media_pipeline_start(&entity->pads[0], pipe); } EXPORT_SYMBOL_GPL(video_device_pipeline_start); __must_check int __video_device_pipeline_start(struct video_device *vdev, struct media_pipeline *pipe) { struct media_entity *entity = &vdev->entity; if (entity->num_pads != 1) return -ENODEV; return __media_pipeline_start(&entity->pads[0], pipe); } EXPORT_SYMBOL_GPL(__video_device_pipeline_start); void video_device_pipeline_stop(struct video_device *vdev) { struct media_entity *entity = &vdev->entity; if (WARN_ON(entity->num_pads != 1)) return; return media_pipeline_stop(&entity->pads[0]); } EXPORT_SYMBOL_GPL(video_device_pipeline_stop); void __video_device_pipeline_stop(struct video_device *vdev) { struct media_entity *entity = &vdev->entity; if (WARN_ON(entity->num_pads != 1)) return; return __media_pipeline_stop(&entity->pads[0]); } EXPORT_SYMBOL_GPL(__video_device_pipeline_stop); __must_check int video_device_pipeline_alloc_start(struct video_device *vdev) { struct media_entity *entity = &vdev->entity; if (entity->num_pads != 1) return -ENODEV; return media_pipeline_alloc_start(&entity->pads[0]); } EXPORT_SYMBOL_GPL(video_device_pipeline_alloc_start); struct media_pipeline *video_device_pipeline(struct video_device *vdev) { struct media_entity *entity = &vdev->entity; if (WARN_ON(entity->num_pads != 1)) return NULL; return media_pad_pipeline(&entity->pads[0]); } EXPORT_SYMBOL_GPL(video_device_pipeline); #endif /* CONFIG_MEDIA_CONTROLLER */ /* * Initialise video for linux */ static int __init videodev_init(void) { dev_t dev = MKDEV(VIDEO_MAJOR, 0); int ret; pr_info("Linux video capture interface: v2.00\n"); ret = register_chrdev_region(dev, VIDEO_NUM_DEVICES, VIDEO_NAME); if (ret < 0) { pr_warn("videodev: unable to get major %d\n", VIDEO_MAJOR); return ret; } ret = class_register(&video_class); if (ret < 0) { unregister_chrdev_region(dev, VIDEO_NUM_DEVICES); pr_warn("video_dev: class_register failed\n"); return -EIO; } return 0; } static void __exit videodev_exit(void) { dev_t dev = MKDEV(VIDEO_MAJOR, 0); class_unregister(&video_class); unregister_chrdev_region(dev, VIDEO_NUM_DEVICES); } subsys_initcall(videodev_init); module_exit(videodev_exit) MODULE_AUTHOR("Alan Cox, Mauro Carvalho Chehab <mchehab@kernel.org>, Bill Dirks, Justin Schoeman, Gerd Knorr"); MODULE_DESCRIPTION("Video4Linux2 core driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(VIDEO_MAJOR);
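/*
 * Illustrative sketch (not part of this file): how a capture driver would
 * typically use the registration API defined above. All "skel_*" names are
 * hypothetical placeholders, the ioctl_ops table is left empty for brevity,
 * and error handling is reduced to the minimum; video_register_device() is
 * assumed to be the usual wrapper around __video_register_device().
 */
#include <linux/errno.h>
#include <linux/string.h>
#include <media/v4l2-device.h>
#include <media/v4l2-dev.h>
#include <media/v4l2-fh.h>
#include <media/v4l2-ioctl.h>

static struct v4l2_device skel_v4l2_dev;	/* assumed driver-global state */

static const struct v4l2_file_operations skel_fops = {
	.owner		= THIS_MODULE,
	.open		= v4l2_fh_open,
	.release	= v4l2_fh_release,
	.unlocked_ioctl	= video_ioctl2,
};

/* Normally filled with vidioc_* handlers; left empty in this sketch. */
static const struct v4l2_ioctl_ops skel_ioctl_ops = { };

static int skel_register_video_node(struct device *dev)
{
	struct video_device *vdev;
	int ret;

	ret = v4l2_device_register(dev, &skel_v4l2_dev);
	if (ret)
		return ret;

	vdev = video_device_alloc();
	if (!vdev) {
		v4l2_device_unregister(&skel_v4l2_dev);
		return -ENOMEM;
	}

	strscpy(vdev->name, "skel", sizeof(vdev->name));
	vdev->v4l2_dev = &skel_v4l2_dev;
	vdev->fops = &skel_fops;
	vdev->ioctl_ops = &skel_ioctl_ops;
	vdev->release = video_device_release;	/* kfree()s vdev on last put */
	vdev->vfl_dir = VFL_DIR_RX;
	vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;

	/* nr == -1: let the core pick the first free /dev/videoX node */
	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
	if (ret) {
		video_device_release(vdev);
		v4l2_device_unregister(&skel_v4l2_dev);
	}
	return ret;
}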
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Media device
 *
 * Copyright (C) 2010 Nokia Corporation
 *
* Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> */ #include <linux/compat.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/ioctl.h> #include <linux/media.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/pci.h> #include <linux/usb.h> #include <linux/version.h> #include <media/media-device.h> #include <media/media-devnode.h> #include <media/media-entity.h> #include <media/media-request.h> #ifdef CONFIG_MEDIA_CONTROLLER /* * Legacy defines from linux/media.h. This is the only place we need this * so we just define it here. The media.h header doesn't expose it to the * kernel to prevent it from being used by drivers, but here (and only here!) * we need it to handle the legacy behavior. */ #define MEDIA_ENT_SUBTYPE_MASK 0x0000ffff #define MEDIA_ENT_T_DEVNODE_UNKNOWN (MEDIA_ENT_F_OLD_BASE | \ MEDIA_ENT_SUBTYPE_MASK) /* ----------------------------------------------------------------------------- * Userspace API */ static inline void __user *media_get_uptr(__u64 arg) { return (void __user *)(uintptr_t)arg; } static int media_device_open(struct file *filp) { return 0; } static int media_device_close(struct file *filp) { return 0; } static long media_device_get_info(struct media_device *dev, void *arg) { struct media_device_info *info = arg; memset(info, 0, sizeof(*info)); if (dev->driver_name[0]) strscpy(info->driver, dev->driver_name, sizeof(info->driver)); else strscpy(info->driver, dev->dev->driver->name, sizeof(info->driver)); strscpy(info->model, dev->model, sizeof(info->model)); strscpy(info->serial, dev->serial, sizeof(info->serial)); strscpy(info->bus_info, dev->bus_info, sizeof(info->bus_info)); info->media_version = LINUX_VERSION_CODE; info->driver_version = info->media_version; info->hw_revision = dev->hw_revision; return 0; } static struct media_entity *find_entity(struct media_device *mdev, u32 id) { struct media_entity *entity; int next = id & MEDIA_ENT_ID_FLAG_NEXT; id &= ~MEDIA_ENT_ID_FLAG_NEXT; media_device_for_each_entity(entity, mdev) { if (((media_entity_id(entity) == id) && !next) || ((media_entity_id(entity) > id) && next)) { return entity; } } return NULL; } static long media_device_enum_entities(struct media_device *mdev, void *arg) { struct media_entity_desc *entd = arg; struct media_entity *ent; ent = find_entity(mdev, entd->id); if (ent == NULL) return -EINVAL; memset(entd, 0, sizeof(*entd)); entd->id = media_entity_id(ent); if (ent->name) strscpy(entd->name, ent->name, sizeof(entd->name)); entd->type = ent->function; entd->revision = 0; /* Unused */ entd->flags = ent->flags; entd->group_id = 0; /* Unused */ entd->pads = ent->num_pads; entd->links = ent->num_links - ent->num_backlinks; /* * Workaround for a bug at media-ctl <= v1.10 that makes it to * do the wrong thing if the entity function doesn't belong to * either MEDIA_ENT_F_OLD_BASE or MEDIA_ENT_F_OLD_SUBDEV_BASE * Ranges. * * Non-subdevices are expected to be at the MEDIA_ENT_F_OLD_BASE, * or, otherwise, will be silently ignored by media-ctl when * printing the graphviz diagram. So, map them into the devnode * old range. 
*/ if (ent->function < MEDIA_ENT_F_OLD_BASE || ent->function > MEDIA_ENT_F_TUNER) { if (is_media_entity_v4l2_subdev(ent)) entd->type = MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN; else if (ent->function != MEDIA_ENT_F_IO_V4L) entd->type = MEDIA_ENT_T_DEVNODE_UNKNOWN; } memcpy(&entd->raw, &ent->info, sizeof(ent->info)); return 0; } static void media_device_kpad_to_upad(const struct media_pad *kpad, struct media_pad_desc *upad) { upad->entity = media_entity_id(kpad->entity); upad->index = kpad->index; upad->flags = kpad->flags; } static long media_device_enum_links(struct media_device *mdev, void *arg) { struct media_links_enum *links = arg; struct media_entity *entity; entity = find_entity(mdev, links->entity); if (entity == NULL) return -EINVAL; if (links->pads) { unsigned int p; for (p = 0; p < entity->num_pads; p++) { struct media_pad_desc pad; memset(&pad, 0, sizeof(pad)); media_device_kpad_to_upad(&entity->pads[p], &pad); if (copy_to_user(&links->pads[p], &pad, sizeof(pad))) return -EFAULT; } } if (links->links) { struct media_link *link; struct media_link_desc __user *ulink_desc = links->links; list_for_each_entry(link, &entity->links, list) { struct media_link_desc klink_desc; /* Ignore backlinks. */ if (link->source->entity != entity) continue; memset(&klink_desc, 0, sizeof(klink_desc)); media_device_kpad_to_upad(link->source, &klink_desc.source); media_device_kpad_to_upad(link->sink, &klink_desc.sink); klink_desc.flags = link->flags; if (copy_to_user(ulink_desc, &klink_desc, sizeof(*ulink_desc))) return -EFAULT; ulink_desc++; } } memset(links->reserved, 0, sizeof(links->reserved)); return 0; } static long media_device_setup_link(struct media_device *mdev, void *arg) { struct media_link_desc *linkd = arg; struct media_link *link = NULL; struct media_entity *source; struct media_entity *sink; /* Find the source and sink entities and link. */ source = find_entity(mdev, linkd->source.entity); sink = find_entity(mdev, linkd->sink.entity); if (source == NULL || sink == NULL) return -EINVAL; if (linkd->source.index >= source->num_pads || linkd->sink.index >= sink->num_pads) return -EINVAL; link = media_entity_find_link(&source->pads[linkd->source.index], &sink->pads[linkd->sink.index]); if (link == NULL) return -EINVAL; memset(linkd->reserved, 0, sizeof(linkd->reserved)); /* Setup the link on both entities. 
*/ return __media_entity_setup_link(link, linkd->flags); } static long media_device_get_topology(struct media_device *mdev, void *arg) { struct media_v2_topology *topo = arg; struct media_entity *entity; struct media_interface *intf; struct media_pad *pad; struct media_link *link; struct media_v2_entity kentity, __user *uentity; struct media_v2_interface kintf, __user *uintf; struct media_v2_pad kpad, __user *upad; struct media_v2_link klink, __user *ulink; unsigned int i; int ret = 0; topo->topology_version = mdev->topology_version; /* Get entities and number of entities */ i = 0; uentity = media_get_uptr(topo->ptr_entities); media_device_for_each_entity(entity, mdev) { i++; if (ret || !uentity) continue; if (i > topo->num_entities) { ret = -ENOSPC; continue; } /* Copy fields to userspace struct if not error */ memset(&kentity, 0, sizeof(kentity)); kentity.id = entity->graph_obj.id; kentity.function = entity->function; kentity.flags = entity->flags; strscpy(kentity.name, entity->name, sizeof(kentity.name)); if (copy_to_user(uentity, &kentity, sizeof(kentity))) ret = -EFAULT; uentity++; } topo->num_entities = i; topo->reserved1 = 0; /* Get interfaces and number of interfaces */ i = 0; uintf = media_get_uptr(topo->ptr_interfaces); media_device_for_each_intf(intf, mdev) { i++; if (ret || !uintf) continue; if (i > topo->num_interfaces) { ret = -ENOSPC; continue; } memset(&kintf, 0, sizeof(kintf)); /* Copy intf fields to userspace struct */ kintf.id = intf->graph_obj.id; kintf.intf_type = intf->type; kintf.flags = intf->flags; if (media_type(&intf->graph_obj) == MEDIA_GRAPH_INTF_DEVNODE) { struct media_intf_devnode *devnode; devnode = intf_to_devnode(intf); kintf.devnode.major = devnode->major; kintf.devnode.minor = devnode->minor; } if (copy_to_user(uintf, &kintf, sizeof(kintf))) ret = -EFAULT; uintf++; } topo->num_interfaces = i; topo->reserved2 = 0; /* Get pads and number of pads */ i = 0; upad = media_get_uptr(topo->ptr_pads); media_device_for_each_pad(pad, mdev) { i++; if (ret || !upad) continue; if (i > topo->num_pads) { ret = -ENOSPC; continue; } memset(&kpad, 0, sizeof(kpad)); /* Copy pad fields to userspace struct */ kpad.id = pad->graph_obj.id; kpad.entity_id = pad->entity->graph_obj.id; kpad.flags = pad->flags; kpad.index = pad->index; if (copy_to_user(upad, &kpad, sizeof(kpad))) ret = -EFAULT; upad++; } topo->num_pads = i; topo->reserved3 = 0; /* Get links and number of links */ i = 0; ulink = media_get_uptr(topo->ptr_links); media_device_for_each_link(link, mdev) { if (link->is_backlink) continue; i++; if (ret || !ulink) continue; if (i > topo->num_links) { ret = -ENOSPC; continue; } memset(&klink, 0, sizeof(klink)); /* Copy link fields to userspace struct */ klink.id = link->graph_obj.id; klink.source_id = link->gobj0->id; klink.sink_id = link->gobj1->id; klink.flags = link->flags; if (copy_to_user(ulink, &klink, sizeof(klink))) ret = -EFAULT; ulink++; } topo->num_links = i; topo->reserved4 = 0; return ret; } static long media_device_request_alloc(struct media_device *mdev, void *arg) { int *alloc_fd = arg; if (!mdev->ops || !mdev->ops->req_validate || !mdev->ops->req_queue) return -ENOTTY; return media_request_alloc(mdev, alloc_fd); } static long copy_arg_from_user(void *karg, void __user *uarg, unsigned int cmd) { if ((_IOC_DIR(cmd) & _IOC_WRITE) && copy_from_user(karg, uarg, _IOC_SIZE(cmd))) return -EFAULT; return 0; } static long copy_arg_to_user(void __user *uarg, void *karg, unsigned int cmd) { if ((_IOC_DIR(cmd) & _IOC_READ) && copy_to_user(uarg, karg, _IOC_SIZE(cmd))) 
return -EFAULT; return 0; } /* Do acquire the graph mutex */ #define MEDIA_IOC_FL_GRAPH_MUTEX BIT(0) #define MEDIA_IOC_ARG(__cmd, func, fl, from_user, to_user) \ [_IOC_NR(MEDIA_IOC_##__cmd)] = { \ .cmd = MEDIA_IOC_##__cmd, \ .fn = func, \ .flags = fl, \ .arg_from_user = from_user, \ .arg_to_user = to_user, \ } #define MEDIA_IOC(__cmd, func, fl) \ MEDIA_IOC_ARG(__cmd, func, fl, copy_arg_from_user, copy_arg_to_user) /* the table is indexed by _IOC_NR(cmd) */ struct media_ioctl_info { unsigned int cmd; unsigned short flags; long (*fn)(struct media_device *dev, void *arg); long (*arg_from_user)(void *karg, void __user *uarg, unsigned int cmd); long (*arg_to_user)(void __user *uarg, void *karg, unsigned int cmd); }; static const struct media_ioctl_info ioctl_info[] = { MEDIA_IOC(DEVICE_INFO, media_device_get_info, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(ENUM_ENTITIES, media_device_enum_entities, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(ENUM_LINKS, media_device_enum_links, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(SETUP_LINK, media_device_setup_link, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(G_TOPOLOGY, media_device_get_topology, MEDIA_IOC_FL_GRAPH_MUTEX), MEDIA_IOC(REQUEST_ALLOC, media_device_request_alloc, 0), }; static long media_device_ioctl(struct file *filp, unsigned int cmd, unsigned long __arg) { struct media_devnode *devnode = media_devnode_data(filp); struct media_device *dev = devnode->media_dev; const struct media_ioctl_info *info; void __user *arg = (void __user *)__arg; char __karg[256], *karg = __karg; long ret; if (_IOC_NR(cmd) >= ARRAY_SIZE(ioctl_info) || ioctl_info[_IOC_NR(cmd)].cmd != cmd) return -ENOIOCTLCMD; info = &ioctl_info[_IOC_NR(cmd)]; if (_IOC_SIZE(info->cmd) > sizeof(__karg)) { karg = kmalloc(_IOC_SIZE(info->cmd), GFP_KERNEL); if (!karg) return -ENOMEM; } if (info->arg_from_user) { ret = info->arg_from_user(karg, arg, cmd); if (ret) goto out_free; } if (info->flags & MEDIA_IOC_FL_GRAPH_MUTEX) mutex_lock(&dev->graph_mutex); ret = info->fn(dev, karg); if (info->flags & MEDIA_IOC_FL_GRAPH_MUTEX) mutex_unlock(&dev->graph_mutex); if (!ret && info->arg_to_user) ret = info->arg_to_user(arg, karg, cmd); out_free: if (karg != __karg) kfree(karg); return ret; } #ifdef CONFIG_COMPAT struct media_links_enum32 { __u32 entity; compat_uptr_t pads; /* struct media_pad_desc * */ compat_uptr_t links; /* struct media_link_desc * */ __u32 reserved[4]; }; static long media_device_enum_links32(struct media_device *mdev, struct media_links_enum32 __user *ulinks) { struct media_links_enum links; compat_uptr_t pads_ptr, links_ptr; int ret; memset(&links, 0, sizeof(links)); if (get_user(links.entity, &ulinks->entity) || get_user(pads_ptr, &ulinks->pads) || get_user(links_ptr, &ulinks->links)) return -EFAULT; links.pads = compat_ptr(pads_ptr); links.links = compat_ptr(links_ptr); ret = media_device_enum_links(mdev, &links); if (ret) return ret; if (copy_to_user(ulinks->reserved, links.reserved, sizeof(ulinks->reserved))) return -EFAULT; return 0; } #define MEDIA_IOC_ENUM_LINKS32 _IOWR('|', 0x02, struct media_links_enum32) static long media_device_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct media_devnode *devnode = media_devnode_data(filp); struct media_device *dev = devnode->media_dev; long ret; switch (cmd) { case MEDIA_IOC_ENUM_LINKS32: mutex_lock(&dev->graph_mutex); ret = media_device_enum_links32(dev, (struct media_links_enum32 __user *)arg); mutex_unlock(&dev->graph_mutex); break; default: return media_device_ioctl(filp, cmd, arg); } return ret; } #endif /* CONFIG_COMPAT 
*/ static const struct media_file_operations media_device_fops = { .owner = THIS_MODULE, .open = media_device_open, .ioctl = media_device_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = media_device_compat_ioctl, #endif /* CONFIG_COMPAT */ .release = media_device_close, }; /* ----------------------------------------------------------------------------- * sysfs */ static ssize_t model_show(struct device *cd, struct device_attribute *attr, char *buf) { struct media_devnode *devnode = to_media_devnode(cd); struct media_device *mdev = devnode->media_dev; return sprintf(buf, "%.*s\n", (int)sizeof(mdev->model), mdev->model); } static DEVICE_ATTR_RO(model); /* ----------------------------------------------------------------------------- * Registration/unregistration */ static void media_device_release(struct media_devnode *devnode) { dev_dbg(devnode->parent, "Media device released\n"); } static void __media_device_unregister_entity(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; struct media_link *link, *tmp; struct media_interface *intf; struct media_pad *iter; ida_free(&mdev->entity_internal_idx, entity->internal_idx); /* Remove all interface links pointing to this entity */ list_for_each_entry(intf, &mdev->interfaces, graph_obj.list) { list_for_each_entry_safe(link, tmp, &intf->links, list) { if (link->entity == entity) __media_remove_intf_link(link); } } /* Remove all data links that belong to this entity */ __media_entity_remove_links(entity); /* Remove all pads that belong to this entity */ media_entity_for_each_pad(entity, iter) media_gobj_destroy(&iter->graph_obj); /* Remove the entity */ media_gobj_destroy(&entity->graph_obj); /* invoke entity_notify callbacks to handle entity removal?? */ } int __must_check media_device_register_entity(struct media_device *mdev, struct media_entity *entity) { struct media_entity_notify *notify, *next; struct media_pad *iter; int ret; if (entity->function == MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN || entity->function == MEDIA_ENT_F_UNKNOWN) dev_warn(mdev->dev, "Entity type for entity %s was not initialized!\n", entity->name); /* Warn if we apparently re-register an entity */ WARN_ON(entity->graph_obj.mdev != NULL); entity->graph_obj.mdev = mdev; INIT_LIST_HEAD(&entity->links); entity->num_links = 0; entity->num_backlinks = 0; ret = ida_alloc_min(&mdev->entity_internal_idx, 1, GFP_KERNEL); if (ret < 0) return ret; entity->internal_idx = ret; mutex_lock(&mdev->graph_mutex); mdev->entity_internal_idx_max = max(mdev->entity_internal_idx_max, entity->internal_idx); /* Initialize media_gobj embedded at the entity */ media_gobj_create(mdev, MEDIA_GRAPH_ENTITY, &entity->graph_obj); /* Initialize objects at the pads */ media_entity_for_each_pad(entity, iter) media_gobj_create(mdev, MEDIA_GRAPH_PAD, &iter->graph_obj); /* invoke entity_notify callbacks */ list_for_each_entry_safe(notify, next, &mdev->entity_notify, list) notify->notify(entity, notify->notify_data); if (mdev->entity_internal_idx_max >= mdev->pm_count_walk.ent_enum.idx_max) { struct media_graph new = { .top = 0 }; /* * Initialise the new graph walk before cleaning up * the old one in order not to spoil the graph walk * object of the media device if graph walk init fails. 
*/ ret = media_graph_walk_init(&new, mdev); if (ret) { __media_device_unregister_entity(entity); mutex_unlock(&mdev->graph_mutex); return ret; } media_graph_walk_cleanup(&mdev->pm_count_walk); mdev->pm_count_walk = new; } mutex_unlock(&mdev->graph_mutex); return 0; } EXPORT_SYMBOL_GPL(media_device_register_entity); void media_device_unregister_entity(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_device_unregister_entity(entity); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_unregister_entity); void media_device_init(struct media_device *mdev) { INIT_LIST_HEAD(&mdev->entities); INIT_LIST_HEAD(&mdev->interfaces); INIT_LIST_HEAD(&mdev->pads); INIT_LIST_HEAD(&mdev->links); INIT_LIST_HEAD(&mdev->entity_notify); mutex_init(&mdev->req_queue_mutex); mutex_init(&mdev->graph_mutex); ida_init(&mdev->entity_internal_idx); atomic_set(&mdev->request_id, 0); if (!*mdev->bus_info) media_set_bus_info(mdev->bus_info, sizeof(mdev->bus_info), mdev->dev); dev_dbg(mdev->dev, "Media device initialized\n"); } EXPORT_SYMBOL_GPL(media_device_init); void media_device_cleanup(struct media_device *mdev) { ida_destroy(&mdev->entity_internal_idx); mdev->entity_internal_idx_max = 0; media_graph_walk_cleanup(&mdev->pm_count_walk); mutex_destroy(&mdev->graph_mutex); mutex_destroy(&mdev->req_queue_mutex); } EXPORT_SYMBOL_GPL(media_device_cleanup); int __must_check __media_device_register(struct media_device *mdev, struct module *owner) { struct media_devnode *devnode; int ret; devnode = kzalloc(sizeof(*devnode), GFP_KERNEL); if (!devnode) return -ENOMEM; /* Register the device node. */ mdev->devnode = devnode; devnode->fops = &media_device_fops; devnode->parent = mdev->dev; devnode->release = media_device_release; /* Set version 0 to indicate user-space that the graph is static */ mdev->topology_version = 0; ret = media_devnode_register(mdev, devnode, owner); if (ret < 0) { /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; return ret; } ret = device_create_file(&devnode->dev, &dev_attr_model); if (ret < 0) { /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; media_devnode_unregister_prepare(devnode); media_devnode_unregister(devnode); return ret; } dev_dbg(mdev->dev, "Media device registered\n"); return 0; } EXPORT_SYMBOL_GPL(__media_device_register); void media_device_register_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { mutex_lock(&mdev->graph_mutex); list_add_tail(&nptr->list, &mdev->entity_notify); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_register_entity_notify); /* * Note: Should be called with mdev->lock held. 
*/ static void __media_device_unregister_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { list_del(&nptr->list); } void media_device_unregister_entity_notify(struct media_device *mdev, struct media_entity_notify *nptr) { mutex_lock(&mdev->graph_mutex); __media_device_unregister_entity_notify(mdev, nptr); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_device_unregister_entity_notify); void media_device_unregister(struct media_device *mdev) { struct media_entity *entity; struct media_entity *next; struct media_interface *intf, *tmp_intf; struct media_entity_notify *notify, *nextp; if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); /* Check if mdev was ever registered at all */ if (!media_devnode_is_registered(mdev->devnode)) { mutex_unlock(&mdev->graph_mutex); return; } /* Clear the devnode register bit to avoid races with media dev open */ media_devnode_unregister_prepare(mdev->devnode); /* Remove all entities from the media device */ list_for_each_entry_safe(entity, next, &mdev->entities, graph_obj.list) __media_device_unregister_entity(entity); /* Remove all entity_notify callbacks from the media device */ list_for_each_entry_safe(notify, nextp, &mdev->entity_notify, list) __media_device_unregister_entity_notify(mdev, notify); /* Remove all interfaces from the media device */ list_for_each_entry_safe(intf, tmp_intf, &mdev->interfaces, graph_obj.list) { /* * Unlink the interface, but don't free it here; the * module which created it is responsible for freeing * it */ __media_remove_intf_links(intf); media_gobj_destroy(&intf->graph_obj); } mutex_unlock(&mdev->graph_mutex); dev_dbg(mdev->dev, "Media device unregistered\n"); device_remove_file(&mdev->devnode->dev, &dev_attr_model); media_devnode_unregister(mdev->devnode); /* devnode free is handled in media_devnode_*() */ mdev->devnode = NULL; } EXPORT_SYMBOL_GPL(media_device_unregister); #if IS_ENABLED(CONFIG_PCI) void media_device_pci_init(struct media_device *mdev, struct pci_dev *pci_dev, const char *name) { mdev->dev = &pci_dev->dev; if (name) strscpy(mdev->model, name, sizeof(mdev->model)); else strscpy(mdev->model, pci_name(pci_dev), sizeof(mdev->model)); sprintf(mdev->bus_info, "PCI:%s", pci_name(pci_dev)); mdev->hw_revision = (pci_dev->subsystem_vendor << 16) | pci_dev->subsystem_device; media_device_init(mdev); } EXPORT_SYMBOL_GPL(media_device_pci_init); #endif #if IS_ENABLED(CONFIG_USB) void __media_device_usb_init(struct media_device *mdev, struct usb_device *udev, const char *board_name, const char *driver_name) { mdev->dev = &udev->dev; if (driver_name) strscpy(mdev->driver_name, driver_name, sizeof(mdev->driver_name)); if (board_name) strscpy(mdev->model, board_name, sizeof(mdev->model)); else if (udev->product) strscpy(mdev->model, udev->product, sizeof(mdev->model)); else strscpy(mdev->model, "unknown model", sizeof(mdev->model)); if (udev->serial) strscpy(mdev->serial, udev->serial, sizeof(mdev->serial)); usb_make_path(udev, mdev->bus_info, sizeof(mdev->bus_info)); mdev->hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); media_device_init(mdev); } EXPORT_SYMBOL_GPL(__media_device_usb_init); #endif #endif /* CONFIG_MEDIA_CONTROLLER */
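/*
 * Illustrative sketch (not part of this file): the init/register/unregister
 * sequence a bridge driver usually follows with the media_device API above.
 * The "skel_*" names are hypothetical, and media_device_register() is
 * assumed to be the THIS_MODULE wrapper around __media_device_register().
 * A V4L2 driver would additionally point its v4l2_dev->mdev at this
 * media_device so the videodev core creates the interface nodes and links.
 */
#include <linux/device.h>
#include <linux/string.h>
#include <media/media-device.h>

static struct media_device skel_mdev;	/* assumed to live in driver state */

static int skel_media_dev_register(struct device *dev)
{
	int ret;

	skel_mdev.dev = dev;
	strscpy(skel_mdev.model, "skel bridge", sizeof(skel_mdev.model));
	/* bus_info is derived from dev by media_device_init() if left empty */
	media_device_init(&skel_mdev);

	/* Entities and interfaces are normally registered before this call */
	ret = media_device_register(&skel_mdev);
	if (ret)
		media_device_cleanup(&skel_mdev);
	return ret;
}

static void skel_media_dev_unregister(void)
{
	media_device_unregister(&skel_mdev);
	media_device_cleanup(&skel_mdev);
}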
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_MM_INLINE_H
#define LINUX_MM_INLINE_H

#include <linux/atomic.h>
#include <linux/huge_mm.h>
#include <linux/mm_types.h>
#include <linux/swap.h>
#include <linux/string.h>
#include <linux/userfaultfd_k.h>
#include <linux/swapops.h>

/**
 * folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
 * @folio: The folio to test.
 *
 * We would like to get this info without a page flag, but the state
 * needs to survive until the folio is last deleted from the LRU, which
 * could be as far down as __page_cache_release.
 *
 * Return: An integer (not a boolean!) used to sort a folio onto the
 * right LRU list and to account folios correctly.
 * 1 if @folio is a regular filesystem backed page cache folio
 * or a lazily freed anonymous folio (e.g. via MADV_FREE).
 * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise
 * ram or swap backed folio.
*/ static inline int folio_is_file_lru(struct folio *folio) { return !folio_test_swapbacked(folio); } static inline int page_is_file_lru(struct page *page) { return folio_is_file_lru(page_folio(page)); } static __always_inline void __update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, long nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); lockdep_assert_held(&lruvec->lru_lock); WARN_ON_ONCE(nr_pages != (int)nr_pages); __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); } static __always_inline void update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, long nr_pages) { __update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif } /** * __folio_clear_lru_flags - Clear page lru flags before releasing a page. * @folio: The folio that was on lru and now has a zero reference. */ static __always_inline void __folio_clear_lru_flags(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio); __folio_clear_lru(folio); /* this shouldn't happen, so leave the flags to bad_page() */ if (folio_test_active(folio) && folio_test_unevictable(folio)) return; __folio_clear_active(folio); __folio_clear_unevictable(folio); } /** * folio_lru_list - Which LRU list should a folio be on? * @folio: The folio to test. * * Return: The LRU list a folio should be on, as an index * into the array of LRU lists. */ static __always_inline enum lru_list folio_lru_list(struct folio *folio) { enum lru_list lru; VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); if (folio_test_unevictable(folio)) return LRU_UNEVICTABLE; lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; if (folio_test_active(folio)) lru += LRU_ACTIVE; return lru; } #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED static inline bool lru_gen_enabled(void) { DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]); return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]); } #else static inline bool lru_gen_enabled(void) { DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]); return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]); } #endif static inline bool lru_gen_in_fault(void) { return current->in_lru_fault; } static inline int lru_gen_from_seq(unsigned long seq) { return seq % MAX_NR_GENS; } static inline int lru_hist_from_seq(unsigned long seq) { return seq % NR_HIST_GENS; } static inline int lru_tier_from_refs(int refs) { VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH)); /* see the comment in folio_lru_refs() */ return order_base_2(refs + 1); } static inline int folio_lru_refs(struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags); bool workingset = flags & BIT(PG_workingset); /* * Return the number of accesses beyond PG_referenced, i.e., N-1 if the * total number of accesses is N>1, since N=0,1 both map to the first * tier. lru_tier_from_refs() will account for this off-by-one. Also see * the comment on MAX_NR_TIERS. 
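 *
 * Worked example (illustrative values, not from any workload): a folio whose
 * LRU_REFS field holds 2 and which has PG_workingset set returns 2 + 1 = 3
 * here, and lru_tier_from_refs() above maps that to tier
 * order_base_2(3 + 1) = 2.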
*/ return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; } static inline int folio_lru_gen(struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags); return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; } static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) { unsigned long max_seq = lruvec->lrugen.max_seq; VM_WARN_ON_ONCE(gen >= MAX_NR_GENS); /* see the comment on MIN_NR_GENS */ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); } static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio, int old_gen, int new_gen) { int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); int delta = folio_nr_pages(folio); enum lru_list lru = type * LRU_INACTIVE_FILE; struct lru_gen_folio *lrugen = &lruvec->lrugen; VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS); VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS); VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1); if (old_gen >= 0) WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone], lrugen->nr_pages[old_gen][type][zone] - delta); if (new_gen >= 0) WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone], lrugen->nr_pages[new_gen][type][zone] + delta); /* addition */ if (old_gen < 0) { if (lru_gen_is_active(lruvec, new_gen)) lru += LRU_ACTIVE; __update_lru_size(lruvec, lru, zone, delta); return; } /* deletion */ if (new_gen < 0) { if (lru_gen_is_active(lruvec, old_gen)) lru += LRU_ACTIVE; __update_lru_size(lruvec, lru, zone, -delta); return; } /* promotion */ if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { __update_lru_size(lruvec, lru, zone, -delta); __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); } /* demotion requires isolation, e.g., lru_deactivate_fn() */ VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); } static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { unsigned long seq; unsigned long flags; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); struct lru_gen_folio *lrugen = &lruvec->lrugen; VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); if (folio_test_unevictable(folio) || !lrugen->enabled) return false; /* * There are four common cases for this page: * 1. If it's hot, i.e., freshly faulted in, add it to the youngest * generation, and it's protected over the rest below. * 2. If it can't be evicted immediately, i.e., a dirty page pending * writeback, add it to the second youngest generation. * 3. If it should be evicted first, e.g., cold and clean from * folio_rotate_reclaimable(), add it to the oldest generation. * 4. Everything else falls between 2 & 3 above and is added to the * second oldest generation if it's considered inactive, or the * oldest generation otherwise. See lru_gen_is_active(). 
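 *
 * As a quick map onto the sequence numbers chosen below: case 1 uses
 * max_seq, case 2 uses max_seq - 1, and cases 3 & 4 use min_seq[type] or
 * min_seq[type] + 1, which lru_gen_from_seq() then folds into a generation
 * index.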
*/ if (folio_test_active(folio)) seq = lrugen->max_seq; else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || (folio_test_reclaim(folio) && (folio_test_dirty(folio) || folio_test_writeback(folio)))) seq = lrugen->max_seq - 1; else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) seq = lrugen->min_seq[type]; else seq = lrugen->min_seq[type] + 1; gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ if (reclaiming) list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]); else list_add(&folio->lru, &lrugen->folios[gen][type][zone]); return true; } static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { unsigned long flags; int gen = folio_lru_gen(folio); if (gen < 0) return false; VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); /* for folio_migrate_flags() */ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0; flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags); gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; lru_gen_update_size(lruvec, folio, gen, -1); list_del(&folio->lru); return true; } #else /* !CONFIG_LRU_GEN */ static inline bool lru_gen_enabled(void) { return false; } static inline bool lru_gen_in_fault(void) { return false; } static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { return false; } static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { return false; } #endif /* CONFIG_LRU_GEN */ static __always_inline void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); if (lru_gen_add_folio(lruvec, folio, false)) return; update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); if (lru != LRU_UNEVICTABLE) list_add(&folio->lru, &lruvec->lists[lru]); } static __always_inline void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); if (lru_gen_add_folio(lruvec, folio, true)) return; update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); /* This is not expected to be used on LRU_UNEVICTABLE */ list_add_tail(&folio->lru, &lruvec->lists[lru]); } static __always_inline void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); if (lru_gen_del_folio(lruvec, folio, false)) return; if (lru != LRU_UNEVICTABLE) list_del(&folio->lru); update_lru_size(lruvec, lru, folio_zonenum(folio), -folio_nr_pages(folio)); } #ifdef CONFIG_ANON_VMA_NAME /* mmap_lock should be read-locked */ static inline void anon_vma_name_get(struct anon_vma_name *anon_name) { if (anon_name) kref_get(&anon_name->kref); } static inline void anon_vma_name_put(struct anon_vma_name *anon_name) { if (anon_name) kref_put(&anon_name->kref, anon_vma_name_free); } static inline struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name) { /* Prevent anon_name refcount saturation early on */ if (kref_read(&anon_name->kref) < REFCOUNT_MAX) { anon_vma_name_get(anon_name); return anon_name; } return anon_vma_name_alloc(anon_name->name); } static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, struct vm_area_struct 
*new_vma) { struct anon_vma_name *anon_name = anon_vma_name(orig_vma); if (anon_name) new_vma->anon_name = anon_vma_name_reuse(anon_name); } static inline void free_anon_vma_name(struct vm_area_struct *vma) { /* * Not using anon_vma_name because it generates a warning if mmap_lock * is not held, which might be the case here. */ anon_vma_name_put(vma->anon_name); } static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, struct anon_vma_name *anon_name2) { if (anon_name1 == anon_name2) return true; return anon_name1 && anon_name2 && !strcmp(anon_name1->name, anon_name2->name); } #else /* CONFIG_ANON_VMA_NAME */ static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {} static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {} static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, struct vm_area_struct *new_vma) {} static inline void free_anon_vma_name(struct vm_area_struct *vma) {} static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, struct anon_vma_name *anon_name2) { return true; } #endif /* CONFIG_ANON_VMA_NAME */ static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); } static inline void inc_tlb_flush_pending(struct mm_struct *mm) { atomic_inc(&mm->tlb_flush_pending); /* * The only time this value is relevant is when there are indeed pages * to flush. And we'll only flush pages after changing them, which * requires the PTL. * * So the ordering here is: * * atomic_inc(&mm->tlb_flush_pending); * spin_lock(&ptl); * ... * set_pte_at(); * spin_unlock(&ptl); * * spin_lock(&ptl) * mm_tlb_flush_pending(); * .... * spin_unlock(&ptl); * * flush_tlb_range(); * atomic_dec(&mm->tlb_flush_pending); * * Where the increment is constrained by the PTL unlock, it thus * ensures that the increment is visible if the PTE modification is * visible. After all, if there is no PTE modification, nobody cares * about TLB flushes either. * * This very much relies on users (mm_tlb_flush_pending() and * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc * locks (PPC) the unlock of one doesn't order against the lock of * another PTL. * * The decrement is ordered by the flush_tlb_range(), such that * mm_tlb_flush_pending() will not return false unless all flushes have * completed. */ } static inline void dec_tlb_flush_pending(struct mm_struct *mm) { /* * See inc_tlb_flush_pending(). * * This cannot be smp_mb__before_atomic() because smp_mb() simply does * not order against TLB invalidate completion, which is what we need. * * Therefore we must rely on tlb_flush_*() to guarantee order. */ atomic_dec(&mm->tlb_flush_pending); } static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { /* * Must be called after having acquired the PTL; orders against that * PTL's release and therefore ensures that if we observe the modified * PTE we must also observe the increment from inc_tlb_flush_pending(). * * That is, it only guarantees to return true if there is a flush * pending for _this_ PTL. */ return atomic_read(&mm->tlb_flush_pending); } static inline bool mm_tlb_flush_nested(struct mm_struct *mm) { /* * Similar to mm_tlb_flush_pending(), we must have acquired the PTL * for which there is a TLB flush pending in order to guarantee * we've seen both that PTE modification and the increment.
* * (no requirement on actually still holding the PTL, that is irrelevant) */ return atomic_read(&mm->tlb_flush_pending) > 1; } #ifdef CONFIG_MMU /* * Computes the pte marker to copy from the given source entry into dst_vma. * If no marker should be copied, returns 0. * The caller should insert a new pte created with make_pte_marker(). */ static inline pte_marker copy_pte_marker( swp_entry_t entry, struct vm_area_struct *dst_vma) { pte_marker srcm = pte_marker_get(entry); /* Always copy error entries. */ pte_marker dstm = srcm & PTE_MARKER_POISONED; /* Only copy PTE markers if UFFD register matches. */ if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma)) dstm |= PTE_MARKER_UFFD_WP; return dstm; } #endif /* * If this pte is wr-protected by uffd-wp in any form, arm the special pte to * replace a none pte. NOTE! This should only be called when *pte is already * cleared so we will never accidentally replace something valuable. Meanwhile * none pte also means we are not demoting the pte so tlb flushed is not needed. * E.g., when pte cleared the caller should have taken care of the tlb flush. * * Must be called with pgtable lock held so that no thread will see the none * pte, and if they see it, they'll fault and serialize at the pgtable lock. * * This function is a no-op if PTE_MARKER_UFFD_WP is not enabled. */ static inline void pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, pte_t pteval) { #ifdef CONFIG_PTE_MARKER_UFFD_WP bool arm_uffd_pte = false; /* The current status of the pte should be "cleared" before calling */ WARN_ON_ONCE(!pte_none(ptep_get(pte))); /* * NOTE: userfaultfd_wp_unpopulated() doesn't need this whole * thing, because when zapping either it means it's dropping the * page, or in TTU where the present pte will be quickly replaced * with a swap pte. There's no way of leaking the bit. */ if (vma_is_anonymous(vma) || !userfaultfd_wp(vma)) return; /* A uffd-wp wr-protected normal pte */ if (unlikely(pte_present(pteval) && pte_uffd_wp(pteval))) arm_uffd_pte = true; /* * A uffd-wp wr-protected swap pte. Note: this should even cover an * existing pte marker with uffd-wp bit set. */ if (unlikely(pte_swp_uffd_wp_any(pteval))) arm_uffd_pte = true; if (unlikely(arm_uffd_pte)) set_pte_at(vma->vm_mm, addr, pte, make_pte_marker(PTE_MARKER_UFFD_WP)); #endif } static inline bool vma_has_recency(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) return false; if (vma->vm_file && (vma->vm_file->f_mode & FMODE_NOREUSE)) return false; return true; } #endif
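/*
 * Illustrative sketch (not part of the header above): the ordering protocol
 * documented in inc_tlb_flush_pending()/dec_tlb_flush_pending(). The helper
 * name and the elided PTE updates are placeholders for illustration; in-tree
 * users typically reach these counters through the mmu_gather
 * (tlb_gather_mmu()/tlb_finish_mmu()) path.
 */
#include <linux/mm.h>
#include <asm/tlbflush.h>

static void example_deferred_tlb_flush(struct vm_area_struct *vma,
				       unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	/* Publish the pending flush before any PTE is changed under its PTL */
	inc_tlb_flush_pending(mm);

	/*
	 * ... clear or modify the PTEs covering [start, end) under the page
	 * table lock; a concurrent mm_tlb_flush_pending() caller holding the
	 * same PTL is then guaranteed to observe the increment above ...
	 */

	/* The flush itself orders the decrement, see dec_tlb_flush_pending() */
	flush_tlb_range(vma, start, end);
	dec_tlb_flush_pending(mm);
}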
// SPDX-License-Identifier: GPL-2.0-only /* * Infrastructure for migratable timers * * Copyright(C) 2022 linutronix GmbH */ #include <linux/cpuhotplug.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/spinlock.h> #include <linux/timerqueue.h> #include <trace/events/ipi.h> #include "timer_migration.h" #include "tick-internal.h" #define CREATE_TRACE_POINTS #include <trace/events/timer_migration.h> /* * The timer migration mechanism is built on a hierarchy of groups. The * lowest level group contains CPUs, the next level groups of CPU groups * and so forth. The CPU groups are kept per node so for the normal case * lock contention won't happen across nodes. Depending on the number of * CPUs per node even the next level might be kept as groups of CPU groups * per node and only the levels above cross the node topology. * * Example topology for a two node system with 24 CPUs each. * * LVL 2 [GRP2:0] * GRP1:0 = GRP1:M * * LVL 1 [GRP1:0] [GRP1:1] * GRP0:0 - GRP0:2 GRP0:3 - GRP0:5 * * LVL 0 [GRP0:0] [GRP0:1] [GRP0:2] [GRP0:3] [GRP0:4] [GRP0:5] * CPUS 0-7 8-15 16-23 24-31 32-39 40-47 * * The groups hold a timer queue of events sorted by expiry time. These * queues are updated when CPUs go idle. When they come out of idle, the * ignore flag of their events is set. * * Each group has a designated migrator CPU/group as long as a CPU/group is * active in the group. This designated role is necessary to prevent all * active CPUs in a group from trying to migrate expired timers from other * CPUs, which would result in massive lock bouncing. * * When a CPU is awake, it checks in its own timer tick the group * hierarchy up to the point where it is assigned the migrator role; if * no CPU is active, it also checks the groups where no migrator is set * (TMIGR_NONE). * * If it finds expired timers in one of the group queues it pulls them over * from the idle CPU and runs the timer function. After that it updates the * group and the parent groups if required. * * CPUs which go idle arm their CPU local timer hardware for the next local * (pinned) timer event. If the next migratable timer expires after the * next local timer or the CPU has no migratable timer pending then the * CPU does not queue an event in the LVL0 group. If the next migratable * timer expires before the next local timer then the CPU queues that timer * in the LVL0 group. In both cases the CPU marks itself idle in the LVL0 * group. * * When a CPU comes out of idle and when a group has at least a single * active child, the ignore flag of the tmigr_event is set. This indicates * that the event is ignored even if it is still enqueued in the parent * group's timer queue.
It will be removed when touching the timer queue the next * time. This spares locking in active path as the lock protects (after * setup) only event information. For more information about locking, * please read the section "Locking rules". * * If the CPU is the migrator of the group then it delegates that role to * the next active CPU in the group or sets migrator to TMIGR_NONE when * there is no active CPU in the group. This delegation needs to be * propagated up the hierarchy so hand over from other leaves can happen at * all hierarchy levels w/o doing a search. * * When the last CPU in the system goes idle, then it drops all migrator * duties up to the top level of the hierarchy (LVL2 in the example). It * then has to make sure, that it arms it's own local hardware timer for * the earliest event in the system. * * * Lifetime rules: * --------------- * * The groups are built up at init time or when CPUs come online. They are * not destroyed when a group becomes empty due to offlining. The group * just won't participate in the hierarchy management anymore. Destroying * groups would result in interesting race conditions which would just make * the whole mechanism slow and complex. * * * Locking rules: * -------------- * * For setting up new groups and handling events it's required to lock both * child and parent group. The lock ordering is always bottom up. This also * includes the per CPU locks in struct tmigr_cpu. For updating the migrator and * active CPU/group information atomic_try_cmpxchg() is used instead and only * the per CPU tmigr_cpu->lock is held. * * During the setup of groups tmigr_level_list is required. It is protected by * @tmigr_mutex. * * When @timer_base->lock as well as tmigr related locks are required, the lock * ordering is: first @timer_base->lock, afterwards tmigr related locks. * * * Protection of the tmigr group state information: * ------------------------------------------------ * * The state information with the list of active children and migrator needs to * be protected by a sequence counter. It prevents a race when updates in child * groups are propagated in changed order. The state update is performed * lockless and group wise. The following scenario describes what happens * without updating the sequence counter: * * Therefore, let's take three groups and four CPUs (CPU2 and CPU3 as well * as GRP0:1 will not change during the scenario): * * LVL 1 [GRP1:0] * migrator = GRP0:1 * active = GRP0:0, GRP0:1 * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = CPU0 migrator = CPU2 * active = CPU0 active = CPU2 * / \ / \ * CPUs 0 1 2 3 * active idle active idle * * * 1. CPU0 goes idle. As the update is performed group wise, in the first step * only GRP0:0 is updated. The update of GRP1:0 is pending as CPU0 has to * walk the hierarchy. * * LVL 1 [GRP1:0] * migrator = GRP0:1 * active = GRP0:0, GRP0:1 * / \ * LVL 0 [GRP0:0] [GRP0:1] * --> migrator = TMIGR_NONE migrator = CPU2 * --> active = active = CPU2 * / \ / \ * CPUs 0 1 2 3 * --> idle idle active idle * * 2. While CPU0 goes idle and continues to update the state, CPU1 comes out of * idle. CPU1 updates GRP0:0. The update for GRP1:0 is pending as CPU1 also * has to walk the hierarchy. Both CPUs (CPU0 and CPU1) now walk the * hierarchy to perform the needed update from their point of view. 
The * currently visible state looks the following: * * LVL 1 [GRP1:0] * migrator = GRP0:1 * active = GRP0:0, GRP0:1 * / \ * LVL 0 [GRP0:0] [GRP0:1] * --> migrator = CPU1 migrator = CPU2 * --> active = CPU1 active = CPU2 * / \ / \ * CPUs 0 1 2 3 * idle --> active active idle * * 3. Here is the race condition: CPU1 managed to propagate its changes (from * step 2) through the hierarchy to GRP1:0 before CPU0 (step 1) did. The * active members of GRP1:0 remain unchanged after the update since it is * still valid from CPU1 current point of view: * * LVL 1 [GRP1:0] * --> migrator = GRP0:1 * --> active = GRP0:0, GRP0:1 * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = CPU1 migrator = CPU2 * active = CPU1 active = CPU2 * / \ / \ * CPUs 0 1 2 3 * idle active active idle * * 4. Now CPU0 finally propagates its changes (from step 1) to GRP1:0. * * LVL 1 [GRP1:0] * --> migrator = GRP0:1 * --> active = GRP0:1 * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = CPU1 migrator = CPU2 * active = CPU1 active = CPU2 * / \ / \ * CPUs 0 1 2 3 * idle active active idle * * * The race of CPU0 vs. CPU1 led to an inconsistent state in GRP1:0. CPU1 is * active and is correctly listed as active in GRP0:0. However GRP1:0 does not * have GRP0:0 listed as active, which is wrong. The sequence counter has been * added to avoid inconsistent states during updates. The state is updated * atomically only if all members, including the sequence counter, match the * expected value (compare-and-exchange). * * Looking back at the previous example with the addition of the sequence * counter: The update as performed by CPU0 in step 4 will fail. CPU1 changed * the sequence number during the update in step 3 so the expected old value (as * seen by CPU0 before starting the walk) does not match. * * Prevent race between new event and last CPU going inactive * ---------------------------------------------------------- * * When the last CPU is going idle and there is a concurrent update of a new * first global timer of an idle CPU, the group and child states have to be read * while holding the lock in tmigr_update_events(). The following scenario shows * what happens, when this is not done. * * 1. Only CPU2 is active: * * LVL 1 [GRP1:0] * migrator = GRP0:1 * active = GRP0:1 * next_expiry = KTIME_MAX * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = TMIGR_NONE migrator = CPU2 * active = active = CPU2 * next_expiry = KTIME_MAX next_expiry = KTIME_MAX * / \ / \ * CPUs 0 1 2 3 * idle idle active idle * * 2. Now CPU 2 goes idle (and has no global timer, that has to be handled) and * propagates that to GRP0:1: * * LVL 1 [GRP1:0] * migrator = GRP0:1 * active = GRP0:1 * next_expiry = KTIME_MAX * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = TMIGR_NONE --> migrator = TMIGR_NONE * active = --> active = * next_expiry = KTIME_MAX next_expiry = KTIME_MAX * / \ / \ * CPUs 0 1 2 3 * idle idle --> idle idle * * 3. Now the idle state is propagated up to GRP1:0. As this is now the last * child going idle in top level group, the expiry of the next group event * has to be handed back to make sure no event is lost. As there is no event * enqueued, KTIME_MAX is handed back to CPU2. * * LVL 1 [GRP1:0] * --> migrator = TMIGR_NONE * --> active = * next_expiry = KTIME_MAX * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = TMIGR_NONE migrator = TMIGR_NONE * active = active = * next_expiry = KTIME_MAX next_expiry = KTIME_MAX * / \ / \ * CPUs 0 1 2 3 * idle idle --> idle idle * * 4. CPU 0 has a new timer queued from idle and it expires at TIMER0. 
CPU0 * propagates that to GRP0:0: * * LVL 1 [GRP1:0] * migrator = TMIGR_NONE * active = * next_expiry = KTIME_MAX * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = TMIGR_NONE migrator = TMIGR_NONE * active = active = * --> next_expiry = TIMER0 next_expiry = KTIME_MAX * / \ / \ * CPUs 0 1 2 3 * idle idle idle idle * * 5. GRP0:0 is not active, so the new timer has to be propagated to * GRP1:0. Therefore the GRP1:0 state has to be read. When the stalled value * (from step 2) is read, the timer is enqueued into GRP1:0, but nothing is * handed back to CPU0, as it seems that there is still an active child in * top level group. * * LVL 1 [GRP1:0] * migrator = TMIGR_NONE * active = * --> next_expiry = TIMER0 * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = TMIGR_NONE migrator = TMIGR_NONE * active = active = * next_expiry = TIMER0 next_expiry = KTIME_MAX * / \ / \ * CPUs 0 1 2 3 * idle idle idle idle * * This is prevented by reading the state when holding the lock (when a new * timer has to be propagated from idle path):: * * CPU2 (tmigr_inactive_up()) CPU0 (tmigr_new_timer_up()) * -------------------------- --------------------------- * // step 3: * cmpxchg(&GRP1:0->state); * tmigr_update_events() { * spin_lock(&GRP1:0->lock); * // ... update events ... * // hand back first expiry when GRP1:0 is idle * spin_unlock(&GRP1:0->lock); * // ^^^ release state modification * } * tmigr_update_events() { * spin_lock(&GRP1:0->lock) * // ^^^ acquire state modification * group_state = atomic_read(&GRP1:0->state) * // .... update events ... * // hand back first expiry when GRP1:0 is idle * spin_unlock(&GRP1:0->lock) <3> * // ^^^ makes state visible for other * // callers of tmigr_new_timer_up() * } * * When CPU0 grabs the lock directly after cmpxchg, the first timer is reported * back to CPU0 and also later on to CPU2. So no timer is missed. A concurrent * update of the group state from active path is no problem, as the upcoming CPU * will take care of the group events. * * Required event and timerqueue update after a remote expiry: * ----------------------------------------------------------- * * After expiring timers of a remote CPU, a walk through the hierarchy and * update of events and timerqueues is required. It is obviously needed if there * is a 'new' global timer but also if there is no new global timer but the * remote CPU is still idle. * * 1. CPU0 and CPU1 are idle and have both a global timer expiring at the same * time. So both have an event enqueued in the timerqueue of GRP0:0. CPU3 is * also idle and has no global timer pending. CPU2 is the only active CPU and * thus also the migrator: * * LVL 1 [GRP1:0] * migrator = GRP0:1 * active = GRP0:1 * --> timerqueue = evt-GRP0:0 * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = TMIGR_NONE migrator = CPU2 * active = active = CPU2 * groupevt.ignore = false groupevt.ignore = true * groupevt.cpu = CPU0 groupevt.cpu = * timerqueue = evt-CPU0, timerqueue = * evt-CPU1 * / \ / \ * CPUs 0 1 2 3 * idle idle active idle * * 2. CPU2 starts to expire remote timers. It starts with LVL0 group * GRP0:1. There is no event queued in the timerqueue, so CPU2 continues with * the parent of GRP0:1: GRP1:0. In GRP1:0 it dequeues the first event. It * looks at tmigr_event::cpu struct member and expires the pending timer(s) * of CPU0. 
* * LVL 1 [GRP1:0] * migrator = GRP0:1 * active = GRP0:1 * --> timerqueue = * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = TMIGR_NONE migrator = CPU2 * active = active = CPU2 * groupevt.ignore = false groupevt.ignore = true * --> groupevt.cpu = CPU0 groupevt.cpu = * timerqueue = evt-CPU0, timerqueue = * evt-CPU1 * / \ / \ * CPUs 0 1 2 3 * idle idle active idle * * 3. Some work has to be done after expiring the timers of CPU0. If we stop * here, then CPU1's pending global timer(s) will not expire in time and the * timerqueue of GRP0:0 has still an event for CPU0 enqueued which has just * been processed. So it is required to walk the hierarchy from CPU0's point * of view and update it accordingly. CPU0's event will be removed from the * timerqueue because it has no pending timer. If CPU0 would have a timer * pending then it has to expire after CPU1's first timer because all timers * from this period were just expired. Either way CPU1's event will be first * in GRP0:0's timerqueue and therefore set in the CPU field of the group * event which is then enqueued in GRP1:0's timerqueue as GRP0:0 is still not * active: * * LVL 1 [GRP1:0] * migrator = GRP0:1 * active = GRP0:1 * --> timerqueue = evt-GRP0:0 * / \ * LVL 0 [GRP0:0] [GRP0:1] * migrator = TMIGR_NONE migrator = CPU2 * active = active = CPU2 * groupevt.ignore = false groupevt.ignore = true * --> groupevt.cpu = CPU1 groupevt.cpu = * --> timerqueue = evt-CPU1 timerqueue = * / \ / \ * CPUs 0 1 2 3 * idle idle active idle * * Now CPU2 (migrator) will continue step 2 at GRP1:0 and will expire the * timer(s) of CPU1. * * The hierarchy walk in step 3 can be skipped if the migrator notices that a * CPU of GRP0:0 is active again. The CPU will mark GRP0:0 active and take care * of the group as migrator and any needed updates within the hierarchy. */ static DEFINE_MUTEX(tmigr_mutex); static struct list_head *tmigr_level_list __read_mostly; static unsigned int tmigr_hierarchy_levels __read_mostly; static unsigned int tmigr_crossnode_level __read_mostly; static DEFINE_PER_CPU(struct tmigr_cpu, tmigr_cpu); #define TMIGR_NONE 0xFF #define BIT_CNT 8 static inline bool tmigr_is_not_available(struct tmigr_cpu *tmc) { return !(tmc->tmgroup && tmc->online); } /* * Returns true, when @childmask corresponds to the group migrator or when the * group is not active - so no migrator is set. 
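 *
 * For example (illustrative state, not taken from a trace): with migr_state
 * holding migrator = 0x01 and active = 0x05, a caller passing childmask 0x01
 * gets true (it is the migrator), childmask 0x04 gets false, and if the
 * group were fully idle (migrator == TMIGR_NONE) any childmask would get
 * true.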
*/ static bool tmigr_check_migrator(struct tmigr_group *group, u8 childmask) { union tmigr_state s; s.state = atomic_read(&group->migr_state); if ((s.migrator == childmask) || (s.migrator == TMIGR_NONE)) return true; return false; } static bool tmigr_check_migrator_and_lonely(struct tmigr_group *group, u8 childmask) { bool lonely, migrator = false; unsigned long active; union tmigr_state s; s.state = atomic_read(&group->migr_state); if ((s.migrator == childmask) || (s.migrator == TMIGR_NONE)) migrator = true; active = s.active; lonely = bitmap_weight(&active, BIT_CNT) <= 1; return (migrator && lonely); } static bool tmigr_check_lonely(struct tmigr_group *group) { unsigned long active; union tmigr_state s; s.state = atomic_read(&group->migr_state); active = s.active; return bitmap_weight(&active, BIT_CNT) <= 1; } typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, void *); static void __walk_groups(up_f up, void *data, struct tmigr_cpu *tmc) { struct tmigr_group *child = NULL, *group = tmc->tmgroup; do { WARN_ON_ONCE(group->level >= tmigr_hierarchy_levels); if (up(group, child, data)) break; child = group; group = group->parent; } while (group); } static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc) { lockdep_assert_held(&tmc->lock); __walk_groups(up, data, tmc); } /** * struct tmigr_walk - data required for walking the hierarchy * @nextexp: Next CPU event expiry information which is handed into * the timer migration code by the timer code * (get_next_timer_interrupt()) * @firstexp: Contains the first event expiry information when last * active CPU of hierarchy is on the way to idle to make * sure CPU will be back in time. * @evt: Pointer to tmigr_event which needs to be queued (of idle * child group) * @childmask: childmask of child group * @remote: Is set, when the new timer path is executed in * tmigr_handle_remote_cpu() */ struct tmigr_walk { u64 nextexp; u64 firstexp; struct tmigr_event *evt; u8 childmask; bool remote; }; /** * struct tmigr_remote_data - data required for remote expiry hierarchy walk * @basej: timer base in jiffies * @now: timer base monotonic * @firstexp: returns expiry of the first timer in the idle timer * migration hierarchy to make sure the timer is handled in * time; it is stored in the per CPU tmigr_cpu struct of * CPU which expires remote timers * @childmask: childmask of child group * @check: is set if there is the need to handle remote timers; * required in tmigr_requires_handle_remote() only * @tmc_active: this flag indicates, whether the CPU which triggers * the hierarchy walk is !idle in the timer migration * hierarchy. When the CPU is idle and the whole hierarchy is * idle, only the first event of the top level has to be * considered. */ struct tmigr_remote_data { unsigned long basej; u64 now; u64 firstexp; u8 childmask; bool check; bool tmc_active; }; /* * Returns the next event of the timerqueue @group->events * * Removes timers with ignore flag and update next_expiry of the group. Values * of the group event are updated in tmigr_update_events() only. 
*/ static struct tmigr_event *tmigr_next_groupevt(struct tmigr_group *group) { struct timerqueue_node *node = NULL; struct tmigr_event *evt = NULL; lockdep_assert_held(&group->lock); WRITE_ONCE(group->next_expiry, KTIME_MAX); while ((node = timerqueue_getnext(&group->events))) { evt = container_of(node, struct tmigr_event, nextevt); if (!evt->ignore) { WRITE_ONCE(group->next_expiry, evt->nextevt.expires); return evt; } /* * Remove next timers with ignore flag, because the group lock * is held anyway */ if (!timerqueue_del(&group->events, node)) break; } return NULL; } /* * Return the next event (with the expiry equal or before @now) * * Event, which is returned, is also removed from the queue. */ static struct tmigr_event *tmigr_next_expired_groupevt(struct tmigr_group *group, u64 now) { struct tmigr_event *evt = tmigr_next_groupevt(group); if (!evt || now < evt->nextevt.expires) return NULL; /* * The event is ready to expire. Remove it and update next group event. */ timerqueue_del(&group->events, &evt->nextevt); tmigr_next_groupevt(group); return evt; } static u64 tmigr_next_groupevt_expires(struct tmigr_group *group) { struct tmigr_event *evt; evt = tmigr_next_groupevt(group); if (!evt) return KTIME_MAX; else return evt->nextevt.expires; } static bool tmigr_active_up(struct tmigr_group *group, struct tmigr_group *child, void *ptr) { union tmigr_state curstate, newstate; struct tmigr_walk *data = ptr; bool walk_done; u8 childmask; childmask = data->childmask; /* * No memory barrier is required here in contrast to * tmigr_inactive_up(), as the group state change does not depend on the * child state. */ curstate.state = atomic_read(&group->migr_state); do { newstate = curstate; walk_done = true; if (newstate.migrator == TMIGR_NONE) { newstate.migrator = childmask; /* Changes need to be propagated */ walk_done = false; } newstate.active |= childmask; newstate.seq++; } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); if ((walk_done == false) && group->parent) data->childmask = group->childmask; /* * The group is active (again). The group event might be still queued * into the parent group's timerqueue but can now be handled by the * migrator of this group. Therefore the ignore flag for the group event * is updated to reflect this. * * The update of the ignore flag in the active path is done lockless. In * worst case the migrator of the parent group observes the change too * late and expires remotely all events belonging to this group. The * lock is held while updating the ignore flag in idle path. So this * state change will not be lost. */ group->groupevt.ignore = true; trace_tmigr_group_set_cpu_active(group, newstate, childmask); return walk_done; } static void __tmigr_cpu_activate(struct tmigr_cpu *tmc) { struct tmigr_walk data; data.childmask = tmc->childmask; trace_tmigr_cpu_active(tmc); tmc->cpuevt.ignore = true; WRITE_ONCE(tmc->wakeup, KTIME_MAX); walk_groups(&tmigr_active_up, &data, tmc); } /** * tmigr_cpu_activate() - set this CPU active in timer migration hierarchy * * Call site timer_clear_idle() is called with interrupts disabled. 
*/ void tmigr_cpu_activate(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); if (tmigr_is_not_available(tmc)) return; if (WARN_ON_ONCE(!tmc->idle)) return; raw_spin_lock(&tmc->lock); tmc->idle = false; __tmigr_cpu_activate(tmc); raw_spin_unlock(&tmc->lock); } /* * Returns true, if there is nothing to be propagated to the next level * * @data->firstexp is set to expiry of first gobal event of the (top level of * the) hierarchy, but only when hierarchy is completely idle. * * The child and group states need to be read under the lock, to prevent a race * against a concurrent tmigr_inactive_up() run when the last CPU goes idle. See * also section "Prevent race between new event and last CPU going inactive" in * the documentation at the top. * * This is the only place where the group event expiry value is set. */ static bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child, struct tmigr_walk *data) { struct tmigr_event *evt, *first_childevt; union tmigr_state childstate, groupstate; bool remote = data->remote; bool walk_done = false; u64 nextexp; if (child) { raw_spin_lock(&child->lock); raw_spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING); childstate.state = atomic_read(&child->migr_state); groupstate.state = atomic_read(&group->migr_state); if (childstate.active) { walk_done = true; goto unlock; } first_childevt = tmigr_next_groupevt(child); nextexp = child->next_expiry; evt = &child->groupevt; evt->ignore = (nextexp == KTIME_MAX) ? true : false; } else { nextexp = data->nextexp; first_childevt = evt = data->evt; /* * Walking the hierarchy is required in any case when a * remote expiry was done before. This ensures to not lose * already queued events in non active groups (see section * "Required event and timerqueue update after a remote * expiry" in the documentation at the top). * * The two call sites which are executed without a remote expiry * before, are not prevented from propagating changes through * the hierarchy by the return: * - When entering this path by tmigr_new_timer(), @evt->ignore * is never set. * - tmigr_inactive_up() takes care of the propagation by * itself and ignores the return value. But an immediate * return is possible if there is a parent, sparing group * locking at this level, because the upper walking call to * the parent will take care about removing this event from * within the group and update next_expiry accordingly. * * However if there is no parent, ie: the hierarchy has only a * single level so @group is the top level group, make sure the * first event information of the group is updated properly and * also handled properly, so skip this fast return path. */ if (evt->ignore && !remote && group->parent) return true; raw_spin_lock(&group->lock); childstate.state = 0; groupstate.state = atomic_read(&group->migr_state); } /* * If the child event is already queued in the group, remove it from the * queue when the expiry time changed only or when it could be ignored. */ if (timerqueue_node_queued(&evt->nextevt)) { if ((evt->nextevt.expires == nextexp) && !evt->ignore) { /* Make sure not to miss a new CPU event with the same expiry */ evt->cpu = first_childevt->cpu; goto check_toplvl; } if (!timerqueue_del(&group->events, &evt->nextevt)) WRITE_ONCE(group->next_expiry, KTIME_MAX); } if (evt->ignore) { /* * When the next child event could be ignored (nextexp is * KTIME_MAX) and there was no remote timer handling before or * the group is already active, there is no need to walk the * hierarchy even if there is a parent group. 
* * The other way round: even if the event could be ignored, but * if a remote timer handling was executed before and the group * is not active, walking the hierarchy is required to not miss * an enqueued timer in the non active group. The enqueued timer * of the group needs to be propagated to a higher level to * ensure it is handled. */ if (!remote || groupstate.active) walk_done = true; } else { evt->nextevt.expires = nextexp; evt->cpu = first_childevt->cpu; if (timerqueue_add(&group->events, &evt->nextevt)) WRITE_ONCE(group->next_expiry, nextexp); } check_toplvl: if (!group->parent && (groupstate.migrator == TMIGR_NONE)) { walk_done = true; /* * Nothing to do when update was done during remote timer * handling. First timer in top level group which needs to be * handled when top level group is not active, is calculated * directly in tmigr_handle_remote_up(). */ if (remote) goto unlock; /* * The top level group is idle and it has to be ensured the * global timers are handled in time. (This could be optimized * by keeping track of the last global scheduled event and only * arming it on the CPU if the new event is earlier. Not sure if * its worth the complexity.) */ data->firstexp = tmigr_next_groupevt_expires(group); } trace_tmigr_update_events(child, group, childstate, groupstate, nextexp); unlock: raw_spin_unlock(&group->lock); if (child) raw_spin_unlock(&child->lock); return walk_done; } static bool tmigr_new_timer_up(struct tmigr_group *group, struct tmigr_group *child, void *ptr) { struct tmigr_walk *data = ptr; return tmigr_update_events(group, child, data); } /* * Returns the expiry of the next timer that needs to be handled. KTIME_MAX is * returned, if an active CPU will handle all the timer migration hierarchy * timers. */ static u64 tmigr_new_timer(struct tmigr_cpu *tmc, u64 nextexp) { struct tmigr_walk data = { .nextexp = nextexp, .firstexp = KTIME_MAX, .evt = &tmc->cpuevt }; lockdep_assert_held(&tmc->lock); if (tmc->remote) return KTIME_MAX; trace_tmigr_cpu_new_timer(tmc); tmc->cpuevt.ignore = false; data.remote = false; walk_groups(&tmigr_new_timer_up, &data, tmc); /* If there is a new first global event, make sure it is handled */ return data.firstexp; } static void tmigr_handle_remote_cpu(unsigned int cpu, u64 now, unsigned long jif) { struct timer_events tevt; struct tmigr_walk data; struct tmigr_cpu *tmc; tmc = per_cpu_ptr(&tmigr_cpu, cpu); raw_spin_lock_irq(&tmc->lock); /* * If the remote CPU is offline then the timers have been migrated to * another CPU. * * If tmigr_cpu::remote is set, at the moment another CPU already * expires the timers of the remote CPU. * * If tmigr_event::ignore is set, then the CPU returns from idle and * takes care of its timers. * * If the next event expires in the future, then the event has been * updated and there are no timers to expire right now. The CPU which * updated the event takes care when hierarchy is completely * idle. Otherwise the migrator does it as the event is enqueued. */ if (!tmc->online || tmc->remote || tmc->cpuevt.ignore || now < tmc->cpuevt.nextevt.expires) { raw_spin_unlock_irq(&tmc->lock); return; } trace_tmigr_handle_remote_cpu(tmc); tmc->remote = true; WRITE_ONCE(tmc->wakeup, KTIME_MAX); /* Drop the lock to allow the remote CPU to exit idle */ raw_spin_unlock_irq(&tmc->lock); if (cpu != smp_processor_id()) timer_expire_remote(cpu); /* * Lock ordering needs to be preserved - timer_base locks before tmigr * related locks (see section "Locking rules" in the documentation at * the top). 
During fetching the next timer interrupt, also tmc->lock * needs to be held. Otherwise there is a possible race window against * the CPU itself when it comes out of idle, updates the first timer in * the hierarchy and goes back to idle. * * timer base locks are dropped as fast as possible: After checking * whether the remote CPU went offline in the meantime and after * fetching the next remote timer interrupt. Dropping the locks as fast * as possible keeps the locking region small and prevents holding * several (unnecessary) locks during walking the hierarchy for updating * the timerqueue and group events. */ local_irq_disable(); timer_lock_remote_bases(cpu); raw_spin_lock(&tmc->lock); /* * When the CPU went offline in the meantime, no hierarchy walk has to * be done for updating the queued events, because the walk was * already done during marking the CPU offline in the hierarchy. * * When the CPU is no longer idle, the CPU takes care of the timers and * also of the timers in the hierarchy. * * (See also section "Required event and timerqueue update after a * remote expiry" in the documentation at the top) */ if (!tmc->online || !tmc->idle) { timer_unlock_remote_bases(cpu); goto unlock; } /* next event of CPU */ fetch_next_timer_interrupt_remote(jif, now, &tevt, cpu); timer_unlock_remote_bases(cpu); data.nextexp = tevt.global; data.firstexp = KTIME_MAX; data.evt = &tmc->cpuevt; data.remote = true; /* * The update is done even when there is no 'new' global timer pending * on the remote CPU (see section "Required event and timerqueue update * after a remote expiry" in the documentation at the top) */ walk_groups(&tmigr_new_timer_up, &data, tmc); unlock: tmc->remote = false; raw_spin_unlock_irq(&tmc->lock); } static bool tmigr_handle_remote_up(struct tmigr_group *group, struct tmigr_group *child, void *ptr) { struct tmigr_remote_data *data = ptr; struct tmigr_event *evt; unsigned long jif; u8 childmask; u64 now; jif = data->basej; now = data->now; childmask = data->childmask; trace_tmigr_handle_remote(group); again: /* * Handle the group only if @childmask is the migrator or if the * group has no migrator. Otherwise the group is active and is * handled by its own migrator. */ if (!tmigr_check_migrator(group, childmask)) return true; raw_spin_lock_irq(&group->lock); evt = tmigr_next_expired_groupevt(group, now); if (evt) { unsigned int remote_cpu = evt->cpu; raw_spin_unlock_irq(&group->lock); tmigr_handle_remote_cpu(remote_cpu, now, jif); /* check if there is another event, that needs to be handled */ goto again; } /* * Update of childmask for the next level and keep track of the expiry * of the first event that needs to be handled (group->next_expiry was * updated by tmigr_next_expired_groupevt(), next was set by * tmigr_handle_remote_cpu()). */ data->childmask = group->childmask; data->firstexp = group->next_expiry; raw_spin_unlock_irq(&group->lock); return false; } /** * tmigr_handle_remote() - Handle global timers of remote idle CPUs * * Called from the timer soft interrupt with interrupts enabled. */ void tmigr_handle_remote(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); struct tmigr_remote_data data; if (tmigr_is_not_available(tmc)) return; data.childmask = tmc->childmask; data.firstexp = KTIME_MAX; /* * NOTE: This is a doubled check because the migrator test will be done * in tmigr_handle_remote_up() anyway. Keep this check to speed up the * return when nothing has to be done. 
*/ if (!tmigr_check_migrator(tmc->tmgroup, tmc->childmask)) { /* * If this CPU was an idle migrator, make sure to clear its wakeup * value so it won't chase timers that have already expired elsewhere. * This avoids endless requeue from tmigr_new_timer(). */ if (READ_ONCE(tmc->wakeup) == KTIME_MAX) return; } data.now = get_jiffies_update(&data.basej); /* * Update @tmc->wakeup only at the end and do not reset @tmc->wakeup to * KTIME_MAX. Even if tmc->lock is not held during the whole remote * handling, tmc->wakeup is fine to be stale as it is called in * interrupt context and tick_nohz_next_event() is executed in interrupt * exit path only after processing the last pending interrupt. */ __walk_groups(&tmigr_handle_remote_up, &data, tmc); raw_spin_lock_irq(&tmc->lock); WRITE_ONCE(tmc->wakeup, data.firstexp); raw_spin_unlock_irq(&tmc->lock); } static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, struct tmigr_group *child, void *ptr) { struct tmigr_remote_data *data = ptr; u8 childmask; childmask = data->childmask; /* * Handle the group only if the child is the migrator or if the group * has no migrator. Otherwise the group is active and is handled by its * own migrator. */ if (!tmigr_check_migrator(group, childmask)) return true; /* * When there is a parent group and the CPU which triggered the * hierarchy walk is not active, proceed the walk to reach the top level * group before reading the next_expiry value. */ if (group->parent && !data->tmc_active) goto out; /* * The lock is required on 32bit architectures to read the variable * consistently with a concurrent writer. On 64bit the lock is not * required because the read operation is not split and so it is always * consistent. */ if (IS_ENABLED(CONFIG_64BIT)) { data->firstexp = READ_ONCE(group->next_expiry); if (data->now >= data->firstexp) { data->check = true; return true; } } else { raw_spin_lock(&group->lock); data->firstexp = group->next_expiry; if (data->now >= group->next_expiry) { data->check = true; raw_spin_unlock(&group->lock); return true; } raw_spin_unlock(&group->lock); } out: /* Update of childmask for the next level */ data->childmask = group->childmask; return false; } /** * tmigr_requires_handle_remote() - Check the need of remote timer handling * * Must be called with interrupts disabled. */ bool tmigr_requires_handle_remote(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); struct tmigr_remote_data data; unsigned long jif; bool ret = false; if (tmigr_is_not_available(tmc)) return ret; data.now = get_jiffies_update(&jif); data.childmask = tmc->childmask; data.firstexp = KTIME_MAX; data.tmc_active = !tmc->idle; data.check = false; /* * If the CPU is active, walk the hierarchy to check whether a remote * expiry is required. * * Check is done lockless as interrupts are disabled and @tmc->idle is * set only by the local CPU. */ if (!tmc->idle) { __walk_groups(&tmigr_requires_handle_remote_up, &data, tmc); return data.check; } /* * When the CPU is idle, compare @tmc->wakeup with @data.now. The lock * is required on 32bit architectures to read the variable consistently * with a concurrent writer. On 64bit the lock is not required because * the read operation is not split and so it is always consistent. 
*/ if (IS_ENABLED(CONFIG_64BIT)) { if (data.now >= READ_ONCE(tmc->wakeup)) return true; } else { raw_spin_lock(&tmc->lock); if (data.now >= tmc->wakeup) ret = true; raw_spin_unlock(&tmc->lock); } return ret; } /** * tmigr_cpu_new_timer() - enqueue next global timer into hierarchy (idle tmc) * @nextexp: Next expiry of global timer (or KTIME_MAX if not) * * The CPU is already deactivated in the timer migration * hierarchy. tick_nohz_get_sleep_length() calls tick_nohz_next_event() * and thereby the timer idle path is executed once more. @tmc->wakeup * holds the first timer, when the timer migration hierarchy is * completely idle. * * Returns the first timer that needs to be handled by this CPU or KTIME_MAX if * nothing needs to be done. */ u64 tmigr_cpu_new_timer(u64 nextexp) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); u64 ret; if (tmigr_is_not_available(tmc)) return nextexp; raw_spin_lock(&tmc->lock); ret = READ_ONCE(tmc->wakeup); if (nextexp != KTIME_MAX) { if (nextexp != tmc->cpuevt.nextevt.expires || tmc->cpuevt.ignore) { ret = tmigr_new_timer(tmc, nextexp); } } /* * Make sure the reevaluation of timers in idle path will not miss an * event. */ WRITE_ONCE(tmc->wakeup, ret); trace_tmigr_cpu_new_timer_idle(tmc, nextexp); raw_spin_unlock(&tmc->lock); return ret; } static bool tmigr_inactive_up(struct tmigr_group *group, struct tmigr_group *child, void *ptr) { union tmigr_state curstate, newstate, childstate; struct tmigr_walk *data = ptr; bool walk_done; u8 childmask; childmask = data->childmask; childstate.state = 0; /* * The memory barrier is paired with the cmpxchg() in tmigr_active_up() * to make sure the updates of child and group states are ordered. The * ordering is mandatory, as the group state change depends on the child * state. */ curstate.state = atomic_read_acquire(&group->migr_state); for (;;) { if (child) childstate.state = atomic_read(&child->migr_state); newstate = curstate; walk_done = true; /* Reset active bit when the child is no longer active */ if (!childstate.active) newstate.active &= ~childmask; if (newstate.migrator == childmask) { /* * Find a new migrator for the group, because the child * group is idle! */ if (!childstate.active) { unsigned long new_migr_bit, active = newstate.active; new_migr_bit = find_first_bit(&active, BIT_CNT); if (new_migr_bit != BIT_CNT) { newstate.migrator = BIT(new_migr_bit); } else { newstate.migrator = TMIGR_NONE; /* Changes need to be propagated */ walk_done = false; } } } newstate.seq++; WARN_ON_ONCE((newstate.migrator != TMIGR_NONE) && !(newstate.active)); if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) break; /* * The memory barrier is paired with the cmpxchg() in * tmigr_active_up() to make sure the updates of child and group * states are ordered. It is required only when the above * try_cmpxchg() fails. */ smp_mb__after_atomic(); } data->remote = false; /* Event Handling */ tmigr_update_events(group, child, data); if (group->parent && (walk_done == false)) data->childmask = group->childmask; /* * data->firstexp was set by tmigr_update_events() and contains the * expiry of the first global event which needs to be handled. 
It * differs from KTIME_MAX if: * - group is the top level group and * - group is idle (which means CPU was the last active CPU in the * hierarchy) and * - there is a pending event in the hierarchy */ WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent); trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); return walk_done; } static u64 __tmigr_cpu_deactivate(struct tmigr_cpu *tmc, u64 nextexp) { struct tmigr_walk data = { .nextexp = nextexp, .firstexp = KTIME_MAX, .evt = &tmc->cpuevt, .childmask = tmc->childmask }; /* * If nextexp is KTIME_MAX, the CPU event will be ignored because the * local timer expires before the global timer, no global timer is set * or CPU goes offline. */ if (nextexp != KTIME_MAX) tmc->cpuevt.ignore = false; walk_groups(&tmigr_inactive_up, &data, tmc); return data.firstexp; } /** * tmigr_cpu_deactivate() - Put current CPU into inactive state * @nextexp: The next global timer expiry of the current CPU * * Must be called with interrupts disabled. * * Return: the next event expiry of the current CPU or the next event expiry * from the hierarchy if this CPU is the top level migrator or the hierarchy is * completely idle. */ u64 tmigr_cpu_deactivate(u64 nextexp) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); u64 ret; if (tmigr_is_not_available(tmc)) return nextexp; raw_spin_lock(&tmc->lock); ret = __tmigr_cpu_deactivate(tmc, nextexp); tmc->idle = true; /* * Make sure the reevaluation of timers in idle path will not miss an * event. */ WRITE_ONCE(tmc->wakeup, ret); trace_tmigr_cpu_idle(tmc, nextexp); raw_spin_unlock(&tmc->lock); return ret; } /** * tmigr_quick_check() - Quick forecast of next tmigr event when CPU wants to * go idle * @nextevt: The next global timer expiry of the current CPU * * Return: * * KTIME_MAX - when it is probable that nothing has to be done (not * the only one in the level 0 group; and if it is the * only one in level 0 group, but there are more than a * single group active on the way to top level) * * nextevt - when CPU is offline and has to handle timer on his own * or when on the way to top in every group only a single * child is active but @nextevt is before the lowest * next_expiry encountered while walking up to top level. * * next_expiry - value of lowest expiry encountered while walking groups * if only a single child is active on each and @nextevt * is after this lowest expiry. */ u64 tmigr_quick_check(u64 nextevt) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); struct tmigr_group *group = tmc->tmgroup; if (tmigr_is_not_available(tmc)) return nextevt; if (WARN_ON_ONCE(tmc->idle)) return nextevt; if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->childmask)) return KTIME_MAX; do { if (!tmigr_check_lonely(group)) { return KTIME_MAX; } else { /* * Since current CPU is active, events may not be sorted * from bottom to the top because the CPU's event is ignored * up to the top and its sibling's events not propagated upwards. * Thus keep track of the lowest observed expiry. */ nextevt = min_t(u64, nextevt, READ_ONCE(group->next_expiry)); if (!group->parent) return nextevt; } group = group->parent; } while (group); return KTIME_MAX; } static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl, int node) { union tmigr_state s; raw_spin_lock_init(&group->lock); group->level = lvl; group->numa_node = lvl < tmigr_crossnode_level ? 
node : NUMA_NO_NODE; group->num_children = 0; s.migrator = TMIGR_NONE; s.active = 0; s.seq = 0; atomic_set(&group->migr_state, s.state); timerqueue_init_head(&group->events); timerqueue_init(&group->groupevt.nextevt); group->groupevt.nextevt.expires = KTIME_MAX; WRITE_ONCE(group->next_expiry, KTIME_MAX); group->groupevt.ignore = true; } static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node, unsigned int lvl) { struct tmigr_group *tmp, *group = NULL; lockdep_assert_held(&tmigr_mutex); /* Try to attach to an existing group first */ list_for_each_entry(tmp, &tmigr_level_list[lvl], list) { /* * If @lvl is below the cross NUMA node level, check whether * this group belongs to the same NUMA node. */ if (lvl < tmigr_crossnode_level && tmp->numa_node != node) continue; /* Capacity left? */ if (tmp->num_children >= TMIGR_CHILDREN_PER_GROUP) continue; /* * TODO: A possible further improvement: Make sure that all CPU * siblings end up in the same group of the lowest level of the * hierarchy. Rely on the topology sibling mask would be a * reasonable solution. */ group = tmp; break; } if (group) return group; /* Allocate and set up a new group */ group = kzalloc_node(sizeof(*group), GFP_KERNEL, node); if (!group) return ERR_PTR(-ENOMEM); tmigr_init_group(group, lvl, node); /* Setup successful. Add it to the hierarchy */ list_add(&group->list, &tmigr_level_list[lvl]); trace_tmigr_group_set(group); return group; } static void tmigr_connect_child_parent(struct tmigr_group *child, struct tmigr_group *parent) { union tmigr_state childstate; raw_spin_lock_irq(&child->lock); raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); child->parent = parent; child->childmask = BIT(parent->num_children++); raw_spin_unlock(&parent->lock); raw_spin_unlock_irq(&child->lock); trace_tmigr_connect_child_parent(child); /* * To prevent inconsistent states, active children need to be active in * the new parent as well. Inactive children are already marked inactive * in the parent group: * * * When new groups were created by tmigr_setup_groups() starting from * the lowest level (and not higher then one level below the current * top level), then they are not active. They will be set active when * the new online CPU comes active. * * * But if a new group above the current top level is required, it is * mandatory to propagate the active state of the already existing * child to the new parent. So tmigr_connect_child_parent() is * executed with the formerly top level group (child) and the newly * created group (parent). */ childstate.state = atomic_read(&child->migr_state); if (childstate.migrator != TMIGR_NONE) { struct tmigr_walk data; data.childmask = child->childmask; /* * There is only one new level per time. When connecting the * child and the parent and set the child active when the parent * is inactive, the parent needs to be the uppermost * level. Otherwise there went something wrong! */ WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); } } static int tmigr_setup_groups(unsigned int cpu, unsigned int node) { struct tmigr_group *group, *child, **stack; int top = 0, err = 0, i = 0; struct list_head *lvllist; stack = kcalloc(tmigr_hierarchy_levels, sizeof(*stack), GFP_KERNEL); if (!stack) return -ENOMEM; do { group = tmigr_get_group(cpu, node, i); if (IS_ERR(group)) { err = PTR_ERR(group); break; } top = i; stack[i++] = group; /* * When booting only less CPUs of a system than CPUs are * available, not all calculated hierarchy levels are required. 
* * The loop is aborted as soon as the highest level, which might * be different from tmigr_hierarchy_levels, contains only a * single group. */ if (group->parent || i == tmigr_hierarchy_levels || (list_empty(&tmigr_level_list[i]) && list_is_singular(&tmigr_level_list[i - 1]))) break; } while (i < tmigr_hierarchy_levels); do { group = stack[--i]; if (err < 0) { list_del(&group->list); kfree(group); continue; } WARN_ON_ONCE(i != group->level); /* * Update tmc -> group / child -> group connection */ if (i == 0) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); raw_spin_lock_irq(&group->lock); tmc->tmgroup = group; tmc->childmask = BIT(group->num_children++); raw_spin_unlock_irq(&group->lock); trace_tmigr_connect_cpu_parent(tmc); /* There are no children that need to be connected */ continue; } else { child = stack[i - 1]; tmigr_connect_child_parent(child, group); } /* check if uppermost level was newly created */ if (top != i) continue; WARN_ON_ONCE(top == 0); lvllist = &tmigr_level_list[top]; if (group->num_children == 1 && list_is_singular(lvllist)) { lvllist = &tmigr_level_list[top - 1]; list_for_each_entry(child, lvllist, list) { if (child->parent) continue; tmigr_connect_child_parent(child, group); } } } while (i > 0); kfree(stack); return err; } static int tmigr_add_cpu(unsigned int cpu) { int node = cpu_to_node(cpu); int ret; mutex_lock(&tmigr_mutex); ret = tmigr_setup_groups(cpu, node); mutex_unlock(&tmigr_mutex); return ret; } static int tmigr_cpu_online(unsigned int cpu) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); int ret; /* First online attempt? Initialize CPU data */ if (!tmc->tmgroup) { raw_spin_lock_init(&tmc->lock); ret = tmigr_add_cpu(cpu); if (ret < 0) return ret; if (tmc->childmask == 0) return -EINVAL; timerqueue_init(&tmc->cpuevt.nextevt); tmc->cpuevt.nextevt.expires = KTIME_MAX; tmc->cpuevt.ignore = true; tmc->cpuevt.cpu = cpu; tmc->remote = false; WRITE_ONCE(tmc->wakeup, KTIME_MAX); } raw_spin_lock_irq(&tmc->lock); trace_tmigr_cpu_online(tmc); tmc->idle = timer_base_is_idle(); if (!tmc->idle) __tmigr_cpu_activate(tmc); tmc->online = true; raw_spin_unlock_irq(&tmc->lock); return 0; } /* * tmigr_trigger_active() - trigger a CPU to become active again * * This function is executed on a CPU which is part of cpu_online_mask, when the * last active CPU in the hierarchy is offlining. With this, it is ensured that * the other CPU is active and takes over the migrator duty. */ static long tmigr_trigger_active(void *unused) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); WARN_ON_ONCE(!tmc->online || tmc->idle); return 0; } static int tmigr_cpu_offline(unsigned int cpu) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); int migrator; u64 firstexp; raw_spin_lock_irq(&tmc->lock); tmc->online = false; WRITE_ONCE(tmc->wakeup, KTIME_MAX); /* * CPU has to handle the local events on his own, when on the way to * offline; Therefore nextevt value is set to KTIME_MAX */ firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); trace_tmigr_cpu_offline(tmc); raw_spin_unlock_irq(&tmc->lock); if (firstexp != KTIME_MAX) { migrator = cpumask_any_but(cpu_online_mask, cpu); work_on_cpu(migrator, tmigr_trigger_active, NULL); } return 0; } static int __init tmigr_init(void) { unsigned int cpulvl, nodelvl, cpus_per_node, i; unsigned int nnodes = num_possible_nodes(); unsigned int ncpus = num_possible_cpus(); int ret = -ENOMEM; BUILD_BUG_ON_NOT_POWER_OF_2(TMIGR_CHILDREN_PER_GROUP); /* Nothing to do if running on UP */ if (ncpus == 1) return 0; /* * Calculate the required hierarchy levels. 
Unfortunately there is no * reliable information available, unless all possible CPUs have been * brought up and all NUMA nodes are populated. * * Estimate the number of levels from the number of possible nodes and * the number of possible CPUs. Assume CPUs are spread evenly across * nodes. We cannot rely on cpumask_of_node() because it only works for * online CPUs. */ cpus_per_node = DIV_ROUND_UP(ncpus, nnodes); /* Calculate the hierarchy levels required to hold the CPUs of a node */ cpulvl = DIV_ROUND_UP(order_base_2(cpus_per_node), ilog2(TMIGR_CHILDREN_PER_GROUP)); /* Calculate the extra levels to connect all nodes */ nodelvl = DIV_ROUND_UP(order_base_2(nnodes), ilog2(TMIGR_CHILDREN_PER_GROUP)); tmigr_hierarchy_levels = cpulvl + nodelvl; /* * If a NUMA node spawns more than one CPU level group then the next * level(s) of the hierarchy contain groups which handle all CPU groups * of the same NUMA node. The level above goes across NUMA nodes. Store * this information for the setup code to decide at which level node * matching is no longer required. */ tmigr_crossnode_level = cpulvl; tmigr_level_list = kcalloc(tmigr_hierarchy_levels, sizeof(struct list_head), GFP_KERNEL); if (!tmigr_level_list) goto err; for (i = 0; i < tmigr_hierarchy_levels; i++) INIT_LIST_HEAD(&tmigr_level_list[i]); pr_info("Timer migration: %d hierarchy levels; %d children per group;" " %d crossnode level\n", tmigr_hierarchy_levels, TMIGR_CHILDREN_PER_GROUP, tmigr_crossnode_level); ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:online", tmigr_cpu_online, tmigr_cpu_offline); if (ret) goto err; return 0; err: pr_err("Timer migration setup failed\n"); return ret; } late_initcall(tmigr_init);
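/*
 * Illustrative sketch (not part of the kernel source): the level calculation
 * done in tmigr_init() above, redone with plain C stand-ins for the kernel
 * helpers so the arithmetic can be checked in userspace. The value 8 for
 * children per group is an assumption mirroring TMIGR_CHILDREN_PER_GROUP;
 * div_round_up(), order_base_2_ux() and ilog2_ux() are local substitutes for
 * DIV_ROUND_UP(), order_base_2() and ilog2(), and the machine sizes in main()
 * are made-up example values.
 */
#include <stdio.h>

#define CHILDREN_PER_GROUP	8	/* assumed to match TMIGR_CHILDREN_PER_GROUP */

static unsigned int div_round_up(unsigned int n, unsigned int d)
{
	return (n + d - 1) / d;		/* stand-in for DIV_ROUND_UP() */
}

static unsigned int order_base_2_ux(unsigned int n)
{
	unsigned int order = 0;

	while ((1u << order) < n)	/* smallest order with 2^order >= n */
		order++;
	return order;			/* stand-in for order_base_2() */
}

static unsigned int ilog2_ux(unsigned int n)
{
	unsigned int log = 0;

	while (n >>= 1)			/* floor(log2(n)), stand-in for ilog2() */
		log++;
	return log;
}

int main(void)
{
	unsigned int ncpus = 256, nnodes = 4;	/* example machine */
	unsigned int cpus_per_node = div_round_up(ncpus, nnodes);
	unsigned int cpulvl = div_round_up(order_base_2_ux(cpus_per_node),
					   ilog2_ux(CHILDREN_PER_GROUP));
	unsigned int nodelvl = div_round_up(order_base_2_ux(nnodes),
					    ilog2_ux(CHILDREN_PER_GROUP));

	/* 256 CPUs on 4 nodes: 64 CPUs/node -> 2 levels, 4 nodes -> 1 level */
	printf("levels: %u (cpulvl %u + nodelvl %u)\n",
	       cpulvl + nodelvl, cpulvl, nodelvl);
	return 0;
}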
// SPDX-License-Identifier: GPL-2.0-only /* * mm/percpu.c - percpu memory allocator * * Copyright (C) 2009 SUSE Linux Products GmbH * Copyright (C) 2009 Tejun Heo <tj@kernel.org> * * Copyright (C) 2017 Facebook Inc. * Copyright (C) 2017 Dennis Zhou <dennis@kernel.org> * * The percpu allocator handles both static and dynamic areas. Percpu * areas are allocated in chunks which are divided into units. There is * a 1-to-1 mapping for units to possible cpus. These units are grouped * based on NUMA properties of the machine. * * c0 c1 c2 * ------------------- ------------------- ------------ * | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u * ------------------- ...... ------------------- .... ------------ * * Allocation is done by offsets into a unit's address space. Ie., an * area of 512 bytes at 6k in c1 occupies 512 bytes at 6k in c1:u0, * c1:u1, c1:u2, etc. On NUMA machines, the mapping may be non-linear * and even sparse. Access is handled by configuring percpu base * registers according to the cpu to unit mappings and offsetting the * base address using pcpu_unit_size. * * There is special consideration for the first chunk which must handle * the static percpu variables in the kernel image as allocation services * are not online yet.
In short, the first chunk is structured like so: * * <Static | [Reserved] | Dynamic> * * The static data is copied from the original section managed by the * linker. The reserved section, if non-zero, primarily manages static * percpu variables from kernel modules. Finally, the dynamic section * takes care of normal allocations. * * The allocator organizes chunks into lists according to free size and * memcg-awareness. To make a percpu allocation memcg-aware the __GFP_ACCOUNT * flag should be passed. All memcg-aware allocations are sharing one set * of chunks and all unaccounted allocations and allocations performed * by processes belonging to the root memory cgroup are using the second set. * * The allocator tries to allocate from the fullest chunk first. Each chunk * is managed by a bitmap with metadata blocks. The allocation map is updated * on every allocation and free to reflect the current state while the boundary * map is only updated on allocation. Each metadata block contains * information to help mitigate the need to iterate over large portions * of the bitmap. The reverse mapping from page to chunk is stored in * the page's index. Lastly, units are lazily backed and grow in unison. * * There is a unique conversion that goes on here between bytes and bits. * Each bit represents a fragment of size PCPU_MIN_ALLOC_SIZE. The chunk * tracks the number of pages it is responsible for in nr_pages. Helper * functions are used to convert from between the bytes, bits, and blocks. * All hints are managed in bits unless explicitly stated. * * To use this allocator, arch code should do the following: * * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate * regular address to percpu pointer and back if they need to be * different from the default * * - use pcpu_setup_first_chunk() during percpu area initialization to * setup the first chunk containing the kernel static percpu area */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/bitmap.h> #include <linux/cpumask.h> #include <linux/memblock.h> #include <linux/err.h> #include <linux/list.h> #include <linux/log2.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/pfn.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> #include <linux/kmemleak.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/memcontrol.h> #include <asm/cacheflush.h> #include <asm/sections.h> #include <asm/tlbflush.h> #include <asm/io.h> #define CREATE_TRACE_POINTS #include <trace/events/percpu.h> #include "percpu-internal.h" /* * The slots are sorted by the size of the biggest continuous free area. * 1-31 bytes share the same slot. 
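 *
 * As a worked example of the mapping implemented by __pcpu_size_to_slot()
 * below (assuming the usual fls() semantics, where fls(n) is the 1-based
 * index of the most significant set bit): a free area of 1024 bytes gives
 * fls(1024) == 11, so the slot is max(11 - PCPU_SLOT_BASE_SHIFT + 2, 1) == 8
 * with PCPU_SLOT_BASE_SHIFT == 5. Smaller free areas land in lower slots,
 * bottoming out at slot 1.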
*/ #define PCPU_SLOT_BASE_SHIFT 5 /* chunks in slots below this are subject to being sidelined on failed alloc */ #define PCPU_SLOT_FAIL_THRESHOLD 3 #define PCPU_EMPTY_POP_PAGES_LOW 2 #define PCPU_EMPTY_POP_PAGES_HIGH 4 #ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ (void __percpu *)((unsigned long)(addr) - \ (unsigned long)pcpu_base_addr + \ (unsigned long)__per_cpu_start) #endif #ifndef __pcpu_ptr_to_addr #define __pcpu_ptr_to_addr(ptr) \ (void __force *)((unsigned long)(ptr) + \ (unsigned long)pcpu_base_addr - \ (unsigned long)__per_cpu_start) #endif #else /* CONFIG_SMP */ /* on UP, it's always identity mapped */ #define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) #define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) #endif /* CONFIG_SMP */ static int pcpu_unit_pages __ro_after_init; static int pcpu_unit_size __ro_after_init; static int pcpu_nr_units __ro_after_init; static int pcpu_atom_size __ro_after_init; int pcpu_nr_slots __ro_after_init; static int pcpu_free_slot __ro_after_init; int pcpu_sidelined_slot __ro_after_init; int pcpu_to_depopulate_slot __ro_after_init; static size_t pcpu_chunk_struct_size __ro_after_init; /* cpus with the lowest and highest unit addresses */ static unsigned int pcpu_low_unit_cpu __ro_after_init; static unsigned int pcpu_high_unit_cpu __ro_after_init; /* the address of the first chunk which starts with the kernel static area */ void *pcpu_base_addr __ro_after_init; static const int *pcpu_unit_map __ro_after_init; /* cpu -> unit */ const unsigned long *pcpu_unit_offsets __ro_after_init; /* cpu -> unit offset */ /* group information, used for vm allocation */ static int pcpu_nr_groups __ro_after_init; static const unsigned long *pcpu_group_offsets __ro_after_init; static const size_t *pcpu_group_sizes __ro_after_init; /* * The first chunk which always exists. Note that unlike other * chunks, this one can be allocated and mapped in several different * ways and thus often doesn't live in the vmalloc area. */ struct pcpu_chunk *pcpu_first_chunk __ro_after_init; /* * Optional reserved chunk. This chunk reserves part of the first * chunk and serves it for reserved allocations. When the reserved * region doesn't exist, the following variable is NULL. */ struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init; DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */ /* * The number of empty populated pages, protected by pcpu_lock. * The reserved chunk doesn't contribute to the count. */ int pcpu_nr_empty_pop_pages; /* * The number of populated pages in use by the allocator, protected by * pcpu_lock. This number is kept per a unit per chunk (i.e. when a page gets * allocated/deallocated, it is allocated/deallocated in all units of a chunk * and increments/decrements this count by 1). */ static unsigned long pcpu_nr_populated; /* * Balance work is used to populate or destroy chunks asynchronously. We * try to keep the number of populated free pages between * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one * empty chunk. 
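 *
 * As a rough illustration of the thresholds defined above (the actual policy
 * is implemented by pcpu_balance_workfn() and considers more state than
 * this): once the number of empty populated pages drops below
 * PCPU_EMPTY_POP_PAGES_LOW, the balance work is scheduled and tops the count
 * back up towards PCPU_EMPTY_POP_PAGES_HIGH, so that small atomic
 * allocations usually find already populated pages waiting for them.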
*/ static void pcpu_balance_workfn(struct work_struct *work); static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn); static bool pcpu_async_enabled __read_mostly; static bool pcpu_atomic_alloc_failed; static void pcpu_schedule_balance_work(void) { if (pcpu_async_enabled) schedule_work(&pcpu_balance_work); } /** * pcpu_addr_in_chunk - check if the address is served from this chunk * @chunk: chunk of interest * @addr: percpu address * * RETURNS: * True if the address is served from this chunk. */ static bool pcpu_addr_in_chunk(struct pcpu_chunk *chunk, void *addr) { void *start_addr, *end_addr; if (!chunk) return false; start_addr = chunk->base_addr + chunk->start_offset; end_addr = chunk->base_addr + chunk->nr_pages * PAGE_SIZE - chunk->end_offset; return addr >= start_addr && addr < end_addr; } static int __pcpu_size_to_slot(int size) { int highbit = fls(size); /* size is in bytes */ return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); } static int pcpu_size_to_slot(int size) { if (size == pcpu_unit_size) return pcpu_free_slot; return __pcpu_size_to_slot(size); } static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) { const struct pcpu_block_md *chunk_md = &chunk->chunk_md; if (chunk->free_bytes < PCPU_MIN_ALLOC_SIZE || chunk_md->contig_hint == 0) return 0; return pcpu_size_to_slot(chunk_md->contig_hint * PCPU_MIN_ALLOC_SIZE); } /* set the pointer to a chunk in a page struct */ static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu) { page->index = (unsigned long)pcpu; } /* obtain pointer to a chunk from a page struct */ static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) { return (struct pcpu_chunk *)page->index; } static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx) { return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; } static unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx) { return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT); } static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, unsigned int cpu, int page_idx) { return (unsigned long)chunk->base_addr + pcpu_unit_page_offset(cpu, page_idx); } /* * The following are helper functions to help access bitmaps and convert * between bitmap offsets to address offsets. */ static unsigned long *pcpu_index_alloc_map(struct pcpu_chunk *chunk, int index) { return chunk->alloc_map + (index * PCPU_BITMAP_BLOCK_BITS / BITS_PER_LONG); } static unsigned long pcpu_off_to_block_index(int off) { return off / PCPU_BITMAP_BLOCK_BITS; } static unsigned long pcpu_off_to_block_off(int off) { return off & (PCPU_BITMAP_BLOCK_BITS - 1); } static unsigned long pcpu_block_off_to_off(int index, int off) { return index * PCPU_BITMAP_BLOCK_BITS + off; } /** * pcpu_check_block_hint - check against the contig hint * @block: block of interest * @bits: size of allocation * @align: alignment of area (max PAGE_SIZE) * * Check to see if the allocation can fit in the block's contig hint. * Note, a chunk uses the same hints as a block so this can also check against * the chunk's contig hint. */ static bool pcpu_check_block_hint(struct pcpu_block_md *block, int bits, size_t align) { int bit_off = ALIGN(block->contig_hint_start, align) - block->contig_hint_start; return bit_off + bits <= block->contig_hint; } /* * pcpu_next_hint - determine which hint to use * @block: block of interest * @alloc_bits: size of allocation * * This determines if we should scan based on the scan_hint or first_free. * In general, we want to scan from first_free to fulfill allocations by * first fit. 
However, if we know a scan_hint at position scan_hint_start * cannot fulfill an allocation, we can begin scanning from there knowing * the contig_hint will be our fallback. */ static int pcpu_next_hint(struct pcpu_block_md *block, int alloc_bits) { /* * The three conditions below determine if we can skip past the * scan_hint. First, does the scan hint exist. Second, is the * contig_hint after the scan_hint (possibly not true iff * contig_hint == scan_hint). Third, is the allocation request * larger than the scan_hint. */ if (block->scan_hint && block->contig_hint_start > block->scan_hint_start && alloc_bits > block->scan_hint) return block->scan_hint_start + block->scan_hint; return block->first_free; } /** * pcpu_next_md_free_region - finds the next hint free area * @chunk: chunk of interest * @bit_off: chunk offset * @bits: size of free area * * Helper function for pcpu_for_each_md_free_region. It checks * block->contig_hint and performs aggregation across blocks to find the * next hint. It modifies bit_off and bits in-place to be consumed in the * loop. */ static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off, int *bits) { int i = pcpu_off_to_block_index(*bit_off); int block_off = pcpu_off_to_block_off(*bit_off); struct pcpu_block_md *block; *bits = 0; for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk); block++, i++) { /* handles contig area across blocks */ if (*bits) { *bits += block->left_free; if (block->left_free == PCPU_BITMAP_BLOCK_BITS) continue; return; } /* * This checks three things. First is there a contig_hint to * check. Second, have we checked this hint before by * comparing the block_off. Third, is this the same as the * right contig hint. In the last case, it spills over into * the next block and should be handled by the contig area * across blocks code. */ *bits = block->contig_hint; if (*bits && block->contig_hint_start >= block_off && *bits + block->contig_hint_start < PCPU_BITMAP_BLOCK_BITS) { *bit_off = pcpu_block_off_to_off(i, block->contig_hint_start); return; } /* reset to satisfy the second predicate above */ block_off = 0; *bits = block->right_free; *bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free; } } /** * pcpu_next_fit_region - finds fit areas for a given allocation request * @chunk: chunk of interest * @alloc_bits: size of allocation * @align: alignment of area (max PAGE_SIZE) * @bit_off: chunk offset * @bits: size of free area * * Finds the next free region that is viable for use with a given size and * alignment. This only returns if there is a valid area to be used for this * allocation. block->first_free is returned if the allocation request fits * within the block to see if the request can be fulfilled prior to the contig * hint. */ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits, int align, int *bit_off, int *bits) { int i = pcpu_off_to_block_index(*bit_off); int block_off = pcpu_off_to_block_off(*bit_off); struct pcpu_block_md *block; *bits = 0; for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk); block++, i++) { /* handles contig area across blocks */ if (*bits) { *bits += block->left_free; if (*bits >= alloc_bits) return; if (block->left_free == PCPU_BITMAP_BLOCK_BITS) continue; } /* check block->contig_hint */ *bits = ALIGN(block->contig_hint_start, align) - block->contig_hint_start; /* * This uses the block offset to determine if this has been * checked in the prior iteration. 
*/ if (block->contig_hint && block->contig_hint_start >= block_off && block->contig_hint >= *bits + alloc_bits) { int start = pcpu_next_hint(block, alloc_bits); *bits += alloc_bits + block->contig_hint_start - start; *bit_off = pcpu_block_off_to_off(i, start); return; } /* reset to satisfy the second predicate above */ block_off = 0; *bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free, align); *bits = PCPU_BITMAP_BLOCK_BITS - *bit_off; *bit_off = pcpu_block_off_to_off(i, *bit_off); if (*bits >= alloc_bits) return; } /* no valid offsets were found - fail condition */ *bit_off = pcpu_chunk_map_bits(chunk); } /* * Metadata free area iterators. These perform aggregation of free areas * based on the metadata blocks and return the offset @bit_off and size in * bits of the free area @bits. pcpu_for_each_fit_region only returns when * a fit is found for the allocation request. */ #define pcpu_for_each_md_free_region(chunk, bit_off, bits) \ for (pcpu_next_md_free_region((chunk), &(bit_off), &(bits)); \ (bit_off) < pcpu_chunk_map_bits((chunk)); \ (bit_off) += (bits) + 1, \ pcpu_next_md_free_region((chunk), &(bit_off), &(bits))) #define pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) \ for (pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \ &(bits)); \ (bit_off) < pcpu_chunk_map_bits((chunk)); \ (bit_off) += (bits), \ pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \ &(bits))) /** * pcpu_mem_zalloc - allocate memory * @size: bytes to allocate * @gfp: allocation flags * * Allocate @size bytes. If @size is smaller than PAGE_SIZE, * kzalloc() is used; otherwise, the equivalent of vzalloc() is used. * This is to facilitate passing through whitelisted flags. The * returned memory is always zeroed. * * RETURNS: * Pointer to the allocated area on success, NULL on failure. */ static void *pcpu_mem_zalloc(size_t size, gfp_t gfp) { if (WARN_ON_ONCE(!slab_is_available())) return NULL; if (size <= PAGE_SIZE) return kzalloc(size, gfp); else return __vmalloc(size, gfp | __GFP_ZERO); } /** * pcpu_mem_free - free memory * @ptr: memory to free * * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc(). */ static void pcpu_mem_free(void *ptr) { kvfree(ptr); } static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot, bool move_front) { if (chunk != pcpu_reserved_chunk) { if (move_front) list_move(&chunk->list, &pcpu_chunk_lists[slot]); else list_move_tail(&chunk->list, &pcpu_chunk_lists[slot]); } } static void pcpu_chunk_move(struct pcpu_chunk *chunk, int slot) { __pcpu_chunk_move(chunk, slot, true); } /** * pcpu_chunk_relocate - put chunk in the appropriate chunk slot * @chunk: chunk of interest * @oslot: the previous slot it was on * * This function is called after an allocation or free changed @chunk. * New slot according to the changed state is determined and @chunk is * moved to the slot. Note that the reserved chunk is never put on * chunk slots. * * CONTEXT: * pcpu_lock. 
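 *
 * Typical caller pattern, shown only as a sketch (see pcpu_alloc_area() and
 * pcpu_free_area() for the real users):
 *
 *	int oslot = pcpu_chunk_slot(chunk);
 *
 *	... update the allocation map, free_bytes and hints ...
 *
 *	pcpu_chunk_relocate(chunk, oslot);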
*/ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) { int nslot = pcpu_chunk_slot(chunk); /* leave isolated chunks in-place */ if (chunk->isolated) return; if (oslot != nslot) __pcpu_chunk_move(chunk, nslot, oslot < nslot); } static void pcpu_isolate_chunk(struct pcpu_chunk *chunk) { lockdep_assert_held(&pcpu_lock); if (!chunk->isolated) { chunk->isolated = true; pcpu_nr_empty_pop_pages -= chunk->nr_empty_pop_pages; } list_move(&chunk->list, &pcpu_chunk_lists[pcpu_to_depopulate_slot]); } static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk) { lockdep_assert_held(&pcpu_lock); if (chunk->isolated) { chunk->isolated = false; pcpu_nr_empty_pop_pages += chunk->nr_empty_pop_pages; pcpu_chunk_relocate(chunk, -1); } } /* * pcpu_update_empty_pages - update empty page counters * @chunk: chunk of interest * @nr: nr of empty pages * * This is used to keep track of the empty pages now based on the premise * a md_block covers a page. The hint update functions recognize if a block * is made full or broken to calculate deltas for keeping track of free pages. */ static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr) { chunk->nr_empty_pop_pages += nr; if (chunk != pcpu_reserved_chunk && !chunk->isolated) pcpu_nr_empty_pop_pages += nr; } /* * pcpu_region_overlap - determines if two regions overlap * @a: start of first region, inclusive * @b: end of first region, exclusive * @x: start of second region, inclusive * @y: end of second region, exclusive * * This is used to determine if the hint region [a, b) overlaps with the * allocated region [x, y). */ static inline bool pcpu_region_overlap(int a, int b, int x, int y) { return (a < y) && (x < b); } /** * pcpu_block_update - updates a block given a free area * @block: block of interest * @start: start offset in block * @end: end offset in block * * Updates a block given a known free area. The region [start, end) is * expected to be the entirety of the free area within a block. Chooses * the best starting offset if the contig hints are equal. */ static void pcpu_block_update(struct pcpu_block_md *block, int start, int end) { int contig = end - start; block->first_free = min(block->first_free, start); if (start == 0) block->left_free = contig; if (end == block->nr_bits) block->right_free = contig; if (contig > block->contig_hint) { /* promote the old contig_hint to be the new scan_hint */ if (start > block->contig_hint_start) { if (block->contig_hint > block->scan_hint) { block->scan_hint_start = block->contig_hint_start; block->scan_hint = block->contig_hint; } else if (start < block->scan_hint_start) { /* * The old contig_hint == scan_hint. But, the * new contig is larger so hold the invariant * scan_hint_start < contig_hint_start. */ block->scan_hint = 0; } } else { block->scan_hint = 0; } block->contig_hint_start = start; block->contig_hint = contig; } else if (contig == block->contig_hint) { if (block->contig_hint_start && (!start || __ffs(start) > __ffs(block->contig_hint_start))) { /* start has a better alignment so use it */ block->contig_hint_start = start; if (start < block->scan_hint_start && block->contig_hint > block->scan_hint) block->scan_hint = 0; } else if (start > block->scan_hint_start || block->contig_hint > block->scan_hint) { /* * Knowing contig == contig_hint, update the scan_hint * if it is farther than or larger than the current * scan_hint. */ block->scan_hint_start = start; block->scan_hint = contig; } } else { /* * The region is smaller than the contig_hint. 
So only update * the scan_hint if it is larger than or equal and farther than * the current scan_hint. */ if ((start < block->contig_hint_start && (contig > block->scan_hint || (contig == block->scan_hint && start > block->scan_hint_start)))) { block->scan_hint_start = start; block->scan_hint = contig; } } } /* * pcpu_block_update_scan - update a block given a free area from a scan * @chunk: chunk of interest * @bit_off: chunk offset * @bits: size of free area * * Finding the final allocation spot first goes through pcpu_find_block_fit() * to find a block that can hold the allocation and then pcpu_alloc_area() * where a scan is used. When allocations require specific alignments, * we can inadvertently create holes which will not be seen in the alloc * or free paths. * * This takes a given free area hole and updates a block as it may change the * scan_hint. We need to scan backwards to ensure we don't miss free bits * from alignment. */ static void pcpu_block_update_scan(struct pcpu_chunk *chunk, int bit_off, int bits) { int s_off = pcpu_off_to_block_off(bit_off); int e_off = s_off + bits; int s_index, l_bit; struct pcpu_block_md *block; if (e_off > PCPU_BITMAP_BLOCK_BITS) return; s_index = pcpu_off_to_block_index(bit_off); block = chunk->md_blocks + s_index; /* scan backwards in case of alignment skipping free bits */ l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index), s_off); s_off = (s_off == l_bit) ? 0 : l_bit + 1; pcpu_block_update(block, s_off, e_off); } /** * pcpu_chunk_refresh_hint - updates metadata about a chunk * @chunk: chunk of interest * @full_scan: if we should scan from the beginning * * Iterates over the metadata blocks to find the largest contig area. * A full scan can be avoided on the allocation path as this is triggered * if we broke the contig_hint. In doing so, the scan_hint will be before * the contig_hint or after if the scan_hint == contig_hint. This cannot * be prevented on freeing as we want to find the largest area possibly * spanning blocks. */ static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk, bool full_scan) { struct pcpu_block_md *chunk_md = &chunk->chunk_md; int bit_off, bits; /* promote scan_hint to contig_hint */ if (!full_scan && chunk_md->scan_hint) { bit_off = chunk_md->scan_hint_start + chunk_md->scan_hint; chunk_md->contig_hint_start = chunk_md->scan_hint_start; chunk_md->contig_hint = chunk_md->scan_hint; chunk_md->scan_hint = 0; } else { bit_off = chunk_md->first_free; chunk_md->contig_hint = 0; } bits = 0; pcpu_for_each_md_free_region(chunk, bit_off, bits) pcpu_block_update(chunk_md, bit_off, bit_off + bits); } /** * pcpu_block_refresh_hint * @chunk: chunk of interest * @index: index of the metadata block * * Scans over the block beginning at first_free and updates the block * metadata accordingly. 
*/ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index) { struct pcpu_block_md *block = chunk->md_blocks + index; unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index); unsigned int start, end; /* region start, region end */ /* promote scan_hint to contig_hint */ if (block->scan_hint) { start = block->scan_hint_start + block->scan_hint; block->contig_hint_start = block->scan_hint_start; block->contig_hint = block->scan_hint; block->scan_hint = 0; } else { start = block->first_free; block->contig_hint = 0; } block->right_free = 0; /* iterate over free areas and update the contig hints */ for_each_clear_bitrange_from(start, end, alloc_map, PCPU_BITMAP_BLOCK_BITS) pcpu_block_update(block, start, end); } /** * pcpu_block_update_hint_alloc - update hint on allocation path * @chunk: chunk of interest * @bit_off: chunk offset * @bits: size of request * * Updates metadata for the allocation path. The metadata only has to be * refreshed by a full scan iff the chunk's contig hint is broken. Block level * scans are required if the block's contig hint is broken. */ static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off, int bits) { struct pcpu_block_md *chunk_md = &chunk->chunk_md; int nr_empty_pages = 0; struct pcpu_block_md *s_block, *e_block, *block; int s_index, e_index; /* block indexes of the freed allocation */ int s_off, e_off; /* block offsets of the freed allocation */ /* * Calculate per block offsets. * The calculation uses an inclusive range, but the resulting offsets * are [start, end). e_index always points to the last block in the * range. */ s_index = pcpu_off_to_block_index(bit_off); e_index = pcpu_off_to_block_index(bit_off + bits - 1); s_off = pcpu_off_to_block_off(bit_off); e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1; s_block = chunk->md_blocks + s_index; e_block = chunk->md_blocks + e_index; /* * Update s_block. */ if (s_block->contig_hint == PCPU_BITMAP_BLOCK_BITS) nr_empty_pages++; /* * block->first_free must be updated if the allocation takes its place. * If the allocation breaks the contig_hint, a scan is required to * restore this hint. */ if (s_off == s_block->first_free) s_block->first_free = find_next_zero_bit( pcpu_index_alloc_map(chunk, s_index), PCPU_BITMAP_BLOCK_BITS, s_off + bits); if (pcpu_region_overlap(s_block->scan_hint_start, s_block->scan_hint_start + s_block->scan_hint, s_off, s_off + bits)) s_block->scan_hint = 0; if (pcpu_region_overlap(s_block->contig_hint_start, s_block->contig_hint_start + s_block->contig_hint, s_off, s_off + bits)) { /* block contig hint is broken - scan to fix it */ if (!s_off) s_block->left_free = 0; pcpu_block_refresh_hint(chunk, s_index); } else { /* update left and right contig manually */ s_block->left_free = min(s_block->left_free, s_off); if (s_index == e_index) s_block->right_free = min_t(int, s_block->right_free, PCPU_BITMAP_BLOCK_BITS - e_off); else s_block->right_free = 0; } /* * Update e_block. */ if (s_index != e_index) { if (e_block->contig_hint == PCPU_BITMAP_BLOCK_BITS) nr_empty_pages++; /* * When the allocation is across blocks, the end is along * the left part of the e_block. 
*/ e_block->first_free = find_next_zero_bit( pcpu_index_alloc_map(chunk, e_index), PCPU_BITMAP_BLOCK_BITS, e_off); if (e_off == PCPU_BITMAP_BLOCK_BITS) { /* reset the block */ e_block++; } else { if (e_off > e_block->scan_hint_start) e_block->scan_hint = 0; e_block->left_free = 0; if (e_off > e_block->contig_hint_start) { /* contig hint is broken - scan to fix it */ pcpu_block_refresh_hint(chunk, e_index); } else { e_block->right_free = min_t(int, e_block->right_free, PCPU_BITMAP_BLOCK_BITS - e_off); } } /* update in-between md_blocks */ nr_empty_pages += (e_index - s_index - 1); for (block = s_block + 1; block < e_block; block++) { block->scan_hint = 0; block->contig_hint = 0; block->left_free = 0; block->right_free = 0; } } /* * If the allocation is not atomic, some blocks may not be * populated with pages, while we account it here. The number * of pages will be added back with pcpu_chunk_populated() * when populating pages. */ if (nr_empty_pages) pcpu_update_empty_pages(chunk, -nr_empty_pages); if (pcpu_region_overlap(chunk_md->scan_hint_start, chunk_md->scan_hint_start + chunk_md->scan_hint, bit_off, bit_off + bits)) chunk_md->scan_hint = 0; /* * The only time a full chunk scan is required is if the chunk * contig hint is broken. Otherwise, it means a smaller space * was used and therefore the chunk contig hint is still correct. */ if (pcpu_region_overlap(chunk_md->contig_hint_start, chunk_md->contig_hint_start + chunk_md->contig_hint, bit_off, bit_off + bits)) pcpu_chunk_refresh_hint(chunk, false); } /** * pcpu_block_update_hint_free - updates the block hints on the free path * @chunk: chunk of interest * @bit_off: chunk offset * @bits: size of request * * Updates metadata for the allocation path. This avoids a blind block * refresh by making use of the block contig hints. If this fails, it scans * forward and backward to determine the extent of the free area. This is * capped at the boundary of blocks. * * A chunk update is triggered if a page becomes free, a block becomes free, * or the free spans across blocks. This tradeoff is to minimize iterating * over the block metadata to update chunk_md->contig_hint. * chunk_md->contig_hint may be off by up to a page, but it will never be more * than the available space. If the contig hint is contained in one block, it * will be accurate. */ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off, int bits) { int nr_empty_pages = 0; struct pcpu_block_md *s_block, *e_block, *block; int s_index, e_index; /* block indexes of the freed allocation */ int s_off, e_off; /* block offsets of the freed allocation */ int start, end; /* start and end of the whole free area */ /* * Calculate per block offsets. * The calculation uses an inclusive range, but the resulting offsets * are [start, end). e_index always points to the last block in the * range. */ s_index = pcpu_off_to_block_index(bit_off); e_index = pcpu_off_to_block_index(bit_off + bits - 1); s_off = pcpu_off_to_block_off(bit_off); e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1; s_block = chunk->md_blocks + s_index; e_block = chunk->md_blocks + e_index; /* * Check if the freed area aligns with the block->contig_hint. * If it does, then the scan to find the beginning/end of the * larger free area can be avoided. * * start and end refer to beginning and end of the free area * within each their respective blocks. This is not necessarily * the entire free area as it may span blocks past the beginning * or end of the block. 
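	 *
	 * As a sketch, if bits [900, 1000) of a block are being freed while
	 * bits [850, 900) and [1000, 1010) are already free, start is pulled
	 * back to 850 (either via the contig_hint shortcut or the backward
	 * find_last_bit() scan) and end is pushed out to 1010 by the forward
	 * find_next_bit() scan, so the block hint is updated with the merged
	 * run [850, 1010) rather than just the freed region.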
*/ start = s_off; if (s_off == s_block->contig_hint + s_block->contig_hint_start) { start = s_block->contig_hint_start; } else { /* * Scan backwards to find the extent of the free area. * find_last_bit returns the starting bit, so if the start bit * is returned, that means there was no last bit and the * remainder of the chunk is free. */ int l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index), start); start = (start == l_bit) ? 0 : l_bit + 1; } end = e_off; if (e_off == e_block->contig_hint_start) end = e_block->contig_hint_start + e_block->contig_hint; else end = find_next_bit(pcpu_index_alloc_map(chunk, e_index), PCPU_BITMAP_BLOCK_BITS, end); /* update s_block */ e_off = (s_index == e_index) ? end : PCPU_BITMAP_BLOCK_BITS; if (!start && e_off == PCPU_BITMAP_BLOCK_BITS) nr_empty_pages++; pcpu_block_update(s_block, start, e_off); /* freeing in the same block */ if (s_index != e_index) { /* update e_block */ if (end == PCPU_BITMAP_BLOCK_BITS) nr_empty_pages++; pcpu_block_update(e_block, 0, end); /* reset md_blocks in the middle */ nr_empty_pages += (e_index - s_index - 1); for (block = s_block + 1; block < e_block; block++) { block->first_free = 0; block->scan_hint = 0; block->contig_hint_start = 0; block->contig_hint = PCPU_BITMAP_BLOCK_BITS; block->left_free = PCPU_BITMAP_BLOCK_BITS; block->right_free = PCPU_BITMAP_BLOCK_BITS; } } if (nr_empty_pages) pcpu_update_empty_pages(chunk, nr_empty_pages); /* * Refresh chunk metadata when the free makes a block free or spans * across blocks. The contig_hint may be off by up to a page, but if * the contig_hint is contained in a block, it will be accurate with * the else condition below. */ if (((end - start) >= PCPU_BITMAP_BLOCK_BITS) || s_index != e_index) pcpu_chunk_refresh_hint(chunk, true); else pcpu_block_update(&chunk->chunk_md, pcpu_block_off_to_off(s_index, start), end); } /** * pcpu_is_populated - determines if the region is populated * @chunk: chunk of interest * @bit_off: chunk offset * @bits: size of area * @next_off: return value for the next offset to start searching * * For atomic allocations, check if the backing pages are populated. * * RETURNS: * Bool if the backing pages are populated. * next_index is to skip over unpopulated blocks in pcpu_find_block_fit. */ static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits, int *next_off) { unsigned int start, end; start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE); end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE); start = find_next_zero_bit(chunk->populated, end, start); if (start >= end) return true; end = find_next_bit(chunk->populated, end, start + 1); *next_off = end * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; return false; } /** * pcpu_find_block_fit - finds the block index to start searching * @chunk: chunk of interest * @alloc_bits: size of request in allocation units * @align: alignment of area (max PAGE_SIZE bytes) * @pop_only: use populated regions only * * Given a chunk and an allocation spec, find the offset to begin searching * for a free region. This iterates over the bitmap metadata blocks to * find an offset that will be guaranteed to fit the requirements. It is * not quite first fit as if the allocation does not fit in the contig hint * of a block or chunk, it is skipped. This errs on the side of caution * to prevent excess iteration. Poor alignment can cause the allocator to * skip over blocks and chunks that have valid free areas. * * RETURNS: * The offset in the bitmap to begin searching. * -1 if no offset is found. 
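 *
 * For example, assuming PCPU_MIN_ALLOC_SIZE is 4 bytes, a 64 byte
 * allocation with 16 byte alignment arrives here as alloc_bits = 16 and
 * align = 4 (both in allocation units); chunks and blocks whose contig
 * hints cannot cover 16 suitably aligned bits are skipped without
 * touching their alloc_maps.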
*/ static int pcpu_find_block_fit(struct pcpu_chunk *chunk, int alloc_bits, size_t align, bool pop_only) { struct pcpu_block_md *chunk_md = &chunk->chunk_md; int bit_off, bits, next_off; /* * This is an optimization to prevent scanning by assuming if the * allocation cannot fit in the global hint, there is memory pressure * and creating a new chunk would happen soon. */ if (!pcpu_check_block_hint(chunk_md, alloc_bits, align)) return -1; bit_off = pcpu_next_hint(chunk_md, alloc_bits); bits = 0; pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) { if (!pop_only || pcpu_is_populated(chunk, bit_off, bits, &next_off)) break; bit_off = next_off; bits = 0; } if (bit_off == pcpu_chunk_map_bits(chunk)) return -1; return bit_off; } /* * pcpu_find_zero_area - modified from bitmap_find_next_zero_area_off() * @map: the address to base the search on * @size: the bitmap size in bits * @start: the bitnumber to start searching at * @nr: the number of zeroed bits we're looking for * @align_mask: alignment mask for zero area * @largest_off: offset of the largest area skipped * @largest_bits: size of the largest area skipped * * The @align_mask should be one less than a power of 2. * * This is a modified version of bitmap_find_next_zero_area_off() to remember * the largest area that was skipped. This is imperfect, but in general is * good enough. The largest remembered region is the largest failed region * seen. This does not include anything we possibly skipped due to alignment. * pcpu_block_update_scan() does scan backwards to try and recover what was * lost to alignment. While this can cause scanning to miss earlier possible * free areas, smaller allocations will eventually fill those holes. */ static unsigned long pcpu_find_zero_area(unsigned long *map, unsigned long size, unsigned long start, unsigned long nr, unsigned long align_mask, unsigned long *largest_off, unsigned long *largest_bits) { unsigned long index, end, i, area_off, area_bits; again: index = find_next_zero_bit(map, size, start); /* Align allocation */ index = __ALIGN_MASK(index, align_mask); area_off = index; end = index + nr; if (end > size) return end; i = find_next_bit(map, end, index); if (i < end) { area_bits = i - area_off; /* remember largest unused area with best alignment */ if (area_bits > *largest_bits || (area_bits == *largest_bits && *largest_off && (!area_off || __ffs(area_off) > __ffs(*largest_off)))) { *largest_off = area_off; *largest_bits = area_bits; } start = i + 1; goto again; } return index; } /** * pcpu_alloc_area - allocates an area from a pcpu_chunk * @chunk: chunk of interest * @alloc_bits: size of request in allocation units * @align: alignment of area (max PAGE_SIZE) * @start: bit_off to start searching * * This function takes in a @start offset to begin searching to fit an * allocation of @alloc_bits with alignment @align. It needs to scan * the allocation map because if it fits within the block's contig hint, * @start will be block->first_free. This is an attempt to fill the * allocation prior to breaking the contig hint. The allocation and * boundary maps are updated accordingly if it confirms a valid * free area. * * RETURNS: * Allocated addr offset in @chunk on success. * -1 if no matching area is found. */ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits, size_t align, int start) { struct pcpu_block_md *chunk_md = &chunk->chunk_md; size_t align_mask = (align) ? 
(align - 1) : 0; unsigned long area_off = 0, area_bits = 0; int bit_off, end, oslot; lockdep_assert_held(&pcpu_lock); oslot = pcpu_chunk_slot(chunk); /* * Search to find a fit. */ end = min_t(int, start + alloc_bits + PCPU_BITMAP_BLOCK_BITS, pcpu_chunk_map_bits(chunk)); bit_off = pcpu_find_zero_area(chunk->alloc_map, end, start, alloc_bits, align_mask, &area_off, &area_bits); if (bit_off >= end) return -1; if (area_bits) pcpu_block_update_scan(chunk, area_off, area_bits); /* update alloc map */ bitmap_set(chunk->alloc_map, bit_off, alloc_bits); /* update boundary map */ set_bit(bit_off, chunk->bound_map); bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1); set_bit(bit_off + alloc_bits, chunk->bound_map); chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE; /* update first free bit */ if (bit_off == chunk_md->first_free) chunk_md->first_free = find_next_zero_bit( chunk->alloc_map, pcpu_chunk_map_bits(chunk), bit_off + alloc_bits); pcpu_block_update_hint_alloc(chunk, bit_off, alloc_bits); pcpu_chunk_relocate(chunk, oslot); return bit_off * PCPU_MIN_ALLOC_SIZE; } /** * pcpu_free_area - frees the corresponding offset * @chunk: chunk of interest * @off: addr offset into chunk * * This function determines the size of an allocation to free using * the boundary bitmap and clears the allocation map. * * RETURNS: * Number of freed bytes. */ static int pcpu_free_area(struct pcpu_chunk *chunk, int off) { struct pcpu_block_md *chunk_md = &chunk->chunk_md; int bit_off, bits, end, oslot, freed; lockdep_assert_held(&pcpu_lock); pcpu_stats_area_dealloc(chunk); oslot = pcpu_chunk_slot(chunk); bit_off = off / PCPU_MIN_ALLOC_SIZE; /* find end index */ end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk), bit_off + 1); bits = end - bit_off; bitmap_clear(chunk->alloc_map, bit_off, bits); freed = bits * PCPU_MIN_ALLOC_SIZE; /* update metadata */ chunk->free_bytes += freed; /* update first free bit */ chunk_md->first_free = min(chunk_md->first_free, bit_off); pcpu_block_update_hint_free(chunk, bit_off, bits); pcpu_chunk_relocate(chunk, oslot); return freed; } static void pcpu_init_md_block(struct pcpu_block_md *block, int nr_bits) { block->scan_hint = 0; block->contig_hint = nr_bits; block->left_free = nr_bits; block->right_free = nr_bits; block->first_free = 0; block->nr_bits = nr_bits; } static void pcpu_init_md_blocks(struct pcpu_chunk *chunk) { struct pcpu_block_md *md_block; /* init the chunk's block */ pcpu_init_md_block(&chunk->chunk_md, pcpu_chunk_map_bits(chunk)); for (md_block = chunk->md_blocks; md_block != chunk->md_blocks + pcpu_chunk_nr_blocks(chunk); md_block++) pcpu_init_md_block(md_block, PCPU_BITMAP_BLOCK_BITS); } /** * pcpu_alloc_first_chunk - creates chunks that serve the first chunk * @tmp_addr: the start of the region served * @map_size: size of the region served * * This is responsible for creating the chunks that serve the first chunk. The * base_addr is page aligned down of @tmp_addr while the region end is page * aligned up. Offsets are kept track of to determine the region served. All * this is done to appease the bitmap allocator in avoiding partial blocks. * * RETURNS: * Chunk serving the region at @tmp_addr of @map_size. 
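 *
 * As a rough example with 4K pages, tmp_addr = base + 0x3400 and
 * map_size = 0x5000 give:
 *
 *	aligned_addr = base + 0x3000	start_offset = 0x400
 *	region_size  = ALIGN(0x400 + 0x5000, PAGE_SIZE) = 0x6000
 *	end_offset   = 0x6000 - 0x400 - 0x5000 = 0xc00
 *
 * so the chunk spans 6 pages and the first 0x400 and last 0xc00 bytes
 * are hidden from the bitmap allocator as permanently allocated.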
*/ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, int map_size) { struct pcpu_chunk *chunk; unsigned long aligned_addr; int start_offset, offset_bits, region_size, region_bits; size_t alloc_size; /* region calculations */ aligned_addr = tmp_addr & PAGE_MASK; start_offset = tmp_addr - aligned_addr; region_size = ALIGN(start_offset + map_size, PAGE_SIZE); /* allocate chunk */ alloc_size = struct_size(chunk, populated, BITS_TO_LONGS(region_size >> PAGE_SHIFT)); chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!chunk) panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); INIT_LIST_HEAD(&chunk->list); chunk->base_addr = (void *)aligned_addr; chunk->start_offset = start_offset; chunk->end_offset = region_size - chunk->start_offset - map_size; chunk->nr_pages = region_size >> PAGE_SHIFT; region_bits = pcpu_chunk_map_bits(chunk); alloc_size = BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]); chunk->alloc_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!chunk->alloc_map) panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); alloc_size = BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]); chunk->bound_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!chunk->bound_map) panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); alloc_size = pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]); chunk->md_blocks = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!chunk->md_blocks) panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); #ifdef CONFIG_MEMCG_KMEM /* first chunk is free to use */ chunk->obj_cgroups = NULL; #endif pcpu_init_md_blocks(chunk); /* manage populated page bitmap */ chunk->immutable = true; bitmap_fill(chunk->populated, chunk->nr_pages); chunk->nr_populated = chunk->nr_pages; chunk->nr_empty_pop_pages = chunk->nr_pages; chunk->free_bytes = map_size; if (chunk->start_offset) { /* hide the beginning of the bitmap */ offset_bits = chunk->start_offset / PCPU_MIN_ALLOC_SIZE; bitmap_set(chunk->alloc_map, 0, offset_bits); set_bit(0, chunk->bound_map); set_bit(offset_bits, chunk->bound_map); chunk->chunk_md.first_free = offset_bits; pcpu_block_update_hint_alloc(chunk, 0, offset_bits); } if (chunk->end_offset) { /* hide the end of the bitmap */ offset_bits = chunk->end_offset / PCPU_MIN_ALLOC_SIZE; bitmap_set(chunk->alloc_map, pcpu_chunk_map_bits(chunk) - offset_bits, offset_bits); set_bit((start_offset + map_size) / PCPU_MIN_ALLOC_SIZE, chunk->bound_map); set_bit(region_bits, chunk->bound_map); pcpu_block_update_hint_alloc(chunk, pcpu_chunk_map_bits(chunk) - offset_bits, offset_bits); } return chunk; } static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) { struct pcpu_chunk *chunk; int region_bits; chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp); if (!chunk) return NULL; INIT_LIST_HEAD(&chunk->list); chunk->nr_pages = pcpu_unit_pages; region_bits = pcpu_chunk_map_bits(chunk); chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]), gfp); if (!chunk->alloc_map) goto alloc_map_fail; chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]), gfp); if (!chunk->bound_map) goto bound_map_fail; chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]), gfp); if (!chunk->md_blocks) goto md_blocks_fail; #ifdef CONFIG_MEMCG_KMEM if (!mem_cgroup_kmem_disabled()) { chunk->obj_cgroups = pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) * sizeof(struct obj_cgroup *), gfp); if 
(!chunk->obj_cgroups) goto objcg_fail; } #endif pcpu_init_md_blocks(chunk); /* init metadata */ chunk->free_bytes = chunk->nr_pages * PAGE_SIZE; return chunk; #ifdef CONFIG_MEMCG_KMEM objcg_fail: pcpu_mem_free(chunk->md_blocks); #endif md_blocks_fail: pcpu_mem_free(chunk->bound_map); bound_map_fail: pcpu_mem_free(chunk->alloc_map); alloc_map_fail: pcpu_mem_free(chunk); return NULL; } static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; #ifdef CONFIG_MEMCG_KMEM pcpu_mem_free(chunk->obj_cgroups); #endif pcpu_mem_free(chunk->md_blocks); pcpu_mem_free(chunk->bound_map); pcpu_mem_free(chunk->alloc_map); pcpu_mem_free(chunk); } /** * pcpu_chunk_populated - post-population bookkeeping * @chunk: pcpu_chunk which got populated * @page_start: the start page * @page_end: the end page * * Pages in [@page_start,@page_end) have been populated to @chunk. Update * the bookkeeping information accordingly. Must be called after each * successful population. */ static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start, int page_end) { int nr = page_end - page_start; lockdep_assert_held(&pcpu_lock); bitmap_set(chunk->populated, page_start, nr); chunk->nr_populated += nr; pcpu_nr_populated += nr; pcpu_update_empty_pages(chunk, nr); } /** * pcpu_chunk_depopulated - post-depopulation bookkeeping * @chunk: pcpu_chunk which got depopulated * @page_start: the start page * @page_end: the end page * * Pages in [@page_start,@page_end) have been depopulated from @chunk. * Update the bookkeeping information accordingly. Must be called after * each successful depopulation. */ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk, int page_start, int page_end) { int nr = page_end - page_start; lockdep_assert_held(&pcpu_lock); bitmap_clear(chunk->populated, page_start, nr); chunk->nr_populated -= nr; pcpu_nr_populated -= nr; pcpu_update_empty_pages(chunk, -nr); } /* * Chunk management implementation. * * To allow different implementations, chunk alloc/free and * [de]population are implemented in a separate file which is pulled * into this file and compiled together. The following functions * should be implemented. * * pcpu_populate_chunk - populate the specified range of a chunk * pcpu_depopulate_chunk - depopulate the specified range of a chunk * pcpu_post_unmap_tlb_flush - flush tlb for the specified range of a chunk * pcpu_create_chunk - create a new chunk * pcpu_destroy_chunk - destroy a chunk, always preceded by full depop * pcpu_addr_to_page - translate address to physical address * pcpu_verify_alloc_info - check alloc_info is acceptable during init */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end, gfp_t gfp); static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end); static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, int page_start, int page_end); static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp); static void pcpu_destroy_chunk(struct pcpu_chunk *chunk); static struct page *pcpu_addr_to_page(void *addr); static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); #ifdef CONFIG_NEED_PER_CPU_KM #include "percpu-km.c" #else #include "percpu-vm.c" #endif /** * pcpu_chunk_addr_search - determine chunk containing specified address * @addr: address for which the chunk needs to be determined. * * This is an internal function that handles all but static allocations. * Static percpu address values should never be passed into the allocator. 
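 *
 * Callers in this file first convert the __percpu pointer back into an
 * address, roughly:
 *
 *	void *addr = __pcpu_ptr_to_addr(ptr);
 *	struct pcpu_chunk *chunk = pcpu_chunk_addr_search(addr);
 *	int off = addr - chunk->base_addr;
 *
 * which is the lookup pattern used by free_percpu() and
 * pcpu_alloc_size().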
* * RETURNS: * The address of the found chunk. */ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) { /* is it in the dynamic region (first chunk)? */ if (pcpu_addr_in_chunk(pcpu_first_chunk, addr)) return pcpu_first_chunk; /* is it in the reserved region? */ if (pcpu_addr_in_chunk(pcpu_reserved_chunk, addr)) return pcpu_reserved_chunk; /* * The address is relative to unit0 which might be unused and * thus unmapped. Offset the address to the unit space of the * current processor before looking it up in the vmalloc * space. Note that any possible cpu id can be used here, so * there's no need to worry about preemption or cpu hotplug. */ addr += pcpu_unit_offsets[raw_smp_processor_id()]; return pcpu_get_page_chunk(pcpu_addr_to_page(addr)); } #ifdef CONFIG_MEMCG_KMEM static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp) { struct obj_cgroup *objcg; if (!memcg_kmem_online() || !(gfp & __GFP_ACCOUNT)) return true; objcg = current_obj_cgroup(); if (!objcg) return true; if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size))) return false; *objcgp = objcg; return true; } static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, struct pcpu_chunk *chunk, int off, size_t size) { if (!objcg) return; if (likely(chunk && chunk->obj_cgroups)) { obj_cgroup_get(objcg); chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg; rcu_read_lock(); mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, pcpu_obj_full_size(size)); rcu_read_unlock(); } else { obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size)); } } static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) { struct obj_cgroup *objcg; if (unlikely(!chunk->obj_cgroups)) return; objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT]; if (!objcg) return; chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL; obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size)); rcu_read_lock(); mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, -pcpu_obj_full_size(size)); rcu_read_unlock(); obj_cgroup_put(objcg); } #else /* CONFIG_MEMCG_KMEM */ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp) { return true; } static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, struct pcpu_chunk *chunk, int off, size_t size) { } static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) { } #endif /* CONFIG_MEMCG_KMEM */ /** * pcpu_alloc - the percpu allocator * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * @reserved: allocate from the reserved chunk if available * @gfp: allocation flags * * Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't * contain %GFP_KERNEL, the allocation is atomic. If @gfp has __GFP_NOWARN * then no warning will be triggered on invalid or failed allocation * requests. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. 
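 *
 * Most users reach this through the wrappers below; a minimal usage
 * sketch (struct and field names purely illustrative):
 *
 *	struct my_stats { u64 rx, tx; };
 *	struct my_stats __percpu *stats = alloc_percpu(struct my_stats);
 *	unsigned int cpu;
 *	u64 rx_total = 0;
 *
 *	if (!stats)
 *		return -ENOMEM;
 *	this_cpu_inc(stats->rx);
 *	for_each_online_cpu(cpu)
 *		rx_total += per_cpu_ptr(stats, cpu)->rx;
 *	free_percpu(stats);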
*/ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t gfp) { gfp_t pcpu_gfp; bool is_atomic; bool do_warn; struct obj_cgroup *objcg = NULL; static int warn_limit = 10; struct pcpu_chunk *chunk, *next; const char *err; int slot, off, cpu, ret; unsigned long flags; void __percpu *ptr; size_t bits, bit_align; gfp = current_gfp_context(gfp); /* whitelisted flags that can be passed to the backing allocators */ pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; do_warn = !(gfp & __GFP_NOWARN); /* * There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE, * therefore alignment must be a minimum of that many bytes. * An allocation may have internal fragmentation from rounding up * of up to PCPU_MIN_ALLOC_SIZE - 1 bytes. */ if (unlikely(align < PCPU_MIN_ALLOC_SIZE)) align = PCPU_MIN_ALLOC_SIZE; size = ALIGN(size, PCPU_MIN_ALLOC_SIZE); bits = size >> PCPU_MIN_ALLOC_SHIFT; bit_align = align >> PCPU_MIN_ALLOC_SHIFT; if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE || !is_power_of_2(align))) { WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n", size, align); return NULL; } if (unlikely(!pcpu_memcg_pre_alloc_hook(size, gfp, &objcg))) return NULL; if (!is_atomic) { /* * pcpu_balance_workfn() allocates memory under this mutex, * and it may wait for memory reclaim. Allow current task * to become OOM victim, in case of memory pressure. */ if (gfp & __GFP_NOFAIL) { mutex_lock(&pcpu_alloc_mutex); } else if (mutex_lock_killable(&pcpu_alloc_mutex)) { pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); return NULL; } } spin_lock_irqsave(&pcpu_lock, flags); /* serve reserved allocations from the reserved chunk if available */ if (reserved && pcpu_reserved_chunk) { chunk = pcpu_reserved_chunk; off = pcpu_find_block_fit(chunk, bits, bit_align, is_atomic); if (off < 0) { err = "alloc from reserved chunk failed"; goto fail_unlock; } off = pcpu_alloc_area(chunk, bits, bit_align, off); if (off >= 0) goto area_found; err = "alloc from reserved chunk failed"; goto fail_unlock; } restart: /* search through normal chunks */ for (slot = pcpu_size_to_slot(size); slot <= pcpu_free_slot; slot++) { list_for_each_entry_safe(chunk, next, &pcpu_chunk_lists[slot], list) { off = pcpu_find_block_fit(chunk, bits, bit_align, is_atomic); if (off < 0) { if (slot < PCPU_SLOT_FAIL_THRESHOLD) pcpu_chunk_move(chunk, 0); continue; } off = pcpu_alloc_area(chunk, bits, bit_align, off); if (off >= 0) { pcpu_reintegrate_chunk(chunk); goto area_found; } } } spin_unlock_irqrestore(&pcpu_lock, flags); if (is_atomic) { err = "atomic alloc failed, no space left"; goto fail; } /* No space left. Create a new chunk. 
*/ if (list_empty(&pcpu_chunk_lists[pcpu_free_slot])) { chunk = pcpu_create_chunk(pcpu_gfp); if (!chunk) { err = "failed to allocate new chunk"; goto fail; } spin_lock_irqsave(&pcpu_lock, flags); pcpu_chunk_relocate(chunk, -1); } else { spin_lock_irqsave(&pcpu_lock, flags); } goto restart; area_found: pcpu_stats_area_alloc(chunk, size); spin_unlock_irqrestore(&pcpu_lock, flags); /* populate if not all pages are already there */ if (!is_atomic) { unsigned int page_end, rs, re; rs = PFN_DOWN(off); page_end = PFN_UP(off + size); for_each_clear_bitrange_from(rs, re, chunk->populated, page_end) { WARN_ON(chunk->immutable); ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp); spin_lock_irqsave(&pcpu_lock, flags); if (ret) { pcpu_free_area(chunk, off); err = "failed to populate"; goto fail_unlock; } pcpu_chunk_populated(chunk, rs, re); spin_unlock_irqrestore(&pcpu_lock, flags); } mutex_unlock(&pcpu_alloc_mutex); } if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); /* clear the areas and return address relative to base address */ for_each_possible_cpu(cpu) memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); kmemleak_alloc_percpu(ptr, size, gfp); trace_percpu_alloc_percpu(_RET_IP_, reserved, is_atomic, size, align, chunk->base_addr, off, ptr, pcpu_obj_full_size(size), gfp); pcpu_memcg_post_alloc_hook(objcg, chunk, off, size); return ptr; fail_unlock: spin_unlock_irqrestore(&pcpu_lock, flags); fail: trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); if (do_warn && warn_limit) { pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", size, align, is_atomic, err); if (!is_atomic) dump_stack(); if (!--warn_limit) pr_info("limit reached, disable warning\n"); } if (is_atomic) { /* see the flag handling in pcpu_balance_workfn() */ pcpu_atomic_alloc_failed = true; pcpu_schedule_balance_work(); } else { mutex_unlock(&pcpu_alloc_mutex); } pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); return NULL; } /** * __alloc_percpu_gfp - allocate dynamic percpu area * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * @gfp: allocation flags * * Allocate zero-filled percpu area of @size bytes aligned at @align. If * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can * be called from any context but is a lot more likely to fail. If @gfp * has __GFP_NOWARN then no warning will be triggered on invalid or failed * allocation requests. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) { return pcpu_alloc(size, align, false, gfp); } EXPORT_SYMBOL_GPL(__alloc_percpu_gfp); /** * __alloc_percpu - allocate dynamic percpu area * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL). */ void __percpu *__alloc_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, false, GFP_KERNEL); } EXPORT_SYMBOL_GPL(__alloc_percpu); /** * __alloc_reserved_percpu - allocate reserved percpu area * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * * Allocate zero-filled percpu area of @size bytes aligned at @align * from reserved percpu area if arch has set it up; otherwise, * allocation is served from the same dynamic area. Might sleep. * Might trigger writeouts. * * CONTEXT: * Does GFP_KERNEL allocation. 
* * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, true, GFP_KERNEL); } /** * pcpu_balance_free - manage the amount of free chunks * @empty_only: free chunks only if there are no populated pages * * If empty_only is %false, reclaim all fully free chunks regardless of the * number of populated pages. Otherwise, only reclaim chunks that have no * populated pages. * * CONTEXT: * pcpu_lock (can be dropped temporarily) */ static void pcpu_balance_free(bool empty_only) { LIST_HEAD(to_free); struct list_head *free_head = &pcpu_chunk_lists[pcpu_free_slot]; struct pcpu_chunk *chunk, *next; lockdep_assert_held(&pcpu_lock); /* * There's no reason to keep around multiple unused chunks and VM * areas can be scarce. Destroy all free chunks except for one. */ list_for_each_entry_safe(chunk, next, free_head, list) { WARN_ON(chunk->immutable); /* spare the first one */ if (chunk == list_first_entry(free_head, struct pcpu_chunk, list)) continue; if (!empty_only || chunk->nr_empty_pop_pages == 0) list_move(&chunk->list, &to_free); } if (list_empty(&to_free)) return; spin_unlock_irq(&pcpu_lock); list_for_each_entry_safe(chunk, next, &to_free, list) { unsigned int rs, re; for_each_set_bitrange(rs, re, chunk->populated, chunk->nr_pages) { pcpu_depopulate_chunk(chunk, rs, re); spin_lock_irq(&pcpu_lock); pcpu_chunk_depopulated(chunk, rs, re); spin_unlock_irq(&pcpu_lock); } pcpu_destroy_chunk(chunk); cond_resched(); } spin_lock_irq(&pcpu_lock); } /** * pcpu_balance_populated - manage the amount of populated pages * * Maintain a certain amount of populated pages to satisfy atomic allocations. * It is possible that this is called when physical memory is scarce causing * OOM killer to be triggered. We should avoid doing so until an actual * allocation causes the failure as it is possible that requests can be * serviced from already backed regions. * * CONTEXT: * pcpu_lock (can be dropped temporarily) */ static void pcpu_balance_populated(void) { /* gfp flags passed to underlying allocators */ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; struct pcpu_chunk *chunk; int slot, nr_to_pop, ret; lockdep_assert_held(&pcpu_lock); /* * Ensure there are certain number of free populated pages for * atomic allocs. Fill up from the most packed so that atomic * allocs don't increase fragmentation. If atomic allocation * failed previously, always populate the maximum amount. This * should prevent atomic allocs larger than PAGE_SIZE from keeping * failing indefinitely; however, large atomic allocs are not * something we support properly and can be highly unreliable and * inefficient. 
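	 *
	 * As a sketch, with (say) PCPU_EMPTY_POP_PAGES_HIGH == 4, three
	 * empty populated pages left system-wide and no recorded atomic
	 * failure, nr_to_pop = clamp(4 - 3, 0, 4) = 1, so a single page is
	 * populated from the most packed chunk that still has unpopulated
	 * pages.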
*/ retry_pop: if (pcpu_atomic_alloc_failed) { nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH; /* best effort anyway, don't worry about synchronization */ pcpu_atomic_alloc_failed = false; } else { nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - pcpu_nr_empty_pop_pages, 0, PCPU_EMPTY_POP_PAGES_HIGH); } for (slot = pcpu_size_to_slot(PAGE_SIZE); slot <= pcpu_free_slot; slot++) { unsigned int nr_unpop = 0, rs, re; if (!nr_to_pop) break; list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) { nr_unpop = chunk->nr_pages - chunk->nr_populated; if (nr_unpop) break; } if (!nr_unpop) continue; /* @chunk can't go away while pcpu_alloc_mutex is held */ for_each_clear_bitrange(rs, re, chunk->populated, chunk->nr_pages) { int nr = min_t(int, re - rs, nr_to_pop); spin_unlock_irq(&pcpu_lock); ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp); cond_resched(); spin_lock_irq(&pcpu_lock); if (!ret) { nr_to_pop -= nr; pcpu_chunk_populated(chunk, rs, rs + nr); } else { nr_to_pop = 0; } if (!nr_to_pop) break; } } if (nr_to_pop) { /* ran out of chunks to populate, create a new one and retry */ spin_unlock_irq(&pcpu_lock); chunk = pcpu_create_chunk(gfp); cond_resched(); spin_lock_irq(&pcpu_lock); if (chunk) { pcpu_chunk_relocate(chunk, -1); goto retry_pop; } } } /** * pcpu_reclaim_populated - scan over to_depopulate chunks and free empty pages * * Scan over chunks in the depopulate list and try to release unused populated * pages back to the system. Depopulated chunks are sidelined to prevent * repopulating these pages unless required. Fully free chunks are reintegrated * and freed accordingly (1 is kept around). If we drop below the empty * populated pages threshold, reintegrate the chunk if it has empty free pages. * Each chunk is scanned in the reverse order to keep populated pages close to * the beginning of the chunk. * * CONTEXT: * pcpu_lock (can be dropped temporarily) * */ static void pcpu_reclaim_populated(void) { struct pcpu_chunk *chunk; struct pcpu_block_md *block; int freed_page_start, freed_page_end; int i, end; bool reintegrate; lockdep_assert_held(&pcpu_lock); /* * Once a chunk is isolated to the to_depopulate list, the chunk is no * longer discoverable to allocations whom may populate pages. The only * other accessor is the free path which only returns area back to the * allocator not touching the populated bitmap. */ while ((chunk = list_first_entry_or_null( &pcpu_chunk_lists[pcpu_to_depopulate_slot], struct pcpu_chunk, list))) { WARN_ON(chunk->immutable); /* * Scan chunk's pages in the reverse order to keep populated * pages close to the beginning of the chunk. */ freed_page_start = chunk->nr_pages; freed_page_end = 0; reintegrate = false; for (i = chunk->nr_pages - 1, end = -1; i >= 0; i--) { /* no more work to do */ if (chunk->nr_empty_pop_pages == 0) break; /* reintegrate chunk to prevent atomic alloc failures */ if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_HIGH) { reintegrate = true; break; } /* * If the page is empty and populated, start or * extend the (i, end) range. If i == 0, decrease * i and perform the depopulation to cover the last * (first) page in the chunk. 
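			 *
			 * As a sketch, in an 8 page chunk where only pages
			 * 2-4 are empty and populated, the reverse walk sets
			 * end = 4 at page 4, keeps walking down through pages
			 * 3 and 2, and on reaching the non-reclaimable page 1
			 * depopulates [i + 1, end + 1) = [2, 5) in one call.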
*/ block = chunk->md_blocks + i; if (block->contig_hint == PCPU_BITMAP_BLOCK_BITS && test_bit(i, chunk->populated)) { if (end == -1) end = i; if (i > 0) continue; i--; } /* depopulate if there is an active range */ if (end == -1) continue; spin_unlock_irq(&pcpu_lock); pcpu_depopulate_chunk(chunk, i + 1, end + 1); cond_resched(); spin_lock_irq(&pcpu_lock); pcpu_chunk_depopulated(chunk, i + 1, end + 1); freed_page_start = min(freed_page_start, i + 1); freed_page_end = max(freed_page_end, end + 1); /* reset the range and continue */ end = -1; } /* batch tlb flush per chunk to amortize cost */ if (freed_page_start < freed_page_end) { spin_unlock_irq(&pcpu_lock); pcpu_post_unmap_tlb_flush(chunk, freed_page_start, freed_page_end); cond_resched(); spin_lock_irq(&pcpu_lock); } if (reintegrate || chunk->free_bytes == pcpu_unit_size) pcpu_reintegrate_chunk(chunk); else list_move_tail(&chunk->list, &pcpu_chunk_lists[pcpu_sidelined_slot]); } } /** * pcpu_balance_workfn - manage the amount of free chunks and populated pages * @work: unused * * For each chunk type, manage the number of fully free chunks and the number of * populated pages. An important thing to consider is when pages are freed and * how they contribute to the global counts. */ static void pcpu_balance_workfn(struct work_struct *work) { /* * pcpu_balance_free() is called twice because the first time we may * trim pages in the active pcpu_nr_empty_pop_pages which may cause us * to grow other chunks. This then gives pcpu_reclaim_populated() time * to move fully free chunks to the active list to be freed if * appropriate. */ mutex_lock(&pcpu_alloc_mutex); spin_lock_irq(&pcpu_lock); pcpu_balance_free(false); pcpu_reclaim_populated(); pcpu_balance_populated(); pcpu_balance_free(true); spin_unlock_irq(&pcpu_lock); mutex_unlock(&pcpu_alloc_mutex); } /** * pcpu_alloc_size - the size of the dynamic percpu area * @ptr: pointer to the dynamic percpu area * * Returns the size of the @ptr allocation. This is undefined for statically * defined percpu variables as there is no corresponding chunk->bound_map. * * RETURNS: * The size of the dynamic percpu area. * * CONTEXT: * Can be called from atomic context. */ size_t pcpu_alloc_size(void __percpu *ptr) { struct pcpu_chunk *chunk; unsigned long bit_off, end; void *addr; if (!ptr) return 0; addr = __pcpu_ptr_to_addr(ptr); /* No pcpu_lock here: ptr has not been freed, so chunk is still alive */ chunk = pcpu_chunk_addr_search(addr); bit_off = (addr - chunk->base_addr) / PCPU_MIN_ALLOC_SIZE; end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk), bit_off + 1); return (end - bit_off) * PCPU_MIN_ALLOC_SIZE; } /** * free_percpu - free percpu area * @ptr: pointer to area to free * * Free percpu area @ptr. * * CONTEXT: * Can be called from atomic context. */ void free_percpu(void __percpu *ptr) { void *addr; struct pcpu_chunk *chunk; unsigned long flags; int size, off; bool need_balance = false; if (!ptr) return; kmemleak_free_percpu(ptr); addr = __pcpu_ptr_to_addr(ptr); chunk = pcpu_chunk_addr_search(addr); off = addr - chunk->base_addr; spin_lock_irqsave(&pcpu_lock, flags); size = pcpu_free_area(chunk, off); pcpu_memcg_free_hook(chunk, off, size); /* * If there are more than one fully free chunks, wake up grim reaper. * If the chunk is isolated, it may be in the process of being * reclaimed. Let reclaim manage cleaning up of that chunk. 
*/ if (!chunk->isolated && chunk->free_bytes == pcpu_unit_size) { struct pcpu_chunk *pos; list_for_each_entry(pos, &pcpu_chunk_lists[pcpu_free_slot], list) if (pos != chunk) { need_balance = true; break; } } else if (pcpu_should_reclaim_chunk(chunk)) { pcpu_isolate_chunk(chunk); need_balance = true; } trace_percpu_free_percpu(chunk->base_addr, off, ptr); spin_unlock_irqrestore(&pcpu_lock, flags); if (need_balance) pcpu_schedule_balance_work(); } EXPORT_SYMBOL_GPL(free_percpu); bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr) { #ifdef CONFIG_SMP const size_t static_size = __per_cpu_end - __per_cpu_start; void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); unsigned int cpu; for_each_possible_cpu(cpu) { void *start = per_cpu_ptr(base, cpu); void *va = (void *)addr; if (va >= start && va < start + static_size) { if (can_addr) { *can_addr = (unsigned long) (va - start); *can_addr += (unsigned long) per_cpu_ptr(base, get_boot_cpu_id()); } return true; } } #endif /* on UP, can't distinguish from other static vars, always false */ return false; } /** * is_kernel_percpu_address - test whether address is from static percpu area * @addr: address to test * * Test whether @addr belongs to in-kernel static percpu area. Module * static percpu areas are not considered. For those, use * is_module_percpu_address(). * * RETURNS: * %true if @addr is from in-kernel static percpu area, %false otherwise. */ bool is_kernel_percpu_address(unsigned long addr) { return __is_kernel_percpu_address(addr, NULL); } /** * per_cpu_ptr_to_phys - convert translated percpu address to physical address * @addr: the address to be converted to physical address * * Given @addr which is dereferenceable address obtained via one of * percpu access macros, this function translates it into its physical * address. The caller is responsible for ensuring @addr stays valid * until this function finishes. * * percpu allocator has special setup for the first chunk, which currently * supports either embedding in linear address space or vmalloc mapping, * and, from the second one, the backing allocator (currently either vm or * km) provides translation. * * The addr can be translated simply without checking if it falls into the * first chunk. But the current code reflects better how percpu allocator * actually works, and the verification can discover both bugs in percpu * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current * code. * * RETURNS: * The physical address for @addr. */ phys_addr_t per_cpu_ptr_to_phys(void *addr) { void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); bool in_first_chunk = false; unsigned long first_low, first_high; unsigned int cpu; /* * The following test on unit_low/high isn't strictly * necessary but will speed up lookups of addresses which * aren't in the first chunk. * * The address check is against full chunk sizes. pcpu_base_addr * points to the beginning of the first chunk including the * static region. Assumes good intent as the first chunk may * not be full (ie. < pcpu_unit_pages in size). 
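	 *
	 * Typical usage of this helper, sketched:
	 *
	 *	phys_addr_t pa = per_cpu_ptr_to_phys(per_cpu_ptr(ptr, cpu));
	 *
	 * where ptr came from alloc_percpu() or is a static percpu variable
	 * accessed through the usual percpu macros.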
*/ first_low = (unsigned long)pcpu_base_addr + pcpu_unit_page_offset(pcpu_low_unit_cpu, 0); first_high = (unsigned long)pcpu_base_addr + pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages); if ((unsigned long)addr >= first_low && (unsigned long)addr < first_high) { for_each_possible_cpu(cpu) { void *start = per_cpu_ptr(base, cpu); if (addr >= start && addr < start + pcpu_unit_size) { in_first_chunk = true; break; } } } if (in_first_chunk) { if (!is_vmalloc_addr(addr)) return __pa(addr); else return page_to_phys(vmalloc_to_page(addr)) + offset_in_page(addr); } else return page_to_phys(pcpu_addr_to_page(addr)) + offset_in_page(addr); } /** * pcpu_alloc_alloc_info - allocate percpu allocation info * @nr_groups: the number of groups * @nr_units: the number of units * * Allocate ai which is large enough for @nr_groups groups containing * @nr_units units. The returned ai's groups[0].cpu_map points to the * cpu_map array which is long enough for @nr_units and filled with * NR_CPUS. It's the caller's responsibility to initialize cpu_map * pointer of other groups. * * RETURNS: * Pointer to the allocated pcpu_alloc_info on success, NULL on * failure. */ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, int nr_units) { struct pcpu_alloc_info *ai; size_t base_size, ai_size; void *ptr; int unit; base_size = ALIGN(struct_size(ai, groups, nr_groups), __alignof__(ai->groups[0].cpu_map[0])); ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); ptr = memblock_alloc(PFN_ALIGN(ai_size), PAGE_SIZE); if (!ptr) return NULL; ai = ptr; ptr += base_size; ai->groups[0].cpu_map = ptr; for (unit = 0; unit < nr_units; unit++) ai->groups[0].cpu_map[unit] = NR_CPUS; ai->nr_groups = nr_groups; ai->__ai_size = PFN_ALIGN(ai_size); return ai; } /** * pcpu_free_alloc_info - free percpu allocation info * @ai: pcpu_alloc_info to free * * Free @ai which was allocated by pcpu_alloc_alloc_info(). */ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) { memblock_free(ai, ai->__ai_size); } /** * pcpu_dump_alloc_info - print out information about pcpu_alloc_info * @lvl: loglevel * @ai: allocation info to dump * * Print out information about @ai using loglevel @lvl. 
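 *
 * The dump looks roughly like this (the numbers are only an example):
 *
 *	pcpu-alloc: s98200 r8192 d28776 u135168 alloc=33*4096
 *	pcpu-alloc: [0] 0 [0] 1 [0] 2 [0] 3
 *
 * i.e. static/reserved/dynamic/unit sizes followed by the allocation
 * layout, with each unit shown as its group index and cpu number.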
*/ static void pcpu_dump_alloc_info(const char *lvl, const struct pcpu_alloc_info *ai) { int group_width = 1, cpu_width = 1, width; char empty_str[] = "--------"; int alloc = 0, alloc_end = 0; int group, v; int upa, apl; /* units per alloc, allocs per line */ v = ai->nr_groups; while (v /= 10) group_width++; v = num_possible_cpus(); while (v /= 10) cpu_width++; empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0'; upa = ai->alloc_size / ai->unit_size; width = upa * (cpu_width + 1) + group_width + 3; apl = rounddown_pow_of_two(max(60 / width, 1)); printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu", lvl, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size); for (group = 0; group < ai->nr_groups; group++) { const struct pcpu_group_info *gi = &ai->groups[group]; int unit = 0, unit_end = 0; BUG_ON(gi->nr_units % upa); for (alloc_end += gi->nr_units / upa; alloc < alloc_end; alloc++) { if (!(alloc % apl)) { pr_cont("\n"); printk("%spcpu-alloc: ", lvl); } pr_cont("[%0*d] ", group_width, group); for (unit_end += upa; unit < unit_end; unit++) if (gi->cpu_map[unit] != NR_CPUS) pr_cont("%0*d ", cpu_width, gi->cpu_map[unit]); else pr_cont("%s ", empty_str); } } pr_cont("\n"); } /** * pcpu_setup_first_chunk - initialize the first percpu chunk * @ai: pcpu_alloc_info describing how to percpu area is shaped * @base_addr: mapped address * * Initialize the first percpu chunk which contains the kernel static * percpu area. This function is to be called from arch percpu area * setup path. * * @ai contains all information necessary to initialize the first * chunk and prime the dynamic percpu allocator. * * @ai->static_size is the size of static percpu area. * * @ai->reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved * percpu allocation. This is primarily used to serve module percpu * static areas on architectures where the addressing model has * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * * @ai->dyn_size determines the number of bytes available for dynamic * allocation in the first chunk. The area between @ai->static_size + * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused. * * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE * and equal to or larger than @ai->static_size + @ai->reserved_size + * @ai->dyn_size. * * @ai->atom_size is the allocation atom size and used as alignment * for vm areas. * * @ai->alloc_size is the allocation size and always multiple of * @ai->atom_size. This is larger than @ai->atom_size if * @ai->unit_size is larger than @ai->atom_size. * * @ai->nr_groups and @ai->groups describe virtual memory layout of * percpu areas. Units which should be colocated are put into the * same group. Dynamic VM areas will be allocated according to these * groupings. If @ai->nr_groups is zero, a single group containing * all units is assumed. * * The caller should have mapped the first chunk at @base_addr and * copied static data to each unit. * * The first chunk will always contain a static and a dynamic region. * However, the static region is not managed by any chunk. If the first * chunk also contains a reserved region, it is served by two chunks - * one for the reserved region and one for the dynamic region. 
They * share the same vm, but use offset regions in the area allocation map. * The chunk serving the dynamic region is circulated in the chunk slots * and available for dynamic allocation like any other chunk. */ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, void *base_addr) { size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; size_t static_size, dyn_size; unsigned long *group_offsets; size_t *group_sizes; unsigned long *unit_off; unsigned int cpu; int *unit_map; int group, unit, i; unsigned long tmp_addr; size_t alloc_size; #define PCPU_SETUP_BUG_ON(cond) do { \ if (unlikely(cond)) { \ pr_emerg("failed to initialize, %s\n", #cond); \ pr_emerg("cpu_possible_mask=%*pb\n", \ cpumask_pr_args(cpu_possible_mask)); \ pcpu_dump_alloc_info(KERN_EMERG, ai); \ BUG(); \ } \ } while (0) /* sanity checks */ PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); #ifdef CONFIG_SMP PCPU_SETUP_BUG_ON(!ai->static_size); PCPU_SETUP_BUG_ON(offset_in_page(__per_cpu_start)); #endif PCPU_SETUP_BUG_ON(!base_addr); PCPU_SETUP_BUG_ON(offset_in_page(base_addr)); PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); PCPU_SETUP_BUG_ON(offset_in_page(ai->unit_size)); PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE)); PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE); PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE)); PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) || IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE))); PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); /* process group information and build config tables accordingly */ alloc_size = ai->nr_groups * sizeof(group_offsets[0]); group_offsets = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!group_offsets) panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); alloc_size = ai->nr_groups * sizeof(group_sizes[0]); group_sizes = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!group_sizes) panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); alloc_size = nr_cpu_ids * sizeof(unit_map[0]); unit_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!unit_map) panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); alloc_size = nr_cpu_ids * sizeof(unit_off[0]); unit_off = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!unit_off) panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = UINT_MAX; pcpu_low_unit_cpu = NR_CPUS; pcpu_high_unit_cpu = NR_CPUS; for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { const struct pcpu_group_info *gi = &ai->groups[group]; group_offsets[group] = gi->base_offset; group_sizes[group] = gi->nr_units * ai->unit_size; for (i = 0; i < gi->nr_units; i++) { cpu = gi->cpu_map[i]; if (cpu == NR_CPUS) continue; PCPU_SETUP_BUG_ON(cpu >= nr_cpu_ids); PCPU_SETUP_BUG_ON(!cpu_possible(cpu)); PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX); unit_map[cpu] = unit + i; unit_off[cpu] = gi->base_offset + i * ai->unit_size; /* determine low/high unit_cpu */ if (pcpu_low_unit_cpu == NR_CPUS || unit_off[cpu] < unit_off[pcpu_low_unit_cpu]) pcpu_low_unit_cpu = cpu; if (pcpu_high_unit_cpu == NR_CPUS || unit_off[cpu] > unit_off[pcpu_high_unit_cpu]) pcpu_high_unit_cpu = cpu; } } pcpu_nr_units = unit; for_each_possible_cpu(cpu) PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX); /* we're done parsing the input, undefine BUG macro and dump config */ #undef PCPU_SETUP_BUG_ON pcpu_dump_alloc_info(KERN_DEBUG, ai); 
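	/* commit the parsed geometry to the globals used by the allocator */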
pcpu_nr_groups = ai->nr_groups; pcpu_group_offsets = group_offsets; pcpu_group_sizes = group_sizes; pcpu_unit_map = unit_map; pcpu_unit_offsets = unit_off; /* determine basic parameters */ pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_atom_size = ai->atom_size; pcpu_chunk_struct_size = struct_size((struct pcpu_chunk *)0, populated, BITS_TO_LONGS(pcpu_unit_pages)); pcpu_stats_save_ai(ai); /* * Allocate chunk slots. The slots after the active slots are: * sidelined_slot - isolated, depopulated chunks * free_slot - fully free chunks * to_depopulate_slot - isolated, chunks to depopulate */ pcpu_sidelined_slot = __pcpu_size_to_slot(pcpu_unit_size) + 1; pcpu_free_slot = pcpu_sidelined_slot + 1; pcpu_to_depopulate_slot = pcpu_free_slot + 1; pcpu_nr_slots = pcpu_to_depopulate_slot + 1; pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]), SMP_CACHE_BYTES); if (!pcpu_chunk_lists) panic("%s: Failed to allocate %zu bytes\n", __func__, pcpu_nr_slots * sizeof(pcpu_chunk_lists[0])); for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_chunk_lists[i]); /* * The end of the static region needs to be aligned with the * minimum allocation size as this offsets the reserved and * dynamic region. The first chunk ends page aligned by * expanding the dynamic region, therefore the dynamic region * can be shrunk to compensate while still staying above the * configured sizes. */ static_size = ALIGN(ai->static_size, PCPU_MIN_ALLOC_SIZE); dyn_size = ai->dyn_size - (static_size - ai->static_size); /* * Initialize first chunk: * This chunk is broken up into 3 parts: * < static | [reserved] | dynamic > * - static - there is no backing chunk because these allocations can * never be freed. * - reserved (pcpu_reserved_chunk) - exists primarily to serve * allocations from module load. * - dynamic (pcpu_first_chunk) - serves the dynamic part of the first * chunk. */ tmp_addr = (unsigned long)base_addr + static_size; if (ai->reserved_size) pcpu_reserved_chunk = pcpu_alloc_first_chunk(tmp_addr, ai->reserved_size); tmp_addr = (unsigned long)base_addr + static_size + ai->reserved_size; pcpu_first_chunk = pcpu_alloc_first_chunk(tmp_addr, dyn_size); pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; pcpu_chunk_relocate(pcpu_first_chunk, -1); /* include all regions of the first chunk */ pcpu_nr_populated += PFN_DOWN(size_sum); pcpu_stats_chunk_alloc(); trace_percpu_create_chunk(base_addr); /* we're done */ pcpu_base_addr = base_addr; } #ifdef CONFIG_SMP const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", [PCPU_FC_PAGE] = "page", }; enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; static int __init percpu_alloc_setup(char *str) { if (!str) return -EINVAL; if (0) /* nada */; #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK else if (!strcmp(str, "embed")) pcpu_chosen_fc = PCPU_FC_EMBED; #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK else if (!strcmp(str, "page")) pcpu_chosen_fc = PCPU_FC_PAGE; #endif else pr_warn("unknown allocator %s specified\n", str); return 0; } early_param("percpu_alloc", percpu_alloc_setup); /* * pcpu_embed_first_chunk() is used by the generic percpu setup. * Build it if needed by the arch config or the generic setup is going * to be used. 
*/ #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) #define BUILD_EMBED_FIRST_CHUNK #endif /* build pcpu_page_first_chunk() iff needed by the arch config */ #if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) #define BUILD_PAGE_FIRST_CHUNK #endif /* pcpu_build_alloc_info() is used by both embed and page first chunk */ #if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK) /** * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: minimum free size for dynamic allocation in bytes * @atom_size: allocation atom size * @cpu_distance_fn: callback to determine distance between cpus, optional * * This function determines grouping of units, their mappings to cpus * and other parameters considering needed percpu size, allocation * atom size and distances between CPUs. * * Groups are always multiples of atom size and CPUs which are of * LOCAL_DISTANCE both ways are grouped together and share space for * units in the same group. The returned configuration is guaranteed * to have CPUs on different nodes on different groups and >=75% usage * of allocated virtual address space. * * RETURNS: * On success, pointer to the new allocation_info is returned. On * failure, ERR_PTR value is returned. */ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info( size_t reserved_size, size_t dyn_size, size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; static struct cpumask mask __initdata; const size_t static_size = __per_cpu_end - __per_cpu_start; int nr_groups = 1, nr_units = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, best_upa; /* units_per_alloc */ int last_allocs, group, unit; unsigned int cpu, tcpu; struct pcpu_alloc_info *ai; unsigned int *cpu_map; /* this function may be called multiple times */ memset(group_map, 0, sizeof(group_map)); memset(group_cnt, 0, sizeof(group_cnt)); cpumask_clear(&mask); /* calculate size_sum and ensure dyn_size is enough for early alloc */ size_sum = PFN_ALIGN(static_size + reserved_size + max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE)); dyn_size = size_sum - static_size - reserved_size; /* * Determine min_unit_size, alloc_size and max_upa such that * alloc_size is multiple of atom_size and is the smallest * which can accommodate 4k aligned segments which are equal to * or larger than min_unit_size. */ min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); /* determine the maximum # of units that can fit in an allocation */ alloc_size = roundup(min_unit_size, atom_size); upa = alloc_size / min_unit_size; while (alloc_size % upa || (offset_in_page(alloc_size / upa))) upa--; max_upa = upa; cpumask_copy(&mask, cpu_possible_mask); /* group cpus according to their proximity */ for (group = 0; !cpumask_empty(&mask); group++) { /* pop the group's first cpu */ cpu = cpumask_first(&mask); group_map[cpu] = group; group_cnt[group]++; cpumask_clear_cpu(cpu, &mask); for_each_cpu(tcpu, &mask) { if (!cpu_distance_fn || (cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE && cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) { group_map[tcpu] = group; group_cnt[group]++; cpumask_clear_cpu(tcpu, &mask); } } } nr_groups = group; /* * Wasted space is caused by a ratio imbalance of upa to group_cnt. * Expand the unit_size until we use >= 75% of the units allocated. 
* Related to atom_size, which could be much larger than the unit_size. */ last_allocs = INT_MAX; best_upa = 0; for (upa = max_upa; upa; upa--) { int allocs = 0, wasted = 0; if (alloc_size % upa || (offset_in_page(alloc_size / upa))) continue; for (group = 0; group < nr_groups; group++) { int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); allocs += this_allocs; wasted += this_allocs * upa - group_cnt[group]; } /* * Don't accept if wastage is over 1/3. The * greater-than comparison ensures upa==1 always * passes the following check. */ if (wasted > num_possible_cpus() / 3) continue; /* and then don't consume more memory */ if (allocs > last_allocs) break; last_allocs = allocs; best_upa = upa; } BUG_ON(!best_upa); upa = best_upa; /* allocate and fill alloc_info */ for (group = 0; group < nr_groups; group++) nr_units += roundup(group_cnt[group], upa); ai = pcpu_alloc_alloc_info(nr_groups, nr_units); if (!ai) return ERR_PTR(-ENOMEM); cpu_map = ai->groups[0].cpu_map; for (group = 0; group < nr_groups; group++) { ai->groups[group].cpu_map = cpu_map; cpu_map += roundup(group_cnt[group], upa); } ai->static_size = static_size; ai->reserved_size = reserved_size; ai->dyn_size = dyn_size; ai->unit_size = alloc_size / upa; ai->atom_size = atom_size; ai->alloc_size = alloc_size; for (group = 0, unit = 0; group < nr_groups; group++) { struct pcpu_group_info *gi = &ai->groups[group]; /* * Initialize base_offset as if all groups are located * back-to-back. The caller should update this to * reflect actual allocation. */ gi->base_offset = unit * ai->unit_size; for_each_possible_cpu(cpu) if (group_map[cpu] == group) gi->cpu_map[gi->nr_units++] = cpu; gi->nr_units = roundup(gi->nr_units, upa); unit += gi->nr_units; } BUG_ON(unit != nr_units); return ai; } static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) { const unsigned long goal = __pa(MAX_DMA_ADDRESS); #ifdef CONFIG_NUMA int node = NUMA_NO_NODE; void *ptr; if (cpu_to_nd_fn) node = cpu_to_nd_fn(cpu); if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) { ptr = memblock_alloc_from(size, align, goal); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d %zu bytes at 0x%llx\n", cpu, size, (u64)__pa(ptr)); } else { ptr = memblock_alloc_try_nid(size, align, goal, MEMBLOCK_ALLOC_ACCESSIBLE, node); pr_debug("per cpu data for cpu%d %zu bytes on node%d at 0x%llx\n", cpu, size, node, (u64)__pa(ptr)); } return ptr; #else return memblock_alloc_from(size, align, goal); #endif } static void __init pcpu_fc_free(void *ptr, size_t size) { memblock_free(ptr, size); } #endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */ #if defined(BUILD_EMBED_FIRST_CHUNK) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: minimum free size for dynamic allocation in bytes * @atom_size: allocation atom size * @cpu_distance_fn: callback to determine distance between cpus, optional * @cpu_to_nd_fn: callback to convert cpu to it's node, optional * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected. * * If this function is used to setup the first chunk, it is allocated * by calling pcpu_fc_alloc and used as-is without being mapped into * vmalloc area. Allocations are always whole multiples of @atom_size * aligned to @atom_size. 
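The wastage-bounded search for best_upa above is easier to follow with numbers. The sketch below is a userspace model only: two hypothetical NUMA groups of 5 and 3 CPUs, and a hand-picked list of upa candidates that already divide the allocation atom evenly.

#include <limits.h>
#include <stdio.h>

int main(void)
{
	int group_cnt[] = { 5, 3 };                  /* CPUs per group (example) */
	int nr_groups = 2, num_possible_cpus = 8;
	int candidates[] = { 32, 16, 8, 4, 2, 1 };   /* valid upa values, descending */
	int last_allocs = INT_MAX, best_upa = 0;

	for (int c = 0; c < (int)(sizeof(candidates) / sizeof(candidates[0])); c++) {
		int upa = candidates[c], allocs = 0, wasted = 0;

		for (int g = 0; g < nr_groups; g++) {
			int this_allocs = (group_cnt[g] + upa - 1) / upa;

			allocs += this_allocs;
			wasted += this_allocs * upa - group_cnt[g];
		}
		if (wasted > num_possible_cpus / 3)
			continue;            /* too much address space wasted */
		if (allocs > last_allocs)
			break;               /* would consume more memory */
		last_allocs = allocs;
		best_upa = upa;
	}
	printf("best_upa=%d\n", best_upa);   /* prints best_upa=2 for this example */
	return 0;
}

With these numbers the search settles on upa=2: 8 of the 10 allocated units are used, which is the ">= 75%" utilization the comment above refers to.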
* * This enables the first chunk to piggy back on the linear physical * mapping which often uses larger page size. Please note that this * can result in very sparse cpu->unit mapping on NUMA machines thus * requiring large vmalloc address space. Don't use this allocator if * vmalloc space is not orders of magnitude larger than distances * between node memory addresses (ie. 32bit NUMA machines). * * @dyn_size specifies the minimum dynamic area size. * * If the needed size is smaller than the minimum or specified unit * size, the leftover is returned using pcpu_fc_free. * * RETURNS: * 0 on success, -errno on failure. */ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) { void *base = (void *)ULONG_MAX; void **areas = NULL; struct pcpu_alloc_info *ai; size_t size_sum, areas_size; unsigned long max_distance; int group, i, highest_group, rc = 0; ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, cpu_distance_fn); if (IS_ERR(ai)) return PTR_ERR(ai); size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); areas = memblock_alloc(areas_size, SMP_CACHE_BYTES); if (!areas) { rc = -ENOMEM; goto out_free; } /* allocate, copy and determine base address & max_distance */ highest_group = 0; for (group = 0; group < ai->nr_groups; group++) { struct pcpu_group_info *gi = &ai->groups[group]; unsigned int cpu = NR_CPUS; void *ptr; for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++) cpu = gi->cpu_map[i]; BUG_ON(cpu == NR_CPUS); /* allocate space for the whole group */ ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn); if (!ptr) { rc = -ENOMEM; goto out_free_areas; } /* kmemleak tracks the percpu allocations separately */ kmemleak_ignore_phys(__pa(ptr)); areas[group] = ptr; base = min(ptr, base); if (ptr > areas[highest_group]) highest_group = group; } max_distance = areas[highest_group] - base; max_distance += ai->unit_size * ai->groups[highest_group].nr_units; /* warn if maximum distance is further than 75% of vmalloc space */ if (max_distance > VMALLOC_TOTAL * 3 / 4) { pr_warn("max_distance=0x%lx too large for vmalloc space 0x%lx\n", max_distance, VMALLOC_TOTAL); #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /* and fail if we have fallback */ rc = -EINVAL; goto out_free_areas; #endif } /* * Copy data and free unused parts. This should happen after all * allocations are complete; otherwise, we may end up with * overlapping groups. 
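A small standalone sketch (userspace only, made-up addresses and sizes) of the max_distance check above, which decides whether the embedded groups are spread too sparsely for the available vmalloc space:

#include <stdio.h>

int main(void)
{
	unsigned long long base = 0x40000000ULL;            /* lowest group allocation */
	unsigned long long highest_area = 0x80000000ULL;    /* highest group allocation */
	unsigned long long unit_size = 64ULL << 10;          /* 64 KiB units (example) */
	unsigned long long nr_units_highest = 4;
	unsigned long long vmalloc_total = 32ULL << 30;      /* assumed 32 GiB vmalloc */

	unsigned long long max_distance = (highest_area - base) +
					  unit_size * nr_units_highest;

	if (max_distance > vmalloc_total * 3 / 4)
		printf("too sparse (0x%llx): fall back to the page first chunk\n",
		       max_distance);
	else
		printf("max_distance=0x%llx fits comfortably in vmalloc\n",
		       max_distance);
	return 0;
}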
*/ for (group = 0; group < ai->nr_groups; group++) { struct pcpu_group_info *gi = &ai->groups[group]; void *ptr = areas[group]; for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) { if (gi->cpu_map[i] == NR_CPUS) { /* unused unit, free whole */ pcpu_fc_free(ptr, ai->unit_size); continue; } /* copy and return the unused part */ memcpy(ptr, __per_cpu_load, ai->static_size); pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum); } } /* base address is now known, determine group base offsets */ for (group = 0; group < ai->nr_groups; group++) { ai->groups[group].base_offset = areas[group] - base; } pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n", PFN_DOWN(size_sum), ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); pcpu_setup_first_chunk(ai, base); goto out_free; out_free_areas: for (group = 0; group < ai->nr_groups; group++) if (areas[group]) pcpu_fc_free(areas[group], ai->groups[group].nr_units * ai->unit_size); out_free: pcpu_free_alloc_info(ai); if (areas) memblock_free(areas, areas_size); return rc; } #endif /* BUILD_EMBED_FIRST_CHUNK */ #ifdef BUILD_PAGE_FIRST_CHUNK #include <asm/pgalloc.h> #ifndef P4D_TABLE_SIZE #define P4D_TABLE_SIZE PAGE_SIZE #endif #ifndef PUD_TABLE_SIZE #define PUD_TABLE_SIZE PAGE_SIZE #endif #ifndef PMD_TABLE_SIZE #define PMD_TABLE_SIZE PAGE_SIZE #endif #ifndef PTE_TABLE_SIZE #define PTE_TABLE_SIZE PAGE_SIZE #endif void __init __weak pcpu_populate_pte(unsigned long addr) { pgd_t *pgd = pgd_offset_k(addr); p4d_t *p4d; pud_t *pud; pmd_t *pmd; if (pgd_none(*pgd)) { p4d = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE); if (!p4d) goto err_alloc; pgd_populate(&init_mm, pgd, p4d); } p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d)) { pud = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); if (!pud) goto err_alloc; p4d_populate(&init_mm, p4d, pud); } pud = pud_offset(p4d, addr); if (pud_none(*pud)) { pmd = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); if (!pmd) goto err_alloc; pud_populate(&init_mm, pud, pmd); } pmd = pmd_offset(pud, addr); if (!pmd_present(*pmd)) { pte_t *new; new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); if (!new) goto err_alloc; pmd_populate_kernel(&init_mm, pmd, new); } return; err_alloc: panic("%s: Failed to allocate memory\n", __func__); } /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages * @reserved_size: the size of reserved percpu area in bytes * @cpu_to_nd_fn: callback to convert cpu to it's node, optional * * This is a helper to ease setting up page-remapped first percpu * chunk and can be called where pcpu_setup_first_chunk() is expected. * * This is the basic allocator. Static percpu area is allocated * page-by-page into vmalloc area. * * RETURNS: * 0 on success, -errno on failure. 
*/ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn) { static struct vm_struct vm; struct pcpu_alloc_info *ai; char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; int unit, i, j, rc = 0; int upa; int nr_g0_units; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL); if (IS_ERR(ai)) return PTR_ERR(ai); BUG_ON(ai->nr_groups != 1); upa = ai->alloc_size/ai->unit_size; nr_g0_units = roundup(num_possible_cpus(), upa); if (WARN_ON(ai->groups[0].nr_units != nr_g0_units)) { pcpu_free_alloc_info(ai); return -EINVAL; } unit_pages = ai->unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * sizeof(pages[0])); pages = memblock_alloc(pages_size, SMP_CACHE_BYTES); if (!pages) panic("%s: Failed to allocate %zu bytes\n", __func__, pages_size); /* allocate pages */ j = 0; for (unit = 0; unit < num_possible_cpus(); unit++) { unsigned int cpu = ai->groups[0].cpu_map[unit]; for (i = 0; i < unit_pages; i++) { void *ptr; ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn); if (!ptr) { pr_warn("failed to allocate %s page for cpu%u\n", psize_str, cpu); goto enomem; } /* kmemleak tracks the percpu allocations separately */ kmemleak_ignore_phys(__pa(ptr)); pages[j++] = virt_to_page(ptr); } } /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; vm.size = num_possible_cpus() * ai->unit_size; vm_area_register_early(&vm, PAGE_SIZE); for (unit = 0; unit < num_possible_cpus(); unit++) { unsigned long unit_addr = (unsigned long)vm.addr + unit * ai->unit_size; for (i = 0; i < unit_pages; i++) pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], unit_pages); if (rc < 0) panic("failed to map percpu area, err=%d\n", rc); flush_cache_vmap_early(unit_addr, unit_addr + ai->unit_size); /* copy static data */ memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("%d %s pages/cpu s%zu r%zu d%zu\n", unit_pages, psize_str, ai->static_size, ai->reserved_size, ai->dyn_size); pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: while (--j >= 0) pcpu_fc_free(page_address(pages[j]), PAGE_SIZE); rc = -ENOMEM; out_free_ar: memblock_free(pages, pages_size); pcpu_free_alloc_info(ai); return rc; } #endif /* BUILD_PAGE_FIRST_CHUNK */ #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Generic SMP percpu area setup. * * The embedding helper is used because its behavior closely resembles * the original non-dynamic generic percpu area setup. This is * important because many archs have addressing restrictions and might * fail if the percpu area is located far away from the previous * location. As an added bonus, in non-NUMA cases, embedding is * generally a good idea TLB-wise because percpu area can piggy back * on the physical linear memory mapping which uses large page * mappings on applicable archs. */ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { unsigned long delta; unsigned int cpu; int rc; /* * Always reserve area for module percpu variables. That's * what the legacy allocator did. 
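To visualize how pcpu_page_first_chunk() above indexes its flat pages[] array and places each CPU's unit in the early vm area, here is a userspace sketch with assumed values: 4 KiB pages, 32 KiB units, four possible CPUs and an arbitrary vmalloc base address.

#include <stdio.h>

int main(void)
{
	unsigned long long vm_addr = 0xffffc90000000000ULL; /* hypothetical vm area base */
	int unit_pages = 8;                 /* 32 KiB unit / 4 KiB pages (example) */
	int num_possible_cpus = 4;
	unsigned long long unit_size = unit_pages * 4096ULL;

	for (int unit = 0; unit < num_possible_cpus; unit++) {
		unsigned long long unit_addr = vm_addr + unit * unit_size;
		int first_page_idx = unit * unit_pages;

		printf("cpu unit %d: pages[%d..%d] map at 0x%llx\n",
		       unit, first_page_idx, first_page_idx + unit_pages - 1,
		       unit_addr);
	}
	return 0;
}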
*/ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, NULL); if (rc < 0) panic("Failed to initialize percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ #else /* CONFIG_SMP */ /* * UP percpu area setup. * * UP always uses km-based percpu allocator with identity mapping. * Static percpu variables are indistinguishable from the usual static * variables and don't require any special preparation. */ void __init setup_per_cpu_areas(void) { const size_t unit_size = roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE, PERCPU_DYNAMIC_RESERVE)); struct pcpu_alloc_info *ai; void *fc; ai = pcpu_alloc_alloc_info(1, 1); fc = memblock_alloc_from(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!ai || !fc) panic("Failed to allocate memory for percpu areas."); /* kmemleak tracks the percpu allocations separately */ kmemleak_ignore_phys(__pa(fc)); ai->dyn_size = unit_size; ai->unit_size = unit_size; ai->atom_size = unit_size; ai->alloc_size = unit_size; ai->groups[0].nr_units = 1; ai->groups[0].cpu_map[0] = 0; pcpu_setup_first_chunk(ai, fc); pcpu_free_alloc_info(ai); } #endif /* CONFIG_SMP */ /* * pcpu_nr_pages - calculate total number of populated backing pages * * This reflects the number of pages populated to back chunks. Metadata is * excluded in the number exposed in meminfo as the number of backing pages * scales with the number of cpus and can quickly outweigh the memory used for * metadata. It also keeps this calculation nice and simple. * * RETURNS: * Total number of populated backing pages in use by the allocator. */ unsigned long pcpu_nr_pages(void) { return pcpu_nr_populated * pcpu_nr_units; } /* * Percpu allocator is initialized early during boot when neither slab or * workqueue is available. Plug async management until everything is up * and running. */ static int __init percpu_enable_async(void) { pcpu_async_enabled = true; return 0; } subsys_initcall(percpu_enable_async);
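Once setup_per_cpu_areas() has filled in __per_cpu_offset[], a static percpu variable is reached by adding the CPU's offset to its link-time address. The standalone sketch below models that arithmetic with made-up addresses standing in for __per_cpu_start, pcpu_base_addr and pcpu_unit_offsets[]; the wrap-around of the unsigned subtraction is intentional and mirrors how the real offsets behave.

#include <stdio.h>

int main(void)
{
	/* stand-ins for the kernel symbols; all values are example numbers */
	unsigned long long per_cpu_start = 0xffffffff82000000ULL;
	unsigned long long pcpu_base_addr = 0xffff888100000000ULL;
	unsigned long long unit_off[2] = { 0x0, 0x10000 };
	unsigned long long var_addr = 0xffffffff82000040ULL;  /* &some_percpu_var */

	unsigned long long delta = pcpu_base_addr - per_cpu_start; /* wraps, on purpose */

	for (int cpu = 0; cpu < 2; cpu++) {
		unsigned long long offset = delta + unit_off[cpu];

		printf("cpu%d copy lives at 0x%llx\n", cpu, var_addr + offset);
	}
	return 0;
}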
/* * Resizable virtual memory filesystem for Linux. * * Copyright (C) 2000 Linus Torvalds. * 2000 Transmeta Corp. * 2000-2001 Christoph Rohland * 2000-2001 SAP AG * 2002 Red Hat Inc. * Copyright (C) 2002-2011 Hugh Dickins. * Copyright (C) 2011 Google Inc. * Copyright (C) 2002-2005 VERITAS Software Corporation.
* Copyright (C) 2004 Andi Kleen, SuSE Labs * * Extended attribute support for tmpfs: * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net> * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> * * tiny-shmem: * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com> * * This file is released under the GPL. */ #include <linux/fs.h> #include <linux/init.h> #include <linux/vfs.h> #include <linux/mount.h> #include <linux/ramfs.h> #include <linux/pagemap.h> #include <linux/file.h> #include <linux/fileattr.h> #include <linux/mm.h> #include <linux/random.h> #include <linux/sched/signal.h> #include <linux/export.h> #include <linux/shmem_fs.h> #include <linux/swap.h> #include <linux/uio.h> #include <linux/hugetlb.h> #include <linux/fs_parser.h> #include <linux/swapfile.h> #include <linux/iversion.h> #include "swap.h" static struct vfsmount *shm_mnt __ro_after_init; #ifdef CONFIG_SHMEM /* * This virtual memory filesystem is heavily based on the ramfs. It * extends ramfs by the ability to use swap and honor resource limits * which makes it a completely usable filesystem. */ #include <linux/xattr.h> #include <linux/exportfs.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/mman.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/percpu_counter.h> #include <linux/falloc.h> #include <linux/splice.h> #include <linux/security.h> #include <linux/swapops.h> #include <linux/mempolicy.h> #include <linux/namei.h> #include <linux/ctype.h> #include <linux/migrate.h> #include <linux/highmem.h> #include <linux/seq_file.h> #include <linux/magic.h> #include <linux/syscalls.h> #include <linux/fcntl.h> #include <uapi/linux/memfd.h> #include <linux/rmap.h> #include <linux/uuid.h> #include <linux/quotaops.h> #include <linux/rcupdate_wait.h> #include <linux/uaccess.h> #include "internal.h" #define BLOCKS_PER_PAGE (PAGE_SIZE/512) #define VM_ACCT(size) (PAGE_ALIGN(size) >> PAGE_SHIFT) /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 /* Pretend that one inode + its dentry occupy this much memory */ #define BOGO_INODE_SIZE 1024 /* Symlink up to this size is kmalloc'ed instead of using a swappable page */ #define SHORT_SYMLINK_LEN 128 /* * shmem_fallocate communicates with shmem_fault or shmem_writepage via * inode->i_private (with i_rwsem making sure that it has only one user at * a time): we would prefer not to enlarge the shmem inode just for that. 
*/ struct shmem_falloc { wait_queue_head_t *waitq; /* faults into hole wait for punch to end */ pgoff_t start; /* start of range currently being fallocated */ pgoff_t next; /* the next page offset to be fallocated */ pgoff_t nr_falloced; /* how many new pages have been fallocated */ pgoff_t nr_unswapped; /* how often writepage refused to swap out */ }; struct shmem_options { unsigned long long blocks; unsigned long long inodes; struct mempolicy *mpol; kuid_t uid; kgid_t gid; umode_t mode; bool full_inums; int huge; int seen; bool noswap; unsigned short quota_types; struct shmem_quota_limits qlimits; #define SHMEM_SEEN_BLOCKS 1 #define SHMEM_SEEN_INODES 2 #define SHMEM_SEEN_HUGE 4 #define SHMEM_SEEN_INUMS 8 #define SHMEM_SEEN_NOSWAP 16 #define SHMEM_SEEN_QUOTA 32 }; #ifdef CONFIG_TMPFS static unsigned long shmem_default_max_blocks(void) { return totalram_pages() / 2; } static unsigned long shmem_default_max_inodes(void) { unsigned long nr_pages = totalram_pages(); return min3(nr_pages - totalhigh_pages(), nr_pages / 2, ULONG_MAX / BOGO_INODE_SIZE); } #endif static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp, gfp_t gfp, struct mm_struct *fault_mm, vm_fault_t *fault_type); static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) { return sb->s_fs_info; } /* * shmem_file_setup pre-accounts the whole fixed size of a VM object, * for shared memory and for shared anonymous (/dev/zero) mappings * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1), * consistent with the pre-accounting of private mappings ... */ static inline int shmem_acct_size(unsigned long flags, loff_t size) { return (flags & VM_NORESERVE) ? 0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size)); } static inline void shmem_unacct_size(unsigned long flags, loff_t size) { if (!(flags & VM_NORESERVE)) vm_unacct_memory(VM_ACCT(size)); } static inline int shmem_reacct_size(unsigned long flags, loff_t oldsize, loff_t newsize) { if (!(flags & VM_NORESERVE)) { if (VM_ACCT(newsize) > VM_ACCT(oldsize)) return security_vm_enough_memory_mm(current->mm, VM_ACCT(newsize) - VM_ACCT(oldsize)); else if (VM_ACCT(newsize) < VM_ACCT(oldsize)) vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize)); } return 0; } /* * ... whereas tmpfs objects are accounted incrementally as * pages are allocated, in order to allow large sparse files. * shmem_get_folio reports shmem_acct_blocks failure as -ENOSPC not -ENOMEM, * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM. 
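For a feel of the default tmpfs limits computed by shmem_default_max_blocks() and shmem_default_max_inodes() above, the sketch below is a userspace model; the page counts are made-up examples for a 16 GiB machine without highmem, and min3u stands in for the kernel's min3().

#include <stdio.h>

#define BOGO_INODE_SIZE 1024UL

static unsigned long min3u(unsigned long a, unsigned long b, unsigned long c)
{
	unsigned long m = a < b ? a : b;

	return m < c ? m : c;
}

int main(void)
{
	unsigned long totalram_pages = 4UL << 20;   /* 16 GiB of 4 KiB pages (example) */
	unsigned long totalhigh_pages = 0;          /* no highmem on 64-bit */

	unsigned long max_blocks = totalram_pages / 2;
	unsigned long max_inodes = min3u(totalram_pages - totalhigh_pages,
					 totalram_pages / 2,
					 ~0UL / BOGO_INODE_SIZE);

	printf("default max_blocks=%lu pages, max_inodes=%lu\n",
	       max_blocks, max_inodes);
	return 0;
}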
*/ static inline int shmem_acct_blocks(unsigned long flags, long pages) { if (!(flags & VM_NORESERVE)) return 0; return security_vm_enough_memory_mm(current->mm, pages * VM_ACCT(PAGE_SIZE)); } static inline void shmem_unacct_blocks(unsigned long flags, long pages) { if (flags & VM_NORESERVE) vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE)); } static int shmem_inode_acct_blocks(struct inode *inode, long pages) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); int err = -ENOSPC; if (shmem_acct_blocks(info->flags, pages)) return err; might_sleep(); /* when quotas */ if (sbinfo->max_blocks) { if (!percpu_counter_limited_add(&sbinfo->used_blocks, sbinfo->max_blocks, pages)) goto unacct; err = dquot_alloc_block_nodirty(inode, pages); if (err) { percpu_counter_sub(&sbinfo->used_blocks, pages); goto unacct; } } else { err = dquot_alloc_block_nodirty(inode, pages); if (err) goto unacct; } return 0; unacct: shmem_unacct_blocks(info->flags, pages); return err; } static void shmem_inode_unacct_blocks(struct inode *inode, long pages) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); might_sleep(); /* when quotas */ dquot_free_block_nodirty(inode, pages); if (sbinfo->max_blocks) percpu_counter_sub(&sbinfo->used_blocks, pages); shmem_unacct_blocks(info->flags, pages); } static const struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; static const struct file_operations shmem_file_operations; static const struct inode_operations shmem_inode_operations; static const struct inode_operations shmem_dir_inode_operations; static const struct inode_operations shmem_special_inode_operations; static const struct vm_operations_struct shmem_vm_ops; static const struct vm_operations_struct shmem_anon_vm_ops; static struct file_system_type shmem_fs_type; bool shmem_mapping(struct address_space *mapping) { return mapping->a_ops == &shmem_aops; } EXPORT_SYMBOL_GPL(shmem_mapping); bool vma_is_anon_shmem(struct vm_area_struct *vma) { return vma->vm_ops == &shmem_anon_vm_ops; } bool vma_is_shmem(struct vm_area_struct *vma) { return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops; } static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); #ifdef CONFIG_TMPFS_QUOTA static int shmem_enable_quotas(struct super_block *sb, unsigned short quota_types) { int type, err = 0; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY; for (type = 0; type < SHMEM_MAXQUOTAS; type++) { if (!(quota_types & (1 << type))) continue; err = dquot_load_quota_sb(sb, type, QFMT_SHMEM, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); if (err) goto out_err; } return 0; out_err: pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n", type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); return err; } static void shmem_disable_quotas(struct super_block *sb) { int type; for (type = 0; type < SHMEM_MAXQUOTAS; type++) dquot_quota_off(sb, type); } static struct dquot __rcu **shmem_get_dquots(struct inode *inode) { return SHMEM_I(inode)->i_dquot; } #endif /* CONFIG_TMPFS_QUOTA */ /* * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and * produces a novel ino for the newly allocated inode. * * It may also be called when making a hard link to permit the space needed by * each dentry. However, in that case, no new inode number is needed since that * internally draws from another pool of inode numbers (currently global * get_next_ino()). 
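The ordering in shmem_inode_acct_blocks() above (per-mount used_blocks limit first, then the quota charge, rolling the counter back if the quota step fails) is modelled by this standalone sketch; the limits and helper names are invented for illustration and do not correspond to kernel APIs.

#include <stdbool.h>
#include <stdio.h>

static long used_blocks, max_blocks = 100;   /* hypothetical mount limit */
static long quota_used, quota_limit = 50;    /* hypothetical quota limit */

static bool counter_limited_add(long pages)
{
	if (used_blocks + pages > max_blocks)
		return false;
	used_blocks += pages;
	return true;
}

static int quota_alloc(long pages)
{
	if (quota_used + pages > quota_limit)
		return -1;                    /* stands in for a quota error */
	quota_used += pages;
	return 0;
}

static int inode_acct_blocks(long pages)
{
	if (!counter_limited_add(pages))
		return -1;                    /* stands in for -ENOSPC */
	if (quota_alloc(pages)) {
		used_blocks -= pages;         /* roll the counter back */
		return -1;
	}
	return 0;
}

int main(void)
{
	printf("charge 40: %s\n", inode_acct_blocks(40) ? "failed" : "ok");
	printf("charge 40: %s\n", inode_acct_blocks(40) ? "failed" : "ok");
	/* second charge fails on the quota limit and rolls used_blocks back */
	printf("used_blocks=%ld quota_used=%ld\n", used_blocks, quota_used);
	return 0;
}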
This case is indicated by passing NULL as inop. */ #define SHMEM_INO_BATCH 1024 static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); ino_t ino; if (!(sb->s_flags & SB_KERNMOUNT)) { raw_spin_lock(&sbinfo->stat_lock); if (sbinfo->max_inodes) { if (sbinfo->free_ispace < BOGO_INODE_SIZE) { raw_spin_unlock(&sbinfo->stat_lock); return -ENOSPC; } sbinfo->free_ispace -= BOGO_INODE_SIZE; } if (inop) { ino = sbinfo->next_ino++; if (unlikely(is_zero_ino(ino))) ino = sbinfo->next_ino++; if (unlikely(!sbinfo->full_inums && ino > UINT_MAX)) { /* * Emulate get_next_ino uint wraparound for * compatibility */ if (IS_ENABLED(CONFIG_64BIT)) pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n", __func__, MINOR(sb->s_dev)); sbinfo->next_ino = 1; ino = sbinfo->next_ino++; } *inop = ino; } raw_spin_unlock(&sbinfo->stat_lock); } else if (inop) { /* * __shmem_file_setup, one of our callers, is lock-free: it * doesn't hold stat_lock in shmem_reserve_inode since * max_inodes is always 0, and is called from potentially * unknown contexts. As such, use a per-cpu batched allocator * which doesn't require the per-sb stat_lock unless we are at * the batch boundary. * * We don't need to worry about inode{32,64} since SB_KERNMOUNT * shmem mounts are not exposed to userspace, so we don't need * to worry about things like glibc compatibility. */ ino_t *next_ino; next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu()); ino = *next_ino; if (unlikely(ino % SHMEM_INO_BATCH == 0)) { raw_spin_lock(&sbinfo->stat_lock); ino = sbinfo->next_ino; sbinfo->next_ino += SHMEM_INO_BATCH; raw_spin_unlock(&sbinfo->stat_lock); if (unlikely(is_zero_ino(ino))) ino++; } *inop = ino; *next_ino = ++ino; put_cpu(); } return 0; } static void shmem_free_inode(struct super_block *sb, size_t freed_ispace) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); if (sbinfo->max_inodes) { raw_spin_lock(&sbinfo->stat_lock); sbinfo->free_ispace += BOGO_INODE_SIZE + freed_ispace; raw_spin_unlock(&sbinfo->stat_lock); } } /** * shmem_recalc_inode - recalculate the block usage of an inode * @inode: inode to recalc * @alloced: the change in number of pages allocated to inode * @swapped: the change in number of pages swapped from inode * * We have to calculate the free blocks since the mm can drop * undirtied hole pages behind our back. * * But normally info->alloced == inode->i_mapping->nrpages + info->swapped * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped) */ static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped) { struct shmem_inode_info *info = SHMEM_I(inode); long freed; spin_lock(&info->lock); info->alloced += alloced; info->swapped += swapped; freed = info->alloced - info->swapped - READ_ONCE(inode->i_mapping->nrpages); /* * Special case: whereas normally shmem_recalc_inode() is called * after i_mapping->nrpages has already been adjusted (up or down), * shmem_writepage() has to raise swapped before nrpages is lowered - * to stop a racing shmem_recalc_inode() from thinking that a page has * been freed. Compensate here, to avoid the need for a followup call. 
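A userspace model of the batched inode-number scheme in shmem_reserve_inode() above: single-threaded, with an array slot standing in for the per-cpu cursor, and the starting value of the global counter assumed to be 1.

#include <stdio.h>

#define SHMEM_INO_BATCH 1024UL

static unsigned long global_next_ino = 1;    /* protected by stat_lock in the kernel */
static unsigned long percpu_next_ino[2];     /* one cached cursor per "cpu" */

static unsigned long reserve_ino(int cpu)
{
	unsigned long ino = percpu_next_ino[cpu];

	if (ino % SHMEM_INO_BATCH == 0) {        /* batch exhausted (or never filled) */
		ino = global_next_ino;
		global_next_ino += SHMEM_INO_BATCH;
		if (ino == 0)                    /* skip the reserved zero ino */
			ino++;
	}
	percpu_next_ino[cpu] = ino + 1;
	return ino;
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		printf("cpu0 got ino %lu\n", reserve_ino(0));   /* 1, 2, 3 */
	printf("cpu1 got ino %lu\n", reserve_ino(1));           /* pulls its own batch: 1025 */
	return 0;
}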
*/ if (swapped > 0) freed += swapped; if (freed > 0) info->alloced -= freed; spin_unlock(&info->lock); /* The quota case may block */ if (freed > 0) shmem_inode_unacct_blocks(inode, freed); } bool shmem_charge(struct inode *inode, long pages) { struct address_space *mapping = inode->i_mapping; if (shmem_inode_acct_blocks(inode, pages)) return false; /* nrpages adjustment first, then shmem_recalc_inode() when balanced */ xa_lock_irq(&mapping->i_pages); mapping->nrpages += pages; xa_unlock_irq(&mapping->i_pages); shmem_recalc_inode(inode, pages, 0); return true; } void shmem_uncharge(struct inode *inode, long pages) { /* pages argument is currently unused: keep it to help debugging */ /* nrpages adjustment done by __filemap_remove_folio() or caller */ shmem_recalc_inode(inode, 0, 0); } /* * Replace item expected in xarray by a new item, while holding xa_lock. */ static int shmem_replace_entry(struct address_space *mapping, pgoff_t index, void *expected, void *replacement) { XA_STATE(xas, &mapping->i_pages, index); void *item; VM_BUG_ON(!expected); VM_BUG_ON(!replacement); item = xas_load(&xas); if (item != expected) return -ENOENT; xas_store(&xas, replacement); return 0; } /* * Sometimes, before we decide whether to proceed or to fail, we must check * that an entry was not already brought back from swap by a racing thread. * * Checking page is not enough: by the time a SwapCache page is locked, it * might be reused, and again be SwapCache, using the same swap as before. */ static bool shmem_confirm_swap(struct address_space *mapping, pgoff_t index, swp_entry_t swap) { return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); } /* * Definitions for "huge tmpfs": tmpfs mounted with the huge= option * * SHMEM_HUGE_NEVER: * disables huge pages for the mount; * SHMEM_HUGE_ALWAYS: * enables huge pages for the mount; * SHMEM_HUGE_WITHIN_SIZE: * only allocate huge pages if the page will be fully within i_size, * also respect fadvise()/madvise() hints; * SHMEM_HUGE_ADVISE: * only allocate huge pages if requested with fadvise()/madvise(); */ #define SHMEM_HUGE_NEVER 0 #define SHMEM_HUGE_ALWAYS 1 #define SHMEM_HUGE_WITHIN_SIZE 2 #define SHMEM_HUGE_ADVISE 3 /* * Special values. 
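The freed-pages arithmetic of shmem_recalc_inode() above, in isolation: a userspace sketch with made-up counters, showing one clean hole page that the mm dropped behind tmpfs's back being trimmed from the inode's "alloced" count.

#include <stdio.h>

int main(void)
{
	long alloced = 10, swapped = 2, nrpages = 7;   /* current inode state (example) */
	long d_alloced = 0, d_swapped = 0;             /* change being applied */

	alloced += d_alloced;
	swapped += d_swapped;

	long freed = alloced - swapped - nrpages;

	if (d_swapped > 0)      /* writepage raises swapped before nrpages drops */
		freed += d_swapped;
	if (freed > 0)
		alloced -= freed;

	printf("freed=%ld, alloced now %ld\n", freed, alloced);   /* freed=1, alloced=9 */
	return 0;
}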
* Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled: * * SHMEM_HUGE_DENY: * disables huge on shm_mnt and all mounts, for emergency use; * SHMEM_HUGE_FORCE: * enables huge on shm_mnt and all mounts, w/o needing option, for testing; * */ #define SHMEM_HUGE_DENY (-1) #define SHMEM_HUGE_FORCE (-2) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* ifdef here to avoid bloating shmem.o when not necessary */ static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, struct mm_struct *mm, unsigned long vm_flags) { loff_t i_size; if (!S_ISREG(inode->i_mode)) return false; if (mm && ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &mm->flags))) return false; if (shmem_huge == SHMEM_HUGE_DENY) return false; if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE) return true; switch (SHMEM_SB(inode->i_sb)->huge) { case SHMEM_HUGE_ALWAYS: return true; case SHMEM_HUGE_WITHIN_SIZE: index = round_up(index + 1, HPAGE_PMD_NR); i_size = round_up(i_size_read(inode), PAGE_SIZE); if (i_size >> PAGE_SHIFT >= index) return true; fallthrough; case SHMEM_HUGE_ADVISE: if (mm && (vm_flags & VM_HUGEPAGE)) return true; fallthrough; default: return false; } } #if defined(CONFIG_SYSFS) static int shmem_parse_huge(const char *str) { if (!strcmp(str, "never")) return SHMEM_HUGE_NEVER; if (!strcmp(str, "always")) return SHMEM_HUGE_ALWAYS; if (!strcmp(str, "within_size")) return SHMEM_HUGE_WITHIN_SIZE; if (!strcmp(str, "advise")) return SHMEM_HUGE_ADVISE; if (!strcmp(str, "deny")) return SHMEM_HUGE_DENY; if (!strcmp(str, "force")) return SHMEM_HUGE_FORCE; return -EINVAL; } #endif #if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) static const char *shmem_format_huge(int huge) { switch (huge) { case SHMEM_HUGE_NEVER: return "never"; case SHMEM_HUGE_ALWAYS: return "always"; case SHMEM_HUGE_WITHIN_SIZE: return "within_size"; case SHMEM_HUGE_ADVISE: return "advise"; case SHMEM_HUGE_DENY: return "deny"; case SHMEM_HUGE_FORCE: return "force"; default: VM_BUG_ON(1); return "bad_val"; } } #endif static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shrink_control *sc, unsigned long nr_to_split) { LIST_HEAD(list), *pos, *next; LIST_HEAD(to_remove); struct inode *inode; struct shmem_inode_info *info; struct folio *folio; unsigned long batch = sc ? 
sc->nr_to_scan : 128; int split = 0; if (list_empty(&sbinfo->shrinklist)) return SHRINK_STOP; spin_lock(&sbinfo->shrinklist_lock); list_for_each_safe(pos, next, &sbinfo->shrinklist) { info = list_entry(pos, struct shmem_inode_info, shrinklist); /* pin the inode */ inode = igrab(&info->vfs_inode); /* inode is about to be evicted */ if (!inode) { list_del_init(&info->shrinklist); goto next; } /* Check if there's anything to gain */ if (round_up(inode->i_size, PAGE_SIZE) == round_up(inode->i_size, HPAGE_PMD_SIZE)) { list_move(&info->shrinklist, &to_remove); goto next; } list_move(&info->shrinklist, &list); next: sbinfo->shrinklist_len--; if (!--batch) break; } spin_unlock(&sbinfo->shrinklist_lock); list_for_each_safe(pos, next, &to_remove) { info = list_entry(pos, struct shmem_inode_info, shrinklist); inode = &info->vfs_inode; list_del_init(&info->shrinklist); iput(inode); } list_for_each_safe(pos, next, &list) { int ret; pgoff_t index; info = list_entry(pos, struct shmem_inode_info, shrinklist); inode = &info->vfs_inode; if (nr_to_split && split >= nr_to_split) goto move_back; index = (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT; folio = filemap_get_folio(inode->i_mapping, index); if (IS_ERR(folio)) goto drop; /* No huge page at the end of the file: nothing to split */ if (!folio_test_large(folio)) { folio_put(folio); goto drop; } /* * Move the inode on the list back to shrinklist if we failed * to lock the page at this time. * * Waiting for the lock may lead to deadlock in the * reclaim path. */ if (!folio_trylock(folio)) { folio_put(folio); goto move_back; } ret = split_folio(folio); folio_unlock(folio); folio_put(folio); /* If split failed move the inode on the list back to shrinklist */ if (ret) goto move_back; split++; drop: list_del_init(&info->shrinklist); goto put; move_back: /* * Make sure the inode is either on the global list or deleted * from any local list before iput() since it could be deleted * in another thread once we put the inode (then the local list * is corrupted). */ spin_lock(&sbinfo->shrinklist_lock); list_move(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; spin_unlock(&sbinfo->shrinklist_lock); put: iput(inode); } return split; } static long shmem_unused_huge_scan(struct super_block *sb, struct shrink_control *sc) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); if (!READ_ONCE(sbinfo->shrinklist_len)) return SHRINK_STOP; return shmem_unused_huge_shrink(sbinfo, sc, 0); } static long shmem_unused_huge_count(struct super_block *sb, struct shrink_control *sc) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); return READ_ONCE(sbinfo->shrinklist_len); } #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ #define shmem_huge SHMEM_HUGE_DENY static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shrink_control *sc, unsigned long nr_to_split) { return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * Somewhat like filemap_add_folio, but error if expected item has gone. 
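To make the SHMEM_HUGE_WITHIN_SIZE test in shmem_is_huge() above concrete: the sketch below is standalone userspace code assuming 4 KiB pages and 2 MiB PMD-sized huge pages, applied to a 3 MiB example file.

#include <stdio.h>

#define PAGE_SHIFT_X    12
#define PAGE_SIZE_X     (1UL << PAGE_SHIFT_X)
#define HPAGE_PMD_NR_X  512UL            /* 2 MiB / 4 KiB */
#define round_up(x, y)  ((((x) + (y) - 1) / (y)) * (y))

static int within_size_ok(unsigned long index, unsigned long long i_size)
{
	index = round_up(index + 1, HPAGE_PMD_NR_X);
	i_size = round_up(i_size, PAGE_SIZE_X);
	return (i_size >> PAGE_SHIFT_X) >= index;
}

int main(void)
{
	/* 3 MiB file: the first PMD-sized block fits, the second does not */
	printf("index 0:   %s\n", within_size_ok(0, 3ULL << 20) ? "huge" : "small");
	printf("index 512: %s\n", within_size_ok(512, 3ULL << 20) ? "huge" : "small");
	return 0;
}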
 */
static int shmem_add_to_page_cache(struct folio *folio,
				   struct address_space *mapping,
				   pgoff_t index, void *expected, gfp_t gfp)
{
	XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
	long nr = folio_nr_pages(folio);

	VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);
	VM_BUG_ON(expected && folio_test_large(folio));

	folio_ref_add(folio, nr);
	folio->mapping = mapping;
	folio->index = index;

	gfp &= GFP_RECLAIM_MASK;
	folio_throttle_swaprate(folio, gfp);

	do {
		xas_lock_irq(&xas);
		if (expected != xas_find_conflict(&xas)) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}
		if (expected && xas_find_conflict(&xas)) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}
		xas_store(&xas, folio);
		if (xas_error(&xas))
			goto unlock;
		if (folio_test_pmd_mappable(folio))
			__lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
		__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
		__lruvec_stat_mod_folio(folio, NR_SHMEM, nr);
		mapping->nrpages += nr;
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));

	if (xas_error(&xas)) {
		folio->mapping = NULL;
		folio_ref_sub(folio, nr);
		return xas_error(&xas);
	}

	return 0;
}

/*
 * Somewhat like filemap_remove_folio, but substitutes swap for @folio.
 */
static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
{
	struct address_space *mapping = folio->mapping;
	long nr = folio_nr_pages(folio);
	int error;

	xa_lock_irq(&mapping->i_pages);
	error = shmem_replace_entry(mapping, folio->index, folio, radswap);
	folio->mapping = NULL;
	mapping->nrpages -= nr;
	__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
	__lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
	xa_unlock_irq(&mapping->i_pages);
	folio_put(folio);
	BUG_ON(error);
}

/*
 * Remove swap entry from page cache, free the swap and its page cache.
 */
static int shmem_free_swap(struct address_space *mapping,
			   pgoff_t index, void *radswap)
{
	void *old;

	old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
	if (old != radswap)
		return -ENOENT;
	free_swap_and_cache(radix_to_swp_entry(radswap));
	return 0;
}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given offsets are swapped out.
 *
 * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_partial_swap_usage(struct address_space *mapping,
					pgoff_t start, pgoff_t end)
{
	XA_STATE(xas, &mapping->i_pages, start);
	struct page *page;
	unsigned long swapped = 0;
	unsigned long max = end - 1;

	rcu_read_lock();
	xas_for_each(&xas, page, max) {
		if (xas_retry(&xas, page))
			continue;
		if (xa_is_value(page))
			swapped++;
		if (xas.xa_index == max)
			break;
		if (need_resched()) {
			xas_pause(&xas);
			cond_resched_rcu();
		}
	}
	rcu_read_unlock();

	return swapped << PAGE_SHIFT;
}

/*
 * Determine (in bytes) how many of the shmem object's pages mapped by the
 * given vma are swapped out.
 *
 * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
 * as long as the inode doesn't go away and racy results are not a problem.
 */
unsigned long shmem_swap_usage(struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct address_space *mapping = inode->i_mapping;
	unsigned long swapped;

	/* Be careful as we don't hold info->lock */
	swapped = READ_ONCE(info->swapped);

	/*
	 * The easier cases are when the shmem object has nothing in swap, or
	 * the vma maps it whole.
	 * Then we can simply use the stats that we already track.
	 */
	if (!swapped)
		return 0;

	if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
		return swapped << PAGE_SHIFT;

	/* Here comes the more involved part */
	return shmem_partial_swap_usage(mapping, vma->vm_pgoff,
					vma->vm_pgoff + vma_pages(vma));
}

/*
 * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
 */
void shmem_unlock_mapping(struct address_space *mapping)
{
	struct folio_batch fbatch;
	pgoff_t index = 0;

	folio_batch_init(&fbatch);
	/*
	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
	 */
	while (!mapping_unevictable(mapping) &&
	       filemap_get_folios(mapping, &index, ~0UL, &fbatch)) {
		check_move_unevictable_folios(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
{
	struct folio *folio;

	/*
	 * At first avoid shmem_get_folio(,,,SGP_READ): that fails
	 * beyond i_size, and reports fallocated folios as holes.
	 */
	folio = filemap_get_entry(inode->i_mapping, index);
	if (!folio)
		return folio;
	if (!xa_is_value(folio)) {
		folio_lock(folio);
		if (folio->mapping == inode->i_mapping)
			return folio;
		/* The folio has been swapped out */
		folio_unlock(folio);
		folio_put(folio);
	}
	/*
	 * But read a folio back from swap if any of it is within i_size
	 * (although in some cases this is just a waste of time).
	 */
	folio = NULL;
	shmem_get_folio(inode, index, &folio, SGP_READ);
	return folio;
}

/*
 * Remove range of pages and swap entries from page cache, and free them.
 * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
 */
static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
			     bool unfalloc)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	struct folio *folio;
	bool same_folio;
	long nr_swaps_freed = 0;
	pgoff_t index;
	int i;

	if (lend == -1)
		end = -1;	/* unsigned, so actually very big */

	if (info->fallocend > start && info->fallocend <= end && !unfalloc)
		info->fallocend = start;

	folio_batch_init(&fbatch);
	index = start;
	while (index < end && find_lock_entries(mapping, &index, end - 1,
			&fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			folio = fbatch.folios[i];

			if (xa_is_value(folio)) {
				if (unfalloc)
					continue;
				nr_swaps_freed += !shmem_free_swap(mapping,
							indices[i], folio);
				continue;
			}

			if (!unfalloc || !folio_test_uptodate(folio))
				truncate_inode_folio(mapping, folio);
			folio_unlock(folio);
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}

	/*
	 * When undoing a failed fallocate, we want none of the partial folio
	 * zeroing and splitting below, but shall want to truncate the whole
	 * folio when !uptodate indicates that it was added by this fallocate,
	 * even when [lstart, lend] covers only a part of the folio.
*/ if (unfalloc) goto whole_folios; same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); if (folio) { same_folio = lend < folio_pos(folio) + folio_size(folio); folio_mark_dirty(folio); if (!truncate_inode_partial_folio(folio, lstart, lend)) { start = folio_next_index(folio); if (same_folio) end = folio->index; } folio_unlock(folio); folio_put(folio); folio = NULL; } if (!same_folio) folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT); if (folio) { folio_mark_dirty(folio); if (!truncate_inode_partial_folio(folio, lstart, lend)) end = folio->index; folio_unlock(folio); folio_put(folio); } whole_folios: index = start; while (index < end) { cond_resched(); if (!find_get_entries(mapping, &index, end - 1, &fbatch, indices)) { /* If all gone or hole-punch or unfalloc, we're done */ if (index == start || end != -1) break; /* But if truncating, restart to make sure all gone */ index = start; continue; } for (i = 0; i < folio_batch_count(&fbatch); i++) { folio = fbatch.folios[i]; if (xa_is_value(folio)) { if (unfalloc) continue; if (shmem_free_swap(mapping, indices[i], folio)) { /* Swap was replaced by page: retry */ index = indices[i]; break; } nr_swaps_freed++; continue; } folio_lock(folio); if (!unfalloc || !folio_test_uptodate(folio)) { if (folio_mapping(folio) != mapping) { /* Page was replaced by swap: retry */ folio_unlock(folio); index = indices[i]; break; } VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); if (!folio_test_large(folio)) { truncate_inode_folio(mapping, folio); } else if (truncate_inode_partial_folio(folio, lstart, lend)) { /* * If we split a page, reset the loop so * that we pick up the new sub pages. * Otherwise the THP was entirely * dropped or the target range was * zeroed, so just continue the loop as * is. 
*/ if (!folio_test_large(folio)) { folio_unlock(folio); index = start; break; } } } folio_unlock(folio); } folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); } shmem_recalc_inode(inode, 0, -nr_swaps_freed); } void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { shmem_undo_range(inode, lstart, lend, false); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); inode_inc_iversion(inode); } EXPORT_SYMBOL_GPL(shmem_truncate_range); static int shmem_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = path->dentry->d_inode; struct shmem_inode_info *info = SHMEM_I(inode); if (info->alloced - info->swapped != inode->i_mapping->nrpages) shmem_recalc_inode(inode, 0, 0); if (info->fsflags & FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; if (info->fsflags & FS_IMMUTABLE_FL) stat->attributes |= STATX_ATTR_IMMUTABLE; if (info->fsflags & FS_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= (STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); generic_fillattr(idmap, request_mask, inode, stat); if (shmem_is_huge(inode, 0, false, NULL, 0)) stat->blksize = HPAGE_PMD_SIZE; if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = info->i_crtime.tv_sec; stat->btime.tv_nsec = info->i_crtime.tv_nsec; } return 0; } static int shmem_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); struct shmem_inode_info *info = SHMEM_I(inode); int error; bool update_mtime = false; bool update_ctime = true; error = setattr_prepare(idmap, dentry, attr); if (error) return error; if ((info->seals & F_SEAL_EXEC) && (attr->ia_valid & ATTR_MODE)) { if ((inode->i_mode ^ attr->ia_mode) & 0111) { return -EPERM; } } if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; /* protected by i_rwsem */ if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) || (newsize > oldsize && (info->seals & F_SEAL_GROW))) return -EPERM; if (newsize != oldsize) { error = shmem_reacct_size(SHMEM_I(inode)->flags, oldsize, newsize); if (error) return error; i_size_write(inode, newsize); update_mtime = true; } else { update_ctime = false; } if (newsize <= oldsize) { loff_t holebegin = round_up(newsize, PAGE_SIZE); if (oldsize > holebegin) unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); if (info->alloced) shmem_truncate_range(inode, newsize, (loff_t)-1); /* unmap again to remove racily COWed private pages */ if (oldsize > holebegin) unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); } } if (is_quota_modification(idmap, inode, attr)) { error = dquot_initialize(inode); if (error) return error; } /* Transfer quota accounting */ if (i_uid_needs_update(idmap, attr, inode) || i_gid_needs_update(idmap, attr, inode)) { error = dquot_transfer(idmap, inode, attr); if (error) return error; } setattr_copy(idmap, inode, attr); if (attr->ia_valid & ATTR_MODE) error = posix_acl_chmod(idmap, dentry, inode->i_mode); if (!error && update_ctime) { inode_set_ctime_current(inode); if (update_mtime) inode_set_mtime_to_ts(inode, inode_get_ctime(inode)); inode_inc_iversion(inode); } return error; } static void shmem_evict_inode(struct inode *inode) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); size_t freed = 0; if (shmem_mapping(inode->i_mapping)) { 
shmem_unacct_size(info->flags, inode->i_size); inode->i_size = 0; mapping_set_exiting(inode->i_mapping); shmem_truncate_range(inode, 0, (loff_t)-1); if (!list_empty(&info->shrinklist)) { spin_lock(&sbinfo->shrinklist_lock); if (!list_empty(&info->shrinklist)) { list_del_init(&info->shrinklist); sbinfo->shrinklist_len--; } spin_unlock(&sbinfo->shrinklist_lock); } while (!list_empty(&info->swaplist)) { /* Wait while shmem_unuse() is scanning this inode... */ wait_var_event(&info->stop_eviction, !atomic_read(&info->stop_eviction)); mutex_lock(&shmem_swaplist_mutex); /* ...but beware of the race if we peeked too early */ if (!atomic_read(&info->stop_eviction)) list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } } simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL); shmem_free_inode(inode->i_sb, freed); WARN_ON(inode->i_blocks); clear_inode(inode); #ifdef CONFIG_TMPFS_QUOTA dquot_free_inode(inode); dquot_drop(inode); #endif } static int shmem_find_swap_entries(struct address_space *mapping, pgoff_t start, struct folio_batch *fbatch, pgoff_t *indices, unsigned int type) { XA_STATE(xas, &mapping->i_pages, start); struct folio *folio; swp_entry_t entry; rcu_read_lock(); xas_for_each(&xas, folio, ULONG_MAX) { if (xas_retry(&xas, folio)) continue; if (!xa_is_value(folio)) continue; entry = radix_to_swp_entry(folio); /* * swapin error entries can be found in the mapping. But they're * deliberately ignored here as we've done everything we can do. */ if (swp_type(entry) != type) continue; indices[folio_batch_count(fbatch)] = xas.xa_index; if (!folio_batch_add(fbatch, folio)) break; if (need_resched()) { xas_pause(&xas); cond_resched_rcu(); } } rcu_read_unlock(); return xas.xa_index; } /* * Move the swapped pages for an inode to page cache. Returns the count * of pages swapped in, or the error in case of failure. */ static int shmem_unuse_swap_entries(struct inode *inode, struct folio_batch *fbatch, pgoff_t *indices) { int i = 0; int ret = 0; int error = 0; struct address_space *mapping = inode->i_mapping; for (i = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; if (!xa_is_value(folio)) continue; error = shmem_swapin_folio(inode, indices[i], &folio, SGP_CACHE, mapping_gfp_mask(mapping), NULL, NULL); if (error == 0) { folio_unlock(folio); folio_put(folio); ret++; } if (error == -ENOMEM) break; error = 0; } return error ? error : ret; } /* * If swap found in inode, free it and move page from swapcache to filecache. */ static int shmem_unuse_inode(struct inode *inode, unsigned int type) { struct address_space *mapping = inode->i_mapping; pgoff_t start = 0; struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; int ret = 0; do { folio_batch_init(&fbatch); shmem_find_swap_entries(mapping, start, &fbatch, indices, type); if (folio_batch_count(&fbatch) == 0) { ret = 0; break; } ret = shmem_unuse_swap_entries(inode, &fbatch, indices); if (ret < 0) break; start = indices[folio_batch_count(&fbatch) - 1]; } while (true); return ret; } /* * Read all the shared memory data that resides in the swap * device 'type' back into memory, so the swap device can be * unused. 
*/ int shmem_unuse(unsigned int type) { struct shmem_inode_info *info, *next; int error = 0; if (list_empty(&shmem_swaplist)) return 0; mutex_lock(&shmem_swaplist_mutex); list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { if (!info->swapped) { list_del_init(&info->swaplist); continue; } /* * Drop the swaplist mutex while searching the inode for swap; * but before doing so, make sure shmem_evict_inode() will not * remove placeholder inode from swaplist, nor let it be freed * (igrab() would protect from unlink, but not from unmount). */ atomic_inc(&info->stop_eviction); mutex_unlock(&shmem_swaplist_mutex); error = shmem_unuse_inode(&info->vfs_inode, type); cond_resched(); mutex_lock(&shmem_swaplist_mutex); next = list_next_entry(info, swaplist); if (!info->swapped) list_del_init(&info->swaplist); if (atomic_dec_and_test(&info->stop_eviction)) wake_up_var(&info->stop_eviction); if (error) break; } mutex_unlock(&shmem_swaplist_mutex); return error; } /* * Move the page from the page cache to the swap cache. */ static int shmem_writepage(struct page *page, struct writeback_control *wbc) { struct folio *folio = page_folio(page); struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); swp_entry_t swap; pgoff_t index; /* * Our capabilities prevent regular writeback or sync from ever calling * shmem_writepage; but a stacking filesystem might use ->writepage of * its underlying filesystem, in which case tmpfs should write out to * swap only in response to memory pressure, and not for the writeback * threads or sync. */ if (WARN_ON_ONCE(!wbc->for_reclaim)) goto redirty; if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap)) goto redirty; if (!total_swap_pages) goto redirty; /* * If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or * "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages, * and its shmem_writeback() needs them to be split when swapping. */ if (folio_test_large(folio)) { /* Ensure the subpages are still dirty */ folio_test_set_dirty(folio); if (split_huge_page(page) < 0) goto redirty; folio = page_folio(page); folio_clear_dirty(folio); } index = folio->index; /* * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC * value into swapfile.c, the only way we can correctly account for a * fallocated folio arriving here is now to initialize it and write it. * * That's okay for a folio already fallocated earlier, but if we have * not yet completed the fallocation, then (a) we want to keep track * of this folio in case we have to undo it, and (b) it may not be a * good idea to continue anyway, once we're pushing into swap. So * reactivate the folio, and let shmem_fallocate() quit when too many. */ if (!folio_test_uptodate(folio)) { if (inode->i_private) { struct shmem_falloc *shmem_falloc; spin_lock(&inode->i_lock); shmem_falloc = inode->i_private; if (shmem_falloc && !shmem_falloc->waitq && index >= shmem_falloc->start && index < shmem_falloc->next) shmem_falloc->nr_unswapped++; else shmem_falloc = NULL; spin_unlock(&inode->i_lock); if (shmem_falloc) goto redirty; } folio_zero_range(folio, 0, folio_size(folio)); flush_dcache_folio(folio); folio_mark_uptodate(folio); } swap = folio_alloc_swap(folio); if (!swap.val) goto redirty; /* * Add inode to shmem_unuse()'s list of swapped-out inodes, * if it's not already there. 
Do it now before the folio is * moved to swap cache, when its pagelock no longer protects * the inode from eviction. But don't unlock the mutex until * we've incremented swapped, because shmem_unuse_inode() will * prune a !swapped inode from the swaplist under this mutex. */ mutex_lock(&shmem_swaplist_mutex); if (list_empty(&info->swaplist)) list_add(&info->swaplist, &shmem_swaplist); if (add_to_swap_cache(folio, swap, __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, NULL) == 0) { shmem_recalc_inode(inode, 0, 1); swap_shmem_alloc(swap); shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap)); mutex_unlock(&shmem_swaplist_mutex); BUG_ON(folio_mapped(folio)); return swap_writepage(&folio->page, wbc); } mutex_unlock(&shmem_swaplist_mutex); put_swap_folio(folio, swap); redirty: folio_mark_dirty(folio); if (wbc->for_reclaim) return AOP_WRITEPAGE_ACTIVATE; /* Return with folio locked */ folio_unlock(folio); return 0; } #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS) static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { char buffer[64]; if (!mpol || mpol->mode == MPOL_DEFAULT) return; /* show nothing */ mpol_to_str(buffer, sizeof(buffer), mpol); seq_printf(seq, ",mpol=%s", buffer); } static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) { struct mempolicy *mpol = NULL; if (sbinfo->mpol) { raw_spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ mpol = sbinfo->mpol; mpol_get(mpol); raw_spin_unlock(&sbinfo->stat_lock); } return mpol; } #else /* !CONFIG_NUMA || !CONFIG_TMPFS */ static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { } static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) { return NULL; } #endif /* CONFIG_NUMA && CONFIG_TMPFS */ static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info, pgoff_t index, unsigned int order, pgoff_t *ilx); static struct folio *shmem_swapin_cluster(swp_entry_t swap, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct mempolicy *mpol; pgoff_t ilx; struct folio *folio; mpol = shmem_get_pgoff_policy(info, index, 0, &ilx); folio = swap_cluster_readahead(swap, gfp, mpol, ilx); mpol_cond_put(mpol); return folio; } /* * Make sure huge_gfp is always more limited than limit_gfp. * Some of the flags set permissions, while others set limitations. */ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp) { gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM; gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY; gfp_t zoneflags = limit_gfp & GFP_ZONEMASK; gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK); /* Allow allocations only from the originally specified zones. */ result |= zoneflags; /* * Minimize the result gfp by taking the union with the deny flags, * and the intersection of the allow flags. 
*/ result |= (limit_gfp & denyflags); result |= (huge_gfp & limit_gfp) & allowflags; return result; } static struct folio *shmem_alloc_hugefolio(gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct mempolicy *mpol; pgoff_t ilx; struct page *page; mpol = shmem_get_pgoff_policy(info, index, HPAGE_PMD_ORDER, &ilx); page = alloc_pages_mpol(gfp, HPAGE_PMD_ORDER, mpol, ilx, numa_node_id()); mpol_cond_put(mpol); return page_rmappable_folio(page); } static struct folio *shmem_alloc_folio(gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct mempolicy *mpol; pgoff_t ilx; struct page *page; mpol = shmem_get_pgoff_policy(info, index, 0, &ilx); page = alloc_pages_mpol(gfp, 0, mpol, ilx, numa_node_id()); mpol_cond_put(mpol); return (struct folio *)page; } static struct folio *shmem_alloc_and_add_folio(gfp_t gfp, struct inode *inode, pgoff_t index, struct mm_struct *fault_mm, bool huge) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct folio *folio; long pages; int error; if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) huge = false; if (huge) { pages = HPAGE_PMD_NR; index = round_down(index, HPAGE_PMD_NR); /* * Check for conflict before waiting on a huge allocation. * Conflict might be that a huge page has just been allocated * and added to page cache by a racing thread, or that there * is already at least one small page in the huge extent. * Be careful to retry when appropriate, but not forever! * Elsewhere -EEXIST would be the right code, but not here. */ if (xa_find(&mapping->i_pages, &index, index + HPAGE_PMD_NR - 1, XA_PRESENT)) return ERR_PTR(-E2BIG); folio = shmem_alloc_hugefolio(gfp, info, index); if (!folio) count_vm_event(THP_FILE_FALLBACK); } else { pages = 1; folio = shmem_alloc_folio(gfp, info, index); } if (!folio) return ERR_PTR(-ENOMEM); __folio_set_locked(folio); __folio_set_swapbacked(folio); gfp &= GFP_RECLAIM_MASK; error = mem_cgroup_charge(folio, fault_mm, gfp); if (error) { if (xa_find(&mapping->i_pages, &index, index + pages - 1, XA_PRESENT)) { error = -EEXIST; } else if (huge) { count_vm_event(THP_FILE_FALLBACK); count_vm_event(THP_FILE_FALLBACK_CHARGE); } goto unlock; } error = shmem_add_to_page_cache(folio, mapping, index, NULL, gfp); if (error) goto unlock; error = shmem_inode_acct_blocks(inode, pages); if (error) { struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); long freed; /* * Try to reclaim some space by splitting a few * large folios beyond i_size on the filesystem. */ shmem_unused_huge_shrink(sbinfo, NULL, 2); /* * And do a shmem_recalc_inode() to account for freed pages: * except our folio is there in cache, so not quite balanced. */ spin_lock(&info->lock); freed = pages + info->alloced - info->swapped - READ_ONCE(mapping->nrpages); if (freed > 0) info->alloced -= freed; spin_unlock(&info->lock); if (freed > 0) shmem_inode_unacct_blocks(inode, freed); error = shmem_inode_acct_blocks(inode, pages); if (error) { filemap_remove_folio(folio); goto unlock; } } shmem_recalc_inode(inode, pages, 0); folio_add_lru(folio); return folio; unlock: folio_unlock(folio); folio_put(folio); return ERR_PTR(error); } /* * When a page is moved from swapcache to shmem filecache (either by the * usual swapin of shmem_get_folio_gfp(), or by the less common swapoff of * shmem_unuse_inode()), it may have been read in earlier from swap, in * ignorance of the mapping it belongs to. 
If that mapping has special * constraints (like the gma500 GEM driver, which requires RAM below 4GB), * we may need to copy to a suitable page before moving to filecache. * * In a future release, this may well be extended to respect cpuset and * NUMA mempolicy, and applied also to anonymous pages in do_swap_page(); * but for now it is a simple matter of zone. */ static bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp) { return folio_zonenum(folio) > gfp_zone(gfp); } static int shmem_replace_folio(struct folio **foliop, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct folio *old, *new; struct address_space *swap_mapping; swp_entry_t entry; pgoff_t swap_index; int error; old = *foliop; entry = old->swap; swap_index = swp_offset(entry); swap_mapping = swap_address_space(entry); /* * We have arrived here because our zones are constrained, so don't * limit chance of success by further cpuset and node constraints. */ gfp &= ~GFP_CONSTRAINT_MASK; VM_BUG_ON_FOLIO(folio_test_large(old), old); new = shmem_alloc_folio(gfp, info, index); if (!new) return -ENOMEM; folio_get(new); folio_copy(new, old); flush_dcache_folio(new); __folio_set_locked(new); __folio_set_swapbacked(new); folio_mark_uptodate(new); new->swap = entry; folio_set_swapcache(new); /* * Our caller will very soon move newpage out of swapcache, but it's * a nice clean interface for us to replace oldpage by newpage there. */ xa_lock_irq(&swap_mapping->i_pages); error = shmem_replace_entry(swap_mapping, swap_index, old, new); if (!error) { mem_cgroup_migrate(old, new); __lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1); __lruvec_stat_mod_folio(new, NR_SHMEM, 1); __lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1); __lruvec_stat_mod_folio(old, NR_SHMEM, -1); } xa_unlock_irq(&swap_mapping->i_pages); if (unlikely(error)) { /* * Is this possible? I think not, now that our callers check * both PageSwapCache and page_private after getting page lock; * but be defensive. Reverse old to newpage for clear and free. */ old = new; } else { folio_add_lru(new); *foliop = new; } folio_clear_swapcache(old); old->private = NULL; folio_unlock(old); folio_put_refs(old, 2); return error; } static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, struct folio *folio, swp_entry_t swap) { struct address_space *mapping = inode->i_mapping; swp_entry_t swapin_error; void *old; swapin_error = make_poisoned_swp_entry(); old = xa_cmpxchg_irq(&mapping->i_pages, index, swp_to_radix_entry(swap), swp_to_radix_entry(swapin_error), 0); if (old != swp_to_radix_entry(swap)) return; folio_wait_writeback(folio); delete_from_swap_cache(folio); /* * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks) * in shmem_evict_inode(). */ shmem_recalc_inode(inode, -1, -1); swap_free(swap); } /* * Swap in the folio pointed to by *foliop. * Caller has to make sure that *foliop contains a valid swapped folio. * Returns 0 and the folio in foliop if success. On failure, returns the * error code and NULL in *foliop. 
*/ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp, gfp_t gfp, struct mm_struct *fault_mm, vm_fault_t *fault_type) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct swap_info_struct *si; struct folio *folio = NULL; swp_entry_t swap; int error; VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); swap = radix_to_swp_entry(*foliop); *foliop = NULL; if (is_poisoned_swp_entry(swap)) return -EIO; si = get_swap_device(swap); if (!si) { if (!shmem_confirm_swap(mapping, index, swap)) return -EEXIST; else return -EINVAL; } /* Look it up and read it in.. */ folio = swap_cache_get_folio(swap, NULL, 0); if (!folio) { /* Or update major stats only when swapin succeeds?? */ if (fault_type) { *fault_type |= VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); count_memcg_event_mm(fault_mm, PGMAJFAULT); } /* Here we actually start the io */ folio = shmem_swapin_cluster(swap, gfp, info, index); if (!folio) { error = -ENOMEM; goto failed; } } /* We have to do this with folio locked to prevent races */ folio_lock(folio); if (!folio_test_swapcache(folio) || folio->swap.val != swap.val || !shmem_confirm_swap(mapping, index, swap)) { error = -EEXIST; goto unlock; } if (!folio_test_uptodate(folio)) { error = -EIO; goto failed; } folio_wait_writeback(folio); /* * Some architectures may have to restore extra metadata to the * folio after reading from swap. */ arch_swap_restore(swap, folio); if (shmem_should_replace_folio(folio, gfp)) { error = shmem_replace_folio(&folio, gfp, info, index); if (error) goto failed; } error = shmem_add_to_page_cache(folio, mapping, index, swp_to_radix_entry(swap), gfp); if (error) goto failed; shmem_recalc_inode(inode, 0, -1); if (sgp == SGP_WRITE) folio_mark_accessed(folio); delete_from_swap_cache(folio); folio_mark_dirty(folio); swap_free(swap); put_swap_device(si); *foliop = folio; return 0; failed: if (!shmem_confirm_swap(mapping, index, swap)) error = -EEXIST; if (error == -EIO) shmem_set_folio_swapin_error(inode, index, folio, swap); unlock: if (folio) { folio_unlock(folio); folio_put(folio); } put_swap_device(si); return error; } /* * shmem_get_folio_gfp - find page in cache, or get from swap, or allocate * * If we allocate a new one we do not mark it dirty. That's up to the * vm. If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache. * * vmf and fault_type are only supplied by shmem_fault: otherwise they are NULL. */ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp, gfp_t gfp, struct vm_fault *vmf, vm_fault_t *fault_type) { struct vm_area_struct *vma = vmf ? vmf->vma : NULL; struct mm_struct *fault_mm; struct folio *folio; int error; bool alloced; if (WARN_ON_ONCE(!shmem_mapping(inode->i_mapping))) return -EINVAL; if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT)) return -EFBIG; repeat: if (sgp <= SGP_CACHE && ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) return -EINVAL; alloced = false; fault_mm = vma ? 
vma->vm_mm : NULL; folio = filemap_get_entry(inode->i_mapping, index); if (folio && vma && userfaultfd_minor(vma)) { if (!xa_is_value(folio)) folio_put(folio); *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); return 0; } if (xa_is_value(folio)) { error = shmem_swapin_folio(inode, index, &folio, sgp, gfp, fault_mm, fault_type); if (error == -EEXIST) goto repeat; *foliop = folio; return error; } if (folio) { folio_lock(folio); /* Has the folio been truncated or swapped out? */ if (unlikely(folio->mapping != inode->i_mapping)) { folio_unlock(folio); folio_put(folio); goto repeat; } if (sgp == SGP_WRITE) folio_mark_accessed(folio); if (folio_test_uptodate(folio)) goto out; /* fallocated folio */ if (sgp != SGP_READ) goto clear; folio_unlock(folio); folio_put(folio); } /* * SGP_READ: succeed on hole, with NULL folio, letting caller zero. * SGP_NOALLOC: fail on hole, with NULL folio, letting caller fail. */ *foliop = NULL; if (sgp == SGP_READ) return 0; if (sgp == SGP_NOALLOC) return -ENOENT; /* * Fast cache lookup and swap lookup did not find it: allocate. */ if (vma && userfaultfd_missing(vma)) { *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); return 0; } if (shmem_is_huge(inode, index, false, fault_mm, vma ? vma->vm_flags : 0)) { gfp_t huge_gfp; huge_gfp = vma_thp_gfp_mask(vma); huge_gfp = limit_gfp_mask(huge_gfp, gfp); folio = shmem_alloc_and_add_folio(huge_gfp, inode, index, fault_mm, true); if (!IS_ERR(folio)) { count_vm_event(THP_FILE_ALLOC); goto alloced; } if (PTR_ERR(folio) == -EEXIST) goto repeat; } folio = shmem_alloc_and_add_folio(gfp, inode, index, fault_mm, false); if (IS_ERR(folio)) { error = PTR_ERR(folio); if (error == -EEXIST) goto repeat; folio = NULL; goto unlock; } alloced: alloced = true; if (folio_test_pmd_mappable(folio) && DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < folio_next_index(folio) - 1) { struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct shmem_inode_info *info = SHMEM_I(inode); /* * Part of the large folio is beyond i_size: subject * to shrink under memory pressure. */ spin_lock(&sbinfo->shrinklist_lock); /* * _careful to defend against unlocked access to * ->shrink_list in shmem_unused_huge_shrink() */ if (list_empty_careful(&info->shrinklist)) { list_add_tail(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; } spin_unlock(&sbinfo->shrinklist_lock); } if (sgp == SGP_WRITE) folio_set_referenced(folio); /* * Let SGP_FALLOC use the SGP_WRITE optimization on a new folio. */ if (sgp == SGP_FALLOC) sgp = SGP_WRITE; clear: /* * Let SGP_WRITE caller clear ends if write does not fill folio; * but SGP_FALLOC on a folio fallocated earlier must initialize * it now, lest undo on failure cancel our earlier guarantee. */ if (sgp != SGP_WRITE && !folio_test_uptodate(folio)) { long i, n = folio_nr_pages(folio); for (i = 0; i < n; i++) clear_highpage(folio_page(folio, i)); flush_dcache_folio(folio); folio_mark_uptodate(folio); } /* Perhaps the file has been truncated since we checked */ if (sgp <= SGP_CACHE && ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { error = -EINVAL; goto unlock; } out: *foliop = folio; return 0; /* * Error recovery. */ unlock: if (alloced) filemap_remove_folio(folio); shmem_recalc_inode(inode, 0, 0); if (folio) { folio_unlock(folio); folio_put(folio); } return error; } /** * shmem_get_folio - find, and lock a shmem folio. * @inode: inode to search * @index: the page index. * @foliop: pointer to the folio if found * @sgp: SGP_* flags to control behavior * * Looks up the page cache entry at @inode & @index. 
If a folio is * present, it is returned locked with an increased refcount. * * If the caller modifies data in the folio, it must call folio_mark_dirty() * before unlocking the folio to ensure that the folio is not reclaimed. * There is no need to reserve space before calling folio_mark_dirty(). * * When no folio is found, the behavior depends on @sgp: * - for SGP_READ, *@foliop is %NULL and 0 is returned * - for SGP_NOALLOC, *@foliop is %NULL and -ENOENT is returned * - for all other flags a new folio is allocated, inserted into the * page cache and returned locked in @foliop. * * Context: May sleep. * Return: 0 if successful, else a negative error code. */ int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp) { return shmem_get_folio_gfp(inode, index, foliop, sgp, mapping_gfp_mask(inode->i_mapping), NULL, NULL); } EXPORT_SYMBOL_GPL(shmem_get_folio); /* * This is like autoremove_wake_function, but it removes the wait queue * entry unconditionally - even if something else had already woken the * target. */ static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { int ret = default_wake_function(wait, mode, sync, key); list_del_init(&wait->entry); return ret; } /* * Trinity finds that probing a hole which tmpfs is punching can * prevent the hole-punch from ever completing: which in turn * locks writers out with its hold on i_rwsem. So refrain from * faulting pages into the hole while it's being punched. Although * shmem_undo_range() does remove the additions, it may be unable to * keep up, as each new page needs its own unmap_mapping_range() call, * and the i_mmap tree grows ever slower to scan if new vmas are added. * * It does not matter if we sometimes reach this check just before the * hole-punch begins, so that one fault then races with the punch: * we just need to make racing faults a rare case. * * The implementation below would be much simpler if we just used a * standard mutex or completion: but we cannot take i_rwsem in fault, * and bloating every shmem inode for this unlikely case would be sad. */ static vm_fault_t shmem_falloc_wait(struct vm_fault *vmf, struct inode *inode) { struct shmem_falloc *shmem_falloc; struct file *fpin = NULL; vm_fault_t ret = 0; spin_lock(&inode->i_lock); shmem_falloc = inode->i_private; if (shmem_falloc && shmem_falloc->waitq && vmf->pgoff >= shmem_falloc->start && vmf->pgoff < shmem_falloc->next) { wait_queue_head_t *shmem_falloc_waitq; DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); ret = VM_FAULT_NOPAGE; fpin = maybe_unlock_mmap_for_io(vmf, NULL); shmem_falloc_waitq = shmem_falloc->waitq; prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); schedule(); /* * shmem_falloc_waitq points into the shmem_fallocate() * stack of the hole-punching task: shmem_falloc_waitq * is usually invalid by the time we reach here, but * finish_wait() does not dereference it in that case; * though i_lock needed lest racing with wake_up_all(). 
*/ spin_lock(&inode->i_lock); finish_wait(shmem_falloc_waitq, &shmem_fault_wait); } spin_unlock(&inode->i_lock); if (fpin) { fput(fpin); ret = VM_FAULT_RETRY; } return ret; } static vm_fault_t shmem_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); gfp_t gfp = mapping_gfp_mask(inode->i_mapping); struct folio *folio = NULL; vm_fault_t ret = 0; int err; /* * Trinity finds that probing a hole which tmpfs is punching can * prevent the hole-punch from ever completing: noted in i_private. */ if (unlikely(inode->i_private)) { ret = shmem_falloc_wait(vmf, inode); if (ret) return ret; } WARN_ON_ONCE(vmf->page != NULL); err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE, gfp, vmf, &ret); if (err) return vmf_error(err); if (folio) { vmf->page = folio_file_page(folio, vmf->pgoff); ret |= VM_FAULT_LOCKED; } return ret; } unsigned long shmem_get_unmapped_area(struct file *file, unsigned long uaddr, unsigned long len, unsigned long pgoff, unsigned long flags) { unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); unsigned long addr; unsigned long offset; unsigned long inflated_len; unsigned long inflated_addr; unsigned long inflated_offset; if (len > TASK_SIZE) return -ENOMEM; get_area = current->mm->get_unmapped_area; addr = get_area(file, uaddr, len, pgoff, flags); if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return addr; if (IS_ERR_VALUE(addr)) return addr; if (addr & ~PAGE_MASK) return addr; if (addr > TASK_SIZE - len) return addr; if (shmem_huge == SHMEM_HUGE_DENY) return addr; if (len < HPAGE_PMD_SIZE) return addr; if (flags & MAP_FIXED) return addr; /* * Our priority is to support MAP_SHARED mapped hugely; * and support MAP_PRIVATE mapped hugely too, until it is COWed. * But if caller specified an address hint and we allocated area there * successfully, respect that as before. */ if (uaddr == addr) return addr; if (shmem_huge != SHMEM_HUGE_FORCE) { struct super_block *sb; if (file) { VM_BUG_ON(file->f_op != &shmem_file_operations); sb = file_inode(file)->i_sb; } else { /* * Called directly from mm/mmap.c, or drivers/char/mem.c * for "/dev/zero", to create a shared anonymous object. 
*/ if (IS_ERR(shm_mnt)) return addr; sb = shm_mnt->mnt_sb; } if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER) return addr; } offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1); if (offset && offset + len < 2 * HPAGE_PMD_SIZE) return addr; if ((addr & (HPAGE_PMD_SIZE-1)) == offset) return addr; inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE; if (inflated_len > TASK_SIZE) return addr; if (inflated_len < len) return addr; inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags); if (IS_ERR_VALUE(inflated_addr)) return addr; if (inflated_addr & ~PAGE_MASK) return addr; inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1); inflated_addr += offset - inflated_offset; if (inflated_offset > offset) inflated_addr += HPAGE_PMD_SIZE; if (inflated_addr > TASK_SIZE - len) return addr; return inflated_addr; } #ifdef CONFIG_NUMA static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) { struct inode *inode = file_inode(vma->vm_file); return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); } static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx) { struct inode *inode = file_inode(vma->vm_file); pgoff_t index; /* * Bias interleave by inode number to distribute better across nodes; * but this interface is independent of which page order is used, so * supplies only that bias, letting caller apply the offset (adjusted * by page order, as in shmem_get_pgoff_policy() and get_vma_policy()). */ *ilx = inode->i_ino; index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); } static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info, pgoff_t index, unsigned int order, pgoff_t *ilx) { struct mempolicy *mpol; /* Bias interleave by inode number to distribute better across nodes */ *ilx = info->vfs_inode.i_ino + (index >> order); mpol = mpol_shared_policy_lookup(&info->policy, index); return mpol ? mpol : get_task_policy(current); } #else static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info, pgoff_t index, unsigned int order, pgoff_t *ilx) { *ilx = 0; return NULL; } #endif /* CONFIG_NUMA */ int shmem_lock(struct file *file, int lock, struct ucounts *ucounts) { struct inode *inode = file_inode(file); struct shmem_inode_info *info = SHMEM_I(inode); int retval = -ENOMEM; /* * What serializes the accesses to info->flags? * ipc_lock_object() when called from shmctl_do_lock(), * no serialization needed when called from shm_destroy(). 
*/ if (lock && !(info->flags & VM_LOCKED)) { if (!user_shm_lock(inode->i_size, ucounts)) goto out_nomem; info->flags |= VM_LOCKED; mapping_set_unevictable(file->f_mapping); } if (!lock && (info->flags & VM_LOCKED) && ucounts) { user_shm_unlock(inode->i_size, ucounts); info->flags &= ~VM_LOCKED; mapping_clear_unevictable(file->f_mapping); } retval = 0; out_nomem: return retval; } static int shmem_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); struct shmem_inode_info *info = SHMEM_I(inode); int ret; ret = seal_check_write(info->seals, vma); if (ret) return ret; /* arm64 - allow memory tagging on RAM-based files */ vm_flags_set(vma, VM_MTE_ALLOWED); file_accessed(file); /* This is anonymous shared memory if it is unlinked at the time of mmap */ if (inode->i_nlink) vma->vm_ops = &shmem_vm_ops; else vma->vm_ops = &shmem_anon_vm_ops; return 0; } static int shmem_file_open(struct inode *inode, struct file *file) { file->f_mode |= FMODE_CAN_ODIRECT; return generic_file_open(inode, file); } #ifdef CONFIG_TMPFS_XATTR static int shmem_initxattrs(struct inode *, const struct xattr *, void *); /* * chattr's fsflags are unrelated to extended attributes, * but tmpfs has chosen to enable them under the same config option. */ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) { unsigned int i_flags = 0; if (fsflags & FS_NOATIME_FL) i_flags |= S_NOATIME; if (fsflags & FS_APPEND_FL) i_flags |= S_APPEND; if (fsflags & FS_IMMUTABLE_FL) i_flags |= S_IMMUTABLE; /* * But FS_NODUMP_FL does not require any action in i_flags. */ inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE); } #else static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) { } #define shmem_initxattrs NULL #endif static struct offset_ctx *shmem_get_offset_ctx(struct inode *inode) { return &SHMEM_I(inode)->dir_offsets; } static struct inode *__shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev, unsigned long flags) { struct inode *inode; struct shmem_inode_info *info; struct shmem_sb_info *sbinfo = SHMEM_SB(sb); ino_t ino; int err; err = shmem_reserve_inode(sb, &ino); if (err) return ERR_PTR(err); inode = new_inode(sb); if (!inode) { shmem_free_inode(sb, 0); return ERR_PTR(-ENOSPC); } inode->i_ino = ino; inode_init_owner(idmap, inode, dir, mode); inode->i_blocks = 0; simple_inode_init_ts(inode); inode->i_generation = get_random_u32(); info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); atomic_set(&info->stop_eviction, 0); info->seals = F_SEAL_SEAL; info->flags = flags & VM_NORESERVE; info->i_crtime = inode_get_mtime(inode); info->fsflags = (dir == NULL) ? 
0 : SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED; if (info->fsflags) shmem_set_inode_flags(inode, info->fsflags); INIT_LIST_HEAD(&info->shrinklist); INIT_LIST_HEAD(&info->swaplist); simple_xattrs_init(&info->xattrs); cache_no_acl(inode); if (sbinfo->noswap) mapping_set_unevictable(inode->i_mapping); mapping_set_large_folios(inode->i_mapping); switch (mode & S_IFMT) { default: inode->i_op = &shmem_special_inode_operations; init_special_inode(inode, mode, dev); break; case S_IFREG: inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_inode_operations; inode->i_fop = &shmem_file_operations; mpol_shared_policy_init(&info->policy, shmem_get_sbmpol(sbinfo)); break; case S_IFDIR: inc_nlink(inode); /* Some things misbehave if size == 0 on a directory */ inode->i_size = 2 * BOGO_DIRENT_SIZE; inode->i_op = &shmem_dir_inode_operations; inode->i_fop = &simple_offset_dir_operations; simple_offset_init(shmem_get_offset_ctx(inode)); break; case S_IFLNK: /* * Must not load anything in the rbtree, * mpol_free_shared_policy will not be called. */ mpol_shared_policy_init(&info->policy, NULL); break; } lockdep_annotate_inode_mutex_key(inode); return inode; } #ifdef CONFIG_TMPFS_QUOTA static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev, unsigned long flags) { int err; struct inode *inode; inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags); if (IS_ERR(inode)) return inode; err = dquot_initialize(inode); if (err) goto errout; err = dquot_alloc_inode(inode); if (err) { dquot_drop(inode); goto errout; } return inode; errout: inode->i_flags |= S_NOQUOTA; iput(inode); return ERR_PTR(err); } #else static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev, unsigned long flags) { return __shmem_get_inode(idmap, sb, dir, mode, dev, flags); } #endif /* CONFIG_TMPFS_QUOTA */ #ifdef CONFIG_USERFAULTFD int shmem_mfill_atomic_pte(pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, uffd_flags_t flags, struct folio **foliop) { struct inode *inode = file_inode(dst_vma->vm_file); struct shmem_inode_info *info = SHMEM_I(inode); struct address_space *mapping = inode->i_mapping; gfp_t gfp = mapping_gfp_mask(mapping); pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); void *page_kaddr; struct folio *folio; int ret; pgoff_t max_off; if (shmem_inode_acct_blocks(inode, 1)) { /* * We may have got a page, returned -ENOENT triggering a retry, * and now we find ourselves with -ENOMEM. Release the page, to * avoid a BUG_ON in our caller. */ if (unlikely(*foliop)) { folio_put(*foliop); *foliop = NULL; } return -ENOMEM; } if (!*foliop) { ret = -ENOMEM; folio = shmem_alloc_folio(gfp, info, pgoff); if (!folio) goto out_unacct_blocks; if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) { page_kaddr = kmap_local_folio(folio, 0); /* * The read mmap_lock is held here. Despite the * mmap_lock being read recursive a deadlock is still * possible if a writer has taken a lock. For example: * * process A thread 1 takes read lock on own mmap_lock * process A thread 2 calls mmap, blocks taking write lock * process B thread 1 takes page fault, read lock on own mmap lock * process B thread 2 calls mmap, blocks taking write lock * process A thread 1 blocks taking read lock on process B * process B thread 1 blocks taking read lock on process A * * Disable page faults to prevent potential deadlock * and retry the copy outside the mmap_lock. 
*/ pagefault_disable(); ret = copy_from_user(page_kaddr, (const void __user *)src_addr, PAGE_SIZE); pagefault_enable(); kunmap_local(page_kaddr); /* fallback to copy_from_user outside mmap_lock */ if (unlikely(ret)) { *foliop = folio; ret = -ENOENT; /* don't free the page */ goto out_unacct_blocks; } flush_dcache_folio(folio); } else { /* ZEROPAGE */ clear_user_highpage(&folio->page, dst_addr); } } else { folio = *foliop; VM_BUG_ON_FOLIO(folio_test_large(folio), folio); *foliop = NULL; } VM_BUG_ON(folio_test_locked(folio)); VM_BUG_ON(folio_test_swapbacked(folio)); __folio_set_locked(folio); __folio_set_swapbacked(folio); __folio_mark_uptodate(folio); ret = -EFAULT; max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(pgoff >= max_off)) goto out_release; ret = mem_cgroup_charge(folio, dst_vma->vm_mm, gfp); if (ret) goto out_release; ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL, gfp); if (ret) goto out_release; ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, &folio->page, true, flags); if (ret) goto out_delete_from_cache; shmem_recalc_inode(inode, 1, 0); folio_unlock(folio); return 0; out_delete_from_cache: filemap_remove_folio(folio); out_release: folio_unlock(folio); folio_put(folio); out_unacct_blocks: shmem_inode_unacct_blocks(inode, 1); return ret; } #endif /* CONFIG_USERFAULTFD */ #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; static const struct inode_operations shmem_short_symlink_operations; static int shmem_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t index = pos >> PAGE_SHIFT; struct folio *folio; int ret = 0; /* i_rwsem is held by caller */ if (unlikely(info->seals & (F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) { if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) return -EPERM; if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) return -EPERM; } ret = shmem_get_folio(inode, index, &folio, SGP_WRITE); if (ret) return ret; *pagep = folio_file_page(folio, index); if (PageHWPoison(*pagep)) { folio_unlock(folio); folio_put(folio); *pagep = NULL; return -EIO; } return 0; } static int shmem_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct folio *folio = page_folio(page); struct inode *inode = mapping->host; if (pos + copied > inode->i_size) i_size_write(inode, pos + copied); if (!folio_test_uptodate(folio)) { if (copied < folio_size(folio)) { size_t from = offset_in_folio(folio, pos); folio_zero_segments(folio, 0, from, from + copied, folio_size(folio)); } folio_mark_uptodate(folio); } folio_mark_dirty(folio); folio_unlock(folio); folio_put(folio); return copied; } static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; pgoff_t index; unsigned long offset; int error = 0; ssize_t retval = 0; loff_t *ppos = &iocb->ki_pos; index = *ppos >> PAGE_SHIFT; offset = *ppos & ~PAGE_MASK; for (;;) { struct folio *folio = NULL; struct page *page = NULL; pgoff_t end_index; unsigned long nr, ret; loff_t i_size = i_size_read(inode); end_index = i_size >> PAGE_SHIFT; if (index > end_index) break; if (index == end_index) { nr = i_size & ~PAGE_MASK; if (nr <= offset) break; } error = 
shmem_get_folio(inode, index, &folio, SGP_READ); if (error) { if (error == -EINVAL) error = 0; break; } if (folio) { folio_unlock(folio); page = folio_file_page(folio, index); if (PageHWPoison(page)) { folio_put(folio); error = -EIO; break; } } /* * We must evaluate after, since reads (unlike writes) * are called without i_rwsem protection against truncate */ nr = PAGE_SIZE; i_size = i_size_read(inode); end_index = i_size >> PAGE_SHIFT; if (index == end_index) { nr = i_size & ~PAGE_MASK; if (nr <= offset) { if (folio) folio_put(folio); break; } } nr -= offset; if (folio) { /* * If users can be writing to this page using arbitrary * virtual addresses, take care about potential aliasing * before reading the page on the kernel side. */ if (mapping_writably_mapped(mapping)) flush_dcache_page(page); /* * Mark the page accessed if we read the beginning. */ if (!offset) folio_mark_accessed(folio); /* * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... */ ret = copy_page_to_iter(page, offset, nr, to); folio_put(folio); } else if (user_backed_iter(to)) { /* * Copy to user tends to be so well optimized, but * clear_user() not so much, that it is noticeably * faster to copy the zero page instead of clearing. */ ret = copy_page_to_iter(ZERO_PAGE(0), offset, nr, to); } else { /* * But submitting the same page twice in a row to * splice() - or others? - can result in confusion: * so don't attempt that optimization on pipes etc. */ ret = iov_iter_zero(nr, to); } retval += ret; offset += ret; index += offset >> PAGE_SHIFT; offset &= ~PAGE_MASK; if (!iov_iter_count(to)) break; if (ret < nr) { error = -EFAULT; break; } cond_resched(); } *ppos = ((loff_t) index << PAGE_SHIFT) + offset; file_accessed(file); return retval ? retval : error; } static ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret <= 0) goto unlock; ret = file_remove_privs(file); if (ret) goto unlock; ret = file_update_time(file); if (ret) goto unlock; ret = generic_perform_write(iocb, from); unlock: inode_unlock(inode); return ret; } static bool zero_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { return true; } static void zero_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { } static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { return false; } static const struct pipe_buf_operations zero_pipe_buf_ops = { .release = zero_pipe_buf_release, .try_steal = zero_pipe_buf_try_steal, .get = zero_pipe_buf_get, }; static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe, loff_t fpos, size_t size) { size_t offset = fpos & ~PAGE_MASK; size = min_t(size_t, size, PAGE_SIZE - offset); if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { struct pipe_buffer *buf = pipe_head_buf(pipe); *buf = (struct pipe_buffer) { .ops = &zero_pipe_buf_ops, .page = ZERO_PAGE(0), .offset = offset, .len = size, }; pipe->head++; } return size; } static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct inode *inode = file_inode(in); struct address_space *mapping = inode->i_mapping; struct folio *folio = NULL; size_t total_spliced = 0, used, npages, n, part; loff_t isize; int error = 0; /* Work out how much data we can actually add into the pipe */ used = 
pipe_occupancy(pipe->head, pipe->tail); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); do { if (*ppos >= i_size_read(inode)) break; error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ); if (error) { if (error == -EINVAL) error = 0; break; } if (folio) { folio_unlock(folio); if (folio_test_hwpoison(folio) || (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { error = -EIO; break; } } /* * i_size must be checked after we know the pages are Uptodate. * * Checking i_size after the check allows us to calculate * the correct value for "nr", which means the zero-filled * part of the page is not copied back to userspace (unless * another truncate extends the file - this is desired though). */ isize = i_size_read(inode); if (unlikely(*ppos >= isize)) break; part = min_t(loff_t, isize - *ppos, len); if (folio) { /* * If users can be writing to this page using arbitrary * virtual addresses, take care about potential aliasing * before reading the page on the kernel side. */ if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); folio_mark_accessed(folio); /* * Ok, we have the page, and it's up-to-date, so we can * now splice it into the pipe. */ n = splice_folio_into_pipe(pipe, folio, *ppos, part); folio_put(folio); folio = NULL; } else { n = splice_zeropage_into_pipe(pipe, *ppos, part); } if (!n) break; len -= n; total_spliced += n; *ppos += n; in->f_ra.prev_pos = *ppos; if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) break; cond_resched(); } while (len); if (folio) folio_put(folio); file_accessed(in); return total_spliced ? total_spliced : error; } static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; if (whence != SEEK_DATA && whence != SEEK_HOLE) return generic_file_llseek_size(file, offset, whence, MAX_LFS_FILESIZE, i_size_read(inode)); if (offset < 0) return -ENXIO; inode_lock(inode); /* We're holding i_rwsem so we can access i_size directly */ offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence); if (offset >= 0) offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE); inode_unlock(inode); return offset; } static long shmem_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_falloc shmem_falloc; pgoff_t start, index, end, undo_fallocend; int error; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; inode_lock(inode); if (mode & FALLOC_FL_PUNCH_HOLE) { struct address_space *mapping = file->f_mapping; loff_t unmap_start = round_up(offset, PAGE_SIZE); loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); /* protected by i_rwsem */ if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { error = -EPERM; goto out; } shmem_falloc.waitq = &shmem_falloc_waitq; shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT; shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; spin_lock(&inode->i_lock); inode->i_private = &shmem_falloc; spin_unlock(&inode->i_lock); if ((u64)unmap_end > (u64)unmap_start) unmap_mapping_range(mapping, unmap_start, 1 + unmap_end - unmap_start, 0); shmem_truncate_range(inode, offset, offset + len - 1); /* No need to unmap again: hole-punching leaves COWed pages */ spin_lock(&inode->i_lock); inode->i_private = NULL; 
wake_up_all(&shmem_falloc_waitq); WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head)); spin_unlock(&inode->i_lock); error = 0; goto out; } /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ error = inode_newsize_ok(inode, offset + len); if (error) goto out; if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { error = -EPERM; goto out; } start = offset >> PAGE_SHIFT; end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT; /* Try to avoid a swapstorm if len is impossible to satisfy */ if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) { error = -ENOSPC; goto out; } shmem_falloc.waitq = NULL; shmem_falloc.start = start; shmem_falloc.next = start; shmem_falloc.nr_falloced = 0; shmem_falloc.nr_unswapped = 0; spin_lock(&inode->i_lock); inode->i_private = &shmem_falloc; spin_unlock(&inode->i_lock); /* * info->fallocend is only relevant when huge pages might be * involved: to prevent split_huge_page() freeing fallocated * pages when FALLOC_FL_KEEP_SIZE committed beyond i_size. */ undo_fallocend = info->fallocend; if (info->fallocend < end) info->fallocend = end; for (index = start; index < end; ) { struct folio *folio; /* * Good, the fallocate(2) manpage permits EINTR: we may have * been interrupted because we are using up too much memory. */ if (signal_pending(current)) error = -EINTR; else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) error = -ENOMEM; else error = shmem_get_folio(inode, index, &folio, SGP_FALLOC); if (error) { info->fallocend = undo_fallocend; /* Remove the !uptodate folios we added */ if (index > start) { shmem_undo_range(inode, (loff_t)start << PAGE_SHIFT, ((loff_t)index << PAGE_SHIFT) - 1, true); } goto undone; } /* * Here is a more important optimization than it appears: * a second SGP_FALLOC on the same large folio will clear it, * making it uptodate and un-undoable if we fail later. */ index = folio_next_index(folio); /* Beware 32-bit wraparound */ if (!index) index--; /* * Inform shmem_writepage() how far we have reached. * No need for lock or barrier: we have the page lock. */ if (!folio_test_uptodate(folio)) shmem_falloc.nr_falloced += index - shmem_falloc.next; shmem_falloc.next = index; /* * If !uptodate, leave it that way so that freeable folios * can be recognized if we need to rollback on error later. * But mark it dirty so that memory pressure will swap rather * than free the folios we are allocating (and SGP_CACHE folios * might still be clean: we now need to mark those dirty too). */ folio_mark_dirty(folio); folio_unlock(folio); folio_put(folio); cond_resched(); } if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) i_size_write(inode, offset + len); undone: spin_lock(&inode->i_lock); inode->i_private = NULL; spin_unlock(&inode->i_lock); out: if (!error) file_modified(file); inode_unlock(inode); return error; } static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) { struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); buf->f_type = TMPFS_MAGIC; buf->f_bsize = PAGE_SIZE; buf->f_namelen = NAME_MAX; if (sbinfo->max_blocks) { buf->f_blocks = sbinfo->max_blocks; buf->f_bavail = buf->f_bfree = sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); } if (sbinfo->max_inodes) { buf->f_files = sbinfo->max_inodes; buf->f_ffree = sbinfo->free_ispace / BOGO_INODE_SIZE; } /* else leave those fields 0 like simple_statfs */ buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b); return 0; } /* * File creation. Allocate an inode, and we're done.. 
*/ static int shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { struct inode *inode; int error; inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE); if (IS_ERR(inode)) return PTR_ERR(inode); error = simple_acl_create(dir, inode); if (error) goto out_iput; error = security_inode_init_security(inode, dir, &dentry->d_name, shmem_initxattrs, NULL); if (error && error != -EOPNOTSUPP) goto out_iput; error = simple_offset_add(shmem_get_offset_ctx(dir), dentry); if (error) goto out_iput; dir->i_size += BOGO_DIRENT_SIZE; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_inc_iversion(dir); d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ return error; out_iput: iput(inode); return error; } static int shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct inode *inode; int error; inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE); if (IS_ERR(inode)) { error = PTR_ERR(inode); goto err_out; } error = security_inode_init_security(inode, dir, NULL, shmem_initxattrs, NULL); if (error && error != -EOPNOTSUPP) goto out_iput; error = simple_acl_create(dir, inode); if (error) goto out_iput; d_tmpfile(file, inode); err_out: return finish_open_simple(file, error); out_iput: iput(inode); return error; } static int shmem_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int error; error = shmem_mknod(idmap, dir, dentry, mode | S_IFDIR, 0); if (error) return error; inc_nlink(dir); return 0; } static int shmem_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return shmem_mknod(idmap, dir, dentry, mode | S_IFREG, 0); } /* * Link a file.. */ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); int ret = 0; /* * No ordinary (disk based) filesystem counts links as inodes; * but each new link needs a new dentry, pinning lowmem, and * tmpfs dentries cannot be pruned until they are unlinked. * But if an O_TMPFILE file is linked into the tmpfs, the * first link must skip that, to get the accounting right. 
*/ if (inode->i_nlink) { ret = shmem_reserve_inode(inode->i_sb, NULL); if (ret) goto out; } ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry); if (ret) { if (inode->i_nlink) shmem_free_inode(inode->i_sb, 0); goto out; } dir->i_size += BOGO_DIRENT_SIZE; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inode_inc_iversion(dir); inc_nlink(inode); ihold(inode); /* New dentry reference */ dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); out: return ret; } static int shmem_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) shmem_free_inode(inode->i_sb, 0); simple_offset_remove(shmem_get_offset_ctx(dir), dentry); dir->i_size -= BOGO_DIRENT_SIZE; inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inode_inc_iversion(dir); drop_nlink(inode); dput(dentry); /* Undo the count from "create" - does all the work */ return 0; } static int shmem_rmdir(struct inode *dir, struct dentry *dentry) { if (!simple_offset_empty(dentry)) return -ENOTEMPTY; drop_nlink(d_inode(dentry)); drop_nlink(dir); return shmem_unlink(dir, dentry); } static int shmem_whiteout(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry) { struct dentry *whiteout; int error; whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); if (!whiteout) return -ENOMEM; error = shmem_mknod(idmap, old_dir, whiteout, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); dput(whiteout); if (error) return error; /* * Cheat and hash the whiteout while the old dentry is still in * place, instead of playing games with FS_RENAME_DOES_D_MOVE. * * d_lookup() will consistently find one of them at this point, * not sure which one, but that isn't even important. */ d_rehash(whiteout); return 0; } /* * The VFS layer already does all the dentry stuff for rename, * we just have to decrement the usage count for the target if * it exists so that the VFS layer correctly free's it when it * gets overwritten. 
*/ static int shmem_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct inode *inode = d_inode(old_dentry); int they_are_dirs = S_ISDIR(inode->i_mode); int error; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; if (flags & RENAME_EXCHANGE) return simple_offset_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); if (!simple_offset_empty(new_dentry)) return -ENOTEMPTY; if (flags & RENAME_WHITEOUT) { error = shmem_whiteout(idmap, old_dir, old_dentry); if (error) return error; } simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry); error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry); if (error) return error; if (d_really_is_positive(new_dentry)) { (void) shmem_unlink(new_dir, new_dentry); if (they_are_dirs) { drop_nlink(d_inode(new_dentry)); drop_nlink(old_dir); } } else if (they_are_dirs) { drop_nlink(old_dir); inc_nlink(new_dir); } old_dir->i_size -= BOGO_DIRENT_SIZE; new_dir->i_size += BOGO_DIRENT_SIZE; simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); inode_inc_iversion(old_dir); inode_inc_iversion(new_dir); return 0; } static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int error; int len; struct inode *inode; struct folio *folio; len = strlen(symname) + 1; if (len > PAGE_SIZE) return -ENAMETOOLONG; inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0, VM_NORESERVE); if (IS_ERR(inode)) return PTR_ERR(inode); error = security_inode_init_security(inode, dir, &dentry->d_name, shmem_initxattrs, NULL); if (error && error != -EOPNOTSUPP) goto out_iput; error = simple_offset_add(shmem_get_offset_ctx(dir), dentry); if (error) goto out_iput; inode->i_size = len-1; if (len <= SHORT_SYMLINK_LEN) { inode->i_link = kmemdup(symname, len, GFP_KERNEL); if (!inode->i_link) { error = -ENOMEM; goto out_remove_offset; } inode->i_op = &shmem_short_symlink_operations; } else { inode_nohighmem(inode); inode->i_mapping->a_ops = &shmem_aops; error = shmem_get_folio(inode, 0, &folio, SGP_WRITE); if (error) goto out_remove_offset; inode->i_op = &shmem_symlink_inode_operations; memcpy(folio_address(folio), symname, len); folio_mark_uptodate(folio); folio_mark_dirty(folio); folio_unlock(folio); folio_put(folio); } dir->i_size += BOGO_DIRENT_SIZE; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_inc_iversion(dir); d_instantiate(dentry, inode); dget(dentry); return 0; out_remove_offset: simple_offset_remove(shmem_get_offset_ctx(dir), dentry); out_iput: iput(inode); return error; } static void shmem_put_link(void *arg) { folio_mark_accessed(arg); folio_put(arg); } static const char *shmem_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct folio *folio = NULL; int error; if (!dentry) { folio = filemap_get_folio(inode->i_mapping, 0); if (IS_ERR(folio)) return ERR_PTR(-ECHILD); if (PageHWPoison(folio_page(folio, 0)) || !folio_test_uptodate(folio)) { folio_put(folio); return ERR_PTR(-ECHILD); } } else { error = shmem_get_folio(inode, 0, &folio, SGP_READ); if (error) return ERR_PTR(error); if (!folio) return ERR_PTR(-ECHILD); if (PageHWPoison(folio_page(folio, 0))) { folio_unlock(folio); folio_put(folio); return ERR_PTR(-ECHILD); } folio_unlock(folio); } set_delayed_call(done, shmem_put_link, folio); return folio_address(folio); } #ifdef CONFIG_TMPFS_XATTR static int shmem_fileattr_get(struct dentry 
*dentry, struct fileattr *fa) { struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); fileattr_fill_flags(fa, info->fsflags & SHMEM_FL_USER_VISIBLE); return 0; } static int shmem_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct shmem_inode_info *info = SHMEM_I(inode); if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE) return -EOPNOTSUPP; info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) | (fa->flags & SHMEM_FL_USER_MODIFIABLE); shmem_set_inode_flags(inode, info->fsflags); inode_set_ctime_current(inode); inode_inc_iversion(inode); return 0; } /* * Superblocks without xattr inode operations may get some security.* xattr * support from the LSM "for free". As soon as we have any other xattrs * like ACLs, we also need to implement the security.* handlers at * filesystem level, though. */ /* * Callback for security_inode_init_security() for acquiring xattrs. */ static int shmem_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); const struct xattr *xattr; struct simple_xattr *new_xattr; size_t ispace = 0; size_t len; if (sbinfo->max_inodes) { for (xattr = xattr_array; xattr->name != NULL; xattr++) { ispace += simple_xattr_space(xattr->name, xattr->value_len + XATTR_SECURITY_PREFIX_LEN); } if (ispace) { raw_spin_lock(&sbinfo->stat_lock); if (sbinfo->free_ispace < ispace) ispace = 0; else sbinfo->free_ispace -= ispace; raw_spin_unlock(&sbinfo->stat_lock); if (!ispace) return -ENOSPC; } } for (xattr = xattr_array; xattr->name != NULL; xattr++) { new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); if (!new_xattr) break; len = strlen(xattr->name) + 1; new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, GFP_KERNEL_ACCOUNT); if (!new_xattr->name) { kvfree(new_xattr); break; } memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, xattr->name, len); simple_xattr_add(&info->xattrs, new_xattr); } if (xattr->name != NULL) { if (ispace) { raw_spin_lock(&sbinfo->stat_lock); sbinfo->free_ispace += ispace; raw_spin_unlock(&sbinfo->stat_lock); } simple_xattrs_free(&info->xattrs, NULL); return -ENOMEM; } return 0; } static int shmem_xattr_handler_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(inode); name = xattr_full_name(handler, name); return simple_xattr_get(&info->xattrs, name, buffer, size); } static int shmem_xattr_handler_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct simple_xattr *old_xattr; size_t ispace = 0; name = xattr_full_name(handler, name); if (value && sbinfo->max_inodes) { ispace = simple_xattr_space(name, size); raw_spin_lock(&sbinfo->stat_lock); if (sbinfo->free_ispace < ispace) ispace = 0; else sbinfo->free_ispace -= ispace; raw_spin_unlock(&sbinfo->stat_lock); if (!ispace) return -ENOSPC; } old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags); if (!IS_ERR(old_xattr)) { ispace = 0; if (old_xattr && sbinfo->max_inodes) ispace = simple_xattr_space(old_xattr->name, 
old_xattr->size); simple_xattr_free(old_xattr); old_xattr = NULL; inode_set_ctime_current(inode); inode_inc_iversion(inode); } if (ispace) { raw_spin_lock(&sbinfo->stat_lock); sbinfo->free_ispace += ispace; raw_spin_unlock(&sbinfo->stat_lock); } return PTR_ERR(old_xattr); } static const struct xattr_handler shmem_security_xattr_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = shmem_xattr_handler_get, .set = shmem_xattr_handler_set, }; static const struct xattr_handler shmem_trusted_xattr_handler = { .prefix = XATTR_TRUSTED_PREFIX, .get = shmem_xattr_handler_get, .set = shmem_xattr_handler_set, }; static const struct xattr_handler shmem_user_xattr_handler = { .prefix = XATTR_USER_PREFIX, .get = shmem_xattr_handler_get, .set = shmem_xattr_handler_set, }; static const struct xattr_handler * const shmem_xattr_handlers[] = { &shmem_security_xattr_handler, &shmem_trusted_xattr_handler, &shmem_user_xattr_handler, NULL }; static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); } #endif /* CONFIG_TMPFS_XATTR */ static const struct inode_operations shmem_short_symlink_operations = { .getattr = shmem_getattr, .setattr = shmem_setattr, .get_link = simple_get_link, #ifdef CONFIG_TMPFS_XATTR .listxattr = shmem_listxattr, #endif }; static const struct inode_operations shmem_symlink_inode_operations = { .getattr = shmem_getattr, .setattr = shmem_setattr, .get_link = shmem_get_link, #ifdef CONFIG_TMPFS_XATTR .listxattr = shmem_listxattr, #endif }; static struct dentry *shmem_get_parent(struct dentry *child) { return ERR_PTR(-ESTALE); } static int shmem_match(struct inode *ino, void *vfh) { __u32 *fh = vfh; __u64 inum = fh[2]; inum = (inum << 32) | fh[1]; return ino->i_ino == inum && fh[0] == ino->i_generation; } /* Find any alias of inode, but prefer a hashed alias */ static struct dentry *shmem_find_alias(struct inode *inode) { struct dentry *alias = d_find_alias(inode); return alias ?: d_find_any_alias(inode); } static struct dentry *shmem_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { struct inode *inode; struct dentry *dentry = NULL; u64 inum; if (fh_len < 3) return NULL; inum = fid->raw[2]; inum = (inum << 32) | fid->raw[1]; inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), shmem_match, fid->raw); if (inode) { dentry = shmem_find_alias(inode); iput(inode); } return dentry; } static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, struct inode *parent) { if (*len < 3) { *len = 3; return FILEID_INVALID; } if (inode_unhashed(inode)) { /* Unfortunately insert_inode_hash is not idempotent, * so as we hash inodes here rather than at creation * time, we need a lock to ensure we only try * to do it once */ static DEFINE_SPINLOCK(lock); spin_lock(&lock); if (inode_unhashed(inode)) __insert_inode_hash(inode, inode->i_ino + inode->i_generation); spin_unlock(&lock); } fh[0] = inode->i_generation; fh[1] = inode->i_ino; fh[2] = ((__u64)inode->i_ino) >> 32; *len = 3; return 1; } static const struct export_operations shmem_export_ops = { .get_parent = shmem_get_parent, .encode_fh = shmem_encode_fh, .fh_to_dentry = shmem_fh_to_dentry, }; enum shmem_param { Opt_gid, Opt_huge, Opt_mode, Opt_mpol, Opt_nr_blocks, Opt_nr_inodes, Opt_size, Opt_uid, Opt_inode32, Opt_inode64, Opt_noswap, Opt_quota, Opt_usrquota, Opt_grpquota, Opt_usrquota_block_hardlimit, Opt_usrquota_inode_hardlimit, Opt_grpquota_block_hardlimit, 
Opt_grpquota_inode_hardlimit, }; static const struct constant_table shmem_param_enums_huge[] = { {"never", SHMEM_HUGE_NEVER }, {"always", SHMEM_HUGE_ALWAYS }, {"within_size", SHMEM_HUGE_WITHIN_SIZE }, {"advise", SHMEM_HUGE_ADVISE }, {} }; const struct fs_parameter_spec shmem_fs_parameters[] = { fsparam_u32 ("gid", Opt_gid), fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge), fsparam_u32oct("mode", Opt_mode), fsparam_string("mpol", Opt_mpol), fsparam_string("nr_blocks", Opt_nr_blocks), fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("size", Opt_size), fsparam_u32 ("uid", Opt_uid), fsparam_flag ("inode32", Opt_inode32), fsparam_flag ("inode64", Opt_inode64), fsparam_flag ("noswap", Opt_noswap), #ifdef CONFIG_TMPFS_QUOTA fsparam_flag ("quota", Opt_quota), fsparam_flag ("usrquota", Opt_usrquota), fsparam_flag ("grpquota", Opt_grpquota), fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit), fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit), fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit), fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit), #endif {} }; static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) { struct shmem_options *ctx = fc->fs_private; struct fs_parse_result result; unsigned long long size; char *rest; int opt; kuid_t kuid; kgid_t kgid; opt = fs_parse(fc, shmem_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_size: size = memparse(param->string, &rest); if (*rest == '%') { size <<= PAGE_SHIFT; size *= totalram_pages(); do_div(size, 100); rest++; } if (*rest) goto bad_value; ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE); ctx->seen |= SHMEM_SEEN_BLOCKS; break; case Opt_nr_blocks: ctx->blocks = memparse(param->string, &rest); if (*rest || ctx->blocks > LONG_MAX) goto bad_value; ctx->seen |= SHMEM_SEEN_BLOCKS; break; case Opt_nr_inodes: ctx->inodes = memparse(param->string, &rest); if (*rest || ctx->inodes > ULONG_MAX / BOGO_INODE_SIZE) goto bad_value; ctx->seen |= SHMEM_SEEN_INODES; break; case Opt_mode: ctx->mode = result.uint_32 & 07777; break; case Opt_uid: kuid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(kuid)) goto bad_value; /* * The requested uid must be representable in the * filesystem's idmapping. */ if (!kuid_has_mapping(fc->user_ns, kuid)) goto bad_value; ctx->uid = kuid; break; case Opt_gid: kgid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(kgid)) goto bad_value; /* * The requested gid must be representable in the * filesystem's idmapping. 
*/ if (!kgid_has_mapping(fc->user_ns, kgid)) goto bad_value; ctx->gid = kgid; break; case Opt_huge: ctx->huge = result.uint_32; if (ctx->huge != SHMEM_HUGE_NEVER && !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && has_transparent_hugepage())) goto unsupported_parameter; ctx->seen |= SHMEM_SEEN_HUGE; break; case Opt_mpol: if (IS_ENABLED(CONFIG_NUMA)) { mpol_put(ctx->mpol); ctx->mpol = NULL; if (mpol_parse_str(param->string, &ctx->mpol)) goto bad_value; break; } goto unsupported_parameter; case Opt_inode32: ctx->full_inums = false; ctx->seen |= SHMEM_SEEN_INUMS; break; case Opt_inode64: if (sizeof(ino_t) < 8) { return invalfc(fc, "Cannot use inode64 with <64bit inums in kernel\n"); } ctx->full_inums = true; ctx->seen |= SHMEM_SEEN_INUMS; break; case Opt_noswap: if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) { return invalfc(fc, "Turning off swap in unprivileged tmpfs mounts unsupported"); } ctx->noswap = true; ctx->seen |= SHMEM_SEEN_NOSWAP; break; case Opt_quota: if (fc->user_ns != &init_user_ns) return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); ctx->seen |= SHMEM_SEEN_QUOTA; ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP); break; case Opt_usrquota: if (fc->user_ns != &init_user_ns) return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); ctx->seen |= SHMEM_SEEN_QUOTA; ctx->quota_types |= QTYPE_MASK_USR; break; case Opt_grpquota: if (fc->user_ns != &init_user_ns) return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); ctx->seen |= SHMEM_SEEN_QUOTA; ctx->quota_types |= QTYPE_MASK_GRP; break; case Opt_usrquota_block_hardlimit: size = memparse(param->string, &rest); if (*rest || !size) goto bad_value; if (size > SHMEM_QUOTA_MAX_SPC_LIMIT) return invalfc(fc, "User quota block hardlimit too large."); ctx->qlimits.usrquota_bhardlimit = size; break; case Opt_grpquota_block_hardlimit: size = memparse(param->string, &rest); if (*rest || !size) goto bad_value; if (size > SHMEM_QUOTA_MAX_SPC_LIMIT) return invalfc(fc, "Group quota block hardlimit too large."); ctx->qlimits.grpquota_bhardlimit = size; break; case Opt_usrquota_inode_hardlimit: size = memparse(param->string, &rest); if (*rest || !size) goto bad_value; if (size > SHMEM_QUOTA_MAX_INO_LIMIT) return invalfc(fc, "User quota inode hardlimit too large."); ctx->qlimits.usrquota_ihardlimit = size; break; case Opt_grpquota_inode_hardlimit: size = memparse(param->string, &rest); if (*rest || !size) goto bad_value; if (size > SHMEM_QUOTA_MAX_INO_LIMIT) return invalfc(fc, "Group quota inode hardlimit too large."); ctx->qlimits.grpquota_ihardlimit = size; break; } return 0; unsupported_parameter: return invalfc(fc, "Unsupported parameter '%s'", param->key); bad_value: return invalfc(fc, "Bad value for '%s'", param->key); } static int shmem_parse_options(struct fs_context *fc, void *data) { char *options = data; if (options) { int err = security_sb_eat_lsm_opts(options, &fc->security); if (err) return err; } while (options != NULL) { char *this_char = options; for (;;) { /* * NUL-terminate this option: unfortunately, * mount options form a comma-separated list, * but mpol's nodelist may also contain commas. 
*/ options = strchr(options, ','); if (options == NULL) break; options++; if (!isdigit(*options)) { options[-1] = '\0'; break; } } if (*this_char) { char *value = strchr(this_char, '='); size_t len = 0; int err; if (value) { *value++ = '\0'; len = strlen(value); } err = vfs_parse_fs_string(fc, this_char, value, len); if (err < 0) return err; } } return 0; } /* * Reconfigure a shmem filesystem. */ static int shmem_reconfigure(struct fs_context *fc) { struct shmem_options *ctx = fc->fs_private; struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); unsigned long used_isp; struct mempolicy *mpol = NULL; const char *err; raw_spin_lock(&sbinfo->stat_lock); used_isp = sbinfo->max_inodes * BOGO_INODE_SIZE - sbinfo->free_ispace; if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { err = "Cannot retroactively limit size"; goto out; } if (percpu_counter_compare(&sbinfo->used_blocks, ctx->blocks) > 0) { err = "Too small a size for current use"; goto out; } } if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) { if (!sbinfo->max_inodes) { err = "Cannot retroactively limit inodes"; goto out; } if (ctx->inodes * BOGO_INODE_SIZE < used_isp) { err = "Too few inodes for current use"; goto out; } } if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums && sbinfo->next_ino > UINT_MAX) { err = "Current inum too high to switch to 32-bit inums"; goto out; } if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) { err = "Cannot disable swap on remount"; goto out; } if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) { err = "Cannot enable swap on remount if it was disabled on first mount"; goto out; } if (ctx->seen & SHMEM_SEEN_QUOTA && !sb_any_quota_loaded(fc->root->d_sb)) { err = "Cannot enable quota on remount"; goto out; } #ifdef CONFIG_TMPFS_QUOTA #define CHANGED_LIMIT(name) \ (ctx->qlimits.name## hardlimit && \ (ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit)) if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) || CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) { err = "Cannot change global quota limit on remount"; goto out; } #endif /* CONFIG_TMPFS_QUOTA */ if (ctx->seen & SHMEM_SEEN_HUGE) sbinfo->huge = ctx->huge; if (ctx->seen & SHMEM_SEEN_INUMS) sbinfo->full_inums = ctx->full_inums; if (ctx->seen & SHMEM_SEEN_BLOCKS) sbinfo->max_blocks = ctx->blocks; if (ctx->seen & SHMEM_SEEN_INODES) { sbinfo->max_inodes = ctx->inodes; sbinfo->free_ispace = ctx->inodes * BOGO_INODE_SIZE - used_isp; } /* * Preserve previous mempolicy unless mpol remount option was specified. 
*/ if (ctx->mpol) { mpol = sbinfo->mpol; sbinfo->mpol = ctx->mpol; /* transfers initial ref */ ctx->mpol = NULL; } if (ctx->noswap) sbinfo->noswap = true; raw_spin_unlock(&sbinfo->stat_lock); mpol_put(mpol); return 0; out: raw_spin_unlock(&sbinfo->stat_lock); return invalfc(fc, "%s", err); } static int shmem_show_options(struct seq_file *seq, struct dentry *root) { struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb); struct mempolicy *mpol; if (sbinfo->max_blocks != shmem_default_max_blocks()) seq_printf(seq, ",size=%luk", K(sbinfo->max_blocks)); if (sbinfo->max_inodes != shmem_default_max_inodes()) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); if (sbinfo->mode != (0777 | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, sbinfo->uid)); if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, sbinfo->gid)); /* * Showing inode{64,32} might be useful even if it's the system default, * since then people don't have to resort to checking both here and * /proc/config.gz to confirm 64-bit inums were successfully applied * (which may not even exist if IKCONFIG_PROC isn't enabled). * * We hide it when inode64 isn't the default and we are using 32-bit * inodes, since that probably just means the feature isn't even under * consideration. * * As such: * * +-----------------+-----------------+ * | TMPFS_INODE64=y | TMPFS_INODE64=n | * +------------------+-----------------+-----------------+ * | full_inums=true | show | show | * | full_inums=false | show | hide | * +------------------+-----------------+-----------------+ * */ if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums) seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 
64 : 32)); #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ if (sbinfo->huge) seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); #endif mpol = shmem_get_sbmpol(sbinfo); shmem_show_mpol(seq, mpol); mpol_put(mpol); if (sbinfo->noswap) seq_printf(seq, ",noswap"); #ifdef CONFIG_TMPFS_QUOTA if (sb_has_quota_active(root->d_sb, USRQUOTA)) seq_printf(seq, ",usrquota"); if (sb_has_quota_active(root->d_sb, GRPQUOTA)) seq_printf(seq, ",grpquota"); if (sbinfo->qlimits.usrquota_bhardlimit) seq_printf(seq, ",usrquota_block_hardlimit=%lld", sbinfo->qlimits.usrquota_bhardlimit); if (sbinfo->qlimits.grpquota_bhardlimit) seq_printf(seq, ",grpquota_block_hardlimit=%lld", sbinfo->qlimits.grpquota_bhardlimit); if (sbinfo->qlimits.usrquota_ihardlimit) seq_printf(seq, ",usrquota_inode_hardlimit=%lld", sbinfo->qlimits.usrquota_ihardlimit); if (sbinfo->qlimits.grpquota_ihardlimit) seq_printf(seq, ",grpquota_inode_hardlimit=%lld", sbinfo->qlimits.grpquota_ihardlimit); #endif return 0; } #endif /* CONFIG_TMPFS */ static void shmem_put_super(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); #ifdef CONFIG_TMPFS_QUOTA shmem_disable_quotas(sb); #endif free_percpu(sbinfo->ino_batch); percpu_counter_destroy(&sbinfo->used_blocks); mpol_put(sbinfo->mpol); kfree(sbinfo); sb->s_fs_info = NULL; } static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) { struct shmem_options *ctx = fc->fs_private; struct inode *inode; struct shmem_sb_info *sbinfo; int error = -ENOMEM; /* Round up to L1_CACHE_BYTES to resist false sharing */ sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info), L1_CACHE_BYTES), GFP_KERNEL); if (!sbinfo) return error; sb->s_fs_info = sbinfo; #ifdef CONFIG_TMPFS /* * Per default we only allow half of the physical ram per * tmpfs instance, limiting inodes to one per page of lowmem; * but the internal instance is left unlimited. 
*/ if (!(sb->s_flags & SB_KERNMOUNT)) { if (!(ctx->seen & SHMEM_SEEN_BLOCKS)) ctx->blocks = shmem_default_max_blocks(); if (!(ctx->seen & SHMEM_SEEN_INODES)) ctx->inodes = shmem_default_max_inodes(); if (!(ctx->seen & SHMEM_SEEN_INUMS)) ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64); sbinfo->noswap = ctx->noswap; } else { sb->s_flags |= SB_NOUSER; } sb->s_export_op = &shmem_export_ops; sb->s_flags |= SB_NOSEC | SB_I_VERSION; #else sb->s_flags |= SB_NOUSER; #endif sbinfo->max_blocks = ctx->blocks; sbinfo->max_inodes = ctx->inodes; sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE; if (sb->s_flags & SB_KERNMOUNT) { sbinfo->ino_batch = alloc_percpu(ino_t); if (!sbinfo->ino_batch) goto failed; } sbinfo->uid = ctx->uid; sbinfo->gid = ctx->gid; sbinfo->full_inums = ctx->full_inums; sbinfo->mode = ctx->mode; sbinfo->huge = ctx->huge; sbinfo->mpol = ctx->mpol; ctx->mpol = NULL; raw_spin_lock_init(&sbinfo->stat_lock); if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) goto failed; spin_lock_init(&sbinfo->shrinklist_lock); INIT_LIST_HEAD(&sbinfo->shrinklist); sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = TMPFS_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; #ifdef CONFIG_TMPFS_XATTR sb->s_xattr = shmem_xattr_handlers; #endif #ifdef CONFIG_TMPFS_POSIX_ACL sb->s_flags |= SB_POSIXACL; #endif uuid_t uuid; uuid_gen(&uuid); super_set_uuid(sb, uuid.b, sizeof(uuid)); #ifdef CONFIG_TMPFS_QUOTA if (ctx->seen & SHMEM_SEEN_QUOTA) { sb->dq_op = &shmem_quota_operations; sb->s_qcop = &dquot_quotactl_sysfile_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; /* Copy the default limits from ctx into sbinfo */ memcpy(&sbinfo->qlimits, &ctx->qlimits, sizeof(struct shmem_quota_limits)); if (shmem_enable_quotas(sb, ctx->quota_types)) goto failed; } #endif /* CONFIG_TMPFS_QUOTA */ inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); if (IS_ERR(inode)) { error = PTR_ERR(inode); goto failed; } inode->i_uid = sbinfo->uid; inode->i_gid = sbinfo->gid; sb->s_root = d_make_root(inode); if (!sb->s_root) goto failed; return 0; failed: shmem_put_super(sb); return error; } static int shmem_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, shmem_fill_super); } static void shmem_free_fc(struct fs_context *fc) { struct shmem_options *ctx = fc->fs_private; if (ctx) { mpol_put(ctx->mpol); kfree(ctx); } } static const struct fs_context_operations shmem_fs_context_ops = { .free = shmem_free_fc, .get_tree = shmem_get_tree, #ifdef CONFIG_TMPFS .parse_monolithic = shmem_parse_options, .parse_param = shmem_parse_one, .reconfigure = shmem_reconfigure, #endif }; static struct kmem_cache *shmem_inode_cachep __ro_after_init; static struct inode *shmem_alloc_inode(struct super_block *sb) { struct shmem_inode_info *info; info = alloc_inode_sb(sb, shmem_inode_cachep, GFP_KERNEL); if (!info) return NULL; return &info->vfs_inode; } static void shmem_free_in_core_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } static void shmem_destroy_inode(struct inode *inode) { if (S_ISREG(inode->i_mode)) mpol_free_shared_policy(&SHMEM_I(inode)->policy); if (S_ISDIR(inode->i_mode)) simple_offset_destroy(shmem_get_offset_ctx(inode)); } static void shmem_init_inode(void *foo) { struct shmem_inode_info *info = foo; inode_init_once(&info->vfs_inode); } static void __init shmem_init_inodecache(void) { shmem_inode_cachep = 
kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), 0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode); } static void __init shmem_destroy_inodecache(void) { kmem_cache_destroy(shmem_inode_cachep); } /* Keep the page in page cache instead of truncating it */ static int shmem_error_remove_folio(struct address_space *mapping, struct folio *folio) { return 0; } static const struct address_space_operations shmem_aops = { .writepage = shmem_writepage, .dirty_folio = noop_dirty_folio, #ifdef CONFIG_TMPFS .write_begin = shmem_write_begin, .write_end = shmem_write_end, #endif #ifdef CONFIG_MIGRATION .migrate_folio = migrate_folio, #endif .error_remove_folio = shmem_error_remove_folio, }; static const struct file_operations shmem_file_operations = { .mmap = shmem_mmap, .open = shmem_file_open, .get_unmapped_area = shmem_get_unmapped_area, #ifdef CONFIG_TMPFS .llseek = shmem_file_llseek, .read_iter = shmem_file_read_iter, .write_iter = shmem_file_write_iter, .fsync = noop_fsync, .splice_read = shmem_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = shmem_fallocate, #endif }; static const struct inode_operations shmem_inode_operations = { .getattr = shmem_getattr, .setattr = shmem_setattr, #ifdef CONFIG_TMPFS_XATTR .listxattr = shmem_listxattr, .set_acl = simple_set_acl, .fileattr_get = shmem_fileattr_get, .fileattr_set = shmem_fileattr_set, #endif }; static const struct inode_operations shmem_dir_inode_operations = { #ifdef CONFIG_TMPFS .getattr = shmem_getattr, .create = shmem_create, .lookup = simple_lookup, .link = shmem_link, .unlink = shmem_unlink, .symlink = shmem_symlink, .mkdir = shmem_mkdir, .rmdir = shmem_rmdir, .mknod = shmem_mknod, .rename = shmem_rename2, .tmpfile = shmem_tmpfile, .get_offset_ctx = shmem_get_offset_ctx, #endif #ifdef CONFIG_TMPFS_XATTR .listxattr = shmem_listxattr, .fileattr_get = shmem_fileattr_get, .fileattr_set = shmem_fileattr_set, #endif #ifdef CONFIG_TMPFS_POSIX_ACL .setattr = shmem_setattr, .set_acl = simple_set_acl, #endif }; static const struct inode_operations shmem_special_inode_operations = { .getattr = shmem_getattr, #ifdef CONFIG_TMPFS_XATTR .listxattr = shmem_listxattr, #endif #ifdef CONFIG_TMPFS_POSIX_ACL .setattr = shmem_setattr, .set_acl = simple_set_acl, #endif }; static const struct super_operations shmem_ops = { .alloc_inode = shmem_alloc_inode, .free_inode = shmem_free_in_core_inode, .destroy_inode = shmem_destroy_inode, #ifdef CONFIG_TMPFS .statfs = shmem_statfs, .show_options = shmem_show_options, #endif #ifdef CONFIG_TMPFS_QUOTA .get_dquots = shmem_get_dquots, #endif .evict_inode = shmem_evict_inode, .drop_inode = generic_delete_inode, .put_super = shmem_put_super, #ifdef CONFIG_TRANSPARENT_HUGEPAGE .nr_cached_objects = shmem_unused_huge_count, .free_cached_objects = shmem_unused_huge_scan, #endif }; static const struct vm_operations_struct shmem_vm_ops = { .fault = shmem_fault, .map_pages = filemap_map_pages, #ifdef CONFIG_NUMA .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, #endif }; static const struct vm_operations_struct shmem_anon_vm_ops = { .fault = shmem_fault, .map_pages = filemap_map_pages, #ifdef CONFIG_NUMA .set_policy = shmem_set_policy, .get_policy = shmem_get_policy, #endif }; int shmem_init_fs_context(struct fs_context *fc) { struct shmem_options *ctx; ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->mode = 0777 | S_ISVTX; ctx->uid = current_fsuid(); ctx->gid = current_fsgid(); fc->fs_private = ctx; fc->ops = &shmem_fs_context_ops; return 
0; } static struct file_system_type shmem_fs_type = { .owner = THIS_MODULE, .name = "tmpfs", .init_fs_context = shmem_init_fs_context, #ifdef CONFIG_TMPFS .parameters = shmem_fs_parameters, #endif .kill_sb = kill_litter_super, .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, }; void __init shmem_init(void) { int error; shmem_init_inodecache(); #ifdef CONFIG_TMPFS_QUOTA error = register_quota_format(&shmem_quota_format); if (error < 0) { pr_err("Could not register quota format\n"); goto out3; } #endif error = register_filesystem(&shmem_fs_type); if (error) { pr_err("Could not register tmpfs\n"); goto out2; } shm_mnt = kern_mount(&shmem_fs_type); if (IS_ERR(shm_mnt)) { error = PTR_ERR(shm_mnt); pr_err("Could not kern_mount tmpfs\n"); goto out1; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; else shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */ #endif return; out1: unregister_filesystem(&shmem_fs_type); out2: #ifdef CONFIG_TMPFS_QUOTA unregister_quota_format(&shmem_quota_format); out3: #endif shmem_destroy_inodecache(); shm_mnt = ERR_PTR(error); } #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) static ssize_t shmem_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { static const int values[] = { SHMEM_HUGE_ALWAYS, SHMEM_HUGE_WITHIN_SIZE, SHMEM_HUGE_ADVISE, SHMEM_HUGE_NEVER, SHMEM_HUGE_DENY, SHMEM_HUGE_FORCE, }; int len = 0; int i; for (i = 0; i < ARRAY_SIZE(values); i++) { len += sysfs_emit_at(buf, len, shmem_huge == values[i] ? "%s[%s]" : "%s%s", i ? " " : "", shmem_format_huge(values[i])); } len += sysfs_emit_at(buf, len, "\n"); return len; } static ssize_t shmem_enabled_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { char tmp[16]; int huge; if (count + 1 > sizeof(tmp)) return -EINVAL; memcpy(tmp, buf, count); tmp[count] = '\0'; if (count && tmp[count - 1] == '\n') tmp[count - 1] = '\0'; huge = shmem_parse_huge(tmp); if (huge == -EINVAL) return -EINVAL; if (!has_transparent_hugepage() && huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY) return -EINVAL; shmem_huge = huge; if (shmem_huge > SHMEM_HUGE_DENY) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; return count; } struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled); #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */ #else /* !CONFIG_SHMEM */ /* * tiny-shmem: simple shmemfs and tmpfs using ramfs code * * This is intended for small system where the benefits of the full * shmem code (swap-backed and resource-limited) are outweighed by * their complexity. On systems without swap this code should be * effectively equivalent, but much lighter weight. 
*/ static struct file_system_type shmem_fs_type = { .name = "tmpfs", .init_fs_context = ramfs_init_fs_context, .parameters = ramfs_fs_parameters, .kill_sb = ramfs_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; void __init shmem_init(void) { BUG_ON(register_filesystem(&shmem_fs_type) != 0); shm_mnt = kern_mount(&shmem_fs_type); BUG_ON(IS_ERR(shm_mnt)); } int shmem_unuse(unsigned int type) { return 0; } int shmem_lock(struct file *file, int lock, struct ucounts *ucounts) { return 0; } void shmem_unlock_mapping(struct address_space *mapping) { } #ifdef CONFIG_MMU unsigned long shmem_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); } #endif void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { truncate_inode_pages_range(inode->i_mapping, lstart, lend); } EXPORT_SYMBOL_GPL(shmem_truncate_range); #define shmem_vm_ops generic_file_vm_ops #define shmem_anon_vm_ops generic_file_vm_ops #define shmem_file_operations ramfs_file_operations #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev, unsigned long flags) { struct inode *inode = ramfs_get_inode(sb, dir, mode, dev); return inode ? inode : ERR_PTR(-ENOSPC); } #endif /* CONFIG_SHMEM */ /* common code */ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags, unsigned int i_flags) { struct inode *inode; struct file *res; if (IS_ERR(mnt)) return ERR_CAST(mnt); if (size < 0 || size > MAX_LFS_FILESIZE) return ERR_PTR(-EINVAL); if (shmem_acct_size(flags, size)) return ERR_PTR(-ENOMEM); if (is_idmapped_mnt(mnt)) return ERR_PTR(-EINVAL); inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); if (IS_ERR(inode)) { shmem_unacct_size(flags, size); return ERR_CAST(inode); } inode->i_flags |= i_flags; inode->i_size = size; clear_nlink(inode); /* It is unlinked */ res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); if (!IS_ERR(res)) res = alloc_file_pseudo(inode, mnt, name, O_RDWR, &shmem_file_operations); if (IS_ERR(res)) iput(inode); return res; } /** * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be * kernel internal. There will be NO LSM permission checks against the * underlying inode. So users of this interface must do LSM checks at a * higher layer. The users are the big_key and shm implementations. LSM * checks are provided at the key or shm level rather than the inode. 
* @name: name for dentry (to be seen in /proc/<pid>/maps * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) { return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE); } EXPORT_SYMBOL_GPL(shmem_kernel_file_setup); /** * shmem_file_setup - get an unlinked file living in tmpfs * @name: name for dentry (to be seen in /proc/<pid>/maps * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) { return __shmem_file_setup(shm_mnt, name, size, flags, 0); } EXPORT_SYMBOL_GPL(shmem_file_setup); /** * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs * @mnt: the tmpfs mount where the file will be created * @name: name for dentry (to be seen in /proc/<pid>/maps * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags) { return __shmem_file_setup(mnt, name, size, flags, 0); } EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); /** * shmem_zero_setup - setup a shared anonymous mapping * @vma: the vma to be mmapped is prepared by do_mmap */ int shmem_zero_setup(struct vm_area_struct *vma) { struct file *file; loff_t size = vma->vm_end - vma->vm_start; /* * Cloning a new file under mmap_lock leads to a lock ordering conflict * between XFS directory reading and selinux: since this file is only * accessible to the user through its mapping, use S_PRIVATE flag to * bypass file security, in the same way as shmem_kernel_file_setup(). */ file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags); if (IS_ERR(file)) return PTR_ERR(file); if (vma->vm_file) fput(vma->vm_file); vma->vm_file = file; vma->vm_ops = &shmem_anon_vm_ops; return 0; } /** * shmem_read_folio_gfp - read into page cache, using specified page allocation flags. * @mapping: the folio's address_space * @index: the folio index * @gfp: the page allocator flags to use if allocating * * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", * with any new page allocations done using the specified allocation flags. * But read_cache_page_gfp() uses the ->read_folio() method: which does not * suit tmpfs, since it may have pages in swapcache, and needs to find those * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. * * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily. 
*/ struct folio *shmem_read_folio_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp) { #ifdef CONFIG_SHMEM struct inode *inode = mapping->host; struct folio *folio; int error; error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE, gfp, NULL, NULL); if (error) return ERR_PTR(error); folio_unlock(folio); return folio; #else /* * The tiny !SHMEM case uses ramfs without swap */ return mapping_read_folio_gfp(mapping, index, gfp); #endif } EXPORT_SYMBOL_GPL(shmem_read_folio_gfp); struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp) { struct folio *folio = shmem_read_folio_gfp(mapping, index, gfp); struct page *page; if (IS_ERR(folio)) return &folio->page; page = folio_file_page(folio, index); if (PageHWPoison(page)) { folio_put(folio); return ERR_PTR(-EIO); } return page; } EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
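/*
 * Illustrative usage sketch, not part of the original file: how kernel code
 * (e.g. a driver) might create an unlinked tmpfs-backed file with
 * shmem_file_setup() and pull one of its pages into the page cache via
 * shmem_read_mapping_page_gfp(). The function name shmem_example_create()
 * and the 1 MiB size are hypothetical; error handling is kept minimal.
 */
#include <linux/err.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/shmem_fs.h>
#include <linux/sizes.h>

static int shmem_example_create(void)
{
	struct file *file;
	struct page *page;

	/* Unlinked tmpfs file; size is accounted up front (no VM_NORESERVE). */
	file = shmem_file_setup("shmem-example", SZ_1M, 0);
	if (IS_ERR(file))
		return PTR_ERR(file);

	/*
	 * Allocate (or find, possibly via the swap cache) page 0 of the file,
	 * using the mapping's default allocation flags.
	 */
	page = shmem_read_mapping_page_gfp(file->f_mapping, 0,
					   mapping_gfp_mask(file->f_mapping));
	if (IS_ERR(page)) {
		fput(file);
		return PTR_ERR(page);
	}

	/* ... use the page, then mark it dirty if it was written ... */
	set_page_dirty(page);
	put_page(page);
	fput(file);
	return 0;
}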
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra * * Provides a framework for enqueueing and running callbacks from hardirq * context. The enqueueing is NMI-safe. */ #include <linux/bug.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/irq_work.h> #include <linux/percpu.h> #include <linux/hardirq.h> #include <linux/irqflags.h> #include <linux/sched.h> #include <linux/tick.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/smp.h> #include <linux/smpboot.h> #include <asm/processor.h> #include <linux/kasan.h> #include <trace/events/ipi.h> static DEFINE_PER_CPU(struct llist_head, raised_list); static DEFINE_PER_CPU(struct llist_head, lazy_list); static DEFINE_PER_CPU(struct task_struct *, irq_workd); static void wake_irq_workd(void) { struct task_struct *tsk = __this_cpu_read(irq_workd); if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk) wake_up_process(tsk); } #ifdef CONFIG_SMP static void irq_work_wake(struct irq_work *entry) { wake_irq_workd(); } static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) = IRQ_WORK_INIT_HARD(irq_work_wake); #endif static int irq_workd_should_run(unsigned int cpu) { return !llist_empty(this_cpu_ptr(&lazy_list)); } /* * Claim the entry so that no one else will poke at it. */ static bool irq_work_claim(struct irq_work *work) { int oflags; oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags); /* * If the work is already pending, no need to raise the IPI. * The pairing smp_mb() in irq_work_single() makes sure * everything we did before is visible.
*/ if (oflags & IRQ_WORK_PENDING) return false; return true; } void __weak arch_irq_work_raise(void) { /* * Lame architectures will get the timer tick callback */ } static __always_inline void irq_work_raise(struct irq_work *work) { if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt()) trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func); arch_irq_work_raise(); } /* Enqueue on current CPU, work must already be claimed and preempt disabled */ static void __irq_work_queue_local(struct irq_work *work) { struct llist_head *list; bool rt_lazy_work = false; bool lazy_work = false; int work_flags; work_flags = atomic_read(&work->node.a_flags); if (work_flags & IRQ_WORK_LAZY) lazy_work = true; else if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(work_flags & IRQ_WORK_HARD_IRQ)) rt_lazy_work = true; if (lazy_work || rt_lazy_work) list = this_cpu_ptr(&lazy_list); else list = this_cpu_ptr(&raised_list); if (!llist_add(&work->node.llist, list)) return; /* If the work is "lazy", handle it from next tick if any */ if (!lazy_work || tick_nohz_tick_stopped()) irq_work_raise(work); } /* Enqueue the irq work @work on the current CPU */ bool irq_work_queue(struct irq_work *work) { /* Only queue if not already pending */ if (!irq_work_claim(work)) return false; /* Queue the entry and raise the IPI if needed. */ preempt_disable(); __irq_work_queue_local(work); preempt_enable(); return true; } EXPORT_SYMBOL_GPL(irq_work_queue); /* * Enqueue the irq_work @work on @cpu unless it's already pending * somewhere. * * Can be re-enqueued while the callback is still in progress. */ bool irq_work_queue_on(struct irq_work *work, int cpu) { #ifndef CONFIG_SMP return irq_work_queue(work); #else /* CONFIG_SMP: */ /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(cpu)); /* Only queue if not already pending */ if (!irq_work_claim(work)) return false; kasan_record_aux_stack_noalloc(work); preempt_disable(); if (cpu != smp_processor_id()) { /* Arch remote IPI send/receive backend aren't NMI safe */ WARN_ON_ONCE(in_nmi()); /* * On PREEMPT_RT the items which are not marked as * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work * item is used on the remote CPU to wake the thread. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) { if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu))) goto out; work = &per_cpu(irq_work_wakeup, cpu); if (!irq_work_claim(work)) goto out; } __smp_call_single_queue(cpu, &work->node.llist); } else { __irq_work_queue_local(work); } out: preempt_enable(); return true; #endif /* CONFIG_SMP */ } bool irq_work_needs_cpu(void) { struct llist_head *raised, *lazy; raised = this_cpu_ptr(&raised_list); lazy = this_cpu_ptr(&lazy_list); if (llist_empty(raised) || arch_irq_work_has_interrupt()) if (llist_empty(lazy)) return false; /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); return true; } void irq_work_single(void *arg) { struct irq_work *work = arg; int flags; /* * Clear the PENDING bit, after this point the @work can be re-used. * The PENDING bit acts as a lock, and we own it, so we can clear it * without atomic ops. */ flags = atomic_read(&work->node.a_flags); flags &= ~IRQ_WORK_PENDING; atomic_set(&work->node.a_flags, flags); /* * See irq_work_claim(). 
*/ smp_mb(); lockdep_irq_work_enter(flags); work->func(work); lockdep_irq_work_exit(flags); /* * Clear the BUSY bit, if set, and return to the free state if no-one * else claimed it meanwhile. */ (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY); if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || !arch_irq_work_has_interrupt()) rcuwait_wake_up(&work->irqwait); } static void irq_work_run_list(struct llist_head *list) { struct irq_work *work, *tmp; struct llist_node *llnode; /* * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed * in a per-CPU thread in preemptible context. Only the items which are * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context. */ BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT)); if (llist_empty(list)) return; llnode = llist_del_all(list); llist_for_each_entry_safe(work, tmp, llnode, node.llist) irq_work_single(work); } /* * hotplug calls this through: * hotplug_cfd() -> flush_smp_call_function_queue() */ void irq_work_run(void) { irq_work_run_list(this_cpu_ptr(&raised_list)); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) irq_work_run_list(this_cpu_ptr(&lazy_list)); else wake_irq_workd(); } EXPORT_SYMBOL_GPL(irq_work_run); void irq_work_tick(void) { struct llist_head *raised = this_cpu_ptr(&raised_list); if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) irq_work_run_list(raised); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) irq_work_run_list(this_cpu_ptr(&lazy_list)); else wake_irq_workd(); } /* * Synchronize against the irq_work @entry, ensures the entry is not * currently in use. */ void irq_work_sync(struct irq_work *work) { lockdep_assert_irqs_enabled(); might_sleep(); if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || !arch_irq_work_has_interrupt()) { rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work), TASK_UNINTERRUPTIBLE); return; } while (irq_work_is_busy(work)) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); static void run_irq_workd(unsigned int cpu) { irq_work_run_list(this_cpu_ptr(&lazy_list)); } static void irq_workd_setup(unsigned int cpu) { sched_set_fifo_low(current); } static struct smp_hotplug_thread irqwork_threads = { .store = &irq_workd, .setup = irq_workd_setup, .thread_should_run = irq_workd_should_run, .thread_fn = run_irq_workd, .thread_comm = "irq_work/%u", }; static __init int irq_work_init_threads(void) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) BUG_ON(smpboot_register_percpu_thread(&irqwork_threads)); return 0; } early_initcall(irq_work_init_threads);
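/*
 * Illustrative usage sketch, not part of the original file: defining an
 * irq_work item and raising it from NMI or hard-IRQ context, so that the
 * callback runs from the self-IPI (or from the next timer tick on
 * architectures without an irq_work interrupt). The names example_irq_work
 * and example_irq_work_fn are hypothetical.
 */
#include <linux/irq_work.h>
#include <linux/printk.h>

static void example_irq_work_fn(struct irq_work *work)
{
	/* Runs in hard-IRQ context (or the irq_work/%u thread on PREEMPT_RT). */
	pr_info("example irq_work callback ran\n");
}

static DEFINE_IRQ_WORK(example_irq_work, example_irq_work_fn);

static void example_raise(void)
{
	/* NMI-safe: claims the item and raises the IPI only if not already pending. */
	if (!irq_work_queue(&example_irq_work))
		pr_debug("example irq_work was already pending\n");
}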
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * i2c-core.h - interfaces internal to the I2C framework */ #include <linux/kconfig.h> #include <linux/rwsem.h> struct i2c_devinfo { struct list_head list; int busnum; struct i2c_board_info board_info; }; /* board_lock protects board_list and first_dynamic_bus_num. * only i2c core components are allowed to use these symbols. */ extern struct rw_semaphore __i2c_board_lock; extern struct list_head __i2c_board_list; extern int __i2c_first_dynamic_bus_num; int i2c_check_7bit_addr_validity_strict(unsigned short addr); int i2c_dev_irq_from_resources(const struct resource *resources, unsigned int num_resources); /* * We only allow atomic transfers for very late communication, e.g. to access a * PMIC when powering down. Atomic transfers are a corner case and not for * generic use! */ static inline bool i2c_in_atomic_xfer_mode(void) { return system_state > SYSTEM_RUNNING && (IS_ENABLED(CONFIG_PREEMPT_COUNT) ? !preemptible() : irqs_disabled()); } static inline int __i2c_lock_bus_helper(struct i2c_adapter *adap) { int ret = 0; if (i2c_in_atomic_xfer_mode()) { WARN(!adap->algo->master_xfer_atomic && !adap->algo->smbus_xfer_atomic, "No atomic I2C transfer handler for '%s'\n", dev_name(&adap->dev)); ret = i2c_trylock_bus(adap, I2C_LOCK_SEGMENT) ? 0 : -EAGAIN; } else { i2c_lock_bus(adap, I2C_LOCK_SEGMENT); } return ret; } static inline int __i2c_check_suspended(struct i2c_adapter *adap) { if (test_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags)) { if (!test_and_set_bit(I2C_ALF_SUSPEND_REPORTED, &adap->locked_flags)) dev_WARN(&adap->dev, "Transfer while suspended\n"); return -ESHUTDOWN; } return 0; } #ifdef CONFIG_ACPI void i2c_acpi_register_devices(struct i2c_adapter *adap); int i2c_acpi_get_irq(struct i2c_client *client, bool *wake_capable); #else /* CONFIG_ACPI */ static inline void i2c_acpi_register_devices(struct i2c_adapter *adap) { } static inline int i2c_acpi_get_irq(struct i2c_client *client, bool *wake_capable) { return 0; } #endif /* CONFIG_ACPI */ extern struct notifier_block i2c_acpi_notifier; #ifdef CONFIG_ACPI_I2C_OPREGION int i2c_acpi_install_space_handler(struct i2c_adapter *adapter); void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter); #else /* CONFIG_ACPI_I2C_OPREGION */ static inline int i2c_acpi_install_space_handler(struct i2c_adapter *adapter) { return 0; } static inline void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter) { } #endif /* CONFIG_ACPI_I2C_OPREGION */ #ifdef CONFIG_OF void of_i2c_register_devices(struct i2c_adapter *adap); #else static inline void of_i2c_register_devices(struct i2c_adapter *adap) { } #endif extern struct notifier_block i2c_of_notifier; #if IS_ENABLED(CONFIG_I2C_SMBUS) int i2c_setup_smbus_alert(struct i2c_adapter *adap); #else static inline int i2c_setup_smbus_alert(struct i2c_adapter *adap) { return 0; } #endif
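/*
 * Illustrative sketch, not part of the original header: roughly how the I2C
 * core uses the helpers above around a transfer (cf. i2c_transfer() and
 * __i2c_transfer() in i2c-core-base.c, which also handle retries and the
 * master_xfer_atomic variant). The function name example_core_xfer() is
 * hypothetical; it assumes <linux/i2c.h> has been included.
 */
static inline int example_core_xfer(struct i2c_adapter *adap,
				    struct i2c_msg *msgs, int num)
{
	int ret;

	/* Trylock in atomic mode (very late transfers), sleepable lock otherwise. */
	ret = __i2c_lock_bus_helper(adap);
	if (ret)
		return ret;

	/* Refuse to touch an adapter that was marked suspended. */
	ret = __i2c_check_suspended(adap);
	if (!ret)
		ret = adap->algo->master_xfer(adap, msgs, num);

	i2c_unlock_bus(adap, I2C_LOCK_SEGMENT);
	return ret;
}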
// SPDX-License-Identifier: GPL-2.0-or-later /* * USB Wacom tablet support - system specific code */ #include "wacom_wac.h" #include "wacom.h" #include <linux/input/mt.h> #define WAC_MSG_RETRIES 5 #define WAC_CMD_RETRIES 10 #define DEV_ATTR_RW_PERM (S_IRUGO | S_IWUSR | S_IWGRP) #define DEV_ATTR_WO_PERM (S_IWUSR | S_IWGRP) #define DEV_ATTR_RO_PERM (S_IRUSR | S_IRGRP) static int wacom_get_report(struct hid_device *hdev, u8 type, u8 *buf, size_t size, unsigned int retries) { int retval; do { retval = hid_hw_raw_request(hdev, buf[0], buf, size, type, HID_REQ_GET_REPORT); } while ((retval == -ETIMEDOUT || retval == -EAGAIN) && --retries); if 
(retval < 0) hid_err(hdev, "wacom_get_report: ran out of retries " "(last error = %d)\n", retval); return retval; } static int wacom_set_report(struct hid_device *hdev, u8 type, u8 *buf, size_t size, unsigned int retries) { int retval; do { retval = hid_hw_raw_request(hdev, buf[0], buf, size, type, HID_REQ_SET_REPORT); } while ((retval == -ETIMEDOUT || retval == -EAGAIN) && --retries); if (retval < 0) hid_err(hdev, "wacom_set_report: ran out of retries " "(last error = %d)\n", retval); return retval; } static void wacom_wac_queue_insert(struct hid_device *hdev, struct kfifo_rec_ptr_2 *fifo, u8 *raw_data, int size) { bool warned = false; while (kfifo_avail(fifo) < size) { if (!warned) hid_warn(hdev, "%s: kfifo has filled, starting to drop events\n", __func__); warned = true; kfifo_skip(fifo); } kfifo_in(fifo, raw_data, size); } static void wacom_wac_queue_flush(struct hid_device *hdev, struct kfifo_rec_ptr_2 *fifo) { while (!kfifo_is_empty(fifo)) { u8 buf[WACOM_PKGLEN_MAX]; int size; int err; size = kfifo_out(fifo, buf, sizeof(buf)); err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, false); if (err) { hid_warn(hdev, "%s: unable to flush event due to error %d\n", __func__, err); } } } static int wacom_wac_pen_serial_enforce(struct hid_device *hdev, struct hid_report *report, u8 *raw_data, int report_size) { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct wacom_features *features = &wacom_wac->features; bool flush = false; bool insert = false; int i, j; if (wacom_wac->serial[0] || !(features->quirks & WACOM_QUIRK_TOOLSERIAL)) return 0; /* Queue events which have invalid tool type or serial number */ for (i = 0; i < report->maxfield; i++) { for (j = 0; j < report->field[i]->maxusage; j++) { struct hid_field *field = report->field[i]; struct hid_usage *usage = &field->usage[j]; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); unsigned int offset; unsigned int size; unsigned int value; if (equivalent_usage != HID_DG_INRANGE && equivalent_usage != HID_DG_TOOLSERIALNUMBER && equivalent_usage != WACOM_HID_WD_SERIALHI && equivalent_usage != WACOM_HID_WD_TOOLTYPE) continue; offset = field->report_offset; size = field->report_size; value = hid_field_extract(hdev, raw_data+1, offset + j * size, size); /* If we go out of range, we need to flush the queue ASAP */ if (equivalent_usage == HID_DG_INRANGE) value = !value; if (value) { flush = true; switch (equivalent_usage) { case HID_DG_TOOLSERIALNUMBER: wacom_wac->serial[0] = value; break; case WACOM_HID_WD_SERIALHI: wacom_wac->serial[0] |= ((__u64)value) << 32; break; case WACOM_HID_WD_TOOLTYPE: wacom_wac->id[0] = value; break; } } else { insert = true; } } } if (flush) wacom_wac_queue_flush(hdev, wacom_wac->pen_fifo); else if (insert) wacom_wac_queue_insert(hdev, wacom_wac->pen_fifo, raw_data, report_size); return insert && !flush; } static int wacom_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *raw_data, int size) { struct wacom *wacom = hid_get_drvdata(hdev); if (wacom->wacom_wac.features.type == BOOTLOADER) return 0; if (size > WACOM_PKGLEN_MAX) return 1; if (wacom_wac_pen_serial_enforce(hdev, report, raw_data, size)) return -1; memcpy(wacom->wacom_wac.data, raw_data, size); wacom_wac_irq(&wacom->wacom_wac, size); return 0; } static int wacom_open(struct input_dev *dev) { struct wacom *wacom = input_get_drvdata(dev); return hid_hw_open(wacom->hdev); } static void wacom_close(struct input_dev *dev) { struct wacom *wacom = input_get_drvdata(dev); /* * 
wacom->hdev should never be null, but surprisingly, I had the case * once while unplugging the Wacom Wireless Receiver. */ if (wacom->hdev) hid_hw_close(wacom->hdev); } /* * Calculate the resolution of the X or Y axis using hidinput_calc_abs_res. */ static int wacom_calc_hid_res(int logical_extents, int physical_extents, unsigned unit, int exponent) { struct hid_field field = { .logical_maximum = logical_extents, .physical_maximum = physical_extents, .unit = unit, .unit_exponent = exponent, }; return hidinput_calc_abs_res(&field, ABS_X); } static void wacom_hid_usage_quirk(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage) { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_features *features = &wacom->wacom_wac.features; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); /* * The Dell Canvas 27 needs to be switched to its vendor-defined * report to provide the best resolution. */ if (hdev->vendor == USB_VENDOR_ID_WACOM && hdev->product == 0x4200 && field->application == HID_UP_MSVENDOR) { wacom->wacom_wac.mode_report = field->report->id; wacom->wacom_wac.mode_value = 2; } /* * ISDv4 devices which predate HID's adoption of the * HID_DG_BARELSWITCH2 usage use 0x000D0000 in its * position instead. We can accurately detect if a * usage with that value should be HID_DG_BARRELSWITCH2 * based on the surrounding usages, which have remained * constant across generations. */ if (features->type == HID_GENERIC && usage->hid == 0x000D0000 && field->application == HID_DG_PEN && field->physical == HID_DG_STYLUS) { int i = usage->usage_index; if (i-4 >= 0 && i+1 < field->maxusage && field->usage[i-4].hid == HID_DG_TIPSWITCH && field->usage[i-3].hid == HID_DG_BARRELSWITCH && field->usage[i-2].hid == HID_DG_ERASER && field->usage[i-1].hid == HID_DG_INVERT && field->usage[i+1].hid == HID_DG_INRANGE) { usage->hid = HID_DG_BARRELSWITCH2; } } /* * Wacom's AES devices use different vendor-defined usages to * report serial number information compared to their branded * hardware. The usages are also sometimes ill-defined and do * not have the correct logical min/max values set. Lets patch * the descriptor to use the branded usage convention and fix * the errors. 
*/ if (usage->hid == WACOM_HID_WT_SERIALNUMBER && field->report_size == 16 && field->index + 2 < field->report->maxfield) { struct hid_field *a = field->report->field[field->index + 1]; struct hid_field *b = field->report->field[field->index + 2]; if (a->maxusage > 0 && a->usage[0].hid == HID_DG_TOOLSERIALNUMBER && a->report_size == 32 && b->maxusage > 0 && b->usage[0].hid == 0xFF000000 && b->report_size == 8) { features->quirks |= WACOM_QUIRK_AESPEN; usage->hid = WACOM_HID_WD_TOOLTYPE; field->logical_minimum = S16_MIN; field->logical_maximum = S16_MAX; a->logical_minimum = S32_MIN; a->logical_maximum = S32_MAX; b->usage[0].hid = WACOM_HID_WD_SERIALHI; b->logical_minimum = 0; b->logical_maximum = U8_MAX; } } /* 2nd-generation Intuos Pro Large has incorrect Y maximum */ if (hdev->vendor == USB_VENDOR_ID_WACOM && hdev->product == 0x0358 && WACOM_PEN_FIELD(field) && equivalent_usage == HID_GD_Y) { field->logical_maximum = 43200; } } static void wacom_feature_mapping(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage) { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_features *features = &wacom->wacom_wac.features; struct hid_data *hid_data = &wacom->wacom_wac.hid_data; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); u8 *data; int ret; u32 n; wacom_hid_usage_quirk(hdev, field, usage); switch (equivalent_usage) { case WACOM_HID_WD_TOUCH_RING_SETTING: wacom->generic_has_leds = true; break; case HID_DG_CONTACTMAX: /* leave touch_max as is if predefined */ if (!features->touch_max) { /* read manually */ n = hid_report_len(field->report); data = hid_alloc_report_buf(field->report, GFP_KERNEL); if (!data) break; data[0] = field->report->id; ret = wacom_get_report(hdev, HID_FEATURE_REPORT, data, n, WAC_CMD_RETRIES); if (ret == n && features->type == HID_GENERIC) { ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, data, n, 0); } else if (ret == 2 && features->type != HID_GENERIC) { features->touch_max = data[1]; } else { features->touch_max = 16; hid_warn(hdev, "wacom_feature_mapping: " "could not get HID_DG_CONTACTMAX, " "defaulting to %d\n", features->touch_max); } kfree(data); } break; case HID_DG_INPUTMODE: /* Ignore if value index is out of bounds. 
*/ if (usage->usage_index >= field->report_count) { dev_err(&hdev->dev, "HID_DG_INPUTMODE out of range\n"); break; } hid_data->inputmode = field->report->id; hid_data->inputmode_index = usage->usage_index; break; case HID_UP_DIGITIZER: if (field->report->id == 0x0B && (field->application == WACOM_HID_G9_PEN || field->application == WACOM_HID_G11_PEN)) { wacom->wacom_wac.mode_report = field->report->id; wacom->wacom_wac.mode_value = 0; } break; case WACOM_HID_WD_DATAMODE: wacom->wacom_wac.mode_report = field->report->id; wacom->wacom_wac.mode_value = 2; break; case WACOM_HID_UP_G9: case WACOM_HID_UP_G11: if (field->report->id == 0x03 && (field->application == WACOM_HID_G9_TOUCHSCREEN || field->application == WACOM_HID_G11_TOUCHSCREEN)) { wacom->wacom_wac.mode_report = field->report->id; wacom->wacom_wac.mode_value = 0; } break; case WACOM_HID_WD_OFFSETLEFT: case WACOM_HID_WD_OFFSETTOP: case WACOM_HID_WD_OFFSETRIGHT: case WACOM_HID_WD_OFFSETBOTTOM: /* read manually */ n = hid_report_len(field->report); data = hid_alloc_report_buf(field->report, GFP_KERNEL); if (!data) break; data[0] = field->report->id; ret = wacom_get_report(hdev, HID_FEATURE_REPORT, data, n, WAC_CMD_RETRIES); if (ret == n) { ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, data, n, 0); } else { hid_warn(hdev, "%s: could not retrieve sensor offsets\n", __func__); } kfree(data); break; } } /* * Interface Descriptor of wacom devices can be incomplete and * inconsistent so wacom_features table is used to store stylus * device's packet lengths, various maximum values, and tablet * resolution based on product ID's. * * For devices that contain 2 interfaces, wacom_features table is * inaccurate for the touch interface. Since the Interface Descriptor * for touch interfaces has pretty complete data, this function exists * to query tablet for this missing information instead of hard coding in * an additional table. * * A typical Interface Descriptor for a stylus will contain a * boot mouse application collection that is not of interest and this * function will ignore it. * * It also contains a digitizer application collection that also is not * of interest since any information it contains would be duplicate * of what is in wacom_features. Usually it defines a report of an array * of bytes that could be used as max length of the stylus packet returned. * If it happens to define a Digitizer-Stylus Physical Collection then * the X and Y logical values contain valid data but it is ignored. * * A typical Interface Descriptor for a touch interface will contain a * Digitizer-Finger Physical Collection which will define both logical * X/Y maximum as well as the physical size of tablet. Since touch * interfaces haven't supported pressure or distance, this is enough * information to override invalid values in the wacom_features table. * * Intuos5 touch interface and 3rd gen Bamboo Touch do not contain useful * data. We deal with them after returning from this function. */ static void wacom_usage_mapping(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage) { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_features *features = &wacom->wacom_wac.features; bool finger = WACOM_FINGER_FIELD(field); bool pen = WACOM_PEN_FIELD(field); unsigned equivalent_usage = wacom_equivalent_usage(usage->hid); /* * Requiring Stylus Usage will ignore boot mouse * X/Y values and some cases of invalid Digitizer X/Y * values commonly reported. 
*/ if (pen) features->device_type |= WACOM_DEVICETYPE_PEN; else if (finger) features->device_type |= WACOM_DEVICETYPE_TOUCH; else return; wacom_hid_usage_quirk(hdev, field, usage); switch (equivalent_usage) { case HID_GD_X: features->x_max = field->logical_maximum; if (finger) { features->x_phy = field->physical_maximum; if ((features->type != BAMBOO_PT) && (features->type != BAMBOO_TOUCH)) { features->unit = field->unit; features->unitExpo = field->unit_exponent; } } break; case HID_GD_Y: features->y_max = field->logical_maximum; if (finger) { features->y_phy = field->physical_maximum; if ((features->type != BAMBOO_PT) && (features->type != BAMBOO_TOUCH)) { features->unit = field->unit; features->unitExpo = field->unit_exponent; } } break; case HID_DG_TIPPRESSURE: if (pen) features->pressure_max = field->logical_maximum; break; } if (features->type == HID_GENERIC) wacom_wac_usage_mapping(hdev, field, usage); } static void wacom_post_parse_hid(struct hid_device *hdev, struct wacom_features *features) { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_wac *wacom_wac = &wacom->wacom_wac; if (features->type == HID_GENERIC) { /* Any last-minute generic device setup */ if (wacom_wac->has_mode_change) { if (wacom_wac->is_direct_mode) features->device_type |= WACOM_DEVICETYPE_DIRECT; else features->device_type &= ~WACOM_DEVICETYPE_DIRECT; } if (features->touch_max > 1) { if (features->device_type & WACOM_DEVICETYPE_DIRECT) input_mt_init_slots(wacom_wac->touch_input, wacom_wac->features.touch_max, INPUT_MT_DIRECT); else input_mt_init_slots(wacom_wac->touch_input, wacom_wac->features.touch_max, INPUT_MT_POINTER); } } } static void wacom_parse_hid(struct hid_device *hdev, struct wacom_features *features) { struct hid_report_enum *rep_enum; struct hid_report *hreport; int i, j; /* check features first */ rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(hreport, &rep_enum->report_list, list) { for (i = 0; i < hreport->maxfield; i++) { /* Ignore if report count is out of bounds. 
*/ if (hreport->field[i]->report_count < 1) continue; for (j = 0; j < hreport->field[i]->maxusage; j++) { wacom_feature_mapping(hdev, hreport->field[i], hreport->field[i]->usage + j); } } } /* now check the input usages */ rep_enum = &hdev->report_enum[HID_INPUT_REPORT]; list_for_each_entry(hreport, &rep_enum->report_list, list) { if (!hreport->maxfield) continue; for (i = 0; i < hreport->maxfield; i++) for (j = 0; j < hreport->field[i]->maxusage; j++) wacom_usage_mapping(hdev, hreport->field[i], hreport->field[i]->usage + j); } wacom_post_parse_hid(hdev, features); } static int wacom_hid_set_device_mode(struct hid_device *hdev) { struct wacom *wacom = hid_get_drvdata(hdev); struct hid_data *hid_data = &wacom->wacom_wac.hid_data; struct hid_report *r; struct hid_report_enum *re; if (hid_data->inputmode < 0) return 0; re = &(hdev->report_enum[HID_FEATURE_REPORT]); r = re->report_id_hash[hid_data->inputmode]; if (r) { r->field[0]->value[hid_data->inputmode_index] = 2; hid_hw_request(hdev, r, HID_REQ_SET_REPORT); } return 0; } static int wacom_set_device_mode(struct hid_device *hdev, struct wacom_wac *wacom_wac) { u8 *rep_data; struct hid_report *r; struct hid_report_enum *re; u32 length; int error = -ENOMEM, limit = 0; if (wacom_wac->mode_report < 0) return 0; re = &(hdev->report_enum[HID_FEATURE_REPORT]); r = re->report_id_hash[wacom_wac->mode_report]; if (!r) return -EINVAL; rep_data = hid_alloc_report_buf(r, GFP_KERNEL); if (!rep_data) return -ENOMEM; length = hid_report_len(r); do { rep_data[0] = wacom_wac->mode_report; rep_data[1] = wacom_wac->mode_value; error = wacom_set_report(hdev, HID_FEATURE_REPORT, rep_data, length, 1); if (error >= 0) error = wacom_get_report(hdev, HID_FEATURE_REPORT, rep_data, length, 1); } while (error >= 0 && rep_data[1] != wacom_wac->mode_report && limit++ < WAC_MSG_RETRIES); kfree(rep_data); return error < 0 ? error : 0; } static int wacom_bt_query_tablet_data(struct hid_device *hdev, u8 speed, struct wacom_features *features) { struct wacom *wacom = hid_get_drvdata(hdev); int ret; u8 rep_data[2]; switch (features->type) { case GRAPHIRE_BT: rep_data[0] = 0x03; rep_data[1] = 0x00; ret = wacom_set_report(hdev, HID_FEATURE_REPORT, rep_data, 2, 3); if (ret >= 0) { rep_data[0] = speed == 0 ? 0x05 : 0x06; rep_data[1] = 0x00; ret = wacom_set_report(hdev, HID_FEATURE_REPORT, rep_data, 2, 3); if (ret >= 0) { wacom->wacom_wac.bt_high_speed = speed; return 0; } } /* * Note that if the raw queries fail, it's not a hard failure * and it is safe to continue */ hid_warn(hdev, "failed to poke device, command %d, err %d\n", rep_data[0], ret); break; case INTUOS4WL: if (speed == 1) wacom->wacom_wac.bt_features &= ~0x20; else wacom->wacom_wac.bt_features |= 0x20; rep_data[0] = 0x03; rep_data[1] = wacom->wacom_wac.bt_features; ret = wacom_set_report(hdev, HID_FEATURE_REPORT, rep_data, 2, 1); if (ret >= 0) wacom->wacom_wac.bt_high_speed = speed; break; } return 0; } /* * Switch the tablet into its most-capable mode. Wacom tablets are * typically configured to power-up in a mode which sends mouse-like * reports to the OS. To get absolute position, pressure data, etc. * from the tablet, it is necessary to switch the tablet out of this * mode and into one which sends the full range of tablet data. 
*/ static int _wacom_query_tablet_data(struct wacom *wacom) { struct hid_device *hdev = wacom->hdev; struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct wacom_features *features = &wacom_wac->features; if (hdev->bus == BUS_BLUETOOTH) return wacom_bt_query_tablet_data(hdev, 1, features); if (features->type != HID_GENERIC) { if (features->device_type & WACOM_DEVICETYPE_TOUCH) { if (features->type > TABLETPC) { /* MT Tablet PC touch */ wacom_wac->mode_report = 3; wacom_wac->mode_value = 4; } else if (features->type == WACOM_24HDT) { wacom_wac->mode_report = 18; wacom_wac->mode_value = 2; } else if (features->type == WACOM_27QHDT) { wacom_wac->mode_report = 131; wacom_wac->mode_value = 2; } else if (features->type == BAMBOO_PAD) { wacom_wac->mode_report = 2; wacom_wac->mode_value = 2; } } else if (features->device_type & WACOM_DEVICETYPE_PEN) { if (features->type <= BAMBOO_PT) { wacom_wac->mode_report = 2; wacom_wac->mode_value = 2; } } } wacom_set_device_mode(hdev, wacom_wac); if (features->type == HID_GENERIC) return wacom_hid_set_device_mode(hdev); return 0; } static void wacom_retrieve_hid_descriptor(struct hid_device *hdev, struct wacom_features *features) { struct wacom *wacom = hid_get_drvdata(hdev); struct usb_interface *intf = wacom->intf; /* default features */ features->x_fuzz = 4; features->y_fuzz = 4; features->pressure_fuzz = 0; features->distance_fuzz = 1; features->tilt_fuzz = 1; /* * The wireless device HID is basic and layout conflicts with * other tablets (monitor and touch interface can look like pen). * Skip the query for this type and modify defaults based on * interface number. */ if (features->type == WIRELESS && intf) { if (intf->cur_altsetting->desc.bInterfaceNumber == 0) features->device_type = WACOM_DEVICETYPE_WL_MONITOR; else features->device_type = WACOM_DEVICETYPE_NONE; return; } wacom_parse_hid(hdev, features); } struct wacom_hdev_data { struct list_head list; struct kref kref; struct hid_device *dev; struct wacom_shared shared; }; static LIST_HEAD(wacom_udev_list); static DEFINE_MUTEX(wacom_udev_list_lock); static bool wacom_are_sibling(struct hid_device *hdev, struct hid_device *sibling) { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_features *features = &wacom->wacom_wac.features; struct wacom *sibling_wacom = hid_get_drvdata(sibling); struct wacom_features *sibling_features = &sibling_wacom->wacom_wac.features; __u32 oVid = features->oVid ? features->oVid : hdev->vendor; __u32 oPid = features->oPid ? features->oPid : hdev->product; /* The defined oVid/oPid must match that of the sibling */ if (features->oVid != HID_ANY_ID && sibling->vendor != oVid) return false; if (features->oPid != HID_ANY_ID && sibling->product != oPid) return false; /* * Devices with the same VID/PID must share the same physical * device path, while those with different VID/PID must share * the same physical parent device path. */ if (hdev->vendor == sibling->vendor && hdev->product == sibling->product) { if (!hid_compare_device_paths(hdev, sibling, '/')) return false; } else { if (!hid_compare_device_paths(hdev, sibling, '.')) return false; } /* Skip the remaining heuristics unless you are a HID_GENERIC device */ if (features->type != HID_GENERIC) return true; /* * Direct-input devices may not be siblings of indirect-input * devices. */ if ((features->device_type & WACOM_DEVICETYPE_DIRECT) && !(sibling_features->device_type & WACOM_DEVICETYPE_DIRECT)) return false; /* * Indirect-input devices may not be siblings of direct-input * devices. 
*/ if (!(features->device_type & WACOM_DEVICETYPE_DIRECT) && (sibling_features->device_type & WACOM_DEVICETYPE_DIRECT)) return false; /* Pen devices may only be siblings of touch devices */ if ((features->device_type & WACOM_DEVICETYPE_PEN) && !(sibling_features->device_type & WACOM_DEVICETYPE_TOUCH)) return false; /* Touch devices may only be siblings of pen devices */ if ((features->device_type & WACOM_DEVICETYPE_TOUCH) && !(sibling_features->device_type & WACOM_DEVICETYPE_PEN)) return false; /* * No reason could be found for these two devices to NOT be * siblings, so there's a good chance they ARE siblings */ return true; } static struct wacom_hdev_data *wacom_get_hdev_data(struct hid_device *hdev) { struct wacom_hdev_data *data; /* Try to find an already-probed interface from the same device */ list_for_each_entry(data, &wacom_udev_list, list) { if (hid_compare_device_paths(hdev, data->dev, '/')) { kref_get(&data->kref); return data; } } /* Fallback to finding devices that appear to be "siblings" */ list_for_each_entry(data, &wacom_udev_list, list) { if (wacom_are_sibling(hdev, data->dev)) { kref_get(&data->kref); return data; } } return NULL; } static void wacom_release_shared_data(struct kref *kref) { struct wacom_hdev_data *data = container_of(kref, struct wacom_hdev_data, kref); mutex_lock(&wacom_udev_list_lock); list_del(&data->list); mutex_unlock(&wacom_udev_list_lock); kfree(data); } static void wacom_remove_shared_data(void *res) { struct wacom *wacom = res; struct wacom_hdev_data *data; struct wacom_wac *wacom_wac = &wacom->wacom_wac; if (wacom_wac->shared) { data = container_of(wacom_wac->shared, struct wacom_hdev_data, shared); if (wacom_wac->shared->touch == wacom->hdev) wacom_wac->shared->touch = NULL; else if (wacom_wac->shared->pen == wacom->hdev) wacom_wac->shared->pen = NULL; kref_put(&data->kref, wacom_release_shared_data); wacom_wac->shared = NULL; } } static int wacom_add_shared_data(struct hid_device *hdev) { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct wacom_hdev_data *data; int retval = 0; mutex_lock(&wacom_udev_list_lock); data = wacom_get_hdev_data(hdev); if (!data) { data = kzalloc(sizeof(struct wacom_hdev_data), GFP_KERNEL); if (!data) { mutex_unlock(&wacom_udev_list_lock); return -ENOMEM; } kref_init(&data->kref); data->dev = hdev; list_add_tail(&data->list, &wacom_udev_list); } mutex_unlock(&wacom_udev_list_lock); wacom_wac->shared = &data->shared; retval = devm_add_action_or_reset(&hdev->dev, wacom_remove_shared_data, wacom); if (retval) return retval; if (wacom_wac->features.device_type & WACOM_DEVICETYPE_TOUCH) wacom_wac->shared->touch = hdev; else if (wacom_wac->features.device_type & WACOM_DEVICETYPE_PEN) wacom_wac->shared->pen = hdev; return retval; } static int wacom_led_control(struct wacom *wacom) { unsigned char *buf; int retval; unsigned char report_id = WAC_CMD_LED_CONTROL; int buf_size = 9; if (!wacom->led.groups) return -ENOTSUPP; if (wacom->wacom_wac.features.type == REMOTE) return -ENOTSUPP; if (wacom->wacom_wac.pid) { /* wireless connected */ report_id = WAC_CMD_WL_LED_CONTROL; buf_size = 13; } else if (wacom->wacom_wac.features.type == INTUOSP2_BT) { report_id = WAC_CMD_WL_INTUOSP2; buf_size = 51; } buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOMEM; if (wacom->wacom_wac.features.type == HID_GENERIC) { buf[0] = WAC_CMD_LED_CONTROL_GENERIC; buf[1] = wacom->led.llv; buf[2] = wacom->led.groups[0].select & 0x03; } else if ((wacom->wacom_wac.features.type >= INTUOS5S && 
wacom->wacom_wac.features.type <= INTUOSPL)) { /* * Touch Ring and crop mark LED luminance may take on * one of four values: * 0 = Low; 1 = Medium; 2 = High; 3 = Off */ int ring_led = wacom->led.groups[0].select & 0x03; int ring_lum = (((wacom->led.llv & 0x60) >> 5) - 1) & 0x03; int crop_lum = 0; unsigned char led_bits = (crop_lum << 4) | (ring_lum << 2) | (ring_led); buf[0] = report_id; if (wacom->wacom_wac.pid) { wacom_get_report(wacom->hdev, HID_FEATURE_REPORT, buf, buf_size, WAC_CMD_RETRIES); buf[0] = report_id; buf[4] = led_bits; } else buf[1] = led_bits; } else if (wacom->wacom_wac.features.type == INTUOSP2_BT) { buf[0] = report_id; buf[4] = 100; // Power Connection LED (ORANGE) buf[5] = 100; // BT Connection LED (BLUE) buf[6] = 100; // Paper Mode (RED?) buf[7] = 100; // Paper Mode (GREEN?) buf[8] = 100; // Paper Mode (BLUE?) buf[9] = wacom->led.llv; buf[10] = wacom->led.groups[0].select & 0x03; } else { int led = wacom->led.groups[0].select | 0x4; if (wacom->wacom_wac.features.type == WACOM_21UX2 || wacom->wacom_wac.features.type == WACOM_24HD) led |= (wacom->led.groups[1].select << 4) | 0x40; buf[0] = report_id; buf[1] = led; buf[2] = wacom->led.llv; buf[3] = wacom->led.hlv; buf[4] = wacom->led.img_lum; } retval = wacom_set_report(wacom->hdev, HID_FEATURE_REPORT, buf, buf_size, WAC_CMD_RETRIES); kfree(buf); return retval; } static int wacom_led_putimage(struct wacom *wacom, int button_id, u8 xfer_id, const unsigned len, const void *img) { unsigned char *buf; int i, retval; const unsigned chunk_len = len / 4; /* 4 chunks are needed to be sent */ buf = kzalloc(chunk_len + 3 , GFP_KERNEL); if (!buf) return -ENOMEM; /* Send 'start' command */ buf[0] = WAC_CMD_ICON_START; buf[1] = 1; retval = wacom_set_report(wacom->hdev, HID_FEATURE_REPORT, buf, 2, WAC_CMD_RETRIES); if (retval < 0) goto out; buf[0] = xfer_id; buf[1] = button_id & 0x07; for (i = 0; i < 4; i++) { buf[2] = i; memcpy(buf + 3, img + i * chunk_len, chunk_len); retval = wacom_set_report(wacom->hdev, HID_FEATURE_REPORT, buf, chunk_len + 3, WAC_CMD_RETRIES); if (retval < 0) break; } /* Send 'stop' */ buf[0] = WAC_CMD_ICON_START; buf[1] = 0; wacom_set_report(wacom->hdev, HID_FEATURE_REPORT, buf, 2, WAC_CMD_RETRIES); out: kfree(buf); return retval; } static ssize_t wacom_led_select_store(struct device *dev, int set_id, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct wacom *wacom = hid_get_drvdata(hdev); unsigned int id; int err; err = kstrtouint(buf, 10, &id); if (err) return err; mutex_lock(&wacom->lock); wacom->led.groups[set_id].select = id & 0x3; err = wacom_led_control(wacom); mutex_unlock(&wacom->lock); return err < 0 ? 
err : count; } #define DEVICE_LED_SELECT_ATTR(SET_ID) \ static ssize_t wacom_led##SET_ID##_select_store(struct device *dev, \ struct device_attribute *attr, const char *buf, size_t count) \ { \ return wacom_led_select_store(dev, SET_ID, buf, count); \ } \ static ssize_t wacom_led##SET_ID##_select_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct hid_device *hdev = to_hid_device(dev);\ struct wacom *wacom = hid_get_drvdata(hdev); \ return scnprintf(buf, PAGE_SIZE, "%d\n", \ wacom->led.groups[SET_ID].select); \ } \ static DEVICE_ATTR(status_led##SET_ID##_select, DEV_ATTR_RW_PERM, \ wacom_led##SET_ID##_select_show, \ wacom_led##SET_ID##_select_store) DEVICE_LED_SELECT_ATTR(0); DEVICE_LED_SELECT_ATTR(1); static ssize_t wacom_luminance_store(struct wacom *wacom, u8 *dest, const char *buf, size_t count) { unsigned int value; int err; err = kstrtouint(buf, 10, &value); if (err) return err; mutex_lock(&wacom->lock); *dest = value & 0x7f; err = wacom_led_control(wacom); mutex_unlock(&wacom->lock); return err < 0 ? err : count; } #define DEVICE_LUMINANCE_ATTR(name, field) \ static ssize_t wacom_##name##_luminance_store(struct device *dev, \ struct device_attribute *attr, const char *buf, size_t count) \ { \ struct hid_device *hdev = to_hid_device(dev);\ struct wacom *wacom = hid_get_drvdata(hdev); \ \ return wacom_luminance_store(wacom, &wacom->led.field, \ buf, count); \ } \ static ssize_t wacom_##name##_luminance_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct wacom *wacom = dev_get_drvdata(dev); \ return scnprintf(buf, PAGE_SIZE, "%d\n", wacom->led.field); \ } \ static DEVICE_ATTR(name##_luminance, DEV_ATTR_RW_PERM, \ wacom_##name##_luminance_show, \ wacom_##name##_luminance_store) DEVICE_LUMINANCE_ATTR(status0, llv); DEVICE_LUMINANCE_ATTR(status1, hlv); DEVICE_LUMINANCE_ATTR(buttons, img_lum); static ssize_t wacom_button_image_store(struct device *dev, int button_id, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct wacom *wacom = hid_get_drvdata(hdev); int err; unsigned len; u8 xfer_id; if (hdev->bus == BUS_BLUETOOTH) { len = 256; xfer_id = WAC_CMD_ICON_BT_XFER; } else { len = 1024; xfer_id = WAC_CMD_ICON_XFER; } if (count != len) return -EINVAL; mutex_lock(&wacom->lock); err = wacom_led_putimage(wacom, button_id, xfer_id, len, buf); mutex_unlock(&wacom->lock); return err < 0 ? 
err : count; } #define DEVICE_BTNIMG_ATTR(BUTTON_ID) \ static ssize_t wacom_btnimg##BUTTON_ID##_store(struct device *dev, \ struct device_attribute *attr, const char *buf, size_t count) \ { \ return wacom_button_image_store(dev, BUTTON_ID, buf, count); \ } \ static DEVICE_ATTR(button##BUTTON_ID##_rawimg, DEV_ATTR_WO_PERM, \ NULL, wacom_btnimg##BUTTON_ID##_store) DEVICE_BTNIMG_ATTR(0); DEVICE_BTNIMG_ATTR(1); DEVICE_BTNIMG_ATTR(2); DEVICE_BTNIMG_ATTR(3); DEVICE_BTNIMG_ATTR(4); DEVICE_BTNIMG_ATTR(5); DEVICE_BTNIMG_ATTR(6); DEVICE_BTNIMG_ATTR(7); static struct attribute *cintiq_led_attrs[] = { &dev_attr_status_led0_select.attr, &dev_attr_status_led1_select.attr, NULL }; static const struct attribute_group cintiq_led_attr_group = { .name = "wacom_led", .attrs = cintiq_led_attrs, }; static struct attribute *intuos4_led_attrs[] = { &dev_attr_status0_luminance.attr, &dev_attr_status1_luminance.attr, &dev_attr_status_led0_select.attr, &dev_attr_buttons_luminance.attr, &dev_attr_button0_rawimg.attr, &dev_attr_button1_rawimg.attr, &dev_attr_button2_rawimg.attr, &dev_attr_button3_rawimg.attr, &dev_attr_button4_rawimg.attr, &dev_attr_button5_rawimg.attr, &dev_attr_button6_rawimg.attr, &dev_attr_button7_rawimg.attr, NULL }; static const struct attribute_group intuos4_led_attr_group = { .name = "wacom_led", .attrs = intuos4_led_attrs, }; static struct attribute *intuos5_led_attrs[] = { &dev_attr_status0_luminance.attr, &dev_attr_status_led0_select.attr, NULL }; static const struct attribute_group intuos5_led_attr_group = { .name = "wacom_led", .attrs = intuos5_led_attrs, }; static struct attribute *generic_led_attrs[] = { &dev_attr_status0_luminance.attr, &dev_attr_status_led0_select.attr, NULL }; static const struct attribute_group generic_led_attr_group = { .name = "wacom_led", .attrs = generic_led_attrs, }; struct wacom_sysfs_group_devres { const struct attribute_group *group; struct kobject *root; }; static void wacom_devm_sysfs_group_release(struct device *dev, void *res) { struct wacom_sysfs_group_devres *devres = res; struct kobject *kobj = devres->root; dev_dbg(dev, "%s: dropping reference to %s\n", __func__, devres->group->name); sysfs_remove_group(kobj, devres->group); } static int __wacom_devm_sysfs_create_group(struct wacom *wacom, struct kobject *root, const struct attribute_group *group) { struct wacom_sysfs_group_devres *devres; int error; devres = devres_alloc(wacom_devm_sysfs_group_release, sizeof(struct wacom_sysfs_group_devres), GFP_KERNEL); if (!devres) return -ENOMEM; devres->group = group; devres->root = root; error = sysfs_create_group(devres->root, group); if (error) { devres_free(devres); return error; } devres_add(&wacom->hdev->dev, devres); return 0; } static int wacom_devm_sysfs_create_group(struct wacom *wacom, const struct attribute_group *group) { return __wacom_devm_sysfs_create_group(wacom, &wacom->hdev->dev.kobj, group); } static void wacom_devm_kfifo_release(struct device *dev, void *res) { struct kfifo_rec_ptr_2 *devres = res; kfifo_free(devres); } static int wacom_devm_kfifo_alloc(struct wacom *wacom) { struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct kfifo_rec_ptr_2 *pen_fifo; int error; pen_fifo = devres_alloc(wacom_devm_kfifo_release, sizeof(struct kfifo_rec_ptr_2), GFP_KERNEL); if (!pen_fifo) return -ENOMEM; error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); if (error) { devres_free(pen_fifo); return error; } devres_add(&wacom->hdev->dev, pen_fifo); wacom_wac->pen_fifo = pen_fifo; return 0; } enum led_brightness wacom_leds_brightness_get(struct 
wacom_led *led) { struct wacom *wacom = led->wacom; if (wacom->led.max_hlv) return led->hlv * LED_FULL / wacom->led.max_hlv; if (wacom->led.max_llv) return led->llv * LED_FULL / wacom->led.max_llv; /* device doesn't support brightness tuning */ return LED_FULL; } static enum led_brightness __wacom_led_brightness_get(struct led_classdev *cdev) { struct wacom_led *led = container_of(cdev, struct wacom_led, cdev); struct wacom *wacom = led->wacom; if (wacom->led.groups[led->group].select != led->id) return LED_OFF; return wacom_leds_brightness_get(led); } static int wacom_led_brightness_set(struct led_classdev *cdev, enum led_brightness brightness) { struct wacom_led *led = container_of(cdev, struct wacom_led, cdev); struct wacom *wacom = led->wacom; int error; mutex_lock(&wacom->lock); if (!wacom->led.groups || (brightness == LED_OFF && wacom->led.groups[led->group].select != led->id)) { error = 0; goto out; } led->llv = wacom->led.llv = wacom->led.max_llv * brightness / LED_FULL; led->hlv = wacom->led.hlv = wacom->led.max_hlv * brightness / LED_FULL; wacom->led.groups[led->group].select = led->id; error = wacom_led_control(wacom); out: mutex_unlock(&wacom->lock); return error; } static void wacom_led_readonly_brightness_set(struct led_classdev *cdev, enum led_brightness brightness) { } static int wacom_led_register_one(struct device *dev, struct wacom *wacom, struct wacom_led *led, unsigned int group, unsigned int id, bool read_only) { int error; char *name; name = devm_kasprintf(dev, GFP_KERNEL, "%s::wacom-%d.%d", dev_name(dev), group, id); if (!name) return -ENOMEM; if (!read_only) { led->trigger.name = name; error = devm_led_trigger_register(dev, &led->trigger); if (error) { hid_err(wacom->hdev, "failed to register LED trigger %s: %d\n", led->cdev.name, error); return error; } } led->group = group; led->id = id; led->wacom = wacom; led->llv = wacom->led.llv; led->hlv = wacom->led.hlv; led->cdev.name = name; led->cdev.max_brightness = LED_FULL; led->cdev.flags = LED_HW_PLUGGABLE; led->cdev.brightness_get = __wacom_led_brightness_get; if (!read_only) { led->cdev.brightness_set_blocking = wacom_led_brightness_set; led->cdev.default_trigger = led->cdev.name; } else { led->cdev.brightness_set = wacom_led_readonly_brightness_set; } error = devm_led_classdev_register(dev, &led->cdev); if (error) { hid_err(wacom->hdev, "failed to register LED %s: %d\n", led->cdev.name, error); led->cdev.name = NULL; return error; } return 0; } static void wacom_led_groups_release_one(void *data) { struct wacom_group_leds *group = data; devres_release_group(group->dev, group); } static int wacom_led_groups_alloc_and_register_one(struct device *dev, struct wacom *wacom, int group_id, int count, bool read_only) { struct wacom_led *leds; int i, error; if (group_id >= wacom->led.count || count <= 0) return -EINVAL; if (!devres_open_group(dev, &wacom->led.groups[group_id], GFP_KERNEL)) return -ENOMEM; leds = devm_kcalloc(dev, count, sizeof(struct wacom_led), GFP_KERNEL); if (!leds) { error = -ENOMEM; goto err; } wacom->led.groups[group_id].leds = leds; wacom->led.groups[group_id].count = count; for (i = 0; i < count; i++) { error = wacom_led_register_one(dev, wacom, &leds[i], group_id, i, read_only); if (error) goto err; } wacom->led.groups[group_id].dev = dev; devres_close_group(dev, &wacom->led.groups[group_id]); /* * There is a bug (?) in devm_led_classdev_register() in which its * increments the refcount of the parent. 
If the parent is an input * device, that means the ref count never reaches 0 when * devm_input_device_release() gets called. * This means that the LEDs are still there after disconnect. * Manually force the release of the group so that the leds are released * once we are done using them. */ error = devm_add_action_or_reset(&wacom->hdev->dev, wacom_led_groups_release_one, &wacom->led.groups[group_id]); if (error) return error; return 0; err: devres_release_group(dev, &wacom->led.groups[group_id]); return error; } struct wacom_led *wacom_led_find(struct wacom *wacom, unsigned int group_id, unsigned int id) { struct wacom_group_leds *group; if (group_id >= wacom->led.count) return NULL; group = &wacom->led.groups[group_id]; if (!group->leds) return NULL; id %= group->count; return &group->leds[id]; } /* * wacom_led_next: gives the next available led with a wacom trigger. * * returns the next available struct wacom_led which has its default trigger * or the current one if none is available. */ struct wacom_led *wacom_led_next(struct wacom *wacom, struct wacom_led *cur) { struct wacom_led *next_led; int group, next; if (!wacom || !cur) return NULL; group = cur->group; next = cur->id; do { next_led = wacom_led_find(wacom, group, ++next); if (!next_led || next_led == cur) return next_led; } while (next_led->cdev.trigger != &next_led->trigger); return next_led; } static void wacom_led_groups_release(void *data) { struct wacom *wacom = data; wacom->led.groups = NULL; wacom->led.count = 0; } static int wacom_led_groups_allocate(struct wacom *wacom, int count) { struct device *dev = &wacom->hdev->dev; struct wacom_group_leds *groups; int error; groups = devm_kcalloc(dev, count, sizeof(struct wacom_group_leds), GFP_KERNEL); if (!groups) return -ENOMEM; error = devm_add_action_or_reset(dev, wacom_led_groups_release, wacom); if (error) return error; wacom->led.groups = groups; wacom->led.count = count; return 0; } static int wacom_leds_alloc_and_register(struct wacom *wacom, int group_count, int led_per_group, bool read_only) { struct device *dev; int i, error; if (!wacom->wacom_wac.pad_input) return -EINVAL; dev = &wacom->wacom_wac.pad_input->dev; error = wacom_led_groups_allocate(wacom, group_count); if (error) return error; for (i = 0; i < group_count; i++) { error = wacom_led_groups_alloc_and_register_one(dev, wacom, i, led_per_group, read_only); if (error) return error; } return 0; } int wacom_initialize_leds(struct wacom *wacom) { int error; if (!(wacom->wacom_wac.features.device_type & WACOM_DEVICETYPE_PAD)) return 0; /* Initialize default values */ switch (wacom->wacom_wac.features.type) { case HID_GENERIC: if (!wacom->generic_has_leds) return 0; wacom->led.llv = 100; wacom->led.max_llv = 100; error = wacom_leds_alloc_and_register(wacom, 1, 4, false); if (error) { hid_err(wacom->hdev, "cannot create leds err: %d\n", error); return error; } error = wacom_devm_sysfs_create_group(wacom, &generic_led_attr_group); break; case INTUOS4S: case INTUOS4: case INTUOS4WL: case INTUOS4L: wacom->led.llv = 10; wacom->led.hlv = 20; wacom->led.max_llv = 127; wacom->led.max_hlv = 127; wacom->led.img_lum = 10; error = wacom_leds_alloc_and_register(wacom, 1, 4, false); if (error) { hid_err(wacom->hdev, "cannot create leds err: %d\n", error); return error; } error = wacom_devm_sysfs_create_group(wacom, &intuos4_led_attr_group); break; case WACOM_24HD: case WACOM_21UX2: wacom->led.llv = 0; wacom->led.hlv = 0; wacom->led.img_lum = 0; error = wacom_leds_alloc_and_register(wacom, 2, 4, false); if (error) { 
hid_err(wacom->hdev, "cannot create leds err: %d\n", error); return error; } error = wacom_devm_sysfs_create_group(wacom, &cintiq_led_attr_group); break; case INTUOS5S: case INTUOS5: case INTUOS5L: case INTUOSPS: case INTUOSPM: case INTUOSPL: wacom->led.llv = 32; wacom->led.max_llv = 96; error = wacom_leds_alloc_and_register(wacom, 1, 4, false); if (error) { hid_err(wacom->hdev, "cannot create leds err: %d\n", error); return error; } error = wacom_devm_sysfs_create_group(wacom, &intuos5_led_attr_group); break; case INTUOSP2_BT: wacom->led.llv = 50; wacom->led.max_llv = 100; error = wacom_leds_alloc_and_register(wacom, 1, 4, false); if (error) { hid_err(wacom->hdev, "cannot create leds err: %d\n", error); return error; } return 0; case REMOTE: wacom->led.llv = 255; wacom->led.max_llv = 255; error = wacom_led_groups_allocate(wacom, 5); if (error) { hid_err(wacom->hdev, "cannot create leds err: %d\n", error); return error; } return 0; default: return 0; } if (error) { hid_err(wacom->hdev, "cannot create sysfs group err: %d\n", error); return error; } return 0; } static void wacom_init_work(struct work_struct *work) { struct wacom *wacom = container_of(work, struct wacom, init_work.work); _wacom_query_tablet_data(wacom); wacom_led_control(wacom); } static void wacom_query_tablet_data(struct wacom *wacom) { schedule_delayed_work(&wacom->init_work, msecs_to_jiffies(1000)); } static enum power_supply_property wacom_battery_props[] = { POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_CAPACITY }; static int wacom_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct wacom_battery *battery = power_supply_get_drvdata(psy); int ret = 0; switch (psp) { case POWER_SUPPLY_PROP_MODEL_NAME: val->strval = battery->wacom->wacom_wac.name; break; case POWER_SUPPLY_PROP_PRESENT: val->intval = battery->bat_connected; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = battery->battery_capacity; break; case POWER_SUPPLY_PROP_STATUS: if (battery->bat_status != WACOM_POWER_SUPPLY_STATUS_AUTO) val->intval = battery->bat_status; else if (battery->bat_charging) val->intval = POWER_SUPPLY_STATUS_CHARGING; else if (battery->battery_capacity == 100 && battery->ps_connected) val->intval = POWER_SUPPLY_STATUS_FULL; else if (battery->ps_connected) val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; else val->intval = POWER_SUPPLY_STATUS_DISCHARGING; break; default: ret = -EINVAL; break; } return ret; } static int __wacom_initialize_battery(struct wacom *wacom, struct wacom_battery *battery) { static atomic_t battery_no = ATOMIC_INIT(0); struct device *dev = &wacom->hdev->dev; struct power_supply_config psy_cfg = { .drv_data = battery, }; struct power_supply *ps_bat; struct power_supply_desc *bat_desc = &battery->bat_desc; unsigned long n; int error; if (!devres_open_group(dev, bat_desc, GFP_KERNEL)) return -ENOMEM; battery->wacom = wacom; n = atomic_inc_return(&battery_no) - 1; bat_desc->properties = wacom_battery_props; bat_desc->num_properties = ARRAY_SIZE(wacom_battery_props); bat_desc->get_property = wacom_battery_get_property; sprintf(battery->bat_name, "wacom_battery_%ld", n); bat_desc->name = battery->bat_name; bat_desc->type = POWER_SUPPLY_TYPE_BATTERY; bat_desc->use_for_apm = 0; ps_bat = devm_power_supply_register(dev, bat_desc, &psy_cfg); if (IS_ERR(ps_bat)) { error = PTR_ERR(ps_bat); goto err; } 
power_supply_powers(ps_bat, &wacom->hdev->dev); battery->battery = ps_bat; devres_close_group(dev, bat_desc); return 0; err: devres_release_group(dev, bat_desc); return error; } static int wacom_initialize_battery(struct wacom *wacom) { if (wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) return __wacom_initialize_battery(wacom, &wacom->battery); return 0; } static void wacom_destroy_battery(struct wacom *wacom) { if (wacom->battery.battery) { devres_release_group(&wacom->hdev->dev, &wacom->battery.bat_desc); wacom->battery.battery = NULL; } } static void wacom_aes_battery_handler(struct work_struct *work) { struct wacom *wacom = container_of(work, struct wacom, aes_battery_work.work); wacom_destroy_battery(wacom); } static ssize_t wacom_show_speed(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct wacom *wacom = hid_get_drvdata(hdev); return sysfs_emit(buf, "%i\n", wacom->wacom_wac.bt_high_speed); } static ssize_t wacom_store_speed(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct wacom *wacom = hid_get_drvdata(hdev); u8 new_speed; if (kstrtou8(buf, 0, &new_speed)) return -EINVAL; if (new_speed != 0 && new_speed != 1) return -EINVAL; wacom_bt_query_tablet_data(hdev, new_speed, &wacom->wacom_wac.features); return count; } static DEVICE_ATTR(speed, DEV_ATTR_RW_PERM, wacom_show_speed, wacom_store_speed); static ssize_t wacom_show_remote_mode(struct kobject *kobj, struct kobj_attribute *kattr, char *buf, int index) { struct device *dev = kobj_to_dev(kobj->parent); struct hid_device *hdev = to_hid_device(dev); struct wacom *wacom = hid_get_drvdata(hdev); u8 mode; mode = wacom->led.groups[index].select; return sprintf(buf, "%d\n", mode < 3 ? 
mode : -1); } #define DEVICE_EKR_ATTR_GROUP(SET_ID) \ static ssize_t wacom_show_remote##SET_ID##_mode(struct kobject *kobj, \ struct kobj_attribute *kattr, char *buf) \ { \ return wacom_show_remote_mode(kobj, kattr, buf, SET_ID); \ } \ static struct kobj_attribute remote##SET_ID##_mode_attr = { \ .attr = {.name = "remote_mode", \ .mode = DEV_ATTR_RO_PERM}, \ .show = wacom_show_remote##SET_ID##_mode, \ }; \ static struct attribute *remote##SET_ID##_serial_attrs[] = { \ &remote##SET_ID##_mode_attr.attr, \ NULL \ }; \ static const struct attribute_group remote##SET_ID##_serial_group = { \ .name = NULL, \ .attrs = remote##SET_ID##_serial_attrs, \ } DEVICE_EKR_ATTR_GROUP(0); DEVICE_EKR_ATTR_GROUP(1); DEVICE_EKR_ATTR_GROUP(2); DEVICE_EKR_ATTR_GROUP(3); DEVICE_EKR_ATTR_GROUP(4); static int wacom_remote_create_attr_group(struct wacom *wacom, __u32 serial, int index) { int error = 0; struct wacom_remote *remote = wacom->remote; remote->remotes[index].group.name = devm_kasprintf(&wacom->hdev->dev, GFP_KERNEL, "%d", serial); if (!remote->remotes[index].group.name) return -ENOMEM; error = __wacom_devm_sysfs_create_group(wacom, remote->remote_dir, &remote->remotes[index].group); if (error) { remote->remotes[index].group.name = NULL; hid_err(wacom->hdev, "cannot create sysfs group err: %d\n", error); return error; } return 0; } static int wacom_cmd_unpair_remote(struct wacom *wacom, unsigned char selector) { const size_t buf_size = 2; unsigned char *buf; int retval; buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = WAC_CMD_DELETE_PAIRING; buf[1] = selector; retval = wacom_set_report(wacom->hdev, HID_OUTPUT_REPORT, buf, buf_size, WAC_CMD_RETRIES); kfree(buf); return retval; } static ssize_t wacom_store_unpair_remote(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { unsigned char selector = 0; struct device *dev = kobj_to_dev(kobj->parent); struct hid_device *hdev = to_hid_device(dev); struct wacom *wacom = hid_get_drvdata(hdev); int err; if (!strncmp(buf, "*\n", 2)) { selector = WAC_CMD_UNPAIR_ALL; } else { hid_info(wacom->hdev, "remote: unrecognized unpair code: %s\n", buf); return -1; } mutex_lock(&wacom->lock); err = wacom_cmd_unpair_remote(wacom, selector); mutex_unlock(&wacom->lock); return err < 0 ? 
err : count; } static struct kobj_attribute unpair_remote_attr = { .attr = {.name = "unpair_remote", .mode = 0200}, .store = wacom_store_unpair_remote, }; static const struct attribute *remote_unpair_attrs[] = { &unpair_remote_attr.attr, NULL }; static void wacom_remotes_destroy(void *data) { struct wacom *wacom = data; struct wacom_remote *remote = wacom->remote; if (!remote) return; kobject_put(remote->remote_dir); kfifo_free(&remote->remote_fifo); wacom->remote = NULL; } static int wacom_initialize_remotes(struct wacom *wacom) { int error = 0; struct wacom_remote *remote; int i; if (wacom->wacom_wac.features.type != REMOTE) return 0; remote = devm_kzalloc(&wacom->hdev->dev, sizeof(*wacom->remote), GFP_KERNEL); if (!remote) return -ENOMEM; wacom->remote = remote; spin_lock_init(&remote->remote_lock); error = kfifo_alloc(&remote->remote_fifo, 5 * sizeof(struct wacom_remote_work_data), GFP_KERNEL); if (error) { hid_err(wacom->hdev, "failed allocating remote_fifo\n"); return -ENOMEM; } remote->remotes[0].group = remote0_serial_group; remote->remotes[1].group = remote1_serial_group; remote->remotes[2].group = remote2_serial_group; remote->remotes[3].group = remote3_serial_group; remote->remotes[4].group = remote4_serial_group; remote->remote_dir = kobject_create_and_add("wacom_remote", &wacom->hdev->dev.kobj); if (!remote->remote_dir) return -ENOMEM; error = sysfs_create_files(remote->remote_dir, remote_unpair_attrs); if (error) { hid_err(wacom->hdev, "cannot create sysfs group err: %d\n", error); return error; } for (i = 0; i < WACOM_MAX_REMOTES; i++) { wacom->led.groups[i].select = WACOM_STATUS_UNKNOWN; remote->remotes[i].serial = 0; } error = devm_add_action_or_reset(&wacom->hdev->dev, wacom_remotes_destroy, wacom); if (error) return error; return 0; } static struct input_dev *wacom_allocate_input(struct wacom *wacom) { struct input_dev *input_dev; struct hid_device *hdev = wacom->hdev; struct wacom_wac *wacom_wac = &(wacom->wacom_wac); input_dev = devm_input_allocate_device(&hdev->dev); if (!input_dev) return NULL; input_dev->name = wacom_wac->features.name; input_dev->phys = hdev->phys; input_dev->dev.parent = &hdev->dev; input_dev->open = wacom_open; input_dev->close = wacom_close; input_dev->uniq = hdev->uniq; input_dev->id.bustype = hdev->bus; input_dev->id.vendor = hdev->vendor; input_dev->id.product = wacom_wac->pid ? 
wacom_wac->pid : hdev->product; input_dev->id.version = hdev->version; input_set_drvdata(input_dev, wacom); return input_dev; } static int wacom_allocate_inputs(struct wacom *wacom) { struct wacom_wac *wacom_wac = &(wacom->wacom_wac); wacom_wac->pen_input = wacom_allocate_input(wacom); wacom_wac->touch_input = wacom_allocate_input(wacom); wacom_wac->pad_input = wacom_allocate_input(wacom); if (!wacom_wac->pen_input || !wacom_wac->touch_input || !wacom_wac->pad_input) return -ENOMEM; wacom_wac->pen_input->name = wacom_wac->pen_name; wacom_wac->touch_input->name = wacom_wac->touch_name; wacom_wac->pad_input->name = wacom_wac->pad_name; return 0; } static int wacom_setup_inputs(struct wacom *wacom) { struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; struct wacom_wac *wacom_wac = &(wacom->wacom_wac); int error = 0; pen_input_dev = wacom_wac->pen_input; touch_input_dev = wacom_wac->touch_input; pad_input_dev = wacom_wac->pad_input; if (!pen_input_dev || !touch_input_dev || !pad_input_dev) return -EINVAL; error = wacom_setup_pen_input_capabilities(pen_input_dev, wacom_wac); if (error) { /* no pen in use on this interface */ input_free_device(pen_input_dev); wacom_wac->pen_input = NULL; pen_input_dev = NULL; } error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac); if (error) { /* no touch in use on this interface */ input_free_device(touch_input_dev); wacom_wac->touch_input = NULL; touch_input_dev = NULL; } error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac); if (error) { /* no pad events using this interface */ input_free_device(pad_input_dev); wacom_wac->pad_input = NULL; pad_input_dev = NULL; } return 0; } static int wacom_register_inputs(struct wacom *wacom) { struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; struct wacom_wac *wacom_wac = &(wacom->wacom_wac); int error = 0; pen_input_dev = wacom_wac->pen_input; touch_input_dev = wacom_wac->touch_input; pad_input_dev = wacom_wac->pad_input; if (pen_input_dev) { error = input_register_device(pen_input_dev); if (error) goto fail; } if (touch_input_dev) { error = input_register_device(touch_input_dev); if (error) goto fail; } if (pad_input_dev) { error = input_register_device(pad_input_dev); if (error) goto fail; } return 0; fail: wacom_wac->pad_input = NULL; wacom_wac->touch_input = NULL; wacom_wac->pen_input = NULL; return error; } /* * Not all devices report physical dimensions from HID. * Compute the default from hardcoded logical dimension * and resolution before driver overwrites them. 
*/ static void wacom_set_default_phy(struct wacom_features *features) { if (features->x_resolution) { features->x_phy = (features->x_max * 100) / features->x_resolution; features->y_phy = (features->y_max * 100) / features->y_resolution; } } static void wacom_calculate_res(struct wacom_features *features) { /* set unit to "100th of a mm" for devices not reported by HID */ if (!features->unit) { features->unit = 0x11; features->unitExpo = -3; } features->x_resolution = wacom_calc_hid_res(features->x_max, features->x_phy, features->unit, features->unitExpo); features->y_resolution = wacom_calc_hid_res(features->y_max, features->y_phy, features->unit, features->unitExpo); } void wacom_battery_work(struct work_struct *work) { struct wacom *wacom = container_of(work, struct wacom, battery_work); if ((wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) && !wacom->battery.battery) { wacom_initialize_battery(wacom); } else if (!(wacom->wacom_wac.features.quirks & WACOM_QUIRK_BATTERY) && wacom->battery.battery) { wacom_destroy_battery(wacom); } } static size_t wacom_compute_pktlen(struct hid_device *hdev) { struct hid_report_enum *report_enum; struct hid_report *report; size_t size = 0; report_enum = hdev->report_enum + HID_INPUT_REPORT; list_for_each_entry(report, &report_enum->report_list, list) { size_t report_size = hid_report_len(report); if (report_size > size) size = report_size; } return size; } static void wacom_update_name(struct wacom *wacom, const char *suffix) { struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct wacom_features *features = &wacom_wac->features; char name[WACOM_NAME_MAX - 20]; /* Leave some room for suffixes */ /* Generic devices name unspecified */ if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) { char *product_name = wacom->hdev->name; if (hid_is_usb(wacom->hdev)) { struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); product_name = dev->product; } if (wacom->hdev->bus == BUS_I2C) { snprintf(name, sizeof(name), "%s %X", features->name, wacom->hdev->product); } else if (strstr(product_name, "Wacom") || strstr(product_name, "wacom") || strstr(product_name, "WACOM")) { if (strscpy(name, product_name, sizeof(name)) < 0) { hid_warn(wacom->hdev, "String overflow while assembling device name"); } } else { snprintf(name, sizeof(name), "Wacom %s", product_name); } /* strip out excess whitespaces */ while (1) { char *gap = strstr(name, " "); if (gap == NULL) break; /* shift everything including the terminator */ memmove(gap, gap+1, strlen(gap)); } /* get rid of trailing whitespace */ if (name[strlen(name)-1] == ' ') name[strlen(name)-1] = '\0'; } else { if (strscpy(name, features->name, sizeof(name)) < 0) { hid_warn(wacom->hdev, "String overflow while assembling device name"); } } snprintf(wacom_wac->name, sizeof(wacom_wac->name), "%s%s", name, suffix); /* Append the device type to the name */ snprintf(wacom_wac->pen_name, sizeof(wacom_wac->pen_name), "%s%s Pen", name, suffix); snprintf(wacom_wac->touch_name, sizeof(wacom_wac->touch_name), "%s%s Finger", name, suffix); snprintf(wacom_wac->pad_name, sizeof(wacom_wac->pad_name), "%s%s Pad", name, suffix); } static void wacom_release_resources(struct wacom *wacom) { struct hid_device *hdev = wacom->hdev; if (!wacom->resources) return; devres_release_group(&hdev->dev, wacom); wacom->resources = false; wacom->wacom_wac.pen_input = NULL; wacom->wacom_wac.touch_input = NULL; wacom->wacom_wac.pad_input = NULL; } static void 
wacom_set_shared_values(struct wacom_wac *wacom_wac) { if (wacom_wac->features.device_type & WACOM_DEVICETYPE_TOUCH) { wacom_wac->shared->type = wacom_wac->features.type; wacom_wac->shared->touch_input = wacom_wac->touch_input; } if (wacom_wac->has_mute_touch_switch) { wacom_wac->shared->has_mute_touch_switch = true; /* Hardware touch switch may be off. Wait until * we know the switch state to decide is_touch_on. * Softkey state should be initialized to "on" to * match historic default. */ if (wacom_wac->is_soft_touch_switch) wacom_wac->shared->is_touch_on = true; } if (wacom_wac->shared->has_mute_touch_switch && wacom_wac->shared->touch_input) { set_bit(EV_SW, wacom_wac->shared->touch_input->evbit); input_set_capability(wacom_wac->shared->touch_input, EV_SW, SW_MUTE_DEVICE); } } static int wacom_parse_and_register(struct wacom *wacom, bool wireless) { struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct wacom_features *features = &wacom_wac->features; struct hid_device *hdev = wacom->hdev; int error; unsigned int connect_mask = HID_CONNECT_HIDRAW; features->pktlen = wacom_compute_pktlen(hdev); if (features->pktlen > WACOM_PKGLEN_MAX) return -EINVAL; if (!devres_open_group(&hdev->dev, wacom, GFP_KERNEL)) return -ENOMEM; wacom->resources = true; error = wacom_allocate_inputs(wacom); if (error) goto fail; /* * Bamboo Pad has a generic hid handling for the Pen, and we switch it * into debug mode for the touch part. * We ignore the other interfaces. */ if (features->type == BAMBOO_PAD) { if (features->pktlen == WACOM_PKGLEN_PENABLED) { features->type = HID_GENERIC; } else if ((features->pktlen != WACOM_PKGLEN_BPAD_TOUCH) && (features->pktlen != WACOM_PKGLEN_BPAD_TOUCH_USB)) { error = -ENODEV; goto fail; } } /* set the default size in case we do not get them from hid */ wacom_set_default_phy(features); /* Retrieve the physical and logical size for touch devices */ wacom_retrieve_hid_descriptor(hdev, features); wacom_setup_device_quirks(wacom); if (features->device_type == WACOM_DEVICETYPE_NONE && features->type != WIRELESS) { error = features->type == HID_GENERIC ? -ENODEV : 0; dev_warn(&hdev->dev, "Unknown device_type for '%s'. %s.", hdev->name, error ? "Ignoring" : "Assuming pen"); if (error) goto fail; features->device_type |= WACOM_DEVICETYPE_PEN; } wacom_calculate_res(features); wacom_update_name(wacom, wireless ? 
" (WL)" : ""); /* pen only Bamboo neither support touch nor pad */ if ((features->type == BAMBOO_PEN) && ((features->device_type & WACOM_DEVICETYPE_TOUCH) || (features->device_type & WACOM_DEVICETYPE_PAD))) { error = -ENODEV; goto fail; } error = wacom_add_shared_data(hdev); if (error) goto fail; error = wacom_setup_inputs(wacom); if (error) goto fail; if (features->type == HID_GENERIC) connect_mask |= HID_CONNECT_DRIVER; /* Regular HID work starts now */ error = hid_hw_start(hdev, connect_mask); if (error) { hid_err(hdev, "hw start failed\n"); goto fail; } error = wacom_register_inputs(wacom); if (error) goto fail; if (wacom->wacom_wac.features.device_type & WACOM_DEVICETYPE_PAD) { error = wacom_initialize_leds(wacom); if (error) goto fail; error = wacom_initialize_remotes(wacom); if (error) goto fail; } if (!wireless) { /* Note that if query fails it is not a hard failure */ wacom_query_tablet_data(wacom); } /* touch only Bamboo doesn't support pen */ if ((features->type == BAMBOO_TOUCH) && (features->device_type & WACOM_DEVICETYPE_PEN)) { cancel_delayed_work_sync(&wacom->init_work); _wacom_query_tablet_data(wacom); error = -ENODEV; goto fail_quirks; } if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) { error = hid_hw_open(hdev); if (error) { hid_err(hdev, "hw open failed\n"); goto fail_quirks; } } wacom_set_shared_values(wacom_wac); devres_close_group(&hdev->dev, wacom); return 0; fail_quirks: hid_hw_stop(hdev); fail: wacom_release_resources(wacom); return error; } static void wacom_wireless_work(struct work_struct *work) { struct wacom *wacom = container_of(work, struct wacom, wireless_work); struct usb_device *usbdev = wacom->usbdev; struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct hid_device *hdev1, *hdev2; struct wacom *wacom1, *wacom2; struct wacom_wac *wacom_wac1, *wacom_wac2; int error; /* * Regardless if this is a disconnect or a new tablet, * remove any existing input and battery devices. 
*/ wacom_destroy_battery(wacom); if (!usbdev) return; /* Stylus interface */ hdev1 = usb_get_intfdata(usbdev->config->interface[1]); wacom1 = hid_get_drvdata(hdev1); wacom_wac1 = &(wacom1->wacom_wac); wacom_release_resources(wacom1); /* Touch interface */ hdev2 = usb_get_intfdata(usbdev->config->interface[2]); wacom2 = hid_get_drvdata(hdev2); wacom_wac2 = &(wacom2->wacom_wac); wacom_release_resources(wacom2); if (wacom_wac->pid == 0) { hid_info(wacom->hdev, "wireless tablet disconnected\n"); } else { const struct hid_device_id *id = wacom_ids; hid_info(wacom->hdev, "wireless tablet connected with PID %x\n", wacom_wac->pid); while (id->bus) { if (id->vendor == USB_VENDOR_ID_WACOM && id->product == wacom_wac->pid) break; id++; } if (!id->bus) { hid_info(wacom->hdev, "ignoring unknown PID.\n"); return; } /* Stylus interface */ wacom_wac1->features = *((struct wacom_features *)id->driver_data); wacom_wac1->pid = wacom_wac->pid; hid_hw_stop(hdev1); error = wacom_parse_and_register(wacom1, true); if (error) goto fail; /* Touch interface */ if (wacom_wac1->features.touch_max || (wacom_wac1->features.type >= INTUOSHT && wacom_wac1->features.type <= BAMBOO_PT)) { wacom_wac2->features = *((struct wacom_features *)id->driver_data); wacom_wac2->pid = wacom_wac->pid; hid_hw_stop(hdev2); error = wacom_parse_and_register(wacom2, true); if (error) goto fail; } if (strscpy(wacom_wac->name, wacom_wac1->name, sizeof(wacom_wac->name)) < 0) { hid_warn(wacom->hdev, "String overflow while assembling device name"); } } return; fail: wacom_release_resources(wacom1); wacom_release_resources(wacom2); return; } static void wacom_remote_destroy_battery(struct wacom *wacom, int index) { struct wacom_remote *remote = wacom->remote; if (remote->remotes[index].battery.battery) { devres_release_group(&wacom->hdev->dev, &remote->remotes[index].battery.bat_desc); remote->remotes[index].battery.battery = NULL; remote->remotes[index].active_time = 0; } } static void wacom_remote_destroy_one(struct wacom *wacom, unsigned int index) { struct wacom_remote *remote = wacom->remote; u32 serial = remote->remotes[index].serial; int i; unsigned long flags; for (i = 0; i < WACOM_MAX_REMOTES; i++) { if (remote->remotes[i].serial == serial) { spin_lock_irqsave(&remote->remote_lock, flags); remote->remotes[i].registered = false; spin_unlock_irqrestore(&remote->remote_lock, flags); wacom_remote_destroy_battery(wacom, i); if (remote->remotes[i].group.name) devres_release_group(&wacom->hdev->dev, &remote->remotes[i]); remote->remotes[i].serial = 0; remote->remotes[i].group.name = NULL; wacom->led.groups[i].select = WACOM_STATUS_UNKNOWN; } } } static int wacom_remote_create_one(struct wacom *wacom, u32 serial, unsigned int index) { struct wacom_remote *remote = wacom->remote; struct device *dev = &wacom->hdev->dev; int error, k; /* A remote can pair more than once with an EKR, * check to make sure this serial isn't already paired. 
*/ for (k = 0; k < WACOM_MAX_REMOTES; k++) { if (remote->remotes[k].serial == serial) break; } if (k < WACOM_MAX_REMOTES) { remote->remotes[index].serial = serial; return 0; } if (!devres_open_group(dev, &remote->remotes[index], GFP_KERNEL)) return -ENOMEM; error = wacom_remote_create_attr_group(wacom, serial, index); if (error) goto fail; remote->remotes[index].input = wacom_allocate_input(wacom); if (!remote->remotes[index].input) { error = -ENOMEM; goto fail; } remote->remotes[index].input->uniq = remote->remotes[index].group.name; remote->remotes[index].input->name = wacom->wacom_wac.pad_name; if (!remote->remotes[index].input->name) { error = -EINVAL; goto fail; } error = wacom_setup_pad_input_capabilities(remote->remotes[index].input, &wacom->wacom_wac); if (error) goto fail; remote->remotes[index].serial = serial; error = input_register_device(remote->remotes[index].input); if (error) goto fail; error = wacom_led_groups_alloc_and_register_one( &remote->remotes[index].input->dev, wacom, index, 3, true); if (error) goto fail; remote->remotes[index].registered = true; devres_close_group(dev, &remote->remotes[index]); return 0; fail: devres_release_group(dev, &remote->remotes[index]); remote->remotes[index].serial = 0; return error; } static int wacom_remote_attach_battery(struct wacom *wacom, int index) { struct wacom_remote *remote = wacom->remote; int error; if (!remote->remotes[index].registered) return 0; if (remote->remotes[index].battery.battery) return 0; if (!remote->remotes[index].active_time) return 0; if (wacom->led.groups[index].select == WACOM_STATUS_UNKNOWN) return 0; error = __wacom_initialize_battery(wacom, &wacom->remote->remotes[index].battery); if (error) return error; return 0; } static void wacom_remote_work(struct work_struct *work) { struct wacom *wacom = container_of(work, struct wacom, remote_work); struct wacom_remote *remote = wacom->remote; ktime_t kt = ktime_get(); struct wacom_remote_work_data remote_work_data; unsigned long flags; unsigned int count; u32 work_serial; int i; spin_lock_irqsave(&remote->remote_lock, flags); count = kfifo_out(&remote->remote_fifo, &remote_work_data, sizeof(remote_work_data)); if (count != sizeof(remote_work_data)) { hid_err(wacom->hdev, "workitem triggered without status available\n"); spin_unlock_irqrestore(&remote->remote_lock, flags); return; } if (!kfifo_is_empty(&remote->remote_fifo)) wacom_schedule_work(&wacom->wacom_wac, WACOM_WORKER_REMOTE); spin_unlock_irqrestore(&remote->remote_lock, flags); for (i = 0; i < WACOM_MAX_REMOTES; i++) { work_serial = remote_work_data.remote[i].serial; if (work_serial) { if (kt - remote->remotes[i].active_time > WACOM_REMOTE_BATTERY_TIMEOUT && remote->remotes[i].active_time != 0) wacom_remote_destroy_battery(wacom, i); if (remote->remotes[i].serial == work_serial) { wacom_remote_attach_battery(wacom, i); continue; } if (remote->remotes[i].serial) wacom_remote_destroy_one(wacom, i); wacom_remote_create_one(wacom, work_serial, i); } else if (remote->remotes[i].serial) { wacom_remote_destroy_one(wacom, i); } } } static void wacom_mode_change_work(struct work_struct *work) { struct wacom *wacom = container_of(work, struct wacom, mode_change_work); struct wacom_shared *shared = wacom->wacom_wac.shared; struct wacom *wacom1 = NULL; struct wacom *wacom2 = NULL; bool is_direct = wacom->wacom_wac.is_direct_mode; int error = 0; if (shared->pen) { wacom1 = hid_get_drvdata(shared->pen); wacom_release_resources(wacom1); hid_hw_stop(wacom1->hdev); wacom1->wacom_wac.has_mode_change = true; 
wacom1->wacom_wac.is_direct_mode = is_direct; } if (shared->touch) { wacom2 = hid_get_drvdata(shared->touch); wacom_release_resources(wacom2); hid_hw_stop(wacom2->hdev); wacom2->wacom_wac.has_mode_change = true; wacom2->wacom_wac.is_direct_mode = is_direct; } if (wacom1) { error = wacom_parse_and_register(wacom1, false); if (error) return; } if (wacom2) { error = wacom_parse_and_register(wacom2, false); if (error) return; } return; } static int wacom_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct wacom *wacom; struct wacom_wac *wacom_wac; struct wacom_features *features; int error; if (!id->driver_data) return -EINVAL; hdev->quirks |= HID_QUIRK_NO_INIT_REPORTS; /* hid-core sets this quirk for the boot interface */ hdev->quirks &= ~HID_QUIRK_NOGET; wacom = devm_kzalloc(&hdev->dev, sizeof(struct wacom), GFP_KERNEL); if (!wacom) return -ENOMEM; hid_set_drvdata(hdev, wacom); wacom->hdev = hdev; wacom_wac = &wacom->wacom_wac; wacom_wac->features = *((struct wacom_features *)id->driver_data); features = &wacom_wac->features; if (features->check_for_hid_type && features->hid_type != hdev->type) return -ENODEV; error = wacom_devm_kfifo_alloc(wacom); if (error) return error; wacom_wac->hid_data.inputmode = -1; wacom_wac->mode_report = -1; if (hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); wacom->usbdev = dev; wacom->intf = intf; } mutex_init(&wacom->lock); INIT_DELAYED_WORK(&wacom->init_work, wacom_init_work); INIT_DELAYED_WORK(&wacom->aes_battery_work, wacom_aes_battery_handler); INIT_WORK(&wacom->wireless_work, wacom_wireless_work); INIT_WORK(&wacom->battery_work, wacom_battery_work); INIT_WORK(&wacom->remote_work, wacom_remote_work); INIT_WORK(&wacom->mode_change_work, wacom_mode_change_work); timer_setup(&wacom->idleprox_timer, &wacom_idleprox_timeout, TIMER_DEFERRABLE); /* ask for the report descriptor to be loaded by HID */ error = hid_parse(hdev); if (error) { hid_err(hdev, "parse failed\n"); return error; } if (features->type == BOOTLOADER) { hid_warn(hdev, "Using device in hidraw-only mode"); return hid_hw_start(hdev, HID_CONNECT_HIDRAW); } error = wacom_parse_and_register(wacom, false); if (error) return error; if (hdev->bus == BUS_BLUETOOTH) { error = device_create_file(&hdev->dev, &dev_attr_speed); if (error) hid_warn(hdev, "can't create sysfs speed attribute err: %d\n", error); } wacom_wac->probe_complete = true; return 0; } static void wacom_remove(struct hid_device *hdev) { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct wacom_features *features = &wacom_wac->features; if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) hid_hw_close(hdev); hid_hw_stop(hdev); cancel_delayed_work_sync(&wacom->init_work); cancel_work_sync(&wacom->wireless_work); cancel_work_sync(&wacom->battery_work); cancel_work_sync(&wacom->remote_work); cancel_work_sync(&wacom->mode_change_work); del_timer_sync(&wacom->idleprox_timer); if (hdev->bus == BUS_BLUETOOTH) device_remove_file(&hdev->dev, &dev_attr_speed); /* make sure we don't trigger the LEDs */ wacom_led_groups_release(wacom); if (wacom->wacom_wac.features.type != REMOTE) wacom_release_resources(wacom); } #ifdef CONFIG_PM static int wacom_resume(struct hid_device *hdev) { struct wacom *wacom = hid_get_drvdata(hdev); mutex_lock(&wacom->lock); /* switch to wacom mode first */ _wacom_query_tablet_data(wacom); wacom_led_control(wacom); mutex_unlock(&wacom->lock); return 0; } static int 
wacom_reset_resume(struct hid_device *hdev) { return wacom_resume(hdev); } #endif /* CONFIG_PM */ static struct hid_driver wacom_driver = { .name = "wacom", .id_table = wacom_ids, .probe = wacom_probe, .remove = wacom_remove, .report = wacom_wac_report, #ifdef CONFIG_PM .resume = wacom_resume, .reset_resume = wacom_reset_resume, #endif .raw_event = wacom_raw_event, }; module_hid_driver(wacom_driver); MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
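/*
 * Editor's note: the LED, battery, and remote setup paths above
 * (wacom_led_groups_alloc_and_register_one(), __wacom_initialize_battery(),
 * wacom_remote_create_one()) all follow the same devres group idiom: open a
 * group keyed by the object being built, register managed resources inside
 * it, close the group on success, and release the whole group on failure or
 * teardown.  The following is a minimal sketch of that idiom under
 * illustrative names -- my_leds, my_leds_setup() and my_leds_release() are
 * not part of the driver.
 */
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/errno.h>

struct my_leds {
	int count;
};

static void my_leds_release(void *data)
{
	/* Runs when the group is released, either with the parent device
	 * or explicitly via devres_release_group(). */
	struct my_leds *leds = data;

	leds->count = 0;
}

static int my_leds_setup(struct device *dev, struct my_leds *leds)
{
	int error;

	/* Open a group keyed by 'leds' so everything registered below can
	 * be dropped with a single devres_release_group() call. */
	if (!devres_open_group(dev, leds, GFP_KERNEL))
		return -ENOMEM;

	error = devm_add_action_or_reset(dev, my_leds_release, leds);
	if (error)
		goto err;

	leds->count = 4;

	/* Success: keep the group; it is released with the device. */
	devres_close_group(dev, leds);
	return 0;

err:
	devres_release_group(dev, leds);
	return error;
}
/*
 * The explicit devm_add_action_or_reset() mirrors the comment at the top of
 * this section: when the devres parent outlives the logical object, the
 * group has to be released manually instead of waiting for the parent's
 * release.
 */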
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_DELAY_H
#define _LINUX_DELAY_H

/*
 * Copyright (C) 1993 Linus Torvalds
 *
 * Delay routines, using a pre-computed "loops_per_jiffy" value.
 *
 * Please note that ndelay(), udelay() and mdelay() may return early for
 * several reasons:
 *  1. computed loops_per_jiffy too low (due to the time taken to
 *     execute the timer interrupt.)
 *  2. cache behaviour affecting the time it takes to execute the
 *     loop function.
 *  3. CPU clock rate changes.
 *
 * Please see this thread:
 *   https://lists.openwall.net/linux-kernel/2011/01/09/56
 */

#include <linux/math.h>
#include <linux/sched.h>

extern unsigned long loops_per_jiffy;

#include <asm/delay.h>

/*
 * Using udelay() for intervals greater than a few milliseconds can
 * risk overflow for high loops_per_jiffy (high bogomips) machines. The
 * mdelay() provides a wrapper to prevent this.  For delays greater
 * than MAX_UDELAY_MS milliseconds, the wrapper is used.  Architecture
 * specific values can be defined in asm-???/delay.h as an override.
 * The 2nd mdelay() definition ensures GCC will optimize away the
 * while loop for the common cases where n <= MAX_UDELAY_MS  --  Paul G.
 */

#ifndef MAX_UDELAY_MS
#define MAX_UDELAY_MS	5
#endif

#ifndef mdelay
#define mdelay(n) (\
	(__builtin_constant_p(n) && (n)<=MAX_UDELAY_MS) ? udelay((n)*1000) : \
	({unsigned long __ms=(n); while (__ms--) udelay(1000);}))
#endif

#ifndef ndelay
static inline void ndelay(unsigned long x)
{
	udelay(DIV_ROUND_UP(x, 1000));
}
#define ndelay(x) ndelay(x)
#endif

extern unsigned long lpj_fine;
void calibrate_delay(void);
unsigned long calibrate_delay_is_known(void);
void __attribute__((weak)) calibration_delay_done(void);

void msleep(unsigned int msecs);
unsigned long msleep_interruptible(unsigned int msecs);
void usleep_range_state(unsigned long min, unsigned long max,
			unsigned int state);

static inline void usleep_range(unsigned long min, unsigned long max)
{
	usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
}

static inline void usleep_idle_range(unsigned long min, unsigned long max)
{
	usleep_range_state(min, max, TASK_IDLE);
}

static inline void ssleep(unsigned int seconds)
{
	msleep(seconds * 1000);
}

/* see Documentation/timers/timers-howto.rst for the thresholds */
static inline void fsleep(unsigned long usecs)
{
	if (usecs <= 10)
		udelay(usecs);
	else if (usecs <= 20000)
		usleep_range(usecs, 2 * usecs);
	else
		msleep(DIV_ROUND_UP(usecs, 1000));
}

#endif /* defined(_LINUX_DELAY_H) */
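/*
 * Editor's note: as the header comments above spell out, ndelay(), udelay()
 * and mdelay() busy-wait (and may return early), while usleep_range(),
 * msleep() and fsleep() sleep and therefore need process context.  Below is
 * a hedged sketch of how a caller might choose between them, using only the
 * declarations above; my_chip_ready(), my_wait_for_ready() and the 50us
 * interval are hypothetical, purely for illustration.
 */
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

static bool my_chip_ready(void)
{
	return false;	/* placeholder: a real driver would read a status register */
}

static int my_wait_for_ready(bool atomic_context)
{
	int i;

	for (i = 0; i < 100; i++) {
		if (my_chip_ready())
			return 0;

		if (atomic_context)
			udelay(50);	/* busy-wait: safe in atomic context, burns CPU */
		else
			fsleep(50);	/* sleeping wait: fsleep() picks udelay(),
					 * usleep_range() or msleep() from the
					 * thresholds documented above */
	}

	return -ETIMEDOUT;
}
/*
 * Using fsleep() in the sleeping path avoids hard-coding the 10us/20ms
 * thresholds that the header documents.
 */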
/*
 * Copyright (c) 2006-2008 Intel Corporation
 * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
 *
 * DRM core CRTC related functions
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting documentation, and
 * that the name of the copyright holders not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  The copyright holders make no representations
 * about the suitability of this software for any purpose.  It is provided "as
 * is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
* * Authors: * Keith Packard * Eric Anholt <eric@anholt.net> * Dave Airlie <airlied@linux.ie> * Jesse Barnes <jesse.barnes@intel.com> */ #include <linux/export.h> #include <linux/moduleparam.h> #include <drm/drm_bridge.h> #include <drm/drm_client.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include <drm/drm_sysfs.h> #include "drm_crtc_helper_internal.h" /** * DOC: output probing helper overview * * This library provides some helper code for output probing. It provides an * implementation of the core &drm_connector_funcs.fill_modes interface with * drm_helper_probe_single_connector_modes(). * * It also provides support for polling connectors with a work item and for * generic hotplug interrupt handling where the driver doesn't or cannot keep * track of a per-connector hpd interrupt. * * This helper library can be used independently of the modeset helper library. * Drivers can also overwrite different parts e.g. use their own hotplug * handling code to avoid probing unrelated outputs. * * The probe helpers share the function table structures with other display * helper libraries. See &struct drm_connector_helper_funcs for the details. */ static bool drm_kms_helper_poll = true; module_param_named(poll, drm_kms_helper_poll, bool, 0600); static enum drm_mode_status drm_mode_validate_flag(const struct drm_display_mode *mode, int flags) { if ((mode->flags & DRM_MODE_FLAG_INTERLACE) && !(flags & DRM_MODE_FLAG_INTERLACE)) return MODE_NO_INTERLACE; if ((mode->flags & DRM_MODE_FLAG_DBLSCAN) && !(flags & DRM_MODE_FLAG_DBLSCAN)) return MODE_NO_DBLESCAN; if ((mode->flags & DRM_MODE_FLAG_3D_MASK) && !(flags & DRM_MODE_FLAG_3D_MASK)) return MODE_NO_STEREO; return MODE_OK; } static int drm_mode_validate_pipeline(struct drm_display_mode *mode, struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, enum drm_mode_status *status) { struct drm_device *dev = connector->dev; struct drm_encoder *encoder; int ret; /* Step 1: Validate against connector */ ret = drm_connector_mode_valid(connector, mode, ctx, status); if (ret || *status != MODE_OK) return ret; /* Step 2: Validate against encoders and crtcs */ drm_connector_for_each_possible_encoder(connector, encoder) { struct drm_bridge *bridge; struct drm_crtc *crtc; *status = drm_encoder_mode_valid(encoder, mode); if (*status != MODE_OK) { /* No point in continuing for crtc check as this encoder * will not accept the mode anyway. If all encoders * reject the mode then, at exit, ret will not be * MODE_OK. */ continue; } bridge = drm_bridge_chain_get_first_bridge(encoder); *status = drm_bridge_chain_mode_valid(bridge, &connector->display_info, mode); if (*status != MODE_OK) { /* There is also no point in continuing for crtc check * here. */ continue; } drm_for_each_crtc(crtc, dev) { if (!drm_encoder_crtc_ok(encoder, crtc)) continue; *status = drm_crtc_mode_valid(crtc, mode); if (*status == MODE_OK) { /* If we get to this point there is at least * one combination of encoder+crtc that works * for this mode. Lets return now. 
*/ return 0; } } } return 0; } static int drm_helper_probe_add_cmdline_mode(struct drm_connector *connector) { struct drm_cmdline_mode *cmdline_mode; struct drm_display_mode *mode; cmdline_mode = &connector->cmdline_mode; if (!cmdline_mode->specified) return 0; /* Only add a GTF mode if we find no matching probed modes */ list_for_each_entry(mode, &connector->probed_modes, head) { if (mode->hdisplay != cmdline_mode->xres || mode->vdisplay != cmdline_mode->yres) continue; if (cmdline_mode->refresh_specified) { /* The probed mode's vrefresh is set until later */ if (drm_mode_vrefresh(mode) != cmdline_mode->refresh) continue; } /* Mark the matching mode as being preferred by the user */ mode->type |= DRM_MODE_TYPE_USERDEF; return 0; } mode = drm_mode_create_from_cmdline_mode(connector->dev, cmdline_mode); if (mode == NULL) return 0; drm_mode_probed_add(connector, mode); return 1; } enum drm_mode_status drm_crtc_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode) { const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; if (!crtc_funcs || !crtc_funcs->mode_valid) return MODE_OK; return crtc_funcs->mode_valid(crtc, mode); } enum drm_mode_status drm_encoder_mode_valid(struct drm_encoder *encoder, const struct drm_display_mode *mode) { const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; if (!encoder_funcs || !encoder_funcs->mode_valid) return MODE_OK; return encoder_funcs->mode_valid(encoder, mode); } int drm_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode, struct drm_modeset_acquire_ctx *ctx, enum drm_mode_status *status) { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; int ret = 0; if (!connector_funcs) *status = MODE_OK; else if (connector_funcs->mode_valid_ctx) ret = connector_funcs->mode_valid_ctx(connector, mode, ctx, status); else if (connector_funcs->mode_valid) *status = connector_funcs->mode_valid(connector, mode); else *status = MODE_OK; return ret; } static void drm_kms_helper_disable_hpd(struct drm_device *dev) { struct drm_connector *connector; struct drm_connector_list_iter conn_iter; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { const struct drm_connector_helper_funcs *funcs = connector->helper_private; if (funcs && funcs->disable_hpd) funcs->disable_hpd(connector); } drm_connector_list_iter_end(&conn_iter); } static bool drm_kms_helper_enable_hpd(struct drm_device *dev) { bool poll = false; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { const struct drm_connector_helper_funcs *funcs = connector->helper_private; if (funcs && funcs->enable_hpd) funcs->enable_hpd(connector); if (connector->polled & (DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT)) poll = true; } drm_connector_list_iter_end(&conn_iter); return poll; } #define DRM_OUTPUT_POLL_PERIOD (10*HZ) static void reschedule_output_poll_work(struct drm_device *dev) { unsigned long delay = DRM_OUTPUT_POLL_PERIOD; if (dev->mode_config.delayed_event) /* * FIXME: * * Use short (1s) delay to handle the initial delayed event. * This delay should not be needed, but Optimus/nouveau will * fail in a mysterious way if the delayed event is handled as * soon as possible like it is done in * drm_helper_probe_single_connector_modes() in case the poll * was enabled before. 
*/ delay = HZ; schedule_delayed_work(&dev->mode_config.output_poll_work, delay); } /** * drm_kms_helper_poll_enable - re-enable output polling. * @dev: drm_device * * This function re-enables the output polling work, after it has been * temporarily disabled using drm_kms_helper_poll_disable(), for example over * suspend/resume. * * Drivers can call this helper from their device resume implementation. It is * not an error to call this even when output polling isn't enabled. * * If device polling was never initialized before, this call will trigger a * warning and return. * * Note that calls to enable and disable polling must be strictly ordered, which * is automatically the case when they're only call from suspend/resume * callbacks. */ void drm_kms_helper_poll_enable(struct drm_device *dev) { if (drm_WARN_ON_ONCE(dev, !dev->mode_config.poll_enabled) || !drm_kms_helper_poll || dev->mode_config.poll_running) return; if (drm_kms_helper_enable_hpd(dev) || dev->mode_config.delayed_event) reschedule_output_poll_work(dev); dev->mode_config.poll_running = true; } EXPORT_SYMBOL(drm_kms_helper_poll_enable); /** * drm_kms_helper_poll_reschedule - reschedule the output polling work * @dev: drm_device * * This function reschedules the output polling work, after polling for a * connector has been enabled. * * Drivers must call this helper after enabling polling for a connector by * setting %DRM_CONNECTOR_POLL_CONNECT / %DRM_CONNECTOR_POLL_DISCONNECT flags * in drm_connector::polled. Note that after disabling polling by clearing these * flags for a connector will stop the output polling work automatically if * the polling is disabled for all other connectors as well. * * The function can be called only after polling has been enabled by calling * drm_kms_helper_poll_init() / drm_kms_helper_poll_enable(). */ void drm_kms_helper_poll_reschedule(struct drm_device *dev) { if (dev->mode_config.poll_running) reschedule_output_poll_work(dev); } EXPORT_SYMBOL(drm_kms_helper_poll_reschedule); static enum drm_connector_status drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force) { const struct drm_connector_helper_funcs *funcs = connector->helper_private; struct drm_modeset_acquire_ctx ctx; int ret; drm_modeset_acquire_init(&ctx, 0); retry: ret = drm_modeset_lock(&connector->dev->mode_config.connection_mutex, &ctx); if (!ret) { if (funcs->detect_ctx) ret = funcs->detect_ctx(connector, &ctx, force); else if (connector->funcs->detect) ret = connector->funcs->detect(connector, force); else ret = connector_status_connected; } if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); goto retry; } if (WARN_ON(ret < 0)) ret = connector_status_unknown; if (ret != connector->status) connector->epoch_counter += 1; drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; } /** * drm_helper_probe_detect - probe connector status * @connector: connector to probe * @ctx: acquire_ctx, or NULL to let this function handle locking. * @force: Whether destructive probe operations should be performed. * * This function calls the detect callbacks of the connector. * This function returns &drm_connector_status, or * if @ctx is set, it might also return -EDEADLK. 
*/ int drm_helper_probe_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { const struct drm_connector_helper_funcs *funcs = connector->helper_private; struct drm_device *dev = connector->dev; int ret; if (!ctx) return drm_helper_probe_detect_ctx(connector, force); ret = drm_modeset_lock(&dev->mode_config.connection_mutex, ctx); if (ret) return ret; if (funcs->detect_ctx) ret = funcs->detect_ctx(connector, ctx, force); else if (connector->funcs->detect) ret = connector->funcs->detect(connector, force); else ret = connector_status_connected; if (ret != connector->status) connector->epoch_counter += 1; return ret; } EXPORT_SYMBOL(drm_helper_probe_detect); static int drm_helper_probe_get_modes(struct drm_connector *connector) { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; int count; count = connector_funcs->get_modes(connector); /* The .get_modes() callback should not return negative values. */ if (count < 0) { drm_err(connector->dev, ".get_modes() returned %pe\n", ERR_PTR(count)); count = 0; } /* * Fallback for when DDC probe failed in drm_get_edid() and thus skipped * override/firmware EDID. */ if (count == 0 && connector->status == connector_status_connected) count = drm_edid_override_connector_update(connector); return count; } static int __drm_helper_update_and_validate(struct drm_connector *connector, uint32_t maxX, uint32_t maxY, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = connector->dev; struct drm_display_mode *mode; int mode_flags = 0; int ret; drm_connector_list_update(connector); if (connector->interlace_allowed) mode_flags |= DRM_MODE_FLAG_INTERLACE; if (connector->doublescan_allowed) mode_flags |= DRM_MODE_FLAG_DBLSCAN; if (connector->stereo_allowed) mode_flags |= DRM_MODE_FLAG_3D_MASK; list_for_each_entry(mode, &connector->modes, head) { if (mode->status != MODE_OK) continue; mode->status = drm_mode_validate_driver(dev, mode); if (mode->status != MODE_OK) continue; mode->status = drm_mode_validate_size(mode, maxX, maxY); if (mode->status != MODE_OK) continue; mode->status = drm_mode_validate_flag(mode, mode_flags); if (mode->status != MODE_OK) continue; ret = drm_mode_validate_pipeline(mode, connector, ctx, &mode->status); if (ret) { drm_dbg_kms(dev, "drm_mode_validate_pipeline failed: %d\n", ret); if (drm_WARN_ON_ONCE(dev, ret != -EDEADLK)) mode->status = MODE_ERROR; else return -EDEADLK; } if (mode->status != MODE_OK) continue; mode->status = drm_mode_validate_ycbcr420(mode, connector); } return 0; } /** * drm_helper_probe_single_connector_modes - get complete set of display modes * @connector: connector to probe * @maxX: max width for modes * @maxY: max height for modes * * Based on the helper callbacks implemented by @connector in struct * &drm_connector_helper_funcs try to detect all valid modes. Modes will first * be added to the connector's probed_modes list, then culled (based on validity * and the @maxX, @maxY parameters) and put into the normal modes list. * * Intended to be used as a generic implementation of the * &drm_connector_funcs.fill_modes() vfunc for drivers that use the CRTC helpers * for output mode filtering and detection. * * The basic procedure is as follows * * 1. All modes currently on the connector's modes list are marked as stale * * 2. New modes are added to the connector's probed_modes list with * drm_mode_probed_add(). New modes start their life with status as OK. * Modes are added from a single source using the following priority order. 
* * - &drm_connector_helper_funcs.get_modes vfunc * - if the connector status is connector_status_connected, standard * VESA DMT modes up to 1024x768 are automatically added * (drm_add_modes_noedid()) * * Finally modes specified via the kernel command line (video=...) are * added in addition to what the earlier probes produced * (drm_helper_probe_add_cmdline_mode()). These modes are generated * using the VESA GTF/CVT formulas. * * 3. Modes are moved from the probed_modes list to the modes list. Potential * duplicates are merged together (see drm_connector_list_update()). * After this step the probed_modes list will be empty again. * * 4. Any non-stale mode on the modes list then undergoes validation * * - drm_mode_validate_basic() performs basic sanity checks * - drm_mode_validate_size() filters out modes larger than @maxX and @maxY * (if specified) * - drm_mode_validate_flag() checks the modes against basic connector * capabilities (interlace_allowed,doublescan_allowed,stereo_allowed) * - the optional &drm_connector_helper_funcs.mode_valid or * &drm_connector_helper_funcs.mode_valid_ctx helpers can perform driver * and/or sink specific checks * - the optional &drm_crtc_helper_funcs.mode_valid, * &drm_bridge_funcs.mode_valid and &drm_encoder_helper_funcs.mode_valid * helpers can perform driver and/or source specific checks which are also * enforced by the modeset/atomic helpers * * 5. Any mode whose status is not OK is pruned from the connector's modes list, * accompanied by a debug message indicating the reason for the mode's * rejection (see drm_mode_prune_invalid()). * * Returns: * The number of modes found on @connector. */ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, uint32_t maxX, uint32_t maxY) { struct drm_device *dev = connector->dev; struct drm_display_mode *mode; int count = 0, ret; enum drm_connector_status old_status; struct drm_modeset_acquire_ctx ctx; WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); drm_modeset_acquire_init(&ctx, 0); DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); retry: ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); goto retry; } else WARN_ON(ret < 0); /* set all old modes to the stale state */ list_for_each_entry(mode, &connector->modes, head) mode->status = MODE_STALE; old_status = connector->status; if (connector->force) { if (connector->force == DRM_FORCE_ON || connector->force == DRM_FORCE_ON_DIGITAL) connector->status = connector_status_connected; else connector->status = connector_status_disconnected; if (connector->funcs->force) connector->funcs->force(connector); } else { ret = drm_helper_probe_detect(connector, &ctx, true); if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); goto retry; } else if (WARN(ret < 0, "Invalid return value %i for connector detection\n", ret)) ret = connector_status_unknown; connector->status = ret; } /* * Normally either the driver's hpd code or the poll loop should * pick up any changes and fire the hotplug event. But if * userspace sneaks in a probe, we might miss a change. Hence * check here, and if anything changed start the hotplug code. */ if (old_status != connector->status) { DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", connector->base.id, connector->name, drm_get_connector_status_name(old_status), drm_get_connector_status_name(connector->status)); /* * The hotplug event code might call into the fb * helpers, and so expects that we do not hold any * locks. 
Fire up the poll struct instead, it will * disable itself again. */ dev->mode_config.delayed_event = true; if (dev->mode_config.poll_enabled) mod_delayed_work(system_wq, &dev->mode_config.output_poll_work, 0); } /* * Re-enable polling in case the global poll config changed but polling * is still initialized. */ if (dev->mode_config.poll_enabled) drm_kms_helper_poll_enable(dev); if (connector->status == connector_status_disconnected) { DRM_DEBUG_KMS("[CONNECTOR:%d:%s] disconnected\n", connector->base.id, connector->name); drm_connector_update_edid_property(connector, NULL); drm_mode_prune_invalid(dev, &connector->modes, false); goto exit; } count = drm_helper_probe_get_modes(connector); if (count == 0 && (connector->status == connector_status_connected || connector->status == connector_status_unknown)) { count = drm_add_modes_noedid(connector, 1024, 768); /* * Section 4.2.2.6 (EDID Corruption Detection) of the DP 1.4a * Link CTS specifies that 640x480 (the official "failsafe" * mode) needs to be the default if there's no EDID. */ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) drm_set_preferred_mode(connector, 640, 480); } count += drm_helper_probe_add_cmdline_mode(connector); if (count != 0) { ret = __drm_helper_update_and_validate(connector, maxX, maxY, &ctx); if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); goto retry; } } drm_mode_prune_invalid(dev, &connector->modes, true); /* * Displayport spec section 5.2.1.2 ("Video Timing Format") says that * all detachable sinks shall support 640x480 @60Hz as a fail safe * mode. If all modes were pruned, perhaps because they need more * lanes or a higher pixel clock than available, at least try to add * in 640x480. */ if (list_empty(&connector->modes) && connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { count = drm_add_modes_noedid(connector, 640, 480); ret = __drm_helper_update_and_validate(connector, maxX, maxY, &ctx); if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); goto retry; } drm_mode_prune_invalid(dev, &connector->modes, true); } exit: drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); if (list_empty(&connector->modes)) return 0; drm_mode_sort(&connector->modes); DRM_DEBUG_KMS("[CONNECTOR:%d:%s] probed modes :\n", connector->base.id, connector->name); list_for_each_entry(mode, &connector->modes, head) { drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); drm_mode_debug_printmodeline(mode); } return count; } EXPORT_SYMBOL(drm_helper_probe_single_connector_modes); /** * drm_kms_helper_hotplug_event - fire off KMS hotplug events * @dev: drm_device whose connector state changed * * This function fires off the uevent for userspace and also calls the * output_poll_changed function, which is most commonly used to inform the fbdev * emulation code and allow it to update the fbcon output configuration. * * Drivers should call this from their hotplug handling code when a change is * detected. Note that this function does not do any output detection of its * own, like drm_helper_hpd_irq_event() does - this is assumed to be done by the * driver already. * * This function must be called from process context with no mode * setting locks held. * * If only a single connector has changed, consider calling * drm_kms_helper_connector_hotplug_event() instead. 
*/ void drm_kms_helper_hotplug_event(struct drm_device *dev) { /* send a uevent + call fbdev */ drm_sysfs_hotplug_event(dev); if (dev->mode_config.funcs->output_poll_changed) dev->mode_config.funcs->output_poll_changed(dev); drm_client_dev_hotplug(dev); } EXPORT_SYMBOL(drm_kms_helper_hotplug_event); /** * drm_kms_helper_connector_hotplug_event - fire off a KMS connector hotplug event * @connector: drm_connector which has changed * * This is the same as drm_kms_helper_hotplug_event(), except it fires a more * fine-grained uevent for a single connector. */ void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector) { struct drm_device *dev = connector->dev; /* send a uevent + call fbdev */ drm_sysfs_connector_hotplug_event(connector); if (dev->mode_config.funcs->output_poll_changed) dev->mode_config.funcs->output_poll_changed(dev); drm_client_dev_hotplug(dev); } EXPORT_SYMBOL(drm_kms_helper_connector_hotplug_event); static void output_poll_execute(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct drm_device *dev = container_of(delayed_work, struct drm_device, mode_config.output_poll_work); struct drm_connector *connector; struct drm_connector_list_iter conn_iter; enum drm_connector_status old_status; bool repoll = false, changed; u64 old_epoch_counter; if (!dev->mode_config.poll_enabled) return; /* Pick up any changes detected by the probe functions. */ changed = dev->mode_config.delayed_event; dev->mode_config.delayed_event = false; if (!drm_kms_helper_poll) { if (dev->mode_config.poll_running) { drm_kms_helper_disable_hpd(dev); dev->mode_config.poll_running = false; } goto out; } if (!mutex_trylock(&dev->mode_config.mutex)) { repoll = true; goto out; } drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { /* Ignore forced connectors. */ if (connector->force) continue; /* Ignore HPD capable connectors and connectors where we don't * want any hotplug detection at all for polling. */ if (!connector->polled || connector->polled == DRM_CONNECTOR_POLL_HPD) continue; old_status = connector->status; /* if we are connected and don't want to poll for disconnect skip it */ if (old_status == connector_status_connected && !(connector->polled & DRM_CONNECTOR_POLL_DISCONNECT)) continue; repoll = true; old_epoch_counter = connector->epoch_counter; connector->status = drm_helper_probe_detect(connector, NULL, false); if (old_epoch_counter != connector->epoch_counter) { const char *old, *new; /* * The poll work sets force=false when calling detect so * that drivers can avoid to do disruptive tests (e.g. * when load detect cycles could cause flickering on * other, running displays). This bears the risk that we * flip-flop between unknown here in the poll work and * the real state when userspace forces a full detect * call after receiving a hotplug event due to this * change. * * Hence clamp an unknown detect status to the old * value. 
*/ if (connector->status == connector_status_unknown) { connector->status = old_status; continue; } old = drm_get_connector_status_name(old_status); new = drm_get_connector_status_name(connector->status); DRM_DEBUG_KMS("[CONNECTOR:%d:%s] " "status updated from %s to %s\n", connector->base.id, connector->name, old, new); DRM_DEBUG_KMS("[CONNECTOR:%d:%s] epoch counter %llu -> %llu\n", connector->base.id, connector->name, old_epoch_counter, connector->epoch_counter); changed = true; } } drm_connector_list_iter_end(&conn_iter); mutex_unlock(&dev->mode_config.mutex); out: if (changed) drm_kms_helper_hotplug_event(dev); if (repoll) schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD); } /** * drm_kms_helper_is_poll_worker - is %current task an output poll worker? * * Determine if %current task is an output poll worker. This can be used * to select distinct code paths for output polling versus other contexts. * * One use case is to avoid a deadlock between the output poll worker and * the autosuspend worker wherein the latter waits for polling to finish * upon calling drm_kms_helper_poll_disable(), while the former waits for * runtime suspend to finish upon calling pm_runtime_get_sync() in a * connector ->detect hook. */ bool drm_kms_helper_is_poll_worker(void) { struct work_struct *work = current_work(); return work && work->func == output_poll_execute; } EXPORT_SYMBOL(drm_kms_helper_is_poll_worker); /** * drm_kms_helper_poll_disable - disable output polling * @dev: drm_device * * This function disables the output polling work. * * Drivers can call this helper from their device suspend implementation. It is * not an error to call this even when output polling isn't enabled or already * disabled. Polling is re-enabled by calling drm_kms_helper_poll_enable(). * * If, however, polling was never initialized, this call will trigger a * warning and return. * * Note that calls to enable and disable polling must be strictly ordered, which * is automatically the case when they're only called from suspend/resume * callbacks. */ void drm_kms_helper_poll_disable(struct drm_device *dev) { if (drm_WARN_ON(dev, !dev->mode_config.poll_enabled)) return; if (dev->mode_config.poll_running) drm_kms_helper_disable_hpd(dev); cancel_delayed_work_sync(&dev->mode_config.output_poll_work); dev->mode_config.poll_running = false; } EXPORT_SYMBOL(drm_kms_helper_poll_disable); /** * drm_kms_helper_poll_init - initialize and enable output polling * @dev: drm_device * * This function initializes and then also enables output polling support for * @dev. Drivers which do not have reliable hotplug support in hardware can use * this helper infrastructure to regularly poll such connectors for changes in * their connection state. * * Drivers can control which connectors are polled by setting the * DRM_CONNECTOR_POLL_CONNECT and DRM_CONNECTOR_POLL_DISCONNECT flags. On * connectors where probing live outputs can result in visual distortion drivers * should not set the DRM_CONNECTOR_POLL_DISCONNECT flag to avoid this. * Connectors which have no flag or only DRM_CONNECTOR_POLL_HPD set are * completely ignored by the polling logic. * * Note that a connector can be both polled and probed from the hotplug handler, * in case the hotplug interrupt is known to be unreliable.
*/ void drm_kms_helper_poll_init(struct drm_device *dev) { INIT_DELAYED_WORK(&dev->mode_config.output_poll_work, output_poll_execute); dev->mode_config.poll_enabled = true; drm_kms_helper_poll_enable(dev); } EXPORT_SYMBOL(drm_kms_helper_poll_init); /** * drm_kms_helper_poll_fini - disable output polling and clean it up * @dev: drm_device */ void drm_kms_helper_poll_fini(struct drm_device *dev) { if (!dev->mode_config.poll_enabled) return; drm_kms_helper_poll_disable(dev); dev->mode_config.poll_enabled = false; } EXPORT_SYMBOL(drm_kms_helper_poll_fini); static bool check_connector_changed(struct drm_connector *connector) { struct drm_device *dev = connector->dev; enum drm_connector_status old_status; u64 old_epoch_counter; /* Only handle HPD capable connectors. */ drm_WARN_ON(dev, !(connector->polled & DRM_CONNECTOR_POLL_HPD)); drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex)); old_status = connector->status; old_epoch_counter = connector->epoch_counter; connector->status = drm_helper_probe_detect(connector, NULL, false); if (old_epoch_counter == connector->epoch_counter) { drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Same epoch counter %llu\n", connector->base.id, connector->name, connector->epoch_counter); return false; } drm_dbg_kms(dev, "[CONNECTOR:%d:%s] status updated from %s to %s\n", connector->base.id, connector->name, drm_get_connector_status_name(old_status), drm_get_connector_status_name(connector->status)); drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Changed epoch counter %llu => %llu\n", connector->base.id, connector->name, old_epoch_counter, connector->epoch_counter); return true; } /** * drm_connector_helper_hpd_irq_event - hotplug processing * @connector: drm_connector * * Drivers can use this helper function to run a detect cycle on a connector * which has the DRM_CONNECTOR_POLL_HPD flag set in its &polled member. * * This helper function is useful for drivers which can track hotplug * interrupts for a single connector. Drivers that want to send a * hotplug event for all connectors or can't track hotplug interrupts * per connector need to use drm_helper_hpd_irq_event(). * * This function must be called from process context with no mode * setting locks held. * * Note that a connector can be both polled and probed from the hotplug * handler, in case the hotplug interrupt is known to be unreliable. * * Returns: * A boolean indicating whether the connector status changed or not */ bool drm_connector_helper_hpd_irq_event(struct drm_connector *connector) { struct drm_device *dev = connector->dev; bool changed; mutex_lock(&dev->mode_config.mutex); changed = check_connector_changed(connector); mutex_unlock(&dev->mode_config.mutex); if (changed) { drm_kms_helper_connector_hotplug_event(connector); drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Sent hotplug event\n", connector->base.id, connector->name); } return changed; } EXPORT_SYMBOL(drm_connector_helper_hpd_irq_event); /** * drm_helper_hpd_irq_event - hotplug processing * @dev: drm_device * * Drivers can use this helper function to run a detect cycle on all connectors * which have the DRM_CONNECTOR_POLL_HPD flag set in their &polled member. All * other connectors are ignored, which is useful to avoid reprobing fixed * panels. * * This helper function is useful for drivers which can't or don't track hotplug * interrupts for each connector. * * Drivers which support hotplug interrupts for each connector individually and * which have a more fine-grained detect logic can use * drm_connector_helper_hpd_irq_event(). 
Alternatively, they should bypass this * code and directly call drm_kms_helper_hotplug_event() in case the connector * state changed. * * This function must be called from process context with no mode * setting locks held. * * Note that a connector can be both polled and probed from the hotplug handler, * in case the hotplug interrupt is known to be unreliable. * * Returns: * A boolean indicating whether the connector status changed or not */ bool drm_helper_hpd_irq_event(struct drm_device *dev) { struct drm_connector *connector, *first_changed_connector = NULL; struct drm_connector_list_iter conn_iter; int changed = 0; if (!dev->mode_config.poll_enabled) return false; mutex_lock(&dev->mode_config.mutex); drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { /* Only handle HPD capable connectors. */ if (!(connector->polled & DRM_CONNECTOR_POLL_HPD)) continue; if (check_connector_changed(connector)) { if (!first_changed_connector) { drm_connector_get(connector); first_changed_connector = connector; } changed++; } } drm_connector_list_iter_end(&conn_iter); mutex_unlock(&dev->mode_config.mutex); if (changed == 1) drm_kms_helper_connector_hotplug_event(first_changed_connector); else if (changed > 0) drm_kms_helper_hotplug_event(dev); if (first_changed_connector) drm_connector_put(first_changed_connector); return changed; } EXPORT_SYMBOL(drm_helper_hpd_irq_event); /** * drm_crtc_helper_mode_valid_fixed - Validates a display mode * @crtc: the crtc * @mode: the mode to validate * @fixed_mode: the display hardware's mode * * Returns: * MODE_OK on success, or another mode-status code otherwise. */ enum drm_mode_status drm_crtc_helper_mode_valid_fixed(struct drm_crtc *crtc, const struct drm_display_mode *mode, const struct drm_display_mode *fixed_mode) { if (mode->hdisplay != fixed_mode->hdisplay && mode->vdisplay != fixed_mode->vdisplay) return MODE_ONE_SIZE; else if (mode->hdisplay != fixed_mode->hdisplay) return MODE_ONE_WIDTH; else if (mode->vdisplay != fixed_mode->vdisplay) return MODE_ONE_HEIGHT; return MODE_OK; } EXPORT_SYMBOL(drm_crtc_helper_mode_valid_fixed); /** * drm_connector_helper_get_modes_fixed - Duplicates a display mode for a connector * @connector: the connector * @fixed_mode: the display hardware's mode * * This function duplicates a display modes for a connector. Drivers for hardware * that only supports a single fixed mode can use this function in their connector's * get_modes helper. * * Returns: * The number of created modes. */ int drm_connector_helper_get_modes_fixed(struct drm_connector *connector, const struct drm_display_mode *fixed_mode) { struct drm_device *dev = connector->dev; struct drm_display_mode *mode; mode = drm_mode_duplicate(dev, fixed_mode); if (!mode) { drm_err(dev, "Failed to duplicate mode " DRM_MODE_FMT "\n", DRM_MODE_ARG(fixed_mode)); return 0; } if (mode->name[0] == '\0') drm_mode_set_name(mode); mode->type |= DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, mode); if (mode->width_mm) connector->display_info.width_mm = mode->width_mm; if (mode->height_mm) connector->display_info.height_mm = mode->height_mm; return 1; } EXPORT_SYMBOL(drm_connector_helper_get_modes_fixed); /** * drm_connector_helper_get_modes - Read EDID and update connector. * @connector: The connector * * Read the EDID using drm_edid_read() (which requires that connector->ddc is * set), and update the connector using the EDID. 
* * This can be used as the "default" connector helper .get_modes() hook if the * driver does not need any special processing. It also sets the example of what * custom .get_modes() hooks should do regarding EDID read and connector update. * * Returns: Number of modes. */ int drm_connector_helper_get_modes(struct drm_connector *connector) { const struct drm_edid *drm_edid; int count; drm_edid = drm_edid_read(connector); /* * Unconditionally update the connector. If the EDID was read * successfully, fill in the connector information derived from the * EDID. Otherwise, if the EDID is NULL, clear the connector * information. */ drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); drm_edid_free(drm_edid); return count; } EXPORT_SYMBOL(drm_connector_helper_get_modes); /** * drm_connector_helper_tv_get_modes - Fills the modes available to a TV connector * @connector: The connector * * Fills the available modes for a TV connector based on the supported * TV modes, and the default mode expressed by the kernel command line. * * This can be used as the default TV connector helper .get_modes() hook * if the driver does not need any special processing. * * Returns: * The number of modes added to the connector. */ int drm_connector_helper_tv_get_modes(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_property *tv_mode_property = dev->mode_config.tv_mode_property; struct drm_cmdline_mode *cmdline = &connector->cmdline_mode; unsigned int ntsc_modes = BIT(DRM_MODE_TV_MODE_NTSC) | BIT(DRM_MODE_TV_MODE_NTSC_443) | BIT(DRM_MODE_TV_MODE_NTSC_J) | BIT(DRM_MODE_TV_MODE_PAL_M); unsigned int pal_modes = BIT(DRM_MODE_TV_MODE_PAL) | BIT(DRM_MODE_TV_MODE_PAL_N) | BIT(DRM_MODE_TV_MODE_SECAM); unsigned int tv_modes[2] = { UINT_MAX, UINT_MAX }; unsigned int i, supported_tv_modes = 0; if (!tv_mode_property) return 0; for (i = 0; i < tv_mode_property->num_values; i++) supported_tv_modes |= BIT(tv_mode_property->values[i]); if ((supported_tv_modes & ntsc_modes) && (supported_tv_modes & pal_modes)) { uint64_t default_mode; if (drm_object_property_get_default_value(&connector->base, tv_mode_property, &default_mode)) return 0; if (cmdline->tv_mode_specified) default_mode = cmdline->tv_mode; if (BIT(default_mode) & ntsc_modes) { tv_modes[0] = DRM_MODE_TV_MODE_NTSC; tv_modes[1] = DRM_MODE_TV_MODE_PAL; } else { tv_modes[0] = DRM_MODE_TV_MODE_PAL; tv_modes[1] = DRM_MODE_TV_MODE_NTSC; } } else if (supported_tv_modes & ntsc_modes) { tv_modes[0] = DRM_MODE_TV_MODE_NTSC; } else if (supported_tv_modes & pal_modes) { tv_modes[0] = DRM_MODE_TV_MODE_PAL; } else { return 0; } for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { struct drm_display_mode *mode; if (tv_modes[i] == DRM_MODE_TV_MODE_NTSC) mode = drm_mode_analog_ntsc_480i(dev); else if (tv_modes[i] == DRM_MODE_TV_MODE_PAL) mode = drm_mode_analog_pal_576i(dev); else break; if (!mode) return i; if (!i) mode->type |= DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, mode); } return i; } EXPORT_SYMBOL(drm_connector_helper_tv_get_modes);
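/*
 * Illustrative sketch (not part of the helper library above): one way a
 * driver might wire these probe helpers together. All "foo_*" names, the
 * Kconfig guard and the threaded HPD handler are assumptions invented for
 * this example; only the drm_* helpers and the DRM_CONNECTOR_POLL_* flags
 * come from the code documented above. A real driver would also need
 * <drm/drm_probe_helper.h>, <drm/drm_modeset_helper_vtables.h> and
 * <linux/interrupt.h>.
 */
#ifdef CONFIG_DRM_FOO_PROBE_EXAMPLE	/* never built; illustration only */

/* Default EDID-based probing; requires connector->ddc to be set. */
static const struct drm_connector_helper_funcs foo_connector_helper_funcs = {
	.get_modes = drm_connector_helper_get_modes,
};

/* Threaded handler for a (possibly flaky) hot-plug interrupt line. */
static irqreturn_t foo_hpd_irq_thread(int irq, void *arg)
{
	struct drm_connector *connector = arg;

	/* Re-detects just this connector and sends a fine-grained uevent. */
	drm_connector_helper_hpd_irq_event(connector);

	return IRQ_HANDLED;
}

static void foo_probe_setup(struct drm_device *dev,
			    struct drm_connector *connector)
{
	drm_connector_helper_add(connector, &foo_connector_helper_funcs);

	/*
	 * The HPD line is assumed unreliable here, so the connector is both
	 * probed from the HPD handler (POLL_HPD) and polled as a fallback
	 * (POLL_CONNECT | POLL_DISCONNECT), as the kernel-doc above allows.
	 */
	connector->polled = DRM_CONNECTOR_POLL_HPD |
			    DRM_CONNECTOR_POLL_CONNECT |
			    DRM_CONNECTOR_POLL_DISCONNECT;

	/* Start the periodic output poll worker. */
	drm_kms_helper_poll_init(dev);
}

#endif /* CONFIG_DRM_FOO_PROBE_EXAMPLE */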
// SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/workqueue.h> #include <linux/rtnetlink.h> #include <linux/cache.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/delay.h> #include <linux/sched.h> #include <linux/idr.h> #include <linux/rculist.h> #include <linux/nsproxy.h> #include <linux/fs.h> #include <linux/proc_ns.h> #include <linux/file.h> #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/net_namespace.h> #include <linux/sched/task.h> #include <linux/uidgid.h> #include <linux/cookie.h> #include <linux/proc_fs.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> /* * Our network namespace constructor/destructor lists */ static
LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); /* Protects net_namespace_list. Nests iside rtnl_lock() */ DECLARE_RWSEM(net_rwsem); EXPORT_SYMBOL_GPL(net_rwsem); #ifdef CONFIG_KEYS static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; #endif struct net init_net; EXPORT_SYMBOL(init_net); static bool init_net_initialized; /* * pernet_ops_rwsem: protects: pernet_list, net_generic_ids, * init_net_initialized and first_device pointer. * This is internal net namespace object. Please, don't use it * outside. */ DECLARE_RWSEM(pernet_ops_rwsem); EXPORT_SYMBOL_GPL(pernet_ops_rwsem); #define MIN_PERNET_OPS_ID \ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; DEFINE_COOKIE(net_cookie); static struct net_generic *net_alloc_generic(void) { struct net_generic *ng; unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); ng = kzalloc(generic_size, GFP_KERNEL); if (ng) ng->s.len = max_gen_ptrs; return ng; } static int net_assign_generic(struct net *net, unsigned int id, void *data) { struct net_generic *ng, *old_ng; BUG_ON(id < MIN_PERNET_OPS_ID); old_ng = rcu_dereference_protected(net->gen, lockdep_is_held(&pernet_ops_rwsem)); if (old_ng->s.len > id) { old_ng->ptr[id] = data; return 0; } ng = net_alloc_generic(); if (!ng) return -ENOMEM; /* * Some synchronisation notes: * * The net_generic explores the net->gen array inside rcu * read section. Besides once set the net->gen->ptr[x] * pointer never changes (see rules in netns/generic.h). * * That said, we simply duplicate this array and schedule * the old copy for kfree after a grace period. 
*/ memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID], (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *)); ng->ptr[id] = data; rcu_assign_pointer(net->gen, ng); kfree_rcu(old_ng, s.rcu); return 0; } static int ops_init(const struct pernet_operations *ops, struct net *net) { struct net_generic *ng; int err = -ENOMEM; void *data = NULL; if (ops->id && ops->size) { data = kzalloc(ops->size, GFP_KERNEL); if (!data) goto out; err = net_assign_generic(net, *ops->id, data); if (err) goto cleanup; } err = 0; if (ops->init) err = ops->init(net); if (!err) return 0; if (ops->id && ops->size) { ng = rcu_dereference_protected(net->gen, lockdep_is_held(&pernet_ops_rwsem)); ng->ptr[*ops->id] = NULL; } cleanup: kfree(data); out: return err; } static void ops_pre_exit_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { struct net *net; if (ops->pre_exit) { list_for_each_entry(net, net_exit_list, exit_list) ops->pre_exit(net); } } static void ops_exit_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { struct net *net; if (ops->exit) { list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); cond_resched(); } } if (ops->exit_batch) ops->exit_batch(net_exit_list); } static void ops_free_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { struct net *net; if (ops->size && ops->id) { list_for_each_entry(net, net_exit_list, exit_list) kfree(net_generic(net, *ops->id)); } } /* should be called with nsid_lock held */ static int alloc_netid(struct net *net, struct net *peer, int reqid) { int min = 0, max = 0; if (reqid >= 0) { min = reqid; max = reqid + 1; } return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC); } /* This function is used by idr_for_each(). If net is equal to peer, the * function returns the id so that idr_for_each() stops. Because we cannot * returns the id 0 (idr_for_each() will not stop), we return the magic value * NET_ID_ZERO (-1) for it. */ #define NET_ID_ZERO -1 static int net_eq_idr(int id, void *net, void *peer) { if (net_eq(net, peer)) return id ? : NET_ID_ZERO; return 0; } /* Must be called from RCU-critical section or with nsid_lock held */ static int __peernet2id(const struct net *net, struct net *peer) { int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); /* Magic value for id 0. */ if (id == NET_ID_ZERO) return 0; if (id > 0) return id; return NETNSA_NSID_NOT_ASSIGNED; } static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, struct nlmsghdr *nlh, gfp_t gfp); /* This function returns the id of a peer netns. If no id is assigned, one will * be allocated and returned. */ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) { int id; if (refcount_read(&net->ns.count) == 0) return NETNSA_NSID_NOT_ASSIGNED; spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); if (id >= 0) { spin_unlock_bh(&net->nsid_lock); return id; } /* When peer is obtained from RCU lists, we may race with * its cleanup. Check whether it's alive, and this guarantees * we never hash a peer back to net->netns_ids, after it has * just been idr_remove()'d from there in cleanup_net(). 
*/ if (!maybe_get_net(peer)) { spin_unlock_bh(&net->nsid_lock); return NETNSA_NSID_NOT_ASSIGNED; } id = alloc_netid(net, peer, -1); spin_unlock_bh(&net->nsid_lock); put_net(peer); if (id < 0) return NETNSA_NSID_NOT_ASSIGNED; rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp); return id; } EXPORT_SYMBOL_GPL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(const struct net *net, struct net *peer) { int id; rcu_read_lock(); id = __peernet2id(net, peer); rcu_read_unlock(); return id; } EXPORT_SYMBOL(peernet2id); /* This function returns true is the peer netns has an id assigned into the * current netns. */ bool peernet_has_id(const struct net *net, struct net *peer) { return peernet2id(net, peer) >= 0; } struct net *get_net_ns_by_id(const struct net *net, int id) { struct net *peer; if (id < 0) return NULL; rcu_read_lock(); peer = idr_find(&net->netns_ids, id); if (peer) peer = maybe_get_net(peer); rcu_read_unlock(); return peer; } EXPORT_SYMBOL_GPL(get_net_ns_by_id); /* init code that must occur even if setup_net() is not called. */ static __net_init void preinit_net(struct net *net) { ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt"); } /* * setup_net runs the initializers for the network namespace object. */ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with pernet_ops_rwsem held */ const struct pernet_operations *ops, *saved_ops; LIST_HEAD(net_exit_list); LIST_HEAD(dev_kill_list); int error = 0; refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); preempt_disable(); net->net_cookie = gen_cookie_next(&net_cookie); preempt_enable(); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); mutex_init(&net->ipv4.ra_mutex); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); if (error < 0) goto out_undo; } down_write(&net_rwsem); list_add_tail_rcu(&net->list, &net_namespace_list); up_write(&net_rwsem); out: return error; out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ list_add(&net->exit_list, &net_exit_list); saved_ops = ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_pre_exit_list(ops, &net_exit_list); synchronize_rcu(); ops = saved_ops; rtnl_lock(); list_for_each_entry_continue_reverse(ops, &pernet_list, list) { if (ops->exit_batch_rtnl) ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); } unregister_netdevice_many(&dev_kill_list); rtnl_unlock(); ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); rcu_barrier(); goto out; } static int __net_init net_defaults_init_net(struct net *net) { net->core.sysctl_somaxconn = SOMAXCONN; /* Limits per socket sk_omem_alloc usage. * TCP zerocopy regular usage needs 128 KB. 
*/ net->core.sysctl_optmem_max = 128 * 1024; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; return 0; } static struct pernet_operations net_defaults_ops = { .init = net_defaults_init_net, }; static __init int net_defaults_init(void) { if (register_pernet_subsys(&net_defaults_ops)) panic("Cannot initialize net default settings"); return 0; } core_initcall(net_defaults_init); #ifdef CONFIG_NET_NS static struct ucounts *inc_net_namespaces(struct user_namespace *ns) { return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES); } static void dec_net_namespaces(struct ucounts *ucounts) { dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); } static struct kmem_cache *net_cachep __ro_after_init; static struct workqueue_struct *netns_wq; static struct net *net_alloc(void) { struct net *net = NULL; struct net_generic *ng; ng = net_alloc_generic(); if (!ng) goto out; net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); if (!net) goto out_free; #ifdef CONFIG_KEYS net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL); if (!net->key_domain) goto out_free_2; refcount_set(&net->key_domain->usage, 1); #endif rcu_assign_pointer(net->gen, ng); out: return net; #ifdef CONFIG_KEYS out_free_2: kmem_cache_free(net_cachep, net); net = NULL; #endif out_free: kfree(ng); goto out; } static void net_free(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); /* There should not be any trackers left there. */ ref_tracker_dir_exit(&net->notrefcnt_tracker); kmem_cache_free(net_cachep, net); } } void net_drop_ns(void *p) { struct net *net = (struct net *)p; if (net) net_free(net); } struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net) { struct ucounts *ucounts; struct net *net; int rv; if (!(flags & CLONE_NEWNET)) return get_net(old_net); ucounts = inc_net_namespaces(user_ns); if (!ucounts) return ERR_PTR(-ENOSPC); net = net_alloc(); if (!net) { rv = -ENOMEM; goto dec_ucounts; } preinit_net(net); refcount_set(&net->passive, 1); net->ucounts = ucounts; get_user_ns(user_ns); rv = down_read_killable(&pernet_ops_rwsem); if (rv < 0) goto put_userns; rv = setup_net(net, user_ns); up_read(&pernet_ops_rwsem); if (rv < 0) { put_userns: #ifdef CONFIG_KEYS key_remove_domain(net->key_domain); #endif put_user_ns(user_ns); net_free(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); } return net; } /** * net_ns_get_ownership - get sysfs ownership data for @net * @net: network namespace in question (can be NULL) * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects * * Returns the uid/gid pair of root in the user namespace associated with the * given network namespace. */ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid) { if (net) { kuid_t ns_root_uid = make_kuid(net->user_ns, 0); kgid_t ns_root_gid = make_kgid(net->user_ns, 0); if (uid_valid(ns_root_uid)) *uid = ns_root_uid; if (gid_valid(ns_root_gid)) *gid = ns_root_gid; } else { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; } } EXPORT_SYMBOL_GPL(net_ns_get_ownership); static void unhash_nsid(struct net *net, struct net *last) { struct net *tmp; /* This function is only called from cleanup_net() work, * and this work is the only process, that may delete * a net from net_namespace_list. So, when the below * is executing, the list may only grow. Thus, we do not * use for_each_net_rcu() or net_rwsem. 
*/ for_each_net(tmp) { int id; spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, GFP_KERNEL); if (tmp == last) break; } spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); spin_unlock_bh(&net->nsid_lock); } static LLIST_HEAD(cleanup_list); static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; struct net *net, *tmp, *last; struct llist_node *net_kill_list; LIST_HEAD(net_exit_list); LIST_HEAD(dev_kill_list); /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); down_read(&pernet_ops_rwsem); /* Don't let anyone else find us. */ down_write(&net_rwsem); llist_for_each_entry(net, net_kill_list, cleanup_list) list_del_rcu(&net->list); /* Cache last net. After we unlock rtnl, no one new net * added to net_namespace_list can assign nsid pointer * to a net from net_kill_list (see peernet2id_alloc()). * So, we skip them in unhash_nsid(). * * Note, that unhash_nsid() does not delete nsid links * between net_kill_list's nets, as they've already * deleted from net_namespace_list. But, this would be * useless anyway, as netns_ids are destroyed there. */ last = list_last_entry(&net_namespace_list, struct net, list); up_write(&net_rwsem); llist_for_each_entry(net, net_kill_list, cleanup_list) { unhash_nsid(net, last); list_add_tail(&net->exit_list, &net_exit_list); } /* Run all of the network namespace pre_exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_pre_exit_list(ops, &net_exit_list); /* * Another CPU might be rcu-iterating the list, wait for it. * This needs to be before calling the exit() notifiers, so * the rcu_barrier() below isn't sufficient alone. * Also the pre_exit() and exit() methods need this barrier. */ synchronize_rcu_expedited(); rtnl_lock(); list_for_each_entry_reverse(ops, &pernet_list, list) { if (ops->exit_batch_rtnl) ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); } unregister_netdevice_many(&dev_kill_list); rtnl_unlock(); /* Run all of the network namespace exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); /* Free the net generic variables */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list); up_read(&pernet_ops_rwsem); /* Ensure there are no outstanding rcu callbacks using this * network namespace. */ rcu_barrier(); /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); dec_net_namespaces(net->ucounts); #ifdef CONFIG_KEYS key_remove_domain(net->key_domain); #endif put_user_ns(net->user_ns); net_free(net); } } /** * net_ns_barrier - wait until concurrent net_cleanup_work is done * * cleanup_net runs from work queue and will first remove namespaces * from the global list, then run net exit functions. * * Call this in module exit path to make sure that all netns * ->exit ops have been invoked before the function is removed. 
*/ void net_ns_barrier(void) { down_write(&pernet_ops_rwsem); up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL(net_ns_barrier); static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) { ref_tracker_dir_exit(&net->refcnt_tracker); /* Cleanup the network namespace in process context */ if (llist_add(&net->cleanup_list, &cleanup_list)) queue_work(netns_wq, &net_cleanup_work); } EXPORT_SYMBOL_GPL(__put_net); /** * get_net_ns - increment the refcount of the network namespace * @ns: common namespace (net) * * Returns the net's common namespace. */ struct ns_common *get_net_ns(struct ns_common *ns) { return &get_net(container_of(ns, struct net, ns))->ns; } EXPORT_SYMBOL_GPL(get_net_ns); struct net *get_net_ns_by_fd(int fd) { struct fd f = fdget(fd); struct net *net = ERR_PTR(-EINVAL); if (!f.file) return ERR_PTR(-EBADF); if (proc_ns_file(f.file)) { struct ns_common *ns = get_proc_ns(file_inode(f.file)); if (ns->ops == &netns_operations) net = get_net(container_of(ns, struct net, ns)); } fdput(f); return net; } EXPORT_SYMBOL_GPL(get_net_ns_by_fd); #endif struct net *get_net_ns_by_pid(pid_t pid) { struct task_struct *tsk; struct net *net; /* Lookup the network namespace */ net = ERR_PTR(-ESRCH); rcu_read_lock(); tsk = find_task_by_vpid(pid); if (tsk) { struct nsproxy *nsproxy; task_lock(tsk); nsproxy = tsk->nsproxy; if (nsproxy) net = get_net(nsproxy->net_ns); task_unlock(tsk); } rcu_read_unlock(); return net; } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); static __net_init int net_ns_net_init(struct net *net) { #ifdef CONFIG_NET_NS net->ns.ops = &netns_operations; #endif return ns_alloc_inum(&net->ns); } static __net_exit void net_ns_net_exit(struct net *net) { ns_free_inum(&net->ns); } static struct pernet_operations __net_initdata net_ns_ops = { .init = net_ns_net_init, .exit = net_ns_net_exit, }; static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { [NETNSA_NONE] = { .type = NLA_UNSPEC }, [NETNSA_NSID] = { .type = NLA_S32 }, [NETNSA_PID] = { .type = NLA_U32 }, [NETNSA_FD] = { .type = NLA_U32 }, [NETNSA_TARGET_NSID] = { .type = NLA_S32 }, }; static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; struct nlattr *nla; struct net *peer; int nsid, err; err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy, extack); if (err < 0) return err; if (!tb[NETNSA_NSID]) { NL_SET_ERR_MSG(extack, "nsid is missing"); return -EINVAL; } nsid = nla_get_s32(tb[NETNSA_NSID]); if (tb[NETNSA_PID]) { peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); nla = tb[NETNSA_PID]; } else if (tb[NETNSA_FD]) { peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); nla = tb[NETNSA_FD]; } else { NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); return -EINVAL; } if (IS_ERR(peer)) { NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); return PTR_ERR(peer); } spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { spin_unlock_bh(&net->nsid_lock); err = -EEXIST; NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, "Peer netns already has a nsid assigned"); goto out; } err = alloc_netid(net, peer, nsid); spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, nlh, GFP_KERNEL); err = 0; } else if (err == -ENOSPC && nsid >= 0) { err = -EEXIST; NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]); NL_SET_ERR_MSG(extack, "The specified nsid is already 
used"); } out: put_net(peer); return err; } static int rtnl_net_get_size(void) { return NLMSG_ALIGN(sizeof(struct rtgenmsg)) + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */ ; } struct net_fill_args { u32 portid; u32 seq; int flags; int cmd; int nsid; bool add_ref; int ref_nsid; }; static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args) { struct nlmsghdr *nlh; struct rtgenmsg *rth; nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth), args->flags); if (!nlh) return -EMSGSIZE; rth = nlmsg_data(nlh); rth->rtgen_family = AF_UNSPEC; if (nla_put_s32(skb, NETNSA_NSID, args->nsid)) goto nla_put_failure; if (args->add_ref && nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int rtnl_net_valid_getid_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { int i, err; if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy, extack); err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy, extack); if (err) return err; for (i = 0; i <= NETNSA_MAX; i++) { if (!tb[i]) continue; switch (i) { case NETNSA_PID: case NETNSA_FD: case NETNSA_NSID: case NETNSA_TARGET_NSID: break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request"); return -EINVAL; } } return 0; } static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; struct net_fill_args fillargs = { .portid = NETLINK_CB(skb).portid, .seq = nlh->nlmsg_seq, .cmd = RTM_NEWNSID, }; struct net *peer, *target = net; struct nlattr *nla; struct sk_buff *msg; int err; err = rtnl_net_valid_getid_req(skb, nlh, tb, extack); if (err < 0) return err; if (tb[NETNSA_PID]) { peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); nla = tb[NETNSA_PID]; } else if (tb[NETNSA_FD]) { peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); nla = tb[NETNSA_FD]; } else if (tb[NETNSA_NSID]) { peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID])); if (!peer) peer = ERR_PTR(-ENOENT); nla = tb[NETNSA_NSID]; } else { NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); return -EINVAL; } if (IS_ERR(peer)) { NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); return PTR_ERR(peer); } if (tb[NETNSA_TARGET_NSID]) { int id = nla_get_s32(tb[NETNSA_TARGET_NSID]); target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id); if (IS_ERR(target)) { NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]); NL_SET_ERR_MSG(extack, "Target netns reference is invalid"); err = PTR_ERR(target); goto out; } fillargs.add_ref = true; fillargs.ref_nsid = peernet2id(net, peer); } msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); if (!msg) { err = -ENOMEM; goto out; } fillargs.nsid = peernet2id(target, peer); err = rtnl_net_fill(msg, &fillargs); if (err < 0) goto err_out; err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); goto out; err_out: nlmsg_free(msg); out: if (fillargs.add_ref) put_net(target); put_net(peer); return err; } struct rtnl_net_dump_cb { struct net *tgt_net; struct net *ref_net; struct sk_buff *skb; struct net_fill_args fillargs; int idx; int s_idx; }; /* Runs in RCU-critical section. 
*/ static int rtnl_net_dumpid_one(int id, void *peer, void *data) { struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; int ret; if (net_cb->idx < net_cb->s_idx) goto cont; net_cb->fillargs.nsid = id; if (net_cb->fillargs.add_ref) net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer); ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs); if (ret < 0) return ret; cont: net_cb->idx++; return 0; } static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk, struct rtnl_net_dump_cb *net_cb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[NETNSA_MAX + 1]; int err, i; err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy, extack); if (err < 0) return err; for (i = 0; i <= NETNSA_MAX; i++) { if (!tb[i]) continue; if (i == NETNSA_TARGET_NSID) { struct net *net; net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i])); if (IS_ERR(net)) { NL_SET_BAD_ATTR(extack, tb[i]); NL_SET_ERR_MSG(extack, "Invalid target network namespace id"); return PTR_ERR(net); } net_cb->fillargs.add_ref = true; net_cb->ref_net = net_cb->tgt_net; net_cb->tgt_net = net; } else { NL_SET_BAD_ATTR(extack, tb[i]); NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); return -EINVAL; } } return 0; } static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) { struct rtnl_net_dump_cb net_cb = { .tgt_net = sock_net(skb->sk), .skb = skb, .fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = cb->nlh->nlmsg_seq, .flags = NLM_F_MULTI, .cmd = RTM_NEWNSID, }, .idx = 0, .s_idx = cb->args[0], }; int err = 0; if (cb->strict_check) { err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb); if (err < 0) goto end; } rcu_read_lock(); idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb); rcu_read_unlock(); cb->args[0] = net_cb.idx; end: if (net_cb.fillargs.add_ref) put_net(net_cb.tgt_net); return err < 0 ? err : skb->len; } static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, struct nlmsghdr *nlh, gfp_t gfp) { struct net_fill_args fillargs = { .portid = portid, .seq = nlh ? 
nlh->nlmsg_seq : 0, .cmd = cmd, .nsid = id, }; struct sk_buff *msg; int err = -ENOMEM; msg = nlmsg_new(rtnl_net_get_size(), gfp); if (!msg) goto out; err = rtnl_net_fill(msg, &fillargs); if (err < 0) goto err_out; rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp); return; err_out: nlmsg_free(msg); out: rtnl_set_sk_err(net, RTNLGRP_NSID, err); } #ifdef CONFIG_NET_NS static void __init netns_ipv4_struct_check(void) { /* TX readonly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_early_retrans); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_tso_win_divisor); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_tso_rtt_log); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_autocorking); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_min_snd_mss); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_notsent_lowat); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_limit_output_bytes); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_min_rtt_wlen); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_tcp_wmem); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, sysctl_ip_fwd_use_pmtu); CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33); /* TXRX readonly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx, sysctl_tcp_moderate_rcvbuf); CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1); /* RX readonly hotpath cache line */ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_ip_early_demux); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_early_demux); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_reordering); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_rmem); CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18); } #endif void __init net_ns_init(void) { struct net_generic *ng; #ifdef CONFIG_NET_NS netns_ipv4_struct_check(); net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC|SLAB_ACCOUNT, NULL); /* Create workqueue for cleanup */ netns_wq = create_singlethread_workqueue("netns"); if (!netns_wq) panic("Could not create netns workq"); #endif ng = net_alloc_generic(); if (!ng) panic("Could not allocate generic netns"); rcu_assign_pointer(init_net.gen, ng); #ifdef CONFIG_KEYS init_net.key_domain = &init_net_key_domain; #endif down_write(&pernet_ops_rwsem); preinit_net(&init_net); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); init_net_initialized = true; up_write(&pernet_ops_rwsem); if (register_pernet_subsys(&net_ns_ops)) panic("Could not register network namespace subsystems"); rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, RTNL_FLAG_DOIT_UNLOCKED); } static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) { ops_pre_exit_list(ops, net_exit_list); synchronize_rcu(); if (ops->exit_batch_rtnl) { LIST_HEAD(dev_kill_list); rtnl_lock(); ops->exit_batch_rtnl(net_exit_list, &dev_kill_list); unregister_netdevice_many(&dev_kill_list); 
rtnl_unlock(); } ops_exit_list(ops, net_exit_list); ops_free_list(ops, net_exit_list); } #ifdef CONFIG_NET_NS static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { struct net *net; int error; LIST_HEAD(net_exit_list); list_add_tail(&ops->list, list); if (ops->init || (ops->id && ops->size)) { /* We held write locked pernet_ops_rwsem, and parallel * setup_net() and cleanup_net() are not possible. */ for_each_net(net) { error = ops_init(ops, net); if (error) goto out_undo; list_add_tail(&net->exit_list, &net_exit_list); } } return 0; out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); free_exit_list(ops, &net_exit_list); return error; } static void __unregister_pernet_operations(struct pernet_operations *ops) { struct net *net; LIST_HEAD(net_exit_list); list_del(&ops->list); /* See comment in __register_pernet_operations() */ for_each_net(net) list_add_tail(&net->exit_list, &net_exit_list); free_exit_list(ops, &net_exit_list); } #else static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { if (!init_net_initialized) { list_add_tail(&ops->list, list); return 0; } return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) { if (!init_net_initialized) { list_del(&ops->list); } else { LIST_HEAD(net_exit_list); list_add(&init_net.exit_list, &net_exit_list); free_exit_list(ops, &net_exit_list); } } #endif /* CONFIG_NET_NS */ static DEFINE_IDA(net_generic_ids); static int register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { int error; if (ops->id) { error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID, GFP_KERNEL); if (error < 0) return error; *ops->id = error; max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); } error = __register_pernet_operations(list, ops); if (error) { rcu_barrier(); if (ops->id) ida_free(&net_generic_ids, *ops->id); } return error; } static void unregister_pernet_operations(struct pernet_operations *ops) { __unregister_pernet_operations(ops); rcu_barrier(); if (ops->id) ida_free(&net_generic_ids, *ops->id); } /** * register_pernet_subsys - register a network namespace subsystem * @ops: pernet operations structure for the subsystem * * Register a subsystem which has init and exit functions * that are called when network namespaces are created and * destroyed respectively. * * When registered all network namespace init functions are * called for every existing network namespace. Allowing kernel * modules to have a race free view of the set of network namespaces. * * When a new network namespace is created all of the init * methods are called in the order in which they were registered. * * When a network namespace is destroyed all of the exit methods * are called in the reverse of the order with which they were * registered. */ int register_pernet_subsys(struct pernet_operations *ops) { int error; down_write(&pernet_ops_rwsem); error = register_pernet_operations(first_device, ops); up_write(&pernet_ops_rwsem); return error; } EXPORT_SYMBOL_GPL(register_pernet_subsys); /** * unregister_pernet_subsys - unregister a network namespace subsystem * @ops: pernet operations structure to manipulate * * Remove the pernet operations structure from the list to be * used when network namespaces are created or destroyed. In * addition run the exit method for all existing network * namespaces. 
*/ void unregister_pernet_subsys(struct pernet_operations *ops) { down_write(&pernet_ops_rwsem); unregister_pernet_operations(ops); up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem * * Register a device which has init and exit functions * that are called when network namespaces are created and * destroyed respectively. * * When registered all network namespace init functions are * called for every existing network namespace. Allowing kernel * modules to have a race free view of the set of network namespaces. * * When a new network namespace is created all of the init * methods are called in the order in which they were registered. * * When a network namespace is destroyed all of the exit methods * are called in the reverse of the order with which they were * registered. */ int register_pernet_device(struct pernet_operations *ops) { int error; down_write(&pernet_ops_rwsem); error = register_pernet_operations(&pernet_list, ops); if (!error && (first_device == &pernet_list)) first_device = &ops->list; up_write(&pernet_ops_rwsem); return error; } EXPORT_SYMBOL_GPL(register_pernet_device); /** * unregister_pernet_device - unregister a network namespace netdevice * @ops: pernet operations structure to manipulate * * Remove the pernet operations structure from the list to be * used when network namespaces are created or destroyed. In * addition run the exit method for all existing network * namespaces. */ void unregister_pernet_device(struct pernet_operations *ops) { down_write(&pernet_ops_rwsem); if (&ops->list == first_device) first_device = first_device->next; unregister_pernet_operations(ops); up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(unregister_pernet_device); #ifdef CONFIG_NET_NS static struct ns_common *netns_get(struct task_struct *task) { struct net *net = NULL; struct nsproxy *nsproxy; task_lock(task); nsproxy = task->nsproxy; if (nsproxy) net = get_net(nsproxy->net_ns); task_unlock(task); return net ? &net->ns : NULL; } static inline struct net *to_net_ns(struct ns_common *ns) { return container_of(ns, struct net, ns); } static void netns_put(struct ns_common *ns) { put_net(to_net_ns(ns)); } static int netns_install(struct nsset *nsset, struct ns_common *ns) { struct nsproxy *nsproxy = nsset->nsproxy; struct net *net = to_net_ns(ns); if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) return -EPERM; put_net(nsproxy->net_ns); nsproxy->net_ns = get_net(net); return 0; } static struct user_namespace *netns_owner(struct ns_common *ns) { return to_net_ns(ns)->user_ns; } const struct proc_ns_operations netns_operations = { .name = "net", .type = CLONE_NEWNET, .get = netns_get, .put = netns_put, .install = netns_install, .owner = netns_owner, }; #endif
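/*
 * Illustrative sketch (not part of this file): a minimal consumer of the
 * pernet API documented above. The "foo_*" names, the Kconfig guard and the
 * counter field are assumptions invented for this example; only struct
 * pernet_operations, net_generic() and the
 * register_pernet_subsys()/unregister_pernet_subsys() pair are the real API.
 */
#ifdef CONFIG_NET_FOO_PERNET_EXAMPLE	/* never built; illustration only */

/* Per-namespace private state, allocated and zeroed by ops_init(). */
struct foo_pernet {
	unsigned long counter;
};

static unsigned int foo_pernet_id __read_mostly;

static __net_init int foo_pernet_init(struct net *net)
{
	struct foo_pernet *fp = net_generic(net, foo_pernet_id);

	/* The kzalloc() in ops_init() already zeroed this slot. */
	fp->counter = 0;
	return 0;
}

static __net_exit void foo_pernet_exit(struct net *net)
{
	/* Nothing to tear down: ops_free_list() frees the net_generic slot. */
}

static struct pernet_operations foo_pernet_ops = {
	.init = foo_pernet_init,
	.exit = foo_pernet_exit,
	.id   = &foo_pernet_id,
	.size = sizeof(struct foo_pernet),
};

/* Typically called from a module_init()/module_exit() pair. */
static int __init foo_pernet_register(void)
{
	/* Runs .init for every existing namespace before returning. */
	return register_pernet_subsys(&foo_pernet_ops);
}

static void __exit foo_pernet_unregister(void)
{
	/* Runs .exit for all existing namespaces before returning. */
	unregister_pernet_subsys(&foo_pernet_ops);
}

#endif /* CONFIG_NET_FOO_PERNET_EXAMPLE */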
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 2006 Nick Piggin * Copyright (C) 2012 Konstantin Khlebnikov */ #ifndef _LINUX_RADIX_TREE_H #define _LINUX_RADIX_TREE_H #include <linux/bitops.h> #include <linux/gfp_types.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/math.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/xarray.h> #include <linux/local_lock.h> /* Keep unconverted code working */ #define radix_tree_root xarray #define radix_tree_node xa_node struct radix_tree_preload { local_lock_t lock; unsigned nr; /* nodes->parent points to next preallocated node */ struct radix_tree_node *nodes; }; DECLARE_PER_CPU(struct radix_tree_preload, radix_tree_preloads); /* * The bottom two bits of the slot determine how the remaining bits in the * slot are interpreted: * * 00 - data pointer * 10 - internal entry * x1 - value entry * * The internal entry may be a pointer to the next level in the tree, a * sibling entry, or an indicator that the entry in this slot has been moved * to another location in the tree and the lookup should be restarted. While * NULL fits the 'data pointer' pattern, it means that there is no entry in * the tree for this index (no matter what level of the tree it is found at). * This means that storing a NULL entry in the tree is the same as deleting * the entry from the tree.
*/ #define RADIX_TREE_ENTRY_MASK 3UL #define RADIX_TREE_INTERNAL_NODE 2UL static inline bool radix_tree_is_internal_node(void *ptr) { return ((unsigned long)ptr & RADIX_TREE_ENTRY_MASK) == RADIX_TREE_INTERNAL_NODE; } /*** radix-tree API starts here ***/ #define RADIX_TREE_MAP_SHIFT XA_CHUNK_SHIFT #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) #define RADIX_TREE_MAX_TAGS XA_MAX_MARKS #define RADIX_TREE_TAG_LONGS XA_MARK_LONGS #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) /* The IDR tag is stored in the low bits of xa_flags */ #define ROOT_IS_IDR ((__force gfp_t)4) /* The top bits of xa_flags are used to store the root tags */ #define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT) #define RADIX_TREE_INIT(name, mask) XARRAY_INIT(name, mask) #define RADIX_TREE(name, mask) \ struct radix_tree_root name = RADIX_TREE_INIT(name, mask) #define INIT_RADIX_TREE(root, mask) xa_init_flags(root, mask) static inline bool radix_tree_empty(const struct radix_tree_root *root) { return root->xa_head == NULL; } /** * struct radix_tree_iter - radix tree iterator state * * @index: index of current slot * @next_index: one beyond the last index for this chunk * @tags: bit-mask for tag-iterating * @node: node that contains current slot * * This radix tree iterator works in terms of "chunks" of slots. A chunk is a * subinterval of slots contained within one radix tree leaf node. It is * described by a pointer to its first slot and a struct radix_tree_iter * which holds the chunk's position in the tree and its size. For tagged * iteration radix_tree_iter also holds the slots' bit-mask for one chosen * radix tree tag. */ struct radix_tree_iter { unsigned long index; unsigned long next_index; unsigned long tags; struct radix_tree_node *node; }; /** * Radix-tree synchronization * * The radix-tree API requires that users provide all synchronisation (with * specific exceptions, noted below). * * Synchronization of access to the data items being stored in the tree, and * management of their lifetimes must be completely managed by API users. * * For API usage, in general, * - any function _modifying_ the tree or tags (inserting or deleting * items, setting or clearing tags) must exclude other modifications, and * exclude any functions reading the tree. * - any function _reading_ the tree or tags (looking up items or tags, * gang lookups) must exclude modifications to the tree, but may occur * concurrently with other readers. * * The notable exceptions to this rule are the following functions: * __radix_tree_lookup * radix_tree_lookup * radix_tree_lookup_slot * radix_tree_tag_get * radix_tree_gang_lookup * radix_tree_gang_lookup_tag * radix_tree_gang_lookup_tag_slot * radix_tree_tagged * * The first 7 functions are able to be called locklessly, using RCU. The * caller must ensure calls to these functions are made within rcu_read_lock() * regions. Other readers (lock-free or otherwise) and modifications may be * running concurrently. * * It is still required that the caller manage the synchronization and lifetimes * of the items. So if RCU lock-free lookups are used, typically this would mean * that the items have their own locks, or are amenable to lock-free access; and * that the items are freed by RCU (or only freed after having been deleted from * the radix tree *and* a synchronize_rcu() grace period). 
* * (Note, rcu_assign_pointer and rcu_dereference are not needed to control * access to data items when inserting into or looking up from the radix tree) * * Note that the value returned by radix_tree_tag_get() may not be relied upon * if only the RCU read lock is held. Functions to set/clear tags and to * delete nodes running concurrently with it may affect its result such that * two consecutive reads in the same locked section may return different * values. If reliability is required, modification functions must also be * excluded from concurrency. * * radix_tree_tagged is able to be called without locking or RCU. */ /** * radix_tree_deref_slot - dereference a slot * @slot: slot pointer, returned by radix_tree_lookup_slot * * For use with radix_tree_lookup_slot(). Caller must hold tree at least read * locked across slot lookup and dereference. Not required if write lock is * held (ie. items cannot be concurrently inserted). * * radix_tree_deref_retry must be used to confirm validity of the pointer if * only the read lock is held. * * Return: entry stored in that slot. */ static inline void *radix_tree_deref_slot(void __rcu **slot) { return rcu_dereference(*slot); } /** * radix_tree_deref_slot_protected - dereference a slot with tree lock held * @slot: slot pointer, returned by radix_tree_lookup_slot * * Similar to radix_tree_deref_slot. The caller does not hold the RCU read * lock but it must hold the tree lock to prevent parallel updates. * * Return: entry stored in that slot. */ static inline void *radix_tree_deref_slot_protected(void __rcu **slot, spinlock_t *treelock) { return rcu_dereference_protected(*slot, lockdep_is_held(treelock)); } /** * radix_tree_deref_retry - check radix_tree_deref_slot * @arg: pointer returned by radix_tree_deref_slot * Returns: 0 if retry is not required, otherwise retry is required * * radix_tree_deref_retry must be used with radix_tree_deref_slot. */ static inline int radix_tree_deref_retry(void *arg) { return unlikely(radix_tree_is_internal_node(arg)); } /** * radix_tree_exception - radix_tree_deref_slot returned either exception? * @arg: value returned by radix_tree_deref_slot * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. 
*/ static inline int radix_tree_exception(void *arg) { return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } int radix_tree_insert(struct radix_tree_root *, unsigned long index, void *); void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, struct radix_tree_node **nodep, void __rcu ***slotp); void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long index); void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, void __rcu **slot, void *entry); void radix_tree_iter_replace(struct radix_tree_root *, const struct radix_tree_iter *, void __rcu **slot, void *entry); void radix_tree_replace_slot(struct radix_tree_root *, void __rcu **slot, void *entry); void radix_tree_iter_delete(struct radix_tree_root *, struct radix_tree_iter *iter, void __rcu **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *, unsigned long index, unsigned int tag); void *radix_tree_tag_clear(struct radix_tree_root *, unsigned long index, unsigned int tag); int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); void radix_tree_iter_tag_clear(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag); unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void __rcu ***results, unsigned long first_index, unsigned int max_items, unsigned int tag); int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag); static inline void radix_tree_preload_end(void) { local_unlock(&radix_tree_preloads.lock); } void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max); enum { RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ RADIX_TREE_ITER_TAGGED = 0x10, /* lookup tagged slots */ RADIX_TREE_ITER_CONTIG = 0x20, /* stop at first hole */ }; /** * radix_tree_iter_init - initialize radix tree iterator * * @iter: pointer to iterator state * @start: iteration starting index * Returns: NULL */ static __always_inline void __rcu ** radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) { /* * Leave iter->tags uninitialized. radix_tree_next_chunk() will fill it * in the case of a successful tagged chunk lookup. If the lookup was * unsuccessful or non-tagged then nobody cares about ->tags. * * Set index to zero to bypass next_index overflow protection. * See the comment in radix_tree_next_chunk() for details. */ iter->index = 0; iter->next_index = start; return NULL; } /** * radix_tree_next_chunk - find next chunk of slots for iteration * * @root: radix tree root * @iter: iterator state * @flags: RADIX_TREE_ITER_* flags and tag index * Returns: pointer to chunk first slot, or NULL if there no more left * * This function looks up the next chunk in the radix tree starting from * @iter->next_index. It returns a pointer to the chunk's first slot. 
* Also it fills @iter with data about chunk: position in the tree (index), * its end (next_index), and constructs a bit mask for tagged iterating (tags). */ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); /** * radix_tree_iter_lookup - look up an index in the radix tree * @root: radix tree root * @iter: iterator state * @index: key to look up * * If @index is present in the radix tree, this function returns the slot * containing it and updates @iter to describe the entry. If @index is not * present, it returns NULL. */ static inline void __rcu ** radix_tree_iter_lookup(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned long index) { radix_tree_iter_init(iter, index); return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG); } /** * radix_tree_iter_retry - retry this chunk of the iteration * @iter: iterator state * * If we iterate over a tree protected only by the RCU lock, a race * against deletion or creation may result in seeing a slot for which * radix_tree_deref_retry() returns true. If so, call this function * and continue the iteration. */ static inline __must_check void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter) { iter->next_index = iter->index; iter->tags = 0; return NULL; } static inline unsigned long __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) { return iter->index + slots; } /** * radix_tree_iter_resume - resume iterating when the chunk may be invalid * @slot: pointer to current slot * @iter: iterator state * Returns: New slot pointer * * If the iterator needs to release then reacquire a lock, the chunk may * have been invalidated by an insertion or deletion. Call this function * before releasing the lock to continue the iteration from the next index. */ void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter); /** * radix_tree_chunk_size - get current chunk size * * @iter: pointer to radix tree iterator * Returns: current chunk size */ static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { return iter->next_index - iter->index; } /** * radix_tree_next_slot - find next slot in chunk * * @slot: pointer to current slot * @iter: pointer to iterator state * @flags: RADIX_TREE_ITER_*, should be constant * Returns: pointer to next slot, or NULL if there no more left * * This function updates @iter->index in the case of a successful lookup. * For tagged lookup it also eats @iter->tags. * * There are several cases where 'slot' can be passed in as NULL to this * function. These cases result from the use of radix_tree_iter_resume() or * radix_tree_iter_retry(). In these cases we don't end up dereferencing * 'slot' because either: * a) we are doing tagged iteration and iter->tags has been set to 0, or * b) we are doing non-tagged iteration, and iter->index and iter->next_index * have been set up so that radix_tree_chunk_size() returns 1 or 0. 
*/ static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { iter->tags >>= 1; if (unlikely(!iter->tags)) return NULL; if (likely(iter->tags & 1ul)) { iter->index = __radix_tree_iter_add(iter, 1); slot++; goto found; } if (!(flags & RADIX_TREE_ITER_CONTIG)) { unsigned offset = __ffs(iter->tags); iter->tags >>= offset++; iter->index = __radix_tree_iter_add(iter, offset); slot += offset; goto found; } } else { long count = radix_tree_chunk_size(iter); while (--count > 0) { slot++; iter->index = __radix_tree_iter_add(iter, 1); if (likely(*slot)) goto found; if (flags & RADIX_TREE_ITER_CONTIG) { /* forbid switching to the next chunk */ iter->next_index = 0; break; } } } return NULL; found: return slot; } /** * radix_tree_for_each_slot - iterate over non-empty slots * * @slot: the void** variable for pointer to slot * @root: the struct radix_tree_root pointer * @iter: the struct radix_tree_iter pointer * @start: iteration starting index * * @slot points to radix tree slot, @iter->index contains its index. */ #define radix_tree_for_each_slot(slot, root, iter, start) \ for (slot = radix_tree_iter_init(iter, start) ; \ slot || (slot = radix_tree_next_chunk(root, iter, 0)) ; \ slot = radix_tree_next_slot(slot, iter, 0)) /** * radix_tree_for_each_tagged - iterate over tagged slots * * @slot: the void** variable for pointer to slot * @root: the struct radix_tree_root pointer * @iter: the struct radix_tree_iter pointer * @start: iteration starting index * @tag: tag index * * @slot points to radix tree slot, @iter->index contains its index. */ #define radix_tree_for_each_tagged(slot, root, iter, start, tag) \ for (slot = radix_tree_iter_init(iter, start) ; \ slot || (slot = radix_tree_next_chunk(root, iter, \ RADIX_TREE_ITER_TAGGED | tag)) ; \ slot = radix_tree_next_slot(slot, iter, \ RADIX_TREE_ITER_TAGGED | tag)) #endif /* _LINUX_RADIX_TREE_H */
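/*
 * Editor's illustration -- not part of the original header. A minimal sketch
 * of the API above: preloaded insertion, RCU-protected lookup and a full
 * scan. All demo_* names are hypothetical; callers must still provide their
 * own exclusion for modifications, as documented above.
 */
#include <linux/radix-tree.h>
#include <linux/printk.h>

static RADIX_TREE(demo_tree, GFP_KERNEL);	/* statically initialised root */

static int demo_store(unsigned long index, void *item)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);	/* preallocate nodes, may sleep */
	if (err)
		return err;
	/* a real caller would take its tree lock here to exclude writers */
	err = radix_tree_insert(&demo_tree, index, item);
	radix_tree_preload_end();
	return err;
}

static void *demo_lookup(unsigned long index)
{
	void *item;

	rcu_read_lock();	/* lockless lookup, per the rules above */
	item = radix_tree_lookup(&demo_tree, index);
	rcu_read_unlock();
	return item;
}

static void demo_dump(void)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &demo_tree, &iter, 0)
		pr_info("index %lu -> %p\n", iter.index,
			radix_tree_deref_slot(slot));
	rcu_read_unlock();
}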
/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef LLIST_H #define LLIST_H /* * Lock-less NULL terminated single linked list * * Cases where locking is not needed: * If there are multiple producers and multiple consumers, llist_add can be * used in producers and llist_del_all can be used in consumers simultaneously * without locking. Also a single consumer can use llist_del_first while * multiple producers simultaneously use llist_add, without any locking. * * Cases where locking is needed: * If we have multiple consumers with llist_del_first used in one consumer, and * llist_del_first or llist_del_all used in other consumers, then a lock is * needed. This is because llist_del_first depends on list->first->next not * changing, but without lock protection, there's no way to be sure about that * if a preemption happens in the middle of the delete operation and on being * preempted back, the list->first is the same as before causing the cmpxchg in * llist_del_first to succeed. For example, while an llist_del_first operation * is in progress in one consumer, then a llist_del_first, llist_add, * llist_add (or llist_del_all, llist_add, llist_add) sequence in another * consumer may cause violations. * * This can be summarized as follows: * * | add | del_first | del_all * add | - | - | - * del_first | | L | L * del_all | | | - * * Here a particular row's operation can happen concurrently with a column's * operation, with "-" meaning no lock is needed and "L" meaning a lock is needed. * * The list entries deleted via llist_del_all can be traversed with * traversal functions such as llist_for_each etc. But the list * entries cannot be traversed safely before being deleted from the list. * The order of deleted entries is from the newest to the oldest added * one. If you want to traverse from the oldest to the newest, you * must reverse the order by yourself before traversing. * * The basic atomic operation of this list is cmpxchg on long. On * architectures that don't have an NMI-safe cmpxchg implementation, the * list can NOT be used in NMI handlers. So code that uses the list in * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2010,2011 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com> */ #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/stddef.h> #include <linux/types.h> struct llist_head { struct llist_node *first; }; struct llist_node { struct llist_node *next; }; #define LLIST_HEAD_INIT(name) { NULL } #define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) /** * init_llist_head - initialize lock-less list head * @head: the head for your lock-less list */ static inline void init_llist_head(struct llist_head *list) { list->first = NULL; } /** * init_llist_node - initialize lock-less list node * @node: the node to be initialised * * In cases where there is a need to test if a node is on * a list or not, this initialises the node to clearly * not be on any list. */ static inline void init_llist_node(struct llist_node *node) { node->next = node; } /** * llist_on_list - test if a lock-list list node is on a list * @node: the node to test * * When a node is on a list the ->next pointer will be NULL or * some other node. It can never point to itself. We use that * in init_llist_node() to record that a node is not on any list, * and here to test whether it is on any list. */ static inline bool llist_on_list(const struct llist_node *node) { return node->next != node; } /** * llist_entry - get the struct of this entry * @ptr: the &struct llist_node pointer. * @type: the type of the struct this is embedded in. * @member: the name of the llist_node within the struct. */ #define llist_entry(ptr, type, member) \ container_of(ptr, type, member) /** * member_address_is_nonnull - check whether the member address is not NULL * @ptr: the object pointer (struct type * that contains the llist_node) * @member: the name of the llist_node within the struct. * * This macro is conceptually the same as * &ptr->member != NULL * but it works around the fact that compilers can decide that taking a member * address is never a NULL pointer. * * Real objects that start at a high address and have a member at NULL are * unlikely to exist, but such pointers may be returned e.g. by the * container_of() macro. */ #define member_address_is_nonnull(ptr, member) \ ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0) /** * llist_for_each - iterate over some deleted entries of a lock-less list * @pos: the &struct llist_node to use as a loop cursor * @node: the first entry of deleted list entries * * In general, some entries of the lock-less list can be traversed * safely only after being deleted from list, so start with an entry * instead of list head. * * If being used on entries deleted from lock-less list directly, the * traverse order is from the newest to the oldest added entry. If * you want to traverse from the oldest to the newest, you must * reverse the order by yourself before traversing. */ #define llist_for_each(pos, node) \ for ((pos) = (node); pos; (pos) = (pos)->next) /** * llist_for_each_safe - iterate over some deleted entries of a lock-less list * safe against removal of list entry * @pos: the &struct llist_node to use as a loop cursor * @n: another &struct llist_node to use as temporary storage * @node: the first entry of deleted list entries * * In general, some entries of the lock-less list can be traversed * safely only after being deleted from list, so start with an entry * instead of list head. * * If being used on entries deleted from lock-less list directly, the * traverse order is from the newest to the oldest added entry. 
If * you want to traverse from the oldest to the newest, you must * reverse the order by yourself before traversing. */ #define llist_for_each_safe(pos, n, node) \ for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n)) /** * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type * @pos: the type * to use as a loop cursor. * @node: the fist entry of deleted list entries. * @member: the name of the llist_node with the struct. * * In general, some entries of the lock-less list can be traversed * safely only after being removed from list, so start with an entry * instead of list head. * * If being used on entries deleted from lock-less list directly, the * traverse order is from the newest to the oldest added entry. If * you want to traverse from the oldest to the newest, you must * reverse the order by yourself before traversing. */ #define llist_for_each_entry(pos, node, member) \ for ((pos) = llist_entry((node), typeof(*(pos)), member); \ member_address_is_nonnull(pos, member); \ (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** * llist_for_each_entry_safe - iterate over some deleted entries of lock-less list of given type * safe against removal of list entry * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @node: the first entry of deleted list entries. * @member: the name of the llist_node with the struct. * * In general, some entries of the lock-less list can be traversed * safely only after being removed from list, so start with an entry * instead of list head. * * If being used on entries deleted from lock-less list directly, the * traverse order is from the newest to the oldest added entry. If * you want to traverse from the oldest to the newest, you must * reverse the order by yourself before traversing. */ #define llist_for_each_entry_safe(pos, n, node, member) \ for (pos = llist_entry((node), typeof(*pos), member); \ member_address_is_nonnull(pos, member) && \ (n = llist_entry(pos->member.next, typeof(*n), member), true); \ pos = n) /** * llist_empty - tests whether a lock-less list is empty * @head: the list to test * * Not guaranteed to be accurate or up to date. Just a quick way to * test whether the list is empty without deleting something from the * list. */ static inline bool llist_empty(const struct llist_head *head) { return READ_ONCE(head->first) == NULL; } static inline struct llist_node *llist_next(struct llist_node *node) { return node->next; } extern bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head); static inline bool __llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head) { new_last->next = head->first; head->first = new_first; return new_last->next == NULL; } /** * llist_add - add a new entry * @new: new entry to be added * @head: the head for your lock-less list * * Returns true if the list was empty prior to adding this entry. */ static inline bool llist_add(struct llist_node *new, struct llist_head *head) { return llist_add_batch(new, new, head); } static inline bool __llist_add(struct llist_node *new, struct llist_head *head) { return __llist_add_batch(new, new, head); } /** * llist_del_all - delete all entries from lock-less list * @head: the head of lock-less list to delete all entries * * If list is empty, return NULL, otherwise, delete all entries and * return the pointer to the first entry. 
The order of entries * deleted is from the newest to the oldest added one. */ static inline struct llist_node *llist_del_all(struct llist_head *head) { return xchg(&head->first, NULL); } static inline struct llist_node *__llist_del_all(struct llist_head *head) { struct llist_node *first = head->first; head->first = NULL; return first; } extern struct llist_node *llist_del_first(struct llist_head *head); /** * llist_del_first_init - delete first entry from lock-less list and mark it as being off-list * @head: the head of lock-less list to delete from. * * This behaves the same as llist_del_first() except that init_llist_node() is called * on the returned node so that llist_on_list() will report false for the node. */ static inline struct llist_node *llist_del_first_init(struct llist_head *head) { struct llist_node *n = llist_del_first(head); if (n) init_llist_node(n); return n; } extern bool llist_del_first_this(struct llist_head *head, struct llist_node *this); struct llist_node *llist_reverse_order(struct llist_node *head); #endif /* LLIST_H */
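/*
 * Editor's illustration -- not part of the original header. A minimal
 * many-producer / single-consumer sketch of the API above; demo_* names
 * are hypothetical. kfree() comes from <linux/slab.h>.
 */
#include <linux/llist.h>
#include <linux/slab.h>

struct demo_work {
	struct llist_node node;
	int payload;
};

static LLIST_HEAD(demo_list);

/* producers: any context, no locking needed */
static bool demo_queue(struct demo_work *w)
{
	/* true means the list was empty, i.e. the consumer may need a kick */
	return llist_add(&w->node, &demo_list);
}

/* single consumer: detach the whole batch, then walk it */
static void demo_drain(void)
{
	struct llist_node *batch = llist_del_all(&demo_list);
	struct demo_work *w, *tmp;

	/* llist_del_all() returns newest-first; reverse for FIFO handling */
	batch = llist_reverse_order(batch);
	llist_for_each_entry_safe(w, tmp, batch, node)
		kfree(w);	/* _safe variant: w may be freed while iterating */
}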
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/file_table.c * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */ #include <linux/string.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/init.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/eventpoll.h> #include <linux/rcupdate.h> #include <linux/mount.h> #include <linux/capability.h> #include <linux/cdev.h> #include <linux/fsnotify.h> #include <linux/sysctl.h> #include <linux/percpu_counter.h> #include <linux/percpu.h> #include <linux/task_work.h> #include <linux/swap.h> #include <linux/kmemleak.h> #include <linux/atomic.h> #include "internal.h" /* sysctl tunables...
*/ static struct files_stat_struct files_stat = { .max_files = NR_FILE }; /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __ro_after_init; static struct percpu_counter nr_files __cacheline_aligned_in_smp; /* Container for backing file with optional user path */ struct backing_file { struct file file; struct path user_path; }; static inline struct backing_file *backing_file(struct file *f) { return container_of(f, struct backing_file, file); } struct path *backing_file_user_path(struct file *f) { return &backing_file(f)->user_path; } EXPORT_SYMBOL_GPL(backing_file_user_path); static inline void file_free(struct file *f) { security_file_free(f); if (likely(!(f->f_mode & FMODE_NOACCOUNT))) percpu_counter_dec(&nr_files); put_cred(f->f_cred); if (unlikely(f->f_mode & FMODE_BACKING)) { path_put(backing_file_user_path(f)); kfree(backing_file(f)); } else { kmem_cache_free(filp_cachep, f); } } /* * Return the total number of open files in the system */ static long get_nr_files(void) { return percpu_counter_read_positive(&nr_files); } /* * Return the maximum number of open files in the system */ unsigned long get_max_files(void) { return files_stat.max_files; } EXPORT_SYMBOL_GPL(get_max_files); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) /* * Handle nr_files sysctl */ static int proc_nr_files(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static struct ctl_table fs_stat_sysctls[] = { { .procname = "file-nr", .data = &files_stat, .maxlen = sizeof(files_stat), .mode = 0444, .proc_handler = proc_nr_files, }, { .procname = "file-max", .data = &files_stat.max_files, .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = SYSCTL_LONG_ZERO, .extra2 = SYSCTL_LONG_MAX, }, { .procname = "nr_open", .data = &sysctl_nr_open, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &sysctl_nr_open_min, .extra2 = &sysctl_nr_open_max, }, }; static int __init init_fs_stat_sysctls(void) { register_sysctl_init("fs", fs_stat_sysctls); if (IS_ENABLED(CONFIG_BINFMT_MISC)) { struct ctl_table_header *hdr; hdr = register_sysctl_mount_point("fs/binfmt_misc"); kmemleak_not_leak(hdr); } return 0; } fs_initcall(init_fs_stat_sysctls); #endif static int init_file(struct file *f, int flags, const struct cred *cred) { int error; f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { put_cred(f->f_cred); return error; } rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); mutex_init(&f->f_pos_lock); f->f_flags = flags; f->f_mode = OPEN_FMODE(flags); /* f->f_version: 0 */ /* * We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While * fget-rcu pattern users need to be able to handle spurious * refcount bumps we should reinitialize the reused file first. */ atomic_long_set(&f->f_count, 1); return 0; } /* Find an unused file structure and return a pointer to it. * Returns an error pointer if some error happend e.g. we over file * structures limit, run out of memory or operation is not permitted. * * Be very careful using this. You are responsible for * getting write access to any mount that you might assign * to this filp, if it is opened for write. If this is not * done, you will imbalance int the mount's writer count * and a warning at __fput() time. 
*/ struct file *alloc_empty_file(int flags, const struct cred *cred) { static long old_max; struct file *f; int error; /* * Privileged users can go above max_files */ if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { /* * percpu_counters are inaccurate. Do an expensive check before * we go and fail. */ if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files) goto over; } f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); if (unlikely(!f)) return ERR_PTR(-ENOMEM); error = init_file(f, flags, cred); if (unlikely(error)) { kmem_cache_free(filp_cachep, f); return ERR_PTR(error); } percpu_counter_inc(&nr_files); return f; over: /* Ran out of filps - report that */ if (get_nr_files() > old_max) { pr_info("VFS: file-max limit %lu reached\n", get_max_files()); old_max = get_nr_files(); } return ERR_PTR(-ENFILE); } /* * Variant of alloc_empty_file() that doesn't check and modify nr_files. * * This is only for kernel internal use, and the allocate file must not be * installed into file tables or such. */ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) { struct file *f; int error; f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); if (unlikely(!f)) return ERR_PTR(-ENOMEM); error = init_file(f, flags, cred); if (unlikely(error)) { kmem_cache_free(filp_cachep, f); return ERR_PTR(error); } f->f_mode |= FMODE_NOACCOUNT; return f; } /* * Variant of alloc_empty_file() that allocates a backing_file container * and doesn't check and modify nr_files. * * This is only for kernel internal use, and the allocate file must not be * installed into file tables or such. */ struct file *alloc_empty_backing_file(int flags, const struct cred *cred) { struct backing_file *ff; int error; ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL); if (unlikely(!ff)) return ERR_PTR(-ENOMEM); error = init_file(&ff->file, flags, cred); if (unlikely(error)) { kfree(ff); return ERR_PTR(error); } ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT; return &ff->file; } /** * file_init_path - initialize a 'struct file' based on path * * @file: the file to set up * @path: the (dentry, vfsmount) pair for the new file * @fop: the 'struct file_operations' for the new file */ static void file_init_path(struct file *file, const struct path *path, const struct file_operations *fop) { file->f_path = *path; file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; file->f_wb_err = filemap_sample_wb_err(file->f_mapping); file->f_sb_err = file_sample_sb_err(file); if (fop->llseek) file->f_mode |= FMODE_LSEEK; if ((file->f_mode & FMODE_READ) && likely(fop->read || fop->read_iter)) file->f_mode |= FMODE_CAN_READ; if ((file->f_mode & FMODE_WRITE) && likely(fop->write || fop->write_iter)) file->f_mode |= FMODE_CAN_WRITE; file->f_iocb_flags = iocb_flags(file); file->f_mode |= FMODE_OPENED; file->f_op = fop; if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); } /** * alloc_file - allocate and initialize a 'struct file' * * @path: the (dentry, vfsmount) pair for the new file * @flags: O_... 
flags with which the new file will be opened * @fop: the 'struct file_operations' for the new file */ static struct file *alloc_file(const struct path *path, int flags, const struct file_operations *fop) { struct file *file; file = alloc_empty_file(flags, current_cred()); if (!IS_ERR(file)) file_init_path(file, path, fop); return file; } static inline int alloc_path_pseudo(const char *name, struct inode *inode, struct vfsmount *mnt, struct path *path) { struct qstr this = QSTR_INIT(name, strlen(name)); path->dentry = d_alloc_pseudo(mnt->mnt_sb, &this); if (!path->dentry) return -ENOMEM; path->mnt = mntget(mnt); d_instantiate(path->dentry, inode); return 0; } struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, const char *name, int flags, const struct file_operations *fops) { int ret; struct path path; struct file *file; ret = alloc_path_pseudo(name, inode, mnt, &path); if (ret) return ERR_PTR(ret); file = alloc_file(&path, flags, fops); if (IS_ERR(file)) { ihold(inode); path_put(&path); } return file; } EXPORT_SYMBOL(alloc_file_pseudo); struct file *alloc_file_pseudo_noaccount(struct inode *inode, struct vfsmount *mnt, const char *name, int flags, const struct file_operations *fops) { int ret; struct path path; struct file *file; ret = alloc_path_pseudo(name, inode, mnt, &path); if (ret) return ERR_PTR(ret); file = alloc_empty_file_noaccount(flags, current_cred()); if (IS_ERR(file)) { ihold(inode); path_put(&path); return file; } file_init_path(file, &path, fops); return file; } EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount); struct file *alloc_file_clone(struct file *base, int flags, const struct file_operations *fops) { struct file *f = alloc_file(&base->f_path, flags, fops); if (!IS_ERR(f)) { path_get(&f->f_path); f->f_mapping = base->f_mapping; } return f; } /* the real guts of fput() - releasing the last reference to file */ static void __fput(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct vfsmount *mnt = file->f_path.mnt; struct inode *inode = file->f_inode; fmode_t mode = file->f_mode; if (unlikely(!(file->f_mode & FMODE_OPENED))) goto out; might_sleep(); fsnotify_close(file); /* * The function eventpoll_release() should be the first called * in the file cleanup chain. */ eventpoll_release(file); locks_remove_file(file); security_file_release(file); if (unlikely(file->f_flags & FASYNC)) { if (file->f_op->fasync) file->f_op->fasync(-1, file, 0); } if (file->f_op->release) file->f_op->release(inode, file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); } fops_put(file->f_op); put_pid(file->f_owner.pid); put_file_access(file); dput(dentry); if (unlikely(mode & FMODE_NEED_UNMOUNT)) dissolve_on_fput(mnt); mntput(mnt); out: file_free(file); } static LLIST_HEAD(delayed_fput_list); static void delayed_fput(struct work_struct *unused) { struct llist_node *node = llist_del_all(&delayed_fput_list); struct file *f, *t; llist_for_each_entry_safe(f, t, node, f_llist) __fput(f); } static void ____fput(struct callback_head *work) { __fput(container_of(work, struct file, f_task_work)); } /* * If kernel thread really needs to have the final fput() it has done * to complete, call this. The only user right now is the boot - we * *do* need to make sure our writes to binaries on initramfs has * not left us with opened struct file waiting for __fput() - execve() * won't work without that. 
Please, don't add more callers without * very good reasons; in particular, never call that with locks * held and never call that from a thread that might need to do * some work on any kind of umount. */ void flush_delayed_fput(void) { delayed_fput(NULL); } EXPORT_SYMBOL_GPL(flush_delayed_fput); static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); void fput(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) { file_free(file); return; } if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_task_work, ____fput); if (!task_work_add(task, &file->f_task_work, TWA_RESUME)) return; /* * After this task has run exit_task_work(), * task_work_add() will fail. Fall through to delayed * fput to avoid leaking *file. */ } if (llist_add(&file->f_llist, &delayed_fput_list)) schedule_delayed_work(&delayed_fput_work, 1); } } /* * synchronous analog of fput(); for kernel threads that might be needed * in some umount() (and thus can't use flush_delayed_fput() without * risking deadlocks), need to wait for completion of __fput() and know * for this specific struct file it won't involve anything that would * need them. Use only if you really need it - at the very least, * don't blindly convert fput() by kernel thread to that. */ void __fput_sync(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) __fput(file); } EXPORT_SYMBOL(fput); EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); percpu_counter_init(&nr_files, 0, GFP_KERNEL); } /* * One file with associated inode and dcache is very roughly 1K. Per default * do not use more than 10% of our memory for files. */ void __init files_maxfiles_init(void) { unsigned long n; unsigned long nr_pages = totalram_pages(); unsigned long memreserve = (nr_pages - nr_free_pages()) * 3/2; memreserve = min(memreserve, nr_pages - 1); n = ((nr_pages - memreserve) * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); }
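/*
 * Editor's illustration -- not part of the original file. A rough sketch of
 * how a pseudo filesystem might hand out a struct file via the exported
 * alloc_file_pseudo() above, loosely in the spirit of anon_inode/socket
 * users. demo_* names, the mount and the fops are hypothetical and error
 * handling is trimmed.
 */
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/err.h>

static struct file *demo_get_file(struct vfsmount *demo_mnt,
				  const struct file_operations *demo_fops,
				  void *priv)
{
	struct inode *inode;
	struct file *file;

	inode = new_inode(demo_mnt->mnt_sb);	/* pseudo inode, no backing store */
	if (!inode)
		return ERR_PTR(-ENOMEM);
	inode->i_mode = S_IFREG | 0600;

	file = alloc_file_pseudo(inode, demo_mnt, "[demo]", O_RDWR, demo_fops);
	if (IS_ERR(file))
		iput(inode);	/* error path above preserved our inode reference */
	else
		file->private_data = priv;	/* dropped via demo_fops->release */
	return file;
}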
// SPDX-License-Identifier: GPL-2.0 /* * attribute_container.c - implementation of a simple container for classes * * Copyright (c) 2005 - James Bottomley <James.Bottomley@steeleye.com> * * The basic idea here is to enable a device to be attached to an * arbitrary number of classes without having to allocate storage for them. * Instead, the contained classes select the devices they need to attach * to via a matching function. */ #include <linux/attribute_container.h> #include <linux/device.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/module.h> #include <linux/mutex.h> #include "base.h" /* This is a private structure used to tie the classdev and the * container; it should never be visible outside this file */ struct internal_container { struct klist_node node; struct attribute_container *cont; struct device classdev; }; static void internal_container_klist_get(struct klist_node *n) { struct internal_container *ic = container_of(n, struct internal_container, node); get_device(&ic->classdev); } static void internal_container_klist_put(struct klist_node *n) { struct internal_container *ic = container_of(n, struct internal_container, node); put_device(&ic->classdev); } /** * attribute_container_classdev_to_container - given a classdev, return the container * * @classdev: the class device created by attribute_container_add_device. * * Returns the container associated with this classdev.
*/ struct attribute_container * attribute_container_classdev_to_container(struct device *classdev) { struct internal_container *ic = container_of(classdev, struct internal_container, classdev); return ic->cont; } EXPORT_SYMBOL_GPL(attribute_container_classdev_to_container); static LIST_HEAD(attribute_container_list); static DEFINE_MUTEX(attribute_container_mutex); /** * attribute_container_register - register an attribute container * * @cont: The container to register. This must be allocated by the * callee and should also be zeroed by it. */ int attribute_container_register(struct attribute_container *cont) { INIT_LIST_HEAD(&cont->node); klist_init(&cont->containers, internal_container_klist_get, internal_container_klist_put); mutex_lock(&attribute_container_mutex); list_add_tail(&cont->node, &attribute_container_list); mutex_unlock(&attribute_container_mutex); return 0; } EXPORT_SYMBOL_GPL(attribute_container_register); /** * attribute_container_unregister - remove a container registration * * @cont: previously registered container to remove */ int attribute_container_unregister(struct attribute_container *cont) { int retval = -EBUSY; mutex_lock(&attribute_container_mutex); spin_lock(&cont->containers.k_lock); if (!list_empty(&cont->containers.k_list)) goto out; retval = 0; list_del(&cont->node); out: spin_unlock(&cont->containers.k_lock); mutex_unlock(&attribute_container_mutex); return retval; } EXPORT_SYMBOL_GPL(attribute_container_unregister); /* private function used as class release */ static void attribute_container_release(struct device *classdev) { struct internal_container *ic = container_of(classdev, struct internal_container, classdev); struct device *dev = classdev->parent; kfree(ic); put_device(dev); } /** * attribute_container_add_device - see if any container is interested in dev * * @dev: device to add attributes to * @fn: function to trigger addition of class device. * * This function allocates storage for the class device(s) to be * attached to dev (one for each matching attribute_container). If no * fn is provided, the code will simply register the class device via * device_add. If a function is provided, it is expected to add * the class device at the appropriate time. One of the things that * might be necessary is to allocate and initialise the classdev and * then add it a later time. To do this, call this routine for * allocation and initialisation and then use * attribute_container_device_trigger() to call device_add() on * it. Note: after this, the class device contains a reference to dev * which is not relinquished until the release of the classdev. 
*/ void attribute_container_add_device(struct device *dev, int (*fn)(struct attribute_container *, struct device *, struct device *)) { struct attribute_container *cont; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { struct internal_container *ic; if (attribute_container_no_classdevs(cont)) continue; if (!cont->match(cont, dev)) continue; ic = kzalloc(sizeof(*ic), GFP_KERNEL); if (!ic) { dev_err(dev, "failed to allocate class container\n"); continue; } ic->cont = cont; device_initialize(&ic->classdev); ic->classdev.parent = get_device(dev); ic->classdev.class = cont->class; cont->class->dev_release = attribute_container_release; dev_set_name(&ic->classdev, "%s", dev_name(dev)); if (fn) fn(cont, dev, &ic->classdev); else attribute_container_add_class_device(&ic->classdev); klist_add_tail(&ic->node, &cont->containers); } mutex_unlock(&attribute_container_mutex); } /* FIXME: can't break out of this unless klist_iter_exit is also * called before doing the break */ #define klist_for_each_entry(pos, head, member, iter) \ for (klist_iter_init(head, iter); (pos = ({ \ struct klist_node *n = klist_next(iter); \ n ? container_of(n, typeof(*pos), member) : \ ({ klist_iter_exit(iter) ; NULL; }); \ })) != NULL;) /** * attribute_container_remove_device - make device eligible for removal. * * @dev: The generic device * @fn: A function to call to remove the device * * This routine triggers device removal. If fn is NULL, then it is * simply done via device_unregister (note that if something * still has a reference to the classdev, then the memory occupied * will not be freed until the classdev is released). If you want a * two phase release: remove from visibility and then delete the * device, then you should use this routine with a fn that calls * device_del() and then use attribute_container_device_trigger() * to do the final put on the classdev. */ void attribute_container_remove_device(struct device *dev, void (*fn)(struct attribute_container *, struct device *, struct device *)) { struct attribute_container *cont; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { struct internal_container *ic; struct klist_iter iter; if (attribute_container_no_classdevs(cont)) continue; if (!cont->match(cont, dev)) continue; klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev != ic->classdev.parent) continue; klist_del(&ic->node); if (fn) fn(cont, dev, &ic->classdev); else { attribute_container_remove_attrs(&ic->classdev); device_unregister(&ic->classdev); } } } mutex_unlock(&attribute_container_mutex); } static int do_attribute_container_device_trigger_safe(struct device *dev, struct attribute_container *cont, int (*fn)(struct attribute_container *, struct device *, struct device *), int (*undo)(struct attribute_container *, struct device *, struct device *)) { int ret; struct internal_container *ic, *failed; struct klist_iter iter; if (attribute_container_no_classdevs(cont)) return fn(cont, dev, NULL); klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev == ic->classdev.parent) { ret = fn(cont, dev, &ic->classdev); if (ret) { failed = ic; klist_iter_exit(&iter); goto fail; } } } return 0; fail: if (!undo) return ret; /* Attempt to undo the work partially done. 
*/ klist_for_each_entry(ic, &cont->containers, node, &iter) { if (ic == failed) { klist_iter_exit(&iter); break; } if (dev == ic->classdev.parent) undo(cont, dev, &ic->classdev); } return ret; } /** * attribute_container_device_trigger_safe - execute a trigger for each * matching classdev or fail all of them. * * @dev: The generic device to run the trigger for * @fn: the function to execute for each classdev. * @undo: A function to undo the work previously done in case of error * * This function is a safe version of * attribute_container_device_trigger. It stops on the first error and * undo the partial work that has been done, on previous classdev. It * is guaranteed that either they all succeeded, or none of them * succeeded. */ int attribute_container_device_trigger_safe(struct device *dev, int (*fn)(struct attribute_container *, struct device *, struct device *), int (*undo)(struct attribute_container *, struct device *, struct device *)) { struct attribute_container *cont, *failed = NULL; int ret = 0; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { if (!cont->match(cont, dev)) continue; ret = do_attribute_container_device_trigger_safe(dev, cont, fn, undo); if (ret) { failed = cont; break; } } if (ret && !WARN_ON(!undo)) { list_for_each_entry(cont, &attribute_container_list, node) { if (failed == cont) break; if (!cont->match(cont, dev)) continue; do_attribute_container_device_trigger_safe(dev, cont, undo, NULL); } } mutex_unlock(&attribute_container_mutex); return ret; } /** * attribute_container_device_trigger - execute a trigger for each matching classdev * * @dev: The generic device to run the trigger for * @fn: the function to execute for each classdev. * * This function is for executing a trigger when you need to know both * the container and the classdev. If you only care about the * container, then use attribute_container_trigger() instead. */ void attribute_container_device_trigger(struct device *dev, int (*fn)(struct attribute_container *, struct device *, struct device *)) { struct attribute_container *cont; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { struct internal_container *ic; struct klist_iter iter; if (!cont->match(cont, dev)) continue; if (attribute_container_no_classdevs(cont)) { fn(cont, dev, NULL); continue; } klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev == ic->classdev.parent) fn(cont, dev, &ic->classdev); } } mutex_unlock(&attribute_container_mutex); } /** * attribute_container_trigger - trigger a function for each matching container * * @dev: The generic device to activate the trigger for * @fn: the function to trigger * * This routine triggers a function that only needs to know the * matching containers (not the classdev) associated with a device. * It is more lightweight than attribute_container_device_trigger, so * should be used in preference unless the triggering function * actually needs to know the classdev. 
*/ void attribute_container_trigger(struct device *dev, int (*fn)(struct attribute_container *, struct device *)) { struct attribute_container *cont; mutex_lock(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { if (cont->match(cont, dev)) fn(cont, dev); } mutex_unlock(&attribute_container_mutex); } /** * attribute_container_add_attrs - add attributes * * @classdev: The class device * * This simply creates all the class device sysfs files from the * attributes listed in the container */ int attribute_container_add_attrs(struct device *classdev) { struct attribute_container *cont = attribute_container_classdev_to_container(classdev); struct device_attribute **attrs = cont->attrs; int i, error; BUG_ON(attrs && cont->grp); if (!attrs && !cont->grp) return 0; if (cont->grp) return sysfs_create_group(&classdev->kobj, cont->grp); for (i = 0; attrs[i]; i++) { sysfs_attr_init(&attrs[i]->attr); error = device_create_file(classdev, attrs[i]); if (error) return error; } return 0; } /** * attribute_container_add_class_device - same function as device_add * * @classdev: the class device to add * * This performs essentially the same function as device_add except for * attribute containers, namely add the classdev to the system and then * create the attribute files */ int attribute_container_add_class_device(struct device *classdev) { int error = device_add(classdev); if (error) return error; return attribute_container_add_attrs(classdev); } /** * attribute_container_add_class_device_adapter - simple adapter for triggers * * @cont: the container to register. * @dev: the generic device to activate the trigger for * @classdev: the class device to add * * This function is identical to attribute_container_add_class_device except * that it is designed to be called from the triggers */ int attribute_container_add_class_device_adapter(struct attribute_container *cont, struct device *dev, struct device *classdev) { return attribute_container_add_class_device(classdev); } /** * attribute_container_remove_attrs - remove any attribute files * * @classdev: The class device to remove the files from * */ void attribute_container_remove_attrs(struct device *classdev) { struct attribute_container *cont = attribute_container_classdev_to_container(classdev); struct device_attribute **attrs = cont->attrs; int i; if (!attrs && !cont->grp) return; if (cont->grp) { sysfs_remove_group(&classdev->kobj, cont->grp); return ; } for (i = 0; attrs[i]; i++) device_remove_file(classdev, attrs[i]); } /** * attribute_container_class_device_del - equivalent of class_device_del * * @classdev: the class device * * This function simply removes all the attribute files and then calls * device_del. */ void attribute_container_class_device_del(struct device *classdev) { attribute_container_remove_attrs(classdev); device_del(classdev); } /** * attribute_container_find_class_device - find the corresponding class_device * * @cont: the container * @dev: the generic device * * Looks up the device in the container's list of class devices and returns * the corresponding class_device. 
*/ struct device * attribute_container_find_class_device(struct attribute_container *cont, struct device *dev) { struct device *cdev = NULL; struct internal_container *ic; struct klist_iter iter; klist_for_each_entry(ic, &cont->containers, node, &iter) { if (ic->classdev.parent == dev) { cdev = &ic->classdev; /* FIXME: must exit iterator then break */ klist_iter_exit(&iter); break; } } return cdev; } EXPORT_SYMBOL_GPL(attribute_container_find_class_device);
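/*
 * Usage sketch (hypothetical names throughout): how a transport-class style
 * user of this API typically ties the helpers together. The container, the
 * class, the match() rule and the configure callback below are invented for
 * illustration; only the attribute_container_*() calls are the API
 * implemented above.
 */
#include <linux/attribute_container.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/string.h>

static struct class sketch_transport_class = {
        .name = "sketch_transport",
};

/* Only devices this container cares about get a classdev allocated. */
static int sketch_match(struct attribute_container *cont, struct device *dev)
{
        return dev->bus && !strcmp(dev->bus->name, "sketch_bus");
}

static struct attribute_container sketch_container = {
        .class = &sketch_transport_class,
        .match = sketch_match,
};

/* Trigger callback: sees both the container and the per-device classdev. */
static int sketch_configure(struct attribute_container *cont,
                            struct device *dev, struct device *classdev)
{
        dev_info(dev, "configuring %s\n", dev_name(classdev));
        return 0;
}

static int __init sketch_transport_init(void)
{
        return attribute_container_register(&sketch_container);
}

/* Called when the hypothetical bus discovers a device. */
static void sketch_device_added(struct device *dev)
{
        /* NULL callback: fall back to attribute_container_add_class_device() */
        attribute_container_add_device(dev, NULL);
        attribute_container_device_trigger(dev, sketch_configure);
}

/* Teardown counterpart: NULL callback removes attrs and unregisters. */
static void sketch_device_removed(struct device *dev)
{
        attribute_container_remove_device(dev, NULL);
}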
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Derived from arch/ppc/mm/extable.c and arch/i386/mm/extable.c.
 *
 * Copyright (C) 2004 Paul Mackerras, IBM Corp.
 */

#include <linux/bsearch.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/uaccess.h>
#include <linux/extable.h>

#ifndef ARCH_HAS_RELATIVE_EXTABLE
#define ex_to_insn(x)	((x)->insn)
#else
static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
{
        return (unsigned long)&x->insn + x->insn;
}
#endif

#ifndef ARCH_HAS_RELATIVE_EXTABLE
#define swap_ex		NULL
#else
static void swap_ex(void *a, void *b, int size)
{
        struct exception_table_entry *x = a, *y = b, tmp;
        int delta = b - a;

        tmp = *x;
        x->insn = y->insn + delta;
        y->insn = tmp.insn - delta;
#ifdef swap_ex_entry_fixup
        swap_ex_entry_fixup(x, y, tmp, delta);
#else
        x->fixup = y->fixup + delta;
        y->fixup = tmp.fixup - delta;
#endif
}
#endif /* ARCH_HAS_RELATIVE_EXTABLE */

/*
 * The exception table needs to be sorted so that the binary
 * search that we use to find entries in it works properly.
 * This is used both for the kernel exception table and for
 * the exception tables of modules that get loaded.
 */
static int cmp_ex_sort(const void *a, const void *b)
{
        const struct exception_table_entry *x = a, *y = b;

        /* avoid overflow */
        if (ex_to_insn(x) > ex_to_insn(y))
                return 1;
        if (ex_to_insn(x) < ex_to_insn(y))
                return -1;
        return 0;
}

void sort_extable(struct exception_table_entry *start,
                  struct exception_table_entry *finish)
{
        sort(start, finish - start, sizeof(struct exception_table_entry),
             cmp_ex_sort, swap_ex);
}

#ifdef CONFIG_MODULES
/*
 * If the exception table is sorted, any referring to the module init
 * will be at the beginning or the end.
 */
void trim_init_extable(struct module *m)
{
        /* trim the beginning */
        while (m->num_exentries &&
               within_module_init(ex_to_insn(&m->extable[0]), m)) {
                m->extable++;
                m->num_exentries--;
        }
        /* trim the end */
        while (m->num_exentries &&
               within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]), m))
                m->num_exentries--;
}
#endif /* CONFIG_MODULES */

static int cmp_ex_search(const void *key, const void *elt)
{
        const struct exception_table_entry *_elt = elt;
        unsigned long _key = *(unsigned long *)key;

        /* avoid overflow */
        if (_key > ex_to_insn(_elt))
                return 1;
        if (_key < ex_to_insn(_elt))
                return -1;
        return 0;
}

/*
 * Search one exception table for an entry corresponding to the
 * given instruction address, and return the address of the entry,
 * or NULL if none is found.
 * We use a binary search, and thus we assume that the table is
 * already sorted.
 */
const struct exception_table_entry *
search_extable(const struct exception_table_entry *base,
               const size_t num,
               unsigned long value)
{
        return bsearch(&value, base, num, sizeof(struct exception_table_entry),
                       cmp_ex_search);
}
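/*
 * Illustrative sketch (not part of extable.c): the same sort-then-bsearch
 * pattern on a mock table, using the kernel's sort() and bsearch() helpers.
 * The struct and names are invented; the point is the comparator shape -
 * returning -1/0/1 from explicit compares instead of subtracting, so large
 * unsigned addresses can never overflow an int and corrupt the ordering.
 */
#include <linux/sort.h>
#include <linux/bsearch.h>

struct mock_fixup {
        unsigned long insn;     /* faulting instruction address */
        unsigned long fixup;    /* recovery address */
};

static int mock_cmp(const void *a, const void *b)
{
        const struct mock_fixup *x = a, *y = b;

        if (x->insn > y->insn)
                return 1;
        if (x->insn < y->insn)
                return -1;
        return 0;
}

static int mock_cmp_key(const void *key, const void *elt)
{
        unsigned long addr = *(const unsigned long *)key;
        const struct mock_fixup *e = elt;

        if (addr > e->insn)
                return 1;
        if (addr < e->insn)
                return -1;
        return 0;
}

static unsigned long mock_search(struct mock_fixup *table, size_t num,
                                 unsigned long fault_addr)
{
        const struct mock_fixup *hit;

        /* NULL swap function: sort() falls back to its generic swap */
        sort(table, num, sizeof(*table), mock_cmp, NULL);

        hit = bsearch(&fault_addr, table, num, sizeof(*table), mock_cmp_key);
        return hit ? hit->fixup : 0;
}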
206 205 207 206 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_LOCAL_LOCK_H # error "Do not include directly, include linux/local_lock.h" #endif #include <linux/percpu-defs.h> #include <linux/lockdep.h> #ifndef CONFIG_PREEMPT_RT typedef struct { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; struct task_struct *owner; #endif } local_lock_t; #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_CONFIG, \ .lock_type = LD_LOCK_PERCPU, \ }, \ .owner = NULL, static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); DEBUG_LOCKS_WARN_ON(l->owner); l->owner = current; } static inline void local_lock_release(local_lock_t *l) { DEBUG_LOCKS_WARN_ON(l->owner != current); l->owner = NULL; lock_map_release(&l->dep_map); } static inline void local_lock_debug_init(local_lock_t *l) { l->owner = NULL; } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ static struct lock_class_key __key; \ \ debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, \ 0, LD_WAIT_CONFIG, LD_WAIT_INV, \ LD_LOCK_PERCPU); \ local_lock_debug_init(lock); \ } while (0) #define __local_lock(lock) \ do { \ preempt_disable(); \ local_lock_acquire(this_cpu_ptr(lock)); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ local_lock_acquire(this_cpu_ptr(lock)); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ local_lock_acquire(this_cpu_ptr(lock)); \ } while (0) #define __local_unlock(lock) \ do { \ local_lock_release(this_cpu_ptr(lock)); \ preempt_enable(); \ } while (0) #define __local_unlock_irq(lock) \ do { \ local_lock_release(this_cpu_ptr(lock)); \ local_irq_enable(); \ } while (0) #define __local_unlock_irqrestore(lock, flags) \ do { \ local_lock_release(this_cpu_ptr(lock)); \ local_irq_restore(flags); \ } while (0) #else /* !CONFIG_PREEMPT_RT */ /* * On PREEMPT_RT local_lock maps to a per CPU spinlock, which protects the * critical section while staying preemptible. 
*/ typedef spinlock_t local_lock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define __local_lock_init(l) \ do { \ local_spin_lock_init((l)); \ } while (0) #define __local_lock(__lock) \ do { \ migrate_disable(); \ spin_lock(this_cpu_ptr((__lock))); \ } while (0) #define __local_lock_irq(lock) __local_lock(lock) #define __local_lock_irqsave(lock, flags) \ do { \ typecheck(unsigned long, flags); \ flags = 0; \ __local_lock(lock); \ } while (0) #define __local_unlock(__lock) \ do { \ spin_unlock(this_cpu_ptr((__lock))); \ migrate_enable(); \ } while (0) #define __local_unlock_irq(lock) __local_unlock(lock) #define __local_unlock_irqrestore(lock, flags) __local_unlock(lock) #endif /* CONFIG_PREEMPT_RT */
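/*
 * Usage sketch (hypothetical per-CPU statistics structure): callers never
 * use the __local_lock*() internals above directly; they embed a
 * local_lock_t in their per-CPU data and go through the public
 * <linux/local_lock.h> wrappers. On !PREEMPT_RT those compile down to
 * preempt/irq disabling, on PREEMPT_RT to the per-CPU spinlock typedef'd
 * above.
 */
#include <linux/local_lock.h>
#include <linux/percpu.h>

struct sketch_pcpu_stats {
        local_lock_t lock;
        unsigned long events;
};

static DEFINE_PER_CPU(struct sketch_pcpu_stats, sketch_stats) = {
        .lock = INIT_LOCAL_LOCK(lock),
};

/* Process context: serializes against interrupt-context users on this CPU. */
static void sketch_account_event(void)
{
        unsigned long flags;

        local_lock_irqsave(&sketch_stats.lock, flags);
        __this_cpu_inc(sketch_stats.events);
        local_unlock_irqrestore(&sketch_stats.lock, flags);
}

/* Plain variant when no interrupt-context users exist. */
static unsigned long sketch_read_events(void)
{
        unsigned long val;

        local_lock(&sketch_stats.lock);
        val = __this_cpu_read(sketch_stats.events);
        local_unlock(&sketch_stats.lock);
        return val;
}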
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
 *
 * This file contains spurious interrupt handling.
 */

#include <linux/jiffies.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/timer.h>

#include "internals.h"

static int irqfixup __read_mostly;

#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(struct timer_list *unused);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
static int irq_poll_cpu;
static atomic_t irq_poll_active;

/*
 * We wait here for a poller to finish.
 *
 * If the poll runs on this CPU, then we yell loudly and return
 * false. That will leave the interrupt line disabled in the worst
 * case, but it should never happen.
 *
 * We wait until the poller is done and then recheck disabled and
 * action (about to be disabled). Only if it's still active, we return
 * true and let the handler run.
 */
bool irq_wait_for_poll(struct irq_desc *desc)
	__must_hold(&desc->lock)
{
        if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
                      "irq poll in progress on cpu %d for irq %d\n",
                      smp_processor_id(), desc->irq_data.irq))
                return false;

#ifdef CONFIG_SMP
        do {
                raw_spin_unlock(&desc->lock);
                while (irqd_irq_inprogress(&desc->irq_data))
                        cpu_relax();
                raw_spin_lock(&desc->lock);
        } while (irqd_irq_inprogress(&desc->irq_data));
        /* Might have been disabled in meantime */
        return !irqd_irq_disabled(&desc->irq_data) && desc->action;
#else
        return false;
#endif
}

/*
 * Recovery handler for misrouted interrupts.
*/ static int try_one_irq(struct irq_desc *desc, bool force) { irqreturn_t ret = IRQ_NONE; struct irqaction *action; raw_spin_lock(&desc->lock); /* * PER_CPU, nested thread interrupts and interrupts explicitly * marked polled are excluded from polling. */ if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc) || irq_settings_is_polled(desc)) goto out; /* * Do not poll disabled interrupts unless the spurious * disabled poller asks explicitly. */ if (irqd_irq_disabled(&desc->irq_data) && !force) goto out; /* * All handlers must agree on IRQF_SHARED, so we test just the * first. */ action = desc->action; if (!action || !(action->flags & IRQF_SHARED) || (action->flags & __IRQF_TIMER)) goto out; /* Already running on another processor */ if (irqd_irq_inprogress(&desc->irq_data)) { /* * Already running: If it is shared get the other * CPU to go looking for our mystery interrupt too */ desc->istate |= IRQS_PENDING; goto out; } /* Mark it poll in progress */ desc->istate |= IRQS_POLL_INPROGRESS; do { if (handle_irq_event(desc) == IRQ_HANDLED) ret = IRQ_HANDLED; /* Make sure that there is still a valid action */ action = desc->action; } while ((desc->istate & IRQS_PENDING) && action); desc->istate &= ~IRQS_POLL_INPROGRESS; out: raw_spin_unlock(&desc->lock); return ret == IRQ_HANDLED; } static int misrouted_irq(int irq) { struct irq_desc *desc; int i, ok = 0; if (atomic_inc_return(&irq_poll_active) != 1) goto out; irq_poll_cpu = smp_processor_id(); for_each_irq_desc(i, desc) { if (!i) continue; if (i == irq) /* Already tried */ continue; if (try_one_irq(desc, false)) ok = 1; } out: atomic_dec(&irq_poll_active); /* So the caller can adjust the irq error counts */ return ok; } static void poll_spurious_irqs(struct timer_list *unused) { struct irq_desc *desc; int i; if (atomic_inc_return(&irq_poll_active) != 1) goto out; irq_poll_cpu = smp_processor_id(); for_each_irq_desc(i, desc) { unsigned int state; if (!i) continue; /* Racy but it doesn't matter */ state = desc->istate; barrier(); if (!(state & IRQS_SPURIOUS_DISABLED)) continue; local_irq_disable(); try_one_irq(desc, true); local_irq_enable(); } out: atomic_dec(&irq_poll_active); mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } static inline int bad_action_ret(irqreturn_t action_ret) { unsigned int r = action_ret; if (likely(r <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) return 0; return 1; } /* * If 99,900 of the previous 100,000 interrupts have not been handled * then assume that the IRQ is stuck in some manner. Drop a diagnostic * and try to turn the IRQ off. * * (The other 100-of-100,000 interrupts may have been a correctly * functioning device sharing an IRQ with the failing one) */ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) { unsigned int irq = irq_desc_get_irq(desc); struct irqaction *action; unsigned long flags; if (bad_action_ret(action_ret)) { printk(KERN_ERR "irq event %d: bogus return value %x\n", irq, action_ret); } else { printk(KERN_ERR "irq %d: nobody cared (try booting with " "the \"irqpoll\" option)\n", irq); } dump_stack(); printk(KERN_ERR "handlers:\n"); /* * We need to take desc->lock here. note_interrupt() is called * w/o desc->lock held, but IRQ_PROGRESS set. We might race * with something else removing an action. It's ok to take * desc->lock here. See synchronize_irq(). 
*/ raw_spin_lock_irqsave(&desc->lock, flags); for_each_action_of_desc(desc, action) { printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler); if (action->thread_fn) printk(KERN_CONT " threaded [<%p>] %ps", action->thread_fn, action->thread_fn); printk(KERN_CONT "\n"); } raw_spin_unlock_irqrestore(&desc->lock, flags); } static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) { static int count = 100; if (count > 0) { count--; __report_bad_irq(desc, action_ret); } } static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret) { struct irqaction *action; if (!irqfixup) return 0; /* We didn't actually handle the IRQ - see if it was misrouted? */ if (action_ret == IRQ_NONE) return 1; /* * But for 'irqfixup == 2' we also do it for handled interrupts if * they are marked as IRQF_IRQPOLL (or for irq zero, which is the * traditional PC timer interrupt.. Legacy) */ if (irqfixup < 2) return 0; if (!irq) return 1; /* * Since we don't get the descriptor lock, "action" can * change under us. We don't really care, but we don't * want to follow a NULL pointer. So tell the compiler to * just load it once by using a barrier. */ action = desc->action; barrier(); return action && (action->flags & IRQF_IRQPOLL); } #define SPURIOUS_DEFERRED 0x80000000 void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) { unsigned int irq; if (desc->istate & IRQS_POLL_INPROGRESS || irq_settings_is_polled(desc)) return; if (bad_action_ret(action_ret)) { report_bad_irq(desc, action_ret); return; } /* * We cannot call note_interrupt from the threaded handler * because we need to look at the compound of all handlers * (primary and threaded). Aside of that in the threaded * shared case we have no serialization against an incoming * hardware interrupt while we are dealing with a threaded * result. * * So in case a thread is woken, we just note the fact and * defer the analysis to the next hardware interrupt. * * The threaded handlers store whether they successfully * handled an interrupt and we check whether that number * changed versus the last invocation. * * We could handle all interrupts with the delayed by one * mechanism, but for the non forced threaded case we'd just * add pointless overhead to the straight hardirq interrupts * for the sake of a few lines less code. */ if (action_ret & IRQ_WAKE_THREAD) { /* * There is a thread woken. Check whether one of the * shared primary handlers returned IRQ_HANDLED. If * not we defer the spurious detection to the next * interrupt. */ if (action_ret == IRQ_WAKE_THREAD) { int handled; /* * We use bit 31 of thread_handled_last to * denote the deferred spurious detection * active. No locking necessary as * thread_handled_last is only accessed here * and we have the guarantee that hard * interrupts are not reentrant. */ if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) { desc->threads_handled_last |= SPURIOUS_DEFERRED; return; } /* * Check whether one of the threaded handlers * returned IRQ_HANDLED since the last * interrupt happened. * * For simplicity we just set bit 31, as it is * set in threads_handled_last as well. So we * avoid extra masking. And we really do not * care about the high bits of the handled * count. We just care about the count being * different than the one we saw before. */ handled = atomic_read(&desc->threads_handled); handled |= SPURIOUS_DEFERRED; if (handled != desc->threads_handled_last) { action_ret = IRQ_HANDLED; /* * Note: We keep the SPURIOUS_DEFERRED * bit set. 
We are handling the * previous invocation right now. * Keep it for the current one, so the * next hardware interrupt will * account for it. */ desc->threads_handled_last = handled; } else { /* * None of the threaded handlers felt * responsible for the last interrupt * * We keep the SPURIOUS_DEFERRED bit * set in threads_handled_last as we * need to account for the current * interrupt as well. */ action_ret = IRQ_NONE; } } else { /* * One of the primary handlers returned * IRQ_HANDLED. So we don't care about the * threaded handlers on the same line. Clear * the deferred detection bit. * * In theory we could/should check whether the * deferred bit is set and take the result of * the previous run into account here as * well. But it's really not worth the * trouble. If every other interrupt is * handled we never trigger the spurious * detector. And if this is just the one out * of 100k unhandled ones which is handled * then we merily delay the spurious detection * by one hard interrupt. Not a real problem. */ desc->threads_handled_last &= ~SPURIOUS_DEFERRED; } } if (unlikely(action_ret == IRQ_NONE)) { /* * If we are seeing only the odd spurious IRQ caused by * bus asynchronicity then don't eventually trigger an error, * otherwise the counter becomes a doomsday timer for otherwise * working systems */ if (time_after(jiffies, desc->last_unhandled + HZ/10)) desc->irqs_unhandled = 1; else desc->irqs_unhandled++; desc->last_unhandled = jiffies; } irq = irq_desc_get_irq(desc); if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { int ok = misrouted_irq(irq); if (action_ret == IRQ_NONE) desc->irqs_unhandled -= ok; } if (likely(!desc->irqs_unhandled)) return; /* Now getting into unhandled irq detection */ desc->irq_count++; if (likely(desc->irq_count < 100000)) return; desc->irq_count = 0; if (unlikely(desc->irqs_unhandled > 99900)) { /* * The interrupt is stuck */ __report_bad_irq(desc, action_ret); /* * Now kill the IRQ */ printk(KERN_EMERG "Disabling IRQ #%d\n", irq); desc->istate |= IRQS_SPURIOUS_DISABLED; desc->depth++; irq_disable(desc); mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } desc->irqs_unhandled = 0; } bool noirqdebug __read_mostly; int noirqdebug_setup(char *str) { noirqdebug = 1; printk(KERN_INFO "IRQ lockup detection disabled\n"); return 1; } __setup("noirqdebug", noirqdebug_setup); module_param(noirqdebug, bool, 0644); MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); static int __init irqfixup_setup(char *str) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) { pr_warn("irqfixup boot option not supported with PREEMPT_RT\n"); return 1; } irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); return 1; } __setup("irqfixup", irqfixup_setup); module_param(irqfixup, int, 0644); static int __init irqpoll_setup(char *str) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) { pr_warn("irqpoll boot option not supported with PREEMPT_RT\n"); return 1; } irqfixup = 2; printk(KERN_WARNING "Misrouted IRQ fixup and polling support " "enabled\n"); printk(KERN_WARNING "This may significantly impact system " "performance\n"); return 1; } __setup("irqpoll", irqpoll_setup);
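/*
 * Sketch (hypothetical device, register offsets and driver names): the
 * detector above only works if handlers report honestly. A shared handler
 * must return IRQ_NONE when its device did not raise the interrupt and
 * IRQ_HANDLED when it did, so note_interrupt() can tell a real spurious
 * storm (99,900 unhandled out of 100,000) from normal line sharing.
 */
#include <linux/interrupt.h>
#include <linux/io.h>

#define SKETCH_IRQ_STATUS	0x10	/* made-up register offsets */
#define SKETCH_IRQ_ACK		0x14

struct sketch_dev {
        void __iomem *regs;
};

static irqreturn_t sketch_irq_handler(int irq, void *dev_id)
{
        struct sketch_dev *sd = dev_id;
        u32 status = readl(sd->regs + SKETCH_IRQ_STATUS);

        if (!status)
                return IRQ_NONE;	/* not ours: counted by note_interrupt() */

        writel(status, sd->regs + SKETCH_IRQ_ACK);
        return IRQ_HANDLED;		/* ours: resets the unhandled streak */
}

static int sketch_request(struct sketch_dev *sd, int irq)
{
        /* IRQF_SHARED is required for the pollers above to consider the line */
        return request_irq(irq, sketch_irq_handler, IRQF_SHARED, "sketch", sd);
}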
87 87 85 70 72 68 67 65 68 13 47 47 47 47 22 46 72 71 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 // SPDX-License-Identifier: GPL-2.0 /* * Lockless hierarchical page accounting & limiting * * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner */ #include <linux/page_counter.h> #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/sched.h> #include <linux/bug.h> #include <asm/page.h> static void propagate_protected_usage(struct page_counter *c, unsigned long usage) { unsigned long protected, old_protected; long delta; if (!c->parent) return; protected = min(usage, READ_ONCE(c->min)); old_protected = atomic_long_read(&c->min_usage); if (protected != old_protected) { old_protected = atomic_long_xchg(&c->min_usage, protected); delta = protected - old_protected; if (delta) atomic_long_add(delta, &c->parent->children_min_usage); } protected = min(usage, READ_ONCE(c->low)); old_protected = atomic_long_read(&c->low_usage); if (protected != old_protected) { old_protected = atomic_long_xchg(&c->low_usage, protected); delta = protected - old_protected; if (delta) atomic_long_add(delta, &c->parent->children_low_usage); } } /** * page_counter_cancel - take pages out of the local counter * @counter: counter * @nr_pages: number of pages to cancel */ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages) { long new; new = atomic_long_sub_return(nr_pages, &counter->usage); /* More uncharges than charges? */ if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n", new, nr_pages)) { new = 0; atomic_long_set(&counter->usage, new); } propagate_protected_usage(counter, new); } /** * page_counter_charge - hierarchically charge pages * @counter: counter * @nr_pages: number of pages to charge * * NOTE: This does not consider any configured counter limits. */ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; for (c = counter; c; c = c->parent) { long new; new = atomic_long_add_return(nr_pages, &c->usage); propagate_protected_usage(c, new); /* * This is indeed racy, but we can live with some * inaccuracy in the watermark. */ if (new > READ_ONCE(c->watermark)) WRITE_ONCE(c->watermark, new); } } /** * page_counter_try_charge - try to hierarchically charge pages * @counter: counter * @nr_pages: number of pages to charge * @fail: points first counter to hit its limit, if any * * Returns %true on success, or %false and @fail if the counter or one * of its ancestors has hit its configured limit. 
*/ bool page_counter_try_charge(struct page_counter *counter, unsigned long nr_pages, struct page_counter **fail) { struct page_counter *c; for (c = counter; c; c = c->parent) { long new; /* * Charge speculatively to avoid an expensive CAS. If * a bigger charge fails, it might falsely lock out a * racing smaller charge and send it into reclaim * early, but the error is limited to the difference * between the two sizes, which is less than 2M/4M in * case of a THP locking out a regular page charge. * * The atomic_long_add_return() implies a full memory * barrier between incrementing the count and reading * the limit. When racing with page_counter_set_max(), * we either see the new limit or the setter sees the * counter has changed and retries. */ new = atomic_long_add_return(nr_pages, &c->usage); if (new > c->max) { atomic_long_sub(nr_pages, &c->usage); /* * This is racy, but we can live with some * inaccuracy in the failcnt which is only used * to report stats. */ data_race(c->failcnt++); *fail = c; goto failed; } propagate_protected_usage(c, new); /* * Just like with failcnt, we can live with some * inaccuracy in the watermark. */ if (new > READ_ONCE(c->watermark)) WRITE_ONCE(c->watermark, new); } return true; failed: for (c = counter; c != *fail; c = c->parent) page_counter_cancel(c, nr_pages); return false; } /** * page_counter_uncharge - hierarchically uncharge pages * @counter: counter * @nr_pages: number of pages to uncharge */ void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; for (c = counter; c; c = c->parent) page_counter_cancel(c, nr_pages); } /** * page_counter_set_max - set the maximum number of pages allowed * @counter: counter * @nr_pages: limit to set * * Returns 0 on success, -EBUSY if the current number of pages on the * counter already exceeds the specified limit. * * The caller must serialize invocations on the same counter. */ int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages) { for (;;) { unsigned long old; long usage; /* * Update the limit while making sure that it's not * below the concurrently-changing counter value. * * The xchg implies two full memory barriers before * and after, so the read-swap-read is ordered and * ensures coherency with page_counter_try_charge(): * that function modifies the count before checking * the limit, so if it sees the old limit, we see the * modified counter and retry. */ usage = page_counter_read(counter); if (usage > nr_pages) return -EBUSY; old = xchg(&counter->max, nr_pages); if (page_counter_read(counter) <= usage || nr_pages >= old) return 0; counter->max = old; cond_resched(); } } /** * page_counter_set_min - set the amount of protected memory * @counter: counter * @nr_pages: value to set * * The caller must serialize invocations on the same counter. */ void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; WRITE_ONCE(counter->min, nr_pages); for (c = counter; c; c = c->parent) propagate_protected_usage(c, atomic_long_read(&c->usage)); } /** * page_counter_set_low - set the amount of protected memory * @counter: counter * @nr_pages: value to set * * The caller must serialize invocations on the same counter. 
*/ void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; WRITE_ONCE(counter->low, nr_pages); for (c = counter; c; c = c->parent) propagate_protected_usage(c, atomic_long_read(&c->usage)); } /** * page_counter_memparse - memparse() for page counter limits * @buf: string to parse * @max: string meaning maximum possible value * @nr_pages: returns the result in number of pages * * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be * limited to %PAGE_COUNTER_MAX. */ int page_counter_memparse(const char *buf, const char *max, unsigned long *nr_pages) { char *end; u64 bytes; if (!strcmp(buf, max)) { *nr_pages = PAGE_COUNTER_MAX; return 0; } bytes = memparse(buf, &end); if (*end != '\0') return -EINVAL; *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX); return 0; }
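/*
 * Usage sketch (hypothetical controller): a parent/child pair where the
 * parent carries the limit. The charge/uncharge/set_max calls are the ones
 * implemented above; page_counter_init() is assumed here in its two-argument
 * form from <linux/page_counter.h> (the signature differs in some kernel
 * versions). All sketch_* names are invented.
 */
#include <linux/page_counter.h>
#include <linux/printk.h>
#include <linux/errno.h>

static struct page_counter sketch_root, sketch_child;

static void sketch_setup(void)
{
        page_counter_init(&sketch_root, NULL);
        page_counter_init(&sketch_child, &sketch_root);

        /* limit the whole hierarchy: 262144 pages = 1 GiB with 4 KiB pages */
        page_counter_set_max(&sketch_root, 262144);
}

static int sketch_charge_pages(unsigned long nr_pages)
{
        struct page_counter *fail;

        if (!page_counter_try_charge(&sketch_child, nr_pages, &fail)) {
                /* @fail points at the level that hit its limit */
                pr_debug("charge of %lu pages hit the %s limit\n", nr_pages,
                         fail == &sketch_root ? "root" : "child");
                return -ENOMEM;
        }
        return 0;
}

static void sketch_uncharge_pages(unsigned long nr_pages)
{
        /* walks child -> root, mirroring the successful charge */
        page_counter_uncharge(&sketch_child, nr_pages);
}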
// SPDX-License-Identifier: LGPL-2.1-or-later
/*
 * dvbdev.c
 *
 * Copyright (C) 2000 Ralph Metzler <ralph@convergence.de>
 *                  & Marcus Metzler <marcus@convergence.de>
 *                    for convergence integrated media GmbH
 */

#define pr_fmt(fmt) "dvbdev: " fmt

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/mutex.h>
#include <media/dvbdev.h>

/* Due to enum tuner_pad_index */
#include <media/tuner.h>

static DEFINE_MUTEX(dvbdev_mutex);
static LIST_HEAD(dvbdevfops_list);
static int dvbdev_debug;

module_param(dvbdev_debug, int, 0644);
MODULE_PARM_DESC(dvbdev_debug, "Turn on/off device debugging (default:off).");

#define dprintk(fmt, arg...) \
do { \ if (dvbdev_debug) \ printk(KERN_DEBUG pr_fmt("%s: " fmt), \ __func__, ##arg); \ } while (0) static LIST_HEAD(dvb_adapter_list); static DEFINE_MUTEX(dvbdev_register_lock); static const char * const dnames[] = { [DVB_DEVICE_VIDEO] = "video", [DVB_DEVICE_AUDIO] = "audio", [DVB_DEVICE_SEC] = "sec", [DVB_DEVICE_FRONTEND] = "frontend", [DVB_DEVICE_DEMUX] = "demux", [DVB_DEVICE_DVR] = "dvr", [DVB_DEVICE_CA] = "ca", [DVB_DEVICE_NET] = "net", [DVB_DEVICE_OSD] = "osd" }; #ifdef CONFIG_DVB_DYNAMIC_MINORS #define MAX_DVB_MINORS 256 #define DVB_MAX_IDS MAX_DVB_MINORS #else #define DVB_MAX_IDS 4 static const u8 minor_type[] = { [DVB_DEVICE_VIDEO] = 0, [DVB_DEVICE_AUDIO] = 1, [DVB_DEVICE_SEC] = 2, [DVB_DEVICE_FRONTEND] = 3, [DVB_DEVICE_DEMUX] = 4, [DVB_DEVICE_DVR] = 5, [DVB_DEVICE_CA] = 6, [DVB_DEVICE_NET] = 7, [DVB_DEVICE_OSD] = 8, }; #define nums2minor(num, type, id) \ (((num) << 6) | ((id) << 4) | minor_type[type]) #define MAX_DVB_MINORS (DVB_MAX_ADAPTERS * 64) #endif static struct class *dvb_class; static struct dvb_device *dvb_minors[MAX_DVB_MINORS]; static DECLARE_RWSEM(minor_rwsem); static int dvb_device_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev; mutex_lock(&dvbdev_mutex); down_read(&minor_rwsem); dvbdev = dvb_minors[iminor(inode)]; if (dvbdev && dvbdev->fops) { int err = 0; const struct file_operations *new_fops; new_fops = fops_get(dvbdev->fops); if (!new_fops) goto fail; file->private_data = dvb_device_get(dvbdev); replace_fops(file, new_fops); if (file->f_op->open) err = file->f_op->open(inode, file); up_read(&minor_rwsem); mutex_unlock(&dvbdev_mutex); if (err) dvb_device_put(dvbdev); return err; } fail: up_read(&minor_rwsem); mutex_unlock(&dvbdev_mutex); return -ENODEV; } static const struct file_operations dvb_device_fops = { .owner = THIS_MODULE, .open = dvb_device_open, .llseek = noop_llseek, }; static struct cdev dvb_device_cdev; int dvb_generic_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; if (!dvbdev) return -ENODEV; if (!dvbdev->users) return -EBUSY; if ((file->f_flags & O_ACCMODE) == O_RDONLY) { if (!dvbdev->readers) return -EBUSY; dvbdev->readers--; } else { if (!dvbdev->writers) return -EBUSY; dvbdev->writers--; } dvbdev->users--; return 0; } EXPORT_SYMBOL(dvb_generic_open); int dvb_generic_release(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; if (!dvbdev) return -ENODEV; if ((file->f_flags & O_ACCMODE) == O_RDONLY) dvbdev->readers++; else dvbdev->writers++; dvbdev->users++; dvb_device_put(dvbdev); return 0; } EXPORT_SYMBOL(dvb_generic_release); long dvb_generic_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct dvb_device *dvbdev = file->private_data; if (!dvbdev) return -ENODEV; if (!dvbdev->kernel_ioctl) return -EINVAL; return dvb_usercopy(file, cmd, arg, dvbdev->kernel_ioctl); } EXPORT_SYMBOL(dvb_generic_ioctl); static int dvbdev_get_free_id(struct dvb_adapter *adap, int type) { u32 id = 0; while (id < DVB_MAX_IDS) { struct dvb_device *dev; list_for_each_entry(dev, &adap->device_list, list_head) if (dev->type == type && dev->id == id) goto skip; return id; skip: id++; } return -ENFILE; } static void dvb_media_device_free(struct dvb_device *dvbdev) { #if defined(CONFIG_MEDIA_CONTROLLER_DVB) if (dvbdev->entity) { media_device_unregister_entity(dvbdev->entity); kfree(dvbdev->entity); kfree(dvbdev->pads); dvbdev->entity = NULL; dvbdev->pads = NULL; } if (dvbdev->tsout_entity) { int i; for (i = 0; i < dvbdev->tsout_num_entities; i++) { 
media_device_unregister_entity(&dvbdev->tsout_entity[i]); kfree(dvbdev->tsout_entity[i].name); } kfree(dvbdev->tsout_entity); kfree(dvbdev->tsout_pads); dvbdev->tsout_entity = NULL; dvbdev->tsout_pads = NULL; dvbdev->tsout_num_entities = 0; } if (dvbdev->intf_devnode) { media_devnode_remove(dvbdev->intf_devnode); dvbdev->intf_devnode = NULL; } if (dvbdev->adapter->conn) { media_device_unregister_entity(dvbdev->adapter->conn); kfree(dvbdev->adapter->conn); dvbdev->adapter->conn = NULL; kfree(dvbdev->adapter->conn_pads); dvbdev->adapter->conn_pads = NULL; } #endif } #if defined(CONFIG_MEDIA_CONTROLLER_DVB) static int dvb_create_tsout_entity(struct dvb_device *dvbdev, const char *name, int npads) { int i; dvbdev->tsout_pads = kcalloc(npads, sizeof(*dvbdev->tsout_pads), GFP_KERNEL); if (!dvbdev->tsout_pads) return -ENOMEM; dvbdev->tsout_entity = kcalloc(npads, sizeof(*dvbdev->tsout_entity), GFP_KERNEL); if (!dvbdev->tsout_entity) return -ENOMEM; dvbdev->tsout_num_entities = npads; for (i = 0; i < npads; i++) { struct media_pad *pads = &dvbdev->tsout_pads[i]; struct media_entity *entity = &dvbdev->tsout_entity[i]; int ret; entity->name = kasprintf(GFP_KERNEL, "%s #%d", name, i); if (!entity->name) return -ENOMEM; entity->function = MEDIA_ENT_F_IO_DTV; pads->flags = MEDIA_PAD_FL_SINK; ret = media_entity_pads_init(entity, 1, pads); if (ret < 0) return ret; ret = media_device_register_entity(dvbdev->adapter->mdev, entity); if (ret < 0) return ret; } return 0; } #define DEMUX_TSOUT "demux-tsout" #define DVR_TSOUT "dvr-tsout" static int dvb_create_media_entity(struct dvb_device *dvbdev, int type, int demux_sink_pads) { int i, ret, npads; switch (type) { case DVB_DEVICE_FRONTEND: npads = 2; break; case DVB_DEVICE_DVR: ret = dvb_create_tsout_entity(dvbdev, DVR_TSOUT, demux_sink_pads); return ret; case DVB_DEVICE_DEMUX: npads = 1 + demux_sink_pads; ret = dvb_create_tsout_entity(dvbdev, DEMUX_TSOUT, demux_sink_pads); if (ret < 0) return ret; break; case DVB_DEVICE_CA: npads = 2; break; case DVB_DEVICE_NET: /* * We should be creating entities for the MPE/ULE * decapsulation hardware (or software implementation). * * However, the number of for the MPE/ULE decaps may not be * fixed. As we don't have yet dynamic support for PADs at * the Media Controller, let's not create the decap * entities yet. 
*/ return 0; default: return 0; } dvbdev->entity = kzalloc(sizeof(*dvbdev->entity), GFP_KERNEL); if (!dvbdev->entity) return -ENOMEM; dvbdev->entity->name = dvbdev->name; if (npads) { dvbdev->pads = kcalloc(npads, sizeof(*dvbdev->pads), GFP_KERNEL); if (!dvbdev->pads) { kfree(dvbdev->entity); dvbdev->entity = NULL; return -ENOMEM; } } switch (type) { case DVB_DEVICE_FRONTEND: dvbdev->entity->function = MEDIA_ENT_F_DTV_DEMOD; dvbdev->pads[0].flags = MEDIA_PAD_FL_SINK; dvbdev->pads[1].flags = MEDIA_PAD_FL_SOURCE; break; case DVB_DEVICE_DEMUX: dvbdev->entity->function = MEDIA_ENT_F_TS_DEMUX; dvbdev->pads[0].flags = MEDIA_PAD_FL_SINK; for (i = 1; i < npads; i++) dvbdev->pads[i].flags = MEDIA_PAD_FL_SOURCE; break; case DVB_DEVICE_CA: dvbdev->entity->function = MEDIA_ENT_F_DTV_CA; dvbdev->pads[0].flags = MEDIA_PAD_FL_SINK; dvbdev->pads[1].flags = MEDIA_PAD_FL_SOURCE; break; default: /* Should never happen, as the first switch prevents it */ kfree(dvbdev->entity); kfree(dvbdev->pads); dvbdev->entity = NULL; dvbdev->pads = NULL; return 0; } if (npads) { ret = media_entity_pads_init(dvbdev->entity, npads, dvbdev->pads); if (ret) return ret; } ret = media_device_register_entity(dvbdev->adapter->mdev, dvbdev->entity); if (ret) return ret; pr_info("%s: media entity '%s' registered.\n", __func__, dvbdev->entity->name); return 0; } #endif static int dvb_register_media_device(struct dvb_device *dvbdev, int type, int minor, unsigned int demux_sink_pads) { #if defined(CONFIG_MEDIA_CONTROLLER_DVB) struct media_link *link; u32 intf_type; int ret; if (!dvbdev->adapter->mdev) return 0; ret = dvb_create_media_entity(dvbdev, type, demux_sink_pads); if (ret) return ret; switch (type) { case DVB_DEVICE_FRONTEND: intf_type = MEDIA_INTF_T_DVB_FE; break; case DVB_DEVICE_DEMUX: intf_type = MEDIA_INTF_T_DVB_DEMUX; break; case DVB_DEVICE_DVR: intf_type = MEDIA_INTF_T_DVB_DVR; break; case DVB_DEVICE_CA: intf_type = MEDIA_INTF_T_DVB_CA; break; case DVB_DEVICE_NET: intf_type = MEDIA_INTF_T_DVB_NET; break; default: return 0; } dvbdev->intf_devnode = media_devnode_create(dvbdev->adapter->mdev, intf_type, 0, DVB_MAJOR, minor); if (!dvbdev->intf_devnode) return -ENOMEM; /* * Create the "obvious" link, e. g. the ones that represent * a direct association between an interface and an entity. * Other links should be created elsewhere, like: * DVB FE intf -> tuner * DVB demux intf -> dvr */ if (!dvbdev->entity) return 0; link = media_create_intf_link(dvbdev->entity, &dvbdev->intf_devnode->intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; #endif return 0; } int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, const struct dvb_device *template, void *priv, enum dvb_device_type type, int demux_sink_pads) { struct dvb_device *dvbdev; struct file_operations *dvbdevfops = NULL; struct dvbdevfops_node *node = NULL, *new_node = NULL; struct device *clsdev; int minor; int id, ret; mutex_lock(&dvbdev_register_lock); id = dvbdev_get_free_id(adap, type); if (id < 0) { mutex_unlock(&dvbdev_register_lock); *pdvbdev = NULL; pr_err("%s: couldn't find free device id\n", __func__); return -ENFILE; } *pdvbdev = dvbdev = kzalloc(sizeof(*dvbdev), GFP_KERNEL); if (!dvbdev) { mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } /* * When a device of the same type is probe()d more than once, * the first allocated fops are used. This prevents memory leaks * that can occur when the same device is probe()d repeatedly. 
*/ list_for_each_entry(node, &dvbdevfops_list, list_head) { if (node->fops->owner == adap->module && node->type == type && node->template == template) { dvbdevfops = node->fops; break; } } if (!dvbdevfops) { dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); if (!dvbdevfops) { kfree(dvbdev); *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); if (!new_node) { kfree(dvbdevfops); kfree(dvbdev); *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } new_node->fops = dvbdevfops; new_node->type = type; new_node->template = template; list_add_tail(&new_node->list_head, &dvbdevfops_list); } memcpy(dvbdev, template, sizeof(struct dvb_device)); kref_init(&dvbdev->ref); dvbdev->type = type; dvbdev->id = id; dvbdev->adapter = adap; dvbdev->priv = priv; dvbdev->fops = dvbdevfops; init_waitqueue_head(&dvbdev->wait_queue); dvbdevfops->owner = adap->module; list_add_tail(&dvbdev->list_head, &adap->device_list); down_write(&minor_rwsem); #ifdef CONFIG_DVB_DYNAMIC_MINORS for (minor = 0; minor < MAX_DVB_MINORS; minor++) if (!dvb_minors[minor]) break; if (minor == MAX_DVB_MINORS) { if (new_node) { list_del(&new_node->list_head); kfree(dvbdevfops); kfree(new_node); } list_del(&dvbdev->list_head); kfree(dvbdev); *pdvbdev = NULL; up_write(&minor_rwsem); mutex_unlock(&dvbdev_register_lock); return -EINVAL; } #else minor = nums2minor(adap->num, type, id); #endif dvbdev->minor = minor; dvb_minors[minor] = dvb_device_get(dvbdev); up_write(&minor_rwsem); ret = dvb_register_media_device(dvbdev, type, minor, demux_sink_pads); if (ret) { pr_err("%s: dvb_register_media_device failed to create the mediagraph\n", __func__); if (new_node) { list_del(&new_node->list_head); kfree(dvbdevfops); kfree(new_node); } dvb_media_device_free(dvbdev); list_del(&dvbdev->list_head); kfree(dvbdev); *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return ret; } clsdev = device_create(dvb_class, adap->device, MKDEV(DVB_MAJOR, minor), dvbdev, "dvb%d.%s%d", adap->num, dnames[type], id); if (IS_ERR(clsdev)) { pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n", __func__, adap->num, dnames[type], id, PTR_ERR(clsdev)); if (new_node) { list_del(&new_node->list_head); kfree(dvbdevfops); kfree(new_node); } dvb_media_device_free(dvbdev); list_del(&dvbdev->list_head); kfree(dvbdev); *pdvbdev = NULL; mutex_unlock(&dvbdev_register_lock); return PTR_ERR(clsdev); } dprintk("DVB: register adapter%d/%s%d @ minor: %i (0x%02x)\n", adap->num, dnames[type], id, minor, minor); mutex_unlock(&dvbdev_register_lock); return 0; } EXPORT_SYMBOL(dvb_register_device); void dvb_remove_device(struct dvb_device *dvbdev) { if (!dvbdev) return; down_write(&minor_rwsem); dvb_minors[dvbdev->minor] = NULL; dvb_device_put(dvbdev); up_write(&minor_rwsem); dvb_media_device_free(dvbdev); device_destroy(dvb_class, MKDEV(DVB_MAJOR, dvbdev->minor)); list_del(&dvbdev->list_head); } EXPORT_SYMBOL(dvb_remove_device); static void dvb_free_device(struct kref *ref) { struct dvb_device *dvbdev = container_of(ref, struct dvb_device, ref); kfree(dvbdev); } struct dvb_device *dvb_device_get(struct dvb_device *dvbdev) { kref_get(&dvbdev->ref); return dvbdev; } EXPORT_SYMBOL(dvb_device_get); void dvb_device_put(struct dvb_device *dvbdev) { if (dvbdev) kref_put(&dvbdev->ref, dvb_free_device); } void dvb_unregister_device(struct dvb_device *dvbdev) { dvb_remove_device(dvbdev); dvb_device_put(dvbdev); } EXPORT_SYMBOL(dvb_unregister_device); #ifdef CONFIG_MEDIA_CONTROLLER_DVB static 
int dvb_create_io_intf_links(struct dvb_adapter *adap, struct media_interface *intf, char *name) { struct media_device *mdev = adap->mdev; struct media_entity *entity; struct media_link *link; media_device_for_each_entity(entity, mdev) { if (entity->function == MEDIA_ENT_F_IO_DTV) { if (strncmp(entity->name, name, strlen(name))) continue; link = media_create_intf_link(entity, intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; } } return 0; } int dvb_create_media_graph(struct dvb_adapter *adap, bool create_rf_connector) { struct media_device *mdev = adap->mdev; struct media_entity *entity, *tuner = NULL, *demod = NULL, *conn; struct media_entity *demux = NULL, *ca = NULL; struct media_link *link; struct media_interface *intf; unsigned int demux_pad = 0; unsigned int dvr_pad = 0; unsigned int ntuner = 0, ndemod = 0; int ret, pad_source, pad_sink; static const char *connector_name = "Television"; if (!mdev) return 0; media_device_for_each_entity(entity, mdev) { switch (entity->function) { case MEDIA_ENT_F_TUNER: tuner = entity; ntuner++; break; case MEDIA_ENT_F_DTV_DEMOD: demod = entity; ndemod++; break; case MEDIA_ENT_F_TS_DEMUX: demux = entity; break; case MEDIA_ENT_F_DTV_CA: ca = entity; break; } } /* * Prepare to signalize to media_create_pad_links() that multiple * entities of the same type exists and a 1:n or n:1 links need to be * created. * NOTE: if both tuner and demod have multiple instances, it is up * to the caller driver to create such links. */ if (ntuner > 1) tuner = NULL; if (ndemod > 1) demod = NULL; if (create_rf_connector) { conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) return -ENOMEM; adap->conn = conn; adap->conn_pads = kzalloc(sizeof(*adap->conn_pads), GFP_KERNEL); if (!adap->conn_pads) return -ENOMEM; conn->flags = MEDIA_ENT_FL_CONNECTOR; conn->function = MEDIA_ENT_F_CONN_RF; conn->name = connector_name; adap->conn_pads->flags = MEDIA_PAD_FL_SOURCE; ret = media_entity_pads_init(conn, 1, adap->conn_pads); if (ret) return ret; ret = media_device_register_entity(mdev, conn); if (ret) return ret; if (!ntuner) { ret = media_create_pad_links(mdev, MEDIA_ENT_F_CONN_RF, conn, 0, MEDIA_ENT_F_DTV_DEMOD, demod, 0, MEDIA_LNK_FL_ENABLED, false); } else { pad_sink = media_get_pad_index(tuner, MEDIA_PAD_FL_SINK, PAD_SIGNAL_ANALOG); if (pad_sink < 0) return -EINVAL; ret = media_create_pad_links(mdev, MEDIA_ENT_F_CONN_RF, conn, 0, MEDIA_ENT_F_TUNER, tuner, pad_sink, MEDIA_LNK_FL_ENABLED, false); } if (ret) return ret; } if (ntuner && ndemod) { /* NOTE: first found tuner source pad presumed correct */ pad_source = media_get_pad_index(tuner, MEDIA_PAD_FL_SOURCE, PAD_SIGNAL_ANALOG); if (pad_source < 0) return -EINVAL; ret = media_create_pad_links(mdev, MEDIA_ENT_F_TUNER, tuner, pad_source, MEDIA_ENT_F_DTV_DEMOD, demod, 0, MEDIA_LNK_FL_ENABLED, false); if (ret) return ret; } if (ndemod && demux) { ret = media_create_pad_links(mdev, MEDIA_ENT_F_DTV_DEMOD, demod, 1, MEDIA_ENT_F_TS_DEMUX, demux, 0, MEDIA_LNK_FL_ENABLED, false); if (ret) return ret; } if (demux && ca) { ret = media_create_pad_link(demux, 1, ca, 0, MEDIA_LNK_FL_ENABLED); if (ret) return ret; } /* Create demux links for each ringbuffer/pad */ if (demux) { media_device_for_each_entity(entity, mdev) { if (entity->function == MEDIA_ENT_F_IO_DTV) { if (!strncmp(entity->name, DVR_TSOUT, strlen(DVR_TSOUT))) { ret = media_create_pad_link(demux, ++dvr_pad, entity, 0, 0); if (ret) return ret; } if (!strncmp(entity->name, DEMUX_TSOUT, strlen(DEMUX_TSOUT))) { ret = media_create_pad_link(demux, 
++demux_pad, entity, 0, 0); if (ret) return ret; } } } } /* Create interface links for FE->tuner, DVR->demux and CA->ca */ media_device_for_each_intf(intf, mdev) { if (intf->type == MEDIA_INTF_T_DVB_CA && ca) { link = media_create_intf_link(ca, intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; } if (intf->type == MEDIA_INTF_T_DVB_FE && tuner) { link = media_create_intf_link(tuner, intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; } #if 0 /* * Indirect link - let's not create yet, as we don't know how * to handle indirect links, nor if this will * actually be needed. */ if (intf->type == MEDIA_INTF_T_DVB_DVR && demux) { link = media_create_intf_link(demux, intf, MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); if (!link) return -ENOMEM; } #endif if (intf->type == MEDIA_INTF_T_DVB_DVR) { ret = dvb_create_io_intf_links(adap, intf, DVR_TSOUT); if (ret) return ret; } if (intf->type == MEDIA_INTF_T_DVB_DEMUX) { ret = dvb_create_io_intf_links(adap, intf, DEMUX_TSOUT); if (ret) return ret; } } return 0; } EXPORT_SYMBOL_GPL(dvb_create_media_graph); #endif static int dvbdev_check_free_adapter_num(int num) { struct list_head *entry; list_for_each(entry, &dvb_adapter_list) { struct dvb_adapter *adap; adap = list_entry(entry, struct dvb_adapter, list_head); if (adap->num == num) return 0; } return 1; } static int dvbdev_get_free_adapter_num(void) { int num = 0; while (num < DVB_MAX_ADAPTERS) { if (dvbdev_check_free_adapter_num(num)) return num; num++; } return -ENFILE; } int dvb_register_adapter(struct dvb_adapter *adap, const char *name, struct module *module, struct device *device, short *adapter_nums) { int i, num; mutex_lock(&dvbdev_register_lock); for (i = 0; i < DVB_MAX_ADAPTERS; ++i) { num = adapter_nums[i]; if (num >= 0 && num < DVB_MAX_ADAPTERS) { /* use the one the driver asked for */ if (dvbdev_check_free_adapter_num(num)) break; } else { num = dvbdev_get_free_adapter_num(); break; } num = -1; } if (num < 0) { mutex_unlock(&dvbdev_register_lock); return -ENFILE; } memset(adap, 0, sizeof(struct dvb_adapter)); INIT_LIST_HEAD(&adap->device_list); pr_info("DVB: registering new adapter (%s)\n", name); adap->num = num; adap->name = name; adap->module = module; adap->device = device; adap->mfe_shared = 0; adap->mfe_dvbdev = NULL; mutex_init(&adap->mfe_lock); #ifdef CONFIG_MEDIA_CONTROLLER_DVB mutex_init(&adap->mdev_lock); #endif list_add_tail(&adap->list_head, &dvb_adapter_list); mutex_unlock(&dvbdev_register_lock); return num; } EXPORT_SYMBOL(dvb_register_adapter); int dvb_unregister_adapter(struct dvb_adapter *adap) { mutex_lock(&dvbdev_register_lock); list_del(&adap->list_head); mutex_unlock(&dvbdev_register_lock); return 0; } EXPORT_SYMBOL(dvb_unregister_adapter); /* * if the miracle happens and "generic_usercopy()" is included into * the kernel, then this can vanish. please don't make the mistake and * define this as video_usercopy(). this will introduce a dependency * to the v4l "videodev.o" module, which is unnecessary for some * cards (ie. the budget dvb-cards don't need the v4l module...) */ int dvb_usercopy(struct file *file, unsigned int cmd, unsigned long arg, int (*func)(struct file *file, unsigned int cmd, void *arg)) { char sbuf[128]; void *mbuf = NULL; void *parg = NULL; int err = -EINVAL; /* Copy arguments into temp kernel buffer */ switch (_IOC_DIR(cmd)) { case _IOC_NONE: /* * For this command, the pointer is actually an integer * argument. 
*/ parg = (void *)arg; break; case _IOC_READ: /* some v4l ioctls are marked wrong ... */ case _IOC_WRITE: case (_IOC_WRITE | _IOC_READ): if (_IOC_SIZE(cmd) <= sizeof(sbuf)) { parg = sbuf; } else { /* too big to allocate from stack */ mbuf = kmalloc(_IOC_SIZE(cmd), GFP_KERNEL); if (!mbuf) return -ENOMEM; parg = mbuf; } err = -EFAULT; if (copy_from_user(parg, (void __user *)arg, _IOC_SIZE(cmd))) goto out; break; } /* call driver */ err = func(file, cmd, parg); if (err == -ENOIOCTLCMD) err = -ENOTTY; if (err < 0) goto out; /* Copy results into user buffer */ switch (_IOC_DIR(cmd)) { case _IOC_READ: case (_IOC_WRITE | _IOC_READ): if (copy_to_user((void __user *)arg, parg, _IOC_SIZE(cmd))) err = -EFAULT; break; } out: kfree(mbuf); return err; } #if IS_ENABLED(CONFIG_I2C) struct i2c_client *dvb_module_probe(const char *module_name, const char *name, struct i2c_adapter *adap, unsigned char addr, void *platform_data) { struct i2c_client *client; struct i2c_board_info *board_info; board_info = kzalloc(sizeof(*board_info), GFP_KERNEL); if (!board_info) return NULL; if (name) strscpy(board_info->type, name, I2C_NAME_SIZE); else strscpy(board_info->type, module_name, I2C_NAME_SIZE); board_info->addr = addr; board_info->platform_data = platform_data; request_module(module_name); client = i2c_new_client_device(adap, board_info); if (!i2c_client_has_driver(client)) { kfree(board_info); return NULL; } if (!try_module_get(client->dev.driver->owner)) { i2c_unregister_device(client); client = NULL; } kfree(board_info); return client; } EXPORT_SYMBOL_GPL(dvb_module_probe); void dvb_module_release(struct i2c_client *client) { if (!client) return; module_put(client->dev.driver->owner); i2c_unregister_device(client); } EXPORT_SYMBOL_GPL(dvb_module_release); #endif static int dvb_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct dvb_device *dvbdev = dev_get_drvdata(dev); add_uevent_var(env, "DVB_ADAPTER_NUM=%d", dvbdev->adapter->num); add_uevent_var(env, "DVB_DEVICE_TYPE=%s", dnames[dvbdev->type]); add_uevent_var(env, "DVB_DEVICE_NUM=%d", dvbdev->id); return 0; } static char *dvb_devnode(const struct device *dev, umode_t *mode) { const struct dvb_device *dvbdev = dev_get_drvdata(dev); return kasprintf(GFP_KERNEL, "dvb/adapter%d/%s%d", dvbdev->adapter->num, dnames[dvbdev->type], dvbdev->id); } static int __init init_dvbdev(void) { int retval; dev_t dev = MKDEV(DVB_MAJOR, 0); retval = register_chrdev_region(dev, MAX_DVB_MINORS, "DVB"); if (retval != 0) { pr_err("dvb-core: unable to get major %d\n", DVB_MAJOR); return retval; } cdev_init(&dvb_device_cdev, &dvb_device_fops); retval = cdev_add(&dvb_device_cdev, dev, MAX_DVB_MINORS); if (retval != 0) { pr_err("dvb-core: unable register character device\n"); goto error; } dvb_class = class_create("dvb"); if (IS_ERR(dvb_class)) { retval = PTR_ERR(dvb_class); goto error; } dvb_class->dev_uevent = dvb_uevent; dvb_class->devnode = dvb_devnode; return 0; error: cdev_del(&dvb_device_cdev); unregister_chrdev_region(dev, MAX_DVB_MINORS); return retval; } static void __exit exit_dvbdev(void) { struct dvbdevfops_node *node, *next; class_destroy(dvb_class); cdev_del(&dvb_device_cdev); unregister_chrdev_region(MKDEV(DVB_MAJOR, 0), MAX_DVB_MINORS); list_for_each_entry_safe(node, next, &dvbdevfops_list, list_head) { list_del(&node->list_head); kfree(node->fops); kfree(node); } } subsys_initcall(init_dvbdev); module_exit(exit_dvbdev); MODULE_DESCRIPTION("DVB Core Driver"); MODULE_AUTHOR("Marcus Metzler, Ralph Metzler, Holger Waechtler"); 
MODULE_LICENSE("GPL");
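The I2C attach/detach helpers above are easiest to read next to a usage sketch. The following is not part of dvbdev.c: it shows how a hypothetical bridge driver (only when CONFIG_I2C is enabled) could bind a demodulator with dvb_module_probe() and release it with dvb_module_release(), matching the signatures exported above. The module name "foo_demod", the I2C address 0x68, the function names and the header path are illustrative assumptions.

/* Hedged sketch, not from dvbdev.c: attach/detach a demod via the helpers above. */
#include <linux/i2c.h>
#include <media/dvbdev.h>	/* assumed header location for the prototypes */

static struct i2c_client *mydrv_attach_demod(struct i2c_adapter *i2c,
					     void *platform_data)
{
	/*
	 * Requests the "foo_demod" module (assumed name), creates an
	 * i2c_client at address 0x68 and takes a reference on its driver;
	 * returns NULL if no driver binds.
	 */
	return dvb_module_probe("foo_demod", NULL, i2c, 0x68, platform_data);
}

static void mydrv_detach_demod(struct i2c_client *client)
{
	/* NULL-safe: drops the module reference and unregisters the client. */
	dvb_module_release(client);
}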
// SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer Memory Manager * Copyright (c) 1998 by Frank van de Pol <fvdpol@coil.demon.nl> * Jaroslav Kysela <perex@perex.cz> * 2000 by Takashi Iwai <tiwai@suse.de> */ #include <linux/init.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/mm.h> #include <sound/core.h> #include <sound/seq_kernel.h> #include "seq_memory.h" #include "seq_queue.h" #include "seq_info.h" #include "seq_lock.h" static inline int snd_seq_pool_available(struct snd_seq_pool *pool) { return pool->total_elements - atomic_read(&pool->counter); } static inline int snd_seq_output_ok(struct snd_seq_pool *pool) { return snd_seq_pool_available(pool) >= pool->room; } /* * Variable length event: * The event like sysex uses variable length type. * The external data may be stored in three different formats. * 1) kernel space * This is the normal case. * ext.data.len = length * ext.data.ptr = buffer pointer * 2) user space * When an event is generated via read(), the external data is * kept in user space until expanded. * ext.data.len = length | SNDRV_SEQ_EXT_USRPTR * ext.data.ptr = userspace pointer * 3) chained cells * When the variable length event is enqueued (in prioq or fifo), * the external data is decomposed to several cells.
* ext.data.len = length | SNDRV_SEQ_EXT_CHAINED * ext.data.ptr = the additiona cell head * -> cell.next -> cell.next -> .. */ /* * exported: * call dump function to expand external data. */ static int get_var_len(const struct snd_seq_event *event) { if ((event->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) return -EINVAL; return event->data.ext.len & ~SNDRV_SEQ_EXT_MASK; } static int dump_var_event(const struct snd_seq_event *event, snd_seq_dump_func_t func, void *private_data, int offset, int maxlen) { int len, err; struct snd_seq_event_cell *cell; len = get_var_len(event); if (len <= 0) return len; if (len <= offset) return 0; if (maxlen && len > offset + maxlen) len = offset + maxlen; if (event->data.ext.len & SNDRV_SEQ_EXT_USRPTR) { char buf[32]; char __user *curptr = (char __force __user *)event->data.ext.ptr; curptr += offset; len -= offset; while (len > 0) { int size = sizeof(buf); if (len < size) size = len; if (copy_from_user(buf, curptr, size)) return -EFAULT; err = func(private_data, buf, size); if (err < 0) return err; curptr += size; len -= size; } return 0; } if (!(event->data.ext.len & SNDRV_SEQ_EXT_CHAINED)) return func(private_data, event->data.ext.ptr + offset, len - offset); cell = (struct snd_seq_event_cell *)event->data.ext.ptr; for (; len > 0 && cell; cell = cell->next) { int size = sizeof(struct snd_seq_event); char *curptr = (char *)&cell->event; if (offset >= size) { offset -= size; len -= size; continue; } if (len < size) size = len; err = func(private_data, curptr + offset, size - offset); if (err < 0) return err; offset = 0; len -= size; } return 0; } int snd_seq_dump_var_event(const struct snd_seq_event *event, snd_seq_dump_func_t func, void *private_data) { return dump_var_event(event, func, private_data, 0, 0); } EXPORT_SYMBOL(snd_seq_dump_var_event); /* * exported: * expand the variable length event to linear buffer space. */ static int seq_copy_in_kernel(void *ptr, void *src, int size) { char **bufptr = ptr; memcpy(*bufptr, src, size); *bufptr += size; return 0; } static int seq_copy_in_user(void *ptr, void *src, int size) { char __user **bufptr = ptr; if (copy_to_user(*bufptr, src, size)) return -EFAULT; *bufptr += size; return 0; } static int expand_var_event(const struct snd_seq_event *event, int offset, int size, char *buf, bool in_kernel) { if (event->data.ext.len & SNDRV_SEQ_EXT_USRPTR) { if (! in_kernel) return -EINVAL; if (copy_from_user(buf, (char __force __user *)event->data.ext.ptr + offset, size)) return -EFAULT; return 0; } return dump_var_event(event, in_kernel ? 
seq_copy_in_kernel : seq_copy_in_user, &buf, offset, size); } int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char *buf, int in_kernel, int size_aligned) { int len, newlen, err; len = get_var_len(event); if (len < 0) return len; newlen = len; if (size_aligned > 0) newlen = roundup(len, size_aligned); if (count < newlen) return -EAGAIN; err = expand_var_event(event, 0, len, buf, in_kernel); if (err < 0) return err; if (len != newlen) { if (in_kernel) memset(buf + len, 0, newlen - len); else if (clear_user((__force void __user *)buf + len, newlen - len)) return -EFAULT; } return newlen; } EXPORT_SYMBOL(snd_seq_expand_var_event); int snd_seq_expand_var_event_at(const struct snd_seq_event *event, int count, char *buf, int offset) { int len, err; len = get_var_len(event); if (len < 0) return len; if (len <= offset) return 0; len -= offset; if (len > count) len = count; err = expand_var_event(event, offset, count, buf, true); if (err < 0) return err; return len; } EXPORT_SYMBOL_GPL(snd_seq_expand_var_event_at); /* * release this cell, free extended data if available */ static inline void free_cell(struct snd_seq_pool *pool, struct snd_seq_event_cell *cell) { cell->next = pool->free; pool->free = cell; atomic_dec(&pool->counter); } void snd_seq_cell_free(struct snd_seq_event_cell * cell) { struct snd_seq_pool *pool; if (snd_BUG_ON(!cell)) return; pool = cell->pool; if (snd_BUG_ON(!pool)) return; guard(spinlock_irqsave)(&pool->lock); free_cell(pool, cell); if (snd_seq_ev_is_variable(&cell->event)) { if (cell->event.data.ext.len & SNDRV_SEQ_EXT_CHAINED) { struct snd_seq_event_cell *curp, *nextptr; curp = cell->event.data.ext.ptr; for (; curp; curp = nextptr) { nextptr = curp->next; curp->next = pool->free; free_cell(pool, curp); } } } if (waitqueue_active(&pool->output_sleep)) { /* has enough space now? */ if (snd_seq_output_ok(pool)) wake_up(&pool->output_sleep); } } /* * allocate an event cell. */ static int snd_seq_cell_alloc(struct snd_seq_pool *pool, struct snd_seq_event_cell **cellp, int nonblock, struct file *file, struct mutex *mutexp) { struct snd_seq_event_cell *cell; unsigned long flags; int err = -EAGAIN; wait_queue_entry_t wait; if (pool == NULL) return -EINVAL; *cellp = NULL; init_waitqueue_entry(&wait, current); spin_lock_irqsave(&pool->lock, flags); if (pool->ptr == NULL) { /* not initialized */ pr_debug("ALSA: seq: pool is not initialized\n"); err = -EINVAL; goto __error; } while (pool->free == NULL && ! nonblock && ! pool->closing) { set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&pool->output_sleep, &wait); spin_unlock_irqrestore(&pool->lock, flags); if (mutexp) mutex_unlock(mutexp); schedule(); if (mutexp) mutex_lock(mutexp); spin_lock_irqsave(&pool->lock, flags); remove_wait_queue(&pool->output_sleep, &wait); /* interrupted? */ if (signal_pending(current)) { err = -ERESTARTSYS; goto __error; } } if (pool->closing) { /* closing.. */ err = -ENOMEM; goto __error; } cell = pool->free; if (cell) { int used; pool->free = cell->next; atomic_inc(&pool->counter); used = atomic_read(&pool->counter); if (pool->max_used < used) pool->max_used = used; pool->event_alloc_success++; /* clear cell pointers */ cell->next = NULL; err = 0; } else pool->event_alloc_failures++; *cellp = cell; __error: spin_unlock_irqrestore(&pool->lock, flags); return err; } /* * duplicate the event to a cell. * if the event has external data, the data is decomposed to additional * cells. 
*/ int snd_seq_event_dup(struct snd_seq_pool *pool, struct snd_seq_event *event, struct snd_seq_event_cell **cellp, int nonblock, struct file *file, struct mutex *mutexp) { int ncells, err; unsigned int extlen; struct snd_seq_event_cell *cell; int size; *cellp = NULL; ncells = 0; extlen = 0; if (snd_seq_ev_is_variable(event)) { extlen = event->data.ext.len & ~SNDRV_SEQ_EXT_MASK; ncells = DIV_ROUND_UP(extlen, sizeof(struct snd_seq_event)); } if (ncells >= pool->total_elements) return -ENOMEM; err = snd_seq_cell_alloc(pool, &cell, nonblock, file, mutexp); if (err < 0) return err; /* copy the event */ size = snd_seq_event_packet_size(event); memcpy(&cell->ump, event, size); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) if (size < sizeof(cell->event)) cell->ump.raw.extra = 0; #endif /* decompose */ if (snd_seq_ev_is_variable(event)) { int len = extlen; int is_chained = event->data.ext.len & SNDRV_SEQ_EXT_CHAINED; int is_usrptr = event->data.ext.len & SNDRV_SEQ_EXT_USRPTR; struct snd_seq_event_cell *src, *tmp, *tail; char *buf; cell->event.data.ext.len = extlen | SNDRV_SEQ_EXT_CHAINED; cell->event.data.ext.ptr = NULL; src = (struct snd_seq_event_cell *)event->data.ext.ptr; buf = (char *)event->data.ext.ptr; tail = NULL; while (ncells-- > 0) { size = sizeof(struct snd_seq_event); if (len < size) size = len; err = snd_seq_cell_alloc(pool, &tmp, nonblock, file, mutexp); if (err < 0) goto __error; if (cell->event.data.ext.ptr == NULL) cell->event.data.ext.ptr = tmp; if (tail) tail->next = tmp; tail = tmp; /* copy chunk */ if (is_chained && src) { tmp->event = src->event; src = src->next; } else if (is_usrptr) { if (copy_from_user(&tmp->event, (char __force __user *)buf, size)) { err = -EFAULT; goto __error; } } else { memcpy(&tmp->event, buf, size); } buf += size; len -= size; } } *cellp = cell; return 0; __error: snd_seq_cell_free(cell); return err; } /* poll wait */ int snd_seq_pool_poll_wait(struct snd_seq_pool *pool, struct file *file, poll_table *wait) { poll_wait(file, &pool->output_sleep, wait); return snd_seq_output_ok(pool); } /* allocate room specified number of events */ int snd_seq_pool_init(struct snd_seq_pool *pool) { int cell; struct snd_seq_event_cell *cellptr; if (snd_BUG_ON(!pool)) return -EINVAL; cellptr = kvmalloc_array(pool->size, sizeof(struct snd_seq_event_cell), GFP_KERNEL); if (!cellptr) return -ENOMEM; /* add new cells to the free cell list */ guard(spinlock_irq)(&pool->lock); if (pool->ptr) { kvfree(cellptr); return 0; } pool->ptr = cellptr; pool->free = NULL; for (cell = 0; cell < pool->size; cell++) { cellptr = pool->ptr + cell; cellptr->pool = pool; cellptr->next = pool->free; pool->free = cellptr; } pool->room = (pool->size + 1) / 2; /* init statistics */ pool->max_used = 0; pool->total_elements = pool->size; return 0; } /* refuse the further insertion to the pool */ void snd_seq_pool_mark_closing(struct snd_seq_pool *pool) { if (snd_BUG_ON(!pool)) return; guard(spinlock_irqsave)(&pool->lock); pool->closing = 1; } /* remove events */ int snd_seq_pool_done(struct snd_seq_pool *pool) { struct snd_seq_event_cell *ptr; if (snd_BUG_ON(!pool)) return -EINVAL; /* wait for closing all threads */ if (waitqueue_active(&pool->output_sleep)) wake_up(&pool->output_sleep); while (atomic_read(&pool->counter) > 0) schedule_timeout_uninterruptible(1); /* release all resources */ scoped_guard(spinlock_irq, &pool->lock) { ptr = pool->ptr; pool->ptr = NULL; pool->free = NULL; pool->total_elements = 0; } kvfree(ptr); guard(spinlock_irq)(&pool->lock); pool->closing = 0; return 0; } /* init new memory 
pool */ struct snd_seq_pool *snd_seq_pool_new(int poolsize) { struct snd_seq_pool *pool; /* create pool block */ pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return NULL; spin_lock_init(&pool->lock); pool->ptr = NULL; pool->free = NULL; pool->total_elements = 0; atomic_set(&pool->counter, 0); pool->closing = 0; init_waitqueue_head(&pool->output_sleep); pool->size = poolsize; /* init statistics */ pool->max_used = 0; return pool; } /* remove memory pool */ int snd_seq_pool_delete(struct snd_seq_pool **ppool) { struct snd_seq_pool *pool = *ppool; *ppool = NULL; if (pool == NULL) return 0; snd_seq_pool_mark_closing(pool); snd_seq_pool_done(pool); kfree(pool); return 0; } /* exported to seq_clientmgr.c */ void snd_seq_info_pool(struct snd_info_buffer *buffer, struct snd_seq_pool *pool, char *space) { if (pool == NULL) return; snd_iprintf(buffer, "%sPool size : %d\n", space, pool->total_elements); snd_iprintf(buffer, "%sCells in use : %d\n", space, atomic_read(&pool->counter)); snd_iprintf(buffer, "%sPeak cells in use : %d\n", space, pool->max_used); snd_iprintf(buffer, "%sAlloc success : %d\n", space, pool->event_alloc_success); snd_iprintf(buffer, "%sAlloc failures : %d\n", space, pool->event_alloc_failures); }
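As a reading aid, here is a hedged lifecycle sketch, not part of seq_memory.c, of how a caller such as the sequencer client manager could drive the pool API defined above: create a pool, initialise it, duplicate an event into cells, free the cells, and tear the pool down. The function name demo_pool_roundtrip and the pool size of 500 cells are illustrative assumptions; error handling is abbreviated.

/* Hedged sketch, not from seq_memory.c: one round trip through the event pool. */
#include <linux/errno.h>
#include <sound/seq_kernel.h>
#include "seq_memory.h"		/* local header, as used by the seq core */

static int demo_pool_roundtrip(struct snd_seq_event *ev)
{
	struct snd_seq_pool *pool;
	struct snd_seq_event_cell *cell;
	int err;

	pool = snd_seq_pool_new(500);	/* 500 cells, an assumed size */
	if (!pool)
		return -ENOMEM;

	err = snd_seq_pool_init(pool);	/* allocate and chain the cell array */
	if (err < 0)
		goto out;

	/*
	 * Copy the event (and any variable-length data) into pool cells;
	 * blocking mode, no file or client mutex in this sketch.
	 */
	err = snd_seq_event_dup(pool, ev, &cell, 0, NULL, NULL);
	if (err >= 0)
		snd_seq_cell_free(cell);	/* also returns chained cells */
out:
	snd_seq_pool_delete(&pool);	/* marks closing, drains and frees */
	return err;
}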
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 /* * Copyright (c) 2006-2008 Intel Corporation * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> * Copyright (c) 2008 Red Hat Inc. * * DRM core CRTC related functions * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * * Authors: * Keith Packard * Eric Anholt <eric@anholt.net> * Dave Airlie <airlied@linux.ie> * Jesse Barnes <jesse.barnes@intel.com> */ #include <linux/ctype.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/dma-fence.h> #include <linux/uaccess.h> #include <drm/drm_blend.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_lock.h> #include <drm/drm_atomic.h> #include <drm/drm_auth.h> #include <drm/drm_debugfs_crc.h> #include <drm/drm_drv.h> #include <drm/drm_print.h> #include <drm/drm_file.h> #include "drm_crtc_internal.h" #include "drm_internal.h" /** * DOC: overview * * A CRTC represents the overall display pipeline. It receives pixel data from * &drm_plane and blends them together. The &drm_display_mode is also attached * to the CRTC, specifying display timings. On the output side the data is fed * to one or more &drm_encoder, which are then each connected to one * &drm_connector. * * To create a CRTC, a KMS driver allocates and zeroes an instance of * &struct drm_crtc (possibly as part of a larger structure) and registers it * with a call to drm_crtc_init_with_planes(). * * The CRTC is also the entry point for legacy modeset operations (see * &drm_crtc_funcs.set_config), legacy plane operations (see * &drm_crtc_funcs.page_flip and &drm_crtc_funcs.cursor_set2), and other legacy * operations like &drm_crtc_funcs.gamma_set. For atomic drivers all these * features are controlled through &drm_property and * &drm_mode_config_funcs.atomic_check. */ /** * drm_crtc_from_index - find the registered CRTC at an index * @dev: DRM device * @idx: index of registered CRTC to find for * * Given a CRTC index, return the registered CRTC from DRM device's * list of CRTCs with matching index. This is the inverse of drm_crtc_index(). * It's useful in the vblank callbacks (like &drm_driver.enable_vblank or * &drm_driver.disable_vblank), since that still deals with indices instead * of pointers to &struct drm_crtc." 
*/ struct drm_crtc *drm_crtc_from_index(struct drm_device *dev, int idx) { struct drm_crtc *crtc; drm_for_each_crtc(crtc, dev) if (idx == crtc->index) return crtc; return NULL; } EXPORT_SYMBOL(drm_crtc_from_index); int drm_crtc_force_disable(struct drm_crtc *crtc) { struct drm_mode_set set = { .crtc = crtc, }; WARN_ON(drm_drv_uses_atomic_modeset(crtc->dev)); return drm_mode_set_config_internal(&set); } int drm_crtc_register_all(struct drm_device *dev) { struct drm_crtc *crtc; int ret = 0; drm_for_each_crtc(crtc, dev) { drm_debugfs_crtc_add(crtc); if (crtc->funcs->late_register) ret = crtc->funcs->late_register(crtc); if (ret) return ret; } return 0; } void drm_crtc_unregister_all(struct drm_device *dev) { struct drm_crtc *crtc; drm_for_each_crtc(crtc, dev) { if (crtc->funcs->early_unregister) crtc->funcs->early_unregister(crtc); drm_debugfs_crtc_remove(crtc); } } static int drm_crtc_crc_init(struct drm_crtc *crtc) { #ifdef CONFIG_DEBUG_FS spin_lock_init(&crtc->crc.lock); init_waitqueue_head(&crtc->crc.wq); crtc->crc.source = kstrdup("auto", GFP_KERNEL); if (!crtc->crc.source) return -ENOMEM; #endif return 0; } static void drm_crtc_crc_fini(struct drm_crtc *crtc) { #ifdef CONFIG_DEBUG_FS kfree(crtc->crc.source); #endif } static const struct dma_fence_ops drm_crtc_fence_ops; static struct drm_crtc *fence_to_crtc(struct dma_fence *fence) { BUG_ON(fence->ops != &drm_crtc_fence_ops); return container_of(fence->lock, struct drm_crtc, fence_lock); } static const char *drm_crtc_fence_get_driver_name(struct dma_fence *fence) { struct drm_crtc *crtc = fence_to_crtc(fence); return crtc->dev->driver->name; } static const char *drm_crtc_fence_get_timeline_name(struct dma_fence *fence) { struct drm_crtc *crtc = fence_to_crtc(fence); return crtc->timeline_name; } static const struct dma_fence_ops drm_crtc_fence_ops = { .get_driver_name = drm_crtc_fence_get_driver_name, .get_timeline_name = drm_crtc_fence_get_timeline_name, }; struct dma_fence *drm_crtc_create_fence(struct drm_crtc *crtc) { struct dma_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (!fence) return NULL; dma_fence_init(fence, &drm_crtc_fence_ops, &crtc->fence_lock, crtc->fence_context, ++crtc->fence_seqno); return fence; } /** * DOC: standard CRTC properties * * DRM CRTCs have a few standardized properties: * * ACTIVE: * Atomic property for setting the power state of the CRTC. When set to 1 * the CRTC will actively display content. When set to 0 the CRTC will be * powered off. There is no expectation that user-space will reset CRTC * resources like the mode and planes when setting ACTIVE to 0. * * User-space can rely on an ACTIVE change to 1 to never fail an atomic * test as long as no other property has changed. If a change to ACTIVE * fails an atomic test, this is a driver bug. For this reason setting * ACTIVE to 0 must not release internal resources (like reserved memory * bandwidth or clock generators). * * Note that the legacy DPMS property on connectors is internally routed * to control this property for atomic drivers. * MODE_ID: * Atomic property for setting the CRTC display timings. The value is the * ID of a blob containing the DRM mode info. To disable the CRTC, * user-space must set this property to 0. * * Setting MODE_ID to 0 will release reserved resources for the CRTC. 
* SCALING_FILTER: * Atomic property for setting the scaling filter for CRTC scaler * * The value of this property can be one of the following: * * Default: * Driver's default scaling filter * Nearest Neighbor: * Nearest Neighbor scaling filter */ __printf(6, 0) static int __drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary, struct drm_plane *cursor, const struct drm_crtc_funcs *funcs, const char *name, va_list ap) { struct drm_mode_config *config = &dev->mode_config; int ret; WARN_ON(primary && primary->type != DRM_PLANE_TYPE_PRIMARY); WARN_ON(cursor && cursor->type != DRM_PLANE_TYPE_CURSOR); /* crtc index is used with 32bit bitmasks */ if (WARN_ON(config->num_crtc >= 32)) return -EINVAL; WARN_ON(drm_drv_uses_atomic_modeset(dev) && (!funcs->atomic_destroy_state || !funcs->atomic_duplicate_state)); crtc->dev = dev; crtc->funcs = funcs; INIT_LIST_HEAD(&crtc->commit_list); spin_lock_init(&crtc->commit_lock); drm_modeset_lock_init(&crtc->mutex); ret = drm_mode_object_add(dev, &crtc->base, DRM_MODE_OBJECT_CRTC); if (ret) return ret; if (name) { crtc->name = kvasprintf(GFP_KERNEL, name, ap); } else { crtc->name = kasprintf(GFP_KERNEL, "crtc-%d", config->num_crtc); } if (!crtc->name) { drm_mode_object_unregister(dev, &crtc->base); return -ENOMEM; } crtc->fence_context = dma_fence_context_alloc(1); spin_lock_init(&crtc->fence_lock); snprintf(crtc->timeline_name, sizeof(crtc->timeline_name), "CRTC:%d-%s", crtc->base.id, crtc->name); crtc->base.properties = &crtc->properties; list_add_tail(&crtc->head, &config->crtc_list); crtc->index = config->num_crtc++; crtc->primary = primary; crtc->cursor = cursor; if (primary && !primary->possible_crtcs) primary->possible_crtcs = drm_crtc_mask(crtc); if (cursor && !cursor->possible_crtcs) cursor->possible_crtcs = drm_crtc_mask(crtc); ret = drm_crtc_crc_init(crtc); if (ret) { drm_mode_object_unregister(dev, &crtc->base); return ret; } if (drm_core_check_feature(dev, DRIVER_ATOMIC)) { drm_object_attach_property(&crtc->base, config->prop_active, 0); drm_object_attach_property(&crtc->base, config->prop_mode_id, 0); drm_object_attach_property(&crtc->base, config->prop_out_fence_ptr, 0); drm_object_attach_property(&crtc->base, config->prop_vrr_enabled, 0); } return 0; } /** * drm_crtc_init_with_planes - Initialise a new CRTC object with * specified primary and cursor planes. * @dev: DRM device * @crtc: CRTC object to init * @primary: Primary plane for CRTC * @cursor: Cursor plane for CRTC * @funcs: callbacks for the new CRTC * @name: printf style format string for the CRTC name, or NULL for default name * * Inits a new object created as base part of a driver crtc object. Drivers * should use this function instead of drm_crtc_init(), which is only provided * for backwards compatibility with drivers which do not yet support universal * planes). For really simple hardware which has only 1 plane look at * drm_simple_display_pipe_init() instead. * The &drm_crtc_funcs.destroy hook should call drm_crtc_cleanup() and kfree() * the crtc structure. The crtc structure should not be allocated with * devm_kzalloc(). * * The @primary and @cursor planes are only relevant for legacy uAPI, see * &drm_crtc.primary and &drm_crtc.cursor. * * Note: consider using drmm_crtc_alloc_with_planes() or * drmm_crtc_init_with_planes() instead of drm_crtc_init_with_planes() * to let the DRM managed resource infrastructure take care of cleanup * and deallocation. * * Returns: * Zero on success, error code on failure. 
*/ int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary, struct drm_plane *cursor, const struct drm_crtc_funcs *funcs, const char *name, ...) { va_list ap; int ret; WARN_ON(!funcs->destroy); va_start(ap, name); ret = __drm_crtc_init_with_planes(dev, crtc, primary, cursor, funcs, name, ap); va_end(ap); return ret; } EXPORT_SYMBOL(drm_crtc_init_with_planes); static void drmm_crtc_init_with_planes_cleanup(struct drm_device *dev, void *ptr) { struct drm_crtc *crtc = ptr; drm_crtc_cleanup(crtc); } __printf(6, 0) static int __drmm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary, struct drm_plane *cursor, const struct drm_crtc_funcs *funcs, const char *name, va_list args) { int ret; drm_WARN_ON(dev, funcs && funcs->destroy); ret = __drm_crtc_init_with_planes(dev, crtc, primary, cursor, funcs, name, args); if (ret) return ret; ret = drmm_add_action_or_reset(dev, drmm_crtc_init_with_planes_cleanup, crtc); if (ret) return ret; return 0; } /** * drmm_crtc_init_with_planes - Initialise a new CRTC object with * specified primary and cursor planes. * @dev: DRM device * @crtc: CRTC object to init * @primary: Primary plane for CRTC * @cursor: Cursor plane for CRTC * @funcs: callbacks for the new CRTC * @name: printf style format string for the CRTC name, or NULL for default name * * Inits a new object created as base part of a driver crtc object. Drivers * should use this function instead of drm_crtc_init(), which is only provided * for backwards compatibility with drivers which do not yet support universal * planes). For really simple hardware which has only 1 plane look at * drm_simple_display_pipe_init() instead. * * Cleanup is automatically handled through registering * drmm_crtc_cleanup() with drmm_add_action(). The crtc structure should * be allocated with drmm_kzalloc(). * * The @drm_crtc_funcs.destroy hook must be NULL. * * The @primary and @cursor planes are only relevant for legacy uAPI, see * &drm_crtc.primary and &drm_crtc.cursor. * * Returns: * Zero on success, error code on failure. */ int drmm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary, struct drm_plane *cursor, const struct drm_crtc_funcs *funcs, const char *name, ...) { va_list ap; int ret; va_start(ap, name); ret = __drmm_crtc_init_with_planes(dev, crtc, primary, cursor, funcs, name, ap); va_end(ap); if (ret) return ret; return 0; } EXPORT_SYMBOL(drmm_crtc_init_with_planes); void *__drmm_crtc_alloc_with_planes(struct drm_device *dev, size_t size, size_t offset, struct drm_plane *primary, struct drm_plane *cursor, const struct drm_crtc_funcs *funcs, const char *name, ...) { void *container; struct drm_crtc *crtc; va_list ap; int ret; if (WARN_ON(!funcs || funcs->destroy)) return ERR_PTR(-EINVAL); container = drmm_kzalloc(dev, size, GFP_KERNEL); if (!container) return ERR_PTR(-ENOMEM); crtc = container + offset; va_start(ap, name); ret = __drmm_crtc_init_with_planes(dev, crtc, primary, cursor, funcs, name, ap); va_end(ap); if (ret) return ERR_PTR(ret); return container; } EXPORT_SYMBOL(__drmm_crtc_alloc_with_planes); /** * drm_crtc_cleanup - Clean up the core crtc usage * @crtc: CRTC to cleanup * * This function cleans up @crtc and removes it from the DRM mode setting * core. Note that the function does *not* free the crtc structure itself, * this is the responsibility of the caller. 
*/ void drm_crtc_cleanup(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; /* Note that the crtc_list is considered to be static; should we * remove the drm_crtc at runtime we would have to decrement all * the indices on the drm_crtc after us in the crtc_list. */ drm_crtc_crc_fini(crtc); kfree(crtc->gamma_store); crtc->gamma_store = NULL; drm_modeset_lock_fini(&crtc->mutex); drm_mode_object_unregister(dev, &crtc->base); list_del(&crtc->head); dev->mode_config.num_crtc--; WARN_ON(crtc->state && !crtc->funcs->atomic_destroy_state); if (crtc->state && crtc->funcs->atomic_destroy_state) crtc->funcs->atomic_destroy_state(crtc, crtc->state); kfree(crtc->name); memset(crtc, 0, sizeof(*crtc)); } EXPORT_SYMBOL(drm_crtc_cleanup); /** * drm_mode_getcrtc - get CRTC configuration * @dev: drm device for the ioctl * @data: data pointer for the ioctl * @file_priv: drm file for the ioctl call * * Construct a CRTC configuration structure to return to the user. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_getcrtc(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc *crtc_resp = data; struct drm_crtc *crtc; struct drm_plane *plane; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; crtc = drm_crtc_find(dev, file_priv, crtc_resp->crtc_id); if (!crtc) return -ENOENT; plane = crtc->primary; crtc_resp->gamma_size = crtc->gamma_size; drm_modeset_lock(&plane->mutex, NULL); if (plane->state && plane->state->fb) crtc_resp->fb_id = plane->state->fb->base.id; else if (!plane->state && plane->fb) crtc_resp->fb_id = plane->fb->base.id; else crtc_resp->fb_id = 0; if (plane->state) { crtc_resp->x = plane->state->src_x >> 16; crtc_resp->y = plane->state->src_y >> 16; } drm_modeset_unlock(&plane->mutex); drm_modeset_lock(&crtc->mutex, NULL); if (crtc->state) { if (crtc->state->enable) { drm_mode_convert_to_umode(&crtc_resp->mode, &crtc->state->mode); crtc_resp->mode_valid = 1; } else { crtc_resp->mode_valid = 0; } } else { crtc_resp->x = crtc->x; crtc_resp->y = crtc->y; if (crtc->enabled) { drm_mode_convert_to_umode(&crtc_resp->mode, &crtc->mode); crtc_resp->mode_valid = 1; } else { crtc_resp->mode_valid = 0; } } if (!file_priv->aspect_ratio_allowed) crtc_resp->mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; drm_modeset_unlock(&crtc->mutex); return 0; } static int __drm_mode_set_config_internal(struct drm_mode_set *set, struct drm_modeset_acquire_ctx *ctx) { struct drm_crtc *crtc = set->crtc; struct drm_framebuffer *fb; struct drm_crtc *tmp; int ret; WARN_ON(drm_drv_uses_atomic_modeset(crtc->dev)); /* * NOTE: ->set_config can also disable other crtcs (if we steal all * connectors from it), hence we need to refcount the fbs across all * crtcs. Atomic modeset will have saner semantics ... */ drm_for_each_crtc(tmp, crtc->dev) { struct drm_plane *plane = tmp->primary; plane->old_fb = plane->fb; } fb = set->fb; ret = crtc->funcs->set_config(set, ctx); if (ret == 0) { struct drm_plane *plane = crtc->primary; plane->crtc = fb ? crtc : NULL; plane->fb = fb; } drm_for_each_crtc(tmp, crtc->dev) { struct drm_plane *plane = tmp->primary; if (plane->fb) drm_framebuffer_get(plane->fb); if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; } return ret; } /** * drm_mode_set_config_internal - helper to call &drm_mode_config_funcs.set_config * @set: modeset config to set * * This is a little helper to wrap internal calls to the * &drm_mode_config_funcs.set_config driver interface. 
The only thing it adds is * correct refcounting dance. * * This should only be used by non-atomic legacy drivers. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_set_config_internal(struct drm_mode_set *set) { WARN_ON(drm_drv_uses_atomic_modeset(set->crtc->dev)); return __drm_mode_set_config_internal(set, NULL); } EXPORT_SYMBOL(drm_mode_set_config_internal); /** * drm_crtc_check_viewport - Checks that a framebuffer is big enough for the * CRTC viewport * @crtc: CRTC that framebuffer will be displayed on * @x: x panning * @y: y panning * @mode: mode that framebuffer will be displayed under * @fb: framebuffer to check size of */ int drm_crtc_check_viewport(const struct drm_crtc *crtc, int x, int y, const struct drm_display_mode *mode, const struct drm_framebuffer *fb) { int hdisplay, vdisplay; drm_mode_get_hv_timing(mode, &hdisplay, &vdisplay); if (crtc->state && drm_rotation_90_or_270(crtc->primary->state->rotation)) swap(hdisplay, vdisplay); return drm_framebuffer_check_src_coords(x << 16, y << 16, hdisplay << 16, vdisplay << 16, fb); } EXPORT_SYMBOL(drm_crtc_check_viewport); /** * drm_mode_setcrtc - set CRTC configuration * @dev: drm device for the ioctl * @data: data pointer for the ioctl * @file_priv: drm file for the ioctl call * * Build a new CRTC configuration based on user request. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_config *config = &dev->mode_config; struct drm_mode_crtc *crtc_req = data; struct drm_crtc *crtc; struct drm_plane *plane; struct drm_connector **connector_set = NULL, *connector; struct drm_framebuffer *fb = NULL; struct drm_display_mode *mode = NULL; struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; int ret, i, num_connectors = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; /* * Universal plane src offsets are only 16.16, prevent havoc for * drivers using universal plane code internally. */ if (crtc_req->x & 0xffff0000 || crtc_req->y & 0xffff0000) return -ERANGE; crtc = drm_crtc_find(dev, file_priv, crtc_req->crtc_id); if (!crtc) { DRM_DEBUG_KMS("Unknown CRTC ID %d\n", crtc_req->crtc_id); return -ENOENT; } DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name); plane = crtc->primary; /* allow disabling with the primary plane leased */ if (crtc_req->mode_valid && !drm_lease_held(file_priv, plane->base.id)) return -EACCES; DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE, ret); if (crtc_req->mode_valid) { /* If we have a mode we need a framebuffer. */ /* If we pass -1, set the mode with the currently bound fb */ if (crtc_req->fb_id == -1) { struct drm_framebuffer *old_fb; if (plane->state) old_fb = plane->state->fb; else old_fb = plane->fb; if (!old_fb) { DRM_DEBUG_KMS("CRTC doesn't have current FB\n"); ret = -EINVAL; goto out; } fb = old_fb; /* Make refcounting symmetric with the lookup path. 
*/ drm_framebuffer_get(fb); } else { fb = drm_framebuffer_lookup(dev, file_priv, crtc_req->fb_id); if (!fb) { DRM_DEBUG_KMS("Unknown FB ID%d\n", crtc_req->fb_id); ret = -ENOENT; goto out; } } mode = drm_mode_create(dev); if (!mode) { ret = -ENOMEM; goto out; } if (!file_priv->aspect_ratio_allowed && (crtc_req->mode.flags & DRM_MODE_FLAG_PIC_AR_MASK) != DRM_MODE_FLAG_PIC_AR_NONE) { DRM_DEBUG_KMS("Unexpected aspect-ratio flag bits\n"); ret = -EINVAL; goto out; } ret = drm_mode_convert_umode(dev, mode, &crtc_req->mode); if (ret) { DRM_DEBUG_KMS("Invalid mode (ret=%d, status=%s)\n", ret, drm_get_mode_status_name(mode->status)); drm_mode_debug_printmodeline(mode); goto out; } /* * Check whether the primary plane supports the fb pixel format. * Drivers not implementing the universal planes API use a * default formats list provided by the DRM core which doesn't * match real hardware capabilities. Skip the check in that * case. */ if (!plane->format_default) { ret = drm_plane_check_pixel_format(plane, fb->format->format, fb->modifier); if (ret) { DRM_DEBUG_KMS("Invalid pixel format %p4cc, modifier 0x%llx\n", &fb->format->format, fb->modifier); goto out; } } ret = drm_crtc_check_viewport(crtc, crtc_req->x, crtc_req->y, mode, fb); if (ret) goto out; } if (crtc_req->count_connectors == 0 && mode) { DRM_DEBUG_KMS("Count connectors is 0 but mode set\n"); ret = -EINVAL; goto out; } if (crtc_req->count_connectors > 0 && (!mode || !fb)) { DRM_DEBUG_KMS("Count connectors is %d but no mode or fb set\n", crtc_req->count_connectors); ret = -EINVAL; goto out; } if (crtc_req->count_connectors > 0) { u32 out_id; /* Avoid unbounded kernel memory allocation */ if (crtc_req->count_connectors > config->num_connector) { ret = -EINVAL; goto out; } connector_set = kmalloc_array(crtc_req->count_connectors, sizeof(struct drm_connector *), GFP_KERNEL); if (!connector_set) { ret = -ENOMEM; goto out; } for (i = 0; i < crtc_req->count_connectors; i++) { connector_set[i] = NULL; set_connectors_ptr = (uint32_t __user *)(unsigned long)crtc_req->set_connectors_ptr; if (get_user(out_id, &set_connectors_ptr[i])) { ret = -EFAULT; goto out; } connector = drm_connector_lookup(dev, file_priv, out_id); if (!connector) { DRM_DEBUG_KMS("Connector id %d unknown\n", out_id); ret = -ENOENT; goto out; } DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); connector_set[i] = connector; num_connectors++; } } set.crtc = crtc; set.x = crtc_req->x; set.y = crtc_req->y; set.mode = mode; set.connectors = connector_set; set.num_connectors = num_connectors; set.fb = fb; if (drm_drv_uses_atomic_modeset(dev)) ret = crtc->funcs->set_config(&set, &ctx); else ret = __drm_mode_set_config_internal(&set, &ctx); out: if (fb) drm_framebuffer_put(fb); if (connector_set) { for (i = 0; i < num_connectors; i++) { if (connector_set[i]) drm_connector_put(connector_set[i]); } } kfree(connector_set); drm_mode_destroy(dev, mode); /* In case we need to retry... 
*/ connector_set = NULL; fb = NULL; mode = NULL; num_connectors = 0; DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); return ret; } int drm_mode_crtc_set_obj_prop(struct drm_mode_object *obj, struct drm_property *property, uint64_t value) { int ret = -EINVAL; struct drm_crtc *crtc = obj_to_crtc(obj); if (crtc->funcs->set_property) ret = crtc->funcs->set_property(crtc, property, value); if (!ret) drm_object_property_set_value(obj, property, value); return ret; } /** * drm_crtc_create_scaling_filter_property - create a new scaling filter * property * * @crtc: drm CRTC * @supported_filters: bitmask of supported scaling filters, must include * BIT(DRM_SCALING_FILTER_DEFAULT). * * This function lets driver to enable the scaling filter property on a given * CRTC. * * RETURNS: * Zero for success or -errno */ int drm_crtc_create_scaling_filter_property(struct drm_crtc *crtc, unsigned int supported_filters) { struct drm_property *prop = drm_create_scaling_filter_prop(crtc->dev, supported_filters); if (IS_ERR(prop)) return PTR_ERR(prop); drm_object_attach_property(&crtc->base, prop, DRM_SCALING_FILTER_DEFAULT); crtc->scaling_filter_property = prop; return 0; } EXPORT_SYMBOL(drm_crtc_create_scaling_filter_property);
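To make the init paths above concrete, here is a hedged driver-side sketch, not part of drm_crtc.c, that wires up a CRTC through the managed drmm_crtc_init_with_planes() documented earlier. The funcs table uses the common atomic helpers; my_crtc_funcs and my_pipe_init are illustrative names, and the crtc is assumed to be embedded in drmm-allocated driver memory, as the kernel-doc above requires for the drmm_ variant.

/* Hedged sketch, not from drm_crtc.c: managed CRTC initialisation. */
#include <drm/drm_atomic_helper.h>
#include <drm/drm_atomic_state_helper.h>
#include <drm/drm_crtc.h>

static const struct drm_crtc_funcs my_crtc_funcs = {
	/* .destroy is intentionally left NULL for the drmm_ variant */
	.reset			= drm_atomic_helper_crtc_reset,
	.set_config		= drm_atomic_helper_set_config,
	.page_flip		= drm_atomic_helper_page_flip,
	.atomic_duplicate_state	= drm_atomic_helper_crtc_duplicate_state,
	.atomic_destroy_state	= drm_atomic_helper_crtc_destroy_state,
};

/* @crtc is assumed to live inside a structure allocated with drmm_kzalloc(). */
static int my_pipe_init(struct drm_device *dev, struct drm_crtc *crtc,
			struct drm_plane *primary, struct drm_plane *cursor)
{
	/*
	 * Cleanup is registered internally via drmm_add_action_or_reset(),
	 * so the driver never calls drm_crtc_cleanup() itself.
	 */
	return drmm_crtc_init_with_planes(dev, crtc, primary, cursor,
					  &my_crtc_funcs, "my-crtc-%d", 0);
}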
2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2018-2024 Intel Corporation */ #ifndef IEEE80211_I_H #define IEEE80211_I_H #include <linux/kernel.h> #include <linux/device.h> #include <linux/if_ether.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/workqueue.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/etherdevice.h> #include <linux/leds.h> #include <linux/idr.h> #include <linux/rhashtable.h> #include <linux/rbtree.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> #include <net/mac80211.h> #include <net/fq.h> #include "key.h" #include "sta_info.h" #include "debug.h" #include "drop.h" extern const struct cfg80211_ops mac80211_config_ops; struct ieee80211_local; struct ieee80211_mesh_fast_tx; /* Maximum number of broadcast/multicast frames to buffer when some of the * associated stations are using power saving. */ #define AP_MAX_BC_BUFFER 128 /* Maximum number of frames buffered to all STAs, including multicast frames. * Note: increasing this limit increases the potential memory requirement. Each * frame can be up to about 2 kB long. */ #define TOTAL_MAX_TX_BUFFER 512 /* Required encryption head and tailroom */ #define IEEE80211_ENCRYPT_HEADROOM 8 #define IEEE80211_ENCRYPT_TAILROOM 18 /* power level hasn't been configured (or set to automatic) */ #define IEEE80211_UNSET_POWER_LEVEL INT_MIN /* * Some APs experience problems when working with U-APSD. Decreasing the * probability of that happening by using legacy mode for all ACs but VO isn't * enough. 
* * Cisco 4410N originally forced us to enable VO by default only because it * treated non-VO ACs as legacy. * * However some APs (notably Netgear R7000) silently reclassify packets to * different ACs. Since u-APSD ACs require trigger frames for frame retrieval * clients would never see some frames (e.g. ARP responses) or would fetch them * accidentally after a long time. * * It makes little sense to enable u-APSD queues by default because it needs * userspace applications to be aware of it to actually take advantage of the * possible additional powersavings. Implicitly depending on driver autotrigger * frame support doesn't make much sense. */ #define IEEE80211_DEFAULT_UAPSD_QUEUES 0 #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) #define IEEE80211_MAX_NAN_INSTANCE_ID 255 enum ieee80211_status_data { IEEE80211_STATUS_TYPE_MASK = 0x00f, IEEE80211_STATUS_TYPE_INVALID = 0, IEEE80211_STATUS_TYPE_SMPS = 1, IEEE80211_STATUS_SUBDATA_MASK = 0xff0, }; static inline bool ieee80211_sta_keep_active(struct sta_info *sta, u8 ac) { /* Keep a station's queues on the active list for deficit accounting * purposes if it was active or queued during the last 100ms. */ return time_before_eq(jiffies, sta->airtime[ac].last_active + HZ / 10); } struct ieee80211_bss { u32 device_ts_beacon, device_ts_presp; bool wmm_used; bool uapsd_supported; #define IEEE80211_MAX_SUPP_RATES 32 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; size_t supp_rates_len; struct ieee80211_rate *beacon_rate; u32 vht_cap_info; /* * During association, we save an ERP value from a probe response so * that we can feed ERP info to the driver when handling the * association completes. these fields probably won't be up-to-date * otherwise, you probably don't want to use them. */ bool has_erp_value; u8 erp_value; /* Keep track of the corruption of the last beacon/probe response. */ u8 corrupt_data; /* Keep track of what bits of information we have valid info for. */ u8 valid_data; }; /** * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags * @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted * @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted * * These are bss flags that are attached to a bss in the * @corrupt_data field of &struct ieee80211_bss. */ enum ieee80211_bss_corrupt_data_flags { IEEE80211_BSS_CORRUPT_BEACON = BIT(0), IEEE80211_BSS_CORRUPT_PROBE_RESP = BIT(1) }; /** * enum ieee80211_bss_valid_data_flags - BSS valid data flags * @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE * @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE * * These are bss flags that are attached to a bss in the * @valid_data field of &struct ieee80211_bss. They show which parts * of the data structure were received as a result of an un-corrupted * beacon/probe response. 
*/ enum ieee80211_bss_valid_data_flags { IEEE80211_BSS_VALID_WMM = BIT(1), IEEE80211_BSS_VALID_RATES = BIT(2), IEEE80211_BSS_VALID_ERP = BIT(3) }; typedef unsigned __bitwise ieee80211_tx_result; #define TX_CONTINUE ((__force ieee80211_tx_result) 0u) #define TX_DROP ((__force ieee80211_tx_result) 1u) #define TX_QUEUED ((__force ieee80211_tx_result) 2u) #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) struct ieee80211_tx_data { struct sk_buff *skb; struct sk_buff_head skbs; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct sta_info *sta; struct ieee80211_key *key; struct ieee80211_tx_rate rate; unsigned int flags; }; /** * enum ieee80211_packet_rx_flags - packet RX flags * @IEEE80211_RX_AMSDU: a-MSDU packet * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed * @IEEE80211_RX_DEFERRED_RELEASE: frame was subjected to receive reordering * * These are per-frame flags that are attached to a frame in the * @rx_flags field of &struct ieee80211_rx_status. */ enum ieee80211_packet_rx_flags { IEEE80211_RX_AMSDU = BIT(3), IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4), IEEE80211_RX_DEFERRED_RELEASE = BIT(5), }; /** * enum ieee80211_rx_flags - RX data flags * * @IEEE80211_RX_CMNTR: received on cooked monitor already * @IEEE80211_RX_BEACON_REPORTED: This frame was already reported * to cfg80211_report_obss_beacon(). * * These flags are used across handling multiple interfaces * for a single frame. */ enum ieee80211_rx_flags { IEEE80211_RX_CMNTR = BIT(0), IEEE80211_RX_BEACON_REPORTED = BIT(1), }; struct ieee80211_rx_data { struct list_head *list; struct sk_buff *skb; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct ieee80211_link_data *link; struct sta_info *sta; struct link_sta_info *link_sta; struct ieee80211_key *key; unsigned int flags; /* * Index into sequence numbers array, 0..16 * since the last (16) is used for non-QoS, * will be 16 on non-QoS frames. */ int seqno_idx; /* * Index into the security IV/PN arrays, 0..16 * since the last (16) is used for CCMP-encrypted * management frames, will be set to 16 on mgmt * frames and 0 on non-QoS frames. 
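 *
 * Purely as an illustrative sketch (not the actual RX path code), such an
 * index could be derived along these lines with the frame-type helpers
 * from <linux/ieee80211.h>:
 *
 *	if (ieee80211_is_mgmt(hdr->frame_control))
 *		idx = 16;				(robust mgmt slot)
 *	else if (ieee80211_is_data_qos(hdr->frame_control))
 *		idx = ieee80211_get_tid(hdr);		(0..15)
 *	else
 *		idx = 0;				(non-QoS data)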
*/ int security_idx; int link_id; union { struct { u32 iv32; u16 iv16; } tkip; struct { u8 pn[IEEE80211_CCMP_PN_LEN]; } ccm_gcm; }; }; struct ieee80211_csa_settings { const u16 *counter_offsets_beacon; const u16 *counter_offsets_presp; int n_counter_offsets_beacon; int n_counter_offsets_presp; u8 count; }; struct ieee80211_color_change_settings { u16 counter_offset_beacon; u16 counter_offset_presp; u8 count; }; struct beacon_data { u8 *head, *tail; int head_len, tail_len; struct ieee80211_meshconf_ie *meshconf; u16 cntdwn_counter_offsets[IEEE80211_MAX_CNTDWN_COUNTERS_NUM]; u8 cntdwn_current_counter; struct cfg80211_mbssid_elems *mbssid_ies; struct cfg80211_rnr_elems *rnr_ies; struct rcu_head rcu_head; }; struct probe_resp { struct rcu_head rcu_head; int len; u16 cntdwn_counter_offsets[IEEE80211_MAX_CNTDWN_COUNTERS_NUM]; u8 data[]; }; struct fils_discovery_data { struct rcu_head rcu_head; int len; u8 data[]; }; struct unsol_bcast_probe_resp_data { struct rcu_head rcu_head; int len; u8 data[]; }; struct ps_data { /* yes, this looks ugly, but guarantees that we can later use * bitmap_empty :) * NB: don't touch this bitmap, use sta_info_{set,clear}_tim_bit */ u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)] __aligned(__alignof__(unsigned long)); struct sk_buff_head bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; bool dtim_bc_mc; }; struct ieee80211_if_ap { struct list_head vlans; /* write-protected with RTNL and local->mtx */ struct ps_data ps; atomic_t num_mcast_sta; /* number of stations receiving multicast */ bool multicast_to_unicast; bool active; }; struct ieee80211_if_vlan { struct list_head list; /* write-protected with RTNL and local->mtx */ /* used for all tx if the VLAN is configured to 4-addr mode */ struct sta_info __rcu *sta; atomic_t num_mcast_sta; /* number of stations receiving multicast */ }; struct mesh_stats { __u32 fwded_mcast; /* Mesh forwarded multicast frames */ __u32 fwded_unicast; /* Mesh forwarded unicast frames */ __u32 fwded_frames; /* Mesh total forwarded frames */ __u32 dropped_frames_ttl; /* Not transmitted since mesh_ttl == 0*/ __u32 dropped_frames_no_route; /* Not transmitted, no route found */ }; #define PREQ_Q_F_START 0x1 #define PREQ_Q_F_REFRESH 0x2 struct mesh_preq_queue { struct list_head list; u8 dst[ETH_ALEN]; u8 flags; }; struct ieee80211_roc_work { struct list_head list; struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *chan; bool started, abort, hw_begun, notified; bool on_channel; unsigned long start_time; u32 duration, req_duration; struct sk_buff *frame; u64 cookie, mgmt_tx_cookie; enum ieee80211_roc_type type; }; /* flags used in struct ieee80211_if_managed.flags */ enum ieee80211_sta_flags { IEEE80211_STA_CONNECTION_POLL = BIT(1), IEEE80211_STA_CONTROL_PORT = BIT(2), IEEE80211_STA_MFP_ENABLED = BIT(6), IEEE80211_STA_UAPSD_ENABLED = BIT(7), IEEE80211_STA_NULLFUNC_ACKED = BIT(8), IEEE80211_STA_ENABLE_RRM = BIT(15), }; enum ieee80211_conn_mode { IEEE80211_CONN_MODE_S1G, IEEE80211_CONN_MODE_LEGACY, IEEE80211_CONN_MODE_HT, IEEE80211_CONN_MODE_VHT, IEEE80211_CONN_MODE_HE, IEEE80211_CONN_MODE_EHT, }; #define IEEE80211_CONN_MODE_HIGHEST IEEE80211_CONN_MODE_EHT enum ieee80211_conn_bw_limit { IEEE80211_CONN_BW_LIMIT_20, IEEE80211_CONN_BW_LIMIT_40, IEEE80211_CONN_BW_LIMIT_80, IEEE80211_CONN_BW_LIMIT_160, /* also 80+80 */ IEEE80211_CONN_BW_LIMIT_320, }; struct ieee80211_conn_settings { enum ieee80211_conn_mode mode; enum ieee80211_conn_bw_limit bw_limit; }; extern const struct 
ieee80211_conn_settings ieee80211_conn_settings_unlimited; struct ieee80211_mgd_auth_data { struct cfg80211_bss *bss; unsigned long timeout; int tries; u16 algorithm, expected_transaction; u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; bool done, waiting; bool peer_confirmed; bool timeout_started; int link_id; u8 ap_addr[ETH_ALEN] __aligned(2); u16 sae_trans, sae_status; size_t data_len; u8 data[]; }; struct ieee80211_mgd_assoc_data { struct { struct cfg80211_bss *bss; u8 addr[ETH_ALEN] __aligned(2); u8 ap_ht_param; struct ieee80211_vht_cap ap_vht_cap; size_t elems_len; u8 *elems; /* pointing to inside ie[] below */ struct ieee80211_conn_settings conn; u16 status; bool disabled; } link[IEEE80211_MLD_MAX_NUM_LINKS]; u8 ap_addr[ETH_ALEN] __aligned(2); /* this is for a workaround, so we use it only for non-MLO */ const u8 *supp_rates; u8 supp_rates_len; unsigned long timeout; int tries; u8 prev_ap_addr[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; bool wmm, uapsd; bool need_beacon; bool synced; bool timeout_started; bool comeback; /* whether the AP has requested association comeback */ bool s1g; bool spp_amsdu; unsigned int assoc_link_id; u8 fils_nonces[2 * FILS_NONCE_LEN]; u8 fils_kek[FILS_MAX_KEK_LEN]; size_t fils_kek_len; size_t ie_len; u8 *ie_pos; /* used to fill ie[] with link[].elems */ u8 ie[]; }; struct ieee80211_sta_tx_tspec { /* timestamp of the first packet in the time slice */ unsigned long time_slice_start; u32 admitted_time; /* in usecs, unlike over the air */ u8 tsid; s8 up; /* signed to be able to invalidate with -1 during teardown */ /* consumed TX time in microseconds in the time slice */ u32 consumed_tx_time; enum { TX_TSPEC_ACTION_NONE = 0, TX_TSPEC_ACTION_DOWNGRADE, TX_TSPEC_ACTION_STOP_DOWNGRADE, } action; bool downgraded; }; /* Advertised TID-to-link mapping info */ struct ieee80211_adv_ttlm_info { /* time in TUs at which the new mapping is established, or 0 if there is * no planned advertised TID-to-link mapping */ u16 switch_time; u32 duration; /* duration of the planned T2L map in TUs */ u16 map; /* map of usable links for all TIDs */ bool active; /* whether the advertised mapping is active or not */ }; DECLARE_EWMA(beacon_signal, 4, 4) struct ieee80211_if_managed { struct timer_list timer; struct timer_list conn_mon_timer; struct timer_list bcn_mon_timer; struct wiphy_work monitor_work; struct wiphy_work beacon_connection_loss_work; struct wiphy_work csa_connection_drop_work; unsigned long beacon_timeout; unsigned long probe_timeout; int probe_send_count; bool nullfunc_failed; u8 connection_loss:1, driver_disconnect:1, reconnect:1, associated:1; struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ unsigned int flags; u16 mcast_seq_last; bool status_acked; bool status_received; __le16 status_fc; enum { IEEE80211_MFP_DISABLED, IEEE80211_MFP_OPTIONAL, IEEE80211_MFP_REQUIRED } mfp; /* management frame protection */ /* * Bitmask of enabled u-apsd queues, * IEEE80211_WMM_IE_STA_QOSINFO_AC_BE & co. Needs a new association * to take effect. */ unsigned int uapsd_queues; /* * Maximum number of buffered frames AP can deliver during a * service period, IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL or similar. * Needs a new association to take effect. 
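 *
 * Illustrative sketch (assumed values, not code from this file): a
 * configuration that enables U-APSD for the voice AC only and limits the
 * service period to two buffered frames would look roughly like:
 *
 *	ifmgd->uapsd_queues = IEEE80211_WMM_IE_STA_QOSINFO_AC_VO;
 *	ifmgd->uapsd_max_sp_len = IEEE80211_WMM_IE_STA_QOSINFO_SP_2;
 *
 * where "ifmgd" stands for the owning &struct ieee80211_if_managed; as
 * noted above, both values only take effect on the next association.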
*/ unsigned int uapsd_max_sp_len; u8 use_4addr; /* * State variables for keeping track of RSSI of the AP currently * connected to and informing driver when RSSI has gone * below/above a certain threshold. */ int rssi_min_thold, rssi_max_thold; struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */ struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */ struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */ struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */ struct ieee80211_s1g_cap s1g_capa; /* configured S1G overrides */ struct ieee80211_s1g_cap s1g_capa_mask; /* valid s1g_capa bits */ /* TDLS support */ u8 tdls_peer[ETH_ALEN] __aligned(2); struct wiphy_delayed_work tdls_peer_del_work; struct sk_buff *orig_teardown_skb; /* The original teardown skb */ struct sk_buff *teardown_skb; /* A copy to send through the AP */ spinlock_t teardown_lock; /* To lock changing teardown_skb */ bool tdls_wider_bw_prohibited; /* WMM-AC TSPEC support */ struct ieee80211_sta_tx_tspec tx_tspec[IEEE80211_NUM_ACS]; /* Use a separate work struct so that we can do something here * while the sdata->work is flushing the queues, for example. * otherwise, in scenarios where we hardly get any traffic out * on the BE queue, but there's a lot of VO traffic, we might * get stuck in a downgraded situation and flush takes forever. */ struct wiphy_delayed_work tx_tspec_wk; /* Information elements from the last transmitted (Re)Association * Request frame. */ u8 *assoc_req_ies; size_t assoc_req_ies_len; struct wiphy_delayed_work ml_reconf_work; u16 removed_links; /* TID-to-link mapping support */ struct wiphy_delayed_work ttlm_work; struct ieee80211_adv_ttlm_info ttlm_info; /* dialog token enumerator for neg TTLM request */ u8 dialog_token_alloc; struct wiphy_delayed_work neg_ttlm_timeout_work; }; struct ieee80211_if_ibss { struct timer_list timer; struct wiphy_work csa_connection_drop_work; unsigned long last_scan_completed; u32 basic_rates; bool fixed_bssid; bool fixed_channel; bool privacy; bool control_port; bool userspace_handles_dfs; u8 bssid[ETH_ALEN] __aligned(2); u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len, ie_len; u8 *ie; struct cfg80211_chan_def chandef; unsigned long ibss_join_req; /* probe response/beacon for IBSS */ struct beacon_data __rcu *presp; struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */ struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */ spinlock_t incomplete_lock; struct list_head incomplete_stations; enum { IEEE80211_IBSS_MLME_SEARCH, IEEE80211_IBSS_MLME_JOINED, } state; }; /** * struct ieee80211_if_ocb - OCB mode state * * @housekeeping_timer: timer for periodic invocation of a housekeeping task * @wrkq_flags: OCB deferred task action * @incomplete_lock: delayed STA insertion lock * @incomplete_stations: list of STAs waiting for delayed insertion * @joined: indication if the interface is connected to an OCB network */ struct ieee80211_if_ocb { struct timer_list housekeeping_timer; unsigned long wrkq_flags; spinlock_t incomplete_lock; struct list_head incomplete_stations; bool joined; }; /** * struct ieee80211_mesh_sync_ops - Extensible synchronization framework interface * * these declarations define the interface, which enables * vendor-specific mesh synchronization * * @rx_bcn_presp: beacon/probe response was received * @adjust_tsf: TSF adjustment method */ struct ieee80211_mesh_sync_ops { void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype, struct ieee80211_mgmt *mgmt, unsigned int len, const 
struct ieee80211_meshconf_ie *mesh_cfg, struct ieee80211_rx_status *rx_status); /* should be called with beacon_data under RCU read lock */ void (*adjust_tsf)(struct ieee80211_sub_if_data *sdata, struct beacon_data *beacon); /* add other framework functions here */ }; struct mesh_csa_settings { struct rcu_head rcu_head; struct cfg80211_csa_settings settings; }; /** * struct mesh_table * * @known_gates: list of known mesh gates and their mpaths by the station. The * gate's mpath may or may not be resolved and active. * @gates_lock: protects updates to known_gates * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr * @walk_head: linked list containing all mesh_path objects * @walk_lock: lock protecting walk_head * @entries: number of entries in the table */ struct mesh_table { struct hlist_head known_gates; spinlock_t gates_lock; struct rhashtable rhead; struct hlist_head walk_head; spinlock_t walk_lock; atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ }; /** * struct mesh_tx_cache - mesh fast xmit header cache * * @rht: hash table containing struct ieee80211_mesh_fast_tx, using skb DA as key * @walk_head: linked list containing all ieee80211_mesh_fast_tx objects * @walk_lock: lock protecting walk_head and rht */ struct mesh_tx_cache { struct rhashtable rht; struct hlist_head walk_head; spinlock_t walk_lock; }; struct ieee80211_if_mesh { struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; struct timer_list mesh_path_root_timer; unsigned long wrkq_flags; unsigned long mbss_changed[64 / BITS_PER_LONG]; bool userspace_handles_dfs; u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; size_t mesh_id_len; /* Active Path Selection Protocol Identifier */ u8 mesh_pp_id; /* Active Path Selection Metric Identifier */ u8 mesh_pm_id; /* Congestion Control Mode Identifier */ u8 mesh_cc_id; /* Synchronization Protocol Identifier */ u8 mesh_sp_id; /* Authentication Protocol Identifier */ u8 mesh_auth_id; /* Local mesh Sequence Number */ u32 sn; /* Last used PREQ ID */ u32 preq_id; atomic_t mpaths; /* Timestamp of last SN update */ unsigned long last_sn_update; /* Time when it's ok to send next PERR */ unsigned long next_perr; /* Timestamp of last PREQ sent */ unsigned long last_preq; struct mesh_rmc *rmc; spinlock_t mesh_preq_queue_lock; struct mesh_preq_queue preq_queue; int preq_queue_len; struct mesh_stats mshstats; struct mesh_config mshcfg; atomic_t estab_plinks; atomic_t mesh_seqnum; bool accepting_plinks; int num_gates; struct beacon_data __rcu *beacon; const u8 *ie; u8 ie_len; enum { IEEE80211_MESH_SEC_NONE = 0x0, IEEE80211_MESH_SEC_AUTHED = 0x1, IEEE80211_MESH_SEC_SECURED = 0x2, } security; bool user_mpm; /* Extensible Synchronization Framework */ const struct ieee80211_mesh_sync_ops *sync_ops; s64 sync_offset_clockdrift_max; spinlock_t sync_offset_lock; /* mesh power save */ enum nl80211_mesh_power_mode nonpeer_pm; int ps_peers_light_sleep; int ps_peers_deep_sleep; struct ps_data ps; /* Channel Switching Support */ struct mesh_csa_settings __rcu *csa; enum { IEEE80211_MESH_CSA_ROLE_NONE, IEEE80211_MESH_CSA_ROLE_INIT, IEEE80211_MESH_CSA_ROLE_REPEATER, } csa_role; u8 chsw_ttl; u16 pre_value; /* offset from skb->data while building IE */ int meshconf_offset; struct mesh_table mesh_paths; struct mesh_table mpp_paths; /* Store paths for MPP&MAP */ int mesh_paths_generation; int mpp_paths_generation; struct mesh_tx_cache tx_cache; }; #ifdef CONFIG_MAC80211_MESH #define IEEE80211_IFSTA_MESH_CTR_INC(msh, name) \ do { (msh)->mshstats.name++; } while (0) #else #define 
IEEE80211_IFSTA_MESH_CTR_INC(msh, name) \ do { } while (0) #endif /** * enum ieee80211_sub_if_data_flags - virtual interface flags * * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between * associated stations and deliver multicast frames both * back to wireless media and to the local net stack. * @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume. * @IEEE80211_SDATA_IN_DRIVER: indicates interface was added to driver * @IEEE80211_SDATA_DISCONNECT_HW_RESTART: Disconnect after hardware restart * recovery */ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_ALLMULTI = BIT(0), IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4), IEEE80211_SDATA_IN_DRIVER = BIT(5), IEEE80211_SDATA_DISCONNECT_HW_RESTART = BIT(6), }; /** * enum ieee80211_sdata_state_bits - virtual interface state bits * @SDATA_STATE_RUNNING: virtual interface is up & running; this * mirrors netif_running() but is separate for interface type * change handling while the interface is up * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel * mode, so queues are stopped * @SDATA_STATE_OFFCHANNEL_BEACON_STOPPED: Beaconing was stopped due * to offchannel, reset when offchannel returns */ enum ieee80211_sdata_state_bits { SDATA_STATE_RUNNING, SDATA_STATE_OFFCHANNEL, SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, }; /** * enum ieee80211_chanctx_mode - channel context configuration mode * * @IEEE80211_CHANCTX_SHARED: channel context may be used by * multiple interfaces * @IEEE80211_CHANCTX_EXCLUSIVE: channel context can be used * only by a single interface. This can be used for example for * non-fixed channel IBSS. */ enum ieee80211_chanctx_mode { IEEE80211_CHANCTX_SHARED, IEEE80211_CHANCTX_EXCLUSIVE }; /** * enum ieee80211_chanctx_replace_state - channel context replacement state * * This is used for channel context in-place reservations that require channel * context switch/swap. * * @IEEE80211_CHANCTX_REPLACE_NONE: no replacement is taking place * @IEEE80211_CHANCTX_WILL_BE_REPLACED: this channel context will be replaced * by a (not yet registered) channel context pointed by %replace_ctx. * @IEEE80211_CHANCTX_REPLACES_OTHER: this (not yet registered) channel context * replaces an existing channel context pointed to by %replace_ctx. */ enum ieee80211_chanctx_replace_state { IEEE80211_CHANCTX_REPLACE_NONE, IEEE80211_CHANCTX_WILL_BE_REPLACED, IEEE80211_CHANCTX_REPLACES_OTHER, }; struct ieee80211_chanctx { struct list_head list; struct rcu_head rcu_head; struct list_head assigned_links; struct list_head reserved_links; enum ieee80211_chanctx_replace_state replace_state; struct ieee80211_chanctx *replace_ctx; enum ieee80211_chanctx_mode mode; bool driver_present; /* temporary data for search algorithm etc. 
*/ struct ieee80211_chan_req req; struct ieee80211_chanctx_conf conf; }; struct mac80211_qos_map { struct cfg80211_qos_map qos_map; struct rcu_head rcu_head; }; enum txq_info_flags { IEEE80211_TXQ_STOP, IEEE80211_TXQ_AMPDU, IEEE80211_TXQ_NO_AMSDU, IEEE80211_TXQ_DIRTY, }; /** * struct txq_info - per tid queue * * @tin: contains packets split into multiple flows * @def_cvars: codel vars for the @tin's default_flow * @cstats: code statistics for this queue * @frags: used to keep fragments created after dequeue * @schedule_order: used with ieee80211_local->active_txqs * @schedule_round: counter to prevent infinite loops on TXQ scheduling * @flags: TXQ flags from &enum txq_info_flags * @txq: the driver visible part */ struct txq_info { struct fq_tin tin; struct codel_vars def_cvars; struct codel_stats cstats; u16 schedule_round; struct list_head schedule_order; struct sk_buff_head frags; unsigned long flags; /* keep last! */ struct ieee80211_txq txq; }; struct ieee80211_if_mntr { u32 flags; u8 mu_follow_addr[ETH_ALEN] __aligned(2); struct list_head list; }; /** * struct ieee80211_if_nan - NAN state * * @conf: current NAN configuration * @func_lock: lock for @func_inst_ids * @function_inst_ids: a bitmap of available instance_id's */ struct ieee80211_if_nan { struct cfg80211_nan_conf conf; /* protects function_inst_ids */ spinlock_t func_lock; struct idr function_inst_ids; }; struct ieee80211_link_data_managed { u8 bssid[ETH_ALEN] __aligned(2); u8 dtim_period; enum ieee80211_smps_mode req_smps, /* requested smps mode */ driver_smps_mode; /* smps mode request */ struct ieee80211_conn_settings conn; s16 p2p_noa_index; bool tdls_chan_switch_prohibited; bool have_beacon; bool tracking_signal_avg; bool disable_wmm_tracking; bool operating_11g_mode; bool csa_waiting_bcn; bool csa_ignored_same_chan; struct wiphy_delayed_work chswitch_work; struct wiphy_work request_smps_work; /* used to reconfigure hardware SM PS */ struct wiphy_work recalc_smps; bool beacon_crc_valid; u32 beacon_crc; struct ewma_beacon_signal ave_beacon_signal; int last_ave_beacon_signal; /* * Number of Beacon frames used in ave_beacon_signal. This can be used * to avoid generating less reliable cqm events that would be based * only on couple of received frames. */ unsigned int count_beacon_signal; /* Number of times beacon loss was invoked. */ unsigned int beacon_loss_count; /* * Last Beacon frame signal strength average (ave_beacon_signal / 16) * that triggered a cqm event. 0 indicates that no event has been * generated for the current association. */ int last_cqm_event_signal; int wmm_last_param_set; int mu_edca_last_param_set; u8 bss_param_ch_cnt; }; struct ieee80211_link_data_ap { struct beacon_data __rcu *beacon; struct probe_resp __rcu *probe_resp; struct fils_discovery_data __rcu *fils_discovery; struct unsol_bcast_probe_resp_data __rcu *unsol_bcast_probe_resp; /* to be used after channel switch. 
*/ struct cfg80211_beacon_data *next_beacon; }; struct ieee80211_link_data { struct ieee80211_sub_if_data *sdata; unsigned int link_id; struct list_head assigned_chanctx_list; /* protected by wiphy mutex */ struct list_head reserved_chanctx_list; /* protected by wiphy mutex */ /* multicast keys only */ struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS]; struct ieee80211_key __rcu *default_multicast_key; struct ieee80211_key __rcu *default_mgmt_key; struct ieee80211_key __rcu *default_beacon_key; struct wiphy_work csa_finalize_work; bool operating_11g_mode; struct ieee80211_chan_req csa_chanreq; struct wiphy_work color_change_finalize_work; struct delayed_work color_collision_detect_work; u64 color_bitmap; /* context reservation -- protected with wiphy mutex */ struct ieee80211_chanctx *reserved_chanctx; struct ieee80211_chan_req reserved; bool reserved_radar_required; bool reserved_ready; u8 needed_rx_chains; enum ieee80211_smps_mode smps_mode; int user_power_level; /* in dBm */ int ap_power_level; /* in dBm */ bool radar_required; struct wiphy_delayed_work dfs_cac_timer_work; union { struct ieee80211_link_data_managed mgd; struct ieee80211_link_data_ap ap; } u; struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; struct ieee80211_bss_conf *conf; #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *debugfs_dir; #endif }; struct ieee80211_sub_if_data { struct list_head list; struct wireless_dev wdev; /* keys */ struct list_head key_list; /* count for keys needing tailroom space allocation */ int crypto_tx_tailroom_needed_cnt; int crypto_tx_tailroom_pending_dec; struct wiphy_delayed_work dec_tailroom_needed_wk; struct net_device *dev; struct ieee80211_local *local; unsigned int flags; unsigned long state; bool csa_blocked_tx; char name[IFNAMSIZ]; struct ieee80211_fragment_cache frags; /* TID bitmap for NoAck policy */ u16 noack_map; /* bit field of ACM bits (BIT(802.1D tag)) */ u8 wmm_acm; struct ieee80211_key __rcu *keys[NUM_DEFAULT_KEYS]; struct ieee80211_key __rcu *default_unicast_key; u16 sequence_number; u16 mld_mcast_seq; __be16 control_port_protocol; bool control_port_no_encrypt; bool control_port_no_preauth; bool control_port_over_nl80211; atomic_t num_tx_queued; struct mac80211_qos_map __rcu *qos_map; struct wiphy_work work; struct sk_buff_head skb_queue; struct sk_buff_head status_queue; /* * AP this belongs to: self in AP mode and * corresponding AP in VLAN mode, NULL for * all others (might be needed later in IBSS) */ struct ieee80211_if_ap *bss; /* bitmap of allowed (non-MCS) rate indexes for rate control */ u32 rc_rateidx_mask[NUM_NL80211_BANDS]; bool rc_has_mcs_mask[NUM_NL80211_BANDS]; u8 rc_rateidx_mcs_mask[NUM_NL80211_BANDS][IEEE80211_HT_MCS_MASK_LEN]; bool rc_has_vht_mcs_mask[NUM_NL80211_BANDS]; u16 rc_rateidx_vht_mcs_mask[NUM_NL80211_BANDS][NL80211_VHT_NSS_MAX]; /* Beacon frame (non-MCS) rate (as a bitmap) */ u32 beacon_rateidx_mask[NUM_NL80211_BANDS]; bool beacon_rate_set; union { struct ieee80211_if_ap ap; struct ieee80211_if_vlan vlan; struct ieee80211_if_managed mgd; struct ieee80211_if_ibss ibss; struct ieee80211_if_mesh mesh; struct ieee80211_if_ocb ocb; struct ieee80211_if_mntr mntr; struct ieee80211_if_nan nan; } u; struct ieee80211_link_data deflink; struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; /* for ieee80211_set_active_links_async() */ struct wiphy_work activate_links_work; u16 desired_active_links; #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *subdir_stations; struct dentry 
*default_unicast_key; struct dentry *default_multicast_key; struct dentry *default_mgmt_key; struct dentry *default_beacon_key; } debugfs; #endif /* must be last, dynamically sized area in this! */ struct ieee80211_vif vif; }; static inline struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) { return container_of(p, struct ieee80211_sub_if_data, vif); } #define sdata_dereference(p, sdata) \ wiphy_dereference(sdata->local->hw.wiphy, p) #define for_each_sdata_link(_local, _link) \ /* outer loop just to define the variables ... */ \ for (struct ieee80211_sub_if_data *___sdata = NULL; \ !___sdata; \ ___sdata = (void *)~0 /* always stop */) \ list_for_each_entry(___sdata, &(_local)->interfaces, list) \ if (ieee80211_sdata_running(___sdata)) \ for (int ___link_id = 0; \ ___link_id < ARRAY_SIZE(___sdata->link); \ ___link_id++) \ if ((_link = wiphy_dereference((_local)->hw.wiphy, \ ___sdata->link[___link_id]))) static inline int ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems, struct cfg80211_rnr_elems *rnr_elems, u8 i) { int len = 0; if (!elems || !elems->cnt || i > elems->cnt) return 0; if (i < elems->cnt) { len = elems->elem[i].len; if (rnr_elems) { len += rnr_elems->elem[i].len; for (i = elems->cnt; i < rnr_elems->cnt; i++) len += rnr_elems->elem[i].len; } return len; } /* i == elems->cnt, calculate total length of all MBSSID elements */ for (i = 0; i < elems->cnt; i++) len += elems->elem[i].len; if (rnr_elems) { for (i = 0; i < rnr_elems->cnt; i++) len += rnr_elems->elem[i].len; } return len; } enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, }; enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_DRIVER, IEEE80211_QUEUE_STOP_REASON_PS, IEEE80211_QUEUE_STOP_REASON_CSA, IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, IEEE80211_QUEUE_STOP_REASON_FLUSH, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN, IEEE80211_QUEUE_STOP_REASON_RESERVE_TID, IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE, IEEE80211_QUEUE_STOP_REASONS, }; #ifdef CONFIG_MAC80211_LEDS struct tpt_led_trigger { char name[32]; const struct ieee80211_tpt_blink *blink_table; unsigned int blink_table_len; struct timer_list timer; struct ieee80211_local *local; unsigned long prev_traffic; unsigned long tx_bytes, rx_bytes; unsigned int active, want; bool running; }; #endif /** * enum mac80211_scan_flags - currently active scan mode * * @SCAN_SW_SCANNING: We're currently in the process of scanning but may as * well be on the operating channel * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to * determine if we are on the operating channel or not * @SCAN_ONCHANNEL_SCANNING: Do a software scan on only the current operating * channel. This should not interrupt normal traffic. * @SCAN_COMPLETED: Set for our scan work function when the driver reported * that the scan completed. * @SCAN_ABORTED: Set for our scan work function when the driver reported * a scan complete for an aborted scan. * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being * cancelled. * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR * and could send a probe request after receiving a beacon.
* @SCAN_BEACON_DONE: Beacon received, we can now send a probe request */ enum mac80211_scan_flags { SCAN_SW_SCANNING, SCAN_HW_SCANNING, SCAN_ONCHANNEL_SCANNING, SCAN_COMPLETED, SCAN_ABORTED, SCAN_HW_CANCELLED, SCAN_BEACON_WAIT, SCAN_BEACON_DONE, }; /** * enum mac80211_scan_state - scan state machine states * * @SCAN_DECISION: Main entry point to the scan state machine, this state * determines if we should keep on scanning or switch back to the * operating channel * @SCAN_SET_CHANNEL: Set the next channel to be scanned * @SCAN_SEND_PROBE: Send probe requests and wait for probe responses * @SCAN_SUSPEND: Suspend the scan and go back to operating channel to * send out data * @SCAN_RESUME: Resume the scan and scan the next channel * @SCAN_ABORT: Abort the scan and go back to operating channel */ enum mac80211_scan_state { SCAN_DECISION, SCAN_SET_CHANNEL, SCAN_SEND_PROBE, SCAN_SUSPEND, SCAN_RESUME, SCAN_ABORT, }; DECLARE_STATIC_KEY_FALSE(aql_disable); struct ieee80211_local { /* embed the driver visible part. * don't cast (use the static inlines below), but we keep * it first anyway so they become a no-op */ struct ieee80211_hw hw; struct fq fq; struct codel_vars *cvars; struct codel_params cparams; /* protects active_txqs and txqi->schedule_order */ spinlock_t active_txq_lock[IEEE80211_NUM_ACS]; struct list_head active_txqs[IEEE80211_NUM_ACS]; u16 schedule_round[IEEE80211_NUM_ACS]; /* serializes ieee80211_handle_wake_tx_queue */ spinlock_t handle_wake_tx_queue_lock; u16 airtime_flags; u32 aql_txq_limit_low[IEEE80211_NUM_ACS]; u32 aql_txq_limit_high[IEEE80211_NUM_ACS]; u32 aql_threshold; atomic_t aql_total_pending_airtime; atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS]; const struct ieee80211_ops *ops; /* * private workqueue to mac80211. mac80211 makes this accessible * via ieee80211_queue_work() */ struct workqueue_struct *workqueue; unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; int q_stop_reasons[IEEE80211_MAX_QUEUES][IEEE80211_QUEUE_STOP_REASONS]; /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */ spinlock_t queue_stop_reason_lock; int open_count; int monitors, cooked_mntrs; /* number of interfaces with corresponding FIF_ flags */ int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll, fif_probe_req; bool probe_req_reg; bool rx_mcast_action_reg; unsigned int filter_flags; /* FIF_* */ bool wiphy_ciphers_allocated; struct cfg80211_chan_def dflt_chandef; bool emulate_chanctx; /* protects the aggregated multicast list and filter calls */ spinlock_t filter_lock; /* used for uploading changed mc list */ struct wiphy_work reconfig_filter; /* aggregated multicast list */ struct netdev_hw_addr_list mc_list; bool tim_in_locked_section; /* see ieee80211_beacon_get() */ /* * suspended is true if we finished all the suspend _and_ we have * not yet come up from resume. This is to be used by mac80211 * to ensure driver sanity during suspend and mac80211's own * sanity. It can eventually be used for WoW as well. */ bool suspended; /* suspending is true during the whole suspend process */ bool suspending; /* * Resuming is true while suspended, but when we're reprogramming the * hardware -- at that time it's allowed to use ieee80211_queue_work() * again even though some other parts of the stack are still suspended * and we still drop received frames to avoid waking the stack. */ bool resuming; /* * quiescing is true during the suspend process _only_ to * ease timer cancelling etc. 
*/ bool quiescing; /* device is started */ bool started; /* device is during a HW reconfig */ bool in_reconfig; /* reconfiguration failed ... suppress some warnings etc. */ bool reconfig_failure; /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; struct wiphy_work radar_detected_work; /* number of RX chains the hardware has */ u8 rx_chains; /* bitmap of which sbands were copied */ u8 sband_allocated; int tx_headroom; /* required headroom for hardware/radiotap */ /* Tasklet and skb queue to process calls from IRQ mode. All frames * added to skb_queue will be processed, but frames in * skb_queue_unreliable may be dropped if the total length of these * queues increases over the limit. */ #define IEEE80211_IRQSAFE_QUEUE_LIMIT 128 struct tasklet_struct tasklet; struct sk_buff_head skb_queue; struct sk_buff_head skb_queue_unreliable; spinlock_t rx_path_lock; /* Station data */ /* * The list, hash table and counter are protected * by the wiphy mutex, reads are done with RCU. */ spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; struct rhltable sta_hash; struct rhltable link_sta_hash; struct timer_list sta_cleanup; int sta_generation; struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; struct tasklet_struct wake_txqs_tasklet; atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES]; /* number of interfaces with allmulti RX */ atomic_t iff_allmultis; struct rate_control_ref *rate_ctrl; struct arc4_ctx wep_tx_ctx; struct arc4_ctx wep_rx_ctx; u32 wep_iv; /* see iface.c */ struct list_head interfaces; struct list_head mon_list; /* only that are IFF_UP && !cooked */ struct mutex iflist_mtx; /* Scanning and BSS list */ unsigned long scanning; struct cfg80211_ssid scan_ssid; struct cfg80211_scan_request *int_scan_req; struct cfg80211_scan_request __rcu *scan_req; struct ieee80211_scan_request *hw_scan_req; struct cfg80211_chan_def scan_chandef; enum nl80211_band hw_scan_band; int scan_channel_idx; int scan_ies_len; int hw_scan_ies_bufsize; struct cfg80211_scan_info scan_info; struct wiphy_work sched_scan_stopped_work; struct ieee80211_sub_if_data __rcu *sched_scan_sdata; struct cfg80211_sched_scan_request __rcu *sched_scan_req; u8 scan_addr[ETH_ALEN]; unsigned long leave_oper_channel_time; enum mac80211_scan_state next_scan_state; struct wiphy_delayed_work scan_work; struct ieee80211_sub_if_data __rcu *scan_sdata; /* Temporary remain-on-channel for off-channel operations */ struct ieee80211_channel *tmp_channel; /* channel contexts */ struct list_head chanctx_list; #ifdef CONFIG_MAC80211_LEDS struct led_trigger tx_led, rx_led, assoc_led, radio_led; struct led_trigger tpt_led; atomic_t tx_led_active, rx_led_active, assoc_led_active; atomic_t radio_led_active, tpt_led_active; struct tpt_led_trigger *tpt_led_trigger; #endif #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* SNMP counters */ /* dot11CountersTable */ u32 dot11TransmittedFragmentCount; u32 dot11MulticastTransmittedFrameCount; u32 dot11FailedCount; u32 dot11RetryCount; u32 dot11MultipleRetryCount; u32 dot11FrameDuplicateCount; u32 dot11ReceivedFragmentCount; u32 dot11MulticastReceivedFrameCount; u32 dot11TransmittedFrameCount; /* TX/RX handler statistics */ unsigned int tx_handlers_drop; unsigned int tx_handlers_queued; unsigned int tx_handlers_drop_wep; unsigned int tx_handlers_drop_not_assoc; unsigned int tx_handlers_drop_unauth_port; unsigned int rx_handlers_drop; unsigned int rx_handlers_queued; unsigned int rx_handlers_drop_nullfunc; unsigned int rx_handlers_drop_defrag; unsigned int 
tx_expand_skb_head; unsigned int tx_expand_skb_head_cloned; unsigned int rx_expand_skb_head_defrag; unsigned int rx_handlers_fragments; unsigned int tx_status_drop; #define I802_DEBUG_INC(c) (c)++ #else /* CONFIG_MAC80211_DEBUG_COUNTERS */ #define I802_DEBUG_INC(c) do { } while (0) #endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ int total_ps_buffered; /* total number of all buffered unicast and * multicast packets for power saving stations */ bool pspolling; /* * PS can only be enabled when we have exactly one managed * interface (and monitors) in PS, this then points there. */ struct ieee80211_sub_if_data *ps_sdata; struct wiphy_work dynamic_ps_enable_work; struct wiphy_work dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; struct notifier_block ifa_notifier; struct notifier_block ifa6_notifier; /* * The dynamic ps timeout configured from user space via WEXT - * this will override whatever chosen by mac80211 internally. */ int dynamic_ps_forced_timeout; int user_power_level; /* in dBm, for all interfaces */ struct work_struct restart_work; #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { struct dentry *rcdir; struct dentry *keys; } debugfs; bool force_tx_status; #endif /* * Remain-on-channel support */ struct wiphy_delayed_work roc_work; struct list_head roc_list; struct wiphy_work hw_roc_start, hw_roc_done; unsigned long hw_roc_start_time; u64 roc_cookie_counter; struct idr ack_status_frames; spinlock_t ack_status_lock; struct ieee80211_sub_if_data __rcu *p2p_sdata; /* virtual monitor interface */ struct ieee80211_sub_if_data __rcu *monitor_sdata; struct ieee80211_chan_req monitor_chanreq; /* extended capabilities provided by mac80211 */ u8 ext_capa[8]; bool wbrf_supported; }; static inline struct ieee80211_sub_if_data * IEEE80211_DEV_TO_SUB_IF(const struct net_device *dev) { return netdev_priv(dev); } static inline struct ieee80211_sub_if_data * IEEE80211_WDEV_TO_SUB_IF(struct wireless_dev *wdev) { return container_of(wdev, struct ieee80211_sub_if_data, wdev); } static inline struct ieee80211_supported_band * ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; enum nl80211_band band; WARN_ON(ieee80211_vif_is_mld(&sdata->vif)); rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); return NULL; } band = chanctx_conf->def.chan->band; rcu_read_unlock(); return local->hw.wiphy->bands[band]; } static inline struct ieee80211_supported_band * ieee80211_get_link_sband(struct ieee80211_link_data *link) { struct ieee80211_local *local = link->sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; enum nl80211_band band; rcu_read_lock(); chanctx_conf = rcu_dereference(link->conf->chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); return NULL; } band = chanctx_conf->def.chan->band; rcu_read_unlock(); return local->hw.wiphy->bands[band]; } /* this struct holds the value parsing from channel switch IE */ struct ieee80211_csa_ie { struct ieee80211_chan_req chanreq; u8 mode; u8 count; u8 ttl; u16 pre_value; u16 reason_code; u32 max_switch_time; }; enum ieee80211_elems_parse_error { IEEE80211_PARSE_ERR_INVALID_END = BIT(0), IEEE80211_PARSE_ERR_DUP_ELEM = BIT(1), IEEE80211_PARSE_ERR_BAD_ELEM_SIZE = BIT(2), IEEE80211_PARSE_ERR_UNEXPECTED_ELEM = BIT(3), IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC = BIT(4), }; /* Parsed Information Elements */ struct ieee802_11_elems { const u8 *ie_start; size_t total_len; u32 crc; /* 
pointers to IEs */ const struct ieee80211_tdls_lnkie *lnk_id; const struct ieee80211_ch_switch_timing *ch_sw_timing; const u8 *ext_capab; const u8 *ssid; const u8 *supp_rates; const u8 *ds_params; const struct ieee80211_tim_ie *tim; const u8 *rsn; const u8 *rsnx; const u8 *erp_info; const u8 *ext_supp_rates; const u8 *wmm_info; const u8 *wmm_param; const struct ieee80211_ht_cap *ht_cap_elem; const struct ieee80211_ht_operation *ht_operation; const struct ieee80211_vht_cap *vht_cap_elem; const struct ieee80211_vht_operation *vht_operation; const struct ieee80211_meshconf_ie *mesh_config; const u8 *he_cap; const struct ieee80211_he_operation *he_operation; const struct ieee80211_he_spr *he_spr; const struct ieee80211_mu_edca_param_set *mu_edca_param_set; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; const u8 *uora_element; const u8 *mesh_id; const u8 *peering; const __le16 *awake_window; const u8 *preq; const u8 *prep; const u8 *perr; const struct ieee80211_rann_ie *rann; const struct ieee80211_channel_sw_ie *ch_switch_ie; const struct ieee80211_ext_chansw_ie *ext_chansw_ie; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; const u8 *max_channel_switch_time; const u8 *country_elem; const u8 *pwr_constr_elem; const u8 *cisco_dtpc_elem; const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie; const struct ieee80211_multiple_bssid_configuration *mbssid_config_ie; const struct ieee80211_bssid_index *bssid_index; u8 max_bssid_indicator; u8 dtim_count; u8 dtim_period; const struct ieee80211_addba_ext_ie *addba_ext_ie; const struct ieee80211_s1g_cap *s1g_capab; const struct ieee80211_s1g_oper_ie *s1g_oper; const struct ieee80211_s1g_bcn_compat_ie *s1g_bcn_compat; const struct ieee80211_aid_response_ie *aid_resp; const struct ieee80211_eht_cap_elem *eht_cap; const struct ieee80211_eht_operation *eht_operation; const struct ieee80211_multi_link_elem *ml_basic; const struct ieee80211_multi_link_elem *ml_reconf; const struct ieee80211_bandwidth_indication *bandwidth_indication; const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; /* length of them, respectively */ u8 ext_capab_len; u8 ssid_len; u8 supp_rates_len; u8 tim_len; u8 rsn_len; u8 rsnx_len; u8 ext_supp_rates_len; u8 wmm_info_len; u8 wmm_param_len; u8 he_cap_len; u8 mesh_id_len; u8 peering_len; u8 preq_len; u8 prep_len; u8 perr_len; u8 country_elem_len; u8 bssid_index_len; u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT]; u8 tx_pwr_env_num; u8 eht_cap_len; /* mult-link element can be de-fragmented and thus u8 is not sufficient */ size_t ml_basic_len; size_t ml_reconf_len; u8 ttlm_num; /* * store the per station profile pointer and length in case that the * parsing also handled Multi-Link element parsing for a specific link * ID. 
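 *
 * Illustrative sketch (not code from this file; the buffer and helper
 * names are made up): asking the parser, declared further down in this
 * header, for the per-STA profile of link 1 would look roughly like:
 *
 *	struct ieee80211_elems_parse_params parse_params = {
 *		.mode = IEEE80211_CONN_MODE_HIGHEST,
 *		.start = elem_start,	(hypothetical: IEs of an assoc resp)
 *		.len = elem_len,	(hypothetical length)
 *		.link_id = 1,
 *	};
 *	struct ieee802_11_elems *elems;
 *
 *	elems = ieee802_11_parse_elems_full(&parse_params);
 *	if (elems && elems->prof)
 *		handle_sta_profile(elems->prof, elems->sta_prof_len);
 *	kfree(elems);
 *
 * where handle_sta_profile() is a hypothetical consumer of the profile.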
*/ struct ieee80211_mle_per_sta_profile *prof; size_t sta_prof_len; /* whether/which parse error occurred while retrieving these elements */ u8 parse_error; }; static inline struct ieee80211_local *hw_to_local( struct ieee80211_hw *hw) { return container_of(hw, struct ieee80211_local, hw); } static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq) { return container_of(txq, struct txq_info, txq); } static inline bool txq_has_queue(struct ieee80211_txq *txq) { struct txq_info *txqi = to_txq_info(txq); return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets); } static inline bool ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status) { return status->flag & RX_FLAG_MACTIME; } void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata); /* This function returns the number of multicast stations connected to this * interface. It returns -1 if that number is not tracked, that is for netdevs * not in AP or AP_VLAN mode or when using 4addr. */ static inline int ieee80211_vif_get_num_mcast_if(struct ieee80211_sub_if_data *sdata) { if (sdata->vif.type == NL80211_IFTYPE_AP) return atomic_read(&sdata->u.ap.num_mcast_sta); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) return atomic_read(&sdata->u.vlan.num_mcast_sta); return -1; } u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, struct ieee80211_rx_status *status, unsigned int mpdu_len, unsigned int mpdu_offset); int ieee80211_hw_config(struct ieee80211_local *local, u32 changed); int ieee80211_hw_conf_chan(struct ieee80211_local *local); void ieee80211_hw_conf_init(struct ieee80211_local *local); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed); void ieee80211_vif_cfg_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed); void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, u64 changed); void ieee80211_configure_filter(struct ieee80211_local *local); u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp); void ieee80211_check_fast_rx(struct sta_info *sta); void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_clear_fast_rx(struct sta_info *sta); bool ieee80211_is_our_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr, int *out_link_id); /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct cfg80211_auth_request *req); int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req); int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct cfg80211_deauth_request *req); int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req); void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_ps(struct ieee80211_local *local); void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_rx_queued_mgmt(struct 
ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, __le16 fc, bool acked); void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, u8 reason, bool tx); void ieee80211_mgd_setup_link(struct ieee80211_link_data *link); void ieee80211_mgd_stop_link(struct ieee80211_link_data *link); void ieee80211_mgd_set_link_qos_params(struct ieee80211_link_data *link); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, const u8 *addr, u32 supp_rates); int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params); int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings, u64 *changed); int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed); void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata); /* OCB code */ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata); void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, const u8 *addr, u32 supp_rates); void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, struct ocb_setup *setup); int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata); /* mesh code */ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings, u64 *changed); int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed); /* scan/BSS handling */ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, struct ieee80211_channel **channels, unsigned int n_channels); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req); void ieee80211_scan_cancel(struct ieee80211_local *local); void ieee80211_run_deferred_scan(struct ieee80211_local *local); void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb); void ieee80211_inform_bss(struct wiphy *wiphy, struct cfg80211_bss *bss, const struct cfg80211_bss_ies *ies, void *data); void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, struct ieee80211_mgmt *mgmt, size_t len, struct 
ieee80211_channel *channel); void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); /* scheduled scan handling */ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_stop(struct ieee80211_local *local); void ieee80211_sched_scan_end(struct ieee80211_local *local); void ieee80211_sched_scan_stopped_work(struct wiphy *wiphy, struct wiphy_work *work); /* off-channel/mgmt-tx */ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, unsigned int duration, u64 *cookie); int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie); int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); /* channel switch handling */ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); /* color change handling */ void ieee80211_color_change_finalize_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_color_collision_detection_work(struct work_struct *work); /* interface handling */ #define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_HIGHDMA | NETIF_F_GSO_SOFTWARE | \ NETIF_F_HW_TC) #define MAC80211_SUPPORTED_FEATURES_RX (NETIF_F_RXCSUM) #define MAC80211_SUPPORTED_FEATURES (MAC80211_SUPPORTED_FEATURES_TX | \ MAC80211_SUPPORTED_FEATURES_RX) int ieee80211_iface_init(void); void ieee80211_iface_exit(void); int ieee80211_if_add(struct ieee80211_local *local, const char *name, unsigned char name_assign_type, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params); int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); u32 ieee80211_idle_off(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset); int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up); void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata); int ieee80211_add_virtual_monitor(struct ieee80211_local *local); void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, bool update_bss); void ieee80211_recalc_offload(struct ieee80211_local *local); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { return test_bit(SDATA_STATE_RUNNING, &sdata->state); } /* link handling */ void 
ieee80211_link_setup(struct ieee80211_link_data *link); void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, int link_id, struct ieee80211_link_data *link, struct ieee80211_bss_conf *link_conf); void ieee80211_link_stop(struct ieee80211_link_data *link); int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata, u16 new_links, u16 dormant_links); static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata) { ieee80211_vif_set_links(sdata, 0, 0); } /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); void ieee80211_tx_pending(struct tasklet_struct *t); netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, struct net_device *dev); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, u32 ctrl_flags, u64 *cookie); void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, struct sk_buff_head *skbs); struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, int retry_count, bool send_to_cooked, struct ieee80211_tx_status *status); void ieee80211_check_fast_xmit(struct sta_info *sta); void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_clear_fast_xmit(struct sta_info *sta); int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, const u8 *dest, __be16 proto, bool unencrypted, int link_id, u64 *cookie); int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, struct sk_buff *skb, bool ampdu, const u8 *da, const u8 *sa); void ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb); /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_ht_cap *ht_cap_ie, struct link_sta_info *link_sta); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid, int link_id); bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old, enum ieee80211_smps_mode smps_mode_new); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq, const struct ieee80211_addba_ext_ie *addbaext); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len); void ieee80211_process_addba_resp(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_mgmt *mgmt, 
size_t len); void ieee80211_process_addba_request(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_agg_stop_reason reason); void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs); enum nl80211_smps_mode ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps); /* VHT */ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_vht_cap *vht_cap_ie, const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta); void ieee80211_sta_init_nss(struct link_sta_info *link_sta); enum ieee80211_sta_rx_bandwidth ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width); enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct link_sta_info *link_sta); void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct ieee80211_mgmt *mgmt); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct link_sta_info *sta, u8 opmode, enum nl80211_band band); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct link_sta_info *sta, u8 opmode, enum nl80211_band band); void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap); void ieee80211_get_vht_mask_from_cap(__le16 vht_cap, u16 vht_mask[NL80211_VHT_NSS_MAX]); enum nl80211_chan_width ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *sta); /* HE */ void ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const u8 *he_cap_ie, u8 he_cap_len, const struct ieee80211_he_6ghz_capa *he_6ghz_capa, struct link_sta_info *link_sta); void ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, const struct ieee80211_he_spr *he_spr_ie_elem); void ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, const struct ieee80211_he_operation *he_op_ie_elem); /* S1G */ void ieee80211_s1g_sta_rate_init(struct sta_info *sta); bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb); void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); /** * ieee80211_parse_ch_switch_ie - parses channel switch IEs * @sdata: the sdata of the interface which has received the frame * @elems: parsed 802.11 elements received with the frame * @current_band: indicates the current band * @vht_cap_info: VHT capabilities of the transmitter * @conn: contains information about own capabilities and restrictions * to decide which channel switch announcements can be accepted * @bssid: the currently connected bssid (for 
reporting) * @csa_ie: parsed 802.11 CSA elements (count, mode, chandef and mesh TTL). * All of them will be filled only on success. * Return: 0 on success, <0 on error and >0 if there is nothing to parse. */ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band current_band, u32 vht_cap_info, struct ieee80211_conn_settings *conn, u8 *bssid, struct ieee80211_csa_ie *csa_ie); /* Suspend/resume and hw reconfiguration */ int ieee80211_reconfig(struct ieee80211_local *local); void ieee80211_stop_device(struct ieee80211_local *local); int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan); static inline int __ieee80211_resume(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); WARN(test_bit(SCAN_HW_SCANNING, &local->scanning) && !test_bit(SCAN_COMPLETED, &local->scanning), "%s: resume with hardware scan still in progress\n", wiphy_name(hw->wiphy)); return ieee80211_reconfig(hw_to_local(hw)); } /* utility functions/constants */ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ const char *ieee80211_conn_mode_str(enum ieee80211_conn_mode mode); enum ieee80211_conn_bw_limit ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef); int ieee80211_frame_duration(enum nl80211_band band, size_t len, int rate, int erp, int short_preamble); void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params *qparam, int ac); void ieee80211_set_wmm_default(struct ieee80211_link_data *link, bool bss_notify, bool enable_qos); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb); void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, int link_id, enum nl80211_band band); /* sta_out needs to be checked for ERR_PTR() before using */ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct sta_info **sta_out); static inline void ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, enum nl80211_band band) { rcu_read_lock(); __ieee80211_tx_skb_tid_band(sdata, skb, tid, -1, band); rcu_read_unlock(); } void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, int link_id); static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { /* Send all internal mgmt frames on VO. Accordingly set TID to 7.
*/ ieee80211_tx_skb_tid(sdata, skb, 7, -1); } /** * struct ieee80211_elems_parse_params - element parsing parameters * @mode: connection mode for parsing * @start: pointer to the elements * @len: length of the elements * @action: %true if the elements came from an action frame * @filter: bitmap of element IDs to filter out while calculating * the element CRC * @crc: CRC starting value * @bss: the BSS to parse this as, for multi-BSSID cases this can * represent a non-transmitting BSS in which case the data * for that non-transmitting BSS is returned * @link_id: the link ID to parse elements for, if a STA profile * is present in the multi-link element, or -1 to ignore; * note that the code currently assumes parsing an association * (or re-association) response frame if this is given * @from_ap: frame is received from an AP (currently used only * for EHT capabilities parsing) */ struct ieee80211_elems_parse_params { enum ieee80211_conn_mode mode; const u8 *start; size_t len; bool action; u64 filter; u32 crc; struct cfg80211_bss *bss; int link_id; bool from_ap; }; struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params); static inline struct ieee802_11_elems * ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, u64 filter, u32 crc, struct cfg80211_bss *bss) { struct ieee80211_elems_parse_params params = { .mode = IEEE80211_CONN_MODE_HIGHEST, .start = start, .len = len, .action = action, .filter = filter, .crc = crc, .bss = bss, .link_id = -1, }; return ieee802_11_parse_elems_full(&params); } static inline struct ieee802_11_elems * ieee802_11_parse_elems(const u8 *start, size_t len, bool action, struct cfg80211_bss *bss) { return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss); } extern const int ieee802_1d_to_ac[8]; static inline int ieee80211_ac_from_tid(int tid) { return ieee802_1d_to_ac[tid & 7]; } void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_dynamic_ps_timer(struct timer_list *t); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, bool powersave); void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack, u16 tx_time); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, unsigned long queues, enum queue_stop_reason reason, bool refcounted); void ieee80211_stop_vif_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum queue_stop_reason reason); void ieee80211_wake_vif_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum queue_stop_reason reason); void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, unsigned long queues, enum queue_stop_reason reason, bool refcounted); void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason, bool refcounted); void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason, bool refcounted); void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); void ieee80211_add_pending_skbs(struct ieee80211_local *local, struct sk_buff_head *skbs); void ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, bool drop); void __ieee80211_flush_queues(struct 
ieee80211_local *local, struct ieee80211_sub_if_data *sdata, unsigned int queues, bool drop); static inline bool ieee80211_can_run_worker(struct ieee80211_local *local) { /* * It's unsafe to try to do any work during reconfigure flow. * When the flow ends the work will be requeued. */ if (local->in_reconfig) return false; /* * If quiescing is set, we are racing with __ieee80211_suspend. * __ieee80211_suspend flushes the workers after setting quiescing, * and we check quiescing / suspended before enqueing new workers. * We should abort the worker to avoid the races below. */ if (local->quiescing) return false; /* * We might already be suspended if the following scenario occurs: * __ieee80211_suspend Control path * * if (local->quiescing) * return; * local->quiescing = true; * flush_workqueue(); * queue_work(...); * local->suspended = true; * local->quiescing = false; * worker starts running... */ if (local->suspended) return false; return true; } int ieee80211_txq_setup_flows(struct ieee80211_local *local); void ieee80211_txq_set_params(struct ieee80211_local *local); void ieee80211_txq_teardown_flows(struct ieee80211_local *local); void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct txq_info *txq, int tid); void ieee80211_txq_purge(struct ieee80211_local *local, struct txq_info *txqi); void ieee80211_purge_sta_txqs(struct sta_info *sta); void ieee80211_txq_remove_vlan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats, struct txq_info *txqi); void ieee80211_wake_txqs(struct tasklet_struct *t); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *bssid, const u8 *da, const u8 *key, u8 key_len, u8 key_idx, u32 tx_flags); void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *da, const u8 *bssid, u16 stype, u16 reason, bool send_frame, u8 *frame_buf); enum { IEEE80211_PROBE_FLAG_DIRECTED = BIT(0), IEEE80211_PROBE_FLAG_MIN_CONTENT = BIT(1), IEEE80211_PROBE_FLAG_RANDOM_SN = BIT(2), }; int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer, size_t buffer_len, struct ieee80211_scan_ies *ie_desc, const u8 *ie, size_t ie_len, u8 bands_used, u32 *rate_masks, struct cfg80211_chan_def *chandef, u32 flags); struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, const u8 *src, const u8 *dst, u32 ratemask, struct ieee80211_channel *chan, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, u32 flags); u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band band, u32 *basic_rates); int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, enum ieee80211_smps_mode smps_mode); void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link); void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, int link_id); size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset); u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u16 cap); u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, const struct cfg80211_chan_def *chandef, u16 prot_mode, bool rifs_mode); void ieee80211_ie_build_wide_bw_cs(u8 *pos, const struct cfg80211_chan_def *chandef); u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct 
ieee80211_sta_vht_cap *vht_cap, u32 cap); u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata); u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef); u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef, const struct ieee80211_sta_eht_cap *eht_cap); int ieee80211_parse_bitrates(enum nl80211_chan_width width, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates); u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); void ieee80211_add_s1g_capab_ie(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_s1g_cap *caps, struct sk_buff *skb); void ieee80211_add_aid_request_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); /* element building in SKBs */ int ieee80211_put_srates_elem(struct sk_buff *skb, const struct ieee80211_supported_band *sband, u32 basic_rates, u32 rate_flags, u32 masked_rates, u8 element_id); int ieee80211_put_he_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, const struct ieee80211_supported_band *sband, const struct ieee80211_conn_settings *conn); int ieee80211_put_he_6ghz_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); int ieee80211_put_eht_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, const struct ieee80211_supported_band *sband, const struct ieee80211_conn_settings *conn); /* channel management */ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, const struct ieee80211_vht_operation *oper, const struct ieee80211_ht_operation *htop, struct cfg80211_chan_def *chandef); void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, struct cfg80211_chan_def *chandef); bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_local *local, const struct ieee80211_he_operation *he_oper, const struct ieee80211_eht_operation *eht_oper, struct cfg80211_chan_def *chandef); bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, struct cfg80211_chan_def *chandef); void ieee80211_chandef_downgrade(struct cfg80211_chan_def *chandef, struct ieee80211_conn_settings *conn); static inline void ieee80211_chanreq_downgrade(struct ieee80211_chan_req *chanreq, struct ieee80211_conn_settings *conn) { ieee80211_chandef_downgrade(&chanreq->oper, conn); if (WARN_ON(!conn)) return; if (conn->mode < IEEE80211_CONN_MODE_EHT) chanreq->ap.chan = NULL; } bool ieee80211_chanreq_identical(const struct ieee80211_chan_req *a, const struct ieee80211_chan_req *b); int __must_check ieee80211_link_use_channel(struct ieee80211_link_data *link, const struct ieee80211_chan_req *req, enum ieee80211_chanctx_mode mode); int __must_check ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, const struct ieee80211_chan_req *req, enum ieee80211_chanctx_mode mode, bool radar_required); int __must_check ieee80211_link_use_reserved_context(struct ieee80211_link_data *link); int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link); int __must_check ieee80211_link_change_chanreq(struct ieee80211_link_data *link, const struct ieee80211_chan_req *req, u64 *changed); void ieee80211_link_release_channel(struct ieee80211_link_data *link); void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link); void 
ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, bool clear); int ieee80211_chanctx_refcount(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, struct ieee80211_link_data *rsvd_for); bool ieee80211_is_radar_required(struct ieee80211_local *local); void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_dfs_cac_cancel(struct ieee80211_local *local); void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings); void ieee80211_recalc_dtim(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode chanmode, u8 radar_detect); int ieee80211_max_num_channels(struct ieee80211_local *local); void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); /* TDLS */ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *extra_ies, size_t extra_ies_len); int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper); void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk); int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, const u8 *addr, u8 oper_class, struct cfg80211_chan_def *chandef); void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); void ieee80211_teardown_tdls_peers(struct ieee80211_link_data *link); void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata, const u8 *peer, u16 reason); void ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); const char *ieee80211_get_reason_code_string(u16 reason_code); u16 ieee80211_encode_usf(int val); u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, enum nl80211_iftype type); extern const struct ethtool_ops ieee80211_ethtool_ops; u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *pubsta, int len, bool ampdu); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline #else #define debug_noinline #endif void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache); void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache); u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata); void ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const u8 *he_cap_ie, u8 he_cap_len, const struct ieee80211_eht_cap_elem *eht_cap_ie_elem, u8 eht_cap_len, struct link_sta_info *link_sta); void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); int ieee80211_req_neg_ttlm(struct ieee80211_sub_if_data *sdata, struct cfg80211_ttlm_params *params); void ieee80211_check_wbrf_support(struct 
ieee80211_local *local); void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef); void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef); #if IS_ENABLED(CONFIG_MAC80211_KUNIT_TEST) #define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym) #define VISIBLE_IF_MAC80211_KUNIT ieee80211_rx_result ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx); #else #define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) #define VISIBLE_IF_MAC80211_KUNIT static #endif #endif /* IEEE80211_I_H */
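/*
 * Illustrative sketch (not part of ieee80211_i.h): one way a caller might
 * use the element parser declared above.  The function name
 * example_parse_beacon_elems() and the beacon-frame assumptions are
 * hypothetical; only ieee802_11_parse_elems() and the caller-owned,
 * heap-allocated result follow from the declarations above.
 */
static void __maybe_unused
example_parse_beacon_elems(struct ieee80211_mgmt *mgmt, size_t len)
{
	const u8 *ies = mgmt->u.beacon.variable;
	size_t baselen = ies - (const u8 *)mgmt;
	struct ieee802_11_elems *elems;

	if (baselen > len)
		return;

	/* parse everything after the fixed beacon fields; no CRC filtering */
	elems = ieee802_11_parse_elems(ies, len - baselen, false, NULL);
	if (!elems)
		return;

	/* ... inspect the parsed element pointers here ... */

	kfree(elems);	/* the parsed elements are heap-allocated */
}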
// SPDX-License-Identifier: GPL-2.0-only /* * LED support for the input layer * * Copyright 2010-2015 Samuel Thibault <samuel.thibault@ens-lyon.org> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/init.h> #include <linux/leds.h> #include <linux/input.h> #if IS_ENABLED(CONFIG_VT) #define VT_TRIGGER(_name) .trigger = _name #else #define VT_TRIGGER(_name) .trigger = NULL #endif #if IS_ENABLED(CONFIG_SND_CTL_LED) #define AUDIO_TRIGGER(_name) .trigger = _name #else #define AUDIO_TRIGGER(_name) .trigger = NULL #endif static const struct { const char *name; const char *trigger; } input_led_info[LED_CNT] = { [LED_NUML] = { "numlock", VT_TRIGGER("kbd-numlock") }, [LED_CAPSL] = { "capslock", VT_TRIGGER("kbd-capslock") }, [LED_SCROLLL] = { "scrolllock", VT_TRIGGER("kbd-scrolllock") }, [LED_COMPOSE] = { "compose" }, [LED_KANA] = { "kana", VT_TRIGGER("kbd-kanalock") }, [LED_SLEEP] = { "sleep" }, [LED_SUSPEND] = { "suspend" }, [LED_MUTE] = { "mute", AUDIO_TRIGGER("audio-mute") }, [LED_MISC] = { "misc" }, [LED_MAIL] = { "mail" }, [LED_CHARGING] = { "charging" }, }; struct input_led { struct led_classdev cdev; struct input_handle *handle; unsigned int code; /* One of LED_* constants */ }; struct input_leds { struct input_handle handle; unsigned int num_leds; struct input_led leds[] __counted_by(num_leds); }; static enum led_brightness input_leds_brightness_get(struct led_classdev *cdev) { struct input_led *led = container_of(cdev, struct input_led, cdev); struct input_dev *input = led->handle->dev; return test_bit(led->code, input->led) ?
cdev->max_brightness : 0; } static void input_leds_brightness_set(struct led_classdev *cdev, enum led_brightness brightness) { struct input_led *led = container_of(cdev, struct input_led, cdev); input_inject_event(led->handle, EV_LED, led->code, !!brightness); } static void input_leds_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { } static int input_leds_get_count(struct input_dev *dev) { unsigned int led_code; int count = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) if (input_led_info[led_code].name) count++; return count; } static int input_leds_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_leds *leds; struct input_led *led; unsigned int num_leds; unsigned int led_code; int led_no; int error; num_leds = input_leds_get_count(dev); if (!num_leds) return -ENXIO; leds = kzalloc(struct_size(leds, leds, num_leds), GFP_KERNEL); if (!leds) return -ENOMEM; leds->num_leds = num_leds; leds->handle.dev = dev; leds->handle.handler = handler; leds->handle.name = "leds"; leds->handle.private = leds; error = input_register_handle(&leds->handle); if (error) goto err_free_mem; error = input_open_device(&leds->handle); if (error) goto err_unregister_handle; led_no = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) { if (!input_led_info[led_code].name) continue; led = &leds->leds[led_no]; led->handle = &leds->handle; led->code = led_code; led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s", dev_name(&dev->dev), input_led_info[led_code].name); if (!led->cdev.name) { error = -ENOMEM; goto err_unregister_leds; } led->cdev.max_brightness = 1; led->cdev.brightness_get = input_leds_brightness_get; led->cdev.brightness_set = input_leds_brightness_set; led->cdev.default_trigger = input_led_info[led_code].trigger; error = led_classdev_register(&dev->dev, &led->cdev); if (error) { dev_err(&dev->dev, "failed to register LED %s: %d\n", led->cdev.name, error); kfree(led->cdev.name); goto err_unregister_leds; } led_no++; } return 0; err_unregister_leds: while (--led_no >= 0) { struct input_led *led = &leds->leds[led_no]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(&leds->handle); err_unregister_handle: input_unregister_handle(&leds->handle); err_free_mem: kfree(leds); return error; } static void input_leds_disconnect(struct input_handle *handle) { struct input_leds *leds = handle->private; int i; for (i = 0; i < leds->num_leds; i++) { struct input_led *led = &leds->leds[i]; led_classdev_unregister(&led->cdev); kfree(led->cdev.name); } input_close_device(handle); input_unregister_handle(handle); kfree(leds); } static const struct input_device_id input_leds_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_LED) }, }, { }, }; MODULE_DEVICE_TABLE(input, input_leds_ids); static struct input_handler input_leds_handler = { .event = input_leds_event, .connect = input_leds_connect, .disconnect = input_leds_disconnect, .name = "leds", .id_table = input_leds_ids, }; static int __init input_leds_init(void) { return input_register_handler(&input_leds_handler); } module_init(input_leds_init); static void __exit input_leds_exit(void) { input_unregister_handler(&input_leds_handler); } module_exit(input_leds_exit); MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>"); MODULE_AUTHOR("Dmitry Torokhov <dmitry.torokhov@gmail.com>"); MODULE_DESCRIPTION("Input -> LEDs Bridge"); MODULE_LICENSE("GPL v2");
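/*
 * Illustrative sketch (not part of input-leds.c): the handler pattern used
 * above, reduced to a minimal skeleton for a keyboard-capable device.  All
 * example_* names are hypothetical; the API calls mirror the ones already
 * used in this file (input_register_handle(), input_open_device(), ...).
 */
static void example_event(struct input_handle *handle, unsigned int type,
			  unsigned int code, int value)
{
	/* invoked for every event from a matched device */
}

static int example_connect(struct input_handler *handler, struct input_dev *dev,
			   const struct input_device_id *id)
{
	struct input_handle *handle;
	int error;

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	handle->dev = dev;
	handle->handler = handler;
	handle->name = "example";

	error = input_register_handle(handle);
	if (error)
		goto err_free;

	error = input_open_device(handle);
	if (error)
		goto err_unregister;

	return 0;

err_unregister:
	input_unregister_handle(handle);
err_free:
	kfree(handle);
	return error;
}

static void example_disconnect(struct input_handle *handle)
{
	input_close_device(handle);
	input_unregister_handle(handle);
	kfree(handle);
}

static const struct input_device_id example_ids[] = {
	{ .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_KEY) }, },
	{ },
};

/* registration would mirror input_leds_init(): input_register_handler(&example_handler) */
static struct input_handler example_handler = {
	.event		= example_event,
	.connect	= example_connect,
	.disconnect	= example_disconnect,
	.name		= "example",
	.id_table	= example_ids,
};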
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef __SOUND_PCM_H #define __SOUND_PCM_H /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> * Abramo Bagnara <abramo@alsa-project.org> */ #include <sound/asound.h> #include <sound/memalloc.h> #include <sound/minors.h> #include <linux/poll.h> #include <linux/mm.h> #include <linux/bitops.h> #include <linux/pm_qos.h> #include <linux/refcount.h> #include <linux/uio.h> #define snd_pcm_substream_chip(substream) ((substream)->private_data) #define snd_pcm_chip(pcm) ((pcm)->private_data) #if IS_ENABLED(CONFIG_SND_PCM_OSS) #include <sound/pcm_oss.h> #endif /* * Hardware (lowlevel) section */ struct snd_pcm_hardware { unsigned int info; /* SNDRV_PCM_INFO_* */ u64 formats; /* SNDRV_PCM_FMTBIT_* */ u32 subformats; /* for S32_LE, SNDRV_PCM_SUBFMTBIT_* */ unsigned int rates; /* SNDRV_PCM_RATE_* */ unsigned int rate_min; /* min rate */ unsigned int rate_max; /* max rate */ unsigned int channels_min; /* min channels */ unsigned int channels_max; /* max channels */ size_t buffer_bytes_max; /* max buffer size */ size_t period_bytes_min; /* min period size */ size_t period_bytes_max; /* max period size */ unsigned int periods_min; /* min # of periods */ unsigned int periods_max; /* max # of periods */ size_t fifo_size; /* fifo size in bytes */ }; struct snd_pcm_status64; struct snd_pcm_substream; struct snd_pcm_audio_tstamp_config; /* definitions further down */ struct snd_pcm_audio_tstamp_report; struct snd_pcm_ops { int (*open)(struct snd_pcm_substream *substream); int (*close)(struct snd_pcm_substream *substream); int (*ioctl)(struct snd_pcm_substream * substream, unsigned int cmd, void *arg); int (*hw_params)(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params); int (*hw_free)(struct snd_pcm_substream *substream); int (*prepare)(struct snd_pcm_substream *substream); int (*trigger)(struct snd_pcm_substream *substream, int cmd); int (*sync_stop)(struct snd_pcm_substream *substream); snd_pcm_uframes_t (*pointer)(struct snd_pcm_substream *substream); int (*get_time_info)(struct snd_pcm_substream *substream, struct timespec64 *system_ts, struct timespec64 *audio_ts, struct snd_pcm_audio_tstamp_config *audio_tstamp_config, struct snd_pcm_audio_tstamp_report *audio_tstamp_report); int (*fill_silence)(struct snd_pcm_substream *substream, int channel, unsigned long pos, unsigned long bytes); int (*copy)(struct snd_pcm_substream *substream, int channel, unsigned long pos, struct iov_iter *iter, unsigned long bytes); struct page *(*page)(struct snd_pcm_substream *substream, unsigned long offset); int (*mmap)(struct snd_pcm_substream *substream, struct vm_area_struct *vma); int (*ack)(struct snd_pcm_substream *substream); }; /* * */ #if defined(CONFIG_SND_DYNAMIC_MINORS) #define SNDRV_PCM_DEVICES (SNDRV_OS_MINORS-2) #else #define SNDRV_PCM_DEVICES 8 #endif #define SNDRV_PCM_IOCTL1_RESET 0 /* 1 is absent slot. */ #define SNDRV_PCM_IOCTL1_CHANNEL_INFO 2 /* 3 is absent slot. 
*/ #define SNDRV_PCM_IOCTL1_FIFO_SIZE 4 #define SNDRV_PCM_TRIGGER_STOP 0 #define SNDRV_PCM_TRIGGER_START 1 #define SNDRV_PCM_TRIGGER_PAUSE_PUSH 3 #define SNDRV_PCM_TRIGGER_PAUSE_RELEASE 4 #define SNDRV_PCM_TRIGGER_SUSPEND 5 #define SNDRV_PCM_TRIGGER_RESUME 6 #define SNDRV_PCM_TRIGGER_DRAIN 7 #define SNDRV_PCM_POS_XRUN ((snd_pcm_uframes_t)-1) /* If you change this don't forget to change rates[] table in pcm_native.c */ #define SNDRV_PCM_RATE_5512 (1U<<0) /* 5512Hz */ #define SNDRV_PCM_RATE_8000 (1U<<1) /* 8000Hz */ #define SNDRV_PCM_RATE_11025 (1U<<2) /* 11025Hz */ #define SNDRV_PCM_RATE_16000 (1U<<3) /* 16000Hz */ #define SNDRV_PCM_RATE_22050 (1U<<4) /* 22050Hz */ #define SNDRV_PCM_RATE_32000 (1U<<5) /* 32000Hz */ #define SNDRV_PCM_RATE_44100 (1U<<6) /* 44100Hz */ #define SNDRV_PCM_RATE_48000 (1U<<7) /* 48000Hz */ #define SNDRV_PCM_RATE_64000 (1U<<8) /* 64000Hz */ #define SNDRV_PCM_RATE_88200 (1U<<9) /* 88200Hz */ #define SNDRV_PCM_RATE_96000 (1U<<10) /* 96000Hz */ #define SNDRV_PCM_RATE_176400 (1U<<11) /* 176400Hz */ #define SNDRV_PCM_RATE_192000 (1U<<12) /* 192000Hz */ #define SNDRV_PCM_RATE_352800 (1U<<13) /* 352800Hz */ #define SNDRV_PCM_RATE_384000 (1U<<14) /* 384000Hz */ #define SNDRV_PCM_RATE_CONTINUOUS (1U<<30) /* continuous range */ #define SNDRV_PCM_RATE_KNOT (1U<<31) /* supports more non-continuous rates */ #define SNDRV_PCM_RATE_8000_44100 (SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_11025|\ SNDRV_PCM_RATE_16000|SNDRV_PCM_RATE_22050|\ SNDRV_PCM_RATE_32000|SNDRV_PCM_RATE_44100) #define SNDRV_PCM_RATE_8000_48000 (SNDRV_PCM_RATE_8000_44100|SNDRV_PCM_RATE_48000) #define SNDRV_PCM_RATE_8000_96000 (SNDRV_PCM_RATE_8000_48000|SNDRV_PCM_RATE_64000|\ SNDRV_PCM_RATE_88200|SNDRV_PCM_RATE_96000) #define SNDRV_PCM_RATE_8000_192000 (SNDRV_PCM_RATE_8000_96000|SNDRV_PCM_RATE_176400|\ SNDRV_PCM_RATE_192000) #define SNDRV_PCM_RATE_8000_384000 (SNDRV_PCM_RATE_8000_192000|\ SNDRV_PCM_RATE_352800|\ SNDRV_PCM_RATE_384000) #define _SNDRV_PCM_FMTBIT(fmt) (1ULL << (__force int)SNDRV_PCM_FORMAT_##fmt) #define SNDRV_PCM_FMTBIT_S8 _SNDRV_PCM_FMTBIT(S8) #define SNDRV_PCM_FMTBIT_U8 _SNDRV_PCM_FMTBIT(U8) #define SNDRV_PCM_FMTBIT_S16_LE _SNDRV_PCM_FMTBIT(S16_LE) #define SNDRV_PCM_FMTBIT_S16_BE _SNDRV_PCM_FMTBIT(S16_BE) #define SNDRV_PCM_FMTBIT_U16_LE _SNDRV_PCM_FMTBIT(U16_LE) #define SNDRV_PCM_FMTBIT_U16_BE _SNDRV_PCM_FMTBIT(U16_BE) #define SNDRV_PCM_FMTBIT_S24_LE _SNDRV_PCM_FMTBIT(S24_LE) #define SNDRV_PCM_FMTBIT_S24_BE _SNDRV_PCM_FMTBIT(S24_BE) #define SNDRV_PCM_FMTBIT_U24_LE _SNDRV_PCM_FMTBIT(U24_LE) #define SNDRV_PCM_FMTBIT_U24_BE _SNDRV_PCM_FMTBIT(U24_BE) // For S32/U32 formats, the 'msbits' hardware parameter is often used to describe how many of the // most significant bits actually carry sample data. This covers so-called 'left-justified' or // 'right-padded' samples that are narrower than 32 bits.
#define SNDRV_PCM_FMTBIT_S32_LE _SNDRV_PCM_FMTBIT(S32_LE) #define SNDRV_PCM_FMTBIT_S32_BE _SNDRV_PCM_FMTBIT(S32_BE) #define SNDRV_PCM_FMTBIT_U32_LE _SNDRV_PCM_FMTBIT(U32_LE) #define SNDRV_PCM_FMTBIT_U32_BE _SNDRV_PCM_FMTBIT(U32_BE) #define SNDRV_PCM_FMTBIT_FLOAT_LE _SNDRV_PCM_FMTBIT(FLOAT_LE) #define SNDRV_PCM_FMTBIT_FLOAT_BE _SNDRV_PCM_FMTBIT(FLOAT_BE) #define SNDRV_PCM_FMTBIT_FLOAT64_LE _SNDRV_PCM_FMTBIT(FLOAT64_LE) #define SNDRV_PCM_FMTBIT_FLOAT64_BE _SNDRV_PCM_FMTBIT(FLOAT64_BE) #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE _SNDRV_PCM_FMTBIT(IEC958_SUBFRAME_LE) #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE _SNDRV_PCM_FMTBIT(IEC958_SUBFRAME_BE) #define SNDRV_PCM_FMTBIT_MU_LAW _SNDRV_PCM_FMTBIT(MU_LAW) #define SNDRV_PCM_FMTBIT_A_LAW _SNDRV_PCM_FMTBIT(A_LAW) #define SNDRV_PCM_FMTBIT_IMA_ADPCM _SNDRV_PCM_FMTBIT(IMA_ADPCM) #define SNDRV_PCM_FMTBIT_MPEG _SNDRV_PCM_FMTBIT(MPEG) #define SNDRV_PCM_FMTBIT_GSM _SNDRV_PCM_FMTBIT(GSM) #define SNDRV_PCM_FMTBIT_S20_LE _SNDRV_PCM_FMTBIT(S20_LE) #define SNDRV_PCM_FMTBIT_U20_LE _SNDRV_PCM_FMTBIT(U20_LE) #define SNDRV_PCM_FMTBIT_S20_BE _SNDRV_PCM_FMTBIT(S20_BE) #define SNDRV_PCM_FMTBIT_U20_BE _SNDRV_PCM_FMTBIT(U20_BE) #define SNDRV_PCM_FMTBIT_SPECIAL _SNDRV_PCM_FMTBIT(SPECIAL) #define SNDRV_PCM_FMTBIT_S24_3LE _SNDRV_PCM_FMTBIT(S24_3LE) #define SNDRV_PCM_FMTBIT_U24_3LE _SNDRV_PCM_FMTBIT(U24_3LE) #define SNDRV_PCM_FMTBIT_S24_3BE _SNDRV_PCM_FMTBIT(S24_3BE) #define SNDRV_PCM_FMTBIT_U24_3BE _SNDRV_PCM_FMTBIT(U24_3BE) #define SNDRV_PCM_FMTBIT_S20_3LE _SNDRV_PCM_FMTBIT(S20_3LE) #define SNDRV_PCM_FMTBIT_U20_3LE _SNDRV_PCM_FMTBIT(U20_3LE) #define SNDRV_PCM_FMTBIT_S20_3BE _SNDRV_PCM_FMTBIT(S20_3BE) #define SNDRV_PCM_FMTBIT_U20_3BE _SNDRV_PCM_FMTBIT(U20_3BE) #define SNDRV_PCM_FMTBIT_S18_3LE _SNDRV_PCM_FMTBIT(S18_3LE) #define SNDRV_PCM_FMTBIT_U18_3LE _SNDRV_PCM_FMTBIT(U18_3LE) #define SNDRV_PCM_FMTBIT_S18_3BE _SNDRV_PCM_FMTBIT(S18_3BE) #define SNDRV_PCM_FMTBIT_U18_3BE _SNDRV_PCM_FMTBIT(U18_3BE) #define SNDRV_PCM_FMTBIT_G723_24 _SNDRV_PCM_FMTBIT(G723_24) #define SNDRV_PCM_FMTBIT_G723_24_1B _SNDRV_PCM_FMTBIT(G723_24_1B) #define SNDRV_PCM_FMTBIT_G723_40 _SNDRV_PCM_FMTBIT(G723_40) #define SNDRV_PCM_FMTBIT_G723_40_1B _SNDRV_PCM_FMTBIT(G723_40_1B) #define SNDRV_PCM_FMTBIT_DSD_U8 _SNDRV_PCM_FMTBIT(DSD_U8) #define SNDRV_PCM_FMTBIT_DSD_U16_LE _SNDRV_PCM_FMTBIT(DSD_U16_LE) #define SNDRV_PCM_FMTBIT_DSD_U32_LE _SNDRV_PCM_FMTBIT(DSD_U32_LE) #define SNDRV_PCM_FMTBIT_DSD_U16_BE _SNDRV_PCM_FMTBIT(DSD_U16_BE) #define SNDRV_PCM_FMTBIT_DSD_U32_BE _SNDRV_PCM_FMTBIT(DSD_U32_BE) #ifdef SNDRV_LITTLE_ENDIAN #define SNDRV_PCM_FMTBIT_S16 SNDRV_PCM_FMTBIT_S16_LE #define SNDRV_PCM_FMTBIT_U16 SNDRV_PCM_FMTBIT_U16_LE #define SNDRV_PCM_FMTBIT_S24 SNDRV_PCM_FMTBIT_S24_LE #define SNDRV_PCM_FMTBIT_U24 SNDRV_PCM_FMTBIT_U24_LE #define SNDRV_PCM_FMTBIT_S32 SNDRV_PCM_FMTBIT_S32_LE #define SNDRV_PCM_FMTBIT_U32 SNDRV_PCM_FMTBIT_U32_LE #define SNDRV_PCM_FMTBIT_FLOAT SNDRV_PCM_FMTBIT_FLOAT_LE #define SNDRV_PCM_FMTBIT_FLOAT64 SNDRV_PCM_FMTBIT_FLOAT64_LE #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE #define SNDRV_PCM_FMTBIT_S20 SNDRV_PCM_FMTBIT_S20_LE #define SNDRV_PCM_FMTBIT_U20 SNDRV_PCM_FMTBIT_U20_LE #endif #ifdef SNDRV_BIG_ENDIAN #define SNDRV_PCM_FMTBIT_S16 SNDRV_PCM_FMTBIT_S16_BE #define SNDRV_PCM_FMTBIT_U16 SNDRV_PCM_FMTBIT_U16_BE #define SNDRV_PCM_FMTBIT_S24 SNDRV_PCM_FMTBIT_S24_BE #define SNDRV_PCM_FMTBIT_U24 SNDRV_PCM_FMTBIT_U24_BE #define SNDRV_PCM_FMTBIT_S32 SNDRV_PCM_FMTBIT_S32_BE #define SNDRV_PCM_FMTBIT_U32 SNDRV_PCM_FMTBIT_U32_BE #define SNDRV_PCM_FMTBIT_FLOAT 
SNDRV_PCM_FMTBIT_FLOAT_BE #define SNDRV_PCM_FMTBIT_FLOAT64 SNDRV_PCM_FMTBIT_FLOAT64_BE #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE #define SNDRV_PCM_FMTBIT_S20 SNDRV_PCM_FMTBIT_S20_BE #define SNDRV_PCM_FMTBIT_U20 SNDRV_PCM_FMTBIT_U20_BE #endif #define _SNDRV_PCM_SUBFMTBIT(fmt) BIT((__force int)SNDRV_PCM_SUBFORMAT_##fmt) #define SNDRV_PCM_SUBFMTBIT_STD _SNDRV_PCM_SUBFMTBIT(STD) #define SNDRV_PCM_SUBFMTBIT_MSBITS_MAX _SNDRV_PCM_SUBFMTBIT(MSBITS_MAX) #define SNDRV_PCM_SUBFMTBIT_MSBITS_20 _SNDRV_PCM_SUBFMTBIT(MSBITS_20) #define SNDRV_PCM_SUBFMTBIT_MSBITS_24 _SNDRV_PCM_SUBFMTBIT(MSBITS_24) struct snd_pcm_file { struct snd_pcm_substream *substream; int no_compat_mmap; unsigned int user_pversion; /* supported protocol version */ }; struct snd_pcm_hw_rule; typedef int (*snd_pcm_hw_rule_func_t)(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule); struct snd_pcm_hw_rule { unsigned int cond; int var; int deps[5]; snd_pcm_hw_rule_func_t func; void *private; }; struct snd_pcm_hw_constraints { struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK - SNDRV_PCM_HW_PARAM_FIRST_MASK + 1]; struct snd_interval intervals[SNDRV_PCM_HW_PARAM_LAST_INTERVAL - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL + 1]; unsigned int rules_num; unsigned int rules_all; struct snd_pcm_hw_rule *rules; }; static inline struct snd_mask *constrs_mask(struct snd_pcm_hw_constraints *constrs, snd_pcm_hw_param_t var) { return &constrs->masks[var - SNDRV_PCM_HW_PARAM_FIRST_MASK]; } static inline struct snd_interval *constrs_interval(struct snd_pcm_hw_constraints *constrs, snd_pcm_hw_param_t var) { return &constrs->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL]; } struct snd_ratnum { unsigned int num; unsigned int den_min, den_max, den_step; }; struct snd_ratden { unsigned int num_min, num_max, num_step; unsigned int den; }; struct snd_pcm_hw_constraint_ratnums { int nrats; const struct snd_ratnum *rats; }; struct snd_pcm_hw_constraint_ratdens { int nrats; const struct snd_ratden *rats; }; struct snd_pcm_hw_constraint_list { const unsigned int *list; unsigned int count; unsigned int mask; }; struct snd_pcm_hw_constraint_ranges { unsigned int count; const struct snd_interval *ranges; unsigned int mask; }; /* * userspace-provided audio timestamp config to kernel, * structure is for internal use only and filled with dedicated unpack routine */ struct snd_pcm_audio_tstamp_config { /* 5 of max 16 bits used */ u32 type_requested:4; u32 report_delay:1; /* add total delay to A/D or D/A */ }; static inline void snd_pcm_unpack_audio_tstamp_config(__u32 data, struct snd_pcm_audio_tstamp_config *config) { config->type_requested = data & 0xF; config->report_delay = (data >> 4) & 1; } /* * kernel-provided audio timestamp report to user-space * structure is for internal use only and read by dedicated pack routine */ struct snd_pcm_audio_tstamp_report { /* 6 of max 16 bits used for bit-fields */ /* for backwards compatibility */ u32 valid:1; /* actual type if hardware could not support requested timestamp */ u32 actual_type:4; /* accuracy represented in ns units */ u32 accuracy_report:1; /* 0 if accuracy unknown, 1 if accuracy field is valid */ u32 accuracy; /* up to 4.29s, will be packed in separate field */ }; static inline void snd_pcm_pack_audio_tstamp_report(__u32 *data, __u32 *accuracy, const struct snd_pcm_audio_tstamp_report *report) { u32 tmp; tmp = report->accuracy_report; tmp <<= 4; tmp |= report->actual_type; tmp <<= 1; tmp |= report->valid; *data &= 0xffff; /* zero-clear MSBs */ *data |= (tmp << 16); 
*accuracy = report->accuracy; } struct snd_pcm_runtime { /* -- Status -- */ snd_pcm_state_t state; /* stream state */ snd_pcm_state_t suspended_state; /* suspended stream state */ struct snd_pcm_substream *trigger_master; struct timespec64 trigger_tstamp; /* trigger timestamp */ bool trigger_tstamp_latched; /* trigger timestamp latched in low-level driver/hardware */ int overrange; snd_pcm_uframes_t avail_max; snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */ snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */ unsigned long hw_ptr_jiffies; /* Time when hw_ptr is updated */ unsigned long hw_ptr_buffer_jiffies; /* buffer time in jiffies */ snd_pcm_sframes_t delay; /* extra delay; typically FIFO size */ u64 hw_ptr_wrap; /* offset for hw_ptr due to boundary wrap-around */ /* -- HW params -- */ snd_pcm_access_t access; /* access mode */ snd_pcm_format_t format; /* SNDRV_PCM_FORMAT_* */ snd_pcm_subformat_t subformat; /* subformat */ unsigned int rate; /* rate in Hz */ unsigned int channels; /* channels */ snd_pcm_uframes_t period_size; /* period size */ unsigned int periods; /* periods */ snd_pcm_uframes_t buffer_size; /* buffer size */ snd_pcm_uframes_t min_align; /* Min alignment for the format */ size_t byte_align; unsigned int frame_bits; unsigned int sample_bits; unsigned int info; unsigned int rate_num; unsigned int rate_den; unsigned int no_period_wakeup: 1; /* -- SW params; see struct snd_pcm_sw_params for comments -- */ int tstamp_mode; unsigned int period_step; snd_pcm_uframes_t start_threshold; snd_pcm_uframes_t stop_threshold; snd_pcm_uframes_t silence_threshold; snd_pcm_uframes_t silence_size; snd_pcm_uframes_t boundary; /* internal data of auto-silencer */ snd_pcm_uframes_t silence_start; /* starting pointer to silence area */ snd_pcm_uframes_t silence_filled; /* already filled part of silence area */ union snd_pcm_sync_id sync; /* hardware synchronization ID */ /* -- mmap -- */ struct snd_pcm_mmap_status *status; struct snd_pcm_mmap_control *control; /* -- locking / scheduling -- */ snd_pcm_uframes_t twake; /* do transfer (!poll) wakeup if non-zero */ wait_queue_head_t sleep; /* poll sleep */ wait_queue_head_t tsleep; /* transfer sleep */ struct snd_fasync *fasync; bool stop_operating; /* sync_stop will be called */ struct mutex buffer_mutex; /* protect for buffer changes */ atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */ /* -- private section -- */ void *private_data; void (*private_free)(struct snd_pcm_runtime *runtime); /* -- hardware description -- */ struct snd_pcm_hardware hw; struct snd_pcm_hw_constraints hw_constraints; /* -- timer -- */ unsigned int timer_resolution; /* timer resolution */ int tstamp_type; /* timestamp type */ /* -- DMA -- */ unsigned char *dma_area; /* DMA area */ dma_addr_t dma_addr; /* physical bus address (not accessible from main CPU) */ size_t dma_bytes; /* size of DMA area */ struct snd_dma_buffer *dma_buffer_p; /* allocated buffer */ unsigned int buffer_changed:1; /* buffer allocation changed; set only in managed mode */ /* -- audio timestamp config -- */ struct snd_pcm_audio_tstamp_config audio_tstamp_config; struct snd_pcm_audio_tstamp_report audio_tstamp_report; struct timespec64 driver_tstamp; #if IS_ENABLED(CONFIG_SND_PCM_OSS) /* -- OSS things -- */ struct snd_pcm_oss_runtime oss; #endif }; struct snd_pcm_group { /* keep linked substreams */ spinlock_t lock; struct mutex mutex; struct list_head substreams; refcount_t refs; }; struct pid; struct snd_pcm_substream { struct snd_pcm *pcm; struct 
snd_pcm_str *pstr; void *private_data; /* copied from pcm->private_data */ int number; char name[32]; /* substream name */ int stream; /* stream (direction) */ struct pm_qos_request latency_pm_qos_req; /* pm_qos request */ size_t buffer_bytes_max; /* limit ring buffer size */ struct snd_dma_buffer dma_buffer; size_t dma_max; /* -- hardware operations -- */ const struct snd_pcm_ops *ops; /* -- runtime information -- */ struct snd_pcm_runtime *runtime; /* -- timer section -- */ struct snd_timer *timer; /* timer */ unsigned timer_running: 1; /* time is running */ long wait_time; /* time in ms for R/W to wait for avail */ /* -- next substream -- */ struct snd_pcm_substream *next; /* -- linked substreams -- */ struct list_head link_list; /* linked list member */ struct snd_pcm_group self_group; /* fake group for non linked substream (with substream lock inside) */ struct snd_pcm_group *group; /* pointer to current group */ /* -- assigned files -- */ int ref_count; atomic_t mmap_count; unsigned int f_flags; void (*pcm_release)(struct snd_pcm_substream *); struct pid *pid; #if IS_ENABLED(CONFIG_SND_PCM_OSS) /* -- OSS things -- */ struct snd_pcm_oss_substream oss; #endif #ifdef CONFIG_SND_VERBOSE_PROCFS struct snd_info_entry *proc_root; #endif /* CONFIG_SND_VERBOSE_PROCFS */ /* misc flags */ unsigned int hw_opened: 1; unsigned int managed_buffer_alloc:1; }; #define SUBSTREAM_BUSY(substream) ((substream)->ref_count > 0) struct snd_pcm_str { int stream; /* stream (direction) */ struct snd_pcm *pcm; /* -- substreams -- */ unsigned int substream_count; unsigned int substream_opened; struct snd_pcm_substream *substream; #if IS_ENABLED(CONFIG_SND_PCM_OSS) /* -- OSS things -- */ struct snd_pcm_oss_stream oss; #endif #ifdef CONFIG_SND_VERBOSE_PROCFS struct snd_info_entry *proc_root; #ifdef CONFIG_SND_PCM_XRUN_DEBUG unsigned int xrun_debug; /* 0 = disabled, 1 = verbose, 2 = stacktrace */ #endif #endif struct snd_kcontrol *chmap_kctl; /* channel-mapping controls */ struct device *dev; }; struct snd_pcm { struct snd_card *card; struct list_head list; int device; /* device number */ unsigned int info_flags; unsigned short dev_class; unsigned short dev_subclass; char id[64]; char name[80]; struct snd_pcm_str streams[2]; struct mutex open_mutex; wait_queue_head_t open_wait; void *private_data; void (*private_free) (struct snd_pcm *pcm); bool internal; /* pcm is for internal use only */ bool nonatomic; /* whole PCM operations are in non-atomic context */ bool no_device_suspend; /* don't invoke device PM suspend */ #if IS_ENABLED(CONFIG_SND_PCM_OSS) struct snd_pcm_oss oss; #endif }; /* * Registering */ extern const struct file_operations snd_pcm_f_ops[2]; int snd_pcm_new(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, struct snd_pcm **rpcm); int snd_pcm_new_internal(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, struct snd_pcm **rpcm); int snd_pcm_new_stream(struct snd_pcm *pcm, int stream, int substream_count); #if IS_ENABLED(CONFIG_SND_PCM_OSS) struct snd_pcm_notify { int (*n_register) (struct snd_pcm * pcm); int (*n_disconnect) (struct snd_pcm * pcm); int (*n_unregister) (struct snd_pcm * pcm); struct list_head list; }; int snd_pcm_notify(struct snd_pcm_notify *notify, int nfree); #endif /* * Native I/O */ int snd_pcm_info(struct snd_pcm_substream *substream, struct snd_pcm_info *info); int snd_pcm_info_user(struct snd_pcm_substream *substream, struct snd_pcm_info __user *info); int snd_pcm_status64(struct snd_pcm_substream 
*substream, struct snd_pcm_status64 *status); int snd_pcm_start(struct snd_pcm_substream *substream); int snd_pcm_stop(struct snd_pcm_substream *substream, snd_pcm_state_t status); int snd_pcm_drain_done(struct snd_pcm_substream *substream); int snd_pcm_stop_xrun(struct snd_pcm_substream *substream); #ifdef CONFIG_PM int snd_pcm_suspend_all(struct snd_pcm *pcm); #else static inline int snd_pcm_suspend_all(struct snd_pcm *pcm) { return 0; } #endif int snd_pcm_kernel_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg); int snd_pcm_open_substream(struct snd_pcm *pcm, int stream, struct file *file, struct snd_pcm_substream **rsubstream); void snd_pcm_release_substream(struct snd_pcm_substream *substream); int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, struct file *file, struct snd_pcm_substream **rsubstream); void snd_pcm_detach_substream(struct snd_pcm_substream *substream); int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area); #ifdef CONFIG_SND_DEBUG void snd_pcm_debug_name(struct snd_pcm_substream *substream, char *name, size_t len); #else static inline void snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size) { *buf = 0; } #endif /* * PCM library */ /** * snd_pcm_stream_linked - Check whether the substream is linked with others * @substream: substream to check * * Return: true if the given substream is linked with others */ static inline int snd_pcm_stream_linked(struct snd_pcm_substream *substream) { return substream->group != &substream->self_group; } void snd_pcm_stream_lock(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock(struct snd_pcm_substream *substream); void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream); unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream); /** * snd_pcm_stream_lock_irqsave - Lock the PCM stream * @substream: PCM substream * @flags: irq flags * * This locks the PCM stream like snd_pcm_stream_lock() but with the local * IRQ disabled (only when nonatomic is false). In the nonatomic case, this is identical * to snd_pcm_stream_lock(). */ #define snd_pcm_stream_lock_irqsave(substream, flags) \ do { \ typecheck(unsigned long, flags); \ flags = _snd_pcm_stream_lock_irqsave(substream); \ } while (0) void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, unsigned long flags); /** * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking * @substream: PCM substream * @flags: irq flags * * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with * the single-depth lockdep subclass.
*/ #define snd_pcm_stream_lock_irqsave_nested(substream, flags) \ do { \ typecheck(unsigned long, flags); \ flags = _snd_pcm_stream_lock_irqsave_nested(substream); \ } while (0) /* definitions for guard(); use like guard(pcm_stream_lock) */ DEFINE_LOCK_GUARD_1(pcm_stream_lock, struct snd_pcm_substream, snd_pcm_stream_lock(_T->lock), snd_pcm_stream_unlock(_T->lock)) DEFINE_LOCK_GUARD_1(pcm_stream_lock_irq, struct snd_pcm_substream, snd_pcm_stream_lock_irq(_T->lock), snd_pcm_stream_unlock_irq(_T->lock)) DEFINE_LOCK_GUARD_1(pcm_stream_lock_irqsave, struct snd_pcm_substream, snd_pcm_stream_lock_irqsave(_T->lock, _T->flags), snd_pcm_stream_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) /** * snd_pcm_group_for_each_entry - iterate over the linked substreams * @s: the iterator * @substream: the substream * * Iterate over all substreams linked to the given @substream. * When @substream isn't linked with any others, this returns @substream * itself once. */ #define snd_pcm_group_for_each_entry(s, substream) \ list_for_each_entry(s, &substream->group->substreams, link_list) #define for_each_pcm_streams(stream) \ for (stream = SNDRV_PCM_STREAM_PLAYBACK; \ stream <= SNDRV_PCM_STREAM_LAST; \ stream++) /** * snd_pcm_running - Check whether the substream is in a running state * @substream: substream to check * * Return: true if the given substream is in the state RUNNING, or in the * state DRAINING for playback. */ static inline int snd_pcm_running(struct snd_pcm_substream *substream) { return (substream->runtime->state == SNDRV_PCM_STATE_RUNNING || (substream->runtime->state == SNDRV_PCM_STATE_DRAINING && substream->stream == SNDRV_PCM_STREAM_PLAYBACK)); } /** * __snd_pcm_set_state - Change the current PCM state * @runtime: PCM runtime to set * @state: the current state to set * * Call within the stream lock */ static inline void __snd_pcm_set_state(struct snd_pcm_runtime *runtime, snd_pcm_state_t state) { runtime->state = state; runtime->status->state = state; /* copy for mmap */ } /** * bytes_to_samples - Unit conversion of the size from bytes to samples * @runtime: PCM runtime instance * @size: size in bytes * * Return: the size in samples */ static inline ssize_t bytes_to_samples(struct snd_pcm_runtime *runtime, ssize_t size) { return size * 8 / runtime->sample_bits; } /** * bytes_to_frames - Unit conversion of the size from bytes to frames * @runtime: PCM runtime instance * @size: size in bytes * * Return: the size in frames */ static inline snd_pcm_sframes_t bytes_to_frames(struct snd_pcm_runtime *runtime, ssize_t size) { return size * 8 / runtime->frame_bits; } /** * samples_to_bytes - Unit conversion of the size from samples to bytes * @runtime: PCM runtime instance * @size: size in samples * * Return: the byte size */ static inline ssize_t samples_to_bytes(struct snd_pcm_runtime *runtime, ssize_t size) { return size * runtime->sample_bits / 8; } /** * frames_to_bytes - Unit conversion of the size from frames to bytes * @runtime: PCM runtime instance * @size: size in frames * * Return: the byte size */ static inline ssize_t frames_to_bytes(struct snd_pcm_runtime *runtime, snd_pcm_sframes_t size) { return size * runtime->frame_bits / 8; } /** * frame_aligned - Check whether the byte size is aligned to frames * @runtime: PCM runtime instance * @bytes: size in bytes * * Return: true if aligned, or false if not */ static inline int frame_aligned(struct snd_pcm_runtime *runtime, ssize_t bytes) { return bytes % runtime->byte_align == 0; } /** * snd_pcm_lib_buffer_bytes - Get
the buffer size of the current PCM in bytes * @substream: PCM substream * * Return: buffer byte size */ static inline size_t snd_pcm_lib_buffer_bytes(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return frames_to_bytes(runtime, runtime->buffer_size); } /** * snd_pcm_lib_period_bytes - Get the period size of the current PCM in bytes * @substream: PCM substream * * Return: period byte size */ static inline size_t snd_pcm_lib_period_bytes(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return frames_to_bytes(runtime, runtime->period_size); } /** * snd_pcm_playback_avail - Get the available (writable) space for playback * @runtime: PCM runtime instance * * Result is between 0 ... (boundary - 1) * * Return: available frame size */ static inline snd_pcm_uframes_t snd_pcm_playback_avail(struct snd_pcm_runtime *runtime) { snd_pcm_sframes_t avail = runtime->status->hw_ptr + runtime->buffer_size - runtime->control->appl_ptr; if (avail < 0) avail += runtime->boundary; else if ((snd_pcm_uframes_t) avail >= runtime->boundary) avail -= runtime->boundary; return avail; } /** * snd_pcm_capture_avail - Get the available (readable) space for capture * @runtime: PCM runtime instance * * Result is between 0 ... (boundary - 1) * * Return: available frame size */ static inline snd_pcm_uframes_t snd_pcm_capture_avail(struct snd_pcm_runtime *runtime) { snd_pcm_sframes_t avail = runtime->status->hw_ptr - runtime->control->appl_ptr; if (avail < 0) avail += runtime->boundary; return avail; } /** * snd_pcm_playback_hw_avail - Get the queued space for playback * @runtime: PCM runtime instance * * Return: available frame size */ static inline snd_pcm_sframes_t snd_pcm_playback_hw_avail(struct snd_pcm_runtime *runtime) { return runtime->buffer_size - snd_pcm_playback_avail(runtime); } /** * snd_pcm_capture_hw_avail - Get the free space for capture * @runtime: PCM runtime instance * * Return: available frame size */ static inline snd_pcm_sframes_t snd_pcm_capture_hw_avail(struct snd_pcm_runtime *runtime) { return runtime->buffer_size - snd_pcm_capture_avail(runtime); } /** * snd_pcm_playback_ready - check whether the playback buffer is available * @substream: the pcm substream instance * * Checks whether enough free space is available on the playback buffer. * * Return: Non-zero if available, or zero if not. */ static inline int snd_pcm_playback_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return snd_pcm_playback_avail(runtime) >= runtime->control->avail_min; } /** * snd_pcm_capture_ready - check whether the capture buffer is available * @substream: the pcm substream instance * * Checks whether enough capture data is available on the capture buffer. * * Return: Non-zero if available, or zero if not. */ static inline int snd_pcm_capture_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return snd_pcm_capture_avail(runtime) >= runtime->control->avail_min; } /** * snd_pcm_playback_data - check whether any data exists on the playback buffer * @substream: the pcm substream instance * * Checks whether any data exists on the playback buffer. * * Return: Non-zero if any data exists, or zero if not. If stop_threshold * is bigger or equal to boundary, then this function returns always non-zero. 
*/ static inline int snd_pcm_playback_data(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->stop_threshold >= runtime->boundary) return 1; return snd_pcm_playback_avail(runtime) < runtime->buffer_size; } /** * snd_pcm_playback_empty - check whether the playback buffer is empty * @substream: the pcm substream instance * * Checks whether the playback buffer is empty. * * Return: Non-zero if empty, or zero if not. */ static inline int snd_pcm_playback_empty(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return snd_pcm_playback_avail(runtime) >= runtime->buffer_size; } /** * snd_pcm_capture_empty - check whether the capture buffer is empty * @substream: the pcm substream instance * * Checks whether the capture buffer is empty. * * Return: Non-zero if empty, or zero if not. */ static inline int snd_pcm_capture_empty(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return snd_pcm_capture_avail(runtime) == 0; } /** * snd_pcm_trigger_done - Mark the master substream * @substream: the pcm substream instance * @master: the linked master substream * * When multiple substreams of the same card are linked and the hardware * supports the single-shot operation, the driver calls this in the loop * in snd_pcm_group_for_each_entry() for marking the substream as "done". * Then most of trigger operations are performed only to the given master * substream. * * The trigger_master mark is cleared at timestamp updates at the end * of trigger operations. */ static inline void snd_pcm_trigger_done(struct snd_pcm_substream *substream, struct snd_pcm_substream *master) { substream->runtime->trigger_master = master; } static inline int hw_is_mask(int var) { return var >= SNDRV_PCM_HW_PARAM_FIRST_MASK && var <= SNDRV_PCM_HW_PARAM_LAST_MASK; } static inline int hw_is_interval(int var) { return var >= SNDRV_PCM_HW_PARAM_FIRST_INTERVAL && var <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; } static inline struct snd_mask *hw_param_mask(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { return &params->masks[var - SNDRV_PCM_HW_PARAM_FIRST_MASK]; } static inline struct snd_interval *hw_param_interval(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { return &params->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL]; } static inline const struct snd_mask *hw_param_mask_c(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { return &params->masks[var - SNDRV_PCM_HW_PARAM_FIRST_MASK]; } static inline const struct snd_interval *hw_param_interval_c(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { return &params->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL]; } /** * params_channels - Get the number of channels from the hw params * @p: hw params * * Return: the number of channels */ static inline unsigned int params_channels(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_CHANNELS)->min; } /** * params_rate - Get the sample rate from the hw params * @p: hw params * * Return: the sample rate */ static inline unsigned int params_rate(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_RATE)->min; } /** * params_period_size - Get the period size (in frames) from the hw params * @p: hw params * * Return: the period size in frames */ static inline unsigned int params_period_size(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, 
SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min; } /** * params_periods - Get the number of periods from the hw params * @p: hw params * * Return: the number of periods */ static inline unsigned int params_periods(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_PERIODS)->min; } /** * params_buffer_size - Get the buffer size (in frames) from the hw params * @p: hw params * * Return: the buffer size in frames */ static inline unsigned int params_buffer_size(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min; } /** * params_buffer_bytes - Get the buffer size (in bytes) from the hw params * @p: hw params * * Return: the buffer size in bytes */ static inline unsigned int params_buffer_bytes(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min; } int snd_interval_refine(struct snd_interval *i, const struct snd_interval *v); int snd_interval_list(struct snd_interval *i, unsigned int count, const unsigned int *list, unsigned int mask); int snd_interval_ranges(struct snd_interval *i, unsigned int count, const struct snd_interval *list, unsigned int mask); int snd_interval_ratnum(struct snd_interval *i, unsigned int rats_count, const struct snd_ratnum *rats, unsigned int *nump, unsigned int *denp); void _snd_pcm_hw_params_any(struct snd_pcm_hw_params *params); void _snd_pcm_hw_param_setempty(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var); int snd_pcm_hw_refine(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params); int snd_pcm_hw_constraint_mask64(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, u_int64_t mask); int snd_pcm_hw_constraint_minmax(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, unsigned int min, unsigned int max); int snd_pcm_hw_constraint_integer(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var); int snd_pcm_hw_constraint_list(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, const struct snd_pcm_hw_constraint_list *l); int snd_pcm_hw_constraint_ranges(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, const struct snd_pcm_hw_constraint_ranges *r); int snd_pcm_hw_constraint_ratnums(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, const struct snd_pcm_hw_constraint_ratnums *r); int snd_pcm_hw_constraint_ratdens(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, const struct snd_pcm_hw_constraint_ratdens *r); int snd_pcm_hw_constraint_msbits(struct snd_pcm_runtime *runtime, unsigned int cond, unsigned int width, unsigned int msbits); int snd_pcm_hw_constraint_step(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, unsigned long step); int snd_pcm_hw_constraint_pow2(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var); int snd_pcm_hw_rule_noresample(struct snd_pcm_runtime *runtime, unsigned int base_rate); int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond, int var, snd_pcm_hw_rule_func_t func, void *private, int dep, ...); /** * snd_pcm_hw_constraint_single() - Constrain parameter to a single value * @runtime: PCM runtime instance * @var: The hw_params variable to constrain * @val: The value to constrain to * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. 
*/ static inline int snd_pcm_hw_constraint_single( struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, unsigned int val) { return snd_pcm_hw_constraint_minmax(runtime, var, val, val); } int snd_pcm_format_signed(snd_pcm_format_t format); int snd_pcm_format_unsigned(snd_pcm_format_t format); int snd_pcm_format_linear(snd_pcm_format_t format); int snd_pcm_format_little_endian(snd_pcm_format_t format); int snd_pcm_format_big_endian(snd_pcm_format_t format); #if 0 /* just for kernel-doc */ /** * snd_pcm_format_cpu_endian - Check the PCM format is CPU-endian * @format: the format to check * * Return: 1 if the given PCM format is CPU-endian, 0 if * opposite, or a negative error code if endian not specified. */ int snd_pcm_format_cpu_endian(snd_pcm_format_t format); #endif /* DocBook */ #ifdef SNDRV_LITTLE_ENDIAN #define snd_pcm_format_cpu_endian(format) snd_pcm_format_little_endian(format) #else #define snd_pcm_format_cpu_endian(format) snd_pcm_format_big_endian(format) #endif int snd_pcm_format_width(snd_pcm_format_t format); /* in bits */ int snd_pcm_format_physical_width(snd_pcm_format_t format); /* in bits */ ssize_t snd_pcm_format_size(snd_pcm_format_t format, size_t samples); const unsigned char *snd_pcm_format_silence_64(snd_pcm_format_t format); int snd_pcm_format_set_silence(snd_pcm_format_t format, void *buf, unsigned int frames); void snd_pcm_set_ops(struct snd_pcm * pcm, int direction, const struct snd_pcm_ops *ops); void snd_pcm_set_sync(struct snd_pcm_substream *substream); int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg); void snd_pcm_period_elapsed_under_stream_lock(struct snd_pcm_substream *substream); void snd_pcm_period_elapsed(struct snd_pcm_substream *substream); snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, void *buf, bool interleaved, snd_pcm_uframes_t frames, bool in_kernel); static inline snd_pcm_sframes_t snd_pcm_lib_write(struct snd_pcm_substream *substream, const void __user *buf, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void __force *)buf, true, frames, false); } static inline snd_pcm_sframes_t snd_pcm_lib_read(struct snd_pcm_substream *substream, void __user *buf, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void __force *)buf, true, frames, false); } static inline snd_pcm_sframes_t snd_pcm_lib_writev(struct snd_pcm_substream *substream, void __user **bufs, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void *)bufs, false, frames, false); } static inline snd_pcm_sframes_t snd_pcm_lib_readv(struct snd_pcm_substream *substream, void __user **bufs, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void *)bufs, false, frames, false); } static inline snd_pcm_sframes_t snd_pcm_kernel_write(struct snd_pcm_substream *substream, const void *buf, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void *)buf, true, frames, true); } static inline snd_pcm_sframes_t snd_pcm_kernel_read(struct snd_pcm_substream *substream, void *buf, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, buf, true, frames, true); } static inline snd_pcm_sframes_t snd_pcm_kernel_writev(struct snd_pcm_substream *substream, void **bufs, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, bufs, false, frames, true); } static inline snd_pcm_sframes_t snd_pcm_kernel_readv(struct snd_pcm_substream *substream, void **bufs, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, bufs, false, frames, true); } 
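/*
 * Example (not compiled): a minimal sketch of how a driver's open callback
 * might apply the constraint helpers declared above. The callback name and
 * the rate list are hypothetical; real drivers derive them from their
 * hardware capabilities.
 */
#if 0
static const unsigned int foo_rates[] = { 44100, 48000, 96000 };

static const struct snd_pcm_hw_constraint_list foo_rate_constraints = {
	.count = ARRAY_SIZE(foo_rates),
	.list = foo_rates,
};

static int foo_pcm_open(struct snd_pcm_substream *substream)
{
	struct snd_pcm_runtime *runtime = substream->runtime;
	int err;

	/* restrict the rate to the values supported by the (imaginary) codec */
	err = snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
					 &foo_rate_constraints);
	if (err < 0)
		return err;

	/* force an integer number of periods per buffer */
	err = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS);
	if (err < 0)
		return err;

	/* pin the channel count to stereo via the single-value helper */
	return snd_pcm_hw_constraint_single(runtime,
					    SNDRV_PCM_HW_PARAM_CHANNELS, 2);
}
#endif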
int snd_pcm_hw_limit_rates(struct snd_pcm_hardware *hw); static inline int snd_pcm_limit_hw_rates(struct snd_pcm_runtime *runtime) { return snd_pcm_hw_limit_rates(&runtime->hw); } unsigned int snd_pcm_rate_to_rate_bit(unsigned int rate); unsigned int snd_pcm_rate_bit_to_rate(unsigned int rate_bit); unsigned int snd_pcm_rate_mask_intersect(unsigned int rates_a, unsigned int rates_b); unsigned int snd_pcm_rate_range_to_bits(unsigned int rate_min, unsigned int rate_max); /** * snd_pcm_set_runtime_buffer - Set the PCM runtime buffer * @substream: PCM substream to set * @bufp: the buffer information, NULL to clear * * Copy the buffer information to runtime->dma_buffer when @bufp is non-NULL. * Otherwise it clears the current buffer information. */ static inline void snd_pcm_set_runtime_buffer(struct snd_pcm_substream *substream, struct snd_dma_buffer *bufp) { struct snd_pcm_runtime *runtime = substream->runtime; if (bufp) { runtime->dma_buffer_p = bufp; runtime->dma_area = bufp->area; runtime->dma_addr = bufp->addr; runtime->dma_bytes = bufp->bytes; } else { runtime->dma_buffer_p = NULL; runtime->dma_area = NULL; runtime->dma_addr = 0; runtime->dma_bytes = 0; } } /** * snd_pcm_gettime - Fill the timespec64 depending on the timestamp mode * @runtime: PCM runtime instance * @tv: timespec64 to fill */ static inline void snd_pcm_gettime(struct snd_pcm_runtime *runtime, struct timespec64 *tv) { switch (runtime->tstamp_type) { case SNDRV_PCM_TSTAMP_TYPE_MONOTONIC: ktime_get_ts64(tv); break; case SNDRV_PCM_TSTAMP_TYPE_MONOTONIC_RAW: ktime_get_raw_ts64(tv); break; default: ktime_get_real_ts64(tv); break; } } /* * Memory */ void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream); void snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm); void snd_pcm_lib_preallocate_pages(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max); void snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm, int type, void *data, size_t size, size_t max); int snd_pcm_lib_malloc_pages(struct snd_pcm_substream *substream, size_t size); int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream); int snd_pcm_set_managed_buffer(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max); int snd_pcm_set_managed_buffer_all(struct snd_pcm *pcm, int type, struct device *data, size_t size, size_t max); /** * snd_pcm_set_fixed_buffer - Preallocate and set up the fixed size PCM buffer * @substream: the pcm substream instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * * This is a variant of snd_pcm_set_managed_buffer(), but this pre-allocates * only the given sized buffer and doesn't allow re-allocation nor dynamic * allocation of a larger buffer unlike the standard one. * The function may return -ENOMEM error, hence the caller must check it. * * Return: zero if successful, or a negative error code */ static inline int __must_check snd_pcm_set_fixed_buffer(struct snd_pcm_substream *substream, int type, struct device *data, size_t size) { return snd_pcm_set_managed_buffer(substream, type, data, size, 0); } /** * snd_pcm_set_fixed_buffer_all - Preallocate and set up the fixed size PCM buffer * @pcm: the pcm instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * * Apply the set up of the fixed buffer via snd_pcm_set_fixed_buffer() for * all substream. 
If any of allocation fails, it returns -ENOMEM, hence the * caller must check the return value. * * Return: zero if successful, or a negative error code */ static inline int __must_check snd_pcm_set_fixed_buffer_all(struct snd_pcm *pcm, int type, struct device *data, size_t size) { return snd_pcm_set_managed_buffer_all(pcm, type, data, size, 0); } int _snd_pcm_lib_alloc_vmalloc_buffer(struct snd_pcm_substream *substream, size_t size, gfp_t gfp_flags); int snd_pcm_lib_free_vmalloc_buffer(struct snd_pcm_substream *substream); struct page *snd_pcm_lib_get_vmalloc_page(struct snd_pcm_substream *substream, unsigned long offset); /** * snd_pcm_lib_alloc_vmalloc_buffer - allocate virtual DMA buffer * @substream: the substream to allocate the buffer to * @size: the requested buffer size, in bytes * * Allocates the PCM substream buffer using vmalloc(), i.e., the memory is * contiguous in kernel virtual space, but not in physical memory. Use this * if the buffer is accessed by kernel code but not by device DMA. * * Return: 1 if the buffer was changed, 0 if not changed, or a negative error * code. */ static inline int snd_pcm_lib_alloc_vmalloc_buffer (struct snd_pcm_substream *substream, size_t size) { return _snd_pcm_lib_alloc_vmalloc_buffer(substream, size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); } /** * snd_pcm_lib_alloc_vmalloc_32_buffer - allocate 32-bit-addressable buffer * @substream: the substream to allocate the buffer to * @size: the requested buffer size, in bytes * * This function works like snd_pcm_lib_alloc_vmalloc_buffer(), but uses * vmalloc_32(), i.e., the pages are allocated from 32-bit-addressable memory. * * Return: 1 if the buffer was changed, 0 if not changed, or a negative error * code. */ static inline int snd_pcm_lib_alloc_vmalloc_32_buffer (struct snd_pcm_substream *substream, size_t size) { return _snd_pcm_lib_alloc_vmalloc_buffer(substream, size, GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); } #define snd_pcm_get_dma_buf(substream) ((substream)->runtime->dma_buffer_p) /** * snd_pcm_sgbuf_get_addr - Get the DMA address at the corresponding offset * @substream: PCM substream * @ofs: byte offset * * Return: DMA address */ static inline dma_addr_t snd_pcm_sgbuf_get_addr(struct snd_pcm_substream *substream, unsigned int ofs) { return snd_sgbuf_get_addr(snd_pcm_get_dma_buf(substream), ofs); } /** * snd_pcm_sgbuf_get_chunk_size - Compute the max size that fits within the * contig. 
page from the given size * @substream: PCM substream * @ofs: byte offset * @size: byte size to examine * * Return: chunk size */ static inline unsigned int snd_pcm_sgbuf_get_chunk_size(struct snd_pcm_substream *substream, unsigned int ofs, unsigned int size) { return snd_sgbuf_get_chunk_size(snd_pcm_get_dma_buf(substream), ofs, size); } /** * snd_pcm_mmap_data_open - increase the mmap counter * @area: VMA * * PCM mmap callback should handle this counter properly */ static inline void snd_pcm_mmap_data_open(struct vm_area_struct *area) { struct snd_pcm_substream *substream = (struct snd_pcm_substream *)area->vm_private_data; atomic_inc(&substream->mmap_count); } /** * snd_pcm_mmap_data_close - decrease the mmap counter * @area: VMA * * PCM mmap callback should handle this counter properly */ static inline void snd_pcm_mmap_data_close(struct vm_area_struct *area) { struct snd_pcm_substream *substream = (struct snd_pcm_substream *)area->vm_private_data; atomic_dec(&substream->mmap_count); } int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *area); /* mmap for io-memory area */ #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_ALPHA) #define SNDRV_PCM_INFO_MMAP_IOMEM SNDRV_PCM_INFO_MMAP int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_struct *area); #else #define SNDRV_PCM_INFO_MMAP_IOMEM 0 #define snd_pcm_lib_mmap_iomem NULL #endif /** * snd_pcm_limit_isa_dma_size - Get the max size fitting with ISA DMA transfer * @dma: DMA number * @max: pointer to store the max size */ static inline void snd_pcm_limit_isa_dma_size(int dma, size_t *max) { *max = dma < 4 ? 64 * 1024 : 128 * 1024; } /* * Misc */ #define SNDRV_PCM_DEFAULT_CON_SPDIF (IEC958_AES0_CON_EMPHASIS_NONE|\ (IEC958_AES1_CON_ORIGINAL<<8)|\ (IEC958_AES1_CON_PCM_CODER<<8)|\ (IEC958_AES3_CON_FS_48000<<24)) const char *snd_pcm_format_name(snd_pcm_format_t format); /** * snd_pcm_direction_name - Get a string naming the direction of a stream * @direction: Stream's direction, one of SNDRV_PCM_STREAM_XXX * * Returns a string naming the direction of the stream. */ static inline const char *snd_pcm_direction_name(int direction) { if (direction == SNDRV_PCM_STREAM_PLAYBACK) return "Playback"; else return "Capture"; } /** * snd_pcm_stream_str - Get a string naming the direction of a stream * @substream: the pcm substream instance * * Return: A string naming the direction of the stream. 
*/ static inline const char *snd_pcm_stream_str(struct snd_pcm_substream *substream) { return snd_pcm_direction_name(substream->stream); } /* * PCM channel-mapping control API */ /* array element of channel maps */ struct snd_pcm_chmap_elem { unsigned char channels; unsigned char map[15]; }; /* channel map information; retrieved via snd_kcontrol_chip() */ struct snd_pcm_chmap { struct snd_pcm *pcm; /* assigned PCM instance */ int stream; /* PLAYBACK or CAPTURE */ struct snd_kcontrol *kctl; const struct snd_pcm_chmap_elem *chmap; unsigned int max_channels; unsigned int channel_mask; /* optional: active channels bitmask */ void *private_data; /* optional: private data pointer */ }; /** * snd_pcm_chmap_substream - get the PCM substream assigned to the given chmap info * @info: chmap information * @idx: the substream number index * * Return: the matched PCM substream, or NULL if not found */ static inline struct snd_pcm_substream * snd_pcm_chmap_substream(struct snd_pcm_chmap *info, unsigned int idx) { struct snd_pcm_substream *s; for (s = info->pcm->streams[info->stream].substream; s; s = s->next) if (s->number == idx) return s; return NULL; } /* ALSA-standard channel maps (RL/RR prior to C/LFE) */ extern const struct snd_pcm_chmap_elem snd_pcm_std_chmaps[]; /* Other world's standard channel maps (C/LFE prior to RL/RR) */ extern const struct snd_pcm_chmap_elem snd_pcm_alt_chmaps[]; /* bit masks to be passed to snd_pcm_chmap.channel_mask field */ #define SND_PCM_CHMAP_MASK_24 ((1U << 2) | (1U << 4)) #define SND_PCM_CHMAP_MASK_246 (SND_PCM_CHMAP_MASK_24 | (1U << 6)) #define SND_PCM_CHMAP_MASK_2468 (SND_PCM_CHMAP_MASK_246 | (1U << 8)) int snd_pcm_add_chmap_ctls(struct snd_pcm *pcm, int stream, const struct snd_pcm_chmap_elem *chmap, int max_channels, unsigned long private_value, struct snd_pcm_chmap **info_ret); /** * pcm_format_to_bits - Strong-typed conversion of pcm_format to bitwise * @pcm_format: PCM format * * Return: 64bit mask corresponding to the given PCM format */ static inline u64 pcm_format_to_bits(snd_pcm_format_t pcm_format) { return 1ULL << (__force int) pcm_format; } /** * pcm_for_each_format - helper to iterate for each format type * @f: the iterator variable in snd_pcm_format_t type */ #define pcm_for_each_format(f) \ for ((f) = SNDRV_PCM_FORMAT_FIRST; \ (__force int)(f) <= (__force int)SNDRV_PCM_FORMAT_LAST; \ (f) = (__force snd_pcm_format_t)((__force int)(f) + 1)) /* printk helpers */ #define pcm_err(pcm, fmt, args...) \ dev_err((pcm)->card->dev, fmt, ##args) #define pcm_warn(pcm, fmt, args...) \ dev_warn((pcm)->card->dev, fmt, ##args) #define pcm_dbg(pcm, fmt, args...) 
\ dev_dbg((pcm)->card->dev, fmt, ##args) /* helpers for copying between iov_iter and iomem */ int copy_to_iter_fromio(struct iov_iter *itert, const void __iomem *src, size_t count); int copy_from_iter_toio(void __iomem *dst, struct iov_iter *iter, size_t count); struct snd_pcm_status64 { snd_pcm_state_t state; /* stream state */ u8 rsvd[4]; s64 trigger_tstamp_sec; /* time when stream was started/stopped/paused */ s64 trigger_tstamp_nsec; s64 tstamp_sec; /* reference timestamp */ s64 tstamp_nsec; snd_pcm_uframes_t appl_ptr; /* appl ptr */ snd_pcm_uframes_t hw_ptr; /* hw ptr */ snd_pcm_sframes_t delay; /* current delay in frames */ snd_pcm_uframes_t avail; /* number of frames available */ snd_pcm_uframes_t avail_max; /* max frames available on hw since last status */ snd_pcm_uframes_t overrange; /* count of ADC (capture) overrange detections from last status */ snd_pcm_state_t suspended_state; /* suspended stream state */ __u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */ s64 audio_tstamp_sec; /* sample counter, wall clock, PHC or on-demand sync'ed */ s64 audio_tstamp_nsec; s64 driver_tstamp_sec; /* useful in case reference system tstamp is reported with delay */ s64 driver_tstamp_nsec; __u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */ unsigned char reserved[52-4*sizeof(s64)]; /* must be filled with zero */ }; #define SNDRV_PCM_IOCTL_STATUS64 _IOR('A', 0x20, struct snd_pcm_status64) #define SNDRV_PCM_IOCTL_STATUS_EXT64 _IOWR('A', 0x24, struct snd_pcm_status64) struct snd_pcm_status32 { snd_pcm_state_t state; /* stream state */ s32 trigger_tstamp_sec; /* time when stream was started/stopped/paused */ s32 trigger_tstamp_nsec; s32 tstamp_sec; /* reference timestamp */ s32 tstamp_nsec; u32 appl_ptr; /* appl ptr */ u32 hw_ptr; /* hw ptr */ s32 delay; /* current delay in frames */ u32 avail; /* number of frames available */ u32 avail_max; /* max frames available on hw since last status */ u32 overrange; /* count of ADC (capture) overrange detections from last status */ snd_pcm_state_t suspended_state; /* suspended stream state */ u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */ s32 audio_tstamp_sec; /* sample counter, wall clock, PHC or on-demand sync'ed */ s32 audio_tstamp_nsec; s32 driver_tstamp_sec; /* useful in case reference system tstamp is reported with delay */ s32 driver_tstamp_nsec; u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */ unsigned char reserved[52-4*sizeof(s32)]; /* must be filled with zero */ }; #define SNDRV_PCM_IOCTL_STATUS32 _IOR('A', 0x20, struct snd_pcm_status32) #define SNDRV_PCM_IOCTL_STATUS_EXT32 _IOWR('A', 0x24, struct snd_pcm_status32) #endif /* __SOUND_PCM_H */
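/*
 * Example (not compiled): a sketch of how the iteration and conversion
 * helpers above can be combined to build a format bitmask of all linear
 * formats no wider than a given sample width. The function name is
 * hypothetical.
 */
#if 0
static u64 foo_collect_linear_formats(int max_width)
{
	snd_pcm_format_t f;
	u64 fmt_mask = 0;

	pcm_for_each_format(f) {
		int width = snd_pcm_format_width(f);

		if (snd_pcm_format_linear(f) <= 0)
			continue;	/* skip non-linear and unknown formats */
		if (width <= 0 || width > max_width)
			continue;
		/* pcm_format_to_bits() turns the format enum into its mask bit */
		fmt_mask |= pcm_format_to_bits(f);
	}

	return fmt_mask;
}
#endif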
// SPDX-License-Identifier: GPL-2.0 /* * device_cgroup.c - device cgroup subsystem * * Copyright 2007 IBM Corp */ #include
<linux/bpf-cgroup.h> #include <linux/device_cgroup.h> #include <linux/cgroup.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/uaccess.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/rcupdate.h> #include <linux/mutex.h> #ifdef CONFIG_CGROUP_DEVICE static DEFINE_MUTEX(devcgroup_mutex); enum devcg_behavior { DEVCG_DEFAULT_NONE, DEVCG_DEFAULT_ALLOW, DEVCG_DEFAULT_DENY, }; /* * exception list locking rules: * hold devcgroup_mutex for update/read. * hold rcu_read_lock() for read. */ struct dev_exception_item { u32 major, minor; short type; short access; struct list_head list; struct rcu_head rcu; }; struct dev_cgroup { struct cgroup_subsys_state css; struct list_head exceptions; enum devcg_behavior behavior; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) { return s ? container_of(s, struct dev_cgroup, css) : NULL; } static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) { return css_to_devcgroup(task_css(task, devices_cgrp_id)); } /* * called under devcgroup_mutex */ static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { struct dev_exception_item *ex, *tmp, *new; lockdep_assert_held(&devcgroup_mutex); list_for_each_entry(ex, orig, list) { new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); if (!new) goto free_and_exit; list_add_tail(&new->list, dest); } return 0; free_and_exit: list_for_each_entry_safe(ex, tmp, dest, list) { list_del(&ex->list); kfree(ex); } return -ENOMEM; } static void dev_exceptions_move(struct list_head *dest, struct list_head *orig) { struct dev_exception_item *ex, *tmp; lockdep_assert_held(&devcgroup_mutex); list_for_each_entry_safe(ex, tmp, orig, list) { list_move_tail(&ex->list, dest); } } /* * called under devcgroup_mutex */ static int dev_exception_add(struct dev_cgroup *dev_cgroup, struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; lockdep_assert_held(&devcgroup_mutex); excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); if (!excopy) return -ENOMEM; list_for_each_entry(walk, &dev_cgroup->exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) continue; if (walk->minor != ex->minor) continue; walk->access |= ex->access; kfree(excopy); excopy = NULL; } if (excopy != NULL) list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions); return 0; } /* * called under devcgroup_mutex */ static void dev_exception_rm(struct dev_cgroup *dev_cgroup, struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; lockdep_assert_held(&devcgroup_mutex); list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) continue; if (walk->minor != ex->minor) continue; walk->access &= ~ex->access; if (!walk->access) { list_del_rcu(&walk->list); kfree_rcu(walk, rcu); } } } static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) { struct dev_exception_item *ex, *tmp; list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) { list_del_rcu(&ex->list); kfree_rcu(ex, rcu); } } /** * dev_exception_clean - frees all entries of the exception list * @dev_cgroup: dev_cgroup with the exception list to be cleaned * * called under devcgroup_mutex */ static void dev_exception_clean(struct dev_cgroup *dev_cgroup) { lockdep_assert_held(&devcgroup_mutex); __dev_exception_clean(dev_cgroup); } static inline bool is_devcg_online(const struct dev_cgroup *devcg) { return (devcg->behavior != DEVCG_DEFAULT_NONE); } /** * devcgroup_online - initializes 
devcgroup's behavior and exceptions based on * parent's * @css: css getting online * returns 0 in case of success, error code otherwise */ static int devcgroup_online(struct cgroup_subsys_state *css) { struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent); int ret = 0; mutex_lock(&devcgroup_mutex); if (parent_dev_cgroup == NULL) dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; else { ret = dev_exceptions_copy(&dev_cgroup->exceptions, &parent_dev_cgroup->exceptions); if (!ret) dev_cgroup->behavior = parent_dev_cgroup->behavior; } mutex_unlock(&devcgroup_mutex); return ret; } static void devcgroup_offline(struct cgroup_subsys_state *css) { struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); mutex_lock(&devcgroup_mutex); dev_cgroup->behavior = DEVCG_DEFAULT_NONE; mutex_unlock(&devcgroup_mutex); } /* * called from kernel/cgroup/cgroup.c with cgroup_lock() held. */ static struct cgroup_subsys_state * devcgroup_css_alloc(struct cgroup_subsys_state *parent_css) { struct dev_cgroup *dev_cgroup; dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); if (!dev_cgroup) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&dev_cgroup->exceptions); dev_cgroup->behavior = DEVCG_DEFAULT_NONE; return &dev_cgroup->css; } static void devcgroup_css_free(struct cgroup_subsys_state *css) { struct dev_cgroup *dev_cgroup = css_to_devcgroup(css); __dev_exception_clean(dev_cgroup); kfree(dev_cgroup); } #define DEVCG_ALLOW 1 #define DEVCG_DENY 2 #define DEVCG_LIST 3 #define MAJMINLEN 13 #define ACCLEN 4 static void set_access(char *acc, short access) { int idx = 0; memset(acc, 0, ACCLEN); if (access & DEVCG_ACC_READ) acc[idx++] = 'r'; if (access & DEVCG_ACC_WRITE) acc[idx++] = 'w'; if (access & DEVCG_ACC_MKNOD) acc[idx++] = 'm'; } static char type_to_char(short type) { if (type == DEVCG_DEV_ALL) return 'a'; if (type == DEVCG_DEV_CHAR) return 'c'; if (type == DEVCG_DEV_BLOCK) return 'b'; return 'X'; } static void set_majmin(char *str, unsigned m) { if (m == ~0) strcpy(str, "*"); else sprintf(str, "%u", m); } static int devcgroup_seq_show(struct seq_file *m, void *v) { struct dev_cgroup *devcgroup = css_to_devcgroup(seq_css(m)); struct dev_exception_item *ex; char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; rcu_read_lock(); /* * To preserve the compatibility: * - Only show the "all devices" when the default policy is to allow * - List the exceptions in case the default policy is to deny * This way, the file remains as a "whitelist of devices" */ if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { set_access(acc, DEVCG_ACC_MASK); set_majmin(maj, ~0); set_majmin(min, ~0); seq_printf(m, "%c %s:%s %s\n", type_to_char(DEVCG_DEV_ALL), maj, min, acc); } else { list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) { set_access(acc, ex->access); set_majmin(maj, ex->major); set_majmin(min, ex->minor); seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type), maj, min, acc); } } rcu_read_unlock(); return 0; } /** * match_exception - iterates the exception list trying to find a complete match * @exceptions: list of exceptions * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR) * @major: device file major number, ~0 to match all * @minor: device file minor number, ~0 to match all * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD) * * It is considered a complete match if an exception is found that will * contain the entire range of provided parameters. 
* * Return: true in case it matches an exception completely */ static bool match_exception(struct list_head *exceptions, short type, u32 major, u32 minor, short access) { struct dev_exception_item *ex; list_for_each_entry_rcu(ex, exceptions, list) { if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK)) continue; if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR)) continue; if (ex->major != ~0 && ex->major != major) continue; if (ex->minor != ~0 && ex->minor != minor) continue; /* provided access cannot have more than the exception rule */ if (access & (~ex->access)) continue; return true; } return false; } /** * match_exception_partial - iterates the exception list trying to find a partial match * @exceptions: list of exceptions * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR) * @major: device file major number, ~0 to match all * @minor: device file minor number, ~0 to match all * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD) * * It is considered a partial match if an exception's range is found to * contain *any* of the devices specified by provided parameters. This is * used to make sure no extra access is being granted that is forbidden by * any of the exceptions in the list. * * Return: true in case the provided range partially matches an exception */ static bool match_exception_partial(struct list_head *exceptions, short type, u32 major, u32 minor, short access) { struct dev_exception_item *ex; list_for_each_entry_rcu(ex, exceptions, list, lockdep_is_held(&devcgroup_mutex)) { if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK)) continue; if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR)) continue; /* * We must be sure that both the exception and the provided * range aren't masking all devices */ if (ex->major != ~0 && major != ~0 && ex->major != major) continue; if (ex->minor != ~0 && minor != ~0 && ex->minor != minor) continue; /* * In order to make sure the provided range isn't matching * an exception, all its access bits shouldn't match the * exception's access bits */ if (!(access & ex->access)) continue; return true; } return false; } /** * verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions * @dev_cgroup: dev cgroup to be tested against * @refex: new exception * @behavior: behavior of the exception's dev_cgroup * * This is used to make sure a child cgroup won't have more privileges * than its parent */ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, struct dev_exception_item *refex, enum devcg_behavior behavior) { bool match = false; RCU_LOCKDEP_WARN(!rcu_read_lock_held() && !lockdep_is_held(&devcgroup_mutex), "device_cgroup:verify_new_ex called without proper synchronization"); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { if (behavior == DEVCG_DEFAULT_ALLOW) { /* * new exception in the child doesn't matter, only * adding extra restrictions */ return true; } else { /* * new exception in the child will add more devices * that can be accessed, so it can't match any of * parent's exceptions, even slightly */ match = match_exception_partial(&dev_cgroup->exceptions, refex->type, refex->major, refex->minor, refex->access); if (match) return false; return true; } } else { /* * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore * the new exception will add access to more devices and must * be contained completely in a parent's exception to be * allowed */ match = match_exception(&dev_cgroup->exceptions, refex->type, refex->major, refex->minor,
refex->access); if (match) /* parent has an exception that matches the proposed */ return true; else return false; } return false; } /* * parent_has_perm: * when adding a new allow rule to a device exception list, the rule * must be allowed in the parent device */ static int parent_has_perm(struct dev_cgroup *childcg, struct dev_exception_item *ex) { struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent); if (!parent) return 1; return verify_new_ex(parent, ex, childcg->behavior); } /** * parent_allows_removal - verify if it's ok to remove an exception * @childcg: child cgroup from where the exception will be removed * @ex: exception being removed * * When removing an exception in cgroups with default ALLOW policy, it must * be checked if removing it will give the child cgroup more access than the * parent. * * Return: true if it's ok to remove exception, false otherwise */ static bool parent_allows_removal(struct dev_cgroup *childcg, struct dev_exception_item *ex) { struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent); if (!parent) return true; /* It's always allowed to remove access to devices */ if (childcg->behavior == DEVCG_DEFAULT_DENY) return true; /* * Make sure you're not removing part or a whole exception existing in * the parent cgroup */ return !match_exception_partial(&parent->exceptions, ex->type, ex->major, ex->minor, ex->access); } /** * may_allow_all - checks if it's possible to change the behavior to * allow based on parent's rules. * @parent: device cgroup's parent * returns: != 0 in case it's allowed, 0 otherwise */ static inline int may_allow_all(struct dev_cgroup *parent) { if (!parent) return 1; return parent->behavior == DEVCG_DEFAULT_ALLOW; } /** * revalidate_active_exceptions - walks through the active exception list and * revalidates the exceptions based on parent's * behavior and exceptions. The exceptions that * are no longer valid will be removed. * Called with devcgroup_mutex held. * @devcg: cgroup which exceptions will be checked * * This is one of the three key functions for hierarchy implementation. * This function is responsible for re-evaluating all the cgroup's active * exceptions due to a parent's exception change. * Refer to Documentation/admin-guide/cgroup-v1/devices.rst for more details. */ static void revalidate_active_exceptions(struct dev_cgroup *devcg) { struct dev_exception_item *ex; struct list_head *this, *tmp; list_for_each_safe(this, tmp, &devcg->exceptions) { ex = container_of(this, struct dev_exception_item, list); if (!parent_has_perm(devcg, ex)) dev_exception_rm(devcg, ex); } } /** * propagate_exception - propagates a new exception to the children * @devcg_root: device cgroup that added a new exception * @ex: new exception to be propagated * * returns: 0 in case of success, != 0 in case of error */ static int propagate_exception(struct dev_cgroup *devcg_root, struct dev_exception_item *ex) { struct cgroup_subsys_state *pos; int rc = 0; rcu_read_lock(); css_for_each_descendant_pre(pos, &devcg_root->css) { struct dev_cgroup *devcg = css_to_devcgroup(pos); /* * Because devcgroup_mutex is held, no devcg will become * online or offline during the tree walk (see on/offline * methods), and online ones are safe to access outside RCU * read lock without bumping refcnt. 
*/ if (pos == &devcg_root->css || !is_devcg_online(devcg)) continue; rcu_read_unlock(); /* * in case both root's behavior and devcg is allow, a new * restriction means adding to the exception list */ if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW && devcg->behavior == DEVCG_DEFAULT_ALLOW) { rc = dev_exception_add(devcg, ex); if (rc) return rc; } else { /* * in the other possible cases: * root's behavior: allow, devcg's: deny * root's behavior: deny, devcg's: deny * the exception will be removed */ dev_exception_rm(devcg, ex); } revalidate_active_exceptions(devcg); rcu_read_lock(); } rcu_read_unlock(); return rc; } /* * Modify the exception list using allow/deny rules. * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD * so we can give a container CAP_MKNOD to let it create devices but not * modify the exception list. * It seems likely we'll want to add a CAP_CONTAINER capability to allow * us to also grant CAP_SYS_ADMIN to containers without giving away the * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN * * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting * new access is only allowed if you're in the top-level cgroup, or your * parent cgroup has the access you're asking for. */ static int devcgroup_update_access(struct dev_cgroup *devcgroup, int filetype, char *buffer) { const char *b; char temp[12]; /* 11 + 1 characters needed for a u32 */ int count, rc = 0; struct dev_exception_item ex; struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent); struct dev_cgroup tmp_devcgrp; if (!capable(CAP_SYS_ADMIN)) return -EPERM; memset(&ex, 0, sizeof(ex)); memset(&tmp_devcgrp, 0, sizeof(tmp_devcgrp)); b = buffer; switch (*b) { case 'a': switch (filetype) { case DEVCG_ALLOW: if (css_has_online_children(&devcgroup->css)) return -EINVAL; if (!may_allow_all(parent)) return -EPERM; if (!parent) { devcgroup->behavior = DEVCG_DEFAULT_ALLOW; dev_exception_clean(devcgroup); break; } INIT_LIST_HEAD(&tmp_devcgrp.exceptions); rc = dev_exceptions_copy(&tmp_devcgrp.exceptions, &devcgroup->exceptions); if (rc) return rc; dev_exception_clean(devcgroup); rc = dev_exceptions_copy(&devcgroup->exceptions, &parent->exceptions); if (rc) { dev_exceptions_move(&devcgroup->exceptions, &tmp_devcgrp.exceptions); return rc; } devcgroup->behavior = DEVCG_DEFAULT_ALLOW; dev_exception_clean(&tmp_devcgrp); break; case DEVCG_DENY: if (css_has_online_children(&devcgroup->css)) return -EINVAL; dev_exception_clean(devcgroup); devcgroup->behavior = DEVCG_DEFAULT_DENY; break; default: return -EINVAL; } return 0; case 'b': ex.type = DEVCG_DEV_BLOCK; break; case 'c': ex.type = DEVCG_DEV_CHAR; break; default: return -EINVAL; } b++; if (!isspace(*b)) return -EINVAL; b++; if (*b == '*') { ex.major = ~0; b++; } else if (isdigit(*b)) { memset(temp, 0, sizeof(temp)); for (count = 0; count < sizeof(temp) - 1; count++) { temp[count] = *b; b++; if (!isdigit(*b)) break; } rc = kstrtou32(temp, 10, &ex.major); if (rc) return -EINVAL; } else { return -EINVAL; } if (*b != ':') return -EINVAL; b++; /* read minor */ if (*b == '*') { ex.minor = ~0; b++; } else if (isdigit(*b)) { memset(temp, 0, sizeof(temp)); for (count = 0; count < sizeof(temp) - 1; count++) { temp[count] = *b; b++; if (!isdigit(*b)) break; } rc = kstrtou32(temp, 10, &ex.minor); if (rc) return -EINVAL; } else { return -EINVAL; } if (!isspace(*b)) return -EINVAL; for (b++, count = 0; count < 3; count++, b++) { switch (*b) { case 'r': ex.access |= DEVCG_ACC_READ; break; case 'w': ex.access |= 
DEVCG_ACC_WRITE; break; case 'm': ex.access |= DEVCG_ACC_MKNOD; break; case '\n': case '\0': count = 3; break; default: return -EINVAL; } } switch (filetype) { case DEVCG_ALLOW: /* * If the default policy is to allow, try to remove * a matching exception instead. And be silent about it: we * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { /* Check if the parent allows removing it first */ if (!parent_allows_removal(devcgroup, &ex)) return -EPERM; dev_exception_rm(devcgroup, &ex); break; } if (!parent_has_perm(devcgroup, &ex)) return -EPERM; rc = dev_exception_add(devcgroup, &ex); break; case DEVCG_DENY: /* * If the default policy is to deny, try to remove * a matching exception instead. And be silent about it: we * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_DENY) dev_exception_rm(devcgroup, &ex); else rc = dev_exception_add(devcgroup, &ex); if (rc) break; /* we only propagate new restrictions */ rc = propagate_exception(devcgroup, &ex); break; default: rc = -EINVAL; } return rc; } static ssize_t devcgroup_access_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { int retval; mutex_lock(&devcgroup_mutex); retval = devcgroup_update_access(css_to_devcgroup(of_css(of)), of_cft(of)->private, strstrip(buf)); mutex_unlock(&devcgroup_mutex); return retval ?: nbytes; } static struct cftype dev_cgroup_files[] = { { .name = "allow", .write = devcgroup_access_write, .private = DEVCG_ALLOW, }, { .name = "deny", .write = devcgroup_access_write, .private = DEVCG_DENY, }, { .name = "list", .seq_show = devcgroup_seq_show, .private = DEVCG_LIST, }, { } /* terminate */ }; struct cgroup_subsys devices_cgrp_subsys = { .css_alloc = devcgroup_css_alloc, .css_free = devcgroup_css_free, .css_online = devcgroup_online, .css_offline = devcgroup_offline, .legacy_cftypes = dev_cgroup_files, }; /** * devcgroup_legacy_check_permission - checks if an inode operation is permitted * @type: device type * @major: device major number * @minor: device minor number * @access: combination of DEVCG_ACC_WRITE, DEVCG_ACC_READ and DEVCG_ACC_MKNOD * * returns 0 on success, -EPERM in case the operation is not permitted */ static int devcgroup_legacy_check_permission(short type, u32 major, u32 minor, short access) { struct dev_cgroup *dev_cgroup; bool rc; rcu_read_lock(); dev_cgroup = task_devcgroup(current); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) /* Can't match any of the exceptions, even partially */ rc = !match_exception_partial(&dev_cgroup->exceptions, type, major, minor, access); else /* Need to match completely one exception to be allowed */ rc = match_exception(&dev_cgroup->exceptions, type, major, minor, access); rcu_read_unlock(); if (!rc) return -EPERM; return 0; } #endif /* CONFIG_CGROUP_DEVICE */ #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) int devcgroup_check_permission(short type, u32 major, u32 minor, short access) { int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); if (rc) return rc; #ifdef CONFIG_CGROUP_DEVICE return devcgroup_legacy_check_permission(type, major, minor, access); #else /* CONFIG_CGROUP_DEVICE */ return 0; #endif /* CONFIG_CGROUP_DEVICE */ } EXPORT_SYMBOL(devcgroup_check_permission); #endif /* defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) */
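/*
 * Example (not part of this file): a sketch of how a kernel-side caller
 * would consult the device cgroup before granting access to a device node.
 * The function is hypothetical; real call sites normally go through the
 * devcgroup_inode_permission()/devcgroup_inode_mknod() wrappers declared in
 * <linux/device_cgroup.h>.
 */
#if 0
static int foo_open_char_device(u32 major, u32 minor)
{
	int ret;

	/* deny the open if neither BPF nor the legacy exception list allows it */
	ret = devcgroup_check_permission(DEVCG_DEV_CHAR, major, minor,
					 DEVCG_ACC_READ | DEVCG_ACC_WRITE);
	if (ret)
		return ret;	/* typically -EPERM */

	/* ... proceed with the actual open ... */
	return 0;
}
#endif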
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PGALLOC_TRACK_H
#define _LINUX_PGALLOC_TRACK_H

#if defined(CONFIG_MMU)
static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
				     unsigned long address,
				     pgtbl_mod_mask *mod_mask)
{
	if (unlikely(pgd_none(*pgd))) {
		if (__p4d_alloc(mm, pgd, address))
			return NULL;
		*mod_mask |= PGTBL_PGD_MODIFIED;
	}
	return p4d_offset(pgd, address);
}

static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
				     unsigned long address,
				     pgtbl_mod_mask *mod_mask)
{
	if (unlikely(p4d_none(*p4d))) {
		if (__pud_alloc(mm, p4d, address))
			return NULL;
		*mod_mask |= PGTBL_P4D_MODIFIED;
	}
	return pud_offset(p4d, address);
}

static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
				     unsigned long address,
				     pgtbl_mod_mask *mod_mask)
{
	if (unlikely(pud_none(*pud))) {
		if (__pmd_alloc(mm, pud, address))
			return NULL;
		*mod_mask |= PGTBL_PUD_MODIFIED;
	}
	return pmd_offset(pud, address);
}
#endif /* CONFIG_MMU */

#define pte_alloc_kernel_track(pmd, address, mask)			\
	((unlikely(pmd_none(*(pmd))) &&					\
	  (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
		NULL: pte_offset_kernel(pmd, address))

#endif /* _LINUX_PGALLOC_TRACK_H */
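/*
 * Example (not compiled): a sketch of how the *_alloc_track() helpers above
 * are meant to be chained when populating kernel page tables, accumulating
 * the modifications in a pgtbl_mod_mask so the caller can sync mappings only
 * when something was actually allocated. The function name is hypothetical;
 * the vmalloc and ioremap paths follow this pattern.
 */
#if 0
static int foo_populate_kernel_pte(unsigned long addr, pgtbl_mod_mask *mask)
{
	pgd_t *pgd = pgd_offset_k(addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
	if (!p4d)
		return -ENOMEM;
	pud = pud_alloc_track(&init_mm, p4d, addr, mask);
	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
	if (!pmd)
		return -ENOMEM;
	/* the macro both allocates the PTE page and records PGTBL_PMD_MODIFIED */
	pte = pte_alloc_kernel_track(pmd, addr, mask);
	if (!pte)
		return -ENOMEM;

	/*
	 * ... set the PTE here; afterwards the caller would check *mask and,
	 * if needed, call arch_sync_kernel_mappings() over the range ...
	 */
	return 0;
}
#endif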
// SPDX-License-Identifier: GPL-2.0 /* USB Driver layer for GSM modems Copyright (C) 2005 Matthias Urlichs <smurf@smurf.noris.de> Portions copied from the Keyspan driver by Hugh Blemings <hugh@blemings.org> History: see the git log. Work sponsored by: Sigos GmbH, Germany <info@sigos.de> This driver exists because the "normal" serial driver doesn't work too well with GSM modems.
Issues: - data loss -- one single Receive URB is not nearly enough - controlling the baud rate doesn't make sense */ #define DRIVER_AUTHOR "Matthias Urlichs <smurf@smurf.noris.de>" #define DRIVER_DESC "USB Driver for GSM modems" #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/serial.h> #include <linux/serial.h> #include "usb-wwan.h" /* * Generate DTR/RTS signals on the port using the SET_CONTROL_LINE_STATE request * in CDC ACM. */ static int usb_wwan_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct usb_wwan_port_private *portdata; int val = 0; int ifnum; int res; portdata = usb_get_serial_port_data(port); if (portdata->dtr_state) val |= USB_CDC_CTRL_DTR; if (portdata->rts_state) val |= USB_CDC_CTRL_RTS; ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber; res = usb_autopm_get_interface(serial->interface); if (res) return res; res = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), USB_CDC_REQ_SET_CONTROL_LINE_STATE, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, val, ifnum, NULL, 0, USB_CTRL_SET_TIMEOUT); usb_autopm_put_interface(port->serial->interface); return res; } void usb_wwan_dtr_rts(struct usb_serial_port *port, int on) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; intfdata = usb_get_serial_data(port->serial); if (!intfdata->use_send_setup) return; portdata = usb_get_serial_port_data(port); /* FIXME: locking */ portdata->rts_state = on; portdata->dtr_state = on; usb_wwan_send_setup(port); } EXPORT_SYMBOL(usb_wwan_dtr_rts); int usb_wwan_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned int value; struct usb_wwan_port_private *portdata; portdata = usb_get_serial_port_data(port); value = ((portdata->rts_state) ? TIOCM_RTS : 0) | ((portdata->dtr_state) ? TIOCM_DTR : 0) | ((portdata->cts_state) ? TIOCM_CTS : 0) | ((portdata->dsr_state) ? TIOCM_DSR : 0) | ((portdata->dcd_state) ? TIOCM_CAR : 0) | ((portdata->ri_state) ? TIOCM_RNG : 0); return value; } EXPORT_SYMBOL(usb_wwan_tiocmget); int usb_wwan_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(port->serial); if (!intfdata->use_send_setup) return -EINVAL; /* FIXME: what locks portdata fields ? 
*/ if (set & TIOCM_RTS) portdata->rts_state = 1; if (set & TIOCM_DTR) portdata->dtr_state = 1; if (clear & TIOCM_RTS) portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; return usb_wwan_send_setup(port); } EXPORT_SYMBOL(usb_wwan_tiocmset); int usb_wwan_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; int i; int left, todo; struct urb *this_urb = NULL; /* spurious */ int err; unsigned long flags; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(port->serial); dev_dbg(&port->dev, "%s: write (%d chars)\n", __func__, count); left = count; for (i = 0; left > 0 && i < N_OUT_URB; i++) { todo = left; if (todo > OUT_BUFLEN) todo = OUT_BUFLEN; this_urb = portdata->out_urbs[i]; if (test_and_set_bit(i, &portdata->out_busy)) { if (time_before(jiffies, portdata->tx_start_time[i] + 10 * HZ)) continue; usb_unlink_urb(this_urb); continue; } dev_dbg(&port->dev, "%s: endpoint %d buf %d\n", __func__, usb_pipeendpoint(this_urb->pipe), i); err = usb_autopm_get_interface_async(port->serial->interface); if (err < 0) { clear_bit(i, &portdata->out_busy); break; } /* send the data */ memcpy(this_urb->transfer_buffer, buf, todo); this_urb->transfer_buffer_length = todo; spin_lock_irqsave(&intfdata->susp_lock, flags); if (intfdata->suspended) { usb_anchor_urb(this_urb, &portdata->delayed); spin_unlock_irqrestore(&intfdata->susp_lock, flags); } else { intfdata->in_flight++; spin_unlock_irqrestore(&intfdata->susp_lock, flags); err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit urb %d failed: %d\n", __func__, i, err); clear_bit(i, &portdata->out_busy); spin_lock_irqsave(&intfdata->susp_lock, flags); intfdata->in_flight--; spin_unlock_irqrestore(&intfdata->susp_lock, flags); usb_autopm_put_interface_async(port->serial->interface); break; } } portdata->tx_start_time[i] = jiffies; buf += todo; left -= todo; } count -= left; dev_dbg(&port->dev, "%s: wrote (did %d)\n", __func__, count); return count; } EXPORT_SYMBOL(usb_wwan_write); static void usb_wwan_indat_callback(struct urb *urb) { int err; int endpoint; struct usb_serial_port *port; struct device *dev; unsigned char *data = urb->transfer_buffer; int status = urb->status; endpoint = usb_pipeendpoint(urb->pipe); port = urb->context; dev = &port->dev; if (status) { dev_dbg(dev, "%s: nonzero status: %d on endpoint %02x.\n", __func__, status, endpoint); /* don't resubmit on fatal errors */ if (status == -ESHUTDOWN || status == -ENOENT) return; } else { if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } else dev_dbg(dev, "%s: empty read urb received\n", __func__); } /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { if (err != -EPERM && err != -ENODEV) { dev_err(dev, "%s: resubmit read urb failed. 
(%d)\n", __func__, err); /* busy also in error unless we are killed */ usb_mark_last_busy(port->serial->dev); } } else { usb_mark_last_busy(port->serial->dev); } } static void usb_wwan_outdat_callback(struct urb *urb) { struct usb_serial_port *port; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; unsigned long flags; int i; port = urb->context; intfdata = usb_get_serial_data(port->serial); usb_serial_port_softint(port); usb_autopm_put_interface_async(port->serial->interface); portdata = usb_get_serial_port_data(port); spin_lock_irqsave(&intfdata->susp_lock, flags); intfdata->in_flight--; spin_unlock_irqrestore(&intfdata->susp_lock, flags); for (i = 0; i < N_OUT_URB; ++i) { if (portdata->out_urbs[i] == urb) { smp_mb__before_atomic(); clear_bit(i, &portdata->out_busy); break; } } } unsigned int usb_wwan_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; int i; unsigned int data_len = 0; struct urb *this_urb; portdata = usb_get_serial_port_data(port); for (i = 0; i < N_OUT_URB; i++) { this_urb = portdata->out_urbs[i]; if (this_urb && !test_bit(i, &portdata->out_busy)) data_len += OUT_BUFLEN; } dev_dbg(&port->dev, "%s: %u\n", __func__, data_len); return data_len; } EXPORT_SYMBOL(usb_wwan_write_room); unsigned int usb_wwan_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_wwan_port_private *portdata; int i; unsigned int data_len = 0; struct urb *this_urb; portdata = usb_get_serial_port_data(port); for (i = 0; i < N_OUT_URB; i++) { this_urb = portdata->out_urbs[i]; /* FIXME: This locking is insufficient as this_urb may go unused during the test */ if (this_urb && test_bit(i, &portdata->out_busy)) data_len += this_urb->transfer_buffer_length; } dev_dbg(&port->dev, "%s: %u\n", __func__, data_len); return data_len; } EXPORT_SYMBOL(usb_wwan_chars_in_buffer); int usb_wwan_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata; struct usb_serial *serial = port->serial; int i, err; struct urb *urb; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(serial); if (port->interrupt_in_urb) { err = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (err) { dev_err(&port->dev, "%s: submit int urb failed: %d\n", __func__, err); } } /* Start reading from the IN endpoint */ for (i = 0; i < N_IN_URB; i++) { urb = portdata->in_urbs[i]; if (!urb) continue; err = usb_submit_urb(urb, GFP_KERNEL); if (err) { dev_err(&port->dev, "%s: submit read urb %d failed: %d\n", __func__, i, err); } } spin_lock_irq(&intfdata->susp_lock); if (++intfdata->open_ports == 1) serial->interface->needs_remote_wakeup = 1; spin_unlock_irq(&intfdata->susp_lock); /* this balances a get in the generic USB serial code */ usb_autopm_put_interface(serial->interface); return 0; } EXPORT_SYMBOL(usb_wwan_open); static void unbusy_queued_urb(struct urb *urb, struct usb_wwan_port_private *portdata) { int i; for (i = 0; i < N_OUT_URB; i++) { if (urb == portdata->out_urbs[i]) { clear_bit(i, &portdata->out_busy); break; } } } void usb_wwan_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; portdata = usb_get_serial_port_data(port); /* * Need to take susp_lock to make sure port is not already being * resumed, but no need to hold 
it due to the tty-port initialized * flag. */ spin_lock_irq(&intfdata->susp_lock); if (--intfdata->open_ports == 0) serial->interface->needs_remote_wakeup = 0; spin_unlock_irq(&intfdata->susp_lock); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; unbusy_queued_urb(urb, portdata); usb_autopm_put_interface_async(serial->interface); } for (i = 0; i < N_IN_URB; i++) usb_kill_urb(portdata->in_urbs[i]); for (i = 0; i < N_OUT_URB; i++) usb_kill_urb(portdata->out_urbs[i]); usb_kill_urb(port->interrupt_in_urb); usb_autopm_get_interface_no_resume(serial->interface); } EXPORT_SYMBOL(usb_wwan_close); static struct urb *usb_wwan_setup_urb(struct usb_serial_port *port, int endpoint, int dir, void *ctx, char *buf, int len, void (*callback) (struct urb *)) { struct usb_serial *serial = port->serial; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); /* No ISO */ if (!urb) return NULL; usb_fill_bulk_urb(urb, serial->dev, usb_sndbulkpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx); if (intfdata->use_zlp && dir == USB_DIR_OUT) urb->transfer_flags |= URB_ZERO_PACKET; return urb; } int usb_wwan_port_probe(struct usb_serial_port *port) { struct usb_wwan_port_private *portdata; struct urb *urb; u8 *buffer; int i; if (!port->bulk_in_size || !port->bulk_out_size) return -ENODEV; portdata = kzalloc(sizeof(*portdata), GFP_KERNEL); if (!portdata) return -ENOMEM; init_usb_anchor(&portdata->delayed); for (i = 0; i < N_IN_URB; i++) { buffer = (u8 *)__get_free_page(GFP_KERNEL); if (!buffer) goto bail_out_error; portdata->in_buffer[i] = buffer; urb = usb_wwan_setup_urb(port, port->bulk_in_endpointAddress, USB_DIR_IN, port, buffer, IN_BUFLEN, usb_wwan_indat_callback); portdata->in_urbs[i] = urb; } for (i = 0; i < N_OUT_URB; i++) { buffer = kmalloc(OUT_BUFLEN, GFP_KERNEL); if (!buffer) goto bail_out_error2; portdata->out_buffer[i] = buffer; urb = usb_wwan_setup_urb(port, port->bulk_out_endpointAddress, USB_DIR_OUT, port, buffer, OUT_BUFLEN, usb_wwan_outdat_callback); portdata->out_urbs[i] = urb; } usb_set_serial_port_data(port, portdata); return 0; bail_out_error2: for (i = 0; i < N_OUT_URB; i++) { usb_free_urb(portdata->out_urbs[i]); kfree(portdata->out_buffer[i]); } bail_out_error: for (i = 0; i < N_IN_URB; i++) { usb_free_urb(portdata->in_urbs[i]); free_page((unsigned long)portdata->in_buffer[i]); } kfree(portdata); return -ENOMEM; } EXPORT_SYMBOL_GPL(usb_wwan_port_probe); void usb_wwan_port_remove(struct usb_serial_port *port) { int i; struct usb_wwan_port_private *portdata; portdata = usb_get_serial_port_data(port); usb_set_serial_port_data(port, NULL); for (i = 0; i < N_IN_URB; i++) { usb_free_urb(portdata->in_urbs[i]); free_page((unsigned long)portdata->in_buffer[i]); } for (i = 0; i < N_OUT_URB; i++) { usb_free_urb(portdata->out_urbs[i]); kfree(portdata->out_buffer[i]); } kfree(portdata); } EXPORT_SYMBOL(usb_wwan_port_remove); #ifdef CONFIG_PM static void stop_urbs(struct usb_serial *serial) { int i, j; struct usb_serial_port *port; struct usb_wwan_port_private *portdata; for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; portdata = usb_get_serial_port_data(port); if (!portdata) continue; for (j = 0; j < N_IN_URB; j++) usb_kill_urb(portdata->in_urbs[j]); for (j = 0; j < N_OUT_URB; j++) usb_kill_urb(portdata->out_urbs[j]); usb_kill_urb(port->interrupt_in_urb); } } int usb_wwan_suspend(struct usb_serial *serial, pm_message_t message) { struct usb_wwan_intf_private *intfdata = 
usb_get_serial_data(serial); spin_lock_irq(&intfdata->susp_lock); if (PMSG_IS_AUTO(message)) { if (intfdata->in_flight) { spin_unlock_irq(&intfdata->susp_lock); return -EBUSY; } } intfdata->suspended = 1; spin_unlock_irq(&intfdata->susp_lock); stop_urbs(serial); return 0; } EXPORT_SYMBOL(usb_wwan_suspend); /* Caller must hold susp_lock. */ static int usb_wwan_submit_delayed_urbs(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct usb_wwan_intf_private *data = usb_get_serial_data(serial); struct usb_wwan_port_private *portdata; struct urb *urb; int err_count = 0; int err; portdata = usb_get_serial_port_data(port); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit urb failed: %d\n", __func__, err); err_count++; unbusy_queued_urb(urb, portdata); usb_autopm_put_interface_async(serial->interface); continue; } data->in_flight++; } if (err_count) return -EIO; return 0; } int usb_wwan_resume(struct usb_serial *serial) { int i, j; struct usb_serial_port *port; struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct usb_wwan_port_private *portdata; struct urb *urb; int err; int err_count = 0; spin_lock_irq(&intfdata->susp_lock); for (i = 0; i < serial->num_ports; i++) { port = serial->port[i]; if (!tty_port_initialized(&port->port)) continue; portdata = usb_get_serial_port_data(port); if (port->interrupt_in_urb) { err = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s: submit int urb failed: %d\n", __func__, err); err_count++; } } err = usb_wwan_submit_delayed_urbs(port); if (err) err_count++; for (j = 0; j < N_IN_URB; j++) { urb = portdata->in_urbs[j]; err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { dev_err(&port->dev, "%s: submit read urb %d failed: %d\n", __func__, i, err); err_count++; } } } intfdata->suspended = 0; spin_unlock_irq(&intfdata->susp_lock); if (err_count) return -EIO; return 0; } EXPORT_SYMBOL(usb_wwan_resume); #endif MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2");
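For context, a hedged sketch of how a port driver might consume the exported usb_wwan_* helpers above, in the spirit of drivers/usb/serial/option.c: the driver name, device ID and attach logic below are illustrative assumptions, not part of this file.

/*
 * Hypothetical consumer of the usb_wwan helpers; the VID/PID, names and
 * the simple attach/release pair are placeholders for illustration only.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>
#include "usb-wwan.h"

static const struct usb_device_id my_wwan_ids[] = {
	{ USB_DEVICE(0x1234, 0x5678) },	/* placeholder VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(usb, my_wwan_ids);

static int my_wwan_attach(struct usb_serial *serial)
{
	struct usb_wwan_intf_private *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	spin_lock_init(&data->susp_lock);
	data->use_send_setup = 1;	/* enable usb_wwan_dtr_rts()/tiocmset() */
	usb_set_serial_data(serial, data);

	return 0;
}

static void my_wwan_release(struct usb_serial *serial)
{
	kfree(usb_get_serial_data(serial));
}

static struct usb_serial_driver my_wwan_device = {
	.driver = {
		.owner		= THIS_MODULE,
		.name		= "my_wwan",
	},
	.description		= "Example WWAN serial port",
	.id_table		= my_wwan_ids,
	.num_ports		= 1,
	.attach			= my_wwan_attach,
	.release		= my_wwan_release,
	.open			= usb_wwan_open,
	.close			= usb_wwan_close,
	.dtr_rts		= usb_wwan_dtr_rts,
	.write			= usb_wwan_write,
	.write_room		= usb_wwan_write_room,
	.chars_in_buffer	= usb_wwan_chars_in_buffer,
	.tiocmget		= usb_wwan_tiocmget,
	.tiocmset		= usb_wwan_tiocmset,
	.port_probe		= usb_wwan_port_probe,
	.port_remove		= usb_wwan_port_remove,
#ifdef CONFIG_PM
	.suspend		= usb_wwan_suspend,
	.resume			= usb_wwan_resume,
#endif
};

static struct usb_serial_driver * const my_serial_drivers[] = {
	&my_wwan_device, NULL
};

module_usb_serial_driver(my_serial_drivers, my_wwan_ids);
MODULE_LICENSE("GPL v2");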
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>

static inline int phys_addr_valid(resource_size_t addr)
{
#ifdef CONFIG_PHYS_ADDR_T_64BIT
	return !(addr >> boot_cpu_data.x86_phys_bits);
#else
	return 1;
#endif
}
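A minimal usage sketch, assuming a hypothetical my_map_device() helper with visibility of the header above: phys_addr_valid() rejects physical ranges wider than the CPU's physical address bits before a mapping is attempted.

/*
 * Illustrative only: my_map_device() and its use of ioremap() are
 * assumptions, not taken from the header above.
 */
#include <linux/io.h>
#include <linux/types.h>

static void __iomem *my_map_device(resource_size_t phys, size_t size)
{
	/* Reject empty or out-of-range requests before mapping. */
	if (!size || !phys_addr_valid(phys) || !phys_addr_valid(phys + size - 1))
		return NULL;

	return ioremap(phys, size);
}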
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_GENERIC_NETLINK_H #define __NET_GENERIC_NETLINK_H #include <linux/genetlink.h> #include <net/netlink.h> #include <net/net_namespace.h> #define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN) /* Binding to multicast group requires %CAP_NET_ADMIN */ #define GENL_MCAST_CAP_NET_ADMIN BIT(0) /* Binding to multicast group requires %CAP_SYS_ADMIN */ #define GENL_MCAST_CAP_SYS_ADMIN BIT(1) /** * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family * @flags: GENL_MCAST_* flags */ struct genl_multicast_group { char name[GENL_NAMSIZ]; u8 flags; }; struct genl_split_ops; struct genl_info; /** * struct genl_family - generic netlink family * @hdrsize: length of user specific header in bytes * @name: name of family * @version: protocol version * @maxattr: maximum number of attributes supported * @policy: netlink policy * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them * @parallel_ops: operations can be called in parallel and
aren't * synchronized by the core genetlink code * @pre_doit: called before an operation's doit callback, it may * do additional, common, filtering and return an error * @post_doit: called after an operation's doit callback, it may * undo operations done by pre_doit, for example release locks * @bind: called when family multicast group is added to a netlink socket * @unbind: called when family multicast group is removed from a netlink socket * @module: pointer to the owning module (set to THIS_MODULE) * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups * @resv_start_op: first operation for which reserved fields of the header * can be validated and policies are required (see below); * new families should leave this field at zero * @ops: the operations supported by this family * @n_ops: number of operations supported by this family * @small_ops: the small-struct operations supported by this family * @n_small_ops: number of small-struct operations supported by this family * @split_ops: the split do/dump form of operation definition * @n_split_ops: number of entries in @split_ops, not that with split do/dump * ops the number of entries is not the same as number of commands * @sock_priv_size: the size of per-socket private memory * @sock_priv_init: the per-socket private memory initializer * @sock_priv_destroy: the per-socket private memory destructor * * Attribute policies (the combination of @policy and @maxattr fields) * can be attached at the family level or at the operation level. * If both are present the per-operation policy takes precedence. * For operations before @resv_start_op lack of policy means that the core * will perform no attribute parsing or validation. For newer operations * if policy is not provided core will reject all TLV attributes. 
*/ struct genl_family { unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; unsigned int maxattr; u8 netnsok:1; u8 parallel_ops:1; u8 n_ops; u8 n_small_ops; u8 n_split_ops; u8 n_mcgrps; u8 resv_start_op; const struct nla_policy *policy; int (*pre_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void (*post_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int (*bind)(int mcgrp); void (*unbind)(int mcgrp); const struct genl_ops * ops; const struct genl_small_ops *small_ops; const struct genl_split_ops *split_ops; const struct genl_multicast_group *mcgrps; struct module *module; size_t sock_priv_size; void (*sock_priv_init)(void *priv); void (*sock_priv_destroy)(void *priv); /* private: internal use only */ /* protocol family identifier */ int id; /* starting number of multicast group IDs in this family */ unsigned int mcgrp_offset; /* list of per-socket privs */ struct xarray *sock_privs; }; /** * struct genl_info - receiving information * @snd_seq: sending sequence number * @snd_portid: netlink portid of sender * @family: generic netlink family * @nlhdr: netlink message header * @genlhdr: generic netlink message header * @attrs: netlink attributes * @_net: network namespace * @user_ptr: user pointers * @extack: extended ACK report struct */ struct genl_info { u32 snd_seq; u32 snd_portid; const struct genl_family *family; const struct nlmsghdr * nlhdr; struct genlmsghdr * genlhdr; struct nlattr ** attrs; possible_net_t _net; void * user_ptr[2]; struct netlink_ext_ack *extack; }; static inline struct net *genl_info_net(const struct genl_info *info) { return read_pnet(&info->_net); } static inline void genl_info_net_set(struct genl_info *info, struct net *net) { write_pnet(&info->_net, net); } static inline void *genl_info_userhdr(const struct genl_info *info) { return (u8 *)info->genlhdr + GENL_HDRLEN; } #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) #define GENL_SET_ERR_MSG_FMT(info, msg, args...) \ NL_SET_ERR_MSG_FMT((info)->extack, msg, ##args) /* Report that a root attribute is missing */ #define GENL_REQ_ATTR_CHECK(info, attr) ({ \ const struct genl_info *__info = (info); \ \ NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \ }) enum genl_validate_flags { GENL_DONT_VALIDATE_STRICT = BIT(0), GENL_DONT_VALIDATE_DUMP = BIT(1), GENL_DONT_VALIDATE_DUMP_STRICT = BIT(2), }; /** * struct genl_small_ops - generic netlink operations (small version) * @cmd: command identifier * @internal_flags: flags used by the family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) * @validate: validation flags from enum genl_validate_flags * @doit: standard command callback * @dumpit: callback for dumpers * * This is a cut-down version of struct genl_ops for users who don't need * most of the ancillary infra and want to save space. 
*/ struct genl_small_ops { int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); u8 cmd; u8 internal_flags; u8 flags; u8 validate; }; /** * struct genl_ops - generic netlink operations * @cmd: command identifier * @internal_flags: flags used by the family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) * @maxattr: maximum number of attributes supported * @policy: netlink policy (takes precedence over family policy) * @validate: validation flags from enum genl_validate_flags * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps */ struct genl_ops { int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); const struct nla_policy *policy; unsigned int maxattr; u8 cmd; u8 internal_flags; u8 flags; u8 validate; }; /** * struct genl_split_ops - generic netlink operations (do/dump split version) * @cmd: command identifier * @internal_flags: flags used by the family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) * @validate: validation flags from enum genl_validate_flags * @policy: netlink policy (takes precedence over family policy) * @maxattr: maximum number of attributes supported * * Do callbacks: * @pre_doit: called before an operation's @doit callback, it may * do additional, common, filtering and return an error * @doit: standard command callback * @post_doit: called after an operation's @doit callback, it may * undo operations done by pre_doit, for example release locks * * Dump callbacks: * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * * Do callbacks can be used if %GENL_CMD_CAP_DO is set in @flags. * Dump callbacks can be used if %GENL_CMD_CAP_DUMP is set in @flags. * Exactly one of those flags must be set. */ struct genl_split_ops { union { struct { int (*pre_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int (*doit)(struct sk_buff *skb, struct genl_info *info); void (*post_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); }; struct { int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); }; }; const struct nla_policy *policy; unsigned int maxattr; u8 cmd; u8 internal_flags; u8 flags; u8 validate; }; /** * struct genl_dumpit_info - info that is available during dumpit op call * @op: generic netlink ops - for internal genl code usage * @attrs: netlink attributes * @info: struct genl_info describing the request */ struct genl_dumpit_info { struct genl_split_ops op; struct genl_info info; }; static inline const struct genl_dumpit_info * genl_dumpit_info(struct netlink_callback *cb) { return cb->data; } static inline const struct genl_info * genl_info_dump(struct netlink_callback *cb) { return &genl_dumpit_info(cb)->info; } /** * genl_info_init_ntf() - initialize genl_info for notifications * @info: genl_info struct to set up * @family: pointer to the genetlink family * @cmd: command to be used in the notification * * Initialize a locally declared struct genl_info to pass to various APIs. * Intended to be used when creating notifications. 
*/ static inline void genl_info_init_ntf(struct genl_info *info, const struct genl_family *family, u8 cmd) { struct genlmsghdr *hdr = (void *) &info->user_ptr[0]; memset(info, 0, sizeof(*info)); info->family = family; info->genlhdr = hdr; hdr->cmd = cmd; } static inline bool genl_info_is_ntf(const struct genl_info *info) { return !info->nlhdr; } void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk); void *genl_sk_priv_get(struct genl_family *family, struct sock *sk); int genl_register_family(struct genl_family *family); int genl_unregister_family(const struct genl_family *family); void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, const struct genl_family *family, int flags, u8 cmd); static inline void * __genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags) { return genlmsg_put(skb, info->snd_portid, info->snd_seq, info->family, flags, info->genlhdr->cmd); } /** * genlmsg_iput - start genetlink message based on genl_info * @skb: skb in which message header will be placed * @info: genl_info as provided to do/dump handlers * * Convenience wrapper which starts a genetlink message based on * information in user request. @info should be either the struct passed * by genetlink core to do/dump handlers (when constructing replies to * such requests) or a struct initialized by genl_info_init_ntf() * when constructing notifications. * * Returns pointer to new genetlink header. */ static inline void * genlmsg_iput(struct sk_buff *skb, const struct genl_info *info) { return __genlmsg_iput(skb, info, 0); } /** * genlmsg_nlhdr - Obtain netlink header from user specified header * @user_hdr: user header as returned from genlmsg_put() * * Returns pointer to netlink header. */ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) { return (struct nlmsghdr *)((char *)user_hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** * genlmsg_parse_deprecated - parse attributes of a genetlink message * @nlh: netlink message header * @family: genetlink message family * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct */ static inline int genlmsg_parse_deprecated(const struct nlmsghdr *nlh, const struct genl_family *family, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, policy, NL_VALIDATE_LIBERAL, extack); } /** * genlmsg_parse - parse attributes of a genetlink message * @nlh: netlink message header * @family: genetlink message family * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct */ static inline int genlmsg_parse(const struct nlmsghdr *nlh, const struct genl_family *family, struct nlattr *tb[], int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, policy, NL_VALIDATE_STRICT, extack); } /** * genl_dump_check_consistent - check if sequence is consistent and advertise if not * @cb: netlink callback structure that stores the sequence number * @user_hdr: user header as returned from genlmsg_put() * * Cf. nl_dump_check_consistent(), this just provides a wrapper to make it * simpler to use with generic netlink. 
*/ static inline void genl_dump_check_consistent(struct netlink_callback *cb, void *user_hdr) { nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr)); } /** * genlmsg_put_reply - Add generic netlink header to a reply message * @skb: socket buffer holding the message * @info: receiver info * @family: generic netlink family * @flags: netlink message flags * @cmd: generic netlink command * * Returns pointer to user specific header */ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_info *info, const struct genl_family *family, int flags, u8 cmd) { return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, flags, cmd); } /** * genlmsg_end - Finalize a generic netlink message * @skb: socket buffer the message is stored in * @hdr: user specific header */ static inline void genlmsg_end(struct sk_buff *skb, void *hdr) { nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** * genlmsg_cancel - Cancel construction of a generic netlink message * @skb: socket buffer the message is stored in * @hdr: generic netlink message header */ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) { if (hdr) nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** * genlmsg_multicast_netns_filtered - multicast a netlink message * to a specific netns with filter * function * @family: the generic netlink family * @net: the net namespace * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array * @flags: allocation flags * @filter: filter function * @filter_data: filter function private data * * Return: 0 on success, negative error code for failure. */ static inline int genlmsg_multicast_netns_filtered(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags, netlink_filter_fn filter, void *filter_data) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group, flags, filter, filter_data); } /** * genlmsg_multicast_netns - multicast a netlink message to a specific netns * @family: the generic netlink family * @net: the net namespace * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array * @flags: allocation flags */ static inline int genlmsg_multicast_netns(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { return genlmsg_multicast_netns_filtered(family, net, skb, portid, group, flags, NULL, NULL); } /** * genlmsg_multicast - multicast a netlink message to the default netns * @family: the generic netlink family * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array * @flags: allocation flags */ static inline int genlmsg_multicast(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { return genlmsg_multicast_netns(family, &init_net, skb, portid, group, flags); } /** * genlmsg_multicast_allns - multicast a netlink message to all net namespaces * @family: the generic netlink family * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array * @flags: allocation flags * * This function must hold the RTNL or rcu_read_lock(). 
*/ int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags); /** * genlmsg_unicast - unicast a netlink message * @net: network namespace to look up @portid in * @skb: netlink message as socket buffer * @portid: netlink portid of the destination socket */ static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 portid) { return nlmsg_unicast(net->genl_sock, skb, portid); } /** * genlmsg_reply - reply to a request * @skb: netlink message to be sent back * @info: receiver information */ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) { return genlmsg_unicast(genl_info_net(info), skb, info->snd_portid); } /** * genlmsg_data - head of message payload * @gnlh: genetlink message header */ static inline void *genlmsg_data(const struct genlmsghdr *gnlh) { return ((unsigned char *) gnlh + GENL_HDRLEN); } /** * genlmsg_len - length of message payload * @gnlh: genetlink message header */ static inline int genlmsg_len(const struct genlmsghdr *gnlh) { struct nlmsghdr *nlh = (struct nlmsghdr *)((unsigned char *)gnlh - NLMSG_HDRLEN); return (nlh->nlmsg_len - GENL_HDRLEN - NLMSG_HDRLEN); } /** * genlmsg_msg_size - length of genetlink message not including padding * @payload: length of message payload */ static inline int genlmsg_msg_size(int payload) { return GENL_HDRLEN + payload; } /** * genlmsg_total_size - length of genetlink message including padding * @payload: length of message payload */ static inline int genlmsg_total_size(int payload) { return NLMSG_ALIGN(genlmsg_msg_size(payload)); } /** * genlmsg_new - Allocate a new generic netlink message * @payload: size of the message payload * @flags: the type of memory to allocate. */ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) { return nlmsg_new(genlmsg_total_size(payload), flags); } /** * genl_set_err - report error to genetlink broadcast listeners * @family: the generic netlink family * @net: the network namespace to report the error to * @portid: the PORTID of a process that we want to skip (if any) * @group: the broadcast group that will notice the error * (this is the offset of the multicast group in the groups array) * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ static inline int genl_set_err(const struct genl_family *family, struct net *net, u32 portid, u32 group, int code) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return netlink_set_err(net->genl_sock, portid, group, code); } static inline int genl_has_listeners(const struct genl_family *family, struct net *net, unsigned int group) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return netlink_has_listeners(net->genl_sock, group); } #endif /* __NET_GENERIC_NETLINK_H */
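A hedged usage sketch of the API above, assuming a made-up "my_family" with one attribute, one do command and one multicast group: it registers the family, answers a request with genlmsg_iput()/genlmsg_reply(), and builds a notification with genl_info_init_ntf() before sending it with genlmsg_multicast().

/*
 * Illustrative only: all names, commands, attributes and values below are
 * invented for this example.
 */
#include <linux/module.h>
#include <net/genetlink.h>

enum { MY_ATTR_UNSPEC, MY_ATTR_VALUE, __MY_ATTR_MAX };
#define MY_ATTR_MAX (__MY_ATTR_MAX - 1)
enum { MY_CMD_UNSPEC, MY_CMD_GET, MY_CMD_NOTIFY };

static const struct nla_policy my_policy[MY_ATTR_MAX + 1] = {
	[MY_ATTR_VALUE] = { .type = NLA_U32 },
};

static const struct genl_multicast_group my_mcgrps[] = {
	{ .name = "events" },
};

/* doit handler: echo MY_ATTR_VALUE back to the sender via genlmsg_reply(). */
static int my_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *hdr;

	if (GENL_REQ_ATTR_CHECK(info, MY_ATTR_VALUE))
		return -EINVAL;

	msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	/* Reply header is derived from the request's genl_info. */
	hdr = genlmsg_iput(msg, info);
	if (!hdr || nla_put_u32(msg, MY_ATTR_VALUE,
				nla_get_u32(info->attrs[MY_ATTR_VALUE]))) {
		nlmsg_free(msg);
		return -EMSGSIZE;
	}
	genlmsg_end(msg, hdr);

	return genlmsg_reply(msg, info);
}

static const struct genl_small_ops my_small_ops[] = {
	{ .cmd = MY_CMD_GET, .doit = my_cmd_get_doit },
};

/* resv_start_op is left at zero, as the comment above recommends for new families. */
static struct genl_family my_family = {
	.name		= "my_family",
	.version	= 1,
	.maxattr	= MY_ATTR_MAX,
	.policy		= my_policy,
	.module		= THIS_MODULE,
	.small_ops	= my_small_ops,
	.n_small_ops	= ARRAY_SIZE(my_small_ops),
	.mcgrps		= my_mcgrps,
	.n_mcgrps	= ARRAY_SIZE(my_mcgrps),
};

/* Notification path: genl_info_init_ntf() + genlmsg_iput() + genlmsg_multicast(). */
static int __maybe_unused my_notify(u32 value)
{
	struct genl_info info;
	struct sk_buff *msg;
	void *hdr;

	genl_info_init_ntf(&info, &my_family, MY_CMD_NOTIFY);

	msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = genlmsg_iput(msg, &info);
	if (!hdr || nla_put_u32(msg, MY_ATTR_VALUE, value)) {
		nlmsg_free(msg);
		return -EMSGSIZE;
	}
	genlmsg_end(msg, hdr);

	/* Group 0 is the offset of "events" in my_mcgrps[]. */
	return genlmsg_multicast(&my_family, msg, 0, 0, GFP_KERNEL);
}

static int __init my_family_init(void)
{
	return genl_register_family(&my_family);
}
module_init(my_family_init);

static void __exit my_family_exit(void)
{
	genl_unregister_family(&my_family);
}
module_exit(my_family_exit);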
/* * kernel/cpuset.c * * Processor and Memory placement constraints for sets of tasks. * * Copyright (C) 2003 BULL SA. * Copyright (C) 2004-2007 Silicon Graphics, Inc. * Copyright (C) 2006 Google, Inc * * Portions derived from Patrick Mochel's sysfs code. * sysfs is Copyright (c) 2001-3 Patrick Mochel * * 2003-10-10 Written by Simon Derr. * 2003-10-22 Updates by Stephen Hemminger.
* 2004 May-July Rework by Paul Jackson. * 2006 Rework by Paul Menage to use generic cgroups * 2008 Rework of the scheduler domains and CPU hotplug handling * by Max Krasnyansky * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of the Linux * distribution for more details. */ #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/cpuset.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/mempolicy.h> #include <linux/mm.h> #include <linux/memory.h> #include <linux/export.h> #include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/sched/deadline.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/security.h> #include <linux/spinlock.h> #include <linux/oom.h> #include <linux/sched/isolation.h> #include <linux/cgroup.h> #include <linux/wait.h> #include <linux/workqueue.h> DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); /* * There could be abnormal cpuset configurations for cpu or memory * node binding, add this key to provide a quick low-cost judgment * of the situation. */ DEFINE_STATIC_KEY_FALSE(cpusets_insane_config_key); /* See "Frequency meter" comments, below. */ struct fmeter { int cnt; /* unprocessed events count */ int val; /* most recent output value */ time64_t time; /* clock (secs) when val computed */ spinlock_t lock; /* guards read or write of above */ }; /* * Invalid partition error code */ enum prs_errcode { PERR_NONE = 0, PERR_INVCPUS, PERR_INVPARENT, PERR_NOTPART, PERR_NOTEXCL, PERR_NOCPUS, PERR_HOTPLUG, PERR_CPUSEMPTY, PERR_HKEEPING, }; static const char * const perr_strings[] = { [PERR_INVCPUS] = "Invalid cpu list in cpuset.cpus.exclusive", [PERR_INVPARENT] = "Parent is an invalid partition root", [PERR_NOTPART] = "Parent is not a partition root", [PERR_NOTEXCL] = "Cpu list in cpuset.cpus not exclusive", [PERR_NOCPUS] = "Parent unable to distribute cpu downstream", [PERR_HOTPLUG] = "No cpu available due to hotplug", [PERR_CPUSEMPTY] = "cpuset.cpus is empty", [PERR_HKEEPING] = "partition config conflicts with housekeeping setup", }; struct cpuset { struct cgroup_subsys_state css; unsigned long flags; /* "unsigned long" so bitops work */ /* * On default hierarchy: * * The user-configured masks can only be changed by writing to * cpuset.cpus and cpuset.mems, and won't be limited by the * parent masks. * * The effective masks is the real masks that apply to the tasks * in the cpuset. They may be changed if the configured masks are * changed or hotplug happens. * * effective_mask == configured_mask & parent's effective_mask, * and if it ends up empty, it will inherit the parent's mask. * * * On legacy hierarchy: * * The user-configured masks are always the same with effective masks. */ /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; nodemask_t mems_allowed; /* effective CPUs and Memory Nodes allow to tasks */ cpumask_var_t effective_cpus; nodemask_t effective_mems; /* * Exclusive CPUs dedicated to current cgroup (default hierarchy only) * * This exclusive CPUs must be a subset of cpus_allowed. A parent * cgroup can only grant exclusive CPUs to one of its children. * * When the cgroup becomes a valid partition root, effective_xcpus * defaults to cpus_allowed if not set. 
 * The effective_cpus of a valid partition root comes solely from its
 * effective_xcpus, and some of the effective_xcpus may be distributed to
 * sub-partitions below & hence excluded from its effective_cpus.
	 */
	cpumask_var_t effective_xcpus;

	/*
	 * Exclusive CPUs as requested by the user (default hierarchy only)
	 */
	cpumask_var_t exclusive_cpus;

	/*
	 * The old Memory Nodes that tasks in this cpuset took on.
	 *
	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
	 * - A new cpuset's old_mems_allowed is initialized when some
	 *   task is moved into it.
	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
	 *   cpuset.mems_allowed and have tasks' nodemask updated, and
	 *   then old_mems_allowed is updated to mems_allowed.
	 */
	nodemask_t old_mems_allowed;

	struct fmeter fmeter;		/* memory_pressure filter */

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* partition number for rebuild_sched_domains() */
	int pn;

	/* for custom sched domain */
	int relax_domain_level;

	/* number of valid sub-partitions */
	int nr_subparts;

	/* partition root state */
	int partition_root_state;

	/*
	 * Default hierarchy only:
	 * use_parent_ecpus - set if using parent's effective_cpus
	 * child_ecpus_count - # of children with use_parent_ecpus set
	 */
	int use_parent_ecpus;
	int child_ecpus_count;

	/*
	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
	 * know when to rebuild associated root domain bandwidth information.
	 */
	int nr_deadline_tasks;
	int nr_migrate_dl_tasks;
	u64 sum_migrate_dl_bw;

	/* Invalid partition error code, not lock protected */
	enum prs_errcode prs_err;

	/* Handle for cpuset.cpus.partition */
	struct cgroup_file partition_file;

	/* Remote partition sibling list anchored at remote_children */
	struct list_head remote_sibling;
};

/*
 * Exclusive CPUs distributed out to sub-partitions of top_cpuset
 */
static cpumask_var_t	subpartitions_cpus;

/*
 * Exclusive CPUs in isolated partitions
 */
static cpumask_var_t	isolated_cpus;

/* List of remote partition root children */
static struct list_head	remote_children;

/*
 * Partition root states:
 *
 *   0 - member (not a partition root)
 *   1 - partition root
 *   2 - partition root without load balancing (isolated)
 *  -1 - invalid partition root
 *  -2 - invalid isolated partition root
 */
#define PRS_MEMBER		0
#define PRS_ROOT		1
#define PRS_ISOLATED		2
#define PRS_INVALID_ROOT	-1
#define PRS_INVALID_ISOLATED	-2

static inline bool is_prs_invalid(int prs_state)
{
	return prs_state < 0;
}

/*
 * Temporary cpumasks for working with partitions that are passed among
 * functions to avoid memory allocation in inner functions.
 */
struct tmpmasks {
	cpumask_var_t	addmask, delmask;	/* For partition root */
	cpumask_var_t	new_cpus;		/* For update_cpumasks_hier() */
};

static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
{
	return css ?
container_of(css, struct cpuset, css) : NULL; } /* Retrieve the cpuset for a task */ static inline struct cpuset *task_cs(struct task_struct *task) { return css_cs(task_css(task, cpuset_cgrp_id)); } static inline struct cpuset *parent_cs(struct cpuset *cs) { return css_cs(cs->css.parent); } void inc_dl_tasks_cs(struct task_struct *p) { struct cpuset *cs = task_cs(p); cs->nr_deadline_tasks++; } void dec_dl_tasks_cs(struct task_struct *p) { struct cpuset *cs = task_cs(p); cs->nr_deadline_tasks--; } /* bits in struct cpuset flags field */ typedef enum { CS_ONLINE, CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, CS_MEM_HARDWALL, CS_MEMORY_MIGRATE, CS_SCHED_LOAD_BALANCE, CS_SPREAD_PAGE, CS_SPREAD_SLAB, } cpuset_flagbits_t; /* convenient tests for these bits */ static inline bool is_cpuset_online(struct cpuset *cs) { return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); } static inline int is_cpu_exclusive(const struct cpuset *cs) { return test_bit(CS_CPU_EXCLUSIVE, &cs->flags); } static inline int is_mem_exclusive(const struct cpuset *cs) { return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); } static inline int is_mem_hardwall(const struct cpuset *cs) { return test_bit(CS_MEM_HARDWALL, &cs->flags); } static inline int is_sched_load_balance(const struct cpuset *cs) { return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); } static inline int is_memory_migrate(const struct cpuset *cs) { return test_bit(CS_MEMORY_MIGRATE, &cs->flags); } static inline int is_spread_page(const struct cpuset *cs) { return test_bit(CS_SPREAD_PAGE, &cs->flags); } static inline int is_spread_slab(const struct cpuset *cs) { return test_bit(CS_SPREAD_SLAB, &cs->flags); } static inline int is_partition_valid(const struct cpuset *cs) { return cs->partition_root_state > 0; } static inline int is_partition_invalid(const struct cpuset *cs) { return cs->partition_root_state < 0; } /* * Callers should hold callback_lock to modify partition_root_state. */ static inline void make_partition_invalid(struct cpuset *cs) { if (cs->partition_root_state > 0) cs->partition_root_state = -cs->partition_root_state; } /* * Send notification event of whenever partition_root_state changes. */ static inline void notify_partition_change(struct cpuset *cs, int old_prs) { if (old_prs == cs->partition_root_state) return; cgroup_file_notify(&cs->partition_file); /* Reset prs_err if not invalid */ if (is_partition_valid(cs)) WRITE_ONCE(cs->prs_err, PERR_NONE); } static struct cpuset top_cpuset = { .flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), .partition_root_state = PRS_ROOT, .remote_sibling = LIST_HEAD_INIT(top_cpuset.remote_sibling), }; /** * cpuset_for_each_child - traverse online children of a cpuset * @child_cs: loop cursor pointing to the current child * @pos_css: used for iteration * @parent_cs: target cpuset to walk children of * * Walk @child_cs through the online children of @parent_cs. Must be used * with RCU read locked. */ #define cpuset_for_each_child(child_cs, pos_css, parent_cs) \ css_for_each_child((pos_css), &(parent_cs)->css) \ if (is_cpuset_online(((child_cs) = css_cs((pos_css))))) /** * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants * @des_cs: loop cursor pointing to the current descendant * @pos_css: used for iteration * @root_cs: target cpuset to walk ancestor of * * Walk @des_cs through the online descendants of @root_cs. Must be used * with RCU read locked. The caller may modify @pos_css by calling * css_rightmost_descendant() to skip subtree. 
@root_cs is included in the * iteration and the first node to be visited. */ #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \ css_for_each_descendant_pre((pos_css), &(root_cs)->css) \ if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) /* * There are two global locks guarding cpuset structures - cpuset_mutex and * callback_lock. We also require taking task_lock() when dereferencing a * task's cpuset pointer. See "The task_lock() exception", at the end of this * comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset * structures. Note that cpuset_mutex needs to be a mutex as it is used in * paths that rely on priority inheritance (e.g. scheduler - on RT) for * correctness. * * A task must hold both locks to modify cpusets. If a task holds * cpuset_mutex, it blocks others, ensuring that it is the only task able to * also acquire callback_lock and be able to modify cpusets. It can perform * various checks on the cpuset structure first, knowing nothing will change. * It can also allocate memory while just holding cpuset_mutex. While it is * performing these checks, various callback routines can briefly acquire * callback_lock to query cpusets. Once it is ready to make the changes, it * takes callback_lock, blocking everyone else. * * Calls to the kernel memory allocator can not be made while holding * callback_lock, as that would risk double tripping on callback_lock * from one of the callbacks into the cpuset code from within * __alloc_pages(). * * If a task is only holding callback_lock, then it has read-only * access to cpusets. * * Now, the task_struct fields mems_allowed and mempolicy may be changed * by other task, we use alloc_lock in the task_struct fields to protect * them. * * The cpuset_common_file_read() handlers only hold callback_lock across * small pieces of code, such as when reading out possibly multi-word * cpumasks and nodemasks. * * Accessing a task's cpuset should be done in accordance with the * guidelines for accessing subsystem state in kernel/cgroup.c */ static DEFINE_MUTEX(cpuset_mutex); void cpuset_lock(void) { mutex_lock(&cpuset_mutex); } void cpuset_unlock(void) { mutex_unlock(&cpuset_mutex); } static DEFINE_SPINLOCK(callback_lock); static struct workqueue_struct *cpuset_migrate_mm_wq; /* * CPU / memory hotplug is handled asynchronously. */ static void cpuset_hotplug_workfn(struct work_struct *work); static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); static inline void check_insane_mems_config(nodemask_t *nodes) { if (!cpusets_insane_config() && movable_only_nodes(nodes)) { static_branch_enable(&cpusets_insane_config_key); pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n" "Cpuset allocations might fail even with a lot of memory available.\n", nodemask_pr_args(nodes)); } } /* * Cgroup v2 behavior is used on the "cpus" and "mems" control files when * on default hierarchy or when the cpuset_v2_mode flag is set by mounting * the v1 cpuset cgroup filesystem with the "cpuset_v2_mode" mount option. * With v2 behavior, "cpus" and "mems" are always what the users have * requested and won't be changed by hotplug events. Only the effective * cpus or mems will be affected. 
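 *
 * Illustrative example (not from this file): v2 behavior on a v1 hierarchy
 * can be requested with something like
 *   mount -t cgroup -o cpuset,cpuset_v2_mode none /sys/fs/cgroup/cpuset
 * where the mount point is arbitrary; only the "cpuset_v2_mode" option
 * matters here.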
*/ static inline bool is_in_v2_mode(void) { return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); } /** * partition_is_populated - check if partition has tasks * @cs: partition root to be checked * @excluded_child: a child cpuset to be excluded in task checking * Return: true if there are tasks, false otherwise * * It is assumed that @cs is a valid partition root. @excluded_child should * be non-NULL when this cpuset is going to become a partition itself. */ static inline bool partition_is_populated(struct cpuset *cs, struct cpuset *excluded_child) { struct cgroup_subsys_state *css; struct cpuset *child; if (cs->css.cgroup->nr_populated_csets) return true; if (!excluded_child && !cs->nr_subparts) return cgroup_is_populated(cs->css.cgroup); rcu_read_lock(); cpuset_for_each_child(child, css, cs) { if (child == excluded_child) continue; if (is_partition_valid(child)) continue; if (cgroup_is_populated(child->css.cgroup)) { rcu_read_unlock(); return true; } } rcu_read_unlock(); return false; } /* * Return in pmask the portion of a task's cpusets's cpus_allowed that * are online and are capable of running the task. If none are found, * walk up the cpuset hierarchy until we find one that does have some * appropriate cpus. * * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. * * Call with callback_lock or cpuset_mutex held. */ static void guarantee_online_cpus(struct task_struct *tsk, struct cpumask *pmask) { const struct cpumask *possible_mask = task_cpu_possible_mask(tsk); struct cpuset *cs; if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask))) cpumask_copy(pmask, cpu_online_mask); rcu_read_lock(); cs = task_cs(tsk); while (!cpumask_intersects(cs->effective_cpus, pmask)) { cs = parent_cs(cs); if (unlikely(!cs)) { /* * The top cpuset doesn't have any online cpu as a * consequence of a race between cpuset_hotplug_work * and cpu hotplug notifier. But we know the top * cpuset's effective_cpus is on its way to be * identical to cpu_online_mask. */ goto out_unlock; } } cpumask_and(pmask, pmask, cs->effective_cpus); out_unlock: rcu_read_unlock(); } /* * Return in *pmask the portion of a cpusets's mems_allowed that * are online, with memory. If none are online with memory, walk * up the cpuset hierarchy until we find one that does have some * online mems. The top cpuset always has some mems online. * * One way or another, we guarantee to return some non-empty subset * of node_states[N_MEMORY]. * * Call with callback_lock or cpuset_mutex held. */ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) cs = parent_cs(cs); nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); } /* * update task's spread flag if cpuset's page/slab spread flag is set * * Call with callback_lock or cpuset_mutex held. The check can be skipped * if on default hierarchy. */ static void cpuset_update_task_spread_flags(struct cpuset *cs, struct task_struct *tsk) { if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) return; if (is_spread_page(cs)) task_set_spread_page(tsk); else task_clear_spread_page(tsk); if (is_spread_slab(cs)) task_set_spread_slab(tsk); else task_clear_spread_slab(tsk); } /* * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q? * * One cpuset is a subset of another if all its allowed CPUs and * Memory Nodes are a subset of the other, and its exclusive flags * are only set if the other's are set. 
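 *
 * For example (hypothetical values): p with cpus 0-1 and mems 0 is a subset
 * of q with cpus 0-3 and mems 0-1, provided p's cpu/mem exclusive flags are
 * set only where q's are.
 *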
Call holding cpuset_mutex. */ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) { return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && nodes_subset(p->mems_allowed, q->mems_allowed) && is_cpu_exclusive(p) <= is_cpu_exclusive(q) && is_mem_exclusive(p) <= is_mem_exclusive(q); } /** * alloc_cpumasks - allocate three cpumasks for cpuset * @cs: the cpuset that have cpumasks to be allocated. * @tmp: the tmpmasks structure pointer * Return: 0 if successful, -ENOMEM otherwise. * * Only one of the two input arguments should be non-NULL. */ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) { cpumask_var_t *pmask1, *pmask2, *pmask3, *pmask4; if (cs) { pmask1 = &cs->cpus_allowed; pmask2 = &cs->effective_cpus; pmask3 = &cs->effective_xcpus; pmask4 = &cs->exclusive_cpus; } else { pmask1 = &tmp->new_cpus; pmask2 = &tmp->addmask; pmask3 = &tmp->delmask; pmask4 = NULL; } if (!zalloc_cpumask_var(pmask1, GFP_KERNEL)) return -ENOMEM; if (!zalloc_cpumask_var(pmask2, GFP_KERNEL)) goto free_one; if (!zalloc_cpumask_var(pmask3, GFP_KERNEL)) goto free_two; if (pmask4 && !zalloc_cpumask_var(pmask4, GFP_KERNEL)) goto free_three; return 0; free_three: free_cpumask_var(*pmask3); free_two: free_cpumask_var(*pmask2); free_one: free_cpumask_var(*pmask1); return -ENOMEM; } /** * free_cpumasks - free cpumasks in a tmpmasks structure * @cs: the cpuset that have cpumasks to be free. * @tmp: the tmpmasks structure pointer */ static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) { if (cs) { free_cpumask_var(cs->cpus_allowed); free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->effective_xcpus); free_cpumask_var(cs->exclusive_cpus); } if (tmp) { free_cpumask_var(tmp->new_cpus); free_cpumask_var(tmp->addmask); free_cpumask_var(tmp->delmask); } } /** * alloc_trial_cpuset - allocate a trial cpuset * @cs: the cpuset that the trial cpuset duplicates */ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) { struct cpuset *trial; trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL); if (!trial) return NULL; if (alloc_cpumasks(trial, NULL)) { kfree(trial); return NULL; } cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); cpumask_copy(trial->effective_cpus, cs->effective_cpus); cpumask_copy(trial->effective_xcpus, cs->effective_xcpus); cpumask_copy(trial->exclusive_cpus, cs->exclusive_cpus); return trial; } /** * free_cpuset - free the cpuset * @cs: the cpuset to be freed */ static inline void free_cpuset(struct cpuset *cs) { free_cpumasks(cs, NULL); kfree(cs); } static inline struct cpumask *fetch_xcpus(struct cpuset *cs) { return !cpumask_empty(cs->exclusive_cpus) ? cs->exclusive_cpus : cpumask_empty(cs->effective_xcpus) ? cs->cpus_allowed : cs->effective_xcpus; } /* * cpusets_are_exclusive() - check if two cpusets are exclusive * * Return true if exclusive, false if not */ static inline bool cpusets_are_exclusive(struct cpuset *cs1, struct cpuset *cs2) { struct cpumask *xcpus1 = fetch_xcpus(cs1); struct cpumask *xcpus2 = fetch_xcpus(cs2); if (cpumask_intersects(xcpus1, xcpus2)) return false; return true; } /* * validate_change_legacy() - Validate conditions specific to legacy (v1) * behavior. 
*/ static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial) { struct cgroup_subsys_state *css; struct cpuset *c, *par; int ret; WARN_ON_ONCE(!rcu_read_lock_held()); /* Each of our child cpusets must be a subset of us */ ret = -EBUSY; cpuset_for_each_child(c, css, cur) if (!is_cpuset_subset(c, trial)) goto out; /* On legacy hierarchy, we must be a subset of our parent cpuset. */ ret = -EACCES; par = parent_cs(cur); if (par && !is_cpuset_subset(trial, par)) goto out; ret = 0; out: return ret; } /* * validate_change() - Used to validate that any proposed cpuset change * follows the structural rules for cpusets. * * If we replaced the flag and mask values of the current cpuset * (cur) with those values in the trial cpuset (trial), would * our various subset and exclusive rules still be valid? Presumes * cpuset_mutex held. * * 'cur' is the address of an actual, in-use cpuset. Operations * such as list traversal that depend on the actual address of the * cpuset in the list must use cur below, not trial. * * 'trial' is the address of bulk structure copy of cur, with * perhaps one or more of the fields cpus_allowed, mems_allowed, * or flags changed to new, trial values. * * Return 0 if valid, -errno if not. */ static int validate_change(struct cpuset *cur, struct cpuset *trial) { struct cgroup_subsys_state *css; struct cpuset *c, *par; int ret = 0; rcu_read_lock(); if (!is_in_v2_mode()) ret = validate_change_legacy(cur, trial); if (ret) goto out; /* Remaining checks don't apply to root cpuset */ if (cur == &top_cpuset) goto out; par = parent_cs(cur); /* * Cpusets with tasks - existing or newly being attached - can't * be changed to have empty cpus_allowed or mems_allowed. */ ret = -ENOSPC; if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) { if (!cpumask_empty(cur->cpus_allowed) && cpumask_empty(trial->cpus_allowed)) goto out; if (!nodes_empty(cur->mems_allowed) && nodes_empty(trial->mems_allowed)) goto out; } /* * We can't shrink if we won't have enough room for SCHED_DEADLINE * tasks. */ ret = -EBUSY; if (is_cpu_exclusive(cur) && !cpuset_cpumask_can_shrink(cur->cpus_allowed, trial->cpus_allowed)) goto out; /* * If either I or some sibling (!= me) is exclusive, we can't * overlap */ ret = -EINVAL; cpuset_for_each_child(c, css, par) { if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur) { if (!cpusets_are_exclusive(trial, c)) goto out; } if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && c != cur && nodes_intersects(trial->mems_allowed, c->mems_allowed)) goto out; } ret = 0; out: rcu_read_unlock(); return ret; } #ifdef CONFIG_SMP /* * Helper routine for generate_sched_domains(). * Do cpusets a, b have overlapping effective cpus_allowed masks? 
 */
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
	return cpumask_intersects(a->effective_cpus, b->effective_cpus);
}

static void
update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
{
	if (dattr->relax_domain_level < c->relax_domain_level)
		dattr->relax_domain_level = c->relax_domain_level;
	return;
}

static void update_domain_attr_tree(struct sched_domain_attr *dattr,
				    struct cpuset *root_cs)
{
	struct cpuset *cp;
	struct cgroup_subsys_state *pos_css;

	rcu_read_lock();
	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
		/* skip the whole subtree if @cp doesn't have any CPU */
		if (cpumask_empty(cp->cpus_allowed)) {
			pos_css = css_rightmost_descendant(pos_css);
			continue;
		}

		if (is_sched_load_balance(cp))
			update_domain_attr(dattr, cp);
	}
	rcu_read_unlock();
}

/* Must be called with cpuset_mutex held. */
static inline int nr_cpusets(void)
{
	/* jump label reference count + the top-level cpuset */
	return static_key_count(&cpusets_enabled_key.key) + 1;
}

/*
 * generate_sched_domains()
 *
 * This function builds a partial partition of the system's CPUs.  A
 * 'partial partition' is a set of non-overlapping subsets whose union is
 * a subset of that set.  The output of this function needs to be passed
 * to the kernel/sched/core.c partition_sched_domains() routine, which will
 * rebuild the scheduler's load balancing domains (sched domains) as
 * specified by that partial partition.
 *
 * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
 * for a background explanation of this.
 *
 * Does not return errors, on the theory that the callers of this
 * routine would rather not worry about failures to rebuild sched
 * domains when operating in the severe memory shortage situations
 * that could cause allocation failures below.
 *
 * Must be called with cpuset_mutex held.
 *
 * The three key local variables below are:
 *    cp - cpuset pointer, used (together with pos_css) to perform a
 *	   top-down scan of all cpusets.  For our purposes, rebuilding
 *	   the scheduler's sched domains, we can ignore !is_sched_load_
 *	   balance cpusets.
 *   csa - (for CpuSet Array) Array of pointers to all the cpusets
 *	   that need to be load balanced, for convenient iterative
 *	   access by the subsequent code that finds the best partition,
 *	   i.e. the set of domains (subsets) of CPUs such that the
 *	   cpus_allowed of every cpuset marked is_sched_load_balance
 *	   is a subset of one of these domains, while there are as
 *	   many such domains as possible, each as small as possible.
 *  doms - Conversion of 'csa' to an array of cpumasks, for passing to
 *	   the kernel/sched/core.c routine partition_sched_domains() in a
 *	   convenient format, that can be easily compared to the prior
 *	   value to determine what partition elements (sched domains)
 *	   were changed (added or removed).
 *
 * Finding the best partition (set of domains):
 *	The triple nested loops below over i, j, k scan over the load
 *	balanced cpusets (using the array of cpuset pointers in csa[])
 *	looking for pairs of cpusets that have overlapping cpus_allowed
 *	but don't have the same 'pn' partition number, and merges them
 *	into the same partition number.  It keeps looping on the
 *	'restart' label until it can no longer find any such pairs.
 *
 *	The union of the cpus_allowed masks from the set of all cpusets
 *	having the same 'pn' value then forms the one element of the
 *	partition (one sched domain) to be passed to
 *	partition_sched_domains().
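 *
 * Illustrative example (hypothetical masks): if csa[] holds three cpusets
 * with cpus_allowed A=0-3, B=2-5 and C=6-7, then A and B overlap, so the
 * scan folds B's 'pn' into A's and ndoms drops by one, while C keeps its
 * own 'pn'.  The resulting sched domains are 0-5 and 6-7.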
*/ static int generate_sched_domains(cpumask_var_t **domains, struct sched_domain_attr **attributes) { struct cpuset *cp; /* top-down scan of cpusets */ struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] struct cpumask slot */ struct cgroup_subsys_state *pos_css; bool root_load_balance = is_sched_load_balance(&top_cpuset); doms = NULL; dattr = NULL; csa = NULL; /* Special case for the 99% of systems with one, full, sched domain */ if (root_load_balance && !top_cpuset.nr_subparts) { ndoms = 1; doms = alloc_sched_domains(ndoms); if (!doms) goto done; dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); if (dattr) { *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } cpumask_and(doms[0], top_cpuset.effective_cpus, housekeeping_cpumask(HK_TYPE_DOMAIN)); goto done; } csa = kmalloc_array(nr_cpusets(), sizeof(cp), GFP_KERNEL); if (!csa) goto done; csn = 0; rcu_read_lock(); if (root_load_balance) csa[csn++] = &top_cpuset; cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) { if (cp == &top_cpuset) continue; /* * Continue traversing beyond @cp iff @cp has some CPUs and * isn't load balancing. The former is obvious. The * latter: All child cpusets contain a subset of the * parent's cpus, so just skip them, and then we call * update_domain_attr_tree() to calc relax_domain_level of * the corresponding sched domain. * * If root is load-balancing, we can skip @cp if it * is a subset of the root's effective_cpus. */ if (!cpumask_empty(cp->cpus_allowed) && !(is_sched_load_balance(cp) && cpumask_intersects(cp->cpus_allowed, housekeeping_cpumask(HK_TYPE_DOMAIN)))) continue; if (root_load_balance && cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus)) continue; if (is_sched_load_balance(cp) && !cpumask_empty(cp->effective_cpus)) csa[csn++] = cp; /* skip @cp's subtree if not a partition root */ if (!is_partition_valid(cp)) pos_css = css_rightmost_descendant(pos_css); } rcu_read_unlock(); for (i = 0; i < csn; i++) csa[i]->pn = i; ndoms = csn; restart: /* Find the best partition (set of sched domains) */ for (i = 0; i < csn; i++) { struct cpuset *a = csa[i]; int apn = a->pn; for (j = 0; j < csn; j++) { struct cpuset *b = csa[j]; int bpn = b->pn; if (apn != bpn && cpusets_overlap(a, b)) { for (k = 0; k < csn; k++) { struct cpuset *c = csa[k]; if (c->pn == bpn) c->pn = apn; } ndoms--; /* one less element */ goto restart; } } } /* * Now we know how many domains to create. * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. */ doms = alloc_sched_domains(ndoms); if (!doms) goto done; /* * The rest of the code, including the scheduler, can deal with * dattr==NULL case. No need to abort if alloc fails. 
*/ dattr = kmalloc_array(ndoms, sizeof(struct sched_domain_attr), GFP_KERNEL); for (nslot = 0, i = 0; i < csn; i++) { struct cpuset *a = csa[i]; struct cpumask *dp; int apn = a->pn; if (apn < 0) { /* Skip completed partitions */ continue; } dp = doms[nslot]; if (nslot == ndoms) { static int warnings = 10; if (warnings) { pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n", nslot, ndoms, csn, i, apn); warnings--; } continue; } cpumask_clear(dp); if (dattr) *(dattr + nslot) = SD_ATTR_INIT; for (j = i; j < csn; j++) { struct cpuset *b = csa[j]; if (apn == b->pn) { cpumask_or(dp, dp, b->effective_cpus); cpumask_and(dp, dp, housekeeping_cpumask(HK_TYPE_DOMAIN)); if (dattr) update_domain_attr_tree(dattr + nslot, b); /* Done with this partition */ b->pn = -1; } } nslot++; } BUG_ON(nslot != ndoms); done: kfree(csa); /* * Fallback to the default domain if kmalloc() failed. * See comments in partition_sched_domains(). */ if (doms == NULL) ndoms = 1; *domains = doms; *attributes = dattr; return ndoms; } static void dl_update_tasks_root_domain(struct cpuset *cs) { struct css_task_iter it; struct task_struct *task; if (cs->nr_deadline_tasks == 0) return; css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) dl_add_task_root_domain(task); css_task_iter_end(&it); } static void dl_rebuild_rd_accounting(void) { struct cpuset *cs = NULL; struct cgroup_subsys_state *pos_css; lockdep_assert_held(&cpuset_mutex); lockdep_assert_cpus_held(); lockdep_assert_held(&sched_domains_mutex); rcu_read_lock(); /* * Clear default root domain DL accounting, it will be computed again * if a task belongs to it. */ dl_clear_root_domain(&def_root_domain); cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { if (cpumask_empty(cs->effective_cpus)) { pos_css = css_rightmost_descendant(pos_css); continue; } css_get(&cs->css); rcu_read_unlock(); dl_update_tasks_root_domain(cs); rcu_read_lock(); css_put(&cs->css); } rcu_read_unlock(); } static void partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { mutex_lock(&sched_domains_mutex); partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); dl_rebuild_rd_accounting(); mutex_unlock(&sched_domains_mutex); } /* * Rebuild scheduler domains. * * If the flag 'sched_load_balance' of any cpuset with non-empty * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset * which has that flag enabled, or if any cpuset with a non-empty * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * * Call with cpuset_mutex held. Takes cpus_read_lock(). */ static void rebuild_sched_domains_locked(void) { struct cgroup_subsys_state *pos_css; struct sched_domain_attr *attr; cpumask_var_t *doms; struct cpuset *cs; int ndoms; lockdep_assert_cpus_held(); lockdep_assert_held(&cpuset_mutex); /* * If we have raced with CPU hotplug, return early to avoid * passing doms with offlined cpu to partition_sched_domains(). * Anyways, cpuset_hotplug_workfn() will rebuild sched domains. * * With no CPUs in any subpartitions, top_cpuset's effective CPUs * should be the same as the active CPUs, so checking only top_cpuset * is enough to detect racing CPU offlines. */ if (cpumask_empty(subpartitions_cpus) && !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) return; /* * With subpartition CPUs, however, the effective CPUs of a partition * root should be only a subset of the active CPUs. 
 * Since a CPU in any partition root could be offlined, all must be checked.
	 */
	if (top_cpuset.nr_subparts) {
		rcu_read_lock();
		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
			if (!is_partition_valid(cs)) {
				pos_css = css_rightmost_descendant(pos_css);
				continue;
			}
			if (!cpumask_subset(cs->effective_cpus,
					    cpu_active_mask)) {
				rcu_read_unlock();
				return;
			}
		}
		rcu_read_unlock();
	}

	/* Generate domain masks and attrs */
	ndoms = generate_sched_domains(&doms, &attr);

	/* Have scheduler rebuild the domains */
	partition_and_rebuild_sched_domains(ndoms, doms, attr);
}
#else /* !CONFIG_SMP */
static void rebuild_sched_domains_locked(void)
{
}
#endif /* CONFIG_SMP */

void rebuild_sched_domains(void)
{
	cpus_read_lock();
	mutex_lock(&cpuset_mutex);
	rebuild_sched_domains_locked();
	mutex_unlock(&cpuset_mutex);
	cpus_read_unlock();
}

/**
 * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
 * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
 * @new_cpus: the temp variable for the new effective_cpus mask
 *
 * Iterate through each task of @cs updating its cpus_allowed to the
 * effective cpuset's.  As this function is called with cpuset_mutex held,
 * cpuset membership stays stable.  For top_cpuset, task_cpu_possible_mask()
 * is used instead of effective_cpus to make sure all offline CPUs are also
 * included, as hotplug code won't update cpumasks for tasks in top_cpuset.
 */
static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
{
	struct css_task_iter it;
	struct task_struct *task;
	bool top_cs = cs == &top_cpuset;

	css_task_iter_start(&cs->css, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		const struct cpumask *possible_mask = task_cpu_possible_mask(task);

		if (top_cs) {
			/*
			 * Percpu kthreads in top_cpuset are ignored
			 */
			if (kthread_is_per_cpu(task))
				continue;
			cpumask_andnot(new_cpus, possible_mask, subpartitions_cpus);
		} else {
			cpumask_and(new_cpus, possible_mask, cs->effective_cpus);
		}
		set_cpus_allowed_ptr(task, new_cpus);
	}
	css_task_iter_end(&it);
}

/**
 * compute_effective_cpumask - Compute the effective cpumask of the cpuset
 * @new_cpus: the temp variable for the new effective_cpus mask
 * @cs: the cpuset that needs to recompute the new effective_cpus mask
 * @parent: the parent cpuset
 *
 * The result is valid only if the given cpuset isn't a partition root.
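 *
 * For example (hypothetical masks), with cs->cpus_allowed = 0-7 and
 * parent->effective_cpus = 4-11, the computed new_cpus is 4-7.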
*/ static void compute_effective_cpumask(struct cpumask *new_cpus, struct cpuset *cs, struct cpuset *parent) { cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus); } /* * Commands for update_parent_effective_cpumask */ enum partition_cmd { partcmd_enable, /* Enable partition root */ partcmd_enablei, /* Enable isolated partition root */ partcmd_disable, /* Disable partition root */ partcmd_update, /* Update parent's effective_cpus */ partcmd_invalidate, /* Make partition invalid */ }; static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on); static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct tmpmasks *tmp); /* * Update partition exclusive flag * * Return: 0 if successful, an error code otherwise */ static int update_partition_exclusive(struct cpuset *cs, int new_prs) { bool exclusive = (new_prs > 0); if (exclusive && !is_cpu_exclusive(cs)) { if (update_flag(CS_CPU_EXCLUSIVE, cs, 1)) return PERR_NOTEXCL; } else if (!exclusive && is_cpu_exclusive(cs)) { /* Turning off CS_CPU_EXCLUSIVE will not return error */ update_flag(CS_CPU_EXCLUSIVE, cs, 0); } return 0; } /* * Update partition load balance flag and/or rebuild sched domain * * Changing load balance flag will automatically call * rebuild_sched_domains_locked(). * This function is for cgroup v2 only. */ static void update_partition_sd_lb(struct cpuset *cs, int old_prs) { int new_prs = cs->partition_root_state; bool rebuild_domains = (new_prs > 0) || (old_prs > 0); bool new_lb; /* * If cs is not a valid partition root, the load balance state * will follow its parent. */ if (new_prs > 0) { new_lb = (new_prs != PRS_ISOLATED); } else { new_lb = is_sched_load_balance(parent_cs(cs)); } if (new_lb != !!is_sched_load_balance(cs)) { rebuild_domains = true; if (new_lb) set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); else clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); } if (rebuild_domains) rebuild_sched_domains_locked(); } /* * tasks_nocpu_error - Return true if tasks will have no effective_cpus */ static bool tasks_nocpu_error(struct cpuset *parent, struct cpuset *cs, struct cpumask *xcpus) { /* * A populated partition (cs or parent) can't have empty effective_cpus */ return (cpumask_subset(parent->effective_cpus, xcpus) && partition_is_populated(parent, cs)) || (!cpumask_intersects(xcpus, cpu_active_mask) && partition_is_populated(cs, NULL)); } static void reset_partition_data(struct cpuset *cs) { struct cpuset *parent = parent_cs(cs); if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) return; lockdep_assert_held(&callback_lock); cs->nr_subparts = 0; if (cpumask_empty(cs->exclusive_cpus)) { cpumask_clear(cs->effective_xcpus); if (is_cpu_exclusive(cs)) clear_bit(CS_CPU_EXCLUSIVE, &cs->flags); } if (!cpumask_and(cs->effective_cpus, parent->effective_cpus, cs->cpus_allowed)) { cs->use_parent_ecpus = true; parent->child_ecpus_count++; cpumask_copy(cs->effective_cpus, parent->effective_cpus); } } /* * partition_xcpus_newstate - Exclusive CPUs state change * @old_prs: old partition_root_state * @new_prs: new partition_root_state * @xcpus: exclusive CPUs with state change */ static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus) { WARN_ON_ONCE(old_prs == new_prs); if (new_prs == PRS_ISOLATED) cpumask_or(isolated_cpus, isolated_cpus, xcpus); else cpumask_andnot(isolated_cpus, isolated_cpus, xcpus); } /* * partition_xcpus_add - Add new exclusive CPUs to partition * @new_prs: new partition_root_state * @parent: parent cpuset * @xcpus: exclusive CPUs to be added * 
Return: true if isolated_cpus modified, false otherwise * * Remote partition if parent == NULL */ static bool partition_xcpus_add(int new_prs, struct cpuset *parent, struct cpumask *xcpus) { bool isolcpus_updated; WARN_ON_ONCE(new_prs < 0); lockdep_assert_held(&callback_lock); if (!parent) parent = &top_cpuset; if (parent == &top_cpuset) cpumask_or(subpartitions_cpus, subpartitions_cpus, xcpus); isolcpus_updated = (new_prs != parent->partition_root_state); if (isolcpus_updated) partition_xcpus_newstate(parent->partition_root_state, new_prs, xcpus); cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus); return isolcpus_updated; } /* * partition_xcpus_del - Remove exclusive CPUs from partition * @old_prs: old partition_root_state * @parent: parent cpuset * @xcpus: exclusive CPUs to be removed * Return: true if isolated_cpus modified, false otherwise * * Remote partition if parent == NULL */ static bool partition_xcpus_del(int old_prs, struct cpuset *parent, struct cpumask *xcpus) { bool isolcpus_updated; WARN_ON_ONCE(old_prs < 0); lockdep_assert_held(&callback_lock); if (!parent) parent = &top_cpuset; if (parent == &top_cpuset) cpumask_andnot(subpartitions_cpus, subpartitions_cpus, xcpus); isolcpus_updated = (old_prs != parent->partition_root_state); if (isolcpus_updated) partition_xcpus_newstate(old_prs, parent->partition_root_state, xcpus); cpumask_and(xcpus, xcpus, cpu_active_mask); cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); return isolcpus_updated; } static void update_unbound_workqueue_cpumask(bool isolcpus_updated) { int ret; lockdep_assert_cpus_held(); if (!isolcpus_updated) return; ret = workqueue_unbound_exclude_cpumask(isolated_cpus); WARN_ON_ONCE(ret < 0); } /** * cpuset_cpu_is_isolated - Check if the given CPU is isolated * @cpu: the CPU number to be checked * Return: true if CPU is used in an isolated partition, false otherwise */ bool cpuset_cpu_is_isolated(int cpu) { return cpumask_test_cpu(cpu, isolated_cpus); } EXPORT_SYMBOL_GPL(cpuset_cpu_is_isolated); /* * compute_effective_exclusive_cpumask - compute effective exclusive CPUs * @cs: cpuset * @xcpus: effective exclusive CPUs value to be set * Return: true if xcpus is not empty, false otherwise. * * Starting with exclusive_cpus (cpus_allowed if exclusive_cpus is not set), * it must be a subset of cpus_allowed and parent's effective_xcpus. */ static bool compute_effective_exclusive_cpumask(struct cpuset *cs, struct cpumask *xcpus) { struct cpuset *parent = parent_cs(cs); if (!xcpus) xcpus = cs->effective_xcpus; if (!cpumask_empty(cs->exclusive_cpus)) cpumask_and(xcpus, cs->exclusive_cpus, cs->cpus_allowed); else cpumask_copy(xcpus, cs->cpus_allowed); return cpumask_and(xcpus, xcpus, parent->effective_xcpus); } static inline bool is_remote_partition(struct cpuset *cs) { return !list_empty(&cs->remote_sibling); } static inline bool is_local_partition(struct cpuset *cs) { return is_partition_valid(cs) && !is_remote_partition(cs); } /* * remote_partition_enable - Enable current cpuset as a remote partition root * @cs: the cpuset to update * @new_prs: new partition_root_state * @tmp: temparary masks * Return: 1 if successful, 0 if error * * Enable the current cpuset to become a remote partition root taking CPUs * directly from the top cpuset. cpuset_mutex must be held by the caller. */ static int remote_partition_enable(struct cpuset *cs, int new_prs, struct tmpmasks *tmp) { bool isolcpus_updated; /* * The user must have sysadmin privilege. 
*/ if (!capable(CAP_SYS_ADMIN)) return 0; /* * The requested exclusive_cpus must not be allocated to other * partitions and it can't use up all the root's effective_cpus. * * Note that if there is any local partition root above it or * remote partition root underneath it, its exclusive_cpus must * have overlapped with subpartitions_cpus. */ compute_effective_exclusive_cpumask(cs, tmp->new_cpus); if (cpumask_empty(tmp->new_cpus) || cpumask_intersects(tmp->new_cpus, subpartitions_cpus) || cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus)) return 0; spin_lock_irq(&callback_lock); isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus); list_add(&cs->remote_sibling, &remote_children); if (cs->use_parent_ecpus) { struct cpuset *parent = parent_cs(cs); cs->use_parent_ecpus = false; parent->child_ecpus_count--; } spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. */ update_tasks_cpumask(&top_cpuset, tmp->new_cpus); update_sibling_cpumasks(&top_cpuset, NULL, tmp); return 1; } /* * remote_partition_disable - Remove current cpuset from remote partition list * @cs: the cpuset to update * @tmp: temparary masks * * The effective_cpus is also updated. * * cpuset_mutex must be held by the caller. */ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) { bool isolcpus_updated; compute_effective_exclusive_cpumask(cs, tmp->new_cpus); WARN_ON_ONCE(!is_remote_partition(cs)); WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus)); spin_lock_irq(&callback_lock); list_del_init(&cs->remote_sibling); isolcpus_updated = partition_xcpus_del(cs->partition_root_state, NULL, tmp->new_cpus); cs->partition_root_state = -cs->partition_root_state; if (!cs->prs_err) cs->prs_err = PERR_INVCPUS; reset_partition_data(cs); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. */ update_tasks_cpumask(&top_cpuset, tmp->new_cpus); update_sibling_cpumasks(&top_cpuset, NULL, tmp); } /* * remote_cpus_update - cpus_exclusive change of remote partition * @cs: the cpuset to be updated * @newmask: the new effective_xcpus mask * @tmp: temparary masks * * top_cpuset and subpartitions_cpus will be updated or partition can be * invalidated. */ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, struct tmpmasks *tmp) { bool adding, deleting; int prs = cs->partition_root_state; int isolcpus_updated = 0; if (WARN_ON_ONCE(!is_remote_partition(cs))) return; WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); if (cpumask_empty(newmask)) goto invalidate; adding = cpumask_andnot(tmp->addmask, newmask, cs->effective_xcpus); deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, newmask); /* * Additions of remote CPUs is only allowed if those CPUs are * not allocated to other partitions and there are effective_cpus * left in the top cpuset. 
*/ if (adding && (!capable(CAP_SYS_ADMIN) || cpumask_intersects(tmp->addmask, subpartitions_cpus) || cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))) goto invalidate; spin_lock_irq(&callback_lock); if (adding) isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask); if (deleting) isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. */ update_tasks_cpumask(&top_cpuset, tmp->new_cpus); update_sibling_cpumasks(&top_cpuset, NULL, tmp); return; invalidate: remote_partition_disable(cs, tmp); } /* * remote_partition_check - check if a child remote partition needs update * @cs: the cpuset to be updated * @newmask: the new effective_xcpus mask * @delmask: temporary mask for deletion (not in tmp) * @tmp: temparary masks * * This should be called before the given cs has updated its cpus_allowed * and/or effective_xcpus. */ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask, struct cpumask *delmask, struct tmpmasks *tmp) { struct cpuset *child, *next; int disable_cnt = 0; /* * Compute the effective exclusive CPUs that will be deleted. */ if (!cpumask_andnot(delmask, cs->effective_xcpus, newmask) || !cpumask_intersects(delmask, subpartitions_cpus)) return; /* No deletion of exclusive CPUs in partitions */ /* * Searching the remote children list to look for those that will * be impacted by the deletion of exclusive CPUs. * * Since a cpuset must be removed from the remote children list * before it can go offline and holding cpuset_mutex will prevent * any change in cpuset status. RCU read lock isn't needed. */ lockdep_assert_held(&cpuset_mutex); list_for_each_entry_safe(child, next, &remote_children, remote_sibling) if (cpumask_intersects(child->effective_cpus, delmask)) { remote_partition_disable(child, tmp); disable_cnt++; } if (disable_cnt) rebuild_sched_domains_locked(); } /* * prstate_housekeeping_conflict - check for partition & housekeeping conflicts * @prstate: partition root state to be checked * @new_cpus: cpu mask * Return: true if there is conflict, false otherwise * * CPUs outside of housekeeping_cpumask(HK_TYPE_DOMAIN) can only be used in * an isolated partition. */ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus) { const struct cpumask *hk_domain = housekeeping_cpumask(HK_TYPE_DOMAIN); bool all_in_hk = cpumask_subset(new_cpus, hk_domain); if (!all_in_hk && (prstate != PRS_ISOLATED)) return true; return false; } /** * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset * @cs: The cpuset that requests change in partition root state * @cmd: Partition root state change command * @newmask: Optional new cpumask for partcmd_update * @tmp: Temporary addmask and delmask * Return: 0 or a partition root state error code * * For partcmd_enable*, the cpuset is being transformed from a non-partition * root to a partition root. The effective_xcpus (cpus_allowed if * effective_xcpus not set) mask of the given cpuset will be taken away from * parent's effective_cpus. The function will return 0 if all the CPUs listed * in effective_xcpus can be granted or an error code will be returned. * * For partcmd_disable, the cpuset is being transformed from a partition * root back to a non-partition root. Any CPUs in effective_xcpus will be * given back to parent's effective_cpus. 0 will always be returned. 
* * For partcmd_update, if the optional newmask is specified, the cpu list is * to be changed from effective_xcpus to newmask. Otherwise, effective_xcpus is * assumed to remain the same. The cpuset should either be a valid or invalid * partition root. The partition root state may change from valid to invalid * or vice versa. An error code will be returned if transitioning from * invalid to valid violates the exclusivity rule. * * For partcmd_invalidate, the current partition will be made invalid. * * The partcmd_enable* and partcmd_disable commands are used by * update_prstate(). An error code may be returned and the caller will check * for error. * * The partcmd_update command is used by update_cpumasks_hier() with newmask * NULL and update_cpumask() with newmask set. The partcmd_invalidate is used * by update_cpumask() with NULL newmask. In both cases, the callers won't * check for error and so partition_root_state and prs_error will be updated * directly. */ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, struct cpumask *newmask, struct tmpmasks *tmp) { struct cpuset *parent = parent_cs(cs); int adding; /* Adding cpus to parent's effective_cpus */ int deleting; /* Deleting cpus from parent's effective_cpus */ int old_prs, new_prs; int part_error = PERR_NONE; /* Partition error? */ int subparts_delta = 0; struct cpumask *xcpus; /* cs effective_xcpus */ int isolcpus_updated = 0; bool nocpu; lockdep_assert_held(&cpuset_mutex); /* * new_prs will only be changed for the partcmd_update and * partcmd_invalidate commands. */ adding = deleting = false; old_prs = new_prs = cs->partition_root_state; xcpus = !cpumask_empty(cs->exclusive_cpus) ? cs->effective_xcpus : cs->cpus_allowed; if (cmd == partcmd_invalidate) { if (is_prs_invalid(old_prs)) return 0; /* * Make the current partition invalid. */ if (is_partition_valid(parent)) adding = cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); if (old_prs > 0) { new_prs = -old_prs; subparts_delta--; } goto write_error; } /* * The parent must be a partition root. * The new cpumask, if present, or the current cpus_allowed must * not be empty. */ if (!is_partition_valid(parent)) { return is_partition_invalid(parent) ? PERR_INVPARENT : PERR_NOTPART; } if (!newmask && cpumask_empty(cs->cpus_allowed)) return PERR_CPUSEMPTY; nocpu = tasks_nocpu_error(parent, cs, xcpus); if ((cmd == partcmd_enable) || (cmd == partcmd_enablei)) { /* * Enabling partition root is not allowed if its * effective_xcpus is empty or doesn't overlap with * parent's effective_xcpus. */ if (cpumask_empty(xcpus) || !cpumask_intersects(xcpus, parent->effective_xcpus)) return PERR_INVCPUS; if (prstate_housekeeping_conflict(new_prs, xcpus)) return PERR_HKEEPING; /* * A parent can be left with no CPU as long as there is no * task directly associated with the parent partition. */ if (nocpu) return PERR_NOCPUS; cpumask_copy(tmp->delmask, xcpus); deleting = true; subparts_delta++; new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED; } else if (cmd == partcmd_disable) { /* * May need to add cpus to parent's effective_cpus for * valid partition root. 
*/ adding = !is_prs_invalid(old_prs) && cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); if (adding) subparts_delta--; new_prs = PRS_MEMBER; } else if (newmask) { /* * Empty cpumask is not allowed */ if (cpumask_empty(newmask)) { part_error = PERR_CPUSEMPTY; goto write_error; } /* * partcmd_update with newmask: * * Compute add/delete mask to/from effective_cpus * * For valid partition: * addmask = exclusive_cpus & ~newmask * & parent->effective_xcpus * delmask = newmask & ~exclusive_cpus * & parent->effective_xcpus * * For invalid partition: * delmask = newmask & parent->effective_xcpus */ if (is_prs_invalid(old_prs)) { adding = false; deleting = cpumask_and(tmp->delmask, newmask, parent->effective_xcpus); } else { cpumask_andnot(tmp->addmask, xcpus, newmask); adding = cpumask_and(tmp->addmask, tmp->addmask, parent->effective_xcpus); cpumask_andnot(tmp->delmask, newmask, xcpus); deleting = cpumask_and(tmp->delmask, tmp->delmask, parent->effective_xcpus); } /* * Make partition invalid if parent's effective_cpus could * become empty and there are tasks in the parent. */ if (nocpu && (!adding || !cpumask_intersects(tmp->addmask, cpu_active_mask))) { part_error = PERR_NOCPUS; deleting = false; adding = cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); } } else { /* * partcmd_update w/o newmask * * delmask = effective_xcpus & parent->effective_cpus * * This can be called from: * 1) update_cpumasks_hier() * 2) cpuset_hotplug_update_tasks() * * Check to see if it can be transitioned from valid to * invalid partition or vice versa. * * A partition error happens when parent has tasks and all * its effective CPUs will have to be distributed out. */ WARN_ON_ONCE(!is_partition_valid(parent)); if (nocpu) { part_error = PERR_NOCPUS; if (is_partition_valid(cs)) adding = cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); } else if (is_partition_invalid(cs) && cpumask_subset(xcpus, parent->effective_xcpus)) { struct cgroup_subsys_state *css; struct cpuset *child; bool exclusive = true; /* * Convert invalid partition to valid has to * pass the cpu exclusivity test. */ rcu_read_lock(); cpuset_for_each_child(child, css, parent) { if (child == cs) continue; if (!cpusets_are_exclusive(cs, child)) { exclusive = false; break; } } rcu_read_unlock(); if (exclusive) deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_cpus); else part_error = PERR_NOTEXCL; } } write_error: if (part_error) WRITE_ONCE(cs->prs_err, part_error); if (cmd == partcmd_update) { /* * Check for possible transition between valid and invalid * partition root. */ switch (cs->partition_root_state) { case PRS_ROOT: case PRS_ISOLATED: if (part_error) { new_prs = -old_prs; subparts_delta--; } break; case PRS_INVALID_ROOT: case PRS_INVALID_ISOLATED: if (!part_error) { new_prs = -old_prs; subparts_delta++; } break; } } if (!adding && !deleting && (new_prs == old_prs)) return 0; /* * Transitioning between invalid to valid or vice versa may require * changing CS_CPU_EXCLUSIVE. In the case of partcmd_update, * validate_change() has already been successfully called and * CPU lists in cs haven't been updated yet. So defer it to later. */ if ((old_prs != new_prs) && (cmd != partcmd_update)) { int err = update_partition_exclusive(cs, new_prs); if (err) return err; } /* * Change the parent's effective_cpus & effective_xcpus (top cpuset * only). * * Newly added CPUs will be removed from effective_cpus and * newly deleted ones will be added back to effective_cpus. 
*/ spin_lock_irq(&callback_lock); if (old_prs != new_prs) { cs->partition_root_state = new_prs; if (new_prs <= 0) cs->nr_subparts = 0; } /* * Adding to parent's effective_cpus means deletion CPUs from cs * and vice versa. */ if (adding) isolcpus_updated += partition_xcpus_del(old_prs, parent, tmp->addmask); if (deleting) isolcpus_updated += partition_xcpus_add(new_prs, parent, tmp->delmask); if (is_partition_valid(parent)) { parent->nr_subparts += subparts_delta; WARN_ON_ONCE(parent->nr_subparts < 0); } spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); if ((old_prs != new_prs) && (cmd == partcmd_update)) update_partition_exclusive(cs, new_prs); if (adding || deleting) { update_tasks_cpumask(parent, tmp->addmask); update_sibling_cpumasks(parent, cs, tmp); } /* * For partcmd_update without newmask, it is being called from * cpuset_hotplug_workfn() where cpus_read_lock() wasn't taken. * Update the load balance flag and scheduling domain if * cpus_read_trylock() is successful. */ if ((cmd == partcmd_update) && !newmask && cpus_read_trylock()) { update_partition_sd_lb(cs, old_prs); cpus_read_unlock(); } notify_partition_change(cs, old_prs); return 0; } /** * compute_partition_effective_cpumask - compute effective_cpus for partition * @cs: partition root cpuset * @new_ecpus: previously computed effective_cpus to be updated * * Compute the effective_cpus of a partition root by scanning effective_xcpus * of child partition roots and excluding their effective_xcpus. * * This has the side effect of invalidating valid child partition roots, * if necessary. Since it is called from either cpuset_hotplug_update_tasks() * or update_cpumasks_hier() where parent and children are modified * successively, we don't need to call update_parent_effective_cpumask() * and the child's effective_cpus will be updated in later iterations. * * Note that rcu_read_lock() is assumed to be held. 
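 *
 * As a hypothetical illustration: if @cs has effective exclusive CPUs 0-7
 * and one valid child partition whose effective_xcpus is 4-5, the resulting
 * effective_cpus of @cs is 0-3,6-7 (further masked by cpu_active_mask).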
*/ static void compute_partition_effective_cpumask(struct cpuset *cs, struct cpumask *new_ecpus) { struct cgroup_subsys_state *css; struct cpuset *child; bool populated = partition_is_populated(cs, NULL); /* * Check child partition roots to see if they should be * invalidated when * 1) child effective_xcpus not a subset of new * excluisve_cpus * 2) All the effective_cpus will be used up and cp * has tasks */ compute_effective_exclusive_cpumask(cs, new_ecpus); cpumask_and(new_ecpus, new_ecpus, cpu_active_mask); rcu_read_lock(); cpuset_for_each_child(child, css, cs) { if (!is_partition_valid(child)) continue; child->prs_err = 0; if (!cpumask_subset(child->effective_xcpus, cs->effective_xcpus)) child->prs_err = PERR_INVCPUS; else if (populated && cpumask_subset(new_ecpus, child->effective_xcpus)) child->prs_err = PERR_NOCPUS; if (child->prs_err) { int old_prs = child->partition_root_state; /* * Invalidate child partition */ spin_lock_irq(&callback_lock); make_partition_invalid(child); cs->nr_subparts--; child->nr_subparts = 0; spin_unlock_irq(&callback_lock); notify_partition_change(child, old_prs); continue; } cpumask_andnot(new_ecpus, new_ecpus, child->effective_xcpus); } rcu_read_unlock(); } /* * update_cpumasks_hier() flags */ #define HIER_CHECKALL 0x01 /* Check all cpusets with no skipping */ #define HIER_NO_SD_REBUILD 0x02 /* Don't rebuild sched domains */ /* * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree * @cs: the cpuset to consider * @tmp: temp variables for calculating effective_cpus & partition setup * @force: don't skip any descendant cpusets if set * * When configured cpumask is changed, the effective cpumasks of this cpuset * and all its descendants need to be updated. * * On legacy hierarchy, effective_cpus will be the same with cpu_allowed. * * Called with cpuset_mutex held */ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, int flags) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; bool need_rebuild_sched_domains = false; int old_prs, new_prs; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, cs) { struct cpuset *parent = parent_cs(cp); bool remote = is_remote_partition(cp); bool update_parent = false; /* * Skip descendent remote partition that acquires CPUs * directly from top cpuset unless it is cs. */ if (remote && (cp != cs)) { pos_css = css_rightmost_descendant(pos_css); continue; } /* * Update effective_xcpus if exclusive_cpus set. * The case when exclusive_cpus isn't set is handled later. */ if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) { spin_lock_irq(&callback_lock); compute_effective_exclusive_cpumask(cp, NULL); spin_unlock_irq(&callback_lock); } old_prs = new_prs = cp->partition_root_state; if (remote || (is_partition_valid(parent) && is_partition_valid(cp))) compute_partition_effective_cpumask(cp, tmp->new_cpus); else compute_effective_cpumask(tmp->new_cpus, cp, parent); /* * A partition with no effective_cpus is allowed as long as * there is no task associated with it. Call * update_parent_effective_cpumask() to check it. */ if (is_partition_valid(cp) && cpumask_empty(tmp->new_cpus)) { update_parent = true; goto update_parent_effective; } /* * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some CPUs unless * it is a partition root that has explicitly distributed * out all its CPUs. 
*/ if (is_in_v2_mode() && !remote && cpumask_empty(tmp->new_cpus)) { cpumask_copy(tmp->new_cpus, parent->effective_cpus); if (!cp->use_parent_ecpus) { cp->use_parent_ecpus = true; parent->child_ecpus_count++; } } else if (cp->use_parent_ecpus) { cp->use_parent_ecpus = false; WARN_ON_ONCE(!parent->child_ecpus_count); parent->child_ecpus_count--; } if (remote) goto get_css; /* * Skip the whole subtree if * 1) the cpumask remains the same, * 2) has no partition root state, * 3) HIER_CHECKALL flag not set, and * 4) for v2 load balance state same as its parent. */ if (!cp->partition_root_state && !(flags & HIER_CHECKALL) && cpumask_equal(tmp->new_cpus, cp->effective_cpus) && (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || (is_sched_load_balance(parent) == is_sched_load_balance(cp)))) { pos_css = css_rightmost_descendant(pos_css); continue; } update_parent_effective: /* * update_parent_effective_cpumask() should have been called * for cs already in update_cpumask(). We should also call * update_tasks_cpumask() again for tasks in the parent * cpuset if the parent's effective_cpus changes. */ if ((cp != cs) && old_prs) { switch (parent->partition_root_state) { case PRS_ROOT: case PRS_ISOLATED: update_parent = true; break; default: /* * When parent is not a partition root or is * invalid, child partition roots become * invalid too. */ if (is_partition_valid(cp)) new_prs = -cp->partition_root_state; WRITE_ONCE(cp->prs_err, is_partition_invalid(parent) ? PERR_INVPARENT : PERR_NOTPART); break; } } get_css: if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); if (update_parent) { update_parent_effective_cpumask(cp, partcmd_update, NULL, tmp); /* * The cpuset partition_root_state may become * invalid. Capture it. */ new_prs = cp->partition_root_state; } spin_lock_irq(&callback_lock); cpumask_copy(cp->effective_cpus, tmp->new_cpus); cp->partition_root_state = new_prs; /* * Make sure effective_xcpus is properly set for a valid * partition root. */ if ((new_prs > 0) && cpumask_empty(cp->exclusive_cpus)) cpumask_and(cp->effective_xcpus, cp->cpus_allowed, parent->effective_xcpus); else if (new_prs < 0) reset_partition_data(cp); spin_unlock_irq(&callback_lock); notify_partition_change(cp, old_prs); WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); update_tasks_cpumask(cp, cp->effective_cpus); /* * On default hierarchy, inherit the CS_SCHED_LOAD_BALANCE * from parent if current cpuset isn't a valid partition root * and their load balance states differ. */ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && !is_partition_valid(cp) && (is_sched_load_balance(parent) != is_sched_load_balance(cp))) { if (is_sched_load_balance(parent)) set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); else clear_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); } /* * On legacy hierarchy, if the effective cpumask of any non- * empty cpuset is changed, we need to rebuild sched domains. * On default hierarchy, the cpuset needs to be a partition * root as well. 
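 *
 * The rebuild itself is deferred: need_rebuild_sched_domains is only
 * acted upon after the descendant walk below finishes, and it is skipped
 * entirely when the caller passes HIER_NO_SD_REBUILD (as
 * update_sibling_cpumasks() does, leaving the rebuild to its callers).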
*/ if (!cpumask_empty(cp->cpus_allowed) && is_sched_load_balance(cp) && (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || is_partition_valid(cp))) need_rebuild_sched_domains = true; rcu_read_lock(); css_put(&cp->css); } rcu_read_unlock(); if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD)) rebuild_sched_domains_locked(); } /** * update_sibling_cpumasks - Update siblings cpumasks * @parent: Parent cpuset * @cs: Current cpuset * @tmp: Temp variables */ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct tmpmasks *tmp) { struct cpuset *sibling; struct cgroup_subsys_state *pos_css; lockdep_assert_held(&cpuset_mutex); /* * Check all its siblings and call update_cpumasks_hier() * if their effective_cpus will need to be changed. * * With the addition of effective_xcpus which is a subset of * cpus_allowed. It is possible a change in parent's effective_cpus * due to a change in a child partition's effective_xcpus will impact * its siblings even if they do not inherit parent's effective_cpus * directly. * * The update_cpumasks_hier() function may sleep. So we have to * release the RCU read lock before calling it. HIER_NO_SD_REBUILD * flag is used to suppress rebuild of sched domains as the callers * will take care of that. */ rcu_read_lock(); cpuset_for_each_child(sibling, pos_css, parent) { if (sibling == cs) continue; if (!sibling->use_parent_ecpus && !is_partition_valid(sibling)) { compute_effective_cpumask(tmp->new_cpus, sibling, parent); if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus)) continue; } if (!css_tryget_online(&sibling->css)) continue; rcu_read_unlock(); update_cpumasks_hier(sibling, tmp, HIER_NO_SD_REBUILD); rcu_read_lock(); css_put(&sibling->css); } rcu_read_unlock(); } /** * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it * @cs: the cpuset to consider * @trialcs: trial cpuset * @buf: buffer of cpu numbers written to this cpuset */ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; struct tmpmasks tmp; struct cpuset *parent = parent_cs(cs); bool invalidate = false; int hier_flags = 0; int old_prs = cs->partition_root_state; /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) return -EACCES; /* * An empty cpus_allowed is ok only if the cpuset has no tasks. * Since cpulist_parse() fails on an empty mask, we special case * that parsing. The validate_change() call ensures that cpusets * with tasks have cpus. */ if (!*buf) { cpumask_clear(trialcs->cpus_allowed); cpumask_clear(trialcs->effective_xcpus); } else { retval = cpulist_parse(buf, trialcs->cpus_allowed); if (retval < 0) return retval; if (!cpumask_subset(trialcs->cpus_allowed, top_cpuset.cpus_allowed)) return -EINVAL; /* * When exclusive_cpus isn't explicitly set, it is constrainted * by cpus_allowed and parent's effective_xcpus. Otherwise, * trialcs->effective_xcpus is used as a temporary cpumask * for checking validity of the partition root. 
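 *
 * (Illustrative aside, not in the original comment: @buf is a cpulist as
 * accepted by cpulist_parse(), e.g. "0-3,8" for CPUs 0, 1, 2, 3 and 8.
 * When exclusive_cpus is empty and @cs is not a valid partition root,
 * the computation below is skipped and effective_xcpus is left alone at
 * this point.)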
*/ if (!cpumask_empty(trialcs->exclusive_cpus) || is_partition_valid(cs)) compute_effective_exclusive_cpumask(trialcs, NULL); } /* Nothing to do if the cpus didn't change */ if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) return 0; if (alloc_cpumasks(NULL, &tmp)) return -ENOMEM; if (old_prs) { if (is_partition_valid(cs) && cpumask_empty(trialcs->effective_xcpus)) { invalidate = true; cs->prs_err = PERR_INVCPUS; } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) { invalidate = true; cs->prs_err = PERR_HKEEPING; } else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) { invalidate = true; cs->prs_err = PERR_NOCPUS; } } /* * Check all the descendants in update_cpumasks_hier() if * effective_xcpus is to be changed. */ if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus)) hier_flags = HIER_CHECKALL; retval = validate_change(cs, trialcs); if ((retval == -EINVAL) && cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { struct cgroup_subsys_state *css; struct cpuset *cp; /* * The -EINVAL error code indicates that partition sibling * CPU exclusivity rule has been violated. We still allow * the cpumask change to proceed while invalidating the * partition. However, any conflicting sibling partitions * have to be marked as invalid too. */ invalidate = true; rcu_read_lock(); cpuset_for_each_child(cp, css, parent) { struct cpumask *xcpus = fetch_xcpus(trialcs); if (is_partition_valid(cp) && cpumask_intersects(xcpus, cp->effective_xcpus)) { rcu_read_unlock(); update_parent_effective_cpumask(cp, partcmd_invalidate, NULL, &tmp); rcu_read_lock(); } } rcu_read_unlock(); retval = 0; } if (retval < 0) goto out_free; if (is_partition_valid(cs) || (is_partition_invalid(cs) && !invalidate)) { struct cpumask *xcpus = trialcs->effective_xcpus; if (cpumask_empty(xcpus) && is_partition_invalid(cs)) xcpus = trialcs->cpus_allowed; /* * Call remote_cpus_update() to handle valid remote partition */ if (is_remote_partition(cs)) remote_cpus_update(cs, xcpus, &tmp); else if (invalidate) update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, &tmp); else update_parent_effective_cpumask(cs, partcmd_update, xcpus, &tmp); } else if (!cpumask_empty(cs->exclusive_cpus)) { /* * Use trialcs->effective_cpus as a temp cpumask */ remote_partition_check(cs, trialcs->effective_xcpus, trialcs->effective_cpus, &tmp); } spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus); if ((old_prs > 0) && !is_partition_valid(cs)) reset_partition_data(cs); spin_unlock_irq(&callback_lock); /* effective_cpus/effective_xcpus will be updated here */ update_cpumasks_hier(cs, &tmp, hier_flags); /* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */ if (cs->partition_root_state) update_partition_sd_lb(cs, old_prs); out_free: free_cpumasks(NULL, &tmp); return retval; } /** * update_exclusive_cpumask - update the exclusive_cpus mask of a cpuset * @cs: the cpuset to consider * @trialcs: trial cpuset * @buf: buffer of cpu numbers written to this cpuset * * The tasks' cpumask will be updated if cs is a valid partition root. 
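 *
 * (Orientation note, added: this is the handler behind writes to the
 * cgroup v2 "cpuset.cpus.exclusive" file, e.g. writing "4-7" requests
 * CPUs 4-7 as exclusive; see FILE_EXCLUSIVE_CPULIST in
 * cpuset_write_resmask() further below.)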
*/ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; struct tmpmasks tmp; struct cpuset *parent = parent_cs(cs); bool invalidate = false; int hier_flags = 0; int old_prs = cs->partition_root_state; if (!*buf) { cpumask_clear(trialcs->exclusive_cpus); cpumask_clear(trialcs->effective_xcpus); } else { retval = cpulist_parse(buf, trialcs->exclusive_cpus); if (retval < 0) return retval; if (!is_cpu_exclusive(cs)) set_bit(CS_CPU_EXCLUSIVE, &trialcs->flags); } /* Nothing to do if the CPUs didn't change */ if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus)) return 0; if (*buf) compute_effective_exclusive_cpumask(trialcs, NULL); /* * Check all the descendants in update_cpumasks_hier() if * effective_xcpus is to be changed. */ if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus)) hier_flags = HIER_CHECKALL; retval = validate_change(cs, trialcs); if (retval) return retval; if (alloc_cpumasks(NULL, &tmp)) return -ENOMEM; if (old_prs) { if (cpumask_empty(trialcs->effective_xcpus)) { invalidate = true; cs->prs_err = PERR_INVCPUS; } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) { invalidate = true; cs->prs_err = PERR_HKEEPING; } else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) { invalidate = true; cs->prs_err = PERR_NOCPUS; } if (is_remote_partition(cs)) { if (invalidate) remote_partition_disable(cs, &tmp); else remote_cpus_update(cs, trialcs->effective_xcpus, &tmp); } else if (invalidate) { update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, &tmp); } else { update_parent_effective_cpumask(cs, partcmd_update, trialcs->effective_xcpus, &tmp); } } else if (!cpumask_empty(trialcs->exclusive_cpus)) { /* * Use trialcs->effective_cpus as a temp cpumask */ remote_partition_check(cs, trialcs->effective_xcpus, trialcs->effective_cpus, &tmp); } spin_lock_irq(&callback_lock); cpumask_copy(cs->exclusive_cpus, trialcs->exclusive_cpus); cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus); if ((old_prs > 0) && !is_partition_valid(cs)) reset_partition_data(cs); spin_unlock_irq(&callback_lock); /* * Call update_cpumasks_hier() to update effective_cpus/effective_xcpus * of the subtree when it is a valid partition root or effective_xcpus * is updated. */ if (is_partition_valid(cs) || hier_flags) update_cpumasks_hier(cs, &tmp, hier_flags); /* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */ if (cs->partition_root_state) update_partition_sd_lb(cs, old_prs); free_cpumasks(NULL, &tmp); return 0; } /* * Migrate memory region from one set of nodes to another. This is * performed asynchronously as it can be called from process migration path * holding locks involved in process management. All mm migrations are * performed in the queued order and can be waited for by flushing * cpuset_migrate_mm_wq. 
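 *
 * (Descriptive note: cpuset_post_attach() and cpuset_write_resmask()
 * below both do such a flush, so migrations queued by an attach or by a
 * cpus/mems write have completed by the time those operations return.)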
*/ struct cpuset_migrate_mm_work { struct work_struct work; struct mm_struct *mm; nodemask_t from; nodemask_t to; }; static void cpuset_migrate_mm_workfn(struct work_struct *work) { struct cpuset_migrate_mm_work *mwork = container_of(work, struct cpuset_migrate_mm_work, work); /* on a wq worker, no need to worry about %current's mems_allowed */ do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); mmput(mwork->mm); kfree(mwork); } static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { struct cpuset_migrate_mm_work *mwork; if (nodes_equal(*from, *to)) { mmput(mm); return; } mwork = kzalloc(sizeof(*mwork), GFP_KERNEL); if (mwork) { mwork->mm = mm; mwork->from = *from; mwork->to = *to; INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); queue_work(cpuset_migrate_mm_wq, &mwork->work); } else { mmput(mm); } } static void cpuset_post_attach(void) { flush_workqueue(cpuset_migrate_mm_wq); } /* * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy * @tsk: the task to change * @newmems: new nodes that the task will be set * * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed * and rebind an eventual tasks' mempolicy. If the task is allocating in * parallel, it might temporarily see an empty intersection, which results in * a seqlock check and retry before OOM or allocation failure. */ static void cpuset_change_task_nodemask(struct task_struct *tsk, nodemask_t *newmems) { task_lock(tsk); local_irq_disable(); write_seqcount_begin(&tsk->mems_allowed_seq); nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); mpol_rebind_task(tsk, newmems); tsk->mems_allowed = *newmems; write_seqcount_end(&tsk->mems_allowed_seq); local_irq_enable(); task_unlock(tsk); } static void *cpuset_being_rebound; /** * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. * @cs: the cpuset in which each task's mems_allowed mask needs to be changed * * Iterate through each task of @cs updating its mems_allowed to the * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. */ static void update_tasks_nodemask(struct cpuset *cs) { static nodemask_t newmems; /* protected by cpuset_mutex */ struct css_task_iter it; struct task_struct *task; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ guarantee_online_mems(cs, &newmems); /* * The mpol_rebind_mm() call takes mmap_lock, which we couldn't * take while holding tasklist_lock. Forks can happen - the * mpol_dup() cpuset_being_rebound check will catch such forks, * and rebind their vma mempolicies too. Because we still hold * the global cpuset_mutex, we know that no other rebind effort * will be contending for the global variable cpuset_being_rebound. * It's ok if we rebind the same mm twice; mpol_rebind_mm() * is idempotent. Also migrate pages in each mm to new nodes. */ css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) { struct mm_struct *mm; bool migrate; cpuset_change_task_nodemask(task, &newmems); mm = get_task_mm(task); if (!mm) continue; migrate = is_memory_migrate(cs); mpol_rebind_mm(mm, &cs->mems_allowed); if (migrate) cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); else mmput(mm); } css_task_iter_end(&it); /* * All the tasks' nodemasks have been updated, update * cs->old_mems_allowed. */ cs->old_mems_allowed = newmems; /* We're done rebinding vmas to this cpuset's new mems_allowed. 
*/ cpuset_being_rebound = NULL; } /* * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree * @cs: the cpuset to consider * @new_mems: a temp variable for calculating new effective_mems * * When configured nodemask is changed, the effective nodemasks of this cpuset * and all its descendants need to be updated. * * On legacy hierarchy, effective_mems will be the same with mems_allowed. * * Called with cpuset_mutex held */ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, cs) { struct cpuset *parent = parent_cs(cp); nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); /* * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some MEMs. */ if (is_in_v2_mode() && nodes_empty(*new_mems)) *new_mems = parent->effective_mems; /* Skip the whole subtree if the nodemask remains the same. */ if (nodes_equal(*new_mems, cp->effective_mems)) { pos_css = css_rightmost_descendant(pos_css); continue; } if (!css_tryget_online(&cp->css)) continue; rcu_read_unlock(); spin_lock_irq(&callback_lock); cp->effective_mems = *new_mems; spin_unlock_irq(&callback_lock); WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); update_tasks_nodemask(cp); rcu_read_lock(); css_put(&cp->css); } rcu_read_unlock(); } /* * Handle user request to change the 'mems' memory placement * of a cpuset. Needs to validate the request, update the * cpusets mems_allowed, and for each task in the cpuset, * update mems_allowed and rebind task's mempolicy and any vma * mempolicies and if the cpuset is marked 'memory_migrate', * migrate the tasks pages to the new memory. * * Call with cpuset_mutex held. May take callback_lock during call. * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * lock each such tasks mm->mmap_lock, scan its vma's and rebind * their mempolicies to the cpusets new mems_allowed. */ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; /* * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; * it's read-only */ if (cs == &top_cpuset) { retval = -EACCES; goto done; } /* * An empty mems_allowed is ok iff there are no tasks in the cpuset. * Since nodelist_parse() fails on an empty mask, we special case * that parsing. The validate_change() call ensures that cpusets * with tasks have memory. 
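 *
 * (Illustrative aside, added: like the cpu list, @buf is a nodelist as
 * accepted by nodelist_parse(), e.g. "0" or "0-1"; nodes outside
 * top_cpuset.mems_allowed are rejected with -EINVAL below.)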
*/ if (!*buf) { nodes_clear(trialcs->mems_allowed); } else { retval = nodelist_parse(buf, trialcs->mems_allowed); if (retval < 0) goto done; if (!nodes_subset(trialcs->mems_allowed, top_cpuset.mems_allowed)) { retval = -EINVAL; goto done; } } if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { retval = 0; /* Too easy - nothing to do */ goto done; } retval = validate_change(cs, trialcs); if (retval < 0) goto done; check_insane_mems_config(&trialcs->mems_allowed); spin_lock_irq(&callback_lock); cs->mems_allowed = trialcs->mems_allowed; spin_unlock_irq(&callback_lock); /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); done: return retval; } bool current_cpuset_is_being_rebound(void) { bool ret; rcu_read_lock(); ret = task_cs(current) == cpuset_being_rebound; rcu_read_unlock(); return ret; } static int update_relax_domain_level(struct cpuset *cs, s64 val) { #ifdef CONFIG_SMP if (val < -1 || val >= sched_domain_level_max) return -EINVAL; #endif if (val != cs->relax_domain_level) { cs->relax_domain_level = val; if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) rebuild_sched_domains_locked(); } return 0; } /** * update_tasks_flags - update the spread flags of tasks in the cpuset. * @cs: the cpuset in which each task's spread flags needs to be changed * * Iterate through each task of @cs updating its spread flags. As this * function is called with cpuset_mutex held, cpuset membership stays * stable. */ static void update_tasks_flags(struct cpuset *cs) { struct css_task_iter it; struct task_struct *task; css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) cpuset_update_task_spread_flags(cs, task); css_task_iter_end(&it); } /* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (see cpuset_flagbits_t) * cs: the cpuset to update * turning_on: whether the flag is being set or cleared * * Call with cpuset_mutex held. */ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on) { struct cpuset *trialcs; int balance_flag_changed; int spread_flag_changed; int err; trialcs = alloc_trial_cpuset(cs); if (!trialcs) return -ENOMEM; if (turning_on) set_bit(bit, &trialcs->flags); else clear_bit(bit, &trialcs->flags); err = validate_change(cs, trialcs); if (err < 0) goto out; balance_flag_changed = (is_sched_load_balance(cs) != is_sched_load_balance(trialcs)); spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); spin_lock_irq(&callback_lock); cs->flags = trialcs->flags; spin_unlock_irq(&callback_lock); if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); if (spread_flag_changed) update_tasks_flags(cs); out: free_cpuset(trialcs); return err; } /** * update_prstate - update partition_root_state * @cs: the cpuset to update * @new_prs: new partition root state * Return: 0 if successful, != 0 if error * * Call with cpuset_mutex held. */ static int update_prstate(struct cpuset *cs, int new_prs) { int err = PERR_NONE, old_prs = cs->partition_root_state; struct cpuset *parent = parent_cs(cs); struct tmpmasks tmpmask; bool new_xcpus_state = false; if (old_prs == new_prs) return 0; /* * Treat a previously invalid partition root as if it is a "member". 
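 *
 * (Descriptive note: when invoked from sched_partition_write() further
 * below, @new_prs is the value mapped from the string written to
 * "cpuset.cpus.partition": "root" -> PRS_ROOT, "isolated" ->
 * PRS_ISOLATED, "member" -> PRS_MEMBER.)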
*/ if (new_prs && is_prs_invalid(old_prs)) old_prs = PRS_MEMBER; if (alloc_cpumasks(NULL, &tmpmask)) return -ENOMEM; /* * Setup effective_xcpus if not properly set yet, it will be cleared * later if partition becomes invalid. */ if ((new_prs > 0) && cpumask_empty(cs->exclusive_cpus)) { spin_lock_irq(&callback_lock); cpumask_and(cs->effective_xcpus, cs->cpus_allowed, parent->effective_xcpus); spin_unlock_irq(&callback_lock); } err = update_partition_exclusive(cs, new_prs); if (err) goto out; if (!old_prs) { enum partition_cmd cmd = (new_prs == PRS_ROOT) ? partcmd_enable : partcmd_enablei; /* * cpus_allowed cannot be empty. */ if (cpumask_empty(cs->cpus_allowed)) { err = PERR_CPUSEMPTY; goto out; } err = update_parent_effective_cpumask(cs, cmd, NULL, &tmpmask); /* * If an attempt to become local partition root fails, * try to become a remote partition root instead. */ if (err && remote_partition_enable(cs, new_prs, &tmpmask)) err = 0; } else if (old_prs && new_prs) { /* * A change in load balance state only, no change in cpumasks. */ new_xcpus_state = true; } else { /* * Switching back to member is always allowed even if it * disables child partitions. */ if (is_remote_partition(cs)) remote_partition_disable(cs, &tmpmask); else update_parent_effective_cpumask(cs, partcmd_disable, NULL, &tmpmask); /* * Invalidation of child partitions will be done in * update_cpumasks_hier(). */ } out: /* * Make partition invalid & disable CS_CPU_EXCLUSIVE if an error * happens. */ if (err) { new_prs = -new_prs; update_partition_exclusive(cs, new_prs); } spin_lock_irq(&callback_lock); cs->partition_root_state = new_prs; WRITE_ONCE(cs->prs_err, err); if (!is_partition_valid(cs)) reset_partition_data(cs); else if (new_xcpus_state) partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(new_xcpus_state); /* Force update if switching back to member */ update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0); /* Update sched domains and load balance flag */ update_partition_sd_lb(cs, old_prs); notify_partition_change(cs, old_prs); free_cpumasks(NULL, &tmpmask); return 0; } /* * Frequency meter - How fast is some event occurring? * * These routines manage a digitally filtered, constant time based, * event frequency meter. There are four routines: * fmeter_init() - initialize a frequency meter. * fmeter_markevent() - called each time the event happens. * fmeter_getrate() - returns the recent rate of such events. * fmeter_update() - internal routine used to update fmeter. * * A common data structure is passed to each of these routines, * which is used to keep track of the state required to manage the * frequency meter and its digital filter. * * The filter works on the number of events marked per unit time. * The filter is single-pole low-pass recursive (IIR). The time unit * is 1 second. Arithmetic is done using 32-bit integers scaled to * simulate 3 decimal digits of precision (multiplied by 1000). * * With an FM_COEF of 933, and a time base of 1 second, the filter * has a half-life of 10 seconds, meaning that if the events quit * happening, then the rate returned from the fmeter_getrate() * will be cut in half each 10 seconds, until it converges to zero. * * It is not worth doing a real infinitely recursive filter. If more * than FM_MAXTICKS ticks have elapsed since the last filter event, * just compute FM_MAXTICKS ticks worth, by which point the level * will be stable. 
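 *
 * Worked example (added for clarity): with FM_COEF = 933 and
 * FM_SCALE = 1000, each elapsed second multiplies the level by 0.933,
 * and 0.933^10 is roughly 0.5, which is where the 10 second half-life
 * quoted above comes from.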
* * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid * arithmetic overflow in the fmeter_update() routine. * * Given the simple 32 bit integer arithmetic used, this meter works * best for reporting rates between one per millisecond (msec) and * one per 32 (approx) seconds. At constant rates faster than one * per msec it maxes out at values just under 1,000,000. At constant * rates between one per msec, and one per second it will stabilize * to a value N*1000, where N is the rate of events per second. * At constant rates between one per second and one per 32 seconds, * it will be choppy, moving up on the seconds that have an event, * and then decaying until the next event. At rates slower than * about one in 32 seconds, it decays all the way back to zero between * each event. */ #define FM_COEF 933 /* coefficient for half-life of 10 secs */ #define FM_MAXTICKS ((u32)99) /* useless computing more ticks than this */ #define FM_MAXCNT 1000000 /* limit cnt to avoid overflow */ #define FM_SCALE 1000 /* faux fixed point scale */ /* Initialize a frequency meter */ static void fmeter_init(struct fmeter *fmp) { fmp->cnt = 0; fmp->val = 0; fmp->time = 0; spin_lock_init(&fmp->lock); } /* Internal meter update - process cnt events and update value */ static void fmeter_update(struct fmeter *fmp) { time64_t now; u32 ticks; now = ktime_get_seconds(); ticks = now - fmp->time; if (ticks == 0) return; ticks = min(FM_MAXTICKS, ticks); while (ticks-- > 0) fmp->val = (FM_COEF * fmp->val) / FM_SCALE; fmp->time = now; fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE; fmp->cnt = 0; } /* Process any previous ticks, then bump cnt by one (times scale). */ static void fmeter_markevent(struct fmeter *fmp) { spin_lock(&fmp->lock); fmeter_update(fmp); fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE); spin_unlock(&fmp->lock); } /* Process any previous ticks, then return current value. */ static int fmeter_getrate(struct fmeter *fmp) { int val; spin_lock(&fmp->lock); fmeter_update(fmp); val = fmp->val; spin_unlock(&fmp->lock); return val; } static struct cpuset *cpuset_attach_old_cs; /* * Check to see if a cpuset can accept a new task * For v1, cpus_allowed and mems_allowed can't be empty. * For v2, effective_cpus can't be empty. * Note that in v1, effective_cpus = cpus_allowed. 
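 *
 * (Descriptive note: a failed check is reported as -ENOSPC, which is
 * what cpuset_can_attach() and cpuset_can_fork() below end up returning
 * in that case.)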
*/ static int cpuset_can_attach_check(struct cpuset *cs) { if (cpumask_empty(cs->effective_cpus) || (!is_in_v2_mode() && nodes_empty(cs->mems_allowed))) return -ENOSPC; return 0; } static void reset_migrate_dl_data(struct cpuset *cs) { cs->nr_migrate_dl_tasks = 0; cs->sum_migrate_dl_bw = 0; } /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ static int cpuset_can_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct cpuset *cs, *oldcs; struct task_struct *task; bool cpus_updated, mems_updated; int ret; /* used later by cpuset_attach() */ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); oldcs = cpuset_attach_old_cs; cs = css_cs(css); mutex_lock(&cpuset_mutex); /* Check to see if task is allowed in the cpuset */ ret = cpuset_can_attach_check(cs); if (ret) goto out_unlock; cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); cgroup_taskset_for_each(task, css, tset) { ret = task_can_attach(task); if (ret) goto out_unlock; /* * Skip rights over task check in v2 when nothing changes, * migration permission derives from hierarchy ownership in * cgroup_procs_write_permission()). */ if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || (cpus_updated || mems_updated)) { ret = security_task_setscheduler(task); if (ret) goto out_unlock; } if (dl_task(task)) { cs->nr_migrate_dl_tasks++; cs->sum_migrate_dl_bw += task->dl.dl_bw; } } if (!cs->nr_migrate_dl_tasks) goto out_success; if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); if (unlikely(cpu >= nr_cpu_ids)) { reset_migrate_dl_data(cs); ret = -EINVAL; goto out_unlock; } ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); if (ret) { reset_migrate_dl_data(cs); goto out_unlock; } } out_success: /* * Mark attach is in progress. This makes validate_change() fail * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; out_unlock: mutex_unlock(&cpuset_mutex); return ret; } static void cpuset_cancel_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct cpuset *cs; cgroup_taskset_first(tset, &css); cs = css_cs(css); mutex_lock(&cpuset_mutex); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); if (cs->nr_migrate_dl_tasks) { int cpu = cpumask_any(cs->effective_cpus); dl_bw_free(cpu, cs->sum_migrate_dl_bw); reset_migrate_dl_data(cs); } mutex_unlock(&cpuset_mutex); } /* * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach_task() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ static cpumask_var_t cpus_attach; static nodemask_t cpuset_attach_nodemask_to; static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) { lockdep_assert_held(&cpuset_mutex); if (cs != &top_cpuset) guarantee_online_cpus(task, cpus_attach); else cpumask_andnot(cpus_attach, task_cpu_possible_mask(task), subpartitions_cpus); /* * can_attach beforehand should guarantee that this doesn't * fail. 
TODO: have a better way to handle failure here */ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flags(cs, task); } static void cpuset_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct task_struct *leader; struct cgroup_subsys_state *css; struct cpuset *cs; struct cpuset *oldcs = cpuset_attach_old_cs; bool cpus_updated, mems_updated; cgroup_taskset_first(tset, &css); cs = css_cs(css); lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ mutex_lock(&cpuset_mutex); cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); /* * In the default hierarchy, enabling cpuset in the child cgroups * will trigger a number of cpuset_attach() calls with no change * in effective cpus and mems. In that case, we can optimize out * by skipping the task iteration and update. */ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && !cpus_updated && !mems_updated) { cpuset_attach_nodemask_to = cs->effective_mems; goto out; } guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cgroup_taskset_for_each(task, css, tset) cpuset_attach_task(cs, task); /* * Change mm for all threadgroup leaders. This is expensive and may * sleep and should be moved outside migration path proper. Skip it * if there is no change in effective_mems and CS_MEMORY_MIGRATE is * not set. */ cpuset_attach_nodemask_to = cs->effective_mems; if (!is_memory_migrate(cs) && !mems_updated) goto out; cgroup_taskset_for_each_leader(leader, css, tset) { struct mm_struct *mm = get_task_mm(leader); if (mm) { mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); /* * old_mems_allowed is the same with mems_allowed * here, except if this task is being moved * automatically due to hotplug. In that case * @mems_allowed has been updated and is empty, so * @old_mems_allowed is the right nodesets that we * migrate mm from. 
*/ if (is_memory_migrate(cs)) cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); else mmput(mm); } } out: cs->old_mems_allowed = cpuset_attach_nodemask_to; if (cs->nr_migrate_dl_tasks) { cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; reset_migrate_dl_data(cs); } cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); } /* The various types of files and directories in a cpuset file system */ typedef enum { FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, FILE_EFFECTIVE_CPULIST, FILE_EFFECTIVE_MEMLIST, FILE_SUBPARTS_CPULIST, FILE_EXCLUSIVE_CPULIST, FILE_EFFECTIVE_XCPULIST, FILE_ISOLATED_CPULIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_MEM_HARDWALL, FILE_SCHED_LOAD_BALANCE, FILE_PARTITION_ROOT, FILE_SCHED_RELAX_DOMAIN_LEVEL, FILE_MEMORY_PRESSURE_ENABLED, FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, FILE_SPREAD_SLAB, } cpuset_filetype_t; static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; int retval = 0; cpus_read_lock(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; } switch (type) { case FILE_CPU_EXCLUSIVE: retval = update_flag(CS_CPU_EXCLUSIVE, cs, val); break; case FILE_MEM_EXCLUSIVE: retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); break; case FILE_MEM_HARDWALL: retval = update_flag(CS_MEM_HARDWALL, cs, val); break; case FILE_SCHED_LOAD_BALANCE: retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); break; case FILE_MEMORY_MIGRATE: retval = update_flag(CS_MEMORY_MIGRATE, cs, val); break; case FILE_MEMORY_PRESSURE_ENABLED: cpuset_memory_pressure_enabled = !!val; break; case FILE_SPREAD_PAGE: retval = update_flag(CS_SPREAD_PAGE, cs, val); break; case FILE_SPREAD_SLAB: retval = update_flag(CS_SPREAD_SLAB, cs, val); break; default: retval = -EINVAL; break; } out_unlock: mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return retval; } static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, s64 val) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; int retval = -ENODEV; cpus_read_lock(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; switch (type) { case FILE_SCHED_RELAX_DOMAIN_LEVEL: retval = update_relax_domain_level(cs, val); break; default: retval = -EINVAL; break; } out_unlock: mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return retval; } /* * Common handling for a write to a "cpus" or "mems" file. */ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cpuset *cs = css_cs(of_css(of)); struct cpuset *trialcs; int retval = -ENODEV; buf = strstrip(buf); /* * CPU or memory hotunplug may leave @cs w/o any execution * resources, in which case the hotplug code asynchronously updates * configuration and transfers all tasks to the nearest ancestor * which can execute. * * As writes to "cpus" or "mems" may restore @cs's execution * resources, wait for the previously scheduled operations before * proceeding, so that we don't end up keep removing tasks added * after execution capability is restored. * * cpuset_hotplug_work calls back into cgroup core via * cgroup_transfer_tasks() and waiting for it from a cgroupfs * operation like this one can lead to a deadlock through kernfs * active_ref protection. Let's break the protection. 
Losing the * protection is okay as we check whether @cs is online after * grabbing cpuset_mutex anyway. This only happens on the legacy * hierarchies. */ css_get(&cs->css); kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work); cpus_read_lock(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; trialcs = alloc_trial_cpuset(cs); if (!trialcs) { retval = -ENOMEM; goto out_unlock; } switch (of_cft(of)->private) { case FILE_CPULIST: retval = update_cpumask(cs, trialcs, buf); break; case FILE_EXCLUSIVE_CPULIST: retval = update_exclusive_cpumask(cs, trialcs, buf); break; case FILE_MEMLIST: retval = update_nodemask(cs, trialcs, buf); break; default: retval = -EINVAL; break; } free_cpuset(trialcs); out_unlock: mutex_unlock(&cpuset_mutex); cpus_read_unlock(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); return retval ?: nbytes; } /* * These ascii lists should be read in a single call, by using a user * buffer large enough to hold the entire map. If read in smaller * chunks, there is no guarantee of atomicity. Since the display format * used, list of ranges of sequential numbers, is variable length, * and since these maps can change value dynamically, one could read * gibberish by doing partial reads while a list was changing. */ static int cpuset_common_seq_show(struct seq_file *sf, void *v) { struct cpuset *cs = css_cs(seq_css(sf)); cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; spin_lock_irq(&callback_lock); switch (type) { case FILE_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); break; case FILE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); break; case FILE_EFFECTIVE_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); break; case FILE_EFFECTIVE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); break; case FILE_EXCLUSIVE_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->exclusive_cpus)); break; case FILE_EFFECTIVE_XCPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_xcpus)); break; case FILE_SUBPARTS_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(subpartitions_cpus)); break; case FILE_ISOLATED_CPULIST: seq_printf(sf, "%*pbl\n", cpumask_pr_args(isolated_cpus)); break; default: ret = -EINVAL; } spin_unlock_irq(&callback_lock); return ret; } static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; switch (type) { case FILE_CPU_EXCLUSIVE: return is_cpu_exclusive(cs); case FILE_MEM_EXCLUSIVE: return is_mem_exclusive(cs); case FILE_MEM_HARDWALL: return is_mem_hardwall(cs); case FILE_SCHED_LOAD_BALANCE: return is_sched_load_balance(cs); case FILE_MEMORY_MIGRATE: return is_memory_migrate(cs); case FILE_MEMORY_PRESSURE_ENABLED: return cpuset_memory_pressure_enabled; case FILE_MEMORY_PRESSURE: return fmeter_getrate(&cs->fmeter); case FILE_SPREAD_PAGE: return is_spread_page(cs); case FILE_SPREAD_SLAB: return is_spread_slab(cs); default: BUG(); } /* Unreachable but makes gcc happy */ return 0; } static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) { struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; switch (type) { case FILE_SCHED_RELAX_DOMAIN_LEVEL: return cs->relax_domain_level; default: BUG(); } /* Unreachable but makes gcc happy */ return 0; } static int sched_partition_show(struct seq_file *seq, void *v) { struct cpuset *cs = 
css_cs(seq_css(seq)); const char *err, *type = NULL; switch (cs->partition_root_state) { case PRS_ROOT: seq_puts(seq, "root\n"); break; case PRS_ISOLATED: seq_puts(seq, "isolated\n"); break; case PRS_MEMBER: seq_puts(seq, "member\n"); break; case PRS_INVALID_ROOT: type = "root"; fallthrough; case PRS_INVALID_ISOLATED: if (!type) type = "isolated"; err = perr_strings[READ_ONCE(cs->prs_err)]; if (err) seq_printf(seq, "%s invalid (%s)\n", type, err); else seq_printf(seq, "%s invalid\n", type); break; } return 0; } static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cpuset *cs = css_cs(of_css(of)); int val; int retval = -ENODEV; buf = strstrip(buf); /* * Convert "root" to ENABLED, and convert "member" to DISABLED. */ if (!strcmp(buf, "root")) val = PRS_ROOT; else if (!strcmp(buf, "member")) val = PRS_MEMBER; else if (!strcmp(buf, "isolated")) val = PRS_ISOLATED; else return -EINVAL; css_get(&cs->css); cpus_read_lock(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; retval = update_prstate(cs, val); out_unlock: mutex_unlock(&cpuset_mutex); cpus_read_unlock(); css_put(&cs->css); return retval ?: nbytes; } /* * for the common functions, 'private' gives the type of file */ static struct cftype legacy_files[] = { { .name = "cpus", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * NR_CPUS), .private = FILE_CPULIST, }, { .name = "mems", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * MAX_NUMNODES), .private = FILE_MEMLIST, }, { .name = "effective_cpus", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_CPULIST, }, { .name = "effective_mems", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_MEMLIST, }, { .name = "cpu_exclusive", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_CPU_EXCLUSIVE, }, { .name = "mem_exclusive", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEM_EXCLUSIVE, }, { .name = "mem_hardwall", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEM_HARDWALL, }, { .name = "sched_load_balance", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SCHED_LOAD_BALANCE, }, { .name = "sched_relax_domain_level", .read_s64 = cpuset_read_s64, .write_s64 = cpuset_write_s64, .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, }, { .name = "memory_migrate", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_MIGRATE, }, { .name = "memory_pressure", .read_u64 = cpuset_read_u64, .private = FILE_MEMORY_PRESSURE, }, { .name = "memory_spread_page", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_PAGE, }, { /* obsolete, may be removed in the future */ .name = "memory_spread_slab", .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_SLAB, }, { .name = "memory_pressure_enabled", .flags = CFTYPE_ONLY_ON_ROOT, .read_u64 = cpuset_read_u64, .write_u64 = cpuset_write_u64, .private = FILE_MEMORY_PRESSURE_ENABLED, }, { } /* terminate */ }; /* * This is currently a minimal set for the default hierarchy. It can be * expanded later on by migrating more features and control files from v1. 
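 *
 * (Illustrative note, added: with the "cpuset." controller prefix applied
 * by the cgroup core, these appear in a v2 cgroup directory as files such
 * as cpuset.cpus, cpuset.mems, cpuset.cpus.effective,
 * cpuset.cpus.partition and cpuset.cpus.exclusive.)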
*/ static struct cftype dfl_files[] = { { .name = "cpus", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * NR_CPUS), .private = FILE_CPULIST, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "mems", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * MAX_NUMNODES), .private = FILE_MEMLIST, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "cpus.effective", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_CPULIST, }, { .name = "mems.effective", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_MEMLIST, }, { .name = "cpus.partition", .seq_show = sched_partition_show, .write = sched_partition_write, .private = FILE_PARTITION_ROOT, .flags = CFTYPE_NOT_ON_ROOT, .file_offset = offsetof(struct cpuset, partition_file), }, { .name = "cpus.exclusive", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, .max_write_len = (100U + 6 * NR_CPUS), .private = FILE_EXCLUSIVE_CPULIST, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "cpus.exclusive.effective", .seq_show = cpuset_common_seq_show, .private = FILE_EFFECTIVE_XCPULIST, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "cpus.subpartitions", .seq_show = cpuset_common_seq_show, .private = FILE_SUBPARTS_CPULIST, .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG, }, { .name = "cpus.isolated", .seq_show = cpuset_common_seq_show, .private = FILE_ISOLATED_CPULIST, .flags = CFTYPE_ONLY_ON_ROOT, }, { } /* terminate */ }; /** * cpuset_css_alloc - Allocate a cpuset css * @parent_css: Parent css of the control group that the new cpuset will be * part of * Return: cpuset css on success, -ENOMEM on failure. * * Allocate and initialize a new cpuset css, for non-NULL @parent_css, return * top cpuset css otherwise. */ static struct cgroup_subsys_state * cpuset_css_alloc(struct cgroup_subsys_state *parent_css) { struct cpuset *cs; if (!parent_css) return &top_cpuset.css; cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); if (alloc_cpumasks(cs, NULL)) { kfree(cs); return ERR_PTR(-ENOMEM); } __set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); nodes_clear(cs->mems_allowed); nodes_clear(cs->effective_mems); fmeter_init(&cs->fmeter); cs->relax_domain_level = -1; INIT_LIST_HEAD(&cs->remote_sibling); /* Set CS_MEMORY_MIGRATE for default hierarchy */ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) __set_bit(CS_MEMORY_MIGRATE, &cs->flags); return &cs->css; } static int cpuset_css_online(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); struct cpuset *parent = parent_cs(cs); struct cpuset *tmp_cs; struct cgroup_subsys_state *pos_css; if (!parent) return 0; cpus_read_lock(); mutex_lock(&cpuset_mutex); set_bit(CS_ONLINE, &cs->flags); if (is_spread_page(parent)) set_bit(CS_SPREAD_PAGE, &cs->flags); if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); cpuset_inc(); spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { cpumask_copy(cs->effective_cpus, parent->effective_cpus); cs->effective_mems = parent->effective_mems; cs->use_parent_ecpus = true; parent->child_ecpus_count++; /* * Clear CS_SCHED_LOAD_BALANCE if parent is isolated */ if (!is_sched_load_balance(parent)) clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); } /* * For v2, clear CS_SCHED_LOAD_BALANCE if parent is isolated */ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && !is_sched_load_balance(parent)) clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); spin_unlock_irq(&callback_lock); if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; /* * Clone @parent's 
configuration if CGRP_CPUSET_CLONE_CHILDREN is * set. This flag handling is implemented in cgroup core for * historical reasons - the flag may be specified during mount. * * Currently, if any sibling cpusets have exclusive cpus or mem, we * refuse to clone the configuration - thereby refusing the task to * be entered, and as a result refusing the sys_unshare() or * clone() which initiated it. If this becomes a problem for some * users who wish to allow that scenario, then this could be * changed to grant parent->cpus_allowed-sibling_cpus_exclusive * (and likewise for mems) to the new cgroup. */ rcu_read_lock(); cpuset_for_each_child(tmp_cs, pos_css, parent) { if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) { rcu_read_unlock(); goto out_unlock; } } rcu_read_unlock(); spin_lock_irq(&callback_lock); cs->mems_allowed = parent->mems_allowed; cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return 0; } /* * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which * will call rebuild_sched_domains_locked(). That is not needed * in the default hierarchy where only changes in partition * will cause repartitioning. * * If the cpuset has the 'sched.partition' flag enabled, simulate * turning 'sched.partition" off. */ static void cpuset_css_offline(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); cpus_read_lock(); mutex_lock(&cpuset_mutex); if (is_partition_valid(cs)) update_prstate(cs, 0); if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); if (cs->use_parent_ecpus) { struct cpuset *parent = parent_cs(cs); cs->use_parent_ecpus = false; parent->child_ecpus_count--; } cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); mutex_unlock(&cpuset_mutex); cpus_read_unlock(); } static void cpuset_css_free(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); free_cpuset(cs); } static void cpuset_bind(struct cgroup_subsys_state *root_css) { mutex_lock(&cpuset_mutex); spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); cpumask_copy(top_cpuset.effective_xcpus, cpu_possible_mask); top_cpuset.mems_allowed = node_possible_map; } else { cpumask_copy(top_cpuset.cpus_allowed, top_cpuset.effective_cpus); top_cpuset.mems_allowed = top_cpuset.effective_mems; } spin_unlock_irq(&callback_lock); mutex_unlock(&cpuset_mutex); } /* * In case the child is cloned into a cpuset different from its parent, * additional checks are done to see if the move is allowed. */ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) { struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); bool same_cs; int ret; rcu_read_lock(); same_cs = (cs == task_cs(current)); rcu_read_unlock(); if (same_cs) return 0; lockdep_assert_held(&cgroup_mutex); mutex_lock(&cpuset_mutex); /* Check to see if task is allowed in the cpuset */ ret = cpuset_can_attach_check(cs); if (ret) goto out_unlock; ret = task_can_attach(task); if (ret) goto out_unlock; ret = security_task_setscheduler(task); if (ret) goto out_unlock; /* * Mark attach is in progress. This makes validate_change() fail * changes which zero cpus/mems_allowed. 
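 *
 * (Descriptive note: the counter incremented here is decremented again in
 * cpuset_cancel_fork() if the fork is aborted, or at the end of
 * cpuset_fork() once the CLONE_INTO_CGROUP child has been attached.)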
*/ cs->attach_in_progress++; out_unlock: mutex_unlock(&cpuset_mutex); return ret; } static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset) { struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); bool same_cs; rcu_read_lock(); same_cs = (cs == task_cs(current)); rcu_read_unlock(); if (same_cs) return; mutex_lock(&cpuset_mutex); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); } /* * Make sure the new task conform to the current state of its parent, * which could have been changed by cpuset just after it inherits the * state from the parent and before it sits on the cgroup's task list. */ static void cpuset_fork(struct task_struct *task) { struct cpuset *cs; bool same_cs; rcu_read_lock(); cs = task_cs(task); same_cs = (cs == task_cs(current)); rcu_read_unlock(); if (same_cs) { if (cs == &top_cpuset) return; set_cpus_allowed_ptr(task, current->cpus_ptr); task->mems_allowed = current->mems_allowed; return; } /* CLONE_INTO_CGROUP */ mutex_lock(&cpuset_mutex); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cpuset_attach_task(cs, task); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); } struct cgroup_subsys cpuset_cgrp_subsys = { .css_alloc = cpuset_css_alloc, .css_online = cpuset_css_online, .css_offline = cpuset_css_offline, .css_free = cpuset_css_free, .can_attach = cpuset_can_attach, .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, .post_attach = cpuset_post_attach, .bind = cpuset_bind, .can_fork = cpuset_can_fork, .cancel_fork = cpuset_cancel_fork, .fork = cpuset_fork, .legacy_cftypes = legacy_files, .dfl_cftypes = dfl_files, .early_init = true, .threaded = true, }; /** * cpuset_init - initialize cpusets at system boot * * Description: Initialize top_cpuset **/ int __init cpuset_init(void) { BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_xcpus, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.exclusive_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&isolated_cpus, GFP_KERNEL)); cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); cpumask_setall(top_cpuset.effective_cpus); cpumask_setall(top_cpuset.effective_xcpus); cpumask_setall(top_cpuset.exclusive_cpus); nodes_setall(top_cpuset.effective_mems); fmeter_init(&top_cpuset.fmeter); set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); top_cpuset.relax_domain_level = -1; INIT_LIST_HEAD(&remote_children); BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)); return 0; } /* * If CPU and/or memory hotplug handlers, below, unplug any CPUs * or memory nodes, we need to walk over the cpuset hierarchy, * removing that CPU or node from all cpusets. If this removes the * last CPU or node from a cpuset, then move the tasks in the empty * cpuset to its next-highest non-empty parent. */ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) { struct cpuset *parent; /* * Find its next-highest non-empty parent, (top cpuset * has online cpus, so can't be empty). 
*/ parent = parent_cs(cs); while (cpumask_empty(parent->cpus_allowed) || nodes_empty(parent->mems_allowed)) parent = parent_cs(parent); if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) { pr_err("cpuset: failed to transfer tasks out of empty cpuset "); pr_cont_cgroup_name(cs->css.cgroup); pr_cont("\n"); } } static void hotplug_update_tasks_legacy(struct cpuset *cs, struct cpumask *new_cpus, nodemask_t *new_mems, bool cpus_updated, bool mems_updated) { bool is_empty; spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, new_cpus); cpumask_copy(cs->effective_cpus, new_cpus); cs->mems_allowed = *new_mems; cs->effective_mems = *new_mems; spin_unlock_irq(&callback_lock); /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, * as the tasks will be migrated to an ancestor. */ if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) update_tasks_cpumask(cs, new_cpus); if (mems_updated && !nodes_empty(cs->mems_allowed)) update_tasks_nodemask(cs); is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed); /* * Move tasks to the nearest ancestor with execution resources, * This is full cgroup operation which will also call back into * cpuset. Should be done outside any lock. */ if (is_empty) { mutex_unlock(&cpuset_mutex); remove_tasks_in_empty_cpuset(cs); mutex_lock(&cpuset_mutex); } } static void hotplug_update_tasks(struct cpuset *cs, struct cpumask *new_cpus, nodemask_t *new_mems, bool cpus_updated, bool mems_updated) { /* A partition root is allowed to have empty effective cpus */ if (cpumask_empty(new_cpus) && !is_partition_valid(cs)) cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); if (nodes_empty(*new_mems)) *new_mems = parent_cs(cs)->effective_mems; spin_lock_irq(&callback_lock); cpumask_copy(cs->effective_cpus, new_cpus); cs->effective_mems = *new_mems; spin_unlock_irq(&callback_lock); if (cpus_updated) update_tasks_cpumask(cs, new_cpus); if (mems_updated) update_tasks_nodemask(cs); } static bool force_rebuild; void cpuset_force_rebuild(void) { force_rebuild = true; } /* * Attempt to acquire a cpus_read_lock while a hotplug operation may be in * progress. * Return: true if successful, false otherwise * * To avoid circular lock dependency between cpuset_mutex and cpus_read_lock, * cpus_read_trylock() is used here to acquire the lock. */ static bool cpuset_hotplug_cpus_read_trylock(void) { int retries = 0; while (!cpus_read_trylock()) { /* * CPU hotplug still in progress. Retry 5 times * with a 10ms wait before bailing out. */ if (++retries > 5) return false; msleep(10); } return true; } /** * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest * @tmp: the tmpmasks structure pointer * * Compare @cs's cpu and mem masks against top_cpuset and if some have gone * offline, update @cs accordingly. If @cs ends up with no CPU or memory, * all its tasks are moved to the nearest ancestor with both resources. */ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) { static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated; bool mems_updated; bool remote; int partcmd = -1; struct cpuset *parent; retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); mutex_lock(&cpuset_mutex); /* * We have raced with task attaching. We wait until attaching * is finished, so we won't attach a task to an empty cpuset. 
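 *
 * (Descriptive note: the wait_event() above runs without cpuset_mutex,
 * so attach_in_progress is re-checked here under the mutex and we go
 * back and wait again if another attach slipped in.)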
*/ if (cs->attach_in_progress) { mutex_unlock(&cpuset_mutex); goto retry; } parent = parent_cs(cs); compute_effective_cpumask(&new_cpus, cs, parent); nodes_and(new_mems, cs->mems_allowed, parent->effective_mems); if (!tmp || !cs->partition_root_state) goto update_tasks; /* * Compute effective_cpus for valid partition root, may invalidate * child partition roots if necessary. */ remote = is_remote_partition(cs); if (remote || (is_partition_valid(cs) && is_partition_valid(parent))) compute_partition_effective_cpumask(cs, &new_cpus); if (remote && cpumask_empty(&new_cpus) && partition_is_populated(cs, NULL) && cpuset_hotplug_cpus_read_trylock()) { remote_partition_disable(cs, tmp); compute_effective_cpumask(&new_cpus, cs, parent); remote = false; cpuset_force_rebuild(); cpus_read_unlock(); } /* * Force the partition to become invalid if either one of * the following conditions hold: * 1) empty effective cpus but not valid empty partition. * 2) parent is invalid or doesn't grant any cpus to child * partitions. */ if (is_local_partition(cs) && (!is_partition_valid(parent) || tasks_nocpu_error(parent, cs, &new_cpus))) partcmd = partcmd_invalidate; /* * On the other hand, an invalid partition root may be transitioned * back to a regular one. */ else if (is_partition_valid(parent) && is_partition_invalid(cs)) partcmd = partcmd_update; /* * cpus_read_lock needs to be held before calling * update_parent_effective_cpumask(). To avoid circular lock * dependency between cpuset_mutex and cpus_read_lock, * cpus_read_trylock() is used here to acquire the lock. */ if (partcmd >= 0) { if (!cpuset_hotplug_cpus_read_trylock()) goto update_tasks; update_parent_effective_cpumask(cs, partcmd, NULL, tmp); cpus_read_unlock(); if ((partcmd == partcmd_invalidate) || is_partition_valid(cs)) { compute_partition_effective_cpumask(cs, &new_cpus); cpuset_force_rebuild(); } } update_tasks: cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); mems_updated = !nodes_equal(new_mems, cs->effective_mems); if (!cpus_updated && !mems_updated) goto unlock; /* Hotplug doesn't affect this cpuset */ if (mems_updated) check_insane_mems_config(&new_mems); if (is_in_v2_mode()) hotplug_update_tasks(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); else hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); unlock: mutex_unlock(&cpuset_mutex); } /** * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset * @work: unused * * This function is called after either CPU or memory configuration has * changed and updates cpuset accordingly. The top_cpuset is always * synchronized to cpu_active_mask and N_MEMORY, which is necessary in * order to make cpusets transparent (of no affect) on systems that are * actively using CPU hotplug but making no active use of cpusets. * * Non-root cpusets are only affected by offlining. If any CPUs or memory * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on * all descendants. * * Note that CPU offlining during suspend is ignored. We don't modify * cpusets across suspend/resume cycles at all. 
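 *
 * (Descriptive note: this work item is queued from
 * cpuset_update_active_cpus() and from the memory hotplug notifier below;
 * cpuset_wait_for_hotplug() flushes it for callers that need the update
 * to be complete.)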
*/ static void cpuset_hotplug_workfn(struct work_struct *work) { static cpumask_t new_cpus; static nodemask_t new_mems; bool cpus_updated, mems_updated; bool on_dfl = is_in_v2_mode(); struct tmpmasks tmp, *ptmp = NULL; if (on_dfl && !alloc_cpumasks(NULL, &tmp)) ptmp = &tmp; mutex_lock(&cpuset_mutex); /* fetch the available cpus/mems and find out which changed how */ cpumask_copy(&new_cpus, cpu_active_mask); new_mems = node_states[N_MEMORY]; /* * If subpartitions_cpus is populated, it is likely that the check * below will produce a false positive on cpus_updated when the cpu * list isn't changed. It is extra work, but it is better to be safe. */ cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus) || !cpumask_empty(subpartitions_cpus); mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); /* * In the rare case that hotplug removes all the cpus in * subpartitions_cpus, we assumed that cpus are updated. */ if (!cpus_updated && top_cpuset.nr_subparts) cpus_updated = true; /* For v1, synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { spin_lock_irq(&callback_lock); if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); /* * Make sure that CPUs allocated to child partitions * do not show up in effective_cpus. If no CPU is left, * we clear the subpartitions_cpus & let the child partitions * fight for the CPUs again. */ if (!cpumask_empty(subpartitions_cpus)) { if (cpumask_subset(&new_cpus, subpartitions_cpus)) { top_cpuset.nr_subparts = 0; cpumask_clear(subpartitions_cpus); } else { cpumask_andnot(&new_cpus, &new_cpus, subpartitions_cpus); } } cpumask_copy(top_cpuset.effective_cpus, &new_cpus); spin_unlock_irq(&callback_lock); /* we don't mess with cpumasks of tasks in top_cpuset */ } /* synchronize mems_allowed to N_MEMORY */ if (mems_updated) { spin_lock_irq(&callback_lock); if (!on_dfl) top_cpuset.mems_allowed = new_mems; top_cpuset.effective_mems = new_mems; spin_unlock_irq(&callback_lock); update_tasks_nodemask(&top_cpuset); } mutex_unlock(&cpuset_mutex); /* if cpus or mems changed, we need to propagate to descendants */ if (cpus_updated || mems_updated) { struct cpuset *cs; struct cgroup_subsys_state *pos_css; rcu_read_lock(); cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { if (cs == &top_cpuset || !css_tryget_online(&cs->css)) continue; rcu_read_unlock(); cpuset_hotplug_update_tasks(cs, ptmp); rcu_read_lock(); css_put(&cs->css); } rcu_read_unlock(); } /* rebuild sched domains if cpus_allowed has changed */ if (cpus_updated || force_rebuild) { force_rebuild = false; rebuild_sched_domains(); } free_cpumasks(NULL, ptmp); } void cpuset_update_active_cpus(void) { /* * We're inside cpu hotplug critical region which usually nests * inside cgroup synchronization. Bounce actual hotplug processing * to a work item to avoid reverse locking order. */ schedule_work(&cpuset_hotplug_work); } void cpuset_wait_for_hotplug(void) { flush_work(&cpuset_hotplug_work); } /* * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY]. * Call this routine anytime after node_states[N_MEMORY] changes. * See cpuset_update_active_cpus() for CPU hotplug handling. 
*/ static int cpuset_track_online_nodes(struct notifier_block *self, unsigned long action, void *arg) { schedule_work(&cpuset_hotplug_work); return NOTIFY_OK; } /** * cpuset_init_smp - initialize cpus_allowed * * Description: Finish top cpuset after cpu, node maps are initialized */ void __init cpuset_init_smp(void) { /* * cpus_allowd/mems_allowed set to v2 values in the initial * cpuset_bind() call will be reset to v1 values in another * cpuset_bind() call when v1 cpuset is mounted. */ top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); top_cpuset.effective_mems = node_states[N_MEMORY]; hotplug_memory_notifier(cpuset_track_online_nodes, CPUSET_CALLBACK_PRI); cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); BUG_ON(!cpuset_migrate_mm_wq); } /** * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. * @pmask: pointer to struct cpumask variable to receive cpus_allowed set. * * Description: Returns the cpumask_var_t cpus_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty * subset of cpu_online_mask, even if this means going outside the * tasks cpuset, except when the task is in the top cpuset. **/ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { unsigned long flags; struct cpuset *cs; spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); cs = task_cs(tsk); if (cs != &top_cpuset) guarantee_online_cpus(tsk, pmask); /* * Tasks in the top cpuset won't get update to their cpumasks * when a hotplug online/offline event happens. So we include all * offline cpus in the allowed cpu list. */ if ((cs == &top_cpuset) || cpumask_empty(pmask)) { const struct cpumask *possible_mask = task_cpu_possible_mask(tsk); /* * We first exclude cpus allocated to partitions. If there is no * allowable online cpu left, we fall back to all possible cpus. */ cpumask_andnot(pmask, possible_mask, subpartitions_cpus); if (!cpumask_intersects(pmask, cpu_online_mask)) cpumask_copy(pmask, possible_mask); } rcu_read_unlock(); spin_unlock_irqrestore(&callback_lock, flags); } /** * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe. * @tsk: pointer to task_struct with which the scheduler is struggling * * Description: In the case that the scheduler cannot find an allowed cpu in * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy * mode however, this value is the same as task_cs(tsk)->effective_cpus, * which will not contain a sane cpumask during cases such as cpu hotplugging. * This is the absolute last resort for the scheduler and it is only used if * _every_ other avenue has been traveled. * * Returns true if the affinity of @tsk was changed, false otherwise. **/ bool cpuset_cpus_allowed_fallback(struct task_struct *tsk) { const struct cpumask *possible_mask = task_cpu_possible_mask(tsk); const struct cpumask *cs_mask; bool changed = false; rcu_read_lock(); cs_mask = task_cs(tsk)->cpus_allowed; if (is_in_v2_mode() && cpumask_subset(cs_mask, possible_mask)) { do_set_cpus_allowed(tsk, cs_mask); changed = true; } rcu_read_unlock(); /* * We own tsk->cpus_allowed, nobody can change it under us. * * But we used cs && cs->cpus_allowed lockless and thus can * race with cgroup_attach_task() or update_cpumask() and get * the wrong tsk->cpus_allowed. 
However, both cases imply the * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() * which takes task_rq_lock(). * * If we are called after it dropped the lock we must see all * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary * set any mask even if it is not right from task_cs() pov, * the pending set_cpus_allowed_ptr() will fix things. * * select_fallback_rq() will fix things ups and set cpu_possible_mask * if required. */ return changed; } void __init cpuset_init_current_mems_allowed(void) { nodes_setall(current->mems_allowed); } /** * cpuset_mems_allowed - return mems_allowed mask from a tasks cpuset. * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed. * * Description: Returns the nodemask_t mems_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty * subset of node_states[N_MEMORY], even if this means going outside the * tasks cpuset. **/ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) { nodemask_t mask; unsigned long flags; spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); guarantee_online_mems(task_cs(tsk), &mask); rcu_read_unlock(); spin_unlock_irqrestore(&callback_lock, flags); return mask; } /** * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed * @nodemask: the nodemask to be checked * * Are any of the nodes in the nodemask allowed in current->mems_allowed? */ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) { return nodes_intersects(*nodemask, current->mems_allowed); } /* * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or * mem_hardwall ancestor to the specified cpuset. Call holding * callback_lock. If no ancestor is mem_exclusive or mem_hardwall * (an unusual configuration), then returns the root cpuset. */ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) { while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs)) cs = parent_cs(cs); return cs; } /* * cpuset_node_allowed - Can we allocate on a memory node? * @node: is this an allowed node? * @gfp_mask: memory allocation flags * * If we're in interrupt, yes, we can always allocate. If @node is set in * current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this * node is set in the nearest hardwalled cpuset ancestor to current's cpuset, * yes. If current has access to memory reserves as an oom victim, yes. * Otherwise, no. * * GFP_USER allocations are marked with the __GFP_HARDWALL bit, * and do not allow allocations outside the current tasks cpuset * unless the task has been OOM killed. * GFP_KERNEL allocations are not so marked, so can escape to the * nearest enclosing hardwalled ancestor cpuset. * * Scanning up parent cpusets requires callback_lock. The * __alloc_pages() routine only calls here with __GFP_HARDWALL bit * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the * current tasks mems_allowed came up empty on the first pass over * the zonelist. So only GFP_KERNEL allocations, if all nodes in the * cpuset are short of memory, might require taking the callback_lock. * * The first call here from mm/page_alloc:get_page_from_freelist() * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, * so no allocation on a node outside the cpuset is allowed (unless * in interrupt, of course). * * The second pass through get_page_from_freelist() doesn't even call * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set * in alloc_flags. 
That logic and the checks below have the combined * affect that: * in_interrupt - any node ok (current task context irrelevant) * GFP_ATOMIC - any node ok * tsk_is_oom_victim - any node ok * GFP_KERNEL - any node in enclosing hardwalled cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. */ bool cpuset_node_allowed(int node, gfp_t gfp_mask) { struct cpuset *cs; /* current cpuset ancestors */ bool allowed; /* is allocation in zone z allowed? */ unsigned long flags; if (in_interrupt()) return true; if (node_isset(node, current->mems_allowed)) return true; /* * Allow tasks that have access to memory reserves because they have * been OOM killed to get memory anywhere. */ if (unlikely(tsk_is_oom_victim(current))) return true; if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ return false; if (current->flags & PF_EXITING) /* Let dying task have memory */ return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); cs = nearest_hardwall_ancestor(task_cs(current)); allowed = node_isset(node, cs->mems_allowed); rcu_read_unlock(); spin_unlock_irqrestore(&callback_lock, flags); return allowed; } /** * cpuset_spread_node() - On which node to begin search for a page * @rotor: round robin rotor * * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for * tasks in a cpuset with is_spread_page or is_spread_slab set), * and if the memory allocation used cpuset_mem_spread_node() * to determine on which node to start looking, as it will for * certain page cache or slab cache pages such as used for file * system buffers and inode caches, then instead of starting on the * local node to look for a free page, rather spread the starting * node around the tasks mems_allowed nodes. * * We don't have to worry about the returned node being offline * because "it can't happen", and even if it did, it would be ok. * * The routines calling guarantee_online_mems() are careful to * only set nodes in task->mems_allowed that are online. So it * should not be possible for the following code to return an * offline node. But if it did, that would be ok, as this routine * is not returning the node where the allocation must be, only * the node where the search should start. The zonelist passed to * __alloc_pages() will include all nodes. If the slab allocator * is passed an offline node, it will fall back to the local node. * See kmem_cache_alloc_node(). */ static int cpuset_spread_node(int *rotor) { return *rotor = next_node_in(*rotor, current->mems_allowed); } /** * cpuset_mem_spread_node() - On which node to begin search for a file page */ int cpuset_mem_spread_node(void) { if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) current->cpuset_mem_spread_rotor = node_random(&current->mems_allowed); return cpuset_spread_node(&current->cpuset_mem_spread_rotor); } /** * cpuset_slab_spread_node() - On which node to begin search for a slab page */ int cpuset_slab_spread_node(void) { if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE) current->cpuset_slab_spread_rotor = node_random(&current->mems_allowed); return cpuset_spread_node(&current->cpuset_slab_spread_rotor); } EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); /** * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's? * @tsk1: pointer to task_struct of some task. * @tsk2: pointer to task_struct of some other task. * * Description: Return true if @tsk1's mems_allowed intersects the * mems_allowed of @tsk2. 
Used by the OOM killer to determine if * one of the task's memory usage might impact the memory available * to the other. **/ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2) { return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); } /** * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed * * Description: Prints current's name, cpuset name, and cached copy of its * mems_allowed to the kernel log. */ void cpuset_print_current_mems_allowed(void) { struct cgroup *cgrp; rcu_read_lock(); cgrp = task_cs(current)->css.cgroup; pr_cont(",cpuset="); pr_cont_cgroup_name(cgrp); pr_cont(",mems_allowed=%*pbl", nodemask_pr_args(&current->mems_allowed)); rcu_read_unlock(); } /* * Collection of memory_pressure is suppressed unless * this flag is enabled by writing "1" to the special * cpuset file 'memory_pressure_enabled' in the root cpuset. */ int cpuset_memory_pressure_enabled __read_mostly; /* * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. * * Keep a running average of the rate of synchronous (direct) * page reclaim efforts initiated by tasks in each cpuset. * * This represents the rate at which some task in the cpuset * ran low on memory on all nodes it was allowed to use, and * had to enter the kernels page reclaim code in an effort to * create more free memory by tossing clean pages or swapping * or writing dirty pages. * * Display to user space in the per-cpuset read-only file * "memory_pressure". Value displayed is an integer * representing the recent rate of entry into the synchronous * (direct) page reclaim by any task attached to the cpuset. */ void __cpuset_memory_pressure_bump(void) { rcu_read_lock(); fmeter_markevent(&task_cs(current)->fmeter); rcu_read_unlock(); } #ifdef CONFIG_PROC_PID_CPUSET /* * proc_cpuset_show() * - Print tasks cpuset path into seq_file. * - Used for /proc/<pid>/cpuset. * - No need to task_lock(tsk) on this tsk->cpuset reference, as it * doesn't really matter if tsk->cpuset changes after we read it, * and we take cpuset_mutex, keeping cpuset_attach() from changing it * anyway. */ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk) { char *buf; struct cgroup_subsys_state *css; int retval; retval = -ENOMEM; buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) goto out; css = task_get_css(tsk, cpuset_cgrp_id); retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, current->nsproxy->cgroup_ns); css_put(css); if (retval == -E2BIG) retval = -ENAMETOOLONG; if (retval < 0) goto out_free; seq_puts(m, buf); seq_putc(m, '\n'); retval = 0; out_free: kfree(buf); out: return retval; } #endif /* CONFIG_PROC_PID_CPUSET */ /* Display task mems_allowed in /proc/<pid>/status file. */ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Mems_allowed:\t%*pb\n", nodemask_pr_args(&task->mems_allowed)); seq_printf(m, "Mems_allowed_list:\t%*pbl\n", nodemask_pr_args(&task->mems_allowed)); }
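/*
 * Minimal userspace sketch (not part of this file; build with any C
 * compiler) exercising the two task-visible interfaces documented above:
 * /proc/self/cpuset written by proc_cpuset_show() (needs
 * CONFIG_PROC_PID_CPUSET) and the Mems_allowed* lines of /proc/self/status
 * written by cpuset_task_status_allowed().  Purely illustrative.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[512];
	FILE *f;

	/* Path of the cpuset this task is attached to (proc_cpuset_show()). */
	f = fopen("/proc/self/cpuset", "r");
	if (f) {
		while (fgets(line, sizeof(line), f))
			printf("cpuset: %s", line);
		fclose(f);
	}

	/* Mems_allowed / Mems_allowed_list (cpuset_task_status_allowed()). */
	f = fopen("/proc/self/status", "r");
	if (f) {
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "Mems_allowed", 12))
				fputs(line, stdout);
		fclose(f);
	}
	return 0;
}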
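/*
 * Minimal sketch of the "bounce the real work to a workqueue" pattern used
 * by cpuset_update_active_cpus() and cpuset_track_online_nodes() above:
 * the hotplug/notifier callback only schedules a work item, so the heavy
 * processing runs outside the hotplug critical region and the reverse
 * locking order is avoided.  All names here (foo_*) are hypothetical.
 */
#include <linux/workqueue.h>
#include <linux/notifier.h>

static void foo_hotplug_workfn(struct work_struct *work)
{
	/* Heavy processing; may take cpus_read_lock() and other locks here. */
}
static DECLARE_WORK(foo_hotplug_work, foo_hotplug_workfn);

static int foo_track_online_nodes(struct notifier_block *self,
				  unsigned long action, void *arg)
{
	/* Called from the hotplug path: defer, do not process in place. */
	schedule_work(&foo_hotplug_work);
	return NOTIFY_OK;
}

static void foo_wait_for_hotplug(void)
{
	/* Callers that need the update to have completed simply flush it. */
	flush_work(&foo_hotplug_work);
}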
// SPDX-License-Identifier: GPL-2.0-or-later
// SPI init/core code
//
// Copyright (C) 2005 David Brownell
// Copyright (C) 2008 Secret Lab Technologies Ltd.
#include <linux/acpi.h> #include <linux/cache.h> #include <linux/clk/clk-conf.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/dmaengine.h> #include <linux/dma-mapping.h> #include <linux/export.h> #include <linux/gpio/consumer.h> #include <linux/highmem.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/mod_devicetable.h> #include <linux/mutex.h> #include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/percpu.h> #include <linux/platform_data/x86/apple.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/property.h> #include <linux/ptp_clock_kernel.h> #include <linux/sched/rt.h> #include <linux/slab.h> #include <linux/spi/spi.h> #include <linux/spi/spi-mem.h> #include <uapi/linux/sched/types.h> #define CREATE_TRACE_POINTS #include <trace/events/spi.h> EXPORT_TRACEPOINT_SYMBOL(spi_transfer_start); EXPORT_TRACEPOINT_SYMBOL(spi_transfer_stop); #include "internals.h" static DEFINE_IDR(spi_master_idr); static void spidev_release(struct device *dev) { struct spi_device *spi = to_spi_device(dev); spi_controller_put(spi->controller); kfree(spi->driver_override); free_percpu(spi->pcpu_statistics); kfree(spi); } static ssize_t modalias_show(struct device *dev, struct device_attribute *a, char *buf) { const struct spi_device *spi = to_spi_device(dev); int len; len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; return sysfs_emit(buf, "%s%s\n", SPI_MODULE_PREFIX, spi->modalias); } static DEVICE_ATTR_RO(modalias); static ssize_t driver_override_store(struct device *dev, struct device_attribute *a, const char *buf, size_t count) { struct spi_device *spi = to_spi_device(dev); int ret; ret = driver_set_override(dev, &spi->driver_override, buf, count); if (ret) return ret; return count; } static ssize_t driver_override_show(struct device *dev, struct device_attribute *a, char *buf) { const struct spi_device *spi = to_spi_device(dev); ssize_t len; device_lock(dev); len = sysfs_emit(buf, "%s\n", spi->driver_override ? 
: ""); device_unlock(dev); return len; } static DEVICE_ATTR_RW(driver_override); static struct spi_statistics __percpu *spi_alloc_pcpu_stats(struct device *dev) { struct spi_statistics __percpu *pcpu_stats; if (dev) pcpu_stats = devm_alloc_percpu(dev, struct spi_statistics); else pcpu_stats = alloc_percpu_gfp(struct spi_statistics, GFP_KERNEL); if (pcpu_stats) { int cpu; for_each_possible_cpu(cpu) { struct spi_statistics *stat; stat = per_cpu_ptr(pcpu_stats, cpu); u64_stats_init(&stat->syncp); } } return pcpu_stats; } static ssize_t spi_emit_pcpu_stats(struct spi_statistics __percpu *stat, char *buf, size_t offset) { u64 val = 0; int i; for_each_possible_cpu(i) { const struct spi_statistics *pcpu_stats; u64_stats_t *field; unsigned int start; u64 inc; pcpu_stats = per_cpu_ptr(stat, i); field = (void *)pcpu_stats + offset; do { start = u64_stats_fetch_begin(&pcpu_stats->syncp); inc = u64_stats_read(field); } while (u64_stats_fetch_retry(&pcpu_stats->syncp, start)); val += inc; } return sysfs_emit(buf, "%llu\n", val); } #define SPI_STATISTICS_ATTRS(field, file) \ static ssize_t spi_controller_##field##_show(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct spi_controller *ctlr = container_of(dev, \ struct spi_controller, dev); \ return spi_statistics_##field##_show(ctlr->pcpu_statistics, buf); \ } \ static struct device_attribute dev_attr_spi_controller_##field = { \ .attr = { .name = file, .mode = 0444 }, \ .show = spi_controller_##field##_show, \ }; \ static ssize_t spi_device_##field##_show(struct device *dev, \ struct device_attribute *attr, \ char *buf) \ { \ struct spi_device *spi = to_spi_device(dev); \ return spi_statistics_##field##_show(spi->pcpu_statistics, buf); \ } \ static struct device_attribute dev_attr_spi_device_##field = { \ .attr = { .name = file, .mode = 0444 }, \ .show = spi_device_##field##_show, \ } #define SPI_STATISTICS_SHOW_NAME(name, file, field) \ static ssize_t spi_statistics_##name##_show(struct spi_statistics __percpu *stat, \ char *buf) \ { \ return spi_emit_pcpu_stats(stat, buf, \ offsetof(struct spi_statistics, field)); \ } \ SPI_STATISTICS_ATTRS(name, file) #define SPI_STATISTICS_SHOW(field) \ SPI_STATISTICS_SHOW_NAME(field, __stringify(field), \ field) SPI_STATISTICS_SHOW(messages); SPI_STATISTICS_SHOW(transfers); SPI_STATISTICS_SHOW(errors); SPI_STATISTICS_SHOW(timedout); SPI_STATISTICS_SHOW(spi_sync); SPI_STATISTICS_SHOW(spi_sync_immediate); SPI_STATISTICS_SHOW(spi_async); SPI_STATISTICS_SHOW(bytes); SPI_STATISTICS_SHOW(bytes_rx); SPI_STATISTICS_SHOW(bytes_tx); #define SPI_STATISTICS_TRANSFER_BYTES_HISTO(index, number) \ SPI_STATISTICS_SHOW_NAME(transfer_bytes_histo##index, \ "transfer_bytes_histo_" number, \ transfer_bytes_histo[index]) SPI_STATISTICS_TRANSFER_BYTES_HISTO(0, "0-1"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(1, "2-3"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(2, "4-7"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(3, "8-15"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(4, "16-31"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(5, "32-63"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(6, "64-127"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(7, "128-255"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(8, "256-511"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(9, "512-1023"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(10, "1024-2047"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(11, "2048-4095"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(12, "4096-8191"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(13, "8192-16383"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(14, "16384-32767"); 
SPI_STATISTICS_TRANSFER_BYTES_HISTO(15, "32768-65535"); SPI_STATISTICS_TRANSFER_BYTES_HISTO(16, "65536+"); SPI_STATISTICS_SHOW(transfers_split_maxsize); static struct attribute *spi_dev_attrs[] = { &dev_attr_modalias.attr, &dev_attr_driver_override.attr, NULL, }; static const struct attribute_group spi_dev_group = { .attrs = spi_dev_attrs, }; static struct attribute *spi_device_statistics_attrs[] = { &dev_attr_spi_device_messages.attr, &dev_attr_spi_device_transfers.attr, &dev_attr_spi_device_errors.attr, &dev_attr_spi_device_timedout.attr, &dev_attr_spi_device_spi_sync.attr, &dev_attr_spi_device_spi_sync_immediate.attr, &dev_attr_spi_device_spi_async.attr, &dev_attr_spi_device_bytes.attr, &dev_attr_spi_device_bytes_rx.attr, &dev_attr_spi_device_bytes_tx.attr, &dev_attr_spi_device_transfer_bytes_histo0.attr, &dev_attr_spi_device_transfer_bytes_histo1.attr, &dev_attr_spi_device_transfer_bytes_histo2.attr, &dev_attr_spi_device_transfer_bytes_histo3.attr, &dev_attr_spi_device_transfer_bytes_histo4.attr, &dev_attr_spi_device_transfer_bytes_histo5.attr, &dev_attr_spi_device_transfer_bytes_histo6.attr, &dev_attr_spi_device_transfer_bytes_histo7.attr, &dev_attr_spi_device_transfer_bytes_histo8.attr, &dev_attr_spi_device_transfer_bytes_histo9.attr, &dev_attr_spi_device_transfer_bytes_histo10.attr, &dev_attr_spi_device_transfer_bytes_histo11.attr, &dev_attr_spi_device_transfer_bytes_histo12.attr, &dev_attr_spi_device_transfer_bytes_histo13.attr, &dev_attr_spi_device_transfer_bytes_histo14.attr, &dev_attr_spi_device_transfer_bytes_histo15.attr, &dev_attr_spi_device_transfer_bytes_histo16.attr, &dev_attr_spi_device_transfers_split_maxsize.attr, NULL, }; static const struct attribute_group spi_device_statistics_group = { .name = "statistics", .attrs = spi_device_statistics_attrs, }; static const struct attribute_group *spi_dev_groups[] = { &spi_dev_group, &spi_device_statistics_group, NULL, }; static struct attribute *spi_controller_statistics_attrs[] = { &dev_attr_spi_controller_messages.attr, &dev_attr_spi_controller_transfers.attr, &dev_attr_spi_controller_errors.attr, &dev_attr_spi_controller_timedout.attr, &dev_attr_spi_controller_spi_sync.attr, &dev_attr_spi_controller_spi_sync_immediate.attr, &dev_attr_spi_controller_spi_async.attr, &dev_attr_spi_controller_bytes.attr, &dev_attr_spi_controller_bytes_rx.attr, &dev_attr_spi_controller_bytes_tx.attr, &dev_attr_spi_controller_transfer_bytes_histo0.attr, &dev_attr_spi_controller_transfer_bytes_histo1.attr, &dev_attr_spi_controller_transfer_bytes_histo2.attr, &dev_attr_spi_controller_transfer_bytes_histo3.attr, &dev_attr_spi_controller_transfer_bytes_histo4.attr, &dev_attr_spi_controller_transfer_bytes_histo5.attr, &dev_attr_spi_controller_transfer_bytes_histo6.attr, &dev_attr_spi_controller_transfer_bytes_histo7.attr, &dev_attr_spi_controller_transfer_bytes_histo8.attr, &dev_attr_spi_controller_transfer_bytes_histo9.attr, &dev_attr_spi_controller_transfer_bytes_histo10.attr, &dev_attr_spi_controller_transfer_bytes_histo11.attr, &dev_attr_spi_controller_transfer_bytes_histo12.attr, &dev_attr_spi_controller_transfer_bytes_histo13.attr, &dev_attr_spi_controller_transfer_bytes_histo14.attr, &dev_attr_spi_controller_transfer_bytes_histo15.attr, &dev_attr_spi_controller_transfer_bytes_histo16.attr, &dev_attr_spi_controller_transfers_split_maxsize.attr, NULL, }; static const struct attribute_group spi_controller_statistics_group = { .name = "statistics", .attrs = spi_controller_statistics_attrs, }; static const struct attribute_group *spi_master_groups[] = 
{ &spi_controller_statistics_group, NULL, }; static void spi_statistics_add_transfer_stats(struct spi_statistics __percpu *pcpu_stats, struct spi_transfer *xfer, struct spi_controller *ctlr) { int l2len = min(fls(xfer->len), SPI_STATISTICS_HISTO_SIZE) - 1; struct spi_statistics *stats; if (l2len < 0) l2len = 0; get_cpu(); stats = this_cpu_ptr(pcpu_stats); u64_stats_update_begin(&stats->syncp); u64_stats_inc(&stats->transfers); u64_stats_inc(&stats->transfer_bytes_histo[l2len]); u64_stats_add(&stats->bytes, xfer->len); if ((xfer->tx_buf) && (xfer->tx_buf != ctlr->dummy_tx)) u64_stats_add(&stats->bytes_tx, xfer->len); if ((xfer->rx_buf) && (xfer->rx_buf != ctlr->dummy_rx)) u64_stats_add(&stats->bytes_rx, xfer->len); u64_stats_update_end(&stats->syncp); put_cpu(); } /* * modalias support makes "modprobe $MODALIAS" new-style hotplug work, * and the sysfs version makes coldplug work too. */ static const struct spi_device_id *spi_match_id(const struct spi_device_id *id, const char *name) { while (id->name[0]) { if (!strcmp(name, id->name)) return id; id++; } return NULL; } const struct spi_device_id *spi_get_device_id(const struct spi_device *sdev) { const struct spi_driver *sdrv = to_spi_driver(sdev->dev.driver); return spi_match_id(sdrv->id_table, sdev->modalias); } EXPORT_SYMBOL_GPL(spi_get_device_id); const void *spi_get_device_match_data(const struct spi_device *sdev) { const void *match; match = device_get_match_data(&sdev->dev); if (match) return match; return (const void *)spi_get_device_id(sdev)->driver_data; } EXPORT_SYMBOL_GPL(spi_get_device_match_data); static int spi_match_device(struct device *dev, struct device_driver *drv) { const struct spi_device *spi = to_spi_device(dev); const struct spi_driver *sdrv = to_spi_driver(drv); /* Check override first, and if set, only use the named driver */ if (spi->driver_override) return strcmp(spi->driver_override, drv->name) == 0; /* Attempt an OF style match */ if (of_driver_match_device(dev, drv)) return 1; /* Then try ACPI */ if (acpi_driver_match_device(dev, drv)) return 1; if (sdrv->id_table) return !!spi_match_id(sdrv->id_table, spi->modalias); return strcmp(spi->modalias, drv->name) == 0; } static int spi_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct spi_device *spi = to_spi_device(dev); int rc; rc = acpi_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; return add_uevent_var(env, "MODALIAS=%s%s", SPI_MODULE_PREFIX, spi->modalias); } static int spi_probe(struct device *dev) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); struct spi_device *spi = to_spi_device(dev); int ret; ret = of_clk_set_defaults(dev->of_node, false); if (ret) return ret; if (dev->of_node) { spi->irq = of_irq_get(dev->of_node, 0); if (spi->irq == -EPROBE_DEFER) return -EPROBE_DEFER; if (spi->irq < 0) spi->irq = 0; } ret = dev_pm_domain_attach(dev, true); if (ret) return ret; if (sdrv->probe) { ret = sdrv->probe(spi); if (ret) dev_pm_domain_detach(dev, true); } return ret; } static void spi_remove(struct device *dev) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); if (sdrv->remove) sdrv->remove(to_spi_device(dev)); dev_pm_domain_detach(dev, true); } static void spi_shutdown(struct device *dev) { if (dev->driver) { const struct spi_driver *sdrv = to_spi_driver(dev->driver); if (sdrv->shutdown) sdrv->shutdown(to_spi_device(dev)); } } const struct bus_type spi_bus_type = { .name = "spi", .dev_groups = spi_dev_groups, .match = spi_match_device, .uevent = spi_uevent, .probe = spi_probe, .remove = 
spi_remove, .shutdown = spi_shutdown, }; EXPORT_SYMBOL_GPL(spi_bus_type); /** * __spi_register_driver - register a SPI driver * @owner: owner module of the driver to register * @sdrv: the driver to register * Context: can sleep * * Return: zero on success, else a negative error code. */ int __spi_register_driver(struct module *owner, struct spi_driver *sdrv) { sdrv->driver.owner = owner; sdrv->driver.bus = &spi_bus_type; /* * For Really Good Reasons we use spi: modaliases not of: * modaliases for DT so module autoloading won't work if we * don't have a spi_device_id as well as a compatible string. */ if (sdrv->driver.of_match_table) { const struct of_device_id *of_id; for (of_id = sdrv->driver.of_match_table; of_id->compatible[0]; of_id++) { const char *of_name; /* Strip off any vendor prefix */ of_name = strnchr(of_id->compatible, sizeof(of_id->compatible), ','); if (of_name) of_name++; else of_name = of_id->compatible; if (sdrv->id_table) { const struct spi_device_id *spi_id; spi_id = spi_match_id(sdrv->id_table, of_name); if (spi_id) continue; } else { if (strcmp(sdrv->driver.name, of_name) == 0) continue; } pr_warn("SPI driver %s has no spi_device_id for %s\n", sdrv->driver.name, of_id->compatible); } } return driver_register(&sdrv->driver); } EXPORT_SYMBOL_GPL(__spi_register_driver); /*-------------------------------------------------------------------------*/ /* * SPI devices should normally not be created by SPI device drivers; that * would make them board-specific. Similarly with SPI controller drivers. * Device registration normally goes into like arch/.../mach.../board-YYY.c * with other readonly (flashable) information about mainboard devices. */ struct boardinfo { struct list_head list; struct spi_board_info board_info; }; static LIST_HEAD(board_list); static LIST_HEAD(spi_controller_list); /* * Used to protect add/del operation for board_info list and * spi_controller list, and their matching process also used * to protect object of type struct idr. */ static DEFINE_MUTEX(board_lock); /** * spi_alloc_device - Allocate a new SPI device * @ctlr: Controller to which device is connected * Context: can sleep * * Allows a driver to allocate and initialize a spi_device without * registering it immediately. This allows a driver to directly * fill the spi_device with device parameters before calling * spi_add_device() on it. * * Caller is responsible to call spi_add_device() on the returned * spi_device structure to add it to the SPI controller. If the caller * needs to discard the spi_device without adding it, then it should * call spi_dev_put() on it. * * Return: a pointer to the new device, or NULL. 
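/*
 * Illustrative sketch of the point made in __spi_register_driver() above:
 * a DT-probed SPI driver should also carry a spi_device_id table whose
 * entries match its compatible strings with the vendor prefix stripped,
 * otherwise module autoloading via spi: modaliases will not work.  All
 * names (foo_*) and the "vendor,foo" compatible are hypothetical, and the
 * usual <linux/module.h>/<linux/spi/spi.h> includes are assumed.
 */
static int foo_probe(struct spi_device *spi)
{
	return 0;
}

static const struct of_device_id foo_of_match[] = {
	{ .compatible = "vendor,foo" },
	{ }
};
MODULE_DEVICE_TABLE(of, foo_of_match);

static const struct spi_device_id foo_spi_ids[] = {
	{ "foo" },		/* "vendor,foo" with the vendor prefix stripped */
	{ }
};
MODULE_DEVICE_TABLE(spi, foo_spi_ids);

static struct spi_driver foo_driver = {
	.driver = {
		.name		= "foo",
		.of_match_table	= foo_of_match,
	},
	.id_table	= foo_spi_ids,
	.probe		= foo_probe,
};
module_spi_driver(foo_driver);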
*/ struct spi_device *spi_alloc_device(struct spi_controller *ctlr) { struct spi_device *spi; if (!spi_controller_get(ctlr)) return NULL; spi = kzalloc(sizeof(*spi), GFP_KERNEL); if (!spi) { spi_controller_put(ctlr); return NULL; } spi->pcpu_statistics = spi_alloc_pcpu_stats(NULL); if (!spi->pcpu_statistics) { kfree(spi); spi_controller_put(ctlr); return NULL; } spi->controller = ctlr; spi->dev.parent = &ctlr->dev; spi->dev.bus = &spi_bus_type; spi->dev.release = spidev_release; spi->mode = ctlr->buswidth_override_bits; device_initialize(&spi->dev); return spi; } EXPORT_SYMBOL_GPL(spi_alloc_device); static void spi_dev_set_name(struct spi_device *spi) { struct acpi_device *adev = ACPI_COMPANION(&spi->dev); if (adev) { dev_set_name(&spi->dev, "spi-%s", acpi_dev_name(adev)); return; } dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->controller->dev), spi_get_chipselect(spi, 0)); } /* * Zero(0) is a valid physical CS value and can be located at any * logical CS in the spi->chip_select[]. If all the physical CS * are initialized to 0 then It would be difficult to differentiate * between a valid physical CS 0 & an unused logical CS whose physical * CS can be 0. As a solution to this issue initialize all the CS to -1. * Now all the unused logical CS will have -1 physical CS value & can be * ignored while performing physical CS validity checks. */ #define SPI_INVALID_CS ((s8)-1) static inline bool is_valid_cs(s8 chip_select) { return chip_select != SPI_INVALID_CS; } static inline int spi_dev_check_cs(struct device *dev, struct spi_device *spi, u8 idx, struct spi_device *new_spi, u8 new_idx) { u8 cs, cs_new; u8 idx_new; cs = spi_get_chipselect(spi, idx); for (idx_new = new_idx; idx_new < SPI_CS_CNT_MAX; idx_new++) { cs_new = spi_get_chipselect(new_spi, idx_new); if (is_valid_cs(cs) && is_valid_cs(cs_new) && cs == cs_new) { dev_err(dev, "chipselect %u already in use\n", cs_new); return -EBUSY; } } return 0; } static int spi_dev_check(struct device *dev, void *data) { struct spi_device *spi = to_spi_device(dev); struct spi_device *new_spi = data; int status, idx; if (spi->controller == new_spi->controller) { for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { status = spi_dev_check_cs(dev, spi, idx, new_spi, 0); if (status) return status; } } return 0; } static void spi_cleanup(struct spi_device *spi) { if (spi->controller->cleanup) spi->controller->cleanup(spi); } static int __spi_add_device(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; struct device *dev = ctlr->dev.parent; int status, idx; u8 cs; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { /* Chipselects are numbered 0..max; validate. */ cs = spi_get_chipselect(spi, idx); if (is_valid_cs(cs) && cs >= ctlr->num_chipselect) { dev_err(dev, "cs%d >= max %d\n", spi_get_chipselect(spi, idx), ctlr->num_chipselect); return -EINVAL; } } /* * Make sure that multiple logical CS doesn't map to the same physical CS. * For example, spi->chip_select[0] != spi->chip_select[1] and so on. */ for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { status = spi_dev_check_cs(dev, spi, idx, spi, idx + 1); if (status) return status; } /* Set the bus ID string */ spi_dev_set_name(spi); /* * We need to make sure there's no other device with this * chipselect **BEFORE** we call setup(), else we'll trash * its configuration. 
*/ status = bus_for_each_dev(&spi_bus_type, NULL, spi, spi_dev_check); if (status) return status; /* Controller may unregister concurrently */ if (IS_ENABLED(CONFIG_SPI_DYNAMIC) && !device_is_registered(&ctlr->dev)) { return -ENODEV; } if (ctlr->cs_gpiods) { u8 cs; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { cs = spi_get_chipselect(spi, idx); if (is_valid_cs(cs)) spi_set_csgpiod(spi, idx, ctlr->cs_gpiods[cs]); } } /* * Drivers may modify this initial i/o setup, but will * normally rely on the device being setup. Devices * using SPI_CS_HIGH can't coexist well otherwise... */ status = spi_setup(spi); if (status < 0) { dev_err(dev, "can't setup %s, status %d\n", dev_name(&spi->dev), status); return status; } /* Device may be bound to an active driver when this returns */ status = device_add(&spi->dev); if (status < 0) { dev_err(dev, "can't add %s, status %d\n", dev_name(&spi->dev), status); spi_cleanup(spi); } else { dev_dbg(dev, "registered child %s\n", dev_name(&spi->dev)); } return status; } /** * spi_add_device - Add spi_device allocated with spi_alloc_device * @spi: spi_device to register * * Companion function to spi_alloc_device. Devices allocated with * spi_alloc_device can be added onto the SPI bus with this function. * * Return: 0 on success; negative errno on failure */ int spi_add_device(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; int status; /* Set the bus ID string */ spi_dev_set_name(spi); mutex_lock(&ctlr->add_lock); status = __spi_add_device(spi); mutex_unlock(&ctlr->add_lock); return status; } EXPORT_SYMBOL_GPL(spi_add_device); static void spi_set_all_cs_unused(struct spi_device *spi) { u8 idx; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) spi_set_chipselect(spi, idx, SPI_INVALID_CS); } /** * spi_new_device - instantiate one new SPI device * @ctlr: Controller to which device is connected * @chip: Describes the SPI device * Context: can sleep * * On typical mainboards, this is purely internal; and it's not needed * after board init creates the hard-wired devices. Some development * platforms may not be able to use spi_register_board_info though, and * this is exported so that for example a USB or parport based adapter * driver could add devices (which it would learn about out-of-band). * * Return: the new device, or NULL. */ struct spi_device *spi_new_device(struct spi_controller *ctlr, struct spi_board_info *chip) { struct spi_device *proxy; int status; /* * NOTE: caller did any chip->bus_num checks necessary. * * Also, unless we change the return value convention to use * error-or-pointer (not NULL-or-pointer), troubleshootability * suggests syslogged diagnostics are best here (ugh). */ proxy = spi_alloc_device(ctlr); if (!proxy) return NULL; WARN_ON(strlen(chip->modalias) >= sizeof(proxy->modalias)); /* Use provided chip-select for proxy device */ spi_set_all_cs_unused(proxy); spi_set_chipselect(proxy, 0, chip->chip_select); proxy->max_speed_hz = chip->max_speed_hz; proxy->mode = chip->mode; proxy->irq = chip->irq; strscpy(proxy->modalias, chip->modalias, sizeof(proxy->modalias)); proxy->dev.platform_data = (void *) chip->platform_data; proxy->controller_data = chip->controller_data; proxy->controller_state = NULL; /* * spi->chip_select[i] gives the corresponding physical CS for logical CS i * logical CS number is represented by setting the ith bit in spi->cs_index_mask * So, for example, if spi->cs_index_mask = 0x01 then logical CS number is 0 and * spi->chip_select[0] will give the physical CS. 
* By default spi->chip_select[0] will hold the physical CS number so, set * spi->cs_index_mask as 0x01. */ proxy->cs_index_mask = 0x01; if (chip->swnode) { status = device_add_software_node(&proxy->dev, chip->swnode); if (status) { dev_err(&ctlr->dev, "failed to add software node to '%s': %d\n", chip->modalias, status); goto err_dev_put; } } status = spi_add_device(proxy); if (status < 0) goto err_dev_put; return proxy; err_dev_put: device_remove_software_node(&proxy->dev); spi_dev_put(proxy); return NULL; } EXPORT_SYMBOL_GPL(spi_new_device); /** * spi_unregister_device - unregister a single SPI device * @spi: spi_device to unregister * * Start making the passed SPI device vanish. Normally this would be handled * by spi_unregister_controller(). */ void spi_unregister_device(struct spi_device *spi) { if (!spi) return; if (spi->dev.of_node) { of_node_clear_flag(spi->dev.of_node, OF_POPULATED); of_node_put(spi->dev.of_node); } if (ACPI_COMPANION(&spi->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&spi->dev)); device_remove_software_node(&spi->dev); device_del(&spi->dev); spi_cleanup(spi); put_device(&spi->dev); } EXPORT_SYMBOL_GPL(spi_unregister_device); static void spi_match_controller_to_boardinfo(struct spi_controller *ctlr, struct spi_board_info *bi) { struct spi_device *dev; if (ctlr->bus_num != bi->bus_num) return; dev = spi_new_device(ctlr, bi); if (!dev) dev_err(ctlr->dev.parent, "can't create new device for %s\n", bi->modalias); } /** * spi_register_board_info - register SPI devices for a given board * @info: array of chip descriptors * @n: how many descriptors are provided * Context: can sleep * * Board-specific early init code calls this (probably during arch_initcall) * with segments of the SPI device table. Any device nodes are created later, * after the relevant parent SPI controller (bus_num) is defined. We keep * this table of devices forever, so that reloading a controller driver will * not make Linux forget about these hard-wired devices. * * Other code can also call this, e.g. a particular add-on board might provide * SPI devices through its expansion connector, so code initializing that board * would naturally declare its SPI devices. * * The board info passed can safely be __initdata ... but be careful of * any embedded pointers (platform_data, etc), they're copied as-is. * * Return: zero on success, else a negative error code. */ int spi_register_board_info(struct spi_board_info const *info, unsigned n) { struct boardinfo *bi; int i; if (!n) return 0; bi = kcalloc(n, sizeof(*bi), GFP_KERNEL); if (!bi) return -ENOMEM; for (i = 0; i < n; i++, bi++, info++) { struct spi_controller *ctlr; memcpy(&bi->board_info, info, sizeof(*info)); mutex_lock(&board_lock); list_add_tail(&bi->list, &board_list); list_for_each_entry(ctlr, &spi_controller_list, list) spi_match_controller_to_boardinfo(ctlr, &bi->board_info); mutex_unlock(&board_lock); } return 0; } /*-------------------------------------------------------------------------*/ /* Core methods for SPI resource management */ /** * spi_res_alloc - allocate a spi resource that is life-cycle managed * during the processing of a spi_message while using * spi_transfer_one * @spi: the SPI device for which we allocate memory * @release: the release code to execute for this resource * @size: size to alloc and return * @gfp: GFP allocation flags * * Return: the pointer to the allocated data * * This may get enhanced in the future to allocate from a memory pool * of the @spi_device or @spi_controller to avoid repeated allocations. 
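 *
 * A rough sketch of how the core uses this pattern (the release callback
 * name and payload shown here are illustrative only):
 *
 *        static void example_res_release(struct spi_controller *ctlr,
 *                                        struct spi_message *msg, void *res)
 *        {
 *                ... undo whatever the resource set up ...
 *        }
 *
 *        data = spi_res_alloc(msg->spi, example_res_release, sizeof(*data), GFP_KERNEL);
 *        if (!data)
 *                return -ENOMEM;
 *        spi_res_add(msg, data);
 *        ... spi_res_release() later runs example_res_release() and frees data ...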
*/ static void *spi_res_alloc(struct spi_device *spi, spi_res_release_t release, size_t size, gfp_t gfp) { struct spi_res *sres; sres = kzalloc(sizeof(*sres) + size, gfp); if (!sres) return NULL; INIT_LIST_HEAD(&sres->entry); sres->release = release; return sres->data; } /** * spi_res_free - free an SPI resource * @res: pointer to the custom data of a resource */ static void spi_res_free(void *res) { struct spi_res *sres = container_of(res, struct spi_res, data); if (!res) return; WARN_ON(!list_empty(&sres->entry)); kfree(sres); } /** * spi_res_add - add a spi_res to the spi_message * @message: the SPI message * @res: the spi_resource */ static void spi_res_add(struct spi_message *message, void *res) { struct spi_res *sres = container_of(res, struct spi_res, data); WARN_ON(!list_empty(&sres->entry)); list_add_tail(&sres->entry, &message->resources); } /** * spi_res_release - release all SPI resources for this message * @ctlr: the @spi_controller * @message: the @spi_message */ static void spi_res_release(struct spi_controller *ctlr, struct spi_message *message) { struct spi_res *res, *tmp; list_for_each_entry_safe_reverse(res, tmp, &message->resources, entry) { if (res->release) res->release(ctlr, message, res->data); list_del(&res->entry); kfree(res); } } /*-------------------------------------------------------------------------*/ static inline bool spi_is_last_cs(struct spi_device *spi) { u8 idx; bool last = false; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { if (spi->cs_index_mask & BIT(idx)) { if (spi->controller->last_cs[idx] == spi_get_chipselect(spi, idx)) last = true; } } return last; } static void spi_set_cs(struct spi_device *spi, bool enable, bool force) { bool activate = enable; u8 idx; /* * Avoid calling into the driver (or doing delays) if the chip select * isn't actually changing from the last time this was called. */ if (!force && ((enable && spi->controller->last_cs_index_mask == spi->cs_index_mask && spi_is_last_cs(spi)) || (!enable && spi->controller->last_cs_index_mask == spi->cs_index_mask && !spi_is_last_cs(spi))) && (spi->controller->last_cs_mode_high == (spi->mode & SPI_CS_HIGH))) return; trace_spi_set_cs(spi, activate); spi->controller->last_cs_index_mask = spi->cs_index_mask; for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) spi->controller->last_cs[idx] = enable ? spi_get_chipselect(spi, 0) : SPI_INVALID_CS; spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH; if (spi->mode & SPI_CS_HIGH) enable = !enable; /* * Handle chip select delays for GPIO based CS or controllers without * programmable chip select timing. */ if ((spi_is_csgpiod(spi) || !spi->controller->set_cs_timing) && !activate) spi_delay_exec(&spi->cs_hold, NULL); if (spi_is_csgpiod(spi)) { if (!(spi->mode & SPI_NO_CS)) { /* * Historically ACPI has no means of the GPIO polarity and * thus the SPISerialBus() resource defines it on the per-chip * basis. In order to avoid a chain of negations, the GPIO * polarity is considered being Active High. Even for the cases * when _DSD() is involved (in the updated versions of ACPI) * the GPIO CS polarity must be defined Active High to avoid * ambiguity. That's why we use enable, that takes SPI_CS_HIGH * into account. 
*/ for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { if ((spi->cs_index_mask & BIT(idx)) && spi_get_csgpiod(spi, idx)) { if (has_acpi_companion(&spi->dev)) gpiod_set_value_cansleep(spi_get_csgpiod(spi, idx), !enable); else /* Polarity handled by GPIO library */ gpiod_set_value_cansleep(spi_get_csgpiod(spi, idx), activate); if (activate) spi_delay_exec(&spi->cs_setup, NULL); else spi_delay_exec(&spi->cs_inactive, NULL); } } } /* Some SPI masters need both GPIO CS & slave_select */ if ((spi->controller->flags & SPI_CONTROLLER_GPIO_SS) && spi->controller->set_cs) spi->controller->set_cs(spi, !enable); } else if (spi->controller->set_cs) { spi->controller->set_cs(spi, !enable); } if (spi_is_csgpiod(spi) || !spi->controller->set_cs_timing) { if (activate) spi_delay_exec(&spi->cs_setup, NULL); else spi_delay_exec(&spi->cs_inactive, NULL); } } #ifdef CONFIG_HAS_DMA static int spi_map_buf_attrs(struct spi_controller *ctlr, struct device *dev, struct sg_table *sgt, void *buf, size_t len, enum dma_data_direction dir, unsigned long attrs) { const bool vmalloced_buf = is_vmalloc_addr(buf); unsigned int max_seg_size = dma_get_max_seg_size(dev); #ifdef CONFIG_HIGHMEM const bool kmap_buf = ((unsigned long)buf >= PKMAP_BASE && (unsigned long)buf < (PKMAP_BASE + (LAST_PKMAP * PAGE_SIZE))); #else const bool kmap_buf = false; #endif int desc_len; int sgs; struct page *vm_page; struct scatterlist *sg; void *sg_buf; size_t min; int i, ret; if (vmalloced_buf || kmap_buf) { desc_len = min_t(unsigned long, max_seg_size, PAGE_SIZE); sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len); } else if (virt_addr_valid(buf)) { desc_len = min_t(size_t, max_seg_size, ctlr->max_dma_len); sgs = DIV_ROUND_UP(len, desc_len); } else { return -EINVAL; } ret = sg_alloc_table(sgt, sgs, GFP_KERNEL); if (ret != 0) return ret; sg = &sgt->sgl[0]; for (i = 0; i < sgs; i++) { if (vmalloced_buf || kmap_buf) { /* * Next scatterlist entry size is the minimum between * the desc_len and the remaining buffer length that * fits in a page. 
*/ min = min_t(size_t, desc_len, min_t(size_t, len, PAGE_SIZE - offset_in_page(buf))); if (vmalloced_buf) vm_page = vmalloc_to_page(buf); else vm_page = kmap_to_page(buf); if (!vm_page) { sg_free_table(sgt); return -ENOMEM; } sg_set_page(sg, vm_page, min, offset_in_page(buf)); } else { min = min_t(size_t, len, desc_len); sg_buf = buf; sg_set_buf(sg, sg_buf, min); } buf += min; len -= min; sg = sg_next(sg); } ret = dma_map_sgtable(dev, sgt, dir, attrs); if (ret < 0) { sg_free_table(sgt); return ret; } return 0; } int spi_map_buf(struct spi_controller *ctlr, struct device *dev, struct sg_table *sgt, void *buf, size_t len, enum dma_data_direction dir) { return spi_map_buf_attrs(ctlr, dev, sgt, buf, len, dir, 0); } static void spi_unmap_buf_attrs(struct spi_controller *ctlr, struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs) { if (sgt->orig_nents) { dma_unmap_sgtable(dev, sgt, dir, attrs); sg_free_table(sgt); sgt->orig_nents = 0; sgt->nents = 0; } } void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev, struct sg_table *sgt, enum dma_data_direction dir) { spi_unmap_buf_attrs(ctlr, dev, sgt, dir, 0); } static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct device *tx_dev, *rx_dev; struct spi_transfer *xfer; int ret; if (!ctlr->can_dma) return 0; if (ctlr->dma_tx) tx_dev = ctlr->dma_tx->device->dev; else if (ctlr->dma_map_dev) tx_dev = ctlr->dma_map_dev; else tx_dev = ctlr->dev.parent; if (ctlr->dma_rx) rx_dev = ctlr->dma_rx->device->dev; else if (ctlr->dma_map_dev) rx_dev = ctlr->dma_map_dev; else rx_dev = ctlr->dev.parent; list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* The sync is done before each transfer. */ unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC; if (!ctlr->can_dma(ctlr, msg->spi, xfer)) continue; if (xfer->tx_buf != NULL) { ret = spi_map_buf_attrs(ctlr, tx_dev, &xfer->tx_sg, (void *)xfer->tx_buf, xfer->len, DMA_TO_DEVICE, attrs); if (ret != 0) return ret; } if (xfer->rx_buf != NULL) { ret = spi_map_buf_attrs(ctlr, rx_dev, &xfer->rx_sg, xfer->rx_buf, xfer->len, DMA_FROM_DEVICE, attrs); if (ret != 0) { spi_unmap_buf_attrs(ctlr, tx_dev, &xfer->tx_sg, DMA_TO_DEVICE, attrs); return ret; } } } ctlr->cur_rx_dma_dev = rx_dev; ctlr->cur_tx_dma_dev = tx_dev; ctlr->cur_msg_mapped = true; return 0; } static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct device *rx_dev = ctlr->cur_rx_dma_dev; struct device *tx_dev = ctlr->cur_tx_dma_dev; struct spi_transfer *xfer; if (!ctlr->cur_msg_mapped || !ctlr->can_dma) return 0; list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* The sync has already been done after each transfer. 
*/ unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC; if (!ctlr->can_dma(ctlr, msg->spi, xfer)) continue; spi_unmap_buf_attrs(ctlr, rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE, attrs); spi_unmap_buf_attrs(ctlr, tx_dev, &xfer->tx_sg, DMA_TO_DEVICE, attrs); } ctlr->cur_msg_mapped = false; return 0; } static void spi_dma_sync_for_device(struct spi_controller *ctlr, struct spi_transfer *xfer) { struct device *rx_dev = ctlr->cur_rx_dma_dev; struct device *tx_dev = ctlr->cur_tx_dma_dev; if (!ctlr->cur_msg_mapped) return; if (xfer->tx_sg.orig_nents) dma_sync_sgtable_for_device(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE); if (xfer->rx_sg.orig_nents) dma_sync_sgtable_for_device(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE); } static void spi_dma_sync_for_cpu(struct spi_controller *ctlr, struct spi_transfer *xfer) { struct device *rx_dev = ctlr->cur_rx_dma_dev; struct device *tx_dev = ctlr->cur_tx_dma_dev; if (!ctlr->cur_msg_mapped) return; if (xfer->rx_sg.orig_nents) dma_sync_sgtable_for_cpu(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE); if (xfer->tx_sg.orig_nents) dma_sync_sgtable_for_cpu(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE); } #else /* !CONFIG_HAS_DMA */ static inline int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) { return 0; } static inline int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) { return 0; } static void spi_dma_sync_for_device(struct spi_controller *ctrl, struct spi_transfer *xfer) { } static void spi_dma_sync_for_cpu(struct spi_controller *ctrl, struct spi_transfer *xfer) { } #endif /* !CONFIG_HAS_DMA */ static inline int spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct spi_transfer *xfer; list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* * Restore the original value of tx_buf or rx_buf if they are * NULL. 
*/ if (xfer->tx_buf == ctlr->dummy_tx) xfer->tx_buf = NULL; if (xfer->rx_buf == ctlr->dummy_rx) xfer->rx_buf = NULL; } return __spi_unmap_msg(ctlr, msg); } static int spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct spi_transfer *xfer; void *tmp; unsigned int max_tx, max_rx; if ((ctlr->flags & (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX)) && !(msg->spi->mode & SPI_3WIRE)) { max_tx = 0; max_rx = 0; list_for_each_entry(xfer, &msg->transfers, transfer_list) { if ((ctlr->flags & SPI_CONTROLLER_MUST_TX) && !xfer->tx_buf) max_tx = max(xfer->len, max_tx); if ((ctlr->flags & SPI_CONTROLLER_MUST_RX) && !xfer->rx_buf) max_rx = max(xfer->len, max_rx); } if (max_tx) { tmp = krealloc(ctlr->dummy_tx, max_tx, GFP_KERNEL | GFP_DMA | __GFP_ZERO); if (!tmp) return -ENOMEM; ctlr->dummy_tx = tmp; } if (max_rx) { tmp = krealloc(ctlr->dummy_rx, max_rx, GFP_KERNEL | GFP_DMA); if (!tmp) return -ENOMEM; ctlr->dummy_rx = tmp; } if (max_tx || max_rx) { list_for_each_entry(xfer, &msg->transfers, transfer_list) { if (!xfer->len) continue; if (!xfer->tx_buf) xfer->tx_buf = ctlr->dummy_tx; if (!xfer->rx_buf) xfer->rx_buf = ctlr->dummy_rx; } } } return __spi_map_msg(ctlr, msg); } static int spi_transfer_wait(struct spi_controller *ctlr, struct spi_message *msg, struct spi_transfer *xfer) { struct spi_statistics __percpu *statm = ctlr->pcpu_statistics; struct spi_statistics __percpu *stats = msg->spi->pcpu_statistics; u32 speed_hz = xfer->speed_hz; unsigned long long ms; if (spi_controller_is_slave(ctlr)) { if (wait_for_completion_interruptible(&ctlr->xfer_completion)) { dev_dbg(&msg->spi->dev, "SPI transfer interrupted\n"); return -EINTR; } } else { if (!speed_hz) speed_hz = 100000; /* * For each byte we wait for 8 cycles of the SPI clock. * Since speed is defined in Hz and we want milliseconds, * use respective multiplier, but before the division, * otherwise we may get 0 for short transfers. */ ms = 8LL * MSEC_PER_SEC * xfer->len; do_div(ms, speed_hz); /* * Increase it twice and add 200 ms tolerance, use * predefined maximum in case of overflow. */ ms += ms + 200; if (ms > UINT_MAX) ms = UINT_MAX; ms = wait_for_completion_timeout(&ctlr->xfer_completion, msecs_to_jiffies(ms)); if (ms == 0) { SPI_STATISTICS_INCREMENT_FIELD(statm, timedout); SPI_STATISTICS_INCREMENT_FIELD(stats, timedout); dev_err(&msg->spi->dev, "SPI transfer timed out\n"); return -ETIMEDOUT; } if (xfer->error & SPI_TRANS_FAIL_IO) return -EIO; } return 0; } static void _spi_transfer_delay_ns(u32 ns) { if (!ns) return; if (ns <= NSEC_PER_USEC) { ndelay(ns); } else { u32 us = DIV_ROUND_UP(ns, NSEC_PER_USEC); if (us <= 10) udelay(us); else usleep_range(us, us + DIV_ROUND_UP(us, 10)); } } int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer) { u32 delay = _delay->value; u32 unit = _delay->unit; u32 hz; if (!delay) return 0; switch (unit) { case SPI_DELAY_UNIT_USECS: delay *= NSEC_PER_USEC; break; case SPI_DELAY_UNIT_NSECS: /* Nothing to do here */ break; case SPI_DELAY_UNIT_SCK: /* Clock cycles need to be obtained from spi_transfer */ if (!xfer) return -EINVAL; /* * If there is unknown effective speed, approximate it * by underestimating with half of the requested Hz. 
*/ hz = xfer->effective_speed_hz ?: xfer->speed_hz / 2; if (!hz) return -EINVAL; /* Convert delay to nanoseconds */ delay *= DIV_ROUND_UP(NSEC_PER_SEC, hz); break; default: return -EINVAL; } return delay; } EXPORT_SYMBOL_GPL(spi_delay_to_ns); int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer) { int delay; might_sleep(); if (!_delay) return -EINVAL; delay = spi_delay_to_ns(_delay, xfer); if (delay < 0) return delay; _spi_transfer_delay_ns(delay); return 0; } EXPORT_SYMBOL_GPL(spi_delay_exec); static void _spi_transfer_cs_change_delay(struct spi_message *msg, struct spi_transfer *xfer) { u32 default_delay_ns = 10 * NSEC_PER_USEC; u32 delay = xfer->cs_change_delay.value; u32 unit = xfer->cs_change_delay.unit; int ret; /* Return early on "fast" mode - for everything but USECS */ if (!delay) { if (unit == SPI_DELAY_UNIT_USECS) _spi_transfer_delay_ns(default_delay_ns); return; } ret = spi_delay_exec(&xfer->cs_change_delay, xfer); if (ret) { dev_err_once(&msg->spi->dev, "Use of unsupported delay unit %i, using default of %luus\n", unit, default_delay_ns / NSEC_PER_USEC); _spi_transfer_delay_ns(default_delay_ns); } } void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_transfer *xfer) { _spi_transfer_cs_change_delay(msg, xfer); } EXPORT_SYMBOL_GPL(spi_transfer_cs_change_delay_exec); /* * spi_transfer_one_message - Default implementation of transfer_one_message() * * This is a standard implementation of transfer_one_message() for * drivers which implement a transfer_one() operation. It provides * standard handling of delays and chip select management. */ static int spi_transfer_one_message(struct spi_controller *ctlr, struct spi_message *msg) { struct spi_transfer *xfer; bool keep_cs = false; int ret = 0; struct spi_statistics __percpu *statm = ctlr->pcpu_statistics; struct spi_statistics __percpu *stats = msg->spi->pcpu_statistics; xfer = list_first_entry(&msg->transfers, struct spi_transfer, transfer_list); spi_set_cs(msg->spi, !xfer->cs_off, false); SPI_STATISTICS_INCREMENT_FIELD(statm, messages); SPI_STATISTICS_INCREMENT_FIELD(stats, messages); list_for_each_entry(xfer, &msg->transfers, transfer_list) { trace_spi_transfer_start(msg, xfer); spi_statistics_add_transfer_stats(statm, xfer, ctlr); spi_statistics_add_transfer_stats(stats, xfer, ctlr); if (!ctlr->ptp_sts_supported) { xfer->ptp_sts_word_pre = 0; ptp_read_system_prets(xfer->ptp_sts); } if ((xfer->tx_buf || xfer->rx_buf) && xfer->len) { reinit_completion(&ctlr->xfer_completion); fallback_pio: spi_dma_sync_for_device(ctlr, xfer); ret = ctlr->transfer_one(ctlr, msg->spi, xfer); if (ret < 0) { spi_dma_sync_for_cpu(ctlr, xfer); if (ctlr->cur_msg_mapped && (xfer->error & SPI_TRANS_FAIL_NO_START)) { __spi_unmap_msg(ctlr, msg); ctlr->fallback = true; xfer->error &= ~SPI_TRANS_FAIL_NO_START; goto fallback_pio; } SPI_STATISTICS_INCREMENT_FIELD(statm, errors); SPI_STATISTICS_INCREMENT_FIELD(stats, errors); dev_err(&msg->spi->dev, "SPI transfer failed: %d\n", ret); goto out; } if (ret > 0) { ret = spi_transfer_wait(ctlr, msg, xfer); if (ret < 0) msg->status = ret; } spi_dma_sync_for_cpu(ctlr, xfer); } else { if (xfer->len) dev_err(&msg->spi->dev, "Bufferless transfer has length %u\n", xfer->len); } if (!ctlr->ptp_sts_supported) { ptp_read_system_postts(xfer->ptp_sts); xfer->ptp_sts_word_post = xfer->len; } trace_spi_transfer_stop(msg, xfer); if (msg->status != -EINPROGRESS) goto out; spi_transfer_delay_exec(xfer); if (xfer->cs_change) { if (list_is_last(&xfer->transfer_list, &msg->transfers)) { keep_cs = 
true; } else { if (!xfer->cs_off) spi_set_cs(msg->spi, false, false); _spi_transfer_cs_change_delay(msg, xfer); if (!list_next_entry(xfer, transfer_list)->cs_off) spi_set_cs(msg->spi, true, false); } } else if (!list_is_last(&xfer->transfer_list, &msg->transfers) && xfer->cs_off != list_next_entry(xfer, transfer_list)->cs_off) { spi_set_cs(msg->spi, xfer->cs_off, false); } msg->actual_length += xfer->len; } out: if (ret != 0 || !keep_cs) spi_set_cs(msg->spi, false, false); if (msg->status == -EINPROGRESS) msg->status = ret; if (msg->status && ctlr->handle_err) ctlr->handle_err(ctlr, msg); spi_finalize_current_message(ctlr); return ret; } /** * spi_finalize_current_transfer - report completion of a transfer * @ctlr: the controller reporting completion * * Called by SPI drivers using the core transfer_one_message() * implementation to notify it that the current interrupt driven * transfer has finished and the next one may be scheduled. */ void spi_finalize_current_transfer(struct spi_controller *ctlr) { complete(&ctlr->xfer_completion); } EXPORT_SYMBOL_GPL(spi_finalize_current_transfer); static void spi_idle_runtime_pm(struct spi_controller *ctlr) { if (ctlr->auto_runtime_pm) { pm_runtime_mark_last_busy(ctlr->dev.parent); pm_runtime_put_autosuspend(ctlr->dev.parent); } } static int __spi_pump_transfer_message(struct spi_controller *ctlr, struct spi_message *msg, bool was_busy) { struct spi_transfer *xfer; int ret; if (!was_busy && ctlr->auto_runtime_pm) { ret = pm_runtime_get_sync(ctlr->dev.parent); if (ret < 0) { pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); msg->status = ret; spi_finalize_current_message(ctlr); return ret; } } if (!was_busy) trace_spi_controller_busy(ctlr); if (!was_busy && ctlr->prepare_transfer_hardware) { ret = ctlr->prepare_transfer_hardware(ctlr); if (ret) { dev_err(&ctlr->dev, "failed to prepare transfer hardware: %d\n", ret); if (ctlr->auto_runtime_pm) pm_runtime_put(ctlr->dev.parent); msg->status = ret; spi_finalize_current_message(ctlr); return ret; } } trace_spi_message_start(msg); if (ctlr->prepare_message) { ret = ctlr->prepare_message(ctlr, msg); if (ret) { dev_err(&ctlr->dev, "failed to prepare message: %d\n", ret); msg->status = ret; spi_finalize_current_message(ctlr); return ret; } msg->prepared = true; } ret = spi_map_msg(ctlr, msg); if (ret) { msg->status = ret; spi_finalize_current_message(ctlr); return ret; } if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) { list_for_each_entry(xfer, &msg->transfers, transfer_list) { xfer->ptp_sts_word_pre = 0; ptp_read_system_prets(xfer->ptp_sts); } } /* * Drivers implementation of transfer_one_message() must arrange for * spi_finalize_current_message() to get called. Most drivers will do * this in the calling context, but some don't. For those cases, a * completion is used to guarantee that this function does not return * until spi_finalize_current_message() is done accessing * ctlr->cur_msg. * Use of the following two flags enable to opportunistically skip the * use of the completion since its use involves expensive spin locks. * In case of a race with the context that calls * spi_finalize_current_message() the completion will always be used, * due to strict ordering of these flags using barriers. 
*/ WRITE_ONCE(ctlr->cur_msg_incomplete, true); WRITE_ONCE(ctlr->cur_msg_need_completion, false); reinit_completion(&ctlr->cur_msg_completion); smp_wmb(); /* Make these available to spi_finalize_current_message() */ ret = ctlr->transfer_one_message(ctlr, msg); if (ret) { dev_err(&ctlr->dev, "failed to transfer one message from queue\n"); return ret; } WRITE_ONCE(ctlr->cur_msg_need_completion, true); smp_mb(); /* See spi_finalize_current_message()... */ if (READ_ONCE(ctlr->cur_msg_incomplete)) wait_for_completion(&ctlr->cur_msg_completion); return 0; } /** * __spi_pump_messages - function which processes SPI message queue * @ctlr: controller to process queue for * @in_kthread: true if we are in the context of the message pump thread * * This function checks if there is any SPI message in the queue that * needs processing and if so call out to the driver to initialize hardware * and transfer each message. * * Note that it is called both from the kthread itself and also from * inside spi_sync(); the queue extraction handling at the top of the * function should deal with this safely. */ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread) { struct spi_message *msg; bool was_busy = false; unsigned long flags; int ret; /* Take the I/O mutex */ mutex_lock(&ctlr->io_mutex); /* Lock queue */ spin_lock_irqsave(&ctlr->queue_lock, flags); /* Make sure we are not already running a message */ if (ctlr->cur_msg) goto out_unlock; /* Check if the queue is idle */ if (list_empty(&ctlr->queue) || !ctlr->running) { if (!ctlr->busy) goto out_unlock; /* Defer any non-atomic teardown to the thread */ if (!in_kthread) { if (!ctlr->dummy_rx && !ctlr->dummy_tx && !ctlr->unprepare_transfer_hardware) { spi_idle_runtime_pm(ctlr); ctlr->busy = false; ctlr->queue_empty = true; trace_spi_controller_idle(ctlr); } else { kthread_queue_work(ctlr->kworker, &ctlr->pump_messages); } goto out_unlock; } ctlr->busy = false; spin_unlock_irqrestore(&ctlr->queue_lock, flags); kfree(ctlr->dummy_rx); ctlr->dummy_rx = NULL; kfree(ctlr->dummy_tx); ctlr->dummy_tx = NULL; if (ctlr->unprepare_transfer_hardware && ctlr->unprepare_transfer_hardware(ctlr)) dev_err(&ctlr->dev, "failed to unprepare transfer hardware\n"); spi_idle_runtime_pm(ctlr); trace_spi_controller_idle(ctlr); spin_lock_irqsave(&ctlr->queue_lock, flags); ctlr->queue_empty = true; goto out_unlock; } /* Extract head of queue */ msg = list_first_entry(&ctlr->queue, struct spi_message, queue); ctlr->cur_msg = msg; list_del_init(&msg->queue); if (ctlr->busy) was_busy = true; else ctlr->busy = true; spin_unlock_irqrestore(&ctlr->queue_lock, flags); ret = __spi_pump_transfer_message(ctlr, msg, was_busy); kthread_queue_work(ctlr->kworker, &ctlr->pump_messages); ctlr->cur_msg = NULL; ctlr->fallback = false; mutex_unlock(&ctlr->io_mutex); /* Prod the scheduler in case transfer_one() was busy waiting */ if (!ret) cond_resched(); return; out_unlock: spin_unlock_irqrestore(&ctlr->queue_lock, flags); mutex_unlock(&ctlr->io_mutex); } /** * spi_pump_messages - kthread work function which processes spi message queue * @work: pointer to kthread work struct contained in the controller struct */ static void spi_pump_messages(struct kthread_work *work) { struct spi_controller *ctlr = container_of(work, struct spi_controller, pump_messages); __spi_pump_messages(ctlr, true); } /** * spi_take_timestamp_pre - helper to collect the beginning of the TX timestamp * @ctlr: Pointer to the spi_controller structure of the driver * @xfer: Pointer to the transfer being timestamped * 
@progress: How many words (not bytes) have been transferred so far * @irqs_off: If true, will disable IRQs and preemption for the duration of the * transfer, for less jitter in time measurement. Only compatible * with PIO drivers. If true, must follow up with * spi_take_timestamp_post or otherwise system will crash. * WARNING: for fully predictable results, the CPU frequency must * also be under control (governor). * * This is a helper for drivers to collect the beginning of the TX timestamp * for the requested byte from the SPI transfer. The frequency with which this * function must be called (once per word, once for the whole transfer, once * per batch of words etc) is arbitrary as long as the @tx buffer offset is * greater than or equal to the requested byte at the time of the call. The * timestamp is only taken once, at the first such call. It is assumed that * the driver advances its @tx buffer pointer monotonically. */ void spi_take_timestamp_pre(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off) { if (!xfer->ptp_sts) return; if (xfer->timestamped) return; if (progress > xfer->ptp_sts_word_pre) return; /* Capture the resolution of the timestamp */ xfer->ptp_sts_word_pre = progress; if (irqs_off) { local_irq_save(ctlr->irq_flags); preempt_disable(); } ptp_read_system_prets(xfer->ptp_sts); } EXPORT_SYMBOL_GPL(spi_take_timestamp_pre); /** * spi_take_timestamp_post - helper to collect the end of the TX timestamp * @ctlr: Pointer to the spi_controller structure of the driver * @xfer: Pointer to the transfer being timestamped * @progress: How many words (not bytes) have been transferred so far * @irqs_off: If true, will re-enable IRQs and preemption for the local CPU. * * This is a helper for drivers to collect the end of the TX timestamp for * the requested byte from the SPI transfer. Can be called with an arbitrary * frequency: only the first call where @tx exceeds or is equal to the * requested word will be timestamped. */ void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off) { if (!xfer->ptp_sts) return; if (xfer->timestamped) return; if (progress < xfer->ptp_sts_word_post) return; ptp_read_system_postts(xfer->ptp_sts); if (irqs_off) { local_irq_restore(ctlr->irq_flags); preempt_enable(); } /* Capture the resolution of the timestamp */ xfer->ptp_sts_word_post = progress; xfer->timestamped = 1; } EXPORT_SYMBOL_GPL(spi_take_timestamp_post); /** * spi_set_thread_rt - set the controller to pump at realtime priority * @ctlr: controller to boost priority of * * This can be called because the controller requested realtime priority * (by setting the ->rt value before calling spi_register_controller()) or * because a device on the bus said that its transfers needed realtime * priority. * * NOTE: at the moment if any device on a bus says it needs realtime then * the thread will be at realtime priority for all transfers on that * controller. If this eventually becomes a problem we may see if we can * find a way to boost the priority only temporarily during relevant * transfers. 
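 *
 * For reference, a controller driver that wants this behaviour simply sets
 * the flag before registration, roughly along these lines:
 *
 *        ctlr->rt = true;
 *        spi_register_controller(ctlr);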
*/ static void spi_set_thread_rt(struct spi_controller *ctlr) { dev_info(&ctlr->dev, "will run message pump with realtime priority\n"); sched_set_fifo(ctlr->kworker->task); } static int spi_init_queue(struct spi_controller *ctlr) { ctlr->running = false; ctlr->busy = false; ctlr->queue_empty = true; ctlr->kworker = kthread_create_worker(0, dev_name(&ctlr->dev)); if (IS_ERR(ctlr->kworker)) { dev_err(&ctlr->dev, "failed to create message pump kworker\n"); return PTR_ERR(ctlr->kworker); } kthread_init_work(&ctlr->pump_messages, spi_pump_messages); /* * Controller config will indicate if this controller should run the * message pump with high (realtime) priority to reduce the transfer * latency on the bus by minimising the delay between a transfer * request and the scheduling of the message pump thread. Without this * setting the message pump thread will remain at default priority. */ if (ctlr->rt) spi_set_thread_rt(ctlr); return 0; } /** * spi_get_next_queued_message() - called by driver to check for queued * messages * @ctlr: the controller to check for queued messages * * If there are more messages in the queue, the next message is returned from * this call. * * Return: the next message in the queue, else NULL if the queue is empty. */ struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr) { struct spi_message *next; unsigned long flags; /* Get a pointer to the next message, if any */ spin_lock_irqsave(&ctlr->queue_lock, flags); next = list_first_entry_or_null(&ctlr->queue, struct spi_message, queue); spin_unlock_irqrestore(&ctlr->queue_lock, flags); return next; } EXPORT_SYMBOL_GPL(spi_get_next_queued_message); /* * __spi_unoptimize_message - shared implementation of spi_unoptimize_message() * and spi_maybe_unoptimize_message() * @msg: the message to unoptimize * * Peripheral drivers should use spi_unoptimize_message() and callers inside * core should use spi_maybe_unoptimize_message() rather than calling this * function directly. * * It is not valid to call this on a message that is not currently optimized. */ static void __spi_unoptimize_message(struct spi_message *msg) { struct spi_controller *ctlr = msg->spi->controller; if (ctlr->unoptimize_message) ctlr->unoptimize_message(msg); spi_res_release(ctlr, msg); msg->optimized = false; msg->opt_state = NULL; } /* * spi_maybe_unoptimize_message - unoptimize msg not managed by a peripheral * @msg: the message to unoptimize * * This function is used to unoptimize a message if and only if it was * optimized by the core (via spi_maybe_optimize_message()). */ static void spi_maybe_unoptimize_message(struct spi_message *msg) { if (!msg->pre_optimized && msg->optimized) __spi_unoptimize_message(msg); } /** * spi_finalize_current_message() - the current message is complete * @ctlr: the controller to return the message to * * Called by the driver to notify the core that the message in the front of the * queue is complete and can be removed from the queue. 
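 *
 * A driver that provides its own transfer_one_message() is expected to end
 * up calling this, roughly along these lines (example_do_transfer() is a
 * made-up helper standing in for the driver's actual I/O):
 *
 *        static int example_transfer_one_message(struct spi_controller *ctlr,
 *                                                struct spi_message *msg)
 *        {
 *                struct spi_transfer *xfer;
 *
 *                list_for_each_entry(xfer, &msg->transfers, transfer_list) {
 *                        example_do_transfer(ctlr, msg->spi, xfer);
 *                        msg->actual_length += xfer->len;
 *                }
 *
 *                msg->status = 0;
 *                spi_finalize_current_message(ctlr);
 *                return 0;
 *        }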
*/ void spi_finalize_current_message(struct spi_controller *ctlr) { struct spi_transfer *xfer; struct spi_message *mesg; int ret; mesg = ctlr->cur_msg; if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) { list_for_each_entry(xfer, &mesg->transfers, transfer_list) { ptp_read_system_postts(xfer->ptp_sts); xfer->ptp_sts_word_post = xfer->len; } } if (unlikely(ctlr->ptp_sts_supported)) list_for_each_entry(xfer, &mesg->transfers, transfer_list) WARN_ON_ONCE(xfer->ptp_sts && !xfer->timestamped); spi_unmap_msg(ctlr, mesg); if (mesg->prepared && ctlr->unprepare_message) { ret = ctlr->unprepare_message(ctlr, mesg); if (ret) { dev_err(&ctlr->dev, "failed to unprepare message: %d\n", ret); } } mesg->prepared = false; spi_maybe_unoptimize_message(mesg); WRITE_ONCE(ctlr->cur_msg_incomplete, false); smp_mb(); /* See __spi_pump_transfer_message()... */ if (READ_ONCE(ctlr->cur_msg_need_completion)) complete(&ctlr->cur_msg_completion); trace_spi_message_done(mesg); mesg->state = NULL; if (mesg->complete) mesg->complete(mesg->context); } EXPORT_SYMBOL_GPL(spi_finalize_current_message); static int spi_start_queue(struct spi_controller *ctlr) { unsigned long flags; spin_lock_irqsave(&ctlr->queue_lock, flags); if (ctlr->running || ctlr->busy) { spin_unlock_irqrestore(&ctlr->queue_lock, flags); return -EBUSY; } ctlr->running = true; ctlr->cur_msg = NULL; spin_unlock_irqrestore(&ctlr->queue_lock, flags); kthread_queue_work(ctlr->kworker, &ctlr->pump_messages); return 0; } static int spi_stop_queue(struct spi_controller *ctlr) { unsigned long flags; unsigned limit = 500; int ret = 0; spin_lock_irqsave(&ctlr->queue_lock, flags); /* * This is a bit lame, but is optimized for the common execution path. * A wait_queue on the ctlr->busy could be used, but then the common * execution path (pump_messages) would be required to call wake_up or * friends on every SPI message. Do this instead. */ while ((!list_empty(&ctlr->queue) || ctlr->busy) && limit--) { spin_unlock_irqrestore(&ctlr->queue_lock, flags); usleep_range(10000, 11000); spin_lock_irqsave(&ctlr->queue_lock, flags); } if (!list_empty(&ctlr->queue) || ctlr->busy) ret = -EBUSY; else ctlr->running = false; spin_unlock_irqrestore(&ctlr->queue_lock, flags); return ret; } static int spi_destroy_queue(struct spi_controller *ctlr) { int ret; ret = spi_stop_queue(ctlr); /* * kthread_flush_worker will block until all work is done. * If the reason that stop_queue timed out is that the work will never * finish, then it does no good to call flush/stop thread, so * return anyway. */ if (ret) { dev_err(&ctlr->dev, "problem destroying queue\n"); return ret; } kthread_destroy_worker(ctlr->kworker); return 0; } static int __spi_queued_transfer(struct spi_device *spi, struct spi_message *msg, bool need_pump) { struct spi_controller *ctlr = spi->controller; unsigned long flags; spin_lock_irqsave(&ctlr->queue_lock, flags); if (!ctlr->running) { spin_unlock_irqrestore(&ctlr->queue_lock, flags); return -ESHUTDOWN; } msg->actual_length = 0; msg->status = -EINPROGRESS; list_add_tail(&msg->queue, &ctlr->queue); ctlr->queue_empty = false; if (!ctlr->busy && need_pump) kthread_queue_work(ctlr->kworker, &ctlr->pump_messages); spin_unlock_irqrestore(&ctlr->queue_lock, flags); return 0; } /** * spi_queued_transfer - transfer function for queued transfers * @spi: SPI device which is requesting transfer * @msg: SPI message which is to handled is queued to driver queue * * Return: zero on success, else a negative error code. 
*/ static int spi_queued_transfer(struct spi_device *spi, struct spi_message *msg) { return __spi_queued_transfer(spi, msg, true); } static int spi_controller_initialize_queue(struct spi_controller *ctlr) { int ret; ctlr->transfer = spi_queued_transfer; if (!ctlr->transfer_one_message) ctlr->transfer_one_message = spi_transfer_one_message; /* Initialize and start queue */ ret = spi_init_queue(ctlr); if (ret) { dev_err(&ctlr->dev, "problem initializing queue\n"); goto err_init_queue; } ctlr->queued = true; ret = spi_start_queue(ctlr); if (ret) { dev_err(&ctlr->dev, "problem starting queue\n"); goto err_start_queue; } return 0; err_start_queue: spi_destroy_queue(ctlr); err_init_queue: return ret; } /** * spi_flush_queue - Send all pending messages in the queue from the callers' * context * @ctlr: controller to process queue for * * This should be used when one wants to ensure all pending messages have been * sent before doing something. Is used by the spi-mem code to make sure SPI * memory operations do not preempt regular SPI transfers that have been queued * before the spi-mem operation. */ void spi_flush_queue(struct spi_controller *ctlr) { if (ctlr->transfer == spi_queued_transfer) __spi_pump_messages(ctlr, false); } /*-------------------------------------------------------------------------*/ #if defined(CONFIG_OF) static void of_spi_parse_dt_cs_delay(struct device_node *nc, struct spi_delay *delay, const char *prop) { u32 value; if (!of_property_read_u32(nc, prop, &value)) { if (value > U16_MAX) { delay->value = DIV_ROUND_UP(value, 1000); delay->unit = SPI_DELAY_UNIT_USECS; } else { delay->value = value; delay->unit = SPI_DELAY_UNIT_NSECS; } } } static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, struct device_node *nc) { u32 value, cs[SPI_CS_CNT_MAX]; int rc, idx; /* Mode (clock phase/polarity/etc.) */ if (of_property_read_bool(nc, "spi-cpha")) spi->mode |= SPI_CPHA; if (of_property_read_bool(nc, "spi-cpol")) spi->mode |= SPI_CPOL; if (of_property_read_bool(nc, "spi-3wire")) spi->mode |= SPI_3WIRE; if (of_property_read_bool(nc, "spi-lsb-first")) spi->mode |= SPI_LSB_FIRST; if (of_property_read_bool(nc, "spi-cs-high")) spi->mode |= SPI_CS_HIGH; /* Device DUAL/QUAD mode */ if (!of_property_read_u32(nc, "spi-tx-bus-width", &value)) { switch (value) { case 0: spi->mode |= SPI_NO_TX; break; case 1: break; case 2: spi->mode |= SPI_TX_DUAL; break; case 4: spi->mode |= SPI_TX_QUAD; break; case 8: spi->mode |= SPI_TX_OCTAL; break; default: dev_warn(&ctlr->dev, "spi-tx-bus-width %d not supported\n", value); break; } } if (!of_property_read_u32(nc, "spi-rx-bus-width", &value)) { switch (value) { case 0: spi->mode |= SPI_NO_RX; break; case 1: break; case 2: spi->mode |= SPI_RX_DUAL; break; case 4: spi->mode |= SPI_RX_QUAD; break; case 8: spi->mode |= SPI_RX_OCTAL; break; default: dev_warn(&ctlr->dev, "spi-rx-bus-width %d not supported\n", value); break; } } if (spi_controller_is_slave(ctlr)) { if (!of_node_name_eq(nc, "slave")) { dev_err(&ctlr->dev, "%pOF is not called 'slave'\n", nc); return -EINVAL; } return 0; } if (ctlr->num_chipselect > SPI_CS_CNT_MAX) { dev_err(&ctlr->dev, "No. of CS is more than max. no. 
of supported CS\n"); return -EINVAL; } spi_set_all_cs_unused(spi); /* Device address */ rc = of_property_read_variable_u32_array(nc, "reg", &cs[0], 1, SPI_CS_CNT_MAX); if (rc < 0) { dev_err(&ctlr->dev, "%pOF has no valid 'reg' property (%d)\n", nc, rc); return rc; } if (rc > ctlr->num_chipselect) { dev_err(&ctlr->dev, "%pOF has number of CS > ctlr->num_chipselect (%d)\n", nc, rc); return rc; } if ((of_property_read_bool(nc, "parallel-memories")) && (!(ctlr->flags & SPI_CONTROLLER_MULTI_CS))) { dev_err(&ctlr->dev, "SPI controller doesn't support multi CS\n"); return -EINVAL; } for (idx = 0; idx < rc; idx++) spi_set_chipselect(spi, idx, cs[idx]); /* * By default spi->chip_select[0] will hold the physical CS number, * so set bit 0 in spi->cs_index_mask. */ spi->cs_index_mask = BIT(0); /* Device speed */ if (!of_property_read_u32(nc, "spi-max-frequency", &value)) spi->max_speed_hz = value; /* Device CS delays */ of_spi_parse_dt_cs_delay(nc, &spi->cs_setup, "spi-cs-setup-delay-ns"); of_spi_parse_dt_cs_delay(nc, &spi->cs_hold, "spi-cs-hold-delay-ns"); of_spi_parse_dt_cs_delay(nc, &spi->cs_inactive, "spi-cs-inactive-delay-ns"); return 0; } static struct spi_device * of_register_spi_device(struct spi_controller *ctlr, struct device_node *nc) { struct spi_device *spi; int rc; /* Alloc an spi_device */ spi = spi_alloc_device(ctlr); if (!spi) { dev_err(&ctlr->dev, "spi_device alloc error for %pOF\n", nc); rc = -ENOMEM; goto err_out; } /* Select device driver */ rc = of_alias_from_compatible(nc, spi->modalias, sizeof(spi->modalias)); if (rc < 0) { dev_err(&ctlr->dev, "cannot find modalias for %pOF\n", nc); goto err_out; } rc = of_spi_parse_dt(ctlr, spi, nc); if (rc) goto err_out; /* Store a pointer to the node in the device structure */ of_node_get(nc); device_set_node(&spi->dev, of_fwnode_handle(nc)); /* Register the new device */ rc = spi_add_device(spi); if (rc) { dev_err(&ctlr->dev, "spi_device register error %pOF\n", nc); goto err_of_node_put; } return spi; err_of_node_put: of_node_put(nc); err_out: spi_dev_put(spi); return ERR_PTR(rc); } /** * of_register_spi_devices() - Register child devices onto the SPI bus * @ctlr: Pointer to spi_controller device * * Registers an spi_device for each child node of controller node which * represents a valid SPI slave. */ static void of_register_spi_devices(struct spi_controller *ctlr) { struct spi_device *spi; struct device_node *nc; for_each_available_child_of_node(ctlr->dev.of_node, nc) { if (of_node_test_and_set_flag(nc, OF_POPULATED)) continue; spi = of_register_spi_device(ctlr, nc); if (IS_ERR(spi)) { dev_warn(&ctlr->dev, "Failed to create SPI device for %pOF\n", nc); of_node_clear_flag(nc, OF_POPULATED); } } } #else static void of_register_spi_devices(struct spi_controller *ctlr) { } #endif /** * spi_new_ancillary_device() - Register ancillary SPI device * @spi: Pointer to the main SPI device registering the ancillary device * @chip_select: Chip Select of the ancillary device * * Register an ancillary SPI device; for example some chips have a chip-select * for normal device usage and another one for setup/firmware upload. * * This may only be called from main SPI device's probe routine. 
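 *
 * Typical use from the main device's probe() looks roughly like this
 * (the chip-select value 1 is purely illustrative):
 *
 *        ancillary = spi_new_ancillary_device(spi, 1);
 *        if (IS_ERR(ancillary))
 *                return PTR_ERR(ancillary);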
 *
 * Return: a pointer to the new device, or ERR_PTR on error.
 */
struct spi_device *spi_new_ancillary_device(struct spi_device *spi,
                                            u8 chip_select)
{
        struct spi_controller *ctlr = spi->controller;
        struct spi_device *ancillary;
        int rc = 0;

        /* Alloc an spi_device */
        ancillary = spi_alloc_device(ctlr);
        if (!ancillary) {
                rc = -ENOMEM;
                goto err_out;
        }

        strscpy(ancillary->modalias, "dummy", sizeof(ancillary->modalias));

        /* Use provided chip-select for ancillary device */
        spi_set_all_cs_unused(ancillary);
        spi_set_chipselect(ancillary, 0, chip_select);

        /* Take over SPI mode/speed from SPI main device */
        ancillary->max_speed_hz = spi->max_speed_hz;
        ancillary->mode = spi->mode;
        /*
         * By default spi->chip_select[0] will hold the physical CS number,
         * so set bit 0 in spi->cs_index_mask.
         */
        ancillary->cs_index_mask = BIT(0);

        WARN_ON(!mutex_is_locked(&ctlr->add_lock));

        /* Register the new device */
        rc = __spi_add_device(ancillary);
        if (rc) {
                dev_err(&spi->dev, "failed to register ancillary device\n");
                goto err_out;
        }

        return ancillary;

err_out:
        spi_dev_put(ancillary);
        return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(spi_new_ancillary_device);

#ifdef CONFIG_ACPI
struct acpi_spi_lookup {
        struct spi_controller   *ctlr;
        u32                     max_speed_hz;
        u32                     mode;
        int                     irq;
        u8                      bits_per_word;
        u8                      chip_select;
        int                     n;
        int                     index;
};

static int acpi_spi_count(struct acpi_resource *ares, void *data)
{
        struct acpi_resource_spi_serialbus *sb;
        int *count = data;

        if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
                return 1;

        sb = &ares->data.spi_serial_bus;
        if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_SPI)
                return 1;

        *count = *count + 1;

        return 1;
}

/**
 * acpi_spi_count_resources - Count the number of SpiSerialBus resources
 * @adev: ACPI device
 *
 * Return: the number of SpiSerialBus resources in the ACPI device's
 * resource list, or a negative error code.
*/ int acpi_spi_count_resources(struct acpi_device *adev) { LIST_HEAD(r); int count = 0; int ret; ret = acpi_dev_get_resources(adev, &r, acpi_spi_count, &count); if (ret < 0) return ret; acpi_dev_free_resource_list(&r); return count; } EXPORT_SYMBOL_GPL(acpi_spi_count_resources); static void acpi_spi_parse_apple_properties(struct acpi_device *dev, struct acpi_spi_lookup *lookup) { const union acpi_object *obj; if (!x86_apple_machine) return; if (!acpi_dev_get_property(dev, "spiSclkPeriod", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length >= 4) lookup->max_speed_hz = NSEC_PER_SEC / *(u32 *)obj->buffer.pointer; if (!acpi_dev_get_property(dev, "spiWordSize", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length == 8) lookup->bits_per_word = *(u64 *)obj->buffer.pointer; if (!acpi_dev_get_property(dev, "spiBitOrder", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length == 8 && !*(u64 *)obj->buffer.pointer) lookup->mode |= SPI_LSB_FIRST; if (!acpi_dev_get_property(dev, "spiSPO", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length == 8 && *(u64 *)obj->buffer.pointer) lookup->mode |= SPI_CPOL; if (!acpi_dev_get_property(dev, "spiSPH", ACPI_TYPE_BUFFER, &obj) && obj->buffer.length == 8 && *(u64 *)obj->buffer.pointer) lookup->mode |= SPI_CPHA; } static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) { struct acpi_spi_lookup *lookup = data; struct spi_controller *ctlr = lookup->ctlr; if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) { struct acpi_resource_spi_serialbus *sb; acpi_handle parent_handle; acpi_status status; sb = &ares->data.spi_serial_bus; if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_SPI) { if (lookup->index != -1 && lookup->n++ != lookup->index) return 1; status = acpi_get_handle(NULL, sb->resource_source.string_ptr, &parent_handle); if (ACPI_FAILURE(status)) return -ENODEV; if (ctlr) { if (ACPI_HANDLE(ctlr->dev.parent) != parent_handle) return -ENODEV; } else { struct acpi_device *adev; adev = acpi_fetch_acpi_dev(parent_handle); if (!adev) return -ENODEV; ctlr = acpi_spi_find_controller_by_adev(adev); if (!ctlr) return -EPROBE_DEFER; lookup->ctlr = ctlr; } /* * ACPI DeviceSelection numbering is handled by the * host controller driver in Windows and can vary * from driver to driver. In Linux we always expect * 0 .. max - 1 so we need to ask the driver to * translate between the two schemes. */ if (ctlr->fw_translate_cs) { int cs = ctlr->fw_translate_cs(ctlr, sb->device_selection); if (cs < 0) return cs; lookup->chip_select = cs; } else { lookup->chip_select = sb->device_selection; } lookup->max_speed_hz = sb->connection_speed; lookup->bits_per_word = sb->data_bit_length; if (sb->clock_phase == ACPI_SPI_SECOND_PHASE) lookup->mode |= SPI_CPHA; if (sb->clock_polarity == ACPI_SPI_START_HIGH) lookup->mode |= SPI_CPOL; if (sb->device_polarity == ACPI_SPI_ACTIVE_HIGH) lookup->mode |= SPI_CS_HIGH; } } else if (lookup->irq < 0) { struct resource r; if (acpi_dev_resource_interrupt(ares, 0, &r)) lookup->irq = r.start; } /* Always tell the ACPI core to skip this resource */ return 1; } /** * acpi_spi_device_alloc - Allocate a spi device, and fill it in with ACPI information * @ctlr: controller to which the spi device belongs * @adev: ACPI Device for the spi device * @index: Index of the spi resource inside the ACPI Node * * This should be used to allocate a new SPI device from and ACPI Device node. * The caller is responsible for calling spi_add_device to register the SPI device. * * If ctlr is set to NULL, the Controller for the SPI device will be looked up * using the resource. 
* If index is set to -1, index is not used. * Note: If index is -1, ctlr must be set. * * Return: a pointer to the new device, or ERR_PTR on error. */ struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index) { acpi_handle parent_handle = NULL; struct list_head resource_list; struct acpi_spi_lookup lookup = {}; struct spi_device *spi; int ret; if (!ctlr && index == -1) return ERR_PTR(-EINVAL); lookup.ctlr = ctlr; lookup.irq = -1; lookup.index = index; lookup.n = 0; INIT_LIST_HEAD(&resource_list); ret = acpi_dev_get_resources(adev, &resource_list, acpi_spi_add_resource, &lookup); acpi_dev_free_resource_list(&resource_list); if (ret < 0) /* Found SPI in _CRS but it points to another controller */ return ERR_PTR(ret); if (!lookup.max_speed_hz && ACPI_SUCCESS(acpi_get_parent(adev->handle, &parent_handle)) && ACPI_HANDLE(lookup.ctlr->dev.parent) == parent_handle) { /* Apple does not use _CRS but nested devices for SPI slaves */ acpi_spi_parse_apple_properties(adev, &lookup); } if (!lookup.max_speed_hz) return ERR_PTR(-ENODEV); spi = spi_alloc_device(lookup.ctlr); if (!spi) { dev_err(&lookup.ctlr->dev, "failed to allocate SPI device for %s\n", dev_name(&adev->dev)); return ERR_PTR(-ENOMEM); } spi_set_all_cs_unused(spi); spi_set_chipselect(spi, 0, lookup.chip_select); ACPI_COMPANION_SET(&spi->dev, adev); spi->max_speed_hz = lookup.max_speed_hz; spi->mode |= lookup.mode; spi->irq = lookup.irq; spi->bits_per_word = lookup.bits_per_word; /* * By default spi->chip_select[0] will hold the physical CS number, * so set bit 0 in spi->cs_index_mask. */ spi->cs_index_mask = BIT(0); return spi; } EXPORT_SYMBOL_GPL(acpi_spi_device_alloc); static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, struct acpi_device *adev) { struct spi_device *spi; if (acpi_bus_get_status(adev) || !adev->status.present || acpi_device_enumerated(adev)) return AE_OK; spi = acpi_spi_device_alloc(ctlr, adev, -1); if (IS_ERR(spi)) { if (PTR_ERR(spi) == -ENOMEM) return AE_NO_MEMORY; else return AE_OK; } acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias, sizeof(spi->modalias)); if (spi->irq < 0) spi->irq = acpi_dev_gpio_irq_get(adev, 0); acpi_device_set_enumerated(adev); adev->power.flags.ignore_parent = true; if (spi_add_device(spi)) { adev->power.flags.ignore_parent = false; dev_err(&ctlr->dev, "failed to add SPI device %s from ACPI\n", dev_name(&adev->dev)); spi_dev_put(spi); } return AE_OK; } static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level, void *data, void **return_value) { struct acpi_device *adev = acpi_fetch_acpi_dev(handle); struct spi_controller *ctlr = data; if (!adev) return AE_OK; return acpi_register_spi_device(ctlr, adev); } #define SPI_ACPI_ENUMERATE_MAX_DEPTH 32 static void acpi_register_spi_devices(struct spi_controller *ctlr) { acpi_status status; acpi_handle handle; handle = ACPI_HANDLE(ctlr->dev.parent); if (!handle) return; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, SPI_ACPI_ENUMERATE_MAX_DEPTH, acpi_spi_add_device, NULL, ctlr, NULL); if (ACPI_FAILURE(status)) dev_warn(&ctlr->dev, "failed to enumerate SPI slaves\n"); } #else static inline void acpi_register_spi_devices(struct spi_controller *ctlr) {} #endif /* CONFIG_ACPI */ static void spi_controller_release(struct device *dev) { struct spi_controller *ctlr; ctlr = container_of(dev, struct spi_controller, dev); kfree(ctlr); } static struct class spi_master_class = { .name = "spi_master", .dev_release = spi_controller_release, .dev_groups = 
spi_master_groups, }; #ifdef CONFIG_SPI_SLAVE /** * spi_slave_abort - abort the ongoing transfer request on an SPI slave * controller * @spi: device used for the current transfer */ int spi_slave_abort(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; if (spi_controller_is_slave(ctlr) && ctlr->slave_abort) return ctlr->slave_abort(ctlr); return -ENOTSUPP; } EXPORT_SYMBOL_GPL(spi_slave_abort); int spi_target_abort(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; if (spi_controller_is_target(ctlr) && ctlr->target_abort) return ctlr->target_abort(ctlr); return -ENOTSUPP; } EXPORT_SYMBOL_GPL(spi_target_abort); static ssize_t slave_show(struct device *dev, struct device_attribute *attr, char *buf) { struct spi_controller *ctlr = container_of(dev, struct spi_controller, dev); struct device *child; child = device_find_any_child(&ctlr->dev); return sysfs_emit(buf, "%s\n", child ? to_spi_device(child)->modalias : NULL); } static ssize_t slave_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct spi_controller *ctlr = container_of(dev, struct spi_controller, dev); struct spi_device *spi; struct device *child; char name[32]; int rc; rc = sscanf(buf, "%31s", name); if (rc != 1 || !name[0]) return -EINVAL; child = device_find_any_child(&ctlr->dev); if (child) { /* Remove registered slave */ device_unregister(child); put_device(child); } if (strcmp(name, "(null)")) { /* Register new slave */ spi = spi_alloc_device(ctlr); if (!spi) return -ENOMEM; strscpy(spi->modalias, name, sizeof(spi->modalias)); rc = spi_add_device(spi); if (rc) { spi_dev_put(spi); return rc; } } return count; } static DEVICE_ATTR_RW(slave); static struct attribute *spi_slave_attrs[] = { &dev_attr_slave.attr, NULL, }; static const struct attribute_group spi_slave_group = { .attrs = spi_slave_attrs, }; static const struct attribute_group *spi_slave_groups[] = { &spi_controller_statistics_group, &spi_slave_group, NULL, }; static struct class spi_slave_class = { .name = "spi_slave", .dev_release = spi_controller_release, .dev_groups = spi_slave_groups, }; #else extern struct class spi_slave_class; /* dummy */ #endif /** * __spi_alloc_controller - allocate an SPI master or slave controller * @dev: the controller, possibly using the platform_bus * @size: how much zeroed driver-private data to allocate; the pointer to this * memory is in the driver_data field of the returned device, accessible * with spi_controller_get_devdata(); the memory is cacheline aligned; * drivers granting DMA access to portions of their private data need to * round up @size using ALIGN(size, dma_get_cache_alignment()). * @slave: flag indicating whether to allocate an SPI master (false) or SPI * slave (true) controller * Context: can sleep * * This call is used only by SPI controller drivers, which are the * only ones directly touching chip registers. It's how they allocate * an spi_controller structure, prior to calling spi_register_controller(). * * This must be called from context that can sleep. * * The caller is responsible for assigning the bus number and initializing the * controller's methods before calling spi_register_controller(); and (after * errors adding the device) calling spi_controller_put() to prevent a memory * leak. * * Return: the SPI controller structure on success, else NULL. 
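 *
 * As a sketch, a typical controller driver probe() does something like the
 * following (struct example_priv and example_transfer_one() are made-up
 * driver-specific names):
 *
 *        ctlr = spi_alloc_master(&pdev->dev, sizeof(struct example_priv));
 *        if (!ctlr)
 *                return -ENOMEM;
 *        priv = spi_controller_get_devdata(ctlr);
 *        ctlr->num_chipselect = 4;
 *        ctlr->transfer_one = example_transfer_one;
 *        ret = spi_register_controller(ctlr);
 *        if (ret)
 *                spi_controller_put(ctlr);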
*/ struct spi_controller *__spi_alloc_controller(struct device *dev, unsigned int size, bool slave) { struct spi_controller *ctlr; size_t ctlr_size = ALIGN(sizeof(*ctlr), dma_get_cache_alignment()); if (!dev) return NULL; ctlr = kzalloc(size + ctlr_size, GFP_KERNEL); if (!ctlr) return NULL; device_initialize(&ctlr->dev); INIT_LIST_HEAD(&ctlr->queue); spin_lock_init(&ctlr->queue_lock); spin_lock_init(&ctlr->bus_lock_spinlock); mutex_init(&ctlr->bus_lock_mutex); mutex_init(&ctlr->io_mutex); mutex_init(&ctlr->add_lock); ctlr->bus_num = -1; ctlr->num_chipselect = 1; ctlr->slave = slave; if (IS_ENABLED(CONFIG_SPI_SLAVE) && slave) ctlr->dev.class = &spi_slave_class; else ctlr->dev.class = &spi_master_class; ctlr->dev.parent = dev; pm_suspend_ignore_children(&ctlr->dev, true); spi_controller_set_devdata(ctlr, (void *)ctlr + ctlr_size); return ctlr; } EXPORT_SYMBOL_GPL(__spi_alloc_controller); static void devm_spi_release_controller(struct device *dev, void *ctlr) { spi_controller_put(*(struct spi_controller **)ctlr); } /** * __devm_spi_alloc_controller - resource-managed __spi_alloc_controller() * @dev: physical device of SPI controller * @size: how much zeroed driver-private data to allocate * @slave: whether to allocate an SPI master (false) or SPI slave (true) * Context: can sleep * * Allocate an SPI controller and automatically release a reference on it * when @dev is unbound from its driver. Drivers are thus relieved from * having to call spi_controller_put(). * * The arguments to this function are identical to __spi_alloc_controller(). * * Return: the SPI controller structure on success, else NULL. */ struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave) { struct spi_controller **ptr, *ctlr; ptr = devres_alloc(devm_spi_release_controller, sizeof(*ptr), GFP_KERNEL); if (!ptr) return NULL; ctlr = __spi_alloc_controller(dev, size, slave); if (ctlr) { ctlr->devm_allocated = true; *ptr = ctlr; devres_add(dev, ptr); } else { devres_free(ptr); } return ctlr; } EXPORT_SYMBOL_GPL(__devm_spi_alloc_controller); /** * spi_get_gpio_descs() - grab chip select GPIOs for the master * @ctlr: The SPI master to grab GPIO descriptors for */ static int spi_get_gpio_descs(struct spi_controller *ctlr) { int nb, i; struct gpio_desc **cs; struct device *dev = &ctlr->dev; unsigned long native_cs_mask = 0; unsigned int num_cs_gpios = 0; nb = gpiod_count(dev, "cs"); if (nb < 0) { /* No GPIOs at all is fine, else return the error */ if (nb == -ENOENT) return 0; return nb; } ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect); cs = devm_kcalloc(dev, ctlr->num_chipselect, sizeof(*cs), GFP_KERNEL); if (!cs) return -ENOMEM; ctlr->cs_gpiods = cs; for (i = 0; i < nb; i++) { /* * Most chipselects are active low, the inverted * semantics are handled by special quirks in gpiolib, * so initializing them GPIOD_OUT_LOW here means * "unasserted", in most cases this will drive the physical * line high. */ cs[i] = devm_gpiod_get_index_optional(dev, "cs", i, GPIOD_OUT_LOW); if (IS_ERR(cs[i])) return PTR_ERR(cs[i]); if (cs[i]) { /* * If we find a CS GPIO, name it after the device and * chip select line. 
*/ char *gpioname; gpioname = devm_kasprintf(dev, GFP_KERNEL, "%s CS%d", dev_name(dev), i); if (!gpioname) return -ENOMEM; gpiod_set_consumer_name(cs[i], gpioname); num_cs_gpios++; continue; } if (ctlr->max_native_cs && i >= ctlr->max_native_cs) { dev_err(dev, "Invalid native chip select %d\n", i); return -EINVAL; } native_cs_mask |= BIT(i); } ctlr->unused_native_cs = ffs(~native_cs_mask) - 1; if ((ctlr->flags & SPI_CONTROLLER_GPIO_SS) && num_cs_gpios && ctlr->max_native_cs && ctlr->unused_native_cs >= ctlr->max_native_cs) { dev_err(dev, "No unused native chip select available\n"); return -EINVAL; } return 0; } static int spi_controller_check_ops(struct spi_controller *ctlr) { /* * The controller may implement only the high-level SPI-memory like * operations if it does not support regular SPI transfers, and this is * valid use case. * If ->mem_ops or ->mem_ops->exec_op is NULL, we request that at least * one of the ->transfer_xxx() method be implemented. */ if (!ctlr->mem_ops || !ctlr->mem_ops->exec_op) { if (!ctlr->transfer && !ctlr->transfer_one && !ctlr->transfer_one_message) { return -EINVAL; } } return 0; } /* Allocate dynamic bus number using Linux idr */ static int spi_controller_id_alloc(struct spi_controller *ctlr, int start, int end) { int id; mutex_lock(&board_lock); id = idr_alloc(&spi_master_idr, ctlr, start, end, GFP_KERNEL); mutex_unlock(&board_lock); if (WARN(id < 0, "couldn't get idr")) return id == -ENOSPC ? -EBUSY : id; ctlr->bus_num = id; return 0; } /** * spi_register_controller - register SPI master or slave controller * @ctlr: initialized master, originally from spi_alloc_master() or * spi_alloc_slave() * Context: can sleep * * SPI controllers connect to their drivers using some non-SPI bus, * such as the platform bus. The final stage of probe() in that code * includes calling spi_register_controller() to hook up to this SPI bus glue. * * SPI controllers use board specific (often SOC specific) bus numbers, * and board-specific addressing for SPI devices combines those numbers * with chip select numbers. Since SPI does not directly support dynamic * device identification, boards need configuration tables telling which * chip is at which address. * * This must be called from context that can sleep. It returns zero on * success, else a negative error code (dropping the controller's refcount). * After a successful return, the caller is responsible for calling * spi_unregister_controller(). * * Return: zero on success, else a negative error code. */ int spi_register_controller(struct spi_controller *ctlr) { struct device *dev = ctlr->dev.parent; struct boardinfo *bi; int first_dynamic; int status; int idx; if (!dev) return -ENODEV; /* * Make sure all necessary hooks are implemented before registering * the SPI controller. 
*/ status = spi_controller_check_ops(ctlr); if (status) return status; if (ctlr->bus_num < 0) ctlr->bus_num = of_alias_get_id(ctlr->dev.of_node, "spi"); if (ctlr->bus_num >= 0) { /* Devices with a fixed bus num must check-in with the num */ status = spi_controller_id_alloc(ctlr, ctlr->bus_num, ctlr->bus_num + 1); if (status) return status; } if (ctlr->bus_num < 0) { first_dynamic = of_alias_get_highest_id("spi"); if (first_dynamic < 0) first_dynamic = 0; else first_dynamic++; status = spi_controller_id_alloc(ctlr, first_dynamic, 0); if (status) return status; } ctlr->bus_lock_flag = 0; init_completion(&ctlr->xfer_completion); init_completion(&ctlr->cur_msg_completion); if (!ctlr->max_dma_len) ctlr->max_dma_len = INT_MAX; /* * Register the device, then userspace will see it. * Registration fails if the bus ID is in use. */ dev_set_name(&ctlr->dev, "spi%u", ctlr->bus_num); if (!spi_controller_is_slave(ctlr) && ctlr->use_gpio_descriptors) { status = spi_get_gpio_descs(ctlr); if (status) goto free_bus_id; /* * A controller using GPIO descriptors always * supports SPI_CS_HIGH if need be. */ ctlr->mode_bits |= SPI_CS_HIGH; } /* * Even if it's just one always-selected device, there must * be at least one chipselect. */ if (!ctlr->num_chipselect) { status = -EINVAL; goto free_bus_id; } /* Setting last_cs to SPI_INVALID_CS means no chip selected */ for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) ctlr->last_cs[idx] = SPI_INVALID_CS; status = device_add(&ctlr->dev); if (status < 0) goto free_bus_id; dev_dbg(dev, "registered %s %s\n", spi_controller_is_slave(ctlr) ? "slave" : "master", dev_name(&ctlr->dev)); /* * If we're using a queued driver, start the queue. Note that we don't * need the queueing logic if the driver is only supporting high-level * memory operations. */ if (ctlr->transfer) { dev_info(dev, "controller is unqueued, this is deprecated\n"); } else if (ctlr->transfer_one || ctlr->transfer_one_message) { status = spi_controller_initialize_queue(ctlr); if (status) { device_del(&ctlr->dev); goto free_bus_id; } } /* Add statistics */ ctlr->pcpu_statistics = spi_alloc_pcpu_stats(dev); if (!ctlr->pcpu_statistics) { dev_err(dev, "Error allocating per-cpu statistics\n"); status = -ENOMEM; goto destroy_queue; } mutex_lock(&board_lock); list_add_tail(&ctlr->list, &spi_controller_list); list_for_each_entry(bi, &board_list, list) spi_match_controller_to_boardinfo(ctlr, &bi->board_info); mutex_unlock(&board_lock); /* Register devices from the device tree and ACPI */ of_register_spi_devices(ctlr); acpi_register_spi_devices(ctlr); return status; destroy_queue: spi_destroy_queue(ctlr); free_bus_id: mutex_lock(&board_lock); idr_remove(&spi_master_idr, ctlr->bus_num); mutex_unlock(&board_lock); return status; } EXPORT_SYMBOL_GPL(spi_register_controller); static void devm_spi_unregister(struct device *dev, void *res) { spi_unregister_controller(*(struct spi_controller **)res); } /** * devm_spi_register_controller - register managed SPI master or slave * controller * @dev: device managing SPI controller * @ctlr: initialized controller, originally from spi_alloc_master() or * spi_alloc_slave() * Context: can sleep * * Register a SPI device as with spi_register_controller() which will * automatically be unregistered and freed. * * Return: zero on success, else a negative error code. 
*/ int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr) { struct spi_controller **ptr; int ret; ptr = devres_alloc(devm_spi_unregister, sizeof(*ptr), GFP_KERNEL); if (!ptr) return -ENOMEM; ret = spi_register_controller(ctlr); if (!ret) { *ptr = ctlr; devres_add(dev, ptr); } else { devres_free(ptr); } return ret; } EXPORT_SYMBOL_GPL(devm_spi_register_controller); static int __unregister(struct device *dev, void *null) { spi_unregister_device(to_spi_device(dev)); return 0; } /** * spi_unregister_controller - unregister SPI master or slave controller * @ctlr: the controller being unregistered * Context: can sleep * * This call is used only by SPI controller drivers, which are the * only ones directly touching chip registers. * * This must be called from context that can sleep. * * Note that this function also drops a reference to the controller. */ void spi_unregister_controller(struct spi_controller *ctlr) { struct spi_controller *found; int id = ctlr->bus_num; /* Prevent addition of new devices, unregister existing ones */ if (IS_ENABLED(CONFIG_SPI_DYNAMIC)) mutex_lock(&ctlr->add_lock); device_for_each_child(&ctlr->dev, NULL, __unregister); /* First make sure that this controller was ever added */ mutex_lock(&board_lock); found = idr_find(&spi_master_idr, id); mutex_unlock(&board_lock); if (ctlr->queued) { if (spi_destroy_queue(ctlr)) dev_err(&ctlr->dev, "queue remove failed\n"); } mutex_lock(&board_lock); list_del(&ctlr->list); mutex_unlock(&board_lock); device_del(&ctlr->dev); /* Free bus id */ mutex_lock(&board_lock); if (found == ctlr) idr_remove(&spi_master_idr, id); mutex_unlock(&board_lock); if (IS_ENABLED(CONFIG_SPI_DYNAMIC)) mutex_unlock(&ctlr->add_lock); /* * Release the last reference on the controller if its driver * has not yet been converted to devm_spi_alloc_master/slave(). */ if (!ctlr->devm_allocated) put_device(&ctlr->dev); } EXPORT_SYMBOL_GPL(spi_unregister_controller); static inline int __spi_check_suspended(const struct spi_controller *ctlr) { return ctlr->flags & SPI_CONTROLLER_SUSPENDED ? 
-ESHUTDOWN : 0; } static inline void __spi_mark_suspended(struct spi_controller *ctlr) { mutex_lock(&ctlr->bus_lock_mutex); ctlr->flags |= SPI_CONTROLLER_SUSPENDED; mutex_unlock(&ctlr->bus_lock_mutex); } static inline void __spi_mark_resumed(struct spi_controller *ctlr) { mutex_lock(&ctlr->bus_lock_mutex); ctlr->flags &= ~SPI_CONTROLLER_SUSPENDED; mutex_unlock(&ctlr->bus_lock_mutex); } int spi_controller_suspend(struct spi_controller *ctlr) { int ret = 0; /* Basically no-ops for non-queued controllers */ if (ctlr->queued) { ret = spi_stop_queue(ctlr); if (ret) dev_err(&ctlr->dev, "queue stop failed\n"); } __spi_mark_suspended(ctlr); return ret; } EXPORT_SYMBOL_GPL(spi_controller_suspend); int spi_controller_resume(struct spi_controller *ctlr) { int ret = 0; __spi_mark_resumed(ctlr); if (ctlr->queued) { ret = spi_start_queue(ctlr); if (ret) dev_err(&ctlr->dev, "queue restart failed\n"); } return ret; } EXPORT_SYMBOL_GPL(spi_controller_resume); /*-------------------------------------------------------------------------*/ /* Core methods for spi_message alterations */ static void __spi_replace_transfers_release(struct spi_controller *ctlr, struct spi_message *msg, void *res) { struct spi_replaced_transfers *rxfer = res; size_t i; /* Call extra callback if requested */ if (rxfer->release) rxfer->release(ctlr, msg, res); /* Insert replaced transfers back into the message */ list_splice(&rxfer->replaced_transfers, rxfer->replaced_after); /* Remove the formerly inserted entries */ for (i = 0; i < rxfer->inserted; i++) list_del(&rxfer->inserted_transfers[i].transfer_list); } /** * spi_replace_transfers - replace transfers with several transfers * and register change with spi_message.resources * @msg: the spi_message we work upon * @xfer_first: the first spi_transfer we want to replace * @remove: number of transfers to remove * @insert: the number of transfers we want to insert instead * @release: extra release code necessary in some circumstances * @extradatasize: extra data to allocate (with alignment guarantees * of struct @spi_transfer) * @gfp: gfp flags * * Returns: pointer to @spi_replaced_transfers, * PTR_ERR(...) in case of errors. */ static struct spi_replaced_transfers *spi_replace_transfers( struct spi_message *msg, struct spi_transfer *xfer_first, size_t remove, size_t insert, spi_replaced_release_t release, size_t extradatasize, gfp_t gfp) { struct spi_replaced_transfers *rxfer; struct spi_transfer *xfer; size_t i; /* Allocate the structure using spi_res */ rxfer = spi_res_alloc(msg->spi, __spi_replace_transfers_release, struct_size(rxfer, inserted_transfers, insert) + extradatasize, gfp); if (!rxfer) return ERR_PTR(-ENOMEM); /* The release code to invoke before running the generic release */ rxfer->release = release; /* Assign extradata */ if (extradatasize) rxfer->extradata = &rxfer->inserted_transfers[insert]; /* Init the replaced_transfers list */ INIT_LIST_HEAD(&rxfer->replaced_transfers); /* * Assign the list_entry after which we should reinsert * the @replaced_transfers - it may be spi_message.messages! */ rxfer->replaced_after = xfer_first->transfer_list.prev; /* Remove the requested number of transfers */ for (i = 0; i < remove; i++) { /* * If the entry after replaced_after it is msg->transfers * then we have been requested to remove more transfers * than are in the list. 
*/ if (rxfer->replaced_after->next == &msg->transfers) { dev_err(&msg->spi->dev, "requested to remove more spi_transfers than are available\n"); /* Insert replaced transfers back into the message */ list_splice(&rxfer->replaced_transfers, rxfer->replaced_after); /* Free the spi_replace_transfer structure... */ spi_res_free(rxfer); /* ...and return with an error */ return ERR_PTR(-EINVAL); } /* * Remove the entry after replaced_after from list of * transfers and add it to list of replaced_transfers. */ list_move_tail(rxfer->replaced_after->next, &rxfer->replaced_transfers); } /* * Create copy of the given xfer with identical settings * based on the first transfer to get removed. */ for (i = 0; i < insert; i++) { /* We need to run in reverse order */ xfer = &rxfer->inserted_transfers[insert - 1 - i]; /* Copy all spi_transfer data */ memcpy(xfer, xfer_first, sizeof(*xfer)); /* Add to list */ list_add(&xfer->transfer_list, rxfer->replaced_after); /* Clear cs_change and delay for all but the last */ if (i) { xfer->cs_change = false; xfer->delay.value = 0; } } /* Set up inserted... */ rxfer->inserted = insert; /* ...and register it with spi_res/spi_message */ spi_res_add(msg, rxfer); return rxfer; } static int __spi_split_transfer_maxsize(struct spi_controller *ctlr, struct spi_message *msg, struct spi_transfer **xferp, size_t maxsize) { struct spi_transfer *xfer = *xferp, *xfers; struct spi_replaced_transfers *srt; size_t offset; size_t count, i; /* Calculate how many we have to replace */ count = DIV_ROUND_UP(xfer->len, maxsize); /* Create replacement */ srt = spi_replace_transfers(msg, xfer, 1, count, NULL, 0, GFP_KERNEL); if (IS_ERR(srt)) return PTR_ERR(srt); xfers = srt->inserted_transfers; /* * Now handle each of those newly inserted spi_transfers. * Note that the replacements spi_transfers all are preset * to the same values as *xferp, so tx_buf, rx_buf and len * are all identical (as well as most others) * so we just have to fix up len and the pointers. * * This also includes support for the depreciated * spi_message.is_dma_mapped interface. */ /* * The first transfer just needs the length modified, so we * run it outside the loop. */ xfers[0].len = min_t(size_t, maxsize, xfer[0].len); /* All the others need rx_buf/tx_buf also set */ for (i = 1, offset = maxsize; i < count; offset += maxsize, i++) { /* Update rx_buf, tx_buf and DMA */ if (xfers[i].rx_buf) xfers[i].rx_buf += offset; if (xfers[i].rx_dma) xfers[i].rx_dma += offset; if (xfers[i].tx_buf) xfers[i].tx_buf += offset; if (xfers[i].tx_dma) xfers[i].tx_dma += offset; /* Update length */ xfers[i].len = min(maxsize, xfers[i].len - offset); } /* * We set up xferp to the last entry we have inserted, * so that we skip those already split transfers. */ *xferp = &xfers[count - 1]; /* Increment statistics counters */ SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, transfers_split_maxsize); SPI_STATISTICS_INCREMENT_FIELD(msg->spi->pcpu_statistics, transfers_split_maxsize); return 0; } /** * spi_split_transfers_maxsize - split spi transfers into multiple transfers * when an individual transfer exceeds a * certain size * @ctlr: the @spi_controller for this transfer * @msg: the @spi_message to transform * @maxsize: the maximum when to apply this * * This function allocates resources that are automatically freed during the * spi message unoptimize phase so this function should only be called from * optimize_message callbacks. 
* * Return: status of transformation */ int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, size_t maxsize) { struct spi_transfer *xfer; int ret; /* * Iterate over the transfer_list, * but note that xfer is advanced to the last transfer inserted * to avoid checking sizes again unnecessarily (also xfer does * potentially belong to a different list by the time the * replacement has happened). */ list_for_each_entry(xfer, &msg->transfers, transfer_list) { if (xfer->len > maxsize) { ret = __spi_split_transfer_maxsize(ctlr, msg, &xfer, maxsize); if (ret) return ret; } } return 0; } EXPORT_SYMBOL_GPL(spi_split_transfers_maxsize); /** * spi_split_transfers_maxwords - split SPI transfers into multiple transfers * when an individual transfer exceeds a * certain number of SPI words * @ctlr: the @spi_controller for this transfer * @msg: the @spi_message to transform * @maxwords: the number of words to limit each transfer to * * This function allocates resources that are automatically freed during the * spi message unoptimize phase so this function should only be called from * optimize_message callbacks. * * Return: status of transformation */ int spi_split_transfers_maxwords(struct spi_controller *ctlr, struct spi_message *msg, size_t maxwords) { struct spi_transfer *xfer; /* * Iterate over the transfer_list, * but note that xfer is advanced to the last transfer inserted * to avoid checking sizes again unnecessarily (also xfer does * potentially belong to a different list by the time the * replacement has happened). */ list_for_each_entry(xfer, &msg->transfers, transfer_list) { size_t maxsize; int ret; maxsize = maxwords * roundup_pow_of_two(BITS_TO_BYTES(xfer->bits_per_word)); if (xfer->len > maxsize) { ret = __spi_split_transfer_maxsize(ctlr, msg, &xfer, maxsize); if (ret) return ret; } } return 0; } EXPORT_SYMBOL_GPL(spi_split_transfers_maxwords); /*-------------------------------------------------------------------------*/ /* * Core methods for SPI controller protocol drivers. Some of the * other core methods are currently defined as inline functions. */ static int __spi_validate_bits_per_word(struct spi_controller *ctlr, u8 bits_per_word) { if (ctlr->bits_per_word_mask) { /* Only 32 bits fit in the mask */ if (bits_per_word > 32) return -EINVAL; if (!(ctlr->bits_per_word_mask & SPI_BPW_MASK(bits_per_word))) return -EINVAL; } return 0; } /** * spi_set_cs_timing - configure CS setup, hold, and inactive delays * @spi: the device that requires specific CS timing configuration * * Return: zero on success, else a negative error code. */ static int spi_set_cs_timing(struct spi_device *spi) { struct device *parent = spi->controller->dev.parent; int status = 0; if (spi->controller->set_cs_timing && !spi_get_csgpiod(spi, 0)) { if (spi->controller->auto_runtime_pm) { status = pm_runtime_get_sync(parent); if (status < 0) { pm_runtime_put_noidle(parent); dev_err(&spi->controller->dev, "Failed to power device: %d\n", status); return status; } status = spi->controller->set_cs_timing(spi); pm_runtime_mark_last_busy(parent); pm_runtime_put_autosuspend(parent); } else { status = spi->controller->set_cs_timing(spi); } } return status; } /** * spi_setup - setup SPI mode and clock rate * @spi: the device whose settings are being modified * Context: can sleep, and no requests are queued to the device * * SPI protocol drivers may need to update the transfer mode if the * device doesn't work with its default. 
They may likewise need * to update clock rates or word sizes from initial values. This function * changes those settings, and must be called from a context that can sleep. * Except for SPI_CS_HIGH, which takes effect immediately, the changes take * effect the next time the device is selected and data is transferred to * or from it. When this function returns, the SPI device is deselected. * * Note that this call will fail if the protocol driver specifies an option * that the underlying controller or its driver does not support. For * example, not all hardware supports wire transfers using nine bit words, * LSB-first wire encoding, or active-high chipselects. * * Return: zero on success, else a negative error code. */ int spi_setup(struct spi_device *spi) { unsigned bad_bits, ugly_bits; int status = 0; /* * Check mode to prevent that any two of DUAL, QUAD and NO_MOSI/MISO * are set at the same time. */ if ((hweight_long(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_NO_TX)) > 1) || (hweight_long(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD | SPI_NO_RX)) > 1)) { dev_err(&spi->dev, "setup: can not select any two of dual, quad and no-rx/tx at the same time\n"); return -EINVAL; } /* If it is SPI_3WIRE mode, DUAL and QUAD should be forbidden */ if ((spi->mode & SPI_3WIRE) && (spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL))) return -EINVAL; /* * Help drivers fail *cleanly* when they need options * that aren't supported with their current controller. * SPI_CS_WORD has a fallback software implementation, * so it is ignored here. */ bad_bits = spi->mode & ~(spi->controller->mode_bits | SPI_CS_WORD | SPI_NO_TX | SPI_NO_RX); ugly_bits = bad_bits & (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL); if (ugly_bits) { dev_warn(&spi->dev, "setup: ignoring unsupported mode bits %x\n", ugly_bits); spi->mode &= ~ugly_bits; bad_bits &= ~ugly_bits; } if (bad_bits) { dev_err(&spi->dev, "setup: unsupported mode bits %x\n", bad_bits); return -EINVAL; } if (!spi->bits_per_word) { spi->bits_per_word = 8; } else { /* * Some controllers may not support the default 8 bits-per-word * so only perform the check when this is explicitly provided. */ status = __spi_validate_bits_per_word(spi->controller, spi->bits_per_word); if (status) return status; } if (spi->controller->max_speed_hz && (!spi->max_speed_hz || spi->max_speed_hz > spi->controller->max_speed_hz)) spi->max_speed_hz = spi->controller->max_speed_hz; mutex_lock(&spi->controller->io_mutex); if (spi->controller->setup) { status = spi->controller->setup(spi); if (status) { mutex_unlock(&spi->controller->io_mutex); dev_err(&spi->controller->dev, "Failed to setup device: %d\n", status); return status; } } status = spi_set_cs_timing(spi); if (status) { mutex_unlock(&spi->controller->io_mutex); return status; } if (spi->controller->auto_runtime_pm && spi->controller->set_cs) { status = pm_runtime_resume_and_get(spi->controller->dev.parent); if (status < 0) { mutex_unlock(&spi->controller->io_mutex); dev_err(&spi->controller->dev, "Failed to power device: %d\n", status); return status; } /* * We do not want to return positive value from pm_runtime_get, * there are many instances of devices calling spi_setup() and * checking for a non-zero return value instead of a negative * return value. 
*/ status = 0; spi_set_cs(spi, false, true); pm_runtime_mark_last_busy(spi->controller->dev.parent); pm_runtime_put_autosuspend(spi->controller->dev.parent); } else { spi_set_cs(spi, false, true); } mutex_unlock(&spi->controller->io_mutex); if (spi->rt && !spi->controller->rt) { spi->controller->rt = true; spi_set_thread_rt(spi->controller); } trace_spi_setup(spi, status); dev_dbg(&spi->dev, "setup mode %lu, %s%s%s%s%u bits/w, %u Hz max --> %d\n", spi->mode & SPI_MODE_X_MASK, (spi->mode & SPI_CS_HIGH) ? "cs_high, " : "", (spi->mode & SPI_LSB_FIRST) ? "lsb, " : "", (spi->mode & SPI_3WIRE) ? "3wire, " : "", (spi->mode & SPI_LOOP) ? "loopback, " : "", spi->bits_per_word, spi->max_speed_hz, status); return status; } EXPORT_SYMBOL_GPL(spi_setup); static int _spi_xfer_word_delay_update(struct spi_transfer *xfer, struct spi_device *spi) { int delay1, delay2; delay1 = spi_delay_to_ns(&xfer->word_delay, xfer); if (delay1 < 0) return delay1; delay2 = spi_delay_to_ns(&spi->word_delay, xfer); if (delay2 < 0) return delay2; if (delay1 < delay2) memcpy(&xfer->word_delay, &spi->word_delay, sizeof(xfer->word_delay)); return 0; } static int __spi_validate(struct spi_device *spi, struct spi_message *message) { struct spi_controller *ctlr = spi->controller; struct spi_transfer *xfer; int w_size; if (list_empty(&message->transfers)) return -EINVAL; message->spi = spi; /* * Half-duplex links include original MicroWire, and ones with * only one data pin like SPI_3WIRE (switches direction) or where * either MOSI or MISO is missing. They can also be caused by * software limitations. */ if ((ctlr->flags & SPI_CONTROLLER_HALF_DUPLEX) || (spi->mode & SPI_3WIRE)) { unsigned flags = ctlr->flags; list_for_each_entry(xfer, &message->transfers, transfer_list) { if (xfer->rx_buf && xfer->tx_buf) return -EINVAL; if ((flags & SPI_CONTROLLER_NO_TX) && xfer->tx_buf) return -EINVAL; if ((flags & SPI_CONTROLLER_NO_RX) && xfer->rx_buf) return -EINVAL; } } /* * Set transfer bits_per_word and max speed as spi device default if * it is not set for this transfer. * Set transfer tx_nbits and rx_nbits as single transfer default * (SPI_NBITS_SINGLE) if it is not set for this transfer. * Ensure transfer word_delay is at least as long as that required by * device itself. */ message->frame_length = 0; list_for_each_entry(xfer, &message->transfers, transfer_list) { xfer->effective_speed_hz = 0; message->frame_length += xfer->len; if (!xfer->bits_per_word) xfer->bits_per_word = spi->bits_per_word; if (!xfer->speed_hz) xfer->speed_hz = spi->max_speed_hz; if (ctlr->max_speed_hz && xfer->speed_hz > ctlr->max_speed_hz) xfer->speed_hz = ctlr->max_speed_hz; if (__spi_validate_bits_per_word(ctlr, xfer->bits_per_word)) return -EINVAL; /* * SPI transfer length should be multiple of SPI word size * where SPI word size should be power-of-two multiple. */ if (xfer->bits_per_word <= 8) w_size = 1; else if (xfer->bits_per_word <= 16) w_size = 2; else w_size = 4; /* No partial transfers accepted */ if (xfer->len % w_size) return -EINVAL; if (xfer->speed_hz && ctlr->min_speed_hz && xfer->speed_hz < ctlr->min_speed_hz) return -EINVAL; if (xfer->tx_buf && !xfer->tx_nbits) xfer->tx_nbits = SPI_NBITS_SINGLE; if (xfer->rx_buf && !xfer->rx_nbits) xfer->rx_nbits = SPI_NBITS_SINGLE; /* * Check transfer tx/rx_nbits: * 1. check the value matches one of single, dual and quad * 2. 
check tx/rx_nbits match the mode in spi_device */ if (xfer->tx_buf) { if (spi->mode & SPI_NO_TX) return -EINVAL; if (xfer->tx_nbits != SPI_NBITS_SINGLE && xfer->tx_nbits != SPI_NBITS_DUAL && xfer->tx_nbits != SPI_NBITS_QUAD) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD))) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_QUAD) && !(spi->mode & SPI_TX_QUAD)) return -EINVAL; } /* Check transfer rx_nbits */ if (xfer->rx_buf) { if (spi->mode & SPI_NO_RX) return -EINVAL; if (xfer->rx_nbits != SPI_NBITS_SINGLE && xfer->rx_nbits != SPI_NBITS_DUAL && xfer->rx_nbits != SPI_NBITS_QUAD) return -EINVAL; if ((xfer->rx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD))) return -EINVAL; if ((xfer->rx_nbits == SPI_NBITS_QUAD) && !(spi->mode & SPI_RX_QUAD)) return -EINVAL; } if (_spi_xfer_word_delay_update(xfer, spi)) return -EINVAL; } message->status = -EINPROGRESS; return 0; } /* * spi_split_transfers - generic handling of transfer splitting * @msg: the message to split * * Under certain conditions, a SPI controller may not support arbitrary * transfer sizes or other features required by a peripheral. This function * will split the transfers in the message into smaller transfers that are * supported by the controller. * * Controllers with special requirements not covered here can also split * transfers in the optimize_message() callback. * * Context: can sleep * Return: zero on success, else a negative error code */ static int spi_split_transfers(struct spi_message *msg) { struct spi_controller *ctlr = msg->spi->controller; struct spi_transfer *xfer; int ret; /* * If an SPI controller does not support toggling the CS line on each * transfer (indicated by the SPI_CS_WORD flag) or we are using a GPIO * for the CS line, we can emulate the CS-per-word hardware function by * splitting transfers into one-word transfers and ensuring that * cs_change is set for each transfer. */ if ((msg->spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) || spi_is_csgpiod(msg->spi))) { ret = spi_split_transfers_maxwords(ctlr, msg, 1); if (ret) return ret; list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* Don't change cs_change on the last entry in the list */ if (list_is_last(&xfer->transfer_list, &msg->transfers)) break; xfer->cs_change = 1; } } else { ret = spi_split_transfers_maxsize(ctlr, msg, spi_max_transfer_size(msg->spi)); if (ret) return ret; } return 0; } /* * __spi_optimize_message - shared implementation for spi_optimize_message() * and spi_maybe_optimize_message() * @spi: the device that will be used for the message * @msg: the message to optimize * * Peripheral drivers will call spi_optimize_message() and the spi core will * call spi_maybe_optimize_message() instead of calling this directly. * * It is not valid to call this on a message that has already been optimized. 
* * Return: zero on success, else a negative error code */ static int __spi_optimize_message(struct spi_device *spi, struct spi_message *msg) { struct spi_controller *ctlr = spi->controller; int ret; ret = __spi_validate(spi, msg); if (ret) return ret; ret = spi_split_transfers(msg); if (ret) return ret; if (ctlr->optimize_message) { ret = ctlr->optimize_message(msg); if (ret) { spi_res_release(ctlr, msg); return ret; } } msg->optimized = true; return 0; } /* * spi_maybe_optimize_message - optimize message if it isn't already pre-optimized * @spi: the device that will be used for the message * @msg: the message to optimize * Return: zero on success, else a negative error code */ static int spi_maybe_optimize_message(struct spi_device *spi, struct spi_message *msg) { if (msg->pre_optimized) return 0; return __spi_optimize_message(spi, msg); } /** * spi_optimize_message - do any one-time validation and setup for a SPI message * @spi: the device that will be used for the message * @msg: the message to optimize * * Peripheral drivers that reuse the same message repeatedly may call this to * perform as much message prep as possible once, rather than repeating it each * time a message transfer is performed to improve throughput and reduce CPU * usage. * * Once a message has been optimized, it cannot be modified with the exception * of updating the contents of any xfer->tx_buf (the pointer can't be changed, * only the data in the memory it points to). * * Calls to this function must be balanced with calls to spi_unoptimize_message() * to avoid leaking resources. * * Context: can sleep * Return: zero on success, else a negative error code */ int spi_optimize_message(struct spi_device *spi, struct spi_message *msg) { int ret; ret = __spi_optimize_message(spi, msg); if (ret) return ret; /* * This flag indicates that the peripheral driver called spi_optimize_message() * and therefore we shouldn't unoptimize message automatically when finalizing * the message but rather wait until spi_unoptimize_message() is called * by the peripheral driver. */ msg->pre_optimized = true; return 0; } EXPORT_SYMBOL_GPL(spi_optimize_message); /** * spi_unoptimize_message - releases any resources allocated by spi_optimize_message() * @msg: the message to unoptimize * * Calls to this function must be balanced with calls to spi_optimize_message(). * * Context: can sleep */ void spi_unoptimize_message(struct spi_message *msg) { __spi_unoptimize_message(msg); msg->pre_optimized = false; } EXPORT_SYMBOL_GPL(spi_unoptimize_message); static int __spi_async(struct spi_device *spi, struct spi_message *message) { struct spi_controller *ctlr = spi->controller; struct spi_transfer *xfer; /* * Some controllers do not support doing regular SPI transfers. Return * ENOTSUPP when this is the case. */ if (!ctlr->transfer) return -ENOTSUPP; SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_async); SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_async); trace_spi_message_submit(message); if (!ctlr->ptp_sts_supported) { list_for_each_entry(xfer, &message->transfers, transfer_list) { xfer->ptp_sts_word_pre = 0; ptp_read_system_prets(xfer->ptp_sts); } } return ctlr->transfer(spi, message); } /** * spi_async - asynchronous SPI transfer * @spi: device with which data will be exchanged * @message: describes the data transfers, including completion callback * Context: any (IRQs may be blocked, etc) * * This call may be used in_irq and other contexts which can't sleep, * as well as from task contexts which can sleep. 
* * The completion callback is invoked in a context which can't sleep. * Before that invocation, the value of message->status is undefined. * When the callback is issued, message->status holds either zero (to * indicate complete success) or a negative error code. After that * callback returns, the driver which issued the transfer request may * deallocate the associated memory; it's no longer in use by any SPI * core or controller driver code. * * Note that although all messages to a spi_device are handled in * FIFO order, messages may go to different devices in other orders. * Some device might be higher priority, or have various "hard" access * time requirements, for example. * * On detection of any fault during the transfer, processing of * the entire message is aborted, and the device is deselected. * Until returning from the associated message completion callback, * no other spi_message queued to that device will be processed. * (This rule applies equally to all the synchronous transfer calls, * which are wrappers around this core asynchronous primitive.) * * Return: zero on success, else a negative error code. */ int spi_async(struct spi_device *spi, struct spi_message *message) { struct spi_controller *ctlr = spi->controller; int ret; unsigned long flags; ret = spi_maybe_optimize_message(spi, message); if (ret) return ret; spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags); if (ctlr->bus_lock_flag) ret = -EBUSY; else ret = __spi_async(spi, message); spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); spi_maybe_unoptimize_message(message); return ret; } EXPORT_SYMBOL_GPL(spi_async); static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct spi_message *msg) { bool was_busy; int ret; mutex_lock(&ctlr->io_mutex); was_busy = ctlr->busy; ctlr->cur_msg = msg; ret = __spi_pump_transfer_message(ctlr, msg, was_busy); if (ret) dev_err(&ctlr->dev, "noqueue transfer failed\n"); ctlr->cur_msg = NULL; ctlr->fallback = false; if (!was_busy) { kfree(ctlr->dummy_rx); ctlr->dummy_rx = NULL; kfree(ctlr->dummy_tx); ctlr->dummy_tx = NULL; if (ctlr->unprepare_transfer_hardware && ctlr->unprepare_transfer_hardware(ctlr)) dev_err(&ctlr->dev, "failed to unprepare transfer hardware\n"); spi_idle_runtime_pm(ctlr); } mutex_unlock(&ctlr->io_mutex); } /*-------------------------------------------------------------------------*/ /* * Utility methods for SPI protocol drivers, layered on * top of the core. Some other utility methods are defined as * inline functions. */ static void spi_complete(void *arg) { complete(arg); } static int __spi_sync(struct spi_device *spi, struct spi_message *message) { DECLARE_COMPLETION_ONSTACK(done); unsigned long flags; int status; struct spi_controller *ctlr = spi->controller; if (__spi_check_suspended(ctlr)) { dev_warn_once(&spi->dev, "Attempted to sync while suspend\n"); return -ESHUTDOWN; } status = spi_maybe_optimize_message(spi, message); if (status) return status; SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync); SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_sync); /* * Checking queue_empty here only guarantees async/sync message * ordering when coming from the same context. It does not need to * guard against reentrancy from a different context. The io_mutex * will catch those cases. 
*/ if (READ_ONCE(ctlr->queue_empty) && !ctlr->must_async) { message->actual_length = 0; message->status = -EINPROGRESS; trace_spi_message_submit(message); SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync_immediate); SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_sync_immediate); __spi_transfer_message_noqueue(ctlr, message); return message->status; } /* * There are messages in the async queue that could have originated * from the same context, so we need to preserve ordering. * Therefor we send the message to the async queue and wait until they * are completed. */ message->complete = spi_complete; message->context = &done; spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags); status = __spi_async(spi, message); spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); if (status == 0) { wait_for_completion(&done); status = message->status; } message->complete = NULL; message->context = NULL; return status; } /** * spi_sync - blocking/synchronous SPI data transfers * @spi: device with which data will be exchanged * @message: describes the data transfers * Context: can sleep * * This call may only be used from a context that may sleep. The sleep * is non-interruptible, and has no timeout. Low-overhead controller * drivers may DMA directly into and out of the message buffers. * * Note that the SPI device's chip select is active during the message, * and then is normally disabled between messages. Drivers for some * frequently-used devices may want to minimize costs of selecting a chip, * by leaving it selected in anticipation that the next message will go * to the same chip. (That may increase power usage.) * * Also, the caller is guaranteeing that the memory associated with the * message will not be freed before this call returns. * * Return: zero on success, else a negative error code. */ int spi_sync(struct spi_device *spi, struct spi_message *message) { int ret; mutex_lock(&spi->controller->bus_lock_mutex); ret = __spi_sync(spi, message); mutex_unlock(&spi->controller->bus_lock_mutex); return ret; } EXPORT_SYMBOL_GPL(spi_sync); /** * spi_sync_locked - version of spi_sync with exclusive bus usage * @spi: device with which data will be exchanged * @message: describes the data transfers * Context: can sleep * * This call may only be used from a context that may sleep. The sleep * is non-interruptible, and has no timeout. Low-overhead controller * drivers may DMA directly into and out of the message buffers. * * This call should be used by drivers that require exclusive access to the * SPI bus. It has to be preceded by a spi_bus_lock call. The SPI bus must * be released by a spi_bus_unlock call when the exclusive access is over. * * Return: zero on success, else a negative error code. */ int spi_sync_locked(struct spi_device *spi, struct spi_message *message) { return __spi_sync(spi, message); } EXPORT_SYMBOL_GPL(spi_sync_locked); /** * spi_bus_lock - obtain a lock for exclusive SPI bus usage * @ctlr: SPI bus master that should be locked for exclusive bus access * Context: can sleep * * This call may only be used from a context that may sleep. The sleep * is non-interruptible, and has no timeout. * * This call should be used by drivers that require exclusive access to the * SPI bus. The SPI bus must be released by a spi_bus_unlock call when the * exclusive access is over. Data transfer must be done by spi_sync_locked * and spi_async_locked calls when the SPI bus lock is held. * * Return: always zero. 
*/ int spi_bus_lock(struct spi_controller *ctlr) { unsigned long flags; mutex_lock(&ctlr->bus_lock_mutex); spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags); ctlr->bus_lock_flag = 1; spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); /* Mutex remains locked until spi_bus_unlock() is called */ return 0; } EXPORT_SYMBOL_GPL(spi_bus_lock); /** * spi_bus_unlock - release the lock for exclusive SPI bus usage * @ctlr: SPI bus master that was locked for exclusive bus access * Context: can sleep * * This call may only be used from a context that may sleep. The sleep * is non-interruptible, and has no timeout. * * This call releases an SPI bus lock previously obtained by an spi_bus_lock * call. * * Return: always zero. */ int spi_bus_unlock(struct spi_controller *ctlr) { ctlr->bus_lock_flag = 0; mutex_unlock(&ctlr->bus_lock_mutex); return 0; } EXPORT_SYMBOL_GPL(spi_bus_unlock); /* Portable code must never pass more than 32 bytes */ #define SPI_BUFSIZ max(32, SMP_CACHE_BYTES) static u8 *buf; /** * spi_write_then_read - SPI synchronous write followed by read * @spi: device with which data will be exchanged * @txbuf: data to be written (need not be DMA-safe) * @n_tx: size of txbuf, in bytes * @rxbuf: buffer into which data will be read (need not be DMA-safe) * @n_rx: size of rxbuf, in bytes * Context: can sleep * * This performs a half duplex MicroWire style transaction with the * device, sending txbuf and then reading rxbuf. The return value * is zero for success, else a negative errno status code. * This call may only be used from a context that may sleep. * * Parameters to this routine are always copied using a small buffer. * Performance-sensitive or bulk transfer code should instead use * spi_{async,sync}() calls with DMA-safe buffers. * * Return: zero on success, else a negative error code. */ int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx) { static DEFINE_MUTEX(lock); int status; struct spi_message message; struct spi_transfer x[2]; u8 *local_buf; /* * Use preallocated DMA-safe buffer if we can. We can't avoid * copying here, (as a pure convenience thing), but we can * keep heap costs out of the hot path unless someone else is * using the pre-allocated buffer or the transfer is too large. */ if ((n_tx + n_rx) > SPI_BUFSIZ || !mutex_trylock(&lock)) { local_buf = kmalloc(max((unsigned)SPI_BUFSIZ, n_tx + n_rx), GFP_KERNEL | GFP_DMA); if (!local_buf) return -ENOMEM; } else { local_buf = buf; } spi_message_init(&message); memset(x, 0, sizeof(x)); if (n_tx) { x[0].len = n_tx; spi_message_add_tail(&x[0], &message); } if (n_rx) { x[1].len = n_rx; spi_message_add_tail(&x[1], &message); } memcpy(local_buf, txbuf, n_tx); x[0].tx_buf = local_buf; x[1].rx_buf = local_buf + n_tx; /* Do the I/O */ status = spi_sync(spi, &message); if (status == 0) memcpy(rxbuf, x[1].rx_buf, n_rx); if (x[0].tx_buf == buf) mutex_unlock(&lock); else kfree(local_buf); return status; } EXPORT_SYMBOL_GPL(spi_write_then_read); /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_OF_DYNAMIC) /* Must call put_device() when done with returned spi_device device */ static struct spi_device *of_find_spi_device_by_node(struct device_node *node) { struct device *dev = bus_find_device_by_of_node(&spi_bus_type, node); return dev ? 
to_spi_device(dev) : NULL; } /* The spi controllers are not using spi_bus, so we find it with another way */ static struct spi_controller *of_find_spi_controller_by_node(struct device_node *node) { struct device *dev; dev = class_find_device_by_of_node(&spi_master_class, node); if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE)) dev = class_find_device_by_of_node(&spi_slave_class, node); if (!dev) return NULL; /* Reference got in class_find_device */ return container_of(dev, struct spi_controller, dev); } static int of_spi_notify(struct notifier_block *nb, unsigned long action, void *arg) { struct of_reconfig_data *rd = arg; struct spi_controller *ctlr; struct spi_device *spi; switch (of_reconfig_get_state_change(action, arg)) { case OF_RECONFIG_CHANGE_ADD: ctlr = of_find_spi_controller_by_node(rd->dn->parent); if (ctlr == NULL) return NOTIFY_OK; /* Not for us */ if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) { put_device(&ctlr->dev); return NOTIFY_OK; } /* * Clear the flag before adding the device so that fw_devlink * doesn't skip adding consumers to this device. */ rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; spi = of_register_spi_device(ctlr, rd->dn); put_device(&ctlr->dev); if (IS_ERR(spi)) { pr_err("%s: failed to create for '%pOF'\n", __func__, rd->dn); of_node_clear_flag(rd->dn, OF_POPULATED); return notifier_from_errno(PTR_ERR(spi)); } break; case OF_RECONFIG_CHANGE_REMOVE: /* Already depopulated? */ if (!of_node_check_flag(rd->dn, OF_POPULATED)) return NOTIFY_OK; /* Find our device by node */ spi = of_find_spi_device_by_node(rd->dn); if (spi == NULL) return NOTIFY_OK; /* No? not meant for us */ /* Unregister takes one ref away */ spi_unregister_device(spi); /* And put the reference of the find */ put_device(&spi->dev); break; } return NOTIFY_OK; } static struct notifier_block spi_of_notifier = { .notifier_call = of_spi_notify, }; #else /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ extern struct notifier_block spi_of_notifier; #endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ #if IS_ENABLED(CONFIG_ACPI) static int spi_acpi_controller_match(struct device *dev, const void *data) { return ACPI_COMPANION(dev->parent) == data; } struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) { struct device *dev; dev = class_find_device(&spi_master_class, NULL, adev, spi_acpi_controller_match); if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE)) dev = class_find_device(&spi_slave_class, NULL, adev, spi_acpi_controller_match); if (!dev) return NULL; return container_of(dev, struct spi_controller, dev); } EXPORT_SYMBOL_GPL(acpi_spi_find_controller_by_adev); static struct spi_device *acpi_spi_find_device_by_adev(struct acpi_device *adev) { struct device *dev; dev = bus_find_device_by_acpi_dev(&spi_bus_type, adev); return to_spi_device(dev); } static int acpi_spi_notify(struct notifier_block *nb, unsigned long value, void *arg) { struct acpi_device *adev = arg; struct spi_controller *ctlr; struct spi_device *spi; switch (value) { case ACPI_RECONFIG_DEVICE_ADD: ctlr = acpi_spi_find_controller_by_adev(acpi_dev_parent(adev)); if (!ctlr) break; acpi_register_spi_device(ctlr, adev); put_device(&ctlr->dev); break; case ACPI_RECONFIG_DEVICE_REMOVE: if (!acpi_device_enumerated(adev)) break; spi = acpi_spi_find_device_by_adev(adev); if (!spi) break; spi_unregister_device(spi); put_device(&spi->dev); break; } return NOTIFY_OK; } static struct notifier_block spi_acpi_notifier = { .notifier_call = acpi_spi_notify, }; #else extern struct notifier_block spi_acpi_notifier; #endif static int __init spi_init(void) { 
int status; buf = kmalloc(SPI_BUFSIZ, GFP_KERNEL); if (!buf) { status = -ENOMEM; goto err0; } status = bus_register(&spi_bus_type); if (status < 0) goto err1; status = class_register(&spi_master_class); if (status < 0) goto err2; if (IS_ENABLED(CONFIG_SPI_SLAVE)) { status = class_register(&spi_slave_class); if (status < 0) goto err3; } if (IS_ENABLED(CONFIG_OF_DYNAMIC)) WARN_ON(of_reconfig_notifier_register(&spi_of_notifier)); if (IS_ENABLED(CONFIG_ACPI)) WARN_ON(acpi_reconfig_notifier_register(&spi_acpi_notifier)); return 0; err3: class_unregister(&spi_master_class); err2: bus_unregister(&spi_bus_type); err1: kfree(buf); buf = NULL; err0: return status; } /* * A board_info is normally registered in arch_initcall(), * but even essential drivers wait till later. * * REVISIT only boardinfo really needs static linking. The rest (device and * driver registration) _could_ be dynamically linked (modular) ... Costs * include needing to have boardinfo data structures be much more public. */ postcore_initcall(spi_init);
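/*
 * Example (illustrative sketch, not part of the original source): a typical
 * controller driver probe() using the devm_spi_alloc_master() wrapper around
 * __devm_spi_alloc_controller() together with devm_spi_register_controller(),
 * as described in the kerneldoc above. The foo_* names, struct foo_spi and the
 * foo_spi_transfer_one()/foo_spi_set_cs() callbacks are hypothetical
 * placeholders for driver-specific code.
 *
 *	struct foo_spi {			// hypothetical driver state
 *		void __iomem *base;
 *	};
 *
 *	static int foo_spi_probe(struct platform_device *pdev)
 *	{
 *		struct spi_controller *ctlr;
 *		struct foo_spi *priv;
 *
 *		// Driver-private data is allocated along with the controller
 *		ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*priv));
 *		if (!ctlr)
 *			return -ENOMEM;
 *
 *		priv = spi_controller_get_devdata(ctlr);
 *		priv->base = devm_platform_ioremap_resource(pdev, 0);
 *		if (IS_ERR(priv->base))
 *			return PTR_ERR(priv->base);
 *
 *		ctlr->dev.of_node = pdev->dev.of_node;
 *		ctlr->num_chipselect = 4;
 *		ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 *		ctlr->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
 *		ctlr->transfer_one = foo_spi_transfer_one;
 *		ctlr->set_cs = foo_spi_set_cs;
 *
 *		// Registration also enumerates child devices from DT/ACPI
 *		return devm_spi_register_controller(&pdev->dev, ctlr);
 *	}
 */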
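/*
 * Example (illustrative sketch, not part of the original source): the
 * kerneldoc for spi_split_transfers_maxsize()/maxwords() above notes that they
 * allocate resources freed in the unoptimize phase and should only be called
 * from optimize_message callbacks. A controller whose hardware is limited to a
 * hypothetical FOO_MAX_XFER_LEN bytes per transfer could therefore do:
 *
 *	static int foo_spi_optimize_message(struct spi_message *msg)
 *	{
 *		// Split anything larger than the hardware can handle
 *		return spi_split_transfers_maxsize(msg->spi->controller, msg,
 *						   FOO_MAX_XFER_LEN);
 *	}
 *
 * and assign it to ctlr->optimize_message before registering the controller.
 */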
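/*
 * Example (illustrative sketch, not part of the original source): per the
 * spi_setup() kerneldoc above, a protocol driver updates mode, word size and
 * clock rate from its probe() before the first transfer. The foo_* name and
 * the mode 3 / 16-bit / 10 MHz requirements are made up for illustration.
 *
 *	static int foo_sensor_probe(struct spi_device *spi)
 *	{
 *		int ret;
 *
 *		spi->mode |= SPI_MODE_3;
 *		spi->bits_per_word = 16;
 *		spi->max_speed_hz = 10000000;	// spi_setup() caps this at the controller max
 *
 *		ret = spi_setup(spi);
 *		if (ret)
 *			return dev_err_probe(&spi->dev, ret, "spi_setup failed\n");
 *
 *		return 0;
 *	}
 */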
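/*
 * Example (illustrative sketch, not part of the original source): a blocking
 * exchange built from spi_transfer/spi_message and submitted with spi_sync(),
 * as documented above. struct foo_priv and the command layout are
 * hypothetical; the buffers live in kmalloc()ed driver data so they can be
 * used for DMA.
 *
 *	struct foo_priv {
 *		struct spi_device *spi;
 *		u8 cmd[1];
 *		u8 data[16];
 *	};
 *
 *	static int foo_read_block(struct foo_priv *priv, u8 reg)
 *	{
 *		struct spi_transfer xfers[2] = {
 *			{ .tx_buf = priv->cmd,  .len = sizeof(priv->cmd)  },
 *			{ .rx_buf = priv->data, .len = sizeof(priv->data) },
 *		};
 *		struct spi_message msg;
 *
 *		priv->cmd[0] = reg;
 *		spi_message_init_with_transfers(&msg, xfers, ARRAY_SIZE(xfers));
 *
 *		// Sleeps until the controller has finished the whole message
 *		return spi_sync(priv->spi, &msg);
 *	}
 */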
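/*
 * Example (illustrative sketch, not part of the original source): the
 * spi_optimize_message()/spi_unoptimize_message() pairing described above, for
 * a peripheral that resubmits one pre-built message many times, e.g. from a
 * data-ready interrupt. struct foo_priv, priv->stream_msg and the pacing
 * assumption (the previous submission has completed before the next IRQ
 * fires) are hypothetical.
 *
 *	static int foo_start_streaming(struct foo_priv *priv)
 *	{
 *		// One-time validation/splitting; the transfers must not be
 *		// modified afterwards, only the data behind tx_buf may change
 *		return spi_optimize_message(priv->spi, &priv->stream_msg);
 *	}
 *
 *	static irqreturn_t foo_drdy_irq(int irq, void *data)
 *	{
 *		struct foo_priv *priv = data;
 *
 *		// Cheap resubmission; spi_async() may be called from IRQ context
 *		spi_async(priv->spi, &priv->stream_msg);
 *		return IRQ_HANDLED;
 *	}
 *
 *	static void foo_stop_streaming(struct foo_priv *priv)
 *	{
 *		spi_unoptimize_message(&priv->stream_msg);
 *	}
 */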
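/*
 * Example (illustrative sketch, not part of the original source): the
 * exclusive-access pattern from the spi_bus_lock()/spi_sync_locked()/
 * spi_bus_unlock() kerneldoc above, used when two messages must reach the
 * device back to back with no other client in between. The foo_* name and the
 * two-message sequence are hypothetical.
 *
 *	static int foo_atomic_sequence(struct spi_device *spi,
 *				       struct spi_message *first,
 *				       struct spi_message *second)
 *	{
 *		struct spi_controller *ctlr = spi->controller;
 *		int ret;
 *
 *		spi_bus_lock(ctlr);
 *
 *		// Other clients cannot queue messages until spi_bus_unlock()
 *		ret = spi_sync_locked(spi, first);
 *		if (!ret)
 *			ret = spi_sync_locked(spi, second);
 *
 *		spi_bus_unlock(ctlr);
 *		return ret;
 *	}
 */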
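/*
 * Example (illustrative sketch, not part of the original source): a small
 * register read with spi_write_then_read(), which copies through the
 * preallocated bounce buffer above so the caller's buffers need not be
 * DMA-safe. The 0x9f opcode and 3-byte ID are hypothetical.
 *
 *	static int foo_read_id(struct spi_device *spi, u8 *id)
 *	{
 *		const u8 cmd = 0x9f;
 *
 *		// Half-duplex: send one command byte, then read 3 bytes back
 *		return spi_write_then_read(spi, &cmd, 1, id, 3);
 *	}
 */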
// SPDX-License-Identifier: GPL-2.0-only
/* Industrial I/O event handling
 *
 * Copyright (c) 2008 Jonathan Cameron
 *
 * Based on elements of hwmon and input subsystems.
 */

#include <linux/anon_inodes.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/kfifo.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/wait.h>

#include <linux/iio/iio.h>
#include <linux/iio/iio-opaque.h>
#include "iio_core.h"
#include <linux/iio/sysfs.h>
#include <linux/iio/events.h>

/**
 * struct iio_event_interface - chrdev interface for an event line
 * @wait: wait queue to allow blocking reads of events
 * @det_events: list of detected events
 * @dev_attr_list: list of event interface sysfs attribute
 * @flags: file operations related flags including busy flag.
* @group: event interface sysfs attribute group * @read_lock: lock to protect kfifo read operations * @ioctl_handler: handler for event ioctl() calls */ struct iio_event_interface { wait_queue_head_t wait; DECLARE_KFIFO(det_events, struct iio_event_data, 16); struct list_head dev_attr_list; unsigned long flags; struct attribute_group group; struct mutex read_lock; struct iio_ioctl_handler ioctl_handler; }; bool iio_event_enabled(const struct iio_event_interface *ev_int) { return !!test_bit(IIO_BUSY_BIT_POS, &ev_int->flags); } /** * iio_push_event() - try to add event to the list for userspace reading * @indio_dev: IIO device structure * @ev_code: What event * @timestamp: When the event occurred * * Note: The caller must make sure that this function is not running * concurrently for the same indio_dev more than once. * * This function may be safely used as soon as a valid reference to iio_dev has * been obtained via iio_device_alloc(), but any events that are submitted * before iio_device_register() has successfully completed will be silently * discarded. **/ int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_event_interface *ev_int = iio_dev_opaque->event_interface; struct iio_event_data ev; int copied; if (!ev_int) return 0; /* Does anyone care? */ if (iio_event_enabled(ev_int)) { ev.id = ev_code; ev.timestamp = timestamp; copied = kfifo_put(&ev_int->det_events, ev); if (copied != 0) wake_up_poll(&ev_int->wait, EPOLLIN); } return 0; } EXPORT_SYMBOL(iio_push_event); /** * iio_event_poll() - poll the event queue to find out if it has data * @filep: File structure pointer to identify the device * @wait: Poll table pointer to add the wait queue on * * Return: (EPOLLIN | EPOLLRDNORM) if data is available for reading * or a negative error code on failure */ static __poll_t iio_event_poll(struct file *filep, struct poll_table_struct *wait) { struct iio_dev *indio_dev = filep->private_data; struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_event_interface *ev_int = iio_dev_opaque->event_interface; __poll_t events = 0; if (!indio_dev->info) return events; poll_wait(filep, &ev_int->wait, wait); if (!kfifo_is_empty(&ev_int->det_events)) events = EPOLLIN | EPOLLRDNORM; return events; } static ssize_t iio_event_chrdev_read(struct file *filep, char __user *buf, size_t count, loff_t *f_ps) { struct iio_dev *indio_dev = filep->private_data; struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_event_interface *ev_int = iio_dev_opaque->event_interface; unsigned int copied; int ret; if (!indio_dev->info) return -ENODEV; if (count < sizeof(struct iio_event_data)) return -EINVAL; do { if (kfifo_is_empty(&ev_int->det_events)) { if (filep->f_flags & O_NONBLOCK) return -EAGAIN; ret = wait_event_interruptible(ev_int->wait, !kfifo_is_empty(&ev_int->det_events) || indio_dev->info == NULL); if (ret) return ret; if (indio_dev->info == NULL) return -ENODEV; } if (mutex_lock_interruptible(&ev_int->read_lock)) return -ERESTARTSYS; ret = kfifo_to_user(&ev_int->det_events, buf, count, &copied); mutex_unlock(&ev_int->read_lock); if (ret) return ret; /* * If we couldn't read anything from the fifo (a different * thread might have been faster) we either return -EAGAIN if * the file descriptor is non-blocking, otherwise we go back to * sleep and wait for more data to arrive. 
*/ if (copied == 0 && (filep->f_flags & O_NONBLOCK)) return -EAGAIN; } while (copied == 0); return copied; } static int iio_event_chrdev_release(struct inode *inode, struct file *filep) { struct iio_dev *indio_dev = filep->private_data; struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_event_interface *ev_int = iio_dev_opaque->event_interface; clear_bit(IIO_BUSY_BIT_POS, &ev_int->flags); iio_device_put(indio_dev); return 0; } static const struct file_operations iio_event_chrdev_fileops = { .read = iio_event_chrdev_read, .poll = iio_event_poll, .release = iio_event_chrdev_release, .owner = THIS_MODULE, .llseek = noop_llseek, }; static int iio_event_getfd(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_event_interface *ev_int = iio_dev_opaque->event_interface; int fd; if (ev_int == NULL) return -ENODEV; fd = mutex_lock_interruptible(&iio_dev_opaque->mlock); if (fd) return fd; if (test_and_set_bit(IIO_BUSY_BIT_POS, &ev_int->flags)) { fd = -EBUSY; goto unlock; } iio_device_get(indio_dev); fd = anon_inode_getfd("iio:event", &iio_event_chrdev_fileops, indio_dev, O_RDONLY | O_CLOEXEC); if (fd < 0) { clear_bit(IIO_BUSY_BIT_POS, &ev_int->flags); iio_device_put(indio_dev); } else { kfifo_reset_out(&ev_int->det_events); } unlock: mutex_unlock(&iio_dev_opaque->mlock); return fd; } static const char * const iio_ev_type_text[] = { [IIO_EV_TYPE_THRESH] = "thresh", [IIO_EV_TYPE_MAG] = "mag", [IIO_EV_TYPE_ROC] = "roc", [IIO_EV_TYPE_THRESH_ADAPTIVE] = "thresh_adaptive", [IIO_EV_TYPE_MAG_ADAPTIVE] = "mag_adaptive", [IIO_EV_TYPE_CHANGE] = "change", [IIO_EV_TYPE_MAG_REFERENCED] = "mag_referenced", [IIO_EV_TYPE_GESTURE] = "gesture", }; static const char * const iio_ev_dir_text[] = { [IIO_EV_DIR_EITHER] = "either", [IIO_EV_DIR_RISING] = "rising", [IIO_EV_DIR_FALLING] = "falling", [IIO_EV_DIR_SINGLETAP] = "singletap", [IIO_EV_DIR_DOUBLETAP] = "doubletap", }; static const char * const iio_ev_info_text[] = { [IIO_EV_INFO_ENABLE] = "en", [IIO_EV_INFO_VALUE] = "value", [IIO_EV_INFO_HYSTERESIS] = "hysteresis", [IIO_EV_INFO_PERIOD] = "period", [IIO_EV_INFO_HIGH_PASS_FILTER_3DB] = "high_pass_filter_3db", [IIO_EV_INFO_LOW_PASS_FILTER_3DB] = "low_pass_filter_3db", [IIO_EV_INFO_TIMEOUT] = "timeout", [IIO_EV_INFO_RESET_TIMEOUT] = "reset_timeout", [IIO_EV_INFO_TAP2_MIN_DELAY] = "tap2_min_delay", [IIO_EV_INFO_RUNNING_PERIOD] = "runningperiod", [IIO_EV_INFO_RUNNING_COUNT] = "runningcount", }; static enum iio_event_direction iio_ev_attr_dir(struct iio_dev_attr *attr) { return attr->c->event_spec[attr->address & 0xffff].dir; } static enum iio_event_type iio_ev_attr_type(struct iio_dev_attr *attr) { return attr->c->event_spec[attr->address & 0xffff].type; } static enum iio_event_info iio_ev_attr_info(struct iio_dev_attr *attr) { return (attr->address >> 16) & 0xffff; } static ssize_t iio_ev_state_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); int ret; bool val; ret = kstrtobool(buf, &val); if (ret < 0) return ret; ret = indio_dev->info->write_event_config(indio_dev, this_attr->c, iio_ev_attr_type(this_attr), iio_ev_attr_dir(this_attr), val); return (ret < 0) ? 
ret : len; } static ssize_t iio_ev_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); int val; val = indio_dev->info->read_event_config(indio_dev, this_attr->c, iio_ev_attr_type(this_attr), iio_ev_attr_dir(this_attr)); if (val < 0) return val; else return sysfs_emit(buf, "%d\n", val); } static ssize_t iio_ev_value_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); int val, val2, val_arr[2]; int ret; ret = indio_dev->info->read_event_value(indio_dev, this_attr->c, iio_ev_attr_type(this_attr), iio_ev_attr_dir(this_attr), iio_ev_attr_info(this_attr), &val, &val2); if (ret < 0) return ret; val_arr[0] = val; val_arr[1] = val2; return iio_format_value(buf, ret, 2, val_arr); } static ssize_t iio_ev_value_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); int val, val2; int ret; if (!indio_dev->info->write_event_value) return -EINVAL; ret = iio_str_to_fixpoint(buf, 100000, &val, &val2); if (ret) return ret; ret = indio_dev->info->write_event_value(indio_dev, this_attr->c, iio_ev_attr_type(this_attr), iio_ev_attr_dir(this_attr), iio_ev_attr_info(this_attr), val, val2); if (ret < 0) return ret; return len; } static ssize_t iio_ev_label_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); if (indio_dev->info->read_event_label) return indio_dev->info->read_event_label(indio_dev, this_attr->c, iio_ev_attr_type(this_attr), iio_ev_attr_dir(this_attr), buf); return -EINVAL; } static int iio_device_add_event(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int spec_index, enum iio_event_type type, enum iio_event_direction dir, enum iio_shared_by shared_by, const unsigned long *mask) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf); ssize_t (*store)(struct device *dev, struct device_attribute *attr, const char *buf, size_t len); unsigned int attrcount = 0; unsigned int i; char *postfix; int ret; for_each_set_bit(i, mask, sizeof(*mask)*8) { if (i >= ARRAY_SIZE(iio_ev_info_text)) return -EINVAL; if (dir != IIO_EV_DIR_NONE) postfix = kasprintf(GFP_KERNEL, "%s_%s_%s", iio_ev_type_text[type], iio_ev_dir_text[dir], iio_ev_info_text[i]); else postfix = kasprintf(GFP_KERNEL, "%s_%s", iio_ev_type_text[type], iio_ev_info_text[i]); if (postfix == NULL) return -ENOMEM; if (i == IIO_EV_INFO_ENABLE) { show = iio_ev_state_show; store = iio_ev_state_store; } else { show = iio_ev_value_show; store = iio_ev_value_store; } ret = __iio_add_chan_devattr(postfix, chan, show, store, (i << 16) | spec_index, shared_by, &indio_dev->dev, NULL, &iio_dev_opaque->event_interface->dev_attr_list); kfree(postfix); if ((ret == -EBUSY) && (shared_by != IIO_SEPARATE)) continue; if (ret) return ret; attrcount++; } return attrcount; } static int iio_device_add_event_label(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int spec_index, enum iio_event_type type, enum iio_event_direction dir) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); char *postfix; int ret; if 
(!indio_dev->info->read_event_label) return 0; if (dir != IIO_EV_DIR_NONE) postfix = kasprintf(GFP_KERNEL, "%s_%s_label", iio_ev_type_text[type], iio_ev_dir_text[dir]); else postfix = kasprintf(GFP_KERNEL, "%s_label", iio_ev_type_text[type]); if (postfix == NULL) return -ENOMEM; ret = __iio_add_chan_devattr(postfix, chan, &iio_ev_label_show, NULL, spec_index, IIO_SEPARATE, &indio_dev->dev, NULL, &iio_dev_opaque->event_interface->dev_attr_list); kfree(postfix); if (ret < 0) return ret; return 1; } static int iio_device_add_event_sysfs(struct iio_dev *indio_dev, struct iio_chan_spec const *chan) { int ret = 0, i, attrcount = 0; enum iio_event_direction dir; enum iio_event_type type; for (i = 0; i < chan->num_event_specs; i++) { type = chan->event_spec[i].type; dir = chan->event_spec[i].dir; ret = iio_device_add_event(indio_dev, chan, i, type, dir, IIO_SEPARATE, &chan->event_spec[i].mask_separate); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_event(indio_dev, chan, i, type, dir, IIO_SHARED_BY_TYPE, &chan->event_spec[i].mask_shared_by_type); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_event(indio_dev, chan, i, type, dir, IIO_SHARED_BY_DIR, &chan->event_spec[i].mask_shared_by_dir); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_event(indio_dev, chan, i, type, dir, IIO_SHARED_BY_ALL, &chan->event_spec[i].mask_shared_by_all); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_event_label(indio_dev, chan, i, type, dir); if (ret < 0) return ret; attrcount += ret; } ret = attrcount; return ret; } static inline int __iio_add_event_config_attrs(struct iio_dev *indio_dev) { int j, ret, attrcount = 0; /* Dynamically created from the channels array */ for (j = 0; j < indio_dev->num_channels; j++) { ret = iio_device_add_event_sysfs(indio_dev, &indio_dev->channels[j]); if (ret < 0) return ret; attrcount += ret; } return attrcount; } static bool iio_check_for_dynamic_events(struct iio_dev *indio_dev) { int j; for (j = 0; j < indio_dev->num_channels; j++) { if (indio_dev->channels[j].num_event_specs != 0) return true; } return false; } static void iio_setup_ev_int(struct iio_event_interface *ev_int) { INIT_KFIFO(ev_int->det_events); init_waitqueue_head(&ev_int->wait); mutex_init(&ev_int->read_lock); } static long iio_event_ioctl(struct iio_dev *indio_dev, struct file *filp, unsigned int cmd, unsigned long arg) { int __user *ip = (int __user *)arg; int fd; if (cmd == IIO_GET_EVENT_FD_IOCTL) { fd = iio_event_getfd(indio_dev); if (fd < 0) return fd; if (copy_to_user(ip, &fd, sizeof(fd))) return -EFAULT; return 0; } return IIO_IOCTL_UNHANDLED; } static const char *iio_event_group_name = "events"; int iio_device_register_eventset(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_event_interface *ev_int; struct iio_dev_attr *p; int ret = 0, attrcount_orig = 0, attrcount, attrn; struct attribute **attr; if (!(indio_dev->info->event_attrs || iio_check_for_dynamic_events(indio_dev))) return 0; ev_int = kzalloc(sizeof(struct iio_event_interface), GFP_KERNEL); if (ev_int == NULL) return -ENOMEM; iio_dev_opaque->event_interface = ev_int; INIT_LIST_HEAD(&ev_int->dev_attr_list); iio_setup_ev_int(ev_int); if (indio_dev->info->event_attrs != NULL) { attr = indio_dev->info->event_attrs->attrs; while (*attr++ != NULL) attrcount_orig++; } attrcount = attrcount_orig; if (indio_dev->channels) { ret = __iio_add_event_config_attrs(indio_dev); if (ret < 0) goto error_free_setup_event_lines; attrcount += 
ret; } ev_int->group.name = iio_event_group_name; ev_int->group.attrs = kcalloc(attrcount + 1, sizeof(ev_int->group.attrs[0]), GFP_KERNEL); if (ev_int->group.attrs == NULL) { ret = -ENOMEM; goto error_free_setup_event_lines; } if (indio_dev->info->event_attrs) memcpy(ev_int->group.attrs, indio_dev->info->event_attrs->attrs, sizeof(ev_int->group.attrs[0]) * attrcount_orig); attrn = attrcount_orig; /* Add all elements from the list. */ list_for_each_entry(p, &ev_int->dev_attr_list, l) ev_int->group.attrs[attrn++] = &p->dev_attr.attr; ret = iio_device_register_sysfs_group(indio_dev, &ev_int->group); if (ret) goto error_free_group_attrs; ev_int->ioctl_handler.ioctl = iio_event_ioctl; iio_device_ioctl_handler_register(&iio_dev_opaque->indio_dev, &ev_int->ioctl_handler); return 0; error_free_group_attrs: kfree(ev_int->group.attrs); error_free_setup_event_lines: iio_free_chan_devattr_list(&ev_int->dev_attr_list); kfree(ev_int); iio_dev_opaque->event_interface = NULL; return ret; } /** * iio_device_wakeup_eventset - Wakes up the event waitqueue * @indio_dev: The IIO device * * Wakes up the event waitqueue used for poll() and blocking read(). * Should usually be called when the device is unregistered. */ void iio_device_wakeup_eventset(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); if (iio_dev_opaque->event_interface == NULL) return; wake_up(&iio_dev_opaque->event_interface->wait); } void iio_device_unregister_eventset(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_event_interface *ev_int = iio_dev_opaque->event_interface; if (ev_int == NULL) return; iio_device_ioctl_handler_unregister(&ev_int->ioctl_handler); iio_free_chan_devattr_list(&ev_int->dev_attr_list); kfree(ev_int->group.attrs); kfree(ev_int); iio_dev_opaque->event_interface = NULL; }
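The event interface registered above is consumed from userspace through the IIO character device: an application issues IIO_GET_EVENT_FD_IOCTL against /dev/iio:deviceX (dispatched to iio_event_ioctl() above), then poll()s and read()s fixed-size struct iio_event_data records from the anonymous fd created by iio_event_getfd(). A minimal userspace sketch follows; it is not part of industrialio-event.c, and it assumes a hypothetical /dev/iio:device0 node on which at least one event has already been enabled via the sysfs attributes generated by iio_device_add_event() (for example a *_thresh_rising_en file).

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <linux/iio/events.h>

int main(int argc, char **argv)
{
	/* Hypothetical device node; pass a different one as argv[1]. */
	const char *dev = argc > 1 ? argv[1] : "/dev/iio:device0";
	struct iio_event_data ev;
	struct pollfd pfd;
	int fd, event_fd;

	fd = open(dev, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * Handled by iio_event_ioctl() above: returns the anonymous
	 * "iio:event" descriptor created by iio_event_getfd().
	 */
	if (ioctl(fd, IIO_GET_EVENT_FD_IOCTL, &event_fd) < 0) {
		perror("IIO_GET_EVENT_FD_IOCTL");
		close(fd);
		return 1;
	}
	close(fd);	/* the event fd holds its own reference to the device */

	pfd.fd = event_fd;
	pfd.events = POLLIN;

	for (;;) {
		/* iio_event_poll() reports EPOLLIN once the kfifo has data. */
		if (poll(&pfd, 1, -1) < 0)
			break;
		if (read(event_fd, &ev, sizeof(ev)) != sizeof(ev))
			break;
		printf("event 0x%llx: ev type %llu dir %llu chan type %llu ts %lld\n",
		       (unsigned long long)ev.id,
		       (unsigned long long)IIO_EVENT_CODE_EXTRACT_TYPE(ev.id),
		       (unsigned long long)IIO_EVENT_CODE_EXTRACT_DIR(ev.id),
		       (unsigned long long)IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(ev.id),
		       (long long)ev.timestamp);
	}

	close(event_fd);
	return 0;
}

Only one event descriptor can be outstanding per device: iio_event_getfd() sets IIO_BUSY_BIT_POS, so a second IIO_GET_EVENT_FD_IOCTL fails with -EBUSY until the first descriptor is released via iio_event_chrdev_release().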
// SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/io.h> #include <linux/time.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/moduleparam.h> #include <linux/vmalloc.h> #include <linux/export.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/info.h> #include <sound/initval.h> #include "pcm_local.h" static int preallocate_dma = 1; module_param(preallocate_dma, int, 0444); MODULE_PARM_DESC(preallocate_dma, "Preallocate DMA memory when the PCM devices are initialized."); static int maximum_substreams = 4; module_param(maximum_substreams, int, 0444); MODULE_PARM_DESC(maximum_substreams, "Maximum substreams with preallocated DMA memory."); static const size_t snd_minimum_buffer = 16384; static unsigned long max_alloc_per_card = 32UL * 1024UL * 1024UL; module_param(max_alloc_per_card, ulong, 0644); MODULE_PARM_DESC(max_alloc_per_card, "Max total allocation bytes per card."); static void __update_allocated_size(struct snd_card *card, ssize_t bytes) { card->total_pcm_alloc_bytes += bytes; } static void update_allocated_size(struct snd_card *card, ssize_t bytes) { guard(mutex)(&card->memory_mutex); __update_allocated_size(card, bytes); } static void decrease_allocated_size(struct snd_card *card, size_t bytes) { guard(mutex)(&card->memory_mutex);
WARN_ON(card->total_pcm_alloc_bytes < bytes); __update_allocated_size(card, -(ssize_t)bytes); } static int do_alloc_pages(struct snd_card *card, int type, struct device *dev, int str, size_t size, struct snd_dma_buffer *dmab) { enum dma_data_direction dir; int err; /* check and reserve the requested size */ scoped_guard(mutex, &card->memory_mutex) { if (max_alloc_per_card && card->total_pcm_alloc_bytes + size > max_alloc_per_card) return -ENOMEM; __update_allocated_size(card, size); } if (str == SNDRV_PCM_STREAM_PLAYBACK) dir = DMA_TO_DEVICE; else dir = DMA_FROM_DEVICE; err = snd_dma_alloc_dir_pages(type, dev, dir, size, dmab); if (!err) { /* the actual allocation size might be bigger than requested, * and we need to correct the account */ if (dmab->bytes != size) update_allocated_size(card, dmab->bytes - size); } else { /* take back on allocation failure */ decrease_allocated_size(card, size); } return err; } static void do_free_pages(struct snd_card *card, struct snd_dma_buffer *dmab) { if (!dmab->area) return; decrease_allocated_size(card, dmab->bytes); snd_dma_free_pages(dmab); dmab->area = NULL; } /* * try to allocate as the large pages as possible. * stores the resultant memory size in *res_size. * * the minimum size is snd_minimum_buffer. it should be power of 2. */ static int preallocate_pcm_pages(struct snd_pcm_substream *substream, size_t size, bool no_fallback) { struct snd_dma_buffer *dmab = &substream->dma_buffer; struct snd_card *card = substream->pcm->card; size_t orig_size = size; int err; do { err = do_alloc_pages(card, dmab->dev.type, dmab->dev.dev, substream->stream, size, dmab); if (err != -ENOMEM) return err; if (no_fallback) break; size >>= 1; } while (size >= snd_minimum_buffer); dmab->bytes = 0; /* tell error */ pr_warn("ALSA pcmC%dD%d%c,%d:%s: cannot preallocate for size %zu\n", substream->pcm->card->number, substream->pcm->device, substream->stream ? 'c' : 'p', substream->number, substream->pcm->name, orig_size); return -ENOMEM; } /** * snd_pcm_lib_preallocate_free - release the preallocated buffer of the specified substream. * @substream: the pcm substream instance * * Releases the pre-allocated buffer of the given substream. */ void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream) { do_free_pages(substream->pcm->card, &substream->dma_buffer); } /** * snd_pcm_lib_preallocate_free_for_all - release all pre-allocated buffers on the pcm * @pcm: the pcm instance * * Releases all the pre-allocated buffers on the given pcm. */ void snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm) { struct snd_pcm_substream *substream; int stream; for_each_pcm_substream(pcm, stream, substream) snd_pcm_lib_preallocate_free(substream); } EXPORT_SYMBOL(snd_pcm_lib_preallocate_free_for_all); #ifdef CONFIG_SND_VERBOSE_PROCFS /* * read callback for prealloc proc file * * prints the current allocated size in kB. */ static void snd_pcm_lib_preallocate_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; snd_iprintf(buffer, "%lu\n", (unsigned long) substream->dma_buffer.bytes / 1024); } /* * read callback for prealloc_max proc file * * prints the maximum allowed size in kB. 
*/ static void snd_pcm_lib_preallocate_max_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; snd_iprintf(buffer, "%lu\n", (unsigned long) substream->dma_max / 1024); } /* * write callback for prealloc proc file * * accepts the preallocation size in kB. */ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; struct snd_card *card = substream->pcm->card; char line[64], str[64]; size_t size; struct snd_dma_buffer new_dmab; guard(mutex)(&substream->pcm->open_mutex); if (substream->runtime) { buffer->error = -EBUSY; return; } if (!snd_info_get_line(buffer, line, sizeof(line))) { snd_info_get_str(str, line, sizeof(str)); size = simple_strtoul(str, NULL, 10) * 1024; if ((size != 0 && size < 8192) || size > substream->dma_max) { buffer->error = -EINVAL; return; } if (substream->dma_buffer.bytes == size) return; memset(&new_dmab, 0, sizeof(new_dmab)); new_dmab.dev = substream->dma_buffer.dev; if (size > 0) { if (do_alloc_pages(card, substream->dma_buffer.dev.type, substream->dma_buffer.dev.dev, substream->stream, size, &new_dmab) < 0) { buffer->error = -ENOMEM; pr_debug("ALSA pcmC%dD%d%c,%d:%s: cannot preallocate for size %zu\n", substream->pcm->card->number, substream->pcm->device, substream->stream ? 'c' : 'p', substream->number, substream->pcm->name, size); return; } substream->buffer_bytes_max = size; } else { substream->buffer_bytes_max = UINT_MAX; } if (substream->dma_buffer.area) do_free_pages(card, &substream->dma_buffer); substream->dma_buffer = new_dmab; } else { buffer->error = -EINVAL; } } static inline void preallocate_info_init(struct snd_pcm_substream *substream) { struct snd_info_entry *entry; entry = snd_info_create_card_entry(substream->pcm->card, "prealloc", substream->proc_root); if (entry) { snd_info_set_text_ops(entry, substream, snd_pcm_lib_preallocate_proc_read); entry->c.text.write = snd_pcm_lib_preallocate_proc_write; entry->mode |= 0200; } entry = snd_info_create_card_entry(substream->pcm->card, "prealloc_max", substream->proc_root); if (entry) snd_info_set_text_ops(entry, substream, snd_pcm_lib_preallocate_max_proc_read); } #else /* !CONFIG_SND_VERBOSE_PROCFS */ static inline void preallocate_info_init(struct snd_pcm_substream *substream) { } #endif /* CONFIG_SND_VERBOSE_PROCFS */ /* * pre-allocate the buffer and create a proc file for the substream */ static int preallocate_pages(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max, bool managed) { int err; if (snd_BUG_ON(substream->dma_buffer.dev.type)) return -EINVAL; substream->dma_buffer.dev.type = type; substream->dma_buffer.dev.dev = data; if (size > 0) { if (!max) { /* no fallback, only also inform -ENOMEM */ err = preallocate_pcm_pages(substream, size, true); if (err < 0) return err; } else if (preallocate_dma && substream->number < maximum_substreams) { err = preallocate_pcm_pages(substream, size, false); if (err < 0 && err != -ENOMEM) return err; } } if (substream->dma_buffer.bytes > 0) substream->buffer_bytes_max = substream->dma_buffer.bytes; substream->dma_max = max; if (max > 0) preallocate_info_init(substream); if (managed) substream->managed_buffer_alloc = 1; return 0; } static int preallocate_pages_for_all(struct snd_pcm *pcm, int type, void *data, size_t size, size_t max, bool managed) { struct snd_pcm_substream *substream; int stream, err; for_each_pcm_substream(pcm, 
stream, substream) { err = preallocate_pages(substream, type, data, size, max, managed); if (err < 0) return err; } return 0; } /** * snd_pcm_lib_preallocate_pages - pre-allocation for the given DMA type * @substream: the pcm substream instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * @max: the max. allowed pre-allocation size * * Do pre-allocation for the given DMA buffer type. */ void snd_pcm_lib_preallocate_pages(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max) { preallocate_pages(substream, type, data, size, max, false); } EXPORT_SYMBOL(snd_pcm_lib_preallocate_pages); /** * snd_pcm_lib_preallocate_pages_for_all - pre-allocation for continuous memory type (all substreams) * @pcm: the pcm instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * @max: the max. allowed pre-allocation size * * Do pre-allocation to all substreams of the given pcm for the * specified DMA type. */ void snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm, int type, void *data, size_t size, size_t max) { preallocate_pages_for_all(pcm, type, data, size, max, false); } EXPORT_SYMBOL(snd_pcm_lib_preallocate_pages_for_all); /** * snd_pcm_set_managed_buffer - set up buffer management for a substream * @substream: the pcm substream instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * @max: the max. allowed pre-allocation size * * Do pre-allocation for the given DMA buffer type, and set the managed * buffer allocation mode to the given substream. * In this mode, PCM core will allocate a buffer automatically before PCM * hw_params ops call, and release the buffer after PCM hw_free ops call * as well, so that the driver doesn't need to invoke the allocation and * the release explicitly in its callback. * When a buffer is actually allocated before the PCM hw_params call, it * turns on the runtime buffer_changed flag for drivers changing their h/w * parameters accordingly. * * When @size is non-zero and @max is zero, this tries to allocate for only * the exact buffer size without fallback, and may return -ENOMEM. * Otherwise, the function tries to allocate smaller chunks if the allocation * fails. This is the behavior of snd_pcm_set_fixed_buffer(). * * When both @size and @max are zero, the function only sets up the buffer * for later dynamic allocations. It's used typically for buffers with * SNDRV_DMA_TYPE_VMALLOC type. * * Upon successful buffer allocation and setup, the function returns 0. * * Return: zero if successful, or a negative error code */ int snd_pcm_set_managed_buffer(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max) { return preallocate_pages(substream, type, data, size, max, true); } EXPORT_SYMBOL(snd_pcm_set_managed_buffer); /** * snd_pcm_set_managed_buffer_all - set up buffer management for all substreams * for all substreams * @pcm: the pcm instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * @max: the max. allowed pre-allocation size * * Do pre-allocation to all substreams of the given pcm for the specified DMA * type and size, and set the managed_buffer_alloc flag to each substream. 
* * Return: zero if successful, or a negative error code */ int snd_pcm_set_managed_buffer_all(struct snd_pcm *pcm, int type, struct device *data, size_t size, size_t max) { return preallocate_pages_for_all(pcm, type, data, size, max, true); } EXPORT_SYMBOL(snd_pcm_set_managed_buffer_all); /** * snd_pcm_lib_malloc_pages - allocate the DMA buffer * @substream: the substream to allocate the DMA buffer to * @size: the requested buffer size in bytes * * Allocates the DMA buffer on the BUS type given earlier to * snd_pcm_lib_preallocate_xxx_pages(). * * Return: 1 if the buffer is changed, 0 if not changed, or a negative * code on failure. */ int snd_pcm_lib_malloc_pages(struct snd_pcm_substream *substream, size_t size) { struct snd_card *card; struct snd_pcm_runtime *runtime; struct snd_dma_buffer *dmab = NULL; if (PCM_RUNTIME_CHECK(substream)) return -EINVAL; if (snd_BUG_ON(substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_UNKNOWN)) return -EINVAL; runtime = substream->runtime; card = substream->pcm->card; if (runtime->dma_buffer_p) { /* perphaps, we might free the large DMA memory region to save some space here, but the actual solution costs us less time */ if (runtime->dma_buffer_p->bytes >= size) { runtime->dma_bytes = size; return 0; /* ok, do not change */ } snd_pcm_lib_free_pages(substream); } if (substream->dma_buffer.area != NULL && substream->dma_buffer.bytes >= size) { dmab = &substream->dma_buffer; /* use the pre-allocated buffer */ } else { /* dma_max=0 means the fixed size preallocation */ if (substream->dma_buffer.area && !substream->dma_max) return -ENOMEM; dmab = kzalloc(sizeof(*dmab), GFP_KERNEL); if (! dmab) return -ENOMEM; dmab->dev = substream->dma_buffer.dev; if (do_alloc_pages(card, substream->dma_buffer.dev.type, substream->dma_buffer.dev.dev, substream->stream, size, dmab) < 0) { kfree(dmab); pr_debug("ALSA pcmC%dD%d%c,%d:%s: cannot preallocate for size %zu\n", substream->pcm->card->number, substream->pcm->device, substream->stream ? 'c' : 'p', substream->number, substream->pcm->name, size); return -ENOMEM; } } snd_pcm_set_runtime_buffer(substream, dmab); runtime->dma_bytes = size; return 1; /* area was changed */ } EXPORT_SYMBOL(snd_pcm_lib_malloc_pages); /** * snd_pcm_lib_free_pages - release the allocated DMA buffer. * @substream: the substream to release the DMA buffer * * Releases the DMA buffer allocated via snd_pcm_lib_malloc_pages(). * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; if (PCM_RUNTIME_CHECK(substream)) return -EINVAL; runtime = substream->runtime; if (runtime->dma_area == NULL) return 0; if (runtime->dma_buffer_p != &substream->dma_buffer) { struct snd_card *card = substream->pcm->card; /* it's a newly allocated buffer. release it now. 
*/ do_free_pages(card, runtime->dma_buffer_p); kfree(runtime->dma_buffer_p); } snd_pcm_set_runtime_buffer(substream, NULL); return 0; } EXPORT_SYMBOL(snd_pcm_lib_free_pages); int _snd_pcm_lib_alloc_vmalloc_buffer(struct snd_pcm_substream *substream, size_t size, gfp_t gfp_flags) { struct snd_pcm_runtime *runtime; if (PCM_RUNTIME_CHECK(substream)) return -EINVAL; runtime = substream->runtime; if (runtime->dma_area) { if (runtime->dma_bytes >= size) return 0; /* already large enough */ vfree(runtime->dma_area); } runtime->dma_area = __vmalloc(size, gfp_flags); if (!runtime->dma_area) return -ENOMEM; runtime->dma_bytes = size; return 1; } EXPORT_SYMBOL(_snd_pcm_lib_alloc_vmalloc_buffer); /** * snd_pcm_lib_free_vmalloc_buffer - free vmalloc buffer * @substream: the substream with a buffer allocated by * snd_pcm_lib_alloc_vmalloc_buffer() * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_lib_free_vmalloc_buffer(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; if (PCM_RUNTIME_CHECK(substream)) return -EINVAL; runtime = substream->runtime; vfree(runtime->dma_area); runtime->dma_area = NULL; return 0; } EXPORT_SYMBOL(snd_pcm_lib_free_vmalloc_buffer); /** * snd_pcm_lib_get_vmalloc_page - map vmalloc buffer offset to page struct * @substream: the substream with a buffer allocated by * snd_pcm_lib_alloc_vmalloc_buffer() * @offset: offset in the buffer * * This function is to be used as the page callback in the PCM ops. * * Return: The page struct, or %NULL on failure. */ struct page *snd_pcm_lib_get_vmalloc_page(struct snd_pcm_substream *substream, unsigned long offset) { return vmalloc_to_page(substream->runtime->dma_area + offset); } EXPORT_SYMBOL(snd_pcm_lib_get_vmalloc_page);
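With CONFIG_SND_VERBOSE_PROCFS enabled, preallocate_info_init() exposes each substream's preallocation through two proc entries: prealloc (readable and, for a privileged user, writable in kilobytes) and prealloc_max. The minimal sketch below shows one way to query and resize the preallocated buffer from userspace; it is not part of pcm_memory.c, and the /proc/asound/card0/pcm0p/sub0/ paths are an assumed example (card 0, playback PCM 0, substream 0) that will differ per system.

#include <stdio.h>

int main(int argc, char **argv)
{
	/* Assumed layout: card 0, playback PCM device 0, substream 0. */
	const char *prealloc_path = "/proc/asound/card0/pcm0p/sub0/prealloc";
	const char *max_path = "/proc/asound/card0/pcm0p/sub0/prealloc_max";
	const char *new_kb = argc > 1 ? argv[1] : "512";	/* size in kB */
	char buf[64];
	FILE *f;

	/* prealloc_max reports the upper bound (dma_max) in kB. */
	f = fopen(max_path, "r");
	if (f) {
		if (fgets(buf, sizeof(buf), f))
			printf("prealloc_max (kB): %s", buf);
		fclose(f);
	}

	/*
	 * Parsed by snd_pcm_lib_preallocate_proc_write() as a size in kB;
	 * rejected with -EBUSY while the substream is open, and with
	 * -EINVAL below 8 kB (unless 0) or above dma_max.
	 */
	f = fopen(prealloc_path, "w");
	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%s\n", new_kb);
	if (fclose(f) != 0) {
		perror("fclose");
		return 1;
	}
	return 0;
}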
// SPDX-License-Identifier: GPL-2.0-or-later /* * dvb_frontend.c: DVB frontend tuning interface/thread * * Copyright (C) 1999-2001 Ralph Metzler * Marcus Metzler * Holger Waechtler * for convergence integrated media GmbH * * Copyright (C) 2004 Andrew de Quincey (tuning thread cleanup) */ /* Enables DVBv3 compatibility bits at the headers */ #define __DVB_CORE__ #define pr_fmt(fmt) "dvb_frontend: " fmt #include <linux/string.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/wait.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/semaphore.h> #include <linux/module.h> #include <linux/nospec.h> #include <linux/list.h> #include <linux/freezer.h> #include <linux/jiffies.h> #include <linux/kthread.h> #include <linux/ktime.h> #include <linux/compat.h> #include <asm/processor.h> #include <media/dvb_frontend.h> #include <media/dvbdev.h> #include <linux/dvb/version.h> static int dvb_frontend_debug; static int dvb_shutdown_timeout; static int dvb_force_auto_inversion; static int dvb_override_tune_delay; static int dvb_powerdown_on_sleep = 1; static int dvb_mfe_wait_time = 5; module_param_named(frontend_debug, dvb_frontend_debug, int, 0644); MODULE_PARM_DESC(frontend_debug, "Turn on/off frontend core debugging (default:off)."); module_param(dvb_shutdown_timeout, int, 0644); MODULE_PARM_DESC(dvb_shutdown_timeout, "wait <shutdown_timeout> seconds after close() before suspending hardware"); module_param(dvb_force_auto_inversion, int, 0644); MODULE_PARM_DESC(dvb_force_auto_inversion, "0: normal (default), 1: INVERSION_AUTO forced always"); module_param(dvb_override_tune_delay, int, 0644); MODULE_PARM_DESC(dvb_override_tune_delay, "0: normal (default), >0 => delay in milliseconds to wait for lock after a tune attempt"); module_param(dvb_powerdown_on_sleep, int, 0644); MODULE_PARM_DESC(dvb_powerdown_on_sleep, "0: do not power down, 1: turn LNB voltage off on sleep (default)"); module_param(dvb_mfe_wait_time, int, 0644); MODULE_PARM_DESC(dvb_mfe_wait_time, "Wait up to <mfe_wait_time> seconds on open() for multi-frontend to become available (default:5 seconds)"); #define dprintk(fmt, arg...) \ printk(KERN_DEBUG pr_fmt("%s: " fmt), __func__, ##arg) #define FESTATE_IDLE 1 #define FESTATE_RETUNE 2 #define FESTATE_TUNING_FAST 4 #define FESTATE_TUNING_SLOW 8 #define FESTATE_TUNED 16 #define FESTATE_ZIGZAG_FAST 32 #define FESTATE_ZIGZAG_SLOW 64 #define FESTATE_DISEQC 128 #define FESTATE_ERROR 256 #define FESTATE_WAITFORLOCK (FESTATE_TUNING_FAST | FESTATE_TUNING_SLOW | FESTATE_ZIGZAG_FAST | FESTATE_ZIGZAG_SLOW | FESTATE_DISEQC) #define FESTATE_SEARCHING_FAST (FESTATE_TUNING_FAST | FESTATE_ZIGZAG_FAST) #define FESTATE_SEARCHING_SLOW (FESTATE_TUNING_SLOW | FESTATE_ZIGZAG_SLOW) #define FESTATE_LOSTLOCK (FESTATE_ZIGZAG_FAST | FESTATE_ZIGZAG_SLOW) /* * FESTATE_IDLE. No tuning parameters have been supplied and the loop is idling. * FESTATE_RETUNE. Parameters have been supplied, but we have not yet performed the first tune. * FESTATE_TUNING_FAST. Tuning parameters have been supplied and fast zigzag scan is in progress. * FESTATE_TUNING_SLOW. Tuning parameters have been supplied. Fast zigzag failed, so we're trying again, but slower. * FESTATE_TUNED. The frontend has successfully locked on. * FESTATE_ZIGZAG_FAST.
The lock has been lost, and a fast zigzag has been initiated to try and regain it. * FESTATE_ZIGZAG_SLOW. The lock has been lost. Fast zigzag has been failed, so we're trying again, but slower. * FESTATE_DISEQC. A DISEQC command has just been issued. * FESTATE_WAITFORLOCK. When we're waiting for a lock. * FESTATE_SEARCHING_FAST. When we're searching for a signal using a fast zigzag scan. * FESTATE_SEARCHING_SLOW. When we're searching for a signal using a slow zigzag scan. * FESTATE_LOSTLOCK. When the lock has been lost, and we're searching it again. */ static DEFINE_MUTEX(frontend_mutex); struct dvb_frontend_private { /* thread/frontend values */ struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters_out; struct dvb_fe_events events; struct semaphore sem; struct list_head list_head; wait_queue_head_t wait_queue; struct task_struct *thread; unsigned long release_jiffies; unsigned int wakeup; enum fe_status status; unsigned long tune_mode_flags; unsigned int delay; unsigned int reinitialise; int tone; int voltage; /* swzigzag values */ unsigned int state; unsigned int bending; int lnb_drift; unsigned int inversion; unsigned int auto_step; unsigned int auto_sub_step; unsigned int started_auto_step; unsigned int min_delay; unsigned int max_drift; unsigned int step_size; int quality; unsigned int check_wrapped; enum dvbfe_search algo_status; #if defined(CONFIG_MEDIA_CONTROLLER_DVB) struct media_pipeline pipe; #endif }; static void dvb_frontend_invoke_release(struct dvb_frontend *fe, void (*release)(struct dvb_frontend *fe)); static void __dvb_frontend_free(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; if (fepriv) dvb_device_put(fepriv->dvbdev); dvb_frontend_invoke_release(fe, fe->ops.release); kfree(fepriv); } static void dvb_frontend_free(struct kref *ref) { struct dvb_frontend *fe = container_of(ref, struct dvb_frontend, refcount); __dvb_frontend_free(fe); } static void dvb_frontend_put(struct dvb_frontend *fe) { /* call detach before dropping the reference count */ if (fe->ops.detach) fe->ops.detach(fe); /* * Check if the frontend was registered, as otherwise * kref was not initialized yet. */ if (fe->frontend_priv) kref_put(&fe->refcount, dvb_frontend_free); else __dvb_frontend_free(fe); } static void dvb_frontend_get(struct dvb_frontend *fe) { kref_get(&fe->refcount); } static void dvb_frontend_wakeup(struct dvb_frontend *fe); static int dtv_get_frontend(struct dvb_frontend *fe, struct dtv_frontend_properties *c, struct dvb_frontend_parameters *p_out); static int dtv_property_legacy_params_sync(struct dvb_frontend *fe, const struct dtv_frontend_properties *c, struct dvb_frontend_parameters *p); static bool has_get_frontend(struct dvb_frontend *fe) { return fe->ops.get_frontend; } /* * Due to DVBv3 API calls, a delivery system should be mapped into one of * the 4 DVBv3 delivery systems (FE_QPSK, FE_QAM, FE_OFDM or FE_ATSC), * otherwise, a DVBv3 call will fail. 
*/ enum dvbv3_emulation_type { DVBV3_UNKNOWN, DVBV3_QPSK, DVBV3_QAM, DVBV3_OFDM, DVBV3_ATSC, }; static enum dvbv3_emulation_type dvbv3_type(u32 delivery_system) { switch (delivery_system) { case SYS_DVBC_ANNEX_A: case SYS_DVBC_ANNEX_C: return DVBV3_QAM; case SYS_DVBS: case SYS_DVBS2: case SYS_TURBO: case SYS_ISDBS: case SYS_DSS: return DVBV3_QPSK; case SYS_DVBT: case SYS_DVBT2: case SYS_ISDBT: case SYS_DTMB: return DVBV3_OFDM; case SYS_ATSC: case SYS_ATSCMH: case SYS_DVBC_ANNEX_B: return DVBV3_ATSC; case SYS_UNDEFINED: case SYS_ISDBC: case SYS_DVBH: case SYS_DAB: default: /* * Doesn't know how to emulate those types and/or * there's no frontend driver from this type yet * with some emulation code, so, we're not sure yet how * to handle them, or they're not compatible with a DVBv3 call. */ return DVBV3_UNKNOWN; } } static void dvb_frontend_add_event(struct dvb_frontend *fe, enum fe_status status) { struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct dvb_fe_events *events = &fepriv->events; struct dvb_frontend_event *e; int wp; dev_dbg(fe->dvb->device, "%s:\n", __func__); if ((status & FE_HAS_LOCK) && has_get_frontend(fe)) dtv_get_frontend(fe, c, &fepriv->parameters_out); mutex_lock(&events->mtx); wp = (events->eventw + 1) % MAX_EVENT; if (wp == events->eventr) { events->overflow = 1; events->eventr = (events->eventr + 1) % MAX_EVENT; } e = &events->events[events->eventw]; e->status = status; e->parameters = fepriv->parameters_out; events->eventw = wp; mutex_unlock(&events->mtx); wake_up_interruptible(&events->wait_queue); } static int dvb_frontend_test_event(struct dvb_frontend_private *fepriv, struct dvb_fe_events *events) { int ret; up(&fepriv->sem); ret = events->eventw != events->eventr; down(&fepriv->sem); return ret; } static int dvb_frontend_get_event(struct dvb_frontend *fe, struct dvb_frontend_event *event, int flags) { struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dvb_fe_events *events = &fepriv->events; dev_dbg(fe->dvb->device, "%s:\n", __func__); if (events->overflow) { events->overflow = 0; return -EOVERFLOW; } if (events->eventw == events->eventr) { struct wait_queue_entry wait; int ret = 0; if (flags & O_NONBLOCK) return -EWOULDBLOCK; init_waitqueue_entry(&wait, current); add_wait_queue(&events->wait_queue, &wait); while (!dvb_frontend_test_event(fepriv, events)) { wait_woken(&wait, TASK_INTERRUPTIBLE, 0); if (signal_pending(current)) { ret = -ERESTARTSYS; break; } } remove_wait_queue(&events->wait_queue, &wait); if (ret < 0) return ret; } mutex_lock(&events->mtx); *event = events->events[events->eventr]; events->eventr = (events->eventr + 1) % MAX_EVENT; mutex_unlock(&events->mtx); return 0; } static void dvb_frontend_clear_events(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dvb_fe_events *events = &fepriv->events; mutex_lock(&events->mtx); events->eventr = events->eventw; mutex_unlock(&events->mtx); } static void dvb_frontend_init(struct dvb_frontend *fe) { dev_dbg(fe->dvb->device, "%s: initialising adapter %i frontend %i (%s)...\n", __func__, fe->dvb->num, fe->id, fe->ops.info.name); if (fe->ops.init) fe->ops.init(fe); if (fe->ops.tuner_ops.init) { if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 1); fe->ops.tuner_ops.init(fe); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); } } void dvb_frontend_reinitialise(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; fepriv->reinitialise = 1; 
dvb_frontend_wakeup(fe); } EXPORT_SYMBOL(dvb_frontend_reinitialise); static void dvb_frontend_swzigzag_update_delay(struct dvb_frontend_private *fepriv, int locked) { int q2; struct dvb_frontend *fe = fepriv->dvbdev->priv; dev_dbg(fe->dvb->device, "%s:\n", __func__); if (locked) (fepriv->quality) = (fepriv->quality * 220 + 36 * 256) / 256; else (fepriv->quality) = (fepriv->quality * 220 + 0) / 256; q2 = fepriv->quality - 128; q2 *= q2; fepriv->delay = fepriv->min_delay + q2 * HZ / (128 * 128); } /** * dvb_frontend_swzigzag_autotune - Performs automatic twiddling of frontend * parameters. * * @fe: The frontend concerned. * @check_wrapped: Checks if an iteration has completed. * DO NOT SET ON THE FIRST ATTEMPT. * * return: Number of complete iterations that have been performed. */ static int dvb_frontend_swzigzag_autotune(struct dvb_frontend *fe, int check_wrapped) { int autoinversion; int ready = 0; int fe_set_err = 0; struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache, tmp; int original_inversion = c->inversion; u32 original_frequency = c->frequency; /* are we using autoinversion? */ autoinversion = ((!(fe->ops.info.caps & FE_CAN_INVERSION_AUTO)) && (c->inversion == INVERSION_AUTO)); /* setup parameters correctly */ while (!ready) { /* calculate the lnb_drift */ fepriv->lnb_drift = fepriv->auto_step * fepriv->step_size; /* wrap the auto_step if we've exceeded the maximum drift */ if (fepriv->lnb_drift > fepriv->max_drift) { fepriv->auto_step = 0; fepriv->auto_sub_step = 0; fepriv->lnb_drift = 0; } /* perform inversion and +/- zigzag */ switch (fepriv->auto_sub_step) { case 0: /* try with the current inversion and current drift setting */ ready = 1; break; case 1: if (!autoinversion) break; fepriv->inversion = (fepriv->inversion == INVERSION_OFF) ? INVERSION_ON : INVERSION_OFF; ready = 1; break; case 2: if (fepriv->lnb_drift == 0) break; fepriv->lnb_drift = -fepriv->lnb_drift; ready = 1; break; case 3: if (fepriv->lnb_drift == 0) break; if (!autoinversion) break; fepriv->inversion = (fepriv->inversion == INVERSION_OFF) ? 
INVERSION_ON : INVERSION_OFF; fepriv->lnb_drift = -fepriv->lnb_drift; ready = 1; break; default: fepriv->auto_step++; fepriv->auto_sub_step = -1; /* it'll be incremented to 0 in a moment */ break; } if (!ready) fepriv->auto_sub_step++; } /* if this attempt would hit where we started, indicate a complete * iteration has occurred */ if ((fepriv->auto_step == fepriv->started_auto_step) && (fepriv->auto_sub_step == 0) && check_wrapped) { return 1; } dev_dbg(fe->dvb->device, "%s: drift:%i inversion:%i auto_step:%i auto_sub_step:%i started_auto_step:%i\n", __func__, fepriv->lnb_drift, fepriv->inversion, fepriv->auto_step, fepriv->auto_sub_step, fepriv->started_auto_step); /* set the frontend itself */ c->frequency += fepriv->lnb_drift; if (autoinversion) c->inversion = fepriv->inversion; tmp = *c; if (fe->ops.set_frontend) fe_set_err = fe->ops.set_frontend(fe); *c = tmp; if (fe_set_err < 0) { fepriv->state = FESTATE_ERROR; return fe_set_err; } c->frequency = original_frequency; c->inversion = original_inversion; fepriv->auto_sub_step++; return 0; } static void dvb_frontend_swzigzag(struct dvb_frontend *fe) { enum fe_status s = FE_NONE; int retval = 0; struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache, tmp; if (fepriv->max_drift) dev_warn_once(fe->dvb->device, "Frontend requested software zigzag, but didn't set the frequency step size\n"); /* if we've got no parameters, just keep idling */ if (fepriv->state & FESTATE_IDLE) { fepriv->delay = 3 * HZ; fepriv->quality = 0; return; } /* in SCAN mode, we just set the frontend when asked and leave it alone */ if (fepriv->tune_mode_flags & FE_TUNE_MODE_ONESHOT) { if (fepriv->state & FESTATE_RETUNE) { tmp = *c; if (fe->ops.set_frontend) retval = fe->ops.set_frontend(fe); *c = tmp; if (retval < 0) fepriv->state = FESTATE_ERROR; else fepriv->state = FESTATE_TUNED; } fepriv->delay = 3 * HZ; fepriv->quality = 0; return; } /* get the frontend status */ if (fepriv->state & FESTATE_RETUNE) { s = 0; } else { if (fe->ops.read_status) fe->ops.read_status(fe, &s); if (s != fepriv->status) { dvb_frontend_add_event(fe, s); fepriv->status = s; } } /* if we're not tuned, and we have a lock, move to the TUNED state */ if ((fepriv->state & FESTATE_WAITFORLOCK) && (s & FE_HAS_LOCK)) { dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); fepriv->state = FESTATE_TUNED; /* if we're tuned, then we have determined the correct inversion */ if ((!(fe->ops.info.caps & FE_CAN_INVERSION_AUTO)) && (c->inversion == INVERSION_AUTO)) { c->inversion = fepriv->inversion; } return; } /* if we are tuned already, check we're still locked */ if (fepriv->state & FESTATE_TUNED) { dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); /* we're tuned, and the lock is still good... */ if (s & FE_HAS_LOCK) { return; } else { /* if we _WERE_ tuned, but now don't have a lock */ fepriv->state = FESTATE_ZIGZAG_FAST; fepriv->started_auto_step = fepriv->auto_step; fepriv->check_wrapped = 0; } } /* don't actually do anything if we're in the LOSTLOCK state, * the frontend is set to FE_CAN_RECOVER, and the max_drift is 0 */ if ((fepriv->state & FESTATE_LOSTLOCK) && (fe->ops.info.caps & FE_CAN_RECOVER) && (fepriv->max_drift == 0)) { dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); return; } /* don't do anything if we're in the DISEQC state, since this * might be someone with a motorized dish controlled by DISEQC. * If its actually a re-tune, there will be a SET_FRONTEND soon enough. 
*/ if (fepriv->state & FESTATE_DISEQC) { dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); return; } /* if we're in the RETUNE state, set everything up for a brand * new scan, keeping the current inversion setting, as the next * tune is _very_ likely to require the same */ if (fepriv->state & FESTATE_RETUNE) { fepriv->lnb_drift = 0; fepriv->auto_step = 0; fepriv->auto_sub_step = 0; fepriv->started_auto_step = 0; fepriv->check_wrapped = 0; } /* fast zigzag. */ if ((fepriv->state & FESTATE_SEARCHING_FAST) || (fepriv->state & FESTATE_RETUNE)) { fepriv->delay = fepriv->min_delay; /* perform a tune */ retval = dvb_frontend_swzigzag_autotune(fe, fepriv->check_wrapped); if (retval < 0) { return; } else if (retval) { /* OK, if we've run out of trials at the fast speed. * Drop back to slow for the _next_ attempt */ fepriv->state = FESTATE_SEARCHING_SLOW; fepriv->started_auto_step = fepriv->auto_step; return; } fepriv->check_wrapped = 1; /* if we've just re-tuned, enter the ZIGZAG_FAST state. * This ensures we cannot return from an * FE_SET_FRONTEND ioctl before the first frontend tune * occurs */ if (fepriv->state & FESTATE_RETUNE) { fepriv->state = FESTATE_TUNING_FAST; } } /* slow zigzag */ if (fepriv->state & FESTATE_SEARCHING_SLOW) { dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); /* Note: don't bother checking for wrapping; we stay in this * state until we get a lock */ dvb_frontend_swzigzag_autotune(fe, 0); } } static int dvb_frontend_is_exiting(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; if (fe->exit != DVB_FE_NO_EXIT) return 1; if (fepriv->dvbdev->writers == 1) if (time_after_eq(jiffies, fepriv->release_jiffies + dvb_shutdown_timeout * HZ)) return 1; return 0; } static int dvb_frontend_should_wakeup(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; if (fepriv->wakeup) { fepriv->wakeup = 0; return 1; } return dvb_frontend_is_exiting(fe); } static void dvb_frontend_wakeup(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; fepriv->wakeup = 1; wake_up_interruptible(&fepriv->wait_queue); } static int dvb_frontend_thread(void *data) { struct dvb_frontend *fe = data; struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct dvb_frontend_private *fepriv = fe->frontend_priv; enum fe_status s = FE_NONE; enum dvbfe_algo algo; bool re_tune = false; bool semheld = false; dev_dbg(fe->dvb->device, "%s:\n", __func__); fepriv->check_wrapped = 0; fepriv->quality = 0; fepriv->delay = 3 * HZ; fepriv->status = 0; fepriv->wakeup = 0; fepriv->reinitialise = 0; dvb_frontend_init(fe); set_freezable(); while (1) { up(&fepriv->sem); /* is locked when we enter the thread... 
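 * The semaphore protocol of this loop: fepriv->sem is already held by
 * whoever started the thread, so the first thing each iteration does is
 * release it, then sleep for fepriv->delay jiffies (or until
 * dvb_frontend_wakeup() / kthread_stop() fires), and finally re-acquire
 * it with down_interruptible() before touching the frontend again.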
*/ wait_event_freezable_timeout(fepriv->wait_queue, dvb_frontend_should_wakeup(fe) || kthread_should_stop(), fepriv->delay); if (kthread_should_stop() || dvb_frontend_is_exiting(fe)) { /* got signal or quitting */ if (!down_interruptible(&fepriv->sem)) semheld = true; fe->exit = DVB_FE_NORMAL_EXIT; break; } if (down_interruptible(&fepriv->sem)) break; if (fepriv->reinitialise) { dvb_frontend_init(fe); if (fe->ops.set_tone && fepriv->tone != -1) fe->ops.set_tone(fe, fepriv->tone); if (fe->ops.set_voltage && fepriv->voltage != -1) fe->ops.set_voltage(fe, fepriv->voltage); fepriv->reinitialise = 0; } /* do an iteration of the tuning loop */ if (fe->ops.get_frontend_algo) { algo = fe->ops.get_frontend_algo(fe); switch (algo) { case DVBFE_ALGO_HW: dev_dbg(fe->dvb->device, "%s: Frontend ALGO = DVBFE_ALGO_HW\n", __func__); if (fepriv->state & FESTATE_RETUNE) { dev_dbg(fe->dvb->device, "%s: Retune requested, FESTATE_RETUNE\n", __func__); re_tune = true; fepriv->state = FESTATE_TUNED; } else { re_tune = false; } if (fe->ops.tune) fe->ops.tune(fe, re_tune, fepriv->tune_mode_flags, &fepriv->delay, &s); if (s != fepriv->status && !(fepriv->tune_mode_flags & FE_TUNE_MODE_ONESHOT)) { dev_dbg(fe->dvb->device, "%s: state changed, adding current state\n", __func__); dvb_frontend_add_event(fe, s); fepriv->status = s; } break; case DVBFE_ALGO_SW: dev_dbg(fe->dvb->device, "%s: Frontend ALGO = DVBFE_ALGO_SW\n", __func__); dvb_frontend_swzigzag(fe); break; case DVBFE_ALGO_CUSTOM: dev_dbg(fe->dvb->device, "%s: Frontend ALGO = DVBFE_ALGO_CUSTOM, state=%d\n", __func__, fepriv->state); if (fepriv->state & FESTATE_RETUNE) { dev_dbg(fe->dvb->device, "%s: Retune requested, FESTAT_RETUNE\n", __func__); fepriv->state = FESTATE_TUNED; } /* Case where we are going to search for a carrier * User asked us to retune again for some reason, possibly * requesting a search with a new set of parameters */ if (fepriv->algo_status & DVBFE_ALGO_SEARCH_AGAIN) { if (fe->ops.search) { fepriv->algo_status = fe->ops.search(fe); /* We did do a search as was requested, the flags are * now unset as well and has the flags wrt to search. 
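 * In other words, fe->ops.search() returns a DVBFE_ALGO_SEARCH_* status
 * word that replaces algo_status; the code further down re-arms
 * DVBFE_ALGO_SEARCH_AGAIN (and shortens the delay) whenever the search
 * did not report DVBFE_ALGO_SEARCH_SUCCESS.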
*/ } else { fepriv->algo_status &= ~DVBFE_ALGO_SEARCH_AGAIN; } } /* Track the carrier if the search was successful */ if (fepriv->algo_status != DVBFE_ALGO_SEARCH_SUCCESS) { fepriv->algo_status |= DVBFE_ALGO_SEARCH_AGAIN; fepriv->delay = HZ / 2; } dtv_property_legacy_params_sync(fe, c, &fepriv->parameters_out); fe->ops.read_status(fe, &s); if (s != fepriv->status) { dvb_frontend_add_event(fe, s); /* update event list */ fepriv->status = s; if (!(s & FE_HAS_LOCK)) { fepriv->delay = HZ / 10; fepriv->algo_status |= DVBFE_ALGO_SEARCH_AGAIN; } else { fepriv->delay = 60 * HZ; } } break; default: dev_dbg(fe->dvb->device, "%s: UNDEFINED ALGO !\n", __func__); break; } } else { dvb_frontend_swzigzag(fe); } } if (dvb_powerdown_on_sleep) { if (fe->ops.set_voltage) fe->ops.set_voltage(fe, SEC_VOLTAGE_OFF); if (fe->ops.tuner_ops.sleep) { if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 1); fe->ops.tuner_ops.sleep(fe); if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); } if (fe->ops.sleep) fe->ops.sleep(fe); } fepriv->thread = NULL; if (kthread_should_stop()) fe->exit = DVB_FE_DEVICE_REMOVED; else fe->exit = DVB_FE_NO_EXIT; mb(); if (semheld) up(&fepriv->sem); dvb_frontend_wakeup(fe); return 0; } static void dvb_frontend_stop(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; dev_dbg(fe->dvb->device, "%s:\n", __func__); if (fe->exit != DVB_FE_DEVICE_REMOVED) fe->exit = DVB_FE_NORMAL_EXIT; mb(); if (!fepriv->thread) return; kthread_stop(fepriv->thread); sema_init(&fepriv->sem, 1); fepriv->state = FESTATE_IDLE; /* paranoia check in case a signal arrived */ if (fepriv->thread) dev_warn(fe->dvb->device, "dvb_frontend_stop: warning: thread %p won't exit\n", fepriv->thread); } /* * Sleep for the amount of time given by add_usec parameter * * This needs to be as precise as possible, as it affects the detection of * the dish tone command at the satellite subsystem. The precision is improved * by using a scheduled msleep followed by udelay for the remainder. 
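 * The FE_DISHNETWORK_SEND_LEGACY_CMD handler below relies on this helper
 * to keep the legacy Dish Network voltage transitions 8 ms apart, e.g.
 * dvb_frontend_sleep_until(&nexttime, 8000).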
*/ void dvb_frontend_sleep_until(ktime_t *waketime, u32 add_usec) { s32 delta; *waketime = ktime_add_us(*waketime, add_usec); delta = ktime_us_delta(ktime_get_boottime(), *waketime); if (delta > 2500) { msleep((delta - 1500) / 1000); delta = ktime_us_delta(ktime_get_boottime(), *waketime); } if (delta > 0) udelay(delta); } EXPORT_SYMBOL(dvb_frontend_sleep_until); static int dvb_frontend_start(struct dvb_frontend *fe) { int ret; struct dvb_frontend_private *fepriv = fe->frontend_priv; struct task_struct *fe_thread; dev_dbg(fe->dvb->device, "%s:\n", __func__); if (fepriv->thread) { if (fe->exit == DVB_FE_NO_EXIT) return 0; else dvb_frontend_stop(fe); } if (signal_pending(current)) return -EINTR; if (down_interruptible(&fepriv->sem)) return -EINTR; fepriv->state = FESTATE_IDLE; fe->exit = DVB_FE_NO_EXIT; fepriv->thread = NULL; mb(); fe_thread = kthread_run(dvb_frontend_thread, fe, "kdvb-ad-%i-fe-%i", fe->dvb->num, fe->id); if (IS_ERR(fe_thread)) { ret = PTR_ERR(fe_thread); dev_warn(fe->dvb->device, "dvb_frontend_start: failed to start kthread (%d)\n", ret); up(&fepriv->sem); return ret; } fepriv->thread = fe_thread; return 0; } static void dvb_frontend_get_frequency_limits(struct dvb_frontend *fe, u32 *freq_min, u32 *freq_max, u32 *tolerance) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; u32 tuner_min = fe->ops.tuner_ops.info.frequency_min_hz; u32 tuner_max = fe->ops.tuner_ops.info.frequency_max_hz; u32 frontend_min = fe->ops.info.frequency_min_hz; u32 frontend_max = fe->ops.info.frequency_max_hz; *freq_min = max(frontend_min, tuner_min); if (frontend_max == 0) *freq_max = tuner_max; else if (tuner_max == 0) *freq_max = frontend_max; else *freq_max = min(frontend_max, tuner_max); if (*freq_min == 0 || *freq_max == 0) dev_warn(fe->dvb->device, "DVB: adapter %i frontend %u frequency limits undefined - fix the driver\n", fe->dvb->num, fe->id); dev_dbg(fe->dvb->device, "frequency interval: tuner: %u...%u, frontend: %u...%u", tuner_min, tuner_max, frontend_min, frontend_max); /* If the standard is for satellite, convert frequencies to kHz */ switch (c->delivery_system) { case SYS_DSS: case SYS_DVBS: case SYS_DVBS2: case SYS_TURBO: case SYS_ISDBS: *freq_min /= kHz; *freq_max /= kHz; if (tolerance) *tolerance = fe->ops.info.frequency_tolerance_hz / kHz; break; default: if (tolerance) *tolerance = fe->ops.info.frequency_tolerance_hz; break; } } static u32 dvb_frontend_get_stepsize(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; u32 fe_step = fe->ops.info.frequency_stepsize_hz; u32 tuner_step = fe->ops.tuner_ops.info.frequency_step_hz; u32 step = max(fe_step, tuner_step); switch (c->delivery_system) { case SYS_DSS: case SYS_DVBS: case SYS_DVBS2: case SYS_TURBO: case SYS_ISDBS: step /= kHz; break; default: break; } return step; } static int dvb_frontend_check_parameters(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; u32 freq_min; u32 freq_max; /* range check: frequency */ dvb_frontend_get_frequency_limits(fe, &freq_min, &freq_max, NULL); if ((freq_min && c->frequency < freq_min) || (freq_max && c->frequency > freq_max)) { dev_warn(fe->dvb->device, "DVB: adapter %i frontend %i frequency %u out of range (%u..%u)\n", fe->dvb->num, fe->id, c->frequency, freq_min, freq_max); return -EINVAL; } /* range check: symbol rate */ switch (c->delivery_system) { case SYS_DSS: case SYS_DVBS: case SYS_DVBS2: case SYS_TURBO: case SYS_DVBC_ANNEX_A: case SYS_DVBC_ANNEX_C: if ((fe->ops.info.symbol_rate_min && c->symbol_rate < 
fe->ops.info.symbol_rate_min) || (fe->ops.info.symbol_rate_max && c->symbol_rate > fe->ops.info.symbol_rate_max)) { dev_warn(fe->dvb->device, "DVB: adapter %i frontend %i symbol rate %u out of range (%u..%u)\n", fe->dvb->num, fe->id, c->symbol_rate, fe->ops.info.symbol_rate_min, fe->ops.info.symbol_rate_max); return -EINVAL; } break; default: break; } return 0; } static int dvb_frontend_clear_cache(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; int i; u32 delsys; delsys = c->delivery_system; memset(c, 0, offsetof(struct dtv_frontend_properties, strength)); c->delivery_system = delsys; dev_dbg(fe->dvb->device, "%s: Clearing cache for delivery system %d\n", __func__, c->delivery_system); c->transmission_mode = TRANSMISSION_MODE_AUTO; c->bandwidth_hz = 0; /* AUTO */ c->guard_interval = GUARD_INTERVAL_AUTO; c->hierarchy = HIERARCHY_AUTO; c->symbol_rate = 0; c->code_rate_HP = FEC_AUTO; c->code_rate_LP = FEC_AUTO; c->fec_inner = FEC_AUTO; c->rolloff = ROLLOFF_AUTO; c->voltage = SEC_VOLTAGE_OFF; c->sectone = SEC_TONE_OFF; c->pilot = PILOT_AUTO; c->isdbt_partial_reception = 0; c->isdbt_sb_mode = 0; c->isdbt_sb_subchannel = 0; c->isdbt_sb_segment_idx = 0; c->isdbt_sb_segment_count = 0; c->isdbt_layer_enabled = 7; /* All layers (A,B,C) */ for (i = 0; i < 3; i++) { c->layer[i].fec = FEC_AUTO; c->layer[i].modulation = QAM_AUTO; c->layer[i].interleaving = 0; c->layer[i].segment_count = 0; } c->stream_id = NO_STREAM_ID_FILTER; c->scrambling_sequence_index = 0;/* default sequence */ switch (c->delivery_system) { case SYS_DSS: c->modulation = QPSK; c->rolloff = ROLLOFF_20; break; case SYS_DVBS: case SYS_DVBS2: case SYS_TURBO: c->modulation = QPSK; /* implied for DVB-S in legacy API */ c->rolloff = ROLLOFF_35;/* implied for DVB-S */ break; case SYS_ATSC: c->modulation = VSB_8; break; case SYS_ISDBS: c->symbol_rate = 28860000; c->rolloff = ROLLOFF_35; c->bandwidth_hz = c->symbol_rate / 100 * 135; break; default: c->modulation = QAM_AUTO; break; } c->lna = LNA_AUTO; return 0; } #define _DTV_CMD(n) \ [n] = #n static char *dtv_cmds[DTV_MAX_COMMAND + 1] = { _DTV_CMD(DTV_TUNE), _DTV_CMD(DTV_CLEAR), /* Set */ _DTV_CMD(DTV_FREQUENCY), _DTV_CMD(DTV_BANDWIDTH_HZ), _DTV_CMD(DTV_MODULATION), _DTV_CMD(DTV_INVERSION), _DTV_CMD(DTV_DISEQC_MASTER), _DTV_CMD(DTV_SYMBOL_RATE), _DTV_CMD(DTV_INNER_FEC), _DTV_CMD(DTV_VOLTAGE), _DTV_CMD(DTV_TONE), _DTV_CMD(DTV_PILOT), _DTV_CMD(DTV_ROLLOFF), _DTV_CMD(DTV_DELIVERY_SYSTEM), _DTV_CMD(DTV_HIERARCHY), _DTV_CMD(DTV_CODE_RATE_HP), _DTV_CMD(DTV_CODE_RATE_LP), _DTV_CMD(DTV_GUARD_INTERVAL), _DTV_CMD(DTV_TRANSMISSION_MODE), _DTV_CMD(DTV_INTERLEAVING), _DTV_CMD(DTV_ISDBT_PARTIAL_RECEPTION), _DTV_CMD(DTV_ISDBT_SOUND_BROADCASTING), _DTV_CMD(DTV_ISDBT_SB_SUBCHANNEL_ID), _DTV_CMD(DTV_ISDBT_SB_SEGMENT_IDX), _DTV_CMD(DTV_ISDBT_SB_SEGMENT_COUNT), _DTV_CMD(DTV_ISDBT_LAYER_ENABLED), _DTV_CMD(DTV_ISDBT_LAYERA_FEC), _DTV_CMD(DTV_ISDBT_LAYERA_MODULATION), _DTV_CMD(DTV_ISDBT_LAYERA_SEGMENT_COUNT), _DTV_CMD(DTV_ISDBT_LAYERA_TIME_INTERLEAVING), _DTV_CMD(DTV_ISDBT_LAYERB_FEC), _DTV_CMD(DTV_ISDBT_LAYERB_MODULATION), _DTV_CMD(DTV_ISDBT_LAYERB_SEGMENT_COUNT), _DTV_CMD(DTV_ISDBT_LAYERB_TIME_INTERLEAVING), _DTV_CMD(DTV_ISDBT_LAYERC_FEC), _DTV_CMD(DTV_ISDBT_LAYERC_MODULATION), _DTV_CMD(DTV_ISDBT_LAYERC_SEGMENT_COUNT), _DTV_CMD(DTV_ISDBT_LAYERC_TIME_INTERLEAVING), _DTV_CMD(DTV_STREAM_ID), _DTV_CMD(DTV_DVBT2_PLP_ID_LEGACY), _DTV_CMD(DTV_SCRAMBLING_SEQUENCE_INDEX), _DTV_CMD(DTV_LNA), /* Get */ _DTV_CMD(DTV_DISEQC_SLAVE_REPLY), _DTV_CMD(DTV_API_VERSION), 
_DTV_CMD(DTV_ENUM_DELSYS), _DTV_CMD(DTV_ATSCMH_PARADE_ID), _DTV_CMD(DTV_ATSCMH_RS_FRAME_ENSEMBLE), _DTV_CMD(DTV_ATSCMH_FIC_VER), _DTV_CMD(DTV_ATSCMH_NOG), _DTV_CMD(DTV_ATSCMH_TNOG), _DTV_CMD(DTV_ATSCMH_SGN), _DTV_CMD(DTV_ATSCMH_PRC), _DTV_CMD(DTV_ATSCMH_RS_FRAME_MODE), _DTV_CMD(DTV_ATSCMH_RS_CODE_MODE_PRI), _DTV_CMD(DTV_ATSCMH_RS_CODE_MODE_SEC), _DTV_CMD(DTV_ATSCMH_SCCC_BLOCK_MODE), _DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_A), _DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_B), _DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_C), _DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_D), /* Statistics API */ _DTV_CMD(DTV_STAT_SIGNAL_STRENGTH), _DTV_CMD(DTV_STAT_CNR), _DTV_CMD(DTV_STAT_PRE_ERROR_BIT_COUNT), _DTV_CMD(DTV_STAT_PRE_TOTAL_BIT_COUNT), _DTV_CMD(DTV_STAT_POST_ERROR_BIT_COUNT), _DTV_CMD(DTV_STAT_POST_TOTAL_BIT_COUNT), _DTV_CMD(DTV_STAT_ERROR_BLOCK_COUNT), _DTV_CMD(DTV_STAT_TOTAL_BLOCK_COUNT), }; static char *dtv_cmd_name(u32 cmd) { cmd = array_index_nospec(cmd, DTV_MAX_COMMAND); return dtv_cmds[cmd]; } /* Synchronise the legacy tuning parameters into the cache, so that demodulator * drivers can use a single set_frontend tuning function, regardless of whether * it's being used for the legacy or new API, reducing code and complexity. */ static int dtv_property_cache_sync(struct dvb_frontend *fe, struct dtv_frontend_properties *c, const struct dvb_frontend_parameters *p) { c->frequency = p->frequency; c->inversion = p->inversion; switch (dvbv3_type(c->delivery_system)) { case DVBV3_QPSK: dev_dbg(fe->dvb->device, "%s: Preparing QPSK req\n", __func__); c->symbol_rate = p->u.qpsk.symbol_rate; c->fec_inner = p->u.qpsk.fec_inner; break; case DVBV3_QAM: dev_dbg(fe->dvb->device, "%s: Preparing QAM req\n", __func__); c->symbol_rate = p->u.qam.symbol_rate; c->fec_inner = p->u.qam.fec_inner; c->modulation = p->u.qam.modulation; break; case DVBV3_OFDM: dev_dbg(fe->dvb->device, "%s: Preparing OFDM req\n", __func__); switch (p->u.ofdm.bandwidth) { case BANDWIDTH_10_MHZ: c->bandwidth_hz = 10000000; break; case BANDWIDTH_8_MHZ: c->bandwidth_hz = 8000000; break; case BANDWIDTH_7_MHZ: c->bandwidth_hz = 7000000; break; case BANDWIDTH_6_MHZ: c->bandwidth_hz = 6000000; break; case BANDWIDTH_5_MHZ: c->bandwidth_hz = 5000000; break; case BANDWIDTH_1_712_MHZ: c->bandwidth_hz = 1712000; break; case BANDWIDTH_AUTO: c->bandwidth_hz = 0; } c->code_rate_HP = p->u.ofdm.code_rate_HP; c->code_rate_LP = p->u.ofdm.code_rate_LP; c->modulation = p->u.ofdm.constellation; c->transmission_mode = p->u.ofdm.transmission_mode; c->guard_interval = p->u.ofdm.guard_interval; c->hierarchy = p->u.ofdm.hierarchy_information; break; case DVBV3_ATSC: dev_dbg(fe->dvb->device, "%s: Preparing ATSC req\n", __func__); c->modulation = p->u.vsb.modulation; if (c->delivery_system == SYS_ATSCMH) break; if ((c->modulation == VSB_8) || (c->modulation == VSB_16)) c->delivery_system = SYS_ATSC; else c->delivery_system = SYS_DVBC_ANNEX_B; break; case DVBV3_UNKNOWN: dev_err(fe->dvb->device, "%s: doesn't know how to handle a DVBv3 call to delivery system %i\n", __func__, c->delivery_system); return -EINVAL; } return 0; } /* Ensure the cached values are set correctly in the frontend * legacy tuning structures, for the advanced tuning API. 
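 * For example, a DVBv5 cache with delivery_system = SYS_DVBT and
 * bandwidth_hz = 8000000 is folded back into the DVBv3 OFDM union as
 * p->u.ofdm.bandwidth = BANDWIDTH_8_MHZ, so legacy FE_GET_FRONTEND users
 * still see sensible values.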
*/ static int dtv_property_legacy_params_sync(struct dvb_frontend *fe, const struct dtv_frontend_properties *c, struct dvb_frontend_parameters *p) { p->frequency = c->frequency; p->inversion = c->inversion; switch (dvbv3_type(c->delivery_system)) { case DVBV3_UNKNOWN: dev_err(fe->dvb->device, "%s: doesn't know how to handle a DVBv3 call to delivery system %i\n", __func__, c->delivery_system); return -EINVAL; case DVBV3_QPSK: dev_dbg(fe->dvb->device, "%s: Preparing QPSK req\n", __func__); p->u.qpsk.symbol_rate = c->symbol_rate; p->u.qpsk.fec_inner = c->fec_inner; break; case DVBV3_QAM: dev_dbg(fe->dvb->device, "%s: Preparing QAM req\n", __func__); p->u.qam.symbol_rate = c->symbol_rate; p->u.qam.fec_inner = c->fec_inner; p->u.qam.modulation = c->modulation; break; case DVBV3_OFDM: dev_dbg(fe->dvb->device, "%s: Preparing OFDM req\n", __func__); switch (c->bandwidth_hz) { case 10000000: p->u.ofdm.bandwidth = BANDWIDTH_10_MHZ; break; case 8000000: p->u.ofdm.bandwidth = BANDWIDTH_8_MHZ; break; case 7000000: p->u.ofdm.bandwidth = BANDWIDTH_7_MHZ; break; case 6000000: p->u.ofdm.bandwidth = BANDWIDTH_6_MHZ; break; case 5000000: p->u.ofdm.bandwidth = BANDWIDTH_5_MHZ; break; case 1712000: p->u.ofdm.bandwidth = BANDWIDTH_1_712_MHZ; break; case 0: default: p->u.ofdm.bandwidth = BANDWIDTH_AUTO; } p->u.ofdm.code_rate_HP = c->code_rate_HP; p->u.ofdm.code_rate_LP = c->code_rate_LP; p->u.ofdm.constellation = c->modulation; p->u.ofdm.transmission_mode = c->transmission_mode; p->u.ofdm.guard_interval = c->guard_interval; p->u.ofdm.hierarchy_information = c->hierarchy; break; case DVBV3_ATSC: dev_dbg(fe->dvb->device, "%s: Preparing VSB req\n", __func__); p->u.vsb.modulation = c->modulation; break; } return 0; } /** * dtv_get_frontend - calls a callback for retrieving DTV parameters * @fe: struct dvb_frontend pointer * @c: struct dtv_frontend_properties pointer (DVBv5 cache) * @p_out: struct dvb_frontend_parameters pointer (DVBv3 FE struct) * * This routine calls either the DVBv3 or DVBv5 get_frontend call. * If c is not null, it will update the DVBv5 cache struct pointed by it. * If p_out is not null, it will update the DVBv3 params pointed by it. 
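 *
 * Return: zero on success (including the case where the driver provides no
 * get_frontend callback, as the cache is then already authoritative), or a
 * negative error code forwarded from the driver's get_frontend callback.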
*/ static int dtv_get_frontend(struct dvb_frontend *fe, struct dtv_frontend_properties *c, struct dvb_frontend_parameters *p_out) { int r; if (fe->ops.get_frontend) { r = fe->ops.get_frontend(fe, c); if (unlikely(r < 0)) return r; if (p_out) dtv_property_legacy_params_sync(fe, c, p_out); return 0; } /* As everything is in cache, get_frontend fops are always supported */ return 0; } static int dvb_frontend_handle_ioctl(struct file *file, unsigned int cmd, void *parg); static int dtv_property_process_get(struct dvb_frontend *fe, const struct dtv_frontend_properties *c, struct dtv_property *tvp, struct file *file) { int ncaps; unsigned int len = 1; switch (tvp->cmd) { case DTV_ENUM_DELSYS: ncaps = 0; while (ncaps < MAX_DELSYS && fe->ops.delsys[ncaps]) { tvp->u.buffer.data[ncaps] = fe->ops.delsys[ncaps]; ncaps++; } tvp->u.buffer.len = ncaps; len = ncaps; break; case DTV_FREQUENCY: tvp->u.data = c->frequency; break; case DTV_MODULATION: tvp->u.data = c->modulation; break; case DTV_BANDWIDTH_HZ: tvp->u.data = c->bandwidth_hz; break; case DTV_INVERSION: tvp->u.data = c->inversion; break; case DTV_SYMBOL_RATE: tvp->u.data = c->symbol_rate; break; case DTV_INNER_FEC: tvp->u.data = c->fec_inner; break; case DTV_PILOT: tvp->u.data = c->pilot; break; case DTV_ROLLOFF: tvp->u.data = c->rolloff; break; case DTV_DELIVERY_SYSTEM: tvp->u.data = c->delivery_system; break; case DTV_VOLTAGE: tvp->u.data = c->voltage; break; case DTV_TONE: tvp->u.data = c->sectone; break; case DTV_API_VERSION: tvp->u.data = (DVB_API_VERSION << 8) | DVB_API_VERSION_MINOR; break; case DTV_CODE_RATE_HP: tvp->u.data = c->code_rate_HP; break; case DTV_CODE_RATE_LP: tvp->u.data = c->code_rate_LP; break; case DTV_GUARD_INTERVAL: tvp->u.data = c->guard_interval; break; case DTV_TRANSMISSION_MODE: tvp->u.data = c->transmission_mode; break; case DTV_HIERARCHY: tvp->u.data = c->hierarchy; break; case DTV_INTERLEAVING: tvp->u.data = c->interleaving; break; /* ISDB-T Support here */ case DTV_ISDBT_PARTIAL_RECEPTION: tvp->u.data = c->isdbt_partial_reception; break; case DTV_ISDBT_SOUND_BROADCASTING: tvp->u.data = c->isdbt_sb_mode; break; case DTV_ISDBT_SB_SUBCHANNEL_ID: tvp->u.data = c->isdbt_sb_subchannel; break; case DTV_ISDBT_SB_SEGMENT_IDX: tvp->u.data = c->isdbt_sb_segment_idx; break; case DTV_ISDBT_SB_SEGMENT_COUNT: tvp->u.data = c->isdbt_sb_segment_count; break; case DTV_ISDBT_LAYER_ENABLED: tvp->u.data = c->isdbt_layer_enabled; break; case DTV_ISDBT_LAYERA_FEC: tvp->u.data = c->layer[0].fec; break; case DTV_ISDBT_LAYERA_MODULATION: tvp->u.data = c->layer[0].modulation; break; case DTV_ISDBT_LAYERA_SEGMENT_COUNT: tvp->u.data = c->layer[0].segment_count; break; case DTV_ISDBT_LAYERA_TIME_INTERLEAVING: tvp->u.data = c->layer[0].interleaving; break; case DTV_ISDBT_LAYERB_FEC: tvp->u.data = c->layer[1].fec; break; case DTV_ISDBT_LAYERB_MODULATION: tvp->u.data = c->layer[1].modulation; break; case DTV_ISDBT_LAYERB_SEGMENT_COUNT: tvp->u.data = c->layer[1].segment_count; break; case DTV_ISDBT_LAYERB_TIME_INTERLEAVING: tvp->u.data = c->layer[1].interleaving; break; case DTV_ISDBT_LAYERC_FEC: tvp->u.data = c->layer[2].fec; break; case DTV_ISDBT_LAYERC_MODULATION: tvp->u.data = c->layer[2].modulation; break; case DTV_ISDBT_LAYERC_SEGMENT_COUNT: tvp->u.data = c->layer[2].segment_count; break; case DTV_ISDBT_LAYERC_TIME_INTERLEAVING: tvp->u.data = c->layer[2].interleaving; break; /* Multistream support */ case DTV_STREAM_ID: case DTV_DVBT2_PLP_ID_LEGACY: tvp->u.data = c->stream_id; break; /* Physical layer scrambling support */ case 
DTV_SCRAMBLING_SEQUENCE_INDEX: tvp->u.data = c->scrambling_sequence_index; break; /* ATSC-MH */ case DTV_ATSCMH_FIC_VER: tvp->u.data = fe->dtv_property_cache.atscmh_fic_ver; break; case DTV_ATSCMH_PARADE_ID: tvp->u.data = fe->dtv_property_cache.atscmh_parade_id; break; case DTV_ATSCMH_NOG: tvp->u.data = fe->dtv_property_cache.atscmh_nog; break; case DTV_ATSCMH_TNOG: tvp->u.data = fe->dtv_property_cache.atscmh_tnog; break; case DTV_ATSCMH_SGN: tvp->u.data = fe->dtv_property_cache.atscmh_sgn; break; case DTV_ATSCMH_PRC: tvp->u.data = fe->dtv_property_cache.atscmh_prc; break; case DTV_ATSCMH_RS_FRAME_MODE: tvp->u.data = fe->dtv_property_cache.atscmh_rs_frame_mode; break; case DTV_ATSCMH_RS_FRAME_ENSEMBLE: tvp->u.data = fe->dtv_property_cache.atscmh_rs_frame_ensemble; break; case DTV_ATSCMH_RS_CODE_MODE_PRI: tvp->u.data = fe->dtv_property_cache.atscmh_rs_code_mode_pri; break; case DTV_ATSCMH_RS_CODE_MODE_SEC: tvp->u.data = fe->dtv_property_cache.atscmh_rs_code_mode_sec; break; case DTV_ATSCMH_SCCC_BLOCK_MODE: tvp->u.data = fe->dtv_property_cache.atscmh_sccc_block_mode; break; case DTV_ATSCMH_SCCC_CODE_MODE_A: tvp->u.data = fe->dtv_property_cache.atscmh_sccc_code_mode_a; break; case DTV_ATSCMH_SCCC_CODE_MODE_B: tvp->u.data = fe->dtv_property_cache.atscmh_sccc_code_mode_b; break; case DTV_ATSCMH_SCCC_CODE_MODE_C: tvp->u.data = fe->dtv_property_cache.atscmh_sccc_code_mode_c; break; case DTV_ATSCMH_SCCC_CODE_MODE_D: tvp->u.data = fe->dtv_property_cache.atscmh_sccc_code_mode_d; break; case DTV_LNA: tvp->u.data = c->lna; break; /* Fill quality measures */ case DTV_STAT_SIGNAL_STRENGTH: tvp->u.st = c->strength; if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32)) tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32); len = tvp->u.buffer.len; break; case DTV_STAT_CNR: tvp->u.st = c->cnr; if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32)) tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32); len = tvp->u.buffer.len; break; case DTV_STAT_PRE_ERROR_BIT_COUNT: tvp->u.st = c->pre_bit_error; if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32)) tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32); len = tvp->u.buffer.len; break; case DTV_STAT_PRE_TOTAL_BIT_COUNT: tvp->u.st = c->pre_bit_count; if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32)) tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32); len = tvp->u.buffer.len; break; case DTV_STAT_POST_ERROR_BIT_COUNT: tvp->u.st = c->post_bit_error; if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32)) tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32); len = tvp->u.buffer.len; break; case DTV_STAT_POST_TOTAL_BIT_COUNT: tvp->u.st = c->post_bit_count; if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32)) tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32); len = tvp->u.buffer.len; break; case DTV_STAT_ERROR_BLOCK_COUNT: tvp->u.st = c->block_error; if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32)) tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32); len = tvp->u.buffer.len; break; case DTV_STAT_TOTAL_BLOCK_COUNT: tvp->u.st = c->block_count; if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32)) tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32); len = tvp->u.buffer.len; break; default: dev_dbg(fe->dvb->device, "%s: FE property %d doesn't exist\n", __func__, tvp->cmd); return -EINVAL; } if (len < 1) len = 1; dev_dbg(fe->dvb->device, "%s: GET cmd 0x%08x (%s) len %d: %*ph\n", __func__, tvp->cmd, dtv_cmd_name(tvp->cmd), tvp->u.buffer.len, tvp->u.buffer.len, tvp->u.buffer.data); return 0; } static int dtv_set_frontend(struct dvb_frontend *fe); static bool is_dvbv3_delsys(u32 delsys) { return (delsys == 
SYS_DVBT) || (delsys == SYS_DVBC_ANNEX_A) || (delsys == SYS_DVBS) || (delsys == SYS_ATSC); } /** * emulate_delivery_system - emulate a DVBv5 delivery system with a DVBv3 type * @fe: frontend struct * @delsys: DVBv5 type that will be used for emulation * * Provides emulation for delivery systems that are compatible with the old * DVBv3 call. Among its usages, it provides support for ISDB-T, and allows * using a DVB-S2-only frontend as if it were a DVB-S one, provided that the * frontend parameters are compatible with the DVB-S spec. */ static int emulate_delivery_system(struct dvb_frontend *fe, u32 delsys) { int i; struct dtv_frontend_properties *c = &fe->dtv_property_cache; c->delivery_system = delsys; /* * If the call is for ISDB-T, put it into full-seg, auto mode, TV */ if (c->delivery_system == SYS_ISDBT) { dev_dbg(fe->dvb->device, "%s: Using defaults for SYS_ISDBT\n", __func__); if (!c->bandwidth_hz) c->bandwidth_hz = 6000000; c->isdbt_partial_reception = 0; c->isdbt_sb_mode = 0; c->isdbt_sb_subchannel = 0; c->isdbt_sb_segment_idx = 0; c->isdbt_sb_segment_count = 0; c->isdbt_layer_enabled = 7; for (i = 0; i < 3; i++) { c->layer[i].fec = FEC_AUTO; c->layer[i].modulation = QAM_AUTO; c->layer[i].interleaving = 0; c->layer[i].segment_count = 0; } } dev_dbg(fe->dvb->device, "%s: change delivery system on cache to %d\n", __func__, c->delivery_system); return 0; } /** * dvbv5_set_delivery_system - Sets the delivery system for a DVBv5 API call * @fe: frontend struct * @desired_system: delivery system requested by the user * * A DVBv5 call knows which delivery system it wants, so just set it. * * There are, however, a few known issues with early DVBv5 applications that * are also handled by this logic: * * 1) Some early apps use SYS_UNDEFINED as the desired delivery system. * This is an API violation, but, as we don't want to break userspace, * convert it to the first supported delivery system. * 2) Some apps might be using a DVBv5 call in a wrong way, passing, for * example, SYS_DVBT instead of SYS_ISDBT. This is because early usage of * ISDB-T provided backward compatibility with DVB-T. */ static int dvbv5_set_delivery_system(struct dvb_frontend *fe, u32 desired_system) { int ncaps; u32 delsys = SYS_UNDEFINED; struct dtv_frontend_properties *c = &fe->dtv_property_cache; enum dvbv3_emulation_type type; /* * It was reported that some old DVBv5 applications were * filling delivery_system with SYS_UNDEFINED. If this happens, * assume that the application wants to use the first supported * delivery system. */ if (desired_system == SYS_UNDEFINED) desired_system = fe->ops.delsys[0]; /* * This is a DVBv5 call, so it likely knows the supported * delivery systems. Check whether the desired delivery system is * supported */ ncaps = 0; while (ncaps < MAX_DELSYS && fe->ops.delsys[ncaps]) { if (fe->ops.delsys[ncaps] == desired_system) { c->delivery_system = desired_system; dev_dbg(fe->dvb->device, "%s: Changing delivery system to %d\n", __func__, desired_system); return 0; } ncaps++; } /* * The requested delivery system isn't supported. Maybe userspace * is requesting a DVBv3 compatible delivery system.
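 * For example, a DVB-S2-only demod (delsys = { SYS_DVBS2 }) asked for
 * SYS_DVBS via DVBv5 lands here: SYS_DVBS is a valid DVBv3 system of the
 * same DVBV3_QPSK type, so instead of failing, the request is handled
 * through the emulation path below.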
* * The emulation only works if the desired system is one of the * delivery systems supported by the DVBv3 API */ if (!is_dvbv3_delsys(desired_system)) { dev_dbg(fe->dvb->device, "%s: Delivery system %d not supported.\n", __func__, desired_system); return -EINVAL; } type = dvbv3_type(desired_system); /* * Get the last non-DVBv3 delivery system that has the same type * as the desired system */ ncaps = 0; while (ncaps < MAX_DELSYS && fe->ops.delsys[ncaps]) { if (dvbv3_type(fe->ops.delsys[ncaps]) == type) delsys = fe->ops.delsys[ncaps]; ncaps++; } /* There's nothing compatible with the desired delivery system */ if (delsys == SYS_UNDEFINED) { dev_dbg(fe->dvb->device, "%s: Delivery system %d not supported in emulation mode.\n", __func__, desired_system); return -EINVAL; } dev_dbg(fe->dvb->device, "%s: Using delivery system %d emulated as if it were %d\n", __func__, delsys, desired_system); return emulate_delivery_system(fe, delsys); } /** * dvbv3_set_delivery_system - Sets the delivery system for a DVBv3 API call * @fe: frontend struct * * A DVBv3 call doesn't say which delivery system it wants, and it doesn't * allow switching between different types. Because of that, userspace * should use DVBv5 instead. * However, in order to avoid breaking the userspace API, limited backward * compatibility support is provided. * * There are some delivery systems that are incompatible with DVBv3 calls. * * This routine should work fine for frontends that support just one delivery * system. * * For frontends that support multiple delivery systems: * 1) It defaults to using the first supported delivery system. There's a * userspace application that allows changing it at runtime; * * 2) If the current delivery system is not compatible with DVBv3, it picks * the first one that is. * * NOTE: in order for this to work with applications like Kaffeine that * use a DVBv5 call for DVB-S2 and a DVBv3 call to go back to * DVB-S, drivers that support both DVB-S and DVB-S2 should list the * SYS_DVBS entry before SYS_DVBS2, otherwise it won't switch back * to DVB-S.
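 *
 * A driver following that recommendation would therefore declare
 * something like
 *
 *	.delsys = { SYS_DVBS, SYS_DVBS2 },
 *
 * in its struct dvb_frontend_ops, so that this fallback picks SYS_DVBS
 * for plain DVBv3 callers while DVBv5 callers can still select SYS_DVBS2
 * explicitly.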
*/ static int dvbv3_set_delivery_system(struct dvb_frontend *fe) { int ncaps; u32 delsys = SYS_UNDEFINED; struct dtv_frontend_properties *c = &fe->dtv_property_cache; /* If not set yet, defaults to the first supported delivery system */ if (c->delivery_system == SYS_UNDEFINED) c->delivery_system = fe->ops.delsys[0]; /* * Trivial case: just use the current one, if it already a DVBv3 * delivery system */ if (is_dvbv3_delsys(c->delivery_system)) { dev_dbg(fe->dvb->device, "%s: Using delivery system to %d\n", __func__, c->delivery_system); return 0; } /* * Seek for the first delivery system that it is compatible with a * DVBv3 standard */ ncaps = 0; while (ncaps < MAX_DELSYS && fe->ops.delsys[ncaps]) { if (dvbv3_type(fe->ops.delsys[ncaps]) != DVBV3_UNKNOWN) { delsys = fe->ops.delsys[ncaps]; break; } ncaps++; } if (delsys == SYS_UNDEFINED) { dev_dbg(fe->dvb->device, "%s: Couldn't find a delivery system that works with FE_SET_FRONTEND\n", __func__); return -EINVAL; } return emulate_delivery_system(fe, delsys); } static void prepare_tuning_algo_parameters(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dvb_frontend_tune_settings fetunesettings = { 0 }; /* get frontend-specific tuning settings */ if (fe->ops.get_tune_settings && (fe->ops.get_tune_settings(fe, &fetunesettings) == 0)) { fepriv->min_delay = (fetunesettings.min_delay_ms * HZ) / 1000; fepriv->max_drift = fetunesettings.max_drift; fepriv->step_size = fetunesettings.step_size; } else { /* default values */ switch (c->delivery_system) { case SYS_DSS: case SYS_DVBS: case SYS_DVBS2: case SYS_ISDBS: case SYS_TURBO: case SYS_DVBC_ANNEX_A: case SYS_DVBC_ANNEX_C: fepriv->min_delay = HZ / 20; fepriv->step_size = c->symbol_rate / 16000; fepriv->max_drift = c->symbol_rate / 2000; break; case SYS_DVBT: case SYS_DVBT2: case SYS_ISDBT: case SYS_DTMB: fepriv->min_delay = HZ / 20; fepriv->step_size = dvb_frontend_get_stepsize(fe) * 2; fepriv->max_drift = fepriv->step_size + 1; break; default: /* * FIXME: This sounds wrong! if freqency_stepsize is * defined by the frontend, why not use it??? */ fepriv->min_delay = HZ / 20; fepriv->step_size = 0; /* no zigzag */ fepriv->max_drift = 0; break; } } if (dvb_override_tune_delay > 0) fepriv->min_delay = (dvb_override_tune_delay * HZ) / 1000; } /** * dtv_property_process_set - Sets a single DTV property * @fe: Pointer to &struct dvb_frontend * @file: Pointer to &struct file * @cmd: Digital TV command * @data: An unsigned 32-bits number * * This routine assigns the property * value to the corresponding member of * &struct dtv_frontend_properties * * Returns: * Zero on success, negative errno on failure. */ static int dtv_property_process_set(struct dvb_frontend *fe, struct file *file, u32 cmd, u32 data) { int r = 0; struct dtv_frontend_properties *c = &fe->dtv_property_cache; /** Dump DTV command name and value*/ if (!cmd || cmd > DTV_MAX_COMMAND) dev_warn(fe->dvb->device, "%s: SET cmd 0x%08x undefined\n", __func__, cmd); else dev_dbg(fe->dvb->device, "%s: SET cmd 0x%08x (%s) to 0x%08x\n", __func__, cmd, dtv_cmd_name(cmd), data); switch (cmd) { case DTV_CLEAR: /* * Reset a cache of data specific to the frontend here. This does * not effect hardware. 
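 * Only the in-kernel dtv_property_cache is re-initialised (see
 * dvb_frontend_clear_cache() above); the demodulator itself is not
 * touched until a subsequent DTV_TUNE.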
*/ dvb_frontend_clear_cache(fe); break; case DTV_TUNE: /* * Use the cached Digital TV properties to tune the * frontend */ dev_dbg(fe->dvb->device, "%s: Setting the frontend from property cache\n", __func__); r = dtv_set_frontend(fe); break; case DTV_FREQUENCY: c->frequency = data; break; case DTV_MODULATION: c->modulation = data; break; case DTV_BANDWIDTH_HZ: c->bandwidth_hz = data; break; case DTV_INVERSION: c->inversion = data; break; case DTV_SYMBOL_RATE: c->symbol_rate = data; break; case DTV_INNER_FEC: c->fec_inner = data; break; case DTV_PILOT: c->pilot = data; break; case DTV_ROLLOFF: c->rolloff = data; break; case DTV_DELIVERY_SYSTEM: r = dvbv5_set_delivery_system(fe, data); break; case DTV_VOLTAGE: c->voltage = data; r = dvb_frontend_handle_ioctl(file, FE_SET_VOLTAGE, (void *)c->voltage); break; case DTV_TONE: c->sectone = data; r = dvb_frontend_handle_ioctl(file, FE_SET_TONE, (void *)c->sectone); break; case DTV_CODE_RATE_HP: c->code_rate_HP = data; break; case DTV_CODE_RATE_LP: c->code_rate_LP = data; break; case DTV_GUARD_INTERVAL: c->guard_interval = data; break; case DTV_TRANSMISSION_MODE: c->transmission_mode = data; break; case DTV_HIERARCHY: c->hierarchy = data; break; case DTV_INTERLEAVING: c->interleaving = data; break; /* ISDB-T Support here */ case DTV_ISDBT_PARTIAL_RECEPTION: c->isdbt_partial_reception = data; break; case DTV_ISDBT_SOUND_BROADCASTING: c->isdbt_sb_mode = data; break; case DTV_ISDBT_SB_SUBCHANNEL_ID: c->isdbt_sb_subchannel = data; break; case DTV_ISDBT_SB_SEGMENT_IDX: c->isdbt_sb_segment_idx = data; break; case DTV_ISDBT_SB_SEGMENT_COUNT: c->isdbt_sb_segment_count = data; break; case DTV_ISDBT_LAYER_ENABLED: c->isdbt_layer_enabled = data; break; case DTV_ISDBT_LAYERA_FEC: c->layer[0].fec = data; break; case DTV_ISDBT_LAYERA_MODULATION: c->layer[0].modulation = data; break; case DTV_ISDBT_LAYERA_SEGMENT_COUNT: c->layer[0].segment_count = data; break; case DTV_ISDBT_LAYERA_TIME_INTERLEAVING: c->layer[0].interleaving = data; break; case DTV_ISDBT_LAYERB_FEC: c->layer[1].fec = data; break; case DTV_ISDBT_LAYERB_MODULATION: c->layer[1].modulation = data; break; case DTV_ISDBT_LAYERB_SEGMENT_COUNT: c->layer[1].segment_count = data; break; case DTV_ISDBT_LAYERB_TIME_INTERLEAVING: c->layer[1].interleaving = data; break; case DTV_ISDBT_LAYERC_FEC: c->layer[2].fec = data; break; case DTV_ISDBT_LAYERC_MODULATION: c->layer[2].modulation = data; break; case DTV_ISDBT_LAYERC_SEGMENT_COUNT: c->layer[2].segment_count = data; break; case DTV_ISDBT_LAYERC_TIME_INTERLEAVING: c->layer[2].interleaving = data; break; /* Multistream support */ case DTV_STREAM_ID: case DTV_DVBT2_PLP_ID_LEGACY: c->stream_id = data; break; /* Physical layer scrambling support */ case DTV_SCRAMBLING_SEQUENCE_INDEX: c->scrambling_sequence_index = data; break; /* ATSC-MH */ case DTV_ATSCMH_PARADE_ID: fe->dtv_property_cache.atscmh_parade_id = data; break; case DTV_ATSCMH_RS_FRAME_ENSEMBLE: fe->dtv_property_cache.atscmh_rs_frame_ensemble = data; break; case DTV_LNA: c->lna = data; if (fe->ops.set_lna) r = fe->ops.set_lna(fe); if (r < 0) c->lna = LNA_AUTO; break; default: return -EINVAL; } return r; } static int dvb_frontend_do_ioctl(struct file *file, unsigned int cmd, void *parg) { struct dvb_device *dvbdev = file->private_data; struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; int err; dev_dbg(fe->dvb->device, "%s: (%d)\n", __func__, _IOC_NR(cmd)); if (down_interruptible(&fepriv->sem)) return -ERESTARTSYS; if (fe->exit != DVB_FE_NO_EXIT) { 
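		/* The frontend is being torn down (dvb_frontend_stop() or
		 * device removal has already flagged fe->exit); fail the
		 * ioctl rather than touching hardware that may be going
		 * away. */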
up(&fepriv->sem); return -ENODEV; } /* * If the frontend is opened in read-only mode, only the ioctls * that don't interfere with the tune logic should be accepted. * That allows an external application to monitor the DVB QoS and * statistics parameters. * * That matches all _IOR() ioctls, except for two special cases: * - FE_GET_EVENT is part of the tuning logic on a DVB application; * - FE_DISEQC_RECV_SLAVE_REPLY is part of DiSEqC 2.0 * setup * So, those two ioctls should also return -EPERM, as otherwise * reading from them would interfere with a DVB tune application */ if ((file->f_flags & O_ACCMODE) == O_RDONLY && (_IOC_DIR(cmd) != _IOC_READ || cmd == FE_GET_EVENT || cmd == FE_DISEQC_RECV_SLAVE_REPLY)) { up(&fepriv->sem); return -EPERM; } err = dvb_frontend_handle_ioctl(file, cmd, parg); up(&fepriv->sem); return err; } static long dvb_frontend_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct dvb_device *dvbdev = file->private_data; if (!dvbdev) return -ENODEV; return dvb_usercopy(file, cmd, arg, dvb_frontend_do_ioctl); } #ifdef CONFIG_COMPAT struct compat_dtv_property { __u32 cmd; __u32 reserved[3]; union { __u32 data; struct dtv_fe_stats st; struct { __u8 data[32]; __u32 len; __u32 reserved1[3]; compat_uptr_t reserved2; } buffer; } u; int result; } __attribute__ ((packed)); struct compat_dtv_properties { __u32 num; compat_uptr_t props; }; #define COMPAT_FE_SET_PROPERTY _IOW('o', 82, struct compat_dtv_properties) #define COMPAT_FE_GET_PROPERTY _IOR('o', 83, struct compat_dtv_properties) static int dvb_frontend_handle_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct dvb_device *dvbdev = file->private_data; struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; int i, err = 0; if (cmd == COMPAT_FE_SET_PROPERTY) { struct compat_dtv_properties prop, *tvps = NULL; struct compat_dtv_property *tvp = NULL; if (copy_from_user(&prop, compat_ptr(arg), sizeof(prop))) return -EFAULT; tvps = &prop; /* * Put an arbitrary limit on the number of messages that can * be sent at once */ if (!tvps->num || (tvps->num > DTV_IOCTL_MAX_MSGS)) return -EINVAL; tvp = memdup_array_user(compat_ptr(tvps->props), tvps->num, sizeof(*tvp)); if (IS_ERR(tvp)) return PTR_ERR(tvp); for (i = 0; i < tvps->num; i++) { err = dtv_property_process_set(fe, file, (tvp + i)->cmd, (tvp + i)->u.data); if (err < 0) { kfree(tvp); return err; } } kfree(tvp); } else if (cmd == COMPAT_FE_GET_PROPERTY) { struct compat_dtv_properties prop, *tvps = NULL; struct compat_dtv_property *tvp = NULL; struct dtv_frontend_properties getp = fe->dtv_property_cache; if (copy_from_user(&prop, compat_ptr(arg), sizeof(prop))) return -EFAULT; tvps = &prop; /* * Put an arbitrary limit on the number of messages that can * be sent at once */ if (!tvps->num || (tvps->num > DTV_IOCTL_MAX_MSGS)) return -EINVAL; tvp = memdup_array_user(compat_ptr(tvps->props), tvps->num, sizeof(*tvp)); if (IS_ERR(tvp)) return PTR_ERR(tvp); /* * Let's use our own copy of property cache, in order to * avoid mangling with DTV zigzag logic, as drivers might * return crap, if they don't check if the data is available * before updating the properties cache. 
*/ if (fepriv->state != FESTATE_IDLE) { err = dtv_get_frontend(fe, &getp, NULL); if (err < 0) { kfree(tvp); return err; } } for (i = 0; i < tvps->num; i++) { err = dtv_property_process_get( fe, &getp, (struct dtv_property *)(tvp + i), file); if (err < 0) { kfree(tvp); return err; } } if (copy_to_user((void __user *)compat_ptr(tvps->props), tvp, tvps->num * sizeof(struct compat_dtv_property))) { kfree(tvp); return -EFAULT; } kfree(tvp); } return err; } static long dvb_frontend_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct dvb_device *dvbdev = file->private_data; struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; int err; if (cmd == COMPAT_FE_SET_PROPERTY || cmd == COMPAT_FE_GET_PROPERTY) { if (down_interruptible(&fepriv->sem)) return -ERESTARTSYS; err = dvb_frontend_handle_compat_ioctl(file, cmd, arg); up(&fepriv->sem); return err; } return dvb_frontend_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); } #endif static int dtv_set_frontend(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; u32 rolloff = 0; if (dvb_frontend_check_parameters(fe) < 0) return -EINVAL; /* * Initialize output parameters to match the values given by * the user. FE_SET_FRONTEND triggers an initial frontend event * with status = 0, which copies output parameters to userspace. */ dtv_property_legacy_params_sync(fe, c, &fepriv->parameters_out); /* * Be sure that the bandwidth will be filled for all * non-satellite systems, as tuners need to know what * low pass/Nyquist half filter should be applied, in * order to avoid inter-channel noise. * * ISDB-T and DVB-T/T2 already sets bandwidth. * ATSC and DVB-C don't set, so, the core should fill it. * * On DVB-C Annex A and C, the bandwidth is a function of * the roll-off and symbol rate. Annex B defines different * roll-off factors depending on the modulation. Fortunately, * Annex B is only used with 6MHz, so there's no need to * calculate it. * * While not officially supported, a side effect of handling it at * the cache level is that a program could retrieve the bandwidth * via DTV_BANDWIDTH_HZ, which may be useful for test programs. 
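 * As a worked example: a DVB-S transponder with symbol_rate = 27500000
 * and the implied 0.35 roll-off gets
 * bandwidth_hz = 27500000 * 135 / 100 = 37125000 Hz from the code below.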
*/ switch (c->delivery_system) { case SYS_ATSC: case SYS_DVBC_ANNEX_B: c->bandwidth_hz = 6000000; break; case SYS_DVBC_ANNEX_A: rolloff = 115; break; case SYS_DVBC_ANNEX_C: rolloff = 113; break; case SYS_DSS: rolloff = 120; break; case SYS_DVBS: case SYS_TURBO: case SYS_ISDBS: rolloff = 135; break; case SYS_DVBS2: switch (c->rolloff) { case ROLLOFF_20: rolloff = 120; break; case ROLLOFF_25: rolloff = 125; break; default: case ROLLOFF_35: rolloff = 135; } break; default: break; } if (rolloff) c->bandwidth_hz = mult_frac(c->symbol_rate, rolloff, 100); /* force auto frequency inversion if requested */ if (dvb_force_auto_inversion) c->inversion = INVERSION_AUTO; /* * without hierarchical coding code_rate_LP is irrelevant, * so we tolerate the otherwise invalid FEC_NONE setting */ if (c->hierarchy == HIERARCHY_NONE && c->code_rate_LP == FEC_NONE) c->code_rate_LP = FEC_AUTO; prepare_tuning_algo_parameters(fe); fepriv->state = FESTATE_RETUNE; /* Request the search algorithm to search */ fepriv->algo_status |= DVBFE_ALGO_SEARCH_AGAIN; dvb_frontend_clear_events(fe); dvb_frontend_add_event(fe, 0); dvb_frontend_wakeup(fe); fepriv->status = 0; return 0; } static int dvb_get_property(struct dvb_frontend *fe, struct file *file, struct dtv_properties *tvps) { struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dtv_property *tvp = NULL; struct dtv_frontend_properties getp; int i, err; memcpy(&getp, &fe->dtv_property_cache, sizeof(getp)); dev_dbg(fe->dvb->device, "%s: properties.num = %d\n", __func__, tvps->num); dev_dbg(fe->dvb->device, "%s: properties.props = %p\n", __func__, tvps->props); /* * Put an arbitrary limit on the number of messages that can * be sent at once */ if (!tvps->num || tvps->num > DTV_IOCTL_MAX_MSGS) return -EINVAL; tvp = memdup_array_user((void __user *)tvps->props, tvps->num, sizeof(*tvp)); if (IS_ERR(tvp)) return PTR_ERR(tvp); /* * Let's use our own copy of property cache, in order to * avoid mangling with DTV zigzag logic, as drivers might * return crap, if they don't check if the data is available * before updating the properties cache. */ if (fepriv->state != FESTATE_IDLE) { err = dtv_get_frontend(fe, &getp, NULL); if (err < 0) goto out; } for (i = 0; i < tvps->num; i++) { err = dtv_property_process_get(fe, &getp, tvp + i, file); if (err < 0) goto out; } if (copy_to_user((void __user *)tvps->props, tvp, tvps->num * sizeof(struct dtv_property))) { err = -EFAULT; goto out; } err = 0; out: kfree(tvp); return err; } static int dvb_get_frontend(struct dvb_frontend *fe, struct dvb_frontend_parameters *p_out) { struct dtv_frontend_properties getp; /* * Let's use our own copy of property cache, in order to * avoid mangling with DTV zigzag logic, as drivers might * return crap, if they don't check if the data is available * before updating the properties cache. 
*/ memcpy(&getp, &fe->dtv_property_cache, sizeof(getp)); return dtv_get_frontend(fe, &getp, p_out); } static int dvb_frontend_handle_ioctl(struct file *file, unsigned int cmd, void *parg) { struct dvb_device *dvbdev = file->private_data; struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; int i, err = -ENOTSUPP; dev_dbg(fe->dvb->device, "%s:\n", __func__); switch (cmd) { case FE_SET_PROPERTY: { struct dtv_properties *tvps = parg; struct dtv_property *tvp = NULL; dev_dbg(fe->dvb->device, "%s: properties.num = %d\n", __func__, tvps->num); dev_dbg(fe->dvb->device, "%s: properties.props = %p\n", __func__, tvps->props); /* * Put an arbitrary limit on the number of messages that can * be sent at once */ if (!tvps->num || (tvps->num > DTV_IOCTL_MAX_MSGS)) return -EINVAL; tvp = memdup_array_user((void __user *)tvps->props, tvps->num, sizeof(*tvp)); if (IS_ERR(tvp)) return PTR_ERR(tvp); for (i = 0; i < tvps->num; i++) { err = dtv_property_process_set(fe, file, (tvp + i)->cmd, (tvp + i)->u.data); if (err < 0) { kfree(tvp); return err; } } kfree(tvp); err = 0; break; } case FE_GET_PROPERTY: err = dvb_get_property(fe, file, parg); break; case FE_GET_INFO: { struct dvb_frontend_info *info = parg; memset(info, 0, sizeof(*info)); strscpy(info->name, fe->ops.info.name, sizeof(info->name)); info->symbol_rate_min = fe->ops.info.symbol_rate_min; info->symbol_rate_max = fe->ops.info.symbol_rate_max; info->symbol_rate_tolerance = fe->ops.info.symbol_rate_tolerance; info->caps = fe->ops.info.caps; info->frequency_stepsize = dvb_frontend_get_stepsize(fe); dvb_frontend_get_frequency_limits(fe, &info->frequency_min, &info->frequency_max, &info->frequency_tolerance); /* * Associate the 4 delivery systems supported by DVBv3 * API with their DVBv5 counterpart. For the other standards, * use the closest type, assuming that it would hopefully * work with a DVBv3 application. * It should be noticed that, on multi-frontend devices with * different types (terrestrial and cable, for example), * a pure DVBv3 application won't be able to use all delivery * systems. Yet, changing the DVBv5 cache to the other delivery * system should be enough for making it work. 
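 * For example, a SYS_DVBT2-only frontend is reported as FE_OFDM and a
 * SYS_DVBS2-only one as FE_QPSK, which is usually close enough for a
 * legacy DVBv3 application to keep working.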
*/ switch (dvbv3_type(c->delivery_system)) { case DVBV3_QPSK: info->type = FE_QPSK; break; case DVBV3_ATSC: info->type = FE_ATSC; break; case DVBV3_QAM: info->type = FE_QAM; break; case DVBV3_OFDM: info->type = FE_OFDM; break; default: dev_err(fe->dvb->device, "%s: doesn't know how to handle a DVBv3 call to delivery system %i\n", __func__, c->delivery_system); info->type = FE_OFDM; } dev_dbg(fe->dvb->device, "%s: current delivery system on cache: %d, V3 type: %d\n", __func__, c->delivery_system, info->type); /* Set CAN_INVERSION_AUTO bit on in other than oneshot mode */ if (!(fepriv->tune_mode_flags & FE_TUNE_MODE_ONESHOT)) info->caps |= FE_CAN_INVERSION_AUTO; err = 0; break; } case FE_READ_STATUS: { enum fe_status *status = parg; /* if retune was requested but hasn't occurred yet, prevent * that user get signal state from previous tuning */ if (fepriv->state == FESTATE_RETUNE || fepriv->state == FESTATE_ERROR) { err = 0; *status = 0; break; } if (fe->ops.read_status) err = fe->ops.read_status(fe, status); break; } case FE_DISEQC_RESET_OVERLOAD: if (fe->ops.diseqc_reset_overload) { err = fe->ops.diseqc_reset_overload(fe); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } break; case FE_DISEQC_SEND_MASTER_CMD: if (fe->ops.diseqc_send_master_cmd) { struct dvb_diseqc_master_cmd *cmd = parg; if (cmd->msg_len > sizeof(cmd->msg)) { err = -EINVAL; break; } err = fe->ops.diseqc_send_master_cmd(fe, cmd); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } break; case FE_DISEQC_SEND_BURST: if (fe->ops.diseqc_send_burst) { err = fe->ops.diseqc_send_burst(fe, (long)parg); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } break; case FE_SET_TONE: if (fe->ops.set_tone) { fepriv->tone = (long)parg; err = fe->ops.set_tone(fe, fepriv->tone); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } break; case FE_SET_VOLTAGE: if (fe->ops.set_voltage) { fepriv->voltage = (long)parg; err = fe->ops.set_voltage(fe, fepriv->voltage); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } break; case FE_DISEQC_RECV_SLAVE_REPLY: if (fe->ops.diseqc_recv_slave_reply) err = fe->ops.diseqc_recv_slave_reply(fe, parg); break; case FE_ENABLE_HIGH_LNB_VOLTAGE: if (fe->ops.enable_high_lnb_voltage) err = fe->ops.enable_high_lnb_voltage(fe, (long)parg); break; case FE_SET_FRONTEND_TUNE_MODE: fepriv->tune_mode_flags = (unsigned long)parg; err = 0; break; /* DEPRECATED dish control ioctls */ case FE_DISHNETWORK_SEND_LEGACY_CMD: if (fe->ops.dishnetwork_send_legacy_command) { err = fe->ops.dishnetwork_send_legacy_command(fe, (unsigned long)parg); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } else if (fe->ops.set_voltage) { /* * NOTE: This is a fallback condition. Some frontends * (stv0299 for instance) take longer than 8msec to * respond to a set_voltage command. Those switches * need custom routines to switch properly. For all * other frontends, the following should work ok. * Dish network legacy switches (as used by Dish500) * are controlled by sending 9-bit command words * spaced 8msec apart. * the actual command word is switch/port dependent * so it is up to the userspace application to send * the right command. 
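 * As the loop below shows, the word is clocked out LSB first: each of
 * the nine bit periods compares the current bit with the previous
 * voltage level and toggles between SEC_VOLTAGE_13 and SEC_VOLTAGE_18
 * only when the bit changes, with dvb_frontend_sleep_until() keeping
 * the 8 ms spacing.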
* The command must always start with a '0' after * initialization, so parg is 8 bits and does not * include the initialization or start bit */ unsigned long swcmd = ((unsigned long)parg) << 1; ktime_t nexttime; ktime_t tv[10]; int i; u8 last = 1; if (dvb_frontend_debug) dprintk("switch command: 0x%04lx\n", swcmd); nexttime = ktime_get_boottime(); if (dvb_frontend_debug) tv[0] = nexttime; /* before sending a command, initialize by sending * a 32ms 18V to the switch */ fe->ops.set_voltage(fe, SEC_VOLTAGE_18); dvb_frontend_sleep_until(&nexttime, 32000); for (i = 0; i < 9; i++) { if (dvb_frontend_debug) tv[i + 1] = ktime_get_boottime(); if ((swcmd & 0x01) != last) { /* set voltage to (last ? 13V : 18V) */ fe->ops.set_voltage(fe, (last) ? SEC_VOLTAGE_13 : SEC_VOLTAGE_18); last = (last) ? 0 : 1; } swcmd = swcmd >> 1; if (i != 8) dvb_frontend_sleep_until(&nexttime, 8000); } if (dvb_frontend_debug) { dprintk("(adapter %d): switch delay (should be 32k followed by all 8k)\n", fe->dvb->num); for (i = 1; i < 10; i++) pr_info("%d: %d\n", i, (int)ktime_us_delta(tv[i], tv[i - 1])); } err = 0; fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } break; /* DEPRECATED statistics ioctls */ case FE_READ_BER: if (fe->ops.read_ber) { if (fepriv->thread) err = fe->ops.read_ber(fe, parg); else err = -EAGAIN; } break; case FE_READ_SIGNAL_STRENGTH: if (fe->ops.read_signal_strength) { if (fepriv->thread) err = fe->ops.read_signal_strength(fe, parg); else err = -EAGAIN; } break; case FE_READ_SNR: if (fe->ops.read_snr) { if (fepriv->thread) err = fe->ops.read_snr(fe, parg); else err = -EAGAIN; } break; case FE_READ_UNCORRECTED_BLOCKS: if (fe->ops.read_ucblocks) { if (fepriv->thread) err = fe->ops.read_ucblocks(fe, parg); else err = -EAGAIN; } break; /* DEPRECATED DVBv3 ioctls */ case FE_SET_FRONTEND: err = dvbv3_set_delivery_system(fe); if (err) break; err = dtv_property_cache_sync(fe, c, parg); if (err) break; err = dtv_set_frontend(fe); break; case FE_GET_EVENT: err = dvb_frontend_get_event(fe, parg, file->f_flags); break; case FE_GET_FRONTEND: err = dvb_get_frontend(fe, parg); break; default: return -ENOTSUPP; } /* switch */ return err; } static __poll_t dvb_frontend_poll(struct file *file, struct poll_table_struct *wait) { struct dvb_device *dvbdev = file->private_data; struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; dev_dbg_ratelimited(fe->dvb->device, "%s:\n", __func__); poll_wait(file, &fepriv->events.wait_queue, wait); if (fepriv->events.eventw != fepriv->events.eventr) return (EPOLLIN | EPOLLRDNORM | EPOLLPRI); return 0; } static int dvb_frontend_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dvb_adapter *adapter = fe->dvb; int ret; dev_dbg(fe->dvb->device, "%s:\n", __func__); if (fe->exit == DVB_FE_DEVICE_REMOVED) return -ENODEV; if (adapter->mfe_shared == 2) { mutex_lock(&adapter->mfe_lock); if ((file->f_flags & O_ACCMODE) != O_RDONLY) { if (adapter->mfe_dvbdev && !adapter->mfe_dvbdev->writers) { mutex_unlock(&adapter->mfe_lock); return -EBUSY; } adapter->mfe_dvbdev = dvbdev; } } else if (adapter->mfe_shared) { mutex_lock(&adapter->mfe_lock); if (!adapter->mfe_dvbdev) adapter->mfe_dvbdev = dvbdev; else if (adapter->mfe_dvbdev != dvbdev) { struct dvb_device *mfedev = adapter->mfe_dvbdev; struct dvb_frontend *mfe = mfedev->priv; struct dvb_frontend_private *mfepriv = mfe->frontend_priv; int mferetry = 
(dvb_mfe_wait_time << 1); mutex_unlock(&adapter->mfe_lock); while (mferetry-- && (mfedev->users != -1 || mfepriv->thread)) { if (msleep_interruptible(500)) { if (signal_pending(current)) return -EINTR; } } mutex_lock(&adapter->mfe_lock); if (adapter->mfe_dvbdev != dvbdev) { mfedev = adapter->mfe_dvbdev; mfe = mfedev->priv; mfepriv = mfe->frontend_priv; if (mfedev->users != -1 || mfepriv->thread) { mutex_unlock(&adapter->mfe_lock); return -EBUSY; } adapter->mfe_dvbdev = dvbdev; } } } if (dvbdev->users == -1 && fe->ops.ts_bus_ctrl) { if ((ret = fe->ops.ts_bus_ctrl(fe, 1)) < 0) goto err0; /* If we took control of the bus, we need to force reinitialization. This is because many ts_bus_ctrl() functions strobe the RESET pin on the demod, and if the frontend thread already exists then the dvb_init() routine won't get called (which is what usually does initial register configuration). */ fepriv->reinitialise = 1; } if ((ret = dvb_generic_open(inode, file)) < 0) goto err1; if ((file->f_flags & O_ACCMODE) != O_RDONLY) { /* normal tune mode when opened R/W */ fepriv->tune_mode_flags &= ~FE_TUNE_MODE_ONESHOT; fepriv->tone = -1; fepriv->voltage = -1; #ifdef CONFIG_MEDIA_CONTROLLER_DVB mutex_lock(&fe->dvb->mdev_lock); if (fe->dvb->mdev) { mutex_lock(&fe->dvb->mdev->graph_mutex); if (fe->dvb->mdev->enable_source) ret = fe->dvb->mdev->enable_source( dvbdev->entity, &fepriv->pipe); mutex_unlock(&fe->dvb->mdev->graph_mutex); if (ret) { mutex_unlock(&fe->dvb->mdev_lock); dev_err(fe->dvb->device, "Tuner is busy. Error %d\n", ret); goto err2; } } mutex_unlock(&fe->dvb->mdev_lock); #endif ret = dvb_frontend_start(fe); if (ret) goto err3; /* empty event queue */ fepriv->events.eventr = fepriv->events.eventw = 0; } dvb_frontend_get(fe); if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); return ret; err3: #ifdef CONFIG_MEDIA_CONTROLLER_DVB mutex_lock(&fe->dvb->mdev_lock); if (fe->dvb->mdev) { mutex_lock(&fe->dvb->mdev->graph_mutex); if (fe->dvb->mdev->disable_source) fe->dvb->mdev->disable_source(dvbdev->entity); mutex_unlock(&fe->dvb->mdev->graph_mutex); } mutex_unlock(&fe->dvb->mdev_lock); err2: #endif dvb_generic_release(inode, file); err1: if (dvbdev->users == -1 && fe->ops.ts_bus_ctrl) fe->ops.ts_bus_ctrl(fe, 0); err0: if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); return ret; } static int dvb_frontend_release(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; int ret; dev_dbg(fe->dvb->device, "%s:\n", __func__); if ((file->f_flags & O_ACCMODE) != O_RDONLY) { fepriv->release_jiffies = jiffies; mb(); } ret = dvb_generic_release(inode, file); if (dvbdev->users == -1) { wake_up(&fepriv->wait_queue); #ifdef CONFIG_MEDIA_CONTROLLER_DVB mutex_lock(&fe->dvb->mdev_lock); if (fe->dvb->mdev) { mutex_lock(&fe->dvb->mdev->graph_mutex); if (fe->dvb->mdev->disable_source) fe->dvb->mdev->disable_source(dvbdev->entity); mutex_unlock(&fe->dvb->mdev->graph_mutex); } mutex_unlock(&fe->dvb->mdev_lock); #endif if (fe->exit != DVB_FE_NO_EXIT) wake_up(&dvbdev->wait_queue); if (fe->ops.ts_bus_ctrl) fe->ops.ts_bus_ctrl(fe, 0); } dvb_frontend_put(fe); return ret; } static const struct file_operations dvb_frontend_fops = { .owner = THIS_MODULE, .unlocked_ioctl = dvb_frontend_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = dvb_frontend_compat_ioctl, #endif .poll = dvb_frontend_poll, .open = dvb_frontend_open, .release = dvb_frontend_release, .llseek = noop_llseek, }; int 
dvb_frontend_suspend(struct dvb_frontend *fe) { int ret = 0; dev_dbg(fe->dvb->device, "%s: adap=%d fe=%d\n", __func__, fe->dvb->num, fe->id); if (fe->ops.tuner_ops.suspend) ret = fe->ops.tuner_ops.suspend(fe); else if (fe->ops.tuner_ops.sleep) ret = fe->ops.tuner_ops.sleep(fe); if (fe->ops.suspend) ret = fe->ops.suspend(fe); else if (fe->ops.sleep) ret = fe->ops.sleep(fe); return ret; } EXPORT_SYMBOL(dvb_frontend_suspend); int dvb_frontend_resume(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; int ret = 0; dev_dbg(fe->dvb->device, "%s: adap=%d fe=%d\n", __func__, fe->dvb->num, fe->id); fe->exit = DVB_FE_DEVICE_RESUME; if (fe->ops.resume) ret = fe->ops.resume(fe); else if (fe->ops.init) ret = fe->ops.init(fe); if (fe->ops.tuner_ops.resume) ret = fe->ops.tuner_ops.resume(fe); else if (fe->ops.tuner_ops.init) ret = fe->ops.tuner_ops.init(fe); if (fe->ops.set_tone && fepriv->tone != -1) fe->ops.set_tone(fe, fepriv->tone); if (fe->ops.set_voltage && fepriv->voltage != -1) fe->ops.set_voltage(fe, fepriv->voltage); fe->exit = DVB_FE_NO_EXIT; fepriv->state = FESTATE_RETUNE; dvb_frontend_wakeup(fe); return ret; } EXPORT_SYMBOL(dvb_frontend_resume); int dvb_register_frontend(struct dvb_adapter *dvb, struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv; const struct dvb_device dvbdev_template = { .users = ~0, .writers = 1, .readers = (~0) - 1, .fops = &dvb_frontend_fops, #if defined(CONFIG_MEDIA_CONTROLLER_DVB) .name = fe->ops.info.name, #endif }; int ret; dev_dbg(dvb->device, "%s:\n", __func__); if (mutex_lock_interruptible(&frontend_mutex)) return -ERESTARTSYS; fe->frontend_priv = kzalloc(sizeof(struct dvb_frontend_private), GFP_KERNEL); if (!fe->frontend_priv) { mutex_unlock(&frontend_mutex); return -ENOMEM; } fepriv = fe->frontend_priv; kref_init(&fe->refcount); /* * After initialization, there need to be two references: one * for dvb_unregister_frontend(), and another one for * dvb_frontend_detach(). 
*/ dvb_frontend_get(fe); sema_init(&fepriv->sem, 1); init_waitqueue_head(&fepriv->wait_queue); init_waitqueue_head(&fepriv->events.wait_queue); mutex_init(&fepriv->events.mtx); fe->dvb = dvb; fepriv->inversion = INVERSION_OFF; dev_info(fe->dvb->device, "DVB: registering adapter %i frontend %i (%s)...\n", fe->dvb->num, fe->id, fe->ops.info.name); ret = dvb_register_device(fe->dvb, &fepriv->dvbdev, &dvbdev_template, fe, DVB_DEVICE_FRONTEND, 0); if (ret) { dvb_frontend_put(fe); mutex_unlock(&frontend_mutex); return ret; } /* * Initialize the cache to the proper values according with the * first supported delivery system (ops->delsys[0]) */ fe->dtv_property_cache.delivery_system = fe->ops.delsys[0]; dvb_frontend_clear_cache(fe); mutex_unlock(&frontend_mutex); return 0; } EXPORT_SYMBOL(dvb_register_frontend); int dvb_unregister_frontend(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; dev_dbg(fe->dvb->device, "%s:\n", __func__); mutex_lock(&frontend_mutex); dvb_frontend_stop(fe); dvb_remove_device(fepriv->dvbdev); /* fe is invalid now */ mutex_unlock(&frontend_mutex); dvb_frontend_put(fe); return 0; } EXPORT_SYMBOL(dvb_unregister_frontend); static void dvb_frontend_invoke_release(struct dvb_frontend *fe, void (*release)(struct dvb_frontend *fe)) { if (release) { release(fe); #ifdef CONFIG_MEDIA_ATTACH dvb_detach(release); #endif } } void dvb_frontend_detach(struct dvb_frontend *fe) { dvb_frontend_invoke_release(fe, fe->ops.release_sec); dvb_frontend_invoke_release(fe, fe->ops.tuner_ops.release); dvb_frontend_invoke_release(fe, fe->ops.analog_ops.release); dvb_frontend_put(fe); } EXPORT_SYMBOL(dvb_frontend_detach);
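The comment in dvb_register_frontend() above explains that a registered frontend holds two references: dvb_unregister_frontend() drops one and dvb_frontend_detach() drops the other. A minimal sketch of how a bridge driver might drive that life cycle is shown below; mycard_state, mydemod_attach() and the mycard_frontend_* helpers are hypothetical names invented for the illustration and are not part of the code above.

#include <media/dvbdev.h>
#include <media/dvb_frontend.h>

struct mycard_state {				/* hypothetical bridge-driver state */
	struct dvb_adapter adapter;		/* assumed already registered elsewhere */
	struct dvb_frontend *fe;
};

struct dvb_frontend *mydemod_attach(struct mycard_state *st);	/* hypothetical demod attach helper */

static int mycard_frontend_register(struct mycard_state *st)
{
	st->fe = mydemod_attach(st);
	if (!st->fe)
		return -ENODEV;

	/* takes the two references described in dvb_register_frontend() */
	return dvb_register_frontend(&st->adapter, st->fe);
}

static void mycard_frontend_unregister(struct mycard_state *st)
{
	dvb_unregister_frontend(st->fe);	/* drops the registration reference */
	dvb_frontend_detach(st->fe);		/* invokes the release ops, drops the last reference */
}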
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM percpu #if !defined(_TRACE_PERCPU_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_PERCPU_H #include <linux/tracepoint.h> #include <trace/events/mmflags.h> TRACE_EVENT(percpu_alloc_percpu, TP_PROTO(unsigned long call_site, bool reserved, bool is_atomic, size_t size, size_t align, void *base_addr, int off, void __percpu *ptr, size_t bytes_alloc, gfp_t gfp_flags), TP_ARGS(call_site, reserved, is_atomic, size, align, base_addr, off, ptr, bytes_alloc, gfp_flags), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( bool, reserved ) __field( bool, is_atomic ) __field( size_t, size ) __field( size_t, align ) __field( void *, base_addr ) __field( int, off ) __field( void __percpu *, ptr ) __field( size_t, bytes_alloc ) __field( unsigned long, gfp_flags ) ), TP_fast_assign( __entry->call_site = call_site; __entry->reserved = reserved; __entry->is_atomic = is_atomic; __entry->size = size; __entry->align = align; __entry->base_addr = base_addr; __entry->off = off; __entry->ptr = ptr; __entry->bytes_alloc = bytes_alloc; __entry->gfp_flags = (__force unsigned long)gfp_flags; ), TP_printk("call_site=%pS reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p bytes_alloc=%zu gfp_flags=%s", (void *)__entry->call_site, __entry->reserved, __entry->is_atomic, __entry->size, __entry->align, __entry->base_addr, __entry->off, __entry->ptr, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags)) ); TRACE_EVENT(percpu_free_percpu, TP_PROTO(void *base_addr, int off, void __percpu *ptr), TP_ARGS(base_addr, off, ptr), TP_STRUCT__entry( __field( void *, base_addr ) __field( int, off ) __field( void __percpu *, ptr ) ), TP_fast_assign( __entry->base_addr = base_addr; __entry->off = off; __entry->ptr = ptr; ), TP_printk("base_addr=%p off=%d ptr=%p", __entry->base_addr, __entry->off, __entry->ptr) ); TRACE_EVENT(percpu_alloc_percpu_fail, TP_PROTO(bool reserved, bool is_atomic, size_t size, size_t align), TP_ARGS(reserved, is_atomic, size, align), TP_STRUCT__entry( __field( bool, reserved ) __field( bool, is_atomic ) __field( size_t, size ) __field( size_t, align ) ), TP_fast_assign( __entry->reserved = reserved; __entry->is_atomic = is_atomic; __entry->size = size; __entry->align = align; ), TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu", __entry->reserved, __entry->is_atomic, __entry->size, __entry->align) ); TRACE_EVENT(percpu_create_chunk, TP_PROTO(void *base_addr), TP_ARGS(base_addr), TP_STRUCT__entry( __field( void *, base_addr ) ), TP_fast_assign( __entry->base_addr = base_addr; ), TP_printk("base_addr=%p", __entry->base_addr) ); TRACE_EVENT(percpu_destroy_chunk, TP_PROTO(void *base_addr), TP_ARGS(base_addr), TP_STRUCT__entry( __field( void *, base_addr ) ), TP_fast_assign( __entry->base_addr = base_addr; ), TP_printk("base_addr=%p", __entry->base_addr) ); #endif /* _TRACE_PERCPU_H */ #include <trace/define_trace.h>
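Each TRACE_EVENT() above expands into a trace_percpu_*() helper whose arguments follow the TP_PROTO() list. The sketch below only illustrates how an allocator-side call site could emit the alloc and alloc-fail events; the demo_report_percpu_alloc() wrapper and the choice to report bytes_alloc as the requested size are assumptions made for the example.

#define CREATE_TRACE_POINTS		/* exactly one .c file instantiates the tracepoints */
#include <linux/gfp.h>
#include <trace/events/percpu.h>

static void demo_report_percpu_alloc(unsigned long call_site, bool reserved,
				     size_t size, size_t align, gfp_t gfp,
				     void *base_addr, int off,
				     void __percpu *ptr)
{
	bool is_atomic = !gfpflags_allow_blocking(gfp);

	if (ptr)
		/* a real caller would pass the rounded-up size as bytes_alloc */
		trace_percpu_alloc_percpu(call_site, reserved, is_atomic,
					  size, align, base_addr, off, ptr,
					  size, gfp);
	else
		trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
}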
/* SPDX-License-Identifier: GPL-2.0 */ /* * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk). * * (C) SGI 2006, Christoph Lameter * Cleaned up and restructured to ease the addition of alternative * implementations of SLAB allocators.
* (C) Linux Foundation 2008-2013 * Unified interface for all slab allocators */ #ifndef _LINUX_SLAB_H #define _LINUX_SLAB_H #include <linux/cache.h> #include <linux/gfp.h> #include <linux/overflow.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/percpu-refcount.h> #include <linux/cleanup.h> #include <linux/hash.h> enum _slab_flag_bits { _SLAB_CONSISTENCY_CHECKS, _SLAB_RED_ZONE, _SLAB_POISON, _SLAB_KMALLOC, _SLAB_HWCACHE_ALIGN, _SLAB_CACHE_DMA, _SLAB_CACHE_DMA32, _SLAB_STORE_USER, _SLAB_PANIC, _SLAB_TYPESAFE_BY_RCU, _SLAB_TRACE, #ifdef CONFIG_DEBUG_OBJECTS _SLAB_DEBUG_OBJECTS, #endif _SLAB_NOLEAKTRACE, _SLAB_NO_MERGE, #ifdef CONFIG_FAILSLAB _SLAB_FAILSLAB, #endif #ifdef CONFIG_MEMCG_KMEM _SLAB_ACCOUNT, #endif #ifdef CONFIG_KASAN_GENERIC _SLAB_KASAN, #endif _SLAB_NO_USER_FLAGS, #ifdef CONFIG_KFENCE _SLAB_SKIP_KFENCE, #endif #ifndef CONFIG_SLUB_TINY _SLAB_RECLAIM_ACCOUNT, #endif _SLAB_OBJECT_POISON, _SLAB_CMPXCHG_DOUBLE, _SLAB_FLAGS_LAST_BIT }; #define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr))) #define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U)) /* * Flags to pass to kmem_cache_create(). * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op */ /* DEBUG: Perform (expensive) checks on alloc/free */ #define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS) /* DEBUG: Red zone objs in a cache */ #define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE) /* DEBUG: Poison objects */ #define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) /* Indicate a kmalloc slab */ #define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) /* Align objs on cache lines */ #define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) /* Use GFP_DMA memory */ #define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) /* Use GFP_DMA32 memory */ #define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32) /* DEBUG: Store the last owner for bug hunting */ #define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) /* Panic if kmem_cache_create() fails */ #define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) /* * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * * This delays freeing the SLAB page by a grace period, it does _NOT_ * delay object freeing. This means that if you do kmem_cache_free() * that memory location is free to be reused at any time. Thus it may * be possible to see another object there in the same RCU grace period. * * This feature only ensures the memory location backing the object * stays valid, the trick to using this is relying on an independent * object validation pass. Something like: * * begin: * rcu_read_lock(); * obj = lockless_lookup(key); * if (obj) { * if (!try_get_ref(obj)) // might fail for free objects * rcu_read_unlock(); * goto begin; * * if (obj->key != key) { // not the object we expected * put_ref(obj); * rcu_read_unlock(); * goto begin; * } * } * rcu_read_unlock(); * * This is useful if we need to approach a kernel structure obliquely, * from its address obtained without the usual locking. We can lock * the structure to stabilize it and check it's still at the given address, * only if we can be sure that the memory has not been meanwhile reused * for some other kind of object (which our subsystem's lock might corrupt). * * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * * Note that it is not possible to acquire a lock within a structure * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference * as described above. 
The reason is that SLAB_TYPESAFE_BY_RCU pages * are not zeroed before being given to the slab, which means that any * locks must be initialized after each and every kmem_struct_alloc(). * Alternatively, make the ctor passed to kmem_cache_create() initialize * the locks at page-allocation time, as is done in __i915_request_ctor(), * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers * to safely acquire those ctor-initialized locks under rcu_read_lock() * protection. * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ #define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) /* Trace allocations and frees */ #define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) /* Flag to prevent checks on free */ #ifdef CONFIG_DEBUG_OBJECTS # define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS) #else # define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED #endif /* Avoid kmemleak tracing */ #define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE) /* * Prevent merging with compatible kmem caches. This flag should be used * cautiously. Valid use cases: * * - caches created for self-tests (e.g. kunit) * - general caches created and used by a subsystem, only when a * (subsystem-specific) debug option is enabled * - performance critical caches, should be very rare and consulted with slab * maintainers, and not used together with CONFIG_SLUB_TINY */ #define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE) /* Fault injection mark */ #ifdef CONFIG_FAILSLAB # define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB) #else # define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif /* Account to memcg */ #ifdef CONFIG_MEMCG_KMEM # define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else # define SLAB_ACCOUNT __SLAB_FLAG_UNUSED #endif #ifdef CONFIG_KASAN_GENERIC #define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN) #else #define SLAB_KASAN __SLAB_FLAG_UNUSED #endif /* * Ignore user specified debugging flags. * Intended for caches created for self-tests so they have only flags * specified in the code and other flags are ignored. */ #define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS) #ifdef CONFIG_KFENCE #define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE) #else #define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED #endif /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #ifndef CONFIG_SLUB_TINY #define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) #else #define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED #endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault. * * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can. * Both make kfree a no-op. 
*/ #define ZERO_SIZE_PTR ((void *)16) #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ (unsigned long)ZERO_SIZE_PTR) #include <linux/kasan.h> struct list_lru; struct mem_cgroup; /* * struct kmem_cache related prototypes */ bool slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)); struct kmem_cache *kmem_cache_create_usercopy(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); void kmem_cache_destroy(struct kmem_cache *s); int kmem_cache_shrink(struct kmem_cache *s); /* * Please use this macro to create slab caches. Simply specify the * name of the structure and maybe some flags that are listed above. * * The alignment of the struct determines object alignment. If you * f.e. add ____cacheline_aligned_in_smp to the struct declaration * then the objects will be properly aligned in SMP configurations. */ #define KMEM_CACHE(__struct, __flags) \ kmem_cache_create(#__struct, sizeof(struct __struct), \ __alignof__(struct __struct), (__flags), NULL) /* * To whitelist a single field for copying to/from usercopy, use this * macro instead for KMEM_CACHE() above. */ #define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ kmem_cache_create_usercopy(#__struct, \ sizeof(struct __struct), \ __alignof__(struct __struct), (__flags), \ offsetof(struct __struct, __field), \ sizeof_field(struct __struct, __field), NULL) /* * Common kmalloc functions provided by all allocators */ void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2); void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) /** * ksize - Report actual allocation size of associated object * * @objp: Pointer returned from a prior kmalloc()-family allocation. * * This should not be used for writing beyond the originally requested * allocation size. Either use krealloc() or round up the allocation size * with kmalloc_size_roundup() prior to allocation. If this is used to * access beyond the originally requested allocation size, UBSAN_BOUNDS * and/or FORTIFY_SOURCE may trip, since they only know about the * originally allocated size via the __alloc_size attribute. */ size_t ksize(const void *objp); #ifdef CONFIG_PRINTK bool kmem_dump_obj(void *object); #else static inline bool kmem_dump_obj(void *object) { return false; } #endif /* * Some archs want to perform DMA into kmalloc caches and need a guaranteed * alignment larger than the alignment of a 64-bit integer. * Setting ARCH_DMA_MINALIGN in arch headers allows that. */ #ifdef ARCH_HAS_DMA_MINALIGN #if ARCH_DMA_MINALIGN > 8 && !defined(ARCH_KMALLOC_MINALIGN) #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN #endif #endif #ifndef ARCH_KMALLOC_MINALIGN #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #elif ARCH_KMALLOC_MINALIGN > 8 #define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) #endif /* * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. * Intended for arches that get misalignment faults even for 64 bit integer * aligned buffers. */ #ifndef ARCH_SLAB_MINALIGN #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif /* * Arches can define this function if they want to decide the minimum slab * alignment at runtime. 
The value returned by the function must be a power * of two and >= ARCH_SLAB_MINALIGN. */ #ifndef arch_slab_minalign static inline unsigned int arch_slab_minalign(void) { return ARCH_SLAB_MINALIGN; } #endif /* * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN. * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment. */ #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) #define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) #define __assume_page_alignment __assume_aligned(PAGE_SIZE) /* * Kmalloc array related definitions */ /* * SLUB directly allocates requests fitting in to an order-1 page * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) #define KMALLOC_SHIFT_MAX (MAX_PAGE_ORDER + PAGE_SHIFT) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif /* Maximum allocatable size */ #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) /* Maximum size for which we actually use a slab cache */ #define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH) /* Maximum order allocatable via the slab allocator */ #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT) /* * Kmalloc subsystem. */ #ifndef KMALLOC_MIN_SIZE #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif /* * This restriction comes from byte sized index implementation. * Page size is normally 2^12 bytes and, in this case, if we want to use * byte sized index which can represent 2^8 entries, the size of the object * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. * If minimum size of kmalloc is less than 16, we use it as minimum object * size and give up to use byte sized index. */ #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ (KMALLOC_MIN_SIZE) : 16) #ifdef CONFIG_RANDOM_KMALLOC_CACHES #define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies #else #define RANDOM_KMALLOC_CACHES_NR 0 #endif /* * Whenever changing this, take care of that kmalloc_type() and * create_kmalloc_caches() still work as intended. * * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP * is for accounted but unreclaimable and non-dma objects. All the other * kmem caches can have both accounted and unaccounted objects. */ enum kmalloc_cache_type { KMALLOC_NORMAL = 0, #ifndef CONFIG_ZONE_DMA KMALLOC_DMA = KMALLOC_NORMAL, #endif #ifndef CONFIG_MEMCG_KMEM KMALLOC_CGROUP = KMALLOC_NORMAL, #endif KMALLOC_RANDOM_START = KMALLOC_NORMAL, KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR, #ifdef CONFIG_SLUB_TINY KMALLOC_RECLAIM = KMALLOC_NORMAL, #else KMALLOC_RECLAIM, #endif #ifdef CONFIG_ZONE_DMA KMALLOC_DMA, #endif #ifdef CONFIG_MEMCG_KMEM KMALLOC_CGROUP, #endif NR_KMALLOC_TYPES }; extern struct kmem_cache * kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; /* * Define gfp bits that should not be set for KMALLOC_NORMAL. */ #define KMALLOC_NOT_NORMAL_BITS \ (__GFP_RECLAIMABLE | \ (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0)) extern unsigned long random_kmalloc_seed; static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller) { /* * The most common case is KMALLOC_NORMAL, so test for it * with a single branch for all the relevant flags. 
*/ if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0)) #ifdef CONFIG_RANDOM_KMALLOC_CACHES /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */ return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed, ilog2(RANDOM_KMALLOC_CACHES_NR + 1)); #else return KMALLOC_NORMAL; #endif /* * At least one of the flags has to be set. Their priorities in * decreasing order are: * 1) __GFP_DMA * 2) __GFP_RECLAIMABLE * 3) __GFP_ACCOUNT */ if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA)) return KMALLOC_DMA; if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || (flags & __GFP_RECLAIMABLE)) return KMALLOC_RECLAIM; else return KMALLOC_CGROUP; } /* * Figure out which kmalloc slab an allocation of a certain size * belongs to. * 0 = zero alloc * 1 = 65 .. 96 bytes * 2 = 129 .. 192 bytes * n = 2^(n-1)+1 .. 2^n * * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized; * typical usage is via kmalloc_index() and therefore evaluated at compile-time. * Callers where !size_is_constant should only be test modules, where runtime * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab(). */ static __always_inline unsigned int __kmalloc_index(size_t size, bool size_is_constant) { if (!size) return 0; if (size <= KMALLOC_MIN_SIZE) return KMALLOC_SHIFT_LOW; if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) return 1; if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) return 2; if (size <= 8) return 3; if (size <= 16) return 4; if (size <= 32) return 5; if (size <= 64) return 6; if (size <= 128) return 7; if (size <= 256) return 8; if (size <= 512) return 9; if (size <= 1024) return 10; if (size <= 2 * 1024) return 11; if (size <= 4 * 1024) return 12; if (size <= 8 * 1024) return 13; if (size <= 16 * 1024) return 14; if (size <= 32 * 1024) return 15; if (size <= 64 * 1024) return 16; if (size <= 128 * 1024) return 17; if (size <= 256 * 1024) return 18; if (size <= 512 * 1024) return 19; if (size <= 1024 * 1024) return 20; if (size <= 2 * 1024 * 1024) return 21; if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()"); else BUG(); /* Will never be reached. Needed because the compiler may complain */ return -1; } static_assert(PAGE_SHIFT <= 20); #define kmalloc_index(s) __kmalloc_index(s, true) void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); /** * kmem_cache_alloc - Allocate an object * @cachep: The cache to allocate from. * @flags: See kmalloc(). * * Allocate an object from this cache. * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags. * * Return: pointer to the new object or %NULL in case of error */ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc; void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *s, void *objp); /* * Bulk allocation and freeing operations. These are accelerated in an * allocator specific way to avoid taking locks repeatedly or building * metadata structures unnecessarily. * * Note that interrupts must be enabled when calling these functions. 
*/ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p); static __always_inline void kfree_bulk(size_t size, void **p) { kmem_cache_free_bulk(NULL, size, p); } void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) __assume_kmalloc_alignment __alloc_size(3); void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) __assume_kmalloc_alignment __alloc_size(4); void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment __alloc_size(1); void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment __alloc_size(1); /** * kmalloc - allocate kernel memory * @size: how many bytes of memory are required. * @flags: describe the allocation context * * kmalloc is the normal method of allocating memory * for objects smaller than page size in the kernel. * * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN * bytes. For @size of power of two bytes, the alignment is also guaranteed * to be at least to the size. * * The @flags argument may be one of the GFP flags defined at * include/linux/gfp_types.h and described at * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>` * * The recommended usage of the @flags is described at * :ref:`Documentation/core-api/memory-allocation.rst <memory_allocation>` * * Below is a brief outline of the most useful GFP flags * * %GFP_KERNEL * Allocate normal kernel ram. May sleep. * * %GFP_NOWAIT * Allocation will not sleep. * * %GFP_ATOMIC * Allocation will not sleep. May use emergency pools. * * Also it is possible to set different flags by OR'ing * in one or more of the following additional @flags: * * %__GFP_ZERO * Zero the allocated memory before returning. Also see kzalloc(). * * %__GFP_HIGH * This allocation has high priority and may use emergency pools. * * %__GFP_NOFAIL * Indicate that this allocation is in no way allowed to fail * (think twice before using). * * %__GFP_NORETRY * If memory is not immediately available, * then give up at once. * * %__GFP_NOWARN * If allocation fails, don't issue any warnings. * * %__GFP_RETRY_MAYFAIL * Try really hard to succeed the allocation but fail * eventually. */ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); index = kmalloc_index(size); return kmalloc_trace( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, size); } return __kmalloc(size, flags); } static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large_node(size, flags, node); index = kmalloc_index(size); return kmalloc_node_trace( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } return __kmalloc_node(size, flags, node); } /** * kmalloc_array - allocate memory for an array. * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate (see kmalloc). 
*/ static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) return kmalloc(bytes, flags); return __kmalloc(bytes, flags); } /** * krealloc_array - reallocate memory for an array. * @p: pointer to the memory chunk to reallocate * @new_n: new number of elements to alloc * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) */ static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) return NULL; return krealloc(p, bytes, flags); } /** * kcalloc - allocate memory for an array. The memory is set to zero. * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags) { return kmalloc_array(n, size, flags | __GFP_ZERO); } void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, unsigned long caller) __alloc_size(1); #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ _RET_IP_) /* * kmalloc_track_caller is a special version of kmalloc that records the * calling function of the routine calling it for slab leak tracking instead * of just the calling function (confusing, eh?). * It's useful when the call to kmalloc comes from a widely-used standard * allocator where we care about the real place the memory allocation * request comes from. */ #define kmalloc_track_caller(size, flags) \ __kmalloc_node_track_caller(size, flags, \ NUMA_NO_NODE, _RET_IP_) static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, int node) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) return kmalloc_node(bytes, flags, node); return __kmalloc_node(bytes, flags, node); } static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) { return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); } /* * Shortcuts */ static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags) { return kmem_cache_alloc(k, flags | __GFP_ZERO); } /** * kzalloc - allocate memory. The memory is set to zero. * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). */ static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags) { return kmalloc(size, flags | __GFP_ZERO); } /** * kzalloc_node - allocate zeroed memory from a particular memory node. * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). 
* @node: memory node from which to allocate */ static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node) { return kmalloc_node(size, flags | __GFP_ZERO, node); } extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1); static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags) { return kvmalloc_node(size, flags, NUMA_NO_NODE); } static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node) { return kvmalloc_node(size, flags | __GFP_ZERO, node); } static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags) { return kvmalloc(size, flags | __GFP_ZERO); } static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; return kvmalloc(bytes, flags); } static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags) { return kvmalloc_array(n, size, flags | __GFP_ZERO); } extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) __realloc_size(3); extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T)) extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); /** * kmalloc_size_roundup - Report allocation bucket size for the given size * * @size: Number of bytes to round up from. * * This returns the number of bytes that would be available in a kmalloc() * allocation of @size bytes. For example, a 126 byte request would be * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly * for the general-purpose kmalloc()-based allocations, and is not for the * pre-sized kmem_cache_alloc()-based allocations.) * * Use this to kmalloc() the full bucket size ahead of time instead of using * ksize() to query the size after an allocation. */ size_t kmalloc_size_roundup(size_t size); void __init kmem_cache_init_late(void); #endif /* _LINUX_SLAB_H */
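A brief, hedged usage sketch of the interfaces declared above: KMEM_CACHE() builds a cache sized and aligned from the struct, kmem_cache_zalloc()/kmem_cache_free() handle per-object allocation, and kmalloc_array() gives an overflow-checked array allocation from the kmalloc buckets. The demo_* names are invented for the example.

#include <linux/slab.h>

struct demo_item {				/* invented for the example */
	int id;
	char name[32];
};

static struct kmem_cache *demo_cache;

static int demo_slab_usage(void)
{
	struct demo_item *item;
	int *vals;
	int ret = -ENOMEM;

	/* object size and alignment come from struct demo_item itself */
	demo_cache = KMEM_CACHE(demo_item, SLAB_HWCACHE_ALIGN);
	if (!demo_cache)
		return -ENOMEM;

	item = kmem_cache_zalloc(demo_cache, GFP_KERNEL);	/* zeroed object */
	vals = kmalloc_array(16, sizeof(*vals), GFP_KERNEL);	/* checks 16 * sizeof(int) for overflow */
	if (item && vals)
		ret = 0;				/* ... use item and vals here ... */

	kfree(vals);					/* kfree(NULL) is a no-op */
	if (item)
		kmem_cache_free(demo_cache, item);
	kmem_cache_destroy(demo_cache);
	return ret;
}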
// SPDX-License-Identifier: GPL-2.0-or-later /* * (Tentative) USB Audio Driver for ALSA * * Copyright (c) 2002 by Takashi Iwai <tiwai@suse.de> * * Many codes borrowed from audio.c by * Alan Cox (alan@lxorguk.ukuu.org.uk) * Thomas Sailer (sailer@ife.ee.ethz.ch) * * Audio Class 3.0 support by Ruslan Bilovol <ruslan.bilovol@gmail.com> * * NOTES: * * - the linked URBs would be preferred but not used so far because of * the instability of unlinking. * - type II is not supported properly. there is no device which supports * this type *correctly*. SB extigy looks as if it supports, but it's * indeed an AC3 stream packed in SPDIF frames (i.e. no real AC3 stream). */ #include <linux/bitops.h> #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/usb.h> #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/usb/audio.h> #include <linux/usb/audio-v2.h> #include <linux/usb/audio-v3.h> #include <linux/module.h> #include <sound/control.h> #include <sound/core.h> #include <sound/info.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include <sound/initval.h> #include "usbaudio.h" #include "card.h" #include "midi.h" #include "midi2.h" #include "mixer.h" #include "proc.h" #include "quirks.h" #include "endpoint.h" #include "helper.h" #include "pcm.h" #include "format.h" #include "power.h" #include "stream.h" #include "media.h" MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("USB Audio"); MODULE_LICENSE("GPL"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;/* Enable this card */ /* Vendor/product IDs for this card */ static int vid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; static int pid[SNDRV_CARDS] = { [0 ...
(SNDRV_CARDS-1)] = -1 }; static int device_setup[SNDRV_CARDS]; /* device parameter for this card */ static bool ignore_ctl_error; static bool autoclock = true; static bool lowlatency = true; static char *quirk_alias[SNDRV_CARDS]; static char *delayed_register[SNDRV_CARDS]; static bool implicit_fb[SNDRV_CARDS]; static unsigned int quirk_flags[SNDRV_CARDS]; bool snd_usb_use_vmalloc = true; bool snd_usb_skip_validation; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the USB audio adapter."); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for the USB audio adapter."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable USB audio adapter."); module_param_array(vid, int, NULL, 0444); MODULE_PARM_DESC(vid, "Vendor ID for the USB audio device."); module_param_array(pid, int, NULL, 0444); MODULE_PARM_DESC(pid, "Product ID for the USB audio device."); module_param_array(device_setup, int, NULL, 0444); MODULE_PARM_DESC(device_setup, "Specific device setup (if needed)."); module_param(ignore_ctl_error, bool, 0444); MODULE_PARM_DESC(ignore_ctl_error, "Ignore errors from USB controller for mixer interfaces."); module_param(autoclock, bool, 0444); MODULE_PARM_DESC(autoclock, "Enable auto-clock selection for UAC2 devices (default: yes)."); module_param(lowlatency, bool, 0444); MODULE_PARM_DESC(lowlatency, "Enable low latency playback (default: yes)."); module_param_array(quirk_alias, charp, NULL, 0444); MODULE_PARM_DESC(quirk_alias, "Quirk aliases, e.g. 0123abcd:5678beef."); module_param_array(delayed_register, charp, NULL, 0444); MODULE_PARM_DESC(delayed_register, "Quirk for delayed registration, given by id:iface, e.g. 0123abcd:4."); module_param_array(implicit_fb, bool, NULL, 0444); MODULE_PARM_DESC(implicit_fb, "Apply generic implicit feedback sync mode."); module_param_array(quirk_flags, uint, NULL, 0444); MODULE_PARM_DESC(quirk_flags, "Driver quirk bit flags."); module_param_named(use_vmalloc, snd_usb_use_vmalloc, bool, 0444); MODULE_PARM_DESC(use_vmalloc, "Use vmalloc for PCM intermediate buffers (default: yes)."); module_param_named(skip_validation, snd_usb_skip_validation, bool, 0444); MODULE_PARM_DESC(skip_validation, "Skip unit descriptor validation (default: no)."); /* * we keep the snd_usb_audio_t instances by ourselves for merging * the all interfaces on the same card as one sound device. */ static DEFINE_MUTEX(register_mutex); static struct snd_usb_audio *usb_chip[SNDRV_CARDS]; static struct usb_driver usb_audio_driver; /* * disconnect streams * called from usb_audio_disconnect() */ static void snd_usb_stream_disconnect(struct snd_usb_stream *as) { int idx; struct snd_usb_substream *subs; for (idx = 0; idx < 2; idx++) { subs = &as->substream[idx]; if (!subs->num_formats) continue; subs->data_endpoint = NULL; subs->sync_endpoint = NULL; } } static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int interface) { struct usb_device *dev = chip->dev; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; struct usb_interface *iface = usb_ifnum_to_if(dev, interface); if (!iface) { dev_err(&dev->dev, "%u:%d : does not exist\n", ctrlif, interface); return -EINVAL; } alts = &iface->altsetting[0]; altsd = get_iface_desc(alts); /* * Android with both accessory and audio interfaces enabled gets the * interface numbers wrong. 
*/ if ((chip->usb_id == USB_ID(0x18d1, 0x2d04) || chip->usb_id == USB_ID(0x18d1, 0x2d05)) && interface == 0 && altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC && altsd->bInterfaceSubClass == USB_SUBCLASS_VENDOR_SPEC) { interface = 2; iface = usb_ifnum_to_if(dev, interface); if (!iface) return -EINVAL; alts = &iface->altsetting[0]; altsd = get_iface_desc(alts); } if (usb_interface_claimed(iface)) { dev_dbg(&dev->dev, "%d:%d: skipping, already claimed\n", ctrlif, interface); return -EINVAL; } if ((altsd->bInterfaceClass == USB_CLASS_AUDIO || altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC) && altsd->bInterfaceSubClass == USB_SUBCLASS_MIDISTREAMING) { int err = snd_usb_midi_v2_create(chip, iface, NULL, chip->usb_id); if (err < 0) { dev_err(&dev->dev, "%u:%d: cannot create sequencer device\n", ctrlif, interface); return -EINVAL; } return usb_driver_claim_interface(&usb_audio_driver, iface, USB_AUDIO_IFACE_UNUSED); } if ((altsd->bInterfaceClass != USB_CLASS_AUDIO && altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIOSTREAMING) { dev_dbg(&dev->dev, "%u:%d: skipping non-supported interface %d\n", ctrlif, interface, altsd->bInterfaceClass); /* skip non-supported classes */ return -EINVAL; } if (snd_usb_get_speed(dev) == USB_SPEED_LOW) { dev_err(&dev->dev, "low speed audio streaming not supported\n"); return -EINVAL; } if (! snd_usb_parse_audio_interface(chip, interface)) { usb_set_interface(dev, interface, 0); /* reset the current interface */ return usb_driver_claim_interface(&usb_audio_driver, iface, USB_AUDIO_IFACE_UNUSED); } return 0; } /* * parse audio control descriptor and create pcm/midi streams */ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) { struct usb_device *dev = chip->dev; struct usb_host_interface *host_iface; struct usb_interface_descriptor *altsd; int i, protocol; /* find audiocontrol interface */ host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0]; altsd = get_iface_desc(host_iface); protocol = altsd->bInterfaceProtocol; switch (protocol) { default: dev_warn(&dev->dev, "unknown interface protocol %#02x, assuming v1\n", protocol); fallthrough; case UAC_VERSION_1: { struct uac1_ac_header_descriptor *h1; int rest_bytes; h1 = snd_usb_find_csint_desc(host_iface->extra, host_iface->extralen, NULL, UAC_HEADER); if (!h1 || h1->bLength < sizeof(*h1)) { dev_err(&dev->dev, "cannot find UAC_HEADER\n"); return -EINVAL; } rest_bytes = (void *)(host_iface->extra + host_iface->extralen) - (void *)h1; /* just to be sure -- this shouldn't hit at all */ if (rest_bytes <= 0) { dev_err(&dev->dev, "invalid control header\n"); return -EINVAL; } if (rest_bytes < sizeof(*h1)) { dev_err(&dev->dev, "too short v1 buffer descriptor\n"); return -EINVAL; } if (!h1->bInCollection) { dev_info(&dev->dev, "skipping empty audio interface (v1)\n"); return -EINVAL; } if (rest_bytes < h1->bLength) { dev_err(&dev->dev, "invalid buffer length (v1)\n"); return -EINVAL; } if (h1->bLength < sizeof(*h1) + h1->bInCollection) { dev_err(&dev->dev, "invalid UAC_HEADER (v1)\n"); return -EINVAL; } for (i = 0; i < h1->bInCollection; i++) snd_usb_create_stream(chip, ctrlif, h1->baInterfaceNr[i]); break; } case UAC_VERSION_2: case UAC_VERSION_3: { struct usb_interface_assoc_descriptor *assoc = usb_ifnum_to_if(dev, ctrlif)->intf_assoc; if (!assoc) { /* * Firmware writers cannot count to three. So to find * the IAD on the NuForce UDH-100, also check the next * interface. 
*/ struct usb_interface *iface = usb_ifnum_to_if(dev, ctrlif + 1); if (iface && iface->intf_assoc && iface->intf_assoc->bFunctionClass == USB_CLASS_AUDIO && iface->intf_assoc->bFunctionProtocol == UAC_VERSION_2) assoc = iface->intf_assoc; } if (!assoc) { dev_err(&dev->dev, "Audio class v2/v3 interfaces need an interface association\n"); return -EINVAL; } if (protocol == UAC_VERSION_3) { int badd = assoc->bFunctionSubClass; if (badd != UAC3_FUNCTION_SUBCLASS_FULL_ADC_3_0 && (badd < UAC3_FUNCTION_SUBCLASS_GENERIC_IO || badd > UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE)) { dev_err(&dev->dev, "Unsupported UAC3 BADD profile\n"); return -EINVAL; } chip->badd_profile = badd; } for (i = 0; i < assoc->bInterfaceCount; i++) { int intf = assoc->bFirstInterface + i; if (intf != ctrlif) snd_usb_create_stream(chip, ctrlif, intf); } break; } } return 0; } /* * Profile name preset table */ struct usb_audio_device_name { u32 id; const char *vendor_name; const char *product_name; const char *profile_name; /* override card->longname */ }; #define PROFILE_NAME(vid, pid, vendor, product, profile) \ { .id = USB_ID(vid, pid), .vendor_name = (vendor), \ .product_name = (product), .profile_name = (profile) } #define DEVICE_NAME(vid, pid, vendor, product) \ PROFILE_NAME(vid, pid, vendor, product, NULL) /* vendor/product and profile name presets, sorted in device id order */ static const struct usb_audio_device_name usb_audio_names[] = { /* HP Thunderbolt Dock Audio Headset */ PROFILE_NAME(0x03f0, 0x0269, "HP", "Thunderbolt Dock Audio Headset", "HP-Thunderbolt-Dock-Audio-Headset"), /* HP Thunderbolt Dock Audio Module */ PROFILE_NAME(0x03f0, 0x0567, "HP", "Thunderbolt Dock Audio Module", "HP-Thunderbolt-Dock-Audio-Module"), /* Two entries for Gigabyte TRX40 Aorus Master: * TRX40 Aorus Master has two USB-audio devices, one for the front * headphone with ESS SABRE9218 DAC chip, while another for the rest * I/O (the rear panel and the front mic) with Realtek ALC1220-VB. * Here we provide two distinct names for making UCM profiles easier. 
*/ PROFILE_NAME(0x0414, 0xa000, "Gigabyte", "Aorus Master Front Headphone", "Gigabyte-Aorus-Master-Front-Headphone"), PROFILE_NAME(0x0414, 0xa001, "Gigabyte", "Aorus Master Main Audio", "Gigabyte-Aorus-Master-Main-Audio"), /* Gigabyte TRX40 Aorus Pro WiFi */ PROFILE_NAME(0x0414, 0xa002, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Creative/E-Mu devices */ DEVICE_NAME(0x041e, 0x3010, "Creative Labs", "Sound Blaster MP3+"), /* Creative/Toshiba Multimedia Center SB-0500 */ DEVICE_NAME(0x041e, 0x3048, "Toshiba", "SB-0500"), DEVICE_NAME(0x046d, 0x0990, "Logitech, Inc.", "QuickCam Pro 9000"), DEVICE_NAME(0x05e1, 0x0408, "Syntek", "STK1160"), DEVICE_NAME(0x05e1, 0x0480, "Hauppauge", "Woodbury"), /* ASUS ROG Zenith II: this machine has also two devices, one for * the front headphone and another for the rest */ PROFILE_NAME(0x0b05, 0x1915, "ASUS", "Zenith II Front Headphone", "Zenith-II-Front-Headphone"), PROFILE_NAME(0x0b05, 0x1916, "ASUS", "Zenith II Main Audio", "Zenith-II-Main-Audio"), /* ASUS ROG Strix */ PROFILE_NAME(0x0b05, 0x1917, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* ASUS PRIME TRX40 PRO-S */ PROFILE_NAME(0x0b05, 0x1918, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Dell WD15 Dock */ PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"), /* Dell WD19 Dock */ PROFILE_NAME(0x0bda, 0x402e, "Dell", "WD19 Dock", "Dell-WD15-Dock"), DEVICE_NAME(0x0ccd, 0x0028, "TerraTec", "Aureon5.1MkII"), /* * The original product_name is "USB Sound Device", however this name * is also used by the CM106 based cards, so make it unique. */ DEVICE_NAME(0x0d8c, 0x0102, NULL, "ICUSBAUDIO7D"), DEVICE_NAME(0x0d8c, 0x0103, NULL, "Audio Advantage MicroII"), /* MSI TRX40 Creator */ PROFILE_NAME(0x0db0, 0x0d64, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* MSI TRX40 */ PROFILE_NAME(0x0db0, 0x543d, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), DEVICE_NAME(0x0fd9, 0x0008, "Hauppauge", "HVR-950Q"), /* Stanton/N2IT Final Scratch v1 device ('Scratchamp') */ DEVICE_NAME(0x103d, 0x0100, "Stanton", "ScratchAmp"), DEVICE_NAME(0x103d, 0x0101, "Stanton", "ScratchAmp"), /* aka. 
Serato Scratch Live DJ Box */ DEVICE_NAME(0x13e5, 0x0001, "Rane", "SL-1"), /* Lenovo ThinkStation P620 Rear Line-in, Line-out and Microphone */ PROFILE_NAME(0x17aa, 0x1046, "Lenovo", "ThinkStation P620 Rear", "Lenovo-ThinkStation-P620-Rear"), /* Lenovo ThinkStation P620 Internal Speaker + Front Headset */ PROFILE_NAME(0x17aa, 0x104d, "Lenovo", "ThinkStation P620 Main", "Lenovo-ThinkStation-P620-Main"), /* Asrock TRX40 Creator */ PROFILE_NAME(0x26ce, 0x0a01, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), DEVICE_NAME(0x2040, 0x7200, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7201, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x7210, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7211, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x7213, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7217, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721b, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721e, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721f, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7240, "Hauppauge", "HVR-850"), DEVICE_NAME(0x2040, 0x7260, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7280, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7281, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x8200, "Hauppauge", "Woodbury"), { } /* terminator */ }; static const struct usb_audio_device_name * lookup_device_name(u32 id) { static const struct usb_audio_device_name *p; for (p = usb_audio_names; p->id; p++) if (p->id == id) return p; return NULL; } /* * free the chip instance * * here we have to do not much, since pcm and controls are already freed * */ static void snd_usb_audio_free(struct snd_card *card) { struct snd_usb_audio *chip = card->private_data; snd_usb_endpoint_free_all(chip); snd_usb_midi_v2_free_all(chip); mutex_destroy(&chip->mutex); if (!atomic_read(&chip->shutdown)) dev_set_drvdata(&chip->dev->dev, NULL); } static void usb_audio_make_shortname(struct usb_device *dev, struct snd_usb_audio *chip, const struct snd_usb_audio_quirk *quirk) { struct snd_card *card = chip->card; const struct usb_audio_device_name *preset; const char *s = NULL; preset = lookup_device_name(chip->usb_id); if (preset && preset->product_name) s = preset->product_name; else if (quirk && quirk->product_name) s = quirk->product_name; if (s && *s) { strscpy(card->shortname, s, sizeof(card->shortname)); return; } /* retrieve the device string as shortname */ if (!dev->descriptor.iProduct || usb_string(dev, dev->descriptor.iProduct, card->shortname, sizeof(card->shortname)) <= 0) { /* no name available from anywhere, so use ID */ sprintf(card->shortname, "USB Device %#04x:%#04x", USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id)); } strim(card->shortname); } static void usb_audio_make_longname(struct usb_device *dev, struct snd_usb_audio *chip, const struct snd_usb_audio_quirk *quirk) { struct snd_card *card = chip->card; const struct usb_audio_device_name *preset; const char *s = NULL; int len; preset = lookup_device_name(chip->usb_id); /* shortcut - if any pre-defined string is given, use it */ if (preset && preset->profile_name) s = preset->profile_name; if (s && *s) { strscpy(card->longname, s, sizeof(card->longname)); return; } if (preset && preset->vendor_name) s = preset->vendor_name; else if (quirk && quirk->vendor_name) s = quirk->vendor_name; *card->longname = 0; if (s && *s) { strscpy(card->longname, s, sizeof(card->longname)); } else { /* retrieve the vendor and device strings as longname */ if 
(dev->descriptor.iManufacturer) usb_string(dev, dev->descriptor.iManufacturer, card->longname, sizeof(card->longname)); /* we don't really care if there isn't any vendor string */ } if (*card->longname) { strim(card->longname); if (*card->longname) strlcat(card->longname, " ", sizeof(card->longname)); } strlcat(card->longname, card->shortname, sizeof(card->longname)); len = strlcat(card->longname, " at ", sizeof(card->longname)); if (len < sizeof(card->longname)) usb_make_path(dev, card->longname + len, sizeof(card->longname) - len); switch (snd_usb_get_speed(dev)) { case USB_SPEED_LOW: strlcat(card->longname, ", low speed", sizeof(card->longname)); break; case USB_SPEED_FULL: strlcat(card->longname, ", full speed", sizeof(card->longname)); break; case USB_SPEED_HIGH: strlcat(card->longname, ", high speed", sizeof(card->longname)); break; case USB_SPEED_SUPER: strlcat(card->longname, ", super speed", sizeof(card->longname)); break; case USB_SPEED_SUPER_PLUS: strlcat(card->longname, ", super speed plus", sizeof(card->longname)); break; default: break; } } /* * create a chip instance and set its names. */ static int snd_usb_audio_create(struct usb_interface *intf, struct usb_device *dev, int idx, const struct snd_usb_audio_quirk *quirk, unsigned int usb_id, struct snd_usb_audio **rchip) { struct snd_card *card; struct snd_usb_audio *chip; int err; char component[14]; *rchip = NULL; switch (snd_usb_get_speed(dev)) { case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_SUPER: case USB_SPEED_SUPER_PLUS: break; default: dev_err(&dev->dev, "unknown device speed %d\n", snd_usb_get_speed(dev)); return -ENXIO; } err = snd_card_new(&intf->dev, index[idx], id[idx], THIS_MODULE, sizeof(*chip), &card); if (err < 0) { dev_err(&dev->dev, "cannot create card instance %d\n", idx); return err; } chip = card->private_data; mutex_init(&chip->mutex); init_waitqueue_head(&chip->shutdown_wait); chip->index = idx; chip->dev = dev; chip->card = card; chip->setup = device_setup[idx]; chip->generic_implicit_fb = implicit_fb[idx]; chip->autoclock = autoclock; chip->lowlatency = lowlatency; atomic_set(&chip->active, 1); /* avoid autopm during probing */ atomic_set(&chip->usage_count, 0); atomic_set(&chip->shutdown, 0); chip->usb_id = usb_id; INIT_LIST_HEAD(&chip->pcm_list); INIT_LIST_HEAD(&chip->ep_list); INIT_LIST_HEAD(&chip->iface_ref_list); INIT_LIST_HEAD(&chip->clock_ref_list); INIT_LIST_HEAD(&chip->midi_list); INIT_LIST_HEAD(&chip->midi_v2_list); INIT_LIST_HEAD(&chip->mixer_list); if (quirk_flags[idx]) chip->quirk_flags = quirk_flags[idx]; else snd_usb_init_quirk_flags(chip); card->private_free = snd_usb_audio_free; strcpy(card->driver, "USB-Audio"); sprintf(component, "USB%04x:%04x", USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id)); snd_component_add(card, component); usb_audio_make_shortname(dev, chip, quirk); usb_audio_make_longname(dev, chip, quirk); snd_usb_audio_create_proc(chip); *rchip = chip; return 0; } /* look for a matching quirk alias id */ static bool get_alias_id(struct usb_device *dev, unsigned int *id) { int i; unsigned int src, dst; for (i = 0; i < ARRAY_SIZE(quirk_alias); i++) { if (!quirk_alias[i] || sscanf(quirk_alias[i], "%x:%x", &src, &dst) != 2 || src != *id) continue; dev_info(&dev->dev, "device (%04x:%04x): applying quirk alias %04x:%04x\n", USB_ID_VENDOR(*id), USB_ID_PRODUCT(*id), USB_ID_VENDOR(dst), USB_ID_PRODUCT(dst)); *id = dst; return true; } return false; } static int check_delayed_register_option(struct snd_usb_audio *chip) { int i; unsigned int 
id, inum; for (i = 0; i < ARRAY_SIZE(delayed_register); i++) { if (delayed_register[i] && sscanf(delayed_register[i], "%x:%x", &id, &inum) == 2 && id == chip->usb_id) return inum; } return -1; } static const struct usb_device_id usb_audio_ids[]; /* defined below */ /* look for the last interface that matches with our ids and remember it */ static void find_last_interface(struct snd_usb_audio *chip) { struct usb_host_config *config = chip->dev->actconfig; struct usb_interface *intf; int i; if (!config) return; for (i = 0; i < config->desc.bNumInterfaces; i++) { intf = config->interface[i]; if (usb_match_id(intf, usb_audio_ids)) chip->last_iface = intf->altsetting[0].desc.bInterfaceNumber; } usb_audio_dbg(chip, "Found last interface = %d\n", chip->last_iface); } /* look for the corresponding quirk */ static const struct snd_usb_audio_quirk * get_alias_quirk(struct usb_device *dev, unsigned int id) { const struct usb_device_id *p; for (p = usb_audio_ids; p->match_flags; p++) { /* FIXME: this checks only vendor:product pair in the list */ if ((p->match_flags & USB_DEVICE_ID_MATCH_DEVICE) == USB_DEVICE_ID_MATCH_DEVICE && p->idVendor == USB_ID_VENDOR(id) && p->idProduct == USB_ID_PRODUCT(id)) return (const struct snd_usb_audio_quirk *)p->driver_info; } return NULL; } /* register card if we reach to the last interface or to the specified * one given via option */ static int try_to_register_card(struct snd_usb_audio *chip, int ifnum) { if (check_delayed_register_option(chip) == ifnum || chip->last_iface == ifnum || usb_interface_claimed(usb_ifnum_to_if(chip->dev, chip->last_iface))) return snd_card_register(chip->card); return 0; } /* * probe the active usb device * * note that this can be called multiple times per a device, when it * includes multiple audio control interfaces. * * thus we check the usb device pointer and creates the card instance * only at the first time. the successive calls of this function will * append the pcm interface to the corresponding card. */ static int usb_audio_probe(struct usb_interface *intf, const struct usb_device_id *usb_id) { struct usb_device *dev = interface_to_usbdev(intf); const struct snd_usb_audio_quirk *quirk = (const struct snd_usb_audio_quirk *)usb_id->driver_info; struct snd_usb_audio *chip; int i, err; struct usb_host_interface *alts; int ifnum; u32 id; alts = &intf->altsetting[0]; ifnum = get_iface_desc(alts)->bInterfaceNumber; id = USB_ID(le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); if (get_alias_id(dev, &id)) quirk = get_alias_quirk(dev, id); if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) return -ENXIO; if (quirk && quirk->ifnum == QUIRK_NODEV_INTERFACE) return -ENODEV; err = snd_usb_apply_boot_quirk(dev, intf, quirk, id); if (err < 0) return err; /* * found a config. now register to ALSA */ /* check whether it's already registered */ chip = NULL; mutex_lock(&register_mutex); for (i = 0; i < SNDRV_CARDS; i++) { if (usb_chip[i] && usb_chip[i]->dev == dev) { if (atomic_read(&usb_chip[i]->shutdown)) { dev_err(&dev->dev, "USB device is in the shutdown state, cannot create a card instance\n"); err = -EIO; goto __error; } chip = usb_chip[i]; atomic_inc(&chip->active); /* avoid autopm */ break; } } if (! chip) { err = snd_usb_apply_boot_quirk_once(dev, intf, quirk, id); if (err < 0) goto __error; /* it's a fresh one. 
* now look for an empty slot and create a new card instance */ for (i = 0; i < SNDRV_CARDS; i++) if (!usb_chip[i] && (vid[i] == -1 || vid[i] == USB_ID_VENDOR(id)) && (pid[i] == -1 || pid[i] == USB_ID_PRODUCT(id))) { if (enable[i]) { err = snd_usb_audio_create(intf, dev, i, quirk, id, &chip); if (err < 0) goto __error; break; } else if (vid[i] != -1 || pid[i] != -1) { dev_info(&dev->dev, "device (%04x:%04x) is disabled\n", USB_ID_VENDOR(id), USB_ID_PRODUCT(id)); err = -ENOENT; goto __error; } } if (!chip) { dev_err(&dev->dev, "no available usb audio device\n"); err = -ENODEV; goto __error; } find_last_interface(chip); } if (chip->num_interfaces >= MAX_CARD_INTERFACES) { dev_info(&dev->dev, "Too many interfaces assigned to the single USB-audio card\n"); err = -EINVAL; goto __error; } dev_set_drvdata(&dev->dev, chip); if (ignore_ctl_error) chip->quirk_flags |= QUIRK_FLAG_IGNORE_CTL_ERROR; if (chip->quirk_flags & QUIRK_FLAG_DISABLE_AUTOSUSPEND) usb_disable_autosuspend(interface_to_usbdev(intf)); /* * For devices with more than one control interface, we assume the * first contains the audio controls. We might need a more specific * check here in the future. */ if (!chip->ctrl_intf) chip->ctrl_intf = alts; err = 1; /* continue */ if (quirk && quirk->ifnum != QUIRK_NO_INTERFACE) { /* need some special handlings */ err = snd_usb_create_quirk(chip, intf, &usb_audio_driver, quirk); if (err < 0) goto __error; } if (err > 0) { /* create normal USB audio interfaces */ err = snd_usb_create_streams(chip, ifnum); if (err < 0) goto __error; err = snd_usb_create_mixer(chip, ifnum); if (err < 0) goto __error; } if (chip->need_delayed_register) { dev_info(&dev->dev, "Found post-registration device assignment: %08x:%02x\n", chip->usb_id, ifnum); chip->need_delayed_register = false; /* clear again */ } err = try_to_register_card(chip, ifnum); if (err < 0) goto __error_no_register; if (chip->quirk_flags & QUIRK_FLAG_SHARE_MEDIA_DEVICE) { /* don't want to fail when snd_media_device_create() fails */ snd_media_device_create(chip, intf); } if (quirk) chip->quirk_type = quirk->type; usb_chip[chip->index] = chip; chip->intf[chip->num_interfaces] = intf; chip->num_interfaces++; usb_set_intfdata(intf, chip); atomic_dec(&chip->active); mutex_unlock(&register_mutex); return 0; __error: /* in the case of error in secondary interface, still try to register */ if (chip) try_to_register_card(chip, ifnum); __error_no_register: if (chip) { /* chip->active is inside the chip->card object, * decrement before memory is possibly returned. */ atomic_dec(&chip->active); if (!chip->num_interfaces) snd_card_free(chip->card); } mutex_unlock(&register_mutex); return err; } /* * we need to take care of counter, since disconnection can be called also * many times as well as usb_audio_probe(). 
*/ static void usb_audio_disconnect(struct usb_interface *intf) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_card *card; struct list_head *p; if (chip == USB_AUDIO_IFACE_UNUSED) return; card = chip->card; mutex_lock(&register_mutex); if (atomic_inc_return(&chip->shutdown) == 1) { struct snd_usb_stream *as; struct snd_usb_endpoint *ep; struct usb_mixer_interface *mixer; /* wait until all pending tasks done; * they are protected by snd_usb_lock_shutdown() */ wait_event(chip->shutdown_wait, !atomic_read(&chip->usage_count)); snd_card_disconnect(card); /* release the pcm resources */ list_for_each_entry(as, &chip->pcm_list, list) { snd_usb_stream_disconnect(as); } /* release the endpoint resources */ list_for_each_entry(ep, &chip->ep_list, list) { snd_usb_endpoint_release(ep); } /* release the midi resources */ list_for_each(p, &chip->midi_list) { snd_usbmidi_disconnect(p); } snd_usb_midi_v2_disconnect_all(chip); /* * Nice to check quirk && quirk->shares_media_device and * then call the snd_media_device_delete(). Don't have * access to the quirk here. snd_media_device_delete() * accesses mixer_list */ snd_media_device_delete(chip); /* release mixer resources */ list_for_each_entry(mixer, &chip->mixer_list, list) { snd_usb_mixer_disconnect(mixer); } } if (chip->quirk_flags & QUIRK_FLAG_DISABLE_AUTOSUSPEND) usb_enable_autosuspend(interface_to_usbdev(intf)); chip->num_interfaces--; if (chip->num_interfaces <= 0) { usb_chip[chip->index] = NULL; mutex_unlock(&register_mutex); snd_card_free_when_closed(card); } else { mutex_unlock(&register_mutex); } } /* lock the shutdown (disconnect) task and autoresume */ int snd_usb_lock_shutdown(struct snd_usb_audio *chip) { int err; atomic_inc(&chip->usage_count); if (atomic_read(&chip->shutdown)) { err = -EIO; goto error; } err = snd_usb_autoresume(chip); if (err < 0) goto error; return 0; error: if (atomic_dec_and_test(&chip->usage_count)) wake_up(&chip->shutdown_wait); return err; } /* autosuspend and unlock the shutdown */ void snd_usb_unlock_shutdown(struct snd_usb_audio *chip) { snd_usb_autosuspend(chip); if (atomic_dec_and_test(&chip->usage_count)) wake_up(&chip->shutdown_wait); } int snd_usb_autoresume(struct snd_usb_audio *chip) { int i, err; if (atomic_read(&chip->shutdown)) return -EIO; if (atomic_inc_return(&chip->active) != 1) return 0; for (i = 0; i < chip->num_interfaces; i++) { err = usb_autopm_get_interface(chip->intf[i]); if (err < 0) { /* rollback */ while (--i >= 0) usb_autopm_put_interface(chip->intf[i]); atomic_dec(&chip->active); return err; } } return 0; } void snd_usb_autosuspend(struct snd_usb_audio *chip) { int i; if (atomic_read(&chip->shutdown)) return; if (!atomic_dec_and_test(&chip->active)) return; for (i = 0; i < chip->num_interfaces; i++) usb_autopm_put_interface(chip->intf[i]); } static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_usb_stream *as; struct snd_usb_endpoint *ep; struct usb_mixer_interface *mixer; struct list_head *p; if (chip == USB_AUDIO_IFACE_UNUSED) return 0; if (!chip->num_suspended_intf++) { list_for_each_entry(as, &chip->pcm_list, list) snd_usb_pcm_suspend(as); list_for_each_entry(ep, &chip->ep_list, list) snd_usb_endpoint_suspend(ep); list_for_each(p, &chip->midi_list) snd_usbmidi_suspend(p); list_for_each_entry(mixer, &chip->mixer_list, list) snd_usb_mixer_suspend(mixer); snd_usb_midi_v2_suspend_all(chip); } if (!PMSG_IS_AUTO(message) && !chip->system_suspend) { 
snd_power_change_state(chip->card, SNDRV_CTL_POWER_D3hot); chip->system_suspend = chip->num_suspended_intf; } return 0; } static int usb_audio_resume(struct usb_interface *intf) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_usb_stream *as; struct usb_mixer_interface *mixer; struct list_head *p; int err = 0; if (chip == USB_AUDIO_IFACE_UNUSED) return 0; atomic_inc(&chip->active); /* avoid autopm */ if (chip->num_suspended_intf > 1) goto out; list_for_each_entry(as, &chip->pcm_list, list) { err = snd_usb_pcm_resume(as); if (err < 0) goto err_out; } /* * ALSA leaves material resumption to user space * we just notify and restart the mixers */ list_for_each_entry(mixer, &chip->mixer_list, list) { err = snd_usb_mixer_resume(mixer); if (err < 0) goto err_out; } list_for_each(p, &chip->midi_list) { snd_usbmidi_resume(p); } snd_usb_midi_v2_resume_all(chip); out: if (chip->num_suspended_intf == chip->system_suspend) { snd_power_change_state(chip->card, SNDRV_CTL_POWER_D0); chip->system_suspend = 0; } chip->num_suspended_intf--; err_out: atomic_dec(&chip->active); /* allow autopm after this point */ return err; } static const struct usb_device_id usb_audio_ids [] = { #include "quirks-table.h" { .match_flags = (USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS), .bInterfaceClass = USB_CLASS_AUDIO, .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_audio_ids); /* * entry point for linux usb interface */ static struct usb_driver usb_audio_driver = { .name = "snd-usb-audio", .probe = usb_audio_probe, .disconnect = usb_audio_disconnect, .suspend = usb_audio_suspend, .resume = usb_audio_resume, .reset_resume = usb_audio_resume, .id_table = usb_audio_ids, .supports_autosuspend = 1, }; module_usb_driver(usb_audio_driver);
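/*
 * Illustrative sketch, not part of the driver: the quirk_alias module option
 * handled by get_alias_id() above takes strings of the form "srcID:dstID",
 * where each ID is the packed 32-bit vendor/product pair (vendor in the upper
 * 16 bits, product in the lower 16 bits, cf. USB_ID_VENDOR()/USB_ID_PRODUCT()).
 * The standalone program below mirrors that sscanf()-based parsing; the option
 * string is a made-up example value.
 */
#include <stdio.h>

int main(void)
{
	const char *quirk_alias = "12341001:56782002";	/* hypothetical option value */
	unsigned int src, dst;

	if (sscanf(quirk_alias, "%x:%x", &src, &dst) != 2) {
		fprintf(stderr, "malformed quirk_alias string\n");
		return 1;
	}

	/* decode the packed vendor:product pairs for display */
	printf("alias %04x:%04x -> %04x:%04x\n",
	       (src >> 16) & 0xffff, src & 0xffff,
	       (dst >> 16) & 0xffff, dst & 0xffff);
	return 0;
}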
// SPDX-License-Identifier: GPL-2.0-or-later /* * Roccat Kone driver for Linux * * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net> */ /* */ /* * Roccat Kone is a gamer mouse which consists of a mouse part and a keyboard * part. The keyboard part enables the mouse to execute stored macros with mixed * key- and button-events. * * TODO implement on-the-fly polling-rate change * The windows driver has the ability to change the polling rate of the * device on the press of a mousebutton. * Is it possible to remove and reinstall the urb in raw-event- or any * other handler, or to defer this action to be executed somewhere else? * * TODO is it possible to overwrite group for sysfs attributes via udev? */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" #include "hid-roccat-kone.h" static uint profile_numbers[5] = {0, 1, 2, 3, 4}; static void kone_profile_activated(struct kone_device *kone, uint new_profile) { kone->actual_profile = new_profile; kone->actual_dpi = kone->profiles[new_profile - 1].startup_dpi; } static void kone_profile_report(struct kone_device *kone, uint new_profile) { struct kone_roccat_report roccat_report; roccat_report.event = kone_mouse_event_switch_profile; roccat_report.value = new_profile; roccat_report.key = 0; roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report); } static int kone_receive(struct usb_device *usb_dev, uint usb_command, void *data, uint size) { char *buf; int len; buf = kmalloc(size, GFP_KERNEL); if (buf == NULL) return -ENOMEM; len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), HID_REQ_GET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, usb_command, 0, buf, size, USB_CTRL_SET_TIMEOUT); memcpy(data, buf, size); kfree(buf); return ((len < 0) ? len : ((len != size) ? -EIO : 0)); } static int kone_send(struct usb_device *usb_dev, uint usb_command, void const *data, uint size) { char *buf; int len; buf = kmemdup(data, size, GFP_KERNEL); if (buf == NULL) return -ENOMEM; len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), HID_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, usb_command, 0, buf, size, USB_CTRL_SET_TIMEOUT); kfree(buf); return ((len < 0) ? len : ((len != size) ? -EIO : 0)); } static void kone_set_settings_checksum(struct kone_settings *settings) { uint16_t checksum = 0; unsigned char *address = (unsigned char *)settings; int i; for (i = 0; i < sizeof(struct kone_settings) - 2; ++i, ++address) checksum += *address; settings->checksum = cpu_to_le16(checksum); } /* * Checks success after writing data to mouse * On success returns 0 * On failure returns errno */ static int kone_check_write(struct usb_device *usb_dev) { int retval; uint8_t data; do { /* * Mouse needs 50 msecs until it says ok, but there are * 30 more msecs needed for next write to work. */ msleep(80); retval = kone_receive(usb_dev, kone_command_confirm_write, &data, 1); if (retval) return retval; /* * value of 3 seems to mean something like * "not finished yet, but it looks good" * So check again after a moment. 
*/ } while (data == 3); if (data == 1) /* everything alright */ return 0; /* unknown answer */ dev_err(&usb_dev->dev, "got retval %d when checking write\n", data); return -EIO; } /* * Reads settings from mouse and stores it in @buf * On success returns 0 * On failure returns errno */ static int kone_get_settings(struct usb_device *usb_dev, struct kone_settings *buf) { return kone_receive(usb_dev, kone_command_settings, buf, sizeof(struct kone_settings)); } /* * Writes settings from @buf to mouse * On success returns 0 * On failure returns errno */ static int kone_set_settings(struct usb_device *usb_dev, struct kone_settings const *settings) { int retval; retval = kone_send(usb_dev, kone_command_settings, settings, sizeof(struct kone_settings)); if (retval) return retval; return kone_check_write(usb_dev); } /* * Reads profile data from mouse and stores it in @buf * @number: profile number to read * On success returns 0 * On failure returns errno */ static int kone_get_profile(struct usb_device *usb_dev, struct kone_profile *buf, int number) { int len; if (number < 1 || number > 5) return -EINVAL; len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), USB_REQ_CLEAR_FEATURE, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, kone_command_profile, number, buf, sizeof(struct kone_profile), USB_CTRL_SET_TIMEOUT); if (len != sizeof(struct kone_profile)) return -EIO; return 0; } /* * Writes profile data to mouse. * @number: profile number to write * On success returns 0 * On failure returns errno */ static int kone_set_profile(struct usb_device *usb_dev, struct kone_profile const *profile, int number) { int len; if (number < 1 || number > 5) return -EINVAL; len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), USB_REQ_SET_CONFIGURATION, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, kone_command_profile, number, (void *)profile, sizeof(struct kone_profile), USB_CTRL_SET_TIMEOUT); if (len != sizeof(struct kone_profile)) return len; if (kone_check_write(usb_dev)) return -EIO; return 0; } /* * Reads value of "fast-clip-weight" and stores it in @result * On success returns 0 * On failure returns errno */ static int kone_get_weight(struct usb_device *usb_dev, int *result) { int retval; uint8_t data; retval = kone_receive(usb_dev, kone_command_weight, &data, 1); if (retval) return retval; *result = (int)data; return 0; } /* * Reads firmware_version of mouse and stores it in @result * On success returns 0 * On failure returns errno */ static int kone_get_firmware_version(struct usb_device *usb_dev, int *result) { int retval; uint16_t data; retval = kone_receive(usb_dev, kone_command_firmware_version, &data, 2); if (retval) return retval; *result = le16_to_cpu(data); return 0; } static ssize_t kone_sysfs_read_settings(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); if (off >= sizeof(struct kone_settings)) return 0; if (off + count > sizeof(struct kone_settings)) count = sizeof(struct kone_settings) - off; mutex_lock(&kone->kone_lock); memcpy(buf, ((char const *)&kone->settings) + off, count); mutex_unlock(&kone->kone_lock); return count; } /* * Writing settings automatically activates startup_profile. 
* This function keeps values in kone_device up to date and assumes that in * case of error the old data is still valid */ static ssize_t kone_sysfs_write_settings(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval = 0, difference, old_profile; struct kone_settings *settings = (struct kone_settings *)buf; /* I need to get my data in one piece */ if (off != 0 || count != sizeof(struct kone_settings)) return -EINVAL; mutex_lock(&kone->kone_lock); difference = memcmp(settings, &kone->settings, sizeof(struct kone_settings)); if (difference) { if (settings->startup_profile < 1 || settings->startup_profile > 5) { retval = -EINVAL; goto unlock; } retval = kone_set_settings(usb_dev, settings); if (retval) goto unlock; old_profile = kone->settings.startup_profile; memcpy(&kone->settings, settings, sizeof(struct kone_settings)); kone_profile_activated(kone, kone->settings.startup_profile); if (kone->settings.startup_profile != old_profile) kone_profile_report(kone, kone->settings.startup_profile); } unlock: mutex_unlock(&kone->kone_lock); if (retval) return retval; return sizeof(struct kone_settings); } static BIN_ATTR(settings, 0660, kone_sysfs_read_settings, kone_sysfs_write_settings, sizeof(struct kone_settings)); static ssize_t kone_sysfs_read_profilex(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); if (off >= sizeof(struct kone_profile)) return 0; if (off + count > sizeof(struct kone_profile)) count = sizeof(struct kone_profile) - off; mutex_lock(&kone->kone_lock); memcpy(buf, ((char const *)&kone->profiles[*(uint *)(attr->private)]) + off, count); mutex_unlock(&kone->kone_lock); return count; } /* Writes data only if different to stored data */ static ssize_t kone_sysfs_write_profilex(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); struct kone_profile *profile; int retval = 0, difference; /* I need to get my data in one piece */ if (off != 0 || count != sizeof(struct kone_profile)) return -EINVAL; profile = &kone->profiles[*(uint *)(attr->private)]; mutex_lock(&kone->kone_lock); difference = memcmp(buf, profile, sizeof(struct kone_profile)); if (difference) { retval = kone_set_profile(usb_dev, (struct kone_profile const *)buf, *(uint *)(attr->private) + 1); if (!retval) memcpy(profile, buf, sizeof(struct kone_profile)); } mutex_unlock(&kone->kone_lock); if (retval) return retval; return sizeof(struct kone_profile); } #define PROFILE_ATTR(number) \ static struct bin_attribute bin_attr_profile##number = { \ .attr = { .name = "profile" #number, .mode = 0660 }, \ .size = sizeof(struct kone_profile), \ .read = kone_sysfs_read_profilex, \ .write = kone_sysfs_write_profilex, \ .private = &profile_numbers[number-1], \ } PROFILE_ATTR(1); PROFILE_ATTR(2); PROFILE_ATTR(3); PROFILE_ATTR(4); PROFILE_ATTR(5); static ssize_t kone_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct 
kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kone->actual_profile); } static DEVICE_ATTR(actual_profile, 0440, kone_sysfs_show_actual_profile, NULL); static ssize_t kone_sysfs_show_actual_dpi(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kone->actual_dpi); } static DEVICE_ATTR(actual_dpi, 0440, kone_sysfs_show_actual_dpi, NULL); /* weight is read each time, since we don't get informed when it's changed */ static ssize_t kone_sysfs_show_weight(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone; struct usb_device *usb_dev; int weight = 0; int retval; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); mutex_lock(&kone->kone_lock); retval = kone_get_weight(usb_dev, &weight); mutex_unlock(&kone->kone_lock); if (retval) return retval; return snprintf(buf, PAGE_SIZE, "%d\n", weight); } static DEVICE_ATTR(weight, 0440, kone_sysfs_show_weight, NULL); static ssize_t kone_sysfs_show_firmware_version(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kone->firmware_version); } static DEVICE_ATTR(firmware_version, 0440, kone_sysfs_show_firmware_version, NULL); static ssize_t kone_sysfs_show_tcu(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kone->settings.tcu); } static int kone_tcu_command(struct usb_device *usb_dev, int number) { unsigned char value; value = number; return kone_send(usb_dev, kone_command_calibrate, &value, 1); } /* * Calibrating the tcu is the only action that changes settings data inside the * mouse, so this data needs to be reread */ static ssize_t kone_sysfs_set_tcu(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct kone_device *kone; struct usb_device *usb_dev; int retval; unsigned long state; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &state); if (retval) return retval; if (state != 0 && state != 1) return -EINVAL; mutex_lock(&kone->kone_lock); if (state == 1) { /* state activate */ retval = kone_tcu_command(usb_dev, 1); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 2); if (retval) goto exit_unlock; ssleep(5); /* tcu needs this time for calibration */ retval = kone_tcu_command(usb_dev, 3); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 0); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 4); if (retval) goto exit_unlock; /* * Kone needs this time to settle things. * Reading settings too early will result in invalid data. * Roccat's driver waits 1 sec, maybe this time could be * shortened. 
*/ ssleep(1); } /* calibration changes values in settings, so reread */ retval = kone_get_settings(usb_dev, &kone->settings); if (retval) goto exit_no_settings; /* only write settings back if activation state is different */ if (kone->settings.tcu != state) { kone->settings.tcu = state; kone_set_settings_checksum(&kone->settings); retval = kone_set_settings(usb_dev, &kone->settings); if (retval) { dev_err(&usb_dev->dev, "couldn't set tcu state\n"); /* * try to reread valid settings into buffer overwriting * first error code */ retval = kone_get_settings(usb_dev, &kone->settings); if (retval) goto exit_no_settings; goto exit_unlock; } /* calibration resets profile */ kone_profile_activated(kone, kone->settings.startup_profile); } retval = size; exit_no_settings: dev_err(&usb_dev->dev, "couldn't read settings\n"); exit_unlock: mutex_unlock(&kone->kone_lock); return retval; } static DEVICE_ATTR(tcu, 0660, kone_sysfs_show_tcu, kone_sysfs_set_tcu); static ssize_t kone_sysfs_show_startup_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kone->settings.startup_profile); } static ssize_t kone_sysfs_set_startup_profile(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct kone_device *kone; struct usb_device *usb_dev; int retval; unsigned long new_startup_profile; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &new_startup_profile); if (retval) return retval; if (new_startup_profile < 1 || new_startup_profile > 5) return -EINVAL; mutex_lock(&kone->kone_lock); kone->settings.startup_profile = new_startup_profile; kone_set_settings_checksum(&kone->settings); retval = kone_set_settings(usb_dev, &kone->settings); if (retval) { mutex_unlock(&kone->kone_lock); return retval; } /* changing the startup profile immediately activates this profile */ kone_profile_activated(kone, new_startup_profile); kone_profile_report(kone, new_startup_profile); mutex_unlock(&kone->kone_lock); return size; } static DEVICE_ATTR(startup_profile, 0660, kone_sysfs_show_startup_profile, kone_sysfs_set_startup_profile); static struct attribute *kone_attrs[] = { /* * Read actual dpi settings. * Returns raw value for further processing. Refer to enum * kone_polling_rates to get real value. */ &dev_attr_actual_dpi.attr, &dev_attr_actual_profile.attr, /* * The mouse can be equipped with one of four supplied weights from 5 * to 20 grams which are recognized and its value can be read out. * This returns the raw value reported by the mouse for easy evaluation * by software. Refer to enum kone_weights to get corresponding real * weight. */ &dev_attr_weight.attr, /* * Prints firmware version stored in mouse as integer. * The raw value reported by the mouse is returned for easy evaluation, * to get the real version number the decimal point has to be shifted 2 * positions to the left. E.g. a value of 138 means 1.38. */ &dev_attr_firmware_version.attr, /* * Prints state of Tracking Control Unit as number where 0 = off and * 1 = on. 
Writing 0 deactivates tcu and writing 1 calibrates and * activates the tcu */ &dev_attr_tcu.attr, /* Prints and takes the number of the profile the mouse starts with */ &dev_attr_startup_profile.attr, NULL, }; static struct bin_attribute *kone_bin_attributes[] = { &bin_attr_settings, &bin_attr_profile1, &bin_attr_profile2, &bin_attr_profile3, &bin_attr_profile4, &bin_attr_profile5, NULL, }; static const struct attribute_group kone_group = { .attrs = kone_attrs, .bin_attrs = kone_bin_attributes, }; static const struct attribute_group *kone_groups[] = { &kone_group, NULL, }; /* kone_class is used for creating sysfs attributes via roccat char device */ static const struct class kone_class = { .name = "kone", .dev_groups = kone_groups, }; static int kone_init_kone_device_struct(struct usb_device *usb_dev, struct kone_device *kone) { uint i; int retval; mutex_init(&kone->kone_lock); for (i = 0; i < 5; ++i) { retval = kone_get_profile(usb_dev, &kone->profiles[i], i + 1); if (retval) return retval; } retval = kone_get_settings(usb_dev, &kone->settings); if (retval) return retval; retval = kone_get_firmware_version(usb_dev, &kone->firmware_version); if (retval) return retval; kone_profile_activated(kone, kone->settings.startup_profile); return 0; } /* * Since IGNORE_MOUSE quirk moved to hid-apple, there is no way to bind only to * mousepart if usb_hid is compiled into the kernel and kone is compiled as * module. * Secial behaviour is bound only to mousepart since only mouseevents contain * additional notifications. */ static int kone_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct kone_device *kone; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { kone = kzalloc(sizeof(*kone), GFP_KERNEL); if (!kone) return -ENOMEM; hid_set_drvdata(hdev, kone); retval = kone_init_kone_device_struct(usb_dev, kone); if (retval) { hid_err(hdev, "couldn't init struct kone_device\n"); goto exit_free; } retval = roccat_connect(&kone_class, hdev, sizeof(struct kone_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); /* be tolerant about not getting chrdev */ } else { kone->roccat_claimed = 1; kone->chrdev_minor = retval; } } else { hid_set_drvdata(hdev, NULL); } return 0; exit_free: kfree(kone); return retval; } static void kone_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct kone_device *kone; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { kone = hid_get_drvdata(hdev); if (kone->roccat_claimed) roccat_disconnect(kone->chrdev_minor); kfree(hid_get_drvdata(hdev)); } } static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = kone_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void kone_remove(struct hid_device *hdev) { kone_remove_specials(hdev); hid_hw_stop(hdev); } /* handle special events and keep actual profile and dpi values up to date */ static void kone_keep_values_up_to_date(struct kone_device *kone, struct kone_mouse_event 
const *event) { switch (event->event) { case kone_mouse_event_switch_profile: kone->actual_dpi = kone->profiles[event->value - 1]. startup_dpi; fallthrough; case kone_mouse_event_osd_profile: kone->actual_profile = event->value; break; case kone_mouse_event_switch_dpi: case kone_mouse_event_osd_dpi: kone->actual_dpi = event->value; break; } } static void kone_report_to_chrdev(struct kone_device const *kone, struct kone_mouse_event const *event) { struct kone_roccat_report roccat_report; switch (event->event) { case kone_mouse_event_switch_profile: case kone_mouse_event_switch_dpi: case kone_mouse_event_osd_profile: case kone_mouse_event_osd_dpi: roccat_report.event = event->event; roccat_report.value = event->value; roccat_report.key = 0; roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report); break; case kone_mouse_event_call_overlong_macro: case kone_mouse_event_multimedia: if (event->value == kone_keystroke_action_press) { roccat_report.event = event->event; roccat_report.value = kone->actual_profile; roccat_report.key = event->macro_key; roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report); } break; } } /* * Is called for keyboard- and mousepart. * Only mousepart gets informations about special events in its extended event * structure. */ static int kone_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct kone_device *kone = hid_get_drvdata(hdev); struct kone_mouse_event *event = (struct kone_mouse_event *)data; /* keyboard events are always processed by default handler */ if (size != sizeof(struct kone_mouse_event)) return 0; if (kone == NULL) return 0; /* * Firmware 1.38 introduced new behaviour for tilt and special buttons. * Pressed button is reported in each movement event. * Workaround sends only one event per press. */ if (memcmp(&kone->last_mouse_event.tilt, &event->tilt, 5)) memcpy(&kone->last_mouse_event, event, sizeof(struct kone_mouse_event)); else memset(&event->wipe, 0, sizeof(event->wipe)); kone_keep_values_up_to_date(kone, event); if (kone->roccat_claimed) kone_report_to_chrdev(kone, event); return 0; /* always do further processing */ } static const struct hid_device_id kone_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONE) }, { } }; MODULE_DEVICE_TABLE(hid, kone_devices); static struct hid_driver kone_driver = { .name = "kone", .id_table = kone_devices, .probe = kone_probe, .remove = kone_remove, .raw_event = kone_raw_event }; static int __init kone_init(void) { int retval; /* class name has to be same as driver name */ retval = class_register(&kone_class); if (retval) return retval; retval = hid_register_driver(&kone_driver); if (retval) class_unregister(&kone_class); return retval; } static void __exit kone_exit(void) { hid_unregister_driver(&kone_driver); class_unregister(&kone_class); } module_init(kone_init); module_exit(kone_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Kone driver"); MODULE_LICENSE("GPL v2");
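/*
 * Illustrative sketch, not part of the driver: kone_set_settings_checksum()
 * above protects the settings block with a plain byte sum over everything
 * except the trailing 16-bit checksum field, stored little-endian.  The block
 * size below is arbitrary; only the "sum all bytes but the last two" rule and
 * the little-endian store are taken from the driver.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static void set_checksum(unsigned char *block, size_t size)
{
	uint16_t sum = 0;
	size_t i;

	for (i = 0; i < size - 2; i++)
		sum += block[i];
	/* store little-endian, as cpu_to_le16() does in the driver */
	block[size - 2] = sum & 0xff;
	block[size - 1] = sum >> 8;
}

int main(void)
{
	unsigned char settings[16];	/* stand-in for struct kone_settings */

	memset(settings, 0x11, sizeof(settings));
	set_checksum(settings, sizeof(settings));
	printf("checksum = 0x%04x\n", settings[14] | (settings[15] << 8));
	return 0;
}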
// SPDX-License-Identifier: GPL-2.0-only /* * The NFC Controller Interface is the communication protocol between an * NFC Controller (NFCC) and a Device Host (DH). * This is the HCI over NCI implementation, as specified in the 10.2 * section of the NCI 1.1 specification. * * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. 
*/ #include <linux/skbuff.h> #include "../nfc.h" #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include <linux/nfc.h> #include <linux/kcov.h> struct nci_data { u8 conn_id; u8 pipe; u8 cmd; const u8 *data; u32 data_len; } __packed; struct nci_hci_create_pipe_params { u8 src_gate; u8 dest_host; u8 dest_gate; } __packed; struct nci_hci_create_pipe_resp { u8 src_host; u8 src_gate; u8 dest_host; u8 dest_gate; u8 pipe; } __packed; struct nci_hci_delete_pipe_noti { u8 pipe; } __packed; struct nci_hci_all_pipe_cleared_noti { u8 host; } __packed; struct nci_hcp_message { u8 header; /* type -cmd,evt,rsp- + instruction */ u8 data[]; } __packed; struct nci_hcp_packet { u8 header; /* cbit+pipe */ struct nci_hcp_message message; } __packed; #define NCI_HCI_ANY_SET_PARAMETER 0x01 #define NCI_HCI_ANY_GET_PARAMETER 0x02 #define NCI_HCI_ANY_CLOSE_PIPE 0x04 #define NCI_HCI_ADM_CLEAR_ALL_PIPE 0x14 #define NCI_HFP_NO_CHAINING 0x80 #define NCI_NFCEE_ID_HCI 0x80 #define NCI_EVT_HOT_PLUG 0x03 #define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 #define NCI_HCI_ADM_CREATE_PIPE 0x10 #define NCI_HCI_ADM_DELETE_PIPE 0x11 /* HCP headers */ #define NCI_HCI_HCP_PACKET_HEADER_LEN 1 #define NCI_HCI_HCP_MESSAGE_HEADER_LEN 1 #define NCI_HCI_HCP_HEADER_LEN 2 /* HCP types */ #define NCI_HCI_HCP_COMMAND 0x00 #define NCI_HCI_HCP_EVENT 0x01 #define NCI_HCI_HCP_RESPONSE 0x02 #define NCI_HCI_ADM_NOTIFY_PIPE_CREATED 0x12 #define NCI_HCI_ADM_NOTIFY_PIPE_DELETED 0x13 #define NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED 0x15 #define NCI_HCI_FRAGMENT 0x7f #define NCI_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) |\ ((instr) & 0x3f)) #define NCI_HCP_MSG_GET_TYPE(header) ((header & 0xc0) >> 6) #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) static int nci_hci_result_to_errno(u8 result) { switch (result) { case NCI_HCI_ANY_OK: return 0; case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: return -EOPNOTSUPP; case NCI_HCI_ANY_E_TIMEOUT: return -ETIME; default: return -1; } } /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { int i; for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { hdev->pipes[i].gate = NCI_HCI_INVALID_GATE; hdev->pipes[i].host = NCI_HCI_INVALID_HOST; } memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); } static void nci_hci_reset_pipes_per_host(struct nci_dev *ndev, u8 host) { int i; for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { if (ndev->hci_dev->pipes[i].host == host) { ndev->hci_dev->pipes[i].gate = NCI_HCI_INVALID_GATE; ndev->hci_dev->pipes[i].host = NCI_HCI_INVALID_HOST; } } } /* Fragment HCI data over NCI packet. * NFC Forum NCI 10.2.2 Data Exchange: * The payload of the Data Packets sent on the Logical Connection SHALL be * valid HCP packets, as defined within [ETSI_102622]. Each Data Packet SHALL * contain a single HCP packet. NCI Segmentation and Reassembly SHALL NOT be * applied to Data Messages in either direction. The HCI fragmentation mechanism * is used if required. 
*/ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, const u8 data_type, const u8 *data, size_t data_len) { const struct nci_conn_info *conn_info; struct sk_buff *skb; int len, i, r; u8 cb = pipe; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *(u8 *)skb_push(skb, 1) = data_type; do { /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { cb |= NCI_HFP_NO_CHAINING; len = data_len - i; } else { len = conn_info->max_pkt_payload_len - skb->len - 1; } *(u8 *)skb_push(skb, 1) = cb; if (len > 0) skb_put_data(skb, data + i, len); r = nci_send_data(ndev, conn_info->conn_id, skb); if (r < 0) return r; i += len; if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); return i; } static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt) { const struct nci_data *data = opt; nci_hci_send_data(ndev, data->pipe, data->cmd, data->data, data->data_len); } int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, const u8 *param, size_t param_len) { u8 pipe = ndev->hci_dev->gate2pipe[gate]; if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; return nci_hci_send_data(ndev, pipe, NCI_HCP_HEADER(NCI_HCI_HCP_EVENT, event), param, param_len); } EXPORT_SYMBOL(nci_hci_send_event); int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, cmd); data.data = param; data.data_len = param_len; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); if (!r && skb) *skb = conn_info->rx_skb; } return r; } EXPORT_SYMBOL(nci_hci_send_cmd); int nci_hci_clear_all_pipes(struct nci_dev *ndev) { int r; r = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_CLEAR_ALL_PIPE, NULL, 0, NULL); if (r < 0) return r; nci_hci_reset_pipes(ndev->hci_dev); return r; } EXPORT_SYMBOL(nci_hci_clear_all_pipes); static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, u8 event, struct sk_buff *skb) { if (ndev->ops->hci_event_received) ndev->ops->hci_event_received(ndev, pipe, event, skb); } static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb) { u8 gate = ndev->hci_dev->pipes[pipe].gate; u8 status = NCI_HCI_ANY_OK | ~NCI_HCI_FRAGMENT; u8 dest_gate, new_pipe; struct nci_hci_create_pipe_resp *create_info; struct nci_hci_delete_pipe_noti *delete_info; struct nci_hci_all_pipe_cleared_noti *cleared_info; pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); switch (cmd) { case NCI_HCI_ADM_NOTIFY_PIPE_CREATED: if (skb->len != 5) { status = NCI_HCI_ANY_E_NOK; goto exit; } create_info = (struct 
nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; if (new_pipe >= NCI_HCI_MAX_PIPES) { status = NCI_HCI_ANY_E_NOK; goto exit; } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we * are the destination and it is our local gate */ ndev->hci_dev->gate2pipe[dest_gate] = new_pipe; ndev->hci_dev->pipes[new_pipe].gate = dest_gate; ndev->hci_dev->pipes[new_pipe].host = create_info->src_host; break; case NCI_HCI_ANY_OPEN_PIPE: /* If the pipe is not created report an error */ if (gate == NCI_HCI_INVALID_GATE) { status = NCI_HCI_ANY_E_NOK; goto exit; } break; case NCI_HCI_ADM_NOTIFY_PIPE_DELETED: if (skb->len != 1) { status = NCI_HCI_ANY_E_NOK; goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { status = NCI_HCI_ANY_E_NOK; goto exit; } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; ndev->hci_dev->pipes[delete_info->pipe].host = NCI_HCI_INVALID_HOST; break; case NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: if (skb->len != 1) { status = NCI_HCI_ANY_E_NOK; goto exit; } cleared_info = (struct nci_hci_all_pipe_cleared_noti *)skb->data; nci_hci_reset_pipes_per_host(ndev, cleared_info->host); break; default: pr_debug("Discarded unknown cmd %x to gate %x\n", cmd, gate); break; } if (ndev->ops->hci_cmd_received) ndev->ops->hci_cmd_received(ndev, pipe, cmd, skb); exit: nci_hci_send_data(ndev, pipe, status, NULL, 0); kfree_skb(skb); } static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct sk_buff *skb) { struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) goto exit; conn_info->rx_skb = skb; exit: nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. * skb contains optional message data only. 
*/ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, u8 type, u8 instruction, struct sk_buff *skb) { switch (type) { case NCI_HCI_HCP_RESPONSE: nci_hci_resp_received(ndev, pipe, skb); break; case NCI_HCI_HCP_COMMAND: nci_hci_cmd_received(ndev, pipe, instruction, skb); break; case NCI_HCI_HCP_EVENT: nci_hci_event_received(ndev, pipe, instruction, skb); break; default: pr_err("UNKNOWN MSG Type %d, instruction=%d\n", type, instruction); kfree_skb(skb); break; } nci_req_complete(ndev, NCI_STATUS_OK); } static void nci_hci_msg_rx_work(struct work_struct *work) { struct nci_hci_dev *hdev = container_of(work, struct nci_hci_dev, msg_rx_work); struct sk_buff *skb; const struct nci_hcp_message *message; u8 pipe, type, instruction; for (; (skb = skb_dequeue(&hdev->msg_rx_queue)); kcov_remote_stop()) { kcov_remote_start_common(skb_get_kcov_handle(skb)); pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); instruction = NCI_HCP_MSG_GET_CMD(message->header); skb_pull(skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); nci_hci_hcp_message_rx(hdev->ndev, pipe, type, instruction, skb); } } void nci_hci_data_received_cb(void *context, struct sk_buff *skb, int err) { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; if (err) { nci_req_complete(ndev, err); return; } packet = (struct nci_hcp_packet *)skb->data; if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); return; } /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len += (frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN); } hcp_skb = nfc_alloc_recv_skb(NCI_HCI_HCP_PACKET_HEADER_LEN + msg_len, GFP_KERNEL); if (!hcp_skb) { nci_req_complete(ndev, -ENOMEM); return; } skb_put_u8(hcp_skb, pipe); skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; skb_put_data(hcp_skb, frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); } else { packet->header &= NCI_HCI_FRAGMENT; hcp_skb = skb; } /* if this is a response, dispatch immediately to * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. 
*/ packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); nci_hci_hcp_message_rx(ndev, pipe, type, NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); } } int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) { struct nci_data data; const struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_OPEN_PIPE); data.data = NULL; data.data_len = 0; return nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); } EXPORT_SYMBOL(nci_hci_open_pipe); static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, int *result) { u8 pipe; struct sk_buff *skb; struct nci_hci_create_pipe_params params; const struct nci_hci_create_pipe_resp *resp; pr_debug("gate=%d\n", dest_gate); params.src_gate = NCI_HCI_ADMIN_GATE; params.dest_host = dest_host; params.dest_gate = dest_gate; *result = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_CREATE_PIPE, (u8 *)&params, sizeof(params), &skb); if (*result < 0) return NCI_HCI_INVALID_PIPE; resp = (struct nci_hci_create_pipe_resp *)skb->data; pipe = resp->pipe; kfree_skb(skb); pr_debug("pipe created=%d\n", pipe); return pipe; } static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) { return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 *tmp; u8 pipe = ndev->hci_dev->gate2pipe[gate]; pr_debug("idx=%d to gate %d\n", idx, gate); if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; tmp = kmalloc(1 + param_len, GFP_KERNEL); if (!tmp) return -ENOMEM; *tmp = idx; memcpy(tmp + 1, param, param_len); data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_SET_PARAMETER); data.data = tmp; data.data_len = param_len + 1; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); } kfree(tmp); return r; } EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; pr_debug("idx=%d to gate %d\n", idx, gate); if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_GET_PARAMETER); data.data = &idx; data.data_len = 1; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( 
NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); if (!r && skb) *skb = conn_info->rx_skb; } return r; } EXPORT_SYMBOL(nci_hci_get_param); int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, u8 pipe) { bool pipe_created = false; int r; if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) return 0; if (ndev->hci_dev->gate2pipe[dest_gate] != NCI_HCI_INVALID_PIPE) return -EADDRINUSE; if (pipe != NCI_HCI_INVALID_PIPE) goto open_pipe; switch (dest_gate) { case NCI_HCI_LINK_MGMT_GATE: pipe = NCI_HCI_LINK_MGMT_PIPE; break; case NCI_HCI_ADMIN_GATE: pipe = NCI_HCI_ADMIN_PIPE; break; default: pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r); if (pipe == NCI_HCI_INVALID_PIPE) return r; pipe_created = true; break; } open_pipe: r = nci_hci_open_pipe(ndev, pipe); if (r < 0) { if (pipe_created) { if (nci_hci_delete_pipe(ndev, pipe) < 0) { /* TODO: Cannot clean by deleting pipe... * -> inconsistent state */ } } return r; } ndev->hci_dev->pipes[pipe].gate = dest_gate; ndev->hci_dev->pipes[pipe].host = dest_host; ndev->hci_dev->gate2pipe[dest_gate] = pipe; return 0; } EXPORT_SYMBOL(nci_hci_connect_gate); static int nci_hci_dev_connect_gates(struct nci_dev *ndev, u8 gate_count, const struct nci_hci_gate *gates) { int r; while (gate_count--) { r = nci_hci_connect_gate(ndev, gates->dest_host, gates->gate, gates->pipe); if (r < 0) return r; gates++; } return 0; } int nci_hci_dev_session_init(struct nci_dev *ndev) { struct nci_conn_info *conn_info; struct sk_buff *skb; int r; ndev->hci_dev->count_pipes = 0; ndev->hci_dev->expected_pipes = 0; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; conn_info->data_exchange_cb = nci_hci_data_received_cb; conn_info->data_exchange_cb_context = ndev; nci_hci_reset_pipes(ndev->hci_dev); if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) return -EPROTO; r = nci_hci_connect_gate(ndev, ndev->hci_dev->init_data.gates[0].dest_host, ndev->hci_dev->init_data.gates[0].gate, ndev->hci_dev->init_data.gates[0].pipe); if (r < 0) return r; r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); if (r < 0) return r; if (skb->len && skb->len == strlen(ndev->hci_dev->init_data.session_id) && !memcmp(ndev->hci_dev->init_data.session_id, skb->data, skb->len) && ndev->ops->hci_load_session) { /* Restore gate<->pipe table from some proprietary location. */ r = ndev->ops->hci_load_session(ndev); } else { r = nci_hci_clear_all_pipes(ndev); if (r < 0) goto exit; r = nci_hci_dev_connect_gates(ndev, ndev->hci_dev->init_data.gate_count, ndev->hci_dev->init_data.gates); if (r < 0) goto exit; r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, ndev->hci_dev->init_data.session_id, strlen(ndev->hci_dev->init_data.session_id)); } exit: kfree_skb(skb); return r; } EXPORT_SYMBOL(nci_hci_dev_session_init); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) { struct nci_hci_dev *hdev; hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); if (!hdev) return NULL; skb_queue_head_init(&hdev->rx_hcp_frags); INIT_WORK(&hdev->msg_rx_work, nci_hci_msg_rx_work); skb_queue_head_init(&hdev->msg_rx_queue); hdev->ndev = ndev; return hdev; } void nci_hci_deallocate(struct nci_dev *ndev) { kfree(ndev->hci_dev); }
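/*
 * Illustrative sketch, not part of the NCI core: HCP headers pack a 2-bit
 * message type plus a 6-bit instruction into one byte, and a no-chaining bit
 * plus a 7-bit pipe id into the packet header byte.  The shift/mask values
 * mirror NCI_HCP_HEADER(), NCI_HCP_MSG_GET_TYPE(), NCI_HCP_MSG_GET_CMD() and
 * NCI_HCP_MSG_GET_PIPE() above; the pipe number is an arbitrary example.
 */
#include <stdio.h>
#include <stdint.h>

#define HCP_HEADER(type, instr)	((((type) & 0x03) << 6) | ((instr) & 0x3f))
#define HCP_GET_TYPE(header)	(((header) & 0xc0) >> 6)
#define HCP_GET_CMD(header)	((header) & 0x3f)
#define HCP_GET_PIPE(header)	((header) & 0x7f)

int main(void)
{
	/* command 0x01 (ANY_SET_PARAMETER) with type 0x00 (command) */
	uint8_t msg_hdr = HCP_HEADER(0x00, 0x01);
	/* packet header: no-chaining bit set (last fragment) on example pipe 5 */
	uint8_t pkt_hdr = 0x80 | 0x05;

	printf("message header 0x%02x -> type %d, instruction 0x%02x\n",
	       msg_hdr, HCP_GET_TYPE(msg_hdr), HCP_GET_CMD(msg_hdr));
	printf("packet header  0x%02x -> pipe %d, no-chaining bit %s\n",
	       pkt_hdr, HCP_GET_PIPE(pkt_hdr),
	       (pkt_hdr & 0x80) ? "set" : "clear");
	return 0;
}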
/* * include/linux/ktime.h * * ktime_t - nanosecond-resolution time format. * * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar * * data type definitions, declarations, prototypes and macros. * * Started by: Thomas Gleixner and Ingo Molnar * * Credits: * * Roman Zippel provided the ideas and primary code snippets of * the ktime_t union and further simplifications of the original * code. * * For licensing details see kernel-base/COPYING */ #ifndef _LINUX_KTIME_H #define _LINUX_KTIME_H #include <asm/bug.h> #include <linux/jiffies.h> #include <linux/time.h> #include <linux/types.h> /** * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value * @secs: seconds to set * @nsecs: nanoseconds to set * * Return: The ktime_t representation of the value. */ static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs) { if (unlikely(secs >= KTIME_SEC_MAX)) return KTIME_MAX; return secs * NSEC_PER_SEC + (s64)nsecs; } /* Subtract two ktime_t variables. rem = lhs - rhs: */ #define ktime_sub(lhs, rhs) ((lhs) - (rhs)) /* Add two ktime_t variables. res = lhs + rhs: */ #define ktime_add(lhs, rhs) ((lhs) + (rhs)) /* * Same as ktime_add(), but avoids undefined behaviour on overflow; however, * this means that you must check the result for overflow yourself. */ #define ktime_add_unsafe(lhs, rhs) ((u64) (lhs) + (rhs)) /* * Add a ktime_t variable and a scalar nanosecond value. * res = kt + nsval: */ #define ktime_add_ns(kt, nsval) ((kt) + (nsval)) /* * Subtract a scalar nanosecond from a ktime_t variable * res = kt - nsval: */ #define ktime_sub_ns(kt, nsval) ((kt) - (nsval)) /* convert a timespec64 to ktime_t format: */ static inline ktime_t timespec64_to_ktime(struct timespec64 ts) { return ktime_set(ts.tv_sec, ts.tv_nsec); } /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec64(kt) ns_to_timespec64((kt)) /* Convert ktime_t to nanoseconds */ static inline s64 ktime_to_ns(const ktime_t kt) { return kt; } /** * ktime_compare - Compares two ktime_t variables for less, greater or equal * @cmp1: comparable1 * @cmp2: comparable2 * * Return: ... * cmp1 < cmp2: return <0 * cmp1 == cmp2: return 0 * cmp1 > cmp2: return >0 */ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) { if (cmp1 < cmp2) return -1; if (cmp1 > cmp2) return 1; return 0; } /** * ktime_after - Compare if a ktime_t value is bigger than another one. * @cmp1: comparable1 * @cmp2: comparable2 * * Return: true if cmp1 happened after cmp2.
*/ static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2) { return ktime_compare(cmp1, cmp2) > 0; } /** * ktime_before - Compare if a ktime_t value is smaller than another one. * @cmp1: comparable1 * @cmp2: comparable2 * * Return: true if cmp1 happened before cmp2. */ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) { return ktime_compare(cmp1, cmp2) < 0; } #if BITS_PER_LONG < 64 extern s64 __ktime_divns(const ktime_t kt, s64 div); static inline s64 ktime_divns(const ktime_t kt, s64 div) { /* * Negative divisors could cause an inf loop, * so bug out here. */ BUG_ON(div < 0); if (__builtin_constant_p(div) && !(div >> 32)) { s64 ns = kt; u64 tmp = ns < 0 ? -ns : ns; do_div(tmp, div); return ns < 0 ? -tmp : tmp; } else { return __ktime_divns(kt, div); } } #else /* BITS_PER_LONG < 64 */ static inline s64 ktime_divns(const ktime_t kt, s64 div) { /* * 32-bit implementation cannot handle negative divisors, * so catch them on 64bit as well. */ WARN_ON(div < 0); return kt / div; } #endif static inline s64 ktime_to_us(const ktime_t kt) { return ktime_divns(kt, NSEC_PER_USEC); } static inline s64 ktime_to_ms(const ktime_t kt) { return ktime_divns(kt, NSEC_PER_MSEC); } static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) { return ktime_to_us(ktime_sub(later, earlier)); } static inline s64 ktime_ms_delta(const ktime_t later, const ktime_t earlier) { return ktime_to_ms(ktime_sub(later, earlier)); } static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) { return ktime_add_ns(kt, usec * NSEC_PER_USEC); } static inline ktime_t ktime_add_ms(const ktime_t kt, const u64 msec) { return ktime_add_ns(kt, msec * NSEC_PER_MSEC); } static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) { return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) { return ktime_sub_ns(kt, msec * NSEC_PER_MSEC); } extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** * ktime_to_timespec64_cond - convert a ktime_t variable to timespec64 * format only if the variable contains data * @kt: the ktime_t variable to convert * @ts: the timespec variable to store the result in * * Return: %true if there was a successful conversion, %false if kt was 0. */ static inline __must_check bool ktime_to_timespec64_cond(const ktime_t kt, struct timespec64 *ts) { if (kt) { *ts = ktime_to_timespec64(kt); return true; } else { return false; } } #include <vdso/ktime.h> static inline ktime_t ns_to_ktime(u64 ns) { return ns; } static inline ktime_t ms_to_ktime(u64 ms) { return ms * NSEC_PER_MSEC; } # include <linux/timekeeping.h> #endif
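/*
 * Example (editor's addition, not part of the header above): a short sketch
 * of the ktime_t helpers defined here.  ktime_get() is provided by
 * <linux/timekeeping.h>, which this header pulls in at the end; the function
 * names below are placeholders.
 */
#include <linux/ktime.h>

/* Time a callback and report the elapsed time in microseconds. */
static s64 demo_time_op_us(void (*op)(void))
{
	ktime_t start, end;

	start = ktime_get();		/* monotonic clock, in nanoseconds */
	op();
	end = ktime_get();

	return ktime_us_delta(end, start);
}

/* Build a deadline 1.5 s in the future using the arithmetic helpers. */
static ktime_t demo_deadline(void)
{
	return ktime_add_ms(ktime_add(ktime_get(), ktime_set(1, 0)), 500);
}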
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Dynamic loading of modules into the kernel. * * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 * Rewritten again by Rusty Russell, 2002 */ #ifndef _LINUX_MODULE_H #define _LINUX_MODULE_H #include <linux/list.h> #include <linux/stat.h> #include <linux/buildid.h> #include <linux/compiler.h> #include <linux/cache.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/elf.h> #include <linux/stringify.h> #include <linux/kobject.h> #include <linux/moduleparam.h> #include <linux/jump_label.h> #include <linux/export.h> #include <linux/rbtree_latch.h> #include <linux/error-injection.h> #include <linux/tracepoint-defs.h> #include <linux/srcu.h> #include <linux/static_call_types.h> #include <linux/dynamic_debug.h> #include <linux/percpu.h> #include <asm/module.h> #define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN struct modversion_info { unsigned long crc; char name[MODULE_NAME_LEN]; }; struct module; struct exception_table_entry; struct module_kobject { struct kobject kobj; struct module *mod; struct kobject *drivers_dir; struct module_param_attrs *mp; struct completion *kobj_completion; } __randomize_layout; struct module_attribute { struct attribute attr; ssize_t (*show)(struct module_attribute *, struct module_kobject *, char *); ssize_t (*store)(struct module_attribute *, struct module_kobject *, const char *, size_t count); void (*setup)(struct module *, const char *); int (*test)(struct module *); void (*free)(struct module *); }; struct module_version_attribute { struct module_attribute mattr; const char *module_name; const char *version; }; extern ssize_t __modver_version_show(struct module_attribute *, struct module_kobject *, char *); extern struct module_attribute module_uevent; /* These are either module local, or the kernel's dummy ones. */ extern int init_module(void); extern void cleanup_module(void); #ifndef MODULE /** * module_init() - driver initialization entry point * @x: function to be run at kernel boot time or module insertion * * module_init() will either be called during do_initcalls() (if * builtin) or at module insertion time (if a module). There can only * be one per module. */ #define module_init(x) __initcall(x); /** * module_exit() - driver exit entry point * @x: function to be run when driver is removed * * module_exit() will wrap the driver clean-up code * with cleanup_module() when used with rmmod when * the driver is a module. If the driver is statically * compiled into the kernel, module_exit() has no effect. * There can only be one per module. */ #define module_exit(x) __exitcall(x); #else /* MODULE */ /* * In most cases loadable modules do not need custom * initcall levels. There are still some valid cases where * a driver may be needed early if built in, and does not * matter when built as a loadable module. Like bus * snooping debug drivers.
*/ #define early_initcall(fn) module_init(fn) #define core_initcall(fn) module_init(fn) #define core_initcall_sync(fn) module_init(fn) #define postcore_initcall(fn) module_init(fn) #define postcore_initcall_sync(fn) module_init(fn) #define arch_initcall(fn) module_init(fn) #define subsys_initcall(fn) module_init(fn) #define subsys_initcall_sync(fn) module_init(fn) #define fs_initcall(fn) module_init(fn) #define fs_initcall_sync(fn) module_init(fn) #define rootfs_initcall(fn) module_init(fn) #define device_initcall(fn) module_init(fn) #define device_initcall_sync(fn) module_init(fn) #define late_initcall(fn) module_init(fn) #define late_initcall_sync(fn) module_init(fn) #define console_initcall(fn) module_init(fn) /* Each module must use one module_init(). */ #define module_init(initfn) \ static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ int init_module(void) __copy(initfn) \ __attribute__((alias(#initfn))); \ ___ADDRESSABLE(init_module, __initdata); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ void cleanup_module(void) __copy(exitfn) \ __attribute__((alias(#exitfn))); \ ___ADDRESSABLE(cleanup_module, __exitdata); #endif /* This means "can be init if no module support, otherwise module load may call it." */ #ifdef CONFIG_MODULES #define __init_or_module #define __initdata_or_module #define __initconst_or_module #define __INIT_OR_MODULE .text #define __INITDATA_OR_MODULE .data #define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits #else #define __init_or_module __init #define __initdata_or_module __initdata #define __initconst_or_module __initconst #define __INIT_OR_MODULE __INIT #define __INITDATA_OR_MODULE __INITDATA #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) /* For userspace: you can also call me... */ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) /* Soft module dependencies. See man modprobe.d for details. * Example: MODULE_SOFTDEP("pre: module-foo module-bar post: module-baz") */ #define MODULE_SOFTDEP(_softdep) MODULE_INFO(softdep, _softdep) /* * MODULE_FILE is used for generating modules.builtin * So, make it no-op when this is being built as a module */ #ifdef MODULE #define MODULE_FILE #else #define MODULE_FILE MODULE_INFO(file, KBUILD_MODFILE); #endif /* * The following license idents are currently accepted as indicating free * software modules * * "GPL" [GNU Public License v2] * "GPL v2" [GNU Public License v2] * "GPL and additional rights" [GNU Public License v2 rights and more] * "Dual BSD/GPL" [GNU Public License v2 * or BSD license choice] * "Dual MIT/GPL" [GNU Public License v2 * or MIT license choice] * "Dual MPL/GPL" [GNU Public License v2 * or Mozilla license choice] * * The following other idents are available * * "Proprietary" [Non free products] * * Both "GPL v2" and "GPL" (the latter also in dual licensed strings) are * merely stating that the module is licensed under the GPL v2, but are not * telling whether "GPL v2 only" or "GPL v2 or later". The reason why there * are two variants is a historic and failed attempt to convey more * information in the MODULE_LICENSE string. For module loading the * "only/or later" distinction is completely irrelevant and does neither * replace the proper license identifiers in the corresponding source file * nor amends them in any way. 
The sole purpose is to make the * 'Proprietary' flagging work and to refuse to bind symbols which are * exported with EXPORT_SYMBOL_GPL when a non free module is loaded. * * In the same way "BSD" is not a clear license information. It merely * states, that the module is licensed under one of the compatible BSD * license variants. The detailed and correct license information is again * to be found in the corresponding source files. * * There are dual licensed components, but when running with Linux it is the * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL * is a GPL combined work. * * This exists for several reasons * 1. So modinfo can show license info for users wanting to vet their setup * is free * 2. So the community can ignore bug reports including proprietary modules * 3. So vendors can do likewise based on their own policies */ #define MODULE_LICENSE(_license) MODULE_FILE MODULE_INFO(license, _license) /* * Author(s), use "Name <email>" or just "Name", for multiple * authors use multiple MODULE_AUTHOR() statements/lines. */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ extern typeof(name) __mod_##type##__##name##_device_table \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) #endif /* Version of form [<epoch>:]<version>[-<extra-version>]. * Or for CVS/RCS ID version, everything but the number is stripped. * <epoch>: A (small) unsigned integer which allows you to start versions * anew. If not mentioned, it's zero. eg. "2:1.0" is after * "1:2.0". * <version>: The <version> may contain only alphanumerics and the * character `.'. Ordered by numeric sort for numeric parts, * ascii sort for ascii parts (as per RPM or DEB algorithm). * <extraversion>: Like <version>, but inserted for local * customizations, eg "rh3" or "rusty1". * Using this automatically adds a checksum of the .c files and the * local headers in "srcversion". */ #if defined(MODULE) || !defined(CONFIG_SYSFS) #define MODULE_VERSION(_version) MODULE_INFO(version, _version) #else #define MODULE_VERSION(_version) \ MODULE_INFO(version, _version); \ static struct module_version_attribute __modver_attr \ __used __section("__modver") \ __aligned(__alignof__(struct module_version_attribute)) \ = { \ .mattr = { \ .attr = { \ .name = "version", \ .mode = S_IRUGO, \ }, \ .show = __modver_version_show, \ }, \ .module_name = KBUILD_MODNAME, \ .version = _version, \ } #endif /* Optional firmware file (or files) needed by the module * format is simply firmware file name. Multiple firmware * files require multiple MODULE_FIRMWARE() specifiers */ #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) #define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, __stringify(ns)) struct notifier_block; #ifdef CONFIG_MODULES extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); #define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x)))) /* modules using other modules: kdb wants to see this. */ struct module_use { struct list_head source_list; struct list_head target_list; struct module *source, *target; }; enum module_state { MODULE_STATE_LIVE, /* Normal state. 
*/ MODULE_STATE_COMING, /* Full formed, running module_init. */ MODULE_STATE_GOING, /* Going away. */ MODULE_STATE_UNFORMED, /* Still setting it up. */ }; struct mod_tree_node { struct module *mod; struct latch_tree_node node; }; enum mod_mem_type { MOD_TEXT = 0, MOD_DATA, MOD_RODATA, MOD_RO_AFTER_INIT, MOD_INIT_TEXT, MOD_INIT_DATA, MOD_INIT_RODATA, MOD_MEM_NUM_TYPES, MOD_INVALID = -1, }; #define mod_mem_type_is_init(type) \ ((type) == MOD_INIT_TEXT || \ (type) == MOD_INIT_DATA || \ (type) == MOD_INIT_RODATA) #define mod_mem_type_is_core(type) (!mod_mem_type_is_init(type)) #define mod_mem_type_is_text(type) \ ((type) == MOD_TEXT || \ (type) == MOD_INIT_TEXT) #define mod_mem_type_is_data(type) (!mod_mem_type_is_text(type)) #define mod_mem_type_is_core_data(type) \ (mod_mem_type_is_core(type) && \ mod_mem_type_is_data(type)) #define for_each_mod_mem_type(type) \ for (enum mod_mem_type (type) = 0; \ (type) < MOD_MEM_NUM_TYPES; (type)++) #define for_class_mod_mem_type(type, class) \ for_each_mod_mem_type(type) \ if (mod_mem_type_is_##class(type)) struct module_memory { void *base; unsigned int size; #ifdef CONFIG_MODULES_TREE_LOOKUP struct mod_tree_node mtn; #endif }; #ifdef CONFIG_MODULES_TREE_LOOKUP /* Only touch one cacheline for common rbtree-for-core-layout case. */ #define __module_memory_align ____cacheline_aligned #else #define __module_memory_align #endif struct mod_kallsyms { Elf_Sym *symtab; unsigned int num_symtab; char *strtab; char *typetab; }; #ifdef CONFIG_LIVEPATCH /** * struct klp_modinfo - ELF information preserved from the livepatch module * * @hdr: ELF header * @sechdrs: Section header table * @secstrings: String table for the section headers * @symndx: The symbol table section index */ struct klp_modinfo { Elf_Ehdr hdr; Elf_Shdr *sechdrs; char *secstrings; unsigned int symndx; }; #endif struct module { enum module_state state; /* Member of list of modules */ struct list_head list; /* Unique handle for this module */ char name[MODULE_NAME_LEN]; #ifdef CONFIG_STACKTRACE_BUILD_ID /* Module build ID */ unsigned char build_id[BUILD_ID_SIZE_MAX]; #endif /* Sysfs stuff. */ struct module_kobject mkobj; struct module_attribute *modinfo_attrs; const char *version; const char *srcversion; struct kobject *holders_dir; /* Exported symbols */ const struct kernel_symbol *syms; const s32 *crcs; unsigned int num_syms; #ifdef CONFIG_ARCH_USES_CFI_TRAPS s32 *kcfi_traps; s32 *kcfi_traps_end; #endif /* Kernel parameters. */ #ifdef CONFIG_SYSFS struct mutex param_lock; #endif struct kernel_param *kp; unsigned int num_kp; /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; const s32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_MODULE_SIG /* Signature was verified. */ bool sig_ok; #endif bool async_probe_requested; /* Exception table */ unsigned int num_exentries; struct exception_table_entry *extable; /* Startup function. 
*/ int (*init)(void); struct module_memory mem[MOD_MEM_NUM_TYPES] __module_memory_align; /* Arch-specific module values */ struct mod_arch_specific arch; unsigned long taints; /* same bits as kernel:taint_flags */ #ifdef CONFIG_GENERIC_BUG /* Support for BUG */ unsigned num_bugs; struct list_head bug_list; struct bug_entry *bug_table; #endif #ifdef CONFIG_KALLSYMS /* Protected by RCU and/or module_mutex: use rcu_dereference() */ struct mod_kallsyms __rcu *kallsyms; struct mod_kallsyms core_kallsyms; /* Section attributes */ struct module_sect_attrs *sect_attrs; /* Notes attributes */ struct module_notes_attrs *notes_attrs; #endif /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; #ifdef CONFIG_SMP /* Per-cpu data. */ void __percpu *percpu; unsigned int percpu_size; #endif void *noinstr_text_start; unsigned int noinstr_text_size; #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; tracepoint_ptr_t *tracepoints_ptrs; #endif #ifdef CONFIG_TREE_SRCU unsigned int num_srcu_structs; struct srcu_struct **srcu_struct_ptrs; #endif #ifdef CONFIG_BPF_EVENTS unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; #endif #ifdef CONFIG_DEBUG_INFO_BTF_MODULES unsigned int btf_data_size; void *btf_data; #endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; #endif #ifdef CONFIG_TRACING unsigned int num_trace_bprintk_fmt; const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call **trace_events; unsigned int num_trace_events; struct trace_eval_map **trace_evals; unsigned int num_trace_evals; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif #ifdef CONFIG_KPROBES void *kprobes_text_start; unsigned int kprobes_text_size; unsigned long *kprobe_blacklist; unsigned int num_kprobe_blacklist; #endif #ifdef CONFIG_HAVE_STATIC_CALL_INLINE int num_static_call_sites; struct static_call_site *static_call_sites; #endif #if IS_ENABLED(CONFIG_KUNIT) int num_kunit_init_suites; struct kunit_suite **kunit_init_suites; int num_kunit_suites; struct kunit_suite **kunit_suites; #endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ bool klp_alive; /* ELF information */ struct klp_modinfo *klp_info; #endif #ifdef CONFIG_PRINTK_INDEX unsigned int printk_index_size; struct pi_entry **printk_index_start; #endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ struct list_head source_list; /* What modules do I depend on? */ struct list_head target_list; /* Destruction function. */ void (*exit)(void); atomic_t refcnt; #endif #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; unsigned int num_ctors; #endif #ifdef CONFIG_FUNCTION_ERROR_INJECTION struct error_injection_entry *ei_funcs; unsigned int num_ei_funcs; #endif #ifdef CONFIG_DYNAMIC_DEBUG_CORE struct _ddebug_info dyndbg_info; #endif } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif #ifndef HAVE_ARCH_KALLSYMS_SYMBOL_VALUE static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym) { return sym->st_value; } #endif /* FIXME: It'd be nice to isolate modules during init, too, so they aren't used before they (may) fail. 
But presently too much code (IDE & SCSI) require entry into the module during init.*/ static inline bool module_is_live(struct module *mod) { return mod->state != MODULE_STATE_GOING; } struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); static inline bool within_module_mem_type(unsigned long addr, const struct module *mod, enum mod_mem_type type) { unsigned long base, size; base = (unsigned long)mod->mem[type].base; size = mod->mem[type].size; return addr - base < size; } static inline bool within_module_core(unsigned long addr, const struct module *mod) { for_class_mod_mem_type(type, core) { if (within_module_mem_type(addr, mod, type)) return true; } return false; } static inline bool within_module_init(unsigned long addr, const struct module *mod) { for_class_mod_mem_type(type, init) { if (within_module_mem_type(addr, mod, type)) return true; } return false; } static inline bool within_module(unsigned long addr, const struct module *mod) { return within_module_init(addr, mod) || within_module_core(addr, mod); } /* Search for module by name: must be in a RCU-sched critical section. */ struct module *find_module(const char *name); extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); #define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD int module_refcount(struct module *mod); void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(__stringify(x)) void symbol_put_addr(void *addr); /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ extern void __module_get(struct module *module); /** * try_module_get() - take module refcount unless module is being removed * @module: the module we should check for * * Only try to get a module reference count if the module is not being removed. * This call will fail if the module is in the process of being removed. * * Care must also be taken to ensure the module exists and is alive prior to * usage of this call. This can be gauranteed through two means: * * 1) Direct protection: you know an earlier caller must have increased the * module reference through __module_get(). This can typically be achieved * by having another entity other than the module itself increment the * module reference count. * * 2) Implied protection: there is an implied protection against module * removal. An example of this is the implied protection used by kernfs / * sysfs. The sysfs store / read file operations are guaranteed to exist * through the use of kernfs's active reference (see kernfs_active()) and a * sysfs / kernfs file removal cannot happen unless the same file is not * active. Therefore, if a sysfs file is being read or written to the module * which created it must still exist. It is therefore safe to use * try_module_get() on module sysfs store / read ops. * * One of the real values to try_module_get() is the module_is_live() check * which ensures that the caller of try_module_get() can yield to userspace * module removal requests and gracefully fail if the module is on its way out. * * Returns true if the reference count was successfully incremented. 
*/ extern bool try_module_get(struct module *module); /** * module_put() - release a reference count to a module * @module: the module we should release a reference count for * * If you successfully bump a reference count to a module with try_module_get(), * when you are finished you must call module_put() to release that reference * count. */ extern void module_put(struct module *module); #else /*!CONFIG_MODULE_UNLOAD*/ static inline bool try_module_get(struct module *module) { return !module || module_is_live(module); } static inline void module_put(struct module *module) { } static inline void __module_get(struct module *module) { } #define symbol_put(x) do { } while (0) #define symbol_put_addr(p) do { } while (0) #endif /* CONFIG_MODULE_UNLOAD */ /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ ({ \ struct module *__mod = (mod); \ __mod ? __mod->name : "kernel"; \ }) /* Dereference module function descriptor */ void *dereference_module_function_descriptor(struct module *mod, void *ptr); int register_module_notifier(struct notifier_block *nb); int unregister_module_notifier(struct notifier_block *nb); extern void print_modules(void); static inline bool module_requested_async_probing(struct module *module) { return module && module->async_probe_requested; } static inline bool is_livepatch_module(struct module *mod) { #ifdef CONFIG_LIVEPATCH return mod->klp; #else return false; #endif } void set_module_sig_enforced(void); #else /* !CONFIG_MODULES... */ static inline struct module *__module_address(unsigned long addr) { return NULL; } static inline struct module *__module_text_address(unsigned long addr) { return NULL; } static inline bool is_module_address(unsigned long addr) { return false; } static inline bool is_module_percpu_address(unsigned long addr) { return false; } static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) { return false; } static inline bool is_module_text_address(unsigned long addr) { return false; } static inline bool within_module_core(unsigned long addr, const struct module *mod) { return false; } static inline bool within_module_init(unsigned long addr, const struct module *mod) { return false; } static inline bool within_module(unsigned long addr, const struct module *mod) { return false; } /* Get/put a kernel symbol (calls should be symmetric) */ #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) #define symbol_put(x) do { } while (0) #define symbol_put_addr(x) do { } while (0) static inline void __module_get(struct module *module) { } static inline bool try_module_get(struct module *module) { return true; } static inline void module_put(struct module *module) { } #define module_name(mod) "kernel" static inline int register_module_notifier(struct notifier_block *nb) { /* no events will happen anyway, so this can always succeed */ return 0; } static inline int unregister_module_notifier(struct notifier_block *nb) { return 0; } #define module_put_and_kthread_exit(code) kthread_exit(code) static inline void print_modules(void) { } static inline bool module_requested_async_probing(struct module *module) { return false; } static inline void set_module_sig_enforced(void) { } /* Dereference module function descriptor */ static inline void *dereference_module_function_descriptor(struct module *mod, void *ptr) { return ptr; } #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS extern struct kset *module_kset; extern const struct kobj_type 
module_ktype; #endif /* CONFIG_SYSFS */ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) /* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */ #define __MODULE_STRING(x) __stringify(x) #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ #ifdef CONFIG_MITIGATION_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) { return true; } #endif #ifdef CONFIG_MODULE_SIG bool is_module_sig_enforced(void); static inline bool module_sig_ok(struct module *module) { return module->sig_ok; } #else /* !CONFIG_MODULE_SIG */ static inline bool is_module_sig_enforced(void) { return false; } static inline bool module_sig_ok(struct module *module) { return true; } #endif /* CONFIG_MODULE_SIG */ #if defined(CONFIG_MODULES) && defined(CONFIG_KALLSYMS) int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, unsigned long), void *data); /* For kallsyms to ask for address resolution. namebuf should be at * least KSYM_NAME_LEN long: a pointer to namebuf is returned if * found, otherwise NULL. */ const char *module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf); int lookup_module_symbol_name(unsigned long addr, char *symname); int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if * symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported); /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name); unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name); #else /* CONFIG_MODULES && CONFIG_KALLSYMS */ static inline int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, unsigned long), void *data) { return -EOPNOTSUPP; } /* For kallsyms to ask for address resolution. NULL means not found. */ static inline const char *module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf) { return NULL; } static inline int lookup_module_symbol_name(unsigned long addr, char *symname) { return -ERANGE; } static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported) { return -ERANGE; } static inline unsigned long module_kallsyms_lookup_name(const char *name) { return 0; } static inline unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name) { return 0; } #endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ #endif /* _LINUX_MODULE_H */
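/*
 * Example (editor's addition, not part of the header above): the canonical
 * minimal loadable module, showing module_init()/module_exit() and the
 * MODULE_* info tags declared in this header.  All names and strings are
 * placeholders.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/printk.h>

static int __init demo_init(void)
{
	pr_info("demo: loaded\n");
	return 0;			/* a negative value aborts the load */
}

static void __exit demo_exit(void)
{
	pr_info("demo: unloaded\n");
}

module_init(demo_init);
module_exit(demo_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Example Author <author@example.com>");
MODULE_DESCRIPTION("Minimal module skeleton");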
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NAMEI_H #define _LINUX_NAMEI_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/path.h> #include <linux/fcntl.h> #include <linux/errno.h> enum { MAX_NESTED_LINKS = 8 }; #define MAXSYMLINKS 40 /* * Type of the last component on LOOKUP_PARENT */ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; /* pathwalk mode */ #define LOOKUP_FOLLOW 0x0001 /* follow links at the end */ #define LOOKUP_DIRECTORY 0x0002 /* require a directory */ #define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */ #define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */ #define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */ #define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */ #define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */ #define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */ /* These tell filesystem methods that we are dealing with the final component... */ #define LOOKUP_OPEN 0x0100 /* ... in open */ #define LOOKUP_CREATE 0x0200 /* ... in object creation */ #define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */ #define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */ /* internal use only */ #define LOOKUP_PARENT 0x0010 /* Scoping flags for lookup. */ #define LOOKUP_NO_SYMLINKS 0x010000 /* No symlink crossing. */ #define LOOKUP_NO_MAGICLINKS 0x020000 /* No nd_jump_link() crossing. */ #define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ #define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ /* LOOKUP_* flags which do scope-related checks based on the dirfd.
*/ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) extern int path_pts(struct path *path); extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); static inline int user_path_at(int dfd, const char __user *name, unsigned flags, struct path *path) { return user_path_at_empty(dfd, name, flags, path, NULL); } struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, unsigned int flags); extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root); int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int); struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len); struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); /** * mode_strip_umask - handle vfs umask stripping * @dir: parent directory of the new inode * @mode: mode of the new inode to be created in @dir * * In most filesystems, umask stripping depends on whether or not the * filesystem supports POSIX ACLs. If the filesystem doesn't support it umask * stripping is done directly in here. If the filesystem does support POSIX * ACLs umask stripping is deferred until the filesystem calls * posix_acl_create(). * * Some filesystems (like NFSv4) also want to avoid umask stripping by the * VFS, but don't support POSIX ACLs. Those filesystems can set SB_I_NOUMASK * to get this effect without declaring that they support POSIX ACLs. 
* * Returns: mode */ static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umode_t mode) { if (!IS_POSIXACL(dir) && !(dir->i_sb->s_iflags & SB_I_NOUMASK)) mode &= ~current_umask(); return mode; } extern int __must_check nd_jump_link(const struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { ((char *) name)[min(len, maxlen)] = '\0'; } /** * retry_estale - determine whether the caller should retry an operation * @error: the error that would currently be returned * @flags: flags being used for next lookup attempt * * Check to see if the error code was -ESTALE, and then determine whether * to retry the call based on whether "flags" already has LOOKUP_REVAL set. * * Returns true if the caller should try the operation again. */ static inline bool retry_estale(const long error, const unsigned int flags) { return unlikely(error == -ESTALE && !(flags & LOOKUP_REVAL)); } #endif /* _LINUX_NAMEI_H */
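/*
 * Example (editor's addition, not part of the header above): an in-kernel
 * path lookup with kern_path() combined with the retry_estale() pattern
 * described above.  The path handling is deliberately minimal and the
 * function name is a placeholder.
 */
#include <linux/namei.h>
#include <linux/path.h>

static int demo_lookup(const char *name)
{
	struct path path;
	unsigned int flags = LOOKUP_FOLLOW;
	int err;

retry:
	err = kern_path(name, flags, &path);
	if (!err) {
		/* ... use path.dentry / path.mnt here ... */
		path_put(&path);
	} else if (retry_estale(err, flags)) {
		/* Stale handle (e.g. NFS): retry once, bypassing the dcache. */
		flags |= LOOKUP_REVAL;
		goto retry;
	}
	return err;
}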
// SPDX-License-Identifier: GPL-2.0 /* * Componentized device handling.
*/ #include <linux/component.h> #include <linux/device.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/slab.h> #include <linux/debugfs.h> /** * DOC: overview * * The component helper allows drivers to collect a pile of sub-devices, * including their bound drivers, into an aggregate driver. Various subsystems * already provide functions to get hold of such components, e.g. * of_clk_get_by_name(). The component helper can be used when such a * subsystem-specific way to find a device is not available: The component * helper fills the niche of aggregate drivers for specific hardware, where * further standardization into a subsystem would not be practical. The common * example is when a logical device (e.g. a DRM display driver) is spread around * the SoC on various components (scanout engines, blending blocks, transcoders * for various outputs and so on). * * The component helper also doesn't solve runtime dependencies, e.g. for system * suspend and resume operations. See also :ref:`device links<device_link>`. * * Components are registered using component_add() and unregistered with * component_del(), usually from the driver's probe and disconnect functions. * * Aggregate drivers first assemble a component match list of what they need * using component_match_add(). This is then registered as an aggregate driver * using component_master_add_with_match(), and unregistered using * component_master_del(). */ struct component; struct component_match_array { void *data; int (*compare)(struct device *, void *); int (*compare_typed)(struct device *, int, void *); void (*release)(struct device *, void *); struct component *component; bool duplicate; }; struct component_match { size_t alloc; size_t num; struct component_match_array *compare; }; struct aggregate_device { struct list_head node; bool bound; const struct component_master_ops *ops; struct device *parent; struct component_match *match; }; struct component { struct list_head node; struct aggregate_device *adev; bool bound; const struct component_ops *ops; int subcomponent; struct device *dev; }; static DEFINE_MUTEX(component_mutex); static LIST_HEAD(component_list); static LIST_HEAD(aggregate_devices); #ifdef CONFIG_DEBUG_FS static struct dentry *component_debugfs_dir; static int component_devices_show(struct seq_file *s, void *data) { struct aggregate_device *m = s->private; struct component_match *match = m->match; size_t i; mutex_lock(&component_mutex); seq_printf(s, "%-40s %20s\n", "aggregate_device name", "status"); seq_puts(s, "-------------------------------------------------------------\n"); seq_printf(s, "%-40s %20s\n\n", dev_name(m->parent), m->bound ? "bound" : "not bound"); seq_printf(s, "%-40s %20s\n", "device name", "status"); seq_puts(s, "-------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { struct component *component = match->compare[i].component; seq_printf(s, "%-40s %20s\n", component ? dev_name(component->dev) : "(unknown)", component ? (component->bound ? 
"bound" : "not bound") : "not registered"); } mutex_unlock(&component_mutex); return 0; } DEFINE_SHOW_ATTRIBUTE(component_devices); static int __init component_debug_init(void) { component_debugfs_dir = debugfs_create_dir("device_component", NULL); return 0; } core_initcall(component_debug_init); static void component_debugfs_add(struct aggregate_device *m) { debugfs_create_file(dev_name(m->parent), 0444, component_debugfs_dir, m, &component_devices_fops); } static void component_debugfs_del(struct aggregate_device *m) { debugfs_lookup_and_remove(dev_name(m->parent), component_debugfs_dir); } #else static void component_debugfs_add(struct aggregate_device *m) { } static void component_debugfs_del(struct aggregate_device *m) { } #endif static struct aggregate_device *__aggregate_find(struct device *parent, const struct component_master_ops *ops) { struct aggregate_device *m; list_for_each_entry(m, &aggregate_devices, node) if (m->parent == parent && (!ops || m->ops == ops)) return m; return NULL; } static struct component *find_component(struct aggregate_device *adev, struct component_match_array *mc) { struct component *c; list_for_each_entry(c, &component_list, node) { if (c->adev && c->adev != adev) continue; if (mc->compare && mc->compare(c->dev, mc->data)) return c; if (mc->compare_typed && mc->compare_typed(c->dev, c->subcomponent, mc->data)) return c; } return NULL; } static int find_components(struct aggregate_device *adev) { struct component_match *match = adev->match; size_t i; int ret = 0; /* * Scan the array of match functions and attach * any components which are found to this adev. */ for (i = 0; i < match->num; i++) { struct component_match_array *mc = &match->compare[i]; struct component *c; dev_dbg(adev->parent, "Looking for component %zu\n", i); if (match->compare[i].component) continue; c = find_component(adev, mc); if (!c) { ret = -ENXIO; break; } dev_dbg(adev->parent, "found component %s, duplicate %u\n", dev_name(c->dev), !!c->adev); /* Attach this component to the adev */ match->compare[i].duplicate = !!c->adev; match->compare[i].component = c; c->adev = adev; } return ret; } /* Detach component from associated aggregate_device */ static void remove_component(struct aggregate_device *adev, struct component *c) { size_t i; /* Detach the component from this adev. */ for (i = 0; i < adev->match->num; i++) if (adev->match->compare[i].component == c) adev->match->compare[i].component = NULL; } /* * Try to bring up an aggregate device. If component is NULL, we're interested * in this aggregate device, otherwise it's a component which must be present * to try and bring up the aggregate device. * * Returns 1 for successful bringup, 0 if not ready, or -ve errno. 
*/ static int try_to_bring_up_aggregate_device(struct aggregate_device *adev, struct component *component) { int ret; dev_dbg(adev->parent, "trying to bring up adev\n"); if (find_components(adev)) { dev_dbg(adev->parent, "master has incomplete components\n"); return 0; } if (component && component->adev != adev) { dev_dbg(adev->parent, "master is not for this component (%s)\n", dev_name(component->dev)); return 0; } if (!devres_open_group(adev->parent, adev, GFP_KERNEL)) return -ENOMEM; /* Found all components */ ret = adev->ops->bind(adev->parent); if (ret < 0) { devres_release_group(adev->parent, NULL); if (ret != -EPROBE_DEFER) dev_info(adev->parent, "adev bind failed: %d\n", ret); return ret; } devres_close_group(adev->parent, NULL); adev->bound = true; return 1; } static int try_to_bring_up_masters(struct component *component) { struct aggregate_device *adev; int ret = 0; list_for_each_entry(adev, &aggregate_devices, node) { if (!adev->bound) { ret = try_to_bring_up_aggregate_device(adev, component); if (ret != 0) break; } } return ret; } static void take_down_aggregate_device(struct aggregate_device *adev) { if (adev->bound) { adev->ops->unbind(adev->parent); devres_release_group(adev->parent, adev); adev->bound = false; } } /** * component_compare_of - A common component compare function for of_node * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is device of_node. e.g. * component_match_add_release(masterdev, &match, component_release_of, * component_compare_of, component_dev_of_node) */ int component_compare_of(struct device *dev, void *data) { return device_match_of_node(dev, data); } EXPORT_SYMBOL_GPL(component_compare_of); /** * component_release_of - A common component release function for of_node * @dev: component device * @data: @compare_data from component_match_add_release() * * About the example, Please see component_compare_of(). */ void component_release_of(struct device *dev, void *data) { of_node_put(data); } EXPORT_SYMBOL_GPL(component_release_of); /** * component_compare_dev - A common component compare function for dev * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is struce device. e.g. * component_match_add(masterdev, &match, component_compare_dev, component_dev) */ int component_compare_dev(struct device *dev, void *data) { return dev == data; } EXPORT_SYMBOL_GPL(component_compare_dev); /** * component_compare_dev_name - A common component compare function for device name * @dev: component device * @data: @compare_data from component_match_add_release() * * A common compare function when compare_data is device name string. e.g. 
* component_match_add(masterdev, &match, component_compare_dev_name, * "component_dev_name") */ int component_compare_dev_name(struct device *dev, void *data) { return device_match_name(dev, data); } EXPORT_SYMBOL_GPL(component_compare_dev_name); static void devm_component_match_release(struct device *parent, void *res) { struct component_match *match = res; unsigned int i; for (i = 0; i < match->num; i++) { struct component_match_array *mc = &match->compare[i]; if (mc->release) mc->release(parent, mc->data); } kfree(match->compare); } static int component_match_realloc(struct component_match *match, size_t num) { struct component_match_array *new; if (match->alloc == num) return 0; new = kmalloc_array(num, sizeof(*new), GFP_KERNEL); if (!new) return -ENOMEM; if (match->compare) { memcpy(new, match->compare, sizeof(*new) * min(match->num, num)); kfree(match->compare); } match->compare = new; match->alloc = num; return 0; } static void __component_match_add(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), int (*compare_typed)(struct device *, int, void *), void *compare_data) { struct component_match *match = *matchptr; if (IS_ERR(match)) return; if (!match) { match = devres_alloc(devm_component_match_release, sizeof(*match), GFP_KERNEL); if (!match) { *matchptr = ERR_PTR(-ENOMEM); return; } devres_add(parent, match); *matchptr = match; } if (match->num == match->alloc) { size_t new_size = match->alloc + 16; int ret; ret = component_match_realloc(match, new_size); if (ret) { *matchptr = ERR_PTR(ret); return; } } match->compare[match->num].compare = compare; match->compare[match->num].compare_typed = compare_typed; match->compare[match->num].release = release; match->compare[match->num].data = compare_data; match->compare[match->num].component = NULL; match->num++; } /** * component_match_add_release - add a component match entry with release callback * @parent: parent device of the aggregate driver * @matchptr: pointer to the list of component matches * @release: release function for @compare_data * @compare: compare function to match against all components * @compare_data: opaque pointer passed to the @compare function * * Adds a new component match to the list stored in @matchptr, which the * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add(). * * The allocated match list in @matchptr is automatically released using devm * actions, where upon @release will be called to free any references held by * @compare_data, e.g. when @compare_data is a &device_node that must be * released with of_node_put(). * * See also component_match_add() and component_match_add_typed(). 
*/ void component_match_add_release(struct device *parent, struct component_match **matchptr, void (*release)(struct device *, void *), int (*compare)(struct device *, void *), void *compare_data) { __component_match_add(parent, matchptr, release, compare, NULL, compare_data); } EXPORT_SYMBOL(component_match_add_release); /** * component_match_add_typed - add a component match entry for a typed component * @parent: parent device of the aggregate driver * @matchptr: pointer to the list of component matches * @compare_typed: compare function to match against all typed components * @compare_data: opaque pointer passed to the @compare function * * Adds a new component match to the list stored in @matchptr, which the * aggregate driver needs to function. The list of component matches pointed to * by @matchptr must be initialized to NULL before adding the first match. This * only matches against components added with component_add_typed(). * * The allocated match list in @matchptr is automatically released using devm * actions. * * See also component_match_add_release() and component_match_add_typed(). */ void component_match_add_typed(struct device *parent, struct component_match **matchptr, int (*compare_typed)(struct device *, int, void *), void *compare_data) { __component_match_add(parent, matchptr, NULL, NULL, compare_typed, compare_data); } EXPORT_SYMBOL(component_match_add_typed); static void free_aggregate_device(struct aggregate_device *adev) { struct component_match *match = adev->match; int i; component_debugfs_del(adev); list_del(&adev->node); if (match) { for (i = 0; i < match->num; i++) { struct component *c = match->compare[i].component; if (c) c->adev = NULL; } } kfree(adev); } /** * component_master_add_with_match - register an aggregate driver * @parent: parent device of the aggregate driver * @ops: callbacks for the aggregate driver * @match: component match list for the aggregate driver * * Registers a new aggregate driver consisting of the components added to @match * by calling one of the component_match_add() functions. Once all components in * @match are available, it will be assembled by calling * &component_master_ops.bind from @ops. Must be unregistered by calling * component_master_del(). */ int component_master_add_with_match(struct device *parent, const struct component_master_ops *ops, struct component_match *match) { struct aggregate_device *adev; int ret; /* Reallocate the match array for its true size */ ret = component_match_realloc(match, match->num); if (ret) return ret; adev = kzalloc(sizeof(*adev), GFP_KERNEL); if (!adev) return -ENOMEM; adev->parent = parent; adev->ops = ops; adev->match = match; component_debugfs_add(adev); /* Add to the list of available aggregate devices. */ mutex_lock(&component_mutex); list_add(&adev->node, &aggregate_devices); ret = try_to_bring_up_aggregate_device(adev, NULL); if (ret < 0) free_aggregate_device(adev); mutex_unlock(&component_mutex); return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(component_master_add_with_match); /** * component_master_del - unregister an aggregate driver * @parent: parent device of the aggregate driver * @ops: callbacks for the aggregate driver * * Unregisters an aggregate driver registered with * component_master_add_with_match(). If necessary the aggregate driver is first * disassembled by calling &component_master_ops.unbind from @ops. 
*/ void component_master_del(struct device *parent, const struct component_master_ops *ops) { struct aggregate_device *adev; mutex_lock(&component_mutex); adev = __aggregate_find(parent, ops); if (adev) { take_down_aggregate_device(adev); free_aggregate_device(adev); } mutex_unlock(&component_mutex); } EXPORT_SYMBOL_GPL(component_master_del); static void component_unbind(struct component *component, struct aggregate_device *adev, void *data) { WARN_ON(!component->bound); if (component->ops && component->ops->unbind) component->ops->unbind(component->dev, adev->parent, data); component->bound = false; /* Release all resources claimed in the binding of this component */ devres_release_group(component->dev, component); } /** * component_unbind_all - unbind all components of an aggregate driver * @parent: parent device of the aggregate driver * @data: opaque pointer, passed to all components * * Unbinds all components of the aggregate device by passing @data to their * &component_ops.unbind functions. Should be called from * &component_master_ops.unbind. */ void component_unbind_all(struct device *parent, void *data) { struct aggregate_device *adev; struct component *c; size_t i; WARN_ON(!mutex_is_locked(&component_mutex)); adev = __aggregate_find(parent, NULL); if (!adev) return; /* Unbind components in reverse order */ for (i = adev->match->num; i--; ) if (!adev->match->compare[i].duplicate) { c = adev->match->compare[i].component; component_unbind(c, adev, data); } } EXPORT_SYMBOL_GPL(component_unbind_all); static int component_bind(struct component *component, struct aggregate_device *adev, void *data) { int ret; /* * Each component initialises inside its own devres group. * This allows us to roll-back a failed component without * affecting anything else. */ if (!devres_open_group(adev->parent, NULL, GFP_KERNEL)) return -ENOMEM; /* * Also open a group for the device itself: this allows us * to release the resources claimed against the sub-device * at the appropriate moment. */ if (!devres_open_group(component->dev, component, GFP_KERNEL)) { devres_release_group(adev->parent, NULL); return -ENOMEM; } dev_dbg(adev->parent, "binding %s (ops %ps)\n", dev_name(component->dev), component->ops); ret = component->ops->bind(component->dev, adev->parent, data); if (!ret) { component->bound = true; /* * Close the component device's group so that resources * allocated in the binding are encapsulated for removal * at unbind. Remove the group on the DRM device as we * can clean those resources up independently. */ devres_close_group(component->dev, NULL); devres_remove_group(adev->parent, NULL); dev_info(adev->parent, "bound %s (ops %ps)\n", dev_name(component->dev), component->ops); } else { devres_release_group(component->dev, NULL); devres_release_group(adev->parent, NULL); if (ret != -EPROBE_DEFER) dev_err(adev->parent, "failed to bind %s (ops %ps): %d\n", dev_name(component->dev), component->ops, ret); } return ret; } /** * component_bind_all - bind all components of an aggregate driver * @parent: parent device of the aggregate driver * @data: opaque pointer, passed to all components * * Binds all components of the aggregate @dev by passing @data to their * &component_ops.bind functions. Should be called from * &component_master_ops.bind. 
*/ int component_bind_all(struct device *parent, void *data) { struct aggregate_device *adev; struct component *c; size_t i; int ret = 0; WARN_ON(!mutex_is_locked(&component_mutex)); adev = __aggregate_find(parent, NULL); if (!adev) return -EINVAL; /* Bind components in match order */ for (i = 0; i < adev->match->num; i++) if (!adev->match->compare[i].duplicate) { c = adev->match->compare[i].component; ret = component_bind(c, adev, data); if (ret) break; } if (ret != 0) { for (; i > 0; i--) if (!adev->match->compare[i - 1].duplicate) { c = adev->match->compare[i - 1].component; component_unbind(c, adev, data); } } return ret; } EXPORT_SYMBOL_GPL(component_bind_all); static int __component_add(struct device *dev, const struct component_ops *ops, int subcomponent) { struct component *component; int ret; component = kzalloc(sizeof(*component), GFP_KERNEL); if (!component) return -ENOMEM; component->ops = ops; component->dev = dev; component->subcomponent = subcomponent; dev_dbg(dev, "adding component (ops %ps)\n", ops); mutex_lock(&component_mutex); list_add_tail(&component->node, &component_list); ret = try_to_bring_up_masters(component); if (ret < 0) { if (component->adev) remove_component(component->adev, component); list_del(&component->node); kfree(component); } mutex_unlock(&component_mutex); return ret < 0 ? ret : 0; } /** * component_add_typed - register a component * @dev: component device * @ops: component callbacks * @subcomponent: nonzero identifier for subcomponents * * Register a new component for @dev. Functions in @ops will be called when the * aggregate driver is ready to bind the overall driver by calling * component_bind_all(). See also &struct component_ops. * * @subcomponent must be nonzero and is used to differentiate between multiple * components registered on the same device @dev. These components are matched * using component_match_add_typed(). * * The component needs to be unregistered at driver unload/disconnect by * calling component_del(). * * See also component_add(). */ int component_add_typed(struct device *dev, const struct component_ops *ops, int subcomponent) { if (WARN_ON(subcomponent == 0)) return -EINVAL; return __component_add(dev, ops, subcomponent); } EXPORT_SYMBOL_GPL(component_add_typed); /** * component_add - register a component * @dev: component device * @ops: component callbacks * * Register a new component for @dev. Functions in @ops will be called when the * aggregate driver is ready to bind the overall driver by calling * component_bind_all(). See also &struct component_ops. * * The component needs to be unregistered at driver unload/disconnect by * calling component_del(). * * See also component_add_typed() for a variant that allows multiple different * components on the same device. */ int component_add(struct device *dev, const struct component_ops *ops) { return __component_add(dev, ops, 0); } EXPORT_SYMBOL_GPL(component_add); /** * component_del - unregister a component * @dev: component device * @ops: component callbacks * * Unregister a component added with component_add(). If the component is bound * into an aggregate driver, this will force the entire aggregate driver, including * all its components, to be unbound.
*/ void component_del(struct device *dev, const struct component_ops *ops) { struct component *c, *component = NULL; mutex_lock(&component_mutex); list_for_each_entry(c, &component_list, node) if (c->dev == dev && c->ops == ops) { list_del(&c->node); component = c; break; } if (component && component->adev) { take_down_aggregate_device(component->adev); remove_component(component->adev, component); } mutex_unlock(&component_mutex); WARN_ON(!component); kfree(component); } EXPORT_SYMBOL_GPL(component_del);
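/*
 * Usage sketch: the aggregate ("master") side of the API documented above,
 * for a hypothetical "foo" driver whose sub-devices are children of its DT
 * node. All foo_* names are illustrative assumptions; only the component_*()
 * calls come from this API, and error unwinding is kept minimal.
 */
#include <linux/component.h>
#include <linux/err.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int foo_master_bind(struct device *dev)
{
	/* Every matched component is available: bind them in match order. */
	return component_bind_all(dev, NULL);
}

static void foo_master_unbind(struct device *dev)
{
	component_unbind_all(dev, NULL);
}

static const struct component_master_ops foo_master_ops = {
	.bind = foo_master_bind,
	.unbind = foo_master_unbind,
};

static int foo_master_probe(struct platform_device *pdev)
{
	struct component_match *match = NULL;
	struct device_node *child;

	/*
	 * One match entry per child node; component_release_of() drops the
	 * of_node reference when the devm-managed match list is released.
	 */
	for_each_available_child_of_node(pdev->dev.of_node, child)
		component_match_add_release(&pdev->dev, &match,
					    component_release_of,
					    component_compare_of,
					    of_node_get(child));

	if (IS_ERR(match))
		return PTR_ERR(match);
	if (!match)
		return -ENODEV;	/* no sub-devices described */

	return component_master_add_with_match(&pdev->dev, &foo_master_ops,
					       match);
}

static void foo_master_remove(struct platform_device *pdev)
{
	component_master_del(&pdev->dev, &foo_master_ops);
}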
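/*
 * Usage sketch: the matching component side for a hypothetical "foo_encoder"
 * sub-device. Its bind/unbind callbacks run from component_bind_all() /
 * component_unbind_all() in the aggregate driver's callbacks above; the
 * foo_encoder_* names are illustrative only.
 */
#include <linux/component.h>
#include <linux/device.h>
#include <linux/platform_device.h>

static int foo_encoder_bind(struct device *dev, struct device *master,
			    void *data)
{
	/* Wire this sub-device into the aggregate driver's state (@data). */
	dev_dbg(dev, "bound to %s\n", dev_name(master));
	return 0;
}

static void foo_encoder_unbind(struct device *dev, struct device *master,
			       void *data)
{
	dev_dbg(dev, "unbound from %s\n", dev_name(master));
}

static const struct component_ops foo_encoder_component_ops = {
	.bind = foo_encoder_bind,
	.unbind = foo_encoder_unbind,
};

static int foo_encoder_probe(struct platform_device *pdev)
{
	/* Publishing the component may immediately assemble the aggregate. */
	return component_add(&pdev->dev, &foo_encoder_component_ops);
}

static void foo_encoder_remove(struct platform_device *pdev)
{
	/* Forces the whole aggregate to unbind if it is currently bound. */
	component_del(&pdev->dev, &foo_encoder_component_ops);
}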
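/*
 * Usage sketch: the typed variant, for a single struct device that exposes
 * two distinct sub-functions. FOO_SUBDEV_* and the foo_* helpers are
 * illustrative assumptions; a real driver would usually also verify @dev in
 * the typed compare callback. Error unwinding is omitted for brevity.
 */
#include <linux/component.h>
#include <linux/device.h>
#include <linux/types.h>

#define FOO_SUBDEV_AUDIO	1	/* subcomponent ids must be nonzero */
#define FOO_SUBDEV_VIDEO	2

static int foo_compare_typed(struct device *dev, int subcomponent, void *data)
{
	/* Match on the subcomponent id carried in @data. */
	return subcomponent == (int)(uintptr_t)data;
}

/* Component side: register both sub-functions against the same device. */
static int foo_register_subdevs(struct device *dev,
				const struct component_ops *ops)
{
	int ret;

	ret = component_add_typed(dev, ops, FOO_SUBDEV_AUDIO);
	if (ret)
		return ret;

	return component_add_typed(dev, ops, FOO_SUBDEV_VIDEO);
}

/* Aggregate side: one typed match entry per sub-function. */
static void foo_match_subdevs(struct device *parent,
			      struct component_match **match)
{
	component_match_add_typed(parent, match, foo_compare_typed,
				  (void *)(uintptr_t)FOO_SUBDEV_AUDIO);
	component_match_add_typed(parent, match, foo_compare_typed,
				  (void *)(uintptr_t)FOO_SUBDEV_VIDEO);
}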
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_USB_H #define __LINUX_USB_H #include <linux/mod_devicetable.h> #include <linux/usb/ch9.h> #define USB_MAJOR 180 #define USB_DEVICE_MAJOR 189 #ifdef __KERNEL__ #include <linux/errno.h> /* for -ENODEV */ #include <linux/delay.h> /* for mdelay() */ #include <linux/interrupt.h> /* for in_interrupt() */ #include <linux/list.h> /* for struct list_head */ #include <linux/kref.h> /* for struct kref */ #include <linux/device.h> /* for struct device */ #include <linux/fs.h> /* for struct file_operations */ #include <linux/completion.h> /* for struct completion */ #include <linux/sched.h> /* for current && schedule_timeout */ #include <linux/mutex.h> /* for struct mutex */ #include <linux/pm_runtime.h> /* for runtime PM */ struct usb_device; struct usb_driver; /*-------------------------------------------------------------------------*/ /* * Host-side wrappers for standard USB descriptors ... these are parsed * from the data provided by devices. Parsing turns them from a flat * sequence of descriptors into a hierarchy: * * - devices have one (usually) or more configs; * - configs have one (often) or more interfaces; * - interfaces have one (usually) or more settings; * - each interface setting has zero or (usually) more endpoints. * - a SuperSpeed endpoint has a companion descriptor * * And there might be other descriptors mixed in with those.
* * Devices may also have class-specific or vendor-specific descriptors. */ struct ep_device; /** * struct usb_host_endpoint - host-side endpoint descriptor and queue * @desc: descriptor for this endpoint, wMaxPacketSize in native byteorder * @ss_ep_comp: SuperSpeed companion descriptor for this endpoint * @ssp_isoc_ep_comp: SuperSpeedPlus isoc companion descriptor for this endpoint * @urb_list: urbs queued to this endpoint; maintained by usbcore * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH) * with one or more transfer descriptors (TDs) per urb * @ep_dev: ep_device for sysfs info * @extra: descriptors following this endpoint in the configuration * @extralen: how many bytes of "extra" are valid * @enabled: URBs may be submitted to this endpoint * @streams: number of USB-3 streams allocated on the endpoint * * USB requests are always queued to a given endpoint, identified by a * descriptor within an active interface in a given USB configuration. */ struct usb_host_endpoint { struct usb_endpoint_descriptor desc; struct usb_ss_ep_comp_descriptor ss_ep_comp; struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp; struct list_head urb_list; void *hcpriv; struct ep_device *ep_dev; /* For sysfs info */ unsigned char *extra; /* Extra descriptors */ int extralen; int enabled; int streams; }; /* host-side wrapper for one interface setting's parsed descriptors */ struct usb_host_interface { struct usb_interface_descriptor desc; int extralen; unsigned char *extra; /* Extra descriptors */ /* array of desc.bNumEndpoints endpoints associated with this * interface setting. these will be in no particular order. */ struct usb_host_endpoint *endpoint; char *string; /* iInterface string, if present */ }; enum usb_interface_condition { USB_INTERFACE_UNBOUND = 0, USB_INTERFACE_BINDING, USB_INTERFACE_BOUND, USB_INTERFACE_UNBINDING, }; int __must_check usb_find_common_endpoints(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_in, struct usb_endpoint_descriptor **bulk_out, struct usb_endpoint_descriptor **int_in, struct usb_endpoint_descriptor **int_out); int __must_check usb_find_common_endpoints_reverse(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_in, struct usb_endpoint_descriptor **bulk_out, struct usb_endpoint_descriptor **int_in, struct usb_endpoint_descriptor **int_out); static inline int __must_check usb_find_bulk_in_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_in) { return usb_find_common_endpoints(alt, bulk_in, NULL, NULL, NULL); } static inline int __must_check usb_find_bulk_out_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_out) { return usb_find_common_endpoints(alt, NULL, bulk_out, NULL, NULL); } static inline int __must_check usb_find_int_in_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **int_in) { return usb_find_common_endpoints(alt, NULL, NULL, int_in, NULL); } static inline int __must_check usb_find_int_out_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **int_out) { return usb_find_common_endpoints(alt, NULL, NULL, NULL, int_out); } static inline int __must_check usb_find_last_bulk_in_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_in) { return usb_find_common_endpoints_reverse(alt, bulk_in, NULL, NULL, NULL); } static inline int __must_check usb_find_last_bulk_out_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_out) { return 
usb_find_common_endpoints_reverse(alt, NULL, bulk_out, NULL, NULL); } static inline int __must_check usb_find_last_int_in_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **int_in) { return usb_find_common_endpoints_reverse(alt, NULL, NULL, int_in, NULL); } static inline int __must_check usb_find_last_int_out_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **int_out) { return usb_find_common_endpoints_reverse(alt, NULL, NULL, NULL, int_out); } enum usb_wireless_status { USB_WIRELESS_STATUS_NA = 0, USB_WIRELESS_STATUS_DISCONNECTED, USB_WIRELESS_STATUS_CONNECTED, }; /** * struct usb_interface - what usb device drivers talk to * @altsetting: array of interface structures, one for each alternate * setting that may be selected. Each one includes a set of * endpoint configurations. They will be in no particular order. * @cur_altsetting: the current altsetting. * @num_altsetting: number of altsettings defined. * @intf_assoc: interface association descriptor * @minor: the minor number assigned to this interface, if this * interface is bound to a driver that uses the USB major number. * If this interface does not use the USB major, this field should * be unused. The driver should set this value in the probe() * function of the driver, after it has been assigned a minor * number from the USB core by calling usb_register_dev(). * @condition: binding state of the interface: not bound, binding * (in probe()), bound to a driver, or unbinding (in disconnect()) * @sysfs_files_created: sysfs attributes exist * @ep_devs_created: endpoint child pseudo-devices exist * @unregistering: flag set when the interface is being unregistered * @needs_remote_wakeup: flag set when the driver requires remote-wakeup * capability during autosuspend. * @needs_altsetting0: flag set when a set-interface request for altsetting 0 * has been deferred. * @needs_binding: flag set when the driver should be re-probed or unbound * following a reset or suspend operation it doesn't support. * @authorized: This allows to (de)authorize individual interfaces instead * a whole device in contrast to the device authorization. * @wireless_status: if the USB device uses a receiver/emitter combo, whether * the emitter is connected. * @wireless_status_work: Used for scheduling wireless status changes * from atomic context. * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. * @reset_ws: Used for scheduling resets from atomic context. * @resetting_device: USB core reset the device, so use alt setting 0 as * current; needs bandwidth alloc after reset. * * USB device drivers attach to interfaces on a physical device. Each * interface encapsulates a single high level function, such as feeding * an audio stream to a speaker or reporting a change in a volume control. * Many USB devices only have one interface. The protocol used to talk to * an interface's endpoints can be defined in a usb "class" specification, * or by a product's vendor. The (default) control endpoint is part of * every interface, but is never listed among the interface's descriptors. * * The driver that is bound to the interface can use standard driver model * calls such as dev_get_drvdata() on the dev member of this structure. * * Each interface may have alternate settings. The initial configuration * of a device sets altsetting 0, but the device driver can change * that setting using usb_set_interface(). 
Alternate settings are often * used to control the use of periodic endpoints, such as by having * different endpoints use different amounts of reserved USB bandwidth. * All standards-conformant USB devices that use isochronous endpoints * will use them in non-default settings. * * The USB specification says that alternate setting numbers must run from * 0 to one less than the total number of alternate settings. But some * devices manage to mess this up, and the structures aren't necessarily * stored in numerical order anyhow. Use usb_altnum_to_altsetting() to * look up an alternate setting in the altsetting array based on its number. */ struct usb_interface { /* array of alternate settings for this interface, * stored in no particular order */ struct usb_host_interface *altsetting; struct usb_host_interface *cur_altsetting; /* the currently * active alternate setting */ unsigned num_altsetting; /* number of alternate settings */ /* If there is an interface association descriptor then it will list * the associated interfaces */ struct usb_interface_assoc_descriptor *intf_assoc; int minor; /* minor number this interface is * bound to */ enum usb_interface_condition condition; /* state of binding */ unsigned sysfs_files_created:1; /* the sysfs attributes exist */ unsigned ep_devs_created:1; /* endpoint "devices" exist */ unsigned unregistering:1; /* unregistration is in progress */ unsigned needs_remote_wakeup:1; /* driver requires remote wakeup */ unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ unsigned resetting_device:1; /* true: bandwidth alloc after reset */ unsigned authorized:1; /* used for interface authorization */ enum usb_wireless_status wireless_status; struct work_struct wireless_status_work; struct device dev; /* interface specific device info */ struct device *usb_dev; struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(__dev) container_of_const(__dev, struct usb_interface, dev) static inline void *usb_get_intfdata(struct usb_interface *intf) { return dev_get_drvdata(&intf->dev); } /** * usb_set_intfdata() - associate driver-specific data with an interface * @intf: USB interface * @data: driver data * * Drivers can use this function in their probe() callbacks to associate * driver-specific data with an interface. * * Note that there is generally no need to clear the driver-data pointer even * if some drivers do so for historical or implementation-specific reasons. */ static inline void usb_set_intfdata(struct usb_interface *intf, void *data) { dev_set_drvdata(&intf->dev, data); } struct usb_interface *usb_get_intf(struct usb_interface *intf); void usb_put_intf(struct usb_interface *intf); /* Hard limit */ #define USB_MAXENDPOINTS 30 /* this maximum is arbitrary */ #define USB_MAXINTERFACES 32 #define USB_MAXIADS (USB_MAXINTERFACES/2) bool usb_check_bulk_endpoints( const struct usb_interface *intf, const u8 *ep_addrs); bool usb_check_int_endpoints( const struct usb_interface *intf, const u8 *ep_addrs); /* * USB Resume Timer: Every Host controller driver should drive the resume * signalling on the bus for the amount of time defined by this macro. * * That way we will have a 'stable' behavior among all HCDs supported by Linux. * * Note that the USB Specification states we should drive resume for *at least* * 20 ms, but it doesn't give an upper bound. 
This creates two possible * situations which we want to avoid: * * (a) sometimes an msleep(20) might expire slightly before 20 ms, which causes * us to fail USB Electrical Tests, thus failing Certification * * (b) Some (many) devices actually need more than 20 ms of resume signalling, * and while we can argue that's against the USB Specification, we don't have * control over which devices a certification laboratory will be using for * certification. If CertLab uses a device which was tested against Windows and * that happens to have relaxed resume signalling rules, we might fall into * situations where we fail interoperability and electrical tests. * * In order to avoid both conditions, we're using a 40 ms resume timeout, which * should cope with both LPJ calibration errors and devices not following every * detail of the USB Specification. */ #define USB_RESUME_TIMEOUT 40 /* ms */ /** * struct usb_interface_cache - long-term representation of a device interface * @num_altsetting: number of altsettings defined. * @ref: reference counter. * @altsetting: variable-length array of interface structures, one for * each alternate setting that may be selected. Each one includes a * set of endpoint configurations. They will be in no particular order. * * These structures persist for the lifetime of a usb_device, unlike * struct usb_interface (which persists only as long as its configuration * is installed). The altsetting arrays can be accessed through these * structures at any time, permitting comparison of configurations and * providing support for the /sys/kernel/debug/usb/devices pseudo-file. */ struct usb_interface_cache { unsigned num_altsetting; /* number of alternate settings */ struct kref ref; /* reference counter */ /* variable-length array of alternate settings for this interface, * stored in no particular order */ struct usb_host_interface altsetting[]; }; #define ref_to_usb_interface_cache(r) \ container_of(r, struct usb_interface_cache, ref) #define altsetting_to_usb_interface_cache(a) \ container_of(a, struct usb_interface_cache, altsetting[0]) /** * struct usb_host_config - representation of a device's configuration * @desc: the device's configuration descriptor. * @string: pointer to the cached version of the iConfiguration string, if * present for this configuration. * @intf_assoc: list of any interface association descriptors in this config * @interface: array of pointers to usb_interface structures, one for each * interface in the configuration. The number of interfaces is stored * in desc.bNumInterfaces. These pointers are valid only while the * configuration is active. * @intf_cache: array of pointers to usb_interface_cache structures, one * for each interface in the configuration. These structures exist * for the entire life of the device. * @extra: pointer to buffer containing all extra descriptors associated * with this configuration (those preceding the first interface * descriptor). * @extralen: length of the extra descriptors buffer. * * USB devices may have multiple configurations, but only one can be active * at any time. Each encapsulates a different operational environment; * for example, a dual-speed device would have separate configurations for * full-speed and high-speed operation. The number of configurations * available is stored in the device descriptor as bNumConfigurations. * * A configuration can contain multiple interfaces. Each corresponds to * a different function of the USB device, and all are available whenever * the configuration is active. 
The USB standard says that interfaces * are supposed to be numbered from 0 to desc.bNumInterfaces-1, but a lot * of devices get this wrong. In addition, the interface array is not * guaranteed to be sorted in numerical order. Use usb_ifnum_to_if() to * look up an interface entry based on its number. * * Device drivers should not attempt to activate configurations. The choice * of which configuration to install is a policy decision based on such * considerations as available power, functionality provided, and the user's * desires (expressed through userspace tools). However, drivers can call * usb_reset_configuration() to reinitialize the current configuration and * all its interfaces. */ struct usb_host_config { struct usb_config_descriptor desc; char *string; /* iConfiguration string, if present */ /* List of any Interface Association Descriptors in this * configuration. */ struct usb_interface_assoc_descriptor *intf_assoc[USB_MAXIADS]; /* the interfaces associated with this configuration, * stored in no particular order */ struct usb_interface *interface[USB_MAXINTERFACES]; /* Interface information available even when this is not the * active configuration */ struct usb_interface_cache *intf_cache[USB_MAXINTERFACES]; unsigned char *extra; /* Extra descriptors */ int extralen; }; /* USB2.0 and USB3.0 device BOS descriptor set */ struct usb_host_bos { struct usb_bos_descriptor *desc; struct usb_ext_cap_descriptor *ext_cap; struct usb_ss_cap_descriptor *ss_cap; struct usb_ssp_cap_descriptor *ssp_cap; struct usb_ss_container_id_descriptor *ss_id; struct usb_ptm_cap_descriptor *ptm_cap; }; int __usb_get_extra_descriptor(char *buffer, unsigned size, unsigned char type, void **ptr, size_t min); #define usb_get_extra_descriptor(ifpoint, type, ptr) \ __usb_get_extra_descriptor((ifpoint)->extra, \ (ifpoint)->extralen, \ type, (void **)ptr, sizeof(**(ptr))) /* ----------------------------------------------------------------------- */ /* * Allocated per bus (tree of devices) we have: */ struct usb_bus { struct device *controller; /* host side hardware */ struct device *sysdev; /* as seen from firmware or bus */ int busnum; /* Bus number (in order of reg) */ const char *bus_name; /* stable id (PCI slot_name etc) */ u8 uses_pio_for_control; /* * Does the host controller use PIO * for control transfers? */ u8 otg_port; /* 0, or number of OTG/HNP port */ unsigned is_b_host:1; /* true during some HNP roleswitches */ unsigned b_hnp_enable:1; /* OTG: did A-Host enable HNP? */ unsigned no_stop_on_short:1; /* * Quirk: some controllers don't stop * the ep queue on a short transfer * with the URB_SHORT_NOT_OK flag set. */ unsigned no_sg_constraint:1; /* no sg constraint */ unsigned sg_tablesize; /* 0 or largest number of sg list entries */ int devnum_next; /* Next open device number in * round-robin allocation */ struct mutex devnum_next_mutex; /* devnum_next mutex */ DECLARE_BITMAP(devmap, 128); /* USB device number allocation bitmap */ struct usb_device *root_hub; /* Root hub */ struct usb_bus *hs_companion; /* Companion EHCI bus, if any */ int bandwidth_allocated; /* on this bus: how much of the time * reserved for periodic (intr/iso) * requests is used, on average? * Units: microseconds/frame. * Limits: Full/low speed reserve 90%, * while high speed reserves 80%. */ int bandwidth_int_reqs; /* number of Interrupt requests */ int bandwidth_isoc_reqs; /* number of Isoc. 
requests */ unsigned resuming_ports; /* bit array: resuming root-hub ports */ #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE) struct mon_bus *mon_bus; /* non-null when associated */ int monitored; /* non-zero when monitored */ #endif }; struct usb_dev_state; /* ----------------------------------------------------------------------- */ struct usb_tt; enum usb_port_connect_type { USB_PORT_CONNECT_TYPE_UNKNOWN = 0, USB_PORT_CONNECT_TYPE_HOT_PLUG, USB_PORT_CONNECT_TYPE_HARD_WIRED, USB_PORT_NOT_USED, }; /* * USB port quirks. */ /* For the given port, prefer the old (faster) enumeration scheme. */ #define USB_PORT_QUIRK_OLD_SCHEME BIT(0) /* Decrease TRSTRCY to 10ms during device enumeration. */ #define USB_PORT_QUIRK_FAST_ENUM BIT(1) /* * USB 2.0 Link Power Management (LPM) parameters. */ struct usb2_lpm_parameters { /* Best effort service latency indicate how long the host will drive * resume on an exit from L1. */ unsigned int besl; /* Timeout value in microseconds for the L1 inactivity (LPM) timer. * When the timer counts to zero, the parent hub will initiate a LPM * transition to L1. */ int timeout; }; /* * USB 3.0 Link Power Management (LPM) parameters. * * PEL and SEL are USB 3.0 Link PM latencies for device-initiated LPM exit. * MEL is the USB 3.0 Link PM latency for host-initiated LPM exit. * All three are stored in nanoseconds. */ struct usb3_lpm_parameters { /* * Maximum exit latency (MEL) for the host to send a packet to the * device (either a Ping for isoc endpoints, or a data packet for * interrupt endpoints), the hubs to decode the packet, and for all hubs * in the path to transition the links to U0. */ unsigned int mel; /* * Maximum exit latency for a device-initiated LPM transition to bring * all links into U0. Abbreviated as "PEL" in section 9.4.12 of the USB * 3.0 spec, with no explanation of what "P" stands for. "Path"? */ unsigned int pel; /* * The System Exit Latency (SEL) includes PEL, and three other * latencies. After a device initiates a U0 transition, it will take * some time from when the device sends the ERDY to when it will finally * receive the data packet. Basically, SEL should be the worse-case * latency from when a device starts initiating a U0 transition to when * it will get data. */ unsigned int sel; /* * The idle timeout value that is currently programmed into the parent * hub for this device. When the timer counts to zero, the parent hub * will initiate an LPM transition to either U1 or U2. */ int timeout; }; /** * struct usb_device - kernel's representation of a USB device * @devnum: device number; address on a USB bus * @devpath: device ID string for use in messages (e.g., /port/...) * @route: tree topology hex string for use with xHCI * @state: device state: configured, not attached, etc. 
* @speed: device speed: high/full/low (or error) * @rx_lanes: number of rx lanes in use, USB 3.2 adds dual-lane support * @tx_lanes: number of tx lanes in use, USB 3.2 adds dual-lane support * @ssp_rate: SuperSpeed Plus phy signaling rate and lane count * @tt: Transaction Translator info; used with low/full speed dev, highspeed hub * @ttport: device port on that tt hub * @toggle: one bit for each endpoint, with ([0] = IN, [1] = OUT) endpoints * @parent: our hub, unless we're the root * @bus: bus we're part of * @ep0: endpoint 0 data (default control pipe) * @dev: generic device interface * @descriptor: USB device descriptor * @bos: USB device BOS descriptor set * @config: all of the device's configs * @actconfig: the active configuration * @ep_in: array of IN endpoints * @ep_out: array of OUT endpoints * @rawdescriptors: raw descriptors for each config * @bus_mA: Current available from the bus * @portnum: parent port number (origin 1) * @level: number of USB hub ancestors * @devaddr: device address, XHCI: assigned by HW, others: same as devnum * @can_submit: URBs may be submitted * @persist_enabled: USB_PERSIST enabled for this device * @reset_in_progress: the device is being reset * @have_langid: whether string_langid is valid * @authorized: policy has said we can use it; * (user space) policy determines if we authorize this device to be * used or not. By default, wired USB devices are authorized. * WUSB devices are not, until we authorize them from user space. * FIXME -- complete doc * @authenticated: Crypto authentication passed * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM * @usb2_hw_lpm_besl_capable: device can perform USB2 hardware BESL LPM * @usb2_hw_lpm_enabled: USB2 hardware LPM is enabled * @usb2_hw_lpm_allowed: Userspace allows USB 2.0 LPM to be enabled * @usb3_lpm_u1_enabled: USB3 hardware U1 LPM enabled * @usb3_lpm_u2_enabled: USB3 hardware U2 LPM enabled * @string_langid: language ID for strings * @product: iProduct string, if present (static) * @manufacturer: iManufacturer string, if present (static) * @serial: iSerialNumber string, if present (static) * @filelist: usbfs files that are open to this device * @maxchild: number of ports if hub * @quirks: quirks of the whole device * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended * @connect_time: time device was first connected * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) * @slot_id: Slot ID assigned by xHCI * @l1_params: best effort service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. * @u2_params: exit latencies for USB3 U2 LPM state, and hub-initiated timeout. * @lpm_disable_count: Ref count used by usb_disable_lpm() and usb_enable_lpm() * to keep track of the number of functions that require USB 3.0 Link Power * Management to be disabled for this usb_device. This count should only * be manipulated by those functions, with the bandwidth_mutex held. * @hub_delay: cached value consisting of: * parent->hub_delay + wHubDelay + tTPTransmissionDelay (40ns) * Will be used as wValue for SetIsochDelay requests. * @use_generic_driver: ask driver core to reprobe using the generic driver.
* * Notes: * Usbcore drivers should not set usbdev->state directly. Instead use * usb_set_device_state(). */ struct usb_device { int devnum; char devpath[16]; u32 route; enum usb_device_state state; enum usb_device_speed speed; unsigned int rx_lanes; unsigned int tx_lanes; enum usb_ssp_rate ssp_rate; struct usb_tt *tt; int ttport; unsigned int toggle[2]; struct usb_device *parent; struct usb_bus *bus; struct usb_host_endpoint ep0; struct device dev; struct usb_device_descriptor descriptor; struct usb_host_bos *bos; struct usb_host_config *config; struct usb_host_config *actconfig; struct usb_host_endpoint *ep_in[16]; struct usb_host_endpoint *ep_out[16]; char **rawdescriptors; unsigned short bus_mA; u8 portnum; u8 level; u8 devaddr; unsigned can_submit:1; unsigned persist_enabled:1; unsigned reset_in_progress:1; unsigned have_langid:1; unsigned authorized:1; unsigned authenticated:1; unsigned lpm_capable:1; unsigned lpm_devinit_allow:1; unsigned usb2_hw_lpm_capable:1; unsigned usb2_hw_lpm_besl_capable:1; unsigned usb2_hw_lpm_enabled:1; unsigned usb2_hw_lpm_allowed:1; unsigned usb3_lpm_u1_enabled:1; unsigned usb3_lpm_u2_enabled:1; int string_langid; /* static strings from the device */ char *product; char *manufacturer; char *serial; struct list_head filelist; int maxchild; u32 quirks; atomic_t urbnum; unsigned long active_duration; unsigned long connect_time; unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; int slot_id; struct usb2_lpm_parameters l1_params; struct usb3_lpm_parameters u1_params; struct usb3_lpm_parameters u2_params; unsigned lpm_disable_count; u16 hub_delay; unsigned use_generic_driver:1; }; #define to_usb_device(__dev) container_of_const(__dev, struct usb_device, dev) static inline struct usb_device *__intf_to_usbdev(struct usb_interface *intf) { return to_usb_device(intf->dev.parent); } static inline const struct usb_device *__intf_to_usbdev_const(const struct usb_interface *intf) { return to_usb_device((const struct device *)intf->dev.parent); } #define interface_to_usbdev(intf) \ _Generic((intf), \ const struct usb_interface *: __intf_to_usbdev_const, \ struct usb_interface *: __intf_to_usbdev)(intf) extern struct usb_device *usb_get_dev(struct usb_device *dev); extern void usb_put_dev(struct usb_device *dev); extern struct usb_device *usb_hub_find_child(struct usb_device *hdev, int port1); /** * usb_hub_for_each_child - iterate over all child devices on the hub * @hdev: USB device belonging to the usb hub * @port1: portnum associated with child device * @child: child device pointer */ #define usb_hub_for_each_child(hdev, port1, child) \ for (port1 = 1, child = usb_hub_find_child(hdev, port1); \ port1 <= hdev->maxchild; \ child = usb_hub_find_child(hdev, ++port1)) \ if (!child) continue; else /* USB device locking */ #define usb_lock_device(udev) device_lock(&(udev)->dev) #define usb_unlock_device(udev) device_unlock(&(udev)->dev) #define usb_lock_device_interruptible(udev) device_lock_interruptible(&(udev)->dev) #define usb_trylock_device(udev) device_trylock(&(udev)->dev) extern int usb_lock_device_for_reset(struct usb_device *udev, const struct usb_interface *iface); /* USB port reset for device reinitialization */ extern int usb_reset_device(struct usb_device *dev); extern void usb_queue_reset_device(struct usb_interface *dev); extern struct device *usb_intf_get_dma_device(struct usb_interface *intf); #ifdef CONFIG_ACPI extern int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable); extern bool 
usb_acpi_power_manageable(struct usb_device *hdev, int index); extern int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index); #else static inline int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable) { return 0; } static inline bool usb_acpi_power_manageable(struct usb_device *hdev, int index) { return true; } static inline int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) { return 0; } #endif /* USB autosuspend and autoresume */ #ifdef CONFIG_PM extern void usb_enable_autosuspend(struct usb_device *udev); extern void usb_disable_autosuspend(struct usb_device *udev); extern int usb_autopm_get_interface(struct usb_interface *intf); extern void usb_autopm_put_interface(struct usb_interface *intf); extern int usb_autopm_get_interface_async(struct usb_interface *intf); extern void usb_autopm_put_interface_async(struct usb_interface *intf); extern void usb_autopm_get_interface_no_resume(struct usb_interface *intf); extern void usb_autopm_put_interface_no_suspend(struct usb_interface *intf); static inline void usb_mark_last_busy(struct usb_device *udev) { pm_runtime_mark_last_busy(&udev->dev); } #else static inline int usb_enable_autosuspend(struct usb_device *udev) { return 0; } static inline int usb_disable_autosuspend(struct usb_device *udev) { return 0; } static inline int usb_autopm_get_interface(struct usb_interface *intf) { return 0; } static inline int usb_autopm_get_interface_async(struct usb_interface *intf) { return 0; } static inline void usb_autopm_put_interface(struct usb_interface *intf) { } static inline void usb_autopm_put_interface_async(struct usb_interface *intf) { } static inline void usb_autopm_get_interface_no_resume( struct usb_interface *intf) { } static inline void usb_autopm_put_interface_no_suspend( struct usb_interface *intf) { } static inline void usb_mark_last_busy(struct usb_device *udev) { } #endif extern int usb_disable_lpm(struct usb_device *udev); extern void usb_enable_lpm(struct usb_device *udev); /* Same as above, but these functions lock/unlock the bandwidth_mutex. */ extern int usb_unlocked_disable_lpm(struct usb_device *udev); extern void usb_unlocked_enable_lpm(struct usb_device *udev); extern int usb_disable_ltm(struct usb_device *udev); extern void usb_enable_ltm(struct usb_device *udev); static inline bool usb_device_supports_ltm(struct usb_device *udev) { if (udev->speed < USB_SPEED_SUPER || !udev->bos || !udev->bos->ss_cap) return false; return udev->bos->ss_cap->bmAttributes & USB_LTM_SUPPORT; } static inline bool usb_device_no_sg_constraint(struct usb_device *udev) { return udev && udev->bus && udev->bus->no_sg_constraint; } /*-------------------------------------------------------------------------*/ /* for drivers using iso endpoints */ extern int usb_get_current_frame_number(struct usb_device *usb_dev); /* Sets up a group of bulk endpoints to support multiple stream IDs. */ extern int usb_alloc_streams(struct usb_interface *interface, struct usb_host_endpoint **eps, unsigned int num_eps, unsigned int num_streams, gfp_t mem_flags); /* Reverts a group of bulk endpoints back to not using stream IDs. 
*/ extern int usb_free_streams(struct usb_interface *interface, struct usb_host_endpoint **eps, unsigned int num_eps, gfp_t mem_flags); /* used these for multi-interface device registration */ extern int usb_driver_claim_interface(struct usb_driver *driver, struct usb_interface *iface, void *data); /** * usb_interface_claimed - returns true iff an interface is claimed * @iface: the interface being checked * * Return: %true (nonzero) iff the interface is claimed, else %false * (zero). * * Note: * Callers must own the driver model's usb bus readlock. So driver * probe() entries don't need extra locking, but other call contexts * may need to explicitly claim that lock. * */ static inline int usb_interface_claimed(struct usb_interface *iface) { return (iface->dev.driver != NULL); } extern void usb_driver_release_interface(struct usb_driver *driver, struct usb_interface *iface); int usb_set_wireless_status(struct usb_interface *iface, enum usb_wireless_status status); const struct usb_device_id *usb_match_id(struct usb_interface *interface, const struct usb_device_id *id); extern int usb_match_one_id(struct usb_interface *interface, const struct usb_device_id *id); extern int usb_for_each_dev(void *data, int (*fn)(struct usb_device *, void *)); extern struct usb_interface *usb_find_interface(struct usb_driver *drv, int minor); extern struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev, unsigned ifnum); extern struct usb_host_interface *usb_altnum_to_altsetting( const struct usb_interface *intf, unsigned int altnum); extern struct usb_host_interface *usb_find_alt_setting( struct usb_host_config *config, unsigned int iface_num, unsigned int alt_num); /* port claiming functions */ int usb_hub_claim_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner); int usb_hub_release_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner); /** * usb_make_path - returns stable device path in the usb tree * @dev: the device whose path is being constructed * @buf: where to put the string * @size: how big is "buf"? * * Return: Length of the string (> 0) or negative if size was too small. * * Note: * This identifier is intended to be "stable", reflecting physical paths in * hardware such as physical bus addresses for host controllers or ports on * USB hubs. That makes it stay the same until systems are physically * reconfigured, by re-cabling a tree of USB devices or by moving USB host * controllers. Adding and removing devices, including virtual root hubs * in host controller driver modules, does not change these path identifiers; * neither does rebooting or re-enumerating. These are more useful identifiers * than changeable ("unstable") ones like bus numbers or device addresses. * * With a partial exception for devices connected to USB 2.0 root hubs, these * identifiers are also predictable. So long as the device tree isn't changed, * plugging any USB device into a given hub port always gives it the same path. * Because of the use of "companion" controllers, devices connected to ports on * USB 2.0 root hubs (EHCI host controllers) will get one path ID if they are * high speed, and a different one if they are full or low speed. */ static inline int usb_make_path(struct usb_device *dev, char *buf, size_t size) { int actual; actual = snprintf(buf, size, "usb-%s-%s", dev->bus->bus_name, dev->devpath); return (actual >= (int)size) ? 
-1 : actual; } /*-------------------------------------------------------------------------*/ #define USB_DEVICE_ID_MATCH_DEVICE \ (USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT) #define USB_DEVICE_ID_MATCH_DEV_RANGE \ (USB_DEVICE_ID_MATCH_DEV_LO | USB_DEVICE_ID_MATCH_DEV_HI) #define USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION \ (USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_DEV_RANGE) #define USB_DEVICE_ID_MATCH_DEV_INFO \ (USB_DEVICE_ID_MATCH_DEV_CLASS | \ USB_DEVICE_ID_MATCH_DEV_SUBCLASS | \ USB_DEVICE_ID_MATCH_DEV_PROTOCOL) #define USB_DEVICE_ID_MATCH_INT_INFO \ (USB_DEVICE_ID_MATCH_INT_CLASS | \ USB_DEVICE_ID_MATCH_INT_SUBCLASS | \ USB_DEVICE_ID_MATCH_INT_PROTOCOL) /** * USB_DEVICE - macro used to describe a specific usb device * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * * This macro is used to create a struct usb_device_id that matches a * specific device. */ #define USB_DEVICE(vend, prod) \ .match_flags = USB_DEVICE_ID_MATCH_DEVICE, \ .idVendor = (vend), \ .idProduct = (prod) /** * USB_DEVICE_VER - describe a specific usb device with a version range * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @lo: the bcdDevice_lo value * @hi: the bcdDevice_hi value * * This macro is used to create a struct usb_device_id that matches a * specific device, with a version range. */ #define USB_DEVICE_VER(vend, prod, lo, hi) \ .match_flags = USB_DEVICE_ID_MATCH_DEVICE_AND_VERSION, \ .idVendor = (vend), \ .idProduct = (prod), \ .bcdDevice_lo = (lo), \ .bcdDevice_hi = (hi) /** * USB_DEVICE_INTERFACE_CLASS - describe a usb device with a specific interface class * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @cl: bInterfaceClass value * * This macro is used to create a struct usb_device_id that matches a * specific interface class of devices. */ #define USB_DEVICE_INTERFACE_CLASS(vend, prod, cl) \ .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ USB_DEVICE_ID_MATCH_INT_CLASS, \ .idVendor = (vend), \ .idProduct = (prod), \ .bInterfaceClass = (cl) /** * USB_DEVICE_INTERFACE_PROTOCOL - describe a usb device with a specific interface protocol * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @pr: bInterfaceProtocol value * * This macro is used to create a struct usb_device_id that matches a * specific interface protocol of devices. */ #define USB_DEVICE_INTERFACE_PROTOCOL(vend, prod, pr) \ .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ USB_DEVICE_ID_MATCH_INT_PROTOCOL, \ .idVendor = (vend), \ .idProduct = (prod), \ .bInterfaceProtocol = (pr) /** * USB_DEVICE_INTERFACE_NUMBER - describe a usb device with a specific interface number * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @num: bInterfaceNumber value * * This macro is used to create a struct usb_device_id that matches a * specific interface number of devices. */ #define USB_DEVICE_INTERFACE_NUMBER(vend, prod, num) \ .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ USB_DEVICE_ID_MATCH_INT_NUMBER, \ .idVendor = (vend), \ .idProduct = (prod), \ .bInterfaceNumber = (num) /** * USB_DEVICE_INFO - macro used to describe a class of usb devices * @cl: bDeviceClass value * @sc: bDeviceSubClass value * @pr: bDeviceProtocol value * * This macro is used to create a struct usb_device_id that matches a * specific class of devices. 
*/ #define USB_DEVICE_INFO(cl, sc, pr) \ .match_flags = USB_DEVICE_ID_MATCH_DEV_INFO, \ .bDeviceClass = (cl), \ .bDeviceSubClass = (sc), \ .bDeviceProtocol = (pr) /** * USB_INTERFACE_INFO - macro used to describe a class of usb interfaces * @cl: bInterfaceClass value * @sc: bInterfaceSubClass value * @pr: bInterfaceProtocol value * * This macro is used to create a struct usb_device_id that matches a * specific class of interfaces. */ #define USB_INTERFACE_INFO(cl, sc, pr) \ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO, \ .bInterfaceClass = (cl), \ .bInterfaceSubClass = (sc), \ .bInterfaceProtocol = (pr) /** * USB_DEVICE_AND_INTERFACE_INFO - describe a specific usb device with a class of usb interfaces * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @cl: bInterfaceClass value * @sc: bInterfaceSubClass value * @pr: bInterfaceProtocol value * * This macro is used to create a struct usb_device_id that matches a * specific device with a specific class of interfaces. * * This is especially useful when explicitly matching devices that have * vendor specific bDeviceClass values, but standards-compliant interfaces. */ #define USB_DEVICE_AND_INTERFACE_INFO(vend, prod, cl, sc, pr) \ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO \ | USB_DEVICE_ID_MATCH_DEVICE, \ .idVendor = (vend), \ .idProduct = (prod), \ .bInterfaceClass = (cl), \ .bInterfaceSubClass = (sc), \ .bInterfaceProtocol = (pr) /** * USB_VENDOR_AND_INTERFACE_INFO - describe a specific usb vendor with a class of usb interfaces * @vend: the 16 bit USB Vendor ID * @cl: bInterfaceClass value * @sc: bInterfaceSubClass value * @pr: bInterfaceProtocol value * * This macro is used to create a struct usb_device_id that matches a * specific vendor with a specific class of interfaces. * * This is especially useful when explicitly matching devices that have * vendor specific bDeviceClass values, but standards-compliant interfaces. */ #define USB_VENDOR_AND_INTERFACE_INFO(vend, cl, sc, pr) \ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO \ | USB_DEVICE_ID_MATCH_VENDOR, \ .idVendor = (vend), \ .bInterfaceClass = (cl), \ .bInterfaceSubClass = (sc), \ .bInterfaceProtocol = (pr) /* ----------------------------------------------------------------------- */ /* Stuff for dynamic usb ids */ struct usb_dynids { spinlock_t lock; struct list_head list; }; struct usb_dynid { struct list_head node; struct usb_device_id id; }; extern ssize_t usb_store_new_id(struct usb_dynids *dynids, const struct usb_device_id *id_table, struct device_driver *driver, const char *buf, size_t count); extern ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf); /** * struct usb_driver - identifies USB interface driver to usbcore * @name: The driver name should be unique among USB drivers, * and should normally be the same as the module name. * @probe: Called to see if the driver is willing to manage a particular * interface on a device. If it is, probe returns zero and uses * usb_set_intfdata() to associate driver-specific data with the * interface. It may also use usb_set_interface() to specify the * appropriate altsetting. If unwilling to manage the interface, * return -ENODEV, if genuine IO errors occurred, an appropriate * negative errno value. * @disconnect: Called when the interface is no longer accessible, usually * because its device has been (or is being) disconnected or the * driver module is being unloaded. * @unlocked_ioctl: Used for drivers that want to talk to userspace through * the "usbfs" filesystem. 
This lets devices provide ways to * expose information to user space regardless of where they * do (or don't) show up otherwise in the filesystem. * @suspend: Called when the device is going to be suspended by the * system either from system sleep or runtime suspend context. The * return value will be ignored in system sleep context, so do NOT * try to continue using the device if suspend fails in this case. * Instead, let the resume or reset-resume routine recover from * the failure. * @resume: Called when the device is being resumed by the system. * @reset_resume: Called when the suspended device has been reset instead * of being resumed. * @pre_reset: Called by usb_reset_device() when the device is about to be * reset. This routine must not return until the driver has no active * URBs for the device, and no more URBs may be submitted until the * post_reset method is called. * @post_reset: Called by usb_reset_device() after the device * has been reset * @id_table: USB drivers use ID table to support hotplugging. * Export this with MODULE_DEVICE_TABLE(usb,...). This must be set * or your driver's probe function will never get called. * @dev_groups: Attributes attached to the device that will be created once it * is bound to the driver. * @dynids: used internally to hold the list of dynamically added device * ids for this driver. * @driver: The driver-model core driver structure. * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be * added to this driver by preventing the sysfs file from being created. * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend * for interfaces bound to this driver. * @soft_unbind: if set to 1, the USB core will not kill URBs and disable * endpoints before calling the driver's disconnect method. * @disable_hub_initiated_lpm: if set to 1, the USB core will not allow hubs * to initiate lower power link state transitions when an idle timeout * occurs. Device-initiated USB 3.0 link PM will still be allowed. * * USB interface drivers must provide a name, probe() and disconnect() * methods, and an id_table. Other driver fields are optional. * * The id_table is used in hotplugging. It holds a set of descriptors, * and specialized data may be associated with each entry. That table * is used by both user and kernel mode hotplugging support. * * The probe() and disconnect() methods are called in a context where * they can sleep, but they should avoid abusing the privilege. Most * work to connect to a device should be done when the device is opened, * and undone at the last close. The disconnect code needs to address * concurrency issues with respect to open() and close() methods, as * well as forcing all pending I/O requests to complete (by unlinking * them as necessary, and blocking until the unlinks complete). 
*/ struct usb_driver { const char *name; int (*probe) (struct usb_interface *intf, const struct usb_device_id *id); void (*disconnect) (struct usb_interface *intf); int (*unlocked_ioctl) (struct usb_interface *intf, unsigned int code, void *buf); int (*suspend) (struct usb_interface *intf, pm_message_t message); int (*resume) (struct usb_interface *intf); int (*reset_resume)(struct usb_interface *intf); int (*pre_reset)(struct usb_interface *intf); int (*post_reset)(struct usb_interface *intf); const struct usb_device_id *id_table; const struct attribute_group **dev_groups; struct usb_dynids dynids; struct device_driver driver; unsigned int no_dynamic_id:1; unsigned int supports_autosuspend:1; unsigned int disable_hub_initiated_lpm:1; unsigned int soft_unbind:1; }; #define to_usb_driver(d) container_of(d, struct usb_driver, driver) /** * struct usb_device_driver - identifies USB device driver to usbcore * @name: The driver name should be unique among USB drivers, * and should normally be the same as the module name. * @match: If set, used for better device/driver matching. * @probe: Called to see if the driver is willing to manage a particular * device. If it is, probe returns zero and uses dev_set_drvdata() * to associate driver-specific data with the device. If unwilling * to manage the device, return a negative errno value. * @disconnect: Called when the device is no longer accessible, usually * because it has been (or is being) disconnected or the driver's * module is being unloaded. * @suspend: Called when the device is going to be suspended by the system. * @resume: Called when the device is being resumed by the system. * @choose_configuration: If non-NULL, called instead of the default * usb_choose_configuration(). If this returns an error then we'll go * on to call the normal usb_choose_configuration(). * @dev_groups: Attributes attached to the device that will be created once it * is bound to the driver. * @driver: The driver-model core driver structure. * @id_table: used with @match() to select better matching driver at * probe() time. * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend * for devices bound to this driver. * @generic_subclass: if set to 1, the generic USB driver's probe, disconnect, * resume and suspend functions will be called in addition to the driver's * own, so this part of the setup does not need to be replicated. * * USB drivers must provide all the fields listed above except driver, * match, and id_table. */ struct usb_device_driver { const char *name; bool (*match) (struct usb_device *udev); int (*probe) (struct usb_device *udev); void (*disconnect) (struct usb_device *udev); int (*suspend) (struct usb_device *udev, pm_message_t message); int (*resume) (struct usb_device *udev, pm_message_t message); int (*choose_configuration) (struct usb_device *udev); const struct attribute_group **dev_groups; struct device_driver driver; const struct usb_device_id *id_table; unsigned int supports_autosuspend:1; unsigned int generic_subclass:1; }; #define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ driver) /** * struct usb_class_driver - identifies a USB driver that wants to use the USB major number * @name: the usb class device name for this driver. Will show up in sysfs. * @devnode: Callback to provide a naming hint for a possible * device node to create. * @fops: pointer to the struct file_operations of this driver. * @minor_base: the start of the minor range for this driver. 
* * This structure is used for the usb_register_dev() and * usb_deregister_dev() functions, to consolidate a number of the * parameters used for them. */ struct usb_class_driver { char *name; char *(*devnode)(const struct device *dev, umode_t *mode); const struct file_operations *fops; int minor_base; }; /* * use these in module_init()/module_exit() * and don't forget MODULE_DEVICE_TABLE(usb, ...) */ extern int usb_register_driver(struct usb_driver *, struct module *, const char *); /* use a define to avoid include chaining to get THIS_MODULE & friends */ #define usb_register(driver) \ usb_register_driver(driver, THIS_MODULE, KBUILD_MODNAME) extern void usb_deregister(struct usb_driver *); /** * module_usb_driver() - Helper macro for registering a USB driver * @__usb_driver: usb_driver struct * * Helper macro for USB drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_usb_driver(__usb_driver) \ module_driver(__usb_driver, usb_register, \ usb_deregister) extern int usb_register_device_driver(struct usb_device_driver *, struct module *); extern void usb_deregister_device_driver(struct usb_device_driver *); extern int usb_register_dev(struct usb_interface *intf, struct usb_class_driver *class_driver); extern void usb_deregister_dev(struct usb_interface *intf, struct usb_class_driver *class_driver); extern int usb_disabled(void); /* ----------------------------------------------------------------------- */ /* * URB support, for asynchronous request completions */ /* * urb->transfer_flags: * * Note: URB_DIR_IN/OUT is automatically set in usb_submit_urb(). */ #define URB_SHORT_NOT_OK 0x0001 /* report short reads as errors */ #define URB_ISO_ASAP 0x0002 /* iso-only; use the first unexpired * slot in the schedule */ #define URB_NO_TRANSFER_DMA_MAP 0x0004 /* urb->transfer_dma valid on submit */ #define URB_ZERO_PACKET 0x0040 /* Finish bulk OUT with short packet */ #define URB_NO_INTERRUPT 0x0080 /* HINT: no non-error interrupt * needed */ #define URB_FREE_BUFFER 0x0100 /* Free transfer buffer with the URB */ /* The following flags are used internally by usbcore and HCDs */ #define URB_DIR_IN 0x0200 /* Transfer from device to host */ #define URB_DIR_OUT 0 #define URB_DIR_MASK URB_DIR_IN #define URB_DMA_MAP_SINGLE 0x00010000 /* Non-scatter-gather mapping */ #define URB_DMA_MAP_PAGE 0x00020000 /* HCD-unsupported S-G */ #define URB_DMA_MAP_SG 0x00040000 /* HCD-supported S-G */ #define URB_MAP_LOCAL 0x00080000 /* HCD-local-memory mapping */ #define URB_SETUP_MAP_SINGLE 0x00100000 /* Setup packet DMA mapped */ #define URB_SETUP_MAP_LOCAL 0x00200000 /* HCD-local setup packet */ #define URB_DMA_SG_COMBINED 0x00400000 /* S-G entries were combined */ #define URB_ALIGNED_TEMP_BUFFER 0x00800000 /* Temp buffer was alloc'd */ struct usb_iso_packet_descriptor { unsigned int offset; unsigned int length; /* expected length */ unsigned int actual_length; int status; }; struct urb; struct usb_anchor { struct list_head urb_list; wait_queue_head_t wait; spinlock_t lock; atomic_t suspend_wakeups; unsigned int poisoned:1; }; static inline void init_usb_anchor(struct usb_anchor *anchor) { memset(anchor, 0, sizeof(*anchor)); INIT_LIST_HEAD(&anchor->urb_list); init_waitqueue_head(&anchor->wait); spin_lock_init(&anchor->lock); } typedef void (*usb_complete_t)(struct urb *); /** * struct urb - USB Request Block * @urb_list: For use by current owner of the URB. 
* @anchor_list: membership in the list of an anchor * @anchor: to anchor URBs to a common mooring * @ep: Points to the endpoint's data structure. Will eventually * replace @pipe. * @pipe: Holds endpoint number, direction, type, and more. * Create these values with the eight macros available; * usb_{snd,rcv}TYPEpipe(dev,endpoint), where the TYPE is "ctrl" * (control), "bulk", "int" (interrupt), or "iso" (isochronous). * For example usb_sndbulkpipe() or usb_rcvintpipe(). Endpoint * numbers range from zero to fifteen. Note that "in" endpoint two * is a different endpoint (and pipe) from "out" endpoint two. * The current configuration controls the existence, type, and * maximum packet size of any given endpoint. * @stream_id: the endpoint's stream ID for bulk streams * @dev: Identifies the USB device to perform the request. * @status: This is read in non-iso completion functions to get the * status of the particular request. ISO requests only use it * to tell whether the URB was unlinked; detailed status for * each frame is in the fields of the iso_frame-desc. * @transfer_flags: A variety of flags may be used to affect how URB * submission, unlinking, or operation are handled. Different * kinds of URB can use different flags. * @transfer_buffer: This identifies the buffer to (or from) which the I/O * request will be performed unless URB_NO_TRANSFER_DMA_MAP is set * (however, do not leave garbage in transfer_buffer even then). * This buffer must be suitable for DMA; allocate it with * kmalloc() or equivalent. For transfers to "in" endpoints, contents * of this buffer will be modified. This buffer is used for the data * stage of control transfers. * @transfer_dma: When transfer_flags includes URB_NO_TRANSFER_DMA_MAP, * the device driver is saying that it provided this DMA address, * which the host controller driver should use in preference to the * transfer_buffer. * @sg: scatter gather buffer list, the buffer size of each element in * the list (except the last) must be divisible by the endpoint's * max packet size if no_sg_constraint isn't set in 'struct usb_bus' * @num_mapped_sgs: (internal) number of mapped sg entries * @num_sgs: number of entries in the sg list * @transfer_buffer_length: How big is transfer_buffer. The transfer may * be broken up into chunks according to the current maximum packet * size for the endpoint, which is a function of the configuration * and is encoded in the pipe. When the length is zero, neither * transfer_buffer nor transfer_dma is used. * @actual_length: This is read in non-iso completion functions, and * it tells how many bytes (out of transfer_buffer_length) were * transferred. It will normally be the same as requested, unless * either an error was reported or a short read was performed. * The URB_SHORT_NOT_OK transfer flag may be used to make such * short reads be reported as errors. * @setup_packet: Only used for control transfers, this points to eight bytes * of setup data. Control transfers always start by sending this data * to the device. Then transfer_buffer is read or written, if needed. * @setup_dma: DMA pointer for the setup packet. The caller must not use * this field; setup_packet must point to a valid buffer. * @start_frame: Returns the initial frame for isochronous transfers. * @number_of_packets: Lists the number of ISO transfer buffers. * @interval: Specifies the polling interval for interrupt or isochronous * transfers. 
The units are frames (milliseconds) for full and low * speed devices, and microframes (1/8 millisecond) for highspeed * and SuperSpeed devices. * @error_count: Returns the number of ISO transfers that reported errors. * @context: For use in completion functions. This normally points to * request-specific driver context. * @complete: Completion handler. This URB is passed as the parameter to the * completion function. The completion function may then do what * it likes with the URB, including resubmitting or freeing it. * @iso_frame_desc: Used to provide arrays of ISO transfer buffers and to * collect the transfer status for each buffer. * * This structure identifies USB transfer requests. URBs must be allocated by * calling usb_alloc_urb() and freed with a call to usb_free_urb(). * Initialization may be done using various usb_fill_*_urb() functions. URBs * are submitted using usb_submit_urb(), and pending requests may be canceled * using usb_unlink_urb() or usb_kill_urb(). * * Data Transfer Buffers: * * Normally drivers provide I/O buffers allocated with kmalloc() or otherwise * taken from the general page pool. That is provided by transfer_buffer * (control requests also use setup_packet), and host controller drivers * perform a dma mapping (and unmapping) for each buffer transferred. Those * mapping operations can be expensive on some platforms (perhaps using a dma * bounce buffer or talking to an IOMMU), * although they're cheap on commodity x86 and ppc hardware. * * Alternatively, drivers may pass the URB_NO_TRANSFER_DMA_MAP transfer flag, * which tells the host controller driver that no such mapping is needed for * the transfer_buffer since * the device driver is DMA-aware. For example, a device driver might * allocate a DMA buffer with usb_alloc_coherent() or call usb_buffer_map(). * When this transfer flag is provided, host controller drivers will * attempt to use the dma address found in the transfer_dma * field rather than determining a dma address themselves. * * Note that transfer_buffer must still be set if the controller * does not support DMA (as indicated by hcd_uses_dma()) and when talking * to root hub. If you have to transfer between highmem zone and the device * on such controller, create a bounce buffer or bail out with an error. * If transfer_buffer cannot be set (is in highmem) and the controller is DMA * capable, assign NULL to it, so that usbmon knows not to use the value. * The setup_packet must always be set, so it cannot be located in highmem. * * Initialization: * * All URBs submitted must initialize the dev, pipe, transfer_flags (may be * zero), and complete fields. All URBs must also initialize * transfer_buffer and transfer_buffer_length. They may provide the * URB_SHORT_NOT_OK transfer flag, indicating that short reads are * to be treated as errors; that flag is invalid for write requests. * * Bulk URBs may * use the URB_ZERO_PACKET transfer flag, indicating that bulk OUT transfers * should always terminate with a short packet, even if it means adding an * extra zero length packet. * * Control URBs must provide a valid pointer in the setup_packet field. * Unlike the transfer_buffer, the setup_packet may not be mapped for DMA * beforehand. * * Interrupt URBs must provide an interval, saying how often (in milliseconds * or, for highspeed devices, 125 microsecond units) * to poll for transfers. After the URB has been submitted, the interval * field reflects how the transfer was actually scheduled. * The polling interval may be more frequent than requested. 
* For example, some controllers have a maximum interval of 32 milliseconds, * while others support intervals of up to 1024 milliseconds. * Isochronous URBs also have transfer intervals. (Note that for isochronous * endpoints, as well as high speed interrupt endpoints, the encoding of * the transfer interval in the endpoint descriptor is logarithmic. * Device drivers must convert that value to linear units themselves.) * * If an isochronous endpoint queue isn't already running, the host * controller will schedule a new URB to start as soon as bandwidth * utilization allows. If the queue is running then a new URB will be * scheduled to start in the first transfer slot following the end of the * preceding URB, if that slot has not already expired. If the slot has * expired (which can happen when IRQ delivery is delayed for a long time), * the scheduling behavior depends on the URB_ISO_ASAP flag. If the flag * is clear then the URB will be scheduled to start in the expired slot, * implying that some of its packets will not be transferred; if the flag * is set then the URB will be scheduled in the first unexpired slot, * breaking the queue's synchronization. Upon URB completion, the * start_frame field will be set to the (micro)frame number in which the * transfer was scheduled. Ranges for frame counter values are HC-specific * and can go from as low as 256 to as high as 65536 frames. * * Isochronous URBs have a different data transfer model, in part because * the quality of service is only "best effort". Callers provide specially * allocated URBs, with number_of_packets worth of iso_frame_desc structures * at the end. Each such packet is an individual ISO transfer. Isochronous * URBs are normally queued, submitted by drivers to arrange that * transfers are at least double buffered, and then explicitly resubmitted * in completion handlers, so * that data (such as audio or video) streams at as constant a rate as the * host controller scheduler can support. * * Completion Callbacks: * * The completion callback is made in_interrupt(), and one of the first * things that a completion handler should do is check the status field. * The status field is provided for all URBs. It is used to report * unlinked URBs, and status for all non-ISO transfers. It should not * be examined before the URB is returned to the completion handler. * * The context field is normally used to link URBs back to the relevant * driver or request state. * * When the completion callback is invoked for non-isochronous URBs, the * actual_length field tells how many bytes were transferred. This field * is updated even when the URB terminated with an error or was unlinked. * * ISO transfer status is reported in the status and actual_length fields * of the iso_frame_desc array, and the number of errors is reported in * error_count. Completion callbacks for ISO transfers will normally * (re)submit URBs to ensure a constant transfer rate. * * Note that even fields marked "public" should not be touched by the driver * when the urb is owned by the hcd, that is, since the call to * usb_submit_urb() till the entry into the completion routine. 
*/ struct urb { /* private: usb core and host controller only fields in the urb */ struct kref kref; /* reference count of the URB */ int unlinked; /* unlink error code */ void *hcpriv; /* private data for host controller */ atomic_t use_count; /* concurrent submissions counter */ atomic_t reject; /* submissions will fail */ /* public: documented fields in the urb that can be used by drivers */ struct list_head urb_list; /* list head for use by the urb's * current owner */ struct list_head anchor_list; /* the URB may be anchored */ struct usb_anchor *anchor; struct usb_device *dev; /* (in) pointer to associated device */ struct usb_host_endpoint *ep; /* (internal) pointer to endpoint */ unsigned int pipe; /* (in) pipe information */ unsigned int stream_id; /* (in) stream ID */ int status; /* (return) non-ISO status */ unsigned int transfer_flags; /* (in) URB_SHORT_NOT_OK | ...*/ void *transfer_buffer; /* (in) associated data buffer */ dma_addr_t transfer_dma; /* (in) dma addr for transfer_buffer */ struct scatterlist *sg; /* (in) scatter gather buffer list */ int num_mapped_sgs; /* (internal) mapped sg entries */ int num_sgs; /* (in) number of entries in the sg list */ u32 transfer_buffer_length; /* (in) data buffer length */ u32 actual_length; /* (return) actual transfer length */ unsigned char *setup_packet; /* (in) setup packet (control only) */ dma_addr_t setup_dma; /* (in) dma addr for setup_packet */ int start_frame; /* (modify) start frame (ISO) */ int number_of_packets; /* (in) number of ISO packets */ int interval; /* (modify) transfer interval * (INT/ISO) */ int error_count; /* (return) number of ISO errors */ void *context; /* (in) context for completion */ usb_complete_t complete; /* (in) completion routine */ struct usb_iso_packet_descriptor iso_frame_desc[]; /* (in) ISO ONLY */ }; /* ----------------------------------------------------------------------- */ /** * usb_fill_control_urb - initializes a control urb * @urb: pointer to the urb to initialize. * @dev: pointer to the struct usb_device for this urb. * @pipe: the endpoint pipe * @setup_packet: pointer to the setup_packet buffer. The buffer must be * suitable for DMA. * @transfer_buffer: pointer to the transfer buffer. The buffer must be * suitable for DMA. * @buffer_length: length of the transfer buffer * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. * * Initializes a control urb with the proper information needed to submit * it to a device. * * The transfer buffer and the setup_packet buffer will most likely be filled * or read via DMA. The simplest way to get a buffer that can be DMAed to is * allocating it via kmalloc() or equivalent, even for very small buffers. * If the buffers are embedded in a bigger structure, there is a risk that * the buffer itself, the previous fields and/or the next fields are corrupted * due to cache incoherencies; or slowed down if they are evicted from the * cache. For more information, check &struct urb. * */ static inline void usb_fill_control_urb(struct urb *urb, struct usb_device *dev, unsigned int pipe, unsigned char *setup_packet, void *transfer_buffer, int buffer_length, usb_complete_t complete_fn, void *context) { urb->dev = dev; urb->pipe = pipe; urb->setup_packet = setup_packet; urb->transfer_buffer = transfer_buffer; urb->transfer_buffer_length = buffer_length; urb->complete = complete_fn; urb->context = context; } /** * usb_fill_bulk_urb - macro to help initialize a bulk urb * @urb: pointer to the urb to initialize. 
 * @dev: pointer to the struct usb_device for this urb.
 * @pipe: the endpoint pipe
 * @transfer_buffer: pointer to the transfer buffer. The buffer must be
 *	suitable for DMA.
 * @buffer_length: length of the transfer buffer
 * @complete_fn: pointer to the usb_complete_t function
 * @context: what to set the urb context to.
 *
 * Initializes a bulk urb with the proper information needed to submit it
 * to a device.
 *
 * Refer to usb_fill_control_urb() for a description of the requirements for
 * transfer_buffer.
 */
static inline void usb_fill_bulk_urb(struct urb *urb,
				     struct usb_device *dev,
				     unsigned int pipe,
				     void *transfer_buffer,
				     int buffer_length,
				     usb_complete_t complete_fn,
				     void *context)
{
	urb->dev = dev;
	urb->pipe = pipe;
	urb->transfer_buffer = transfer_buffer;
	urb->transfer_buffer_length = buffer_length;
	urb->complete = complete_fn;
	urb->context = context;
}

/**
 * usb_fill_int_urb - macro to help initialize an interrupt urb
 * @urb: pointer to the urb to initialize.
 * @dev: pointer to the struct usb_device for this urb.
 * @pipe: the endpoint pipe
 * @transfer_buffer: pointer to the transfer buffer. The buffer must be
 *	suitable for DMA.
 * @buffer_length: length of the transfer buffer
 * @complete_fn: pointer to the usb_complete_t function
 * @context: what to set the urb context to.
 * @interval: what to set the urb interval to, encoded like
 *	the endpoint descriptor's bInterval value.
 *
 * Initializes an interrupt urb with the proper information needed to submit
 * it to a device.
 *
 * Refer to usb_fill_control_urb() for a description of the requirements for
 * transfer_buffer. An illustrative usage sketch is appended at the end of
 * this header.
 *
 * Note that High Speed and SuperSpeed(+) interrupt endpoints use a logarithmic
 * encoding of the endpoint interval, and express polling intervals in
 * microframes (eight per millisecond) rather than in frames (one per
 * millisecond).
*/ static inline void usb_fill_int_urb(struct urb *urb, struct usb_device *dev, unsigned int pipe, void *transfer_buffer, int buffer_length, usb_complete_t complete_fn, void *context, int interval) { urb->dev = dev; urb->pipe = pipe; urb->transfer_buffer = transfer_buffer; urb->transfer_buffer_length = buffer_length; urb->complete = complete_fn; urb->context = context; if (dev->speed == USB_SPEED_HIGH || dev->speed >= USB_SPEED_SUPER) { /* make sure interval is within allowed range */ interval = clamp(interval, 1, 16); urb->interval = 1 << (interval - 1); } else { urb->interval = interval; } urb->start_frame = -1; } extern void usb_init_urb(struct urb *urb); extern struct urb *usb_alloc_urb(int iso_packets, gfp_t mem_flags); extern void usb_free_urb(struct urb *urb); #define usb_put_urb usb_free_urb extern struct urb *usb_get_urb(struct urb *urb); extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_unlink_urb(struct urb *urb); extern void usb_kill_urb(struct urb *urb); extern void usb_poison_urb(struct urb *urb); extern void usb_unpoison_urb(struct urb *urb); extern void usb_block_urb(struct urb *urb); extern void usb_kill_anchored_urbs(struct usb_anchor *anchor); extern void usb_poison_anchored_urbs(struct usb_anchor *anchor); extern void usb_unpoison_anchored_urbs(struct usb_anchor *anchor); extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor); extern void usb_anchor_suspend_wakeups(struct usb_anchor *anchor); extern void usb_anchor_resume_wakeups(struct usb_anchor *anchor); extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); extern void usb_unanchor_urb(struct urb *urb); extern int usb_wait_anchor_empty_timeout(struct usb_anchor *anchor, unsigned int timeout); extern struct urb *usb_get_from_anchor(struct usb_anchor *anchor); extern void usb_scuttle_anchored_urbs(struct usb_anchor *anchor); extern int usb_anchor_empty(struct usb_anchor *anchor); #define usb_unblock_urb usb_unpoison_urb /** * usb_urb_dir_in - check if an URB describes an IN transfer * @urb: URB to be checked * * Return: 1 if @urb describes an IN transfer (device-to-host), * otherwise 0. */ static inline int usb_urb_dir_in(struct urb *urb) { return (urb->transfer_flags & URB_DIR_MASK) == URB_DIR_IN; } /** * usb_urb_dir_out - check if an URB describes an OUT transfer * @urb: URB to be checked * * Return: 1 if @urb describes an OUT transfer (host-to-device), * otherwise 0. 
*/ static inline int usb_urb_dir_out(struct urb *urb) { return (urb->transfer_flags & URB_DIR_MASK) == URB_DIR_OUT; } int usb_pipe_type_check(struct usb_device *dev, unsigned int pipe); int usb_urb_ep_type_check(const struct urb *urb); void *usb_alloc_coherent(struct usb_device *dev, size_t size, gfp_t mem_flags, dma_addr_t *dma); void usb_free_coherent(struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); /*-------------------------------------------------------------------* * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ extern int usb_control_msg(struct usb_device *dev, unsigned int pipe, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *data, __u16 size, int timeout); extern int usb_interrupt_msg(struct usb_device *usb_dev, unsigned int pipe, void *data, int len, int *actual_length, int timeout); extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, void *data, int len, int *actual_length, int timeout); /* wrappers around usb_control_msg() for the most common standard requests */ int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request, __u8 requesttype, __u16 value, __u16 index, const void *data, __u16 size, int timeout, gfp_t memflags); int usb_control_msg_recv(struct usb_device *dev, __u8 endpoint, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *data, __u16 size, int timeout, gfp_t memflags); extern int usb_get_descriptor(struct usb_device *dev, unsigned char desctype, unsigned char descindex, void *buf, int size); extern int usb_get_status(struct usb_device *dev, int recip, int type, int target, void *data); static inline int usb_get_std_status(struct usb_device *dev, int recip, int target, void *data) { return usb_get_status(dev, recip, USB_STATUS_TYPE_STANDARD, target, data); } static inline int usb_get_ptm_status(struct usb_device *dev, void *data) { return usb_get_status(dev, USB_RECIP_DEVICE, USB_STATUS_TYPE_PTM, 0, data); } extern int usb_string(struct usb_device *dev, int index, char *buf, size_t size); extern char *usb_cache_string(struct usb_device *udev, int index); /* wrappers that also update important state inside usbcore */ extern int usb_clear_halt(struct usb_device *dev, int pipe); extern int usb_reset_configuration(struct usb_device *dev); extern int usb_set_interface(struct usb_device *dev, int ifnum, int alternate); extern void usb_reset_endpoint(struct usb_device *dev, unsigned int epaddr); /* this request isn't really synchronous, but it belongs with the others */ extern int usb_driver_set_configuration(struct usb_device *udev, int config); /* choose and set configuration for device */ extern int usb_choose_configuration(struct usb_device *udev); extern int usb_set_configuration(struct usb_device *dev, int configuration); /* * timeouts, in milliseconds, used for sending/receiving control messages * they typically complete within a few frames (msec) after they're issued * USB identifies 5 second timeouts, maybe more in a few cases, and a few * slow devices (like some MGE Ellipse UPSes) actually push that limit. */ #define USB_CTRL_GET_TIMEOUT 5000 #define USB_CTRL_SET_TIMEOUT 5000 /** * struct usb_sg_request - support for scatter/gather I/O * @status: zero indicates success, else negative errno * @bytes: counts bytes transferred. * * These requests are initialized using usb_sg_init(), and then are used * as request handles passed to usb_sg_wait() or usb_sg_cancel(). Most * members of the request object aren't for driver access. 
* * The status and bytecount values are valid only after usb_sg_wait() * returns. If the status is zero, then the bytecount matches the total * from the request. * * After an error completion, drivers may need to clear a halt condition * on the endpoint. */ struct usb_sg_request { int status; size_t bytes; /* private: * members below are private to usbcore, * and are not provided for driver access! */ spinlock_t lock; struct usb_device *dev; int pipe; int entries; struct urb **urbs; int count; struct completion complete; }; int usb_sg_init( struct usb_sg_request *io, struct usb_device *dev, unsigned pipe, unsigned period, struct scatterlist *sg, int nents, size_t length, gfp_t mem_flags ); void usb_sg_cancel(struct usb_sg_request *io); void usb_sg_wait(struct usb_sg_request *io); /* ----------------------------------------------------------------------- */ /* * For various legacy reasons, Linux has a small cookie that's paired with * a struct usb_device to identify an endpoint queue. Queue characteristics * are defined by the endpoint's descriptor. This cookie is called a "pipe", * an unsigned int encoded as: * * - direction: bit 7 (0 = Host-to-Device [Out], * 1 = Device-to-Host [In] ... * like endpoint bEndpointAddress) * - device address: bits 8-14 ... bit positions known to uhci-hcd * - endpoint: bits 15-18 ... bit positions known to uhci-hcd * - pipe type: bits 30-31 (00 = isochronous, 01 = interrupt, * 10 = control, 11 = bulk) * * Given the device address and endpoint descriptor, pipes are redundant. */ /* NOTE: these are not the standard USB_ENDPOINT_XFER_* values!! */ /* (yet ... they're the values used by usbfs) */ #define PIPE_ISOCHRONOUS 0 #define PIPE_INTERRUPT 1 #define PIPE_CONTROL 2 #define PIPE_BULK 3 #define usb_pipein(pipe) ((pipe) & USB_DIR_IN) #define usb_pipeout(pipe) (!usb_pipein(pipe)) #define usb_pipedevice(pipe) (((pipe) >> 8) & 0x7f) #define usb_pipeendpoint(pipe) (((pipe) >> 15) & 0xf) #define usb_pipetype(pipe) (((pipe) >> 30) & 3) #define usb_pipeisoc(pipe) (usb_pipetype((pipe)) == PIPE_ISOCHRONOUS) #define usb_pipeint(pipe) (usb_pipetype((pipe)) == PIPE_INTERRUPT) #define usb_pipecontrol(pipe) (usb_pipetype((pipe)) == PIPE_CONTROL) #define usb_pipebulk(pipe) (usb_pipetype((pipe)) == PIPE_BULK) static inline unsigned int __create_pipe(struct usb_device *dev, unsigned int endpoint) { return (dev->devnum << 8) | (endpoint << 15); } /* Create various pipes... */ #define usb_sndctrlpipe(dev, endpoint) \ ((PIPE_CONTROL << 30) | __create_pipe(dev, endpoint)) #define usb_rcvctrlpipe(dev, endpoint) \ ((PIPE_CONTROL << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN) #define usb_sndisocpipe(dev, endpoint) \ ((PIPE_ISOCHRONOUS << 30) | __create_pipe(dev, endpoint)) #define usb_rcvisocpipe(dev, endpoint) \ ((PIPE_ISOCHRONOUS << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN) #define usb_sndbulkpipe(dev, endpoint) \ ((PIPE_BULK << 30) | __create_pipe(dev, endpoint)) #define usb_rcvbulkpipe(dev, endpoint) \ ((PIPE_BULK << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN) #define usb_sndintpipe(dev, endpoint) \ ((PIPE_INTERRUPT << 30) | __create_pipe(dev, endpoint)) #define usb_rcvintpipe(dev, endpoint) \ ((PIPE_INTERRUPT << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN) static inline struct usb_host_endpoint * usb_pipe_endpoint(struct usb_device *dev, unsigned int pipe) { struct usb_host_endpoint **eps; eps = usb_pipein(pipe) ? 
dev->ep_in : dev->ep_out; return eps[usb_pipeendpoint(pipe)]; } static inline u16 usb_maxpacket(struct usb_device *udev, int pipe) { struct usb_host_endpoint *ep = usb_pipe_endpoint(udev, pipe); if (!ep) return 0; /* NOTE: only 0x07ff bits are for packet size... */ return usb_endpoint_maxp(&ep->desc); } /* translate USB error codes to codes user space understands */ static inline int usb_translate_errors(int error_code) { switch (error_code) { case 0: case -ENOMEM: case -ENODEV: case -EOPNOTSUPP: return error_code; default: return -EIO; } } /* Events from the usb core */ #define USB_DEVICE_ADD 0x0001 #define USB_DEVICE_REMOVE 0x0002 #define USB_BUS_ADD 0x0003 #define USB_BUS_REMOVE 0x0004 extern void usb_register_notify(struct notifier_block *nb); extern void usb_unregister_notify(struct notifier_block *nb); /* debugfs stuff */ extern struct dentry *usb_debug_root; /* LED triggers */ enum usb_led_event { USB_LED_EVENT_HOST = 0, USB_LED_EVENT_GADGET = 1, }; #ifdef CONFIG_USB_LED_TRIG extern void usb_led_activity(enum usb_led_event ev); #else static inline void usb_led_activity(enum usb_led_event ev) {} #endif #endif /* __KERNEL__ */ #endif
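/*
 * Illustrative sketch (documentation only, appended here; not part of the
 * kernel API above): a hypothetical "skel" interface driver tying together
 * the pieces declared in this header -- an ID table built with USB_DEVICE(),
 * a struct usb_driver registered through module_usb_driver(), and an
 * interrupt URB prepared with usb_fill_int_urb() and submitted with
 * usb_submit_urb().  The vendor/product IDs, the skel_* names and the
 * endpoint address are made up; a real driver would live in its own .c file.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/usb.h>

static const struct usb_device_id skel_id_table[] = {
	{ USB_DEVICE(0x1234, 0x5678) },		/* hypothetical VID/PID */
	{ }					/* terminating entry */
};
MODULE_DEVICE_TABLE(usb, skel_id_table);

struct skel_dev {
	struct usb_device *udev;
	struct urb *int_urb;
	u8 *int_buf;
};

static void skel_int_complete(struct urb *urb)
{
	/* On success, consume urb->actual_length bytes and resubmit. */
	if (!urb->status)
		usb_submit_urb(urb, GFP_ATOMIC);
}

static int skel_probe(struct usb_interface *intf,
		      const struct usb_device_id *id)
{
	struct usb_device *udev = interface_to_usbdev(intf);
	struct skel_dev *dev;
	int retval = -ENOMEM;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	dev->udev = udev;
	dev->int_urb = usb_alloc_urb(0, GFP_KERNEL);
	dev->int_buf = kmalloc(8, GFP_KERNEL);	/* DMA-able transfer buffer */
	if (!dev->int_urb || !dev->int_buf)
		goto err;

	/* Interrupt IN endpoint 1, interval encoded like bInterval (here 4). */
	usb_fill_int_urb(dev->int_urb, udev, usb_rcvintpipe(udev, 1),
			 dev->int_buf, 8, skel_int_complete, dev, 4);

	retval = usb_submit_urb(dev->int_urb, GFP_KERNEL);
	if (retval)
		goto err;

	usb_set_intfdata(intf, dev);
	return 0;

err:
	usb_free_urb(dev->int_urb);
	kfree(dev->int_buf);
	kfree(dev);
	return retval;
}

static void skel_disconnect(struct usb_interface *intf)
{
	struct skel_dev *dev = usb_get_intfdata(intf);

	usb_kill_urb(dev->int_urb);	/* blocks until the URB is idle */
	usb_free_urb(dev->int_urb);
	kfree(dev->int_buf);
	kfree(dev);
}

static struct usb_driver skel_driver = {
	.name		= "skel",
	.probe		= skel_probe,
	.disconnect	= skel_disconnect,
	.id_table	= skel_id_table,
};
module_usb_driver(skel_driver);

MODULE_LICENSE("GPL");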
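/*
 * Illustrative sketch (documentation only; not part of this header): the
 * synchronous transfer helpers and pipe macros declared above.  The vendor
 * request number, the bulk endpoint address, the buffer sizes and the
 * demo_* names are hypothetical.
 */
#include <linux/usb.h>

/* Read one status byte via a vendor-specific control-IN request. */
static int demo_read_vendor_status(struct usb_device *udev, u8 *status)
{
	/*
	 * usb_control_msg_recv() copies through an internal bounce buffer,
	 * so @status does not itself need to be DMA-able.
	 */
	return usb_control_msg_recv(udev, 0 /* ep0 */,
				    0x01 /* hypothetical bRequest */,
				    USB_DIR_IN | USB_TYPE_VENDOR |
					USB_RECIP_DEVICE,
				    0 /* wValue */, 0 /* wIndex */,
				    status, 1,
				    USB_CTRL_GET_TIMEOUT, GFP_KERNEL);
}

/* Blocking bulk-IN read; @buf must be DMA-able (e.g. from kmalloc()). */
static int demo_bulk_read(struct usb_device *udev, void *buf, int len,
			  int *transferred)
{
	return usb_bulk_msg(udev, usb_rcvbulkpipe(udev, 2 /* hypothetical */),
			    buf, len, transferred, 5000 /* ms timeout */);
}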
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Roccat driver for Linux
 *
 * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net>
 */

/*
 * Module roccat is a char device used to report special events of roccat
 * hardware to userland. These events include requests for on-screen display
 * of profile or dpi settings, or requests for execution of macro sequences
 * that are not stored in the device. The information in these events depends
 * on the hid device implementation and contains data that is not available
 * in a single hid event, or else hidraw could have been used.
 * It is inspired by hidraw, but uses only one circular buffer for all readers.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cdev.h> #include <linux/poll.h> #include <linux/sched/signal.h> #include <linux/hid-roccat.h> #include <linux/module.h> #define ROCCAT_FIRST_MINOR 0 #define ROCCAT_MAX_DEVICES 8 /* should be a power of 2 for performance reason */ #define ROCCAT_CBUF_SIZE 16 struct roccat_report { uint8_t *value; }; struct roccat_device { unsigned int minor; int report_size; int open; int exist; wait_queue_head_t wait; struct device *dev; struct hid_device *hid; struct list_head readers; /* protects modifications of readers list */ struct mutex readers_lock; /* * circular_buffer has one writer and multiple readers with their own * read pointers */ struct roccat_report cbuf[ROCCAT_CBUF_SIZE]; int cbuf_end; struct mutex cbuf_lock; }; struct roccat_reader { struct list_head node; struct roccat_device *device; int cbuf_start; }; static int roccat_major; static struct cdev roccat_cdev; static struct roccat_device *devices[ROCCAT_MAX_DEVICES]; /* protects modifications of devices array */ static DEFINE_MUTEX(devices_lock); static ssize_t roccat_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct roccat_reader *reader = file->private_data; struct roccat_device *device = reader->device; struct roccat_report *report; ssize_t retval = 0, len; DECLARE_WAITQUEUE(wait, current); mutex_lock(&device->cbuf_lock); /* no data? */ if (reader->cbuf_start == device->cbuf_end) { add_wait_queue(&device->wait, &wait); set_current_state(TASK_INTERRUPTIBLE); /* wait for data */ while (reader->cbuf_start == device->cbuf_end) { if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; break; } if (signal_pending(current)) { retval = -ERESTARTSYS; break; } if (!device->exist) { retval = -EIO; break; } mutex_unlock(&device->cbuf_lock); schedule(); mutex_lock(&device->cbuf_lock); set_current_state(TASK_INTERRUPTIBLE); } set_current_state(TASK_RUNNING); remove_wait_queue(&device->wait, &wait); } /* here we either have data or a reason to return if retval is set */ if (retval) goto exit_unlock; report = &device->cbuf[reader->cbuf_start]; /* * If report is larger than requested amount of data, rest of report * is lost! */ len = device->report_size > count ? 
count : device->report_size; if (copy_to_user(buffer, report->value, len)) { retval = -EFAULT; goto exit_unlock; } retval += len; reader->cbuf_start = (reader->cbuf_start + 1) % ROCCAT_CBUF_SIZE; exit_unlock: mutex_unlock(&device->cbuf_lock); return retval; } static __poll_t roccat_poll(struct file *file, poll_table *wait) { struct roccat_reader *reader = file->private_data; poll_wait(file, &reader->device->wait, wait); if (reader->cbuf_start != reader->device->cbuf_end) return EPOLLIN | EPOLLRDNORM; if (!reader->device->exist) return EPOLLERR | EPOLLHUP; return 0; } static int roccat_open(struct inode *inode, struct file *file) { unsigned int minor = iminor(inode); struct roccat_reader *reader; struct roccat_device *device; int error = 0; reader = kzalloc(sizeof(struct roccat_reader), GFP_KERNEL); if (!reader) return -ENOMEM; mutex_lock(&devices_lock); device = devices[minor]; if (!device) { pr_emerg("roccat device with minor %d doesn't exist\n", minor); error = -ENODEV; goto exit_err_devices; } mutex_lock(&device->readers_lock); if (!device->open++) { /* power on device on adding first reader */ error = hid_hw_power(device->hid, PM_HINT_FULLON); if (error < 0) { --device->open; goto exit_err_readers; } error = hid_hw_open(device->hid); if (error < 0) { hid_hw_power(device->hid, PM_HINT_NORMAL); --device->open; goto exit_err_readers; } } reader->device = device; /* new reader doesn't get old events */ reader->cbuf_start = device->cbuf_end; list_add_tail(&reader->node, &device->readers); file->private_data = reader; exit_err_readers: mutex_unlock(&device->readers_lock); exit_err_devices: mutex_unlock(&devices_lock); if (error) kfree(reader); return error; } static int roccat_release(struct inode *inode, struct file *file) { unsigned int minor = iminor(inode); struct roccat_reader *reader = file->private_data; struct roccat_device *device; mutex_lock(&devices_lock); device = devices[minor]; if (!device) { mutex_unlock(&devices_lock); pr_emerg("roccat device with minor %d doesn't exist\n", minor); return -ENODEV; } mutex_lock(&device->readers_lock); list_del(&reader->node); mutex_unlock(&device->readers_lock); kfree(reader); if (!--device->open) { /* removing last reader */ if (device->exist) { hid_hw_power(device->hid, PM_HINT_NORMAL); hid_hw_close(device->hid); } else { kfree(device); } } mutex_unlock(&devices_lock); return 0; } /* * roccat_report_event() - output data to readers * @minor: minor device number returned by roccat_connect() * @data: pointer to data * * Return value is zero on success, a negative error code on failure. * * This is called from interrupt handler. */ int roccat_report_event(int minor, u8 const *data) { struct roccat_device *device; struct roccat_reader *reader; struct roccat_report *report; uint8_t *new_value; device = devices[minor]; new_value = kmemdup(data, device->report_size, GFP_ATOMIC); if (!new_value) return -ENOMEM; mutex_lock(&device->cbuf_lock); report = &device->cbuf[device->cbuf_end]; /* passing NULL is safe */ kfree(report->value); report->value = new_value; device->cbuf_end = (device->cbuf_end + 1) % ROCCAT_CBUF_SIZE; list_for_each_entry(reader, &device->readers, node) { /* * As we already inserted one element, the buffer can't be * empty. If start and end are equal, buffer is full and we * increase start, so that slow reader misses one event, but * gets the newer ones in the right order. 
*/ if (reader->cbuf_start == device->cbuf_end) reader->cbuf_start = (reader->cbuf_start + 1) % ROCCAT_CBUF_SIZE; } mutex_unlock(&device->cbuf_lock); wake_up_interruptible(&device->wait); return 0; } EXPORT_SYMBOL_GPL(roccat_report_event); /* * roccat_connect() - create a char device for special event output * @class: the class thats used to create the device. Meant to hold device * specific sysfs attributes. * @hid: the hid device the char device should be connected to. * @report_size: size of reports * * Return value is minor device number in Range [0, ROCCAT_MAX_DEVICES] on * success, a negative error code on failure. */ int roccat_connect(const struct class *klass, struct hid_device *hid, int report_size) { unsigned int minor; struct roccat_device *device; int temp; device = kzalloc(sizeof(struct roccat_device), GFP_KERNEL); if (!device) return -ENOMEM; mutex_lock(&devices_lock); for (minor = 0; minor < ROCCAT_MAX_DEVICES; ++minor) { if (devices[minor]) continue; break; } if (minor < ROCCAT_MAX_DEVICES) { devices[minor] = device; } else { mutex_unlock(&devices_lock); kfree(device); return -EINVAL; } device->dev = device_create(klass, &hid->dev, MKDEV(roccat_major, minor), NULL, "%s%s%d", "roccat", hid->driver->name, minor); if (IS_ERR(device->dev)) { devices[minor] = NULL; mutex_unlock(&devices_lock); temp = PTR_ERR(device->dev); kfree(device); return temp; } mutex_unlock(&devices_lock); init_waitqueue_head(&device->wait); INIT_LIST_HEAD(&device->readers); mutex_init(&device->readers_lock); mutex_init(&device->cbuf_lock); device->minor = minor; device->hid = hid; device->exist = 1; device->cbuf_end = 0; device->report_size = report_size; return minor; } EXPORT_SYMBOL_GPL(roccat_connect); /* roccat_disconnect() - remove char device from hid device * @minor: the minor device number returned by roccat_connect() */ void roccat_disconnect(int minor) { struct roccat_device *device; mutex_lock(&devices_lock); device = devices[minor]; mutex_unlock(&devices_lock); device->exist = 0; /* TODO exist maybe not needed */ device_destroy(device->dev->class, MKDEV(roccat_major, minor)); mutex_lock(&devices_lock); devices[minor] = NULL; mutex_unlock(&devices_lock); if (device->open) { hid_hw_close(device->hid); wake_up_interruptible(&device->wait); } else { kfree(device); } } EXPORT_SYMBOL_GPL(roccat_disconnect); static long roccat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); struct roccat_device *device; unsigned int minor = iminor(inode); long retval = 0; mutex_lock(&devices_lock); device = devices[minor]; if (!device) { retval = -ENODEV; goto out; } switch (cmd) { case ROCCATIOCGREPSIZE: if (put_user(device->report_size, (int __user *)arg)) retval = -EFAULT; break; default: retval = -ENOTTY; } out: mutex_unlock(&devices_lock); return retval; } static const struct file_operations roccat_ops = { .owner = THIS_MODULE, .read = roccat_read, .poll = roccat_poll, .open = roccat_open, .release = roccat_release, .llseek = noop_llseek, .unlocked_ioctl = roccat_ioctl, }; static int __init roccat_init(void) { int retval; dev_t dev_id; retval = alloc_chrdev_region(&dev_id, ROCCAT_FIRST_MINOR, ROCCAT_MAX_DEVICES, "roccat"); if (retval < 0) { pr_warn("can't get major number\n"); goto error; } roccat_major = MAJOR(dev_id); cdev_init(&roccat_cdev, &roccat_ops); retval = cdev_add(&roccat_cdev, dev_id, ROCCAT_MAX_DEVICES); if (retval < 0) { pr_warn("cannot add cdev\n"); goto cleanup_alloc_chrdev_region; } return 0; cleanup_alloc_chrdev_region: 
unregister_chrdev_region(dev_id, ROCCAT_MAX_DEVICES); error: return retval; } static void __exit roccat_exit(void) { dev_t dev_id = MKDEV(roccat_major, 0); cdev_del(&roccat_cdev); unregister_chrdev_region(dev_id, ROCCAT_MAX_DEVICES); } module_init(roccat_init); module_exit(roccat_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat char device"); MODULE_LICENSE("GPL v2");
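/*
 * Illustrative sketch (documentation only; not part of this module): how a
 * device-specific hid-roccat-* driver would typically plug into the chardev
 * interface above.  The example_* names, the report size, the special report
 * ID and the class are hypothetical; example_class is assumed to have been
 * registered with class_register() in that driver's module init, and the
 * helpers below would be called from its probe, raw_event and remove paths.
 */
#include <linux/hid.h>

static const struct class example_class = {
	.name = "example",		/* hypothetical sysfs class */
};

struct example_device {
	int chrdev_minor;		/* minor returned by roccat_connect() */
};

static int example_init_chardev(struct hid_device *hdev,
				struct example_device *dev)
{
	int minor;

	/* Creates /dev/roccat<hid driver name><minor> for special events. */
	minor = roccat_connect(&example_class, hdev, 16 /* report size */);
	if (minor < 0)
		return minor;

	dev->chrdev_minor = minor;
	return 0;
}

static int example_raw_event(struct hid_device *hdev,
			     struct hid_report *report, u8 *data, int size)
{
	/* dev was stored with hid_set_drvdata() at probe time. */
	struct example_device *dev = hid_get_drvdata(hdev);

	/* Forward only the "special event" reports to all chardev readers. */
	if (size == 16 && data[0] == 0x03 /* hypothetical report ID */)
		roccat_report_event(dev->chrdev_minor, data);

	return 0;
}

static void example_remove_chardev(struct example_device *dev)
{
	roccat_disconnect(dev->chrdev_minor);
}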
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Link physical devices with ACPI devices support
 *
 * Copyright (c) 2005 David Shaohua Li <shaohua.li@intel.com>
 * Copyright (c) 2005 Intel Corp.
*/ #define pr_fmt(fmt) "ACPI: " fmt #include <linux/acpi_iort.h> #include <linux/export.h> #include <linux/init.h> #include <linux/list.h> #include <linux/device.h> #include <linux/slab.h> #include <linux/rwsem.h> #include <linux/acpi.h> #include <linux/dma-mapping.h> #include <linux/pci.h> #include <linux/pci-acpi.h> #include <linux/platform_device.h> #include "internal.h" static LIST_HEAD(bus_type_list); static DECLARE_RWSEM(bus_type_sem); #define PHYSICAL_NODE_STRING "physical_node" #define PHYSICAL_NODE_NAME_SIZE (sizeof(PHYSICAL_NODE_STRING) + 10) int register_acpi_bus_type(struct acpi_bus_type *type) { if (acpi_disabled) return -ENODEV; if (type && type->match && type->find_companion) { down_write(&bus_type_sem); list_add_tail(&type->list, &bus_type_list); up_write(&bus_type_sem); pr_info("bus type %s registered\n", type->name); return 0; } return -ENODEV; } EXPORT_SYMBOL_GPL(register_acpi_bus_type); int unregister_acpi_bus_type(struct acpi_bus_type *type) { if (acpi_disabled) return 0; if (type) { down_write(&bus_type_sem); list_del_init(&type->list); up_write(&bus_type_sem); pr_info("bus type %s unregistered\n", type->name); return 0; } return -ENODEV; } EXPORT_SYMBOL_GPL(unregister_acpi_bus_type); static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) { struct acpi_bus_type *tmp, *ret = NULL; down_read(&bus_type_sem); list_for_each_entry(tmp, &bus_type_list, list) { if (tmp->match(dev)) { ret = tmp; break; } } up_read(&bus_type_sem); return ret; } #define FIND_CHILD_MIN_SCORE 1 #define FIND_CHILD_MID_SCORE 2 #define FIND_CHILD_MAX_SCORE 3 static int match_any(struct acpi_device *adev, void *not_used) { return 1; } static bool acpi_dev_has_children(struct acpi_device *adev) { return acpi_dev_for_each_child(adev, match_any, NULL) > 0; } static int find_child_checks(struct acpi_device *adev, bool check_children) { unsigned long long sta; acpi_status status; if (check_children && !acpi_dev_has_children(adev)) return -ENODEV; status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta); if (status == AE_NOT_FOUND) { /* * Special case: backlight device objects without _STA are * preferred to other objects with the same _ADR value, because * it is more likely that they are actually useful. */ if (adev->pnp.type.backlight) return FIND_CHILD_MID_SCORE; return FIND_CHILD_MIN_SCORE; } if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) return -ENODEV; /* * If the device has a _HID returning a valid ACPI/PNP device ID, it is * better to make it look less attractive here, so that the other device * with the same _ADR value (that may not have a valid device ID) can be * matched going forward. [This means a second spec violation in a row, * so whatever we do here is best effort anyway.] */ if (adev->pnp.type.platform_id) return FIND_CHILD_MIN_SCORE; return FIND_CHILD_MAX_SCORE; } struct find_child_walk_data { struct acpi_device *adev; u64 address; int score; bool check_sta; bool check_children; }; static int check_one_child(struct acpi_device *adev, void *data) { struct find_child_walk_data *wd = data; int score; if (!adev->pnp.type.bus_address || acpi_device_adr(adev) != wd->address) return 0; if (!wd->adev) { /* * This is the first matching object, so save it. If it is not * necessary to look for any other matching objects, stop the * search. */ wd->adev = adev; return !(wd->check_sta || wd->check_children); } /* * There is more than one matching device object with the same _ADR * value. That really is unexpected, so we are kind of beyond the scope * of the spec here. 
We have to choose which one to return, though. * * First, get the score for the previously found object and terminate * the walk if it is maximum. */ if (!wd->score) { score = find_child_checks(wd->adev, wd->check_children); if (score == FIND_CHILD_MAX_SCORE) return 1; wd->score = score; } /* * Second, if the object that has just been found has a better score, * replace the previously found one with it and terminate the walk if * the new score is maximum. */ score = find_child_checks(adev, wd->check_children); if (score > wd->score) { wd->adev = adev; if (score == FIND_CHILD_MAX_SCORE) return 1; wd->score = score; } /* Continue, because there may be better matches. */ return 0; } static struct acpi_device *acpi_find_child(struct acpi_device *parent, u64 address, bool check_children, bool check_sta) { struct find_child_walk_data wd = { .address = address, .check_children = check_children, .check_sta = check_sta, .adev = NULL, .score = 0, }; if (parent) acpi_dev_for_each_child(parent, check_one_child, &wd); return wd.adev; } struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children) { return acpi_find_child(parent, address, check_children, true); } EXPORT_SYMBOL_GPL(acpi_find_child_device); struct acpi_device *acpi_find_child_by_adr(struct acpi_device *adev, acpi_bus_address adr) { return acpi_find_child(adev, adr, false, false); } EXPORT_SYMBOL_GPL(acpi_find_child_by_adr); static void acpi_physnode_link_name(char *buf, unsigned int node_id) { if (node_id > 0) snprintf(buf, PHYSICAL_NODE_NAME_SIZE, PHYSICAL_NODE_STRING "%u", node_id); else strcpy(buf, PHYSICAL_NODE_STRING); } int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) { struct acpi_device_physical_node *physical_node, *pn; char physical_node_name[PHYSICAL_NODE_NAME_SIZE]; struct list_head *physnode_list; unsigned int node_id; int retval = -EINVAL; if (has_acpi_companion(dev)) { if (acpi_dev) { dev_warn(dev, "ACPI companion already set\n"); return -EINVAL; } else { acpi_dev = ACPI_COMPANION(dev); } } if (!acpi_dev) return -EINVAL; acpi_dev_get(acpi_dev); get_device(dev); physical_node = kzalloc(sizeof(*physical_node), GFP_KERNEL); if (!physical_node) { retval = -ENOMEM; goto err; } mutex_lock(&acpi_dev->physical_node_lock); /* * Keep the list sorted by node_id so that the IDs of removed nodes can * be recycled easily. */ physnode_list = &acpi_dev->physical_node_list; node_id = 0; list_for_each_entry(pn, &acpi_dev->physical_node_list, node) { /* Sanity check. 
*/ if (pn->dev == dev) { mutex_unlock(&acpi_dev->physical_node_lock); dev_warn(dev, "Already associated with ACPI node\n"); kfree(physical_node); if (ACPI_COMPANION(dev) != acpi_dev) goto err; put_device(dev); acpi_dev_put(acpi_dev); return 0; } if (pn->node_id == node_id) { physnode_list = &pn->node; node_id++; } } physical_node->node_id = node_id; physical_node->dev = dev; list_add(&physical_node->node, physnode_list); acpi_dev->physical_node_count++; if (!has_acpi_companion(dev)) ACPI_COMPANION_SET(dev, acpi_dev); acpi_physnode_link_name(physical_node_name, node_id); retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, physical_node_name); if (retval) dev_err(&acpi_dev->dev, "Failed to create link %s (%d)\n", physical_node_name, retval); retval = sysfs_create_link(&dev->kobj, &acpi_dev->dev.kobj, "firmware_node"); if (retval) dev_err(dev, "Failed to create link firmware_node (%d)\n", retval); mutex_unlock(&acpi_dev->physical_node_lock); if (acpi_dev->wakeup.flags.valid) device_set_wakeup_capable(dev, true); return 0; err: ACPI_COMPANION_SET(dev, NULL); put_device(dev); acpi_dev_put(acpi_dev); return retval; } EXPORT_SYMBOL_GPL(acpi_bind_one); int acpi_unbind_one(struct device *dev) { struct acpi_device *acpi_dev = ACPI_COMPANION(dev); struct acpi_device_physical_node *entry; if (!acpi_dev) return 0; mutex_lock(&acpi_dev->physical_node_lock); list_for_each_entry(entry, &acpi_dev->physical_node_list, node) if (entry->dev == dev) { char physnode_name[PHYSICAL_NODE_NAME_SIZE]; list_del(&entry->node); acpi_dev->physical_node_count--; acpi_physnode_link_name(physnode_name, entry->node_id); sysfs_remove_link(&acpi_dev->dev.kobj, physnode_name); sysfs_remove_link(&dev->kobj, "firmware_node"); ACPI_COMPANION_SET(dev, NULL); /* Drop references taken by acpi_bind_one(). */ put_device(dev); acpi_dev_put(acpi_dev); kfree(entry); break; } mutex_unlock(&acpi_dev->physical_node_lock); return 0; } EXPORT_SYMBOL_GPL(acpi_unbind_one); void acpi_device_notify(struct device *dev) { struct acpi_device *adev; int ret; ret = acpi_bind_one(dev, NULL); if (ret) { struct acpi_bus_type *type = acpi_get_bus_type(dev); if (!type) goto err; adev = type->find_companion(dev); if (!adev) { dev_dbg(dev, "ACPI companion not found\n"); goto err; } ret = acpi_bind_one(dev, adev); if (ret) goto err; if (type->setup) { type->setup(dev); goto done; } } else { adev = ACPI_COMPANION(dev); if (dev_is_pci(dev)) { pci_acpi_setup(dev, adev); goto done; } else if (dev_is_platform(dev)) { acpi_configure_pmsi_domain(dev); } } if (adev->handler && adev->handler->bind) adev->handler->bind(dev); done: acpi_handle_debug(ACPI_HANDLE(dev), "Bound to device %s\n", dev_name(dev)); return; err: dev_dbg(dev, "No ACPI support\n"); } void acpi_device_notify_remove(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); if (!adev) return; if (dev_is_pci(dev)) pci_acpi_cleanup(dev, adev); else if (adev->handler && adev->handler->unbind) adev->handler->unbind(dev); acpi_unbind_one(dev); }
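The glue code above is only useful together with a bus implementation that registers a struct acpi_bus_type, so that acpi_device_notify() can locate an ACPI companion for that bus's devices. The sketch below illustrates the contract under stated assumptions: "example_bus", example_bus_address() and the init hook are hypothetical placeholders for illustration only, not part of the file above, and <linux/acpi.h> is assumed to be included.

/*
 * Minimal sketch of an acpi_bus_type user. example_bus and
 * example_bus_address() are assumed helpers, not real kernel symbols.
 */
static bool example_bus_acpi_match(struct device *dev)
{
	/* Claim only devices that sit on the (assumed) example bus. */
	return dev->bus == &example_bus;
}

static struct acpi_device *example_bus_acpi_find_companion(struct device *dev)
{
	/*
	 * Look the device up by its bus address under the parent's ACPI
	 * companion, which is how acpi_find_child_device() above is meant
	 * to be used by bus glue.
	 */
	return acpi_find_child_device(ACPI_COMPANION(dev->parent),
				      example_bus_address(dev), false);
}

static struct acpi_bus_type example_acpi_bus = {
	.name		= "example",
	.match		= example_bus_acpi_match,
	.find_companion	= example_bus_acpi_find_companion,
};

static int __init example_acpi_glue_init(void)
{
	/* Returns -ENODEV when ACPI is disabled, as implemented above. */
	return register_acpi_bus_type(&example_acpi_bus);
}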
// SPDX-License-Identifier: GPL-2.0-or-later /* * HackRF driver * * Copyright (C) 2014 Antti Palosaari <crope@iki.fi> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> #include <media/videobuf2-v4l2.h> #include <media/videobuf2-vmalloc.h> /* * Used Avago MGA-81563 RF amplifier could be destroyed pretty
easily with too * strong signal or transmitting to bad antenna. * Set RF gain control to 'grabbed' state by default for sure. */ static bool hackrf_enable_rf_gain_ctrl; module_param_named(enable_rf_gain_ctrl, hackrf_enable_rf_gain_ctrl, bool, 0644); MODULE_PARM_DESC(enable_rf_gain_ctrl, "enable RX/TX RF amplifier control (warn: could damage amplifier)"); /* HackRF USB API commands (from HackRF Library) */ enum { CMD_SET_TRANSCEIVER_MODE = 0x01, CMD_SAMPLE_RATE_SET = 0x06, CMD_BASEBAND_FILTER_BANDWIDTH_SET = 0x07, CMD_BOARD_ID_READ = 0x0e, CMD_VERSION_STRING_READ = 0x0f, CMD_SET_FREQ = 0x10, CMD_AMP_ENABLE = 0x11, CMD_SET_LNA_GAIN = 0x13, CMD_SET_VGA_GAIN = 0x14, CMD_SET_TXVGA_GAIN = 0x15, }; /* * bEndpointAddress 0x81 EP 1 IN * Transfer Type Bulk * wMaxPacketSize 0x0200 1x 512 bytes */ #define MAX_BULK_BUFS (6) #define BULK_BUFFER_SIZE (128 * 512) static const struct v4l2_frequency_band bands_adc_dac[] = { { .tuner = 0, .type = V4L2_TUNER_SDR, .index = 0, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 200000, .rangehigh = 24000000, }, }; static const struct v4l2_frequency_band bands_rx_tx[] = { { .tuner = 1, .type = V4L2_TUNER_RF, .index = 0, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 1, .rangehigh = 4294967294LL, /* max u32, hw goes over 7GHz */ }, }; /* stream formats */ struct hackrf_format { u32 pixelformat; u32 buffersize; }; /* format descriptions for capture and preview */ static struct hackrf_format formats[] = { { .pixelformat = V4L2_SDR_FMT_CS8, .buffersize = BULK_BUFFER_SIZE, }, }; static const unsigned int NUM_FORMATS = ARRAY_SIZE(formats); /* intermediate buffers with raw data from the USB device */ struct hackrf_buffer { struct vb2_v4l2_buffer vb; struct list_head list; }; struct hackrf_dev { #define USB_STATE_URB_BUF 1 /* XXX: set manually */ #define RX_ON 4 #define TX_ON 5 #define RX_ADC_FREQUENCY 11 #define TX_DAC_FREQUENCY 12 #define RX_BANDWIDTH 13 #define TX_BANDWIDTH 14 #define RX_RF_FREQUENCY 15 #define TX_RF_FREQUENCY 16 #define RX_RF_GAIN 17 #define TX_RF_GAIN 18 #define RX_IF_GAIN 19 #define RX_LNA_GAIN 20 #define TX_LNA_GAIN 21 unsigned long flags; struct usb_interface *intf; struct device *dev; struct usb_device *udev; struct video_device rx_vdev; struct video_device tx_vdev; struct v4l2_device v4l2_dev; /* videobuf2 queue and queued buffers list */ struct vb2_queue rx_vb2_queue; struct vb2_queue tx_vb2_queue; struct list_head rx_buffer_list; struct list_head tx_buffer_list; spinlock_t buffer_list_lock; /* Protects buffer_list */ unsigned int sequence; /* Buffer sequence counter */ unsigned int vb_full; /* vb is full and packets dropped */ unsigned int vb_empty; /* vb is empty and packets dropped */ /* Note if taking both locks v4l2_lock must always be locked first! 
*/ struct mutex v4l2_lock; /* Protects everything else */ struct mutex vb_queue_lock; /* Protects vb_queue */ struct urb *urb_list[MAX_BULK_BUFS]; int buf_num; unsigned long buf_size; u8 *buf_list[MAX_BULK_BUFS]; dma_addr_t dma_addr[MAX_BULK_BUFS]; int urbs_initialized; int urbs_submitted; /* USB control message buffer */ #define BUF_SIZE 24 u8 buf[BUF_SIZE]; /* Current configuration */ unsigned int f_adc; unsigned int f_dac; unsigned int f_rx; unsigned int f_tx; u32 pixelformat; u32 buffersize; /* Controls */ struct v4l2_ctrl_handler rx_ctrl_handler; struct v4l2_ctrl *rx_bandwidth_auto; struct v4l2_ctrl *rx_bandwidth; struct v4l2_ctrl *rx_rf_gain; struct v4l2_ctrl *rx_lna_gain; struct v4l2_ctrl *rx_if_gain; struct v4l2_ctrl_handler tx_ctrl_handler; struct v4l2_ctrl *tx_bandwidth_auto; struct v4l2_ctrl *tx_bandwidth; struct v4l2_ctrl *tx_rf_gain; struct v4l2_ctrl *tx_lna_gain; /* Sample rate calc */ unsigned long jiffies_next; unsigned int sample; unsigned int sample_measured; }; #define hackrf_dbg_usb_control_msg(_dev, _r, _t, _v, _i, _b, _l) { \ char *_direction; \ if (_t & USB_DIR_IN) \ _direction = "<<<"; \ else \ _direction = ">>>"; \ dev_dbg(_dev, "%02x %02x %02x %02x %02x %02x %02x %02x %s %*ph\n", \ _t, _r, _v & 0xff, _v >> 8, _i & 0xff, \ _i >> 8, _l & 0xff, _l >> 8, _direction, _l, _b); \ } /* execute firmware command */ static int hackrf_ctrl_msg(struct hackrf_dev *dev, u8 request, u16 value, u16 index, u8 *data, u16 size) { int ret; unsigned int pipe; u8 requesttype; switch (request) { case CMD_SET_TRANSCEIVER_MODE: case CMD_SET_FREQ: case CMD_AMP_ENABLE: case CMD_SAMPLE_RATE_SET: case CMD_BASEBAND_FILTER_BANDWIDTH_SET: pipe = usb_sndctrlpipe(dev->udev, 0); requesttype = (USB_TYPE_VENDOR | USB_DIR_OUT); break; case CMD_BOARD_ID_READ: case CMD_VERSION_STRING_READ: case CMD_SET_LNA_GAIN: case CMD_SET_VGA_GAIN: case CMD_SET_TXVGA_GAIN: pipe = usb_rcvctrlpipe(dev->udev, 0); requesttype = (USB_TYPE_VENDOR | USB_DIR_IN); break; default: dev_err(dev->dev, "Unknown command %02x\n", request); ret = -EINVAL; goto err; } /* write request */ if (!(requesttype & USB_DIR_IN)) memcpy(dev->buf, data, size); ret = usb_control_msg(dev->udev, pipe, request, requesttype, value, index, dev->buf, size, 1000); hackrf_dbg_usb_control_msg(dev->dev, request, requesttype, value, index, dev->buf, size); if (ret < 0) { dev_err(dev->dev, "usb_control_msg() failed %d request %02x\n", ret, request); goto err; } /* read request */ if (requesttype & USB_DIR_IN) memcpy(data, dev->buf, size); return 0; err: return ret; } static int hackrf_set_params(struct hackrf_dev *dev) { struct usb_interface *intf = dev->intf; int ret, i; u8 buf[8], u8tmp; unsigned int uitmp, uitmp1, uitmp2; const bool rx = test_bit(RX_ON, &dev->flags); const bool tx = test_bit(TX_ON, &dev->flags); static const struct { u32 freq; } bandwidth_lut[] = { { 1750000}, /* 1.75 MHz */ { 2500000}, /* 2.5 MHz */ { 3500000}, /* 3.5 MHz */ { 5000000}, /* 5 MHz */ { 5500000}, /* 5.5 MHz */ { 6000000}, /* 6 MHz */ { 7000000}, /* 7 MHz */ { 8000000}, /* 8 MHz */ { 9000000}, /* 9 MHz */ {10000000}, /* 10 MHz */ {12000000}, /* 12 MHz */ {14000000}, /* 14 MHz */ {15000000}, /* 15 MHz */ {20000000}, /* 20 MHz */ {24000000}, /* 24 MHz */ {28000000}, /* 28 MHz */ }; if (!rx && !tx) { dev_dbg(&intf->dev, "device is sleeping\n"); return 0; } /* ADC / DAC frequency */ if (rx && test_and_clear_bit(RX_ADC_FREQUENCY, &dev->flags)) { dev_dbg(&intf->dev, "RX ADC frequency=%u Hz\n", dev->f_adc); uitmp1 = dev->f_adc; uitmp2 = 1; set_bit(TX_DAC_FREQUENCY, &dev->flags); } 
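/*
 * Note the mirrored set_bit(): selecting the sample rate for one
 * direction re-arms the update flag of the other, so the same value is
 * re-sent when streaming later switches between RX and TX. The same
 * pattern is used for the bandwidth, RF frequency and RF gain below.
 */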
else if (tx && test_and_clear_bit(TX_DAC_FREQUENCY, &dev->flags)) { dev_dbg(&intf->dev, "TX DAC frequency=%u Hz\n", dev->f_dac); uitmp1 = dev->f_dac; uitmp2 = 1; set_bit(RX_ADC_FREQUENCY, &dev->flags); } else { uitmp1 = uitmp2 = 0; } if (uitmp1 || uitmp2) { buf[0] = (uitmp1 >> 0) & 0xff; buf[1] = (uitmp1 >> 8) & 0xff; buf[2] = (uitmp1 >> 16) & 0xff; buf[3] = (uitmp1 >> 24) & 0xff; buf[4] = (uitmp2 >> 0) & 0xff; buf[5] = (uitmp2 >> 8) & 0xff; buf[6] = (uitmp2 >> 16) & 0xff; buf[7] = (uitmp2 >> 24) & 0xff; ret = hackrf_ctrl_msg(dev, CMD_SAMPLE_RATE_SET, 0, 0, buf, 8); if (ret) goto err; } /* bandwidth */ if (rx && test_and_clear_bit(RX_BANDWIDTH, &dev->flags)) { if (dev->rx_bandwidth_auto->val == true) uitmp = dev->f_adc; else uitmp = dev->rx_bandwidth->val; for (i = 0; i < ARRAY_SIZE(bandwidth_lut); i++) { if (uitmp <= bandwidth_lut[i].freq) { uitmp = bandwidth_lut[i].freq; break; } } dev->rx_bandwidth->val = uitmp; dev->rx_bandwidth->cur.val = uitmp; dev_dbg(&intf->dev, "RX bandwidth selected=%u\n", uitmp); set_bit(TX_BANDWIDTH, &dev->flags); } else if (tx && test_and_clear_bit(TX_BANDWIDTH, &dev->flags)) { if (dev->tx_bandwidth_auto->val == true) uitmp = dev->f_dac; else uitmp = dev->tx_bandwidth->val; for (i = 0; i < ARRAY_SIZE(bandwidth_lut); i++) { if (uitmp <= bandwidth_lut[i].freq) { uitmp = bandwidth_lut[i].freq; break; } } dev->tx_bandwidth->val = uitmp; dev->tx_bandwidth->cur.val = uitmp; dev_dbg(&intf->dev, "TX bandwidth selected=%u\n", uitmp); set_bit(RX_BANDWIDTH, &dev->flags); } else { uitmp = 0; } if (uitmp) { uitmp1 = uitmp2 = 0; uitmp1 |= ((uitmp >> 0) & 0xff) << 0; uitmp1 |= ((uitmp >> 8) & 0xff) << 8; uitmp2 |= ((uitmp >> 16) & 0xff) << 0; uitmp2 |= ((uitmp >> 24) & 0xff) << 8; ret = hackrf_ctrl_msg(dev, CMD_BASEBAND_FILTER_BANDWIDTH_SET, uitmp1, uitmp2, NULL, 0); if (ret) goto err; } /* RX / TX RF frequency */ if (rx && test_and_clear_bit(RX_RF_FREQUENCY, &dev->flags)) { dev_dbg(&intf->dev, "RX RF frequency=%u Hz\n", dev->f_rx); uitmp1 = dev->f_rx / 1000000; uitmp2 = dev->f_rx % 1000000; set_bit(TX_RF_FREQUENCY, &dev->flags); } else if (tx && test_and_clear_bit(TX_RF_FREQUENCY, &dev->flags)) { dev_dbg(&intf->dev, "TX RF frequency=%u Hz\n", dev->f_tx); uitmp1 = dev->f_tx / 1000000; uitmp2 = dev->f_tx % 1000000; set_bit(RX_RF_FREQUENCY, &dev->flags); } else { uitmp1 = uitmp2 = 0; } if (uitmp1 || uitmp2) { buf[0] = (uitmp1 >> 0) & 0xff; buf[1] = (uitmp1 >> 8) & 0xff; buf[2] = (uitmp1 >> 16) & 0xff; buf[3] = (uitmp1 >> 24) & 0xff; buf[4] = (uitmp2 >> 0) & 0xff; buf[5] = (uitmp2 >> 8) & 0xff; buf[6] = (uitmp2 >> 16) & 0xff; buf[7] = (uitmp2 >> 24) & 0xff; ret = hackrf_ctrl_msg(dev, CMD_SET_FREQ, 0, 0, buf, 8); if (ret) goto err; } /* RX RF gain */ if (rx && test_and_clear_bit(RX_RF_GAIN, &dev->flags)) { dev_dbg(&intf->dev, "RX RF gain val=%d->%d\n", dev->rx_rf_gain->cur.val, dev->rx_rf_gain->val); u8tmp = (dev->rx_rf_gain->val) ? 1 : 0; ret = hackrf_ctrl_msg(dev, CMD_AMP_ENABLE, u8tmp, 0, NULL, 0); if (ret) goto err; set_bit(TX_RF_GAIN, &dev->flags); } /* TX RF gain */ if (tx && test_and_clear_bit(TX_RF_GAIN, &dev->flags)) { dev_dbg(&intf->dev, "TX RF gain val=%d->%d\n", dev->tx_rf_gain->cur.val, dev->tx_rf_gain->val); u8tmp = (dev->tx_rf_gain->val) ? 
1 : 0; ret = hackrf_ctrl_msg(dev, CMD_AMP_ENABLE, u8tmp, 0, NULL, 0); if (ret) goto err; set_bit(RX_RF_GAIN, &dev->flags); } /* RX LNA gain */ if (rx && test_and_clear_bit(RX_LNA_GAIN, &dev->flags)) { dev_dbg(dev->dev, "RX LNA gain val=%d->%d\n", dev->rx_lna_gain->cur.val, dev->rx_lna_gain->val); ret = hackrf_ctrl_msg(dev, CMD_SET_LNA_GAIN, 0, dev->rx_lna_gain->val, &u8tmp, 1); if (ret) goto err; } /* RX IF gain */ if (rx && test_and_clear_bit(RX_IF_GAIN, &dev->flags)) { dev_dbg(&intf->dev, "IF gain val=%d->%d\n", dev->rx_if_gain->cur.val, dev->rx_if_gain->val); ret = hackrf_ctrl_msg(dev, CMD_SET_VGA_GAIN, 0, dev->rx_if_gain->val, &u8tmp, 1); if (ret) goto err; } /* TX LNA gain */ if (tx && test_and_clear_bit(TX_LNA_GAIN, &dev->flags)) { dev_dbg(&intf->dev, "TX LNA gain val=%d->%d\n", dev->tx_lna_gain->cur.val, dev->tx_lna_gain->val); ret = hackrf_ctrl_msg(dev, CMD_SET_TXVGA_GAIN, 0, dev->tx_lna_gain->val, &u8tmp, 1); if (ret) goto err; } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } /* Private functions */ static struct hackrf_buffer *hackrf_get_next_buffer(struct hackrf_dev *dev, struct list_head *buffer_list) { unsigned long flags; struct hackrf_buffer *buffer = NULL; spin_lock_irqsave(&dev->buffer_list_lock, flags); if (list_empty(buffer_list)) goto leave; buffer = list_entry(buffer_list->next, struct hackrf_buffer, list); list_del(&buffer->list); leave: spin_unlock_irqrestore(&dev->buffer_list_lock, flags); return buffer; } static void hackrf_copy_stream(struct hackrf_dev *dev, void *dst, void *src, unsigned int src_len) { memcpy(dst, src, src_len); /* calculate sample rate and output it in 10 seconds intervals */ if (unlikely(time_is_before_jiffies(dev->jiffies_next))) { #define MSECS 10000UL unsigned int msecs = jiffies_to_msecs(jiffies - dev->jiffies_next + msecs_to_jiffies(MSECS)); unsigned int samples = dev->sample - dev->sample_measured; dev->jiffies_next = jiffies + msecs_to_jiffies(MSECS); dev->sample_measured = dev->sample; dev_dbg(dev->dev, "slen=%u samples=%u msecs=%u sample rate=%lu\n", src_len, samples, msecs, samples * 1000UL / msecs); } /* total number of samples */ dev->sample += src_len / 2; } /* * This gets called for the bulk stream pipe. This is done in interrupt * time, so it has to be fast, not crash, and not stall. Neat. 
*/ static void hackrf_urb_complete_in(struct urb *urb) { struct hackrf_dev *dev = urb->context; struct usb_interface *intf = dev->intf; struct hackrf_buffer *buffer; unsigned int len; dev_dbg_ratelimited(&intf->dev, "status=%d length=%u/%u\n", urb->status, urb->actual_length, urb->transfer_buffer_length); switch (urb->status) { case 0: /* success */ case -ETIMEDOUT: /* NAK */ break; case -ECONNRESET: /* kill */ case -ENOENT: case -ESHUTDOWN: return; default: /* error */ dev_err_ratelimited(&intf->dev, "URB failed %d\n", urb->status); goto exit_usb_submit_urb; } /* get buffer to write */ buffer = hackrf_get_next_buffer(dev, &dev->rx_buffer_list); if (unlikely(buffer == NULL)) { dev->vb_full++; dev_notice_ratelimited(&intf->dev, "buffer is full - %u packets dropped\n", dev->vb_full); goto exit_usb_submit_urb; } len = min_t(unsigned long, vb2_plane_size(&buffer->vb.vb2_buf, 0), urb->actual_length); hackrf_copy_stream(dev, vb2_plane_vaddr(&buffer->vb.vb2_buf, 0), urb->transfer_buffer, len); vb2_set_plane_payload(&buffer->vb.vb2_buf, 0, len); buffer->vb.sequence = dev->sequence++; buffer->vb.vb2_buf.timestamp = ktime_get_ns(); vb2_buffer_done(&buffer->vb.vb2_buf, VB2_BUF_STATE_DONE); exit_usb_submit_urb: usb_submit_urb(urb, GFP_ATOMIC); } static void hackrf_urb_complete_out(struct urb *urb) { struct hackrf_dev *dev = urb->context; struct usb_interface *intf = dev->intf; struct hackrf_buffer *buffer; unsigned int len; dev_dbg_ratelimited(&intf->dev, "status=%d length=%u/%u\n", urb->status, urb->actual_length, urb->transfer_buffer_length); switch (urb->status) { case 0: /* success */ case -ETIMEDOUT: /* NAK */ break; case -ECONNRESET: /* kill */ case -ENOENT: case -ESHUTDOWN: return; default: /* error */ dev_err_ratelimited(&intf->dev, "URB failed %d\n", urb->status); } /* get buffer to read */ buffer = hackrf_get_next_buffer(dev, &dev->tx_buffer_list); if (unlikely(buffer == NULL)) { dev->vb_empty++; dev_notice_ratelimited(&intf->dev, "buffer is empty - %u packets dropped\n", dev->vb_empty); urb->actual_length = 0; goto exit_usb_submit_urb; } len = min_t(unsigned long, urb->transfer_buffer_length, vb2_get_plane_payload(&buffer->vb.vb2_buf, 0)); hackrf_copy_stream(dev, urb->transfer_buffer, vb2_plane_vaddr(&buffer->vb.vb2_buf, 0), len); urb->actual_length = len; buffer->vb.sequence = dev->sequence++; buffer->vb.vb2_buf.timestamp = ktime_get_ns(); vb2_buffer_done(&buffer->vb.vb2_buf, VB2_BUF_STATE_DONE); exit_usb_submit_urb: usb_submit_urb(urb, GFP_ATOMIC); } static int hackrf_kill_urbs(struct hackrf_dev *dev) { int i; for (i = dev->urbs_submitted - 1; i >= 0; i--) { dev_dbg(dev->dev, "kill urb=%d\n", i); /* stop the URB */ usb_kill_urb(dev->urb_list[i]); } dev->urbs_submitted = 0; return 0; } static int hackrf_submit_urbs(struct hackrf_dev *dev) { int i, ret; for (i = 0; i < dev->urbs_initialized; i++) { dev_dbg(dev->dev, "submit urb=%d\n", i); ret = usb_submit_urb(dev->urb_list[i], GFP_KERNEL); if (ret) { dev_err(dev->dev, "Could not submit URB no. 
%d - get them all back\n", i); hackrf_kill_urbs(dev); return ret; } dev->urbs_submitted++; } return 0; } static int hackrf_free_stream_bufs(struct hackrf_dev *dev) { if (dev->flags & USB_STATE_URB_BUF) { while (dev->buf_num) { dev->buf_num--; dev_dbg(dev->dev, "free buf=%d\n", dev->buf_num); usb_free_coherent(dev->udev, dev->buf_size, dev->buf_list[dev->buf_num], dev->dma_addr[dev->buf_num]); } } dev->flags &= ~USB_STATE_URB_BUF; return 0; } static int hackrf_alloc_stream_bufs(struct hackrf_dev *dev) { dev->buf_num = 0; dev->buf_size = BULK_BUFFER_SIZE; dev_dbg(dev->dev, "all in all I will use %u bytes for streaming\n", MAX_BULK_BUFS * BULK_BUFFER_SIZE); for (dev->buf_num = 0; dev->buf_num < MAX_BULK_BUFS; dev->buf_num++) { dev->buf_list[dev->buf_num] = usb_alloc_coherent(dev->udev, BULK_BUFFER_SIZE, GFP_KERNEL, &dev->dma_addr[dev->buf_num]); if (!dev->buf_list[dev->buf_num]) { dev_dbg(dev->dev, "alloc buf=%d failed\n", dev->buf_num); hackrf_free_stream_bufs(dev); return -ENOMEM; } dev_dbg(dev->dev, "alloc buf=%d %p (dma %llu)\n", dev->buf_num, dev->buf_list[dev->buf_num], (long long)dev->dma_addr[dev->buf_num]); dev->flags |= USB_STATE_URB_BUF; } return 0; } static int hackrf_free_urbs(struct hackrf_dev *dev) { int i; hackrf_kill_urbs(dev); for (i = dev->urbs_initialized - 1; i >= 0; i--) { if (dev->urb_list[i]) { dev_dbg(dev->dev, "free urb=%d\n", i); /* free the URBs */ usb_free_urb(dev->urb_list[i]); } } dev->urbs_initialized = 0; return 0; } static int hackrf_alloc_urbs(struct hackrf_dev *dev, bool rcv) { int i, j; unsigned int pipe; usb_complete_t complete; if (rcv) { pipe = usb_rcvbulkpipe(dev->udev, 0x81); complete = &hackrf_urb_complete_in; } else { pipe = usb_sndbulkpipe(dev->udev, 0x02); complete = &hackrf_urb_complete_out; } /* allocate the URBs */ for (i = 0; i < MAX_BULK_BUFS; i++) { dev_dbg(dev->dev, "alloc urb=%d\n", i); dev->urb_list[i] = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb_list[i]) { for (j = 0; j < i; j++) usb_free_urb(dev->urb_list[j]); return -ENOMEM; } usb_fill_bulk_urb(dev->urb_list[i], dev->udev, pipe, dev->buf_list[i], BULK_BUFFER_SIZE, complete, dev); dev->urb_list[i]->transfer_flags = URB_NO_TRANSFER_DMA_MAP; dev->urb_list[i]->transfer_dma = dev->dma_addr[i]; dev->urbs_initialized++; } return 0; } /* The user yanked out the cable... 
*/ static void hackrf_disconnect(struct usb_interface *intf) { struct v4l2_device *v = usb_get_intfdata(intf); struct hackrf_dev *dev = container_of(v, struct hackrf_dev, v4l2_dev); dev_dbg(dev->dev, "\n"); mutex_lock(&dev->vb_queue_lock); mutex_lock(&dev->v4l2_lock); /* No need to keep the urbs around after disconnection */ dev->udev = NULL; v4l2_device_disconnect(&dev->v4l2_dev); video_unregister_device(&dev->tx_vdev); video_unregister_device(&dev->rx_vdev); mutex_unlock(&dev->v4l2_lock); mutex_unlock(&dev->vb_queue_lock); v4l2_device_put(&dev->v4l2_dev); } /* Videobuf2 operations */ static void hackrf_return_all_buffers(struct vb2_queue *vq, enum vb2_buffer_state state) { struct hackrf_dev *dev = vb2_get_drv_priv(vq); struct usb_interface *intf = dev->intf; struct hackrf_buffer *buffer, *node; struct list_head *buffer_list; unsigned long flags; dev_dbg(&intf->dev, "\n"); if (vq->type == V4L2_BUF_TYPE_SDR_CAPTURE) buffer_list = &dev->rx_buffer_list; else buffer_list = &dev->tx_buffer_list; spin_lock_irqsave(&dev->buffer_list_lock, flags); list_for_each_entry_safe(buffer, node, buffer_list, list) { dev_dbg(&intf->dev, "list_for_each_entry_safe\n"); vb2_buffer_done(&buffer->vb.vb2_buf, state); list_del(&buffer->list); } spin_unlock_irqrestore(&dev->buffer_list_lock, flags); } static int hackrf_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct hackrf_dev *dev = vb2_get_drv_priv(vq); unsigned int q_num_bufs = vb2_get_num_buffers(vq); dev_dbg(dev->dev, "nbuffers=%d\n", *nbuffers); /* Need at least 8 buffers */ if (q_num_bufs + *nbuffers < 8) *nbuffers = 8 - q_num_bufs; *nplanes = 1; sizes[0] = PAGE_ALIGN(dev->buffersize); dev_dbg(dev->dev, "nbuffers=%d sizes[0]=%d\n", *nbuffers, sizes[0]); return 0; } static void hackrf_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vb2_queue *vq = vb->vb2_queue; struct hackrf_dev *dev = vb2_get_drv_priv(vq); struct hackrf_buffer *buffer = container_of(vbuf, struct hackrf_buffer, vb); struct list_head *buffer_list; unsigned long flags; dev_dbg_ratelimited(&dev->intf->dev, "\n"); if (vq->type == V4L2_BUF_TYPE_SDR_CAPTURE) buffer_list = &dev->rx_buffer_list; else buffer_list = &dev->tx_buffer_list; spin_lock_irqsave(&dev->buffer_list_lock, flags); list_add_tail(&buffer->list, buffer_list); spin_unlock_irqrestore(&dev->buffer_list_lock, flags); } static int hackrf_start_streaming(struct vb2_queue *vq, unsigned int count) { struct hackrf_dev *dev = vb2_get_drv_priv(vq); struct usb_interface *intf = dev->intf; int ret; unsigned int mode; dev_dbg(&intf->dev, "count=%i\n", count); mutex_lock(&dev->v4l2_lock); /* Allow only RX or TX, not both same time */ if (vq->type == V4L2_BUF_TYPE_SDR_CAPTURE) { if (test_bit(TX_ON, &dev->flags)) { ret = -EBUSY; goto err_hackrf_return_all_buffers; } mode = 1; set_bit(RX_ON, &dev->flags); } else { if (test_bit(RX_ON, &dev->flags)) { ret = -EBUSY; goto err_hackrf_return_all_buffers; } mode = 2; set_bit(TX_ON, &dev->flags); } dev->sequence = 0; ret = hackrf_alloc_stream_bufs(dev); if (ret) goto err; ret = hackrf_alloc_urbs(dev, (mode == 1)); if (ret) goto err; ret = hackrf_submit_urbs(dev); if (ret) goto err; ret = hackrf_set_params(dev); if (ret) goto err; /* start hardware streaming */ ret = hackrf_ctrl_msg(dev, CMD_SET_TRANSCEIVER_MODE, mode, 0, NULL, 0); if (ret) goto err; mutex_unlock(&dev->v4l2_lock); return 0; err: hackrf_kill_urbs(dev); hackrf_free_urbs(dev); 
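/* Stream buffers were allocated first, so they are released last. */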
hackrf_free_stream_bufs(dev); clear_bit(RX_ON, &dev->flags); clear_bit(TX_ON, &dev->flags); err_hackrf_return_all_buffers: hackrf_return_all_buffers(vq, VB2_BUF_STATE_QUEUED); mutex_unlock(&dev->v4l2_lock); dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static void hackrf_stop_streaming(struct vb2_queue *vq) { struct hackrf_dev *dev = vb2_get_drv_priv(vq); struct usb_interface *intf = dev->intf; dev_dbg(&intf->dev, "\n"); mutex_lock(&dev->v4l2_lock); /* stop hardware streaming */ hackrf_ctrl_msg(dev, CMD_SET_TRANSCEIVER_MODE, 0, 0, NULL, 0); hackrf_kill_urbs(dev); hackrf_free_urbs(dev); hackrf_free_stream_bufs(dev); hackrf_return_all_buffers(vq, VB2_BUF_STATE_ERROR); if (vq->type == V4L2_BUF_TYPE_SDR_CAPTURE) clear_bit(RX_ON, &dev->flags); else clear_bit(TX_ON, &dev->flags); mutex_unlock(&dev->v4l2_lock); } static const struct vb2_ops hackrf_vb2_ops = { .queue_setup = hackrf_queue_setup, .buf_queue = hackrf_buf_queue, .start_streaming = hackrf_start_streaming, .stop_streaming = hackrf_stop_streaming, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; static int hackrf_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { struct hackrf_dev *dev = video_drvdata(file); struct usb_interface *intf = dev->intf; dev_dbg(&intf->dev, "\n"); cap->capabilities = V4L2_CAP_SDR_CAPTURE | V4L2_CAP_TUNER | V4L2_CAP_SDR_OUTPUT | V4L2_CAP_MODULATOR | V4L2_CAP_STREAMING | V4L2_CAP_READWRITE | V4L2_CAP_DEVICE_CAPS; strscpy(cap->driver, KBUILD_MODNAME, sizeof(cap->driver)); strscpy(cap->card, dev->rx_vdev.name, sizeof(cap->card)); usb_make_path(dev->udev, cap->bus_info, sizeof(cap->bus_info)); return 0; } static int hackrf_s_fmt_sdr(struct file *file, void *priv, struct v4l2_format *f) { struct hackrf_dev *dev = video_drvdata(file); struct video_device *vdev = video_devdata(file); struct vb2_queue *q; int i; dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n", (char *)&f->fmt.sdr.pixelformat); if (vdev->vfl_dir == VFL_DIR_RX) q = &dev->rx_vb2_queue; else q = &dev->tx_vb2_queue; if (vb2_is_busy(q)) return -EBUSY; for (i = 0; i < NUM_FORMATS; i++) { if (f->fmt.sdr.pixelformat == formats[i].pixelformat) { dev->pixelformat = formats[i].pixelformat; dev->buffersize = formats[i].buffersize; f->fmt.sdr.buffersize = formats[i].buffersize; return 0; } } dev->pixelformat = formats[0].pixelformat; dev->buffersize = formats[0].buffersize; f->fmt.sdr.pixelformat = formats[0].pixelformat; f->fmt.sdr.buffersize = formats[0].buffersize; return 0; } static int hackrf_g_fmt_sdr(struct file *file, void *priv, struct v4l2_format *f) { struct hackrf_dev *dev = video_drvdata(file); dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n", (char *)&dev->pixelformat); f->fmt.sdr.pixelformat = dev->pixelformat; f->fmt.sdr.buffersize = dev->buffersize; return 0; } static int hackrf_try_fmt_sdr(struct file *file, void *priv, struct v4l2_format *f) { struct hackrf_dev *dev = video_drvdata(file); int i; dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n", (char *)&f->fmt.sdr.pixelformat); for (i = 0; i < NUM_FORMATS; i++) { if (formats[i].pixelformat == f->fmt.sdr.pixelformat) { f->fmt.sdr.buffersize = formats[i].buffersize; return 0; } } f->fmt.sdr.pixelformat = formats[0].pixelformat; f->fmt.sdr.buffersize = formats[0].buffersize; return 0; } static int hackrf_enum_fmt_sdr(struct file *file, void *priv, struct v4l2_fmtdesc *f) { struct hackrf_dev *dev = video_drvdata(file); dev_dbg(dev->dev, "index=%d\n", f->index); if (f->index >= NUM_FORMATS) return -EINVAL; f->pixelformat = formats[f->index].pixelformat; 
return 0; } static int hackrf_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { struct hackrf_dev *dev = video_drvdata(file); int ret; dev_dbg(dev->dev, "index=%d\n", v->index); if (v->index == 0) ret = 0; else if (v->index == 1) ret = 0; else ret = -EINVAL; return ret; } static int hackrf_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct hackrf_dev *dev = video_drvdata(file); int ret; dev_dbg(dev->dev, "index=%d\n", v->index); if (v->index == 0) { strscpy(v->name, "HackRF ADC", sizeof(v->name)); v->type = V4L2_TUNER_SDR; v->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; v->rangelow = bands_adc_dac[0].rangelow; v->rangehigh = bands_adc_dac[0].rangehigh; ret = 0; } else if (v->index == 1) { strscpy(v->name, "HackRF RF", sizeof(v->name)); v->type = V4L2_TUNER_RF; v->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; v->rangelow = bands_rx_tx[0].rangelow; v->rangehigh = bands_rx_tx[0].rangehigh; ret = 0; } else { ret = -EINVAL; } return ret; } static int hackrf_s_modulator(struct file *file, void *fh, const struct v4l2_modulator *a) { struct hackrf_dev *dev = video_drvdata(file); dev_dbg(dev->dev, "index=%d\n", a->index); return a->index > 1 ? -EINVAL : 0; } static int hackrf_g_modulator(struct file *file, void *fh, struct v4l2_modulator *a) { struct hackrf_dev *dev = video_drvdata(file); int ret; dev_dbg(dev->dev, "index=%d\n", a->index); if (a->index == 0) { strscpy(a->name, "HackRF DAC", sizeof(a->name)); a->type = V4L2_TUNER_SDR; a->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; a->rangelow = bands_adc_dac[0].rangelow; a->rangehigh = bands_adc_dac[0].rangehigh; ret = 0; } else if (a->index == 1) { strscpy(a->name, "HackRF RF", sizeof(a->name)); a->type = V4L2_TUNER_RF; a->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; a->rangelow = bands_rx_tx[0].rangelow; a->rangehigh = bands_rx_tx[0].rangehigh; ret = 0; } else { ret = -EINVAL; } return ret; } static int hackrf_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct hackrf_dev *dev = video_drvdata(file); struct usb_interface *intf = dev->intf; struct video_device *vdev = video_devdata(file); int ret; unsigned int uitmp; dev_dbg(&intf->dev, "tuner=%d type=%d frequency=%u\n", f->tuner, f->type, f->frequency); if (f->tuner == 0) { uitmp = clamp(f->frequency, bands_adc_dac[0].rangelow, bands_adc_dac[0].rangehigh); if (vdev->vfl_dir == VFL_DIR_RX) { dev->f_adc = uitmp; set_bit(RX_ADC_FREQUENCY, &dev->flags); } else { dev->f_dac = uitmp; set_bit(TX_DAC_FREQUENCY, &dev->flags); } } else if (f->tuner == 1) { uitmp = clamp(f->frequency, bands_rx_tx[0].rangelow, bands_rx_tx[0].rangehigh); if (vdev->vfl_dir == VFL_DIR_RX) { dev->f_rx = uitmp; set_bit(RX_RF_FREQUENCY, &dev->flags); } else { dev->f_tx = uitmp; set_bit(TX_RF_FREQUENCY, &dev->flags); } } else { ret = -EINVAL; goto err; } ret = hackrf_set_params(dev); if (ret) goto err; return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int hackrf_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct hackrf_dev *dev = video_drvdata(file); struct usb_interface *intf = dev->intf; struct video_device *vdev = video_devdata(file); int ret; dev_dbg(dev->dev, "tuner=%d type=%d\n", f->tuner, f->type); if (f->tuner == 0) { f->type = V4L2_TUNER_SDR; if (vdev->vfl_dir == VFL_DIR_RX) f->frequency = dev->f_adc; else f->frequency = dev->f_dac; } else if (f->tuner == 1) { f->type = V4L2_TUNER_RF; if (vdev->vfl_dir == VFL_DIR_RX) f->frequency = 
dev->f_rx; else f->frequency = dev->f_tx; } else { ret = -EINVAL; goto err; } return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int hackrf_enum_freq_bands(struct file *file, void *priv, struct v4l2_frequency_band *band) { struct hackrf_dev *dev = video_drvdata(file); int ret; dev_dbg(dev->dev, "tuner=%d type=%d index=%d\n", band->tuner, band->type, band->index); if (band->tuner == 0) { if (band->index >= ARRAY_SIZE(bands_adc_dac)) { ret = -EINVAL; } else { *band = bands_adc_dac[band->index]; ret = 0; } } else if (band->tuner == 1) { if (band->index >= ARRAY_SIZE(bands_rx_tx)) { ret = -EINVAL; } else { *band = bands_rx_tx[band->index]; ret = 0; } } else { ret = -EINVAL; } return ret; } static const struct v4l2_ioctl_ops hackrf_ioctl_ops = { .vidioc_querycap = hackrf_querycap, .vidioc_s_fmt_sdr_cap = hackrf_s_fmt_sdr, .vidioc_g_fmt_sdr_cap = hackrf_g_fmt_sdr, .vidioc_enum_fmt_sdr_cap = hackrf_enum_fmt_sdr, .vidioc_try_fmt_sdr_cap = hackrf_try_fmt_sdr, .vidioc_s_fmt_sdr_out = hackrf_s_fmt_sdr, .vidioc_g_fmt_sdr_out = hackrf_g_fmt_sdr, .vidioc_enum_fmt_sdr_out = hackrf_enum_fmt_sdr, .vidioc_try_fmt_sdr_out = hackrf_try_fmt_sdr, .vidioc_reqbufs = vb2_ioctl_reqbufs, .vidioc_create_bufs = vb2_ioctl_create_bufs, .vidioc_prepare_buf = vb2_ioctl_prepare_buf, .vidioc_querybuf = vb2_ioctl_querybuf, .vidioc_qbuf = vb2_ioctl_qbuf, .vidioc_dqbuf = vb2_ioctl_dqbuf, .vidioc_expbuf = vb2_ioctl_expbuf, .vidioc_streamon = vb2_ioctl_streamon, .vidioc_streamoff = vb2_ioctl_streamoff, .vidioc_s_tuner = hackrf_s_tuner, .vidioc_g_tuner = hackrf_g_tuner, .vidioc_s_modulator = hackrf_s_modulator, .vidioc_g_modulator = hackrf_g_modulator, .vidioc_s_frequency = hackrf_s_frequency, .vidioc_g_frequency = hackrf_g_frequency, .vidioc_enum_freq_bands = hackrf_enum_freq_bands, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, .vidioc_log_status = v4l2_ctrl_log_status, }; static const struct v4l2_file_operations hackrf_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .write = vb2_fop_write, .poll = vb2_fop_poll, .mmap = vb2_fop_mmap, .unlocked_ioctl = video_ioctl2, }; static const struct video_device hackrf_template = { .name = "HackRF One", .release = video_device_release_empty, .fops = &hackrf_fops, .ioctl_ops = &hackrf_ioctl_ops, }; static void hackrf_video_release(struct v4l2_device *v) { struct hackrf_dev *dev = container_of(v, struct hackrf_dev, v4l2_dev); dev_dbg(dev->dev, "\n"); v4l2_ctrl_handler_free(&dev->rx_ctrl_handler); v4l2_ctrl_handler_free(&dev->tx_ctrl_handler); v4l2_device_unregister(&dev->v4l2_dev); kfree(dev); } static int hackrf_s_ctrl_rx(struct v4l2_ctrl *ctrl) { struct hackrf_dev *dev = container_of(ctrl->handler, struct hackrf_dev, rx_ctrl_handler); struct usb_interface *intf = dev->intf; int ret; switch (ctrl->id) { case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: case V4L2_CID_RF_TUNER_BANDWIDTH: set_bit(RX_BANDWIDTH, &dev->flags); break; case V4L2_CID_RF_TUNER_RF_GAIN: set_bit(RX_RF_GAIN, &dev->flags); break; case V4L2_CID_RF_TUNER_LNA_GAIN: set_bit(RX_LNA_GAIN, &dev->flags); break; case V4L2_CID_RF_TUNER_IF_GAIN: set_bit(RX_IF_GAIN, &dev->flags); break; default: dev_dbg(&intf->dev, "unknown ctrl: id=%d name=%s\n", ctrl->id, ctrl->name); ret = -EINVAL; goto err; } ret = hackrf_set_params(dev); if (ret) goto err; return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static int hackrf_s_ctrl_tx(struct v4l2_ctrl *ctrl) { struct hackrf_dev *dev = 
container_of(ctrl->handler, struct hackrf_dev, tx_ctrl_handler); struct usb_interface *intf = dev->intf; int ret; switch (ctrl->id) { case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: case V4L2_CID_RF_TUNER_BANDWIDTH: set_bit(TX_BANDWIDTH, &dev->flags); break; case V4L2_CID_RF_TUNER_LNA_GAIN: set_bit(TX_LNA_GAIN, &dev->flags); break; case V4L2_CID_RF_TUNER_RF_GAIN: set_bit(TX_RF_GAIN, &dev->flags); break; default: dev_dbg(&intf->dev, "unknown ctrl: id=%d name=%s\n", ctrl->id, ctrl->name); ret = -EINVAL; goto err; } ret = hackrf_set_params(dev); if (ret) goto err; return 0; err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } static const struct v4l2_ctrl_ops hackrf_ctrl_ops_rx = { .s_ctrl = hackrf_s_ctrl_rx, }; static const struct v4l2_ctrl_ops hackrf_ctrl_ops_tx = { .s_ctrl = hackrf_s_ctrl_tx, }; static int hackrf_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct hackrf_dev *dev; int ret; u8 u8tmp, buf[BUF_SIZE]; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; goto err; } mutex_init(&dev->v4l2_lock); mutex_init(&dev->vb_queue_lock); spin_lock_init(&dev->buffer_list_lock); INIT_LIST_HEAD(&dev->rx_buffer_list); INIT_LIST_HEAD(&dev->tx_buffer_list); dev->intf = intf; dev->dev = &intf->dev; dev->udev = interface_to_usbdev(intf); dev->pixelformat = formats[0].pixelformat; dev->buffersize = formats[0].buffersize; dev->f_adc = bands_adc_dac[0].rangelow; dev->f_dac = bands_adc_dac[0].rangelow; dev->f_rx = bands_rx_tx[0].rangelow; dev->f_tx = bands_rx_tx[0].rangelow; set_bit(RX_ADC_FREQUENCY, &dev->flags); set_bit(TX_DAC_FREQUENCY, &dev->flags); set_bit(RX_RF_FREQUENCY, &dev->flags); set_bit(TX_RF_FREQUENCY, &dev->flags); /* Detect device */ ret = hackrf_ctrl_msg(dev, CMD_BOARD_ID_READ, 0, 0, &u8tmp, 1); if (ret == 0) ret = hackrf_ctrl_msg(dev, CMD_VERSION_STRING_READ, 0, 0, buf, BUF_SIZE); if (ret) { dev_err(dev->dev, "Could not detect board\n"); goto err_kfree; } buf[BUF_SIZE - 1] = '\0'; dev_info(dev->dev, "Board ID: %02x\n", u8tmp); dev_info(dev->dev, "Firmware version: %s\n", buf); /* Init vb2 queue structure for receiver */ dev->rx_vb2_queue.type = V4L2_BUF_TYPE_SDR_CAPTURE; dev->rx_vb2_queue.io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ; dev->rx_vb2_queue.ops = &hackrf_vb2_ops; dev->rx_vb2_queue.mem_ops = &vb2_vmalloc_memops; dev->rx_vb2_queue.drv_priv = dev; dev->rx_vb2_queue.buf_struct_size = sizeof(struct hackrf_buffer); dev->rx_vb2_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; ret = vb2_queue_init(&dev->rx_vb2_queue); if (ret) { dev_err(dev->dev, "Could not initialize rx vb2 queue\n"); goto err_kfree; } /* Init vb2 queue structure for transmitter */ dev->tx_vb2_queue.type = V4L2_BUF_TYPE_SDR_OUTPUT; dev->tx_vb2_queue.io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_WRITE; dev->tx_vb2_queue.ops = &hackrf_vb2_ops; dev->tx_vb2_queue.mem_ops = &vb2_vmalloc_memops; dev->tx_vb2_queue.drv_priv = dev; dev->tx_vb2_queue.buf_struct_size = sizeof(struct hackrf_buffer); dev->tx_vb2_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; ret = vb2_queue_init(&dev->tx_vb2_queue); if (ret) { dev_err(dev->dev, "Could not initialize tx vb2 queue\n"); goto err_kfree; } /* Register controls for receiver */ v4l2_ctrl_handler_init(&dev->rx_ctrl_handler, 5); dev->rx_bandwidth_auto = v4l2_ctrl_new_std(&dev->rx_ctrl_handler, &hackrf_ctrl_ops_rx, V4L2_CID_RF_TUNER_BANDWIDTH_AUTO, 0, 1, 0, 1); dev->rx_bandwidth = v4l2_ctrl_new_std(&dev->rx_ctrl_handler, &hackrf_ctrl_ops_rx, V4L2_CID_RF_TUNER_BANDWIDTH, 1750000, 28000000, 50000, 1750000); 
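/*
 * Clustering BANDWIDTH_AUTO with BANDWIDTH makes hackrf_set_params()
 * derive the filter bandwidth from the current sample rate while auto
 * mode is enabled, instead of using the manually set control value.
 */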
v4l2_ctrl_auto_cluster(2, &dev->rx_bandwidth_auto, 0, false); dev->rx_rf_gain = v4l2_ctrl_new_std(&dev->rx_ctrl_handler, &hackrf_ctrl_ops_rx, V4L2_CID_RF_TUNER_RF_GAIN, 0, 12, 12, 0); dev->rx_lna_gain = v4l2_ctrl_new_std(&dev->rx_ctrl_handler, &hackrf_ctrl_ops_rx, V4L2_CID_RF_TUNER_LNA_GAIN, 0, 40, 8, 0); dev->rx_if_gain = v4l2_ctrl_new_std(&dev->rx_ctrl_handler, &hackrf_ctrl_ops_rx, V4L2_CID_RF_TUNER_IF_GAIN, 0, 62, 2, 0); if (dev->rx_ctrl_handler.error) { ret = dev->rx_ctrl_handler.error; dev_err(dev->dev, "Could not initialize controls\n"); goto err_v4l2_ctrl_handler_free_rx; } v4l2_ctrl_grab(dev->rx_rf_gain, !hackrf_enable_rf_gain_ctrl); v4l2_ctrl_handler_setup(&dev->rx_ctrl_handler); /* Register controls for transmitter */ v4l2_ctrl_handler_init(&dev->tx_ctrl_handler, 4); dev->tx_bandwidth_auto = v4l2_ctrl_new_std(&dev->tx_ctrl_handler, &hackrf_ctrl_ops_tx, V4L2_CID_RF_TUNER_BANDWIDTH_AUTO, 0, 1, 0, 1); dev->tx_bandwidth = v4l2_ctrl_new_std(&dev->tx_ctrl_handler, &hackrf_ctrl_ops_tx, V4L2_CID_RF_TUNER_BANDWIDTH, 1750000, 28000000, 50000, 1750000); v4l2_ctrl_auto_cluster(2, &dev->tx_bandwidth_auto, 0, false); dev->tx_lna_gain = v4l2_ctrl_new_std(&dev->tx_ctrl_handler, &hackrf_ctrl_ops_tx, V4L2_CID_RF_TUNER_LNA_GAIN, 0, 47, 1, 0); dev->tx_rf_gain = v4l2_ctrl_new_std(&dev->tx_ctrl_handler, &hackrf_ctrl_ops_tx, V4L2_CID_RF_TUNER_RF_GAIN, 0, 15, 15, 0); if (dev->tx_ctrl_handler.error) { ret = dev->tx_ctrl_handler.error; dev_err(dev->dev, "Could not initialize controls\n"); goto err_v4l2_ctrl_handler_free_tx; } v4l2_ctrl_grab(dev->tx_rf_gain, !hackrf_enable_rf_gain_ctrl); v4l2_ctrl_handler_setup(&dev->tx_ctrl_handler); /* Register the v4l2_device structure */ dev->v4l2_dev.release = hackrf_video_release; ret = v4l2_device_register(&intf->dev, &dev->v4l2_dev); if (ret) { dev_err(dev->dev, "Failed to register v4l2-device (%d)\n", ret); goto err_v4l2_ctrl_handler_free_tx; } /* Init video_device structure for receiver */ dev->rx_vdev = hackrf_template; dev->rx_vdev.queue = &dev->rx_vb2_queue; dev->rx_vdev.queue->lock = &dev->vb_queue_lock; dev->rx_vdev.v4l2_dev = &dev->v4l2_dev; dev->rx_vdev.ctrl_handler = &dev->rx_ctrl_handler; dev->rx_vdev.lock = &dev->v4l2_lock; dev->rx_vdev.vfl_dir = VFL_DIR_RX; dev->rx_vdev.device_caps = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE | V4L2_CAP_SDR_CAPTURE | V4L2_CAP_TUNER; video_set_drvdata(&dev->rx_vdev, dev); ret = video_register_device(&dev->rx_vdev, VFL_TYPE_SDR, -1); if (ret) { dev_err(dev->dev, "Failed to register as video device (%d)\n", ret); goto err_v4l2_device_unregister; } dev_info(dev->dev, "Registered as %s\n", video_device_node_name(&dev->rx_vdev)); /* Init video_device structure for transmitter */ dev->tx_vdev = hackrf_template; dev->tx_vdev.queue = &dev->tx_vb2_queue; dev->tx_vdev.queue->lock = &dev->vb_queue_lock; dev->tx_vdev.v4l2_dev = &dev->v4l2_dev; dev->tx_vdev.ctrl_handler = &dev->tx_ctrl_handler; dev->tx_vdev.lock = &dev->v4l2_lock; dev->tx_vdev.vfl_dir = VFL_DIR_TX; dev->tx_vdev.device_caps = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE | V4L2_CAP_SDR_OUTPUT | V4L2_CAP_MODULATOR; video_set_drvdata(&dev->tx_vdev, dev); ret = video_register_device(&dev->tx_vdev, VFL_TYPE_SDR, -1); if (ret) { dev_err(dev->dev, "Failed to register as video device (%d)\n", ret); goto err_video_unregister_device_rx; } dev_info(dev->dev, "Registered as %s\n", video_device_node_name(&dev->tx_vdev)); dev_notice(dev->dev, "SDR API is still slightly experimental and functionality changes may follow\n"); return 0; err_video_unregister_device_rx: 
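/* Each error label undoes one probe step, in reverse order of setup. */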
video_unregister_device(&dev->rx_vdev); err_v4l2_device_unregister: v4l2_device_unregister(&dev->v4l2_dev); err_v4l2_ctrl_handler_free_tx: v4l2_ctrl_handler_free(&dev->tx_ctrl_handler); err_v4l2_ctrl_handler_free_rx: v4l2_ctrl_handler_free(&dev->rx_ctrl_handler); err_kfree: kfree(dev); err: dev_dbg(&intf->dev, "failed=%d\n", ret); return ret; } /* USB device ID list */ static const struct usb_device_id hackrf_id_table[] = { { USB_DEVICE(0x1d50, 0x6089) }, /* HackRF One */ { } }; MODULE_DEVICE_TABLE(usb, hackrf_id_table); /* USB subsystem interface */ static struct usb_driver hackrf_driver = { .name = KBUILD_MODNAME, .probe = hackrf_probe, .disconnect = hackrf_disconnect, .id_table = hackrf_id_table, }; module_usb_driver(hackrf_driver); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("HackRF"); MODULE_LICENSE("GPL");
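For context, the V4L2 SDR interface exposed by this driver is driven from userspace with the regular video-device ioctls plus read()/write(). The fragment below is a minimal userspace sketch, not part of the driver: the /dev/swradio0 node name is an assumption, error handling is omitted, and only the RX read() path is shown.

/*
 * Userspace sketch: tune the SDR device and read CS8 I/Q samples.
 * The device node name is assumed; check /dev/swradio* on the target.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

int main(void)
{
	struct v4l2_frequency freq;
	static unsigned char buf[65536];
	ssize_t n;
	int fd = open("/dev/swradio0", O_RDONLY);	/* assumed node */

	if (fd < 0)
		return 1;

	/* Tuner 0 is the ADC: its "frequency" is the sample rate in Hz. */
	memset(&freq, 0, sizeof(freq));
	freq.tuner = 0;
	freq.type = V4L2_TUNER_SDR;
	freq.frequency = 8000000;			/* 8 Msps */
	ioctl(fd, VIDIOC_S_FREQUENCY, &freq);

	/* Tuner 1 is the RF front end: centre frequency in Hz. */
	freq.tuner = 1;
	freq.type = V4L2_TUNER_RF;
	freq.frequency = 100000000;			/* 100 MHz */
	ioctl(fd, VIDIOC_S_FREQUENCY, &freq);

	n = read(fd, buf, sizeof(buf));			/* interleaved I/Q, CS8 */
	printf("read %zd bytes\n", n);
	close(fd);
	return 0;
}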
// SPDX-License-Identifier: GPL-2.0 /* * FPU signal frame handling routines. */ #include <linux/compat.h> #include <linux/cpu.h> #include <linux/pagemap.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> #include <asm/fpu/xstate.h> #include <asm/sigframe.h> #include <asm/trapnr.h> #include <asm/trace/fpu.h> #include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" /* * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct fxregs_state) + sizeof(struct xstate_header); void __user *fpstate = fxbuf; unsigned int magic2; if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw))) return false; /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || fx_sw->xstate_size > current->thread.fpu.fpstate->user_size || fx_sw->xstate_size > fx_sw->extended_size) goto setfx; /* * Check for the presence of second magic word at the end of memory * layout. This detects the case where the user just copied the legacy * fpstate layout without copying the extended state information * in the memory layout.
*/ if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))) return false; if (likely(magic2 == FP_XSTATE_MAGIC2)) return true; setfx: trace_x86_fpu_xstate_check_failed(&current->thread.fpu); /* Set the parameters for fx only state */ fx_sw->magic1 = 0; fx_sw->xstate_size = sizeof(struct fxregs_state); fx_sw->xfeatures = XFEATURE_MASK_FPSSE; return true; } /* * Signal frame handlers. */ static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave; struct user_i387_ia32_struct env; struct _fpstate_32 __user *fp = buf; fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) fxsave(&tsk->thread.fpu.fpstate->regs.fxsave); fpregs_unlock(); convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env)) || __put_user(xsave->i387.swd, &fp->status) || __put_user(X86_FXSR_MAGIC, &fp->magic)) return false; } else { struct fregs_state __user *fp = buf; u32 swd; if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) return false; } return true; } /* * Prepare the SW reserved portion of the fxsave memory layout, indicating * the presence of the extended state information in the memory layout * pointed to by the fpstate pointer in the sigcontext. * This is saved when ever the FP and extended state context is * saved on the user stack during the signal handler delivery to the user. */ static inline void save_sw_bytes(struct _fpx_sw_bytes *sw_bytes, bool ia32_frame, struct fpstate *fpstate) { sw_bytes->magic1 = FP_XSTATE_MAGIC1; sw_bytes->extended_size = fpstate->user_size + FP_XSTATE_MAGIC2_SIZE; sw_bytes->xfeatures = fpstate->user_xfeatures; sw_bytes->xstate_size = fpstate->user_size; if (ia32_frame) sw_bytes->extended_size += sizeof(struct fregs_state); } static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, struct fpstate *fpstate) { struct xregs_state __user *x = buf; struct _fpx_sw_bytes sw_bytes = {}; u32 xfeatures; int err; /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ save_sw_bytes(&sw_bytes, ia32_frame, fpstate); err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes)); if (!use_xsave()) return !err; err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *)(buf + fpstate->user_size)); /* * Read the xfeatures which we copied (directly from the cpu or * from the state in task struct) to the user buffers. */ err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures); /* * For legacy compatible, we always set FP/SSE bits in the bit * vector while saving the state to the user context. This will * enable us capturing any changes(during sigreturn) to * the FP/SSE bits by the legacy applications which don't touch * xfeatures in the xsave header. * * xsave aware apps can change the xfeatures in the xsave * header as well as change any contents in the memory layout. * xrestore as part of sigreturn will capture all the changes. */ xfeatures |= XFEATURE_MASK_FPSSE; err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures); return !err; } static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) { if (use_xsave()) return xsave_to_user_sigframe(buf); if (use_fxsr()) return fxsave_to_user_sigframe((struct fxregs_state __user *) buf); else return fnsave_to_user_sigframe((struct fregs_state __user *) buf); } /* * Save the fpu, extended register state to the user signal frame. * * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save * state is copied. 
* 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. * * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * * Save it directly to the user frame with disabled page fault handler. If * that faults, try to clear the frame which handles the page fault. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. * * For [f]xsave state, update the SW reserved fields in the [f]xsave frame * indicating the absence/presence of the extended state to the user. */ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { struct task_struct *tsk = current; struct fpstate *fpstate = tsk->thread.fpu.fpstate; bool ia32_fxstate = (buf != buf_fx); int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); if (!static_cpu_has(X86_FEATURE_FPU)) { struct user_i387_ia32_struct fp; fpregs_soft_get(current, NULL, (struct membuf){.p = &fp, .left = sizeof(fp)}); return !copy_to_user(buf, &fp, sizeof(fp)); } if (!access_ok(buf, size)) return false; if (use_xsave()) { struct xregs_state __user *xbuf = buf_fx; /* * Clear the xsave header first, so that reserved fields are * initialized to zero. */ if (__clear_user(&xbuf->header, sizeof(xbuf->header))) return false; } retry: /* * Load the FPU registers if they are not valid for the current task. * With a valid FPU state we can attempt to save the state directly to * userland's stack frame which will likely succeed. If it does not, * resolve the fault in the user memory and try again. */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); pagefault_disable(); ret = copy_fpregs_to_sigframe(buf_fx); pagefault_enable(); fpregs_unlock(); if (ret) { if (!__clear_user(buf_fx, fpstate->user_size)) goto retry; return false; } /* Save the fsave header for the 32-bit frames. */ if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf)) return false; if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate)) return false; return true; } static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, u64 xrestore, bool fx_only) { if (use_xsave()) { u64 init_bv = ufeatures & ~xrestore; int ret; if (likely(!fx_only)) ret = xrstor_from_user_sigframe(buf, xrestore); else ret = fxrstor_from_user_sigframe(buf); if (!ret && unlikely(init_bv)) os_xrstor(&init_fpstate, init_bv); return ret; } else if (use_fxsr()) { return fxrstor_from_user_sigframe(buf); } else { return frstor_from_user_sigframe(buf); } } /* * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = &current->thread.fpu; int ret; /* Restore enabled features only. */ xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ xfd_update_state(fpu->fpstate); pagefault_disable(); ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures, xrestore, fx_only); pagefault_enable(); if (unlikely(ret)) { /* * The above did an FPU restore operation, restricted to * the user portion of the registers, and failed, but the * microcode might have modified the FPU registers * nevertheless. * * If the FPU registers do not belong to current, then * invalidate the FPU register state otherwise the task * might preempt current and return to user space with * corrupted FPU registers. 
*/ if (test_thread_flag(TIF_NEED_FPU_LOAD)) __cpu_invalidate_fpregs_state(); fpregs_unlock(); /* Try to handle #PF, but anything else is fatal. */ if (ret != X86_TRAP_PF) return false; if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } /* * Restore supervisor states: previous context switch etc has done * XSAVES and saved the supervisor states in the kernel buffer from * which they can be restored now. * * It would be optimal to handle this with a single XRSTORS, but * this does not work because the rest of the FPU registers have * been restored from a user buffer directly. */ if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor()) os_xrstor_supervisor(fpu->fpstate); fpregs_mark_activate(); fpregs_unlock(); return true; } static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, bool ia32_fxstate) { struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; u64 user_xfeatures = 0; if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; if (!check_xstate_in_sigframe(buf_fx, &fx_sw_user)) return false; fx_only = !fx_sw_user.magic1; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* * Copy the legacy state because the FP portion of the FX frame has * to be ignored for histerical raisins. The legacy state is folded * in once the larger state has been copied. */ if (__copy_from_user(&env, buf, sizeof(env))) return false; /* * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is * not modified on context switch and that the xstate is considered * to be loaded again on return to userland (overriding last_cpu avoids * the optimisation). */ fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { /* * If supervisor states are available then save the * hardware state in current's fpstate so that the * supervisor state is preserved. Save the full state for * simplicity. There is no point in optimizing this by only * saving the supervisor states and then shuffle them to * the right place in memory. It's ia32 mode. Shrug. */ if (xfeatures_mask_supervisor()) os_xsave(fpu->fpstate); set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); __cpu_invalidate_fpregs_state(); fpregs_unlock(); fpregs = &fpu->fpstate->regs; if (use_xsave() && !fx_only) { if (copy_sigframe_from_user_to_xstate(tsk, buf_fx)) return false; } else { if (__copy_from_user(&fpregs->fxsave, buf_fx, sizeof(fpregs->fxsave))) return false; if (IS_ENABLED(CONFIG_X86_64)) { /* Reject invalid MXCSR values. */ if (fpregs->fxsave.mxcsr & ~mxcsr_feature_mask) return false; } else { /* Mask invalid bits out for historical reasons (broken hardware). */ fpregs->fxsave.mxcsr &= mxcsr_feature_mask; } /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ if (use_xsave()) fpregs->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; } /* Fold the legacy FP storage */ convert_to_fxsr(&fpregs->fxsave, &env); fpregs_lock(); if (use_xsave()) { /* * Remove all UABI feature bits not set in user_xfeatures * from the memory xstate header which makes the full * restore below bring them into init state. This works for * fx_only mode as well because that has only FP and SSE * set in user_xfeatures. * * Preserve supervisor states! 
*/ u64 mask = user_xfeatures | xfeatures_mask_supervisor(); fpregs->xsave.header.xfeatures &= mask; success = !os_xrstor_safe(fpu->fpstate, fpu_kernel_cfg.max_features); } else { success = !fxrstor_safe(&fpregs->fxsave); } if (likely(success)) fpregs_mark_activate(); fpregs_unlock(); return success; } static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate) { unsigned int size = fpstate->user_size; return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size; } /* * Restore FPU state from a sigframe: */ bool fpu__restore_sig(void __user *buf, int ia32_frame) { struct fpu *fpu = &current->thread.fpu; void __user *buf_fx = buf; bool ia32_fxstate = false; bool success = false; unsigned int size; if (unlikely(!buf)) { fpu__clear_user_states(fpu); return true; } size = xstate_sigframe_size(fpu->fpstate); ia32_frame &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); /* * Only FXSR enabled systems need the FX state quirk. * FRSTOR does not need it and can use the fast path. */ if (ia32_frame && use_fxsr()) { buf_fx = buf + sizeof(struct fregs_state); size += sizeof(struct fregs_state); ia32_fxstate = true; } if (!access_ok(buf, size)) goto out; if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) { success = !fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), NULL, buf); } else { success = __fpu_restore_sig(buf, buf_fx, ia32_fxstate); } out: if (unlikely(!success)) fpu__clear_user_states(fpu); return success; } unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size) { unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate); *buf_fx = sp = round_down(sp - frame_size, 64); if (ia32_frame && use_fxsr()) { frame_size += sizeof(struct fregs_state); sp -= sizeof(struct fregs_state); } *size = frame_size; return sp; } unsigned long __init fpu__get_fpstate_size(void) { unsigned long ret = fpu_user_cfg.max_size; if (use_xsave()) ret += FP_XSTATE_MAGIC2_SIZE; /* * This space is needed on (most) 32-bit kernels, or when a 32-bit * app is running on a 64-bit kernel. To keep things simple, just * assume the worst case and always include space for 'freg_state', * even for 64-bit apps on 64-bit kernels. This wastes a bit of * space, but keeps the code simple. */ if ((IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) && use_fxsr()) ret += sizeof(struct fregs_state); return ret; }
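/*
 * Illustrative user-space sketch, not part of the kernel file above: from a
 * signal handler, locate the _fpx_sw_bytes block that save_sw_bytes() and
 * save_xstate_epilog() wrote into the signal frame and verify both magics,
 * mirroring what check_xstate_in_sigframe() does on sigreturn.  Assumptions:
 * the x86-64 layout where sw_reserved is the last 48 bytes of the 512-byte
 * FXSAVE image, and that the UAPI <asm/sigcontext.h> supplies
 * FP_XSTATE_MAGIC1/2 and struct _fpx_sw_bytes; handler() and main() are
 * hypothetical names.
 */
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <ucontext.h>
#include <asm/sigcontext.h>

static void handler(int sig, siginfo_t *si, void *ucontext)
{
	ucontext_t *uc = ucontext;
	char *fpstate = (char *)uc->uc_mcontext.fpregs;
	struct _fpx_sw_bytes sw;
	uint32_t magic2;

	(void)sig;
	(void)si;
	if (!fpstate)
		return;

	/* sw_reserved is the tail of the legacy 512-byte FXSAVE area. */
	memcpy(&sw, fpstate + 512 - sizeof(sw), sizeof(sw));
	if (sw.magic1 != FP_XSTATE_MAGIC1) {
		puts("legacy fxsave-only frame");
		return;
	}

	/* MAGIC2 sits right after the xstate area, where save_xstate_epilog() put it. */
	memcpy(&magic2, fpstate + sw.xstate_size, sizeof(magic2));
	printf("xstate frame: %u bytes, xfeatures=0x%llx, magic2 %s\n",
	       sw.xstate_size, (unsigned long long)sw.xfeatures,
	       magic2 == FP_XSTATE_MAGIC2 ? "ok" : "bad");
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}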
// SPDX-License-Identifier: GPL-2.0-or-later /* * tsacct.c - System accounting over taskstats interface * * Copyright (C) Jay Lan, <jlan@sgi.com> */ #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/sched/cputime.h> #include <linux/tsacct_kern.h> #include <linux/acct.h> #include <linux/jiffies.h> #include <linux/mm.h> /* * fill in basic accounting fields */ void bacct_add_tsk(struct user_namespace *user_ns, struct pid_namespace *pid_ns, struct taskstats *stats, struct task_struct *tsk) { const struct cred *tcred; u64 utime, stime, utimescaled, stimescaled; u64 now_ns, delta; time64_t btime; BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); /* calculate task elapsed time in nsec */ now_ns = ktime_get_ns(); /* store whole group time first */ delta = now_ns - tsk->group_leader->start_time; /* Convert to micro seconds */ do_div(delta, NSEC_PER_USEC); stats->ac_tgetime = delta; delta = now_ns - tsk->start_time; do_div(delta, NSEC_PER_USEC); stats->ac_etime = delta; /* Convert to seconds for btime (note y2106 limit) */ btime = ktime_get_real_seconds() - div_u64(delta, USEC_PER_SEC); stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX); stats->ac_btime64 = btime; if (tsk->flags & PF_EXITING) stats->ac_exitcode = tsk->exit_code; if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC)) stats->ac_flag |= AFORK; if (tsk->flags & PF_SUPERPRIV) stats->ac_flag |= ASU; if (tsk->flags & PF_DUMPCORE) stats->ac_flag |= ACORE; if (tsk->flags & PF_SIGNALED) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; stats->ac_pid = task_pid_nr_ns(tsk, pid_ns); stats->ac_tgid = task_tgid_nr_ns(tsk, pid_ns); rcu_read_lock(); tcred = __task_cred(tsk); stats->ac_uid = from_kuid_munged(user_ns, tcred->uid); stats->ac_gid = from_kgid_munged(user_ns, tcred->gid); stats->ac_ppid = pid_alive(tsk) ?
task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0; rcu_read_unlock(); task_cputime(tsk, &utime, &stime); stats->ac_utime = div_u64(utime, NSEC_PER_USEC); stats->ac_stime = div_u64(stime, NSEC_PER_USEC); task_cputime_scaled(tsk, &utimescaled, &stimescaled); stats->ac_utimescaled = div_u64(utimescaled, NSEC_PER_USEC); stats->ac_stimescaled = div_u64(stimescaled, NSEC_PER_USEC); stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); } #ifdef CONFIG_TASK_XACCT #define KB 1024 #define MB (1024*KB) #define KB_MASK (~(KB-1)) /* * fill in extended accounting fields */ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) { struct mm_struct *mm; /* convert pages-nsec/1024 to Mbyte-usec, see __acct_update_integrals */ stats->coremem = p->acct_rss_mem1 * PAGE_SIZE; do_div(stats->coremem, 1000 * KB); stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE; do_div(stats->virtmem, 1000 * KB); mm = get_task_mm(p); if (mm) { /* adjust to KB unit */ stats->hiwater_rss = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB; stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; mmput(mm); } stats->read_char = p->ioac.rchar & KB_MASK; stats->write_char = p->ioac.wchar & KB_MASK; stats->read_syscalls = p->ioac.syscr & KB_MASK; stats->write_syscalls = p->ioac.syscw & KB_MASK; #ifdef CONFIG_TASK_IO_ACCOUNTING stats->read_bytes = p->ioac.read_bytes & KB_MASK; stats->write_bytes = p->ioac.write_bytes & KB_MASK; stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK; #else stats->read_bytes = 0; stats->write_bytes = 0; stats->cancelled_write_bytes = 0; #endif } #undef KB #undef MB static void __acct_update_integrals(struct task_struct *tsk, u64 utime, u64 stime) { u64 time, delta; if (!likely(tsk->mm)) return; time = stime + utime; delta = time - tsk->acct_timexpd; if (delta < TICK_NSEC) return; tsk->acct_timexpd = time; /* * Divide by 1024 to avoid overflow, and to avoid division. * The final unit reported to userspace is Mbyte-usecs, * the rest of the math is done in xacct_add_tsk. */ tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10; tsk->acct_vm_mem1 += delta * READ_ONCE(tsk->mm->total_vm) >> 10; } /** * acct_update_integrals - update mm integral fields in task_struct * @tsk: task_struct for accounting */ void acct_update_integrals(struct task_struct *tsk) { u64 utime, stime; unsigned long flags; local_irq_save(flags); task_cputime(tsk, &utime, &stime); __acct_update_integrals(tsk, utime, stime); local_irq_restore(flags); } /** * acct_account_cputime - update mm integral after cputime update * @tsk: task_struct for accounting */ void acct_account_cputime(struct task_struct *tsk) { __acct_update_integrals(tsk, tsk->utime, tsk->stime); } /** * acct_clear_integrals - clear the mm integral fields in task_struct * @tsk: task_struct whose accounting fields are cleared */ void acct_clear_integrals(struct task_struct *tsk) { tsk->acct_timexpd = 0; tsk->acct_rss_mem1 = 0; tsk->acct_vm_mem1 = 0; } #endif
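/*
 * Standalone user-space sketch, not kernel code: it reproduces the unit
 * conversion split between __acct_update_integrals() (which accumulates
 * rss-pages * delta-ns / 1024) and xacct_add_tsk() (which multiplies by
 * PAGE_SIZE and divides by 1000 * KB to report Mbyte-usecs).  The 4 KiB
 * page size and the sample numbers are assumptions chosen for illustration.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t page_size = 4096;		/* assumed PAGE_SIZE */
	const uint64_t kb = 1024;
	uint64_t rss_pages = 25600;			/* ~100 MiB resident (hypothetical) */
	uint64_t delta_ns = 10ULL * 1000000000ULL;	/* 10 s of accounted CPU time */

	/* Kernel step 1: tsk->acct_rss_mem1 += delta * get_mm_rss(mm) >> 10 */
	uint64_t acct_rss_mem1 = delta_ns * rss_pages >> 10;

	/* Kernel step 2: stats->coremem = acct_rss_mem1 * PAGE_SIZE / (1000 * KB) */
	uint64_t coremem = acct_rss_mem1 * page_size / (1000 * kb);

	/* 100 MiB held for 10 s comes out as 1e9 Mbyte-usecs. */
	printf("coremem = %llu Mbyte-usecs\n", (unsigned long long)coremem);
	return 0;
}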
// SPDX-License-Identifier: GPL-2.0 /* * When connected to the machine, the Thrustmaster wheels appear as * a «generic» hid gamepad called "Thrustmaster FFB Wheel". * * In this mode not every functionality of the wheel, such as force feedback, * is available. To enable all functionality of a Thrustmaster wheel we have to send * it a specific USB CONTROL request with a code that differs for each wheel. * * This driver tries to understand which model of Thrustmaster wheel the generic * "Thrustmaster FFB Wheel" really is and then sends the appropriate control code. * * Copyright (c) 2020-2021 Dario Pagani <dario.pagani.146+linuxk@gmail.com> * Copyright (c) 2020-2021 Kim Kuparinen <kimi.h.kuparinen@gmail.com> */ #include <linux/hid.h> #include <linux/usb.h> #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> /* * These interrupts are used to prevent a nasty crash when initializing the * T300RS. Used in thrustmaster_interrupts(). */ static const u8 setup_0[] = { 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static const u8 setup_1[] = { 0x0a, 0x04, 0x90, 0x03, 0x00, 0x00, 0x00, 0x00 }; static const u8 setup_2[] = { 0x0a, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00 }; static const u8 setup_3[] = { 0x0a, 0x04, 0x12, 0x10, 0x00, 0x00, 0x00, 0x00 }; static const u8 setup_4[] = { 0x0a, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00 }; static const u8 *const setup_arr[] = { setup_0, setup_1, setup_2, setup_3, setup_4 }; static const unsigned int setup_arr_sizes[] = { ARRAY_SIZE(setup_0), ARRAY_SIZE(setup_1), ARRAY_SIZE(setup_2), ARRAY_SIZE(setup_3), ARRAY_SIZE(setup_4) }; /* * This struct describes each supported type of * Thrustmaster wheel. * * Note: The values are stored in CPU * endianness; the USB protocol always uses * little endian, so the macro cpu_to_le[BIT]() * must be used when preparing USB packets * and vice-versa */ struct tm_wheel_info { uint16_t wheel_type; /* * See where the USB control out packet is prepared... * @TODO The TMX seems to require multiple control codes to switch.
*/ uint16_t switch_value; char const *const wheel_name; }; /* * Known wheels. * Note: TMX does not work as it requires 2 control packets */ static const struct tm_wheel_info tm_wheels_infos[] = { {0x0306, 0x0006, "Thrustmaster T150RS"}, {0x0200, 0x0005, "Thrustmaster T300RS (Missing Attachment)"}, {0x0206, 0x0005, "Thrustmaster T300RS"}, {0x0209, 0x0005, "Thrustmaster T300RS (Open Wheel Attachment)"}, {0x020a, 0x0005, "Thrustmaster T300RS (Sparco R383 Mod)"}, {0x0204, 0x0005, "Thrustmaster T300 Ferrari Alcantara Edition"}, {0x0002, 0x0002, "Thrustmaster T500RS"} //{0x0407, 0x0001, "Thrustmaster TMX"} }; static const uint8_t tm_wheels_infos_length = 7; /* * This structs contains (in little endian) the response data * of the wheel to the request 73 * * A sufficient research to understand what each field does is not * beign conducted yet. The position and meaning of fields are a * just a very optimistic guess based on instinct.... */ struct __packed tm_wheel_response { /* * Seems to be the type of packet * - 0x0049 if is data.a (15 bytes) * - 0x0047 if is data.b (7 bytes) */ uint16_t type; union { struct __packed { uint16_t field0; uint16_t field1; /* * Seems to be the model code of the wheel * Read table thrustmaster_wheels to values */ uint16_t model; uint16_t field2; uint16_t field3; uint16_t field4; uint16_t field5; } a; struct __packed { uint16_t field0; uint16_t field1; uint16_t model; } b; } data; }; struct tm_wheel { struct usb_device *usb_dev; struct urb *urb; struct usb_ctrlrequest *model_request; struct tm_wheel_response *response; struct usb_ctrlrequest *change_request; }; /* The control packet to send to wheel */ static const struct usb_ctrlrequest model_request = { .bRequestType = 0xc1, .bRequest = 73, .wValue = 0, .wIndex = 0, .wLength = cpu_to_le16(0x0010) }; static const struct usb_ctrlrequest change_request = { .bRequestType = 0x41, .bRequest = 83, .wValue = 0, // Will be filled by the driver .wIndex = 0, .wLength = 0 }; /* * On some setups initializing the T300RS crashes the kernel, * these interrupts fix that particular issue. So far they haven't caused any * adverse effects in other wheels. */ static void thrustmaster_interrupts(struct hid_device *hdev) { int ret, trans, i, b_ep; u8 *send_buf = kmalloc(256, GFP_KERNEL); struct usb_host_endpoint *ep; struct device *dev = &hdev->dev; struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); if (!send_buf) { hid_err(hdev, "failed allocating send buffer\n"); return; } if (usbif->cur_altsetting->desc.bNumEndpoints < 2) { kfree(send_buf); hid_err(hdev, "Wrong number of endpoints?\n"); return; } ep = &usbif->cur_altsetting->endpoint[1]; b_ep = ep->desc.bEndpointAddress; for (i = 0; i < ARRAY_SIZE(setup_arr); ++i) { memcpy(send_buf, setup_arr[i], setup_arr_sizes[i]); ret = usb_interrupt_msg(usbdev, usb_sndintpipe(usbdev, b_ep), send_buf, setup_arr_sizes[i], &trans, USB_CTRL_SET_TIMEOUT); if (ret) { hid_err(hdev, "setup data couldn't be sent\n"); kfree(send_buf); return; } } kfree(send_buf); } static void thrustmaster_change_handler(struct urb *urb) { struct hid_device *hdev = urb->context; // The wheel seems to kill himself before answering the host and therefore is violating the USB protocol... if (urb->status == 0 || urb->status == -EPROTO || urb->status == -EPIPE) hid_info(hdev, "Success?! 
The wheel should have been initialized!\n"); else hid_warn(hdev, "URB to change wheel mode seems to have failed with error %d\n", urb->status); } /* * Called by the USB subsystem when the wheel responses to our request * to get [what it seems to be] the wheel's model. * * If the model id is recognized then we send an opportune USB CONTROL REQUEST * to switch the wheel to its full capabilities */ static void thrustmaster_model_handler(struct urb *urb) { struct hid_device *hdev = urb->context; struct tm_wheel *tm_wheel = hid_get_drvdata(hdev); uint16_t model = 0; int i, ret; const struct tm_wheel_info *twi = NULL; if (urb->status) { hid_err(hdev, "URB to get model id failed with error %d\n", urb->status); return; } if (tm_wheel->response->type == cpu_to_le16(0x49)) model = le16_to_cpu(tm_wheel->response->data.a.model); else if (tm_wheel->response->type == cpu_to_le16(0x47)) model = le16_to_cpu(tm_wheel->response->data.b.model); else { hid_err(hdev, "Unknown packet type 0x%x, unable to proceed further with wheel init\n", tm_wheel->response->type); return; } for (i = 0; i < tm_wheels_infos_length && !twi; i++) if (tm_wheels_infos[i].wheel_type == model) twi = tm_wheels_infos + i; if (twi) hid_info(hdev, "Wheel with model id 0x%x is a %s\n", model, twi->wheel_name); else { hid_err(hdev, "Unknown wheel's model id 0x%x, unable to proceed further with wheel init\n", model); return; } tm_wheel->change_request->wValue = cpu_to_le16(twi->switch_value); usb_fill_control_urb( tm_wheel->urb, tm_wheel->usb_dev, usb_sndctrlpipe(tm_wheel->usb_dev, 0), (char *)tm_wheel->change_request, NULL, 0, // We do not expect any response from the wheel thrustmaster_change_handler, hdev ); ret = usb_submit_urb(tm_wheel->urb, GFP_ATOMIC); if (ret) hid_err(hdev, "Error %d while submitting the change URB. I am unable to initialize this wheel...\n", ret); } static void thrustmaster_remove(struct hid_device *hdev) { struct tm_wheel *tm_wheel = hid_get_drvdata(hdev); usb_kill_urb(tm_wheel->urb); kfree(tm_wheel->change_request); kfree(tm_wheel->response); kfree(tm_wheel->model_request); usb_free_urb(tm_wheel->urb); kfree(tm_wheel); hid_hw_stop(hdev); } /* * Function called by HID when a hid Thrustmaster FFB wheel is connected to the host. * This function starts the hid dev, tries to allocate the tm_wheel data structure and * finally send an USB CONTROL REQUEST to the wheel to get [what it seems to be] its * model type. 
*/ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret = 0; struct tm_wheel *tm_wheel = NULL; if (!hid_is_usb(hdev)) return -EINVAL; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed with error %d\n", ret); goto error0; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed with error %d\n", ret); goto error0; } // Now we allocate the tm_wheel tm_wheel = kzalloc(sizeof(struct tm_wheel), GFP_KERNEL); if (!tm_wheel) { ret = -ENOMEM; goto error1; } tm_wheel->urb = usb_alloc_urb(0, GFP_ATOMIC); if (!tm_wheel->urb) { ret = -ENOMEM; goto error2; } tm_wheel->model_request = kmemdup(&model_request, sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!tm_wheel->model_request) { ret = -ENOMEM; goto error3; } tm_wheel->response = kzalloc(sizeof(struct tm_wheel_response), GFP_KERNEL); if (!tm_wheel->response) { ret = -ENOMEM; goto error4; } tm_wheel->change_request = kmemdup(&change_request, sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!tm_wheel->change_request) { ret = -ENOMEM; goto error5; } tm_wheel->usb_dev = interface_to_usbdev(to_usb_interface(hdev->dev.parent)); hid_set_drvdata(hdev, tm_wheel); thrustmaster_interrupts(hdev); usb_fill_control_urb( tm_wheel->urb, tm_wheel->usb_dev, usb_rcvctrlpipe(tm_wheel->usb_dev, 0), (char *)tm_wheel->model_request, tm_wheel->response, sizeof(struct tm_wheel_response), thrustmaster_model_handler, hdev ); ret = usb_submit_urb(tm_wheel->urb, GFP_ATOMIC); if (ret) { hid_err(hdev, "Error %d while submitting the URB. I am unable to initialize this wheel...\n", ret); goto error6; } return ret; error6: kfree(tm_wheel->change_request); error5: kfree(tm_wheel->response); error4: kfree(tm_wheel->model_request); error3: usb_free_urb(tm_wheel->urb); error2: kfree(tm_wheel); error1: hid_hw_stop(hdev); error0: return ret; } static const struct hid_device_id thrustmaster_devices[] = { { HID_USB_DEVICE(0x044f, 0xb65d)}, {} }; MODULE_DEVICE_TABLE(hid, thrustmaster_devices); static struct hid_driver thrustmaster_driver = { .name = "hid-thrustmaster", .id_table = thrustmaster_devices, .probe = thrustmaster_probe, .remove = thrustmaster_remove, }; module_hid_driver(thrustmaster_driver); MODULE_AUTHOR("Dario Pagani <dario.pagani.146+linuxk@gmail.com>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Driver to initialize some steering wheel joysticks from Thrustmaster");
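/*
 * User-space sketch using libusb-1.0, not part of the driver above: it
 * replays the mode-switch control request that thrustmaster_model_handler()
 * submits once the wheel model is known.  The bmRequestType 0x41, bRequest
 * 83 and the 0x0005 wValue for a T300RS are taken from change_request and
 * tm_wheels_infos; device selection, timeout and error handling here are
 * illustrative only, and with this driver bound the switch already happens
 * automatically.  Build with: cc switch_wheel.c -lusb-1.0
 */
#include <stdio.h>
#include <libusb-1.0/libusb.h>

int main(void)
{
	libusb_context *ctx = NULL;
	libusb_device_handle *h;
	int ret;

	if (libusb_init(&ctx))
		return 1;

	/* The not-yet-switched wheel enumerates as the generic 044f:b65d gamepad. */
	h = libusb_open_device_with_vid_pid(ctx, 0x044f, 0xb65d);
	if (!h) {
		fprintf(stderr, "generic wheel not found (already switched?)\n");
		libusb_exit(ctx);
		return 1;
	}

	/* Vendor request 83 with the wheel-specific wValue, no data stage. */
	ret = libusb_control_transfer(h, 0x41, 83, 0x0005, 0, NULL, 0, 1000);
	fprintf(stderr, "control transfer returned %d\n", ret);

	libusb_close(h);
	libusb_exit(ctx);
	return 0;
}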
//
SPDX-License-Identifier: GPL-2.0-only /* * Universal power supply monitor class * * Copyright © 2007 Anton Vorontsov <cbou@mail.ru> * Copyright © 2004 Szabolcs Gyurko * Copyright © 2003 Ian Molton <spyro@f2s.com> * * Modified: 2004, Oct Szabolcs Gyurko */ #include <linux/module.h> #include <linux/types.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/notifier.h> #include <linux/err.h> #include <linux/of.h> #include <linux/power_supply.h> #include <linux/property.h> #include <linux/thermal.h> #include <linux/fixp-arith.h> #include "power_supply.h" #include "samsung-sdi-battery.h" static const struct class power_supply_class = { .name = "power_supply", .dev_uevent = power_supply_uevent, }; static BLOCKING_NOTIFIER_HEAD(power_supply_notifier); static const struct device_type power_supply_dev_type = { .name = "power_supply", .groups = power_supply_attr_groups, }; #define POWER_SUPPLY_DEFERRED_REGISTER_TIME msecs_to_jiffies(10) static bool __power_supply_is_supplied_by(struct power_supply *supplier, struct power_supply *supply) { int i; if (!supply->supplied_from && !supplier->supplied_to) return false; /* Support both supplied_to and supplied_from modes */ if (supply->supplied_from) { if (!supplier->desc->name) return false; for (i = 0; i < supply->num_supplies; i++) if (!strcmp(supplier->desc->name, supply->supplied_from[i])) return true; } else { if (!supply->desc->name) return false; for (i = 0; i < supplier->num_supplicants; i++) if (!strcmp(supplier->supplied_to[i], supply->desc->name)) return true; } return false; } static int __power_supply_changed_work(struct device *dev, void *data) { struct power_supply *psy = data; struct power_supply *pst = dev_get_drvdata(dev); if (__power_supply_is_supplied_by(psy, pst)) { if (pst->desc->external_power_changed) pst->desc->external_power_changed(pst); } return 0; } static void power_supply_changed_work(struct work_struct *work) { unsigned long flags; struct power_supply *psy = container_of(work, struct power_supply, changed_work); dev_dbg(&psy->dev, "%s\n", __func__); spin_lock_irqsave(&psy->changed_lock, flags); /* * Check 'changed' here to avoid issues due to race between * power_supply_changed() and this routine. In worst case * power_supply_changed() can be called again just before we take above * lock. During the first call of this routine we will mark 'changed' as * false and it will stay false for the next call as well. */ if (likely(psy->changed)) { psy->changed = false; spin_unlock_irqrestore(&psy->changed_lock, flags); power_supply_for_each_device(psy, __power_supply_changed_work); power_supply_update_leds(psy); blocking_notifier_call_chain(&power_supply_notifier, PSY_EVENT_PROP_CHANGED, psy); kobject_uevent(&psy->dev.kobj, KOBJ_CHANGE); spin_lock_irqsave(&psy->changed_lock, flags); } /* * Hold the wakeup_source until all events are processed. * power_supply_changed() might have called again and have set 'changed' * to true. 
*/ if (likely(!psy->changed)) pm_relax(&psy->dev); spin_unlock_irqrestore(&psy->changed_lock, flags); } int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)) { return class_for_each_device(&power_supply_class, NULL, data, fn); } EXPORT_SYMBOL_GPL(power_supply_for_each_device); void power_supply_changed(struct power_supply *psy) { unsigned long flags; dev_dbg(&psy->dev, "%s\n", __func__); spin_lock_irqsave(&psy->changed_lock, flags); psy->changed = true; pm_stay_awake(&psy->dev); spin_unlock_irqrestore(&psy->changed_lock, flags); schedule_work(&psy->changed_work); } EXPORT_SYMBOL_GPL(power_supply_changed); /* * Notify that power supply was registered after parent finished the probing. * * Often power supply is registered from driver's probe function. However * calling power_supply_changed() directly from power_supply_register() * would lead to execution of get_property() function provided by the driver * too early - before the probe ends. * * Avoid that by waiting on parent's mutex. */ static void power_supply_deferred_register_work(struct work_struct *work) { struct power_supply *psy = container_of(work, struct power_supply, deferred_register_work.work); if (psy->dev.parent) { while (!mutex_trylock(&psy->dev.parent->mutex)) { if (psy->removing) return; msleep(10); } } power_supply_changed(psy); if (psy->dev.parent) mutex_unlock(&psy->dev.parent->mutex); } #ifdef CONFIG_OF static int __power_supply_populate_supplied_from(struct device *dev, void *data) { struct power_supply *psy = data; struct power_supply *epsy = dev_get_drvdata(dev); struct device_node *np; int i = 0; do { np = of_parse_phandle(psy->of_node, "power-supplies", i++); if (!np) break; if (np == epsy->of_node) { dev_dbg(&psy->dev, "%s: Found supply : %s\n", psy->desc->name, epsy->desc->name); psy->supplied_from[i-1] = (char *)epsy->desc->name; psy->num_supplies++; of_node_put(np); break; } of_node_put(np); } while (np); return 0; } static int power_supply_populate_supplied_from(struct power_supply *psy) { int error; error = power_supply_for_each_device(psy, __power_supply_populate_supplied_from); dev_dbg(&psy->dev, "%s %d\n", __func__, error); return error; } static int __power_supply_find_supply_from_node(struct device *dev, void *data) { struct device_node *np = data; struct power_supply *epsy = dev_get_drvdata(dev); /* returning non-zero breaks out of power_supply_for_each_device loop */ if (epsy->of_node == np) return 1; return 0; } static int power_supply_find_supply_from_node(struct device_node *supply_node) { int error; /* * power_supply_for_each_device() either returns its own errors or values * returned by __power_supply_find_supply_from_node(). * * __power_supply_find_supply_from_node() will return 0 (no match) * or 1 (match). * * We return 0 if power_supply_for_each_device() returned 1, -EPROBE_DEFER if * it returned 0, or error as returned by it. */ error = power_supply_for_each_device(supply_node, __power_supply_find_supply_from_node); return error ? (error == 1 ? 
0 : error) : -EPROBE_DEFER; } static int power_supply_check_supplies(struct power_supply *psy) { struct device_node *np; int cnt = 0; /* If there is already a list honor it */ if (psy->supplied_from && psy->num_supplies > 0) return 0; /* No device node found, nothing to do */ if (!psy->of_node) return 0; do { int ret; np = of_parse_phandle(psy->of_node, "power-supplies", cnt++); if (!np) break; ret = power_supply_find_supply_from_node(np); of_node_put(np); if (ret) { dev_dbg(&psy->dev, "Failed to find supply!\n"); return ret; } } while (np); /* Missing valid "power-supplies" entries */ if (cnt == 1) return 0; /* All supplies found, allocate char ** array for filling */ psy->supplied_from = devm_kzalloc(&psy->dev, sizeof(*psy->supplied_from), GFP_KERNEL); if (!psy->supplied_from) return -ENOMEM; *psy->supplied_from = devm_kcalloc(&psy->dev, cnt - 1, sizeof(**psy->supplied_from), GFP_KERNEL); if (!*psy->supplied_from) return -ENOMEM; return power_supply_populate_supplied_from(psy); } #else static int power_supply_check_supplies(struct power_supply *psy) { int nval, ret; if (!psy->dev.parent) return 0; nval = device_property_string_array_count(psy->dev.parent, "supplied-from"); if (nval <= 0) return 0; psy->supplied_from = devm_kmalloc_array(&psy->dev, nval, sizeof(char *), GFP_KERNEL); if (!psy->supplied_from) return -ENOMEM; ret = device_property_read_string_array(psy->dev.parent, "supplied-from", (const char **)psy->supplied_from, nval); if (ret < 0) return ret; psy->num_supplies = nval; return 0; } #endif struct psy_am_i_supplied_data { struct power_supply *psy; unsigned int count; }; static int __power_supply_am_i_supplied(struct device *dev, void *_data) { union power_supply_propval ret = {0,}; struct power_supply *epsy = dev_get_drvdata(dev); struct psy_am_i_supplied_data *data = _data; if (__power_supply_is_supplied_by(epsy, data->psy)) { data->count++; if (!epsy->desc->get_property(epsy, POWER_SUPPLY_PROP_ONLINE, &ret)) return ret.intval; } return 0; } int power_supply_am_i_supplied(struct power_supply *psy) { struct psy_am_i_supplied_data data = { psy, 0 }; int error; error = power_supply_for_each_device(&data, __power_supply_am_i_supplied); dev_dbg(&psy->dev, "%s count %u err %d\n", __func__, data.count, error); if (data.count == 0) return -ENODEV; return error; } EXPORT_SYMBOL_GPL(power_supply_am_i_supplied); static int __power_supply_is_system_supplied(struct device *dev, void *data) { union power_supply_propval ret = {0,}; struct power_supply *psy = dev_get_drvdata(dev); unsigned int *count = data; if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_SCOPE, &ret)) if (ret.intval == POWER_SUPPLY_SCOPE_DEVICE) return 0; (*count)++; if (psy->desc->type != POWER_SUPPLY_TYPE_BATTERY) if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &ret)) return ret.intval; return 0; } int power_supply_is_system_supplied(void) { int error; unsigned int count = 0; error = power_supply_for_each_device(&count, __power_supply_is_system_supplied); /* * If no system scope power class device was found at all, most probably we * are running on a desktop system, so assume we are on mains power. 
*/ if (count == 0) return 1; return error; } EXPORT_SYMBOL_GPL(power_supply_is_system_supplied); struct psy_get_supplier_prop_data { struct power_supply *psy; enum power_supply_property psp; union power_supply_propval *val; }; static int __power_supply_get_supplier_property(struct device *dev, void *_data) { struct power_supply *epsy = dev_get_drvdata(dev); struct psy_get_supplier_prop_data *data = _data; if (__power_supply_is_supplied_by(epsy, data->psy)) if (!power_supply_get_property(epsy, data->psp, data->val)) return 1; /* Success */ return 0; /* Continue iterating */ } int power_supply_get_property_from_supplier(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct psy_get_supplier_prop_data data = { .psy = psy, .psp = psp, .val = val, }; int ret; /* * This function is not intended for use with a supply with multiple * suppliers, we simply pick the first supply to report the psp. */ ret = power_supply_for_each_device(&data, __power_supply_get_supplier_property); if (ret < 0) return ret; if (ret == 0) return -ENODEV; return 0; } EXPORT_SYMBOL_GPL(power_supply_get_property_from_supplier); int power_supply_set_battery_charged(struct power_supply *psy) { if (atomic_read(&psy->use_cnt) >= 0 && psy->desc->type == POWER_SUPPLY_TYPE_BATTERY && psy->desc->set_charged) { psy->desc->set_charged(psy); return 0; } return -EINVAL; } EXPORT_SYMBOL_GPL(power_supply_set_battery_charged); static int power_supply_match_device_by_name(struct device *dev, const void *data) { const char *name = data; struct power_supply *psy = dev_get_drvdata(dev); return strcmp(psy->desc->name, name) == 0; } /** * power_supply_get_by_name() - Search for a power supply and returns its ref * @name: Power supply name to fetch * * If power supply was found, it increases reference count for the * internal power supply's device. The user should power_supply_put() * after usage. * * Return: On success returns a reference to a power supply with * matching name equals to @name, a NULL otherwise. */ struct power_supply *power_supply_get_by_name(const char *name) { struct power_supply *psy = NULL; struct device *dev = class_find_device(&power_supply_class, NULL, name, power_supply_match_device_by_name); if (dev) { psy = dev_get_drvdata(dev); atomic_inc(&psy->use_cnt); } return psy; } EXPORT_SYMBOL_GPL(power_supply_get_by_name); /** * power_supply_put() - Drop reference obtained with power_supply_get_by_name * @psy: Reference to put * * The reference to power supply should be put before unregistering * the power supply. */ void power_supply_put(struct power_supply *psy) { might_sleep(); atomic_dec(&psy->use_cnt); put_device(&psy->dev); } EXPORT_SYMBOL_GPL(power_supply_put); #ifdef CONFIG_OF static int power_supply_match_device_node(struct device *dev, const void *data) { return dev->parent && dev->parent->of_node == data; } /** * power_supply_get_by_phandle() - Search for a power supply and returns its ref * @np: Pointer to device node holding phandle property * @property: Name of property holding a power supply name * * If power supply was found, it increases reference count for the * internal power supply's device. The user should power_supply_put() * after usage. * * Return: On success returns a reference to a power supply with * matching name equals to value under @property, NULL or ERR_PTR otherwise. 
*/ struct power_supply *power_supply_get_by_phandle(struct device_node *np, const char *property) { struct device_node *power_supply_np; struct power_supply *psy = NULL; struct device *dev; power_supply_np = of_parse_phandle(np, property, 0); if (!power_supply_np) return ERR_PTR(-ENODEV); dev = class_find_device(&power_supply_class, NULL, power_supply_np, power_supply_match_device_node); of_node_put(power_supply_np); if (dev) { psy = dev_get_drvdata(dev); atomic_inc(&psy->use_cnt); } return psy; } EXPORT_SYMBOL_GPL(power_supply_get_by_phandle); static void devm_power_supply_put(struct device *dev, void *res) { struct power_supply **psy = res; power_supply_put(*psy); } /** * devm_power_supply_get_by_phandle() - Resource managed version of * power_supply_get_by_phandle() * @dev: Pointer to device holding phandle property * @property: Name of property holding a power supply phandle * * Return: On success returns a reference to a power supply with * matching name equals to value under @property, NULL or ERR_PTR otherwise. */ struct power_supply *devm_power_supply_get_by_phandle(struct device *dev, const char *property) { struct power_supply **ptr, *psy; if (!dev->of_node) return ERR_PTR(-ENODEV); ptr = devres_alloc(devm_power_supply_put, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); psy = power_supply_get_by_phandle(dev->of_node, property); if (IS_ERR_OR_NULL(psy)) { devres_free(ptr); } else { *ptr = psy; devres_add(dev, ptr); } return psy; } EXPORT_SYMBOL_GPL(devm_power_supply_get_by_phandle); #endif /* CONFIG_OF */ int power_supply_get_battery_info(struct power_supply *psy, struct power_supply_battery_info **info_out) { struct power_supply_resistance_temp_table *resist_table; struct power_supply_battery_info *info; struct device_node *battery_np = NULL; struct fwnode_reference_args args; struct fwnode_handle *fwnode = NULL; const char *value; int err, len, index; const __be32 *list; u32 min_max[2]; if (psy->of_node) { battery_np = of_parse_phandle(psy->of_node, "monitored-battery", 0); if (!battery_np) return -ENODEV; fwnode = fwnode_handle_get(of_fwnode_handle(battery_np)); } else if (psy->dev.parent) { err = fwnode_property_get_reference_args( dev_fwnode(psy->dev.parent), "monitored-battery", NULL, 0, 0, &args); if (err) return err; fwnode = args.fwnode; } if (!fwnode) return -ENOENT; err = fwnode_property_read_string(fwnode, "compatible", &value); if (err) goto out_put_node; /* Try static batteries first */ err = samsung_sdi_battery_get_info(&psy->dev, value, &info); if (!err) goto out_ret_pointer; else if (err == -ENODEV) /* * Device does not have a static battery. * Proceed to look for a simple battery. 
*/ err = 0; if (strcmp("simple-battery", value)) { err = -ENODEV; goto out_put_node; } info = devm_kzalloc(&psy->dev, sizeof(*info), GFP_KERNEL); if (!info) { err = -ENOMEM; goto out_put_node; } info->technology = POWER_SUPPLY_TECHNOLOGY_UNKNOWN; info->energy_full_design_uwh = -EINVAL; info->charge_full_design_uah = -EINVAL; info->voltage_min_design_uv = -EINVAL; info->voltage_max_design_uv = -EINVAL; info->precharge_current_ua = -EINVAL; info->charge_term_current_ua = -EINVAL; info->constant_charge_current_max_ua = -EINVAL; info->constant_charge_voltage_max_uv = -EINVAL; info->tricklecharge_current_ua = -EINVAL; info->precharge_voltage_max_uv = -EINVAL; info->charge_restart_voltage_uv = -EINVAL; info->overvoltage_limit_uv = -EINVAL; info->maintenance_charge = NULL; info->alert_low_temp_charge_current_ua = -EINVAL; info->alert_low_temp_charge_voltage_uv = -EINVAL; info->alert_high_temp_charge_current_ua = -EINVAL; info->alert_high_temp_charge_voltage_uv = -EINVAL; info->temp_ambient_alert_min = INT_MIN; info->temp_ambient_alert_max = INT_MAX; info->temp_alert_min = INT_MIN; info->temp_alert_max = INT_MAX; info->temp_min = INT_MIN; info->temp_max = INT_MAX; info->factory_internal_resistance_uohm = -EINVAL; info->resist_table = NULL; info->bti_resistance_ohm = -EINVAL; info->bti_resistance_tolerance = -EINVAL; for (index = 0; index < POWER_SUPPLY_OCV_TEMP_MAX; index++) { info->ocv_table[index] = NULL; info->ocv_temp[index] = -EINVAL; info->ocv_table_size[index] = -EINVAL; } /* The property and field names below must correspond to elements * in enum power_supply_property. For reasoning, see * Documentation/power/power_supply_class.rst. */ if (!fwnode_property_read_string(fwnode, "device-chemistry", &value)) { if (!strcmp("nickel-cadmium", value)) info->technology = POWER_SUPPLY_TECHNOLOGY_NiCd; else if (!strcmp("nickel-metal-hydride", value)) info->technology = POWER_SUPPLY_TECHNOLOGY_NiMH; else if (!strcmp("lithium-ion", value)) /* Imprecise lithium-ion type */ info->technology = POWER_SUPPLY_TECHNOLOGY_LION; else if (!strcmp("lithium-ion-polymer", value)) info->technology = POWER_SUPPLY_TECHNOLOGY_LIPO; else if (!strcmp("lithium-ion-iron-phosphate", value)) info->technology = POWER_SUPPLY_TECHNOLOGY_LiFe; else if (!strcmp("lithium-ion-manganese-oxide", value)) info->technology = POWER_SUPPLY_TECHNOLOGY_LiMn; else dev_warn(&psy->dev, "%s unknown battery type\n", value); } fwnode_property_read_u32(fwnode, "energy-full-design-microwatt-hours", &info->energy_full_design_uwh); fwnode_property_read_u32(fwnode, "charge-full-design-microamp-hours", &info->charge_full_design_uah); fwnode_property_read_u32(fwnode, "voltage-min-design-microvolt", &info->voltage_min_design_uv); fwnode_property_read_u32(fwnode, "voltage-max-design-microvolt", &info->voltage_max_design_uv); fwnode_property_read_u32(fwnode, "trickle-charge-current-microamp", &info->tricklecharge_current_ua); fwnode_property_read_u32(fwnode, "precharge-current-microamp", &info->precharge_current_ua); fwnode_property_read_u32(fwnode, "precharge-upper-limit-microvolt", &info->precharge_voltage_max_uv); fwnode_property_read_u32(fwnode, "charge-term-current-microamp", &info->charge_term_current_ua); fwnode_property_read_u32(fwnode, "re-charge-voltage-microvolt", &info->charge_restart_voltage_uv); fwnode_property_read_u32(fwnode, "over-voltage-threshold-microvolt", &info->overvoltage_limit_uv); fwnode_property_read_u32(fwnode, "constant-charge-current-max-microamp", &info->constant_charge_current_max_ua); fwnode_property_read_u32(fwnode, 
"constant-charge-voltage-max-microvolt", &info->constant_charge_voltage_max_uv); fwnode_property_read_u32(fwnode, "factory-internal-resistance-micro-ohms", &info->factory_internal_resistance_uohm); if (!fwnode_property_read_u32_array(fwnode, "ambient-celsius", min_max, ARRAY_SIZE(min_max))) { info->temp_ambient_alert_min = min_max[0]; info->temp_ambient_alert_max = min_max[1]; } if (!fwnode_property_read_u32_array(fwnode, "alert-celsius", min_max, ARRAY_SIZE(min_max))) { info->temp_alert_min = min_max[0]; info->temp_alert_max = min_max[1]; } if (!fwnode_property_read_u32_array(fwnode, "operating-range-celsius", min_max, ARRAY_SIZE(min_max))) { info->temp_min = min_max[0]; info->temp_max = min_max[1]; } /* * The below code uses raw of-data parsing to parse * /schemas/types.yaml#/definitions/uint32-matrix * data, so for now this is only support with of. */ if (!battery_np) goto out_ret_pointer; len = of_property_count_u32_elems(battery_np, "ocv-capacity-celsius"); if (len < 0 && len != -EINVAL) { err = len; goto out_put_node; } else if (len > POWER_SUPPLY_OCV_TEMP_MAX) { dev_err(&psy->dev, "Too many temperature values\n"); err = -EINVAL; goto out_put_node; } else if (len > 0) { of_property_read_u32_array(battery_np, "ocv-capacity-celsius", info->ocv_temp, len); } for (index = 0; index < len; index++) { struct power_supply_battery_ocv_table *table; char *propname; int i, tab_len, size; propname = kasprintf(GFP_KERNEL, "ocv-capacity-table-%d", index); if (!propname) { power_supply_put_battery_info(psy, info); err = -ENOMEM; goto out_put_node; } list = of_get_property(battery_np, propname, &size); if (!list || !size) { dev_err(&psy->dev, "failed to get %s\n", propname); kfree(propname); power_supply_put_battery_info(psy, info); err = -EINVAL; goto out_put_node; } kfree(propname); tab_len = size / (2 * sizeof(__be32)); info->ocv_table_size[index] = tab_len; table = info->ocv_table[index] = devm_kcalloc(&psy->dev, tab_len, sizeof(*table), GFP_KERNEL); if (!info->ocv_table[index]) { power_supply_put_battery_info(psy, info); err = -ENOMEM; goto out_put_node; } for (i = 0; i < tab_len; i++) { table[i].ocv = be32_to_cpu(*list); list++; table[i].capacity = be32_to_cpu(*list); list++; } } list = of_get_property(battery_np, "resistance-temp-table", &len); if (!list || !len) goto out_ret_pointer; info->resist_table_size = len / (2 * sizeof(__be32)); resist_table = info->resist_table = devm_kcalloc(&psy->dev, info->resist_table_size, sizeof(*resist_table), GFP_KERNEL); if (!info->resist_table) { power_supply_put_battery_info(psy, info); err = -ENOMEM; goto out_put_node; } for (index = 0; index < info->resist_table_size; index++) { resist_table[index].temp = be32_to_cpu(*list++); resist_table[index].resistance = be32_to_cpu(*list++); } out_ret_pointer: /* Finally return the whole thing */ *info_out = info; out_put_node: fwnode_handle_put(fwnode); of_node_put(battery_np); return err; } EXPORT_SYMBOL_GPL(power_supply_get_battery_info); void power_supply_put_battery_info(struct power_supply *psy, struct power_supply_battery_info *info) { int i; for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { if (info->ocv_table[i]) devm_kfree(&psy->dev, info->ocv_table[i]); } if (info->resist_table) devm_kfree(&psy->dev, info->resist_table); devm_kfree(&psy->dev, info); } EXPORT_SYMBOL_GPL(power_supply_put_battery_info); const enum power_supply_property power_supply_battery_info_properties[] = { POWER_SUPPLY_PROP_TECHNOLOGY, POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 
POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, POWER_SUPPLY_PROP_PRECHARGE_CURRENT, POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX, POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX, POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN, POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX, POWER_SUPPLY_PROP_TEMP_ALERT_MIN, POWER_SUPPLY_PROP_TEMP_ALERT_MAX, POWER_SUPPLY_PROP_TEMP_MIN, POWER_SUPPLY_PROP_TEMP_MAX, }; EXPORT_SYMBOL_GPL(power_supply_battery_info_properties); const size_t power_supply_battery_info_properties_size = ARRAY_SIZE(power_supply_battery_info_properties); EXPORT_SYMBOL_GPL(power_supply_battery_info_properties_size); bool power_supply_battery_info_has_prop(struct power_supply_battery_info *info, enum power_supply_property psp) { if (!info) return false; switch (psp) { case POWER_SUPPLY_PROP_TECHNOLOGY: return info->technology != POWER_SUPPLY_TECHNOLOGY_UNKNOWN; case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN: return info->energy_full_design_uwh >= 0; case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: return info->charge_full_design_uah >= 0; case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: return info->voltage_min_design_uv >= 0; case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: return info->voltage_max_design_uv >= 0; case POWER_SUPPLY_PROP_PRECHARGE_CURRENT: return info->precharge_current_ua >= 0; case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT: return info->charge_term_current_ua >= 0; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX: return info->constant_charge_current_max_ua >= 0; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX: return info->constant_charge_voltage_max_uv >= 0; case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN: return info->temp_ambient_alert_min > INT_MIN; case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX: return info->temp_ambient_alert_max < INT_MAX; case POWER_SUPPLY_PROP_TEMP_ALERT_MIN: return info->temp_alert_min > INT_MIN; case POWER_SUPPLY_PROP_TEMP_ALERT_MAX: return info->temp_alert_max < INT_MAX; case POWER_SUPPLY_PROP_TEMP_MIN: return info->temp_min > INT_MIN; case POWER_SUPPLY_PROP_TEMP_MAX: return info->temp_max < INT_MAX; default: return false; } } EXPORT_SYMBOL_GPL(power_supply_battery_info_has_prop); int power_supply_battery_info_get_prop(struct power_supply_battery_info *info, enum power_supply_property psp, union power_supply_propval *val) { if (!info) return -EINVAL; if (!power_supply_battery_info_has_prop(info, psp)) return -EINVAL; switch (psp) { case POWER_SUPPLY_PROP_TECHNOLOGY: val->intval = info->technology; return 0; case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN: val->intval = info->energy_full_design_uwh; return 0; case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: val->intval = info->charge_full_design_uah; return 0; case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: val->intval = info->voltage_min_design_uv; return 0; case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: val->intval = info->voltage_max_design_uv; return 0; case POWER_SUPPLY_PROP_PRECHARGE_CURRENT: val->intval = info->precharge_current_ua; return 0; case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT: val->intval = info->charge_term_current_ua; return 0; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX: val->intval = info->constant_charge_current_max_ua; return 0; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX: val->intval = info->constant_charge_voltage_max_uv; return 0; case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN: val->intval = info->temp_ambient_alert_min; return 0; case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX: val->intval = info->temp_ambient_alert_max; return 0; case 
POWER_SUPPLY_PROP_TEMP_ALERT_MIN: val->intval = info->temp_alert_min; return 0; case POWER_SUPPLY_PROP_TEMP_ALERT_MAX: val->intval = info->temp_alert_max; return 0; case POWER_SUPPLY_PROP_TEMP_MIN: val->intval = info->temp_min; return 0; case POWER_SUPPLY_PROP_TEMP_MAX: val->intval = info->temp_max; return 0; default: return -EINVAL; } } EXPORT_SYMBOL_GPL(power_supply_battery_info_get_prop); /** * power_supply_temp2resist_simple() - find the battery internal resistance * percent from temperature * @table: Pointer to battery resistance temperature table * @table_len: The table length * @temp: Current temperature * * This helper function is used to look up battery internal resistance percent * according to current temperature value from the resistance temperature table, * and the table must be ordered descending. Then the actual battery internal * resistance = the ideal battery internal resistance * percent / 100. * * Return: the battery internal resistance percent */ int power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table, int table_len, int temp) { int i, high, low; for (i = 0; i < table_len; i++) if (temp > table[i].temp) break; /* The library function will deal with high == low */ if (i == 0) high = low = i; else if (i == table_len) high = low = i - 1; else high = (low = i) - 1; return fixp_linear_interpolate(table[low].temp, table[low].resistance, table[high].temp, table[high].resistance, temp); } EXPORT_SYMBOL_GPL(power_supply_temp2resist_simple); /** * power_supply_vbat2ri() - find the battery internal resistance * from the battery voltage * @info: The battery information container * @vbat_uv: The battery voltage in microvolt * @charging: If we are charging (true) or not (false) * * This helper function is used to look up battery internal resistance * according to current battery voltage. Depending on whether the battery * is currently charging or not, different resistance will be returned. * * Returns the internal resistance in microohm or negative error code. */ int power_supply_vbat2ri(struct power_supply_battery_info *info, int vbat_uv, bool charging) { struct power_supply_vbat_ri_table *vbat2ri; int table_len; int i, high, low; /* * If we are charging, and the battery supplies a separate table * for this state, we use that in order to compensate for the * charging voltage. Otherwise we use the main table. */ if (charging && info->vbat2ri_charging) { vbat2ri = info->vbat2ri_charging; table_len = info->vbat2ri_charging_size; } else { vbat2ri = info->vbat2ri_discharging; table_len = info->vbat2ri_discharging_size; } /* * If no tables are specified, or if we are above the highest voltage in * the voltage table, just return the factory specified internal resistance. 
*/ if (!vbat2ri || (table_len <= 0) || (vbat_uv > vbat2ri[0].vbat_uv)) { if (charging && (info->factory_internal_resistance_charging_uohm > 0)) return info->factory_internal_resistance_charging_uohm; else return info->factory_internal_resistance_uohm; } /* Break loop at table_len - 1 because that is the highest index */ for (i = 0; i < table_len - 1; i++) if (vbat_uv > vbat2ri[i].vbat_uv) break; /* The library function will deal with high == low */ if ((i == 0) || (i == (table_len - 1))) high = i; else high = i - 1; low = i; return fixp_linear_interpolate(vbat2ri[low].vbat_uv, vbat2ri[low].ri_uohm, vbat2ri[high].vbat_uv, vbat2ri[high].ri_uohm, vbat_uv); } EXPORT_SYMBOL_GPL(power_supply_vbat2ri); struct power_supply_maintenance_charge_table * power_supply_get_maintenance_charging_setting(struct power_supply_battery_info *info, int index) { if (index >= info->maintenance_charge_size) return NULL; return &info->maintenance_charge[index]; } EXPORT_SYMBOL_GPL(power_supply_get_maintenance_charging_setting); /** * power_supply_ocv2cap_simple() - find the battery capacity * @table: Pointer to battery OCV lookup table * @table_len: OCV table length * @ocv: Current OCV value * * This helper function is used to look up battery capacity according to * current OCV value from one OCV table, and the OCV table must be ordered * descending. * * Return: the battery capacity. */ int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, int table_len, int ocv) { int i, high, low; for (i = 0; i < table_len; i++) if (ocv > table[i].ocv) break; /* The library function will deal with high == low */ if (i == 0) high = low = i; else if (i == table_len) high = low = i - 1; else high = (low = i) - 1; return fixp_linear_interpolate(table[low].ocv, table[low].capacity, table[high].ocv, table[high].capacity, ocv); } EXPORT_SYMBOL_GPL(power_supply_ocv2cap_simple); struct power_supply_battery_ocv_table * power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, int temp, int *table_len) { int best_temp_diff = INT_MAX, temp_diff; u8 i, best_index = 0; if (!info->ocv_table[0]) return NULL; for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { /* Out of capacity tables */ if (!info->ocv_table[i]) break; temp_diff = abs(info->ocv_temp[i] - temp); if (temp_diff < best_temp_diff) { best_temp_diff = temp_diff; best_index = i; } } *table_len = info->ocv_table_size[best_index]; return info->ocv_table[best_index]; } EXPORT_SYMBOL_GPL(power_supply_find_ocv2cap_table); int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, int ocv, int temp) { struct power_supply_battery_ocv_table *table; int table_len; table = power_supply_find_ocv2cap_table(info, temp, &table_len); if (!table) return -EINVAL; return power_supply_ocv2cap_simple(table, table_len, ocv); } EXPORT_SYMBOL_GPL(power_supply_batinfo_ocv2cap); bool power_supply_battery_bti_in_range(struct power_supply_battery_info *info, int resistance) { int low, high; /* Nothing like this can be checked */ if (info->bti_resistance_ohm <= 0) return false; /* This will be extremely strict and unlikely to work */ if (info->bti_resistance_tolerance <= 0) return (info->bti_resistance_ohm == resistance); low = info->bti_resistance_ohm - (info->bti_resistance_ohm * info->bti_resistance_tolerance) / 100; high = info->bti_resistance_ohm + (info->bti_resistance_ohm * info->bti_resistance_tolerance) / 100; return ((resistance >= low) && (resistance <= high)); } EXPORT_SYMBOL_GPL(power_supply_battery_bti_in_range); static bool psy_has_property(const 
struct power_supply_desc *psy_desc, enum power_supply_property psp) { bool found = false; int i; for (i = 0; i < psy_desc->num_properties; i++) { if (psy_desc->properties[i] == psp) { found = true; break; } } return found; } int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { if (atomic_read(&psy->use_cnt) <= 0) { if (!psy->initialized) return -EAGAIN; return -ENODEV; } if (psy_has_property(psy->desc, psp)) return psy->desc->get_property(psy, psp, val); else if (power_supply_battery_info_has_prop(psy->battery_info, psp)) return power_supply_battery_info_get_prop(psy->battery_info, psp, val); else return -EINVAL; } EXPORT_SYMBOL_GPL(power_supply_get_property); int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val) { if (atomic_read(&psy->use_cnt) <= 0 || !psy->desc->set_property) return -ENODEV; return psy->desc->set_property(psy, psp, val); } EXPORT_SYMBOL_GPL(power_supply_set_property); int power_supply_property_is_writeable(struct power_supply *psy, enum power_supply_property psp) { if (atomic_read(&psy->use_cnt) <= 0 || !psy->desc->property_is_writeable) return -ENODEV; return psy->desc->property_is_writeable(psy, psp); } EXPORT_SYMBOL_GPL(power_supply_property_is_writeable); void power_supply_external_power_changed(struct power_supply *psy) { if (atomic_read(&psy->use_cnt) <= 0 || !psy->desc->external_power_changed) return; psy->desc->external_power_changed(psy); } EXPORT_SYMBOL_GPL(power_supply_external_power_changed); int power_supply_powers(struct power_supply *psy, struct device *dev) { return sysfs_create_link(&psy->dev.kobj, &dev->kobj, "powers"); } EXPORT_SYMBOL_GPL(power_supply_powers); static void power_supply_dev_release(struct device *dev) { struct power_supply *psy = to_power_supply(dev); dev_dbg(dev, "%s\n", __func__); kfree(psy); } int power_supply_reg_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&power_supply_notifier, nb); } EXPORT_SYMBOL_GPL(power_supply_reg_notifier); void power_supply_unreg_notifier(struct notifier_block *nb) { blocking_notifier_chain_unregister(&power_supply_notifier, nb); } EXPORT_SYMBOL_GPL(power_supply_unreg_notifier); #ifdef CONFIG_THERMAL static int power_supply_read_temp(struct thermal_zone_device *tzd, int *temp) { struct power_supply *psy; union power_supply_propval val; int ret; WARN_ON(tzd == NULL); psy = thermal_zone_device_priv(tzd); ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_TEMP, &val); if (ret) return ret; /* Convert tenths of degree Celsius to milli degree Celsius. 
*/ *temp = val.intval * 100; return ret; } static struct thermal_zone_device_ops psy_tzd_ops = { .get_temp = power_supply_read_temp, }; static int psy_register_thermal(struct power_supply *psy) { int ret; if (psy->desc->no_thermal) return 0; /* Register battery zone device psy reports temperature */ if (psy_has_property(psy->desc, POWER_SUPPLY_PROP_TEMP)) { /* Prefer our hwmon device and avoid duplicates */ struct thermal_zone_params tzp = { .no_hwmon = IS_ENABLED(CONFIG_POWER_SUPPLY_HWMON) }; psy->tzd = thermal_tripless_zone_device_register(psy->desc->name, psy, &psy_tzd_ops, &tzp); if (IS_ERR(psy->tzd)) return PTR_ERR(psy->tzd); ret = thermal_zone_device_enable(psy->tzd); if (ret) thermal_zone_device_unregister(psy->tzd); return ret; } return 0; } static void psy_unregister_thermal(struct power_supply *psy) { if (IS_ERR_OR_NULL(psy->tzd)) return; thermal_zone_device_unregister(psy->tzd); } #else static int psy_register_thermal(struct power_supply *psy) { return 0; } static void psy_unregister_thermal(struct power_supply *psy) { } #endif static struct power_supply *__must_check __power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg, bool ws) { struct device *dev; struct power_supply *psy; int rc; if (!desc || !desc->name || !desc->properties || !desc->num_properties) return ERR_PTR(-EINVAL); if (!parent) pr_warn("%s: Expected proper parent device for '%s'\n", __func__, desc->name); if (psy_has_property(desc, POWER_SUPPLY_PROP_USB_TYPE) && (!desc->usb_types || !desc->num_usb_types)) return ERR_PTR(-EINVAL); psy = kzalloc(sizeof(*psy), GFP_KERNEL); if (!psy) return ERR_PTR(-ENOMEM); dev = &psy->dev; device_initialize(dev); dev->class = &power_supply_class; dev->type = &power_supply_dev_type; dev->parent = parent; dev->release = power_supply_dev_release; dev_set_drvdata(dev, psy); psy->desc = desc; if (cfg) { dev->groups = cfg->attr_grp; psy->drv_data = cfg->drv_data; psy->of_node = cfg->fwnode ? to_of_node(cfg->fwnode) : cfg->of_node; dev->of_node = psy->of_node; psy->supplied_to = cfg->supplied_to; psy->num_supplicants = cfg->num_supplicants; } rc = dev_set_name(dev, "%s", desc->name); if (rc) goto dev_set_name_failed; INIT_WORK(&psy->changed_work, power_supply_changed_work); INIT_DELAYED_WORK(&psy->deferred_register_work, power_supply_deferred_register_work); rc = power_supply_check_supplies(psy); if (rc) { dev_dbg(dev, "Not all required supplies found, defer probe\n"); goto check_supplies_failed; } /* * Expose constant battery info, if it is available. While there are * some chargers accessing constant battery data, we only want to * expose battery data to userspace for battery devices. */ if (desc->type == POWER_SUPPLY_TYPE_BATTERY) { rc = power_supply_get_battery_info(psy, &psy->battery_info); if (rc && rc != -ENODEV && rc != -ENOENT) goto check_supplies_failed; } spin_lock_init(&psy->changed_lock); rc = device_add(dev); if (rc) goto device_add_failed; rc = device_init_wakeup(dev, ws); if (rc) goto wakeup_init_failed; rc = psy_register_thermal(psy); if (rc) goto register_thermal_failed; rc = power_supply_create_triggers(psy); if (rc) goto create_triggers_failed; rc = power_supply_add_hwmon_sysfs(psy); if (rc) goto add_hwmon_sysfs_failed; /* * Update use_cnt after any uevents (most notably from device_add()). * We are here still during driver's probe but * the power_supply_uevent() calls back driver's get_property * method so: * 1. Driver did not assigned the returned struct power_supply, * 2. 
Driver could not finish initialization (anything in its probe * after calling power_supply_register()). */ atomic_inc(&psy->use_cnt); psy->initialized = true; queue_delayed_work(system_power_efficient_wq, &psy->deferred_register_work, POWER_SUPPLY_DEFERRED_REGISTER_TIME); return psy; add_hwmon_sysfs_failed: power_supply_remove_triggers(psy); create_triggers_failed: psy_unregister_thermal(psy); register_thermal_failed: wakeup_init_failed: device_del(dev); device_add_failed: check_supplies_failed: dev_set_name_failed: put_device(dev); return ERR_PTR(rc); } /** * power_supply_register() - Register new power supply * @parent: Device to be a parent of power supply's device, usually * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, * may be NULL * * Return: A pointer to newly allocated power_supply on success * or ERR_PTR otherwise. * Use power_supply_unregister() on returned power_supply pointer to release * resources. */ struct power_supply *__must_check power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg) { return __power_supply_register(parent, desc, cfg, true); } EXPORT_SYMBOL_GPL(power_supply_register); /** * power_supply_register_no_ws() - Register new non-waking-source power supply * @parent: Device to be a parent of power supply's device, usually * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, * may be NULL * * Return: A pointer to newly allocated power_supply on success * or ERR_PTR otherwise. * Use power_supply_unregister() on returned power_supply pointer to release * resources. */ struct power_supply *__must_check power_supply_register_no_ws(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg) { return __power_supply_register(parent, desc, cfg, false); } EXPORT_SYMBOL_GPL(power_supply_register_no_ws); static void devm_power_supply_release(struct device *dev, void *res) { struct power_supply **psy = res; power_supply_unregister(*psy); } /** * devm_power_supply_register() - Register managed power supply * @parent: Device to be a parent of power supply's device, usually * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, * may be NULL * * Return: A pointer to newly allocated power_supply on success * or ERR_PTR otherwise. * The returned power_supply pointer will be automatically unregistered * on driver detach. 
*/ struct power_supply *__must_check devm_power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg) { struct power_supply **ptr, *psy; ptr = devres_alloc(devm_power_supply_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); psy = __power_supply_register(parent, desc, cfg, true); if (IS_ERR(psy)) { devres_free(ptr); } else { *ptr = psy; devres_add(parent, ptr); } return psy; } EXPORT_SYMBOL_GPL(devm_power_supply_register); /** * devm_power_supply_register_no_ws() - Register managed non-waking-source power supply * @parent: Device to be a parent of power supply's device, usually * the device which probe function calls this * @desc: Description of power supply, must be valid through whole * lifetime of this power supply * @cfg: Run-time specific configuration accessed during registering, * may be NULL * * Return: A pointer to newly allocated power_supply on success * or ERR_PTR otherwise. * The returned power_supply pointer will be automatically unregistered * on driver detach. */ struct power_supply *__must_check devm_power_supply_register_no_ws(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg) { struct power_supply **ptr, *psy; ptr = devres_alloc(devm_power_supply_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); psy = __power_supply_register(parent, desc, cfg, false); if (IS_ERR(psy)) { devres_free(ptr); } else { *ptr = psy; devres_add(parent, ptr); } return psy; } EXPORT_SYMBOL_GPL(devm_power_supply_register_no_ws); /** * power_supply_unregister() - Remove this power supply from system * @psy: Pointer to power supply to unregister * * Remove this power supply from the system. The resources of power supply * will be freed here or on last power_supply_put() call. */ void power_supply_unregister(struct power_supply *psy) { WARN_ON(atomic_dec_return(&psy->use_cnt)); psy->removing = true; cancel_work_sync(&psy->changed_work); cancel_delayed_work_sync(&psy->deferred_register_work); sysfs_remove_link(&psy->dev.kobj, "powers"); power_supply_remove_hwmon_sysfs(psy); power_supply_remove_triggers(psy); psy_unregister_thermal(psy); device_init_wakeup(&psy->dev, false); device_unregister(&psy->dev); } EXPORT_SYMBOL_GPL(power_supply_unregister); void *power_supply_get_drvdata(struct power_supply *psy) { return psy->drv_data; } EXPORT_SYMBOL_GPL(power_supply_get_drvdata); static int __init power_supply_class_init(void) { power_supply_init_attrs(); return class_register(&power_supply_class); } static void __exit power_supply_class_exit(void) { class_unregister(&power_supply_class); } subsys_initcall(power_supply_class_init); module_exit(power_supply_class_exit); MODULE_DESCRIPTION("Universal power supply monitor class"); MODULE_AUTHOR("Ian Molton <spyro@f2s.com>"); MODULE_AUTHOR("Szabolcs Gyurko"); MODULE_AUTHOR("Anton Vorontsov <cbou@mail.ru>");
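/*
 * Illustrative usage sketch, not part of power_supply_core.c: a minimal,
 * hypothetical battery driver consuming the registration API above. The
 * name "foo-battery", the property set and the constant values returned
 * from get_property() are assumptions made only for this example.
 */
#include <linux/platform_device.h>
#include <linux/power_supply.h>
#include <linux/property.h>

struct foo_battery {
	struct power_supply *psy;
};

static enum power_supply_property foo_battery_props[] = {
	POWER_SUPPLY_PROP_STATUS,
	POWER_SUPPLY_PROP_CAPACITY,
};

static int foo_battery_get_property(struct power_supply *psy,
				    enum power_supply_property psp,
				    union power_supply_propval *val)
{
	switch (psp) {
	case POWER_SUPPLY_PROP_STATUS:
		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;	/* placeholder */
		return 0;
	case POWER_SUPPLY_PROP_CAPACITY:
		val->intval = 50;				/* placeholder */
		return 0;
	default:
		/* Constant battery-info properties fall back to the core */
		return -EINVAL;
	}
}

static const struct power_supply_desc foo_battery_desc = {
	.name		= "foo-battery",
	.type		= POWER_SUPPLY_TYPE_BATTERY,
	.properties	= foo_battery_props,
	.num_properties	= ARRAY_SIZE(foo_battery_props),
	.get_property	= foo_battery_get_property,
};

static int foo_battery_probe(struct platform_device *pdev)
{
	struct power_supply_config cfg = {};
	struct foo_battery *bat;

	bat = devm_kzalloc(&pdev->dev, sizeof(*bat), GFP_KERNEL);
	if (!bat)
		return -ENOMEM;

	cfg.drv_data = bat;
	cfg.fwnode = dev_fwnode(&pdev->dev);

	/*
	 * Managed registration: the supply is unregistered automatically on
	 * driver detach, via devm_power_supply_register() above.
	 */
	bat->psy = devm_power_supply_register(&pdev->dev, &foo_battery_desc,
					      &cfg);
	return PTR_ERR_OR_ZERO(bat->psy);
}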
// SPDX-License-Identifier: GPL-2.0-only
/*
 * klist.c - Routines for manipulating klists.
 *
 * Copyright (C) 2005 Patrick Mochel
 *
 * This klist interface provides a couple of structures that wrap around
 * struct list_head to provide explicit list "head" (struct klist) and list
 * "node" (struct klist_node) objects. For struct klist, a spinlock is
 * included that protects access to the actual list itself. struct
 * klist_node provides a pointer to the klist that owns it and a kref
 * reference count that indicates the number of current users of that node
 * in the list.
 *
 * The entire point is to provide an interface for iterating over a list
 * that is safe and allows for modification of the list during the
 * iteration (e.g. insertion and removal), including modification of the
 * current node on the list.
 *
 * It works using a 3rd object type - struct klist_iter - that is declared
 * and initialized before an iteration. klist_next() is used to acquire the
 * next element in the list. It returns NULL if there are no more items.
 * Internally, that routine takes the klist's lock, decrements the
 * reference count of the previous klist_node and increments the count of
 * the next klist_node. It then drops the lock and returns.
 *
 * There are primitives for adding and removing nodes to/from a klist.
 * When deleting, klist_del() will simply decrement the reference count.
 * Only when the count goes to 0 is the node removed from the list.
 * klist_remove() will try to delete the node from the list and block until
 * it is actually removed. This is useful for objects (like devices) that
 * have been removed from the system and must be freed (but must wait until
 * all accessors have finished).
*/ #include <linux/klist.h> #include <linux/export.h> #include <linux/sched.h> /* * Use the lowest bit of n_klist to mark deleted nodes and exclude * dead ones from iteration. */ #define KNODE_DEAD 1LU #define KNODE_KLIST_MASK ~KNODE_DEAD static struct klist *knode_klist(struct klist_node *knode) { return (struct klist *) ((unsigned long)knode->n_klist & KNODE_KLIST_MASK); } static bool knode_dead(struct klist_node *knode) { return (unsigned long)knode->n_klist & KNODE_DEAD; } static void knode_set_klist(struct klist_node *knode, struct klist *klist) { knode->n_klist = klist; /* no knode deserves to start its life dead */ WARN_ON(knode_dead(knode)); } static void knode_kill(struct klist_node *knode) { /* and no knode should die twice ever either, see we're very humane */ WARN_ON(knode_dead(knode)); *(unsigned long *)&knode->n_klist |= KNODE_DEAD; } /** * klist_init - Initialize a klist structure. * @k: The klist we're initializing. * @get: The get function for the embedding object (NULL if none) * @put: The put function for the embedding object (NULL if none) * * Initialises the klist structure. If the klist_node structures are * going to be embedded in refcounted objects (necessary for safe * deletion) then the get/put arguments are used to initialise * functions that take and release references on the embedding * objects. */ void klist_init(struct klist *k, void (*get)(struct klist_node *), void (*put)(struct klist_node *)) { INIT_LIST_HEAD(&k->k_list); spin_lock_init(&k->k_lock); k->get = get; k->put = put; } EXPORT_SYMBOL_GPL(klist_init); static void add_head(struct klist *k, struct klist_node *n) { spin_lock(&k->k_lock); list_add(&n->n_node, &k->k_list); spin_unlock(&k->k_lock); } static void add_tail(struct klist *k, struct klist_node *n) { spin_lock(&k->k_lock); list_add_tail(&n->n_node, &k->k_list); spin_unlock(&k->k_lock); } static void klist_node_init(struct klist *k, struct klist_node *n) { INIT_LIST_HEAD(&n->n_node); kref_init(&n->n_ref); knode_set_klist(n, k); if (k->get) k->get(n); } /** * klist_add_head - Initialize a klist_node and add it to front. * @n: node we're adding. * @k: klist it's going on. */ void klist_add_head(struct klist_node *n, struct klist *k) { klist_node_init(k, n); add_head(k, n); } EXPORT_SYMBOL_GPL(klist_add_head); /** * klist_add_tail - Initialize a klist_node and add it to back. * @n: node we're adding. * @k: klist it's going on. */ void klist_add_tail(struct klist_node *n, struct klist *k) { klist_node_init(k, n); add_tail(k, n); } EXPORT_SYMBOL_GPL(klist_add_tail); /** * klist_add_behind - Init a klist_node and add it after an existing node * @n: node we're adding. * @pos: node to put @n after */ void klist_add_behind(struct klist_node *n, struct klist_node *pos) { struct klist *k = knode_klist(pos); klist_node_init(k, n); spin_lock(&k->k_lock); list_add(&n->n_node, &pos->n_node); spin_unlock(&k->k_lock); } EXPORT_SYMBOL_GPL(klist_add_behind); /** * klist_add_before - Init a klist_node and add it before an existing node * @n: node we're adding. 
 * @pos: node to put @n before
 */
void klist_add_before(struct klist_node *n, struct klist_node *pos)
{
	struct klist *k = knode_klist(pos);

	klist_node_init(k, n);
	spin_lock(&k->k_lock);
	list_add_tail(&n->n_node, &pos->n_node);
	spin_unlock(&k->k_lock);
}
EXPORT_SYMBOL_GPL(klist_add_before);

struct klist_waiter {
	struct list_head list;
	struct klist_node *node;
	struct task_struct *process;
	int woken;
};

static DEFINE_SPINLOCK(klist_remove_lock);
static LIST_HEAD(klist_remove_waiters);

static void klist_release(struct kref *kref)
{
	struct klist_waiter *waiter, *tmp;
	struct klist_node *n = container_of(kref, struct klist_node, n_ref);

	WARN_ON(!knode_dead(n));
	list_del(&n->n_node);
	spin_lock(&klist_remove_lock);
	list_for_each_entry_safe(waiter, tmp, &klist_remove_waiters, list) {
		if (waiter->node != n)
			continue;

		list_del(&waiter->list);
		waiter->woken = 1;
		mb();
		wake_up_process(waiter->process);
	}
	spin_unlock(&klist_remove_lock);
	knode_set_klist(n, NULL);
}

static int klist_dec_and_del(struct klist_node *n)
{
	return kref_put(&n->n_ref, klist_release);
}

static void klist_put(struct klist_node *n, bool kill)
{
	struct klist *k = knode_klist(n);
	void (*put)(struct klist_node *) = k->put;

	spin_lock(&k->k_lock);
	if (kill)
		knode_kill(n);
	if (!klist_dec_and_del(n))
		put = NULL;
	spin_unlock(&k->k_lock);
	if (put)
		put(n);
}

/**
 * klist_del - Decrement the reference count of node and try to remove.
 * @n: node we're deleting.
 */
void klist_del(struct klist_node *n)
{
	klist_put(n, true);
}
EXPORT_SYMBOL_GPL(klist_del);

/**
 * klist_remove - Decrement the refcount of node and wait for it to go away.
 * @n: node we're removing.
 */
void klist_remove(struct klist_node *n)
{
	struct klist_waiter waiter;

	waiter.node = n;
	waiter.process = current;
	waiter.woken = 0;
	spin_lock(&klist_remove_lock);
	list_add(&waiter.list, &klist_remove_waiters);
	spin_unlock(&klist_remove_lock);

	klist_del(n);

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (waiter.woken)
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}
EXPORT_SYMBOL_GPL(klist_remove);

/**
 * klist_node_attached - Say whether a node is bound to a list or not.
 * @n: Node that we're testing.
 */
int klist_node_attached(struct klist_node *n)
{
	return (n->n_klist != NULL);
}
EXPORT_SYMBOL_GPL(klist_node_attached);

/**
 * klist_iter_init_node - Initialize a klist_iter structure.
 * @k: klist we're iterating.
 * @i: klist_iter we're filling.
 * @n: node to start with.
 *
 * Similar to klist_iter_init(), but starts the action off with @n,
 * instead of with the list head.
 */
void klist_iter_init_node(struct klist *k, struct klist_iter *i,
			  struct klist_node *n)
{
	i->i_klist = k;
	i->i_cur = NULL;
	if (n && kref_get_unless_zero(&n->n_ref))
		i->i_cur = n;
}
EXPORT_SYMBOL_GPL(klist_iter_init_node);

/**
 * klist_iter_init - Initialize a klist_iter structure.
 * @k: klist we're iterating.
 * @i: klist_iter structure we're filling.
 *
 * Similar to klist_iter_init_node(), but start with the list head.
 */
void klist_iter_init(struct klist *k, struct klist_iter *i)
{
	klist_iter_init_node(k, i, NULL);
}
EXPORT_SYMBOL_GPL(klist_iter_init);

/**
 * klist_iter_exit - Finish a list iteration.
 * @i: Iterator structure.
 *
 * Must be called when done iterating over list, as it decrements the
 * refcount of the current node. Necessary in case iteration exited before
 * the end of the list was reached, and always good form.
*/ void klist_iter_exit(struct klist_iter *i) { if (i->i_cur) { klist_put(i->i_cur, false); i->i_cur = NULL; } } EXPORT_SYMBOL_GPL(klist_iter_exit); static struct klist_node *to_klist_node(struct list_head *n) { return container_of(n, struct klist_node, n_node); } /** * klist_prev - Ante up prev node in list. * @i: Iterator structure. * * First grab list lock. Decrement the reference count of the previous * node, if there was one. Grab the prev node, increment its reference * count, drop the lock, and return that prev node. */ struct klist_node *klist_prev(struct klist_iter *i) { void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *prev; unsigned long flags; spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { prev = to_klist_node(last->n_node.prev); if (!klist_dec_and_del(last)) put = NULL; } else prev = to_klist_node(i->i_klist->k_list.prev); i->i_cur = NULL; while (prev != to_klist_node(&i->i_klist->k_list)) { if (likely(!knode_dead(prev))) { kref_get(&prev->n_ref); i->i_cur = prev; break; } prev = to_klist_node(prev->n_node.prev); } spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); return i->i_cur; } EXPORT_SYMBOL_GPL(klist_prev); /** * klist_next - Ante up next node in list. * @i: Iterator structure. * * First grab list lock. Decrement the reference count of the previous * node, if there was one. Grab the next node, increment its reference * count, drop the lock, and return that next node. */ struct klist_node *klist_next(struct klist_iter *i) { void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *next; unsigned long flags; spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { next = to_klist_node(last->n_node.next); if (!klist_dec_and_del(last)) put = NULL; } else next = to_klist_node(i->i_klist->k_list.next); i->i_cur = NULL; while (next != to_klist_node(&i->i_klist->k_list)) { if (likely(!knode_dead(next))) { kref_get(&next->n_ref); i->i_cur = next; break; } next = to_klist_node(next->n_node.next); } spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); return i->i_cur; } EXPORT_SYMBOL_GPL(klist_next);
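/*
 * Illustrative usage sketch, not part of klist.c: a hypothetical consumer
 * that keeps "foo" objects on a klist and walks them with a klist_iter.
 * The structure and function names are assumptions made only for this
 * example.
 */
#include <linux/klist.h>
#include <linux/slab.h>
#include <linux/container_of.h>

struct foo {
	struct klist_node knode;
	int id;
};

static struct klist foo_list;

static void foo_list_setup(void)
{
	/* No embedding-object get/put hooks in this simple sketch */
	klist_init(&foo_list, NULL, NULL);
}

static struct foo *foo_add(int id)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;
	f->id = id;
	/* Initializes the node and links it at the tail under the klist lock */
	klist_add_tail(&f->knode, &foo_list);
	return f;
}

static struct foo *foo_find(int id)
{
	struct klist_iter iter;
	struct klist_node *kn;
	struct foo *found = NULL;

	klist_iter_init(&foo_list, &iter);
	while ((kn = klist_next(&iter))) {
		struct foo *f = container_of(kn, struct foo, knode);

		if (f->id == id) {
			found = f;
			break;
		}
	}
	/* Releases the iterator's reference; needed when breaking out early */
	klist_iter_exit(&iter);
	return found;
}

static void foo_del(struct foo *f)
{
	/* The node is unlinked only when its reference count reaches zero */
	klist_del(&f->knode);
}

/*
 * foo_del() returns immediately; a caller that must guarantee the node is
 * gone before freeing the containing object would use klist_remove()
 * instead, which blocks until the last reference is dropped.
 */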
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ALSA sequencer Client Manager
 * Copyright (c) 1998-2001 by Frank van de Pol <fvdpol@coil.demon.nl>
 *                            Jaroslav Kysela <perex@perex.cz>
 *                            Takashi Iwai <tiwai@suse.de>
 */

#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <sound/core.h>
#include <sound/minors.h>
#include <linux/kmod.h>

#include <sound/seq_kernel.h>
#include <sound/ump.h>
#include "seq_clientmgr.h"
#include "seq_memory.h"
#include "seq_queue.h"
#include "seq_timer.h"
#include "seq_info.h"
#include "seq_system.h"
#include "seq_ump_convert.h"
#include <sound/seq_device.h>
#ifdef CONFIG_COMPAT
#include <linux/compat.h>
#endif

/* Client Manager
 * this module handles the connections of userland and kernel clients
 *
 */

/*
 * There are four ranges of client numbers (last two shared):
 * 0..15: global clients
 * 16..127: statically allocated client numbers for cards 0..27
 * 128..191: dynamically allocated client numbers for cards 28..31
 * 128..191: dynamically allocated client numbers for applications
 */

/* number of kernel non-card clients */
#define SNDRV_SEQ_GLOBAL_CLIENTS	16
/* clients per cards, for static clients */
#define SNDRV_SEQ_CLIENTS_PER_CARD	4
/* dynamically allocated client numbers (both kernel drivers and user space) */
#define SNDRV_SEQ_DYNAMIC_CLIENTS_BEGIN	128

#define SNDRV_SEQ_LFLG_INPUT	0x0001
#define SNDRV_SEQ_LFLG_OUTPUT	0x0002
#define
SNDRV_SEQ_LFLG_OPEN (SNDRV_SEQ_LFLG_INPUT|SNDRV_SEQ_LFLG_OUTPUT) static DEFINE_SPINLOCK(clients_lock); static DEFINE_MUTEX(register_mutex); /* * client table */ static char clienttablock[SNDRV_SEQ_MAX_CLIENTS]; static struct snd_seq_client *clienttab[SNDRV_SEQ_MAX_CLIENTS]; static struct snd_seq_usage client_usage; /* * prototypes */ static int bounce_error_event(struct snd_seq_client *client, struct snd_seq_event *event, int err, int atomic, int hop); static int snd_seq_deliver_single_event(struct snd_seq_client *client, struct snd_seq_event *event, int filter, int atomic, int hop); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) static void free_ump_info(struct snd_seq_client *client); #endif /* */ static inline unsigned short snd_seq_file_flags(struct file *file) { switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) { case FMODE_WRITE: return SNDRV_SEQ_LFLG_OUTPUT; case FMODE_READ: return SNDRV_SEQ_LFLG_INPUT; default: return SNDRV_SEQ_LFLG_OPEN; } } static inline int snd_seq_write_pool_allocated(struct snd_seq_client *client) { return snd_seq_total_cells(client->pool) > 0; } /* return pointer to client structure for specified id */ static struct snd_seq_client *clientptr(int clientid) { if (clientid < 0 || clientid >= SNDRV_SEQ_MAX_CLIENTS) { pr_debug("ALSA: seq: oops. Trying to get pointer to client %d\n", clientid); return NULL; } return clienttab[clientid]; } struct snd_seq_client *snd_seq_client_use_ptr(int clientid) { unsigned long flags; struct snd_seq_client *client; if (clientid < 0 || clientid >= SNDRV_SEQ_MAX_CLIENTS) { pr_debug("ALSA: seq: oops. Trying to get pointer to client %d\n", clientid); return NULL; } spin_lock_irqsave(&clients_lock, flags); client = clientptr(clientid); if (client) goto __lock; if (clienttablock[clientid]) { spin_unlock_irqrestore(&clients_lock, flags); return NULL; } spin_unlock_irqrestore(&clients_lock, flags); #ifdef CONFIG_MODULES if (!in_interrupt()) { static DECLARE_BITMAP(client_requested, SNDRV_SEQ_GLOBAL_CLIENTS); static DECLARE_BITMAP(card_requested, SNDRV_CARDS); if (clientid < SNDRV_SEQ_GLOBAL_CLIENTS) { int idx; if (!test_and_set_bit(clientid, client_requested)) { for (idx = 0; idx < 15; idx++) { if (seq_client_load[idx] < 0) break; if (seq_client_load[idx] == clientid) { request_module("snd-seq-client-%i", clientid); break; } } } } else if (clientid < SNDRV_SEQ_DYNAMIC_CLIENTS_BEGIN) { int card = (clientid - SNDRV_SEQ_GLOBAL_CLIENTS) / SNDRV_SEQ_CLIENTS_PER_CARD; if (card < snd_ecards_limit) { if (!test_and_set_bit(card, card_requested)) snd_request_card(card); snd_seq_device_load_drivers(); } } spin_lock_irqsave(&clients_lock, flags); client = clientptr(clientid); if (client) goto __lock; spin_unlock_irqrestore(&clients_lock, flags); } #endif return NULL; __lock: snd_use_lock_use(&client->use_lock); spin_unlock_irqrestore(&clients_lock, flags); return client; } /* Take refcount and perform ioctl_mutex lock on the given client; * used only for OSS sequencer * Unlock via snd_seq_client_ioctl_unlock() below */ bool snd_seq_client_ioctl_lock(int clientid) { struct snd_seq_client *client; client = snd_seq_client_use_ptr(clientid); if (!client) return false; mutex_lock(&client->ioctl_mutex); /* The client isn't unrefed here; see snd_seq_client_ioctl_unlock() */ return true; } EXPORT_SYMBOL_GPL(snd_seq_client_ioctl_lock); /* Unlock and unref the given client; for OSS sequencer use only */ void snd_seq_client_ioctl_unlock(int clientid) { struct snd_seq_client *client; client = snd_seq_client_use_ptr(clientid); if (WARN_ON(!client)) return; 
mutex_unlock(&client->ioctl_mutex); /* The doubly unrefs below are intentional; the first one releases the * leftover from snd_seq_client_ioctl_lock() above, and the second one * is for releasing snd_seq_client_use_ptr() in this function */ snd_seq_client_unlock(client); snd_seq_client_unlock(client); } EXPORT_SYMBOL_GPL(snd_seq_client_ioctl_unlock); static void usage_alloc(struct snd_seq_usage *res, int num) { res->cur += num; if (res->cur > res->peak) res->peak = res->cur; } static void usage_free(struct snd_seq_usage *res, int num) { res->cur -= num; } /* initialise data structures */ int __init client_init_data(void) { /* zap out the client table */ memset(&clienttablock, 0, sizeof(clienttablock)); memset(&clienttab, 0, sizeof(clienttab)); return 0; } static struct snd_seq_client *seq_create_client1(int client_index, int poolsize) { int c; struct snd_seq_client *client; /* init client data */ client = kzalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) return NULL; client->pool = snd_seq_pool_new(poolsize); if (client->pool == NULL) { kfree(client); return NULL; } client->type = NO_CLIENT; snd_use_lock_init(&client->use_lock); rwlock_init(&client->ports_lock); mutex_init(&client->ports_mutex); INIT_LIST_HEAD(&client->ports_list_head); mutex_init(&client->ioctl_mutex); client->ump_endpoint_port = -1; /* find free slot in the client table */ spin_lock_irq(&clients_lock); if (client_index < 0) { for (c = SNDRV_SEQ_DYNAMIC_CLIENTS_BEGIN; c < SNDRV_SEQ_MAX_CLIENTS; c++) { if (clienttab[c] || clienttablock[c]) continue; clienttab[client->number = c] = client; spin_unlock_irq(&clients_lock); return client; } } else { if (clienttab[client_index] == NULL && !clienttablock[client_index]) { clienttab[client->number = client_index] = client; spin_unlock_irq(&clients_lock); return client; } } spin_unlock_irq(&clients_lock); snd_seq_pool_delete(&client->pool); kfree(client); return NULL; /* no free slot found or busy, return failure code */ } static int seq_free_client1(struct snd_seq_client *client) { if (!client) return 0; spin_lock_irq(&clients_lock); clienttablock[client->number] = 1; clienttab[client->number] = NULL; spin_unlock_irq(&clients_lock); snd_seq_delete_all_ports(client); snd_seq_queue_client_leave(client->number); snd_use_lock_sync(&client->use_lock); if (client->pool) snd_seq_pool_delete(&client->pool); spin_lock_irq(&clients_lock); clienttablock[client->number] = 0; spin_unlock_irq(&clients_lock); return 0; } static void seq_free_client(struct snd_seq_client * client) { mutex_lock(&register_mutex); switch (client->type) { case NO_CLIENT: pr_warn("ALSA: seq: Trying to free unused client %d\n", client->number); break; case USER_CLIENT: case KERNEL_CLIENT: seq_free_client1(client); usage_free(&client_usage, 1); break; default: pr_err("ALSA: seq: Trying to free client %d with undefined type = %d\n", client->number, client->type); } mutex_unlock(&register_mutex); snd_seq_system_client_ev_client_exit(client->number); } /* -------------------------------------------------------- */ /* create a user client */ static int snd_seq_open(struct inode *inode, struct file *file) { int c, mode; /* client id */ struct snd_seq_client *client; struct snd_seq_user_client *user; int err; err = stream_open(inode, file); if (err < 0) return err; mutex_lock(&register_mutex); client = seq_create_client1(-1, SNDRV_SEQ_DEFAULT_EVENTS); if (!client) { mutex_unlock(&register_mutex); return -ENOMEM; /* failure code */ } mode = snd_seq_file_flags(file); if (mode & SNDRV_SEQ_LFLG_INPUT) client->accept_input 
= 1; if (mode & SNDRV_SEQ_LFLG_OUTPUT) client->accept_output = 1; user = &client->data.user; user->fifo = NULL; user->fifo_pool_size = 0; if (mode & SNDRV_SEQ_LFLG_INPUT) { user->fifo_pool_size = SNDRV_SEQ_DEFAULT_CLIENT_EVENTS; user->fifo = snd_seq_fifo_new(user->fifo_pool_size); if (user->fifo == NULL) { seq_free_client1(client); kfree(client); mutex_unlock(&register_mutex); return -ENOMEM; } } usage_alloc(&client_usage, 1); client->type = USER_CLIENT; mutex_unlock(&register_mutex); c = client->number; file->private_data = client; /* fill client data */ user->file = file; sprintf(client->name, "Client-%d", c); client->data.user.owner = get_pid(task_pid(current)); /* make others aware this new client */ snd_seq_system_client_ev_client_start(c); return 0; } /* delete a user client */ static int snd_seq_release(struct inode *inode, struct file *file) { struct snd_seq_client *client = file->private_data; if (client) { seq_free_client(client); if (client->data.user.fifo) snd_seq_fifo_delete(&client->data.user.fifo); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) free_ump_info(client); #endif put_pid(client->data.user.owner); kfree(client); } return 0; } static bool event_is_compatible(const struct snd_seq_client *client, const struct snd_seq_event *ev) { if (snd_seq_ev_is_ump(ev) && !client->midi_version) return false; if (snd_seq_ev_is_ump(ev) && snd_seq_ev_is_variable(ev)) return false; return true; } /* handle client read() */ /* possible error values: * -ENXIO invalid client or file open mode * -ENOSPC FIFO overflow (the flag is cleared after this error report) * -EINVAL no enough user-space buffer to write the whole event * -EFAULT seg. fault during copy to user space */ static ssize_t snd_seq_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { struct snd_seq_client *client = file->private_data; struct snd_seq_fifo *fifo; size_t aligned_size; int err; long result = 0; struct snd_seq_event_cell *cell; if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_INPUT)) return -ENXIO; if (!access_ok(buf, count)) return -EFAULT; /* check client structures are in place */ if (snd_BUG_ON(!client)) return -ENXIO; if (!client->accept_input) return -ENXIO; fifo = client->data.user.fifo; if (!fifo) return -ENXIO; if (atomic_read(&fifo->overflow) > 0) { /* buffer overflow is detected */ snd_seq_fifo_clear(fifo); /* return error code */ return -ENOSPC; } cell = NULL; err = 0; snd_seq_fifo_lock(fifo); if (IS_ENABLED(CONFIG_SND_SEQ_UMP) && client->midi_version > 0) aligned_size = sizeof(struct snd_seq_ump_event); else aligned_size = sizeof(struct snd_seq_event); /* while data available in queue */ while (count >= aligned_size) { int nonblock; nonblock = (file->f_flags & O_NONBLOCK) || result > 0; err = snd_seq_fifo_cell_out(fifo, &cell, nonblock); if (err < 0) break; if (!event_is_compatible(client, &cell->event)) { snd_seq_cell_free(cell); cell = NULL; continue; } if (snd_seq_ev_is_variable(&cell->event)) { struct snd_seq_ump_event tmpev; memcpy(&tmpev, &cell->event, aligned_size); tmpev.data.ext.len &= ~SNDRV_SEQ_EXT_MASK; if (copy_to_user(buf, &tmpev, aligned_size)) { err = -EFAULT; break; } count -= aligned_size; buf += aligned_size; err = snd_seq_expand_var_event(&cell->event, count, (char __force *)buf, 0, aligned_size); if (err < 0) break; result += err; count -= err; buf += err; } else { if (copy_to_user(buf, &cell->event, aligned_size)) { err = -EFAULT; break; } count -= aligned_size; buf += aligned_size; } snd_seq_cell_free(cell); cell = NULL; /* to be sure */ result += aligned_size; } if (err 
< 0) { if (cell) snd_seq_fifo_cell_putback(fifo, cell); if (err == -EAGAIN && result > 0) err = 0; } snd_seq_fifo_unlock(fifo); return (err < 0) ? err : result; } /* * check access permission to the port */ static int check_port_perm(struct snd_seq_client_port *port, unsigned int flags) { if ((port->capability & flags) != flags) return 0; return flags; } /* * check if the destination client is available, and return the pointer * if filter is non-zero, client filter bitmap is tested. */ static struct snd_seq_client *get_event_dest_client(struct snd_seq_event *event, int filter) { struct snd_seq_client *dest; dest = snd_seq_client_use_ptr(event->dest.client); if (dest == NULL) return NULL; if (! dest->accept_input) goto __not_avail; if ((dest->filter & SNDRV_SEQ_FILTER_USE_EVENT) && ! test_bit(event->type, dest->event_filter)) goto __not_avail; if (filter && !(dest->filter & filter)) goto __not_avail; return dest; /* ok - accessible */ __not_avail: snd_seq_client_unlock(dest); return NULL; } /* * Return the error event. * * If the receiver client is a user client, the original event is * encapsulated in SNDRV_SEQ_EVENT_BOUNCE as variable length event. If * the original event is also variable length, the external data is * copied after the event record. * If the receiver client is a kernel client, the original event is * quoted in SNDRV_SEQ_EVENT_KERNEL_ERROR, since this requires no extra * kmalloc. */ static int bounce_error_event(struct snd_seq_client *client, struct snd_seq_event *event, int err, int atomic, int hop) { struct snd_seq_event bounce_ev; int result; if (client == NULL || ! (client->filter & SNDRV_SEQ_FILTER_BOUNCE) || ! client->accept_input) return 0; /* ignored */ /* set up quoted error */ memset(&bounce_ev, 0, sizeof(bounce_ev)); bounce_ev.type = SNDRV_SEQ_EVENT_KERNEL_ERROR; bounce_ev.flags = SNDRV_SEQ_EVENT_LENGTH_FIXED; bounce_ev.queue = SNDRV_SEQ_QUEUE_DIRECT; bounce_ev.source.client = SNDRV_SEQ_CLIENT_SYSTEM; bounce_ev.source.port = SNDRV_SEQ_PORT_SYSTEM_ANNOUNCE; bounce_ev.dest.client = client->number; bounce_ev.dest.port = event->source.port; bounce_ev.data.quote.origin = event->dest; bounce_ev.data.quote.event = event; bounce_ev.data.quote.value = -err; /* use positive value */ result = snd_seq_deliver_single_event(NULL, &bounce_ev, 0, atomic, hop + 1); if (result < 0) { client->event_lost++; return result; } return result; } /* * rewrite the time-stamp of the event record with the curren time * of the given queue. * return non-zero if updated. */ static int update_timestamp_of_queue(struct snd_seq_event *event, int queue, int real_time) { struct snd_seq_queue *q; q = queueptr(queue); if (! 
q) return 0; event->queue = queue; event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK; if (real_time) { event->time.time = snd_seq_timer_get_cur_time(q->timer, true); event->flags |= SNDRV_SEQ_TIME_STAMP_REAL; } else { event->time.tick = snd_seq_timer_get_cur_tick(q->timer); event->flags |= SNDRV_SEQ_TIME_STAMP_TICK; } queuefree(q); return 1; } /* deliver a single event; called from below and UMP converter */ int __snd_seq_deliver_single_event(struct snd_seq_client *dest, struct snd_seq_client_port *dest_port, struct snd_seq_event *event, int atomic, int hop) { switch (dest->type) { case USER_CLIENT: if (!dest->data.user.fifo) return 0; return snd_seq_fifo_event_in(dest->data.user.fifo, event); case KERNEL_CLIENT: if (!dest_port->event_input) return 0; return dest_port->event_input(event, snd_seq_ev_is_direct(event), dest_port->private_data, atomic, hop); } return 0; } /* * deliver an event to the specified destination. * if filter is non-zero, client filter bitmap is tested. * * RETURN VALUE: 0 : if succeeded * <0 : error */ static int snd_seq_deliver_single_event(struct snd_seq_client *client, struct snd_seq_event *event, int filter, int atomic, int hop) { struct snd_seq_client *dest = NULL; struct snd_seq_client_port *dest_port = NULL; int result = -ENOENT; int direct; direct = snd_seq_ev_is_direct(event); dest = get_event_dest_client(event, filter); if (dest == NULL) goto __skip; dest_port = snd_seq_port_use_ptr(dest, event->dest.port); if (dest_port == NULL) goto __skip; /* check permission */ if (! check_port_perm(dest_port, SNDRV_SEQ_PORT_CAP_WRITE)) { result = -EPERM; goto __skip; } if (dest_port->timestamping) update_timestamp_of_queue(event, dest_port->time_queue, dest_port->time_real); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { if (snd_seq_ev_is_ump(event)) { result = snd_seq_deliver_from_ump(client, dest, dest_port, event, atomic, hop); goto __skip; } else if (snd_seq_client_is_ump(dest)) { result = snd_seq_deliver_to_ump(client, dest, dest_port, event, atomic, hop); goto __skip; } } #endif /* CONFIG_SND_SEQ_UMP */ result = __snd_seq_deliver_single_event(dest, dest_port, event, atomic, hop); __skip: if (dest_port) snd_seq_port_unlock(dest_port); if (dest) snd_seq_client_unlock(dest); if (result < 0 && !direct) { result = bounce_error_event(client, event, result, atomic, hop); } return result; } /* * send the event to all subscribers: */ static int __deliver_to_subscribers(struct snd_seq_client *client, struct snd_seq_event *event, struct snd_seq_client_port *src_port, int atomic, int hop) { struct snd_seq_subscribers *subs; int err, result = 0, num_ev = 0; union __snd_seq_event event_saved; size_t saved_size; struct snd_seq_port_subs_info *grp; /* save original event record */ saved_size = snd_seq_event_packet_size(event); memcpy(&event_saved, event, saved_size); grp = &src_port->c_src; /* lock list */ if (atomic) read_lock(&grp->list_lock); else down_read_nested(&grp->list_mutex, hop); list_for_each_entry(subs, &grp->list_head, src_list) { /* both ports ready? 
*/ if (atomic_read(&subs->ref_count) != 2) continue; event->dest = subs->info.dest; if (subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP) /* convert time according to flag with subscription */ update_timestamp_of_queue(event, subs->info.queue, subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIME_REAL); err = snd_seq_deliver_single_event(client, event, 0, atomic, hop); if (err < 0) { /* save first error that occurs and continue */ if (!result) result = err; continue; } num_ev++; /* restore original event record */ memcpy(event, &event_saved, saved_size); } if (atomic) read_unlock(&grp->list_lock); else up_read(&grp->list_mutex); memcpy(event, &event_saved, saved_size); return (result < 0) ? result : num_ev; } static int deliver_to_subscribers(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { struct snd_seq_client_port *src_port; int ret = 0, ret2; src_port = snd_seq_port_use_ptr(client, event->source.port); if (src_port) { ret = __deliver_to_subscribers(client, event, src_port, atomic, hop); snd_seq_port_unlock(src_port); } if (client->ump_endpoint_port < 0 || event->source.port == client->ump_endpoint_port) return ret; src_port = snd_seq_port_use_ptr(client, client->ump_endpoint_port); if (!src_port) return ret; ret2 = __deliver_to_subscribers(client, event, src_port, atomic, hop); snd_seq_port_unlock(src_port); return ret2 < 0 ? ret2 : ret; } /* deliver an event to the destination port(s). * if the event is to subscribers or broadcast, the event is dispatched * to multiple targets. * * RETURN VALUE: n > 0 : the number of delivered events. * n == 0 : the event was not passed to any client. * n < 0 : error - event was not processed. */ static int snd_seq_deliver_event(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { int result; hop++; if (hop >= SNDRV_SEQ_MAX_HOPS) { pr_debug("ALSA: seq: too long delivery path (%d:%d->%d:%d)\n", event->source.client, event->source.port, event->dest.client, event->dest.port); return -EMLINK; } if (snd_seq_ev_is_variable(event) && snd_BUG_ON(atomic && (event->data.ext.len & SNDRV_SEQ_EXT_USRPTR))) return -EINVAL; if (event->queue == SNDRV_SEQ_ADDRESS_SUBSCRIBERS || event->dest.client == SNDRV_SEQ_ADDRESS_SUBSCRIBERS) result = deliver_to_subscribers(client, event, atomic, hop); else result = snd_seq_deliver_single_event(client, event, 0, atomic, hop); return result; } /* * dispatch an event cell: * This function is called only from queue check routines in timer * interrupts or after enqueued. * The event cell shall be released or re-queued in this function. * * RETURN VALUE: n > 0 : the number of delivered events. * n == 0 : the event was not passed to any client. * n < 0 : error - event was not processed. */ int snd_seq_dispatch_event(struct snd_seq_event_cell *cell, int atomic, int hop) { struct snd_seq_client *client; int result; if (snd_BUG_ON(!cell)) return -EINVAL; client = snd_seq_client_use_ptr(cell->event.source.client); if (client == NULL) { snd_seq_cell_free(cell); /* release this cell */ return -EINVAL; } if (!snd_seq_ev_is_ump(&cell->event) && cell->event.type == SNDRV_SEQ_EVENT_NOTE) { /* NOTE event: * the event cell is re-used as a NOTE-OFF event and * enqueued again. */ struct snd_seq_event tmpev, *ev; /* reserve this event to enqueue note-off later */ tmpev = cell->event; tmpev.type = SNDRV_SEQ_EVENT_NOTEON; result = snd_seq_deliver_event(client, &tmpev, atomic, hop); /* * This was originally a note event. We now re-use the * cell for the note-off event. 
*/ ev = &cell->event; ev->type = SNDRV_SEQ_EVENT_NOTEOFF; ev->flags |= SNDRV_SEQ_PRIORITY_HIGH; /* add the duration time */ switch (ev->flags & SNDRV_SEQ_TIME_STAMP_MASK) { case SNDRV_SEQ_TIME_STAMP_TICK: cell->event.time.tick += ev->data.note.duration; break; case SNDRV_SEQ_TIME_STAMP_REAL: /* unit for duration is ms */ ev->time.time.tv_nsec += 1000000 * (ev->data.note.duration % 1000); ev->time.time.tv_sec += ev->data.note.duration / 1000 + ev->time.time.tv_nsec / 1000000000; ev->time.time.tv_nsec %= 1000000000; break; } ev->data.note.velocity = ev->data.note.off_velocity; /* Now queue this cell as the note off event */ if (snd_seq_enqueue_event(cell, atomic, hop) < 0) snd_seq_cell_free(cell); /* release this cell */ } else { /* Normal events: * event cell is freed after processing the event */ result = snd_seq_deliver_event(client, &cell->event, atomic, hop); snd_seq_cell_free(cell); } snd_seq_client_unlock(client); return result; } /* Allocate a cell from client pool and enqueue it to queue: * if pool is empty and blocking is TRUE, sleep until a new cell is * available. */ static int snd_seq_client_enqueue_event(struct snd_seq_client *client, struct snd_seq_event *event, struct file *file, int blocking, int atomic, int hop, struct mutex *mutexp) { struct snd_seq_event_cell *cell; int err; /* special queue values - force direct passing */ if (event->queue == SNDRV_SEQ_ADDRESS_SUBSCRIBERS) { event->dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; event->queue = SNDRV_SEQ_QUEUE_DIRECT; } else if (event->dest.client == SNDRV_SEQ_ADDRESS_SUBSCRIBERS) { /* check presence of source port */ struct snd_seq_client_port *src_port = snd_seq_port_use_ptr(client, event->source.port); if (src_port == NULL) return -EINVAL; snd_seq_port_unlock(src_port); } /* direct event processing without enqueued */ if (snd_seq_ev_is_direct(event)) { if (!snd_seq_ev_is_ump(event) && event->type == SNDRV_SEQ_EVENT_NOTE) return -EINVAL; /* this event must be enqueued! */ return snd_seq_deliver_event(client, event, atomic, hop); } /* Not direct, normal queuing */ if (snd_seq_queue_is_used(event->queue, client->number) <= 0) return -EINVAL; /* invalid queue */ if (! snd_seq_write_pool_allocated(client)) return -ENXIO; /* queue is not allocated */ /* allocate an event cell */ err = snd_seq_event_dup(client->pool, event, &cell, !blocking || atomic, file, mutexp); if (err < 0) return err; /* we got a cell. enqueue it. */ err = snd_seq_enqueue_event(cell, atomic, hop); if (err < 0) { snd_seq_cell_free(cell); return err; } return 0; } /* * check validity of event type and data length. * return non-zero if invalid. */ static int check_event_type_and_length(struct snd_seq_event *ev) { switch (snd_seq_ev_length_type(ev)) { case SNDRV_SEQ_EVENT_LENGTH_FIXED: if (snd_seq_ev_is_variable_type(ev)) return -EINVAL; break; case SNDRV_SEQ_EVENT_LENGTH_VARIABLE: if (! snd_seq_ev_is_variable_type(ev) || (ev->data.ext.len & ~SNDRV_SEQ_EXT_MASK) >= SNDRV_SEQ_MAX_EVENT_LEN) return -EINVAL; break; case SNDRV_SEQ_EVENT_LENGTH_VARUSR: if (! snd_seq_ev_is_direct(ev)) return -EINVAL; break; } return 0; } /* handle write() */ /* possible error values: * -ENXIO invalid client or file open mode * -ENOMEM malloc failed * -EFAULT seg. 
fault during copy from user space * -EINVAL invalid event * -EAGAIN no space in output pool * -EINTR interrupts while sleep * -EMLINK too many hops * others depends on return value from driver callback */ static ssize_t snd_seq_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct snd_seq_client *client = file->private_data; int written = 0, len; int err, handled; union __snd_seq_event __event; struct snd_seq_event *ev = &__event.legacy; if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT)) return -ENXIO; /* check client structures are in place */ if (snd_BUG_ON(!client)) return -ENXIO; if (!client->accept_output || client->pool == NULL) return -ENXIO; repeat: handled = 0; /* allocate the pool now if the pool is not allocated yet */ mutex_lock(&client->ioctl_mutex); if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) { err = snd_seq_pool_init(client->pool); if (err < 0) goto out; } /* only process whole events */ err = -EINVAL; while (count >= sizeof(struct snd_seq_event)) { /* Read in the event header from the user */ len = sizeof(struct snd_seq_event); if (copy_from_user(ev, buf, len)) { err = -EFAULT; break; } /* read in the rest bytes for UMP events */ if (snd_seq_ev_is_ump(ev)) { if (count < sizeof(struct snd_seq_ump_event)) break; if (copy_from_user((char *)ev + len, buf + len, sizeof(struct snd_seq_ump_event) - len)) { err = -EFAULT; break; } len = sizeof(struct snd_seq_ump_event); } ev->source.client = client->number; /* fill in client number */ /* Check for extension data length */ if (check_event_type_and_length(ev)) { err = -EINVAL; break; } if (!event_is_compatible(client, ev)) { err = -EINVAL; break; } /* check for special events */ if (!snd_seq_ev_is_ump(ev)) { if (ev->type == SNDRV_SEQ_EVENT_NONE) goto __skip_event; else if (snd_seq_ev_is_reserved(ev)) { err = -EINVAL; break; } } if (snd_seq_ev_is_variable(ev)) { int extlen = ev->data.ext.len & ~SNDRV_SEQ_EXT_MASK; if ((size_t)(extlen + len) > count) { /* back out, will get an error this time or next */ err = -EINVAL; break; } /* set user space pointer */ ev->data.ext.len = extlen | SNDRV_SEQ_EXT_USRPTR; ev->data.ext.ptr = (char __force *)buf + len; len += extlen; /* increment data length */ } else { #ifdef CONFIG_COMPAT if (client->convert32 && snd_seq_ev_is_varusr(ev)) ev->data.ext.ptr = (void __force *)compat_ptr(ev->data.raw32.d[1]); #endif } /* ok, enqueue it */ err = snd_seq_client_enqueue_event(client, ev, file, !(file->f_flags & O_NONBLOCK), 0, 0, &client->ioctl_mutex); if (err < 0) break; handled++; __skip_event: /* Update pointers and counts */ count -= len; buf += len; written += len; /* let's have a coffee break if too many events are queued */ if (++handled >= 200) { mutex_unlock(&client->ioctl_mutex); goto repeat; } } out: mutex_unlock(&client->ioctl_mutex); return written ? 
written : err; } /* * handle polling */ static __poll_t snd_seq_poll(struct file *file, poll_table * wait) { struct snd_seq_client *client = file->private_data; __poll_t mask = 0; /* check client structures are in place */ if (snd_BUG_ON(!client)) return EPOLLERR; if ((snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_INPUT) && client->data.user.fifo) { /* check if data is available in the outqueue */ if (snd_seq_fifo_poll_wait(client->data.user.fifo, file, wait)) mask |= EPOLLIN | EPOLLRDNORM; } if (snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT) { /* check if data is available in the pool */ if (!snd_seq_write_pool_allocated(client) || snd_seq_pool_poll_wait(client->pool, file, wait)) mask |= EPOLLOUT | EPOLLWRNORM; } return mask; } /*-----------------------------------------------------*/ static int snd_seq_ioctl_pversion(struct snd_seq_client *client, void *arg) { int *pversion = arg; *pversion = SNDRV_SEQ_VERSION; return 0; } static int snd_seq_ioctl_user_pversion(struct snd_seq_client *client, void *arg) { client->user_pversion = *(unsigned int *)arg; return 0; } static int snd_seq_ioctl_client_id(struct snd_seq_client *client, void *arg) { int *client_id = arg; *client_id = client->number; return 0; } /* SYSTEM_INFO ioctl() */ static int snd_seq_ioctl_system_info(struct snd_seq_client *client, void *arg) { struct snd_seq_system_info *info = arg; memset(info, 0, sizeof(*info)); /* fill the info fields */ info->queues = SNDRV_SEQ_MAX_QUEUES; info->clients = SNDRV_SEQ_MAX_CLIENTS; info->ports = SNDRV_SEQ_MAX_PORTS; info->channels = 256; /* fixed limit */ info->cur_clients = client_usage.cur; info->cur_queues = snd_seq_queue_get_cur_queues(); return 0; } /* RUNNING_MODE ioctl() */ static int snd_seq_ioctl_running_mode(struct snd_seq_client *client, void *arg) { struct snd_seq_running_info *info = arg; struct snd_seq_client *cptr; int err = 0; /* requested client number */ cptr = snd_seq_client_use_ptr(info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! */ #ifdef SNDRV_BIG_ENDIAN if (!info->big_endian) { err = -EINVAL; goto __err; } #else if (info->big_endian) { err = -EINVAL; goto __err; } #endif if (info->cpu_mode > sizeof(long)) { err = -EINVAL; goto __err; } cptr->convert32 = (info->cpu_mode < sizeof(long)); __err: snd_seq_client_unlock(cptr); return err; } /* CLIENT_INFO ioctl() */ static void get_client_info(struct snd_seq_client *cptr, struct snd_seq_client_info *info) { info->client = cptr->number; /* fill the info fields */ info->type = cptr->type; strcpy(info->name, cptr->name); info->filter = cptr->filter; info->event_lost = cptr->event_lost; memcpy(info->event_filter, cptr->event_filter, 32); info->group_filter = cptr->group_filter; info->num_ports = cptr->num_ports; if (cptr->type == USER_CLIENT) info->pid = pid_vnr(cptr->data.user.owner); else info->pid = -1; if (cptr->type == KERNEL_CLIENT) info->card = cptr->data.kernel.card ? cptr->data.kernel.card->number : -1; else info->card = -1; info->midi_version = cptr->midi_version; memset(info->reserved, 0, sizeof(info->reserved)); } static int snd_seq_ioctl_get_client_info(struct snd_seq_client *client, void *arg) { struct snd_seq_client_info *client_info = arg; struct snd_seq_client *cptr; /* requested client number */ cptr = snd_seq_client_use_ptr(client_info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! 
*/ get_client_info(cptr, client_info); snd_seq_client_unlock(cptr); return 0; } /* CLIENT_INFO ioctl() */ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, void *arg) { struct snd_seq_client_info *client_info = arg; /* it is not allowed to set the info fields for an another client */ if (client->number != client_info->client) return -EPERM; /* also client type must be set now */ if (client->type != client_info->type) return -EINVAL; /* check validity of midi_version field */ if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3) && client_info->midi_version > SNDRV_SEQ_CLIENT_UMP_MIDI_2_0) return -EINVAL; /* fill the info fields */ if (client_info->name[0]) strscpy(client->name, client_info->name, sizeof(client->name)); client->filter = client_info->filter; client->event_lost = client_info->event_lost; if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3)) client->midi_version = client_info->midi_version; memcpy(client->event_filter, client_info->event_filter, 32); client->group_filter = client_info->group_filter; return 0; } /* * CREATE PORT ioctl() */ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; struct snd_seq_client_port *port; struct snd_seq_port_callback *callback; int port_idx, err; /* it is not allowed to create the port for an another client */ if (info->addr.client != client->number) return -EPERM; if (client->type == USER_CLIENT && info->kernel) return -EINVAL; if ((info->capability & SNDRV_SEQ_PORT_CAP_UMP_ENDPOINT) && client->ump_endpoint_port >= 0) return -EBUSY; if (info->flags & SNDRV_SEQ_PORT_FLG_GIVEN_PORT) port_idx = info->addr.port; else port_idx = -1; if (port_idx >= SNDRV_SEQ_ADDRESS_UNKNOWN) return -EINVAL; err = snd_seq_create_port(client, port_idx, &port); if (err < 0) return err; if (client->type == KERNEL_CLIENT) { callback = info->kernel; if (callback) { if (callback->owner) port->owner = callback->owner; port->private_data = callback->private_data; port->private_free = callback->private_free; port->event_input = callback->event_input; port->c_src.open = callback->subscribe; port->c_src.close = callback->unsubscribe; port->c_dest.open = callback->use; port->c_dest.close = callback->unuse; } } info->addr = port->addr; snd_seq_set_port_info(port, info); if (info->capability & SNDRV_SEQ_PORT_CAP_UMP_ENDPOINT) client->ump_endpoint_port = port->addr.port; snd_seq_system_client_ev_port_start(port->addr.client, port->addr.port); snd_seq_port_unlock(port); return 0; } /* * DELETE PORT ioctl() */ static int snd_seq_ioctl_delete_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; int err; /* it is not allowed to remove the port for an another client */ if (info->addr.client != client->number) return -EPERM; err = snd_seq_delete_port(client, info->addr.port); if (err >= 0) { if (client->ump_endpoint_port == info->addr.port) client->ump_endpoint_port = -1; snd_seq_system_client_ev_port_exit(client->number, info->addr.port); } return err; } /* * GET_PORT_INFO ioctl() (on any client) */ static int snd_seq_ioctl_get_port_info(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; struct snd_seq_client *cptr; struct snd_seq_client_port *port; cptr = snd_seq_client_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; port = snd_seq_port_use_ptr(cptr, info->addr.port); if (port == NULL) { snd_seq_client_unlock(cptr); return -ENOENT; /* don't change */ } /* get port info */ snd_seq_get_port_info(port, 
info); snd_seq_port_unlock(port); snd_seq_client_unlock(cptr); return 0; } /* * SET_PORT_INFO ioctl() (only ports on this/own client) */ static int snd_seq_ioctl_set_port_info(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; struct snd_seq_client_port *port; if (info->addr.client != client->number) /* only set our own ports ! */ return -EPERM; port = snd_seq_port_use_ptr(client, info->addr.port); if (port) { snd_seq_set_port_info(port, info); snd_seq_port_unlock(port); } return 0; } /* * port subscription (connection) */ #define PERM_RD (SNDRV_SEQ_PORT_CAP_READ|SNDRV_SEQ_PORT_CAP_SUBS_READ) #define PERM_WR (SNDRV_SEQ_PORT_CAP_WRITE|SNDRV_SEQ_PORT_CAP_SUBS_WRITE) static int check_subscription_permission(struct snd_seq_client *client, struct snd_seq_client_port *sport, struct snd_seq_client_port *dport, struct snd_seq_port_subscribe *subs) { if (client->number != subs->sender.client && client->number != subs->dest.client) { /* connection by third client - check export permission */ if (check_port_perm(sport, SNDRV_SEQ_PORT_CAP_NO_EXPORT)) return -EPERM; if (check_port_perm(dport, SNDRV_SEQ_PORT_CAP_NO_EXPORT)) return -EPERM; } /* check read permission */ /* if sender or receiver is the subscribing client itself, * no permission check is necessary */ if (client->number != subs->sender.client) { if (! check_port_perm(sport, PERM_RD)) return -EPERM; } /* check write permission */ if (client->number != subs->dest.client) { if (! check_port_perm(dport, PERM_WR)) return -EPERM; } return 0; } /* * send an subscription notify event to user client: * client must be user client. */ int snd_seq_client_notify_subscription(int client, int port, struct snd_seq_port_subscribe *info, int evtype) { struct snd_seq_event event; memset(&event, 0, sizeof(event)); event.type = evtype; event.data.connect.dest = info->dest; event.data.connect.sender = info->sender; return snd_seq_system_notify(client, port, &event); /* non-atomic */ } /* * add to port's subscription list IOCTL interface */ static int snd_seq_ioctl_subscribe_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_subscribe *subs = arg; int result = -EINVAL; struct snd_seq_client *receiver = NULL, *sender = NULL; struct snd_seq_client_port *sport = NULL, *dport = NULL; receiver = snd_seq_client_use_ptr(subs->dest.client); if (!receiver) goto __end; sender = snd_seq_client_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); if (!sport) goto __end; dport = snd_seq_port_use_ptr(receiver, subs->dest.port); if (!dport) goto __end; result = check_subscription_permission(client, sport, dport, subs); if (result < 0) goto __end; /* connect them */ result = snd_seq_port_connect(client, sender, sport, receiver, dport, subs); if (! 
result) /* broadcast announce */ snd_seq_client_notify_subscription(SNDRV_SEQ_ADDRESS_SUBSCRIBERS, 0, subs, SNDRV_SEQ_EVENT_PORT_SUBSCRIBED); __end: if (sport) snd_seq_port_unlock(sport); if (dport) snd_seq_port_unlock(dport); if (sender) snd_seq_client_unlock(sender); if (receiver) snd_seq_client_unlock(receiver); return result; } /* * remove from port's subscription list */ static int snd_seq_ioctl_unsubscribe_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_subscribe *subs = arg; int result = -ENXIO; struct snd_seq_client *receiver = NULL, *sender = NULL; struct snd_seq_client_port *sport = NULL, *dport = NULL; receiver = snd_seq_client_use_ptr(subs->dest.client); if (!receiver) goto __end; sender = snd_seq_client_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); if (!sport) goto __end; dport = snd_seq_port_use_ptr(receiver, subs->dest.port); if (!dport) goto __end; result = check_subscription_permission(client, sport, dport, subs); if (result < 0) goto __end; result = snd_seq_port_disconnect(client, sender, sport, receiver, dport, subs); if (! result) /* broadcast announce */ snd_seq_client_notify_subscription(SNDRV_SEQ_ADDRESS_SUBSCRIBERS, 0, subs, SNDRV_SEQ_EVENT_PORT_UNSUBSCRIBED); __end: if (sport) snd_seq_port_unlock(sport); if (dport) snd_seq_port_unlock(dport); if (sender) snd_seq_client_unlock(sender); if (receiver) snd_seq_client_unlock(receiver); return result; } /* CREATE_QUEUE ioctl() */ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; struct snd_seq_queue *q; q = snd_seq_queue_alloc(client->number, info->locked, info->flags); if (IS_ERR(q)) return PTR_ERR(q); info->queue = q->queue; info->locked = q->locked; info->owner = q->owner; /* set queue name */ if (!info->name[0]) snprintf(info->name, sizeof(info->name), "Queue-%d", q->queue); strscpy(q->name, info->name, sizeof(q->name)); snd_use_lock_free(&q->use_lock); return 0; } /* DELETE_QUEUE ioctl() */ static int snd_seq_ioctl_delete_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; return snd_seq_queue_delete(client->number, info->queue); } /* GET_QUEUE_INFO ioctl() */ static int snd_seq_ioctl_get_queue_info(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; struct snd_seq_queue *q; q = queueptr(info->queue); if (q == NULL) return -EINVAL; memset(info, 0, sizeof(*info)); info->queue = q->queue; info->owner = q->owner; info->locked = q->locked; strscpy(info->name, q->name, sizeof(info->name)); queuefree(q); return 0; } /* SET_QUEUE_INFO ioctl() */ static int snd_seq_ioctl_set_queue_info(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; struct snd_seq_queue *q; if (info->owner != client->number) return -EINVAL; /* change owner/locked permission */ if (snd_seq_queue_check_access(info->queue, client->number)) { if (snd_seq_queue_set_owner(info->queue, client->number, info->locked) < 0) return -EPERM; if (info->locked) snd_seq_queue_use(info->queue, client->number, 1); } else { return -EPERM; } q = queueptr(info->queue); if (! 
q) return -EINVAL; if (q->owner != client->number) { queuefree(q); return -EPERM; } strscpy(q->name, info->name, sizeof(q->name)); queuefree(q); return 0; } /* GET_NAMED_QUEUE ioctl() */ static int snd_seq_ioctl_get_named_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; struct snd_seq_queue *q; q = snd_seq_queue_find_name(info->name); if (q == NULL) return -EINVAL; info->queue = q->queue; info->owner = q->owner; info->locked = q->locked; queuefree(q); return 0; } /* GET_QUEUE_STATUS ioctl() */ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_status *status = arg; struct snd_seq_queue *queue; struct snd_seq_timer *tmr; queue = queueptr(status->queue); if (queue == NULL) return -EINVAL; memset(status, 0, sizeof(*status)); status->queue = queue->queue; tmr = queue->timer; status->events = queue->tickq->cells + queue->timeq->cells; status->time = snd_seq_timer_get_cur_time(tmr, true); status->tick = snd_seq_timer_get_cur_tick(tmr); status->running = tmr->running; status->flags = queue->flags; queuefree(queue); return 0; } /* GET_QUEUE_TEMPO ioctl() */ static int snd_seq_ioctl_get_queue_tempo(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_tempo *tempo = arg; struct snd_seq_queue *queue; struct snd_seq_timer *tmr; queue = queueptr(tempo->queue); if (queue == NULL) return -EINVAL; memset(tempo, 0, sizeof(*tempo)); tempo->queue = queue->queue; tmr = queue->timer; tempo->tempo = tmr->tempo; tempo->ppq = tmr->ppq; tempo->skew_value = tmr->skew; tempo->skew_base = tmr->skew_base; queuefree(queue); return 0; } /* SET_QUEUE_TEMPO ioctl() */ int snd_seq_set_queue_tempo(int client, struct snd_seq_queue_tempo *tempo) { if (!snd_seq_queue_check_access(tempo->queue, client)) return -EPERM; return snd_seq_queue_timer_set_tempo(tempo->queue, client, tempo); } EXPORT_SYMBOL(snd_seq_set_queue_tempo); static int snd_seq_ioctl_set_queue_tempo(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_tempo *tempo = arg; int result; result = snd_seq_set_queue_tempo(client->number, tempo); return result < 0 ? 
result : 0; } /* GET_QUEUE_TIMER ioctl() */ static int snd_seq_ioctl_get_queue_timer(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_timer *timer = arg; struct snd_seq_queue *queue; struct snd_seq_timer *tmr; queue = queueptr(timer->queue); if (queue == NULL) return -EINVAL; mutex_lock(&queue->timer_mutex); tmr = queue->timer; memset(timer, 0, sizeof(*timer)); timer->queue = queue->queue; timer->type = tmr->type; if (tmr->type == SNDRV_SEQ_TIMER_ALSA) { timer->u.alsa.id = tmr->alsa_id; timer->u.alsa.resolution = tmr->preferred_resolution; } mutex_unlock(&queue->timer_mutex); queuefree(queue); return 0; } /* SET_QUEUE_TIMER ioctl() */ static int snd_seq_ioctl_set_queue_timer(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_timer *timer = arg; int result = 0; if (timer->type != SNDRV_SEQ_TIMER_ALSA) return -EINVAL; if (snd_seq_queue_check_access(timer->queue, client->number)) { struct snd_seq_queue *q; struct snd_seq_timer *tmr; q = queueptr(timer->queue); if (q == NULL) return -ENXIO; mutex_lock(&q->timer_mutex); tmr = q->timer; snd_seq_queue_timer_close(timer->queue); tmr->type = timer->type; if (tmr->type == SNDRV_SEQ_TIMER_ALSA) { tmr->alsa_id = timer->u.alsa.id; tmr->preferred_resolution = timer->u.alsa.resolution; } result = snd_seq_queue_timer_open(timer->queue); mutex_unlock(&q->timer_mutex); queuefree(q); } else { return -EPERM; } return result; } /* GET_QUEUE_CLIENT ioctl() */ static int snd_seq_ioctl_get_queue_client(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_client *info = arg; int used; used = snd_seq_queue_is_used(info->queue, client->number); if (used < 0) return -EINVAL; info->used = used; info->client = client->number; return 0; } /* SET_QUEUE_CLIENT ioctl() */ static int snd_seq_ioctl_set_queue_client(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_client *info = arg; int err; if (info->used >= 0) { err = snd_seq_queue_use(info->queue, client->number, info->used); if (err < 0) return err; } return snd_seq_ioctl_get_queue_client(client, arg); } /* GET_CLIENT_POOL ioctl() */ static int snd_seq_ioctl_get_client_pool(struct snd_seq_client *client, void *arg) { struct snd_seq_client_pool *info = arg; struct snd_seq_client *cptr; cptr = snd_seq_client_use_ptr(info->client); if (cptr == NULL) return -ENOENT; memset(info, 0, sizeof(*info)); info->client = cptr->number; info->output_pool = cptr->pool->size; info->output_room = cptr->pool->room; info->output_free = info->output_pool; info->output_free = snd_seq_unused_cells(cptr->pool); if (cptr->type == USER_CLIENT) { info->input_pool = cptr->data.user.fifo_pool_size; info->input_free = info->input_pool; info->input_free = snd_seq_fifo_unused_cells(cptr->data.user.fifo); } else { info->input_pool = 0; info->input_free = 0; } snd_seq_client_unlock(cptr); return 0; } /* SET_CLIENT_POOL ioctl() */ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client, void *arg) { struct snd_seq_client_pool *info = arg; int rc; if (client->number != info->client) return -EINVAL; /* can't change other clients */ if (info->output_pool >= 1 && info->output_pool <= SNDRV_SEQ_MAX_EVENTS && (! snd_seq_write_pool_allocated(client) || info->output_pool != client->pool->size)) { if (snd_seq_write_pool_allocated(client)) { /* is the pool in use? 
*/ if (atomic_read(&client->pool->counter)) return -EBUSY; /* remove all existing cells */ snd_seq_pool_mark_closing(client->pool); snd_seq_pool_done(client->pool); } client->pool->size = info->output_pool; rc = snd_seq_pool_init(client->pool); if (rc < 0) return rc; } if (client->type == USER_CLIENT && client->data.user.fifo != NULL && info->input_pool >= 1 && info->input_pool <= SNDRV_SEQ_MAX_CLIENT_EVENTS && info->input_pool != client->data.user.fifo_pool_size) { /* change pool size */ rc = snd_seq_fifo_resize(client->data.user.fifo, info->input_pool); if (rc < 0) return rc; client->data.user.fifo_pool_size = info->input_pool; } if (info->output_room >= 1 && info->output_room <= client->pool->size) { client->pool->room = info->output_room; } return snd_seq_ioctl_get_client_pool(client, arg); } /* REMOVE_EVENTS ioctl() */ static int snd_seq_ioctl_remove_events(struct snd_seq_client *client, void *arg) { struct snd_seq_remove_events *info = arg; /* * Input mostly not implemented XXX. */ if (info->remove_mode & SNDRV_SEQ_REMOVE_INPUT) { /* * No restrictions so for a user client we can clear * the whole fifo */ if (client->type == USER_CLIENT && client->data.user.fifo) snd_seq_fifo_clear(client->data.user.fifo); } if (info->remove_mode & SNDRV_SEQ_REMOVE_OUTPUT) snd_seq_queue_remove_cells(client->number, info); return 0; } /* * get subscription info */ static int snd_seq_ioctl_get_subscription(struct snd_seq_client *client, void *arg) { struct snd_seq_port_subscribe *subs = arg; int result; struct snd_seq_client *sender = NULL; struct snd_seq_client_port *sport = NULL; result = -EINVAL; sender = snd_seq_client_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); if (!sport) goto __end; result = snd_seq_port_get_subscription(&sport->c_src, &subs->dest, subs); __end: if (sport) snd_seq_port_unlock(sport); if (sender) snd_seq_client_unlock(sender); return result; } /* * get subscription info - check only its presence */ static int snd_seq_ioctl_query_subs(struct snd_seq_client *client, void *arg) { struct snd_seq_query_subs *subs = arg; int result = -ENXIO; struct snd_seq_client *cptr = NULL; struct snd_seq_client_port *port = NULL; struct snd_seq_port_subs_info *group; struct list_head *p; int i; cptr = snd_seq_client_use_ptr(subs->root.client); if (!cptr) goto __end; port = snd_seq_port_use_ptr(cptr, subs->root.port); if (!port) goto __end; switch (subs->type) { case SNDRV_SEQ_QUERY_SUBS_READ: group = &port->c_src; break; case SNDRV_SEQ_QUERY_SUBS_WRITE: group = &port->c_dest; break; default: goto __end; } down_read(&group->list_mutex); /* search for the subscriber */ subs->num_subs = group->count; i = 0; result = -ENOENT; list_for_each(p, &group->list_head) { if (i++ == subs->index) { /* found! 
*/ struct snd_seq_subscribers *s; if (subs->type == SNDRV_SEQ_QUERY_SUBS_READ) { s = list_entry(p, struct snd_seq_subscribers, src_list); subs->addr = s->info.dest; } else { s = list_entry(p, struct snd_seq_subscribers, dest_list); subs->addr = s->info.sender; } subs->flags = s->info.flags; subs->queue = s->info.queue; result = 0; break; } } up_read(&group->list_mutex); __end: if (port) snd_seq_port_unlock(port); if (cptr) snd_seq_client_unlock(cptr); return result; } /* * query next client */ static int snd_seq_ioctl_query_next_client(struct snd_seq_client *client, void *arg) { struct snd_seq_client_info *info = arg; struct snd_seq_client *cptr = NULL; /* search for next client */ if (info->client < INT_MAX) info->client++; if (info->client < 0) info->client = 0; for (; info->client < SNDRV_SEQ_MAX_CLIENTS; info->client++) { cptr = snd_seq_client_use_ptr(info->client); if (cptr) break; /* found */ } if (cptr == NULL) return -ENOENT; get_client_info(cptr, info); snd_seq_client_unlock(cptr); return 0; } /* * query next port */ static int snd_seq_ioctl_query_next_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; struct snd_seq_client *cptr; struct snd_seq_client_port *port = NULL; cptr = snd_seq_client_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; /* search for next port */ info->addr.port++; port = snd_seq_port_query_nearest(cptr, info); if (port == NULL) { snd_seq_client_unlock(cptr); return -ENOENT; } /* get port info */ info->addr = port->addr; snd_seq_get_port_info(port, info); snd_seq_port_unlock(port); snd_seq_client_unlock(cptr); return 0; } #if IS_ENABLED(CONFIG_SND_SEQ_UMP) #define NUM_UMP_INFOS (SNDRV_UMP_MAX_BLOCKS + 1) static void free_ump_info(struct snd_seq_client *client) { int i; if (!client->ump_info) return; for (i = 0; i < NUM_UMP_INFOS; i++) kfree(client->ump_info[i]); kfree(client->ump_info); client->ump_info = NULL; } static void terminate_ump_info_strings(void *p, int type) { if (type == SNDRV_SEQ_CLIENT_UMP_INFO_ENDPOINT) { struct snd_ump_endpoint_info *ep = p; ep->name[sizeof(ep->name) - 1] = 0; } else { struct snd_ump_block_info *bp = p; bp->name[sizeof(bp->name) - 1] = 0; } } #ifdef CONFIG_SND_PROC_FS static void dump_ump_info(struct snd_info_buffer *buffer, struct snd_seq_client *client) { struct snd_ump_endpoint_info *ep; struct snd_ump_block_info *bp; int i; if (!client->ump_info) return; ep = client->ump_info[SNDRV_SEQ_CLIENT_UMP_INFO_ENDPOINT]; if (ep && *ep->name) snd_iprintf(buffer, " UMP Endpoint: \"%s\"\n", ep->name); for (i = 0; i < SNDRV_UMP_MAX_BLOCKS; i++) { bp = client->ump_info[i + 1]; if (bp && *bp->name) { snd_iprintf(buffer, " UMP Block %d: \"%s\" [%s]\n", i, bp->name, bp->active ? 
"Active" : "Inactive"); snd_iprintf(buffer, " Groups: %d-%d\n", bp->first_group + 1, bp->first_group + bp->num_groups); } } } #endif /* UMP-specific ioctls -- called directly without data copy */ static int snd_seq_ioctl_client_ump_info(struct snd_seq_client *caller, unsigned int cmd, unsigned long arg) { struct snd_seq_client_ump_info __user *argp = (struct snd_seq_client_ump_info __user *)arg; struct snd_seq_client *cptr; int client, type, err = 0; size_t size; void *p; if (get_user(client, &argp->client) || get_user(type, &argp->type)) return -EFAULT; if (cmd == SNDRV_SEQ_IOCTL_SET_CLIENT_UMP_INFO && caller->number != client) return -EPERM; if (type < 0 || type >= NUM_UMP_INFOS) return -EINVAL; if (type == SNDRV_SEQ_CLIENT_UMP_INFO_ENDPOINT) size = sizeof(struct snd_ump_endpoint_info); else size = sizeof(struct snd_ump_block_info); cptr = snd_seq_client_use_ptr(client); if (!cptr) return -ENOENT; mutex_lock(&cptr->ioctl_mutex); if (!cptr->midi_version) { err = -EBADFD; goto error; } if (cmd == SNDRV_SEQ_IOCTL_GET_CLIENT_UMP_INFO) { if (!cptr->ump_info) p = NULL; else p = cptr->ump_info[type]; if (!p) { err = -ENODEV; goto error; } if (copy_to_user(argp->info, p, size)) { err = -EFAULT; goto error; } } else { if (cptr->type != USER_CLIENT) { err = -EBADFD; goto error; } if (!cptr->ump_info) { cptr->ump_info = kcalloc(NUM_UMP_INFOS, sizeof(void *), GFP_KERNEL); if (!cptr->ump_info) { err = -ENOMEM; goto error; } } p = memdup_user(argp->info, size); if (IS_ERR(p)) { err = PTR_ERR(p); goto error; } kfree(cptr->ump_info[type]); terminate_ump_info_strings(p, type); cptr->ump_info[type] = p; } error: mutex_unlock(&cptr->ioctl_mutex); snd_seq_client_unlock(cptr); return err; } #endif /* -------------------------------------------------------- */ static const struct ioctl_handler { unsigned int cmd; int (*func)(struct snd_seq_client *client, void *arg); } ioctl_handlers[] = { { SNDRV_SEQ_IOCTL_PVERSION, snd_seq_ioctl_pversion }, { SNDRV_SEQ_IOCTL_USER_PVERSION, snd_seq_ioctl_user_pversion }, { SNDRV_SEQ_IOCTL_CLIENT_ID, snd_seq_ioctl_client_id }, { SNDRV_SEQ_IOCTL_SYSTEM_INFO, snd_seq_ioctl_system_info }, { SNDRV_SEQ_IOCTL_RUNNING_MODE, snd_seq_ioctl_running_mode }, { SNDRV_SEQ_IOCTL_GET_CLIENT_INFO, snd_seq_ioctl_get_client_info }, { SNDRV_SEQ_IOCTL_SET_CLIENT_INFO, snd_seq_ioctl_set_client_info }, { SNDRV_SEQ_IOCTL_CREATE_PORT, snd_seq_ioctl_create_port }, { SNDRV_SEQ_IOCTL_DELETE_PORT, snd_seq_ioctl_delete_port }, { SNDRV_SEQ_IOCTL_GET_PORT_INFO, snd_seq_ioctl_get_port_info }, { SNDRV_SEQ_IOCTL_SET_PORT_INFO, snd_seq_ioctl_set_port_info }, { SNDRV_SEQ_IOCTL_SUBSCRIBE_PORT, snd_seq_ioctl_subscribe_port }, { SNDRV_SEQ_IOCTL_UNSUBSCRIBE_PORT, snd_seq_ioctl_unsubscribe_port }, { SNDRV_SEQ_IOCTL_CREATE_QUEUE, snd_seq_ioctl_create_queue }, { SNDRV_SEQ_IOCTL_DELETE_QUEUE, snd_seq_ioctl_delete_queue }, { SNDRV_SEQ_IOCTL_GET_QUEUE_INFO, snd_seq_ioctl_get_queue_info }, { SNDRV_SEQ_IOCTL_SET_QUEUE_INFO, snd_seq_ioctl_set_queue_info }, { SNDRV_SEQ_IOCTL_GET_NAMED_QUEUE, snd_seq_ioctl_get_named_queue }, { SNDRV_SEQ_IOCTL_GET_QUEUE_STATUS, snd_seq_ioctl_get_queue_status }, { SNDRV_SEQ_IOCTL_GET_QUEUE_TEMPO, snd_seq_ioctl_get_queue_tempo }, { SNDRV_SEQ_IOCTL_SET_QUEUE_TEMPO, snd_seq_ioctl_set_queue_tempo }, { SNDRV_SEQ_IOCTL_GET_QUEUE_TIMER, snd_seq_ioctl_get_queue_timer }, { SNDRV_SEQ_IOCTL_SET_QUEUE_TIMER, snd_seq_ioctl_set_queue_timer }, { SNDRV_SEQ_IOCTL_GET_QUEUE_CLIENT, snd_seq_ioctl_get_queue_client }, { SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT, snd_seq_ioctl_set_queue_client }, { 
SNDRV_SEQ_IOCTL_GET_CLIENT_POOL, snd_seq_ioctl_get_client_pool }, { SNDRV_SEQ_IOCTL_SET_CLIENT_POOL, snd_seq_ioctl_set_client_pool }, { SNDRV_SEQ_IOCTL_GET_SUBSCRIPTION, snd_seq_ioctl_get_subscription }, { SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT, snd_seq_ioctl_query_next_client }, { SNDRV_SEQ_IOCTL_QUERY_NEXT_PORT, snd_seq_ioctl_query_next_port }, { SNDRV_SEQ_IOCTL_REMOVE_EVENTS, snd_seq_ioctl_remove_events }, { SNDRV_SEQ_IOCTL_QUERY_SUBS, snd_seq_ioctl_query_subs }, { 0, NULL }, }; static long snd_seq_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_seq_client *client = file->private_data; /* To use kernel stack for ioctl data. */ union { int pversion; int client_id; struct snd_seq_system_info system_info; struct snd_seq_running_info running_info; struct snd_seq_client_info client_info; struct snd_seq_port_info port_info; struct snd_seq_port_subscribe port_subscribe; struct snd_seq_queue_info queue_info; struct snd_seq_queue_status queue_status; struct snd_seq_queue_tempo tempo; struct snd_seq_queue_timer queue_timer; struct snd_seq_queue_client queue_client; struct snd_seq_client_pool client_pool; struct snd_seq_remove_events remove_events; struct snd_seq_query_subs query_subs; } buf; const struct ioctl_handler *handler; unsigned long size; int err; if (snd_BUG_ON(!client)) return -ENXIO; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) /* exception - handling large data */ switch (cmd) { case SNDRV_SEQ_IOCTL_GET_CLIENT_UMP_INFO: case SNDRV_SEQ_IOCTL_SET_CLIENT_UMP_INFO: return snd_seq_ioctl_client_ump_info(client, cmd, arg); } #endif for (handler = ioctl_handlers; handler->cmd > 0; ++handler) { if (handler->cmd == cmd) break; } if (handler->cmd == 0) return -ENOTTY; memset(&buf, 0, sizeof(buf)); /* * All of ioctl commands for ALSA sequencer get an argument of size * within 13 bits. We can safely pick up the size from the command. */ size = _IOC_SIZE(handler->cmd); if (handler->cmd & IOC_IN) { if (copy_from_user(&buf, (const void __user *)arg, size)) return -EFAULT; } mutex_lock(&client->ioctl_mutex); err = handler->func(client, &buf); mutex_unlock(&client->ioctl_mutex); if (err >= 0) { /* Some commands includes a bug in 'dir' field. */ if (handler->cmd == SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT || handler->cmd == SNDRV_SEQ_IOCTL_SET_CLIENT_POOL || (handler->cmd & IOC_OUT)) if (copy_to_user((void __user *)arg, &buf, size)) return -EFAULT; } return err; } #ifdef CONFIG_COMPAT #include "seq_compat.c" #else #define snd_seq_ioctl_compat NULL #endif /* -------------------------------------------------------- */ /* exported to kernel modules */ int snd_seq_create_kernel_client(struct snd_card *card, int client_index, const char *name_fmt, ...) 
{ struct snd_seq_client *client; va_list args; if (snd_BUG_ON(in_interrupt())) return -EBUSY; if (card && client_index >= SNDRV_SEQ_CLIENTS_PER_CARD) return -EINVAL; if (card == NULL && client_index >= SNDRV_SEQ_GLOBAL_CLIENTS) return -EINVAL; mutex_lock(&register_mutex); if (card) { client_index += SNDRV_SEQ_GLOBAL_CLIENTS + card->number * SNDRV_SEQ_CLIENTS_PER_CARD; if (client_index >= SNDRV_SEQ_DYNAMIC_CLIENTS_BEGIN) client_index = -1; } /* empty write queue as default */ client = seq_create_client1(client_index, 0); if (client == NULL) { mutex_unlock(&register_mutex); return -EBUSY; /* failure code */ } usage_alloc(&client_usage, 1); client->accept_input = 1; client->accept_output = 1; client->data.kernel.card = card; client->user_pversion = SNDRV_SEQ_VERSION; va_start(args, name_fmt); vsnprintf(client->name, sizeof(client->name), name_fmt, args); va_end(args); client->type = KERNEL_CLIENT; mutex_unlock(&register_mutex); /* make others aware this new client */ snd_seq_system_client_ev_client_start(client->number); /* return client number to caller */ return client->number; } EXPORT_SYMBOL(snd_seq_create_kernel_client); /* exported to kernel modules */ int snd_seq_delete_kernel_client(int client) { struct snd_seq_client *ptr; if (snd_BUG_ON(in_interrupt())) return -EBUSY; ptr = clientptr(client); if (ptr == NULL) return -EINVAL; seq_free_client(ptr); kfree(ptr); return 0; } EXPORT_SYMBOL(snd_seq_delete_kernel_client); /* * exported, called by kernel clients to enqueue events (w/o blocking) * * RETURN VALUE: zero if succeed, negative if error */ int snd_seq_kernel_client_enqueue(int client, struct snd_seq_event *ev, struct file *file, bool blocking) { struct snd_seq_client *cptr; int result; if (snd_BUG_ON(!ev)) return -EINVAL; if (!snd_seq_ev_is_ump(ev)) { if (ev->type == SNDRV_SEQ_EVENT_NONE) return 0; /* ignore this */ if (ev->type == SNDRV_SEQ_EVENT_KERNEL_ERROR) return -EINVAL; /* quoted events can't be enqueued */ } /* fill in client number */ ev->source.client = client; if (check_event_type_and_length(ev)) return -EINVAL; cptr = snd_seq_client_use_ptr(client); if (cptr == NULL) return -EINVAL; if (!cptr->accept_output) { result = -EPERM; } else { /* send it */ mutex_lock(&cptr->ioctl_mutex); result = snd_seq_client_enqueue_event(cptr, ev, file, blocking, false, 0, &cptr->ioctl_mutex); mutex_unlock(&cptr->ioctl_mutex); } snd_seq_client_unlock(cptr); return result; } EXPORT_SYMBOL(snd_seq_kernel_client_enqueue); /* * exported, called by kernel clients to dispatch events directly to other * clients, bypassing the queues. Event time-stamp will be updated. * * RETURN VALUE: negative = delivery failed, * zero, or positive: the number of delivered events */ int snd_seq_kernel_client_dispatch(int client, struct snd_seq_event * ev, int atomic, int hop) { struct snd_seq_client *cptr; int result; if (snd_BUG_ON(!ev)) return -EINVAL; /* fill in client number */ ev->queue = SNDRV_SEQ_QUEUE_DIRECT; ev->source.client = client; if (check_event_type_and_length(ev)) return -EINVAL; cptr = snd_seq_client_use_ptr(client); if (cptr == NULL) return -EINVAL; if (!cptr->accept_output) result = -EPERM; else result = snd_seq_deliver_event(cptr, ev, atomic, hop); snd_seq_client_unlock(cptr); return result; } EXPORT_SYMBOL(snd_seq_kernel_client_dispatch); /** * snd_seq_kernel_client_ctl - operate a command for a client with data in * kernel space. * @clientid: A numerical ID for a client. * @cmd: An ioctl(2) command for ALSA sequencer operation. * @arg: A pointer to data in kernel space. 
* * Against its name, both kernel/application client can be handled by this * kernel API. A pointer of 'arg' argument should be in kernel space. * * Return: 0 at success. Negative error code at failure. */ int snd_seq_kernel_client_ctl(int clientid, unsigned int cmd, void *arg) { const struct ioctl_handler *handler; struct snd_seq_client *client; client = clientptr(clientid); if (client == NULL) return -ENXIO; for (handler = ioctl_handlers; handler->cmd > 0; ++handler) { if (handler->cmd == cmd) return handler->func(client, arg); } pr_debug("ALSA: seq unknown ioctl() 0x%x (type='%c', number=0x%02x)\n", cmd, _IOC_TYPE(cmd), _IOC_NR(cmd)); return -ENOTTY; } EXPORT_SYMBOL(snd_seq_kernel_client_ctl); /* exported (for OSS emulator) */ int snd_seq_kernel_client_write_poll(int clientid, struct file *file, poll_table *wait) { struct snd_seq_client *client; client = clientptr(clientid); if (client == NULL) return -ENXIO; if (! snd_seq_write_pool_allocated(client)) return 1; if (snd_seq_pool_poll_wait(client->pool, file, wait)) return 1; return 0; } EXPORT_SYMBOL(snd_seq_kernel_client_write_poll); /* get a sequencer client object; for internal use from a kernel client */ struct snd_seq_client *snd_seq_kernel_client_get(int id) { return snd_seq_client_use_ptr(id); } EXPORT_SYMBOL_GPL(snd_seq_kernel_client_get); /* put a sequencer client object; for internal use from a kernel client */ void snd_seq_kernel_client_put(struct snd_seq_client *cptr) { if (cptr) snd_seq_client_unlock(cptr); } EXPORT_SYMBOL_GPL(snd_seq_kernel_client_put); /*---------------------------------------------------------------------------*/ #ifdef CONFIG_SND_PROC_FS /* * /proc interface */ static void snd_seq_info_dump_subscribers(struct snd_info_buffer *buffer, struct snd_seq_port_subs_info *group, int is_src, char *msg) { struct list_head *p; struct snd_seq_subscribers *s; int count = 0; down_read(&group->list_mutex); if (list_empty(&group->list_head)) { up_read(&group->list_mutex); return; } snd_iprintf(buffer, msg); list_for_each(p, &group->list_head) { if (is_src) s = list_entry(p, struct snd_seq_subscribers, src_list); else s = list_entry(p, struct snd_seq_subscribers, dest_list); if (count++) snd_iprintf(buffer, ", "); snd_iprintf(buffer, "%d:%d", is_src ? s->info.dest.client : s->info.sender.client, is_src ? s->info.dest.port : s->info.sender.port); if (s->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP) snd_iprintf(buffer, "[%c:%d]", ((s->info.flags & SNDRV_SEQ_PORT_SUBS_TIME_REAL) ? 'r' : 't'), s->info.queue); if (group->exclusive) snd_iprintf(buffer, "[ex]"); } up_read(&group->list_mutex); snd_iprintf(buffer, "\n"); } #define FLAG_PERM_RD(perm) ((perm) & SNDRV_SEQ_PORT_CAP_READ ? ((perm) & SNDRV_SEQ_PORT_CAP_SUBS_READ ? 'R' : 'r') : '-') #define FLAG_PERM_WR(perm) ((perm) & SNDRV_SEQ_PORT_CAP_WRITE ? ((perm) & SNDRV_SEQ_PORT_CAP_SUBS_WRITE ? 'W' : 'w') : '-') #define FLAG_PERM_EX(perm) ((perm) & SNDRV_SEQ_PORT_CAP_NO_EXPORT ? '-' : 'e') #define FLAG_PERM_DUPLEX(perm) ((perm) & SNDRV_SEQ_PORT_CAP_DUPLEX ? 
'X' : '-') static const char *port_direction_name(unsigned char dir) { static const char *names[4] = { "-", "In", "Out", "In/Out" }; if (dir > SNDRV_SEQ_PORT_DIR_BIDIRECTION) return "Invalid"; return names[dir]; } static void snd_seq_info_dump_ports(struct snd_info_buffer *buffer, struct snd_seq_client *client) { struct snd_seq_client_port *p; mutex_lock(&client->ports_mutex); list_for_each_entry(p, &client->ports_list_head, list) { if (p->capability & SNDRV_SEQ_PORT_CAP_INACTIVE) continue; snd_iprintf(buffer, " Port %3d : \"%s\" (%c%c%c%c) [%s]\n", p->addr.port, p->name, FLAG_PERM_RD(p->capability), FLAG_PERM_WR(p->capability), FLAG_PERM_EX(p->capability), FLAG_PERM_DUPLEX(p->capability), port_direction_name(p->direction)); snd_seq_info_dump_subscribers(buffer, &p->c_src, 1, " Connecting To: "); snd_seq_info_dump_subscribers(buffer, &p->c_dest, 0, " Connected From: "); } mutex_unlock(&client->ports_mutex); } static const char *midi_version_string(unsigned int version) { switch (version) { case SNDRV_SEQ_CLIENT_LEGACY_MIDI: return "Legacy"; case SNDRV_SEQ_CLIENT_UMP_MIDI_1_0: return "UMP MIDI1"; case SNDRV_SEQ_CLIENT_UMP_MIDI_2_0: return "UMP MIDI2"; default: return "Unknown"; } } /* exported to seq_info.c */ void snd_seq_info_clients_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { int c; struct snd_seq_client *client; snd_iprintf(buffer, "Client info\n"); snd_iprintf(buffer, " cur clients : %d\n", client_usage.cur); snd_iprintf(buffer, " peak clients : %d\n", client_usage.peak); snd_iprintf(buffer, " max clients : %d\n", SNDRV_SEQ_MAX_CLIENTS); snd_iprintf(buffer, "\n"); /* list the client table */ for (c = 0; c < SNDRV_SEQ_MAX_CLIENTS; c++) { client = snd_seq_client_use_ptr(c); if (client == NULL) continue; if (client->type == NO_CLIENT) { snd_seq_client_unlock(client); continue; } snd_iprintf(buffer, "Client %3d : \"%s\" [%s %s]\n", c, client->name, client->type == USER_CLIENT ? "User" : "Kernel", midi_version_string(client->midi_version)); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) dump_ump_info(buffer, client); #endif snd_seq_info_dump_ports(buffer, client); if (snd_seq_write_pool_allocated(client)) { snd_iprintf(buffer, " Output pool :\n"); snd_seq_info_pool(buffer, client->pool, " "); } if (client->type == USER_CLIENT && client->data.user.fifo && client->data.user.fifo->pool) { snd_iprintf(buffer, " Input pool :\n"); snd_seq_info_pool(buffer, client->data.user.fifo->pool, " "); } snd_seq_client_unlock(client); } } #endif /* CONFIG_SND_PROC_FS */ /*---------------------------------------------------------------------------*/ /* * REGISTRATION PART */ static const struct file_operations snd_seq_f_ops = { .owner = THIS_MODULE, .read = snd_seq_read, .write = snd_seq_write, .open = snd_seq_open, .release = snd_seq_release, .llseek = no_llseek, .poll = snd_seq_poll, .unlocked_ioctl = snd_seq_ioctl, .compat_ioctl = snd_seq_ioctl_compat, }; static struct device *seq_dev; /* * register sequencer device */ int __init snd_sequencer_device_init(void) { int err; err = snd_device_alloc(&seq_dev, NULL); if (err < 0) return err; dev_set_name(seq_dev, "seq"); mutex_lock(&register_mutex); err = snd_register_device(SNDRV_DEVICE_TYPE_SEQUENCER, NULL, 0, &snd_seq_f_ops, NULL, seq_dev); mutex_unlock(&register_mutex); if (err < 0) { put_device(seq_dev); return err; } return 0; } /* * unregister sequencer device */ void snd_sequencer_device_done(void) { snd_unregister_device(seq_dev); put_device(seq_dev); }
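/*
 * Editor's illustrative sketch (not part of the original file): how a
 * kernel driver could use the exported client API above.  It creates a
 * kernel client, adds one readable/subscribable port through
 * snd_seq_kernel_client_ctl(SNDRV_SEQ_IOCTL_CREATE_PORT), and fans a
 * direct note-on event out to the port's subscribers with
 * snd_seq_kernel_client_dispatch() (snd_seq_kernel_client_enqueue()
 * would be used instead when the event should be scheduled on a queue).
 * The module wrapper, the client/port names, the MIDI_GENERIC port type
 * and the use of a negative client index (dynamically assigned client
 * number) are assumptions for illustration only; in-tree users such as
 * snd-seq-dummy follow the same pattern with their own fixed settings.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <sound/core.h>
#include <sound/seq_kernel.h>
#include <sound/asequencer.h>

static int example_client = -1;
static int example_port;

static int __init example_seq_init(void)
{
	struct snd_seq_port_info pinfo;
	int err;

	/* register a global kernel client (no card); a negative index lets
	 * the core pick a free dynamic client number */
	example_client = snd_seq_create_kernel_client(NULL, -1, "Example Client");
	if (example_client < 0)
		return example_client;

	/* add one readable/subscribable port; CREATE_PORT fills pinfo.addr */
	memset(&pinfo, 0, sizeof(pinfo));
	pinfo.addr.client = example_client;
	strscpy(pinfo.name, "Example Port", sizeof(pinfo.name));
	pinfo.capability = SNDRV_SEQ_PORT_CAP_READ | SNDRV_SEQ_PORT_CAP_SUBS_READ;
	pinfo.type = SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC;
	err = snd_seq_kernel_client_ctl(example_client,
					SNDRV_SEQ_IOCTL_CREATE_PORT, &pinfo);
	if (err < 0) {
		snd_seq_delete_kernel_client(example_client);
		example_client = -1;
		return err;
	}
	example_port = pinfo.addr.port;

	/* fan a middle-C note-on out to current subscribers (a no-op while
	 * nobody is connected yet); process context, hence atomic = 0 */
	{
		struct snd_seq_event ev;

		memset(&ev, 0, sizeof(ev));
		ev.type = SNDRV_SEQ_EVENT_NOTEON;
		ev.flags = SNDRV_SEQ_EVENT_LENGTH_FIXED;
		ev.source.port = example_port;
		ev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS;
		ev.data.note.channel = 0;
		ev.data.note.note = 60;
		ev.data.note.velocity = 100;
		/* source.client and queue (SNDRV_SEQ_QUEUE_DIRECT) are
		 * filled in by snd_seq_kernel_client_dispatch() itself */
		snd_seq_kernel_client_dispatch(example_client, &ev, 0, 0);
	}
	return 0;
}

static void __exit example_seq_exit(void)
{
	if (example_client >= 0)
		snd_seq_delete_kernel_client(example_client);
}

module_init(example_seq_init);
module_exit(example_seq_exit);
MODULE_DESCRIPTION("Illustrative ALSA sequencer kernel-client sketch");
MODULE_LICENSE("GPL");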
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	Fixes:
 *	Michael Chastain	: Incorrect size of copying.
 *	Alan Cox		: Added the cache manager code
 *	Alan Cox		: Fixed the clone/copy bug and device race.
 *	Mike McLagan		: Routing by source
 *	Malcolm Beattie		: Buffer handling fixes.
 *	Alexey Kuznetsov	: Double buffer free and other fixes.
 *	SVR Anand		: Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	: Status, optimisations and more.
 *	Brad Parker		: Better behaviour on mrouted upcall
 *				  overflow.
 *	Carlos Picoto		: PIMv1 Support
 *	Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
 *				  Relax this requirement to work with older peers.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <linux/rhashtable.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/rtnh.h>
#include <linux/nospec.h>

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */
static DEFINE_SPINLOCK(mrt_lock);

static struct net_device *vif_dev_read(const struct vif_device *vif)
{
	return rcu_dereference(vif->dev);
}

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
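 *
 * Roughly, the resulting data path is expected to look like
 *
 *	rcu_read_lock();
 *	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 *	if (cache)
 *		ip_mr_forward(net, mrt, skb->dev, skb, cache, local);
 *	rcu_read_unlock();
 *
 * with writers serialized by rtnl/mrt_lock (resolved entries) and by
 * mfc_unres_lock (the unresolved queue); see ip_mr_input() below.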
 */

static struct kmem_cache *mrt_cachep __ro_after_init;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);

static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local);
static int ipmr_cache_report(const struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static void mroute_netlink_event(struct mr_table *mrt,
				 struct mfc_cache *mfc, int cmd);
static void igmpmsg_netlink_event(const struct mr_table *mrt,
				  struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net)					\
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list,	\
				lockdep_rtnl_is_held() ||		\
				list_empty(&net->ipv4.mr_tables))

static struct mr_table *ipmr_mr_table_iter(struct net *net,
					   struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv4.mr_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv4.mr_tables)
		return NULL;
	return ret;
}

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	int err;
	struct ipmr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi4_to_flowi(flp4));

	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
			       flowi4_to_flowi(flp4), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = ipmr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb,
			       struct netlink_ext_ack *extack)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	rtnl_lock();
	ipmr_free_table(mrt);
	rtnl_unlock();
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	ASSERT_RTNL();
	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		ipmr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
}
EXPORT_SYMBOL(ipmr_rule_default);
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_mr_table_iter(struct net *net,
					   struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv4.mrt;
	return NULL;
}

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv4.mrt = mrt;
	return 0;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	ASSERT_RTNL();
	ipmr_free_table(net->ipv4.mrt);
	net->ipv4.mrt = NULL;
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{
	return 0;
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
	return 0;
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return true;
}
EXPORT_SYMBOL(ipmr_rule_default);
#endif

static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct mfc_cache_cmp_arg *cmparg = arg->key;
	const struct mfc_cache *c = ptr;

	return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
	       cmparg->mfc_origin != c->mfc_origin;
}

static const struct rhashtable_params ipmr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc_cache, cmparg),
	.key_len = sizeof(struct mfc_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ipmr_hash_cmp,
	.automatic_shrinking = true,
};

static void ipmr_new_table_set(struct mr_table *mrt,
			       struct net *net)
{
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
}

static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
	.mfc_mcastgrp = htonl(INADDR_ANY),
	.mfc_origin = htonl(INADDR_ANY),
};

static struct mr_table_ops ipmr_mr_table_ops = {
	.rht_params = &ipmr_rht_params,
	.cmparg_any = &ipmr_mr_table_ops_cmparg_any,
};

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
	if (id != RT_TABLE_DEFAULT && id >= 1000000000)
		return ERR_PTR(-EINVAL);

	mrt = ipmr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ipmr_mr_table_ops,
			      ipmr_expire_process, ipmr_new_table_set);
}

static void ipmr_free_table(struct mr_table *mrt)
{
	timer_shutdown_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
				 MRT_FLUSH_MFC |
				 MRT_FLUSH_MFC_STATIC);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

/* Initialize ipmr pimreg/tunnel in_device */
static bool ipmr_init_vif_indev(const struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev)
		return false;
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

	return true;
}

static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *tunnel_dev, *new_dev;
	struct ip_tunnel_parm p = { };
	int err;

	tunnel_dev = __dev_get_by_name(net, "tunl0");
	if (!tunnel_dev)
		goto out;

	p.iph.daddr = v->vifc_rmt_addr.s_addr;
	p.iph.saddr = v->vifc_lcl_addr.s_addr;
	p.iph.version = 4;
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_IPIP;
	sprintf(p.name, "dvmrp%d", v->vifc_vifi);

	if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl)
		goto out;
	err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
						     SIOCADDTUNNEL);
	if (err)
		goto out;

	new_dev = __dev_get_by_name(net, p.name);
	if (!new_dev)
		goto out;

	new_dev->flags |= IFF_MULTICAST;
	if (!ipmr_init_vif_indev(new_dev))
		goto out_unregister;
	if (dev_open(new_dev, NULL))
		goto out_unregister;
	dev_hold(new_dev);
	err = dev_set_allmulti(new_dev, 1);
	if (err) {
		dev_close(new_dev);
		tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
						       SIOCDELTUNNEL);
		dev_put(new_dev);
		new_dev = ERR_PTR(err);
	}
	return new_dev;

out_unregister:
	unregister_netdevice(new_dev);
out:
	return ERR_PTR(-ENOBUFS);
}

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi4 fl4 = {
		.flowi4_oif	= dev->ifindex,
		.flowi4_iif	= skb->skb_iif ?
: LOOPBACK_IFINDEX, .flowi4_mark = skb->mark, }; int err; err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { kfree_skb(skb); return err; } DEV_STATS_ADD(dev, tx_bytes, skb->len); DEV_STATS_INC(dev, tx_packets); rcu_read_lock(); /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), IGMPMSG_WHOLEPKT); rcu_read_unlock(); kfree_skb(skb); return NETDEV_TX_OK; } static int reg_vif_get_iflink(const struct net_device *dev) { return 0; } static const struct net_device_ops reg_vif_netdev_ops = { .ndo_start_xmit = reg_vif_xmit, .ndo_get_iflink = reg_vif_get_iflink, }; static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->netdev_ops = &reg_vif_netdev_ops; dev->needs_free_netdev = true; dev->features |= NETIF_F_NETNS_LOCAL; } static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; char name[IFNAMSIZ]; if (mrt->id == RT_TABLE_DEFAULT) sprintf(name, "pimreg"); else sprintf(name, "pimreg%u", mrt->id); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (!dev) return NULL; dev_net_set(dev, net); if (register_netdevice(dev)) { free_netdev(dev); return NULL; } if (!ipmr_init_vif_indev(dev)) goto failure; if (dev_open(dev, NULL)) goto failure; dev_hold(dev); return dev; failure: unregister_netdevice(dev); return NULL; } /* called with rcu_read_lock() */ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, unsigned int pimlen) { struct net_device *reg_dev = NULL; struct iphdr *encap; int vif_num; encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* Check that: * a. packet is really sent to a multicast group * b. packet is not a NULL-REGISTER * c. 
packet is not truncated */ if (!ipv4_is_multicast(encap->daddr) || encap->tot_len == 0 || ntohs(encap->tot_len) + pimlen > skb->len) return 1; /* Pairs with WRITE_ONCE() in vif_add()/vid_delete() */ vif_num = READ_ONCE(mrt->mroute_reg_vif_num); if (vif_num >= 0) reg_dev = vif_dev_read(&mrt->vif_table[vif_num]); if (!reg_dev) return 1; skb->mac_header = skb->network_header; skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->protocol = htons(ETH_P_IP); skb->ip_summed = CHECKSUM_NONE; skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); netif_rx(skb); return NET_RX_SUCCESS; } #else static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { return NULL; } #endif static int call_ipmr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, struct net_device *vif_dev, vifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, vif, vif_dev, vif_index, tb_id, &net->ipv4.ipmr_seq); } static int call_ipmr_mfc_entry_notifiers(struct net *net, enum fib_event_type event_type, struct mfc_cache *mfc, u32 tb_id) { return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type, &mfc->_c, tb_id, &net->ipv4.ipmr_seq); } /** * vif_delete - Delete a VIF entry * @mrt: Table to delete from * @vifi: VIF identifier to delete * @notify: Set to 1, if the caller is a notifier_call * @head: if unregistering the VIF, place it on this queue */ static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { struct net *net = read_pnet(&mrt->net); struct vif_device *v; struct net_device *dev; struct in_device *in_dev; if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL; v = &mrt->vif_table[vifi]; dev = rtnl_dereference(v->dev); if (!dev) return -EADDRNOTAVAIL; spin_lock(&mrt_lock); call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev, vifi, mrt->id); RCU_INIT_POINTER(v->dev, NULL); if (vifi == mrt->mroute_reg_vif_num) { /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ WRITE_ONCE(mrt->mroute_reg_vif_num, -1); } if (vifi + 1 == mrt->maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { if (VIF_EXISTS(mrt, tmp)) break; } WRITE_ONCE(mrt->maxvif, tmp + 1); } spin_unlock(&mrt_lock); dev_set_allmulti(dev, -1); in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); } if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); netdev_put(dev, &v->dev_tracker); return 0; } static void ipmr_cache_free_rcu(struct rcu_head *head) { struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); } static void ipmr_cache_free(struct mfc_cache *c) { call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); } /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. 
*/ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; struct nlmsgerr *e; atomic_dec(&mrt->cache_resolve_queue_len); while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); e = nlmsg_data(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { kfree_skb(skb); } } ipmr_cache_free(c); } /* Timer process for the unresolved queue. */ static void ipmr_expire_process(struct timer_list *t) { struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); struct mr_mfc *c, *next; unsigned long expires; unsigned long now; if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); return; } if (list_empty(&mrt->mfc_unres_queue)) goto out; now = jiffies; expires = 10*HZ; list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { unsigned long interval = c->mfc_un.unres.expires - now; if (interval < expires) expires = interval; continue; } list_del(&c->list); mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); ipmr_destroy_unres(mrt, (struct mfc_cache *)c); } if (!list_empty(&mrt->mfc_unres_queue)) mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); out: spin_unlock(&mfc_unres_lock); } /* Fill oifs list. It is called under locked mrt_lock. */ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, unsigned char *ttls) { int vifi; cache->mfc_un.res.minvif = MAXVIFS; cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXVIFS); for (vifi = 0; vifi < mrt->maxvif; vifi++) { if (VIF_EXISTS(mrt, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) cache->mfc_un.res.minvif = vifi; if (cache->mfc_un.res.maxvif <= vifi) cache->mfc_un.res.maxvif = vifi + 1; } } cache->mfc_un.res.lastuse = jiffies; } static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { struct netdev_phys_item_id ppid = { }; int vifi = vifc->vifc_vifi; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; int err; /* Is vif busy ? 
*/ if (VIF_EXISTS(mrt, vifi)) return -EADDRINUSE; switch (vifc->vifc_flags) { case VIFF_REGISTER: if (!ipmr_pimsm_enabled()) return -EINVAL; /* Special Purpose VIF in PIM * All the packets will be sent to the daemon */ if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ipmr_reg_vif(net, mrt); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); if (err) { unregister_netdevice(dev); dev_put(dev); return err; } break; case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); if (IS_ERR(dev)) return PTR_ERR(dev); break; case VIFF_USE_IFINDEX: case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); if (dev && !__in_dev_get_rtnl(dev)) { dev_put(dev); return -EADDRNOTAVAIL; } } else { dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); } if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); if (err) { dev_put(dev); return err; } break; default: return -EINVAL; } in_dev = __in_dev_get_rtnl(dev); if (!in_dev) { dev_put(dev); return -EADDRNOTAVAIL; } IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), (VIFF_TUNNEL | VIFF_REGISTER)); err = dev_get_port_parent_id(dev, &ppid, true); if (err == 0) { memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len); v->dev_parent_id.id_len = ppid.id_len; } else { v->dev_parent_id.id_len = 0; } v->local = vifc->vifc_lcl_addr.s_addr; v->remote = vifc->vifc_rmt_addr.s_addr; /* And finish update writing critical data */ spin_lock(&mrt_lock); rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); if (v->flags & VIFF_REGISTER) { /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); } if (vifi+1 > mrt->maxvif) WRITE_ONCE(mrt->maxvif, vifi + 1); spin_unlock(&mrt_lock); call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, vifi, mrt->id); return 0; } /* called with rcu_read_lock() */ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, __be32 origin, __be32 mcastgrp) { struct mfc_cache_cmp_arg arg = { .mfc_mcastgrp = mcastgrp, .mfc_origin = origin }; return mr_mfc_find(mrt, &arg); } /* Look for a (*,G) entry */ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, __be32 mcastgrp, int vifi) { struct mfc_cache_cmp_arg arg = { .mfc_mcastgrp = mcastgrp, .mfc_origin = htonl(INADDR_ANY) }; if (mcastgrp == htonl(INADDR_ANY)) return mr_mfc_find_any_parent(mrt, vifi); return mr_mfc_find_any(mrt, vifi, &arg); } /* Look for a (S,G,iif) entry if parent != -1 */ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, __be32 origin, __be32 mcastgrp, int parent) { struct mfc_cache_cmp_arg arg = { .mfc_mcastgrp = mcastgrp, .mfc_origin = origin, }; return mr_mfc_find_parent(mrt, &arg, parent); } /* Allocate a multicast cache entry */ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c) { c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->_c.mfc_un.res.minvif = MAXVIFS; c->_c.free = ipmr_cache_free_rcu; refcount_set(&c->_c.mfc_un.res.refcount, 1); } return c; } static struct mfc_cache *ipmr_cache_alloc_unres(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c) { 
skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; } return c; } /* A cache entry has gone into a resolved state from queued */ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc_cache *uc, struct mfc_cache *c) { struct sk_buff *skb; struct nlmsgerr *e; /* Play the pending entries through our router */ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr)); if (mr_fill_mroute(mrt, skb, &c->_c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); e = nlmsg_data(nlh); e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { rcu_read_lock(); ip_mr_forward(net, mrt, skb->dev, skb, c, 0); rcu_read_unlock(); } } } /* Bounce a cache query up to mrouted and netlink. * * Called under rcu_read_lock(). */ static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { const int ihl = ip_hdrlen(pkt); struct sock *mroute_sk; struct igmphdr *igmp; struct igmpmsg *msg; struct sk_buff *skb; int ret; mroute_sk = rcu_dereference(mrt->mroute_sk); if (!mroute_sk) return -EINVAL; if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else skb = alloc_skb(128, GFP_ATOMIC); if (!skb) return -ENOBUFS; if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) { /* Ugly, but we have no choice with this interface. * Duplicate old header, fix ihl, length etc. * And all this only to mangle msg->im_msgtype and * to set msg->im_mbz to "mbz" :-) */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); skb_reset_transport_header(skb); msg = (struct igmpmsg *)skb_network_header(skb); memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); msg->im_msgtype = assert; msg->im_mbz = 0; if (assert == IGMPMSG_WRVIFWHOLE) { msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; } else { /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ int vif_num = READ_ONCE(mrt->mroute_reg_vif_num); msg->im_vif = vif_num; msg->im_vif_hi = vif_num >> 8; } ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); } else { /* Copy the IP header */ skb_set_network_header(skb, skb->len); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, ihl); /* Flag to the kernel this is a route add */ ip_hdr(skb)->protocol = 0; msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); igmp->type = assert; msg->im_msgtype = assert; igmp->code = 0; ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ skb->transport_header = skb->network_header; } igmpmsg_netlink_event(mrt, skb); /* Deliver to mrouted */ ret = sock_queue_rcv_skb(mroute_sk, skb); if (ret < 0) { net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); kfree_skb(skb); } return ret; } /* Queue a packet for resolution. It gets locked cache entry! 
*/ /* Called under rcu_read_lock() */ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb, struct net_device *dev) { const struct iphdr *iph = ip_hdr(skb); struct mfc_cache *c; bool found = false; int err; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) { found = true; break; } } if (!found) { /* Create a new entry if allowable */ c = ipmr_cache_alloc_unres(); if (!c) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); return -ENOBUFS; } /* Fill in the new cache entry */ c->_c.mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker */ spin_unlock_bh(&mfc_unres_lock); ipmr_cache_free(c); kfree_skb(skb); return err; } atomic_inc(&mrt->cache_resolve_queue_len); list_add(&c->_c.list, &mrt->mfc_unres_queue); mroute_netlink_event(mrt, c, RTM_NEWROUTE); if (atomic_read(&mrt->cache_resolve_queue_len) == 1) mod_timer(&mrt->ipmr_expire_timer, c->_c.mfc_un.unres.expires); } /* See if we can append the packet */ if (c->_c.mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; } else { if (dev) { skb->dev = dev; skb->skb_iif = dev->ifindex; } skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } spin_unlock_bh(&mfc_unres_lock); return err; } /* MFC cache manipulation by user space mroute daemon */ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { struct net *net = read_pnet(&mrt->net); struct mfc_cache *c; /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); rcu_read_unlock(); if (!c) return -ENOENT; rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params); list_del_rcu(&c->_c.list); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); mr_cache_put(&c->_c); return 0; } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, struct mfcctl *mfc, int mrtsock, int parent) { struct mfc_cache *uc, *c; struct mr_mfc *_uc; bool found; int ret; if (mfc->mfcc_parent >= MAXVIFS) return -ENFILE; /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); rcu_read_unlock(); if (c) { spin_lock(&mrt_lock); c->_c.mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; spin_unlock(&mrt_lock); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; c = ipmr_cache_alloc(); if (!c) return -ENOMEM; c->mfc_origin = mfc->mfcc_origin.s_addr; c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; c->_c.mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, ipmr_rht_params); if (ret) { pr_err("ipmr: rhtable insert error %d\n", ret); ipmr_cache_free(c); return ret; } list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); /* Check to see if we resolved a queued list. 
If so we * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { uc = (struct mfc_cache *)_uc; if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { list_del(&_uc->list); atomic_dec(&mrt->cache_resolve_queue_len); found = true; break; } } if (list_empty(&mrt->mfc_unres_queue)) del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (found) { ipmr_cache_resolve(net, mrt, uc, c); ipmr_cache_free(uc); } call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } /* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt, int flags) { struct net *net = read_pnet(&mrt->net); struct mr_mfc *c, *tmp; struct mfc_cache *cache; LIST_HEAD(list); int i; /* Shut down all active vif entries */ if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) { for (i = 0; i < mrt->maxvif; i++) { if (((mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS_STATIC)) || (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS))) continue; vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); } /* Wipe the cache */ if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) { list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) || (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC))) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); cache = (struct mfc_cache *)c; call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, mrt->id); mroute_netlink_event(mrt, cache, RTM_DELROUTE); mr_cache_put(c); } } if (flags & MRT_FLUSH_MFC) { if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); cache = (struct mfc_cache *)c; mroute_netlink_event(mrt, cache, RTM_DELROUTE); ipmr_destroy_unres(mrt, cache); } spin_unlock_bh(&mfc_unres_lock); } } } /* called from ip_ra_control(), before an RCU grace period, * we don't need to call synchronize_rcu() here */ static void mrtsock_destruct(struct sock *sk) { struct net *net = sock_net(sk); struct mr_table *mrt; rtnl_lock(); ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC); } } rtnl_unlock(); } /* Socket options and virtual interface manipulation. The whole * virtual interface system is a complete heap, but unfortunately * that's how BSD mrouted happens to think. Maybe one day with a proper * MOSPF/PIM router set up we can clean this up. */ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { struct net *net = sock_net(sk); int val, ret = 0, parent = 0; struct mr_table *mrt; struct vifctl vif; struct mfcctl mfc; bool do_wrvifwhole; u32 uval; /* There's one exception to the lock - MRT_DONE which needs to unlock */ rtnl_lock(); if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_IGMP) { ret = -EOPNOTSUPP; goto out_unlock; } mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); if (!mrt) { ret = -ENOENT; goto out_unlock; } if (optname != MRT_INIT) { if (sk != rcu_access_pointer(mrt->mroute_sk) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) { ret = -EACCES; goto out_unlock; } } switch (optname) { case MRT_INIT: if (optlen != sizeof(int)) { ret = -EINVAL; break; } if (rtnl_dereference(mrt->mroute_sk)) { ret = -EADDRINUSE; break; } ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } break; case MRT_DONE: if (sk != rcu_access_pointer(mrt->mroute_sk)) { ret = -EACCES; } else { /* We need to unlock here because mrtsock_destruct takes * care of rtnl itself and we can't change that due to * the IP_ROUTER_ALERT setsockopt which runs without it. */ rtnl_unlock(); ret = ip_ra_control(sk, 0, NULL); goto out; } break; case MRT_ADD_VIF: case MRT_DEL_VIF: if (optlen != sizeof(vif)) { ret = -EINVAL; break; } if (copy_from_sockptr(&vif, optval, sizeof(vif))) { ret = -EFAULT; break; } if (vif.vifc_vifi >= MAXVIFS) { ret = -ENFILE; break; } if (optname == MRT_ADD_VIF) { ret = vif_add(net, mrt, &vif, sk == rtnl_dereference(mrt->mroute_sk)); } else { ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } break; /* Manipulate the forwarding caches. These live * in a sort of kernel/user symbiosis. */ case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; fallthrough; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) { ret = -EINVAL; break; } if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) { ret = -EFAULT; break; } if (parent == 0) parent = mfc.mfcc_parent; if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); break; case MRT_FLUSH: if (optlen != sizeof(val)) { ret = -EINVAL; break; } if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } mroute_clean_tables(mrt, val); break; /* Control PIM assert. */ case MRT_ASSERT: if (optlen != sizeof(val)) { ret = -EINVAL; break; } if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } mrt->mroute_do_assert = val; break; case MRT_PIM: if (!ipmr_pimsm_enabled()) { ret = -ENOPROTOOPT; break; } if (optlen != sizeof(val)) { ret = -EINVAL; break; } if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE); val = !!val; if (val != mrt->mroute_do_pim) { mrt->mroute_do_pim = val; mrt->mroute_do_assert = val; mrt->mroute_do_wrvifwhole = do_wrvifwhole; } break; case MRT_TABLE: if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { ret = -ENOPROTOOPT; break; } if (optlen != sizeof(uval)) { ret = -EINVAL; break; } if (copy_from_sockptr(&uval, optval, sizeof(uval))) { ret = -EFAULT; break; } if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { mrt = ipmr_new_table(net, uval); if (IS_ERR(mrt)) ret = PTR_ERR(mrt); else raw_sk(sk)->ipmr_table = uval; } break; /* Spurious command, or MRT_VERSION which you cannot set. 
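 *
 * For reference, a userspace routing daemon is expected to drive this
 * interface roughly as follows (a sketch only: "vc" and "mc" stand for a
 * struct vifctl and a struct mfcctl whose field setup is elided):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int v = 1;
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &v, sizeof(v));
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *	...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, &v, sizeof(v));
 *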
*/ default: ret = -ENOPROTOOPT; } out_unlock: rtnl_unlock(); out: return ret; } /* Execute if this ioctl is a special mroute ioctl */ int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { switch (cmd) { /* These userspace buffers will be consumed by ipmr_ioctl() */ case SIOCGETVIFCNT: { struct sioc_vif_req buffer; return sock_ioctl_inout(sk, cmd, arg, &buffer, sizeof(buffer)); } case SIOCGETSGCNT: { struct sioc_sg_req buffer; return sock_ioctl_inout(sk, cmd, arg, &buffer, sizeof(buffer)); } } /* return code > 0 means that the ioctl was not executed */ return 1; } /* Getsock opt support for the multicast routing system. */ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, sockptr_t optlen) { int olr; int val; struct net *net = sock_net(sk); struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_IGMP) return -EOPNOTSUPP; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); if (!mrt) return -ENOENT; switch (optname) { case MRT_VERSION: val = 0x0305; break; case MRT_PIM: if (!ipmr_pimsm_enabled()) return -ENOPROTOOPT; val = mrt->mroute_do_pim; break; case MRT_ASSERT: val = mrt->mroute_do_assert; break; default: return -ENOPROTOOPT; } if (copy_from_sockptr(&olr, optlen, sizeof(int))) return -EFAULT; if (olr < 0) return -EINVAL; olr = min_t(unsigned int, olr, sizeof(int)); if (copy_to_sockptr(optlen, &olr, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, olr)) return -EFAULT; return 0; } /* The IP multicast ioctl support routines. */ int ipmr_ioctl(struct sock *sk, int cmd, void *arg) { struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); struct sioc_vif_req *vr; struct sioc_sg_req *sr; struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); if (!mrt) return -ENOENT; switch (cmd) { case SIOCGETVIFCNT: vr = (struct sioc_vif_req *)arg; if (vr->vifi >= mrt->maxvif) return -EINVAL; vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif); rcu_read_lock(); vif = &mrt->vif_table[vr->vifi]; if (VIF_EXISTS(mrt, vr->vifi)) { vr->icount = READ_ONCE(vif->pkt_in); vr->ocount = READ_ONCE(vif->pkt_out); vr->ibytes = READ_ONCE(vif->bytes_in); vr->obytes = READ_ONCE(vif->bytes_out); rcu_read_unlock(); return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: sr = (struct sioc_sg_req *)arg; rcu_read_lock(); c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr); if (c) { sr->pktcnt = c->_c.mfc_un.res.pkt; sr->bytecnt = c->_c.mfc_un.res.bytes; sr->wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; } } #ifdef CONFIG_COMPAT struct compat_sioc_sg_req { struct in_addr src; struct in_addr grp; compat_ulong_t pktcnt; compat_ulong_t bytecnt; compat_ulong_t wrong_if; }; struct compat_sioc_vif_req { vifi_t vifi; /* Which iface */ compat_ulong_t icount; compat_ulong_t ocount; compat_ulong_t ibytes; compat_ulong_t obytes; }; int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { struct compat_sioc_sg_req sr; struct compat_sioc_vif_req vr; struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); if (!mrt) return -ENOENT; switch (cmd) { case SIOCGETVIFCNT: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; if (vr.vifi >= mrt->maxvif) return -EINVAL; vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); rcu_read_lock(); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { vr.icount = READ_ONCE(vif->pkt_in); vr.ocount = READ_ONCE(vif->pkt_out); vr.ibytes = READ_ONCE(vif->bytes_in); vr.obytes = READ_ONCE(vif->bytes_out); rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; rcu_read_lock(); c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); if (c) { sr.pktcnt = c->_c.mfc_un.res.pkt; sr.bytecnt = c->_c.mfc_un.res.bytes; sr.wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; } } #endif static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr_table *mrt; struct vif_device *v; int ct; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; ipmr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (rcu_access_pointer(v->dev) == dev) vif_delete(mrt, ct, 1, NULL); } } return NOTIFY_DONE; } static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; /* Encapsulate a packet by attaching a valid IPIP header to it. * This avoids tunnel drivers and other mess and gives us the speed so * important for multicast video. */ static void ip_encap(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph; const struct iphdr *old_iph = ip_hdr(skb); skb_push(skb, sizeof(struct iphdr)); skb->transport_header = skb->network_header; skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = 4; iph->tos = old_iph->tos; iph->ttl = old_iph->ttl; iph->frag_off = 0; iph->daddr = daddr; iph->saddr = saddr; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); ip_select_ident(net, skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); nf_reset_ct(skb); } static inline int ipmr_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); return dst_output(net, sk, skb); } #ifdef CONFIG_NET_SWITCHDEV static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, int in_vifi, int out_vifi) { struct vif_device *out_vif = &mrt->vif_table[out_vifi]; struct vif_device *in_vif = &mrt->vif_table[in_vifi]; if (!skb->offload_l3_fwd_mark) return false; if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) return false; return netdev_phys_item_id_same(&out_vif->dev_parent_id, &in_vif->dev_parent_id); } #else static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, int in_vifi, int out_vifi) { return false; } #endif /* Processing handlers for ipmr_forward, under rcu_read_lock() */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, int in_vifi, struct sk_buff *skb, int vifi) { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *vif_dev; 
struct net_device *dev; struct rtable *rt; struct flowi4 fl4; int encap = 0; vif_dev = vif_dev_read(vif); if (!vif_dev) goto out_free; if (vif->flags & VIFF_REGISTER) { WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); DEV_STATS_INC(vif_dev, tx_packets); ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) goto out_free; if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, vif->remote, vif->local, 0, 0, IPPROTO_IPIP, RT_TOS(iph->tos), vif->link); if (IS_ERR(rt)) goto out_free; encap = sizeof(struct iphdr); } else { rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 0, 0, IPPROTO_IPIP, RT_TOS(iph->tos), vif->link); if (IS_ERR(rt)) goto out_free; } dev = rt->dst.dev; if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not * allow to send ICMP, so that packets will disappear * to blackhole. */ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); goto out_free; } WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); ip_decrease_ttl(ip_hdr(skb)); /* FIXME: forward and output firewalls used to be called here. * What do we do with netfilter? -- RR */ if (vif->flags & VIFF_TUNNEL) { ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ DEV_STATS_INC(vif_dev, tx_packets); DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); } IPCB(skb)->flags |= IPSKB_FORWARDED; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output * interfaces. It is clear, if mrouter runs a multicasting * program, it should receive packets not depending to what interface * program is joined. * If we will not make it, the program will have to join on all * interfaces. On the other hand, multihoming host (or router, but * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, net, NULL, skb, skb->dev, dev, ipmr_forward_finish); return; out_free: kfree_skb(skb); } /* Called with mrt_lock or rcu_read_lock() */ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) { int ct; /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */ for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; } /* "local" means that we should preserve one skb (for local delivery) */ /* Called uner rcu_read_lock() */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *c, int local) { int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct; vif = c->_c.mfc_parent; c->_c.mfc_un.res.pkt++; c->_c.mfc_un.res.bytes += skb->len; c->_c.mfc_un.res.lastuse = jiffies; if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { struct mfc_cache *cache_proxy; /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. 
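 * (That is, the (*,*) proxy entry looked up below must list the arrival
 * vif with a forwarding TTL below 255; only in that case is the normal
 * incoming-interface check skipped.)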
*/ cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) goto forward; } /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... * * The best workaround until routing daemons will be * fixed is not to redistribute packet, if it was * send through wrong interface. It means, that * multicast applications WILL NOT work for * (S,G), which have default multicast route pointing * to wrong oif. In any case, it is not a good * idea to use multicasting applications on router. */ goto dont_forward; } c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, * so that we cannot check that packet arrived on an oif. * It is bad, but otherwise we would need to move pretty * large chunk of pimd to kernel. Ough... --ANK */ (mrt->mroute_do_pim || c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); if (mrt->mroute_do_wrvifwhole) ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRVIFWHOLE); } goto dont_forward; } forward: WRITE_ONCE(mrt->vif_table[vif].pkt_in, mrt->vif_table[vif].pkt_in + 1); WRITE_ONCE(mrt->vif_table[vif].bytes_in, mrt->vif_table[vif].bytes_in + skb->len); /* Forward the frame */ if (c->mfc_origin == htonl(INADDR_ANY) && c->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && true_vifi != c->_c.mfc_parent && ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { /* It's an (*,*) entry and the packet is not coming from * the upstream: forward the packet to the upstream * only. */ psend = c->_c.mfc_parent; goto last_forward; } goto dont_forward; } for (ct = c->_c.mfc_un.res.maxvif - 1; ct >= c->_c.mfc_un.res.minvif; ct--) { /* For (*,G) entry, don't forward to the incoming interface */ if ((c->mfc_origin != htonl(INADDR_ANY) || ct != true_vifi) && ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) ipmr_queue_xmit(net, mrt, true_vifi, skb2, psend); } psend = ct; } } last_forward: if (psend != -1) { if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) ipmr_queue_xmit(net, mrt, true_vifi, skb2, psend); } else { ipmr_queue_xmit(net, mrt, true_vifi, skb, psend); return; } } dont_forward: if (!local) kfree_skb(skb); } static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, .flowi4_tos = RT_TOS(iph->tos), .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? LOOPBACK_IFINDEX : skb->dev->ifindex), .flowi4_mark = skb->mark, }; struct mr_table *mrt; int err; err = ipmr_fib_lookup(net, &fl4, &mrt); if (err) return ERR_PTR(err); return mrt; } /* Multicast packets for forwarding arrive here * Called with rcu_read_lock(); */ int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; struct net *net = dev_net(skb->dev); int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; struct mr_table *mrt; struct net_device *dev; /* skb->dev passed in is the loX master dev for vrfs. 
* As there are no vifs associated with loopback devices, * get the proper interface that does have a vif associated with it. */ dev = skb->dev; if (netif_is_l3_master(skb->dev)) { dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) { kfree_skb(skb); return -ENODEV; } } /* Packet is looped back after forward, it should not be * forwarded second time, but still can be delivered locally. */ if (IPCB(skb)->flags & IPSKB_FORWARDED) goto dont_forward; mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) { kfree_skb(skb); return PTR_ERR(mrt); } if (!local) { if (IPCB(skb)->opt.router_alert) { if (ip_call_ra_chain(skb)) return 0; } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { /* IGMPv1 (and broken IGMPv2 implementations sort of * Cisco IOS <= 11.2(8)) do not put router alert * option to IGMP packets destined to routable * groups. It is very bad, because it means * that we can forward NO IGMP messages. */ struct sock *mroute_sk; mroute_sk = rcu_dereference(mrt->mroute_sk); if (mroute_sk) { nf_reset_ct(skb); raw_rcv(mroute_sk, skb); return 0; } } } /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); if (!cache) { int vif = ipmr_find_vif(mrt, dev); if (vif >= 0) cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, vif); } /* No usable cache entry */ if (!cache) { int vif; if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); ip_local_deliver(skb); if (!skb2) return -ENOBUFS; skb = skb2; } vif = ipmr_find_vif(mrt, dev); if (vif >= 0) return ipmr_cache_unresolved(mrt, vif, skb, dev); kfree_skb(skb); return -ENODEV; } ip_mr_forward(net, mrt, dev, skb, cache, local); if (local) return ip_local_deliver(skb); return 0; dont_forward: if (local) return ip_local_deliver(skb); kfree_skb(skb); return 0; } #ifdef CONFIG_IP_PIMSM_V1 /* Handle IGMP messages of PIMv1 */ int pim_rcv_v1(struct sk_buff *skb) { struct igmphdr *pim; struct net *net = dev_net(skb->dev); struct mr_table *mrt; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = igmp_hdr(skb); mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) goto drop; if (!mrt->mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); } return 0; } #endif #ifdef CONFIG_IP_PIMSM_V2 static int pim_rcv(struct sk_buff *skb) { struct pimreghdr *pim; struct net *net = dev_net(skb->dev); struct mr_table *mrt; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || (pim->flags & PIM_NULL_REGISTER) || (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) goto drop; if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); } return 0; } #endif int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; int err; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); if (!mrt) return -ENOENT; rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); if (!cache && skb->dev) { int vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) cache = ipmr_cache_find_any(mrt, daddr, vif); } if (!cache) { struct sk_buff *skb2; struct iphdr *iph; struct net_device *dev; int vif = -1; dev = skb->dev; if (dev) vif = ipmr_find_vif(mrt, dev); if (vif < 0) { 
rcu_read_unlock(); return -ENODEV; } skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); if (!skb2) { rcu_read_unlock(); return -ENOMEM; } NETLINK_CB(skb2).portid = portid; skb_push(skb2, sizeof(struct iphdr)); skb_reset_network_header(skb2); iph = ip_hdr(skb2); iph->ihl = sizeof(struct iphdr) >> 2; iph->saddr = saddr; iph->daddr = daddr; iph->version = 0; err = ipmr_cache_unresolved(mrt, vif, skb2, dev); rcu_read_unlock(); return err; } err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); rcu_read_unlock(); return err; } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mfc_cache *c, int cmd, int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); rtm->rtm_family = RTNL_FAMILY_IPMR; rtm->rtm_dst_len = 32; rtm->rtm_src_len = 32; rtm->rtm_tos = 0; rtm->rtm_table = mrt->id; if (nla_put_u32(skb, RTA_TABLE, mrt->id)) goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; if (c->_c.mfc_flags & MFC_STATIC) rtm->rtm_protocol = RTPROT_STATIC; else rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) goto nla_put_failure; err = mr_fill_mroute(mrt, skb, &c->_c, rtm); /* do not break the dump if cache is unresolved */ if (err < 0 && err != -ENOENT) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags) { return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c, cmd, flags); } static size_t mroute_msgsize(bool unresolved, int maxvif) { size_t len = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_SRC */ + nla_total_size(4) /* RTA_DST */ ; if (!unresolved) len = len + nla_total_size(4) /* RTA_IIF */ + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; } static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS, mrt->maxvif), GFP_ATOMIC); if (!skb) goto errout; err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); return; errout: kfree_skb(skb); if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); } static size_t igmpmsg_netlink_msgsize(size_t payloadlen) { size_t len = NLMSG_ALIGN(sizeof(struct rtgenmsg)) + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ + nla_total_size(4) /* IPMRA_CREPORT_TABLE */ /* IPMRA_CREPORT_PKT */ + nla_total_size(payloadlen) ; return len; } static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; struct rtgenmsg *rtgenm; struct igmpmsg *msg; struct sk_buff *skb; struct nlattr *nla; int payloadlen; payloadlen = pkt->len - sizeof(struct igmpmsg); msg = (struct igmpmsg *)skb_network_header(pkt); skb = 
nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); if (!skb) goto errout; nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, sizeof(struct rtgenmsg), 0); if (!nlh) goto errout; rtgenm = nlmsg_data(nlh); rtgenm->rtgen_family = RTNL_FAMILY_IPMR; if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif | (msg->im_vif_hi << 8)) || nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, msg->im_src.s_addr) || nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, msg->im_dst.s_addr) || nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id)) goto nla_put_failure; nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen); if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), nla_data(nla), payloadlen)) goto nla_put_failure; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); return; nla_put_failure: nlmsg_cancel(skb, nlh); errout: kfree_skb(skb); rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); } static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct rtmsg *rtm; int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); if (err) return err; if ((tb[RTA_SRC] && !rtm->rtm_src_len) || (tb[RTA_DST] && !rtm->rtm_dst_len)) { NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); return -EINVAL; } for (i = 0; i <= RTA_MAX; i++) { if (!tb[i]) continue; switch (i) { case RTA_SRC: case RTA_DST: case RTA_TABLE: break; default: NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request"); return -EINVAL; } } return 0; } static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX + 1]; struct sk_buff *skb = NULL; struct mfc_cache *cache; struct mr_table *mrt; __be32 src, grp; u32 tableid; int err; err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; mrt = ipmr_get_table(net, tableid ? 
tableid : RT_TABLE_DEFAULT); if (!mrt) { err = -ENOENT; goto errout_free; } /* entries are added/deleted only under RTNL */ rcu_read_lock(); cache = ipmr_cache_find(mrt, src, grp); rcu_read_unlock(); if (!cache) { err = -ENOENT; goto errout_free; } skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); if (!skb) { err = -ENOBUFS; goto errout_free; } err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); if (err < 0) goto errout_free; err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; errout_free: kfree_skb(skb); goto errout; } static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct fib_dump_filter filter = { .rtnl_held = true, }; int err; if (cb->strict_check) { err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, &filter, cb); if (err < 0) return err; } if (filter.table_id) { struct mr_table *mrt; mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) return skb->len; NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); return -ENOENT; } err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, &mfc_unres_lock, &filter); return skb->len ? : err; } return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, _ipmr_fill_mroute, &mfc_unres_lock, &filter); } static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { [RTA_SRC] = { .type = NLA_U32 }, [RTA_DST] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, [RTA_TABLE] = { .type = NLA_U32 }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, }; static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) { switch (rtm_protocol) { case RTPROT_STATIC: case RTPROT_MROUTED: return true; } return false; } static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) { struct rtnexthop *rtnh = nla_data(nla); int remaining = nla_len(nla), vifi = 0; while (rtnh_ok(rtnh, remaining)) { mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; if (++vifi == MAXVIFS) break; rtnh = rtnh_next(rtnh, &remaining); } return remaining > 0 ? -EINVAL : vifi; } /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, struct mfcctl *mfcc, int *mrtsock, struct mr_table **mrtret, struct netlink_ext_ack *extack) { struct net_device *dev = NULL; u32 tblid = RT_TABLE_DEFAULT; struct mr_table *mrt; struct nlattr *attr; struct rtmsg *rtm; int ret, rem; ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, extack); if (ret < 0) goto out; rtm = nlmsg_data(nlh); ret = -EINVAL; if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || rtm->rtm_type != RTN_MULTICAST || rtm->rtm_scope != RT_SCOPE_UNIVERSE || !ipmr_rtm_validate_proto(rtm->rtm_protocol)) goto out; memset(mfcc, 0, sizeof(*mfcc)); mfcc->mfcc_parent = -1; ret = 0; nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { switch (nla_type(attr)) { case RTA_SRC: mfcc->mfcc_origin.s_addr = nla_get_be32(attr); break; case RTA_DST: mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); break; case RTA_IIF: dev = __dev_get_by_index(net, nla_get_u32(attr)); if (!dev) { ret = -ENODEV; goto out; } break; case RTA_MULTIPATH: if (ipmr_nla_get_ttls(attr, mfcc) < 0) { ret = -EINVAL; goto out; } break; case RTA_PREFSRC: ret = 1; break; case RTA_TABLE: tblid = nla_get_u32(attr); break; } } mrt = ipmr_get_table(net, tblid); if (!mrt) { ret = -ENOENT; goto out; } *mrtret = mrt; *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 
1 : 0; if (dev) mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); out: return ret; } /* takes care of both newroute and delroute */ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); int ret, mrtsock, parent; struct mr_table *tbl; struct mfcctl mfcc; mrtsock = 0; tbl = NULL; ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); if (ret < 0) return ret; parent = ret ? mfcc.mfcc_parent : -1; if (nlh->nlmsg_type == RTM_NEWROUTE) return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); else return ipmr_mfc_delete(tbl, &mfcc, parent); } static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) { u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, mrt->mroute_reg_vif_num) || nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, mrt->mroute_do_assert) || nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) || nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE, mrt->mroute_do_wrvifwhole)) return false; return true; } static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) { struct net_device *vif_dev; struct nlattr *vif_nest; struct vif_device *vif; vif = &mrt->vif_table[vifid]; vif_dev = rtnl_dereference(vif->dev); /* if the VIF doesn't exist just continue */ if (!vif_dev) return true; vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); if (!vif_nest) return false; if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) || nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, IPMRA_VIFA_PAD) || nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, IPMRA_VIFA_PAD) || nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, IPMRA_VIFA_PAD) || nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, IPMRA_VIFA_PAD) || nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { nla_nest_cancel(skb, vif_nest); return false; } nla_nest_end(skb, vif_nest); return true; } static int ipmr_valid_dumplink(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct ifinfomsg *ifm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*ifm))) { NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump"); return -EINVAL; } ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request"); return -EINVAL; } return 0; } static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh = NULL; unsigned int t = 0, s_t; unsigned int e = 0, s_e; struct mr_table *mrt; if (cb->strict_check) { int err = ipmr_valid_dumplink(cb->nlh, cb->extack); if (err < 0) return err; } s_t = cb->args[0]; s_e = cb->args[1]; ipmr_for_each_table(mrt, net) { struct nlattr *vifs, *af; struct ifinfomsg *hdr; u32 i; if (t < s_t) goto skip_table; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, sizeof(*hdr), NLM_F_MULTI); if (!nlh) break; hdr = nlmsg_data(nlh); memset(hdr, 0, sizeof(*hdr)); hdr->ifi_family = 
RTNL_FAMILY_IPMR; af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af) { nlmsg_cancel(skb, nlh); goto out; } if (!ipmr_fill_table(mrt, skb)) { nlmsg_cancel(skb, nlh); goto out; } vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS); if (!vifs) { nla_nest_end(skb, af); nlmsg_end(skb, nlh); goto out; } for (i = 0; i < mrt->maxvif; i++) { if (e < s_e) goto skip_entry; if (!ipmr_fill_vif(mrt, i, skb)) { nla_nest_end(skb, vifs); nla_nest_end(skb, af); nlmsg_end(skb, nlh); goto out; } skip_entry: e++; } s_e = 0; e = 0; nla_nest_end(skb, vifs); nla_nest_end(skb, af); nlmsg_end(skb, nlh); skip_table: t++; } out: cb->args[1] = e; cb->args[0] = t; return skb->len; } #ifdef CONFIG_PROC_FS /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif */ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); struct mr_table *mrt; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); if (!mrt) return ERR_PTR(-ENOENT); iter->mrt = mrt; rcu_read_lock(); return mr_vif_seq_start(seq, pos); } static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ipmr_vif_seq_show(struct seq_file *seq, void *v) { struct mr_vif_iter *iter = seq->private; struct mr_table *mrt = iter->mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); } else { const struct vif_device *vif = v; const struct net_device *vif_dev; const char *name; vif_dev = vif_dev_read(vif); name = vif_dev ? vif_dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", vif - mrt->vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags, vif->local, vif->remote); } return 0; } static const struct seq_operations ipmr_vif_seq_ops = { .start = ipmr_vif_seq_start, .next = mr_vif_seq_next, .stop = ipmr_vif_seq_stop, .show = ipmr_vif_seq_show, }; static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); struct mr_table *mrt; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); if (!mrt) return ERR_PTR(-ENOENT); return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Group Origin Iif Pkts Bytes Wrong Oifs\n"); } else { const struct mfc_cache *mfc = v; const struct mr_mfc_iter *it = seq->private; const struct mr_table *mrt = it->mrt; seq_printf(seq, "%08X %08X %-3hd", (__force u32) mfc->mfc_mcastgrp, (__force u32) mfc->mfc_origin, mfc->_c.mfc_parent); if (it->cache != &mrt->mfc_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", mfc->_c.mfc_un.res.pkt, mfc->_c.mfc_un.res.bytes, mfc->_c.mfc_un.res.wrong_if); for (n = mfc->_c.mfc_un.res.minvif; n < mfc->_c.mfc_un.res.maxvif; n++) { if (VIF_EXISTS(mrt, n) && mfc->_c.mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", n, mfc->_c.mfc_un.res.ttls[n]); } } else { /* unresolved mfc_caches don't contain * pkt, bytes and wrong_if values */ seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul); } seq_putc(seq, '\n'); } return 0; } static const struct seq_operations ipmr_mfc_seq_ops = { .start = ipmr_mfc_seq_start, .next = mr_mfc_seq_next, .stop = mr_mfc_seq_stop, .show = ipmr_mfc_seq_show, }; #endif #ifdef CONFIG_IP_PIMSM_V2 static const struct net_protocol pim_protocol = { .handler = pim_rcv, }; #endif static unsigned int ipmr_seq_read(struct net *net) { ASSERT_RTNL(); return 
net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); } static int ipmr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, ipmr_mr_table_iter, extack); } static const struct fib_notifier_ops ipmr_notifier_ops_template = { .family = RTNL_FAMILY_IPMR, .fib_seq_read = ipmr_seq_read, .fib_dump = ipmr_dump, .owner = THIS_MODULE, }; static int __net_init ipmr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; net->ipv4.ipmr_seq = 0; ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); net->ipv4.ipmr_notifier_ops = ops; return 0; } static void __net_exit ipmr_notifier_exit(struct net *net) { fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); net->ipv4.ipmr_notifier_ops = NULL; } /* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; err = ipmr_notifier_init(net); if (err) goto ipmr_notifier_fail; err = ipmr_rules_init(net); if (err < 0) goto ipmr_rules_fail; #ifdef CONFIG_PROC_FS err = -ENOMEM; if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops, sizeof(struct mr_vif_iter))) goto proc_vif_fail; if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops, sizeof(struct mr_mfc_iter))) goto proc_cache_fail; #endif return 0; #ifdef CONFIG_PROC_FS proc_cache_fail: remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: rtnl_lock(); ipmr_rules_exit(net); rtnl_unlock(); #endif ipmr_rules_fail: ipmr_notifier_exit(net); ipmr_notifier_fail: return err; } static void __net_exit ipmr_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS remove_proc_entry("ip_mr_cache", net->proc_net); remove_proc_entry("ip_mr_vif", net->proc_net); #endif ipmr_notifier_exit(net); } static void __net_exit ipmr_net_exit_batch(struct list_head *net_list) { struct net *net; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) ipmr_rules_exit(net); rtnl_unlock(); } static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, .exit_batch = ipmr_net_exit_batch, }; int __init ip_mr_init(void) { int err; mrt_cachep = KMEM_CACHE(mfc_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC); err = register_pernet_subsys(&ipmr_net_ops); if (err) goto reg_pernet_fail; err = register_netdevice_notifier(&ip_mr_notifier); if (err) goto reg_notif_fail; #ifdef CONFIG_IP_PIMSM_V2 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { pr_err("%s: can't add PIM protocol\n", __func__); err = -EAGAIN; goto add_proto_fail; } #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, ipmr_rtm_route, NULL, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, ipmr_rtm_route, NULL, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, NULL, ipmr_rtm_dumplink, 0); return 0; #ifdef CONFIG_IP_PIMSM_V2 add_proto_fail: unregister_netdevice_notifier(&ip_mr_notifier); #endif reg_notif_fail: unregister_pernet_subsys(&ipmr_net_ops); reg_pernet_fail: kmem_cache_destroy(mrt_cachep); return err; }
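The control path implemented above (the mroute_sk socket, the MRT_* socket options handled elsewhere in this file, and the IGMPMSG_* upcalls emitted by ipmr_cache_report()/igmpmsg_netlink_event()) is driven from user space by a multicast routing daemon. The following is a minimal, illustrative userspace sketch of that API, not part of ipmr.c itself; the interface index and the source/group addresses are placeholder assumptions, and error handling is reduced to perror().

/*
 * Minimal userspace sketch of the kernel API served above: become the mroute
 * control socket, add one VIF and one (S,G) entry, then tear down.
 * Illustrative only; requires CAP_NET_ADMIN, and the ifindex/addresses
 * below are placeholder assumptions.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/mroute.h>	/* MRT_*, struct vifctl, struct mfcctl */

int main(void)
{
	int one = 1;
	struct vifctl vc;
	struct mfcctl mc;
	/* The mroute control socket is a raw IGMP socket (see mroute_sk). */
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0) {
		perror("MRT_INIT");
		return 1;
	}

	/* Register VIF 0 on a placeholder interface (ifindex 2). */
	memset(&vc, 0, sizeof(vc));
	vc.vifc_vifi = 0;
	vc.vifc_flags = VIFF_USE_IFINDEX;
	vc.vifc_threshold = 1;		/* minimum TTL needed to forward */
	vc.vifc_lcl_ifindex = 2;	/* placeholder ifindex */
	if (setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc)) < 0)
		perror("MRT_ADD_VIF");

	/* Install an (S,G) entry with parent VIF 0.  mfcc_ttls[] is left at
	 * zero here, which means "do not forward" on every output VIF.
	 */
	memset(&mc, 0, sizeof(mc));
	inet_pton(AF_INET, "192.0.2.1", &mc.mfcc_origin);	/* placeholder S */
	inet_pton(AF_INET, "239.1.1.1", &mc.mfcc_mcastgrp);	/* placeholder G */
	mc.mfcc_parent = 0;
	if (setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc)) < 0)
		perror("MRT_ADD_MFC");

	/* IGMPMSG_NOCACHE/WRONGVIF upcalls (struct igmpmsg) arrive via read(fd). */
	setsockopt(fd, IPPROTO_IP, MRT_DONE, NULL, 0);
	close(fd);
	return 0;
}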
// SPDX-License-Identifier: GPL-2.0-or-later /* * USB HID quirks support for Linux * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby <jirislaby@gmail.com> * Copyright (c) 2019 Paul Pawlowski <paul@mrarm.io> */ /* */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> #include <linux/hid.h> #include <linux/jiffies.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/leds.h> #include "hid-ids.h" #define APPLE_RDESC_JIS BIT(0) #define APPLE_IGNORE_MOUSE BIT(1) #define APPLE_HAS_FN BIT(2) /* BIT(3) reserved, was: APPLE_HIDDEV */ #define APPLE_ISO_TILDE_QUIRK BIT(4) #define APPLE_MIGHTYMOUSE BIT(5) #define APPLE_INVERT_HWHEEL BIT(6) /* BIT(7) reserved, was: APPLE_IGNORE_HIDINPUT */ #define APPLE_NUMLOCK_EMULATION BIT(8) #define APPLE_RDESC_BATTERY BIT(9) #define APPLE_BACKLIGHT_CTL BIT(10) #define APPLE_IS_NON_APPLE BIT(11) #define APPLE_FLAG_FKEY 0x01 #define HID_COUNTRY_INTERNATIONAL_ISO 13 #define APPLE_BATTERY_TIMEOUT_MS 60000 static unsigned int fnmode = 3; module_param(fnmode, uint, 0644); MODULE_PARM_DESC(fnmode, "Mode of fn key on Apple keyboards (0 = disabled, " "1 = fkeyslast, 2 = fkeysfirst, [3] = auto)"); static int iso_layout = -1; module_param(iso_layout, int, 0644); MODULE_PARM_DESC(iso_layout, "Swap the backtick/tilde and greater-than/less-than keys. " "([-1] = auto, 0 = disabled, 1 = enabled)"); static unsigned int swap_opt_cmd; module_param(swap_opt_cmd, uint, 0644); MODULE_PARM_DESC(swap_opt_cmd, "Swap the Option (\"Alt\") and Command (\"Flag\") keys. " "(For people who want to keep Windows PC keyboard muscle memory. " "[0] = as-is, Mac layout. 1 = swapped, Windows layout., 2 = swapped, Swap only left side)"); static unsigned int swap_ctrl_cmd; module_param(swap_ctrl_cmd, uint, 0644); MODULE_PARM_DESC(swap_ctrl_cmd, "Swap the Control (\"Ctrl\") and Command (\"Flag\") keys. " "(For people who are used to Mac shortcuts involving Command instead of Control. " "[0] = No change. 1 = Swapped.)"); static unsigned int swap_fn_leftctrl; module_param(swap_fn_leftctrl, uint, 0644); MODULE_PARM_DESC(swap_fn_leftctrl, "Swap the Fn and left Control keys. " "(For people who want to keep PC keyboard muscle memory. 
" "[0] = as-is, Mac layout, 1 = swapped, PC layout)"); struct apple_non_apple_keyboard { char *name; }; struct apple_sc_backlight { struct led_classdev cdev; struct hid_device *hdev; }; struct apple_sc { struct hid_device *hdev; unsigned long quirks; unsigned int fn_on; unsigned int fn_found; DECLARE_BITMAP(pressed_numlock, KEY_CNT); struct timer_list battery_timer; struct apple_sc_backlight *backlight; }; struct apple_key_translation { u16 from; u16 to; u8 flags; }; static const struct apple_key_translation magic_keyboard_alu_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY }, { KEY_F6, KEY_NUMLOCK, APPLE_FLAG_FKEY }, { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY }, { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY }, { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY }, { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, { KEY_UP, KEY_PAGEUP }, { KEY_DOWN, KEY_PAGEDOWN }, { KEY_LEFT, KEY_HOME }, { KEY_RIGHT, KEY_END }, { } }; static const struct apple_key_translation magic_keyboard_2015_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY }, { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY }, { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY }, { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY }, { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, { KEY_UP, KEY_PAGEUP }, { KEY_DOWN, KEY_PAGEDOWN }, { KEY_LEFT, KEY_HOME }, { KEY_RIGHT, KEY_END }, { } }; struct apple_backlight_config_report { u8 report_id; u8 version; u16 backlight_off, backlight_on_min, backlight_on_max; }; struct apple_backlight_set_report { u8 report_id; u8 version; u16 backlight; u16 rate; }; static const struct apple_key_translation apple2021_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, { KEY_F4, KEY_SEARCH, APPLE_FLAG_FKEY }, { KEY_F5, KEY_MICMUTE, APPLE_FLAG_FKEY }, { KEY_F6, KEY_SLEEP, APPLE_FLAG_FKEY }, { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY }, { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY }, { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY }, { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, { KEY_UP, KEY_PAGEUP }, { KEY_DOWN, KEY_PAGEDOWN }, { KEY_LEFT, KEY_HOME }, { KEY_RIGHT, KEY_END }, { } }; static const struct apple_key_translation macbookair_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY }, { KEY_F6, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, { KEY_F7, KEY_PLAYPAUSE, APPLE_FLAG_FKEY }, { KEY_F8, KEY_NEXTSONG, APPLE_FLAG_FKEY }, { KEY_F9, KEY_MUTE, APPLE_FLAG_FKEY }, { KEY_F10, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, { KEY_F11, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, { KEY_F12, KEY_EJECTCD, APPLE_FLAG_FKEY }, { KEY_UP, KEY_PAGEUP }, { KEY_DOWN, KEY_PAGEDOWN }, { KEY_LEFT, KEY_HOME }, { KEY_RIGHT, 
KEY_END }, { } }; static const struct apple_key_translation macbookpro_no_esc_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, { KEY_GRAVE, KEY_ESC }, { KEY_1, KEY_F1 }, { KEY_2, KEY_F2 }, { KEY_3, KEY_F3 }, { KEY_4, KEY_F4 }, { KEY_5, KEY_F5 }, { KEY_6, KEY_F6 }, { KEY_7, KEY_F7 }, { KEY_8, KEY_F8 }, { KEY_9, KEY_F9 }, { KEY_0, KEY_F10 }, { KEY_MINUS, KEY_F11 }, { KEY_EQUAL, KEY_F12 }, { KEY_UP, KEY_PAGEUP }, { KEY_DOWN, KEY_PAGEDOWN }, { KEY_LEFT, KEY_HOME }, { KEY_RIGHT, KEY_END }, { } }; static const struct apple_key_translation macbookpro_dedicated_esc_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, { KEY_1, KEY_F1 }, { KEY_2, KEY_F2 }, { KEY_3, KEY_F3 }, { KEY_4, KEY_F4 }, { KEY_5, KEY_F5 }, { KEY_6, KEY_F6 }, { KEY_7, KEY_F7 }, { KEY_8, KEY_F8 }, { KEY_9, KEY_F9 }, { KEY_0, KEY_F10 }, { KEY_MINUS, KEY_F11 }, { KEY_EQUAL, KEY_F12 }, { KEY_UP, KEY_PAGEUP }, { KEY_DOWN, KEY_PAGEDOWN }, { KEY_LEFT, KEY_HOME }, { KEY_RIGHT, KEY_END }, { } }; static const struct apple_key_translation apple_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY }, { KEY_F5, KEY_KBDILLUMDOWN, APPLE_FLAG_FKEY }, { KEY_F6, KEY_KBDILLUMUP, APPLE_FLAG_FKEY }, { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY }, { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY }, { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY }, { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, { KEY_UP, KEY_PAGEUP }, { KEY_DOWN, KEY_PAGEDOWN }, { KEY_LEFT, KEY_HOME }, { KEY_RIGHT, KEY_END }, { } }; static const struct apple_key_translation powerbook_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, { KEY_F3, KEY_MUTE, APPLE_FLAG_FKEY }, { KEY_F4, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, { KEY_F5, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, { KEY_F6, KEY_NUMLOCK, APPLE_FLAG_FKEY }, { KEY_F7, KEY_SWITCHVIDEOMODE, APPLE_FLAG_FKEY }, { KEY_F8, KEY_KBDILLUMTOGGLE, APPLE_FLAG_FKEY }, { KEY_F9, KEY_KBDILLUMDOWN, APPLE_FLAG_FKEY }, { KEY_F10, KEY_KBDILLUMUP, APPLE_FLAG_FKEY }, { KEY_UP, KEY_PAGEUP }, { KEY_DOWN, KEY_PAGEDOWN }, { KEY_LEFT, KEY_HOME }, { KEY_RIGHT, KEY_END }, { } }; static const struct apple_key_translation powerbook_numlock_keys[] = { { KEY_J, KEY_KP1 }, { KEY_K, KEY_KP2 }, { KEY_L, KEY_KP3 }, { KEY_U, KEY_KP4 }, { KEY_I, KEY_KP5 }, { KEY_O, KEY_KP6 }, { KEY_7, KEY_KP7 }, { KEY_8, KEY_KP8 }, { KEY_9, KEY_KP9 }, { KEY_M, KEY_KP0 }, { KEY_DOT, KEY_KPDOT }, { KEY_SLASH, KEY_KPPLUS }, { KEY_SEMICOLON, KEY_KPMINUS }, { KEY_P, KEY_KPASTERISK }, { KEY_MINUS, KEY_KPEQUAL }, { KEY_0, KEY_KPSLASH }, { KEY_F6, KEY_NUMLOCK }, { KEY_KPENTER, KEY_KPENTER }, { KEY_BACKSPACE, KEY_BACKSPACE }, { } }; static const struct apple_key_translation apple_iso_keyboard[] = { { KEY_GRAVE, KEY_102ND }, { KEY_102ND, KEY_GRAVE }, { } }; static const struct apple_key_translation swapped_option_cmd_keys[] = { { KEY_LEFTALT, KEY_LEFTMETA }, { KEY_LEFTMETA, KEY_LEFTALT }, { KEY_RIGHTALT, KEY_RIGHTMETA }, { KEY_RIGHTMETA, KEY_RIGHTALT }, { } }; static const struct apple_key_translation swapped_option_cmd_left_keys[] = { { KEY_LEFTALT, KEY_LEFTMETA }, { KEY_LEFTMETA, KEY_LEFTALT }, { } }; static const struct apple_key_translation swapped_ctrl_cmd_keys[] = { { KEY_LEFTCTRL, KEY_LEFTMETA }, { KEY_LEFTMETA, 
KEY_LEFTCTRL }, { KEY_RIGHTCTRL, KEY_RIGHTMETA }, { KEY_RIGHTMETA, KEY_RIGHTCTRL }, { } }; static const struct apple_key_translation swapped_fn_leftctrl_keys[] = { { KEY_FN, KEY_LEFTCTRL }, { KEY_LEFTCTRL, KEY_FN }, { } }; static const struct apple_non_apple_keyboard non_apple_keyboards[] = { { "SONiX USB DEVICE" }, { "Keychron" }, { "AONE" }, { "GANSS" }, { "Hailuck" }, { "Jamesdonkey" }, { "A3R" }, { "hfd.cn" }, { "WKB603" }, }; static bool apple_is_non_apple_keyboard(struct hid_device *hdev) { int i; for (i = 0; i < ARRAY_SIZE(non_apple_keyboards); i++) { char *non_apple = non_apple_keyboards[i].name; if (strncmp(hdev->name, non_apple, strlen(non_apple)) == 0) return true; } return false; } static inline void apple_setup_key_translation(struct input_dev *input, const struct apple_key_translation *table) { const struct apple_key_translation *trans; for (trans = table; trans->from; trans++) set_bit(trans->to, input->keybit); } static const struct apple_key_translation *apple_find_translation( const struct apple_key_translation *table, u16 from) { const struct apple_key_translation *trans; /* Look for the translation */ for (trans = table; trans->from; trans++) if (trans->from == from) return trans; return NULL; } static void input_event_with_scancode(struct input_dev *input, __u8 type, __u16 code, unsigned int hid, __s32 value) { if (type == EV_KEY && (!test_bit(code, input->key)) == value) input_event(input, EV_MSC, MSC_SCAN, hid); input_event(input, type, code, value); } static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, struct hid_usage *usage, __s32 value) { struct apple_sc *asc = hid_get_drvdata(hid); const struct apple_key_translation *trans, *table; bool do_translate; u16 code = usage->code; unsigned int real_fnmode; if (fnmode == 3) { real_fnmode = (asc->quirks & APPLE_IS_NON_APPLE) ? 
2 : 1; } else { real_fnmode = fnmode; } if (swap_fn_leftctrl) { trans = apple_find_translation(swapped_fn_leftctrl_keys, code); if (trans) code = trans->to; } if (iso_layout > 0 || (iso_layout < 0 && (asc->quirks & APPLE_ISO_TILDE_QUIRK) && hid->country == HID_COUNTRY_INTERNATIONAL_ISO)) { trans = apple_find_translation(apple_iso_keyboard, code); if (trans) code = trans->to; } if (swap_opt_cmd) { if (swap_opt_cmd == 2) trans = apple_find_translation(swapped_option_cmd_left_keys, code); else trans = apple_find_translation(swapped_option_cmd_keys, code); if (trans) code = trans->to; } if (swap_ctrl_cmd) { trans = apple_find_translation(swapped_ctrl_cmd_keys, code); if (trans) code = trans->to; } if (code == KEY_FN) asc->fn_on = !!value; if (real_fnmode) { if (hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI || hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO || hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS || hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI || hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO || hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS || hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI || hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO || hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_JIS) table = magic_keyboard_alu_fn_keys; else if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2015 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2015) table = magic_keyboard_2015_fn_keys; else if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021) table = apple2021_fn_keys; else if (hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132 || hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680 || hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213) table = macbookpro_no_esc_fn_keys; else if (hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K || hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223 || hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F) table = macbookpro_dedicated_esc_fn_keys; else if (hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K || hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K) table = apple_fn_keys; else if (hid->product >= USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI && hid->product <= USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS) table = macbookair_fn_keys; else if (hid->product < 0x21d || hid->product >= 0x300) table = powerbook_fn_keys; else table = apple_fn_keys; trans = apple_find_translation(table, code); if (trans) { bool from_is_set = test_bit(trans->from, input->key); bool to_is_set = test_bit(trans->to, input->key); if (from_is_set) code = trans->from; else if (to_is_set) code = trans->to; if (!(from_is_set || to_is_set)) { if (trans->flags & APPLE_FLAG_FKEY) { switch (real_fnmode) { case 1: do_translate = !asc->fn_on; break; case 2: do_translate = asc->fn_on; break; default: /* should never happen */ do_translate = false; } } else { do_translate = asc->fn_on; } if (do_translate) code = trans->to; } } if (asc->quirks & APPLE_NUMLOCK_EMULATION && (test_bit(code, asc->pressed_numlock) || test_bit(LED_NUML, input->led))) { trans = apple_find_translation(powerbook_numlock_keys, code); if (trans) { if (value) set_bit(code, asc->pressed_numlock); else clear_bit(code, asc->pressed_numlock); code = trans->to; } } } if (usage->code != code) { input_event_with_scancode(input, usage->type, code, usage->hid, value); return 1; } 
return 0; } static int apple_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct apple_sc *asc = hid_get_drvdata(hdev); if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput || !usage->type) return 0; if ((asc->quirks & APPLE_INVERT_HWHEEL) && usage->code == REL_HWHEEL) { input_event_with_scancode(field->hidinput->input, usage->type, usage->code, usage->hid, -value); return 1; } if ((asc->quirks & APPLE_HAS_FN) && hidinput_apple_event(hdev, field->hidinput->input, usage, value)) return 1; return 0; } static int apple_fetch_battery(struct hid_device *hdev) { #ifdef CONFIG_HID_BATTERY_STRENGTH struct apple_sc *asc = hid_get_drvdata(hdev); struct hid_report_enum *report_enum; struct hid_report *report; if (!(asc->quirks & APPLE_RDESC_BATTERY) || !hdev->battery) return -1; report_enum = &hdev->report_enum[hdev->battery_report_type]; report = report_enum->report_id_hash[hdev->battery_report_id]; if (!report || report->maxfield < 1) return -1; if (hdev->battery_capacity == hdev->battery_max) return -1; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); return 0; #else return -1; #endif } static void apple_battery_timer_tick(struct timer_list *t) { struct apple_sc *asc = from_timer(asc, t, battery_timer); struct hid_device *hdev = asc->hdev; if (apple_fetch_battery(hdev) == 0) { mod_timer(&asc->battery_timer, jiffies + msecs_to_jiffies(APPLE_BATTERY_TIMEOUT_MS)); } } /* * MacBook JIS keyboard has wrong logical maximum * Magic Keyboard JIS has wrong logical maximum */ static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct apple_sc *asc = hid_get_drvdata(hdev); if(*rsize >=71 && rdesc[70] == 0x65 && rdesc[64] == 0x65) { hid_info(hdev, "fixing up Magic Keyboard JIS report descriptor\n"); rdesc[64] = rdesc[70] = 0xe7; } if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 && rdesc[53] == 0x65 && rdesc[59] == 0x65) { hid_info(hdev, "fixing up MacBook JIS keyboard report descriptor\n"); rdesc[53] = rdesc[59] = 0xe7; } /* * Change the usage from: * 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page 1) 0 * 0x09, 0x0b, // Usage (Vendor Usage 0x0b) 3 * To: * 0x05, 0x01, // Usage Page (Generic Desktop) 0 * 0x09, 0x06, // Usage (Keyboard) 2 */ if ((asc->quirks & APPLE_RDESC_BATTERY) && *rsize == 83 && rdesc[46] == 0x84 && rdesc[58] == 0x85) { hid_info(hdev, "fixing up Magic Keyboard battery report descriptor\n"); *rsize = *rsize - 1; rdesc = kmemdup(rdesc + 1, *rsize, GFP_KERNEL); if (!rdesc) return NULL; rdesc[0] = 0x05; rdesc[1] = 0x01; rdesc[2] = 0x09; rdesc[3] = 0x06; } return rdesc; } static void apple_setup_input(struct input_dev *input) { set_bit(KEY_NUMLOCK, input->keybit); /* Enable all needed keys */ apple_setup_key_translation(input, apple_fn_keys); apple_setup_key_translation(input, powerbook_fn_keys); apple_setup_key_translation(input, powerbook_numlock_keys); apple_setup_key_translation(input, apple_iso_keyboard); apple_setup_key_translation(input, magic_keyboard_alu_fn_keys); apple_setup_key_translation(input, magic_keyboard_2015_fn_keys); apple_setup_key_translation(input, apple2021_fn_keys); apple_setup_key_translation(input, macbookpro_no_esc_fn_keys); apple_setup_key_translation(input, macbookpro_dedicated_esc_fn_keys); } static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct apple_sc *asc = hid_get_drvdata(hdev); if (usage->hid == (HID_UP_CUSTOM | 0x0003) || usage->hid == 
(HID_UP_MSVENDOR | 0x0003) || usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); asc->fn_found = true; apple_setup_input(hi->input); return 1; } /* we want the hid layer to go through standard path (set and ignore) */ return 0; } static int apple_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct apple_sc *asc = hid_get_drvdata(hdev); if (asc->quirks & APPLE_MIGHTYMOUSE) { if (usage->hid == HID_GD_Z) hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL); else if (usage->code == BTN_1) hid_map_usage(hi, usage, bit, max, EV_KEY, BTN_2); else if (usage->code == BTN_2) hid_map_usage(hi, usage, bit, max, EV_KEY, BTN_1); } return 0; } static int apple_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct apple_sc *asc = hid_get_drvdata(hdev); if ((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) { hid_info(hdev, "Fn key not found (Apple Wireless Keyboard clone?), disabling Fn key handling\n"); asc->quirks &= ~APPLE_HAS_FN; } if (apple_is_non_apple_keyboard(hdev)) { hid_info(hdev, "Non-apple keyboard detected; function keys will default to fnmode=2 behavior\n"); asc->quirks |= APPLE_IS_NON_APPLE; } return 0; } static bool apple_backlight_check_support(struct hid_device *hdev) { int i; unsigned int hid; struct hid_report *report; list_for_each_entry(report, &hdev->report_enum[HID_INPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { hid = report->field[i]->usage->hid; if ((hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR && (hid & HID_USAGE) == 0xf) return true; } } return false; } static int apple_backlight_set(struct hid_device *hdev, u16 value, u16 rate) { int ret = 0; struct apple_backlight_set_report *rep; rep = kmalloc(sizeof(*rep), GFP_KERNEL); if (rep == NULL) return -ENOMEM; rep->report_id = 0xB0; rep->version = 1; rep->backlight = value; rep->rate = rate; ret = hid_hw_raw_request(hdev, 0xB0u, (u8 *) rep, sizeof(*rep), HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); kfree(rep); return ret; } static int apple_backlight_led_set(struct led_classdev *led_cdev, enum led_brightness brightness) { struct apple_sc_backlight *backlight = container_of(led_cdev, struct apple_sc_backlight, cdev); return apple_backlight_set(backlight->hdev, brightness, 0); } static int apple_backlight_init(struct hid_device *hdev) { int ret; struct apple_sc *asc = hid_get_drvdata(hdev); struct apple_backlight_config_report *rep; if (!apple_backlight_check_support(hdev)) return -EINVAL; rep = kmalloc(0x200, GFP_KERNEL); if (rep == NULL) return -ENOMEM; ret = hid_hw_raw_request(hdev, 0xBFu, (u8 *) rep, sizeof(*rep), HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { hid_err(hdev, "backlight request failed: %d\n", ret); goto cleanup_and_exit; } if (ret < 8 || rep->version != 1) { hid_err(hdev, "backlight config struct: bad version %i\n", rep->version); ret = -EINVAL; goto cleanup_and_exit; } hid_dbg(hdev, "backlight config: off=%u, on_min=%u, on_max=%u\n", rep->backlight_off, rep->backlight_on_min, rep->backlight_on_max); asc->backlight = devm_kzalloc(&hdev->dev, sizeof(*asc->backlight), GFP_KERNEL); if (!asc->backlight) { ret = -ENOMEM; goto cleanup_and_exit; } asc->backlight->hdev = hdev; asc->backlight->cdev.name = "apple::kbd_backlight"; asc->backlight->cdev.max_brightness = rep->backlight_on_max; asc->backlight->cdev.brightness_set_blocking = 
apple_backlight_led_set; ret = apple_backlight_set(hdev, 0, 0); if (ret < 0) { hid_err(hdev, "backlight set request failed: %d\n", ret); goto cleanup_and_exit; } ret = devm_led_classdev_register(&hdev->dev, &asc->backlight->cdev); cleanup_and_exit: kfree(rep); return ret; } static int apple_probe(struct hid_device *hdev, const struct hid_device_id *id) { unsigned long quirks = id->driver_data; struct apple_sc *asc; int ret; asc = devm_kzalloc(&hdev->dev, sizeof(*asc), GFP_KERNEL); if (asc == NULL) { hid_err(hdev, "can't alloc apple descriptor\n"); return -ENOMEM; } asc->hdev = hdev; asc->quirks = quirks; hid_set_drvdata(hdev, asc); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } timer_setup(&asc->battery_timer, apple_battery_timer_tick, 0); mod_timer(&asc->battery_timer, jiffies + msecs_to_jiffies(APPLE_BATTERY_TIMEOUT_MS)); apple_fetch_battery(hdev); if (quirks & APPLE_BACKLIGHT_CTL) apple_backlight_init(hdev); return 0; } static void apple_remove(struct hid_device *hdev) { struct apple_sc *asc = hid_get_drvdata(hdev); del_timer_sync(&asc->battery_timer); hid_hw_stop(hdev); } static const struct hid_device_id apple_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE), .driver_data = APPLE_MIGHTYMOUSE | APPLE_INVERT_HWHEEL }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_ISO), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_JIS), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS), .driver_data = APPLE_HAS_FN }, { 
HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS), .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2015), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2015), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2015), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2015), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS), .driver_data = APPLE_HAS_FN | 
APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI), .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { 
HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K), .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132), .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680), .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213), .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { } }; MODULE_DEVICE_TABLE(hid, apple_devices); static struct hid_driver apple_driver = { .name = "apple", .id_table = apple_devices, .report_fixup = apple_report_fixup, .probe = apple_probe, .remove = apple_remove, .event = apple_event, .input_mapping = apple_input_mapping, .input_mapped = apple_input_mapped, .input_configured = apple_input_configured, }; module_hid_driver(apple_driver); MODULE_LICENSE("GPL");
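Each entry in the apple_devices[] table above pairs a device ID with a bitmask of quirk flags carried in .driver_data. As a hedged illustration only (not code from hid-apple.c), the sketch below shows how a probe callback can read those bits back out of the matched hid_device_id; the function name and log messages are hypothetical, while hid_parse(), hid_hw_start() and hid_info() are standard HID core calls and the APPLE_* flags are the ones defined earlier in this driver.

static int example_apple_probe(struct hid_device *hdev,
			       const struct hid_device_id *id)
{
	/* Quirk bits OR'ed into .driver_data in the id table above. */
	unsigned long quirks = id->driver_data;
	int ret;

	ret = hid_parse(hdev);
	if (ret)
		return ret;

	if (quirks & APPLE_ISO_TILDE_QUIRK)
		hid_info(hdev, "ISO tilde/backtick swap will be applied\n");
	if (quirks & APPLE_HAS_FN)
		hid_info(hdev, "Fn key handling enabled\n");

	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}

In the real driver, apple_probe() similarly copies id->driver_data into per-device state so that the event, input-mapping and report-fixup hooks registered in apple_driver can consult the quirks later.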
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PTRACE_H #define _ASM_X86_PTRACE_H #include <asm/segment.h> #include <asm/page_types.h> #include <uapi/asm/ptrace.h> #ifndef __ASSEMBLY__ #ifdef __i386__ struct pt_regs { /* * NB: 32-bit x86 CPUs are inconsistent as what happens in the * following cases (where %seg represents a segment register): * * - pushl %seg: some do a 16-bit write and leave the high * bits alone * - movl %seg, [mem]: some do a 16-bit write despite the movl * - IDT entry: some (e.g. 486) will leave the high bits of CS * and (if applicable) SS undefined. * * Fortunately, x86-32 doesn't read the high bits on POP or IRET, * so we can just treat all of the segment registers as 16-bit * values. */ unsigned long bx; unsigned long cx; unsigned long dx; unsigned long si; unsigned long di; unsigned long bp; unsigned long ax; unsigned short ds; unsigned short __dsh; unsigned short es; unsigned short __esh; unsigned short fs; unsigned short __fsh; /* * On interrupt, gs and __gsh store the vector number. They never * store gs any more. */ unsigned short gs; unsigned short __gsh; /* On interrupt, this is the error code.
*/ unsigned long orig_ax; unsigned long ip; unsigned short cs; unsigned short __csh; unsigned long flags; unsigned long sp; unsigned short ss; unsigned short __ssh; }; #else /* __i386__ */ struct fred_cs { /* CS selector */ u64 cs : 16, /* Stack level at event time */ sl : 2, /* IBT in WAIT_FOR_ENDBRANCH state */ wfe : 1, : 45; }; struct fred_ss { /* SS selector */ u64 ss : 16, /* STI state */ sti : 1, /* Set if syscall, sysenter or INT n */ swevent : 1, /* Event is NMI type */ nmi : 1, : 13, /* Event vector */ vector : 8, : 8, /* Event type */ type : 4, : 4, /* Event was incident to enclave execution */ enclave : 1, /* CPU was in long mode */ lm : 1, /* * Nested exception during FRED delivery, not set * for #DF. */ nested : 1, : 1, /* * The length of the instruction causing the event. * Only set for INTO, INT1, INT3, INT n, SYSCALL * and SYSENTER. 0 otherwise. */ insnlen : 4; }; struct pt_regs { /* * C ABI says these regs are callee-preserved. They aren't saved on * kernel entry unless syscall needs a complete, fully filled * "struct pt_regs". */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long bp; unsigned long bx; /* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; unsigned long r8; unsigned long ax; unsigned long cx; unsigned long dx; unsigned long si; unsigned long di; /* * orig_ax is used on entry for: * - the syscall number (syscall, sysenter, int80) * - error_code stored by the CPU on traps and exceptions * - the interrupt number for device interrupts * * A FRED stack frame starts here: * 1) It _always_ includes an error code; * * 2) The return frame for ERET[US] starts here, but * the content of orig_ax is ignored. */ unsigned long orig_ax; /* The IRETQ return frame starts here */ unsigned long ip; union { /* CS selector */ u16 cs; /* The extended 64-bit data slot containing CS */ u64 csx; /* The FRED CS extension */ struct fred_cs fred_cs; }; unsigned long flags; unsigned long sp; union { /* SS selector */ u16 ss; /* The extended 64-bit data slot containing SS */ u64 ssx; /* The FRED SS extension */ struct fred_ss fred_ss; }; /* * Top of stack on IDT systems, while FRED systems have extra fields * defined above for storing exception related information, e.g. CR2 or * DR6. */ }; #endif /* !__i386__ */ #ifdef CONFIG_PARAVIRT #include <asm/paravirt_types.h> #endif #include <asm/proto.h> struct cpuinfo_x86; struct task_struct; extern unsigned long profile_pc(struct pt_regs *regs); extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; } static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->ax = rc; } /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the * register set was from protected mode with RPL-3 CS value. This * tricky test checks that with one comparison. * * On x86_64, vm86 mode is mercifully nonexistent, and we don't need * the extra check. 
*/ static __always_inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; #else return !!(regs->cs & 3); #endif } static __always_inline int v8086_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->flags & X86_VM_MASK); #else return 0; /* No V86 mode support in long mode */ #endif } static inline bool user_64bit_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_64 #ifndef CONFIG_PARAVIRT_XXL /* * On non-paravirt systems, this is the only long mode CPL 3 * selector. We do not allow long mode selectors in the LDT. */ return regs->cs == __USER_CS; #else /* Headers are too twisted for this to go in paravirt.h. */ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; #endif #else /* !CONFIG_X86_64 */ return false; #endif } /* * Determine whether the register set came from any context that is running in * 64-bit mode. */ static inline bool any_64bit_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_64 return !user_mode(regs) || user_64bit_mode(regs); #else return false; #endif } #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) { bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack && regs->ip < (unsigned long)entry_SYSRETQ_end); #ifdef CONFIG_IA32_EMULATION ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack && regs->ip < (unsigned long)entry_SYSRETL_compat_end); #endif return ret; } #endif static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } static inline unsigned long instruction_pointer(struct pt_regs *regs) { return regs->ip; } static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->ip = val; } static inline unsigned long frame_pointer(struct pt_regs *regs) { return regs->bp; } static inline unsigned long user_stack_pointer(struct pt_regs *regs) { return regs->sp; } static inline void user_stack_pointer_set(struct pt_regs *regs, unsigned long val) { regs->sp = val; } static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { return !(regs->flags & X86_EFLAGS_IF); } /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); extern const char *regs_query_register_name(unsigned int offset); #define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) /** * regs_get_register() - get register value from its offset * @regs: pt_regs from which register value is gotten. * @offset: offset number of the register. * * regs_get_register returns the value of a register. The @offset is the * offset of the register in struct pt_regs address which specified by @regs. * If @offset is bigger than MAX_REG_OFFSET, this returns 0. */ static inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) { if (unlikely(offset > MAX_REG_OFFSET)) return 0; #ifdef CONFIG_X86_32 /* The selector fields are 16-bit. 
*/ if (offset == offsetof(struct pt_regs, cs) || offset == offsetof(struct pt_regs, ss) || offset == offsetof(struct pt_regs, ds) || offset == offsetof(struct pt_regs, es) || offset == offsetof(struct pt_regs, fs) || offset == offsetof(struct pt_regs, gs)) { return *(u16 *)((unsigned long)regs + offset); } #endif return *(unsigned long *)((unsigned long)regs + offset); } /** * regs_within_kernel_stack() - check the address in the stack * @regs: pt_regs which contains kernel stack pointer. * @addr: address which is checked. * * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). * If @addr is within the kernel stack, it returns true. If not, returns false. */ static inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1))); } /** * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns the address of the @n th entry of the * kernel stack which is specified by @regs. If the @n th entry is NOT in * the kernel stack, this returns NULL. */ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) { unsigned long *addr = (unsigned long *)regs->sp; addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) return addr; else return NULL; } /* To avoid include hell, we can't include uaccess.h */ extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which * is specified by @regs. If the @n th entry is NOT in the kernel stack * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) { unsigned long *addr; unsigned long val; long ret; addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } return 0; } /** * regs_get_kernel_argument() - get Nth function argument in kernel * @regs: pt_regs of that context * @n: function argument number (start from 0) * * regs_get_argument() returns @n th argument of the function call. * Note that this chooses most probably assignment, in some case * it can be incorrect. * This is expected to be called from kprobes or ftrace with regs * where the top of stack is the return address. 
*/ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, unsigned int n) { static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), offsetof(struct pt_regs, dx), offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), offsetof(struct pt_regs, si), offsetof(struct pt_regs, dx), offsetof(struct pt_regs, cx), offsetof(struct pt_regs, r8), offsetof(struct pt_regs, r9), #define NR_REG_ARGUMENTS 6 #endif }; if (n >= NR_REG_ARGUMENTS) { n -= NR_REG_ARGUMENTS - 1; return regs_get_kernel_stack_nth(regs, n); } else return regs_get_register(regs, argument_offs[n]); } #define arch_has_single_step() (1) #ifdef CONFIG_X86_DEBUGCTLMSR #define arch_has_block_step() (1) #else #define arch_has_block_step() (boot_cpu_data.x86 >= 6) #endif #define ARCH_HAS_USER_SINGLE_STEP_REPORT struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); #ifdef CONFIG_X86_64 # define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t) #else # define do_set_thread_area_64(p, s, t) (0) #endif #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PTRACE_H */
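The kerneldoc for regs_get_kernel_argument() above notes that it is expected to be called from kprobes or ftrace, where the top of stack is the return address. Purely as an illustrative sketch, and assuming a probe-able target symbol (do_sys_openat2 is only an example target, not something this header prescribes), a kprobe pre-handler could use the helper like this:

#include <linux/kprobes.h>
#include <linux/module.h>
#include <asm/ptrace.h>

static int example_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	/* First two register-passed arguments of the probed function
	 * (di/si on x86-64, ax/dx on i386, per argument_offs[] above).
	 */
	unsigned long arg0 = regs_get_kernel_argument(regs, 0);
	unsigned long arg1 = regs_get_kernel_argument(regs, 1);

	pr_debug("%s: arg0=%#lx arg1=%#lx\n", p->symbol_name, arg0, arg1);
	return 0;
}

static struct kprobe example_kp = {
	.symbol_name	= "do_sys_openat2",	/* hypothetical target */
	.pre_handler	= example_pre_handler,
};

static int __init example_init(void)
{
	return register_kprobe(&example_kp);
}

static void __exit example_exit(void)
{
	unregister_kprobe(&example_kp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

For argument indexes at or beyond NR_REG_ARGUMENTS the helper falls back to regs_get_kernel_stack_nth(), so the same call also reaches stack-passed arguments as long as the probe sits at function entry.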
// SPDX-License-Identifier: GPL-2.0-or-later /* A network driver using virtio.
* * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation */ //#define DEBUG #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/module.h> #include <linux/virtio.h> #include <linux/virtio_net.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/scatterlist.h> #include <linux/if_vlan.h> #include <linux/slab.h> #include <linux/cpu.h> #include <linux/average.h> #include <linux/filter.h> #include <linux/kernel.h> #include <linux/dim.h> #include <net/route.h> #include <net/xdp.h> #include <net/net_failover.h> #include <net/netdev_rx_queue.h> static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); static bool csum = true, gso = true, napi_tx = true; module_param(csum, bool, 0444); module_param(gso, bool, 0444); module_param(napi_tx, bool, 0644); /* FIXME: MTU in config. */ #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) #define GOOD_COPY_LEN 128 #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ #define VIRTIO_XDP_HEADROOM 256 /* Separating two types of XDP xmit */ #define VIRTIO_XDP_TX BIT(0) #define VIRTIO_XDP_REDIR BIT(1) #define VIRTIO_XDP_FLAG BIT(0) /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- * term, transient changes in packet size. */ DECLARE_EWMA(pkt_len, 0, 64) #define VIRTNET_DRIVER_VERSION "1.0.0" static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, VIRTIO_NET_F_GUEST_HDRLEN }; #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ (1ULL << VIRTIO_NET_F_GUEST_USO6)) struct virtnet_stat_desc { char desc[ETH_GSTRING_LEN]; size_t offset; }; struct virtnet_sq_free_stats { u64 packets; u64 bytes; }; struct virtnet_sq_stats { struct u64_stats_sync syncp; u64_stats_t packets; u64_stats_t bytes; u64_stats_t xdp_tx; u64_stats_t xdp_tx_drops; u64_stats_t kicks; u64_stats_t tx_timeouts; }; struct virtnet_rq_stats { struct u64_stats_sync syncp; u64_stats_t packets; u64_stats_t bytes; u64_stats_t drops; u64_stats_t xdp_packets; u64_stats_t xdp_tx; u64_stats_t xdp_redirects; u64_stats_t xdp_drops; u64_stats_t kicks; }; #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { { "packets", VIRTNET_SQ_STAT(packets) }, { "bytes", VIRTNET_SQ_STAT(bytes) }, { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) }, { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) }, { "kicks", VIRTNET_SQ_STAT(kicks) }, { "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) }, }; static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { { "packets", VIRTNET_RQ_STAT(packets) }, { "bytes", VIRTNET_RQ_STAT(bytes) }, { "drops", VIRTNET_RQ_STAT(drops) }, { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) }, { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) }, { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) }, { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) }, { "kicks", VIRTNET_RQ_STAT(kicks) }, }; #define 
VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) #define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc) struct virtnet_interrupt_coalesce { u32 max_packets; u32 max_usecs; }; /* The dma information of pages allocated at a time. */ struct virtnet_rq_dma { dma_addr_t addr; u32 ref; u16 len; u16 need_sync; }; /* Internal representation of a send virtqueue */ struct send_queue { /* Virtqueue associated with this send _queue */ struct virtqueue *vq; /* TX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; /* Name of the send queue: output.$index */ char name[16]; struct virtnet_sq_stats stats; struct virtnet_interrupt_coalesce intr_coal; struct napi_struct napi; /* Record whether sq is in reset state. */ bool reset; }; /* Internal representation of a receive virtqueue */ struct receive_queue { /* Virtqueue associated with this receive_queue */ struct virtqueue *vq; struct napi_struct napi; struct bpf_prog __rcu *xdp_prog; struct virtnet_rq_stats stats; /* The number of rx notifications */ u16 calls; /* Is dynamic interrupt moderation enabled? */ bool dim_enabled; /* Dynamic Interrupt Moderation */ struct dim dim; u32 packets_in_napi; struct virtnet_interrupt_coalesce intr_coal; /* Chain pages by the private ptr. */ struct page *pages; /* Average packet length for mergeable receive buffers. */ struct ewma_pkt_len mrg_avg_pkt_len; /* Page frag for packet buffer allocation. */ struct page_frag alloc_frag; /* RX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; /* Min single buffer size for mergeable buffers case. */ unsigned int min_buf_len; /* Name of this receive queue: input.$index */ char name[16]; struct xdp_rxq_info xdp_rxq; /* Record the last dma info to free after new pages is allocated. */ struct virtnet_rq_dma *last_dma; /* Do dma by self */ bool do_dma; }; /* This structure can contain rss message with maximum settings for indirection table and keysize * Note, that default structure that describes RSS configuration virtio_net_rss_config * contains same info but can't handle table values. * In any case, structure would be passed to virtio hw through sg_buf split by parts * because table sizes may be differ according to the device configuration. */ #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 #define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 struct virtio_net_ctrl_rss { u32 hash_types; u16 indirection_table_mask; u16 unclassified_queue; u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; u16 max_tx_vq; u8 hash_key_length; u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; }; /* Control VQ buffers: protected by the rtnl lock */ struct control_buf { struct virtio_net_ctrl_hdr hdr; virtio_net_ctrl_ack status; struct virtio_net_ctrl_mq mq; u8 promisc; u8 allmulti; __virtio16 vid; __virtio64 offloads; struct virtio_net_ctrl_rss rss; struct virtio_net_ctrl_coal_tx coal_tx; struct virtio_net_ctrl_coal_rx coal_rx; struct virtio_net_ctrl_coal_vq coal_vq; }; struct virtnet_info { struct virtio_device *vdev; struct virtqueue *cvq; struct net_device *dev; struct send_queue *sq; struct receive_queue *rq; unsigned int status; /* Max # of queue pairs supported by the device */ u16 max_queue_pairs; /* # of queue pairs currently used by the driver */ u16 curr_queue_pairs; /* # of XDP queue pairs currently used by the driver */ u16 xdp_queue_pairs; /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */ bool xdp_enabled; /* I like... big packets and I cannot lie! 
*/ bool big_packets; /* number of sg entries allocated for big packets */ unsigned int big_packets_num_skbfrags; /* Host will merge rx buffers for big packets (shake it! shake it!) */ bool mergeable_rx_bufs; /* Host supports rss and/or hash report */ bool has_rss; bool has_rss_hash_report; u8 rss_key_size; u16 rss_indir_table_size; u32 rss_hash_types_supported; u32 rss_hash_types_saved; /* Has control virtqueue */ bool has_cvq; /* Host can handle any s/g split between our header and packet data */ bool any_header_sg; /* Packet virtio header size */ u8 hdr_len; /* Work struct for delayed refilling if we run low on memory. */ struct delayed_work refill; /* Is delayed refill enabled? */ bool refill_enabled; /* The lock to synchronize the access to refill_enabled */ spinlock_t refill_lock; /* Work struct for config space updates */ struct work_struct config_work; /* Work struct for setting rx mode */ struct work_struct rx_mode_work; /* OK to queue work setting RX mode? */ bool rx_mode_work_enabled; /* Does the affinity hint is set for virtqueues? */ bool affinity_hint_set; /* CPU hotplug instances for online & dead */ struct hlist_node node; struct hlist_node node_dead; struct control_buf *ctrl; /* Ethtool settings */ u8 duplex; u32 speed; /* Is rx dynamic interrupt moderation enabled? */ bool rx_dim_enabled; /* Interrupt coalescing settings */ struct virtnet_interrupt_coalesce intr_coal_tx; struct virtnet_interrupt_coalesce intr_coal_rx; unsigned long guest_offloads; unsigned long guest_offloads_capable; /* failover when STANDBY feature enabled */ struct failover *failover; }; struct padded_vnet_hdr { struct virtio_net_hdr_v1_hash hdr; /* * hdr is in a separate sg buffer, and data sg buffer shares same page * with this header sg. This padding makes next sg 16 byte aligned * after the header. */ char padding[12]; }; struct virtio_net_common_hdr { union { struct virtio_net_hdr hdr; struct virtio_net_hdr_mrg_rxbuf mrg_hdr; struct virtio_net_hdr_v1_hash hash_v1_hdr; }; }; static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf); static bool is_xdp_frame(void *ptr) { return (unsigned long)ptr & VIRTIO_XDP_FLAG; } static void *xdp_to_ptr(struct xdp_frame *ptr) { return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); } static struct xdp_frame *ptr_to_xdp(void *ptr) { return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); } static void __free_old_xmit(struct send_queue *sq, bool in_napi, struct virtnet_sq_free_stats *stats) { unsigned int len; void *ptr; while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { ++stats->packets; if (!is_xdp_frame(ptr)) { struct sk_buff *skb = ptr; pr_debug("Sent skb %p\n", skb); stats->bytes += skb->len; napi_consume_skb(skb, in_napi); } else { struct xdp_frame *frame = ptr_to_xdp(ptr); stats->bytes += xdp_get_frame_len(frame); xdp_return_frame(frame); } } } /* Converting between virtqueue no. and kernel tx/rx queue no. * 0:rx0 1:tx0 2:rx1 3:tx1 ... 
2N:rxN 2N+1:txN 2N+2:cvq */ static int vq2txq(struct virtqueue *vq) { return (vq->index - 1) / 2; } static int txq2vq(int txq) { return txq * 2 + 1; } static int vq2rxq(struct virtqueue *vq) { return vq->index / 2; } static int rxq2vq(int rxq) { return rxq * 2; } static inline struct virtio_net_common_hdr * skb_vnet_common_hdr(struct sk_buff *skb) { return (struct virtio_net_common_hdr *)skb->cb; } /* * private is used to chain pages for big packets, put the whole * most recent used list in the beginning for reuse */ static void give_pages(struct receive_queue *rq, struct page *page) { struct page *end; /* Find end of list, sew whole thing into vi->rq.pages. */ for (end = page; end->private; end = (struct page *)end->private); end->private = (unsigned long)rq->pages; rq->pages = page; } static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) { struct page *p = rq->pages; if (p) { rq->pages = (struct page *)p->private; /* clear private here, it is used to chain pages */ p->private = 0; } else p = alloc_page(gfp_mask); return p; } static void virtnet_rq_free_buf(struct virtnet_info *vi, struct receive_queue *rq, void *buf) { if (vi->mergeable_rx_bufs) put_page(virt_to_head_page(buf)); else if (vi->big_packets) give_pages(rq, buf); else put_page(virt_to_head_page(buf)); } static void enable_delayed_refill(struct virtnet_info *vi) { spin_lock_bh(&vi->refill_lock); vi->refill_enabled = true; spin_unlock_bh(&vi->refill_lock); } static void disable_delayed_refill(struct virtnet_info *vi) { spin_lock_bh(&vi->refill_lock); vi->refill_enabled = false; spin_unlock_bh(&vi->refill_lock); } static void enable_rx_mode_work(struct virtnet_info *vi) { rtnl_lock(); vi->rx_mode_work_enabled = true; rtnl_unlock(); } static void disable_rx_mode_work(struct virtnet_info *vi) { rtnl_lock(); vi->rx_mode_work_enabled = false; rtnl_unlock(); } static void virtqueue_napi_schedule(struct napi_struct *napi, struct virtqueue *vq) { if (napi_schedule_prep(napi)) { virtqueue_disable_cb(vq); __napi_schedule(napi); } } static bool virtqueue_napi_complete(struct napi_struct *napi, struct virtqueue *vq, int processed) { int opaque; opaque = virtqueue_enable_cb_prepare(vq); if (napi_complete_done(napi, processed)) { if (unlikely(virtqueue_poll(vq, opaque))) virtqueue_napi_schedule(napi, vq); else return true; } else { virtqueue_disable_cb(vq); } return false; } static void skb_xmit_done(struct virtqueue *vq) { struct virtnet_info *vi = vq->vdev->priv; struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi; /* Suppress further interrupts. */ virtqueue_disable_cb(vq); if (napi->weight) virtqueue_napi_schedule(napi, vq); else /* We were probably waiting for more output buffers. 
*/ netif_wake_subqueue(vi->dev, vq2txq(vq)); } #define MRG_CTX_HEADER_SHIFT 22 static void *mergeable_len_to_ctx(unsigned int truesize, unsigned int headroom) { return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize); } static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx) { return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT; } static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) { return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); } static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, unsigned int headroom, unsigned int len) { struct sk_buff *skb; skb = build_skb(buf, buflen); if (unlikely(!skb)) return NULL; skb_reserve(skb, headroom); skb_put(skb, len); return skb; } /* Called from bottom half context */ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize, unsigned int headroom) { struct sk_buff *skb; struct virtio_net_common_hdr *hdr; unsigned int copy, hdr_len, hdr_padded_len; struct page *page_to_free = NULL; int tailroom, shinfo_size; char *p, *hdr_p, *buf; p = page_address(page) + offset; hdr_p = p; hdr_len = vi->hdr_len; if (vi->mergeable_rx_bufs) hdr_padded_len = hdr_len; else hdr_padded_len = sizeof(struct padded_vnet_hdr); buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; p += hdr_padded_len; tailroom = truesize - headroom - hdr_padded_len - len; shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* copy small packet so we can reuse these pages */ if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) { skb = virtnet_build_skb(buf, truesize, p - buf, len); if (unlikely(!skb)) return NULL; page = (struct page *)page->private; if (page) give_pages(rq, page); goto ok; } /* copy small packet so we can reuse these pages for small data */ skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); if (unlikely(!skb)) return NULL; /* Copy all frame if it fits skb->head, otherwise * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. */ if (len <= skb_tailroom(skb)) copy = len; else copy = ETH_HLEN; skb_put_data(skb, p, copy); len -= copy; offset += copy; if (vi->mergeable_rx_bufs) { if (len) skb_add_rx_frag(skb, 0, page, offset, len, truesize); else page_to_free = page; goto ok; } /* * Verify that we can indeed put this data into a skb. * This is here to handle cases when the device erroneously * tries to receive more than is possible. This is usually * the case of a broken device. 
*/ if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { net_dbg_ratelimited("%s: too much data\n", skb->dev->name); dev_kfree_skb(skb); return NULL; } BUG_ON(offset >= PAGE_SIZE); while (len) { unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, frag_size, truesize); len -= frag_size; page = (struct page *)page->private; offset = 0; } if (page) give_pages(rq, page); ok: hdr = skb_vnet_common_hdr(skb); memcpy(hdr, hdr_p, hdr_len); if (page_to_free) put_page(page_to_free); return skb; } static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) { struct page *page = virt_to_head_page(buf); struct virtnet_rq_dma *dma; void *head; int offset; head = page_address(page); dma = head; --dma->ref; if (dma->need_sync && len) { offset = buf - (head + sizeof(*dma)); virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, offset, len, DMA_FROM_DEVICE); } if (dma->ref) return; virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); put_page(page); } static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) { void *buf; buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); if (buf && rq->do_dma) virtnet_rq_unmap(rq, buf, *len); return buf; } static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) { struct virtnet_rq_dma *dma; dma_addr_t addr; u32 offset; void *head; if (!rq->do_dma) { sg_init_one(rq->sg, buf, len); return; } head = page_address(rq->alloc_frag.page); offset = buf - head; dma = head; addr = dma->addr - sizeof(*dma) + offset; sg_init_table(rq->sg, 1); rq->sg[0].dma_address = addr; rq->sg[0].length = len; } static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) { struct page_frag *alloc_frag = &rq->alloc_frag; struct virtnet_rq_dma *dma; void *buf, *head; dma_addr_t addr; if (unlikely(!skb_page_frag_refill(size, alloc_frag, gfp))) return NULL; head = page_address(alloc_frag->page); if (rq->do_dma) { dma = head; /* new pages */ if (!alloc_frag->offset) { if (rq->last_dma) { /* Now, the new page is allocated, the last dma * will not be used. So the dma can be unmapped * if the ref is 0. */ virtnet_rq_unmap(rq, rq->last_dma, 0); rq->last_dma = NULL; } dma->len = alloc_frag->size - sizeof(*dma); addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1, dma->len, DMA_FROM_DEVICE, 0); if (virtqueue_dma_mapping_error(rq->vq, addr)) return NULL; dma->addr = addr; dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr); /* Add a reference to dma to prevent the entire dma from * being released during error handling. This reference * will be freed after the pages are no longer used. 
*/ get_page(alloc_frag->page); dma->ref = 1; alloc_frag->offset = sizeof(*dma); rq->last_dma = dma; } ++dma->ref; } buf = head + alloc_frag->offset; get_page(alloc_frag->page); alloc_frag->offset += size; return buf; } static void virtnet_rq_set_premapped(struct virtnet_info *vi) { int i; /* disable for big mode */ if (!vi->mergeable_rx_bufs && vi->big_packets) return; for (i = 0; i < vi->max_queue_pairs; i++) { if (virtqueue_set_dma_premapped(vi->rq[i].vq)) continue; vi->rq[i].do_dma = true; } } static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) { struct virtnet_info *vi = vq->vdev->priv; struct receive_queue *rq; int i = vq2rxq(vq); rq = &vi->rq[i]; if (rq->do_dma) virtnet_rq_unmap(rq, buf, 0); virtnet_rq_free_buf(vi, rq, buf); } static void free_old_xmit(struct send_queue *sq, bool in_napi) { struct virtnet_sq_free_stats stats = {0}; __free_old_xmit(sq, in_napi, &stats); /* Avoid overhead when no packets have been processed * happens when called speculatively from start_xmit. */ if (!stats.packets) return; u64_stats_update_begin(&sq->stats.syncp); u64_stats_add(&sq->stats.bytes, stats.bytes); u64_stats_add(&sq->stats.packets, stats.packets); u64_stats_update_end(&sq->stats.syncp); } static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) { if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) return false; else if (q < vi->curr_queue_pairs) return true; else return false; } static void check_sq_full_and_disable(struct virtnet_info *vi, struct net_device *dev, struct send_queue *sq) { bool use_napi = sq->napi.weight; int qnum; qnum = sq - vi->sq; /* If running out of space, stop queue to avoid getting packets that we * are then unable to transmit. * An alternative would be to force queuing layer to requeue the skb by * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be * returned in a normal path of operation: it means that driver is not * maintaining the TX queue stop/start state properly, and causes * the stack to do a non-trivial amount of useless work. * Since most packets only take 1 or 2 ring slots, stopping the queue * early means 16 slots are typically wasted. */ if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { netif_stop_subqueue(dev, qnum); if (use_napi) { if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) virtqueue_napi_schedule(&sq->napi, sq->vq); } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. */ free_old_xmit(sq, false); if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { netif_start_subqueue(dev, qnum); virtqueue_disable_cb(sq->vq); } } } } static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct send_queue *sq, struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; struct skb_shared_info *shinfo; u8 nr_frags = 0; int err, i; if (unlikely(xdpf->headroom < vi->hdr_len)) return -EOVERFLOW; if (unlikely(xdp_frame_has_frags(xdpf))) { shinfo = xdp_get_shared_info_from_frame(xdpf); nr_frags = shinfo->nr_frags; } /* In wrapping function virtnet_xdp_xmit(), we need to free * up the pending old buffers, where we need to calculate the * position of skb_shared_info in xdp_get_frame_len() and * xdp_return_frame(), which will involve to xdpf->data and * xdpf->headroom. Therefore, we need to update the value of * headroom synchronously here. 
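	 * (The virtio-net header is carved out of the frame's XDP headroom
	 * just below: headroom shrinks by vi->hdr_len while data and len are
	 * adjusted by the same amount, keeping xdp_get_frame_len() consistent.)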
	 */
	xdpf->headroom -= vi->hdr_len;
	xdpf->data -= vi->hdr_len;

	/* Zero header and leave csum up to XDP layers */
	hdr = xdpf->data;
	memset(hdr, 0, vi->hdr_len);
	xdpf->len += vi->hdr_len;

	sg_init_table(sq->sg, nr_frags + 1);
	sg_set_buf(sq->sg, xdpf->data, xdpf->len);
	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = &shinfo->frags[i];

		sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
				   xdp_to_ptr(xdpf), GFP_ATOMIC);
	if (unlikely(err))
		return -ENOSPC; /* Caller handles free/refcnt */

	return 0;
}

/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 * the current cpu, so it does not need to be locked.
 *
 * Here we use a macro instead of inline functions because we have to deal with
 * three issues at the same time: 1. the choice of sq. 2. judge and execute the
 * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
 * functions to perfectly solve these three problems at the same time.
 */
#define virtnet_xdp_get_sq(vi) ({                                       \
	int cpu = smp_processor_id();                                   \
	struct netdev_queue *txq;                                       \
	typeof(vi) v = (vi);                                            \
	unsigned int qp;                                                \
									\
	if (v->curr_queue_pairs > nr_cpu_ids) {                         \
		qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
		qp += cpu;                                              \
		txq = netdev_get_tx_queue(v->dev, qp);                  \
		__netif_tx_acquire(txq);                                \
	} else {                                                        \
		qp = cpu % v->curr_queue_pairs;                         \
		txq = netdev_get_tx_queue(v->dev, qp);                  \
		__netif_tx_lock(txq, cpu);                              \
	}                                                               \
	v->sq + qp;                                                     \
})

#define virtnet_xdp_put_sq(vi, q) {                                     \
	struct netdev_queue *txq;                                       \
	typeof(vi) v = (vi);                                            \
									\
	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);                 \
	if (v->curr_queue_pairs > nr_cpu_ids)                           \
		__netif_tx_release(txq);                                \
	else                                                            \
		__netif_tx_unlock(txq);                                 \
}

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames, u32 flags)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_sq_free_stats stats = {0};
	struct receive_queue *rq = vi->rq;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	int nxmit = 0;
	int kicks = 0;
	int ret;
	int i;

	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicates XDP resources have been successfully allocated.
	 */
	xdp_prog = rcu_access_pointer(rq->xdp_prog);
	if (!xdp_prog)
		return -ENXIO;

	sq = virtnet_xdp_get_sq(vi);

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	/* Free up any pending old buffers before queueing new ones.
*/ __free_old_xmit(sq, false, &stats); for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; if (__virtnet_xdp_xmit_one(vi, sq, xdpf)) break; nxmit++; } ret = nxmit; if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) check_sq_full_and_disable(vi, dev, sq); if (flags & XDP_XMIT_FLUSH) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) kicks = 1; } out: u64_stats_update_begin(&sq->stats.syncp); u64_stats_add(&sq->stats.bytes, stats.bytes); u64_stats_add(&sq->stats.packets, stats.packets); u64_stats_add(&sq->stats.xdp_tx, n); u64_stats_add(&sq->stats.xdp_tx_drops, n - nxmit); u64_stats_add(&sq->stats.kicks, kicks); u64_stats_update_end(&sq->stats.syncp); virtnet_xdp_put_sq(vi, sq); return ret; } static void put_xdp_frags(struct xdp_buff *xdp) { struct skb_shared_info *shinfo; struct page *xdp_page; int i; if (xdp_buff_has_frags(xdp)) { shinfo = xdp_get_shared_info_from_buff(xdp); for (i = 0; i < shinfo->nr_frags; i++) { xdp_page = skb_frag_page(&shinfo->frags[i]); put_page(xdp_page); } } } static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, struct net_device *dev, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct xdp_frame *xdpf; int err; u32 act; act = bpf_prog_run_xdp(xdp_prog, xdp); u64_stats_inc(&stats->xdp_packets); switch (act) { case XDP_PASS: return act; case XDP_TX: u64_stats_inc(&stats->xdp_tx); xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) { netdev_dbg(dev, "convert buff to frame failed for xdp\n"); return XDP_DROP; } err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); if (unlikely(!err)) { xdp_return_frame_rx_napi(xdpf); } else if (unlikely(err < 0)) { trace_xdp_exception(dev, xdp_prog, act); return XDP_DROP; } *xdp_xmit |= VIRTIO_XDP_TX; return act; case XDP_REDIRECT: u64_stats_inc(&stats->xdp_redirects); err = xdp_do_redirect(dev, xdp, xdp_prog); if (err) return XDP_DROP; *xdp_xmit |= VIRTIO_XDP_REDIR; return act; default: bpf_warn_invalid_xdp_action(dev, xdp_prog, act); fallthrough; case XDP_ABORTED: trace_xdp_exception(dev, xdp_prog, act); fallthrough; case XDP_DROP: return XDP_DROP; } } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) { return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0; } /* We copy the packet for XDP in the following cases: * * 1) Packet is scattered across multiple rx buffers. * 2) Headroom space is insufficient. * * This is inefficient but it's a temporary condition that * we hit right after XDP is enabled and until queue is refilled * with large buffers with sufficient headroom - so it should affect * at most queue size packets. * Afterwards, the conditions to enable * XDP should preclude the underlying device from sending packets * across multiple buffers (num_buf > 1), and we make sure buffers * have enough headroom. */ static struct page *xdp_linearize_page(struct receive_queue *rq, int *num_buf, struct page *p, int offset, int page_off, unsigned int *len) { int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page; if (page_off + *len + tailroom > PAGE_SIZE) return NULL; page = alloc_page(GFP_ATOMIC); if (!page) return NULL; memcpy(page_address(page) + page_off, page_address(p) + offset, *len); page_off += *len; while (--*num_buf) { unsigned int buflen; void *buf; int off; buf = virtnet_rq_get_buf(rq, &buflen, NULL); if (unlikely(!buf)) goto err_buf; p = virt_to_head_page(buf); off = buf - page_address(p); /* guard against a misconfigured or uncooperative backend that * is sending packet larger than the MTU. 
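	 * A frame whose linearized size would exceed PAGE_SIZE (including
	 * the reserved headroom and shared_info tailroom) is therefore
	 * dropped here instead of being copied partially.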
*/ if ((page_off + buflen + tailroom) > PAGE_SIZE) { put_page(p); goto err_buf; } memcpy(page_address(page) + page_off, page_address(p) + off, buflen); page_off += buflen; put_page(p); } /* Headroom does not contribute to packet length */ *len = page_off - VIRTIO_XDP_HEADROOM; return page; err_buf: __free_pages(page, 0); return NULL; } static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, unsigned int xdp_headroom, void *buf, unsigned int len) { unsigned int header_offset; unsigned int headroom; unsigned int buflen; struct sk_buff *skb; header_offset = VIRTNET_RX_PAD + xdp_headroom; headroom = vi->hdr_len + header_offset; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); skb = virtnet_build_skb(buf, buflen, headroom, len); if (unlikely(!skb)) return NULL; buf += header_offset; memcpy(skb_vnet_common_hdr(skb), buf, vi->hdr_len); return skb; } static struct sk_buff *receive_small_xdp(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, struct bpf_prog *xdp_prog, void *buf, unsigned int xdp_headroom, unsigned int len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom; unsigned int headroom = vi->hdr_len + header_offset; struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; struct page *page = virt_to_head_page(buf); struct page *xdp_page; unsigned int buflen; struct xdp_buff xdp; struct sk_buff *skb; unsigned int metasize = 0; u32 act; if (unlikely(hdr->hdr.gso_type)) goto err_xdp; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { int offset = buf - page_address(page) + header_offset; unsigned int tlen = len + vi->hdr_len; int num_buf = 1; xdp_headroom = virtnet_get_headroom(vi); header_offset = VIRTNET_RX_PAD + xdp_headroom; headroom = vi->hdr_len + header_offset; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); xdp_page = xdp_linearize_page(rq, &num_buf, page, offset, header_offset, &tlen); if (!xdp_page) goto err_xdp; buf = page_address(xdp_page); put_page(page); page = xdp_page; } xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, xdp_headroom, len, true); act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); switch (act) { case XDP_PASS: /* Recalculate length in case bpf program changed it */ len = xdp.data_end - xdp.data; metasize = xdp.data - xdp.data_meta; break; case XDP_TX: case XDP_REDIRECT: goto xdp_xmit; default: goto err_xdp; } skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len); if (unlikely(!skb)) goto err; if (metasize) skb_metadata_set(skb, metasize); return skb; err_xdp: u64_stats_inc(&stats->xdp_drops); err: u64_stats_inc(&stats->drops); put_page(page); xdp_xmit: return NULL; } static struct sk_buff *receive_small(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, void *ctx, unsigned int len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { unsigned int xdp_headroom = (unsigned long)ctx; struct page *page = virt_to_head_page(buf); struct sk_buff *skb; len -= vi->hdr_len; u64_stats_add(&stats->bytes, len); if (unlikely(len > GOOD_PACKET_LEN)) { pr_debug("%s: rx error: len %u exceeds max size %d\n", dev->name, len, GOOD_PACKET_LEN); DEV_STATS_INC(dev, rx_length_errors); goto err; } if (unlikely(vi->xdp_enabled)) { struct bpf_prog *xdp_prog; 
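		/* rq->xdp_prog is RCU-protected: it is dereferenced under
		 * rcu_read_lock() below so that a concurrent program swap
		 * cannot free the program while this CPU is still using it.
		 */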
rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, xdp_headroom, len, xdp_xmit, stats); rcu_read_unlock(); return skb; } rcu_read_unlock(); } skb = receive_small_build_skb(vi, xdp_headroom, buf, len); if (likely(skb)) return skb; err: u64_stats_inc(&stats->drops); put_page(page); return NULL; } static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, unsigned int len, struct virtnet_rq_stats *stats) { struct page *page = buf; struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); u64_stats_add(&stats->bytes, len - vi->hdr_len); if (unlikely(!skb)) goto err; return skb; err: u64_stats_inc(&stats->drops); give_pages(rq, page); return NULL; } static void mergeable_buf_free(struct receive_queue *rq, int num_buf, struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page; void *buf; int len; while (num_buf-- > 1) { buf = virtnet_rq_get_buf(rq, &len, NULL); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers missing\n", dev->name, num_buf); DEV_STATS_INC(dev, rx_length_errors); break; } u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); put_page(page); } } /* Why not use xdp_build_skb_from_frame() ? * XDP core assumes that xdp frags are PAGE_SIZE in length, while in * virtio-net there are 2 points that do not match its requirements: * 1. The size of the prefilled buffer is not fixed before xdp is set. * 2. xdp_build_skb_from_frame() does more checks that we don't need, * like eth_type_trans() (which virtio-net does in receive_buf()). */ static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, struct virtnet_info *vi, struct xdp_buff *xdp, unsigned int xdp_frags_truesz) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); unsigned int headroom, data_len; struct sk_buff *skb; int metasize; u8 nr_frags; if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { pr_debug("Error building skb as missing reserved tailroom for xdp"); return NULL; } if (unlikely(xdp_buff_has_frags(xdp))) nr_frags = sinfo->nr_frags; skb = build_skb(xdp->data_hard_start, xdp->frame_sz); if (unlikely(!skb)) return NULL; headroom = xdp->data - xdp->data_hard_start; data_len = xdp->data_end - xdp->data; skb_reserve(skb, headroom); __skb_put(skb, data_len); metasize = xdp->data - xdp->data_meta; metasize = metasize > 0 ? metasize : 0; if (metasize) skb_metadata_set(skb, metasize); if (unlikely(xdp_buff_has_frags(xdp))) xdp_update_skb_shared_info(skb, nr_frags, sinfo->xdp_frags_size, xdp_frags_truesz, xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } /* TODO: build xdp in big mode */ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, struct xdp_buff *xdp, void *buf, unsigned int len, unsigned int frame_sz, int *num_buf, unsigned int *xdp_frags_truesize, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; unsigned int headroom, tailroom, room; unsigned int truesize, cur_frag_size; struct skb_shared_info *shinfo; unsigned int xdp_frags_truesz = 0; struct page *page; skb_frag_t *frag; int offset; void *ctx; xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM, VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); if (!*num_buf) return 0; if (*num_buf > 1) { /* If we want to build multi-buffer xdp, we need * to specify that the flags of xdp_buff have the * XDP_FLAGS_HAS_FRAG bit. 
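	 * The extra buffers are then attached as frags in the xdp_buff's
	 * skb_shared_info; a frame made of more than MAX_SKB_FRAGS + 1
	 * buffers is rejected with -EINVAL below.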
*/ if (!xdp_buff_has_frags(xdp)) xdp_buff_set_frags_flag(xdp); shinfo = xdp_get_shared_info_from_buff(xdp); shinfo->nr_frags = 0; shinfo->xdp_frags_size = 0; } if (*num_buf > MAX_SKB_FRAGS + 1) return -EINVAL; while (--*num_buf > 0) { buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, *num_buf, virtio16_to_cpu(vi->vdev, hdr->num_buffers)); DEV_STATS_INC(dev, rx_length_errors); goto err; } u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); offset = buf - page_address(page); truesize = mergeable_ctx_to_truesize(ctx); headroom = mergeable_ctx_to_headroom(ctx); tailroom = headroom ? sizeof(struct skb_shared_info) : 0; room = SKB_DATA_ALIGN(headroom + tailroom); cur_frag_size = truesize; xdp_frags_truesz += cur_frag_size; if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { put_page(page); pr_debug("%s: rx error: len %u exceeds truesize %lu\n", dev->name, len, (unsigned long)(truesize - room)); DEV_STATS_INC(dev, rx_length_errors); goto err; } frag = &shinfo->frags[shinfo->nr_frags++]; skb_frag_fill_page_desc(frag, page, offset, len); if (page_is_pfmemalloc(page)) xdp_buff_set_frag_pfmemalloc(xdp); shinfo->xdp_frags_size += len; } *xdp_frags_truesize = xdp_frags_truesz; return 0; err: put_xdp_frags(xdp); return -EINVAL; } static void *mergeable_xdp_get_buf(struct virtnet_info *vi, struct receive_queue *rq, struct bpf_prog *xdp_prog, void *ctx, unsigned int *frame_sz, int *num_buf, struct page **page, int offset, unsigned int *len, struct virtio_net_hdr_mrg_rxbuf *hdr) { unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); struct page *xdp_page; unsigned int xdp_room; /* Transient failure which in theory could occur if * in-flight packets from before XDP was enabled reach * the receive path after XDP is loaded. */ if (unlikely(hdr->hdr.gso_type)) return NULL; /* Now XDP core assumes frag size is PAGE_SIZE, but buffers * with headroom may add hole in truesize, which * make their length exceed PAGE_SIZE. So we disabled the * hole mechanism for xdp. See add_recvbuf_mergeable(). */ *frame_sz = truesize; if (likely(headroom >= virtnet_get_headroom(vi) && (*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) { return page_address(*page) + offset; } /* This happens when headroom is not enough because * of the buffer was prefilled before XDP is set. * This should only happen for the first several packets. * In fact, vq reset can be used here to help us clean up * the prefilled buffers, but many existing devices do not * support it, and we don't want to bother users who are * using xdp normally. 
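	 * Instead, the data is copied below: without frag support the whole
	 * frame is linearized via xdp_linearize_page(), with frag support
	 * only the first buffer is copied into a fresh page that has the
	 * full VIRTIO_XDP_HEADROOM reserved.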
*/ if (!xdp_prog->aux->xdp_has_frags) { /* linearize data for XDP */ xdp_page = xdp_linearize_page(rq, num_buf, *page, offset, VIRTIO_XDP_HEADROOM, len); if (!xdp_page) return NULL; } else { xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + sizeof(struct skb_shared_info)); if (*len + xdp_room > PAGE_SIZE) return NULL; xdp_page = alloc_page(GFP_ATOMIC); if (!xdp_page) return NULL; memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, page_address(*page) + offset, *len); } *frame_sz = PAGE_SIZE; put_page(*page); *page = xdp_page; return page_address(*page) + VIRTIO_XDP_HEADROOM; } static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, struct bpf_prog *xdp_prog, void *buf, void *ctx, unsigned int len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); struct page *page = virt_to_head_page(buf); int offset = buf - page_address(page); unsigned int xdp_frags_truesz = 0; struct sk_buff *head_skb; unsigned int frame_sz; struct xdp_buff xdp; void *data; u32 act; int err; data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page, offset, &len, hdr); if (unlikely(!data)) goto err_xdp; err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, &num_buf, &xdp_frags_truesz, stats); if (unlikely(err)) goto err_xdp; act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats); switch (act) { case XDP_PASS: head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); if (unlikely(!head_skb)) break; return head_skb; case XDP_TX: case XDP_REDIRECT: return NULL; default: break; } put_xdp_frags(&xdp); err_xdp: put_page(page); mergeable_buf_free(rq, num_buf, dev, stats); u64_stats_inc(&stats->xdp_drops); u64_stats_inc(&stats->drops); return NULL; } static struct sk_buff *receive_mergeable(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, void *ctx, unsigned int len, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); struct page *page = virt_to_head_page(buf); int offset = buf - page_address(page); struct sk_buff *head_skb, *curr_skb; unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); head_skb = NULL; u64_stats_add(&stats->bytes, len - vi->hdr_len); if (unlikely(len > truesize - room)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", dev->name, len, (unsigned long)(truesize - room)); DEV_STATS_INC(dev, rx_length_errors); goto err_skb; } if (unlikely(vi->xdp_enabled)) { struct bpf_prog *xdp_prog; rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx, len, xdp_xmit, stats); rcu_read_unlock(); return head_skb; } rcu_read_unlock(); } head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); curr_skb = head_skb; if (unlikely(!curr_skb)) goto err_skb; while (--num_buf) { int num_skb_frags; buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { pr_debug("%s: rx error: %d buffers out of %d missing\n", dev->name, num_buf, virtio16_to_cpu(vi->vdev, hdr->num_buffers)); DEV_STATS_INC(dev, rx_length_errors); goto err_buf; } u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); truesize = mergeable_ctx_to_truesize(ctx); headroom = mergeable_ctx_to_headroom(ctx); tailroom = headroom ? sizeof(struct skb_shared_info) : 0; room = SKB_DATA_ALIGN(headroom + tailroom); if (unlikely(len > truesize - room)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", dev->name, len, (unsigned long)(truesize - room)); DEV_STATS_INC(dev, rx_length_errors); goto err_skb; } num_skb_frags = skb_shinfo(curr_skb)->nr_frags; if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); if (unlikely(!nskb)) goto err_skb; if (curr_skb == head_skb) skb_shinfo(curr_skb)->frag_list = nskb; else curr_skb->next = nskb; curr_skb = nskb; head_skb->truesize += nskb->truesize; num_skb_frags = 0; } if (curr_skb != head_skb) { head_skb->data_len += len; head_skb->len += len; head_skb->truesize += truesize; } offset = buf - page_address(page); if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { put_page(page); skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, len, truesize); } else { skb_add_rx_frag(curr_skb, num_skb_frags, page, offset, len, truesize); } } ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); return head_skb; err_skb: put_page(page); mergeable_buf_free(rq, num_buf, dev, stats); err_buf: u64_stats_inc(&stats->drops); dev_kfree_skb(head_skb); return NULL; } static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, struct sk_buff *skb) { enum pkt_hash_types rss_hash_type; if (!hdr_hash || !skb) return; switch (__le16_to_cpu(hdr_hash->hash_report)) { case VIRTIO_NET_HASH_REPORT_TCPv4: case VIRTIO_NET_HASH_REPORT_UDPv4: case VIRTIO_NET_HASH_REPORT_TCPv6: case VIRTIO_NET_HASH_REPORT_UDPv6: case VIRTIO_NET_HASH_REPORT_TCPv6_EX: case VIRTIO_NET_HASH_REPORT_UDPv6_EX: rss_hash_type = PKT_HASH_TYPE_L4; break; case VIRTIO_NET_HASH_REPORT_IPv4: case VIRTIO_NET_HASH_REPORT_IPv6: case VIRTIO_NET_HASH_REPORT_IPv6_EX: rss_hash_type = PKT_HASH_TYPE_L3; break; case VIRTIO_NET_HASH_REPORT_NONE: default: rss_hash_type = PKT_HASH_TYPE_NONE; } skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); } static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, void *buf, unsigned int len, void **ctx, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { struct net_device *dev = vi->dev; struct sk_buff *skb; struct virtio_net_common_hdr *hdr; if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: 
short packet %i\n", dev->name, len); DEV_STATS_INC(dev, rx_length_errors); virtnet_rq_free_buf(vi, rq, buf); return; } if (vi->mergeable_rx_bufs) skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); else if (vi->big_packets) skb = receive_big(dev, vi, rq, buf, len, stats); else skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); if (unlikely(!skb)) return; hdr = skb_vnet_common_hdr(skb); if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) virtio_skb_set_hash(&hdr->hash_v1_hdr, skb); if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) skb->ip_summed = CHECKSUM_UNNECESSARY; if (virtio_net_hdr_to_skb(skb, &hdr->hdr, virtio_is_little_endian(vi->vdev))) { net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", dev->name, hdr->hdr.gso_type, hdr->hdr.gso_size); goto frame_err; } skb_record_rx_queue(skb, vq2rxq(rq->vq)); skb->protocol = eth_type_trans(skb, dev); pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); napi_gro_receive(&rq->napi, skb); return; frame_err: DEV_STATS_INC(dev, rx_frame_errors); dev_kfree_skb(skb); } /* Unlike mergeable buffers, all buffers are allocated to the * same size, except for the headroom. For this reason we do * not need to use mergeable_len_to_ctx here - it is enough * to store the headroom as the context ignoring the truesize. */ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { char *buf; unsigned int xdp_headroom = virtnet_get_headroom(vi); void *ctx = (void *)(unsigned long)xdp_headroom; int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; int err; len = SKB_DATA_ALIGN(len) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); buf = virtnet_rq_alloc(rq, len, gfp); if (unlikely(!buf)) return -ENOMEM; virtnet_rq_init_one_sg(rq, buf + VIRTNET_RX_PAD + xdp_headroom, vi->hdr_len + GOOD_PACKET_LEN); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) { if (rq->do_dma) virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); } return err; } static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { struct page *first, *list = NULL; char *p; int i, err, offset; sg_init_table(rq->sg, vi->big_packets_num_skbfrags + 2); /* page in rq->sg[vi->big_packets_num_skbfrags + 1] is list tail */ for (i = vi->big_packets_num_skbfrags + 1; i > 1; --i) { first = get_a_page(rq, gfp); if (!first) { if (list) give_pages(rq, list); return -ENOMEM; } sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE); /* chain new page in list head to match sg */ first->private = (unsigned long)list; list = first; } first = get_a_page(rq, gfp); if (!first) { give_pages(rq, list); return -ENOMEM; } p = page_address(first); /* rq->sg[0], rq->sg[1] share the same page */ /* a separated rq->sg[0] for header - required in case !any_header_sg */ sg_set_buf(&rq->sg[0], p, vi->hdr_len); /* rq->sg[1] for data packet, from offset */ offset = sizeof(struct padded_vnet_hdr); sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset); /* chain first in list head */ first->private = (unsigned long)list; err = virtqueue_add_inbuf(rq->vq, rq->sg, vi->big_packets_num_skbfrags + 2, first, gfp); if (err < 0) give_pages(rq, first); return err; } static unsigned int get_mergeable_buf_len(struct receive_queue *rq, struct ewma_pkt_len *avg_pkt_len, unsigned int room) { struct virtnet_info *vi = rq->vq->vdev->priv; const size_t hdr_len = vi->hdr_len; unsigned int len; if (room) return PAGE_SIZE - room; len = 
hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), rq->min_buf_len, PAGE_SIZE - hdr_len); return ALIGN(len, L1_CACHE_BYTES); } static int add_recvbuf_mergeable(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { struct page_frag *alloc_frag = &rq->alloc_frag; unsigned int headroom = virtnet_get_headroom(vi); unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); unsigned int len, hole; void *ctx; char *buf; int err; /* Extra tailroom is needed to satisfy XDP's assumption. This * means rx frags coalescing won't work, but consider we've * disabled GSO for XDP, it won't be a big issue. */ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); buf = virtnet_rq_alloc(rq, len + room, gfp); if (unlikely(!buf)) return -ENOMEM; buf += headroom; /* advance address leaving hole at front of pkt */ hole = alloc_frag->size - alloc_frag->offset; if (hole < len + room) { /* To avoid internal fragmentation, if there is very likely not * enough space for another buffer, add the remaining space to * the current buffer. * XDP core assumes that frame_size of xdp_buff and the length * of the frag are PAGE_SIZE, so we disable the hole mechanism. */ if (!headroom) len += hole; alloc_frag->offset += hole; } virtnet_rq_init_one_sg(rq, buf, len); ctx = mergeable_len_to_ctx(len + room, headroom); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) { if (rq->do_dma) virtnet_rq_unmap(rq, buf, 0); put_page(virt_to_head_page(buf)); } return err; } /* * Returns false if we couldn't fill entirely (OOM). * * Normally run in the receive path, but can also be run from ndo_open * before we're receiving packets, or from refill_work which is * careful to disable receiving (using napi_disable). */ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { int err; bool oom; do { if (vi->mergeable_rx_bufs) err = add_recvbuf_mergeable(vi, rq, gfp); else if (vi->big_packets) err = add_recvbuf_big(vi, rq, gfp); else err = add_recvbuf_small(vi, rq, gfp); oom = err == -ENOMEM; if (err) break; } while (rq->vq->num_free); if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { unsigned long flags; flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); u64_stats_inc(&rq->stats.kicks); u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); } return !oom; } static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; rq->calls++; virtqueue_napi_schedule(&rq->napi, rvq); } static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) { napi_enable(napi); /* If all buffers were filled by other side before we napi_enabled, we * won't get another interrupt, so process any outstanding packets now. * Call local_bh_enable after to trigger softIRQ processing. */ local_bh_disable(); virtqueue_napi_schedule(napi, vq); local_bh_enable(); } static void virtnet_napi_tx_enable(struct virtnet_info *vi, struct virtqueue *vq, struct napi_struct *napi) { if (!napi->weight) return; /* Tx napi touches cachelines on the cpu handling tx interrupts. Only * enable the feature if this is likely affine with the transmit path. 
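	 * When the hint is not set, napi->weight is cleared below and tx
	 * completions keep being reaped opportunistically from start_xmit()
	 * rather than from a tx napi context.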
*/ if (!vi->affinity_hint_set) { napi->weight = 0; return; } return virtnet_napi_enable(vq, napi); } static void virtnet_napi_tx_disable(struct napi_struct *napi) { if (napi->weight) napi_disable(napi); } static void refill_work(struct work_struct *work) { struct virtnet_info *vi = container_of(work, struct virtnet_info, refill.work); bool still_empty; int i; for (i = 0; i < vi->curr_queue_pairs; i++) { struct receive_queue *rq = &vi->rq[i]; napi_disable(&rq->napi); still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); virtnet_napi_enable(rq->vq, &rq->napi); /* In theory, this can happen: if we don't get any buffers in * we will *never* try to fill again. */ if (still_empty) schedule_delayed_work(&vi->refill, HZ/2); } } static int virtnet_receive(struct receive_queue *rq, int budget, unsigned int *xdp_xmit) { struct virtnet_info *vi = rq->vq->vdev->priv; struct virtnet_rq_stats stats = {}; unsigned int len; int packets = 0; void *buf; int i; if (!vi->big_packets || vi->mergeable_rx_bufs) { void *ctx; while (packets < budget && (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats); packets++; } } else { while (packets < budget && (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) { receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats); packets++; } } if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { spin_lock(&vi->refill_lock); if (vi->refill_enabled) schedule_delayed_work(&vi->refill, 0); spin_unlock(&vi->refill_lock); } } u64_stats_set(&stats.packets, packets); u64_stats_update_begin(&rq->stats.syncp); for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) { size_t offset = virtnet_rq_stats_desc[i].offset; u64_stats_t *item, *src; item = (u64_stats_t *)((u8 *)&rq->stats + offset); src = (u64_stats_t *)((u8 *)&stats + offset); u64_stats_add(item, u64_stats_read(src)); } u64_stats_update_end(&rq->stats.syncp); return packets; } static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; unsigned int index = vq2rxq(rq->vq); struct send_queue *sq = &vi->sq[index]; struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index); if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) return; if (__netif_tx_trylock(txq)) { if (sq->reset) { __netif_tx_unlock(txq); return; } do { virtqueue_disable_cb(sq->vq); free_old_xmit(sq, true); } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) netif_tx_wake_queue(txq); __netif_tx_unlock(txq); } } static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue *rq) { struct dim_sample cur_sample = {}; if (!rq->packets_in_napi) return; u64_stats_update_begin(&rq->stats.syncp); dim_update_sample(rq->calls, u64_stats_read(&rq->stats.packets), u64_stats_read(&rq->stats.bytes), &cur_sample); u64_stats_update_end(&rq->stats.syncp); net_dim(&rq->dim, cur_sample); rq->packets_in_napi = 0; } static int virtnet_poll(struct napi_struct *napi, int budget) { struct receive_queue *rq = container_of(napi, struct receive_queue, napi); struct virtnet_info *vi = rq->vq->vdev->priv; struct send_queue *sq; unsigned int received; unsigned int xdp_xmit = 0; bool napi_complete; virtnet_poll_cleantx(rq); received = virtnet_receive(rq, budget, &xdp_xmit); rq->packets_in_napi += received; if (xdp_xmit & VIRTIO_XDP_REDIR) xdp_do_flush(); /* Out of packets? 
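	 * If the budget was not exhausted, try to complete napi here; when
	 * rx dim is enabled a fresh sample is also fed to net_dim() so
	 * interrupt coalescing can adapt.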
*/ if (received < budget) { napi_complete = virtqueue_napi_complete(napi, rq->vq, received); if (napi_complete && rq->dim_enabled) virtnet_rx_dim_update(vi, rq); } if (xdp_xmit & VIRTIO_XDP_TX) { sq = virtnet_xdp_get_sq(vi); if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.kicks); u64_stats_update_end(&sq->stats.syncp); } virtnet_xdp_put_sq(vi, sq); } return received; } static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) { virtnet_napi_tx_disable(&vi->sq[qp_index].napi); napi_disable(&vi->rq[qp_index].napi); xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); } static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) { struct net_device *dev = vi->dev; int err; err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index, vi->rq[qp_index].napi.napi_id); if (err < 0) return err; err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); if (err < 0) goto err_xdp_reg_mem_model; virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); return 0; err_xdp_reg_mem_model: xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); return err; } static int virtnet_open(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); int i, err; enable_delayed_refill(vi); for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) /* Make sure we have some buffers: if oom use wq. */ if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) schedule_delayed_work(&vi->refill, 0); err = virtnet_enable_queue_pair(vi, i); if (err < 0) goto err_enable_qp; } return 0; err_enable_qp: disable_delayed_refill(vi); cancel_delayed_work_sync(&vi->refill); for (i--; i >= 0; i--) { virtnet_disable_queue_pair(vi, i); cancel_work_sync(&vi->rq[i].dim.work); } return err; } static int virtnet_poll_tx(struct napi_struct *napi, int budget) { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; unsigned int index = vq2txq(sq->vq); struct netdev_queue *txq; int opaque; bool done; if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { /* We don't need to enable cb for XDP */ napi_complete_done(napi, 0); return 0; } txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); virtqueue_disable_cb(sq->vq); free_old_xmit(sq, true); if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) netif_tx_wake_queue(txq); opaque = virtqueue_enable_cb_prepare(sq->vq); done = napi_complete_done(napi, 0); if (!done) virtqueue_disable_cb(sq->vq); __netif_tx_unlock(txq); if (done) { if (unlikely(virtqueue_poll(sq->vq, opaque))) { if (napi_schedule_prep(napi)) { __netif_tx_lock(txq, raw_smp_processor_id()); virtqueue_disable_cb(sq->vq); __netif_tx_unlock(txq); __napi_schedule(napi); } } } return 0; } static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) { struct virtio_net_hdr_mrg_rxbuf *hdr; const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; int num_sg; unsigned hdr_len = vi->hdr_len; bool can_push; pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); can_push = vi->any_header_sg && !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; /* Even if we can, don't push here yet as this would skew * csum_start offset below. 
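	 * (When can_push holds, i.e. any_header_sg plus aligned data and
	 * enough headroom, the virtio header is later pushed into the skb's
	 * headroom so header and data are covered by one skb_to_sgvec()
	 * call; otherwise the header goes into a separate sg entry below.)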
	 */
	if (can_push)
		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
	else
		hdr = &skb_vnet_common_hdr(skb)->mrg_hdr;

	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
				    virtio_is_little_endian(vi->vdev), false,
				    0))
		return -EPROTO;

	if (vi->mergeable_rx_bufs)
		hdr->num_buffers = 0;

	sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
	if (can_push) {
		__skb_push(skb, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
		if (unlikely(num_sg < 0))
			return num_sg;
		/* Pull header back to avoid skew in tx bytes calculations. */
		__skb_pull(skb, hdr_len);
	} else {
		sg_set_buf(sq->sg, hdr, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
		if (unlikely(num_sg < 0))
			return num_sg;
		num_sg++;
	}
	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
}

static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int qnum = skb_get_queue_mapping(skb);
	struct send_queue *sq = &vi->sq[qnum];
	int err;
	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
	bool kick = !netdev_xmit_more();
	bool use_napi = sq->napi.weight;

	/* Free up any pending old buffers before queueing new ones. */
	do {
		if (use_napi)
			virtqueue_disable_cb(sq->vq);

		free_old_xmit(sq, false);

	} while (use_napi && kick &&
		 unlikely(!virtqueue_enable_cb_delayed(sq->vq)));

	/* timestamp packet in software */
	skb_tx_timestamp(skb);

	/* Try to transmit */
	err = xmit_skb(sq, skb);

	/* This should not happen! */
	if (unlikely(err)) {
		DEV_STATS_INC(dev, tx_fifo_errors);
		if (net_ratelimit())
			dev_warn(&dev->dev,
				 "Unexpected TXQ (%d) queue failure: %d\n",
				 qnum, err);
		DEV_STATS_INC(dev, tx_dropped);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* Don't wait up for transmitted skbs to be freed. */
	if (!use_napi) {
		skb_orphan(skb);
		nf_reset_ct(skb);
	}

	check_sq_full_and_disable(vi, dev, sq);

	if (kick || netif_xmit_stopped(txq)) {
		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
			u64_stats_update_begin(&sq->stats.syncp);
			u64_stats_inc(&sq->stats.kicks);
			u64_stats_update_end(&sq->stats.syncp);
		}
	}

	return NETDEV_TX_OK;
}

static int virtnet_rx_resize(struct virtnet_info *vi,
			     struct receive_queue *rq, u32 ring_num)
{
	bool running = netif_running(vi->dev);
	int err, qindex;

	qindex = rq - vi->rq;

	if (running) {
		napi_disable(&rq->napi);
		cancel_work_sync(&rq->dim.work);
	}

	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_unmap_free_buf);
	if (err)
		netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);

	if (!try_fill_recv(vi, rq, GFP_KERNEL))
		schedule_delayed_work(&vi->refill, 0);

	if (running)
		virtnet_napi_enable(rq->vq, &rq->napi);
	return err;
}

static int virtnet_tx_resize(struct virtnet_info *vi,
			     struct send_queue *sq, u32 ring_num)
{
	bool running = netif_running(vi->dev);
	struct netdev_queue *txq;
	int err, qindex;

	qindex = sq - vi->sq;

	if (running)
		virtnet_napi_tx_disable(&sq->napi);

	txq = netdev_get_tx_queue(vi->dev, qindex);

	/* 1. wait for all xmit to complete
	 * 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
	 */
	__netif_tx_lock_bh(txq);

	/* Prevent rx poll from accessing sq. */
	sq->reset = true;

	/* Prevent the upper layer from trying to send packets.
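	 * While sq->reset is set, virtnet_poll_cleantx() backs off and the
	 * subqueue below stays stopped, so virtqueue_resize() can run
	 * without the ring being touched concurrently.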
*/ netif_stop_subqueue(vi->dev, qindex); __netif_tx_unlock_bh(txq); err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf); if (err) netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err); __netif_tx_lock_bh(txq); sq->reset = false; netif_tx_wake_queue(txq); __netif_tx_unlock_bh(txq); if (running) virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); return err; } /* * Send command via the control virtqueue and check status. Commands * supported by the hypervisor, as indicated by feature bits, should * never fail unless improperly formatted. */ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, struct scatterlist *out) { struct scatterlist *sgs[4], hdr, stat; unsigned out_num = 0, tmp; int ret; /* Caller should know better */ BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)); vi->ctrl->status = ~0; vi->ctrl->hdr.class = class; vi->ctrl->hdr.cmd = cmd; /* Add header */ sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr)); sgs[out_num++] = &hdr; if (out) sgs[out_num++] = out; /* Add return status. */ sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status)); sgs[out_num] = &stat; BUG_ON(out_num + 1 > ARRAY_SIZE(sgs)); ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC); if (ret < 0) { dev_warn(&vi->vdev->dev, "Failed to add sgs for command vq: %d\n.", ret); return false; } if (unlikely(!virtqueue_kick(vi->cvq))) return vi->ctrl->status == VIRTIO_NET_OK; /* Spin for a response, the kick causes an ioport write, trapping * into the hypervisor, so the request should be handled immediately. */ while (!virtqueue_get_buf(vi->cvq, &tmp) && !virtqueue_is_broken(vi->cvq)) { cond_resched(); cpu_relax(); } return vi->ctrl->status == VIRTIO_NET_OK; } static int virtnet_set_mac_address(struct net_device *dev, void *p) { struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; int ret; struct sockaddr *addr; struct scatterlist sg; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) return -EOPNOTSUPP; addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); if (!addr) return -ENOMEM; ret = eth_prepare_mac_addr_change(dev, addr); if (ret) goto out; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { sg_init_one(&sg, addr->sa_data, dev->addr_len); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { dev_warn(&vdev->dev, "Failed to set mac address by vq command.\n"); ret = -EINVAL; goto out; } } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { unsigned int i; /* Naturally, this has an atomicity problem. 
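	 * (The legacy path below writes the MAC one byte at a time through
	 * config space with virtio_cwrite8(), so the device may briefly see
	 * a mix of old and new address bytes; the CTRL_MAC_ADDR_SET command
	 * above does not have this problem.)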
*/ for (i = 0; i < dev->addr_len; i++) virtio_cwrite8(vdev, offsetof(struct virtio_net_config, mac) + i, addr->sa_data[i]); } eth_commit_mac_addr_change(dev, p); ret = 0; out: kfree(addr); return ret; } static void virtnet_stats(struct net_device *dev, struct rtnl_link_stats64 *tot) { struct virtnet_info *vi = netdev_priv(dev); unsigned int start; int i; for (i = 0; i < vi->max_queue_pairs; i++) { u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops; struct receive_queue *rq = &vi->rq[i]; struct send_queue *sq = &vi->sq[i]; do { start = u64_stats_fetch_begin(&sq->stats.syncp); tpackets = u64_stats_read(&sq->stats.packets); tbytes = u64_stats_read(&sq->stats.bytes); terrors = u64_stats_read(&sq->stats.tx_timeouts); } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); do { start = u64_stats_fetch_begin(&rq->stats.syncp); rpackets = u64_stats_read(&rq->stats.packets); rbytes = u64_stats_read(&rq->stats.bytes); rdrops = u64_stats_read(&rq->stats.drops); } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; tot->rx_bytes += rbytes; tot->tx_bytes += tbytes; tot->rx_dropped += rdrops; tot->tx_errors += terrors; } tot->tx_dropped = DEV_STATS_READ(dev, tx_dropped); tot->tx_fifo_errors = DEV_STATS_READ(dev, tx_fifo_errors); tot->rx_length_errors = DEV_STATS_READ(dev, rx_length_errors); tot->rx_frame_errors = DEV_STATS_READ(dev, rx_frame_errors); } static void virtnet_ack_link_announce(struct virtnet_info *vi) { rtnl_lock(); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE, VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL)) dev_warn(&vi->dev->dev, "Failed to ack link announce.\n"); rtnl_unlock(); } static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) { struct scatterlist sg; struct net_device *dev = vi->dev; if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) return 0; vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", queue_pairs); return -EINVAL; } else { vi->curr_queue_pairs = queue_pairs; /* virtnet_open() will refill when device is going to up. */ if (dev->flags & IFF_UP) schedule_delayed_work(&vi->refill, 0); } return 0; } static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) { int err; rtnl_lock(); err = _virtnet_set_queues(vi, queue_pairs); rtnl_unlock(); return err; } static int virtnet_close(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); int i; /* Make sure NAPI doesn't schedule refill work */ disable_delayed_refill(vi); /* Make sure refill_work doesn't re-enable napi! 
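	 * refill_work() itself disables and re-enables per-queue napi, so it
	 * must be flushed with cancel_delayed_work_sync() before the queue
	 * pairs below are torn down.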
*/ cancel_delayed_work_sync(&vi->refill); for (i = 0; i < vi->max_queue_pairs; i++) { virtnet_disable_queue_pair(vi, i); cancel_work_sync(&vi->rq[i].dim.work); } return 0; } static void virtnet_rx_mode_work(struct work_struct *work) { struct virtnet_info *vi = container_of(work, struct virtnet_info, rx_mode_work); struct net_device *dev = vi->dev; struct scatterlist sg[2]; struct virtio_net_ctrl_mac *mac_data; struct netdev_hw_addr *ha; int uc_count; int mc_count; void *buf; int i; /* We can't dynamically set ndo_set_rx_mode, so return gracefully */ if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX)) return; rtnl_lock(); vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0); vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0); sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_PROMISC, sg)) dev_warn(&dev->dev, "Failed to %sable promisc mode.\n", vi->ctrl->promisc ? "en" : "dis"); sg_init_one(sg, &vi->ctrl->allmulti, sizeof(vi->ctrl->allmulti)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX, VIRTIO_NET_CTRL_RX_ALLMULTI, sg)) dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", vi->ctrl->allmulti ? "en" : "dis"); netif_addr_lock_bh(dev); uc_count = netdev_uc_count(dev); mc_count = netdev_mc_count(dev); /* MAC filter - use one buffer for both lists */ buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) + (2 * sizeof(mac_data->entries)), GFP_ATOMIC); mac_data = buf; if (!buf) { netif_addr_unlock_bh(dev); rtnl_unlock(); return; } sg_init_table(sg, 2); /* Store the unicast list and count in the front of the buffer */ mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count); i = 0; netdev_for_each_uc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); sg_set_buf(&sg[0], mac_data, sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); /* multicast list and count fill the end */ mac_data = (void *)&mac_data->macs[uc_count][0]; mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count); i = 0; netdev_for_each_mc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); netif_addr_unlock_bh(dev); sg_set_buf(&sg[1], mac_data, sizeof(mac_data->entries) + (mc_count * ETH_ALEN)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET, sg)) dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); rtnl_unlock(); kfree(buf); } static void virtnet_set_rx_mode(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); if (vi->rx_mode_work_enabled) schedule_work(&vi->rx_mode_work); } static int virtnet_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_ADD, &sg)) dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid); return 0; } static int virtnet_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid); sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_DEL, &sg)) dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid); return 0; } static void virtnet_clean_affinity(struct virtnet_info *vi) { int i; if (vi->affinity_hint_set) { for (i = 0; i < vi->max_queue_pairs; i++) { virtqueue_set_affinity(vi->rq[i].vq, 
NULL); virtqueue_set_affinity(vi->sq[i].vq, NULL); } vi->affinity_hint_set = false; } } static void virtnet_set_affinity(struct virtnet_info *vi) { cpumask_var_t mask; int stragglers; int group_size; int i, j, cpu; int num_cpu; int stride; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { virtnet_clean_affinity(vi); return; } num_cpu = num_online_cpus(); stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); stragglers = num_cpu >= vi->curr_queue_pairs ? num_cpu % vi->curr_queue_pairs : 0; cpu = cpumask_first(cpu_online_mask); for (i = 0; i < vi->curr_queue_pairs; i++) { group_size = stride + (i < stragglers ? 1 : 0); for (j = 0; j < group_size; j++) { cpumask_set_cpu(cpu, mask); cpu = cpumask_next_wrap(cpu, cpu_online_mask, nr_cpu_ids, false); } virtqueue_set_affinity(vi->rq[i].vq, mask); virtqueue_set_affinity(vi->sq[i].vq, mask); __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS); cpumask_clear(mask); } vi->affinity_hint_set = true; free_cpumask_var(mask); } static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) { struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, node); virtnet_set_affinity(vi); return 0; } static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node) { struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, node_dead); virtnet_set_affinity(vi); return 0; } static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) { struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, node); virtnet_clean_affinity(vi); return 0; } static enum cpuhp_state virtionet_online; static int virtnet_cpu_notif_add(struct virtnet_info *vi) { int ret; ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node); if (ret) return ret; ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD, &vi->node_dead); if (!ret) return ret; cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); return ret; } static void virtnet_cpu_notif_remove(struct virtnet_info *vi) { cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node); cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD, &vi->node_dead); } static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, u16 vqn, u32 max_usecs, u32 max_packets) { struct scatterlist sgs; vi->ctrl->coal_vq.vqn = cpu_to_le16(vqn); vi->ctrl->coal_vq.coal.max_usecs = cpu_to_le32(max_usecs); vi->ctrl->coal_vq.coal.max_packets = cpu_to_le32(max_packets); sg_init_one(&sgs, &vi->ctrl->coal_vq, sizeof(vi->ctrl->coal_vq)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET, &sgs)) return -EINVAL; return 0; } static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, u16 queue, u32 max_usecs, u32 max_packets) { int err; err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), max_usecs, max_packets); if (err) return err; vi->rq[queue].intr_coal.max_usecs = max_usecs; vi->rq[queue].intr_coal.max_packets = max_packets; return 0; } static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, u16 queue, u32 max_usecs, u32 max_packets) { int err; err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), max_usecs, max_packets); if (err) return err; vi->sq[queue].intr_coal.max_usecs = max_usecs; vi->sq[queue].intr_coal.max_packets = max_packets; return 0; } static void virtnet_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); ring->rx_max_pending = 
vi->rq[0].vq->num_max; ring->tx_max_pending = vi->sq[0].vq->num_max; ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); } static int virtnet_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); u32 rx_pending, tx_pending; struct receive_queue *rq; struct send_queue *sq; int i, err; if (ring->rx_mini_pending || ring->rx_jumbo_pending) return -EINVAL; rx_pending = virtqueue_get_vring_size(vi->rq[0].vq); tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); if (ring->rx_pending == rx_pending && ring->tx_pending == tx_pending) return 0; if (ring->rx_pending > vi->rq[0].vq->num_max) return -EINVAL; if (ring->tx_pending > vi->sq[0].vq->num_max) return -EINVAL; for (i = 0; i < vi->max_queue_pairs; i++) { rq = vi->rq + i; sq = vi->sq + i; if (ring->tx_pending != tx_pending) { err = virtnet_tx_resize(vi, sq, ring->tx_pending); if (err) return err; /* Upon disabling and re-enabling a transmit virtqueue, the device must * set the coalescing parameters of the virtqueue to those configured * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver * did not set any TX coalescing parameters, to 0. */ err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_tx.max_usecs, vi->intr_coal_tx.max_packets); if (err) return err; } if (ring->rx_pending != rx_pending) { err = virtnet_rx_resize(vi, rq, ring->rx_pending); if (err) return err; /* The reason is same as the transmit virtqueue reset */ err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_rx.max_usecs, vi->intr_coal_rx.max_packets); if (err) return err; } } return 0; } static bool virtnet_commit_rss_command(struct virtnet_info *vi) { struct net_device *dev = vi->dev; struct scatterlist sgs[4]; unsigned int sg_buf_size; /* prepare sgs */ sg_init_table(sgs, 4); sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); sg_set_buf(&sgs[0], &vi->ctrl->rss, sg_buf_size); sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1); sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size); sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); sg_set_buf(&sgs[2], &vi->ctrl->rss.max_tx_vq, sg_buf_size); sg_buf_size = vi->rss_key_size; sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) { dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); return false; } return true; } static void virtnet_init_default_rss(struct virtnet_info *vi) { u32 indir_val = 0; int i = 0; vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; vi->rss_hash_types_saved = vi->rss_hash_types_supported; vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size ? vi->rss_indir_table_size - 1 : 0; vi->ctrl->rss.unclassified_queue = 0; for (; i < vi->rss_indir_table_size; ++i) { indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); vi->ctrl->rss.indirection_table[i] = indir_val; } vi->ctrl->rss.max_tx_vq = vi->has_rss ? 
vi->curr_queue_pairs : 0; vi->ctrl->rss.hash_key_length = vi->rss_key_size; netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); } static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) { info->data = 0; switch (info->flow_type) { case TCP_V4_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { info->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3; } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { info->data = RXH_IP_SRC | RXH_IP_DST; } break; case TCP_V6_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { info->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3; } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { info->data = RXH_IP_SRC | RXH_IP_DST; } break; case UDP_V4_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { info->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3; } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { info->data = RXH_IP_SRC | RXH_IP_DST; } break; case UDP_V6_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { info->data = RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3; } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { info->data = RXH_IP_SRC | RXH_IP_DST; } break; case IPV4_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) info->data = RXH_IP_SRC | RXH_IP_DST; break; case IPV6_FLOW: if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) info->data = RXH_IP_SRC | RXH_IP_DST; break; default: info->data = 0; break; } } static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) { u32 new_hashtypes = vi->rss_hash_types_saved; bool is_disable = info->data & RXH_DISCARD; bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); /* supports only 'sd', 'sdfn' and 'r' */ if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) return false; switch (info->flow_type) { case TCP_V4_FLOW: new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); if (!is_disable) new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); break; case UDP_V4_FLOW: new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); if (!is_disable) new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); break; case IPV4_FLOW: new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; if (!is_disable) new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; break; case TCP_V6_FLOW: new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); if (!is_disable) new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); break; case UDP_V6_FLOW: new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); if (!is_disable) new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); break; case IPV6_FLOW: new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; if (!is_disable) new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; break; default: /* unsupported flow */ return false; } /* if unsupported hashtype was set */ if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) return false; if (new_hashtypes != vi->rss_hash_types_saved) { vi->rss_hash_types_saved = new_hashtypes; vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; if (vi->dev->features & NETIF_F_RXHASH) return virtnet_commit_rss_command(vi); } return true; } static void virtnet_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); strscpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version)); strscpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info)); } /* TODO: Eliminate OOO packets during switching */ static int virtnet_set_channels(struct net_device *dev, struct ethtool_channels *channels) { struct virtnet_info *vi = netdev_priv(dev); u16 queue_pairs = channels->combined_count; int err; /* We don't support separate rx/tx channels. * We don't allow setting 'other' channels. */ if (channels->rx_count || channels->tx_count || channels->other_count) return -EINVAL; if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0) return -EINVAL; /* For now we don't support modifying channels while XDP is loaded * also when XDP is loaded all RX queues have XDP programs so we only * need to check a single RX queue. */ if (vi->rq[0].xdp_prog) return -EINVAL; cpus_read_lock(); err = _virtnet_set_queues(vi, queue_pairs); if (err) { cpus_read_unlock(); goto err; } virtnet_set_affinity(vi); cpus_read_unlock(); netif_set_real_num_tx_queues(dev, queue_pairs); netif_set_real_num_rx_queues(dev, queue_pairs); err: return err; } static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct virtnet_info *vi = netdev_priv(dev); unsigned int i, j; u8 *p = data; switch (stringset) { case ETH_SS_STATS: for (i = 0; i < vi->curr_queue_pairs; i++) { for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) ethtool_sprintf(&p, "rx_queue_%u_%s", i, virtnet_rq_stats_desc[j].desc); } for (i = 0; i < vi->curr_queue_pairs; i++) { for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) ethtool_sprintf(&p, "tx_queue_%u_%s", i, virtnet_sq_stats_desc[j].desc); } break; } } static int virtnet_get_sset_count(struct net_device *dev, int sset) { struct virtnet_info *vi = netdev_priv(dev); switch (sset) { case ETH_SS_STATS: return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN + VIRTNET_SQ_STATS_LEN); default: return -EOPNOTSUPP; } } static void virtnet_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct virtnet_info *vi = netdev_priv(dev); unsigned int idx = 0, start, i, j; const u8 *stats_base; const u64_stats_t *p; size_t offset; for (i = 0; i < vi->curr_queue_pairs; i++) { struct receive_queue *rq = &vi->rq[i]; stats_base = (const u8 *)&rq->stats; do { start = u64_stats_fetch_begin(&rq->stats.syncp); for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { offset = virtnet_rq_stats_desc[j].offset; p = (const u64_stats_t *)(stats_base + offset); data[idx + j] = u64_stats_read(p); } } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); idx += VIRTNET_RQ_STATS_LEN; } for (i = 0; i < vi->curr_queue_pairs; i++) { struct send_queue *sq = &vi->sq[i]; stats_base = (const u8 *)&sq->stats; do { start = 
u64_stats_fetch_begin(&sq->stats.syncp); for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { offset = virtnet_sq_stats_desc[j].offset; p = (const u64_stats_t *)(stats_base + offset); data[idx + j] = u64_stats_read(p); } } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); idx += VIRTNET_SQ_STATS_LEN; } } static void virtnet_get_channels(struct net_device *dev, struct ethtool_channels *channels) { struct virtnet_info *vi = netdev_priv(dev); channels->combined_count = vi->curr_queue_pairs; channels->max_combined = vi->max_queue_pairs; channels->max_other = 0; channels->rx_count = 0; channels->tx_count = 0; channels->other_count = 0; } static int virtnet_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { struct virtnet_info *vi = netdev_priv(dev); return ethtool_virtdev_set_link_ksettings(dev, cmd, &vi->speed, &vi->duplex); } static int virtnet_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct virtnet_info *vi = netdev_priv(dev); cmd->base.speed = vi->speed; cmd->base.duplex = vi->duplex; cmd->base.port = PORT_OTHER; return 0; } static int virtnet_send_tx_notf_coal_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec) { struct scatterlist sgs_tx; int i; vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, &sgs_tx)) return -EINVAL; vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; for (i = 0; i < vi->max_queue_pairs; i++) { vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; } return 0; } static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec) { bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; struct scatterlist sgs_rx; int i; if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) return -EOPNOTSUPP; if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != vi->intr_coal_rx.max_usecs || ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets)) return -EINVAL; if (rx_ctrl_dim_on && !vi->rx_dim_enabled) { vi->rx_dim_enabled = true; for (i = 0; i < vi->max_queue_pairs; i++) vi->rq[i].dim_enabled = true; return 0; } if (!rx_ctrl_dim_on && vi->rx_dim_enabled) { vi->rx_dim_enabled = false; for (i = 0; i < vi->max_queue_pairs; i++) vi->rq[i].dim_enabled = false; } /* Since the per-queue coalescing params can be set, * we need apply the global new params even if they * are not updated. 
*/ vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, &sgs_rx)) return -EINVAL; vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; for (i = 0; i < vi->max_queue_pairs; i++) { vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; } return 0; } static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec) { int err; err = virtnet_send_tx_notf_coal_cmds(vi, ec); if (err) return err; err = virtnet_send_rx_notf_coal_cmds(vi, ec); if (err) return err; return 0; } static int virtnet_send_rx_notf_coal_vq_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec, u16 queue) { bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce; bool cur_rx_dim = vi->rq[queue].dim_enabled; u32 max_usecs, max_packets; int err; max_usecs = vi->rq[queue].intr_coal.max_usecs; max_packets = vi->rq[queue].intr_coal.max_packets; if (rx_ctrl_dim_on && (ec->rx_coalesce_usecs != max_usecs || ec->rx_max_coalesced_frames != max_packets)) return -EINVAL; if (rx_ctrl_dim_on && !cur_rx_dim) { vi->rq[queue].dim_enabled = true; return 0; } if (!rx_ctrl_dim_on && cur_rx_dim) vi->rq[queue].dim_enabled = false; /* If no params are updated, userspace ethtool will * reject the modification. */ err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, queue, ec->rx_coalesce_usecs, ec->rx_max_coalesced_frames); if (err) return err; return 0; } static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec, u16 queue) { int err; err = virtnet_send_rx_notf_coal_vq_cmds(vi, ec, queue); if (err) return err; err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, queue, ec->tx_coalesce_usecs, ec->tx_max_coalesced_frames); if (err) return err; return 0; } static void virtnet_rx_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); struct receive_queue *rq = container_of(dim, struct receive_queue, dim); struct virtnet_info *vi = rq->vq->vdev->priv; struct net_device *dev = vi->dev; struct dim_cq_moder update_moder; int i, qnum, err; if (!rtnl_trylock()) return; /* Each rxq's work is queued by "net_dim()->schedule_work()" * in response to NAPI traffic changes. Note that dim->profile_ix * for each rxq is updated prior to the queuing action. * So we only need to traverse and update profiles for all rxqs * in the work which is holding rtnl_lock. */ for (i = 0; i < vi->curr_queue_pairs; i++) { rq = &vi->rq[i]; dim = &rq->dim; qnum = rq - vi->rq; if (!rq->dim_enabled) continue; update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); if (update_moder.usec != rq->intr_coal.max_usecs || update_moder.pkts != rq->intr_coal.max_packets) { err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum, update_moder.usec, update_moder.pkts); if (err) pr_debug("%s: Failed to send dim parameters on rxq%d\n", dev->name, qnum); dim->state = DIM_START_MEASURE; } } rtnl_unlock(); } static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) { /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. 
*/ if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) return -EOPNOTSUPP; if (ec->tx_max_coalesced_frames > 1 || ec->rx_max_coalesced_frames != 1) return -EINVAL; return 0; } static int virtnet_should_update_vq_weight(int dev_flags, int weight, int vq_weight, bool *should_update) { if (weight ^ vq_weight) { if (dev_flags & IFF_UP) return -EBUSY; *should_update = true; } return 0; } static int virtnet_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); int ret, queue_number, napi_weight; bool update_napi = false; /* Can't change NAPI weight if the link is up */ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; for (queue_number = 0; queue_number < vi->max_queue_pairs; queue_number++) { ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, vi->sq[queue_number].napi.weight, &update_napi); if (ret) return ret; if (update_napi) { /* All queues that belong to [queue_number, vi->max_queue_pairs] will be * updated for the sake of simplicity, which might not be necessary */ break; } } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) ret = virtnet_send_notf_coal_cmds(vi, ec); else ret = virtnet_coal_params_supported(ec); if (ret) return ret; if (update_napi) { for (; queue_number < vi->max_queue_pairs; queue_number++) vi->sq[queue_number].napi.weight = napi_weight; } return ret; } static int virtnet_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { ec->rx_coalesce_usecs = vi->intr_coal_rx.max_usecs; ec->tx_coalesce_usecs = vi->intr_coal_tx.max_usecs; ec->tx_max_coalesced_frames = vi->intr_coal_tx.max_packets; ec->rx_max_coalesced_frames = vi->intr_coal_rx.max_packets; ec->use_adaptive_rx_coalesce = vi->rx_dim_enabled; } else { ec->rx_max_coalesced_frames = 1; if (vi->sq[0].napi.weight) ec->tx_max_coalesced_frames = 1; } return 0; } static int virtnet_set_per_queue_coalesce(struct net_device *dev, u32 queue, struct ethtool_coalesce *ec) { struct virtnet_info *vi = netdev_priv(dev); int ret, napi_weight; bool update_napi = false; if (queue >= vi->max_queue_pairs) return -EINVAL; /* Can't change NAPI weight if the link is up */ napi_weight = ec->tx_max_coalesced_frames ? 
NAPI_POLL_WEIGHT : 0; ret = virtnet_should_update_vq_weight(dev->flags, napi_weight, vi->sq[queue].napi.weight, &update_napi); if (ret) return ret; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) ret = virtnet_send_notf_coal_vq_cmds(vi, ec, queue); else ret = virtnet_coal_params_supported(ec); if (ret) return ret; if (update_napi) vi->sq[queue].napi.weight = napi_weight; return 0; } static int virtnet_get_per_queue_coalesce(struct net_device *dev, u32 queue, struct ethtool_coalesce *ec) { struct virtnet_info *vi = netdev_priv(dev); if (queue >= vi->max_queue_pairs) return -EINVAL; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { ec->rx_coalesce_usecs = vi->rq[queue].intr_coal.max_usecs; ec->tx_coalesce_usecs = vi->sq[queue].intr_coal.max_usecs; ec->tx_max_coalesced_frames = vi->sq[queue].intr_coal.max_packets; ec->rx_max_coalesced_frames = vi->rq[queue].intr_coal.max_packets; ec->use_adaptive_rx_coalesce = vi->rq[queue].dim_enabled; } else { ec->rx_max_coalesced_frames = 1; if (vi->sq[queue].napi.weight) ec->tx_max_coalesced_frames = 1; } return 0; } static void virtnet_init_settings(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); vi->speed = SPEED_UNKNOWN; vi->duplex = DUPLEX_UNKNOWN; } static void virtnet_update_settings(struct virtnet_info *vi) { u32 speed; u8 duplex; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) return; virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); if (ethtool_validate_speed(speed)) vi->speed = speed; virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); if (ethtool_validate_duplex(duplex)) vi->duplex = duplex; } static u32 virtnet_get_rxfh_key_size(struct net_device *dev) { return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; } static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) { return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; } static int virtnet_get_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh) { struct virtnet_info *vi = netdev_priv(dev); int i; if (rxfh->indir) { for (i = 0; i < vi->rss_indir_table_size; ++i) rxfh->indir[i] = vi->ctrl->rss.indirection_table[i]; } if (rxfh->key) memcpy(rxfh->key, vi->ctrl->rss.key, vi->rss_key_size); rxfh->hfunc = ETH_RSS_HASH_TOP; return 0; } static int virtnet_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); bool update = false; int i; if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; if (rxfh->indir) { if (!vi->has_rss) return -EOPNOTSUPP; for (i = 0; i < vi->rss_indir_table_size; ++i) vi->ctrl->rss.indirection_table[i] = rxfh->indir[i]; update = true; } if (rxfh->key) { /* If either _F_HASH_REPORT or _F_RSS are negotiated, the * device provides hash calculation capabilities, that is, * hash_key is configured. 
*/ if (!vi->has_rss && !vi->has_rss_hash_report) return -EOPNOTSUPP; memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size); update = true; } if (update) virtnet_commit_rss_command(vi); return 0; } static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { struct virtnet_info *vi = netdev_priv(dev); int rc = 0; switch (info->cmd) { case ETHTOOL_GRXRINGS: info->data = vi->curr_queue_pairs; break; case ETHTOOL_GRXFH: virtnet_get_hashflow(vi, info); break; default: rc = -EOPNOTSUPP; } return rc; } static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) { struct virtnet_info *vi = netdev_priv(dev); int rc = 0; switch (info->cmd) { case ETHTOOL_SRXFH: if (!virtnet_set_hashflow(vi, info)) rc = -EINVAL; break; default: rc = -EOPNOTSUPP; } return rc; } static const struct ethtool_ops virtnet_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = virtnet_get_ringparam, .set_ringparam = virtnet_set_ringparam, .get_strings = virtnet_get_strings, .get_sset_count = virtnet_get_sset_count, .get_ethtool_stats = virtnet_get_ethtool_stats, .set_channels = virtnet_set_channels, .get_channels = virtnet_get_channels, .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = virtnet_get_link_ksettings, .set_link_ksettings = virtnet_set_link_ksettings, .set_coalesce = virtnet_set_coalesce, .get_coalesce = virtnet_get_coalesce, .set_per_queue_coalesce = virtnet_set_per_queue_coalesce, .get_per_queue_coalesce = virtnet_get_per_queue_coalesce, .get_rxfh_key_size = virtnet_get_rxfh_key_size, .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, .get_rxfh = virtnet_get_rxfh, .set_rxfh = virtnet_set_rxfh, .get_rxnfc = virtnet_get_rxnfc, .set_rxnfc = virtnet_set_rxnfc, }; static void virtnet_freeze_down(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); disable_rx_mode_work(vi); flush_work(&vi->rx_mode_work); netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); netif_tx_unlock_bh(vi->dev); if (netif_running(vi->dev)) virtnet_close(vi->dev); } static int init_vqs(struct virtnet_info *vi); static int virtnet_restore_up(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; int err; err = init_vqs(vi); if (err) return err; virtio_device_ready(vdev); enable_delayed_refill(vi); enable_rx_mode_work(vi); if (netif_running(vi->dev)) { err = virtnet_open(vi->dev); if (err) return err; } netif_tx_lock_bh(vi->dev); netif_device_attach(vi->dev); netif_tx_unlock_bh(vi->dev); return err; } static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads) { struct scatterlist sg; vi->ctrl->offloads = cpu_to_virtio64(vi->vdev, offloads); sg_init_one(&sg, &vi->ctrl->offloads, sizeof(vi->ctrl->offloads)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS, VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) { dev_warn(&vi->dev->dev, "Fail to set guest offload.\n"); return -EINVAL; } return 0; } static int virtnet_clear_guest_offloads(struct virtnet_info *vi) { u64 offloads = 0; if (!vi->guest_offloads) return 0; return virtnet_set_guest_offloads(vi, offloads); } static int virtnet_restore_guest_offloads(struct virtnet_info *vi) { u64 offloads = vi->guest_offloads; if (!vi->guest_offloads) return 0; return virtnet_set_guest_offloads(vi, offloads); } static int 
virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + sizeof(struct skb_shared_info)); unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; struct virtnet_info *vi = netdev_priv(dev); struct bpf_prog *old_prog; u16 xdp_qp = 0, curr_qp; int i, err; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) { NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); return -EOPNOTSUPP; } if (vi->mergeable_rx_bufs && !vi->any_header_sg) { NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); return -EINVAL; } if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); return -EINVAL; } curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs; if (prog) xdp_qp = nr_cpu_ids; /* XDP requires extra queues for XDP_TX */ if (curr_qp + xdp_qp > vi->max_queue_pairs) { netdev_warn_once(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n", curr_qp + xdp_qp, vi->max_queue_pairs); xdp_qp = 0; } old_prog = rtnl_dereference(vi->rq[0].xdp_prog); if (!prog && !old_prog) return 0; if (prog) bpf_prog_add(prog, vi->max_queue_pairs - 1); /* Make sure NAPI is not using any XDP TX queues for RX. 
*/ if (netif_running(dev)) { for (i = 0; i < vi->max_queue_pairs; i++) { napi_disable(&vi->rq[i].napi); virtnet_napi_tx_disable(&vi->sq[i].napi); } } if (!prog) { for (i = 0; i < vi->max_queue_pairs; i++) { rcu_assign_pointer(vi->rq[i].xdp_prog, prog); if (i == 0) virtnet_restore_guest_offloads(vi); } synchronize_net(); } err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp; if (prog) { vi->xdp_enabled = true; for (i = 0; i < vi->max_queue_pairs; i++) { rcu_assign_pointer(vi->rq[i].xdp_prog, prog); if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } if (!old_prog) xdp_features_set_redirect_target(dev, true); } else { xdp_features_clear_redirect_target(dev); vi->xdp_enabled = false; } for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); if (netif_running(dev)) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); } } return 0; err: if (!prog) { virtnet_clear_guest_offloads(vi); for (i = 0; i < vi->max_queue_pairs; i++) rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); } if (netif_running(dev)) { for (i = 0; i < vi->max_queue_pairs; i++) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); } } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; } static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return virtnet_xdp_set(dev, xdp->prog, xdp->extack); default: return -EINVAL; } } static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { struct virtnet_info *vi = netdev_priv(dev); int ret; if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) return -EOPNOTSUPP; ret = snprintf(buf, len, "sby"); if (ret >= len) return -EOPNOTSUPP; return 0; } static int virtnet_set_features(struct net_device *dev, netdev_features_t features) { struct virtnet_info *vi = netdev_priv(dev); u64 offloads; int err; if ((dev->features ^ features) & NETIF_F_GRO_HW) { if (vi->xdp_enabled) return -EBUSY; if (features & NETIF_F_GRO_HW) offloads = vi->guest_offloads_capable; else offloads = vi->guest_offloads_capable & ~GUEST_OFFLOAD_GRO_HW_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) return err; vi->guest_offloads = offloads; } if ((dev->features ^ features) & NETIF_F_RXHASH) { if (features & NETIF_F_RXHASH) vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; else vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; if (!virtnet_commit_rss_command(vi)) return -EINVAL; } return 0; } static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct virtnet_info *priv = netdev_priv(dev); struct send_queue *sq = &priv->sq[txqueue]; struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.tx_timeouts); u64_stats_update_end(&sq->stats.syncp); netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n", txqueue, sq->name, sq->vq->index, sq->vq->name, jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start))); } static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, .ndo_start_xmit = start_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = virtnet_set_mac_address, .ndo_set_rx_mode = virtnet_set_rx_mode, .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = 
virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_features_check = passthru_features_check, .ndo_get_phys_port_name = virtnet_get_phys_port_name, .ndo_set_features = virtnet_set_features, .ndo_tx_timeout = virtnet_tx_timeout, }; static void virtnet_config_changed_work(struct work_struct *work) { struct virtnet_info *vi = container_of(work, struct virtnet_info, config_work); u16 v; if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS, struct virtio_net_config, status, &v) < 0) return; if (v & VIRTIO_NET_S_ANNOUNCE) { netdev_notify_peers(vi->dev); virtnet_ack_link_announce(vi); } /* Ignore unknown (future) status bits */ v &= VIRTIO_NET_S_LINK_UP; if (vi->status == v) return; vi->status = v; if (vi->status & VIRTIO_NET_S_LINK_UP) { virtnet_update_settings(vi); netif_carrier_on(vi->dev); netif_tx_wake_all_queues(vi->dev); } else { netif_carrier_off(vi->dev); netif_tx_stop_all_queues(vi->dev); } } static void virtnet_config_changed(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; schedule_work(&vi->config_work); } static void virtnet_free_queues(struct virtnet_info *vi) { int i; for (i = 0; i < vi->max_queue_pairs; i++) { __netif_napi_del(&vi->rq[i].napi); __netif_napi_del(&vi->sq[i].napi); } /* We called __netif_napi_del(), * we need to respect an RCU grace period before freeing vi->rq */ synchronize_net(); kfree(vi->rq); kfree(vi->sq); kfree(vi->ctrl); } static void _free_receive_bufs(struct virtnet_info *vi) { struct bpf_prog *old_prog; int i; for (i = 0; i < vi->max_queue_pairs; i++) { while (vi->rq[i].pages) __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); old_prog = rtnl_dereference(vi->rq[i].xdp_prog); RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL); if (old_prog) bpf_prog_put(old_prog); } } static void free_receive_bufs(struct virtnet_info *vi) { rtnl_lock(); _free_receive_bufs(vi); rtnl_unlock(); } static void free_receive_page_frags(struct virtnet_info *vi) { int i; for (i = 0; i < vi->max_queue_pairs; i++) if (vi->rq[i].alloc_frag.page) { if (vi->rq[i].do_dma && vi->rq[i].last_dma) virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); put_page(vi->rq[i].alloc_frag.page); } } static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) { if (!is_xdp_frame(buf)) dev_kfree_skb(buf); else xdp_return_frame(ptr_to_xdp(buf)); } static void free_unused_bufs(struct virtnet_info *vi) { void *buf; int i; for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) virtnet_sq_free_unused_buf(vq, buf); cond_resched(); } for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->rq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) virtnet_rq_unmap_free_buf(vq, buf); cond_resched(); } } static void virtnet_del_vqs(struct virtnet_info *vi) { struct virtio_device *vdev = vi->vdev; virtnet_clean_affinity(vi); vdev->config->del_vqs(vdev); virtnet_free_queues(vi); } /* How large should a single buffer be so a queue full of these can fit at * least one full packet? * Logic below assumes the mergeable buffer header is used. */ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) { const unsigned int hdr_len = vi->hdr_len; unsigned int rq_size = virtqueue_get_vring_size(vq); unsigned int packet_len = vi->big_packets ? 
IP_MAX_MTU : vi->dev->max_mtu; unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size); return max(max(min_buf_len, hdr_len) - hdr_len, (unsigned int)GOOD_PACKET_LEN); } static int virtnet_find_vqs(struct virtnet_info *vi) { vq_callback_t **callbacks; struct virtqueue **vqs; const char **names; int ret = -ENOMEM; int total_vqs; bool *ctx; u16 i; /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by * possible control vq. */ total_vqs = vi->max_queue_pairs * 2 + virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); /* Allocate space for find_vqs parameters */ vqs = kcalloc(total_vqs, sizeof(*vqs), GFP_KERNEL); if (!vqs) goto err_vq; callbacks = kmalloc_array(total_vqs, sizeof(*callbacks), GFP_KERNEL); if (!callbacks) goto err_callback; names = kmalloc_array(total_vqs, sizeof(*names), GFP_KERNEL); if (!names) goto err_names; if (!vi->big_packets || vi->mergeable_rx_bufs) { ctx = kcalloc(total_vqs, sizeof(*ctx), GFP_KERNEL); if (!ctx) goto err_ctx; } else { ctx = NULL; } /* Parameters for control virtqueue, if any */ if (vi->has_cvq) { callbacks[total_vqs - 1] = NULL; names[total_vqs - 1] = "control"; } /* Allocate/initialize parameters for send/receive virtqueues */ for (i = 0; i < vi->max_queue_pairs; i++) { callbacks[rxq2vq(i)] = skb_recv_done; callbacks[txq2vq(i)] = skb_xmit_done; sprintf(vi->rq[i].name, "input.%u", i); sprintf(vi->sq[i].name, "output.%u", i); names[rxq2vq(i)] = vi->rq[i].name; names[txq2vq(i)] = vi->sq[i].name; if (ctx) ctx[rxq2vq(i)] = true; } ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, names, ctx, NULL); if (ret) goto err_find; if (vi->has_cvq) { vi->cvq = vqs[total_vqs - 1]; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } for (i = 0; i < vi->max_queue_pairs; i++) { vi->rq[i].vq = vqs[rxq2vq(i)]; vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); vi->sq[i].vq = vqs[txq2vq(i)]; } /* run here: ret == 0. */ err_find: kfree(ctx); err_ctx: kfree(names); err_names: kfree(callbacks); err_callback: kfree(vqs); err_vq: return ret; } static int virtnet_alloc_queues(struct virtnet_info *vi) { int i; if (vi->has_cvq) { vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL); if (!vi->ctrl) goto err_ctrl; } else { vi->ctrl = NULL; } vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL); if (!vi->sq) goto err_sq; vi->rq = kcalloc(vi->max_queue_pairs, sizeof(*vi->rq), GFP_KERNEL); if (!vi->rq) goto err_rq; INIT_DELAYED_WORK(&vi->refill, refill_work); for (i = 0; i < vi->max_queue_pairs; i++) { vi->rq[i].pages = NULL; netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight); netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, virtnet_poll_tx, napi_tx ? 
napi_weight : 0); INIT_WORK(&vi->rq[i].dim.work, virtnet_rx_dim_work); vi->rq[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); u64_stats_init(&vi->rq[i].stats.syncp); u64_stats_init(&vi->sq[i].stats.syncp); } return 0; err_rq: kfree(vi->sq); err_sq: kfree(vi->ctrl); err_ctrl: return -ENOMEM; } static int init_vqs(struct virtnet_info *vi) { int ret; /* Allocate send & receive queues */ ret = virtnet_alloc_queues(vi); if (ret) goto err; ret = virtnet_find_vqs(vi); if (ret) goto err_free; virtnet_rq_set_premapped(vi); cpus_read_lock(); virtnet_set_affinity(vi); cpus_read_unlock(); return 0; err_free: virtnet_free_queues(vi); err: return ret; } #ifdef CONFIG_SYSFS static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, char *buf) { struct virtnet_info *vi = netdev_priv(queue->dev); unsigned int queue_index = get_netdev_rx_queue_index(queue); unsigned int headroom = virtnet_get_headroom(vi); unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; struct ewma_pkt_len *avg; BUG_ON(queue_index >= vi->max_queue_pairs); avg = &vi->rq[queue_index].mrg_avg_pkt_len; return sprintf(buf, "%u\n", get_mergeable_buf_len(&vi->rq[queue_index], avg, SKB_DATA_ALIGN(headroom + tailroom))); } static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = __ATTR_RO(mergeable_rx_buffer_size); static struct attribute *virtio_net_mrg_rx_attrs[] = { &mergeable_rx_buffer_size_attribute.attr, NULL }; static const struct attribute_group virtio_net_mrg_rx_group = { .name = "virtio_net", .attrs = virtio_net_mrg_rx_attrs }; #endif static bool virtnet_fail_on_feature(struct virtio_device *vdev, unsigned int fbit, const char *fname, const char *dname) { if (!virtio_has_feature(vdev, fbit)) return false; dev_err(&vdev->dev, "device advertises feature %s but not %s", fname, dname); return true; } #define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) static bool virtnet_validate_features(struct virtio_device *vdev) { if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_VQ_NOTF_COAL, "VIRTIO_NET_F_CTRL_VQ"))) { return false; } return true; } #define MIN_MTU ETH_MIN_MTU #define MAX_MTU ETH_MAX_MTU static int virtnet_validate(struct virtio_device *vdev) { if (!vdev->config->get) { dev_err(&vdev->dev, "%s failure: config access disabled\n", __func__); return -EINVAL; } if (!virtnet_validate_features(vdev)) return -EINVAL; if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { int mtu = virtio_cread16(vdev, offsetof(struct virtio_net_config, mtu)); if (mtu < MIN_MTU) __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); } if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not 
VIRTIO_NET_F_MAC, disabling standby"); __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); } return 0; } static bool virtnet_check_guest_gso(const struct virtnet_info *vi) { return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); } static void virtnet_set_big_packets(struct virtnet_info *vi, const int mtu) { bool guest_gso = virtnet_check_guest_gso(vi); /* If device can receive ANY guest GSO packets, regardless of mtu, * allocate packets of maximum size, otherwise limit it to only * mtu size worth only. */ if (mtu > ETH_DATA_LEN || guest_gso) { vi->big_packets = true; vi->big_packets_num_skbfrags = guest_gso ? MAX_SKB_FRAGS : DIV_ROUND_UP(mtu, PAGE_SIZE); } } static int virtnet_probe(struct virtio_device *vdev) { int i, err = -ENOMEM; struct net_device *dev; struct virtnet_info *vi; u16 max_queue_pairs; int mtu = 0; /* Find if host supports multiqueue/rss virtio_net device */ max_queue_pairs = 1; if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) max_queue_pairs = virtio_cread16(vdev, offsetof(struct virtio_net_config, max_virtqueue_pairs)); /* We need at least 2 queue's */ if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) max_queue_pairs = 1; /* Allocate ourselves a network device with room for our info */ dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); if (!dev) return -ENOMEM; /* Set up network device as normal. */ dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE | IFF_TX_SKB_NO_LINEAR; dev->netdev_ops = &virtnet_netdev; dev->features = NETIF_F_HIGHDMA; dev->ethtool_ops = &virtnet_ethtool_ops; SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { /* This opens up the world of extra features. */ dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (csum) dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6; } /* Individual feature bits: what can host handle? */ if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) dev->hw_features |= NETIF_F_TSO; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) dev->hw_features |= NETIF_F_TSO6; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) dev->hw_features |= NETIF_F_TSO_ECN; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) dev->hw_features |= NETIF_F_GSO_UDP_L4; dev->features |= NETIF_F_GSO_ROBUST; if (gso) dev->features |= dev->hw_features & NETIF_F_ALL_TSO; /* (!csum && gso) case will be fixed by register_netdev() */ } if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) dev->features |= NETIF_F_RXCSUM; if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) dev->features |= NETIF_F_GRO_HW; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; /* MTU range: 68 - 65535 */ dev->min_mtu = MIN_MTU; dev->max_mtu = MAX_MTU; /* Configuration may specify what MAC to use. Otherwise random. 
*/ if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { u8 addr[ETH_ALEN]; virtio_cread_bytes(vdev, offsetof(struct virtio_net_config, mac), addr, ETH_ALEN); eth_hw_addr_set(dev, addr); } else { eth_hw_addr_random(dev); dev_info(&vdev->dev, "Assigned random MAC address %pM\n", dev->dev_addr); } /* Set up our device-specific information */ vi = netdev_priv(dev); vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; INIT_WORK(&vi->config_work, virtnet_config_changed_work); INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work); spin_lock_init(&vi->refill_lock); if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { vi->mergeable_rx_bufs = true; dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; } if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { vi->has_rss = true; vi->rss_indir_table_size = virtio_cread16(vdev, offsetof(struct virtio_net_config, rss_max_indirection_table_length)); } if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_key_size = virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); vi->rss_hash_types_supported = virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); vi->rss_hash_types_supported &= ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); dev->hw_features |= NETIF_F_RXHASH; } if (vi->has_rss_hash_report) vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); else vi->hdr_len = sizeof(struct virtio_net_hdr); if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->any_header_sg = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) vi->has_cvq = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) { mtu = virtio_cread16(vdev, offsetof(struct virtio_net_config, mtu)); if (mtu < dev->min_mtu) { /* Should never trigger: MTU was previously validated * in virtnet_validate. */ dev_err(&vdev->dev, "device MTU appears to have changed it is now %d < %d", mtu, dev->min_mtu); err = -EINVAL; goto free; } dev->mtu = mtu; dev->max_mtu = mtu; } virtnet_set_big_packets(vi, mtu); if (vi->any_header_sg) dev->needed_headroom = vi->hdr_len; /* Enable multiqueue by default */ if (num_online_cpus() >= max_queue_pairs) vi->curr_queue_pairs = max_queue_pairs; else vi->curr_queue_pairs = num_online_cpus(); vi->max_queue_pairs = max_queue_pairs; /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) goto free; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { vi->intr_coal_rx.max_usecs = 0; vi->intr_coal_tx.max_usecs = 0; vi->intr_coal_rx.max_packets = 0; /* Keep the default values of the coalescing parameters * aligned with the default napi_tx state. */ if (vi->sq[0].napi.weight) vi->intr_coal_tx.max_packets = 1; else vi->intr_coal_tx.max_packets = 0; } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. 
*/ for (i = 0; i < vi->max_queue_pairs; i++) if (vi->sq[i].napi.weight) vi->sq[i].intr_coal.max_packets = 1; } #ifdef CONFIG_SYSFS if (vi->mergeable_rx_bufs) dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; #endif netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); virtnet_init_settings(dev); if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { vi->failover = net_failover_create(vi->dev); if (IS_ERR(vi->failover)) { err = PTR_ERR(vi->failover); goto free_vqs; } } if (vi->has_rss || vi->has_rss_hash_report) virtnet_init_default_rss(vi); enable_rx_mode_work(vi); /* serialize netdev register + virtio_device_ready() with ndo_open() */ rtnl_lock(); err = register_netdevice(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); rtnl_unlock(); goto free_failover; } virtio_device_ready(vdev); _virtnet_set_queues(vi, vi->curr_queue_pairs); /* a random MAC address has been assigned, notify the device. * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there * because many devices work fine without getting MAC explicitly */ if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { struct scatterlist sg; sg_init_one(&sg, dev->dev_addr, dev->addr_len); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { pr_debug("virtio_net: setting MAC address failed\n"); rtnl_unlock(); err = -EINVAL; goto free_unregister_netdev; } } rtnl_unlock(); err = virtnet_cpu_notif_add(vi); if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); goto free_unregister_netdev; } /* Assume link up if device can't report link status, otherwise get link status from config. */ netif_carrier_off(dev); if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) { schedule_work(&vi->config_work); } else { vi->status = VIRTIO_NET_S_LINK_UP; virtnet_update_settings(vi); netif_carrier_on(dev); } for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) if (virtio_has_feature(vi->vdev, guest_offloads[i])) set_bit(guest_offloads[i], &vi->guest_offloads); vi->guest_offloads_capable = vi->guest_offloads; pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", dev->name, max_queue_pairs); return 0; free_unregister_netdev: unregister_netdev(dev); free_failover: net_failover_destroy(vi->failover); free_vqs: virtio_reset_device(vdev); cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); virtnet_del_vqs(vi); free: free_netdev(dev); return err; } static void remove_vq_common(struct virtnet_info *vi) { virtio_reset_device(vi->vdev); /* Free unused buffers in both send and recv, if any. */ free_unused_bufs(vi); free_receive_bufs(vi); free_receive_page_frags(vi); virtnet_del_vqs(vi); } static void virtnet_remove(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; virtnet_cpu_notif_remove(vi); /* Make sure no work handler is accessing the device. 
*/ flush_work(&vi->config_work); disable_rx_mode_work(vi); flush_work(&vi->rx_mode_work); unregister_netdev(vi->dev); net_failover_destroy(vi->failover); remove_vq_common(vi); free_netdev(vi->dev); } static __maybe_unused int virtnet_freeze(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; virtnet_cpu_notif_remove(vi); virtnet_freeze_down(vdev); remove_vq_common(vi); return 0; } static __maybe_unused int virtnet_restore(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; int err; err = virtnet_restore_up(vdev); if (err) return err; virtnet_set_queues(vi, vi->curr_queue_pairs); err = virtnet_cpu_notif_add(vi); if (err) { virtnet_freeze_down(vdev); remove_vq_common(vi); return err; } return 0; } static struct virtio_device_id id_table[] = { { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, { 0 }, }; #define VIRTNET_FEATURES \ VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ VIRTIO_NET_F_MAC, \ VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ VIRTIO_NET_F_VQ_NOTF_COAL, \ VIRTIO_NET_F_GUEST_HDRLEN static unsigned int features[] = { VIRTNET_FEATURES, }; static unsigned int features_legacy[] = { VIRTNET_FEATURES, VIRTIO_NET_F_GSO, VIRTIO_F_ANY_LAYOUT, }; static struct virtio_driver virtio_net_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, .validate = virtnet_validate, .probe = virtnet_probe, .remove = virtnet_remove, .config_changed = virtnet_config_changed, #ifdef CONFIG_PM_SLEEP .freeze = virtnet_freeze, .restore = virtnet_restore, #endif }; static __init int virtio_net_driver_init(void) { int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online", virtnet_cpu_online, virtnet_cpu_down_prep); if (ret < 0) goto out; virtionet_online = ret; ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead", NULL, virtnet_cpu_dead); if (ret) goto err_dead; ret = register_virtio_driver(&virtio_net_driver); if (ret) goto err_virtio; return 0; err_virtio: cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); err_dead: cpuhp_remove_multi_state(virtionet_online); out: return ret; } module_init(virtio_net_driver_init); static __exit void virtio_net_driver_exit(void) { unregister_virtio_driver(&virtio_net_driver); cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD); cpuhp_remove_multi_state(virtionet_online); } module_exit(virtio_net_driver_exit); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_DESCRIPTION("Virtio network driver"); MODULE_LICENSE("GPL");
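/*
 * Editor's note -- illustrative sketch only, not part of virtio_net.c.
 * virtnet_set_affinity() above spreads the online CPUs over the active
 * queue pairs: each queue gets a contiguous group of roughly
 * num_cpu / curr_queue_pairs CPUs, and the first (num_cpu % curr_queue_pairs)
 * queues each take one extra "straggler" CPU, wrapping around the online
 * mask when CPUs run out. The stand-alone, hypothetical userspace program
 * below reproduces only that arithmetic so the resulting distribution is
 * easy to inspect; it is a sketch of the idea, not the driver's code path.
 */
#include <stdio.h>

static void show_affinity_groups(int num_cpu, int queue_pairs)
{
	/* mirrors: stride = max_t(int, num_cpu / curr_queue_pairs, 1) */
	int stride = num_cpu / queue_pairs;
	int stragglers = num_cpu >= queue_pairs ? num_cpu % queue_pairs : 0;
	int cpu = 0;
	int i, j;

	if (stride < 1)
		stride = 1;

	for (i = 0; i < queue_pairs; i++) {
		int group_size = stride + (i < stragglers ? 1 : 0);

		printf("queue pair %d -> CPUs", i);
		for (j = 0; j < group_size; j++) {
			/* wrap like cpumask_next_wrap() over the online mask */
			printf(" %d", cpu % num_cpu);
			cpu++;
		}
		printf("\n");
	}
}

int main(void)
{
	/* e.g. 6 online CPUs over 4 queue pairs -> groups of 2, 2, 1, 1 */
	show_affinity_groups(6, 4);
	return 0;
}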
#ifndef _LINUX_HASH_H
#define _LINUX_HASH_H
/* Fast hashing routine for ints, longs and pointers.
   (C) 2002 Nadia Yvette Chambers, IBM */

#include <asm/types.h>
#include <linux/compiler.h>

/*
 * The "GOLDEN_RATIO_PRIME" is used in fs/btrfs/brtfs_inode.h and
 * fs/inode.c. It's not actually prime any more (the previous primes
 * were actively bad for hashing), but the name remains.
 */
#if BITS_PER_LONG == 32
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
#define hash_long(val, bits) hash_32(val, bits)
#elif BITS_PER_LONG == 64
#define hash_long(val, bits) hash_64(val, bits)
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
#else
#error Wordsize not 32 or 64
#endif

/*
 * This hash multiplies the input by a large odd number and takes the
 * high bits. Since multiplication propagates changes to the most
 * significant end only, it is essential that the high bits of the
 * product be used for the hash value.
 *
 * Chuck Lever verified the effectiveness of this technique:
 * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
 *
 * Although a random odd number will do, it turns out that the golden
 * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
 * properties. (See Knuth vol 3, section 6.4, exercise 9.)
 *
 * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
 * which is very slightly easier to multiply by and makes no
 * difference to the hash distribution.
 */
#define GOLDEN_RATIO_32 0x61C88647
#define GOLDEN_RATIO_64 0x61C8864680B583EBull

#ifdef CONFIG_HAVE_ARCH_HASH
/* This header may use the GOLDEN_RATIO_xx constants */
#include <asm/hash.h>
#endif

/*
 * The _generic versions exist only so lib/test_hash.c can compare
 * the arch-optimized versions with the generic.
 *
 * Note that if you change these, any <asm/hash.h> that aren't updated
 * to match need to have their HAVE_ARCH_* define values updated so the
 * self-test will not false-positive.
 */
#ifndef HAVE_ARCH__HASH_32
#define __hash_32 __hash_32_generic
#endif
static inline u32 __hash_32_generic(u32 val)
{
	return val * GOLDEN_RATIO_32;
}

static inline u32 hash_32(u32 val, unsigned int bits)
{
	/* High bits are more random, so use them. */
	return __hash_32(val) >> (32 - bits);
}

#ifndef HAVE_ARCH_HASH_64
#define hash_64 hash_64_generic
#endif
static __always_inline u32 hash_64_generic(u64 val, unsigned int bits)
{
#if BITS_PER_LONG == 64
	/* 64x64-bit multiply is efficient on all 64-bit processors */
	return val * GOLDEN_RATIO_64 >> (64 - bits);
#else
	/* Hash 64 bits using only 32x32-bit multiply. */
	return hash_32((u32)val ^ __hash_32(val >> 32), bits);
#endif
}

static inline u32 hash_ptr(const void *ptr, unsigned int bits)
{
	return hash_long((unsigned long)ptr, bits);
}

/* This really should be called fold32_ptr; it does no hashing to speak of. */
static inline u32 hash32_ptr(const void *ptr)
{
	unsigned long val = (unsigned long)ptr;

#if BITS_PER_LONG == 64
	val ^= (val >> 32);
#endif
	return (u32)val;
}

#endif /* _LINUX_HASH_H */
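/*
 * Editor's note -- illustrative sketch only, not part of <linux/hash.h>.
 * Typical use of the multiplicative hash above is to fold a key into one
 * of 2^bits hash-table buckets. The userspace snippet below simply restates
 * hash_32() with the same GOLDEN_RATIO_32 constant so it compiles on its
 * own; kernel code would just #include <linux/hash.h> and call hash_32(),
 * hash_64() or hash_ptr() directly.
 */
#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_32 0x61C88647

/* same definition as hash_32() above: multiply, then keep the high bits */
static inline uint32_t hash_32_demo(uint32_t val, unsigned int bits)
{
	return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

int main(void)
{
	uint32_t key = 12345;
	unsigned int bucket = hash_32_demo(key, 6);	/* 6 bits -> 64 buckets */

	printf("key %u hashes to bucket %u of 64\n", key, bucket);
	return 0;
}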
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _KBD_KERN_H
#define _KBD_KERN_H

#include <linux/tty.h>
#include <linux/interrupt.h>
#include <linux/keyboard.h>

extern char *func_table[MAX_NR_FUNC];

/*
 * kbd->xxx contains the VC-local things (flag settings etc..)
 *
 * Note: externally visible are LED_SCR, LED_NUM, LED_CAP defined in kd.h
 *       The code in KDGETLED / KDSETLED depends on the internal and
 *       external order being the same.
 *
 * Note: lockstate is used as index in the array key_map.
 */
struct kbd_struct {

	unsigned char lockstate;
/* 8 modifiers - the names do not have any meaning at all;
   they can be associated to arbitrarily chosen keys */
#define VC_SHIFTLOCK	KG_SHIFT	/* shift lock mode */
#define VC_ALTGRLOCK	KG_ALTGR	/* altgr lock mode */
#define VC_CTRLLOCK	KG_CTRL		/* control lock mode */
#define VC_ALTLOCK	KG_ALT		/* alt lock mode */
#define VC_SHIFTLLOCK	KG_SHIFTL	/* shiftl lock mode */
#define VC_SHIFTRLOCK	KG_SHIFTR	/* shiftr lock mode */
#define VC_CTRLLLOCK	KG_CTRLL	/* ctrll lock mode */
#define VC_CTRLRLOCK	KG_CTRLR	/* ctrlr lock mode */
	unsigned char slockstate;	/* for `sticky' Shift, Ctrl, etc. */

	unsigned char ledmode:1;
#define LED_SHOW_FLAGS 0	/* traditional state */
#define LED_SHOW_IOCTL 1	/* only change leds upon ioctl */

	unsigned char ledflagstate:4;	/* flags, not lights */
	unsigned char default_ledflagstate:4;
#define VC_SCROLLOCK	0	/* scroll-lock mode */
#define VC_NUMLOCK	1	/* numeric lock mode */
#define VC_CAPSLOCK	2	/* capslock mode */
#define VC_KANALOCK	3	/* kanalock mode */

	unsigned char kbdmode:3;	/* one 3-bit value */
#define VC_XLATE	0	/* translate keycodes using keymap */
#define VC_MEDIUMRAW	1	/* medium raw (keycode) mode */
#define VC_RAW		2	/* raw (scancode) mode */
#define VC_UNICODE	3	/* Unicode mode */
#define VC_OFF		4	/* disabled mode */

	unsigned char modeflags:5;
#define VC_APPLIC	0	/* application key mode */
#define VC_CKMODE	1	/* cursor key mode */
#define VC_REPEAT	2	/* keyboard repeat */
#define VC_CRLF		3	/* 0 - enter sends CR, 1 - enter sends CRLF */
#define VC_META		4	/* 0 - meta, 1 - meta=prefix with ESC */
};

extern int kbd_init(void);

extern void setledstate(struct kbd_struct *kbd, unsigned int led);

extern int do_poke_blanked_console;

extern void (*kbd_ledfunc)(unsigned int led);

extern int set_console(int nr);
extern void schedule_console_callback(void);

static inline int vc_kbd_mode(struct kbd_struct * kbd, int flag)
{
	return ((kbd->modeflags >> flag) & 1);
}

static inline int vc_kbd_led(struct kbd_struct * kbd, int flag)
{
	return ((kbd->ledflagstate >> flag) & 1);
}

static inline void set_vc_kbd_mode(struct kbd_struct * kbd, int flag)
{
	kbd->modeflags |= 1 << flag;
}

static inline void set_vc_kbd_led(struct kbd_struct * kbd, int flag)
{
	kbd->ledflagstate |= 1 << flag;
}

static inline void clr_vc_kbd_mode(struct kbd_struct * kbd, int flag)
{
	kbd->modeflags &= ~(1 << flag);
}

static inline void clr_vc_kbd_led(struct kbd_struct * kbd, int flag)
{
	kbd->ledflagstate &= ~(1 << flag);
}

static inline void chg_vc_kbd_lock(struct kbd_struct * kbd, int flag)
{
	kbd->lockstate ^= 1 << flag;
}

static inline void chg_vc_kbd_slock(struct kbd_struct * kbd, int flag)
{
	kbd->slockstate ^= 1 << flag;
}

static inline void chg_vc_kbd_mode(struct kbd_struct * kbd, int flag)
{
	kbd->modeflags ^= 1 << flag;
}

static inline void chg_vc_kbd_led(struct kbd_struct * kbd, int flag)
{
	kbd->ledflagstate ^= 1 << flag;
}

#define U(x) ((x) ^ 0xf000)

#define BRL_UC_ROW	0x2800

/* keyboard.c */

struct console;

void vt_set_leds_compute_shiftstate(void);

/* defkeymap.c */

extern unsigned int keymap_count;

#endif
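/*
 * Editor's note -- illustrative sketch only, not part of kbd_kern.h.
 * The vc_kbd_*() / set_ / clr_ / chg_vc_kbd_*() inlines above are plain
 * test / set / clear / toggle helpers over the per-console flag bytes.
 * The stand-alone mock below mirrors just the ledflagstate helpers so the
 * bit manipulation is visible in isolation; struct mock_kbd is a simplified
 * stand-in, not the real struct kbd_struct.
 */
#include <stdio.h>

struct mock_kbd {
	unsigned char ledflagstate;
};

#define VC_CAPSLOCK 2	/* same flag index as in the header above */

static int vc_kbd_led(struct mock_kbd *kbd, int flag)
{
	return (kbd->ledflagstate >> flag) & 1;
}

static void set_vc_kbd_led(struct mock_kbd *kbd, int flag)
{
	kbd->ledflagstate |= 1 << flag;
}

static void chg_vc_kbd_led(struct mock_kbd *kbd, int flag)
{
	kbd->ledflagstate ^= 1 << flag;
}

int main(void)
{
	struct mock_kbd kbd = { 0 };

	set_vc_kbd_led(&kbd, VC_CAPSLOCK);
	printf("caps led flag: %d\n", vc_kbd_led(&kbd, VC_CAPSLOCK));	/* 1 */
	chg_vc_kbd_led(&kbd, VC_CAPSLOCK);				/* toggle off */
	printf("caps led flag: %d\n", vc_kbd_led(&kbd, VC_CAPSLOCK));	/* 0 */
	return 0;
}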
/*
   BlueZ - Bluetooth protocol stack for Linux
   Copyright (C) 2000-2001 Qualcomm Incorporated
   Copyright 2023 NXP

   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as
   published by the Free Software Foundation;

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ #ifndef __BLUETOOTH_H #define __BLUETOOTH_H #include <linux/poll.h> #include <net/sock.h> #include <linux/seq_file.h> #define BT_SUBSYS_VERSION 2 #define BT_SUBSYS_REVISION 22 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 #define PF_BLUETOOTH AF_BLUETOOTH #endif /* Bluetooth versions */ #define BLUETOOTH_VER_1_1 1 #define BLUETOOTH_VER_1_2 2 #define BLUETOOTH_VER_2_0 3 #define BLUETOOTH_VER_2_1 4 #define BLUETOOTH_VER_4_0 6 /* Reserv for core and drivers use */ #define BT_SKB_RESERVE 8 #define BTPROTO_L2CAP 0 #define BTPROTO_HCI 1 #define BTPROTO_SCO 2 #define BTPROTO_RFCOMM 3 #define BTPROTO_BNEP 4 #define BTPROTO_CMTP 5 #define BTPROTO_HIDP 6 #define BTPROTO_AVDTP 7 #define BTPROTO_ISO 8 #define BTPROTO_LAST BTPROTO_ISO #define SOL_HCI 0 #define SOL_L2CAP 6 #define SOL_SCO 17 #define SOL_RFCOMM 18 #define BT_SECURITY 4 struct bt_security { __u8 level; __u8 key_size; }; #define BT_SECURITY_SDP 0 #define BT_SECURITY_LOW 1 #define BT_SECURITY_MEDIUM 2 #define BT_SECURITY_HIGH 3 #define BT_SECURITY_FIPS 4 #define BT_DEFER_SETUP 7 #define BT_FLUSHABLE 8 #define BT_FLUSHABLE_OFF 0 #define BT_FLUSHABLE_ON 1 #define BT_POWER 9 struct bt_power { __u8 force_active; }; #define BT_POWER_FORCE_ACTIVE_OFF 0 #define BT_POWER_FORCE_ACTIVE_ON 1 #define BT_CHANNEL_POLICY 10 /* BR/EDR only (default policy) * AMP controllers cannot be used. * Channel move requests from the remote device are denied. * If the L2CAP channel is currently using AMP, move the channel to BR/EDR. */ #define BT_CHANNEL_POLICY_BREDR_ONLY 0 /* BR/EDR Preferred * Allow use of AMP controllers. * If the L2CAP channel is currently on AMP, move it to BR/EDR. * Channel move requests from the remote device are allowed. */ #define BT_CHANNEL_POLICY_BREDR_PREFERRED 1 /* AMP Preferred * Allow use of AMP controllers * If the L2CAP channel is currently on BR/EDR and AMP controller * resources are available, initiate a channel move to AMP. * Channel move requests from the remote device are allowed. * If the L2CAP socket has not been connected yet, try to create * and configure the channel directly on an AMP controller rather * than BR/EDR. 
*/ #define BT_CHANNEL_POLICY_AMP_PREFERRED 2 #define BT_VOICE 11 struct bt_voice { __u16 setting; }; #define BT_VOICE_TRANSPARENT 0x0003 #define BT_VOICE_CVSD_16BIT 0x0060 #define BT_SNDMTU 12 #define BT_RCVMTU 13 #define BT_PHY 14 #define BT_PHY_BR_1M_1SLOT 0x00000001 #define BT_PHY_BR_1M_3SLOT 0x00000002 #define BT_PHY_BR_1M_5SLOT 0x00000004 #define BT_PHY_EDR_2M_1SLOT 0x00000008 #define BT_PHY_EDR_2M_3SLOT 0x00000010 #define BT_PHY_EDR_2M_5SLOT 0x00000020 #define BT_PHY_EDR_3M_1SLOT 0x00000040 #define BT_PHY_EDR_3M_3SLOT 0x00000080 #define BT_PHY_EDR_3M_5SLOT 0x00000100 #define BT_PHY_LE_1M_TX 0x00000200 #define BT_PHY_LE_1M_RX 0x00000400 #define BT_PHY_LE_2M_TX 0x00000800 #define BT_PHY_LE_2M_RX 0x00001000 #define BT_PHY_LE_CODED_TX 0x00002000 #define BT_PHY_LE_CODED_RX 0x00004000 #define BT_MODE 15 #define BT_MODE_BASIC 0x00 #define BT_MODE_ERTM 0x01 #define BT_MODE_STREAMING 0x02 #define BT_MODE_LE_FLOWCTL 0x03 #define BT_MODE_EXT_FLOWCTL 0x04 #define BT_PKT_STATUS 16 #define BT_SCM_PKT_STATUS 0x03 #define BT_ISO_QOS 17 #define BT_ISO_QOS_CIG_UNSET 0xff #define BT_ISO_QOS_CIS_UNSET 0xff #define BT_ISO_QOS_BIG_UNSET 0xff #define BT_ISO_QOS_BIS_UNSET 0xff #define BT_ISO_SYNC_TIMEOUT 0x07d0 /* 20 secs */ struct bt_iso_io_qos { __u32 interval; __u16 latency; __u16 sdu; __u8 phy; __u8 rtn; }; struct bt_iso_ucast_qos { __u8 cig; __u8 cis; __u8 sca; __u8 packing; __u8 framing; struct bt_iso_io_qos in; struct bt_iso_io_qos out; }; struct bt_iso_bcast_qos { __u8 big; __u8 bis; __u8 sync_factor; __u8 packing; __u8 framing; struct bt_iso_io_qos in; struct bt_iso_io_qos out; __u8 encryption; __u8 bcode[16]; __u8 options; __u16 skip; __u16 sync_timeout; __u8 sync_cte_type; __u8 mse; __u16 timeout; }; struct bt_iso_qos { union { struct bt_iso_ucast_qos ucast; struct bt_iso_bcast_qos bcast; }; }; #define BT_ISO_PHY_1M 0x01 #define BT_ISO_PHY_2M 0x02 #define BT_ISO_PHY_CODED 0x04 #define BT_ISO_PHY_ANY (BT_ISO_PHY_1M | BT_ISO_PHY_2M | \ BT_ISO_PHY_CODED) #define BT_CODEC 19 struct bt_codec_caps { __u8 len; __u8 data[]; } __packed; struct bt_codec { __u8 id; __u16 cid; __u16 vid; __u8 data_path; __u8 num_caps; } __packed; struct bt_codecs { __u8 num_codecs; struct bt_codec codecs[]; } __packed; #define BT_CODEC_CVSD 0x02 #define BT_CODEC_TRANSPARENT 0x03 #define BT_CODEC_MSBC 0x05 #define BT_ISO_BASE 20 __printf(1, 2) void bt_info(const char *fmt, ...); __printf(1, 2) void bt_warn(const char *fmt, ...); __printf(1, 2) void bt_err(const char *fmt, ...); #if IS_ENABLED(CONFIG_BT_FEATURE_DEBUG) void bt_dbg_set(bool enable); bool bt_dbg_get(void); __printf(1, 2) void bt_dbg(const char *fmt, ...); #endif __printf(1, 2) void bt_warn_ratelimited(const char *fmt, ...); __printf(1, 2) void bt_err_ratelimited(const char *fmt, ...); #define BT_INFO(fmt, ...) bt_info(fmt "\n", ##__VA_ARGS__) #define BT_WARN(fmt, ...) bt_warn(fmt "\n", ##__VA_ARGS__) #define BT_ERR(fmt, ...) bt_err(fmt "\n", ##__VA_ARGS__) #if IS_ENABLED(CONFIG_BT_FEATURE_DEBUG) #define BT_DBG(fmt, ...) bt_dbg(fmt "\n", ##__VA_ARGS__) #else #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) #endif #define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null") #define bt_dev_info(hdev, fmt, ...) \ BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn(hdev, fmt, ...) \ BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err(hdev, fmt, ...) \ BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_dbg(hdev, fmt, ...) 
\ BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_warn_ratelimited(hdev, fmt, ...) \ bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) #define bt_dev_err_ratelimited(hdev, fmt, ...) \ bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) /* Connection and socket states */ enum { BT_CONNECTED = 1, /* Equal to TCP_ESTABLISHED to make net code happy */ BT_OPEN, BT_BOUND, BT_LISTEN, BT_CONNECT, BT_CONNECT2, BT_CONFIG, BT_DISCONN, BT_CLOSED }; /* If unused will be removed by compiler */ static inline const char *state_to_string(int state) { switch (state) { case BT_CONNECTED: return "BT_CONNECTED"; case BT_OPEN: return "BT_OPEN"; case BT_BOUND: return "BT_BOUND"; case BT_LISTEN: return "BT_LISTEN"; case BT_CONNECT: return "BT_CONNECT"; case BT_CONNECT2: return "BT_CONNECT2"; case BT_CONFIG: return "BT_CONFIG"; case BT_DISCONN: return "BT_DISCONN"; case BT_CLOSED: return "BT_CLOSED"; } return "invalid state"; } /* BD Address */ typedef struct { __u8 b[6]; } __packed bdaddr_t; /* BD Address type */ #define BDADDR_BREDR 0x00 #define BDADDR_LE_PUBLIC 0x01 #define BDADDR_LE_RANDOM 0x02 static inline bool bdaddr_type_is_valid(u8 type) { switch (type) { case BDADDR_BREDR: case BDADDR_LE_PUBLIC: case BDADDR_LE_RANDOM: return true; } return false; } static inline bool bdaddr_type_is_le(u8 type) { switch (type) { case BDADDR_LE_PUBLIC: case BDADDR_LE_RANDOM: return true; } return false; } #define BDADDR_ANY (&(bdaddr_t) {{0, 0, 0, 0, 0, 0}}) #define BDADDR_NONE (&(bdaddr_t) {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}) /* Copy, swap, convert BD Address */ static inline int bacmp(const bdaddr_t *ba1, const bdaddr_t *ba2) { return memcmp(ba1, ba2, sizeof(bdaddr_t)); } static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src) { memcpy(dst, src, sizeof(bdaddr_t)); } void baswap(bdaddr_t *dst, const bdaddr_t *src); /* Common socket structures and functions */ #define bt_sk(__sk) ((struct bt_sock *) __sk) struct bt_sock { struct sock sk; struct list_head accept_q; struct sock *parent; unsigned long flags; void (*skb_msg_name)(struct sk_buff *, void *, int *); void (*skb_put_cmsg)(struct sk_buff *, struct msghdr *, struct sock *); }; enum { BT_SK_DEFER_SETUP, BT_SK_SUSPEND, BT_SK_PKT_STATUS }; struct bt_sock_list { struct hlist_head head; rwlock_t lock; #ifdef CONFIG_PROC_FS int (* custom_seq_show)(struct seq_file *, void *); #endif }; int bt_sock_register(int proto, const struct net_proto_family *ops); void bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); struct sock *bt_sock_alloc(struct net *net, struct socket *sock, struct proto *prot, int proto, gfp_t prio, int kern); int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); __poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags); void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh); void bt_accept_unlink(struct sock *sk); struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct l2cap_ctrl { u8 sframe:1, poll:1, final:1, fcs:1, sar:2, super:2; u16 reqseq; u16 txseq; u8 retries; __le16 psm; bdaddr_t 
bdaddr; struct l2cap_chan *chan; }; struct hci_dev; typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb); #define HCI_REQ_START BIT(0) #define HCI_REQ_SKB BIT(1) struct hci_ctrl { struct sock *sk; u16 opcode; u8 req_flags; u8 req_event; union { hci_req_complete_t req_complete; hci_req_complete_skb_t req_complete_skb; }; }; struct mgmt_ctrl { struct hci_dev *hdev; u16 opcode; }; struct bt_skb_cb { u8 pkt_type; u8 force_active; u16 expect; u8 incoming:1; u8 pkt_status:2; union { struct l2cap_ctrl l2cap; struct hci_ctrl hci; struct mgmt_ctrl mgmt; struct scm_creds creds; }; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type #define hci_skb_pkt_status(skb) bt_cb((skb))->pkt_status #define hci_skb_expect(skb) bt_cb((skb))->expect #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode #define hci_skb_event(skb) bt_cb((skb))->hci.req_event #define hci_skb_sk(skb) bt_cb((skb))->hci.sk static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { struct sk_buff *skb; skb = alloc_skb(len + BT_SKB_RESERVE, how); if (skb) skb_reserve(skb, BT_SKB_RESERVE); return skb; } static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long len, int nb, int *err) { struct sk_buff *skb; skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err); if (skb) skb_reserve(skb, BT_SKB_RESERVE); if (!skb && *err) return NULL; *err = sock_error(sk); if (*err) goto out; if (sk->sk_shutdown) { *err = -ECONNRESET; goto out; } return skb; out: kfree_skb(skb); return NULL; } /* Shall not be called with lock_sock held */ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, struct msghdr *msg, size_t len, size_t mtu, size_t headroom, size_t tailroom) { struct sk_buff *skb; size_t size = min_t(size_t, len, mtu); int err; skb = bt_skb_send_alloc(sk, size + headroom + tailroom, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) return ERR_PTR(err); skb_reserve(skb, headroom); skb_tailroom_reserve(skb, mtu, tailroom); if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) { kfree_skb(skb); return ERR_PTR(-EFAULT); } skb->priority = READ_ONCE(sk->sk_priority); return skb; } /* Similar to bt_skb_sendmsg but can split the msg into multiple fragments * accourding to the MTU. 
*/ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, struct msghdr *msg, size_t len, size_t mtu, size_t headroom, size_t tailroom) { struct sk_buff *skb, **frag; skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); if (IS_ERR(skb)) return skb; len -= skb->len; if (!len) return skb; /* Add remaining data over MTU as continuation fragments */ frag = &skb_shinfo(skb)->frag_list; while (len) { struct sk_buff *tmp; tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); if (IS_ERR(tmp)) { return skb; } len -= tmp->len; *frag = tmp; frag = &(*frag)->next; } return skb; } static inline int bt_copy_from_sockptr(void *dst, size_t dst_size, sockptr_t src, size_t src_size) { if (dst_size > src_size) return -EINVAL; return copy_from_sockptr(dst, src, dst_size); } int bt_to_errno(u16 code); __u8 bt_status(int err); void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); int hci_sock_test_flag(struct sock *sk, int nr); unsigned short hci_sock_get_channel(struct sock *sk); u32 hci_sock_get_cookie(struct sock *sk); int hci_sock_init(void); void hci_sock_cleanup(void); int bt_sysfs_init(void); void bt_sysfs_cleanup(void); int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, int (*seq_show)(struct seq_file *, void *)); void bt_procfs_cleanup(struct net *net, const char *name); extern struct dentry *bt_debugfs; int l2cap_init(void); void l2cap_exit(void); #if IS_ENABLED(CONFIG_BT_BREDR) int sco_init(void); void sco_exit(void); #else static inline int sco_init(void) { return 0; } static inline void sco_exit(void) { } #endif #if IS_ENABLED(CONFIG_BT_LE) int iso_init(void); int iso_exit(void); bool iso_enabled(void); #else static inline int iso_init(void) { return 0; } static inline int iso_exit(void) { return 0; } static inline bool iso_enabled(void) { return false; } #endif int mgmt_init(void); void mgmt_exit(void); void mgmt_cleanup(struct sock *sk); void bt_sock_reclassify_lock(struct sock *sk, int proto); #endif /* __BLUETOOTH_H */
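/*
 * Example (not part of bluetooth.h): the intended use of the skb helpers
 * above.  bt_skb_alloc() reserves BT_SKB_RESERVE bytes of headroom for
 * the core and drivers, and per-packet state lives in skb->cb behind the
 * bt_cb()/hci_skb_*() accessors.  build_example_frame() is hypothetical,
 * and the packet-type constants actually come from <net/bluetooth/hci.h>.
 */
static inline struct sk_buff *build_example_frame(const void *data,
						  unsigned int len,
						  u8 pkt_type)
{
	struct sk_buff *skb;

	skb = bt_skb_alloc(len, GFP_KERNEL);
	if (!skb)
		return NULL;

	skb_put_data(skb, data, len);

	/* Stamp the control block before handing the skb downwards. */
	hci_skb_pkt_type(skb) = pkt_type;
	bt_cb(skb)->force_active = 1;

	return skb;
}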
// SPDX-License-Identifier: GPL-2.0
/*
 * fs/sysfs/group.c - Operations for adding/removing multiple files at once.
* * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab * Copyright (c) 2013 Greg Kroah-Hartman * Copyright (c) 2013 The Linux Foundation */ #include <linux/kobject.h> #include <linux/module.h> #include <linux/dcache.h> #include <linux/namei.h> #include <linux/err.h> #include <linux/fs.h> #include "sysfs.h" static void remove_files(struct kernfs_node *parent, const struct attribute_group *grp) { struct attribute *const *attr; struct bin_attribute *const *bin_attr; if (grp->attrs) for (attr = grp->attrs; *attr; attr++) kernfs_remove_by_name(parent, (*attr)->name); if (grp->bin_attrs) for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) kernfs_remove_by_name(parent, (*bin_attr)->attr.name); } static umode_t __first_visible(const struct attribute_group *grp, struct kobject *kobj) { if (grp->attrs && grp->attrs[0] && grp->is_visible) return grp->is_visible(kobj, grp->attrs[0], 0); if (grp->bin_attrs && grp->bin_attrs[0] && grp->is_bin_visible) return grp->is_bin_visible(kobj, grp->bin_attrs[0], 0); return 0; } static int create_files(struct kernfs_node *parent, struct kobject *kobj, kuid_t uid, kgid_t gid, const struct attribute_group *grp, int update) { struct attribute *const *attr; struct bin_attribute *const *bin_attr; int error = 0, i; if (grp->attrs) { for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { umode_t mode = (*attr)->mode; /* * In update mode, we're changing the permissions or * visibility. Do this by first removing then * re-adding (if required) the file. */ if (update) kernfs_remove_by_name(parent, (*attr)->name); if (grp->is_visible) { mode = grp->is_visible(kobj, *attr, i); mode &= ~SYSFS_GROUP_INVISIBLE; if (!mode) continue; } WARN(mode & ~(SYSFS_PREALLOC | 0664), "Attribute %s: Invalid permissions 0%o\n", (*attr)->name, mode); mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, *attr, mode, uid, gid, NULL); if (unlikely(error)) break; } if (error) { remove_files(parent, grp); goto exit; } } if (grp->bin_attrs) { for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) { umode_t mode = (*bin_attr)->attr.mode; if (update) kernfs_remove_by_name(parent, (*bin_attr)->attr.name); if (grp->is_bin_visible) { mode = grp->is_bin_visible(kobj, *bin_attr, i); mode &= ~SYSFS_GROUP_INVISIBLE; if (!mode) continue; } WARN(mode & ~(SYSFS_PREALLOC | 0664), "Attribute %s: Invalid permissions 0%o\n", (*bin_attr)->attr.name, mode); mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_bin_file_mode_ns(parent, *bin_attr, mode, uid, gid, NULL); if (error) break; } if (error) remove_files(parent, grp); } exit: return error; } static int internal_create_group(struct kobject *kobj, int update, const struct attribute_group *grp) { struct kernfs_node *kn; kuid_t uid; kgid_t gid; int error; if (WARN_ON(!kobj || (!update && !kobj->sd))) return -EINVAL; /* Updates may happen before the object has been instantiated */ if (unlikely(update && !kobj->sd)) return -EINVAL; if (!grp->attrs && !grp->bin_attrs) { pr_debug("sysfs: (bin_)attrs not set by subsystem for group: %s/%s, skipping\n", kobj->name, grp->name ?: ""); return 0; } kobject_get_ownership(kobj, &uid, &gid); if (grp->name) { umode_t mode = __first_visible(grp, kobj); if (mode & SYSFS_GROUP_INVISIBLE) mode = 0; else mode = S_IRWXU | S_IRUGO | S_IXUGO; if (update) { kn = kernfs_find_and_get(kobj->sd, grp->name); if (!kn) { pr_debug("attr grp %s/%s not created yet\n", kobj->name, grp->name); /* may have been invisible prior to this update */ update = 0; } else if (!mode) { 
sysfs_remove_group(kobj, grp); kernfs_put(kn); return 0; } } if (!update) { if (!mode) return 0; kn = kernfs_create_dir_ns(kobj->sd, grp->name, mode, uid, gid, kobj, NULL); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(kobj->sd, grp->name); return PTR_ERR(kn); } } } else { kn = kobj->sd; } kernfs_get(kn); error = create_files(kn, kobj, uid, gid, grp, update); if (error) { if (grp->name) kernfs_remove(kn); } kernfs_put(kn); if (grp->name && update) kernfs_put(kn); return error; } /** * sysfs_create_group - given a directory kobject, create an attribute group * @kobj: The kobject to create the group on * @grp: The attribute group to create * * This function creates a group for the first time. It will explicitly * warn and error if any of the attribute files being created already exist. * * Returns 0 on success or error code on failure. */ int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp) { return internal_create_group(kobj, 0, grp); } EXPORT_SYMBOL_GPL(sysfs_create_group); static int internal_create_groups(struct kobject *kobj, int update, const struct attribute_group **groups) { int error = 0; int i; if (!groups) return 0; for (i = 0; groups[i]; i++) { error = internal_create_group(kobj, update, groups[i]); if (error) { while (--i >= 0) sysfs_remove_group(kobj, groups[i]); break; } } return error; } /** * sysfs_create_groups - given a directory kobject, create a bunch of attribute groups * @kobj: The kobject to create the group on * @groups: The attribute groups to create, NULL terminated * * This function creates a bunch of attribute groups. If an error occurs when * creating a group, all previously created groups will be removed, unwinding * everything back to the original state when this function was called. * It will explicitly warn and error if any of the attribute files being * created already exist. * * Returns 0 on success or error code from sysfs_create_group on failure. */ int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) { return internal_create_groups(kobj, 0, groups); } EXPORT_SYMBOL_GPL(sysfs_create_groups); /** * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups * @kobj: The kobject to update the group on * @groups: The attribute groups to update, NULL terminated * * This function update a bunch of attribute groups. If an error occurs when * updating a group, all previously updated groups will be removed together * with already existing (not updated) attributes. * * Returns 0 on success or error code from sysfs_update_group on failure. */ int sysfs_update_groups(struct kobject *kobj, const struct attribute_group **groups) { return internal_create_groups(kobj, 1, groups); } EXPORT_SYMBOL_GPL(sysfs_update_groups); /** * sysfs_update_group - given a directory kobject, update an attribute group * @kobj: The kobject to update the group on * @grp: The attribute group to update * * This function updates an attribute group. Unlike * sysfs_create_group(), it will explicitly not warn or error if any * of the attribute files being created already exist. Furthermore, * if the visibility of the files has changed through the is_visible() * callback, it will update the permissions and add or remove the * relevant files. Changing a group's name (subdirectory name under * kobj's directory in sysfs) is not allowed. * * The primary use for this function is to call it after making a change * that affects group visibility. * * Returns 0 on success or error code on failure. 
*/ int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp) { return internal_create_group(kobj, 1, grp); } EXPORT_SYMBOL_GPL(sysfs_update_group); /** * sysfs_remove_group: remove a group from a kobject * @kobj: kobject to remove the group from * @grp: group to remove * * This function removes a group of attributes from a kobject. The attributes * previously have to have been created for this group, otherwise it will fail. */ void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent = kobj->sd; struct kernfs_node *kn; if (grp->name) { kn = kernfs_find_and_get(parent, grp->name); if (!kn) { pr_debug("sysfs group '%s' not found for kobject '%s'\n", grp->name, kobject_name(kobj)); return; } } else { kn = parent; kernfs_get(kn); } remove_files(kn, grp); if (grp->name) kernfs_remove(kn); kernfs_put(kn); } EXPORT_SYMBOL_GPL(sysfs_remove_group); /** * sysfs_remove_groups - remove a list of groups * * @kobj: The kobject for the groups to be removed from * @groups: NULL terminated list of groups to be removed * * If groups is not NULL, remove the specified groups from the kobject. */ void sysfs_remove_groups(struct kobject *kobj, const struct attribute_group **groups) { int i; if (!groups) return; for (i = 0; groups[i]; i++) sysfs_remove_group(kobj, groups[i]); } EXPORT_SYMBOL_GPL(sysfs_remove_groups); /** * sysfs_merge_group - merge files into a pre-existing named attribute group. * @kobj: The kobject containing the group. * @grp: The files to create and the attribute group they belong to. * * This function returns an error if the group doesn't exist, the .name field is * NULL or any of the files already exist in that group, in which case none of * the new files are created. */ int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent; kuid_t uid; kgid_t gid; int error = 0; struct attribute *const *attr; int i; parent = kernfs_find_and_get(kobj->sd, grp->name); if (!parent) return -ENOENT; kobject_get_ownership(kobj, &uid, &gid); for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) error = sysfs_add_file_mode_ns(parent, *attr, (*attr)->mode, uid, gid, NULL); if (error) { while (--i >= 0) kernfs_remove_by_name(parent, (*--attr)->name); } kernfs_put(parent); return error; } EXPORT_SYMBOL_GPL(sysfs_merge_group); /** * sysfs_unmerge_group - remove files from a pre-existing named attribute group. * @kobj: The kobject containing the group. * @grp: The files to remove and the attribute group they belong to. */ void sysfs_unmerge_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent; struct attribute *const *attr; parent = kernfs_find_and_get(kobj->sd, grp->name); if (parent) { for (attr = grp->attrs; *attr; ++attr) kernfs_remove_by_name(parent, (*attr)->name); kernfs_put(parent); } } EXPORT_SYMBOL_GPL(sysfs_unmerge_group); /** * sysfs_add_link_to_group - add a symlink to an attribute group. * @kobj: The kobject containing the group. * @group_name: The name of the group. * @target: The target kobject of the symlink to create. * @link_name: The name of the symlink to create. 
*/ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name) { struct kernfs_node *parent; int error = 0; parent = kernfs_find_and_get(kobj->sd, group_name); if (!parent) return -ENOENT; error = sysfs_create_link_sd(parent, target, link_name); kernfs_put(parent); return error; } EXPORT_SYMBOL_GPL(sysfs_add_link_to_group); /** * sysfs_remove_link_from_group - remove a symlink from an attribute group. * @kobj: The kobject containing the group. * @group_name: The name of the group. * @link_name: The name of the symlink to remove. */ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name) { struct kernfs_node *parent; parent = kernfs_find_and_get(kobj->sd, group_name); if (parent) { kernfs_remove_by_name(parent, link_name); kernfs_put(parent); } } EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group); /** * compat_only_sysfs_link_entry_to_kobj - add a symlink to a kobject pointing * to a group or an attribute * @kobj: The kobject containing the group. * @target_kobj: The target kobject. * @target_name: The name of the target group or attribute. * @symlink_name: The name of the symlink file (target_name will be * considered if symlink_name is NULL). */ int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name, const char *symlink_name) { struct kernfs_node *target; struct kernfs_node *entry; struct kernfs_node *link; /* * We don't own @target_kobj and it may be removed at any time. * Synchronize using sysfs_symlink_target_lock. See sysfs_remove_dir() * for details. */ spin_lock(&sysfs_symlink_target_lock); target = target_kobj->sd; if (target) kernfs_get(target); spin_unlock(&sysfs_symlink_target_lock); if (!target) return -ENOENT; entry = kernfs_find_and_get(target, target_name); if (!entry) { kernfs_put(target); return -ENOENT; } if (!symlink_name) symlink_name = target_name; link = kernfs_create_link(kobj->sd, symlink_name, entry); if (PTR_ERR(link) == -EEXIST) sysfs_warn_dup(kobj->sd, symlink_name); kernfs_put(entry); kernfs_put(target); return PTR_ERR_OR_ZERO(link); } EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, const struct attribute_group *grp, struct iattr *newattrs) { struct kernfs_node *kn; int error; if (grp->attrs) { struct attribute *const *attr; for (attr = grp->attrs; *attr; attr++) { kn = kernfs_find_and_get(grp_kn, (*attr)->name); if (!kn) return -ENOENT; error = kernfs_setattr(kn, newattrs); kernfs_put(kn); if (error) return error; } } if (grp->bin_attrs) { struct bin_attribute *const *bin_attr; for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { kn = kernfs_find_and_get(grp_kn, (*bin_attr)->attr.name); if (!kn) return -ENOENT; error = kernfs_setattr(kn, newattrs); kernfs_put(kn); if (error) return error; } } return 0; } /** * sysfs_group_change_owner - change owner of an attribute group. * @kobj: The kobject containing the group. * @grp: The attribute group. * @kuid: new owner's kuid * @kgid: new owner's kgid * * Returns 0 on success or error code on failure. 
*/ int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *grp, kuid_t kuid, kgid_t kgid) { struct kernfs_node *grp_kn; int error; struct iattr newattrs = { .ia_valid = ATTR_UID | ATTR_GID, .ia_uid = kuid, .ia_gid = kgid, }; if (!kobj->state_in_sysfs) return -EINVAL; if (grp->name) { grp_kn = kernfs_find_and_get(kobj->sd, grp->name); } else { kernfs_get(kobj->sd); grp_kn = kobj->sd; } if (!grp_kn) return -ENOENT; error = kernfs_setattr(grp_kn, &newattrs); if (!error) error = sysfs_group_attrs_change_owner(grp_kn, grp, &newattrs); kernfs_put(grp_kn); return error; } EXPORT_SYMBOL_GPL(sysfs_group_change_owner); /** * sysfs_groups_change_owner - change owner of a set of attribute groups. * @kobj: The kobject containing the groups. * @groups: The attribute groups. * @kuid: new owner's kuid * @kgid: new owner's kgid * * Returns 0 on success or error code on failure. */ int sysfs_groups_change_owner(struct kobject *kobj, const struct attribute_group **groups, kuid_t kuid, kgid_t kgid) { int error = 0, i; if (!kobj->state_in_sysfs) return -EINVAL; if (!groups) return 0; for (i = 0; groups[i]; i++) { error = sysfs_group_change_owner(kobj, groups[i], kuid, kgid); if (error) break; } return error; } EXPORT_SYMBOL_GPL(sysfs_groups_change_owner);
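/*
 * Example (not part of group.c): the usual way these interfaces are driven
 * from a statically defined attribute group, mirroring the common pattern
 * in the kernel's kobject samples.  The "example" names, example_show()
 * and the caller's kobject are illustrative only.
 */
#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct kobject *kobj, struct kobj_attribute *attr,
			    char *buf)
{
	return sysfs_emit(buf, "hello\n");
}

static struct kobj_attribute example_attr = __ATTR_RO(example);

static struct attribute *example_attrs[] = {
	&example_attr.attr,
	NULL,			/* attribute list must be NULL terminated */
};

static const struct attribute_group example_group = {
	.name	= "example",	/* optional subdirectory; NULL adds files directly */
	.attrs	= example_attrs,
};

/* Create the group under an already-registered kobject ... */
static int example_publish(struct kobject *kobj)
{
	return sysfs_create_group(kobj, &example_group);
}

/* ... and remove it again before the kobject goes away. */
static void example_unpublish(struct kobject *kobj)
{
	sysfs_remove_group(kobj, &example_group);
}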
/*
 * Parallel-port resource manager code.
 *
 * Authors: David Campbell <campbell@tirian.che.curtin.edu.au>
 *          Tim Waugh <tim@cyberelk.demon.co.uk>
 *          Jose Renau <renau@acm.org>
 *          Philip Blundell <philb@gnu.org>
 *          Andrea Arcangeli
 *
 * based on work by Grant Guenther <grant@torque.net>
 *          and Philip Blundell
 *
 * Any part of this program may be used in documents licensed under
 * the GNU Free Documentation License, Version 1.1 or any later version
 * published by the Free Software Foundation.
 */

#undef PARPORT_DEBUG_SHARING		/* undef for production */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/threads.h>
#include <linux/parport.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/kmod.h>
#include <linux/device.h>

#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <asm/irq.h>

#undef PARPORT_PARANOID

#define PARPORT_DEFAULT_TIMESLICE	(HZ/5)

unsigned long parport_default_timeslice = PARPORT_DEFAULT_TIMESLICE;
int parport_default_spintime = DEFAULT_SPIN_TIME;

static LIST_HEAD(portlist);
static DEFINE_SPINLOCK(parportlist_lock);

/* list of all allocated ports, sorted by ->number */
static LIST_HEAD(all_ports);
static DEFINE_SPINLOCK(full_list_lock);

static LIST_HEAD(drivers);

static DEFINE_MUTEX(registration_lock);

/* What you can do to a port that's gone away..
*/ static void dead_write_lines(struct parport *p, unsigned char b){} static unsigned char dead_read_lines(struct parport *p) { return 0; } static unsigned char dead_frob_lines(struct parport *p, unsigned char b, unsigned char c) { return 0; } static void dead_onearg(struct parport *p){} static void dead_initstate(struct pardevice *d, struct parport_state *s) { } static void dead_state(struct parport *p, struct parport_state *s) { } static size_t dead_write(struct parport *p, const void *b, size_t l, int f) { return 0; } static size_t dead_read(struct parport *p, void *b, size_t l, int f) { return 0; } static struct parport_operations dead_ops = { .write_data = dead_write_lines, /* data */ .read_data = dead_read_lines, .write_control = dead_write_lines, /* control */ .read_control = dead_read_lines, .frob_control = dead_frob_lines, .read_status = dead_read_lines, /* status */ .enable_irq = dead_onearg, /* enable_irq */ .disable_irq = dead_onearg, /* disable_irq */ .data_forward = dead_onearg, /* data_forward */ .data_reverse = dead_onearg, /* data_reverse */ .init_state = dead_initstate, /* init_state */ .save_state = dead_state, .restore_state = dead_state, .epp_write_data = dead_write, /* epp */ .epp_read_data = dead_read, .epp_write_addr = dead_write, .epp_read_addr = dead_read, .ecp_write_data = dead_write, /* ecp */ .ecp_read_data = dead_read, .ecp_write_addr = dead_write, .compat_write_data = dead_write, /* compat */ .nibble_read_data = dead_read, /* nibble */ .byte_read_data = dead_read, /* byte */ .owner = NULL, }; static struct device_type parport_device_type = { .name = "parport", }; static int is_parport(struct device *dev) { return dev->type == &parport_device_type; } static int parport_probe(struct device *dev) { struct parport_driver *drv; if (is_parport(dev)) return -ENODEV; drv = to_parport_driver(dev->driver); if (!drv->probe) { /* if driver has not defined a custom probe */ struct pardevice *par_dev = to_pardevice(dev); if (strcmp(par_dev->name, drv->name)) return -ENODEV; return 0; } /* if driver defined its own probe */ return drv->probe(to_pardevice(dev)); } static struct bus_type parport_bus_type = { .name = "parport", .probe = parport_probe, }; int parport_bus_init(void) { return bus_register(&parport_bus_type); } void parport_bus_exit(void) { bus_unregister(&parport_bus_type); } /* * iterates through all the drivers registered with the bus and sends the port * details to the match_port callback of the driver, so that the driver can * know about the new port that just registered with the bus and decide if it * wants to use this new port. */ static int driver_check(struct device_driver *dev_drv, void *_port) { struct parport *port = _port; struct parport_driver *drv = to_parport_driver(dev_drv); if (drv->match_port) drv->match_port(port); return 0; } /* Call attach(port) for each registered driver. */ static void attach_driver_chain(struct parport *port) { /* caller has exclusive registration_lock */ struct parport_driver *drv; list_for_each_entry(drv, &drivers, list) drv->attach(port); /* * call the driver_check function of the drivers registered in * new device model */ bus_for_each_drv(&parport_bus_type, NULL, port, driver_check); } static int driver_detach(struct device_driver *_drv, void *_port) { struct parport *port = _port; struct parport_driver *drv = to_parport_driver(_drv); if (drv->detach) drv->detach(port); return 0; } /* Call detach(port) for each registered driver. 
*/ static void detach_driver_chain(struct parport *port) { struct parport_driver *drv; /* caller has exclusive registration_lock */ list_for_each_entry(drv, &drivers, list) drv->detach(port); /* * call the detach function of the drivers registered in * new device model */ bus_for_each_drv(&parport_bus_type, NULL, port, driver_detach); } /* Ask kmod for some lowlevel drivers. */ static void get_lowlevel_driver(void) { /* * There is no actual module called this: you should set * up an alias for modutils. */ request_module("parport_lowlevel"); } /* * iterates through all the devices connected to the bus and sends the device * details to the match_port callback of the driver, so that the driver can * know what are all the ports that are connected to the bus and choose the * port to which it wants to register its device. */ static int port_check(struct device *dev, void *dev_drv) { struct parport_driver *drv = dev_drv; /* only send ports, do not send other devices connected to bus */ if (is_parport(dev)) drv->match_port(to_parport_dev(dev)); return 0; } /* * Iterates through all the devices connected to the bus and return 1 * if the device is a parallel port. */ static int port_detect(struct device *dev, void *dev_drv) { if (is_parport(dev)) return 1; return 0; } /** * __parport_register_driver - register a parallel port device driver * @drv: structure describing the driver * @owner: owner module of drv * @mod_name: module name string * * This can be called by a parallel port device driver in order * to receive notifications about ports being found in the * system, as well as ports no longer available. * * If devmodel is true then the new device model is used * for registration. * * The @drv structure is allocated by the caller and must not be * deallocated until after calling parport_unregister_driver(). * * If using the non device model: * The driver's attach() function may block. The port that * attach() is given will be valid for the duration of the * callback, but if the driver wants to take a copy of the * pointer it must call parport_get_port() to do so. Calling * parport_register_device() on that port will do this for you. * * The driver's detach() function may block. The port that * detach() is given will be valid for the duration of the * callback, but if the driver wants to take a copy of the * pointer it must call parport_get_port() to do so. * * * Returns 0 on success. The non device model will always succeeds. * but the new device model can fail and will return the error code. **/ int __parport_register_driver(struct parport_driver *drv, struct module *owner, const char *mod_name) { /* using device model */ int ret; /* initialize common driver fields */ drv->driver.name = drv->name; drv->driver.bus = &parport_bus_type; drv->driver.owner = owner; drv->driver.mod_name = mod_name; ret = driver_register(&drv->driver); if (ret) return ret; /* * check if bus has any parallel port registered, if * none is found then load the lowlevel driver. 
*/ ret = bus_for_each_dev(&parport_bus_type, NULL, NULL, port_detect); if (!ret) get_lowlevel_driver(); mutex_lock(&registration_lock); if (drv->match_port) bus_for_each_dev(&parport_bus_type, NULL, drv, port_check); mutex_unlock(&registration_lock); return 0; } EXPORT_SYMBOL(__parport_register_driver); static int port_detach(struct device *dev, void *_drv) { struct parport_driver *drv = _drv; if (is_parport(dev) && drv->detach) drv->detach(to_parport_dev(dev)); return 0; } /** * parport_unregister_driver - deregister a parallel port device driver * @drv: structure describing the driver that was given to * parport_register_driver() * * This should be called by a parallel port device driver that * has registered itself using parport_register_driver() when it * is about to be unloaded. * * When it returns, the driver's attach() routine will no longer * be called, and for each port that attach() was called for, the * detach() routine will have been called. * * All the driver's attach() and detach() calls are guaranteed to have * finished by the time this function returns. **/ void parport_unregister_driver(struct parport_driver *drv) { mutex_lock(&registration_lock); bus_for_each_dev(&parport_bus_type, NULL, drv, port_detach); driver_unregister(&drv->driver); mutex_unlock(&registration_lock); } EXPORT_SYMBOL(parport_unregister_driver); static void free_port(struct device *dev) { int d; struct parport *port = to_parport_dev(dev); spin_lock(&full_list_lock); list_del(&port->full_list); spin_unlock(&full_list_lock); for (d = 0; d < 5; d++) { kfree(port->probe_info[d].class_name); kfree(port->probe_info[d].mfr); kfree(port->probe_info[d].model); kfree(port->probe_info[d].cmdset); kfree(port->probe_info[d].description); } kfree(port); } /** * parport_get_port - increment a port's reference count * @port: the port * * This ensures that a struct parport pointer remains valid * until the matching parport_put_port() call. **/ struct parport *parport_get_port(struct parport *port) { struct device *dev = get_device(&port->bus_dev); return to_parport_dev(dev); } EXPORT_SYMBOL(parport_get_port); void parport_del_port(struct parport *port) { device_unregister(&port->bus_dev); } EXPORT_SYMBOL(parport_del_port); /** * parport_put_port - decrement a port's reference count * @port: the port * * This should be called once for each call to parport_get_port(), * once the port is no longer needed. When the reference count reaches * zero (port is no longer used), free_port is called. **/ void parport_put_port(struct parport *port) { put_device(&port->bus_dev); } EXPORT_SYMBOL(parport_put_port); /** * parport_register_port - register a parallel port * @base: base I/O address * @irq: IRQ line * @dma: DMA channel * @ops: pointer to the port driver's port operations structure * * When a parallel port (lowlevel) driver finds a port that * should be made available to parallel port device drivers, it * should call parport_register_port(). The @base, @irq, and * @dma parameters are for the convenience of port drivers, and * for ports where they aren't meaningful needn't be set to * anything special. They can be altered afterwards by adjusting * the relevant members of the parport structure that is returned * and represents the port. They should not be tampered with * after calling parport_announce_port, however. 
* * If there are parallel port device drivers in the system that * have registered themselves using parport_register_driver(), * they are not told about the port at this time; that is done by * parport_announce_port(). * * The @ops structure is allocated by the caller, and must not be * deallocated before calling parport_remove_port(). * * If there is no memory to allocate a new parport structure, * this function will return %NULL. **/ struct parport *parport_register_port(unsigned long base, int irq, int dma, struct parport_operations *ops) { struct list_head *l; struct parport *tmp; int num; int device; int ret; tmp = kzalloc(sizeof(struct parport), GFP_KERNEL); if (!tmp) return NULL; /* Init our structure */ tmp->base = base; tmp->irq = irq; tmp->dma = dma; tmp->muxport = tmp->daisy = tmp->muxsel = -1; INIT_LIST_HEAD(&tmp->list); tmp->ops = ops; tmp->physport = tmp; rwlock_init(&tmp->cad_lock); spin_lock_init(&tmp->waitlist_lock); spin_lock_init(&tmp->pardevice_lock); tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; sema_init(&tmp->ieee1284.irq, 0); tmp->spintime = parport_default_spintime; atomic_set(&tmp->ref_count, 1); /* Search for the lowest free parport number. */ spin_lock(&full_list_lock); num = 0; list_for_each(l, &all_ports) { struct parport *p = list_entry(l, struct parport, full_list); if (p->number != num++) break; } tmp->portnum = tmp->number = num; list_add_tail(&tmp->full_list, l); spin_unlock(&full_list_lock); /* * Now that the portnum is known finish doing the Init. */ dev_set_name(&tmp->bus_dev, "parport%d", tmp->portnum); tmp->bus_dev.bus = &parport_bus_type; tmp->bus_dev.release = free_port; tmp->bus_dev.type = &parport_device_type; tmp->name = dev_name(&tmp->bus_dev); for (device = 0; device < 5; device++) /* assume the worst */ tmp->probe_info[device].class = PARPORT_CLASS_LEGACY; ret = device_register(&tmp->bus_dev); if (ret) { put_device(&tmp->bus_dev); return NULL; } return tmp; } EXPORT_SYMBOL(parport_register_port); /** * parport_announce_port - tell device drivers about a parallel port * @port: parallel port to announce * * After a port driver has registered a parallel port with * parport_register_port, and performed any necessary * initialisation or adjustments, it should call * parport_announce_port() in order to notify all device drivers * that have called parport_register_driver(). Their attach() * functions will be called, with @port as the parameter. **/ void parport_announce_port(struct parport *port) { int i; #ifdef CONFIG_PARPORT_1284 /* Analyse the IEEE1284.3 topology of the port. */ parport_daisy_init(port); #endif if (!port->dev) pr_warn("%s: fix this legacy no-device port driver!\n", port->name); parport_proc_register(port); mutex_lock(&registration_lock); spin_lock_irq(&parportlist_lock); list_add_tail(&port->list, &portlist); for (i = 1; i < 3; i++) { struct parport *slave = port->slaves[i-1]; if (slave) list_add_tail(&slave->list, &portlist); } spin_unlock_irq(&parportlist_lock); /* Let drivers know that new port(s) has arrived. 
*/ attach_driver_chain(port); for (i = 1; i < 3; i++) { struct parport *slave = port->slaves[i-1]; if (slave) attach_driver_chain(slave); } mutex_unlock(&registration_lock); } EXPORT_SYMBOL(parport_announce_port); /** * parport_remove_port - deregister a parallel port * @port: parallel port to deregister * * When a parallel port driver is forcibly unloaded, or a * parallel port becomes inaccessible, the port driver must call * this function in order to deal with device drivers that still * want to use it. * * The parport structure associated with the port has its * operations structure replaced with one containing 'null' * operations that return errors or just don't do anything. * * Any drivers that have registered themselves using * parport_register_driver() are notified that the port is no * longer accessible by having their detach() routines called * with @port as the parameter. **/ void parport_remove_port(struct parport *port) { int i; mutex_lock(&registration_lock); /* Spread the word. */ detach_driver_chain(port); #ifdef CONFIG_PARPORT_1284 /* Forget the IEEE1284.3 topology of the port. */ parport_daisy_fini(port); for (i = 1; i < 3; i++) { struct parport *slave = port->slaves[i-1]; if (!slave) continue; detach_driver_chain(slave); parport_daisy_fini(slave); } #endif port->ops = &dead_ops; spin_lock(&parportlist_lock); list_del_init(&port->list); for (i = 1; i < 3; i++) { struct parport *slave = port->slaves[i-1]; if (slave) list_del_init(&slave->list); } spin_unlock(&parportlist_lock); mutex_unlock(&registration_lock); parport_proc_unregister(port); for (i = 1; i < 3; i++) { struct parport *slave = port->slaves[i-1]; if (slave) parport_put_port(slave); } } EXPORT_SYMBOL(parport_remove_port); static void free_pardevice(struct device *dev) { struct pardevice *par_dev = to_pardevice(dev); kfree_const(par_dev->name); kfree(par_dev); } /** * parport_register_dev_model - register a device on a parallel port * @port: port to which the device is attached * @name: a name to refer to the device * @par_dev_cb: struct containing callbacks * @id: device number to be given to the device * * This function, called by parallel port device drivers, * declares that a device is connected to a port, and tells the * system all it needs to know. * * The struct pardev_cb contains pointer to callbacks. preemption * callback function, @preempt, is called when this device driver * has claimed access to the port but another device driver wants * to use it. It is given, @private, as its parameter, and should * return zero if it is willing for the system to release the port * to another driver on its behalf. If it wants to keep control of * the port it should return non-zero, and no action will be taken. * It is good manners for the driver to try to release the port at * the earliest opportunity after its preemption callback rejects a * preemption attempt. Note that if a preemption callback is happy * for preemption to go ahead, there is no need to release the * port; it is done automatically. This function may not block, as * it may be called from interrupt context. If the device driver * does not support preemption, @preempt can be %NULL. * * The wake-up ("kick") callback function, @wakeup, is called when * the port is available to be claimed for exclusive access; that * is, parport_claim() is guaranteed to succeed when called from * inside the wake-up callback function. If the driver wants to * claim the port it should do so; otherwise, it need not take * any action. 
This function may not block, as it may be called * from interrupt context. If the device driver does not want to * be explicitly invited to claim the port in this way, @wakeup can * be %NULL. * * The interrupt handler, @irq_func, is called when an interrupt * arrives from the parallel port. Note that if a device driver * wants to use interrupts it should use parport_enable_irq(), * and can also check the irq member of the parport structure * representing the port. * * The parallel port (lowlevel) driver is the one that has called * request_irq() and whose interrupt handler is called first. * This handler does whatever needs to be done to the hardware to * acknowledge the interrupt (for PC-style ports there is nothing * special to be done). It then tells the IEEE 1284 code about * the interrupt, which may involve reacting to an IEEE 1284 * event depending on the current IEEE 1284 phase. After this, * it calls @irq_func. Needless to say, @irq_func will be called * from interrupt context, and may not block. * * The %PARPORT_DEV_EXCL flag is for preventing port sharing, and * so should only be used when sharing the port with other device * drivers is impossible and would lead to incorrect behaviour. * Use it sparingly! Normally, @flags will be zero. * * This function returns a pointer to a structure that represents * the device on the port, or %NULL if there is not enough memory * to allocate space for that structure. **/ struct pardevice * parport_register_dev_model(struct parport *port, const char *name, const struct pardev_cb *par_dev_cb, int id) { struct pardevice *par_dev; const char *devname; int ret; if (port->physport->flags & PARPORT_FLAG_EXCL) { /* An exclusive device is registered. */ pr_err("%s: no more devices allowed\n", port->name); return NULL; } if (par_dev_cb->flags & PARPORT_DEV_LURK) { if (!par_dev_cb->preempt || !par_dev_cb->wakeup) { pr_info("%s: refused to register lurking device (%s) without callbacks\n", port->name, name); return NULL; } } if (par_dev_cb->flags & PARPORT_DEV_EXCL) { if (port->physport->devices) { /* * If a device is already registered and this new * device wants exclusive access, then no need to * continue as we can not grant exclusive access to * this device. */ pr_err("%s: cannot grant exclusive access for device %s\n", port->name, name); return NULL; } } if (!try_module_get(port->ops->owner)) return NULL; parport_get_port(port); par_dev = kzalloc(sizeof(*par_dev), GFP_KERNEL); if (!par_dev) goto err_put_port; par_dev->state = kzalloc(sizeof(*par_dev->state), GFP_KERNEL); if (!par_dev->state) goto err_put_par_dev; devname = kstrdup_const(name, GFP_KERNEL); if (!devname) goto err_free_par_dev; par_dev->name = devname; par_dev->port = port; par_dev->daisy = -1; par_dev->preempt = par_dev_cb->preempt; par_dev->wakeup = par_dev_cb->wakeup; par_dev->private = par_dev_cb->private; par_dev->flags = par_dev_cb->flags; par_dev->irq_func = par_dev_cb->irq_func; par_dev->waiting = 0; par_dev->timeout = 5 * HZ; par_dev->dev.parent = &port->bus_dev; par_dev->dev.bus = &parport_bus_type; ret = dev_set_name(&par_dev->dev, "%s.%d", devname, id); if (ret) goto err_free_devname; par_dev->dev.release = free_pardevice; par_dev->devmodel = true; ret = device_register(&par_dev->dev); if (ret) { kfree(par_dev->state); put_device(&par_dev->dev); goto err_put_port; } /* Chain this onto the list */ par_dev->prev = NULL; /* * This function must not run from an irq handler so we don' t need * to clear irq on the local CPU. 
-arca */ spin_lock(&port->physport->pardevice_lock); if (par_dev_cb->flags & PARPORT_DEV_EXCL) { if (port->physport->devices) { spin_unlock(&port->physport->pardevice_lock); pr_debug("%s: cannot grant exclusive access for device %s\n", port->name, name); kfree(par_dev->state); device_unregister(&par_dev->dev); goto err_put_port; } port->flags |= PARPORT_FLAG_EXCL; } par_dev->next = port->physport->devices; wmb(); /* * Make sure that tmp->next is written before it's * added to the list; see comments marked 'no locking * required' */ if (port->physport->devices) port->physport->devices->prev = par_dev; port->physport->devices = par_dev; spin_unlock(&port->physport->pardevice_lock); init_waitqueue_head(&par_dev->wait_q); par_dev->timeslice = parport_default_timeslice; par_dev->waitnext = NULL; par_dev->waitprev = NULL; /* * This has to be run as last thing since init_state may need other * pardevice fields. -arca */ port->ops->init_state(par_dev, par_dev->state); if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) { port->proc_device = par_dev; parport_device_proc_register(par_dev); } return par_dev; err_free_devname: kfree_const(devname); err_free_par_dev: kfree(par_dev->state); err_put_par_dev: if (!par_dev->devmodel) kfree(par_dev); err_put_port: parport_put_port(port); module_put(port->ops->owner); return NULL; } EXPORT_SYMBOL(parport_register_dev_model); /** * parport_unregister_device - deregister a device on a parallel port * @dev: pointer to structure representing device * * This undoes the effect of parport_register_device(). **/ void parport_unregister_device(struct pardevice *dev) { struct parport *port; #ifdef PARPORT_PARANOID if (!dev) { pr_err("%s: passed NULL\n", __func__); return; } #endif port = dev->port->physport; if (port->proc_device == dev) { port->proc_device = NULL; clear_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags); parport_device_proc_unregister(dev); } if (port->cad == dev) { printk(KERN_DEBUG "%s: %s forgot to release port\n", port->name, dev->name); parport_release(dev); } spin_lock(&port->pardevice_lock); if (dev->next) dev->next->prev = dev->prev; if (dev->prev) dev->prev->next = dev->next; else port->devices = dev->next; if (dev->flags & PARPORT_DEV_EXCL) port->flags &= ~PARPORT_FLAG_EXCL; spin_unlock(&port->pardevice_lock); /* * Make sure we haven't left any pointers around in the wait * list. */ spin_lock_irq(&port->waitlist_lock); if (dev->waitprev || dev->waitnext || port->waithead == dev) { if (dev->waitprev) dev->waitprev->waitnext = dev->waitnext; else port->waithead = dev->waitnext; if (dev->waitnext) dev->waitnext->waitprev = dev->waitprev; else port->waittail = dev->waitprev; } spin_unlock_irq(&port->waitlist_lock); kfree(dev->state); device_unregister(&dev->dev); module_put(port->ops->owner); parport_put_port(port); } EXPORT_SYMBOL(parport_unregister_device); /** * parport_find_number - find a parallel port by number * @number: parallel port number * * This returns the parallel port with the specified number, or * %NULL if there is none. * * There is an implicit parport_get_port() done already; to throw * away the reference to the port that parport_find_number() * gives you, use parport_put_port(). 
*/ struct parport *parport_find_number(int number) { struct parport *port, *result = NULL; if (list_empty(&portlist)) get_lowlevel_driver(); spin_lock(&parportlist_lock); list_for_each_entry(port, &portlist, list) { if (port->number == number) { result = parport_get_port(port); break; } } spin_unlock(&parportlist_lock); return result; } EXPORT_SYMBOL(parport_find_number); /** * parport_find_base - find a parallel port by base address * @base: base I/O address * * This returns the parallel port with the specified base * address, or %NULL if there is none. * * There is an implicit parport_get_port() done already; to throw * away the reference to the port that parport_find_base() * gives you, use parport_put_port(). */ struct parport *parport_find_base(unsigned long base) { struct parport *port, *result = NULL; if (list_empty(&portlist)) get_lowlevel_driver(); spin_lock(&parportlist_lock); list_for_each_entry(port, &portlist, list) { if (port->base == base) { result = parport_get_port(port); break; } } spin_unlock(&parportlist_lock); return result; } EXPORT_SYMBOL(parport_find_base); /** * parport_claim - claim access to a parallel port device * @dev: pointer to structure representing a device on the port * * This function will not block and so can be used from interrupt * context. If parport_claim() succeeds in claiming access to * the port it returns zero and the port is available to use. It * may fail (returning non-zero) if the port is in use by another * driver and that driver is not willing to relinquish control of * the port. **/ int parport_claim(struct pardevice *dev) { struct pardevice *oldcad; struct parport *port = dev->port->physport; unsigned long flags; if (port->cad == dev) { pr_info("%s: %s already owner\n", dev->port->name, dev->name); return 0; } /* Preempt any current device */ write_lock_irqsave(&port->cad_lock, flags); oldcad = port->cad; if (oldcad) { if (oldcad->preempt) { if (oldcad->preempt(oldcad->private)) goto blocked; port->ops->save_state(port, dev->state); } else goto blocked; if (port->cad != oldcad) { /* * I think we'll actually deadlock rather than * get here, but just in case.. */ pr_warn("%s: %s released port when preempted!\n", port->name, oldcad->name); if (port->cad) goto blocked; } } /* Can't fail from now on, so mark ourselves as no longer waiting. */ if (dev->waiting & 1) { dev->waiting = 0; /* Take ourselves out of the wait list again. */ spin_lock_irq(&port->waitlist_lock); if (dev->waitprev) dev->waitprev->waitnext = dev->waitnext; else port->waithead = dev->waitnext; if (dev->waitnext) dev->waitnext->waitprev = dev->waitprev; else port->waittail = dev->waitprev; spin_unlock_irq(&port->waitlist_lock); dev->waitprev = dev->waitnext = NULL; } /* Now we do the change of devices */ port->cad = dev; #ifdef CONFIG_PARPORT_1284 /* If it's a mux port, select it. */ if (dev->port->muxport >= 0) { /* FIXME */ port->muxsel = dev->port->muxport; } /* If it's a daisy chain device, select it. */ if (dev->daisy >= 0) { /* This could be lazier. */ if (!parport_daisy_select(port, dev->daisy, IEEE1284_MODE_COMPAT)) port->daisy = dev->daisy; } #endif /* IEEE1284.3 support */ /* Restore control registers */ port->ops->restore_state(port, dev->state); write_unlock_irqrestore(&port->cad_lock, flags); dev->time = jiffies; return 0; blocked: /* * If this is the first time we tried to claim the port, register an * interest. This is only allowed for devices sleeping in * parport_claim_or_block(), or those with a wakeup function. 
*/ /* The cad_lock is still held for writing here */ if (dev->waiting & 2 || dev->wakeup) { spin_lock(&port->waitlist_lock); if (test_and_set_bit(0, &dev->waiting) == 0) { /* First add ourselves to the end of the wait list. */ dev->waitnext = NULL; dev->waitprev = port->waittail; if (port->waittail) { port->waittail->waitnext = dev; port->waittail = dev; } else port->waithead = port->waittail = dev; } spin_unlock(&port->waitlist_lock); } write_unlock_irqrestore(&port->cad_lock, flags); return -EAGAIN; } EXPORT_SYMBOL(parport_claim); /** * parport_claim_or_block - claim access to a parallel port device * @dev: pointer to structure representing a device on the port * * This behaves like parport_claim(), but will block if necessary * to wait for the port to be free. A return value of 1 * indicates that it slept; 0 means that it succeeded without * needing to sleep. A negative error code indicates failure. **/ int parport_claim_or_block(struct pardevice *dev) { int r; /* * Signal to parport_claim() that we can wait even without a * wakeup function. */ dev->waiting = 2; /* Try to claim the port. If this fails, we need to sleep. */ r = parport_claim(dev); if (r == -EAGAIN) { #ifdef PARPORT_DEBUG_SHARING printk(KERN_DEBUG "%s: parport_claim() returned -EAGAIN\n", dev->name); #endif /* * FIXME!!! Use the proper locking for dev->waiting, * and make this use the "wait_event_interruptible()" * interfaces. The cli/sti that used to be here * did nothing. * * See also parport_release() */ /* * If dev->waiting is clear now, an interrupt * gave us the port and we would deadlock if we slept. */ if (dev->waiting) { wait_event_interruptible(dev->wait_q, !dev->waiting); if (signal_pending(current)) return -EINTR; r = 1; } else { r = 0; #ifdef PARPORT_DEBUG_SHARING printk(KERN_DEBUG "%s: didn't sleep in parport_claim_or_block()\n", dev->name); #endif } #ifdef PARPORT_DEBUG_SHARING if (dev->port->physport->cad != dev) printk(KERN_DEBUG "%s: exiting parport_claim_or_block but %s owns port!\n", dev->name, dev->port->physport->cad ? dev->port->physport->cad->name : "nobody"); #endif } dev->waiting = 0; return r; } EXPORT_SYMBOL(parport_claim_or_block); /** * parport_release - give up access to a parallel port device * @dev: pointer to structure representing parallel port device * * This function cannot fail, but it should not be called without * the port claimed. Similarly, if the port is already claimed * you should not try claiming it again. **/ void parport_release(struct pardevice *dev) { struct parport *port = dev->port->physport; struct pardevice *pd; unsigned long flags; /* Make sure that dev is the current device */ write_lock_irqsave(&port->cad_lock, flags); if (port->cad != dev) { write_unlock_irqrestore(&port->cad_lock, flags); pr_warn("%s: %s tried to release parport when not owner\n", port->name, dev->name); return; } #ifdef CONFIG_PARPORT_1284 /* If this is on a mux port, deselect it. */ if (dev->port->muxport >= 0) { /* FIXME */ port->muxsel = -1; } /* If this is a daisy device, deselect it. */ if (dev->daisy >= 0) { parport_daisy_deselect_all(port); port->daisy = -1; } #endif port->cad = NULL; write_unlock_irqrestore(&port->cad_lock, flags); /* Save control registers */ port->ops->save_state(port, dev->state); /* * If anybody is waiting, find out who's been there longest and * then wake them up. (Note: no locking required) */ /* !!! 
LOCKING IS NEEDED HERE */ for (pd = port->waithead; pd; pd = pd->waitnext) { if (pd->waiting & 2) { /* sleeping in claim_or_block */ parport_claim(pd); if (waitqueue_active(&pd->wait_q)) wake_up_interruptible(&pd->wait_q); return; } else if (pd->wakeup) { pd->wakeup(pd->private); if (dev->port->cad) /* racy but no matter */ return; } else { pr_err("%s: don't know how to wake %s\n", port->name, pd->name); } } /* * Nobody was waiting, so walk the list to see if anyone is * interested in being woken up. (Note: no locking required) */ /* !!! LOCKING IS NEEDED HERE */ for (pd = port->devices; !port->cad && pd; pd = pd->next) { if (pd->wakeup && pd != dev) pd->wakeup(pd->private); } } EXPORT_SYMBOL(parport_release); irqreturn_t parport_irq_handler(int irq, void *dev_id) { struct parport *port = dev_id; parport_generic_irq(port); return IRQ_HANDLED; } EXPORT_SYMBOL(parport_irq_handler); MODULE_LICENSE("GPL");
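/*
 * Illustrative sketch (not part of share.c): how a client driver might consume the
 * registration and claiming APIs above. The example_* names and the hard-coded port
 * number are hypothetical; a real driver would normally obtain its struct parport from
 * a parport_register_driver() ->match_port() callback, and would add its own
 * preempt/wakeup policy and error handling. Assumes <linux/parport.h>.
 */
static void example_wakeup(void *private)
{
	/* The port became free; a real driver could call parport_claim() here. */
}

static int example_use_port(int portnum)
{
	struct pardev_cb cb = {
		.wakeup  = example_wakeup,
		.private = NULL,
		.flags   = 0,		/* shared access, no PARPORT_DEV_EXCL */
	};
	struct parport *port;
	struct pardevice *dev;
	int err;

	port = parport_find_number(portnum);	/* takes a reference */
	if (!port)
		return -ENODEV;

	dev = parport_register_dev_model(port, "example", &cb, 0);
	if (!dev) {
		parport_put_port(port);
		return -ENOMEM;
	}

	err = parport_claim_or_block(dev);	/* may sleep until the port is free */
	if (err < 0)
		goto out_unregister;

	/* ... exclusive use of the port, e.g. port->ops->write_data(port, 0xAA); ... */

	parport_release(dev);
	err = 0;

out_unregister:
	parport_unregister_device(dev);
	parport_put_port(port);		/* drop the parport_find_number() reference */
	return err;
}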
/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2023 Isovalent */ #ifndef __BPF_MPROG_H #define __BPF_MPROG_H #include <linux/bpf.h> /* bpf_mprog framework: * * bpf_mprog is a generic layer for multi-program attachment. In-kernel users * of the bpf_mprog don't need to care about the dependency resolution * internals, they can just consume it with few API calls. Currently available * dependency directives are BPF_F_{BEFORE,AFTER} which enable insertion of * a BPF program or BPF link relative to an existing BPF program or BPF link * inside the multi-program array as well as prepend and append behavior if * no relative object was specified, see corresponding selftests for concrete * examples (e.g. tc_links and tc_opts test cases of test_progs). * * Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code: * * Attach case: * * struct bpf_mprog_entry *entry, *entry_new; * int ret; * * // bpf_mprog user-side lock * // fetch active @entry from attach location * [...] * ret = bpf_mprog_attach(entry, &entry_new, [...]); * if (!ret) { * if (entry != entry_new) { * // swap @entry to @entry_new at attach location * // ensure there are no inflight users of @entry: * synchronize_rcu(); * } * bpf_mprog_commit(entry); * } else { * // error path, bail out, propagate @ret * } * // bpf_mprog user-side unlock * * Detach case: * * struct bpf_mprog_entry *entry, *entry_new; * int ret; * * // bpf_mprog user-side lock * // fetch active @entry from attach location * [...] * ret = bpf_mprog_detach(entry, &entry_new, [...]); * if (!ret) { * // all (*) marked is optional and depends on the use-case * // whether bpf_mprog_bundle should be freed or not * if (!bpf_mprog_total(entry_new)) (*) * entry_new = NULL (*) * // swap @entry to @entry_new at attach location * // ensure there are no inflight users of @entry: * synchronize_rcu(); * bpf_mprog_commit(entry); * if (!entry_new) (*) * // free bpf_mprog_bundle (*) * } else { * // error path, bail out, propagate @ret * } * // bpf_mprog user-side unlock * * Query case: * * struct bpf_mprog_entry *entry; * int ret; * * // bpf_mprog user-side lock * // fetch active @entry from attach location * [...] 
* ret = bpf_mprog_query(attr, uattr, entry); * // bpf_mprog user-side unlock * * Data/fast path: * * struct bpf_mprog_entry *entry; * struct bpf_mprog_fp *fp; * struct bpf_prog *prog; * int ret = [...]; * * rcu_read_lock(); * // fetch active @entry from attach location * [...] * bpf_mprog_foreach_prog(entry, fp, prog) { * ret = bpf_prog_run(prog, [...]); * // process @ret from program * } * [...] * rcu_read_unlock(); * * bpf_mprog locking considerations: * * bpf_mprog_{attach,detach,query}() must be protected by an external lock * (like RTNL in case of tcx). * * bpf_mprog_entry pointer can be an __rcu annotated pointer (in case of tcx * the netdevice has tcx_ingress and tcx_egress __rcu pointer) which gets * updated via rcu_assign_pointer() pointing to the active bpf_mprog_entry of * the bpf_mprog_bundle. * * Fast path accesses the active bpf_mprog_entry within RCU critical section * (in case of tcx it runs in NAPI which provides RCU protection there, * other users might need explicit rcu_read_lock()). The bpf_mprog_commit() * assumes that for the old bpf_mprog_entry there are no inflight users * anymore. * * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for * the replacement case where we don't swap the bpf_mprog_entry. */ #define bpf_mprog_foreach_tuple(entry, fp, cp, t) \ for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\ ({ \ t.prog = READ_ONCE(fp->prog); \ t.link = cp->link; \ t.prog; \ }); \ fp++, cp++) #define bpf_mprog_foreach_prog(entry, fp, p) \ for (fp = &entry->fp_items[0]; \ (p = READ_ONCE(fp->prog)); \ fp++) #define BPF_MPROG_MAX 64 struct bpf_mprog_fp { struct bpf_prog *prog; }; struct bpf_mprog_cp { struct bpf_link *link; }; struct bpf_mprog_entry { struct bpf_mprog_fp fp_items[BPF_MPROG_MAX]; struct bpf_mprog_bundle *parent; }; struct bpf_mprog_bundle { struct bpf_mprog_entry a; struct bpf_mprog_entry b; struct bpf_mprog_cp cp_items[BPF_MPROG_MAX]; struct bpf_prog *ref; atomic64_t revision; u32 count; }; struct bpf_tuple { struct bpf_prog *prog; struct bpf_link *link; }; static inline struct bpf_mprog_entry * bpf_mprog_peer(const struct bpf_mprog_entry *entry) { if (entry == &entry->parent->a) return &entry->parent->b; else return &entry->parent->a; } static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle) { BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64)); BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) != ARRAY_SIZE(bundle->cp_items)); memset(bundle, 0, sizeof(*bundle)); atomic64_set(&bundle->revision, 1); bundle->a.parent = bundle; bundle->b.parent = bundle; } static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry) { entry->parent->count++; } static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry) { entry->parent->count--; } static inline int bpf_mprog_max(void) { return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1; } static inline int bpf_mprog_total(struct bpf_mprog_entry *entry) { int total = entry->parent->count; WARN_ON_ONCE(total > bpf_mprog_max()); return total; } static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry, struct bpf_prog *prog) { const struct bpf_mprog_fp *fp; const struct bpf_prog *tmp; bpf_mprog_foreach_prog(entry, fp, tmp) { if (tmp == prog) return true; } return false; } static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry, struct bpf_tuple *tuple) { WARN_ON_ONCE(entry->parent->ref); if (!tuple->link) entry->parent->ref = tuple->prog; } static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry) { /* In 
the non-link case prog deletions can only drop the reference * to the prog after the bpf_mprog_entry got swapped and the * bpf_mprog ensured that there are no inflight users anymore. * * Paired with bpf_mprog_mark_for_release(). */ if (entry->parent->ref) { bpf_prog_put(entry->parent->ref); entry->parent->ref = NULL; } } static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry) { atomic64_inc(&entry->parent->revision); } static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry) { bpf_mprog_complete_release(entry); bpf_mprog_revision_new(entry); } static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry) { return atomic64_read(&entry->parent->revision); } static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst, struct bpf_mprog_entry *src) { memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items)); } static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst) { memset(dst->fp_items, 0, sizeof(dst->fp_items)); } static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry, struct bpf_mprog_entry **entry_new) { struct bpf_mprog_entry *peer; peer = bpf_mprog_peer(entry); bpf_mprog_entry_clear(peer); peer->parent->count = 0; *entry_new = peer; } static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx) { int total = bpf_mprog_total(entry); memmove(entry->fp_items + idx + 1, entry->fp_items + idx, (total - idx) * sizeof(struct bpf_mprog_fp)); memmove(entry->parent->cp_items + idx + 1, entry->parent->cp_items + idx, (total - idx) * sizeof(struct bpf_mprog_cp)); } static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx) { /* Total array size is needed in this case to enure the NULL * entry is copied at the end. */ int total = ARRAY_SIZE(entry->fp_items); memmove(entry->fp_items + idx, entry->fp_items + idx + 1, (total - idx - 1) * sizeof(struct bpf_mprog_fp)); memmove(entry->parent->cp_items + idx, entry->parent->cp_items + idx + 1, (total - idx - 1) * sizeof(struct bpf_mprog_cp)); } static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx, struct bpf_mprog_fp **fp, struct bpf_mprog_cp **cp) { *fp = &entry->fp_items[idx]; *cp = &entry->parent->cp_items[idx]; } static inline void bpf_mprog_write(struct bpf_mprog_fp *fp, struct bpf_mprog_cp *cp, struct bpf_tuple *tuple) { WRITE_ONCE(fp->prog, tuple->prog); cp->link = tuple->link; } int bpf_mprog_attach(struct bpf_mprog_entry *entry, struct bpf_mprog_entry **entry_new, struct bpf_prog *prog_new, struct bpf_link *link, struct bpf_prog *prog_old, u32 flags, u32 id_or_fd, u64 revision); int bpf_mprog_detach(struct bpf_mprog_entry *entry, struct bpf_mprog_entry **entry_new, struct bpf_prog *prog, struct bpf_link *link, u32 flags, u32 id_or_fd, u64 revision); int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, struct bpf_mprog_entry *entry); static inline bool bpf_mprog_supported(enum bpf_prog_type type) { switch (type) { case BPF_PROG_TYPE_SCHED_CLS: return true; default: return false; } } #endif /* __BPF_MPROG_H */
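/*
 * Illustrative sketch (not part of this header): one way an in-kernel user could wire
 * up the "Attach case" documented above. The @loc attach location, its __rcu pointer
 * handling and the external @lock are hypothetical stand-ins for the caller's own
 * state (e.g. RTNL plus the tcx_ingress/tcx_egress pointers in the tcx case), and the
 * bpf_mprog_bundle behind @loc is assumed to exist already. Only the bpf_mprog_*()
 * calls are the real API; assumes <linux/mutex.h> and <linux/rcupdate.h>.
 */
static int example_mprog_attach(struct bpf_mprog_entry __rcu **loc,
				struct mutex *lock, struct bpf_prog *prog,
				u32 flags, u32 id_or_fd, u64 revision)
{
	struct bpf_mprog_entry *entry, *entry_new;
	int ret;

	mutex_lock(lock);					/* bpf_mprog user-side lock */
	entry = rcu_dereference_protected(*loc, lockdep_is_held(lock));
	ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, NULL,
			       flags, id_or_fd, revision);
	if (!ret) {
		if (entry != entry_new) {
			/* swap @entry to @entry_new at the attach location */
			rcu_assign_pointer(*loc, entry_new);
			/* ensure there are no inflight users of @entry */
			synchronize_rcu();
		}
		bpf_mprog_commit(entry);
	}
	mutex_unlock(lock);					/* bpf_mprog user-side unlock */
	return ret;
}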
/* SPDX-License-Identifier: GPL-2.0 */ /* * * Definitions for the mount interface. This describes the in-kernel linked * list of mounted filesystems. * * Author: Marco van Wieringen <mvw@planets.elm.net> * */ #ifndef _LINUX_MOUNT_H #define _LINUX_MOUNT_H #include <linux/types.h> #include <asm/barrier.h> struct super_block; struct dentry; struct user_namespace; struct mnt_idmap; struct file_system_type; struct fs_context; struct file; struct path; #define MNT_NOSUID 0x01 #define MNT_NODEV 0x02 #define MNT_NOEXEC 0x04 #define MNT_NOATIME 0x08 #define MNT_NODIRATIME 0x10 #define MNT_RELATIME 0x20 #define MNT_READONLY 0x40 /* does the user want this to be r/o? */ #define MNT_NOSYMFOLLOW 0x80 #define MNT_SHRINKABLE 0x100 #define MNT_WRITE_HOLD 0x200 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is an unbindable mount */ /* * MNT_SHARED_MASK is the set of flags that should be cleared when a * mount becomes shared. Currently, this is only the flag that says a * mount cannot be bind mounted, since this is how we create a mount * that shares events with another mount. If you add a new MNT_* * flag, consider how it interacts with shared mounts. */ #define MNT_SHARED_MASK (MNT_UNBINDABLE) #define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ | MNT_READONLY | MNT_NOSYMFOLLOW) #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB) #define MNT_INTERNAL 0x4000 #define MNT_LOCK_ATIME 0x040000 #define MNT_LOCK_NOEXEC 0x080000 #define MNT_LOCK_NOSUID 0x100000 #define MNT_LOCK_NODEV 0x200000 #define MNT_LOCK_READONLY 0x400000 #define MNT_LOCKED 0x800000 #define MNT_DOOMED 0x1000000 #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 #define MNT_UMOUNT 0x8000000 #define MNT_ONRB 0x10000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; struct mnt_idmap *mnt_idmap; } __randomize_layout; static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { /* Pairs with smp_store_release() in do_idmap_mount(). 
*/ return smp_load_acquire(&mnt->mnt_idmap); } extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); extern void mnt_make_shortterm(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(const struct path *path); extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path); int mnt_get_write_access(struct vfsmount *mnt); void mnt_put_write_access(struct vfsmount *mnt); extern struct vfsmount *fc_mount(struct fs_context *fc); extern struct vfsmount *vfs_create_mount(struct fs_context *fc); extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, const char *name, void *data); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); extern bool path_is_mountpoint(const struct path *path); extern bool our_mnt(struct vfsmount *mnt); extern struct vfsmount *kern_mount(struct file_system_type *); extern void kern_unmount(struct vfsmount *mnt); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(const char *, const char __user *, const char *, unsigned long, void *); extern struct vfsmount *collect_mounts(const struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, struct vfsmount *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); #endif /* _LINUX_MOUNT_H */
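/*
 * Illustrative sketch (not part of this header): the usual pattern for code that
 * wants to modify something on a mount. example_modify() and the elided operation on
 * path->dentry are hypothetical; the real contract is the mnt_want_write()/
 * mnt_drop_write() pairing declared above, which returns -EROFS for read-only mounts
 * or superblocks and may sleep. Assumes <linux/mount.h> and <linux/path.h>.
 */
static int example_modify(const struct path *path)
{
	int err;

	err = mnt_want_write(path->mnt);	/* pin write access to the mount */
	if (err)
		return err;

	/* ... perform the actual change on path->dentry here ... */
	err = 0;

	mnt_drop_write(path->mnt);		/* always paired with mnt_want_write() */
	return err;
}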
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds */ /* * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles * or rs-channels. It also implements echoing, cooked mode etc. * * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0. * * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the * tty_struct and tty_queue structures. 
Previously there was an array * of 256 tty_struct's which was statically allocated, and the * tty_queue structures were allocated at boot time. Both are now * dynamically allocated only when the tty is open. * * Also restructured routines so that there is more of a separation * between the high-level tty routines (tty_io.c and tty_ioctl.c) and * the low-level tty routines (serial.c, pty.c, console.c). This * makes for cleaner and more compact code. -TYT, 9/17/92 * * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines * which can be dynamically activated and de-activated by the line * discipline handling modules (like SLIP). * * NOTE: pay no attention to the line discipline code (yet); its * interface is still subject to change in this version... * -- TYT, 1/31/92 * * Added functionality to the OPOST tty handling. No delays, but all * other bits should be there. * -- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993. * * Rewrote canonical mode and added more termios flags. * -- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94 * * Reorganized FASYNC support so mouse code can share it. * -- ctm@ardi.com, 9Sep95 * * New TIOCLINUX variants added. * -- mj@k332.feld.cvut.cz, 19-Nov-95 * * Restrict vt switching via ioctl() * -- grif@cs.ucr.edu, 5-Dec-95 * * Move console and virtual terminal code to more appropriate files, * implement CONFIG_VT and generalize console device interface. * -- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97 * * Rewrote tty_init_dev and tty_release_dev to eliminate races. * -- Bill Hawes <whawes@star.net>, June 97 * * Added devfs support. * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998 * * Added support for a Unix98-style ptmx device. * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998 * * Reduced memory usage for older ARM systems * -- Russell King <rmk@arm.linux.org.uk> * * Move do_SAK() into process context. Less stack use in devfs functions. * alloc_tty_struct() always uses kmalloc() * -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01 */ #include <linux/types.h> #include <linux/major.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/fcntl.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/devpts_fs.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/console.h> #include <linux/timer.h> #include <linux/ctype.h> #include <linux/kd.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/ppp-ioctl.h> #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/module.h> #include <linux/device.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/delay.h> #include <linux/seq_file.h> #include <linux/serial.h> #include <linux/ratelimit.h> #include <linux/compat.h> #include <linux/uaccess.h> #include <linux/termios_internal.h> #include <linux/fs.h> #include <linux/kbd_kern.h> #include <linux/vt_kern.h> #include <linux/selection.h> #include <linux/kmod.h> #include <linux/nsproxy.h> #include "tty.h" #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP # define tty_debug_hangup(tty, f, args...) tty_debug(tty, f, ##args) #else # define tty_debug_hangup(tty, f, args...) 
do { } while (0) #endif #define TTY_PARANOIA_CHECK 1 #define CHECK_TTY_COUNT 1 struct ktermios tty_std_termios = { /* for the benefit of tty drivers */ .c_iflag = ICRNL | IXON, .c_oflag = OPOST | ONLCR, .c_cflag = B38400 | CS8 | CREAD | HUPCL, .c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN, .c_cc = INIT_C_CC, .c_ispeed = 38400, .c_ospeed = 38400, /* .c_line = N_TTY, */ }; EXPORT_SYMBOL(tty_std_termios); /* This list gets poked at by procfs and various bits of boot up code. This * could do with some rationalisation such as pulling the tty proc function * into this file. */ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ /* Mutex to protect creating and releasing a tty */ DEFINE_MUTEX(tty_mutex); static ssize_t tty_read(struct kiocb *, struct iov_iter *); static ssize_t tty_write(struct kiocb *, struct iov_iter *); static __poll_t tty_poll(struct file *, poll_table *); static int tty_open(struct inode *, struct file *); #ifdef CONFIG_COMPAT static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #else #define tty_compat_ioctl NULL #endif static int __tty_fasync(int fd, struct file *filp, int on); static int tty_fasync(int fd, struct file *filp, int on); static void release_tty(struct tty_struct *tty, int idx); /** * free_tty_struct - free a disused tty * @tty: tty struct to free * * Free the write buffers, tty queue and tty memory itself. * * Locking: none. Must be called after tty is definitely unused */ static void free_tty_struct(struct tty_struct *tty) { tty_ldisc_deinit(tty); put_device(tty->dev); kvfree(tty->write_buf); kfree(tty); } static inline struct tty_struct *file_tty(struct file *file) { return ((struct tty_file_private *)file->private_data)->tty; } int tty_alloc_file(struct file *file) { struct tty_file_private *priv; priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; file->private_data = priv; return 0; } /* Associate a new file with the tty structure */ void tty_add_file(struct tty_struct *tty, struct file *file) { struct tty_file_private *priv = file->private_data; priv->tty = tty; priv->file = file; spin_lock(&tty->files_lock); list_add(&priv->list, &tty->tty_files); spin_unlock(&tty->files_lock); } /** * tty_free_file - free file->private_data * @file: to free private_data of * * This shall be used only for fail path handling when tty_add_file was not * called yet. */ void tty_free_file(struct file *file) { struct tty_file_private *priv = file->private_data; file->private_data = NULL; kfree(priv); } /* Delete file from its tty */ static void tty_del_file(struct file *file) { struct tty_file_private *priv = file->private_data; struct tty_struct *tty = priv->tty; spin_lock(&tty->files_lock); list_del(&priv->list); spin_unlock(&tty->files_lock); tty_free_file(file); } /** * tty_name - return tty naming * @tty: tty structure * * Convert a tty structure into a name. The name reflects the kernel naming * policy and if udev is in use may not reflect user space * * Locking: none */ const char *tty_name(const struct tty_struct *tty) { if (!tty) /* Hmm. NULL pointer. That's fun. 
*/ return "NULL tty"; return tty->name; } EXPORT_SYMBOL(tty_name); const char *tty_driver_name(const struct tty_struct *tty) { if (!tty || !tty->driver) return ""; return tty->driver->name; } static int tty_paranoia_check(struct tty_struct *tty, struct inode *inode, const char *routine) { #ifdef TTY_PARANOIA_CHECK if (!tty) { pr_warn("(%d:%d): %s: NULL tty\n", imajor(inode), iminor(inode), routine); return 1; } #endif return 0; } /* Caller must hold tty_lock */ static void check_tty_count(struct tty_struct *tty, const char *routine) { #ifdef CHECK_TTY_COUNT struct list_head *p; int count = 0, kopen_count = 0; spin_lock(&tty->files_lock); list_for_each(p, &tty->tty_files) { count++; } spin_unlock(&tty->files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) count++; if (tty_port_kopened(tty->port)) kopen_count++; if (tty->count != (count + kopen_count)) { tty_warn(tty, "%s: tty->count(%d) != (#fd's(%d) + #kopen's(%d))\n", routine, tty->count, count, kopen_count); } #endif } /** * get_tty_driver - find device of a tty * @device: device identifier * @index: returns the index of the tty * * This routine returns a tty driver structure, given a device number and also * passes back the index number. * * Locking: caller must hold tty_mutex */ static struct tty_driver *get_tty_driver(dev_t device, int *index) { struct tty_driver *p; list_for_each_entry(p, &tty_drivers, tty_drivers) { dev_t base = MKDEV(p->major, p->minor_start); if (device < base || device >= base + p->num) continue; *index = device - base; return tty_driver_kref_get(p); } return NULL; } /** * tty_dev_name_to_number - return dev_t for device name * @name: user space name of device under /dev * @number: pointer to dev_t that this function will populate * * This function converts device names like ttyS0 or ttyUSB1 into dev_t like * (4, 64) or (188, 1). If no corresponding driver is registered then the * function returns -%ENODEV. * * Locking: this acquires tty_mutex to protect the tty_drivers list from * being modified while we are traversing it, and makes sure to * release it before exiting. */ int tty_dev_name_to_number(const char *name, dev_t *number) { struct tty_driver *p; int ret; int index, prefix_length = 0; const char *str; for (str = name; *str && !isdigit(*str); str++) ; if (!*str) return -EINVAL; ret = kstrtoint(str, 10, &index); if (ret) return ret; prefix_length = str - name; mutex_lock(&tty_mutex); list_for_each_entry(p, &tty_drivers, tty_drivers) if (prefix_length == strlen(p->name) && strncmp(name, p->name, prefix_length) == 0) { if (index < p->num) { *number = MKDEV(p->major, p->minor_start + index); goto out; } } /* if here then driver wasn't found */ ret = -ENODEV; out: mutex_unlock(&tty_mutex); return ret; } EXPORT_SYMBOL_GPL(tty_dev_name_to_number); #ifdef CONFIG_CONSOLE_POLL /** * tty_find_polling_driver - find device of a polled tty * @name: name string to match * @line: pointer to resulting tty line nr * * This routine returns a tty driver structure, given a name and the condition * that the tty driver is capable of polled operation. 
*/ struct tty_driver *tty_find_polling_driver(char *name, int *line) { struct tty_driver *p, *res = NULL; int tty_line = 0; int len; char *str, *stp; for (str = name; *str; str++) if ((*str >= '0' && *str <= '9') || *str == ',') break; if (!*str) return NULL; len = str - name; tty_line = simple_strtoul(str, &str, 10); mutex_lock(&tty_mutex); /* Search through the tty devices to look for a match */ list_for_each_entry(p, &tty_drivers, tty_drivers) { if (!len || strncmp(name, p->name, len) != 0) continue; stp = str; if (*stp == ',') stp++; if (*stp == '\0') stp = NULL; if (tty_line >= 0 && tty_line < p->num && p->ops && p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) { res = tty_driver_kref_get(p); *line = tty_line; break; } } mutex_unlock(&tty_mutex); return res; } EXPORT_SYMBOL_GPL(tty_find_polling_driver); #endif static ssize_t hung_up_tty_read(struct kiocb *iocb, struct iov_iter *to) { return 0; } static ssize_t hung_up_tty_write(struct kiocb *iocb, struct iov_iter *from) { return -EIO; } /* No kernel lock held - none needed ;) */ static __poll_t hung_up_tty_poll(struct file *filp, poll_table *wait) { return EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | EPOLLWRNORM; } static long hung_up_tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return cmd == TIOCSPGRP ? -ENOTTY : -EIO; } static long hung_up_tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return cmd == TIOCSPGRP ? -ENOTTY : -EIO; } static int hung_up_tty_fasync(int fd, struct file *file, int on) { return -ENOTTY; } static void tty_show_fdinfo(struct seq_file *m, struct file *file) { struct tty_struct *tty = file_tty(file); if (tty && tty->ops && tty->ops->show_fdinfo) tty->ops->show_fdinfo(tty, m); } static const struct file_operations tty_fops = { .llseek = no_llseek, .read_iter = tty_read, .write_iter = tty_write, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, .open = tty_open, .release = tty_release, .fasync = tty_fasync, .show_fdinfo = tty_show_fdinfo, }; static const struct file_operations console_fops = { .llseek = no_llseek, .read_iter = tty_read, .write_iter = redirected_tty_write, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, .open = tty_open, .release = tty_release, .fasync = tty_fasync, }; static const struct file_operations hung_up_tty_fops = { .llseek = no_llseek, .read_iter = hung_up_tty_read, .write_iter = hung_up_tty_write, .poll = hung_up_tty_poll, .unlocked_ioctl = hung_up_tty_ioctl, .compat_ioctl = hung_up_tty_compat_ioctl, .release = tty_release, .fasync = hung_up_tty_fasync, }; static DEFINE_SPINLOCK(redirect_lock); static struct file *redirect; /** * tty_wakeup - request more data * @tty: terminal * * Internal and external helper for wakeups of tty. This function informs the * line discipline if present that the driver is ready to receive more output * data. 
*/ void tty_wakeup(struct tty_struct *tty) { struct tty_ldisc *ld; if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) { ld = tty_ldisc_ref(tty); if (ld) { if (ld->ops->write_wakeup) ld->ops->write_wakeup(tty); tty_ldisc_deref(ld); } } wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT); } EXPORT_SYMBOL_GPL(tty_wakeup); /** * tty_release_redirect - Release a redirect on a pty if present * @tty: tty device * * This is available to the pty code so if the master closes, if the slave is a * redirect it can release the redirect. */ static struct file *tty_release_redirect(struct tty_struct *tty) { struct file *f = NULL; spin_lock(&redirect_lock); if (redirect && file_tty(redirect) == tty) { f = redirect; redirect = NULL; } spin_unlock(&redirect_lock); return f; } /** * __tty_hangup - actual handler for hangup events * @tty: tty device * @exit_session: if non-zero, signal all foreground group processes * * This can be called by a "kworker" kernel thread. That is process synchronous * but doesn't hold any locks, so we need to make sure we have the appropriate * locks for what we're doing. * * The hangup event clears any pending redirections onto the hung up device. It * ensures future writes will error and it does the needed line discipline * hangup and signal delivery. The tty object itself remains intact. * * Locking: * * BTM * * * redirect lock for undoing redirection * * file list lock for manipulating list of ttys * * tty_ldiscs_lock from called functions * * termios_rwsem resetting termios data * * tasklist_lock to walk task list for hangup event * * * ->siglock to protect ->signal/->sighand * */ static void __tty_hangup(struct tty_struct *tty, int exit_session) { struct file *cons_filp = NULL; struct file *filp, *f; struct tty_file_private *priv; int closecount = 0, n; int refs; if (!tty) return; f = tty_release_redirect(tty); tty_lock(tty); if (test_bit(TTY_HUPPED, &tty->flags)) { tty_unlock(tty); return; } /* * Some console devices aren't actually hung up for technical and * historical reasons, which can lead to indefinite interruptible * sleep in n_tty_read(). The following explicitly tells * n_tty_read() to abort readers. */ set_bit(TTY_HUPPING, &tty->flags); /* inuse_filps is protected by the single tty lock, * this really needs to change if we want to flush the * workqueue with the lock held. */ check_tty_count(tty, "tty_hangup"); spin_lock(&tty->files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(priv, &tty->tty_files, list) { filp = priv->file; if (filp->f_op->write_iter == redirected_tty_write) cons_filp = filp; if (filp->f_op->write_iter != tty_write) continue; closecount++; __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } spin_unlock(&tty->files_lock); refs = tty_signal_session_leader(tty, exit_session); /* Account for the p->signal references we killed */ while (refs--) tty_kref_put(tty); tty_ldisc_hangup(tty, cons_filp != NULL); spin_lock_irq(&tty->ctrl.lock); clear_bit(TTY_THROTTLED, &tty->flags); clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); put_pid(tty->ctrl.session); put_pid(tty->ctrl.pgrp); tty->ctrl.session = NULL; tty->ctrl.pgrp = NULL; tty->ctrl.pktstatus = 0; spin_unlock_irq(&tty->ctrl.lock); /* * If one of the devices matches a console pointer, we * cannot just call hangup() because that will cause * tty->count and state->count to go out of sync. * So we just call close() the right number of times. 
*/ if (cons_filp) { if (tty->ops->close) for (n = 0; n < closecount; n++) tty->ops->close(tty, cons_filp); } else if (tty->ops->hangup) tty->ops->hangup(tty); /* * We don't want to have driver/ldisc interactions beyond the ones * we did here. The driver layer expects no calls after ->hangup() * from the ldisc side, which is now guaranteed. */ set_bit(TTY_HUPPED, &tty->flags); clear_bit(TTY_HUPPING, &tty->flags); tty_unlock(tty); if (f) fput(f); } static void do_tty_hangup(struct work_struct *work) { struct tty_struct *tty = container_of(work, struct tty_struct, hangup_work); __tty_hangup(tty, 0); } /** * tty_hangup - trigger a hangup event * @tty: tty to hangup * * A carrier loss (virtual or otherwise) has occurred on @tty. Schedule a * hangup sequence to run after this event. */ void tty_hangup(struct tty_struct *tty) { tty_debug_hangup(tty, "hangup\n"); schedule_work(&tty->hangup_work); } EXPORT_SYMBOL(tty_hangup); /** * tty_vhangup - process vhangup * @tty: tty to hangup * * The user has asked via system call for the terminal to be hung up. We do * this synchronously so that when the syscall returns the process is complete. * That guarantee is necessary for security reasons. */ void tty_vhangup(struct tty_struct *tty) { tty_debug_hangup(tty, "vhangup\n"); __tty_hangup(tty, 0); } EXPORT_SYMBOL(tty_vhangup); /** * tty_vhangup_self - process vhangup for own ctty * * Perform a vhangup on the current controlling tty */ void tty_vhangup_self(void) { struct tty_struct *tty; tty = get_current_tty(); if (tty) { tty_vhangup(tty); tty_kref_put(tty); } } /** * tty_vhangup_session - hangup session leader exit * @tty: tty to hangup * * The session leader is exiting and hanging up its controlling terminal. * Every process in the foreground process group is signalled %SIGHUP. * * We do this synchronously so that when the syscall returns the process is * complete. That guarantee is necessary for security reasons. */ void tty_vhangup_session(struct tty_struct *tty) { tty_debug_hangup(tty, "session hangup\n"); __tty_hangup(tty, 1); } /** * tty_hung_up_p - was tty hung up * @filp: file pointer of tty * * Return: true if the tty has been subject to a vhangup or a carrier loss */ int tty_hung_up_p(struct file *filp) { return (filp && filp->f_op == &hung_up_tty_fops); } EXPORT_SYMBOL(tty_hung_up_p); void __stop_tty(struct tty_struct *tty) { if (tty->flow.stopped) return; tty->flow.stopped = true; if (tty->ops->stop) tty->ops->stop(tty); } /** * stop_tty - propagate flow control * @tty: tty to stop * * Perform flow control to the driver. May be called on an already stopped * device and will not re-call the &tty_driver->stop() method. * * This functionality is used by both the line disciplines for halting incoming * flow and by the driver. It may therefore be called from any context, may be * under the tty %atomic_write_lock but not always. * * Locking: * flow.lock */ void stop_tty(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty->flow.lock, flags); __stop_tty(tty); spin_unlock_irqrestore(&tty->flow.lock, flags); } EXPORT_SYMBOL(stop_tty); void __start_tty(struct tty_struct *tty) { if (!tty->flow.stopped || tty->flow.tco_stopped) return; tty->flow.stopped = false; if (tty->ops->start) tty->ops->start(tty); tty_wakeup(tty); } /** * start_tty - propagate flow control * @tty: tty to start * * Start a tty that has been stopped if at all possible. If @tty was previously * stopped and is now being started, the &tty_driver->start() method is invoked * and the line discipline woken. 
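 *
 * As a rough illustration (hypothetical condition, not code from this file),
 * a driver or line discipline reacting to flow-control state might simply do:
 *
 *	if (peer_asked_us_to_stop)
 *		stop_tty(tty);
 *	else
 *		start_tty(tty);
 *
 * Both helpers tolerate redundant calls, so the caller need not track whether
 * the tty is already stopped.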
 *
 * Locking:
 *	flow.lock
 */
void start_tty(struct tty_struct *tty)
{
	unsigned long flags;

	spin_lock_irqsave(&tty->flow.lock, flags);
	__start_tty(tty);
	spin_unlock_irqrestore(&tty->flow.lock, flags);
}
EXPORT_SYMBOL(start_tty);

static void tty_update_time(struct tty_struct *tty, bool mtime)
{
	time64_t sec = ktime_get_real_seconds();
	struct tty_file_private *priv;

	spin_lock(&tty->files_lock);
	list_for_each_entry(priv, &tty->tty_files, list) {
		struct inode *inode = file_inode(priv->file);
		struct timespec64 time = mtime ? inode_get_mtime(inode) :
						 inode_get_atime(inode);

		/*
		 * We only care if the two values differ in anything other
		 * than the lower three bits (i.e. every 8 seconds). If so,
		 * then we can update the time of the tty device, otherwise
		 * it could be construed as a security leak to let userspace
		 * know the exact timing of the tty.
		 */
		if ((sec ^ time.tv_sec) & ~7) {
			if (mtime)
				inode_set_mtime(inode, sec, 0);
			else
				inode_set_atime(inode, sec, 0);
		}
	}
	spin_unlock(&tty->files_lock);
}

/*
 * Iterate on the ldisc ->read() function until we've gotten all
 * the data the ldisc has for us.
 *
 * The "cookie" is something that the ldisc read function can fill
 * in to let us know that there is more data to be had.
 *
 * We promise to continue to call the ldisc until it stops returning
 * data or clears the cookie. The cookie may be something that the
 * ldisc maintains state for and needs to free.
 */
static ssize_t iterate_tty_read(struct tty_ldisc *ld, struct tty_struct *tty,
				struct file *file, struct iov_iter *to)
{
	void *cookie = NULL;
	unsigned long offset = 0;
	ssize_t retval = 0;
	size_t copied, count = iov_iter_count(to);
	u8 kernel_buf[64];

	do {
		ssize_t size = min(count, sizeof(kernel_buf));

		size = ld->ops->read(tty, file, kernel_buf, size, &cookie, offset);
		if (!size)
			break;

		if (size < 0) {
			/* Did we have an earlier error (i.e. -EFAULT)? */
			if (retval)
				break;
			retval = size;

			/*
			 * -EOVERFLOW means we didn't have enough space
			 * for a whole packet, and we shouldn't return
			 * a partial result.
			 */
			if (retval == -EOVERFLOW)
				offset = 0;
			break;
		}

		copied = copy_to_iter(kernel_buf, size, to);
		offset += copied;
		count -= copied;

		/*
		 * If the user copy failed, we still need to do another ->read()
		 * call if we had a cookie to let the ldisc clear up.
		 *
		 * But make sure size is zeroed.
		 */
		if (unlikely(copied != size)) {
			count = 0;
			retval = -EFAULT;
		}
	} while (cookie);

	/* We always clear the tty buffer in case it contained passwords */
	memzero_explicit(kernel_buf, sizeof(kernel_buf));

	return offset ? offset : retval;
}

/**
 * tty_read - read method for tty device files
 * @iocb: kernel I/O control block
 * @to: destination for the data read
 *
 * Perform the read system call function on this terminal device. Checks
 * for hung up devices before calling the line discipline method.
 *
 * Locking:
 *	Locks the line discipline internally while needed. Multiple read calls
 *	may be outstanding in parallel.
 */
static ssize_t tty_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct tty_struct *tty = file_tty(file);
	struct tty_ldisc *ld;
	ssize_t ret;

	if (tty_paranoia_check(tty, inode, "tty_read"))
		return -EIO;

	if (!tty || tty_io_error(tty))
		return -EIO;

	/* We want to wait for the line discipline to sort out in this
	 * situation.
*/ ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_read(iocb, to); ret = -EIO; if (ld->ops->read) ret = iterate_tty_read(ld, tty, file, to); tty_ldisc_deref(ld); if (ret > 0) tty_update_time(tty, false); return ret; } void tty_write_unlock(struct tty_struct *tty) { mutex_unlock(&tty->atomic_write_lock); wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT); } int tty_write_lock(struct tty_struct *tty, bool ndelay) { if (!mutex_trylock(&tty->atomic_write_lock)) { if (ndelay) return -EAGAIN; if (mutex_lock_interruptible(&tty->atomic_write_lock)) return -ERESTARTSYS; } return 0; } /* * Split writes up in sane blocksizes to avoid * denial-of-service type attacks */ static ssize_t iterate_tty_write(struct tty_ldisc *ld, struct tty_struct *tty, struct file *file, struct iov_iter *from) { size_t chunk, count = iov_iter_count(from); ssize_t ret, written = 0; ret = tty_write_lock(tty, file->f_flags & O_NDELAY); if (ret < 0) return ret; /* * We chunk up writes into a temporary buffer. This * simplifies low-level drivers immensely, since they * don't have locking issues and user mode accesses. * * But if TTY_NO_WRITE_SPLIT is set, we should use a * big chunk-size.. * * The default chunk-size is 2kB, because the NTTY * layer has problems with bigger chunks. It will * claim to be able to handle more characters than * it actually does. */ chunk = 2048; if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags)) chunk = 65536; if (count < chunk) chunk = count; /* write_buf/write_cnt is protected by the atomic_write_lock mutex */ if (tty->write_cnt < chunk) { u8 *buf_chunk; if (chunk < 1024) chunk = 1024; buf_chunk = kvmalloc(chunk, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!buf_chunk) { ret = -ENOMEM; goto out; } kvfree(tty->write_buf); tty->write_cnt = chunk; tty->write_buf = buf_chunk; } /* Do the write .. */ for (;;) { size_t size = min(chunk, count); ret = -EFAULT; if (copy_from_iter(tty->write_buf, size, from) != size) break; ret = ld->ops->write(tty, file, tty->write_buf, size); if (ret <= 0) break; written += ret; if (ret > size) break; /* FIXME! Have Al check this! */ if (ret != size) iov_iter_revert(from, size-ret); count -= ret; if (!count) break; ret = -ERESTARTSYS; if (signal_pending(current)) break; cond_resched(); } if (written) { tty_update_time(tty, true); ret = written; } out: tty_write_unlock(tty); return ret; } #ifdef CONFIG_PRINT_QUOTA_WARNING /** * tty_write_message - write a message to a certain tty, not just the console. * @tty: the destination tty_struct * @msg: the message to write * * This is used for messages that need to be redirected to a specific tty. We * don't put it into the syslog queue right now maybe in the future if really * needed. * * We must still hold the BTM and test the CLOSING flag for the moment. * * This function is DEPRECATED, do not use in new code. 
*/ void tty_write_message(struct tty_struct *tty, char *msg) { if (tty) { mutex_lock(&tty->atomic_write_lock); tty_lock(tty); if (tty->ops->write && tty->count > 0) tty->ops->write(tty, msg, strlen(msg)); tty_unlock(tty); tty_write_unlock(tty); } } #endif static ssize_t file_tty_write(struct file *file, struct kiocb *iocb, struct iov_iter *from) { struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; ssize_t ret; if (tty_paranoia_check(tty, file_inode(file), "tty_write")) return -EIO; if (!tty || !tty->ops->write || tty_io_error(tty)) return -EIO; /* Short term debug to catch buggy drivers */ if (tty->ops->write_room == NULL) tty_err(tty, "missing write_room method\n"); ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_write(iocb, from); if (!ld->ops->write) ret = -EIO; else ret = iterate_tty_write(ld, tty, file, from); tty_ldisc_deref(ld); return ret; } /** * tty_write - write method for tty device file * @iocb: kernel I/O control block * @from: iov_iter with data to write * * Write data to a tty device via the line discipline. * * Locking: * Locks the line discipline as required * Writes to the tty driver are serialized by the atomic_write_lock * and are then processed in chunks to the device. The line * discipline write method will not be invoked in parallel for * each device. */ static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) { return file_tty_write(iocb->ki_filp, iocb, from); } ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *p = NULL; spin_lock(&redirect_lock); if (redirect) p = get_file(redirect); spin_unlock(&redirect_lock); /* * We know the redirected tty is just another tty, we can * call file_tty_write() directly with that file pointer. */ if (p) { ssize_t res; res = file_tty_write(p, iocb, iter); fput(p); return res; } return tty_write(iocb, iter); } /** * tty_send_xchar - send priority character * @tty: the tty to send to * @ch: xchar to send * * Send a high priority character to the tty even if stopped. * * Locking: none for xchar method, write ordering for write method. */ int tty_send_xchar(struct tty_struct *tty, u8 ch) { bool was_stopped = tty->flow.stopped; if (tty->ops->send_xchar) { down_read(&tty->termios_rwsem); tty->ops->send_xchar(tty, ch); up_read(&tty->termios_rwsem); return 0; } if (tty_write_lock(tty, false) < 0) return -ERESTARTSYS; down_read(&tty->termios_rwsem); if (was_stopped) start_tty(tty); tty->ops->write(tty, &ch, 1); if (was_stopped) stop_tty(tty); up_read(&tty->termios_rwsem); tty_write_unlock(tty); return 0; } /** * pty_line_name - generate name for a pty * @driver: the tty driver in use * @index: the minor number * @p: output buffer of at least 6 bytes * * Generate a name from a @driver reference and write it to the output buffer * @p. * * Locking: None */ static void pty_line_name(struct tty_driver *driver, int index, char *p) { static const char ptychar[] = "pqrstuvwxyzabcde"; int i = index + driver->name_base; /* ->name is initialized to "ttyp", but "tty" is expected */ sprintf(p, "%s%c%x", driver->subtype == PTY_TYPE_SLAVE ? "tty" : driver->name, ptychar[i >> 4 & 0xf], i & 0xf); } /** * tty_line_name - generate name for a tty * @driver: the tty driver in use * @index: the minor number * @p: output buffer of at least 7 bytes * * Generate a name from a @driver reference and write it to the output buffer * @p. 
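 *
 * For instance (illustrative values, assuming a zero @name_base): a driver
 * named "ttyS" gives
 *
 *	tty_line_name(driver, 3, buf);		buf now holds "ttyS3"
 *
 * while the legacy pty scheme above maps slave index 0 to "ttyp0" and
 * slave index 31 to "ttyqf".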
* * Locking: None */ static ssize_t tty_line_name(struct tty_driver *driver, int index, char *p) { if (driver->flags & TTY_DRIVER_UNNUMBERED_NODE) return sprintf(p, "%s", driver->name); else return sprintf(p, "%s%d", driver->name, index + driver->name_base); } /** * tty_driver_lookup_tty() - find an existing tty, if any * @driver: the driver for the tty * @file: file object * @idx: the minor number * * Return: the tty, if found. If not found, return %NULL or ERR_PTR() if the * driver lookup() method returns an error. * * Locking: tty_mutex must be held. If the tty is found, bump the tty kref. */ static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver, struct file *file, int idx) { struct tty_struct *tty; if (driver->ops->lookup) { if (!file) tty = ERR_PTR(-EIO); else tty = driver->ops->lookup(driver, file, idx); } else { if (idx >= driver->num) return ERR_PTR(-EINVAL); tty = driver->ttys[idx]; } if (!IS_ERR(tty)) tty_kref_get(tty); return tty; } /** * tty_init_termios - helper for termios setup * @tty: the tty to set up * * Initialise the termios structure for this tty. This runs under the * %tty_mutex currently so we can be relaxed about ordering. */ void tty_init_termios(struct tty_struct *tty) { struct ktermios *tp; int idx = tty->index; if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) tty->termios = tty->driver->init_termios; else { /* Check for lazy saved data */ tp = tty->driver->termios[idx]; if (tp != NULL) { tty->termios = *tp; tty->termios.c_line = tty->driver->init_termios.c_line; } else tty->termios = tty->driver->init_termios; } /* Compatibility until drivers always set this */ tty->termios.c_ispeed = tty_termios_input_baud_rate(&tty->termios); tty->termios.c_ospeed = tty_termios_baud_rate(&tty->termios); } EXPORT_SYMBOL_GPL(tty_init_termios); /** * tty_standard_install - usual tty->ops->install * @driver: the driver for the tty * @tty: the tty * * If the @driver overrides @tty->ops->install, it still can call this function * to perform the standard install operations. */ int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty) { tty_init_termios(tty); tty_driver_kref_get(driver); tty->count++; driver->ttys[tty->index] = tty; return 0; } EXPORT_SYMBOL_GPL(tty_standard_install); /** * tty_driver_install_tty() - install a tty entry in the driver * @driver: the driver for the tty * @tty: the tty * * Install a tty object into the driver tables. The @tty->index field will be * set by the time this is called. This method is responsible for ensuring any * need additional structures are allocated and configured. * * Locking: tty_mutex for now */ static int tty_driver_install_tty(struct tty_driver *driver, struct tty_struct *tty) { return driver->ops->install ? driver->ops->install(driver, tty) : tty_standard_install(driver, tty); } /** * tty_driver_remove_tty() - remove a tty from the driver tables * @driver: the driver for the tty * @tty: tty to remove * * Remove a tty object from the driver tables. The tty->index field will be set * by the time this is called. * * Locking: tty_mutex for now */ static void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty) { if (driver->ops->remove) driver->ops->remove(driver, tty); else driver->ttys[tty->index] = NULL; } /** * tty_reopen() - fast re-open of an open tty * @tty: the tty to open * * Re-opens on master ptys are not allowed and return -%EIO. * * Locking: Caller must hold tty_lock * Return: 0 on success, -errno on error. 
*/ static int tty_reopen(struct tty_struct *tty) { struct tty_driver *driver = tty->driver; struct tty_ldisc *ld; int retval = 0; if (driver->type == TTY_DRIVER_TYPE_PTY && driver->subtype == PTY_TYPE_MASTER) return -EIO; if (!tty->count) return -EAGAIN; if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN)) return -EBUSY; ld = tty_ldisc_ref_wait(tty); if (ld) { tty_ldisc_deref(ld); } else { retval = tty_ldisc_lock(tty, 5 * HZ); if (retval) return retval; if (!tty->ldisc) retval = tty_ldisc_reinit(tty, tty->termios.c_line); tty_ldisc_unlock(tty); } if (retval == 0) tty->count++; return retval; } /** * tty_init_dev - initialise a tty device * @driver: tty driver we are opening a device on * @idx: device index * * Prepare a tty device. This may not be a "new" clean device but could also be * an active device. The pty drivers require special handling because of this. * * Locking: * The function is called under the tty_mutex, which protects us from the * tty struct or driver itself going away. * * On exit the tty device has the line discipline attached and a reference * count of 1. If a pair was created for pty/tty use and the other was a pty * master then it too has a reference count of 1. * * WSH 06/09/97: Rewritten to remove races and properly clean up after a failed * open. The new code protects the open with a mutex, so it's really quite * straightforward. The mutex locking can probably be relaxed for the (most * common) case of reopening a tty. * * Return: new tty structure */ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) { struct tty_struct *tty; int retval; /* * First time open is complex, especially for PTY devices. * This code guarantees that either everything succeeds and the * TTY is ready for operation, or else the table slots are vacated * and the allocated memory released. (Except that the termios * may be retained.) */ if (!try_module_get(driver->owner)) return ERR_PTR(-ENODEV); tty = alloc_tty_struct(driver, idx); if (!tty) { retval = -ENOMEM; goto err_module_put; } tty_lock(tty); retval = tty_driver_install_tty(driver, tty); if (retval < 0) goto err_free_tty; if (!tty->port) tty->port = driver->ports[idx]; if (WARN_RATELIMIT(!tty->port, "%s: %s driver does not set tty->port. This would crash the kernel. Fix the driver!\n", __func__, tty->driver->name)) { retval = -EINVAL; goto err_release_lock; } retval = tty_ldisc_lock(tty, 5 * HZ); if (retval) goto err_release_lock; tty->port->itty = tty; /* * Structures all installed ... call the ldisc open routines. * If we fail here just call release_tty to clean up. No need * to decrement the use counts, as release_tty doesn't care. */ retval = tty_ldisc_setup(tty, tty->link); if (retval) goto err_release_tty; tty_ldisc_unlock(tty); /* Return the tty locked so that it cannot vanish under the caller */ return tty; err_free_tty: tty_unlock(tty); free_tty_struct(tty); err_module_put: module_put(driver->owner); return ERR_PTR(retval); /* call the tty release_tty routine to clean out this slot */ err_release_tty: tty_ldisc_unlock(tty); tty_info_ratelimited(tty, "ldisc open failed (%d), clearing slot %d\n", retval, idx); err_release_lock: tty_unlock(tty); release_tty(tty, idx); return ERR_PTR(retval); } /** * tty_save_termios() - save tty termios data in driver table * @tty: tty whose termios data to save * * Locking: Caller guarantees serialisation with tty_init_termios(). 
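 *
 * Hedged illustration only: a driver that tears its device down before the
 * final close (a hot-unplug path, say) could call
 *
 *	tty_save_termios(tty);
 *
 * so that a later tty_init_termios() on the same index starts from the
 * user's last settings rather than the driver defaults.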
*/ void tty_save_termios(struct tty_struct *tty) { struct ktermios *tp; int idx = tty->index; /* If the port is going to reset then it has no termios to save */ if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) return; /* Stash the termios data */ tp = tty->driver->termios[idx]; if (tp == NULL) { tp = kmalloc(sizeof(*tp), GFP_KERNEL); if (tp == NULL) return; tty->driver->termios[idx] = tp; } *tp = tty->termios; } EXPORT_SYMBOL_GPL(tty_save_termios); /** * tty_flush_works - flush all works of a tty/pty pair * @tty: tty device to flush works for (or either end of a pty pair) * * Sync flush all works belonging to @tty (and the 'other' tty). */ static void tty_flush_works(struct tty_struct *tty) { flush_work(&tty->SAK_work); flush_work(&tty->hangup_work); if (tty->link) { flush_work(&tty->link->SAK_work); flush_work(&tty->link->hangup_work); } } /** * release_one_tty - release tty structure memory * @work: work of tty we are obliterating * * Releases memory associated with a tty structure, and clears out the * driver table slots. This function is called when a device is no longer * in use. It also gets called when setup of a device fails. * * Locking: * takes the file list lock internally when working on the list of ttys * that the driver keeps. * * This method gets called from a work queue so that the driver private * cleanup ops can sleep (needed for USB at least) */ static void release_one_tty(struct work_struct *work) { struct tty_struct *tty = container_of(work, struct tty_struct, hangup_work); struct tty_driver *driver = tty->driver; struct module *owner = driver->owner; if (tty->ops->cleanup) tty->ops->cleanup(tty); tty_driver_kref_put(driver); module_put(owner); spin_lock(&tty->files_lock); list_del_init(&tty->tty_files); spin_unlock(&tty->files_lock); put_pid(tty->ctrl.pgrp); put_pid(tty->ctrl.session); free_tty_struct(tty); } static void queue_release_one_tty(struct kref *kref) { struct tty_struct *tty = container_of(kref, struct tty_struct, kref); /* The hangup queue is now free so we can reuse it rather than * waste a chunk of memory for each port. */ INIT_WORK(&tty->hangup_work, release_one_tty); schedule_work(&tty->hangup_work); } /** * tty_kref_put - release a tty kref * @tty: tty device * * Release a reference to the @tty device and if need be let the kref layer * destruct the object for us. */ void tty_kref_put(struct tty_struct *tty) { if (tty) kref_put(&tty->kref, queue_release_one_tty); } EXPORT_SYMBOL(tty_kref_put); /** * release_tty - release tty structure memory * @tty: tty device release * @idx: index of the tty device release * * Release both @tty and a possible linked partner (think pty pair), * and decrement the refcount of the backing module. * * Locking: * tty_mutex * takes the file list lock internally when working on the list of ttys * that the driver keeps. */ static void release_tty(struct tty_struct *tty, int idx) { /* This should always be true but check for the moment */ WARN_ON(tty->index != idx); WARN_ON(!mutex_is_locked(&tty_mutex)); if (tty->ops->shutdown) tty->ops->shutdown(tty); tty_save_termios(tty); tty_driver_remove_tty(tty->driver, tty); if (tty->port) tty->port->itty = NULL; if (tty->link) tty->link->port->itty = NULL; if (tty->port) tty_buffer_cancel_work(tty->port); if (tty->link) tty_buffer_cancel_work(tty->link->port); tty_kref_put(tty->link); tty_kref_put(tty); } /** * tty_release_checks - check a tty before real release * @tty: tty to check * @idx: index of the tty * * Performs some paranoid checking before true release of the @tty. 
This is a
 * no-op unless %TTY_PARANOIA_CHECK is defined.
 */
static int tty_release_checks(struct tty_struct *tty, int idx)
{
#ifdef TTY_PARANOIA_CHECK
	if (idx < 0 || idx >= tty->driver->num) {
		tty_debug(tty, "bad idx %d\n", idx);
		return -1;
	}

	/* not much to check for devpts */
	if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM)
		return 0;

	if (tty != tty->driver->ttys[idx]) {
		tty_debug(tty, "bad driver table[%d] = %p\n", idx,
			  tty->driver->ttys[idx]);
		return -1;
	}
	if (tty->driver->other) {
		struct tty_struct *o_tty = tty->link;

		if (o_tty != tty->driver->other->ttys[idx]) {
			tty_debug(tty, "bad other table[%d] = %p\n", idx,
				  tty->driver->other->ttys[idx]);
			return -1;
		}
		if (o_tty->link != tty) {
			tty_debug(tty, "bad link = %p\n", o_tty->link);
			return -1;
		}
	}
#endif
	return 0;
}

/**
 * tty_kclose - closes tty opened by tty_kopen
 * @tty: tty device
 *
 * Performs the final steps to release and free a tty device. It is the same as
 * tty_release_struct() except that it also resets %TTY_PORT_KOPENED flag on
 * @tty->port.
 */
void tty_kclose(struct tty_struct *tty)
{
	/*
	 * Ask the line discipline code to release its structures
	 */
	tty_ldisc_release(tty);

	/* Wait for pending work before tty destruction commences */
	tty_flush_works(tty);

	tty_debug_hangup(tty, "freeing structure\n");
	/*
	 * The release_tty function takes care of the details of clearing
	 * the slots and preserving the termios structure.
	 */
	mutex_lock(&tty_mutex);
	tty_port_set_kopened(tty->port, 0);
	release_tty(tty, tty->index);
	mutex_unlock(&tty_mutex);
}
EXPORT_SYMBOL_GPL(tty_kclose);

/**
 * tty_release_struct - release a tty struct
 * @tty: tty device
 * @idx: index of the tty
 *
 * Performs the final steps to release and free a tty device. It is roughly the
 * reverse of tty_init_dev().
 */
void tty_release_struct(struct tty_struct *tty, int idx)
{
	/*
	 * Ask the line discipline code to release its structures
	 */
	tty_ldisc_release(tty);

	/* Wait for pending work before tty destruction commences */
	tty_flush_works(tty);

	tty_debug_hangup(tty, "freeing structure\n");
	/*
	 * The release_tty function takes care of the details of clearing
	 * the slots and preserving the termios structure.
	 */
	mutex_lock(&tty_mutex);
	release_tty(tty, idx);
	mutex_unlock(&tty_mutex);
}
EXPORT_SYMBOL_GPL(tty_release_struct);

/**
 * tty_release - vfs callback for close
 * @inode: inode of tty
 * @filp: file pointer for handle to tty
 *
 * Called the last time each file handle is closed that references this tty.
 * There may however be several such references.
 *
 * Locking:
 *	Takes BKL. See tty_release_dev().
 *
 * Even releasing the tty structures is a tricky business. We have to be very
 * careful that the structures are all released at the same time, as interrupts
 * might otherwise get the wrong pointers.
 *
 * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
 * lead to double frees or releasing memory still in use.
*/ int tty_release(struct inode *inode, struct file *filp) { struct tty_struct *tty = file_tty(filp); struct tty_struct *o_tty = NULL; int do_sleep, final; int idx; long timeout = 0; int once = 1; if (tty_paranoia_check(tty, inode, __func__)) return 0; tty_lock(tty); check_tty_count(tty, __func__); __tty_fasync(-1, filp, 0); idx = tty->index; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) o_tty = tty->link; if (tty_release_checks(tty, idx)) { tty_unlock(tty); return 0; } tty_debug_hangup(tty, "releasing (count=%d)\n", tty->count); if (tty->ops->close) tty->ops->close(tty, filp); /* If tty is pty master, lock the slave pty (stable lock order) */ tty_lock_slave(o_tty); /* * Sanity check: if tty->count is going to zero, there shouldn't be * any waiters on tty->read_wait or tty->write_wait. We test the * wait queues and kick everyone out _before_ actually starting to * close. This ensures that we won't block while releasing the tty * structure. * * The test for the o_tty closing is necessary, since the master and * slave sides may close in any order. If the slave side closes out * first, its count will be one, since the master side holds an open. * Thus this test wouldn't be triggered at the time the slave closed, * so we do it now. */ while (1) { do_sleep = 0; if (tty->count <= 1) { if (waitqueue_active(&tty->read_wait)) { wake_up_poll(&tty->read_wait, EPOLLIN); do_sleep++; } if (waitqueue_active(&tty->write_wait)) { wake_up_poll(&tty->write_wait, EPOLLOUT); do_sleep++; } } if (o_tty && o_tty->count <= 1) { if (waitqueue_active(&o_tty->read_wait)) { wake_up_poll(&o_tty->read_wait, EPOLLIN); do_sleep++; } if (waitqueue_active(&o_tty->write_wait)) { wake_up_poll(&o_tty->write_wait, EPOLLOUT); do_sleep++; } } if (!do_sleep) break; if (once) { once = 0; tty_warn(tty, "read/write wait queue active!\n"); } schedule_timeout_killable(timeout); if (timeout < 120 * HZ) timeout = 2 * timeout + 1; else timeout = MAX_SCHEDULE_TIMEOUT; } if (o_tty) { if (--o_tty->count < 0) { tty_warn(tty, "bad slave count (%d)\n", o_tty->count); o_tty->count = 0; } } if (--tty->count < 0) { tty_warn(tty, "bad tty->count (%d)\n", tty->count); tty->count = 0; } /* * We've decremented tty->count, so we need to remove this file * descriptor off the tty->tty_files list; this serves two * purposes: * - check_tty_count sees the correct number of file descriptors * associated with this tty. * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ tty_del_file(filp); /* * Perform some housekeeping before deciding whether to return. * * If _either_ side is closing, make sure there aren't any * processes that still think tty or o_tty is their controlling * tty. */ if (!tty->count) { read_lock(&tasklist_lock); session_clear_tty(tty->ctrl.session); if (o_tty) session_clear_tty(o_tty->ctrl.session); read_unlock(&tasklist_lock); } /* check whether both sides are closing ... */ final = !tty->count && !(o_tty && o_tty->count); tty_unlock_slave(o_tty); tty_unlock(tty); /* At this point, the tty->count == 0 should ensure a dead tty * cannot be re-opened by a racing opener. */ if (!final) return 0; tty_debug_hangup(tty, "final close\n"); tty_release_struct(tty, idx); return 0; } /** * tty_open_current_tty - get locked tty of current task * @device: device number * @filp: file pointer to tty * @return: locked tty of the current task iff @device is /dev/tty * * Performs a re-open of the current task's controlling tty. 
* * We cannot return driver and index like for the other nodes because devpts * will not work then. It expects inodes to be from devpts FS. */ static struct tty_struct *tty_open_current_tty(dev_t device, struct file *filp) { struct tty_struct *tty; int retval; if (device != MKDEV(TTYAUX_MAJOR, 0)) return NULL; tty = get_current_tty(); if (!tty) return ERR_PTR(-ENXIO); filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ /* noctty = 1; */ tty_lock(tty); tty_kref_put(tty); /* safe to drop the kref now */ retval = tty_reopen(tty); if (retval < 0) { tty_unlock(tty); tty = ERR_PTR(retval); } return tty; } /** * tty_lookup_driver - lookup a tty driver for a given device file * @device: device number * @filp: file pointer to tty * @index: index for the device in the @return driver * * If returned value is not erroneous, the caller is responsible to decrement * the refcount by tty_driver_kref_put(). * * Locking: %tty_mutex protects get_tty_driver() * * Return: driver for this inode (with increased refcount) */ static struct tty_driver *tty_lookup_driver(dev_t device, struct file *filp, int *index) { struct tty_driver *driver = NULL; switch (device) { #ifdef CONFIG_VT case MKDEV(TTY_MAJOR, 0): { extern struct tty_driver *console_driver; driver = tty_driver_kref_get(console_driver); *index = fg_console; break; } #endif case MKDEV(TTYAUX_MAJOR, 1): { struct tty_driver *console_driver = console_device(index); if (console_driver) { driver = tty_driver_kref_get(console_driver); if (driver && filp) { /* Don't let /dev/console block */ filp->f_flags |= O_NONBLOCK; break; } } if (driver) tty_driver_kref_put(driver); return ERR_PTR(-ENODEV); } default: driver = get_tty_driver(device, index); if (!driver) return ERR_PTR(-ENODEV); break; } return driver; } static struct tty_struct *tty_kopen(dev_t device, int shared) { struct tty_struct *tty; struct tty_driver *driver; int index = -1; mutex_lock(&tty_mutex); driver = tty_lookup_driver(device, NULL, &index); if (IS_ERR(driver)) { mutex_unlock(&tty_mutex); return ERR_CAST(driver); } /* check whether we're reopening an existing tty */ tty = tty_driver_lookup_tty(driver, NULL, index); if (IS_ERR(tty) || shared) goto out; if (tty) { /* drop kref from tty_driver_lookup_tty() */ tty_kref_put(tty); tty = ERR_PTR(-EBUSY); } else { /* tty_init_dev returns tty with the tty_lock held */ tty = tty_init_dev(driver, index); if (IS_ERR(tty)) goto out; tty_port_set_kopened(tty->port, 1); } out: mutex_unlock(&tty_mutex); tty_driver_kref_put(driver); return tty; } /** * tty_kopen_exclusive - open a tty device for kernel * @device: dev_t of device to open * * Opens tty exclusively for kernel. Performs the driver lookup, makes sure * it's not already opened and performs the first-time tty initialization. * * Claims the global %tty_mutex to serialize: * * concurrent first-time tty initialization * * concurrent tty driver removal w/ lookup * * concurrent tty removal from driver table * * Return: the locked initialized &tty_struct */ struct tty_struct *tty_kopen_exclusive(dev_t device) { return tty_kopen(device, 0); } EXPORT_SYMBOL_GPL(tty_kopen_exclusive); /** * tty_kopen_shared - open a tty device for shared in-kernel use * @device: dev_t of device to open * * Opens an already existing tty for in-kernel use. Compared to * tty_kopen_exclusive() above it doesn't ensure to be the only user. * * Locking: identical to tty_kopen() above. 
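 *
 * A rough usage sketch for either variant (hypothetical in-kernel user,
 * error handling trimmed, the dev_t chosen arbitrarily):
 *
 *	struct tty_struct *tty = tty_kopen_exclusive(MKDEV(4, 64));
 *
 *	if (IS_ERR(tty))
 *		return PTR_ERR(tty);
 *	if (tty->ops->open)
 *		tty->ops->open(tty, NULL);
 *	tty_unlock(tty);
 *	...
 *	tty_lock(tty);
 *	if (tty->ops->close)
 *		tty->ops->close(tty, NULL);
 *	tty_unlock(tty);
 *	tty_kclose(tty);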
*/ struct tty_struct *tty_kopen_shared(dev_t device) { return tty_kopen(device, 1); } EXPORT_SYMBOL_GPL(tty_kopen_shared); /** * tty_open_by_driver - open a tty device * @device: dev_t of device to open * @filp: file pointer to tty * * Performs the driver lookup, checks for a reopen, or otherwise performs the * first-time tty initialization. * * * Claims the global tty_mutex to serialize: * * concurrent first-time tty initialization * * concurrent tty driver removal w/ lookup * * concurrent tty removal from driver table * * Return: the locked initialized or re-opened &tty_struct */ static struct tty_struct *tty_open_by_driver(dev_t device, struct file *filp) { struct tty_struct *tty; struct tty_driver *driver = NULL; int index = -1; int retval; mutex_lock(&tty_mutex); driver = tty_lookup_driver(device, filp, &index); if (IS_ERR(driver)) { mutex_unlock(&tty_mutex); return ERR_CAST(driver); } /* check whether we're reopening an existing tty */ tty = tty_driver_lookup_tty(driver, filp, index); if (IS_ERR(tty)) { mutex_unlock(&tty_mutex); goto out; } if (tty) { if (tty_port_kopened(tty->port)) { tty_kref_put(tty); mutex_unlock(&tty_mutex); tty = ERR_PTR(-EBUSY); goto out; } mutex_unlock(&tty_mutex); retval = tty_lock_interruptible(tty); tty_kref_put(tty); /* drop kref from tty_driver_lookup_tty() */ if (retval) { if (retval == -EINTR) retval = -ERESTARTSYS; tty = ERR_PTR(retval); goto out; } retval = tty_reopen(tty); if (retval < 0) { tty_unlock(tty); tty = ERR_PTR(retval); } } else { /* Returns with the tty_lock held for now */ tty = tty_init_dev(driver, index); mutex_unlock(&tty_mutex); } out: tty_driver_kref_put(driver); return tty; } /** * tty_open - open a tty device * @inode: inode of device file * @filp: file pointer to tty * * tty_open() and tty_release() keep up the tty count that contains the number * of opens done on a tty. We cannot use the inode-count, as different inodes * might point to the same tty. * * Open-counting is needed for pty masters, as well as for keeping track of * serial lines: DTR is dropped when the last close happens. * (This is not done solely through tty->count, now. - Ted 1/27/92) * * The termios state of a pty is reset on the first open so that settings don't * persist across reuse. * * Locking: * * %tty_mutex protects tty, tty_lookup_driver() and tty_init_dev(). * * @tty->count should protect the rest. 
* * ->siglock protects ->signal/->sighand * * Note: the tty_unlock/lock cases without a ref are only safe due to %tty_mutex */ static int tty_open(struct inode *inode, struct file *filp) { struct tty_struct *tty; int noctty, retval; dev_t device = inode->i_rdev; unsigned saved_flags = filp->f_flags; nonseekable_open(inode, filp); retry_open: retval = tty_alloc_file(filp); if (retval) return -ENOMEM; tty = tty_open_current_tty(device, filp); if (!tty) tty = tty_open_by_driver(device, filp); if (IS_ERR(tty)) { tty_free_file(filp); retval = PTR_ERR(tty); if (retval != -EAGAIN || signal_pending(current)) return retval; schedule(); goto retry_open; } tty_add_file(tty, filp); check_tty_count(tty, __func__); tty_debug_hangup(tty, "opening (count=%d)\n", tty->count); if (tty->ops->open) retval = tty->ops->open(tty, filp); else retval = -ENODEV; filp->f_flags = saved_flags; if (retval) { tty_debug_hangup(tty, "open error %d, releasing\n", retval); tty_unlock(tty); /* need to call tty_release without BTM */ tty_release(inode, filp); if (retval != -ERESTARTSYS) return retval; if (signal_pending(current)) return retval; schedule(); /* * Need to reset f_op in case a hangup happened. */ if (tty_hung_up_p(filp)) filp->f_op = &tty_fops; goto retry_open; } clear_bit(TTY_HUPPED, &tty->flags); noctty = (filp->f_flags & O_NOCTTY) || (IS_ENABLED(CONFIG_VT) && device == MKDEV(TTY_MAJOR, 0)) || device == MKDEV(TTYAUX_MAJOR, 1) || (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER); if (!noctty) tty_open_proc_set_tty(filp, tty); tty_unlock(tty); return 0; } /** * tty_poll - check tty status * @filp: file being polled * @wait: poll wait structures to update * * Call the line discipline polling method to obtain the poll status of the * device. * * Locking: locks called line discipline but ldisc poll method may be * re-entered freely by other callers. */ static __poll_t tty_poll(struct file *filp, poll_table *wait) { struct tty_struct *tty = file_tty(filp); struct tty_ldisc *ld; __poll_t ret = 0; if (tty_paranoia_check(tty, file_inode(filp), "tty_poll")) return 0; ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_poll(filp, wait); if (ld->ops->poll) ret = ld->ops->poll(tty, filp, wait); tty_ldisc_deref(ld); return ret; } static int __tty_fasync(int fd, struct file *filp, int on) { struct tty_struct *tty = file_tty(filp); unsigned long flags; int retval = 0; if (tty_paranoia_check(tty, file_inode(filp), "tty_fasync")) goto out; retval = fasync_helper(fd, filp, on, &tty->fasync); if (retval <= 0) goto out; if (on) { enum pid_type type; struct pid *pid; spin_lock_irqsave(&tty->ctrl.lock, flags); if (tty->ctrl.pgrp) { pid = tty->ctrl.pgrp; type = PIDTYPE_PGID; } else { pid = task_pid(current); type = PIDTYPE_TGID; } get_pid(pid); spin_unlock_irqrestore(&tty->ctrl.lock, flags); __f_setown(filp, pid, type, 0); put_pid(pid); retval = 0; } out: return retval; } static int tty_fasync(int fd, struct file *filp, int on) { struct tty_struct *tty = file_tty(filp); int retval = -ENOTTY; tty_lock(tty); if (!tty_hung_up_p(filp)) retval = __tty_fasync(fd, filp, on); tty_unlock(tty); return retval; } static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI); /** * tiocsti - fake input character * @tty: tty to fake input into * @p: pointer to character * * Fake input to a tty device. Does the necessary locking and input management. * * FIXME: does not honour flow control ?? 
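 *
 * The matching userspace call is simply the following (illustrative snippet,
 * fd is an assumed open tty descriptor; depending on the legacy_tiocsti
 * setting above it may require CAP_SYS_ADMIN):
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *
 *	char c = 'q';
 *
 *	if (ioctl(fd, TIOCSTI, &c) < 0)
 *		perror("TIOCSTI");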
* * Locking: * * Called functions take tty_ldiscs_lock * * current->signal->tty check is safe without locks */ static int tiocsti(struct tty_struct *tty, u8 __user *p) { struct tty_ldisc *ld; u8 ch; if (!tty_legacy_tiocsti && !capable(CAP_SYS_ADMIN)) return -EIO; if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ch, p)) return -EFAULT; tty_audit_tiocsti(tty, ch); ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; tty_buffer_lock_exclusive(tty->port); if (ld->ops->receive_buf) ld->ops->receive_buf(tty, &ch, NULL, 1); tty_buffer_unlock_exclusive(tty->port); tty_ldisc_deref(ld); return 0; } /** * tiocgwinsz - implement window query ioctl * @tty: tty * @arg: user buffer for result * * Copies the kernel idea of the window size into the user buffer. * * Locking: @tty->winsize_mutex is taken to ensure the winsize data is * consistent. */ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg) { int err; mutex_lock(&tty->winsize_mutex); err = copy_to_user(arg, &tty->winsize, sizeof(*arg)); mutex_unlock(&tty->winsize_mutex); return err ? -EFAULT : 0; } /** * tty_do_resize - resize event * @tty: tty being resized * @ws: new dimensions * * Update the termios variables and send the necessary signals to peform a * terminal resize correctly. */ int tty_do_resize(struct tty_struct *tty, struct winsize *ws) { struct pid *pgrp; /* Lock the tty */ mutex_lock(&tty->winsize_mutex); if (!memcmp(ws, &tty->winsize, sizeof(*ws))) goto done; /* Signal the foreground process group */ pgrp = tty_get_pgrp(tty); if (pgrp) kill_pgrp(pgrp, SIGWINCH, 1); put_pid(pgrp); tty->winsize = *ws; done: mutex_unlock(&tty->winsize_mutex); return 0; } EXPORT_SYMBOL(tty_do_resize); /** * tiocswinsz - implement window size set ioctl * @tty: tty side of tty * @arg: user buffer for result * * Copies the user idea of the window size to the kernel. Traditionally this is * just advisory information but for the Linux console it actually has driver * level meaning and triggers a VC resize. * * Locking: * Driver dependent. The default do_resize method takes the tty termios * mutex and ctrl.lock. The console takes its own lock then calls into the * default method. */ static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg) { struct winsize tmp_ws; if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) return -EFAULT; if (tty->ops->resize) return tty->ops->resize(tty, &tmp_ws); else return tty_do_resize(tty, &tmp_ws); } /** * tioccons - allow admin to move logical console * @file: the file to become console * * Allow the administrator to move the redirected console device. * * Locking: uses redirect_lock to guard the redirect information */ static int tioccons(struct file *file) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (file->f_op->write_iter == redirected_tty_write) { struct file *f; spin_lock(&redirect_lock); f = redirect; redirect = NULL; spin_unlock(&redirect_lock); if (f) fput(f); return 0; } if (file->f_op->write_iter != tty_write) return -ENOTTY; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; spin_lock(&redirect_lock); if (redirect) { spin_unlock(&redirect_lock); return -EBUSY; } redirect = get_file(file); spin_unlock(&redirect_lock); return 0; } /** * tiocsetd - set line discipline * @tty: tty device * @p: pointer to user data * * Set the line discipline according to user request. 
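 *
 * For reference, the userspace side is just (illustrative snippet, fd is an
 * assumed open tty descriptor, N_SLIP picked only as an example discipline):
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/tty.h>
 *
 *	int ldisc = N_SLIP;
 *
 *	if (ioctl(fd, TIOCSETD, &ldisc) < 0)
 *		perror("TIOCSETD");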
* * Locking: see tty_set_ldisc(), this function is just a helper */ static int tiocsetd(struct tty_struct *tty, int __user *p) { int disc; int ret; if (get_user(disc, p)) return -EFAULT; ret = tty_set_ldisc(tty, disc); return ret; } /** * tiocgetd - get line discipline * @tty: tty device * @p: pointer to user data * * Retrieves the line discipline id directly from the ldisc. * * Locking: waits for ldisc reference (in case the line discipline is changing * or the @tty is being hungup) */ static int tiocgetd(struct tty_struct *tty, int __user *p) { struct tty_ldisc *ld; int ret; ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; ret = put_user(ld->ops->num, p); tty_ldisc_deref(ld); return ret; } /** * send_break - performed time break * @tty: device to break on * @duration: timeout in mS * * Perform a timed break on hardware that lacks its own driver level timed * break functionality. * * Locking: * @tty->atomic_write_lock serializes */ static int send_break(struct tty_struct *tty, unsigned int duration) { int retval; if (tty->ops->break_ctl == NULL) return 0; if (tty->driver->flags & TTY_DRIVER_HARDWARE_BREAK) return tty->ops->break_ctl(tty, duration); /* Do the work ourselves */ if (tty_write_lock(tty, false) < 0) return -EINTR; retval = tty->ops->break_ctl(tty, -1); if (!retval) { msleep_interruptible(duration); retval = tty->ops->break_ctl(tty, 0); } else if (retval == -EOPNOTSUPP) { /* some drivers can tell only dynamically */ retval = 0; } tty_write_unlock(tty); if (signal_pending(current)) retval = -EINTR; return retval; } /** * tty_get_tiocm - get tiocm status register * @tty: tty device * * Obtain the modem status bits from the tty driver if the feature * is supported. */ int tty_get_tiocm(struct tty_struct *tty) { int retval = -ENOTTY; if (tty->ops->tiocmget) retval = tty->ops->tiocmget(tty); return retval; } EXPORT_SYMBOL_GPL(tty_get_tiocm); /** * tty_tiocmget - get modem status * @tty: tty device * @p: pointer to result * * Obtain the modem status bits from the tty driver if the feature is * supported. Return -%ENOTTY if it is not available. * * Locking: none (up to the driver) */ static int tty_tiocmget(struct tty_struct *tty, int __user *p) { int retval; retval = tty_get_tiocm(tty); if (retval >= 0) retval = put_user(retval, p); return retval; } /** * tty_tiocmset - set modem status * @tty: tty device * @cmd: command - clear bits, set bits or set all * @p: pointer to desired bits * * Set the modem status bits from the tty driver if the feature * is supported. Return -%ENOTTY if it is not available. * * Locking: none (up to the driver) */ static int tty_tiocmset(struct tty_struct *tty, unsigned int cmd, unsigned __user *p) { int retval; unsigned int set, clear, val; if (tty->ops->tiocmset == NULL) return -ENOTTY; retval = get_user(val, p); if (retval) return retval; set = clear = 0; switch (cmd) { case TIOCMBIS: set = val; break; case TIOCMBIC: clear = val; break; case TIOCMSET: set = val; clear = ~val; break; } set &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; clear &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; return tty->ops->tiocmset(tty, set, clear); } /** * tty_get_icount - get tty statistics * @tty: tty device * @icount: output parameter * * Gets a copy of the @tty's icount statistics. 
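 *
 * Userspace reaches this via TIOCGICOUNT, roughly (illustrative snippet,
 * fd is an assumed open serial tty):
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/serial.h>
 *
 *	struct serial_icounter_struct ic;
 *
 *	if (ioctl(fd, TIOCGICOUNT, &ic) == 0)
 *		printf("rx=%d tx=%d\n", ic.rx, ic.tx);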
* * Locking: none (up to the driver) */ int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { memset(icount, 0, sizeof(*icount)); if (tty->ops->get_icount) return tty->ops->get_icount(tty, icount); else return -ENOTTY; } EXPORT_SYMBOL_GPL(tty_get_icount); static int tty_tiocgicount(struct tty_struct *tty, void __user *arg) { struct serial_icounter_struct icount; int retval; retval = tty_get_icount(tty, &icount); if (retval != 0) return retval; if (copy_to_user(arg, &icount, sizeof(icount))) return -EFAULT; return 0; } static int tty_set_serial(struct tty_struct *tty, struct serial_struct *ss) { char comm[TASK_COMM_LEN]; int flags; flags = ss->flags & ASYNC_DEPRECATED; if (flags) pr_warn_ratelimited("%s: '%s' is using deprecated serial flags (with no effect): %.8x\n", __func__, get_task_comm(comm, current), flags); if (!tty->ops->set_serial) return -ENOTTY; return tty->ops->set_serial(tty, ss); } static int tty_tiocsserial(struct tty_struct *tty, struct serial_struct __user *ss) { struct serial_struct v; if (copy_from_user(&v, ss, sizeof(*ss))) return -EFAULT; return tty_set_serial(tty, &v); } static int tty_tiocgserial(struct tty_struct *tty, struct serial_struct __user *ss) { struct serial_struct v; int err; memset(&v, 0, sizeof(v)); if (!tty->ops->get_serial) return -ENOTTY; err = tty->ops->get_serial(tty, &v); if (!err && copy_to_user(ss, &v, sizeof(v))) err = -EFAULT; return err; } /* * if pty, return the slave side (real_tty) * otherwise, return self */ static struct tty_struct *tty_pair_get_tty(struct tty_struct *tty) { if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) tty = tty->link; return tty; } /* * Split this up, as gcc can choke on it otherwise.. */ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct tty_struct *tty = file_tty(file); struct tty_struct *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl")) return -EINVAL; real_tty = tty_pair_get_tty(tty); /* * Factor out some common prep work */ switch (cmd) { case TIOCSETD: case TIOCSBRK: case TIOCCBRK: case TCSBRK: case TCSBRKP: retval = tty_check_change(tty); if (retval) return retval; if (cmd != TIOCCBRK) { tty_wait_until_sent(tty, 0); if (signal_pending(current)) return -EINTR; } break; } /* * Now do the stuff. */ switch (cmd) { case TIOCSTI: return tiocsti(tty, p); case TIOCGWINSZ: return tiocgwinsz(real_tty, p); case TIOCSWINSZ: return tiocswinsz(real_tty, p); case TIOCCONS: return real_tty != tty ? 
-EINVAL : tioccons(file); case TIOCEXCL: set_bit(TTY_EXCLUSIVE, &tty->flags); return 0; case TIOCNXCL: clear_bit(TTY_EXCLUSIVE, &tty->flags); return 0; case TIOCGEXCL: { int excl = test_bit(TTY_EXCLUSIVE, &tty->flags); return put_user(excl, (int __user *)p); } case TIOCGETD: return tiocgetd(tty, p); case TIOCSETD: return tiocsetd(tty, p); case TIOCVHANGUP: if (!capable(CAP_SYS_ADMIN)) return -EPERM; tty_vhangup(tty); return 0; case TIOCGDEV: { unsigned int ret = new_encode_dev(tty_devnum(real_tty)); return put_user(ret, (unsigned int __user *)p); } /* * Break handling */ case TIOCSBRK: /* Turn break on, unconditionally */ if (tty->ops->break_ctl) return tty->ops->break_ctl(tty, -1); return 0; case TIOCCBRK: /* Turn break off, unconditionally */ if (tty->ops->break_ctl) return tty->ops->break_ctl(tty, 0); return 0; case TCSBRK: /* SVID version: non-zero arg --> no break */ /* non-zero arg means wait for all output data * to be sent (performed above) but don't send break. * This is used by the tcdrain() termios function. */ if (!arg) return send_break(tty, 250); return 0; case TCSBRKP: /* support for POSIX tcsendbreak() */ return send_break(tty, arg ? arg*100 : 250); case TIOCMGET: return tty_tiocmget(tty, p); case TIOCMSET: case TIOCMBIC: case TIOCMBIS: return tty_tiocmset(tty, cmd, p); case TIOCGICOUNT: return tty_tiocgicount(tty, p); case TCFLSH: switch (arg) { case TCIFLUSH: case TCIOFLUSH: /* flush tty buffer and allow ldisc to process ioctl */ tty_buffer_flush(tty, NULL); break; } break; case TIOCSSERIAL: return tty_tiocsserial(tty, p); case TIOCGSERIAL: return tty_tiocgserial(tty, p); case TIOCGPTPEER: /* Special because the struct file is needed */ return ptm_open_peer(file, tty, (int)arg); default: retval = tty_jobctrl_ioctl(tty, real_tty, file, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } if (tty->ops->ioctl) { retval = tty->ops->ioctl(tty, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_ioctl(file, cmd, arg); retval = -EINVAL; if (ld->ops->ioctl) { retval = ld->ops->ioctl(tty, cmd, arg); if (retval == -ENOIOCTLCMD) retval = -ENOTTY; } tty_ldisc_deref(ld); return retval; } #ifdef CONFIG_COMPAT struct serial_struct32 { compat_int_t type; compat_int_t line; compat_uint_t port; compat_int_t irq; compat_int_t flags; compat_int_t xmit_fifo_size; compat_int_t custom_divisor; compat_int_t baud_base; unsigned short close_delay; char io_type; char reserved_char; compat_int_t hub6; unsigned short closing_wait; /* time to wait before closing */ unsigned short closing_wait2; /* no longer used... 
*/ compat_uint_t iomem_base; unsigned short iomem_reg_shift; unsigned int port_high; /* compat_ulong_t iomap_base FIXME */ compat_int_t reserved; }; static int compat_tty_tiocsserial(struct tty_struct *tty, struct serial_struct32 __user *ss) { struct serial_struct32 v32; struct serial_struct v; if (copy_from_user(&v32, ss, sizeof(*ss))) return -EFAULT; memcpy(&v, &v32, offsetof(struct serial_struct32, iomem_base)); v.iomem_base = compat_ptr(v32.iomem_base); v.iomem_reg_shift = v32.iomem_reg_shift; v.port_high = v32.port_high; v.iomap_base = 0; return tty_set_serial(tty, &v); } static int compat_tty_tiocgserial(struct tty_struct *tty, struct serial_struct32 __user *ss) { struct serial_struct32 v32; struct serial_struct v; int err; memset(&v, 0, sizeof(v)); memset(&v32, 0, sizeof(v32)); if (!tty->ops->get_serial) return -ENOTTY; err = tty->ops->get_serial(tty, &v); if (!err) { memcpy(&v32, &v, offsetof(struct serial_struct32, iomem_base)); v32.iomem_base = (unsigned long)v.iomem_base >> 32 ? 0xfffffff : ptr_to_compat(v.iomem_base); v32.iomem_reg_shift = v.iomem_reg_shift; v32.port_high = v.port_high; if (copy_to_user(ss, &v32, sizeof(v32))) err = -EFAULT; } return err; } static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; int retval = -ENOIOCTLCMD; switch (cmd) { case TIOCOUTQ: case TIOCSTI: case TIOCGWINSZ: case TIOCSWINSZ: case TIOCGEXCL: case TIOCGETD: case TIOCSETD: case TIOCGDEV: case TIOCMGET: case TIOCMSET: case TIOCMBIC: case TIOCMBIS: case TIOCGICOUNT: case TIOCGPGRP: case TIOCSPGRP: case TIOCGSID: case TIOCSERGETLSR: case TIOCGRS485: case TIOCSRS485: #ifdef TIOCGETP case TIOCGETP: case TIOCSETP: case TIOCSETN: #endif #ifdef TIOCGETC case TIOCGETC: case TIOCSETC: #endif #ifdef TIOCGLTC case TIOCGLTC: case TIOCSLTC: #endif case TCSETSF: case TCSETSW: case TCSETS: case TCGETS: #ifdef TCGETS2 case TCGETS2: case TCSETSF2: case TCSETSW2: case TCSETS2: #endif case TCGETA: case TCSETAF: case TCSETAW: case TCSETA: case TIOCGLCKTRMIOS: case TIOCSLCKTRMIOS: #ifdef TCGETX case TCGETX: case TCSETX: case TCSETXW: case TCSETXF: #endif case TIOCGSOFTCAR: case TIOCSSOFTCAR: case PPPIOCGCHAN: case PPPIOCGUNIT: return tty_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); case TIOCCONS: case TIOCEXCL: case TIOCNXCL: case TIOCVHANGUP: case TIOCSBRK: case TIOCCBRK: case TCSBRK: case TCSBRKP: case TCFLSH: case TIOCGPTPEER: case TIOCNOTTY: case TIOCSCTTY: case TCXONC: case TIOCMIWAIT: case TIOCSERCONFIG: return tty_ioctl(file, cmd, arg); } if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl")) return -EINVAL; switch (cmd) { case TIOCSSERIAL: return compat_tty_tiocsserial(tty, compat_ptr(arg)); case TIOCGSERIAL: return compat_tty_tiocgserial(tty, compat_ptr(arg)); } if (tty->ops->compat_ioctl) { retval = tty->ops->compat_ioctl(tty, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } ld = tty_ldisc_ref_wait(tty); if (!ld) return hung_up_tty_compat_ioctl(file, cmd, arg); if (ld->ops->compat_ioctl) retval = ld->ops->compat_ioctl(tty, cmd, arg); if (retval == -ENOIOCTLCMD && ld->ops->ioctl) retval = ld->ops->ioctl(tty, (unsigned long)compat_ptr(cmd), arg); tty_ldisc_deref(ld); return retval; } #endif static int this_tty(const void *t, struct file *file, unsigned fd) { if (likely(file->f_op->read_iter != tty_read)) return 0; return file_tty(file) != t ? 
0 : fd + 1; } /* * This implements the "Secure Attention Key" --- the idea is to * prevent trojan horses by killing all processes associated with this * tty when the user hits the "Secure Attention Key". Required for * super-paranoid applications --- see the Orange Book for more details. * * This code could be nicer; ideally it should send a HUP, wait a few * seconds, then send a INT, and then a KILL signal. But you then * have to coordinate with the init process, since all processes associated * with the current tty must be dead before the new getty is allowed * to spawn. * * Now, if it would be correct ;-/ The current code has a nasty hole - * it doesn't catch files in flight. We may send the descriptor to ourselves * via AF_UNIX socket, close it and later fetch from socket. FIXME. * * Nasty bug: do_SAK is being called in interrupt context. This can * deadlock. We punt it up to process context. AKPM - 16Mar2001 */ void __do_SAK(struct tty_struct *tty) { struct task_struct *g, *p; struct pid *session; int i; unsigned long flags; spin_lock_irqsave(&tty->ctrl.lock, flags); session = get_pid(tty->ctrl.session); spin_unlock_irqrestore(&tty->ctrl.lock, flags); tty_ldisc_flush(tty); tty_driver_flush_buffer(tty); read_lock(&tasklist_lock); /* Kill the entire session */ do_each_pid_task(session, PIDTYPE_SID, p) { tty_notice(tty, "SAK: killed process %d (%s): by session\n", task_pid_nr(p), p->comm); group_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_SID); } while_each_pid_task(session, PIDTYPE_SID, p); /* Now kill any processes that happen to have the tty open */ for_each_process_thread(g, p) { if (p->signal->tty == tty) { tty_notice(tty, "SAK: killed process %d (%s): by controlling tty\n", task_pid_nr(p), p->comm); group_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_SID); continue; } task_lock(p); i = iterate_fd(p->files, 0, this_tty, tty); if (i != 0) { tty_notice(tty, "SAK: killed process %d (%s): by fd#%d\n", task_pid_nr(p), p->comm, i - 1); group_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_SID); } task_unlock(p); } read_unlock(&tasklist_lock); put_pid(session); } static void do_SAK_work(struct work_struct *work) { struct tty_struct *tty = container_of(work, struct tty_struct, SAK_work); __do_SAK(tty); } /* * The tq handling here is a little racy - tty->SAK_work may already be queued. * Fortunately we don't need to worry, because if ->SAK_work is already queued, * the values which we write to it will be identical to the values which it * already has. --akpm */ void do_SAK(struct tty_struct *tty) { if (!tty) return; schedule_work(&tty->SAK_work); } EXPORT_SYMBOL(do_SAK); /* Must put_device() after it's unused! */ static struct device *tty_get_device(struct tty_struct *tty) { dev_t devt = tty_devnum(tty); return class_find_device_by_devt(&tty_class, devt); } /** * alloc_tty_struct - allocate a new tty * @driver: driver which will handle the returned tty * @idx: minor of the tty * * This subroutine allocates and initializes a tty structure. 
* * Locking: none - @tty in question is not exposed at this point */ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) { struct tty_struct *tty; tty = kzalloc(sizeof(*tty), GFP_KERNEL_ACCOUNT); if (!tty) return NULL; kref_init(&tty->kref); if (tty_ldisc_init(tty)) { kfree(tty); return NULL; } tty->ctrl.session = NULL; tty->ctrl.pgrp = NULL; mutex_init(&tty->legacy_mutex); mutex_init(&tty->throttle_mutex); init_rwsem(&tty->termios_rwsem); mutex_init(&tty->winsize_mutex); init_ldsem(&tty->ldisc_sem); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); INIT_WORK(&tty->hangup_work, do_tty_hangup); mutex_init(&tty->atomic_write_lock); spin_lock_init(&tty->ctrl.lock); spin_lock_init(&tty->flow.lock); spin_lock_init(&tty->files_lock); INIT_LIST_HEAD(&tty->tty_files); INIT_WORK(&tty->SAK_work, do_SAK_work); tty->driver = driver; tty->ops = driver->ops; tty->index = idx; tty_line_name(driver, idx, tty->name); tty->dev = tty_get_device(tty); return tty; } /** * tty_put_char - write one character to a tty * @tty: tty * @ch: character to write * * Write one byte to the @tty using the provided @tty->ops->put_char() method * if present. * * Note: the specific put_char operation in the driver layer may go * away soon. Don't call it directly, use this method * * Return: the number of characters successfully output. */ int tty_put_char(struct tty_struct *tty, u8 ch) { if (tty->ops->put_char) return tty->ops->put_char(tty, ch); return tty->ops->write(tty, &ch, 1); } EXPORT_SYMBOL_GPL(tty_put_char); static int tty_cdev_add(struct tty_driver *driver, dev_t dev, unsigned int index, unsigned int count) { int err; /* init here, since reused cdevs cause crashes */ driver->cdevs[index] = cdev_alloc(); if (!driver->cdevs[index]) return -ENOMEM; driver->cdevs[index]->ops = &tty_fops; driver->cdevs[index]->owner = driver->owner; err = cdev_add(driver->cdevs[index], dev, count); if (err) kobject_put(&driver->cdevs[index]->kobj); return err; } /** * tty_register_device - register a tty device * @driver: the tty driver that describes the tty device * @index: the index in the tty driver for this tty device * @device: a struct device that is associated with this tty device. * This field is optional, if there is no known struct device * for this tty device it can be set to NULL safely. * * This call is required to be made to register an individual tty device * if the tty driver's flags have the %TTY_DRIVER_DYNAMIC_DEV bit set. If * that bit is not set, this function should not be called by a tty * driver. * * Locking: ?? * * Return: A pointer to the struct device for this tty device (or * ERR_PTR(-EFOO) on error). */ struct device *tty_register_device(struct tty_driver *driver, unsigned index, struct device *device) { return tty_register_device_attr(driver, index, device, NULL, NULL); } EXPORT_SYMBOL(tty_register_device); static void tty_device_create_release(struct device *dev) { dev_dbg(dev, "releasing...\n"); kfree(dev); } /** * tty_register_device_attr - register a tty device * @driver: the tty driver that describes the tty device * @index: the index in the tty driver for this tty device * @device: a struct device that is associated with this tty device. * This field is optional, if there is no known struct device * for this tty device it can be set to %NULL safely. * @drvdata: Driver data to be set to device. * @attr_grp: Attribute group to be set on device. 
* * This call is required to be made to register an individual tty device if the * tty driver's flags have the %TTY_DRIVER_DYNAMIC_DEV bit set. If that bit is * not set, this function should not be called by a tty driver. * * Locking: ?? * * Return: A pointer to the struct device for this tty device (or * ERR_PTR(-EFOO) on error). */ struct device *tty_register_device_attr(struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp) { char name[64]; dev_t devt = MKDEV(driver->major, driver->minor_start) + index; struct ktermios *tp; struct device *dev; int retval; if (index >= driver->num) { pr_err("%s: Attempt to register invalid tty line number (%d)\n", driver->name, index); return ERR_PTR(-EINVAL); } if (driver->type == TTY_DRIVER_TYPE_PTY) pty_line_name(driver, index, name); else tty_line_name(driver, index, name); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); dev->devt = devt; dev->class = &tty_class; dev->parent = device; dev->release = tty_device_create_release; dev_set_name(dev, "%s", name); dev->groups = attr_grp; dev_set_drvdata(dev, drvdata); dev_set_uevent_suppress(dev, 1); retval = device_register(dev); if (retval) goto err_put; if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) { /* * Free any saved termios data so that the termios state is * reset when reusing a minor number. */ tp = driver->termios[index]; if (tp) { driver->termios[index] = NULL; kfree(tp); } retval = tty_cdev_add(driver, devt, index, 1); if (retval) goto err_del; } dev_set_uevent_suppress(dev, 0); kobject_uevent(&dev->kobj, KOBJ_ADD); return dev; err_del: device_del(dev); err_put: put_device(dev); return ERR_PTR(retval); } EXPORT_SYMBOL_GPL(tty_register_device_attr); /** * tty_unregister_device - unregister a tty device * @driver: the tty driver that describes the tty device * @index: the index in the tty driver for this tty device * * If a tty device is registered with a call to tty_register_device() then * this function must be called when the tty device is gone. * * Locking: ?? */ void tty_unregister_device(struct tty_driver *driver, unsigned index) { device_destroy(&tty_class, MKDEV(driver->major, driver->minor_start) + index); if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) { cdev_del(driver->cdevs[index]); driver->cdevs[index] = NULL; } } EXPORT_SYMBOL(tty_unregister_device); /** * __tty_alloc_driver - allocate tty driver * @lines: count of lines this driver can handle at most * @owner: module which is responsible for this driver * @flags: some of %TTY_DRIVER_ flags, will be set in driver->flags * * This should not be called directly, some of the provided macros should be * used instead. Use IS_ERR() and friends on @retval. 
*/ struct tty_driver *__tty_alloc_driver(unsigned int lines, struct module *owner, unsigned long flags) { struct tty_driver *driver; unsigned int cdevs = 1; int err; if (!lines || (flags & TTY_DRIVER_UNNUMBERED_NODE && lines > 1)) return ERR_PTR(-EINVAL); driver = kzalloc(sizeof(*driver), GFP_KERNEL); if (!driver) return ERR_PTR(-ENOMEM); kref_init(&driver->kref); driver->num = lines; driver->owner = owner; driver->flags = flags; if (!(flags & TTY_DRIVER_DEVPTS_MEM)) { driver->ttys = kcalloc(lines, sizeof(*driver->ttys), GFP_KERNEL); driver->termios = kcalloc(lines, sizeof(*driver->termios), GFP_KERNEL); if (!driver->ttys || !driver->termios) { err = -ENOMEM; goto err_free_all; } } if (!(flags & TTY_DRIVER_DYNAMIC_ALLOC)) { driver->ports = kcalloc(lines, sizeof(*driver->ports), GFP_KERNEL); if (!driver->ports) { err = -ENOMEM; goto err_free_all; } cdevs = lines; } driver->cdevs = kcalloc(cdevs, sizeof(*driver->cdevs), GFP_KERNEL); if (!driver->cdevs) { err = -ENOMEM; goto err_free_all; } return driver; err_free_all: kfree(driver->ports); kfree(driver->ttys); kfree(driver->termios); kfree(driver->cdevs); kfree(driver); return ERR_PTR(err); } EXPORT_SYMBOL(__tty_alloc_driver); static void destruct_tty_driver(struct kref *kref) { struct tty_driver *driver = container_of(kref, struct tty_driver, kref); int i; struct ktermios *tp; if (driver->flags & TTY_DRIVER_INSTALLED) { for (i = 0; i < driver->num; i++) { tp = driver->termios[i]; if (tp) { driver->termios[i] = NULL; kfree(tp); } if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) tty_unregister_device(driver, i); } proc_tty_unregister_driver(driver); if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC) cdev_del(driver->cdevs[0]); } kfree(driver->cdevs); kfree(driver->ports); kfree(driver->termios); kfree(driver->ttys); kfree(driver); } /** * tty_driver_kref_put - drop a reference to a tty driver * @driver: driver of which to drop the reference * * The final put will destroy and free up the driver. */ void tty_driver_kref_put(struct tty_driver *driver) { kref_put(&driver->kref, destruct_tty_driver); } EXPORT_SYMBOL(tty_driver_kref_put); /** * tty_register_driver - register a tty driver * @driver: driver to register * * Called by a tty driver to register itself. 
*/ int tty_register_driver(struct tty_driver *driver) { int error; int i; dev_t dev; struct device *d; if (!driver->major) { error = alloc_chrdev_region(&dev, driver->minor_start, driver->num, driver->name); if (!error) { driver->major = MAJOR(dev); driver->minor_start = MINOR(dev); } } else { dev = MKDEV(driver->major, driver->minor_start); error = register_chrdev_region(dev, driver->num, driver->name); } if (error < 0) goto err; if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC) { error = tty_cdev_add(driver, dev, 0, driver->num); if (error) goto err_unreg_char; } mutex_lock(&tty_mutex); list_add(&driver->tty_drivers, &tty_drivers); mutex_unlock(&tty_mutex); if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) { for (i = 0; i < driver->num; i++) { d = tty_register_device(driver, i, NULL); if (IS_ERR(d)) { error = PTR_ERR(d); goto err_unreg_devs; } } } proc_tty_register_driver(driver); driver->flags |= TTY_DRIVER_INSTALLED; return 0; err_unreg_devs: for (i--; i >= 0; i--) tty_unregister_device(driver, i); mutex_lock(&tty_mutex); list_del(&driver->tty_drivers); mutex_unlock(&tty_mutex); err_unreg_char: unregister_chrdev_region(dev, driver->num); err: return error; } EXPORT_SYMBOL(tty_register_driver); /** * tty_unregister_driver - unregister a tty driver * @driver: driver to unregister * * Called by a tty driver to unregister itself. */ void tty_unregister_driver(struct tty_driver *driver) { unregister_chrdev_region(MKDEV(driver->major, driver->minor_start), driver->num); mutex_lock(&tty_mutex); list_del(&driver->tty_drivers); mutex_unlock(&tty_mutex); } EXPORT_SYMBOL(tty_unregister_driver); dev_t tty_devnum(struct tty_struct *tty) { return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; } EXPORT_SYMBOL(tty_devnum); void tty_default_fops(struct file_operations *fops) { *fops = tty_fops; } static char *tty_devnode(const struct device *dev, umode_t *mode) { if (!mode) return NULL; if (dev->devt == MKDEV(TTYAUX_MAJOR, 0) || dev->devt == MKDEV(TTYAUX_MAJOR, 2)) *mode = 0666; return NULL; } const struct class tty_class = { .name = "tty", .devnode = tty_devnode, }; static int __init tty_class_init(void) { return class_register(&tty_class); } postcore_initcall(tty_class_init); /* 3/2004 jmc: why do these devices exist? */ static struct cdev tty_cdev, console_cdev; static ssize_t show_cons_active(struct device *dev, struct device_attribute *attr, char *buf) { struct console *cs[16]; int i = 0; struct console *c; ssize_t count = 0; /* * Hold the console_list_lock to guarantee that no consoles are * unregistered until all console processing is complete. * This also allows safe traversal of the console list and * race-free reading of @flags. */ console_list_lock(); for_each_console(c) { if (!c->device) continue; if (!c->write) continue; if ((c->flags & CON_ENABLED) == 0) continue; cs[i++] = c; if (i >= ARRAY_SIZE(cs)) break; } /* * Take console_lock to serialize device() callback with * other console operations. For example, fg_console is * modified under console_lock when switching vt. */ console_lock(); while (i--) { int index = cs[i]->index; struct tty_driver *drv = cs[i]->device(cs[i], &index); /* don't resolve tty0 as some programs depend on it */ if (drv && (cs[i]->index > 0 || drv->major != TTY_MAJOR)) count += tty_line_name(drv, index, buf + count); else count += sprintf(buf + count, "%s%d", cs[i]->name, cs[i]->index); count += sprintf(buf + count, "%c", i ? 
' ':'\n'); } console_unlock(); console_list_unlock(); return count; } static DEVICE_ATTR(active, S_IRUGO, show_cons_active, NULL); static struct attribute *cons_dev_attrs[] = { &dev_attr_active.attr, NULL }; ATTRIBUTE_GROUPS(cons_dev); static struct device *consdev; void console_sysfs_notify(void) { if (consdev) sysfs_notify(&consdev->kobj, NULL, "active"); } static struct ctl_table tty_table[] = { { .procname = "legacy_tiocsti", .data = &tty_legacy_tiocsti, .maxlen = sizeof(tty_legacy_tiocsti), .mode = 0644, .proc_handler = proc_dobool, }, { .procname = "ldisc_autoload", .data = &tty_ldisc_autoload, .maxlen = sizeof(tty_ldisc_autoload), .mode = 0644, .proc_handler = proc_dointvec, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, }; /* * Ok, now we can initialize the rest of the tty devices and can count * on memory allocations, interrupts etc.. */ int __init tty_init(void) { register_sysctl_init("dev/tty", tty_table); cdev_init(&tty_cdev, &tty_fops); if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0) panic("Couldn't register /dev/tty driver\n"); device_create(&tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty"); cdev_init(&console_cdev, &console_fops); if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0) panic("Couldn't register /dev/console driver\n"); consdev = device_create_with_groups(&tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL, cons_dev_groups, "console"); if (IS_ERR(consdev)) consdev = NULL; #ifdef CONFIG_VT vty_init(&console_fops); #endif return 0; }
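/*
 * Illustrative sketch (not part of the file above): how a driver might use
 * the registration API documented here -- allocate with tty_alloc_driver(),
 * fill in the naming and termios fields, attach a struct tty_operations via
 * tty_set_operations(), then call tty_register_driver(), and tear down with
 * tty_unregister_driver() plus tty_driver_kref_put(). The ex_* names are
 * hypothetical, and the tty_operations signatures shown follow recent
 * kernels (write() takes a const u8 pointer and size_t); older kernels
 * differ. Guarded by #if 0 so it reads as an example, not code wired into
 * this file.
 */
#if 0
#include <linux/module.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>

static struct tty_driver *ex_driver;

/* Minimal ops: accept opens and pretend all written data was consumed. */
static int ex_open(struct tty_struct *tty, struct file *filp)
{
	return 0;	/* a real driver would attach a tty_port here */
}

static void ex_close(struct tty_struct *tty, struct file *filp)
{
}

static ssize_t ex_write(struct tty_struct *tty, const u8 *buf, size_t count)
{
	return count;
}

static const struct tty_operations ex_ops = {
	.open	= ex_open,
	.close	= ex_close,
	.write	= ex_write,
};

static int __init ex_init(void)
{
	int ret;

	/* One line; major left at 0 so tty_register_driver() allocates one. */
	ex_driver = tty_alloc_driver(1, TTY_DRIVER_REAL_RAW);
	if (IS_ERR(ex_driver))
		return PTR_ERR(ex_driver);

	ex_driver->driver_name = "ex_tty";
	ex_driver->name = "ttyEX";
	ex_driver->type = TTY_DRIVER_TYPE_SERIAL;
	ex_driver->subtype = SERIAL_TYPE_NORMAL;
	ex_driver->init_termios = tty_std_termios;
	tty_set_operations(ex_driver, &ex_ops);

	ret = tty_register_driver(ex_driver);
	if (ret)
		tty_driver_kref_put(ex_driver);
	return ret;
}

static void __exit ex_exit(void)
{
	tty_unregister_driver(ex_driver);
	tty_driver_kref_put(ex_driver);
}

module_init(ex_init);
module_exit(ex_exit);
MODULE_LICENSE("GPL");
#endif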
// SPDX-License-Identifier: GPL-2.0 // Generated by scripts/atomic/gen-atomic-fallback.sh // DO NOT MODIFY THIS FILE DIRECTLY #ifndef _LINUX_ATOMIC_FALLBACK_H #define _LINUX_ATOMIC_FALLBACK_H #include <linux/compiler.h> #if defined(arch_xchg) #define raw_xchg arch_xchg #elif defined(arch_xchg_relaxed) #define raw_xchg(...) \ __atomic_op_fence(arch_xchg, __VA_ARGS__) #else extern void raw_xchg_not_implemented(void); #define raw_xchg(...) raw_xchg_not_implemented() #endif #if defined(arch_xchg_acquire) #define raw_xchg_acquire arch_xchg_acquire #elif defined(arch_xchg_relaxed) #define raw_xchg_acquire(...) \ __atomic_op_acquire(arch_xchg, __VA_ARGS__) #elif defined(arch_xchg) #define raw_xchg_acquire arch_xchg #else extern void raw_xchg_acquire_not_implemented(void); #define raw_xchg_acquire(...) raw_xchg_acquire_not_implemented() #endif #if defined(arch_xchg_release) #define raw_xchg_release arch_xchg_release #elif defined(arch_xchg_relaxed) #define raw_xchg_release(...) \ __atomic_op_release(arch_xchg, __VA_ARGS__) #elif defined(arch_xchg) #define raw_xchg_release arch_xchg #else extern void raw_xchg_release_not_implemented(void); #define raw_xchg_release(...) raw_xchg_release_not_implemented() #endif #if defined(arch_xchg_relaxed) #define raw_xchg_relaxed arch_xchg_relaxed #elif defined(arch_xchg) #define raw_xchg_relaxed arch_xchg #else extern void raw_xchg_relaxed_not_implemented(void); #define raw_xchg_relaxed(...) raw_xchg_relaxed_not_implemented() #endif #if defined(arch_cmpxchg) #define raw_cmpxchg arch_cmpxchg #elif defined(arch_cmpxchg_relaxed) #define raw_cmpxchg(...) \ __atomic_op_fence(arch_cmpxchg, __VA_ARGS__) #else extern void raw_cmpxchg_not_implemented(void); #define raw_cmpxchg(...) raw_cmpxchg_not_implemented() #endif #if defined(arch_cmpxchg_acquire) #define raw_cmpxchg_acquire arch_cmpxchg_acquire #elif defined(arch_cmpxchg_relaxed) #define raw_cmpxchg_acquire(...) \ __atomic_op_acquire(arch_cmpxchg, __VA_ARGS__) #elif defined(arch_cmpxchg) #define raw_cmpxchg_acquire arch_cmpxchg #else extern void raw_cmpxchg_acquire_not_implemented(void); #define raw_cmpxchg_acquire(...) raw_cmpxchg_acquire_not_implemented() #endif #if defined(arch_cmpxchg_release) #define raw_cmpxchg_release arch_cmpxchg_release #elif defined(arch_cmpxchg_relaxed) #define raw_cmpxchg_release(...) 
\ __atomic_op_release(arch_cmpxchg, __VA_ARGS__) #elif defined(arch_cmpxchg) #define raw_cmpxchg_release arch_cmpxchg #else extern void raw_cmpxchg_release_not_implemented(void); #define raw_cmpxchg_release(...) raw_cmpxchg_release_not_implemented() #endif #if defined(arch_cmpxchg_relaxed) #define raw_cmpxchg_relaxed arch_cmpxchg_relaxed #elif defined(arch_cmpxchg) #define raw_cmpxchg_relaxed arch_cmpxchg #else extern void raw_cmpxchg_relaxed_not_implemented(void); #define raw_cmpxchg_relaxed(...) raw_cmpxchg_relaxed_not_implemented() #endif #if defined(arch_cmpxchg64) #define raw_cmpxchg64 arch_cmpxchg64 #elif defined(arch_cmpxchg64_relaxed) #define raw_cmpxchg64(...) \ __atomic_op_fence(arch_cmpxchg64, __VA_ARGS__) #else extern void raw_cmpxchg64_not_implemented(void); #define raw_cmpxchg64(...) raw_cmpxchg64_not_implemented() #endif #if defined(arch_cmpxchg64_acquire) #define raw_cmpxchg64_acquire arch_cmpxchg64_acquire #elif defined(arch_cmpxchg64_relaxed) #define raw_cmpxchg64_acquire(...) \ __atomic_op_acquire(arch_cmpxchg64, __VA_ARGS__) #elif defined(arch_cmpxchg64) #define raw_cmpxchg64_acquire arch_cmpxchg64 #else extern void raw_cmpxchg64_acquire_not_implemented(void); #define raw_cmpxchg64_acquire(...) raw_cmpxchg64_acquire_not_implemented() #endif #if defined(arch_cmpxchg64_release) #define raw_cmpxchg64_release arch_cmpxchg64_release #elif defined(arch_cmpxchg64_relaxed) #define raw_cmpxchg64_release(...) \ __atomic_op_release(arch_cmpxchg64, __VA_ARGS__) #elif defined(arch_cmpxchg64) #define raw_cmpxchg64_release arch_cmpxchg64 #else extern void raw_cmpxchg64_release_not_implemented(void); #define raw_cmpxchg64_release(...) raw_cmpxchg64_release_not_implemented() #endif #if defined(arch_cmpxchg64_relaxed) #define raw_cmpxchg64_relaxed arch_cmpxchg64_relaxed #elif defined(arch_cmpxchg64) #define raw_cmpxchg64_relaxed arch_cmpxchg64 #else extern void raw_cmpxchg64_relaxed_not_implemented(void); #define raw_cmpxchg64_relaxed(...) raw_cmpxchg64_relaxed_not_implemented() #endif #if defined(arch_cmpxchg128) #define raw_cmpxchg128 arch_cmpxchg128 #elif defined(arch_cmpxchg128_relaxed) #define raw_cmpxchg128(...) \ __atomic_op_fence(arch_cmpxchg128, __VA_ARGS__) #else extern void raw_cmpxchg128_not_implemented(void); #define raw_cmpxchg128(...) raw_cmpxchg128_not_implemented() #endif #if defined(arch_cmpxchg128_acquire) #define raw_cmpxchg128_acquire arch_cmpxchg128_acquire #elif defined(arch_cmpxchg128_relaxed) #define raw_cmpxchg128_acquire(...) \ __atomic_op_acquire(arch_cmpxchg128, __VA_ARGS__) #elif defined(arch_cmpxchg128) #define raw_cmpxchg128_acquire arch_cmpxchg128 #else extern void raw_cmpxchg128_acquire_not_implemented(void); #define raw_cmpxchg128_acquire(...) raw_cmpxchg128_acquire_not_implemented() #endif #if defined(arch_cmpxchg128_release) #define raw_cmpxchg128_release arch_cmpxchg128_release #elif defined(arch_cmpxchg128_relaxed) #define raw_cmpxchg128_release(...) \ __atomic_op_release(arch_cmpxchg128, __VA_ARGS__) #elif defined(arch_cmpxchg128) #define raw_cmpxchg128_release arch_cmpxchg128 #else extern void raw_cmpxchg128_release_not_implemented(void); #define raw_cmpxchg128_release(...) raw_cmpxchg128_release_not_implemented() #endif #if defined(arch_cmpxchg128_relaxed) #define raw_cmpxchg128_relaxed arch_cmpxchg128_relaxed #elif defined(arch_cmpxchg128) #define raw_cmpxchg128_relaxed arch_cmpxchg128 #else extern void raw_cmpxchg128_relaxed_not_implemented(void); #define raw_cmpxchg128_relaxed(...) 
raw_cmpxchg128_relaxed_not_implemented() #endif #if defined(arch_try_cmpxchg) #define raw_try_cmpxchg arch_try_cmpxchg #elif defined(arch_try_cmpxchg_relaxed) #define raw_try_cmpxchg(...) \ __atomic_op_fence(arch_try_cmpxchg, __VA_ARGS__) #else #define raw_try_cmpxchg(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg_acquire) #define raw_try_cmpxchg_acquire arch_try_cmpxchg_acquire #elif defined(arch_try_cmpxchg_relaxed) #define raw_try_cmpxchg_acquire(...) \ __atomic_op_acquire(arch_try_cmpxchg, __VA_ARGS__) #elif defined(arch_try_cmpxchg) #define raw_try_cmpxchg_acquire arch_try_cmpxchg #else #define raw_try_cmpxchg_acquire(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg_acquire((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg_release) #define raw_try_cmpxchg_release arch_try_cmpxchg_release #elif defined(arch_try_cmpxchg_relaxed) #define raw_try_cmpxchg_release(...) \ __atomic_op_release(arch_try_cmpxchg, __VA_ARGS__) #elif defined(arch_try_cmpxchg) #define raw_try_cmpxchg_release arch_try_cmpxchg #else #define raw_try_cmpxchg_release(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg_release((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg_relaxed) #define raw_try_cmpxchg_relaxed arch_try_cmpxchg_relaxed #elif defined(arch_try_cmpxchg) #define raw_try_cmpxchg_relaxed arch_try_cmpxchg #else #define raw_try_cmpxchg_relaxed(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg_relaxed((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg64) #define raw_try_cmpxchg64 arch_try_cmpxchg64 #elif defined(arch_try_cmpxchg64_relaxed) #define raw_try_cmpxchg64(...) \ __atomic_op_fence(arch_try_cmpxchg64, __VA_ARGS__) #else #define raw_try_cmpxchg64(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg64_acquire) #define raw_try_cmpxchg64_acquire arch_try_cmpxchg64_acquire #elif defined(arch_try_cmpxchg64_relaxed) #define raw_try_cmpxchg64_acquire(...) \ __atomic_op_acquire(arch_try_cmpxchg64, __VA_ARGS__) #elif defined(arch_try_cmpxchg64) #define raw_try_cmpxchg64_acquire arch_try_cmpxchg64 #else #define raw_try_cmpxchg64_acquire(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64_acquire((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg64_release) #define raw_try_cmpxchg64_release arch_try_cmpxchg64_release #elif defined(arch_try_cmpxchg64_relaxed) #define raw_try_cmpxchg64_release(...) 
\ __atomic_op_release(arch_try_cmpxchg64, __VA_ARGS__) #elif defined(arch_try_cmpxchg64) #define raw_try_cmpxchg64_release arch_try_cmpxchg64 #else #define raw_try_cmpxchg64_release(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64_release((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg64_relaxed) #define raw_try_cmpxchg64_relaxed arch_try_cmpxchg64_relaxed #elif defined(arch_try_cmpxchg64) #define raw_try_cmpxchg64_relaxed arch_try_cmpxchg64 #else #define raw_try_cmpxchg64_relaxed(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64_relaxed((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg128) #define raw_try_cmpxchg128 arch_try_cmpxchg128 #elif defined(arch_try_cmpxchg128_relaxed) #define raw_try_cmpxchg128(...) \ __atomic_op_fence(arch_try_cmpxchg128, __VA_ARGS__) #else #define raw_try_cmpxchg128(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg128_acquire) #define raw_try_cmpxchg128_acquire arch_try_cmpxchg128_acquire #elif defined(arch_try_cmpxchg128_relaxed) #define raw_try_cmpxchg128_acquire(...) \ __atomic_op_acquire(arch_try_cmpxchg128, __VA_ARGS__) #elif defined(arch_try_cmpxchg128) #define raw_try_cmpxchg128_acquire arch_try_cmpxchg128 #else #define raw_try_cmpxchg128_acquire(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128_acquire((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg128_release) #define raw_try_cmpxchg128_release arch_try_cmpxchg128_release #elif defined(arch_try_cmpxchg128_relaxed) #define raw_try_cmpxchg128_release(...) 
\ __atomic_op_release(arch_try_cmpxchg128, __VA_ARGS__) #elif defined(arch_try_cmpxchg128) #define raw_try_cmpxchg128_release arch_try_cmpxchg128 #else #define raw_try_cmpxchg128_release(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128_release((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg128_relaxed) #define raw_try_cmpxchg128_relaxed arch_try_cmpxchg128_relaxed #elif defined(arch_try_cmpxchg128) #define raw_try_cmpxchg128_relaxed arch_try_cmpxchg128 #else #define raw_try_cmpxchg128_relaxed(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128_relaxed((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #define raw_cmpxchg_local arch_cmpxchg_local #ifdef arch_try_cmpxchg_local #define raw_try_cmpxchg_local arch_try_cmpxchg_local #else #define raw_try_cmpxchg_local(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg_local((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #define raw_cmpxchg64_local arch_cmpxchg64_local #ifdef arch_try_cmpxchg64_local #define raw_try_cmpxchg64_local arch_try_cmpxchg64_local #else #define raw_try_cmpxchg64_local(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64_local((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #define raw_cmpxchg128_local arch_cmpxchg128_local #ifdef arch_try_cmpxchg128_local #define raw_try_cmpxchg128_local arch_try_cmpxchg128_local #else #define raw_try_cmpxchg128_local(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128_local((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #define raw_sync_cmpxchg arch_sync_cmpxchg #ifdef arch_sync_try_cmpxchg #define raw_sync_try_cmpxchg arch_sync_try_cmpxchg #else #define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif /** * raw_atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t * * Atomically loads the value of @v with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_read() elsewhere. * * Return: The value loaded from @v. */ static __always_inline int raw_atomic_read(const atomic_t *v) { return arch_atomic_read(v); } /** * raw_atomic_read_acquire() - atomic load with acquire ordering * @v: pointer to atomic_t * * Atomically loads the value of @v with acquire ordering. * * Safe to use in noinstr code; prefer atomic_read_acquire() elsewhere. * * Return: The value loaded from @v. */ static __always_inline int raw_atomic_read_acquire(const atomic_t *v) { #if defined(arch_atomic_read_acquire) return arch_atomic_read_acquire(v); #else int ret; if (__native_word(atomic_t)) { ret = smp_load_acquire(&(v)->counter); } else { ret = raw_atomic_read(v); __atomic_acquire_fence(); } return ret; #endif } /** * raw_atomic_set() - atomic set with relaxed ordering * @v: pointer to atomic_t * @i: int value to assign * * Atomically sets @v to @i with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_set() elsewhere. 
* * Return: Nothing. */ static __always_inline void raw_atomic_set(atomic_t *v, int i) { arch_atomic_set(v, i); } /** * raw_atomic_set_release() - atomic set with release ordering * @v: pointer to atomic_t * @i: int value to assign * * Atomically sets @v to @i with release ordering. * * Safe to use in noinstr code; prefer atomic_set_release() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_set_release(atomic_t *v, int i) { #if defined(arch_atomic_set_release) arch_atomic_set_release(v, i); #else if (__native_word(atomic_t)) { smp_store_release(&(v)->counter, i); } else { __atomic_release_fence(); raw_atomic_set(v, i); } #endif } /** * raw_atomic_add() - atomic add with relaxed ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_add() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_add(int i, atomic_t *v) { arch_atomic_add(i, v); } /** * raw_atomic_add_return() - atomic add with full ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_add_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_add_return(int i, atomic_t *v) { #if defined(arch_atomic_add_return) return arch_atomic_add_return(i, v); #elif defined(arch_atomic_add_return_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_add_return_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic_add_return" #endif } /** * raw_atomic_add_return_acquire() - atomic add with acquire ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_add_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_add_return_acquire(int i, atomic_t *v) { #if defined(arch_atomic_add_return_acquire) return arch_atomic_add_return_acquire(i, v); #elif defined(arch_atomic_add_return_relaxed) int ret = arch_atomic_add_return_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_add_return) return arch_atomic_add_return(i, v); #else #error "Unable to define raw_atomic_add_return_acquire" #endif } /** * raw_atomic_add_return_release() - atomic add with release ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_add_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_add_return_release(int i, atomic_t *v) { #if defined(arch_atomic_add_return_release) return arch_atomic_add_return_release(i, v); #elif defined(arch_atomic_add_return_relaxed) __atomic_release_fence(); return arch_atomic_add_return_relaxed(i, v); #elif defined(arch_atomic_add_return) return arch_atomic_add_return(i, v); #else #error "Unable to define raw_atomic_add_return_release" #endif } /** * raw_atomic_add_return_relaxed() - atomic add with relaxed ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_add_return_relaxed() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline int raw_atomic_add_return_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_add_return_relaxed) return arch_atomic_add_return_relaxed(i, v); #elif defined(arch_atomic_add_return) return arch_atomic_add_return(i, v); #else #error "Unable to define raw_atomic_add_return_relaxed" #endif } /** * raw_atomic_fetch_add() - atomic add with full ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_fetch_add() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_add(int i, atomic_t *v) { #if defined(arch_atomic_fetch_add) return arch_atomic_fetch_add(i, v); #elif defined(arch_atomic_fetch_add_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_add_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic_fetch_add" #endif } /** * raw_atomic_fetch_add_acquire() - atomic add with acquire ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_fetch_add_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_add_acquire(int i, atomic_t *v) { #if defined(arch_atomic_fetch_add_acquire) return arch_atomic_fetch_add_acquire(i, v); #elif defined(arch_atomic_fetch_add_relaxed) int ret = arch_atomic_fetch_add_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_fetch_add) return arch_atomic_fetch_add(i, v); #else #error "Unable to define raw_atomic_fetch_add_acquire" #endif } /** * raw_atomic_fetch_add_release() - atomic add with release ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_fetch_add_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_add_release(int i, atomic_t *v) { #if defined(arch_atomic_fetch_add_release) return arch_atomic_fetch_add_release(i, v); #elif defined(arch_atomic_fetch_add_relaxed) __atomic_release_fence(); return arch_atomic_fetch_add_relaxed(i, v); #elif defined(arch_atomic_fetch_add) return arch_atomic_fetch_add(i, v); #else #error "Unable to define raw_atomic_fetch_add_release" #endif } /** * raw_atomic_fetch_add_relaxed() - atomic add with relaxed ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_fetch_add_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_add_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_fetch_add_relaxed) return arch_atomic_fetch_add_relaxed(i, v); #elif defined(arch_atomic_fetch_add) return arch_atomic_fetch_add(i, v); #else #error "Unable to define raw_atomic_fetch_add_relaxed" #endif } /** * raw_atomic_sub() - atomic subtract with relaxed ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_sub() elsewhere. * * Return: Nothing. 
*/ static __always_inline void raw_atomic_sub(int i, atomic_t *v) { arch_atomic_sub(i, v); } /** * raw_atomic_sub_return() - atomic subtract with full ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_sub_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_sub_return(int i, atomic_t *v) { #if defined(arch_atomic_sub_return) return arch_atomic_sub_return(i, v); #elif defined(arch_atomic_sub_return_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_sub_return_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic_sub_return" #endif } /** * raw_atomic_sub_return_acquire() - atomic subtract with acquire ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_sub_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_sub_return_acquire(int i, atomic_t *v) { #if defined(arch_atomic_sub_return_acquire) return arch_atomic_sub_return_acquire(i, v); #elif defined(arch_atomic_sub_return_relaxed) int ret = arch_atomic_sub_return_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_sub_return) return arch_atomic_sub_return(i, v); #else #error "Unable to define raw_atomic_sub_return_acquire" #endif } /** * raw_atomic_sub_return_release() - atomic subtract with release ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_sub_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_sub_return_release(int i, atomic_t *v) { #if defined(arch_atomic_sub_return_release) return arch_atomic_sub_return_release(i, v); #elif defined(arch_atomic_sub_return_relaxed) __atomic_release_fence(); return arch_atomic_sub_return_relaxed(i, v); #elif defined(arch_atomic_sub_return) return arch_atomic_sub_return(i, v); #else #error "Unable to define raw_atomic_sub_return_release" #endif } /** * raw_atomic_sub_return_relaxed() - atomic subtract with relaxed ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_sub_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_sub_return_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_sub_return_relaxed) return arch_atomic_sub_return_relaxed(i, v); #elif defined(arch_atomic_sub_return) return arch_atomic_sub_return(i, v); #else #error "Unable to define raw_atomic_sub_return_relaxed" #endif } /** * raw_atomic_fetch_sub() - atomic subtract with full ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_fetch_sub() elsewhere. * * Return: The original value of @v. 
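 *
 * Illustrative sketch (hypothetical helper): the fetch_ form returns the
 * value before the update, so the transition to zero can be detected
 * without a second read:
 *
 *	static __always_inline bool example_put_many(atomic_t *refs, int n)	// hypothetical
 *	{
 *		// Full ordering: accesses before the drop are ordered against
 *		// whoever observes the count reaching zero.
 *		return raw_atomic_fetch_sub(n, refs) == n;	// true if we released the last n references
 *	}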
*/ static __always_inline int raw_atomic_fetch_sub(int i, atomic_t *v) { #if defined(arch_atomic_fetch_sub) return arch_atomic_fetch_sub(i, v); #elif defined(arch_atomic_fetch_sub_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_sub_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic_fetch_sub" #endif } /** * raw_atomic_fetch_sub_acquire() - atomic subtract with acquire ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_fetch_sub_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_sub_acquire(int i, atomic_t *v) { #if defined(arch_atomic_fetch_sub_acquire) return arch_atomic_fetch_sub_acquire(i, v); #elif defined(arch_atomic_fetch_sub_relaxed) int ret = arch_atomic_fetch_sub_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_fetch_sub) return arch_atomic_fetch_sub(i, v); #else #error "Unable to define raw_atomic_fetch_sub_acquire" #endif } /** * raw_atomic_fetch_sub_release() - atomic subtract with release ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_fetch_sub_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_sub_release(int i, atomic_t *v) { #if defined(arch_atomic_fetch_sub_release) return arch_atomic_fetch_sub_release(i, v); #elif defined(arch_atomic_fetch_sub_relaxed) __atomic_release_fence(); return arch_atomic_fetch_sub_relaxed(i, v); #elif defined(arch_atomic_fetch_sub) return arch_atomic_fetch_sub(i, v); #else #error "Unable to define raw_atomic_fetch_sub_release" #endif } /** * raw_atomic_fetch_sub_relaxed() - atomic subtract with relaxed ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_fetch_sub_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_sub_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_fetch_sub_relaxed) return arch_atomic_fetch_sub_relaxed(i, v); #elif defined(arch_atomic_fetch_sub) return arch_atomic_fetch_sub(i, v); #else #error "Unable to define raw_atomic_fetch_sub_relaxed" #endif } /** * raw_atomic_inc() - atomic increment with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_inc() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_inc(atomic_t *v) { #if defined(arch_atomic_inc) arch_atomic_inc(v); #else raw_atomic_add(1, v); #endif } /** * raw_atomic_inc_return() - atomic increment with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_inc_return() elsewhere. * * Return: The updated value of @v. 
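 *
 * Illustrative sketch (hypothetical ID allocator): inc_return hands out
 * unique, monotonically increasing values:
 *
 *	static atomic_t example_next_id = ATOMIC_INIT(0);	// hypothetical
 *
 *	static __always_inline int example_alloc_id(void)
 *	{
 *		// Fully ordered; concurrent callers each observe a distinct value.
 *		return raw_atomic_inc_return(&example_next_id);
 *	}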
*/ static __always_inline int raw_atomic_inc_return(atomic_t *v) { #if defined(arch_atomic_inc_return) return arch_atomic_inc_return(v); #elif defined(arch_atomic_inc_return_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_inc_return_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic_add_return(1, v); #endif } /** * raw_atomic_inc_return_acquire() - atomic increment with acquire ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_inc_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_inc_return_acquire(atomic_t *v) { #if defined(arch_atomic_inc_return_acquire) return arch_atomic_inc_return_acquire(v); #elif defined(arch_atomic_inc_return_relaxed) int ret = arch_atomic_inc_return_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_inc_return) return arch_atomic_inc_return(v); #else return raw_atomic_add_return_acquire(1, v); #endif } /** * raw_atomic_inc_return_release() - atomic increment with release ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with release ordering. * * Safe to use in noinstr code; prefer atomic_inc_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_inc_return_release(atomic_t *v) { #if defined(arch_atomic_inc_return_release) return arch_atomic_inc_return_release(v); #elif defined(arch_atomic_inc_return_relaxed) __atomic_release_fence(); return arch_atomic_inc_return_relaxed(v); #elif defined(arch_atomic_inc_return) return arch_atomic_inc_return(v); #else return raw_atomic_add_return_release(1, v); #endif } /** * raw_atomic_inc_return_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_inc_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_inc_return_relaxed(atomic_t *v) { #if defined(arch_atomic_inc_return_relaxed) return arch_atomic_inc_return_relaxed(v); #elif defined(arch_atomic_inc_return) return arch_atomic_inc_return(v); #else return raw_atomic_add_return_relaxed(1, v); #endif } /** * raw_atomic_fetch_inc() - atomic increment with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_fetch_inc() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_inc(atomic_t *v) { #if defined(arch_atomic_fetch_inc) return arch_atomic_fetch_inc(v); #elif defined(arch_atomic_fetch_inc_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_inc_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic_fetch_add(1, v); #endif } /** * raw_atomic_fetch_inc_acquire() - atomic increment with acquire ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_fetch_inc_acquire() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline int raw_atomic_fetch_inc_acquire(atomic_t *v) { #if defined(arch_atomic_fetch_inc_acquire) return arch_atomic_fetch_inc_acquire(v); #elif defined(arch_atomic_fetch_inc_relaxed) int ret = arch_atomic_fetch_inc_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_fetch_inc) return arch_atomic_fetch_inc(v); #else return raw_atomic_fetch_add_acquire(1, v); #endif } /** * raw_atomic_fetch_inc_release() - atomic increment with release ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with release ordering. * * Safe to use in noinstr code; prefer atomic_fetch_inc_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_inc_release(atomic_t *v) { #if defined(arch_atomic_fetch_inc_release) return arch_atomic_fetch_inc_release(v); #elif defined(arch_atomic_fetch_inc_relaxed) __atomic_release_fence(); return arch_atomic_fetch_inc_relaxed(v); #elif defined(arch_atomic_fetch_inc) return arch_atomic_fetch_inc(v); #else return raw_atomic_fetch_add_release(1, v); #endif } /** * raw_atomic_fetch_inc_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_fetch_inc_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_inc_relaxed(atomic_t *v) { #if defined(arch_atomic_fetch_inc_relaxed) return arch_atomic_fetch_inc_relaxed(v); #elif defined(arch_atomic_fetch_inc) return arch_atomic_fetch_inc(v); #else return raw_atomic_fetch_add_relaxed(1, v); #endif } /** * raw_atomic_dec() - atomic decrement with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_dec() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_dec(atomic_t *v) { #if defined(arch_atomic_dec) arch_atomic_dec(v); #else raw_atomic_sub(1, v); #endif } /** * raw_atomic_dec_return() - atomic decrement with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_dec_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_dec_return(atomic_t *v) { #if defined(arch_atomic_dec_return) return arch_atomic_dec_return(v); #elif defined(arch_atomic_dec_return_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_dec_return_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic_sub_return(1, v); #endif } /** * raw_atomic_dec_return_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_dec_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_dec_return_acquire(atomic_t *v) { #if defined(arch_atomic_dec_return_acquire) return arch_atomic_dec_return_acquire(v); #elif defined(arch_atomic_dec_return_relaxed) int ret = arch_atomic_dec_return_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_dec_return) return arch_atomic_dec_return(v); #else return raw_atomic_sub_return_acquire(1, v); #endif } /** * raw_atomic_dec_return_release() - atomic decrement with release ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with release ordering. 
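 *
 * Illustrative sketch (hypothetical caller): release ordering on the
 * decrement makes the caller's earlier stores visible before the lowered
 * count can be observed:
 *
 *	static __always_inline int example_finish_work(atomic_t *busy)	// hypothetical
 *	{
 *		// Results written before this point are ordered before the
 *		// decrement becomes visible to a waiter polling @busy.
 *		return raw_atomic_dec_return_release(busy);
 *	}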
* * Safe to use in noinstr code; prefer atomic_dec_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_dec_return_release(atomic_t *v) { #if defined(arch_atomic_dec_return_release) return arch_atomic_dec_return_release(v); #elif defined(arch_atomic_dec_return_relaxed) __atomic_release_fence(); return arch_atomic_dec_return_relaxed(v); #elif defined(arch_atomic_dec_return) return arch_atomic_dec_return(v); #else return raw_atomic_sub_return_release(1, v); #endif } /** * raw_atomic_dec_return_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_dec_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline int raw_atomic_dec_return_relaxed(atomic_t *v) { #if defined(arch_atomic_dec_return_relaxed) return arch_atomic_dec_return_relaxed(v); #elif defined(arch_atomic_dec_return) return arch_atomic_dec_return(v); #else return raw_atomic_sub_return_relaxed(1, v); #endif } /** * raw_atomic_fetch_dec() - atomic decrement with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_fetch_dec() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_dec(atomic_t *v) { #if defined(arch_atomic_fetch_dec) return arch_atomic_fetch_dec(v); #elif defined(arch_atomic_fetch_dec_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_dec_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic_fetch_sub(1, v); #endif } /** * raw_atomic_fetch_dec_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_fetch_dec_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_dec_acquire(atomic_t *v) { #if defined(arch_atomic_fetch_dec_acquire) return arch_atomic_fetch_dec_acquire(v); #elif defined(arch_atomic_fetch_dec_relaxed) int ret = arch_atomic_fetch_dec_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_fetch_dec) return arch_atomic_fetch_dec(v); #else return raw_atomic_fetch_sub_acquire(1, v); #endif } /** * raw_atomic_fetch_dec_release() - atomic decrement with release ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with release ordering. * * Safe to use in noinstr code; prefer atomic_fetch_dec_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_dec_release(atomic_t *v) { #if defined(arch_atomic_fetch_dec_release) return arch_atomic_fetch_dec_release(v); #elif defined(arch_atomic_fetch_dec_relaxed) __atomic_release_fence(); return arch_atomic_fetch_dec_relaxed(v); #elif defined(arch_atomic_fetch_dec) return arch_atomic_fetch_dec(v); #else return raw_atomic_fetch_sub_release(1, v); #endif } /** * raw_atomic_fetch_dec_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_fetch_dec_relaxed() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline int raw_atomic_fetch_dec_relaxed(atomic_t *v) { #if defined(arch_atomic_fetch_dec_relaxed) return arch_atomic_fetch_dec_relaxed(v); #elif defined(arch_atomic_fetch_dec) return arch_atomic_fetch_dec(v); #else return raw_atomic_fetch_sub_relaxed(1, v); #endif } /** * raw_atomic_and() - atomic bitwise AND with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_and() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_and(int i, atomic_t *v) { arch_atomic_and(i, v); } /** * raw_atomic_fetch_and() - atomic bitwise AND with full ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_fetch_and() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_and(int i, atomic_t *v) { #if defined(arch_atomic_fetch_and) return arch_atomic_fetch_and(i, v); #elif defined(arch_atomic_fetch_and_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_and_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic_fetch_and" #endif } /** * raw_atomic_fetch_and_acquire() - atomic bitwise AND with acquire ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_fetch_and_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_and_acquire(int i, atomic_t *v) { #if defined(arch_atomic_fetch_and_acquire) return arch_atomic_fetch_and_acquire(i, v); #elif defined(arch_atomic_fetch_and_relaxed) int ret = arch_atomic_fetch_and_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_fetch_and) return arch_atomic_fetch_and(i, v); #else #error "Unable to define raw_atomic_fetch_and_acquire" #endif } /** * raw_atomic_fetch_and_release() - atomic bitwise AND with release ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_fetch_and_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_and_release(int i, atomic_t *v) { #if defined(arch_atomic_fetch_and_release) return arch_atomic_fetch_and_release(i, v); #elif defined(arch_atomic_fetch_and_relaxed) __atomic_release_fence(); return arch_atomic_fetch_and_relaxed(i, v); #elif defined(arch_atomic_fetch_and) return arch_atomic_fetch_and(i, v); #else #error "Unable to define raw_atomic_fetch_and_release" #endif } /** * raw_atomic_fetch_and_relaxed() - atomic bitwise AND with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_fetch_and_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_and_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_fetch_and_relaxed) return arch_atomic_fetch_and_relaxed(i, v); #elif defined(arch_atomic_fetch_and) return arch_atomic_fetch_and(i, v); #else #error "Unable to define raw_atomic_fetch_and_relaxed" #endif } /** * raw_atomic_andnot() - atomic bitwise AND NOT with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. 
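 *
 * Illustrative sketch (hypothetical flag word and bit definitions): andnot
 * clears the bits set in @i, which is also how the generic fallback is
 * built from raw_atomic_and(~@i, @v):
 *
 *	#define EXAMPLE_FLAG_BUSY	0x1	// hypothetical flag bits
 *	#define EXAMPLE_FLAG_DIRTY	0x2
 *
 *	static __always_inline void example_clear_busy(atomic_t *flags)
 *	{
 *		raw_atomic_andnot(EXAMPLE_FLAG_BUSY, flags);	// flags &= ~BUSY, atomically
 *	}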
* * Safe to use in noinstr code; prefer atomic_andnot() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_andnot(int i, atomic_t *v) { #if defined(arch_atomic_andnot) arch_atomic_andnot(i, v); #else raw_atomic_and(~i, v); #endif } /** * raw_atomic_fetch_andnot() - atomic bitwise AND NOT with full ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with full ordering. * * Safe to use in noinstr code; prefer atomic_fetch_andnot() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_andnot(int i, atomic_t *v) { #if defined(arch_atomic_fetch_andnot) return arch_atomic_fetch_andnot(i, v); #elif defined(arch_atomic_fetch_andnot_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_andnot_relaxed(i, v); __atomic_post_full_fence(); return ret; #else return raw_atomic_fetch_and(~i, v); #endif } /** * raw_atomic_fetch_andnot_acquire() - atomic bitwise AND NOT with acquire ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_fetch_andnot_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_andnot_acquire(int i, atomic_t *v) { #if defined(arch_atomic_fetch_andnot_acquire) return arch_atomic_fetch_andnot_acquire(i, v); #elif defined(arch_atomic_fetch_andnot_relaxed) int ret = arch_atomic_fetch_andnot_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_fetch_andnot) return arch_atomic_fetch_andnot(i, v); #else return raw_atomic_fetch_and_acquire(~i, v); #endif } /** * raw_atomic_fetch_andnot_release() - atomic bitwise AND NOT with release ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with release ordering. * * Safe to use in noinstr code; prefer atomic_fetch_andnot_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_andnot_release(int i, atomic_t *v) { #if defined(arch_atomic_fetch_andnot_release) return arch_atomic_fetch_andnot_release(i, v); #elif defined(arch_atomic_fetch_andnot_relaxed) __atomic_release_fence(); return arch_atomic_fetch_andnot_relaxed(i, v); #elif defined(arch_atomic_fetch_andnot) return arch_atomic_fetch_andnot(i, v); #else return raw_atomic_fetch_and_release(~i, v); #endif } /** * raw_atomic_fetch_andnot_relaxed() - atomic bitwise AND NOT with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_fetch_andnot_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_andnot_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_fetch_andnot_relaxed) return arch_atomic_fetch_andnot_relaxed(i, v); #elif defined(arch_atomic_fetch_andnot) return arch_atomic_fetch_andnot(i, v); #else return raw_atomic_fetch_and_relaxed(~i, v); #endif } /** * raw_atomic_or() - atomic bitwise OR with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_or() elsewhere. * * Return: Nothing. 
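 *
 * Illustrative sketch (reusing the hypothetical EXAMPLE_FLAG_* bits from
 * the raw_atomic_andnot() sketch above): or sets bits atomically; use
 * raw_atomic_fetch_or() instead when the previous state matters:
 *
 *	static __always_inline void example_mark_dirty(atomic_t *flags)
 *	{
 *		raw_atomic_or(EXAMPLE_FLAG_DIRTY, flags);	// flags |= DIRTY, atomically
 *	}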
*/ static __always_inline void raw_atomic_or(int i, atomic_t *v) { arch_atomic_or(i, v); } /** * raw_atomic_fetch_or() - atomic bitwise OR with full ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_fetch_or() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_or(int i, atomic_t *v) { #if defined(arch_atomic_fetch_or) return arch_atomic_fetch_or(i, v); #elif defined(arch_atomic_fetch_or_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic_fetch_or" #endif } /** * raw_atomic_fetch_or_acquire() - atomic bitwise OR with acquire ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_fetch_or_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_or_acquire(int i, atomic_t *v) { #if defined(arch_atomic_fetch_or_acquire) return arch_atomic_fetch_or_acquire(i, v); #elif defined(arch_atomic_fetch_or_relaxed) int ret = arch_atomic_fetch_or_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_fetch_or) return arch_atomic_fetch_or(i, v); #else #error "Unable to define raw_atomic_fetch_or_acquire" #endif } /** * raw_atomic_fetch_or_release() - atomic bitwise OR with release ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_fetch_or_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_or_release(int i, atomic_t *v) { #if defined(arch_atomic_fetch_or_release) return arch_atomic_fetch_or_release(i, v); #elif defined(arch_atomic_fetch_or_relaxed) __atomic_release_fence(); return arch_atomic_fetch_or_relaxed(i, v); #elif defined(arch_atomic_fetch_or) return arch_atomic_fetch_or(i, v); #else #error "Unable to define raw_atomic_fetch_or_release" #endif } /** * raw_atomic_fetch_or_relaxed() - atomic bitwise OR with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_fetch_or_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_or_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_fetch_or_relaxed) return arch_atomic_fetch_or_relaxed(i, v); #elif defined(arch_atomic_fetch_or) return arch_atomic_fetch_or(i, v); #else #error "Unable to define raw_atomic_fetch_or_relaxed" #endif } /** * raw_atomic_xor() - atomic bitwise XOR with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_xor() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_xor(int i, atomic_t *v) { arch_atomic_xor(i, v); } /** * raw_atomic_fetch_xor() - atomic bitwise XOR with full ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_fetch_xor() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline int raw_atomic_fetch_xor(int i, atomic_t *v) { #if defined(arch_atomic_fetch_xor) return arch_atomic_fetch_xor(i, v); #elif defined(arch_atomic_fetch_xor_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_fetch_xor_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic_fetch_xor" #endif } /** * raw_atomic_fetch_xor_acquire() - atomic bitwise XOR with acquire ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_fetch_xor_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_xor_acquire(int i, atomic_t *v) { #if defined(arch_atomic_fetch_xor_acquire) return arch_atomic_fetch_xor_acquire(i, v); #elif defined(arch_atomic_fetch_xor_relaxed) int ret = arch_atomic_fetch_xor_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_fetch_xor) return arch_atomic_fetch_xor(i, v); #else #error "Unable to define raw_atomic_fetch_xor_acquire" #endif } /** * raw_atomic_fetch_xor_release() - atomic bitwise XOR with release ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_fetch_xor_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_xor_release(int i, atomic_t *v) { #if defined(arch_atomic_fetch_xor_release) return arch_atomic_fetch_xor_release(i, v); #elif defined(arch_atomic_fetch_xor_relaxed) __atomic_release_fence(); return arch_atomic_fetch_xor_relaxed(i, v); #elif defined(arch_atomic_fetch_xor) return arch_atomic_fetch_xor(i, v); #else #error "Unable to define raw_atomic_fetch_xor_release" #endif } /** * raw_atomic_fetch_xor_relaxed() - atomic bitwise XOR with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_fetch_xor_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_xor_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_fetch_xor_relaxed) return arch_atomic_fetch_xor_relaxed(i, v); #elif defined(arch_atomic_fetch_xor) return arch_atomic_fetch_xor(i, v); #else #error "Unable to define raw_atomic_fetch_xor_relaxed" #endif } /** * raw_atomic_xchg() - atomic exchange with full ordering * @v: pointer to atomic_t * @new: int value to assign * * Atomically updates @v to @new with full ordering. * * Safe to use in noinstr code; prefer atomic_xchg() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_xchg(atomic_t *v, int new) { #if defined(arch_atomic_xchg) return arch_atomic_xchg(v, new); #elif defined(arch_atomic_xchg_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_xchg_relaxed(v, new); __atomic_post_full_fence(); return ret; #else return raw_xchg(&v->counter, new); #endif } /** * raw_atomic_xchg_acquire() - atomic exchange with acquire ordering * @v: pointer to atomic_t * @new: int value to assign * * Atomically updates @v to @new with acquire ordering. * * Safe to use in noinstr code; prefer atomic_xchg_acquire() elsewhere. * * Return: The original value of @v. 
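 *
 * Illustrative sketch (hypothetical single-slot mailbox): xchg takes
 * ownership of a posted value and clears the slot in one step; the acquire
 * form orders the caller's subsequent reads after the exchange:
 *
 *	static atomic_t example_pending = ATOMIC_INIT(0);	// hypothetical
 *
 *	static __always_inline int example_consume_pending(void)
 *	{
 *		return raw_atomic_xchg_acquire(&example_pending, 0);
 *	}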
*/ static __always_inline int raw_atomic_xchg_acquire(atomic_t *v, int new) { #if defined(arch_atomic_xchg_acquire) return arch_atomic_xchg_acquire(v, new); #elif defined(arch_atomic_xchg_relaxed) int ret = arch_atomic_xchg_relaxed(v, new); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_xchg) return arch_atomic_xchg(v, new); #else return raw_xchg_acquire(&v->counter, new); #endif } /** * raw_atomic_xchg_release() - atomic exchange with release ordering * @v: pointer to atomic_t * @new: int value to assign * * Atomically updates @v to @new with release ordering. * * Safe to use in noinstr code; prefer atomic_xchg_release() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_xchg_release(atomic_t *v, int new) { #if defined(arch_atomic_xchg_release) return arch_atomic_xchg_release(v, new); #elif defined(arch_atomic_xchg_relaxed) __atomic_release_fence(); return arch_atomic_xchg_relaxed(v, new); #elif defined(arch_atomic_xchg) return arch_atomic_xchg(v, new); #else return raw_xchg_release(&v->counter, new); #endif } /** * raw_atomic_xchg_relaxed() - atomic exchange with relaxed ordering * @v: pointer to atomic_t * @new: int value to assign * * Atomically updates @v to @new with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_xchg_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_xchg_relaxed(atomic_t *v, int new) { #if defined(arch_atomic_xchg_relaxed) return arch_atomic_xchg_relaxed(v, new); #elif defined(arch_atomic_xchg) return arch_atomic_xchg(v, new); #else return raw_xchg_relaxed(&v->counter, new); #endif } /** * raw_atomic_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic_t * @old: int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_cmpxchg(atomic_t *v, int old, int new) { #if defined(arch_atomic_cmpxchg) return arch_atomic_cmpxchg(v, old, new); #elif defined(arch_atomic_cmpxchg_relaxed) int ret; __atomic_pre_full_fence(); ret = arch_atomic_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; #else return raw_cmpxchg(&v->counter, old, new); #endif } /** * raw_atomic_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic_t * @old: int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_cmpxchg_acquire() elsewhere. * * Return: The original value of @v. 
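 *
 * Illustrative sketch (hypothetical and deliberately simplified; real code
 * should use the kernel's locking primitives): cmpxchg_acquire is the
 * classic building block for a 0 -> 1 try-lock transition:
 *
 *	static __always_inline bool example_trylock(atomic_t *lock)
 *	{
 *		// Succeeds only if the lock word was 0; acquire ordering keeps
 *		// the critical section after the successful transition.
 *		return raw_atomic_cmpxchg_acquire(lock, 0, 1) == 0;
 *	}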
 */
static __always_inline int
raw_atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
{
#if defined(arch_atomic_cmpxchg_acquire)
	return arch_atomic_cmpxchg_acquire(v, old, new);
#elif defined(arch_atomic_cmpxchg_relaxed)
	int ret = arch_atomic_cmpxchg_relaxed(v, old, new);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_cmpxchg)
	return arch_atomic_cmpxchg(v, old, new);
#else
	return raw_cmpxchg_acquire(&v->counter, old, new);
#endif
}

/**
 * raw_atomic_cmpxchg_release() - atomic compare and exchange with release ordering
 * @v: pointer to atomic_t
 * @old: int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with release ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_cmpxchg_release() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_cmpxchg_release(atomic_t *v, int old, int new)
{
#if defined(arch_atomic_cmpxchg_release)
	return arch_atomic_cmpxchg_release(v, old, new);
#elif defined(arch_atomic_cmpxchg_relaxed)
	__atomic_release_fence();
	return arch_atomic_cmpxchg_relaxed(v, old, new);
#elif defined(arch_atomic_cmpxchg)
	return arch_atomic_cmpxchg(v, old, new);
#else
	return raw_cmpxchg_release(&v->counter, old, new);
#endif
}

/**
 * raw_atomic_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering
 * @v: pointer to atomic_t
 * @old: int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with relaxed ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_cmpxchg_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
{
#if defined(arch_atomic_cmpxchg_relaxed)
	return arch_atomic_cmpxchg_relaxed(v, old, new);
#elif defined(arch_atomic_cmpxchg)
	return arch_atomic_cmpxchg(v, old, new);
#else
	return raw_cmpxchg_relaxed(&v->counter, old, new);
#endif
}

/**
 * raw_atomic_try_cmpxchg() - atomic compare and exchange with full ordering
 * @v: pointer to atomic_t
 * @old: pointer to int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with full ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
{
#if defined(arch_atomic_try_cmpxchg)
	return arch_atomic_try_cmpxchg(v, old, new);
#elif defined(arch_atomic_try_cmpxchg_relaxed)
	bool ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_try_cmpxchg_relaxed(v, old, new);
	__atomic_post_full_fence();
	return ret;
#else
	int r, o = *old;
	r = raw_atomic_cmpxchg(v, o, new);
	if (unlikely(r != o))
		*old = r;
	return likely(r == o);
#endif
}

/**
 * raw_atomic_try_cmpxchg_acquire() - atomic compare and exchange with acquire ordering
 * @v: pointer to atomic_t
 * @old: pointer to int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with acquire ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
{
#if defined(arch_atomic_try_cmpxchg_acquire)
	return arch_atomic_try_cmpxchg_acquire(v, old, new);
#elif defined(arch_atomic_try_cmpxchg_relaxed)
	bool ret = arch_atomic_try_cmpxchg_relaxed(v, old, new);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_try_cmpxchg)
	return arch_atomic_try_cmpxchg(v, old, new);
#else
	int r, o = *old;
	r = raw_atomic_cmpxchg_acquire(v, o, new);
	if (unlikely(r != o))
		*old = r;
	return likely(r == o);
#endif
}

/**
 * raw_atomic_try_cmpxchg_release() - atomic compare and exchange with release ordering
 * @v: pointer to atomic_t
 * @old: pointer to int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with release ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
{
#if defined(arch_atomic_try_cmpxchg_release)
	return arch_atomic_try_cmpxchg_release(v, old, new);
#elif defined(arch_atomic_try_cmpxchg_relaxed)
	__atomic_release_fence();
	return arch_atomic_try_cmpxchg_relaxed(v, old, new);
#elif defined(arch_atomic_try_cmpxchg)
	return arch_atomic_try_cmpxchg(v, old, new);
#else
	int r, o = *old;
	r = raw_atomic_cmpxchg_release(v, o, new);
	if (unlikely(r != o))
		*old = r;
	return likely(r == o);
#endif
}

/**
 * raw_atomic_try_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering
 * @v: pointer to atomic_t
 * @old: pointer to int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with relaxed ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
raw_atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
{
#if defined(arch_atomic_try_cmpxchg_relaxed)
	return arch_atomic_try_cmpxchg_relaxed(v, old, new);
#elif defined(arch_atomic_try_cmpxchg)
	return arch_atomic_try_cmpxchg(v, old, new);
#else
	int r, o = *old;
	r = raw_atomic_cmpxchg_relaxed(v, o, new);
	if (unlikely(r != o))
		*old = r;
	return likely(r == o);
#endif
}

/**
 * raw_atomic_sub_and_test() - atomic subtract and test if zero with full ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_sub_and_test() elsewhere.
 *
 * Return: @true if the resulting value of @v is zero, @false otherwise.
 */
static __always_inline bool
raw_atomic_sub_and_test(int i, atomic_t *v)
{
#if defined(arch_atomic_sub_and_test)
	return arch_atomic_sub_and_test(i, v);
#else
	return raw_atomic_sub_return(i, v) == 0;
#endif
}

/**
 * raw_atomic_dec_and_test() - atomic decrement and test if zero with full ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_dec_and_test() elsewhere.
 *
 * Return: @true if the resulting value of @v is zero, @false otherwise.
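 *
 * Illustrative sketch (hypothetical object type, field and destructor): the
 * classic reference-count put pattern built on dec_and_test:
 *
 *	static __always_inline void example_put(struct example_obj *obj)
 *	{
 *		if (raw_atomic_dec_and_test(&obj->refs))	// hypothetical ->refs member
 *			example_free(obj);			// hypothetical destructor
 *	}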
*/ static __always_inline bool raw_atomic_dec_and_test(atomic_t *v) { #if defined(arch_atomic_dec_and_test) return arch_atomic_dec_and_test(v); #else return raw_atomic_dec_return(v) == 0; #endif } /** * raw_atomic_inc_and_test() - atomic increment and test if zero with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_inc_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic_inc_and_test(atomic_t *v) { #if defined(arch_atomic_inc_and_test) return arch_atomic_inc_and_test(v); #else return raw_atomic_inc_return(v) == 0; #endif } /** * raw_atomic_add_negative() - atomic add and test if negative with full ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_add_negative() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic_add_negative(int i, atomic_t *v) { #if defined(arch_atomic_add_negative) return arch_atomic_add_negative(i, v); #elif defined(arch_atomic_add_negative_relaxed) bool ret; __atomic_pre_full_fence(); ret = arch_atomic_add_negative_relaxed(i, v); __atomic_post_full_fence(); return ret; #else return raw_atomic_add_return(i, v) < 0; #endif } /** * raw_atomic_add_negative_acquire() - atomic add and test if negative with acquire ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_add_negative_acquire() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic_add_negative_acquire(int i, atomic_t *v) { #if defined(arch_atomic_add_negative_acquire) return arch_atomic_add_negative_acquire(i, v); #elif defined(arch_atomic_add_negative_relaxed) bool ret = arch_atomic_add_negative_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_add_negative) return arch_atomic_add_negative(i, v); #else return raw_atomic_add_return_acquire(i, v) < 0; #endif } /** * raw_atomic_add_negative_release() - atomic add and test if negative with release ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_add_negative_release() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic_add_negative_release(int i, atomic_t *v) { #if defined(arch_atomic_add_negative_release) return arch_atomic_add_negative_release(i, v); #elif defined(arch_atomic_add_negative_relaxed) __atomic_release_fence(); return arch_atomic_add_negative_relaxed(i, v); #elif defined(arch_atomic_add_negative) return arch_atomic_add_negative(i, v); #else return raw_atomic_add_return_release(i, v) < 0; #endif } /** * raw_atomic_add_negative_relaxed() - atomic add and test if negative with relaxed ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_add_negative_relaxed() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. 
*/ static __always_inline bool raw_atomic_add_negative_relaxed(int i, atomic_t *v) { #if defined(arch_atomic_add_negative_relaxed) return arch_atomic_add_negative_relaxed(i, v); #elif defined(arch_atomic_add_negative) return arch_atomic_add_negative(i, v); #else return raw_atomic_add_return_relaxed(i, v) < 0; #endif } /** * raw_atomic_fetch_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic_t * @a: int value to add * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_fetch_add_unless() elsewhere. * * Return: The original value of @v. */ static __always_inline int raw_atomic_fetch_add_unless(atomic_t *v, int a, int u) { #if defined(arch_atomic_fetch_add_unless) return arch_atomic_fetch_add_unless(v, a, u); #else int c = raw_atomic_read(v); do { if (unlikely(c == u)) break; } while (!raw_atomic_try_cmpxchg(v, &c, c + a)); return c; #endif } /** * raw_atomic_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic_t * @a: int value to add * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_add_unless() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_add_unless(atomic_t *v, int a, int u) { #if defined(arch_atomic_add_unless) return arch_atomic_add_unless(v, a, u); #else return raw_atomic_fetch_add_unless(v, a, u) != u; #endif } /** * raw_atomic_inc_not_zero() - atomic increment unless zero with full ordering * @v: pointer to atomic_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_not_zero() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_inc_not_zero(atomic_t *v) { #if defined(arch_atomic_inc_not_zero) return arch_atomic_inc_not_zero(v); #else return raw_atomic_add_unless(v, 1, 0); #endif } /** * raw_atomic_inc_unless_negative() - atomic increment unless negative with full ordering * @v: pointer to atomic_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_unless_negative() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_inc_unless_negative(atomic_t *v) { #if defined(arch_atomic_inc_unless_negative) return arch_atomic_inc_unless_negative(v); #else int c = raw_atomic_read(v); do { if (unlikely(c < 0)) return false; } while (!raw_atomic_try_cmpxchg(v, &c, c + 1)); return true; #endif } /** * raw_atomic_dec_unless_positive() - atomic decrement unless positive with full ordering * @v: pointer to atomic_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_unless_positive() elsewhere. * * Return: @true if @v was updated, @false otherwise. 
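 *
 * Illustrative sketch (hypothetical predicate, not from this header): the
 * conditional operations in this family, including this one, reduce to the
 * same read + try_cmpxchg loop, which can be reused for custom predicates:
 *
 *	// hypothetical: add @a only while the counter stays below @limit
 *	static __always_inline bool example_add_below(atomic_t *v, int a, int limit)
 *	{
 *		int c = raw_atomic_read(v);
 *
 *		do {
 *			if (c + a > limit)
 *				return false;
 *		} while (!raw_atomic_try_cmpxchg(v, &c, c + a));
 *
 *		return true;
 *	}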
*/ static __always_inline bool raw_atomic_dec_unless_positive(atomic_t *v) { #if defined(arch_atomic_dec_unless_positive) return arch_atomic_dec_unless_positive(v); #else int c = raw_atomic_read(v); do { if (unlikely(c > 0)) return false; } while (!raw_atomic_try_cmpxchg(v, &c, c - 1)); return true; #endif } /** * raw_atomic_dec_if_positive() - atomic decrement if positive with full ordering * @v: pointer to atomic_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_if_positive() elsewhere. * * Return: The old value of (@v - 1), regardless of whether @v was updated. */ static __always_inline int raw_atomic_dec_if_positive(atomic_t *v) { #if defined(arch_atomic_dec_if_positive) return arch_atomic_dec_if_positive(v); #else int dec, c = raw_atomic_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; } while (!raw_atomic_try_cmpxchg(v, &c, dec)); return dec; #endif } #ifdef CONFIG_GENERIC_ATOMIC64 #include <asm-generic/atomic64.h> #endif /** * raw_atomic64_read() - atomic load with relaxed ordering * @v: pointer to atomic64_t * * Atomically loads the value of @v with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_read() elsewhere. * * Return: The value loaded from @v. */ static __always_inline s64 raw_atomic64_read(const atomic64_t *v) { return arch_atomic64_read(v); } /** * raw_atomic64_read_acquire() - atomic load with acquire ordering * @v: pointer to atomic64_t * * Atomically loads the value of @v with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_read_acquire() elsewhere. * * Return: The value loaded from @v. */ static __always_inline s64 raw_atomic64_read_acquire(const atomic64_t *v) { #if defined(arch_atomic64_read_acquire) return arch_atomic64_read_acquire(v); #else s64 ret; if (__native_word(atomic64_t)) { ret = smp_load_acquire(&(v)->counter); } else { ret = raw_atomic64_read(v); __atomic_acquire_fence(); } return ret; #endif } /** * raw_atomic64_set() - atomic set with relaxed ordering * @v: pointer to atomic64_t * @i: s64 value to assign * * Atomically sets @v to @i with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_set() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_set(atomic64_t *v, s64 i) { arch_atomic64_set(v, i); } /** * raw_atomic64_set_release() - atomic set with release ordering * @v: pointer to atomic64_t * @i: s64 value to assign * * Atomically sets @v to @i with release ordering. * * Safe to use in noinstr code; prefer atomic64_set_release() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_set_release(atomic64_t *v, s64 i) { #if defined(arch_atomic64_set_release) arch_atomic64_set_release(v, i); #else if (__native_word(atomic64_t)) { smp_store_release(&(v)->counter, i); } else { __atomic_release_fence(); raw_atomic64_set(v, i); } #endif } /** * raw_atomic64_add() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_add() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_add(s64 i, atomic64_t *v) { arch_atomic64_add(i, v); } /** * raw_atomic64_add_return() - atomic add with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. 
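 *
 * Illustrative sketch (hypothetical 64-bit byte counter): the atomic64_t
 * operations mirror the atomic_t API for s64 values:
 *
 *	static atomic64_t example_bytes = ATOMIC64_INIT(0);	// hypothetical
 *
 *	static __always_inline s64 example_account(s64 len)
 *	{
 *		// Fully ordered; returns the running total after adding len.
 *		return raw_atomic64_add_return(len, &example_bytes);
 *	}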
* * Safe to use in noinstr code; prefer atomic64_add_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_add_return(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_return) return arch_atomic64_add_return(i, v); #elif defined(arch_atomic64_add_return_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_add_return_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_add_return" #endif } /** * raw_atomic64_add_return_acquire() - atomic add with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_add_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_add_return_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_return_acquire) return arch_atomic64_add_return_acquire(i, v); #elif defined(arch_atomic64_add_return_relaxed) s64 ret = arch_atomic64_add_return_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_add_return) return arch_atomic64_add_return(i, v); #else #error "Unable to define raw_atomic64_add_return_acquire" #endif } /** * raw_atomic64_add_return_release() - atomic add with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_add_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_add_return_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_return_release) return arch_atomic64_add_return_release(i, v); #elif defined(arch_atomic64_add_return_relaxed) __atomic_release_fence(); return arch_atomic64_add_return_relaxed(i, v); #elif defined(arch_atomic64_add_return) return arch_atomic64_add_return(i, v); #else #error "Unable to define raw_atomic64_add_return_release" #endif } /** * raw_atomic64_add_return_relaxed() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_add_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_add_return_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_return_relaxed) return arch_atomic64_add_return_relaxed(i, v); #elif defined(arch_atomic64_add_return) return arch_atomic64_add_return(i, v); #else #error "Unable to define raw_atomic64_add_return_relaxed" #endif } /** * raw_atomic64_fetch_add() - atomic add with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_add() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_add(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_add) return arch_atomic64_fetch_add(i, v); #elif defined(arch_atomic64_fetch_add_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_add_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_add" #endif } /** * raw_atomic64_fetch_add_acquire() - atomic add with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. 
* * Safe to use in noinstr code; prefer atomic64_fetch_add_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_add_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_add_acquire) return arch_atomic64_fetch_add_acquire(i, v); #elif defined(arch_atomic64_fetch_add_relaxed) s64 ret = arch_atomic64_fetch_add_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_add) return arch_atomic64_fetch_add(i, v); #else #error "Unable to define raw_atomic64_fetch_add_acquire" #endif } /** * raw_atomic64_fetch_add_release() - atomic add with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_add_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_add_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_add_release) return arch_atomic64_fetch_add_release(i, v); #elif defined(arch_atomic64_fetch_add_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_add_relaxed(i, v); #elif defined(arch_atomic64_fetch_add) return arch_atomic64_fetch_add(i, v); #else #error "Unable to define raw_atomic64_fetch_add_release" #endif } /** * raw_atomic64_fetch_add_relaxed() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_add_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_add_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_add_relaxed) return arch_atomic64_fetch_add_relaxed(i, v); #elif defined(arch_atomic64_fetch_add) return arch_atomic64_fetch_add(i, v); #else #error "Unable to define raw_atomic64_fetch_add_relaxed" #endif } /** * raw_atomic64_sub() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_sub() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_sub(s64 i, atomic64_t *v) { arch_atomic64_sub(i, v); } /** * raw_atomic64_sub_return() - atomic subtract with full ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_sub_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_sub_return(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_return) return arch_atomic64_sub_return(i, v); #elif defined(arch_atomic64_sub_return_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_sub_return_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_sub_return" #endif } /** * raw_atomic64_sub_return_acquire() - atomic subtract with acquire ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_sub_return_acquire() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline s64 raw_atomic64_sub_return_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_return_acquire) return arch_atomic64_sub_return_acquire(i, v); #elif defined(arch_atomic64_sub_return_relaxed) s64 ret = arch_atomic64_sub_return_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_sub_return) return arch_atomic64_sub_return(i, v); #else #error "Unable to define raw_atomic64_sub_return_acquire" #endif } /** * raw_atomic64_sub_return_release() - atomic subtract with release ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_sub_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_sub_return_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_return_release) return arch_atomic64_sub_return_release(i, v); #elif defined(arch_atomic64_sub_return_relaxed) __atomic_release_fence(); return arch_atomic64_sub_return_relaxed(i, v); #elif defined(arch_atomic64_sub_return) return arch_atomic64_sub_return(i, v); #else #error "Unable to define raw_atomic64_sub_return_release" #endif } /** * raw_atomic64_sub_return_relaxed() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_sub_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_sub_return_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_return_relaxed) return arch_atomic64_sub_return_relaxed(i, v); #elif defined(arch_atomic64_sub_return) return arch_atomic64_sub_return(i, v); #else #error "Unable to define raw_atomic64_sub_return_relaxed" #endif } /** * raw_atomic64_fetch_sub() - atomic subtract with full ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_sub() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_sub(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_sub) return arch_atomic64_fetch_sub(i, v); #elif defined(arch_atomic64_fetch_sub_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_sub_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_sub" #endif } /** * raw_atomic64_fetch_sub_acquire() - atomic subtract with acquire ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_sub_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_sub_acquire) return arch_atomic64_fetch_sub_acquire(i, v); #elif defined(arch_atomic64_fetch_sub_relaxed) s64 ret = arch_atomic64_fetch_sub_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_sub) return arch_atomic64_fetch_sub(i, v); #else #error "Unable to define raw_atomic64_fetch_sub_acquire" #endif } /** * raw_atomic64_fetch_sub_release() - atomic subtract with release ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with release ordering. 
* * Safe to use in noinstr code; prefer atomic64_fetch_sub_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_sub_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_sub_release) return arch_atomic64_fetch_sub_release(i, v); #elif defined(arch_atomic64_fetch_sub_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_sub_relaxed(i, v); #elif defined(arch_atomic64_fetch_sub) return arch_atomic64_fetch_sub(i, v); #else #error "Unable to define raw_atomic64_fetch_sub_release" #endif } /** * raw_atomic64_fetch_sub_relaxed() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_sub_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_sub_relaxed) return arch_atomic64_fetch_sub_relaxed(i, v); #elif defined(arch_atomic64_fetch_sub) return arch_atomic64_fetch_sub(i, v); #else #error "Unable to define raw_atomic64_fetch_sub_relaxed" #endif } /** * raw_atomic64_inc() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_inc() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_inc(atomic64_t *v) { #if defined(arch_atomic64_inc) arch_atomic64_inc(v); #else raw_atomic64_add(1, v); #endif } /** * raw_atomic64_inc_return() - atomic increment with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_inc_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_inc_return(atomic64_t *v) { #if defined(arch_atomic64_inc_return) return arch_atomic64_inc_return(v); #elif defined(arch_atomic64_inc_return_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_inc_return_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_add_return(1, v); #endif } /** * raw_atomic64_inc_return_acquire() - atomic increment with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_inc_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_inc_return_acquire(atomic64_t *v) { #if defined(arch_atomic64_inc_return_acquire) return arch_atomic64_inc_return_acquire(v); #elif defined(arch_atomic64_inc_return_relaxed) s64 ret = arch_atomic64_inc_return_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_inc_return) return arch_atomic64_inc_return(v); #else return raw_atomic64_add_return_acquire(1, v); #endif } /** * raw_atomic64_inc_return_release() - atomic increment with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with release ordering. * * Safe to use in noinstr code; prefer atomic64_inc_return_release() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline s64 raw_atomic64_inc_return_release(atomic64_t *v) { #if defined(arch_atomic64_inc_return_release) return arch_atomic64_inc_return_release(v); #elif defined(arch_atomic64_inc_return_relaxed) __atomic_release_fence(); return arch_atomic64_inc_return_relaxed(v); #elif defined(arch_atomic64_inc_return) return arch_atomic64_inc_return(v); #else return raw_atomic64_add_return_release(1, v); #endif } /** * raw_atomic64_inc_return_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_inc_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_inc_return_relaxed(atomic64_t *v) { #if defined(arch_atomic64_inc_return_relaxed) return arch_atomic64_inc_return_relaxed(v); #elif defined(arch_atomic64_inc_return) return arch_atomic64_inc_return(v); #else return raw_atomic64_add_return_relaxed(1, v); #endif } /** * raw_atomic64_fetch_inc() - atomic increment with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_inc() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_inc(atomic64_t *v) { #if defined(arch_atomic64_fetch_inc) return arch_atomic64_fetch_inc(v); #elif defined(arch_atomic64_fetch_inc_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_inc_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_fetch_add(1, v); #endif } /** * raw_atomic64_fetch_inc_acquire() - atomic increment with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_inc_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_inc_acquire(atomic64_t *v) { #if defined(arch_atomic64_fetch_inc_acquire) return arch_atomic64_fetch_inc_acquire(v); #elif defined(arch_atomic64_fetch_inc_relaxed) s64 ret = arch_atomic64_fetch_inc_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_inc) return arch_atomic64_fetch_inc(v); #else return raw_atomic64_fetch_add_acquire(1, v); #endif } /** * raw_atomic64_fetch_inc_release() - atomic increment with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_inc_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_inc_release(atomic64_t *v) { #if defined(arch_atomic64_fetch_inc_release) return arch_atomic64_fetch_inc_release(v); #elif defined(arch_atomic64_fetch_inc_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_inc_relaxed(v); #elif defined(arch_atomic64_fetch_inc) return arch_atomic64_fetch_inc(v); #else return raw_atomic64_fetch_add_release(1, v); #endif } /** * raw_atomic64_fetch_inc_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_inc_relaxed() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_inc_relaxed(atomic64_t *v) { #if defined(arch_atomic64_fetch_inc_relaxed) return arch_atomic64_fetch_inc_relaxed(v); #elif defined(arch_atomic64_fetch_inc) return arch_atomic64_fetch_inc(v); #else return raw_atomic64_fetch_add_relaxed(1, v); #endif } /** * raw_atomic64_dec() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_dec() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_dec(atomic64_t *v) { #if defined(arch_atomic64_dec) arch_atomic64_dec(v); #else raw_atomic64_sub(1, v); #endif } /** * raw_atomic64_dec_return() - atomic decrement with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_dec_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_dec_return(atomic64_t *v) { #if defined(arch_atomic64_dec_return) return arch_atomic64_dec_return(v); #elif defined(arch_atomic64_dec_return_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_dec_return_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_sub_return(1, v); #endif } /** * raw_atomic64_dec_return_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_dec_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_dec_return_acquire(atomic64_t *v) { #if defined(arch_atomic64_dec_return_acquire) return arch_atomic64_dec_return_acquire(v); #elif defined(arch_atomic64_dec_return_relaxed) s64 ret = arch_atomic64_dec_return_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_dec_return) return arch_atomic64_dec_return(v); #else return raw_atomic64_sub_return_acquire(1, v); #endif } /** * raw_atomic64_dec_return_release() - atomic decrement with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with release ordering. * * Safe to use in noinstr code; prefer atomic64_dec_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_dec_return_release(atomic64_t *v) { #if defined(arch_atomic64_dec_return_release) return arch_atomic64_dec_return_release(v); #elif defined(arch_atomic64_dec_return_relaxed) __atomic_release_fence(); return arch_atomic64_dec_return_relaxed(v); #elif defined(arch_atomic64_dec_return) return arch_atomic64_dec_return(v); #else return raw_atomic64_sub_return_release(1, v); #endif } /** * raw_atomic64_dec_return_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_dec_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_dec_return_relaxed(atomic64_t *v) { #if defined(arch_atomic64_dec_return_relaxed) return arch_atomic64_dec_return_relaxed(v); #elif defined(arch_atomic64_dec_return) return arch_atomic64_dec_return(v); #else return raw_atomic64_sub_return_relaxed(1, v); #endif } /** * raw_atomic64_fetch_dec() - atomic decrement with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with full ordering. 
* * Safe to use in noinstr code; prefer atomic64_fetch_dec() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_dec(atomic64_t *v) { #if defined(arch_atomic64_fetch_dec) return arch_atomic64_fetch_dec(v); #elif defined(arch_atomic64_fetch_dec_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_dec_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_fetch_sub(1, v); #endif } /** * raw_atomic64_fetch_dec_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_dec_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_dec_acquire(atomic64_t *v) { #if defined(arch_atomic64_fetch_dec_acquire) return arch_atomic64_fetch_dec_acquire(v); #elif defined(arch_atomic64_fetch_dec_relaxed) s64 ret = arch_atomic64_fetch_dec_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_dec) return arch_atomic64_fetch_dec(v); #else return raw_atomic64_fetch_sub_acquire(1, v); #endif } /** * raw_atomic64_fetch_dec_release() - atomic decrement with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_dec_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_dec_release(atomic64_t *v) { #if defined(arch_atomic64_fetch_dec_release) return arch_atomic64_fetch_dec_release(v); #elif defined(arch_atomic64_fetch_dec_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_dec_relaxed(v); #elif defined(arch_atomic64_fetch_dec) return arch_atomic64_fetch_dec(v); #else return raw_atomic64_fetch_sub_release(1, v); #endif } /** * raw_atomic64_fetch_dec_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_dec_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_dec_relaxed(atomic64_t *v) { #if defined(arch_atomic64_fetch_dec_relaxed) return arch_atomic64_fetch_dec_relaxed(v); #elif defined(arch_atomic64_fetch_dec) return arch_atomic64_fetch_dec(v); #else return raw_atomic64_fetch_sub_relaxed(1, v); #endif } /** * raw_atomic64_and() - atomic bitwise AND with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_and() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_and(s64 i, atomic64_t *v) { arch_atomic64_and(i, v); } /** * raw_atomic64_fetch_and() - atomic bitwise AND with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_and() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_and(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_and) return arch_atomic64_fetch_and(i, v); #elif defined(arch_atomic64_fetch_and_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_and_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_and" #endif } /** * raw_atomic64_fetch_and_acquire() - atomic bitwise AND with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_and_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_and_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_and_acquire) return arch_atomic64_fetch_and_acquire(i, v); #elif defined(arch_atomic64_fetch_and_relaxed) s64 ret = arch_atomic64_fetch_and_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_and) return arch_atomic64_fetch_and(i, v); #else #error "Unable to define raw_atomic64_fetch_and_acquire" #endif } /** * raw_atomic64_fetch_and_release() - atomic bitwise AND with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_and_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_and_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_and_release) return arch_atomic64_fetch_and_release(i, v); #elif defined(arch_atomic64_fetch_and_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_and_relaxed(i, v); #elif defined(arch_atomic64_fetch_and) return arch_atomic64_fetch_and(i, v); #else #error "Unable to define raw_atomic64_fetch_and_release" #endif } /** * raw_atomic64_fetch_and_relaxed() - atomic bitwise AND with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_and_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_and_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_and_relaxed) return arch_atomic64_fetch_and_relaxed(i, v); #elif defined(arch_atomic64_fetch_and) return arch_atomic64_fetch_and(i, v); #else #error "Unable to define raw_atomic64_fetch_and_relaxed" #endif } /** * raw_atomic64_andnot() - atomic bitwise AND NOT with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_andnot() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_andnot(s64 i, atomic64_t *v) { #if defined(arch_atomic64_andnot) arch_atomic64_andnot(i, v); #else raw_atomic64_and(~i, v); #endif } /** * raw_atomic64_fetch_andnot() - atomic bitwise AND NOT with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_andnot() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_andnot(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_andnot) return arch_atomic64_fetch_andnot(i, v); #elif defined(arch_atomic64_fetch_andnot_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_andnot_relaxed(i, v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_fetch_and(~i, v); #endif } /** * raw_atomic64_fetch_andnot_acquire() - atomic bitwise AND NOT with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_andnot_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_andnot_acquire) return arch_atomic64_fetch_andnot_acquire(i, v); #elif defined(arch_atomic64_fetch_andnot_relaxed) s64 ret = arch_atomic64_fetch_andnot_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_andnot) return arch_atomic64_fetch_andnot(i, v); #else return raw_atomic64_fetch_and_acquire(~i, v); #endif } /** * raw_atomic64_fetch_andnot_release() - atomic bitwise AND NOT with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_andnot_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_andnot_release) return arch_atomic64_fetch_andnot_release(i, v); #elif defined(arch_atomic64_fetch_andnot_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_andnot_relaxed(i, v); #elif defined(arch_atomic64_fetch_andnot) return arch_atomic64_fetch_andnot(i, v); #else return raw_atomic64_fetch_and_release(~i, v); #endif } /** * raw_atomic64_fetch_andnot_relaxed() - atomic bitwise AND NOT with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_andnot_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_andnot_relaxed) return arch_atomic64_fetch_andnot_relaxed(i, v); #elif defined(arch_atomic64_fetch_andnot) return arch_atomic64_fetch_andnot(i, v); #else return raw_atomic64_fetch_and_relaxed(~i, v); #endif } /** * raw_atomic64_or() - atomic bitwise OR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_or() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_or(s64 i, atomic64_t *v) { arch_atomic64_or(i, v); } /** * raw_atomic64_fetch_or() - atomic bitwise OR with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_or() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_or(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_or) return arch_atomic64_fetch_or(i, v); #elif defined(arch_atomic64_fetch_or_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_or" #endif } /** * raw_atomic64_fetch_or_acquire() - atomic bitwise OR with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_or_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_or_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_or_acquire) return arch_atomic64_fetch_or_acquire(i, v); #elif defined(arch_atomic64_fetch_or_relaxed) s64 ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_or) return arch_atomic64_fetch_or(i, v); #else #error "Unable to define raw_atomic64_fetch_or_acquire" #endif } /** * raw_atomic64_fetch_or_release() - atomic bitwise OR with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_or_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_or_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_or_release) return arch_atomic64_fetch_or_release(i, v); #elif defined(arch_atomic64_fetch_or_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_or_relaxed(i, v); #elif defined(arch_atomic64_fetch_or) return arch_atomic64_fetch_or(i, v); #else #error "Unable to define raw_atomic64_fetch_or_release" #endif } /** * raw_atomic64_fetch_or_relaxed() - atomic bitwise OR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_or_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_or_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_or_relaxed) return arch_atomic64_fetch_or_relaxed(i, v); #elif defined(arch_atomic64_fetch_or) return arch_atomic64_fetch_or(i, v); #else #error "Unable to define raw_atomic64_fetch_or_relaxed" #endif } /** * raw_atomic64_xor() - atomic bitwise XOR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_xor() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_xor(s64 i, atomic64_t *v) { arch_atomic64_xor(i, v); } /** * raw_atomic64_fetch_xor() - atomic bitwise XOR with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_xor() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_xor(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_xor) return arch_atomic64_fetch_xor(i, v); #elif defined(arch_atomic64_fetch_xor_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_xor_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_xor" #endif } /** * raw_atomic64_fetch_xor_acquire() - atomic bitwise XOR with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_xor_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_xor_acquire) return arch_atomic64_fetch_xor_acquire(i, v); #elif defined(arch_atomic64_fetch_xor_relaxed) s64 ret = arch_atomic64_fetch_xor_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_xor) return arch_atomic64_fetch_xor(i, v); #else #error "Unable to define raw_atomic64_fetch_xor_acquire" #endif } /** * raw_atomic64_fetch_xor_release() - atomic bitwise XOR with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_xor_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_xor_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_xor_release) return arch_atomic64_fetch_xor_release(i, v); #elif defined(arch_atomic64_fetch_xor_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_xor_relaxed(i, v); #elif defined(arch_atomic64_fetch_xor) return arch_atomic64_fetch_xor(i, v); #else #error "Unable to define raw_atomic64_fetch_xor_release" #endif } /** * raw_atomic64_fetch_xor_relaxed() - atomic bitwise XOR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_xor_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_xor_relaxed) return arch_atomic64_fetch_xor_relaxed(i, v); #elif defined(arch_atomic64_fetch_xor) return arch_atomic64_fetch_xor(i, v); #else #error "Unable to define raw_atomic64_fetch_xor_relaxed" #endif } /** * raw_atomic64_xchg() - atomic exchange with full ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with full ordering. * * Safe to use in noinstr code; prefer atomic64_xchg() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_xchg(atomic64_t *v, s64 new) { #if defined(arch_atomic64_xchg) return arch_atomic64_xchg(v, new); #elif defined(arch_atomic64_xchg_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_xchg_relaxed(v, new); __atomic_post_full_fence(); return ret; #else return raw_xchg(&v->counter, new); #endif } /** * raw_atomic64_xchg_acquire() - atomic exchange with acquire ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_xchg_acquire() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_xchg_acquire(atomic64_t *v, s64 new) { #if defined(arch_atomic64_xchg_acquire) return arch_atomic64_xchg_acquire(v, new); #elif defined(arch_atomic64_xchg_relaxed) s64 ret = arch_atomic64_xchg_relaxed(v, new); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_xchg) return arch_atomic64_xchg(v, new); #else return raw_xchg_acquire(&v->counter, new); #endif } /** * raw_atomic64_xchg_release() - atomic exchange with release ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with release ordering. * * Safe to use in noinstr code; prefer atomic64_xchg_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_xchg_release(atomic64_t *v, s64 new) { #if defined(arch_atomic64_xchg_release) return arch_atomic64_xchg_release(v, new); #elif defined(arch_atomic64_xchg_relaxed) __atomic_release_fence(); return arch_atomic64_xchg_relaxed(v, new); #elif defined(arch_atomic64_xchg) return arch_atomic64_xchg(v, new); #else return raw_xchg_release(&v->counter, new); #endif } /** * raw_atomic64_xchg_relaxed() - atomic exchange with relaxed ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_xchg_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_xchg_relaxed(atomic64_t *v, s64 new) { #if defined(arch_atomic64_xchg_relaxed) return arch_atomic64_xchg_relaxed(v, new); #elif defined(arch_atomic64_xchg) return arch_atomic64_xchg(v, new); #else return raw_xchg_relaxed(&v->counter, new); #endif } /** * raw_atomic64_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { #if defined(arch_atomic64_cmpxchg) return arch_atomic64_cmpxchg(v, old, new); #elif defined(arch_atomic64_cmpxchg_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; #else return raw_cmpxchg(&v->counter, old, new); #endif } /** * raw_atomic64_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_acquire() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) { #if defined(arch_atomic64_cmpxchg_acquire) return arch_atomic64_cmpxchg_acquire(v, old, new); #elif defined(arch_atomic64_cmpxchg_relaxed) s64 ret = arch_atomic64_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_cmpxchg) return arch_atomic64_cmpxchg(v, old, new); #else return raw_cmpxchg_acquire(&v->counter, old, new); #endif } /** * raw_atomic64_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { #if defined(arch_atomic64_cmpxchg_release) return arch_atomic64_cmpxchg_release(v, old, new); #elif defined(arch_atomic64_cmpxchg_relaxed) __atomic_release_fence(); return arch_atomic64_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic64_cmpxchg) return arch_atomic64_cmpxchg(v, old, new); #else return raw_cmpxchg_release(&v->counter, old, new); #endif } /** * raw_atomic64_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) { #if defined(arch_atomic64_cmpxchg_relaxed) return arch_atomic64_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic64_cmpxchg) return arch_atomic64_cmpxchg(v, old, new); #else return raw_cmpxchg_relaxed(&v->counter, old, new); #endif } /** * raw_atomic64_try_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { #if defined(arch_atomic64_try_cmpxchg) return arch_atomic64_try_cmpxchg(v, old, new); #elif defined(arch_atomic64_try_cmpxchg_relaxed) bool ret; __atomic_pre_full_fence(); ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; #else s64 r, o = *old; r = raw_atomic64_cmpxchg(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic64_try_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided.
* * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { #if defined(arch_atomic64_try_cmpxchg_acquire) return arch_atomic64_try_cmpxchg_acquire(v, old, new); #elif defined(arch_atomic64_try_cmpxchg_relaxed) bool ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_try_cmpxchg) return arch_atomic64_try_cmpxchg(v, old, new); #else s64 r, o = *old; r = raw_atomic64_cmpxchg_acquire(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic64_try_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { #if defined(arch_atomic64_try_cmpxchg_release) return arch_atomic64_try_cmpxchg_release(v, old, new); #elif defined(arch_atomic64_try_cmpxchg_relaxed) __atomic_release_fence(); return arch_atomic64_try_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic64_try_cmpxchg) return arch_atomic64_try_cmpxchg(v, old, new); #else s64 r, o = *old; r = raw_atomic64_cmpxchg_release(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic64_try_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) { #if defined(arch_atomic64_try_cmpxchg_relaxed) return arch_atomic64_try_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic64_try_cmpxchg) return arch_atomic64_try_cmpxchg(v, old, new); #else s64 r, o = *old; r = raw_atomic64_cmpxchg_relaxed(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic64_sub_and_test() - atomic subtract and test if zero with full ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_sub_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic64_sub_and_test(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_and_test) return arch_atomic64_sub_and_test(i, v); #else return raw_atomic64_sub_return(i, v) == 0; #endif } /** * raw_atomic64_dec_and_test() - atomic decrement and test if zero with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_dec_and_test() elsewhere.
* * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic64_dec_and_test(atomic64_t *v) { #if defined(arch_atomic64_dec_and_test) return arch_atomic64_dec_and_test(v); #else return raw_atomic64_dec_return(v) == 0; #endif } /** * raw_atomic64_inc_and_test() - atomic increment and test if zero with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_inc_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic64_inc_and_test(atomic64_t *v) { #if defined(arch_atomic64_inc_and_test) return arch_atomic64_inc_and_test(v); #else return raw_atomic64_inc_return(v) == 0; #endif } /** * raw_atomic64_add_negative() - atomic add and test if negative with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_add_negative() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic64_add_negative(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_negative) return arch_atomic64_add_negative(i, v); #elif defined(arch_atomic64_add_negative_relaxed) bool ret; __atomic_pre_full_fence(); ret = arch_atomic64_add_negative_relaxed(i, v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_add_return(i, v) < 0; #endif } /** * raw_atomic64_add_negative_acquire() - atomic add and test if negative with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_add_negative_acquire() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic64_add_negative_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_negative_acquire) return arch_atomic64_add_negative_acquire(i, v); #elif defined(arch_atomic64_add_negative_relaxed) bool ret = arch_atomic64_add_negative_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_add_negative) return arch_atomic64_add_negative(i, v); #else return raw_atomic64_add_return_acquire(i, v) < 0; #endif } /** * raw_atomic64_add_negative_release() - atomic add and test if negative with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_add_negative_release() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic64_add_negative_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_negative_release) return arch_atomic64_add_negative_release(i, v); #elif defined(arch_atomic64_add_negative_relaxed) __atomic_release_fence(); return arch_atomic64_add_negative_relaxed(i, v); #elif defined(arch_atomic64_add_negative) return arch_atomic64_add_negative(i, v); #else return raw_atomic64_add_return_release(i, v) < 0; #endif } /** * raw_atomic64_add_negative_relaxed() - atomic add and test if negative with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_add_negative_relaxed() elsewhere. 
* * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_negative_relaxed) return arch_atomic64_add_negative_relaxed(i, v); #elif defined(arch_atomic64_add_negative) return arch_atomic64_add_negative(i, v); #else return raw_atomic64_add_return_relaxed(i, v) < 0; #endif } /** * raw_atomic64_fetch_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic64_t * @a: s64 value to add * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_fetch_add_unless() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { #if defined(arch_atomic64_fetch_add_unless) return arch_atomic64_fetch_add_unless(v, a, u); #else s64 c = raw_atomic64_read(v); do { if (unlikely(c == u)) break; } while (!raw_atomic64_try_cmpxchg(v, &c, c + a)); return c; #endif } /** * raw_atomic64_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic64_t * @a: s64 value to add * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_add_unless() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { #if defined(arch_atomic64_add_unless) return arch_atomic64_add_unless(v, a, u); #else return raw_atomic64_fetch_add_unless(v, a, u) != u; #endif } /** * raw_atomic64_inc_not_zero() - atomic increment unless zero with full ordering * @v: pointer to atomic64_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_not_zero() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic64_inc_not_zero(atomic64_t *v) { #if defined(arch_atomic64_inc_not_zero) return arch_atomic64_inc_not_zero(v); #else return raw_atomic64_add_unless(v, 1, 0); #endif } /** * raw_atomic64_inc_unless_negative() - atomic increment unless negative with full ordering * @v: pointer to atomic64_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_unless_negative() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic64_inc_unless_negative(atomic64_t *v) { #if defined(arch_atomic64_inc_unless_negative) return arch_atomic64_inc_unless_negative(v); #else s64 c = raw_atomic64_read(v); do { if (unlikely(c < 0)) return false; } while (!raw_atomic64_try_cmpxchg(v, &c, c + 1)); return true; #endif } /** * raw_atomic64_dec_unless_positive() - atomic decrement unless positive with full ordering * @v: pointer to atomic64_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_unless_positive() elsewhere. * * Return: @true if @v was updated, @false otherwise. 
*/ static __always_inline bool raw_atomic64_dec_unless_positive(atomic64_t *v) { #if defined(arch_atomic64_dec_unless_positive) return arch_atomic64_dec_unless_positive(v); #else s64 c = raw_atomic64_read(v); do { if (unlikely(c > 0)) return false; } while (!raw_atomic64_try_cmpxchg(v, &c, c - 1)); return true; #endif } /** * raw_atomic64_dec_if_positive() - atomic decrement if positive with full ordering * @v: pointer to atomic64_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_if_positive() elsewhere. * * Return: The old value of (@v - 1), regardless of whether @v was updated. */ static __always_inline s64 raw_atomic64_dec_if_positive(atomic64_t *v) { #if defined(arch_atomic64_dec_if_positive) return arch_atomic64_dec_if_positive(v); #else s64 dec, c = raw_atomic64_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; } while (!raw_atomic64_try_cmpxchg(v, &c, dec)); return dec; #endif } #endif /* _LINUX_ATOMIC_FALLBACK_H */ // 14850c0b0db20c62fdc78ccd1d42b98b88d76331
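The conditional fallbacks above (fetch_add_unless(), inc_not_zero(), inc_unless_negative(), dec_if_positive(), ...) are all built from the same read plus try_cmpxchg() retry loop. As a minimal usage sketch, and assuming a caller outside noinstr code (where the instrumented atomic64_*() wrappers are preferred over the raw_*() forms, per the comments above), the helpers below show that pattern; the function names and the "credit counter" framing are hypothetical, chosen only for illustration.

#include <linux/atomic.h>
#include <linux/types.h>

/*
 * Editor's illustrative sketch, not part of the generated header.
 * Raise @v to at least @new_max, using the same try_cmpxchg() retry
 * loop the fallbacks above use: on failure, @old is refreshed with
 * the current value of @v, so the loop simply re-evaluates.
 */
static inline void example_atomic64_track_max(atomic64_t *v, s64 new_max)
{
	s64 old = atomic64_read(v);

	do {
		if (old >= new_max)
			return;	/* @v is already at least @new_max */
	} while (!atomic64_try_cmpxchg(v, &old, new_max));
}

/*
 * Consume one credit if any are available. atomic64_dec_if_positive()
 * returns the decremented value, which is negative when @credits was
 * already zero (or below) and therefore left unmodified.
 */
static inline bool example_take_credit(atomic64_t *credits)
{
	return atomic64_dec_if_positive(credits) >= 0;
}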
// SPDX-License-Identifier: GPL-2.0-only /* * Marvell NFC driver: Firmware downloader * * Copyright (C) 2015, Marvell International Ltd.
*/ #include <linux/module.h> #include <asm/unaligned.h> #include <linux/firmware.h> #include <linux/nfc.h> #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include "nfcmrvl.h" #define FW_DNLD_TIMEOUT 15000 #define NCI_OP_PROPRIETARY_BOOT_CMD nci_opcode_pack(NCI_GID_PROPRIETARY, \ NCI_OP_PROP_BOOT_CMD) /* FW download states */ enum { STATE_RESET = 0, STATE_INIT, STATE_SET_REF_CLOCK, STATE_SET_HI_CONFIG, STATE_OPEN_LC, STATE_FW_DNLD, STATE_CLOSE_LC, STATE_BOOT }; enum { SUBSTATE_WAIT_COMMAND = 0, SUBSTATE_WAIT_ACK_CREDIT, SUBSTATE_WAIT_NACK_CREDIT, SUBSTATE_WAIT_DATA_CREDIT, }; /* * Patterns for responses */ static const uint8_t nci_pattern_core_reset_ntf[] = { 0x60, 0x00, 0x02, 0xA0, 0x01 }; static const uint8_t nci_pattern_core_init_rsp[] = { 0x40, 0x01, 0x11 }; static const uint8_t nci_pattern_core_set_config_rsp[] = { 0x40, 0x02, 0x02, 0x00, 0x00 }; static const uint8_t nci_pattern_core_conn_create_rsp[] = { 0x40, 0x04, 0x04, 0x00 }; static const uint8_t nci_pattern_core_conn_close_rsp[] = { 0x40, 0x05, 0x01, 0x00 }; static const uint8_t nci_pattern_core_conn_credits_ntf[] = { 0x60, 0x06, 0x03, 0x01, NCI_CORE_LC_CONNID_PROP_FW_DL, 0x01 }; static const uint8_t nci_pattern_proprietary_boot_rsp[] = { 0x4F, 0x3A, 0x01, 0x00 }; static struct sk_buff *alloc_lc_skb(struct nfcmrvl_private *priv, uint8_t plen) { struct sk_buff *skb; struct nci_data_hdr *hdr; skb = nci_skb_alloc(priv->ndev, (NCI_DATA_HDR_SIZE + plen), GFP_KERNEL); if (!skb) return NULL; hdr = skb_put(skb, NCI_DATA_HDR_SIZE); hdr->conn_id = NCI_CORE_LC_CONNID_PROP_FW_DL; hdr->rfu = 0; hdr->plen = plen; nci_mt_set((__u8 *)hdr, NCI_MT_DATA_PKT); nci_pbf_set((__u8 *)hdr, NCI_PBF_LAST); return skb; } static void fw_dnld_over(struct nfcmrvl_private *priv, u32 error) { if (priv->fw_dnld.fw) { release_firmware(priv->fw_dnld.fw); priv->fw_dnld.fw = NULL; priv->fw_dnld.header = NULL; priv->fw_dnld.binary_config = NULL; } atomic_set(&priv->ndev->cmd_cnt, 0); if (timer_pending(&priv->ndev->cmd_timer)) del_timer_sync(&priv->ndev->cmd_timer); if (timer_pending(&priv->fw_dnld.timer)) del_timer_sync(&priv->fw_dnld.timer); nfc_info(priv->dev, "FW loading over (%d)]\n", error); if (error != 0) { /* failed, halt the chip to avoid power consumption */ nfcmrvl_chip_halt(priv); } nfc_fw_download_done(priv->ndev->nfc_dev, priv->fw_dnld.name, error); } static void fw_dnld_timeout(struct timer_list *t) { struct nfcmrvl_private *priv = from_timer(priv, t, fw_dnld.timer); nfc_err(priv->dev, "FW loading timeout"); priv->fw_dnld.state = STATE_RESET; fw_dnld_over(priv, -ETIMEDOUT); } static int process_state_reset(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_core_reset_ntf) != skb->len || memcmp(skb->data, nci_pattern_core_reset_ntf, sizeof(nci_pattern_core_reset_ntf))) return -EINVAL; nfc_info(priv->dev, "BootROM reset, start fw download\n"); /* Start FW download state machine */ priv->fw_dnld.state = STATE_INIT; nci_send_cmd(priv->ndev, NCI_OP_CORE_INIT_CMD, 0, NULL); return 0; } static int process_state_init(struct nfcmrvl_private *priv, const struct sk_buff *skb) { struct nci_core_set_config_cmd cmd; if (sizeof(nci_pattern_core_init_rsp) >= skb->len || memcmp(skb->data, nci_pattern_core_init_rsp, sizeof(nci_pattern_core_init_rsp))) return -EINVAL; cmd.num_params = 1; cmd.param.id = NFCMRVL_PROP_REF_CLOCK; cmd.param.len = 4; memcpy(cmd.param.val, &priv->fw_dnld.header->ref_clock, 4); nci_send_cmd(priv->ndev, NCI_OP_CORE_SET_CONFIG_CMD, 3 + cmd.param.len, &cmd); priv->fw_dnld.state = STATE_SET_REF_CLOCK; return 0; 
} static void create_lc(struct nfcmrvl_private *priv) { uint8_t param[2] = { NCI_CORE_LC_PROP_FW_DL, 0x0 }; priv->fw_dnld.state = STATE_OPEN_LC; nci_send_cmd(priv->ndev, NCI_OP_CORE_CONN_CREATE_CMD, 2, param); } static int process_state_set_ref_clock(struct nfcmrvl_private *priv, const struct sk_buff *skb) { struct nci_core_set_config_cmd cmd; if (sizeof(nci_pattern_core_set_config_rsp) != skb->len || memcmp(skb->data, nci_pattern_core_set_config_rsp, skb->len)) return -EINVAL; cmd.num_params = 1; cmd.param.id = NFCMRVL_PROP_SET_HI_CONFIG; switch (priv->phy) { case NFCMRVL_PHY_UART: cmd.param.len = 5; memcpy(cmd.param.val, &priv->fw_dnld.binary_config->uart.baudrate, 4); cmd.param.val[4] = priv->fw_dnld.binary_config->uart.flow_control; break; case NFCMRVL_PHY_I2C: cmd.param.len = 5; memcpy(cmd.param.val, &priv->fw_dnld.binary_config->i2c.clk, 4); cmd.param.val[4] = 0; break; case NFCMRVL_PHY_SPI: cmd.param.len = 5; memcpy(cmd.param.val, &priv->fw_dnld.binary_config->spi.clk, 4); cmd.param.val[4] = 0; break; default: create_lc(priv); return 0; } priv->fw_dnld.state = STATE_SET_HI_CONFIG; nci_send_cmd(priv->ndev, NCI_OP_CORE_SET_CONFIG_CMD, 3 + cmd.param.len, &cmd); return 0; } static int process_state_set_hi_config(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_core_set_config_rsp) != skb->len || memcmp(skb->data, nci_pattern_core_set_config_rsp, skb->len)) return -EINVAL; create_lc(priv); return 0; } static int process_state_open_lc(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_core_conn_create_rsp) >= skb->len || memcmp(skb->data, nci_pattern_core_conn_create_rsp, sizeof(nci_pattern_core_conn_create_rsp))) return -EINVAL; priv->fw_dnld.state = STATE_FW_DNLD; priv->fw_dnld.substate = SUBSTATE_WAIT_COMMAND; priv->fw_dnld.offset = priv->fw_dnld.binary_config->offset; return 0; } static int process_state_fw_dnld(struct nfcmrvl_private *priv, struct sk_buff *skb) { uint16_t len; uint16_t comp_len; struct sk_buff *out_skb; switch (priv->fw_dnld.substate) { case SUBSTATE_WAIT_COMMAND: /* * Command format: * B0..2: NCI header * B3 : Helper command (0xA5) * B4..5: le16 data size * B6..7: le16 data size complement (~) * B8..N: payload */ /* Remove NCI HDR */ skb_pull(skb, 3); if (skb->data[0] != HELPER_CMD_PACKET_FORMAT || skb->len != 5) { nfc_err(priv->dev, "bad command"); return -EINVAL; } skb_pull(skb, 1); len = get_unaligned_le16(skb->data); skb_pull(skb, 2); comp_len = get_unaligned_le16(skb->data); memcpy(&comp_len, skb->data, 2); skb_pull(skb, 2); if (((~len) & 0xFFFF) != comp_len) { nfc_err(priv->dev, "bad len complement: %x %x %x", len, comp_len, (~len & 0xFFFF)); out_skb = alloc_lc_skb(priv, 1); if (!out_skb) return -ENOMEM; skb_put_u8(out_skb, 0xBF); nci_send_frame(priv->ndev, out_skb); priv->fw_dnld.substate = SUBSTATE_WAIT_NACK_CREDIT; return 0; } priv->fw_dnld.chunk_len = len; out_skb = alloc_lc_skb(priv, 1); if (!out_skb) return -ENOMEM; skb_put_u8(out_skb, HELPER_ACK_PACKET_FORMAT); nci_send_frame(priv->ndev, out_skb); priv->fw_dnld.substate = SUBSTATE_WAIT_ACK_CREDIT; break; case SUBSTATE_WAIT_ACK_CREDIT: if (sizeof(nci_pattern_core_conn_credits_ntf) != skb->len || memcmp(nci_pattern_core_conn_credits_ntf, skb->data, skb->len)) { nfc_err(priv->dev, "bad packet: waiting for credit"); return -EINVAL; } if (priv->fw_dnld.chunk_len == 0) { /* FW Loading is done */ uint8_t conn_id = NCI_CORE_LC_CONNID_PROP_FW_DL; priv->fw_dnld.state = STATE_CLOSE_LC; nci_send_cmd(priv->ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); 
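			/*
			 * A zero-length chunk announced by the helper marks
			 * the end of the image: the logical connection is
			 * closed here, and process_state_close_lc() will
			 * issue the proprietary BOOT command once the
			 * CORE_CONN_CLOSE response arrives.
			 */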
} else { out_skb = alloc_lc_skb(priv, priv->fw_dnld.chunk_len); if (!out_skb) return -ENOMEM; skb_put_data(out_skb, ((uint8_t *)priv->fw_dnld.fw->data) + priv->fw_dnld.offset, priv->fw_dnld.chunk_len); nci_send_frame(priv->ndev, out_skb); priv->fw_dnld.substate = SUBSTATE_WAIT_DATA_CREDIT; } break; case SUBSTATE_WAIT_DATA_CREDIT: if (sizeof(nci_pattern_core_conn_credits_ntf) != skb->len || memcmp(nci_pattern_core_conn_credits_ntf, skb->data, skb->len)) { nfc_err(priv->dev, "bad packet: waiting for credit"); return -EINVAL; } priv->fw_dnld.offset += priv->fw_dnld.chunk_len; priv->fw_dnld.chunk_len = 0; priv->fw_dnld.substate = SUBSTATE_WAIT_COMMAND; break; case SUBSTATE_WAIT_NACK_CREDIT: if (sizeof(nci_pattern_core_conn_credits_ntf) != skb->len || memcmp(nci_pattern_core_conn_credits_ntf, skb->data, skb->len)) { nfc_err(priv->dev, "bad packet: waiting for credit"); return -EINVAL; } priv->fw_dnld.substate = SUBSTATE_WAIT_COMMAND; break; } return 0; } static int process_state_close_lc(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_core_conn_close_rsp) != skb->len || memcmp(skb->data, nci_pattern_core_conn_close_rsp, skb->len)) return -EINVAL; priv->fw_dnld.state = STATE_BOOT; nci_send_cmd(priv->ndev, NCI_OP_PROPRIETARY_BOOT_CMD, 0, NULL); return 0; } static int process_state_boot(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_proprietary_boot_rsp) != skb->len || memcmp(skb->data, nci_pattern_proprietary_boot_rsp, skb->len)) return -EINVAL; /* * Update HI config to use the right configuration for the next * data exchanges. */ priv->if_ops->nci_update_config(priv, &priv->fw_dnld.binary_config->config); if (priv->fw_dnld.binary_config == &priv->fw_dnld.header->helper) { /* * This is the case where an helper was needed and we have * uploaded it. Now we have to wait the next RESET NTF to start * FW download. 
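 * (Booting the helper resets the chip again, so the state machine drops
 * back to STATE_RESET with binary_config now pointing at the real
 * firmware image and the whole download sequence runs a second time.)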
*/ priv->fw_dnld.state = STATE_RESET; priv->fw_dnld.binary_config = &priv->fw_dnld.header->firmware; nfc_info(priv->dev, "FW loading: helper loaded"); } else { nfc_info(priv->dev, "FW loading: firmware loaded"); fw_dnld_over(priv, 0); } return 0; } static void fw_dnld_rx_work(struct work_struct *work) { int ret; struct sk_buff *skb; struct nfcmrvl_fw_dnld *fw_dnld = container_of(work, struct nfcmrvl_fw_dnld, rx_work); struct nfcmrvl_private *priv = container_of(fw_dnld, struct nfcmrvl_private, fw_dnld); while ((skb = skb_dequeue(&fw_dnld->rx_q))) { nfc_send_to_raw_sock(priv->ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); switch (fw_dnld->state) { case STATE_RESET: ret = process_state_reset(priv, skb); break; case STATE_INIT: ret = process_state_init(priv, skb); break; case STATE_SET_REF_CLOCK: ret = process_state_set_ref_clock(priv, skb); break; case STATE_SET_HI_CONFIG: ret = process_state_set_hi_config(priv, skb); break; case STATE_OPEN_LC: ret = process_state_open_lc(priv, skb); break; case STATE_FW_DNLD: ret = process_state_fw_dnld(priv, skb); break; case STATE_CLOSE_LC: ret = process_state_close_lc(priv, skb); break; case STATE_BOOT: ret = process_state_boot(priv, skb); break; default: ret = -EFAULT; } kfree_skb(skb); if (ret != 0) { nfc_err(priv->dev, "FW loading error"); fw_dnld_over(priv, ret); break; } } } int nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv) { char name[32]; INIT_WORK(&priv->fw_dnld.rx_work, fw_dnld_rx_work); snprintf(name, sizeof(name), "%s_nfcmrvl_fw_dnld_rx_wq", dev_name(&priv->ndev->nfc_dev->dev)); priv->fw_dnld.rx_wq = create_singlethread_workqueue(name); if (!priv->fw_dnld.rx_wq) return -ENOMEM; skb_queue_head_init(&priv->fw_dnld.rx_q); return 0; } void nfcmrvl_fw_dnld_deinit(struct nfcmrvl_private *priv) { destroy_workqueue(priv->fw_dnld.rx_wq); } void nfcmrvl_fw_dnld_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb) { /* Discard command timer */ if (timer_pending(&priv->ndev->cmd_timer)) del_timer_sync(&priv->ndev->cmd_timer); /* Allow next command */ atomic_set(&priv->ndev->cmd_cnt, 1); /* Queue and trigger rx work */ skb_queue_tail(&priv->fw_dnld.rx_q, skb); queue_work(priv->fw_dnld.rx_wq, &priv->fw_dnld.rx_work); } void nfcmrvl_fw_dnld_abort(struct nfcmrvl_private *priv) { fw_dnld_over(priv, -EHOSTDOWN); } int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) { struct nfcmrvl_private *priv = nci_get_drvdata(ndev); struct nfcmrvl_fw_dnld *fw_dnld = &priv->fw_dnld; int res; if (!priv->support_fw_dnld) return -ENOTSUPP; if (!firmware_name || !firmware_name[0]) return -EINVAL; strcpy(fw_dnld->name, firmware_name); /* * Retrieve FW binary file and parse it to initialize FW download * state machine. 
*/ /* Retrieve FW binary */ res = request_firmware(&fw_dnld->fw, firmware_name, &ndev->nfc_dev->dev); if (res < 0) { nfc_err(priv->dev, "failed to retrieve FW %s", firmware_name); return -ENOENT; } fw_dnld->header = (const struct nfcmrvl_fw *) priv->fw_dnld.fw->data; if (fw_dnld->header->magic != NFCMRVL_FW_MAGIC || fw_dnld->header->phy != priv->phy) { nfc_err(priv->dev, "bad firmware binary %s magic=0x%x phy=%d", firmware_name, fw_dnld->header->magic, fw_dnld->header->phy); release_firmware(fw_dnld->fw); fw_dnld->header = NULL; return -EINVAL; } if (fw_dnld->header->helper.offset != 0) { nfc_info(priv->dev, "loading helper"); fw_dnld->binary_config = &fw_dnld->header->helper; } else { nfc_info(priv->dev, "loading firmware"); fw_dnld->binary_config = &fw_dnld->header->firmware; } /* Configure a timer for timeout */ timer_setup(&priv->fw_dnld.timer, fw_dnld_timeout, 0); mod_timer(&priv->fw_dnld.timer, jiffies + msecs_to_jiffies(FW_DNLD_TIMEOUT)); /* Reconfigure HI to be sure that it uses the bootrom values */ priv->if_ops->nci_update_config(priv, &fw_dnld->header->bootrom.config); /* Allow first command */ atomic_set(&priv->ndev->cmd_cnt, 1); /* First, reset the chip */ priv->fw_dnld.state = STATE_RESET; nfcmrvl_chip_reset(priv); /* Now wait for CORE_RESET_NTF or timeout */ return 0; }
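/*
 * Illustrative sketch, not part of the driver: process_state_fw_dnld()
 * above expects every helper command to carry a little-endian 16-bit
 * chunk length followed by its one's complement. The unused helper below
 * restates that framing check for a bare 5-byte payload (command byte,
 * len, ~len); the function name is hypothetical.
 */
static bool example_helper_cmd_valid(const u8 payload[5])
{
	u16 len = get_unaligned_le16(&payload[1]);
	u16 comp = get_unaligned_le16(&payload[3]);

	/* B0 is the helper command byte, B1..2 len, B3..4 its complement */
	return payload[0] == HELPER_CMD_PACKET_FORMAT && (u16)~len == comp;
}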
/* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs * * Pentium III FXSR, SSE support * Gareth Hughes <gareth@valinux.com>, May 2000 */ /* * Handle hardware traps and faults.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/context_tracking.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/kmsan.h> #include <linux/spinlock.h> #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/kdebug.h> #include <linux/kgdb.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/ptrace.h> #include <linux/uprobes.h> #include <linux/string.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/kexec.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/timer.h> #include <linux/init.h> #include <linux/bug.h> #include <linux/nmi.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/cpu.h> #include <linux/io.h> #include <linux/hardirq.h> #include <linux/atomic.h> #include <linux/iommu.h> #include <asm/stacktrace.h> #include <asm/processor.h> #include <asm/debugreg.h> #include <asm/realmode.h> #include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/traps.h> #include <asm/desc.h> #include <asm/fred.h> #include <asm/fpu/api.h> #include <asm/cpu.h> #include <asm/cpu_entry_area.h> #include <asm/mce.h> #include <asm/fixmap.h> #include <asm/mach_traps.h> #include <asm/alternative.h> #include <asm/fpu/xstate.h> #include <asm/vm86.h> #include <asm/umip.h> #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/vdso.h> #include <asm/tdx.h> #include <asm/cfi.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> #else #include <asm/processor-flags.h> #include <asm/setup.h> #endif #include <asm/proto.h> DECLARE_BITMAP(system_vectors, NR_VECTORS); __always_inline int is_valid_bugaddr(unsigned long addr) { if (addr < TASK_SIZE_MAX) return 0; /* * We got #UD, if the text isn't readable we'd have gotten * a different exception. */ return *(unsigned short *)addr == INSN_UD2; } static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, struct pt_regs *regs, long error_code) { if (v8086_mode(regs)) { /* * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. */ if (trapnr < X86_TRAP_UD) { if (!handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr)) return 0; } } else if (!user_mode(regs)) { if (fixup_exception(regs, trapnr, error_code, 0)) return 0; tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); } else { if (fixup_vdso_exception(regs, trapnr, error_code, 0)) return 0; } /* * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not * kernelspace faults which are fixed up. die() gives the * process no chance to handle the signal and notice the * kernel fault information, so that won't result in polluting * the information about previously queued, but not yet * delivered, faults. See also exc_general_protection below. 
*/ tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; return -1; } static void show_signal(struct task_struct *tsk, int signr, const char *type, const char *desc, struct pt_regs *regs, long error_code) { if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { pr_info("%s[%d] %s%s ip:%lx sp:%lx error:%lx", tsk->comm, task_pid_nr(tsk), type, desc, regs->ip, regs->sp, error_code); print_vma_addr(KERN_CONT " in ", regs->ip); pr_cont("\n"); } } static void do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, int sicode, void __user *addr) { struct task_struct *tsk = current; if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) return; show_signal(tsk, signr, "trap ", str, regs, error_code); if (!sicode) force_sig(signr); else force_sig_fault(signr, sicode, addr); } NOKPROBE_SYMBOL(do_trap); static void do_error_trap(struct pt_regs *regs, long error_code, char *str, unsigned long trapnr, int signr, int sicode, void __user *addr) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); do_trap(trapnr, signr, str, regs, error_code, sicode, addr); cond_local_irq_disable(regs); } } /* * Posix requires to provide the address of the faulting instruction for * SIGILL (#UD) and SIGFPE (#DE) in the si_addr member of siginfo_t. * * This address is usually regs->ip, but when an uprobe moved the code out * of line then regs->ip points to the XOL code which would confuse * anything which analyzes the fault address vs. the unmodified binary. If * a trap happened in XOL code then uprobe maps regs->ip back to the * original instruction address. */ static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs) { return (void __user *)uprobe_get_trap_addr(regs); } DEFINE_IDTENTRY(exc_divide_error) { do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE, FPE_INTDIV, error_get_trap_addr(regs)); } DEFINE_IDTENTRY(exc_overflow) { do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL); } #ifdef CONFIG_X86_F00F_BUG void handle_invalid_op(struct pt_regs *regs) #else static inline void handle_invalid_op(struct pt_regs *regs) #endif { do_error_trap(regs, 0, "invalid opcode", X86_TRAP_UD, SIGILL, ILL_ILLOPN, error_get_trap_addr(regs)); } static noinstr bool handle_bug(struct pt_regs *regs) { bool handled = false; /* * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() * is a rare case that uses @regs without passing them to * irqentry_enter(). */ kmsan_unpoison_entry_regs(regs); if (!is_valid_bugaddr(regs->ip)) return handled; /* * All lies, just get the WARN/BUG out. */ instrumentation_begin(); /* * Since we're emulating a CALL with exceptions, restore the interrupt * state to what it was at the exception site. */ if (regs->flags & X86_EFLAGS_IF) raw_local_irq_enable(); if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN || handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { regs->ip += LEN_UD2; handled = true; } if (regs->flags & X86_EFLAGS_IF) raw_local_irq_disable(); instrumentation_end(); return handled; } DEFINE_IDTENTRY_RAW(exc_invalid_op) { irqentry_state_t state; /* * We use UD2 as a short encoding for 'CALL __WARN', as such * handle it before exception entry to avoid recursive WARN * in case exception entry is the one triggering WARNs. 
*/ if (!user_mode(regs) && handle_bug(regs)) return; state = irqentry_enter(regs); instrumentation_begin(); handle_invalid_op(regs); instrumentation_end(); irqentry_exit(regs, state); } DEFINE_IDTENTRY(exc_coproc_segment_overrun) { do_error_trap(regs, 0, "coprocessor segment overrun", X86_TRAP_OLD_MF, SIGFPE, 0, NULL); } DEFINE_IDTENTRY_ERRORCODE(exc_invalid_tss) { do_error_trap(regs, error_code, "invalid TSS", X86_TRAP_TS, SIGSEGV, 0, NULL); } DEFINE_IDTENTRY_ERRORCODE(exc_segment_not_present) { do_error_trap(regs, error_code, "segment not present", X86_TRAP_NP, SIGBUS, 0, NULL); } DEFINE_IDTENTRY_ERRORCODE(exc_stack_segment) { do_error_trap(regs, error_code, "stack segment", X86_TRAP_SS, SIGBUS, 0, NULL); } DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check) { char *str = "alignment check"; if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP) return; if (!user_mode(regs)) die("Split lock detected\n", regs, error_code); local_irq_enable(); if (handle_user_split_lock(regs, error_code)) goto out; do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs, error_code, BUS_ADRALN, NULL); out: local_irq_disable(); } #ifdef CONFIG_VMAP_STACK __visible void __noreturn handle_stack_overflow(struct pt_regs *regs, unsigned long fault_address, struct stack_info *info) { const char *name = stack_type_name(info->type); printk(KERN_EMERG "BUG: %s stack guard page was hit at %p (stack is %p..%p)\n", name, (void *)fault_address, info->begin, info->end); die("stack guard page", regs, 0); /* Be absolutely certain we don't return. */ panic("%s stack guard hit", name); } #endif /* * Runs on an IST stack for x86_64 and on a special task stack for x86_32. * * On x86_64, this is more or less a normal kernel entry. Notwithstanding the * SDM's warnings about double faults being unrecoverable, returning works as * expected. Presumably what the SDM actually means is that the CPU may get * the register state wrong on entry, so returning could be a bad idea. * * Various CPU engineers have promised that double faults due to an IRET fault * while the stack is read-only are, in fact, recoverable. * * On x86_32, this is entered through a task gate, and regs are synthesized * from the TSS. Returning is, in principle, okay, but changes to regs will * be lost. If, for some reason, we need to return to a context with modified * regs, the shim code could be adjusted to synchronize the registers. * * The 32bit #DF shim provides CR2 already as an argument. On 64bit it needs * to be read before doing anything else. */ DEFINE_IDTENTRY_DF(exc_double_fault) { static const char str[] = "double fault"; struct task_struct *tsk = current; #ifdef CONFIG_VMAP_STACK unsigned long address = read_cr2(); struct stack_info info; #endif #ifdef CONFIG_X86_ESPFIX64 extern unsigned char native_irq_return_iret[]; /* * If IRET takes a non-IST fault on the espfix64 stack, then we * end up promoting it to a doublefault. In that case, take * advantage of the fact that we're not using the normal (TSS.sp0) * stack right now. We can write a fake #GP(0) frame at TSS.sp0 * and then modify our own IRET frame so that, when we return, * we land directly at the #GP(0) vector with the stack already * set up according to its expectations. * * The net result is that our #GP handler will think that we * entered from usermode with the bad user context. * * No need for nmi_enter() here because we don't use RCU. 
*/ if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY && regs->cs == __KERNEL_CS && regs->ip == (unsigned long)native_irq_return_iret) { struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; unsigned long *p = (unsigned long *)regs->sp; /* * regs->sp points to the failing IRET frame on the * ESPFIX64 stack. Copy it to the entry stack. This fills * in gpregs->ss through gpregs->ip. * */ gpregs->ip = p[0]; gpregs->cs = p[1]; gpregs->flags = p[2]; gpregs->sp = p[3]; gpregs->ss = p[4]; gpregs->orig_ax = 0; /* Missing (lost) #GP error code */ /* * Adjust our frame so that we return straight to the #GP * vector with the expected RSP value. This is safe because * we won't enable interrupts or schedule before we invoke * general_protection, so nothing will clobber the stack * frame we just set up. * * We will enter general_protection with kernel GSBASE, * which is what the stub expects, given that the faulting * RIP will be the IRET instruction. */ regs->ip = (unsigned long)asm_exc_general_protection; regs->sp = (unsigned long)&gpregs->orig_ax; return; } #endif irqentry_nmi_enter(regs); instrumentation_begin(); notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_DF; #ifdef CONFIG_VMAP_STACK /* * If we overflow the stack into a guard page, the CPU will fail * to deliver #PF and will send #DF instead. Similarly, if we * take any non-IST exception while too close to the bottom of * the stack, the processor will get a page fault while * delivering the exception and will generate a double fault. * * According to the SDM (footnote in 6.15 under "Interrupt 14 - * Page-Fault Exception (#PF): * * Processors update CR2 whenever a page fault is detected. If a * second page fault occurs while an earlier page fault is being * delivered, the faulting linear address of the second fault will * overwrite the contents of CR2 (replacing the previous * address). These updates to CR2 occur even if the page fault * results in a double fault or occurs during the delivery of a * double fault. * * The logic below has a small possibility of incorrectly diagnosing * some errors as stack overflows. For example, if the IDT or GDT * gets corrupted such that #GP delivery fails due to a bad descriptor * causing #GP and we hit this condition while CR2 coincidentally * points to the stack guard page, we'll think we overflowed the * stack. Given that we're going to panic one way or another * if this happens, this isn't necessarily worth fixing. * * If necessary, we could improve the test by only diagnosing * a stack overflow if the saved RSP points within 47 bytes of * the bottom of the stack: if RSP == tsk_stack + 48 and we * take an exception, the stack is already aligned and there * will be enough room SS, RSP, RFLAGS, CS, RIP, and a * possible error code, so a stack overflow would *not* double * fault. With any less space left, exception delivery could * fail, and, as a practical matter, we've overflowed the * stack even if the actual trigger for the double fault was * something else. 
*/ if (get_stack_guard_info((void *)address, &info)) handle_stack_overflow(regs, address, &info); #endif pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code); die("double fault", regs, error_code); panic("Machine halted."); instrumentation_end(); } DEFINE_IDTENTRY(exc_bounds) { if (notify_die(DIE_TRAP, "bounds", regs, 0, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) return; cond_local_irq_enable(regs); if (!user_mode(regs)) die("bounds", regs, 0); do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, 0, 0, NULL); cond_local_irq_disable(regs); } enum kernel_gp_hint { GP_NO_HINT, GP_NON_CANONICAL, GP_CANONICAL }; /* * When an uncaught #GP occurs, try to determine the memory address accessed by * the instruction and return that address to the caller. Also, try to figure * out whether any part of the access to that address was non-canonical. */ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, unsigned long *addr) { u8 insn_buf[MAX_INSN_SIZE]; struct insn insn; int ret; if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) return GP_NO_HINT; ret = insn_decode_kernel(&insn, insn_buf); if (ret < 0) return GP_NO_HINT; *addr = (unsigned long)insn_get_addr_ref(&insn, regs); if (*addr == -1UL) return GP_NO_HINT; #ifdef CONFIG_X86_64 /* * Check that: * - the operand is not in the kernel half * - the last byte of the operand is not in the user canonical half */ if (*addr < ~__VIRTUAL_MASK && *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK) return GP_NON_CANONICAL; #endif return GP_CANONICAL; } #define GPFSTR "general protection fault" static bool fixup_iopl_exception(struct pt_regs *regs) { struct thread_struct *t = &current->thread; unsigned char byte; unsigned long ip; if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3) return false; if (insn_get_effective_ip(regs, &ip)) return false; if (get_user(byte, (const char __user *)ip)) return false; if (byte != 0xfa && byte != 0xfb) return false; if (!t->iopl_warn && printk_ratelimit()) { pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx", current->comm, task_pid_nr(current), ip); print_vma_addr(KERN_CONT " in ", ip); pr_cont("\n"); t->iopl_warn = 1; } regs->ip += 1; return true; } /* * The unprivileged ENQCMD instruction generates #GPs if the * IA32_PASID MSR has not been populated. If possible, populate * the MSR from a PASID previously allocated to the mm. */ static bool try_fixup_enqcmd_gp(void) { #ifdef CONFIG_ARCH_HAS_CPU_PASID u32 pasid; /* * MSR_IA32_PASID is managed using XSAVE. Directly * writing to the MSR is only possible when fpregs * are valid and the fpstate is not. This is * guaranteed when handling a userspace exception * in *before* interrupts are re-enabled. */ lockdep_assert_irqs_disabled(); /* * Hardware without ENQCMD will not generate * #GPs that can be fixed up here. */ if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) return false; /* * If the mm has not been allocated a * PASID, the #GP can not be fixed up. */ if (!mm_valid_pasid(current->mm)) return false; pasid = mm_get_enqcmd_pasid(current->mm); /* * Did this thread already have its PASID activated? * If so, the #GP must be from something else. 
*/ if (current->pasid_activated) return false; wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); current->pasid_activated = 1; return true; #else return false; #endif } static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr, unsigned long error_code, const char *str, unsigned long address) { if (fixup_exception(regs, trapnr, error_code, address)) return true; current->thread.error_code = error_code; current->thread.trap_nr = trapnr; /* * To be potentially processing a kprobe fault and to trust the result * from kprobe_running(), we have to be non-preemptible. */ if (!preemptible() && kprobe_running() && kprobe_fault_handler(regs, trapnr)) return true; return notify_die(DIE_GPF, str, regs, error_code, trapnr, SIGSEGV) == NOTIFY_STOP; } static void gp_user_force_sig_segv(struct pt_regs *regs, int trapnr, unsigned long error_code, const char *str) { current->thread.error_code = error_code; current->thread.trap_nr = trapnr; show_signal(current, SIGSEGV, "", str, regs, error_code); force_sig(SIGSEGV); } DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; enum kernel_gp_hint hint = GP_NO_HINT; unsigned long gp_addr; if (user_mode(regs) && try_fixup_enqcmd_gp()) return; cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { if (user_mode(regs) && fixup_umip_exception(regs)) goto exit; } if (v8086_mode(regs)) { local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); local_irq_disable(); return; } if (user_mode(regs)) { if (fixup_iopl_exception(regs)) goto exit; if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0)) goto exit; gp_user_force_sig_segv(regs, X86_TRAP_GP, error_code, desc); goto exit; } if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0)) goto exit; if (error_code) snprintf(desc, sizeof(desc), "segment-related " GPFSTR); else hint = get_kernel_gp_address(regs, &gp_addr); if (hint != GP_NO_HINT) snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx", (hint == GP_NON_CANONICAL) ? "probably for non-canonical address" : "maybe for address", gp_addr); /* * KASAN is interested only in the non-canonical case, clear it * otherwise. */ if (hint != GP_NON_CANONICAL) gp_addr = 0; die_addr(desc, regs, error_code, gp_addr); exit: cond_local_irq_disable(regs); } static bool do_int3(struct pt_regs *regs) { int res; #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) return true; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ #ifdef CONFIG_KPROBES if (kprobe_int3_handler(regs)) return true; #endif res = notify_die(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP); return res == NOTIFY_STOP; } NOKPROBE_SYMBOL(do_int3); static void do_int3_user(struct pt_regs *regs) { if (do_int3(regs)) return; cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, 0, 0, NULL); cond_local_irq_disable(regs); } DEFINE_IDTENTRY_RAW(exc_int3) { /* * poke_int3_handler() is completely self contained code; it does (and * must) *NOT* call out to anything, lest it hits upon yet another * INT3. */ if (poke_int3_handler(regs)) return; /* * irqentry_enter_from_user_mode() uses static_branch_{,un}likely() * and therefore can trigger INT3, hence poke_int3_handler() must * be done before. If the entry came from kernel mode, then use * nmi_enter() because the INT3 could have been hit in any context * including NMI. 
*/ if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); instrumentation_begin(); do_int3_user(regs); instrumentation_end(); irqentry_exit_to_user_mode(regs); } else { irqentry_state_t irq_state = irqentry_nmi_enter(regs); instrumentation_begin(); if (!do_int3(regs)) die("int3", regs, 0); instrumentation_end(); irqentry_nmi_exit(regs, irq_state); } } #ifdef CONFIG_X86_64 /* * Help handler running on a per-cpu (IST or entry trampoline) stack * to switch to the normal thread stack if the interrupted code was in * user mode. The actual stack switch is done in entry_64.S */ asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = (struct pt_regs *)current_top_of_stack() - 1; if (regs != eregs) *regs = *eregs; return regs; } #ifdef CONFIG_AMD_MEM_ENCRYPT asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *regs) { unsigned long sp, *stack; struct stack_info info; struct pt_regs *regs_ret; /* * In the SYSCALL entry path the RSP value comes from user-space - don't * trust it and switch to the current kernel stack */ if (ip_within_syscall_gap(regs)) { sp = current_top_of_stack(); goto sync; } /* * From here on the RSP value is trusted. Now check whether entry * happened from a safe stack. Not safe are the entry or unknown stacks, * use the fall-back stack instead in this case. */ sp = regs->sp; stack = (unsigned long *)sp; if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY || info.type > STACK_TYPE_EXCEPTION_LAST) sp = __this_cpu_ist_top_va(VC2); sync: /* * Found a safe stack - switch to it as if the entry didn't happen via * IST stack. The code below only copies pt_regs, the real switch happens * in assembly code. */ sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret); regs_ret = (struct pt_regs *)sp; *regs_ret = *regs; return regs_ret; } #endif asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs) { struct pt_regs tmp, *new_stack; /* * This is called from entry_64.S early in handling a fault * caused by a bad iret to user mode. To handle the fault * correctly, we want to move our stack frame to where it would * be had we entered directly on the entry stack (rather than * just below the IRET frame) and we want to pretend that the * exception came from the IRET target. */ new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the temporary storage. */ __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8); /* Copy the remainder of the stack from the current stack. */ __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip)); /* Update the entry stack */ __memcpy(new_stack, &tmp, sizeof(tmp)); BUG_ON(!user_mode(new_stack)); return new_stack; } #endif static bool is_sysenter_singlestep(struct pt_regs *regs) { /* * We don't try for precision here. If we're anywhere in the region of * code that can be single-stepped in the SYSENTER entry path, then * assume that this is a useless single-step trap due to SYSENTER * being invoked with TF set. (We don't know in advance exactly * which instructions will be hit because BTF could plausibly * be set.) 
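 * (With BTF set in MSR_IA32_DEBUGCTLMSR, TF raises #DB only on branches
 * rather than after every instruction, so the trap does not necessarily
 * land on the first instruction of the region.)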
*/ #ifdef CONFIG_X86_32 return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) < (unsigned long)__end_SYSENTER_singlestep_region - (unsigned long)__begin_SYSENTER_singlestep_region; #elif defined(CONFIG_IA32_EMULATION) return (regs->ip - (unsigned long)entry_SYSENTER_compat) < (unsigned long)__end_entry_SYSENTER_compat - (unsigned long)entry_SYSENTER_compat; #else return false; #endif } static __always_inline unsigned long debug_read_clear_dr6(void) { unsigned long dr6; /* * The Intel SDM says: * * Certain debug exceptions may clear bits 0-3. The remaining * contents of the DR6 register are never cleared by the * processor. To avoid confusion in identifying debug * exceptions, debug handlers should clear the register before * returning to the interrupted task. * * Keep it simple: clear DR6 immediately. */ get_debugreg(dr6, 6); set_debugreg(DR6_RESERVED, 6); dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ return dr6; } /* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. Therefore * it is possible to get a watchpoint trap here from inside the kernel. * However, the code in ./ptrace.c has ensured that the user can * only set watchpoints on userspace addresses. Therefore the in-kernel * watchpoint trap can only occur in code which is reading/writing * from user space. Such code must not hold kernel locks (since it * can equally take a page fault), therefore it is safe to call * force_sig_info even though that claims and releases locks. * * Code in ./signal.c ensures that the debug control register * is restored before we deliver any signal, and therefore that * user code runs with the correct debug control register even though * we clear it here. * * Being careful here means that we don't have to be as careful in a * lot of more complicated places (task switching can be a bit lazy * about restoring all the debug state, and ptrace doesn't have to * find every occurrence of the TF bit that could be saved away even * by user code) * * May run on IST stack. */ static bool notify_debug(struct pt_regs *regs, unsigned long *dr6) { /* * Notifiers will clear bits in @dr6 to indicate the event has been * consumed - hw_breakpoint_handler(), single_stop_cont(). * * Notifiers will set bits in @virtual_dr6 to indicate the desire * for signals - ptrace_triggered(), kgdb_hw_overflow_handler(). */ if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP) return true; return false; } static noinstr void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) { /* * Disable breakpoints during exception handling; recursive exceptions * are exceedingly 'fun'. * * Since this function is NOKPROBE, and that also applies to * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a * HW_BREAKPOINT_W on our stack) * * Entry text is excluded for HW_BP_X and cpu_entry_area, which * includes the entry stack is excluded for everything. * * For FRED, nested #DB should just work fine. But when a watchpoint or * breakpoint is set in the code path which is executed by #DB handler, * it results in an endless recursion and stack overflow. Thus we stay * with the IDT approach, i.e., save DR7 and disable #DB. */ unsigned long dr7 = local_db_save(); irqentry_state_t irq_state = irqentry_nmi_enter(regs); instrumentation_begin(); /* * If something gets miswired and we end up here for a user mode * #DB, we will malfunction. 
*/ WARN_ON_ONCE(user_mode(regs)); if (test_thread_flag(TIF_BLOCKSTEP)) { /* * The SDM says "The processor clears the BTF flag when it * generates a debug exception." but PTRACE_BLOCKSTEP requested * it for userspace, but we just took a kernel #DB, so re-set * BTF. */ unsigned long debugctl; rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); debugctl |= DEBUGCTLMSR_BTF; wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } /* * Catch SYSENTER with TF set and clear DR_STEP. If this hit a * watchpoint at the same time then that will still be handled. */ if (!cpu_feature_enabled(X86_FEATURE_FRED) && (dr6 & DR_STEP) && is_sysenter_singlestep(regs)) dr6 &= ~DR_STEP; /* * The kernel doesn't use INT1 */ if (!dr6) goto out; if (notify_debug(regs, &dr6)) goto out; /* * The kernel doesn't use TF single-step outside of: * * - Kprobes, consumed through kprobe_debug_handler() * - KGDB, consumed through notify_debug() * * So if we get here with DR_STEP set, something is wonky. * * A known way to trigger this is through QEMU's GDB stub, * which leaks #DB into the guest and causes IST recursion. */ if (WARN_ON_ONCE(dr6 & DR_STEP)) regs->flags &= ~X86_EFLAGS_TF; out: instrumentation_end(); irqentry_nmi_exit(regs, irq_state); local_db_restore(dr7); } static noinstr void exc_debug_user(struct pt_regs *regs, unsigned long dr6) { bool icebp; /* * If something gets miswired and we end up here for a kernel mode * #DB, we will malfunction. */ WARN_ON_ONCE(!user_mode(regs)); /* * NB: We can't easily clear DR7 here because * irqentry_exit_to_usermode() can invoke ptrace, schedule, access * user memory, etc. This means that a recursive #DB is possible. If * this happens, that #DB will hit exc_debug_kernel() and clear DR7. * Since we're not on the IST stack right now, everything will be * fine. */ irqentry_enter_from_user_mode(regs); instrumentation_begin(); /* * Start the virtual/ptrace DR6 value with just the DR_STEP mask * of the real DR6. ptrace_triggered() will set the DR_TRAPn bits. * * Userspace expects DR_STEP to be visible in ptrace_get_debugreg(6) * even if it is not the result of PTRACE_SINGLESTEP. */ current->thread.virtual_dr6 = (dr6 & DR_STEP); /* * The SDM says "The processor clears the BTF flag when it * generates a debug exception." Clear TIF_BLOCKSTEP to keep * TIF_BLOCKSTEP in sync with the hardware BTF flag. */ clear_thread_flag(TIF_BLOCKSTEP); /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. */ icebp = !dr6; if (notify_debug(regs, &dr6)) goto out; /* It's safe to allow irq's after DR6 has been saved */ local_irq_enable(); if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB); goto out_irq; } /* #DB for bus lock can only be triggered from userspace. */ if (dr6 & DR_BUS_LOCK) handle_bus_lock(regs); /* Add the virtual_dr6 bits for signals. 
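 * (These are the bits that notifiers such as ptrace_triggered() asked to
 * have reported by setting them in thread.virtual_dr6, as opposed to the
 * bits read back from the hardware DR6 register.)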
*/ dr6 |= current->thread.virtual_dr6; if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) send_sigtrap(regs, 0, get_si_code(dr6)); out_irq: local_irq_disable(); out: instrumentation_end(); irqentry_exit_to_user_mode(regs); } #ifdef CONFIG_X86_64 /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { exc_debug_kernel(regs, debug_read_clear_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { exc_debug_user(regs, debug_read_clear_dr6()); } #ifdef CONFIG_X86_FRED /* * When occurred on different ring level, i.e., from user or kernel * context, #DB needs to be handled on different stack: User #DB on * current task stack, while kernel #DB on a dedicated stack. * * This is exactly how FRED event delivery invokes an exception * handler: ring 3 event on level 0 stack, i.e., current task stack; * ring 0 event on the #DB dedicated stack specified in the * IA32_FRED_STKLVLS MSR. So unlike IDT, the FRED debug exception * entry stub doesn't do stack switch. */ DEFINE_FREDENTRY_DEBUG(exc_debug) { /* * FRED #DB stores DR6 on the stack in the format which * debug_read_clear_dr6() returns for the IDT entry points. */ unsigned long dr6 = fred_event_data(regs); if (user_mode(regs)) exc_debug_user(regs, dr6); else exc_debug_kernel(regs, dr6); } #endif /* CONFIG_X86_FRED */ #else /* 32 bit does not have separate entry points. */ DEFINE_IDTENTRY_RAW(exc_debug) { unsigned long dr6 = debug_read_clear_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); else exc_debug_kernel(regs, dr6); } #endif /* * Note that we play around with the 'TS' bit in an attempt to get * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ static void math_error(struct pt_regs *regs, int trapnr) { struct task_struct *task = current; struct fpu *fpu = &task->thread.fpu; int si_code; char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; cond_local_irq_enable(regs); if (!user_mode(regs)) { if (fixup_exception(regs, trapnr, 0, 0)) goto exit; task->thread.error_code = 0; task->thread.trap_nr = trapnr; if (notify_die(DIE_TRAP, str, regs, 0, trapnr, SIGFPE) != NOTIFY_STOP) die(str, regs, 0); goto exit; } /* * Synchronize the FPU register state to the memory register state * if necessary. This allows the exception handler to inspect it. */ fpu_sync_fpstate(fpu); task->thread.trap_nr = trapnr; task->thread.error_code = 0; si_code = fpu__exception_code(fpu, trapnr); /* Retry when we get spurious exceptions: */ if (!si_code) goto exit; if (fixup_vdso_exception(regs, trapnr, 0, 0)) goto exit; force_sig_fault(SIGFPE, si_code, (void __user *)uprobe_get_trap_addr(regs)); exit: cond_local_irq_disable(regs); } DEFINE_IDTENTRY(exc_coprocessor_error) { math_error(regs, X86_TRAP_MF); } DEFINE_IDTENTRY(exc_simd_coprocessor_error) { if (IS_ENABLED(CONFIG_X86_INVD_BUG)) { /* AMD 486 bug: INVD in CPL 0 raises #XF instead of #GP */ if (!static_cpu_has(X86_FEATURE_XMM)) { __exc_general_protection(regs, 0); return; } } math_error(regs, X86_TRAP_XF); } DEFINE_IDTENTRY(exc_spurious_interrupt_bug) { /* * This addresses a Pentium Pro Erratum: * * PROBLEM: If the APIC subsystem is configured in mixed mode with * Virtual Wire mode implemented through the local APIC, an * interrupt vector of 0Fh (Intel reserved encoding) may be * generated by the local APIC (Int 15). This vector may be * generated upon receipt of a spurious interrupt (an interrupt * which is removed before the system receives the INTA sequence) * instead of the programmed 8259 spurious interrupt vector. 
* * IMPLICATION: The spurious interrupt vector programmed in the * 8259 is normally handled by an operating system's spurious * interrupt handler. However, a vector of 0Fh is unknown to some * operating systems, which would crash if this erratum occurred. * * In theory this could be limited to 32bit, but the handler is not * hurting and who knows which other CPUs suffer from this. */ } static bool handle_xfd_event(struct pt_regs *regs) { u64 xfd_err; int err; if (!IS_ENABLED(CONFIG_X86_64) || !cpu_feature_enabled(X86_FEATURE_XFD)) return false; rdmsrl(MSR_IA32_XFD_ERR, xfd_err); if (!xfd_err) return false; wrmsrl(MSR_IA32_XFD_ERR, 0); /* Die if that happens in kernel space */ if (WARN_ON(!user_mode(regs))) return false; local_irq_enable(); err = xfd_enable_feature(xfd_err); switch (err) { case -EPERM: force_sig_fault(SIGILL, ILL_ILLOPC, error_get_trap_addr(regs)); break; case -EFAULT: force_sig(SIGSEGV); break; } local_irq_disable(); return true; } DEFINE_IDTENTRY(exc_device_not_available) { unsigned long cr0 = read_cr0(); if (handle_xfd_event(regs)) return; #ifdef CONFIG_MATH_EMULATION if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) { struct math_emu_info info = { }; cond_local_irq_enable(regs); info.regs = regs; math_emulate(&info); cond_local_irq_disable(regs); return; } #endif /* This should not happen. */ if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) { /* Try to fix it up and carry on. */ write_cr0(cr0 & ~X86_CR0_TS); } else { /* * Something terrible happened, and we're better off trying * to kill the task than getting stuck in a never-ending * loop of #NM faults. */ die("unexpected #NM exception", regs, 0); } } #ifdef CONFIG_INTEL_TDX_GUEST #define VE_FAULT_STR "VE fault" static void ve_raise_fault(struct pt_regs *regs, long error_code, unsigned long address) { if (user_mode(regs)) { gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR); return; } if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR, address)) { return; } die_addr(VE_FAULT_STR, regs, error_code, address); } /* * Virtualization Exceptions (#VE) are delivered to TDX guests due to * specific guest actions which may happen in either user space or the * kernel: * * * Specific instructions (WBINVD, for example) * * Specific MSR accesses * * Specific CPUID leaf accesses * * Access to specific guest physical addresses * * In the settings that Linux will run in, virtualization exceptions are * never generated on accesses to normal, TD-private memory that has been * accepted (by BIOS or with tdx_enc_status_changed()). * * Syscall entry code has a critical window where the kernel stack is not * yet set up. Any exception in this window leads to hard to debug issues * and can be exploited for privilege escalation. Exceptions in the NMI * entry code also cause issues. Returning from the exception handler with * IRET will re-enable NMIs and nested NMI will corrupt the NMI stack. * * For these reasons, the kernel avoids #VEs during the syscall gap and * the NMI entry code. Entry code paths do not access TD-shared memory, * MMIO regions, use #VE triggering MSRs, instructions, or CPUID leaves * that might generate #VE. VMM can remove memory from TD at any point, * but access to unaccepted (or missing) private memory leads to VM * termination, not to #VE. * * Similarly to page faults and breakpoints, #VEs are allowed in NMI * handlers once the kernel is ready to deal with nested NMIs. * * During #VE delivery, all interrupts, including NMIs, are blocked until * TDGETVEINFO is called. 
It prevents #VE nesting until the kernel reads * the VE info. * * If a guest kernel action which would normally cause a #VE occurs in * the interrupt-disabled region before TDGETVEINFO, a #DF (fault * exception) is delivered to the guest which will result in an oops. * * The entry code has been audited carefully for following these expectations. * Changes in the entry code have to be audited for correctness vs. this * aspect. Similarly to #PF, #VE in these places will expose kernel to * privilege escalation or may lead to random crashes. */ DEFINE_IDTENTRY(exc_virtualization_exception) { struct ve_info ve; /* * NMIs/Machine-checks/Interrupts will be in a disabled state * till TDGETVEINFO TDCALL is executed. This ensures that VE * info cannot be overwritten by a nested #VE. */ tdx_get_ve_info(&ve); cond_local_irq_enable(regs); /* * If tdx_handle_virt_exception() could not process * it successfully, treat it as #GP(0) and handle it. */ if (!tdx_handle_virt_exception(regs, &ve)) ve_raise_fault(regs, 0, ve.gla); cond_local_irq_disable(regs); } #endif #ifdef CONFIG_X86_32 DEFINE_IDTENTRY_SW(iret_error) { local_irq_enable(); if (notify_die(DIE_TRAP, "iret exception", regs, 0, X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, 0, ILL_BADSTK, (void __user *)NULL); } local_irq_disable(); } #endif /* Do not enable FRED by default yet. */ static bool enable_fred __ro_after_init = false; #ifdef CONFIG_X86_FRED static int __init fred_setup(char *str) { if (!str) return -EINVAL; if (!cpu_feature_enabled(X86_FEATURE_FRED)) return 0; if (!strcmp(str, "on")) enable_fred = true; else if (!strcmp(str, "off")) enable_fred = false; else pr_warn("invalid FRED option: 'fred=%s'\n", str); return 0; } early_param("fred", fred_setup); #endif void __init trap_init(void) { if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred) setup_clear_cpu_cap(X86_FEATURE_FRED); /* Init cpu_entry_area before IST entries are set up */ setup_cpu_entry_areas(); /* Init GHCB memory pages when running as an SEV-ES guest */ sev_es_init_vc_handling(); /* Initialize TSS before setting up traps so ISTs work */ cpu_init_exception_handling(); /* Setup traps as cpu_init() might #GP */ if (!cpu_feature_enabled(X86_FEATURE_FRED)) idt_setup_traps(); cpu_init(); }
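/*
 * Illustrative sketch, not in the kernel tree: the GP_NON_CANONICAL hint
 * computed by get_kernel_gp_address() boils down to "the operand starts
 * below the kernel half of the address space but its last byte ends above
 * the user canonical limit", i.e. the access overlaps the non-canonical
 * hole. The unused helper below restates that test; the name is
 * hypothetical.
 */
#ifdef CONFIG_X86_64
static bool example_operand_hits_noncanonical_hole(unsigned long addr,
						   int opnd_bytes)
{
	/* Same expression as in get_kernel_gp_address() above */
	return addr < ~__VIRTUAL_MASK && addr + opnd_bytes - 1 > __VIRTUAL_MASK;
}
#endif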
// SPDX-License-Identifier: GPL-2.0+ /* * IPWireless 3G UMTS TDD Modem driver (USB connected) * * Copyright (C) 2004 Roelf Diedericks <roelfd@inet.co.za> * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> * * All information about the device was acquired using SnoopyPro * on MSFT's O/S, and examining the MSFT drivers' debug output * (insanely left _on_ in the enduser version) * * It was written out of frustration with the IPWireless USB modem * supplied by Axity3G/Sentech South Africa not supporting * Linux whatsoever. * * Nobody provided any proprietary information that was not already * available for this device. * * The modem adheres to the "3GPP TS 27.007 AT command set for 3G * User Equipment (UE)" standard, available from * http://www.3gpp.org/ftp/Specs/html-info/27007.htm * * The code was only tested with the IPWireless handheld modem distributed * in South Africa by Sentech. * * It may work for Woosh Inc in .nz too, as it appears they use the * same kit. * * There is still some work to be done in terms of handling * DCD, DTR, RTS, CTS which are currently faked. * It's good enough for PPP at this point.
It's based off all kinds of * code found in usb/serial and usb/class */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/uaccess.h> #include "usb-wwan.h" #define DRIVER_AUTHOR "Roelf Diedericks" #define DRIVER_DESC "IPWireless tty driver" #define IPW_TTY_MAJOR 240 /* real device node major id, experimental range */ #define IPW_TTY_MINORS 256 /* we support 256 devices, dunno why, it'd be insane :) */ #define USB_IPW_MAGIC 0x6d02 /* magic number for ipw struct */ /* Message sizes */ #define EVENT_BUFFER_SIZE 0xFF #define CHAR2INT16(c1, c0) (((u32)((c1) & 0xff) << 8) + (u32)((c0) & 0xff)) /* vendor/product pairs that are known work with this driver*/ #define IPW_VID 0x0bc3 #define IPW_PID 0x0001 /* Vendor commands: */ /* baud rates */ enum { ipw_sio_b256000 = 0x000e, ipw_sio_b128000 = 0x001d, ipw_sio_b115200 = 0x0020, ipw_sio_b57600 = 0x0040, ipw_sio_b56000 = 0x0042, ipw_sio_b38400 = 0x0060, ipw_sio_b19200 = 0x00c0, ipw_sio_b14400 = 0x0100, ipw_sio_b9600 = 0x0180, ipw_sio_b4800 = 0x0300, ipw_sio_b2400 = 0x0600, ipw_sio_b1200 = 0x0c00, ipw_sio_b600 = 0x1800 }; /* data bits */ #define ipw_dtb_7 0x700 #define ipw_dtb_8 0x810 /* ok so the define is misleading, I know, but forces 8,n,1 */ /* I mean, is there a point to any other setting these days? :) */ /* usb control request types : */ #define IPW_SIO_RXCTL 0x00 /* control bulk rx channel transmissions, value=1/0 (on/off) */ #define IPW_SIO_SET_BAUD 0x01 /* set baud, value=requested ipw_sio_bxxxx */ #define IPW_SIO_SET_LINE 0x03 /* set databits, parity. value=ipw_dtb_x */ #define IPW_SIO_SET_PIN 0x03 /* set/clear dtr/rts value=ipw_pin_xxx */ #define IPW_SIO_POLL 0x08 /* get serial port status byte, call with value=0 */ #define IPW_SIO_INIT 0x11 /* initializes ? value=0 (appears as first thing todo on open) */ #define IPW_SIO_PURGE 0x12 /* purge all transmissions?, call with value=numchar_to_purge */ #define IPW_SIO_HANDFLOW 0x13 /* set xon/xoff limits value=0, and a buffer of 0x10 bytes */ #define IPW_SIO_SETCHARS 0x13 /* set the flowcontrol special chars, value=0, buf=6 bytes, */ /* last 2 bytes contain flowcontrol chars e.g. 00 00 00 00 11 13 */ /* values used for request IPW_SIO_SET_PIN */ #define IPW_PIN_SETDTR 0x101 #define IPW_PIN_SETRTS 0x202 #define IPW_PIN_CLRDTR 0x100 #define IPW_PIN_CLRRTS 0x200 /* unconfirmed */ /* values used for request IPW_SIO_RXCTL */ #define IPW_RXBULK_ON 1 #define IPW_RXBULK_OFF 0 /* various 16 byte hardcoded transferbuffers used by flow control */ #define IPW_BYTES_FLOWINIT { 0x01, 0, 0, 0, 0x40, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0 } /* Interpretation of modem status lines */ /* These need sorting out by individually connecting pins and checking * results. FIXME! * When data is being sent we see 0x30 in the lower byte; this must * contain DSR and CTS ... 
*/ #define IPW_DSR ((1<<4) | (1<<5)) #define IPW_CTS ((1<<5) | (1<<4)) #define IPW_WANTS_TO_SEND 0x30 static const struct usb_device_id id_table[] = { { USB_DEVICE(IPW_VID, IPW_PID) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); static int ipw_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_device *udev = port->serial->dev; struct device *dev = &port->dev; u8 buf_flow_static[16] = IPW_BYTES_FLOWINIT; u8 *buf_flow_init; int result; buf_flow_init = kmemdup(buf_flow_static, 16, GFP_KERNEL); if (!buf_flow_init) return -ENOMEM; /* --1: Tell the modem to initialize (we think) From sniffs this is * always the first thing that gets sent to the modem during * opening of the device */ dev_dbg(dev, "%s: Sending SIO_INIT (we guess)\n", __func__); result = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), IPW_SIO_INIT, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, 0, 0, /* index */ NULL, 0, 100000); if (result < 0) dev_err(dev, "Init of modem failed (error = %d)\n", result); /* reset the bulk pipes */ usb_clear_halt(udev, usb_rcvbulkpipe(udev, port->bulk_in_endpointAddress)); usb_clear_halt(udev, usb_sndbulkpipe(udev, port->bulk_out_endpointAddress)); /*--2: Start reading from the device */ dev_dbg(dev, "%s: setting up bulk read callback\n", __func__); usb_wwan_open(tty, port); /*--3: Tell the modem to open the floodgates on the rx bulk channel */ dev_dbg(dev, "%s:asking modem for RxRead (RXBULK_ON)\n", __func__); result = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), IPW_SIO_RXCTL, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, IPW_RXBULK_ON, 0, /* index */ NULL, 0, 100000); if (result < 0) dev_err(dev, "Enabling bulk RxRead failed (error = %d)\n", result); /*--4: setup the initial flowcontrol */ dev_dbg(dev, "%s:setting init flowcontrol (%s)\n", __func__, buf_flow_init); result = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), IPW_SIO_HANDFLOW, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, 0, 0, buf_flow_init, 0x10, 200000); if (result < 0) dev_err(dev, "initial flowcontrol failed (error = %d)\n", result); kfree(buf_flow_init); return 0; } static int ipw_attach(struct usb_serial *serial) { struct usb_wwan_intf_private *data; data = kzalloc(sizeof(struct usb_wwan_intf_private), GFP_KERNEL); if (!data) return -ENOMEM; spin_lock_init(&data->susp_lock); usb_set_serial_data(serial, data); return 0; } static void ipw_release(struct usb_serial *serial) { struct usb_wwan_intf_private *data = usb_get_serial_data(serial); usb_set_serial_data(serial, NULL); kfree(data); } static void ipw_dtr_rts(struct usb_serial_port *port, int on) { struct usb_device *udev = port->serial->dev; struct device *dev = &port->dev; int result; dev_dbg(dev, "%s: on = %d\n", __func__, on); result = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), IPW_SIO_SET_PIN, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR, 0, NULL, 0, 200000); if (result < 0) dev_err(dev, "setting dtr failed (error = %d)\n", result); result = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), IPW_SIO_SET_PIN, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, on ? 
IPW_PIN_SETRTS : IPW_PIN_CLRRTS, 0, NULL, 0, 200000); if (result < 0) dev_err(dev, "setting rts failed (error = %d)\n", result); } static void ipw_close(struct usb_serial_port *port) { struct usb_device *udev = port->serial->dev; struct device *dev = &port->dev; int result; /*--3: purge */ dev_dbg(dev, "%s:sending purge\n", __func__); result = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), IPW_SIO_PURGE, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, 0x03, 0, NULL, 0, 200000); if (result < 0) dev_err(dev, "purge failed (error = %d)\n", result); /* send RXBULK_off (tell modem to stop transmitting bulk data on rx chan) */ result = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), IPW_SIO_RXCTL, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, IPW_RXBULK_OFF, 0, /* index */ NULL, 0, 100000); if (result < 0) dev_err(dev, "Disabling bulk RxRead failed (error = %d)\n", result); usb_wwan_close(port); } static struct usb_serial_driver ipw_device = { .driver = { .owner = THIS_MODULE, .name = "ipw", }, .description = "IPWireless converter", .id_table = id_table, .num_ports = 1, .open = ipw_open, .close = ipw_close, .attach = ipw_attach, .release = ipw_release, .port_probe = usb_wwan_port_probe, .port_remove = usb_wwan_port_remove, .dtr_rts = ipw_dtr_rts, .write = usb_wwan_write, }; static struct usb_serial_driver * const serial_drivers[] = { &ipw_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); /* Module information */ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
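/*
 * Illustrative sketch (not part of the driver above): the same vendor
 * requests that ipw_open() issues can be replayed from user space with
 * libusb-1.0, which can be handy when experimenting with the protocol.
 * The helper name and the 1-second timeouts are made up for this example;
 * the request codes and the 16-byte flow-control block mirror the
 * definitions in the driver.
 */
#include <stdint.h>
#include <libusb-1.0/libusb.h>

#define IPW_VID          0x0bc3
#define IPW_PID          0x0001
#define IPW_SIO_RXCTL    0x00
#define IPW_SIO_INIT     0x11
#define IPW_SIO_HANDFLOW 0x13
#define IPW_RXBULK_ON    1

static int ipw_userspace_open(void)
{
	/* same flow-control init block the driver sends (rest is zero) */
	unsigned char flow_init[16] = { 0x01, 0, 0, 0, 0x40 };
	const uint8_t rt = LIBUSB_REQUEST_TYPE_VENDOR |
			   LIBUSB_RECIPIENT_INTERFACE | LIBUSB_ENDPOINT_OUT;
	libusb_device_handle *h;
	int rc;

	rc = libusb_init(NULL);
	if (rc)
		return rc;

	h = libusb_open_device_with_vid_pid(NULL, IPW_VID, IPW_PID);
	if (!h) {
		libusb_exit(NULL);
		return -1;
	}
	libusb_set_auto_detach_kernel_driver(h, 1);
	rc = libusb_claim_interface(h, 0);
	if (rc)
		goto out;

	/* 1: SIO_INIT, 2: open the rx bulk channel, 3: initial flow control */
	libusb_control_transfer(h, rt, IPW_SIO_INIT, 0, 0, NULL, 0, 1000);
	libusb_control_transfer(h, rt, IPW_SIO_RXCTL, IPW_RXBULK_ON, 0,
				NULL, 0, 1000);
	libusb_control_transfer(h, rt, IPW_SIO_HANDFLOW, 0, 0,
				flow_init, sizeof(flow_init), 1000);

	libusb_release_interface(h, 0);
out:
	libusb_close(h);
	libusb_exit(NULL);
	return rc;
}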
// SPDX-License-Identifier: GPL-2.0+
/*
 * KLSI KL5KUSB105 chip RS232 converter driver
 *
 * Copyright (C) 2010 Johan Hovold <jhovold@gmail.com>
 * Copyright (C) 2001 Utz-Uwe Haus <haus@uuhaus.de>
 *
 * All information about the device was acquired using SniffUSB and snoopUSB
 * on Windows98.
 * It was written out of frustration with the PalmConnect USB Serial adapter
 * sold by Palm Inc.
 * Neither Palm, nor their contractor (MCCI), nor their supplier (KLSI)
 * provided information that was not already available.
 *
 * It seems that KLSI bought some silicon-design information from ScanLogic,
 * whose SL11R processor is at the core of the KL5KUSB chipset from KLSI.
 * ScanLogic has firmware available for their devices; it is probable that the
 * firmware differs from that used by KLSI in their products. If you have an
 * original KLSI device and can provide some information on it, I would be
 * most interested in adding support for it here. If you have any information
 * on the protocol used (or find errors in my reverse-engineered stuff), please
 * let me know.
 *
 * The code was only tested with a PalmConnect USB adapter; if you
 * are adventurous, try it with any KLSI-based device and let me know how it
 * breaks so that I can fix it!
*/ /* TODO: * check modem line signals * implement handshaking or decide that we do not support it */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/uaccess.h> #include <asm/unaligned.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include "kl5kusb105.h" #define DRIVER_AUTHOR "Utz-Uwe Haus <haus@uuhaus.de>, Johan Hovold <jhovold@gmail.com>" #define DRIVER_DESC "KLSI KL5KUSB105 chipset USB->Serial Converter driver" /* * Function prototypes */ static int klsi_105_port_probe(struct usb_serial_port *port); static void klsi_105_port_remove(struct usb_serial_port *port); static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port); static void klsi_105_close(struct usb_serial_port *port); static void klsi_105_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int klsi_105_tiocmget(struct tty_struct *tty); static void klsi_105_process_read_urb(struct urb *urb); static int klsi_105_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size); /* * All of the device info needed for the KLSI converters. */ static const struct usb_device_id id_table[] = { { USB_DEVICE(PALMCONNECT_VID, PALMCONNECT_PID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_serial_driver kl5kusb105d_device = { .driver = { .owner = THIS_MODULE, .name = "kl5kusb105d", }, .description = "KL5KUSB105D / PalmConnect", .id_table = id_table, .num_ports = 1, .bulk_out_size = 64, .open = klsi_105_open, .close = klsi_105_close, .set_termios = klsi_105_set_termios, .tiocmget = klsi_105_tiocmget, .port_probe = klsi_105_port_probe, .port_remove = klsi_105_port_remove, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .process_read_urb = klsi_105_process_read_urb, .prepare_write_buffer = klsi_105_prepare_write_buffer, }; static struct usb_serial_driver * const serial_drivers[] = { &kl5kusb105d_device, NULL }; struct klsi_105_port_settings { u8 pktlen; /* always 5, it seems */ u8 baudrate; u8 databits; u8 unknown1; u8 unknown2; }; struct klsi_105_private { struct klsi_105_port_settings cfg; unsigned long line_state; /* modem line settings */ spinlock_t lock; }; /* * Handle vendor specific USB requests */ #define KLSI_TIMEOUT 5000 /* default urb timeout */ static int klsi_105_chg_port_settings(struct usb_serial_port *port, struct klsi_105_port_settings *settings) { int rc; rc = usb_control_msg_send(port->serial->dev, 0, KL5KUSB105A_SIO_SET_DATA, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_INTERFACE, 0, /* value */ 0, /* index */ settings, sizeof(struct klsi_105_port_settings), KLSI_TIMEOUT, GFP_KERNEL); if (rc) dev_err(&port->dev, "Change port settings failed (error = %d)\n", rc); dev_dbg(&port->dev, "pktlen %u, baudrate 0x%02x, databits %u, u1 %u, u2 %u\n", settings->pktlen, settings->baudrate, settings->databits, settings->unknown1, settings->unknown2); return rc; } /* * Read line control via vendor command and return result through * the state pointer. 
*/ static int klsi_105_get_line_state(struct usb_serial_port *port, unsigned long *state) { u16 status; int rc; rc = usb_control_msg_recv(port->serial->dev, 0, KL5KUSB105A_SIO_POLL, USB_TYPE_VENDOR | USB_DIR_IN, 0, /* value */ 0, /* index */ &status, sizeof(status), 10000, GFP_KERNEL); if (rc) { dev_err(&port->dev, "reading line status failed: %d\n", rc); return rc; } le16_to_cpus(&status); dev_dbg(&port->dev, "read status %04x\n", status); *state = ((status & KL5KUSB105A_DSR) ? TIOCM_DSR : 0) | ((status & KL5KUSB105A_CTS) ? TIOCM_CTS : 0); return 0; } /* * Driver's tty interface functions */ static int klsi_105_port_probe(struct usb_serial_port *port) { struct klsi_105_private *priv; priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; /* set initial values for control structures */ priv->cfg.pktlen = 5; priv->cfg.baudrate = kl5kusb105a_sio_b9600; priv->cfg.databits = kl5kusb105a_dtb_8; priv->cfg.unknown1 = 0; priv->cfg.unknown2 = 1; priv->line_state = 0; spin_lock_init(&priv->lock); usb_set_serial_port_data(port, priv); return 0; } static void klsi_105_port_remove(struct usb_serial_port *port) { struct klsi_105_private *priv; priv = usb_get_serial_port_data(port); kfree(priv); } static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) { struct klsi_105_private *priv = usb_get_serial_port_data(port); int retval = 0; int rc; unsigned long line_state; struct klsi_105_port_settings cfg; unsigned long flags; /* Do a defined restart: * Set up sane default baud rate and send the 'READ_ON' * vendor command. * FIXME: set modem line control (how?) * Then read the modem line control and store values in * priv->line_state. */ cfg.pktlen = 5; cfg.baudrate = kl5kusb105a_sio_b9600; cfg.databits = kl5kusb105a_dtb_8; cfg.unknown1 = 0; cfg.unknown2 = 1; klsi_105_chg_port_settings(port, &cfg); spin_lock_irqsave(&priv->lock, flags); priv->cfg.pktlen = cfg.pktlen; priv->cfg.baudrate = cfg.baudrate; priv->cfg.databits = cfg.databits; priv->cfg.unknown1 = cfg.unknown1; priv->cfg.unknown2 = cfg.unknown2; spin_unlock_irqrestore(&priv->lock, flags); /* READ_ON and urb submission */ rc = usb_serial_generic_open(tty, port); if (rc) return rc; rc = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), KL5KUSB105A_SIO_CONFIGURE, USB_TYPE_VENDOR|USB_DIR_OUT|USB_RECIP_INTERFACE, KL5KUSB105A_SIO_CONFIGURE_READ_ON, 0, /* index */ NULL, 0, KLSI_TIMEOUT); if (rc < 0) { dev_err(&port->dev, "Enabling read failed (error = %d)\n", rc); retval = rc; goto err_generic_close; } else dev_dbg(&port->dev, "%s - enabled reading\n", __func__); rc = klsi_105_get_line_state(port, &line_state); if (rc < 0) { retval = rc; goto err_disable_read; } spin_lock_irqsave(&priv->lock, flags); priv->line_state = line_state; spin_unlock_irqrestore(&priv->lock, flags); dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, line_state); return 0; err_disable_read: usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), KL5KUSB105A_SIO_CONFIGURE, USB_TYPE_VENDOR | USB_DIR_OUT, KL5KUSB105A_SIO_CONFIGURE_READ_OFF, 0, /* index */ NULL, 0, KLSI_TIMEOUT); err_generic_close: usb_serial_generic_close(port); return retval; } static void klsi_105_close(struct usb_serial_port *port) { int rc; /* send READ_OFF */ rc = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), KL5KUSB105A_SIO_CONFIGURE, USB_TYPE_VENDOR | USB_DIR_OUT, KL5KUSB105A_SIO_CONFIGURE_READ_OFF, 0, /* index */ NULL, 0, KLSI_TIMEOUT); if (rc < 0) dev_err(&port->dev, "failed to disable read: 
%d\n", rc); /* shutdown our bulk reads and writes */ usb_serial_generic_close(port); } /* We need to write a complete 64-byte data block and encode the * number actually sent in the first double-byte, LSB-order. That * leaves at most 62 bytes of payload. */ #define KLSI_HDR_LEN 2 static int klsi_105_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size) { unsigned char *buf = dest; int count; count = kfifo_out_locked(&port->write_fifo, buf + KLSI_HDR_LEN, size, &port->lock); put_unaligned_le16(count, buf); return count + KLSI_HDR_LEN; } /* The data received is preceded by a length double-byte in LSB-first order. */ static void klsi_105_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned len; /* empty urbs seem to happen, we ignore them */ if (!urb->actual_length) return; if (urb->actual_length <= KLSI_HDR_LEN) { dev_dbg(&port->dev, "%s - malformed packet\n", __func__); return; } len = get_unaligned_le16(data); if (len > urb->actual_length - KLSI_HDR_LEN) { dev_dbg(&port->dev, "%s - packet length mismatch\n", __func__); len = urb->actual_length - KLSI_HDR_LEN; } tty_insert_flip_string(&port->port, data + KLSI_HDR_LEN, len); tty_flip_buffer_push(&port->port); } static void klsi_105_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct klsi_105_private *priv = usb_get_serial_port_data(port); struct device *dev = &port->dev; unsigned int iflag = tty->termios.c_iflag; unsigned int old_iflag = old_termios->c_iflag; unsigned int cflag = tty->termios.c_cflag; unsigned int old_cflag = old_termios->c_cflag; struct klsi_105_port_settings *cfg; unsigned long flags; speed_t baud; cfg = kmalloc(sizeof(*cfg), GFP_KERNEL); if (!cfg) return; /* lock while we are modifying the settings */ spin_lock_irqsave(&priv->lock, flags); /* * Update baud rate */ baud = tty_get_baud_rate(tty); switch (baud) { case 0: /* handled below */ break; case 1200: priv->cfg.baudrate = kl5kusb105a_sio_b1200; break; case 2400: priv->cfg.baudrate = kl5kusb105a_sio_b2400; break; case 4800: priv->cfg.baudrate = kl5kusb105a_sio_b4800; break; case 9600: priv->cfg.baudrate = kl5kusb105a_sio_b9600; break; case 19200: priv->cfg.baudrate = kl5kusb105a_sio_b19200; break; case 38400: priv->cfg.baudrate = kl5kusb105a_sio_b38400; break; case 57600: priv->cfg.baudrate = kl5kusb105a_sio_b57600; break; case 115200: priv->cfg.baudrate = kl5kusb105a_sio_b115200; break; default: dev_dbg(dev, "unsupported baudrate, using 9600\n"); priv->cfg.baudrate = kl5kusb105a_sio_b9600; baud = 9600; break; } /* * FIXME: implement B0 handling * * Maybe this should be simulated by sending read disable and read * enable messages? 
*/ tty_encode_baud_rate(tty, baud, baud); if ((cflag & CSIZE) != (old_cflag & CSIZE)) { /* set the number of data bits */ switch (cflag & CSIZE) { case CS5: dev_dbg(dev, "%s - 5 bits/byte not supported\n", __func__); spin_unlock_irqrestore(&priv->lock, flags); goto err; case CS6: dev_dbg(dev, "%s - 6 bits/byte not supported\n", __func__); spin_unlock_irqrestore(&priv->lock, flags); goto err; case CS7: priv->cfg.databits = kl5kusb105a_dtb_7; break; case CS8: priv->cfg.databits = kl5kusb105a_dtb_8; break; default: dev_err(dev, "CSIZE was not CS5-CS8, using default of 8\n"); priv->cfg.databits = kl5kusb105a_dtb_8; break; } } /* * Update line control register (LCR) */ if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD)) || (cflag & CSTOPB) != (old_cflag & CSTOPB)) { /* Not currently supported */ tty->termios.c_cflag &= ~(PARENB|PARODD|CSTOPB); } /* * Set flow control: well, I do not really now how to handle DTR/RTS. * Just do what we have seen with SniffUSB on Win98. */ if ((iflag & IXOFF) != (old_iflag & IXOFF) || (iflag & IXON) != (old_iflag & IXON) || (cflag & CRTSCTS) != (old_cflag & CRTSCTS)) { /* Not currently supported */ tty->termios.c_cflag &= ~CRTSCTS; } memcpy(cfg, &priv->cfg, sizeof(*cfg)); spin_unlock_irqrestore(&priv->lock, flags); /* now commit changes to device */ klsi_105_chg_port_settings(port, cfg); err: kfree(cfg); } static int klsi_105_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct klsi_105_private *priv = usb_get_serial_port_data(port); unsigned long flags; int rc; unsigned long line_state; rc = klsi_105_get_line_state(port, &line_state); if (rc < 0) { dev_err(&port->dev, "Reading line control failed (error = %d)\n", rc); /* better return value? EAGAIN? */ return rc; } spin_lock_irqsave(&priv->lock, flags); priv->line_state = line_state; spin_unlock_irqrestore(&priv->lock, flags); dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, line_state); return (int)line_state; } module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
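/*
 * Illustrative sketch (not part of the driver above): the bulk framing
 * implemented by klsi_105_prepare_write_buffer() and
 * klsi_105_process_read_urb() -- every block starts with a two-byte
 * little-endian payload length, so a 64-byte block carries at most 62
 * bytes of data.  The helper names are made up for this example.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define KLSI_BLOCK_LEN	 64
#define KLSI_HDR_LEN	 2
#define KLSI_MAX_PAYLOAD (KLSI_BLOCK_LEN - KLSI_HDR_LEN)

/* Build one outgoing block; returns the number of bytes to submit. */
static size_t klsi_frame_encode(uint8_t block[KLSI_BLOCK_LEN],
				const uint8_t *data, size_t len)
{
	size_t n = len > KLSI_MAX_PAYLOAD ? KLSI_MAX_PAYLOAD : len;

	block[0] = n & 0xff;		/* payload length, LSB first */
	block[1] = (n >> 8) & 0xff;
	memcpy(block + KLSI_HDR_LEN, data, n);
	return n + KLSI_HDR_LEN;
}

/* Locate the payload in a received block; returns its length. */
static size_t klsi_frame_decode(const uint8_t *block, size_t block_len,
				const uint8_t **payload)
{
	size_t n;

	if (block_len <= KLSI_HDR_LEN)
		return 0;			/* empty or malformed packet */
	n = block[0] | ((size_t)block[1] << 8);
	if (n > block_len - KLSI_HDR_LEN)	/* clamp, as the driver does */
		n = block_len - KLSI_HDR_LEN;
	*payload = block + KLSI_HDR_LEN;
	return n;
}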
// SPDX-License-Identifier: GPL-2.0 /* USB Driver for Sierra Wireless Copyright (C) 2006, 2007, 2008 Kevin Lloyd <klloyd@sierrawireless.com>, Copyright (C) 2008, 2009 Elina Pasheva, Matthew Safar, Rory Filer <linux@sierrawireless.com> IMPORTANT DISCLAIMER: This driver is not commercially supported by Sierra Wireless. Use at your own risk. Portions based on the option driver by Matthias Urlichs <smurf@smurf.noris.de>, who based his on the Keyspan driver by Hugh Blemings <hugh@blemings.org> */ /* Uncomment to log function calls */ /* #define DEBUG */ #define DRIVER_AUTHOR "Kevin Lloyd, Elina Pasheva, Matthew Safar, Rory Filer" #define DRIVER_DESC "USB Driver for Sierra Wireless USB modems" #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/tty.h> #include <linux/slab.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/serial.h> #define SWIMS_USB_REQUEST_SetPower 0x00 #define SWIMS_USB_REQUEST_SetNmea 0x07 #define N_IN_URB_HM 8 #define N_OUT_URB_HM 64 #define N_IN_URB 4 #define N_OUT_URB 4 #define IN_BUFLEN 4096 #define MAX_TRANSFER (PAGE_SIZE - 512) /* MAX_TRANSFER is chosen so that the VM is not stressed by allocations > PAGE_SIZE and the number of packets in a page is an integer; 512 is the largest possible packet on EHCI */ static bool nmea; struct sierra_iface_list { const u8 *nums; /* array of interface numbers */ size_t count; /* number of elements in array */ }; struct sierra_intf_private { spinlock_t susp_lock; unsigned int suspended:1; int in_flight; unsigned int open_ports; }; static int sierra_set_power_state(struct usb_device *udev, __u16 swiState) { return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), SWIMS_USB_REQUEST_SetPower, /* __u8 request */ USB_TYPE_VENDOR, /* __u8 request type */ swiState, /* __u16 value */ 0, /* __u16 index */ NULL, /* void *data */ 0, /* __u16 size */ USB_CTRL_SET_TIMEOUT); /* int timeout */ } static int sierra_vsc_set_nmea(struct usb_device *udev, __u16 enable) { return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), SWIMS_USB_REQUEST_SetNmea, /* __u8 request */ USB_TYPE_VENDOR, /* __u8 request type */ enable, /* __u16 value */ 0x0000, /* __u16 index */ NULL, /* void *data */ 0, /* __u16 size */ USB_CTRL_SET_TIMEOUT); /* int timeout */ } static int sierra_calc_num_ports(struct usb_serial *serial, struct usb_serial_endpoints *epds) { int num_ports = 0; u8 ifnum, numendpoints; ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber; numendpoints = serial->interface->cur_altsetting->desc.bNumEndpoints; /* Dummy interface present on some SKUs should be ignored */ if (ifnum == 0x99) num_ports = 0; else if (numendpoints <= 3) num_ports = 1; else num_ports = (numendpoints-1)/2; return num_ports; } static bool is_listed(const u8 ifnum, const struct sierra_iface_list *list) { int i; if (!list) return false; for (i = 0; i <
list->count; i++) { if (list->nums[i] == ifnum) return true; } return false; } static u8 sierra_interface_num(struct usb_serial *serial) { return serial->interface->cur_altsetting->desc.bInterfaceNumber; } static int sierra_probe(struct usb_serial *serial, const struct usb_device_id *id) { const struct sierra_iface_list *ignore_list; int result = 0; struct usb_device *udev; u8 ifnum; udev = serial->dev; ifnum = sierra_interface_num(serial); /* * If this interface supports more than 1 alternate * select the 2nd one */ if (serial->interface->num_altsetting == 2) { dev_dbg(&udev->dev, "Selecting alt setting for interface %d\n", ifnum); /* We know the alternate setting is 1 for the MC8785 */ usb_set_interface(udev, ifnum, 1); } ignore_list = (const struct sierra_iface_list *)id->driver_info; if (is_listed(ifnum, ignore_list)) { dev_dbg(&serial->dev->dev, "Ignoring interface #%d\n", ifnum); return -ENODEV; } return result; } /* interfaces with higher memory requirements */ static const u8 hi_memory_typeA_ifaces[] = { 0, 2 }; static const struct sierra_iface_list typeA_interface_list = { .nums = hi_memory_typeA_ifaces, .count = ARRAY_SIZE(hi_memory_typeA_ifaces), }; static const u8 hi_memory_typeB_ifaces[] = { 3, 4, 5, 6 }; static const struct sierra_iface_list typeB_interface_list = { .nums = hi_memory_typeB_ifaces, .count = ARRAY_SIZE(hi_memory_typeB_ifaces), }; /* 'ignorelist' of interfaces not served by this driver */ static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11, 19, 20 }; static const struct sierra_iface_list direct_ip_interface_ignore = { .nums = direct_ip_non_serial_ifaces, .count = ARRAY_SIZE(direct_ip_non_serial_ifaces), }; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */ { USB_DEVICE(0x03F0, 0x1B1D) }, /* HP ev2200 a.k.a MC5720 */ { USB_DEVICE(0x03F0, 0x211D) }, /* HP ev2210 a.k.a MC5725 */ { USB_DEVICE(0x03F0, 0x1E1D) }, /* HP hs2300 a.k.a MC8775 */ { USB_DEVICE(0x1199, 0x0017) }, /* Sierra Wireless EM5625 */ { USB_DEVICE(0x1199, 0x0018) }, /* Sierra Wireless MC5720 */ { USB_DEVICE(0x1199, 0x0218) }, /* Sierra Wireless MC5720 */ { USB_DEVICE(0x1199, 0x0020) }, /* Sierra Wireless MC5725 */ { USB_DEVICE(0x1199, 0x0220) }, /* Sierra Wireless MC5725 */ { USB_DEVICE(0x1199, 0x0022) }, /* Sierra Wireless EM5725 */ { USB_DEVICE(0x1199, 0x0024) }, /* Sierra Wireless MC5727 */ { USB_DEVICE(0x1199, 0x0224) }, /* Sierra Wireless MC5727 */ { USB_DEVICE(0x1199, 0x0019) }, /* Sierra Wireless AirCard 595 */ { USB_DEVICE(0x1199, 0x0021) }, /* Sierra Wireless AirCard 597E */ { USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */ { USB_DEVICE(0x1199, 0x0120) }, /* Sierra Wireless USB Dongle 595U */ { USB_DEVICE(0x1199, 0x0301) }, /* Sierra Wireless USB Dongle 250U */ /* Sierra Wireless C597 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x0023, 0xFF, 0xFF, 0xFF) }, /* Sierra Wireless T598 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x0025, 0xFF, 0xFF, 0xFF) }, { USB_DEVICE(0x1199, 0x0026) }, /* Sierra Wireless T11 */ { USB_DEVICE(0x1199, 0x0027) }, /* Sierra Wireless AC402 */ { USB_DEVICE(0x1199, 0x0028) }, /* Sierra Wireless MC5728 */ { USB_DEVICE(0x1199, 0x0029) }, /* Sierra Wireless Device */ { USB_DEVICE(0x1199, 0x6802) }, /* Sierra Wireless MC8755 */ { USB_DEVICE(0x1199, 0x6803) }, /* Sierra Wireless MC8765 */ { USB_DEVICE(0x1199, 0x6804) }, /* Sierra Wireless MC8755 */ { USB_DEVICE(0x1199, 0x6805) }, /* Sierra Wireless MC8765 */ { USB_DEVICE(0x1199, 0x6808) }, /* Sierra Wireless MC8755 */ { USB_DEVICE(0x1199, 
0x6809) }, /* Sierra Wireless MC8765 */ { USB_DEVICE(0x1199, 0x6812) }, /* Sierra Wireless MC8775 & AC 875U */ { USB_DEVICE(0x1199, 0x6813) }, /* Sierra Wireless MC8775 */ { USB_DEVICE(0x1199, 0x6815) }, /* Sierra Wireless MC8775 */ { USB_DEVICE(0x1199, 0x6816) }, /* Sierra Wireless MC8775 */ { USB_DEVICE(0x1199, 0x6820) }, /* Sierra Wireless AirCard 875 */ { USB_DEVICE(0x1199, 0x6821) }, /* Sierra Wireless AirCard 875U */ { USB_DEVICE(0x1199, 0x6822) }, /* Sierra Wireless AirCard 875E */ { USB_DEVICE(0x1199, 0x6832) }, /* Sierra Wireless MC8780 */ { USB_DEVICE(0x1199, 0x6833) }, /* Sierra Wireless MC8781 */ { USB_DEVICE(0x1199, 0x6834) }, /* Sierra Wireless MC8780 */ { USB_DEVICE(0x1199, 0x6835) }, /* Sierra Wireless MC8781 */ { USB_DEVICE(0x1199, 0x6838) }, /* Sierra Wireless MC8780 */ { USB_DEVICE(0x1199, 0x6839) }, /* Sierra Wireless MC8781 */ { USB_DEVICE(0x1199, 0x683A) }, /* Sierra Wireless MC8785 */ { USB_DEVICE(0x1199, 0x683B) }, /* Sierra Wireless MC8785 Composite */ /* Sierra Wireless MC8790, MC8791, MC8792 Composite */ { USB_DEVICE(0x1199, 0x683C) }, { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8791 Composite */ /* Sierra Wireless MC8790, MC8791, MC8792 */ { USB_DEVICE(0x1199, 0x683E) }, { USB_DEVICE(0x1199, 0x6850) }, /* Sierra Wireless AirCard 880 */ { USB_DEVICE(0x1199, 0x6851) }, /* Sierra Wireless AirCard 881 */ { USB_DEVICE(0x1199, 0x6852) }, /* Sierra Wireless AirCard 880 E */ { USB_DEVICE(0x1199, 0x6853) }, /* Sierra Wireless AirCard 881 E */ { USB_DEVICE(0x1199, 0x6855) }, /* Sierra Wireless AirCard 880 U */ { USB_DEVICE(0x1199, 0x6856) }, /* Sierra Wireless AirCard 881 U */ { USB_DEVICE(0x1199, 0x6859) }, /* Sierra Wireless AirCard 885 E */ { USB_DEVICE(0x1199, 0x685A) }, /* Sierra Wireless AirCard 885 E */ /* Sierra Wireless C885 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6880, 0xFF, 0xFF, 0xFF)}, /* Sierra Wireless C888, Air Card 501, USB 303, USB 304 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6890, 0xFF, 0xFF, 0xFF)}, /* Sierra Wireless C22/C33 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6891, 0xFF, 0xFF, 0xFF)}, /* Sierra Wireless HSPA Non-Composite Device */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6892, 0xFF, 0xFF, 0xFF)}, { USB_DEVICE(0x1199, 0x6893) }, /* Sierra Wireless Device */ /* Sierra Wireless Direct IP modems */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68A3, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_ignore }, { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_ignore }, { USB_DEVICE(0x1199, 0x68AB) }, /* Sierra Wireless AR8550 */ /* AT&T Direct IP LTE modems */ { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_ignore }, /* Airprime/Sierra Wireless Direct IP modems */ { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68A3, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_ignore }, { } }; MODULE_DEVICE_TABLE(usb, id_table); struct sierra_port_private { spinlock_t lock; /* lock the structure */ int outstanding_urbs; /* number of out urbs in flight */ struct usb_anchor active; struct usb_anchor delayed; int num_out_urbs; int num_in_urbs; /* Input endpoints and buffers for this port */ struct urb *in_urbs[N_IN_URB_HM]; /* Settings for the port */ int rts_state; /* Handshaking pins (outputs) */ int dtr_state; int cts_state; /* Handshaking pins (inputs) */ int dsr_state; int dcd_state; int ri_state; }; static int sierra_send_setup(struct usb_serial_port *port) { struct 
usb_serial *serial = port->serial; struct sierra_port_private *portdata; __u16 interface = 0; int val = 0; int do_send = 0; int retval; portdata = usb_get_serial_port_data(port); if (portdata->dtr_state) val |= 0x01; if (portdata->rts_state) val |= 0x02; /* If composite device then properly report interface */ if (serial->num_ports == 1) { interface = sierra_interface_num(serial); /* Control message is sent only to interfaces with * interrupt_in endpoints */ if (port->interrupt_in_urb) { /* send control message */ do_send = 1; } } /* Otherwise the need to do non-composite mapping */ else { if (port->bulk_out_endpointAddress == 2) interface = 0; else if (port->bulk_out_endpointAddress == 4) interface = 1; else if (port->bulk_out_endpointAddress == 5) interface = 2; do_send = 1; } if (!do_send) return 0; retval = usb_autopm_get_interface(serial->interface); if (retval < 0) return retval; retval = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), 0x22, 0x21, val, interface, NULL, 0, USB_CTRL_SET_TIMEOUT); usb_autopm_put_interface(serial->interface); return retval; } static int sierra_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned int value; struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); value = ((portdata->rts_state) ? TIOCM_RTS : 0) | ((portdata->dtr_state) ? TIOCM_DTR : 0) | ((portdata->cts_state) ? TIOCM_CTS : 0) | ((portdata->dsr_state) ? TIOCM_DSR : 0) | ((portdata->dcd_state) ? TIOCM_CAR : 0) | ((portdata->ri_state) ? TIOCM_RNG : 0); return value; } static int sierra_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); if (set & TIOCM_RTS) portdata->rts_state = 1; if (set & TIOCM_DTR) portdata->dtr_state = 1; if (clear & TIOCM_RTS) portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; return sierra_send_setup(port); } static void sierra_release_urb(struct urb *urb) { if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } } static void sierra_outdat_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct sierra_port_private *portdata = usb_get_serial_port_data(port); struct sierra_intf_private *intfdata; int status = urb->status; unsigned long flags; intfdata = usb_get_serial_data(port->serial); /* free up the transfer buffer, as usb_free_urb() does not do this */ kfree(urb->transfer_buffer); usb_autopm_put_interface_async(port->serial->interface); if (status) dev_dbg(&port->dev, "%s - nonzero write bulk status " "received: %d\n", __func__, status); spin_lock_irqsave(&portdata->lock, flags); --portdata->outstanding_urbs; spin_unlock_irqrestore(&portdata->lock, flags); spin_lock_irqsave(&intfdata->susp_lock, flags); --intfdata->in_flight; spin_unlock_irqrestore(&intfdata->susp_lock, flags); usb_serial_port_softint(port); } /* Write */ static int sierra_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { struct sierra_port_private *portdata; struct sierra_intf_private *intfdata; struct usb_serial *serial = port->serial; unsigned long flags; unsigned char *buffer; struct urb *urb; size_t writesize = min((size_t)count, (size_t)MAX_TRANSFER); int retval = 0; /* verify that we actually have some data to write */ if (count == 0) return 0; portdata = usb_get_serial_port_data(port); intfdata = usb_get_serial_data(serial); dev_dbg(&port->dev, "%s: write (%zd 
bytes)\n", __func__, writesize); spin_lock_irqsave(&portdata->lock, flags); dev_dbg(&port->dev, "%s - outstanding_urbs: %d\n", __func__, portdata->outstanding_urbs); if (portdata->outstanding_urbs > portdata->num_out_urbs) { spin_unlock_irqrestore(&portdata->lock, flags); dev_dbg(&port->dev, "%s - write limit hit\n", __func__); return 0; } portdata->outstanding_urbs++; dev_dbg(&port->dev, "%s - 1, outstanding_urbs: %d\n", __func__, portdata->outstanding_urbs); spin_unlock_irqrestore(&portdata->lock, flags); retval = usb_autopm_get_interface_async(serial->interface); if (retval < 0) { spin_lock_irqsave(&portdata->lock, flags); portdata->outstanding_urbs--; spin_unlock_irqrestore(&portdata->lock, flags); goto error_simple; } buffer = kmemdup(buf, writesize, GFP_ATOMIC); if (!buffer) { retval = -ENOMEM; goto error_no_buffer; } urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { retval = -ENOMEM; goto error_no_urb; } usb_serial_debug_data(&port->dev, __func__, writesize, buffer); usb_fill_bulk_urb(urb, serial->dev, usb_sndbulkpipe(serial->dev, port->bulk_out_endpointAddress), buffer, writesize, sierra_outdat_callback, port); /* Handle the need to send a zero length packet */ urb->transfer_flags |= URB_ZERO_PACKET; spin_lock_irqsave(&intfdata->susp_lock, flags); if (intfdata->suspended) { usb_anchor_urb(urb, &portdata->delayed); spin_unlock_irqrestore(&intfdata->susp_lock, flags); goto skip_power; } else { usb_anchor_urb(urb, &portdata->active); } /* send it down the pipe */ retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) { usb_unanchor_urb(urb); spin_unlock_irqrestore(&intfdata->susp_lock, flags); dev_err(&port->dev, "%s - usb_submit_urb(write bulk) failed " "with status = %d\n", __func__, retval); goto error; } else { intfdata->in_flight++; spin_unlock_irqrestore(&intfdata->susp_lock, flags); } skip_power: /* we are done with this urb, so let the host driver * really free it when it is finished with it */ usb_free_urb(urb); return writesize; error: usb_free_urb(urb); error_no_urb: kfree(buffer); error_no_buffer: spin_lock_irqsave(&portdata->lock, flags); --portdata->outstanding_urbs; dev_dbg(&port->dev, "%s - 2. outstanding_urbs: %d\n", __func__, portdata->outstanding_urbs); spin_unlock_irqrestore(&portdata->lock, flags); usb_autopm_put_interface_async(serial->interface); error_simple: return retval; } static void sierra_indat_callback(struct urb *urb) { int err; int endpoint; struct usb_serial_port *port; unsigned char *data = urb->transfer_buffer; int status = urb->status; endpoint = usb_pipeendpoint(urb->pipe); port = urb->context; if (status) { dev_dbg(&port->dev, "%s: nonzero status: %d on" " endpoint %02x\n", __func__, status, endpoint); } else { if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); } else { dev_dbg(&port->dev, "%s: empty read urb" " received\n", __func__); } } /* Resubmit urb so we continue receiving */ if (status != -ESHUTDOWN && status != -EPERM) { usb_mark_last_busy(port->serial->dev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err && err != -EPERM) dev_err(&port->dev, "resubmit read urb failed." 
"(%d)\n", err); } } static void sierra_instat_callback(struct urb *urb) { int err; int status = urb->status; struct usb_serial_port *port = urb->context; struct sierra_port_private *portdata = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s: urb %p port %p has data %p\n", __func__, urb, port, portdata); if (status == 0) { struct usb_ctrlrequest *req_pkt = urb->transfer_buffer; if (!req_pkt) { dev_dbg(&port->dev, "%s: NULL req_pkt\n", __func__); return; } if ((req_pkt->bRequestType == 0xA1) && (req_pkt->bRequest == 0x20)) { int old_dcd_state; unsigned char signals = *((unsigned char *) urb->transfer_buffer + sizeof(struct usb_ctrlrequest)); dev_dbg(&port->dev, "%s: signal x%x\n", __func__, signals); old_dcd_state = portdata->dcd_state; portdata->cts_state = 1; portdata->dcd_state = ((signals & 0x01) ? 1 : 0); portdata->dsr_state = ((signals & 0x02) ? 1 : 0); portdata->ri_state = ((signals & 0x08) ? 1 : 0); if (old_dcd_state && !portdata->dcd_state) tty_port_tty_hangup(&port->port, true); } else { dev_dbg(&port->dev, "%s: type %x req %x\n", __func__, req_pkt->bRequestType, req_pkt->bRequest); } } else dev_dbg(&port->dev, "%s: error %d\n", __func__, status); /* Resubmit urb so we continue receiving IRQ data */ if (status != -ESHUTDOWN && status != -ENOENT) { usb_mark_last_busy(serial->dev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err && err != -EPERM) dev_err(&port->dev, "%s: resubmit intr urb " "failed. (%d)\n", __func__, err); } } static unsigned int sierra_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct sierra_port_private *portdata = usb_get_serial_port_data(port); unsigned long flags; /* try to give a good number back based on if we have any free urbs at * this point in time */ spin_lock_irqsave(&portdata->lock, flags); if (portdata->outstanding_urbs > (portdata->num_out_urbs * 2) / 3) { spin_unlock_irqrestore(&portdata->lock, flags); dev_dbg(&port->dev, "%s - write limit hit\n", __func__); return 0; } spin_unlock_irqrestore(&portdata->lock, flags); return 2048; } static unsigned int sierra_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct sierra_port_private *portdata = usb_get_serial_port_data(port); unsigned long flags; unsigned int chars; /* NOTE: This overcounts somewhat. 
*/ spin_lock_irqsave(&portdata->lock, flags); chars = portdata->outstanding_urbs * MAX_TRANSFER; spin_unlock_irqrestore(&portdata->lock, flags); dev_dbg(&port->dev, "%s - %u\n", __func__, chars); return chars; } static void sierra_stop_rx_urbs(struct usb_serial_port *port) { int i; struct sierra_port_private *portdata = usb_get_serial_port_data(port); for (i = 0; i < portdata->num_in_urbs; i++) usb_kill_urb(portdata->in_urbs[i]); usb_kill_urb(port->interrupt_in_urb); } static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags) { int ok_cnt; int err = -EINVAL; int i; struct urb *urb; struct sierra_port_private *portdata = usb_get_serial_port_data(port); ok_cnt = 0; for (i = 0; i < portdata->num_in_urbs; i++) { urb = portdata->in_urbs[i]; if (!urb) continue; err = usb_submit_urb(urb, mem_flags); if (err) { dev_err(&port->dev, "%s: submit urb failed: %d\n", __func__, err); } else { ok_cnt++; } } if (ok_cnt && port->interrupt_in_urb) { err = usb_submit_urb(port->interrupt_in_urb, mem_flags); if (err) { dev_err(&port->dev, "%s: submit intr urb failed: %d\n", __func__, err); } } if (ok_cnt > 0) /* at least one rx urb submitted */ return 0; else return err; } static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint, int dir, void *ctx, int len, gfp_t mem_flags, usb_complete_t callback) { struct urb *urb; u8 *buf; urb = usb_alloc_urb(0, mem_flags); if (!urb) return NULL; buf = kmalloc(len, mem_flags); if (buf) { /* Fill URB using supplied data */ usb_fill_bulk_urb(urb, serial->dev, usb_sndbulkpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx); dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__, dir == USB_DIR_IN ? 'i' : 'o', urb, buf); } else { sierra_release_urb(urb); urb = NULL; } return urb; } static void sierra_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; struct sierra_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; portdata = usb_get_serial_port_data(port); /* * Need to take susp_lock to make sure port is not already being * resumed, but no need to hold it due to the tty-port initialized * flag. 
*/ spin_lock_irq(&intfdata->susp_lock); if (--intfdata->open_ports == 0) serial->interface->needs_remote_wakeup = 0; spin_unlock_irq(&intfdata->susp_lock); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; kfree(urb->transfer_buffer); usb_free_urb(urb); usb_autopm_put_interface_async(serial->interface); spin_lock_irq(&portdata->lock); portdata->outstanding_urbs--; spin_unlock_irq(&portdata->lock); } sierra_stop_rx_urbs(port); usb_kill_anchored_urbs(&portdata->active); for (i = 0; i < portdata->num_in_urbs; i++) { sierra_release_urb(portdata->in_urbs[i]); portdata->in_urbs[i] = NULL; } usb_autopm_get_interface_no_resume(serial->interface); } static int sierra_open(struct tty_struct *tty, struct usb_serial_port *port) { struct sierra_port_private *portdata; struct usb_serial *serial = port->serial; struct sierra_intf_private *intfdata = usb_get_serial_data(serial); int i; int err; int endpoint; struct urb *urb; portdata = usb_get_serial_port_data(port); endpoint = port->bulk_in_endpointAddress; for (i = 0; i < portdata->num_in_urbs; i++) { urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port, IN_BUFLEN, GFP_KERNEL, sierra_indat_callback); portdata->in_urbs[i] = urb; } /* clear halt condition */ usb_clear_halt(serial->dev, usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN); err = sierra_submit_rx_urbs(port, GFP_KERNEL); if (err) goto err_submit; spin_lock_irq(&intfdata->susp_lock); if (++intfdata->open_ports == 1) serial->interface->needs_remote_wakeup = 1; spin_unlock_irq(&intfdata->susp_lock); usb_autopm_put_interface(serial->interface); return 0; err_submit: sierra_stop_rx_urbs(port); for (i = 0; i < portdata->num_in_urbs; i++) { sierra_release_urb(portdata->in_urbs[i]); portdata->in_urbs[i] = NULL; } return err; } static void sierra_dtr_rts(struct usb_serial_port *port, int on) { struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); portdata->rts_state = on; portdata->dtr_state = on; sierra_send_setup(port); } static int sierra_startup(struct usb_serial *serial) { struct sierra_intf_private *intfdata; intfdata = kzalloc(sizeof(*intfdata), GFP_KERNEL); if (!intfdata) return -ENOMEM; spin_lock_init(&intfdata->susp_lock); usb_set_serial_data(serial, intfdata); /* Set Device mode to D0 */ sierra_set_power_state(serial->dev, 0x0000); /* Check NMEA and set */ if (nmea) sierra_vsc_set_nmea(serial->dev, 1); return 0; } static void sierra_release(struct usb_serial *serial) { struct sierra_intf_private *intfdata; intfdata = usb_get_serial_data(serial); kfree(intfdata); } static int sierra_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; const struct sierra_iface_list *himemory_list; u8 ifnum; portdata = kzalloc(sizeof(*portdata), GFP_KERNEL); if (!portdata) return -ENOMEM; spin_lock_init(&portdata->lock); init_usb_anchor(&portdata->active); init_usb_anchor(&portdata->delayed); /* Assume low memory requirements */ portdata->num_out_urbs = N_OUT_URB; portdata->num_in_urbs = N_IN_URB; /* Determine actual memory requirements */ if (serial->num_ports == 1) { /* Get interface number for composite device */ ifnum = sierra_interface_num(serial); himemory_list = &typeB_interface_list; } else { /* This is really the usb-serial port number of the interface * rather than the interface number. 
*/ ifnum = port->port_number; himemory_list = &typeA_interface_list; } if (is_listed(ifnum, himemory_list)) { portdata->num_out_urbs = N_OUT_URB_HM; portdata->num_in_urbs = N_IN_URB_HM; } dev_dbg(&port->dev, "Memory usage (urbs) interface #%d, in=%d, out=%d\n", ifnum, portdata->num_in_urbs, portdata->num_out_urbs); usb_set_serial_port_data(port, portdata); return 0; } static void sierra_port_remove(struct usb_serial_port *port) { struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); usb_set_serial_port_data(port, NULL); kfree(portdata); } #ifdef CONFIG_PM static void stop_read_write_urbs(struct usb_serial *serial) { int i; struct usb_serial_port *port; struct sierra_port_private *portdata; /* Stop reading/writing urbs */ for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; portdata = usb_get_serial_port_data(port); if (!portdata) continue; sierra_stop_rx_urbs(port); usb_kill_anchored_urbs(&portdata->active); } } static int sierra_suspend(struct usb_serial *serial, pm_message_t message) { struct sierra_intf_private *intfdata = usb_get_serial_data(serial); spin_lock_irq(&intfdata->susp_lock); if (PMSG_IS_AUTO(message)) { if (intfdata->in_flight) { spin_unlock_irq(&intfdata->susp_lock); return -EBUSY; } } intfdata->suspended = 1; spin_unlock_irq(&intfdata->susp_lock); stop_read_write_urbs(serial); return 0; } /* Caller must hold susp_lock. */ static int sierra_submit_delayed_urbs(struct usb_serial_port *port) { struct sierra_port_private *portdata = usb_get_serial_port_data(port); struct sierra_intf_private *intfdata; struct urb *urb; int ec = 0; int err; intfdata = usb_get_serial_data(port->serial); for (;;) { urb = usb_get_from_anchor(&portdata->delayed); if (!urb) break; usb_anchor_urb(urb, &portdata->active); intfdata->in_flight++; err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { dev_err(&port->dev, "%s - submit urb failed: %d", __func__, err); ec++; intfdata->in_flight--; usb_unanchor_urb(urb); kfree(urb->transfer_buffer); usb_free_urb(urb); spin_lock(&portdata->lock); portdata->outstanding_urbs--; spin_unlock(&portdata->lock); } } if (ec) return -EIO; return 0; } static int sierra_resume(struct usb_serial *serial) { struct usb_serial_port *port; struct sierra_intf_private *intfdata = usb_get_serial_data(serial); int ec = 0; int i, err; spin_lock_irq(&intfdata->susp_lock); for (i = 0; i < serial->num_ports; i++) { port = serial->port[i]; if (!tty_port_initialized(&port->port)) continue; err = sierra_submit_delayed_urbs(port); if (err) ec++; err = sierra_submit_rx_urbs(port, GFP_ATOMIC); if (err) ec++; } intfdata->suspended = 0; spin_unlock_irq(&intfdata->susp_lock); return ec ? 
-EIO : 0; } #else #define sierra_suspend NULL #define sierra_resume NULL #endif static struct usb_serial_driver sierra_device = { .driver = { .owner = THIS_MODULE, .name = "sierra", }, .description = "Sierra USB modem", .id_table = id_table, .calc_num_ports = sierra_calc_num_ports, .probe = sierra_probe, .open = sierra_open, .close = sierra_close, .dtr_rts = sierra_dtr_rts, .write = sierra_write, .write_room = sierra_write_room, .chars_in_buffer = sierra_chars_in_buffer, .tiocmget = sierra_tiocmget, .tiocmset = sierra_tiocmset, .attach = sierra_startup, .release = sierra_release, .port_probe = sierra_port_probe, .port_remove = sierra_port_remove, .suspend = sierra_suspend, .resume = sierra_resume, .read_int_callback = sierra_instat_callback, }; static struct usb_serial_driver * const serial_drivers[] = { &sierra_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2"); module_param(nmea, bool, 0644); MODULE_PARM_DESC(nmea, "NMEA streaming");
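/*
 * Illustrative sketch (not part of the driver above): once sierra has
 * created a tty node, the tiocmget()/tiocmset() handlers are reached
 * through the ordinary termios modem-control ioctls.  "/dev/ttyUSB0" is
 * only an example path, not something the driver guarantees.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

int main(void)
{
	int bits, fd = open("/dev/ttyUSB0", O_RDWR | O_NOCTTY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* raise DTR and RTS: ends up in sierra_tiocmset() -> sierra_send_setup() */
	bits = TIOCM_DTR | TIOCM_RTS;
	ioctl(fd, TIOCMBIS, &bits);

	/* read back the input lines reported by sierra_tiocmget() */
	if (ioctl(fd, TIOCMGET, &bits) == 0)
		printf("DSR=%d CTS=%d DCD=%d RI=%d\n",
		       !!(bits & TIOCM_DSR), !!(bits & TIOCM_CTS),
		       !!(bits & TIOCM_CAR), !!(bits & TIOCM_RNG));

	close(fd);
	return 0;
}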
// SPDX-License-Identifier: GPL-2.0
/*
 * devtmpfs - kernel-maintained tmpfs-based /dev
 *
 * Copyright (C) 2009, Kay Sievers <kay.sievers@vrfy.org>
 *
 * During bootup, before any driver core device is registered,
 * devtmpfs, a tmpfs-based filesystem, is created. Every driver-core
 * device which requests a device node will add a node in this
 * filesystem.
 * By default, all device nodes are named after the device, owned by
 * root, and have a default mode of 0600. Subsystems can overwrite
 * the default setting if needed.
*/ #define pr_fmt(fmt) "devtmpfs: " fmt #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/mount.h> #include <linux/device.h> #include <linux/blkdev.h> #include <linux/namei.h> #include <linux/fs.h> #include <linux/shmem_fs.h> #include <linux/ramfs.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/kthread.h> #include <linux/init_syscalls.h> #include <uapi/linux/mount.h> #include "base.h" #ifdef CONFIG_DEVTMPFS_SAFE #define DEVTMPFS_MFLAGS (MS_SILENT | MS_NOEXEC | MS_NOSUID) #else #define DEVTMPFS_MFLAGS (MS_SILENT) #endif static struct task_struct *thread; static int __initdata mount_dev = IS_ENABLED(CONFIG_DEVTMPFS_MOUNT); static DEFINE_SPINLOCK(req_lock); static struct req { struct req *next; struct completion done; int err; const char *name; umode_t mode; /* 0 => delete */ kuid_t uid; kgid_t gid; struct device *dev; } *requests; static int __init mount_param(char *str) { mount_dev = simple_strtoul(str, NULL, 0); return 1; } __setup("devtmpfs.mount=", mount_param); static struct vfsmount *mnt; static struct dentry *public_dev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { struct super_block *s = mnt->mnt_sb; int err; atomic_inc(&s->s_active); down_write(&s->s_umount); err = reconfigure_single(s, flags, data); if (err < 0) { deactivate_locked_super(s); return ERR_PTR(err); } return dget(s->s_root); } static struct file_system_type internal_fs_type = { .name = "devtmpfs", #ifdef CONFIG_TMPFS .init_fs_context = shmem_init_fs_context, #else .init_fs_context = ramfs_init_fs_context, #endif .kill_sb = kill_litter_super, }; static struct file_system_type dev_fs_type = { .name = "devtmpfs", .mount = public_dev_mount, }; static int devtmpfs_submit_req(struct req *req, const char *tmp) { init_completion(&req->done); spin_lock(&req_lock); req->next = requests; requests = req; spin_unlock(&req_lock); wake_up_process(thread); wait_for_completion(&req->done); kfree(tmp); return req->err; } int devtmpfs_create_node(struct device *dev) { const char *tmp = NULL; struct req req; if (!thread) return 0; req.mode = 0; req.uid = GLOBAL_ROOT_UID; req.gid = GLOBAL_ROOT_GID; req.name = device_get_devnode(dev, &req.mode, &req.uid, &req.gid, &tmp); if (!req.name) return -ENOMEM; if (req.mode == 0) req.mode = 0600; if (is_blockdev(dev)) req.mode |= S_IFBLK; else req.mode |= S_IFCHR; req.dev = dev; return devtmpfs_submit_req(&req, tmp); } int devtmpfs_delete_node(struct device *dev) { const char *tmp = NULL; struct req req; if (!thread) return 0; req.name = device_get_devnode(dev, NULL, NULL, NULL, &tmp); if (!req.name) return -ENOMEM; req.mode = 0; req.dev = dev; return devtmpfs_submit_req(&req, tmp); } static int dev_mkdir(const char *name, umode_t mode) { struct dentry *dentry; struct path path; int err; dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); err = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); if (!err) /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; done_path_create(&path, dentry); return err; } static int create_path(const char *nodepath) { char *path; char *s; int err = 0; /* parent directories do not exist, create them */ path = kstrdup(nodepath, GFP_KERNEL); if (!path) return -ENOMEM; s = path; for (;;) { s = strchr(s, '/'); if (!s) break; s[0] = '\0'; err = dev_mkdir(path, 0755); if (err && err != -EEXIST) break; s[0] = '/'; s++; } kfree(path); return err; } static int handle_create(const char *nodename, umode_t mode, 
kuid_t uid, kgid_t gid, struct device *dev) { struct dentry *dentry; struct path path; int err; dentry = kern_path_create(AT_FDCWD, nodename, &path, 0); if (dentry == ERR_PTR(-ENOENT)) { create_path(nodename); dentry = kern_path_create(AT_FDCWD, nodename, &path, 0); } if (IS_ERR(dentry)) return PTR_ERR(dentry); err = vfs_mknod(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, dev->devt); if (!err) { struct iattr newattrs; newattrs.ia_mode = mode; newattrs.ia_uid = uid; newattrs.ia_gid = gid; newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID; inode_lock(d_inode(dentry)); notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); inode_unlock(d_inode(dentry)); /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; } done_path_create(&path, dentry); return err; } static int dev_rmdir(const char *name) { struct path parent; struct dentry *dentry; int err; dentry = kern_path_locked(name, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); if (d_really_is_positive(dentry)) { if (d_inode(dentry)->i_private == &thread) err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), dentry); else err = -EPERM; } else { err = -ENOENT; } dput(dentry); inode_unlock(d_inode(parent.dentry)); path_put(&parent); return err; } static int delete_path(const char *nodepath) { char *path; int err = 0; path = kstrdup(nodepath, GFP_KERNEL); if (!path) return -ENOMEM; for (;;) { char *base; base = strrchr(path, '/'); if (!base) break; base[0] = '\0'; err = dev_rmdir(path); if (err) break; } kfree(path); return err; } static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat) { /* did we create it */ if (inode->i_private != &thread) return 0; /* does the dev_t match */ if (is_blockdev(dev)) { if (!S_ISBLK(stat->mode)) return 0; } else { if (!S_ISCHR(stat->mode)) return 0; } if (stat->rdev != dev->devt) return 0; /* ours */ return 1; } static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; int deleted = 0; int err; dentry = kern_path_locked(nodename, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); if (d_really_is_positive(dentry)) { struct kstat stat; struct path p = {.mnt = parent.mnt, .dentry = dentry}; err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, AT_STATX_SYNC_AS_STAT); if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { struct iattr newattrs; /* * before unlinking this node, reset permissions * of possible references like hardlinks */ newattrs.ia_uid = GLOBAL_ROOT_UID; newattrs.ia_gid = GLOBAL_ROOT_GID; newattrs.ia_mode = stat.mode & ~0777; newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE; inode_lock(d_inode(dentry)); notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); inode_unlock(d_inode(dentry)); err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), dentry, NULL); if (!err || err == -ENOENT) deleted = 1; } } else { err = -ENOENT; } dput(dentry); inode_unlock(d_inode(parent.dentry)); path_put(&parent); if (deleted && strchr(nodename, '/')) delete_path(nodename); return err; } /* * If configured, or requested by the commandline, devtmpfs will be * auto-mounted after the kernel mounted the root filesystem. 
*/ int __init devtmpfs_mount(void) { int err; if (!mount_dev) return 0; if (!thread) return 0; err = init_mount("devtmpfs", "dev", "devtmpfs", DEVTMPFS_MFLAGS, NULL); if (err) pr_info("error mounting %d\n", err); else pr_info("mounted\n"); return err; } static __initdata DECLARE_COMPLETION(setup_done); static int handle(const char *name, umode_t mode, kuid_t uid, kgid_t gid, struct device *dev) { if (mode) return handle_create(name, mode, uid, gid, dev); else return handle_remove(name, dev); } static void __noreturn devtmpfs_work_loop(void) { while (1) { spin_lock(&req_lock); while (requests) { struct req *req = requests; requests = NULL; spin_unlock(&req_lock); while (req) { struct req *next = req->next; req->err = handle(req->name, req->mode, req->uid, req->gid, req->dev); complete(&req->done); req = next; } spin_lock(&req_lock); } __set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&req_lock); schedule(); } } static noinline int __init devtmpfs_setup(void *p) { int err; err = ksys_unshare(CLONE_NEWNS); if (err) goto out; err = init_mount("devtmpfs", "/", "devtmpfs", DEVTMPFS_MFLAGS, NULL); if (err) goto out; init_chdir("/.."); /* will traverse into overmounted root */ init_chroot("."); out: *(int *)p = err; return err; } /* * The __ref is because devtmpfs_setup needs to be __init for the routines it * calls. That call is done while devtmpfs_init, which is marked __init, * synchronously waits for it to complete. */ static int __ref devtmpfsd(void *p) { int err = devtmpfs_setup(p); complete(&setup_done); if (err) return err; devtmpfs_work_loop(); return 0; } /* * Create devtmpfs instance, driver-core devices will add their device * nodes here. */ int __init devtmpfs_init(void) { char opts[] = "mode=0755"; int err; mnt = vfs_kern_mount(&internal_fs_type, 0, "devtmpfs", opts); if (IS_ERR(mnt)) { pr_err("unable to create devtmpfs %ld\n", PTR_ERR(mnt)); return PTR_ERR(mnt); } err = register_filesystem(&dev_fs_type); if (err) { pr_err("unable to register devtmpfs type %d\n", err); return err; } thread = kthread_run(devtmpfsd, &err, "kdevtmpfs"); if (!IS_ERR(thread)) { wait_for_completion(&setup_done); } else { err = PTR_ERR(thread); thread = NULL; } if (err) { pr_err("unable to create devtmpfs %d\n", err); unregister_filesystem(&dev_fs_type); thread = NULL; return err; } pr_info("initialized\n"); return 0; }
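/*
 * Illustrative sketch, not part of devtmpfs.c: the code above serializes all
 * node creation/removal through a single kthread -- callers push a request
 * onto a spinlock-protected singly linked list and sleep on a per-request
 * completion until the worker hands back an error code.  The standalone
 * userspace program below mirrors that pattern with pthreads; all names
 * (struct req, worker, submit) are hypothetical stand-ins, and for brevity
 * the worker keeps the lock while handling a request, unlike the kernel code
 * which drops it.  Build with:  cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

struct req {
	struct req *next;
	const char *name;
	int err;
	int done;
	pthread_cond_t done_cond;
};

static struct req *requests;
static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t req_pending = PTHREAD_COND_INITIALIZER;

/* Worker: drain the list and complete each request, like devtmpfsd() does. */
static void *worker(void *unused)
{
	(void)unused;
	for (;;) {
		pthread_mutex_lock(&req_lock);
		while (!requests)
			pthread_cond_wait(&req_pending, &req_lock);
		while (requests) {
			struct req *req = requests;

			requests = req->next;
			printf("handling %s\n", req->name); /* stands in for handle() */
			req->err = 0;
			req->done = 1;
			pthread_cond_signal(&req->done_cond);
		}
		pthread_mutex_unlock(&req_lock);
	}
	return NULL;
}

/* Caller side: queue one request and block until the worker completes it. */
static int submit(const char *name)
{
	struct req req = { .name = name };

	pthread_cond_init(&req.done_cond, NULL);
	pthread_mutex_lock(&req_lock);
	req.next = requests;
	requests = &req;
	pthread_cond_signal(&req_pending);
	while (!req.done)
		pthread_cond_wait(&req.done_cond, &req_lock);
	pthread_mutex_unlock(&req_lock);
	return req.err;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, worker, NULL);
	return submit("sda1");
}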
// SPDX-License-Identifier: GPL-2.0-only
/*
 * drivers/mfd/mfd-core.c
 *
 * core MFD support
 * Copyright (c) 2006 Ian Molton
 * Copyright (c) 2007,2008 Dmitry Baryshkov
 */

#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/acpi.h>
#include <linux/list.h>
#include <linux/property.h>
#include <linux/mfd/core.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/irqdomain.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/regulator/consumer.h>

static LIST_HEAD(mfd_of_node_list);

struct mfd_of_node_entry {
	struct list_head list;
	struct device *dev;
	struct device_node *np;
};

static const struct device_type mfd_dev_type = {
	.name	= "mfd_device",
};

#if IS_ENABLED(CONFIG_ACPI)
struct match_ids_walk_data {
	struct acpi_device_id *ids;
	struct acpi_device *adev;
};

static int match_device_ids(struct acpi_device *adev, void *data)
{
	struct match_ids_walk_data *wd = data;

	if (!acpi_match_device_ids(adev, wd->ids)) {
		wd->adev = adev;
		return 1;
	}

	return 0;
}

static void mfd_acpi_add_device(const struct mfd_cell *cell,
				struct platform_device *pdev)
{
	const struct mfd_cell_acpi_match *match = cell->acpi_match;
	struct acpi_device *adev = NULL;
	struct acpi_device *parent;

	parent = ACPI_COMPANION(pdev->dev.parent);
	if (!parent)
		return;

	/*
	 * The MFD child device gets its ACPI handle either from the ACPI
	 * device directly under the parent that matches either the _HID/_CID
	 * or the _ADR, or it will use the parent handle if no ID is given.
	 *
	 * Note that use of _ADR is a grey area in the ACPI specification,
	 * though at least Intel Galileo Gen 2 is using it to distinguish
	 * the children devices.
*/ if (match) { if (match->pnpid) { struct acpi_device_id ids[2] = {}; struct match_ids_walk_data wd = { .adev = NULL, .ids = ids, }; strscpy(ids[0].id, match->pnpid, sizeof(ids[0].id)); acpi_dev_for_each_child(parent, match_device_ids, &wd); adev = wd.adev; } else { adev = acpi_find_child_device(parent, match->adr, false); } } ACPI_COMPANION_SET(&pdev->dev, adev ?: parent); } #else static inline void mfd_acpi_add_device(const struct mfd_cell *cell, struct platform_device *pdev) { } #endif static int mfd_match_of_node_to_dev(struct platform_device *pdev, struct device_node *np, const struct mfd_cell *cell) { #if IS_ENABLED(CONFIG_OF) struct mfd_of_node_entry *of_entry; u64 of_node_addr; /* Skip if OF node has previously been allocated to a device */ list_for_each_entry(of_entry, &mfd_of_node_list, list) if (of_entry->np == np) return -EAGAIN; if (!cell->use_of_reg) /* No of_reg defined - allocate first free compatible match */ goto allocate_of_node; /* We only care about each node's first defined address */ if (of_property_read_reg(np, 0, &of_node_addr, NULL)) /* OF node does not contatin a 'reg' property to match to */ return -EAGAIN; if (cell->of_reg != of_node_addr) /* No match */ return -EAGAIN; allocate_of_node: of_entry = kzalloc(sizeof(*of_entry), GFP_KERNEL); if (!of_entry) return -ENOMEM; of_entry->dev = &pdev->dev; of_entry->np = np; list_add_tail(&of_entry->list, &mfd_of_node_list); pdev->dev.of_node = np; pdev->dev.fwnode = &np->fwnode; #endif return 0; } static int mfd_add_device(struct device *parent, int id, const struct mfd_cell *cell, struct resource *mem_base, int irq_base, struct irq_domain *domain) { struct resource *res; struct platform_device *pdev; struct device_node *np = NULL; struct mfd_of_node_entry *of_entry, *tmp; bool disabled = false; int ret = -ENOMEM; int platform_id; int r; if (id == PLATFORM_DEVID_AUTO) platform_id = id; else platform_id = id + cell->id; pdev = platform_device_alloc(cell->name, platform_id); if (!pdev) goto fail_alloc; pdev->mfd_cell = kmemdup(cell, sizeof(*cell), GFP_KERNEL); if (!pdev->mfd_cell) goto fail_device; res = kcalloc(cell->num_resources, sizeof(*res), GFP_KERNEL); if (!res) goto fail_device; pdev->dev.parent = parent; pdev->dev.type = &mfd_dev_type; pdev->dev.dma_mask = parent->dma_mask; pdev->dev.dma_parms = parent->dma_parms; pdev->dev.coherent_dma_mask = parent->coherent_dma_mask; ret = regulator_bulk_register_supply_alias( &pdev->dev, cell->parent_supplies, parent, cell->parent_supplies, cell->num_parent_supplies); if (ret < 0) goto fail_res; if (IS_ENABLED(CONFIG_OF) && parent->of_node && cell->of_compatible) { for_each_child_of_node(parent->of_node, np) { if (of_device_is_compatible(np, cell->of_compatible)) { /* Skip 'disabled' devices */ if (!of_device_is_available(np)) { disabled = true; continue; } ret = mfd_match_of_node_to_dev(pdev, np, cell); if (ret == -EAGAIN) continue; of_node_put(np); if (ret) goto fail_alias; goto match; } } if (disabled) { /* Ignore 'disabled' devices error free */ ret = 0; goto fail_alias; } match: if (!pdev->dev.of_node) pr_warn("%s: Failed to locate of_node [id: %d]\n", cell->name, platform_id); } mfd_acpi_add_device(cell, pdev); if (cell->pdata_size) { ret = platform_device_add_data(pdev, cell->platform_data, cell->pdata_size); if (ret) goto fail_of_entry; } if (cell->swnode) { ret = device_add_software_node(&pdev->dev, cell->swnode); if (ret) goto fail_of_entry; } for (r = 0; r < cell->num_resources; r++) { res[r].name = cell->resources[r].name; res[r].flags = cell->resources[r].flags; 
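		/*
		 * The translation below covers three cases: MEM resources are
		 * rebased onto the parent's mem_base, IRQ resources are either
		 * mapped through the parent's IRQ domain or offset by
		 * irq_base, and anything else is copied verbatim from the
		 * cell's resource template.
		 */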
/* Find out base to use */ if ((cell->resources[r].flags & IORESOURCE_MEM) && mem_base) { res[r].parent = mem_base; res[r].start = mem_base->start + cell->resources[r].start; res[r].end = mem_base->start + cell->resources[r].end; } else if (cell->resources[r].flags & IORESOURCE_IRQ) { if (domain) { /* Unable to create mappings for IRQ ranges. */ WARN_ON(cell->resources[r].start != cell->resources[r].end); res[r].start = res[r].end = irq_create_mapping( domain, cell->resources[r].start); } else { res[r].start = irq_base + cell->resources[r].start; res[r].end = irq_base + cell->resources[r].end; } } else { res[r].parent = cell->resources[r].parent; res[r].start = cell->resources[r].start; res[r].end = cell->resources[r].end; } if (!cell->ignore_resource_conflicts) { if (has_acpi_companion(&pdev->dev)) { ret = acpi_check_resource_conflict(&res[r]); if (ret) goto fail_res_conflict; } } } ret = platform_device_add_resources(pdev, res, cell->num_resources); if (ret) goto fail_res_conflict; ret = platform_device_add(pdev); if (ret) goto fail_res_conflict; if (cell->pm_runtime_no_callbacks) pm_runtime_no_callbacks(&pdev->dev); kfree(res); return 0; fail_res_conflict: if (cell->swnode) device_remove_software_node(&pdev->dev); fail_of_entry: list_for_each_entry_safe(of_entry, tmp, &mfd_of_node_list, list) if (of_entry->dev == &pdev->dev) { list_del(&of_entry->list); kfree(of_entry); } fail_alias: regulator_bulk_unregister_supply_alias(&pdev->dev, cell->parent_supplies, cell->num_parent_supplies); fail_res: kfree(res); fail_device: platform_device_put(pdev); fail_alloc: return ret; } /** * mfd_add_devices - register child devices * * @parent: Pointer to parent device. * @id: Can be PLATFORM_DEVID_AUTO to let the Platform API take care * of device numbering, or will be added to a device's cell_id. * @cells: Array of (struct mfd_cell)s describing child devices. * @n_devs: Number of child devices to register. * @mem_base: Parent register range resource for child devices. * @irq_base: Base of the range of virtual interrupt numbers allocated for * this MFD device. Unused if @domain is specified. * @domain: Interrupt domain to create mappings for hardware interrupts. 
*/ int mfd_add_devices(struct device *parent, int id, const struct mfd_cell *cells, int n_devs, struct resource *mem_base, int irq_base, struct irq_domain *domain) { int i; int ret; for (i = 0; i < n_devs; i++) { ret = mfd_add_device(parent, id, cells + i, mem_base, irq_base, domain); if (ret) goto fail; } return 0; fail: if (i) mfd_remove_devices(parent); return ret; } EXPORT_SYMBOL(mfd_add_devices); static int mfd_remove_devices_fn(struct device *dev, void *data) { struct platform_device *pdev; const struct mfd_cell *cell; struct mfd_of_node_entry *of_entry, *tmp; int *level = data; if (dev->type != &mfd_dev_type) return 0; pdev = to_platform_device(dev); cell = mfd_get_cell(pdev); if (level && cell->level > *level) return 0; if (cell->swnode) device_remove_software_node(&pdev->dev); list_for_each_entry_safe(of_entry, tmp, &mfd_of_node_list, list) if (of_entry->dev == &pdev->dev) { list_del(&of_entry->list); kfree(of_entry); } regulator_bulk_unregister_supply_alias(dev, cell->parent_supplies, cell->num_parent_supplies); platform_device_unregister(pdev); return 0; } void mfd_remove_devices_late(struct device *parent) { int level = MFD_DEP_LEVEL_HIGH; device_for_each_child_reverse(parent, &level, mfd_remove_devices_fn); } EXPORT_SYMBOL(mfd_remove_devices_late); void mfd_remove_devices(struct device *parent) { int level = MFD_DEP_LEVEL_NORMAL; device_for_each_child_reverse(parent, &level, mfd_remove_devices_fn); } EXPORT_SYMBOL(mfd_remove_devices); static void devm_mfd_dev_release(struct device *dev, void *res) { mfd_remove_devices(dev); } /** * devm_mfd_add_devices - Resource managed version of mfd_add_devices() * * Returns 0 on success or an appropriate negative error number on failure. * All child-devices of the MFD will automatically be removed when it gets * unbinded. * * @dev: Pointer to parent device. * @id: Can be PLATFORM_DEVID_AUTO to let the Platform API take care * of device numbering, or will be added to a device's cell_id. * @cells: Array of (struct mfd_cell)s describing child devices. * @n_devs: Number of child devices to register. * @mem_base: Parent register range resource for child devices. * @irq_base: Base of the range of virtual interrupt numbers allocated for * this MFD device. Unused if @domain is specified. * @domain: Interrupt domain to create mappings for hardware interrupts. */ int devm_mfd_add_devices(struct device *dev, int id, const struct mfd_cell *cells, int n_devs, struct resource *mem_base, int irq_base, struct irq_domain *domain) { struct device **ptr; int ret; ptr = devres_alloc(devm_mfd_dev_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return -ENOMEM; ret = mfd_add_devices(dev, id, cells, n_devs, mem_base, irq_base, domain); if (ret < 0) { devres_free(ptr); return ret; } *ptr = dev; devres_add(dev, ptr); return ret; } EXPORT_SYMBOL(devm_mfd_add_devices); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov");
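/*
 * Illustrative sketch, not part of mfd-core.c: how a parent driver typically
 * consumes the API above.  The cell contents -- "demo-adc", "demo-gpio", the
 * register offsets and the compatible strings -- are made-up placeholders;
 * only the mfd_cell fields and the devm_mfd_add_devices() signature come from
 * the code above.  Driver registration boilerplate is omitted.
 */
#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/mfd/core.h>

static const struct resource demo_adc_resources[] = {
	{
		.start	= 0x100,
		.end	= 0x1ff,
		.flags	= IORESOURCE_MEM,
	},
};

static const struct mfd_cell demo_cells[] = {
	{
		.name		= "demo-adc",
		.of_compatible	= "vendor,demo-adc",
		.resources	= demo_adc_resources,
		.num_resources	= ARRAY_SIZE(demo_adc_resources),
	},
	{
		.name		= "demo-gpio",
		.of_compatible	= "vendor,demo-gpio",
	},
};

static int demo_probe(struct platform_device *pdev)
{
	/*
	 * mem_base, irq_base and domain are optional; passing NULL/0 means the
	 * cell resources are used as-is and no IRQ translation is performed.
	 * The devres wrapper removes the children automatically on unbind.
	 */
	return devm_mfd_add_devices(&pdev->dev, PLATFORM_DEVID_AUTO,
				    demo_cells, ARRAY_SIZE(demo_cells),
				    NULL, 0, NULL);
}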
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ipi

#if !defined(_TRACE_IPI_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_IPI_H

#include <linux/tracepoint.h>

/**
 * ipi_raise - called when a smp cross call is made
 *
 * @mask: mask of recipient CPUs for the IPI
 * @reason: string identifying the IPI purpose
 *
 * It is necessary for @reason to be a static string declared with
 * __tracepoint_string.
 */
TRACE_EVENT(ipi_raise,

	TP_PROTO(const struct cpumask *mask, const char *reason),

	TP_ARGS(mask, reason),

	TP_STRUCT__entry(
		__bitmask(target_cpus, nr_cpumask_bits)
		__field(const char *, reason)
	),

	TP_fast_assign(
		__assign_bitmask(target_cpus, cpumask_bits(mask), nr_cpumask_bits);
		__entry->reason = reason;
	),

	TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason)
);

TRACE_EVENT(ipi_send_cpu,

	TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback),

	TP_ARGS(cpu, callsite, callback),

	TP_STRUCT__entry(
		__field(unsigned int, cpu)
		__field(void *, callsite)
		__field(void *, callback)
	),

	TP_fast_assign(
		__entry->cpu = cpu;
		__entry->callsite = (void *)callsite;
		__entry->callback = callback;
	),

	TP_printk("cpu=%u callsite=%pS callback=%pS",
		  __entry->cpu, __entry->callsite, __entry->callback)
);

TRACE_EVENT(ipi_send_cpumask,

	TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback),

	TP_ARGS(cpumask, callsite, callback),

	TP_STRUCT__entry(
		__cpumask(cpumask)
		__field(void *, callsite)
		__field(void *, callback)
	),

	TP_fast_assign(
		__assign_cpumask(cpumask, cpumask_bits(cpumask));
		__entry->callsite = (void *)callsite;
		__entry->callback = callback;
	),

	TP_printk("cpumask=%s callsite=%pS callback=%pS",
		  __get_cpumask(cpumask), __entry->callsite, __entry->callback)
);

DECLARE_EVENT_CLASS(ipi_handler,

	TP_PROTO(const char *reason),

	TP_ARGS(reason),

	TP_STRUCT__entry(
		__field(const char *, reason)
	),

	TP_fast_assign(
		__entry->reason = reason;
	),

	TP_printk("(%s)", __entry->reason)
);

/**
 * ipi_entry - called immediately before the IPI handler
 *
 * @reason: string identifying the IPI purpose
 *
 * It is necessary for @reason to be a static string declared with
 * __tracepoint_string, ideally the same as used with trace_ipi_raise
 * for that IPI.
 */
DEFINE_EVENT(ipi_handler, ipi_entry,
	TP_PROTO(const char *reason),
	TP_ARGS(reason)
);

/**
 * ipi_exit - called immediately after the IPI handler returns
 *
 * @reason: string identifying the IPI purpose
 *
 * It is necessary for @reason to be a static string declared with
 * __tracepoint_string, ideally the same as used with trace_ipi_raise for
 * that IPI.
 */
DEFINE_EVENT(ipi_handler, ipi_exit,
	TP_PROTO(const char *reason),
	TP_ARGS(reason)
);

#endif /* _TRACE_IPI_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
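/*
 * Illustrative sketch, not part of trace/events/ipi.h: how a caller is
 * expected to feed these events.  The reason string must be a static string
 * tagged __tracepoint_string, and the same pointer should be passed to
 * ipi_raise and to the matching ipi_entry/ipi_exit pair.  The function names
 * (demo_send_reschedule, demo_ipi_handler) are hypothetical, and a real build
 * needs one compilation unit that defines CREATE_TRACE_POINTS before
 * including the header.
 */
#include <linux/cpumask.h>
#include <linux/tracepoint.h>

#include <trace/events/ipi.h>

static const char *demo_reason __tracepoint_string = "Rescheduling interrupts";

static void demo_send_reschedule(const struct cpumask *mask)
{
	trace_ipi_raise(mask, demo_reason);
	/* ... architecture-specific IPI delivery would go here ... */
}

static void demo_ipi_handler(void)
{
	trace_ipi_entry(demo_reason);
	/* ... do the cross-CPU work ... */
	trace_ipi_exit(demo_reason);
}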
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H

#include <asm/cpufeatures.h>

#ifdef CONFIG_64BIT
#define REG_IN "D"
#define REG_OUT "a"
#else
#define REG_IN "a"
#define REG_OUT "a"
#endif

static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
	unsigned int res;

	asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
			 : "="REG_OUT (res)
			 : REG_IN (w));

	return res;
}

static inline unsigned int __arch_hweight16(unsigned int w)
{
	return __arch_hweight32(w & 0xffff);
}

static inline unsigned int __arch_hweight8(unsigned int w)
{
	return __arch_hweight32(w & 0xff);
}

#ifdef CONFIG_X86_32
static inline unsigned long __arch_hweight64(__u64 w)
{
	return __arch_hweight32((u32)w) +
	       __arch_hweight32((u32)(w >> 32));
}
#else
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
	unsigned long res;

	asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
			 : "="REG_OUT (res)
			 : REG_IN (w));

	return res;
}
#endif /* CONFIG_X86_32 */

#endif
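/*
 * Illustrative sketch, not part of arch_hweight.h: what the POPCNT
 * alternative computes.  When the CPU lacks X86_FEATURE_POPCNT the kernel
 * falls back to a software bit count; the classic parallel-sum version below
 * is shown as a standalone userspace program (all names are made up) so the
 * result can be checked against the compiler's builtin.
 */
#include <assert.h>
#include <stdio.h>

static unsigned int demo_hweight32(unsigned int w)
{
	w -= (w >> 1) & 0x55555555;			 /* count within bit pairs  */
	w  = (w & 0x33333333) + ((w >> 2) & 0x33333333); /* fold into nibbles       */
	w  = (w + (w >> 4)) & 0x0f0f0f0f;		 /* fold into bytes         */
	return (w * 0x01010101) >> 24;			 /* sum bytes into top byte */
}

int main(void)
{
	unsigned int samples[] = { 0, 1, 0xff, 0xdeadbeef, 0xffffffff };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		assert(demo_hweight32(samples[i]) ==
		       (unsigned int)__builtin_popcount(samples[i]));
		printf("%#x -> %u set bits\n", samples[i],
		       demo_hweight32(samples[i]));
	}
	return 0;
}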
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_DMA_MAPPING_H
#define _LINUX_DMA_MAPPING_H

#include <linux/cache.h>
#include <linux/sizes.h>
#include <linux/string.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/dma-direction.h>
#include <linux/scatterlist.h>
#include <linux/bug.h>
#include <linux/mem_encrypt.h>

/**
 * List of possible attributes associated with a DMA mapping. The semantics
 * of each attribute should be defined in Documentation/core-api/dma-attributes.rst.
 */

/*
 * DMA_ATTR_WEAK_ORDERING: Specifies that reads and writes to the mapping
 * may be weakly ordered, that is that reads and writes may pass each other.
 */
#define DMA_ATTR_WEAK_ORDERING		(1UL << 1)
/*
 * DMA_ATTR_WRITE_COMBINE: Specifies that writes to the mapping may be
 * buffered to improve performance.
 */
#define DMA_ATTR_WRITE_COMBINE		(1UL << 2)
/*
 * DMA_ATTR_NO_KERNEL_MAPPING: Lets the platform avoid creating a kernel
 * virtual mapping for the allocated buffer.
 */
#define DMA_ATTR_NO_KERNEL_MAPPING	(1UL << 4)
/*
 * DMA_ATTR_SKIP_CPU_SYNC: Allows platform code to skip synchronization of
 * the CPU cache for the given buffer assuming that it has been already
 * transferred to 'device' domain.
*/ #define DMA_ATTR_SKIP_CPU_SYNC (1UL << 5) /* * DMA_ATTR_FORCE_CONTIGUOUS: Forces contiguous allocation of the buffer * in physical memory. */ #define DMA_ATTR_FORCE_CONTIGUOUS (1UL << 6) /* * DMA_ATTR_ALLOC_SINGLE_PAGES: This is a hint to the DMA-mapping subsystem * that it's probably not worth the time to try to allocate memory to in a way * that gives better TLB efficiency. */ #define DMA_ATTR_ALLOC_SINGLE_PAGES (1UL << 7) /* * DMA_ATTR_NO_WARN: This tells the DMA-mapping subsystem to suppress * allocation failure reports (similarly to __GFP_NOWARN). */ #define DMA_ATTR_NO_WARN (1UL << 8) /* * DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully * accessible at an elevated privilege level (and ideally inaccessible or * at least read-only at lesser-privileged levels). */ #define DMA_ATTR_PRIVILEGED (1UL << 9) /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a * given device and there may be a translation between the CPU physical address * space and the bus address space. * * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not * be used directly in drivers, but checked for using dma_mapping_error() * instead. */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) #ifdef CONFIG_DMA_API_DEBUG void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); void debug_dma_map_single(struct device *dev, const void *addr, unsigned long len); #else static inline void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { } static inline void debug_dma_map_single(struct device *dev, const void *addr, unsigned long len) { } #endif /* CONFIG_DMA_API_DEBUG */ #ifdef CONFIG_HAS_DMA static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { debug_dma_mapping_error(dev, dma_addr); if (unlikely(dma_addr == DMA_MAPPING_ERROR)) return -ENOMEM; return 0; } dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); int dma_map_sgtable(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs); dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir); void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir); void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs); void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, 
dma_addr_t dma_handle, unsigned long attrs); void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); bool dma_can_mmap(struct device *dev); bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); void dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir); void *dma_vmap_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt); void dma_vunmap_noncontiguous(struct device *dev, void *vaddr); int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, size_t size, struct sg_table *sgt); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { return DMA_MAPPING_ERROR; } static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { } static inline unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { return 0; } static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { } static inline int dma_map_sgtable(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs) { return -EOPNOTSUPP; } static inline dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { return DMA_MAPPING_ERROR; } static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { } static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { } static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { } static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { } static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return -ENOMEM; } static inline void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { return NULL; } static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle, unsigned 
long attrs) { } static inline void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { return NULL; } static inline void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { } static inline int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { return -ENXIO; } static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { return -ENXIO; } static inline bool dma_can_mmap(struct device *dev) { return false; } static inline bool dma_pci_p2pdma_supported(struct device *dev) { return false; } static inline int dma_set_mask(struct device *dev, u64 mask) { return -EIO; } static inline int dma_set_coherent_mask(struct device *dev, u64 mask) { return -EIO; } static inline u64 dma_get_required_mask(struct device *dev) { return 0; } static inline bool dma_addressing_limited(struct device *dev) { return false; } static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } static inline size_t dma_opt_mapping_size(struct device *dev) { return 0; } static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return false; } static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; } static inline struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) { return NULL; } static inline void dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir) { } static inline void *dma_vmap_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt) { return NULL; } static inline void dma_vunmap_noncontiguous(struct device *dev, void *vaddr) { } static inline int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, size_t size, struct sg_table *sgt) { return -EINVAL; } #endif /* CONFIG_HAS_DMA */ struct page *dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir); int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma, size_t size, struct page *page); static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) { struct page *page = dma_alloc_pages(dev, size, dma_handle, dir, gfp); return page ? page_address(page) : NULL; } static inline void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir) { dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir); } static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { /* DMA must never operate on areas that might be remapped. 
*/ if (dev_WARN_ONCE(dev, is_vmalloc_addr(ptr), "rejecting DMA map of vmalloc memory\n")) return DMA_MAPPING_ERROR; debug_dma_map_single(dev, ptr, size); return dma_map_page_attrs(dev, virt_to_page(ptr), offset_in_page(ptr), size, dir, attrs); } static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { return dma_unmap_page_attrs(dev, addr, size, dir, attrs); } static inline void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { return dma_sync_single_for_cpu(dev, addr + offset, size, dir); } static inline void dma_sync_single_range_for_device(struct device *dev, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { return dma_sync_single_for_device(dev, addr + offset, size, dir); } /** * dma_unmap_sgtable - Unmap the given buffer for DMA * @dev: The device for which to perform the DMA operation * @sgt: The sg_table object describing the buffer * @dir: DMA direction * @attrs: Optional DMA attributes for the unmap operation * * Unmaps a buffer described by a scatterlist stored in the given sg_table * object for the @dir DMA operation by the @dev device. After this function * the ownership of the buffer is transferred back to the CPU domain. */ static inline void dma_unmap_sgtable(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs) { dma_unmap_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs); } /** * dma_sync_sgtable_for_cpu - Synchronize the given buffer for CPU access * @dev: The device for which to perform the DMA operation * @sgt: The sg_table object describing the buffer * @dir: DMA direction * * Performs the needed cache synchronization and moves the ownership of the * buffer back to the CPU domain, so it is safe to perform any access to it * by the CPU. Before doing any further DMA operations, one has to transfer * the ownership of the buffer back to the DMA domain by calling the * dma_sync_sgtable_for_device(). */ static inline void dma_sync_sgtable_for_cpu(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir) { dma_sync_sg_for_cpu(dev, sgt->sgl, sgt->orig_nents, dir); } /** * dma_sync_sgtable_for_device - Synchronize the given buffer for DMA * @dev: The device for which to perform the DMA operation * @sgt: The sg_table object describing the buffer * @dir: DMA direction * * Performs the needed cache synchronization and moves the ownership of the * buffer back to the DMA domain, so it is safe to perform the DMA operation. * Once finished, one has to call dma_sync_sgtable_for_cpu() or * dma_unmap_sgtable(). 
*/ static inline void dma_sync_sgtable_for_device(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir) { dma_sync_sg_for_device(dev, sgt->sgl, sgt->orig_nents, dir); } #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0) #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0) #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0) #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0) #define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0) #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0) #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { return dma_alloc_attrs(dev, size, dma_handle, gfp, (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); } static inline void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { return dma_free_attrs(dev, size, cpu_addr, dma_handle, 0); } static inline u64 dma_get_mask(struct device *dev) { if (dev->dma_mask && *dev->dma_mask) return *dev->dma_mask; return DMA_BIT_MASK(32); } /* * Set both the DMA mask and the coherent DMA mask to the same thing. * Note that we don't check the return value from dma_set_coherent_mask() * as the DMA API guarantees that the coherent DMA mask can be set to * the same or smaller than the streaming DMA mask. */ static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask) { int rc = dma_set_mask(dev, mask); if (rc == 0) dma_set_coherent_mask(dev, mask); return rc; } /* * Similar to the above, except it deals with the case where the device * does not have dev->dma_mask appropriately setup. */ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) { dev->dma_mask = &dev->coherent_dma_mask; return dma_set_mask_and_coherent(dev, mask); } static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) return dev->dma_parms->max_segment_size; return SZ_64K; } static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) { if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; return 0; } return -EIO; } static inline unsigned long dma_get_seg_boundary(struct device *dev) { if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) return dev->dma_parms->segment_boundary_mask; return ULONG_MAX; } /** * dma_get_seg_boundary_nr_pages - return the segment boundary in "page" units * @dev: device to guery the boundary for * @page_shift: ilog() of the IOMMU page size * * Return the segment boundary in IOMMU page units (which may be different from * the CPU page size) for the passed in device. * * If @dev is NULL a boundary of U32_MAX is assumed, this case is just for * non-DMA API callers. 
*/ static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, unsigned int page_shift) { if (!dev) return (U32_MAX >> page_shift) + 1; return (dma_get_seg_boundary(dev) >> page_shift) + 1; } static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) { if (dev->dma_parms) { dev->dma_parms->segment_boundary_mask = mask; return 0; } return -EIO; } static inline unsigned int dma_get_min_align_mask(struct device *dev) { if (dev->dma_parms) return dev->dma_parms->min_align_mask; return 0; } static inline int dma_set_min_align_mask(struct device *dev, unsigned int min_align_mask) { if (WARN_ON_ONCE(!dev->dma_parms)) return -EIO; dev->dma_parms->min_align_mask = min_align_mask; return 0; } #ifndef dma_get_cache_alignment static inline int dma_get_cache_alignment(void) { #ifdef ARCH_HAS_DMA_MINALIGN return ARCH_DMA_MINALIGN; #endif return 1; } #endif static inline void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { return dmam_alloc_attrs(dev, size, dma_handle, gfp, (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); } static inline void *dma_alloc_wc(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t gfp) { unsigned long attrs = DMA_ATTR_WRITE_COMBINE; if (gfp & __GFP_NOWARN) attrs |= DMA_ATTR_NO_WARN; return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs); } static inline void dma_free_wc(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr) { return dma_free_attrs(dev, size, cpu_addr, dma_addr, DMA_ATTR_WRITE_COMBINE); } static inline int dma_mmap_wc(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size) { return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, DMA_ATTR_WRITE_COMBINE); } #ifdef CONFIG_NEED_DMA_MAP_STATE #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME #define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) #define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) (((PTR)->ADDR_NAME) = (VAL)) #define dma_unmap_len(PTR, LEN_NAME) ((PTR)->LEN_NAME) #define dma_unmap_len_set(PTR, LEN_NAME, VAL) (((PTR)->LEN_NAME) = (VAL)) #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) #define dma_unmap_addr(PTR, ADDR_NAME) (0) #define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) #define dma_unmap_len(PTR, LEN_NAME) (0) #define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */
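/*
 * Illustrative sketch, not part of dma-mapping.h: the usual streaming-DMA
 * sequence built from the helpers declared above -- set the mask once (really
 * a probe-time step), map a kernel buffer per transfer, check the mapping,
 * and unmap when the hardware is done.  demo_dma_tx and the "program the
 * device" step are placeholders, not a real driver.
 */
#include <linux/device.h>
#include <linux/dma-mapping.h>

static int demo_dma_tx(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;
	int ret;

	/* Fails if the device cannot address RAM within a 32-bit mask. */
	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
	if (ret)
		return ret;

	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	ret = dma_mapping_error(dev, handle);
	if (ret)
		return ret;

	/* ... hand 'handle' to the device and wait for the transfer ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
	return 0;
}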
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/core/dev_addr_lists.c - Functions for handling net device lists
 * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
 *
 * This file contains functions for working with unicast, multicast and
 * device addresses lists.
 */

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/export.h>
#include <linux/list.h>

#include "dev.h"

/*
 * General list handling functions
 */

static int __hw_addr_insert(struct netdev_hw_addr_list *list,
			    struct netdev_hw_addr *new, int addr_len)
{
	struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL;
	struct netdev_hw_addr *ha;

	while (*ins_point) {
		int diff;

		ha = rb_entry(*ins_point, struct netdev_hw_addr, node);
		diff = memcmp(new->addr, ha->addr, addr_len);
		if (diff == 0)
			diff = memcmp(&new->type, &ha->type, sizeof(new->type));

		parent = *ins_point;
		if (diff < 0)
			ins_point = &parent->rb_left;
		else if (diff > 0)
			ins_point = &parent->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node_rcu(&new->node, parent, ins_point);
	rb_insert_color(&new->node, &list->tree);

	return 0;
}

static struct netdev_hw_addr*
__hw_addr_create(const unsigned char *addr, int addr_len,
		 unsigned char addr_type, bool global, bool sync)
{
	struct netdev_hw_addr *ha;
	int alloc_size;

	alloc_size = sizeof(*ha);
	if (alloc_size < L1_CACHE_BYTES)
		alloc_size = L1_CACHE_BYTES;
	ha = kmalloc(alloc_size, GFP_ATOMIC);
	if (!ha)
		return NULL;
	memcpy(ha->addr, addr, addr_len);
	ha->type = addr_type;
	ha->refcount = 1;
	ha->global_use = global;
	ha->synced = sync ?
1 : 0; ha->sync_cnt = 0; return ha; } static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync, int sync_count, bool exclusive) { struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; struct netdev_hw_addr *ha; if (addr_len > MAX_ADDR_LEN) return -EINVAL; while (*ins_point) { int diff; ha = rb_entry(*ins_point, struct netdev_hw_addr, node); diff = memcmp(addr, ha->addr, addr_len); if (diff == 0) diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); parent = *ins_point; if (diff < 0) { ins_point = &parent->rb_left; } else if (diff > 0) { ins_point = &parent->rb_right; } else { if (exclusive) return -EEXIST; if (global) { /* check if addr is already used as global */ if (ha->global_use) return 0; else ha->global_use = true; } if (sync) { if (ha->synced && sync_count) return -EEXIST; else ha->synced++; } ha->refcount++; return 0; } } ha = __hw_addr_create(addr, addr_len, addr_type, global, sync); if (!ha) return -ENOMEM; rb_link_node(&ha->node, parent, ins_point); rb_insert_color(&ha->node, &list->tree); list_add_tail_rcu(&ha->list, &list->list); list->count++; return 0; } static int __hw_addr_add(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false, 0, false); } static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, struct netdev_hw_addr *ha, bool global, bool sync) { if (global && !ha->global_use) return -ENOENT; if (sync && !ha->synced) return -ENOENT; if (global) ha->global_use = false; if (sync) ha->synced--; if (--ha->refcount) return 0; rb_erase(&ha->node, &list->tree); list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); list->count--; return 0; } static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { struct rb_node *node; node = list->tree.rb_node; while (node) { struct netdev_hw_addr *ha = rb_entry(node, struct netdev_hw_addr, node); int diff = memcmp(addr, ha->addr, addr_len); if (diff == 0 && addr_type) diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); if (diff < 0) node = node->rb_left; else if (diff > 0) node = node->rb_right; else return ha; } return NULL; } static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha = __hw_addr_lookup(list, addr, addr_len, addr_type); if (!ha) return -ENOENT; return __hw_addr_del_entry(list, ha, global, sync); } static int __hw_addr_del(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type) { return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); } static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr *ha, int addr_len) { int err; err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, false, true, ha->sync_cnt, false); if (err && err != -EEXIST) return err; if (!err) { ha->sync_cnt++; ha->refcount++; } return 0; } static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, struct netdev_hw_addr *ha, int addr_len) { int err; err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, false, true); if (err) return; ha->sync_cnt--; /* address on from list is not marked synced */ __hw_addr_del_entry(from_list, ha, false, false); } static int 
__hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->sync_cnt == ha->refcount) { __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } else { err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; } } return err; } /* This function only works where there is a strict 1-1 relationship * between source and destionation of they synch. If you ever need to * sync addresses to more then 1 destination, you need to use * __hw_addr_sync_multiple(). */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (!ha->sync_cnt) { err = __hw_addr_sync_one(to_list, ha, addr_len); if (err) break; } else if (ha->refcount == 1) __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } return err; } EXPORT_SYMBOL(__hw_addr_sync); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->sync_cnt) __hw_addr_unsync_one(to_list, from_list, ha, addr_len); } } EXPORT_SYMBOL(__hw_addr_unsync); /** * __hw_addr_sync_dev - Synchonize device's multicast list * @list: address list to syncronize * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address add/remove * notifications. The unsync function may be NULL in which case * the addresses requiring removal will simply be removed without * any notification to the device. **/ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)) { struct netdev_hw_addr *ha, *tmp; int err; /* first go through and flush out any stale entries */ list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt || ha->refcount != 1) continue; /* if unsync is defined and fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr)) continue; ha->sync_cnt--; __hw_addr_del_entry(list, ha, false, false); } /* go through and sync new entries to the list */ list_for_each_entry_safe(ha, tmp, &list->list, list) { if (ha->sync_cnt) continue; err = sync(dev, ha->addr); if (err) return err; ha->sync_cnt++; ha->refcount++; } return 0; } EXPORT_SYMBOL(__hw_addr_sync_dev); /** * __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking * into account references * @list: address list to synchronize * @dev: device to sync * @sync: function to call if address or reference on it should be added * @unsync: function to call if address or some reference on it should removed * * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address or references on it * add/remove notifications. The unsync function may be NULL in which case * the addresses or references on it requiring removal will simply be * removed without any notification to the device. That is responsibility of * the driver to identify and distribute address or references on it between * internal address tables. 
**/ int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *, int), int (*unsync)(struct net_device *, const unsigned char *, int)) { struct netdev_hw_addr *ha, *tmp; int err, ref_cnt; /* first go through and flush out any unsynced/stale entries */ list_for_each_entry_safe(ha, tmp, &list->list, list) { /* sync if address is not used */ if ((ha->sync_cnt << 1) <= ha->refcount) continue; /* if fails defer unsyncing address */ ref_cnt = ha->refcount - ha->sync_cnt; if (unsync && unsync(dev, ha->addr, ref_cnt)) continue; ha->refcount = (ref_cnt << 1) + 1; ha->sync_cnt = ref_cnt; __hw_addr_del_entry(list, ha, false, false); } /* go through and sync updated/new entries to the list */ list_for_each_entry_safe(ha, tmp, &list->list, list) { /* sync if address added or reused */ if ((ha->sync_cnt << 1) >= ha->refcount) continue; ref_cnt = ha->refcount - ha->sync_cnt; err = sync(dev, ha->addr, ref_cnt); if (err) return err; ha->refcount = ref_cnt << 1; ha->sync_cnt = ref_cnt; } return 0; } EXPORT_SYMBOL(__hw_addr_ref_sync_dev); /** * __hw_addr_ref_unsync_dev - Remove synchronized addresses and references on * it from device * @list: address list to remove synchronized addresses (references on it) from * @dev: device to sync * @unsync: function to call if address and references on it should be removed * * Remove all addresses that were added to the device by * __hw_addr_ref_sync_dev(). This function is intended to be called from the * ndo_stop or ndo_open functions on devices that require explicit address (or * references on it) add/remove notifications. If the unsync function pointer * is NULL then this function can be used to just reset the sync_cnt for the * addresses in the list. **/ void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *, int)) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt) continue; /* if fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr, ha->sync_cnt)) continue; ha->refcount -= ha->sync_cnt - 1; ha->sync_cnt = 0; __hw_addr_del_entry(list, ha, false, false); } } EXPORT_SYMBOL(__hw_addr_ref_unsync_dev); /** * __hw_addr_unsync_dev - Remove synchronized addresses from device * @list: address list to remove synchronized addresses from * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by __hw_addr_sync_dev(). * This function is intended to be called from the ndo_stop or ndo_open * functions on devices that require explicit address add/remove * notifications. If the unsync function pointer is NULL then this function * can be used to just reset the sync_cnt for the addresses in the list. 
**/ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) { struct netdev_hw_addr *ha, *tmp; list_for_each_entry_safe(ha, tmp, &list->list, list) { if (!ha->sync_cnt) continue; /* if unsync is defined and fails defer unsyncing address */ if (unsync && unsync(dev, ha->addr)) continue; ha->sync_cnt--; __hw_addr_del_entry(list, ha, false, false); } } EXPORT_SYMBOL(__hw_addr_unsync_dev); static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; list->tree = RB_ROOT; list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); } list->count = 0; } void __hw_addr_init(struct netdev_hw_addr_list *list) { INIT_LIST_HEAD(&list->list); list->count = 0; list->tree = RB_ROOT; } EXPORT_SYMBOL(__hw_addr_init); /* * Device addresses handling functions */ /* Check that netdev->dev_addr is not written to directly as this would * break the rbtree layout. All changes should go thru dev_addr_set() and co. * Remove this check in mid-2024. */ void dev_addr_check(struct net_device *dev) { if (!memcmp(dev->dev_addr, dev->dev_addr_shadow, MAX_ADDR_LEN)) return; netdev_warn(dev, "Current addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr); netdev_warn(dev, "Expected addr: %*ph\n", MAX_ADDR_LEN, dev->dev_addr_shadow); netdev_WARN(dev, "Incorrect netdev->dev_addr\n"); } /** * dev_addr_flush - Flush device address list * @dev: device * * Flush device address list and reset ->dev_addr. * * The caller must hold the rtnl_mutex. */ void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ dev_addr_check(dev); __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } /** * dev_addr_init - Init device address list * @dev: device * * Init device address list and create the first element, * used by ->dev_addr. * * The caller must hold the rtnl_mutex. */ int dev_addr_init(struct net_device *dev) { unsigned char addr[MAX_ADDR_LEN]; struct netdev_hw_addr *ha; int err; /* rtnl_mutex must be held here */ __hw_addr_init(&dev->dev_addrs); memset(addr, 0, sizeof(addr)); err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* * Get the first (previously created) address from the list * and set dev_addr pointer to this location. */ ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); dev->dev_addr = ha->addr; } return err; } void dev_addr_mod(struct net_device *dev, unsigned int offset, const void *addr, size_t len) { struct netdev_hw_addr *ha; dev_addr_check(dev); ha = container_of(dev->dev_addr, struct netdev_hw_addr, addr[0]); rb_erase(&ha->node, &dev->dev_addrs.tree); memcpy(&ha->addr[offset], addr, len); memcpy(&dev->dev_addr_shadow[offset], addr, len); WARN_ON(__hw_addr_insert(&dev->dev_addrs, ha, dev->addr_len)); } EXPORT_SYMBOL(dev_addr_mod); /** * dev_addr_add - Add a device address * @dev: device * @addr: address to add * @addr_type: address type * * Add a device address to the device or increase the reference count if * it already exists. * * The caller must hold the rtnl_mutex. 
*/ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; ASSERT_RTNL(); err = dev_pre_changeaddr_notify(dev, addr, NULL); if (err) return err; err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } EXPORT_SYMBOL(dev_addr_add); /** * dev_addr_del - Release a device address. * @dev: device * @addr: address to delete * @addr_type: address type * * Release reference to a device address and remove it from the device * if the reference count drops to zero. * * The caller must hold the rtnl_mutex. */ int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type) { int err; struct netdev_hw_addr *ha; ASSERT_RTNL(); /* * We can not remove the first address from the list because * dev->dev_addr points to that. */ ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); if (!memcmp(ha->addr, addr, dev->addr_len) && ha->type == addr_type && ha->refcount == 1) return -ENOENT; err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; } EXPORT_SYMBOL(dev_addr_del); /* * Unicast list handling functions */ /** * dev_uc_add_excl - Add a global secondary unicast address * @dev: device * @addr: address to add */ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST, true, false, 0, true); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_add_excl); /** * dev_uc_add - Add a secondary unicast address * @dev: device * @addr: address to add * * Add a secondary unicast address to the device or increase * the reference count if it already exists. */ int dev_uc_add(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_add); /** * dev_uc_del - Release secondary unicast address. * @dev: device * @addr: address to delete * * Release reference to a secondary unicast address and remove it * from the device if the reference count drops to zero. */ int dev_uc_del(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_del(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_uc_del); /** * dev_uc_sync - Synchronize device's unicast list to another device * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. This function assumes that * addresses will only ever be synced to the @to devices and no other. 
*/ int dev_uc_sync(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync); /** * dev_uc_sync_multiple - Synchronize device's unicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have been deleted from the source. The source device * must be locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. It allows for a single source * device to be synced to multiple destination devices. */ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync_multiple); /** * dev_uc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * * Remove all addresses that were added to the destination device by * dev_uc_sync(). This function is intended to be called from the * dev->stop function of layered software devices. */ void dev_uc_unsync(struct net_device *to, struct net_device *from) { if (to->addr_len != from->addr_len) return; /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two * reasons: * 1) This is always called without any addr_list_lock, so as the * outermost one here, it must be 0. * 2) This is called by some callers after unlinking the upper device, * so the dev->lower_level becomes 1 again. * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or * larger. */ netif_addr_lock_bh(from); netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_uc_unsync); /** * dev_uc_flush - Flush unicast addresses * @dev: device * * Flush unicast addresses. */ void dev_uc_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->uc); netif_addr_unlock_bh(dev); } EXPORT_SYMBOL(dev_uc_flush); /** * dev_uc_init - Init unicast address list * @dev: device * * Init unicast address list. 
*/ void dev_uc_init(struct net_device *dev) { __hw_addr_init(&dev->uc); } EXPORT_SYMBOL(dev_uc_init); /* * Multicast list handling functions */ /** * dev_mc_add_excl - Add a global secondary multicast address * @dev: device * @addr: address to add */ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, true, false, 0, true); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_mc_add_excl); static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, bool global) { int err; netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, global, false, 0, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } /** * dev_mc_add - Add a multicast address * @dev: device * @addr: address to add * * Add a multicast address to the device or increase * the reference count if it already exists. */ int dev_mc_add(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, false); } EXPORT_SYMBOL(dev_mc_add); /** * dev_mc_add_global - Add a global multicast address * @dev: device * @addr: address to add * * Add a global multicast address to the device. */ int dev_mc_add_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_add(dev, addr, true); } EXPORT_SYMBOL(dev_mc_add_global); static int __dev_mc_del(struct net_device *dev, const unsigned char *addr, bool global) { int err; netif_addr_lock_bh(dev); err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST, global, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); return err; } /** * dev_mc_del - Delete a multicast address. * @dev: device * @addr: address to delete * * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ int dev_mc_del(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, false); } EXPORT_SYMBOL(dev_mc_del); /** * dev_mc_del_global - Delete a global multicast address. * @dev: device * @addr: address to delete * * Release reference to a multicast address and remove it * from the device if the reference count drops to zero. */ int dev_mc_del_global(struct net_device *dev, const unsigned char *addr) { return __dev_mc_del(dev, addr, true); } EXPORT_SYMBOL(dev_mc_del_global); /** * dev_mc_sync - Synchronize device's multicast list to another device * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the ndo_set_rx_mode * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync); /** * dev_mc_sync_multiple - Synchronize device's multicast list to another * device, but allow for multiple calls to sync to multiple devices. * @to: destination device * @from: source device * * Add newly added addresses to the destination device and release * addresses that have no users left. 
The source device must be * locked by netif_addr_lock_bh. * * This function is intended to be called from the ndo_set_rx_mode * function of layered software devices. It allows for a single * source device to be synced to multiple destination devices. */ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) { int err = 0; if (to->addr_len != from->addr_len) return -EINVAL; netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync_multiple); /** * dev_mc_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * * Remove all addresses that were added to the destination device by * dev_mc_sync(). This function is intended to be called from the * dev->stop function of layered software devices. */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { if (to->addr_len != from->addr_len) return; /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_mc_unsync); /** * dev_mc_flush - Flush multicast addresses * @dev: device * * Flush multicast addresses. */ void dev_mc_flush(struct net_device *dev) { netif_addr_lock_bh(dev); __hw_addr_flush(&dev->mc); netif_addr_unlock_bh(dev); } EXPORT_SYMBOL(dev_mc_flush); /** * dev_mc_init - Init multicast address list * @dev: device * * Init multicast address list. */ void dev_mc_init(struct net_device *dev) { __hw_addr_init(&dev->mc); } EXPORT_SYMBOL(dev_mc_init);
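A minimal usage sketch (not part of the file above), assuming a layered virtual device stacked on a lower device: the kernel-doc comments say dev_uc_sync()/dev_mc_sync() are meant to be called from the ndo_set_rx_mode handler, with matching dev_uc_unsync()/dev_mc_unsync() calls on teardown. The driver name, the example_priv structure and its lowerdev field are hypothetical; only the dev_* address-list calls come from the code above.

#include <linux/netdevice.h>

/* Hypothetical private data for a virtual device stacked on a lower device. */
struct example_priv {
	struct net_device *lowerdev;
};

/* ndo_set_rx_mode handler: normally invoked with dev's address list lock
 * already held, so the local uc/mc lists can be propagated straight to the
 * lower device. */
static void example_set_rx_mode(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	dev_uc_sync(priv->lowerdev, dev);	/* push new unicast entries */
	dev_mc_sync(priv->lowerdev, dev);	/* push new multicast entries */
}

/* ndo_stop handler: drop everything that was synced to the lower device. */
static int example_stop(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	dev_uc_unsync(priv->lowerdev, dev);
	dev_mc_unsync(priv->lowerdev, dev);
	return 0;
}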
/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2021, Google LLC. * Pasha Tatashin <pasha.tatashin@soleen.com> */ #ifndef __LINUX_PAGE_TABLE_CHECK_H #define __LINUX_PAGE_TABLE_CHECK_H #ifdef CONFIG_PAGE_TABLE_CHECK #include <linux/jump_label.h> extern struct static_key_true page_table_check_disabled; extern struct page_ext_operations page_table_check_ops; void __page_table_check_zero(struct page *page, unsigned int order); void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte); void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd); void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud); void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr); void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd); void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd); static inline void page_table_check_alloc(struct page *page, unsigned int order) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_zero(page, order); } static inline void page_table_check_free(struct page *page, unsigned int order) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_zero(page, order); } static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pte_clear(mm, pte); } static inline void page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pmd_clear(mm, pmd); } static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pud_clear(mm, pud); } static inline void page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_ptes_set(mm, ptep, pte, nr); } static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pmd_set(mm, pmdp, pmd); } static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pud_set(mm, pudp, pud); } static inline void page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd) { if (static_branch_likely(&page_table_check_disabled)) return; __page_table_check_pte_clear_range(mm, addr, pmd); } #else static inline void page_table_check_alloc(struct page *page, unsigned int order) { } static inline void page_table_check_free(struct page *page, unsigned int order) { } static inline void page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { } static inline void 
page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { } static inline void page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { } static inline void page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr) { } static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { } static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { } static inline void page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd) { } #endif /* CONFIG_PAGE_TABLE_CHECK */ #endif /* __LINUX_PAGE_TABLE_CHECK_H */
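A minimal sketch of how the wrappers above are typically consumed (an assumption, not taken from the header): architecture code reports page-table updates to the checker around its low-level PTE writes. The __example_write_pte() and __example_read_and_clear_pte() helpers are hypothetical stand-ins for the real arch primitives; only the page_table_check_* calls come from the header.

/* Hypothetical arch-side wrappers; only the page_table_check_* calls are real. */
static inline void example_set_pte_at(struct mm_struct *mm, unsigned long addr,
				      pte_t *ptep, pte_t pte)
{
	/* No-op unless CONFIG_PAGE_TABLE_CHECK is built in and enabled. */
	page_table_check_ptes_set(mm, ptep, pte, 1);
	__example_write_pte(ptep, pte);			/* hypothetical store */
}

static inline pte_t example_ptep_get_and_clear(struct mm_struct *mm,
					       unsigned long addr, pte_t *ptep)
{
	pte_t old = __example_read_and_clear_pte(ptep);	/* hypothetical */

	/* Report the entry that was just torn down. */
	page_table_check_pte_clear(mm, old);
	return old;
}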
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RCULIST_BL_H #define _LINUX_RCULIST_BL_H /* * RCU-protected bl list version. See include/linux/list_bl.h. */ #include <linux/list_bl.h> #include <linux/rcupdate.h> static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != LIST_BL_LOCKMASK); rcu_assign_pointer(h->first, (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK)); } static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) { return (struct hlist_bl_node *) ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); } /** * hlist_bl_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. * * Note: hlist_bl_unhashed() on entry does not return true after this, * the entry is in an undefined state. It is useful for RCU based * lockfree traversal. * * In particular, it means that we can not poison the forward * pointers that may still be used for walking the hash list. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_bl_add_head_rcu() * or hlist_bl_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_bl_for_each_entry(). */ static inline void hlist_bl_del_rcu(struct hlist_bl_node *n) { __hlist_bl_del(n); n->pprev = LIST_POISON2; } /** * hlist_bl_add_head_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist_bl, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_bl_add_head_rcu() * or hlist_bl_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_bl_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, struct hlist_bl_head *h) { struct hlist_bl_node *first; /* don't need hlist_bl_first_rcu because we're under lock */ first = hlist_bl_first(h); n->next = first; if (first) first->pprev = &n->next; n->pprev = &h->first; /* need _rcu because we can have concurrent lock free readers */ hlist_bl_set_first_rcu(h, n); } /** * hlist_bl_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_bl_node to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_bl_node within the struct. 
* */ #define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = hlist_bl_first_rcu(head); \ pos && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(pos->next)) #endif
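A short usage sketch (an assumption, not part of the header): readers walk a bucket under rcu_read_lock() with hlist_bl_for_each_entry_rcu(), while writers serialize on the bit lock embedded in the head pointer before calling hlist_bl_add_head_rcu(). The example_item structure and the bucket are hypothetical; hlist_bl_lock()/hlist_bl_unlock() come from include/linux/list_bl.h.

#include <linux/list_bl.h>
#include <linux/rculist_bl.h>
#include <linux/rcupdate.h>

struct example_item {
	unsigned long key;
	struct hlist_bl_node node;
};

/* Lock-free lookup: the traversal must be guarded by rcu_read_lock(). */
static bool example_bucket_contains(struct hlist_bl_head *bucket,
				    unsigned long key)
{
	struct example_item *item;
	struct hlist_bl_node *pos;
	bool found = false;

	rcu_read_lock();
	hlist_bl_for_each_entry_rcu(item, pos, bucket, node) {
		if (item->key == key) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

/* Insertion: the bit lock in the head pointer serializes writers, which also
 * satisfies the LIST_BL_BUG_ON() check in hlist_bl_set_first_rcu(). */
static void example_bucket_insert(struct hlist_bl_head *bucket,
				  struct example_item *item)
{
	hlist_bl_lock(bucket);
	hlist_bl_add_head_rcu(&item->node, bucket);
	hlist_bl_unlock(bucket);
}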
// SPDX-License-Identifier: GPL-2.0 /* USB Driver for GSM modems Copyright (C) 2005 Matthias Urlichs <smurf@smurf.noris.de> Portions copied from the Keyspan driver by Hugh Blemings <hugh@blemings.org> History: see the git log. Work sponsored by: Sigos GmbH, Germany <info@sigos.de> This driver exists because the "normal" serial driver doesn't work too well with GSM modems. Issues: - data loss -- one single Receive URB is not nearly enough - nonstandard flow (Option devices) control - controlling the baud rate doesn't make sense This driver is named "option" because the most common device it's used for is a PC-Card (with an internal OHCI-USB interface, behind which the GSM interface sits), made by Option Inc. Some of the "one port" devices actually exhibit multiple USB instances on the USB bus. This is not a bug, these ports are used for different device features. */ #define DRIVER_AUTHOR "Matthias Urlichs <smurf@smurf.noris.de>" #define DRIVER_DESC "USB Driver for GSM modems" #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include "usb-wwan.h" /* Function prototypes */ static int option_probe(struct usb_serial *serial, const struct usb_device_id *id); static int option_attach(struct usb_serial *serial); static void option_release(struct usb_serial *serial); static void option_instat_callback(struct urb *urb); /* Vendor and product IDs */ #define OPTION_VENDOR_ID 0x0AF0 #define OPTION_PRODUCT_COLT 0x5000 #define OPTION_PRODUCT_RICOLA 0x6000 #define OPTION_PRODUCT_RICOLA_LIGHT 0x6100 #define OPTION_PRODUCT_RICOLA_QUAD 0x6200 #define OPTION_PRODUCT_RICOLA_QUAD_LIGHT 0x6300 #define OPTION_PRODUCT_RICOLA_NDIS 0x6050 #define OPTION_PRODUCT_RICOLA_NDIS_LIGHT 0x6150 #define OPTION_PRODUCT_RICOLA_NDIS_QUAD 0x6250 #define OPTION_PRODUCT_RICOLA_NDIS_QUAD_LIGHT 0x6350 #define OPTION_PRODUCT_COBRA 0x6500 #define OPTION_PRODUCT_COBRA_BUS 0x6501 #define OPTION_PRODUCT_VIPER 0x6600 #define OPTION_PRODUCT_VIPER_BUS 0x6601 #define OPTION_PRODUCT_GT_MAX_READY 0x6701 #define OPTION_PRODUCT_FUJI_MODEM_LIGHT 0x6721 #define OPTION_PRODUCT_FUJI_MODEM_GT 0x6741 #define OPTION_PRODUCT_FUJI_MODEM_EX 0x6761 #define OPTION_PRODUCT_KOI_MODEM 0x6800 #define OPTION_PRODUCT_SCORPION_MODEM 0x6901 #define OPTION_PRODUCT_ETNA_MODEM 0x7001 #define OPTION_PRODUCT_ETNA_MODEM_LITE 0x7021 #define OPTION_PRODUCT_ETNA_MODEM_GT 0x7041 #define OPTION_PRODUCT_ETNA_MODEM_EX 0x7061 #define OPTION_PRODUCT_ETNA_KOI_MODEM 0x7100 #define OPTION_PRODUCT_GTM380_MODEM 0x7201 #define HUAWEI_VENDOR_ID 0x12D1 #define HUAWEI_PRODUCT_E173 0x140C #define 
HUAWEI_PRODUCT_E1750 0x1406 #define HUAWEI_PRODUCT_K4505 0x1464 #define HUAWEI_PRODUCT_K3765 0x1465 #define HUAWEI_PRODUCT_K4605 0x14C6 #define HUAWEI_PRODUCT_E173S6 0x1C07 #define QUANTA_VENDOR_ID 0x0408 #define QUANTA_PRODUCT_Q101 0xEA02 #define QUANTA_PRODUCT_Q111 0xEA03 #define QUANTA_PRODUCT_GLX 0xEA04 #define QUANTA_PRODUCT_GKE 0xEA05 #define QUANTA_PRODUCT_GLE 0xEA06 #define NOVATELWIRELESS_VENDOR_ID 0x1410 /* YISO PRODUCTS */ #define YISO_VENDOR_ID 0x0EAB #define YISO_PRODUCT_U893 0xC893 /* * NOVATEL WIRELESS PRODUCTS * * Note from Novatel Wireless: * If your Novatel modem does not work on linux, don't * change the option module, but check our website. If * that does not help, contact ddeschepper@nvtl.com */ /* MERLIN EVDO PRODUCTS */ #define NOVATELWIRELESS_PRODUCT_V640 0x1100 #define NOVATELWIRELESS_PRODUCT_V620 0x1110 #define NOVATELWIRELESS_PRODUCT_V740 0x1120 #define NOVATELWIRELESS_PRODUCT_V720 0x1130 /* MERLIN HSDPA/HSPA PRODUCTS */ #define NOVATELWIRELESS_PRODUCT_U730 0x1400 #define NOVATELWIRELESS_PRODUCT_U740 0x1410 #define NOVATELWIRELESS_PRODUCT_U870 0x1420 #define NOVATELWIRELESS_PRODUCT_XU870 0x1430 #define NOVATELWIRELESS_PRODUCT_X950D 0x1450 /* EXPEDITE PRODUCTS */ #define NOVATELWIRELESS_PRODUCT_EV620 0x2100 #define NOVATELWIRELESS_PRODUCT_ES720 0x2110 #define NOVATELWIRELESS_PRODUCT_E725 0x2120 #define NOVATELWIRELESS_PRODUCT_ES620 0x2130 #define NOVATELWIRELESS_PRODUCT_EU730 0x2400 #define NOVATELWIRELESS_PRODUCT_EU740 0x2410 #define NOVATELWIRELESS_PRODUCT_EU870D 0x2420 /* OVATION PRODUCTS */ #define NOVATELWIRELESS_PRODUCT_MC727 0x4100 #define NOVATELWIRELESS_PRODUCT_MC950D 0x4400 /* * Note from Novatel Wireless: * All PID in the 5xxx range are currently reserved for * auto-install CDROMs, and should not be added to this * module. 
* * #define NOVATELWIRELESS_PRODUCT_U727 0x5010 * #define NOVATELWIRELESS_PRODUCT_MC727_NEW 0x5100 */ #define NOVATELWIRELESS_PRODUCT_OVMC760 0x6002 #define NOVATELWIRELESS_PRODUCT_MC780 0x6010 #define NOVATELWIRELESS_PRODUCT_EVDO_FULLSPEED 0x6000 #define NOVATELWIRELESS_PRODUCT_EVDO_HIGHSPEED 0x6001 #define NOVATELWIRELESS_PRODUCT_HSPA_FULLSPEED 0x7000 #define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED 0x7001 #define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED3 0x7003 #define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED4 0x7004 #define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED5 0x7005 #define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED6 0x7006 #define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED7 0x7007 #define NOVATELWIRELESS_PRODUCT_MC996D 0x7030 #define NOVATELWIRELESS_PRODUCT_MF3470 0x7041 #define NOVATELWIRELESS_PRODUCT_MC547 0x7042 #define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_FULLSPEED 0x8000 #define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED 0x8001 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0x9000 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001 #define NOVATELWIRELESS_PRODUCT_E362 0x9010 #define NOVATELWIRELESS_PRODUCT_E371 0x9011 #define NOVATELWIRELESS_PRODUCT_U620L 0x9022 #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 #define UBLOX_VENDOR_ID 0x1546 /* AMOI PRODUCTS */ #define AMOI_VENDOR_ID 0x1614 #define AMOI_PRODUCT_H01 0x0800 #define AMOI_PRODUCT_H01A 0x7002 #define AMOI_PRODUCT_H02 0x0802 #define AMOI_PRODUCT_SKYPEPHONE_S2 0x0407 #define DELL_VENDOR_ID 0x413C /* Dell modems */ #define DELL_PRODUCT_5700_MINICARD 0x8114 #define DELL_PRODUCT_5500_MINICARD 0x8115 #define DELL_PRODUCT_5505_MINICARD 0x8116 #define DELL_PRODUCT_5700_EXPRESSCARD 0x8117 #define DELL_PRODUCT_5510_EXPRESSCARD 0x8118 #define DELL_PRODUCT_5700_MINICARD_SPRINT 0x8128 #define DELL_PRODUCT_5700_MINICARD_TELUS 0x8129 #define DELL_PRODUCT_5720_MINICARD_VZW 0x8133 #define DELL_PRODUCT_5720_MINICARD_SPRINT 0x8134 #define DELL_PRODUCT_5720_MINICARD_TELUS 0x8135 #define DELL_PRODUCT_5520_MINICARD_CINGULAR 0x8136 #define DELL_PRODUCT_5520_MINICARD_GENERIC_L 0x8137 #define DELL_PRODUCT_5520_MINICARD_GENERIC_I 0x8138 #define DELL_PRODUCT_5730_MINICARD_SPRINT 0x8180 #define DELL_PRODUCT_5730_MINICARD_TELUS 0x8181 #define DELL_PRODUCT_5730_MINICARD_VZW 0x8182 #define DELL_PRODUCT_5800_MINICARD_VZW 0x8195 /* Novatel E362 */ #define DELL_PRODUCT_5800_V2_MINICARD_VZW 0x8196 /* Novatel E362 */ #define DELL_PRODUCT_5804_MINICARD_ATT 0x819b /* Novatel E371 */ #define DELL_PRODUCT_5821E 0x81d7 #define DELL_PRODUCT_5821E_ESIM 0x81e0 #define DELL_PRODUCT_5829E_ESIM 0x81e4 #define DELL_PRODUCT_5829E 0x81e6 #define DELL_PRODUCT_FM101R_ESIM 0x8213 #define DELL_PRODUCT_FM101R 0x8215 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da #define KYOCERA_PRODUCT_KPC680 0x180a #define ANYDATA_VENDOR_ID 0x16d5 #define ANYDATA_PRODUCT_ADU_620UW 0x6202 #define ANYDATA_PRODUCT_ADU_E100A 0x6501 #define ANYDATA_PRODUCT_ADU_500A 0x6502 #define AXESSTEL_VENDOR_ID 0x1726 #define AXESSTEL_PRODUCT_MV110H 0x1000 #define BANDRICH_VENDOR_ID 0x1A8D #define BANDRICH_PRODUCT_C100_1 0x1002 #define BANDRICH_PRODUCT_C100_2 0x1003 #define BANDRICH_PRODUCT_1004 0x1004 #define BANDRICH_PRODUCT_1005 0x1005 #define BANDRICH_PRODUCT_1006 0x1006 #define BANDRICH_PRODUCT_1007 0x1007 #define BANDRICH_PRODUCT_1008 0x1008 #define BANDRICH_PRODUCT_1009 0x1009 #define BANDRICH_PRODUCT_100A 0x100a #define BANDRICH_PRODUCT_100B 0x100b #define BANDRICH_PRODUCT_100C 0x100c #define BANDRICH_PRODUCT_100D 0x100d 
#define BANDRICH_PRODUCT_100E 0x100e #define BANDRICH_PRODUCT_100F 0x100f #define BANDRICH_PRODUCT_1010 0x1010 #define BANDRICH_PRODUCT_1011 0x1011 #define BANDRICH_PRODUCT_1012 0x1012 #define QUALCOMM_VENDOR_ID 0x05C6 /* These Quectel products use Qualcomm's vendor ID */ #define QUECTEL_PRODUCT_UC20 0x9003 #define QUECTEL_PRODUCT_UC15 0x9090 /* These u-blox products use Qualcomm's vendor ID */ #define UBLOX_PRODUCT_R410M 0x90b2 /* These Yuga products use Qualcomm's vendor ID */ #define YUGA_PRODUCT_CLM920_NC5 0x9625 #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 #define QUECTEL_PRODUCT_EM061K_LTA 0x0123 #define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_EM060K_128 0x0128 #define QUECTEL_PRODUCT_EM060K_129 0x0129 #define QUECTEL_PRODUCT_EM060K_12a 0x012a #define QUECTEL_PRODUCT_EM060K_12b 0x012b #define QUECTEL_PRODUCT_EM060K_12c 0x012c #define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM060K 0x030b #define QUECTEL_PRODUCT_EM05G_CS 0x030c #define QUECTEL_PRODUCT_EM05GV2 0x030e #define QUECTEL_PRODUCT_EM05CN_SG 0x0310 #define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM05CN 0x0312 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM05G_RS 0x0314 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 #define QUECTEL_PRODUCT_EC200U 0x0901 #define QUECTEL_PRODUCT_EG912Y 0x6001 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200A 0x6005 #define QUECTEL_PRODUCT_EM061K_LWW 0x6008 #define QUECTEL_PRODUCT_EM061K_LCN 0x6009 #define QUECTEL_PRODUCT_EC200T 0x6026 #define QUECTEL_PRODUCT_RM500K 0x7001 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 #define CMOTECH_PRODUCT_CMU_300 0x6002 #define CMOTECH_PRODUCT_6003 0x6003 #define CMOTECH_PRODUCT_6004 0x6004 #define CMOTECH_PRODUCT_6005 0x6005 #define CMOTECH_PRODUCT_CGU_628A 0x6006 #define CMOTECH_PRODUCT_CHE_628S 0x6007 #define CMOTECH_PRODUCT_CMU_301 0x6008 #define CMOTECH_PRODUCT_CHU_628 0x6280 #define CMOTECH_PRODUCT_CHU_628S 0x6281 #define CMOTECH_PRODUCT_CDU_680 0x6803 #define CMOTECH_PRODUCT_CDU_685A 0x6804 #define CMOTECH_PRODUCT_CHU_720S 0x7001 #define CMOTECH_PRODUCT_7002 0x7002 #define CMOTECH_PRODUCT_CHU_629K 0x7003 #define CMOTECH_PRODUCT_7004 0x7004 #define CMOTECH_PRODUCT_7005 0x7005 #define CMOTECH_PRODUCT_CGU_629 0x7006 #define CMOTECH_PRODUCT_CHU_629S 0x700a #define CMOTECH_PRODUCT_CHU_720I 0x7211 #define CMOTECH_PRODUCT_7212 0x7212 #define CMOTECH_PRODUCT_7213 0x7213 #define CMOTECH_PRODUCT_7251 0x7251 #define CMOTECH_PRODUCT_7252 0x7252 #define CMOTECH_PRODUCT_7253 0x7253 #define TELIT_VENDOR_ID 0x1bc7 #define TELIT_PRODUCT_UC864E 0x1003 #define TELIT_PRODUCT_UC864G 0x1004 #define TELIT_PRODUCT_CC864_DUAL 0x1005 #define TELIT_PRODUCT_CC864_SINGLE 0x1006 #define TELIT_PRODUCT_DE910_DUAL 0x1010 #define TELIT_PRODUCT_UE910_V2 0x1012 #define TELIT_PRODUCT_LE922_USBCFG1 0x1040 #define TELIT_PRODUCT_LE922_USBCFG2 0x1041 #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_ME910 0x1100 #define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 
#define TELIT_PRODUCT_LE920A4_1207 0x1207 #define TELIT_PRODUCT_LE920A4_1208 0x1208 #define TELIT_PRODUCT_LE920A4_1211 0x1211 #define TELIT_PRODUCT_LE920A4_1212 0x1212 #define TELIT_PRODUCT_LE920A4_1213 0x1213 #define TELIT_PRODUCT_LE920A4_1214 0x1214 /* ZTE PRODUCTS */ #define ZTE_VENDOR_ID 0x19d2 #define ZTE_PRODUCT_MF622 0x0001 #define ZTE_PRODUCT_MF628 0x0015 #define ZTE_PRODUCT_MF626 0x0031 #define ZTE_PRODUCT_ZM8620_X 0x0396 #define ZTE_PRODUCT_ME3620_MBIM 0x0426 #define ZTE_PRODUCT_ME3620_X 0x1432 #define ZTE_PRODUCT_ME3620_L 0x1433 #define ZTE_PRODUCT_AC2726 0xfff1 #define ZTE_PRODUCT_MG880 0xfffd #define ZTE_PRODUCT_CDMA_TECH 0xfffe #define ZTE_PRODUCT_AC8710T 0xffff #define ZTE_PRODUCT_MC2718 0xffe8 #define ZTE_PRODUCT_AD3812 0xffeb #define ZTE_PRODUCT_MC2716 0xffed #define BENQ_VENDOR_ID 0x04a5 #define BENQ_PRODUCT_H10 0x4068 #define DLINK_VENDOR_ID 0x1186 #define DLINK_PRODUCT_DWM_652 0x3e04 #define DLINK_PRODUCT_DWM_652_U5 0xce16 #define DLINK_PRODUCT_DWM_652_U5A 0xce1e #define QISDA_VENDOR_ID 0x1da5 #define QISDA_PRODUCT_H21_4512 0x4512 #define QISDA_PRODUCT_H21_4523 0x4523 #define QISDA_PRODUCT_H20_4515 0x4515 #define QISDA_PRODUCT_H20_4518 0x4518 #define QISDA_PRODUCT_H20_4519 0x4519 /* TLAYTECH PRODUCTS */ #define TLAYTECH_VENDOR_ID 0x20B9 #define TLAYTECH_PRODUCT_TEU800 0x1682 /* TOSHIBA PRODUCTS */ #define TOSHIBA_VENDOR_ID 0x0930 #define TOSHIBA_PRODUCT_HSDPA_MINICARD 0x1302 #define TOSHIBA_PRODUCT_G450 0x0d45 #define ALINK_VENDOR_ID 0x1e0e #define SIMCOM_PRODUCT_SIM7100E 0x9001 /* Yes, ALINK_VENDOR_ID */ #define ALINK_PRODUCT_PH300 0x9100 #define ALINK_PRODUCT_3GU 0x9200 /* ALCATEL PRODUCTS */ #define ALCATEL_VENDOR_ID 0x1bbb #define ALCATEL_PRODUCT_X060S_X200 0x0000 #define ALCATEL_PRODUCT_X220_X500D 0x0017 #define ALCATEL_PRODUCT_L100V 0x011e #define ALCATEL_PRODUCT_L800MA 0x0203 #define PIRELLI_VENDOR_ID 0x1266 #define PIRELLI_PRODUCT_C100_1 0x1002 #define PIRELLI_PRODUCT_C100_2 0x1003 #define PIRELLI_PRODUCT_1004 0x1004 #define PIRELLI_PRODUCT_1005 0x1005 #define PIRELLI_PRODUCT_1006 0x1006 #define PIRELLI_PRODUCT_1007 0x1007 #define PIRELLI_PRODUCT_1008 0x1008 #define PIRELLI_PRODUCT_1009 0x1009 #define PIRELLI_PRODUCT_100A 0x100a #define PIRELLI_PRODUCT_100B 0x100b #define PIRELLI_PRODUCT_100C 0x100c #define PIRELLI_PRODUCT_100D 0x100d #define PIRELLI_PRODUCT_100E 0x100e #define PIRELLI_PRODUCT_100F 0x100f #define PIRELLI_PRODUCT_1011 0x1011 #define PIRELLI_PRODUCT_1012 0x1012 /* Airplus products */ #define AIRPLUS_VENDOR_ID 0x1011 #define AIRPLUS_PRODUCT_MCD650 0x3198 /* Longcheer/Longsung vendor ID; makes whitelabel devices that * many other vendors like 4G Systems, Alcatel, ChinaBird, * Mobidata, etc sell under their own brand names. */ #define LONGCHEER_VENDOR_ID 0x1c9e /* 4G Systems products */ /* This one was sold as the VW and Skoda "Carstick LTE" */ #define FOUR_G_SYSTEMS_PRODUCT_CARSTICK_LTE 0x7605 /* This is the 4G XS Stick W14 a.k.a. 
Mobilcom Debitel Surf-Stick * * It seems to contain a Qualcomm QSC6240/6290 chipset */ #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 #define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 /* Fujisoft products */ #define FUJISOFT_PRODUCT_FS040U 0x9b02 /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 /* Zoom */ #define ZOOM_PRODUCT_4597 0x9607 /* SpeedUp SU9800 usb 3g modem */ #define SPEEDUP_PRODUCT_SU9800 0x9800 /* Haier products */ #define HAIER_VENDOR_ID 0x201e #define HAIER_PRODUCT_CE81B 0x10f8 #define HAIER_PRODUCT_CE100 0x2009 /* Gemalto's Cinterion products (formerly Siemens) */ #define SIEMENS_VENDOR_ID 0x0681 #define CINTERION_VENDOR_ID 0x1e2d #define CINTERION_PRODUCT_HC25_MDMNET 0x0040 #define CINTERION_PRODUCT_HC25_MDM 0x0047 #define CINTERION_PRODUCT_HC28_MDMNET 0x004A /* same for HC28J */ #define CINTERION_PRODUCT_HC28_MDM 0x004C #define CINTERION_PRODUCT_EU3_E 0x0051 #define CINTERION_PRODUCT_EU3_P 0x0052 #define CINTERION_PRODUCT_PH8 0x0053 #define CINTERION_PRODUCT_AHXX 0x0055 #define CINTERION_PRODUCT_PLXX 0x0060 #define CINTERION_PRODUCT_EXS82 0x006c #define CINTERION_PRODUCT_PH8_2RMNET 0x0082 #define CINTERION_PRODUCT_PH8_AUDIO 0x0083 #define CINTERION_PRODUCT_AHXX_2RMNET 0x0084 #define CINTERION_PRODUCT_AHXX_AUDIO 0x0085 #define CINTERION_PRODUCT_CLS8 0x00b0 #define CINTERION_PRODUCT_MV31_MBIM 0x00b3 #define CINTERION_PRODUCT_MV31_RMNET 0x00b7 #define CINTERION_PRODUCT_MV31_2_MBIM 0x00b8 #define CINTERION_PRODUCT_MV31_2_RMNET 0x00b9 #define CINTERION_PRODUCT_MV32_WA 0x00f1 #define CINTERION_PRODUCT_MV32_WB 0x00f2 #define CINTERION_PRODUCT_MV32_WA_RMNET 0x00f3 #define CINTERION_PRODUCT_MV32_WB_RMNET 0x00f4 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c #define OLIVETTI_PRODUCT_OLICARD100 0xc000 #define OLIVETTI_PRODUCT_OLICARD120 0xc001 #define OLIVETTI_PRODUCT_OLICARD140 0xc002 #define OLIVETTI_PRODUCT_OLICARD145 0xc003 #define OLIVETTI_PRODUCT_OLICARD155 0xc004 #define OLIVETTI_PRODUCT_OLICARD200 0xc005 #define OLIVETTI_PRODUCT_OLICARD160 0xc00a #define OLIVETTI_PRODUCT_OLICARD500 0xc00b /* Celot products */ #define CELOT_VENDOR_ID 0x211f #define CELOT_PRODUCT_CT680M 0x6801 /* Samsung products */ #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 /* YUGA products www.yuga-info.com gavin.kx@qq.com */ #define YUGA_VENDOR_ID 0x257A #define YUGA_PRODUCT_CEM600 0x1601 #define YUGA_PRODUCT_CEM610 0x1602 #define YUGA_PRODUCT_CEM500 0x1603 #define YUGA_PRODUCT_CEM510 0x1604 #define YUGA_PRODUCT_CEM800 0x1605 #define YUGA_PRODUCT_CEM900 0x1606 #define YUGA_PRODUCT_CEU818 0x1607 #define YUGA_PRODUCT_CEU816 0x1608 #define YUGA_PRODUCT_CEU828 0x1609 #define YUGA_PRODUCT_CEU826 0x160A #define YUGA_PRODUCT_CEU518 0x160B #define YUGA_PRODUCT_CEU516 0x160C #define YUGA_PRODUCT_CEU528 0x160D #define YUGA_PRODUCT_CEU526 0x160F #define YUGA_PRODUCT_CEU881 0x161F #define YUGA_PRODUCT_CEU882 0x162F #define YUGA_PRODUCT_CWM600 0x2601 #define YUGA_PRODUCT_CWM610 0x2602 #define YUGA_PRODUCT_CWM500 0x2603 #define YUGA_PRODUCT_CWM510 0x2604 #define YUGA_PRODUCT_CWM800 0x2605 #define YUGA_PRODUCT_CWM900 0x2606 #define YUGA_PRODUCT_CWU718 0x2607 #define YUGA_PRODUCT_CWU716 0x2608 #define YUGA_PRODUCT_CWU728 0x2609 #define YUGA_PRODUCT_CWU726 0x260A #define YUGA_PRODUCT_CWU518 0x260B #define YUGA_PRODUCT_CWU516 0x260C #define YUGA_PRODUCT_CWU528 0x260D #define YUGA_PRODUCT_CWU581 0x260E #define YUGA_PRODUCT_CWU526 0x260F #define YUGA_PRODUCT_CWU582 0x261F #define YUGA_PRODUCT_CWU583 0x262F #define YUGA_PRODUCT_CLM600 0x3601 #define YUGA_PRODUCT_CLM610 
0x3602 #define YUGA_PRODUCT_CLM500 0x3603 #define YUGA_PRODUCT_CLM510 0x3604 #define YUGA_PRODUCT_CLM800 0x3605 #define YUGA_PRODUCT_CLM900 0x3606 #define YUGA_PRODUCT_CLU718 0x3607 #define YUGA_PRODUCT_CLU716 0x3608 #define YUGA_PRODUCT_CLU728 0x3609 #define YUGA_PRODUCT_CLU726 0x360A #define YUGA_PRODUCT_CLU518 0x360B #define YUGA_PRODUCT_CLU516 0x360C #define YUGA_PRODUCT_CLU528 0x360D #define YUGA_PRODUCT_CLU526 0x360F /* Viettel products */ #define VIETTEL_VENDOR_ID 0x2262 #define VIETTEL_PRODUCT_VT1000 0x0002 /* ZD Incorporated */ #define ZD_VENDOR_ID 0x0685 #define ZD_PRODUCT_7000 0x7000 /* LG products */ #define LG_VENDOR_ID 0x1004 #define LG_PRODUCT_L02C 0x618f /* MediaTek products */ #define MEDIATEK_VENDOR_ID 0x0e8d #define MEDIATEK_PRODUCT_DC_1COM 0x00a0 #define MEDIATEK_PRODUCT_DC_4COM 0x00a5 #define MEDIATEK_PRODUCT_DC_4COM2 0x00a7 #define MEDIATEK_PRODUCT_DC_5COM 0x00a4 #define MEDIATEK_PRODUCT_7208_1COM 0x7101 #define MEDIATEK_PRODUCT_7208_2COM 0x7102 #define MEDIATEK_PRODUCT_7103_2COM 0x7103 #define MEDIATEK_PRODUCT_7106_2COM 0x7106 #define MEDIATEK_PRODUCT_FP_1COM 0x0003 #define MEDIATEK_PRODUCT_FP_2COM 0x0023 #define MEDIATEK_PRODUCT_FPDC_1COM 0x0043 #define MEDIATEK_PRODUCT_FPDC_2COM 0x0033 /* Cellient products */ #define CELLIENT_VENDOR_ID 0x2692 #define CELLIENT_PRODUCT_MEN200 0x9005 #define CELLIENT_PRODUCT_MPL200 0x9025 /* Hyundai Petatel Inc. products */ #define PETATEL_VENDOR_ID 0x1ff4 #define PETATEL_PRODUCT_NP10T_600A 0x600a #define PETATEL_PRODUCT_NP10T_600E 0x600e /* TP-LINK Incorporated products */ #define TPLINK_VENDOR_ID 0x2357 #define TPLINK_PRODUCT_LTE 0x000D #define TPLINK_PRODUCT_MA180 0x0201 /* Changhong products */ #define CHANGHONG_VENDOR_ID 0x2077 #define CHANGHONG_PRODUCT_CH690 0x7001 /* Inovia */ #define INOVIA_VENDOR_ID 0x20a6 #define INOVIA_SEW858 0x1105 /* VIA Telecom */ #define VIATELECOM_VENDOR_ID 0x15eb #define VIATELECOM_PRODUCT_CDS7 0x0001 /* WeTelecom products */ #define WETELECOM_VENDOR_ID 0x22de #define WETELECOM_PRODUCT_WMD200 0x6801 #define WETELECOM_PRODUCT_6802 0x6802 #define WETELECOM_PRODUCT_WMD300 0x6803 /* OPPO products */ #define OPPO_VENDOR_ID 0x22d9 #define OPPO_PRODUCT_R11 0x276c /* Sierra Wireless products */ #define SIERRA_VENDOR_ID 0x1199 #define SIERRA_PRODUCT_EM9191 0x90d3 /* UNISOC (Spreadtrum) products */ #define UNISOC_VENDOR_ID 0x1782 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */ #define TOZED_PRODUCT_LT70C 0x4055 /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 /* MeiG Smart Technology products */ #define MEIGSMART_VENDOR_ID 0x2dee /* MeiG Smart SLM320 based on UNISOC UIS8910 */ #define MEIGSMART_PRODUCT_SLM320 0x4d41 /* Device flags */ /* Highest interface number which can be used with NCTRL() and RSVD() */ #define FLAG_IFNUM_MAX 7 /* Interface does not support modem-control requests */ #define NCTRL(ifnum) ((BIT(ifnum) & 0xff) << 8) /* Interface is reserved */ #define RSVD(ifnum) ((BIT(ifnum) & 0xff) << 0) /* Interface must have two endpoints */ #define NUMEP2 BIT(16) /* Device needs ZLP */ #define ZLP BIT(17) static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA_LIGHT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA_QUAD) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA_QUAD_LIGHT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA_NDIS) }, { 
USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA_NDIS_LIGHT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA_NDIS_QUAD) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA_NDIS_QUAD_LIGHT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COBRA) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COBRA_BUS) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_VIPER) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_VIPER_BUS) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_GT_MAX_READY) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUJI_MODEM_LIGHT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUJI_MODEM_GT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUJI_MODEM_EX) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_KOI_MODEM) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_SCORPION_MODEM) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_ETNA_MODEM) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_ETNA_MODEM_LITE) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_ETNA_MODEM_GT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_ETNA_MODEM_EX) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_ETNA_KOI_MODEM) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_GTM380_MODEM) }, { USB_DEVICE(QUANTA_VENDOR_ID, QUANTA_PRODUCT_Q101) }, { USB_DEVICE(QUANTA_VENDOR_ID, QUANTA_PRODUCT_Q111) }, { USB_DEVICE(QUANTA_VENDOR_ID, QUANTA_PRODUCT_GLX) }, { USB_DEVICE(QUANTA_VENDOR_ID, QUANTA_PRODUCT_GKE) }, { USB_DEVICE(QUANTA_VENDOR_ID, QUANTA_PRODUCT_GLE) }, { USB_DEVICE(QUANTA_VENDOR_ID, 0xea42), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1c05, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1c1f, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1c23, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E173, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E173S6, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1750, 0xff, 0xff, 0xff), .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1441, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1442, USB_CLASS_COMM, 0x02, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4505, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x14ac, 0xff, 0xff, 0xff), /* Huawei E1820 */ .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0xff, 0xff) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x02) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x03) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x04) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x06) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0D) }, { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x0F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x10) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x12) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x13) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x14) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x15) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x17) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x18) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x19) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x1A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x1B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x1C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x31) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x32) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x33) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x34) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x35) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x36) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x3F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x48) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x49) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x4A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x4B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x4C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x61) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x62) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x63) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x64) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x65) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x66) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x6F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x72) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x73) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x74) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x75) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x78) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x79) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x7A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x7B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x01, 0x7C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x01) }, { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x02) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x03) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x04) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x06) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x0F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x10) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x12) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x13) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x14) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x15) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x17) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x18) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x19) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x1A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x1B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x1C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x31) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x32) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x33) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x34) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x35) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x36) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x3F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x48) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x49) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x4A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x4B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x4C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x61) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x62) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x63) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x64) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x65) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x66) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x6F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x72) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x73) }, { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x74) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x75) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x78) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x79) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x7A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x7B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x02, 0x7C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x02) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x03) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x04) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x06) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x0A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x0B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x0D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x0E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x0F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x10) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x12) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x13) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x14) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x15) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x17) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x18) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x19) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x1A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x1B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x1C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x31) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x32) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x33) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x34) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x35) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x36) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x3A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x3B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x3D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x3E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x3F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x48) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x49) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x4A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x4B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x4C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x61) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x62) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x63) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x64) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x65) }, { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x66) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x6A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x6B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x6D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x6E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x6F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x72) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x73) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x74) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x75) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x78) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x79) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x7A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x7B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x03, 0x7C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x02) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x03) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x04) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x06) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x0A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x0B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x0D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x0E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x0F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x10) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x12) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x13) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x14) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x15) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x17) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x18) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x19) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x1A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x1B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x1C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x31) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x32) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x33) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x34) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x35) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x36) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x3A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x3B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x3D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x3E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x3F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x48) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x49) }, { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x4A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x4B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x4C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x61) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x62) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x63) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x64) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x65) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x66) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x6A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x6B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x6D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x6E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x6F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x72) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x73) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x74) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x75) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x78) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x79) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x7A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x7B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x04, 0x7C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x02) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x03) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x04) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x06) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x0A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x0B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x0D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x0E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x0F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x10) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x12) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x13) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x14) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x15) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x17) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x18) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x19) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x1A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x1B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x1C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x31) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x32) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x33) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x34) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x35) }, { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x36) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x3A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x3B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x3D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x3E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x3F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x48) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x49) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x4A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x4B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x4C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x61) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x62) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x63) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x64) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x65) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x66) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x6A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x6B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x6D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x6E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x6F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x72) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x73) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x74) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x75) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x78) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x79) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x7A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x7B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x05, 0x7C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x02) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x03) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x04) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x06) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x0A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x0B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x0D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x0E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x0F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x10) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x12) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x13) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x14) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x15) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x17) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x18) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x19) }, { 
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x1A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x1B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x1C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x31) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x32) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x33) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x34) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x35) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x36) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x3A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x3B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x3D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x3E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x3F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x48) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x49) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x4A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x4B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x4C) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x61) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x62) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x63) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x64) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x65) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x66) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x6A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x6B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x6D) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x6E) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x6F) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x72) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x73) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x74) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x75) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x78) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x79) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7A) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7C) }, /* Motorola devices */ { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x2a70, 0xff, 0xff, 0xff) }, /* mdm6600 */ { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x2e0a, 0xff, 0xff, 0xff) }, /* mdm9600 */ { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x4281, 0x0a, 0x00, 0xfc) }, /* mdm ram dl */ { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x900e, 0xff, 0xff, 0xff) }, /* mdm qc dl */ { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V740) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V720) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U730) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, 
NOVATELWIRELESS_PRODUCT_U740) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U870) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_XU870) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_X950D) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EV620) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_ES720) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E725) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_ES620) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU730) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU740) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU870D) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC950D) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC727) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_OVMC760) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC780) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_FULLSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_FULLSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_FULLSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_HIGHSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED3) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED4) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED5) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED6) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED7) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC996D) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MF3470) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC547) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) }, /* Novatel Ovation MC551 a.k.a. 
Verizon USB551L */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U620L, 0xff, 0x00, 0x00) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H02) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_SKYPEPHONE_S2) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5700_MINICARD) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite EV620 CDMA/EV-DO */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5500_MINICARD) }, /* Dell Wireless 5500 Mobile Broadband HSDPA Mini-Card == Novatel Expedite EU740 HSDPA/3G */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5505_MINICARD) }, /* Dell Wireless 5505 Mobile Broadband HSDPA Mini-Card == Novatel Expedite EU740 HSDPA/3G */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5700_EXPRESSCARD) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO ExpressCard == Novatel Merlin XV620 CDMA/EV-DO */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5510_EXPRESSCARD) }, /* Dell Wireless 5510 Mobile Broadband HSDPA ExpressCard == Novatel Merlin XU870 HSDPA/3G */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5700_MINICARD_SPRINT) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite E720 CDMA/EV-DO */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5700_MINICARD_TELUS) }, /* Dell Wireless 5700 Mobile Broadband CDMA/EVDO Mini-Card == Novatel Expedite ET620 CDMA/EV-DO */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5720_MINICARD_VZW) }, /* Dell Wireless 5720 == Novatel EV620 CDMA/EV-DO */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5720_MINICARD_SPRINT) }, /* Dell Wireless 5720 == Novatel EV620 CDMA/EV-DO */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5720_MINICARD_TELUS) }, /* Dell Wireless 5720 == Novatel EV620 CDMA/EV-DO */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5520_MINICARD_CINGULAR) }, /* Dell Wireless HSDPA 5520 == Novatel Expedite EU860D */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5520_MINICARD_GENERIC_L) }, /* Dell Wireless HSDPA 5520 */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5520_MINICARD_GENERIC_I) }, /* Dell Wireless 5520 Voda I Mobile Broadband (3G HSDPA) Minicard */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_SPRINT) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_TELUS) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */ { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_VZW) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5804_MINICARD_ATT, 0xff, 0xff, 0xff) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E), .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM), .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E), .driver_info = RSVD(0) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM), .driver_info = 
RSVD(0) | RSVD(6) }, { USB_DEVICE_INTERFACE_CLASS(DELL_VENDOR_ID, DELL_PRODUCT_FM101R, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(DELL_VENDOR_ID, DELL_PRODUCT_FM101R_ESIM, 0xff) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, { USB_DEVICE(AXESSTEL_VENDOR_ID, AXESSTEL_PRODUCT_MV110H) }, { USB_DEVICE(YISO_VENDOR_ID, YISO_PRODUCT_U893) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_C100_1, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_C100_2, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1004, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1005, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1006, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1007, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1008, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1009, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_100A, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_100B, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_100C, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_100D, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_100E, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_100F, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1010, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1011, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1012, 0xff) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */ { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, 0x6001, 0xff, 0xff, 0xff), /* 4G LTE usb-modem U901 */ .driver_info = RSVD(3) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000), /* SIMCom SIM5218 */ .driver_info = NCTRL(0) | NCTRL(1) | NCTRL(2) | NCTRL(3) | RSVD(4) }, /* Quectel products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), .driver_info = RSVD(4) }, /* Yuga products use Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5), .driver_info = RSVD(1) | RSVD(4) }, /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x908b), /* u-blox LARA-R6 00B */ .driver_info = RSVD(4) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, /* u-blox products */ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1311) }, /* u-blox LARA-R6 01B */ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1312), /* u-blox LARA-R6 01B (RMNET) */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(UBLOX_VENDOR_ID, 0x1313, 0xff) }, /* u-blox LARA-R6 01B (ECM) */ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1341) }, /* u-blox LARA-L6 */ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1342), /* u-blox LARA-L6 (RMNET) */ .driver_info = RSVD(4) }, { USB_DEVICE(UBLOX_VENDOR_ID, 0x1343), 
/* u-blox LARA-L6 (ECM) */ .driver_info = RSVD(4) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG91, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG91, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0203, 0xff), /* BG95-M3 */ .driver_info = ZLP }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05GV2, 0xff), .driver_info = RSVD(4) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_RS, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x30) }, { 
USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0700, 0xff), /* BG95 */ .driver_info = RSVD(3) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), .driver_info = ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0900, 0xff, 0, 0), /* RM500U-CN */ .driver_info = ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200A, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG912Y, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { 
USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6004) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6005) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CGU_628A) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHE_628S), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_301), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_628), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_628S) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CDU_680) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CDU_685A) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_720S), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7002), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_629K), .driver_info = RSVD(4) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7004), .driver_info = RSVD(3) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7005) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CGU_629), .driver_info = RSVD(5) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_629S), .driver_info = RSVD(4) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_720I), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7212), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7213), .driver_info = RSVD(0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7251), .driver_info = RSVD(1) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7252), .driver_info = RSVD(1) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7253), .driver_info = RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864E) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864G) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_DUAL) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1031, 0xff), /* Telit LE910C1-EUX */ .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1033, 0xff), /* Telit LE910C1-EUX (ECM) */ .driver_info = NCTRL(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1035, 0xff) }, /* Telit LE910C4-WWX (ECM) */ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), .driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG1), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG2), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), .driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1050, 0xff), /* Telit FN980 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1051, 0xff), /* Telit FN980 (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1052, 0xff), /* Telit FN980 (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { 
USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1053, 0xff), /* Telit FN980 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1054, 0xff), /* Telit FT980-KS */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1055, 0xff), /* Telit FN980 (PCIe) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1057, 0xff), /* Telit FN980 */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1058, 0xff), /* Telit FN980 (PCIe) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff), /* Telit LN920 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff), /* Telit LN920 (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1062, 0xff), /* Telit LN920 (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */ .driver_info = RSVD(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990 (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990 (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1102, 0xff), /* Telit ME910 (ECM) */ .driver_info = NCTRL(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */ .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110b, 0xff), /* Telit ME910G1 (ECM) */ .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1203, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { 
USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1204, 0xff), /* Telit LE910Cx (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), .driver_info = NCTRL(0) | RSVD(1) | RSVD(5) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1207) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1208), .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1211), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1212), .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1230, 0xff), /* Telit LE910Cx (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1900), /* Telit LN940 (QMI) */ .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ .driver_info = NCTRL(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7010, 0xff), /* Telit LE910-S1 (RNDIS) */ .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701a, 0xff), /* Telit LE910R1 (RNDIS) */ .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */ .driver_info = NCTRL(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */ .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9201), /* Telit LE910R1 flashing device */ .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0003, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0004, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0005, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0006, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0008, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0009, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x000a, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x000b, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x000c, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x000d, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x000e, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x000f, 
0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0010, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0011, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0012, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0013, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF628, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0016, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0017, 0xff, 0xff, 0xff), .driver_info = RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0018, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff), .driver_info = RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0020, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0021, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0022, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0023, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0024, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0025, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0028, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0029, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0030, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF626, 0xff, 0xff, 0xff), .driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0032, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0033, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0034, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0037, 0xff, 0xff, 0xff), .driver_info = NCTRL(0) | NCTRL(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0038, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0039, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0040, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0042, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0043, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0044, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0048, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0049, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0050, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0051, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0052, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0054, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0055, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0056, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0057, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0058, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0061, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0062, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0063, 0xff, 0xff, 0xff), 
.driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0064, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0065, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0066, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0067, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0069, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0076, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0077, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0078, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0079, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0082, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0083, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0086, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0087, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0088, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0089, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0090, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0091, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0092, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0093, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0094, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0095, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0096, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0097, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0104, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0105, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0106, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0108, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0113, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0117, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0118, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0121, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0122, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0123, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0124, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0125, 0xff, 0xff, 0xff), .driver_info = RSVD(6) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0126, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0128, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0135, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0136, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0137, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0139, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0142, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0143, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0144, 0xff, 0xff, 
0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0145, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0148, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0151, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0153, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0155, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0156, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0157, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0158, 0xff, 0xff, 0xff), .driver_info = RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0159, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0161, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0162, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0164, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0165, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0167, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0191, 0xff, 0xff, 0xff), /* ZTE EuFi890 */ .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0196, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0197, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0199, 0xff, 0xff, 0xff), /* ZTE MF820S */ .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0200, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0201, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0254, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0257, 0xff, 0xff, 0xff), /* ZTE MF821 */ .driver_info = RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0265, 0xff, 0xff, 0xff), /* ONDA MT8205 */ .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0284, 0xff, 0xff, 0xff), /* ZTE MF880 */ .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0317, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0326, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0330, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0395, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0412, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G */ .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0414, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0417, 0xff, 0xff, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x0601, 0xff) }, /* GosunCn ZTE WeLink ME3630 (RNDIS mode) */ { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x0602, 0xff) }, /* GosunCn ZTE WeLink ME3630 (MBIM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1008, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1010, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1012, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1018, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1021, 0xff, 0xff, 0xff), .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 
0x1057, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1058, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1059, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1060, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1061, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1062, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1063, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1064, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1065, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1066, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1067, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1068, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1069, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1070, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1071, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1072, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1073, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1074, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1075, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1076, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1077, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1078, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1079, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1080, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1081, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1082, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1083, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1084, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1085, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1086, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1087, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1088, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1089, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1090, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1091, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1092, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1093, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1094, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1095, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1096, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1097, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1098, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1099, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1100, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1101, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1102, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1103, 0xff, 0xff, 0xff) }, { 
USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1104, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1105, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1106, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1107, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1108, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1109, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1110, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1111, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1112, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1113, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1114, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1115, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1116, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1117, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1118, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1119, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1120, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1121, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1122, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1123, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1124, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1125, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1126, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1127, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1128, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1129, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1130, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1131, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1132, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1133, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1134, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1135, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1136, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1137, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1138, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1139, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1140, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1141, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1142, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1143, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1144, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1145, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1146, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1147, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1148, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1149, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1150, 0xff, 
0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1151, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1152, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1153, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1154, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1155, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1156, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1157, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1158, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1159, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1160, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1161, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1162, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1163, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1164, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1165, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1166, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1167, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1168, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1169, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1170, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1244, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1245, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1246, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1247, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1248, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1249, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1250, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1251, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1252, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1253, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1254, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1255, 0xff, 0xff, 0xff), .driver_info = RSVD(3) | RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1256, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1257, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1258, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1259, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1260, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1261, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1262, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1263, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1264, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1265, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1266, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1267, 0xff, 0xff, 0xff) }, { 
USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1268, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1269, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1270, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1271, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1272, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1273, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1274, 0xff, 0xff, 0xff) }, { USB_DEVICE(ZTE_VENDOR_ID, 0x1275), /* ZTE P685M */ .driver_info = RSVD(3) | RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1276, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1277, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1278, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1279, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1280, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1281, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1282, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1283, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1284, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1285, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1286, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1287, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1288, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1289, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1290, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1291, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1292, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1293, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1294, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1295, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1296, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1297, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1298, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1299, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1300, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1301, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1302, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1303, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1333, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1401, 0xff, 0xff, 0xff), .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1402, 0xff, 0xff, 0xff), .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1424, 0xff, 0xff, 0xff), .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1425, 0xff, 0xff, 0xff), .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1426, 0xff, 0xff, 0xff), /* ZTE MF91 */ .driver_info = RSVD(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */ .driver_info = RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* 
GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1485, 0xff, 0xff, 0xff), /* ZTE MF286D */ .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1545, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1546, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1547, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1565, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1566, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1567, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1589, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1590, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1591, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1592, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1594, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1596, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1598, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1600, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2002, 0xff, 0xff, 0xff), .driver_info = NCTRL(0) | NCTRL(1) | NCTRL(2) | RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x2003, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0014, 0xff, 0xff, 0xff) }, /* ZTE CDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0027, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0059, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0060, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0070, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0073, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0130, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0133, 0xff, 0xff, 0xff), .driver_info = RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0141, 0xff, 0xff, 0xff), .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0147, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0152, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0168, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0170, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0176, 0xff, 0xff, 0xff), .driver_info = RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0178, 0xff, 0xff, 0xff), .driver_info = RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff42, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff43, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff44, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff45, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff46, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff47, 0xff, 0xff, 0xff) }, { 
USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff48, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff49, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4a, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4b, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4c, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4d, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4e, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff4f, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff50, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff51, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff52, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff53, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff54, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff55, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff56, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff57, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff58, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff59, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5a, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5b, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5c, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5d, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5e, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff5f, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff60, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff61, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff62, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff63, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff64, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff65, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff66, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff67, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff68, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff69, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6a, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6b, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6c, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6d, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6e, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff6f, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff70, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff71, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff72, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff73, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff74, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff75, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff76, 0xff, 
0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff77, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff78, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff79, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7a, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7b, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7c, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7d, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7e, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff7f, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff80, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff81, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff82, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff83, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff84, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff85, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff86, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff87, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff88, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff89, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8a, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8b, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8c, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8d, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8e, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff8f, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff90, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff91, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff92, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff93, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff94, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff9f, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa0, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa1, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa2, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa3, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa4, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa5, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa6, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa7, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa8, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffa9, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffaa, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffab, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffac, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffae, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffaf, 0xff, 0xff, 0xff) }, { 
USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb0, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb1, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb2, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb3, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb4, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb5, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb6, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb7, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb8, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffb9, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffba, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbb, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbc, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbd, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbe, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffbf, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc0, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc1, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc2, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc3, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc4, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc5, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc6, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc7, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc8, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffc9, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffca, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffcb, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffcc, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffcd, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffce, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffcf, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd0, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd1, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd2, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd3, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd4, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffd5, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffe9, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffec, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xffee, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xfff6, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xfff7, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xfff8, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xfff9, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xfffb, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xfffc, 0xff, 
0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MG880, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_CDMA_TECH, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC2726, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC8710T, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2718, 0xff, 0xff, 0xff), .driver_info = NCTRL(1) | NCTRL(2) | NCTRL(3) | NCTRL(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AD3812, 0xff, 0xff, 0xff), .driver_info = NCTRL(0) | NCTRL(1) | NCTRL(2) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2716, 0xff, 0xff, 0xff), .driver_info = NCTRL(1) | NCTRL(2) | NCTRL(3) }, { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_L), .driver_info = RSVD(3) | RSVD(4) | RSVD(5) }, { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_MBIM), .driver_info = RSVD(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ME3620_X), .driver_info = RSVD(3) | RSVD(4) | RSVD(5) }, { USB_DEVICE(ZTE_VENDOR_ID, ZTE_PRODUCT_ZM8620_X), .driver_info = RSVD(3) | RSVD(4) | RSVD(5) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x86, 0x10) }, { USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_H10) }, { USB_DEVICE(DLINK_VENDOR_ID, DLINK_PRODUCT_DWM_652) }, { USB_DEVICE(ALINK_VENDOR_ID, DLINK_PRODUCT_DWM_652_U5) }, /* Yes, ALINK_VENDOR_ID */ { USB_DEVICE(ALINK_VENDOR_ID, DLINK_PRODUCT_DWM_652_U5A) }, { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H21_4512) }, { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H21_4523) }, { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H20_4515) }, { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H20_4518) }, { USB_DEVICE(QISDA_VENDOR_ID, QISDA_PRODUCT_H20_4519) }, { USB_DEVICE(TOSHIBA_VENDOR_ID, TOSHIBA_PRODUCT_G450) }, { USB_DEVICE(TOSHIBA_VENDOR_ID, TOSHIBA_PRODUCT_HSDPA_MINICARD ) }, /* Toshiba 3G HSDPA == Novatel Expedite EU870D MiniCard */ { USB_DEVICE(ALINK_VENDOR_ID, 0x9000) }, { USB_DEVICE(ALINK_VENDOR_ID, ALINK_PRODUCT_PH300) }, { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E), .driver_info = RSVD(5) | RSVD(6) }, { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9003, 0xff) }, /* Simcom SIM7500/SIM7600 MBIM mode */ { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9011, 0xff), /* Simcom SIM7500/SIM7600 RNDIS mode */ .driver_info = RSVD(7) }, { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9205, 0xff) }, /* Simcom SIM7070/SIM7080/SIM7090 AT+ECM mode */ { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9206, 0xff) }, /* Simcom SIM7070/SIM7080/SIM7090 AT-only mode */ { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200), .driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D), .driver_info = RSVD(6) }, { USB_DEVICE(ALCATEL_VENDOR_ID, 0x0052), .driver_info = RSVD(6) }, { USB_DEVICE(ALCATEL_VENDOR_ID, 0x00b6), .driver_info = RSVD(3) }, { USB_DEVICE(ALCATEL_VENDOR_ID, 0x00b7), .driver_info = RSVD(5) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_L100V), .driver_info = RSVD(4) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_L800MA), .driver_info = RSVD(2) }, { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) }, { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_CARSTICK_LTE), 
.driver_info = RSVD(0) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14), .driver_info = NCTRL(0) | NCTRL(1) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), .driver_info = NCTRL(1) | NCTRL(2) | RSVD(3) }, {USB_DEVICE(LONGCHEER_VENDOR_ID, FUJISOFT_PRODUCT_FS040U), .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9801, 0xff), .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b05), /* Longsung U8300 */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b3c), /* Longsung U9300 */ .driver_info = RSVD(0) | RSVD(4) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, { USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) }, { USB_DEVICE_AND_INTERFACE_INFO(HAIER_VENDOR_ID, HAIER_PRODUCT_CE81B, 0xff, 0xff, 0xff) }, /* Pirelli */ { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_C100_1, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_C100_2, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_1004, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_1005, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_1006, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_1007, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_1008, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_1009, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_100A, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_100B, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_100C, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_100D, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_100E, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_100F, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_1011, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_1012, 0xff) }, /* Cinterion */ { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_E) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EU3_P) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8), .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX, 0xff) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PLXX), .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8_2RMNET, 0xff), .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_PH8_AUDIO, 0xff), .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_2RMNET, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_AUDIO, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_CLS8, 0xff), .driver_info = RSVD(0) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EXS82, 0xff) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) }, { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDM) }, { 
USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDMNET) }, { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, /* HC28 enumerates with Siemens or Cinterion VID depending on FW revision */ { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_MBIM, 0xff), .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff), .driver_info = RSVD(0)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_MBIM, 0xff), .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_RMNET, 0xff), .driver_info = RSVD(0)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff), .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA_RMNET, 0xff), .driver_info = RSVD(0) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff), .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB_RMNET, 0xff), .driver_info = RSVD(0) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD140), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD155), .driver_info = RSVD(6) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200), .driver_info = RSVD(6) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD160), .driver_info = RSVD(6) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD500), .driver_info = RSVD(4) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM500) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM510) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM800) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM900) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU818) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU816) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU828) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU826) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU518) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU516) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU528) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU526) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM600) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM610) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM500) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM510) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM800) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM900) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU718) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU716) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU728) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU726) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU518) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU516) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU528) }, { 
USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU526) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM600) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM610) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM500) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM510) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM800) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM900) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU718) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU716) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU728) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU726) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU518) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU516) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU528) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU526) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU881) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU882) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU581) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU582) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU583) }, { USB_DEVICE_AND_INTERFACE_INFO(VIETTEL_VENDOR_ID, VIETTEL_PRODUCT_VT1000, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZD_VENDOR_ID, ZD_PRODUCT_7000, 0xff, 0xff, 0xff) }, { USB_DEVICE(LG_VENDOR_ID, LG_PRODUCT_L02C) }, /* docomo L-02C modem */ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a1, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a1, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x00a2, 0xff, 0x02, 0x01) }, /* MediaTek MT6276M modem & app port */ { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_1COM, 0x0a, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_5COM, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_1COM, 0x02, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7208_2COM, 0x02, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_1COM, 0x0a, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FP_2COM, 0x0a, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_1COM, 0x0a, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_FPDC_2COM, 0x0a, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7103_2COM, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7106_2COM, 0x02, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MPL200), .driver_info = RSVD(1) | RSVD(4) }, { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) }, { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, { USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, TPLINK_PRODUCT_LTE, 0xff, 0x00, 0x00) }, /* 
TP-Link LTE Module */ { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = RSVD(4) }, { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ .driver_info = RSVD(4) }, { USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d01, 0xff) }, /* D-Link DWM-156 (variant) */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d02, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d03, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff), /* D-Link DWM-158 */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d0e, 0xff) }, /* D-Link DWM-157 C1 */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e3d, 0xff), /* D-Link DWM-222 A2 */ .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ { USB_DEVICE_INTERFACE_CLASS(0x1435, 0xd191, 0xff), /* Wistron Neweb D19Q1 */ .driver_info = RSVD(1) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x1690, 0x7588, 0xff), /* ASKEY WWHC050 */ .driver_info = RSVD(1) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2033, 0xff), /* BroadMobi BM806U */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2060, 0xff), /* BroadMobi BM818 */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x10) }, /* HP lt4132 (Huawei ME906s-158) */ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x12) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x13) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x14) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) }, { USB_DEVICE(0x0489, 0xe0b4), /* Foxconn T77W968 */ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(0x0489, 0xe0b5), /* Foxconn T77W968 ESIM */ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0da, 0xff), /* Foxconn T99W265 MBIM variant */ .driver_info = RSVD(3) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0db, 0xff), /* Foxconn T99W265 MBIM */ .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0ee, 0xff), /* Foxconn T99W368 MBIM */ .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0f0, 0xff), /* Foxconn T99W373 MBIM */ .driver_info = RSVD(3) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ { USB_DEVICE_INTERFACE_CLASS(0x1782, 
0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0001, 0xff, 0xff, 0xff) }, /* Fibocom L716-EU (ECM/RNDIS mode) */ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0106, 0xff) }, /* Fibocom MA510 (ECM mode w/ diag intf.) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */ .driver_info = RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a04, 0xff) }, /* Fibocom FM650-CN (ECM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ { USB_DEVICE(0x33f8, 0x0104), /* Rolling RW101-GL (laptop RMNET) */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a2, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a3, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a4, 0xff), /* Rolling RW101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */ .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); /* The card has three separate interfaces, which the serial driver * recognizes separately, thus num_port=1. 
 */
static struct usb_serial_driver option_1port_device = {
	.driver = {
		.owner = THIS_MODULE,
		.name = "option1",
	},
	.description       = "GSM modem (1-port)",
	.id_table          = option_ids,
	.num_ports         = 1,
	.probe             = option_probe,
	.open              = usb_wwan_open,
	.close             = usb_wwan_close,
	.dtr_rts           = usb_wwan_dtr_rts,
	.write             = usb_wwan_write,
	.write_room        = usb_wwan_write_room,
	.chars_in_buffer   = usb_wwan_chars_in_buffer,
	.tiocmget          = usb_wwan_tiocmget,
	.tiocmset          = usb_wwan_tiocmset,
	.attach            = option_attach,
	.release           = option_release,
	.port_probe        = usb_wwan_port_probe,
	.port_remove       = usb_wwan_port_remove,
	.read_int_callback = option_instat_callback,
#ifdef CONFIG_PM
	.suspend           = usb_wwan_suspend,
	.resume            = usb_wwan_resume,
#endif
};

static struct usb_serial_driver * const serial_drivers[] = {
	&option_1port_device, NULL
};

module_usb_serial_driver(serial_drivers, option_ids);

static bool iface_is_reserved(unsigned long device_flags, u8 ifnum)
{
	if (ifnum > FLAG_IFNUM_MAX)
		return false;

	return device_flags & RSVD(ifnum);
}

static int option_probe(struct usb_serial *serial,
			const struct usb_device_id *id)
{
	struct usb_interface_descriptor *iface_desc =
				&serial->interface->cur_altsetting->desc;
	unsigned long device_flags = id->driver_info;

	/* Never bind to the CD-Rom emulation interface	*/
	if (iface_desc->bInterfaceClass == USB_CLASS_MASS_STORAGE)
		return -ENODEV;

	/*
	 * Don't bind reserved interfaces (like network ones) which often have
	 * the same class/subclass/protocol as the serial interfaces.  Look at
	 * the Windows driver .INF files for reserved interface numbers.
	 */
	if (iface_is_reserved(device_flags, iface_desc->bInterfaceNumber))
		return -ENODEV;

	/*
	 * Allow matching on bNumEndpoints for devices whose interface numbers
	 * can change (e.g. Quectel EP06).
	 */
	if (device_flags & NUMEP2 && iface_desc->bNumEndpoints != 2)
		return -ENODEV;

	/* Store the device flags so we can use them during attach. */
	usb_set_serial_data(serial, (void *)device_flags);

	return 0;
}

static bool iface_no_modem_control(unsigned long device_flags, u8 ifnum)
{
	if (ifnum > FLAG_IFNUM_MAX)
		return false;

	return device_flags & NCTRL(ifnum);
}

static int option_attach(struct usb_serial *serial)
{
	struct usb_interface_descriptor *iface_desc;
	struct usb_wwan_intf_private *data;
	unsigned long device_flags;

	data = kzalloc(sizeof(struct usb_wwan_intf_private), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	/* Retrieve device flags stored at probe. */
	device_flags = (unsigned long)usb_get_serial_data(serial);

	iface_desc = &serial->interface->cur_altsetting->desc;

	if (!iface_no_modem_control(device_flags, iface_desc->bInterfaceNumber))
		data->use_send_setup = 1;

	if (device_flags & ZLP)
		data->use_zlp = 1;

	spin_lock_init(&data->susp_lock);

	usb_set_serial_data(serial, data);

	return 0;
}

static void option_release(struct usb_serial *serial)
{
	struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial);

	kfree(intfdata);
}

static void option_instat_callback(struct urb *urb)
{
	int err;
	int status = urb->status;
	struct usb_serial_port *port = urb->context;
	struct device *dev = &port->dev;
	struct usb_wwan_port_private *portdata =
					usb_get_serial_port_data(port);

	dev_dbg(dev, "%s: urb %p port %p has data %p\n", __func__, urb, port, portdata);

	if (status == 0) {
		struct usb_ctrlrequest *req_pkt = urb->transfer_buffer;

		if (!req_pkt) {
			dev_dbg(dev, "%s: NULL req_pkt\n", __func__);
			return;
		}
		if ((req_pkt->bRequestType == 0xA1) &&
				(req_pkt->bRequest == 0x20)) {
			int old_dcd_state;
			unsigned char signals = *((unsigned char *)
					urb->transfer_buffer +
					sizeof(struct usb_ctrlrequest));

			dev_dbg(dev, "%s: signal x%x\n", __func__, signals);

			old_dcd_state = portdata->dcd_state;
			portdata->cts_state = 1;
			portdata->dcd_state = ((signals & 0x01) ? 1 : 0);
			portdata->dsr_state = ((signals & 0x02) ? 1 : 0);
			portdata->ri_state = ((signals & 0x08) ? 1 : 0);

			if (old_dcd_state && !portdata->dcd_state)
				tty_port_tty_hangup(&port->port, true);
		} else {
			dev_dbg(dev, "%s: type %x req %x\n", __func__,
				req_pkt->bRequestType, req_pkt->bRequest);
		}
	} else if (status == -ENOENT || status == -ESHUTDOWN) {
		dev_dbg(dev, "%s: urb stopped: %d\n", __func__, status);
	} else
		dev_dbg(dev, "%s: error %d\n", __func__, status);

	/* Resubmit urb so we continue receiving IRQ data */
	if (status != -ESHUTDOWN && status != -ENOENT) {
		usb_mark_last_busy(port->serial->dev);
		err = usb_submit_urb(urb, GFP_ATOMIC);
		if (err)
			dev_dbg(dev, "%s: resubmit intr urb failed. (%d)\n",
				__func__, err);
	}
}

MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL v2");
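/*
 * Editor's note, an illustrative sketch rather than part of the upstream
 * table: the vendor/product IDs below are placeholders, chosen only to show
 * how the .driver_info flags used throughout option_ids[] interact with the
 * code above.  RSVD(n) makes option_probe() refuse to bind interface n (for
 * example a network interface that shares the vendor-specific class), while
 * NCTRL(n) makes option_attach() skip use_send_setup, so no modem-control
 * requests are sent on interface n.
 *
 *	{ USB_DEVICE_INTERFACE_CLASS(0x1234, 0x5678, 0xff),
 *	  .driver_info = RSVD(3) | NCTRL(0) },	(hypothetical IDs)
 */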
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FS_H
#define _LINUX_FS_H

#include <linux/linkage.h>
#include <linux/wait_bit.h>
#include <linux/kdev_t.h>
#include <linux/dcache.h>
#include <linux/path.h>
#include <linux/stat.h>
#include <linux/cache.h>
#include <linux/list.h>
#include <linux/list_lru.h>
#include <linux/llist.h>
#include <linux/radix-tree.h>
#include <linux/xarray.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/pid.h>
#include <linux/bug.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/mm_types.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fcntl.h>
#include <linux/rculist_bl.h> #include <linux/atomic.h> #include <linux/shrinker.h> #include <linux/migrate_mode.h> #include <linux/uidgid.h> #include <linux/lockdep.h> #include <linux/percpu-rwsem.h> #include <linux/workqueue.h> #include <linux/delayed_call.h> #include <linux/uuid.h> #include <linux/errseq.h> #include <linux/ioprio.h> #include <linux/fs_types.h> #include <linux/build_bug.h> #include <linux/stddef.h> #include <linux/mount.h> #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> #include <linux/maple_tree.h> #include <linux/rw_hint.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> struct backing_dev_info; struct bdi_writeback; struct bio; struct io_comp_batch; struct export_operations; struct fiemap_extent_info; struct hd_geometry; struct iovec; struct kiocb; struct kobject; struct pipe_inode_info; struct poll_table_struct; struct kstatfs; struct vm_area_struct; struct vfsmount; struct cred; struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; struct fs_context; struct fs_parameter_spec; struct fileattr; struct iomap_ops; extern void __init inode_init(void); extern void __init inode_init_early(void); extern void __init files_init(void); extern void __init files_maxfiles_init(void); extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; typedef __kernel_rwf_t rwf_t; struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 #define MAY_READ 0x00000004 #define MAY_APPEND 0x00000008 #define MAY_ACCESS 0x00000010 #define MAY_OPEN 0x00000020 #define MAY_CHDIR 0x00000040 /* called from RCU mode, don't block */ #define MAY_NOT_BLOCK 0x00000080 /* * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open() */ /* file is open for reading */ #define FMODE_READ ((__force fmode_t)0x1) /* file is open for writing */ #define FMODE_WRITE ((__force fmode_t)0x2) /* file is seekable */ #define FMODE_LSEEK ((__force fmode_t)0x4) /* file can be accessed using pread */ #define FMODE_PREAD ((__force fmode_t)0x8) /* file can be accessed using pwrite */ #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) /* File writes are restricted (block device specific) */ #define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ #define FMODE_64BITHASH ((__force fmode_t)0x400) /* * Don't update ctime and mtime. * * Currently a special hack for the XFS open_by_handle ioctl, but we'll * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. */ #define FMODE_NOCMTIME ((__force fmode_t)0x800) /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)0x1000) /* File is huge (eg. 
/dev/mem): treat loff_t as unsigned */ #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) /* Write access to underlying fs */ #define FMODE_WRITER ((__force fmode_t)0x10000) /* Has read method(s) */ #define FMODE_CAN_READ ((__force fmode_t)0x20000) /* Has write method(s) */ #define FMODE_CAN_WRITE ((__force fmode_t)0x40000) #define FMODE_OPENED ((__force fmode_t)0x80000) #define FMODE_CREATED ((__force fmode_t)0x100000) /* File is stream-like */ #define FMODE_STREAM ((__force fmode_t)0x200000) /* File supports DIRECT IO */ #define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) #define FMODE_NOREUSE ((__force fmode_t)0x800000) /* File supports non-exclusive O_DIRECT writes from multiple threads */ #define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000) /* File is embedded in backing_file object */ #define FMODE_BACKING ((__force fmode_t)0x2000000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)0x8000000) /* File represents mount that needs unmounting */ #define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) /* File supports async buffered reads */ #define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) /* File supports async nowait buffered writes */ #define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000) /* * Attribute flags. These should be or-ed together to figure out what * has been changed! */ #define ATTR_MODE (1 << 0) #define ATTR_UID (1 << 1) #define ATTR_GID (1 << 2) #define ATTR_SIZE (1 << 3) #define ATTR_ATIME (1 << 4) #define ATTR_MTIME (1 << 5) #define ATTR_CTIME (1 << 6) #define ATTR_ATIME_SET (1 << 7) #define ATTR_MTIME_SET (1 << 8) #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ #define ATTR_KILL_SUID (1 << 11) #define ATTR_KILL_SGID (1 << 12) #define ATTR_FILE (1 << 13) #define ATTR_KILL_PRIV (1 << 14) #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) #define ATTR_TOUCH (1 << 17) /* * Whiteout is represented by a char device. The following constants define the * mode and device number to use. */ #define WHITEOUT_MODE 0 #define WHITEOUT_DEV 0 /* * This is the Inode Attributes structure, used for notify_change(). It * uses the above definitions as flags, to know which values have changed. * Also, in this manner, a Filesystem can look at only the values it cares * about. Basically, these are the attributes that the VFS layer can * request to change from the FS layer. * * Derek Atkins <warlord@MIT.EDU> 94-10-20 */ struct iattr { unsigned int ia_valid; umode_t ia_mode; /* * The two anonymous unions wrap structures with the same member. * * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which * are a dedicated type requiring the filesystem to use the dedicated * helpers. Other filesystem can continue to use ia_{g,u}id until they * have been ported. * * They always contain the same value. In other words FS_ALLOW_IDMAP * pass down the same value on idmapped mounts as they would on regular * mounts. 
*/ union { kuid_t ia_uid; vfsuid_t ia_vfsuid; }; union { kgid_t ia_gid; vfsgid_t ia_vfsgid; }; loff_t ia_size; struct timespec64 ia_atime; struct timespec64 ia_mtime; struct timespec64 ia_ctime; /* * Not an attribute, but an auxiliary info for filesystems wanting to * implement an ftruncate() like method. NOTE: filesystem should * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). */ struct file *ia_file; }; /* * Includes for diskquotas. */ #include <linux/quota.h> /* * Maximum number of layers of fs stack. Needs to be limited to * prevent kernel stack overflow */ #define FILESYSTEM_MAX_STACK_DEPTH 2 /** * enum positive_aop_returns - aop return codes with specific semantics * * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has * completed, that the page is still locked, and * should be considered active. The VM uses this hint * to return the page to the active list -- it won't * be a candidate for writeback again in the near * future. Other callers must be careful to unlock * the page if they get this return. Returned by * writepage(); * * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has * unlocked it and the page might have been truncated. * The caller should back up to acquiring a new page and * trying again. The aop will be taking reasonable * precautions not to livelock. If the caller held a page * reference, it should drop it before retrying. Returned * by read_folio(). * * address_space_operation functions return these large constants to indicate * special semantics to the caller. These are much larger than the bytes in a * page to allow for functions that return the number of bytes operated on in a * given page. */ enum positive_aop_returns { AOP_WRITEPAGE_ACTIVATE = 0x80000, AOP_TRUNCATED_PAGE = 0x80001, }; /* * oh the beauties of C type declarations. */ struct page; struct address_space; struct writeback_control; struct readahead_control; /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC #define IOCB_SYNC (__force int) RWF_SYNC #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) #define IOCB_DIRECT (1 << 17) #define IOCB_WRITE (1 << 18) /* iocb->ki_waitq is valid */ #define IOCB_WAITQ (1 << 19) #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) /* * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the * iocb completion can be passed back to the owner for execution from a safe * context rather than needing to be punted through a workqueue. If this * flag is set, the bio completion handling may set iocb->dio_complete to a * handler function and iocb->private to context information for that handler. * The issuer should call the handler with that context information from task * context to complete the processing of the iocb. Note that while this * provides a task context for the dio_complete() callback, it should only be * used on the completion side for non-IO generating completions. It's fine to * call blocking functions from this callback, but they should not wait for * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) /* kiocb is a read or write operation submitted by fs/aio.c. 
*/ #define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ { IOCB_HIPRI, "HIPRI" }, \ { IOCB_DSYNC, "DSYNC" }, \ { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } struct kiocb { struct file *ki_filp; loff_t ki_pos; void (*ki_complete)(struct kiocb *iocb, long ret); void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ union { /* * Only used for async buffered reads, where it denotes the * page waitqueue associated with completing the read. Valid * IFF IOCB_WAITQ is set. */ struct wait_page_queue *ki_waitq; /* * Can be used for O_DIRECT IO, where the completion handling * is punted back to the issuer of the IO. May only be set * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer * must then check for presence of this handler when ki_complete * is invoked. The data passed in to this handler must be * assigned to ->private when dio_complete is assigned. */ ssize_t (*dio_complete)(void *data); }; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) { return kiocb->ki_complete == NULL; } struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*read_folio)(struct file *, struct folio *); /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); /* Mark a folio dirty. Return true if this dirtied it */ bool (*dirty_folio)(struct address_space *, struct folio *); void (*readahead)(struct readahead_control *); int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); void (*invalidate_folio) (struct folio *, size_t offset, size_t len); bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *folio); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* * migrate the contents of a folio to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. */ int (*migrate_folio)(struct address_space *, struct folio *dst, struct folio *src, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb); int (*error_remove_folio)(struct address_space *, struct folio *); /* swapfile support */ int (*swap_activate)(struct swap_info_struct *sis, struct file *file, sector_t *span); void (*swap_deactivate)(struct file *file); int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter); }; extern const struct address_space_operations empty_aops; /** * struct address_space - Contents of a cacheable, mappable object. * @host: Owner, either the inode or the block_device. * @i_pages: Cached pages. * @invalidate_lock: Guards coherency between page cache contents and * file offset->disk block mappings in the filesystem during invalidates. * It is also used to block modification of page cache contents through * memory mappings. * @gfp_mask: Memory allocation flags to use for allocating pages. 
* @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings. * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. * @nrpages: Number of page entries, protected by the i_pages lock. * @writeback_index: Writeback starts here. * @a_ops: Methods. * @flags: Error bits and flags (AS_*). * @wb_err: The most recent error which has occurred. * @i_private_lock: For use by the owner of the address_space. * @i_private_list: For use by the owner of the address_space. * @i_private_data: For use by the owner of the address_space. */ struct address_space { struct inode *host; struct xarray i_pages; struct rw_semaphore invalidate_lock; gfp_t gfp_mask; atomic_t i_mmap_writable; #ifdef CONFIG_READ_ONLY_THP_FOR_FS /* number of thp, only for non-shmem files */ atomic_t nr_thps; #endif struct rb_root_cached i_mmap; unsigned long nrpages; pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; errseq_t wb_err; spinlock_t i_private_lock; struct list_head i_private_list; struct rw_semaphore i_mmap_rwsem; void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. */ /* XArray tags, for tagging dirty and writeback pages in the pagecache. */ #define PAGECACHE_TAG_DIRTY XA_MARK_0 #define PAGECACHE_TAG_WRITEBACK XA_MARK_1 #define PAGECACHE_TAG_TOWRITE XA_MARK_2 /* * Returns true if any of the pages in the mapping are marked with the tag. */ static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag) { return xa_marked(&mapping->i_pages, tag); } static inline void i_mmap_lock_write(struct address_space *mapping) { down_write(&mapping->i_mmap_rwsem); } static inline int i_mmap_trylock_write(struct address_space *mapping) { return down_write_trylock(&mapping->i_mmap_rwsem); } static inline void i_mmap_unlock_write(struct address_space *mapping) { up_write(&mapping->i_mmap_rwsem); } static inline int i_mmap_trylock_read(struct address_space *mapping) { return down_read_trylock(&mapping->i_mmap_rwsem); } static inline void i_mmap_lock_read(struct address_space *mapping) { down_read(&mapping->i_mmap_rwsem); } static inline void i_mmap_unlock_read(struct address_space *mapping) { up_read(&mapping->i_mmap_rwsem); } static inline void i_mmap_assert_locked(struct address_space *mapping) { lockdep_assert_held(&mapping->i_mmap_rwsem); } static inline void i_mmap_assert_write_locked(struct address_space *mapping) { lockdep_assert_held_write(&mapping->i_mmap_rwsem); } /* * Might pages of this file be mapped into userspace? */ static inline int mapping_mapped(struct address_space *mapping) { return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } /* * Might pages of this file have been modified in userspace? * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * * If i_mmap_writable is negative, no new writable mappings are allowed. You * can only deny writable mappings, if none exists right now. 
*/ static inline int mapping_writably_mapped(struct address_space *mapping) { return atomic_read(&mapping->i_mmap_writable) > 0; } static inline int mapping_map_writable(struct address_space *mapping) { return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? 0 : -EPERM; } static inline void mapping_unmap_writable(struct address_space *mapping) { atomic_dec(&mapping->i_mmap_writable); } static inline int mapping_deny_writable(struct address_space *mapping) { return atomic_dec_unless_positive(&mapping->i_mmap_writable) ? 0 : -EBUSY; } static inline void mapping_allow_writable(struct address_space *mapping) { atomic_inc(&mapping->i_mmap_writable); } /* * Use sequence counter to get consistent i_size on 32-bit processors. */ #if BITS_PER_LONG==32 && defined(CONFIG_SMP) #include <linux/seqlock.h> #define __NEED_I_SIZE_ORDERED #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) #else #define i_size_ordered_init(inode) do { } while (0) #endif struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) /* * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to * cache the ACL. This also means that ->get_inode_acl() can be called in RCU * mode with the LOOKUP_RCU flag. */ #define ACL_DONT_CACHE ((void *)(-3)) static inline struct posix_acl * uncached_acl_sentinel(struct task_struct *task) { return (void *)task + 1; } static inline bool is_uncached_acl(struct posix_acl *acl) { return (long)acl & 1; } #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 struct fsnotify_mark_connector; /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning * of the 'struct inode' */ struct inode { umode_t i_mode; unsigned short i_opflags; kuid_t i_uid; kgid_t i_gid; unsigned int i_flags; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif const struct inode_operations *i_op; struct super_block *i_sb; struct address_space *i_mapping; #ifdef CONFIG_SECURITY void *i_security; #endif /* Stat data, not accessed from path walking */ unsigned long i_ino; /* * Filesystems may only read i_nlink directly. They shall use the * following functions for modification: * * (set|clear|inc|drop)_nlink * inode_(inc|dec)_link_count */ union { const unsigned int i_nlink; unsigned int __i_nlink; }; dev_t i_rdev; loff_t i_size; struct timespec64 __i_atime; struct timespec64 __i_mtime; struct timespec64 __i_ctime; /* use inode_*_ctime accessors! 
*/ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif /* Misc */ unsigned long i_state; struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; struct hlist_node i_hash; struct list_head i_io_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ /* foreign inode detection, see wbc_detach_inode() */ int i_wb_frn_winner; u16 i_wb_frn_avg_time; u16 i_wb_frn_history; #endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; struct list_head i_wb_list; /* backing dev writeback list */ union { struct hlist_head i_dentry; struct rcu_head i_rcu; }; atomic64_t i_version; atomic64_t i_sequence; /* see futex */ atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) atomic_t i_readcount; /* struct files open RO */ #endif union { const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ void (*free_inode)(struct inode *); }; struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; union { struct pipe_inode_info *i_pipe; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; }; __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_inode_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif void *i_private; /* fs or device private pointer */ } __randomize_layout; struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode); static inline unsigned int i_blocksize(const struct inode *node) { return (1 << node->i_blkbits); } static inline int inode_unhashed(struct inode *inode) { return hlist_unhashed(&inode->i_hash); } /* * __mark_inode_dirty expects inodes to be hashed. Since we don't * want special inodes in the fileset inode space, we make them * appear hashed, but do not put on any lists. hlist_del() * will work fine and require no locking. */ static inline void inode_fake_hash(struct inode *inode) { hlist_add_fake(&inode->i_hash); } /* * inode->i_mutex nesting subclasses for the lock validator: * * 0: the object of the current VFS operation * 1: parent * 2: child/target * 3: xattr * 4: second non-directory * 5: second parent (when locking independent directories in rename) * * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two * non-directories at once. 
* * The locking order between these classes is * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { I_MUTEX_NORMAL, I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, I_MUTEX_NONDIR2, I_MUTEX_PARENT2, }; static inline void inode_lock(struct inode *inode) { down_write(&inode->i_rwsem); } static inline void inode_unlock(struct inode *inode) { up_write(&inode->i_rwsem); } static inline void inode_lock_shared(struct inode *inode) { down_read(&inode->i_rwsem); } static inline void inode_unlock_shared(struct inode *inode) { up_read(&inode->i_rwsem); } static inline int inode_trylock(struct inode *inode) { return down_write_trylock(&inode->i_rwsem); } static inline int inode_trylock_shared(struct inode *inode) { return down_read_trylock(&inode->i_rwsem); } static inline int inode_is_locked(struct inode *inode) { return rwsem_is_locked(&inode->i_rwsem); } static inline void inode_lock_nested(struct inode *inode, unsigned subclass) { down_write_nested(&inode->i_rwsem, subclass); } static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass) { down_read_nested(&inode->i_rwsem, subclass); } static inline void filemap_invalidate_lock(struct address_space *mapping) { down_write(&mapping->invalidate_lock); } static inline void filemap_invalidate_unlock(struct address_space *mapping) { up_write(&mapping->invalidate_lock); } static inline void filemap_invalidate_lock_shared(struct address_space *mapping) { down_read(&mapping->invalidate_lock); } static inline int filemap_invalidate_trylock_shared( struct address_space *mapping) { return down_read_trylock(&mapping->invalidate_lock); } static inline void filemap_invalidate_unlock_shared( struct address_space *mapping) { up_read(&mapping->invalidate_lock); } void lock_two_nondirectories(struct inode *, struct inode*); void unlock_two_nondirectories(struct inode *, struct inode*); void filemap_invalidate_lock_two(struct address_space *mapping1, struct address_space *mapping2); void filemap_invalidate_unlock_two(struct address_space *mapping1, struct address_space *mapping2); /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic * with respect to the local cpu (unlike with preempt disabled), * but they don't need to be atomic with respect to other cpus like in * true SMP (so they need either to either locally disable irq around * the read or for example on x86 they can be still implemented as a * cmpxchg8b without the need of the lock prefix). For SMP compiles * and 64bit archs it makes no difference if preempt is enabled or not. */ static inline loff_t i_size_read(const struct inode *inode) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) loff_t i_size; unsigned int seq; do { seq = read_seqcount_begin(&inode->i_size_seqcount); i_size = inode->i_size; } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); return i_size; #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) loff_t i_size; preempt_disable(); i_size = inode->i_size; preempt_enable(); return i_size; #else /* Pairs with smp_store_release() in i_size_write() */ return smp_load_acquire(&inode->i_size); #endif } /* * NOTE: unlike i_size_read(), i_size_write() does need locking around it * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount * can be lost, resulting in subsequent i_size_read() calls spinning forever. 
*/ static inline void i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) preempt_disable(); write_seqcount_begin(&inode->i_size_seqcount); inode->i_size = i_size; write_seqcount_end(&inode->i_size_seqcount); preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); inode->i_size = i_size; preempt_enable(); #else /* * Pairs with smp_load_acquire() in i_size_read() to ensure * changes related to inode size (such as page contents) are * visible before we see the changed inode size. */ smp_store_release(&inode->i_size, i_size); #endif } static inline unsigned iminor(const struct inode *inode) { return MINOR(inode->i_rdev); } static inline unsigned imajor(const struct inode *inode) { return MAJOR(inode->i_rdev); } struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ kuid_t uid, euid; /* uid/euid of process setting the owner */ int signum; /* posix.1b rt signal to be delivered on IO */ }; /** * struct file_ra_state - Track a file's readahead state. * @start: Where the most recent readahead started. * @size: Number of pages read in the most recent readahead. * @async_size: Numer of pages that were/are not needed immediately * and so were/are genuinely "ahead". Start next readahead when * the first of these pages is accessed. * @ra_pages: Maximum size of a readahead request, copied from the bdi. * @mmap_miss: How many mmap accesses missed in the page cache. * @prev_pos: The last byte in the most recent read request. * * When this structure is passed to ->readahead(), the "most recent" * readahead means the current readahead. */ struct file_ra_state { pgoff_t start; unsigned int size; unsigned int async_size; unsigned int ra_pages; unsigned int mmap_miss; loff_t prev_pos; }; /* * Check if @index falls in the readahead windows. */ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) { return (index >= ra->start && index < ra->start + ra->size); } /* * f_{lock,count,pos_lock} members can be highly contended and share * the same cacheline. f_{lock,mode} are very frequently used together * and so share the same cacheline as well. The read-mostly * f_{path,inode,op} are kept on a separate cacheline. */ struct file { union { /* fput() uses task work when closing and freeing file (default). */ struct callback_head f_task_work; /* fput() must use workqueue (most kernel threads). */ struct llist_node f_llist; unsigned int f_iocb_flags; }; /* * Protects f_ep, f_flags. * Must not be taken from IRQ context. 
*/ spinlock_t f_lock; fmode_t f_mode; atomic_long_t f_count; struct mutex f_pos_lock; loff_t f_pos; unsigned int f_flags; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; u64 f_version; #ifdef CONFIG_SECURITY void *f_security; #endif /* needed for tty driver, and maybe others */ void *private_data; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct hlist_head *f_ep; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err; errseq_t f_sb_err; /* for syncfs */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ unsigned char f_handle[]; }; static inline struct file *get_file(struct file *f) { atomic_long_inc(&f->f_count); return f; } struct file *get_file_rcu(struct file __rcu **f); struct file *get_file_active(struct file **f); #define file_count(x) atomic_long_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 #define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 #define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif /* legacy typedef, should eventually be removed */ typedef void *fl_owner_t; struct file_lock; struct file_lease; /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX #define OFFSET_MAX type_max(loff_t) #define OFFT_OFFSET_MAX type_max(off_t) #endif extern void send_sigio(struct fown_struct *fown, int fd, int band); static inline struct inode *file_inode(const struct file *f) { return f->f_inode; } /* * file_dentry() is a relic from the days that overlayfs was using files with a * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs. * In those days, file_dentry() was needed to get the underlying fs dentry that * matches f_inode. * Files with "fake" path should not exist nowadays, so use an assertion to make * sure that file_dentry() was not papering over filesystem bugs. */ static inline struct dentry *file_dentry(const struct file *file) { struct dentry *dentry = file->f_path.dentry; WARN_ON_ONCE(d_inode(dentry) != file_inode(file)); return dentry; } struct fasync_struct { rwlock_t fa_lock; int magic; int fa_fd; struct fasync_struct *fa_next; /* singly linked list */ struct file *fa_file; struct rcu_head fa_rcu; }; #define FASYNC_MAGIC 0x4601 /* SMP safe fasync helpers: */ extern int fasync_helper(int, struct file *, int, struct fasync_struct **); extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); extern int fasync_remove_entry(struct file *, struct fasync_struct **); extern struct fasync_struct *fasync_alloc(void); extern void fasync_free(struct fasync_struct *); /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); /* * sb->s_flags. Note that these mirror the equivalent MS_* flags where * represented in both. 
*/ #define SB_RDONLY BIT(0) /* Mount read-only */ #define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */ #define SB_NODEV BIT(2) /* Disallow access to device special files */ #define SB_NOEXEC BIT(3) /* Disallow program execution */ #define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */ #define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */ #define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */ #define SB_NOATIME BIT(10) /* Do not update access times. */ #define SB_NODIRATIME BIT(11) /* Do not update directory access times */ #define SB_SILENT BIT(15) #define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */ #define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */ #define SB_I_VERSION BIT(23) /* Update inode I_version field */ #define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ #define SB_DEAD BIT(21) #define SB_DYING BIT(24) #define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) #define SB_BORN BIT(29) #define SB_ACTIVE BIT(30) #define SB_NOUSER BIT(31) /* These flags relate to encoding and casefolding */ #define SB_ENC_STRICT_MODE_FL (1 << 0) #define sb_has_strict_encoding(sb) \ (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) /* * Umount options */ #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ #define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 #define SB_I_EVM_UNSUPPORTED 0x00000080 #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ /* Possible states of 'frozen' field */ enum { SB_UNFROZEN = 0, /* FS is unfrozen */ SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop * internal threads if needed) */ SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ }; #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) struct sb_writers { unsigned short frozen; /* Is sb frozen? */ int freeze_kcount; /* How many kernel freeze requests? */ int freeze_ucount; /* How many userspace freeze requests? 
*/ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ unsigned char s_blocksize_bits; unsigned long s_blocksize; loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; const struct quotactl_ops *s_qcop; const struct export_operations *s_export_op; unsigned long s_flags; unsigned long s_iflags; /* internal SB_I_* flags */ unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; int s_count; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; #endif const struct xattr_handler * const *s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif #if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ struct file *s_bdev_file; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct hlist_node s_instances; unsigned int s_quota_types; /* Bitmask of supported quota types */ struct quota_info s_dquot; /* Diskquota specific options */ struct sb_writers s_writers; /* * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and * s_fsnotify_marks together for cache efficiency. They are frequently * accessed and rarely modified. */ void *s_fs_info; /* Filesystem private info */ /* Granularity of c/m/atime in ns (cannot be worse than a second) */ u32 s_time_gran; /* Time limits for c/m/atime in seconds */ time64_t s_time_min; time64_t s_time_max; #ifdef CONFIG_FSNOTIFY __u32 s_fsnotify_mask; struct fsnotify_mark_connector __rcu *s_fsnotify_marks; #endif /* * q: why are s_id and s_sysfs_name not the same? both are human * readable strings that identify the filesystem * a: s_id is allowed to change at runtime; it's used in log messages, * and we want to when a device starts out as single device (s_id is dev * name) but then a device is hot added and we have to switch to * identifying it by UUID * but s_sysfs_name is a handle for programmatic access, and can't * change at runtime */ char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */ /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */ char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. */ struct mutex s_vfs_rename_mutex; /* Kludge */ /* * Filesystem subtype. If non-empty the filesystem type field * in /proc/mounts will be "type.subtype" */ const char *s_subtype; const struct dentry_operations *s_d_op; /* default d_op for dentries */ struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; /* * Number of inode/mount/sb objects that are being watched, note that * inodes objects are currently double-accounted. 
*/ atomic_long_t s_fsnotify_connectors; /* Read-only state of the superblock is being changed */ int s_readonly_remount; /* per-sb errseq_t for reporting writeback errors via syncfs */ errseq_t s_wb_err; /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; struct hlist_head s_pins; /* * Owning user namespace and default context in which to * interpret filesystem uids, gids, quotas, device nodes, * xattrs and security labels. */ struct user_namespace *s_user_ns; /* * The list_lru structure is essentially just a pointer to a table * of per-node lru lists, each of which has its own spinlock. * There is no need to put them into separate cachelines. */ struct list_lru s_dentry_lru; struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; struct mutex s_sync_lock; /* sync serialisation lock */ /* * Indicates how deep in a filesystem stack this SB is */ int s_stack_depth; /* s_inode_list_lock protects s_inodes */ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; /* all inodes */ spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; static inline struct user_namespace *i_user_ns(const struct inode *inode) { return inode->i_sb->s_user_ns; } /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can * instead deal with the raw numeric values that are stored * in the filesystem. */ static inline uid_t i_uid_read(const struct inode *inode) { return from_kuid(i_user_ns(inode), inode->i_uid); } static inline gid_t i_gid_read(const struct inode *inode) { return from_kgid(i_user_ns(inode), inode->i_gid); } static inline void i_uid_write(struct inode *inode, uid_t uid) { inode->i_uid = make_kuid(i_user_ns(inode), uid); } static inline void i_gid_write(struct inode *inode, gid_t gid) { inode->i_gid = make_kgid(i_user_ns(inode), gid); } /** * i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping * @idmap: idmap of the mount the inode was found from * @inode: inode to map * * Return: whe inode's i_uid mapped down according to @idmap. * If the inode's i_uid has no mapping INVALID_VFSUID is returned. */ static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap, const struct inode *inode) { return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid); } /** * i_uid_needs_update - check whether inode's i_uid needs to be updated * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Check whether the $inode's i_uid field needs to be updated taking idmapped * mounts into account if the filesystem supports it. * * Return: true if @inode's i_uid field needs to be updated, false if not. */ static inline bool i_uid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { return ((attr->ia_valid & ATTR_UID) && !vfsuid_eq(attr->ia_vfsuid, i_uid_into_vfsuid(idmap, inode))); } /** * i_uid_update - update @inode's i_uid field * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_uid field translating the vfsuid of any idmapped * mount into the filesystem kuid. 
*/ static inline void i_uid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { if (attr->ia_valid & ATTR_UID) inode->i_uid = from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid); } /** * i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping * @idmap: idmap of the mount the inode was found from * @inode: inode to map * * Return: the inode's i_gid mapped down according to @idmap. * If the inode's i_gid has no mapping INVALID_VFSGID is returned. */ static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap, const struct inode *inode) { return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid); } /** * i_gid_needs_update - check whether inode's i_gid needs to be updated * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Check whether the $inode's i_gid field needs to be updated taking idmapped * mounts into account if the filesystem supports it. * * Return: true if @inode's i_gid field needs to be updated, false if not. */ static inline bool i_gid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { return ((attr->ia_valid & ATTR_GID) && !vfsgid_eq(attr->ia_vfsgid, i_gid_into_vfsgid(idmap, inode))); } /** * i_gid_update - update @inode's i_gid field * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_gid field translating the vfsgid of any idmapped * mount into the filesystem kgid. */ static inline void i_gid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { if (attr->ia_valid & ATTR_GID) inode->i_gid = from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid); } /** * inode_fsuid_set - initialize inode's i_uid field with callers fsuid * @inode: inode to initialize * @idmap: idmap of the mount the inode was found from * * Initialize the i_uid field of @inode. If the inode was found/created via * an idmapped mount map the caller's fsuid according to @idmap. */ static inline void inode_fsuid_set(struct inode *inode, struct mnt_idmap *idmap) { inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode)); } /** * inode_fsgid_set - initialize inode's i_gid field with callers fsgid * @inode: inode to initialize * @idmap: idmap of the mount the inode was found from * * Initialize the i_gid field of @inode. If the inode was found/created via * an idmapped mount map the caller's fsgid according to @idmap. */ static inline void inode_fsgid_set(struct inode *inode, struct mnt_idmap *idmap) { inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode)); } /** * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped * @sb: the superblock we want a mapping in * @idmap: idmap of the relevant mount * * Check whether the caller's fsuid and fsgid have a valid mapping in the * s_user_ns of the superblock @sb. If the caller is on an idmapped mount map * the caller's fsuid and fsgid according to the @idmap first. * * Return: true if fsuid and fsgid is mapped, false if not. 
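 *
 * For illustration, creation paths typically bail out early when the
 * caller's ids cannot be represented in the filesystem (here "dir" stands
 * for the parent directory inode):
 *
 *	if (!fsuidgid_has_mapping(dir->i_sb, idmap))
 *		return -EOVERFLOW;
 *
 * so that an unmapped fsuid/fsgid is never written into an inode.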
*/ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct mnt_idmap *idmap) { struct user_namespace *fs_userns = sb->s_user_ns; kuid_t kuid; kgid_t kgid; kuid = mapped_fsuid(idmap, fs_userns); if (!uid_valid(kuid)) return false; kgid = mapped_fsgid(idmap, fs_userns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && kgid_has_mapping(fs_userns, kgid); } struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); static inline time64_t inode_get_atime_sec(const struct inode *inode) { return inode->__i_atime.tv_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { return inode->__i_atime.tv_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { return inode->__i_atime; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { inode->__i_atime = ts; return ts; } static inline struct timespec64 inode_set_atime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_atime_to_ts(inode, ts); } static inline time64_t inode_get_mtime_sec(const struct inode *inode) { return inode->__i_mtime.tv_sec; } static inline long inode_get_mtime_nsec(const struct inode *inode) { return inode->__i_mtime.tv_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { return inode->__i_mtime; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { inode->__i_mtime = ts; return ts; } static inline struct timespec64 inode_set_mtime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_mtime_to_ts(inode, ts); } static inline time64_t inode_get_ctime_sec(const struct inode *inode) { return inode->__i_ctime.tv_sec; } static inline long inode_get_ctime_nsec(const struct inode *inode) { return inode->__i_ctime.tv_nsec; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) { return inode->__i_ctime; } static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { inode->__i_ctime = ts; return ts; } /** * inode_set_ctime - set the ctime in the inode * @inode: inode in which to set the ctime * @sec: tv_sec value to set * @nsec: tv_nsec value to set * * Set the ctime in @inode to { @sec, @nsec } */ static inline struct timespec64 inode_set_ctime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_ctime_to_ts(inode, ts); } struct timespec64 simple_inode_init_ts(struct inode *inode); /* * Snapshotting support. */ /* * These are internal functions, please use sb_start_{write,pagefault,intwrite} * instead. 
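 *
 * For illustration, the public pairing is used roughly as:
 *
 *	sb_start_write(sb);
 *	...dirty pages and/or inodes...
 *	sb_end_write(sb);
 *
 * Most callers go through helpers such as file_start_write() and
 * file_end_write() instead of using the level-based primitives directly.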
*/ static inline void __sb_end_write(struct super_block *sb, int level) { percpu_up_read(sb->s_writers.rw_sem + level-1); } static inline void __sb_start_write(struct super_block *sb, int level) { percpu_down_read(sb->s_writers.rw_sem + level - 1); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) { return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); } #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) #define __sb_writers_release(sb, lev) \ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) /** * __sb_write_started - check if sb freeze level is held * @sb: the super we write to * @level: the freeze level * * * > 0 - sb freeze level is held * * 0 - sb freeze level is not held * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN */ static inline int __sb_write_started(const struct super_block *sb, int level) { return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1); } /** * sb_write_started - check if SB_FREEZE_WRITE is held * @sb: the super we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. */ static inline bool sb_write_started(const struct super_block *sb) { return __sb_write_started(sb, SB_FREEZE_WRITE); } /** * sb_write_not_started - check if SB_FREEZE_WRITE is not held * @sb: the super we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. */ static inline bool sb_write_not_started(const struct super_block *sb) { return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0; } /** * file_write_started - check if SB_FREEZE_WRITE is held * @file: the file we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. * May be false positive with !S_ISREG, because file_start_write() has * no effect on !S_ISREG. */ static inline bool file_write_started(const struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_write_started(file_inode(file)->i_sb); } /** * file_write_not_started - check if SB_FREEZE_WRITE is not held * @file: the file we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. * May be false positive with !S_ISREG, because file_start_write() has * no effect on !S_ISREG. */ static inline bool file_write_not_started(const struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_write_not_started(file_inode(file)->i_sb); } /** * sb_end_write - drop write access to a superblock * @sb: the super we wrote to * * Decrement number of writers to the filesystem. Wake up possible waiters * wanting to freeze the filesystem. */ static inline void sb_end_write(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_WRITE); } /** * sb_end_pagefault - drop write access to a superblock from a page fault * @sb: the super we wrote to * * Decrement number of processes handling write page fault to the filesystem. * Wake up possible waiters wanting to freeze the filesystem. */ static inline void sb_end_pagefault(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_PAGEFAULT); } /** * sb_end_intwrite - drop write access to a superblock for internal fs purposes * @sb: the super we wrote to * * Decrement fs-internal number of writers to the filesystem. Wake up possible * waiters wanting to freeze the filesystem. 
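 *
 * For illustration, a filesystem might bracket an internal transaction as:
 *
 *	sb_start_intwrite(sb);
 *	...journal / metadata updates...
 *	sb_end_intwrite(sb);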
*/ static inline void sb_end_intwrite(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_FS); } /** * sb_start_write - get write access to a superblock * @sb: the super we write to * * When a process wants to write data or metadata to a file system (i.e. dirty * a page or an inode), it should embed the operation in a sb_start_write() - * sb_end_write() pair to get exclusion against file system freezing. This * function increments number of writers preventing freezing. If the file * system is already frozen, the function waits until the file system is * thawed. * * Since freeze protection behaves as a lock, users have to preserve * ordering of freeze protection and other filesystem locks. Generally, * freeze protection should be the outermost lock. In particular, we have: * * sb_start_write * -> i_mutex (write path, truncate, directory ops, ...) * -> s_umount (freeze_super, thaw_super) */ static inline void sb_start_write(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_WRITE); } static inline bool sb_start_write_trylock(struct super_block *sb) { return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** * sb_start_pagefault - get write access to a superblock from a page fault * @sb: the super we write to * * When a process starts handling write page fault, it should embed the * operation into sb_start_pagefault() - sb_end_pagefault() pair to get * exclusion against file system freezing. This is needed since the page fault * is going to dirty a page. This function increments number of running page * faults preventing freezing. If the file system is already frozen, the * function waits until the file system is thawed. * * Since page fault freeze protection behaves as a lock, users have to preserve * ordering of freeze protection and other filesystem locks. It is advised to * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault * handling code implies lock dependency: * * mmap_lock * -> sb_start_pagefault */ static inline void sb_start_pagefault(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /** * sb_start_intwrite - get write access to a superblock for internal fs purposes * @sb: the super we write to * * This is the third level of protection against filesystem freezing. It is * free for use by a filesystem. The only requirement is that it must rank * below sb_start_pagefault. * * For example filesystem can call sb_start_intwrite() when starting a * transaction which somewhat eases handling of freezing for internal sources * of filesystem changes (internal fs threads, discarding preallocation on file * close, etc.). */ static inline void sb_start_intwrite(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_FS); } static inline bool sb_start_intwrite_trylock(struct super_block *sb) { return __sb_start_write_trylock(sb, SB_FREEZE_FS); } bool inode_owner_or_capable(struct mnt_idmap *idmap, const struct inode *inode); /* * VFS helper functions.. 
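 *
 * For illustration (a sketch, not a statement about any particular caller),
 * kernel code that already holds an open struct file can use the file-based
 * helpers declared below without building a struct path:
 *
 *	err = vfs_fchown(file, uid, gid);
 *	if (!err)
 *		err = vfs_fchmod(file, 0600);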
*/ int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); int vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, struct dentry *, struct inode **); int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *); int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, struct inode **); /** * struct renamedata - contains all information required for renaming * @old_mnt_idmap: idmap of the old mount the inode was found from * @old_dir: parent of source * @old_dentry: source * @new_mnt_idmap: idmap of the new mount the inode was found from * @new_dir: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { struct mnt_idmap *old_mnt_idmap; struct inode *old_dir; struct dentry *old_dentry; struct mnt_idmap *new_mnt_idmap; struct inode *new_dir; struct dentry *new_dentry; struct inode **delegated_inode; unsigned int flags; } __randomize_layout; int vfs_rename(struct renamedata *); static inline int vfs_whiteout(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); } struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred); struct file *kernel_file_open(const struct path *path, int flags, struct inode *inode, const struct cred *cred); int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), void *); int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); int vfs_utimes(const struct path *path, struct timespec64 *times); extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #else #define compat_ptr_ioctl NULL #endif /* * VFS file helper functions. */ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, const struct inode *dir, umode_t mode); extern bool may_open_dev(const struct path *path); umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode); /* * This is the "filldir" function type, used by readdir() to let * the kernel specify what kind of dirent layout it wants to have. * This allows the kernel to read directories into kernel space or * to have different dirent layouts depending on the binary type. * Return 'true' to keep going and 'false' if there are no more entries. */ struct dir_context; typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, unsigned); struct dir_context { filldir_t actor; loff_t pos; }; /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. 
* * NOMMU_MAP_COPY: Copy can be mapped (MAP_PRIVATE) * NOMMU_MAP_DIRECT: Can be mapped directly (MAP_SHARED) * NOMMU_MAP_READ: Can be mapped for reading * NOMMU_MAP_WRITE: Can be mapped for writing * NOMMU_MAP_EXEC: Can be mapped for execution */ #define NOMMU_MAP_COPY 0x00000001 #define NOMMU_MAP_DIRECT 0x00000008 #define NOMMU_MAP_READ VM_MAYREAD #define NOMMU_MAP_WRITE VM_MAYWRITE #define NOMMU_MAP_EXEC VM_MAYEXEC #define NOMMU_VMFLAGS \ (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) /* * These flags control the behavior of the remap_file_range function pointer. * If it is called with len == 0 that means "remap to end of source file". * See Documentation/filesystems/vfs.rst for more details about this call. * * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request */ #define REMAP_FILE_DEDUP (1 << 0) #define REMAP_FILE_CAN_SHORTEN (1 << 1) /* * These flags signal that the caller is ok with altering various aspects of * the behavior of the remap operation. The changes must be made by the * implementation; the vfs remap helper functions can take advantage of them. * Flags in this category exist to preserve the quirky behavior of the hoisted * btrfs clone/dedupe ioctls. */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) /* * These flags control the behavior of vfs_copy_file_range(). * They are not available to the user via syscall. * * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops */ #define COPY_FILE_SPLICE (1 << 0) struct iov_iter; struct io_uring_cmd; struct offset_ctx; struct file_operations { struct module *owner; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, unsigned int flags); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); unsigned long mmap_supported_flags; int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); void (*splice_eof)(struct file *file); int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); #ifndef CONFIG_MMU unsigned (*mmap_capabilities)(struct file *); #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, struct file 
*file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); } __randomize_layout; /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, int (*) (struct file *, struct dir_context *)); #define WRAP_DIR_ITER(x) \ static int shared_##x(struct file *file , struct dir_context *ctx) \ { return wrap_directory_iterator(file, ctx, x); } struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct mnt_idmap *, struct inode *, int); struct posix_acl * (*get_inode_acl)(struct inode *, int, bool); int (*readlink) (struct dentry *, char __user *,int); int (*create) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *, const char *); int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t,dev_t); int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *); int (*getattr) (struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); int (*tmpfile) (struct mnt_idmap *, struct inode *, struct file *, umode_t); struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *, int); int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); int (*fileattr_set)(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, struct iov_iter *iter) { return file->f_op->read_iter(kio, iter); } static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, struct iov_iter *iter) { return file->f_op->write_iter(kio, iter); } static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { return file->f_op->mmap(file, vma); } extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, const struct iomap_ops *dax_read_ops); int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct 
file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags); /** * enum freeze_holder - holder of the freeze * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed * * Indicate who the owner of the freeze or thaw request is and whether * the freeze needs to be exclusive or can nest. * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the * same holder aren't allowed. It is however allowed to hold a single * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at * the same time. This is relied upon by some filesystems during online * repair or similar. */ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), FREEZE_MAY_NEST = (1U << 2), }; struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); void (*free_inode)(struct inode *); void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); int (*freeze_super) (struct super_block *, enum freeze_holder who); int (*freeze_fs) (struct super_block *); int (*thaw_super) (struct super_block *, enum freeze_holder who); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); struct dquot __rcu **(*get_dquots)(struct inode *); #endif long (*nr_cached_objects)(struct super_block *, struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); void (*shutdown)(struct super_block *sb); }; /* * Inode flags - they have no relation to superblock flags now */ #define S_SYNC (1 << 0) /* Writes are synced at once */ #define S_NOATIME (1 << 1) /* Do not update access times */ #define S_APPEND (1 << 2) /* Append-only file */ #define S_IMMUTABLE (1 << 3) /* Immutable file */ #define S_DEAD (1 << 4) /* removed, but still open directory */ #define S_NOQUOTA (1 << 5) /* Inode is not counted to quota */ #define S_DIRSYNC (1 << 6) /* Directory modifications are synchronous */ #define S_NOCMTIME (1 << 7) /* Do not update file c/mtime */ #define S_SWAPFILE (1 << 8) /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE (1 << 9) /* Inode is fs-internal */ #define S_IMA (1 << 10) /* Inode has an associated IMA struct */ #define S_AUTOMOUNT (1 << 11) /* Automount/referral quasi-directory */ #define S_NOSEC (1 << 12) /* no suid or xattr security attributes */ #ifdef CONFIG_FS_DAX #define S_DAX (1 << 13) /* Direct 
Access, avoiding the page cache */ #else #define S_DAX 0 /* Make all the DAX code disappear */ #endif #define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system * flags just means all the inodes inherit those flags by default. It might be * possible to override it selectively if you really wanted to with some * ioctl() that is not currently implemented. * * Exception: SB_RDONLY is always applied to the entire file system. * * Unfortunately, it is possible to change a filesystems flags with it mounted * with files in use. This means that all of the inodes will not have their * i_flags updated. Hence, i_flags no longer inherit the superblock mount * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org */ #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; } #define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) #define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS|SB_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, SB_MANDLOCK) #define IS_NOATIME(inode) __IS_FLG(inode, SB_RDONLY|SB_NOATIME) #define IS_I_VERSION(inode) __IS_FLG(inode, SB_I_VERSION) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) #ifdef CONFIG_FS_POSIX_ACL #define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL) #else #define IS_POSIXACL(inode) 0 #endif #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) #define IS_DAX(inode) ((inode)->i_flags & S_DAX) #define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD) #define IS_VERITY(inode) ((inode)->i_flags & S_VERITY) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) { return !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)); } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = filp->f_iocb_flags, .ki_ioprio = get_current_ioprio(), }; } static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = kiocb_src->ki_flags, .ki_ioprio = kiocb_src->ki_ioprio, .ki_pos = kiocb_src->ki_pos, }; } /* * Inode state bits. Protected by inode->i_lock * * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. * * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, * until that flag is cleared. 
 * I_WILL_FREE, I_FREEING and I_CLEAR are set at various stages of
 * removing an inode.
 *
 * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
 *
 * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
 *			fdatasync() (unless I_DIRTY_DATASYNC is also set).
 *			Timestamp updates are the usual cause.
 * I_DIRTY_DATASYNC	Data-related inode changes pending.  We keep track of
 *			these changes separately from I_DIRTY_SYNC so that we
 *			don't have to write inode on fdatasync() when only
 *			e.g. the timestamps have changed.
 * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
 * I_DIRTY_TIME		The inode itself has dirty timestamps, and the
 *			lazytime mount option is enabled.  We keep track of this
 *			separately from I_DIRTY_SYNC in order to implement
 *			lazytime.  This gets cleared if I_DIRTY_INODE
 *			(I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set.  But
 *			I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
 *			in place because writeback might already be in progress
 *			and we don't want to lose the time update
 * I_NEW		Serves as both a mutex and completion notification.
 *			New inodes set I_NEW.  If two processes both create
 *			the same inode, one of them will release its inode and
 *			wait for I_NEW to be released before returning.
 *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
 *			also cause waiting on I_NEW, without I_NEW actually
 *			being set.  find_inode() uses this to prevent returning
 *			nearly-dead inodes.
 * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
 *			is zero.  I_FREEING must be set when I_WILL_FREE is
 *			cleared.
 * I_FREEING		Set when inode is about to be freed but still has dirty
 *			pages or buffers attached or the inode itself is still
 *			dirty.
 * I_CLEAR		Added by clear_inode().  In this state the inode is
 *			clean and can be destroyed.  Inode keeps I_FREEING.
 *
 *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
 *			prohibited for many purposes.  iget() must wait for
 *			the inode to be completely released, then create it
 *			anew.  Other functions will just ignore such inodes,
 *			if appropriate.  I_NEW is used for waiting.
 *
 * I_SYNC		Writeback of inode is running.  The bit is set during
 *			data writeback, and cleared with a wakeup on the bit
 *			address once it is done.  The bit is also used to pin
 *			the inode in memory for flusher thread.
 *
 * I_REFERENCED		Marks the inode as recently referenced on the LRU list.
 *
 * I_DIO_WAKEUP		Never set.  Only used as a key for wait_on_bit().
 *
 * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
 *			synchronize competing switching instances and to tell
 *			wb stat updates to grab the i_pages lock.  See
 *			inode_switch_wbs_work_fn() for details.
 *
 * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
 *			and work dirs among overlayfs mounts.
 *
 * I_CREATING		New object's inode in the middle of setting up.
 *
 * I_DONTCACHE		Evict inode as soon as it is not used anymore.
 *
 * I_SYNC_QUEUED	Inode is queued in b_io or b_more_io writeback lists.
 *			Used to detect that mark_inode_dirty() should not move
 *			inode between dirty lists.
 *
 * I_PINNING_NETFS_WB	Inode is pinning an fscache object for writeback.
 *
 * Q: What is the difference between I_WILL_FREE and I_FREEING?
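 *
 * For illustration, the helpers defined below map onto these bits as follows:
 *
 *	mark_inode_dirty(inode);	marks I_DIRTY (inode and data pages)
 *	mark_inode_dirty_sync(inode);	marks I_DIRTY_SYNC only, e.g. for
 *					timestamp-only updates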
*/ #define I_DIRTY_SYNC (1 << 0) #define I_DIRTY_DATASYNC (1 << 1) #define I_DIRTY_PAGES (1 << 2) #define __I_NEW 3 #define I_NEW (1 << __I_NEW) #define I_WILL_FREE (1 << 4) #define I_FREEING (1 << 5) #define I_CLEAR (1 << 6) #define __I_SYNC 7 #define I_SYNC (1 << __I_SYNC) #define I_REFERENCED (1 << 8) #define __I_DIO_WAKEUP 9 #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) #define I_PINNING_NETFS_WB (1 << 18) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) extern void __mark_inode_dirty(struct inode *, int); static inline void mark_inode_dirty(struct inode *inode) { __mark_inode_dirty(inode, I_DIRTY); } static inline void mark_inode_dirty_sync(struct inode *inode) { __mark_inode_dirty(inode, I_DIRTY_SYNC); } /* * Returns true if the given inode itself only has dirty timestamps (its pages * may still be dirty) and isn't currently being allocated or freed. * Filesystems should call this if when writing an inode when lazytime is * enabled, they want to opportunistically write the timestamps of other inodes * located very nearby on-disk, e.g. in the same inode block. This returns true * if the given inode is in need of such an opportunistic update. Requires * i_lock, or at least later re-checking under i_lock. */ static inline bool inode_is_dirtytime_only(struct inode *inode) { return (inode->i_state & (I_DIRTY_TIME | I_NEW | I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME; } extern void inc_nlink(struct inode *inode); extern void drop_nlink(struct inode *inode); extern void clear_nlink(struct inode *inode); extern void set_nlink(struct inode *inode, unsigned int nlink); static inline void inode_inc_link_count(struct inode *inode) { inc_nlink(inode); mark_inode_dirty(inode); } static inline void inode_dec_link_count(struct inode *inode) { drop_nlink(inode); mark_inode_dirty(inode); } enum file_time_flags { S_ATIME = 1, S_MTIME = 2, S_CTIME = 4, S_VERSION = 8, }; extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); int inode_update_time(struct inode *inode, int flags); static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) touch_atime(&file->f_path); } extern int file_modified(struct file *file); int kiocb_modified(struct kiocb *iocb); int sync_inode_metadata(struct inode *inode, int wait); struct file_system_type { const char *name; int fs_flags; #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
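 *
 * For illustration, a minimal block-based filesystem type (here the made-up
 * "foofs", with a hypothetical foofs_init_fs_context) would typically be
 * declared and registered as:
 *
 *	static struct file_system_type foofs_fs_type = {
 *		.owner		= THIS_MODULE,
 *		.name		= "foofs",
 *		.init_fs_context = foofs_init_fs_context,
 *		.kill_sb	= kill_block_super,
 *		.fs_flags	= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 *	};
 *	MODULE_ALIAS_FS("foofs");
 *
 *	err = register_filesystem(&foofs_fs_type);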
*/ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; struct hlist_head fs_supers; struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; struct lock_class_key s_vfs_rename_key; struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; struct lock_class_key i_lock_key; struct lock_class_key i_mutex_key; struct lock_class_key invalidate_lock_key; struct lock_class_key i_mutex_dir_key; }; #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); int set_anon_super_fc(struct super_block *s, struct fs_context *fc); int get_anon_bdev(dev_t *); void free_anon_bdev(dev_t); struct super_block *sget_fc(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*set)(struct super_block *, struct fs_context *)); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) #define fops_put(fops) \ do { if (fops) module_put((fops)->owner); } while(0) /* * This one is to be used *ONLY* from ->open() instances. * fops must be non-NULL, pinned down *and* module dependencies * should be sufficient to pin the caller down as well. 
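 *
 * For illustration, the intended pattern (loosely modelled on character
 * device dispatch; "real_fops" is a stand-in) looks like:
 *
 *	static int dispatch_open(struct inode *inode, struct file *filp)
 *	{
 *		const struct file_operations *new_fops = fops_get(real_fops);
 *
 *		if (!new_fops)
 *			return -ENXIO;
 *		replace_fops(filp, new_fops);
 *		if (filp->f_op->open)
 *			return filp->f_op->open(inode, filp);
 *		return 0;
 *	}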
*/ #define replace_fops(f, fops) \ do { \ struct file *__file = (f); \ fops_put(__file->f_op); \ BUG_ON(!(__file->f_op = (fops))); \ } while(0) extern int register_filesystem(struct file_system_type *); extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); int freeze_super(struct super_block *super, enum freeze_holder who); int thaw_super(struct super_block *super, enum freeze_holder who); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len) { if (WARN_ON(len > sizeof(sb->s_uuid))) len = sizeof(sb->s_uuid); sb->s_uuid_len = len; memcpy(&sb->s_uuid, uuid, len); } /* set sb sysfs name based on sb->s_bdev */ static inline void super_set_sysfs_name_bdev(struct super_block *sb) { snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev); } /* set sb sysfs name based on sb->s_uuid */ static inline void super_set_sysfs_name_uuid(struct super_block *sb) { WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid)); snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b); } /* set sb sysfs name based on sb->s_id */ static inline void super_set_sysfs_name_id(struct super_block *sb) { strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name)); } /* try to use something standard before you use this */ __printf(2, 3) static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...) { va_list args; va_start(args, fmt); vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args); va_end(args); } extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); int inode_update_timestamps(struct inode *inode, int flags); int generic_update_time(struct inode *, int); /* /sys/fs */ extern struct kobject *fs_kobj; #define MAX_RW_COUNT (INT_MAX & PAGE_MASK) /* fs/open.c */ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ atomic_t refcnt; struct audit_names *aname; const char iname[]; }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } /** * is_idmapped_mnt - check whether a mount is mapped * @mnt: the mount to check * * If @mnt has an non @nop_mnt_idmap attached to it then @mnt is mapped. * * Return: true if mount is mapped, false if not. 
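 *
 * For illustration, most code simply passes the idmapping along (e.g. via
 * file_mnt_idmap() or mnt_idmap()); an explicit check is only needed when
 * idmapped mounts must be special-cased:
 *
 *	if (is_idmapped_mnt(file->f_path.mnt))
 *		pr_debug("%s: idmapped mount\n", __func__);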
*/ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) { return mnt_idmap(mnt) != &nop_mnt_idmap; } extern long vfs_truncate(const struct path *, loff_t); int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode); extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(const struct path *, const char *, int, umode_t); static inline struct file *file_open_root_mnt(struct vfsmount *mnt, const char *name, int flags, umode_t mode) { return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root}, name, flags, mode); } struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); struct path *backing_file_user_path(struct file *f); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file * stored in ->vm_file is a backing file whose f_inode is on the underlying * filesystem. When the mapped file path and inode number are displayed to * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the * path and inode number to display to the user, which is the path of the fd * that user has requested to map and the inode number that would be returned * by fstat() on that same fd. */ /* Get the path to display in /proc/<pid>/maps */ static inline const struct path *file_user_path(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return backing_file_user_path(f); return &f->f_path; } /* Get the inode whose inode number to display in /proc/<pid>/maps */ static inline const struct inode *file_user_inode(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return d_inode(backing_file_user_path(f)->dentry); return file_inode(f); } static inline struct file *file_clone_open(struct file *file) { return dentry_open(&file->f_path, file->f_flags, file->f_cred); } extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int, int *); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); extern void putname(struct filename *name); extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); /* Helper for the simple case when original dentry is used */ static inline int finish_open_simple(struct file *file, int error) { if (error) return error; return finish_open(file, file->f_path.dentry, NULL); } /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(void); extern struct kmem_cache *names_cachep; #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) extern struct super_block *blockdev_superblock; static inline bool sb_is_blkdev_sb(struct super_block *sb) { return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; } void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct 
file_operations def_chr_fops; /* fs/char_dev.c */ #define CHRDEV_MAJOR_MAX 512 /* Marks the bottom of the first segment of free char majors */ #define CHRDEV_MAJOR_DYN_END 234 /* Marks the top and bottom of the second segment of free char majors */ #define CHRDEV_MAJOR_DYN_EXT_START 511 #define CHRDEV_MAJOR_DYN_EXT_END 384 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops); extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name); extern void unregister_chrdev_region(dev_t, unsigned); extern void chrdev_show(struct seq_file *,off_t); static inline int register_chrdev(unsigned int major, const char *name, const struct file_operations *fops) { return __register_chrdev(major, 0, 256, name, fops); } static inline void unregister_chrdev(unsigned int major, const char *name) { __unregister_chrdev(major, 0, 256, name); } extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, loff_t lend); extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); static inline int file_write_and_wait(struct file *file) { return file_write_and_wait_range(file, 0, LLONG_MAX); } extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, unsigned int flags); static inline bool iocb_is_dsync(const struct kiocb *iocb) { return (iocb->ki_flags & IOCB_DSYNC) || IS_SYNC(iocb->ki_filp->f_mapping->host); } /* * Sync the bytes written if this was a synchronous write. Expect ki_pos * to already be updated for the write, and will return either the amount * of bytes passed in, or an error if syncing the file failed. */ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) { if (iocb_is_dsync(iocb)) { int ret = vfs_fsync_range(iocb->ki_filp, iocb->ki_pos - count, iocb->ki_pos - 1, (iocb->ki_flags & IOCB_SYNC) ? 
0 : 1); if (ret) return ret; } return count; } extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK extern int bmap(struct inode *inode, sector_t *block); #else static inline int bmap(struct inode *inode, sector_t *block) { return -EINVAL; } #endif int notify_change(struct mnt_idmap *, struct dentry *, struct iattr *, struct inode **); int inode_permission(struct mnt_idmap *, struct inode *, int); int generic_permission(struct mnt_idmap *, struct inode *, int); static inline int file_permission(struct file *file, int mask) { return inode_permission(file_mnt_idmap(file), file_inode(file), mask); } static inline int path_permission(const struct path *path, int mask) { return inode_permission(mnt_idmap(path->mnt), d_inode(path->dentry), mask); } int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) { return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); } static inline bool inode_wrong_type(const struct inode *inode, umode_t mode) { return (inode->i_mode ^ mode) & S_IFMT; } /** * file_start_write - get write access to a superblock for regular file io * @file: the file we want to write to * * This is a variant of sb_start_write() which is a noop on non-regualr file. * Should be matched with a call to file_end_write(). */ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; sb_start_write(file_inode(file)->i_sb); } static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_start_write_trylock(file_inode(file)->i_sb); } /** * file_end_write - drop write access to a superblock of a regular file * @file: the file we wrote to * * Should be matched with a call to file_start_write(). */ static inline void file_end_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; sb_end_write(file_inode(file)->i_sb); } /** * kiocb_start_write - get write access to a superblock for async file io * @iocb: the io context we want to submit the write with * * This is a variant of sb_start_write() for async io submission. * Should be matched with a call to kiocb_end_write(). */ static inline void kiocb_start_write(struct kiocb *iocb) { struct inode *inode = file_inode(iocb->ki_filp); sb_start_write(inode->i_sb); /* * Fool lockdep by telling it the lock got released so that it * doesn't complain about the held lock when we return to userspace. */ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); } /** * kiocb_end_write - drop write access to a superblock after async file io * @iocb: the io context we sumbitted the write with * * Should be matched with a call to kiocb_start_write(). */ static inline void kiocb_end_write(struct kiocb *iocb) { struct inode *inode = file_inode(iocb->ki_filp); /* * Tell lockdep we inherited freeze protection from submission thread. */ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); sb_end_write(inode->i_sb); } /* * This is used for regular files where some users -- especially the * currently executed binary in a process, previously handled via * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap * read-write shared) accesses. * * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. * deny_write_access() denies write access to a file. * allow_write_access() re-enables write access to a file. 
* * The i_writecount field of an inode can have the following values: * 0: no write access, no denied write access * < 0: (-i_writecount) users that denied write access to the file. * > 0: (i_writecount) users that have write access to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to * use {get,deny}_write_access() - these functions check the sign and refuse * to do the change if sign is wrong. */ static inline int get_write_access(struct inode *inode) { return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline int deny_write_access(struct file *file) { struct inode *inode = file_inode(file); return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline void put_write_access(struct inode * inode) { atomic_dec(&inode->i_writecount); } static inline void allow_write_access(struct file *file) { if (file) atomic_inc(&file_inode(file)->i_writecount); } static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; } #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) static inline void i_readcount_dec(struct inode *inode) { BUG_ON(atomic_dec_return(&inode->i_readcount) < 0); } static inline void i_readcount_inc(struct inode *inode) { atomic_inc(&inode->i_readcount); } #else static inline void i_readcount_dec(struct inode *inode) { return; } static inline void i_readcount_inc(struct inode *inode) { return; } #endif extern int do_pipe_flags(int *, int); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern bool is_subdir(struct dentry *, struct dentry *); extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); /** * is_dot_dotdot - returns true only if @name is "." or ".." 
* @name: file name to check * @len: length of file name, in bytes */ static inline bool is_dot_dotdot(const char *name, size_t len) { return len && unlikely(name[0] == '.') && (len == 1 || (len == 2 && name[1] == '.')); } #include <linux/err.h> /* needed for stackable file system support */ extern loff_t default_llseek(struct file *file, loff_t offset, int whence); extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void address_space_init_once(struct address_space *mapping); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { return !inode->i_nlink || inode_unhashed(inode); } extern void d_mark_dontcache(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); extern struct inode *ilookup(struct super_block *sb, unsigned long ino); extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data); extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); extern struct inode *find_inode_nowait(struct super_block *, unsigned long, int (*match)(struct inode *, unsigned long, void *), void *data); extern struct inode *find_inode_rcu(struct super_block *, unsigned long, int (*)(struct inode *, void *), void *); extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void lockdep_annotate_inode_mutex_key(struct inode *inode); #else static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); void dump_mapping(const struct address_space *); /* * Userspace may rely on the inode number being non-zero. For example, glibc * simply ignores files with zero i_ino in unlink() and other places. * * As an additional complication, if userspace was compiled with * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the * lower 32 bits, so we need to check that those aren't zero explicitly. With * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but * better safe than sorry. 
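 *
 * For illustration, an allocator handing out sequential 64-bit inode numbers
 * (here "next_ino" is a stand-in for a per-sb counter) can simply skip values
 * that would appear as zero to such userspace:
 *
 *	ino = next_ino++;
 *	if (unlikely(is_zero_ino(ino)))
 *		ino = next_ino++;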
*/ static inline bool is_zero_ino(ino_t ino) { return (u32)ino == 0; } extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); /* * This must be used for allocating filesystems specific inodes to set * up the inode reclaim context correctly. */ static inline void * alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp) { return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp); } extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } extern void __remove_inode_hash(struct inode *); static inline void remove_inode_hash(struct inode *inode) { if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash)) __remove_inode_hash(inode); } extern void inode_sb_list_add(struct inode *inode); extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to, ssize_t already_read); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); ssize_t generic_perform_write(struct kiocb *, struct iov_iter *); ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, ssize_t direct_written, ssize_t buffered_written); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, struct iov_iter *iter); ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, struct iov_iter *iter); /* fs/splice.c */ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); ssize_t copy_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); #define no_llseek NULL extern loff_t vfs_setpos(struct file *file, 
loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); int rw_verify_area(int, struct file *, const loff_t *, size_t); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); extern int stream_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, loff_t file_offset); enum { /* need locking between buffered and direct access */ DIO_LOCKING = 0x01, /* filesystem does not support filling holes */ DIO_SKIP_HOLES = 0x02, }; ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, get_block_t get_block, dio_iodone_t end_io, int flags); static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct iov_iter *iter, get_block_t get_block) { return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, get_block, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } #endif void inode_dio_wait(struct inode *inode); /** * inode_dio_begin - signal start of a direct I/O request * @inode: inode the direct I/O happens on * * This is called before submitting a direct I/O request and pairs with * inode_dio_end(), so that inode_dio_wait() callers can wait for all * in-flight direct I/O against the inode to drain. */ static inline void inode_dio_begin(struct inode *inode) { atomic_inc(&inode->i_dio_count); } /** * inode_dio_end - signal finish of a direct I/O request * @inode: inode the direct I/O happens on * * This is called once we've finished processing a direct I/O request, * and is used to wake up callers waiting for direct I/O to be quiesced.
*/ static inline void inode_dio_end(struct inode *inode) { if (atomic_dec_and_test(&inode->i_dio_count)) wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); } extern void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask); extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); static inline loff_t __inode_get_bytes(struct inode *inode) { return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes; } loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); const char *simple_get_link(struct dentry *, struct inode *, struct delayed_call *); extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags); int vfs_fstat(int fd, struct kstat *stat); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, filename, stat, 0); } static inline int vfs_lstat(const char __user *name, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); } extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern int simple_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); void 
simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; extern void make_empty_dir_inode(struct inode *inode); extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { const char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); struct offset_ctx { struct maple_tree mt; unsigned long next_offset; }; void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); int simple_offset_empty(struct dentry *dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); void simple_offset_destroy(struct offset_ctx *octx); extern const struct file_operations simple_offset_dir_operations; extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set_sb_d_ops(struct super_block *sb); static inline bool sb_has_encoding(const struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) return !!sb->s_encoding; #else return false; #endif } int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); void setattr_copy(struct mnt_idmap *, struct inode *inode, const struct iattr *attr); extern int file_update_time(struct file *file); static inline bool vma_is_dax(const struct vm_area_struct *vma) { 
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } static inline bool vma_is_fsdax(struct vm_area_struct *vma) { struct inode *inode; if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file) return false; if (!vma_is_dax(vma)) return false; inode = file_inode(vma->vm_file); if (S_ISCHR(inode->i_mode)) return false; /* device-dax */ return true; } static inline int iocb_flags(struct file *file) { int res = 0; if (file->f_flags & O_APPEND) res |= IOCB_APPEND; if (file->f_flags & O_DIRECT) res |= IOCB_DIRECT; if (file->f_flags & O_DSYNC) res |= IOCB_DSYNC; if (file->f_flags & __O_SYNC) res |= IOCB_SYNC; return res; } static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { int kiocb_flags = 0; /* make sure there's no overlap between RWF and private IOCB flags */ BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD); if (!flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND))) return -EINVAL; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; kiocb_flags |= IOCB_NOIO; } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) { if (IS_APPEND(file_inode(ki->ki_filp))) return -EPERM; ki->ki_flags &= ~IOCB_APPEND; } ki->ki_flags |= kiocb_flags; return 0; } static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; /* * Don't strictly need d_lock here? If the parent ino could change * then surely we'd have a deeper race in the caller? */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); return res; } /* Transaction based IO helpers */ /* * An argresp is stored in an allocated page and holds the * size of the argument or response, along with its content */ struct simple_transaction_argresp { ssize_t size; char data[]; }; #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) char *simple_transaction_get(struct file *file, const char __user *buf, size_t size); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos); int simple_transaction_release(struct inode *inode, struct file *file); void simple_transaction_set(struct file *file, size_t n); /* * simple attribute files * * These attributes behave similar to those in sysfs: * * Writing to an attribute immediately sets a value, an open file can be * written to multiple times. * * Reading from an attribute creates a buffer from the value that might get * read with multiple read calls. When the attribute has been read * completely, no further read calls are possible until the file is opened * again. * * All attributes contain a text representation of a numeric value * that are accessed with the get() and set() functions. */ #define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ __simple_attr_check_format(__fmt, 0ull); \ return simple_attr_open(inode, file, __get, __set, __fmt); \ } \ static const struct file_operations __fops = { \ .owner = THIS_MODULE, \ .open = __fops ## _open, \ .release = simple_attr_release, \ .read = simple_attr_read, \ .write = (__is_signed) ? 
simple_attr_write_signed : simple_attr_write, \ .llseek = generic_file_llseek, \ } #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) #define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) static inline __printf(1, 2) void __simple_attr_check_format(const char *fmt, ...) { /* don't do anything, just let the compiler check the arguments; */ } int simple_attr_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt); int simple_attr_release(struct inode *inode, struct file *file); ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) #define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) static inline bool is_sxid(umode_t mode) { return mode & (S_ISUID | S_ISGID); } static inline int check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { if (!(dir->i_mode & S_ISVTX)) return 0; return __check_sticky(idmap, dir, inode); } static inline void inode_has_no_xattr(struct inode *inode) { if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC)) inode->i_flags |= S_NOSEC; } static inline bool is_root_inode(struct inode *inode) { return inode == inode->i_sb->s_root->d_inode; } static inline bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type) { return ctx->actor(ctx, name, namelen, ctx->pos, ino, type); } static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, ".", 1, ctx->pos, file->f_path.dentry->d_inode->i_ino, DT_DIR); } static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, "..", 2, ctx->pos, parent_ino(file->f_path.dentry), DT_DIR); } static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) { if (ctx->pos == 0) { if (!dir_emit_dot(file, ctx)) return false; ctx->pos = 1; } if (ctx->pos == 1) { if (!dir_emit_dotdot(file, ctx)) return false; ctx->pos = 2; } return true; } static inline bool dir_relax(struct inode *inode) { inode_unlock(inode); inode_lock(inode); return !IS_DEADDIR(inode); } static inline bool dir_relax_shared(struct inode *inode) { inode_unlock_shared(inode); inode_lock_shared(inode); return !IS_DEADDIR(inode); } extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); /* mm/fadvise.c */ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice); extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); #endif /* _LINUX_FS_H */
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_NOTIFY_H #define _LINUX_FS_NOTIFY_H /* * include/linux/fsnotify.h - generic hooks for filesystem notification, to * reduce in-source duplication from both dnotify and inotify. * * We don't compile any of this away in some complicated menagerie of ifdefs. * Instead, we rely on the code inside to optimize away as needed. * * (C) Copyright 2005 Robert Love */ #include <linux/fsnotify_backend.h> #include <linux/audit.h> #include <linux/slab.h> #include <linux/bug.h> /* Are there any inode/mount/sb objects that are being watched at all? */ static inline bool fsnotify_sb_has_watchers(struct super_block *sb) { return atomic_long_read(&sb->s_fsnotify_connectors); } /* * Notify this @dir inode about a change in a child directory entry. * The directory entry may have turned positive or negative or its inode may * have changed (i.e. renamed over). * * Unlike fsnotify_parent(), the event will be reported regardless of the * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only * the child is interested and not the parent.
*/ static inline int fsnotify_name(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { if (!fsnotify_sb_has_watchers(dir->i_sb)) return 0; return fsnotify(mask, data, data_type, dir, name, NULL, cookie); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, __u32 mask) { fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0); } static inline void fsnotify_inode(struct inode *inode, __u32 mask) { if (!fsnotify_sb_has_watchers(inode->i_sb)) return; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; fsnotify(mask, inode, FSNOTIFY_EVENT_INODE, NULL, NULL, inode, 0); } /* Notify this dentry's parent about a child's events. */ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { struct inode *inode = d_inode(dentry); if (!fsnotify_sb_has_watchers(inode->i_sb)) return 0; if (S_ISDIR(inode->i_mode)) { mask |= FS_ISDIR; /* sb/mount marks are not interested in name of directory */ if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) goto notify_child; } /* disconnected dentry cannot notify parent */ if (IS_ROOT(dentry)) goto notify_child; return __fsnotify_parent(dentry, mask, data, data_type); notify_child: return fsnotify(mask, data, data_type, NULL, NULL, inode, 0); } /* * Simple wrappers to consolidate calls to fsnotify_parent() when an event * is on a file/dentry. */ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) { fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY); } static inline int fsnotify_file(struct file *file, __u32 mask) { const struct path *path; if (file->f_mode & FMODE_NONOTIFY) return 0; path = &file->f_path; return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* * fsnotify_file_area_perm - permission hook before access to file range */ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { __u32 fsnotify_mask = FS_ACCESS_PERM; /* * filesystem may be modified in the context of permission events * (e.g. by HSM filling a file on access), so sb freeze protection * must not be held. 
*/ lockdep_assert_once(file_write_not_started(file)); if (!(perm_mask & MAY_READ)) return 0; return fsnotify_file(file, fsnotify_mask); } /* * fsnotify_file_perm - permission hook before file access */ static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return fsnotify_file_area_perm(file, perm_mask, NULL, 0); } /* * fsnotify_open_perm - permission hook before file open */ static inline int fsnotify_open_perm(struct file *file) { int ret; if (file->f_flags & __FMODE_EXEC) { ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); if (ret) return ret; } return fsnotify_file(file, FS_OPEN_PERM); } #else static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { return 0; } static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return 0; } static inline int fsnotify_open_perm(struct file *file) { return 0; } #endif /* * fsnotify_link_count - inode's link count changed */ static inline void fsnotify_link_count(struct inode *inode) { fsnotify_inode(inode, FS_ATTRIB); } /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, const struct qstr *old_name, int isdir, struct inode *target, struct dentry *moved) { struct inode *source = moved->d_inode; u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = FS_MOVED_FROM; __u32 new_dir_mask = FS_MOVED_TO; __u32 rename_mask = FS_RENAME; const struct qstr *new_name = &moved->d_name; if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; rename_mask |= FS_ISDIR; } /* Event with information about both old and new parent+name */ fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY, old_dir, old_name, 0); fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_dir, old_name, fs_cookie); fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_dir, new_name, fs_cookie); if (target) fsnotify_link_count(target); fsnotify_inode(source, FS_MOVE_SELF); audit_inode_child(new_dir, moved, AUDIT_TYPE_CHILD_CREATE); } /* * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed */ static inline void fsnotify_inode_delete(struct inode *inode) { __fsnotify_inode_delete(inode); } /* * fsnotify_vfsmount_delete - a vfsmount is being destroyed, clean up is needed */ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) { __fsnotify_vfsmount_delete(mnt); } /* * fsnotify_inoderemove - an inode is going away */ static inline void fsnotify_inoderemove(struct inode *inode) { fsnotify_inode(inode, FS_DELETE_SELF); __fsnotify_inode_delete(inode); } /* * fsnotify_create - 'name' was linked in * * Caller must make sure that dentry->d_name is stable. * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate * ->d_inode later */ static inline void fsnotify_create(struct inode *dir, struct dentry *dentry) { audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_dirent(dir, dentry, FS_CREATE); } /* * fsnotify_link - new hardlink in 'inode' directory * * Caller must make sure that new_dentry->d_name is stable. 
* Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) { fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE, dir, &new_dentry->d_name, 0); } /* * fsnotify_delete - @dentry was unlinked and unhashed * * Caller must make sure that dentry->d_name is stable. * * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode * as this may be called after d_delete() and old_dentry may be negative. */ static inline void fsnotify_delete(struct inode *dir, struct inode *inode, struct dentry *dentry) { __u32 mask = FS_DELETE; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, 0); } /** * d_delete_notify - delete a dentry and call fsnotify_delete() * @dentry: The dentry to delete * * This helper is used to guaranty that the unlinked inode cannot be found * by lookup of this name after fsnotify_delete() event has been delivered. */ static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); ihold(inode); d_delete(dentry); fsnotify_delete(dir, inode, dentry); iput(inode); } /* * fsnotify_unlink - 'name' was unlinked * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { if (WARN_ON_ONCE(d_is_negative(dentry))) return; fsnotify_delete(dir, d_inode(dentry), dentry); } /* * fsnotify_mkdir - directory 'name' was created * * Caller must make sure that dentry->d_name is stable. * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate * ->d_inode later */ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) { audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR); } /* * fsnotify_rmdir - directory 'name' was removed * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { if (WARN_ON_ONCE(d_is_negative(dentry))) return; fsnotify_delete(dir, d_inode(dentry), dentry); } /* * fsnotify_access - file was read */ static inline void fsnotify_access(struct file *file) { fsnotify_file(file, FS_ACCESS); } /* * fsnotify_modify - file was modified */ static inline void fsnotify_modify(struct file *file) { fsnotify_file(file, FS_MODIFY); } /* * fsnotify_open - file was opened */ static inline void fsnotify_open(struct file *file) { __u32 mask = FS_OPEN; if (file->f_flags & __FMODE_EXEC) mask |= FS_OPEN_EXEC; fsnotify_file(file, mask); } /* * fsnotify_close - file was closed */ static inline void fsnotify_close(struct file *file) { __u32 mask = (file->f_mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; fsnotify_file(file, mask); } /* * fsnotify_xattr - extended attributes were changed */ static inline void fsnotify_xattr(struct dentry *dentry) { fsnotify_dentry(dentry, FS_ATTRIB); } /* * fsnotify_change - notify_change event. file was modified and/or metadata * was changed. 
*/ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { __u32 mask = 0; if (ia_valid & ATTR_UID) mask |= FS_ATTRIB; if (ia_valid & ATTR_GID) mask |= FS_ATTRIB; if (ia_valid & ATTR_SIZE) mask |= FS_MODIFY; /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) mask |= FS_ATTRIB; else if (ia_valid & ATTR_ATIME) mask |= FS_ACCESS; else if (ia_valid & ATTR_MTIME) mask |= FS_MODIFY; if (ia_valid & ATTR_MODE) mask |= FS_ATTRIB; if (mask) fsnotify_dentry(dentry, mask); } static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, int error) { struct fs_error_report report = { .error = error, .inode = inode, .sb = sb, }; return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL, NULL, 0); } #endif /* _LINUX_FS_NOTIFY_H */
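To make the calling convention of these hooks concrete, here is a small editorial sketch (not from the header): a hypothetical helper that creates a file and a directory and fires the matching fsnotify events once the operation has succeeded, while dentry->d_name is still stable. my_fs_mknod() and my_fs_mkdir_raw() are invented primitives standing in for the filesystem's real code.

#include <linux/fs.h>
#include <linux/fsnotify.h>

int my_fs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode);	/* hypothetical */
int my_fs_mkdir_raw(struct inode *dir, struct dentry *dentry, umode_t mode);	/* hypothetical */

static int my_fs_do_create(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int err = my_fs_mknod(dir, dentry, mode);

	if (!err)
		fsnotify_create(dir, dentry);	/* FS_CREATE event + audit record */
	return err;
}

static int my_fs_do_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int err = my_fs_mkdir_raw(dir, dentry, mode);

	if (!err)
		fsnotify_mkdir(dir, dentry);	/* FS_CREATE | FS_ISDIR + audit record */
	return err;
}

In mainline the generic vfs_create()/vfs_mkdir() paths already invoke these hooks, so a filesystem normally only needs to call them itself for changes it generates internally.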
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H /* * Jump label support * * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * * The use of 'struct static_key' directly, is now DEPRECATED. In addition * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: * * struct static_key false = STATIC_KEY_INIT_FALSE; * struct static_key true = STATIC_KEY_INIT_TRUE; * static_key_true() * static_key_false() * * The updated API replacements are: * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)", * an "if (static_branch_unlikely(&key))" statement is an unconditional branch * (which defaults to false - and the true block is placed out of line). * Similarly, we can define an initially true key via * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same * "if (static_branch_unlikely(&key))", in which case we will generate an * unconditional branch to the out-of-line true branch.
Keys that are * initially true or false can be used in both static_branch_unlikely() * and static_branch_likely() statements. * * At runtime we can change the branch target by setting the key * to true via a call to static_branch_enable(), or false using * static_branch_disable(). If the direction of the branch is switched by * these calls then we run-time modify the branch target via a * no-op -> jump or jump -> no-op conversion. For example, for an * initially false key that is used in an "if (static_branch_unlikely(&key))" * statement, setting the key to true requires us to patch in a jump * to the out-of-line true branch. * * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and/or architecture support, static keys fall back to a * simple conditional branch. * * Additional babbling in: Documentation/staging/static-keys.rst */ #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/compiler.h> extern bool static_key_initialized; #define STATIC_KEY_CHECK_USE(key) WARN(!static_key_initialized, \ "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) struct static_key { atomic_t enabled; #ifdef CONFIG_JUMP_LABEL /* * Note: * To make anonymous unions work with old compilers, the static * initialization of them requires brackets. This creates a dependency * on the order of the struct with the initializers. If any fields * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need * to be modified.
* * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod * 0 if points to struct jump_entry */ union { unsigned long type; struct jump_entry *entries; struct static_key_mod *next; }; #endif /* CONFIG_JUMP_LABEL */ }; #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL #include <asm/jump_label.h> #ifndef __ASSEMBLY__ #ifdef CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE struct jump_entry { s32 code; s32 target; long key; // key may be far away from the core kernel under KASLR }; static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return (unsigned long)&entry->code + entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { long offset = entry->key & ~3L; return (struct static_key *)((unsigned long)&entry->key + offset); } #else static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { return (struct static_key *)((unsigned long)entry->key & ~3UL); } #endif static inline bool jump_entry_is_branch(const struct jump_entry *entry) { return (unsigned long)entry->key & 1UL; } static inline bool jump_entry_is_init(const struct jump_entry *entry) { return (unsigned long)entry->key & 2UL; } static inline void jump_entry_set_init(struct jump_entry *entry, bool set) { if (set) entry->key |= 2; else entry->key &= ~2; } static inline int jump_entry_size(struct jump_entry *entry) { #ifdef JUMP_LABEL_NOP_SIZE return JUMP_LABEL_NOP_SIZE; #else return arch_jump_entry_size(entry); #endif } #endif #endif #ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_NOP = 0, JUMP_LABEL_JMP, }; struct module; #ifdef CONFIG_JUMP_LABEL #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_LINKED 2UL #define JUMP_TYPE_MASK 3UL static __always_inline bool static_key_false(struct static_key *key) { return arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); extern bool static_key_slow_inc(struct static_key *key); extern bool static_key_fast_inc_not_disabled(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern bool static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); extern enum jump_label_type jump_label_init_type(struct jump_entry *entry); /* * We should be 
using ATOMIC_INIT() for initializing .enabled, but * the inclusion of atomic.h is problematic for inclusion of jump_label.h * in 'low-level' headers. Thus, we are initializing .enabled with a * raw value, but have added a BUILD_BUG_ON() to catch any issues in * jump_label_init() see: kernel/jump_label.c. */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ { .type = JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ { .type = JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ #include <linux/atomic.h> #include <linux/bug.h> static __always_inline int static_key_count(struct static_key *key) { return raw_atomic_read(&key->enabled); } static __always_inline void jump_label_init(void) { static_key_initialized = true; } static __always_inline bool static_key_false(struct static_key *key) { if (unlikely_notrace(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { if (likely_notrace(static_key_count(key) > 0)) return true; return false; } static inline bool static_key_fast_inc_not_disabled(struct static_key *key) { int v; STATIC_KEY_CHECK_USE(key); /* * Prevent key->enabled getting negative to follow the same semantics * as for CONFIG_JUMP_LABEL=y, see kernel/jump_label.c comment. */ v = atomic_read(&key->enabled); do { if (v < 0 || (v + 1) < 0) return false; } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1))); return true; } #define static_key_slow_inc(key) static_key_fast_inc_not_disabled(key) static inline void static_key_slow_dec(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_dec(&key->enabled); } #define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) #define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) static inline int jump_label_text_reserved(void *start, void *end) { return 0; } static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} static inline void static_key_enable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 0) { WARN_ON_ONCE(atomic_read(&key->enabled) != 1); return; } atomic_set(&key->enabled, 1); } static inline void static_key_disable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 1) { WARN_ON_ONCE(atomic_read(&key->enabled) != 0); return; } atomic_set(&key->enabled, 0); } #define static_key_enable_cpuslocked(k) static_key_enable((k)) #define static_key_disable_cpuslocked(k) static_key_disable((k)) #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* CONFIG_JUMP_LABEL */ #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled /* -------------------------------------------------------------------------- */ /* * Two type wrappers around static_key, such that we can use compile time * type differentiation to emit the right code. * * All the below code is macros in order to play type games. 
*/ struct static_key_true { struct static_key key; }; struct static_key_false { struct static_key key; }; #define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT #define DEFINE_STATIC_KEY_TRUE_RO(name) \ struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT #define DECLARE_STATIC_KEY_TRUE(name) \ extern struct static_key_true name #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT #define DEFINE_STATIC_KEY_FALSE_RO(name) \ struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT #define DECLARE_STATIC_KEY_FALSE(name) \ extern struct static_key_false name #define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ struct static_key_true name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ } #define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ struct static_key_false name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ } #define _DEFINE_STATIC_KEY_1(name) DEFINE_STATIC_KEY_TRUE(name) #define _DEFINE_STATIC_KEY_0(name) DEFINE_STATIC_KEY_FALSE(name) #define DEFINE_STATIC_KEY_MAYBE(cfg, name) \ __PASTE(_DEFINE_STATIC_KEY_, IS_ENABLED(cfg))(name) #define _DEFINE_STATIC_KEY_RO_1(name) DEFINE_STATIC_KEY_TRUE_RO(name) #define _DEFINE_STATIC_KEY_RO_0(name) DEFINE_STATIC_KEY_FALSE_RO(name) #define DEFINE_STATIC_KEY_MAYBE_RO(cfg, name) \ __PASTE(_DEFINE_STATIC_KEY_RO_, IS_ENABLED(cfg))(name) #define _DECLARE_STATIC_KEY_1(name) DECLARE_STATIC_KEY_TRUE(name) #define _DECLARE_STATIC_KEY_0(name) DECLARE_STATIC_KEY_FALSE(name) #define DECLARE_STATIC_KEY_MAYBE(cfg, name) \ __PASTE(_DECLARE_STATIC_KEY_, IS_ENABLED(cfg))(name) extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ ({ \ if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ ____wrong_branch_error(); \ static_key_count((struct static_key *)x) > 0; \ }) #ifdef CONFIG_JUMP_LABEL /* * Combine the right initial value (type) with the right branch order * to generate the desired result. * * * type\branch| likely (1) | unlikely (0) * -----------+-----------------------+------------------ * | | * true (1) | ... | ... * | NOP | JMP L * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * | | * false (0) | ... | ... * | JMP L | NOP * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * * The initial value is encoded in the LSB of static_key::entries, * type: 0 = false, 1 = true. * * The branch type is encoded in the LSB of jump_entry::key, * branch: 0 = unlikely, 1 = likely. * * This gives the following logic table: * * enabled type branch instuction * -----------------------------+----------- * 0 0 0 | NOP * 0 0 1 | JMP * 0 1 0 | NOP * 0 1 1 | JMP * * 1 0 0 | JMP * 1 0 1 | NOP * 1 1 0 | JMP * 1 1 1 | NOP * * Which gives the following functions: * * dynamic: instruction = enabled ^ branch * static: instruction = type ^ branch * * See jump_label_type() / jump_label_init_type(). 
*/ #define static_branch_likely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = !arch_static_branch(&(x)->key, true); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = !arch_static_branch_jump(&(x)->key, true); \ else \ branch = ____wrong_branch_error(); \ likely_notrace(branch); \ }) #define static_branch_unlikely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = arch_static_branch_jump(&(x)->key, false); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = arch_static_branch(&(x)->key, false); \ else \ branch = ____wrong_branch_error(); \ unlikely_notrace(branch); \ }) #else /* !CONFIG_JUMP_LABEL */ #define static_branch_likely(x) likely_notrace(static_key_enabled(&(x)->key)) #define static_branch_unlikely(x) unlikely_notrace(static_key_enabled(&(x)->key)) #endif /* CONFIG_JUMP_LABEL */ #define static_branch_maybe(config, x) \ (IS_ENABLED(config) ? static_branch_likely(x) \ : static_branch_unlikely(x)) /* * Advanced usage; refcount, branch is enabled when: count != 0 */ #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) #define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) #define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. */ #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) #endif /* __ASSEMBLY__ */ #endif /* _LINUX_JUMP_LABEL_H */
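As a usage illustration of the non-deprecated API documented above, the sketch below (editorial, not part of the header; the my_* names are invented) defines a default-off key, tests it on a hot path, and flips it at runtime.

#include <linux/jump_label.h>
#include <linux/types.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);

static void my_feature_work(void)
{
	/* the rarely taken, out-of-line work */
}

void my_hot_path(void)
{
	/* With arch support this compiles to a NOP while the key is disabled. */
	if (static_branch_unlikely(&my_feature_key))
		my_feature_work();
}

void my_feature_set(bool on)
{
	/* Slow path: patches every use of the key, so never call this from hot code. */
	if (on)
		static_branch_enable(&my_feature_key);
	else
		static_branch_disable(&my_feature_key);
}

For reference-counted enablement, static_branch_inc()/static_branch_dec() can be used in place of the boolean enable/disable, as noted in the "Advanced usage" comment above.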
// SPDX-License-Identifier: GPL-2.0-or-later /* * Apple USB BCM5974 (Macbook Air and Penryn Macbook Pro) multitouch driver * * Copyright (C) 2008 Henrik Rydberg (rydberg@euromail.se) * Copyright (C) 2015 John Horan (knasher@gmail.com) * * The USB initialization and package decoding was made by * Scott Shawcroft as part of the touchd user-space driver project: * Copyright (C) 2008 Scott Shawcroft (scott.shawcroft@gmail.com) * * The BCM5974 driver is based on the appletouch driver: * Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2005 Johannes Berg (johannes@sipsolutions.net) * Copyright (C) 2005 Stelian Pop (stelian@popies.net) * Copyright (C) 2005 Frank Arnold (frank@scirocco-5v-turbo.de) * Copyright (C) 2005 Peter Osterlund (petero2@telia.com) * Copyright (C) 2005 Michael Hanselmann (linux-kernel@hansmi.ch) * Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb/input.h> #include <linux/hid.h> #include <linux/mutex.h> #include <linux/input/mt.h> #define USB_VENDOR_ID_APPLE 0x05ac /* MacbookAir, aka wellspring */ #define USB_DEVICE_ID_APPLE_WELLSPRING_ANSI 0x0223 #define USB_DEVICE_ID_APPLE_WELLSPRING_ISO 0x0224 #define USB_DEVICE_ID_APPLE_WELLSPRING_JIS 0x0225 /* MacbookProPenryn, aka wellspring2 */ #define USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI 0x0230 #define USB_DEVICE_ID_APPLE_WELLSPRING2_ISO 0x0231 #define USB_DEVICE_ID_APPLE_WELLSPRING2_JIS 0x0232 /* Macbook5,1 (unibody), aka wellspring3 */ #define USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI 0x0236 #define USB_DEVICE_ID_APPLE_WELLSPRING3_ISO 0x0237 #define USB_DEVICE_ID_APPLE_WELLSPRING3_JIS 0x0238 /* MacbookAir3,2 (unibody), aka wellspring5 */ #define USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI 0x023f #define USB_DEVICE_ID_APPLE_WELLSPRING4_ISO 0x0240 #define USB_DEVICE_ID_APPLE_WELLSPRING4_JIS 0x0241 /* MacbookAir3,1 (unibody), aka wellspring4 */ #define USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI 0x0242 #define USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO 0x0243 #define USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS 0x0244 /* Macbook8 (unibody, March 2011) */ #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI 0x0245 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO 0x0246 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS 0x0247 /* MacbookAir4,1 (unibody, July 2011) */ #define USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI 0x0249 #define USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO 0x024a #define USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS 0x024b /* MacbookAir4,2 (unibody, July 2011) */ #define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI 0x024c #define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO 0x024d #define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS 0x024e /* Macbook8,2 (unibody) */ #define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI 0x0252 #define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO 0x0253 #define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS 0x0254 /* MacbookPro10,1 (unibody, June 2012) */ #define USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI 0x0262 #define USB_DEVICE_ID_APPLE_WELLSPRING7_ISO 0x0263
#define USB_DEVICE_ID_APPLE_WELLSPRING7_JIS 0x0264 /* MacbookPro10,2 (unibody, October 2012) */ #define USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI 0x0259 #define USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO 0x025a #define USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS 0x025b /* MacbookAir6,2 (unibody, June 2013) */ #define USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI 0x0290 #define USB_DEVICE_ID_APPLE_WELLSPRING8_ISO 0x0291 #define USB_DEVICE_ID_APPLE_WELLSPRING8_JIS 0x0292 /* MacbookPro12,1 (2015) */ #define USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI 0x0272 #define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO 0x0273 #define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS 0x0274 #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ USB_DEVICE_ID_MATCH_INT_CLASS | \ USB_DEVICE_ID_MATCH_INT_PROTOCOL), \ .idVendor = USB_VENDOR_ID_APPLE, \ .idProduct = (prod), \ .bInterfaceClass = USB_INTERFACE_CLASS_HID, \ .bInterfaceProtocol = USB_INTERFACE_PROTOCOL_MOUSE \ } /* table of devices that work with this driver */ static const struct usb_device_id bcm5974_table[] = { /* MacbookAir1.1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING_JIS), /* MacbookProPenryn */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING2_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING2_JIS), /* Macbook5,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING3_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING3_JIS), /* MacbookAir3,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4_JIS), /* MacbookAir3,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS), /* MacbookPro8 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS), /* MacbookAir4,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS), /* MacbookAir4,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS), /* MacbookPro8,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS), /* MacbookPro10,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7_JIS), /* MacbookPro10,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS), /* MacbookAir6,2 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING8_JIS), /* MacbookPro12,1 */ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), /* Terminating entry */ {} }; MODULE_DEVICE_TABLE(usb, bcm5974_table); MODULE_AUTHOR("Henrik Rydberg"); MODULE_DESCRIPTION("Apple USB BCM5974 multitouch 
driver"); MODULE_LICENSE("GPL"); #define dprintk(level, format, a...)\ { if (debug >= level) printk(KERN_DEBUG format, ##a); } static int debug = 1; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Activate debugging output"); /* button data structure */ struct bt_data { u8 unknown1; /* constant */ u8 button; /* left button */ u8 rel_x; /* relative x coordinate */ u8 rel_y; /* relative y coordinate */ }; /* trackpad header types */ enum tp_type { TYPE1, /* plain trackpad */ TYPE2, /* button integrated in trackpad */ TYPE3, /* additional header fields since June 2013 */ TYPE4 /* additional header field for pressure data */ }; /* trackpad finger data offsets, le16-aligned */ #define HEADER_TYPE1 (13 * sizeof(__le16)) #define HEADER_TYPE2 (15 * sizeof(__le16)) #define HEADER_TYPE3 (19 * sizeof(__le16)) #define HEADER_TYPE4 (23 * sizeof(__le16)) /* trackpad button data offsets */ #define BUTTON_TYPE1 0 #define BUTTON_TYPE2 15 #define BUTTON_TYPE3 23 #define BUTTON_TYPE4 31 /* list of device capability bits */ #define HAS_INTEGRATED_BUTTON 1 /* trackpad finger data block size */ #define FSIZE_TYPE1 (14 * sizeof(__le16)) #define FSIZE_TYPE2 (14 * sizeof(__le16)) #define FSIZE_TYPE3 (14 * sizeof(__le16)) #define FSIZE_TYPE4 (15 * sizeof(__le16)) /* offset from header to finger struct */ #define DELTA_TYPE1 (0 * sizeof(__le16)) #define DELTA_TYPE2 (0 * sizeof(__le16)) #define DELTA_TYPE3 (0 * sizeof(__le16)) #define DELTA_TYPE4 (1 * sizeof(__le16)) /* usb control message mode switch data */ #define USBMSG_TYPE1 8, 0x300, 0, 0, 0x1, 0x8 #define USBMSG_TYPE2 8, 0x300, 0, 0, 0x1, 0x8 #define USBMSG_TYPE3 8, 0x300, 0, 0, 0x1, 0x8 #define USBMSG_TYPE4 2, 0x302, 2, 1, 0x1, 0x0 /* Wellspring initialization constants */ #define BCM5974_WELLSPRING_MODE_READ_REQUEST_ID 1 #define BCM5974_WELLSPRING_MODE_WRITE_REQUEST_ID 9 /* trackpad finger structure, le16-aligned */ struct tp_finger { __le16 origin; /* zero when switching track finger */ __le16 abs_x; /* absolute x coodinate */ __le16 abs_y; /* absolute y coodinate */ __le16 rel_x; /* relative x coodinate */ __le16 rel_y; /* relative y coodinate */ __le16 tool_major; /* tool area, major axis */ __le16 tool_minor; /* tool area, minor axis */ __le16 orientation; /* 16384 when point, else 15 bit angle */ __le16 touch_major; /* touch area, major axis */ __le16 touch_minor; /* touch area, minor axis */ __le16 unused[2]; /* zeros */ __le16 pressure; /* pressure on forcetouch touchpad */ __le16 multi; /* one finger: varies, more fingers: constant */ } __attribute__((packed,aligned(2))); /* trackpad finger data size, empirically at least ten fingers */ #define MAX_FINGERS 16 #define MAX_FINGER_ORIENTATION 16384 /* device-specific parameters */ struct bcm5974_param { int snratio; /* signal-to-noise ratio */ int min; /* device minimum reading */ int max; /* device maximum reading */ }; /* device-specific configuration */ struct bcm5974_config { int ansi, iso, jis; /* the product id of this device */ int caps; /* device capability bitmask */ int bt_ep; /* the endpoint of the button interface */ int bt_datalen; /* data length of the button interface */ int tp_ep; /* the endpoint of the trackpad interface */ enum tp_type tp_type; /* type of trackpad interface */ int tp_header; /* bytes in header block */ int tp_datalen; /* data length of the trackpad interface */ int tp_button; /* offset to button data */ int tp_fsize; /* bytes in single finger block */ int tp_delta; /* offset from header to finger struct */ int um_size; /* usb control message length */ int 
um_req_val; /* usb control message value */ int um_req_idx; /* usb control message index */ int um_switch_idx; /* usb control message mode switch index */ int um_switch_on; /* usb control message mode switch on */ int um_switch_off; /* usb control message mode switch off */ struct bcm5974_param p; /* finger pressure limits */ struct bcm5974_param w; /* finger width limits */ struct bcm5974_param x; /* horizontal limits */ struct bcm5974_param y; /* vertical limits */ struct bcm5974_param o; /* orientation limits */ }; /* logical device structure */ struct bcm5974 { char phys[64]; struct usb_device *udev; /* usb device */ struct usb_interface *intf; /* our interface */ struct input_dev *input; /* input dev */ struct bcm5974_config cfg; /* device configuration */ struct mutex pm_mutex; /* serialize access to open/suspend */ int opened; /* 1: opened, 0: closed */ struct urb *bt_urb; /* button usb request block */ struct bt_data *bt_data; /* button transferred data */ struct urb *tp_urb; /* trackpad usb request block */ u8 *tp_data; /* trackpad transferred data */ const struct tp_finger *index[MAX_FINGERS]; /* finger index data */ struct input_mt_pos pos[MAX_FINGERS]; /* position array */ int slots[MAX_FINGERS]; /* slot assignments */ }; /* trackpad finger block data, le16-aligned */ static const struct tp_finger *get_tp_finger(const struct bcm5974 *dev, int i) { const struct bcm5974_config *c = &dev->cfg; u8 *f_base = dev->tp_data + c->tp_header + c->tp_delta; return (const struct tp_finger *)(f_base + i * c->tp_fsize); } #define DATAFORMAT(type) \ type, \ HEADER_##type, \ HEADER_##type + (MAX_FINGERS) * (FSIZE_##type), \ BUTTON_##type, \ FSIZE_##type, \ DELTA_##type, \ USBMSG_##type /* logical signal quality */ #define SN_PRESSURE 45 /* pressure signal-to-noise ratio */ #define SN_WIDTH 25 /* width signal-to-noise ratio */ #define SN_COORD 250 /* coordinate signal-to-noise ratio */ #define SN_ORIENT 10 /* orientation signal-to-noise ratio */ /* device constants */ static const struct bcm5974_config bcm5974_config_table[] = { { USB_DEVICE_ID_APPLE_WELLSPRING_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING_ISO, USB_DEVICE_ID_APPLE_WELLSPRING_JIS, 0, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE1), { SN_PRESSURE, 0, 256 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4824, 5342 }, { SN_COORD, -172, 5820 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS, 0, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE1), { SN_PRESSURE, 0, 256 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4824, 4824 }, { SN_COORD, -172, 4290 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4460, 5166 }, { SN_COORD, -75, 6700 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING4_ISO, USB_DEVICE_ID_APPLE_WELLSPRING4_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, { SN_COORD, -150, 6600 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO, 
USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4616, 5112 }, { SN_COORD, -142, 5234 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4415, 5050 }, { SN_COORD, -55, 6680 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING6_ISO, USB_DEVICE_ID_APPLE_WELLSPRING6_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, { SN_COORD, -150, 6600 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, { SN_COORD, -150, 6730 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, { SN_COORD, -150, 6600 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, { SN_COORD, -150, 6730 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS, HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, DATAFORMAT(TYPE2), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4750, 5280 }, { SN_COORD, -150, 6730 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS, HAS_INTEGRATED_BUTTON, 0, sizeof(struct bt_data), 0x83, DATAFORMAT(TYPE3), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4620, 5140 }, { SN_COORD, -150, 6600 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS, HAS_INTEGRATED_BUTTON, 0, sizeof(struct bt_data), 0x83, DATAFORMAT(TYPE4), { SN_PRESSURE, 0, 300 }, { SN_WIDTH, 0, 2048 }, { SN_COORD, -4828, 5345 }, { SN_COORD, -203, 6803 }, { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, {} }; /* return the device-specific configuration by device */ static const struct bcm5974_config *bcm5974_get_config(struct usb_device *udev) { u16 id = le16_to_cpu(udev->descriptor.idProduct); const struct bcm5974_config *cfg; for (cfg = bcm5974_config_table; cfg->ansi; ++cfg) if (cfg->ansi == id || cfg->iso == id || cfg->jis == id) return cfg; return bcm5974_config_table; } /* convert 16-bit 
little endian to signed integer */ static inline int raw2int(__le16 x) { return (signed short)le16_to_cpu(x); } static void set_abs(struct input_dev *input, unsigned int code, const struct bcm5974_param *p) { int fuzz = p->snratio ? (p->max - p->min) / p->snratio : 0; input_set_abs_params(input, code, p->min, p->max, fuzz, 0); } /* setup which logical events to report */ static void setup_events_to_report(struct input_dev *input_dev, const struct bcm5974_config *cfg) { __set_bit(EV_ABS, input_dev->evbit); /* for synaptics only */ input_set_abs_params(input_dev, ABS_PRESSURE, 0, 256, 5, 0); input_set_abs_params(input_dev, ABS_TOOL_WIDTH, 0, 16, 0, 0); /* finger touch area */ set_abs(input_dev, ABS_MT_TOUCH_MAJOR, &cfg->w); set_abs(input_dev, ABS_MT_TOUCH_MINOR, &cfg->w); /* finger approach area */ set_abs(input_dev, ABS_MT_WIDTH_MAJOR, &cfg->w); set_abs(input_dev, ABS_MT_WIDTH_MINOR, &cfg->w); /* finger orientation */ set_abs(input_dev, ABS_MT_ORIENTATION, &cfg->o); /* finger position */ set_abs(input_dev, ABS_MT_POSITION_X, &cfg->x); set_abs(input_dev, ABS_MT_POSITION_Y, &cfg->y); __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_LEFT, input_dev->keybit); if (cfg->caps & HAS_INTEGRATED_BUTTON) __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); input_mt_init_slots(input_dev, MAX_FINGERS, INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED | INPUT_MT_TRACK); } /* report button data as logical button state */ static int report_bt_state(struct bcm5974 *dev, int size) { if (size != sizeof(struct bt_data)) return -EIO; dprintk(7, "bcm5974: button data: %x %x %x %x\n", dev->bt_data->unknown1, dev->bt_data->button, dev->bt_data->rel_x, dev->bt_data->rel_y); input_report_key(dev->input, BTN_LEFT, dev->bt_data->button); input_sync(dev->input); return 0; } static void report_finger_data(struct input_dev *input, int slot, const struct input_mt_pos *pos, const struct tp_finger *f) { input_mt_slot(input, slot); input_mt_report_slot_state(input, MT_TOOL_FINGER, true); input_report_abs(input, ABS_MT_TOUCH_MAJOR, raw2int(f->touch_major) << 1); input_report_abs(input, ABS_MT_TOUCH_MINOR, raw2int(f->touch_minor) << 1); input_report_abs(input, ABS_MT_WIDTH_MAJOR, raw2int(f->tool_major) << 1); input_report_abs(input, ABS_MT_WIDTH_MINOR, raw2int(f->tool_minor) << 1); input_report_abs(input, ABS_MT_ORIENTATION, MAX_FINGER_ORIENTATION - raw2int(f->orientation)); input_report_abs(input, ABS_MT_POSITION_X, pos->x); input_report_abs(input, ABS_MT_POSITION_Y, pos->y); } static void report_synaptics_data(struct input_dev *input, const struct bcm5974_config *cfg, const struct tp_finger *f, int raw_n) { int abs_p = 0, abs_w = 0; if (raw_n) { int p = raw2int(f->touch_major); int w = raw2int(f->tool_major); if (p > 0 && raw2int(f->origin)) { abs_p = clamp_val(256 * p / cfg->p.max, 0, 255); abs_w = clamp_val(16 * w / cfg->w.max, 0, 15); } } input_report_abs(input, ABS_PRESSURE, abs_p); input_report_abs(input, ABS_TOOL_WIDTH, abs_w); } /* report trackpad data as logical trackpad state */ static int report_tp_state(struct bcm5974 *dev, int size) { const struct bcm5974_config *c = &dev->cfg; const struct tp_finger *f; struct input_dev *input = dev->input; int raw_n, i, n = 0; if (size < c->tp_header || (size - c->tp_header) % c->tp_fsize != 0) return -EIO; raw_n = (size - c->tp_header) / c->tp_fsize; for (i = 0; i < raw_n; i++) { f = get_tp_finger(dev, i); if (raw2int(f->touch_major) == 0) continue; dev->pos[n].x = raw2int(f->abs_x); dev->pos[n].y = c->y.min + c->y.max - raw2int(f->abs_y); dev->index[n++] = f; } 
input_mt_assign_slots(input, dev->slots, dev->pos, n, 0); for (i = 0; i < n; i++) report_finger_data(input, dev->slots[i], &dev->pos[i], dev->index[i]); input_mt_sync_frame(input); report_synaptics_data(input, c, get_tp_finger(dev, 0), raw_n); /* later types report button events via integrated button only */ if (c->caps & HAS_INTEGRATED_BUTTON) { int ibt = raw2int(dev->tp_data[c->tp_button]); input_report_key(input, BTN_LEFT, ibt); } input_sync(input); return 0; } static int bcm5974_wellspring_mode(struct bcm5974 *dev, bool on) { const struct bcm5974_config *c = &dev->cfg; int retval = 0, size; char *data; /* Type 3 does not require a mode switch */ if (c->tp_type == TYPE3) return 0; data = kmalloc(c->um_size, GFP_KERNEL); if (!data) { dev_err(&dev->intf->dev, "out of memory\n"); retval = -ENOMEM; goto out; } /* read configuration */ size = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), BCM5974_WELLSPRING_MODE_READ_REQUEST_ID, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, c->um_req_val, c->um_req_idx, data, c->um_size, 5000); if (size != c->um_size) { dev_err(&dev->intf->dev, "could not read from device\n"); retval = -EIO; goto out; } /* apply the mode switch */ data[c->um_switch_idx] = on ? c->um_switch_on : c->um_switch_off; /* write configuration */ size = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), BCM5974_WELLSPRING_MODE_WRITE_REQUEST_ID, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, c->um_req_val, c->um_req_idx, data, c->um_size, 5000); if (size != c->um_size) { dev_err(&dev->intf->dev, "could not write to device\n"); retval = -EIO; goto out; } dprintk(2, "bcm5974: switched to %s mode.\n", on ? "wellspring" : "normal"); out: kfree(data); return retval; } static void bcm5974_irq_button(struct urb *urb) { struct bcm5974 *dev = urb->context; struct usb_interface *intf = dev->intf; int error; switch (urb->status) { case 0: break; case -EOVERFLOW: case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_dbg(&intf->dev, "button urb shutting down: %d\n", urb->status); return; default: dev_dbg(&intf->dev, "button urb status: %d\n", urb->status); goto exit; } if (report_bt_state(dev, dev->bt_urb->actual_length)) dprintk(1, "bcm5974: bad button package, length: %d\n", dev->bt_urb->actual_length); exit: error = usb_submit_urb(dev->bt_urb, GFP_ATOMIC); if (error) dev_err(&intf->dev, "button urb failed: %d\n", error); } static void bcm5974_irq_trackpad(struct urb *urb) { struct bcm5974 *dev = urb->context; struct usb_interface *intf = dev->intf; int error; switch (urb->status) { case 0: break; case -EOVERFLOW: case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_dbg(&intf->dev, "trackpad urb shutting down: %d\n", urb->status); return; default: dev_dbg(&intf->dev, "trackpad urb status: %d\n", urb->status); goto exit; } /* control response ignored */ if (dev->tp_urb->actual_length == 2) goto exit; if (report_tp_state(dev, dev->tp_urb->actual_length)) dprintk(1, "bcm5974: bad trackpad package, length: %d\n", dev->tp_urb->actual_length); exit: error = usb_submit_urb(dev->tp_urb, GFP_ATOMIC); if (error) dev_err(&intf->dev, "trackpad urb failed: %d\n", error); } /* * The Wellspring trackpad, like many recent Apple trackpads, share * the usb device with the keyboard. Since keyboards are usually * handled by the HID system, the device ends up being handled by two * modules. Setting up the device therefore becomes slightly * complicated. To enable multitouch features, a mode switch is * required, which is usually applied via the control interface of the * device. 
It can be argued where this switch should take place. In * some drivers, like appletouch, the switch is made during * probe. However, the hid module may also alter the state of the * device, resulting in trackpad malfunction under certain * circumstances. To get around this problem, there is at least one * example that utilizes the USB_QUIRK_RESET_RESUME quirk in order to * receive a reset_resume request rather than the normal resume. * Since the implementation of reset_resume is equal to mode switch * plus start_traffic, it seems easier to always do the switch when * starting traffic on the device. */ static int bcm5974_start_traffic(struct bcm5974 *dev) { int error; error = bcm5974_wellspring_mode(dev, true); if (error) { dprintk(1, "bcm5974: mode switch failed\n"); goto err_out; } if (dev->bt_urb) { error = usb_submit_urb(dev->bt_urb, GFP_KERNEL); if (error) goto err_reset_mode; } error = usb_submit_urb(dev->tp_urb, GFP_KERNEL); if (error) goto err_kill_bt; return 0; err_kill_bt: usb_kill_urb(dev->bt_urb); err_reset_mode: bcm5974_wellspring_mode(dev, false); err_out: return error; } static void bcm5974_pause_traffic(struct bcm5974 *dev) { usb_kill_urb(dev->tp_urb); usb_kill_urb(dev->bt_urb); bcm5974_wellspring_mode(dev, false); } /* * The code below implements open/close and manual suspend/resume. * All functions may be called in random order. * * Opening a suspended device fails with EACCES - permission denied. * * Failing a resume leaves the device resumed but closed. */ static int bcm5974_open(struct input_dev *input) { struct bcm5974 *dev = input_get_drvdata(input); int error; error = usb_autopm_get_interface(dev->intf); if (error) return error; mutex_lock(&dev->pm_mutex); error = bcm5974_start_traffic(dev); if (!error) dev->opened = 1; mutex_unlock(&dev->pm_mutex); if (error) usb_autopm_put_interface(dev->intf); return error; } static void bcm5974_close(struct input_dev *input) { struct bcm5974 *dev = input_get_drvdata(input); mutex_lock(&dev->pm_mutex); bcm5974_pause_traffic(dev); dev->opened = 0; mutex_unlock(&dev->pm_mutex); usb_autopm_put_interface(dev->intf); } static int bcm5974_suspend(struct usb_interface *iface, pm_message_t message) { struct bcm5974 *dev = usb_get_intfdata(iface); mutex_lock(&dev->pm_mutex); if (dev->opened) bcm5974_pause_traffic(dev); mutex_unlock(&dev->pm_mutex); return 0; } static int bcm5974_resume(struct usb_interface *iface) { struct bcm5974 *dev = usb_get_intfdata(iface); int error = 0; mutex_lock(&dev->pm_mutex); if (dev->opened) error = bcm5974_start_traffic(dev); mutex_unlock(&dev->pm_mutex); return error; } static int bcm5974_probe(struct usb_interface *iface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(iface); const struct bcm5974_config *cfg; struct bcm5974 *dev; struct input_dev *input_dev; int error = -ENOMEM; /* find the product index */ cfg = bcm5974_get_config(udev); /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct bcm5974), GFP_KERNEL); input_dev = input_allocate_device(); if (!dev || !input_dev) { dev_err(&iface->dev, "out of memory\n"); goto err_free_devs; } dev->udev = udev; dev->intf = iface; dev->input = input_dev; dev->cfg = *cfg; mutex_init(&dev->pm_mutex); /* setup urbs */ if (cfg->tp_type == TYPE1) { dev->bt_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->bt_urb) goto err_free_devs; } dev->tp_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->tp_urb) goto err_free_bt_urb; if (dev->bt_urb) { dev->bt_data = usb_alloc_coherent(dev->udev, dev->cfg.bt_datalen, 
GFP_KERNEL, &dev->bt_urb->transfer_dma); if (!dev->bt_data) goto err_free_urb; } dev->tp_data = usb_alloc_coherent(dev->udev, dev->cfg.tp_datalen, GFP_KERNEL, &dev->tp_urb->transfer_dma); if (!dev->tp_data) goto err_free_bt_buffer; if (dev->bt_urb) { usb_fill_int_urb(dev->bt_urb, udev, usb_rcvintpipe(udev, cfg->bt_ep), dev->bt_data, dev->cfg.bt_datalen, bcm5974_irq_button, dev, 1); dev->bt_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; } usb_fill_int_urb(dev->tp_urb, udev, usb_rcvintpipe(udev, cfg->tp_ep), dev->tp_data, dev->cfg.tp_datalen, bcm5974_irq_trackpad, dev, 1); dev->tp_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* create bcm5974 device */ usb_make_path(udev, dev->phys, sizeof(dev->phys)); strlcat(dev->phys, "/input0", sizeof(dev->phys)); input_dev->name = "bcm5974"; input_dev->phys = dev->phys; usb_to_input_id(dev->udev, &input_dev->id); /* report driver capabilities via the version field */ input_dev->id.version = cfg->caps; input_dev->dev.parent = &iface->dev; input_set_drvdata(input_dev, dev); input_dev->open = bcm5974_open; input_dev->close = bcm5974_close; setup_events_to_report(input_dev, cfg); error = input_register_device(dev->input); if (error) goto err_free_buffer; /* save our data pointer in this interface device */ usb_set_intfdata(iface, dev); return 0; err_free_buffer: usb_free_coherent(dev->udev, dev->cfg.tp_datalen, dev->tp_data, dev->tp_urb->transfer_dma); err_free_bt_buffer: if (dev->bt_urb) usb_free_coherent(dev->udev, dev->cfg.bt_datalen, dev->bt_data, dev->bt_urb->transfer_dma); err_free_urb: usb_free_urb(dev->tp_urb); err_free_bt_urb: usb_free_urb(dev->bt_urb); err_free_devs: usb_set_intfdata(iface, NULL); input_free_device(input_dev); kfree(dev); return error; } static void bcm5974_disconnect(struct usb_interface *iface) { struct bcm5974 *dev = usb_get_intfdata(iface); usb_set_intfdata(iface, NULL); input_unregister_device(dev->input); usb_free_coherent(dev->udev, dev->cfg.tp_datalen, dev->tp_data, dev->tp_urb->transfer_dma); if (dev->bt_urb) usb_free_coherent(dev->udev, dev->cfg.bt_datalen, dev->bt_data, dev->bt_urb->transfer_dma); usb_free_urb(dev->tp_urb); usb_free_urb(dev->bt_urb); kfree(dev); } static struct usb_driver bcm5974_driver = { .name = "bcm5974", .probe = bcm5974_probe, .disconnect = bcm5974_disconnect, .suspend = bcm5974_suspend, .resume = bcm5974_resume, .id_table = bcm5974_table, .supports_autosuspend = 1, }; module_usb_driver(bcm5974_driver);
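The report-parsing path above (get_tp_finger(), raw2int() and the DATAFORMAT() offsets) is easier to follow outside the kernel. Below is a minimal standalone sketch, not driver code, that decodes a TYPE2 report from a plain byte buffer under the following assumptions: the buffer stands in for one captured interrupt transfer, and the hand-filled sample values are purely hypothetical. The offsets simply mirror HEADER_TYPE2 (15 le16 words), FSIZE_TYPE2 (14 le16 words per finger), BUTTON_TYPE2 (byte 15) and the tp_finger word order (abs_x is word 1, abs_y word 2, touch_major word 8).

#include <stdint.h>
#include <stdio.h>

/* TYPE2 layout, mirroring HEADER_TYPE2 / FSIZE_TYPE2 / BUTTON_TYPE2 above */
#define TYPE2_HEADER	(15 * 2)	/* bytes before the first finger block */
#define TYPE2_FSIZE	(14 * 2)	/* bytes per finger block */
#define TYPE2_BUTTON	15		/* integrated-button byte */

/* userspace equivalent of raw2int(): little-endian u16 -> signed int */
static int raw2int(const uint8_t *p)
{
	return (int16_t)(p[0] | (p[1] << 8));
}

int main(void)
{
	/* hypothetical one-finger TYPE2 transfer, hand-filled for the example */
	uint8_t buf[TYPE2_HEADER + TYPE2_FSIZE] = { 0 };
	const int size = sizeof(buf);
	int i, n;

	buf[TYPE2_BUTTON] = 1;			/* physical button down      */
	buf[TYPE2_HEADER +  2] = 0xe8;		/* abs_x (word 1) = 1000     */
	buf[TYPE2_HEADER +  3] = 0x03;
	buf[TYPE2_HEADER +  4] = 0x64;		/* abs_y (word 2) = 100      */
	buf[TYPE2_HEADER + 16] = 0x50;		/* touch_major (word 8) = 80 */

	/* same sanity check as report_tp_state() */
	if (size < TYPE2_HEADER || (size - TYPE2_HEADER) % TYPE2_FSIZE)
		return 1;

	n = (size - TYPE2_HEADER) / TYPE2_FSIZE;
	for (i = 0; i < n; i++) {
		const uint8_t *f = buf + TYPE2_HEADER + i * TYPE2_FSIZE;

		if (raw2int(f + 16) == 0)	/* touch_major == 0: no contact */
			continue;
		printf("finger %d: x=%d y=%d touch_major=%d\n",
		       i, raw2int(f + 2), raw2int(f + 4), raw2int(f + 16));
	}
	printf("button=%d\n", buf[TYPE2_BUTTON]);
	return 0;
}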
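The mode switch performed by bcm5974_wellspring_mode() is just a HID GET_REPORT/SET_REPORT pair on the control endpoint. A rough userspace equivalent using libusb-1.0 is sketched below, assuming a TYPE1-TYPE3 device (USBMSG_*: 8-byte feature report, wValue 0x300, wIndex 0, byte 0 set to 0x1 for multitouch mode, 0x8 for normal mode). The product id 0x0259 is only an example taken from the table above, and a real program would also have to detach the kernel driver and claim the interface; in practice the driver itself does this switch from bcm5974_start_traffic(), so treat this purely as an illustration of the wire protocol.

#include <stdint.h>
#include <stdio.h>
#include <libusb-1.0/libusb.h>

#define WS_SIZE		8	/* um_size for TYPE1-TYPE3		*/
#define WS_WVALUE	0x300	/* um_req_val: feature report, id 0	*/
#define WS_WINDEX	0	/* um_req_idx				*/
#define GET_REPORT	1	/* BCM5974_WELLSPRING_MODE_READ_REQUEST_ID  */
#define SET_REPORT	9	/* BCM5974_WELLSPRING_MODE_WRITE_REQUEST_ID */

int main(void)
{
	/* 0x05ac:0x0259 = Wellspring7A ANSI; adjust to the device at hand */
	libusb_device_handle *h;
	unsigned char data[WS_SIZE];
	int r;

	if (libusb_init(NULL) < 0)
		return 1;
	h = libusb_open_device_with_vid_pid(NULL, 0x05ac, 0x0259);
	if (!h)
		goto out;

	/* read the current feature report, as the driver does first */
	r = libusb_control_transfer(h,
			LIBUSB_ENDPOINT_IN | LIBUSB_REQUEST_TYPE_CLASS |
			LIBUSB_RECIPIENT_INTERFACE,
			GET_REPORT, WS_WVALUE, WS_WINDEX, data, WS_SIZE, 5000);
	if (r != WS_SIZE)
		goto close;

	data[0] = 0x01;		/* um_switch_idx = 0, um_switch_on = 0x1 */

	/* write it back to enter multitouch ("wellspring") mode */
	r = libusb_control_transfer(h,
			LIBUSB_ENDPOINT_OUT | LIBUSB_REQUEST_TYPE_CLASS |
			LIBUSB_RECIPIENT_INTERFACE,
			SET_REPORT, WS_WVALUE, WS_WINDEX, data, WS_SIZE, 5000);
	printf("mode switch %s\n", r == WS_SIZE ? "ok" : "failed");
close:
	libusb_close(h);
out:
	libusb_exit(NULL);
	return 0;
}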
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_ERR_H
#define _LINUX_ERR_H

#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/errno.h>

/*
 * Kernel pointers have redundant information, so we can use a
 * scheme where we can return either an error code or a normal
 * pointer with the same return value.
 *
 * This should be a per-architecture thing, to allow different
 * error and pointer decisions.
 */
#define MAX_ERRNO	4095

#ifndef __ASSEMBLY__

/**
 * IS_ERR_VALUE - Detect an error pointer.
 * @x: The pointer to check.
 *
 * Like IS_ERR(), but does not generate a compiler warning if result is unused.
 */
#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)

/**
 * ERR_PTR - Create an error pointer.
 * @error: A negative error code.
 *
 * Encodes @error into a pointer value. Users should consider the result
 * opaque and not assume anything about how the error is encoded.
 *
 * Return: A pointer with @error encoded within its value.
 */
static inline void * __must_check ERR_PTR(long error)
{
	return (void *) error;
}

/**
 * PTR_ERR - Extract the error code from an error pointer.
 * @ptr: An error pointer.
 * Return: The error code within @ptr.
 */
static inline long __must_check PTR_ERR(__force const void *ptr)
{
	return (long) ptr;
}

/**
 * IS_ERR - Detect an error pointer.
 * @ptr: The pointer to check.
 * Return: true if @ptr is an error pointer, false otherwise.
 */
static inline bool __must_check IS_ERR(__force const void *ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

/**
 * IS_ERR_OR_NULL - Detect an error pointer or a null pointer.
 * @ptr: The pointer to check.
 *
 * Like IS_ERR(), but also returns true for a null pointer.
 */
static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
{
	return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
}

/**
 * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type
 * @ptr: The pointer to cast.
 *
 * Explicitly cast an error-valued pointer to another pointer type in such a
 * way as to make it clear that's what's going on.
 */
static inline void * __must_check ERR_CAST(__force const void *ptr)
{
	/* cast away the const */
	return (void *) ptr;
}

/**
 * PTR_ERR_OR_ZERO - Extract the error code from a pointer if it has one.
 * @ptr: A potential error pointer.
 *
 * Convenience function that can be used inside a function that returns
 * an error code to propagate errors received as error pointers.
 * For example, ``return PTR_ERR_OR_ZERO(ptr);`` replaces:
 *
 * .. code-block:: c
 *
 *	if (IS_ERR(ptr))
 *		return PTR_ERR(ptr);
 *	else
 *		return 0;
 *
 * Return: The error code within @ptr if it is an error pointer; 0 otherwise.
 */
static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
{
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);
	else
		return 0;
}

#endif

#endif /* _LINUX_ERR_H */
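The header above works because the top MAX_ERRNO addresses of the kernel address space never correspond to valid objects, so a negative errno can be carried in an ordinary pointer return value and recovered by the caller. The snippet below is a small userspace re-implementation of the same pattern, for illustration only; the real macros are kernel-only, and the find_thing() helper is hypothetical. The pointer/integer round trip relies on the usual flat-address-space behaviour, just as the kernel's own version does.

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* userspace stand-ins for ERR_PTR()/PTR_ERR()/IS_ERR(), illustration only */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* hypothetical lookup that reports failure through its return value alone */
static void *find_thing(int key)
{
	static int thing = 42;

	if (key != 1)
		return ERR_PTR(-ENOENT);	/* error encoded in the pointer */
	return &thing;
}

int main(void)
{
	void *p = find_thing(0);

	if (IS_ERR(p))
		printf("lookup failed: %ld\n", PTR_ERR(p));	/* prints -2 */
	else
		printf("value: %d\n", *(int *)p);
	return 0;
}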
//
SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2008 IBM Corporation * Author: Mimi Zohar <zohar@us.ibm.com> * * ima_policy.c * - initialize default measure policy rules */ #include <linux/init.h> #include <linux/list.h> #include <linux/kernel_read_file.h> #include <linux/fs.h> #include <linux/security.h> #include <linux/magic.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/rculist.h> #include <linux/seq_file.h> #include <linux/ima.h> #include "ima.h" /* flags definitions */ #define IMA_FUNC 0x0001 #define IMA_MASK 0x0002 #define IMA_FSMAGIC 0x0004 #define IMA_UID 0x0008 #define IMA_FOWNER 0x0010 #define IMA_FSUUID 0x0020 #define IMA_INMASK 0x0040 #define IMA_EUID 0x0080 #define IMA_PCR 0x0100 #define IMA_FSNAME 0x0200 #define IMA_KEYRINGS 0x0400 #define IMA_LABEL 0x0800 #define IMA_VALIDATE_ALGOS 0x1000 #define IMA_GID 0x2000 #define IMA_EGID 0x4000 #define IMA_FGROUP 0x8000 #define UNKNOWN 0 #define MEASURE 0x0001 /* same as IMA_MEASURE */ #define DONT_MEASURE 0x0002 #define APPRAISE 0x0004 /* same as IMA_APPRAISE */ #define DONT_APPRAISE 0x0008 #define AUDIT 0x0040 #define HASH 0x0100 #define DONT_HASH 0x0200 #define INVALID_PCR(a) (((a) < 0) || \ (a) >= (sizeof_field(struct ima_iint_cache, measured_pcrs) * 8)) int ima_policy_flag; static int temp_ima_appraise; static int build_ima_appraise __ro_after_init; atomic_t ima_setxattr_allowed_hash_algorithms; #define MAX_LSM_RULES 6 enum lsm_rule_types { LSM_OBJ_USER, LSM_OBJ_ROLE, LSM_OBJ_TYPE, LSM_SUBJ_USER, LSM_SUBJ_ROLE, LSM_SUBJ_TYPE }; enum policy_types { ORIGINAL_TCB = 1, DEFAULT_TCB }; enum policy_rule_list { IMA_DEFAULT_POLICY = 1, IMA_CUSTOM_POLICY }; struct ima_rule_opt_list { size_t count; char *items[] __counted_by(count); }; /* * These comparators are needed nowhere outside of ima so just define them here. * This pattern should hopefully never be needed outside of ima. 
*/ static inline bool vfsuid_gt_kuid(vfsuid_t vfsuid, kuid_t kuid) { return __vfsuid_val(vfsuid) > __kuid_val(kuid); } static inline bool vfsgid_gt_kgid(vfsgid_t vfsgid, kgid_t kgid) { return __vfsgid_val(vfsgid) > __kgid_val(kgid); } static inline bool vfsuid_lt_kuid(vfsuid_t vfsuid, kuid_t kuid) { return __vfsuid_val(vfsuid) < __kuid_val(kuid); } static inline bool vfsgid_lt_kgid(vfsgid_t vfsgid, kgid_t kgid) { return __vfsgid_val(vfsgid) < __kgid_val(kgid); } struct ima_rule_entry { struct list_head list; int action; unsigned int flags; enum ima_hooks func; int mask; unsigned long fsmagic; uuid_t fsuuid; kuid_t uid; kgid_t gid; kuid_t fowner; kgid_t fgroup; bool (*uid_op)(kuid_t cred_uid, kuid_t rule_uid); /* Handlers for operators */ bool (*gid_op)(kgid_t cred_gid, kgid_t rule_gid); bool (*fowner_op)(vfsuid_t vfsuid, kuid_t rule_uid); /* vfsuid_eq_kuid(), vfsuid_gt_kuid(), vfsuid_lt_kuid() */ bool (*fgroup_op)(vfsgid_t vfsgid, kgid_t rule_gid); /* vfsgid_eq_kgid(), vfsgid_gt_kgid(), vfsgid_lt_kgid() */ int pcr; unsigned int allowed_algos; /* bitfield of allowed hash algorithms */ struct { void *rule; /* LSM file metadata specific */ char *args_p; /* audit value */ int type; /* audit type */ } lsm[MAX_LSM_RULES]; char *fsname; struct ima_rule_opt_list *keyrings; /* Measure keys added to these keyrings */ struct ima_rule_opt_list *label; /* Measure data grouped under this label */ struct ima_template_desc *template; }; /* * sanity check in case the kernels gains more hash algorithms that can * fit in an unsigned int */ static_assert( 8 * sizeof(unsigned int) >= HASH_ALGO__LAST, "The bitfield allowed_algos in ima_rule_entry is too small to contain all the supported hash algorithms, consider using a bigger type"); /* * Without LSM specific knowledge, the default policy can only be * written in terms of .action, .func, .mask, .fsmagic, .uid, .gid, * .fowner, and .fgroup */ /* * The minimum rule set to allow for full TCB coverage. Measures all files * opened or mmap for exec and everything read by root. Dangerous because * normal users can easily run the machine out of memory simply building * and running executables. 
*/ static struct ima_rule_entry dont_measure_rules[] __ro_after_init = { {.action = DONT_MEASURE, .fsmagic = PROC_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SYSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = DEBUGFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = TMPFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = DEVPTS_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SMACK_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = CGROUP_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = CGROUP2_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = EFIVARFS_MAGIC, .flags = IMA_FSMAGIC} }; static struct ima_rule_entry original_measurement_rules[] __ro_after_init = { {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, .uid = GLOBAL_ROOT_UID, .uid_op = &uid_eq, .flags = IMA_FUNC | IMA_MASK | IMA_UID}, {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, }; static struct ima_rule_entry default_measurement_rules[] __ro_after_init = { {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, .uid = GLOBAL_ROOT_UID, .uid_op = &uid_eq, .flags = IMA_FUNC | IMA_INMASK | IMA_EUID}, {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, .uid = GLOBAL_ROOT_UID, .uid_op = &uid_eq, .flags = IMA_FUNC | IMA_INMASK | IMA_UID}, {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = POLICY_CHECK, .flags = IMA_FUNC}, }; static struct ima_rule_entry default_appraise_rules[] __ro_after_init = { {.action = DONT_APPRAISE, .fsmagic = PROC_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SYSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = DEBUGFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = TMPFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = RAMFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = DEVPTS_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SMACK_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = EFIVARFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = CGROUP_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = CGROUP2_SUPER_MAGIC, .flags = IMA_FSMAGIC}, #ifdef CONFIG_IMA_WRITE_POLICY {.action = APPRAISE, .func = POLICY_CHECK, .flags = IMA_FUNC | 
IMA_DIGSIG_REQUIRED}, #endif #ifndef CONFIG_IMA_APPRAISE_SIGNED_INIT {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, .fowner_op = &vfsuid_eq_kuid, .flags = IMA_FOWNER}, #else /* force signature */ {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, .fowner_op = &vfsuid_eq_kuid, .flags = IMA_FOWNER | IMA_DIGSIG_REQUIRED}, #endif }; static struct ima_rule_entry build_appraise_rules[] __ro_after_init = { #ifdef CONFIG_IMA_APPRAISE_REQUIRE_MODULE_SIGS {.action = APPRAISE, .func = MODULE_CHECK, .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, #endif #ifdef CONFIG_IMA_APPRAISE_REQUIRE_FIRMWARE_SIGS {.action = APPRAISE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, #endif #ifdef CONFIG_IMA_APPRAISE_REQUIRE_KEXEC_SIGS {.action = APPRAISE, .func = KEXEC_KERNEL_CHECK, .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, #endif #ifdef CONFIG_IMA_APPRAISE_REQUIRE_POLICY_SIGS {.action = APPRAISE, .func = POLICY_CHECK, .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, #endif }; static struct ima_rule_entry secure_boot_rules[] __ro_after_init = { {.action = APPRAISE, .func = MODULE_CHECK, .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, {.action = APPRAISE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, {.action = APPRAISE, .func = KEXEC_KERNEL_CHECK, .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, {.action = APPRAISE, .func = POLICY_CHECK, .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, }; static struct ima_rule_entry critical_data_rules[] __ro_after_init = { {.action = MEASURE, .func = CRITICAL_DATA, .flags = IMA_FUNC}, }; /* An array of architecture specific rules */ static struct ima_rule_entry *arch_policy_entry __ro_after_init; static LIST_HEAD(ima_default_rules); static LIST_HEAD(ima_policy_rules); static LIST_HEAD(ima_temp_rules); static struct list_head __rcu *ima_rules = (struct list_head __rcu *)(&ima_default_rules); static int ima_policy __initdata; static int __init default_measure_policy_setup(char *str) { if (ima_policy) return 1; ima_policy = ORIGINAL_TCB; return 1; } __setup("ima_tcb", default_measure_policy_setup); static bool ima_use_appraise_tcb __initdata; static bool ima_use_secure_boot __initdata; static bool ima_use_critical_data __initdata; static bool ima_fail_unverifiable_sigs __ro_after_init; static int __init policy_setup(char *str) { char *p; while ((p = strsep(&str, " |\n")) != NULL) { if (*p == ' ') continue; if ((strcmp(p, "tcb") == 0) && !ima_policy) ima_policy = DEFAULT_TCB; else if (strcmp(p, "appraise_tcb") == 0) ima_use_appraise_tcb = true; else if (strcmp(p, "secure_boot") == 0) ima_use_secure_boot = true; else if (strcmp(p, "critical_data") == 0) ima_use_critical_data = true; else if (strcmp(p, "fail_securely") == 0) ima_fail_unverifiable_sigs = true; else pr_err("policy \"%s\" not found", p); } return 1; } __setup("ima_policy=", policy_setup); static int __init default_appraise_policy_setup(char *str) { ima_use_appraise_tcb = true; return 1; } __setup("ima_appraise_tcb", default_appraise_policy_setup); static struct ima_rule_opt_list *ima_alloc_rule_opt_list(const substring_t *src) { struct ima_rule_opt_list *opt_list; size_t count = 0; char *src_copy; char *cur, *next; size_t i; src_copy = match_strdup(src); if (!src_copy) return ERR_PTR(-ENOMEM); next = src_copy; while ((cur = strsep(&next, "|"))) { /* Don't accept an empty list item */ if (!(*cur)) { kfree(src_copy); return ERR_PTR(-EINVAL); } count++; } /* Don't accept an empty list */ if (!count) { kfree(src_copy); return ERR_PTR(-EINVAL); } opt_list = kzalloc(struct_size(opt_list, items, count), GFP_KERNEL); if 
(!opt_list) { kfree(src_copy); return ERR_PTR(-ENOMEM); } opt_list->count = count; /* * strsep() has already replaced all instances of '|' with '\0', * leaving a byte sequence of NUL-terminated strings. Reference each * string with the array of items. * * IMPORTANT: Ownership of the allocated buffer is transferred from * src_copy to the first element in the items array. To free the * buffer, kfree() must only be called on the first element of the * array. */ for (i = 0, cur = src_copy; i < count; i++) { opt_list->items[i] = cur; cur = strchr(cur, '\0') + 1; } return opt_list; } static void ima_free_rule_opt_list(struct ima_rule_opt_list *opt_list) { if (!opt_list) return; if (opt_list->count) { kfree(opt_list->items[0]); opt_list->count = 0; } kfree(opt_list); } static void ima_lsm_free_rule(struct ima_rule_entry *entry) { int i; for (i = 0; i < MAX_LSM_RULES; i++) { ima_filter_rule_free(entry->lsm[i].rule); kfree(entry->lsm[i].args_p); } } static void ima_free_rule(struct ima_rule_entry *entry) { if (!entry) return; /* * entry->template->fields may be allocated in ima_parse_rule() but that * reference is owned by the corresponding ima_template_desc element in * the defined_templates list and cannot be freed here */ kfree(entry->fsname); ima_free_rule_opt_list(entry->keyrings); ima_lsm_free_rule(entry); kfree(entry); } static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) { struct ima_rule_entry *nentry; int i; /* * Immutable elements are copied over as pointers and data; only * lsm rules can change */ nentry = kmemdup(entry, sizeof(*nentry), GFP_KERNEL); if (!nentry) return NULL; memset(nentry->lsm, 0, sizeof_field(struct ima_rule_entry, lsm)); for (i = 0; i < MAX_LSM_RULES; i++) { if (!entry->lsm[i].args_p) continue; nentry->lsm[i].type = entry->lsm[i].type; nentry->lsm[i].args_p = entry->lsm[i].args_p; ima_filter_rule_init(nentry->lsm[i].type, Audit_equal, nentry->lsm[i].args_p, &nentry->lsm[i].rule); if (!nentry->lsm[i].rule) pr_warn("rule for LSM \'%s\' is undefined\n", nentry->lsm[i].args_p); } return nentry; } static int ima_lsm_update_rule(struct ima_rule_entry *entry) { int i; struct ima_rule_entry *nentry; nentry = ima_lsm_copy_rule(entry); if (!nentry) return -ENOMEM; list_replace_rcu(&entry->list, &nentry->list); synchronize_rcu(); /* * ima_lsm_copy_rule() shallow copied all references, except for the * LSM references, from entry to nentry so we only want to free the LSM * references and the entry itself. All other memory references will now * be owned by nentry. */ for (i = 0; i < MAX_LSM_RULES; i++) ima_filter_rule_free(entry->lsm[i].rule); kfree(entry); return 0; } static bool ima_rule_contains_lsm_cond(struct ima_rule_entry *entry) { int i; for (i = 0; i < MAX_LSM_RULES; i++) if (entry->lsm[i].args_p) return true; return false; } /* * The LSM policy can be reloaded, leaving the IMA LSM based rules referring * to the old, stale LSM policy. Update the IMA LSM based rules to reflect * the reloaded LSM policy. 
*/ static void ima_lsm_update_rules(void) { struct ima_rule_entry *entry, *e; int result; list_for_each_entry_safe(entry, e, &ima_policy_rules, list) { if (!ima_rule_contains_lsm_cond(entry)) continue; result = ima_lsm_update_rule(entry); if (result) { pr_err("lsm rule update error %d\n", result); return; } } } int ima_lsm_policy_change(struct notifier_block *nb, unsigned long event, void *lsm_data) { if (event != LSM_POLICY_CHANGE) return NOTIFY_DONE; ima_lsm_update_rules(); return NOTIFY_OK; } /** * ima_match_rule_data - determine whether func_data matches the policy rule * @rule: a pointer to a rule * @func_data: data to match against the measure rule data * @cred: a pointer to a credentials structure for user validation * * Returns true if func_data matches one in the rule, false otherwise. */ static bool ima_match_rule_data(struct ima_rule_entry *rule, const char *func_data, const struct cred *cred) { const struct ima_rule_opt_list *opt_list = NULL; bool matched = false; size_t i; if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid)) return false; switch (rule->func) { case KEY_CHECK: if (!rule->keyrings) return true; opt_list = rule->keyrings; break; case CRITICAL_DATA: if (!rule->label) return true; opt_list = rule->label; break; default: return false; } if (!func_data) return false; for (i = 0; i < opt_list->count; i++) { if (!strcmp(opt_list->items[i], func_data)) { matched = true; break; } } return matched; } /** * ima_match_rules - determine whether an inode matches the policy rule. * @rule: a pointer to a rule * @idmap: idmap of the mount the inode was found from * @inode: a pointer to an inode * @cred: a pointer to a credentials structure for user validation * @secid: the secid of the task to be validated * @func: LIM hook identifier * @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC) * @func_data: func specific data, may be NULL * * Returns true on rule match, false on failure. 
*/ static bool ima_match_rules(struct ima_rule_entry *rule, struct mnt_idmap *idmap, struct inode *inode, const struct cred *cred, u32 secid, enum ima_hooks func, int mask, const char *func_data) { int i; bool result = false; struct ima_rule_entry *lsm_rule = rule; bool rule_reinitialized = false; if ((rule->flags & IMA_FUNC) && (rule->func != func && func != POST_SETATTR)) return false; switch (func) { case KEY_CHECK: case CRITICAL_DATA: return ((rule->func == func) && ima_match_rule_data(rule, func_data, cred)); default: break; } if ((rule->flags & IMA_MASK) && (rule->mask != mask && func != POST_SETATTR)) return false; if ((rule->flags & IMA_INMASK) && (!(rule->mask & mask) && func != POST_SETATTR)) return false; if ((rule->flags & IMA_FSMAGIC) && rule->fsmagic != inode->i_sb->s_magic) return false; if ((rule->flags & IMA_FSNAME) && strcmp(rule->fsname, inode->i_sb->s_type->name)) return false; if ((rule->flags & IMA_FSUUID) && !uuid_equal(&rule->fsuuid, &inode->i_sb->s_uuid)) return false; if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid)) return false; if (rule->flags & IMA_EUID) { if (has_capability_noaudit(current, CAP_SETUID)) { if (!rule->uid_op(cred->euid, rule->uid) && !rule->uid_op(cred->suid, rule->uid) && !rule->uid_op(cred->uid, rule->uid)) return false; } else if (!rule->uid_op(cred->euid, rule->uid)) return false; } if ((rule->flags & IMA_GID) && !rule->gid_op(cred->gid, rule->gid)) return false; if (rule->flags & IMA_EGID) { if (has_capability_noaudit(current, CAP_SETGID)) { if (!rule->gid_op(cred->egid, rule->gid) && !rule->gid_op(cred->sgid, rule->gid) && !rule->gid_op(cred->gid, rule->gid)) return false; } else if (!rule->gid_op(cred->egid, rule->gid)) return false; } if ((rule->flags & IMA_FOWNER) && !rule->fowner_op(i_uid_into_vfsuid(idmap, inode), rule->fowner)) return false; if ((rule->flags & IMA_FGROUP) && !rule->fgroup_op(i_gid_into_vfsgid(idmap, inode), rule->fgroup)) return false; for (i = 0; i < MAX_LSM_RULES; i++) { int rc = 0; u32 osid; if (!lsm_rule->lsm[i].rule) { if (!lsm_rule->lsm[i].args_p) continue; else return false; } retry: switch (i) { case LSM_OBJ_USER: case LSM_OBJ_ROLE: case LSM_OBJ_TYPE: security_inode_getsecid(inode, &osid); rc = ima_filter_rule_match(osid, lsm_rule->lsm[i].type, Audit_equal, lsm_rule->lsm[i].rule); break; case LSM_SUBJ_USER: case LSM_SUBJ_ROLE: case LSM_SUBJ_TYPE: rc = ima_filter_rule_match(secid, lsm_rule->lsm[i].type, Audit_equal, lsm_rule->lsm[i].rule); break; default: break; } if (rc == -ESTALE && !rule_reinitialized) { lsm_rule = ima_lsm_copy_rule(rule); if (lsm_rule) { rule_reinitialized = true; goto retry; } } if (!rc) { result = false; goto out; } } result = true; out: if (rule_reinitialized) { for (i = 0; i < MAX_LSM_RULES; i++) ima_filter_rule_free(lsm_rule->lsm[i].rule); kfree(lsm_rule); } return result; } /* * In addition to knowing that we need to appraise the file in general, * we need to differentiate between calling hooks, for hook specific rules. */ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func) { if (!(rule->flags & IMA_FUNC)) return IMA_FILE_APPRAISE; switch (func) { case MMAP_CHECK: case MMAP_CHECK_REQPROT: return IMA_MMAP_APPRAISE; case BPRM_CHECK: return IMA_BPRM_APPRAISE; case CREDS_CHECK: return IMA_CREDS_APPRAISE; case FILE_CHECK: case POST_SETATTR: return IMA_FILE_APPRAISE; case MODULE_CHECK ... 
MAX_CHECK - 1: default: return IMA_READ_APPRAISE; } } /** * ima_match_policy - decision based on LSM and other conditions * @idmap: idmap of the mount the inode was found from * @inode: pointer to an inode for which the policy decision is being made * @cred: pointer to a credentials structure for which the policy decision is * being made * @secid: LSM secid of the task to be validated * @func: IMA hook identifier * @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC) * @flags: IMA actions to consider (e.g. IMA_MEASURE | IMA_APPRAISE) * @pcr: set the pcr to extend * @template_desc: the template that should be used for this rule * @func_data: func specific data, may be NULL * @allowed_algos: allowlist of hash algorithms for the IMA xattr * * Measure decision based on func/mask/fsmagic and LSM(subj/obj/type) * conditions. * * Since the IMA policy may be updated multiple times we need to lock the * list when walking it. Reads are many orders of magnitude more numerous * than writes so ima_match_policy() is classical RCU candidate. */ int ima_match_policy(struct mnt_idmap *idmap, struct inode *inode, const struct cred *cred, u32 secid, enum ima_hooks func, int mask, int flags, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos) { struct ima_rule_entry *entry; int action = 0, actmask = flags | (flags << 1); struct list_head *ima_rules_tmp; if (template_desc && !*template_desc) *template_desc = ima_template_desc_current(); rcu_read_lock(); ima_rules_tmp = rcu_dereference(ima_rules); list_for_each_entry_rcu(entry, ima_rules_tmp, list) { if (!(entry->action & actmask)) continue; if (!ima_match_rules(entry, idmap, inode, cred, secid, func, mask, func_data)) continue; action |= entry->flags & IMA_NONACTION_FLAGS; action |= entry->action & IMA_DO_MASK; if (entry->action & IMA_APPRAISE) { action |= get_subaction(entry, func); action &= ~IMA_HASH; if (ima_fail_unverifiable_sigs) action |= IMA_FAIL_UNVERIFIABLE_SIGS; if (allowed_algos && entry->flags & IMA_VALIDATE_ALGOS) *allowed_algos = entry->allowed_algos; } if (entry->action & IMA_DO_MASK) actmask &= ~(entry->action | entry->action << 1); else actmask &= ~(entry->action | entry->action >> 1); if ((pcr) && (entry->flags & IMA_PCR)) *pcr = entry->pcr; if (template_desc && entry->template) *template_desc = entry->template; if (!actmask) break; } rcu_read_unlock(); return action; } /** * ima_update_policy_flags() - Update global IMA variables * * Update ima_policy_flag and ima_setxattr_allowed_hash_algorithms * based on the currently loaded policy. * * With ima_policy_flag, the decision to short circuit out of a function * or not call the function in the first place can be made earlier. * * With ima_setxattr_allowed_hash_algorithms, the policy can restrict the * set of hash algorithms accepted when updating the security.ima xattr of * a file. * * Context: called after a policy update and at system initialization. */ void ima_update_policy_flags(void) { struct ima_rule_entry *entry; int new_policy_flag = 0; struct list_head *ima_rules_tmp; rcu_read_lock(); ima_rules_tmp = rcu_dereference(ima_rules); list_for_each_entry_rcu(entry, ima_rules_tmp, list) { /* * SETXATTR_CHECK rules do not implement a full policy check * because rule checking would probably have an important * performance impact on setxattr(). As a consequence, only one * SETXATTR_CHECK can be active at a given time. * Because we want to preserve that property, we set out to use * atomic_cmpxchg. 
Either: * - the atomic was non-zero: a setxattr hash policy is * already enforced, we do nothing * - the atomic was zero: no setxattr policy was set, enable * the setxattr hash policy */ if (entry->func == SETXATTR_CHECK) { atomic_cmpxchg(&ima_setxattr_allowed_hash_algorithms, 0, entry->allowed_algos); /* SETXATTR_CHECK doesn't impact ima_policy_flag */ continue; } if (entry->action & IMA_DO_MASK) new_policy_flag |= entry->action; } rcu_read_unlock(); ima_appraise |= (build_ima_appraise | temp_ima_appraise); if (!ima_appraise) new_policy_flag &= ~IMA_APPRAISE; ima_policy_flag = new_policy_flag; } static int ima_appraise_flag(enum ima_hooks func) { if (func == MODULE_CHECK) return IMA_APPRAISE_MODULES; else if (func == FIRMWARE_CHECK) return IMA_APPRAISE_FIRMWARE; else if (func == POLICY_CHECK) return IMA_APPRAISE_POLICY; else if (func == KEXEC_KERNEL_CHECK) return IMA_APPRAISE_KEXEC; return 0; } static void add_rules(struct ima_rule_entry *entries, int count, enum policy_rule_list policy_rule) { int i = 0; for (i = 0; i < count; i++) { struct ima_rule_entry *entry; if (policy_rule & IMA_DEFAULT_POLICY) list_add_tail(&entries[i].list, &ima_default_rules); if (policy_rule & IMA_CUSTOM_POLICY) { entry = kmemdup(&entries[i], sizeof(*entry), GFP_KERNEL); if (!entry) continue; list_add_tail(&entry->list, &ima_policy_rules); } if (entries[i].action == APPRAISE) { if (entries != build_appraise_rules) temp_ima_appraise |= ima_appraise_flag(entries[i].func); else build_ima_appraise |= ima_appraise_flag(entries[i].func); } } } static int ima_parse_rule(char *rule, struct ima_rule_entry *entry); static int __init ima_init_arch_policy(void) { const char * const *arch_rules; const char * const *rules; int arch_entries = 0; int i = 0; arch_rules = arch_get_ima_policy(); if (!arch_rules) return arch_entries; /* Get number of rules */ for (rules = arch_rules; *rules != NULL; rules++) arch_entries++; arch_policy_entry = kcalloc(arch_entries + 1, sizeof(*arch_policy_entry), GFP_KERNEL); if (!arch_policy_entry) return 0; /* Convert each policy string rules to struct ima_rule_entry format */ for (rules = arch_rules, i = 0; *rules != NULL; rules++) { char rule[255]; int result; result = strscpy(rule, *rules, sizeof(rule)); INIT_LIST_HEAD(&arch_policy_entry[i].list); result = ima_parse_rule(rule, &arch_policy_entry[i]); if (result) { pr_warn("Skipping unknown architecture policy rule: %s\n", rule); memset(&arch_policy_entry[i], 0, sizeof(*arch_policy_entry)); continue; } i++; } return i; } /** * ima_init_policy - initialize the default measure rules. * * ima_rules points to either the ima_default_rules or the new ima_policy_rules. */ void __init ima_init_policy(void) { int build_appraise_entries, arch_entries; /* if !ima_policy, we load NO default rules */ if (ima_policy) add_rules(dont_measure_rules, ARRAY_SIZE(dont_measure_rules), IMA_DEFAULT_POLICY); switch (ima_policy) { case ORIGINAL_TCB: add_rules(original_measurement_rules, ARRAY_SIZE(original_measurement_rules), IMA_DEFAULT_POLICY); break; case DEFAULT_TCB: add_rules(default_measurement_rules, ARRAY_SIZE(default_measurement_rules), IMA_DEFAULT_POLICY); break; default: break; } /* * Based on runtime secure boot flags, insert arch specific measurement * and appraise rules requiring file signatures for both the initial * and custom policies, prior to other appraise rules. 
* (Highest priority) */ arch_entries = ima_init_arch_policy(); if (!arch_entries) pr_info("No architecture policies found\n"); else add_rules(arch_policy_entry, arch_entries, IMA_DEFAULT_POLICY | IMA_CUSTOM_POLICY); /* * Insert the builtin "secure_boot" policy rules requiring file * signatures, prior to other appraise rules. */ if (ima_use_secure_boot) add_rules(secure_boot_rules, ARRAY_SIZE(secure_boot_rules), IMA_DEFAULT_POLICY); /* * Insert the build time appraise rules requiring file signatures * for both the initial and custom policies, prior to other appraise * rules. As the secure boot rules includes all of the build time * rules, include either one or the other set of rules, but not both. */ build_appraise_entries = ARRAY_SIZE(build_appraise_rules); if (build_appraise_entries) { if (ima_use_secure_boot) add_rules(build_appraise_rules, build_appraise_entries, IMA_CUSTOM_POLICY); else add_rules(build_appraise_rules, build_appraise_entries, IMA_DEFAULT_POLICY | IMA_CUSTOM_POLICY); } if (ima_use_appraise_tcb) add_rules(default_appraise_rules, ARRAY_SIZE(default_appraise_rules), IMA_DEFAULT_POLICY); if (ima_use_critical_data) add_rules(critical_data_rules, ARRAY_SIZE(critical_data_rules), IMA_DEFAULT_POLICY); atomic_set(&ima_setxattr_allowed_hash_algorithms, 0); ima_update_policy_flags(); } /* Make sure we have a valid policy, at least containing some rules. */ int ima_check_policy(void) { if (list_empty(&ima_temp_rules)) return -EINVAL; return 0; } /** * ima_update_policy - update default_rules with new measure rules * * Called on file .release to update the default rules with a complete new * policy. What we do here is to splice ima_policy_rules and ima_temp_rules so * they make a queue. The policy may be updated multiple times and this is the * RCU updater. * * Policy rules are never deleted so ima_policy_flag gets zeroed only once when * we switch from the default policy to user defined. */ void ima_update_policy(void) { struct list_head *policy = &ima_policy_rules; list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu); if (ima_rules != (struct list_head __rcu *)policy) { ima_policy_flag = 0; rcu_assign_pointer(ima_rules, policy); /* * IMA architecture specific policy rules are specified * as strings and converted to an array of ima_entry_rules * on boot. After loading a custom policy, free the * architecture specific rules stored as an array. */ kfree(arch_policy_entry); } ima_update_policy_flags(); /* Custom IMA policy has been loaded */ ima_process_queued_keys(); } /* Keep the enumeration in sync with the policy_tokens! 
*/ enum policy_opt { Opt_measure, Opt_dont_measure, Opt_appraise, Opt_dont_appraise, Opt_audit, Opt_hash, Opt_dont_hash, Opt_obj_user, Opt_obj_role, Opt_obj_type, Opt_subj_user, Opt_subj_role, Opt_subj_type, Opt_func, Opt_mask, Opt_fsmagic, Opt_fsname, Opt_fsuuid, Opt_uid_eq, Opt_euid_eq, Opt_gid_eq, Opt_egid_eq, Opt_fowner_eq, Opt_fgroup_eq, Opt_uid_gt, Opt_euid_gt, Opt_gid_gt, Opt_egid_gt, Opt_fowner_gt, Opt_fgroup_gt, Opt_uid_lt, Opt_euid_lt, Opt_gid_lt, Opt_egid_lt, Opt_fowner_lt, Opt_fgroup_lt, Opt_digest_type, Opt_appraise_type, Opt_appraise_flag, Opt_appraise_algos, Opt_permit_directio, Opt_pcr, Opt_template, Opt_keyrings, Opt_label, Opt_err }; static const match_table_t policy_tokens = { {Opt_measure, "measure"}, {Opt_dont_measure, "dont_measure"}, {Opt_appraise, "appraise"}, {Opt_dont_appraise, "dont_appraise"}, {Opt_audit, "audit"}, {Opt_hash, "hash"}, {Opt_dont_hash, "dont_hash"}, {Opt_obj_user, "obj_user=%s"}, {Opt_obj_role, "obj_role=%s"}, {Opt_obj_type, "obj_type=%s"}, {Opt_subj_user, "subj_user=%s"}, {Opt_subj_role, "subj_role=%s"}, {Opt_subj_type, "subj_type=%s"}, {Opt_func, "func=%s"}, {Opt_mask, "mask=%s"}, {Opt_fsmagic, "fsmagic=%s"}, {Opt_fsname, "fsname=%s"}, {Opt_fsuuid, "fsuuid=%s"}, {Opt_uid_eq, "uid=%s"}, {Opt_euid_eq, "euid=%s"}, {Opt_gid_eq, "gid=%s"}, {Opt_egid_eq, "egid=%s"}, {Opt_fowner_eq, "fowner=%s"}, {Opt_fgroup_eq, "fgroup=%s"}, {Opt_uid_gt, "uid>%s"}, {Opt_euid_gt, "euid>%s"}, {Opt_gid_gt, "gid>%s"}, {Opt_egid_gt, "egid>%s"}, {Opt_fowner_gt, "fowner>%s"}, {Opt_fgroup_gt, "fgroup>%s"}, {Opt_uid_lt, "uid<%s"}, {Opt_euid_lt, "euid<%s"}, {Opt_gid_lt, "gid<%s"}, {Opt_egid_lt, "egid<%s"}, {Opt_fowner_lt, "fowner<%s"}, {Opt_fgroup_lt, "fgroup<%s"}, {Opt_digest_type, "digest_type=%s"}, {Opt_appraise_type, "appraise_type=%s"}, {Opt_appraise_flag, "appraise_flag=%s"}, {Opt_appraise_algos, "appraise_algos=%s"}, {Opt_permit_directio, "permit_directio"}, {Opt_pcr, "pcr=%s"}, {Opt_template, "template=%s"}, {Opt_keyrings, "keyrings=%s"}, {Opt_label, "label=%s"}, {Opt_err, NULL} }; static int ima_lsm_rule_init(struct ima_rule_entry *entry, substring_t *args, int lsm_rule, int audit_type) { int result; if (entry->lsm[lsm_rule].rule) return -EINVAL; entry->lsm[lsm_rule].args_p = match_strdup(args); if (!entry->lsm[lsm_rule].args_p) return -ENOMEM; entry->lsm[lsm_rule].type = audit_type; result = ima_filter_rule_init(entry->lsm[lsm_rule].type, Audit_equal, entry->lsm[lsm_rule].args_p, &entry->lsm[lsm_rule].rule); if (!entry->lsm[lsm_rule].rule) { pr_warn("rule for LSM \'%s\' is undefined\n", entry->lsm[lsm_rule].args_p); if (ima_rules == (struct list_head __rcu *)(&ima_default_rules)) { kfree(entry->lsm[lsm_rule].args_p); entry->lsm[lsm_rule].args_p = NULL; result = -EINVAL; } else result = 0; } return result; } static void ima_log_string_op(struct audit_buffer *ab, char *key, char *value, enum policy_opt rule_operator) { if (!ab) return; switch (rule_operator) { case Opt_uid_gt: case Opt_euid_gt: case Opt_gid_gt: case Opt_egid_gt: case Opt_fowner_gt: case Opt_fgroup_gt: audit_log_format(ab, "%s>", key); break; case Opt_uid_lt: case Opt_euid_lt: case Opt_gid_lt: case Opt_egid_lt: case Opt_fowner_lt: case Opt_fgroup_lt: audit_log_format(ab, "%s<", key); break; default: audit_log_format(ab, "%s=", key); } audit_log_format(ab, "%s ", value); } static void ima_log_string(struct audit_buffer *ab, char *key, char *value) { ima_log_string_op(ab, key, value, Opt_err); } /* * Validating the appended signature included in the measurement list requires * the file hash calculated 
without the appended signature (i.e., the 'd-modsig' * field). Therefore, notify the user if they have the 'modsig' field but not * the 'd-modsig' field in the template. */ static void check_template_modsig(const struct ima_template_desc *template) { #define MSG "template with 'modsig' field also needs 'd-modsig' field\n" bool has_modsig, has_dmodsig; static bool checked; int i; /* We only need to notify the user once. */ if (checked) return; has_modsig = has_dmodsig = false; for (i = 0; i < template->num_fields; i++) { if (!strcmp(template->fields[i]->field_id, "modsig")) has_modsig = true; else if (!strcmp(template->fields[i]->field_id, "d-modsig")) has_dmodsig = true; } if (has_modsig && !has_dmodsig) pr_notice(MSG); checked = true; #undef MSG } /* * Warn if the template does not contain the given field. */ static void check_template_field(const struct ima_template_desc *template, const char *field, const char *msg) { int i; for (i = 0; i < template->num_fields; i++) if (!strcmp(template->fields[i]->field_id, field)) return; pr_notice_once("%s", msg); } static bool ima_validate_rule(struct ima_rule_entry *entry) { /* Ensure that the action is set and is compatible with the flags */ if (entry->action == UNKNOWN) return false; if (entry->action != MEASURE && entry->flags & IMA_PCR) return false; if (entry->action != APPRAISE && entry->flags & (IMA_DIGSIG_REQUIRED | IMA_MODSIG_ALLOWED | IMA_CHECK_BLACKLIST | IMA_VALIDATE_ALGOS)) return false; /* * The IMA_FUNC bit must be set if and only if there's a valid hook * function specified, and vice versa. Enforcing this property allows * for the NONE case below to validate a rule without an explicit hook * function. */ if (((entry->flags & IMA_FUNC) && entry->func == NONE) || (!(entry->flags & IMA_FUNC) && entry->func != NONE)) return false; /* * Ensure that the hook function is compatible with the other * components of the rule */ switch (entry->func) { case NONE: case FILE_CHECK: case MMAP_CHECK: case MMAP_CHECK_REQPROT: case BPRM_CHECK: case CREDS_CHECK: case POST_SETATTR: case FIRMWARE_CHECK: case POLICY_CHECK: if (entry->flags & ~(IMA_FUNC | IMA_MASK | IMA_FSMAGIC | IMA_UID | IMA_FOWNER | IMA_FSUUID | IMA_INMASK | IMA_EUID | IMA_PCR | IMA_FSNAME | IMA_GID | IMA_EGID | IMA_FGROUP | IMA_DIGSIG_REQUIRED | IMA_PERMIT_DIRECTIO | IMA_VALIDATE_ALGOS | IMA_CHECK_BLACKLIST | IMA_VERITY_REQUIRED)) return false; break; case MODULE_CHECK: case KEXEC_KERNEL_CHECK: case KEXEC_INITRAMFS_CHECK: if (entry->flags & ~(IMA_FUNC | IMA_MASK | IMA_FSMAGIC | IMA_UID | IMA_FOWNER | IMA_FSUUID | IMA_INMASK | IMA_EUID | IMA_PCR | IMA_FSNAME | IMA_GID | IMA_EGID | IMA_FGROUP | IMA_DIGSIG_REQUIRED | IMA_PERMIT_DIRECTIO | IMA_MODSIG_ALLOWED | IMA_CHECK_BLACKLIST | IMA_VALIDATE_ALGOS)) return false; break; case KEXEC_CMDLINE: if (entry->action & ~(MEASURE | DONT_MEASURE)) return false; if (entry->flags & ~(IMA_FUNC | IMA_FSMAGIC | IMA_UID | IMA_FOWNER | IMA_FSUUID | IMA_EUID | IMA_PCR | IMA_FSNAME | IMA_GID | IMA_EGID | IMA_FGROUP)) return false; break; case KEY_CHECK: if (entry->action & ~(MEASURE | DONT_MEASURE)) return false; if (entry->flags & ~(IMA_FUNC | IMA_UID | IMA_GID | IMA_PCR | IMA_KEYRINGS)) return false; if (ima_rule_contains_lsm_cond(entry)) return false; break; case CRITICAL_DATA: if (entry->action & ~(MEASURE | DONT_MEASURE)) return false; if (entry->flags & ~(IMA_FUNC | IMA_UID | IMA_GID | IMA_PCR | IMA_LABEL)) return false; if (ima_rule_contains_lsm_cond(entry)) return false; break; case SETXATTR_CHECK: /* any action other than APPRAISE is unsupported 
*/ if (entry->action != APPRAISE) return false; /* SETXATTR_CHECK requires an appraise_algos parameter */ if (!(entry->flags & IMA_VALIDATE_ALGOS)) return false; /* * full policies are not supported, they would have too * much of a performance impact */ if (entry->flags & ~(IMA_FUNC | IMA_VALIDATE_ALGOS)) return false; break; default: return false; } /* Ensure that combinations of flags are compatible with each other */ if (entry->flags & IMA_CHECK_BLACKLIST && !(entry->flags & IMA_DIGSIG_REQUIRED)) return false; /* * Unlike for regular IMA 'appraise' policy rules where security.ima * xattr may contain either a file hash or signature, the security.ima * xattr for fsverity must contain a file signature (sigv3). Ensure * that 'appraise' rules for fsverity require file signatures by * checking the IMA_DIGSIG_REQUIRED flag is set. */ if (entry->action == APPRAISE && (entry->flags & IMA_VERITY_REQUIRED) && !(entry->flags & IMA_DIGSIG_REQUIRED)) return false; return true; } static unsigned int ima_parse_appraise_algos(char *arg) { unsigned int res = 0; int idx; char *token; while ((token = strsep(&arg, ",")) != NULL) { idx = match_string(hash_algo_name, HASH_ALGO__LAST, token); if (idx < 0) { pr_err("unknown hash algorithm \"%s\"", token); return 0; } if (!crypto_has_alg(hash_algo_name[idx], 0, 0)) { pr_err("unavailable hash algorithm \"%s\", check your kernel configuration", token); return 0; } /* Add the hash algorithm to the 'allowed' bitfield */ res |= (1U << idx); } return res; } static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) { struct audit_buffer *ab; char *from; char *p; bool eid_token; /* either euid or egid */ struct ima_template_desc *template_desc; int result = 0; ab = integrity_audit_log_start(audit_context(), GFP_KERNEL, AUDIT_INTEGRITY_POLICY_RULE); entry->uid = INVALID_UID; entry->gid = INVALID_GID; entry->fowner = INVALID_UID; entry->fgroup = INVALID_GID; entry->uid_op = &uid_eq; entry->gid_op = &gid_eq; entry->fowner_op = &vfsuid_eq_kuid; entry->fgroup_op = &vfsgid_eq_kgid; entry->action = UNKNOWN; while ((p = strsep(&rule, " \t")) != NULL) { substring_t args[MAX_OPT_ARGS]; int token; unsigned long lnum; if (result < 0) break; if ((*p == '\0') || (*p == ' ') || (*p == '\t')) continue; token = match_token(p, policy_tokens, args); switch (token) { case Opt_measure: ima_log_string(ab, "action", "measure"); if (entry->action != UNKNOWN) result = -EINVAL; entry->action = MEASURE; break; case Opt_dont_measure: ima_log_string(ab, "action", "dont_measure"); if (entry->action != UNKNOWN) result = -EINVAL; entry->action = DONT_MEASURE; break; case Opt_appraise: ima_log_string(ab, "action", "appraise"); if (entry->action != UNKNOWN) result = -EINVAL; entry->action = APPRAISE; break; case Opt_dont_appraise: ima_log_string(ab, "action", "dont_appraise"); if (entry->action != UNKNOWN) result = -EINVAL; entry->action = DONT_APPRAISE; break; case Opt_audit: ima_log_string(ab, "action", "audit"); if (entry->action != UNKNOWN) result = -EINVAL; entry->action = AUDIT; break; case Opt_hash: ima_log_string(ab, "action", "hash"); if (entry->action != UNKNOWN) result = -EINVAL; entry->action = HASH; break; case Opt_dont_hash: ima_log_string(ab, "action", "dont_hash"); if (entry->action != UNKNOWN) result = -EINVAL; entry->action = DONT_HASH; break; case Opt_func: ima_log_string(ab, "func", args[0].from); if (entry->func) result = -EINVAL; if (strcmp(args[0].from, "FILE_CHECK") == 0) entry->func = FILE_CHECK; /* PATH_CHECK is for backwards compat */ else if (strcmp(args[0].from, 
"PATH_CHECK") == 0) entry->func = FILE_CHECK; else if (strcmp(args[0].from, "MODULE_CHECK") == 0) entry->func = MODULE_CHECK; else if (strcmp(args[0].from, "FIRMWARE_CHECK") == 0) entry->func = FIRMWARE_CHECK; else if ((strcmp(args[0].from, "FILE_MMAP") == 0) || (strcmp(args[0].from, "MMAP_CHECK") == 0)) entry->func = MMAP_CHECK; else if ((strcmp(args[0].from, "MMAP_CHECK_REQPROT") == 0)) entry->func = MMAP_CHECK_REQPROT; else if (strcmp(args[0].from, "BPRM_CHECK") == 0) entry->func = BPRM_CHECK; else if (strcmp(args[0].from, "CREDS_CHECK") == 0) entry->func = CREDS_CHECK; else if (strcmp(args[0].from, "KEXEC_KERNEL_CHECK") == 0) entry->func = KEXEC_KERNEL_CHECK; else if (strcmp(args[0].from, "KEXEC_INITRAMFS_CHECK") == 0) entry->func = KEXEC_INITRAMFS_CHECK; else if (strcmp(args[0].from, "POLICY_CHECK") == 0) entry->func = POLICY_CHECK; else if (strcmp(args[0].from, "KEXEC_CMDLINE") == 0) entry->func = KEXEC_CMDLINE; else if (IS_ENABLED(CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS) && strcmp(args[0].from, "KEY_CHECK") == 0) entry->func = KEY_CHECK; else if (strcmp(args[0].from, "CRITICAL_DATA") == 0) entry->func = CRITICAL_DATA; else if (strcmp(args[0].from, "SETXATTR_CHECK") == 0) entry->func = SETXATTR_CHECK; else result = -EINVAL; if (!result) entry->flags |= IMA_FUNC; break; case Opt_mask: ima_log_string(ab, "mask", args[0].from); if (entry->mask) result = -EINVAL; from = args[0].from; if (*from == '^') from++; if ((strcmp(from, "MAY_EXEC")) == 0) entry->mask = MAY_EXEC; else if (strcmp(from, "MAY_WRITE") == 0) entry->mask = MAY_WRITE; else if (strcmp(from, "MAY_READ") == 0) entry->mask = MAY_READ; else if (strcmp(from, "MAY_APPEND") == 0) entry->mask = MAY_APPEND; else result = -EINVAL; if (!result) entry->flags |= (*args[0].from == '^') ? IMA_INMASK : IMA_MASK; break; case Opt_fsmagic: ima_log_string(ab, "fsmagic", args[0].from); if (entry->fsmagic) { result = -EINVAL; break; } result = kstrtoul(args[0].from, 16, &entry->fsmagic); if (!result) entry->flags |= IMA_FSMAGIC; break; case Opt_fsname: ima_log_string(ab, "fsname", args[0].from); entry->fsname = kstrdup(args[0].from, GFP_KERNEL); if (!entry->fsname) { result = -ENOMEM; break; } result = 0; entry->flags |= IMA_FSNAME; break; case Opt_keyrings: ima_log_string(ab, "keyrings", args[0].from); if (!IS_ENABLED(CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS) || entry->keyrings) { result = -EINVAL; break; } entry->keyrings = ima_alloc_rule_opt_list(args); if (IS_ERR(entry->keyrings)) { result = PTR_ERR(entry->keyrings); entry->keyrings = NULL; break; } entry->flags |= IMA_KEYRINGS; break; case Opt_label: ima_log_string(ab, "label", args[0].from); if (entry->label) { result = -EINVAL; break; } entry->label = ima_alloc_rule_opt_list(args); if (IS_ERR(entry->label)) { result = PTR_ERR(entry->label); entry->label = NULL; break; } entry->flags |= IMA_LABEL; break; case Opt_fsuuid: ima_log_string(ab, "fsuuid", args[0].from); if (!uuid_is_null(&entry->fsuuid)) { result = -EINVAL; break; } result = uuid_parse(args[0].from, &entry->fsuuid); if (!result) entry->flags |= IMA_FSUUID; break; case Opt_uid_gt: case Opt_euid_gt: entry->uid_op = &uid_gt; fallthrough; case Opt_uid_lt: case Opt_euid_lt: if ((token == Opt_uid_lt) || (token == Opt_euid_lt)) entry->uid_op = &uid_lt; fallthrough; case Opt_uid_eq: case Opt_euid_eq: eid_token = (token == Opt_euid_eq) || (token == Opt_euid_gt) || (token == Opt_euid_lt); ima_log_string_op(ab, eid_token ? 
"euid" : "uid", args[0].from, token); if (uid_valid(entry->uid)) { result = -EINVAL; break; } result = kstrtoul(args[0].from, 10, &lnum); if (!result) { entry->uid = make_kuid(current_user_ns(), (uid_t) lnum); if (!uid_valid(entry->uid) || (uid_t)lnum != lnum) result = -EINVAL; else entry->flags |= eid_token ? IMA_EUID : IMA_UID; } break; case Opt_gid_gt: case Opt_egid_gt: entry->gid_op = &gid_gt; fallthrough; case Opt_gid_lt: case Opt_egid_lt: if ((token == Opt_gid_lt) || (token == Opt_egid_lt)) entry->gid_op = &gid_lt; fallthrough; case Opt_gid_eq: case Opt_egid_eq: eid_token = (token == Opt_egid_eq) || (token == Opt_egid_gt) || (token == Opt_egid_lt); ima_log_string_op(ab, eid_token ? "egid" : "gid", args[0].from, token); if (gid_valid(entry->gid)) { result = -EINVAL; break; } result = kstrtoul(args[0].from, 10, &lnum); if (!result) { entry->gid = make_kgid(current_user_ns(), (gid_t)lnum); if (!gid_valid(entry->gid) || (((gid_t)lnum) != lnum)) result = -EINVAL; else entry->flags |= eid_token ? IMA_EGID : IMA_GID; } break; case Opt_fowner_gt: entry->fowner_op = &vfsuid_gt_kuid; fallthrough; case Opt_fowner_lt: if (token == Opt_fowner_lt) entry->fowner_op = &vfsuid_lt_kuid; fallthrough; case Opt_fowner_eq: ima_log_string_op(ab, "fowner", args[0].from, token); if (uid_valid(entry->fowner)) { result = -EINVAL; break; } result = kstrtoul(args[0].from, 10, &lnum); if (!result) { entry->fowner = make_kuid(current_user_ns(), (uid_t)lnum); if (!uid_valid(entry->fowner) || (((uid_t)lnum) != lnum)) result = -EINVAL; else entry->flags |= IMA_FOWNER; } break; case Opt_fgroup_gt: entry->fgroup_op = &vfsgid_gt_kgid; fallthrough; case Opt_fgroup_lt: if (token == Opt_fgroup_lt) entry->fgroup_op = &vfsgid_lt_kgid; fallthrough; case Opt_fgroup_eq: ima_log_string_op(ab, "fgroup", args[0].from, token); if (gid_valid(entry->fgroup)) { result = -EINVAL; break; } result = kstrtoul(args[0].from, 10, &lnum); if (!result) { entry->fgroup = make_kgid(current_user_ns(), (gid_t)lnum); if (!gid_valid(entry->fgroup) || (((gid_t)lnum) != lnum)) result = -EINVAL; else entry->flags |= IMA_FGROUP; } break; case Opt_obj_user: ima_log_string(ab, "obj_user", args[0].from); result = ima_lsm_rule_init(entry, args, LSM_OBJ_USER, AUDIT_OBJ_USER); break; case Opt_obj_role: ima_log_string(ab, "obj_role", args[0].from); result = ima_lsm_rule_init(entry, args, LSM_OBJ_ROLE, AUDIT_OBJ_ROLE); break; case Opt_obj_type: ima_log_string(ab, "obj_type", args[0].from); result = ima_lsm_rule_init(entry, args, LSM_OBJ_TYPE, AUDIT_OBJ_TYPE); break; case Opt_subj_user: ima_log_string(ab, "subj_user", args[0].from); result = ima_lsm_rule_init(entry, args, LSM_SUBJ_USER, AUDIT_SUBJ_USER); break; case Opt_subj_role: ima_log_string(ab, "subj_role", args[0].from); result = ima_lsm_rule_init(entry, args, LSM_SUBJ_ROLE, AUDIT_SUBJ_ROLE); break; case Opt_subj_type: ima_log_string(ab, "subj_type", args[0].from); result = ima_lsm_rule_init(entry, args, LSM_SUBJ_TYPE, AUDIT_SUBJ_TYPE); break; case Opt_digest_type: ima_log_string(ab, "digest_type", args[0].from); if (entry->flags & IMA_DIGSIG_REQUIRED) result = -EINVAL; else if ((strcmp(args[0].from, "verity")) == 0) entry->flags |= IMA_VERITY_REQUIRED; else result = -EINVAL; break; case Opt_appraise_type: ima_log_string(ab, "appraise_type", args[0].from); if ((strcmp(args[0].from, "imasig")) == 0) { if (entry->flags & IMA_VERITY_REQUIRED) result = -EINVAL; else entry->flags |= IMA_DIGSIG_REQUIRED | IMA_CHECK_BLACKLIST; } else if (strcmp(args[0].from, "sigv3") == 0) { /* Only fsverity supports sigv3 for 
now */ if (entry->flags & IMA_VERITY_REQUIRED) entry->flags |= IMA_DIGSIG_REQUIRED | IMA_CHECK_BLACKLIST; else result = -EINVAL; } else if (IS_ENABLED(CONFIG_IMA_APPRAISE_MODSIG) && strcmp(args[0].from, "imasig|modsig") == 0) { if (entry->flags & IMA_VERITY_REQUIRED) result = -EINVAL; else entry->flags |= IMA_DIGSIG_REQUIRED | IMA_MODSIG_ALLOWED | IMA_CHECK_BLACKLIST; } else { result = -EINVAL; } break; case Opt_appraise_flag: ima_log_string(ab, "appraise_flag", args[0].from); break; case Opt_appraise_algos: ima_log_string(ab, "appraise_algos", args[0].from); if (entry->allowed_algos) { result = -EINVAL; break; } entry->allowed_algos = ima_parse_appraise_algos(args[0].from); /* invalid or empty list of algorithms */ if (!entry->allowed_algos) { result = -EINVAL; break; } entry->flags |= IMA_VALIDATE_ALGOS; break; case Opt_permit_directio: entry->flags |= IMA_PERMIT_DIRECTIO; break; case Opt_pcr: ima_log_string(ab, "pcr", args[0].from); result = kstrtoint(args[0].from, 10, &entry->pcr); if (result || INVALID_PCR(entry->pcr)) result = -EINVAL; else entry->flags |= IMA_PCR; break; case Opt_template: ima_log_string(ab, "template", args[0].from); if (entry->action != MEASURE) { result = -EINVAL; break; } template_desc = lookup_template_desc(args[0].from); if (!template_desc || entry->template) { result = -EINVAL; break; } /* * template_desc_init_fields() does nothing if * the template is already initialised, so * it's safe to do this unconditionally */ template_desc_init_fields(template_desc->fmt, &(template_desc->fields), &(template_desc->num_fields)); entry->template = template_desc; break; case Opt_err: ima_log_string(ab, "UNKNOWN", p); result = -EINVAL; break; } } if (!result && !ima_validate_rule(entry)) result = -EINVAL; else if (entry->action == APPRAISE) temp_ima_appraise |= ima_appraise_flag(entry->func); if (!result && entry->flags & IMA_MODSIG_ALLOWED) { template_desc = entry->template ? entry->template : ima_template_desc_current(); check_template_modsig(template_desc); } /* d-ngv2 template field recommended for unsigned fs-verity digests */ if (!result && entry->action == MEASURE && entry->flags & IMA_VERITY_REQUIRED) { template_desc = entry->template ? entry->template : ima_template_desc_current(); check_template_field(template_desc, "d-ngv2", "verity rules should include d-ngv2"); } audit_log_format(ab, "res=%d", !result); audit_log_end(ab); return result; } /** * ima_parse_add_rule - add a rule to ima_policy_rules * @rule: ima measurement policy rule * * Avoid locking by allowing just one writer at a time in ima_write_policy() * Returns the length of the rule parsed, an error code on failure */ ssize_t ima_parse_add_rule(char *rule) { static const char op[] = "update_policy"; char *p; struct ima_rule_entry *entry; ssize_t result, len; int audit_info = 0; p = strsep(&rule, "\n"); len = strlen(p) + 1; p += strspn(p, " \t"); if (*p == '#' || *p == '\0') return len; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL, op, "-ENOMEM", -ENOMEM, audit_info); return -ENOMEM; } INIT_LIST_HEAD(&entry->list); result = ima_parse_rule(p, entry); if (result) { ima_free_rule(entry); integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL, op, "invalid-policy", result, audit_info); return result; } list_add_tail(&entry->list, &ima_temp_rules); return len; } /** * ima_delete_rules() - called to cleanup invalid in-flight policy. * * We don't need locking as we operate on the temp list, which is * different from the active one. 
There is also only one user of * ima_delete_rules() at a time. */ void ima_delete_rules(void) { struct ima_rule_entry *entry, *tmp; temp_ima_appraise = 0; list_for_each_entry_safe(entry, tmp, &ima_temp_rules, list) { list_del(&entry->list); ima_free_rule(entry); } } #define __ima_hook_stringify(func, str) (#func), const char *const func_tokens[] = { __ima_hooks(__ima_hook_stringify) }; #ifdef CONFIG_IMA_READ_POLICY enum { mask_exec = 0, mask_write, mask_read, mask_append }; static const char *const mask_tokens[] = { "^MAY_EXEC", "^MAY_WRITE", "^MAY_READ", "^MAY_APPEND" }; void *ima_policy_start(struct seq_file *m, loff_t *pos) { loff_t l = *pos; struct ima_rule_entry *entry; struct list_head *ima_rules_tmp; rcu_read_lock(); ima_rules_tmp = rcu_dereference(ima_rules); list_for_each_entry_rcu(entry, ima_rules_tmp, list) { if (!l--) { rcu_read_unlock(); return entry; } } rcu_read_unlock(); return NULL; } void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos) { struct ima_rule_entry *entry = v; rcu_read_lock(); entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list); rcu_read_unlock(); (*pos)++; return (&entry->list == &ima_default_rules || &entry->list == &ima_policy_rules) ? NULL : entry; } void ima_policy_stop(struct seq_file *m, void *v) { } #define pt(token) policy_tokens[token].pattern #define mt(token) mask_tokens[token] /* * policy_func_show - display the ima_hooks policy rule */ static void policy_func_show(struct seq_file *m, enum ima_hooks func) { if (func > 0 && func < MAX_CHECK) seq_printf(m, "func=%s ", func_tokens[func]); else seq_printf(m, "func=%d ", func); } static void ima_show_rule_opt_list(struct seq_file *m, const struct ima_rule_opt_list *opt_list) { size_t i; for (i = 0; i < opt_list->count; i++) seq_printf(m, "%s%s", i ? 
"|" : "", opt_list->items[i]); } static void ima_policy_show_appraise_algos(struct seq_file *m, unsigned int allowed_hashes) { int idx, list_size = 0; for (idx = 0; idx < HASH_ALGO__LAST; idx++) { if (!(allowed_hashes & (1U << idx))) continue; /* only add commas if the list contains multiple entries */ if (list_size++) seq_puts(m, ","); seq_puts(m, hash_algo_name[idx]); } } int ima_policy_show(struct seq_file *m, void *v) { struct ima_rule_entry *entry = v; int i; char tbuf[64] = {0,}; int offset = 0; rcu_read_lock(); /* Do not print rules with inactive LSM labels */ for (i = 0; i < MAX_LSM_RULES; i++) { if (entry->lsm[i].args_p && !entry->lsm[i].rule) { rcu_read_unlock(); return 0; } } if (entry->action & MEASURE) seq_puts(m, pt(Opt_measure)); if (entry->action & DONT_MEASURE) seq_puts(m, pt(Opt_dont_measure)); if (entry->action & APPRAISE) seq_puts(m, pt(Opt_appraise)); if (entry->action & DONT_APPRAISE) seq_puts(m, pt(Opt_dont_appraise)); if (entry->action & AUDIT) seq_puts(m, pt(Opt_audit)); if (entry->action & HASH) seq_puts(m, pt(Opt_hash)); if (entry->action & DONT_HASH) seq_puts(m, pt(Opt_dont_hash)); seq_puts(m, " "); if (entry->flags & IMA_FUNC) policy_func_show(m, entry->func); if ((entry->flags & IMA_MASK) || (entry->flags & IMA_INMASK)) { if (entry->flags & IMA_MASK) offset = 1; if (entry->mask & MAY_EXEC) seq_printf(m, pt(Opt_mask), mt(mask_exec) + offset); if (entry->mask & MAY_WRITE) seq_printf(m, pt(Opt_mask), mt(mask_write) + offset); if (entry->mask & MAY_READ) seq_printf(m, pt(Opt_mask), mt(mask_read) + offset); if (entry->mask & MAY_APPEND) seq_printf(m, pt(Opt_mask), mt(mask_append) + offset); seq_puts(m, " "); } if (entry->flags & IMA_FSMAGIC) { snprintf(tbuf, sizeof(tbuf), "0x%lx", entry->fsmagic); seq_printf(m, pt(Opt_fsmagic), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_FSNAME) { snprintf(tbuf, sizeof(tbuf), "%s", entry->fsname); seq_printf(m, pt(Opt_fsname), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_KEYRINGS) { seq_puts(m, "keyrings="); ima_show_rule_opt_list(m, entry->keyrings); seq_puts(m, " "); } if (entry->flags & IMA_LABEL) { seq_puts(m, "label="); ima_show_rule_opt_list(m, entry->label); seq_puts(m, " "); } if (entry->flags & IMA_PCR) { snprintf(tbuf, sizeof(tbuf), "%d", entry->pcr); seq_printf(m, pt(Opt_pcr), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_FSUUID) { seq_printf(m, "fsuuid=%pU", &entry->fsuuid); seq_puts(m, " "); } if (entry->flags & IMA_UID) { snprintf(tbuf, sizeof(tbuf), "%d", __kuid_val(entry->uid)); if (entry->uid_op == &uid_gt) seq_printf(m, pt(Opt_uid_gt), tbuf); else if (entry->uid_op == &uid_lt) seq_printf(m, pt(Opt_uid_lt), tbuf); else seq_printf(m, pt(Opt_uid_eq), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_EUID) { snprintf(tbuf, sizeof(tbuf), "%d", __kuid_val(entry->uid)); if (entry->uid_op == &uid_gt) seq_printf(m, pt(Opt_euid_gt), tbuf); else if (entry->uid_op == &uid_lt) seq_printf(m, pt(Opt_euid_lt), tbuf); else seq_printf(m, pt(Opt_euid_eq), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_GID) { snprintf(tbuf, sizeof(tbuf), "%d", __kgid_val(entry->gid)); if (entry->gid_op == &gid_gt) seq_printf(m, pt(Opt_gid_gt), tbuf); else if (entry->gid_op == &gid_lt) seq_printf(m, pt(Opt_gid_lt), tbuf); else seq_printf(m, pt(Opt_gid_eq), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_EGID) { snprintf(tbuf, sizeof(tbuf), "%d", __kgid_val(entry->gid)); if (entry->gid_op == &gid_gt) seq_printf(m, pt(Opt_egid_gt), tbuf); else if (entry->gid_op == &gid_lt) seq_printf(m, pt(Opt_egid_lt), tbuf); else seq_printf(m, 
pt(Opt_egid_eq), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_FOWNER) { snprintf(tbuf, sizeof(tbuf), "%d", __kuid_val(entry->fowner)); if (entry->fowner_op == &vfsuid_gt_kuid) seq_printf(m, pt(Opt_fowner_gt), tbuf); else if (entry->fowner_op == &vfsuid_lt_kuid) seq_printf(m, pt(Opt_fowner_lt), tbuf); else seq_printf(m, pt(Opt_fowner_eq), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_FGROUP) { snprintf(tbuf, sizeof(tbuf), "%d", __kgid_val(entry->fgroup)); if (entry->fgroup_op == &vfsgid_gt_kgid) seq_printf(m, pt(Opt_fgroup_gt), tbuf); else if (entry->fgroup_op == &vfsgid_lt_kgid) seq_printf(m, pt(Opt_fgroup_lt), tbuf); else seq_printf(m, pt(Opt_fgroup_eq), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_VALIDATE_ALGOS) { seq_puts(m, "appraise_algos="); ima_policy_show_appraise_algos(m, entry->allowed_algos); seq_puts(m, " "); } for (i = 0; i < MAX_LSM_RULES; i++) { if (entry->lsm[i].rule) { switch (i) { case LSM_OBJ_USER: seq_printf(m, pt(Opt_obj_user), entry->lsm[i].args_p); break; case LSM_OBJ_ROLE: seq_printf(m, pt(Opt_obj_role), entry->lsm[i].args_p); break; case LSM_OBJ_TYPE: seq_printf(m, pt(Opt_obj_type), entry->lsm[i].args_p); break; case LSM_SUBJ_USER: seq_printf(m, pt(Opt_subj_user), entry->lsm[i].args_p); break; case LSM_SUBJ_ROLE: seq_printf(m, pt(Opt_subj_role), entry->lsm[i].args_p); break; case LSM_SUBJ_TYPE: seq_printf(m, pt(Opt_subj_type), entry->lsm[i].args_p); break; } seq_puts(m, " "); } } if (entry->template) seq_printf(m, "template=%s ", entry->template->name); if (entry->flags & IMA_DIGSIG_REQUIRED) { if (entry->flags & IMA_VERITY_REQUIRED) seq_puts(m, "appraise_type=sigv3 "); else if (entry->flags & IMA_MODSIG_ALLOWED) seq_puts(m, "appraise_type=imasig|modsig "); else seq_puts(m, "appraise_type=imasig "); } if (entry->flags & IMA_VERITY_REQUIRED) seq_puts(m, "digest_type=verity "); if (entry->flags & IMA_PERMIT_DIRECTIO) seq_puts(m, "permit_directio "); rcu_read_unlock(); seq_puts(m, "\n"); return 0; } #endif /* CONFIG_IMA_READ_POLICY */ #if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING) /* * ima_appraise_signature: whether IMA will appraise a given function using * an IMA digital signature. This is restricted to cases where the kernel * has a set of built-in trusted keys in order to avoid an attacker simply * loading additional keys. */ bool ima_appraise_signature(enum kernel_read_file_id id) { struct ima_rule_entry *entry; bool found = false; enum ima_hooks func; struct list_head *ima_rules_tmp; if (id >= READING_MAX_ID) return false; if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE) && security_locked_down(LOCKDOWN_KEXEC)) return false; func = read_idmap[id] ?: FILE_CHECK; rcu_read_lock(); ima_rules_tmp = rcu_dereference(ima_rules); list_for_each_entry_rcu(entry, ima_rules_tmp, list) { if (entry->action != APPRAISE) continue; /* * A generic entry will match, but otherwise require that it * match the func we're looking for */ if (entry->func && entry->func != func) continue; /* * We require this to be a digital signature, not a raw IMA * hash. */ if (entry->flags & IMA_DIGSIG_REQUIRED) found = true; /* * We've found a rule that matches, so break now even if it * didn't require a digital signature - a later rule that does * won't override it, so would be a false positive. */ break; } rcu_read_unlock(); return found; } #endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
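/*
 * A minimal, standalone sketch of the action/actmask bookkeeping used by
 * ima_match_policy() above.  The walk seeds actmask with each requested
 * action bit plus the bit above it, because the "dont_" variant of every
 * action is assumed to sit one bit to the left of the action itself (that
 * is what the "<< 1" / ">> 1" arithmetic relies on; the real values live in
 * security/integrity/ima/ima.h).  Once any rule decides an action, both
 * bits are cleared so later rules cannot override the decision, and the
 * list walk stops as soon as actmask is empty.  The flag values and the
 * simplified DO_MASK below are assumptions chosen for illustration only.
 */
#include <stdio.h>

#define MEASURE		0x0001
#define DONT_MEASURE	0x0002
#define APPRAISE	0x0004
#define DONT_APPRAISE	0x0008
#define DO_MASK		(MEASURE | APPRAISE)

static void apply_rule(int rule_action, int *action, int *actmask)
{
	if (!(rule_action & *actmask))
		return;			/* already decided by an earlier rule */

	*action |= rule_action & DO_MASK;

	if (rule_action & DO_MASK)	/* "measure" / "appraise" rule */
		*actmask &= ~(rule_action | rule_action << 1);
	else				/* "dont_measure" / "dont_appraise" rule */
		*actmask &= ~(rule_action | rule_action >> 1);
}

int main(void)
{
	int flags = MEASURE | APPRAISE;		/* actions the caller asked about */
	int action = 0;
	int actmask = flags | (flags << 1);	/* also track the "dont_" bits */

	apply_rule(DONT_MEASURE, &action, &actmask);	/* first matching rule wins... */
	apply_rule(MEASURE, &action, &actmask);		/* ...so this one is ignored */
	apply_rule(APPRAISE, &action, &actmask);

	printf("action=0x%x actmask=0x%x\n", action, actmask);	/* prints 0x4 0x0 */
	return 0;
}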
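/*
 * A user-space sketch of feeding a custom policy to the parser above.
 * Each line is handed to ima_parse_add_rule(), which tokenizes it against
 * policy_tokens and rejects the write if ima_validate_rule() fails; the
 * accepted rules only take effect on the final close(), when
 * ima_update_policy() splices them into the active list.  The securityfs
 * mount point, the tmpfs fsmagic value and the example rules themselves
 * are assumptions chosen for illustration, not part of ima_policy.c.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* One rule per line, using tokens from policy_tokens. */
	static const char *const rules[] = {
		"measure func=BPRM_CHECK mask=MAY_EXEC uid=0\n",
		"dont_measure fsmagic=0x01021994\n",	/* tmpfs */
		"appraise func=MODULE_CHECK appraise_type=imasig\n",
	};
	unsigned int i;
	int fd;

	/* Assumes securityfs is mounted at /sys/kernel/security. */
	fd = open("/sys/kernel/security/ima/policy", O_WRONLY);
	if (fd < 0) {
		perror("open ima/policy");
		return 1;
	}

	for (i = 0; i < sizeof(rules) / sizeof(rules[0]); i++) {
		if (write(fd, rules[i], strlen(rules[i])) < 0) {
			perror("write rule");
			close(fd);
			return 1;
		}
	}

	close(fd);	/* ima_update_policy() runs on release */
	return 0;
}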
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IgorPlug-USB IR Receiver
 *
 * Copyright (C) 2014 Sean Young <sean@mess.org>
 *
 * Supports the standard homebrew IgorPlugUSB receiver with Igor's firmware.
 * See http://www.cesko.host.sk/IgorPlugUSB/IgorPlug-USB%20(AVR)_eng.htm
 *
 * Based on the lirc_igorplugusb.c driver:
 *	Copyright (C) 2004 Jan M. Hochstein
 *	<hochstein@algo.informatik.tu-darmstadt.de>
 */
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/input.h>
#include <media/rc-core.h>

#define DRIVER_DESC		"IgorPlug-USB IR Receiver"
#define DRIVER_NAME		"igorplugusb"

#define HEADERLEN	3
#define BUFLEN		36
#define MAX_PACKET	(HEADERLEN + BUFLEN)

#define SET_INFRABUFFER_EMPTY	1
#define GET_INFRACODE		2

struct igorplugusb {
	struct rc_dev *rc;
	struct device *dev;

	struct urb *urb;
	struct usb_ctrlrequest request;

	struct timer_list timer;

	u8 *buf_in;

	char phys[64];
};

static void igorplugusb_cmd(struct igorplugusb *ir, int cmd);

static void igorplugusb_irdata(struct igorplugusb *ir, unsigned len)
{
	struct ir_raw_event rawir = {};
	unsigned i, start, overflow;

	dev_dbg(ir->dev, "irdata: %*ph (len=%u)", len, ir->buf_in, len);

	/*
	 * If more than 36 pulses and spaces follow each other, the igorplugusb
	 * overwrites its buffer from the beginning. The overflow value is the
	 * last offset which was not overwritten. Everything from this offset
	 * onwards occurred before everything until this offset.
*/ overflow = ir->buf_in[2]; i = start = overflow + HEADERLEN; if (start >= len) { dev_err(ir->dev, "receive overflow invalid: %u", overflow); } else { if (overflow > 0) { dev_warn(ir->dev, "receive overflow, at least %u lost", overflow); ir_raw_event_overflow(ir->rc); } do { rawir.duration = ir->buf_in[i] * 85; rawir.pulse = i & 1; ir_raw_event_store_with_filter(ir->rc, &rawir); if (++i == len) i = HEADERLEN; } while (i != start); /* add a trailing space */ rawir.duration = ir->rc->timeout; rawir.pulse = false; ir_raw_event_store_with_filter(ir->rc, &rawir); ir_raw_event_handle(ir->rc); } igorplugusb_cmd(ir, SET_INFRABUFFER_EMPTY); } static void igorplugusb_callback(struct urb *urb) { struct usb_ctrlrequest *req; struct igorplugusb *ir = urb->context; req = (struct usb_ctrlrequest *)urb->setup_packet; switch (urb->status) { case 0: if (req->bRequest == GET_INFRACODE && urb->actual_length > HEADERLEN) igorplugusb_irdata(ir, urb->actual_length); else /* request IR */ mod_timer(&ir->timer, jiffies + msecs_to_jiffies(50)); break; case -EPROTO: case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: dev_warn(ir->dev, "Error: urb status = %d\n", urb->status); igorplugusb_cmd(ir, SET_INFRABUFFER_EMPTY); break; } } static void igorplugusb_cmd(struct igorplugusb *ir, int cmd) { int ret; ir->request.bRequest = cmd; ir->urb->transfer_flags = 0; ret = usb_submit_urb(ir->urb, GFP_ATOMIC); if (ret && ret != -EPERM) dev_err(ir->dev, "submit urb failed: %d", ret); } static void igorplugusb_timer(struct timer_list *t) { struct igorplugusb *ir = from_timer(ir, t, timer); igorplugusb_cmd(ir, GET_INFRACODE); } static int igorplugusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev; struct usb_host_interface *idesc; struct usb_endpoint_descriptor *ep; struct igorplugusb *ir; struct rc_dev *rc; int ret = -ENOMEM; udev = interface_to_usbdev(intf); idesc = intf->cur_altsetting; if (idesc->desc.bNumEndpoints != 1) { dev_err(&intf->dev, "incorrect number of endpoints"); return -ENODEV; } ep = &idesc->endpoint[0].desc; if (!usb_endpoint_dir_in(ep) || !usb_endpoint_xfer_control(ep)) { dev_err(&intf->dev, "endpoint incorrect"); return -ENODEV; } ir = devm_kzalloc(&intf->dev, sizeof(*ir), GFP_KERNEL); if (!ir) return -ENOMEM; ir->dev = &intf->dev; timer_setup(&ir->timer, igorplugusb_timer, 0); ir->request.bRequest = GET_INFRACODE; ir->request.bRequestType = USB_TYPE_VENDOR | USB_DIR_IN; ir->request.wLength = cpu_to_le16(MAX_PACKET); ir->urb = usb_alloc_urb(0, GFP_KERNEL); if (!ir->urb) goto fail; ir->buf_in = kmalloc(MAX_PACKET, GFP_KERNEL); if (!ir->buf_in) goto fail; usb_fill_control_urb(ir->urb, udev, usb_rcvctrlpipe(udev, 0), (uint8_t *)&ir->request, ir->buf_in, MAX_PACKET, igorplugusb_callback, ir); usb_make_path(udev, ir->phys, sizeof(ir->phys)); rc = rc_allocate_device(RC_DRIVER_IR_RAW); if (!rc) goto fail; rc->device_name = DRIVER_DESC; rc->input_phys = ir->phys; usb_to_input_id(udev, &rc->input_id); rc->dev.parent = &intf->dev; /* * This device can only store 36 pulses + spaces, which is not enough * for the NEC protocol and many others. 
*/ rc->allowed_protocols = RC_PROTO_BIT_ALL_IR_DECODER & ~(RC_PROTO_BIT_NEC | RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32 | RC_PROTO_BIT_RC6_6A_20 | RC_PROTO_BIT_RC6_6A_24 | RC_PROTO_BIT_RC6_6A_32 | RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_SONY20 | RC_PROTO_BIT_SANYO); rc->priv = ir; rc->driver_name = DRIVER_NAME; rc->map_name = RC_MAP_HAUPPAUGE; rc->timeout = MS_TO_US(100); rc->rx_resolution = 85; ir->rc = rc; ret = rc_register_device(rc); if (ret) { dev_err(&intf->dev, "failed to register rc device: %d", ret); goto fail; } usb_set_intfdata(intf, ir); igorplugusb_cmd(ir, SET_INFRABUFFER_EMPTY); return 0; fail: usb_poison_urb(ir->urb); del_timer(&ir->timer); usb_unpoison_urb(ir->urb); usb_free_urb(ir->urb); rc_free_device(ir->rc); kfree(ir->buf_in); return ret; } static void igorplugusb_disconnect(struct usb_interface *intf) { struct igorplugusb *ir = usb_get_intfdata(intf); rc_unregister_device(ir->rc); usb_poison_urb(ir->urb); del_timer_sync(&ir->timer); usb_set_intfdata(intf, NULL); usb_unpoison_urb(ir->urb); usb_free_urb(ir->urb); kfree(ir->buf_in); } static const struct usb_device_id igorplugusb_table[] = { /* Igor Plug USB (Atmel's Manufact. ID) */ { USB_DEVICE(0x03eb, 0x0002) }, /* Fit PC2 Infrared Adapter */ { USB_DEVICE(0x03eb, 0x21fe) }, /* Terminating entry */ { } }; static struct usb_driver igorplugusb_driver = { .name = DRIVER_NAME, .probe = igorplugusb_probe, .disconnect = igorplugusb_disconnect, .id_table = igorplugusb_table }; module_usb_driver(igorplugusb_driver); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_AUTHOR("Sean Young <sean@mess.org>"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(usb, igorplugusb_table);
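/*
 * A standalone sketch of how igorplugusb_irdata() above unwinds the
 * device's circular capture buffer.  The URB payload starts with a
 * HEADERLEN-byte header whose third byte is the "overflow" offset: when
 * the 36-byte buffer wraps, everything from that offset onwards is older
 * than everything before it.  Decoding therefore starts at
 * HEADERLEN + overflow, wraps from len back to HEADERLEN, and turns each
 * byte into a duration of byte * 85 microseconds, with pulse/space chosen
 * by the buffer index parity.  The sample payload below is made up.
 */
#include <stdio.h>

#define HEADERLEN 3

static void decode(const unsigned char *buf, unsigned int len)
{
	unsigned int overflow = buf[2];
	unsigned int i, start;

	i = start = overflow + HEADERLEN;
	if (start >= len) {
		printf("invalid overflow offset: %u\n", overflow);
		return;
	}

	do {
		/* same conversion as igorplugusb_irdata() */
		printf("%s %u us\n", (i & 1) ? "pulse" : "space", buf[i] * 85);
		if (++i == len)
			i = HEADERLEN;
	} while (i != start);
}

int main(void)
{
	/* Hypothetical reply: 3-byte header (overflow = 0) plus five samples. */
	unsigned char sample[] = { 0x00, 0x00, 0x00, 10, 21, 10, 21, 64 };

	decode(sample, sizeof(sample));
	return 0;
}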
/* SPDX-License-Identifier: GPL-2.0+ */
#ifndef _LINUX_OF_H
#define _LINUX_OF_H
/*
 * Definitions for talking to the Open Firmware PROM on
 * Power Macintosh and other computers.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 *
 * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp.
 * Updates for SPARC64 by David S. Miller
 * Derived from PowerPC and Sparc prom.h files by Stephen Rothwell, IBM Corp.
 */
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/kobject.h>
#include <linux/mod_devicetable.h>
#include <linux/property.h>
#include <linux/list.h>

#include <asm/byteorder.h>

typedef u32 phandle;
typedef u32 ihandle;

struct property {
	char	*name;
	int	length;
	void	*value;
	struct property *next;
#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC)
	unsigned long _flags;
#endif
#if defined(CONFIG_OF_PROMTREE)
	unsigned int unique_id;
#endif
#if defined(CONFIG_OF_KOBJ)
	struct bin_attribute attr;
#endif
};

#if defined(CONFIG_SPARC)
struct of_irq_controller;
#endif

struct device_node {
	const char *name;
	phandle phandle;
	const char *full_name;
	struct fwnode_handle fwnode;

	struct property *properties;
	struct property *deadprops;	/* removed properties */
	struct device_node *parent;
	struct device_node *child;
	struct device_node *sibling;
#if defined(CONFIG_OF_KOBJ)
	struct kobject kobj;
#endif
	unsigned long _flags;
	void	*data;
#if defined(CONFIG_SPARC)
	unsigned int unique_id;
	struct of_irq_controller *irq_trans;
#endif
};

#define MAX_PHANDLE_ARGS 16
struct of_phandle_args {
	struct device_node *np;
	int args_count;
	uint32_t args[MAX_PHANDLE_ARGS];
};

struct of_phandle_iterator {
	/* Common iterator information */
	const char *cells_name;
	int cell_count;
	const struct device_node *parent;

	/* List size information */
	const __be32 *list_end;
	const __be32 *phandle_end;

	/* Current position state */
	const __be32 *cur;
	uint32_t cur_count;
	phandle phandle;
	struct device_node *node;
};

struct of_reconfig_data {
	struct device_node	*dn;
	struct property		*prop;
	struct property		*old_prop;
};

extern const struct kobj_type of_node_ktype;
extern const struct fwnode_operations of_fwnode_ops;

/**
 * of_node_init - initialize a devicetree node
 * @node: Pointer to device node that has been created by kzalloc()
 *
 * On return the device_node refcount is set to one.  Use of_node_put()
 * on @node when done to free the memory allocated for it.  If the node
 * is NOT a dynamic node the memory will not be freed. The decision of
 * whether to free the memory will be done by node->release(), which is
 * of_node_release().
*/ static inline void of_node_init(struct device_node *node) { #if defined(CONFIG_OF_KOBJ) kobject_init(&node->kobj, &of_node_ktype); #endif fwnode_init(&node->fwnode, &of_fwnode_ops); } #if defined(CONFIG_OF_KOBJ) #define of_node_kobj(n) (&(n)->kobj) #else #define of_node_kobj(n) NULL #endif #ifdef CONFIG_OF_DYNAMIC extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); #else /* CONFIG_OF_DYNAMIC */ /* Dummy ref counting routines - to be implemented later */ static inline struct device_node *of_node_get(struct device_node *node) { return node; } static inline void of_node_put(struct device_node *node) { } #endif /* !CONFIG_OF_DYNAMIC */ DEFINE_FREE(device_node, struct device_node *, if (_T) of_node_put(_T)) /* Pointer for first entry in chain of all nodes. */ extern struct device_node *of_root; extern struct device_node *of_chosen; extern struct device_node *of_aliases; extern struct device_node *of_stdout; /* * struct device_node flag descriptions * (need to be visible even when !CONFIG_OF) */ #define OF_DYNAMIC 1 /* (and properties) allocated via kmalloc */ #define OF_DETACHED 2 /* detached from the device tree */ #define OF_POPULATED 3 /* device already created */ #define OF_POPULATED_BUS 4 /* platform bus created for children */ #define OF_OVERLAY 5 /* allocated for an overlay */ #define OF_OVERLAY_FREE_CSET 6 /* in overlay cset being freed */ #define OF_BAD_ADDR ((u64)-1) #ifdef CONFIG_OF void of_core_init(void); static inline bool is_of_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &of_fwnode_ops; } #define to_of_node(__fwnode) \ ({ \ typeof(__fwnode) __to_of_node_fwnode = (__fwnode); \ \ is_of_node(__to_of_node_fwnode) ? \ container_of(__to_of_node_fwnode, \ struct device_node, fwnode) : \ NULL; \ }) #define of_fwnode_handle(node) \ ({ \ typeof(node) __of_fwnode_handle_node = (node); \ \ __of_fwnode_handle_node ? 
\ &__of_fwnode_handle_node->fwnode : NULL; \ }) static inline bool of_node_is_root(const struct device_node *node) { return node && (node->parent == NULL); } static inline int of_node_check_flag(const struct device_node *n, unsigned long flag) { return test_bit(flag, &n->_flags); } static inline int of_node_test_and_set_flag(struct device_node *n, unsigned long flag) { return test_and_set_bit(flag, &n->_flags); } static inline void of_node_set_flag(struct device_node *n, unsigned long flag) { set_bit(flag, &n->_flags); } static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) { clear_bit(flag, &n->_flags); } #if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC) static inline int of_property_check_flag(const struct property *p, unsigned long flag) { return test_bit(flag, &p->_flags); } static inline void of_property_set_flag(struct property *p, unsigned long flag) { set_bit(flag, &p->_flags); } static inline void of_property_clear_flag(struct property *p, unsigned long flag) { clear_bit(flag, &p->_flags); } #endif extern struct device_node *__of_find_all_nodes(struct device_node *prev); extern struct device_node *of_find_all_nodes(struct device_node *prev); /* * OF address retrieval & translation */ /* Helper to read a big number; size is in cells (not bytes) */ static inline u64 of_read_number(const __be32 *cell, int size) { u64 r = 0; for (; size--; cell++) r = (r << 32) | be32_to_cpu(*cell); return r; } /* Like of_read_number, but we want an unsigned long result */ static inline unsigned long of_read_ulong(const __be32 *cell, int size) { /* toss away upper bits if unsigned long is smaller than u64 */ return of_read_number(cell, size); } #if defined(CONFIG_SPARC) #include <asm/prom.h> #endif #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) extern bool of_node_name_eq(const struct device_node *np, const char *name); extern bool of_node_name_prefix(const struct device_node *np, const char *prefix); static inline const char *of_node_full_name(const struct device_node *np) { return np ? 
np->full_name : "<no-node>"; } #define for_each_of_allnodes_from(from, dn) \ for (dn = __of_find_all_nodes(from); dn; dn = __of_find_all_nodes(dn)) #define for_each_of_allnodes(dn) for_each_of_allnodes_from(NULL, dn) extern struct device_node *of_find_node_by_name(struct device_node *from, const char *name); extern struct device_node *of_find_node_by_type(struct device_node *from, const char *type); extern struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compat); extern struct device_node *of_find_matching_node_and_match( struct device_node *from, const struct of_device_id *matches, const struct of_device_id **match); extern struct device_node *of_find_node_opts_by_path(const char *path, const char **opts); static inline struct device_node *of_find_node_by_path(const char *path) { return of_find_node_opts_by_path(path, NULL); } extern struct device_node *of_find_node_by_phandle(phandle handle); extern struct device_node *of_get_parent(const struct device_node *node); extern struct device_node *of_get_next_parent(struct device_node *node); extern struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_reserved_child( const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); extern int of_find_last_cache_level(unsigned int cpu); extern struct device_node *of_find_node_with_property( struct device_node *from, const char *prop_name); extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); extern int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size); extern int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value); extern int of_property_read_u64_index(const struct device_node *np, const char *propname, u32 index, u64 *out_value); extern int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max); extern int of_property_read_variable_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz_min, size_t sz_max); extern int of_property_read_variable_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz_min, size_t sz_max); extern int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value); extern int of_property_read_variable_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz_min, size_t sz_max); extern int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string); extern int of_property_match_string(const struct device_node *np, const char *propname, const char *string); extern int of_property_read_string_helper(const struct device_node *np, const char *propname, const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_compatible_match(const struct device_node 
*device, const char *const *compat); extern bool of_device_is_available(const struct device_node *device); extern bool of_device_is_big_endian(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); extern struct device_node *of_cpu_device_node_get(int cpu); extern int of_cpu_node_to_id(struct device_node *np); extern struct device_node *of_get_next_cpu_node(struct device_node *prev); extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, int index); extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( const struct of_device_id *matches, const struct device_node *node); extern const void *of_device_get_match_data(const struct device *dev); extern int of_alias_from_compatible(const struct device_node *node, char *alias, int len); extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args); extern int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int cell_count, int index, struct of_phandle_args *out_args); extern int of_parse_phandle_with_args_map(const struct device_node *np, const char *list_name, const char *stem_name, int index, struct of_phandle_args *out_args); extern int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name); /* module functions */ extern ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len); extern int of_request_module(const struct device_node *np); /* phandle iterator functions */ extern int of_phandle_iterator_init(struct of_phandle_iterator *it, const struct device_node *np, const char *list_name, const char *cells_name, int cell_count); extern int of_phandle_iterator_next(struct of_phandle_iterator *it); extern int of_phandle_iterator_args(struct of_phandle_iterator *it, uint32_t *args, int size); extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); bool of_machine_compatible_match(const char *const *compats); /** * of_machine_is_compatible - Test root of device tree for a given compatible value * @compat: compatible string to look for in root node's compatible property. * * Return: true if the root node has the given value in its compatible property. 
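 *
 * A minimal usage sketch; the compatible strings and the pr_info() messages
 * below are made up purely for illustration:
 *
 *	static const char * const quirky_boards[] = {
 *		"vendor,board-a",
 *		"vendor,board-b",
 *		NULL
 *	};
 *
 *	if (of_machine_is_compatible("vendor,board-a"))
 *		pr_info("applying board-a specific setup\n");
 *	if (of_machine_compatible_match(quirky_boards))
 *		pr_info("applying shared board quirks\n");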
*/ static inline bool of_machine_is_compatible(const char *compat) { const char *compats[] = { compat, NULL }; return of_machine_compatible_match(compats); } extern int of_add_property(struct device_node *np, struct property *prop); extern int of_remove_property(struct device_node *np, struct property *prop); extern int of_update_property(struct device_node *np, struct property *newprop); /* For updating the device tree at runtime */ #define OF_RECONFIG_ATTACH_NODE 0x0001 #define OF_RECONFIG_DETACH_NODE 0x0002 #define OF_RECONFIG_ADD_PROPERTY 0x0003 #define OF_RECONFIG_REMOVE_PROPERTY 0x0004 #define OF_RECONFIG_UPDATE_PROPERTY 0x0005 extern int of_attach_node(struct device_node *); extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) /* * struct property *prop; * const __be32 *p; * u32 u; * * of_property_for_each_u32(np, "propname", prop, p, u) * printk("U32 value: %x\n", u); */ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, u32 *pu); /* * struct property *prop; * const char *s; * * of_property_for_each_string(np, "propname", prop, s) * printk("String value: %s\n", s); */ const char *of_prop_next_string(struct property *prop, const char *cur); bool of_console_check(struct device_node *dn, char *name, int index); int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); phys_addr_t of_dma_get_max_cpu_address(struct device_node *np); struct kimage; void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline, size_t extra_fdt_size); #else /* CONFIG_OF */ static inline void of_core_init(void) { } static inline bool is_of_node(const struct fwnode_handle *fwnode) { return false; } static inline struct device_node *to_of_node(const struct fwnode_handle *fwnode) { return NULL; } static inline bool of_node_name_eq(const struct device_node *np, const char *name) { return false; } static inline bool of_node_name_prefix(const struct device_node *np, const char *prefix) { return false; } static inline const char* of_node_full_name(const struct device_node *np) { return "<no-node>"; } static inline struct device_node *of_find_node_by_name(struct device_node *from, const char *name) { return NULL; } static inline struct device_node *of_find_node_by_type(struct device_node *from, const char *type) { return NULL; } static inline struct device_node *of_find_matching_node_and_match( struct device_node *from, const struct of_device_id *matches, const struct of_device_id **match) { return NULL; } static inline struct device_node *of_find_node_by_path(const char *path) { return NULL; } static inline struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { return NULL; } static inline struct device_node *of_find_node_by_phandle(phandle handle) { return NULL; } static inline struct device_node *of_get_parent(const struct device_node *node) { return NULL; } static inline struct device_node *of_get_next_parent(struct device_node *node) { return NULL; } static inline struct device_node *of_get_next_child( const struct device_node *node, struct device_node *prev) { return NULL; } static inline struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev) { return NULL; } static inline struct device_node *of_get_next_reserved_child( const struct device_node *node, struct device_node *prev) { return NULL; } static inline struct 
device_node *of_find_node_with_property( struct device_node *from, const char *prop_name) { return NULL; } #define of_fwnode_handle(node) NULL static inline struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible) { return NULL; } static inline struct device_node *of_get_child_by_name( const struct device_node *node, const char *name) { return NULL; } static inline int of_device_is_compatible(const struct device_node *device, const char *name) { return 0; } static inline int of_device_compatible_match(const struct device_node *device, const char *const *compat) { return 0; } static inline bool of_device_is_available(const struct device_node *device) { return false; } static inline bool of_device_is_big_endian(const struct device_node *device) { return false; } static inline struct property *of_find_property(const struct device_node *np, const char *name, int *lenp) { return NULL; } static inline struct device_node *of_find_compatible_node( struct device_node *from, const char *type, const char *compat) { return NULL; } static inline int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { return -ENOSYS; } static inline int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { return -ENOSYS; } static inline int of_property_read_u64_index(const struct device_node *np, const char *propname, u32 index, u64 *out_value) { return -ENOSYS; } static inline const void *of_get_property(const struct device_node *node, const char *name, int *lenp) { return NULL; } static inline struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) { return NULL; } static inline struct device_node *of_cpu_device_node_get(int cpu) { return NULL; } static inline int of_cpu_node_to_id(struct device_node *np) { return -ENODEV; } static inline struct device_node *of_get_next_cpu_node(struct device_node *prev) { return NULL; } static inline struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, int index) { return NULL; } static inline int of_n_addr_cells(struct device_node *np) { return 0; } static inline int of_n_size_cells(struct device_node *np) { return 0; } static inline int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max) { return -ENOSYS; } static inline int of_property_read_variable_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz_min, size_t sz_max) { return -ENOSYS; } static inline int of_property_read_variable_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz_min, size_t sz_max) { return -ENOSYS; } static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { return -ENOSYS; } static inline int of_property_read_variable_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz_min, size_t sz_max) { return -ENOSYS; } static inline int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string) { return -ENOSYS; } static inline int of_property_match_string(const struct device_node *np, const char *propname, const char *string) { return -ENOSYS; } static inline int of_property_read_string_helper(const struct device_node *np, const char *propname, const char **out_strs, size_t sz, int index) { return -ENOSYS; } static inline int __of_parse_phandle_with_args(const 
struct device_node *np, const char *list_name, const char *cells_name, int cell_count, int index, struct of_phandle_args *out_args) { return -ENOSYS; } static inline int of_parse_phandle_with_args_map(const struct device_node *np, const char *list_name, const char *stem_name, int index, struct of_phandle_args *out_args) { return -ENOSYS; } static inline int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name) { return -ENOSYS; } static inline ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len) { return -ENODEV; } static inline int of_request_module(const struct device_node *np) { return -ENODEV; } static inline int of_phandle_iterator_init(struct of_phandle_iterator *it, const struct device_node *np, const char *list_name, const char *cells_name, int cell_count) { return -ENOSYS; } static inline int of_phandle_iterator_next(struct of_phandle_iterator *it) { return -ENOSYS; } static inline int of_phandle_iterator_args(struct of_phandle_iterator *it, uint32_t *args, int size) { return 0; } static inline int of_alias_get_id(struct device_node *np, const char *stem) { return -ENOSYS; } static inline int of_alias_get_highest_id(const char *stem) { return -ENOSYS; } static inline int of_machine_is_compatible(const char *compat) { return 0; } static inline int of_add_property(struct device_node *np, struct property *prop) { return 0; } static inline int of_remove_property(struct device_node *np, struct property *prop) { return 0; } static inline bool of_machine_compatible_match(const char *const *compats) { return false; } static inline bool of_console_check(const struct device_node *dn, const char *name, int index) { return false; } static inline const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, u32 *pu) { return NULL; } static inline const char *of_prop_next_string(struct property *prop, const char *cur) { return NULL; } static inline int of_node_check_flag(struct device_node *n, unsigned long flag) { return 0; } static inline int of_node_test_and_set_flag(struct device_node *n, unsigned long flag) { return 0; } static inline void of_node_set_flag(struct device_node *n, unsigned long flag) { } static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) { } static inline int of_property_check_flag(const struct property *p, unsigned long flag) { return 0; } static inline void of_property_set_flag(struct property *p, unsigned long flag) { } static inline void of_property_clear_flag(struct property *p, unsigned long flag) { } static inline int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { return -EINVAL; } static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np) { return PHYS_ADDR_MAX; } static inline const void *of_device_get_match_data(const struct device *dev) { return NULL; } #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #endif /* CONFIG_OF */ /* Default string compare functions, Allow arch asm/prom.h to override */ #if !defined(of_compat_cmp) #define of_compat_cmp(s1, s2, l) strcasecmp((s1), (s2)) #define of_prop_cmp(s1, s2) strcmp((s1), (s2)) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif static inline int of_prop_val_eq(struct property *p1, struct property *p2) { return p1->length == p2->length && !memcmp(p1->value, p2->value, (size_t)p1->length); } #define for_each_property_of_node(dn, pp) \ for (pp = dn->properties; pp != NULL; 
pp = pp->next) #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else static inline int of_node_to_nid(struct device_node *device) { return NUMA_NO_NODE; } #endif #ifdef CONFIG_OF_NUMA extern int of_numa_init(void); #else static inline int of_numa_init(void) { return -ENOSYS; } #endif static inline struct device_node *of_find_matching_node( struct device_node *from, const struct of_device_id *matches) { return of_find_matching_node_and_match(from, matches, NULL); } static inline const char *of_node_get_device_type(const struct device_node *np) { return of_get_property(np, "device_type", NULL); } static inline bool of_node_is_type(const struct device_node *np, const char *type) { const char *match = of_node_get_device_type(np); return np && match && type && !strcmp(match, type); } /** * of_parse_phandle - Resolve a phandle property to a device_node pointer * @np: Pointer to device node holding phandle property * @phandle_name: Name of property holding a phandle value * @index: For properties holding a table of phandles, this is the index into * the table * * Return: The device_node pointer with refcount incremented. Use * of_node_put() on it when done. */ static inline struct device_node *of_parse_phandle(const struct device_node *np, const char *phandle_name, int index) { struct of_phandle_args args; if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0, index, &args)) return NULL; return args.np; } /** * of_parse_phandle_with_args() - Find a node pointed by phandle in a list * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @cells_name: property name that specifies phandles' arguments count * @index: index of a phandle to parse out * @out_args: optional pointer to output arguments structure (will be filled) * * This function is useful to parse lists of phandles and their arguments. * Returns 0 on success and fills out_args, on error returns appropriate * errno value. * * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. * * Example:: * * phandle1: node1 { * #list-cells = <2>; * }; * * phandle2: node2 { * #list-cells = <1>; * }; * * node3 { * list = <&phandle1 1 2 &phandle2 3>; * }; * * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args); */ static inline int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, struct of_phandle_args *out_args) { int cell_count = -1; /* If cells_name is NULL we assume a cell count of 0 */ if (!cells_name) cell_count = 0; return __of_parse_phandle_with_args(np, list_name, cells_name, cell_count, index, out_args); } /** * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @cell_count: number of argument cells following the phandle * @index: index of a phandle to parse out * @out_args: optional pointer to output arguments structure (will be filled) * * This function is useful to parse lists of phandles and their arguments. * Returns 0 on success and fills out_args, on error returns appropriate * errno value. * * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. 
* * Example:: * * phandle1: node1 { * }; * * phandle2: node2 { * }; * * node3 { * list = <&phandle1 0 2 &phandle2 2 3>; * }; * * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args); */ static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, const char *list_name, int cell_count, int index, struct of_phandle_args *out_args) { return __of_parse_phandle_with_args(np, list_name, NULL, cell_count, index, out_args); } /** * of_parse_phandle_with_optional_args() - Find a node pointed by phandle in a list * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @cells_name: property name that specifies phandles' arguments count * @index: index of a phandle to parse out * @out_args: optional pointer to output arguments structure (will be filled) * * Same as of_parse_phandle_with_args() except that if the cells_name property * is not found, cell_count of 0 is assumed. * * This is useful if you have a phandle which didn't have arguments * before and thus doesn't have a '#*-cells' property but is now migrated to * having arguments while retaining backwards compatibility. */ static inline int of_parse_phandle_with_optional_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, struct of_phandle_args *out_args) { return __of_parse_phandle_with_args(np, list_name, cells_name, 0, index, out_args); } /** * of_phandle_args_equal() - Compare two of_phandle_args * @a1: First of_phandle_args to compare * @a2: Second of_phandle_args to compare * * Return: True if a1 and a2 are the same (same node pointer, same phandle * args), false otherwise. */ static inline bool of_phandle_args_equal(const struct of_phandle_args *a1, const struct of_phandle_args *a2) { return a1->np == a2->np && a1->args_count == a2->args_count && !memcmp(a1->args, a2->args, sizeof(a1->args[0]) * a1->args_count); } /** * of_property_count_u8_elems - Count the number of u8 elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u8 elements * in it. * * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u8 and -ENODATA if the * property does not have a value. */ static inline int of_property_count_u8_elems(const struct device_node *np, const char *propname) { return of_property_count_elems_of_size(np, propname, sizeof(u8)); } /** * of_property_count_u16_elems - Count the number of u16 elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u16 elements * in it. * * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u16 and -ENODATA if the * property does not have a value. */ static inline int of_property_count_u16_elems(const struct device_node *np, const char *propname) { return of_property_count_elems_of_size(np, propname, sizeof(u16)); } /** * of_property_count_u32_elems - Count the number of u32 elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. 
* * Search for a property in a device node and count the number of u32 elements * in it. * * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u32 and -ENODATA if the * property does not have a value. */ static inline int of_property_count_u32_elems(const struct device_node *np, const char *propname) { return of_property_count_elems_of_size(np, propname, sizeof(u32)); } /** * of_property_count_u64_elems - Count the number of u64 elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a property in a device node and count the number of u64 elements * in it. * * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u64 and -ENODATA if the * property does not have a value. */ static inline int of_property_count_u64_elems(const struct device_node *np, const char *propname) { return of_property_count_elems_of_size(np, propname, sizeof(u64)); } /** * of_property_read_string_array() - Read an array of strings from a multiple * strings property. * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_strs: output array of string pointers. * @sz: number of array elements to read. * * Search for a property in a device tree node and retrieve a list of null * terminated string values (pointer to data, not a copy) in that property. * * Return: If @out_strs is NULL, the number of strings in the property is returned. */ static inline int of_property_read_string_array(const struct device_node *np, const char *propname, const char **out_strs, size_t sz) { return of_property_read_string_helper(np, propname, out_strs, sz, 0); } /** * of_property_count_strings() - Find and return the number of strings from a * multiple strings property. * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a property in a device tree node and retrieve the number of null * terminated strings contained in it. * * Return: The number of strings on success, -EINVAL if the property does not * exist, -ENODATA if property does not have a value, and -EILSEQ if the string * is not null-terminated within the length of the property data. */ static inline int of_property_count_strings(const struct device_node *np, const char *propname) { return of_property_read_string_helper(np, propname, NULL, 0, 0); } /** * of_property_read_string_index() - Find and read a string from a multiple * strings property. * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the string in the list of strings * @output: pointer to null terminated return string, modified only if * return value is 0. * * Search for a property in a device tree node and retrieve a null * terminated string value (pointer to data, not a copy) in the list of strings * contained in that property. * * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if * property does not have a value, and -EILSEQ if the string is not * null-terminated within the length of the property data. * * The out_string pointer is modified only if a valid string can be decoded. 
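 *
 * A minimal usage sketch; the property name "pin-names" is made up for
 * illustration:
 *
 *	const char *name;
 *
 *	if (!of_property_read_string_index(np, "pin-names", 2, &name))
 *		pr_info("third pin is called %s\n", name);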
*/ static inline int of_property_read_string_index(const struct device_node *np, const char *propname, int index, const char **output) { int rc = of_property_read_string_helper(np, propname, output, 1, index); return rc < 0 ? rc : 0; } /** * of_property_read_bool - Find a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a boolean property in a device node. Usage on non-boolean * property types is deprecated. * * Return: true if the property exists false otherwise. */ static inline bool of_property_read_bool(const struct device_node *np, const char *propname) { struct property *prop = of_find_property(np, propname, NULL); return prop ? true : false; } /** * of_property_present - Test if a property is present in a node * @np: device node to search for the property. * @propname: name of the property to be searched. * * Test for a property present in a device node. * * Return: true if the property exists false otherwise. */ static inline bool of_property_present(const struct device_node *np, const char *propname) { return of_property_read_bool(np, propname); } /** * of_property_read_u8_array - Find and read an array of u8 from a property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return value, modified only if return value is 0. * @sz: number of array elements to read * * Search for a property in a device node and read 8-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 8 <0x50 0x60 0x70>;`` * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_values is modified only if a valid u8 value can be decoded. */ static inline int of_property_read_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz) { int ret = of_property_read_variable_u8_array(np, propname, out_values, sz, 0); if (ret >= 0) return 0; else return ret; } /** * of_property_read_u16_array - Find and read an array of u16 from a property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return value, modified only if return value is 0. * @sz: number of array elements to read * * Search for a property in a device node and read 16-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_values is modified only if a valid u16 value can be decoded. */ static inline int of_property_read_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz) { int ret = of_property_read_variable_u16_array(np, propname, out_values, sz, 0); if (ret >= 0) return 0; else return ret; } /** * of_property_read_u32_array - Find and read an array of 32 bit integers * from a property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return value, modified only if return value is 0. * @sz: number of array elements to read * * Search for a property in a device node and read 32-bit value(s) from * it. 
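 *
 * A minimal usage sketch; the property name "vendor,example-values" is made
 * up for illustration (the matching dts entry would use plain 32-bit cells,
 * e.g. ``vendor,example-values = <1 2 3>;``):
 *
 *	u32 vals[3];
 *
 *	if (!of_property_read_u32_array(np, "vendor,example-values", vals,
 *					ARRAY_SIZE(vals)))
 *		pr_info("first value: %u\n", vals[0]);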
* * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_values is modified only if a valid u32 value can be decoded. */ static inline int of_property_read_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz) { int ret = of_property_read_variable_u32_array(np, propname, out_values, sz, 0); if (ret >= 0) return 0; else return ret; } /** * of_property_read_u64_array - Find and read an array of 64 bit integers * from a property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return value, modified only if return value is 0. * @sz: number of array elements to read * * Search for a property in a device node and read 64-bit value(s) from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_values is modified only if a valid u64 value can be decoded. */ static inline int of_property_read_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz) { int ret = of_property_read_variable_u64_array(np, propname, out_values, sz, 0); if (ret >= 0) return 0; else return ret; } static inline int of_property_read_u8(const struct device_node *np, const char *propname, u8 *out_value) { return of_property_read_u8_array(np, propname, out_value, 1); } static inline int of_property_read_u16(const struct device_node *np, const char *propname, u16 *out_value) { return of_property_read_u16_array(np, propname, out_value, 1); } static inline int of_property_read_u32(const struct device_node *np, const char *propname, u32 *out_value) { return of_property_read_u32_array(np, propname, out_value, 1); } static inline int of_property_read_s32(const struct device_node *np, const char *propname, s32 *out_value) { return of_property_read_u32(np, propname, (u32*) out_value); } #define of_for_each_phandle(it, err, np, ln, cn, cc) \ for (of_phandle_iterator_init((it), (np), (ln), (cn), (cc)), \ err = of_phandle_iterator_next(it); \ err == 0; \ err = of_phandle_iterator_next(it)) #define of_property_for_each_u32(np, propname, prop, p, u) \ for (prop = of_find_property(np, propname, NULL), \ p = of_prop_next_u32(prop, NULL, &u); \ p; \ p = of_prop_next_u32(prop, p, &u)) #define of_property_for_each_string(np, propname, prop, s) \ for (prop = of_find_property(np, propname, NULL), \ s = of_prop_next_string(prop, NULL); \ s; \ s = of_prop_next_string(prop, s)) #define for_each_node_by_name(dn, name) \ for (dn = of_find_node_by_name(NULL, name); dn; \ dn = of_find_node_by_name(dn, name)) #define for_each_node_by_type(dn, type) \ for (dn = of_find_node_by_type(NULL, type); dn; \ dn = of_find_node_by_type(dn, type)) #define for_each_compatible_node(dn, type, compatible) \ for (dn = of_find_compatible_node(NULL, type, compatible); dn; \ dn = of_find_compatible_node(dn, type, compatible)) #define for_each_matching_node(dn, matches) \ for (dn = of_find_matching_node(NULL, matches); dn; \ dn = of_find_matching_node(dn, matches)) #define for_each_matching_node_and_match(dn, matches, match) \ for (dn = of_find_matching_node_and_match(NULL, matches, match); \ dn; dn = of_find_matching_node_and_match(dn, matches, match)) #define for_each_child_of_node(parent, child) \ for (child = of_get_next_child(parent, NULL); child != NULL; \ 
child = of_get_next_child(parent, child)) #define for_each_child_of_node_scoped(parent, child) \ for (struct device_node *child __free(device_node) = \ of_get_next_child(parent, NULL); \ child != NULL; \ child = of_get_next_child(parent, child)) #define for_each_available_child_of_node(parent, child) \ for (child = of_get_next_available_child(parent, NULL); child != NULL; \ child = of_get_next_available_child(parent, child)) #define for_each_reserved_child_of_node(parent, child) \ for (child = of_get_next_reserved_child(parent, NULL); child != NULL; \ child = of_get_next_reserved_child(parent, child)) #define for_each_available_child_of_node_scoped(parent, child) \ for (struct device_node *child __free(device_node) = \ of_get_next_available_child(parent, NULL); \ child != NULL; \ child = of_get_next_available_child(parent, child)) #define for_each_of_cpu_node(cpu) \ for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \ cpu = of_get_next_cpu_node(cpu)) #define for_each_node_with_property(dn, prop_name) \ for (dn = of_find_node_with_property(NULL, prop_name); dn; \ dn = of_find_node_with_property(dn, prop_name)) static inline int of_get_child_count(const struct device_node *np) { struct device_node *child; int num = 0; for_each_child_of_node(np, child) num++; return num; } static inline int of_get_available_child_count(const struct device_node *np) { struct device_node *child; int num = 0; for_each_available_child_of_node(np, child) num++; return num; } #define _OF_DECLARE_STUB(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __attribute__((unused)) \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #if defined(CONFIG_OF) && !defined(MODULE) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __used __section("__" #table "_of_table") \ __aligned(__alignof__(struct of_device_id)) \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #else #define _OF_DECLARE(table, name, compat, fn, fn_type) \ _OF_DECLARE_STUB(table, name, compat, fn, fn_type) #endif typedef int (*of_init_fn_2)(struct device_node *, struct device_node *); typedef int (*of_init_fn_1_ret)(struct device_node *); typedef void (*of_init_fn_1)(struct device_node *); #define OF_DECLARE_1(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_1) #define OF_DECLARE_1_RET(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_1_ret) #define OF_DECLARE_2(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_2) /** * struct of_changeset_entry - Holds a changeset entry * * @node: list_head for the log list * @action: notifier action * @np: pointer to the device node affected * @prop: pointer to the property affected * @old_prop: hold a pointer to the original property * * Every modification of the device tree during a changeset * is held in a list of of_changeset_entry structures. * That way we can recover from a partial application, or we can * revert the changeset */ struct of_changeset_entry { struct list_head node; unsigned long action; struct device_node *np; struct property *prop; struct property *old_prop; }; /** * struct of_changeset - changeset tracker structure * * @entries: list_head for the changeset entries * * changesets are a convenient way to apply bulk changes to the * live tree. In case of an error, changes are rolled-back. 
* changesets live on after initial application, and if not * destroyed after use, they can be reverted in one single call. */ struct of_changeset { struct list_head entries; }; enum of_reconfig_change { OF_RECONFIG_NO_CHANGE = 0, OF_RECONFIG_CHANGE_ADD, OF_RECONFIG_CHANGE_REMOVE, }; struct notifier_block; #ifdef CONFIG_OF_DYNAMIC extern int of_reconfig_notifier_register(struct notifier_block *); extern int of_reconfig_notifier_unregister(struct notifier_block *); extern int of_reconfig_notify(unsigned long, struct of_reconfig_data *rd); extern int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *arg); extern void of_changeset_init(struct of_changeset *ocs); extern void of_changeset_destroy(struct of_changeset *ocs); extern int of_changeset_apply(struct of_changeset *ocs); extern int of_changeset_revert(struct of_changeset *ocs); extern int of_changeset_action(struct of_changeset *ocs, unsigned long action, struct device_node *np, struct property *prop); static inline int of_changeset_attach_node(struct of_changeset *ocs, struct device_node *np) { return of_changeset_action(ocs, OF_RECONFIG_ATTACH_NODE, np, NULL); } static inline int of_changeset_detach_node(struct of_changeset *ocs, struct device_node *np) { return of_changeset_action(ocs, OF_RECONFIG_DETACH_NODE, np, NULL); } static inline int of_changeset_add_property(struct of_changeset *ocs, struct device_node *np, struct property *prop) { return of_changeset_action(ocs, OF_RECONFIG_ADD_PROPERTY, np, prop); } static inline int of_changeset_remove_property(struct of_changeset *ocs, struct device_node *np, struct property *prop) { return of_changeset_action(ocs, OF_RECONFIG_REMOVE_PROPERTY, np, prop); } static inline int of_changeset_update_property(struct of_changeset *ocs, struct device_node *np, struct property *prop) { return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop); } struct device_node *of_changeset_create_node(struct of_changeset *ocs, struct device_node *parent, const char *full_name); int of_changeset_add_prop_string(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const char *str); int of_changeset_add_prop_string_array(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const char **str_array, size_t sz); int of_changeset_add_prop_u32_array(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const u32 *array, size_t sz); static inline int of_changeset_add_prop_u32(struct of_changeset *ocs, struct device_node *np, const char *prop_name, const u32 val) { return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1); } #else /* CONFIG_OF_DYNAMIC */ static inline int of_reconfig_notifier_register(struct notifier_block *nb) { return -EINVAL; } static inline int of_reconfig_notifier_unregister(struct notifier_block *nb) { return -EINVAL; } static inline int of_reconfig_notify(unsigned long action, struct of_reconfig_data *arg) { return -EINVAL; } static inline int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *arg) { return -EINVAL; } #endif /* CONFIG_OF_DYNAMIC */ /** * of_device_is_system_power_controller - Tells if system-power-controller is found for device_node * @np: Pointer to the given device_node * * Return: true if present false otherwise */ static inline bool of_device_is_system_power_controller(const struct device_node *np) { return of_property_read_bool(np, "system-power-controller"); } /** * of_have_populated_dt() - Has DT been populated by bootloader * * Return: True if 
a DTB has been populated by the bootloader and it isn't the * empty builtin one. False otherwise. */ static inline bool of_have_populated_dt(void) { #ifdef CONFIG_OF return of_property_present(of_root, "compatible"); #else return false; #endif } /* * Overlay support */ enum of_overlay_notify_action { OF_OVERLAY_INIT = 0, /* kzalloc() of ovcs sets this value */ OF_OVERLAY_PRE_APPLY, OF_OVERLAY_POST_APPLY, OF_OVERLAY_PRE_REMOVE, OF_OVERLAY_POST_REMOVE, }; static inline const char *of_overlay_action_name(enum of_overlay_notify_action action) { static const char *const of_overlay_action_name[] = { "init", "pre-apply", "post-apply", "pre-remove", "post-remove", }; return of_overlay_action_name[action]; } struct of_overlay_notify_data { struct device_node *overlay; struct device_node *target; }; #ifdef CONFIG_OF_OVERLAY int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, int *ovcs_id, struct device_node *target_base); int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); int of_overlay_notifier_register(struct notifier_block *nb); int of_overlay_notifier_unregister(struct notifier_block *nb); #else static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, int *ovcs_id, struct device_node *target_base) { return -ENOTSUPP; } static inline int of_overlay_remove(int *ovcs_id) { return -ENOTSUPP; } static inline int of_overlay_remove_all(void) { return -ENOTSUPP; } static inline int of_overlay_notifier_register(struct notifier_block *nb) { return 0; } static inline int of_overlay_notifier_unregister(struct notifier_block *nb) { return 0; } #endif #endif /* _LINUX_OF_H */
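/*
 * Usage sketch (illustrative only; the property names, phandle name and the
 * function name below are made up for the example): a probe-style helper
 * that combines several of the accessors declared above. Note the
 * of_node_put() calls: the child iterator holds a reference that must be
 * dropped on an early exit, and of_parse_phandle() returns a node with its
 * refcount incremented.
 *
 *	static int example_parse(struct device_node *np)
 *	{
 *		struct device_node *child, *codec;
 *		u32 rate;
 *		int ret;
 *
 *		for_each_available_child_of_node(np, child) {
 *			ret = of_property_read_u32(child, "vendor,sample-rate",
 *						   &rate);
 *			if (ret) {
 *				of_node_put(child);
 *				return ret;
 *			}
 *			pr_info("%pOF: sample rate %u\n", child, rate);
 *		}
 *
 *		codec = of_parse_phandle(np, "vendor,codec", 0);
 *		if (!codec)
 *			return -ENODEV;
 *		pr_info("codec node: %pOF\n", codec);
 *		of_node_put(codec);
 *
 *		return 0;
 *	}
 */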
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect another entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be made under neigh->lock,
   the most complicated procedure, which we allow is dev->hard_header.
   It is supposed, that dev->hard_header is simplistic and does not
   make callbacks to neighbour tables.
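
   For illustration, the read-side pattern used by neigh_lookup() further
   below is roughly

	rcu_read_lock();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n && !refcount_inc_not_zero(&n->refcnt))
		n = NULL;
	rcu_read_unlock();

   i.e. lookups walk the hash buckets under RCU, and only a successful
   refcount bump allows the entry to be used after rcu_read_unlock().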
*/ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) { kfree_skb(skb); return -ENETDOWN; } static void neigh_cleanup_and_release(struct neighbour *neigh) { trace_neigh_cleanup_and_release(neigh, 0); __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); neigh_release(neigh); } /* * It is random distribution in the interval (1/2)*base...(3/2)*base. * It corresponds to default IPv6 settings and is not overridable, * because it is really reasonable choice. */ unsigned long neigh_rand_reach_time(unsigned long base) { return base ? get_random_u32_below(base) + (base >> 1) : 0; } EXPORT_SYMBOL(neigh_rand_reach_time); static void neigh_mark_dead(struct neighbour *n) { n->dead = 1; if (!list_empty(&n->gc_list)) { list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); } if (!list_empty(&n->managed_list)) list_del_init(&n->managed_list); } static void neigh_update_gc_list(struct neighbour *n) { bool on_gc_list, exempt_from_gc; write_lock_bh(&n->tbl->lock); write_lock(&n->lock); if (n->dead) goto out; /* remove from the gc list if new state is permanent or if neighbor * is externally learned; otherwise entry should be on the gc list */ exempt_from_gc = n->nud_state & NUD_PERMANENT || n->flags & NTF_EXT_LEARNED; on_gc_list = !list_empty(&n->gc_list); if (exempt_from_gc && on_gc_list) { list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); } else if (!exempt_from_gc && !on_gc_list) { /* add entries to the tail; cleaning removes from the front */ list_add_tail(&n->gc_list, &n->tbl->gc_list); atomic_inc(&n->tbl->gc_entries); } out: write_unlock(&n->lock); write_unlock_bh(&n->tbl->lock); } static void neigh_update_managed_list(struct neighbour *n) { bool on_managed_list, add_to_managed; write_lock_bh(&n->tbl->lock); write_lock(&n->lock); if (n->dead) goto out; add_to_managed = n->flags & NTF_MANAGED; on_managed_list = !list_empty(&n->managed_list); if (!add_to_managed && on_managed_list) list_del_init(&n->managed_list); else if (add_to_managed && !on_managed_list) list_add_tail(&n->managed_list, &n->tbl->managed_list); out: write_unlock(&n->lock); write_unlock_bh(&n->tbl->lock); } static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, bool *gc_update, bool *managed_update) { u32 ndm_flags, old_flags = neigh->flags; if (!(flags & NEIGH_UPDATE_F_ADMIN)) return; ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? 
NTF_MANAGED : 0; if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) { if (ndm_flags & NTF_EXT_LEARNED) neigh->flags |= NTF_EXT_LEARNED; else neigh->flags &= ~NTF_EXT_LEARNED; *notify = 1; *gc_update = true; } if ((old_flags ^ ndm_flags) & NTF_MANAGED) { if (ndm_flags & NTF_MANAGED) neigh->flags |= NTF_MANAGED; else neigh->flags &= ~NTF_MANAGED; *notify = 1; *managed_update = true; } } static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, struct neigh_table *tbl) { bool retval = false; write_lock(&n->lock); if (refcount_read(&n->refcnt) == 1) { struct neighbour *neigh; neigh = rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock)); rcu_assign_pointer(*np, neigh); neigh_mark_dead(n); retval = true; } write_unlock(&n->lock); if (retval) neigh_cleanup_and_release(n); return retval; } bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) { struct neigh_hash_table *nht; void *pkey = ndel->primary_key; u32 hash_val; struct neighbour *n; struct neighbour __rcu **np; nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); hash_val = hash_val >> (32 - nht->hash_shift); np = &nht->hash_buckets[hash_val]; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock)))) { if (n == ndel) return neigh_del(n, np, tbl); np = &n->next; } return false; } static int neigh_forced_gc(struct neigh_table *tbl) { int max_clean = atomic_read(&tbl->gc_entries) - READ_ONCE(tbl->gc_thresh2); u64 tmax = ktime_get_ns() + NSEC_PER_MSEC; unsigned long tref = jiffies - 5 * HZ; struct neighbour *n, *tmp; int shrunk = 0; int loop = 0; NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); write_lock_bh(&tbl->lock); list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) { if (refcount_read(&n->refcnt) == 1) { bool remove = false; write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || (n->nud_state == NUD_NOARP) || (tbl->is_multicast && tbl->is_multicast(n->primary_key)) || !time_in_range(n->updated, tref, jiffies)) remove = true; write_unlock(&n->lock); if (remove && neigh_remove_one(n, tbl)) shrunk++; if (shrunk >= max_clean) break; if (++loop == 16) { if (ktime_get_ns() > tmax) goto unlock; loop = 0; } } } WRITE_ONCE(tbl->last_flush, jiffies); unlock: write_unlock_bh(&tbl->lock); return shrunk; } static void neigh_add_timer(struct neighbour *n, unsigned long when) { /* Use safe distance from the jiffies - LONG_MAX point while timer * is running in DELAY/PROBE state but still show to user space * large times in the past. 
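 *
 * (Concretely, the code below clamps n->confirmed and n->used so that
 * they are never more than about LONG_MAX - 86400*HZ jiffies in the
 * past, keeping later jiffies comparisons on these timestamps from
 * wrapping while the timer is running.)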
*/ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ); neigh_hold(n); if (!time_in_range(n->confirmed, mint, jiffies)) n->confirmed = mint; if (time_before(n->used, n->confirmed)) n->used = n->confirmed; if (unlikely(mod_timer(&n->timer, when))) { printk("NEIGH: BUG, double timer add, state is %x\n", n->nud_state); dump_stack(); } } static int neigh_del_timer(struct neighbour *n) { if ((n->nud_state & NUD_IN_TIMER) && del_timer(&n->timer)) { neigh_release(n); return 1; } return 0; } static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, int family) { switch (family) { case AF_INET: return __in_dev_arp_parms_get_rcu(dev); case AF_INET6: return __in6_dev_nd_parms_get_rcu(dev); } return NULL; } static void neigh_parms_qlen_dec(struct net_device *dev, int family) { struct neigh_parms *p; rcu_read_lock(); p = neigh_get_dev_parms_rcu(dev, family); if (p) p->qlen--; rcu_read_unlock(); } static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, int family) { struct sk_buff_head tmp; unsigned long flags; struct sk_buff *skb; skb_queue_head_init(&tmp); spin_lock_irqsave(&list->lock, flags); skb = skb_peek(list); while (skb != NULL) { struct sk_buff *skb_next = skb_peek_next(skb, list); struct net_device *dev = skb->dev; if (net == NULL || net_eq(dev_net(dev), net)) { neigh_parms_qlen_dec(dev, family); __skb_unlink(skb, list); __skb_queue_tail(&tmp, skb); } skb = skb_next; } spin_unlock_irqrestore(&list->lock, flags); while ((skb = __skb_dequeue(&tmp))) { dev_put(skb->dev); kfree_skb(skb); } } static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) { int i; struct neigh_hash_table *nht; nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (i = 0; i < (1 << nht->hash_shift); i++) { struct neighbour *n; struct neighbour __rcu **np = &nht->hash_buckets[i]; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { if (dev && n->dev != dev) { np = &n->next; continue; } if (skip_perm && n->nud_state & NUD_PERMANENT) { np = &n->next; continue; } rcu_assign_pointer(*np, rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock))); write_lock(&n->lock); neigh_del_timer(n); neigh_mark_dead(n); if (refcount_read(&n->refcnt) != 1) { /* The most unpleasant situation. We must destroy neighbour entry, but someone still uses it. The destroy will be delayed until the last user releases us, but we must kill timers etc. and move it to safe state. */ __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; WRITE_ONCE(n->output, neigh_blackhole); if (n->nud_state & NUD_VALID) n->nud_state = NUD_NOARP; else n->nud_state = NUD_NONE; neigh_dbg(2, "neigh %p is stray\n", n); } write_unlock(&n->lock); neigh_cleanup_and_release(n); } } } void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) { write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, false); write_unlock_bh(&tbl->lock); } EXPORT_SYMBOL(neigh_changeaddr); static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) { write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); pneigh_queue_purge(&tbl->proxy_queue, dev ? 
dev_net(dev) : NULL, tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; } int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev) { __neigh_ifdown(tbl, dev, true); return 0; } EXPORT_SYMBOL(neigh_carrier_down); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { __neigh_ifdown(tbl, dev, false); return 0; } EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, u32 flags, bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; int entries, gc_thresh3; if (exempt_from_gc) goto do_alloc; entries = atomic_inc_return(&tbl->gc_entries) - 1; gc_thresh3 = READ_ONCE(tbl->gc_thresh3); if (entries >= gc_thresh3 || (entries >= READ_ONCE(tbl->gc_thresh2) && time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) { if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) { net_info_ratelimited("%s: neighbor table overflow!\n", tbl->id); NEIGH_CACHE_STAT_INC(tbl, table_fulls); goto out_entries; } } do_alloc: n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); if (!n) goto out_entries; __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); seqlock_init(&n->ha_lock); n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; n->flags = flags; seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); timer_setup(&n->timer, neigh_timer_handler, 0); NEIGH_CACHE_STAT_INC(tbl, allocs); n->tbl = tbl; refcount_set(&n->refcnt, 1); n->dead = 1; INIT_LIST_HEAD(&n->gc_list); INIT_LIST_HEAD(&n->managed_list); atomic_inc(&tbl->entries); out: return n; out_entries: if (!exempt_from_gc) atomic_dec(&tbl->gc_entries); goto out; } static void neigh_get_hash_rnd(u32 *x) { *x = get_random_u32() | 1; } static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { size_t size = (1 << shift) * sizeof(struct neighbour *); struct neigh_hash_table *ret; struct neighbour __rcu **buckets; int i; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; if (size <= PAGE_SIZE) { buckets = kzalloc(size, GFP_ATOMIC); } else { buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); } if (!buckets) { kfree(ret); return NULL; } ret->hash_buckets = buckets; ret->hash_shift = shift; for (i = 0; i < NEIGH_NUM_HASH_RND; i++) neigh_get_hash_rnd(&ret->hash_rnd[i]); return ret; } static void neigh_hash_free_rcu(struct rcu_head *head) { struct neigh_hash_table *nht = container_of(head, struct neigh_hash_table, rcu); size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) { kfree(buckets); } else { kmemleak_free(buckets); free_pages((unsigned long)buckets, get_order(size)); } kfree(nht); } static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, unsigned long new_shift) { unsigned int i, hash; struct neigh_hash_table *new_nht, *old_nht; NEIGH_CACHE_STAT_INC(tbl, hash_grows); old_nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); new_nht = neigh_hash_alloc(new_shift); if (!new_nht) return old_nht; for (i = 0; i < (1 << old_nht->hash_shift); i++) { struct neighbour *n, *next; for (n = rcu_dereference_protected(old_nht->hash_buckets[i], lockdep_is_held(&tbl->lock)); n != NULL; n = next) { hash = tbl->hash(n->primary_key, n->dev, new_nht->hash_rnd); hash >>= (32 - new_nht->hash_shift); next = 
rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock)); rcu_assign_pointer(n->next, rcu_dereference_protected( new_nht->hash_buckets[hash], lockdep_is_held(&tbl->lock))); rcu_assign_pointer(new_nht->hash_buckets[hash], n); } } rcu_assign_pointer(tbl->nht, new_nht); call_rcu(&old_nht->rcu, neigh_hash_free_rcu); return new_nht; } struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n; NEIGH_CACHE_STAT_INC(tbl, lookups); rcu_read_lock(); n = __neigh_lookup_noref(tbl, pkey, dev); if (n) { if (!refcount_inc_not_zero(&n->refcnt)) n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); } rcu_read_unlock(); return n; } EXPORT_SYMBOL(neigh_lookup); static struct neighbour * ___neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, u32 flags, bool exempt_from_gc, bool want_ref) { u32 hash_val, key_len = tbl->key_len; struct neighbour *n1, *rc, *n; struct neigh_hash_table *nht; int error; n = neigh_alloc(tbl, dev, flags, exempt_from_gc); trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc); if (!n) { rc = ERR_PTR(-ENOBUFS); goto out; } memcpy(n->primary_key, pkey, key_len); n->dev = dev; netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC); /* Protocol specific setup. */ if (tbl->constructor && (error = tbl->constructor(n)) < 0) { rc = ERR_PTR(error); goto out_neigh_release; } if (dev->netdev_ops->ndo_neigh_construct) { error = dev->netdev_ops->ndo_neigh_construct(dev, n); if (error < 0) { rc = ERR_PTR(error); goto out_neigh_release; } } /* Device specific setup. */ if (n->parms->neigh_setup && (error = n->parms->neigh_setup(n)) < 0) { rc = ERR_PTR(error); goto out_neigh_release; } n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) nht = neigh_hash_grow(tbl, nht->hash_shift + 1); hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); goto out_tbl_unlock; } for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], lockdep_is_held(&tbl->lock)); n1 != NULL; n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); rc = n1; goto out_tbl_unlock; } } n->dead = 0; if (!exempt_from_gc) list_add_tail(&n->gc_list, &n->tbl->gc_list); if (n->flags & NTF_MANAGED) list_add_tail(&n->managed_list, &n->tbl->managed_list); if (want_ref) neigh_hold(n); rcu_assign_pointer(n->next, rcu_dereference_protected(nht->hash_buckets[hash_val], lockdep_is_held(&tbl->lock))); rcu_assign_pointer(nht->hash_buckets[hash_val], n); write_unlock_bh(&tbl->lock); neigh_dbg(2, "neigh %p is created\n", n); rc = n; out: return rc; out_tbl_unlock: write_unlock_bh(&tbl->lock); out_neigh_release: if (!exempt_from_gc) atomic_dec(&tbl->gc_entries); neigh_release(n); goto out; } struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { return ___neigh_create(tbl, pkey, dev, 0, false, want_ref); } EXPORT_SYMBOL(__neigh_create); static u32 pneigh_hash(const void *pkey, unsigned int key_len) { u32 hash_val = *(u32 *)(pkey + key_len - 4); hash_val ^= (hash_val >> 16); hash_val ^= hash_val >> 8; hash_val ^= hash_val >> 4; hash_val &= PNEIGH_HASHMASK; return hash_val; } static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, 
struct net *net, const void *pkey, unsigned int key_len, struct net_device *dev) { while (n) { if (!memcmp(n->key, pkey, key_len) && net_eq(pneigh_net(n), net) && (n->dev == dev || !n->dev)) return n; n = n->next; } return NULL; } struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); return __pneigh_lookup_1(tbl->phash_buckets[hash_val], net, pkey, key_len, dev); } EXPORT_SYMBOL_GPL(__pneigh_lookup); struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev, int creat) { struct pneigh_entry *n; unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); read_lock_bh(&tbl->lock); n = __pneigh_lookup_1(tbl->phash_buckets[hash_val], net, pkey, key_len, dev); read_unlock_bh(&tbl->lock); if (n || !creat) goto out; ASSERT_RTNL(); n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; netdev_hold(dev, &n->dev_tracker, GFP_KERNEL); if (tbl->pconstructor && tbl->pconstructor(n)) { netdev_put(dev, &n->dev_tracker); kfree(n); n = NULL; goto out; } write_lock_bh(&tbl->lock); n->next = tbl->phash_buckets[hash_val]; tbl->phash_buckets[hash_val] = n; write_unlock_bh(&tbl->lock); out: return n; } EXPORT_SYMBOL(pneigh_lookup); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { if (!memcmp(n->key, pkey, key_len) && n->dev == dev && net_eq(pneigh_net(n), net)) { *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); netdev_put(n->dev, &n->dev_tracker); kfree(n); return 0; } } write_unlock_bh(&tbl->lock); return -ENOENT; } static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, struct net_device *dev) { struct pneigh_entry *n, **np, *freelist = NULL; u32 h; for (h = 0; h <= PNEIGH_HASHMASK; h++) { np = &tbl->phash_buckets[h]; while ((n = *np) != NULL) { if (!dev || n->dev == dev) { *np = n->next; n->next = freelist; freelist = n; continue; } np = &n->next; } } write_unlock_bh(&tbl->lock); while ((n = freelist)) { freelist = n->next; n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); netdev_put(n->dev, &n->dev_tracker); kfree(n); } return -ENOENT; } static void neigh_parms_destroy(struct neigh_parms *parms); static inline void neigh_parms_put(struct neigh_parms *parms) { if (refcount_dec_and_test(&parms->refcnt)) neigh_parms_destroy(parms); } /* * neighbour must already be out of the table; * */ void neigh_destroy(struct neighbour *neigh) { struct net_device *dev = neigh->dev; NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); if (!neigh->dead) { pr_warn("Destroying alive neighbour %p\n", neigh); dump_stack(); return; } if (neigh_del_timer(neigh)) pr_warn("Impossible event\n"); write_lock_bh(&neigh->lock); __skb_queue_purge(&neigh->arp_queue); write_unlock_bh(&neigh->lock); neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) dev->netdev_ops->ndo_neigh_destroy(dev, neigh); netdev_put(dev, &neigh->dev_tracker); neigh_parms_put(neigh->parms); neigh_dbg(2, "neigh %p is destroyed\n", neigh); atomic_dec(&neigh->tbl->entries); kfree_rcu(neigh, rcu); } EXPORT_SYMBOL(neigh_destroy); /* Neighbour 
state is suspicious; disable fast path. Called with write_locked neigh. */ static void neigh_suspect(struct neighbour *neigh) { neigh_dbg(2, "neigh %p is suspected\n", neigh); WRITE_ONCE(neigh->output, neigh->ops->output); } /* Neighbour state is OK; enable fast path. Called with write_locked neigh. */ static void neigh_connect(struct neighbour *neigh) { neigh_dbg(2, "neigh %p is connected\n", neigh); WRITE_ONCE(neigh->output, neigh->ops->connected_output); } static void neigh_periodic_work(struct work_struct *work) { struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); struct neighbour *n; struct neighbour __rcu **np; unsigned int i; struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); /* * periodically recompute ReachableTime from random function */ if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; WRITE_ONCE(tbl->last_rand, jiffies); list_for_each_entry(p, &tbl->parms_list, list) p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1)) goto out; for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { unsigned int state; write_lock(&n->lock); state = n->nud_state; if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || (n->flags & NTF_EXT_LEARNED)) { write_unlock(&n->lock); goto next_elt; } if (time_before(n->used, n->confirmed) && time_is_before_eq_jiffies(n->confirmed)) n->used = n->confirmed; if (refcount_read(&n->refcnt) == 1 && (state == NUD_FAILED || !time_in_range_open(jiffies, n->used, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { rcu_assign_pointer(*np, rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock))); neigh_mark_dead(n); write_unlock(&n->lock); neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); next_elt: np = &n->next; } /* * It's fine to release lock here, even if hash table * grows while we are preempted. */ write_unlock_bh(&tbl->lock); cond_resched(); write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); } out: /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks. * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2 * BASE_REACHABLE_TIME. */ queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); write_unlock_bh(&tbl->lock); } static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) : NEIGH_VAR(p, MCAST_PROBES)); } static void neigh_invalidate(struct neighbour *neigh) __releases(neigh->lock) __acquires(neigh->lock) { struct sk_buff *skb; NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); neigh_dbg(2, "neigh %p is failed\n", neigh); neigh->updated = jiffies; /* It is very thin place. report_unreachable is very complicated routine. Particularly, it can hit the same neighbour entry! So that, we try to be accurate and avoid dead loop. 
--ANK */ while (neigh->nud_state == NUD_FAILED && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { write_unlock(&neigh->lock); neigh->ops->error_report(neigh, skb); write_lock(&neigh->lock); } __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } static void neigh_probe(struct neighbour *neigh) __releases(neigh->lock) { struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); /* keep skb alive even if arp_queue overflows */ if (skb) skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); if (neigh->ops->solicit) neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); consume_skb(skb); } /* Called when a timer expires for a neighbour entry. */ static void neigh_timer_handler(struct timer_list *t) { unsigned long now, next; struct neighbour *neigh = from_timer(neigh, t, timer); unsigned int state; int notify = 0; write_lock(&neigh->lock); state = neigh->nud_state; now = jiffies; next = now + HZ; if (!(state & NUD_IN_TIMER)) goto out; if (state & NUD_REACHABLE) { if (time_before_eq(now, neigh->confirmed + neigh->parms->reachable_time)) { neigh_dbg(2, "neigh %p is still alive\n", neigh); next = neigh->confirmed + neigh->parms->reachable_time; } else if (time_before_eq(now, neigh->used + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is delayed\n", neigh); WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_suspect(neigh); next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { neigh_dbg(2, "neigh %p is suspected\n", neigh); WRITE_ONCE(neigh->nud_state, NUD_STALE); neigh->updated = jiffies; neigh_suspect(neigh); notify = 1; } } else if (state & NUD_DELAY) { if (time_before_eq(now, neigh->confirmed + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { neigh_dbg(2, "neigh %p is now reachable\n", neigh); WRITE_ONCE(neigh->nud_state, NUD_REACHABLE); neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { neigh_dbg(2, "neigh %p is probed\n", neigh); WRITE_ONCE(neigh->nud_state, NUD_PROBE); neigh->updated = jiffies; atomic_set(&neigh->probes, 0); notify = 1; next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100); } } else { /* NUD_PROBE|NUD_INCOMPLETE */ next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100); } if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { WRITE_ONCE(neigh->nud_state, NUD_FAILED); notify = 1; neigh_invalidate(neigh); goto out; } if (neigh->nud_state & NUD_IN_TIMER) { if (time_before(next, jiffies + HZ/100)) next = jiffies + HZ/100; if (!mod_timer(&neigh->timer, next)) neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { neigh_probe(neigh); } else { out: write_unlock(&neigh->lock); } if (notify) neigh_update_notify(neigh, 0); trace_neigh_timer_handler(neigh, 0); neigh_release(neigh); } int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, const bool immediate_ok) { int rc; bool immediate_probe = false; write_lock_bh(&neigh->lock); rc = 0; if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; if (neigh->dead) goto out_dead; if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + NEIGH_VAR(neigh->parms, APP_PROBES)) { unsigned long next, now = jiffies; atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); neigh->updated = now; if (!immediate_ok) { next = now + 1; } 
else { immediate_probe = true; next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ / 100); } neigh_add_timer(neigh, next); } else { WRITE_ONCE(neigh->nud_state, NUD_FAILED); neigh->updated = jiffies; write_unlock_bh(&neigh->lock); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED); return 1; } } else if (neigh->nud_state & NUD_STALE) { neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh_del_timer(neigh); WRITE_ONCE(neigh->nud_state, NUD_DELAY); neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); } if (neigh->nud_state == NUD_INCOMPLETE) { if (skb) { while (neigh->arp_queue_len_bytes + skb->truesize > NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { struct sk_buff *buff; buff = __skb_dequeue(&neigh->arp_queue); if (!buff) break; neigh->arp_queue_len_bytes -= buff->truesize; kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } skb_dst_force(skb); __skb_queue_tail(&neigh->arp_queue, skb); neigh->arp_queue_len_bytes += skb->truesize; } rc = 1; } out_unlock_bh: if (immediate_probe) neigh_probe(neigh); else write_unlock(&neigh->lock); local_bh_enable(); trace_neigh_event_send_done(neigh, rc); return rc; out_dead: if (neigh->nud_state & NUD_STALE) goto out_unlock_bh; write_unlock_bh(&neigh->lock); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD); trace_neigh_event_send_dead(neigh, 1); return 1; } EXPORT_SYMBOL(__neigh_event_send); static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) = NULL; if (neigh->dev->header_ops) update = neigh->dev->header_ops->cache_update; if (update) { hh = &neigh->hh; if (READ_ONCE(hh->hh_len)) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); } } } /* Generic update routine. -- lladdr is new lladdr or NULL, if it is not supplied. -- new is new state. -- flags NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr, if it is different. NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected" lladdr instead of overriding it if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. NEIGH_UPDATE_F_USE means that the entry is user triggered. NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed. NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as a router. Caller MUST hold reference count on the entry. 
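
   For illustration, the administrative replace path in neigh_add() below
   ends up invoking this routine roughly as

	__neigh_update(neigh, lladdr, ndm->ndm_state,
		       NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		       NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
		       NETLINK_CB(skb).portid, extack);

   with additional NEIGH_UPDATE_F_* bits or'ed in according to the NTF_*
   flags of the request, and the OVERRIDE bits cleared when NLM_F_REPLACE
   is not set.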
*/ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid, struct netlink_ext_ack *extack) { bool gc_update = false, managed_update = false; int update_isrouter = 0; struct net_device *dev; int err, notify = 0; u8 old; trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid); write_lock_bh(&neigh->lock); dev = neigh->dev; old = neigh->nud_state; err = -EPERM; if (neigh->dead) { NL_SET_ERR_MSG(extack, "Neighbor entry is now dead"); new = old; goto out; } if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT))) goto out; neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update); if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) { new = old & ~NUD_PERMANENT; WRITE_ONCE(neigh->nud_state, new); err = 0; goto out; } if (!(new & NUD_VALID)) { neigh_del_timer(neigh); if (old & NUD_CONNECTED) neigh_suspect(neigh); WRITE_ONCE(neigh->nud_state, new); err = 0; notify = old & NUD_VALID; if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && (new & NUD_FAILED)) { neigh_invalidate(neigh); notify = 1; } goto out; } /* Compare new lladdr with cached one */ if (!dev->addr_len) { /* First case: device needs no address. */ lladdr = neigh->ha; } else if (lladdr) { /* The second case: if something is already cached and a new address is proposed: - compare new & old - if they are different, check override flag */ if ((old & NUD_VALID) && !memcmp(lladdr, neigh->ha, dev->addr_len)) lladdr = neigh->ha; } else { /* No address is supplied; if we know something, use it, otherwise discard the request. */ err = -EINVAL; if (!(old & NUD_VALID)) { NL_SET_ERR_MSG(extack, "No link layer address given"); goto out; } lladdr = neigh->ha; } /* Update confirmed timestamp for neighbour entry after we * received ARP packet even if it doesn't change IP to MAC binding. */ if (new & NUD_CONNECTED) neigh->confirmed = jiffies; /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */ err = 0; update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; if (old & NUD_VALID) { if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { update_isrouter = 0; if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && (old & NUD_CONNECTED)) { lladdr = neigh->ha; new = NUD_STALE; } else goto out; } else { if (lladdr == neigh->ha && new == NUD_STALE && !(flags & NEIGH_UPDATE_F_ADMIN)) new = old; } } /* Update timestamp only once we know we will make a change to the * neighbour entry. Otherwise we risk to move the locktime window with * noop updates and ignore relevant ARP updates. */ if (new != old || lladdr != neigh->ha) neigh->updated = jiffies; if (new != old) { neigh_del_timer(neigh); if (new & NUD_PROBE) atomic_set(&neigh->probes, 0); if (new & NUD_IN_TIMER) neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? 
neigh->parms->reachable_time : 0))); WRITE_ONCE(neigh->nud_state, new); notify = 1; } if (lladdr != neigh->ha) { write_seqlock(&neigh->ha_lock); memcpy(&neigh->ha, lladdr, dev->addr_len); write_sequnlock(&neigh->ha_lock); neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1); notify = 1; } if (new == old) goto out; if (new & NUD_CONNECTED) neigh_connect(neigh); else neigh_suspect(neigh); if (!(old & NUD_VALID)) { struct sk_buff *skb; /* Again: avoid dead loop if something went wrong */ while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { struct dst_entry *dst = skb_dst(skb); struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); rcu_read_lock(); /* Why not just use 'neigh' as-is? The problem is that * things such as shaper, eql, and sch_teql can end up * using alternative, different, neigh objects to output * the packet in the output path. So what we need to do * here is re-lookup the top-level neigh in the path so * we can reinject the packet there. */ n2 = NULL; if (dst && dst->obsolete != DST_OBSOLETE_DEAD) { n2 = dst_neigh_lookup_skb(dst, skb); if (n2) n1 = n2; } READ_ONCE(n1->output)(n1, skb); if (n2) neigh_release(n2); rcu_read_unlock(); write_lock_bh(&neigh->lock); } __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } out: if (update_isrouter) neigh_update_is_router(neigh, flags, &notify); write_unlock_bh(&neigh->lock); if (((new ^ old) & NUD_PERMANENT) || gc_update) neigh_update_gc_list(neigh); if (managed_update) neigh_update_managed_list(neigh); if (notify) neigh_update_notify(neigh, nlmsg_pid); trace_neigh_update_done(neigh, err); return err; } int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid) { return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL); } EXPORT_SYMBOL(neigh_update); /* Update the neigh to listen temporarily for probe responses, even if it is * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. */ void __neigh_set_probe_once(struct neighbour *neigh) { if (neigh->dead) return; neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE); atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100)); } EXPORT_SYMBOL(__neigh_set_probe_once); struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev) { struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len); if (neigh) neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE, 0); return neigh; } EXPORT_SYMBOL(neigh_event_ns); /* called with read_lock_bh(&n->lock); */ static void neigh_hh_init(struct neighbour *n) { struct net_device *dev = n->dev; __be16 prot = n->tbl->protocol; struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); /* Only one thread can come in here and initialize the * hh_cache entry. */ if (!hh->hh_len) dev->header_ops->cache(n, hh, prot); write_unlock_bh(&n->lock); } /* Slow and careful. 
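   This is the output method used while the entry is not yet connected:
   it may trigger resolution via neigh_event_send(), and on success it
   reads the current hardware address under the ha_lock seqlock while
   building the link-layer header, before handing the skb to
   dev_queue_xmit().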
*/ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { int rc = 0; if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; unsigned int seq; if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len)) neigh_hh_init(neigh); do { __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) rc = dev_queue_xmit(skb); else goto out_kfree_skb; } out: return rc; out_kfree_skb: rc = -EINVAL; kfree_skb(skb); goto out; } EXPORT_SYMBOL(neigh_resolve_output); /* As fast as possible without hh cache */ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) { struct net_device *dev = neigh->dev; unsigned int seq; int err; do { __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) err = dev_queue_xmit(skb); else { err = -EINVAL; kfree_skb(skb); } return err; } EXPORT_SYMBOL(neigh_connected_output); int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) { return dev_queue_xmit(skb); } EXPORT_SYMBOL(neigh_direct_output); static void neigh_managed_work(struct work_struct *work) { struct neigh_table *tbl = container_of(work, struct neigh_table, managed_work.work); struct neighbour *neigh; write_lock_bh(&tbl->lock); list_for_each_entry(neigh, &tbl->managed_list, managed_list) neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS)); write_unlock_bh(&tbl->lock); } static void neigh_proxy_process(struct timer_list *t) { struct neigh_table *tbl = from_timer(tbl, t, proxy_timer); long sched_next = 0; unsigned long now = jiffies; struct sk_buff *skb, *n; spin_lock(&tbl->proxy_queue.lock); skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { long tdif = NEIGH_CB(skb)->sched_next - now; if (tdif <= 0) { struct net_device *dev = skb->dev; neigh_parms_qlen_dec(dev, tbl->family); __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) { rcu_read_lock(); tbl->proxy_redo(skb); rcu_read_unlock(); } else { kfree_skb(skb); } dev_put(dev); } else if (!sched_next || tdif < sched_next) sched_next = tdif; } del_timer(&tbl->proxy_timer); if (sched_next) mod_timer(&tbl->proxy_timer, jiffies + sched_next); spin_unlock(&tbl->proxy_queue.lock); } static unsigned long neigh_proxy_delay(struct neigh_parms *p) { /* If proxy_delay is zero, do not call get_random_u32_below() * as it is undefined behavior. */ unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY); return proxy_delay ? 
jiffies + get_random_u32_below(proxy_delay) : jiffies; } void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { unsigned long sched_next = neigh_proxy_delay(p); if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); return; } NEIGH_CB(skb)->sched_next = sched_next; NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { if (time_before(tbl->proxy_timer.expires, sched_next)) sched_next = tbl->proxy_timer.expires; } skb_dst_drop(skb); dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); p->qlen++; mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } EXPORT_SYMBOL(pneigh_enqueue); static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, struct net *net, int ifindex) { struct neigh_parms *p; list_for_each_entry(p, &tbl->parms_list, list) { if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || (!p->dev && !ifindex && net_eq(net, &init_net))) return p; } return NULL; } struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { struct neigh_parms *p; struct net *net = dev_net(dev); const struct net_device_ops *ops = dev->netdev_ops; p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); p->qlen = 0; netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { netdev_put(dev, &p->dev_tracker); kfree(p); return NULL; } write_lock_bh(&tbl->lock); list_add(&p->list, &tbl->parms.list); write_unlock_bh(&tbl->lock); neigh_parms_data_state_cleanall(p); } return p; } EXPORT_SYMBOL(neigh_parms_alloc); static void neigh_rcu_free_parms(struct rcu_head *head) { struct neigh_parms *parms = container_of(head, struct neigh_parms, rcu_head); neigh_parms_put(parms); } void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) { if (!parms || parms == &tbl->parms) return; write_lock_bh(&tbl->lock); list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); netdev_put(parms->dev, &parms->dev_tracker); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); static void neigh_parms_destroy(struct neigh_parms *parms) { kfree(parms); } static struct lock_class_key neigh_table_proxy_queue_class; static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly; void neigh_table_init(int index, struct neigh_table *tbl) { unsigned long now = jiffies; unsigned long phsize; INIT_LIST_HEAD(&tbl->parms_list); INIT_LIST_HEAD(&tbl->gc_list); INIT_LIST_HEAD(&tbl->managed_list); list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); refcount_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); tbl->parms.qlen = 0; tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat, &neigh_stat_seq_ops, tbl)) panic("cannot create neighbour proc dir entry"); #endif RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); if (!tbl->nht || !tbl->phash_buckets) panic("cannot allocate neighbour 
cache hashes"); if (!tbl->entry_size) tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) + tbl->key_len, NEIGH_PRIV_ALIGN); else WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); rwlock_init(&tbl->lock); INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, tbl->parms.reachable_time); INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0); timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time * 20; neigh_tables[index] = tbl; } EXPORT_SYMBOL(neigh_table_init); int neigh_table_clear(int index, struct neigh_table *tbl) { neigh_tables[index] = NULL; /* It is not clean... Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, neigh_hash_free_rcu); tbl->nht = NULL; kfree(tbl->phash_buckets); tbl->phash_buckets = NULL; remove_proc_entry(tbl->id, init_net.proc_net_stat); free_percpu(tbl->stats); tbl->stats = NULL; return 0; } EXPORT_SYMBOL(neigh_table_clear); static struct neigh_table *neigh_find_table(int family) { struct neigh_table *tbl = NULL; switch (family) { case AF_INET: tbl = neigh_tables[NEIGH_ARP_TABLE]; break; case AF_INET6: tbl = neigh_tables[NEIGH_ND_TABLE]; break; } return tbl; } const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID }, [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) }, [NDA_PROBES] = { .type = NLA_U32 }, [NDA_VLAN] = { .type = NLA_U16 }, [NDA_PORT] = { .type = NLA_U16 }, [NDA_VNI] = { .type = NLA_U32 }, [NDA_IFINDEX] = { .type = NLA_U32 }, [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, [NDA_NH_ID] = { .type = NLA_U32 }, [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK), [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED }, }; static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; struct neighbour *neigh; struct net_device *dev = NULL; int err = -EINVAL; ASSERT_RTNL(); if (nlmsg_len(nlh) < sizeof(*ndm)) goto out; dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); if (!dst_attr) { NL_SET_ERR_MSG(extack, "Network address not specified"); goto out; } ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; } } tbl = neigh_find_table(ndm->ndm_family); if (tbl == NULL) return -EAFNOSUPPORT; if (nla_len(dst_attr) < (int)tbl->key_len) { NL_SET_ERR_MSG(extack, "Invalid network address"); goto out; } if (ndm->ndm_flags & NTF_PROXY) { err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); goto out; } if (dev == NULL) goto out; neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); if (neigh == NULL) { err = -ENOENT; goto out; } err = __neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, NETLINK_CB(skb).portid, extack); 
write_lock_bh(&tbl->lock); neigh_release(neigh); neigh_remove_one(neigh, tbl); write_unlock_bh(&tbl->lock); out: return err; } static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_OVERRIDE_ISROUTER; struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; struct neighbour *neigh; void *dst, *lladdr; u8 protocol = 0; u32 ndm_flags; int err; ASSERT_RTNL(); err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack); if (err < 0) goto out; err = -EINVAL; if (!tb[NDA_DST]) { NL_SET_ERR_MSG(extack, "Network address not specified"); goto out; } ndm = nlmsg_data(nlh); ndm_flags = ndm->ndm_flags; if (tb[NDA_FLAGS_EXT]) { u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]); BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE < (sizeof(ndm->ndm_flags) * BITS_PER_BYTE + hweight32(NTF_EXT_MASK))); ndm_flags |= (ext << NTF_EXT_SHIFT); } if (ndm->ndm_ifindex) { dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; } if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) { NL_SET_ERR_MSG(extack, "Invalid link address"); goto out; } } tbl = neigh_find_table(ndm->ndm_family); if (tbl == NULL) return -EAFNOSUPPORT; if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) { NL_SET_ERR_MSG(extack, "Invalid network address"); goto out; } dst = nla_data(tb[NDA_DST]); lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; if (tb[NDA_PROTOCOL]) protocol = nla_get_u8(tb[NDA_PROTOCOL]); if (ndm_flags & NTF_PROXY) { struct pneigh_entry *pn; if (ndm_flags & NTF_MANAGED) { NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination"); goto out; } err = -ENOBUFS; pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { pn->flags = ndm_flags; if (protocol) pn->protocol = protocol; err = 0; } goto out; } if (!dev) { NL_SET_ERR_MSG(extack, "Device not specified"); goto out; } if (tbl->allow_add && !tbl->allow_add(dev, extack)) { err = -EINVAL; goto out; } neigh = neigh_lookup(tbl, dst, dev); if (neigh == NULL) { bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT; bool exempt_from_gc = ndm_permanent || ndm_flags & NTF_EXT_LEARNED; if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { err = -ENOENT; goto out; } if (ndm_permanent && (ndm_flags & NTF_MANAGED)) { NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry"); err = -EINVAL; goto out; } neigh = ___neigh_create(tbl, dst, dev, ndm_flags & (NTF_EXT_LEARNED | NTF_MANAGED), exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); goto out; } } else { if (nlh->nlmsg_flags & NLM_F_EXCL) { err = -EEXIST; neigh_release(neigh); goto out; } if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) flags &= ~(NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_OVERRIDE_ISROUTER); } if (protocol) neigh->protocol = protocol; if (ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; if (ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; if (ndm_flags & NTF_MANAGED) flags |= NEIGH_UPDATE_F_MANAGED; if (ndm_flags & NTF_USE) flags |= NEIGH_UPDATE_F_USE; err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, NETLINK_CB(skb).portid, extack); if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) { neigh_event_send(neigh, NULL); err = 0; } neigh_release(neigh); out: return err; } static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, NDTA_PARMS); if (nest == NULL) return 
-ENOBUFS; if ((parms->dev && nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) || nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || /* approximative value for deprecated QUEUE_LEN (in packets) */ nla_put_u32(skb, NDTPA_QUEUE_LEN, NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) || nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) || nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) || nla_put_u32(skb, NDTPA_UCAST_PROBES, NEIGH_VAR(parms, UCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_PROBES, NEIGH_VAR(parms, MCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_REPROBES, NEIGH_VAR(parms, MCAST_REPROBES)) || nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time, NDTPA_PAD) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_GC_STALETIME, NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_RETRANS_TIME, NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_PROXY_DELAY, NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_LOCKTIME, NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) || nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS, NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, u32 pid, u32 seq, int type, int flags) { struct nlmsghdr *nlh; struct ndtmsg *ndtmsg; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); if (nlh == NULL) return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; if (nla_put_string(skb, NDTA_NAME, tbl->id) || nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval), NDTA_PAD) || nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) || nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) || nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3))) goto nla_put_failure; { unsigned long now = jiffies; long flush_delta = now - READ_ONCE(tbl->last_flush); long rand_delta = now - READ_ONCE(tbl->last_rand); struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, .ndtc_entry_size = tbl->entry_size, .ndtc_entries = atomic_read(&tbl->entries), .ndtc_last_flush = jiffies_to_msecs(flush_delta), .ndtc_last_rand = jiffies_to_msecs(rand_delta), .ndtc_proxy_qlen = READ_ONCE(tbl->proxy_queue.qlen), }; rcu_read_lock(); nht = rcu_dereference(tbl->nht); ndc.ndtc_hash_rnd = nht->hash_rnd[0]; ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); rcu_read_unlock(); if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) goto nla_put_failure; } { int cpu; struct ndt_stats ndst; memset(&ndst, 0, sizeof(ndst)); for_each_possible_cpu(cpu) { struct neigh_statistics *st; st = per_cpu_ptr(tbl->stats, cpu); ndst.ndts_allocs += READ_ONCE(st->allocs); ndst.ndts_destroys += READ_ONCE(st->destroys); ndst.ndts_hash_grows += READ_ONCE(st->hash_grows); ndst.ndts_res_failed += READ_ONCE(st->res_failed); ndst.ndts_lookups += READ_ONCE(st->lookups); ndst.ndts_hits += READ_ONCE(st->hits); ndst.ndts_rcv_probes_mcast += 
READ_ONCE(st->rcv_probes_mcast); ndst.ndts_rcv_probes_ucast += READ_ONCE(st->rcv_probes_ucast); ndst.ndts_periodic_gc_runs += READ_ONCE(st->periodic_gc_runs); ndst.ndts_forced_gc_runs += READ_ONCE(st->forced_gc_runs); ndst.ndts_table_fulls += READ_ONCE(st->table_fulls); } if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst, NDTA_PAD)) goto nla_put_failure; } BUG_ON(tbl->parms.dev); if (neightbl_fill_parms(skb, &tbl->parms) < 0) goto nla_put_failure; read_unlock_bh(&tbl->lock); nlmsg_end(skb, nlh); return 0; nla_put_failure: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int neightbl_fill_param_info(struct sk_buff *skb, struct neigh_table *tbl, struct neigh_parms *parms, u32 pid, u32 seq, int type, unsigned int flags) { struct ndtmsg *ndtmsg; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); if (nlh == NULL) return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || neightbl_fill_parms(skb, parms) < 0) goto errout; read_unlock_bh(&tbl->lock); nlmsg_end(skb, nlh); return 0; errout: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { [NDTA_NAME] = { .type = NLA_STRING }, [NDTA_THRESH1] = { .type = NLA_U32 }, [NDTA_THRESH2] = { .type = NLA_U32 }, [NDTA_THRESH3] = { .type = NLA_U32 }, [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, [NDTA_PARMS] = { .type = NLA_NESTED }, }; static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_IFINDEX] = { .type = NLA_U32 }, [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, [NDTPA_APP_PROBES] = { .type = NLA_U32 }, [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 }, [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, [NDTPA_LOCKTIME] = { .type = NLA_U64 }, [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 }, }; static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; bool found = false; int err, tidx; err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, nl_neightbl_policy, extack); if (err < 0) goto errout; if (tb[NDTA_NAME] == NULL) { err = -EINVAL; goto errout; } ndtmsg = nlmsg_data(nlh); for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { tbl = neigh_tables[tidx]; if (!tbl) continue; if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) continue; if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) { found = true; break; } } if (!found) return -ENOENT; /* * We acquire tbl->lock to be nice to the periodic timers and * make sure they always see a consistent set of values. 
*/ write_lock_bh(&tbl->lock); if (tb[NDTA_PARMS]) { struct nlattr *tbp[NDTPA_MAX+1]; struct neigh_parms *p; int i, ifindex = 0; err = nla_parse_nested_deprecated(tbp, NDTPA_MAX, tb[NDTA_PARMS], nl_ntbl_parm_policy, extack); if (err < 0) goto errout_tbl_lock; if (tbp[NDTPA_IFINDEX]) ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); p = lookup_neigh_parms(tbl, net, ifindex); if (p == NULL) { err = -ENOENT; goto errout_tbl_lock; } for (i = 1; i <= NDTPA_MAX; i++) { if (tbp[i] == NULL) continue; switch (i) { case NDTPA_QUEUE_LEN: NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, nla_get_u32(tbp[i]) * SKB_TRUESIZE(ETH_FRAME_LEN)); break; case NDTPA_QUEUE_LENBYTES: NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, nla_get_u32(tbp[i])); break; case NDTPA_PROXY_QLEN: NEIGH_VAR_SET(p, PROXY_QLEN, nla_get_u32(tbp[i])); break; case NDTPA_APP_PROBES: NEIGH_VAR_SET(p, APP_PROBES, nla_get_u32(tbp[i])); break; case NDTPA_UCAST_PROBES: NEIGH_VAR_SET(p, UCAST_PROBES, nla_get_u32(tbp[i])); break; case NDTPA_MCAST_PROBES: NEIGH_VAR_SET(p, MCAST_PROBES, nla_get_u32(tbp[i])); break; case NDTPA_MCAST_REPROBES: NEIGH_VAR_SET(p, MCAST_REPROBES, nla_get_u32(tbp[i])); break; case NDTPA_BASE_REACHABLE_TIME: NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, nla_get_msecs(tbp[i])); /* update reachable_time as well, otherwise, the change will * only be effective after the next time neigh_periodic_work * decides to recompute it (can be multiple minutes) */ p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); break; case NDTPA_GC_STALETIME: NEIGH_VAR_SET(p, GC_STALETIME, nla_get_msecs(tbp[i])); break; case NDTPA_DELAY_PROBE_TIME: NEIGH_VAR_SET(p, DELAY_PROBE_TIME, nla_get_msecs(tbp[i])); call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); break; case NDTPA_INTERVAL_PROBE_TIME_MS: NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS, nla_get_msecs(tbp[i])); break; case NDTPA_RETRANS_TIME: NEIGH_VAR_SET(p, RETRANS_TIME, nla_get_msecs(tbp[i])); break; case NDTPA_ANYCAST_DELAY: NEIGH_VAR_SET(p, ANYCAST_DELAY, nla_get_msecs(tbp[i])); break; case NDTPA_PROXY_DELAY: NEIGH_VAR_SET(p, PROXY_DELAY, nla_get_msecs(tbp[i])); break; case NDTPA_LOCKTIME: NEIGH_VAR_SET(p, LOCKTIME, nla_get_msecs(tbp[i])); break; } } } err = -ENOENT; if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && !net_eq(net, &init_net)) goto errout_tbl_lock; if (tb[NDTA_THRESH1]) WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1])); if (tb[NDTA_THRESH2]) WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2])); if (tb[NDTA_THRESH3]) WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3])); if (tb[NDTA_GC_INTERVAL]) WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL])); err = 0; errout_tbl_lock: write_unlock_bh(&tbl->lock); errout: return err; } static int neightbl_valid_dump_info(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct ndtmsg *ndtm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request"); return -EINVAL; } ndtm = nlmsg_data(nlh); if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*ndtm))) { NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request"); return -EINVAL; } return 0; } static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int 
neigh_skip = cb->args[1]; struct neigh_table *tbl; if (cb->strict_check) { int err = neightbl_valid_dump_info(nlh, cb->extack); if (err < 0) return err; } family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { struct neigh_parms *p; tbl = neigh_tables[tidx]; if (!tbl) continue; if (tidx < tbl_skip || (family && tbl->family != family)) continue; if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) < 0) break; nidx = 0; p = list_next_entry(&tbl->parms, list); list_for_each_entry_from(p, &tbl->parms_list, list) { if (!net_eq(neigh_parms_net(p), net)) continue; if (nidx < neigh_skip) goto next; if (neightbl_fill_param_info(skb, tbl, p, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) < 0) goto out; next: nidx++; } neigh_skip = 0; } out: cb->args[0] = tidx; cb->args[1] = nidx; return skb->len; } static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, u32 pid, u32 seq, int type, unsigned int flags) { u32 neigh_flags, neigh_flags_ext; unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT; neigh_flags = neigh->flags & NTF_OLD_MASK; ndm = nlmsg_data(nlh); ndm->ndm_family = neigh->ops->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = neigh_flags; ndm->ndm_type = neigh->type; ndm->ndm_ifindex = neigh->dev->ifindex; if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) goto nla_put_failure; read_lock_bh(&neigh->lock); ndm->ndm_state = neigh->nud_state; if (neigh->nud_state & NUD_VALID) { char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, neigh, neigh->dev); if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { read_unlock_bh(&neigh->lock); goto nla_put_failure; } } ci.ndm_used = jiffies_to_clock_t(now - neigh->used); ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) goto nla_put_failure; if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, u32 pid, u32 seq, int type, unsigned int flags, struct neigh_table *tbl) { u32 neigh_flags, neigh_flags_ext; struct nlmsghdr *nlh; struct ndmsg *ndm; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT; neigh_flags = pn->flags & NTF_OLD_MASK; ndm = nlmsg_data(nlh); ndm->ndm_family = tbl->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = neigh_flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; ndm->ndm_ifindex = pn->dev ? 
pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) goto nla_put_failure; if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) goto nla_put_failure; if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid) { call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid); } static bool neigh_master_filtered(struct net_device *dev, int master_idx) { struct net_device *master; if (!master_idx) return false; master = dev ? netdev_master_upper_dev_get(dev) : NULL; /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another * invalid value for ifindex to denote "no master". */ if (master_idx == -1) return !!master; if (!master || master->ifindex != master_idx) return true; return false; } static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) { if (filter_idx && (!dev || dev->ifindex != filter_idx)) return true; return false; } struct neigh_dump_filter { int master_idx; int dev_idx; }; static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb, struct neigh_dump_filter *filter) { struct net *net = sock_net(skb->sk); struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; struct neigh_hash_table *nht; unsigned int flags = NLM_F_MULTI; if (filter->dev_idx || filter->master_idx) flags |= NLM_F_DUMP_FILTERED; rcu_read_lock(); nht = rcu_dereference(tbl->nht); for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; n != NULL; n = rcu_dereference(n->next)) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || neigh_master_filtered(n->dev, filter->master_idx)) goto next; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, flags) < 0) { rc = -1; goto out; } next: idx++; } } rc = skb->len; out: rcu_read_unlock(); cb->args[1] = h; cb->args[2] = idx; return rc; } static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb, struct neigh_dump_filter *filter) { struct pneigh_entry *n; struct net *net = sock_net(skb->sk); int rc, h, s_h = cb->args[3]; int idx, s_idx = idx = cb->args[4]; unsigned int flags = NLM_F_MULTI; if (filter->dev_idx || filter->master_idx) flags |= NLM_F_DUMP_FILTERED; read_lock_bh(&tbl->lock); for (h = s_h; h <= PNEIGH_HASHMASK; h++) { if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { if (idx < s_idx || pneigh_net(n) != net) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || neigh_master_filtered(n->dev, filter->master_idx)) goto next; if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, flags, tbl) < 0) { read_unlock_bh(&tbl->lock); rc = -1; goto out; } next: idx++; } } read_unlock_bh(&tbl->lock); rc = skb->len; out: cb->args[3] = h; cb->args[4] = idx; return rc; } static int neigh_valid_dump_req(const struct nlmsghdr *nlh, bool strict_check, struct neigh_dump_filter *filter, struct netlink_ext_ack *extack) { struct nlattr *tb[NDA_MAX + 1]; int err, i; if (strict_check) { struct ndmsg *ndm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { NL_SET_ERR_MSG(extack, "Invalid 
header for neighbor dump request"); return -EINVAL; } ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); return -EINVAL; } if (ndm->ndm_flags & ~NTF_PROXY) { NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, nda_policy, extack); } else { err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb, NDA_MAX, nda_policy, extack); } if (err < 0) return err; for (i = 0; i <= NDA_MAX; ++i) { if (!tb[i]) continue; /* all new attributes should require strict_check */ switch (i) { case NDA_IFINDEX: filter->dev_idx = nla_get_u32(tb[i]); break; case NDA_MASTER: filter->master_idx = nla_get_u32(tb[i]); break; default: if (strict_check) { NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request"); return -EINVAL; } } } return 0; } static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct neigh_dump_filter filter = {}; struct neigh_table *tbl; int t, family, s_t; int proxy = 0; int err; family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; /* check for full ndmsg structure presence, family member is * the same for both structures */ if (nlmsg_len(nlh) >= sizeof(struct ndmsg) && ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY) proxy = 1; err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack); if (err < 0 && cb->strict_check) return err; s_t = cb->args[0]; for (t = 0; t < NEIGH_NR_TABLES; t++) { tbl = neigh_tables[t]; if (!tbl) continue; if (t < s_t || (family && tbl->family != family)) continue; if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (proxy) err = pneigh_dump_table(tbl, skb, cb, &filter); else err = neigh_dump_table(tbl, skb, cb, &filter); if (err < 0) break; } cb->args[0] = t; return skb->len; } static int neigh_valid_get_req(const struct nlmsghdr *nlh, struct neigh_table **tbl, void **dst, int *dev_idx, u8 *ndm_flags, struct netlink_ext_ack *extack) { struct nlattr *tb[NDA_MAX + 1]; struct ndmsg *ndm; int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request"); return -EINVAL; } ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request"); return -EINVAL; } if (ndm->ndm_flags & ~NTF_PROXY) { NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, nda_policy, extack); if (err < 0) return err; *ndm_flags = ndm->ndm_flags; *dev_idx = ndm->ndm_ifindex; *tbl = neigh_find_table(ndm->ndm_family); if (*tbl == NULL) { NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request"); return -EAFNOSUPPORT; } for (i = 0; i <= NDA_MAX; ++i) { if (!tb[i]) continue; switch (i) { case NDA_DST: if (nla_len(tb[i]) != (int)(*tbl)->key_len) { NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request"); return -EINVAL; } *dst = nla_data(tb[i]); break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request"); return -EINVAL; } } return 0; } static inline size_t neigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + 
nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + nla_total_size(sizeof(struct nda_cacheinfo)) + nla_total_size(4) /* NDA_PROBES */ + nla_total_size(4) /* NDA_FLAGS_EXT */ + nla_total_size(1); /* NDA_PROTOCOL */ } static int neigh_get_reply(struct net *net, struct neighbour *neigh, u32 pid, u32 seq) { struct sk_buff *skb; int err = 0; skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL); if (!skb) return -ENOBUFS; err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0); if (err) { kfree_skb(skb); goto errout; } err = rtnl_unicast(skb, net, pid); errout: return err; } static inline size_t pneigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + nla_total_size(4) /* NDA_FLAGS_EXT */ + nla_total_size(1); /* NDA_PROTOCOL */ } static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh, u32 pid, u32 seq, struct neigh_table *tbl) { struct sk_buff *skb; int err = 0; skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL); if (!skb) return -ENOBUFS; err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl); if (err) { kfree_skb(skb); goto errout; } err = rtnl_unicast(skb, net, pid); errout: return err; } static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct net_device *dev = NULL; struct neigh_table *tbl = NULL; struct neighbour *neigh; void *dst = NULL; u8 ndm_flags = 0; int dev_idx = 0; int err; err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags, extack); if (err < 0) return err; if (dev_idx) { dev = __dev_get_by_index(net, dev_idx); if (!dev) { NL_SET_ERR_MSG(extack, "Unknown device ifindex"); return -ENODEV; } } if (!dst) { NL_SET_ERR_MSG(extack, "Network address not specified"); return -EINVAL; } if (ndm_flags & NTF_PROXY) { struct pneigh_entry *pn; pn = pneigh_lookup(tbl, net, dst, dev, 0); if (!pn) { NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found"); return -ENOENT; } return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, tbl); } if (!dev) { NL_SET_ERR_MSG(extack, "No device specified"); return -EINVAL; } neigh = neigh_lookup(tbl, dst, dev); if (!neigh) { NL_SET_ERR_MSG(extack, "Neighbour entry not found"); return -ENOENT; } err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); neigh_release(neigh); return err; } void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) { int chain; struct neigh_hash_table *nht; rcu_read_lock(); nht = rcu_dereference(tbl->nht); read_lock_bh(&tbl->lock); /* avoid resizes */ for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; for (n = rcu_dereference(nht->hash_buckets[chain]); n != NULL; n = rcu_dereference(n->next)) cb(n, cookie); } read_unlock_bh(&tbl->lock); rcu_read_unlock(); } EXPORT_SYMBOL(neigh_for_each); /* The tbl->lock must be held as a writer and BH disabled. 
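* __neigh_for_each_release() below unlinks entries from the hash chains and marks them dead when the callback returns nonzero, which must not race with other writers or with the softirq paths that also take tbl->lock.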
*/ void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)) { int chain; struct neigh_hash_table *nht; nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; struct neighbour __rcu **np; np = &nht->hash_buckets[chain]; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { int release; write_lock(&n->lock); release = cb(n); if (release) { rcu_assign_pointer(*np, rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock))); neigh_mark_dead(n); } else np = &n->next; write_unlock(&n->lock); if (release) neigh_cleanup_and_release(n); } } } EXPORT_SYMBOL(__neigh_for_each_release); int neigh_xmit(int index, struct net_device *dev, const void *addr, struct sk_buff *skb) { int err = -EAFNOSUPPORT; if (likely(index < NEIGH_NR_TABLES)) { struct neigh_table *tbl; struct neighbour *neigh; tbl = neigh_tables[index]; if (!tbl) goto out; rcu_read_lock(); if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); neigh = __ipv4_neigh_lookup_noref(dev, key); } else { neigh = __neigh_lookup_noref(tbl, addr, dev); } if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); if (IS_ERR(neigh)) { rcu_read_unlock(); goto out_kfree_skb; } err = READ_ONCE(neigh->output)(neigh, skb); rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), addr, NULL, skb->len); if (err < 0) goto out_kfree_skb; err = dev_queue_xmit(skb); } out: return err; out_kfree_skb: kfree_skb(skb); goto out; } EXPORT_SYMBOL(neigh_xmit); #ifdef CONFIG_PROC_FS static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); struct neigh_hash_table *nht = state->nht; struct neighbour *n = NULL; int bucket; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { n = rcu_dereference(nht->hash_buckets[bucket]); while (n) { if (!net_eq(dev_net(n->dev), net)) goto next; if (state->neigh_sub_iter) { loff_t fakep = 0; void *v; v = state->neigh_sub_iter(state, n, &fakep); if (!v) goto next; } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: n = rcu_dereference(n->next); } if (n) break; } state->bucket = bucket; return n; } static struct neighbour *neigh_get_next(struct seq_file *seq, struct neighbour *n, loff_t *pos) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); struct neigh_hash_table *nht = state->nht; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); if (v) return n; } n = rcu_dereference(n->next); while (1) { while (n) { if (!net_eq(dev_net(n->dev), net)) goto next; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); if (v) return n; goto next; } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: n = rcu_dereference(n->next); } if (n) break; if (++state->bucket >= (1 << nht->hash_shift)) break; n = rcu_dereference(nht->hash_buckets[state->bucket]); } if (n && pos) --(*pos); return n; } static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) { struct neighbour *n = neigh_get_first(seq); if (n) { --(*pos); while (*pos) { n = neigh_get_next(seq, n, pos); if (!n) break; } } return *pos ? 
NULL : n; } static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; int bucket; state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { pn = tbl->phash_buckets[bucket]; while (pn && !net_eq(pneigh_net(pn), net)) pn = pn->next; if (pn) break; } state->bucket = bucket; return pn; } static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, struct pneigh_entry *pn, loff_t *pos) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; do { pn = pn->next; } while (pn && !net_eq(pneigh_net(pn), net)); while (!pn) { if (++state->bucket > PNEIGH_HASHMASK) break; pn = tbl->phash_buckets[state->bucket]; while (pn && !net_eq(pneigh_net(pn), net)) pn = pn->next; if (pn) break; } if (pn && pos) --(*pos); return pn; } static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) { struct pneigh_entry *pn = pneigh_get_first(seq); if (pn) { --(*pos); while (*pos) { pn = pneigh_get_next(seq, pn, pos); if (!pn) break; } } return *pos ? NULL : pn; } static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; void *rc; loff_t idxpos = *pos; rc = neigh_get_idx(seq, &idxpos); if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) rc = pneigh_get_idx(seq, &idxpos); return rc; } void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) __acquires(tbl->lock) __acquires(rcu) { struct neigh_seq_state *state = seq->private; state->tbl = tbl; state->bucket = 0; state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); rcu_read_lock(); state->nht = rcu_dereference(tbl->nht); read_lock_bh(&tbl->lock); return *pos ? 
neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } EXPORT_SYMBOL(neigh_seq_start); void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct neigh_seq_state *state; void *rc; if (v == SEQ_START_TOKEN) { rc = neigh_get_first(seq); goto out; } state = seq->private; if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { rc = neigh_get_next(seq, v, NULL); if (rc) goto out; if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) rc = pneigh_get_first(seq); } else { BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); rc = pneigh_get_next(seq, v, NULL); } out: ++(*pos); return rc; } EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) __releases(tbl->lock) __releases(rcu) { struct neigh_seq_state *state = seq->private; struct neigh_table *tbl = state->tbl; read_unlock_bh(&tbl->lock); rcu_read_unlock(); } EXPORT_SYMBOL(neigh_seq_stop); /* statistics via seq_file */ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) { struct neigh_table *tbl = pde_data(file_inode(seq->file)); int cpu; if (*pos == 0) return SEQ_START_TOKEN; for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; return per_cpu_ptr(tbl->stats, cpu); } return NULL; } static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct neigh_table *tbl = pde_data(file_inode(seq->file)); int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; return per_cpu_ptr(tbl->stats, cpu); } (*pos)++; return NULL; } static void neigh_stat_seq_stop(struct seq_file *seq, void *v) { } static int neigh_stat_seq_show(struct seq_file *seq, void *v) { struct neigh_table *tbl = pde_data(file_inode(seq->file)); struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); return 0; } seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " "%08lx %08lx %08lx " "%08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, st->destroys, st->hash_grows, st->lookups, st->hits, st->res_failed, st->rcv_probes_mcast, st->rcv_probes_ucast, st->periodic_gc_runs, st->forced_gc_runs, st->unres_discards, st->table_fulls ); return 0; } static const struct seq_operations neigh_stat_seq_ops = { .start = neigh_stat_seq_start, .next = neigh_stat_seq_next, .stop = neigh_stat_seq_stop, .show = neigh_stat_seq_show, }; #endif /* CONFIG_PROC_FS */ static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid) { struct net *net = dev_net(n->dev); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = neigh_fill_info(skb, n, pid, 0, type, flags); if (err < 0) { /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0); } EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int size, ret; struct ctl_table tmp = *ctl; tmp.extra1 = SYSCTL_ZERO; tmp.extra2 = &unres_qlen_max; tmp.data = &size; size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); ret = 
proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); return ret; } static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, int index) { struct net_device *dev; int family = neigh_parms_family(p); rcu_read_lock(); for_each_netdev_rcu(net, dev) { struct neigh_parms *dst_p = neigh_get_dev_parms_rcu(dev, family); if (dst_p && !test_bit(index, dst_p->data_state)) dst_p->data[index] = p->data[index]; } rcu_read_unlock(); } static void neigh_proc_update(struct ctl_table *ctl, int write) { struct net_device *dev = ctl->extra1; struct neigh_parms *p = ctl->extra2; struct net *net = neigh_parms_net(p); int index = (int *) ctl->data - p->data; if (!write) return; set_bit(index, p->data_state); if (index == NEIGH_VAR_DELAY_PROBE_TIME) call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table tmp = *ctl; int ret; tmp.extra1 = SYSCTL_ZERO; tmp.extra2 = SYSCTL_INT_MAX; ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table tmp = *ctl; int ret; int min = msecs_to_jiffies(1); tmp.extra1 = &min; tmp.extra2 = NULL; ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } EXPORT_SYMBOL(neigh_proc_dointvec); int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); neigh_proc_update(ctl, write); return ret; } static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct neigh_parms *p = ctl->extra2; int ret; if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0) ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); else ret = -1; if (write && ret == 0) { /* update reachable_time as well, otherwise, the change will * only be effective after the next time neigh_periodic_work * decides to recompute it */ p->reachable_time = 
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } return ret; } #define NEIGH_PARMS_DATA_OFFSET(index) \ (&((struct neigh_parms *) 0)->data[index]) #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ [NEIGH_VAR_ ## attr] = { \ .procname = name, \ .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ } #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive) #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; } neigh_sysctl_template __read_mostly = { .neigh_vars = { NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS, "interval_probe_time_ms"), NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), [NEIGH_VAR_GC_INTERVAL] = { .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NEIGH_VAR_GC_THRESH1] = { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, {}, }, }; int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, proc_handler *handler) { int i; struct neigh_sysctl_table *t; const char *dev_name_source; char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; char *p_name; size_t 
neigh_vars_size; t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT); if (!t) goto err; for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { t->neigh_vars[i].data += (long) p; t->neigh_vars[i].extra1 = dev; t->neigh_vars[i].extra2 = p; } neigh_vars_size = ARRAY_SIZE(t->neigh_vars); if (dev) { dev_name_source = dev->name; /* Terminate the table early */ memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1; } else { struct neigh_table *tbl = p->tbl; dev_name_source = "default"; t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; } if (handler) { /* RetransTime */ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; /* ReachableTime */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; /* RetransTime (in milliseconds)*/ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; /* ReachableTime (in milliseconds) */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; } else { /* Those handlers will update p->reachable_time after * base_reachable_time(_ms) is set to ensure the new timer starts being * applied after the next neighbour update instead of waiting for * neigh_periodic_work to update its value (can be multiple minutes) * So any handler that replaces them should do this as well */ /* ReachableTime */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = neigh_proc_base_reachable_time; /* ReachableTime (in milliseconds) */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = neigh_proc_base_reachable_time; } switch (neigh_parms_family(p)) { case AF_INET: p_name = "ipv4"; break; case AF_INET6: p_name = "ipv6"; break; default: BUG(); } snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", p_name, dev_name_source); t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p), neigh_path, t->neigh_vars, neigh_vars_size); if (!t->sysctl_header) goto free; p->sysctl_table = t; return 0; free: kfree(t); err: return -ENOBUFS; } EXPORT_SYMBOL(neigh_sysctl_register); void neigh_sysctl_unregister(struct neigh_parms *p) { if (p->sysctl_table) { struct neigh_sysctl_table *t = p->sysctl_table; p->sysctl_table = NULL; unregister_net_sysctl_table(t->sysctl_header); kfree(t); } } EXPORT_SYMBOL(neigh_sysctl_unregister); #endif /* CONFIG_SYSCTL */ static int __init neigh_init(void) { rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0); rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, 0); rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); return 0; } subsys_initcall(neigh_init);
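/* neigh_init() above registers the RTM_NEWNEIGH, RTM_DELNEIGH, RTM_GETNEIGH and neighbour-table handlers for PF_UNSPEC; the per-family table (for example the ARP table for AF_INET) is selected at runtime via neigh_find_table() from the ndm_family field of each request. */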
// SPDX-License-Identifier: GPL-2.0+ /* * Base port operations for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * Split from 8250_core.c, Copyright (C) 2001 Russell King. * * A note about mapbase / membase * * mapbase is the physical address of the IO port. * membase is an 'ioremapped' cookie. */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/ioport.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/console.h> #include <linux/gpio/consumer.h> #include <linux/sysrq.h> #include <linux/delay.h> #include <linux/platform_device.h> #include <linux/tty.h> #include <linux/ratelimit.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/serial_8250.h> #include <linux/nmi.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/pm_runtime.h> #include <linux/ktime.h> #include <asm/io.h> #include <asm/irq.h> #include "8250.h" /* * Debugging. */ #if 0 #define DEBUG_AUTOCONF(fmt...) printk(fmt) #else #define DEBUG_AUTOCONF(fmt...) do { } while (0) #endif /* * Here we define the default xmit fifo size used for each type of UART.
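* (The table is indexed by PORT_* type: fifo_size is the hardware FIFO depth, tx_loadsz is how many characters the driver writes to the transmitter in one go, .fcr is the default FIFO control register value and .rxtrig_bytes lists the selectable receive trigger levels in bytes.)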
*/ static const struct serial8250_config uart_config[] = { [PORT_UNKNOWN] = { .name = "unknown", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_8250] = { .name = "8250", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16450] = { .name = "16450", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16550] = { .name = "16550", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16550A] = { .name = "16550A", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_CIRRUS] = { .name = "Cirrus", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16650] = { .name = "ST16650", .fifo_size = 1, .tx_loadsz = 1, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16650V2] = { .name = "ST16650V2", .fifo_size = 32, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_00, .rxtrig_bytes = {8, 16, 24, 28}, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16750] = { .name = "TI16750", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | UART_FCR7_64BYTE, .rxtrig_bytes = {1, 16, 32, 56}, .flags = UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE, }, [PORT_STARTECH] = { .name = "Startech", .fifo_size = 1, .tx_loadsz = 1, }, [PORT_16C950] = { .name = "16C950/954", .fifo_size = 128, .tx_loadsz = 128, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01, .rxtrig_bytes = {16, 32, 112, 120}, /* UART_CAP_EFR breaks billionon CF bluetooth card. */ .flags = UART_CAP_FIFO | UART_CAP_SLEEP, }, [PORT_16654] = { .name = "ST16654", .fifo_size = 64, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_10, .rxtrig_bytes = {8, 16, 56, 60}, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16850] = { .name = "XR16850", .fifo_size = 128, .tx_loadsz = 128, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_RSA] = { .name = "RSA", .fifo_size = 2048, .tx_loadsz = 2048, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11, .flags = UART_CAP_FIFO, }, [PORT_NS16550A] = { .name = "NS16550A", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_NATSEMI, }, [PORT_XSCALE] = { .name = "XScale", .fifo_size = 32, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE, }, [PORT_OCTEON] = { .name = "OCTEON", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, [PORT_U6_16550A] = { .name = "U6_16550A", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, [PORT_TEGRA] = { .name = "Tegra", .fifo_size = 32, .tx_loadsz = 8, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_01, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO | UART_CAP_RTOIE, }, [PORT_XR17D15X] = { .name = "XR17D15X", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_XR17V35X] = { .name = "XR17V35X", .fifo_size = 256, .tx_loadsz = 256, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 | UART_FCR_T_TRIG_11, .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_LPC3220] = { .name = "LPC3220", .fifo_size = 64, .tx_loadsz = 32, .fcr = UART_FCR_DMA_SELECT | UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00 | UART_FCR_T_TRIG_00, .flags = 
UART_CAP_FIFO, }, [PORT_BRCM_TRUMANAGE] = { .name = "TruManage", .fifo_size = 1, .tx_loadsz = 1024, .flags = UART_CAP_HFIFO, }, [PORT_8250_CIR] = { .name = "CIR port" }, [PORT_ALTR_16550_F32] = { .name = "Altera 16550 FIFO32", .fifo_size = 32, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 8, 16, 30}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, [PORT_ALTR_16550_F64] = { .name = "Altera 16550 FIFO64", .fifo_size = 64, .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 16, 32, 62}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, [PORT_ALTR_16550_F128] = { .name = "Altera 16550 FIFO128", .fifo_size = 128, .tx_loadsz = 128, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 32, 64, 126}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, /* * tx_loadsz is set to 63-bytes instead of 64-bytes to implement * workaround of errata A-008006 which states that tx_loadsz should * be configured less than Maximum supported fifo bytes. */ [PORT_16550A_FSL64] = { .name = "16550A_FSL64", .fifo_size = 64, .tx_loadsz = 63, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | UART_FCR7_64BYTE, .flags = UART_CAP_FIFO | UART_CAP_NOTEMT, }, [PORT_RT2880] = { .name = "Palmchip BK-3103", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_DA830] = { .name = "TI DA8xx/66AK2x", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_DMA_SELECT | UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, [PORT_MTK_BTIF] = { .name = "MediaTek BTIF", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT, .flags = UART_CAP_FIFO, }, [PORT_NPCM] = { .name = "Nuvoton 16550", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_SUNIX] = { .name = "Sunix", .fifo_size = 128, .tx_loadsz = 128, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .rxtrig_bytes = {1, 32, 64, 112}, .flags = UART_CAP_FIFO | UART_CAP_SLEEP, }, [PORT_ASPEED_VUART] = { .name = "ASPEED VUART", .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_MCHP16550A] = { .name = "MCHP16550A", .fifo_size = 256, .tx_loadsz = 256, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01, .rxtrig_bytes = {2, 66, 130, 194}, .flags = UART_CAP_FIFO, }, [PORT_BCM7271] = { .name = "Broadcom BCM7271 UART", .fifo_size = 32, .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01, .rxtrig_bytes = {1, 8, 16, 30}, .flags = UART_CAP_FIFO | UART_CAP_AFE, }, }; /* Uart divisor latch read */ static u32 default_serial_dl_read(struct uart_8250_port *up) { /* Assign these in pieces to truncate any bits above 7. 
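 * For example, a programmed divisor of 0x0183 reads back as dll == 0x83
 * and dlm == 0x01, which is recombined below as dll | dlm << 8.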
*/ unsigned char dll = serial_in(up, UART_DLL); unsigned char dlm = serial_in(up, UART_DLM); return dll | dlm << 8; } /* Uart divisor latch write */ static void default_serial_dl_write(struct uart_8250_port *up, u32 value) { serial_out(up, UART_DLL, value & 0xff); serial_out(up, UART_DLM, value >> 8 & 0xff); } static unsigned int hub6_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; outb(p->hub6 - 1 + offset, p->iobase); return inb(p->iobase + 1); } static void hub6_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; outb(p->hub6 - 1 + offset, p->iobase); outb(value, p->iobase + 1); } static unsigned int mem_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return readb(p->membase + offset); } static void mem_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; writeb(value, p->membase + offset); } static void mem16_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; writew(value, p->membase + offset); } static unsigned int mem16_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return readw(p->membase + offset); } static void mem32_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; writel(value, p->membase + offset); } static unsigned int mem32_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return readl(p->membase + offset); } static void mem32be_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; iowrite32be(value, p->membase + offset); } static unsigned int mem32be_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return ioread32be(p->membase + offset); } static unsigned int io_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; return inb(p->iobase + offset); } static void io_serial_out(struct uart_port *p, int offset, int value) { offset = offset << p->regshift; outb(value, p->iobase + offset); } static int serial8250_default_handle_irq(struct uart_port *port); static void set_io_from_upio(struct uart_port *p) { struct uart_8250_port *up = up_to_u8250p(p); up->dl_read = default_serial_dl_read; up->dl_write = default_serial_dl_write; switch (p->iotype) { case UPIO_HUB6: p->serial_in = hub6_serial_in; p->serial_out = hub6_serial_out; break; case UPIO_MEM: p->serial_in = mem_serial_in; p->serial_out = mem_serial_out; break; case UPIO_MEM16: p->serial_in = mem16_serial_in; p->serial_out = mem16_serial_out; break; case UPIO_MEM32: p->serial_in = mem32_serial_in; p->serial_out = mem32_serial_out; break; case UPIO_MEM32BE: p->serial_in = mem32be_serial_in; p->serial_out = mem32be_serial_out; break; default: p->serial_in = io_serial_in; p->serial_out = io_serial_out; break; } /* Remember loaded iotype */ up->cur_iotype = p->iotype; p->handle_irq = serial8250_default_handle_irq; } static void serial_port_out_sync(struct uart_port *p, int offset, int value) { switch (p->iotype) { case UPIO_MEM: case UPIO_MEM16: case UPIO_MEM32: case UPIO_MEM32BE: case UPIO_AU: p->serial_out(p, offset, value); p->serial_in(p, UART_LCR); /* safe, no side-effects */ break; default: p->serial_out(p, offset, value); } } /* * FIFO support. 
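 *
 * serial8250_clear_fifos() below follows the usual 16550 sequence: keep
 * UART_FCR_ENABLE_FIFO set while writing the self-clearing CLEAR_RCVR and
 * CLEAR_XMIT bits, then leave the FIFOs disabled until set_termios()
 * reprograms the final FCR value.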
*/ static void serial8250_clear_fifos(struct uart_8250_port *p) { if (p->capabilities & UART_CAP_FIFO) { serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO); serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial_out(p, UART_FCR, 0); } } static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t); static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t); void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) { serial8250_clear_fifos(p); serial_out(p, UART_FCR, p->fcr); } EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); void serial8250_rpm_get(struct uart_8250_port *p) { if (!(p->capabilities & UART_CAP_RPM)) return; pm_runtime_get_sync(p->port.dev); } EXPORT_SYMBOL_GPL(serial8250_rpm_get); void serial8250_rpm_put(struct uart_8250_port *p) { if (!(p->capabilities & UART_CAP_RPM)) return; pm_runtime_mark_last_busy(p->port.dev); pm_runtime_put_autosuspend(p->port.dev); } EXPORT_SYMBOL_GPL(serial8250_rpm_put); /** * serial8250_em485_init() - put uart_8250_port into rs485 emulating * @p: uart_8250_port port instance * * The function is used to start rs485 software emulating on the * &struct uart_8250_port* @p. Namely, RTS is switched before/after * transmission. The function is idempotent, so it is safe to call it * multiple times. * * The caller MUST enable interrupt on empty shift register before * calling serial8250_em485_init(). This interrupt is not a part of * 8250 standard, but implementation defined. * * The function is supposed to be called from .rs485_config callback * or from any other callback protected with p->port.lock spinlock. * * See also serial8250_em485_destroy() * * Return 0 - success, -errno - otherwise */ static int serial8250_em485_init(struct uart_8250_port *p) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&p->port.lock); if (p->em485) goto deassert_rts; p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_ATOMIC); if (!p->em485) return -ENOMEM; hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); p->em485->stop_tx_timer.function = &serial8250_em485_handle_stop_tx; p->em485->start_tx_timer.function = &serial8250_em485_handle_start_tx; p->em485->port = p; p->em485->active_timer = NULL; p->em485->tx_stopped = true; deassert_rts: if (p->em485->tx_stopped) p->rs485_stop_tx(p); return 0; } /** * serial8250_em485_destroy() - put uart_8250_port into normal state * @p: uart_8250_port port instance * * The function is used to stop rs485 software emulating on the * &struct uart_8250_port* @p. The function is idempotent, so it is safe to * call it multiple times. * * The function is supposed to be called from .rs485_config callback * or from any other callback protected with p->port.lock spinlock. 
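 *
 * In this driver it is reached through serial8250_em485_config() whenever
 * a configuration without SER_RS485_ENABLED is applied.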
* * See also serial8250_em485_init() */ void serial8250_em485_destroy(struct uart_8250_port *p) { if (!p->em485) return; hrtimer_cancel(&p->em485->start_tx_timer); hrtimer_cancel(&p->em485->stop_tx_timer); kfree(p->em485); p->em485 = NULL; } EXPORT_SYMBOL_GPL(serial8250_em485_destroy); struct serial_rs485 serial8250_em485_supported = { .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RTS_AFTER_SEND | SER_RS485_TERMINATE_BUS | SER_RS485_RX_DURING_TX, .delay_rts_before_send = 1, .delay_rts_after_send = 1, }; EXPORT_SYMBOL_GPL(serial8250_em485_supported); /** * serial8250_em485_config() - generic ->rs485_config() callback * @port: uart port * @termios: termios structure * @rs485: rs485 settings * * Generic callback usable by 8250 uart drivers to activate rs485 settings * if the uart is incapable of driving RTS as a Transmit Enable signal in * hardware, relying on software emulation instead. */ int serial8250_em485_config(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485) { struct uart_8250_port *up = up_to_u8250p(port); /* pick sane settings if the user hasn't */ if (!!(rs485->flags & SER_RS485_RTS_ON_SEND) == !!(rs485->flags & SER_RS485_RTS_AFTER_SEND)) { rs485->flags |= SER_RS485_RTS_ON_SEND; rs485->flags &= ~SER_RS485_RTS_AFTER_SEND; } /* * Both serial8250_em485_init() and serial8250_em485_destroy() * are idempotent. */ if (rs485->flags & SER_RS485_ENABLED) return serial8250_em485_init(up); serial8250_em485_destroy(up); return 0; } EXPORT_SYMBOL_GPL(serial8250_em485_config); /* * These two wrappers ensure that enable_runtime_pm_tx() can be called more than * once and disable_runtime_pm_tx() will still disable RPM because the fifo is * empty and the HW can idle again. */ void serial8250_rpm_get_tx(struct uart_8250_port *p) { unsigned char rpm_active; if (!(p->capabilities & UART_CAP_RPM)) return; rpm_active = xchg(&p->rpm_tx_active, 1); if (rpm_active) return; pm_runtime_get_sync(p->port.dev); } EXPORT_SYMBOL_GPL(serial8250_rpm_get_tx); void serial8250_rpm_put_tx(struct uart_8250_port *p) { unsigned char rpm_active; if (!(p->capabilities & UART_CAP_RPM)) return; rpm_active = xchg(&p->rpm_tx_active, 0); if (!rpm_active) return; pm_runtime_mark_last_busy(p->port.dev); pm_runtime_put_autosuspend(p->port.dev); } EXPORT_SYMBOL_GPL(serial8250_rpm_put_tx); /* * IER sleep support. UARTs which have EFRs need the "extended * capability" bit enabled. Note that on XR16C850s, we need to * reset LCR to write to IER. */ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) { unsigned char lcr = 0, efr = 0; serial8250_rpm_get(p); if (p->capabilities & UART_CAP_SLEEP) { /* Synchronize UART_IER access against the console. */ uart_port_lock_irq(&p->port); if (p->capabilities & UART_CAP_EFR) { lcr = serial_in(p, UART_LCR); efr = serial_in(p, UART_EFR); serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, UART_EFR_ECB); serial_out(p, UART_LCR, 0); } serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, efr); serial_out(p, UART_LCR, lcr); } uart_port_unlock_irq(&p->port); } serial8250_rpm_put(p); } static void serial8250_clear_IER(struct uart_8250_port *up) { if (up->capabilities & UART_CAP_UUE) serial_out(up, UART_IER, UART_IER_UUE); else serial_out(up, UART_IER, 0); } #ifdef CONFIG_SERIAL_8250_RSA /* * Attempts to turn on the RSA FIFO. Returns zero on failure. * We set the port uart clock rate if we succeed. 
*/ static int __enable_rsa(struct uart_8250_port *up) { unsigned char mode; int result; mode = serial_in(up, UART_RSA_MSR); result = mode & UART_RSA_MSR_FIFO; if (!result) { serial_out(up, UART_RSA_MSR, mode | UART_RSA_MSR_FIFO); mode = serial_in(up, UART_RSA_MSR); result = mode & UART_RSA_MSR_FIFO; } if (result) up->port.uartclk = SERIAL_RSA_BAUD_BASE * 16; return result; } static void enable_rsa(struct uart_8250_port *up) { if (up->port.type == PORT_RSA) { if (up->port.uartclk != SERIAL_RSA_BAUD_BASE * 16) { uart_port_lock_irq(&up->port); __enable_rsa(up); uart_port_unlock_irq(&up->port); } if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) serial_out(up, UART_RSA_FRR, 0); } } /* * Attempts to turn off the RSA FIFO. Returns zero on failure. * It is unknown why interrupts were disabled in here. However, * the caller is expected to preserve this behaviour by grabbing * the spinlock before calling this function. */ static void disable_rsa(struct uart_8250_port *up) { unsigned char mode; int result; if (up->port.type == PORT_RSA && up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) { uart_port_lock_irq(&up->port); mode = serial_in(up, UART_RSA_MSR); result = !(mode & UART_RSA_MSR_FIFO); if (!result) { serial_out(up, UART_RSA_MSR, mode & ~UART_RSA_MSR_FIFO); mode = serial_in(up, UART_RSA_MSR); result = !(mode & UART_RSA_MSR_FIFO); } if (result) up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; uart_port_unlock_irq(&up->port); } } #endif /* CONFIG_SERIAL_8250_RSA */ /* * This is a quickie test to see how big the FIFO is. * It doesn't work at all the time, more's the pity. */ static int size_fifo(struct uart_8250_port *up) { unsigned char old_fcr, old_mcr, old_lcr; u32 old_dl; int count; old_lcr = serial_in(up, UART_LCR); serial_out(up, UART_LCR, 0); old_fcr = serial_in(up, UART_FCR); old_mcr = serial8250_in_MCR(up); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial8250_out_MCR(up, UART_MCR_LOOP); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); old_dl = serial_dl_read(up); serial_dl_write(up, 0x0001); serial_out(up, UART_LCR, UART_LCR_WLEN8); for (count = 0; count < 256; count++) serial_out(up, UART_TX, count); mdelay(20);/* FIXME - schedule_timeout */ for (count = 0; (serial_in(up, UART_LSR) & UART_LSR_DR) && (count < 256); count++) serial_in(up, UART_RX); serial_out(up, UART_FCR, old_fcr); serial8250_out_MCR(up, old_mcr); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_dl_write(up, old_dl); serial_out(up, UART_LCR, old_lcr); return count; } /* * Read UART ID using the divisor method - set DLL and DLM to zero * and the revision will be in DLL and device type in DLM. We * preserve the device state across this. */ static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p) { unsigned char old_lcr; unsigned int id, old_dl; old_lcr = serial_in(p, UART_LCR); serial_out(p, UART_LCR, UART_LCR_CONF_MODE_A); old_dl = serial_dl_read(p); serial_dl_write(p, 0); id = serial_dl_read(p); serial_dl_write(p, old_dl); serial_out(p, UART_LCR, old_lcr); return id; } /* * This is a helper routine to autodetect StarTech/Exar/Oxsemi UART's. * When this function is called we know it is at least a StarTech * 16650 V2, but it might be one of several StarTech UARTs, or one of * its clones. (We treat the broken original StarTech 16650 V1 as a * 16550, and why not? Startech doesn't seem to even acknowledge its * existence.) * * What evil have men's minds wrought... 
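 *
 * Roughly, the probe below first checks for an Oxford Semiconductor
 * 16C950/954 via the ICR ID registers, then for an XR16C850/2850/854 via
 * the divisor ID, and finally tells the ST16C654 and ST16C650V2 apart by
 * measuring the FIFO size.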
*/ static void autoconfig_has_efr(struct uart_8250_port *up) { unsigned int id1, id2, id3, rev; /* * Everything with an EFR has SLEEP */ up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; /* * First we check to see if it's an Oxford Semiconductor UART. * * If we have to do this here because some non-National * Semiconductor clone chips lock up if you try writing to the * LSR register (which serial_icr_read does) */ /* * Check for Oxford Semiconductor 16C950. * * EFR [4] must be set else this test fails. * * This shouldn't be necessary, but Mike Hudson (Exoray@isys.ca) * claims that it's needed for 952 dual UART's (which are not * recommended for new designs). */ up->acr = 0; serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, 0x00); id1 = serial_icr_read(up, UART_ID1); id2 = serial_icr_read(up, UART_ID2); id3 = serial_icr_read(up, UART_ID3); rev = serial_icr_read(up, UART_REV); DEBUG_AUTOCONF("950id=%02x:%02x:%02x:%02x ", id1, id2, id3, rev); if (id1 == 0x16 && id2 == 0xC9 && (id3 == 0x50 || id3 == 0x52 || id3 == 0x54)) { up->port.type = PORT_16C950; /* * Enable work around for the Oxford Semiconductor 952 rev B * chip which causes it to seriously miscalculate baud rates * when DLL is 0. */ if (id3 == 0x52 && rev == 0x01) up->bugs |= UART_BUG_QUOT; return; } /* * We check for a XR16C850 by setting DLL and DLM to 0, and then * reading back DLL and DLM. The chip type depends on the DLM * value read back: * 0x10 - XR16C850 and the DLL contains the chip revision. * 0x12 - XR16C2850. * 0x14 - XR16C854. */ id1 = autoconfig_read_divisor_id(up); DEBUG_AUTOCONF("850id=%04x ", id1); id2 = id1 >> 8; if (id2 == 0x10 || id2 == 0x12 || id2 == 0x14) { up->port.type = PORT_16850; return; } /* * It wasn't an XR16C850. * * We distinguish between the '654 and the '650 by counting * how many bytes are in the FIFO. I'm using this for now, * since that's the technique that was sent to me in the * serial driver update, but I'm not convinced this works. * I've had problems doing this in the past. -TYT */ if (size_fifo(up) == 64) up->port.type = PORT_16654; else up->port.type = PORT_16650V2; } /* * We detected a chip without a FIFO. Only two fall into * this category - the original 8250 and the 16450. The * 16450 has a scratch register (accessible with LCR=0) */ static void autoconfig_8250(struct uart_8250_port *up) { unsigned char scratch, status1, status2; up->port.type = PORT_8250; scratch = serial_in(up, UART_SCR); serial_out(up, UART_SCR, 0xa5); status1 = serial_in(up, UART_SCR); serial_out(up, UART_SCR, 0x5a); status2 = serial_in(up, UART_SCR); serial_out(up, UART_SCR, scratch); if (status1 == 0xa5 && status2 == 0x5a) up->port.type = PORT_16450; } static int broken_efr(struct uart_8250_port *up) { /* * Exar ST16C2550 "A2" devices incorrectly detect as * having an EFR, and report an ID of 0x0201. See * http://linux.derkeiler.com/Mailing-Lists/Kernel/2004-11/4812.html */ if (autoconfig_read_divisor_id(up) == 0x0201 && size_fifo(up) == 16) return 1; return 0; } /* * We know that the chip has FIFOs. Does it have an EFR? The * EFR is located in the same register position as the IIR and * we know the top two bits of the IIR are currently set. The * EFR should contain zero. Try to read the EFR. */ static void autoconfig_16550a(struct uart_8250_port *up) { unsigned char status1, status2; unsigned int iersave; /* Port locked to synchronize UART_IER access against the console. 
*/ lockdep_assert_held_once(&up->port.lock); up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) && !(up->port.flags & UPF_FULL_PROBE)) return; /* * Check for presence of the EFR when DLAB is set. * Only ST16C650V1 UARTs pass this test. */ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); if (serial_in(up, UART_EFR) == 0) { serial_out(up, UART_EFR, 0xA8); if (serial_in(up, UART_EFR) != 0) { DEBUG_AUTOCONF("EFRv1 "); up->port.type = PORT_16650; up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; } else { serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status1 = serial_in(up, UART_IIR) & UART_IIR_FIFO_ENABLED_16750; serial_out(up, UART_FCR, 0); serial_out(up, UART_LCR, 0); if (status1 == UART_IIR_FIFO_ENABLED_16750) up->port.type = PORT_16550A_FSL64; else DEBUG_AUTOCONF("Motorola 8xxx DUART "); } serial_out(up, UART_EFR, 0); return; } /* * Maybe it requires 0xbf to be written to the LCR. * (other ST16C650V2 UARTs, TI16C752A, etc) */ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) { DEBUG_AUTOCONF("EFRv2 "); autoconfig_has_efr(up); return; } /* * Check for a National Semiconductor SuperIO chip. * Attempt to switch to bank 2, read the value of the LOOP bit * from EXCR1. Switch back to bank 0, change it in MCR. Then * switch back to bank 2, read it from EXCR1 again and check * it's changed. If so, set baud_base in EXCR2 to 921600. -- dwmw2 */ serial_out(up, UART_LCR, 0); status1 = serial8250_in_MCR(up); serial_out(up, UART_LCR, 0xE0); status2 = serial_in(up, 0x02); /* EXCR1 */ if (!((status2 ^ status1) & UART_MCR_LOOP)) { serial_out(up, UART_LCR, 0); serial8250_out_MCR(up, status1 ^ UART_MCR_LOOP); serial_out(up, UART_LCR, 0xE0); status2 = serial_in(up, 0x02); /* EXCR1 */ serial_out(up, UART_LCR, 0); serial8250_out_MCR(up, status1); if ((status2 ^ status1) & UART_MCR_LOOP) { unsigned short quot; serial_out(up, UART_LCR, 0xE0); quot = serial_dl_read(up); quot <<= 3; if (ns16550a_goto_highspeed(up)) serial_dl_write(up, quot); serial_out(up, UART_LCR, 0); up->port.uartclk = 921600*16; up->port.type = PORT_NS16550A; up->capabilities |= UART_NATSEMI; return; } } /* * No EFR. Try to detect a TI16750, which only sets bit 5 of * the IIR when 64 byte FIFO mode is enabled when DLAB is set. * Try setting it with and without DLAB set. Cheap clones * set bit 5 without DLAB set. */ serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status1 = serial_in(up, UART_IIR) & UART_IIR_FIFO_ENABLED_16750; serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); status2 = serial_in(up, UART_IIR) & UART_IIR_FIFO_ENABLED_16750; serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); serial_out(up, UART_LCR, 0); DEBUG_AUTOCONF("iir1=%d iir2=%d ", status1, status2); if (status1 == UART_IIR_FIFO_ENABLED_16550A && status2 == UART_IIR_FIFO_ENABLED_16750) { up->port.type = PORT_16750; up->capabilities |= UART_CAP_AFE | UART_CAP_SLEEP; return; } /* * Try writing and reading the UART_IER_UUE bit (b6). * If it works, this is probably one of the Xscale platform's * internal UARTs. * We're going to explicitly set the UUE bit to 0 before * trying to write and read a 1 just to make sure it's not * already a 1 and maybe locked there before we even start. 
*/ iersave = serial_in(up, UART_IER); serial_out(up, UART_IER, iersave & ~UART_IER_UUE); if (!(serial_in(up, UART_IER) & UART_IER_UUE)) { /* * OK it's in a known zero state, try writing and reading * without disturbing the current state of the other bits. */ serial_out(up, UART_IER, iersave | UART_IER_UUE); if (serial_in(up, UART_IER) & UART_IER_UUE) { /* * It's an Xscale. * We'll leave the UART_IER_UUE bit set to 1 (enabled). */ DEBUG_AUTOCONF("Xscale "); up->port.type = PORT_XSCALE; up->capabilities |= UART_CAP_UUE | UART_CAP_RTOIE; return; } } else { /* * If we got here we couldn't force the IER_UUE bit to 0. * Log it and continue. */ DEBUG_AUTOCONF("Couldn't force IER_UUE to 0 "); } serial_out(up, UART_IER, iersave); /* * We distinguish between 16550A and U6 16550A by counting * how many bytes are in the FIFO. */ if (up->port.type == PORT_16550A && size_fifo(up) == 64) { up->port.type = PORT_U6_16550A; up->capabilities |= UART_CAP_AFE; } } /* * This routine is called by rs_init() to initialize a specific serial * port. It determines what type of UART chip this serial port is * using: 8250, 16450, 16550, 16550A. The important question is * whether or not this UART is a 16550A or not, since this will * determine whether or not we can use its FIFO features or not. */ static void autoconfig(struct uart_8250_port *up) { unsigned char status1, scratch, scratch2, scratch3; unsigned char save_lcr, save_mcr; struct uart_port *port = &up->port; unsigned long flags; unsigned int old_capabilities; if (!port->iobase && !port->mapbase && !port->membase) return; DEBUG_AUTOCONF("%s: autoconf (0x%04lx, 0x%p): ", port->name, port->iobase, port->membase); /* * We really do need global IRQs disabled here - we're going to * be frobbing the chips IRQ enable register to see if it exists. * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); up->capabilities = 0; up->bugs = 0; if (!(port->flags & UPF_BUGGY_UART)) { /* * Do a simple existence test first; if we fail this, * there's no point trying anything else. * * 0x80 is used as a nonsense port to prevent against * false positives due to ISA bus float. The * assumption is that 0x80 is a non-existent port; * which should be safe since include/asm/io.h also * makes this assumption. * * Note: this is safe as long as MCR bit 4 is clear * and the device is in "PC" mode. */ scratch = serial_in(up, UART_IER); serial_out(up, UART_IER, 0); #ifdef __i386__ outb(0xff, 0x080); #endif /* * Mask out IER[7:4] bits for test as some UARTs (e.g. TL * 16C754B) allow only to modify them if an EFR bit is set. */ scratch2 = serial_in(up, UART_IER) & UART_IER_ALL_INTR; serial_out(up, UART_IER, UART_IER_ALL_INTR); #ifdef __i386__ outb(0, 0x080); #endif scratch3 = serial_in(up, UART_IER) & UART_IER_ALL_INTR; serial_out(up, UART_IER, scratch); if (scratch2 != 0 || scratch3 != UART_IER_ALL_INTR) { /* * We failed; there's nothing here */ uart_port_unlock_irqrestore(port, flags); DEBUG_AUTOCONF("IER test failed (%02x, %02x) ", scratch2, scratch3); goto out; } } save_mcr = serial8250_in_MCR(up); save_lcr = serial_in(up, UART_LCR); /* * Check to see if a UART is really there. Certain broken * internal modems based on the Rockwell chipset fail this * test, because they apparently don't implement the loopback * test mode. So this test is skipped on the COM 1 through * COM 4 ports. This *should* be safe, since no board * manufacturer would be stupid enough to design a board * that conflicts with COM 1-4 --- we hope! 
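 *
 * In 16550 loopback mode the modem control outputs are wrapped back onto
 * the modem status inputs (RTS->CTS, OUT2->DCD, DTR->DSR, OUT1->RI), so
 * driving LOOP | OUT2 | RTS below should read back exactly DCD | CTS.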
*/ if (!(port->flags & UPF_SKIP_TEST)) { serial8250_out_MCR(up, UART_MCR_LOOP | UART_MCR_OUT2 | UART_MCR_RTS); status1 = serial_in(up, UART_MSR) & UART_MSR_STATUS_BITS; serial8250_out_MCR(up, save_mcr); if (status1 != (UART_MSR_DCD | UART_MSR_CTS)) { uart_port_unlock_irqrestore(port, flags); DEBUG_AUTOCONF("LOOP test failed (%02x) ", status1); goto out; } } /* * We're pretty sure there's a port here. Lets find out what * type of port it is. The IIR top two bits allows us to find * out if it's 8250 or 16450, 16550, 16550A or later. This * determines what we test for next. * * We also initialise the EFR (if any) to zero for later. The * EFR occupies the same register location as the FCR and IIR. */ serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, 0); serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); switch (serial_in(up, UART_IIR) & UART_IIR_FIFO_ENABLED) { case UART_IIR_FIFO_ENABLED_8250: autoconfig_8250(up); break; case UART_IIR_FIFO_ENABLED_16550: port->type = PORT_16550; break; case UART_IIR_FIFO_ENABLED_16550A: autoconfig_16550a(up); break; default: port->type = PORT_UNKNOWN; break; } #ifdef CONFIG_SERIAL_8250_RSA /* * Only probe for RSA ports if we got the region. */ if (port->type == PORT_16550A && up->probe & UART_PROBE_RSA && __enable_rsa(up)) port->type = PORT_RSA; #endif serial_out(up, UART_LCR, save_lcr); port->fifosize = uart_config[up->port.type].fifo_size; old_capabilities = up->capabilities; up->capabilities = uart_config[port->type].flags; up->tx_loadsz = uart_config[port->type].tx_loadsz; if (port->type == PORT_UNKNOWN) goto out_unlock; /* * Reset the UART. */ #ifdef CONFIG_SERIAL_8250_RSA if (port->type == PORT_RSA) serial_out(up, UART_RSA_FRR, 0); #endif serial8250_out_MCR(up, save_mcr); serial8250_clear_fifos(up); serial_in(up, UART_RX); serial8250_clear_IER(up); out_unlock: uart_port_unlock_irqrestore(port, flags); /* * Check if the device is a Fintek F81216A */ if (port->type == PORT_16550A && port->iotype == UPIO_PORT) fintek_8250_probe(up); if (up->capabilities != old_capabilities) { dev_warn(port->dev, "detected caps %08x should be %08x\n", old_capabilities, up->capabilities); } out: DEBUG_AUTOCONF("iir=%d ", scratch); DEBUG_AUTOCONF("type=%s\n", uart_config[port->type].name); } static void autoconfig_irq(struct uart_8250_port *up) { struct uart_port *port = &up->port; unsigned char save_mcr, save_ier; unsigned char save_ICP = 0; unsigned int ICP = 0; unsigned long irqs; int irq; if (port->flags & UPF_FOURPORT) { ICP = (port->iobase & 0xfe0) | 0x1f; save_ICP = inb_p(ICP); outb_p(0x80, ICP); inb_p(ICP); } /* forget possible initially masked and pending IRQ */ probe_irq_off(probe_irq_on()); save_mcr = serial8250_in_MCR(up); /* Synchronize UART_IER access against the console. */ uart_port_lock_irq(port); save_ier = serial_in(up, UART_IER); uart_port_unlock_irq(port); serial8250_out_MCR(up, UART_MCR_OUT1 | UART_MCR_OUT2); irqs = probe_irq_on(); serial8250_out_MCR(up, 0); udelay(10); if (port->flags & UPF_FOURPORT) { serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } else { serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2); } /* Synchronize UART_IER access against the console. 
*/ uart_port_lock_irq(port); serial_out(up, UART_IER, UART_IER_ALL_INTR); uart_port_unlock_irq(port); serial_in(up, UART_LSR); serial_in(up, UART_RX); serial_in(up, UART_IIR); serial_in(up, UART_MSR); serial_out(up, UART_TX, 0xFF); udelay(20); irq = probe_irq_off(irqs); serial8250_out_MCR(up, save_mcr); /* Synchronize UART_IER access against the console. */ uart_port_lock_irq(port); serial_out(up, UART_IER, save_ier); uart_port_unlock_irq(port); if (port->flags & UPF_FOURPORT) outb_p(save_ICP, ICP); port->irq = (irq > 0) ? irq : 0; } static void serial8250_stop_rx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&port->lock); serial8250_rpm_get(up); up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); up->port.read_status_mask &= ~UART_LSR_DR; serial_port_out(port, UART_IER, up->ier); serial8250_rpm_put(up); } /** * serial8250_em485_stop_tx() - generic ->rs485_stop_tx() callback * @p: uart 8250 port * * Generic callback usable by 8250 uart drivers to stop rs485 transmission. */ void serial8250_em485_stop_tx(struct uart_8250_port *p) { unsigned char mcr = serial8250_in_MCR(p); /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&p->port.lock); if (p->port.rs485.flags & SER_RS485_RTS_AFTER_SEND) mcr |= UART_MCR_RTS; else mcr &= ~UART_MCR_RTS; serial8250_out_MCR(p, mcr); /* * Empty the RX FIFO, we are not interested in anything * received during the half-duplex transmission. * Enable previously disabled RX interrupts. */ if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) { serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; serial_port_out(&p->port, UART_IER, p->ier); } } EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t) { struct uart_8250_em485 *em485 = container_of(t, struct uart_8250_em485, stop_tx_timer); struct uart_8250_port *p = em485->port; unsigned long flags; serial8250_rpm_get(p); uart_port_lock_irqsave(&p->port, &flags); if (em485->active_timer == &em485->stop_tx_timer) { p->rs485_stop_tx(p); em485->active_timer = NULL; em485->tx_stopped = true; } uart_port_unlock_irqrestore(&p->port, flags); serial8250_rpm_put(p); return HRTIMER_NORESTART; } static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec) { hrtimer_start(hrt, ms_to_ktime(msec), HRTIMER_MODE_REL); } static void __stop_tx_rs485(struct uart_8250_port *p, u64 stop_delay) { struct uart_8250_em485 *em485 = p->em485; /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&p->port.lock); stop_delay += (u64)p->port.rs485.delay_rts_after_send * NSEC_PER_MSEC; /* * rs485_stop_tx() is going to set RTS according to config * AND flush RX FIFO if required. */ if (stop_delay > 0) { em485->active_timer = &em485->stop_tx_timer; hrtimer_start(&em485->stop_tx_timer, ns_to_ktime(stop_delay), HRTIMER_MODE_REL); } else { p->rs485_stop_tx(p); em485->active_timer = NULL; em485->tx_stopped = true; } } static inline void __stop_tx(struct uart_8250_port *p) { struct uart_8250_em485 *em485 = p->em485; if (em485) { u16 lsr = serial_lsr_in(p); u64 stop_delay = 0; if (!(lsr & UART_LSR_THRE)) return; /* * To provide required timing and allow FIFO transfer, * __stop_tx_rs485() must be called only when both FIFO and * shift register are empty. 
The device driver should either * enable interrupt on TEMT or set UART_CAP_NOTEMT that will * enlarge stop_tx_timer by the tx time of one frame to cover * for emptying of the shift register. */ if (!(lsr & UART_LSR_TEMT)) { if (!(p->capabilities & UART_CAP_NOTEMT)) return; /* * RTS might get deasserted too early with the normal * frame timing formula. It seems to suggest THRE might * get asserted already during tx of the stop bit * rather than after it is fully sent. * Roughly estimate 1 extra bit here with / 7. */ stop_delay = p->port.frame_time + DIV_ROUND_UP(p->port.frame_time, 7); } __stop_tx_rs485(p, stop_delay); } if (serial8250_clear_THRI(p)) serial8250_rpm_put_tx(p); } static void serial8250_stop_tx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); serial8250_rpm_get(up); __stop_tx(up); /* * We really want to stop the transmitter from sending. */ if (port->type == PORT_16C950) { up->acr |= UART_ACR_TXDIS; serial_icr_write(up, UART_ACR, up->acr); } serial8250_rpm_put(up); } static inline void __start_tx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); if (up->dma && !up->dma->tx_dma(up)) return; if (serial8250_set_THRI(up)) { if (up->bugs & UART_BUG_TXEN) { u16 lsr = serial_lsr_in(up); if (lsr & UART_LSR_THRE) serial8250_tx_chars(up); } } /* * Re-enable the transmitter if we disabled it. */ if (port->type == PORT_16C950 && up->acr & UART_ACR_TXDIS) { up->acr &= ~UART_ACR_TXDIS; serial_icr_write(up, UART_ACR, up->acr); } } /** * serial8250_em485_start_tx() - generic ->rs485_start_tx() callback * @up: uart 8250 port * * Generic callback usable by 8250 uart drivers to start rs485 transmission. * Assumes that setting the RTS bit in the MCR register means RTS is high. * (Some chips use inverse semantics.) Further assumes that reception is * stoppable by disabling the UART_IER_RDI interrupt. (Some chips set the * UART_LSR_DR bit even when UART_IER_RDI is disabled, foiling this approach.) */ void serial8250_em485_start_tx(struct uart_8250_port *up) { unsigned char mcr = serial8250_in_MCR(up); if (!(up->port.rs485.flags & SER_RS485_RX_DURING_TX)) serial8250_stop_rx(&up->port); if (up->port.rs485.flags & SER_RS485_RTS_ON_SEND) mcr |= UART_MCR_RTS; else mcr &= ~UART_MCR_RTS; serial8250_out_MCR(up, mcr); } EXPORT_SYMBOL_GPL(serial8250_em485_start_tx); /* Returns false, if start_tx_timer was setup to defer TX start */ static bool start_tx_rs485(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; /* * While serial8250_em485_handle_stop_tx() is a noop if * em485->active_timer != &em485->stop_tx_timer, it might happen that * the timer is still armed and triggers only after the current bunch of * chars is send and em485->active_timer == &em485->stop_tx_timer again. * So cancel the timer. There is still a theoretical race condition if * the timer is already running and only comes around to check for * em485->active_timer when &em485->stop_tx_timer is armed again. 
*/ if (em485->active_timer == &em485->stop_tx_timer) hrtimer_try_to_cancel(&em485->stop_tx_timer); em485->active_timer = NULL; if (em485->tx_stopped) { em485->tx_stopped = false; up->rs485_start_tx(up); if (up->port.rs485.delay_rts_before_send > 0) { em485->active_timer = &em485->start_tx_timer; start_hrtimer_ms(&em485->start_tx_timer, up->port.rs485.delay_rts_before_send); return false; } } return true; } static enum hrtimer_restart serial8250_em485_handle_start_tx(struct hrtimer *t) { struct uart_8250_em485 *em485 = container_of(t, struct uart_8250_em485, start_tx_timer); struct uart_8250_port *p = em485->port; unsigned long flags; uart_port_lock_irqsave(&p->port, &flags); if (em485->active_timer == &em485->start_tx_timer) { __start_tx(&p->port); em485->active_timer = NULL; } uart_port_unlock_irqrestore(&p->port, flags); return HRTIMER_NORESTART; } static void serial8250_start_tx(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&port->lock); if (!port->x_char && uart_circ_empty(&port->state->xmit)) return; serial8250_rpm_get_tx(up); if (em485) { if ((em485->active_timer == &em485->start_tx_timer) || !start_tx_rs485(port)) return; } __start_tx(port); } static void serial8250_throttle(struct uart_port *port) { port->throttle(port); } static void serial8250_unthrottle(struct uart_port *port) { port->unthrottle(port); } static void serial8250_disable_ms(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&port->lock); /* no MSR capabilities */ if (up->bugs & UART_BUG_NOMSR) return; mctrl_gpio_disable_ms(up->gpios); up->ier &= ~UART_IER_MSI; serial_port_out(port, UART_IER, up->ier); } static void serial8250_enable_ms(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&port->lock); /* no MSR capabilities */ if (up->bugs & UART_BUG_NOMSR) return; mctrl_gpio_enable_ms(up->gpios); up->ier |= UART_IER_MSI; serial8250_rpm_get(up); serial_port_out(port, UART_IER, up->ier); serial8250_rpm_put(up); } void serial8250_read_char(struct uart_8250_port *up, u16 lsr) { struct uart_port *port = &up->port; u8 ch, flag = TTY_NORMAL; if (likely(lsr & UART_LSR_DR)) ch = serial_in(up, UART_RX); else /* * Intel 82571 has a Serial Over Lan device that will * set UART_LSR_BI without setting UART_LSR_DR when * it receives a break. To avoid reading from the * receive buffer without UART_LSR_DR bit set, we * just force the read character to be 0 */ ch = 0; port->icount.rx++; lsr |= up->lsr_saved_flags; up->lsr_saved_flags = 0; if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) { if (lsr & UART_LSR_BI) { lsr &= ~(UART_LSR_FE | UART_LSR_PE); port->icount.brk++; /* * We do the SysRQ and SAK checking * here because otherwise the break * may get masked by ignore_status_mask * or read_status_mask. */ if (uart_handle_break(port)) return; } else if (lsr & UART_LSR_PE) port->icount.parity++; else if (lsr & UART_LSR_FE) port->icount.frame++; if (lsr & UART_LSR_OE) port->icount.overrun++; /* * Mask off conditions which should be ignored. 
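 * Bits cleared from read_status_mask were not requested by the current
 * termios settings, so after masking they no longer influence the flag
 * passed to the TTY layer.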
*/ lsr &= port->read_status_mask; if (lsr & UART_LSR_BI) { dev_dbg(port->dev, "handling break\n"); flag = TTY_BREAK; } else if (lsr & UART_LSR_PE) flag = TTY_PARITY; else if (lsr & UART_LSR_FE) flag = TTY_FRAME; } if (uart_prepare_sysrq_char(port, ch)) return; uart_insert_char(port, lsr, UART_LSR_OE, ch, flag); } EXPORT_SYMBOL_GPL(serial8250_read_char); /* * serial8250_rx_chars - Read characters. The first LSR value must be passed in. * * Returns LSR bits. The caller should rely only on non-Rx related LSR bits * (such as THRE) because the LSR value might come from an already consumed * character. */ u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr) { struct uart_port *port = &up->port; int max_count = 256; do { serial8250_read_char(up, lsr); if (--max_count == 0) break; lsr = serial_in(up, UART_LSR); } while (lsr & (UART_LSR_DR | UART_LSR_BI)); tty_flip_buffer_push(&port->state->port); return lsr; } EXPORT_SYMBOL_GPL(serial8250_rx_chars); void serial8250_tx_chars(struct uart_8250_port *up) { struct uart_port *port = &up->port; struct circ_buf *xmit = &port->state->xmit; int count; if (port->x_char) { uart_xchar_out(port, UART_TX); return; } if (uart_tx_stopped(port)) { serial8250_stop_tx(port); return; } if (uart_circ_empty(xmit)) { __stop_tx(up); return; } count = up->tx_loadsz; do { serial_out(up, UART_TX, xmit->buf[xmit->tail]); if (up->bugs & UART_BUG_TXRACE) { /* * The Aspeed BMC virtual UARTs have a bug where data * may get stuck in the BMC's Tx FIFO from bursts of * writes on the APB interface. * * Delay back-to-back writes by a read cycle to avoid * stalling the VUART. Read a register that won't have * side-effects and discard the result. */ serial_in(up, UART_SCR); } uart_xmit_advance(port, 1); if (uart_circ_empty(xmit)) break; if ((up->capabilities & UART_CAP_HFIFO) && !uart_lsr_tx_empty(serial_in(up, UART_LSR))) break; /* The BCM2835 MINI UART THRE bit is really a not-full bit. */ if ((up->capabilities & UART_CAP_MINI) && !(serial_in(up, UART_LSR) & UART_LSR_THRE)) break; } while (--count > 0); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); /* * With RPM enabled, we have to wait until the FIFO is empty before the * HW can go idle. So we get here once again with empty FIFO and disable * the interrupt and RPM in __stop_tx() */ if (uart_circ_empty(xmit) && !(up->capabilities & UART_CAP_RPM)) __stop_tx(up); } EXPORT_SYMBOL_GPL(serial8250_tx_chars); /* Caller holds uart port lock */ unsigned int serial8250_modem_status(struct uart_8250_port *up) { struct uart_port *port = &up->port; unsigned int status = serial_in(up, UART_MSR); status |= up->msr_saved_flags; up->msr_saved_flags = 0; if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && port->state != NULL) { if (status & UART_MSR_TERI) port->icount.rng++; if (status & UART_MSR_DDSR) port->icount.dsr++; if (status & UART_MSR_DDCD) uart_handle_dcd_change(port, status & UART_MSR_DCD); if (status & UART_MSR_DCTS) uart_handle_cts_change(port, status & UART_MSR_CTS); wake_up_interruptible(&port->state->port.delta_msr_wait); } return status; } EXPORT_SYMBOL_GPL(serial8250_modem_status); static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) { switch (iir & 0x3f) { case UART_IIR_THRI: /* * Postpone DMA or not decision to IIR_RDI or IIR_RX_TIMEOUT * because it's impossible to do an informed decision about * that with IIR_THRI. * * This also fixes one known DMA Rx corruption issue where * DR is asserted but DMA Rx only gets a corrupted zero byte * (too early DR?). 
*/ return false; case UART_IIR_RDI: if (!up->dma->rx_running) break; fallthrough; case UART_IIR_RLSI: case UART_IIR_RX_TIMEOUT: serial8250_rx_dma_flush(up); return true; } return up->dma->rx_dma(up); } /* * This handles the interrupt from one port. */ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) { struct uart_8250_port *up = up_to_u8250p(port); struct tty_port *tport = &port->state->port; bool skip_rx = false; unsigned long flags; u16 status; if (iir & UART_IIR_NO_INT) return 0; uart_port_lock_irqsave(port, &flags); status = serial_lsr_in(up); /* * If port is stopped and there are no error conditions in the * FIFO, then don't drain the FIFO, as this may lead to TTY buffer * overflow. Not servicing, RX FIFO would trigger auto HW flow * control when FIFO occupancy reaches preset threshold, thus * halting RX. This only works when auto HW flow control is * available. */ if (!(status & (UART_LSR_FIFOE | UART_LSR_BRK_ERROR_BITS)) && (port->status & (UPSTAT_AUTOCTS | UPSTAT_AUTORTS)) && !(port->read_status_mask & UART_LSR_DR)) skip_rx = true; if (status & (UART_LSR_DR | UART_LSR_BI) && !skip_rx) { struct irq_data *d; d = irq_get_irq_data(port->irq); if (d && irqd_is_wakeup_set(d)) pm_wakeup_event(tport->tty->dev, 0); if (!up->dma || handle_rx_dma(up, iir)) status = serial8250_rx_chars(up, status); } serial8250_modem_status(up); if ((status & UART_LSR_THRE) && (up->ier & UART_IER_THRI)) { if (!up->dma || up->dma->tx_err) serial8250_tx_chars(up); else if (!up->dma->tx_running) __stop_tx(up); } uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } EXPORT_SYMBOL_GPL(serial8250_handle_irq); static int serial8250_default_handle_irq(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int iir; int ret; serial8250_rpm_get(up); iir = serial_port_in(port, UART_IIR); ret = serial8250_handle_irq(port, iir); serial8250_rpm_put(up); return ret; } /* * Newer 16550 compatible parts such as the SC16C650 & Altera 16550 Soft IP * have a programmable TX threshold that triggers the THRE interrupt in * the IIR register. In this case, the THRE interrupt indicates the FIFO * has space available. Load it up with tx_loadsz bytes. 
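 * For the Altera variants the threshold itself is programmed in
 * serial8250_do_startup() as port->fifosize - up->tx_loadsz via the
 * UART_ALTR_TX_LOW register.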
*/ static int serial8250_tx_threshold_handle_irq(struct uart_port *port) { unsigned long flags; unsigned int iir = serial_port_in(port, UART_IIR); /* TX Threshold IRQ triggered so load up FIFO */ if ((iir & UART_IIR_ID) == UART_IIR_THRI) { struct uart_8250_port *up = up_to_u8250p(port); uart_port_lock_irqsave(port, &flags); serial8250_tx_chars(up); uart_port_unlock_irqrestore(port, flags); } iir = serial_port_in(port, UART_IIR); return serial8250_handle_irq(port, iir); } static unsigned int serial8250_tx_empty(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int result = 0; unsigned long flags; serial8250_rpm_get(up); uart_port_lock_irqsave(port, &flags); if (!serial8250_tx_dma_running(up) && uart_lsr_tx_empty(serial_lsr_in(up))) result = TIOCSER_TEMT; uart_port_unlock_irqrestore(port, flags); serial8250_rpm_put(up); return result; } unsigned int serial8250_do_get_mctrl(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int status; unsigned int val; serial8250_rpm_get(up); status = serial8250_modem_status(up); serial8250_rpm_put(up); val = serial8250_MSR_to_TIOCM(status); if (up->gpios) return mctrl_gpio_get(up->gpios, &val); return val; } EXPORT_SYMBOL_GPL(serial8250_do_get_mctrl); static unsigned int serial8250_get_mctrl(struct uart_port *port) { if (port->get_mctrl) return port->get_mctrl(port); return serial8250_do_get_mctrl(port); } void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl) { struct uart_8250_port *up = up_to_u8250p(port); unsigned char mcr; mcr = serial8250_TIOCM_to_MCR(mctrl); mcr |= up->mcr; serial8250_out_MCR(up, mcr); } EXPORT_SYMBOL_GPL(serial8250_do_set_mctrl); static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) { if (port->rs485.flags & SER_RS485_ENABLED) return; if (port->set_mctrl) port->set_mctrl(port, mctrl); else serial8250_do_set_mctrl(port, mctrl); } static void serial8250_break_ctl(struct uart_port *port, int break_state) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; serial8250_rpm_get(up); uart_port_lock_irqsave(port, &flags); if (break_state == -1) up->lcr |= UART_LCR_SBC; else up->lcr &= ~UART_LCR_SBC; serial_port_out(port, UART_LCR, up->lcr); uart_port_unlock_irqrestore(port, flags); serial8250_rpm_put(up); } static void wait_for_lsr(struct uart_8250_port *up, int bits) { unsigned int status, tmout = 10000; /* Wait up to 10ms for the character(s) to be sent. */ for (;;) { status = serial_lsr_in(up); if ((status & bits) == bits) break; if (--tmout == 0) break; udelay(1); touch_nmi_watchdog(); } } /* * Wait for transmitter & holding register to empty */ static void wait_for_xmitr(struct uart_8250_port *up, int bits) { unsigned int tmout; wait_for_lsr(up, bits); /* Wait up to 1s for flow control if necessary */ if (up->port.flags & UPF_CONS_FLOW) { for (tmout = 1000000; tmout; tmout--) { unsigned int msr = serial_in(up, UART_MSR); up->msr_saved_flags |= msr & MSR_SAVE_FLAGS; if (msr & UART_MSR_CTS) break; udelay(1); touch_nmi_watchdog(); } } } #ifdef CONFIG_CONSOLE_POLL /* * Console polling routines for writing and reading from the uart while * in an interrupt or debug context. 
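 *
 * These back the uart poll hooks used by KGDB/KDB, so they must not sleep
 * and cannot assume the port lock can be taken.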
*/ static int serial8250_get_poll_char(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); int status; u16 lsr; serial8250_rpm_get(up); lsr = serial_port_in(port, UART_LSR); if (!(lsr & UART_LSR_DR)) { status = NO_POLL_CHAR; goto out; } status = serial_port_in(port, UART_RX); out: serial8250_rpm_put(up); return status; } static void serial8250_put_poll_char(struct uart_port *port, unsigned char c) { unsigned int ier; struct uart_8250_port *up = up_to_u8250p(port); /* * Normally the port is locked to synchronize UART_IER access * against the console. However, this function is only used by * KDB/KGDB, where it may not be possible to acquire the port * lock because all other CPUs are quiesced. The quiescence * should allow safe lockless usage here. */ serial8250_rpm_get(up); /* * First save the IER then disable the interrupts */ ier = serial_port_in(port, UART_IER); serial8250_clear_IER(up); wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); /* * Send the character out. */ serial_port_out(port, UART_TX, c); /* * Finally, wait for transmitter to become empty * and restore the IER */ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); serial_port_out(port, UART_IER, ier); serial8250_rpm_put(up); } #endif /* CONFIG_CONSOLE_POLL */ int serial8250_do_startup(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; unsigned char iir; int retval; u16 lsr; if (!port->fifosize) port->fifosize = uart_config[port->type].fifo_size; if (!up->tx_loadsz) up->tx_loadsz = uart_config[port->type].tx_loadsz; if (!up->capabilities) up->capabilities = uart_config[port->type].flags; up->mcr = 0; if (port->iotype != up->cur_iotype) set_io_from_upio(port); serial8250_rpm_get(up); if (port->type == PORT_16C950) { /* * Wake up and initialize UART * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); up->acr = 0; serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); serial_port_out(port, UART_EFR, UART_EFR_ECB); serial_port_out(port, UART_IER, 0); serial_port_out(port, UART_LCR, 0); serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); serial_port_out(port, UART_EFR, UART_EFR_ECB); serial_port_out(port, UART_LCR, 0); uart_port_unlock_irqrestore(port, flags); } if (port->type == PORT_DA830) { /* * Reset the port * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); serial_port_out(port, UART_IER, 0); serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); uart_port_unlock_irqrestore(port, flags); mdelay(10); /* Enable Tx, Rx and free run mode */ serial_port_out(port, UART_DA830_PWREMU_MGMT, UART_DA830_PWREMU_MGMT_UTRST | UART_DA830_PWREMU_MGMT_URRST | UART_DA830_PWREMU_MGMT_FREE); } #ifdef CONFIG_SERIAL_8250_RSA /* * If this is an RSA port, see if we can kick it up to the * higher speed clock. */ enable_rsa(up); #endif /* * Clear the FIFO buffers and disable them. * (they will be reenabled in set_termios()) */ serial8250_clear_fifos(up); /* * Clear the interrupt registers. */ serial_port_in(port, UART_LSR); serial_port_in(port, UART_RX); serial_port_in(port, UART_IIR); serial_port_in(port, UART_MSR); /* * At this point, there's no way the LSR could still be 0xff; * if it is, then bail out, because there's likely no UART * here. 
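 *
 * (A floating, pulled-up data bus typically reads back as 0xff, so an
 * all-ones LSR at this point is taken to mean nothing decodes this
 * address.)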
*/ if (!(port->flags & UPF_BUGGY_UART) && (serial_port_in(port, UART_LSR) == 0xff)) { dev_info_ratelimited(port->dev, "LSR safety check engaged!\n"); retval = -ENODEV; goto out; } /* * For a XR16C850, we need to set the trigger levels */ if (port->type == PORT_16850) { unsigned char fctr; serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); fctr = serial_in(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX); serial_port_out(port, UART_FCTR, fctr | UART_FCTR_TRGD | UART_FCTR_RX); serial_port_out(port, UART_TRG, UART_TRG_96); serial_port_out(port, UART_FCTR, fctr | UART_FCTR_TRGD | UART_FCTR_TX); serial_port_out(port, UART_TRG, UART_TRG_96); serial_port_out(port, UART_LCR, 0); } /* * For the Altera 16550 variants, set TX threshold trigger level. */ if (((port->type == PORT_ALTR_16550_F32) || (port->type == PORT_ALTR_16550_F64) || (port->type == PORT_ALTR_16550_F128)) && (port->fifosize > 1)) { /* Bounds checking of TX threshold (valid 0 to fifosize-2) */ if ((up->tx_loadsz < 2) || (up->tx_loadsz > port->fifosize)) { dev_err(port->dev, "TX FIFO Threshold errors, skipping\n"); } else { serial_port_out(port, UART_ALTR_AFR, UART_ALTR_EN_TXFIFO_LW); serial_port_out(port, UART_ALTR_TX_LOW, port->fifosize - up->tx_loadsz); port->handle_irq = serial8250_tx_threshold_handle_irq; } } /* Check if we need to have shared IRQs */ if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) up->port.irqflags |= IRQF_SHARED; retval = up->ops->setup_irq(up); if (retval) goto out; if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; if (port->irqflags & IRQF_SHARED) disable_irq_nosync(port->irq); /* * Test for UARTs that do not reassert THRE when the * transmitter is idle and the interrupt has already * been cleared. Real 16550s should always reassert * this interrupt whenever the transmitter is idle and * the interrupt is enabled. Delays are necessary to * allow register changes to become visible. * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); wait_for_xmitr(up, UART_LSR_THRE); serial_port_out_sync(port, UART_IER, UART_IER_THRI); udelay(1); /* allow THRE to set */ iir1 = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); serial_port_out_sync(port, UART_IER, UART_IER_THRI); udelay(1); /* allow a working UART time to re-assert THRE */ iir = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); uart_port_unlock_irqrestore(port, flags); if (port->irqflags & IRQF_SHARED) enable_irq(port->irq); /* * If the interrupt is not reasserted, or we otherwise * don't trust the iir, setup a timer to kick the UART * on a regular basis. */ if ((!(iir1 & UART_IIR_NO_INT) && (iir & UART_IIR_NO_INT)) || up->port.flags & UPF_BUG_THRE) { up->bugs |= UART_BUG_THRE; } } up->ops->setup_timer(up); /* * Now, initialize the UART */ serial_port_out(port, UART_LCR, UART_LCR_WLEN8); uart_port_lock_irqsave(port, &flags); if (up->port.flags & UPF_FOURPORT) { if (!up->port.irq) up->port.mctrl |= TIOCM_OUT1; } else /* * Most PC uarts need OUT2 raised to enable interrupts. */ if (port->irq) up->port.mctrl |= TIOCM_OUT2; serial8250_set_mctrl(port, port->mctrl); /* * Serial over Lan (SoL) hack: * Intel 8257x Gigabit ethernet chips have a 16550 emulation, to be * used for Serial Over Lan. Those chips take a longer time than a * normal serial device to signalize that a transmission data was * queued. Due to that, the above test generally fails. One solution * would be to delay the reading of iir. However, this is not * reliable, since the timeout is variable. 
So, let's just don't * test if we receive TX irq. This way, we'll never enable * UART_BUG_TXEN. */ if (up->port.quirks & UPQ_NO_TXEN_TEST) goto dont_test_tx_en; /* * Do a quick test to see if we receive an interrupt when we enable * the TX irq. */ serial_port_out(port, UART_IER, UART_IER_THRI); lsr = serial_port_in(port, UART_LSR); iir = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { if (!(up->bugs & UART_BUG_TXEN)) { up->bugs |= UART_BUG_TXEN; dev_dbg(port->dev, "enabling bad tx status workarounds\n"); } } else { up->bugs &= ~UART_BUG_TXEN; } dont_test_tx_en: uart_port_unlock_irqrestore(port, flags); /* * Clear the interrupt registers again for luck, and clear the * saved flags to avoid getting false values from polling * routines or the previous session. */ serial_port_in(port, UART_LSR); serial_port_in(port, UART_RX); serial_port_in(port, UART_IIR); serial_port_in(port, UART_MSR); up->lsr_saved_flags = 0; up->msr_saved_flags = 0; /* * Request DMA channels for both RX and TX. */ if (up->dma) { const char *msg = NULL; if (uart_console(port)) msg = "forbid DMA for kernel console"; else if (serial8250_request_dma(up)) msg = "failed to request DMA"; if (msg) { dev_warn_ratelimited(port->dev, "%s\n", msg); up->dma = NULL; } } /* * Set the IER shadow for rx interrupts but defer actual interrupt * enable until after the FIFOs are enabled; otherwise, an already- * active sender can swamp the interrupt handler with "too much work". */ up->ier = UART_IER_RLSI | UART_IER_RDI; if (port->flags & UPF_FOURPORT) { unsigned int icp; /* * Enable interrupts on the AST Fourport board */ icp = (port->iobase & 0xfe0) | 0x01f; outb_p(0x80, icp); inb_p(icp); } retval = 0; out: serial8250_rpm_put(up); return retval; } EXPORT_SYMBOL_GPL(serial8250_do_startup); static int serial8250_startup(struct uart_port *port) { if (port->startup) return port->startup(port); return serial8250_do_startup(port); } void serial8250_do_shutdown(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; serial8250_rpm_get(up); /* * Disable interrupts from this port * * Synchronize UART_IER access against the console. */ uart_port_lock_irqsave(port, &flags); up->ier = 0; serial_port_out(port, UART_IER, 0); uart_port_unlock_irqrestore(port, flags); synchronize_irq(port->irq); if (up->dma) serial8250_release_dma(up); uart_port_lock_irqsave(port, &flags); if (port->flags & UPF_FOURPORT) { /* reset interrupts on the AST Fourport board */ inb((port->iobase & 0xfe0) | 0x1f); port->mctrl |= TIOCM_OUT1; } else port->mctrl &= ~TIOCM_OUT2; serial8250_set_mctrl(port, port->mctrl); uart_port_unlock_irqrestore(port, flags); /* * Disable break condition and FIFOs */ serial_port_out(port, UART_LCR, serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); serial8250_clear_fifos(up); #ifdef CONFIG_SERIAL_8250_RSA /* * Reset the RSA board back to 115kbps compat mode. */ disable_rsa(up); #endif /* * Read data port to reset things, and then unlink from * the IRQ chain. 
*/ serial_port_in(port, UART_RX); serial8250_rpm_put(up); up->ops->release_irq(up); } EXPORT_SYMBOL_GPL(serial8250_do_shutdown); static void serial8250_shutdown(struct uart_port *port) { if (port->shutdown) port->shutdown(port); else serial8250_do_shutdown(port); } static unsigned int serial8250_do_get_divisor(struct uart_port *port, unsigned int baud, unsigned int *frac) { upf_t magic_multiplier = port->flags & UPF_MAGIC_MULTIPLIER; struct uart_8250_port *up = up_to_u8250p(port); unsigned int quot; /* * Handle magic divisors for baud rates above baud_base on SMSC * Super I/O chips. We clamp custom rates from clk/6 and clk/12 * up to clk/4 (0x8001) and clk/8 (0x8002) respectively. These * magic divisors actually reprogram the baud rate generator's * reference clock derived from chips's 14.318MHz clock input. * * Documentation claims that with these magic divisors the base * frequencies of 7.3728MHz and 3.6864MHz are used respectively * for the extra baud rates of 460800bps and 230400bps rather * than the usual base frequency of 1.8462MHz. However empirical * evidence contradicts that. * * Instead bit 7 of the DLM register (bit 15 of the divisor) is * effectively used as a clock prescaler selection bit for the * base frequency of 7.3728MHz, always used. If set to 0, then * the base frequency is divided by 4 for use by the Baud Rate * Generator, for the usual arrangement where the value of 1 of * the divisor produces the baud rate of 115200bps. Conversely, * if set to 1 and high-speed operation has been enabled with the * Serial Port Mode Register in the Device Configuration Space, * then the base frequency is supplied directly to the Baud Rate * Generator, so for the divisor values of 0x8001, 0x8002, 0x8003, * 0x8004, etc. the respective baud rates produced are 460800bps, * 230400bps, 153600bps, 115200bps, etc. * * In all cases only low 15 bits of the divisor are used to divide * the baud base and therefore 32767 is the maximum divisor value * possible, even though documentation says that the programmable * Baud Rate Generator is capable of dividing the internal PLL * clock by any divisor from 1 to 65535. 
*/ if (magic_multiplier && baud >= port->uartclk / 6) quot = 0x8001; else if (magic_multiplier && baud >= port->uartclk / 12) quot = 0x8002; else quot = uart_get_divisor(port, baud); /* * Oxford Semi 952 rev B workaround */ if (up->bugs & UART_BUG_QUOT && (quot & 0xff) == 0) quot++; return quot; } static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud, unsigned int *frac) { if (port->get_divisor) return port->get_divisor(port, baud, frac); return serial8250_do_get_divisor(port, baud, frac); } static unsigned char serial8250_compute_lcr(struct uart_8250_port *up, tcflag_t c_cflag) { unsigned char cval; cval = UART_LCR_WLEN(tty_get_char_size(c_cflag)); if (c_cflag & CSTOPB) cval |= UART_LCR_STOP; if (c_cflag & PARENB) cval |= UART_LCR_PARITY; if (!(c_cflag & PARODD)) cval |= UART_LCR_EPAR; if (c_cflag & CMSPAR) cval |= UART_LCR_SPAR; return cval; } void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, unsigned int quot, unsigned int quot_frac) { struct uart_8250_port *up = up_to_u8250p(port); /* Workaround to enable 115200 baud on OMAP1510 internal ports */ if (is_omap1510_8250(up)) { if (baud == 115200) { quot = 1; serial_port_out(port, UART_OMAP_OSC_12M_SEL, 1); } else serial_port_out(port, UART_OMAP_OSC_12M_SEL, 0); } /* * For NatSemi, switch to bank 2 not bank 1, to avoid resetting EXCR2, * otherwise just set DLAB */ if (up->capabilities & UART_NATSEMI) serial_port_out(port, UART_LCR, 0xe0); else serial_port_out(port, UART_LCR, up->lcr | UART_LCR_DLAB); serial_dl_write(up, quot); } EXPORT_SYMBOL_GPL(serial8250_do_set_divisor); static void serial8250_set_divisor(struct uart_port *port, unsigned int baud, unsigned int quot, unsigned int quot_frac) { if (port->set_divisor) port->set_divisor(port, baud, quot, quot_frac); else serial8250_do_set_divisor(port, baud, quot, quot_frac); } static unsigned int serial8250_get_baud_rate(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { unsigned int tolerance = port->uartclk / 100; unsigned int min; unsigned int max; /* * Handle magic divisors for baud rates above baud_base on SMSC * Super I/O chips. Enable custom rates of clk/4 and clk/8, but * disable divisor values beyond 32767, which are unavailable. */ if (port->flags & UPF_MAGIC_MULTIPLIER) { min = port->uartclk / 16 / UART_DIV_MAX >> 1; max = (port->uartclk + tolerance) / 4; } else { min = port->uartclk / 16 / UART_DIV_MAX; max = (port->uartclk + tolerance) / 16; } /* * Ask the core to calculate the divisor for us. * Allow 1% tolerance at the upper limit so uart clks marginally * slower than nominal still match standard baud rates without * causing transmission errors. */ return uart_get_baud_rate(port, termios, old, min, max); } /* * Note in order to avoid the tty port mutex deadlock don't use the next method * within the uart port callbacks. Primarily it's supposed to be utilized to * handle a sudden reference clock rate change. 
*/ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) { struct tty_port *tport = &port->state->port; struct tty_struct *tty; tty = tty_port_tty_get(tport); if (!tty) { mutex_lock(&tport->mutex); port->uartclk = uartclk; mutex_unlock(&tport->mutex); return; } down_write(&tty->termios_rwsem); mutex_lock(&tport->mutex); if (port->uartclk == uartclk) goto out_unlock; port->uartclk = uartclk; if (!tty_port_initialized(tport)) goto out_unlock; serial8250_do_set_termios(port, &tty->termios, NULL); out_unlock: mutex_unlock(&tport->mutex); up_write(&tty->termios_rwsem); tty_kref_put(tty); } EXPORT_SYMBOL_GPL(serial8250_update_uartclk); void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { struct uart_8250_port *up = up_to_u8250p(port); unsigned char cval; unsigned long flags; unsigned int baud, quot, frac = 0; if (up->capabilities & UART_CAP_MINI) { termios->c_cflag &= ~(CSTOPB | PARENB | PARODD | CMSPAR); if ((termios->c_cflag & CSIZE) == CS5 || (termios->c_cflag & CSIZE) == CS6) termios->c_cflag = (termios->c_cflag & ~CSIZE) | CS7; } cval = serial8250_compute_lcr(up, termios->c_cflag); baud = serial8250_get_baud_rate(port, termios, old); quot = serial8250_get_divisor(port, baud, &frac); /* * Ok, we're now changing the port state. Do it with * interrupts disabled. * * Synchronize UART_IER access against the console. */ serial8250_rpm_get(up); uart_port_lock_irqsave(port, &flags); up->lcr = cval; /* Save computed LCR */ if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { if (baud < 2400 && !up->dma) { up->fcr &= ~UART_FCR_TRIGGER_MASK; up->fcr |= UART_FCR_TRIGGER_1; } } /* * MCR-based auto flow control. When AFE is enabled, RTS will be * deasserted when the receive FIFO contains more characters than * the trigger, or the MCR RTS bit is cleared. */ if (up->capabilities & UART_CAP_AFE) { up->mcr &= ~UART_MCR_AFE; if (termios->c_cflag & CRTSCTS) up->mcr |= UART_MCR_AFE; } /* * Update the per-port timeout. */ uart_update_timeout(port, termios->c_cflag, baud); port->read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; if (termios->c_iflag & INPCK) port->read_status_mask |= UART_LSR_FE | UART_LSR_PE; if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= UART_LSR_BI; /* * Characters to ignore */ port->ignore_status_mask = 0; if (termios->c_iflag & IGNPAR) port->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; if (termios->c_iflag & IGNBRK) { port->ignore_status_mask |= UART_LSR_BI; /* * If we're ignoring parity and break indicators, * ignore overruns too (for real raw support). */ if (termios->c_iflag & IGNPAR) port->ignore_status_mask |= UART_LSR_OE; } /* * ignore all characters if CREAD is not set */ if ((termios->c_cflag & CREAD) == 0) port->ignore_status_mask |= UART_LSR_DR; /* * CTS flow control flag and modem status interrupts */ up->ier &= ~UART_IER_MSI; if (!(up->bugs & UART_BUG_NOMSR) && UART_ENABLE_MS(&up->port, termios->c_cflag)) up->ier |= UART_IER_MSI; if (up->capabilities & UART_CAP_UUE) up->ier |= UART_IER_UUE; if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; serial_port_out(port, UART_IER, up->ier); if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; /* * TI16C752/Startech hardware flow control. FIXME: * - TI16C752 requires control thresholds to be set. * - UART_MCR_RTS is ineffective if auto-RTS mode is enabled. 
*/ if (termios->c_cflag & CRTSCTS) efr |= UART_EFR_CTS; serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); if (port->flags & UPF_EXAR_EFR) serial_port_out(port, UART_XR_EFR, efr); else serial_port_out(port, UART_EFR, efr); } serial8250_set_divisor(port, baud, quot, frac); /* * LCR DLAB must be set to enable 64-byte FIFO mode. If the FCR * is written without DLAB set, this mode will be disabled. */ if (port->type == PORT_16750) serial_port_out(port, UART_FCR, up->fcr); serial_port_out(port, UART_LCR, up->lcr); /* reset DLAB */ if (port->type != PORT_16750) { /* emulated UARTs (Lucent Venus 167x) need two steps */ if (up->fcr & UART_FCR_ENABLE_FIFO) serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO); serial_port_out(port, UART_FCR, up->fcr); /* set fcr */ } serial8250_set_mctrl(port, port->mctrl); uart_port_unlock_irqrestore(port, flags); serial8250_rpm_put(up); /* Don't rewrite B0 */ if (tty_termios_baud_rate(termios)) tty_termios_encode_baud_rate(termios, baud, baud); } EXPORT_SYMBOL(serial8250_do_set_termios); static void serial8250_set_termios(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { if (port->set_termios) port->set_termios(port, termios, old); else serial8250_do_set_termios(port, termios, old); } void serial8250_do_set_ldisc(struct uart_port *port, struct ktermios *termios) { if (termios->c_line == N_PPS) { port->flags |= UPF_HARDPPS_CD; uart_port_lock_irq(port); serial8250_enable_ms(port); uart_port_unlock_irq(port); } else { port->flags &= ~UPF_HARDPPS_CD; if (!UART_ENABLE_MS(port, termios->c_cflag)) { uart_port_lock_irq(port); serial8250_disable_ms(port); uart_port_unlock_irq(port); } } } EXPORT_SYMBOL_GPL(serial8250_do_set_ldisc); static void serial8250_set_ldisc(struct uart_port *port, struct ktermios *termios) { if (port->set_ldisc) port->set_ldisc(port, termios); else serial8250_do_set_ldisc(port, termios); } void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) { struct uart_8250_port *p = up_to_u8250p(port); serial8250_set_sleep(p, state != 0); } EXPORT_SYMBOL(serial8250_do_pm); static void serial8250_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) { if (port->pm) port->pm(port, state, oldstate); else serial8250_do_pm(port, state, oldstate); } static unsigned int serial8250_port_size(struct uart_8250_port *pt) { if (pt->port.mapsize) return pt->port.mapsize; if (is_omap1_8250(pt)) return 0x16 << pt->port.regshift; return 8 << pt->port.regshift; } /* * Resource handling. 
*/ static int serial8250_request_std_resource(struct uart_8250_port *up) { unsigned int size = serial8250_port_size(up); struct uart_port *port = &up->port; int ret = 0; switch (port->iotype) { case UPIO_AU: case UPIO_TSI: case UPIO_MEM32: case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: if (!port->mapbase) { ret = -EINVAL; break; } if (!request_mem_region(port->mapbase, size, "serial")) { ret = -EBUSY; break; } if (port->flags & UPF_IOREMAP) { port->membase = ioremap(port->mapbase, size); if (!port->membase) { release_mem_region(port->mapbase, size); ret = -ENOMEM; } } break; case UPIO_HUB6: case UPIO_PORT: if (!request_region(port->iobase, size, "serial")) ret = -EBUSY; break; } return ret; } static void serial8250_release_std_resource(struct uart_8250_port *up) { unsigned int size = serial8250_port_size(up); struct uart_port *port = &up->port; switch (port->iotype) { case UPIO_AU: case UPIO_TSI: case UPIO_MEM32: case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: if (!port->mapbase) break; if (port->flags & UPF_IOREMAP) { iounmap(port->membase); port->membase = NULL; } release_mem_region(port->mapbase, size); break; case UPIO_HUB6: case UPIO_PORT: release_region(port->iobase, size); break; } } static void serial8250_release_port(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); serial8250_release_std_resource(up); } static int serial8250_request_port(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); return serial8250_request_std_resource(up); } static int fcr_get_rxtrig_bytes(struct uart_8250_port *up) { const struct serial8250_config *conf_type = &uart_config[up->port.type]; unsigned char bytes; bytes = conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(up->fcr)]; return bytes ? bytes : -EOPNOTSUPP; } static int bytes_to_fcr_rxtrig(struct uart_8250_port *up, unsigned char bytes) { const struct serial8250_config *conf_type = &uart_config[up->port.type]; int i; if (!conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(UART_FCR_R_TRIG_00)]) return -EOPNOTSUPP; for (i = 1; i < UART_FCR_R_TRIG_MAX_STATE; i++) { if (bytes < conf_type->rxtrig_bytes[i]) /* Use the nearest lower value */ return (--i) << UART_FCR_R_TRIG_SHIFT; } return UART_FCR_R_TRIG_11; } static int do_get_rxtrig(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = state->uart_port; struct uart_8250_port *up = up_to_u8250p(uport); if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) return -EINVAL; return fcr_get_rxtrig_bytes(up); } static int do_serial8250_get_rxtrig(struct tty_port *port) { int rxtrig_bytes; mutex_lock(&port->mutex); rxtrig_bytes = do_get_rxtrig(port); mutex_unlock(&port->mutex); return rxtrig_bytes; } static ssize_t rx_trig_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct tty_port *port = dev_get_drvdata(dev); int rxtrig_bytes; rxtrig_bytes = do_serial8250_get_rxtrig(port); if (rxtrig_bytes < 0) return rxtrig_bytes; return sysfs_emit(buf, "%d\n", rxtrig_bytes); } static int do_set_rxtrig(struct tty_port *port, unsigned char bytes) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = state->uart_port; struct uart_8250_port *up = up_to_u8250p(uport); int rxtrig; if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) return -EINVAL; rxtrig = bytes_to_fcr_rxtrig(up, bytes); if (rxtrig < 0) return rxtrig; serial8250_clear_fifos(up); up->fcr &= ~UART_FCR_TRIGGER_MASK; up->fcr |= (unsigned char)rxtrig; 
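	/*
	 * The 16550 FCR shares its I/O address with the read-only IIR and
	 * cannot be read back, so the driver keeps a shadow copy in up->fcr;
	 * writing that shadow below is what actually applies the new receive
	 * trigger level to the hardware.
	 */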
serial_out(up, UART_FCR, up->fcr); return 0; } static int do_serial8250_set_rxtrig(struct tty_port *port, unsigned char bytes) { int ret; mutex_lock(&port->mutex); ret = do_set_rxtrig(port, bytes); mutex_unlock(&port->mutex); return ret; } static ssize_t rx_trig_bytes_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct tty_port *port = dev_get_drvdata(dev); unsigned char bytes; int ret; if (!count) return -EINVAL; ret = kstrtou8(buf, 10, &bytes); if (ret < 0) return ret; ret = do_serial8250_set_rxtrig(port, bytes); if (ret < 0) return ret; return count; } static DEVICE_ATTR_RW(rx_trig_bytes); static struct attribute *serial8250_dev_attrs[] = { &dev_attr_rx_trig_bytes.attr, NULL }; static struct attribute_group serial8250_dev_attr_group = { .attrs = serial8250_dev_attrs, }; static void register_dev_spec_attr_grp(struct uart_8250_port *up) { const struct serial8250_config *conf_type = &uart_config[up->port.type]; if (conf_type->rxtrig_bytes[0]) up->port.attr_group = &serial8250_dev_attr_group; } static void serial8250_config_port(struct uart_port *port, int flags) { struct uart_8250_port *up = up_to_u8250p(port); int ret; /* * Find the region that we can probe for. This in turn * tells us whether we can probe for the type of port. */ ret = serial8250_request_std_resource(up); if (ret < 0) return; if (port->iotype != up->cur_iotype) set_io_from_upio(port); if (flags & UART_CONFIG_TYPE) autoconfig(up); /* HW bugs may trigger IRQ while IIR == NO_INT */ if (port->type == PORT_TEGRA) up->bugs |= UART_BUG_NOMSR; if (port->type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ) autoconfig_irq(up); if (port->type == PORT_UNKNOWN) serial8250_release_std_resource(up); register_dev_spec_attr_grp(up); up->fcr = uart_config[up->port.type].fcr; } static int serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) { if (ser->irq >= nr_irqs || ser->irq < 0 || ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS || ser->type == PORT_STARTECH) return -EINVAL; return 0; } static const char *serial8250_type(struct uart_port *port) { int type = port->type; if (type >= ARRAY_SIZE(uart_config)) type = 0; return uart_config[type].name; } static const struct uart_ops serial8250_pops = { .tx_empty = serial8250_tx_empty, .set_mctrl = serial8250_set_mctrl, .get_mctrl = serial8250_get_mctrl, .stop_tx = serial8250_stop_tx, .start_tx = serial8250_start_tx, .throttle = serial8250_throttle, .unthrottle = serial8250_unthrottle, .stop_rx = serial8250_stop_rx, .enable_ms = serial8250_enable_ms, .break_ctl = serial8250_break_ctl, .startup = serial8250_startup, .shutdown = serial8250_shutdown, .set_termios = serial8250_set_termios, .set_ldisc = serial8250_set_ldisc, .pm = serial8250_pm, .type = serial8250_type, .release_port = serial8250_release_port, .request_port = serial8250_request_port, .config_port = serial8250_config_port, .verify_port = serial8250_verify_port, #ifdef CONFIG_CONSOLE_POLL .poll_get_char = serial8250_get_poll_char, .poll_put_char = serial8250_put_poll_char, #endif }; void serial8250_init_port(struct uart_8250_port *up) { struct uart_port *port = &up->port; spin_lock_init(&port->lock); port->ctrl_id = 0; port->pm = NULL; port->ops = &serial8250_pops; port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE); up->cur_iotype = 0xFF; } EXPORT_SYMBOL_GPL(serial8250_init_port); void serial8250_set_defaults(struct uart_8250_port *up) { struct uart_port *port = &up->port; if (up->port.flags & 
UPF_FIXED_TYPE) { unsigned int type = up->port.type; if (!up->port.fifosize) up->port.fifosize = uart_config[type].fifo_size; if (!up->tx_loadsz) up->tx_loadsz = uart_config[type].tx_loadsz; if (!up->capabilities) up->capabilities = uart_config[type].flags; } set_io_from_upio(port); /* default dma handlers */ if (up->dma) { if (!up->dma->tx_dma) up->dma->tx_dma = serial8250_tx_dma; if (!up->dma->rx_dma) up->dma->rx_dma = serial8250_rx_dma; } } EXPORT_SYMBOL_GPL(serial8250_set_defaults); #ifdef CONFIG_SERIAL_8250_CONSOLE static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) { struct uart_8250_port *up = up_to_u8250p(port); wait_for_xmitr(up, UART_LSR_THRE); serial_port_out(port, UART_TX, ch); } /* * Restore serial console when h/w power-off detected */ static void serial8250_console_restore(struct uart_8250_port *up) { struct uart_port *port = &up->port; struct ktermios termios; unsigned int baud, quot, frac = 0; termios.c_cflag = port->cons->cflag; termios.c_ispeed = port->cons->ispeed; termios.c_ospeed = port->cons->ospeed; if (port->state->port.tty && termios.c_cflag == 0) { termios.c_cflag = port->state->port.tty->termios.c_cflag; termios.c_ispeed = port->state->port.tty->termios.c_ispeed; termios.c_ospeed = port->state->port.tty->termios.c_ospeed; } baud = serial8250_get_baud_rate(port, &termios, NULL); quot = serial8250_get_divisor(port, baud, &frac); serial8250_set_divisor(port, baud, quot, frac); serial_port_out(port, UART_LCR, up->lcr); serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } /* * Print a string to the serial port using the device FIFO * * It sends fifosize bytes and then waits for the fifo * to get empty. */ static void serial8250_console_fifo_write(struct uart_8250_port *up, const char *s, unsigned int count) { int i; const char *end = s + count; unsigned int fifosize = up->tx_loadsz; bool cr_sent = false; while (s != end) { wait_for_lsr(up, UART_LSR_THRE); for (i = 0; i < fifosize && s != end; ++i) { if (*s == '\n' && !cr_sent) { serial_out(up, UART_TX, '\r'); cr_sent = true; } else { serial_out(up, UART_TX, *s++); cr_sent = false; } } } } /* * Print a string to the serial port trying not to disturb * any possible real use of the port... * * The console_lock must be held when we get here. * * Doing runtime PM is really a bad idea for the kernel console. * Thus, we assume the function is called when device is powered up. */ void serial8250_console_write(struct uart_8250_port *up, const char *s, unsigned int count) { struct uart_8250_em485 *em485 = up->em485; struct uart_port *port = &up->port; unsigned long flags; unsigned int ier, use_fifo; int locked = 1; touch_nmi_watchdog(); if (oops_in_progress) locked = uart_port_trylock_irqsave(port, &flags); else uart_port_lock_irqsave(port, &flags); /* * First save the IER then disable the interrupts */ ier = serial_port_in(port, UART_IER); serial8250_clear_IER(up); /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { serial8250_console_restore(up); up->canary = 0; } if (em485) { if (em485->tx_stopped) up->rs485_start_tx(up); mdelay(port->rs485.delay_rts_before_send); } use_fifo = (up->capabilities & UART_CAP_FIFO) && /* * BCM283x requires to check the fifo * after each byte. 
*/ !(up->capabilities & UART_CAP_MINI) && /* * tx_loadsz contains the transmit fifo size */ up->tx_loadsz > 1 && (up->fcr & UART_FCR_ENABLE_FIFO) && port->state && test_bit(TTY_PORT_INITIALIZED, &port->state->port.iflags) && /* * After we put a data in the fifo, the controller will send * it regardless of the CTS state. Therefore, only use fifo * if we don't use control flow. */ !(up->port.flags & UPF_CONS_FLOW); if (likely(use_fifo)) serial8250_console_fifo_write(up, s, count); else uart_console_write(port, s, count, serial8250_console_putchar); /* * Finally, wait for transmitter to become empty * and restore the IER */ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); if (em485) { mdelay(port->rs485.delay_rts_after_send); if (em485->tx_stopped) up->rs485_stop_tx(up); } serial_port_out(port, UART_IER, ier); /* * The receive handling will happen properly because the * receive ready bit will still be set; it is not cleared * on read. However, modem control will not, we must * call it if we have saved something in the saved flags * while processing with interrupts off. */ if (up->msr_saved_flags) serial8250_modem_status(up); if (locked) uart_port_unlock_irqrestore(port, flags); } static unsigned int probe_baud(struct uart_port *port) { unsigned char lcr, dll, dlm; unsigned int quot; lcr = serial_port_in(port, UART_LCR); serial_port_out(port, UART_LCR, lcr | UART_LCR_DLAB); dll = serial_port_in(port, UART_DLL); dlm = serial_port_in(port, UART_DLM); serial_port_out(port, UART_LCR, lcr); quot = (dlm << 8) | dll; return (port->uartclk / 16) / quot; } int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { int baud = 9600; int bits = 8; int parity = 'n'; int flow = 'n'; int ret; if (!port->iobase && !port->membase) return -ENODEV; if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); else if (probe) baud = probe_baud(port); ret = uart_set_options(port, port->cons, baud, parity, bits, flow); if (ret) return ret; if (port->dev) pm_runtime_get_sync(port->dev); return 0; } int serial8250_console_exit(struct uart_port *port) { if (port->dev) pm_runtime_put_sync(port->dev); return 0; } #endif /* CONFIG_SERIAL_8250_CONSOLE */ MODULE_LICENSE("GPL");
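/*
 * Illustrative, standalone sketch (not part of the driver above): it mirrors
 * the arithmetic behind serial8250_do_get_divisor() and probe_baud() for a
 * plain 16550, plus the SMSC "magic multiplier" selection described in the
 * comment there. The demo_* names and the 1.8432 MHz clock in main() are
 * assumptions for the example; the kernel's uart_get_divisor() additionally
 * handles the 38400-as-custom-speed case, which is omitted here.
 */
#include <stdio.h>

/* Classic 16550 divisor: uartclk / (16 * baud), rounded to the nearest. */
static unsigned int demo_get_divisor(unsigned int uartclk, unsigned int baud)
{
	return (uartclk + 8 * baud) / (16 * baud);
}

/* Inverse of the above, as probe_baud() does: baud = uartclk / 16 / quot. */
static unsigned int demo_get_baud(unsigned int uartclk, unsigned int quot)
{
	return (uartclk / 16) / quot;
}

/*
 * SMSC magic-divisor selection as in serial8250_do_get_divisor(): rates at
 * or above clk/6 map to 0x8001 (clk/4), rates at or above clk/12 map to
 * 0x8002 (clk/8), everything else uses the classic divisor.
 */
static unsigned int demo_magic_divisor(unsigned int uartclk, unsigned int baud)
{
	if (baud >= uartclk / 6)
		return 0x8001;
	if (baud >= uartclk / 12)
		return 0x8002;
	return demo_get_divisor(uartclk, baud);
}

int main(void)
{
	unsigned int uartclk = 1843200;	/* the common 1.8432 MHz base clock */

	printf("115200 -> quot %u\n", demo_get_divisor(uartclk, 115200));     /* 1 */
	printf("9600   -> quot %u\n", demo_get_divisor(uartclk, 9600));       /* 12 */
	printf("quot 12 -> %u baud\n", demo_get_baud(uartclk, 12));           /* 9600 */
	printf("460800 -> quot 0x%x\n", demo_magic_divisor(uartclk, 460800)); /* 0x8001 */
	return 0;
}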
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SWAP_H #define _LINUX_SWAP_H #include <linux/spinlock.h> #include <linux/linkage.h> #include <linux/mmzone.h> #include <linux/list.h> #include <linux/memcontrol.h> #include <linux/sched.h> #include <linux/node.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/atomic.h> #include <linux/page-flags.h> #include <uapi/linux/mempolicy.h> #include <asm/page.h> struct notifier_block; struct bio; struct pagevec; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 #define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ #define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */ #define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \ SWAP_FLAG_DISCARD | 
SWAP_FLAG_DISCARD_ONCE | \ SWAP_FLAG_DISCARD_PAGES) #define SWAP_BATCH 64 static inline int current_is_kswapd(void) { return current->flags & PF_KSWAPD; } /* * MAX_SWAPFILES defines the maximum number of swaptypes: things which can * be swapped to. The swap type and the offset into that swap type are * encoded into pte's and into pgoff_t's in the swapcache. Using five bits * for the type means that the maximum number of swapcache pages is 27 bits * on 32-bit-pgoff_t architectures. And that assumes that the architecture packs * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 /* * Use some of the swap files numbers for other purposes. This * is a convenient way to hook into the VM to trigger special * actions on faults. */ /* * PTE markers are used to persist information onto PTEs that otherwise * should be a none pte. As its name "PTE" hints, it should only be * applied to the leaves of pgtables. */ #define SWP_PTE_MARKER_NUM 1 #define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ SWP_MIGRATION_NUM + SWP_DEVICE_NUM) /* * Unaddressable device memory support. See include/linux/hmm.h and * Documentation/mm/hmm.rst. Short description is we need struct pages for * device memory that is unaddressable (inaccessible) by CPU, so that we can * migrate part of a process memory to device memory. * * When a page is migrated from CPU to device, we set the CPU page table entry * to a special SWP_DEVICE_{READ|WRITE} entry. * * When a page is mapped by the device for exclusive access we set the CPU page * table entries to special SWP_DEVICE_EXCLUSIVE_* entries. */ #ifdef CONFIG_DEVICE_PRIVATE #define SWP_DEVICE_NUM 4 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) #define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) #define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3) #else #define SWP_DEVICE_NUM 0 #endif /* * Page migration support. * * SWP_MIGRATION_READ_EXCLUSIVE is only applicable to anonymous pages and * indicates that the referenced (part of) an anonymous page is exclusive to * a single process. For SWP_MIGRATION_WRITE, that information is implicit: * (part of) an anonymous page that are mapped writable are exclusive to a * single process. */ #ifdef CONFIG_MIGRATION #define SWP_MIGRATION_NUM 3 #define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) #define SWP_MIGRATION_READ_EXCLUSIVE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 2) #else #define SWP_MIGRATION_NUM 0 #endif /* * Handling of hardware poisoned pages with memory corruption. */ #ifdef CONFIG_MEMORY_FAILURE #define SWP_HWPOISON_NUM 1 #define SWP_HWPOISON MAX_SWAPFILES #else #define SWP_HWPOISON_NUM 0 #endif #define MAX_SWAPFILES \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ SWP_PTE_MARKER_NUM) /* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) * swap area format, the second part of the union adds - in the * old reserved area - some extra information. Note that the first * kilobyte is reserved for boot loader or disk label stuff... * * Having the magic at the end of the PAGE_SIZE makes detecting swap * areas somewhat tricky on machines that support multiple page sizes. * For 2.5 we'll probably want to move the magic to just beyond the * bootbits... 
*/ union swap_header { struct { char reserved[PAGE_SIZE - 10]; char magic[10]; /* SWAP-SPACE or SWAPSPACE2 */ } magic; struct { char bootbits[1024]; /* Space for disklabel etc. */ __u32 version; __u32 last_page; __u32 nr_badpages; unsigned char sws_uuid[16]; unsigned char sws_volume[16]; __u32 padding[117]; __u32 badpages[1]; } info; }; /* * current->reclaim_state points to one of these when a task is running * memory reclaim */ struct reclaim_state { /* pages reclaimed outside of LRU-based reclaim */ unsigned long reclaimed; #ifdef CONFIG_LRU_GEN /* per-thread mm walk data */ struct lru_gen_mm_walk *mm_walk; #endif }; /* * mm_account_reclaimed_pages(): account reclaimed pages outside of LRU-based * reclaim * @pages: number of pages reclaimed * * If the current process is undergoing a reclaim operation, increment the * number of reclaimed pages by @pages. */ static inline void mm_account_reclaimed_pages(unsigned long pages) { if (current->reclaim_state) current->reclaim_state->reclaimed += pages; } #ifdef __KERNEL__ struct address_space; struct sysinfo; struct writeback_control; struct zone; /* * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of * disk blocks. A rbtree of swap extents maps the entire swapfile (Where the * term `swapfile' refers to either a blockdevice or an IS_REG file). Apart * from setup, they're handled identically. * * We always assume that blocks are of size PAGE_SIZE. */ struct swap_extent { struct rb_node rb_node; pgoff_t start_page; pgoff_t nr_pages; sector_t start_block; }; /* * Max bad pages in the new format.. */ #define MAX_SWAP_BADPAGES \ ((offsetof(union swap_header, magic.magic) - \ offsetof(union swap_header, info.badpages)) / sizeof(int)) enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_DISCARDABLE = (1 << 2), /* blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ SWP_BLKDEV = (1 << 6), /* its a block device */ SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */ SWP_FS_OPS = (1 << 8), /* swapfile operations go through fs */ SWP_AREA_DISCARD = (1 << 9), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ /* add others here before... */ SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ #define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */ #define COUNT_CONTINUED 0x80 /* Flag swap_map continuation for full count */ /* Special value in first swap_map */ #define SWAP_MAP_MAX 0x3e /* Max count */ #define SWAP_MAP_BAD 0x3f /* Note page is bad */ #define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs */ /* Special value in each swap_map continuation */ #define SWAP_CONT_MAX 0x7f /* Max count */ /* * We use this to track usage of a cluster. A cluster is a block of swap disk * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All * free clusters are organized into a list. We fetch an entry from the list to * get a free cluster. * * The data field stores next cluster if the cluster is free or cluster usage * counter otherwise. 
The flags field determines if a cluster is free. This is * protected by swap_info_struct.lock. */ struct swap_cluster_info { spinlock_t lock; /* * Protect swap_cluster_info fields * and swap_info_struct->swap_map * elements correspond to the swap * cluster */ unsigned int data:24; unsigned int flags:8; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ #define CLUSTER_FLAG_HUGE 4 /* This cluster is backing a transparent huge page */ /* * We assign a cluster to each CPU, so each CPU can allocate swap entry from * its own cluster and swapout sequentially. The purpose is to optimize swapout * throughput. */ struct percpu_cluster { struct swap_cluster_info index; /* Current cluster index */ unsigned int next; /* Likely next allocation offset */ }; struct swap_cluster_list { struct swap_cluster_info head; struct swap_cluster_info tail; }; /* * The in-memory structure used to track swap areas. */ struct swap_info_struct { struct percpu_ref users; /* indicate and keep swap device valid. */ unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct swap_cluster_list free_clusters; /* free clusters list */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ unsigned int inuse_pages; /* number of those currently in use */ unsigned int cluster_next; /* likely index for next allocation */ unsigned int cluster_nr; /* countdown to next cluster search */ unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ struct file *bdev_file; /* open handle of the bdev */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ struct completion comp; /* seldom referenced */ spinlock_t lock; /* * protect map scan related fields like * swap_map, lowest_bit, highest_bit, * inuse_pages, cluster_next, * cluster_nr, lowest_alloc, * highest_alloc, free/discard cluster * list. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If * both locks need hold, hold swap_lock * first. */ spinlock_t cont_lock; /* * protect swap count continuation page * list. */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. * Must be last as the number of the * array is nr_node_ids, which is not * a fixed value so have to allocate * dynamically. * And it has to be an array so that * plist_for_each_* can work. 
*/ }; static inline swp_entry_t page_swap_entry(struct page *page) { struct folio *folio = page_folio(page); swp_entry_t entry = folio->swap; entry.val += folio_page_idx(folio, page); return entry; } /* linux/mm/workingset.c */ bool workingset_test_recent(void *shadow, bool file, bool *workingset); void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; /* Definition of global_zone_page_state not available yet */ #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES) /* linux/mm/swap.c */ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_io, unsigned int nr_rotated); void lru_note_cost_refault(struct folio *); void folio_add_lru(struct folio *); void folio_add_lru_vma(struct folio *, struct vm_area_struct *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); extern atomic_t lru_disable_count; static inline bool lru_cache_disabled(void) { return atomic_read(&lru_disable_count); } static inline void lru_cache_enable(void) { atomic_dec(&lru_disable_count); } extern void lru_cache_disable(void); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); void folio_deactivate(struct folio *folio); void folio_mark_lazyfree(struct folio *folio); extern void swap_setup(void); /* linux/mm/vmscan.c */ extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); #define MEMCG_RECLAIM_MAY_SWAP (1 << 1) #define MEMCG_RECLAIM_PROACTIVE (1 << 2) extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, unsigned int reclaim_options); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); #ifdef CONFIG_NUMA extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; #else #define node_reclaim_mode 0 #endif static inline bool node_reclaim_enabled(void) { /* Is any node_reclaim_mode bit set? */ return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); } void check_move_unevictable_folios(struct folio_batch *fbatch); extern void __meminit kswapd_run(int nid); extern void __meminit kswapd_stop(int nid); #ifdef CONFIG_SWAP int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); static inline unsigned long total_swapcache_pages(void) { return global_node_page_state(NR_SWAPCACHE); } void free_swap_cache(struct folio *folio); void free_page_and_swap_cache(struct page *); void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. 
*/ static inline bool vm_swap_full(void) { return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages; } static inline long get_nr_swap_pages(void) { return atomic_long_read(&nr_swap_pages); } extern void si_swapinfo(struct sysinfo *); swp_entry_t folio_alloc_swap(struct folio *folio); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); sector_t swap_folio_sector(struct folio *folio); static inline void put_swap_device(struct swap_info_struct *si) { percpu_ref_put(&si->users); } #else /* CONFIG_SWAP */ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return NULL; } static inline struct swap_info_struct *get_swap_device(swp_entry_t entry) { return NULL; } static inline void put_swap_device(struct swap_info_struct *si) { } #define get_nr_swap_pages() 0L #define total_swap_pages 0L #define total_swapcache_pages() 0UL #define vm_swap_full() 0 #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) /* only sparc can not include linux/pagemap.h in this file * so leave put_page and release_pages undeclared... 
*/ #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ #define free_swap_and_cache(e) is_pfn_swap_entry(e) static inline void free_swap_cache(struct folio *folio) { } static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { return 0; } static inline void swap_shmem_alloc(swp_entry_t swp) { } static inline int swap_duplicate(swp_entry_t swp) { return 0; } static inline int swapcache_prepare(swp_entry_t swp) { return 0; } static inline void swap_free(swp_entry_t swp) { } static inline void put_swap_folio(struct folio *folio, swp_entry_t swp) { } static inline int __swap_count(swp_entry_t entry) { return 0; } static inline int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) { return 0; } static inline int swp_swapcount(swp_entry_t entry) { return 0; } static inline swp_entry_t folio_alloc_swap(struct folio *folio) { swp_entry_t entry; entry.val = 0; return entry; } static inline bool folio_free_swap(struct folio *folio) { return false; } static inline int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block) { return -EINVAL; } #endif /* CONFIG_SWAP */ #ifdef CONFIG_THP_SWAP extern int split_swap_cluster(swp_entry_t entry); #else static inline int split_swap_cluster(swp_entry_t entry) { return 0; } #endif #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { /* Cgroup2 doesn't have per-cgroup swappiness */ if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return READ_ONCE(vm_swappiness); /* root ? */ if (mem_cgroup_disabled() || mem_cgroup_is_root(memcg)) return READ_ONCE(vm_swappiness); return READ_ONCE(memcg->swappiness); } #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) { return READ_ONCE(vm_swappiness); } #endif #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp); static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { if (mem_cgroup_disabled()) return; __folio_throttle_swaprate(folio, gfp); } #else static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { } #endif #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { if (mem_cgroup_disabled()) return 0; return __mem_cgroup_try_charge_swap(folio, entry); } extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) { if (mem_cgroup_disabled()) return; __mem_cgroup_uncharge_swap(entry, nr_pages); } extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct folio *folio); #else static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) { } static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { return 0; } static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) { } static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) { return get_nr_swap_pages(); } static inline bool mem_cgroup_swap_full(struct folio *folio) { return vm_swap_full(); } #endif 
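/*
 * Illustrative, standalone sketch (not part of this header): a simplified
 * model of the 5-bit-type / 27-bit-offset packing described next to
 * MAX_SWAPFILES_SHIFT above, assuming a 32-bit value. The real encoding is
 * architecture-specific and is accessed through swp_entry(), swp_type() and
 * swp_offset() in <linux/swapops.h>; the demo_* names here are made up.
 */
#include <stdint.h>
#include <assert.h>

#define DEMO_TYPE_SHIFT		27	/* 5 bits of type above 27 bits of offset */
#define DEMO_OFFSET_MASK	((UINT32_C(1) << DEMO_TYPE_SHIFT) - 1)

static uint32_t demo_swp_entry(uint32_t type, uint32_t offset)
{
	return (type << DEMO_TYPE_SHIFT) | (offset & DEMO_OFFSET_MASK);
}

static uint32_t demo_swp_type(uint32_t entry)
{
	return entry >> DEMO_TYPE_SHIFT;
}

static uint32_t demo_swp_offset(uint32_t entry)
{
	return entry & DEMO_OFFSET_MASK;
}

static void demo_swp_selftest(void)
{
	uint32_t e = demo_swp_entry(3, 123456);

	/* 5 bits allow 32 types; 27 bits allow ~128M page offsets per area. */
	assert(demo_swp_type(e) == 3);
	assert(demo_swp_offset(e) == 123456);
}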
#endif /* __KERNEL__ */ #endif /* _LINUX_SWAP_H */
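/*
 * Illustrative, standalone sketch (not part of the header above): it
 * replicates union swap_header with plain C types and a hard-coded 4 KiB
 * page (an assumption) to show where the MAX_SWAP_BADPAGES arithmetic comes
 * from: the bad-page list may grow from info.badpages up to, but not into,
 * the 10-byte magic string at the end of the page. With 4 KiB pages this
 * evaluates to 637.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define DEMO_PAGE_SIZE 4096	/* assumption: 4 KiB pages */

union demo_swap_header {
	struct {
		char reserved[DEMO_PAGE_SIZE - 10];
		char magic[10];			/* SWAP-SPACE or SWAPSPACE2 */
	} magic;
	struct {
		char		bootbits[1024];	/* space for disklabel etc. */
		uint32_t	version;
		uint32_t	last_page;
		uint32_t	nr_badpages;
		unsigned char	sws_uuid[16];
		unsigned char	sws_volume[16];
		uint32_t	padding[117];
		uint32_t	badpages[1];
	} info;
};

int main(void)
{
	size_t max_badpages =
		(offsetof(union demo_swap_header, magic.magic) -
		 offsetof(union demo_swap_header, info.badpages)) / sizeof(int);

	printf("MAX_SWAP_BADPAGES (4 KiB pages): %zu\n", max_badpages);	/* 637 */
	return 0;
}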
/* SPDX-License-Identifier: GPL-2.0 */ /* Freezer declarations */ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED #include <linux/debug_locks.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/atomic.h> #include <linux/jump_label.h> #ifdef CONFIG_FREEZER DECLARE_STATIC_KEY_FALSE(freezer_active); extern bool pm_freezing; /* PM freezing in effect */ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ /* * Timeout for stopping processes */ extern unsigned int freeze_timeout_msecs; /* * Check if a process has been frozen */ extern bool frozen(struct task_struct *p); extern bool freezing_slow_path(struct task_struct *p); /* * Check if there is a request to freeze a process */ static inline bool freezing(struct task_struct *p) { if (static_branch_unlikely(&freezer_active)) return freezing_slow_path(p); return false; } /* Takes and releases task alloc lock using task_lock() */ extern void __thaw_task(struct task_struct *t); extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); static inline bool try_to_freeze(void) { might_sleep(); if (likely(!freezing(current))) return false; if (!(current->flags & PF_NOFREEZE)) debug_check_no_locks_held(); return __refrigerator(false); } extern bool freeze_task(struct task_struct *p); extern bool set_freezable(void); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); #else /* !CONFIG_CGROUP_FREEZER */ static inline bool cgroup_freezing(struct task_struct *task) { return false; } #endif /* !CONFIG_CGROUP_FREEZER */ #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } static inline void __thaw_task(struct task_struct *t) {} static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline void thaw_kernel_threads(void) {} static inline bool try_to_freeze(void) { return false; } static inline void set_freezable(void) {} #endif /* !CONFIG_FREEZER */ #endif /* FREEZER_H_INCLUDED */
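/*
 * Illustrative usage sketch (not part of the header above): the conventional
 * pattern for a freezable kernel thread built from the declarations in
 * <linux/freezer.h>. The thread marks itself freezable once, then calls
 * try_to_freeze() on each loop iteration so it parks in __refrigerator()
 * while a suspend/hibernate freeze is in effect. demo_thread_fn() and its
 * once-a-second wakeup policy are made up for the example.
 */
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/delay.h>

static int demo_thread_fn(void *data)
{
	/* Opt in to freezing; kernel threads are PF_NOFREEZE by default. */
	set_freezable();

	while (!kthread_should_stop()) {
		/* Blocks in the refrigerator if a freeze is in progress. */
		try_to_freeze();

		/* ... do one unit of background work here ... */
		msleep_interruptible(1000);
	}
	return 0;
}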
/* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/xattr.h Extended attributes handling. Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #ifndef _LINUX_XATTR_H #define _LINUX_XATTR_H #include <linux/slab.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/mm.h> #include <linux/user_namespace.h> #include <uapi/linux/xattr.h> struct inode; struct dentry; static inline bool is_posix_acl_xattr(const char *name) { return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0); } /* * struct xattr_handler: When @name is set, match attributes with exactly that * name. When @prefix is set instead, match attributes with that prefix and * with a non-empty suffix. */ struct xattr_handler { const char *name; const char *prefix; int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); int (*get)(const struct xattr_handler *, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); int (*set)(const struct xattr_handler *, struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags); }; /** * xattr_handler_can_list - check whether xattr can be listed * @handler: handler for this type of xattr * @dentry: dentry whose inode xattr to list * * Determine whether the xattr associated with @dentry can be listed given * @handler. * * Return: true if xattr can be listed, false if not.
*/ static inline bool xattr_handler_can_list(const struct xattr_handler *handler, struct dentry *dentry) { return handler && (!handler->list || handler->list(dentry)); } const char *xattr_full_name(const struct xattr_handler *, const char *); struct xattr { const char *name; void *value; size_t value_len; }; ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct mnt_idmap *, struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); int __vfs_setxattr(struct mnt_idmap *, struct dentry *, struct inode *, const char *, const void *, size_t, int); int __vfs_setxattr_noperm(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *, const char *, struct inode **); int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); int vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); int xattr_supports_user_prefix(struct inode *inode); static inline const char *xattr_prefix(const struct xattr_handler *handler) { return handler->prefix ?: handler->name; } struct simple_xattrs { struct rb_root rb_root; rwlock_t lock; }; struct simple_xattr { struct rb_node rb_node; char *name; size_t size; char value[]; }; void simple_xattrs_init(struct simple_xattrs *xattrs); void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); void simple_xattr_free(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); void simple_xattr_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr); int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name); #endif /* _LINUX_XATTR_H */
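/*
 * Illustrative sketch (not part of the header above): a minimal prefix-based
 * xattr_handler wired up the way the struct xattr_handler comment describes,
 * i.e. with .prefix set so it matches "user.demo.<non-empty suffix>". The
 * "user.demo." namespace and the stubbed-out get/set bodies are hypothetical,
 * and the usual kernel headers are assumed for -EOPNOTSUPP; a real handler
 * would back these with its own store (for instance the simple_xattr helpers
 * declared above).
 */
static int demo_xattr_get(const struct xattr_handler *handler,
			  struct dentry *dentry, struct inode *inode,
			  const char *name, void *buffer, size_t size)
{
	/* "name" arrives with the handler prefix already stripped. */
	return -EOPNOTSUPP;	/* stub: look the value up here */
}

static int demo_xattr_set(const struct xattr_handler *handler,
			  struct mnt_idmap *idmap, struct dentry *dentry,
			  struct inode *inode, const char *name,
			  const void *buffer, size_t size, int flags)
{
	return -EOPNOTSUPP;	/* stub: store or remove the value here */
}

static const struct xattr_handler demo_xattr_handler = {
	.prefix	= "user.demo.",		/* match prefix + non-empty suffix */
	.get	= demo_xattr_get,
	.set	= demo_xattr_set,
};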
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/proc/inode.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/cache.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/kernel.h> #include <linux/pid_namespace.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/completion.h> #include <linux/poll.h> #include <linux/printk.h> #include <linux/file.h> #include <linux/limits.h> #include <linux/init.h> #include <linux/module.h> #include <linux/sysctl.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/mount.h> #include <linux/bug.h> #include "internal.h" static void proc_evict_inode(struct inode *inode) { struct ctl_table_header *head; struct proc_inode *ei = PROC_I(inode); truncate_inode_pages_final(&inode->i_data);
clear_inode(inode); /* Stop tracking associated processes */ if (ei->pid) proc_pid_evict_inode(ei); head = ei->sysctl; if (head) { RCU_INIT_POINTER(ei->sysctl, NULL); proc_sys_evict_inode(inode, head); } } static struct kmem_cache *proc_inode_cachep __ro_after_init; static struct kmem_cache *pde_opener_cache __ro_after_init; static struct inode *proc_alloc_inode(struct super_block *sb) { struct proc_inode *ei; ei = alloc_inode_sb(sb, proc_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->pid = NULL; ei->fd = 0; ei->op.proc_get_link = NULL; ei->pde = NULL; ei->sysctl = NULL; ei->sysctl_entry = NULL; INIT_HLIST_NODE(&ei->sibling_inodes); ei->ns_ops = NULL; return &ei->vfs_inode; } static void proc_free_inode(struct inode *inode) { struct proc_inode *ei = PROC_I(inode); if (ei->pid) put_pid(ei->pid); /* Let go of any associated proc directory entry */ if (ei->pde) pde_put(ei->pde); kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; inode_init_once(&ei->vfs_inode); } void __init proc_init_kmemcache(void) { proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT| SLAB_PANIC), init_once); pde_opener_cache = kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0, SLAB_ACCOUNT|SLAB_PANIC, NULL); proc_dir_entry_cache = kmem_cache_create_usercopy( "proc_dir_entry", SIZEOF_PDE, 0, SLAB_PANIC, offsetof(struct proc_dir_entry, inline_name), SIZEOF_PDE_INLINE_NAME, NULL); BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE); } void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock) { struct hlist_node *node; struct super_block *old_sb = NULL; rcu_read_lock(); while ((node = hlist_first_rcu(inodes))) { struct proc_inode *ei = hlist_entry(node, struct proc_inode, sibling_inodes); struct super_block *sb; struct inode *inode; spin_lock(lock); hlist_del_init_rcu(&ei->sibling_inodes); spin_unlock(lock); inode = &ei->vfs_inode; sb = inode->i_sb; if ((sb != old_sb) && !atomic_inc_not_zero(&sb->s_active)) continue; inode = igrab(inode); rcu_read_unlock(); if (sb != old_sb) { if (old_sb) deactivate_super(old_sb); old_sb = sb; } if (unlikely(!inode)) { rcu_read_lock(); continue; } if (S_ISDIR(inode->i_mode)) { struct dentry *dir = d_find_any_alias(inode); if (dir) { d_invalidate(dir); dput(dir); } } else { struct dentry *dentry; while ((dentry = d_find_alias(inode))) { d_invalidate(dentry); dput(dentry); } } iput(inode); rcu_read_lock(); } rcu_read_unlock(); if (old_sb) deactivate_super(old_sb); } static inline const char *hidepid2str(enum proc_hidepid v) { switch (v) { case HIDEPID_OFF: return "off"; case HIDEPID_NO_ACCESS: return "noaccess"; case HIDEPID_INVISIBLE: return "invisible"; case HIDEPID_NOT_PTRACEABLE: return "ptraceable"; } WARN_ONCE(1, "bad hide_pid value: %d\n", v); return "unknown"; } static int proc_show_options(struct seq_file *seq, struct dentry *root) { struct proc_fs_info *fs_info = proc_sb_info(root->d_sb); if (!gid_eq(fs_info->pid_gid, GLOBAL_ROOT_GID)) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, fs_info->pid_gid)); if (fs_info->hide_pid != HIDEPID_OFF) seq_printf(seq, ",hidepid=%s", hidepid2str(fs_info->hide_pid)); if (fs_info->pidonly != PROC_PIDONLY_OFF) seq_printf(seq, ",subset=pid"); return 0; } const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .free_inode = proc_free_inode, .drop_inode = generic_delete_inode, .evict_inode = proc_evict_inode, .statfs = 
simple_statfs, .show_options = proc_show_options, }; enum {BIAS = -1U<<31}; static inline int use_pde(struct proc_dir_entry *pde) { return likely(atomic_inc_unless_negative(&pde->in_use)); } static void unuse_pde(struct proc_dir_entry *pde) { if (unlikely(atomic_dec_return(&pde->in_use) == BIAS)) complete(pde->pde_unload_completion); } /* * At most 2 contexts can enter this function: the one doing the last * close on the descriptor and whoever is deleting PDE itself. * * First to enter calls ->proc_release hook and signals its completion * to the second one which waits and then does nothing. * * PDE is locked on entry, unlocked on exit. */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) __releases(&pde->pde_unload_lock) { /* * close() (proc_reg_release()) can't delete an entry and proceed: * ->release hook needs to be available at the right moment. * * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: * "struct file" needs to be available at the right moment. */ if (pdeo->closing) { /* somebody else is doing that, just wait */ DECLARE_COMPLETION_ONSTACK(c); pdeo->c = &c; spin_unlock(&pde->pde_unload_lock); wait_for_completion(&c); } else { struct file *file; struct completion *c; pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); file = pdeo->file; pde->proc_ops->proc_release(file_inode(file), file); spin_lock(&pde->pde_unload_lock); /* Strictly after ->proc_release, see above. */ list_del(&pdeo->lh); c = pdeo->c; spin_unlock(&pde->pde_unload_lock); if (unlikely(c)) complete(c); kmem_cache_free(pde_opener_cache, pdeo); } } void proc_entry_rundown(struct proc_dir_entry *de) { DECLARE_COMPLETION_ONSTACK(c); /* Wait until all existing callers into module are done. */ de->pde_unload_completion = &c; if (atomic_add_return(BIAS, &de->in_use) != BIAS) wait_for_completion(&c); /* ->pde_openers list can't grow from now on. 
*/ spin_lock(&de->pde_unload_lock); while (!list_empty(&de->pde_openers)) { struct pde_opener *pdeo; pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); close_pdeo(de, pdeo); spin_lock(&de->pde_unload_lock); } spin_unlock(&de->pde_unload_lock); } static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) { struct proc_dir_entry *pde = PDE(file_inode(file)); loff_t rv = -EINVAL; if (pde_is_permanent(pde)) { return pde->proc_ops->proc_lseek(file, offset, whence); } else if (use_pde(pde)) { rv = pde->proc_ops->proc_lseek(file, offset, whence); unuse_pde(pde); } return rv; } static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct proc_dir_entry *pde = PDE(file_inode(iocb->ki_filp)); ssize_t ret; if (pde_is_permanent(pde)) return pde->proc_ops->proc_read_iter(iocb, iter); if (!use_pde(pde)) return -EIO; ret = pde->proc_ops->proc_read_iter(iocb, iter); unuse_pde(pde); return ret; } static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos) { typeof_member(struct proc_ops, proc_read) read; read = pde->proc_ops->proc_read; if (read) return read(file, buf, count, ppos); return -EIO; } static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (pde_is_permanent(pde)) { return pde_read(pde, file, buf, count, ppos); } else if (use_pde(pde)) { rv = pde_read(pde, file, buf, count, ppos); unuse_pde(pde); } return rv; } static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos) { typeof_member(struct proc_ops, proc_write) write; write = pde->proc_ops->proc_write; if (write) return write(file, buf, count, ppos); return -EIO; } static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (pde_is_permanent(pde)) { return pde_write(pde, file, buf, count, ppos); } else if (use_pde(pde)) { rv = pde_write(pde, file, buf, count, ppos); unuse_pde(pde); } return rv; } static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts) { typeof_member(struct proc_ops, proc_poll) poll; poll = pde->proc_ops->proc_poll; if (poll) return poll(file, pts); return DEFAULT_POLLMASK; } static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts) { struct proc_dir_entry *pde = PDE(file_inode(file)); __poll_t rv = DEFAULT_POLLMASK; if (pde_is_permanent(pde)) { return pde_poll(pde, file, pts); } else if (use_pde(pde)) { rv = pde_poll(pde, file, pts); unuse_pde(pde); } return rv; } static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) { typeof_member(struct proc_ops, proc_ioctl) ioctl; ioctl = pde->proc_ops->proc_ioctl; if (ioctl) return ioctl(file, cmd, arg); return -ENOTTY; } static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; if (pde_is_permanent(pde)) { return pde_ioctl(pde, file, cmd, arg); } else if (use_pde(pde)) { rv = pde_ioctl(pde, file, cmd, arg); unuse_pde(pde); } return rv; } #ifdef CONFIG_COMPAT static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) { typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl; compat_ioctl = 
pde->proc_ops->proc_compat_ioctl; if (compat_ioctl) return compat_ioctl(file, cmd, arg); return -ENOTTY; } static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; if (pde_is_permanent(pde)) { return pde_compat_ioctl(pde, file, cmd, arg); } else if (use_pde(pde)) { rv = pde_compat_ioctl(pde, file, cmd, arg); unuse_pde(pde); } return rv; } #endif static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma) { typeof_member(struct proc_ops, proc_mmap) mmap; mmap = pde->proc_ops->proc_mmap; if (mmap) return mmap(file, vma); return -EIO; } static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) { struct proc_dir_entry *pde = PDE(file_inode(file)); int rv = -EIO; if (pde_is_permanent(pde)) { return pde_mmap(pde, file, vma); } else if (use_pde(pde)) { rv = pde_mmap(pde, file, vma); unuse_pde(pde); } return rv; } static unsigned long pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { typeof_member(struct proc_ops, proc_get_unmapped_area) get_area; get_area = pde->proc_ops->proc_get_unmapped_area; #ifdef CONFIG_MMU if (!get_area) get_area = current->mm->get_unmapped_area; #endif if (get_area) return get_area(file, orig_addr, len, pgoff, flags); return orig_addr; } static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct proc_dir_entry *pde = PDE(file_inode(file)); unsigned long rv = -EIO; if (pde_is_permanent(pde)) { return pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags); } else if (use_pde(pde)) { rv = pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags); unuse_pde(pde); } return rv; } static int proc_reg_open(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; typeof_member(struct proc_ops, proc_open) open; typeof_member(struct proc_ops, proc_release) release; struct pde_opener *pdeo; if (!pde->proc_ops->proc_lseek) file->f_mode &= ~FMODE_LSEEK; if (pde_is_permanent(pde)) { open = pde->proc_ops->proc_open; if (open) rv = open(inode, file); return rv; } /* * Ensure that * 1) PDE's ->release hook will be called no matter what * either normally by close()/->release, or forcefully by * rmmod/remove_proc_entry. * * 2) rmmod isn't blocked by opening file in /proc and sitting on * the descriptor (including "rmmod foo </proc/foo" scenario). * * Save every "struct file" with custom ->release hook. */ if (!use_pde(pde)) return -ENOENT; release = pde->proc_ops->proc_release; if (release) { pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL); if (!pdeo) { rv = -ENOMEM; goto out_unuse; } } open = pde->proc_ops->proc_open; if (open) rv = open(inode, file); if (release) { if (rv == 0) { /* To know what to release. 
*/ pdeo->file = file; pdeo->closing = false; pdeo->c = NULL; spin_lock(&pde->pde_unload_lock); list_add(&pdeo->lh, &pde->pde_openers); spin_unlock(&pde->pde_unload_lock); } else kmem_cache_free(pde_opener_cache, pdeo); } out_unuse: unuse_pde(pde); return rv; } static int proc_reg_release(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); struct pde_opener *pdeo; if (pde_is_permanent(pde)) { typeof_member(struct proc_ops, proc_release) release; release = pde->proc_ops->proc_release; if (release) { return release(inode, file); } return 0; } spin_lock(&pde->pde_unload_lock); list_for_each_entry(pdeo, &pde->pde_openers, lh) { if (pdeo->file == file) { close_pdeo(pde, pdeo); return 0; } } spin_unlock(&pde->pde_unload_lock); return 0; } static const struct file_operations proc_reg_file_ops = { .llseek = proc_reg_llseek, .read = proc_reg_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; static const struct file_operations proc_iter_file_ops = { .llseek = proc_reg_llseek, .read_iter = proc_reg_read_iter, .write = proc_reg_write, .splice_read = copy_splice_read, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; #ifdef CONFIG_COMPAT static const struct file_operations proc_reg_file_ops_compat = { .llseek = proc_reg_llseek, .read = proc_reg_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .compat_ioctl = proc_reg_compat_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; static const struct file_operations proc_iter_file_ops_compat = { .llseek = proc_reg_llseek, .read_iter = proc_reg_read_iter, .splice_read = copy_splice_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .compat_ioctl = proc_reg_compat_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; #endif static void proc_put_link(void *p) { unuse_pde(p); } static const char *proc_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct proc_dir_entry *pde = PDE(inode); if (!use_pde(pde)) return ERR_PTR(-EINVAL); set_delayed_call(done, proc_put_link, pde); return pde->data; } const struct inode_operations proc_link_inode_operations = { .get_link = proc_get_link, }; struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { struct inode *inode = new_inode(sb); if (!inode) { pde_put(de); return NULL; } inode->i_private = de->data; inode->i_ino = de->low_ino; simple_inode_init_ts(inode); PROC_I(inode)->pde = de; if (is_empty_pde(de)) { make_empty_dir_inode(inode); return inode; } if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; inode->i_gid = de->gid; } if (de->size) inode->i_size = de->size; if (de->nlink) set_nlink(inode, de->nlink); if (S_ISREG(inode->i_mode)) { inode->i_op = de->proc_iops; if (de->proc_ops->proc_read_iter) inode->i_fop = &proc_iter_file_ops; else inode->i_fop = &proc_reg_file_ops; #ifdef CONFIG_COMPAT if (de->proc_ops->proc_compat_ioctl) { if (de->proc_ops->proc_read_iter) inode->i_fop = &proc_iter_file_ops_compat; else inode->i_fop = &proc_reg_file_ops_compat; 
} #endif } else if (S_ISDIR(inode->i_mode)) { inode->i_op = de->proc_iops; inode->i_fop = de->proc_dir_ops; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = de->proc_iops; inode->i_fop = NULL; } else { BUG(); } return inode; }
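/*
 * Illustrative sketch, not part of fs/proc/inode.c above: a minimal module
 * that registers /proc/demo_status. Its proc_ops callbacks are the ones the
 * proc_reg_*() wrappers above dispatch to, and because the entry supplies a
 * ->proc_release hook, each opener is tracked with a pde_opener as done in
 * proc_reg_open()/close_pdeo(). The demo_* names and "demo_status" are
 * hypothetical.
 */
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "ok\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct proc_ops demo_proc_ops = {
	.proc_open	= demo_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};

static struct proc_dir_entry *demo_pde;

static int __init demo_init(void)
{
	demo_pde = proc_create("demo_status", 0444, NULL, &demo_proc_ops);
	return demo_pde ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	/* Removal goes through the rundown path that waits for live openers. */
	proc_remove(demo_pde);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");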
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PROCESSOR_H #define _ASM_X86_PROCESSOR_H #include <asm/processor-flags.h> /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; struct io_bitmap; struct vm86; #include <asm/math_emu.h> #include <asm/segment.h> #include <asm/types.h> #include <uapi/asm/sigcontext.h> #include <asm/current.h> #include <asm/cpufeatures.h> #include <asm/cpuid.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> #include <asm/desc_defs.h> #include <asm/nops.h> #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> #include <asm/vmxfeatures.h> #include
<asm/vdso/processor.h> #include <asm/shstk.h> #include <linux/personality.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/math64.h> #include <linux/err.h> #include <linux/irqflags.h> #include <linux/mem_encrypt.h> /* * We handle most unaligned accesses in hardware. On the other hand * unaligned DMA can be quite expensive on some Nehalem processors. * * Based on this we disable the IP header alignment in network drivers. */ #define NET_IP_ALIGN 0 #define HBP_NUM 4 /* * These alignment constraints are for performance in the vSMP case, * but in the task_struct case we must also meet hardware imposed * alignment requirements of the FPU state: */ #ifdef CONFIG_X86_VSMP # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) #else # define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif enum tlb_infos { ENTRIES, NR_INFO }; extern u16 __read_mostly tlb_lli_4k[NR_INFO]; extern u16 __read_mostly tlb_lli_2m[NR_INFO]; extern u16 __read_mostly tlb_lli_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_4k[NR_INFO]; extern u16 __read_mostly tlb_lld_2m[NR_INFO]; extern u16 __read_mostly tlb_lld_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_1g[NR_INFO]; /* * CPU type and hardware bug flags. Kept separately for each CPU. */ struct cpuinfo_topology { // Real APIC ID read from the local APIC u32 apicid; // The initial APIC ID provided by CPUID u32 initial_apicid; // Physical package ID u32 pkg_id; // Physical die ID on AMD, Relative on Intel u32 die_id; // Compute unit ID - AMD specific u32 cu_id; // Core ID relative to the package u32 core_id; // Logical ID mappings u32 logical_pkg_id; u32 logical_die_id; // AMD Node ID and Nodes per Package info u32 amd_node_id; // Cache level topology IDs u32 llc_id; u32 l2c_id; }; struct cpuinfo_x86 { __u8 x86; /* CPU family */ __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; #endif #ifdef CONFIG_X86_VMX_FEATURE_NAMES __u32 vmx_capability[NVMXINTS]; #endif __u8 x86_virt_bits; __u8 x86_phys_bits; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; /* * Align to size of unsigned long because the x86_capability array * is passed to bitops which require the alignment. Use unnamed * union to enforce the array is aligned to size of unsigned long. */ union { __u32 x86_capability[NCAPINTS + NBUGINTS]; unsigned long x86_capability_alignment; }; char x86_vendor_id[16]; char x86_model_id[64]; struct cpuinfo_topology topo; /* in KB - valid for CPUS which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; /* protected processor identification number */ u64 ppin; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; /* Index into per_cpu list: */ u16 cpu_index; /* Is SMT active on this core? 
*/ bool smt_active; u32 microcode; /* Address space bits used by the cache internally */ u8 x86_cache_bits; unsigned initialized : 1; } __randomize_layout; #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 #define X86_VENDOR_AMD 2 #define X86_VENDOR_UMC 3 #define X86_VENDOR_CENTAUR 5 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 #define X86_VENDOR_ZHAOXIN 10 #define X86_VENDOR_VORTEX 11 #define X86_VENDOR_NUM 12 #define X86_VENDOR_UNKNOWN 0xff /* * capabilities of CPUs */ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); static inline unsigned long long l1tf_pfn_limit(void) { return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } extern void early_cpu_init(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); /* * Friendlier CR3 helpers. */ static inline unsigned long read_cr3_pa(void) { return __read_cr3() & CR3_ADDR_MASK; } static inline unsigned long native_read_cr3_pa(void) { return __native_read_cr3() & CR3_ADDR_MASK; } static inline void load_cr3(pgd_t *pgdir) { write_cr3(__sme_pa(pgdir)); } /* * Note that while the legacy 'TSS' name comes from 'Task State Segment', * on modern x86 CPUs the TSS also holds information important to 64-bit mode, * unrelated to the task-switch mechanism: */ #ifdef CONFIG_X86_32 /* This is the TSS defined by the hardware. */ struct x86_hw_tss { unsigned short back_link, __blh; unsigned long sp0; unsigned short ss0, __ss0h; unsigned long sp1; /* * We don't use ring 1, so ss1 is a convenient scratch space in * the same cacheline as sp0. We use ss1 to cache the value in * MSR_IA32_SYSENTER_CS. When we context switch * MSR_IA32_SYSENTER_CS, we first check if the new value being * written matches ss1, and, if it's not, then we wrmsr the new * value and update ss1. * * The only reason we context switch MSR_IA32_SYSENTER_CS is * that we set it to zero in vm86 tasks to avoid corrupting the * stack if we were to go through the sysenter path from vm86 * mode. */ unsigned short ss1; /* MSR_IA32_SYSENTER_CS */ unsigned short __ss1h; unsigned long sp2; unsigned short ss2, __ss2h; unsigned long __cr3; unsigned long ip; unsigned long flags; unsigned long ax; unsigned long cx; unsigned long dx; unsigned long bx; unsigned long sp; unsigned long bp; unsigned long si; unsigned long di; unsigned short es, __esh; unsigned short cs, __csh; unsigned short ss, __ssh; unsigned short ds, __dsh; unsigned short fs, __fsh; unsigned short gs, __gsh; unsigned short ldt, __ldth; unsigned short trace; unsigned short io_bitmap_base; } __attribute__((packed)); #else struct x86_hw_tss { u32 reserved1; u64 sp0; u64 sp1; /* * Since Linux does not use ring 2, the 'sp2' slot is unused by * hardware. entry_SYSCALL_64 uses it as scratch space to stash * the user RSP value. 
*/ u64 sp2; u64 reserved2; u64 ist[7]; u32 reserved3; u32 reserved4; u16 reserved5; u16 io_bitmap_base; } __attribute__((packed)); #endif /* * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) #define IO_BITMAP_OFFSET_VALID_MAP \ (offsetof(struct tss_struct, io_bitmap.bitmap) - \ offsetof(struct tss_struct, x86_tss)) #define IO_BITMAP_OFFSET_VALID_ALL \ (offsetof(struct tss_struct, io_bitmap.mapall) - \ offsetof(struct tss_struct, x86_tss)) #ifdef CONFIG_X86_IOPL_IOPERM /* * sizeof(unsigned long) coming from an extra "long" at the end of the * iobitmap. The limit is inclusive, i.e. the last valid byte. */ # define __KERNEL_TSS_LIMIT \ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ sizeof(unsigned long) - 1) #else # define __KERNEL_TSS_LIMIT \ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) #endif /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { char stack[PAGE_SIZE]; }; struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); /* * All IO bitmap related data stored in the TSS: */ struct x86_io_bitmap { /* The sequence number of the last active bitmap. */ u64 prev_sequence; /* * Store the dirty size of the last io bitmap offender. The next * one will have to do the cleanup as the switch out to a non io * bitmap user will just set x86_tss.io_bitmap_base to a value * outside of the TSS limit. So for sane tasks there is no need to * actually touch the io_bitmap at all. */ unsigned int prev_max; /* * The extra 1 is there because the CPU will access an * additional byte beyond the end of the IO permission * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ unsigned long bitmap[IO_BITMAP_LONGS + 1]; /* * Special I/O bitmap to emulate IOPL(3). All bytes zero, * except the additional byte at the end. */ unsigned long mapall[IO_BITMAP_LONGS + 1]; }; struct tss_struct { /* * The fixed hardware portion. This must not cross a page boundary * at risk of violating the SDM's advice and potentially triggering * errata. */ struct x86_hw_tss x86_tss; struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); /* Per CPU interrupt stacks */ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); #ifdef CONFIG_X86_64 struct fixed_percpu_data { /* * GCC hardcodes the stack canary as %gs:40. Since the * irq_stack is the object at %gs:0, we reserve the bottom * 48 bytes of the irq stack for the canary. * * Once we are willing to require -mstack-protector-guard-symbol= * support for x86_64 stackprotector, we can get rid of this. 
*/ char gs_base[40]; unsigned long stack_canary; }; DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; DECLARE_INIT_PER_CPU(fixed_percpu_data); static inline unsigned long cpu_kernelmode_gs_base(int cpu) { return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); } extern asmlinkage void entry_SYSCALL32_ignore(void); /* Save actual FS/GS selectors and bases to current->thread */ void current_save_fsgs(void); #else /* X86_64 */ #ifdef CONFIG_STACKPROTECTOR DECLARE_PER_CPU(unsigned long, __stack_chk_guard); #endif #endif /* !X86_64 */ struct perf_event; struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; #ifdef CONFIG_X86_32 unsigned long sp0; #endif unsigned long sp; #ifdef CONFIG_X86_32 unsigned long sysenter_cs; #else unsigned short es; unsigned short ds; unsigned short fsindex; unsigned short gsindex; #endif #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; #else /* * XXX: this could presumably be unsigned short. Alternatively, * 32-bit kernels could be taught to use fsindex instead. */ unsigned long fs; unsigned long gs; #endif /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ unsigned long virtual_dr6; /* Keep track of the exact dr7 value set by the user */ unsigned long ptrace_dr7; /* Fault info: */ unsigned long cr2; unsigned long trap_nr; unsigned long error_code; #ifdef CONFIG_VM86 /* Virtual 86 mode info */ struct vm86 *vm86; #endif /* IO permissions: */ struct io_bitmap *io_bitmap; /* * IOPL. Privilege level dependent I/O permission which is * emulated via the I/O bitmap to prevent user space from disabling * interrupts. */ unsigned long iopl_emul; unsigned int iopl_warn:1; /* * Protection Keys Register for Userspace. Loaded immediately on * context switch. Store it in thread_struct to avoid a lookup in * the tasks's FPU xstate buffer. This value is only valid when a * task is scheduled out. For 'current' the authoritative source of * PKRU is the hardware itself. */ u32 pkru; #ifdef CONFIG_X86_USER_SHADOW_STACK unsigned long features; unsigned long features_locked; struct thread_shstk shstk; #endif /* Floating point and extended processor state */ struct fpu fpu; /* * WARNING: 'fpu' is dynamically-sized. It *MUST* be at * the end. */ }; extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size); static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { fpu_thread_struct_whitelist(offset, size); } static inline void native_load_sp0(unsigned long sp0) { this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } static __always_inline void native_swapgs(void) { #ifdef CONFIG_X86_64 asm volatile("swapgs" ::: "memory"); #endif } static __always_inline unsigned long current_top_of_stack(void) { /* * We can't read directly from tss.sp0: sp0 on x86_32 is special in * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline. 
*/ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) return this_cpu_read_const(const_pcpu_hot.top_of_stack); return this_cpu_read_stable(pcpu_hot.top_of_stack); } static __always_inline bool on_thread_stack(void) { return (unsigned long)(current_top_of_stack() - current_stack_pointer) < THREAD_SIZE; } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else static inline void load_sp0(unsigned long sp0) { native_load_sp0(sp0); } #endif /* CONFIG_PARAVIRT_XXL */ unsigned long __get_wchan(struct task_struct *p); extern void select_idle_routine(void); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern void enable_sep_cpu(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void cpu_init(void); extern void cpu_init_exception_handling(void); extern void cr4_init(void); extern void set_task_blockstep(struct task_struct *task, bool on); /* Boot loader type from the setup header: */ extern int bootloader_type; extern int bootloader_version; extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else # define BASE_PREFETCH "prefetcht0 %P1" #endif /* * Prefetch instructions for Pentium III (+) and AMD Athlon (+) * * It's not worth to care about 3dnow prefetches for the K6 * because they are microcoded there and very slow. */ static inline void prefetch(const void *x) { alternative_input(BASE_PREFETCH, "prefetchnta %P1", X86_FEATURE_XMM, "m" (*(const char *)x)); } /* * 3dnow prefetch to get an exclusive cache line. * Useful for spinlocks to avoid one state transition in the * cache coherency protocol: */ static __always_inline void prefetchw(const void *x) { alternative_input(BASE_PREFETCH, "prefetchw %P1", X86_FEATURE_3DNOWPREFETCH, "m" (*(const char *)x)); } #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) #define task_pt_regs(task) \ ({ \ unsigned long __ptr = (unsigned long)task_stack_page(task); \ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ ((struct pt_regs *)__ptr) - 1; \ }) #ifdef CONFIG_X86_32 #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ } #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else extern unsigned long __end_init_task[]; #define INIT_THREAD { \ .sp = (unsigned long)&__end_init_task - \ TOP_OF_KERNEL_STACK_PADDING - \ sizeof(struct pt_regs), \ } extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); /* * This decides where the kernel will search for a free chunk of vm * space during mmap's. 
*/ #define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3)) #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) #define KSTK_EIP(task) (task_pt_regs(task)->ip) /* Get/set a process' ability to use the timestamp counter instruction */ #define GET_TSC_CTL(adr) get_tsc_mode((adr)) #define SET_TSC_CTL(val) set_tsc_mode((val)) extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); static inline u32 per_cpu_llc_id(unsigned int cpu) { return per_cpu(cpu_info.topo.llc_id, cpu); } static inline u32 per_cpu_l2c_id(unsigned int cpu) { return per_cpu(cpu_info.topo.l2c_id, cpu); } #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_highest_perf(void); extern void amd_clear_divider(void); extern void amd_check_microcode(void); #else static inline u32 amd_get_highest_perf(void) { return 0; } static inline void amd_clear_divider(void) { } static inline void amd_check_microcode(void) { } #endif extern unsigned long arch_align_stack(unsigned long sp); void free_init_pages(const char *what, unsigned long begin, unsigned long end); extern void free_kernel_image_pages(const char *what, void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN bool xen_set_default_idle(void); #else #define xen_set_default_idle 0 #endif void __noreturn stop_this_cpu(void *dummy); void microcode_check(struct cpuinfo_x86 *prev_info); void store_cpu_caps(struct cpuinfo_x86 *info); enum l1tf_mitigations { L1TF_MITIGATION_OFF, L1TF_MITIGATION_FLUSH_NOWARN, L1TF_MITIGATION_FLUSH, L1TF_MITIGATION_FLUSH_NOSMT, L1TF_MITIGATION_FULL, L1TF_MITIGATION_FULL_FORCE }; extern enum l1tf_mitigations l1tf_mitigation; enum mds_mitigations { MDS_MITIGATION_OFF, MDS_MITIGATION_FULL, MDS_MITIGATION_VMWERV, }; extern bool gds_ucode_mitigated(void); /* * Make previous memory operations globally visible before * a WRMSR. * * MFENCE makes writes visible, but only affects load/store * instructions. WRMSR is unfortunately not a load/store * instruction and is unaffected by MFENCE. The LFENCE ensures * that the WRMSR is not reordered. * * Most WRMSRs are full serializing instructions themselves and * do not require this barrier. This is only required for the * IA32_TSC_DEADLINE and X2APIC MSRs. */ static inline void weak_wrmsr_fence(void) { alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE)); } #endif /* _ASM_X86_PROCESSOR_H */
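/*
 * Illustrative sketch, not part of asm/processor.h above: reading a few of
 * the cpuinfo_x86 fields the header declares, via cpu_data(). The function
 * name demo_print_cpu() is hypothetical.
 */
#include <linux/printk.h>
#include <linux/smp.h>
#include <asm/processor.h>

static void demo_print_cpu(void)
{
	int cpu = get_cpu();			/* pin to a CPU while we look */
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	pr_info("CPU%d: %s, family %d, model %d, stepping %d, cache %u KB\n",
		cpu, c->x86_model_id, c->x86, c->x86_model, c->x86_stepping,
		c->x86_cache_size);

	put_cpu();
}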
// SPDX-License-Identifier: GPL-2.0-only /* * mm/interval_tree.c - interval tree for mapping->i_mmap * * Copyright (C) 2012, Michel Lespinasse <walken@google.com> */ #include <linux/mm.h> #include <linux/fs.h> #include <linux/rmap.h> #include <linux/interval_tree_generic.h> static inline unsigned long vma_start_pgoff(struct vm_area_struct *v) { return v->vm_pgoff; } static inline unsigned long vma_last_pgoff(struct vm_area_struct *v) { return v->vm_pgoff + vma_pages(v) - 1; } INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.rb, unsigned long, shared.rb_subtree_last, vma_start_pgoff, vma_last_pgoff, /* empty */, vma_interval_tree) /* Insert node immediately after prev in the interval tree */ void vma_interval_tree_insert_after(struct vm_area_struct *node, struct vm_area_struct *prev, struct rb_root_cached *root) { struct rb_node **link; struct vm_area_struct *parent; unsigned long last = vma_last_pgoff(node); VM_BUG_ON_VMA(vma_start_pgoff(node) != vma_start_pgoff(prev), node); if (!prev->shared.rb.rb_right) { parent = prev; link = &prev->shared.rb.rb_right; } else { parent = rb_entry(prev->shared.rb.rb_right, struct vm_area_struct, shared.rb); if (parent->shared.rb_subtree_last < last) parent->shared.rb_subtree_last = last; while (parent->shared.rb.rb_left) { parent = rb_entry(parent->shared.rb.rb_left, struct vm_area_struct, shared.rb); if (parent->shared.rb_subtree_last < last) parent->shared.rb_subtree_last = last; } link = &parent->shared.rb.rb_left; } node->shared.rb_subtree_last = last; rb_link_node(&node->shared.rb, &parent->shared.rb, link); rb_insert_augmented(&node->shared.rb, &root->rb_root, &vma_interval_tree_augment); } static inline unsigned long avc_start_pgoff(struct anon_vma_chain *avc) { return vma_start_pgoff(avc->vma); } static inline unsigned long avc_last_pgoff(struct anon_vma_chain *avc) { return vma_last_pgoff(avc->vma); } INTERVAL_TREE_DEFINE(struct anon_vma_chain, rb, unsigned long, rb_subtree_last, avc_start_pgoff, avc_last_pgoff, static inline, __anon_vma_interval_tree) void anon_vma_interval_tree_insert(struct anon_vma_chain *node, struct rb_root_cached *root) { #ifdef CONFIG_DEBUG_VM_RB node->cached_vma_start = avc_start_pgoff(node); node->cached_vma_last = avc_last_pgoff(node); #endif __anon_vma_interval_tree_insert(node, root); } void anon_vma_interval_tree_remove(struct anon_vma_chain *node, struct rb_root_cached *root) { __anon_vma_interval_tree_remove(node, root); } struct anon_vma_chain * anon_vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long first, unsigned long last) { return __anon_vma_interval_tree_iter_first(root, first, last); } struct anon_vma_chain * anon_vma_interval_tree_iter_next(struct anon_vma_chain *node, unsigned long first, unsigned long last) { return __anon_vma_interval_tree_iter_next(node, first, last); } #ifdef CONFIG_DEBUG_VM_RB void anon_vma_interval_tree_verify(struct anon_vma_chain *node) { WARN_ON_ONCE(node->cached_vma_start != avc_start_pgoff(node)); WARN_ON_ONCE(node->cached_vma_last != avc_last_pgoff(node)); } #endif
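/*
 * Illustrative sketch, not part of mm/interval_tree.c above: walking the
 * i_mmap interval tree that the INTERVAL_TREE_DEFINE() above generates.
 * vma_interval_tree_foreach() is the iterator built on the generated
 * *_iter_first()/*_iter_next() functions; demo_count_mappers() is
 * hypothetical.
 */
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/rmap.h>

/* Count the VMAs mapping the page-offset range [first, last] of a file. */
static unsigned int demo_count_mappers(struct address_space *mapping,
				       pgoff_t first, pgoff_t last)
{
	struct vm_area_struct *vma;
	unsigned int n = 0;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first, last)
		n++;
	i_mmap_unlock_read(mapping);

	return n;
}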
// SPDX-License-Identifier: GPL-2.0-only #include <linux/mm.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/compiler.h> #include <linux/export.h> #include <linux/err.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/sched/task_stack.h> #include <linux/security.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/mman.h> #include <linux/hugetlb.h> #include <linux/vmalloc.h> #include <linux/userfaultfd_k.h> #include <linux/elf.h> #include <linux/elf-randomize.h> #include <linux/personality.h> #include <linux/random.h> #include <linux/processor.h> #include <linux/sizes.h> #include <linux/compat.h> #include <linux/uaccess.h> #include "internal.h" #include "swap.h" /** * kfree_const - conditionally free memory * @x: pointer to the memory * * Function calls kfree only if @x is not in .rodata section. */ void kfree_const(const void *x) { if (!is_kernel_rodata((unsigned long)x)) kfree(x); } EXPORT_SYMBOL(kfree_const); /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Return: newly allocated copy of @s or %NULL in case of error */ noinline char *kstrdup(const char *s, gfp_t gfp) { size_t len; char *buf; if (!s) return NULL; len = strlen(s) + 1; buf = kmalloc_track_caller(len, gfp); if (buf) memcpy(buf, s, len); return buf; } EXPORT_SYMBOL(kstrdup); /** * kstrdup_const - conditionally duplicate an existing const string * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Note: Strings allocated by kstrdup_const should be freed by kfree_const and * must not be passed to krealloc(). * * Return: source string if it is in .rodata section otherwise * fallback to kstrdup. */ const char *kstrdup_const(const char *s, gfp_t gfp) { if (is_kernel_rodata((unsigned long)s)) return s; return kstrdup(s, gfp); } EXPORT_SYMBOL(kstrdup_const); /** * kstrndup - allocate space for and copy an existing string * @s: the string to duplicate * @max: read at most @max chars from @s * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Note: Use kmemdup_nul() instead if the size is known exactly.
* * Return: newly allocated copy of @s or %NULL in case of error */ char *kstrndup(const char *s, size_t max, gfp_t gfp) { size_t len; char *buf; if (!s) return NULL; len = strnlen(s, max); buf = kmalloc_track_caller(len+1, gfp); if (buf) { memcpy(buf, s, len); buf[len] = '\0'; } return buf; } EXPORT_SYMBOL(kstrndup); /** * kmemdup - duplicate region of memory * * @src: memory region to duplicate * @len: memory region length * @gfp: GFP mask to use * * Return: newly allocated copy of @src or %NULL in case of error, * result is physically contiguous. Use kfree() to free. */ void *kmemdup(const void *src, size_t len, gfp_t gfp) { void *p; p = kmalloc_track_caller(len, gfp); if (p) memcpy(p, src, len); return p; } EXPORT_SYMBOL(kmemdup); /** * kmemdup_array - duplicate a given array. * * @src: array to duplicate. * @element_size: size of each element of array. * @count: number of elements to duplicate from array. * @gfp: GFP mask to use. * * Return: duplicated array of @src or %NULL in case of error, * result is physically contiguous. Use kfree() to free. */ void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) { return kmemdup(src, size_mul(element_size, count), gfp); } EXPORT_SYMBOL(kmemdup_array); /** * kvmemdup - duplicate region of memory * * @src: memory region to duplicate * @len: memory region length * @gfp: GFP mask to use * * Return: newly allocated copy of @src or %NULL in case of error, * result may be not physically contiguous. Use kvfree() to free. */ void *kvmemdup(const void *src, size_t len, gfp_t gfp) { void *p; p = kvmalloc(len, gfp); if (p) memcpy(p, src, len); return p; } EXPORT_SYMBOL(kvmemdup); /** * kmemdup_nul - Create a NUL-terminated string from unterminated data * @s: The data to stringify * @len: The size of the data * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Return: newly allocated copy of @s with NUL-termination or %NULL in * case of error */ char *kmemdup_nul(const char *s, size_t len, gfp_t gfp) { char *buf; if (!s) return NULL; buf = kmalloc_track_caller(len + 1, gfp); if (buf) { memcpy(buf, s, len); buf[len] = '\0'; } return buf; } EXPORT_SYMBOL(kmemdup_nul); /** * memdup_user - duplicate memory region from user space * * @src: source address in user space * @len: number of bytes to copy * * Return: an ERR_PTR() on failure. Result is physically * contiguous, to be freed by kfree(). */ void *memdup_user(const void __user *src, size_t len) { void *p; p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_user(p, src, len)) { kfree(p); return ERR_PTR(-EFAULT); } return p; } EXPORT_SYMBOL(memdup_user); /** * vmemdup_user - duplicate memory region from user space * * @src: source address in user space * @len: number of bytes to copy * * Return: an ERR_PTR() on failure. Result may be not * physically contiguous. Use kvfree() to free. */ void *vmemdup_user(const void __user *src, size_t len) { void *p; p = kvmalloc(len, GFP_USER); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_user(p, src, len)) { kvfree(p); return ERR_PTR(-EFAULT); } return p; } EXPORT_SYMBOL(vmemdup_user); /** * strndup_user - duplicate an existing string from user space * @s: The string to duplicate * @n: Maximum number of bytes to copy, including the trailing NUL. 
* * Return: newly allocated copy of @s or an ERR_PTR() in case of error */ char *strndup_user(const char __user *s, long n) { char *p; long length; length = strnlen_user(s, n); if (!length) return ERR_PTR(-EFAULT); if (length > n) return ERR_PTR(-EINVAL); p = memdup_user(s, length); if (IS_ERR(p)) return p; p[length - 1] = '\0'; return p; } EXPORT_SYMBOL(strndup_user); /** * memdup_user_nul - duplicate memory region from user space and NUL-terminate * * @src: source address in user space * @len: number of bytes to copy * * Return: an ERR_PTR() on failure. */ void *memdup_user_nul(const void __user *src, size_t len) { char *p; /* * Always use GFP_KERNEL, since copy_from_user() can sleep and * cause pagefault, which makes it pointless to use GFP_NOFS * or GFP_ATOMIC. */ p = kmalloc_track_caller(len + 1, GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_user(p, src, len)) { kfree(p); return ERR_PTR(-EFAULT); } p[len] = '\0'; return p; } EXPORT_SYMBOL(memdup_user_nul); /* Check if the vma is being used as a stack by this task */ int vma_is_stack_for_current(struct vm_area_struct *vma) { struct task_struct * __maybe_unused t = current; return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); } /* * Change backing file, only valid to use during initial VMA setup. */ void vma_set_file(struct vm_area_struct *vma, struct file *file) { /* Changing an anonymous vma with this is illegal */ get_file(file); swap(vma->vm_file, file); fput(file); } EXPORT_SYMBOL(vma_set_file); #ifndef STACK_RND_MASK #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */ #endif unsigned long randomize_stack_top(unsigned long stack_top) { unsigned long random_variable = 0; if (current->flags & PF_RANDOMIZE) { random_variable = get_random_long(); random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; } #ifdef CONFIG_STACK_GROWSUP return PAGE_ALIGN(stack_top) + random_variable; #else return PAGE_ALIGN(stack_top) - random_variable; #endif } /** * randomize_page - Generate a random, page aligned address * @start: The smallest acceptable address the caller will take. * @range: The size of the area, starting at @start, within which the * random address must fall. * * If @start + @range would overflow, @range is capped. * * NOTE: Historical use of randomize_range, which this replaces, presumed that * @start was already page aligned. We now align it regardless. * * Return: A page aligned address within [start, start + range). On error, * @start is returned. */ unsigned long randomize_page(unsigned long start, unsigned long range) { if (!PAGE_ALIGNED(start)) { range -= PAGE_ALIGN(start) - start; start = PAGE_ALIGN(start); } if (start > ULONG_MAX - range) range = ULONG_MAX - start; range >>= PAGE_SHIFT; if (range == 0) return start; return start + (get_random_long() % range << PAGE_SHIFT); } #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT unsigned long __weak arch_randomize_brk(struct mm_struct *mm) { /* Is the current task 32bit ? 
*/ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) return randomize_page(mm->brk, SZ_32M); return randomize_page(mm->brk, SZ_1G); } unsigned long arch_mmap_rnd(void) { unsigned long rnd; #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS if (is_compat_task()) rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); else #endif /* CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS */ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); return rnd << PAGE_SHIFT; } static int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) return 1; /* On parisc the stack always grows up - so a unlimited stack should * not be an indicator to use the legacy memory layout. */ if (rlim_stack->rlim_cur == RLIM_INFINITY && !IS_ENABLED(CONFIG_STACK_GROWSUP)) return 1; return sysctl_legacy_va_layout; } /* * Leave enough space between the mmap area and the stack to honour ulimit in * the face of randomisation. */ #define MIN_GAP (SZ_128M) #define MAX_GAP (STACK_TOP / 6 * 5) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { #ifdef CONFIG_STACK_GROWSUP /* * For an upwards growing stack the calculation is much simpler. * Memory for the maximum stack size is reserved at the top of the * task. mmap_base starts directly below the stack and grows * downwards. */ return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd); #else unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_guard_gap; /* Account for stack randomization if necessary */ if (current->flags & PF_RANDOMIZE) pad += (STACK_RND_MASK << PAGE_SHIFT); /* Values close to RLIM_INFINITY can overflow. */ if (gap + pad > gap) gap += pad; if (gap < MIN_GAP) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; return PAGE_ALIGN(STACK_TOP - gap - rnd); #endif } void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) random_factor = arch_mmap_rnd(); if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; } else { mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } #elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; } #endif /** * __account_locked_vm - account locked pages to an mm's locked_vm * @mm: mm to account against * @pages: number of pages to account * @inc: %true if @pages should be considered positive, %false if not * @task: task used to check RLIMIT_MEMLOCK * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped * * Assumes @task and @mm are valid (i.e. at least one reference on each), and * that mmap_lock is held as writer. * * Return: * * 0 on success * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. 
*/ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, struct task_struct *task, bool bypass_rlim) { unsigned long locked_vm, limit; int ret = 0; mmap_assert_write_locked(mm); locked_vm = mm->locked_vm; if (inc) { if (!bypass_rlim) { limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (locked_vm + pages > limit) ret = -ENOMEM; } if (!ret) mm->locked_vm = locked_vm + pages; } else { WARN_ON_ONCE(pages > locked_vm); mm->locked_vm = locked_vm - pages; } pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid, (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT, locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK), ret ? " - exceeded" : ""); return ret; } EXPORT_SYMBOL_GPL(__account_locked_vm); /** * account_locked_vm - account locked pages to an mm's locked_vm * @mm: mm to account against, may be NULL * @pages: number of pages to account * @inc: %true if @pages should be considered positive, %false if not * * Assumes a non-NULL @mm is valid (i.e. at least one reference on it). * * Return: * * 0 on success, or if mm is NULL * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. */ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) { int ret; if (pages == 0 || !mm) return 0; mmap_write_lock(mm); ret = __account_locked_vm(mm, pages, inc, current, capable(CAP_IPC_LOCK)); mmap_write_unlock(mm); return ret; } EXPORT_SYMBOL_GPL(account_locked_vm); unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { unsigned long ret; struct mm_struct *mm = current->mm; unsigned long populate; LIST_HEAD(uf); ret = security_mmap_file(file, prot, flag); if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate, &uf); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(ret, populate); } return ret; } unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { if (unlikely(offset + PAGE_ALIGN(len) < offset)) return -EINVAL; if (unlikely(offset_in_page(offset))) return -EINVAL; return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); } EXPORT_SYMBOL(vm_mmap); /** * kvmalloc_node - attempt to allocate physically contiguous memory, but upon * failure, fall back to non-contiguous (vmalloc) allocation. * @size: size of the request. * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. * @node: numa node to allocate from * * Uses kmalloc to get the memory but if the allocation fails then falls back * to the vmalloc allocator. Use kvfree for freeing the memory. * * GFP_NOWAIT and GFP_ATOMIC are not supported, neither is the __GFP_NORETRY modifier. * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is * preferable to the vmalloc fallback, due to visible performance drawbacks. * * Return: pointer to the allocated memory of %NULL in case of failure */ void *kvmalloc_node(size_t size, gfp_t flags, int node) { gfp_t kmalloc_flags = flags; void *ret; /* * We want to attempt a large physically contiguous block first because * it is less likely to fragment multiple larger blocks and therefore * contribute to a long term fragmentation less than vmalloc fallback. * However make sure that larger requests are not too disruptive - no * OOM killer and no allocation failure warnings as we have a fallback. 
*/ if (size > PAGE_SIZE) { kmalloc_flags |= __GFP_NOWARN; if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL)) kmalloc_flags |= __GFP_NORETRY; /* nofail semantic is implemented by the vmalloc fallback */ kmalloc_flags &= ~__GFP_NOFAIL; } ret = kmalloc_node(size, kmalloc_flags, node); /* * It doesn't really make sense to fallback to vmalloc for sub page * requests */ if (ret || size <= PAGE_SIZE) return ret; /* non-sleeping allocations are not supported by vmalloc */ if (!gfpflags_allow_blocking(flags)) return NULL; /* Don't even allow crazy sizes */ if (unlikely(size > INT_MAX)) { WARN_ON_ONCE(!(flags & __GFP_NOWARN)); return NULL; } /* * kvmalloc() can always use VM_ALLOW_HUGE_VMAP, * since the callers already cannot assume anything * about the resulting pointer, and cannot play * protection games. */ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, node, __builtin_return_address(0)); } EXPORT_SYMBOL(kvmalloc_node); /** * kvfree() - Free memory. * @addr: Pointer to allocated memory. * * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc(). * It is slightly more efficient to use kfree() or vfree() if you are certain * that you know which one to use. * * Context: Either preemptible task context or not-NMI interrupt. */ void kvfree(const void *addr) { if (is_vmalloc_addr(addr)) vfree(addr); else kfree(addr); } EXPORT_SYMBOL(kvfree); /** * kvfree_sensitive - Free a data object containing sensitive information. * @addr: address of the data object to be freed. * @len: length of the data object. * * Use the special memzero_explicit() function to clear the content of a * kvmalloc'ed object containing sensitive data to make sure that the * compiler won't optimize out the data clearing. */ void kvfree_sensitive(const void *addr, size_t len) { if (likely(!ZERO_OR_NULL_PTR(addr))) { memzero_explicit((void *)addr, len); kvfree(addr); } } EXPORT_SYMBOL(kvfree_sensitive); void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) { void *newp; if (oldsize >= newsize) return (void *)p; newp = kvmalloc(newsize, flags); if (!newp) return NULL; memcpy(newp, p, oldsize); kvfree(p); return newp; } EXPORT_SYMBOL(kvrealloc); /** * __vmalloc_array - allocate memory for a virtually contiguous array. * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ void *__vmalloc_array(size_t n, size_t size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; return __vmalloc(bytes, flags); } EXPORT_SYMBOL(__vmalloc_array); /** * vmalloc_array - allocate memory for a virtually contiguous array. * @n: number of elements. * @size: element size. */ void *vmalloc_array(size_t n, size_t size) { return __vmalloc_array(n, size, GFP_KERNEL); } EXPORT_SYMBOL(vmalloc_array); /** * __vcalloc - allocate and zero memory for a virtually contiguous array. * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ void *__vcalloc(size_t n, size_t size, gfp_t flags) { return __vmalloc_array(n, size, flags | __GFP_ZERO); } EXPORT_SYMBOL(__vcalloc); /** * vcalloc - allocate and zero memory for a virtually contiguous array. * @n: number of elements. * @size: element size. 
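 *
 * Example (illustrative sketch; the struct name is made up): a zeroed
 * table of @n records that may be too large for kmalloc() and is later
 * released with vfree():
 *
 *	struct record *tbl = vcalloc(n, sizeof(*tbl));
 *
 *	if (!tbl)
 *		return -ENOMEM;
 *	...
 *	vfree(tbl);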
*/ void *vcalloc(size_t n, size_t size) { return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO); } EXPORT_SYMBOL(vcalloc); struct anon_vma *folio_anon_vma(struct folio *folio) { unsigned long mapping = (unsigned long)folio->mapping; if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) return NULL; return (void *)(mapping - PAGE_MAPPING_ANON); } /** * folio_mapping - Find the mapping where this folio is stored. * @folio: The folio. * * For folios which are in the page cache, return the mapping that this * page belongs to. Folios in the swap cache return the swap mapping * this page is stored in (which is different from the mapping for the * swap file or swap device where the data is stored). * * You can call this for folios which aren't in the swap cache or page * cache and it will return NULL. */ struct address_space *folio_mapping(struct folio *folio) { struct address_space *mapping; /* This happens if someone calls flush_dcache_page on slab page */ if (unlikely(folio_test_slab(folio))) return NULL; if (unlikely(folio_test_swapcache(folio))) return swap_address_space(folio->swap); mapping = folio->mapping; if ((unsigned long)mapping & PAGE_MAPPING_FLAGS) return NULL; return mapping; } EXPORT_SYMBOL(folio_mapping); /** * folio_copy - Copy the contents of one folio to another. * @dst: Folio to copy to. * @src: Folio to copy from. * * The bytes in the folio represented by @src are copied to @dst. * Assumes the caller has validated that @dst is at least as large as @src. * Can be called in atomic context for order-0 folios, but if the folio is * larger, it may sleep. */ void folio_copy(struct folio *dst, struct folio *src) { long i = 0; long nr = folio_nr_pages(src); for (;;) { copy_highpage(folio_page(dst, i), folio_page(src, i)); if (++i == nr) break; cond_resched(); } } EXPORT_SYMBOL(folio_copy); int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; int sysctl_overcommit_ratio __read_mostly = 50; unsigned long sysctl_overcommit_kbytes __read_mostly; int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret == 0 && write) sysctl_overcommit_kbytes = 0; return ret; } static void sync_overcommit_as(struct work_struct *dummy) { percpu_counter_sync(&vm_committed_as); } int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int new_policy = -1; int ret; /* * The deviation of sync_overcommit_as could be big with loose policy * like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to * strict OVERCOMMIT_NEVER, we need to reduce the deviation to comply * with the strict "NEVER", and to avoid possible race condition (even * though user usually won't too frequently do the switching to policy * OVERCOMMIT_NEVER), the switch is done in the following order: * 1. changing the batch * 2. sync percpu count on each CPU * 3. 
switch the policy */ if (write) { t = *table; t.data = &new_policy; ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); if (ret || new_policy == -1) return ret; mm_compute_batch(new_policy); if (new_policy == OVERCOMMIT_NEVER) schedule_on_each_cpu(sync_overcommit_as); sysctl_overcommit_memory = new_policy; } else { ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); } return ret; } int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) sysctl_overcommit_ratio = 0; return ret; } /* * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used */ unsigned long vm_commit_limit(void) { unsigned long allowed; if (sysctl_overcommit_kbytes) allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10); else allowed = ((totalram_pages() - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100); allowed += total_swap_pages; return allowed; } /* * Make sure vm_committed_as in one cacheline and not cacheline shared with * other variables. It can be updated by several CPUs frequently. */ struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp; /* * The global memory commitment made in the system can be a metric * that can be used to drive ballooning decisions when Linux is hosted * as a guest. On Hyper-V, the host implements a policy engine for dynamically * balancing memory across competing virtual machines that are hosted. * Several metrics drive this policy engine including the guest reported * memory commitment. * * The time cost of this is very low for small platforms, and for big * platform like a 2S/36C/72T Skylake server, in worst case where * vm_committed_as's spinlock is under severe contention, the time cost * could be about 30~40 microseconds. */ unsigned long vm_memory_committed(void) { return percpu_counter_sum_positive(&vm_committed_as); } EXPORT_SYMBOL_GPL(vm_memory_committed); /* * Check that a process has enough memory to allocate a new virtual * mapping. 0 means there is enough memory for the allocation to * succeed and -ENOMEM implies there is not. * * We currently support three overcommit policies, which are set via the * vm.overcommit_memory sysctl. See Documentation/mm/overcommit-accounting.rst * * Strict overcommit modes added 2002 Feb 26 by Alan Cox. * Additional code 2002 Jul 20 by Robert Love. * * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. * * Note this is a helper function intended to be used by LSMs which * wish to use this logic. 
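 *
 * Example (illustrative sketch): normal kernel code reaches this helper
 * via security_vm_enough_memory_mm(), and must undo the charge on its
 * error paths once the mapping goes away:
 *
 *	if (security_vm_enough_memory_mm(mm, npages))
 *		return -ENOMEM;
 *	...
 *	vm_unacct_memory(npages);	// when the mapping is torn down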
*/ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { long allowed; unsigned long bytes_failed; vm_acct_memory(pages); /* * Sometimes we want to use more memory than we have */ if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) return 0; if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { if (pages > totalram_pages() + total_swap_pages) goto error; return 0; } allowed = vm_commit_limit(); /* * Reserve some for root */ if (!cap_sys_admin) allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); /* * Don't let a single process grow so big a user can't recover */ if (mm) { long reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); allowed -= min_t(long, mm->total_vm / 32, reserve); } if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; error: bytes_failed = pages << PAGE_SHIFT; pr_warn_ratelimited("%s: pid: %d, comm: %s, bytes: %lu not enough memory for the allocation\n", __func__, current->pid, current->comm, bytes_failed); vm_unacct_memory(pages); return -ENOMEM; } /** * get_cmdline() - copy the cmdline value to a buffer. * @task: the task whose cmdline value to copy. * @buffer: the buffer to copy to. * @buflen: the length of the buffer. Larger cmdline values are truncated * to this length. * * Return: the size of the cmdline field copied. Note that the copy does * not guarantee an ending NULL byte. */ int get_cmdline(struct task_struct *task, char *buffer, int buflen) { int res = 0; unsigned int len; struct mm_struct *mm = get_task_mm(task); unsigned long arg_start, arg_end, env_start, env_end; if (!mm) goto out; if (!mm->arg_end) goto out_mm; /* Shh! No looking before we're done */ spin_lock(&mm->arg_lock); arg_start = mm->arg_start; arg_end = mm->arg_end; env_start = mm->env_start; env_end = mm->env_end; spin_unlock(&mm->arg_lock); len = arg_end - arg_start; if (len > buflen) len = buflen; res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE); /* * If the nul at the end of args has been overwritten, then * assume application is using setproctitle(3). */ if (res > 0 && buffer[res-1] != '\0' && len < buflen) { len = strnlen(buffer, res); if (len < res) { res = len; } else { len = env_end - env_start; if (len > buflen - res) len = buflen - res; res += access_process_vm(task, env_start, buffer+res, len, FOLL_FORCE); res = strnlen(buffer, res); } } out_mm: mmput(mm); out: return res; } int __weak memcmp_pages(struct page *page1, struct page *page2) { char *addr1, *addr2; int ret; addr1 = kmap_local_page(page1); addr2 = kmap_local_page(page2); ret = memcmp(addr1, addr2, PAGE_SIZE); kunmap_local(addr2); kunmap_local(addr1); return ret; } #ifdef CONFIG_PRINTK /** * mem_dump_obj - Print available provenance information * @object: object for which to find provenance information. * * This function uses pr_cont(), so that the caller is expected to have * printed out whatever preamble is appropriate. The provenance information * depends on the type of object and on how much debugging is enabled. * For example, for a slab-cache object, the slab name is printed, and, * if available, the return address and stack trace from the allocation * and last free path of that object. 
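 *
 * Example (illustrative sketch): printing whatever can be learned about a
 * suspicious pointer, with the preamble supplied by the caller and no
 * trailing newline, so that the pr_cont() output continues the same line:
 *
 *	pr_alert("unexpected object %px:", ptr);
 *	mem_dump_obj(ptr);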
*/ void mem_dump_obj(void *object) { const char *type; if (kmem_dump_obj(object)) return; if (vmalloc_dump_obj(object)) return; if (is_vmalloc_addr(object)) type = "vmalloc memory"; else if (virt_addr_valid(object)) type = "non-slab/vmalloc memory"; else if (object == NULL) type = "NULL pointer"; else if (object == ZERO_SIZE_PTR) type = "zero-size pointer"; else type = "non-paged memory"; pr_cont(" %s\n", type); } EXPORT_SYMBOL_GPL(mem_dump_obj); #endif /* * A driver might set a page logically offline -- PageOffline() -- and * turn the page inaccessible in the hypervisor; after that, access to page * content can be fatal. * * Some special PFN walkers -- i.e., /proc/kcore -- read content of random * pages after checking PageOffline(); however, these PFN walkers can race * with drivers that set PageOffline(). * * page_offline_freeze()/page_offline_thaw() allows for a subsystem to * synchronize with such drivers, achieving that a page cannot be set * PageOffline() while frozen. * * page_offline_begin()/page_offline_end() is used by drivers that care about * such races when setting a page PageOffline(). */ static DECLARE_RWSEM(page_offline_rwsem); void page_offline_freeze(void) { down_read(&page_offline_rwsem); } void page_offline_thaw(void) { up_read(&page_offline_rwsem); } void page_offline_begin(void) { down_write(&page_offline_rwsem); } EXPORT_SYMBOL(page_offline_begin); void page_offline_end(void) { up_write(&page_offline_rwsem); } EXPORT_SYMBOL(page_offline_end); #ifndef flush_dcache_folio void flush_dcache_folio(struct folio *folio) { long i, nr = folio_nr_pages(folio); for (i = 0; i < nr; i++) flush_dcache_page(folio_page(folio, i)); } EXPORT_SYMBOL(flush_dcache_folio); #endif
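/*
 * Illustrative usage sketch (not part of mm/util.c, kept under #if 0): the
 * kvmalloc() family above is typically used for allocations whose size is
 * user-controlled and may be too large for the slab allocator, with kvfree()
 * releasing the memory regardless of which allocator ended up backing it.
 * The struct and function names below are invented for the example.
 */
#if 0
struct example_table {
	size_t nr;
	u64 *entries;
};

static struct example_table *example_table_alloc(size_t nr)
{
	struct example_table *t;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	/* Possibly huge; physical contiguity is not required. */
	t->entries = kvmalloc_array(nr, sizeof(*t->entries),
				    GFP_KERNEL | __GFP_ZERO);
	if (!t->entries) {
		kfree(t);
		return NULL;
	}
	t->nr = nr;
	return t;
}

static void example_table_free(struct example_table *t)
{
	if (!t)
		return;
	kvfree(t->entries);	/* correct for both kmalloc and vmalloc backing */
	kfree(t);
}
#endif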
// SPDX-License-Identifier: GPL-2.0-or-later /* * Neighbour Discovery for IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Mike Shaver <shaver@ingenia.com> */ /* * Changes: * * Alexey I. Froloff : RFC6106 (DNSSL) support * Pierre Ynard : export userland ND options * through netlink (RDNSS support) * Lars Fenneberg : fixed MTU setting on receipt * of an RA. * Janos Farkas : kmalloc failure checks * Alexey Kuznetsov : state machine reworked * and moved to net/core.
* Pekka Savola : RFC2461 validation * YOSHIFUJI Hideaki @USAGI : Verify ND options properly */ #define pr_fmt(fmt) "ICMPv6: " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/sched.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/route.h> #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/if_addr.h> #include <linux/if_ether.h> #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/jhash.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/icmp.h> #include <net/netlink.h> #include <linux/rtnetlink.h> #include <net/flow.h> #include <net/ip6_checksum.h> #include <net/inet_common.h> #include <linux/proc_fs.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey); static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); static int ndisc_is_multicast(const void *pkey); static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_connected_output, }; static const struct neigh_ops ndisc_hh_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_resolve_output, }; static const struct neigh_ops ndisc_direct_ops = { .family = AF_INET6, .output = neigh_direct_output, .connected_output = neigh_direct_output, }; struct neigh_table nd_tbl = { .family = AF_INET6, .key_len = sizeof(struct in6_addr), .protocol = cpu_to_be16(ETH_P_IPV6), .hash = ndisc_hash, .key_eq = ndisc_key_eq, .constructor = ndisc_constructor, .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, .proxy_redo = pndisc_redo, .is_multicast = ndisc_is_multicast, .allow_add = ndisc_allow_add, .id = "ndisc_cache", .parms = { .tbl = &nd_tbl, .reachable_time = ND_REACHABLE_TIME, .data = { [NEIGH_VAR_MCAST_PROBES] = 3, [NEIGH_VAR_UCAST_PROBES] = 3, [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, .gc_thresh2 = 512, .gc_thresh3 = 1024, }; EXPORT_SYMBOL_GPL(nd_tbl); void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad) { int space = __ndisc_opt_addr_space(data_len, pad); u8 *opt = skb_put(skb, space); opt[0] = type; opt[1] = space>>3; memset(opt + 2, 0, pad); opt += pad; space 
-= pad; memcpy(opt+2, data, data_len); data_len += 2; opt += data_len; space -= data_len; if (space > 0) memset(opt, 0, space); } EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option); static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, u8 icmp6_type) { __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, ndisc_addr_option_pad(skb->dev->type)); ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type); } static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb, void *ha, const u8 *ops_data) { ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT); ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data); } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { int type; if (!cur || !end || cur >= end) return NULL; type = cur->nd_opt_type; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while (cur < end && cur->nd_opt_type != type); return cur <= end && cur->nd_opt_type == type ? cur : NULL; } static inline int ndisc_is_useropt(const struct net_device *dev, struct nd_opt_hdr *opt) { return opt->nd_opt_type == ND_OPT_PREFIX_INFO || opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL || opt->nd_opt_type == ND_OPT_PREF64 || ndisc_ops_is_useropt(dev, opt->nd_opt_type); } static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev, struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { if (!cur || !end || cur >= end) return NULL; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while (cur < end && !ndisc_is_useropt(dev, cur)); return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL; } struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; if (!nd_opt || opt_len < 0 || !ndopts) return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; l = nd_opt->nd_opt_len << 3; if (opt_len < l || l == 0) return NULL; if (ndisc_ops_parse_options(dev, nd_opt, ndopts)) goto next_opt; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LL_ADDR: case ND_OPT_TARGET_LL_ADDR: case ND_OPT_MTU: case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { ND_PRINTK(2, warn, "%s: duplicated ND6 option found: type=%d\n", __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFO: ndopts->nd_opts_pi_end = nd_opt; if (!ndopts->nd_opt_array[nd_opt->nd_opt_type]) ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; break; #ifdef CONFIG_IPV6_ROUTE_INFO case ND_OPT_ROUTE_INFO: ndopts->nd_opts_ri_end = nd_opt; if (!ndopts->nd_opts_ri) ndopts->nd_opts_ri = nd_opt; break; #endif default: if (ndisc_is_useropt(dev, nd_opt)) { ndopts->nd_useropts_end = nd_opt; if (!ndopts->nd_useropts) ndopts->nd_useropts = nd_opt; } else { /* * Unknown options must be silently ignored, * to accommodate future extension to the * protocol. */ ND_PRINTK(2, notice, "%s: ignored unsupported option; type=%d, len=%d\n", __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len); } } next_opt: opt_len -= l; nd_opt = ((void *)nd_opt) + l; } return ndopts; } int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir) { switch (dev->type) { case ARPHRD_ETHER: case ARPHRD_IEEE802: /* Not sure. Check it later. 
--ANK */ case ARPHRD_FDDI: ipv6_eth_mc_map(addr, buf); return 0; case ARPHRD_ARCNET: ipv6_arcnet_mc_map(addr, buf); return 0; case ARPHRD_INFINIBAND: ipv6_ib_mc_map(addr, dev->broadcast, buf); return 0; case ARPHRD_IPGRE: return ipv6_ipgre_mc_map(addr, dev->broadcast, buf); default: if (dir) { memcpy(buf, dev->broadcast, dev->addr_len); return 0; } } return -EINVAL; } EXPORT_SYMBOL(ndisc_mc_map); static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { return ndisc_hashfn(pkey, dev, hash_rnd); } static bool ndisc_key_eq(const struct neighbour *n, const void *pkey) { return neigh_key_eq128(n, pkey); } static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; struct net_device *dev = neigh->dev; struct inet6_dev *in6_dev; struct neigh_parms *parms; bool is_multicast = ipv6_addr_is_multicast(addr); in6_dev = in6_dev_get(dev); if (!in6_dev) { return -EINVAL; } parms = in6_dev->nd_parms; __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &ndisc_direct_ops; neigh->output = neigh_direct_output; } else { if (is_multicast) { neigh->nud_state = NUD_NOARP; ndisc_mc_map(addr, neigh->ha, dev, 1); } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->dev_addr, dev->addr_len); if (dev->flags&IFF_LOOPBACK) neigh->type = RTN_LOCAL; } else if (dev->flags&IFF_POINTOPOINT) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } if (dev->header_ops->cache) neigh->ops = &ndisc_hh_ops; else neigh->ops = &ndisc_generic_ops; if (neigh->nud_state&NUD_VALID) neigh->output = neigh->ops->connected_output; else neigh->output = neigh->ops->output; } in6_dev_put(in6_dev); return 0; } static int pndisc_constructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; if (!dev || !__in6_dev_get(dev)) return -EINVAL; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); return 0; } static void pndisc_destructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; if (!dev || !__in6_dev_get(dev)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); } /* called with rtnl held */ static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev || idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device"); return false; } return true; } static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int len) { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", __func__); return NULL; } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ skb_set_owner_w(skb, sk); return skb; } static void ip6_nd_hdr(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int hop_limit, int 
len) { struct ipv6hdr *hdr; struct inet6_dev *idev; unsigned tclass; rcu_read_lock(); idev = __in6_dev_get(skb->dev); tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); ip6_flow_hdr(hdr, tclass, 0); hdr->payload_len = htons(len); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = hop_limit; hdr->saddr = *saddr; hdr->daddr = *daddr; } void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(skb->dev); struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; int err; struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); return; } skb_dst_set(skb, dst); } icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, csum_partial(icmp6h, skb->len, 0)); ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } rcu_read_unlock(); } EXPORT_SYMBOL(ndisc_send_skb); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt) { struct sk_buff *skb; struct in6_addr tmpaddr; struct inet6_ifaddr *ifp; const struct in6_addr *src_addr; struct nd_msg *msg; int optlen = 0; /* for anycast or proxy, solicited_addr != src_addr */ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = false; inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, &tmpaddr)) return; src_addr = &tmpaddr; } if (!dev->addr_len) inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_ADVERTISEMENT); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, .icmp6_router = router, .icmp6_solicited = solicited, .icmp6_override = override, }, .target = *solicited_addr, }; if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_ADVERTISEMENT); ndisc_send_skb(skb, daddr, src_addr); } static void ndisc_send_unsol_na(struct net_device *dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; idev = in6_dev_get(dev); if (!idev) return; read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { /* skip tentative addresses until dad completes */ if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_OPTIMISTIC)) continue; ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, /*inc_opt=*/ true); } read_unlock_bh(&idev->lock); in6_dev_put(idev); } struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *saddr, u64 nonce) { int inc_opt = 
dev->addr_len; struct sk_buff *skb; struct nd_msg *msg; int optlen = 0; if (!saddr) return NULL; if (ipv6_addr_any(saddr)) inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_SOLICITATION); if (nonce != 0) optlen += 8; skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return NULL; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, }, .target = *solicit, }; if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_SOLICITATION); if (nonce != 0) { u8 *opt = skb_put(skb, 8); opt[0] = ND_OPT_NONCE; opt[1] = 8 >> 3; memcpy(opt + 2, &nonce, 6); } return skb; } EXPORT_SYMBOL(ndisc_ns_create); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce) { struct in6_addr addr_buf; struct sk_buff *skb; if (!saddr) { if (ipv6_get_lladdr(dev, &addr_buf, (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))) return; saddr = &addr_buf; } skb = ndisc_ns_create(dev, solicit, saddr, nonce); if (skb) ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct sk_buff *skb; struct rs_msg *msg; int send_sllao = dev->addr_len; int optlen = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* * According to section 2.2 of RFC 4429, we must not * send router solicitations with a sllao from * optimistic addresses, but we may send the solicitation * if we don't include the sllao. So here we check * if our address is optimistic, and if so, we * suppress the inclusion of the sllao. */ if (send_sllao) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr, dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { send_sllao = 0; } in6_ifa_put(ifp); } else { send_sllao = 0; } } #endif if (send_sllao) optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return; msg = skb_put(skb, sizeof(*msg)); *msg = (struct rs_msg) { .icmph = { .icmp6_type = NDISC_ROUTER_SOLICITATION, }, }; if (send_sllao) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_ROUTER_SOLICITATION); ndisc_send_skb(skb, daddr, saddr); } static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb) { /* * "The sender MUST return an ICMP * destination unreachable" */ dst_link_failure(skb); kfree_skb(skb); } /* Called with locked neigh: either read or both */ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) { struct in6_addr *saddr = NULL; struct in6_addr mcaddr; struct net_device *dev = neigh->dev; struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, false, 1, IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(dev, target, &mcaddr, saddr, 0); } } static int pndisc_is_router(const void *pkey, struct net_device *dev) { struct pneigh_entry *n; int ret = -1; read_lock_bh(&nd_tbl.lock); n = 
__pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev); if (n) ret = !!(n->flags & NTF_ROUTER); read_unlock_bh(&nd_tbl.lock); return ret; } void ndisc_update(const struct net_device *dev, struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, struct ndisc_options *ndopts) { neigh_update(neigh, lladdr, new, flags, 0); /* report ndisc ops about neighbour update */ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts); } static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; int dad = ipv6_addr_any(saddr); int is_router = -1; SKB_DR(reason); u64 nonce = 0; bool inc; if (skb->len < sizeof(struct nd_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NS: multicast target address\n"); return reason; } /* * RFC2461 7.1.1: * DAD has to be destined for solicited node multicast address. */ if (dad && !ipv6_addr_is_solict_mult(daddr)) { ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return reason; } if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); if (!lladdr) { ND_PRINTK(2, warn, "NS: invalid link-layer address length\n"); return reason; } /* RFC2461 7.1.1: * If the IP source address is the unspecified address, * there MUST NOT be source link-layer address option * in the message. */ if (dad) { ND_PRINTK(2, warn, "NS: bad DAD packet (link-layer address option)\n"); return reason; } } if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1) memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6); inc = ipv6_addr_is_multicast(daddr); ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { if (nonce != 0 && ifp->dad_nonce == nonce) { u8 *np = (u8 *)&nonce; /* Matching nonce if looped back */ ND_PRINTK(2, notice, "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", ifp->idev->dev->name, &ifp->addr, np); goto out; } /* * We are colliding with another node * who is doing DAD * so fail our DAD process */ addrconf_dad_failure(skb, ifp); return reason; } else { /* * This is not a dad solicitation. * If we are an optimistic node, * we should respond. * Otherwise, we should ignore it. */ if (!(ifp->flags & IFA_F_OPTIMISTIC)) goto out; } } idev = ifp->idev; } else { struct net *net = dev_net(dev); /* perhaps an address on the master device */ if (netif_is_l3_slave(dev)) { struct net_device *mdev; mdev = netdev_master_upper_dev_get_rcu(dev); if (mdev) { ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1); if (ifp) goto have_ifp; } } idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? 
*/ return reason; } if (ipv6_chk_acast_addr(net, dev, &msg->target) || (READ_ONCE(idev->cnf.forwarding) && (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) || READ_ONCE(idev->cnf.proxy_ndp)) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc && NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) { /* * for anycast or proxy, * sender should delay its response * by a random time between 0 and * MAX_ANYCAST_DELAY_TIME seconds. * (RFC2461) -- yoshfuji */ struct sk_buff *n = skb_clone(skb, GFP_ATOMIC); if (n) pneigh_enqueue(&nd_tbl, idev->nd_parms, n); goto out; } } else { SKB_DR_SET(reason, IPV6_NDISC_NS_OTHERHOST); goto out; } } if (is_router < 0) is_router = READ_ONCE(idev->cnf.forwarding); if (dad) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, !!is_router, false, (ifp != NULL), true); goto out; } if (inc) NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast); else NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast); /* * update / create cache entry * for the source address */ neigh = __neigh_lookup(&nd_tbl, saddr, dev, !inc || lladdr || !dev->addr_len); if (neigh) ndisc_update(dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE, NDISC_NEIGHBOUR_SOLICITATION, &ndopts); if (neigh || !dev->header_ops) { ndisc_send_na(dev, saddr, &msg->target, !!is_router, true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); reason = SKB_CONSUMED; } out: if (ifp) in6_ifa_put(ifp); else in6_dev_put(idev); return reason; } static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr) { struct inet6_dev *idev = __in6_dev_get(dev); switch (READ_ONCE(idev->cnf.accept_untracked_na)) { case 0: /* Don't accept untracked na (absent in neighbor cache) */ return 0; case 1: /* Create new entries from na if currently untracked */ return 1; case 2: /* Create new entries from untracked na only if saddr is in the * same subnet as an address configured on the interface that * received the na */ return !!ipv6_chk_prefix(saddr, dev); default: return 0; } } static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; SKB_DR(reason); u8 new_state; if (skb->len < sizeof(struct nd_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NA: target address is multicast\n"); return reason; } if (ipv6_addr_is_multicast(daddr) && msg->icmph.icmp6_solicited) { ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); return reason; } /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicitd packets are attacks * and thus should not be accepted. 
* drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && READ_ONCE(idev->cnf.drop_unsolicited_na)) return reason; if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); if (!lladdr) { ND_PRINTK(2, warn, "NA: invalid link-layer address length\n"); return reason; } } ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { if (skb->pkt_type != PACKET_LOOPBACK && (ifp->flags & IFA_F_TENTATIVE)) { addrconf_dad_failure(skb, ifp); return reason; } /* What should we make now? The advertisement is invalid, but ndisc specs say nothing about it. It could be misconfiguration, or an smart proxy agent tries to help us :-) We should not print the error if NA has been received from loopback - it is just our own unsolicited advertisement. */ if (skb->pkt_type != PACKET_LOOPBACK) ND_PRINTK(1, warn, "NA: %pM advertised our address %pI6c on %s!\n", eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); in6_ifa_put(ifp); return reason; } neigh = neigh_lookup(&nd_tbl, &msg->target, dev); /* RFC 9131 updates original Neighbour Discovery RFC 4861. * NAs with Target LL Address option without a corresponding * entry in the neighbour cache can now create a STALE neighbour * cache entry on routers. * * entry accept fwding solicited behaviour * ------- ------ ------ --------- ---------------------- * present X X 0 Set state to STALE * present X X 1 Set state to REACHABLE * absent 0 X X Do nothing * absent 1 0 X Do nothing * absent 1 1 X Add a new STALE entry * * Note that we don't do a (daddr == all-routers-mcast) check. */ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) { if (accept_untracked_na(dev, saddr)) { neigh = neigh_create(&nd_tbl, &msg->target, dev); new_state = NUD_STALE; } } if (neigh && !IS_ERR(neigh)) { u8 old_flags = neigh->flags; struct net *net = dev_net(dev); if (READ_ONCE(neigh->nud_state) & NUD_FAILED) goto out; /* * Don't update the neighbor cache entry on a proxy NA from * ourselves because either the proxied node is off link or it * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && READ_ONCE(net->ipv6.devconf_all->forwarding) && READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; } ndisc_update(dev, neigh, lladdr, new_state, NEIGH_UPDATE_F_WEAK_OVERRIDE| (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| (msg->icmph.icmp6_router ? 
NEIGH_UPDATE_F_ISROUTER : 0), NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts); if ((old_flags & ~neigh->flags) & NTF_ROUTER) { /* * Change: router to host */ rt6_clean_tohost(dev_net(dev), saddr); } reason = SKB_CONSUMED; out: neigh_release(neigh); } return reason; } static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) { struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb); unsigned long ndoptlen = skb->len - sizeof(*rs_msg); struct neighbour *neigh; struct inet6_dev *idev; const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; struct ndisc_options ndopts; u8 *lladdr = NULL; SKB_DR(reason); if (skb->len < sizeof(*rs_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; idev = __in6_dev_get(skb->dev); if (!idev) { ND_PRINTK(1, err, "RS: can't find in6 device\n"); return reason; } /* Don't accept RS if we're not in router mode */ if (!READ_ONCE(idev->cnf.forwarding)) goto out; /* * Don't update NCE if src = ::; * this implies that the source node has no ip address assigned yet. */ if (ipv6_addr_any(saddr)) goto out; /* Parse ND options */ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) goto out; } neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1); if (neigh) { ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER, NDISC_ROUTER_SOLICITATION, &ndopts); neigh_release(neigh); reason = SKB_CONSUMED; } out: return reason; } static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) { struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra); struct sk_buff *skb; struct nlmsghdr *nlh; struct nduseroptmsg *ndmsg; struct net *net = dev_net(ra->dev); int err; int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + (opt->nd_opt_len << 3)); size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); skb = nlmsg_new(msg_size, GFP_ATOMIC); if (!skb) { err = -ENOBUFS; goto errout; } nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); if (!nlh) { goto nla_put_failure; } ndmsg = nlmsg_data(nlh); ndmsg->nduseropt_family = AF_INET6; ndmsg->nduseropt_ifindex = ra->dev->ifindex; ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr)) goto nla_put_failure; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); return; nla_put_failure: nlmsg_free(skb); err = -EMSGSIZE; errout: rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); } static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) { struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); bool send_ifinfo_notify = false; struct neighbour *neigh = NULL; struct ndisc_options ndopts; struct fib6_info *rt = NULL; struct inet6_dev *in6_dev; struct fib6_table *table; u32 defrtr_usr_metric; unsigned int pref = 0; __u32 old_if_flags; struct net *net; SKB_DR(reason); int lifetime; int optlen; __u8 *opt = (__u8 *)(ra_msg + 1); optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); ND_PRINTK(2, info, "RA: %s, dev: %s\n", __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); 
return reason; } if (optlen < 0) return SKB_DROP_REASON_PKT_TOO_SMALL; #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); return reason; } #endif in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) { ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", skb->dev->name); return reason; } if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ipv6_accept_ra(in6_dev)) { ND_PRINTK(2, info, "RA: %s, did not accept ra for dev: %s\n", __func__, skb->dev->name); goto skip_linkparms; } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { ND_PRINTK(2, info, "RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__, skb->dev->name); goto skip_linkparms; } #endif if (in6_dev->if_flags & IF_RS_SENT) { /* * flag that an RA was received after an RS was sent * out on this interface. */ in6_dev->if_flags |= IF_RA_RCVD; } /* * Remember the managed/otherconf flags from most recently * received RA message (RFC 2462) -- yoshfuji */ old_if_flags = in6_dev->if_flags; in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED | IF_RA_OTHERCONF)) | (ra_msg->icmph.icmp6_addrconf_managed ? IF_RA_MANAGED : 0) | (ra_msg->icmph.icmp6_addrconf_other ? IF_RA_OTHERCONF : 0); if (old_if_flags != in6_dev->if_flags) send_ifinfo_notify = true; if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { ND_PRINTK(2, info, "RA: %s, defrtr is false for dev: %s\n", __func__, skb->dev->name); goto skip_defrtr; } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); if (lifetime != 0 && lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { ND_PRINTK(2, info, "RA: router lifetime (%ds) is too short: %s\n", lifetime, skb->dev->name); goto skip_defrtr; } /* Do not accept RA with source-addr found on local machine unless * accept_ra_from_local is set to true. 
*/ net = dev_net(in6_dev->dev); if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", skb->dev->name); goto skip_defrtr; } #ifdef CONFIG_IPV6_ROUTER_PREF pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref)) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif /* routes added from RAs do not use nexthop objects */ rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); fib6_info_release(rt); return reason; } } /* Set default route metric as specified by user */ defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric; /* delete the route if lifetime is 0 or if metric needs change */ if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) { ip6_del_rt(net, rt, false); rt = NULL; } ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime, defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); if (neigh) neigh_release(neigh); rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev, pref, defrtr_usr_metric, lifetime); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", __func__); return reason; } neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); fib6_info_release(rt); return reason; } neigh->flags |= NTF_ROUTER; } else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) { struct nl_info nlinfo = { .nl_net = net, }; rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); } if (rt) { table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); fib6_set_expires(rt, jiffies + (HZ * lifetime)); fib6_add_gc_list(rt); spin_unlock_bh(&table->tb6_lock); } if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 && ra_msg->icmph.icmp6_hop_limit) { if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <= ra_msg->icmph.icmp6_hop_limit) { WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } } skip_defrtr: /* * Update Reachable Time and Retrans Timer */ if (in6_dev->nd_parms) { unsigned long rtime = ntohl(ra_msg->retrans_timer); if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) { rtime = (rtime*HZ)/1000; if (rtime < HZ/100) rtime = HZ/100; NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } rtime = ntohl(ra_msg->reachable_time); if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) { rtime = (rtime*HZ)/1000; if (rtime < HZ/10) rtime = HZ/10; if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) { NEIGH_VAR_SET(in6_dev->nd_parms, BASE_REACHABLE_TIME, rtime); NEIGH_VAR_SET(in6_dev->nd_parms, GC_STALETIME, 3 * rtime); in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } } } skip_linkparms: /* * Process options. 
*/ if (!neigh) neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr, skb->dev, 1); if (neigh) { u8 *lladdr = NULL; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) { ND_PRINTK(2, warn, "RA: invalid link-layer address length\n"); goto out; } } ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| NEIGH_UPDATE_F_ISROUTER, NDISC_ROUTER_ADVERTISEMENT, &ndopts); reason = SKB_CONSUMED; } if (!ipv6_accept_ra(in6_dev)) { ND_PRINTK(2, info, "RA: %s, accept_ra is false for dev: %s\n", __func__, skb->dev->name); goto out; } #ifdef CONFIG_IPV6_ROUTE_INFO if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: router info ignored.\n", skb->dev->name); goto skip_routeinfo; } if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; p; p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) { struct route_info *ri = (struct route_info *)p; #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT && ri->prefix_len == 0) continue; #endif if (ri->prefix_len == 0 && !READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) continue; if (ri->lifetime != 0 && ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) continue; if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen)) continue; if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen)) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); } } skip_routeinfo: #endif #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { ND_PRINTK(2, info, "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", __func__, skb->dev->name); goto out; } #endif if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_pi; p; p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) { addrconf_prefix_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, ndopts.nd_opts_src_lladdr != NULL); } } if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) { __be32 n; u32 mtu; memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); if (in6_dev->ra_mtu != mtu) { in6_dev->ra_mtu = mtu; send_ifinfo_notify = true; } if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } } if (ndopts.nd_useropts) { struct nd_opt_hdr *p; for (p = ndopts.nd_useropts; p; p = ndisc_next_useropt(skb->dev, p, ndopts.nd_useropts_end)) { ndisc_ra_useropt(skb, p); } } if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: /* Send a notify if RA changed managed/otherconf flags or * timer settings or ra_mtu value */ if (send_ifinfo_notify) inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); fib6_info_release(rt); if (neigh) neigh_release(neigh); return reason; } static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb) { struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); 
struct ndisc_options ndopts; SKB_DR(reason); u8 *hdr; #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: ND_PRINTK(2, warn, "Redirect: from host or unauthorized router\n"); return reason; } #endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "Redirect: source address is not link-local\n"); return reason; } if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ndopts.nd_opts_rh) { ip6_redirect_no_header(skb, dev_net(skb->dev), skb->dev->ifindex); return reason; } hdr = (u8 *)ndopts.nd_opts_rh; hdr += 8; if (!pskb_pull(skb, hdr - skb_transport_header(skb))) return SKB_DROP_REASON_PKT_TOO_SMALL; return icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, struct sk_buff *orig_skb, int rd_len) { u8 *opt = skb_put(skb, rd_len); memset(opt, 0, 8); *(opt++) = ND_OPT_REDIRECT_HDR; *(opt++) = (rd_len >> 3); opt += 6; skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt, rd_len - 8); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; int optlen = 0; struct inet_peer *peer; struct sk_buff *buff; struct rd_msg *msg; struct in6_addr saddr_buf; struct rt6_info *rt; struct dst_entry *dst; struct flowi6 fl6; int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; bool ret; if (netif_is_l3_master(skb->dev)) { dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) return; } if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", dev->name); return; } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "Redirect: target address is not link-local unicast\n"); return; } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); return; } dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; rt = (struct rt6_info *) dst; if (rt->rt6i_flags & RTF_GATEWAY) { ND_PRINTK(2, warn, "Redirect: destination is not a neighbour\n"); goto release; } peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1); ret = inet_peer_xrlim_allow(peer, 1*HZ); if (peer) inet_putpeer(peer); if (!ret) goto release; if (dev->addr_len) { struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); if (!neigh) { ND_PRINTK(2, warn, "Redirect: no neigh for target address\n"); goto release; } read_lock_bh(&neigh->lock); if (neigh->nud_state & NUD_VALID) { memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; optlen += ndisc_redirect_opt_addr_space(dev, neigh, ops_data_buf, &ops_data); } else read_unlock_bh(&neigh->lock); neigh_release(neigh); } rd_len = min_t(unsigned int, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen, skb->len + 8); rd_len &= ~0x7; optlen += rd_len; buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!buff) goto release; msg = skb_put(buff, sizeof(*msg)); *msg = (struct rd_msg) { .icmph = { .icmp6_type = NDISC_REDIRECT, }, .target = *target, .dest = ipv6_hdr(skb)->daddr, }; /* * include target_address option */ if (ha) 
ndisc_fill_redirect_addr_option(buff, ha, ops_data); /* * build redirect option and copy skb over to the new packet. */ if (rd_len) ndisc_fill_redirect_hdr_option(buff, skb, rd_len); skb_dst_set(buff, dst); ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf); return; release: dst_release(dst); } static void pndisc_redo(struct sk_buff *skb) { enum skb_drop_reason reason = ndisc_recv_ns(skb); kfree_skb_reason(skb, reason); } static int ndisc_is_multicast(const void *pkey) { return ipv6_addr_is_multicast((struct in6_addr *)pkey); } static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); if (!idev) return true; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && READ_ONCE(idev->cnf.suppress_frag_ndisc)) { net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); return true; } return false; } enum skb_drop_reason ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; SKB_DR(reason); if (ndisc_suppress_frag_ndisc(skb)) return SKB_DROP_REASON_IPV6_NDISC_FRAG; if (skb_linearize(skb)) return SKB_DROP_REASON_NOMEM; msg = (struct nd_msg *)skb_transport_header(skb); __skb_push(skb, skb->data - skb_transport_header(skb)); if (ipv6_hdr(skb)->hop_limit != 255) { ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit); return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT; } if (msg->icmph.icmp6_code != 0) { ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code); return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE; } switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); reason = ndisc_recv_ns(skb); break; case NDISC_NEIGHBOUR_ADVERTISEMENT: reason = ndisc_recv_na(skb); break; case NDISC_ROUTER_SOLICITATION: reason = ndisc_recv_rs(skb); break; case NDISC_ROUTER_ADVERTISEMENT: reason = ndisc_router_discovery(skb); break; case NDISC_REDIRECT: reason = ndisc_redirect_rcv(skb); break; } return reason; } static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(0, net, false); fallthrough; case NETDEV_UP: idev = in6_dev_get(dev); if (!idev) break; if (READ_ONCE(idev->cnf.ndisc_notify) || READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; case NETDEV_CHANGE: idev = in6_dev_get(dev); if (!idev) evict_nocarrier = true; else { evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); in6_dev_put(idev); } change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&nd_tbl, dev); if (evict_nocarrier && !netif_carrier_ok(dev)) neigh_carrier_down(&nd_tbl, dev); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block ndisc_netdev_notifier = { .notifier_call = ndisc_netdev_event, .priority = ADDRCONF_NOTIFY_PRIORITY - 5, }; #ifdef CONFIG_SYSCTL static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, const char *func, const char *dev_name) { static char warncomm[TASK_COMM_LEN]; static int warned; if (strcmp(warncomm, current->comm) 
&& warned < 5) { strcpy(warncomm, current->comm); pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n", warncomm, func, dev_name, ctl->procname, dev_name, ctl->procname); warned++; } } int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; int ret; if ((strcmp(ctl->procname, "retrans_time") == 0) || (strcmp(ctl->procname, "base_reachable_time") == 0)) ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); else ret = -1; if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) idev->nd_parms->reachable_time = neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); } return ret; } #endif static int __net_init ndisc_net_init(struct net *net) { struct ipv6_pinfo *np; struct sock *sk; int err; err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { ND_PRINTK(0, err, "NDISC: Failed to initialize the control socket (err %d)\n", err); return err; } net->ipv6.ndisc_sk = sk; np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ inet6_clear_bit(MC6_LOOP, sk); return 0; } static void __net_exit ndisc_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.ndisc_sk); } static struct pernet_operations ndisc_net_ops = { .init = ndisc_net_init, .exit = ndisc_net_exit, }; int __init ndisc_init(void) { int err; err = register_pernet_subsys(&ndisc_net_ops); if (err) return err; /* * Initialize the neighbour table */ neigh_table_init(NEIGH_ND_TABLE, &nd_tbl); #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; out: #endif return err; #ifdef CONFIG_SYSCTL out_unregister_pernet: unregister_pernet_subsys(&ndisc_net_ops); goto out; #endif } int __init ndisc_late_init(void) { return register_netdevice_notifier(&ndisc_netdev_notifier); } void ndisc_late_cleanup(void) { unregister_netdevice_notifier(&ndisc_netdev_notifier); } void ndisc_cleanup(void) { #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl); unregister_pernet_subsys(&ndisc_net_ops); }
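/*
 * Hedged illustration, not part of the original file: the drop reason
 * returned by ndisc_rcv() is meant to be handed back to
 * kfree_skb_reason() by the caller, in the same way pndisc_redo()
 * above consumes the reason from ndisc_recv_ns().  The wrapper
 * function below is hypothetical.
 */
static void example_ndisc_rcv_and_free(struct sk_buff *skb)
{
	enum skb_drop_reason reason = ndisc_rcv(skb);

	/* Hand the reason back so drop monitoring sees why the skb went away. */
	kfree_skb_reason(skb, reason);
}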
// SPDX-License-Identifier: GPL-2.0
/* Bluetooth HCI driver model support. */

#include <linux/module.h>

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

static const struct class bt_class = {
	.name = "bluetooth",
};

static void bt_link_release(struct device *dev)
{
	struct hci_conn *conn = to_hci_conn(dev);

	kfree(conn);
}

static const struct device_type bt_link = {
	.name = "link",
	.release = bt_link_release,
};

/*
 * The rfcomm tty device may stick around even after the connection is
 * down, and sysfs does not support moving a zombie device, so the tty
 * device has to be moved away before the connection device is destroyed.
 */
static int __match_tty(struct device *dev, void *data)
{
	return !strncmp(dev_name(dev), "rfcomm", 6);
}

void hci_conn_init_sysfs(struct hci_conn *conn)
{
	struct hci_dev *hdev = conn->hdev;

	bt_dev_dbg(hdev, "conn %p", conn);

	conn->dev.type = &bt_link;
	conn->dev.class = &bt_class;
	conn->dev.parent = &hdev->dev;

	device_initialize(&conn->dev);
}

void hci_conn_add_sysfs(struct hci_conn *conn)
{
	struct hci_dev *hdev = conn->hdev;

	bt_dev_dbg(hdev, "conn %p", conn);

	if (device_is_registered(&conn->dev))
		return;

	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);

	if (device_add(&conn->dev) < 0)
		bt_dev_err(hdev, "failed to register connection device");
}

void hci_conn_del_sysfs(struct hci_conn *conn)
{
	struct hci_dev *hdev = conn->hdev;

	bt_dev_dbg(hdev, "conn %p", conn);

	if (!device_is_registered(&conn->dev)) {
		/* If device_add() has *not* succeeded, use *only* put_device()
		 * to drop the reference count.
		 */
		put_device(&conn->dev);
		return;
	}

	while (1) {
		struct device *dev;

		dev = device_find_child(&conn->dev, NULL, __match_tty);
		if (!dev)
			break;
		device_move(dev, NULL, DPM_ORDER_DEV_LAST);
		put_device(dev);
	}

	device_unregister(&conn->dev);
}

static void bt_host_release(struct device *dev)
{
	struct hci_dev *hdev = to_hci_dev(dev);

	if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
		hci_release_dev(hdev);
	else
		kfree(hdev);
	module_put(THIS_MODULE);
}

static const struct device_type bt_host = {
	.name = "host",
	.release = bt_host_release,
};

void hci_init_sysfs(struct hci_dev *hdev)
{
	struct device *dev = &hdev->dev;

	dev->type = &bt_host;
	dev->class = &bt_class;

	__module_get(THIS_MODULE);
	device_initialize(dev);
}

int __init bt_sysfs_init(void)
{
	return class_register(&bt_class);
}

void bt_sysfs_cleanup(void)
{
	class_unregister(&bt_class);
}
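/*
 * Hedged usage sketch, not part of the original file: the expected call
 * order for the connection helpers above, as seen from the HCI core.
 * The wrapper function is hypothetical; the real call sites live
 * elsewhere in the Bluetooth core (for example hci_conn.c).
 */
static void example_conn_sysfs_lifecycle(struct hci_conn *conn)
{
	/* At connection setup: prepare the embedded struct device. */
	hci_conn_init_sysfs(conn);

	/* Once the handle is assigned: expose "hciX:handle" in sysfs. */
	hci_conn_add_sysfs(conn);

	/* At teardown: migrate rfcomm ttys, then unregister the device. */
	hci_conn_del_sysfs(conn);
}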
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_FPU_CONTEXT_H
#define __X86_KERNEL_FPU_CONTEXT_H

#include <asm/fpu/xstate.h>
#include <asm/trace/fpu.h>

/* Functions related to FPU context tracking */

/*
 * The in-register FPU state for an FPU context on a CPU is assumed to be
 * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
 * matches the FPU.
 *
 * If the FPU register state is valid, the kernel can skip restoring the
 * FPU state from memory.
 *
 * Any code that clobbers the FPU registers or updates the in-memory
 * FPU state for a task MUST let the rest of the kernel know that the
 * FPU registers are no longer valid for this task.
 *
 * Invalidate a resource you control: CPU if using the CPU for something else
 * (with preemption disabled), FPU for the current task, or a task that
 * is prevented from running by the current task.
 */
static inline void __cpu_invalidate_fpregs_state(void)
{
	__this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}

static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
	fpu->last_cpu = -1;
}

static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
	return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}

static inline void fpregs_deactivate(struct fpu *fpu)
{
	__this_cpu_write(fpu_fpregs_owner_ctx, NULL);
	trace_x86_fpu_regs_deactivated(fpu);
}

static inline void fpregs_activate(struct fpu *fpu)
{
	__this_cpu_write(fpu_fpregs_owner_ctx, fpu);
	trace_x86_fpu_regs_activated(fpu);
}

/* Internal helper for switch_fpu_return() and signal frame setup */
static inline void fpregs_restore_userregs(void)
{
	struct fpu *fpu = &current->thread.fpu;
	int cpu = smp_processor_id();

	if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER)))
		return;

	if (!fpregs_state_valid(fpu, cpu)) {
		/*
		 * This restores _all_ xstate which has not been
		 * established yet.
		 *
		 * If PKRU is enabled, then the PKRU value is already
		 * correct because it was either set in switch_to() or in
		 * flush_thread(). So it is excluded because it might not
		 * be up to date in current->thread.fpu.xsave state.
		 *
		 * XFD state is handled in restore_fpregs_from_fpstate().
		 */
		restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE);

		fpregs_activate(fpu);
		fpu->last_cpu = cpu;
	}
	clear_thread_flag(TIF_NEED_FPU_LOAD);
}

#endif
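/*
 * Hedged sketch, not part of the header above: how the invalidation rule
 * described in the comment at the top of this file is applied when a
 * CPU's FPU registers are about to be used for something other than the
 * owning task.  The function name is hypothetical; kernel_fpu_begin() is
 * one real user of this pattern.
 */
static inline void example_clobber_cpu_fpregs(void)
{
	/* The caller must already have disabled preemption. */
	__cpu_invalidate_fpregs_state();

	/*
	 * From here on, fpregs_state_valid() fails for every task on this
	 * CPU, so the next return to user space restores from memory.
	 */
}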
// SPDX-License-Identifier: GPL-2.0-only
/*
 * This file provides /sys/class/ieee80211/<wiphy name>/
 * and some default attributes.
 *
 * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
 * Copyright 2006      Johannes Berg <johannes@sipsolutions.net>
 * Copyright (C) 2020-2021, 2023 Intel Corporation
 */

#include <linux/device.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
#include <linux/rtnetlink.h>
#include <net/cfg80211.h>
#include "sysfs.h"
#include "core.h"
#include "rdev-ops.h"

static inline struct cfg80211_registered_device *dev_to_rdev(
	struct device *dev)
{
	return container_of(dev, struct cfg80211_registered_device, wiphy.dev);
}

#define SHOW_FMT(name, fmt, member)					\
static ssize_t name ## _show(struct device *dev,			\
			     struct device_attribute *attr,		\
			     char *buf)					\
{									\
	return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member);	\
}									\
static DEVICE_ATTR_RO(name)

SHOW_FMT(index, "%d", wiphy_idx);
SHOW_FMT(macaddress, "%pM", wiphy.perm_addr);
SHOW_FMT(address_mask, "%pM", wiphy.addr_mask);

static ssize_t name_show(struct device *dev,
			 struct device_attribute *attr,
			 char *buf)
{
	struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy;

	return sprintf(buf, "%s\n", wiphy_name(wiphy));
}
static DEVICE_ATTR_RO(name);

static ssize_t addresses_show(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy;
	char *start = buf;
	int i;

	if (!wiphy->addresses)
		return sprintf(buf, "%pM\n", wiphy->perm_addr);

	for (i = 0; i < wiphy->n_addresses; i++)
		buf += sprintf(buf, "%pM\n", wiphy->addresses[i].addr);

	return buf - start;
}
static DEVICE_ATTR_RO(addresses);

static struct attribute *ieee80211_attrs[] = {
	&dev_attr_index.attr,
	&dev_attr_macaddress.attr,
	&dev_attr_address_mask.attr,
	&dev_attr_addresses.attr,
	&dev_attr_name.attr,
	NULL,
};
ATTRIBUTE_GROUPS(ieee80211);

static void wiphy_dev_release(struct device *dev)
{
	struct cfg80211_registered_device *rdev = dev_to_rdev(dev);

	cfg80211_dev_free(rdev);
}

#ifdef CONFIG_PM_SLEEP
static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
{
	struct wireless_dev *wdev;

	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list)
		cfg80211_leave(rdev, wdev);
}

static int wiphy_suspend(struct device *dev)
{
	struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
	int ret = 0;

	rdev->suspend_at = ktime_get_boottime_seconds();

	rtnl_lock();
	wiphy_lock(&rdev->wiphy);
	if (rdev->wiphy.registered) {
		if (!rdev->wiphy.wowlan_config) {
			cfg80211_leave_all(rdev);
			cfg80211_process_rdev_events(rdev);
		}
		cfg80211_process_wiphy_works(rdev, NULL);
		if (rdev->ops->suspend)
			ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config);
		if (ret == 1) {
			/* Driver refused to configure wowlan */
			cfg80211_leave_all(rdev);
			cfg80211_process_rdev_events(rdev);
			cfg80211_process_wiphy_works(rdev, NULL);
			ret = rdev_suspend(rdev, NULL);
		}
		if (ret == 0)
			rdev->suspended = true;
	}
	wiphy_unlock(&rdev->wiphy);
	rtnl_unlock();

	return ret;
}

static int wiphy_resume(struct device *dev)
{
	struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
	int ret = 0;

	/* Age scan results with time spent in suspend */
	cfg80211_bss_age(rdev, ktime_get_boottime_seconds() - rdev->suspend_at);

	rtnl_lock();
	wiphy_lock(&rdev->wiphy);
	if (rdev->wiphy.registered && rdev->ops->resume)
		ret = rdev_resume(rdev);
	rdev->suspended = false;
	schedule_work(&rdev->wiphy_work);
	wiphy_unlock(&rdev->wiphy);

	if (ret)
		cfg80211_shutdown_all_interfaces(&rdev->wiphy);

	rtnl_unlock();

	return ret;
}

static SIMPLE_DEV_PM_OPS(wiphy_pm_ops, wiphy_suspend, wiphy_resume);
#define WIPHY_PM_OPS (&wiphy_pm_ops)
#else
#define WIPHY_PM_OPS NULL
#endif

static const void *wiphy_namespace(const struct device *d)
{
	struct wiphy *wiphy = container_of(d, struct wiphy, dev);

	return wiphy_net(wiphy);
}

struct class ieee80211_class = {
	.name = "ieee80211",
	.dev_release = wiphy_dev_release,
	.dev_groups = ieee80211_groups,
	.pm = WIPHY_PM_OPS,
	.ns_type = &net_ns_type_operations,
	.namespace = wiphy_namespace,
};

int wiphy_sysfs_init(void)
{
	return class_register(&ieee80211_class);
}

void wiphy_sysfs_exit(void)
{
	class_unregister(&ieee80211_class);
}
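/*
 * For illustration only (not part of the original file): the first
 * SHOW_FMT() invocation above, SHOW_FMT(index, "%d", wiphy_idx),
 * expands to roughly the following read-only device attribute.
 */
#if 0	/* expansion shown for reference only */
static ssize_t index_show(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return sprintf(buf, "%d" "\n", dev_to_rdev(dev)->wiphy_idx);
}
static DEVICE_ATTR_RO(index);
#endif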
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_SECTIONS_H_
#define _ASM_GENERIC_SECTIONS_H_

/* References to section boundaries */

#include <linux/compiler.h>
#include <linux/types.h>

/*
 * Usage guidelines:
 * _text, _data: architecture specific, don't use them in arch-independent code
 * [_stext, _etext]: contains .text.* sections, may also contain .rodata.*
 *                   and/or .init.* sections
 * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.*
 *                   and/or .init.* sections.
 * [__start_rodata, __end_rodata]: contains .rodata.* sections
 * [__start_ro_after_init, __end_ro_after_init]:
 *                   contains .data..ro_after_init section
 * [__init_begin, __init_end]: contains .init.* sections, but .init.text.*
 *                   may be out of this range on some architectures.
 * [_sinittext, _einittext]: contains .init.text.* sections
 * [__bss_start, __bss_stop]: contains BSS sections
 *
 * Following global variables are optional and may be unavailable on some
 * architectures and/or kernel configurations.
 *	_text, _data
 *	__kprobes_text_start, __kprobes_text_end
 *	__entry_text_start, __entry_text_end
 *	__ctors_start, __ctors_end
 *	__irqentry_text_start, __irqentry_text_end
 *	__softirqentry_text_start, __softirqentry_text_end
 *	__start_opd, __end_opd
 */
extern char _text[], _stext[], _etext[];
extern char _data[], _sdata[], _edata[];
extern char __bss_start[], __bss_stop[];
extern char __init_begin[], __init_end[];
extern char _sinittext[], _einittext[];
extern char __start_ro_after_init[], __end_ro_after_init[];
extern char _end[];
extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __entry_text_start[], __entry_text_end[];
extern char __start_rodata[], __end_rodata[];
extern char __irqentry_text_start[], __irqentry_text_end[];
extern char __softirqentry_text_start[], __softirqentry_text_end[];
extern char __start_once[], __end_once[];

/* Start and end of .ctors section - used for constructor calls. */
extern char __ctors_start[], __ctors_end[];

/* Start and end of .opd section - used for function descriptors. */
extern char __start_opd[], __end_opd[];

/* Start and end of instrumentation protected text section */
extern char __noinstr_text_start[], __noinstr_text_end[];

extern __visible const void __nosave_begin, __nosave_end;

/* Function descriptor handling (if any).
Override in asm/sections.h */ #ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS void *dereference_function_descriptor(void *ptr); void *dereference_kernel_function_descriptor(void *ptr); #else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) /* An address is simply the address of the function. */ typedef struct { unsigned long addr; } func_desc_t; #endif static inline bool have_function_descriptors(void) { return IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS); } /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region * @end: virtual address of the end of the memory region * @virt: virtual address of the memory object * @size: size of the memory object * * Returns: true if the object specified by @virt and @size is entirely * contained within the memory region defined by @begin and @end, false * otherwise. */ static inline bool memory_contains(void *begin, void *end, void *virt, size_t size) { return virt >= begin && virt + size <= end; } /** * memory_intersects - checks if the region occupied by an object intersects * with another memory region * @begin: virtual address of the beginning of the memory region * @end: virtual address of the end of the memory region * @virt: virtual address of the memory object * @size: size of the memory object * * Returns: true if an object's memory region, specified by @virt and @size, * intersects with the region specified by @begin and @end, false otherwise. */ static inline bool memory_intersects(void *begin, void *end, void *virt, size_t size) { void *vend = virt + size; if (virt < end && vend > begin) return true; return false; } /** * init_section_contains - checks if an object is contained within the init * section * @virt: virtual address of the memory object * @size: size of the memory object * * Returns: true if the object specified by @virt and @size is entirely * contained within the init section, false otherwise. */ static inline bool init_section_contains(void *virt, size_t size) { return memory_contains(__init_begin, __init_end, virt, size); } /** * init_section_intersects - checks if the region occupied by an object * intersects with the init section * @virt: virtual address of the memory object * @size: size of the memory object * * Returns: true if an object's memory region, specified by @virt and @size, * intersects with the init section, false otherwise. */ static inline bool init_section_intersects(void *virt, size_t size) { return memory_intersects(__init_begin, __init_end, virt, size); } /** * is_kernel_core_data - checks if the pointer address is located in the * .data or .bss section * * @addr: address to check * * Returns: true if the address is located in .data or .bss, false otherwise. * Note: On some archs it may return true for core RODATA, and false * for others. But will always be true for core RW data. */ static inline bool is_kernel_core_data(unsigned long addr) { if (addr >= (unsigned long)_sdata && addr < (unsigned long)_edata) return true; if (addr >= (unsigned long)__bss_start && addr < (unsigned long)__bss_stop) return true; return false; } /** * is_kernel_rodata - checks if the pointer address is located in the * .rodata section * * @addr: address to check * * Returns: true if the address is located in .rodata, false otherwise. 
*/ static inline bool is_kernel_rodata(unsigned long addr) { return addr >= (unsigned long)__start_rodata && addr < (unsigned long)__end_rodata; } /** * is_kernel_inittext - checks if the pointer address is located in the * .init.text section * * @addr: address to check * * Returns: true if the address is located in .init.text, false otherwise. */ static inline bool is_kernel_inittext(unsigned long addr) { return addr >= (unsigned long)_sinittext && addr < (unsigned long)_einittext; } /** * __is_kernel_text - checks if the pointer address is located in the * .text section * * @addr: address to check * * Returns: true if the address is located in .text, false otherwise. * Note: an internal helper, only check the range of _stext to _etext. */ static inline bool __is_kernel_text(unsigned long addr) { return addr >= (unsigned long)_stext && addr < (unsigned long)_etext; } /** * __is_kernel - checks if the pointer address is located in the kernel range * * @addr: address to check * * Returns: true if the address is located in the kernel range, false otherwise. * Note: an internal helper, check the range of _stext to _end, * and range from __init_begin to __init_end, which can be outside * of the _stext to _end range. */ static inline bool __is_kernel(unsigned long addr) { return ((addr >= (unsigned long)_stext && addr < (unsigned long)_end) || (addr >= (unsigned long)__init_begin && addr < (unsigned long)__init_end)); } #endif /* _ASM_GENERIC_SECTIONS_H_ */
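/*
 * Hedged usage sketch, not part of the header above: combining the text
 * helpers defined in this file.  The function name and the policy are
 * illustrative only; real users of these helpers are typically
 * kallsyms-style address checks.
 */
static inline bool example_is_boot_time_text(unsigned long addr)
{
	/* Core kernel .text, valid for the lifetime of the kernel. */
	if (__is_kernel_text(addr))
		return true;

	/* .init.text only remains meaningful until the init sections are freed. */
	return is_kernel_inittext(addr);
}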
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/open.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/string.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/fsnotify.h>
#include <linux/module.h>
#include <linux/tty.h>
#include <linux/namei.h>
#include <linux/backing-dev.h>
#include <linux/capability.h>
#include <linux/securebits.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/rcupdate.h>
#include <linux/audit.h>
#include <linux/falloc.h>
#include <linux/fs_struct.h>
#include <linux/dnotify.h>
#include <linux/compat.h>
#include <linux/mnt_idmapping.h>
#include <linux/filelock.h>

#include "internal.h"

int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
		loff_t length, unsigned int time_attrs, struct file *filp)
{
	int ret;
	struct iattr newattrs;

	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
	if (length < 0)
		return -EINVAL;

	newattrs.ia_size = length;
	newattrs.ia_valid = ATTR_SIZE | time_attrs;
	if (filp) {
		newattrs.ia_file = filp;
		newattrs.ia_valid |= ATTR_FILE;
	}

	/* Remove suid, sgid, and file capabilities on truncate too */
	ret = dentry_needs_remove_privs(idmap, dentry);
	if (ret < 0)
		return ret;
	if (ret)
		newattrs.ia_valid |= ret | ATTR_FORCE;

	inode_lock(dentry->d_inode);
	/* Note any delegations or leases have already been broken: */
	ret = notify_change(idmap, dentry, &newattrs, NULL);
	inode_unlock(dentry->d_inode);
	return ret;
}

long vfs_truncate(const struct path *path, loff_t length)
{
	struct mnt_idmap *idmap;
	struct inode *inode;
	long error;

	inode = path->dentry->d_inode;

	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
	if (S_ISDIR(inode->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	error = mnt_want_write(path->mnt);
	if (error)
		goto out;

	idmap = mnt_idmap(path->mnt);
	error = inode_permission(idmap, inode, MAY_WRITE);
	if (error)
		goto mnt_drop_write_and_out;

	error = -EPERM;
	if (IS_APPEND(inode))
		goto mnt_drop_write_and_out;

	error = get_write_access(inode);
	if (error)
		goto mnt_drop_write_and_out;

	/*
	 * Make sure that there are no leases.  get_write_access() protects
	 * against the truncate racing with a lease-granting setlease().
	 */
	error = break_lease(inode, O_WRONLY);
	if (error)
		goto put_write_and_out;

	error = security_path_truncate(path);
	if (!error)
		error = do_truncate(idmap, path->dentry, length, 0, NULL);

put_write_and_out:
	put_write_access(inode);
mnt_drop_write_and_out:
	mnt_drop_write(path->mnt);
out:
	return error;
}
EXPORT_SYMBOL_GPL(vfs_truncate);

long do_sys_truncate(const char __user *pathname, loff_t length)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	struct path path;
	int error;

	if (length < 0)	/* sorry, but loff_t says...
*/ return -EINVAL; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (!error) { error = vfs_truncate(&path, length); path_put(&path); } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) { return do_sys_truncate(path, length); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length) { return do_sys_truncate(path, length); } #endif long do_ftruncate(struct file *file, loff_t length, int small) { struct inode *inode; struct dentry *dentry; int error; /* explicitly opened as large or we are on 64-bit box */ if (file->f_flags & O_LARGEFILE) small = 0; dentry = file->f_path.dentry; inode = dentry->d_inode; if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) return -EINVAL; /* Cannot ftruncate over 2^31 bytes without large file support */ if (small && length > MAX_NON_LFS) return -EINVAL; /* Check IS_APPEND on real upper inode */ if (IS_APPEND(file_inode(file))) return -EPERM; sb_start_write(inode->i_sb); error = security_file_truncate(file); if (!error) error = do_truncate(file_mnt_idmap(file), dentry, length, ATTR_MTIME | ATTR_CTIME, file); sb_end_write(inode->i_sb); return error; } long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct fd f; int error; if (length < 0) return -EINVAL; f = fdget(fd); if (!f.file) return -EBADF; error = do_ftruncate(f.file, length, small); fdput(f); return error; } SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) { return do_sys_ftruncate(fd, length, 1); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length) { return do_sys_ftruncate(fd, length, 1); } #endif /* LFS versions of truncate are only needed on 32 bit machines */ #if BITS_PER_LONG == 32 SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length) { return do_sys_truncate(path, length); } SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length) { return do_sys_ftruncate(fd, length, 0); } #endif /* BITS_PER_LONG == 32 */ #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_TRUNCATE64) COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname, compat_arg_u64_dual(length)) { return ksys_truncate(pathname, compat_arg_u64_glue(length)); } #endif #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FTRUNCATE64) COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, compat_arg_u64_dual(length)) { return ksys_ftruncate(fd, compat_arg_u64_glue(length)); } #endif int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); long ret; if (offset < 0 || len <= 0) return -EINVAL; /* Return error if mode is not supported */ if (mode & ~FALLOC_FL_SUPPORTED_MASK) return -EOPNOTSUPP; /* Punch hole and zero range are mutually exclusive */ if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) == (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; /* Punch hole must have keep size set */ if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE)) return -EOPNOTSUPP; /* Collapse range should only be used exclusively. */ if ((mode & FALLOC_FL_COLLAPSE_RANGE) && (mode & ~FALLOC_FL_COLLAPSE_RANGE)) return -EINVAL; /* Insert range should only be used exclusively. */ if ((mode & FALLOC_FL_INSERT_RANGE) && (mode & ~FALLOC_FL_INSERT_RANGE)) return -EINVAL; /* Unshare range should only be used with allocate mode. 
*/ if ((mode & FALLOC_FL_UNSHARE_RANGE) && (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE))) return -EINVAL; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; /* * We can only allow pure fallocate on append only files */ if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode)) return -EPERM; if (IS_IMMUTABLE(inode)) return -EPERM; /* * We cannot allow any fallocate operation on an active swapfile */ if (IS_SWAPFILE(inode)) return -ETXTBSY; /* * Revalidate the write permissions, in case security policy has * changed since the files were opened. */ ret = security_file_permission(file, MAY_WRITE); if (ret) return ret; ret = fsnotify_file_area_perm(file, MAY_WRITE, &offset, len); if (ret) return ret; if (S_ISFIFO(inode->i_mode)) return -ESPIPE; if (S_ISDIR(inode->i_mode)) return -EISDIR; if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -ENODEV; /* Check for wrap through zero too */ if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) return -EFBIG; if (!file->f_op->fallocate) return -EOPNOTSUPP; file_start_write(file); ret = file->f_op->fallocate(file, mode, offset, len); /* * Create inotify and fanotify events. * * To keep the logic simple always create events if fallocate succeeds. * This implies that events are even created if the file size remains * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE. */ if (ret == 0) fsnotify_modify(file); file_end_write(file); return ret; } EXPORT_SYMBOL_GPL(vfs_fallocate); int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len) { struct fd f = fdget(fd); int error = -EBADF; if (f.file) { error = vfs_fallocate(f.file, mode, offset, len); fdput(f); } return error; } SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) { return ksys_fallocate(fd, mode, offset, len); } #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FALLOCATE) COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset), compat_arg_u64_dual(len)) { return ksys_fallocate(fd, mode, compat_arg_u64_glue(offset), compat_arg_u64_glue(len)); } #endif /* * access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. * * Creating new credentials is expensive, so we try to skip doing it, * which we can if the result would match what we already got. */ static bool access_need_override_creds(int flags) { const struct cred *cred; if (flags & AT_EACCESS) return false; cred = current_cred(); if (!uid_eq(cred->fsuid, cred->uid) || !gid_eq(cred->fsgid, cred->gid)) return true; if (!issecure(SECURE_NO_SETUID_FIXUP)) { kuid_t root_uid = make_kuid(cred->user_ns, 0); if (!uid_eq(cred->uid, root_uid)) { if (!cap_isclear(cred->cap_effective)) return true; } else { if (!cap_isidentical(cred->cap_effective, cred->cap_permitted)) return true; } } return false; } static const struct cred *access_override_creds(void) { const struct cred *old_cred; struct cred *override_cred; override_cred = prepare_creds(); if (!override_cred) return NULL; /* * XXX access_need_override_creds performs checks in hopes of skipping * this work. Make sure it stays in sync if making any changes in this * routine. 
*/ override_cred->fsuid = override_cred->uid; override_cred->fsgid = override_cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ kuid_t root_uid = make_kuid(override_cred->user_ns, 0); if (!uid_eq(override_cred->uid, root_uid)) cap_clear(override_cred->cap_effective); else override_cred->cap_effective = override_cred->cap_permitted; } /* * The new set of credentials can *only* be used in * task-synchronous circumstances, and does not need * RCU freeing, unless somebody then takes a separate * reference to it. * * NOTE! This is _only_ true because this credential * is used purely for override_creds() that installs * it as the subjective cred. Other threads will be * accessing ->real_cred, not the subjective cred. * * If somebody _does_ make a copy of this (using the * 'get_current_cred()' function), that will clear the * non_rcu field, because now that other user may be * expecting RCU freeing. But normal thread-synchronous * cred accesses will keep things non-racy to avoid RCU * freeing. */ override_cred->non_rcu = 1; old_cred = override_creds(override_cred); /* override_cred() gets its own ref */ put_cred(override_cred); return old_cred; } static long do_faccessat(int dfd, const char __user *filename, int mode, int flags) { struct path path; struct inode *inode; int res; unsigned int lookup_flags = LOOKUP_FOLLOW; const struct cred *old_cred = NULL; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) return -EINVAL; if (flags & AT_SYMLINK_NOFOLLOW) lookup_flags &= ~LOOKUP_FOLLOW; if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; if (access_need_override_creds(flags)) { old_cred = access_override_creds(); if (!old_cred) return -ENOMEM; } retry: res = user_path_at(dfd, filename, lookup_flags, &path); if (res) goto out; inode = d_backing_inode(path.dentry); if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* * MAY_EXEC on regular files is denied if the fs is mounted * with the "noexec" flag. */ res = -EACCES; if (path_noexec(&path)) goto out_path_release; } res = inode_permission(mnt_idmap(path.mnt), inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; /* * This is a rare case where using __mnt_is_readonly() * is OK without a mnt_want/drop_write() pair. Since * no actual write to the fs is performed here, we do * not need to telegraph to that to anyone. * * By doing this, we accept that this access is * inherently racy and know that the fs may change * state before we even see this result. 
*/ if (__mnt_is_readonly(path.mnt)) res = -EROFS; out_path_release: path_put(&path); if (retry_estale(res, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: if (old_cred) revert_creds(old_cred); return res; } SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) { return do_faccessat(dfd, filename, mode, 0); } SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode, int, flags) { return do_faccessat(dfd, filename, mode, flags); } SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) { return do_faccessat(AT_FDCWD, filename, mode, 0); } SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; retry: error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; error = path_permission(&path, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; set_fs_pwd(current->fs, &path); dput_and_out: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); int error; error = -EBADF; if (!f.file) goto out; error = -ENOTDIR; if (!d_can_lookup(f.file->f_path.dentry)) goto out_putf; error = file_permission(f.file, MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: fdput(f); out: return error; } SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int error; unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; retry: error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; error = path_permission(&path, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; error = -EPERM; if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) goto dput_and_out; error = security_path_chroot(&path); if (error) goto dput_and_out; set_fs_root(current->fs, &path); error = 0; dput_and_out: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; struct iattr newattrs; int error; error = mnt_want_write(path->mnt); if (error) return error; retry_deleg: inode_lock(inode); error = security_path_chmod(path, mode); if (error) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(mnt_idmap(path->mnt), path->dentry, &newattrs, &delegated_inode); out_unlock: inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(path->mnt); return error; } int vfs_fchmod(struct file *file, umode_t mode) { audit_file(file); return chmod_common(&file->f_path, mode); } SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) { struct fd f = fdget(fd); int err = -EBADF; if (f.file) { err = vfs_fchmod(f.file, mode); fdput(f); } return err; } static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, unsigned int flags) { struct path path; int error; unsigned int lookup_flags; if (unlikely(flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))) return -EINVAL; lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 
0 : LOOKUP_FOLLOW; if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (!error) { error = chmod_common(&path, mode); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } } return error; } SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename, umode_t, mode, unsigned int, flags) { return do_fchmodat(dfd, filename, mode, flags); } SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode) { return do_fchmodat(dfd, filename, mode, 0); } SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) { return do_fchmodat(AT_FDCWD, filename, mode, 0); } /* * Check whether @kuid is valid and if so generate and set vfsuid_t in * ia_vfsuid. * * Return: true if @kuid is valid, false if not. */ static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid) { if (!uid_valid(kuid)) return false; attr->ia_valid |= ATTR_UID; attr->ia_vfsuid = VFSUIDT_INIT(kuid); return true; } /* * Check whether @kgid is valid and if so generate and set vfsgid_t in * ia_vfsgid. * * Return: true if @kgid is valid, false if not. */ static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid) { if (!gid_valid(kgid)) return false; attr->ia_valid |= ATTR_GID; attr->ia_vfsgid = VFSGIDT_INIT(kgid); return true; } int chown_common(const struct path *path, uid_t user, gid_t group) { struct mnt_idmap *idmap; struct user_namespace *fs_userns; struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; int error; struct iattr newattrs; kuid_t uid; kgid_t gid; uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); idmap = mnt_idmap(path->mnt); fs_userns = i_user_ns(inode); retry_deleg: newattrs.ia_vfsuid = INVALID_VFSUID; newattrs.ia_vfsgid = INVALID_VFSGID; newattrs.ia_valid = ATTR_CTIME; if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid)) return -EINVAL; if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid)) return -EINVAL; inode_lock(inode); if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV | setattr_should_drop_sgid(idmap, inode); /* Continue to send actual fs values, not the mount values. */ error = security_path_chown( path, from_vfsuid(idmap, fs_userns, newattrs.ia_vfsuid), from_vfsgid(idmap, fs_userns, newattrs.ia_vfsgid)); if (!error) error = notify_change(idmap, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag) { struct path path; int error = -EINVAL; int lookup_flags; if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) goto out; lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 
0 : LOOKUP_FOLLOW; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; error = mnt_want_write(path.mnt); if (error) goto out_release; error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag) { return do_fchownat(dfd, filename, user, group, flag); } SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) { return do_fchownat(AT_FDCWD, filename, user, group, 0); } SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) { return do_fchownat(AT_FDCWD, filename, user, group, AT_SYMLINK_NOFOLLOW); } int vfs_fchown(struct file *file, uid_t user, gid_t group) { int error; error = mnt_want_write_file(file); if (error) return error; audit_file(file); error = chown_common(&file->f_path, user, group); mnt_drop_write_file(file); return error; } int ksys_fchown(unsigned int fd, uid_t user, gid_t group) { struct fd f = fdget(fd); int error = -EBADF; if (f.file) { error = vfs_fchown(f.file, user, group); fdput(f); } return error; } SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { return ksys_fchown(fd, user, group); } static inline int file_get_write_access(struct file *f) { int error; error = get_write_access(f->f_inode); if (unlikely(error)) return error; error = mnt_get_write_access(f->f_path.mnt); if (unlikely(error)) goto cleanup_inode; if (unlikely(f->f_mode & FMODE_BACKING)) { error = mnt_get_write_access(backing_file_user_path(f)->mnt); if (unlikely(error)) goto cleanup_mnt; } return 0; cleanup_mnt: mnt_put_write_access(f->f_path.mnt); cleanup_inode: put_write_access(f->f_inode); return error; } static int do_dentry_open(struct file *f, struct inode *inode, int (*open)(struct inode *, struct file *)) { static const struct file_operations empty_fops = {}; int error; path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; f->f_wb_err = filemap_sample_wb_err(f->f_mapping); f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH | FMODE_OPENED; f->f_op = &empty_fops; return 0; } if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) { i_readcount_inc(inode); } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = file_get_write_access(f); if (unlikely(error)) goto cleanup_file; f->f_mode |= FMODE_WRITER; } /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) f->f_mode |= FMODE_ATOMIC_POS; f->f_op = fops_get(inode->i_fop); if (WARN_ON(!f->f_op)) { error = -ENODEV; goto cleanup_all; } error = security_file_open(f); if (error) goto cleanup_all; error = break_lease(file_inode(f), f->f_flags); if (error) goto cleanup_all; /* normally all 3 are set; ->open() can clear them if needed */ f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; if (!open) open = f->f_op->open; if (open) { error = open(inode, f); if (error) goto cleanup_all; } f->f_mode |= FMODE_OPENED; if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->read_iter)) f->f_mode |= FMODE_CAN_READ; if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->write_iter)) f->f_mode |= FMODE_CAN_WRITE; if ((f->f_mode & FMODE_LSEEK) && !f->f_op->llseek) f->f_mode &= ~FMODE_LSEEK; if 
(f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO) f->f_mode |= FMODE_CAN_ODIRECT; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); f->f_iocb_flags = iocb_flags(f); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL; /* * XXX: Huge page cache doesn't support writing yet. Drop all page * cache for this file before processing writes. */ if (f->f_mode & FMODE_WRITE) { /* * Paired with smp_mb() in collapse_file() to ensure nr_thps * is up to date and the update to i_writecount by * get_write_access() is visible. Ensures subsequent insertion * of THPs into the page cache will fail. */ smp_mb(); if (filemap_nr_thps(inode->i_mapping)) { struct address_space *mapping = inode->i_mapping; filemap_invalidate_lock(inode->i_mapping); /* * unmap_mapping_range just need to be called once * here, because the private pages is not need to be * unmapped mapping (e.g. data segment of dynamic * shared libraries here). */ unmap_mapping_range(mapping, 0, 0, 0); truncate_inode_pages(mapping, 0); filemap_invalidate_unlock(inode->i_mapping); } } /* * Once we return a file with FMODE_OPENED, __fput() will call * fsnotify_close(), so we need fsnotify_open() here for symmetry. */ fsnotify_open(f); return 0; cleanup_all: if (WARN_ON_ONCE(error > 0)) error = -EINVAL; fops_put(f->f_op); put_file_access(f); cleanup_file: path_put(&f->f_path); f->f_path.mnt = NULL; f->f_path.dentry = NULL; f->f_inode = NULL; return error; } /** * finish_open - finish opening a file * @file: file pointer * @dentry: pointer to dentry * @open: open callback * * This can be used to finish opening a file passed to i_op->atomic_open(). * * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. * * NB: the dentry reference is _not_ consumed. If, for example, the dentry is * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)) { BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; return do_dentry_open(file, d_backing_inode(dentry), open); } EXPORT_SYMBOL(finish_open); /** * finish_no_open - finish ->atomic_open() without opening the file * * @file: file pointer * @dentry: dentry or NULL (as returned from ->lookup()) * * This can be used to set the result of a successful lookup in ->atomic_open(). * * NB: unlike finish_open() this function does consume the dentry reference and * the caller need not dput() it. * * Returns "0" which must be the return value of ->atomic_open() after having * called this function. */ int finish_no_open(struct file *file, struct dentry *dentry) { file->f_path.dentry = dentry; return 0; } EXPORT_SYMBOL(finish_no_open); char *file_path(struct file *filp, char *buf, int buflen) { return d_path(&filp->f_path, buf, buflen); } EXPORT_SYMBOL(file_path); /** * vfs_open - open the file at the given path * @path: path to open * @file: newly allocated file with f_flag initialized */ int vfs_open(const struct path *path, struct file *file) { file->f_path = *path; return do_dentry_open(file, d_backing_inode(path->dentry), NULL); } struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { int error; struct file *f; /* We must always pass in a valid mount pointer. 
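do_dentry_open() above only sets FMODE_CAN_ODIRECT when the mapping provides direct I/O, and fails an O_DIRECT open with -EINVAL otherwise. From userspace, O_DIRECT additionally expects suitably aligned buffers, offsets and lengths; the exact alignment is filesystem and device dependent. The sketch below is a minimal, hedged example that simply assumes a 4096-byte alignment is acceptable and uses a made-up file name.

/* Illustrative userspace sketch only -- not part of fs/open.c. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    void *buf = NULL;
    int fd;

    /* Fails with EINVAL if the filesystem cannot do direct I/O. */
    fd = open("direct.dat", O_RDWR | O_CREAT | O_DIRECT, 0644); /* hypothetical file */
    if (fd < 0) { perror("open O_DIRECT"); return 1; }

    /* Assumed alignment: 4096 bytes for both the buffer and the I/O size. */
    if (posix_memalign(&buf, 4096, 4096) != 0) { close(fd); return 1; }
    memset(buf, 'x', 4096);

    if (pwrite(fd, buf, 4096, 0) != 4096)
        perror("pwrite (direct)");

    free(buf);
    close(fd);
    return 0;
}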
*/ BUG_ON(!path->mnt); f = alloc_empty_file(flags, cred); if (!IS_ERR(f)) { error = vfs_open(path, f); if (error) { fput(f); f = ERR_PTR(error); } } return f; } EXPORT_SYMBOL(dentry_open); /** * dentry_create - Create and open a file * @path: path to create * @flags: O_ flags * @mode: mode bits for new file * @cred: credentials to use * * Caller must hold the parent directory's lock, and have prepared * a negative dentry, placed in @path->dentry, for the new file. * * Caller sets @path->mnt to the vfsmount of the filesystem where * the new file is to be created. The parent directory and the * negative dentry must reside on the same filesystem instance. * * On success, returns a "struct file *". Otherwise a ERR_PTR * is returned. */ struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred) { struct file *f; int error; f = alloc_empty_file(flags, cred); if (IS_ERR(f)) return f; error = vfs_create(mnt_idmap(path->mnt), d_inode(path->dentry->d_parent), path->dentry, mode, true); if (!error) error = vfs_open(path, f); if (unlikely(error)) { fput(f); return ERR_PTR(error); } return f; } EXPORT_SYMBOL(dentry_create); /** * kernel_file_open - open a file for kernel internal use * @path: path of the file to open * @flags: open flags * @inode: the inode * @cred: credentials for open * * Open a file for use by in-kernel consumers. The file is not accounted * against nr_files and must not be installed into the file descriptor * table. * * Return: Opened file on success, an error pointer on failure. */ struct file *kernel_file_open(const struct path *path, int flags, struct inode *inode, const struct cred *cred) { struct file *f; int error; f = alloc_empty_file_noaccount(flags, cred); if (IS_ERR(f)) return f; f->f_path = *path; error = do_dentry_open(f, inode, NULL); if (error) { fput(f); f = ERR_PTR(error); } return f; } EXPORT_SYMBOL_GPL(kernel_file_open); #define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE)) #define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC) inline struct open_how build_open_how(int flags, umode_t mode) { struct open_how how = { .flags = flags & VALID_OPEN_FLAGS, .mode = mode & S_IALLUGO, }; /* O_PATH beats everything else. */ if (how.flags & O_PATH) how.flags &= O_PATH_FLAGS; /* Modes should only be set for create-like flags. */ if (!WILL_CREATE(how.flags)) how.mode = 0; return how; } inline int build_open_flags(const struct open_how *how, struct open_flags *op) { u64 flags = how->flags; u64 strip = __FMODE_NONOTIFY | O_CLOEXEC; int lookup_flags = 0; int acc_mode = ACC_MODE(flags); BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS), "struct open_flags doesn't yet handle flags > 32 bits"); /* * Strip flags that either shouldn't be set by userspace like * FMODE_NONOTIFY or that aren't relevant in determining struct * open_flags like O_CLOEXEC. */ flags &= ~strip; /* * Older syscalls implicitly clear all of the invalid flags or argument * values before calling build_open_flags(), but openat2(2) checks all * of its arguments. */ if (flags & ~VALID_OPEN_FLAGS) return -EINVAL; if (how->resolve & ~VALID_RESOLVE_FLAGS) return -EINVAL; /* Scoping flags are mutually exclusive. */ if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT)) return -EINVAL; /* Deal with the mode. */ if (WILL_CREATE(flags)) { if (how->mode & ~S_IALLUGO) return -EINVAL; op->mode = how->mode | S_IFREG; } else { if (how->mode != 0) return -EINVAL; op->mode = 0; } /* * Block bugs where O_DIRECTORY | O_CREAT created regular files. 
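build_open_how() above reduces an O_PATH open to O_PATH_FLAGS and clears the access mode, which is why an O_PATH descriptor can be used as a location handle for *at() calls but not for reading or writing. A hedged userspace sketch follows; the path is only an example and glibc is assumed.

/* Illustrative userspace sketch only -- not part of fs/open.c. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
    struct stat st;
    char buf[16];

    /* Location handle only: no read/write permission is checked. */
    int fd = open("/etc/hostname", O_PATH | O_CLOEXEC);   /* example path */
    if (fd < 0) { perror("open O_PATH"); return 1; }

    /* Metadata access through the handle works via AT_EMPTY_PATH. */
    if (fstatat(fd, "", &st, AT_EMPTY_PATH) == 0)
        printf("size: %lld\n", (long long)st.st_size);

    /* read(2) on an O_PATH descriptor fails with EBADF. */
    if (read(fd, buf, sizeof(buf)) < 0)
        perror("read on O_PATH fd");

    close(fd);
    return 0;
}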
* Note, that blocking O_DIRECTORY | O_CREAT here also protects * O_TMPFILE below which requires O_DIRECTORY being raised. */ if ((flags & (O_DIRECTORY | O_CREAT)) == (O_DIRECTORY | O_CREAT)) return -EINVAL; /* Now handle the creative implementation of O_TMPFILE. */ if (flags & __O_TMPFILE) { /* * In order to ensure programs get explicit errors when trying * to use O_TMPFILE on old kernels we enforce that O_DIRECTORY * is raised alongside __O_TMPFILE. */ if (!(flags & O_DIRECTORY)) return -EINVAL; if (!(acc_mode & MAY_WRITE)) return -EINVAL; } if (flags & O_PATH) { /* O_PATH only permits certain other flags to be set. */ if (flags & ~O_PATH_FLAGS) return -EINVAL; acc_mode = 0; } /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only * check for O_DSYNC if the need any syncing at all we enforce it's * always set instead of having to deal with possibly weird behaviour * for malicious applications setting only __O_SYNC. */ if (flags & __O_SYNC) flags |= O_DSYNC; op->open_flag = flags; /* O_TRUNC implies we need access checks for write permissions */ if (flags & O_TRUNC) acc_mode |= MAY_WRITE; /* Allow the LSM permission hook to distinguish append access from general write access. */ if (flags & O_APPEND) acc_mode |= MAY_APPEND; op->acc_mode = acc_mode; op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN; if (flags & O_CREAT) { op->intent |= LOOKUP_CREATE; if (flags & O_EXCL) { op->intent |= LOOKUP_EXCL; flags |= O_NOFOLLOW; } } if (flags & O_DIRECTORY) lookup_flags |= LOOKUP_DIRECTORY; if (!(flags & O_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; if (how->resolve & RESOLVE_NO_XDEV) lookup_flags |= LOOKUP_NO_XDEV; if (how->resolve & RESOLVE_NO_MAGICLINKS) lookup_flags |= LOOKUP_NO_MAGICLINKS; if (how->resolve & RESOLVE_NO_SYMLINKS) lookup_flags |= LOOKUP_NO_SYMLINKS; if (how->resolve & RESOLVE_BENEATH) lookup_flags |= LOOKUP_BENEATH; if (how->resolve & RESOLVE_IN_ROOT) lookup_flags |= LOOKUP_IN_ROOT; if (how->resolve & RESOLVE_CACHED) { /* Don't bother even trying for create/truncate/tmpfile open */ if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) return -EAGAIN; lookup_flags |= LOOKUP_CACHED; } op->lookup_flags = lookup_flags; return 0; } /** * file_open_name - open file and return file pointer * * @name: struct filename containing path to open * @flags: open flags as per the open(2) second argument * @mode: mode for the new file if O_CREAT is set, else ignored * * This is the helper to open a file from kernelspace if you really * have to. But in generally you should not do this, so please move * along, nothing to see here.. */ struct file *file_open_name(struct filename *name, int flags, umode_t mode) { struct open_flags op; struct open_how how = build_open_how(flags, mode); int err = build_open_flags(&how, &op); if (err) return ERR_PTR(err); return do_filp_open(AT_FDCWD, name, &op); } /** * filp_open - open file and return file pointer * * @filename: path to open * @flags: open flags as per the open(2) second argument * @mode: mode for the new file if O_CREAT is set, else ignored * * This is the helper to open a file from kernelspace if you really * have to. But in generally you should not do this, so please move * along, nothing to see here.. 
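build_open_flags() above translates the openat2(2) RESOLVE_* flags into LOOKUP_* lookup flags. The sketch below calls the syscall directly, since glibc has not traditionally shipped an openat2() wrapper; it assumes <linux/openat2.h> and SYS_openat2 are available, and the directory and file names are made up.

/* Illustrative userspace sketch only -- not part of fs/open.c. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/openat2.h>   /* struct open_how, RESOLVE_* */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
    int dirfd = open("/srv/data", O_PATH | O_DIRECTORY | O_CLOEXEC); /* hypothetical dir */
    if (dirfd < 0) { perror("open dirfd"); return 1; }

    /* Resolve strictly beneath dirfd and refuse any symlinks. */
    struct open_how how;
    memset(&how, 0, sizeof(how));
    how.flags = O_RDONLY | O_CLOEXEC;
    how.resolve = RESOLVE_BENEATH | RESOLVE_NO_SYMLINKS;

    long fd = syscall(SYS_openat2, dirfd, "reports/latest.txt", &how, sizeof(how));
    if (fd < 0)
        perror("openat2");   /* e.g. rejected if the path tries to escape dirfd */
    else
        close((int)fd);

    close(dirfd);
    return 0;
}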
*/ struct file *filp_open(const char *filename, int flags, umode_t mode) { struct filename *name = getname_kernel(filename); struct file *file = ERR_CAST(name); if (!IS_ERR(name)) { file = file_open_name(name, flags, mode); putname(name); } return file; } EXPORT_SYMBOL(filp_open); struct file *file_open_root(const struct path *root, const char *filename, int flags, umode_t mode) { struct open_flags op; struct open_how how = build_open_how(flags, mode); int err = build_open_flags(&how, &op); if (err) return ERR_PTR(err); return do_file_open_root(root, filename, &op); } EXPORT_SYMBOL(file_open_root); static long do_sys_openat2(int dfd, const char __user *filename, struct open_how *how) { struct open_flags op; int fd = build_open_flags(how, &op); struct filename *tmp; if (fd) return fd; tmp = getname(filename); if (IS_ERR(tmp)) return PTR_ERR(tmp); fd = get_unused_fd_flags(how->flags); if (fd >= 0) { struct file *f = do_filp_open(dfd, tmp, &op); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); } else { fd_install(fd, f); } } putname(tmp); return fd; } long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_how how = build_open_how(flags, mode); return do_sys_openat2(dfd, filename, &how); } SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) { if (force_o_largefile()) flags |= O_LARGEFILE; return do_sys_open(AT_FDCWD, filename, flags, mode); } SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) { if (force_o_largefile()) flags |= O_LARGEFILE; return do_sys_open(dfd, filename, flags, mode); } SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename, struct open_how __user *, how, size_t, usize) { int err; struct open_how tmp; BUILD_BUG_ON(sizeof(struct open_how) < OPEN_HOW_SIZE_VER0); BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_LATEST); if (unlikely(usize < OPEN_HOW_SIZE_VER0)) return -EINVAL; err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize); if (err) return err; audit_openat2_how(&tmp); /* O_LARGEFILE is only allowed for non-O_PATH. */ if (!(tmp.flags & O_PATH) && force_o_largefile()) tmp.flags |= O_LARGEFILE; return do_sys_openat2(dfd, filename, &tmp); } #ifdef CONFIG_COMPAT /* * Exactly like sys_open(), except that it doesn't set the * O_LARGEFILE flag. */ COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) { return do_sys_open(AT_FDCWD, filename, flags, mode); } /* * Exactly like sys_openat(), except that it doesn't set the * O_LARGEFILE flag. */ COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) { return do_sys_open(dfd, filename, flags, mode); } #endif #ifndef __alpha__ /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) { int flags = O_CREAT | O_WRONLY | O_TRUNC; if (force_o_largefile()) flags |= O_LARGEFILE; return do_sys_open(AT_FDCWD, pathname, flags, mode); } #endif /* * "id" is the POSIX thread ID. We use the * files pointer for this.. 
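As enforced in build_open_flags() above, __O_TMPFILE must be raised together with O_DIRECTORY and a write mode (the userspace O_TMPFILE constant already includes O_DIRECTORY). A hedged userspace sketch of the usual create-then-link pattern follows; it assumes a filesystem that supports O_TMPFILE and uses the /proc/self/fd linking idiom, since linkat() with AT_EMPTY_PATH normally needs CAP_DAC_READ_SEARCH. The target name is an example only.

/* Illustrative userspace sketch only -- not part of fs/open.c. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char procpath[64];

    /* Anonymous, unnamed file in /tmp. */
    int fd = open("/tmp", O_TMPFILE | O_RDWR, 0600);
    if (fd < 0) { perror("open O_TMPFILE"); return 1; }   /* EOPNOTSUPP if unsupported */

    if (write(fd, "payload\n", 8) != 8)
        perror("write");

    /* Give the fully written file a name, visible to readers only when complete. */
    snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fd);
    if (linkat(AT_FDCWD, procpath, AT_FDCWD, "/tmp/published.txt",   /* hypothetical name */
               AT_SYMLINK_FOLLOW) < 0)
        perror("linkat");

    close(fd);
    return 0;
}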
*/ static int filp_flush(struct file *filp, fl_owner_t id) { int retval = 0; if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, "VFS: Close: file count is 0 (f_op=%ps)", filp->f_op)) { return 0; } if (filp->f_op->flush) retval = filp->f_op->flush(filp, id); if (likely(!(filp->f_mode & FMODE_PATH))) { dnotify_flush(filp, id); locks_remove_posix(filp, id); } return retval; } int filp_close(struct file *filp, fl_owner_t id) { int retval; retval = filp_flush(filp, id); fput(filp); return retval; } EXPORT_SYMBOL(filp_close); /* * Careful here! We test whether the file pointer is NULL before * releasing the fd. This ensures that one clone task can't release * an fd while another clone is opening it. */ SYSCALL_DEFINE1(close, unsigned int, fd) { int retval; struct file *file; file = file_close_fd(fd); if (!file) return -EBADF; retval = filp_flush(file, current->files); /* * We're returning to user space. Don't bother * with any delayed fput() cases. */ __fput_sync(file); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || retval == -ERESTARTNOINTR || retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK)) retval = -EINTR; return retval; } /** * sys_close_range() - Close all file descriptors in a given range. * * @fd: starting file descriptor to close * @max_fd: last file descriptor to close * @flags: reserved for future extensions * * This closes a range of file descriptors. All file descriptors * from @fd up to and including @max_fd are closed. * Currently, errors to close a given file descriptor are ignored. */ SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd, unsigned int, flags) { return __close_range(fd, max_fd, flags); } /* * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. */ SYSCALL_DEFINE0(vhangup) { if (capable(CAP_SYS_TTY_CONFIG)) { tty_vhangup_self(); return 0; } return -EPERM; } /* * Called when an inode is about to be open. * We use this to disallow opening large files on 32bit systems if * the caller didn't specify O_LARGEFILE. On 64bit systems we force * on this flag in sys_open. */ int generic_file_open(struct inode * inode, struct file * filp) { if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EOVERFLOW; return 0; } EXPORT_SYMBOL(generic_file_open); /* * This is used by subsystems that don't want seekable * file descriptors. The function is not supposed to ever fail, the only * reason it returns an 'int' and not 'void' is so that it can be plugged * directly into file_operations structure. */ int nonseekable_open(struct inode *inode, struct file *filp) { filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); return 0; } EXPORT_SYMBOL(nonseekable_open); /* * stream_open is used by subsystems that want stream-like file descriptors. * Such file descriptors are not seekable and don't have notion of position * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL). * Contrary to file descriptors of other regular files, .read() and .write() * can run simultaneously. * * stream_open never fails and is marked to return int so that it could be * directly used as file_operations.open . */ int stream_open(struct inode *inode, struct file *filp) { filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); filp->f_mode |= FMODE_STREAM; return 0; } EXPORT_SYMBOL(stream_open);
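close_range(2), defined above, closes every descriptor from fd through max_fd and ignores per-descriptor errors. A common use is dropping all inherited descriptors above stdio before exec. The sketch below assumes the glibc close_range() wrapper (available in glibc 2.34 and later); on older systems the raw syscall would be needed instead.

/* Illustrative userspace sketch only -- not part of fs/open.c. */
#define _GNU_SOURCE
#include <limits.h>
#include <stdio.h>
#include <unistd.h>     /* close_range() in recent glibc */

int main(void)
{
    /* Close everything above stdin/stdout/stderr before exec'ing a child. */
    if (close_range(3, UINT_MAX, 0) < 0)
        perror("close_range");

    execlp("true", "true", (char *)NULL);
    perror("execlp");
    return 1;
}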
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_BACKING_DEV_DEFS_H #define __LINUX_BACKING_DEV_DEFS_H #include <linux/list.h> #include <linux/radix-tree.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/percpu_counter.h> #include <linux/percpu-refcount.h> #include <linux/flex_proportions.h> #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/kref.h> #include <linux/refcount.h> struct page; struct device; struct dentry; /* * Bits in bdi_writeback.state */ enum wb_state { WB_registered, /* bdi_register() was done */ WB_writeback_running, /* Writeback is in progress */ WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ WB_start_all, /* nr_pages == 0 (all) work pending */ }; enum wb_stat_item { WB_RECLAIMABLE, WB_WRITEBACK, WB_DIRTIED, WB_WRITTEN, NR_WB_STAT_ITEMS }; #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) /* * why some writeback work was initiated */ enum wb_reason { WB_REASON_BACKGROUND, WB_REASON_VMSCAN, WB_REASON_SYNC, WB_REASON_PERIODIC, WB_REASON_LAPTOP_TIMER, WB_REASON_FS_FREE_SPACE, /* * There is no bdi forker thread any more and works are done * by emergency worker, however, this is TPs userland visible * and we'll be exposing exactly the same information, * so it has a mismatch name. */ WB_REASON_FORKER_THREAD, WB_REASON_FOREIGN_FLUSH, WB_REASON_MAX, }; struct wb_completion { atomic_t cnt; wait_queue_head_t *waitq; }; #define __WB_COMPLETION_INIT(_waitq) \ (struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) } /* * If one wants to wait for one or more wb_writeback_works, each work's * ->done should be set to a wb_completion defined using the following * macro. Once all work items are issued with wb_queue_work(), the caller * can wait for the completion of all using wb_wait_for_completion(). Work * items which are waited upon aren't freed automatically on completion. */ #define WB_COMPLETION_INIT(bdi) __WB_COMPLETION_INIT(&(bdi)->wb_waitq) #define DEFINE_WB_COMPLETION(cmpl, bdi) \ struct wb_completion cmpl = WB_COMPLETION_INIT(bdi) /* * Each wb (bdi_writeback) can perform writeback operations, is measured * and throttled, independently. Without cgroup writeback, each bdi * (bdi_writeback) is served by its embedded bdi->wb. * * On the default hierarchy, blkcg implicitly enables memcg.
This allows * using memcg's page ownership for attributing writeback IOs, and every * memcg - blkcg combination can be served by its own wb by assigning a * dedicated wb to each memcg, which enables isolation across different * cgroups and propagation of IO back pressure down from the IO layer upto * the tasks which are generating the dirty pages to be written back. * * A cgroup wb is indexed on its bdi by the ID of the associated memcg, * refcounted with the number of inodes attached to it, and pins the memcg * and the corresponding blkcg. As the corresponding blkcg for a memcg may * change as blkcg is disabled and enabled higher up in the hierarchy, a wb * is tested for blkcg after lookup and removed from index on mismatch so * that a new wb for the combination can be created. * * Each bdi_writeback that is not embedded into the backing_dev_info must hold * a reference to the parent backing_dev_info. See cgwb_create() for details. */ struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ unsigned long state; /* Always use atomic bitops on this */ unsigned long last_old_flush; /* last old data flush */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ struct list_head b_dirty_time; /* time stamps are dirty */ spinlock_t list_lock; /* protects the b_* lists */ atomic_t writeback_inodes; /* number of inodes under writeback */ struct percpu_counter stat[NR_WB_STAT_ITEMS]; unsigned long bw_time_stamp; /* last time write bw is updated */ unsigned long dirtied_stamp; unsigned long written_stamp; /* pages written at bw_time_stamp */ unsigned long write_bandwidth; /* the estimated write bandwidth */ unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */ /* * The base dirty throttle rate, re-calculated on every 200ms. * All the bdi tasks' dirty rate will be curbed under it. * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit * in small steps and is much more smooth/stable than the latter. 
*/ unsigned long dirty_ratelimit; unsigned long balanced_dirty_ratelimit; struct fprop_local_percpu completions; int dirty_exceeded; enum wb_reason start_all_reason; spinlock_t work_lock; /* protects work_list & dwork scheduling */ struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK struct percpu_ref refcnt; /* used only for !root wb's */ struct fprop_local_percpu memcg_completions; struct cgroup_subsys_state *memcg_css; /* the associated memcg */ struct cgroup_subsys_state *blkcg_css; /* and blkcg */ struct list_head memcg_node; /* anchored at memcg->cgwb_list */ struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ struct list_head b_attached; /* attached inodes, protected by list_lock */ struct list_head offline_node; /* anchored at offline_cgwbs */ union { struct work_struct release_work; struct rcu_head rcu; }; #endif }; struct backing_dev_info { u64 id; struct rb_node rb_node; /* keyed by ->id */ struct list_head bdi_list; unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ unsigned long io_pages; /* max allowed IO size */ struct kref refcnt; /* Reference counter for the structure */ unsigned int capabilities; /* Device capabilities */ unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; /* * Sum of avg_write_bw of wbs with dirty inodes. > 0 if there are * any dirty wbs, which is depended upon by bdi_has_dirty(). */ atomic_long_t tot_write_bandwidth; /* * Jiffies when last process was dirty throttled on this bdi. Used by * blk-wbt. */ unsigned long last_bdp_sleep; struct bdi_writeback wb; /* the root writeback info for this bdi */ struct list_head wb_list; /* list of all wbs */ #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */ struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */ #endif wait_queue_head_t wb_waitq; struct device *dev; char dev_name[64]; struct device *owner; struct timer_list laptop_mode_wb_timer; #ifdef CONFIG_DEBUG_FS struct dentry *debug_dir; #endif }; struct wb_lock_cookie { bool locked; unsigned long flags; }; #ifdef CONFIG_CGROUP_WRITEBACK /** * wb_tryget - try to increment a wb's refcount * @wb: bdi_writeback to get */ static inline bool wb_tryget(struct bdi_writeback *wb) { if (wb != &wb->bdi->wb) return percpu_ref_tryget(&wb->refcnt); return true; } /** * wb_get - increment a wb's refcount * @wb: bdi_writeback to get */ static inline void wb_get(struct bdi_writeback *wb) { if (wb != &wb->bdi->wb) percpu_ref_get(&wb->refcnt); } /** * wb_put - decrement a wb's refcount * @wb: bdi_writeback to put * @nr: number of references to put */ static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr) { if (WARN_ON_ONCE(!wb->bdi)) { /* * A driver bug might cause a file to be removed before bdi was * initialized. */ return; } if (wb != &wb->bdi->wb) percpu_ref_put_many(&wb->refcnt, nr); } /** * wb_put - decrement a wb's refcount * @wb: bdi_writeback to put */ static inline void wb_put(struct bdi_writeback *wb) { wb_put_many(wb, 1); } /** * wb_dying - is a wb dying? * @wb: bdi_writeback of interest * * Returns whether @wb is unlinked and being drained. 
*/ static inline bool wb_dying(struct bdi_writeback *wb) { return percpu_ref_is_dying(&wb->refcnt); } #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool wb_tryget(struct bdi_writeback *wb) { return true; } static inline void wb_get(struct bdi_writeback *wb) { } static inline void wb_put(struct bdi_writeback *wb) { } static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr) { } static inline bool wb_dying(struct bdi_writeback *wb) { return false; } #endif /* CONFIG_CGROUP_WRITEBACK */ #endif /* __LINUX_BACKING_DEV_DEFS_H */
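The wb_tryget()/wb_get()/wb_put() helpers above only touch the percpu refcount for cgroup wbs; the embedded bdi->wb is never refcounted. The following is a minimal kernel-style sketch of the intended pinning pattern, not code from this header: flush_one_wb() is a hypothetical caller that keeps a wb alive across a sleeping operation, using only the helpers defined above.

/*
 * Illustrative kernel-style sketch only -- not part of this header.
 * Assumes the caller already holds a pointer to a candidate wb
 * (obtained under RCU or a suitable lock).
 */
static int flush_one_wb(struct bdi_writeback *wb)
{
    /* Fails once a cgroup wb's refcount is being torn down. */
    if (!wb_tryget(wb))
        return -ENODEV;

    if (wb_dying(wb)) {
        /* Unlinked and draining: nothing useful to queue here. */
        wb_put(wb);
        return -ENODEV;
    }

    /* ... queue writeback work against @wb here (schematic) ... */

    wb_put(wb);
    return 0;
}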
5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 
5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Routines having to do with the 'struct sk_buff' memory handlers.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
 *			Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 *	Fixes:
 *		Alan Cox	:	Fixed the worst of the load
 *					balancer bugs.
 *		Dave Platt	:	Interrupt stacking fix.
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	Changed buffer format.
* Alan Cox : destructor hook for AF_UNIX etc. * Linus Torvalds : Better skb_clone. * Alan Cox : Added skb_copy. * Alan Cox : Added all the changed routines Linus * only put in the headers * Ray VanTassle : Fixed --skb->lock in free * Alan Cox : skb_copy copy arp field * Andi Kleen : slabified it. * Robert Olsson : Removed skb_head_pool * * NOTE: * The __skb_ routines should be called with interrupts * disabled, or you better be *real* sure that the operation is atomic * with respect to whatever list is being frobbed (e.g. via lock_sock() * or via disabling bottom half handlers, etc). */ /* * The functions in this file will not compile correctly with gcc 2.4.x */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/sctp.h> #include <linux/netdevice.h> #ifdef CONFIG_NET_CLS_ACT #include <net/pkt_sched.h> #endif #include <linux/string.h> #include <linux/skbuff.h> #include <linux/splice.h> #include <linux/cache.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/scatterlist.h> #include <linux/errqueue.h> #include <linux/prefetch.h> #include <linux/bitfield.h> #include <linux/if_vlan.h> #include <linux/mpls.h> #include <linux/kcov.h> #include <linux/iov_iter.h> #include <net/protocol.h> #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> #include <net/gso.h> #include <net/hotdata.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> #include <net/mptcp.h> #include <net/mctp.h> #include <net/page_pool/helpers.h> #include <net/dropreason.h> #include <linux/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> #include <linux/capability.h> #include <linux/user_namespace.h> #include <linux/indirect_call_wrapper.h> #include <linux/textsearch.h> #include "dev.h" #include "sock_destructor.h" #ifdef CONFIG_SKB_EXTENSIONS static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique * size, and we can differentiate heads from skb_small_head_cache * vs system slabs by looking at their size (skb_end_offset()). */ #define SKB_SMALL_HEAD_CACHE_SIZE \ (is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \ (SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \ SKB_SMALL_HEAD_SIZE) #define SKB_SMALL_HEAD_HEADROOM \ SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); /* kcm_write_msgs() relies on casting paged frags to bio_vec to use * iov_iter_bvec(). These static asserts ensure the cast is valid is long as the * netmem is a page. 
*/ static_assert(offsetof(struct bio_vec, bv_page) == offsetof(skb_frag_t, netmem)); static_assert(sizeof_field(struct bio_vec, bv_page) == sizeof_field(skb_frag_t, netmem)); static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len)); static_assert(sizeof_field(struct bio_vec, bv_len) == sizeof_field(skb_frag_t, len)); static_assert(offsetof(struct bio_vec, bv_offset) == offsetof(skb_frag_t, offset)); static_assert(sizeof_field(struct bio_vec, bv_offset) == sizeof_field(skb_frag_t, offset)); #undef FN #define FN(reason) [SKB_DROP_REASON_##reason] = #reason, static const char * const drop_reasons[] = { [SKB_CONSUMED] = "CONSUMED", DEFINE_DROP_REASON(FN, FN) }; static const struct drop_reason_list drop_reasons_core = { .reasons = drop_reasons, .n_reasons = ARRAY_SIZE(drop_reasons), }; const struct drop_reason_list __rcu * drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM] = { [SKB_DROP_REASON_SUBSYS_CORE] = RCU_INITIALIZER(&drop_reasons_core), }; EXPORT_SYMBOL(drop_reasons_by_subsys); /** * drop_reasons_register_subsys - register another drop reason subsystem * @subsys: the subsystem to register, must not be the core * @list: the list of drop reasons within the subsystem, must point to * a statically initialized list */ void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys, const struct drop_reason_list *list) { if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE || subsys >= ARRAY_SIZE(drop_reasons_by_subsys), "invalid subsystem %d\n", subsys)) return; /* must point to statically allocated memory, so INIT is OK */ RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], list); } EXPORT_SYMBOL_GPL(drop_reasons_register_subsys); /** * drop_reasons_unregister_subsys - unregister a drop reason subsystem * @subsys: the subsystem to remove, must not be the core * * Note: This will synchronize_rcu() to ensure no users when it returns. */ void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys) { if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE || subsys >= ARRAY_SIZE(drop_reasons_by_subsys), "invalid subsystem %d\n", subsys)) return; RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], NULL); synchronize_rcu(); } EXPORT_SYMBOL_GPL(drop_reasons_unregister_subsys); /** * skb_panic - private function for out-of-line support * @skb: buffer * @sz: size * @addr: address * @msg: skb_over_panic or skb_under_panic * * Out-of-line support for skb_put() and skb_push(). * Called via the wrapper skb_over_panic() or skb_under_panic(). * Keep out of line to prevent kernel bloat. * __builtin_return_address is not used because it is not always reliable. */ static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, const char msg[]) { pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n", msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); } static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { skb_panic(skb, sz, addr, __func__); } static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) { skb_panic(skb, sz, addr, __func__); } #define NAPI_SKB_CACHE_SIZE 64 #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) #if PAGE_SIZE == SZ_4K #define NAPI_HAS_SMALL_PAGE_FRAG 1 #define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc) /* specialized page frag allocator using a single order 0 page * and slicing it into 1K sized fragment. 
Constrained to systems * with a very limited amount of 1K fragments fitting a single * page - to avoid excessive truesize underestimation */ struct page_frag_1k { void *va; u16 offset; bool pfmemalloc; }; static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp) { struct page *page; int offset; offset = nc->offset - SZ_1K; if (likely(offset >= 0)) goto use_frag; page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); if (!page) return NULL; nc->va = page_address(page); nc->pfmemalloc = page_is_pfmemalloc(page); offset = PAGE_SIZE - SZ_1K; page_ref_add(page, offset / SZ_1K); use_frag: nc->offset = offset; return nc->va + offset; } #else /* the small page is actually unused in this build; add dummy helpers * to please the compiler and avoid later preprocessor's conditionals */ #define NAPI_HAS_SMALL_PAGE_FRAG 0 #define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false struct page_frag_1k { }; static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) { return NULL; } #endif struct napi_alloc_cache { struct page_frag_cache page; struct page_frag_1k page_small; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); /* Double check that napi_get_frags() allocates skbs with * skb->head being backed by slab, not a page fragment. * This is to make sure bug fixed in 3226b158e67c * ("net: avoid 32 x truesize under-estimation for tiny skbs") * does not accidentally come back. */ void napi_get_frags_check(struct napi_struct *napi) { struct sk_buff *skb; local_bh_disable(); skb = napi_get_frags(napi); WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag); napi_free_frags(napi); local_bh_enable(); } void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); fragsz = SKB_DATA_ALIGN(fragsz); return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); } EXPORT_SYMBOL(__napi_alloc_frag_align); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { void *data; fragsz = SKB_DATA_ALIGN(fragsz); if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); } else { struct napi_alloc_cache *nc; local_bh_disable(); nc = this_cpu_ptr(&napi_alloc_cache); data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); local_bh_enable(); } return data; } EXPORT_SYMBOL(__netdev_alloc_frag_align); static struct sk_buff *napi_skb_cache_get(void) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); if (unlikely(!nc->skb_count)) return NULL; } skb = nc->skb_cache[--nc->skb_count]; kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache)); return skb; } static inline void __finalize_skb_around(struct sk_buff *skb, void *data, unsigned int size) { struct skb_shared_info *shinfo; size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Assumes caller memset cleared SKB */ skb->truesize = SKB_TRUESIZE(size); refcount_set(&skb->users, 1); skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); skb_set_end_offset(skb, size); skb->mac_header = (typeof(skb->mac_header))~0U; skb->transport_header = (typeof(skb->transport_header))~0U; 
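	/* ~0U marks the MAC/transport header offsets as "not set yet";
	 * skb_mac_header_was_set() and skb_transport_header_was_set()
	 * compare against this sentinel.
	 */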
skb->alloc_cpu = raw_smp_processor_id(); /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); skb_set_kcov_handle(skb, kcov_common_handle()); } static inline void *__slab_build_skb(struct sk_buff *skb, void *data, unsigned int *size) { void *resized; /* Must find the allocation size (and grow it to match). */ *size = ksize(data); /* krealloc() will immediately return "data" when * "ksize(data)" is requested: it is the existing upper * bounds. As a result, GFP_ATOMIC will be ignored. Note * that this "new" pointer needs to be passed back to the * caller for use so the __alloc_size hinting will be * tracked correctly. */ resized = krealloc(data, *size, GFP_ATOMIC); WARN_ON_ONCE(resized != data); return resized; } /* build_skb() variant which can operate on slab buffers. * Note that this should be used sparingly as slab buffers * cannot be combined efficiently by GRO! */ struct sk_buff *slab_build_skb(void *data) { struct sk_buff *skb; unsigned int size; skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); data = __slab_build_skb(skb, data, &size); __finalize_skb_around(skb, data, size); return skb; } EXPORT_SYMBOL(slab_build_skb); /* Caller must provide SKB that is memset cleared */ static void __build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) { unsigned int size = frag_size; /* frag_size == 0 is considered deprecated now. Callers * using slab buffer should use slab_build_skb() instead. */ if (WARN_ONCE(size == 0, "Use slab_build_skb() instead")) data = __slab_build_skb(skb, data, &size); __finalize_skb_around(skb, data, size); } /** * __build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data (must not be 0) * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated from the page * allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc() * allocation is deprecated, and callers should use slab_build_skb() * instead.) * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : * Before IO, driver allocates only data buffer where NIC put incoming frame * Driver should add room at head (NET_SKB_PAD) and * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)) * After IO, driver calls build_skb(), to allocate sk_buff and populate it * before giving packet to stack. * RX rings only contains data buffers, not full skbs. 
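 *
 * A minimal sketch of the pattern described above (hypothetical driver
 * code, illustration only; rx_page and frame_len are placeholder names,
 * and the buffer is assumed to come from the page allocator):
 *
 *	void *data = page_address(rx_page);
 *	unsigned int size = SKB_DATA_ALIGN(NET_SKB_PAD + frame_len) +
 *			    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 *	struct sk_buff *skb = build_skb(data, size);
 *
 *	if (skb) {
 *		skb_reserve(skb, NET_SKB_PAD);
 *		skb_put(skb, frame_len);
 *	}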
*/ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, frag_size); return skb; } /* build_skb() is wrapper over __build_skb(), that specifically * takes care of skb->head and skb->pfmemalloc */ struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __build_skb(data, frag_size); if (likely(skb && frag_size)) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(build_skb); /** * build_skb_around - build a network buffer around provided skb * @skb: sk_buff provide by caller, must be memset cleared * @data: data buffer provided by caller * @frag_size: size of data */ struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) { if (unlikely(!skb)) return NULL; __build_skb_around(skb, data, frag_size); if (frag_size) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(build_skb_around); /** * __napi_build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data * * Version of __build_skb() that uses NAPI percpu caches to obtain * skbuff_head instead of inplace allocation. * * Returns a new &sk_buff on success, %NULL on allocation failure. */ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; skb = napi_skb_cache_get(); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, frag_size); return skb; } /** * napi_build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data * * Version of __napi_build_skb() that takes care of skb->head_frag * and skb->pfmemalloc when the data is a page or page fragment. * * Returns a new &sk_buff on success, %NULL on allocation failure. */ struct sk_buff *napi_build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __napi_build_skb(data, frag_size); if (likely(skb) && frag_size) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(napi_build_skb); /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells * the caller if emergency pfmemalloc reserves are being used. If it is and * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves * may be used. Otherwise, the packet data may be discarded until enough * memory is free */ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, bool *pfmemalloc) { bool ret_pfmemalloc = false; size_t obj_size; void *obj; obj_size = SKB_HEAD_ALIGN(*size); if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); *size = SKB_SMALL_HEAD_CACHE_SIZE; if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node); goto out; } obj_size = kmalloc_size_roundup(obj_size); /* The following cast might truncate high-order bits of obj_size, this * is harmless because kmalloc(obj_size >= 2^32) will fail anyway. 
*/ *size = (unsigned int)obj_size; /* * Try a regular allocation, when that fails and we're not entitled * to the reserves, fail. */ obj = kmalloc_node_track_caller(obj_size, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; obj = kmalloc_node_track_caller(obj_size, flags, node); out: if (pfmemalloc) *pfmemalloc = ret_pfmemalloc; return obj; } /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. * */ /** * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache * instead of head cache and allocate a cloned (child) skb. * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for * allocations in case the data is required for writeback * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of at least size bytes. The object has a reference count * of one. The return is the buffer. On a failure the return is %NULL. * * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int flags, int node) { struct kmem_cache *cache; struct sk_buff *skb; bool pfmemalloc; u8 *data; cache = (flags & SKB_ALLOC_FCLONE) ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache; if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) gfp_mask |= __GFP_MEMALLOC; /* Get the HEAD */ if ((flags & (SKB_ALLOC_FCLONE | SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI && likely(node == NUMA_NO_NODE || node == numa_mem_id())) skb = napi_skb_cache_get(); else skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node); if (unlikely(!skb)) return NULL; prefetchw(skb); /* We do our best to align skb_shared_info on a separate cache * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives * aligned memory blocks, unless SLUB/SLAB debug is enabled. * Both skb->head and skb_shared_info are cache line aligned. */ data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc); if (unlikely(!data)) goto nodata; /* kmalloc_size_roundup() might give us more room than requested. * Put skb_shared_info exactly at the end of allocated zone, * to allow max possible filling before reallocation. */ prefetchw(data + SKB_WITH_OVERHEAD(size)); /* * Only clear those fields we need to clear, not those that we will * actually initialise below. Hence, don't put any more fields after * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, size); skb->pfmemalloc = pfmemalloc; if (flags & SKB_ALLOC_FCLONE) { struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); } return skb; nodata: kmem_cache_free(cache, skb); return NULL; } EXPORT_SYMBOL(__alloc_skb); /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on * @len: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has NET_SKB_PAD headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. 
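 *
 * Illustrative use (hypothetical receive path, placeholder names):
 *
 *	skb = __netdev_alloc_skb(dev, pkt_len, GFP_ATOMIC);
 *	if (skb)
 *		skb_put_data(skb, rx_buf, pkt_len);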
* * %NULL is returned if there is no free memory. */ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, gfp_t gfp_mask) { struct page_frag_cache *nc; struct sk_buff *skb; bool pfmemalloc; void *data; len += NET_SKB_PAD; /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. */ if (len <= SKB_WITH_OVERHEAD(1024) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } len = SKB_HEAD_ALIGN(len); if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = nc->pfmemalloc; } else { local_bh_disable(); nc = this_cpu_ptr(&napi_alloc_cache.page); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = nc->pfmemalloc; local_bh_enable(); } if (unlikely(!data)) return NULL; skb = __build_skb(data, len); if (unlikely(!skb)) { skb_free_frag(data); return NULL; } if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; skb_success: skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; skb_fail: return skb; } EXPORT_SYMBOL(__netdev_alloc_skb); /** * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance * @napi: napi instance this buffer was allocated for * @len: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages * * Allocate a new sk_buff for use in NAPI receive. This buffer will * attempt to allocate the head from a special reserved region used * only for NAPI Rx allocation. By doing this we can save several * CPU cycles by avoiding having to disable and re-enable IRQs. * * %NULL is returned if there is no free memory. */ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { struct napi_alloc_cache *nc; struct sk_buff *skb; bool pfmemalloc; void *data; DEBUG_NET_WARN_ON_ONCE(!in_softirq()); len += NET_SKB_PAD + NET_IP_ALIGN; /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. * When the small frag allocator is available, prefer it over kmalloc * for small fragments */ if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } nc = this_cpu_ptr(&napi_alloc_cache); if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { /* we are artificially inflating the allocation size, but * that is not as bad as it may look like, as: * - 'len' less than GRO_MAX_HEAD makes little sense * - On most systems, larger 'len' values lead to fragment * size above 512 bytes * - kmalloc would use the kmalloc-1k slab for such values * - Builds with smaller GRO_MAX_HEAD will very likely do * little networking, as that implies no WiFi and no * tunnels support, and 32 bits arches. 
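 * - The net effect is that a tiny skb is charged roughly the 1K it
 *   actually consumes from the shared page (see the truesize note
 *   above the small page frag allocator).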
*/ len = SZ_1K; data = page_frag_alloc_1k(&nc->page_small, gfp_mask); pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small); } else { len = SKB_HEAD_ALIGN(len); data = page_frag_alloc(&nc->page, len, gfp_mask); pfmemalloc = nc->page.pfmemalloc; } if (unlikely(!data)) return NULL; skb = __napi_build_skb(data, len); if (unlikely(!skb)) { skb_free_frag(data); return NULL; } if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; skb_success: skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); skb->dev = napi->dev; skb_fail: return skb; } EXPORT_SYMBOL(__napi_alloc_skb); void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size, unsigned int truesize) { DEBUG_NET_WARN_ON_ONCE(size > truesize); skb_fill_netmem_desc(skb, i, netmem, off, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } EXPORT_SYMBOL(skb_add_rx_frag_netmem); void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; DEBUG_NET_WARN_ON_ONCE(size > truesize); skb_frag_size_add(frag, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } EXPORT_SYMBOL(skb_coalesce_rx_frag); static void skb_drop_list(struct sk_buff **listp) { kfree_skb_list(*listp); *listp = NULL; } static inline void skb_drop_fraglist(struct sk_buff *skb) { skb_drop_list(&skb_shinfo(skb)->frag_list); } static void skb_clone_fraglist(struct sk_buff *skb) { struct sk_buff *list; skb_walk_frags(skb, list) skb_get(list); } static bool is_pp_page(struct page *page) { return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; } int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { #if IS_ENABLED(CONFIG_PAGE_POOL) u32 size, truesize, len, max_head_size, off; struct sk_buff *skb = *pskb, *nskb; int err, i, head_off; void *data; /* XDP does not support fraglist so we need to linearize * the skb. 
*/ if (skb_has_frag_list(skb)) return -EOPNOTSUPP; max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom); if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE) return -ENOMEM; size = min_t(u32, skb->len, max_head_size); truesize = SKB_HEAD_ALIGN(size) + headroom; data = page_pool_dev_alloc_va(pool, &truesize); if (!data) return -ENOMEM; nskb = napi_build_skb(data, truesize); if (!nskb) { page_pool_free_va(pool, data, true); return -ENOMEM; } skb_reserve(nskb, headroom); skb_copy_header(nskb, skb); skb_mark_for_recycle(nskb); err = skb_copy_bits(skb, 0, nskb->data, size); if (err) { consume_skb(nskb); return err; } skb_put(nskb, size); head_off = skb_headroom(nskb) - skb_headroom(skb); skb_headers_offset_update(nskb, head_off); off = size; len = skb->len - off; for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { struct page *page; u32 page_off; size = min_t(u32, len, PAGE_SIZE); truesize = size; page = page_pool_dev_alloc(pool, &page_off, &truesize); if (!page) { consume_skb(nskb); return -ENOMEM; } skb_add_rx_frag(nskb, i, page, page_off, size, truesize); err = skb_copy_bits(skb, off, page_address(page) + page_off, size); if (err) { consume_skb(nskb); return err; } len -= size; off += size; } consume_skb(skb); *pskb = nskb; return 0; #else return -EOPNOTSUPP; #endif } EXPORT_SYMBOL(skb_pp_cow_data); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog) { if (!prog->aux->xdp_has_frags) return -EINVAL; return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM); } EXPORT_SYMBOL(skb_cow_data_for_xdp); #if IS_ENABLED(CONFIG_PAGE_POOL) bool napi_pp_put_page(struct page *page, bool napi_safe) { bool allow_direct = false; struct page_pool *pp; page = compound_head(page); /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation * in order to preserve any existing bits, such as bit 0 for the * head page of compound page and bit 1 for pfmemalloc page, so * mask those bits for freeing side when doing below checking, * and page_is_pfmemalloc() is checked in __page_pool_put_page() * to avoid recycling the pfmemalloc page. */ if (unlikely(!is_pp_page(page))) return false; pp = page->pp; /* Allow direct recycle if we have reasons to believe that we are * in the same context as the consumer would run, so there's * no possible race. * __page_pool_put_page() makes sure we're not in hardirq context * and interrupts are enabled prior to accessing the cache. */ if (napi_safe || in_softirq()) { const struct napi_struct *napi = READ_ONCE(pp->p.napi); unsigned int cpuid = smp_processor_id(); allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid; allow_direct |= READ_ONCE(pp->cpuid) == cpuid; } /* Driver set this to memory recycling info. Reset it on recycle. * This will *not* work for NIC using a split-page memory model. * The page will be returned to the pool here regardless of the * 'flipped' fragment being in use or not. */ page_pool_put_full_page(pp, page, allow_direct); return true; } EXPORT_SYMBOL(napi_pp_put_page); #endif static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe) { if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) return false; return napi_pp_put_page(virt_to_page(data), napi_safe); } /** * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb * @skb: page pool aware skb * * Increase the fragment reference count (pp_ref_count) of a skb. This is * intended to gain fragment references only for page pool aware skbs, * i.e. 
when skb->pp_recycle is true, and not for fragments in a * non-pp-recycling skb. It has a fallback to increase references on normal * pages, as page pool aware skbs may also have normal page fragments. */ static int skb_pp_frag_ref(struct sk_buff *skb) { struct skb_shared_info *shinfo; struct page *head_page; int i; if (!skb->pp_recycle) return -EINVAL; shinfo = skb_shinfo(skb); for (i = 0; i < shinfo->nr_frags; i++) { head_page = compound_head(skb_frag_page(&shinfo->frags[i])); if (likely(is_pp_page(head_page))) page_pool_ref_page(head_page); else page_ref_inc(head_page); } return 0; } static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) kmem_cache_free(net_hotdata.skb_small_head_cache, head); else kfree(head); } static void skb_free_head(struct sk_buff *skb, bool napi_safe) { unsigned char *head = skb->head; if (skb->head_frag) { if (skb_pp_recycle(skb, head, napi_safe)) return; skb_free_frag(head); } else { skb_kfree_head(head, skb_end_offset(skb)); } } static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason, bool napi_safe) { struct skb_shared_info *shinfo = skb_shinfo(skb); int i; if (!skb_data_unref(skb, shinfo)) goto exit; if (skb_zcopy(skb)) { bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS; skb_zcopy_clear(skb, true); if (skip_unref) goto free_head; } for (i = 0; i < shinfo->nr_frags; i++) napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe); free_head: if (shinfo->frag_list) kfree_skb_list_reason(shinfo->frag_list, reason); skb_free_head(skb, napi_safe); exit: /* When we clone an SKB we copy the reycling bit. The pp_recycle * bit is only set on the head though, so in order to avoid races * while trying to recycle fragments on __skb_frag_unref() we need * to make one SKB responsible for triggering the recycle path. * So disable the recycling bit if an SKB is cloned and we have * additional references to the fragmented part of the SKB. * Eventually the last SKB will have the recycling bit set and it's * dataref set to 0, which will trigger the recycling */ skb->pp_recycle = 0; } /* * Free an skbuff by memory without cleaning the state. */ static void kfree_skbmem(struct sk_buff *skb) { struct sk_buff_fclones *fclones; switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: kmem_cache_free(net_hotdata.skbuff_cache, skb); return; case SKB_FCLONE_ORIG: fclones = container_of(skb, struct sk_buff_fclones, skb1); /* We usually free the clone (TX completion) before original skb * This test would have no chance to be true for the clone, * while here, branch prediction will be good. */ if (refcount_read(&fclones->fclone_ref) == 1) goto fastpath; break; default: /* SKB_FCLONE_CLONE */ fclones = container_of(skb, struct sk_buff_fclones, skb2); break; } if (!refcount_dec_and_test(&fclones->fclone_ref)) return; fastpath: kmem_cache_free(net_hotdata.skbuff_fclone_cache, fclones); } void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); if (skb->destructor) { DEBUG_NET_WARN_ON_ONCE(in_hardirq()); skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb_nfct(skb)); #endif skb_ext_put(skb); } /* Free everything but the sk_buff shell. */ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason, bool napi_safe) { skb_release_head_state(skb); if (likely(skb->head)) skb_release_data(skb, reason, napi_safe); } /** * __kfree_skb - private function * @skb: buffer * * Free an sk_buff. Release anything attached to the buffer. * Clean the state. 
This is an internal helper function. Users should * always call kfree_skb */ void __kfree_skb(struct sk_buff *skb) { skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false); kfree_skbmem(skb); } EXPORT_SYMBOL(__kfree_skb); static __always_inline bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (unlikely(!skb_unref(skb))) return false; DEBUG_NET_WARN_ON_ONCE(reason == SKB_NOT_DROPPED_YET || u32_get_bits(reason, SKB_DROP_REASON_SUBSYS_MASK) >= SKB_DROP_REASON_SUBSYS_NUM); if (reason == SKB_CONSUMED) trace_consume_skb(skb, __builtin_return_address(0)); else trace_kfree_skb(skb, __builtin_return_address(0), reason); return true; } /** * kfree_skb_reason - free an sk_buff with special reason * @skb: buffer to free * @reason: reason why this skb is dropped * * Drop a reference to the buffer and free it if the usage count has * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' * tracepoint. */ void __fix_address kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (__kfree_skb_reason(skb, reason)) __kfree_skb(skb); } EXPORT_SYMBOL(kfree_skb_reason); #define KFREE_SKB_BULK_SIZE 16 struct skb_free_array { unsigned int skb_count; void *skb_array[KFREE_SKB_BULK_SIZE]; }; static void kfree_skb_add_bulk(struct sk_buff *skb, struct skb_free_array *sa, enum skb_drop_reason reason) { /* if SKB is a clone, don't handle this case */ if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) { __kfree_skb(skb); return; } skb_release_all(skb, reason, false); sa->skb_array[sa->skb_count++] = skb; if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) { kmem_cache_free_bulk(net_hotdata.skbuff_cache, KFREE_SKB_BULK_SIZE, sa->skb_array); sa->skb_count = 0; } } void __fix_address kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason) { struct skb_free_array sa; sa.skb_count = 0; while (segs) { struct sk_buff *next = segs->next; if (__kfree_skb_reason(segs, reason)) { skb_poison_list(segs); kfree_skb_add_bulk(segs, &sa, reason); } segs = next; } if (sa.skb_count) kmem_cache_free_bulk(net_hotdata.skbuff_cache, sa.skb_count, sa.skb_array); } EXPORT_SYMBOL(kfree_skb_list_reason); /* Dump skb information and contents. * * Must only be called from net_ratelimit()-ed paths. * * Dumps whole packets if full_pkt, only headers otherwise. */ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) { struct skb_shared_info *sh = skb_shinfo(skb); struct net_device *dev = skb->dev; struct sock *sk = skb->sk; struct sk_buff *list_skb; bool has_mac, has_trans; int headroom, tailroom; int i, len, seg_len; if (full_pkt) len = skb->len; else len = min_t(int, skb->len, MAX_HEADER + 128); headroom = skb_headroom(skb); tailroom = skb_tailroom(skb); has_mac = skb_mac_header_was_set(skb); has_trans = skb_transport_header_was_set(skb); printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n" "mac=(%d,%d) net=(%d,%d) trans=%d\n" "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n" "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n" "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n", level, skb->len, headroom, skb_headlen(skb), tailroom, has_mac ? skb->mac_header : -1, has_mac ? skb_mac_header_len(skb) : -1, skb->network_header, has_trans ? skb_network_header_len(skb) : -1, has_trans ? 
skb->transport_header : -1, sh->tx_flags, sh->nr_frags, sh->gso_size, sh->gso_type, sh->gso_segs, skb->csum, skb->ip_summed, skb->csum_complete_sw, skb->csum_valid, skb->csum_level, skb->hash, skb->sw_hash, skb->l4_hash, ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", level, sk->sk_family, sk->sk_type, sk->sk_protocol); if (full_pkt && headroom) print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET, 16, 1, skb->head, headroom, false); seg_len = min_t(int, skb_headlen(skb), len); if (seg_len) print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, seg_len, false); len -= seg_len; if (full_pkt && tailroom) print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET, 16, 1, skb_tail_pointer(skb), tailroom, false); for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; u8 *vaddr; skb_frag_foreach_page(frag, skb_frag_off(frag), skb_frag_size(frag), p, p_off, p_len, copied) { seg_len = min_t(int, p_len, len); vaddr = kmap_atomic(p); print_hex_dump(level, "skb frag: ", DUMP_PREFIX_OFFSET, 16, 1, vaddr + p_off, seg_len, false); kunmap_atomic(vaddr); len -= seg_len; if (!len) break; } } if (full_pkt && skb_has_frag_list(skb)) { printk("skb fraglist:\n"); skb_walk_frags(skb, list_skb) skb_dump(level, list_skb, true); } } EXPORT_SYMBOL(skb_dump); /** * skb_tx_error - report an sk_buff xmit error * @skb: buffer that triggered an error * * Report xmit error if a device callback is tracking this skb. * skb must be freed afterwards. */ void skb_tx_error(struct sk_buff *skb) { if (skb) { skb_zcopy_downgrade_managed(skb); skb_zcopy_clear(skb, true); } } EXPORT_SYMBOL(skb_tx_error); #ifdef CONFIG_TRACEPOINTS /** * consume_skb - free an skbuff * @skb: buffer to free * * Drop a ref to the buffer and free it if the usage count has hit zero * Functions identically to kfree_skb, but kfree_skb assumes that the frame * is being dropped after a failure and notes that */ void consume_skb(struct sk_buff *skb) { if (!skb_unref(skb)) return; trace_consume_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); #endif /** * __consume_stateless_skb - free an skbuff, assuming it is stateless * @skb: buffer to free * * Alike consume_skb(), but this variant assumes that this is the last * skb reference and all the head states have been already dropped */ void __consume_stateless_skb(struct sk_buff *skb) { trace_consume_skb(skb, __builtin_return_address(0)); skb_release_data(skb, SKB_CONSUMED, false); kfree_skbmem(skb); } static void napi_skb_cache_put(struct sk_buff *skb) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); u32 i; if (!kasan_mempool_poison_object(skb)) return; nc->skb_cache[nc->skb_count++] = skb; if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++) kasan_mempool_unpoison_object(nc->skb_cache[i], kmem_cache_size(net_hotdata.skbuff_cache)); kmem_cache_free_bulk(net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF, nc->skb_cache + NAPI_SKB_CACHE_HALF); nc->skb_count = NAPI_SKB_CACHE_HALF; } } void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason) { skb_release_all(skb, reason, true); napi_skb_cache_put(skb); } void napi_skb_free_stolen_head(struct sk_buff *skb) { if (unlikely(skb->slow_gro)) { nf_reset_ct(skb); skb_dst_drop(skb); skb_ext_put(skb); 
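		/* skb_orphan() below drops the socket reference; together
		 * with the resets above this clears all slow_gro state
		 * before the sk_buff is returned to the NAPI cache.
		 */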
skb_orphan(skb); skb->slow_gro = 0; } napi_skb_cache_put(skb); } void napi_consume_skb(struct sk_buff *skb, int budget) { /* Zero budget indicate non-NAPI context called us, like netpoll */ if (unlikely(!budget)) { dev_consume_skb_any(skb); return; } DEBUG_NET_WARN_ON_ONCE(!in_softirq()); if (!skb_unref(skb)) return; /* if reaching here SKB is ready to free */ trace_consume_skb(skb, __builtin_return_address(0)); /* if SKB is a clone, don't handle this case */ if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { __kfree_skb(skb); return; } skb_release_all(skb, SKB_CONSUMED, !!budget); napi_skb_cache_put(skb); } EXPORT_SYMBOL(napi_consume_skb); /* Make sure a field is contained by headers group */ #define CHECK_SKB_FIELD(field) \ BUILD_BUG_ON(offsetof(struct sk_buff, field) != \ offsetof(struct sk_buff, headers.field)); \ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; /* We do not copy old->sk */ new->dev = old->dev; memcpy(new->cb, old->cb, sizeof(old->cb)); skb_dst_copy(new, old); __skb_ext_copy(new, old); __nf_copy(new, old, false); /* Note : this field could be in the headers group. * It is not yet because we do not want to have a 16 bit hole */ new->queue_mapping = old->queue_mapping; memcpy(&new->headers, &old->headers, sizeof(new->headers)); CHECK_SKB_FIELD(protocol); CHECK_SKB_FIELD(csum); CHECK_SKB_FIELD(hash); CHECK_SKB_FIELD(priority); CHECK_SKB_FIELD(skb_iif); CHECK_SKB_FIELD(vlan_proto); CHECK_SKB_FIELD(vlan_tci); CHECK_SKB_FIELD(transport_header); CHECK_SKB_FIELD(network_header); CHECK_SKB_FIELD(mac_header); CHECK_SKB_FIELD(inner_protocol); CHECK_SKB_FIELD(inner_transport_header); CHECK_SKB_FIELD(inner_network_header); CHECK_SKB_FIELD(inner_mac_header); CHECK_SKB_FIELD(mark); #ifdef CONFIG_NETWORK_SECMARK CHECK_SKB_FIELD(secmark); #endif #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif CHECK_SKB_FIELD(alloc_cpu); #ifdef CONFIG_XPS CHECK_SKB_FIELD(sender_cpu); #endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #endif } /* * You should not add any new code to this function. Add it to * __copy_skb_header above instead. */ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) { #define C(x) n->x = skb->x n->next = n->prev = NULL; n->sk = NULL; __copy_skb_header(n, skb); C(len); C(data_len); C(mac_len); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; n->peeked = 0; C(pfmemalloc); C(pp_recycle); n->destructor = NULL; C(tail); C(end); C(head); C(head_frag); C(data); C(truesize); refcount_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); skb->cloned = 1; return n; #undef C } /** * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg * @first: first sk_buff of the msg */ struct sk_buff *alloc_skb_for_msg(struct sk_buff *first) { struct sk_buff *n; n = alloc_skb(0, GFP_ATOMIC); if (!n) return NULL; n->len = first->len; n->data_len = first->len; n->truesize = first->truesize; skb_shinfo(n)->frag_list = first; __copy_skb_header(n, first); n->destructor = NULL; return n; } EXPORT_SYMBOL_GPL(alloc_skb_for_msg); /** * skb_morph - morph one skb into another * @dst: the skb to receive the contents * @src: the skb to supply the contents * * This is identical to skb_clone except that the target skb is * supplied by the user. * * The target skb is returned upon exit. 
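 *
 * Sketch of the intended pattern (hypothetical caller, illustration only):
 *
 *	skb_morph(dst, src);
 *	consume_skb(src);
 *
 * after which @dst refers to @src's packet data just as a clone would.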
*/ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) { skb_release_all(dst, SKB_CONSUMED, false); return __skb_clone(dst, src); } EXPORT_SYMBOL_GPL(skb_morph); int mm_account_pinned_pages(struct mmpin *mmp, size_t size) { unsigned long max_pg, num_pg, new_pg, old_pg, rlim; struct user_struct *user; if (capable(CAP_IPC_LOCK) || !size) return 0; rlim = rlimit(RLIMIT_MEMLOCK); if (rlim == RLIM_INFINITY) return 0; num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */ max_pg = rlim >> PAGE_SHIFT; user = mmp->user ? : current_user(); old_pg = atomic_long_read(&user->locked_vm); do { new_pg = old_pg + num_pg; if (new_pg > max_pg) return -ENOBUFS; } while (!atomic_long_try_cmpxchg(&user->locked_vm, &old_pg, new_pg)); if (!mmp->user) { mmp->user = get_uid(user); mmp->num_pg = num_pg; } else { mmp->num_pg += num_pg; } return 0; } EXPORT_SYMBOL_GPL(mm_account_pinned_pages); void mm_unaccount_pinned_pages(struct mmpin *mmp) { if (mmp->user) { atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm); free_uid(mmp->user); } } EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) { struct ubuf_info_msgzc *uarg; struct sk_buff *skb; WARN_ON_ONCE(!in_task()); skb = sock_omalloc(sk, 0, GFP_KERNEL); if (!skb) return NULL; BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb)); uarg = (void *)skb->cb; uarg->mmp.user = NULL; if (mm_account_pinned_pages(&uarg->mmp, size)) { kfree_skb(skb); return NULL; } uarg->ubuf.callback = msg_zerocopy_callback; uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; uarg->len = 1; uarg->bytelen = size; uarg->zerocopy = 1; uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; refcount_set(&uarg->ubuf.refcnt, 1); sock_hold(sk); return &uarg->ubuf; } static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg) { return container_of((void *)uarg, struct sk_buff, cb); } struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg) { if (uarg) { struct ubuf_info_msgzc *uarg_zc; const u32 byte_limit = 1 << 19; /* limit to a few TSO */ u32 bytelen, next; /* there might be non MSG_ZEROCOPY users */ if (uarg->callback != msg_zerocopy_callback) return NULL; /* realloc only when socket is locked (TCP, UDP cork), * so uarg->len and sk_zckey access is serialized */ if (!sock_owned_by_user(sk)) { WARN_ON_ONCE(1); return NULL; } uarg_zc = uarg_to_msgzc(uarg); bytelen = uarg_zc->bytelen + size; if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) { /* TCP can create new skb to attach new uarg */ if (sk->sk_type == SOCK_STREAM) goto new_alloc; return NULL; } next = (u32)atomic_read(&sk->sk_zckey); if ((u32)(uarg_zc->id + uarg_zc->len) == next) { if (mm_account_pinned_pages(&uarg_zc->mmp, size)) return NULL; uarg_zc->len++; uarg_zc->bytelen = bytelen; atomic_set(&sk->sk_zckey, ++next); /* no extra ref when appending to datagram (MSG_MORE) */ if (sk->sk_type == SOCK_STREAM) net_zcopy_get(uarg); return uarg; } } new_alloc: return msg_zerocopy_alloc(sk, size); } EXPORT_SYMBOL_GPL(msg_zerocopy_realloc); static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) { struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); u32 old_lo, old_hi; u64 sum_len; old_lo = serr->ee.ee_info; old_hi = serr->ee.ee_data; sum_len = old_hi - old_lo + 1ULL + len; if (sum_len >= (1ULL << 32)) return false; if (lo != old_hi + 1) return false; serr->ee.ee_data += len; return true; } static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg) { struct sk_buff *tail, *skb = 
skb_from_uarg(uarg); struct sock_exterr_skb *serr; struct sock *sk = skb->sk; struct sk_buff_head *q; unsigned long flags; bool is_zerocopy; u32 lo, hi; u16 len; mm_unaccount_pinned_pages(&uarg->mmp); /* if !len, there was only 1 call, and it was aborted * so do not queue a completion notification */ if (!uarg->len || sock_flag(sk, SOCK_DEAD)) goto release; len = uarg->len; lo = uarg->id; hi = uarg->id + len - 1; is_zerocopy = uarg->zerocopy; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = 0; serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; serr->ee.ee_data = hi; serr->ee.ee_info = lo; if (!is_zerocopy) serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; q = &sk->sk_error_queue; spin_lock_irqsave(&q->lock, flags); tail = skb_peek_tail(q); if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY || !skb_zerocopy_notify_extend(tail, lo, len)) { __skb_queue_tail(q, skb); skb = NULL; } spin_unlock_irqrestore(&q->lock, flags); sk_error_report(sk); release: consume_skb(skb); sock_put(sk); } void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, bool success) { struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg); uarg_zc->zerocopy = uarg_zc->zerocopy & success; if (refcount_dec_and_test(&uarg->refcnt)) __msg_zerocopy_callback(uarg_zc); } EXPORT_SYMBOL_GPL(msg_zerocopy_callback); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) { struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk; atomic_dec(&sk->sk_zckey); uarg_to_msgzc(uarg)->len--; if (have_uref) msg_zerocopy_callback(NULL, uarg, true); } EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) { struct ubuf_info *orig_uarg = skb_zcopy(skb); int err, orig_len = skb->len; /* An skb can only point to one uarg. This edge case happens when * TCP appends to an skb, but zerocopy_realloc triggered a new alloc. */ if (orig_uarg && uarg != orig_uarg) return -EEXIST; err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len); if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { struct sock *save_sk = skb->sk; /* Streams do not free skb on error. Reset to prev state. */ iov_iter_revert(&msg->msg_iter, skb->len - orig_len); skb->sk = sk; ___pskb_trim(skb, orig_len); skb->sk = save_sk; return err; } skb_zcopy_set(skb, uarg, NULL); return skb->len - orig_len; } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); void __skb_zcopy_downgrade_managed(struct sk_buff *skb) { int i; skb_shinfo(skb)->flags &= ~SKBFL_MANAGED_FRAG_REFS; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); } EXPORT_SYMBOL_GPL(__skb_zcopy_downgrade_managed); static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig, gfp_t gfp_mask) { if (skb_zcopy(orig)) { if (skb_zcopy(nskb)) { /* !gfp_mask callers are verified to !skb_zcopy(nskb) */ if (!gfp_mask) { WARN_ON_ONCE(1); return -ENOMEM; } if (skb_uarg(nskb) == skb_uarg(orig)) return 0; if (skb_copy_ubufs(nskb, GFP_ATOMIC)) return -EIO; } skb_zcopy_set(nskb, skb_uarg(orig), NULL); } return 0; } /** * skb_copy_ubufs - copy userspace skb frags buffers to kernel * @skb: the skb to modify * @gfp_mask: allocation priority * * This must be called on skb with SKBFL_ZEROCOPY_ENABLE. * It will copy all frags into kernel and drop the reference * to userspace pages. * * If this function is called from an interrupt gfp_mask() must be * %GFP_ATOMIC. 
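 *
 * Illustrative call pattern only (the 'drop' label is hypothetical):
 *
 *	if (skb_zcopy(skb) && skb_copy_ubufs(skb, GFP_ATOMIC))
 *		goto drop;
 *
 * On failure the frags are left untouched and still reference the
 * userspace pages.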
* * Returns 0 on success or a negative error code on failure * to allocate kernel memory to copy to. */ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int num_frags = skb_shinfo(skb)->nr_frags; struct page *page, *head = NULL; int i, order, psize, new_frags; u32 d_off; if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; if (!num_frags) goto release; /* We might have to allocate high order pages, so compute what minimum * page order is needed. */ order = 0; while ((PAGE_SIZE << order) * MAX_SKB_FRAGS < __skb_pagelen(skb)) order++; psize = (PAGE_SIZE << order); new_frags = (__skb_pagelen(skb) + psize - 1) >> (PAGE_SHIFT + order); for (i = 0; i < new_frags; i++) { page = alloc_pages(gfp_mask | __GFP_COMP, order); if (!page) { while (head) { struct page *next = (struct page *)page_private(head); put_page(head); head = next; } return -ENOMEM; } set_page_private(page, (unsigned long)head); head = page; } page = head; d_off = 0; for (i = 0; i < num_frags; i++) { skb_frag_t *f = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; u8 *vaddr; skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f), p, p_off, p_len, copied) { u32 copy, done = 0; vaddr = kmap_atomic(p); while (done < p_len) { if (d_off == psize) { d_off = 0; page = (struct page *)page_private(page); } copy = min_t(u32, psize - d_off, p_len - done); memcpy(page_address(page) + d_off, vaddr + p_off + done, copy); done += copy; d_off += copy; } kunmap_atomic(vaddr); } } /* skb frags release userspace buffers */ for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); /* skb frags point to kernel buffers */ for (i = 0; i < new_frags - 1; i++) { __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize); head = (struct page *)page_private(head); } __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0, d_off); skb_shinfo(skb)->nr_frags = new_frags; release: skb_zcopy_clear(skb, false); return 0; } EXPORT_SYMBOL_GPL(skb_copy_ubufs); /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone * @gfp_mask: allocation priority * * Duplicate an &sk_buff. The new one is not owned by a socket. Both * copies share the same packet data but not structure. The new * buffer has a reference count of 1. If the allocation fails the * function returns %NULL otherwise the new buffer is returned. * * If this function is called from an interrupt gfp_mask() must be * %GFP_ATOMIC. 
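 *
 * Illustrative caller sketch:
 *
 *	struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
 *
 *	if (!nskb)
 *		return;
 *
 * On failure the original skb is left untouched; on success both skbs
 * share the same data until one of them is copied or expanded.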
*/ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff_fclones *fclones = container_of(skb, struct sk_buff_fclones, skb1); struct sk_buff *n; if (skb_orphan_frags(skb, gfp_mask)) return NULL; if (skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) == 1) { n = &fclones->skb2; refcount_set(&fclones->fclone_ref, 2); n->fclone = SKB_FCLONE_CLONE; } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; n = kmem_cache_alloc(net_hotdata.skbuff_cache, gfp_mask); if (!n) return NULL; n->fclone = SKB_FCLONE_UNAVAILABLE; } return __skb_clone(n, skb); } EXPORT_SYMBOL(skb_clone); void skb_headers_offset_update(struct sk_buff *skb, int off) { /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += off; /* {transport,network,mac}_header and tail are relative to skb->head */ skb->transport_header += off; skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; skb->inner_transport_header += off; skb->inner_network_header += off; skb->inner_mac_header += off; } EXPORT_SYMBOL(skb_headers_offset_update); void skb_copy_header(struct sk_buff *new, const struct sk_buff *old) { __copy_skb_header(new, old); skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } EXPORT_SYMBOL(skb_copy_header); static inline int skb_alloc_rx_flag(const struct sk_buff *skb) { if (skb_pfmemalloc(skb)) return SKB_ALLOC_RX; return 0; } /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff and its data. This is used when the * caller wishes to modify the data and needs a private copy of the * data to alter. Returns %NULL on failure or the pointer to the buffer * on success. The returned buffer has a reference count of 1. * * As by-product this function converts non-linear &sk_buff to linear * one, so that &sk_buff becomes completely private and caller is allowed * to modify all the data of returned buffer. This means that this * function is not recommended for use in circumstances when only * header is going to be modified. Use pskb_copy() instead. */ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; unsigned int size; int headerlen; if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) return NULL; headerlen = skb_headroom(skb); size = skb_end_offset(skb) + skb->data_len; n = __alloc_skb(size, gfp_mask, skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; /* Set the data pointer */ skb_reserve(n, headerlen); /* Set the tail pointer and length */ skb_put(n, skb->len); BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); skb_copy_header(n, skb); return n; } EXPORT_SYMBOL(skb_copy); /** * __pskb_copy_fclone - create copy of an sk_buff with private head. * @skb: buffer to copy * @headroom: headroom of new skb * @gfp_mask: allocation priority * @fclone: if true allocate the copy of the skb from the fclone * cache instead of the head cache; it is recommended to set this * to true for the cases where the copy will likely be cloned * * Make a copy of both an &sk_buff and part of its data, located * in header. Fragmented data remain shared. This is used when * the caller wishes to modify only header of &sk_buff and needs * private copy of the header to alter. Returns %NULL on failure * or the pointer to the buffer on success. 
* The returned buffer has a reference count of 1. */ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, gfp_t gfp_mask, bool fclone) { unsigned int size = skb_headlen(skb) + headroom; int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0); struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE); if (!n) goto out; /* Set the data pointer */ skb_reserve(n, headroom); /* Set the tail pointer and length */ skb_put(n, skb_headlen(skb)); /* Copy the bytes */ skb_copy_from_linear_data(skb, n->data, n->len); n->truesize += skb->data_len; n->data_len = skb->data_len; n->len = skb->len; if (skb_shinfo(skb)->nr_frags) { int i; if (skb_orphan_frags(skb, gfp_mask) || skb_zerocopy_clone(n, skb, gfp_mask)) { kfree_skb(n); n = NULL; goto out; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; skb_frag_ref(skb, i); } skb_shinfo(n)->nr_frags = i; } if (skb_has_frag_list(skb)) { skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; skb_clone_fraglist(n); } skb_copy_header(n, skb); out: return n; } EXPORT_SYMBOL(__pskb_copy_fclone); /** * pskb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate * @nhead: room to add at head * @ntail: room to add at tail * @gfp_mask: allocation priority * * Expands (or creates identical copy, if @nhead and @ntail are zero) * header of @skb. &sk_buff itself is not changed. &sk_buff MUST have * reference count of 1. Returns zero in the case of success or error, * if expansion failed. In the last case, &sk_buff is not changed. * * All the pointers pointing into skb header may change and must be * reloaded after call to this function. */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask) { unsigned int osize = skb_end_offset(skb); unsigned int size = osize + nhead + ntail; long off; u8 *data; int i; BUG_ON(nhead < 0); BUG_ON(skb_shared(skb)); skb_zcopy_downgrade_managed(skb); if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; size = SKB_WITH_OVERHEAD(size); /* Copy only real data... and, alas, header. This should be * optimized for the cases when header is void. */ memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head); memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); /* * if shinfo is shared we must drop the old head gracefully, but if it * is not we can just drop the old head and let the existing refcount * be since all we did is relocate the values */ if (skb_cloned(skb)) { if (skb_orphan_frags(skb, gfp_mask)) goto nofrags; if (skb_zcopy(skb)) refcount_inc(&skb_uarg(skb)->refcnt); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb, SKB_CONSUMED, false); } else { skb_free_head(skb, false); } off = (data + nhead) - skb->head; skb->head = data; skb->head_frag = 0; skb->data += off; skb_set_end_offset(skb, size); #ifdef NET_SKBUFF_DATA_USES_OFFSET off = nhead; #endif skb->tail += off; skb_headers_offset_update(skb, nhead); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); skb_metadata_clear(skb); /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). 
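 * Adjusting truesize on an skb still charged to a socket would let
 * sk_wmem_alloc / sk_rmem_alloc drift out of sync with what the
 * destructor later releases, hence the check below.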
*/ if (!skb->sk || skb->destructor == sock_edemux) skb->truesize += size - osize; return 0; nofrags: skb_kfree_head(data, size); nodata: return -ENOMEM; } EXPORT_SYMBOL(pskb_expand_head); /* Make private copy of skb with writable head and some headroom */ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) { struct sk_buff *skb2; int delta = headroom - skb_headroom(skb); if (delta <= 0) skb2 = pskb_copy(skb, GFP_ATOMIC); else { skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { kfree_skb(skb2); skb2 = NULL; } } return skb2; } EXPORT_SYMBOL(skb_realloc_headroom); /* Note: We plan to rework this in linux-6.4 */ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { unsigned int saved_end_offset, saved_truesize; struct skb_shared_info *shinfo; int res; saved_end_offset = skb_end_offset(skb); saved_truesize = skb->truesize; res = pskb_expand_head(skb, 0, 0, pri); if (res) return res; skb->truesize = saved_truesize; if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; /* We can not change skb->end if the original or new value * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head(). */ if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM || skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) { /* We think this path should not be taken. * Add a temporary trace to warn us just in case. */ pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n", saved_end_offset, skb_end_offset(skb)); WARN_ON_ONCE(1); return 0; } shinfo = skb_shinfo(skb); /* We are about to change back skb->end, * we need to move skb_shinfo() to its new location. */ memmove(skb->head + saved_end_offset, shinfo, offsetof(struct skb_shared_info, frags[shinfo->nr_frags])); skb_set_end_offset(skb, saved_end_offset); return 0; } /** * skb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate * @headroom: needed headroom * * Unlike skb_realloc_headroom, this one does not allocate a new skb * if possible; copies skb->sk to new skb as needed * and frees original skb in case of failures. * * It expect increased headroom and generates warning otherwise. */ struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) { int delta = headroom - skb_headroom(skb); int osize = skb_end_offset(skb); struct sock *sk = skb->sk; if (WARN_ONCE(delta <= 0, "%s is expecting an increase in the headroom", __func__)) return skb; delta = SKB_DATA_ALIGN(delta); /* pskb_expand_head() might crash, if skb is shared. */ if (skb_shared(skb) || !is_skb_wmem(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) goto fail; if (sk) skb_set_owner_w(nskb, sk); consume_skb(skb); skb = nskb; } if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) goto fail; if (sk && is_skb_wmem(skb)) { delta = skb_end_offset(skb) - osize; refcount_add(delta, &sk->sk_wmem_alloc); skb->truesize += delta; } return skb; fail: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(skb_expand_head); /** * skb_copy_expand - copy and expand sk_buff * @skb: buffer to copy * @newheadroom: new free bytes at head * @newtailroom: new free bytes at tail * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff and its data and while doing so * allocate additional space. * * This is used when the caller wishes to modify the data and needs a * private copy of the data to alter as well as more space for new fields. * Returns %NULL on failure or the pointer to the buffer * on success. 
The returned buffer has a reference count of 1. * * You must pass %GFP_ATOMIC as the allocation priority if this function * is called from an interrupt. */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t gfp_mask) { /* * Allocate the copy buffer */ int head_copy_len, head_copy_off; struct sk_buff *n; int oldheadroom; if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) return NULL; oldheadroom = skb_headroom(skb); n = __alloc_skb(newheadroom + skb->len + newtailroom, gfp_mask, skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; skb_reserve(n, newheadroom); /* Set the tail pointer and length */ skb_put(n, skb->len); head_copy_len = oldheadroom; head_copy_off = 0; if (newheadroom <= head_copy_len) head_copy_len = newheadroom; else head_copy_off = newheadroom - head_copy_len; /* Copy the linear header and data. */ BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, skb->len + head_copy_len)); skb_copy_header(n, skb); skb_headers_offset_update(n, newheadroom - oldheadroom); return n; } EXPORT_SYMBOL(skb_copy_expand); /** * __skb_pad - zero pad the tail of an skb * @skb: buffer to pad * @pad: space to pad * @free_on_error: free buffer on error * * Ensure that a buffer is followed by a padding area that is zero * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * * May return error in out of memory cases. The skb is freed on error * if @free_on_error is true. */ int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error) { int err; int ntail; /* If the skbuff is non linear tailroom is always zero.. */ if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { memset(skb->data+skb->len, 0, pad); return 0; } ntail = skb->data_len + pad - (skb->end - skb->tail); if (likely(skb_cloned(skb) || ntail > 0)) { err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); if (unlikely(err)) goto free_skb; } /* FIXME: The use of this function with non-linear skb's really needs * to be audited. */ err = skb_linearize(skb); if (unlikely(err)) goto free_skb; memset(skb->data + skb->len, 0, pad); return 0; free_skb: if (free_on_error) kfree_skb(skb); return err; } EXPORT_SYMBOL(__skb_pad); /** * pskb_put - add data to the tail of a potentially fragmented buffer * @skb: start of the buffer to use * @tail: tail fragment of the buffer to use * @len: amount of data to add * * This function extends the used data area of the potentially * fragmented buffer. @tail must be the last fragment of @skb -- or * @skb itself. If this would exceed the total buffer size the kernel * will panic. A pointer to the first byte of the extra data is * returned. */ void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) { if (tail != skb) { skb->data_len += len; skb->len += len; } return skb_put(tail, len); } EXPORT_SYMBOL_GPL(pskb_put); /** * skb_put - add data to a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer. If this would * exceed the total buffer size the kernel will panic. A pointer to the * first byte of the extra data is returned. 
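 *
 * Illustrative sketch ('HDR_LEN' and 'val' are hypothetical):
 *
 *	struct sk_buff *skb = alloc_skb(HDR_LEN + sizeof(val), GFP_KERNEL);
 *
 *	if (skb) {
 *		skb_reserve(skb, HDR_LEN);
 *		memcpy(skb_put(skb, sizeof(val)), &val, sizeof(val));
 *	}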
*/ void *skb_put(struct sk_buff *skb, unsigned int len) { void *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; if (unlikely(skb->tail > skb->end)) skb_over_panic(skb, len, __builtin_return_address(0)); return tmp; } EXPORT_SYMBOL(skb_put); /** * skb_push - add data to the start of a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer at the buffer * start. If this would exceed the total buffer headroom the kernel will * panic. A pointer to the first byte of the extra data is returned. */ void *skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; skb->len += len; if (unlikely(skb->data < skb->head)) skb_under_panic(skb, len, __builtin_return_address(0)); return skb->data; } EXPORT_SYMBOL(skb_push); /** * skb_pull - remove data from the start of a buffer * @skb: buffer to use * @len: amount of data to remove * * This function removes data from the start of a buffer, returning * the memory to the headroom. A pointer to the next data in the buffer * is returned. Once the data has been pulled future pushes will overwrite * the old data. */ void *skb_pull(struct sk_buff *skb, unsigned int len) { return skb_pull_inline(skb, len); } EXPORT_SYMBOL(skb_pull); /** * skb_pull_data - remove data from the start of a buffer returning its * original position. * @skb: buffer to use * @len: amount of data to remove * * This function removes data from the start of a buffer, returning * the memory to the headroom. A pointer to the original data in the buffer * is returned after checking if there is enough data to pull. Once the * data has been pulled future pushes will overwrite the old data. */ void *skb_pull_data(struct sk_buff *skb, size_t len) { void *data = skb->data; if (skb->len < len) return NULL; skb_pull(skb, len); return data; } EXPORT_SYMBOL(skb_pull_data); /** * skb_trim - remove end from a buffer * @skb: buffer to alter * @len: new length * * Cut the length of a buffer down by removing data from the tail. If * the buffer is already under the length specified it is not modified. * The skb must be linear. */ void skb_trim(struct sk_buff *skb, unsigned int len) { if (skb->len > len) __skb_trim(skb, len); } EXPORT_SYMBOL(skb_trim); /* Trims skb to length len. It can change skb pointers. 
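 *
 * Callers normally come here via pskb_trim(); an illustrative sketch
 * ('new_len' and the 'drop' label are hypothetical):
 *
 *	if (pskb_trim(skb, new_len))
 *		goto drop;
 *
 * Failure essentially means memory could not be allocated to unshare a
 * cloned head or a shared fragment.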
*/ int ___pskb_trim(struct sk_buff *skb, unsigned int len) { struct sk_buff **fragp; struct sk_buff *frag; int offset = skb_headlen(skb); int nfrags = skb_shinfo(skb)->nr_frags; int i; int err; if (skb_cloned(skb) && unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) return err; i = 0; if (offset >= len) goto drop_pages; for (; i < nfrags; i++) { int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]); if (end < len) { offset = end; continue; } skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset); drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) skb_frag_unref(skb, i); if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); goto done; } for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); fragp = &frag->next) { int end = offset + frag->len; if (skb_shared(frag)) { struct sk_buff *nfrag; nfrag = skb_clone(frag, GFP_ATOMIC); if (unlikely(!nfrag)) return -ENOMEM; nfrag->next = frag->next; consume_skb(frag); frag = nfrag; *fragp = frag; } if (end < len) { offset = end; continue; } if (end > len && unlikely((err = pskb_trim(frag, len - offset)))) return err; if (frag->next) skb_drop_list(&frag->next); break; } done: if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; } else { skb->len = len; skb->data_len = 0; skb_set_tail_pointer(skb, len); } if (!skb->sk || skb->destructor == sock_edemux) skb_condense(skb); return 0; } EXPORT_SYMBOL(___pskb_trim); /* Note : use pskb_trim_rcsum() instead of calling this directly */ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) { int delta = skb->len - len; skb->csum = csum_block_sub(skb->csum, skb_checksum(skb, len, delta, 0), len); } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len; int offset = skb_checksum_start_offset(skb) + skb->csum_offset; if (offset + sizeof(__sum16) > hdlen) return -EINVAL; } return __pskb_trim(skb, len); } EXPORT_SYMBOL(pskb_trim_rcsum_slow); /** * __pskb_pull_tail - advance tail of skb header * @skb: buffer to reallocate * @delta: number of bytes to advance tail * * The function makes a sense only on a fragmented &sk_buff, * it expands header moving its tail forward and copying necessary * data from fragmented part. * * &sk_buff MUST have reference count of 1. * * Returns %NULL (and &sk_buff does not change) if pull failed * or value of new tail of skb in the case of success. * * All the pointers pointing into skb header may change and must be * reloaded after call to this function. */ /* Moves tail of skb head forward, copying data from fragmented part, * when it is necessary. * 1. It may fail due to malloc failure. * 2. It may change skb pointers. * * It is pretty complicated. Luckily, it is called only in exceptional cases. */ void *__pskb_pull_tail(struct sk_buff *skb, int delta) { /* If skb has not enough free space at tail, get new one * plus 128 bytes for future expansions. If we have enough * room at tail, reallocate without expansion only if skb is cloned. */ int i, k, eat = (skb->tail + delta) - skb->end; if (eat > 0 || skb_cloned(skb)) { if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, GFP_ATOMIC)) return NULL; } BUG_ON(skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)); /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. */ if (!skb_has_frag_list(skb)) goto pull_pages; /* Estimate size of pulled pages. 
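 * If the page frags alone cannot cover @delta, whatever is left in
 * 'eat' after the scan below must come out of the frag_list.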
*/ eat = delta; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (size >= eat) goto pull_pages; eat -= size; } /* If we need update frag list, we are in troubles. * Certainly, it is possible to add an offset to skb data, * but taking into account that pulling is expected to * be very rare operation, it is worth to fight against * further bloating skb head and crucify ourselves here instead. * Pure masohism, indeed. 8)8) */ if (eat) { struct sk_buff *list = skb_shinfo(skb)->frag_list; struct sk_buff *clone = NULL; struct sk_buff *insp = NULL; do { if (list->len <= eat) { /* Eaten as whole. */ eat -= list->len; list = list->next; insp = list; } else { /* Eaten partially. */ if (skb_is_gso(skb) && !list->head_frag && skb_headlen(list)) skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; if (skb_shared(list)) { /* Sucks! We need to fork list. :-( */ clone = skb_clone(list, GFP_ATOMIC); if (!clone) return NULL; insp = list->next; list = clone; } else { /* This may be pulled without * problems. */ insp = list; } if (!pskb_pull(list, eat)) { kfree_skb(clone); return NULL; } break; } } while (eat); /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; consume_skb(list); } /* And insert new clone at head. */ if (clone) { clone->next = list; skb_shinfo(skb)->frag_list = clone; } } /* Success! Now we may commit changes to skb data. */ pull_pages: eat = delta; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (size <= eat) { skb_frag_unref(skb, i); eat -= size; } else { skb_frag_t *frag = &skb_shinfo(skb)->frags[k]; *frag = skb_shinfo(skb)->frags[i]; if (eat) { skb_frag_off_add(frag, eat); skb_frag_size_sub(frag, eat); if (!i) goto end; eat = 0; } k++; } } skb_shinfo(skb)->nr_frags = k; end: skb->tail += delta; skb->data_len -= delta; if (!skb->data_len) skb_zcopy_clear(skb, false); return skb_tail_pointer(skb); } EXPORT_SYMBOL(__pskb_pull_tail); /** * skb_copy_bits - copy bits from skb to kernel buffer * @skb: source skb * @offset: offset in source * @to: destination buffer * @len: number of bytes to copy * * Copy the specified number of bytes from the source skb to the * destination buffer. * * CAUTION ! : * If its prototype is ever changed, * check arch/{*}/net/{*}.S files, * since it is called from BPF assembly code. */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { int start = skb_headlen(skb); struct sk_buff *frag_iter; int i, copy; if (offset > (int)skb->len - len) goto fault; /* Copy header. 
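 * (i.e. the slice of the requested range that lies in the linear head)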
*/ if ((copy = start - offset) > 0) { if (copy > len) copy = len; skb_copy_from_linear_data_offset(skb, offset, to, copy); if ((len -= copy) == 0) return 0; offset += copy; to += copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; skb_frag_t *f = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); end = start + skb_frag_size(f); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(f, skb_frag_off(f) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(to + copied, vaddr + p_off, p_len); kunmap_atomic(vaddr); } if ((len -= copy) == 0) return 0; offset += copy; to += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; if (skb_copy_bits(frag_iter, offset - start, to, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; to += copy; } start = end; } if (!len) return 0; fault: return -EFAULT; } EXPORT_SYMBOL(skb_copy_bits); /* * Callback from splice_to_pipe(), if we need to release some pages * at the end of the spd in case we error'ed out in filling the pipe. */ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) { put_page(spd->pages[i]); } static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, struct sock *sk) { struct page_frag *pfrag = sk_page_frag(sk); if (!sk_page_frag_refill(sk, pfrag)) return NULL; *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); memcpy(page_address(pfrag->page) + pfrag->offset, page_address(page) + *offset, *len); *offset = pfrag->offset; pfrag->offset += *len; return pfrag->page; } static bool spd_can_coalesce(const struct splice_pipe_desc *spd, struct page *page, unsigned int offset) { return spd->nr_pages && spd->pages[spd->nr_pages - 1] == page && (spd->partial[spd->nr_pages - 1].offset + spd->partial[spd->nr_pages - 1].len == offset); } /* * Fill page/offset/length into spd, if it can hold more pages. */ static bool spd_fill_page(struct splice_pipe_desc *spd, struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, bool linear, struct sock *sk) { if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) return true; if (linear) { page = linear_to_page(page, len, &offset, sk); if (!page) return true; } if (spd_can_coalesce(spd, page, offset)) { spd->partial[spd->nr_pages - 1].len += *len; return false; } get_page(page); spd->pages[spd->nr_pages] = page; spd->partial[spd->nr_pages].len = *len; spd->partial[spd->nr_pages].offset = offset; spd->nr_pages++; return false; } static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct splice_pipe_desc *spd, bool linear, struct sock *sk, struct pipe_inode_info *pipe) { if (!*len) return true; /* skip this segment if already processed */ if (*off >= plen) { *off -= plen; return false; } /* ignore any bits we already processed */ poff += *off; plen -= *off; *off = 0; do { unsigned int flen = min(*len, plen); if (spd_fill_page(spd, pipe, page, &flen, poff, linear, sk)) return true; poff += flen; plen -= flen; *len -= flen; } while (*len && plen); return false; } /* * Map linear and fragment data from the skb to spd. It reports true if the * pipe is full or if we already spliced the requested length. 
*/ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, unsigned int *offset, unsigned int *len, struct splice_pipe_desc *spd, struct sock *sk) { int seg; struct sk_buff *iter; /* map the linear part : * If skb->head_frag is set, this 'linear' part is backed by a * fragment, and if the head is not shared with any clones then * we can avoid a copy since we own the head portion of this page. */ if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), offset, len, spd, skb_head_is_locked(skb), sk, pipe)) return true; /* * then map the fragments */ for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(skb_frag_page(f), skb_frag_off(f), skb_frag_size(f), offset, len, spd, false, sk, pipe)) return true; } skb_walk_frags(skb, iter) { if (*offset >= iter->len) { *offset -= iter->len; continue; } /* __skb_splice_bits() only fails if the output has no room * left, so no point in going over the frag_list for the error * case. */ if (__skb_splice_bits(iter, pipe, offset, len, spd, sk)) return true; } return false; } /* * Map data from the skb to a pipe. Should handle both the linear part, * the fragments, and the frag list. */ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { struct partial_page partial[MAX_SKB_FRAGS]; struct page *pages[MAX_SKB_FRAGS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, .nr_pages_max = MAX_SKB_FRAGS, .ops = &nosteal_pipe_buf_ops, .spd_release = sock_spd_release, }; int ret = 0; __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk); if (spd.nr_pages) ret = splice_to_pipe(pipe, &spd); return ret; } EXPORT_SYMBOL_GPL(skb_splice_bits); static int sendmsg_locked(struct sock *sk, struct msghdr *msg) { struct socket *sock = sk->sk_socket; size_t size = msg_data_left(msg); if (!sock) return -EINVAL; if (!sock->ops->sendmsg_locked) return sock_no_sendmsg_locked(sk, msg, size); return sock->ops->sendmsg_locked(sk, msg, size); } static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg) { struct socket *sock = sk->sk_socket; if (!sock) return -EINVAL; return sock_sendmsg(sock, msg); } typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg); static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len, sendmsg_func sendmsg) { unsigned int orig_len = len; struct sk_buff *head = skb; unsigned short fragidx; int slen, ret; do_frag_list: /* Deal with head data */ while (offset < skb_headlen(skb) && len) { struct kvec kv; struct msghdr msg; slen = min_t(int, len, skb_headlen(skb) - offset); kv.iov_base = skb->data + offset; kv.iov_len = slen; memset(&msg, 0, sizeof(msg)); msg.msg_flags = MSG_DONTWAIT; iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen); ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &msg); if (ret <= 0) goto error; offset += ret; len -= ret; } /* All the data was skb head? 
*/ if (!len) goto out; /* Make offset relative to start of frags */ offset -= skb_headlen(skb); /* Find where we are in frag list */ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; if (offset < skb_frag_size(frag)) break; offset -= skb_frag_size(frag); } for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; slen = min_t(size_t, len, skb_frag_size(frag) - offset); while (slen) { struct bio_vec bvec; struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT, }; bvec_set_page(&bvec, skb_frag_page(frag), slen, skb_frag_off(frag) + offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, slen); ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &msg); if (ret <= 0) goto error; len -= ret; offset += ret; slen -= ret; } offset = 0; } if (len) { /* Process any frag lists */ if (skb == head) { if (skb_has_frag_list(skb)) { skb = skb_shinfo(skb)->frag_list; goto do_frag_list; } } else if (skb->next) { skb = skb->next; goto do_frag_list; } } out: return orig_len - len; error: return orig_len == len ? ret : orig_len - len; } /* Send skb data on a socket. Socket must be locked. */ int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len) { return __skb_send_sock(sk, skb, offset, len, sendmsg_locked); } EXPORT_SYMBOL_GPL(skb_send_sock_locked); /* Send skb data on a socket. Socket must be unlocked. */ int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) { return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked); } /** * skb_store_bits - store bits from kernel buffer to skb * @skb: destination buffer * @offset: offset in destination * @from: source buffer * @len: number of bytes to copy * * Copy the specified number of bytes from the source buffer to the * destination skb. This function handles all the messy bits of * traversing fragment lists and such. */ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) { int start = skb_headlen(skb); struct sk_buff *frag_iter; int i, copy; if (offset > (int)skb->len - len) goto fault; if ((copy = start - offset) > 0) { if (copy > len) copy = len; skb_copy_to_linear_data_offset(skb, offset, from, copy); if ((len -= copy) == 0) return 0; offset += copy; from += copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int end; WARN_ON(start > offset + len); end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(vaddr + p_off, from + copied, p_len); kunmap_atomic(vaddr); } if ((len -= copy) == 0) return 0; offset += copy; from += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; if (skb_store_bits(frag_iter, offset - start, from, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; from += copy; } start = end; } if (!len) return 0; fault: return -EFAULT; } EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. 
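 *
 * Illustrative use of the skb_checksum() wrapper defined further down:
 *
 *	__wsum csum = skb_checksum(skb, 0, skb->len, 0);
 *	__sum16 folded = csum_fold(csum);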
*/ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum, const struct skb_checksum_ops *ops) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int pos = 0; /* Checksum header. */ if (copy > 0) { if (copy > len) copy = len; csum = INDIRECT_CALL_1(ops->update, csum_partial_ext, skb->data + offset, copy, csum); if ((len -= copy) == 0) return csum; offset += copy; pos = copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; __wsum csum2; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = INDIRECT_CALL_1(ops->update, csum_partial_ext, vaddr + p_off, p_len, 0); kunmap_atomic(vaddr); csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext, csum, csum2, pos, p_len); pos += p_len; } if (!(len -= copy)) return csum; offset += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { __wsum csum2; if (copy > len) copy = len; csum2 = __skb_checksum(frag_iter, offset - start, copy, 0, ops); csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext, csum, csum2, pos, copy); if ((len -= copy) == 0) return csum; offset += copy; pos += copy; } start = end; } BUG_ON(len); return csum; } EXPORT_SYMBOL(__skb_checksum); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum) { const struct skb_checksum_ops ops = { .update = csum_partial_ext, .combine = csum_block_add_ext, }; return __skb_checksum(skb, offset, len, csum, &ops); } EXPORT_SYMBOL(skb_checksum); /* Both of above in one bottle. */ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int pos = 0; __wsum csum = 0; /* Copy header. 
*/ if (copy > 0) { if (copy > len) copy = len; csum = csum_partial_copy_nocheck(skb->data + offset, to, copy); if ((len -= copy) == 0) return csum; offset += copy; to += copy; pos = copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; WARN_ON(start > offset + len); end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; __wsum csum2; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = csum_partial_copy_nocheck(vaddr + p_off, to + copied, p_len); kunmap_atomic(vaddr); csum = csum_block_add(csum, csum2, pos); pos += p_len; } if (!(len -= copy)) return csum; offset += copy; to += copy; } start = end; } skb_walk_frags(skb, frag_iter) { __wsum csum2; int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; csum2 = skb_copy_and_csum_bits(frag_iter, offset - start, to, copy); csum = csum_block_add(csum, csum2, pos); if ((len -= copy) == 0) return csum; offset += copy; to += copy; pos += copy; } start = end; } BUG_ON(len); return csum; } EXPORT_SYMBOL(skb_copy_and_csum_bits); __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) { __sum16 sum; sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); /* See comments in __skb_checksum_complete(). */ if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); /* This function assumes skb->csum already holds pseudo header's checksum, * which has been changed from the hardware checksum, for example, by * __skb_checksum_validate_complete(). And, the original skb->csum must * have been validated unsuccessfully for CHECKSUM_COMPLETE case. * * It returns non-zero if the recomputed checksum is still invalid, otherwise * zero. The new checksum is stored back into skb->csum unless the skb is * shared. */ __sum16 __skb_checksum_complete(struct sk_buff *skb) { __wsum csum; __sum16 sum; csum = skb_checksum(skb, 0, skb->len, 0); sum = csum_fold(csum_add(skb->csum, csum)); /* This check is inverted, because we already knew the hardware * checksum is invalid before calling this function. So, if the * re-computed checksum is valid instead, then we have a mismatch * between the original skb->csum and skb_checksum(). This means either * the original hardware checksum is incorrect or we screw up skb->csum * when moving skb->data around. 
*/ if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) { /* Save full packet checksum */ skb->csum = csum; skb->ip_summed = CHECKSUM_COMPLETE; skb->csum_complete_sw = 1; skb->csum_valid = !sum; } return sum; } EXPORT_SYMBOL(__skb_checksum_complete); static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum) { net_warn_ratelimited( "%s: attempt to compute crc32c without libcrc32c.ko\n", __func__); return 0; } static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { net_warn_ratelimited( "%s: attempt to compute crc32c without libcrc32c.ko\n", __func__); return 0; } static const struct skb_checksum_ops default_crc32c_ops = { .update = warn_crc32c_csum_update, .combine = warn_crc32c_csum_combine, }; const struct skb_checksum_ops *crc32c_csum_stub __read_mostly = &default_crc32c_ops; EXPORT_SYMBOL(crc32c_csum_stub); /** * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() * @from: source buffer * * Calculates the amount of linear headroom needed in the 'to' skb passed * into skb_zerocopy(). */ unsigned int skb_zerocopy_headlen(const struct sk_buff *from) { unsigned int hlen = 0; if (!from->head_frag || skb_headlen(from) < L1_CACHE_BYTES || skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { hlen = skb_headlen(from); if (!hlen) hlen = from->len; } if (skb_has_frag_list(from)) hlen = from->len; return hlen; } EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); /** * skb_zerocopy - Zero copy skb to skb * @to: destination buffer * @from: source buffer * @len: number of bytes to copy from source buffer * @hlen: size of linear headroom in destination buffer * * Copies up to `len` bytes from `from` to `to` by creating references * to the frags in the source buffer. * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. 
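 *
 * Illustrative caller sketch (caller-side variables named after the
 * parameters; allocation sizing is simplified):
 *
 *	hlen = skb_zerocopy_headlen(from);
 *	to = alloc_skb(hlen, GFP_ATOMIC);
 *	if (to && skb_zerocopy(to, from, from->len, hlen) < 0)
 *		kfree_skb(to);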
* * Return value: * 0: everything is OK * -ENOMEM: couldn't orphan frags of @from due to lack of memory * -EFAULT: skb_copy_bits() found some problem with skb geometry */ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); /* dont bother with small payloads */ if (len <= skb_tailroom(to)) return skb_copy_bits(from, 0, skb_put(to, len), len); if (hlen) { ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); if (unlikely(ret)) return ret; len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); if (plen) { page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); __skb_fill_netmem_desc(to, 0, page_to_netmem(page), offset, plen); get_page(page); j = 1; len -= plen; } } skb_len_add(to, len + plen); if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { skb_tx_error(from); return -ENOMEM; } skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { int size; if (!len) break; skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]), len); skb_frag_size_set(&skb_shinfo(to)->frags[j], size); len -= size; skb_frag_ref(to, j); j++; } skb_shinfo(to)->nr_frags = j; return 0; } EXPORT_SYMBOL_GPL(skb_zerocopy); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; long csstart; if (skb->ip_summed == CHECKSUM_PARTIAL) csstart = skb_checksum_start_offset(skb); else csstart = skb_headlen(skb); BUG_ON(csstart > skb_headlen(skb)); skb_copy_from_linear_data(skb, to, csstart); csum = 0; if (csstart != skb->len) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, skb->len - csstart); if (skb->ip_summed == CHECKSUM_PARTIAL) { long csstuff = csstart + skb->csum_offset; *((__sum16 *)(to + csstuff)) = csum_fold(csum); } } EXPORT_SYMBOL(skb_copy_and_csum_dev); /** * skb_dequeue - remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. The list lock is taken so the function * may be used safely with other locking list functions. The head item is * returned or %NULL if the list is empty. */ struct sk_buff *skb_dequeue(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list->lock, flags); result = __skb_dequeue(list); spin_unlock_irqrestore(&list->lock, flags); return result; } EXPORT_SYMBOL(skb_dequeue); /** * skb_dequeue_tail - remove from the tail of the queue * @list: list to dequeue from * * Remove the tail of the list. The list lock is taken so the function * may be used safely with other locking list functions. The tail item is * returned or %NULL if the list is empty. */ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list->lock, flags); result = __skb_dequeue_tail(list); spin_unlock_irqrestore(&list->lock, flags); return result; } EXPORT_SYMBOL(skb_dequeue_tail); /** * skb_queue_purge_reason - empty a list * @list: list to empty * @reason: drop reason * * Delete all buffers on an &sk_buff list. Each buffer is removed from * the list and one reference dropped. This function takes the list * lock and is atomic with respect to other list locking functions. 
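 *
 * Illustrative use of the locked queue helpers together:
 *
 *	struct sk_buff_head q;
 *
 *	skb_queue_head_init(&q);
 *	skb_queue_tail(&q, skb);
 *	skb_queue_purge_reason(&q, SKB_DROP_REASON_QUEUE_PURGE);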
*/ void skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason) { struct sk_buff_head tmp; unsigned long flags; if (skb_queue_empty_lockless(list)) return; __skb_queue_head_init(&tmp); spin_lock_irqsave(&list->lock, flags); skb_queue_splice_init(list, &tmp); spin_unlock_irqrestore(&list->lock, flags); __skb_queue_purge_reason(&tmp, reason); } EXPORT_SYMBOL(skb_queue_purge_reason); /** * skb_rbtree_purge - empty a skb rbtree * @root: root of the rbtree to empty * Return value: the sum of truesizes of all purged skbs. * * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from * the list and one reference dropped. This function does not take * any lock. Synchronization should be handled by the caller (e.g., TCP * out-of-order queue is protected by the socket lock). */ unsigned int skb_rbtree_purge(struct rb_root *root) { struct rb_node *p = rb_first(root); unsigned int sum = 0; while (p) { struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); p = rb_next(p); rb_erase(&skb->rbnode, root); sum += skb->truesize; kfree_skb(skb); } return sum; } void skb_errqueue_purge(struct sk_buff_head *list) { struct sk_buff *skb, *next; struct sk_buff_head kill; unsigned long flags; __skb_queue_head_init(&kill); spin_lock_irqsave(&list->lock, flags); skb_queue_walk_safe(list, skb, next) { if (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ZEROCOPY || SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) continue; __skb_unlink(skb, list); __skb_queue_tail(&kill, skb); } spin_unlock_irqrestore(&list->lock, flags); __skb_queue_purge(&kill); } EXPORT_SYMBOL(skb_errqueue_purge); /** * skb_queue_head - queue a buffer at the list head * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the start of the list. This function takes the * list lock and can be used safely with other locking &sk_buff functions * safely. * * A buffer cannot be placed on two lists at the same time. */ void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_head(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_queue_head); /** * skb_queue_tail - queue a buffer at the list tail * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the tail of the list. This function takes the * list lock and can be used safely with other locking &sk_buff functions * safely. * * A buffer cannot be placed on two lists at the same time. */ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_queue_tail); /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove * @list: list to use * * Remove a packet from a list. The list locks are taken and this * function is atomic with respect to other list locked calls * * You must know what list the SKB is on. */ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_unlink(skb, list); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_unlink); /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert * @list: list to use * * Place a packet after a given packet in a list. The list locks are taken * and this function is atomic with respect to other list locked calls. 
* A buffer cannot be placed on two lists at the same time. */ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_after(list, old, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_append); static inline void skb_split_inside_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, const int pos) { int i; skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), pos - len); /* And move data appendix as is. */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; skb_shinfo(skb)->nr_frags = 0; skb1->data_len = skb->data_len; skb1->len += skb1->data_len; skb->data_len = 0; skb->len = len; skb_set_tail_pointer(skb, len); } static inline void skb_split_no_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, int pos) { int i, k = 0; const int nfrags = skb_shinfo(skb)->nr_frags; skb_shinfo(skb)->nr_frags = 0; skb1->len = skb1->data_len = skb->len - len; skb->len = len; skb->data_len = len - pos; for (i = 0; i < nfrags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + size > len) { skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; if (pos < len) { /* Split frag. * We have two variants in this case: * 1. Move all the frag to the second * part, if it is possible. F.e. * this approach is mandatory for TUX, * where splitting is expensive. * 2. Split is accurately. We make this. */ skb_frag_ref(skb, i); skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; } k++; } else skb_shinfo(skb)->nr_frags++; pos += size; } skb_shinfo(skb1)->nr_frags = k; } /** * skb_split - Split fragmented skb to two parts at length len. * @skb: the buffer to split * @skb1: the buffer to receive the second part * @len: new length for skb */ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY; skb_zcopy_downgrade_managed(skb); skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ skb_split_no_header(skb, skb1, len, pos); } EXPORT_SYMBOL(skb_split); /* Shifting from/to a cloned skb is a no-go. * * Caller cannot keep skb_shinfo related pointers past calling here! */ static int skb_prepare_for_shift(struct sk_buff *skb) { return skb_unclone_keeptruesize(skb, GFP_ATOMIC); } /** * skb_shift - Shifts paged data partially from skb to another * @tgt: buffer into which tail data gets added * @skb: buffer from which the paged data comes from * @shiftlen: shift up to this many bytes * * Attempts to shift up to shiftlen worth of bytes, which may be less than * the length of the skb, from skb to tgt. Returns number bytes shifted. * It's up to caller to free skb if everything was shifted. * * If @tgt runs out of frags, the whole operation is aborted. * * Skb cannot include anything else but paged data while tgt is allowed * to have non-paged data as well. * * TODO: full sized shift could be optimized but that would need * specialized skb free'er to handle frags without up-to-date nr_frags. 
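 *
 * Illustrative caller sketch ('prev' is a hypothetical target skb; as
 * noted above, the caller must free the emptied source itself):
 *
 *	int shifted = skb_shift(prev, skb, skb->len);
 *
 *	if (shifted == skb->len)
 *		consume_skb(skb);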
*/ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) { int from, to, merge, todo; skb_frag_t *fragfrom, *fragto; BUG_ON(shiftlen > skb->len); if (skb_headlen(skb)) return 0; if (skb_zcopy(tgt) || skb_zcopy(skb)) return 0; todo = shiftlen; from = 0; to = skb_shinfo(tgt)->nr_frags; fragfrom = &skb_shinfo(skb)->frags[from]; /* Actual merge is delayed until the point when we know we can * commit all, so that we don't have to undo partial changes */ if (!to || !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), skb_frag_off(fragfrom))) { merge = -1; } else { merge = to - 1; todo -= skb_frag_size(fragfrom); if (todo < 0) { if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) return 0; /* All previous frag pointers might be stale! */ fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[merge]; skb_frag_size_add(fragto, shiftlen); skb_frag_size_sub(fragfrom, shiftlen); skb_frag_off_add(fragfrom, shiftlen); goto onlymerged; } from++; } /* Skip full, not-fitting skb to avoid expensive operations */ if ((shiftlen == skb->len) && (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) return 0; if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) return 0; while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { if (to == MAX_SKB_FRAGS) return 0; fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[to]; if (todo >= skb_frag_size(fragfrom)) { *fragto = *fragfrom; todo -= skb_frag_size(fragfrom); from++; to++; } else { __skb_frag_ref(fragfrom); skb_frag_page_copy(fragto, fragfrom); skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); skb_frag_off_add(fragfrom, todo); skb_frag_size_sub(fragfrom, todo); todo = 0; to++; break; } } /* Ready to "commit" this state change to tgt */ skb_shinfo(tgt)->nr_frags = to; if (merge >= 0) { fragfrom = &skb_shinfo(skb)->frags[0]; fragto = &skb_shinfo(tgt)->frags[merge]; skb_frag_size_add(fragto, skb_frag_size(fragfrom)); __skb_frag_unref(fragfrom, skb->pp_recycle); } /* Reposition in the original skb */ to = 0; while (from < skb_shinfo(skb)->nr_frags) skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; skb_shinfo(skb)->nr_frags = to; BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); onlymerged: /* Most likely the tgt won't ever need its checksum anymore, skb on * the other hand might need it if it needs to be resent */ tgt->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL; skb_len_add(skb, -shiftlen); skb_len_add(tgt, shiftlen); return shiftlen; } /** * skb_prepare_seq_read - Prepare a sequential read of skb data * @skb: the buffer to read * @from: lower offset of data to be read * @to: upper offset of data to be read * @st: state variable * * Initializes the specified state variable. Must be called before * invoking skb_seq_read() for the first time. */ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int to, struct skb_seq_state *st) { st->lower_offset = from; st->upper_offset = to; st->root_skb = st->cur_skb = skb; st->frag_idx = st->stepped_offset = 0; st->frag_data = NULL; st->frag_off = 0; } EXPORT_SYMBOL(skb_prepare_seq_read); /** * skb_seq_read - Sequentially read skb data * @consumed: number of bytes consumed by the caller so far * @data: destination pointer for data to be returned * @st: state variable * * Reads a block of skb data at @consumed relative to the * lower offset specified to skb_prepare_seq_read(). 
Assigns * the head of the data block to @data and returns the length * of the block or 0 if the end of the skb data or the upper * offset has been reached. * * The caller is not required to consume all of the data * returned, i.e. @consumed is typically set to the number * of bytes already consumed and the next call to * skb_seq_read() will return the remaining part of the block. * * Note 1: The size of each block of data returned can be arbitrary, * this limitation is the cost for zerocopy sequential * reads of potentially non linear data. * * Note 2: Fragment lists within fragments are not implemented * at the moment, state->root_skb could be replaced with * a stack for this purpose. */ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st) { unsigned int block_limit, abs_offset = consumed + st->lower_offset; skb_frag_t *frag; if (unlikely(abs_offset >= st->upper_offset)) { if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } return 0; } next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } if (st->frag_idx == 0 && !st->frag_data) st->stepped_offset += skb_headlen(st->cur_skb); while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { unsigned int pg_idx, pg_off, pg_sz; frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; pg_idx = 0; pg_off = skb_frag_off(frag); pg_sz = skb_frag_size(frag); if (skb_frag_must_loop(skb_frag_page(frag))) { pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT; pg_off = offset_in_page(pg_off + st->frag_off); pg_sz = min_t(unsigned int, pg_sz - st->frag_off, PAGE_SIZE - pg_off); } block_limit = pg_sz + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx); *data = (u8 *)st->frag_data + pg_off + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } st->stepped_offset += pg_sz; st->frag_off += pg_sz; if (st->frag_off == skb_frag_size(frag)) { st->frag_off = 0; st->frag_idx++; } } if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; } else if (st->cur_skb->next) { st->cur_skb = st->cur_skb->next; st->frag_idx = 0; goto next_skb; } return 0; } EXPORT_SYMBOL(skb_seq_read); /** * skb_abort_seq_read - Abort a sequential read of skb data * @st: state variable * * Must be called if skb_seq_read() was not called until it * returned 0. */ void skb_abort_seq_read(struct skb_seq_state *st) { if (st->frag_data) kunmap_atomic(st->frag_data); } EXPORT_SYMBOL(skb_abort_seq_read); #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, struct ts_config *conf, struct ts_state *state) { return skb_seq_read(offset, text, TS_SKB_CB(state)); } static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) { skb_abort_seq_read(TS_SKB_CB(state)); } /** * skb_find_text - Find a text pattern in skb data * @skb: the buffer to look in * @from: search offset * @to: search limit * @config: textsearch configuration * * Finds a pattern in the skb data according to the specified * textsearch configuration. 
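 *
 * A hedged call sketch (error handling abridged; "pattern" is an
 * illustrative byte string):
 *
 *	conf = textsearch_prepare("kmp", pattern, strlen(pattern),
 *				  GFP_KERNEL, TS_AUTOLOAD);
 *	if (!IS_ERR(conf)) {
 *		offset = skb_find_text(skb, 0, skb->len, conf);
 *		textsearch_destroy(conf);
 *	}
 *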
Use textsearch_next() to retrieve * subsequent occurrences of the pattern. Returns the offset * to the first occurrence or UINT_MAX if no match was found. */ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config) { unsigned int patlen = config->ops->get_pattern_len(config); struct ts_state state; unsigned int ret; BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb)); config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state)); ret = textsearch_find(config, &state); return (ret + patlen <= to - from ? ret : UINT_MAX); } EXPORT_SYMBOL(skb_find_text); int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size, size_t max_frags) { int i = skb_shinfo(skb)->nr_frags; if (skb_can_coalesce(skb, i, page, offset)) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); } else if (i < max_frags) { skb_zcopy_downgrade_managed(skb); get_page(page); skb_fill_page_desc_noacc(skb, i, page, offset, size); } else { return -EMSGSIZE; } return 0; } EXPORT_SYMBOL_GPL(skb_append_pagefrags); /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update * @len: length of data pulled * * This function performs an skb_pull on the packet and updates * the CHECKSUM_COMPLETE checksum. It should be used on * receive path processing instead of skb_pull unless you know * that the checksum difference is zero (e.g., a valid IP header) * or you are setting ip_summed to CHECKSUM_NONE. */ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { unsigned char *data = skb->data; BUG_ON(len > skb->len); __skb_pull(skb, len); skb_postpull_rcsum(skb, data, len); return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) { skb_frag_t head_frag; struct page *page; page = virt_to_head_page(frag_skb->head); skb_frag_fill_page_desc(&head_frag, page, frag_skb->data - (unsigned char *)page_address(page), skb_headlen(frag_skb)); return head_frag; } struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset) { struct sk_buff *list_skb = skb_shinfo(skb)->frag_list; unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int delta_truesize = 0; unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; int len_diff, err; skb_push(skb, -skb_network_offset(skb) + offset); /* Ensure the head is writeable before touching the shared info */ err = skb_unclone(skb, GFP_ATOMIC); if (err) goto err_linearize; skb_shinfo(skb)->frag_list = NULL; while (list_skb) { nskb = list_skb; list_skb = list_skb->next; err = 0; delta_truesize += nskb->truesize; if (skb_shared(nskb)) { tmp = skb_clone(nskb, GFP_ATOMIC); if (tmp) { consume_skb(nskb); nskb = tmp; err = skb_unclone(nskb, GFP_ATOMIC); } else { err = -ENOMEM; } } if (!tail) skb->next = nskb; else tail->next = nskb; if (unlikely(err)) { nskb->next = list_skb; goto err_linearize; } tail = nskb; delta_len += nskb->len; skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); nskb->transport_header += len_diff; skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); if (skb_needs_linearize(nskb, features) && __skb_linearize(nskb)) goto 
err_linearize; } skb->truesize = skb->truesize - delta_truesize; skb->data_len = skb->data_len - delta_len; skb->len = skb->len - delta_len; skb_gso_reset(skb); skb->prev = tail; if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto err_linearize; skb_get(skb); return skb; err_linearize: kfree_skb_list(skb->next); skb->next = NULL; return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(skb_segment_list); /** * skb_segment - Perform protocol segmentation on skb. * @head_skb: buffer to segment * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ struct sk_buff *skb_segment(struct sk_buff *head_skb, netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int partial_segs = 0; unsigned int headroom; unsigned int len = head_skb->len; struct sk_buff *frag_skb; skb_frag_t *frag; __be16 proto; bool csum, sg; int err = -ENOMEM; int i = 0; int nfrags, pos; if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { struct sk_buff *check_skb; for (check_skb = list_skb; check_skb; check_skb = check_skb->next) { if (skb_headlen(check_skb) && !check_skb->head_frag) { /* gso_size is untrusted, and we have a frag_list with * a linear non head_frag item. * * If head_skb's headlen does not fit requested gso_size, * it means that the frag_list members do NOT terminate * on exact gso_size boundaries. Hence we cannot perform * skb_frag_t page sharing. Therefore we must fallback to * copying the frag_list skbs; we do so by disabling SG. */ features &= ~NETIF_F_SG; break; } } } __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL); sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); if (sg && csum && (mss != GSO_BY_FRAGS)) { if (!(features & NETIF_F_GSO_PARTIAL)) { struct sk_buff *iter; unsigned int frag_len; if (!list_skb || !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) goto normal; /* If we get here then all the required * GSO features except frag_list are supported. * Try to split the SKB to multiple GSO SKBs * with no frag_list. * Currently we can do that only when the buffers don't * have a linear part and all the buffers except * the last are of the same length. */ frag_len = list_skb->len; skb_walk_frags(head_skb, iter) { if (frag_len != iter->len && iter->next) goto normal; if (skb_headlen(iter) && !iter->head_frag) goto normal; len -= iter->len; } if (len != frag_len) goto normal; } /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. * Cap len to not accidentally hit GSO_BY_FRAGS. 
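		 * For instance (illustrative numbers only): with len = 6000 and
		 * mss = 1448 the division below yields partial_segs = 4, so the
		 * effective mss used for the partial segments becomes
		 * 4 * 1448 = 5792.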
*/ partial_segs = min(len, GSO_BY_FRAGS - 1) / mss; if (partial_segs > 1) mss *= partial_segs; else partial_segs = 0; } normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); if (skb_orphan_frags(head_skb, GFP_ATOMIC)) return ERR_PTR(-ENOMEM); nfrags = skb_shinfo(head_skb)->nr_frags; frag = skb_shinfo(head_skb)->frags; frag_skb = head_skb; do { struct sk_buff *nskb; skb_frag_t *nskb_frag; int hsize; int size; if (unlikely(mss == GSO_BY_FRAGS)) { len = list_skb->len; } else { len = head_skb->len - offset; if (len > mss) len = mss; } hsize = skb_headlen(head_skb) - offset; if (hsize <= 0 && i >= nfrags && skb_headlen(list_skb) && (skb_headlen(list_skb) == len || sg)) { BUG_ON(skb_headlen(list_skb) > len); nskb = skb_clone(list_skb, GFP_ATOMIC); if (unlikely(!nskb)) goto err; i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; frag++; } list_skb = list_skb->next; if (unlikely(pskb_trim(nskb, len))) { kfree_skb(nskb); goto err; } hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); goto err; } nskb->truesize += skb_end_offset(nskb) - hsize; skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { if (hsize < 0) hsize = 0; if (hsize > len || !sg) hsize = len; nskb = __alloc_skb(hsize + doffset + headroom, GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) goto err; skb_reserve(nskb, headroom); __skb_put(nskb, doffset); } if (segs) tail->next = nskb; else segs = nskb; tail = nskb; __copy_skb_header(nskb, head_skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); skb_reset_mac_len(nskb); skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); if (nskb->len == len + doffset) goto perform_csum_check; if (!sg) { if (!csum) { if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len); SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } else { if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len)) goto err; } continue; } nskb_frag = skb_shinfo(nskb)->frags; skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags & SKBFL_SHARED_FRAG; if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) goto err; while (pos < offset + len) { if (i >= nfrags) { if (skb_orphan_frags(list_skb, GFP_ATOMIC) || skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC)) goto err; i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; if (!skb_headlen(list_skb)) { BUG_ON(!nfrags); } else { BUG_ON(!list_skb->head_frag); /* to make room for head_frag. */ i--; frag--; } list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= MAX_SKB_FRAGS)) { net_warn_ratelimited( "skb_segment: too many frags: %u %u\n", pos, mss); err = -EINVAL; goto err; } *nskb_frag = (i < 0) ? 
skb_head_frag_to_page_desc(frag_skb) : *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); if (pos < offset) { skb_frag_off_add(nskb_frag, offset - pos); skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; if (pos + size <= offset + len) { i++; frag++; pos += size; } else { skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } nskb_frag++; } skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; perform_csum_check: if (!csum) { if (skb_has_shared_frag(nskb) && __skb_linearize(nskb)) goto err; if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); /* Some callers want to get the end of the list. * Put it in segs->prev to avoid walking the list. * (see validate_xmit_skb_list() for example) */ segs->prev = tail; if (partial_segs) { struct sk_buff *iter; int type = skb_shinfo(head_skb)->gso_type; unsigned short gso_size = skb_shinfo(head_skb)->gso_size; /* Update type to add partial and then remove dodgy if set */ type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; type &= ~SKB_GSO_DODGY; /* Update GSO info and prepare to start updating headers on * our way back down the stack of protocols. */ for (iter = segs; iter; iter = iter->next) { skb_shinfo(iter)->gso_size = gso_size; skb_shinfo(iter)->gso_segs = partial_segs; skb_shinfo(iter)->gso_type = type; SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; } if (tail->len - doffset <= gso_size) skb_shinfo(tail)->gso_size = 0; else if (tail != segs) skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); } /* Following permits correct backpressure, for protocols * using skb_set_owner_w(). * Idea is to tranfert ownership from head_skb to last segment. 
*/ if (head_skb->destructor == sock_wfree) { swap(tail->truesize, head_skb->truesize); swap(tail->destructor, head_skb->destructor); swap(tail->sk, head_skb->sk); } return segs; err: kfree_skb_list(segs); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skb_segment); #ifdef CONFIG_SKB_EXTENSIONS #define SKB_EXT_ALIGN_VALUE 8 #define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE) static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info), #endif #ifdef CONFIG_XFRM [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path), #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), #endif #if IS_ENABLED(CONFIG_MPTCP) [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), #endif #if IS_ENABLED(CONFIG_MCTP_FLOWS) [SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow), #endif }; static __always_inline unsigned int skb_ext_total_length(void) { unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext); int i; for (i = 0; i < ARRAY_SIZE(skb_ext_type_len); i++) l += skb_ext_type_len[i]; return l; } static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM >= 8); #if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); #endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); } #else static void skb_extensions_init(void) {} #endif /* The SKB kmem_cache slab is critical for network performance. Never * merge/alias the slab with similar sized objects. This avoids fragmentation * that hurts performance of kmem_cache_{alloc,free}_bulk APIs. */ #ifndef CONFIG_SLUB_TINY #define FLAG_SKB_NO_MERGE SLAB_NO_MERGE #else /* CONFIG_SLUB_TINY - simple loop in kmem_cache_alloc_bulk */ #define FLAG_SKB_NO_MERGE 0 #endif void __init skb_init(void) { net_hotdata.skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache", sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC| FLAG_SKB_NO_MERGE, offsetof(struct sk_buff, cb), sizeof_field(struct sk_buff, cb), NULL); net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", sizeof(struct sk_buff_fclones), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); /* usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes. * struct skb_shared_info is located at the end of skb->head, * and should not be copied to/from user. 
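	 * Hence the cache created below whitelists only useroffset 0 and
	 * usersize SKB_SMALL_HEAD_HEADROOM for hardened usercopy checks.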
*/ net_hotdata.skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head", SKB_SMALL_HEAD_CACHE_SIZE, 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 0, SKB_SMALL_HEAD_HEADROOM, NULL); skb_extensions_init(); } static int __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, unsigned int recursion_level) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int elt = 0; if (unlikely(recursion_level >= 24)) return -EMSGSIZE; if (copy > 0) { if (copy > len) copy = len; sg_set_buf(sg, skb->data + offset, copy); elt++; if ((len -= copy) == 0) return elt; offset += copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; WARN_ON(start > offset + len); end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (unlikely(elt && sg_is_last(&sg[elt - 1]))) return -EMSGSIZE; if (copy > len) copy = len; sg_set_page(&sg[elt], skb_frag_page(frag), copy, skb_frag_off(frag) + offset - start); elt++; if (!(len -= copy)) return elt; offset += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (unlikely(elt && sg_is_last(&sg[elt - 1]))) return -EMSGSIZE; if (copy > len) copy = len; ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start, copy, recursion_level + 1); if (unlikely(ret < 0)) return ret; elt += ret; if ((len -= copy) == 0) return elt; offset += copy; } start = end; } BUG_ON(len); return elt; } /** * skb_to_sgvec - Fill a scatter-gather list from a socket buffer * @skb: Socket buffer containing the buffers to be mapped * @sg: The scatter-gather list to map into * @offset: The offset into the buffer's contents to start mapping * @len: Length of buffer space to be mapped * * Fill the specified scatter-gather list with mappings/pointers into a * region of the buffer space attached to a socket buffer. Returns either * the number of scatterlist items used, or -EMSGSIZE if the contents * could not fit. */ int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { int nsg = __skb_to_sgvec(skb, sg, offset, len, 0); if (nsg <= 0) return nsg; sg_mark_end(&sg[nsg - 1]); return nsg; } EXPORT_SYMBOL_GPL(skb_to_sgvec); /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given * sglist without mark the sg which contain last skb data as the end. * So the caller can mannipulate sg list as will when padding new data after * the first call without calling sg_unmark_end to expend sg list. * * Scenario to use skb_to_sgvec_nomark: * 1. sg_init_table * 2. skb_to_sgvec_nomark(payload1) * 3. skb_to_sgvec_nomark(payload2) * * This is equivalent to: * 1. sg_init_table * 2. skb_to_sgvec(payload1) * 3. sg_unmark_end * 4. skb_to_sgvec(payload2) * * When mapping mutilple payload conditionally, skb_to_sgvec_nomark * is more preferable. */ int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { return __skb_to_sgvec(skb, sg, offset, len, 0); } EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); /** * skb_cow_data - Check that a socket buffer's data buffers are writable * @skb: The socket buffer to check. * @tailbits: Amount of trailing space to be added * @trailer: Returned pointer to the skb where the @tailbits space begins * * Make sure that the data buffers attached to a socket buffer are * writable. 
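 *
 * A hedged call-site sketch (an IPsec-style output path that is about to
 * modify the payload in place; names are illustrative):
 *
 *	nsg = skb_cow_data(skb, tailbits, &trailer);
 *	if (nsg < 0)
 *		return nsg;
 *
 * The function checks each buffer reachable from @skb for writability.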
If they are not, private copies are made of the data buffers * and the socket buffer is set to use these instead. * * If @tailbits is given, make sure that there is space to write @tailbits * bytes of data beyond current end of socket buffer. @trailer will be * set to point to the skb in which this space begins. * * The number of scatterlist elements required to completely map the * COW'd and extended socket buffer will be returned. */ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) { int copyflag; int elt; struct sk_buff *skb1, **skb_p; /* If skb is cloned or its head is paged, reallocate * head pulling out all the pages (pages are considered not writable * at the moment even if they are anonymous). */ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && !__pskb_pull_tail(skb, __skb_pagelen(skb))) return -ENOMEM; /* Easy case. Most of packets will go this way. */ if (!skb_has_frag_list(skb)) { /* A little of trouble, not enough of space for trailer. * This should not happen, when stack is tuned to generate * good frames. OK, on miss we reallocate and reserve even more * space, 128 bytes is fair. */ if (skb_tailroom(skb) < tailbits && pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) return -ENOMEM; /* Voila! */ *trailer = skb; return 1; } /* Misery. We are in troubles, going to mincer fragments... */ elt = 1; skb_p = &skb_shinfo(skb)->frag_list; copyflag = 0; while ((skb1 = *skb_p) != NULL) { int ntail = 0; /* The fragment is partially pulled by someone, * this can happen on input. Copy it and everything * after it. */ if (skb_shared(skb1)) copyflag = 1; /* If the skb is the last, worry about trailer. */ if (skb1->next == NULL && tailbits) { if (skb_shinfo(skb1)->nr_frags || skb_has_frag_list(skb1) || skb_tailroom(skb1) < tailbits) ntail = tailbits + 128; } if (copyflag || skb_cloned(skb1) || ntail || skb_shinfo(skb1)->nr_frags || skb_has_frag_list(skb1)) { struct sk_buff *skb2; /* Fuck, we are miserable poor guys... */ if (ntail == 0) skb2 = skb_copy(skb1, GFP_ATOMIC); else skb2 = skb_copy_expand(skb1, skb_headroom(skb1), ntail, GFP_ATOMIC); if (unlikely(skb2 == NULL)) return -ENOMEM; if (skb1->sk) skb_set_owner_w(skb2, skb1->sk); /* Looking around. Are we still alive? * OK, link new skb, drop old one */ skb2->next = skb1->next; *skb_p = skb2; kfree_skb(skb1); skb1 = skb2; } elt++; *trailer = skb1; skb_p = &skb1->next; } return elt; } EXPORT_SYMBOL_GPL(skb_cow_data); static void sock_rmem_free(struct sk_buff *skb) { struct sock *sk = skb->sk; atomic_sub(skb->truesize, &sk->sk_rmem_alloc); } static void skb_set_err_queue(struct sk_buff *skb) { /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING. * So, it is safe to (mis)use it to mark skbs on the error queue. 
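 * Such skbs are later drained by user space via recvmsg() with the
 * MSG_ERRQUEUE flag, which also delivers the attached sock_extended_err
 * as ancillary data.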
*/ skb->pkt_type = PACKET_OUTGOING; BUILD_BUG_ON(PACKET_OUTGOING == 0); } /* * Note: We dont mem charge error packets (no sk_forward_alloc changes) */ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned int)READ_ONCE(sk->sk_rcvbuf)) return -ENOMEM; skb_orphan(skb); skb->sk = sk; skb->destructor = sock_rmem_free; atomic_add(skb->truesize, &sk->sk_rmem_alloc); skb_set_err_queue(skb); /* before exiting rcu section, make sure dst is refcounted */ skb_dst_force(skb); skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); static bool is_icmp_err_skb(const struct sk_buff *skb) { return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP || SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6); } struct sk_buff *sock_dequeue_err_skb(struct sock *sk) { struct sk_buff_head *q = &sk->sk_error_queue; struct sk_buff *skb, *skb_next = NULL; bool icmp_next = false; unsigned long flags; if (skb_queue_empty_lockless(q)) return NULL; spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) { icmp_next = is_icmp_err_skb(skb_next); if (icmp_next) sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno; } spin_unlock_irqrestore(&q->lock, flags); if (is_icmp_err_skb(skb) && !icmp_next) sk->sk_err = 0; if (skb_next) sk_error_report(sk); return skb; } EXPORT_SYMBOL(sock_dequeue_err_skb); /** * skb_clone_sk - create clone of skb, and take reference to socket * @skb: the skb to clone * * This function creates a clone of a buffer that holds a reference on * sk_refcnt. Buffers created via this function are meant to be * returned using sock_queue_err_skb, or free via kfree_skb. * * When passing buffers allocated with this function to sock_queue_err_skb * it is necessary to wrap the call with sock_hold/sock_put in order to * prevent the socket from being released prior to being enqueued on * the sk_error_queue. */ struct sk_buff *skb_clone_sk(struct sk_buff *skb) { struct sock *sk = skb->sk; struct sk_buff *clone; if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return NULL; clone = skb_clone(skb, GFP_ATOMIC); if (!clone) { sock_put(sk); return NULL; } clone->sk = sk; clone->destructor = sock_efree; return clone; } EXPORT_SYMBOL(skb_clone_sk); static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, int tstype, bool opt_stats) { struct sock_exterr_skb *serr; int err; BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; serr->opt_stats = opt_stats; serr->header.h4.iif = skb->dev ? 
skb->dev->ifindex : 0; if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk_is_tcp(sk)) serr->ee.ee_data -= atomic_read(&sk->sk_tskey); } err = sock_queue_err_skb(sk, skb); if (err) kfree_skb(skb); } static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret; if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) return true; read_lock_bh(&sk->sk_callback_lock); ret = sk->sk_socket && sk->sk_socket->file && file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); read_unlock_bh(&sk->sk_callback_lock); return ret; } void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) { struct sock *sk = skb->sk; if (!skb_may_tx_timestamp(sk, false)) goto err; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); return; } err: kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype) { struct sk_buff *skb; bool tsonly, opt_stats = false; u32 tsflags; if (!sk) return; tsflags = READ_ONCE(sk->sk_tsflags); if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) return; tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) { #ifdef CONFIG_INET if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk_is_tcp(sk)) { skb = tcp_get_timestamping_opt_stats(sk, orig_skb, ack_skb); opt_stats = true; } else #endif skb = alloc_skb(0, GFP_ATOMIC); } else { skb = skb_clone(orig_skb, GFP_ATOMIC); if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) { kfree_skb(skb); return; } } if (!skb) return; if (tsonly) { skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags & SKBTX_ANY_TSTAMP; skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; } if (hwtstamps) *skb_hwtstamps(skb) = *hwtstamps; else __net_timestamp(skb); __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk, SCM_TSTAMP_SND); } EXPORT_SYMBOL_GPL(skb_tstamp_tx); #ifdef CONFIG_WIRELESS void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { err = sock_queue_err_skb(sk, skb); sock_put(sk); } if (err) kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); #endif /* CONFIG_WIRELESS */ /** * skb_partial_csum_set - set up and verify partial csum values for packet * @skb: the skb to set * @start: the number of bytes after skb->data to start checksumming. * @off: the offset from start to place the checksum. * * For untrusted partially-checksummed packets, we need to make sure the values * for skb->csum_start and skb->csum_offset are valid so we don't oops. 
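 *
 * For example (illustrative layout): for a UDP datagram behind a 14 byte
 * Ethernet header and a 20 byte IPv4 header, with skb->data at the start
 * of the Ethernet header, a virtio-style sender would pass
 * start = 14 + 20 = 34 and off = offsetof(struct udphdr, check) = 6.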
* * This function checks and sets those values and skb->ip_summed: if this * returns false you should drop the packet. */ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); u32 csum_start = skb_headroom(skb) + (u32)start; if (unlikely(csum_start >= U16_MAX || csum_end > skb_headlen(skb))) { net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", start, off, skb_headroom(skb), skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = csum_start; skb->csum_offset = off; skb->transport_header = csum_start; return true; } EXPORT_SYMBOL_GPL(skb_partial_csum_set); static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max) { if (skb_headlen(skb) >= len) return 0; /* If we need to pullup then pullup to the max, so we * won't need to do it again. */ if (max > skb->len) max = skb->len; if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) return -ENOMEM; if (skb_headlen(skb) < len) return -EPROTO; return 0; } #define MAX_TCP_HDR_LEN (15 * 4) static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, typeof(IPPROTO_IP) proto, unsigned int off) { int err; switch (proto) { case IPPROTO_TCP: err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), off + MAX_TCP_HDR_LEN); if (!err && !skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) err = -EPROTO; return err ? ERR_PTR(err) : &tcp_hdr(skb)->check; case IPPROTO_UDP: err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr), off + sizeof(struct udphdr)); if (!err && !skb_partial_csum_set(skb, off, offsetof(struct udphdr, check))) err = -EPROTO; return err ? ERR_PTR(err) : &udp_hdr(skb)->check; } return ERR_PTR(-EPROTO); } /* This value should be large enough to cover a tagged ethernet header plus * maximally sized IP and TCP or UDP headers. */ #define MAX_IP_HDR_LEN 128 static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) { unsigned int off; bool fragment; __sum16 *csum; int err; fragment = false; err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN); if (err < 0) goto out; if (ip_is_fragment(ip_hdr(skb))) fragment = true; off = ip_hdrlen(skb); err = -EPROTO; if (fragment) goto out; csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off); if (IS_ERR(csum)) return PTR_ERR(csum); if (recalculate) *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len - off, ip_hdr(skb)->protocol, 0); err = 0; out: return err; } /* This value should be large enough to cover a tagged ethernet header plus * an IPv6 header, all options, and a maximal TCP or UDP header. 
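 * (Roughly: 18 bytes of VLAN-tagged Ethernet + a 40 byte fixed IPv6 header
 *  + a maximal 60 byte TCP header is 118 bytes, leaving the rest of the
 *  256 byte budget below for extension headers.)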
*/ #define MAX_IPV6_HDR_LEN 256 #define OPT_HDR(type, skb, off) \ (type *)(skb_network_header(skb) + (off)) static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) { int err; u8 nexthdr; unsigned int off; unsigned int len; bool fragment; bool done; __sum16 *csum; fragment = false; done = false; off = sizeof(struct ipv6hdr); err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); if (err < 0) goto out; nexthdr = ipv6_hdr(skb)->nexthdr; len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); while (off <= len && !done) { switch (nexthdr) { case IPPROTO_DSTOPTS: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: { struct ipv6_opt_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); nexthdr = hp->nexthdr; off += ipv6_optlen(hp); break; } case IPPROTO_AH: { struct ip_auth_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct ip_auth_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct ip_auth_hdr, skb, off); nexthdr = hp->nexthdr; off += ipv6_authlen(hp); break; } case IPPROTO_FRAGMENT: { struct frag_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct frag_hdr, skb, off); if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) fragment = true; nexthdr = hp->nexthdr; off += sizeof(struct frag_hdr); break; } default: done = true; break; } } err = -EPROTO; if (!done || fragment) goto out; csum = skb_checksum_setup_ip(skb, nexthdr, off); if (IS_ERR(csum)) return PTR_ERR(csum); if (recalculate) *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len - off, nexthdr, 0); err = 0; out: return err; } /** * skb_checksum_setup - set up partial checksum offset * @skb: the skb to set up * @recalculate: if true the pseudo-header checksum will be recalculated */ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) { int err; switch (skb->protocol) { case htons(ETH_P_IP): err = skb_checksum_setup_ipv4(skb, recalculate); break; case htons(ETH_P_IPV6): err = skb_checksum_setup_ipv6(skb, recalculate); break; default: err = -EPROTO; break; } return err; } EXPORT_SYMBOL(skb_checksum_setup); /** * skb_checksum_maybe_trim - maybe trims the given skb * @skb: the skb to check * @transport_len: the data length beyond the network header * * Checks whether the given skb has data beyond the given transport length. * If so, returns a cloned skb trimmed to this transport length. * Otherwise returns the provided skb. Returns NULL in error cases * (e.g. transport_len exceeds skb length or out-of-memory). * * Caller needs to set the skb transport header and free any returned skb if it * differs from the provided skb. */ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, unsigned int transport_len) { struct sk_buff *skb_chk; unsigned int len = skb_transport_offset(skb) + transport_len; int ret; if (skb->len < len) return NULL; else if (skb->len == len) return skb; skb_chk = skb_clone(skb, GFP_ATOMIC); if (!skb_chk) return NULL; ret = pskb_trim_rcsum(skb_chk, len); if (ret) { kfree_skb(skb_chk); return NULL; } return skb_chk; } /** * skb_checksum_trimmed - validate checksum of an skb * @skb: the skb to check * @transport_len: the data length beyond the network header * @skb_chkf: checksum function to use * * Applies the given checksum function skb_chkf to the provided skb. * Returns a checked and maybe trimmed skb. Returns NULL on error. 
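 *
 * A hedged caller sketch ("my_proto_csum" stands in for a
 * __sum16 (*)(struct sk_buff *) validator and is not defined here):
 *
 *	skb_chk = skb_checksum_trimmed(skb, transport_len, my_proto_csum);
 *	if (!skb_chk)
 *		goto drop;
 *	(use skb_chk, then:)
 *	if (skb_chk != skb)
 *		kfree_skb(skb_chk);
 *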
* * If the skb has data beyond the given transport length, then a * trimmed & cloned skb is checked and returned. * * Caller needs to set the skb transport header and free any returned skb if it * differs from the provided skb. */ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, __sum16(*skb_chkf)(struct sk_buff *skb)) { struct sk_buff *skb_chk; unsigned int offset = skb_transport_offset(skb); __sum16 ret; skb_chk = skb_checksum_maybe_trim(skb, transport_len); if (!skb_chk) goto err; if (!pskb_may_pull(skb_chk, offset)) goto err; skb_pull_rcsum(skb_chk, offset); ret = skb_chkf(skb_chk); skb_push_rcsum(skb_chk, offset); if (ret) goto err; return skb_chk; err: if (skb_chk && skb_chk != skb) kfree_skb(skb_chk); return NULL; } EXPORT_SYMBOL(skb_checksum_trimmed); void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", skb->dev->name); } EXPORT_SYMBOL(__skb_warn_lro_forwarding); void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) { if (head_stolen) { skb_release_head_state(skb); kmem_cache_free(net_hotdata.skbuff_cache, skb); } else { __kfree_skb(skb); } } EXPORT_SYMBOL(kfree_skb_partial); /** * skb_try_coalesce - try to merge skb to prior one * @to: prior buffer * @from: buffer to add * @fragstolen: pointer to boolean * @delta_truesize: how much more was allocated than was requested */ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) { struct skb_shared_info *to_shinfo, *from_shinfo; int i, delta, len = from->len; *fragstolen = false; if (skb_cloned(to)) return false; /* In general, avoid mixing page_pool and non-page_pool allocated * pages within the same SKB. In theory we could take full * references if @from is cloned and !@to->pp_recycle but its * tricky (due to potential race with the clone disappearing) and * rare, so not worth dealing with. */ if (to->pp_recycle != from->pp_recycle) return false; if (len <= skb_tailroom(to)) { if (len) BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); *delta_truesize = 0; return true; } to_shinfo = skb_shinfo(to); from_shinfo = skb_shinfo(from); if (to_shinfo->frag_list || from_shinfo->frag_list) return false; if (skb_zcopy(to) || skb_zcopy(from)) return false; if (skb_headlen(from) != 0) { struct page *page; unsigned int offset; if (to_shinfo->nr_frags + from_shinfo->nr_frags >= MAX_SKB_FRAGS) return false; if (skb_head_is_locked(from)) return false; delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); skb_fill_page_desc(to, to_shinfo->nr_frags, page, offset, skb_headlen(from)); *fragstolen = true; } else { if (to_shinfo->nr_frags + from_shinfo->nr_frags > MAX_SKB_FRAGS) return false; delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); } WARN_ON_ONCE(delta < len); memcpy(to_shinfo->frags + to_shinfo->nr_frags, from_shinfo->frags, from_shinfo->nr_frags * sizeof(skb_frag_t)); to_shinfo->nr_frags += from_shinfo->nr_frags; if (!skb_cloned(from)) from_shinfo->nr_frags = 0; /* if the skb is not cloned this does nothing * since we set nr_frags to 0. 
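	 *
	 * If @from IS cloned its frags remain referenced by the clone, so an
	 * extra reference must be taken for @to: skb_pp_frag_ref() handles
	 * page_pool fragment counting, and the fallback loop below takes
	 * ordinary page references when that does not apply.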
*/ if (skb_pp_frag_ref(from)) { for (i = 0; i < from_shinfo->nr_frags; i++) __skb_frag_ref(&from_shinfo->frags[i]); } to->truesize += delta; to->len += len; to->data_len += len; *delta_truesize = delta; return true; } EXPORT_SYMBOL(skb_try_coalesce); /** * skb_scrub_packet - scrub an skb * * @skb: buffer to clean * @xnet: packet is crossing netns * * skb_scrub_packet can be used after encapsulating or decapsulting a packet * into/from a tunnel. Some information have to be cleared during these * operations. * skb_scrub_packet can also be used to clean a skb before injecting it in * another namespace (@xnet == true). We have to clear all information in the * skb that could impact namespace isolation. */ void skb_scrub_packet(struct sk_buff *skb, bool xnet) { skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); skb_ext_reset(skb); nf_reset_ct(skb); nf_reset_trace(skb); #ifdef CONFIG_NET_SWITCHDEV skb->offload_fwd_mark = 0; skb->offload_l3_fwd_mark = 0; #endif if (!xnet) return; ipvs_reset(skb); skb->mark = 0; skb_clear_tstamp(skb); } EXPORT_SYMBOL_GPL(skb_scrub_packet); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { int mac_len, meta_len; void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); return NULL; } mac_len = skb->data - skb_mac_header(skb); if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) { memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } meta_len = skb_metadata_len(skb); if (meta_len) { meta = skb_metadata_end(skb) - meta_len; memmove(meta + VLAN_HLEN, meta, meta_len); } skb->mac_header += VLAN_HLEN; return skb; } struct sk_buff *skb_vlan_untag(struct sk_buff *skb) { struct vlan_hdr *vhdr; u16 vlan_tci; if (unlikely(skb_vlan_tag_present(skb))) { /* vlan_tci is already set-up so leave this for another time */ return skb; } skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) goto err_free; /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short)))) goto err_free; vhdr = (struct vlan_hdr *)skb->data; vlan_tci = ntohs(vhdr->h_vlan_TCI); __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); skb = skb_reorder_vlan_header(skb); if (unlikely(!skb)) goto err_free; skb_reset_network_header(skb); if (!skb_transport_header_was_set(skb)) skb_reset_transport_header(skb); skb_reset_mac_len(skb); return skb; err_free: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(skb_vlan_untag); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) { if (!pskb_may_pull(skb, write_len)) return -ENOMEM; if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } EXPORT_SYMBOL(skb_ensure_writable); int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev) { int needed_headroom = dev->needed_headroom; int needed_tailroom = dev->needed_tailroom; /* For tail taggers, we need to pad short frames ourselves, to ensure * that the tail tag does not fail at its role of being at the end of * the packet, once the conduit interface pads the frame. Account for * that pad length here, and pad later. */ if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) needed_tailroom += ETH_ZLEN - skb->len; /* skb_headroom() returns unsigned int... 
*/ needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) /* No reallocation needed, yay! */ return 0; return pskb_expand_head(skb, needed_headroom, needed_tailroom, GFP_ATOMIC); } EXPORT_SYMBOL(skb_ensure_writable_head_tail); /* remove VLAN header from packet and update csum accordingly. * expects a non skb_vlan_tag_present skb with a vlan tag payload */ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { int offset = skb->data - skb_mac_header(skb); int err; if (WARN_ONCE(offset, "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n", offset)) { return -EINVAL; } err = skb_ensure_writable(skb, VLAN_ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); vlan_remove_tag(skb, vlan_tci); skb->mac_header += VLAN_HLEN; if (skb_network_offset(skb) < ETH_HLEN) skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); return err; } EXPORT_SYMBOL(__skb_vlan_pop); /* Pop a vlan tag either from hwaccel or from payload. * Expects skb->data at mac header. */ int skb_vlan_pop(struct sk_buff *skb) { u16 vlan_tci; __be16 vlan_proto; int err; if (likely(skb_vlan_tag_present(skb))) { __vlan_hwaccel_clear_tag(skb); } else { if (unlikely(!eth_type_vlan(skb->protocol))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); if (err) return err; } /* move next vlan tag to hw accel tag */ if (likely(!eth_type_vlan(skb->protocol))) return 0; vlan_proto = skb->protocol; err = __skb_vlan_pop(skb, &vlan_tci); if (unlikely(err)) return err; __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; } EXPORT_SYMBOL(skb_vlan_pop); /* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present). * Expects skb->data at mac header. */ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { if (skb_vlan_tag_present(skb)) { int offset = skb->data - skb_mac_header(skb); int err; if (WARN_ONCE(offset, "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n", offset)) { return -EINVAL; } err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); if (err) return err; skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; } EXPORT_SYMBOL(skb_vlan_push); /** * skb_eth_pop() - Drop the Ethernet header at the head of a packet * * @skb: Socket buffer to modify * * Drop the Ethernet header of @skb. * * Expects that skb->data points to the mac header and that no VLAN tags are * present. * * Returns 0 on success, -errno otherwise. */ int skb_eth_pop(struct sk_buff *skb) { if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) || skb_network_offset(skb) < ETH_HLEN) return -EPROTO; skb_pull_rcsum(skb, ETH_HLEN); skb_reset_mac_header(skb); skb_reset_mac_len(skb); return 0; } EXPORT_SYMBOL(skb_eth_pop); /** * skb_eth_push() - Add a new Ethernet header at the head of a packet * * @skb: Socket buffer to modify * @dst: Destination MAC address of the new header * @src: Source MAC address of the new header * * Prepend @skb with a new Ethernet header. * * Expects that skb->data points to the mac header, which must be empty. * * Returns 0 on success, -errno otherwise. 
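 *
 * Hedged call sketch (the MAC address argument "peer_mac" and the "drop"
 * label are placeholders):
 *
 *	err = skb_eth_push(skb, peer_mac, dev->dev_addr);
 *	if (err)
 *		goto drop;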
*/ int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, const unsigned char *src) { struct ethhdr *eth; int err; if (skb_network_offset(skb) || skb_vlan_tag_present(skb)) return -EPROTO; err = skb_cow_head(skb, sizeof(*eth)); if (err < 0) return err; skb_push(skb, sizeof(*eth)); skb_reset_mac_header(skb); skb_reset_mac_len(skb); eth = eth_hdr(skb); ether_addr_copy(eth->h_dest, dst); ether_addr_copy(eth->h_source, src); eth->h_proto = skb->protocol; skb_postpush_rcsum(skb, eth, sizeof(*eth)); return 0; } EXPORT_SYMBOL(skb_eth_push); /* Update the ethertype of hdr and the skb csum value if required. */ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr, __be16 ethertype) { if (skb->ip_summed == CHECKSUM_COMPLETE) { __be16 diff[] = { ~hdr->h_proto, ethertype }; skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); } hdr->h_proto = ethertype; } /** * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of * the packet * * @skb: buffer * @mpls_lse: MPLS label stack entry to push * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848) * @mac_len: length of the MAC header * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is * ethernet * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, int mac_len, bool ethernet) { struct mpls_shim_hdr *lse; int err; if (unlikely(!eth_p_mpls(mpls_proto))) return -EINVAL; /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) return -EINVAL; err = skb_cow_head(skb, MPLS_HLEN); if (unlikely(err)) return err; if (!skb->inner_protocol) { skb_set_inner_network_header(skb, skb_network_offset(skb)); skb_set_inner_protocol(skb, skb->protocol); } skb_push(skb, MPLS_HLEN); memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), mac_len); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_reset_mac_len(skb); lse = mpls_hdr(skb); lse->label_stack_entry = mpls_lse; skb_postpush_rcsum(skb, lse, MPLS_HLEN); if (ethernet && mac_len >= ETH_HLEN) skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); skb->protocol = mpls_proto; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_push); /** * skb_mpls_pop() - pop the outermost MPLS header * * @skb: buffer * @next_proto: ethertype of header after popped MPLS header * @mac_len: length of the MAC header * @ethernet: flag to indicate if the packet is ethernet * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, bool ethernet) { int err; if (unlikely(!eth_p_mpls(skb->protocol))) return 0; err = skb_ensure_writable(skb, mac_len + MPLS_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), mac_len); __skb_pull(skb, MPLS_HLEN); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); if (ethernet && mac_len >= ETH_HLEN) { struct ethhdr *hdr; /* use mpls_hdr() to get ethertype to account for VLANs. */ hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); skb_mod_eth_type(skb, hdr, next_proto); } skb->protocol = next_proto; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_pop); /** * skb_mpls_update_lse() - modify outermost MPLS header and update csum * * @skb: buffer * @mpls_lse: new MPLS label stack entry to update to * * Expects skb->data at mac header. 
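 *
 * For illustration only, a bottom-of-stack entry for label 100 with TTL 64
 * could be composed with the uapi MPLS_LS_* shifts:
 *
 *	lse = cpu_to_be32((100 << MPLS_LS_LABEL_SHIFT) |
 *			  (1 << MPLS_LS_S_SHIFT) |
 *			  (64 << MPLS_LS_TTL_SHIFT));
 *	err = skb_mpls_update_lse(skb, lse);
 *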
* * Returns 0 on success, -errno otherwise. */ int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse) { int err; if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); if (unlikely(err)) return err; if (skb->ip_summed == CHECKSUM_COMPLETE) { __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse }; skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); } mpls_hdr(skb)->label_stack_entry = mpls_lse; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_update_lse); /** * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header * * @skb: buffer * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_dec_ttl(struct sk_buff *skb) { u32 lse; u8 ttl; if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) return -ENOMEM; lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry); ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; if (!--ttl) return -EINVAL; lse &= ~MPLS_LS_TTL_MASK; lse |= ttl << MPLS_LS_TTL_SHIFT; return skb_mpls_update_lse(skb, cpu_to_be32(lse)); } EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl); /** * alloc_skb_with_frags - allocate skb with page frags * * @header_len: size of linear part * @data_len: needed length in frags * @order: max page order desired. * @errcode: pointer to error code if any * @gfp_mask: allocation mask * * This can be used to allocate a paged skb, given a maximal order for frags. */ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long data_len, int order, int *errcode, gfp_t gfp_mask) { unsigned long chunk; struct sk_buff *skb; struct page *page; int nr_frags = 0; *errcode = -EMSGSIZE; if (unlikely(data_len > MAX_SKB_FRAGS * (PAGE_SIZE << order))) return NULL; *errcode = -ENOBUFS; skb = alloc_skb(header_len, gfp_mask); if (!skb) return NULL; while (data_len) { if (nr_frags == MAX_SKB_FRAGS - 1) goto failure; while (order && PAGE_ALIGN(data_len) < (PAGE_SIZE << order)) order--; if (order) { page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN, order); if (!page) { order--; continue; } } else { page = alloc_page(gfp_mask); if (!page) goto failure; } chunk = min_t(unsigned long, data_len, PAGE_SIZE << order); skb_fill_page_desc(skb, nr_frags, page, 0, chunk); nr_frags++; skb->truesize += (PAGE_SIZE << order); data_len -= chunk; } return skb; failure: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(alloc_skb_with_frags); /* carve out the first off bytes from skb when off < headlen */ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, const int headlen, gfp_t gfp_mask) { int i; unsigned int size = skb_end_offset(skb); int new_hlen = headlen - off; u8 *data; if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; size = SKB_WITH_OVERHEAD(size); /* Copy real data, and all frags */ skb_copy_from_linear_data_offset(skb, off, data, new_hlen); skb->len -= off; memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); if (skb_cloned(skb)) { /* drop the old head gracefully */ if (skb_orphan_frags(skb, gfp_mask)) { skb_kfree_head(data, size); return -ENOMEM; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb, SKB_CONSUMED, false); } else { /* we can reuse existing recount- all we 
did was * relocate values */ skb_free_head(skb, false); } skb->head = data; skb->data = data; skb->head_frag = 0; skb_set_end_offset(skb, size); skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; } static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp); /* carve out the first eat bytes from skb's frag_list. May recurse into * pskb_carve() */ static int pskb_carve_frag_list(struct sk_buff *skb, struct skb_shared_info *shinfo, int eat, gfp_t gfp_mask) { struct sk_buff *list = shinfo->frag_list; struct sk_buff *clone = NULL; struct sk_buff *insp = NULL; do { if (!list) { pr_err("Not enough bytes to eat. Want %d\n", eat); return -EFAULT; } if (list->len <= eat) { /* Eaten as whole. */ eat -= list->len; list = list->next; insp = list; } else { /* Eaten partially. */ if (skb_shared(list)) { clone = skb_clone(list, gfp_mask); if (!clone) return -ENOMEM; insp = list->next; list = clone; } else { /* This may be pulled without problems. */ insp = list; } if (pskb_carve(list, eat, gfp_mask) < 0) { kfree_skb(clone); return -ENOMEM; } break; } } while (eat); /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; consume_skb(list); } /* And insert new clone at head. */ if (clone) { clone->next = list; shinfo->frag_list = clone; } return 0; } /* carve off first len bytes from skb. Split line (off) is in the * non-linear part of skb */ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, int pos, gfp_t gfp_mask) { int i, k = 0; unsigned int size = skb_end_offset(skb); u8 *data; const int nfrags = skb_shinfo(skb)->nr_frags; struct skb_shared_info *shinfo; if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; size = SKB_WITH_OVERHEAD(size); memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); if (skb_orphan_frags(skb, gfp_mask)) { skb_kfree_head(data, size); return -ENOMEM; } shinfo = (struct skb_shared_info *)(data + size); for (i = 0; i < nfrags; i++) { int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + fsize > off) { shinfo->frags[k] = skb_shinfo(skb)->frags[i]; if (pos < off) { /* Split frag. * We have two variants in this case: * 1. Move all the frag to the second * part, if it is possible. F.e. * this approach is mandatory for TUX, * where splitting is expensive. * 2. Split is accurately. We make this. */ skb_frag_off_add(&shinfo->frags[0], off - pos); skb_frag_size_sub(&shinfo->frags[0], off - pos); } skb_frag_ref(skb, i); k++; } pos += fsize; } shinfo->nr_frags = k; if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); /* split line is in frag list */ if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) { /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. 
*/ if (skb_has_frag_list(skb)) kfree_skb_list(skb_shinfo(skb)->frag_list); skb_kfree_head(data, size); return -ENOMEM; } skb_release_data(skb, SKB_CONSUMED, false); skb->head = data; skb->head_frag = 0; skb->data = data; skb_set_end_offset(skb, size); skb_reset_tail_pointer(skb); skb_headers_offset_update(skb, 0); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; skb->len -= off; skb->data_len = skb->len; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; } /* remove len bytes from the beginning of the skb */ static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp) { int headlen = skb_headlen(skb); if (len < headlen) return pskb_carve_inside_header(skb, len, headlen, gfp); else return pskb_carve_inside_nonlinear(skb, len, headlen, gfp); } /* Extract to_copy bytes starting at off from skb, and return this in * a new skb */ struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, gfp_t gfp) { struct sk_buff *clone = skb_clone(skb, gfp); if (!clone) return NULL; if (pskb_carve(clone, off, gfp) < 0 || pskb_trim(clone, to_copy)) { kfree_skb(clone); return NULL; } return clone; } EXPORT_SYMBOL(pskb_extract); /** * skb_condense - try to get rid of fragments/frag_list if possible * @skb: buffer * * Can be used to save memory before skb is added to a busy queue. * If packet has bytes in frags and enough tail room in skb->head, * pull all of them, so that we can free the frags right now and adjust * truesize. * Notes: * We do not reallocate skb->head thus can not fail. * Caller must re-evaluate skb->truesize if needed. */ void skb_condense(struct sk_buff *skb) { if (skb->data_len) { if (skb->data_len > skb->end - skb->tail || skb_cloned(skb)) return; /* Nice, we can free page frag(s) right now */ __pskb_pull_tail(skb, skb->data_len); } /* At this point, skb->truesize might be over estimated, * because skb had a fragment, and fragments do not tell * their truesize. * When we pulled its content into skb->head, fragment * was freed, but __pskb_pull_tail() could not possibly * adjust skb->truesize, not knowing the frag truesize. */ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); } EXPORT_SYMBOL(skb_condense); #ifdef CONFIG_SKB_EXTENSIONS static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) { return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE); } /** * __skb_ext_alloc - allocate a new skb extensions storage * * @flags: See kmalloc(). * * Returns the newly allocated pointer. The pointer can later attached to a * skb via __skb_ext_set(). * Note: caller must handle the skb_ext as an opaque data. 
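 *
 * Hedged pairing sketch (SKB_EXT_MPTCP is used purely as an example id;
 * "mpext" is an illustrative local):
 *
 *	ext = __skb_ext_alloc(GFP_ATOMIC);
 *	if (ext)
 *		mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, ext);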
*/ struct skb_ext *__skb_ext_alloc(gfp_t flags) { struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags); if (new) { memset(new->offset, 0, sizeof(new->offset)); refcount_set(&new->refcnt, 1); } return new; } static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, unsigned int old_active) { struct skb_ext *new; if (refcount_read(&old->refcnt) == 1) return old; new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); if (!new) return NULL; memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE); refcount_set(&new->refcnt, 1); #ifdef CONFIG_XFRM if (old_active & (1 << SKB_EXT_SEC_PATH)) { struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH); unsigned int i; for (i = 0; i < sp->len; i++) xfrm_state_hold(sp->xvec[i]); } #endif #ifdef CONFIG_MCTP_FLOWS if (old_active & (1 << SKB_EXT_MCTP)) { struct mctp_flow *flow = skb_ext_get_ptr(old, SKB_EXT_MCTP); if (flow->key) refcount_inc(&flow->key->refs); } #endif __skb_ext_put(old); return new; } /** * __skb_ext_set - attach the specified extension storage to this skb * @skb: buffer * @id: extension id * @ext: extension storage previously allocated via __skb_ext_alloc() * * Existing extensions, if any, are cleared. * * Returns the pointer to the extension. */ void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, struct skb_ext *ext) { unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext); skb_ext_put(skb); newlen = newoff + skb_ext_type_len[id]; ext->chunks = newlen; ext->offset[id] = newoff; skb->extensions = ext; skb->active_extensions = 1 << id; return skb_ext_get_ptr(ext, id); } /** * skb_ext_add - allocate space for given extension, COW if needed * @skb: buffer * @id: extension to allocate space for * * Allocates enough space for the given extension. * If the extension is already present, a pointer to that extension * is returned. * * If the skb was cloned, COW applies and the returned memory can be * modified without changing the extension space of clones buffers. * * Returns pointer to the extension or NULL on allocation failure. */ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) { struct skb_ext *new, *old = NULL; unsigned int newlen, newoff; if (skb->active_extensions) { old = skb->extensions; new = skb_ext_maybe_cow(old, skb->active_extensions); if (!new) return NULL; if (__skb_ext_exist(new, id)) goto set_active; newoff = new->chunks; } else { newoff = SKB_EXT_CHUNKSIZEOF(*new); new = __skb_ext_alloc(GFP_ATOMIC); if (!new) return NULL; } newlen = newoff + skb_ext_type_len[id]; new->chunks = newlen; new->offset[id] = newoff; set_active: skb->slow_gro = 1; skb->extensions = new; skb->active_extensions |= 1 << id; return skb_ext_get_ptr(new, id); } EXPORT_SYMBOL(skb_ext_add); #ifdef CONFIG_XFRM static void skb_ext_put_sp(struct sec_path *sp) { unsigned int i; for (i = 0; i < sp->len; i++) xfrm_state_put(sp->xvec[i]); } #endif #ifdef CONFIG_MCTP_FLOWS static void skb_ext_put_mctp(struct mctp_flow *flow) { if (flow->key) mctp_key_unref(flow->key); } #endif void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) { struct skb_ext *ext = skb->extensions; skb->active_extensions &= ~(1 << id); if (skb->active_extensions == 0) { skb->extensions = NULL; __skb_ext_put(ext); #ifdef CONFIG_XFRM } else if (id == SKB_EXT_SEC_PATH && refcount_read(&ext->refcnt) == 1) { struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH); skb_ext_put_sp(sp); sp->len = 0; #endif } } EXPORT_SYMBOL(__skb_ext_del); void __skb_ext_put(struct skb_ext *ext) { /* If this is last clone, nothing can increment * it after check passes. 
Avoids one atomic op. */ if (refcount_read(&ext->refcnt) == 1) goto free_now; if (!refcount_dec_and_test(&ext->refcnt)) return; free_now: #ifdef CONFIG_XFRM if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH)) skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH)); #endif #ifdef CONFIG_MCTP_FLOWS if (__skb_ext_exist(ext, SKB_EXT_MCTP)) skb_ext_put_mctp(skb_ext_get_ptr(ext, SKB_EXT_MCTP)); #endif kmem_cache_free(skbuff_ext_cache, ext); } EXPORT_SYMBOL(__skb_ext_put); #endif /* CONFIG_SKB_EXTENSIONS */ /** * skb_attempt_defer_free - queue skb for remote freeing * @skb: buffer * * Put @skb in a per-cpu list, using the cpu which * allocated the skb/pages to reduce false sharing * and memory zone spinlock contention. */ void skb_attempt_defer_free(struct sk_buff *skb) { int cpu = skb->alloc_cpu; struct softnet_data *sd; unsigned int defer_max; bool kick; if (WARN_ON_ONCE(cpu >= nr_cpu_ids) || !cpu_online(cpu) || cpu == raw_smp_processor_id()) { nodefer: __kfree_skb(skb); return; } DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); DEBUG_NET_WARN_ON_ONCE(skb->destructor); sd = &per_cpu(softnet_data, cpu); defer_max = READ_ONCE(sysctl_skb_defer_max); if (READ_ONCE(sd->defer_count) >= defer_max) goto nodefer; spin_lock_bh(&sd->defer_lock); /* Send an IPI every time queue reaches half capacity. */ kick = sd->defer_count == (defer_max >> 1); /* Paired with the READ_ONCE() few lines above */ WRITE_ONCE(sd->defer_count, sd->defer_count + 1); skb->next = sd->defer_list; /* Paired with READ_ONCE() in skb_defer_free_flush() */ WRITE_ONCE(sd->defer_list, skb); spin_unlock_bh(&sd->defer_lock); /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU * if we are unlucky enough (this seems very unlikely). */ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) smp_call_function_single_async(cpu, &sd->defer_csd); } static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, size_t offset, size_t len) { const char *kaddr; __wsum csum; kaddr = kmap_local_page(page); csum = csum_partial(kaddr + offset, len, 0); kunmap_local(kaddr); skb->csum = csum_block_add(skb->csum, csum, skb->len); } /** * skb_splice_from_iter - Splice (or copy) pages to skbuff * @skb: The buffer to add pages to * @iter: Iterator representing the pages to be added * @maxsize: Maximum amount of pages to be added * @gfp: Allocation flags * * This is a common helper function for supporting MSG_SPLICE_PAGES. It * extracts pages from an iterator and adds them to the socket buffer if * possible, copying them to fragments if not possible (such as if they're slab * pages). * * Returns the amount of data spliced/copied or -EMSGSIZE if there's * insufficient space in the buffer to transfer anything. 
*/ ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, ssize_t maxsize, gfp_t gfp) { size_t frag_limit = READ_ONCE(sysctl_max_skb_frags); struct page *pages[8], **ppages = pages; ssize_t spliced = 0, ret = 0; unsigned int i; while (iter->count > 0) { ssize_t space, nr, len; size_t off; ret = -EMSGSIZE; space = frag_limit - skb_shinfo(skb)->nr_frags; if (space < 0) break; /* We might be able to coalesce without increasing nr_frags */ nr = clamp_t(size_t, space, 1, ARRAY_SIZE(pages)); len = iov_iter_extract_pages(iter, &ppages, maxsize, nr, 0, &off); if (len <= 0) { ret = len ?: -EIO; break; } i = 0; do { struct page *page = pages[i++]; size_t part = min_t(size_t, PAGE_SIZE - off, len); ret = -EIO; if (WARN_ON_ONCE(!sendpage_ok(page))) goto out; ret = skb_append_pagefrags(skb, page, off, part, frag_limit); if (ret < 0) { iov_iter_revert(iter, len); goto out; } if (skb->ip_summed == CHECKSUM_NONE) skb_splice_csum_page(skb, page, off, part); off = 0; spliced += part; maxsize -= part; len -= part; } while (len > 0); if (maxsize <= 0) break; } out: skb_len_add(skb, spliced); return spliced ?: ret; } EXPORT_SYMBOL(skb_splice_from_iter); static __always_inline size_t memcpy_from_iter_csum(void *iter_from, size_t progress, size_t len, void *to, void *priv2) { __wsum *csum = priv2; __wsum next = csum_partial_copy_nocheck(iter_from, to + progress, len); *csum = csum_block_add(*csum, next, progress); return 0; } static __always_inline size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress, size_t len, void *to, void *priv2) { __wsum next, *csum = priv2; next = csum_and_copy_from_user(iter_from, to + progress, len); *csum = csum_block_add(*csum, next, progress); return next ? 0 : len; } bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) { size_t copied; if (WARN_ON_ONCE(!i->data_source)) return false; copied = iterate_and_advance2(i, bytes, addr, csum, copy_from_user_iter_csum, memcpy_from_iter_csum); if (likely(copied == bytes)) return true; iov_iter_revert(i, copied); return false; } EXPORT_SYMBOL(csum_and_copy_from_iter_full);
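/*
 * Illustrative usage sketch (not part of the original file): pskb_extract()
 * above clones the skb and then carves off the leading bytes, so the caller
 * gets an independent buffer covering [off, off + to_copy) while the source
 * skb is left untouched. The helper name and parameters here are hypothetical.
 */
static struct sk_buff *example_extract_range(struct sk_buff *skb,
					     int off, int to_copy)
{
	/* GFP_ATOMIC because this could run from softirq context. */
	struct sk_buff *part = pskb_extract(skb, off, to_copy, GFP_ATOMIC);

	if (!part)
		return NULL;	/* clone, carve or trim failed */

	return part;
}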
11 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM power #if !defined(_TRACE_POWER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_POWER_H #include <linux/cpufreq.h> #include <linux/ktime.h> #include <linux/pm_qos.h> #include <linux/tracepoint.h> #include <linux/trace_events.h> #define TPS(x) tracepoint_string(x) DECLARE_EVENT_CLASS(cpu, TP_PROTO(unsigned int state, unsigned int cpu_id), TP_ARGS(state, cpu_id), TP_STRUCT__entry( __field( u32, state ) __field( u32, cpu_id ) ), TP_fast_assign( __entry->state = state; __entry->cpu_id = cpu_id; ), TP_printk("state=%lu cpu_id=%lu", (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) ); DEFINE_EVENT(cpu, cpu_idle, TP_PROTO(unsigned int state, unsigned int cpu_id), TP_ARGS(state, cpu_id) ); TRACE_EVENT(cpu_idle_miss, TP_PROTO(unsigned int cpu_id, unsigned int state, bool below), TP_ARGS(cpu_id, state, below), TP_STRUCT__entry( __field(u32, cpu_id) __field(u32, state) __field(bool, below) ), TP_fast_assign( __entry->cpu_id = cpu_id; __entry->state = state; __entry->below = below; ), TP_printk("cpu_id=%lu state=%lu type=%s", (unsigned long)__entry->cpu_id, (unsigned long)__entry->state, (__entry->below)?"below":"above") ); TRACE_EVENT(powernv_throttle, TP_PROTO(int chip_id, const char *reason, int pmax), TP_ARGS(chip_id, reason, pmax), TP_STRUCT__entry( __field(int, chip_id) __string(reason, reason) __field(int, pmax) 
), TP_fast_assign( __entry->chip_id = chip_id; __assign_str(reason, reason); __entry->pmax = pmax; ), TP_printk("Chip %d Pmax %d %s", __entry->chip_id, __entry->pmax, __get_str(reason)) ); TRACE_EVENT(pstate_sample, TP_PROTO(u32 core_busy, u32 scaled_busy, u32 from, u32 to, u64 mperf, u64 aperf, u64 tsc, u32 freq, u32 io_boost ), TP_ARGS(core_busy, scaled_busy, from, to, mperf, aperf, tsc, freq, io_boost ), TP_STRUCT__entry( __field(u32, core_busy) __field(u32, scaled_busy) __field(u32, from) __field(u32, to) __field(u64, mperf) __field(u64, aperf) __field(u64, tsc) __field(u32, freq) __field(u32, io_boost) ), TP_fast_assign( __entry->core_busy = core_busy; __entry->scaled_busy = scaled_busy; __entry->from = from; __entry->to = to; __entry->mperf = mperf; __entry->aperf = aperf; __entry->tsc = tsc; __entry->freq = freq; __entry->io_boost = io_boost; ), TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu", (unsigned long)__entry->core_busy, (unsigned long)__entry->scaled_busy, (unsigned long)__entry->from, (unsigned long)__entry->to, (unsigned long long)__entry->mperf, (unsigned long long)__entry->aperf, (unsigned long long)__entry->tsc, (unsigned long)__entry->freq, (unsigned long)__entry->io_boost ) ); /* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */ #ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING #define _PWR_EVENT_AVOID_DOUBLE_DEFINING #define PWR_EVENT_EXIT -1 #endif #define pm_verb_symbolic(event) \ __print_symbolic(event, \ { PM_EVENT_SUSPEND, "suspend" }, \ { PM_EVENT_RESUME, "resume" }, \ { PM_EVENT_FREEZE, "freeze" }, \ { PM_EVENT_QUIESCE, "quiesce" }, \ { PM_EVENT_HIBERNATE, "hibernate" }, \ { PM_EVENT_THAW, "thaw" }, \ { PM_EVENT_RESTORE, "restore" }, \ { PM_EVENT_RECOVER, "recover" }) DEFINE_EVENT(cpu, cpu_frequency, TP_PROTO(unsigned int frequency, unsigned int cpu_id), TP_ARGS(frequency, cpu_id) ); TRACE_EVENT(cpu_frequency_limits, TP_PROTO(struct cpufreq_policy *policy), TP_ARGS(policy), TP_STRUCT__entry( __field(u32, min_freq) __field(u32, max_freq) __field(u32, cpu_id) ), TP_fast_assign( __entry->min_freq = policy->min; __entry->max_freq = policy->max; __entry->cpu_id = policy->cpu; ), TP_printk("min=%lu max=%lu cpu_id=%lu", (unsigned long)__entry->min_freq, (unsigned long)__entry->max_freq, (unsigned long)__entry->cpu_id) ); TRACE_EVENT(device_pm_callback_start, TP_PROTO(struct device *dev, const char *pm_ops, int event), TP_ARGS(dev, pm_ops, event), TP_STRUCT__entry( __string(device, dev_name(dev)) __string(driver, dev_driver_string(dev)) __string(parent, dev->parent ? dev_name(dev->parent) : "none") __string(pm_ops, pm_ops ? pm_ops : "none ") __field(int, event) ), TP_fast_assign( __assign_str(device, dev_name(dev)); __assign_str(driver, dev_driver_string(dev)); __assign_str(parent, dev->parent ? dev_name(dev->parent) : "none"); __assign_str(pm_ops, pm_ops ? 
pm_ops : "none "); __entry->event = event; ), TP_printk("%s %s, parent: %s, %s[%s]", __get_str(driver), __get_str(device), __get_str(parent), __get_str(pm_ops), pm_verb_symbolic(__entry->event)) ); TRACE_EVENT(device_pm_callback_end, TP_PROTO(struct device *dev, int error), TP_ARGS(dev, error), TP_STRUCT__entry( __string(device, dev_name(dev)) __string(driver, dev_driver_string(dev)) __field(int, error) ), TP_fast_assign( __assign_str(device, dev_name(dev)); __assign_str(driver, dev_driver_string(dev)); __entry->error = error; ), TP_printk("%s %s, err=%d", __get_str(driver), __get_str(device), __entry->error) ); TRACE_EVENT(suspend_resume, TP_PROTO(const char *action, int val, bool start), TP_ARGS(action, val, start), TP_STRUCT__entry( __field(const char *, action) __field(int, val) __field(bool, start) ), TP_fast_assign( __entry->action = action; __entry->val = val; __entry->start = start; ), TP_printk("%s[%u] %s", __entry->action, (unsigned int)__entry->val, (__entry->start)?"begin":"end") ); DECLARE_EVENT_CLASS(wakeup_source, TP_PROTO(const char *name, unsigned int state), TP_ARGS(name, state), TP_STRUCT__entry( __string( name, name ) __field( u64, state ) ), TP_fast_assign( __assign_str(name, name); __entry->state = state; ), TP_printk("%s state=0x%lx", __get_str(name), (unsigned long)__entry->state) ); DEFINE_EVENT(wakeup_source, wakeup_source_activate, TP_PROTO(const char *name, unsigned int state), TP_ARGS(name, state) ); DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, TP_PROTO(const char *name, unsigned int state), TP_ARGS(name, state) ); /* * The clock events are used for clock enable/disable and for * clock rate change */ DECLARE_EVENT_CLASS(clock, TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), TP_ARGS(name, state, cpu_id), TP_STRUCT__entry( __string( name, name ) __field( u64, state ) __field( u64, cpu_id ) ), TP_fast_assign( __assign_str(name, name); __entry->state = state; __entry->cpu_id = cpu_id; ), TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) ); DEFINE_EVENT(clock, clock_enable, TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), TP_ARGS(name, state, cpu_id) ); DEFINE_EVENT(clock, clock_disable, TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), TP_ARGS(name, state, cpu_id) ); DEFINE_EVENT(clock, clock_set_rate, TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), TP_ARGS(name, state, cpu_id) ); /* * The power domain events are used for power domains transitions */ DECLARE_EVENT_CLASS(power_domain, TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), TP_ARGS(name, state, cpu_id), TP_STRUCT__entry( __string( name, name ) __field( u64, state ) __field( u64, cpu_id ) ), TP_fast_assign( __assign_str(name, name); __entry->state = state; __entry->cpu_id = cpu_id; ), TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) ); DEFINE_EVENT(power_domain, power_domain_target, TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), TP_ARGS(name, state, cpu_id) ); /* * CPU latency QoS events used for global CPU latency QoS list updates */ DECLARE_EVENT_CLASS(cpu_latency_qos_request, TP_PROTO(s32 value), TP_ARGS(value), TP_STRUCT__entry( __field( s32, value ) ), TP_fast_assign( __entry->value = value; ), TP_printk("CPU_DMA_LATENCY value=%d", __entry->value) ); DEFINE_EVENT(cpu_latency_qos_request, pm_qos_add_request, TP_PROTO(s32 value), 
TP_ARGS(value) ); DEFINE_EVENT(cpu_latency_qos_request, pm_qos_update_request, TP_PROTO(s32 value), TP_ARGS(value) ); DEFINE_EVENT(cpu_latency_qos_request, pm_qos_remove_request, TP_PROTO(s32 value), TP_ARGS(value) ); /* * General PM QoS events used for updates of PM QoS request lists */ DECLARE_EVENT_CLASS(pm_qos_update, TP_PROTO(enum pm_qos_req_action action, int prev_value, int curr_value), TP_ARGS(action, prev_value, curr_value), TP_STRUCT__entry( __field( enum pm_qos_req_action, action ) __field( int, prev_value ) __field( int, curr_value ) ), TP_fast_assign( __entry->action = action; __entry->prev_value = prev_value; __entry->curr_value = curr_value; ), TP_printk("action=%s prev_value=%d curr_value=%d", __print_symbolic(__entry->action, { PM_QOS_ADD_REQ, "ADD_REQ" }, { PM_QOS_UPDATE_REQ, "UPDATE_REQ" }, { PM_QOS_REMOVE_REQ, "REMOVE_REQ" }), __entry->prev_value, __entry->curr_value) ); DEFINE_EVENT(pm_qos_update, pm_qos_update_target, TP_PROTO(enum pm_qos_req_action action, int prev_value, int curr_value), TP_ARGS(action, prev_value, curr_value) ); DEFINE_EVENT_PRINT(pm_qos_update, pm_qos_update_flags, TP_PROTO(enum pm_qos_req_action action, int prev_value, int curr_value), TP_ARGS(action, prev_value, curr_value), TP_printk("action=%s prev_value=0x%x curr_value=0x%x", __print_symbolic(__entry->action, { PM_QOS_ADD_REQ, "ADD_REQ" }, { PM_QOS_UPDATE_REQ, "UPDATE_REQ" }, { PM_QOS_REMOVE_REQ, "REMOVE_REQ" }), __entry->prev_value, __entry->curr_value) ); DECLARE_EVENT_CLASS(dev_pm_qos_request, TP_PROTO(const char *name, enum dev_pm_qos_req_type type, s32 new_value), TP_ARGS(name, type, new_value), TP_STRUCT__entry( __string( name, name ) __field( enum dev_pm_qos_req_type, type ) __field( s32, new_value ) ), TP_fast_assign( __assign_str(name, name); __entry->type = type; __entry->new_value = new_value; ), TP_printk("device=%s type=%s new_value=%d", __get_str(name), __print_symbolic(__entry->type, { DEV_PM_QOS_RESUME_LATENCY, "DEV_PM_QOS_RESUME_LATENCY" }, { DEV_PM_QOS_FLAGS, "DEV_PM_QOS_FLAGS" }), __entry->new_value) ); DEFINE_EVENT(dev_pm_qos_request, dev_pm_qos_add_request, TP_PROTO(const char *name, enum dev_pm_qos_req_type type, s32 new_value), TP_ARGS(name, type, new_value) ); DEFINE_EVENT(dev_pm_qos_request, dev_pm_qos_update_request, TP_PROTO(const char *name, enum dev_pm_qos_req_type type, s32 new_value), TP_ARGS(name, type, new_value) ); DEFINE_EVENT(dev_pm_qos_request, dev_pm_qos_remove_request, TP_PROTO(const char *name, enum dev_pm_qos_req_type type, s32 new_value), TP_ARGS(name, type, new_value) ); TRACE_EVENT(guest_halt_poll_ns, TP_PROTO(bool grow, unsigned int new, unsigned int old), TP_ARGS(grow, new, old), TP_STRUCT__entry( __field(bool, grow) __field(unsigned int, new) __field(unsigned int, old) ), TP_fast_assign( __entry->grow = grow; __entry->new = new; __entry->old = old; ), TP_printk("halt_poll_ns %u (%s %u)", __entry->new, __entry->grow ? "grow" : "shrink", __entry->old) ); #define trace_guest_halt_poll_ns_grow(new, old) \ trace_guest_halt_poll_ns(true, new, old) #define trace_guest_halt_poll_ns_shrink(new, old) \ trace_guest_halt_poll_ns(false, new, old) #endif /* _TRACE_POWER_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
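/*
 * Illustrative sketch (not part of the original header): each TRACE_EVENT()/
 * DEFINE_EVENT() above generates a trace_<name>() helper, so a clock driver
 * could emit the clock_enable event as below. The clock name and wrapper are
 * made up for illustration.
 */
static inline void example_trace_clock_on(unsigned int cpu_id)
{
	/* state == 1 conventionally means "enabled" for the clock event class */
	trace_clock_enable("example_clk", 1, cpu_id);
}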
2 2 14 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2007-2012 Siemens AG * * Written by: * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Sergey Lapin <slapin@ossfans.org> * Maxim Gorbachyov <maxim.gorbachev@siemens.com> * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ #include <linux/netdevice.h> #include <linux/module.h> #include <linux/if_arp.h> #include <linux/ieee802154.h> #include <net/nl802154.h> #include <net/mac802154.h> #include <net/ieee802154_netdev.h> #include <net/cfg802154.h> #include "ieee802154_i.h" #include "driver-ops.h" int mac802154_wpan_update_llsec(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); 
struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; int rc = 0; if (ops->llsec) { struct ieee802154_llsec_params params; int changed = 0; params.pan_id = wpan_dev->pan_id; changed |= IEEE802154_LLSEC_PARAM_PAN_ID; params.hwaddr = wpan_dev->extended_addr; changed |= IEEE802154_LLSEC_PARAM_HWADDR; rc = ops->llsec->set_params(dev, &params, changed); } return rc; } static int mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct sockaddr_ieee802154 *sa = (struct sockaddr_ieee802154 *)&ifr->ifr_addr; int err = -ENOIOCTLCMD; if (cmd != SIOCGIFADDR && cmd != SIOCSIFADDR) return err; rtnl_lock(); switch (cmd) { case SIOCGIFADDR: { u16 pan_id, short_addr; pan_id = le16_to_cpu(wpan_dev->pan_id); short_addr = le16_to_cpu(wpan_dev->short_addr); if (pan_id == IEEE802154_PANID_BROADCAST || short_addr == IEEE802154_ADDR_BROADCAST) { err = -EADDRNOTAVAIL; break; } sa->family = AF_IEEE802154; sa->addr.addr_type = IEEE802154_ADDR_SHORT; sa->addr.pan_id = pan_id; sa->addr.short_addr = short_addr; err = 0; break; } case SIOCSIFADDR: if (netif_running(dev)) { rtnl_unlock(); return -EBUSY; } dev_warn(&dev->dev, "Using DEBUGing ioctl SIOCSIFADDR isn't recommended!\n"); if (sa->family != AF_IEEE802154 || sa->addr.addr_type != IEEE802154_ADDR_SHORT || sa->addr.pan_id == IEEE802154_PANID_BROADCAST || sa->addr.short_addr == IEEE802154_ADDR_BROADCAST || sa->addr.short_addr == IEEE802154_ADDR_UNDEF) { err = -EINVAL; break; } wpan_dev->pan_id = cpu_to_le16(sa->addr.pan_id); wpan_dev->short_addr = cpu_to_le16(sa->addr.short_addr); err = mac802154_wpan_update_llsec(dev); break; } rtnl_unlock(); return err; } static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct sockaddr *addr = p; __le64 extended_addr; if (netif_running(dev)) return -EBUSY; /* lowpan need to be down for update * SLAAC address after ifup */ if (sdata->wpan_dev.lowpan_dev) { if (netif_running(sdata->wpan_dev.lowpan_dev)) return -EBUSY; } ieee802154_be64_to_le64(&extended_addr, addr->sa_data); if (!ieee802154_is_valid_extended_unicast_addr(extended_addr)) return -EINVAL; dev_addr_set(dev, addr->sa_data); sdata->wpan_dev.extended_addr = extended_addr; /* update lowpan interface mac address when * wpan mac has been changed */ if (sdata->wpan_dev.lowpan_dev) dev_addr_set(sdata->wpan_dev.lowpan_dev, dev->dev_addr); return mac802154_wpan_update_llsec(dev); } static int ieee802154_setup_hw(struct ieee802154_sub_if_data *sdata) { struct ieee802154_local *local = sdata->local; struct wpan_dev *wpan_dev = &sdata->wpan_dev; int ret; sdata->required_filtering = sdata->iface_default_filtering; if (local->hw.flags & IEEE802154_HW_AFILT) { local->addr_filt.pan_id = wpan_dev->pan_id; local->addr_filt.ieee_addr = wpan_dev->extended_addr; local->addr_filt.short_addr = wpan_dev->short_addr; } if (local->hw.flags & IEEE802154_HW_LBT) { ret = drv_set_lbt_mode(local, wpan_dev->lbt); if (ret < 0) return ret; } if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { ret = drv_set_csma_params(local, wpan_dev->min_be, wpan_dev->max_be, wpan_dev->csma_retries); if (ret < 0) return ret; } if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) { ret = drv_set_max_frame_retries(local, wpan_dev->frame_retries); if (ret < 0) return ret; } return 0; } static int mac802154_slave_open(struct net_device *dev) { 
struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct ieee802154_local *local = sdata->local; int res; ASSERT_RTNL(); set_bit(SDATA_STATE_RUNNING, &sdata->state); if (!local->open_count) { res = ieee802154_setup_hw(sdata); if (res) goto err; res = drv_start(local, sdata->required_filtering, &local->addr_filt); if (res) goto err; } local->open_count++; netif_start_queue(dev); return 0; err: /* might already be clear but that doesn't matter */ clear_bit(SDATA_STATE_RUNNING, &sdata->state); return res; } static int ieee802154_check_mac_settings(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata, struct ieee802154_sub_if_data *nsdata) { struct wpan_dev *nwpan_dev = &nsdata->wpan_dev; struct wpan_dev *wpan_dev = &sdata->wpan_dev; ASSERT_RTNL(); if (sdata->iface_default_filtering != nsdata->iface_default_filtering) return -EBUSY; if (local->hw.flags & IEEE802154_HW_AFILT) { if (wpan_dev->pan_id != nwpan_dev->pan_id || wpan_dev->short_addr != nwpan_dev->short_addr || wpan_dev->extended_addr != nwpan_dev->extended_addr) return -EBUSY; } if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { if (wpan_dev->min_be != nwpan_dev->min_be || wpan_dev->max_be != nwpan_dev->max_be || wpan_dev->csma_retries != nwpan_dev->csma_retries) return -EBUSY; } if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) { if (wpan_dev->frame_retries != nwpan_dev->frame_retries) return -EBUSY; } if (local->hw.flags & IEEE802154_HW_LBT) { if (wpan_dev->lbt != nwpan_dev->lbt) return -EBUSY; } return 0; } static int ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, enum nl802154_iftype iftype) { struct ieee802154_local *local = sdata->local; struct ieee802154_sub_if_data *nsdata; /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(nsdata, &local->interfaces, list) { if (nsdata != sdata && ieee802154_sdata_running(nsdata)) { int ret; /* TODO currently we don't support multiple node/coord * types we need to run skb_clone at rx path. Check if * there exist really an use case if we need to support * multiple node/coord types at the same time. */ if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR && nsdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR) return -EBUSY; /* check all phy mac sublayer settings are the same. * We have only one phy, different values makes trouble. 
*/ ret = ieee802154_check_mac_settings(local, sdata, nsdata); if (ret < 0) return ret; } } return 0; } static int mac802154_wpan_open(struct net_device *dev) { int rc; struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; rc = ieee802154_check_concurrent_iface(sdata, wpan_dev->iftype); if (rc < 0) return rc; return mac802154_slave_open(dev); } static int mac802154_slave_close(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct ieee802154_local *local = sdata->local; ASSERT_RTNL(); if (mac802154_is_scanning(local)) mac802154_abort_scan_locked(local, sdata); if (mac802154_is_beaconing(local)) mac802154_stop_beacons_locked(local, sdata); netif_stop_queue(dev); local->open_count--; clear_bit(SDATA_STATE_RUNNING, &sdata->state); if (!local->open_count) ieee802154_stop_device(local); return 0; } static int mac802154_set_header_security(struct ieee802154_sub_if_data *sdata, struct ieee802154_hdr *hdr, const struct ieee802154_mac_cb *cb) { struct ieee802154_llsec_params params; u8 level; mac802154_llsec_get_params(&sdata->sec, &params); if (!params.enabled && cb->secen_override && cb->secen) return -EINVAL; if (!params.enabled || (cb->secen_override && !cb->secen) || !params.out_level) return 0; if (cb->seclevel_override && !cb->seclevel) return -EINVAL; level = cb->seclevel_override ? cb->seclevel : params.out_level; hdr->fc.security_enabled = 1; hdr->sec.level = level; hdr->sec.key_id_mode = params.out_key.mode; if (params.out_key.mode == IEEE802154_SCF_KEY_SHORT_INDEX) hdr->sec.short_src = params.out_key.short_source; else if (params.out_key.mode == IEEE802154_SCF_KEY_HW_INDEX) hdr->sec.extended_src = params.out_key.extended_source; hdr->sec.key_id = params.out_key.id; return 0; } static int ieee802154_header_create(struct sk_buff *skb, struct net_device *dev, const struct ieee802154_addr *daddr, const struct ieee802154_addr *saddr, unsigned len) { struct ieee802154_hdr hdr; struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct ieee802154_mac_cb *cb = mac_cb(skb); int hlen; if (!daddr) return -EINVAL; memset(&hdr.fc, 0, sizeof(hdr.fc)); hdr.fc.type = cb->type; hdr.fc.security_enabled = cb->secen; hdr.fc.ack_request = cb->ackreq; hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF; if (mac802154_set_header_security(sdata, &hdr, cb) < 0) return -EINVAL; if (!saddr) { if (wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) || wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || wpan_dev->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) { hdr.source.mode = IEEE802154_ADDR_LONG; hdr.source.extended_addr = wpan_dev->extended_addr; } else { hdr.source.mode = IEEE802154_ADDR_SHORT; hdr.source.short_addr = wpan_dev->short_addr; } hdr.source.pan_id = wpan_dev->pan_id; } else { hdr.source = *(const struct ieee802154_addr *)saddr; } hdr.dest = *(const struct ieee802154_addr *)daddr; hlen = ieee802154_hdr_push(skb, &hdr); if (hlen < 0) return -EINVAL; skb_reset_mac_header(skb); skb->mac_len = hlen; if (len > ieee802154_max_payload(&hdr)) return -EMSGSIZE; return hlen; } static const struct wpan_dev_header_ops ieee802154_header_ops = { .create = ieee802154_header_create, }; /* This header create functionality assumes a 8 byte array for * source and destination pointer at maximum. To adapt this for * the 802.15.4 dataframe header we use extended address handling * here only and intra pan connection. 
fc fields are mostly fallback * handling. For provide dev_hard_header for dgram sockets. */ static int mac802154_header_create(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len) { struct ieee802154_hdr hdr; struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct ieee802154_mac_cb cb = { }; int hlen; if (!daddr) return -EINVAL; memset(&hdr.fc, 0, sizeof(hdr.fc)); hdr.fc.type = IEEE802154_FC_TYPE_DATA; hdr.fc.ack_request = wpan_dev->ackreq; hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF; /* TODO currently a workaround to give zero cb block to set * security parameters defaults according MIB. */ if (mac802154_set_header_security(sdata, &hdr, &cb) < 0) return -EINVAL; hdr.dest.pan_id = wpan_dev->pan_id; hdr.dest.mode = IEEE802154_ADDR_LONG; ieee802154_be64_to_le64(&hdr.dest.extended_addr, daddr); hdr.source.pan_id = hdr.dest.pan_id; hdr.source.mode = IEEE802154_ADDR_LONG; if (!saddr) hdr.source.extended_addr = wpan_dev->extended_addr; else ieee802154_be64_to_le64(&hdr.source.extended_addr, saddr); hlen = ieee802154_hdr_push(skb, &hdr); if (hlen < 0) return -EINVAL; skb_reset_mac_header(skb); skb->mac_len = hlen; if (len > ieee802154_max_payload(&hdr)) return -EMSGSIZE; return hlen; } static int mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) { struct ieee802154_hdr hdr; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) { pr_debug("malformed packet\n"); return 0; } if (hdr.source.mode == IEEE802154_ADDR_LONG) { ieee802154_le64_to_be64(haddr, &hdr.source.extended_addr); return IEEE802154_EXTENDED_ADDR_LEN; } return 0; } static const struct header_ops mac802154_header_ops = { .create = mac802154_header_create, .parse = mac802154_header_parse, }; static const struct net_device_ops mac802154_wpan_ops = { .ndo_open = mac802154_wpan_open, .ndo_stop = mac802154_slave_close, .ndo_start_xmit = ieee802154_subif_start_xmit, .ndo_do_ioctl = mac802154_wpan_ioctl, .ndo_set_mac_address = mac802154_wpan_mac_addr, }; static const struct net_device_ops mac802154_monitor_ops = { .ndo_open = mac802154_wpan_open, .ndo_stop = mac802154_slave_close, .ndo_start_xmit = ieee802154_monitor_start_xmit, }; static void mac802154_wpan_free(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); mac802154_llsec_destroy(&sdata->sec); } static void ieee802154_if_setup(struct net_device *dev) { dev->addr_len = IEEE802154_EXTENDED_ADDR_LEN; memset(dev->broadcast, 0xff, IEEE802154_EXTENDED_ADDR_LEN); /* Let hard_header_len set to IEEE802154_MIN_HEADER_LEN. AF_PACKET * will not send frames without any payload, but ack frames * has no payload, so substract one that we can send a 3 bytes * frame. The xmit callback assumes at least a hard header where two * bytes fc and sequence field are set. */ dev->hard_header_len = IEEE802154_MIN_HEADER_LEN - 1; /* The auth_tag header is for security and places in private payload * room of mac frame which stucks between payload and FCS field. */ dev->needed_tailroom = IEEE802154_MAX_AUTH_TAG_LEN + IEEE802154_FCS_LEN; /* The mtu size is the payload without mac header in this case. * We have a dynamic length header with a minimum header length * which is hard_header_len. In this case we let mtu to the size * of maximum payload which is IEEE802154_MTU - IEEE802154_FCS_LEN - * hard_header_len. 
The FCS which is set by hardware or ndo_start_xmit * and the minimum mac header which can be evaluated inside driver * layer. The rest of mac header will be part of payload if greater * than hard_header_len. */ dev->mtu = IEEE802154_MTU - IEEE802154_FCS_LEN - dev->hard_header_len; dev->tx_queue_len = 300; dev->flags = IFF_NOARP | IFF_BROADCAST; } static int ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, enum nl802154_iftype type) { struct wpan_dev *wpan_dev = &sdata->wpan_dev; int ret; u8 tmp; /* set some type-dependent values */ sdata->wpan_dev.iftype = type; get_random_bytes(&tmp, sizeof(tmp)); atomic_set(&wpan_dev->bsn, tmp); get_random_bytes(&tmp, sizeof(tmp)); atomic_set(&wpan_dev->dsn, tmp); /* defaults per 802.15.4-2011 */ wpan_dev->min_be = 3; wpan_dev->max_be = 5; wpan_dev->csma_retries = 4; wpan_dev->frame_retries = 3; wpan_dev->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); switch (type) { case NL802154_IFTYPE_COORD: case NL802154_IFTYPE_NODE: ieee802154_be64_to_le64(&wpan_dev->extended_addr, sdata->dev->dev_addr); sdata->dev->header_ops = &mac802154_header_ops; sdata->dev->needs_free_netdev = true; sdata->dev->priv_destructor = mac802154_wpan_free; sdata->dev->netdev_ops = &mac802154_wpan_ops; sdata->dev->ml_priv = &mac802154_mlme_wpan; sdata->iface_default_filtering = IEEE802154_FILTERING_4_FRAME_FIELDS; wpan_dev->header_ops = &ieee802154_header_ops; mutex_init(&sdata->sec_mtx); mac802154_llsec_init(&sdata->sec); ret = mac802154_wpan_update_llsec(sdata->dev); if (ret < 0) return ret; break; case NL802154_IFTYPE_MONITOR: sdata->dev->needs_free_netdev = true; sdata->dev->netdev_ops = &mac802154_monitor_ops; sdata->iface_default_filtering = IEEE802154_FILTERING_NONE; break; default: BUG(); } return 0; } struct net_device * ieee802154_if_add(struct ieee802154_local *local, const char *name, unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr) { u8 addr[IEEE802154_EXTENDED_ADDR_LEN]; struct net_device *ndev = NULL; struct ieee802154_sub_if_data *sdata = NULL; int ret; ASSERT_RTNL(); ndev = alloc_netdev(sizeof(*sdata), name, name_assign_type, ieee802154_if_setup); if (!ndev) return ERR_PTR(-ENOMEM); ndev->needed_headroom = local->hw.extra_tx_headroom + IEEE802154_MAX_HEADER_LEN; ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) goto err; ieee802154_le64_to_be64(ndev->perm_addr, &local->hw.phy->perm_extended_addr); switch (type) { case NL802154_IFTYPE_COORD: case NL802154_IFTYPE_NODE: ndev->type = ARPHRD_IEEE802154; if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) { ieee802154_le64_to_be64(addr, &extended_addr); dev_addr_set(ndev, addr); } else { dev_addr_set(ndev, ndev->perm_addr); } break; case NL802154_IFTYPE_MONITOR: ndev->type = ARPHRD_IEEE802154_MONITOR; break; default: ret = -EINVAL; goto err; } /* TODO check this */ SET_NETDEV_DEV(ndev, &local->phy->dev); dev_net_set(ndev, wpan_phy_net(local->hw.phy)); sdata = netdev_priv(ndev); ndev->ieee802154_ptr = &sdata->wpan_dev; memcpy(sdata->name, ndev->name, IFNAMSIZ); sdata->dev = ndev; sdata->wpan_dev.wpan_phy = local->hw.phy; sdata->local = local; INIT_LIST_HEAD(&sdata->wpan_dev.list); /* setup type-dependent data */ ret = ieee802154_setup_sdata(sdata, type); if (ret) goto err; ret = register_netdevice(ndev); if (ret < 0) goto err; mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); return ndev; err: free_netdev(ndev); return ERR_PTR(ret); } 
void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata) { ASSERT_RTNL(); mutex_lock(&sdata->local->iflist_mtx); list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); synchronize_rcu(); unregister_netdevice(sdata->dev); } void ieee802154_remove_interfaces(struct ieee802154_local *local) { struct ieee802154_sub_if_data *sdata, *tmp; mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); unregister_netdevice(sdata->dev); } mutex_unlock(&local->iflist_mtx); } static int netdev_notify(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ieee802154_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) return NOTIFY_DONE; if (!dev->ieee802154_ptr || !dev->ieee802154_ptr->wpan_phy) return NOTIFY_DONE; if (dev->ieee802154_ptr->wpan_phy->privid != mac802154_wpan_phy_privid) return NOTIFY_DONE; sdata = IEEE802154_DEV_TO_SUB_IF(dev); memcpy(sdata->name, dev->name, IFNAMSIZ); return NOTIFY_OK; } static struct notifier_block mac802154_netdev_notifier = { .notifier_call = netdev_notify, }; int ieee802154_iface_init(void) { return register_netdevice_notifier(&mac802154_netdev_notifier); } void ieee802154_iface_exit(void) { unregister_netdevice_notifier(&mac802154_netdev_notifier); }
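/*
 * Illustrative usage sketch (not part of the original file):
 * ieee802154_if_add() must be called with the RTNL held (see the
 * ASSERT_RTNL() above). Passing a zero extended address makes it fall back
 * to the PHY's permanent address, as the switch statement above shows.
 * The wrapper below is hypothetical.
 */
static struct net_device *
example_add_node_iface(struct ieee802154_local *local)
{
	struct net_device *ndev;

	rtnl_lock();
	ndev = ieee802154_if_add(local, "wpan%d", NET_NAME_ENUM,
				 NL802154_IFTYPE_NODE, cpu_to_le64(0));
	rtnl_unlock();

	return ndev;	/* ERR_PTR() on failure */
}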
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM vsyscall

#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define __VSYSCALL_TRACE_H

#include <linux/tracepoint.h>

TRACE_EVENT(emulate_vsyscall,

	    TP_PROTO(int nr),

	    TP_ARGS(nr),

	    TP_STRUCT__entry(__field(int, nr)),

	    TP_fast_assign(
			   __entry->nr = nr;
			   ),

	    TP_printk("nr = %d", __entry->nr)
	);

#endif

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../arch/x86/entry/vsyscall/
#define TRACE_INCLUDE_FILE vsyscall_trace
#include <trace/define_trace.h>
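/*
 * Illustrative sketch (not part of the original header): the TRACE_EVENT()
 * above generates trace_emulate_vsyscall(), which the emulation path can call
 * with the vsyscall number it is handling. The wrapper is hypothetical.
 */
static inline void example_trace_vsyscall(int nr)
{
	trace_emulate_vsyscall(nr);	/* shows up as "nr = %d" in the trace */
}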
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 #undef TRACE_SYSTEM #define TRACE_SYSTEM neigh #if !defined(_TRACE_NEIGH_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NEIGH_H #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/tracepoint.h> #include <net/neighbour.h> #define neigh_state_str(state) \ __print_symbolic(state, \ { NUD_INCOMPLETE, "incomplete" }, \ { NUD_REACHABLE, "reachable" }, \ { NUD_STALE, "stale" }, \ { NUD_DELAY, "delay" }, \ { NUD_PROBE, "probe" }, \ { NUD_FAILED, "failed" }, \ { NUD_NOARP, "noarp" }, \ { NUD_PERMANENT, "permanent"}) TRACE_EVENT(neigh_create, TP_PROTO(struct neigh_table *tbl, struct net_device *dev, const void *pkey, const struct neighbour *n, bool exempt_from_gc), TP_ARGS(tbl, dev, pkey, n, exempt_from_gc), TP_STRUCT__entry( __field(u32, family) __string(dev, dev ? dev->name : "NULL") __field(int, entries) __field(u8, created) __field(u8, gc_exempt) __array(u8, primary_key4, 4) __array(u8, primary_key6, 16) ), TP_fast_assign( __be32 *p32; __entry->family = tbl->family; __assign_str(dev, (dev ? dev->name : "NULL")); __entry->entries = atomic_read(&tbl->gc_entries); __entry->created = n != NULL; __entry->gc_exempt = exempt_from_gc; p32 = (__be32 *)__entry->primary_key4; if (tbl->family == AF_INET) *p32 = *(__be32 *)pkey; else *p32 = 0; #if IS_ENABLED(CONFIG_IPV6) if (tbl->family == AF_INET6) { struct in6_addr *pin6; pin6 = (struct in6_addr *)__entry->primary_key6; *pin6 = *(struct in6_addr *)pkey; } #endif ), TP_printk("family %d dev %s entries %d primary_key4 %pI4 primary_key6 %pI6c created %d gc_exempt %d", __entry->family, __get_str(dev), __entry->entries, __entry->primary_key4, __entry->primary_key6, __entry->created, __entry->gc_exempt) ); TRACE_EVENT(neigh_update, TP_PROTO(struct neighbour *n, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid), TP_ARGS(n, lladdr, new, flags, nlmsg_pid), TP_STRUCT__entry( __field(u32, family) __string(dev, (n->dev ? n->dev->name : "NULL")) __array(u8, lladdr, MAX_ADDR_LEN) __field(u8, lladdr_len) __field(u8, flags) __field(u8, nud_state) __field(u8, type) __field(u8, dead) __field(int, refcnt) __array(__u8, primary_key4, 4) __array(__u8, primary_key6, 16) __field(unsigned long, confirmed) __field(unsigned long, updated) __field(unsigned long, used) __array(u8, new_lladdr, MAX_ADDR_LEN) __field(u8, new_state) __field(u32, update_flags) __field(u32, pid) ), TP_fast_assign( int lladdr_len = (n->dev ? n->dev->addr_len : MAX_ADDR_LEN); struct in6_addr *pin6; __be32 *p32; __entry->family = n->tbl->family; __assign_str(dev, (n->dev ? 
n->dev->name : "NULL")); __entry->lladdr_len = lladdr_len; memcpy(__entry->lladdr, n->ha, lladdr_len); __entry->flags = n->flags; __entry->nud_state = n->nud_state; __entry->type = n->type; __entry->dead = n->dead; __entry->refcnt = refcount_read(&n->refcnt); pin6 = (struct in6_addr *)__entry->primary_key6; p32 = (__be32 *)__entry->primary_key4; if (n->tbl->family == AF_INET) *p32 = *(__be32 *)n->primary_key; else *p32 = 0; #if IS_ENABLED(CONFIG_IPV6) if (n->tbl->family == AF_INET6) { pin6 = (struct in6_addr *)__entry->primary_key6; *pin6 = *(struct in6_addr *)n->primary_key; } else #endif { ipv6_addr_set_v4mapped(*p32, pin6); } __entry->confirmed = n->confirmed; __entry->updated = n->updated; __entry->used = n->used; if (lladdr) memcpy(__entry->new_lladdr, lladdr, lladdr_len); __entry->new_state = new; __entry->update_flags = flags; __entry->pid = nlmsg_pid; ), TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x " "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c " "confirmed %lu updated %lu used %lu new_lladdr %s " "new_state %s update_flags %02x pid %d", __entry->family, __get_str(dev), __print_hex_str(__entry->lladdr, __entry->lladdr_len), __entry->flags, neigh_state_str(__entry->nud_state), __entry->type, __entry->dead, __entry->refcnt, __entry->primary_key4, __entry->primary_key6, __entry->confirmed, __entry->updated, __entry->used, __print_hex_str(__entry->new_lladdr, __entry->lladdr_len), neigh_state_str(__entry->new_state), __entry->update_flags, __entry->pid) ); DECLARE_EVENT_CLASS(neigh__update, TP_PROTO(struct neighbour *n, int err), TP_ARGS(n, err), TP_STRUCT__entry( __field(u32, family) __string(dev, (n->dev ? n->dev->name : "NULL")) __array(u8, lladdr, MAX_ADDR_LEN) __field(u8, lladdr_len) __field(u8, flags) __field(u8, nud_state) __field(u8, type) __field(u8, dead) __field(int, refcnt) __array(__u8, primary_key4, 4) __array(__u8, primary_key6, 16) __field(unsigned long, confirmed) __field(unsigned long, updated) __field(unsigned long, used) __field(u32, err) ), TP_fast_assign( int lladdr_len = (n->dev ? n->dev->addr_len : MAX_ADDR_LEN); struct in6_addr *pin6; __be32 *p32; __entry->family = n->tbl->family; __assign_str(dev, (n->dev ? 
n->dev->name : "NULL")); __entry->lladdr_len = lladdr_len; memcpy(__entry->lladdr, n->ha, lladdr_len); __entry->flags = n->flags; __entry->nud_state = n->nud_state; __entry->type = n->type; __entry->dead = n->dead; __entry->refcnt = refcount_read(&n->refcnt); pin6 = (struct in6_addr *)__entry->primary_key6; p32 = (__be32 *)__entry->primary_key4; if (n->tbl->family == AF_INET) *p32 = *(__be32 *)n->primary_key; else *p32 = 0; #if IS_ENABLED(CONFIG_IPV6) if (n->tbl->family == AF_INET6) { pin6 = (struct in6_addr *)__entry->primary_key6; *pin6 = *(struct in6_addr *)n->primary_key; } else #endif { ipv6_addr_set_v4mapped(*p32, pin6); } __entry->confirmed = n->confirmed; __entry->updated = n->updated; __entry->used = n->used; __entry->err = err; ), TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x " "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c " "confirmed %lu updated %lu used %lu err %d", __entry->family, __get_str(dev), __print_hex_str(__entry->lladdr, __entry->lladdr_len), __entry->flags, neigh_state_str(__entry->nud_state), __entry->type, __entry->dead, __entry->refcnt, __entry->primary_key4, __entry->primary_key6, __entry->confirmed, __entry->updated, __entry->used, __entry->err) ); DEFINE_EVENT(neigh__update, neigh_update_done, TP_PROTO(struct neighbour *neigh, int err), TP_ARGS(neigh, err) ); DEFINE_EVENT(neigh__update, neigh_timer_handler, TP_PROTO(struct neighbour *neigh, int err), TP_ARGS(neigh, err) ); DEFINE_EVENT(neigh__update, neigh_event_send_done, TP_PROTO(struct neighbour *neigh, int err), TP_ARGS(neigh, err) ); DEFINE_EVENT(neigh__update, neigh_event_send_dead, TP_PROTO(struct neighbour *neigh, int err), TP_ARGS(neigh, err) ); DEFINE_EVENT(neigh__update, neigh_cleanup_and_release, TP_PROTO(struct neighbour *neigh, int rc), TP_ARGS(neigh, rc) ); #endif /* _TRACE_NEIGH_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
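/*
 * Illustrative sketch (not part of the original header): net/core/neighbour.c
 * fires these events through the generated trace_<name>() helpers. A call
 * recording the creation of a non-GC-exempt entry could look like this; the
 * wrapper itself is hypothetical.
 */
static void example_trace_neigh_create(struct neigh_table *tbl,
				       struct net_device *dev,
				       const void *pkey,
				       struct neighbour *n)
{
	trace_neigh_create(tbl, dev, pkey, n, false);
}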
1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PGTABLE_H #define _ASM_X86_PGTABLE_H #include <linux/mem_encrypt.h> #include <asm/page.h> #include <asm/pgtable_types.h> /* * Macro to mark a page protection value as UC- */ #define pgprot_noncached(prot) \ ((boot_cpu_data.x86 > 3) \ ? (__pgprot(pgprot_val(prot) | \ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) #ifndef __ASSEMBLY__ #include <linux/spinlock.h> #include <asm/x86_init.h> #include <asm/pkru.h> #include <asm/fpu/api.h> #include <asm/coco.h> #include <asm-generic/pgtable_uffd.h> #include <linux/page_table_check.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); struct seq_file; void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); bool ptdump_walk_pgd_level_checkwx(void); #define ptdump_check_wx ptdump_walk_pgd_level_checkwx void ptdump_walk_user_pgd_level_checkwx(void); /* * Macros to add or remove encryption attribute */ #define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) #define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) #ifdef CONFIG_DEBUG_WX #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() #else #define debug_checkwx_user() do { } while (0) #endif /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; #define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; extern struct mm_struct *pgd_page_get_mm(struct page *page); extern pmdval_t early_pmd_flags; #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) #define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) #define pgd_clear(pgd) (pgtable_l5_enabled() ? 
native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d # define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d) #endif #ifndef __PAGETABLE_PUD_FOLDED #define p4d_clear(p4d) native_p4d_clear(p4d) #endif #ifndef set_pud # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif #ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) #define pmd_clear(pmd) native_pmd_clear(pmd) #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) #ifndef __PAGETABLE_P4D_FOLDED #define p4d_val(x) native_p4d_val(x) #define __p4d(x) native_make_p4d(x) #endif #ifndef __PAGETABLE_PUD_FOLDED #define pud_val(x) native_pud_val(x) #define __pud(x) native_make_pud(x) #endif #ifndef __PAGETABLE_PMD_FOLDED #define pmd_val(x) native_pmd_val(x) #define __pmd(x) native_make_pmd(x) #endif #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) #define arch_end_context_switch(prev) do {} while(0) #endif /* CONFIG_PARAVIRT_XXL */ /* * The following only work if pte_present() is true. * Undefined behaviour if not.. */ static inline bool pte_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_DIRTY_BITS; } static inline bool pte_shstk(pte_t pte) { return cpu_feature_enabled(X86_FEATURE_SHSTK) && (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; } #define pmd_dirty pmd_dirty static inline bool pmd_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_DIRTY_BITS; } static inline bool pmd_shstk(pmd_t pmd) { return cpu_feature_enabled(X86_FEATURE_SHSTK) && (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == (_PAGE_DIRTY | _PAGE_PSE); } #define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; } static inline bool pud_dirty(pud_t pud) { return pud_flags(pud) & _PAGE_DIRTY_BITS; } static inline int pud_young(pud_t pud) { return pud_flags(pud) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { /* * Shadow stack pages are logically writable, but do not have * _PAGE_RW. Check for them separately from _PAGE_RW itself. */ return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte); } #define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { /* * Shadow stack pages are logically writable, but do not have * _PAGE_RW. Check for them separately from _PAGE_RW itself. 
*/ return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd); } #define pud_write pud_write static inline int pud_write(pud_t pud) { return pud_flags(pud) & _PAGE_RW; } static inline int pte_huge(pte_t pte) { return pte_flags(pte) & _PAGE_PSE; } static inline int pte_global(pte_t pte) { return pte_flags(pte) & _PAGE_GLOBAL; } static inline int pte_exec(pte_t pte) { return !(pte_flags(pte) & _PAGE_NX); } static inline int pte_special(pte_t pte) { return pte_flags(pte) & _PAGE_SPECIAL; } /* Entries that were set to PROT_NONE are inverted */ static inline u64 protnone_mask(u64 val); #define PFN_PTE_SHIFT PAGE_SHIFT static inline unsigned long pte_pfn(pte_t pte) { phys_addr_t pfn = pte_val(pte); pfn ^= protnone_mask(pfn); return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { phys_addr_t pfn = pmd_val(pmd); pfn ^= protnone_mask(pfn); return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_pfn(pud_t pud) { phys_addr_t pfn = pud_val(pud); pfn ^= protnone_mask(pfn); return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } static inline unsigned long p4d_pfn(p4d_t p4d) { return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT; } static inline unsigned long pgd_pfn(pgd_t pgd) { return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } #define p4d_leaf p4d_leaf static inline bool p4d_leaf(p4d_t p4d) { /* No 512 GiB pages yet */ return 0; } #define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define pmd_leaf pmd_leaf static inline bool pmd_leaf(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_leaf */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_trans_huge(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } #endif #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { return boot_cpu_has(X86_FEATURE_PSE); } #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pmd_devmap(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_DEVMAP); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_devmap(pud_t pud) { return !!(pud_val(pud) & _PAGE_DEVMAP); } #else static inline int pud_devmap(pud_t pud) { return 0; } #endif static inline int pgd_devmap(pgd_t pgd) { return 0; } #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline pte_t pte_set_flags(pte_t pte, pteval_t set) { pteval_t v = native_pte_val(pte); return native_make_pte(v | set); } static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) { pteval_t v = native_pte_val(pte); return native_make_pte(v & ~clear); } /* * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So * when creating dirty, write-protected memory, a software bit is used: * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the * Dirty bit to SavedDirty, and vice-vesra. * * This shifting is only done if needed. In the case of shifting * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of * shifting SavedDirty->Dirty, the condition is Write=1. 
*/ static inline pgprotval_t mksaveddirty_shift(pgprotval_t v) { pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1; v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY; v &= ~(cond << _PAGE_BIT_DIRTY); return v; } static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v) { pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1; v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY; v &= ~(cond << _PAGE_BIT_SAVED_DIRTY); return v; } static inline pte_t pte_mksaveddirty(pte_t pte) { pteval_t v = native_pte_val(pte); v = mksaveddirty_shift(v); return native_make_pte(v); } static inline pte_t pte_clear_saveddirty(pte_t pte) { pteval_t v = native_pte_val(pte); v = clear_saveddirty_shift(v); return native_make_pte(v); } static inline pte_t pte_wrprotect(pte_t pte) { pte = pte_clear_flags(pte, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PTE (Write=0,Dirty=1). Move the hardware * dirty value to the software bit, if present. */ return pte_mksaveddirty(pte); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pte_uffd_wp(pte_t pte) { bool wp = pte_flags(pte) & _PAGE_UFFD_WP; #ifdef CONFIG_DEBUG_VM /* * Having write bit for wr-protect-marked present ptes is fatal, * because it means the uffd-wp bit will be ignored and write will * just go through. * * Use any chance of pgtable walking to verify this (e.g., when * page swapped out or being migrated for all purposes). It means * something is already wrong. Tell the admin even before the * process crashes. We also nail it with wrong pgtable setup. */ WARN_ON_ONCE(wp && pte_write(pte)); #endif return wp; } static inline pte_t pte_mkuffd_wp(pte_t pte) { return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP)); } static inline pte_t pte_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY_BITS); } static inline pte_t pte_mkold(pte_t pte) { return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkexec(pte_t pte) { return pte_clear_flags(pte, _PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) { pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pte_mksaveddirty(pte); } static inline pte_t pte_mkwrite_shstk(pte_t pte) { pte = pte_clear_flags(pte, _PAGE_RW); return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) { return pte_set_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkwrite_novma(pte_t pte) { return pte_set_flags(pte, _PAGE_RW); } struct vm_area_struct; pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma); #define pte_mkwrite pte_mkwrite static inline pte_t pte_mkhuge(pte_t pte) { return pte_set_flags(pte, _PAGE_PSE); } static inline pte_t pte_clrhuge(pte_t pte) { return pte_clear_flags(pte, _PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) { return pte_set_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_clrglobal(pte_t pte) { return pte_clear_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) { return pte_set_flags(pte, _PAGE_SPECIAL); } static inline pte_t pte_mkdevmap(pte_t pte) { return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); } static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); return native_make_pmd(v | set); } static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) { pmdval_t v = native_pmd_val(pmd); return native_make_pmd(v & ~clear); } /* See comments above mksaveddirty_shift() 
*/ static inline pmd_t pmd_mksaveddirty(pmd_t pmd) { pmdval_t v = native_pmd_val(pmd); v = mksaveddirty_shift(v); return native_make_pmd(v); } /* See comments above mksaveddirty_shift() */ static inline pmd_t pmd_clear_saveddirty(pmd_t pmd) { pmdval_t v = native_pmd_val(pmd); v = clear_saveddirty_shift(v); return native_make_pmd(v); } static inline pmd_t pmd_wrprotect(pmd_t pmd) { pmd = pmd_clear_flags(pmd, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PMD (RW=0, Dirty=1). Move the hardware * dirty value to the software bit. */ return pmd_mksaveddirty(pmd); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pmd_uffd_wp(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_UFFD_WP; } static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) { return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP)); } static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); } static inline pmd_t pmd_mkclean(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pmd_mksaveddirty(pmd); } static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd) { pmd = pmd_clear_flags(pmd, _PAGE_RW); return pmd_set_flags(pmd, _PAGE_DIRTY); } static inline pmd_t pmd_mkdevmap(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_DEVMAP); } static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_PSE); } static inline pmd_t pmd_mkyoung(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_ACCESSED); } static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_RW); } pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); #define pmd_mkwrite pmd_mkwrite static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); return native_make_pud(v | set); } static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) { pudval_t v = native_pud_val(pud); return native_make_pud(v & ~clear); } /* See comments above mksaveddirty_shift() */ static inline pud_t pud_mksaveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); v = mksaveddirty_shift(v); return native_make_pud(v); } /* See comments above mksaveddirty_shift() */ static inline pud_t pud_clear_saveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); v = clear_saveddirty_shift(v); return native_make_pud(v); } static inline pud_t pud_mkold(pud_t pud) { return pud_clear_flags(pud, _PAGE_ACCESSED); } static inline pud_t pud_mkclean(pud_t pud) { return pud_clear_flags(pud, _PAGE_DIRTY_BITS); } static inline pud_t pud_wrprotect(pud_t pud) { pud = pud_clear_flags(pud, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PUD (RW=0, Dirty=1). Move the hardware * dirty value to the software bit. 
*/ return pud_mksaveddirty(pud); } static inline pud_t pud_mkdirty(pud_t pud) { pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pud_mksaveddirty(pud); } static inline pud_t pud_mkdevmap(pud_t pud) { return pud_set_flags(pud, _PAGE_DEVMAP); } static inline pud_t pud_mkhuge(pud_t pud) { return pud_set_flags(pud, _PAGE_PSE); } static inline pud_t pud_mkyoung(pud_t pud) { return pud_set_flags(pud, _PAGE_ACCESSED); } static inline pud_t pud_mkwrite(pud_t pud) { pud = pud_set_flags(pud, _PAGE_RW); return pud_clear_saveddirty(pud); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline int pte_soft_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_SOFT_DIRTY; } static inline int pmd_soft_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; } static inline int pud_soft_dirty(pud_t pud) { return pud_flags(pud) & _PAGE_SOFT_DIRTY; } static inline pte_t pte_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); } static inline pud_t pud_mksoft_dirty(pud_t pud) { return pud_set_flags(pud, _PAGE_SOFT_DIRTY); } static inline pte_t pte_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); } static inline pud_t pud_clear_soft_dirty(pud_t pud) { return pud_clear_flags(pud, _PAGE_SOFT_DIRTY); } #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. */ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) { pgprotval_t protval = pgprot_val(pgprot); if (protval & _PAGE_PRESENT) protval &= __supported_pte_mask; return protval; } static inline pgprotval_t check_pgprot(pgprot_t pgprot) { pgprotval_t massaged_val = massage_pgprot(pgprot); /* mmdebug.h can not be included here because of dependencies */ #ifdef CONFIG_DEBUG_VM WARN_ONCE(pgprot_val(pgprot) != massaged_val, "attempted to set unsupported pgprot: %016llx " "bits: %016llx supported: %016llx\n", (u64)pgprot_val(pgprot), (u64)pgprot_val(pgprot) ^ massaged_val, (u64)__supported_pte_mask); #endif return massaged_val; } static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PTE_PFN_MASK; return __pte(pfn | check_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PMD_PAGE_MASK; return __pmd(pfn | check_pgprot(pgprot)); } static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PUD_PAGE_MASK; return __pud(pfn | check_pgprot(pgprot)); } static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pteval_t val = pte_val(pte), oldval = val; pte_t pte_result; /* * Chop off the NX bit (if present), and add the NX portion of * the newprot (if present): */ val &= _PAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); pte_result = __pte(val); 
/* * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid: * 1. Marking Write=0 PTEs Dirty=1 * 2. Marking Dirty=1 PTEs Write=0 * * The first case cannot happen because the _PAGE_CHG_MASK will filter * out any Dirty bit passed in newprot. Handle the second case by * going through the mksaveddirty exercise. Only do this if the old * value was Write=1 to avoid doing this on Shadow Stack PTEs. */ if (oldval & _PAGE_RW) pte_result = pte_mksaveddirty(pte_result); else pte_result = pte_clear_saveddirty(pte_result); return pte_result; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { pmdval_t val = pmd_val(pmd), oldval = val; pmd_t pmd_result; val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY); val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); pmd_result = __pmd(val); /* * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid: * 1. Marking Write=0 PMDs Dirty=1 * 2. Marking Dirty=1 PMDs Write=0 * * The first case cannot happen because the _PAGE_CHG_MASK will filter * out any Dirty bit passed in newprot. Handle the second case by * going through the mksaveddirty exercise. Only do this if the old * value was Write=1 to avoid doing this on Shadow Stack PTEs. */ if (oldval & _PAGE_RW) pmd_result = pmd_mksaveddirty(pmd_result); else pmd_result = pmd_clear_saveddirty(pmd_result); return pmd_result; } /* * mprotect needs to preserve PAT and encryption bits when updating * vm_page_prot */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } #define pte_pgprot(x) __pgprot(pte_flags(x)) #define pmd_pgprot(x) __pgprot(pmd_flags(x)) #define pud_pgprot(x) __pgprot(pud_flags(x)) #define p4d_pgprot(x) __pgprot(p4d_flags(x)) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) { /* * PAT type is always WB for untracked ranges, so no need to check. */ if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) return 1; /* * Certain new memtypes are not allowed with certain * requested memtype: * - request is uncached, return cannot be write-back * - request is write-combine, return cannot be write-back * - request is write-through, return cannot be write-back * - request is write-through, return cannot be write-combine */ if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WC && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WT && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WT && new_pcm == _PAGE_CACHE_MODE_WC)) { return 0; } return 1; } pmd_t *populate_extra_pmd(unsigned long vaddr); pte_t *populate_extra_pte(unsigned long vaddr); #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); /* * Take a PGD location (pgdp) and a pgd value that needs to be set there. * Populates the user and returns the resulting PGD that must be set in * the kernel copy of the page tables. 
*/ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { if (!static_cpu_has(X86_FEATURE_PTI)) return pgd; return __pti_set_user_pgtbl(pgdp, pgd); } #else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { return pgd; } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 # include <asm/pgtable_32.h> #else # include <asm/pgtable_64.h> #endif #ifndef __ASSEMBLY__ #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/log2.h> #include <asm/fixmap.h> static inline int pte_none(pte_t pte) { return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); } #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t a, pte_t b) { return a.pte == b.pte; } static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { if (__pte_needs_invert(pte_val(pte))) return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT)); return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } #define pte_advance_pfn pte_advance_pfn static inline int pte_present(pte_t a) { return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t a) { return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; } #endif #define pte_accessible pte_accessible static inline bool pte_accessible(struct mm_struct *mm, pte_t a) { if (pte_flags(a) & _PAGE_PRESENT) return true; if ((pte_flags(a) & _PAGE_PROTNONE) && atomic_read(&mm->tlb_flush_pending)) return true; return false; } static inline int pmd_present(pmd_t pmd) { /* * Checking for _PAGE_PSE is needed too because * split_huge_page will temporarily clear the present bit (but * the _PAGE_PSE flag will remain set at all times while the * _PAGE_PRESENT bit is clear). */ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); } #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT)) == _PAGE_PROTNONE; } static inline int pmd_protnone(pmd_t pmd) { return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT)) == _PAGE_PROTNONE; } #endif /* CONFIG_NUMA_BALANCING */ static inline int pmd_none(pmd_t pmd) { /* Only check low word on 32-bit platforms, since it might be out of sync with upper half. */ unsigned long val = native_pmd_val(pmd); return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) { return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. 
* * (Currently stuck as a macro because of indirect forward reference * to linux/mm.h:page_to_nid()) */ #define mk_pte(page, pgprot) \ ({ \ pgprot_t __pgprot = pgprot; \ \ WARN_ON_ONCE((pgprot_val(__pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == \ _PAGE_DIRTY); \ pfn_pte(page_to_pfn(page), __pgprot); \ }) static inline int pmd_bad(pmd_t pmd) { return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) { return npg >> (20 - PAGE_SHIFT); } #if CONFIG_PGTABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int pud_present(pud_t pud) { return pud_flags(pud) & _PAGE_PRESENT; } static inline pmd_t *pud_pgtable(pud_t pud) { return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) #define pud_leaf pud_leaf static inline bool pud_leaf(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); } static inline int pud_bad(pud_t pud) { return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int p4d_present(p4d_t p4d) { return p4d_flags(p4d) & _PAGE_PRESENT; } static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (p4d_flags(p4d) & ~ignore_flags) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ static inline unsigned long p4d_index(unsigned long address) { return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1); } #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { if (!pgtable_l5_enabled()) return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } static inline unsigned long pgd_page_vaddr(pgd_t pgd) { return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* to find an entry in a page-table-directory. */ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { if (!pgtable_l5_enabled()) return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; if (!pgtable_l5_enabled()) return 0; if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) { if (!pgtable_l5_enabled()) return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables * except on 32-bit non-PAE which is not supported on * KNL. 
*/ return !native_pgd_val(pgd); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* __ASSEMBLY__ */ #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) #ifndef __ASSEMBLY__ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); extern void memblock_find_dma_reserve(void); void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); #ifdef CONFIG_X86_64 extern pgd_t trampoline_pgd_entry; #endif /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { pte_t res = *ptep; /* Pure native function needs no input for mm, addr */ native_pte_clear(NULL, 0, ptep); return res; } static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) { pmd_t res = *pmdp; native_pmd_clear(pmdp); return res; } static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) { pud_t res = *pudp; native_pud_clear(pudp); return res; } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(mm, pmdp, pmd); set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { page_table_check_pud_set(mm, pudp, pud); native_set_pud(pudp, pud); } /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware * will do the accessed bit for us, and we don't want to * race with other CPU's that might be updating the dirty * bit at the same time. */ struct vm_area_struct; #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH extern int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); page_table_check_pte_clear(mm, pte); return pte; } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { pte_t pte; if (full) { /* * Full address destruction in progress; paravirt does not * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); page_table_check_pte_clear(mm, pte); } else { pte = ptep_get_and_clear(mm, addr, ptep); } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { /* * Avoid accidentally creating shadow stack PTEs * (Write=0,Dirty=1). Use cmpxchg() to prevent races with * the hardware setting Dirty=1. 
*/ pte_t old_pte, new_pte; old_pte = READ_ONCE(*ptep); do { new_pte = pte_wrprotect(old_pte); } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); } #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); extern int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty); #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp); extern int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp); #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { pmd_t pmd = native_pmdp_get_and_clear(pmdp); page_table_check_pmd_clear(mm, pmd); return pmd; } #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { pud_t pud = native_pudp_get_and_clear(pudp); page_table_check_pud_clear(mm, pud); return pud; } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { /* * Avoid accidentally creating shadow stack PTEs * (Write=0,Dirty=1). Use cmpxchg() to prevent races with * the hardware setting Dirty=1. */ pmd_t old_pmd, new_pmd; old_pmd = READ_ONCE(*pmdp); do { new_pmd = pmd_wrprotect(old_pmd); } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); } #ifndef pmdp_establish #define pmdp_establish pmdp_establish static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { pmd_t old = *pmdp; WRITE_ONCE(*pmdp, pmd); return old; } } #endif #define __HAVE_ARCH_PMDP_INVALIDATE_AD extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); /* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. * * Returns true for parts of the PGD that map userspace and * false for the parts that map the kernel. */ static inline bool pgdp_maps_userspace(void *__ptr) { unsigned long ptr = (unsigned long)__ptr; return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } #define pgd_leaf pgd_leaf static inline bool pgd_leaf(pgd_t pgd) { return false; } #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and * the user one is in the last 4k. To switch between them, you * just need to flip the 12th bit in their addresses. */ #define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT /* * This generates better code than the inline assembly in * __set_bit(). 
*/ static inline void *ptr_set_bit(void *ptr, int bit) { unsigned long __ptr = (unsigned long)ptr; __ptr |= BIT(bit); return (void *)__ptr; } static inline void *ptr_clear_bit(void *ptr, int bit) { unsigned long __ptr = (unsigned long)ptr; __ptr &= ~BIT(bit); return (void *)__ptr; } static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) { return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); } static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) { return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); } static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) { return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) { return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * * dst - pointer to pgd range anywhere on a pgd page * src - "" * count - the number of pgds to copy. * * dst and src can be on the same page, but the range must not overlap, * and must not cross a page boundary. */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) return; /* Clone the user space pgd as well */ memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), count * sizeof(pgd_t)); #endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) static inline int page_level_shift(enum pg_level level) { return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT; } static inline unsigned long page_level_size(enum pg_level level) { return 1UL << page_level_shift(level); } static inline unsigned long page_level_mask(enum pg_level level) { return ~(page_level_size(level) - 1); } /* * The x86 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. 
*/ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { } static inline void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, unsigned int nr) { } static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { } static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } static inline pte_t pte_swp_mkexclusive(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); } static inline int pte_swp_exclusive(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; } static inline pte_t pte_swp_clear_exclusive(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); } static inline int pte_swp_soft_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; } static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY); } static inline int pmd_swp_soft_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY; } static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY); } #endif #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline pte_t pte_swp_mkuffd_wp(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); } static inline int pte_swp_uffd_wp(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_UFFD_WP; } static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); } static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); } static inline int pmd_swp_uffd_wp(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; } static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* ifdef to avoid doing 59-bit shift on 32-bit values */ return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0; #else return 0; #endif } static inline bool __pkru_allows_pkey(u16 pkey, bool write) { u32 pkru = read_pkru(); if (!__pkru_allows_read(pkru, pkey)) return false; if (write && !__pkru_allows_write(pkru, pkey)) return false; return true; } /* * 'pteval' can come from a PTE, PMD or PUD. We only check * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the * same value on all 3 types. */ static inline bool __pte_access_permitted(unsigned long pteval, bool write) { unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; /* * Write=0,Dirty=1 PTEs are shadow stack, which the kernel * shouldn't generally allow access to, but since they * are already Write=0, the below logic covers both cases. 
*/ if (write) need_pte_bits |= _PAGE_RW; if ((pteval & need_pte_bits) != need_pte_bits) return 0; return __pkru_allows_pkey(pte_flags_pkey(pteval), write); } #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { return __pte_access_permitted(pte_val(pte), write); } #define pmd_access_permitted pmd_access_permitted static inline bool pmd_access_permitted(pmd_t pmd, bool write) { return __pte_access_permitted(pmd_val(pmd), write); } #define pud_access_permitted pud_access_permitted static inline bool pud_access_permitted(pud_t pud, bool write) { return __pte_access_permitted(pud_val(pud), write); } #define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1 extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot); static inline bool arch_has_pfn_modify_check(void) { return boot_cpu_has_bug(X86_BUG_L1TF); } #define arch_check_zapped_pte arch_check_zapped_pte void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte); #define arch_check_zapped_pmd arch_check_zapped_pmd void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd); #ifdef CONFIG_XEN_PV #define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young static inline bool arch_has_hw_nonleaf_pmd_young(void) { return !cpu_feature_enabled(X86_FEATURE_XENPV); } #endif #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); } static inline bool pmd_user_accessible_page(pmd_t pmd) { return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); } static inline bool pud_user_accessible_page(pud_t pud) { return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); } #endif #ifdef CONFIG_X86_SGX int arch_memory_failure(unsigned long pfn, int flags); #define arch_memory_failure arch_memory_failure bool arch_is_platform_page(u64 paddr); #define arch_is_platform_page arch_is_platform_page #endif #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */
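The Write/Dirty/SavedDirty juggling above is easier to see in isolation. The following standalone sketch repeats the shifting logic of mksaveddirty_shift()/clear_saveddirty_shift() in plain userspace C; the bit positions are illustrative assumptions for this sketch only, not the kernel's _PAGE_BIT_* definitions.

/*
 * Illustration only: model a PTE as a plain integer and show how the Dirty
 * bit is parked in SavedDirty while Write=0, then restored when Write=1.
 */
#include <stdio.h>
#include <stdint.h>

#define BIT_RW          1	/* assumed position of Write for this sketch */
#define BIT_DIRTY       6	/* assumed position of Dirty */
#define BIT_SAVED_DIRTY 11	/* assumed position of the software SavedDirty bit */

static uint64_t mksaveddirty_shift(uint64_t v)
{
	uint64_t cond = (~v >> BIT_RW) & 1;	/* 1 iff Write=0 */

	v |= ((v >> BIT_DIRTY) & cond) << BIT_SAVED_DIRTY;
	v &= ~(cond << BIT_DIRTY);
	return v;
}

static uint64_t clear_saveddirty_shift(uint64_t v)
{
	uint64_t cond = (v >> BIT_RW) & 1;	/* 1 iff Write=1 */

	v |= ((v >> BIT_SAVED_DIRTY) & cond) << BIT_DIRTY;
	v &= ~(cond << BIT_SAVED_DIRTY);
	return v;
}

int main(void)
{
	/* Write=0,Dirty=1 would look like shadow stack, so Dirty is parked. */
	uint64_t pte = 1ull << BIT_DIRTY;

	pte = mksaveddirty_shift(pte);
	printf("after wrprotect: Dirty=%u SavedDirty=%u\n",
	       (unsigned)((pte >> BIT_DIRTY) & 1),
	       (unsigned)((pte >> BIT_SAVED_DIRTY) & 1));

	/* Make it writable again: SavedDirty folds back into Dirty. */
	pte |= 1ull << BIT_RW;
	pte = clear_saveddirty_shift(pte);
	printf("after mkwrite:   Dirty=%u SavedDirty=%u\n",
	       (unsigned)((pte >> BIT_DIRTY) & 1),
	       (unsigned)((pte >> BIT_SAVED_DIRTY) & 1));
	return 0;
}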
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_PREEMPT_H
#define __ASM_PREEMPT_H

#include <asm/rmwcc.h>
#include <asm/percpu.h>
#include <asm/current.h>

#include <linux/static_call_types.h>

/* We use the MSB mostly because it's available */
#define PREEMPT_NEED_RESCHED	0x80000000

/*
 * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
 * that a decrement hitting 0 means we can and should reschedule.
 */
#define PREEMPT_ENABLED	(0 + PREEMPT_NEED_RESCHED)

/*
 * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
 * that think a non-zero value indicates we cannot preempt.
 */
static __always_inline int preempt_count(void)
{
	return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED;
}

static __always_inline void preempt_count_set(int pc)
{
	int old, new;

	old = raw_cpu_read_4(pcpu_hot.preempt_count);
	do {
		new = (old & PREEMPT_NEED_RESCHED) |
			(pc & ~PREEMPT_NEED_RESCHED);
	} while (!raw_cpu_try_cmpxchg_4(pcpu_hot.preempt_count, &old, new));
}

/*
 * must be macros to avoid header recursion hell
 */
#define init_task_preempt_count(p) do { } while (0)

#define init_idle_preempt_count(p, cpu) do { \
	per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \
} while (0)

/*
 * We fold the NEED_RESCHED bit into the preempt count such that
 * preempt_enable() can decrement and test for needing to reschedule with a
 * single instruction.
 *
 * We invert the actual bit, so that when the decrement hits 0 we know we both
 * need to resched (the bit is cleared) and can resched (no preempt count).
 */
static __always_inline void set_preempt_need_resched(void)
{
	raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED);
}

static __always_inline void clear_preempt_need_resched(void)
{
	raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED);
}

static __always_inline bool test_preempt_need_resched(void)
{
	return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED);
}

/*
 * The various preempt_count add/sub methods
 */
static __always_inline void __preempt_count_add(int val)
{
	raw_cpu_add_4(pcpu_hot.preempt_count, val);
}

static __always_inline void __preempt_count_sub(int val)
{
	raw_cpu_add_4(pcpu_hot.preempt_count, -val);
}

/*
 * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule,
 * a decrement which hits zero means we have no preempt_count and should
 * reschedule.
 */
static __always_inline bool __preempt_count_dec_and_test(void)
{
	return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e,
			       __percpu_arg([var]));
}

/*
 * Returns true when we need to resched and can (barring IRQ state).
 */
static __always_inline bool should_resched(int preempt_offset)
{
	return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset);
}

#ifdef CONFIG_PREEMPTION

extern asmlinkage void preempt_schedule(void);
extern asmlinkage void preempt_schedule_thunk(void);

#define preempt_schedule_dynamic_enabled	preempt_schedule_thunk
#define preempt_schedule_dynamic_disabled	NULL

extern asmlinkage void preempt_schedule_notrace(void);
extern asmlinkage void preempt_schedule_notrace_thunk(void);

#define preempt_schedule_notrace_dynamic_enabled	preempt_schedule_notrace_thunk
#define preempt_schedule_notrace_dynamic_disabled	NULL

#ifdef CONFIG_PREEMPT_DYNAMIC

DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled);

#define __preempt_schedule() \
do { \
	__STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \
	asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
} while (0)

DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled);

#define __preempt_schedule_notrace() \
do { \
	__STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \
	asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \
} while (0)

#else /* PREEMPT_DYNAMIC */

#define __preempt_schedule() \
	asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT);

#define __preempt_schedule_notrace() \
	asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT);

#endif /* PREEMPT_DYNAMIC */
#endif /* PREEMPTION */

#endif /* __ASM_PREEMPT_H */
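The inverted-bit folding described in the comments above can be modelled in a few lines of userspace C. This is a hedged sketch: the per-CPU accessors are replaced by a plain variable, and only the bit arithmetic is meant to mirror the header.

/*
 * Model: the count starts at PREEMPT_ENABLED (only the inverted NEED_RESCHED
 * bit set).  preempt_disable() increments; marking a reschedule clears the
 * bit; a single decrement hitting zero means "no preempt count left AND a
 * reschedule is wanted".
 */
#include <stdio.h>
#include <stdbool.h>

#define PREEMPT_NEED_RESCHED	0x80000000u

static unsigned int count = PREEMPT_NEED_RESCHED;	/* PREEMPT_ENABLED */

static void model_preempt_disable(void)	{ count += 1; }
static void model_set_need_resched(void)	{ count &= ~PREEMPT_NEED_RESCHED; }
static bool model_dec_and_test(void)		{ return --count == 0; }

int main(void)
{
	model_preempt_disable();	/* count = 0x80000001 */
	model_set_need_resched();	/* count = 0x00000001 */

	/* preempt_enable() is one decrement plus this zero test. */
	printf("reschedule now? %s\n", model_dec_and_test() ? "yes" : "no");
	return 0;
}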
// SPDX-License-Identifier: GPL-2.0-only
/*
 * LCD Lowlevel Control Abstraction
 *
 * Copyright (C) 2003,2004 Hewlett-Packard Company
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/lcd.h>
#include <linux/notifier.h>
#include <linux/ctype.h>
#include <linux/err.h>
#include <linux/fb.h>
#include <linux/slab.h>

#if defined(CONFIG_FB) || (defined(CONFIG_FB_MODULE) && \
			   defined(CONFIG_LCD_CLASS_DEVICE_MODULE))
/* This callback gets called when something important happens inside a
 * framebuffer driver. We're looking if that important event is blanking,
 * and if it is, we're switching lcd power as well ...
*/ static int fb_notifier_callback(struct notifier_block *self, unsigned long event, void *data) { struct lcd_device *ld; struct fb_event *evdata = data; ld = container_of(self, struct lcd_device, fb_notif); if (!ld->ops) return 0; mutex_lock(&ld->ops_lock); if (!ld->ops->check_fb || ld->ops->check_fb(ld, evdata->info)) { if (event == FB_EVENT_BLANK) { if (ld->ops->set_power) ld->ops->set_power(ld, *(int *)evdata->data); } else { if (ld->ops->set_mode) ld->ops->set_mode(ld, evdata->data); } } mutex_unlock(&ld->ops_lock); return 0; } static int lcd_register_fb(struct lcd_device *ld) { memset(&ld->fb_notif, 0, sizeof(ld->fb_notif)); ld->fb_notif.notifier_call = fb_notifier_callback; return fb_register_client(&ld->fb_notif); } static void lcd_unregister_fb(struct lcd_device *ld) { fb_unregister_client(&ld->fb_notif); } #else static int lcd_register_fb(struct lcd_device *ld) { return 0; } static inline void lcd_unregister_fb(struct lcd_device *ld) { } #endif /* CONFIG_FB */ static ssize_t lcd_power_show(struct device *dev, struct device_attribute *attr, char *buf) { int rc; struct lcd_device *ld = to_lcd_device(dev); mutex_lock(&ld->ops_lock); if (ld->ops && ld->ops->get_power) rc = sprintf(buf, "%d\n", ld->ops->get_power(ld)); else rc = -ENXIO; mutex_unlock(&ld->ops_lock); return rc; } static ssize_t lcd_power_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; struct lcd_device *ld = to_lcd_device(dev); unsigned long power; rc = kstrtoul(buf, 0, &power); if (rc) return rc; rc = -ENXIO; mutex_lock(&ld->ops_lock); if (ld->ops && ld->ops->set_power) { pr_debug("set power to %lu\n", power); ld->ops->set_power(ld, power); rc = count; } mutex_unlock(&ld->ops_lock); return rc; } static DEVICE_ATTR_RW(lcd_power); static ssize_t contrast_show(struct device *dev, struct device_attribute *attr, char *buf) { int rc = -ENXIO; struct lcd_device *ld = to_lcd_device(dev); mutex_lock(&ld->ops_lock); if (ld->ops && ld->ops->get_contrast) rc = sprintf(buf, "%d\n", ld->ops->get_contrast(ld)); mutex_unlock(&ld->ops_lock); return rc; } static ssize_t contrast_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; struct lcd_device *ld = to_lcd_device(dev); unsigned long contrast; rc = kstrtoul(buf, 0, &contrast); if (rc) return rc; rc = -ENXIO; mutex_lock(&ld->ops_lock); if (ld->ops && ld->ops->set_contrast) { pr_debug("set contrast to %lu\n", contrast); ld->ops->set_contrast(ld, contrast); rc = count; } mutex_unlock(&ld->ops_lock); return rc; } static DEVICE_ATTR_RW(contrast); static ssize_t max_contrast_show(struct device *dev, struct device_attribute *attr, char *buf) { struct lcd_device *ld = to_lcd_device(dev); return sprintf(buf, "%d\n", ld->props.max_contrast); } static DEVICE_ATTR_RO(max_contrast); static struct class *lcd_class; static void lcd_device_release(struct device *dev) { struct lcd_device *ld = to_lcd_device(dev); kfree(ld); } static struct attribute *lcd_device_attrs[] = { &dev_attr_lcd_power.attr, &dev_attr_contrast.attr, &dev_attr_max_contrast.attr, NULL, }; ATTRIBUTE_GROUPS(lcd_device); /** * lcd_device_register - register a new object of lcd_device class. * @name: the name of the new object(must be the same as the name of the * respective framebuffer device). * @parent: pointer to the parent's struct device . * @devdata: an optional pointer to be stored in the device. The * methods may retrieve it by using lcd_get_data(ld). * @ops: the lcd operations structure. 
* * Creates and registers a new lcd device. Returns either an ERR_PTR() * or a pointer to the newly allocated device. */ struct lcd_device *lcd_device_register(const char *name, struct device *parent, void *devdata, struct lcd_ops *ops) { struct lcd_device *new_ld; int rc; pr_debug("lcd_device_register: name=%s\n", name); new_ld = kzalloc(sizeof(struct lcd_device), GFP_KERNEL); if (!new_ld) return ERR_PTR(-ENOMEM); mutex_init(&new_ld->ops_lock); mutex_init(&new_ld->update_lock); new_ld->dev.class = lcd_class; new_ld->dev.parent = parent; new_ld->dev.release = lcd_device_release; dev_set_name(&new_ld->dev, "%s", name); dev_set_drvdata(&new_ld->dev, devdata); new_ld->ops = ops; rc = device_register(&new_ld->dev); if (rc) { put_device(&new_ld->dev); return ERR_PTR(rc); } rc = lcd_register_fb(new_ld); if (rc) { device_unregister(&new_ld->dev); return ERR_PTR(rc); } return new_ld; } EXPORT_SYMBOL(lcd_device_register); /** * lcd_device_unregister - unregisters a object of lcd_device class. * @ld: the lcd device object to be unregistered and freed. * * Unregisters a previously registered via lcd_device_register object. */ void lcd_device_unregister(struct lcd_device *ld) { if (!ld) return; mutex_lock(&ld->ops_lock); ld->ops = NULL; mutex_unlock(&ld->ops_lock); lcd_unregister_fb(ld); device_unregister(&ld->dev); } EXPORT_SYMBOL(lcd_device_unregister); static void devm_lcd_device_release(struct device *dev, void *res) { struct lcd_device *lcd = *(struct lcd_device **)res; lcd_device_unregister(lcd); } static int devm_lcd_device_match(struct device *dev, void *res, void *data) { struct lcd_device **r = res; return *r == data; } /** * devm_lcd_device_register - resource managed lcd_device_register() * @dev: the device to register * @name: the name of the device * @parent: a pointer to the parent device * @devdata: an optional pointer to be stored for private driver use * @ops: the lcd operations structure * * @return a struct lcd on success, or an ERR_PTR on error * * Managed lcd_device_register(). The lcd_device returned from this function * are automatically freed on driver detach. See lcd_device_register() * for more information. */ struct lcd_device *devm_lcd_device_register(struct device *dev, const char *name, struct device *parent, void *devdata, struct lcd_ops *ops) { struct lcd_device **ptr, *lcd; ptr = devres_alloc(devm_lcd_device_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); lcd = lcd_device_register(name, parent, devdata, ops); if (!IS_ERR(lcd)) { *ptr = lcd; devres_add(dev, ptr); } else { devres_free(ptr); } return lcd; } EXPORT_SYMBOL(devm_lcd_device_register); /** * devm_lcd_device_unregister - resource managed lcd_device_unregister() * @dev: the device to unregister * @ld: the lcd device to unregister * * Deallocated a lcd allocated with devm_lcd_device_register(). Normally * this function will not need to be called and the resource management * code will ensure that the resource is freed. 
 */
void devm_lcd_device_unregister(struct device *dev, struct lcd_device *ld)
{
	int rc;

	rc = devres_release(dev, devm_lcd_device_release,
				devm_lcd_device_match, ld);
	WARN_ON(rc);
}
EXPORT_SYMBOL(devm_lcd_device_unregister);

static void __exit lcd_class_exit(void)
{
	class_destroy(lcd_class);
}

static int __init lcd_class_init(void)
{
	lcd_class = class_create("lcd");
	if (IS_ERR(lcd_class)) {
		pr_warn("Unable to create lcd class; errno = %ld\n",
			PTR_ERR(lcd_class));
		return PTR_ERR(lcd_class);
	}

	lcd_class->dev_groups = lcd_device_groups;
	return 0;
}

/*
 * if this is compiled into the kernel, we need to ensure that the
 * class is registered before users of the class try to register lcd's
 */
postcore_initcall(lcd_class_init);
module_exit(lcd_class_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jamey Hicks <jamey.hicks@hp.com>, Andrew Zabolotny <zap@homelink.ru>");
MODULE_DESCRIPTION("LCD Lowlevel Control Abstraction");
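For context, here is a hedged sketch of a panel driver sitting on top of this class. The "demo_lcd" platform driver and the mypanel bookkeeping are hypothetical; devm_lcd_device_register(), lcd_get_data() and the get_power()/set_power() callback shapes follow the code above, and the device is unregistered automatically on driver detach as documented for the devm variant.

/* Hypothetical example driver; names and power handling are illustrative. */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/fb.h>
#include <linux/lcd.h>

struct mypanel {
	int power;			/* last FB_BLANK_* value we were given */
};

static int mypanel_get_power(struct lcd_device *ld)
{
	struct mypanel *panel = lcd_get_data(ld);

	return panel->power;
}

static int mypanel_set_power(struct lcd_device *ld, int power)
{
	struct mypanel *panel = lcd_get_data(ld);

	/* A real driver would toggle regulators/GPIOs here. */
	panel->power = power;
	return 0;
}

static struct lcd_ops mypanel_lcd_ops = {
	.get_power = mypanel_get_power,
	.set_power = mypanel_set_power,
};

static int mypanel_probe(struct platform_device *pdev)
{
	struct mypanel *panel;
	struct lcd_device *ld;

	panel = devm_kzalloc(&pdev->dev, sizeof(*panel), GFP_KERNEL);
	if (!panel)
		return -ENOMEM;
	panel->power = FB_BLANK_UNBLANK;

	ld = devm_lcd_device_register(&pdev->dev, "demo_lcd", &pdev->dev,
				      panel, &mypanel_lcd_ops);
	return PTR_ERR_OR_ZERO(ld);
}

static struct platform_driver mypanel_driver = {
	.probe	= mypanel_probe,
	.driver	= { .name = "demo_lcd" },
};
module_platform_driver(mypanel_driver);

MODULE_DESCRIPTION("Hypothetical lcd class usage example");
MODULE_LICENSE("GPL");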
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_SIGNAL_H
#define _LINUX_SCHED_SIGNAL_H

#include <linux/rculist.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/jobctl.h>
#include <linux/sched/task.h>
#include <linux/cred.h>
#include <linux/refcount.h>
#include <linux/pid.h>
#include <linux/posix-timers.h>
#include <linux/mm_types.h>
#include <asm/ptrace.h>

/*
 * Types defining task->signal and task->sighand and APIs using them:
 */

struct sighand_struct {
spinlock_t siglock; refcount_t count; wait_queue_head_t signalfd_wqh; struct k_sigaction action[_NSIG]; }; /* * Per-process accounting stats: */ struct pacct_struct { int ac_flag; long ac_exitcode; unsigned long ac_mem; u64 ac_utime, ac_stime; unsigned long ac_minflt, ac_majflt; }; struct cpu_itimer { u64 expires; u64 incr; }; /* * This is the atomic variant of task_cputime, which can be used for * storing and updating task_cputime statistics without locking. */ struct task_cputime_atomic { atomic64_t utime; atomic64_t stime; atomic64_t sum_exec_runtime; }; #define INIT_CPUTIME_ATOMIC \ (struct task_cputime_atomic) { \ .utime = ATOMIC64_INIT(0), \ .stime = ATOMIC64_INIT(0), \ .sum_exec_runtime = ATOMIC64_INIT(0), \ } /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. */ struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; }; struct multiprocess_signals { sigset_t signal; struct hlist_node node; }; struct core_thread { struct task_struct *task; struct core_thread *next; }; struct core_state { atomic_t nr_threads; struct core_thread dumper; struct completion startup; }; /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always * implies a shared sighand_struct, so locking * sighand_struct is always a proper superset of * the locking of signal_struct. */ struct signal_struct { refcount_t sigcnt; atomic_t live; int nr_threads; int quick_threads; struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ /* current thread group signal load-balancing target: */ struct task_struct *curr_target; /* shared signal handling: */ struct sigpending shared_pending; /* For collecting multiprocess signals during fork */ struct hlist_head multiprocess; /* thread group exit support */ int group_exit_code; /* notify group_exec_task when notify_count is less or equal to 0 */ int notify_count; struct task_struct *group_exec_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ struct core_state *core_state; /* coredumping support */ /* * PR_SET_CHILD_SUBREAPER marks a process, like a service * manager, to re-parent orphan (double-forking) child processes * to this process instead of 'init'. The service manager is * able to receive SIGCHLD signals and is able to investigate * the process until it calls wait(). All children of this * process will inherit a flag if they should look for a * child_subreaper process at exit. */ unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ unsigned int next_posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; ktime_t it_real_incr; /* * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these * values are defined to 0 and 1 respectively */ struct cpu_itimer it[2]; /* * Thread group totals for process CPU timers. * See thread_group_cputimer(), et al, for details. */ struct thread_group_cputimer cputimer; #endif /* Empty if CONFIG_POSIX_TIMERS=n */ struct posix_cputimers posix_cputimers; /* PID/PID hash table linkage. 
	 */
	struct pid *pids[PIDTYPE_MAX];

#ifdef CONFIG_NO_HZ_FULL
	atomic_t tick_dep_mask;
#endif

	struct pid *tty_old_pgrp;

	/* boolean value for session group leader */
	int leader;

	struct tty_struct *tty; /* NULL if no tty */

#ifdef CONFIG_SCHED_AUTOGROUP
	struct autogroup *autogroup;
#endif
	/*
	 * Cumulative resource counters for dead threads in the group,
	 * and for reaped dead child processes forked by this group.
	 * Live threads maintain their own counters and add to these
	 * in __exit_signal, except for the group leader.
	 */
	seqlock_t stats_lock;
	u64 utime, stime, cutime, cstime;
	u64 gtime;
	u64 cgtime;
	struct prev_cputime prev_cputime;
	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
	unsigned long inblock, oublock, cinblock, coublock;
	unsigned long maxrss, cmaxrss;
	struct task_io_accounting ioac;

	/*
	 * Cumulative ns of schedule CPU time for dead threads in the
	 * group, not including a zombie group leader. (This only differs
	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
	 * other than jiffies.)
	 */
	unsigned long long sum_sched_runtime;

	/*
	 * We don't bother to synchronize most readers of this at all,
	 * because there is no reader checking a limit that actually needs
	 * to get both rlim_cur and rlim_max atomically, and either one
	 * alone is a single word that can safely be read normally.
	 * getrlimit/setrlimit use task_lock(current->group_leader) to
	 * protect this instead of the siglock, because they really
	 * have no need to disable irqs.
	 */
	struct rlimit rlim[RLIM_NLIMITS];

#ifdef CONFIG_BSD_PROCESS_ACCT
	struct pacct_struct pacct;	/* per-process accounting information */
#endif
#ifdef CONFIG_TASKSTATS
	struct taskstats *stats;
#endif
#ifdef CONFIG_AUDIT
	unsigned audit_tty;
	struct tty_audit_buf *tty_audit_buf;
#endif

	/*
	 * Thread is the potential origin of an oom condition; kill first on
	 * oom
	 */
	bool oom_flag_origin;
	short oom_score_adj;		/* OOM kill score adjustment */
	short oom_score_adj_min;	/* OOM kill score adjustment min value.
					 * Only settable by CAP_SYS_RESOURCE. */
	struct mm_struct *oom_mm;	/* recorded mm when the thread group got
					 * killed by the oom killer */

	struct mutex cred_guard_mutex;	/* guard against foreign influences on
					 * credential calculations
					 * (notably ptrace).
					 * Deprecated: do not use in new code.
					 * Use exec_update_lock instead. */
	struct rw_semaphore exec_update_lock;	/* Held while task_struct is
						 * being updated during exec,
						 * and may have inconsistent
						 * permissions. */
} __randomize_layout;

/*
 * Bits in flags field of signal_struct.
 */
#define SIGNAL_STOP_STOPPED	0x00000001 /* job control stop in effect */
#define SIGNAL_STOP_CONTINUED	0x00000002 /* SIGCONT since WCONTINUED reap */
#define SIGNAL_GROUP_EXIT	0x00000004 /* group exit in progress */

/*
 * Pending notifications to parent.
*/ #define SIGNAL_CLD_STOPPED 0x00000010 #define SIGNAL_CLD_CONTINUED 0x00000020 #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ #define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ SIGNAL_STOP_CONTINUED) static inline void signal_set_stop_flags(struct signal_struct *sig, unsigned int flags) { WARN_ON(sig->flags & SIGNAL_GROUP_EXIT); sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; } extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { struct task_struct *task = current; kernel_siginfo_t __info; enum pid_type __type; int ret; spin_lock_irq(&task->sighand->siglock); ret = dequeue_signal(task, &task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; } static inline void kernel_signal_stop(void) { spin_lock_irq(&current->sighand->siglock); if (current->jobctl & JOBCTL_STOP_DEQUEUED) { current->jobctl |= JOBCTL_STOPPED; set_special_state(TASK_STOPPED); } spin_unlock_irq(&current->sighand->siglock); schedule(); } int force_sig_fault_to_task(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_fault(int sig, int code, void __user *addr); int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); int force_sig_pkuerr(void __user *addr, u32 pkey); int send_sig_perf(void __user *addr, u32 type, u64 sig_data); int force_sig_ptrace_errno_trap(int errno, void __user *addr); int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno); int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno, struct task_struct *t); int force_sig_seccomp(int syscall, int reason, bool force_coredump); extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern void force_sigsegv(int sig); extern int force_sig_info(struct kernel_siginfo *); extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid); extern int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *, const struct cred *); extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); extern void force_fatal_sig(int); extern void force_exit_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline void clear_notify_signal(void) { clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); } /* * Returns 'true' if kick_process() is needed to force a transition from * user -> 
kernel to guarantee expedient run of TWA_SIGNAL based task_work. */ static inline bool __set_notify_signal(struct task_struct *task) { return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && !wake_up_state(task, TASK_INTERRUPTIBLE); } /* * Called to break out of interruptible wait loops, and enter the * exit_to_user_mode_loop(). */ static inline void set_notify_signal(struct task_struct *task) { if (__set_notify_signal(task)) kick_process(task); } static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); return -ERESTARTNOINTR; } static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } static inline int signal_pending(struct task_struct *p) { /* * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same * behavior in terms of ensuring that we break out of wait loops * so that notify signal callbacks can be processed. */ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) return 1; return task_sigpending(p); } static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); } static inline int fatal_signal_pending(struct task_struct *p) { return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(unsigned int state, struct task_struct *p) { if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) return 0; if (!signal_pending(p)) return 0; return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } /* * This should only be used in fault handlers to decide whether we * should stop the current fault routine to handle the signals * instead, especially with the case where we've got interrupted with * a VM_FAULT_RETRY. */ static inline bool fault_signal_pending(vm_fault_t fault_flags, struct pt_regs *regs) { return unlikely((fault_flags & VM_FAULT_RETRY) && (fatal_signal_pending(current) || (user_mode(regs) && signal_pending(current)))); } /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. * This is required every time the blocked sigset_t changes. * callers must hold sighand->siglock. */ extern void recalc_sigpending(void); extern void calculate_sigpending(void); extern void signal_wake_up_state(struct task_struct *t, unsigned int state); static inline void signal_wake_up(struct task_struct *t, bool fatal) { unsigned int state = 0; if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) { t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED); state = TASK_WAKEKILL | __TASK_TRACED; } signal_wake_up_state(t, state); } static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) { unsigned int state = 0; if (resume) { t->jobctl &= ~JOBCTL_TRACED; state = __TASK_TRACED; } signal_wake_up_state(t, state); } void task_join_group_stop(struct task_struct *task); #ifdef TIF_RESTORE_SIGMASK /* * Legacy restore_sigmask accessors. These are inefficient on * SMP architectures because they require atomic operations. */ /** * set_restore_sigmask() - make sure saved_sigmask processing gets done * * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code * will run before returning to user mode, to process the flag. For * all callers, TIF_SIGPENDING is already set or it's no harm to set * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the * arch code will notice on return to user mode, in case those bits * are scarce. We set TIF_SIGPENDING here to ensure that the arch * signal code always gets run when TIF_RESTORE_SIGMASK is set. 
*/ static inline void set_restore_sigmask(void) { set_thread_flag(TIF_RESTORE_SIGMASK); } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { clear_tsk_thread_flag(task, TIF_RESTORE_SIGMASK); } static inline void clear_restore_sigmask(void) { clear_thread_flag(TIF_RESTORE_SIGMASK); } static inline bool test_tsk_restore_sigmask(struct task_struct *task) { return test_tsk_thread_flag(task, TIF_RESTORE_SIGMASK); } static inline bool test_restore_sigmask(void) { return test_thread_flag(TIF_RESTORE_SIGMASK); } static inline bool test_and_clear_restore_sigmask(void) { return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); } #else /* TIF_RESTORE_SIGMASK */ /* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ static inline void set_restore_sigmask(void) { current->restore_sigmask = true; } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { task->restore_sigmask = false; } static inline void clear_restore_sigmask(void) { current->restore_sigmask = false; } static inline bool test_restore_sigmask(void) { return current->restore_sigmask; } static inline bool test_tsk_restore_sigmask(struct task_struct *task) { return task->restore_sigmask; } static inline bool test_and_clear_restore_sigmask(void) { if (!current->restore_sigmask) return false; current->restore_sigmask = false; return true; } #endif static inline void restore_saved_sigmask(void) { if (test_and_clear_restore_sigmask()) __set_current_blocked(&current->saved_sigmask); } extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } static inline sigset_t *sigmask_to_save(void) { sigset_t *res = &current->blocked; if (unlikely(test_restore_sigmask())) res = &current->saved_sigmask; return res; } static inline int kill_cad_pid(int sig, int priv) { return kill_pid(cad_pid, sig, priv); } /* These can be the second arg to send_sig_info/send_group_sig_info. */ #define SEND_SIG_NOINFO ((struct kernel_siginfo *) 0) #define SEND_SIG_PRIV ((struct kernel_siginfo *) 1) static inline int __on_sig_stack(unsigned long sp) { #ifdef CONFIG_STACK_GROWSUP return sp >= current->sas_ss_sp && sp - current->sas_ss_sp < current->sas_ss_size; #else return sp > current->sas_ss_sp && sp - current->sas_ss_sp <= current->sas_ss_size; #endif } /* * True if we are on the alternate signal stack. */ static inline int on_sig_stack(unsigned long sp) { /* * If the signal stack is SS_AUTODISARM then, by construction, we * can't be on the signal stack unless user code deliberately set * SS_AUTODISARM when we were already on it. * * This improves reliability: if user state gets corrupted such that * the stack pointer points very close to the end of the signal stack, * then this check will enable the signal to be handled anyway. */ if (current->sas_ss_flags & SS_AUTODISARM) return 0; return __on_sig_stack(sp); } static inline int sas_ss_flags(unsigned long sp) { if (!current->sas_ss_size) return SS_DISABLE; return on_sig_stack(sp) ? SS_ONSTACK : 0; } static inline void sas_ss_reset(struct task_struct *p) { p->sas_ss_sp = 0; p->sas_ss_size = 0; p->sas_ss_flags = SS_DISABLE; } static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) { if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! 
sas_ss_flags(sp)) #ifdef CONFIG_STACK_GROWSUP return current->sas_ss_sp; #else return current->sas_ss_sp + current->sas_ss_size; #endif return sp; } extern void __cleanup_sighand(struct sighand_struct *); extern void flush_itimer_signals(void); #define tasklist_empty() \ list_empty(&init_task.tasks) #define next_task(p) \ list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) extern bool current_is_single_threaded(void); /* * Without tasklist/siglock it is only rcu-safe if g can't exit/exec, * otherwise next_thread(t) will never reach g after list_del_rcu(g). */ #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) #define for_other_threads(p, t) \ for (t = p; (t = next_thread(t)) != p; ) #define __for_each_thread(signal, t) \ list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \ lockdep_is_held(&tasklist_lock)) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) /* Careful: this is a double loop, 'break' won't work as expected. */ #define for_each_process_thread(p, t) \ for_each_process(p) for_each_thread(p, t) typedef int (*proc_visitor)(struct task_struct *p, void *data); void walk_process_tree(struct task_struct *top, proc_visitor, void *); static inline struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { struct pid *pid; if (type == PIDTYPE_PID) pid = task_pid(task); else pid = task->signal->pids[type]; return pid; } static inline struct pid *task_tgid(struct task_struct *task) { return task->signal->pids[PIDTYPE_TGID]; } /* * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. 
*/ static inline struct pid *task_pgrp(struct task_struct *task) { return task->signal->pids[PIDTYPE_PGID]; } static inline struct pid *task_session(struct task_struct *task) { return task->signal->pids[PIDTYPE_SID]; } static inline int get_nr_threads(struct task_struct *task) { return task->signal->nr_threads; } static inline bool thread_group_leader(struct task_struct *p) { return p->exit_signal >= 0; } static inline bool same_thread_group(struct task_struct *p1, struct task_struct *p2) { return p1->signal == p2->signal; } /* * returns NULL if p is the last thread in the thread group */ static inline struct task_struct *__next_thread(struct task_struct *p) { return list_next_or_null_rcu(&p->signal->thread_head, &p->thread_node, struct task_struct, thread_node); } static inline struct task_struct *next_thread(struct task_struct *p) { return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) { return thread_group_leader(p) && list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); static inline struct sighand_struct *lock_task_sighand(struct task_struct *task, unsigned long *flags) { struct sighand_struct *ret; ret = __lock_task_sighand(task, flags); (void)__cond_lock(&task->sighand->siglock, ret); return ret; } static inline void unlock_task_sighand(struct task_struct *task, unsigned long *flags) { spin_unlock_irqrestore(&task->sighand->siglock, *flags); } #ifdef CONFIG_LOCKDEP extern void lockdep_assert_task_sighand_held(struct task_struct *task); #else static inline void lockdep_assert_task_sighand_held(struct task_struct *task) { } #endif static inline unsigned long task_rlimit(const struct task_struct *task, unsigned int limit) { return READ_ONCE(task->signal->rlim[limit].rlim_cur); } static inline unsigned long task_rlimit_max(const struct task_struct *task, unsigned int limit) { return READ_ONCE(task->signal->rlim[limit].rlim_max); } static inline unsigned long rlimit(unsigned int limit) { return task_rlimit(current, limit); } static inline unsigned long rlimit_max(unsigned int limit) { return task_rlimit_max(current, limit); } #endif /* _LINUX_SCHED_SIGNAL_H */
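/*
 * Example (illustrative, not part of <linux/sched/signal.h> above): a minimal
 * sketch of how the accessors from this header are typically combined.
 * task_rlimit() reads a resource limit locklessly via READ_ONCE(), while
 * lock_task_sighand()/unlock_task_sighand() take the per-thread-group siglock
 * before signal state is inspected.  The function name example_report_task()
 * is made up for illustration, and the sketch assumes the caller already
 * holds a reference on @task.
 */
#include <linux/printk.h>
#include <linux/resource.h>
#include <linux/sched/signal.h>

static void example_report_task(struct task_struct *task)
{
	struct sighand_struct *sighand;
	unsigned long flags;

	/* Lockless rlimit read; a single word needs no siglock. */
	pr_info("RLIMIT_CPU cur=%lu\n", task_rlimit(task, RLIMIT_CPU));

	/* Pin and lock ->sighand; NULL means the task is already exiting. */
	sighand = lock_task_sighand(task, &flags);
	if (!sighand)
		return;

	pr_info("threads=%d fatal_signal_pending=%d\n",
		get_nr_threads(task), fatal_signal_pending(task));

	unlock_task_sighand(task, &flags);
}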
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM irq

#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_IRQ_H

#include <linux/tracepoint.h>

struct irqaction;
struct softirq_action;

#define SOFTIRQ_NAME_LIST			\
		softirq_name(HI)		\
		softirq_name(TIMER)		\
		softirq_name(NET_TX)		\
		softirq_name(NET_RX)		\
		softirq_name(BLOCK)		\
		softirq_name(IRQ_POLL)		\
		softirq_name(TASKLET)		\
		softirq_name(SCHED)		\
		softirq_name(HRTIMER)		\
		softirq_name_end(RCU)

#undef softirq_name
#undef softirq_name_end

#define softirq_name(sirq) TRACE_DEFINE_ENUM(sirq##_SOFTIRQ);
#define softirq_name_end(sirq) TRACE_DEFINE_ENUM(sirq##_SOFTIRQ);

SOFTIRQ_NAME_LIST

#undef softirq_name
#undef softirq_name_end

#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq },
#define softirq_name_end(sirq) { sirq##_SOFTIRQ, #sirq }

#define show_softirq_name(val)			\
	__print_symbolic(val, SOFTIRQ_NAME_LIST)

/**
 * irq_handler_entry - called immediately before the irq action handler
 * @irq: irq number
 * @action: pointer to struct irqaction
 *
 * The struct irqaction pointed to by @action contains various
 * information about the handler, including the device name,
 * @action->name, and the device id, @action->dev_id. When used in
 * conjunction with the irq_handler_exit tracepoint, we can figure
 * out irq handler latencies.
 */
TRACE_EVENT(irq_handler_entry,

	TP_PROTO(int irq, struct irqaction *action),

	TP_ARGS(irq, action),

	TP_STRUCT__entry(
		__field(	int,	irq		)
		__string(	name,	action->name	)
	),

	TP_fast_assign(
		__entry->irq = irq;
		__assign_str(name, action->name);
	),

	TP_printk("irq=%d name=%s", __entry->irq, __get_str(name))
);

/**
 * irq_handler_exit - called immediately after the irq action handler returns
 * @irq: irq number
 * @action: pointer to struct irqaction
 * @ret: return value
 *
 * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
 * @action->handler successfully handled this irq. Otherwise, the irq might be
 * a shared irq line, or the irq was not handled successfully. Can be used in
 * conjunction with the irq_handler_entry to understand irq handler latencies.
 */
TRACE_EVENT(irq_handler_exit,

	TP_PROTO(int irq, struct irqaction *action, int ret),

	TP_ARGS(irq, action, ret),

	TP_STRUCT__entry(
		__field(	int,	irq	)
		__field(	int,	ret	)
	),

	TP_fast_assign(
		__entry->irq = irq;
		__entry->ret = ret;
	),

	TP_printk("irq=%d ret=%s",
		  __entry->irq, __entry->ret ? "handled" : "unhandled")
);

DECLARE_EVENT_CLASS(softirq,

	TP_PROTO(unsigned int vec_nr),

	TP_ARGS(vec_nr),

	TP_STRUCT__entry(
		__field(	unsigned int,	vec	)
	),

	TP_fast_assign(
		__entry->vec = vec_nr;
	),

	TP_printk("vec=%u [action=%s]", __entry->vec,
		  show_softirq_name(__entry->vec))
);

/**
 * softirq_entry - called immediately before the softirq handler
 * @vec_nr: softirq vector number
 *
 * When used in combination with the softirq_exit tracepoint
 * we can determine the softirq handler routine.
 */
DEFINE_EVENT(softirq, softirq_entry,

	TP_PROTO(unsigned int vec_nr),

	TP_ARGS(vec_nr)
);

/**
 * softirq_exit - called immediately after the softirq handler returns
 * @vec_nr: softirq vector number
 *
 * When used in combination with the softirq_entry tracepoint
 * we can determine the softirq handler routine.
 */
DEFINE_EVENT(softirq, softirq_exit,

	TP_PROTO(unsigned int vec_nr),

	TP_ARGS(vec_nr)
);

/**
 * softirq_raise - called immediately when a softirq is raised
 * @vec_nr: softirq vector number
 *
 * When used in combination with the softirq_entry tracepoint
 * we can determine the softirq raise to run latency.
 */
DEFINE_EVENT(softirq, softirq_raise,

	TP_PROTO(unsigned int vec_nr),

	TP_ARGS(vec_nr)
);

DECLARE_EVENT_CLASS(tasklet,

	TP_PROTO(struct tasklet_struct *t, void *func),

	TP_ARGS(t, func),

	TP_STRUCT__entry(
		__field(	void *,	tasklet)
		__field(	void *,	func)
	),

	TP_fast_assign(
		__entry->tasklet = t;
		__entry->func = func;
	),

	TP_printk("tasklet=%ps function=%ps", __entry->tasklet, __entry->func)
);

/**
 * tasklet_entry - called immediately before the tasklet is run
 * @t: tasklet pointer
 * @func: tasklet callback or function being run
 *
 * Used to find individual tasklet execution time
 */
DEFINE_EVENT(tasklet, tasklet_entry,

	TP_PROTO(struct tasklet_struct *t, void *func),

	TP_ARGS(t, func)
);

/**
 * tasklet_exit - called immediately after the tasklet is run
 * @t: tasklet pointer
 * @func: tasklet callback or function being run
 *
 * Used to find individual tasklet execution time
 */
DEFINE_EVENT(tasklet, tasklet_exit,

	TP_PROTO(struct tasklet_struct *t, void *func),

	TP_ARGS(t, func)
);

#endif /* _TRACE_IRQ_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
// SPDX-License-Identifier: GPL-2.0
/*
 * BlueZ - Bluetooth protocol stack for Linux
 *
 * Copyright (C) 2021 Intel Corporation
 * Copyright 2023 NXP
 */

#include <linux/property.h>

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/mgmt.h>

#include "hci_request.h"
#include "hci_codec.h"
#include "hci_debugfs.h"
#include "smp.h"
#include "eir.h"
#include "msft.h"
#include "aosp.h"
#include "leds.h"

static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
				  struct sk_buff *skb)
{
	bt_dev_dbg(hdev, "result 0x%2.2x", result);

	if (hdev->req_status != HCI_REQ_PEND)
		return;

	hdev->req_result = result;
	hdev->req_status = HCI_REQ_DONE;

	/* Free the request command so it is not used as response */
	kfree_skb(hdev->req_skb);
	hdev->req_skb = NULL;

	if (skb) {
		struct sock *sk = hci_skb_sk(skb);

		/* Drop sk reference if set */
		if (sk)
			sock_put(sk);

		hdev->req_rsp = skb_get(skb);
	}

	wake_up_interruptible(&hdev->req_wait_q);
}

static struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode,
					  u32 plen, const void *param,
					  struct sock *sk)
{
	int len = HCI_COMMAND_HDR_SIZE + plen;
	struct hci_command_hdr *hdr;
	struct sk_buff *skb;

	skb = bt_skb_alloc(len, GFP_ATOMIC);
	if (!skb)
		return NULL;

	hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE);
	hdr->opcode = cpu_to_le16(opcode);
	hdr->plen = plen;

	if (plen)
		skb_put_data(skb, param, plen);

	bt_dev_dbg(hdev, "skb len %d", skb->len);

	hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
	hci_skb_opcode(skb) = opcode;

	/* Grab a reference if command needs to be associated with a sock (e.g.
	 * likely mgmt socket that initiated the command).
	 */
	if (sk) {
		hci_skb_sk(skb) = sk;
		sock_hold(sk);
	}

	return skb;
}

static void hci_cmd_sync_add(struct hci_request *req, u16 opcode, u32 plen,
			     const void *param, u8 event, struct sock *sk)
{
	struct hci_dev *hdev = req->hdev;
	struct sk_buff *skb;

	bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen);

	/* If an error occurred during request building, there is no point in
	 * queueing the HCI command. We can simply return.
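	 * hci_cmd_sync_run() also checks req->err, purges anything already
	 * queued and reports the error back for the request as a whole.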
*/ if (req->err) return; skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, sk); if (!skb) { bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", opcode); req->err = -ENOMEM; return; } if (skb_queue_empty(&req->cmd_q)) bt_cb(skb)->hci.req_flags |= HCI_REQ_START; hci_skb_event(skb) = event; skb_queue_tail(&req->cmd_q, skb); } static int hci_cmd_sync_run(struct hci_request *req) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; unsigned long flags; bt_dev_dbg(hdev, "length %u", skb_queue_len(&req->cmd_q)); /* If an error occurred during request building, remove all HCI * commands queued on the HCI request queue. */ if (req->err) { skb_queue_purge(&req->cmd_q); return req->err; } /* Do not allow empty requests */ if (skb_queue_empty(&req->cmd_q)) return -ENODATA; skb = skb_peek_tail(&req->cmd_q); bt_cb(skb)->hci.req_complete_skb = hci_cmd_sync_complete; bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB; spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); queue_work(hdev->workqueue, &hdev->cmd_work); return 0; } /* This function requires the caller holds hdev->req_lock. */ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, struct sock *sk) { struct hci_request req; struct sk_buff *skb; int err = 0; bt_dev_dbg(hdev, "Opcode 0x%4.4x", opcode); hci_req_init(&req, hdev); hci_cmd_sync_add(&req, opcode, plen, param, event, sk); hdev->req_status = HCI_REQ_PEND; err = hci_cmd_sync_run(&req); if (err < 0) return ERR_PTR(err); err = wait_event_interruptible_timeout(hdev->req_wait_q, hdev->req_status != HCI_REQ_PEND, timeout); if (err == -ERESTARTSYS) return ERR_PTR(-EINTR); switch (hdev->req_status) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); break; case HCI_REQ_CANCELED: err = -hdev->req_result; break; default: err = -ETIMEDOUT; break; } hdev->req_status = 0; hdev->req_result = 0; skb = hdev->req_rsp; hdev->req_rsp = NULL; bt_dev_dbg(hdev, "end: err %d", err); if (err < 0) { kfree_skb(skb); return ERR_PTR(err); } return skb; } EXPORT_SYMBOL(__hci_cmd_sync_sk); /* This function requires the caller holds hdev->req_lock. */ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout) { return __hci_cmd_sync_sk(hdev, opcode, plen, param, 0, timeout, NULL); } EXPORT_SYMBOL(__hci_cmd_sync); /* Send HCI command and wait for command complete event */ struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout) { struct sk_buff *skb; if (!test_bit(HCI_UP, &hdev->flags)) return ERR_PTR(-ENETDOWN); bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); hci_req_sync_lock(hdev); skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout); hci_req_sync_unlock(hdev); return skb; } EXPORT_SYMBOL(hci_cmd_sync); /* This function requires the caller holds hdev->req_lock. */ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout) { return __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, NULL); } EXPORT_SYMBOL(__hci_cmd_sync_ev); /* This function requires the caller holds hdev->req_lock. 
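 *
 * Returns 0 on success, a positive HCI status taken from the command's
 * return parameters, or a negative errno.  A minimal caller sketch via the
 * __hci_cmd_sync_status() wrapper below (opcode and parameters purely
 * illustrative, mirroring hci_update_eir_sync() further down):
 *
 *	err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp,
 *				    HCI_CMD_TIMEOUT);
 *	if (err)
 *		return err;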
*/ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, struct sock *sk) { struct sk_buff *skb; u8 status; skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk); if (IS_ERR(skb)) { if (!event) bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", opcode, PTR_ERR(skb)); return PTR_ERR(skb); } /* If command return a status event skb will be set to NULL as there are * no parameters, in case of failure IS_ERR(skb) would have be set to * the actual error would be found with PTR_ERR(skb). */ if (!skb) return 0; status = skb->data[0]; kfree_skb(skb); return status; } EXPORT_SYMBOL(__hci_cmd_sync_status_sk); int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout) { return __hci_cmd_sync_status_sk(hdev, opcode, plen, param, 0, timeout, NULL); } EXPORT_SYMBOL(__hci_cmd_sync_status); static void hci_cmd_sync_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work); bt_dev_dbg(hdev, ""); /* Dequeue all entries and run them */ while (1) { struct hci_cmd_sync_work_entry *entry; mutex_lock(&hdev->cmd_sync_work_lock); entry = list_first_entry_or_null(&hdev->cmd_sync_work_list, struct hci_cmd_sync_work_entry, list); if (entry) list_del(&entry->list); mutex_unlock(&hdev->cmd_sync_work_lock); if (!entry) break; bt_dev_dbg(hdev, "entry %p", entry); if (entry->func) { int err; hci_req_sync_lock(hdev); err = entry->func(hdev, entry->data); if (entry->destroy) entry->destroy(hdev, entry->data, err); hci_req_sync_unlock(hdev); } kfree(entry); } } static void hci_cmd_sync_cancel_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_cancel_work); cancel_delayed_work_sync(&hdev->cmd_timer); cancel_delayed_work_sync(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); wake_up_interruptible(&hdev->req_wait_q); } static int hci_scan_disable_sync(struct hci_dev *hdev); static int scan_disable_sync(struct hci_dev *hdev, void *data) { return hci_scan_disable_sync(hdev); } static int hci_inquiry_sync(struct hci_dev *hdev, u8 length); static int interleaved_inquiry_sync(struct hci_dev *hdev, void *data) { return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN); } static void le_scan_disable(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan_disable.work); int status; bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) goto _return; status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL); if (status) { bt_dev_err(hdev, "failed to disable LE scan: %d", status); goto _return; } hdev->discovery.scan_start = 0; /* If we were running LE only scan, change discovery state. If * we were running both LE and BR/EDR inquiry simultaneously, * and BR/EDR inquiry is already finished, stop discovery, * otherwise BR/EDR inquiry will stop discovery when finished. * If we will resolve remote device name, do not change * discovery state. 
*/ if (hdev->discovery.type == DISCOV_TYPE_LE) goto discov_stopped; if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED) goto _return; if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { if (!test_bit(HCI_INQUIRY, &hdev->flags) && hdev->discovery.state != DISCOVERY_RESOLVING) goto discov_stopped; goto _return; } status = hci_cmd_sync_queue(hdev, interleaved_inquiry_sync, NULL, NULL); if (status) { bt_dev_err(hdev, "inquiry failed: status %d", status); goto discov_stopped; } goto _return; discov_stopped: hci_discovery_set_state(hdev, DISCOVERY_STOPPED); _return: hci_dev_unlock(hdev); } static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, u8 filter_dup); static int reenable_adv_sync(struct hci_dev *hdev, void *data) { bt_dev_dbg(hdev, ""); if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && list_empty(&hdev->adv_instances)) return 0; if (hdev->cur_adv_instance) { return hci_schedule_adv_instance_sync(hdev, hdev->cur_adv_instance, true); } else { if (ext_adv_capable(hdev)) { hci_start_ext_adv_sync(hdev, 0x00); } else { hci_update_adv_data_sync(hdev, 0x00); hci_update_scan_rsp_data_sync(hdev, 0x00); hci_enable_advertising_sync(hdev); } } return 0; } static void reenable_adv(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, reenable_adv_work); int status; bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); status = hci_cmd_sync_queue(hdev, reenable_adv_sync, NULL, NULL); if (status) bt_dev_err(hdev, "failed to reenable ADV: %d", status); hci_dev_unlock(hdev); } static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { hdev->adv_instance_timeout = 0; cancel_delayed_work(&hdev->adv_instance_expire); } } /* For a single instance: * - force == true: The instance will be removed even when its remaining * lifetime is not zero. * - force == false: the instance will be deactivated but kept stored unless * the remaining lifetime is zero. * * For instance == 0x00: * - force == true: All instances will be removed regardless of their timeout * setting. * - force == false: Only instances that have a timeout will be removed. */ int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force) { struct adv_info *adv_instance, *n, *next_instance = NULL; int err; u8 rem_inst; /* Cancel any timeout concerning the removed instance(s). */ if (!instance || hdev->cur_adv_instance == instance) cancel_adv_timeout(hdev); /* Get the next instance to advertise BEFORE we remove * the current one. This can be the same instance again * if there is only one instance. */ if (instance && hdev->cur_adv_instance == instance) next_instance = hci_get_next_instance(hdev, instance); if (instance == 0x00) { list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { if (!(force || adv_instance->timeout)) continue; rem_inst = adv_instance->instance; err = hci_remove_adv_instance(hdev, rem_inst); if (!err) mgmt_advertising_removed(sk, hdev, rem_inst); } } else { adv_instance = hci_find_adv_instance(hdev, instance); if (force || (adv_instance && adv_instance->timeout && !adv_instance->remaining_time)) { /* Don't advertise a removed instance. 
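			 * next_instance was looked up before the removal, so
			 * it may still point at the instance deleted just
			 * below.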
*/ if (next_instance && next_instance->instance == instance) next_instance = NULL; err = hci_remove_adv_instance(hdev, instance); if (!err) mgmt_advertising_removed(sk, hdev, instance); } } if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) return 0; if (next_instance && !ext_adv_capable(hdev)) return hci_schedule_adv_instance_sync(hdev, next_instance->instance, false); return 0; } static int adv_timeout_expire_sync(struct hci_dev *hdev, void *data) { u8 instance = *(u8 *)data; kfree(data); hci_clear_adv_instance_sync(hdev, NULL, instance, false); if (list_empty(&hdev->adv_instances)) return hci_disable_advertising_sync(hdev); return 0; } static void adv_timeout_expire(struct work_struct *work) { u8 *inst_ptr; struct hci_dev *hdev = container_of(work, struct hci_dev, adv_instance_expire.work); bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); hdev->adv_instance_timeout = 0; if (hdev->cur_adv_instance == 0x00) goto unlock; inst_ptr = kmalloc(1, GFP_KERNEL); if (!inst_ptr) goto unlock; *inst_ptr = hdev->cur_adv_instance; hci_cmd_sync_queue(hdev, adv_timeout_expire_sync, inst_ptr, NULL); unlock: hci_dev_unlock(hdev); } void hci_cmd_sync_init(struct hci_dev *hdev) { INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work); INIT_LIST_HEAD(&hdev->cmd_sync_work_list); mutex_init(&hdev->cmd_sync_work_lock); mutex_init(&hdev->unregister_lock); INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); INIT_WORK(&hdev->reenable_adv_work, reenable_adv); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } static void _hci_cmd_sync_cancel_entry(struct hci_dev *hdev, struct hci_cmd_sync_work_entry *entry, int err) { if (entry->destroy) entry->destroy(hdev, entry->data, err); list_del(&entry->list); kfree(entry); } void hci_cmd_sync_clear(struct hci_dev *hdev) { struct hci_cmd_sync_work_entry *entry, *tmp; cancel_work_sync(&hdev->cmd_sync_work); cancel_work_sync(&hdev->reenable_adv_work); mutex_lock(&hdev->cmd_sync_work_lock); list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); mutex_unlock(&hdev->cmd_sync_work_lock); } void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } } EXPORT_SYMBOL(hci_cmd_sync_cancel); /* Cancel ongoing command request synchronously: * * - Set result and mark status to HCI_REQ_CANCELED * - Wakeup command sync thread */ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { /* req_result is __u32 so error must be positive to be properly * propagated. */ hdev->req_result = err < 0 ? 
-err : err; hdev->req_status = HCI_REQ_CANCELED; wake_up_interruptible(&hdev->req_wait_q); } } EXPORT_SYMBOL(hci_cmd_sync_cancel_sync); /* Submit HCI command to be run in as cmd_sync_work: * * - hdev must _not_ be unregistered */ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; int err = 0; mutex_lock(&hdev->unregister_lock); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { err = -ENODEV; goto unlock; } entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { err = -ENOMEM; goto unlock; } entry->func = func; entry->data = data; entry->destroy = destroy; mutex_lock(&hdev->cmd_sync_work_lock); list_add_tail(&entry->list, &hdev->cmd_sync_work_list); mutex_unlock(&hdev->cmd_sync_work_lock); queue_work(hdev->req_workqueue, &hdev->cmd_sync_work); unlock: mutex_unlock(&hdev->unregister_lock); return err; } EXPORT_SYMBOL(hci_cmd_sync_submit); /* Queue HCI command: * * - hdev must be running */ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { /* Only queue command if hdev is running which means it had been opened * and is either on init phase or is already up. */ if (!test_bit(HCI_RUNNING, &hdev->flags)) return -ENETDOWN; return hci_cmd_sync_submit(hdev, func, data, destroy); } EXPORT_SYMBOL(hci_cmd_sync_queue); static struct hci_cmd_sync_work_entry * _hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { if (func && entry->func != func) continue; if (data && entry->data != data) continue; if (destroy && entry->destroy != destroy) continue; return entry; } return NULL; } /* Queue HCI command entry once: * * - Lookup if an entry already exist and only if it doesn't creates a new entry * and queue it. */ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) return 0; return hci_cmd_sync_queue(hdev, func, data, destroy); } EXPORT_SYMBOL(hci_cmd_sync_queue_once); /* Lookup HCI command entry: * * - Return first entry that matches by function callback or data or * destroy callback. */ struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; mutex_lock(&hdev->cmd_sync_work_lock); entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy); mutex_unlock(&hdev->cmd_sync_work_lock); return entry; } EXPORT_SYMBOL(hci_cmd_sync_lookup_entry); /* Cancel HCI command entry */ void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, struct hci_cmd_sync_work_entry *entry) { mutex_lock(&hdev->cmd_sync_work_lock); _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); mutex_unlock(&hdev->cmd_sync_work_lock); } EXPORT_SYMBOL(hci_cmd_sync_cancel_entry); /* Dequeue one HCI command entry: * * - Lookup and cancel first entry that matches. 
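 *
 * As with hci_cmd_sync_lookup_entry(), a NULL func, data or destroy acts as
 * a wildcard for that field.  Returns true if an entry was cancelled, e.g.:
 *
 *	if (hci_cmd_sync_dequeue_once(hdev, scan_disable_sync, NULL, NULL))
 *		bt_dev_dbg(hdev, "pending scan disable cancelled");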
*/ bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy); if (!entry) return false; hci_cmd_sync_cancel_entry(hdev, entry); return true; } EXPORT_SYMBOL(hci_cmd_sync_dequeue_once); /* Dequeue HCI command entry: * * - Lookup and cancel any entry that matches by function callback or data or * destroy callback. */ bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; bool ret = false; mutex_lock(&hdev->cmd_sync_work_lock); while ((entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy))) { _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); ret = true; } mutex_unlock(&hdev->cmd_sync_work_lock); return ret; } EXPORT_SYMBOL(hci_cmd_sync_dequeue); int hci_update_eir_sync(struct hci_dev *hdev) { struct hci_cp_write_eir cp; bt_dev_dbg(hdev, ""); if (!hdev_is_powered(hdev)) return 0; if (!lmp_ext_inq_capable(hdev)) return 0; if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return 0; if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) return 0; memset(&cp, 0, sizeof(cp)); eir_create(hdev, cp.data); if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) return 0; memcpy(hdev->eir, cp.data, sizeof(cp.data)); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static u8 get_service_classes(struct hci_dev *hdev) { struct bt_uuid *uuid; u8 val = 0; list_for_each_entry(uuid, &hdev->uuids, list) val |= uuid->svc_hint; return val; } int hci_update_class_sync(struct hci_dev *hdev) { u8 cod[3]; bt_dev_dbg(hdev, ""); if (!hdev_is_powered(hdev)) return 0; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) return 0; cod[0] = hdev->minor_class; cod[1] = hdev->major_class; cod[2] = get_service_classes(hdev); if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) cod[1] |= 0x20; if (memcmp(cod, hdev->dev_class, 3) == 0) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod, HCI_CMD_TIMEOUT); } static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) { /* If there is no connection we are OK to advertise. */ if (hci_conn_num(hdev, LE_LINK) == 0) return true; /* Check le_states if there is any connection in peripheral role. */ if (hdev->conn_hash.le_num_peripheral > 0) { /* Peripheral connection state and non connectable mode * bit 20. */ if (!connectable && !(hdev->le_states[2] & 0x10)) return false; /* Peripheral connection state and connectable mode bit 38 * and scannable bit 21. */ if (connectable && (!(hdev->le_states[4] & 0x40) || !(hdev->le_states[2] & 0x20))) return false; } /* Check le_states if there is any connection in central role. */ if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_peripheral) { /* Central connection state and non connectable mode bit 18. */ if (!connectable && !(hdev->le_states[2] & 0x02)) return false; /* Central connection state and connectable mode bit 35 and * scannable 19. 
*/ if (connectable && (!(hdev->le_states[4] & 0x08) || !(hdev->le_states[2] & 0x08))) return false; } return true; } static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags) { /* If privacy is not enabled don't use RPA */ if (!hci_dev_test_flag(hdev, HCI_PRIVACY)) return false; /* If basic privacy mode is enabled use RPA */ if (!hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) return true; /* If limited privacy mode is enabled don't use RPA if we're * both discoverable and bondable. */ if ((flags & MGMT_ADV_FLAG_DISCOV) && hci_dev_test_flag(hdev, HCI_BONDABLE)) return false; /* We're neither bondable nor discoverable in the limited * privacy mode, therefore use RPA. */ return true; } static int hci_set_random_addr_sync(struct hci_dev *hdev, bdaddr_t *rpa) { /* If we're advertising or initiating an LE connection we can't * go ahead and change the random address at this time. This is * because the eventual initiator address used for the * subsequently created connection will be undefined (some * controllers use the new address and others the one we had * when the operation started). * * In this kind of scenario skip the update and let the random * address be updated at the next cycle. */ if (hci_dev_test_flag(hdev, HCI_LE_ADV) || hci_lookup_le_connect(hdev)) { bt_dev_dbg(hdev, "Deferring random address update"); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); return 0; } return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa, HCI_CMD_TIMEOUT); } int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, bool rpa, u8 *own_addr_type) { int err; /* If privacy is enabled use a resolvable private address. If * current RPA has expired or there is something else than * the current RPA in use, then generate a new one. */ if (rpa) { /* If Controller supports LL Privacy use own address type is * 0x03 */ if (use_ll_privacy(hdev)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; /* Check if RPA is valid */ if (rpa_valid(hdev)) return 0; err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { bt_dev_err(hdev, "failed to generate new RPA"); return err; } err = hci_set_random_addr_sync(hdev, &hdev->rpa); if (err) return err; return 0; } /* In case of required privacy without resolvable private address, * use an non-resolvable private address. This is useful for active * scanning and non-connectable advertising. */ if (require_privacy) { bdaddr_t nrpa; while (true) { /* The non-resolvable private address is generated * from random six bytes with the two most significant * bits cleared. */ get_random_bytes(&nrpa, 6); nrpa.b[5] &= 0x3f; /* The non-resolvable private address shall not be * equal to the public address. */ if (bacmp(&hdev->bdaddr, &nrpa)) break; } *own_addr_type = ADDR_LE_DEV_RANDOM; return hci_set_random_addr_sync(hdev, &nrpa); } /* If forcing static address is in use or there is no public * address use the static address as random address (but skip * the HCI command if the current random address is already the * static one. * * In case BR/EDR has been disabled on a dual-mode controller * and a static address has been configured, then use that * address instead of the public BR/EDR address. 
*/ if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { *own_addr_type = ADDR_LE_DEV_RANDOM; if (bacmp(&hdev->static_addr, &hdev->random_addr)) return hci_set_random_addr_sync(hdev, &hdev->static_addr); return 0; } /* Neither privacy nor static address is being used so use a * public address. */ *own_addr_type = ADDR_LE_DEV_PUBLIC; return 0; } static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_enable *cp; struct hci_cp_ext_adv_set *set; u8 data[sizeof(*cp) + sizeof(*set) * 1]; u8 size; /* If request specifies an instance that doesn't exist, fail */ if (instance > 0) { struct adv_info *adv; adv = hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; /* If not enabled there is nothing to do */ if (!adv->enabled) return 0; } memset(data, 0, sizeof(data)); cp = (void *)data; set = (void *)cp->data; /* Instance 0x00 indicates all advertising instances will be disabled */ cp->num_of_sets = !!instance; cp->enable = 0x00; set->handle = instance; size = sizeof(*cp) + sizeof(*set) * cp->num_of_sets; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE, size, data, HCI_CMD_TIMEOUT); } static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance, bdaddr_t *random_addr) { struct hci_cp_le_set_adv_set_rand_addr cp; int err; if (!instance) { /* Instance 0x00 doesn't have an adv_info, instead it uses * hdev->random_addr to track its address so whenever it needs * to be updated this also set the random address since * hdev->random_addr is shared with scan state machine. */ err = hci_set_random_addr_sync(hdev, random_addr); if (err) return err; } memset(&cp, 0, sizeof(cp)); cp.handle = instance; bacpy(&cp.bdaddr, random_addr); return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; bool connectable; u32 flags; bdaddr_t random_addr; u8 own_addr_type; int err; struct adv_info *adv; bool secondary_adv; if (instance > 0) { adv = hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; } else { adv = NULL; } /* Updating parameters of an active instance will return a * Command Disallowed error, so we must first disable the * instance if it is active. */ if (adv && !adv->pending) { err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; } flags = hci_adv_instance_flags(hdev, instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. */ connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || mgmt_get_connectable(hdev); if (!is_advertising_allowed(hdev, connectable)) return -EPERM; /* Set require_privacy to true only when non-connectable * advertising is used. In that case it is fine to use a * non-resolvable private address. 
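	 * hci_get_random_address() fills in own_addr_type and random_addr;
	 * if a new random address is needed it is programmed further down
	 * via hci_set_adv_set_random_addr_sync() once the advertising
	 * parameters have been set.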
*/ err = hci_get_random_address(hdev, !connectable, adv_use_rpa(hdev, flags), adv, &own_addr_type, &random_addr); if (err < 0) return err; memset(&cp, 0, sizeof(cp)); if (adv) { hci_cpu_to_le24(adv->min_interval, cp.min_interval); hci_cpu_to_le24(adv->max_interval, cp.max_interval); cp.tx_power = adv->tx_power; } else { hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; } secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK); if (connectable) { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND); } else if (hci_adv_instance_is_scannable(hdev, instance) || (flags & MGMT_ADV_PARAM_SCAN_RSP)) { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_SCAN_IND); } else { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_NON_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND); } /* If Own_Address_Type equals 0x02 or 0x03, the Peer_Address parameter * contains the peer’s Identity Address and the Peer_Address_Type * parameter contains the peer’s Identity Type (i.e., 0x00 or 0x01). * These parameters are used to locate the corresponding local IRK in * the resolving list; this IRK is used to generate their own address * used in the advertisement. */ if (own_addr_type == ADDR_LE_DEV_RANDOM_RESOLVED) hci_copy_identity_address(hdev, &cp.peer_addr, &cp.peer_addr_type); cp.own_addr_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; cp.handle = instance; if (flags & MGMT_ADV_FLAG_SEC_2M) { cp.primary_phy = HCI_ADV_PHY_1M; cp.secondary_phy = HCI_ADV_PHY_2M; } else if (flags & MGMT_ADV_FLAG_SEC_CODED) { cp.primary_phy = HCI_ADV_PHY_CODED; cp.secondary_phy = HCI_ADV_PHY_CODED; } else { /* In all other cases use 1M */ cp.primary_phy = HCI_ADV_PHY_1M; cp.secondary_phy = HCI_ADV_PHY_1M; } err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) return err; if ((own_addr_type == ADDR_LE_DEV_RANDOM || own_addr_type == ADDR_LE_DEV_RANDOM_RESOLVED) && bacmp(&random_addr, BDADDR_ANY)) { /* Check if random address need to be updated */ if (adv) { if (!bacmp(&random_addr, &adv->random_addr)) return 0; } else { if (!bacmp(&random_addr, &hdev->random_addr)) return 0; } return hci_set_adv_set_random_addr_sync(hdev, instance, &random_addr); } return 0; } static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) { struct { struct hci_cp_le_set_ext_scan_rsp_data cp; u8 data[HCI_MAX_EXT_AD_LENGTH]; } pdu; u8 len; struct adv_info *adv = NULL; int err; memset(&pdu, 0, sizeof(pdu)); if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->scan_rsp_changed) return 0; } len = eir_create_scan_rsp(hdev, instance, pdu.data); pdu.cp.handle = instance; pdu.cp.length = len; pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, sizeof(pdu.cp) + len, &pdu.cp, HCI_CMD_TIMEOUT); if (err) return err; if (adv) { adv->scan_rsp_changed = false; } else { memcpy(hdev->scan_rsp_data, pdu.data, len); hdev->scan_rsp_data_len = len; } return 0; } static int __hci_set_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_scan_rsp_data cp; u8 len; memset(&cp, 0, sizeof(cp)); len = eir_create_scan_rsp(hdev, instance, cp.data); if 
(hdev->scan_rsp_data_len == len && !memcmp(cp.data, hdev->scan_rsp_data, len)) return 0; memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); hdev->scan_rsp_data_len = len; cp.length = len; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) { if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return 0; if (ext_adv_capable(hdev)) return hci_set_ext_scan_rsp_data_sync(hdev, instance); return __hci_set_scan_rsp_data_sync(hdev, instance); } int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_enable *cp; struct hci_cp_ext_adv_set *set; u8 data[sizeof(*cp) + sizeof(*set) * 1]; struct adv_info *adv; if (instance > 0) { adv = hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; /* If already enabled there is nothing to do */ if (adv->enabled) return 0; } else { adv = NULL; } cp = (void *)data; set = (void *)cp->data; memset(cp, 0, sizeof(*cp)); cp->enable = 0x01; cp->num_of_sets = 0x01; memset(set, 0, sizeof(*set)); set->handle = instance; /* Set duration per instance since controller is responsible for * scheduling it. */ if (adv && adv->timeout) { u16 duration = adv->timeout * MSEC_PER_SEC; /* Time = N * 10 ms */ set->duration = cpu_to_le16(duration / 10); } return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(*cp) + sizeof(*set) * cp->num_of_sets, data, HCI_CMD_TIMEOUT); } int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance) { int err; err = hci_setup_ext_adv_instance_sync(hdev, instance); if (err) return err; err = hci_set_ext_scan_rsp_data_sync(hdev, instance); if (err) return err; return hci_enable_ext_advertising_sync(hdev, instance); } int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_per_adv_enable cp; struct adv_info *adv = NULL; /* If periodic advertising already disabled there is nothing to do. 
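	 * This also covers an instance that does not exist at all, in which
	 * case there is equally nothing to disable.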
*/ adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->periodic || !adv->enabled) return 0; memset(&cp, 0, sizeof(cp)); cp.enable = 0x00; cp.handle = instance; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance, u16 min_interval, u16 max_interval) { struct hci_cp_le_set_per_adv_params cp; memset(&cp, 0, sizeof(cp)); if (!min_interval) min_interval = DISCOV_LE_PER_ADV_INT_MIN; if (!max_interval) max_interval = DISCOV_LE_PER_ADV_INT_MAX; cp.handle = instance; cp.min_interval = cpu_to_le16(min_interval); cp.max_interval = cpu_to_le16(max_interval); cp.periodic_properties = 0x0000; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_PARAMS, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance) { struct { struct hci_cp_le_set_per_adv_data cp; u8 data[HCI_MAX_PER_AD_LENGTH]; } pdu; u8 len; memset(&pdu, 0, sizeof(pdu)); if (instance) { struct adv_info *adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->periodic) return 0; } len = eir_create_per_adv_data(hdev, instance, pdu.data); pdu.cp.length = len; pdu.cp.handle = instance; pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA, sizeof(pdu.cp) + len, &pdu, HCI_CMD_TIMEOUT); } static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_per_adv_enable cp; struct adv_info *adv = NULL; /* If periodic advertising already enabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); if (adv && adv->periodic && adv->enabled) return 0; memset(&cp, 0, sizeof(cp)); cp.enable = 0x01; cp.handle = instance; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* Checks if periodic advertising data contains a Basic Announcement and if it * does generates a Broadcast ID and add Broadcast Announcement. */ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) { u8 bid[3]; u8 ad[4 + 3]; /* Skip if NULL adv as instance 0x00 is used for general purpose * advertising so it cannot used for the likes of Broadcast Announcement * as it can be overwritten at any point. */ if (!adv) return 0; /* Check if PA data doesn't contains a Basic Audio Announcement then * there is nothing to do. */ if (!eir_get_service_data(adv->per_adv_data, adv->per_adv_data_len, 0x1851, NULL)) return 0; /* Check if advertising data already has a Broadcast Announcement since * the process may want to control the Broadcast ID directly and in that * case the kernel shall no interfere. 
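	 * (0x1851 above is the Basic Audio Announcement Service UUID and
	 * 0x1852 below the Broadcast Audio Announcement Service UUID, so a
	 * user-supplied Broadcast ID always wins over a generated one.)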
*/ if (eir_get_service_data(adv->adv_data, adv->adv_data_len, 0x1852, NULL)) return 0; /* Generate Broadcast ID */ get_random_bytes(bid, sizeof(bid)); eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL); return hci_update_adv_data_sync(hdev, adv->instance); } int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval) { struct adv_info *adv = NULL; int err; bool added = false; hci_disable_per_advertising_sync(hdev, instance); if (instance) { adv = hci_find_adv_instance(hdev, instance); /* Create an instance if that could not be found */ if (!adv) { adv = hci_add_per_instance(hdev, instance, flags, data_len, data, sync_interval, sync_interval); if (IS_ERR(adv)) return PTR_ERR(adv); adv->pending = false; added = true; } } /* Start advertising */ err = hci_start_ext_adv_sync(hdev, instance); if (err < 0) goto fail; err = hci_adv_bcast_annoucement(hdev, adv); if (err < 0) goto fail; err = hci_set_per_adv_params_sync(hdev, instance, min_interval, max_interval); if (err < 0) goto fail; err = hci_set_per_adv_data_sync(hdev, instance); if (err < 0) goto fail; err = hci_enable_per_advertising_sync(hdev, instance); if (err < 0) goto fail; return 0; fail: if (added) hci_remove_adv_instance(hdev, instance); return err; } static int hci_start_adv_sync(struct hci_dev *hdev, u8 instance) { int err; if (ext_adv_capable(hdev)) return hci_start_ext_adv_sync(hdev, instance); err = hci_update_adv_data_sync(hdev, instance); if (err) return err; err = hci_update_scan_rsp_data_sync(hdev, instance); if (err) return err; return hci_enable_advertising_sync(hdev); } int hci_enable_advertising_sync(struct hci_dev *hdev) { struct adv_info *adv_instance; struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; u16 adv_min_interval, adv_max_interval; u32 flags; u8 status; if (ext_adv_capable(hdev)) return hci_enable_ext_advertising_sync(hdev, hdev->cur_adv_instance); flags = hci_adv_instance_flags(hdev, hdev->cur_adv_instance); adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. */ connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || mgmt_get_connectable(hdev); if (!is_advertising_allowed(hdev, connectable)) return -EINVAL; status = hci_disable_advertising_sync(hdev); if (status) return status; /* Clear the HCI_LE_ADV bit temporarily so that the * hci_update_random_address knows that it's safe to go ahead * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ hci_dev_clear_flag(hdev, HCI_LE_ADV); /* Set require_privacy to true only when non-connectable * advertising is used. In that case it is fine to use a * non-resolvable private address. 
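	 * hci_update_random_address_sync() below then selects an RPA, NRPA,
	 * static or public own address and reports the choice through
	 * own_addr_type.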
*/ status = hci_update_random_address_sync(hdev, !connectable, adv_use_rpa(hdev, flags), &own_addr_type); if (status) return status; memset(&cp, 0, sizeof(cp)); if (adv_instance) { adv_min_interval = adv_instance->min_interval; adv_max_interval = adv_instance->max_interval; } else { adv_min_interval = hdev->le_adv_min_interval; adv_max_interval = hdev->le_adv_max_interval; } if (connectable) { cp.type = LE_ADV_IND; } else { if (hci_adv_instance_is_scannable(hdev, hdev->cur_adv_instance)) cp.type = LE_ADV_SCAN_IND; else cp.type = LE_ADV_NONCONN_IND; if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) || hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN; adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX; } } cp.min_interval = cpu_to_le16(adv_min_interval); cp.max_interval = cpu_to_le16(adv_max_interval); cp.own_address_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; status = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (status) return status; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); } static int enable_advertising_sync(struct hci_dev *hdev, void *data) { return hci_enable_advertising_sync(hdev); } int hci_enable_advertising(struct hci_dev *hdev) { if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && list_empty(&hdev->adv_instances)) return 0; return hci_cmd_sync_queue(hdev, enable_advertising_sync, NULL, NULL); } int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { int err; if (!ext_adv_capable(hdev)) return 0; err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; /* If request specifies an instance that doesn't exist, fail */ if (instance > 0 && !hci_find_adv_instance(hdev, instance)) return -EINVAL; return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_REMOVE_ADV_SET, sizeof(instance), &instance, 0, HCI_CMD_TIMEOUT, sk); } static int remove_ext_adv_sync(struct hci_dev *hdev, void *data) { struct adv_info *adv = data; u8 instance = 0; if (adv) instance = adv->instance; return hci_remove_ext_adv_instance_sync(hdev, instance, NULL); } int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *adv = NULL; if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; } return hci_cmd_sync_queue(hdev, remove_ext_adv_sync, adv, NULL); } int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) { struct hci_cp_le_term_big cp; memset(&cp, 0, sizeof(cp)); cp.handle = handle; cp.reason = reason; return __hci_cmd_sync_status(hdev, HCI_OP_LE_TERM_BIG, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) { struct { struct hci_cp_le_set_ext_adv_data cp; u8 data[HCI_MAX_EXT_AD_LENGTH]; } pdu; u8 len; struct adv_info *adv = NULL; int err; memset(&pdu, 0, sizeof(pdu)); if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->adv_data_changed) return 0; } len = eir_create_adv_data(hdev, instance, pdu.data); pdu.cp.length = len; pdu.cp.handle = instance; pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, sizeof(pdu.cp) + len, &pdu.cp, HCI_CMD_TIMEOUT); if (err) return err; /* Update data if the command succeed */ if (adv) { adv->adv_data_changed = false; } else { memcpy(hdev->adv_data, pdu.data, len); hdev->adv_data_len = len; } return 0; } static int 
hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_adv_data cp; u8 len; memset(&cp, 0, sizeof(cp)); len = eir_create_adv_data(hdev, instance, cp.data); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && memcmp(cp.data, hdev->adv_data, len) == 0) return 0; memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); hdev->adv_data_len = len; cp.length = len; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) { if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return 0; if (ext_adv_capable(hdev)) return hci_set_ext_adv_data_sync(hdev, instance); return hci_set_adv_data_sync(hdev, instance); } int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force) { struct adv_info *adv = NULL; u16 timeout; if (hci_dev_test_flag(hdev, HCI_ADVERTISING) && !ext_adv_capable(hdev)) return -EPERM; if (hdev->adv_instance_timeout) return -EBUSY; adv = hci_find_adv_instance(hdev, instance); if (!adv) return -ENOENT; /* A zero timeout means unlimited advertising. As long as there is * only one instance, duration should be ignored. We still set a timeout * in case further instances are being added later on. * * If the remaining lifetime of the instance is more than the duration * then the timeout corresponds to the duration, otherwise it will be * reduced to the remaining instance lifetime. */ if (adv->timeout == 0 || adv->duration <= adv->remaining_time) timeout = adv->duration; else timeout = adv->remaining_time; /* The remaining time is being reduced unless the instance is being * advertised without time limit. */ if (adv->timeout) adv->remaining_time = adv->remaining_time - timeout; /* Only use work for scheduling instances with legacy advertising */ if (!ext_adv_capable(hdev)) { hdev->adv_instance_timeout = timeout; queue_delayed_work(hdev->req_workqueue, &hdev->adv_instance_expire, msecs_to_jiffies(timeout * 1000)); } /* If we're just re-scheduling the same instance again then do not * execute any HCI commands. This happens when a single instance is * being advertised. */ if (!force && hdev->cur_adv_instance == instance && hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; hdev->cur_adv_instance = instance; return hci_start_adv_sync(hdev, instance); } static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) { int err; if (!ext_adv_capable(hdev)) return 0; /* Disable instance 0x00 to disable all instances */ err = hci_disable_ext_adv_instance_sync(hdev, 0x00); if (err) return err; return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL, 0, HCI_CMD_TIMEOUT, sk); } static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ err = hci_clear_adv_sets_sync(hdev, sk); if (ext_adv_capable(hdev)) return err; /* This is safe as long as there is no command send while the lock is * held. */ hci_dev_lock(hdev); /* Cleanup non-ext instances */ list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) { u8 instance = adv->instance; int err; if (!(force || adv->timeout)) continue; err = hci_remove_adv_instance(hdev, instance); if (!err) mgmt_advertising_removed(sk, hdev, instance); } hci_dev_unlock(hdev); return 0; } static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { int err = 0; /* If we use extended advertising, instance has to be removed first. 
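	 * hci_remove_ext_adv_instance_sync() disables the set before sending
	 * HCI_OP_LE_REMOVE_ADV_SET, since removing a set that is still
	 * enabled is not allowed.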
*/ if (ext_adv_capable(hdev)) err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); if (ext_adv_capable(hdev)) return err; /* This is safe as long as there is no command send while the lock is * held. */ hci_dev_lock(hdev); err = hci_remove_adv_instance(hdev, instance); if (!err) mgmt_advertising_removed(sk, hdev, instance); hci_dev_unlock(hdev); return err; } /* For a single instance: * - force == true: The instance will be removed even when its remaining * lifetime is not zero. * - force == false: the instance will be deactivated but kept stored unless * the remaining lifetime is zero. * * For instance == 0x00: * - force == true: All instances will be removed regardless of their timeout * setting. * - force == false: Only instances that have a timeout will be removed. */ int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force) { struct adv_info *next = NULL; int err; /* Cancel any timeout concerning the removed instance(s). */ if (!instance || hdev->cur_adv_instance == instance) cancel_adv_timeout(hdev); /* Get the next instance to advertise BEFORE we remove * the current one. This can be the same instance again * if there is only one instance. */ if (hdev->cur_adv_instance == instance) next = hci_get_next_instance(hdev, instance); if (!instance) { err = hci_clear_adv_sync(hdev, sk, force); if (err) return err; } else { struct adv_info *adv = hci_find_adv_instance(hdev, instance); if (force || (adv && adv->timeout && !adv->remaining_time)) { /* Don't advertise a removed instance. */ if (next && next->instance == instance) next = NULL; err = hci_remove_adv_sync(hdev, instance, sk); if (err) return err; } } if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) return 0; if (next && !ext_adv_capable(hdev)) hci_schedule_adv_instance_sync(hdev, next->instance, false); return 0; } int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle) { struct hci_cp_read_rssi cp; cp.handle = handle; return __hci_cmd_sync_status(hdev, HCI_OP_READ_RSSI, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_CLOCK, sizeof(*cp), cp, HCI_CMD_TIMEOUT); } int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) { struct hci_cp_read_tx_power cp; cp.handle = handle; cp.type = type; return __hci_cmd_sync_status(hdev, HCI_OP_READ_TX_POWER, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; int err = 0; /* If controller is not advertising we are done. 
*/ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) err = hci_disable_ext_adv_instance_sync(hdev, 0x00); if (ext_adv_capable(hdev)) return err; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); } static int hci_le_set_ext_scan_enable_sync(struct hci_dev *hdev, u8 val, u8 filter_dup) { struct hci_cp_le_set_ext_scan_enable cp; memset(&cp, 0, sizeof(cp)); cp.enable = val; if (hci_dev_test_flag(hdev, HCI_MESH)) cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE; else cp.filter_dup = filter_dup; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, u8 filter_dup) { struct hci_cp_le_set_scan_enable cp; if (use_ext_scan(hdev)) return hci_le_set_ext_scan_enable_sync(hdev, val, filter_dup); memset(&cp, 0, sizeof(cp)); cp.enable = val; if (val && hci_dev_test_flag(hdev, HCI_MESH)) cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE; else cp.filter_dup = filter_dup; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_set_addr_resolution_enable_sync(struct hci_dev *hdev, u8 val) { if (!use_ll_privacy(hdev)) return 0; /* If controller is not/already resolving we are done. */ if (val == hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, sizeof(val), &val, HCI_CMD_TIMEOUT); } static int hci_scan_disable_sync(struct hci_dev *hdev) { int err; /* If controller is not scanning we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return 0; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return 0; } err = hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00); if (err) { bt_dev_err(hdev, "Unable to disable scanning: %d", err); return err; } return err; } static bool scan_use_rpa(struct hci_dev *hdev) { return hci_dev_test_flag(hdev, HCI_PRIVACY); } static void hci_start_interleave_scan(struct hci_dev *hdev) { hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; queue_delayed_work(hdev->req_workqueue, &hdev->interleave_scan, 0); } static bool is_interleave_scanning(struct hci_dev *hdev) { return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; } static void cancel_interleave_scan(struct hci_dev *hdev) { bt_dev_dbg(hdev, "cancelling interleave scan"); cancel_delayed_work_sync(&hdev->interleave_scan); hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE; } /* Return true if interleave_scan wasn't started until exiting this function, * otherwise, return false */ static bool hci_update_interleaved_scan_sync(struct hci_dev *hdev) { /* Do interleaved scan only if all of the following are true: * - There is at least one ADV monitor * - At least one pending LE connection or one device to be scanned for * - Monitor offloading is not supported * If so, we should alternate between allowlist scan and one without * any filters to save power. 
*/ bool use_interleaving = hci_is_adv_monitoring(hdev) && !(list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports)) && hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE; bool is_interleaving = is_interleave_scanning(hdev); if (use_interleaving && !is_interleaving) { hci_start_interleave_scan(hdev); bt_dev_dbg(hdev, "starting interleave scan"); return true; } if (!use_interleaving && is_interleaving) cancel_interleave_scan(hdev); return false; } /* Removes connection to resolve list if needed.*/ static int hci_le_del_resolve_list_sync(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct hci_cp_le_del_from_resolv_list cp; struct bdaddr_list_with_irk *entry; if (!use_ll_privacy(hdev)) return 0; /* Check if the IRK has been programmed */ entry = hci_bdaddr_list_lookup_with_irk(&hdev->le_resolv_list, bdaddr, bdaddr_type); if (!entry) return 0; cp.bdaddr_type = bdaddr_type; bacpy(&cp.bdaddr, bdaddr); return __hci_cmd_sync_status(hdev, HCI_OP_LE_DEL_FROM_RESOLV_LIST, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_del_accept_list_sync(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct hci_cp_le_del_from_accept_list cp; int err; /* Check if device is on accept list before removing it */ if (!hci_bdaddr_list_lookup(&hdev->le_accept_list, bdaddr, bdaddr_type)) return 0; cp.bdaddr_type = bdaddr_type; bacpy(&cp.bdaddr, bdaddr); /* Ignore errors when removing from resolving list as that is likely * that the device was never added. */ hci_le_del_resolve_list_sync(hdev, &cp.bdaddr, cp.bdaddr_type); err = __hci_cmd_sync_status(hdev, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) { bt_dev_err(hdev, "Unable to remove from allow list: %d", err); return err; } bt_dev_dbg(hdev, "Remove %pMR (0x%x) from allow list", &cp.bdaddr, cp.bdaddr_type); return 0; } struct conn_params { bdaddr_t addr; u8 addr_type; hci_conn_flags_t flags; u8 privacy_mode; }; /* Adds connection to resolve list if needed. * Setting params to NULL programs local hdev->irk */ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev, struct conn_params *params) { struct hci_cp_le_add_to_resolv_list cp; struct smp_irk *irk; struct bdaddr_list_with_irk *entry; struct hci_conn_params *p; if (!use_ll_privacy(hdev)) return 0; /* Attempt to program local identity address, type and irk if params is * NULL. */ if (!params) { if (!hci_dev_test_flag(hdev, HCI_PRIVACY)) return 0; hci_copy_identity_address(hdev, &cp.bdaddr, &cp.bdaddr_type); memcpy(cp.peer_irk, hdev->irk, 16); goto done; } irk = hci_find_irk_by_addr(hdev, &params->addr, params->addr_type); if (!irk) return 0; /* Check if the IK has _not_ been programmed yet. */ entry = hci_bdaddr_list_lookup_with_irk(&hdev->le_resolv_list, &params->addr, params->addr_type); if (entry) return 0; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, &params->addr); memcpy(cp.peer_irk, irk->val, 16); /* Default privacy mode is always Network */ params->privacy_mode = HCI_NETWORK_PRIVACY; rcu_read_lock(); p = hci_pend_le_action_lookup(&hdev->pend_le_conns, &params->addr, params->addr_type); if (!p) p = hci_pend_le_action_lookup(&hdev->pend_le_reports, &params->addr, params->addr_type); if (p) WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY); rcu_read_unlock(); done: if (hci_dev_test_flag(hdev, HCI_PRIVACY)) memcpy(cp.local_irk, hdev->irk, 16); else memset(cp.local_irk, 0, 16); return __hci_cmd_sync_status(hdev, HCI_OP_LE_ADD_TO_RESOLV_LIST, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* Set Device Privacy Mode. 
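 *
 * In device privacy mode the controller accepts packets using the peer's
 * identity address even though an IRK for that peer is in the resolving
 * list.  The mode is only programmed when the connection parameters carry
 * HCI_CONN_FLAG_DEVICE_PRIVACY.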
*/ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev, struct conn_params *params) { struct hci_cp_le_set_privacy_mode cp; struct smp_irk *irk; /* If device privacy mode has already been set there is nothing to do */ if (params->privacy_mode == HCI_DEVICE_PRIVACY) return 0; /* Check if HCI_CONN_FLAG_DEVICE_PRIVACY has been set as it also * indicates that LL Privacy has been enabled and * HCI_OP_LE_SET_PRIVACY_MODE is supported. */ if (!(params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY)) return 0; irk = hci_find_irk_by_addr(hdev, &params->addr, params->addr_type); if (!irk) return 0; memset(&cp, 0, sizeof(cp)); cp.bdaddr_type = irk->addr_type; bacpy(&cp.bdaddr, &irk->bdaddr); cp.mode = HCI_DEVICE_PRIVACY; /* Note: params->privacy_mode is not updated since it is a copy */ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* Adds connection to allow list if needed, if the device uses RPA (has IRK) * this attempts to program the device in the resolving list as well and * properly set the privacy mode. */ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, struct conn_params *params, u8 *num_entries) { struct hci_cp_le_add_to_accept_list cp; int err; /* During suspend, only wakeable devices can be in acceptlist */ if (hdev->suspended && !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { hci_le_del_accept_list_sync(hdev, &params->addr, params->addr_type); return 0; } /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) return -ENOSPC; /* Accept list can not be used with RPAs */ if (!use_ll_privacy(hdev) && hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) return -EINVAL; /* Attempt to program the device in the resolving list first to avoid * having to rollback in case it fails since the resolving list is * dynamic it can probably be smaller than the accept list. */ err = hci_le_add_resolve_list_sync(hdev, params); if (err) { bt_dev_err(hdev, "Unable to add to resolve list: %d", err); return err; } /* Set Privacy Mode */ err = hci_le_set_privacy_mode_sync(hdev, params); if (err) { bt_dev_err(hdev, "Unable to set privacy mode: %d", err); return err; } /* Check if already in accept list */ if (hci_bdaddr_list_lookup(&hdev->le_accept_list, &params->addr, params->addr_type)) return 0; *num_entries += 1; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, &params->addr); err = __hci_cmd_sync_status(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) { bt_dev_err(hdev, "Unable to add to allow list: %d", err); /* Rollback the device from the resolving list */ hci_le_del_resolve_list_sync(hdev, &cp.bdaddr, cp.bdaddr_type); return err; } bt_dev_dbg(hdev, "Add %pMR (0x%x) to allow list", &cp.bdaddr, cp.bdaddr_type); return 0; } /* This function disables/pause all advertising instances */ static int hci_pause_advertising_sync(struct hci_dev *hdev) { int err; int old_state; /* If already been paused there is nothing to do. */ if (hdev->advertising_paused) return 0; bt_dev_dbg(hdev, "Pausing directed advertising"); /* Stop directed advertising */ old_state = hci_dev_test_flag(hdev, HCI_ADVERTISING); if (old_state) { /* When discoverable timeout triggers, then just make sure * the limited discoverable flag is cleared. Even in the case * of a timeout triggered from general discoverable, it is * safe to unconditionally clear the flag. 
*/ hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hdev->discov_timeout = 0; } bt_dev_dbg(hdev, "Pausing advertising instances"); /* Call to disable any advertisements active on the controller. * This will succeed even if no advertisements are configured. */ err = hci_disable_advertising_sync(hdev); if (err) return err; /* If we are using software rotation, pause the loop */ if (!ext_adv_capable(hdev)) cancel_adv_timeout(hdev); hdev->advertising_paused = true; hdev->advertising_old_state = old_state; return 0; } /* This function enables all user advertising instances */ static int hci_resume_advertising_sync(struct hci_dev *hdev) { struct adv_info *adv, *tmp; int err; /* If advertising has not been paused there is nothing to do. */ if (!hdev->advertising_paused) return 0; /* Resume directed advertising */ hdev->advertising_paused = false; if (hdev->advertising_old_state) { hci_dev_set_flag(hdev, HCI_ADVERTISING); hdev->advertising_old_state = 0; } bt_dev_dbg(hdev, "Resuming advertising instances"); if (ext_adv_capable(hdev)) { /* Call for each tracked instance to be re-enabled */ list_for_each_entry_safe(adv, tmp, &hdev->adv_instances, list) { err = hci_enable_ext_advertising_sync(hdev, adv->instance); if (!err) continue; /* If the instance cannot be resumed remove it */ hci_remove_ext_adv_instance_sync(hdev, adv->instance, NULL); } } else { /* Schedule for most recent instance to be restarted and begin * the software rotation loop */ err = hci_schedule_adv_instance_sync(hdev, hdev->cur_adv_instance, true); } hdev->advertising_paused = false; return err; } static int hci_pause_addr_resolution(struct hci_dev *hdev) { int err; if (!use_ll_privacy(hdev)) return 0; if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) return 0; /* Cannot disable addr resolution if scanning is enabled or * when initiating an LE connection. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN) || hci_lookup_le_connect(hdev)) { bt_dev_err(hdev, "Command not allowed when scan/LE connect"); return -EPERM; } /* Cannot disable addr resolution if advertising is enabled. */ err = hci_pause_advertising_sync(hdev); if (err) { bt_dev_err(hdev, "Pause advertising failed: %d", err); return err; } err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); if (err) bt_dev_err(hdev, "Unable to disable Address Resolution: %d", err); /* Return if address resolution is disabled and RPA is not used. */ if (!err && scan_use_rpa(hdev)) return 0; hci_resume_advertising_sync(hdev); return err; } struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool extended, struct sock *sk) { u16 opcode = extended ? HCI_OP_READ_LOCAL_OOB_EXT_DATA : HCI_OP_READ_LOCAL_OOB_DATA; return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk); } static struct conn_params *conn_params_copy(struct list_head *list, size_t *n) { struct hci_conn_params *params; struct conn_params *p; size_t i; rcu_read_lock(); i = 0; list_for_each_entry_rcu(params, list, action) ++i; *n = i; rcu_read_unlock(); p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL); if (!p) return NULL; rcu_read_lock(); i = 0; list_for_each_entry_rcu(params, list, action) { /* Racing adds are handled in next scan update */ if (i >= *n) break; /* No hdev->lock, but: addr, addr_type are immutable. * privacy_mode is only written by us or in * hci_cc_le_set_privacy_mode that we wait for. * We should be idempotent so MGMT updating flags * while we are processing is OK. 
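 *
 * Editor's note (not part of the original file): the copy exists because
 * the HCI commands issued afterwards via __hci_cmd_sync_status() sleep
 * while waiting for the controller, which is not allowed inside
 * rcu_read_lock(); snapshotting the RCU-protected list into a plain
 * array lets the caller iterate it without holding any lock.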
*/ bacpy(&p[i].addr, &params->addr); p[i].addr_type = params->addr_type; p[i].flags = READ_ONCE(params->flags); p[i].privacy_mode = READ_ONCE(params->privacy_mode); ++i; } rcu_read_unlock(); *n = i; return p; } /* Clear LE Accept List */ static int hci_le_clear_accept_list_sync(struct hci_dev *hdev) { if (!(hdev->commands[26] & 0x80)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL, HCI_CMD_TIMEOUT); } /* Device must not be scanning when updating the accept list. * * Update is done using the following sequence: * * use_ll_privacy((Disable Advertising) -> Disable Resolving List) -> * Remove Devices From Accept List -> * (has IRK && use_ll_privacy(Remove Devices From Resolving List))-> * Add Devices to Accept List -> * (has IRK && use_ll_privacy(Remove Devices From Resolving List)) -> * use_ll_privacy(Enable Resolving List -> (Enable Advertising)) -> * Enable Scanning * * In case of failure advertising shall be restored to its original state and * return would disable accept list since either accept or resolving list could * not be programmed. * */ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) { struct conn_params *params; struct bdaddr_list *b, *t; u8 num_entries = 0; bool pend_conn, pend_report; u8 filter_policy; size_t i, n; int err; /* Pause advertising if resolving list can be used as controllers * cannot accept resolving list modifications while advertising. */ if (use_ll_privacy(hdev)) { err = hci_pause_advertising_sync(hdev); if (err) { bt_dev_err(hdev, "pause advertising failed: %d", err); return 0x00; } } /* Disable address resolution while reprogramming accept list since * devices that do have an IRK will be programmed in the resolving list * when LL Privacy is enabled. */ err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); if (err) { bt_dev_err(hdev, "Unable to disable LL privacy: %d", err); goto done; } /* Force address filtering if PA Sync is in progress */ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { struct hci_cp_le_pa_create_sync *sent; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC); if (sent) { struct conn_params pa; memset(&pa, 0, sizeof(pa)); bacpy(&pa.addr, &sent->addr); pa.addr_type = sent->addr_type; /* Clear first since there could be addresses left * behind. */ hci_le_clear_accept_list_sync(hdev); num_entries = 1; err = hci_le_add_accept_list_sync(hdev, &pa, &num_entries); goto done; } } /* Go through the current accept list programmed into the * controller one by one and check if that address is connected or is * still in the list of pending connections or list of devices to * report. If not present in either list, then remove it from * the controller. */ list_for_each_entry_safe(b, t, &hdev->le_accept_list, list) { if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type)) continue; /* Pointers not dereferenced, no locks needed */ pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, &b->bdaddr, b->bdaddr_type); pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports, &b->bdaddr, b->bdaddr_type); /* If the device is not likely to connect or report, * remove it from the acceptlist. */ if (!pend_conn && !pend_report) { hci_le_del_accept_list_sync(hdev, &b->bdaddr, b->bdaddr_type); continue; } num_entries++; } /* Since all no longer valid accept list entries have been * removed, walk through the list of pending connections * and ensure that any new device gets programmed into * the controller. 
* * If the list of the devices is larger than the list of * available accept list entries in the controller, then * just abort and return filer policy value to not use the * accept list. * * The list and params may be mutated while we wait for events, * so make a copy and iterate it. */ params = conn_params_copy(&hdev->pend_le_conns, &n); if (!params) { err = -ENOMEM; goto done; } for (i = 0; i < n; ++i) { err = hci_le_add_accept_list_sync(hdev, &params[i], &num_entries); if (err) { kvfree(params); goto done; } } kvfree(params); /* After adding all new pending connections, walk through * the list of pending reports and also add these to the * accept list if there is still space. Abort if space runs out. */ params = conn_params_copy(&hdev->pend_le_reports, &n); if (!params) { err = -ENOMEM; goto done; } for (i = 0; i < n; ++i) { err = hci_le_add_accept_list_sync(hdev, &params[i], &num_entries); if (err) { kvfree(params); goto done; } } kvfree(params); /* Use the allowlist unless the following conditions are all true: * - We are not currently suspending * - There are 1 or more ADV monitors registered and it's not offloaded * - Interleaved scanning is not currently using the allowlist */ if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended && hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE && hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST) err = -EINVAL; done: filter_policy = err ? 0x00 : 0x01; /* Enable address resolution when LL Privacy is enabled. */ err = hci_le_set_addr_resolution_enable_sync(hdev, 0x01); if (err) bt_dev_err(hdev, "Unable to enable LL privacy: %d", err); /* Resume advertising if it was paused */ if (use_ll_privacy(hdev)) hci_resume_advertising_sync(hdev); /* Select filter policy to use accept list */ return filter_policy; } static void hci_le_scan_phy_params(struct hci_cp_le_scan_phy_params *cp, u8 type, u16 interval, u16 window) { cp->type = type; cp->interval = cpu_to_le16(interval); cp->window = cpu_to_le16(window); } static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, u16 interval, u16 window, u8 own_addr_type, u8 filter_policy) { struct hci_cp_le_set_ext_scan_params *cp; struct hci_cp_le_scan_phy_params *phy; u8 data[sizeof(*cp) + sizeof(*phy) * 2]; u8 num_phy = 0x00; cp = (void *)data; phy = (void *)cp->data; memset(data, 0, sizeof(data)); cp->own_addr_type = own_addr_type; cp->filter_policy = filter_policy; /* Check if PA Sync is in progress then select the PHY based on the * hci_conn.iso_qos. 
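 *
 * Editor's note (illustrative, not part of the original file): when
 * synchronizing to a broadcaster the scanning PHYs are chosen to match
 * the broadcast QoS, so the scanner listens on the PHY the periodic
 * advertising train actually uses. Sketch of the mapping applied below:
 *
 *	BT_ISO_PHY_1M or BT_ISO_PHY_2M -> LE_SCAN_PHY_1M,    interval,     window
 *	BT_ISO_PHY_CODED               -> LE_SCAN_PHY_CODED, interval * 3, window * 3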
*/ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { struct hci_cp_le_add_to_accept_list *sent; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST); if (sent) { struct hci_conn *conn; conn = hci_conn_hash_lookup_ba(hdev, ISO_LINK, &sent->bdaddr); if (conn) { struct bt_iso_qos *qos = &conn->iso_qos; if (qos->bcast.in.phy & BT_ISO_PHY_1M || qos->bcast.in.phy & BT_ISO_PHY_2M) { cp->scanning_phys |= LE_SCAN_PHY_1M; hci_le_scan_phy_params(phy, type, interval, window); num_phy++; phy++; } if (qos->bcast.in.phy & BT_ISO_PHY_CODED) { cp->scanning_phys |= LE_SCAN_PHY_CODED; hci_le_scan_phy_params(phy, type, interval * 3, window * 3); num_phy++; phy++; } if (num_phy) goto done; } } } if (scan_1m(hdev) || scan_2m(hdev)) { cp->scanning_phys |= LE_SCAN_PHY_1M; hci_le_scan_phy_params(phy, type, interval, window); num_phy++; phy++; } if (scan_coded(hdev)) { cp->scanning_phys |= LE_SCAN_PHY_CODED; hci_le_scan_phy_params(phy, type, interval * 3, window * 3); num_phy++; phy++; } done: if (!num_phy) return -EINVAL; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_PARAMS, sizeof(*cp) + sizeof(*phy) * num_phy, data, HCI_CMD_TIMEOUT); } static int hci_le_set_scan_param_sync(struct hci_dev *hdev, u8 type, u16 interval, u16 window, u8 own_addr_type, u8 filter_policy) { struct hci_cp_le_set_scan_param cp; if (use_ext_scan(hdev)) return hci_le_set_ext_scan_param_sync(hdev, type, interval, window, own_addr_type, filter_policy); memset(&cp, 0, sizeof(cp)); cp.type = type; cp.interval = cpu_to_le16(interval); cp.window = cpu_to_le16(window); cp.own_address_type = own_addr_type; cp.filter_policy = filter_policy; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_start_scan_sync(struct hci_dev *hdev, u8 type, u16 interval, u16 window, u8 own_addr_type, u8 filter_policy, u8 filter_dup) { int err; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return 0; } err = hci_le_set_scan_param_sync(hdev, type, interval, window, own_addr_type, filter_policy); if (err) return err; return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE, filter_dup); } static int hci_passive_scan_sync(struct hci_dev *hdev) { u8 own_addr_type; u8 filter_policy; u16 window, interval; u8 filter_dups = LE_SCAN_FILTER_DUP_ENABLE; int err; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return 0; } err = hci_scan_disable_sync(hdev); if (err) { bt_dev_err(hdev, "disable scanning failed: %d", err); return err; } /* Set require_privacy to false since no SCAN_REQ are send * during passive scanning. Not using an non-resolvable address * here is important so that peer devices using direct * advertising with our address will be correctly reported * by the controller. */ if (hci_update_random_address_sync(hdev, false, scan_use_rpa(hdev), &own_addr_type)) return 0; if (hdev->enable_advmon_interleave_scan && hci_update_interleaved_scan_sync(hdev)) return 0; bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state); /* Adding or removing entries from the accept list must * happen before enabling scanning. The controller does * not allow accept list modification while scanning. */ filter_policy = hci_update_accept_list_sync(hdev); /* When the controller is using random resolvable addresses and * with that having LE privacy enabled, then controllers with * Extended Scanner Filter Policies support can now enable support * for handling directed advertising. 
* * So instead of using filter polices 0x00 (no acceptlist) * and 0x01 (acceptlist enabled) use the new filter policies * 0x02 (no acceptlist) and 0x03 (acceptlist enabled). */ if (hci_dev_test_flag(hdev, HCI_PRIVACY) && (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) filter_policy |= 0x02; if (hdev->suspended) { window = hdev->le_scan_window_suspend; interval = hdev->le_scan_int_suspend; } else if (hci_is_le_conn_scanning(hdev)) { window = hdev->le_scan_window_connect; interval = hdev->le_scan_int_connect; } else if (hci_is_adv_monitoring(hdev)) { window = hdev->le_scan_window_adv_monitor; interval = hdev->le_scan_int_adv_monitor; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; } /* Disable all filtering for Mesh */ if (hci_dev_test_flag(hdev, HCI_MESH)) { filter_policy = 0; filter_dups = LE_SCAN_FILTER_DUP_DISABLE; } bt_dev_dbg(hdev, "LE passive scan with acceptlist = %d", filter_policy); return hci_start_scan_sync(hdev, LE_SCAN_PASSIVE, interval, window, own_addr_type, filter_policy, filter_dups); } /* This function controls the passive scanning based on hdev->pend_le_conns * list. If there are pending LE connection we start the background scanning, * otherwise we stop it in the following sequence: * * If there are devices to scan: * * Disable Scanning -> Update Accept List -> * use_ll_privacy((Disable Advertising) -> Disable Resolving List -> * Update Resolving List -> Enable Resolving List -> (Enable Advertising)) -> * Enable Scanning * * Otherwise: * * Disable Scanning */ int hci_update_passive_scan_sync(struct hci_dev *hdev) { int err; if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || hci_dev_test_flag(hdev, HCI_AUTO_OFF) || hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; /* No point in doing scanning if LE support hasn't been enabled */ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return 0; /* If discovery is active don't interfere with it */ if (hdev->discovery.state != DISCOVERY_STOPPED) return 0; /* Reset RSSI and UUID filters when starting background scanning * since these filters are meant for service discovery only. * * The Start Discovery and Start Service Discovery operations * ensure to set proper values for RSSI threshold and UUID * filter list. So it is safe to just reset them here. */ hci_discovery_filter_clear(hdev); bt_dev_dbg(hdev, "ADV monitoring is %s", hci_is_adv_monitoring(hdev) ? "on" : "off"); if (!hci_dev_test_flag(hdev, HCI_MESH) && list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports) && !hci_is_adv_monitoring(hdev) && !hci_dev_test_flag(hdev, HCI_PA_SYNC)) { /* If there is no pending LE connections or devices * to be scanned for or no ADV monitors, we should stop the * background scanning. */ bt_dev_dbg(hdev, "stopping background scanning"); err = hci_scan_disable_sync(hdev); if (err) bt_dev_err(hdev, "stop background scanning failed: %d", err); } else { /* If there is at least one pending LE connection, we should * keep the background scan running. */ /* If controller is connecting, we should not start scanning * since some controllers are not able to scan and connect at * the same time. 
*/ if (hci_lookup_le_connect(hdev)) return 0; bt_dev_dbg(hdev, "start background scanning"); err = hci_passive_scan_sync(hdev); if (err) bt_dev_err(hdev, "start background scanning failed: %d", err); } return err; } static int update_scan_sync(struct hci_dev *hdev, void *data) { return hci_update_scan_sync(hdev); } int hci_update_scan(struct hci_dev *hdev) { return hci_cmd_sync_queue(hdev, update_scan_sync, NULL, NULL); } static int update_passive_scan_sync(struct hci_dev *hdev, void *data) { return hci_update_passive_scan_sync(hdev); } int hci_update_passive_scan(struct hci_dev *hdev) { /* Only queue if it would have any effect */ if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || hci_dev_test_flag(hdev, HCI_AUTO_OFF) || hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, NULL); } int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val) { int err; if (!bredr_sc_enabled(hdev) || lmp_host_sc_capable(hdev)) return 0; err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SC_SUPPORT, sizeof(val), &val, HCI_CMD_TIMEOUT); if (!err) { if (val) { hdev->features[1][0] |= LMP_HOST_SC; hci_dev_set_flag(hdev, HCI_SC_ENABLED); } else { hdev->features[1][0] &= ~LMP_HOST_SC; hci_dev_clear_flag(hdev, HCI_SC_ENABLED); } } return err; } int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode) { int err; if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED) || lmp_host_ssp_capable(hdev)) return 0; if (!mode && hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(mode), &mode, HCI_CMD_TIMEOUT); } err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode, HCI_CMD_TIMEOUT); if (err) return err; return hci_write_sc_support_sync(hdev, 0x01); } int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul) { struct hci_cp_write_le_host_supported cp; if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || !lmp_bredr_capable(hdev)) return 0; /* Check first if we already have the right host state * (host features set) */ if (le == lmp_host_le_capable(hdev) && simul == lmp_host_le_br_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); cp.le = le; cp.simul = simul; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_powered_update_adv_sync(struct hci_dev *hdev) { struct adv_info *adv, *tmp; int err; if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return 0; /* If RPA Resolution has not been enable yet it means the * resolving list is empty and we should attempt to program the * local IRK in order to support using own_addr_type * ADDR_LE_DEV_RANDOM_RESOLVED (0x03). */ if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { hci_le_add_resolve_list_sync(hdev, NULL); hci_le_set_addr_resolution_enable_sync(hdev, 0x01); } /* Make sure the controller has a good default for * advertising data. This also applies to the case * where BR/EDR was toggled during the AUTO_OFF phase. 
*/ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || list_empty(&hdev->adv_instances)) { if (ext_adv_capable(hdev)) { err = hci_setup_ext_adv_instance_sync(hdev, 0x00); if (!err) hci_update_scan_rsp_data_sync(hdev, 0x00); } else { err = hci_update_adv_data_sync(hdev, 0x00); if (!err) hci_update_scan_rsp_data_sync(hdev, 0x00); } if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) hci_enable_advertising_sync(hdev); } /* Call for each tracked instance to be scheduled */ list_for_each_entry_safe(adv, tmp, &hdev->adv_instances, list) hci_schedule_adv_instance_sync(hdev, adv->instance, true); return 0; } static int hci_write_auth_enable_sync(struct hci_dev *hdev) { u8 link_sec; link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); if (link_sec == test_bit(HCI_AUTH, &hdev->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(link_sec), &link_sec, HCI_CMD_TIMEOUT); } int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable) { struct hci_cp_write_page_scan_activity cp; u8 type; int err = 0; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; memset(&cp, 0, sizeof(cp)); if (enable) { type = PAGE_SCAN_TYPE_INTERLACED; /* 160 msec page scan interval */ cp.interval = cpu_to_le16(0x0100); } else { type = hdev->def_page_scan_type; cp.interval = cpu_to_le16(hdev->def_page_scan_int); } cp.window = cpu_to_le16(hdev->def_page_scan_window); if (__cpu_to_le16(hdev->page_scan_interval) != cp.interval || __cpu_to_le16(hdev->page_scan_window) != cp.window) { err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) return err; } if (hdev->page_scan_type != type) err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, sizeof(type), &type, HCI_CMD_TIMEOUT); return err; } static bool disconnected_accept_list_entries(struct hci_dev *hdev) { struct bdaddr_list *b; list_for_each_entry(b, &hdev->accept_list, list) { struct hci_conn *conn; conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); if (!conn) return true; if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) return true; } return false; } static int hci_write_scan_enable_sync(struct hci_dev *hdev, u8 val) { return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE, sizeof(val), &val, HCI_CMD_TIMEOUT); } int hci_update_scan_sync(struct hci_dev *hdev) { u8 scan; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; if (!hdev_is_powered(hdev)) return 0; if (mgmt_powering_down(hdev)) return 0; if (hdev->scanning_paused) return 0; if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) || disconnected_accept_list_entries(hdev)) scan = SCAN_PAGE; else scan = SCAN_DISABLED; if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) scan |= SCAN_INQUIRY; if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE) && test_bit(HCI_ISCAN, &hdev->flags) == !!(scan & SCAN_INQUIRY)) return 0; return hci_write_scan_enable_sync(hdev, scan); } int hci_update_name_sync(struct hci_dev *hdev) { struct hci_cp_write_local_name cp; memset(&cp, 0, sizeof(cp)); memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* This function perform powered update HCI command sequence after the HCI init * sequence which end up resetting all states, the sequence is as follows: * * HCI_SSP_ENABLED(Enable SSP) * HCI_LE_ENABLED(Enable LE) * HCI_LE_ENABLED(use_ll_privacy(Add local IRK to Resolving List) -> * Update adv data) * Enable Authentication * 
lmp_bredr_capable(Set Fast Connectable -> Set Scan Type -> Set Class -> * Set Name -> Set EIR) * HCI_FORCE_STATIC_ADDR | BDADDR_ANY && !HCI_BREDR_ENABLED (Set Static Address) */ int hci_powered_update_sync(struct hci_dev *hdev) { int err; /* Register the available SMP channels (BR/EDR and LE) only when * successfully powering on the controller. This late * registration is required so that LE SMP can clearly decide if * the public address or static address is used. */ smp_register(hdev); err = hci_write_ssp_mode_sync(hdev, 0x01); if (err) return err; err = hci_write_le_host_supported_sync(hdev, 0x01, 0x00); if (err) return err; err = hci_powered_update_adv_sync(hdev); if (err) return err; err = hci_write_auth_enable_sync(hdev); if (err) return err; if (lmp_bredr_capable(hdev)) { if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) hci_write_fast_connectable_sync(hdev, true); else hci_write_fast_connectable_sync(hdev, false); hci_update_scan_sync(hdev); hci_update_class_sync(hdev); hci_update_name_sync(hdev); hci_update_eir_sync(hdev); } /* If forcing static address is in use or there is no public * address use the static address as random address (but skip * the HCI command if the current random address is already the * static one. * * In case BR/EDR has been disabled on a dual-mode controller * and a static address has been configured, then use that * address instead of the public BR/EDR address. */ if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))) { if (bacmp(&hdev->static_addr, BDADDR_ANY)) return hci_set_random_addr_sync(hdev, &hdev->static_addr); } return 0; } /** * hci_dev_get_bd_addr_from_property - Get the Bluetooth Device Address * (BD_ADDR) for a HCI device from * a firmware node property. * @hdev: The HCI device * * Search the firmware node for 'local-bd-address'. * * All-zero BD addresses are rejected, because those could be properties * that exist in the firmware tables, but were not updated by the firmware. For * example, the DTS could define 'local-bd-address', with zero BD addresses. 
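 *
 * Editor's note (assumption based on the devicetree binding, not part of
 * the original file): the 'local-bd-address' property is expected to hold
 * the six address bytes least-significant byte first, matching bdaddr_t,
 * e.g. 00:11:22:33:44:55 would be written as [ 55 44 33 22 11 00 ].
 * Controllers whose firmware fills the property in the opposite order set
 * HCI_QUIRK_BDADDR_PROPERTY_BROKEN so the helper below byte-swaps it.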
*/ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) { struct fwnode_handle *fwnode = dev_fwnode(hdev->dev.parent); bdaddr_t ba; int ret; ret = fwnode_property_read_u8_array(fwnode, "local-bd-address", (u8 *)&ba, sizeof(ba)); if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) baswap(&hdev->public_addr, &ba); else bacpy(&hdev->public_addr, &ba); } struct hci_init_stage { int (*func)(struct hci_dev *hdev); }; /* Run init stage NULL terminated function table */ static int hci_init_stage_sync(struct hci_dev *hdev, const struct hci_init_stage *stage) { size_t i; for (i = 0; stage[i].func; i++) { int err; err = stage[i].func(hdev); if (err) return err; } return 0; } /* Read Local Version */ static int hci_read_local_version_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, HCI_CMD_TIMEOUT); } /* Read BD Address */ static int hci_read_bd_addr_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_BD_ADDR, 0, NULL, HCI_CMD_TIMEOUT); } #define HCI_INIT(_func) \ { \ .func = _func, \ } static const struct hci_init_stage hci_init0[] = { /* HCI_OP_READ_LOCAL_VERSION */ HCI_INIT(hci_read_local_version_sync), /* HCI_OP_READ_BD_ADDR */ HCI_INIT(hci_read_bd_addr_sync), {} }; int hci_reset_sync(struct hci_dev *hdev) { int err; set_bit(HCI_RESET, &hdev->flags); err = __hci_cmd_sync_status(hdev, HCI_OP_RESET, 0, NULL, HCI_CMD_TIMEOUT); if (err) return err; return 0; } static int hci_init0_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { err = hci_reset_sync(hdev); if (err) return err; } return hci_init_stage_sync(hdev, hci_init0); } static int hci_unconf_init_sync(struct hci_dev *hdev) { int err; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return 0; err = hci_init0_sync(hdev); if (err < 0) return err; if (hci_dev_test_flag(hdev, HCI_SETUP)) hci_debugfs_create_basic(hdev); return 0; } /* Read Local Supported Features. */ static int hci_read_local_features_sync(struct hci_dev *hdev) { /* Not all AMP controllers support this command */ if (hdev->dev_type == HCI_AMP && !(hdev->commands[14] & 0x20)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL, HCI_CMD_TIMEOUT); } /* BR Controller init stage 1 command sequence */ static const struct hci_init_stage br_init1[] = { /* HCI_OP_READ_LOCAL_FEATURES */ HCI_INIT(hci_read_local_features_sync), /* HCI_OP_READ_LOCAL_VERSION */ HCI_INIT(hci_read_local_version_sync), /* HCI_OP_READ_BD_ADDR */ HCI_INIT(hci_read_bd_addr_sync), {} }; /* Read Local Commands */ static int hci_read_local_cmds_sync(struct hci_dev *hdev) { /* All Bluetooth 1.2 and later controllers should support the * HCI command for reading the local supported commands. * * Unfortunately some controllers indicate Bluetooth 1.2 support, * but do not have support for this command. If that is the case, * the driver can quirk the behavior and skip reading the local * supported commands. 
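 *
 * Editor's sketch (illustrative, hypothetical driver code, not part of
 * the original file): a transport driver opts out by setting the quirk
 * bit before registering the device, e.g. in its probe routine:
 *
 *	hdev = hci_alloc_dev();
 *	if (!hdev)
 *		return -ENOMEM;
 *	set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks);
 *	err = hci_register_dev(hdev);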
*/ if (hdev->hci_ver > BLUETOOTH_VER_1_1 && !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL, HCI_CMD_TIMEOUT); return 0; } /* Read Local AMP Info */ static int hci_read_local_amp_info_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Data Blk size */ static int hci_read_data_block_size_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Flow Control Mode */ static int hci_read_flow_control_mode_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Location Data */ static int hci_read_location_data_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCATION_DATA, 0, NULL, HCI_CMD_TIMEOUT); } /* AMP Controller init stage 1 command sequence */ static const struct hci_init_stage amp_init1[] = { /* HCI_OP_READ_LOCAL_VERSION */ HCI_INIT(hci_read_local_version_sync), /* HCI_OP_READ_LOCAL_COMMANDS */ HCI_INIT(hci_read_local_cmds_sync), /* HCI_OP_READ_LOCAL_AMP_INFO */ HCI_INIT(hci_read_local_amp_info_sync), /* HCI_OP_READ_DATA_BLOCK_SIZE */ HCI_INIT(hci_read_data_block_size_sync), /* HCI_OP_READ_FLOW_CONTROL_MODE */ HCI_INIT(hci_read_flow_control_mode_sync), /* HCI_OP_READ_LOCATION_DATA */ HCI_INIT(hci_read_location_data_sync), {} }; static int hci_init1_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { err = hci_reset_sync(hdev); if (err) return err; } switch (hdev->dev_type) { case HCI_PRIMARY: hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; return hci_init_stage_sync(hdev, br_init1); case HCI_AMP: hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; return hci_init_stage_sync(hdev, amp_init1); default: bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type); break; } return 0; } /* AMP Controller init stage 2 command sequence */ static const struct hci_init_stage amp_init2[] = { /* HCI_OP_READ_LOCAL_FEATURES */ HCI_INIT(hci_read_local_features_sync), {} }; /* Read Buffer Size (ACL mtu, max pkt, etc.) 
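 *
 * Editor's note (not part of the original file): the corresponding
 * command-complete handler in hci_event.c stores the returned values in
 * hdev->acl_mtu, hdev->acl_pkts, hdev->sco_mtu and hdev->sco_pkts, which
 * later bound how much ACL/SCO data the host queues to the controller.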
*/ static int hci_read_buffer_size_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Class of Device */ static int hci_read_dev_class_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Local Name */ static int hci_read_local_name_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Voice Setting */ static int hci_read_voice_setting_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Number of Supported IAC */ static int hci_read_num_supported_iac_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_NUM_SUPPORTED_IAC, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Current IAC LAP */ static int hci_read_current_iac_lap_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_CURRENT_IAC_LAP, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type, u8 cond_type, bdaddr_t *bdaddr, u8 auto_accept) { struct hci_cp_set_event_filter cp; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) return 0; memset(&cp, 0, sizeof(cp)); cp.flt_type = flt_type; if (flt_type != HCI_FLT_CLEAR_ALL) { cp.cond_type = cond_type; bacpy(&cp.addr_conn_flt.bdaddr, bdaddr); cp.addr_conn_flt.auto_accept = auto_accept; } return __hci_cmd_sync_status(hdev, HCI_OP_SET_EVENT_FLT, flt_type == HCI_FLT_CLEAR_ALL ? sizeof(cp.flt_type) : sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_clear_event_filter_sync(struct hci_dev *hdev) { if (!hci_dev_test_flag(hdev, HCI_EVENT_FILTER_CONFIGURED)) return 0; /* In theory the state machine should not reach here unless * a hci_set_event_filter_sync() call succeeds, but we do * the check both for parity and as a future reminder. */ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) return 0; return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00, BDADDR_ANY, 0x00); } /* Connection accept timeout ~20 secs */ static int hci_write_ca_timeout_sync(struct hci_dev *hdev) { __le16 param = cpu_to_le16(0x7d00); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_CA_TIMEOUT, sizeof(param), &param, HCI_CMD_TIMEOUT); } /* BR Controller init stage 2 command sequence */ static const struct hci_init_stage br_init2[] = { /* HCI_OP_READ_BUFFER_SIZE */ HCI_INIT(hci_read_buffer_size_sync), /* HCI_OP_READ_CLASS_OF_DEV */ HCI_INIT(hci_read_dev_class_sync), /* HCI_OP_READ_LOCAL_NAME */ HCI_INIT(hci_read_local_name_sync), /* HCI_OP_READ_VOICE_SETTING */ HCI_INIT(hci_read_voice_setting_sync), /* HCI_OP_READ_NUM_SUPPORTED_IAC */ HCI_INIT(hci_read_num_supported_iac_sync), /* HCI_OP_READ_CURRENT_IAC_LAP */ HCI_INIT(hci_read_current_iac_lap_sync), /* HCI_OP_SET_EVENT_FLT */ HCI_INIT(hci_clear_event_filter_sync), /* HCI_OP_WRITE_CA_TIMEOUT */ HCI_INIT(hci_write_ca_timeout_sync), {} }; static int hci_write_ssp_mode_1_sync(struct hci_dev *hdev) { u8 mode = 0x01; if (!lmp_ssp_capable(hdev) || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return 0; /* When SSP is available, then the host features page * should also be available as well. However some * controllers list the max_page as 0 as long as SSP * has not been enabled. To achieve proper debugging * output, force the minimum max_page to 1 at least. 
*/ hdev->max_page = 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode, HCI_CMD_TIMEOUT); } static int hci_write_eir_sync(struct hci_dev *hdev) { struct hci_cp_write_eir cp; if (!lmp_ssp_capable(hdev) || hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return 0; memset(hdev->eir, 0, sizeof(hdev->eir)); memset(&cp, 0, sizeof(cp)); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_write_inquiry_mode_sync(struct hci_dev *hdev) { u8 mode; if (!lmp_inq_rssi_capable(hdev) && !test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) return 0; /* If Extended Inquiry Result events are supported, then * they are clearly preferred over Inquiry Result with RSSI * events. */ mode = lmp_ext_inq_capable(hdev) ? 0x02 : 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_INQUIRY_MODE, sizeof(mode), &mode, HCI_CMD_TIMEOUT); } static int hci_read_inq_rsp_tx_power_sync(struct hci_dev *hdev) { if (!lmp_inq_tx_pwr_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_read_local_ext_features_sync(struct hci_dev *hdev, u8 page) { struct hci_cp_read_local_ext_features cp; if (!lmp_ext_feat_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); cp.page = page; return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_read_local_ext_features_1_sync(struct hci_dev *hdev) { return hci_read_local_ext_features_sync(hdev, 0x01); } /* HCI Controller init stage 2 command sequence */ static const struct hci_init_stage hci_init2[] = { /* HCI_OP_READ_LOCAL_COMMANDS */ HCI_INIT(hci_read_local_cmds_sync), /* HCI_OP_WRITE_SSP_MODE */ HCI_INIT(hci_write_ssp_mode_1_sync), /* HCI_OP_WRITE_EIR */ HCI_INIT(hci_write_eir_sync), /* HCI_OP_WRITE_INQUIRY_MODE */ HCI_INIT(hci_write_inquiry_mode_sync), /* HCI_OP_READ_INQ_RSP_TX_POWER */ HCI_INIT(hci_read_inq_rsp_tx_power_sync), /* HCI_OP_READ_LOCAL_EXT_FEATURES */ HCI_INIT(hci_read_local_ext_features_1_sync), /* HCI_OP_WRITE_AUTH_ENABLE */ HCI_INIT(hci_write_auth_enable_sync), {} }; /* Read LE Buffer Size */ static int hci_le_read_buffer_size_sync(struct hci_dev *hdev) { /* Use Read LE Buffer Size V2 if supported */ if (iso_capable(hdev) && hdev->commands[41] & 0x20) return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_BUFFER_SIZE_V2, 0, NULL, HCI_CMD_TIMEOUT); return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Local Supported Features */ static int hci_le_read_local_features_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Supported States */ static int hci_le_read_supported_states_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL, HCI_CMD_TIMEOUT); } /* LE Controller init stage 2 command sequence */ static const struct hci_init_stage le_init2[] = { /* HCI_OP_LE_READ_LOCAL_FEATURES */ HCI_INIT(hci_le_read_local_features_sync), /* HCI_OP_LE_READ_BUFFER_SIZE */ HCI_INIT(hci_le_read_buffer_size_sync), /* HCI_OP_LE_READ_SUPPORTED_STATES */ HCI_INIT(hci_le_read_supported_states_sync), {} }; static int hci_init2_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); if (hdev->dev_type == HCI_AMP) return hci_init_stage_sync(hdev, amp_init2); err = hci_init_stage_sync(hdev, hci_init2); if (err) return err; if (lmp_bredr_capable(hdev)) { err = hci_init_stage_sync(hdev, br_init2); if (err) 
return err; } else { hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); } if (lmp_le_capable(hdev)) { err = hci_init_stage_sync(hdev, le_init2); if (err) return err; /* LE-only controllers have LE implicitly enabled */ if (!lmp_bredr_capable(hdev)) hci_dev_set_flag(hdev, HCI_LE_ENABLED); } return 0; } static int hci_set_event_mask_sync(struct hci_dev *hdev) { /* The second byte is 0xff instead of 0x9f (two reserved bits * disabled) since a Broadcom 1.2 dongle doesn't respond to the * command otherwise. */ u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; /* CSR 1.1 dongles does not accept any bitfield so don't try to set * any event mask for pre 1.2 devices. */ if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; if (lmp_bredr_capable(hdev)) { events[4] |= 0x01; /* Flow Specification Complete */ /* Don't set Disconnect Complete and mode change when * suspended as that would wakeup the host when disconnecting * due to suspend. */ if (hdev->suspended) { events[0] &= 0xef; events[2] &= 0xf7; } } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); events[1] |= 0x20; /* Command Complete */ events[1] |= 0x40; /* Command Status */ events[1] |= 0x80; /* Hardware Error */ /* If the controller supports the Disconnect command, enable * the corresponding event. In addition enable packet flow * control related events. */ if (hdev->commands[0] & 0x20) { /* Don't set Disconnect Complete when suspended as that * would wakeup the host when disconnecting due to * suspend. */ if (!hdev->suspended) events[0] |= 0x10; /* Disconnection Complete */ events[2] |= 0x04; /* Number of Completed Packets */ events[3] |= 0x02; /* Data Buffer Overflow */ } /* If the controller supports the Read Remote Version * Information command, enable the corresponding event. 
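 *
 * Editor's note (illustrative, not part of the original file): the
 * hdev->commands[] array mirrors the Supported Commands bit field
 * returned by HCI_OP_READ_LOCAL_COMMANDS; the Core Specification assigns
 * each command an octet and bit position, so the test below checks
 * octet 2, bit 7. A hypothetical helper, not used by this file:
 *
 *	#define hci_cmd_supported(hdev, octet, bit) \
 *		((hdev)->commands[(octet)] & BIT(bit))
 *
 *	if (hci_cmd_supported(hdev, 2, 7))
 *		events[1] |= 0x08;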
*/ if (hdev->commands[2] & 0x80) events[1] |= 0x08; /* Read Remote Version Information * Complete */ if (hdev->le_features[0] & HCI_LE_ENCRYPTION) { events[0] |= 0x80; /* Encryption Change */ events[5] |= 0x80; /* Encryption Key Refresh Complete */ } } if (lmp_inq_rssi_capable(hdev) || test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) events[4] |= 0x02; /* Inquiry Result with RSSI */ if (lmp_ext_feat_capable(hdev)) events[4] |= 0x04; /* Read Remote Extended Features Complete */ if (lmp_esco_capable(hdev)) { events[5] |= 0x08; /* Synchronous Connection Complete */ events[5] |= 0x10; /* Synchronous Connection Changed */ } if (lmp_sniffsubr_capable(hdev)) events[5] |= 0x20; /* Sniff Subrating */ if (lmp_pause_enc_capable(hdev)) events[5] |= 0x80; /* Encryption Key Refresh Complete */ if (lmp_ext_inq_capable(hdev)) events[5] |= 0x40; /* Extended Inquiry Result */ if (lmp_no_flush_capable(hdev)) events[7] |= 0x01; /* Enhanced Flush Complete */ if (lmp_lsto_capable(hdev)) events[6] |= 0x80; /* Link Supervision Timeout Changed */ if (lmp_ssp_capable(hdev)) { events[6] |= 0x01; /* IO Capability Request */ events[6] |= 0x02; /* IO Capability Response */ events[6] |= 0x04; /* User Confirmation Request */ events[6] |= 0x08; /* User Passkey Request */ events[6] |= 0x10; /* Remote OOB Data Request */ events[6] |= 0x20; /* Simple Pairing Complete */ events[7] |= 0x04; /* User Passkey Notification */ events[7] |= 0x08; /* Keypress Notification */ events[7] |= 0x10; /* Remote Host Supported * Features Notification */ } if (lmp_le_capable(hdev)) events[7] |= 0x20; /* LE Meta-Event */ return __hci_cmd_sync_status(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events, HCI_CMD_TIMEOUT); } static int hci_read_stored_link_key_sync(struct hci_dev *hdev) { struct hci_cp_read_stored_link_key cp; if (!(hdev->commands[6] & 0x20) || test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) return 0; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, BDADDR_ANY); cp.read_all = 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_setup_link_policy_sync(struct hci_dev *hdev) { struct hci_cp_write_def_link_policy cp; u16 link_policy = 0; if (!(hdev->commands[5] & 0x10)) return 0; memset(&cp, 0, sizeof(cp)); if (lmp_rswitch_capable(hdev)) link_policy |= HCI_LP_RSWITCH; if (lmp_hold_capable(hdev)) link_policy |= HCI_LP_HOLD; if (lmp_sniff_capable(hdev)) link_policy |= HCI_LP_SNIFF; if (lmp_park_capable(hdev)) link_policy |= HCI_LP_PARK; cp.policy = cpu_to_le16(link_policy); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_read_page_scan_activity_sync(struct hci_dev *hdev) { if (!(hdev->commands[8] & 0x01)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev) { if (!(hdev->commands[18] & 0x04) || !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_read_page_scan_type_sync(struct hci_dev *hdev) { /* Some older Broadcom based Bluetooth 1.2 controllers do not * support the Read Page Scan Type command. Check support for * this command in the bit mask of supported commands. 
*/ if (!(hdev->commands[13] & 0x01)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read features beyond page 1 if available */ static int hci_read_local_ext_features_all_sync(struct hci_dev *hdev) { u8 page; int err; if (!lmp_ext_feat_capable(hdev)) return 0; for (page = 2; page < HCI_MAX_PAGES && page <= hdev->max_page; page++) { err = hci_read_local_ext_features_sync(hdev, page); if (err) return err; } return 0; } /* HCI Controller init stage 3 command sequence */ static const struct hci_init_stage hci_init3[] = { /* HCI_OP_SET_EVENT_MASK */ HCI_INIT(hci_set_event_mask_sync), /* HCI_OP_READ_STORED_LINK_KEY */ HCI_INIT(hci_read_stored_link_key_sync), /* HCI_OP_WRITE_DEF_LINK_POLICY */ HCI_INIT(hci_setup_link_policy_sync), /* HCI_OP_READ_PAGE_SCAN_ACTIVITY */ HCI_INIT(hci_read_page_scan_activity_sync), /* HCI_OP_READ_DEF_ERR_DATA_REPORTING */ HCI_INIT(hci_read_def_err_data_reporting_sync), /* HCI_OP_READ_PAGE_SCAN_TYPE */ HCI_INIT(hci_read_page_scan_type_sync), /* HCI_OP_READ_LOCAL_EXT_FEATURES */ HCI_INIT(hci_read_local_ext_features_all_sync), {} }; static int hci_le_set_event_mask_sync(struct hci_dev *hdev) { u8 events[8]; if (!lmp_le_capable(hdev)) return 0; memset(events, 0, sizeof(events)); if (hdev->le_features[0] & HCI_LE_ENCRYPTION) events[0] |= 0x10; /* LE Long Term Key Request */ /* If controller supports the Connection Parameters Request * Link Layer Procedure, enable the corresponding event. */ if (hdev->le_features[0] & HCI_LE_CONN_PARAM_REQ_PROC) /* LE Remote Connection Parameter Request */ events[0] |= 0x20; /* If the controller supports the Data Length Extension * feature, enable the corresponding event. */ if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) events[0] |= 0x40; /* LE Data Length Change */ /* If the controller supports LL Privacy feature or LE Extended Adv, * enable the corresponding event. */ if (use_enhanced_conn_complete(hdev)) events[1] |= 0x02; /* LE Enhanced Connection Complete */ /* If the controller supports Extended Scanner Filter * Policies, enable the corresponding event. */ if (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY) events[1] |= 0x04; /* LE Direct Advertising Report */ /* If the controller supports Channel Selection Algorithm #2 * feature, enable the corresponding event. */ if (hdev->le_features[1] & HCI_LE_CHAN_SEL_ALG2) events[2] |= 0x08; /* LE Channel Selection Algorithm */ /* If the controller supports the LE Set Scan Enable command, * enable the corresponding advertising report event. */ if (hdev->commands[26] & 0x08) events[0] |= 0x02; /* LE Advertising Report */ /* If the controller supports the LE Create Connection * command, enable the corresponding event. */ if (hdev->commands[26] & 0x10) events[0] |= 0x01; /* LE Connection Complete */ /* If the controller supports the LE Connection Update * command, enable the corresponding event. */ if (hdev->commands[27] & 0x04) events[0] |= 0x04; /* LE Connection Update Complete */ /* If the controller supports the LE Read Remote Used Features * command, enable the corresponding event. */ if (hdev->commands[27] & 0x20) /* LE Read Remote Used Features Complete */ events[0] |= 0x08; /* If the controller supports the LE Read Local P-256 * Public Key command, enable the corresponding event. */ if (hdev->commands[34] & 0x02) /* LE Read Local P-256 Public Key Complete */ events[0] |= 0x80; /* If the controller supports the LE Generate DHKey * command, enable the corresponding event. 
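 *
 * Editor's note (assumption from the HCI LE Set Event Mask definition,
 * not part of the original file): bit (n - 1) of the LE event mask
 * enables LE Meta subevent n, e.g. LE Connection Complete (subevent
 * 0x01) is events[0] bit 0 and LE Enhanced Connection Complete
 * (subevent 0x0a) is events[1] bit 1, which matches the masks used
 * throughout this function.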
*/ if (hdev->commands[34] & 0x04) events[1] |= 0x01; /* LE Generate DHKey Complete */ /* If the controller supports the LE Set Default PHY or * LE Set PHY commands, enable the corresponding event. */ if (hdev->commands[35] & (0x20 | 0x40)) events[1] |= 0x08; /* LE PHY Update Complete */ /* If the controller supports LE Set Extended Scan Parameters * and LE Set Extended Scan Enable commands, enable the * corresponding event. */ if (use_ext_scan(hdev)) events[1] |= 0x10; /* LE Extended Advertising Report */ /* If the controller supports the LE Extended Advertising * command, enable the corresponding event. */ if (ext_adv_capable(hdev)) events[2] |= 0x02; /* LE Advertising Set Terminated */ if (cis_capable(hdev)) { events[3] |= 0x01; /* LE CIS Established */ if (cis_peripheral_capable(hdev)) events[3] |= 0x02; /* LE CIS Request */ } if (bis_capable(hdev)) { events[1] |= 0x20; /* LE PA Report */ events[1] |= 0x40; /* LE PA Sync Established */ events[3] |= 0x04; /* LE Create BIG Complete */ events[3] |= 0x08; /* LE Terminate BIG Complete */ events[3] |= 0x10; /* LE BIG Sync Established */ events[3] |= 0x20; /* LE BIG Sync Loss */ events[4] |= 0x02; /* LE BIG Info Advertising Report */ } return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EVENT_MASK, sizeof(events), events, HCI_CMD_TIMEOUT); } /* Read LE Advertising Channel TX Power */ static int hci_le_read_adv_tx_power_sync(struct hci_dev *hdev) { if ((hdev->commands[25] & 0x40) && !ext_adv_capable(hdev)) { /* HCI TS spec forbids mixing of legacy and extended * advertising commands wherein READ_ADV_TX_POWER is * also included. So do not call it if extended adv * is supported otherwise controller will return * COMMAND_DISALLOWED for extended commands. */ return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL, HCI_CMD_TIMEOUT); } return 0; } /* Read LE Min/Max Tx Power*/ static int hci_le_read_tx_power_sync(struct hci_dev *hdev) { if (!(hdev->commands[38] & 0x80) || test_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_TRANSMIT_POWER, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Accept List Size */ static int hci_le_read_accept_list_size_sync(struct hci_dev *hdev) { if (!(hdev->commands[26] & 0x40)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_ACCEPT_LIST_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Resolving List Size */ static int hci_le_read_resolv_list_size_sync(struct hci_dev *hdev) { if (!(hdev->commands[34] & 0x40)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_RESOLV_LIST_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Clear LE Resolving List */ static int hci_le_clear_resolv_list_sync(struct hci_dev *hdev) { if (!(hdev->commands[34] & 0x20)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL, HCI_CMD_TIMEOUT); } /* Set RPA timeout */ static int hci_le_set_rpa_timeout_sync(struct hci_dev *hdev) { __le16 timeout = cpu_to_le16(hdev->rpa_timeout); if (!(hdev->commands[35] & 0x04) || test_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_RPA_TIMEOUT, sizeof(timeout), &timeout, HCI_CMD_TIMEOUT); } /* Read LE Maximum Data Length */ static int hci_le_read_max_data_len_sync(struct hci_dev *hdev) { if (!(hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Suggested Default Data Length */ static int hci_le_read_def_data_len_sync(struct hci_dev *hdev) { 
if (!(hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Number of Supported Advertising Sets */ static int hci_le_read_num_support_adv_sets_sync(struct hci_dev *hdev) { if (!ext_adv_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, 0, NULL, HCI_CMD_TIMEOUT); } /* Write LE Host Supported */ static int hci_set_le_support_sync(struct hci_dev *hdev) { struct hci_cp_write_le_host_supported cp; /* LE-only devices do not support explicit enablement */ if (!lmp_bredr_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { cp.le = 0x01; cp.simul = 0x00; } if (cp.le == lmp_host_le_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* LE Set Host Feature */ static int hci_le_set_host_feature_sync(struct hci_dev *hdev) { struct hci_cp_le_set_host_feature cp; if (!cis_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); /* Connected Isochronous Channels (Host Support) */ cp.bit_number = 32; cp.bit_value = 1; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_HOST_FEATURE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* LE Controller init stage 3 command sequence */ static const struct hci_init_stage le_init3[] = { /* HCI_OP_LE_SET_EVENT_MASK */ HCI_INIT(hci_le_set_event_mask_sync), /* HCI_OP_LE_READ_ADV_TX_POWER */ HCI_INIT(hci_le_read_adv_tx_power_sync), /* HCI_OP_LE_READ_TRANSMIT_POWER */ HCI_INIT(hci_le_read_tx_power_sync), /* HCI_OP_LE_READ_ACCEPT_LIST_SIZE */ HCI_INIT(hci_le_read_accept_list_size_sync), /* HCI_OP_LE_CLEAR_ACCEPT_LIST */ HCI_INIT(hci_le_clear_accept_list_sync), /* HCI_OP_LE_READ_RESOLV_LIST_SIZE */ HCI_INIT(hci_le_read_resolv_list_size_sync), /* HCI_OP_LE_CLEAR_RESOLV_LIST */ HCI_INIT(hci_le_clear_resolv_list_sync), /* HCI_OP_LE_SET_RPA_TIMEOUT */ HCI_INIT(hci_le_set_rpa_timeout_sync), /* HCI_OP_LE_READ_MAX_DATA_LEN */ HCI_INIT(hci_le_read_max_data_len_sync), /* HCI_OP_LE_READ_DEF_DATA_LEN */ HCI_INIT(hci_le_read_def_data_len_sync), /* HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS */ HCI_INIT(hci_le_read_num_support_adv_sets_sync), /* HCI_OP_WRITE_LE_HOST_SUPPORTED */ HCI_INIT(hci_set_le_support_sync), /* HCI_OP_LE_SET_HOST_FEATURE */ HCI_INIT(hci_le_set_host_feature_sync), {} }; static int hci_init3_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); err = hci_init_stage_sync(hdev, hci_init3); if (err) return err; if (lmp_le_capable(hdev)) return hci_init_stage_sync(hdev, le_init3); return 0; } static int hci_delete_stored_link_key_sync(struct hci_dev *hdev) { struct hci_cp_delete_stored_link_key cp; /* Some Broadcom based Bluetooth controllers do not support the * Delete Stored Link Key command. They are clearly indicating its * absence in the bit mask of supported commands. * * Check the supported commands and only if the command is marked * as supported send it. If not supported assume that the controller * does not have actual support for stored link keys which makes this * command redundant anyway. * * Some controllers indicate that they support handling deleting * stored link keys, but they don't. The quirk lets a driver * just disable this command. 
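 *
 * Editor's note (not part of the original file): the same
 * HCI_QUIRK_BROKEN_STORED_LINK_KEY quirk also suppresses the earlier
 * HCI_OP_READ_STORED_LINK_KEY in hci_read_stored_link_key_sync(), and
 * hci_dev_setup_sync() logs a warning for it via hci_broken_table.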
*/ if (!(hdev->commands[6] & 0x80) || test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) return 0; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, BDADDR_ANY); cp.delete_all = 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev) { u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; bool changed = false; /* Set event mask page 2 if the HCI command for it is supported */ if (!(hdev->commands[22] & 0x04)) return 0; /* If Connectionless Peripheral Broadcast central role is supported * enable all necessary events for it. */ if (lmp_cpb_central_capable(hdev)) { events[1] |= 0x40; /* Triggered Clock Capture */ events[1] |= 0x80; /* Synchronization Train Complete */ events[2] |= 0x08; /* Truncated Page Complete */ events[2] |= 0x20; /* CPB Channel Map Change */ changed = true; } /* If Connectionless Peripheral Broadcast peripheral role is supported * enable all necessary events for it. */ if (lmp_cpb_peripheral_capable(hdev)) { events[2] |= 0x01; /* Synchronization Train Received */ events[2] |= 0x02; /* CPB Receive */ events[2] |= 0x04; /* CPB Timeout */ events[2] |= 0x10; /* Peripheral Page Response Timeout */ changed = true; } /* Enable Authenticated Payload Timeout Expired event if supported */ if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) { events[2] |= 0x80; changed = true; } /* Some Broadcom based controllers indicate support for Set Event * Mask Page 2 command, but then actually do not support it. Since * the default value is all bits set to zero, the command is only * required if the event mask has to be changed. In case no change * to the event mask is needed, skip this command. */ if (!changed) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events, HCI_CMD_TIMEOUT); } /* Read local codec list if the HCI command is supported */ static int hci_read_local_codecs_sync(struct hci_dev *hdev) { if (hdev->commands[45] & 0x04) hci_read_supported_codecs_v2(hdev); else if (hdev->commands[29] & 0x20) hci_read_supported_codecs(hdev); return 0; } /* Read local pairing options if the HCI command is supported */ static int hci_read_local_pairing_opts_sync(struct hci_dev *hdev) { if (!(hdev->commands[41] & 0x08)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_PAIRING_OPTS, 0, NULL, HCI_CMD_TIMEOUT); } /* Get MWS transport configuration if the HCI command is supported */ static int hci_get_mws_transport_config_sync(struct hci_dev *hdev) { if (!mws_transport_config_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_GET_MWS_TRANSPORT_CONFIG, 0, NULL, HCI_CMD_TIMEOUT); } /* Check for Synchronization Train support */ static int hci_read_sync_train_params_sync(struct hci_dev *hdev) { if (!lmp_sync_train_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL, HCI_CMD_TIMEOUT); } /* Enable Secure Connections if supported and configured */ static int hci_write_sc_support_1_sync(struct hci_dev *hdev) { u8 support = 0x01; if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED) || !bredr_sc_enabled(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SC_SUPPORT, sizeof(support), &support, HCI_CMD_TIMEOUT); } /* Set erroneous data reporting if supported to the wideband speech * setting value */ static int hci_set_err_data_report_sync(struct hci_dev *hdev) { struct hci_cp_write_def_err_data_reporting cp; bool enabled = 
hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); if (!(hdev->commands[18] & 0x08) || !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; if (enabled == hdev->err_data_reporting) return 0; memset(&cp, 0, sizeof(cp)); cp.err_data_reporting = enabled ? ERR_DATA_REPORTING_ENABLED : ERR_DATA_REPORTING_DISABLED; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static const struct hci_init_stage hci_init4[] = { /* HCI_OP_DELETE_STORED_LINK_KEY */ HCI_INIT(hci_delete_stored_link_key_sync), /* HCI_OP_SET_EVENT_MASK_PAGE_2 */ HCI_INIT(hci_set_event_mask_page_2_sync), /* HCI_OP_READ_LOCAL_CODECS */ HCI_INIT(hci_read_local_codecs_sync), /* HCI_OP_READ_LOCAL_PAIRING_OPTS */ HCI_INIT(hci_read_local_pairing_opts_sync), /* HCI_OP_GET_MWS_TRANSPORT_CONFIG */ HCI_INIT(hci_get_mws_transport_config_sync), /* HCI_OP_READ_SYNC_TRAIN_PARAMS */ HCI_INIT(hci_read_sync_train_params_sync), /* HCI_OP_WRITE_SC_SUPPORT */ HCI_INIT(hci_write_sc_support_1_sync), /* HCI_OP_WRITE_DEF_ERR_DATA_REPORTING */ HCI_INIT(hci_set_err_data_report_sync), {} }; /* Set Suggested Default Data Length to maximum if supported */ static int hci_le_set_write_def_data_len_sync(struct hci_dev *hdev) { struct hci_cp_le_write_def_data_len cp; if (!(hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)) return 0; memset(&cp, 0, sizeof(cp)); cp.tx_len = cpu_to_le16(hdev->le_max_tx_len); cp.tx_time = cpu_to_le16(hdev->le_max_tx_time); return __hci_cmd_sync_status(hdev, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* Set Default PHY parameters if command is supported, enables all supported * PHYs according to the LE Features bits. */ static int hci_le_set_default_phy_sync(struct hci_dev *hdev) { struct hci_cp_le_set_default_phy cp; if (!(hdev->commands[35] & 0x20)) { /* If the command is not supported it means only 1M PHY is * supported. */ hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M; return 0; } memset(&cp, 0, sizeof(cp)); cp.all_phys = 0x00; cp.tx_phys = HCI_LE_SET_PHY_1M; cp.rx_phys = HCI_LE_SET_PHY_1M; /* Enables 2M PHY if supported */ if (le_2m_capable(hdev)) { cp.tx_phys |= HCI_LE_SET_PHY_2M; cp.rx_phys |= HCI_LE_SET_PHY_2M; } /* Enables Coded PHY if supported */ if (le_coded_capable(hdev)) { cp.tx_phys |= HCI_LE_SET_PHY_CODED; cp.rx_phys |= HCI_LE_SET_PHY_CODED; } return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static const struct hci_init_stage le_init4[] = { /* HCI_OP_LE_WRITE_DEF_DATA_LEN */ HCI_INIT(hci_le_set_write_def_data_len_sync), /* HCI_OP_LE_SET_DEFAULT_PHY */ HCI_INIT(hci_le_set_default_phy_sync), {} }; static int hci_init4_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); err = hci_init_stage_sync(hdev, hci_init4); if (err) return err; if (lmp_le_capable(hdev)) return hci_init_stage_sync(hdev, le_init4); return 0; } static int hci_init_sync(struct hci_dev *hdev) { int err; err = hci_init1_sync(hdev); if (err < 0) return err; if (hci_dev_test_flag(hdev, HCI_SETUP)) hci_debugfs_create_basic(hdev); err = hci_init2_sync(hdev); if (err < 0) return err; /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode * BR/EDR/LE type controllers. AMP controllers only need the * first two stages of init. 
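 *
 * Editor's summary (not part of the original file) of the stages run for
 * HCI_PRIMARY controllers: stage 1 resets the controller and reads its
 * basic identity (features, version, BD_ADDR), stage 2 reads buffer
 * sizes, local commands and the BR/EDR and LE capabilities, stage 3 sets
 * the event masks and reads the LE accept/resolving list sizes and
 * data-length limits, and stage 4 deletes stored link keys, sets event
 * mask page 2 and configures codecs, default data length and default
 * PHYs.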
*/ if (hdev->dev_type != HCI_PRIMARY) return 0; err = hci_init3_sync(hdev); if (err < 0) return err; err = hci_init4_sync(hdev); if (err < 0) return err; /* This function is only called when the controller is actually in * configured state. When the controller is marked as unconfigured, * this initialization procedure is not run. * * It means that it is possible that a controller runs through its * setup phase and then discovers missing settings. If that is the * case, then this function will not be called. It then will only * be called during the config phase. * * So only when in setup phase or config phase, create the debugfs * entries and register the SMP channels. */ if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) return 0; if (hci_dev_test_and_set_flag(hdev, HCI_DEBUGFS_CREATED)) return 0; hci_debugfs_create_common(hdev); if (lmp_bredr_capable(hdev)) hci_debugfs_create_bredr(hdev); if (lmp_le_capable(hdev)) hci_debugfs_create_le(hdev); return 0; } #define HCI_QUIRK_BROKEN(_quirk, _desc) { HCI_QUIRK_BROKEN_##_quirk, _desc } static const struct { unsigned long quirk; const char *desc; } hci_broken_table[] = { HCI_QUIRK_BROKEN(LOCAL_COMMANDS, "HCI Read Local Supported Commands not supported"), HCI_QUIRK_BROKEN(STORED_LINK_KEY, "HCI Delete Stored Link Key command is advertised, " "but not supported."), HCI_QUIRK_BROKEN(ERR_DATA_REPORTING, "HCI Read Default Erroneous Data Reporting command is " "advertised, but not supported."), HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER, "HCI Read Transmit Power Level command is advertised, " "but not supported."), HCI_QUIRK_BROKEN(FILTER_CLEAR_ALL, "HCI Set Event Filter command not supported."), HCI_QUIRK_BROKEN(ENHANCED_SETUP_SYNC_CONN, "HCI Enhanced Setup Synchronous Connection command is " "advertised, but not supported."), HCI_QUIRK_BROKEN(SET_RPA_TIMEOUT, "HCI LE Set Random Private Address Timeout command is " "advertised, but not supported."), HCI_QUIRK_BROKEN(LE_CODED, "HCI LE Coded PHY feature bit is set, " "but its usage is not supported.") }; /* This function handles hdev setup stage: * * Calls hdev->setup * Setup address if HCI_QUIRK_USE_BDADDR_PROPERTY is set. */ static int hci_dev_setup_sync(struct hci_dev *hdev) { int ret = 0; bool invalid_bdaddr; size_t i; if (!hci_dev_test_flag(hdev, HCI_SETUP) && !test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) return 0; bt_dev_dbg(hdev, ""); hci_sock_dev_event(hdev, HCI_DEV_SETUP); if (hdev->setup) ret = hdev->setup(hdev); for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) { if (test_bit(hci_broken_table[i].quirk, &hdev->quirks)) bt_dev_warn(hdev, "%s", hci_broken_table[i].desc); } /* The transport driver can set the quirk to mark the * BD_ADDR invalid before creating the HCI device or in * its setup callback. */ invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); if (!ret) { if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) && !bacmp(&hdev->public_addr, BDADDR_ANY)) hci_dev_get_bd_addr_from_property(hdev); if (invalid_bdaddr && bacmp(&hdev->public_addr, BDADDR_ANY) && hdev->set_bdaddr) { ret = hdev->set_bdaddr(hdev, &hdev->public_addr); if (!ret) invalid_bdaddr = false; } } /* The transport driver can set these quirks before * creating the HCI device or in its setup callback. * * For the invalid BD_ADDR quirk it is possible that * it becomes a valid address if the bootloader does * provide it (see above). * * In case any of them is set, the controller has to * start up as unconfigured. 
*/ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || invalid_bdaddr) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); /* For an unconfigured controller it is required to * read at least the version information provided by * the Read Local Version Information command. * * If the set_bdaddr driver callback is provided, then * also the original Bluetooth public device address * will be read using the Read BD Address command. */ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return hci_unconf_init_sync(hdev); return ret; } /* This function handles hdev init stage: * * Calls hci_dev_setup_sync to perform setup stage * Calls hci_init_sync to perform HCI command init sequence */ static int hci_dev_init_sync(struct hci_dev *hdev) { int ret; bt_dev_dbg(hdev, ""); atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); ret = hci_dev_setup_sync(hdev); if (hci_dev_test_flag(hdev, HCI_CONFIG)) { /* If public address change is configured, ensure that * the address gets programmed. If the driver does not * support changing the public address, fail the power * on procedure. */ if (bacmp(&hdev->public_addr, BDADDR_ANY) && hdev->set_bdaddr) ret = hdev->set_bdaddr(hdev, &hdev->public_addr); else ret = -EADDRNOTAVAIL; } if (!ret) { if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { ret = hci_init_sync(hdev); if (!ret && hdev->post_init) ret = hdev->post_init(hdev); } } /* If the HCI Reset command is clearing all diagnostic settings, * then they need to be reprogrammed after the init procedure * completed. */ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) ret = hdev->set_diag(hdev, true); if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { msft_do_open(hdev); aosp_do_open(hdev); } clear_bit(HCI_INIT, &hdev->flags); return ret; } int hci_dev_open_sync(struct hci_dev *hdev) { int ret; bt_dev_dbg(hdev, ""); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { ret = -ENODEV; goto done; } if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) { /* Check for rfkill but allow the HCI setup stage to * proceed (which in itself doesn't cause any RF activity). */ if (hci_dev_test_flag(hdev, HCI_RFKILLED)) { ret = -ERFKILL; goto done; } /* Check for valid public address or a configured static * random address, but let the HCI setup proceed to * be able to determine if there is a public address * or not. * * In case of user channel usage, it is not important * if a public address or static random address is * available. * * This check is only valid for BR/EDR controllers * since AMP controllers do not have an address. 
*/ if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hdev->dev_type == HCI_PRIMARY && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY)) { ret = -EADDRNOTAVAIL; goto done; } } if (test_bit(HCI_UP, &hdev->flags)) { ret = -EALREADY; goto done; } if (hdev->open(hdev)) { ret = -EIO; goto done; } hci_devcd_reset(hdev); set_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_OPEN); ret = hci_dev_init_sync(hdev); if (!ret) { hci_dev_hold(hdev); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); hci_adv_instances_set_rpa_expired(hdev, true); set_bit(HCI_UP, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_UP); hci_leds_update_powered(hdev, true); if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT) && hdev->dev_type == HCI_PRIMARY) { ret = hci_powered_update_sync(hdev); mgmt_power_on(hdev, ret); } } else { /* Init failed, cleanup */ flush_work(&hdev->tx_work); /* Since hci_rx_work() is possible to awake new cmd_work * it should be flushed first to avoid unexpected call of * hci_cmd_work() */ flush_work(&hdev->rx_work); flush_work(&hdev->cmd_work); skb_queue_purge(&hdev->cmd_q); skb_queue_purge(&hdev->rx_q); if (hdev->flush) hdev->flush(hdev); if (hdev->sent_cmd) { cancel_delayed_work_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } if (hdev->req_skb) { kfree_skb(hdev->req_skb); hdev->req_skb = NULL; } clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); hdev->close(hdev); hdev->flags &= BIT(HCI_RAW); } done: return ret; } /* This function requires the caller holds hdev->lock */ static void hci_pend_le_actions_clear(struct hci_dev *hdev) { struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { hci_pend_le_list_del_init(p); if (p->conn) { hci_conn_drop(p->conn); hci_conn_put(p->conn); p->conn = NULL; } } BT_DBG("All LE pending actions cleared"); } static int hci_dev_shutdown(struct hci_dev *hdev) { int err = 0; /* Similar to how we first do setup and then set the exclusive access * bit for userspace, we must first unset userchannel and then clean up. * Otherwise, the kernel can't properly use the hci channel to clean up * the controller (some shutdown routines require sending additional * commands to the controller for example). 
*/ bool was_userchannel = hci_dev_test_and_clear_flag(hdev, HCI_USER_CHANNEL); if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && test_bit(HCI_UP, &hdev->flags)) { /* Execute vendor specific shutdown routine */ if (hdev->shutdown) err = hdev->shutdown(hdev); } if (was_userchannel) hci_dev_set_flag(hdev, HCI_USER_CHANNEL); return err; } int hci_dev_close_sync(struct hci_dev *hdev) { bool auto_off; int err = 0; bt_dev_dbg(hdev, ""); cancel_delayed_work(&hdev->power_off); cancel_delayed_work(&hdev->ncmd_timer); cancel_delayed_work(&hdev->le_scan_disable); hci_request_cancel_all(hdev); if (hdev->adv_instance_timeout) { cancel_delayed_work_sync(&hdev->adv_instance_expire); hdev->adv_instance_timeout = 0; } err = hci_dev_shutdown(hdev); if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); return err; } hci_leds_update_powered(hdev, false); /* Flush RX and TX works */ flush_work(&hdev->tx_work); flush_work(&hdev->rx_work); if (hdev->discov_timeout > 0) { hdev->discov_timeout = 0; hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); } if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) cancel_delayed_work(&hdev->service_cache); if (hci_dev_test_flag(hdev, HCI_MGMT)) { struct adv_info *adv_instance; cancel_delayed_work_sync(&hdev->rpa_expired); list_for_each_entry(adv_instance, &hdev->adv_instances, list) cancel_delayed_work_sync(&adv_instance->rpa_expired_cb); } /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ drain_workqueue(hdev->workqueue); hci_dev_lock(hdev); hci_discovery_set_state(hdev, DISCOVERY_STOPPED); auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); if (!auto_off && hdev->dev_type == HCI_PRIMARY && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); hci_inquiry_cache_flush(hdev); hci_pend_le_actions_clear(hdev); hci_conn_hash_flush(hdev); /* Prevent data races on hdev->smp_data or hdev->smp_bredr_data */ smp_unregister(hdev); hci_dev_unlock(hdev); hci_sock_dev_event(hdev, HCI_DEV_DOWN); if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { aosp_do_close(hdev); msft_do_close(hdev); } if (hdev->flush) hdev->flush(hdev); /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); hci_reset_sync(hdev); clear_bit(HCI_INIT, &hdev->flags); } /* flush cmd work */ flush_work(&hdev->cmd_work); /* Drop queues */ skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); skb_queue_purge(&hdev->raw_q); /* Drop last sent command */ if (hdev->sent_cmd) { cancel_delayed_work_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } /* Drop last request */ if (hdev->req_skb) { kfree_skb(hdev->req_skb); hdev->req_skb = NULL; } clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); /* After this point our queues are empty and no tasks are scheduled. 
*/ hdev->close(hdev); /* Clear flags */ hdev->flags &= BIT(HCI_RAW); hci_dev_clear_volatile_flags(hdev); /* Controller radio is available but is currently powered down */ hdev->amp_status = AMP_STATUS_POWERED_DOWN; memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); hci_codec_list_clear(&hdev->local_codecs); hci_dev_put(hdev); return err; } /* This function perform power on HCI command sequence as follows: * * If controller is already up (HCI_UP) performs hci_powered_update_sync * sequence otherwise run hci_dev_open_sync which will follow with * hci_powered_update_sync after the init sequence is completed. */ static int hci_power_on_sync(struct hci_dev *hdev) { int err; if (test_bit(HCI_UP, &hdev->flags) && hci_dev_test_flag(hdev, HCI_MGMT) && hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { cancel_delayed_work(&hdev->power_off); return hci_powered_update_sync(hdev); } err = hci_dev_open_sync(hdev); if (err < 0) return err; /* During the HCI setup phase, a few error conditions are * ignored and they need to be checked now. If they are still * valid, it is important to return the device back off. */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || (hdev->dev_type == HCI_PRIMARY && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_close_sync(hdev); } else if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) { queue_delayed_work(hdev->req_workqueue, &hdev->power_off, HCI_AUTO_OFF_TIMEOUT); } if (hci_dev_test_and_clear_flag(hdev, HCI_SETUP)) { /* For unconfigured devices, set the HCI_RAW flag * so that userspace can easily identify them. */ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) set_bit(HCI_RAW, &hdev->flags); /* For fully configured devices, this will send * the Index Added event. For unconfigured devices, * it will send Unconfigued Index Added event. * * Devices with HCI_QUIRK_RAW_DEVICE are ignored * and no event will be send. */ mgmt_index_added(hdev); } else if (hci_dev_test_and_clear_flag(hdev, HCI_CONFIG)) { /* When the controller is now configured, then it * is important to clear the HCI_RAW flag. */ if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) clear_bit(HCI_RAW, &hdev->flags); /* Powering on the controller with HCI_CONFIG set only * happens with the transition from unconfigured to * configured. This will send the Index Added event. 
*/ mgmt_index_added(hdev); } return 0; } static int hci_remote_name_cancel_sync(struct hci_dev *hdev, bdaddr_t *addr) { struct hci_cp_remote_name_req_cancel cp; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, addr); return __hci_cmd_sync_status(hdev, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_stop_discovery_sync(struct hci_dev *hdev) { struct discovery_state *d = &hdev->discovery; struct inquiry_entry *e; int err; bt_dev_dbg(hdev, "state %u", hdev->discovery.state); if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) { if (test_bit(HCI_INQUIRY, &hdev->flags)) { err = __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); if (err) return err; } if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { cancel_delayed_work(&hdev->le_scan_disable); err = hci_scan_disable_sync(hdev); if (err) return err; } } else { err = hci_scan_disable_sync(hdev); if (err) return err; } /* Resume advertising if it was paused */ if (use_ll_privacy(hdev)) hci_resume_advertising_sync(hdev); /* No further actions needed for LE-only discovery */ if (d->type == DISCOV_TYPE_LE) return 0; if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) { e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_PENDING); if (!e) return 0; return hci_remote_name_cancel_sync(hdev, &e->data.bdaddr); } return 0; } static int hci_disconnect_phy_link_sync(struct hci_dev *hdev, u16 handle, u8 reason) { struct hci_cp_disconn_phy_link cp; memset(&cp, 0, sizeof(cp)); cp.phy_handle = HCI_PHY_HANDLE(handle); cp.reason = reason; return __hci_cmd_sync_status(hdev, HCI_OP_DISCONN_PHY_LINK, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_disconnect cp; if (conn->type == AMP_LINK) return hci_disconnect_phy_link_sync(hdev, conn->handle, reason); if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. */ hci_dev_lock(hdev); hci_conn_failed(conn, reason); hci_dev_unlock(hdev); return 0; } memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.reason = reason; /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is * used when suspending or powering off, where we don't want to wait * for the peer's response. */ if (reason != HCI_ERROR_REMOTE_POWER_OFF) return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp, HCI_EV_DISCONN_COMPLETE, HCI_CMD_TIMEOUT, NULL); return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { /* Return reason if scanning since the connection shall probably be * cleanup directly. 
*/ if (test_bit(HCI_CONN_SCANNING, &conn->flags)) return reason; if (conn->role == HCI_ROLE_SLAVE || test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { if (conn->type == LE_LINK) return hci_le_connect_cancel_sync(hdev, conn, reason); if (conn->type == ISO_LINK) { /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E * page 1857: * * If this command is issued for a CIS on the Central and the * CIS is successfully terminated before being established, * then an HCI_LE_CIS_Established event shall also be sent for * this CIS with the Status Operation Cancelled by Host (0x44). */ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) return hci_disconnect_sync(hdev, conn, reason); /* CIS with no Create CIS sent have nothing to cancel */ if (bacmp(&conn->dst, BDADDR_ANY)) return HCI_ERROR_LOCAL_HOST_TERM; /* There is no way to cancel a BIS without terminating the BIG * which is done later on connection cleanup. */ return 0; } if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; /* Wait for HCI_EV_CONN_COMPLETE, not HCI_EV_CMD_STATUS, when the * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is * used when suspending or powering off, where we don't want to wait * for the peer's response. */ if (reason != HCI_ERROR_REMOTE_POWER_OFF) return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN_CANCEL, 6, &conn->dst, HCI_EV_CONN_COMPLETE, HCI_CMD_TIMEOUT, NULL); return __hci_cmd_sync_status(hdev, HCI_OP_CREATE_CONN_CANCEL, 6, &conn->dst, HCI_CMD_TIMEOUT); } static int hci_reject_sco_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_reject_sync_conn_req cp; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.reason = reason; /* SCO rejection has its own limited set of * allowed error values (0x0D-0x0F). 
*/ if (reason < 0x0d || reason > 0x0f) cp.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; return __hci_cmd_sync_status(hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_reject_cis_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_le_reject_cis cp; memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.reason = reason; return __hci_cmd_sync_status(hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_reject_conn_req cp; if (conn->type == ISO_LINK) return hci_le_reject_cis_sync(hdev, conn, reason); if (conn->type == SCO_LINK || conn->type == ESCO_LINK) return hci_reject_sco_sync(hdev, conn, reason); memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.reason = reason; return __hci_cmd_sync_status(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { int err = 0; u16 handle = conn->handle; bool disconnect = false; struct hci_conn *c; switch (conn->state) { case BT_CONNECTED: case BT_CONFIG: err = hci_disconnect_sync(hdev, conn, reason); break; case BT_CONNECT: err = hci_connect_cancel_sync(hdev, conn, reason); break; case BT_CONNECT2: err = hci_reject_conn_sync(hdev, conn, reason); break; case BT_OPEN: case BT_BOUND: break; default: disconnect = true; break; } hci_dev_lock(hdev); /* Check if the connection has been cleaned up concurrently */ c = hci_conn_hash_lookup_handle(hdev, handle); if (!c || c != conn) { err = 0; goto unlock; } /* Cleanup hci_conn object if it cannot be cancelled as it * likelly means the controller and host stack are out of sync * or in case of LE it was still scanning so it can be cleanup * safely. */ if (disconnect) { conn->state = BT_CLOSED; hci_disconn_cfm(conn, reason); hci_conn_del(conn); } else { hci_conn_failed(conn, reason); } unlock: hci_dev_unlock(hdev); return err; } static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason) { struct list_head *head = &hdev->conn_hash.list; struct hci_conn *conn; rcu_read_lock(); while ((conn = list_first_or_null_rcu(head, struct hci_conn, list))) { /* Make sure the connection is not freed while unlocking */ conn = hci_conn_get(conn); rcu_read_unlock(); /* Disregard possible errors since hci_conn_del shall have been * called even in case of errors had occurred since it would * then cause hci_conn_failed to be called which calls * hci_conn_del internally. 
*/ hci_abort_conn_sync(hdev, conn, reason); hci_conn_put(conn); rcu_read_lock(); } rcu_read_unlock(); return 0; } /* This function perform power off HCI command sequence as follows: * * Clear Advertising * Stop Discovery * Disconnect all connections * hci_dev_close_sync */ static int hci_power_off_sync(struct hci_dev *hdev) { int err; /* If controller is already down there is nothing to do */ if (!test_bit(HCI_UP, &hdev->flags)) return 0; hci_dev_set_flag(hdev, HCI_POWERING_DOWN); if (test_bit(HCI_ISCAN, &hdev->flags) || test_bit(HCI_PSCAN, &hdev->flags)) { err = hci_write_scan_enable_sync(hdev, 0x00); if (err) goto out; } err = hci_clear_adv_sync(hdev, NULL, false); if (err) goto out; err = hci_stop_discovery_sync(hdev); if (err) goto out; /* Terminated due to Power Off */ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); if (err) goto out; err = hci_dev_close_sync(hdev); out: hci_dev_clear_flag(hdev, HCI_POWERING_DOWN); return err; } int hci_set_powered_sync(struct hci_dev *hdev, u8 val) { if (val) return hci_power_on_sync(hdev); return hci_power_off_sync(hdev); } static int hci_write_iac_sync(struct hci_dev *hdev) { struct hci_cp_write_current_iac_lap cp; if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) return 0; memset(&cp, 0, sizeof(cp)); if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { /* Limited discoverable mode */ cp.num_iac = min_t(u8, hdev->num_iac, 2); cp.iac_lap[0] = 0x00; /* LIAC */ cp.iac_lap[1] = 0x8b; cp.iac_lap[2] = 0x9e; cp.iac_lap[3] = 0x33; /* GIAC */ cp.iac_lap[4] = 0x8b; cp.iac_lap[5] = 0x9e; } else { /* General discoverable mode */ cp.num_iac = 1; cp.iac_lap[0] = 0x33; /* GIAC */ cp.iac_lap[1] = 0x8b; cp.iac_lap[2] = 0x9e; } return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_CURRENT_IAC_LAP, (cp.num_iac * 3) + 1, &cp, HCI_CMD_TIMEOUT); } int hci_update_discoverable_sync(struct hci_dev *hdev) { int err = 0; if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = hci_write_iac_sync(hdev); if (err) return err; err = hci_update_scan_sync(hdev); if (err) return err; err = hci_update_class_sync(hdev); if (err) return err; } /* Advertising instances don't use the global discoverable setting, so * only update AD if advertising was enabled using Set Advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) { err = hci_update_adv_data_sync(hdev, 0x00); if (err) return err; /* Discoverable mode affects the local advertising * address in limited privacy mode. */ if (hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) { if (ext_adv_capable(hdev)) err = hci_start_ext_adv_sync(hdev, 0x00); else err = hci_enable_advertising_sync(hdev); } } return err; } static int update_discoverable_sync(struct hci_dev *hdev, void *data) { return hci_update_discoverable_sync(hdev); } int hci_update_discoverable(struct hci_dev *hdev) { /* Only queue if it would have any effect */ if (hdev_is_powered(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING) && hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) return hci_cmd_sync_queue(hdev, update_discoverable_sync, NULL, NULL); return 0; } int hci_update_connectable_sync(struct hci_dev *hdev) { int err; err = hci_update_scan_sync(hdev); if (err) return err; /* If BR/EDR is not enabled and we disable advertising as a * by-product of disabling connectable, we need to update the * advertising flags. 
*/ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) err = hci_update_adv_data_sync(hdev, hdev->cur_adv_instance); /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || !list_empty(&hdev->adv_instances)) { if (ext_adv_capable(hdev)) err = hci_start_ext_adv_sync(hdev, hdev->cur_adv_instance); else err = hci_enable_advertising_sync(hdev); if (err) return err; } return hci_update_passive_scan_sync(hdev); } static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) { const u8 giac[3] = { 0x33, 0x8b, 0x9e }; const u8 liac[3] = { 0x00, 0x8b, 0x9e }; struct hci_cp_inquiry cp; bt_dev_dbg(hdev, ""); if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; hci_dev_lock(hdev); hci_inquiry_cache_flush(hdev); hci_dev_unlock(hdev); memset(&cp, 0, sizeof(cp)); if (hdev->discovery.limited) memcpy(&cp.lap, liac, sizeof(cp.lap)); else memcpy(&cp.lap, giac, sizeof(cp.lap)); cp.length = length; return __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) { u8 own_addr_type; /* Accept list is not used for discovery */ u8 filter_policy = 0x00; /* Default is to enable duplicates filter */ u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE; int err; bt_dev_dbg(hdev, ""); /* If controller is scanning, it means the passive scanning is * running. Thus, we should temporarily stop it in order to set the * discovery scanning parameters. */ err = hci_scan_disable_sync(hdev); if (err) { bt_dev_err(hdev, "Unable to disable scanning: %d", err); return err; } cancel_interleave_scan(hdev); /* Pause address resolution for active scan and stop advertising if * privacy is enabled. */ err = hci_pause_addr_resolution(hdev); if (err) goto failed; /* All active scans will be done with either a resolvable private * address (when privacy feature has been enabled) or non-resolvable * private address. */ err = hci_update_random_address_sync(hdev, true, scan_use_rpa(hdev), &own_addr_type); if (err < 0) own_addr_type = ADDR_LE_DEV_PUBLIC; if (hci_is_adv_monitoring(hdev) || (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && hdev->discovery.result_filtering)) { /* Duplicate filter should be disabled when some advertisement * monitor is activated, otherwise AdvMon can only receive one * advertisement for one peer(*) during active scanning, and * might report loss to these peers. * * If controller does strict duplicate filtering and the * discovery requires result filtering disables controller based * filtering since that can cause reports that would match the * host filter to not be reported. 
*/ filter_dup = LE_SCAN_FILTER_DUP_DISABLE; } err = hci_start_scan_sync(hdev, LE_SCAN_ACTIVE, interval, hdev->le_scan_window_discovery, own_addr_type, filter_policy, filter_dup); if (!err) return err; failed: /* Resume advertising if it was paused */ if (use_ll_privacy(hdev)) hci_resume_advertising_sync(hdev); /* Resume passive scanning */ hci_update_passive_scan_sync(hdev); return err; } static int hci_start_interleaved_discovery_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); err = hci_active_scan_sync(hdev, hdev->le_scan_int_discovery * 2); if (err) return err; return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN); } int hci_start_discovery_sync(struct hci_dev *hdev) { unsigned long timeout; int err; bt_dev_dbg(hdev, "type %u", hdev->discovery.type); switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN); case DISCOV_TYPE_INTERLEAVED: /* When running simultaneous discovery, the LE scanning time * should occupy the whole discovery time sine BR/EDR inquiry * and LE scanning are scheduled by the controller. * * For interleaving discovery in comparison, BR/EDR inquiry * and LE scanning are done sequentially with separate * timeouts. */ if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); /* During simultaneous discovery, we double LE scan * interval. We must leave some time for the controller * to do BR/EDR inquiry. */ err = hci_start_interleaved_discovery_sync(hdev); break; } timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); err = hci_active_scan_sync(hdev, hdev->le_scan_int_discovery); break; case DISCOV_TYPE_LE: timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); err = hci_active_scan_sync(hdev, hdev->le_scan_int_discovery); break; default: return -EINVAL; } if (err) return err; bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout)); queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, timeout); return 0; } static void hci_suspend_monitor_sync(struct hci_dev *hdev) { switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_MSFT: msft_suspend_sync(hdev); break; default: return; } } /* This function disables discovery and mark it as paused */ static int hci_pause_discovery_sync(struct hci_dev *hdev) { int old_state = hdev->discovery.state; int err; /* If discovery already stopped/stopping/paused there nothing to do */ if (old_state == DISCOVERY_STOPPED || old_state == DISCOVERY_STOPPING || hdev->discovery_paused) return 0; hci_discovery_set_state(hdev, DISCOVERY_STOPPING); err = hci_stop_discovery_sync(hdev); if (err) return err; hdev->discovery_paused = true; hdev->discovery_old_state = old_state; hci_discovery_set_state(hdev, DISCOVERY_STOPPED); return 0; } static int hci_update_event_filter_sync(struct hci_dev *hdev) { struct bdaddr_list_with_flags *b; u8 scan = SCAN_DISABLED; bool scanning = test_bit(HCI_PSCAN, &hdev->flags); int err; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; /* Some fake CSR controllers lock up after setting this type of * filter, so avoid sending the request altogether. 
*/ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) return 0; /* Always clear event filter when starting */ hci_clear_event_filter_sync(hdev); list_for_each_entry(b, &hdev->accept_list, list) { if (!(b->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) continue; bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr); err = hci_set_event_filter_sync(hdev, HCI_FLT_CONN_SETUP, HCI_CONN_SETUP_ALLOW_BDADDR, &b->bdaddr, HCI_CONN_SETUP_AUTO_ON); if (err) bt_dev_dbg(hdev, "Failed to set event filter for %pMR", &b->bdaddr); else scan = SCAN_PAGE; } if (scan && !scanning) hci_write_scan_enable_sync(hdev, scan); else if (!scan && scanning) hci_write_scan_enable_sync(hdev, scan); return 0; } /* This function disables scan (BR and LE) and mark it as paused */ static int hci_pause_scan_sync(struct hci_dev *hdev) { if (hdev->scanning_paused) return 0; /* Disable page scan if enabled */ if (test_bit(HCI_PSCAN, &hdev->flags)) hci_write_scan_enable_sync(hdev, SCAN_DISABLED); hci_scan_disable_sync(hdev); hdev->scanning_paused = true; return 0; } /* This function performs the HCI suspend procedures in the follow order: * * Pause discovery (active scanning/inquiry) * Pause Directed Advertising/Advertising * Pause Scanning (passive scanning in case discovery was not active) * Disconnect all connections * Set suspend_status to BT_SUSPEND_DISCONNECT if hdev cannot wakeup * otherwise: * Update event mask (only set events that are allowed to wake up the host) * Update event filter (with devices marked with HCI_CONN_FLAG_REMOTE_WAKEUP) * Update passive scanning (lower duty cycle) * Set suspend_status to BT_SUSPEND_CONFIGURE_WAKE */ int hci_suspend_sync(struct hci_dev *hdev) { int err; /* If marked as suspended there nothing to do */ if (hdev->suspended) return 0; /* Mark device as suspended */ hdev->suspended = true; /* Pause discovery if not already stopped */ hci_pause_discovery_sync(hdev); /* Pause other advertisements */ hci_pause_advertising_sync(hdev); /* Suspend monitor filters */ hci_suspend_monitor_sync(hdev); /* Prevent disconnects from causing scanning to be re-enabled */ hci_pause_scan_sync(hdev); if (hci_conn_count(hdev)) { /* Soft disconnect everything (power off) */ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); if (err) { /* Set state to BT_RUNNING so resume doesn't notify */ hdev->suspend_state = BT_RUNNING; hci_resume_sync(hdev); return err; } /* Update event mask so only the allowed event can wakeup the * host. */ hci_set_event_mask_sync(hdev); } /* Only configure accept list if disconnect succeeded and wake * isn't being prevented. */ if (!hdev->wakeup || !hdev->wakeup(hdev)) { hdev->suspend_state = BT_SUSPEND_DISCONNECT; return 0; } /* Unpause to take care of updating scanning params */ hdev->scanning_paused = false; /* Enable event filter for paired devices */ hci_update_event_filter_sync(hdev); /* Update LE passive scan if enabled */ hci_update_passive_scan_sync(hdev); /* Pause scan changes again. */ hdev->scanning_paused = true; hdev->suspend_state = BT_SUSPEND_CONFIGURE_WAKE; return 0; } /* This function resumes discovery */ static int hci_resume_discovery_sync(struct hci_dev *hdev) { int err; /* If discovery not paused there nothing to do */ if (!hdev->discovery_paused) return 0; hdev->discovery_paused = false; hci_discovery_set_state(hdev, DISCOVERY_STARTING); err = hci_start_discovery_sync(hdev); hci_discovery_set_state(hdev, err ? 
DISCOVERY_STOPPED : DISCOVERY_FINDING); return err; } static void hci_resume_monitor_sync(struct hci_dev *hdev) { switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_MSFT: msft_resume_sync(hdev); break; default: return; } } /* This function resume scan and reset paused flag */ static int hci_resume_scan_sync(struct hci_dev *hdev) { if (!hdev->scanning_paused) return 0; hdev->scanning_paused = false; hci_update_scan_sync(hdev); /* Reset passive scanning to normal */ hci_update_passive_scan_sync(hdev); return 0; } /* This function performs the HCI suspend procedures in the follow order: * * Restore event mask * Clear event filter * Update passive scanning (normal duty cycle) * Resume Directed Advertising/Advertising * Resume discovery (active scanning/inquiry) */ int hci_resume_sync(struct hci_dev *hdev) { /* If not marked as suspended there nothing to do */ if (!hdev->suspended) return 0; hdev->suspended = false; /* Restore event mask */ hci_set_event_mask_sync(hdev); /* Clear any event filters and restore scan state */ hci_clear_event_filter_sync(hdev); /* Resume scanning */ hci_resume_scan_sync(hdev); /* Resume monitor filters */ hci_resume_monitor_sync(hdev); /* Resume other advertisements */ hci_resume_advertising_sync(hdev); /* Resume discovery */ hci_resume_discovery_sync(hdev); return 0; } static bool conn_use_rpa(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; return hci_dev_test_flag(hdev, HCI_PRIVACY); } static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_ext_adv_params cp; int err; bdaddr_t random_addr; u8 own_addr_type; err = hci_update_random_address_sync(hdev, false, conn_use_rpa(conn), &own_addr_type); if (err) return err; /* Set require_privacy to false so that the remote device has a * chance of identifying us. */ err = hci_get_random_address(hdev, false, conn_use_rpa(conn), NULL, &own_addr_type, &random_addr); if (err) return err; memset(&cp, 0, sizeof(cp)); cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_DIRECT_IND); cp.channel_map = hdev->le_adv_channel_map; cp.tx_power = HCI_TX_POWER_INVALID; cp.primary_phy = HCI_ADV_PHY_1M; cp.secondary_phy = HCI_ADV_PHY_1M; cp.handle = 0x00; /* Use instance 0 for directed adv */ cp.own_addr_type = own_addr_type; cp.peer_addr_type = conn->dst_type; bacpy(&cp.peer_addr, &conn->dst); /* As per Core Spec 5.2 Vol 2, PART E, Sec 7.8.53, for * advertising_event_property LE_LEGACY_ADV_DIRECT_IND * does not supports advertising data when the advertising set already * contains some, the controller shall return erroc code 'Invalid * HCI Command Parameters(0x12). * So it is required to remove adv set for handle 0x00. since we use * instance 0 for directed adv. 
*/ err = hci_remove_ext_adv_instance_sync(hdev, cp.handle, NULL); if (err) return err; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) return err; /* Check if random address need to be updated */ if (own_addr_type == ADDR_LE_DEV_RANDOM && bacmp(&random_addr, BDADDR_ANY) && bacmp(&random_addr, &hdev->random_addr)) { err = hci_set_adv_set_random_addr_sync(hdev, 0x00, &random_addr); if (err) return err; } return hci_enable_ext_advertising_sync(hdev, 0x00); } static int hci_le_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_adv_param cp; u8 status; u8 own_addr_type; u8 enable; if (ext_adv_capable(hdev)) return hci_le_ext_directed_advertising_sync(hdev, conn); /* Clear the HCI_LE_ADV bit temporarily so that the * hci_update_random_address knows that it's safe to go ahead * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ hci_dev_clear_flag(hdev, HCI_LE_ADV); /* Set require_privacy to false so that the remote device has a * chance of identifying us. */ status = hci_update_random_address_sync(hdev, false, conn_use_rpa(conn), &own_addr_type); if (status) return status; memset(&cp, 0, sizeof(cp)); /* Some controllers might reject command if intervals are not * within range for undirected advertising. * BCM20702A0 is known to be affected by this. */ cp.min_interval = cpu_to_le16(0x0020); cp.max_interval = cpu_to_le16(0x0020); cp.type = LE_ADV_DIRECT_IND; cp.own_address_type = own_addr_type; cp.direct_addr_type = conn->dst_type; bacpy(&cp.direct_addr, &conn->dst); cp.channel_map = hdev->le_adv_channel_map; status = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (status) return status; enable = 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); } static void set_ext_conn_params(struct hci_conn *conn, struct hci_cp_le_ext_conn_param *p) { struct hci_dev *hdev = conn->hdev; memset(p, 0, sizeof(*p)); p->scan_interval = cpu_to_le16(hdev->le_scan_int_connect); p->scan_window = cpu_to_le16(hdev->le_scan_window_connect); p->conn_interval_min = cpu_to_le16(conn->le_conn_min_interval); p->conn_interval_max = cpu_to_le16(conn->le_conn_max_interval); p->conn_latency = cpu_to_le16(conn->le_conn_latency); p->supervision_timeout = cpu_to_le16(conn->le_supv_timeout); p->min_ce_len = cpu_to_le16(0x0000); p->max_ce_len = cpu_to_le16(0x0000); } static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 own_addr_type) { struct hci_cp_le_ext_create_conn *cp; struct hci_cp_le_ext_conn_param *p; u8 data[sizeof(*cp) + sizeof(*p) * 3]; u32 plen; cp = (void *)data; p = (void *)cp->data; memset(cp, 0, sizeof(*cp)); bacpy(&cp->peer_addr, &conn->dst); cp->peer_addr_type = conn->dst_type; cp->own_addr_type = own_addr_type; plen = sizeof(*cp); if (scan_1m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_1M || conn->le_adv_sec_phy == HCI_ADV_PHY_1M)) { cp->phys |= LE_SCAN_PHY_1M; set_ext_conn_params(conn, p); p++; plen += sizeof(*p); } if (scan_2m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_2M || conn->le_adv_sec_phy == HCI_ADV_PHY_2M)) { cp->phys |= LE_SCAN_PHY_2M; set_ext_conn_params(conn, p); p++; plen += sizeof(*p); } if (scan_coded(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_CODED || conn->le_adv_sec_phy == HCI_ADV_PHY_CODED)) { cp->phys |= LE_SCAN_PHY_CODED; set_ext_conn_params(conn, p); plen += sizeof(*p); } return 
__hci_cmd_sync_status_sk(hdev, HCI_OP_LE_EXT_CREATE_CONN, plen, data, HCI_EV_LE_ENHANCED_CONN_COMPLETE, conn->conn_timeout, NULL); } static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data) { struct hci_cp_le_create_conn cp; struct hci_conn_params *params; u8 own_addr_type; int err; struct hci_conn *conn = data; if (!hci_conn_valid(hdev, conn)) return -ECANCELED; bt_dev_dbg(hdev, "conn %p", conn); clear_bit(HCI_CONN_SCANNING, &conn->flags); conn->state = BT_CONNECT; /* If requested to connect as peripheral use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { /* If we're active scanning and simultaneous roles is not * enabled simply reject the attempt. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE && !hci_dev_test_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES)) { hci_conn_del(conn); return -EBUSY; } /* Pause advertising while doing directed advertising. */ hci_pause_advertising_sync(hdev); err = hci_le_directed_advertising_sync(hdev, conn); goto done; } /* Disable advertising if simultaneous roles is not in use. */ if (!hci_dev_test_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES)) hci_pause_advertising_sync(hdev); params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); if (params) { conn->le_conn_min_interval = params->conn_min_interval; conn->le_conn_max_interval = params->conn_max_interval; conn->le_conn_latency = params->conn_latency; conn->le_supv_timeout = params->supervision_timeout; } else { conn->le_conn_min_interval = hdev->le_conn_min_interval; conn->le_conn_max_interval = hdev->le_conn_max_interval; conn->le_conn_latency = hdev->le_conn_latency; conn->le_supv_timeout = hdev->le_supv_timeout; } /* If controller is scanning, we stop it since some controllers are * not able to scan and connect at the same time. Also set the * HCI_LE_SCAN_INTERRUPTED flag so that the command complete * handler for scan disabling knows to set the correct discovery * state. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { hci_scan_disable_sync(hdev); hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); } /* Update random address, but set require_privacy to false so * that we never connect with an non-resolvable address. */ err = hci_update_random_address_sync(hdev, false, conn_use_rpa(conn), &own_addr_type); if (err) goto done; if (use_ext_conn(hdev)) { err = hci_le_ext_create_conn_sync(hdev, conn, own_addr_type); goto done; } memset(&cp, 0, sizeof(cp)); cp.scan_interval = cpu_to_le16(hdev->le_scan_int_connect); cp.scan_window = cpu_to_le16(hdev->le_scan_window_connect); bacpy(&cp.peer_addr, &conn->dst); cp.peer_addr_type = conn->dst_type; cp.own_address_type = own_addr_type; cp.conn_interval_min = cpu_to_le16(conn->le_conn_min_interval); cp.conn_interval_max = cpu_to_le16(conn->le_conn_max_interval); cp.conn_latency = cpu_to_le16(conn->le_conn_latency); cp.supervision_timeout = cpu_to_le16(conn->le_supv_timeout); cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2261: * * If this event is unmasked and the HCI_LE_Connection_Complete event * is unmasked, only the HCI_LE_Enhanced_Connection_Complete event is * sent when a new connection has been created. */ err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp, use_enhanced_conn_complete(hdev) ? 
HCI_EV_LE_ENHANCED_CONN_COMPLETE : HCI_EV_LE_CONN_COMPLETE, conn->conn_timeout, NULL); done: if (err == -ETIMEDOUT) hci_le_connect_cancel_sync(hdev, conn, 0x00); /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); return err; } int hci_le_create_cis_sync(struct hci_dev *hdev) { struct { struct hci_cp_le_create_cis cp; struct hci_cis cis[0x1f]; } cmd; struct hci_conn *conn; u8 cig = BT_ISO_QOS_CIG_UNSET; /* The spec allows only one pending LE Create CIS command at a time. If * the command is pending now, don't do anything. We check for pending * connections after each CIS Established event. * * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E * page 2566: * * If the Host issues this command before all the * HCI_LE_CIS_Established events from the previous use of the * command have been generated, the Controller shall return the * error code Command Disallowed (0x0C). * * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E * page 2567: * * When the Controller receives the HCI_LE_Create_CIS command, the * Controller sends the HCI_Command_Status event to the Host. An * HCI_LE_CIS_Established event will be generated for each CIS when it * is established or if it is disconnected or considered lost before * being established; until all the events are generated, the command * remains pending. */ memset(&cmd, 0, sizeof(cmd)); hci_dev_lock(hdev); rcu_read_lock(); /* Wait until previous Create CIS has completed */ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) goto done; } /* Find CIG with all CIS ready */ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { struct hci_conn *link; if (hci_conn_check_create_cis(conn)) continue; cig = conn->iso_qos.ucast.cig; list_for_each_entry_rcu(link, &hdev->conn_hash.list, list) { if (hci_conn_check_create_cis(link) > 0 && link->iso_qos.ucast.cig == cig && link->state != BT_CONNECTED) { cig = BT_ISO_QOS_CIG_UNSET; break; } } if (cig != BT_ISO_QOS_CIG_UNSET) break; } if (cig == BT_ISO_QOS_CIG_UNSET) goto done; list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis]; if (hci_conn_check_create_cis(conn) || conn->iso_qos.ucast.cig != cig) continue; set_bit(HCI_CONN_CREATE_CIS, &conn->flags); cis->acl_handle = cpu_to_le16(conn->parent->handle); cis->cis_handle = cpu_to_le16(conn->handle); cmd.cp.num_cis++; if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis)) break; } done: rcu_read_unlock(); hci_dev_unlock(hdev); if (!cmd.cp.num_cis) return 0; /* Wait for HCI_LE_CIS_Established */ return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS, sizeof(cmd.cp) + sizeof(cmd.cis[0]) * cmd.cp.num_cis, &cmd, HCI_EVT_LE_CIS_ESTABLISHED, conn->conn_timeout, NULL); } int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle) { struct hci_cp_le_remove_cig cp; memset(&cp, 0, sizeof(cp)); cp.cig_id = handle; return __hci_cmd_sync_status(hdev, HCI_OP_LE_REMOVE_CIG, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle) { struct hci_cp_le_big_term_sync cp; memset(&cp, 0, sizeof(cp)); cp.handle = handle; return __hci_cmd_sync_status(hdev, HCI_OP_LE_BIG_TERM_SYNC, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle) { struct hci_cp_le_pa_term_sync cp; memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(handle); return __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_TERM_SYNC, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int 
hci_get_random_address(struct hci_dev *hdev, bool require_privacy, bool use_rpa, struct adv_info *adv_instance, u8 *own_addr_type, bdaddr_t *rand_addr) { int err; bacpy(rand_addr, BDADDR_ANY); /* If privacy is enabled use a resolvable private address. If * current RPA has expired then generate a new one. */ if (use_rpa) { /* If Controller supports LL Privacy use own address type is * 0x03 */ if (use_ll_privacy(hdev)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; if (adv_instance) { if (adv_rpa_valid(adv_instance)) return 0; } else { if (rpa_valid(hdev)) return 0; } err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { bt_dev_err(hdev, "failed to generate new RPA"); return err; } bacpy(rand_addr, &hdev->rpa); return 0; } /* In case of required privacy without resolvable private address, * use an non-resolvable private address. This is useful for * non-connectable advertising. */ if (require_privacy) { bdaddr_t nrpa; while (true) { /* The non-resolvable private address is generated * from random six bytes with the two most significant * bits cleared. */ get_random_bytes(&nrpa, 6); nrpa.b[5] &= 0x3f; /* The non-resolvable private address shall not be * equal to the public address. */ if (bacmp(&hdev->bdaddr, &nrpa)) break; } *own_addr_type = ADDR_LE_DEV_RANDOM; bacpy(rand_addr, &nrpa); return 0; } /* No privacy so use a public address. */ *own_addr_type = ADDR_LE_DEV_PUBLIC; return 0; } static int _update_adv_data_sync(struct hci_dev *hdev, void *data) { u8 instance = PTR_UINT(data); return hci_update_adv_data_sync(hdev, instance); } int hci_update_adv_data(struct hci_dev *hdev, u8 instance) { return hci_cmd_sync_queue(hdev, _update_adv_data_sync, UINT_PTR(instance), NULL); } static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) { struct hci_conn *conn = data; struct inquiry_entry *ie; struct hci_cp_create_conn cp; int err; if (!hci_conn_valid(hdev, conn)) return -ECANCELED; /* Many controllers disallow HCI Create Connection while it is doing * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create * Connection. This may cause the MGMT discovering state to become false * without user space's request but it is okay since the MGMT Discovery * APIs do not promise that discovery should be done forever. Instead, * the user space monitors the status of MGMT discovering and it may * request for discovery again when this flag becomes false. 
*/ if (test_bit(HCI_INQUIRY, &hdev->flags)) { err = __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); if (err) bt_dev_warn(hdev, "Failed to cancel inquiry %d", err); } conn->state = BT_CONNECT; conn->out = true; conn->role = HCI_ROLE_MASTER; conn->attempt++; conn->link_policy = hdev->link_policy; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; ie = hci_inquiry_cache_lookup(hdev, &conn->dst); if (ie) { if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { cp.pscan_rep_mode = ie->data.pscan_rep_mode; cp.pscan_mode = ie->data.pscan_mode; cp.clock_offset = ie->data.clock_offset | cpu_to_le16(0x8000); } memcpy(conn->dev_class, ie->data.dev_class, 3); } cp.pkt_type = cpu_to_le16(conn->pkt_type); if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) cp.role_switch = 0x01; else cp.role_switch = 0x00; return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp, HCI_EV_CONN_COMPLETE, conn->conn_timeout, NULL); } int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, NULL); } static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) { struct hci_conn *conn = data; bt_dev_dbg(hdev, "err %d", err); if (err == -ECANCELED) return; hci_dev_lock(hdev); if (!hci_conn_valid(hdev, conn)) goto done; if (!err) { hci_connect_le_scan_cleanup(conn, 0x00); goto done; } /* Check if connection is still pending */ if (conn != hci_lookup_le_connect(hdev)) goto done; /* Flush to make sure we send create conn cancel command if needed */ flush_delayed_work(&conn->le_conn_timeout); hci_conn_failed(conn, bt_status(err)); done: hci_dev_unlock(hdev); } int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn) { return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, create_le_conn_complete); } int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) { if (conn->state != BT_OPEN) return -EINVAL; switch (conn->type) { case ACL_LINK: return !hci_cmd_sync_dequeue_once(hdev, hci_acl_create_conn_sync, conn, NULL); case LE_LINK: return !hci_cmd_sync_dequeue_once(hdev, hci_le_create_conn_sync, conn, create_le_conn_complete); } return -ENOENT; }
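/*
 * Illustrative sketch (not part of the file above): the connect/cancel pair
 * ending here relies on hci_cmd_sync_queue_once() and
 * hci_cmd_sync_dequeue_once() being called with the same callback and data
 * pointer, which is what allows a queued but not yet executed request to be
 * dropped without touching the controller. The hypothetical example below
 * mirrors that contract for a made-up request; the function names and the
 * work they perform are assumptions for illustration only.
 */
#if 0	/* example only, never compiled */
static int example_req_sync(struct hci_dev *hdev, void *data)
{
	struct hci_conn *conn = data;

	/* A queued callback re-validates its object, exactly like
	 * hci_acl_create_conn_sync() and hci_le_create_conn_sync() do,
	 * because the connection may have been cleaned up while the
	 * request sat on the cmd_sync queue.
	 */
	if (!hci_conn_valid(hdev, conn))
		return -ECANCELED;

	bt_dev_dbg(hdev, "conn %p", conn);
	return 0;
}

static int example_submit(struct hci_dev *hdev, struct hci_conn *conn)
{
	/* Queue at most one instance per (callback, data) pair. */
	return hci_cmd_sync_queue_once(hdev, example_req_sync, conn, NULL);
}

static int example_cancel(struct hci_dev *hdev, struct hci_conn *conn)
{
	/* Return 0 when the pending entry was found and removed, mirroring
	 * hci_cancel_connect_sync() returning !hci_cmd_sync_dequeue_once().
	 */
	return !hci_cmd_sync_dequeue_once(hdev, example_req_sync, conn, NULL);
}
#endif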
// SPDX-License-Identifier: GPL-2.0
/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <linux/compat.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/elf.h>

/*
 * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
 * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
 * Otherwise, returns 0.
 */
int pmd_huge(pmd_t pmd)
{
	return !pmd_none(pmd) &&
		(pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
}

/*
 * pud_huge() returns 1 if @pud is hugetlb related entry, that is normal
 * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
 * Otherwise, returns 0.
 */
int pud_huge(pud_t pud)
{
#if CONFIG_PGTABLE_LEVELS > 2
	return !pud_none(pud) &&
		(pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
#else
	return 0;
#endif
}

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct vm_unmapped_area_info info;

	info.flags = 0;
	info.length = len;
	info.low_limit = get_mmap_base(1);

	/*
	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
	 * in the full address space.
	 */
	info.high_limit = in_32bit_syscall() ?
		task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);

	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	return vm_unmapped_area(&info);
}

static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct vm_unmapped_area_info info;

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = get_mmap_base(0);

	/*
	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
	 * in the full address space.
	 */
	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;

	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (addr & ~PAGE_MASK) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = TASK_SIZE_LOW;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	if (len & ~huge_page_mask(h))
		return -EINVAL;

	if (len > TASK_SIZE)
		return -ENOMEM;

	/* No address checking. See comment at mmap_address_hint_valid() */
	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr &= huge_page_mask(h);
		if (!mmap_address_hint_valid(addr, len))
			goto get_unmapped_area;

		vma = find_vma(mm, addr);
		if (!vma || addr + len <= vm_start_gap(vma))
			return addr;
	}

get_unmapped_area:
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_X86_64
bool __init arch_hugetlb_valid_size(unsigned long size)
{
	if (size == PMD_SIZE)
		return true;
	else if (size == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES))
		return true;
	else
		return false;
}

#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
	/* With compaction or CMA we can allocate gigantic pages at runtime */
	if (boot_cpu_has(X86_FEATURE_GBPAGES))
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	return 0;
}
arch_initcall(gigantic_pages_init);
#endif
#endif
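/*
 * Usage sketch (an assumption, not part of this file): the unmapped-area
 * helpers above run when user space creates a hugetlb mapping, e.g. via
 * mmap(MAP_HUGETLB), and arch_hugetlb_valid_size() is what decides that
 * 2 MiB (PMD) pages are always accepted on x86-64 while 1 GiB (PUD) pages
 * need X86_FEATURE_GBPAGES. The minimal user-space snippet below illustrates
 * such a request; error handling is reduced to the bare minimum.
 */
#if 0	/* user-space example, never compiled as part of the kernel */
#include <sys/mman.h>
#include <linux/mman.h>
#include <stdio.h>

int main(void)
{
	size_t len = 2UL << 20;	/* one 2 MiB (PMD-sized) huge page */
	void *p;

	/* MAP_HUGE_2MB selects the 2 MiB hstate explicitly; with
	 * MAP_HUGE_1GB the request can only succeed when a PUD-sized
	 * hstate was registered (X86_FEATURE_GBPAGES present).
	 */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB,
		 -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	munmap(p, len);
	return 0;
}
#endif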
// SPDX-License-Identifier: GPL-2.0
/*
 * USB-ACPI glue code
 *
 * Copyright 2012 Red Hat <mjg@redhat.com>
 */
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/pci.h>
#include <linux/usb/hcd.h>

#include "hub.h"

/**
 * usb_acpi_power_manageable - check whether usb port has
 * acpi power resource.
 * @hdev: USB device belonging to the usb hub
 * @index: port index based zero
 *
 * Return true if the port has acpi power resource and false if no.
 */
bool usb_acpi_power_manageable(struct usb_device *hdev, int index)
{
	acpi_handle port_handle;
	int port1 = index + 1;

	port_handle = usb_get_hub_port_acpi_handle(hdev, port1);
	if (port_handle)
		return acpi_bus_power_manageable(port_handle);
	else
		return false;
}
EXPORT_SYMBOL_GPL(usb_acpi_power_manageable);

#define UUID_USB_CONTROLLER_DSM "ce2ee385-00e6-48cb-9f05-2edb927c4899"
#define USB_DSM_DISABLE_U1_U2_FOR_PORT	5

/**
 * usb_acpi_port_lpm_incapable - check if lpm should be disabled for a port.
 * @hdev: USB device belonging to the usb hub
 * @index: zero based port index
 *
 * Some USB3 ports may not support USB3 link power management U1/U2 states
 * due to different retimer setup. ACPI provides _DSM method which returns 0x01
 * if U1 and U2 states should be disabled.
Evaluate _DSM with: * Arg0: UUID = ce2ee385-00e6-48cb-9f05-2edb927c4899 * Arg1: Revision ID = 0 * Arg2: Function Index = 5 * Arg3: (empty) * * Return 1 if USB3 port is LPM incapable, negative on error, otherwise 0 */ int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) { union acpi_object *obj; acpi_handle port_handle; int port1 = index + 1; guid_t guid; int ret; ret = guid_parse(UUID_USB_CONTROLLER_DSM, &guid); if (ret) return ret; port_handle = usb_get_hub_port_acpi_handle(hdev, port1); if (!port_handle) { dev_dbg(&hdev->dev, "port-%d no acpi handle\n", port1); return -ENODEV; } if (!acpi_check_dsm(port_handle, &guid, 0, BIT(USB_DSM_DISABLE_U1_U2_FOR_PORT))) { dev_dbg(&hdev->dev, "port-%d no _DSM function %d\n", port1, USB_DSM_DISABLE_U1_U2_FOR_PORT); return -ENODEV; } obj = acpi_evaluate_dsm_typed(port_handle, &guid, 0, USB_DSM_DISABLE_U1_U2_FOR_PORT, NULL, ACPI_TYPE_INTEGER); if (!obj) { dev_dbg(&hdev->dev, "evaluate port-%d _DSM failed\n", port1); return -EINVAL; } if (obj->integer.value == 0x01) ret = 1; ACPI_FREE(obj); return ret; } EXPORT_SYMBOL_GPL(usb_acpi_port_lpm_incapable); /** * usb_acpi_set_power_state - control usb port's power via acpi power * resource * @hdev: USB device belonging to the usb hub * @index: port index based zero * @enable: power state expected to be set * * Notice to use usb_acpi_power_manageable() to check whether the usb port * has acpi power resource before invoking this function. * * Returns 0 on success, else negative errno. */ int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); struct usb_port *port_dev; acpi_handle port_handle; unsigned char state; int port1 = index + 1; int error = -EINVAL; if (!hub) return -ENODEV; port_dev = hub->ports[port1 - 1]; port_handle = (acpi_handle) usb_get_hub_port_acpi_handle(hdev, port1); if (!port_handle) return error; if (enable) state = ACPI_STATE_D0; else state = ACPI_STATE_D3_COLD; error = acpi_bus_set_power(port_handle, state); if (!error) dev_dbg(&port_dev->dev, "acpi: power was set to %d\n", enable); else dev_dbg(&port_dev->dev, "acpi: power failed to be set\n"); return error; } EXPORT_SYMBOL_GPL(usb_acpi_set_power_state); /* * Private to usb-acpi, all the core needs to know is that * port_dev->location is non-zero when it has been set by the firmware. */ #define USB_ACPI_LOCATION_VALID (1 << 31) static void usb_acpi_get_connect_type(struct usb_port *port_dev, acpi_handle *handle) { enum usb_port_connect_type connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *upc = NULL; struct acpi_pld_info *pld = NULL; acpi_status status; /* * According to 9.14 in ACPI Spec 6.2. _PLD indicates whether usb port * is user visible and _UPC indicates whether it is connectable. If * the port was visible and connectable, it could be freely connected * and disconnected with USB devices. If no visible and connectable, * a usb device is directly hard-wired to the port. If no visible and * no connectable, the port would be not used. 
*/ status = acpi_get_physical_device_location(handle, &pld); if (ACPI_SUCCESS(status) && pld) port_dev->location = USB_ACPI_LOCATION_VALID | pld->group_token << 8 | pld->group_position; status = acpi_evaluate_object(handle, "_UPC", NULL, &buffer); if (ACPI_FAILURE(status)) goto out; upc = buffer.pointer; if (!upc || (upc->type != ACPI_TYPE_PACKAGE) || upc->package.count != 4) goto out; /* UPC states port is connectable */ if (upc->package.elements[0].integer.value) if (!pld) ; /* keep connect_type as unknown */ else if (pld->user_visible) connect_type = USB_PORT_CONNECT_TYPE_HOT_PLUG; else connect_type = USB_PORT_CONNECT_TYPE_HARD_WIRED; else connect_type = USB_PORT_NOT_USED; out: port_dev->connect_type = connect_type; kfree(upc); ACPI_FREE(pld); } static struct acpi_device * usb_acpi_get_companion_for_port(struct usb_port *port_dev) { struct usb_device *udev; struct acpi_device *adev; acpi_handle *parent_handle; int port1; /* Get the struct usb_device point of port's hub */ udev = to_usb_device(port_dev->dev.parent->parent); /* * The root hub ports' parent is the root hub. The non-root-hub * ports' parent is the parent hub port which the hub is * connected to. */ if (!udev->parent) { adev = ACPI_COMPANION(&udev->dev); port1 = usb_hcd_find_raw_port_number(bus_to_hcd(udev->bus), port_dev->portnum); } else { parent_handle = usb_get_hub_port_acpi_handle(udev->parent, udev->portnum); if (!parent_handle) return NULL; adev = acpi_fetch_acpi_dev(parent_handle); port1 = port_dev->portnum; } return acpi_find_child_by_adr(adev, port1); } static struct acpi_device * usb_acpi_find_companion_for_port(struct usb_port *port_dev) { struct acpi_device *adev; adev = usb_acpi_get_companion_for_port(port_dev); if (!adev) return NULL; usb_acpi_get_connect_type(port_dev, adev->handle); return adev; } static struct acpi_device * usb_acpi_find_companion_for_device(struct usb_device *udev) { struct acpi_device *adev; struct usb_port *port_dev; struct usb_hub *hub; if (!udev->parent) { /* * root hub is only child (_ADR=0) under its parent, the HC. * sysdev pointer is the HC as seen from firmware. */ adev = ACPI_COMPANION(udev->bus->sysdev); return acpi_find_child_device(adev, 0, false); } hub = usb_hub_to_struct_hub(udev->parent); if (!hub) return NULL; /* * This is an embedded USB device connected to a port and such * devices share port's ACPI companion. */ port_dev = hub->ports[udev->portnum - 1]; return usb_acpi_get_companion_for_port(port_dev); } static struct acpi_device *usb_acpi_find_companion(struct device *dev) { /* * The USB hierarchy like following: * * Device (EHC1) * Device (HUBN) * Device (PR01) * Device (PR11) * Device (PR12) * Device (FN12) * Device (FN13) * Device (PR13) * ... * where HUBN is root hub, and PRNN are USB ports and devices * connected to them, and FNNN are individualk functions for * connected composite USB devices. PRNN and FNNN may contain * _CRS and other methods describing sideband resources for * the connected device. * * On the kernel side both root hub and embedded USB devices are * represented as instances of usb_device structure, and ports * are represented as usb_port structures, so the whole process * is split into 2 parts: finding companions for devices and * finding companions for ports. * * Note that we do not handle individual functions of composite * devices yet, for that we would need to assign companions to * devices corresponding to USB interfaces. 
	 */
	if (is_usb_device(dev))
		return usb_acpi_find_companion_for_device(to_usb_device(dev));
	else if (is_usb_port(dev))
		return usb_acpi_find_companion_for_port(to_usb_port(dev));

	return NULL;
}

static bool usb_acpi_bus_match(struct device *dev)
{
	return is_usb_device(dev) || is_usb_port(dev);
}

static struct acpi_bus_type usb_acpi_bus = {
	.name = "USB",
	.match = usb_acpi_bus_match,
	.find_companion = usb_acpi_find_companion,
};

int usb_acpi_register(void)
{
	return register_acpi_bus_type(&usb_acpi_bus);
}

void usb_acpi_unregister(void)
{
	unregister_acpi_bus_type(&usb_acpi_bus);
}
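That closes the USB-ACPI glue. As a usage illustration (a sketch under assumptions, not code from this file): callers such as the hub/port code are expected to gate ACPI power-state changes on usb_acpi_power_manageable(), roughly as below. The wrapper name hub_port_set_acpi_power() is invented for the example; only the two exported helpers documented above are real, and both take a zero-based port index.

/* Illustrative only: how a caller combines the two exported helpers. */
static int hub_port_set_acpi_power(struct usb_device *hdev, int port1,
				   bool enable)
{
	int index = port1 - 1;	/* helpers take a zero-based index */

	/* Only touch ACPI power resources if the port actually has one. */
	if (!usb_acpi_power_manageable(hdev, index))
		return -ENODEV;

	/* Maps to ACPI_STATE_D0 (on) or ACPI_STATE_D3_COLD (off) above. */
	return usb_acpi_set_power_state(hdev, index, enable);
}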
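The next file is the x86 XSAVE/XRSTOR state management code, including the dynamic-feature permission interface (ARCH_REQ_XCOMP_PERM and friends) handled by fpu_xstate_prctl() later in the file. As a hedged userspace sketch of that interface (not part of the kernel source, assuming a v5.16+ x86-64 kernel whose UAPI asm/prctl.h defines the ARCH_*_XCOMP_* constants), a process requests AMX tile state roughly like this; XFEATURE_XTILE_DATA (18) is spelled out because the UAPI headers do not export the xfeature enum.

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <asm/prctl.h>		/* ARCH_GET_XCOMP_PERM, ARCH_REQ_XCOMP_PERM */

#define XFEATURE_XTILE_DATA	18	/* highest component of the AMX facility */

int main(void)
{
	unsigned long long perm = 0;

	/* Request permission for the dynamically enabled AMX state. */
	if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILE_DATA)) {
		perror("ARCH_REQ_XCOMP_PERM");
		return 1;
	}

	/* Read back the per-process permission bitmap. */
	if (syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &perm)) {
		perror("ARCH_GET_XCOMP_PERM");
		return 1;
	}

	printf("permitted xfeatures: %#llx\n", perm);
	return 0;
}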
// SPDX-License-Identifier: GPL-2.0-only
/*
 * xsave/xrstor support.
 *
 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
 */
#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/nospec.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>

#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))

/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused. We use other mechanisms
 * to save/restore PT state in Linux.
 */
static const char *xfeature_names[] =
{
	"x87 floating point registers",
	"SSE registers",
	"AVX registers",
	"MPX bounds registers",
	"MPX CSR",
	"AVX-512 opmask",
	"AVX-512 Hi256",
	"AVX-512 ZMM_Hi256",
	"Processor Trace (unused)",
	"Protection Keys User registers",
	"PASID state",
	"Control-flow User registers",
	"Control-flow Kernel registers (unused)",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"AMX Tile config",
	"AMX Tile data",
	"unknown xstate feature",
};

static unsigned short xsave_cpuid_features[] __initdata = {
	[XFEATURE_FP]				= X86_FEATURE_FPU,
	[XFEATURE_SSE]				= X86_FEATURE_XMM,
	[XFEATURE_YMM]				= X86_FEATURE_AVX,
	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
};

static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;

#define XSTATE_FLAG_SUPERVISOR	BIT(0)
#define XSTATE_FLAG_ALIGNED64	BIT(1)

/*
 * Return whether the system supports a given xfeature.
* * Also return the name of the (most advanced) feature that the caller requested: */ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) { u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features; if (unlikely(feature_name)) { long xfeature_idx, max_idx; u64 xfeatures_print; /* * So we use FLS here to be able to print the most advanced * feature that was requested but is missing. So if a driver * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the * missing AVX feature - this is the most informative message * to users: */ if (xfeatures_missing) xfeatures_print = xfeatures_missing; else xfeatures_print = xfeatures_needed; xfeature_idx = fls64(xfeatures_print)-1; max_idx = ARRAY_SIZE(xfeature_names)-1; xfeature_idx = min(xfeature_idx, max_idx); *feature_name = xfeature_names[xfeature_idx]; } if (xfeatures_missing) return 0; return 1; } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); static bool xfeature_is_aligned64(int xfeature_nr) { return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64; } static bool xfeature_is_supervisor(int xfeature_nr) { return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR; } static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature) { unsigned int offs, i; /* * Non-compacted format and legacy features use the cached fixed * offsets. */ if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) || xfeature <= XFEATURE_SSE) return xstate_offsets[xfeature]; /* * Compacted format offsets depend on the actual content of the * compacted xsave area which is determined by the xcomp_bv header * field. */ offs = FXSAVE_SIZE + XSAVE_HDR_SIZE; for_each_extended_xfeature(i, xcomp_bv) { if (xfeature_is_aligned64(i)) offs = ALIGN(offs, 64); if (i == xfeature) break; offs += xstate_sizes[i]; } return offs; } /* * Enable the extended processor state save/restore feature. * Called once per CPU onlining. */ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features) return; cr4_set_bits(X86_CR4_OSXSAVE); /* * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures * that any stale state is wiped out from XFD. Reset the per CPU * xfd cache too. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) xfd_set_state(init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user * states can be set here. */ xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * MSR_IA32_XSS sets supervisor states managed by XSAVES. */ if (boot_cpu_has(X86_FEATURE_XSAVES)) { wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } } static bool xfeature_enabled(enum xfeature xfeature) { return fpu_kernel_cfg.max_features & BIT_ULL(xfeature); } /* * Record the offsets and sizes of various xstates contained * in the XSAVE state memory layout. */ static void __init setup_xstate_cache(void) { u32 eax, ebx, ecx, edx, i; /* start at the beginning of the "extended state" */ unsigned int last_good_offset = offsetof(struct xregs_state, extended_state_area); /* * The FP xstates and SSE xstates are legacy states. They are always * in the fixed offsets in the xsave area in either compacted form * or standard form. 
*/ xstate_offsets[XFEATURE_FP] = 0; xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state, xmm_space); xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP]; xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); xstate_sizes[i] = eax; xstate_flags[i] = ecx; /* * If an xfeature is supervisor state, the offset in EBX is * invalid, leave it to -1. */ if (xfeature_is_supervisor(i)) continue; xstate_offsets[i] = ebx; /* * In our xstate size checks, we assume that the highest-numbered * xstate feature has the highest offset in the buffer. Ensure * it does. */ WARN_ONCE(last_good_offset > xstate_offsets[i], "x86/fpu: misordered xstate at %d\n", last_good_offset); last_good_offset = xstate_offsets[i]; } } static void __init print_xstate_feature(u64 xstate_mask) { const char *feature_name; if (cpu_has_xfeatures(xstate_mask, &feature_name)) pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name); } /* * Print out all the supported xstate features: */ static void __init print_xstate_features(void) { print_xstate_feature(XFEATURE_MASK_FP); print_xstate_feature(XFEATURE_MASK_SSE); print_xstate_feature(XFEATURE_MASK_YMM); print_xstate_feature(XFEATURE_MASK_BNDREGS); print_xstate_feature(XFEATURE_MASK_BNDCSR); print_xstate_feature(XFEATURE_MASK_OPMASK); print_xstate_feature(XFEATURE_MASK_ZMM_Hi256); print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); print_xstate_feature(XFEATURE_MASK_PKRU); print_xstate_feature(XFEATURE_MASK_PASID); print_xstate_feature(XFEATURE_MASK_CET_USER); print_xstate_feature(XFEATURE_MASK_XTILE_CFG); print_xstate_feature(XFEATURE_MASK_XTILE_DATA); } /* * This check is important because it is easy to get XSTATE_* * confused with XSTATE_BIT_*. */ #define CHECK_XFEATURE(nr) do { \ WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ WARN_ON(nr >= XFEATURE_MAX); \ } while (0) /* * Print out xstate component offsets and sizes */ static void __init print_xstate_offset_size(void) { int i; for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, xfeature_get_offset(fpu_kernel_cfg.max_features, i), i, xstate_sizes[i]); } } /* * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ static __init void os_xrstor_booting(struct xregs_state *xstate) { u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE; u32 lmask = mask; u32 hmask = mask >> 32; int err; if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); /* * We should never fault when copying from a kernel buffer, and the FPU * state we set at boot time should be valid. */ WARN_ON_FPU(err); } /* * All supported features have either init state all zeros or are * handled in setup_init_fpu() individually. This is an explicit * feature list and does not use XFEATURE_MASK*SUPPORTED to catch * newly added supported features at build time and make people * actually look at the init state for the new feature. 
*/ #define XFEATURES_INIT_FPSTATE_HANDLED \ (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM | \ XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_PASID | \ XFEATURE_MASK_CET_USER | \ XFEATURE_MASK_XTILE) /* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) { BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED) != XFEATURES_INIT_FPSTATE_HANDLED); if (!boot_cpu_has(X86_FEATURE_XSAVE)) return; print_xstate_features(); xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures); /* * Init all the features state with header.xfeatures being 0x0 */ os_xrstor_booting(&init_fpstate.regs.xsave); /* * All components are now in init state. Read the state back so * that init_fpstate contains all non-zero init state. This only * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because * those use the init optimization which skips writing data for * components in init state. * * XSAVE could be used, but that would require to reshuffle the * data when XSAVEC/S is available because XSAVEC/S uses xstate * compaction. But doing so is a pointless exercise because most * components have an all zeros init state except for the legacy * ones (FP and SSE). Those can be saved with FXSAVE into the * legacy area. Adding new features requires to ensure that init * state is all zeroes or if not to add the necessary handling * here. */ fxsave(&init_fpstate.regs.fxsave); } int xfeature_size(int xfeature_nr) { u32 eax, ebx, ecx, edx; CHECK_XFEATURE(xfeature_nr); cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); return eax; } /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ static int validate_user_xstate_header(const struct xstate_header *hdr, struct fpstate *fpstate) { /* No unknown or supervisor features may be set */ if (hdr->xfeatures & ~fpstate->user_xfeatures) return -EINVAL; /* Userspace must use the uncompacted format */ if (hdr->xcomp_bv) return -EINVAL; /* * If 'reserved' is shrunken to add a new field, make sure to validate * that new field here! */ BUILD_BUG_ON(sizeof(hdr->reserved) != 48); /* No reserved bits may be set */ if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) return -EINVAL; return 0; } static void __init __xstate_dump_leaves(void) { int i; u32 eax, ebx, ecx, edx; static int should_dump = 1; if (!should_dump) return; should_dump = 0; /* * Dump out a few leaves past the ones that we support * just in case there are some goodies up there */ for (i = 0; i < XFEATURE_MAX + 10; i++) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n", XSTATE_CPUID, i, eax, ebx, ecx, edx); } } #define XSTATE_WARN_ON(x, fmt, ...) do { \ if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \ __xstate_dump_leaves(); \ } \ } while (0) #define XCHECK_SZ(sz, nr, __struct) ({ \ if (WARN_ONCE(sz != sizeof(__struct), \ "[%s]: struct is %zu bytes, cpu state %d bytes\n", \ xfeature_names[nr], sizeof(__struct), sz)) { \ __xstate_dump_leaves(); \ } \ true; \ }) /** * check_xtile_data_against_struct - Check tile data state size. * * Calculate the state size by multiplying the single tile size which is * recorded in a C struct, and the number of tiles that the CPU informs. * Compare the provided size with the calculation. 
* * @size: The tile data state size * * Returns: 0 on success, -EINVAL on mismatch. */ static int __init check_xtile_data_against_struct(int size) { u32 max_palid, palid, state_size; u32 eax, ebx, ecx, edx; u16 max_tile; /* * Check the maximum palette id: * eax: the highest numbered palette subleaf. */ cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx); /* * Cross-check each tile size and find the maximum number of * supported tiles. */ for (palid = 1, max_tile = 0; palid <= max_palid; palid++) { u16 tile_size, max; /* * Check the tile size info: * eax[31:16]: bytes per title * ebx[31:16]: the max names (or max number of tiles) */ cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx); tile_size = eax >> 16; max = ebx >> 16; if (tile_size != sizeof(struct xtile_data)) { pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n", __stringify(XFEATURE_XTILE_DATA), sizeof(struct xtile_data), tile_size); __xstate_dump_leaves(); return -EINVAL; } if (max > max_tile) max_tile = max; } state_size = sizeof(struct xtile_data) * max_tile; if (size != state_size) { pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n", __stringify(XFEATURE_XTILE_DATA), state_size, size); __xstate_dump_leaves(); return -EINVAL; } return 0; } /* * We have a C struct for each 'xstate'. We need to ensure * that our software representation matches what the CPU * tells us about the state's size. */ static bool __init check_xstate_against_struct(int nr) { /* * Ask the CPU for the size of the state. */ int sz = xfeature_size(nr); /* * Match each CPU state with the corresponding software * structure. */ switch (nr) { case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct); case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state); case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state); case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state); case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state); case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state); case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state); case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state); case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg); case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state); case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true; default: XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr); return false; } return true; } static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted) { unsigned int topmost = fls64(xfeatures) - 1; unsigned int offset = xstate_offsets[topmost]; if (topmost <= XFEATURE_SSE) return sizeof(struct xregs_state); if (compacted) offset = xfeature_get_offset(xfeatures, topmost); return offset + xstate_sizes[topmost]; } /* * This essentially double-checks what the cpu told us about * how large the XSAVE buffer needs to be. We are recalculating * it to be safe. * * Independent XSAVE features allocate their own buffers and are not * covered by these checks. Only the size of the buffer for task->fpu * is checked here. 
*/ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) { bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES); unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; int i; for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { if (!check_xstate_against_struct(i)) return false; /* * Supervisor state components can be managed only by * XSAVES. */ if (!xsaves && xfeature_is_supervisor(i)) { XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i); return false; } } size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted); XSTATE_WARN_ON(size != kernel_size, "size %u != kernel_size %u\n", size, kernel_size); return size == kernel_size; } /* * Get total size of enabled xstates in XCR0 | IA32_XSS. * * Note the SDM's wording here. "sub-function 0" only enumerates * the size of the *user* states. If we use it to size a buffer * that we use 'XSAVES' on, we could potentially overflow the * buffer because 'XSAVES' saves system states too. * * This also takes compaction into account. So this works for * XSAVEC as well. */ static unsigned int __init get_compacted_size(void) { unsigned int eax, ebx, ecx, edx; /* * - CPUID function 0DH, sub-function 1: * EBX enumerates the size (in bytes) required by * the XSAVES instruction for an XSAVE area * containing all the state components * corresponding to bits currently set in * XCR0 | IA32_XSS. * * When XSAVES is not available but XSAVEC is (virt), then there * are no supervisor states, but XSAVEC still uses compacted * format. */ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); return ebx; } /* * Get the total size of the enabled xstates without the independent supervisor * features. */ static unsigned int __init get_xsave_compacted_size(void) { u64 mask = xfeatures_mask_independent(); unsigned int size; if (!mask) return get_compacted_size(); /* Disable independent features. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); /* * Ask the hardware what size is required of the buffer. * This is the size required for the task->fpu buffer. */ size = get_compacted_size(); /* Re-enable independent features so XSAVES will work on them again. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); return size; } static unsigned int __init get_xsave_size_user(void) { unsigned int eax, ebx, ecx, edx; /* * - CPUID function 0DH, sub-function 0: * EBX enumerates the size (in bytes) required by * the XSAVE instruction for an XSAVE area * containing all the *user* state components * corresponding to bits currently set in XCR0. */ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); return ebx; } static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ unsigned int user_size, kernel_size, kernel_default_size; bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); /* Uncompacted user space size */ user_size = get_xsave_size_user(); /* * XSAVES kernel size includes supervisor states and uses compacted * format. XSAVEC uses compacted format, but does not save * supervisor states. * * XSAVE[OPT] do not support supervisor states so kernel and user * size is identical. 
*/ if (compacted) kernel_size = get_xsave_compacted_size(); else kernel_size = user_size; kernel_default_size = xstate_calculate_size(fpu_kernel_cfg.default_features, compacted); if (!paranoid_xstate_size_valid(kernel_size)) return -EINVAL; fpu_kernel_cfg.max_size = kernel_size; fpu_user_cfg.max_size = user_size; fpu_kernel_cfg.default_size = kernel_default_size; fpu_user_cfg.default_size = xstate_calculate_size(fpu_user_cfg.default_features, false); return 0; } /* * We enabled the XSAVE hardware, but something went wrong and * we can not use it. Disable it. */ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) { fpu_kernel_cfg.max_features = 0; cr4_clear_bits(X86_CR4_OSXSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE); /* Restore the legacy size.*/ fpu_kernel_cfg.max_size = legacy_size; fpu_kernel_cfg.default_size = legacy_size; fpu_user_cfg.max_size = legacy_size; fpu_user_cfg.default_size = legacy_size; /* * Prevent enabling the static branch which enables writes to the * XFD MSR. */ init_fpstate.xfd = 0; fpstate_reset(&current->thread.fpu); } /* * Enable and initialize the xsave feature. * Called once per system bootup. */ void __init fpu__init_system_xstate(unsigned int legacy_size) { unsigned int eax, ebx, ecx, edx; u64 xfeatures; int err; int i; if (!boot_cpu_has(X86_FEATURE_FPU)) { pr_info("x86/fpu: No FPU detected\n"); return; } if (!boot_cpu_has(X86_FEATURE_XSAVE)) { pr_info("x86/fpu: x87 FPU will use %s\n", boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE"); return; } if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { WARN_ON_FPU(1); return; } /* * Find user xstates supported by the processor. */ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); fpu_kernel_cfg.max_features = eax + ((u64)edx << 32); /* * Find supervisor xstates supported by the processor. */ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32); if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { /* * This indicates that something really unexpected happened * with the enumeration. Disable XSAVE and try to continue * booting without it. This is too early to BUG(). */ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", fpu_kernel_cfg.max_features); goto out_disable; } /* * Clear XSAVE features that are disabled in the normal CPUID. */ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { unsigned short cid = xsave_cpuid_features[i]; /* Careful: X86_FEATURE_FPU is 0! 
*/ if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid)) fpu_kernel_cfg.max_features &= ~BIT_ULL(i); } if (!cpu_feature_enabled(X86_FEATURE_XFD)) fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC; if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; else fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED; fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; /* Clean out dynamic features from default */ fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features; fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; fpu_user_cfg.default_features = fpu_user_cfg.max_features; fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; /* Store it for paranoia check at the end */ xfeatures = fpu_kernel_cfg.max_features; /* * Initialize the default XFD state in initfp_state and enable the * dynamic sizing mechanism if dynamic states are available. The * static key cannot be enabled here because this runs before * jump_label_init(). This is delayed to an initcall. */ init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC; /* Set up compaction feature bit */ if (cpu_feature_enabled(X86_FEATURE_XSAVEC) || cpu_feature_enabled(X86_FEATURE_XSAVES)) setup_force_cpu_cap(X86_FEATURE_XCOMPACTED); /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); /* Cache size, offset and flags for initialization */ setup_xstate_cache(); err = init_xstate_size(); if (err) goto out_disable; /* Reset the state for the current task */ fpstate_reset(&current->thread.fpu); /* * Update info used for ptrace frames; use standard-format size and no * supervisor xstates: */ update_regset_xstate_info(fpu_user_cfg.max_size, fpu_user_cfg.max_features); /* * init_fpstate excludes dynamic states as they are large but init * state is zero. */ init_fpstate.size = fpu_kernel_cfg.default_size; init_fpstate.xfeatures = fpu_kernel_cfg.default_features; if (init_fpstate.size > sizeof(init_fpstate.regs)) { pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n", sizeof(init_fpstate.regs), init_fpstate.size); goto out_disable; } setup_init_fpu_buf(); /* * Paranoia check whether something in the setup modified the * xfeatures mask. */ if (xfeatures != fpu_kernel_cfg.max_features) { pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n", xfeatures, fpu_kernel_cfg.max_features); goto out_disable; } /* * CPU capabilities initialization runs before FPU init. So * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely * functional, set the feature bit so depending code works. */ setup_force_cpu_cap(X86_FEATURE_OSXSAVE); print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", fpu_kernel_cfg.max_features, fpu_kernel_cfg.max_size, boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard"); return; out_disable: /* something went wrong, try to boot without any XSAVE support */ fpu__init_disable_system_xstate(legacy_size); } /* * Restore minimal FPU state after suspend: */ void fpu__resume_cpu(void) { /* * Restore XCR0 on xsave capable CPUs: */ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * Restore IA32_XSS. The same CPUID bit enumerates support * of XSAVES and MSR_IA32_XSS. 
*/ if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } if (fpu_state_size_dynamic()) wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd); } /* * Given an xstate feature nr, calculate where in the xsave * buffer the state is. Callers should ensure that the buffer * is valid. */ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { u64 xcomp_bv = xsave->header.xcomp_bv; if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) return NULL; if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) { if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr)))) return NULL; } return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr); } /* * Given the xsave area and a state inside, this function returns the * address of the state. * * This is the API that is called to get xstate address in either * standard format or compacted format of xsave area. * * Note that if there is no data for the field in the xsave buffer * this will return NULL. * * Inputs: * xstate: the thread's storage area for all FPU data * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area, or NULL if the * field is not present in the xsave buffer. */ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { /* * Do we even *have* xsave state? */ if (!boot_cpu_has(X86_FEATURE_XSAVE)) return NULL; /* * We should not ever be requesting features that we * have not enabled. */ if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) return NULL; /* * This assumes the last 'xsave*' instruction to * have requested that 'xfeature_nr' be saved. * If it did not, we might be seeing and old value * of the field in the buffer. * * This can happen because the last 'xsave' did not * request that this feature be saved (unlikely) * or because the "init optimization" caused it * to not be saved. */ if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) return NULL; return __raw_xsave_addr(xsave, xfeature_nr); } #ifdef CONFIG_ARCH_HAS_PKEYS /* * This will go out and modify PKRU register to set the access * rights for @pkey to @init_val. */ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { u32 old_pkru, new_pkru_bits = 0; int pkey_shift; /* * This check implies XSAVE support. OSPKE only gets * set if we enable XSAVE and we enable PKU in XCR0. */ if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return -EINVAL; /* * This code should only be called with valid 'pkey' * values originating from in-kernel users. Complain * if a bad value is observed. */ if (WARN_ON_ONCE(pkey >= arch_max_pkey())) return -EINVAL; /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) new_pkru_bits |= PKRU_AD_BIT; if (init_val & PKEY_DISABLE_WRITE) new_pkru_bits |= PKRU_WD_BIT; /* Shift the bits in to the correct place in PKRU for pkey: */ pkey_shift = pkey * PKRU_BITS_PER_PKEY; new_pkru_bits <<= pkey_shift; /* Get old PKRU and mask off any old bits in place: */ old_pkru = read_pkru(); old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); /* Write old part along with new part: */ write_pkru(old_pkru | new_pkru_bits); return 0; } #endif /* ! CONFIG_ARCH_HAS_PKEYS */ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, void *init_xstate, unsigned int size) { membuf_write(to, from_xstate ? 
xstate : init_xstate, size); } /** * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @fpstate: The fpstate buffer from which to copy * @xfeatures: The mask of xfeatures to save (XSAVE mode only) * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming * format, i.e. from the kernel internal hardware dependent storage format * to the requested @mode. UABI XSTATE is always uncompacted! * * It supports partial copy but @to.pos always starts from zero. */ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, u64 xfeatures, u32 pkru_val, enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.regs.xsave; struct xregs_state *xsave = &fpstate->regs.xsave; struct xstate_header header; unsigned int zerofrom; u64 mask; int i; memset(&header, 0, sizeof(header)); header.xfeatures = xsave->header.xfeatures; /* Mask out the feature bits depending on copy mode */ switch (copy_mode) { case XSTATE_COPY_FP: header.xfeatures &= XFEATURE_MASK_FP; break; case XSTATE_COPY_FX: header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE; break; case XSTATE_COPY_XSAVE: header.xfeatures &= fpstate->user_xfeatures & xfeatures; break; } /* Copy FP state up to MXCSR */ copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387, &xinit->i387, off_mxcsr); /* Copy MXCSR when SSE or YMM are set in the feature mask */ copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM), &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr, MXCSR_AND_FLAGS_SIZE); /* Copy the remaining FP state */ copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387.st_space, &xinit->i387.st_space, sizeof(xsave->i387.st_space)); /* Copy the SSE state - shared with YMM, but independently managed */ copy_feature(header.xfeatures & XFEATURE_MASK_SSE, &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space, sizeof(xsave->i387.xmm_space)); if (copy_mode != XSTATE_COPY_XSAVE) goto out; /* Zero the padding area */ membuf_zero(&to, sizeof(xsave->i387.padding)); /* Copy xsave->i387.sw_reserved */ membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved)); /* Copy the user space relevant state of @xsave->header */ membuf_write(&to, &header, sizeof(header)); zerofrom = offsetof(struct xregs_state, extended_state_area); /* * This 'mask' indicates which states to copy from fpstate. * Those extended states that are not present in fpstate are * either disabled or initialized: * * In non-compacted format, disabled features still occupy * state space but there is no state to copy from in the * compacted init_fpstate. The gap tracking will zero these * states. * * The extended features have an all zeroes init state. Thus, * remove them from 'mask' to zero those features in the user * buffer instead of retrieving them from init_fpstate. */ mask = header.xfeatures; for_each_extended_xfeature(i, mask) { /* * If there was a feature or alignment gap, zero the space * in the destination buffer. */ if (zerofrom < xstate_offsets[i]) membuf_zero(&to, xstate_offsets[i] - zerofrom); if (i == XFEATURE_PKRU) { struct pkru_state pkru = {0}; /* * PKRU is not necessarily up to date in the * XSAVE buffer. Use the provided value. 
*/ pkru.pkru = pkru_val; membuf_write(&to, &pkru, sizeof(pkru)); } else { membuf_write(&to, __raw_xsave_addr(xsave, i), xstate_sizes[i]); } /* * Keep track of the last copied state in the non-compacted * target buffer for gap zeroing. */ zerofrom = xstate_offsets[i] + xstate_sizes[i]; } out: if (to.left) membuf_zero(&to, to.left); } /** * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @tsk: The task from which to copy the saved xstate * @copy_mode: The requested copy mode * * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming * format, i.e. from the kernel internal hardware dependent storage format * to the requested @mode. UABI XSTATE is always uncompacted! * * It supports partial copy but @to.pos always starts from zero. */ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, tsk->thread.fpu.fpstate->user_xfeatures, tsk->thread.pkru, copy_mode); } static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, const void *kbuf, const void __user *ubuf) { if (kbuf) { memcpy(dst, kbuf + offset, size); } else { if (copy_from_user(dst, ubuf + offset, size)) return -EFAULT; } return 0; } /** * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate * @fpstate: The fpstate buffer to copy to * @kbuf: The UABI format buffer, if it comes from the kernel * @ubuf: The UABI format buffer, if it comes from userspace * @pkru: The location to write the PKRU value to * * Converts from the UABI format into the kernel internal hardware * dependent format. * * This function ultimately has three different callers with distinct PKRU * behavior. * 1. When called from sigreturn the PKRU register will be restored from * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to * @fpstate is sufficient to cover this case, but the caller will also * pass a pointer to the thread_struct's pkru field in @pkru and updating * it is harmless. * 2. When called from ptrace the PKRU register will be restored from the * thread_struct's pkru field. A pointer to that is passed in @pkru. * The kernel will restore it manually, so the XRSTOR behavior that resets * the PKRU register to the hardware init value (0) if the corresponding * xfeatures bit is not set is emulated here. * 3. When called from KVM the PKRU register will be restored from the vcpu's * pkru field. A pointer to that is passed in @pkru. KVM hasn't used * XRSTOR and hasn't had the PKRU resetting behavior described above. To * preserve that KVM behavior, it passes NULL for @pkru if the xfeatures * bit is not set. */ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, const void __user *ubuf, u32 *pkru) { struct xregs_state *xsave = &fpstate->regs.xsave; unsigned int offset, size; struct xstate_header hdr; u64 mask; int i; offset = offsetof(struct xregs_state, header); if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf)) return -EFAULT; if (validate_user_xstate_header(&hdr, fpstate)) return -EINVAL; /* Validate MXCSR when any of the related features is in use */ mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM; if (hdr.xfeatures & mask) { u32 mxcsr[2]; offset = offsetof(struct fxregs_state, mxcsr); if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf)) return -EFAULT; /* Reserved bits in MXCSR must be zero. 
*/ if (mxcsr[0] & ~mxcsr_feature_mask) return -EINVAL; /* SSE and YMM require MXCSR even when FP is not in use. */ if (!(hdr.xfeatures & XFEATURE_MASK_FP)) { xsave->i387.mxcsr = mxcsr[0]; xsave->i387.mxcsr_mask = mxcsr[1]; } } for (i = 0; i < XFEATURE_MAX; i++) { mask = BIT_ULL(i); if (hdr.xfeatures & mask) { void *dst = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; if (copy_from_buffer(dst, offset, size, kbuf, ubuf)) return -EFAULT; } } if (hdr.xfeatures & XFEATURE_MASK_PKRU) { struct pkru_state *xpkru; xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); *pkru = xpkru->pkru; } else { /* * KVM may pass NULL here to indicate that it does not need * PKRU updated. */ if (pkru) *pkru = 0; } /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': */ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL; /* * Add back in the features that came in from userspace: */ xsave->header.xfeatures |= hdr.xfeatures; return 0; } /* * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] * format and copy to the target thread. Used by ptrace and KVM. */ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru) { return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru); } /* * Convert from a sigreturn standard-format user-space buffer to kernel * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. */ int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru); } static bool validate_independent_components(u64 mask) { u64 xchk; if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES))) return false; xchk = ~xfeatures_mask_independent(); if (WARN_ON_ONCE(!mask || mask & xchk)) return false; return true; } /** * xsaves - Save selected components to a kernel xstate buffer * @xstate: Pointer to the buffer * @mask: Feature mask to select the components to save * * The @xstate buffer must be 64 byte aligned and correctly initialized as * XSAVES does not write the full xstate header. Before first use the * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer * can #GP. * * The feature mask must be a subset of the independent features. */ void xsaves(struct xregs_state *xstate, u64 mask) { int err; if (!validate_independent_components(mask)) return; XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err); WARN_ON_ONCE(err); } /** * xrstors - Restore selected components from a kernel xstate buffer * @xstate: Pointer to the buffer * @mask: Feature mask to select the components to restore * * The @xstate buffer must be 64 byte aligned and correctly initialized * otherwise XRSTORS from that buffer can #GP. * * Proper usage is to restore the state which was saved with * xsaves() into @xstate. * * The feature mask must be a subset of the independent features. 
*/ void xrstors(struct xregs_state *xstate, u64 mask) { int err; if (!validate_independent_components(mask)) return; XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err); WARN_ON_ONCE(err); } #if IS_ENABLED(CONFIG_KVM) void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature) { void *addr = get_xsave_addr(&fps->regs.xsave, xfeature); if (addr) memset(addr, 0, xstate_sizes[xfeature]); } EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component); #endif #ifdef CONFIG_X86_64 #ifdef CONFIG_X86_DEBUG_FPU /* * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask * can safely operate on the @fpstate buffer. */ static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor) { u64 xfd = __this_cpu_read(xfd_state); if (fpstate->xfd == xfd) return true; /* * The XFD MSR does not match fpstate->xfd. That's invalid when * the passed in fpstate is current's fpstate. */ if (fpstate->xfd == current->thread.fpu.fpstate->xfd) return false; /* * XRSTOR(S) from init_fpstate are always correct as it will just * bring all components into init state and not read from the * buffer. XSAVE(S) raises #PF after init. */ if (fpstate == &init_fpstate) return rstor; /* * XSAVE(S): clone(), fpu_swap_kvm_fpu() * XRSTORS(S): fpu_swap_kvm_fpu() */ /* * No XSAVE/XRSTOR instructions (except XSAVE itself) touch * the buffer area for XFD-disabled state components. */ mask &= ~xfd; /* * Remove features which are valid in fpstate. They * have space allocated in fpstate. */ mask &= ~fpstate->xfeatures; /* * Any remaining state components in 'mask' might be written * by XSAVE/XRSTOR. Fail validation it found. */ return !mask; } void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor)); } #endif /* CONFIG_X86_DEBUG_FPU */ static int __init xfd_update_static_branch(void) { /* * If init_fpstate.xfd has bits set then dynamic features are * available and the dynamic sizing must be enabled. */ if (init_fpstate.xfd) static_branch_enable(&__fpu_state_size_dynamic); return 0; } arch_initcall(xfd_update_static_branch) void fpstate_free(struct fpu *fpu) { if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate) vfree(fpu->fpstate); } /** * fpstate_realloc - Reallocate struct fpstate for the requested new features * * @xfeatures: A bitmap of xstate features which extend the enabled features * of that task * @ksize: The required size for the kernel buffer * @usize: The required size for user space buffers * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations * * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer * terminates quickly, vfree()-induced IPIs may be a concern, but tasks * with large states are likely to live longer. * * Returns: 0 on success, -ENOMEM on allocation error. */ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, unsigned int usize, struct fpu_guest *guest_fpu) { struct fpu *fpu = &current->thread.fpu; struct fpstate *curfps, *newfps = NULL; unsigned int fpsize; bool in_use; fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64); newfps = vzalloc(fpsize); if (!newfps) return -ENOMEM; newfps->size = ksize; newfps->user_size = usize; newfps->is_valloc = true; /* * When a guest FPU is supplied, use @guest_fpu->fpstate * as reference independent whether it is in use or not. */ curfps = guest_fpu ? 
guest_fpu->fpstate : fpu->fpstate; /* Determine whether @curfps is the active fpstate */ in_use = fpu->fpstate == curfps; if (guest_fpu) { newfps->is_guest = true; newfps->is_confidential = curfps->is_confidential; newfps->in_use = curfps->in_use; guest_fpu->xfeatures |= xfeatures; guest_fpu->uabi_size = usize; } fpregs_lock(); /* * If @curfps is in use, ensure that the current state is in the * registers before swapping fpstate as that might invalidate it * due to layout changes. */ if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures); if (guest_fpu) { guest_fpu->fpstate = newfps; /* If curfps is active, update the FPU fpstate pointer */ if (in_use) fpu->fpstate = newfps; } else { fpu->fpstate = newfps; } if (in_use) xfd_update_state(fpu->fpstate); fpregs_unlock(); /* Only free valloc'ed state */ if (curfps && curfps->is_valloc) vfree(curfps); return 0; } static int validate_sigaltstack(unsigned int usize) { struct task_struct *thread, *leader = current->group_leader; unsigned long framesize = get_sigframe_size(); lockdep_assert_held(&current->sighand->siglock); /* get_sigframe_size() is based on fpu_user_cfg.max_size */ framesize -= fpu_user_cfg.max_size; framesize += usize; for_each_thread(leader, thread) { if (thread->sas_ss_size && thread->sas_ss_size < framesize) return -ENOSPC; } return 0; } static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) { /* * This deliberately does not exclude !XSAVES as we still might * decide to optionally context switch XCR0 or talk the silicon * vendors into extending XFD for the pre AMX states, especially * AVX512. */ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); struct fpu *fpu = &current->group_leader->thread.fpu; struct fpu_state_perm *perm; unsigned int ksize, usize; u64 mask; int ret = 0; /* Check whether fully enabled */ if ((permitted & requested) == requested) return 0; /* Calculate the resulting kernel state size */ mask = permitted | requested; /* Take supervisor states into account on the host */ if (!guest) mask |= xfeatures_mask_supervisor(); ksize = xstate_calculate_size(mask, compacted); /* Calculate the resulting user state size */ mask &= XFEATURE_MASK_USER_SUPPORTED; usize = xstate_calculate_size(mask, false); if (!guest) { ret = validate_sigaltstack(usize); if (ret) return ret; } perm = guest ? &fpu->guest_perm : &fpu->perm; /* Pairs with the READ_ONCE() in xstate_get_group_perm() */ WRITE_ONCE(perm->__state_perm, mask); /* Protected by sighand lock */ perm->__state_size = ksize; perm->__user_state_size = usize; return ret; } /* * Permissions array to map facilities with more than one component */ static const u64 xstate_prctl_req[XFEATURE_MAX] = { [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA, }; static int xstate_request_perm(unsigned long idx, bool guest) { u64 permitted, requested; int ret; if (idx >= XFEATURE_MAX) return -EINVAL; /* * Look up the facility mask which can require more than * one xstate component. 
*/ idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req)); requested = xstate_prctl_req[idx]; if (!requested) return -EOPNOTSUPP; if ((fpu_user_cfg.max_features & requested) != requested) return -EOPNOTSUPP; /* Lockless quick check */ permitted = xstate_get_group_perm(guest); if ((permitted & requested) == requested) return 0; /* Protect against concurrent modifications */ spin_lock_irq(&current->sighand->siglock); permitted = xstate_get_group_perm(guest); /* First vCPU allocation locks the permissions. */ if (guest && (permitted & FPU_GUEST_PERM_LOCKED)) ret = -EBUSY; else ret = __xstate_request_perm(permitted, requested, guest); spin_unlock_irq(&current->sighand->siglock); return ret; } int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu) { u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC; struct fpu_state_perm *perm; unsigned int ksize, usize; struct fpu *fpu; if (!xfd_event) { if (!guest_fpu) pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err); return 0; } /* Protect against concurrent modifications */ spin_lock_irq(&current->sighand->siglock); /* If not permitted let it die */ if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) { spin_unlock_irq(&current->sighand->siglock); return -EPERM; } fpu = &current->group_leader->thread.fpu; perm = guest_fpu ? &fpu->guest_perm : &fpu->perm; ksize = perm->__state_size; usize = perm->__user_state_size; /* * The feature is permitted. State size is sufficient. Dropping * the lock is safe here even if more features are added from * another task, the retrieved buffer sizes are valid for the * currently requested feature(s). */ spin_unlock_irq(&current->sighand->siglock); /* * Try to allocate a new fpstate. If that fails there is no way * out. */ if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu)) return -EFAULT; return 0; } int xfd_enable_feature(u64 xfd_err) { return __xfd_enable_feature(xfd_err, NULL); } #else /* CONFIG_X86_64 */ static inline int xstate_request_perm(unsigned long idx, bool guest) { return -EPERM; } #endif /* !CONFIG_X86_64 */ u64 xstate_get_guest_group_perm(void) { return xstate_get_group_perm(true); } EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm); /** * fpu_xstate_prctl - xstate permission operations * @option: A subfunction of arch_prctl() * @arg2: option argument * Return: 0 if successful; otherwise, an error code * * Option arguments: * * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info * ARCH_REQ_XCOMP_PERM: Facility number requested * * For facilities which require more than one XSTATE component, the request * must be the highest state component number related to that facility, * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18). */ long fpu_xstate_prctl(int option, unsigned long arg2) { u64 __user *uptr = (u64 __user *)arg2; u64 permitted, supported; unsigned long idx = arg2; bool guest = false; switch (option) { case ARCH_GET_XCOMP_SUPP: supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features; return put_user(supported, uptr); case ARCH_GET_XCOMP_PERM: /* * Lockless snapshot as it can also change right after the * dropping the lock. 
*/ permitted = xstate_get_host_group_perm(); permitted &= XFEATURE_MASK_USER_SUPPORTED; return put_user(permitted, uptr); case ARCH_GET_XCOMP_GUEST_PERM: permitted = xstate_get_guest_group_perm(); permitted &= XFEATURE_MASK_USER_SUPPORTED; return put_user(permitted, uptr); case ARCH_REQ_XCOMP_GUEST_PERM: guest = true; fallthrough; case ARCH_REQ_XCOMP_PERM: if (!IS_ENABLED(CONFIG_X86_64)) return -EOPNOTSUPP; return xstate_request_perm(idx, guest); default: return -EINVAL; } } #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the time elapsed in milliseconds since the task last used * AVX512. */ static void avx512_status(struct seq_file *m, struct task_struct *task) { unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp); long delta; if (!timestamp) { /* * Report -1 if no AVX512 usage */ delta = -1; } else { delta = (long)(jiffies - timestamp); /* * Cap to LONG_MAX if time difference > LONG_MAX */ if (delta < 0) delta = LONG_MAX; delta = jiffies_to_msecs(delta); } seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta); seq_putc(m, '\n'); } /* * Report architecture-specific information */ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { /* * Report AVX512 state if both the processor and the kernel build * support it. */ if (cpu_feature_enabled(X86_FEATURE_AVX512F)) avx512_status(m, task); return 0; } #endif /* CONFIG_PROC_PID_ARCH_STATUS */
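/*
 * Illustrative user-space sketch, not part of the kernel source: reading the
 * AVX512_elapsed_ms value that proc_pid_arch_status() above emits.  Assumes a
 * kernel built with CONFIG_PROC_PID_ARCH_STATUS and an AVX-512 capable CPU;
 * the /proc/<pid>/arch_status file name and the "AVX512_elapsed_ms:" key match
 * the seq_put_decimal_ll() call above, everything else is minimal example code.
 */
#include <stdio.h>

int main(void)
{
	char line[256];
	long elapsed_ms = -1;
	FILE *f = fopen("/proc/self/arch_status", "r");

	if (!f) {
		perror("/proc/self/arch_status");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* avx512_status() writes "AVX512_elapsed_ms:\t<value>\n" */
		if (sscanf(line, "AVX512_elapsed_ms: %ld", &elapsed_ms) == 1)
			break;
	}
	fclose(f);

	/* -1 means the task has never used AVX-512 (see avx512_status() above) */
	printf("AVX512_elapsed_ms: %ld\n", elapsed_ms);
	return 0;
}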
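/*
 * Illustrative user-space sketch, not part of the kernel source: requesting
 * permission for a dynamically enabled xstate component through the
 * arch_prctl() options handled by fpu_xstate_prctl() above.  The
 * ARCH_GET_XCOMP_SUPP/ARCH_GET_XCOMP_PERM/ARCH_REQ_XCOMP_PERM constants are
 * assumed to come from a recent <asm/prctl.h>; the XFEATURE_XTILE_DATA bit
 * number (18) is taken from the kernel-doc comment above.  Error handling is
 * deliberately minimal.
 */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <asm/prctl.h>

#define XFEATURE_XTILE_DATA_BIT	18	/* highest AMX component, per the comment above */

static long xcomp_prctl(int option, unsigned long arg2)
{
	/* glibc provides no dedicated wrapper for these options; use the raw syscall */
	return syscall(SYS_arch_prctl, option, arg2);
}

int main(void)
{
	unsigned long long supported = 0, permitted = 0;

	if (xcomp_prctl(ARCH_GET_XCOMP_SUPP, (unsigned long)&supported))
		perror("ARCH_GET_XCOMP_SUPP");

	/* Request permission for AMX tile data for this process */
	if (xcomp_prctl(ARCH_REQ_XCOMP_PERM, XFEATURE_XTILE_DATA_BIT))
		perror("ARCH_REQ_XCOMP_PERM");

	if (xcomp_prctl(ARCH_GET_XCOMP_PERM, (unsigned long)&permitted))
		perror("ARCH_GET_XCOMP_PERM");

	printf("supported: %#llx permitted: %#llx\n", supported, permitted);
	return 0;
}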
// SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/exit.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/mm.h> #include <linux/slab.h> #include <linux/sched/autogroup.h> #include <linux/sched/mm.h> #include <linux/sched/stat.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/sched/cputime.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/capability.h> #include <linux/completion.h> #include <linux/personality.h> #include <linux/tty.h> #include <linux/iocontext.h> #include <linux/key.h> #include <linux/cpu.h> #include <linux/acct.h> #include <linux/tsacct_kern.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/freezer.h> #include <linux/binfmts.h> #include <linux/nsproxy.h> #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/profile.h> #include <linux/mount.h> #include <linux/proc_fs.h> #include <linux/kthread.h> #include <linux/mempolicy.h> #include <linux/taskstats_kern.h> #include <linux/delayacct.h> #include <linux/cgroup.h> #include <linux/syscalls.h> #include <linux/signal.h> #include <linux/posix-timers.h> #include <linux/cn_proc.h> #include <linux/mutex.h> #include <linux/futex.h> #include <linux/pipe_fs_i.h> #include <linux/audit.h> /* for audit_free() */ #include <linux/resource.h> #include <linux/task_io_accounting_ops.h> #include <linux/blkdev.h> #include <linux/task_work.h> #include <linux/fs_struct.h> #include <linux/init_task.h> #include <linux/perf_event.h> #include <trace/events/sched.h> #include <linux/hw_breakpoint.h> #include <linux/oom.h> #include <linux/writeback.h> #include <linux/shm.h> #include <linux/kcov.h> #include <linux/kmsan.h> #include <linux/random.h> #include <linux/rcuwait.h> #include <linux/compat.h> #include <linux/io_uring.h> #include <linux/kprobes.h> #include <linux/rethook.h> #include <linux/sysfs.h> #include <linux/user_events.h> #include <linux/uaccess.h> #include <uapi/linux/wait.h> #include <asm/unistd.h> #include <asm/mmu_context.h> #include "exit.h" /* * The default value should be high enough to not crash a system that randomly * crashes its kernel from time to time, but low enough to at least
not permit * overflowing 32-bit refcounts or the ldsem writer count. */ static unsigned int oops_limit = 10000; #ifdef CONFIG_SYSCTL static struct ctl_table kern_exit_table[] = { { .procname = "oops_limit", .data = &oops_limit, .maxlen = sizeof(oops_limit), .mode = 0644, .proc_handler = proc_douintvec, }, { } }; static __init int kernel_exit_sysctls_init(void) { register_sysctl_init("kernel", kern_exit_table); return 0; } late_initcall(kernel_exit_sysctls_init); #endif static atomic_t oops_count = ATOMIC_INIT(0); #ifdef CONFIG_SYSFS static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { return sysfs_emit(page, "%d\n", atomic_read(&oops_count)); } static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count); static __init int kernel_exit_sysfs_init(void) { sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL); return 0; } late_initcall(kernel_exit_sysfs_init); #endif static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; detach_pid(p, PIDTYPE_PID); if (group_dead) { detach_pid(p, PIDTYPE_TGID); detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); list_del_rcu(&p->tasks); list_del_init(&p->sibling); __this_cpu_dec(process_counts); } list_del_rcu(&p->thread_node); } /* * This function expects the tasklist_lock write-locked. */ static void __exit_signal(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; bool group_dead = thread_group_leader(tsk); struct sighand_struct *sighand; struct tty_struct *tty; u64 utime, stime; sighand = rcu_dereference_check(tsk->sighand, lockdep_tasklist_lock_is_held()); spin_lock(&sighand->siglock); #ifdef CONFIG_POSIX_TIMERS posix_cpu_timers_exit(tsk); if (group_dead) posix_cpu_timers_exit_group(tsk); #endif if (group_dead) { tty = sig->tty; sig->tty = NULL; } else { /* * If there is any task waiting for the group exit * then notify it: */ if (sig->notify_count > 0 && !--sig->notify_count) wake_up_process(sig->group_exec_task); if (tsk == sig->curr_target) sig->curr_target = next_thread(tsk); } add_device_randomness((const void*) &tsk->se.sum_exec_runtime, sizeof(unsigned long long)); /* * Accumulate here the counters for all threads as they die. We could * skip the group leader because it is the last user of signal_struct, * but we want to avoid the race with thread_group_cputime() which can * see the empty ->thread_head list. */ task_cputime(tsk, &utime, &stime); write_seqlock(&sig->stats_lock); sig->utime += utime; sig->stime += stime; sig->gtime += task_gtime(tsk); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; sig->nivcsw += tsk->nivcsw; sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig->nr_threads--; __unhash_process(tsk, group_dead); write_sequnlock(&sig->stats_lock); /* * Do this under ->siglock, we can race with another thread * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. 
*/ flush_sigqueue(&tsk->pending); tsk->sighand = NULL; spin_unlock(&sighand->siglock); __cleanup_sighand(sighand); clear_tsk_thread_flag(tsk, TIF_SIGPENDING); if (group_dead) { flush_sigqueue(&sig->shared_pending); tty_kref_put(tty); } } static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); kprobe_flush_task(tsk); rethook_flush_task(tsk); perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); } void put_task_struct_rcu_user(struct task_struct *task) { if (refcount_dec_and_test(&task->rcu_users)) call_rcu(&task->rcu, delayed_put_task_struct); } void __weak release_thread(struct task_struct *dead_task) { } void release_task(struct task_struct *p) { struct task_struct *leader; struct pid *thread_pid; int zap_leader; repeat: /* don't need to get the RCU readlock here - the process is dead and * can't be modifying its own credentials. But shut RCU-lockdep up */ rcu_read_lock(); dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); rcu_read_unlock(); cgroup_release(p); write_lock_irq(&tasklist_lock); ptrace_release_task(p); thread_pid = get_pid(p->thread_pid); __exit_signal(p); /* * If we are the last non-leader member of the thread * group, and the leader is zombie, then notify the * group leader's parent process. (if it wants notification.) */ zap_leader = 0; leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { /* * If we were the last child thread and the leader has * exited already, and the leader's parent ignores SIGCHLD, * then we are the one who should release the leader. */ zap_leader = do_notify_parent(leader, leader->exit_signal); if (zap_leader) leader->exit_state = EXIT_DEAD; } write_unlock_irq(&tasklist_lock); seccomp_filter_release(p); proc_flush_pid(thread_pid); put_pid(thread_pid); release_thread(p); put_task_struct_rcu_user(p); p = leader; if (unlikely(zap_leader)) goto repeat; } int rcuwait_wake_up(struct rcuwait *w) { int ret = 0; struct task_struct *task; rcu_read_lock(); /* * Order condition vs @task, such that everything prior to the load * of @task is visible. This is the condition as to why the user called * rcuwait_wake() in the first place. Pairs with set_current_state() * barrier (A) in rcuwait_wait_event(). * * WAIT WAKE * [S] tsk = current [S] cond = true * MB (A) MB (B) * [L] cond [L] tsk */ smp_mb(); /* (B) */ task = rcu_dereference(w->task); if (task) ret = wake_up_process(task); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(rcuwait_wake_up); /* * Determine if a process group is "orphaned", according to the POSIX * definition in 2.2.2.52. Orphaned process groups are not to be affected * by terminal-generated stop signals. Newly orphaned process groups are * to receive a SIGHUP and a SIGCONT. * * "I ask you, have you ever known what it is to be an orphan?" 
*/ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) { struct task_struct *p; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { if ((p == ignored_task) || (p->exit_state && thread_group_empty(p)) || is_global_init(p->real_parent)) continue; if (task_pgrp(p->real_parent) != pgrp && task_session(p->real_parent) == task_session(p)) return 0; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); return 1; } int is_current_pgrp_orphaned(void) { int retval; read_lock(&tasklist_lock); retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); read_unlock(&tasklist_lock); return retval; } static bool has_stopped_jobs(struct pid *pgrp) { struct task_struct *p; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { if (p->signal->flags & SIGNAL_STOP_STOPPED) return true; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); return false; } /* * Check to see if any process groups have become orphaned as * a result of our exiting, and if they have any stopped jobs, * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) */ static void kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) { struct pid *pgrp = task_pgrp(tsk); struct task_struct *ignored_task = tsk; if (!parent) /* exit: our father is in a different pgrp than * we are and we were the only connection outside. */ parent = tsk->real_parent; else /* reparent: our child is in a different pgrp than * we are, and it was the only connection outside. */ ignored_task = NULL; if (task_pgrp(parent) != pgrp && task_session(parent) == task_session(tsk) && will_become_orphaned_pgrp(pgrp, ignored_task) && has_stopped_jobs(pgrp)) { __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); } } static void coredump_task_exit(struct task_struct *tsk) { struct core_state *core_state; /* * Serialize with any possible pending coredump. * We must hold siglock around checking core_state * and setting PF_POSTCOREDUMP. The core-inducing thread * will increment ->nr_threads for each thread in the * group without PF_POSTCOREDUMP set. */ spin_lock_irq(&tsk->sighand->siglock); tsk->flags |= PF_POSTCOREDUMP; core_state = tsk->signal->core_state; spin_unlock_irq(&tsk->sighand->siglock); /* The vhost_worker does not particpate in coredumps */ if (core_state && ((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) { struct core_thread self; self.task = current; if (self.task->flags & PF_SIGNALED) self.next = xchg(&core_state->dumper.next, &self); else self.task = NULL; /* * Implies mb(), the result of xchg() must be visible * to core_state->dumper. */ if (atomic_dec_and_test(&core_state->nr_threads)) complete(&core_state->startup); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE|TASK_FREEZABLE); if (!self.task) /* see coredump_finish() */ break; schedule(); } __set_current_state(TASK_RUNNING); } } #ifdef CONFIG_MEMCG /* * A task is exiting. If it owned this mm, find a new owner for the mm. */ void mm_update_next_owner(struct mm_struct *mm) { struct task_struct *c, *g, *p = current; retry: /* * If the exiting or execing task is not the owner, it's * someone else's problem. */ if (mm->owner != p) return; /* * The current owner is exiting/execing and there are no other * candidates. Do not leave the mm pointing to a possibly * freed task structure. 
*/ if (atomic_read(&mm->mm_users) <= 1) { WRITE_ONCE(mm->owner, NULL); return; } read_lock(&tasklist_lock); /* * Search in the children */ list_for_each_entry(c, &p->children, sibling) { if (c->mm == mm) goto assign_new_owner; } /* * Search in the siblings */ list_for_each_entry(c, &p->real_parent->children, sibling) { if (c->mm == mm) goto assign_new_owner; } /* * Search through everything else, we should not get here often. */ for_each_process(g) { if (g->flags & PF_KTHREAD) continue; for_each_thread(g, c) { if (c->mm == mm) goto assign_new_owner; if (c->mm) break; } } read_unlock(&tasklist_lock); /* * We found no owner yet mm_users > 1: this implies that we are * most likely racing with swapoff (try_to_unuse()) or /proc or * ptrace or page migration (get_task_mm()). Mark owner as NULL. */ WRITE_ONCE(mm->owner, NULL); return; assign_new_owner: BUG_ON(c == p); get_task_struct(c); /* * The task_lock protects c->mm from changing. * We always want mm->owner->mm == mm */ task_lock(c); /* * Delay read_unlock() till we have the task_lock() * to ensure that c does not slip away underneath us */ read_unlock(&tasklist_lock); if (c->mm != mm) { task_unlock(c); put_task_struct(c); goto retry; } WRITE_ONCE(mm->owner, c); lru_gen_migrate_mm(mm); task_unlock(c); put_task_struct(c); } #endif /* CONFIG_MEMCG */ /* * Turn us into a lazy TLB process if we * aren't already.. */ static void exit_mm(void) { struct mm_struct *mm = current->mm; exit_mm_release(current, mm); if (!mm) return; mmap_read_lock(mm); mmgrab_lazy_tlb(mm); BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ task_lock(current); /* * When a thread stops operating on an address space, the loop * in membarrier_private_expedited() may not observe that * tsk->mm, and the loop in membarrier_global_expedited() may * not observe a MEMBARRIER_STATE_GLOBAL_EXPEDITED * rq->membarrier_state, so those would not issue an IPI. * Membarrier requires a memory barrier after accessing * user-space memory, before clearing tsk->mm or the * rq->membarrier_state. */ smp_mb__after_spinlock(); local_irq_disable(); current->mm = NULL; membarrier_update_current_mm(NULL); enter_lazy_tlb(mm, current); local_irq_enable(); task_unlock(current); mmap_read_unlock(mm); mm_update_next_owner(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) exit_oom_victim(); } static struct task_struct *find_alive_thread(struct task_struct *p) { struct task_struct *t; for_each_thread(p, t) { if (!(t->flags & PF_EXITING)) return t; } return NULL; } static struct task_struct *find_child_reaper(struct task_struct *father, struct list_head *dead) __releases(&tasklist_lock) __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); struct task_struct *reaper = pid_ns->child_reaper; struct task_struct *p, *n; if (likely(reaper != father)) return reaper; reaper = find_alive_thread(father); if (reaper) { pid_ns->child_reaper = reaper; return reaper; } write_unlock_irq(&tasklist_lock); list_for_each_entry_safe(p, n, dead, ptrace_entry) { list_del_init(&p->ptrace_entry); release_task(p); } zap_pid_ns_processes(pid_ns); write_lock_irq(&tasklist_lock); return father; } /* * When we die, we re-parent all our children, and try to: * 1. give them to another thread in our thread group, if such a member exists * 2. give it to the first ancestor process which prctl'd itself as a * child_subreaper for its children (like a service manager) * 3. 
give it to the init process (PID 1) in our pid namespace */ static struct task_struct *find_new_reaper(struct task_struct *father, struct task_struct *child_reaper) { struct task_struct *thread, *reaper; thread = find_alive_thread(father); if (thread) return thread; if (father->signal->has_child_subreaper) { unsigned int ns_level = task_pid(father)->level; /* * Find the first ->is_child_subreaper ancestor in our pid_ns. * We can't check reaper != child_reaper to ensure we do not * cross the namespaces, the exiting parent could be injected * by setns() + fork(). * We check pid->level, this is slightly more efficient than * task_active_pid_ns(reaper) != task_active_pid_ns(father). */ for (reaper = father->real_parent; task_pid(reaper)->level == ns_level; reaper = reaper->real_parent) { if (reaper == &init_task) break; if (!reaper->signal->is_child_subreaper) continue; thread = find_alive_thread(reaper); if (thread) return thread; } } return child_reaper; } /* * Any that need to be release_task'd are put on the @dead list. */ static void reparent_leader(struct task_struct *father, struct task_struct *p, struct list_head *dead) { if (unlikely(p->exit_state == EXIT_DEAD)) return; /* We don't want people slaying init. */ p->exit_signal = SIGCHLD; /* If it has exited notify the new parent about this child's death. */ if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { if (do_notify_parent(p, p->exit_signal)) { p->exit_state = EXIT_DEAD; list_add(&p->ptrace_entry, dead); } } kill_orphaned_pgrp(p, father); } /* * This does two things: * * A. Make init inherit all the child processes * B. Check to see if any process groups have become orphaned * as a result of our exiting, and if they have any stopped * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) */ static void forget_original_parent(struct task_struct *father, struct list_head *dead) { struct task_struct *p, *t, *reaper; if (unlikely(!list_empty(&father->ptraced))) exit_ptrace(father, dead); /* Can drop and reacquire tasklist_lock */ reaper = find_child_reaper(father, dead); if (list_empty(&father->children)) return; reaper = find_new_reaper(father, reaper); list_for_each_entry(p, &father->children, sibling) { for_each_thread(p, t) { RCU_INIT_POINTER(t->real_parent, reaper); BUG_ON((!t->ptrace) != (rcu_access_pointer(t->parent) == father)); if (likely(!t->ptrace)) t->parent = t->real_parent; if (t->pdeath_signal) group_send_sig_info(t->pdeath_signal, SEND_SIG_NOINFO, t, PIDTYPE_TGID); } /* * If this is a threaded reparent there is no need to * notify anyone anything has happened. */ if (!same_thread_group(reaper, father)) reparent_leader(father, p, dead); } list_splice_tail_init(&father->children, &reaper->children); } /* * Send signals to all our closest relatives so that they know * to properly mourn us.. */ static void exit_notify(struct task_struct *tsk, int group_dead) { bool autoreap; struct task_struct *p, *n; LIST_HEAD(dead); write_lock_irq(&tasklist_lock); forget_original_parent(tsk, &dead); if (group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); tsk->exit_state = EXIT_ZOMBIE; /* * sub-thread or delay_group_leader(), wake up the * PIDFD_THREAD waiters. */ if (!thread_group_empty(tsk)) do_notify_pidfd(tsk); if (unlikely(tsk->ptrace)) { int sig = thread_group_leader(tsk) && thread_group_empty(tsk) && !ptrace_reparented(tsk) ? 
tsk->exit_signal : SIGCHLD; autoreap = do_notify_parent(tsk, sig); } else if (thread_group_leader(tsk)) { autoreap = thread_group_empty(tsk) && do_notify_parent(tsk, tsk->exit_signal); } else { autoreap = true; } if (autoreap) { tsk->exit_state = EXIT_DEAD; list_add(&tsk->ptrace_entry, &dead); } /* mt-exec, de_thread() is waiting for group leader */ if (unlikely(tsk->signal->notify_count < 0)) wake_up_process(tsk->signal->group_exec_task); write_unlock_irq(&tasklist_lock); list_for_each_entry_safe(p, n, &dead, ptrace_entry) { list_del_init(&p->ptrace_entry); release_task(p); } } #ifdef CONFIG_DEBUG_STACK_USAGE static void check_stack_usage(void) { static DEFINE_SPINLOCK(low_water_lock); static int lowest_to_date = THREAD_SIZE; unsigned long free; free = stack_not_used(current); if (free >= lowest_to_date) return; spin_lock(&low_water_lock); if (free < lowest_to_date) { pr_info("%s (%d) used greatest stack depth: %lu bytes left\n", current->comm, task_pid_nr(current), free); lowest_to_date = free; } spin_unlock(&low_water_lock); } #else static inline void check_stack_usage(void) {} #endif static void synchronize_group_exit(struct task_struct *tsk, long code) { struct sighand_struct *sighand = tsk->sighand; struct signal_struct *signal = tsk->signal; spin_lock_irq(&sighand->siglock); signal->quick_threads--; if ((signal->quick_threads == 0) && !(signal->flags & SIGNAL_GROUP_EXIT)) { signal->flags = SIGNAL_GROUP_EXIT; signal->group_exit_code = code; signal->group_stop_count = 0; } spin_unlock_irq(&sighand->siglock); } void __noreturn do_exit(long code) { struct task_struct *tsk = current; int group_dead; WARN_ON(irqs_disabled()); synchronize_group_exit(tsk, code); WARN_ON(tsk->plug); kcov_task_exit(tsk); kmsan_task_exit(tsk); coredump_task_exit(tsk); ptrace_event(PTRACE_EVENT_EXIT, code); user_events_exit(tsk); io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { /* * If the last thread of global init has exited, panic * immediately to get a useable coredump. */ if (unlikely(is_global_init(tsk))) panic("Attempted to kill init! exitcode=0x%08x\n", tsk->signal->group_exit_code ?: (int)code); #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk); #endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); } acct_collect(code, group_dead); if (group_dead) tty_audit_exit(); audit_free(tsk); tsk->exit_code = code; taskstats_exit(tsk, group_dead); exit_mm(); if (group_dead) acct_process(); trace_sched_process_exit(tsk); exit_sem(tsk); exit_shm(tsk); exit_files(tsk); exit_fs(tsk); if (group_dead) disassociate_ctty(1); exit_task_namespaces(tsk); exit_task_work(tsk); exit_thread(tsk); /* * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. 
* * because of cgroup mode, must be called before cgroup_exit() */ perf_event_exit_task(tsk); sched_autogroup_exit_task(tsk); cgroup_exit(tsk); /* * FIXME: do that only when needed, using sched_exit tracepoint */ flush_ptrace_hw_breakpoint(tsk); exit_tasks_rcu_start(); exit_notify(tsk, group_dead); proc_exit_connector(tsk); mpol_put_task_policy(tsk); #ifdef CONFIG_FUTEX if (unlikely(current->pi_state_cache)) kfree(current->pi_state_cache); #endif /* * Make sure we are holding no locks: */ debug_check_no_locks_held(); if (tsk->io_context) exit_io_context(tsk); if (tsk->splice_pipe) free_pipe_info(tsk->splice_pipe); if (tsk->task_frag.page) put_page(tsk->task_frag.page); exit_task_stack_account(tsk); check_stack_usage(); preempt_disable(); if (tsk->nr_dirtied) __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); exit_rcu(); exit_tasks_rcu_finish(); lockdep_free_task(tsk); do_task_dead(); } void __noreturn make_task_dead(int signr) { /* * Take the task off the cpu after something catastrophic has * happened. * * We can get here from a kernel oops, sometimes with preemption off. * Start by checking for critical errors. * Then fix up important state like USER_DS and preemption. * Then do everything else. */ struct task_struct *tsk = current; unsigned int limit; if (unlikely(in_interrupt())) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); if (unlikely(irqs_disabled())) { pr_info("note: %s[%d] exited with irqs disabled\n", current->comm, task_pid_nr(current)); local_irq_enable(); } if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", current->comm, task_pid_nr(current), preempt_count()); preempt_count_set(PREEMPT_ENABLED); } /* * Every time the system oopses, if the oops happens while a reference * to an object was held, the reference leaks. * If the oops doesn't also leak memory, repeated oopsing can cause * reference counters to wrap around (if they're not using refcount_t). * This means that repeated oopsing can make unexploitable-looking bugs * exploitable through repeated oopsing. * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). */ limit = READ_ONCE(oops_limit); if (atomic_inc_return(&oops_count) >= limit && limit) panic("Oopsed too often (kernel.oops_limit is %d)", limit); /* * We're taking recursive faults here in make_task_dead. Safest is to just * leave this task alone and wait for reboot. */ if (unlikely(tsk->flags & PF_EXITING)) { pr_alert("Fixing recursive fault but reboot is needed!\n"); futex_exit_recursive(tsk); tsk->exit_state = EXIT_DEAD; refcount_inc(&tsk->rcu_users); do_task_dead(); } do_exit(signr); } SYSCALL_DEFINE1(exit, int, error_code) { do_exit((error_code&0xff)<<8); } /* * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). */ void __noreturn do_group_exit(int exit_code) { struct signal_struct *sig = current->signal; if (sig->flags & SIGNAL_GROUP_EXIT) exit_code = sig->group_exit_code; else if (sig->group_exec_task) exit_code = 0; else { struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); if (sig->flags & SIGNAL_GROUP_EXIT) /* Another thread got here before we took the lock. 
*/ exit_code = sig->group_exit_code; else if (sig->group_exec_task) exit_code = 0; else { sig->group_exit_code = exit_code; sig->flags = SIGNAL_GROUP_EXIT; zap_other_threads(current); } spin_unlock_irq(&sighand->siglock); } do_exit(exit_code); /* NOTREACHED */ } /* * this kills every thread in the thread group. Note that any externally * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. */ SYSCALL_DEFINE1(exit_group, int, error_code) { do_group_exit((error_code & 0xff) << 8); /* NOTREACHED */ return 0; } static int eligible_pid(struct wait_opts *wo, struct task_struct *p) { return wo->wo_type == PIDTYPE_MAX || task_pid_type(p, wo->wo_type) == wo->wo_pid; } static int eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p) { if (!eligible_pid(wo, p)) return 0; /* * Wait for all children (clone and not) if __WALL is set or * if it is traced by us. */ if (ptrace || (wo->wo_flags & __WALL)) return 1; /* * Otherwise, wait for clone children *only* if __WCLONE is set; * otherwise, wait for non-clone children *only*. * * Note: a "clone" child here is one that reports to its parent * using a signal other than SIGCHLD, or a non-leader thread which * we can only see if it is traced by us. */ if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) return 0; return 1; } /* * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold * read_lock(&tasklist_lock) on entry. If we return zero, we still hold * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) { int state, status; pid_t pid = task_pid_vnr(p); uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); struct waitid_info *infop; if (!likely(wo->wo_flags & WEXITED)) return 0; if (unlikely(wo->wo_flags & WNOWAIT)) { status = (p->signal->flags & SIGNAL_GROUP_EXIT) ? p->signal->group_exit_code : p->exit_code; get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); if (wo->wo_rusage) getrusage(p, RUSAGE_BOTH, wo->wo_rusage); put_task_struct(p); goto out_info; } /* * Move the task's state to DEAD/TRACE, only one thread can do this. */ state = (ptrace_reparented(p) && thread_group_leader(p)) ? EXIT_TRACE : EXIT_DEAD; if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) return 0; /* * We own this thread, nobody else can reap it. */ read_unlock(&tasklist_lock); sched_annotate_sleep(); /* * Check thread_group_leader() to exclude the traced sub-threads. */ if (state == EXIT_DEAD && thread_group_leader(p)) { struct signal_struct *sig = p->signal; struct signal_struct *psig = current->signal; unsigned long maxrss; u64 tgutime, tgstime; /* * The resource counters for the group leader are in its * own task_struct. Those for dead threads in the group * are in its signal_struct, as are those for the child * processes it has previously reaped. All these * accumulate in the parent's signal_struct c* fields. * * We don't bother to take a lock here to protect these * p->signal fields because the whole thread group is dead * and nobody can change them. * * psig->stats_lock also protects us from our sub-threads * which can reap other children at the same time. * * We use thread_group_cputime_adjusted() to get times for * the thread group, which consolidates times for all threads * in the group including the group leader. 
*/ thread_group_cputime_adjusted(p, &tgutime, &tgstime); write_seqlock_irq(&psig->stats_lock); psig->cutime += tgutime + sig->cutime; psig->cstime += tgstime + sig->cstime; psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; psig->cmin_flt += p->min_flt + sig->min_flt + sig->cmin_flt; psig->cmaj_flt += p->maj_flt + sig->maj_flt + sig->cmaj_flt; psig->cnvcsw += p->nvcsw + sig->nvcsw + sig->cnvcsw; psig->cnivcsw += p->nivcsw + sig->nivcsw + sig->cnivcsw; psig->cinblock += task_io_get_inblock(p) + sig->inblock + sig->cinblock; psig->coublock += task_io_get_oublock(p) + sig->oublock + sig->coublock; maxrss = max(sig->maxrss, sig->cmaxrss); if (psig->cmaxrss < maxrss) psig->cmaxrss = maxrss; task_io_accounting_add(&psig->ioac, &p->ioac); task_io_accounting_add(&psig->ioac, &sig->ioac); write_sequnlock_irq(&psig->stats_lock); } if (wo->wo_rusage) getrusage(p, RUSAGE_BOTH, wo->wo_rusage); status = (p->signal->flags & SIGNAL_GROUP_EXIT) ? p->signal->group_exit_code : p->exit_code; wo->wo_stat = status; if (state == EXIT_TRACE) { write_lock_irq(&tasklist_lock); /* We dropped tasklist, ptracer could die and untrace */ ptrace_unlink(p); /* If parent wants a zombie, don't release it now */ state = EXIT_ZOMBIE; if (do_notify_parent(p, p->exit_signal)) state = EXIT_DEAD; p->exit_state = state; write_unlock_irq(&tasklist_lock); } if (state == EXIT_DEAD) release_task(p); out_info: infop = wo->wo_info; if (infop) { if ((status & 0x7f) == 0) { infop->cause = CLD_EXITED; infop->status = status >> 8; } else { infop->cause = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; infop->status = status & 0x7f; } infop->pid = pid; infop->uid = uid; } return pid; } static int *task_stopped_code(struct task_struct *p, bool ptrace) { if (ptrace) { if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING)) return &p->exit_code; } else { if (p->signal->flags & SIGNAL_STOP_STOPPED) return &p->signal->group_exit_code; } return NULL; } /** * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED * @wo: wait options * @ptrace: is the wait for ptrace * @p: task to wait for * * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. * * CONTEXT: * read_lock(&tasklist_lock), which is released if return value is * non-zero. Also, grabs and releases @p->sighand->siglock. * * RETURNS: * 0 if wait condition didn't exist and search for other wait conditions * should continue. Non-zero return, -errno on failure and @p's pid on * success, implies that tasklist_lock is released and wait condition * search should terminate. */ static int wait_task_stopped(struct wait_opts *wo, int ptrace, struct task_struct *p) { struct waitid_info *infop; int exit_code, *p_code, why; uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; /* * Traditionally we see ptrace'd stopped tasks regardless of options. */ if (!ptrace && !(wo->wo_flags & WUNTRACED)) return 0; if (!task_stopped_code(p, ptrace)) return 0; exit_code = 0; spin_lock_irq(&p->sighand->siglock); p_code = task_stopped_code(p, ptrace); if (unlikely(!p_code)) goto unlock_sig; exit_code = *p_code; if (!exit_code) goto unlock_sig; if (!unlikely(wo->wo_flags & WNOWAIT)) *p_code = 0; uid = from_kuid_munged(current_user_ns(), task_uid(p)); unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) return 0; /* * Now we are pretty sure this task is interesting. * Make sure it doesn't get reaped out from under us while we * give up the lock and then examine it below. 
We don't want to * keep holding onto the tasklist_lock while we call getrusage and * possibly take page faults for user memory. */ get_task_struct(p); pid = task_pid_vnr(p); why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); sched_annotate_sleep(); if (wo->wo_rusage) getrusage(p, RUSAGE_BOTH, wo->wo_rusage); put_task_struct(p); if (likely(!(wo->wo_flags & WNOWAIT))) wo->wo_stat = (exit_code << 8) | 0x7f; infop = wo->wo_info; if (infop) { infop->cause = why; infop->status = exit_code; infop->pid = pid; infop->uid = uid; } return pid; } /* * Handle do_wait work for one task in a live, non-stopped state. * read_lock(&tasklist_lock) on entry. If we return zero, we still hold * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) { struct waitid_info *infop; pid_t pid; uid_t uid; if (!unlikely(wo->wo_flags & WCONTINUED)) return 0; if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) return 0; spin_lock_irq(&p->sighand->siglock); /* Re-check with the lock held. */ if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) { spin_unlock_irq(&p->sighand->siglock); return 0; } if (!unlikely(wo->wo_flags & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; uid = from_kuid_munged(current_user_ns(), task_uid(p)); spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); if (wo->wo_rusage) getrusage(p, RUSAGE_BOTH, wo->wo_rusage); put_task_struct(p); infop = wo->wo_info; if (!infop) { wo->wo_stat = 0xffff; } else { infop->cause = CLD_CONTINUED; infop->pid = pid; infop->uid = uid; infop->status = SIGCONT; } return pid; } /* * Consider @p for a wait by @parent. * * -ECHILD should be in ->notask_error before the first call. * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; * then ->notask_error is 0 if @p is an eligible child, * or still -ECHILD. */ static int wait_consider_task(struct wait_opts *wo, int ptrace, struct task_struct *p) { /* * We can race with wait_task_zombie() from another thread. * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition * can't confuse the checks below. */ int exit_state = READ_ONCE(p->exit_state); int ret; if (unlikely(exit_state == EXIT_DEAD)) return 0; ret = eligible_child(wo, ptrace, p); if (!ret) return ret; if (unlikely(exit_state == EXIT_TRACE)) { /* * ptrace == 0 means we are the natural parent. In this case * we should clear notask_error, debugger will notify us. */ if (likely(!ptrace)) wo->notask_error = 0; return 0; } if (likely(!ptrace) && unlikely(p->ptrace)) { /* * If it is traced by its real parent's group, just pretend * the caller is ptrace_do_wait() and reap this child if it * is zombie. * * This also hides group stop state from real parent; otherwise * a single stop can be reported twice as group and ptrace stop. * If a ptracer wants to distinguish these two events for its * own children it should create a separate process which takes * the role of real parent. */ if (!ptrace_reparented(p)) ptrace = 1; } /* slay zombie? */ if (exit_state == EXIT_ZOMBIE) { /* we don't reap group leaders with subthreads */ if (!delay_group_leader(p)) { /* * A zombie ptracee is only visible to its ptracer. * Notification and reaping will be cascaded to the * real parent when the ptracer detaches. 
*/ if (unlikely(ptrace) || likely(!p->ptrace)) return wait_task_zombie(wo, p); } /* * Allow access to stopped/continued state via zombie by * falling through. Clearing of notask_error is complex. * * When !@ptrace: * * If WEXITED is set, notask_error should naturally be * cleared. If not, subset of WSTOPPED|WCONTINUED is set, * so, if there are live subthreads, there are events to * wait for. If all subthreads are dead, it's still safe * to clear - this function will be called again in finite * amount time once all the subthreads are released and * will then return without clearing. * * When @ptrace: * * Stopped state is per-task and thus can't change once the * target task dies. Only continued and exited can happen. * Clear notask_error if WCONTINUED | WEXITED. */ if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) wo->notask_error = 0; } else { /* * @p is alive and it's gonna stop, continue or exit, so * there always is something to wait for. */ wo->notask_error = 0; } /* * Wait for stopped. Depending on @ptrace, different stopped state * is used and the two don't interact with each other. */ ret = wait_task_stopped(wo, ptrace, p); if (ret) return ret; /* * Wait for continued. There's only one continued state and the * ptracer can consume it which can confuse the real parent. Don't * use WCONTINUED from ptracer. You don't need or want it. */ return wait_task_continued(wo, p); } /* * Do the work of do_wait() for one thread in the group, @tsk. * * -ECHILD should be in ->notask_error before the first call. * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; then * ->notask_error is 0 if there were any eligible children, * or still -ECHILD. */ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) { struct task_struct *p; list_for_each_entry(p, &tsk->children, sibling) { int ret = wait_consider_task(wo, 0, p); if (ret) return ret; } return 0; } static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) { struct task_struct *p; list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { int ret = wait_consider_task(wo, 1, p); if (ret) return ret; } return 0; } bool pid_child_should_wake(struct wait_opts *wo, struct task_struct *p) { if (!eligible_pid(wo, p)) return false; if ((wo->wo_flags & __WNOTHREAD) && wo->child_wait.private != p->parent) return false; return true; } static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait); struct task_struct *p = key; if (pid_child_should_wake(wo, p)) return default_wake_function(wait, mode, sync, key); return 0; } void __wake_up_parent(struct task_struct *p, struct task_struct *parent) { __wake_up_sync_key(&parent->signal->wait_chldexit, TASK_INTERRUPTIBLE, p); } static bool is_effectively_child(struct wait_opts *wo, bool ptrace, struct task_struct *target) { struct task_struct *parent = !ptrace ? target->real_parent : target->parent; return current == parent || (!(wo->wo_flags & __WNOTHREAD) && same_thread_group(current, parent)); } /* * Optimization for waiting on PIDTYPE_PID. No need to iterate through child * and tracee lists to find the target task. 
*/ static int do_wait_pid(struct wait_opts *wo) { bool ptrace; struct task_struct *target; int retval; ptrace = false; target = pid_task(wo->wo_pid, PIDTYPE_TGID); if (target && is_effectively_child(wo, ptrace, target)) { retval = wait_consider_task(wo, ptrace, target); if (retval) return retval; } ptrace = true; target = pid_task(wo->wo_pid, PIDTYPE_PID); if (target && target->ptrace && is_effectively_child(wo, ptrace, target)) { retval = wait_consider_task(wo, ptrace, target); if (retval) return retval; } return 0; } long __do_wait(struct wait_opts *wo) { long retval; /* * If there is nothing that can match our criteria, just get out. * We will clear ->notask_error to zero if we see any child that * might later match our criteria, even if we are not able to reap * it yet. */ wo->notask_error = -ECHILD; if ((wo->wo_type < PIDTYPE_MAX) && (!wo->wo_pid || !pid_has_task(wo->wo_pid, wo->wo_type))) goto notask; read_lock(&tasklist_lock); if (wo->wo_type == PIDTYPE_PID) { retval = do_wait_pid(wo); if (retval) return retval; } else { struct task_struct *tsk = current; do { retval = do_wait_thread(wo, tsk); if (retval) return retval; retval = ptrace_do_wait(wo, tsk); if (retval) return retval; if (wo->wo_flags & __WNOTHREAD) break; } while_each_thread(current, tsk); } read_unlock(&tasklist_lock); notask: retval = wo->notask_error; if (!retval && !(wo->wo_flags & WNOHANG)) return -ERESTARTSYS; return retval; } static long do_wait(struct wait_opts *wo) { int retval; trace_sched_process_wait(wo->wo_pid); init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); wo->child_wait.private = current; add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait); do { set_current_state(TASK_INTERRUPTIBLE); retval = __do_wait(wo); if (retval != -ERESTARTSYS) break; if (signal_pending(current)) break; schedule(); } while (1); __set_current_state(TASK_RUNNING); remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait); return retval; } int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid, struct waitid_info *infop, int options, struct rusage *ru) { unsigned int f_flags = 0; struct pid *pid = NULL; enum pid_type type; if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED| __WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; if (!(options & (WEXITED|WSTOPPED|WCONTINUED))) return -EINVAL; switch (which) { case P_ALL: type = PIDTYPE_MAX; break; case P_PID: type = PIDTYPE_PID; if (upid <= 0) return -EINVAL; pid = find_get_pid(upid); break; case P_PGID: type = PIDTYPE_PGID; if (upid < 0) return -EINVAL; if (upid) pid = find_get_pid(upid); else pid = get_task_pid(current, PIDTYPE_PGID); break; case P_PIDFD: type = PIDTYPE_PID; if (upid < 0) return -EINVAL; pid = pidfd_get_pid(upid, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); break; default: return -EINVAL; } wo->wo_type = type; wo->wo_pid = pid; wo->wo_flags = options; wo->wo_info = infop; wo->wo_rusage = ru; if (f_flags & O_NONBLOCK) wo->wo_flags |= WNOHANG; return 0; } static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, int options, struct rusage *ru) { struct wait_opts wo; long ret; ret = kernel_waitid_prepare(&wo, which, upid, infop, options, ru); if (ret) return ret; ret = do_wait(&wo); if (!ret && !(options & WNOHANG) && (wo.wo_flags & WNOHANG)) ret = -EAGAIN; put_pid(wo.wo_pid); return ret; } SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, infop, int, options, struct rusage __user *, ru) { struct rusage r; struct waitid_info info = {.status = 0}; long err = 
kernel_waitid(which, upid, &info, options, ru ? &r : NULL); int signo = 0; if (err > 0) { signo = SIGCHLD; err = 0; if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) return -EFAULT; } if (!infop) return err; if (!user_write_access_begin(infop, sizeof(*infop))) return -EFAULT; unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); unsafe_put_user(info.cause, &infop->si_code, Efault); unsafe_put_user(info.pid, &infop->si_pid, Efault); unsafe_put_user(info.uid, &infop->si_uid, Efault); unsafe_put_user(info.status, &infop->si_status, Efault); user_write_access_end(); return err; Efault: user_write_access_end(); return -EFAULT; } long kernel_wait4(pid_t upid, int __user *stat_addr, int options, struct rusage *ru) { struct wait_opts wo; struct pid *pid = NULL; enum pid_type type; long ret; if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| __WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; /* -INT_MIN is not defined */ if (upid == INT_MIN) return -ESRCH; if (upid == -1) type = PIDTYPE_MAX; else if (upid < 0) { type = PIDTYPE_PGID; pid = find_get_pid(-upid); } else if (upid == 0) { type = PIDTYPE_PGID; pid = get_task_pid(current, PIDTYPE_PGID); } else /* upid > 0 */ { type = PIDTYPE_PID; pid = find_get_pid(upid); } wo.wo_type = type; wo.wo_pid = pid; wo.wo_flags = options | WEXITED; wo.wo_info = NULL; wo.wo_stat = 0; wo.wo_rusage = ru; ret = do_wait(&wo); put_pid(pid); if (ret > 0 && stat_addr && put_user(wo.wo_stat, stat_addr)) ret = -EFAULT; return ret; } int kernel_wait(pid_t pid, int *stat) { struct wait_opts wo = { .wo_type = PIDTYPE_PID, .wo_pid = find_get_pid(pid), .wo_flags = WEXITED, }; int ret; ret = do_wait(&wo); if (ret > 0 && wo.wo_stat) *stat = wo.wo_stat; put_pid(wo.wo_pid); return ret; } SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, int, options, struct rusage __user *, ru) { struct rusage r; long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL); if (err > 0) { if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) return -EFAULT; } return err; } #ifdef __ARCH_WANT_SYS_WAITPID /* * sys_waitpid() remains for compatibility. waitpid() should be * implemented by calling sys_wait4() from libc.a. */ SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) { return kernel_wait4(pid, stat_addr, options, NULL); } #endif #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(wait4, compat_pid_t, pid, compat_uint_t __user *, stat_addr, int, options, struct compat_rusage __user *, ru) { struct rusage r; long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL); if (err > 0) { if (ru && put_compat_rusage(&r, ru)) return -EFAULT; } return err; } COMPAT_SYSCALL_DEFINE5(waitid, int, which, compat_pid_t, pid, struct compat_siginfo __user *, infop, int, options, struct compat_rusage __user *, uru) { struct rusage ru; struct waitid_info info = {.status = 0}; long err = kernel_waitid(which, pid, &info, options, uru ? 
&ru : NULL); int signo = 0; if (err > 0) { signo = SIGCHLD; err = 0; if (uru) { /* kernel_waitid() overwrites everything in ru */ if (COMPAT_USE_64BIT_TIME) err = copy_to_user(uru, &ru, sizeof(ru)); else err = put_compat_rusage(&ru, uru); if (err) return -EFAULT; } } if (!infop) return err; if (!user_write_access_begin(infop, sizeof(*infop))) return -EFAULT; unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); unsafe_put_user(info.cause, &infop->si_code, Efault); unsafe_put_user(info.pid, &infop->si_pid, Efault); unsafe_put_user(info.uid, &infop->si_uid, Efault); unsafe_put_user(info.status, &infop->si_status, Efault); user_write_access_end(); return err; Efault: user_write_access_end(); return -EFAULT; } #endif /* * This needs to be __function_aligned as GCC implicitly makes any * implementation of abort() cold and drops alignment specified by * -falign-functions=N. * * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c11 */ __weak __function_aligned void abort(void) { BUG(); /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } EXPORT_SYMBOL(abort);
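/*
 * Editor's note: the block below is an illustrative userspace sketch, not
 * part of kernel/exit.c.  It exercises the waitid() contract implemented
 * above: WNOHANG|WNOWAIT polls without consuming the child (si_pid stays 0
 * if nothing is ready), while a blocking WEXITED wait reaps the zombie and
 * fills exactly the siginfo fields that sys_waitid() copies out
 * (si_signo, si_code, si_pid, si_status).
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	siginfo_t info = { 0 };
	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0)
		_exit(42);	/* child: exit status shows up as si_status */

	/* Poll without reaping: if the child hasn't exited yet, si_pid == 0. */
	if (waitid(P_PID, pid, &info, WEXITED | WNOHANG | WNOWAIT) == 0 &&
	    info.si_pid == 0)
		printf("child %d has not changed state yet\n", (int)pid);

	/* Blocking wait: reaps the zombie and fills in the siginfo fields. */
	if (waitid(P_PID, pid, &info, WEXITED) == 0)
		printf("si_signo=%d (SIGCHLD=%d) si_code=%d si_pid=%d si_status=%d\n",
		       info.si_signo, SIGCHLD, info.si_code,
		       (int)info.si_pid, info.si_status);
	return 0;
}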
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* memcontrol.h - Memory Controller * * Copyright IBM Corporation, 2007 * Author Balbir Singh <balbir@linux.vnet.ibm.com> * * Copyright 2007 OpenVZ SWsoft Inc * Author: Pavel Emelianov <xemul@openvz.org> */ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H #include <linux/cgroup.h> #include <linux/vm_event_item.h> #include <linux/hardirq.h> #include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/page_counter.h> #include <linux/vmpressure.h> #include <linux/eventfd.h> #include <linux/mm.h> #include <linux/vmstat.h> #include <linux/writeback.h> #include <linux/page-flags.h> #include <linux/shrinker.h> struct mem_cgroup; struct obj_cgroup; struct page; struct mm_struct; struct kmem_cache; /* Cgroup-specific page state, on top of universal node page state */ enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, MEMCG_PERCPU_B, MEMCG_VMALLOC, MEMCG_KMEM, MEMCG_ZSWAP_B, MEMCG_ZSWAPPED, MEMCG_NR_STAT, }; enum memcg_memory_event { MEMCG_LOW, MEMCG_HIGH, MEMCG_MAX, MEMCG_OOM, MEMCG_OOM_KILL, MEMCG_OOM_GROUP_KILL, MEMCG_SWAP_HIGH, MEMCG_SWAP_MAX, MEMCG_SWAP_FAIL, MEMCG_NR_MEMORY_EVENTS, }; struct mem_cgroup_reclaim_cookie { pg_data_t *pgdat; unsigned int generation; }; #ifdef CONFIG_MEMCG #define MEM_CGROUP_ID_SHIFT 16 struct mem_cgroup_id { int id; refcount_t ref; }; /* * Per memcg event counter is incremented at every pagein/pageout. With THP, * it will be incremented by the number of pages. This counter is used * to trigger some periodic events. This is straightforward and better * than using jiffies etc. to handle periodic memcg event. 
*/ enum mem_cgroup_events_target { MEM_CGROUP_TARGET_THRESH, MEM_CGROUP_TARGET_SOFTLIMIT, MEM_CGROUP_NTARGETS, }; struct memcg_vmstats_percpu; struct memcg_vmstats; struct mem_cgroup_reclaim_iter { struct mem_cgroup *position; /* scan generation, increased every round-trip */ unsigned int generation; }; struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; /* Delta calculation for lockless upward propagation */ long state_prev[NR_VM_NODE_STAT_ITEMS]; }; struct lruvec_stats { /* Aggregated (CPU and subtree) state */ long state[NR_VM_NODE_STAT_ITEMS]; /* Non-hierarchical (CPU aggregated) state */ long state_local[NR_VM_NODE_STAT_ITEMS]; /* Pending child counts during tree propagation */ long state_pending[NR_VM_NODE_STAT_ITEMS]; }; /* * per-node information in memory controller. */ struct mem_cgroup_per_node { struct lruvec lruvec; struct lruvec_stats_percpu __percpu *lruvec_stats_percpu; struct lruvec_stats lruvec_stats; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter; struct shrinker_info __rcu *shrinker_info; struct rb_node tree_node; /* RB tree node */ unsigned long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ bool on_tree; struct mem_cgroup *memcg; /* Back pointer, we cannot */ /* use container_of */ }; struct mem_cgroup_threshold { struct eventfd_ctx *eventfd; unsigned long threshold; }; /* For threshold */ struct mem_cgroup_threshold_ary { /* An array index points to threshold just below or equal to usage. */ int current_threshold; /* Size of entries[] */ unsigned int size; /* Array of thresholds */ struct mem_cgroup_threshold entries[] __counted_by(size); }; struct mem_cgroup_thresholds { /* Primary thresholds array */ struct mem_cgroup_threshold_ary *primary; /* * Spare threshold array. * This is needed to make mem_cgroup_unregister_event() "never fail". * It must be able to store at least primary->size - 1 entries. */ struct mem_cgroup_threshold_ary *spare; }; /* * Remember four most recent foreign writebacks with dirty pages in this * cgroup. Inode sharing is expected to be uncommon and, even if we miss * one in a given round, we're likely to catch it later if it keeps * foreign-dirtying, so a fairly low count should be enough. * * See mem_cgroup_track_foreign_dirty_slowpath() for details. */ #define MEMCG_CGWB_FRN_CNT 4 struct memcg_cgwb_frn { u64 bdi_id; /* bdi->id of the foreign inode */ int memcg_id; /* memcg->css.id of foreign inode */ u64 at; /* jiffies_64 at the time of dirtying */ struct wb_completion done; /* tracks in-flight foreign writebacks */ }; /* * Bucket for arbitrarily byte-sized objects charged to a memory * cgroup. The bucket can be reparented in one piece when the cgroup * is destroyed, without having to round up the individual references * of all live memory objects in the wild. */ struct obj_cgroup { struct percpu_ref refcnt; struct mem_cgroup *memcg; atomic_t nr_charged_bytes; union { struct list_head list; /* protected by objcg_lock */ struct rcu_head rcu; }; }; /* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide * statistics based on the statistics developed by Rik Van Riel for clock-pro, * to help the administrator determine what knobs to tune. */ struct mem_cgroup { struct cgroup_subsys_state css; /* Private memcg ID. 
Used to ID objects that outlive the cgroup */ struct mem_cgroup_id id; /* Accounted resources */ struct page_counter memory; /* Both v1 & v2 */ union { struct page_counter swap; /* v2 only */ struct page_counter memsw; /* v1 only */ }; /* Legacy consumer-oriented counters */ struct page_counter kmem; /* v1 only */ struct page_counter tcpmem; /* v1 only */ /* Range enforcement for interrupt charges */ struct work_struct high_work; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) unsigned long zswap_max; /* * Prevent pages from this memcg from being written back from zswap to * swap, and from being swapped out on zswap store failures. */ bool zswap_writeback; #endif unsigned long soft_limit; /* vmpressure notifications */ struct vmpressure vmpressure; /* * Should the OOM killer kill all belonging tasks, had it kill one? */ bool oom_group; /* protected by memcg_oom_lock */ bool oom_lock; int under_oom; int swappiness; /* OOM-Killer disable */ int oom_kill_disable; /* memory.events and memory.events.local */ struct cgroup_file events_file; struct cgroup_file events_local_file; /* handle for "memory.swap.events" */ struct cgroup_file swap_events_file; /* protect arrays of thresholds */ struct mutex thresholds_lock; /* thresholds for memory usage. RCU-protected */ struct mem_cgroup_thresholds thresholds; /* thresholds for mem+swap usage. RCU-protected */ struct mem_cgroup_thresholds memsw_thresholds; /* For oom notifier event fd */ struct list_head oom_notify; /* * Should we move charges of a task when a task is moved into this * mem_cgroup ? And what type of charges should we move ? */ unsigned long move_charge_at_immigrate; /* taken only while moving_account > 0 */ spinlock_t move_lock; unsigned long move_lock_flags; CACHELINE_PADDING(_pad1_); /* memory.stat */ struct memcg_vmstats *vmstats; /* memory.events */ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS]; /* * Hint of reclaim pressure for socket memroy management. Note * that this indicator should NOT be used in legacy cgroup mode * where socket memory is accounted/charged separately. */ unsigned long socket_pressure; /* Legacy tcp memory accounting */ bool tcpmem_active; int tcpmem_pressure; #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; /* * memcg->objcg is wiped out as a part of the objcg repaprenting * process. memcg->orig_objcg preserves a pointer (and a reference) * to the original objcg until the end of live of memcg. */ struct obj_cgroup __rcu *objcg; struct obj_cgroup *orig_objcg; /* list of inherited objcgs, protected by objcg_lock */ struct list_head objcg_list; #endif CACHELINE_PADDING(_pad2_); /* * set > 0 if pages under this cgroup are moving to other cgroup. */ atomic_t moving_account; struct task_struct *move_lock_task; struct memcg_vmstats_percpu __percpu *vmstats_percpu; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; struct wb_domain cgwb_domain; struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT]; #endif /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split deferred_split_queue; #endif #ifdef CONFIG_LRU_GEN_WALKS_MMU /* per-memcg mm_struct list */ struct lru_gen_mm_list mm_list; #endif struct mem_cgroup_per_node *nodeinfo[]; }; /* * size of first charge trial. * TODO: maybe necessary to use big numbers in big irons or dynamic based of the * workload. 
*/ #define MEMCG_CHARGE_BATCH 64U extern struct mem_cgroup *root_mem_cgroup; enum page_memcg_data_flags { /* page->memcg_data is a pointer to an objcgs vector */ MEMCG_DATA_OBJCGS = (1UL << 0), /* page has been accounted as a non-slab kernel page */ MEMCG_DATA_KMEM = (1UL << 1), /* the next bit after the last actual flag */ __NR_MEMCG_DATA_FLAGS = (1UL << 2), }; #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) static inline bool folio_memcg_kmem(struct folio *folio); /* * After the initialization objcg->memcg is always pointing at * a valid memcg, but can be atomically swapped to the parent memcg. * * The caller must ensure that the returned memcg won't be released: * e.g. acquire the rcu_read_lock or css_set_lock. */ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) { return READ_ONCE(objcg->memcg); } /* * __folio_memcg - Get the memory cgroup associated with a non-kmem folio * @folio: Pointer to the folio. * * Returns a pointer to the memory cgroup associated with the folio, * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function * against some type of folios, e.g. slab folios or ex-slab folios or * kmem folios. */ static inline struct mem_cgroup *__folio_memcg(struct folio *folio) { unsigned long memcg_data = folio->memcg_data; VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio); return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* * __folio_objcg - get the object cgroup associated with a kmem folio. * @folio: Pointer to the folio. * * Returns a pointer to the object cgroup associated with the folio, * or NULL. This function assumes that the folio is known to have a * proper object cgroup pointer. It's not safe to call this function * against some type of folios, e.g. slab folios or ex-slab folios or * LRU folios. */ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) { unsigned long memcg_data = folio->memcg_data; VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio); return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* * folio_memcg - Get the memory cgroup associated with a folio. * @folio: Pointer to the folio. * * Returns a pointer to the memory cgroup associated with the folio, * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function * against some type of folios, e.g. slab folios or ex-slab folios. * * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * * - the folio lock * - LRU isolation * - folio_memcg_lock() * - exclusive reference * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. */ static inline struct mem_cgroup *folio_memcg(struct folio *folio) { if (folio_memcg_kmem(folio)) return obj_cgroup_memcg(__folio_objcg(folio)); return __folio_memcg(folio); } static inline struct mem_cgroup *page_memcg(struct page *page) { return folio_memcg(page_folio(page)); } /** * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. * @folio: Pointer to the folio. * * This function assumes that the folio is known to have a * proper memory cgroup pointer. 
It's not safe to call this function * against some type of folios, e.g. slab folios or ex-slab folios. * * Return: A pointer to the memory cgroup associated with the folio, * or NULL. */ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { unsigned long memcg_data = READ_ONCE(folio->memcg_data); VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); WARN_ON_ONCE(!rcu_read_lock_held()); if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); return obj_cgroup_memcg(objcg); } return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* * folio_memcg_check - Get the memory cgroup associated with a folio. * @folio: Pointer to the folio. * * Returns a pointer to the memory cgroup associated with the folio, * or NULL. This function unlike folio_memcg() can take any folio * as an argument. It has to be used in cases when it's not known if a folio * has an associated memory cgroup pointer or an object cgroups vector or * an object cgroup. * * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * * - the folio lock * - LRU isolation * - lock_folio_memcg() * - exclusive reference * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. */ static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) { /* * Because folio->memcg_data might be changed asynchronously * for slabs, READ_ONCE() should be used here. */ unsigned long memcg_data = READ_ONCE(folio->memcg_data); if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); return obj_cgroup_memcg(objcg); } return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } static inline struct mem_cgroup *page_memcg_check(struct page *page) { if (PageTail(page)) return NULL; return folio_memcg_check((struct folio *)page); } static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) { struct mem_cgroup *memcg; rcu_read_lock(); retry: memcg = obj_cgroup_memcg(objcg); if (unlikely(!css_tryget(&memcg->css))) goto retry; rcu_read_unlock(); return memcg; } #ifdef CONFIG_MEMCG_KMEM /* * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. * @folio: Pointer to the folio. * * Checks if the folio has MemcgKmem flag set. The caller must ensure * that the folio has an associated memory cgroup. It's not safe to call * this function against some types of folios, e.g. slab folios. */ static inline bool folio_memcg_kmem(struct folio *folio) { VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page); VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio); return folio->memcg_data & MEMCG_DATA_KMEM; } #else static inline bool folio_memcg_kmem(struct folio *folio) { return false; } #endif static inline bool PageMemcgKmem(struct page *page) { return folio_memcg_kmem(page_folio(page)); } static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); } static inline bool mem_cgroup_disabled(void) { return !cgroup_subsys_enabled(memory_cgrp_subsys); } static inline void mem_cgroup_protection(struct mem_cgroup *root, struct mem_cgroup *memcg, unsigned long *min, unsigned long *low) { *min = *low = 0; if (mem_cgroup_disabled()) return; /* * There is no reclaim protection applied to a targeted reclaim. 
* We are special casing this specific case here because * mem_cgroup_calculate_protection is not robust enough to keep * the protection invariant for calculated effective values for * parallel reclaimers with different reclaim target. This is * especially a problem for tail memcgs (as they have pages on LRU) * which would want to have effective values 0 for targeted reclaim * but a different value for external reclaim. * * Example * Let's have global and A's reclaim in parallel: * | * A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G) * |\ * | C (low = 1G, usage = 2.5G) * B (low = 1G, usage = 0.5G) * * For the global reclaim * A.elow = A.low * B.elow = min(B.usage, B.low) because children_low_usage <= A.elow * C.elow = min(C.usage, C.low) * * With the effective values resetting we have A reclaim * A.elow = 0 * B.elow = B.low * C.elow = C.low * * If the global reclaim races with A's reclaim then * B.elow = C.elow = 0 because children_low_usage > A.elow) * is possible and reclaiming B would be violating the protection. * */ if (root == memcg) return; *min = READ_ONCE(memcg->memory.emin); *low = READ_ONCE(memcg->memory.elow); } void mem_cgroup_calculate_protection(struct mem_cgroup *root, struct mem_cgroup *memcg); static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, struct mem_cgroup *memcg) { /* * The root memcg doesn't account charges, and doesn't support * protection. The target memcg's protection is ignored, see * mem_cgroup_calculate_protection() and mem_cgroup_protection() */ return mem_cgroup_disabled() || mem_cgroup_is_root(memcg) || memcg == target; } static inline bool mem_cgroup_below_low(struct mem_cgroup *target, struct mem_cgroup *memcg) { if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.elow) >= page_counter_read(&memcg->memory); } static inline bool mem_cgroup_below_min(struct mem_cgroup *target, struct mem_cgroup *memcg) { if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.emin) >= page_counter_read(&memcg->memory); } void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg); int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); /** * mem_cgroup_charge - Charge a newly allocated folio to a cgroup. * @folio: Folio to charge. * @mm: mm context of the allocating task. * @gfp: Reclaim mode. * * Try to charge @folio to the memcg that @mm belongs to, reclaiming * pages according to @gfp if necessary. If @mm is NULL, try to * charge to the active memcg. * * Do not use this for folios allocated for swapin. * * Return: 0 on success. Otherwise, an error code is returned. */ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { if (mem_cgroup_disabled()) return 0; return __mem_cgroup_charge(folio, mm, gfp); } int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, long nr_pages); int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); void __mem_cgroup_uncharge(struct folio *folio); /** * mem_cgroup_uncharge - Uncharge a folio. * @folio: Folio to uncharge. * * Uncharge a folio previously charged with mem_cgroup_charge(). 
*/ static inline void mem_cgroup_uncharge(struct folio *folio) { if (mem_cgroup_disabled()) return; __mem_cgroup_uncharge(folio); } void __mem_cgroup_uncharge_folios(struct folio_batch *folios); static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) { if (mem_cgroup_disabled()) return; __mem_cgroup_uncharge_folios(folios); } void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_replace_folio(struct folio *old, struct folio *new); void mem_cgroup_migrate(struct folio *old, struct folio *new); /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec * @pgdat: pglist_data * * Returns the lru list vector holding pages for a given @memcg & * @pgdat combination. This can be the node lruvec, if the memory * controller is disabled. */ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, struct pglist_data *pgdat) { struct mem_cgroup_per_node *mz; struct lruvec *lruvec; if (mem_cgroup_disabled()) { lruvec = &pgdat->__lruvec; goto out; } if (!memcg) memcg = root_mem_cgroup; mz = memcg->nodeinfo[pgdat->node_id]; lruvec = &mz->lruvec; out: /* * Since a node can be onlined after the mem_cgroup was created, * we have to be prepared to initialize lruvec->pgdat here; * and if offlined then reonlined, we need to reinitialize it. */ if (unlikely(lruvec->pgdat != pgdat)) lruvec->pgdat = pgdat; return lruvec; } /** * folio_lruvec - return lruvec for isolating/putting an LRU folio * @folio: Pointer to the folio. * * This function relies on folio->mem_cgroup being stable. */ static inline struct lruvec *folio_lruvec(struct folio *folio) { struct mem_cgroup *memcg = folio_memcg(folio); VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio); return mem_cgroup_lruvec(memcg, folio_pgdat(folio)); } struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); struct mem_cgroup *get_mem_cgroup_from_current(void); struct lruvec *folio_lruvec_lock(struct folio *folio); struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags); #ifdef CONFIG_DEBUG_VM void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio); #else static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } #endif static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ return css ? 
container_of(css, struct mem_cgroup, css) : NULL; } static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg) { return percpu_ref_tryget(&objcg->refcnt); } static inline void obj_cgroup_get(struct obj_cgroup *objcg) { percpu_ref_get(&objcg->refcnt); } static inline void obj_cgroup_get_many(struct obj_cgroup *objcg, unsigned long nr) { percpu_ref_get_many(&objcg->refcnt, nr); } static inline void obj_cgroup_put(struct obj_cgroup *objcg) { percpu_ref_put(&objcg->refcnt); } static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) { return !memcg || css_tryget(&memcg->css); } static inline bool mem_cgroup_tryget_online(struct mem_cgroup *memcg) { return !memcg || css_tryget_online(&memcg->css); } static inline void mem_cgroup_put(struct mem_cgroup *memcg) { if (memcg) css_put(&memcg->css); } #define mem_cgroup_from_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *, struct mem_cgroup_reclaim_cookie *); void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); void mem_cgroup_scan_tasks(struct mem_cgroup *memcg, int (*)(struct task_struct *, void *), void *arg); static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { if (mem_cgroup_disabled()) return 0; return memcg->id.id; } struct mem_cgroup *mem_cgroup_from_id(unsigned short id); #ifdef CONFIG_SHRINKER_DEBUG static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg) { return memcg ? cgroup_ino(memcg->css.cgroup) : 0; } struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino); #endif static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m) { return mem_cgroup_from_css(seq_css(m)); } static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) { struct mem_cgroup_per_node *mz; if (mem_cgroup_disabled()) return NULL; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); return mz->memcg; } /** * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find * * Returns the parent memcg, or NULL if this is the root. 
*/ static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { return mem_cgroup_from_css(memcg->css.parent); } static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root) { if (root == memcg) return true; return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup); } static inline bool mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg) { struct mem_cgroup *task_memcg; bool match = false; rcu_read_lock(); task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (task_memcg) match = mem_cgroup_is_descendant(task_memcg, memcg); rcu_read_unlock(); return match; } struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio); ino_t page_cgroup_ino(struct page *page); static inline bool mem_cgroup_online(struct mem_cgroup *memcg) { if (mem_cgroup_disabled()) return true; return !!(memcg->css.flags & CSS_ONLINE); } void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, int zid, int nr_pages); static inline unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) { struct mem_cgroup_per_node *mz; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } void mem_cgroup_handle_over_high(gfp_t gfp_mask); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); unsigned long mem_cgroup_size(struct mem_cgroup *memcg); void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p); void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg); static inline void mem_cgroup_enter_user_fault(void) { WARN_ON(current->in_user_fault); current->in_user_fault = 1; } static inline void mem_cgroup_exit_user_fault(void) { WARN_ON(!current->in_user_fault); current->in_user_fault = 0; } static inline bool task_in_memcg_oom(struct task_struct *p) { return p->memcg_in_oom; } bool mem_cgroup_oom_synchronize(bool wait); struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, struct mem_cgroup *oom_domain); void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); void folio_memcg_lock(struct folio *folio); void folio_memcg_unlock(struct folio *folio); void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); /* try to stablize folio_memcg() for all the pages in a memcg */ static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) { rcu_read_lock(); if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) return true; rcu_read_unlock(); return false; } static inline void mem_cgroup_unlock_pages(void) { rcu_read_unlock(); } /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) { unsigned long flags; local_irq_save(flags); __mod_memcg_state(memcg, idx, val); local_irq_restore(flags); } static inline void mod_memcg_page_state(struct page *page, int idx, int val) { struct mem_cgroup *memcg; if (mem_cgroup_disabled()) return; rcu_read_lock(); memcg = page_memcg(page); if (memcg) mod_memcg_state(memcg, idx, val); rcu_read_unlock(); } unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx); static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; long x; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); x = READ_ONCE(pn->lruvec_stats.state[idx]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif 
return x; } static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; long x = 0; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); x = READ_ONCE(pn->lruvec_stats.state_local[idx]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } void mem_cgroup_flush_stats(struct mem_cgroup *memcg); void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg); void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); __mod_lruvec_kmem_state(p, idx, val); local_irq_restore(flags); } static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); __mod_memcg_lruvec_state(lruvec, idx, val); local_irq_restore(flags); } void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count); static inline void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) { unsigned long flags; local_irq_save(flags); __count_memcg_events(memcg, idx, count); local_irq_restore(flags); } static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { struct mem_cgroup *memcg = folio_memcg(folio); if (memcg) count_memcg_events(memcg, idx, nr); } static inline void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { struct mem_cgroup *memcg; if (mem_cgroup_disabled()) return; rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (likely(memcg)) count_memcg_events(memcg, idx, 1); rcu_read_unlock(); } static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || event == MEMCG_SWAP_FAIL; atomic_long_inc(&memcg->memory_events_local[event]); if (!swap_event) cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); if (swap_event) cgroup_file_notify(&memcg->swap_events_file); else cgroup_file_notify(&memcg->events_file); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) break; } while ((memcg = parent_mem_cgroup(memcg)) && !mem_cgroup_is_root(memcg)); } static inline void memcg_memory_event_mm(struct mm_struct *mm, enum memcg_memory_event event) { struct mem_cgroup *memcg; if (mem_cgroup_disabled()) return; rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (likely(memcg)) memcg_memory_event(memcg, event); rcu_read_unlock(); } void split_page_memcg(struct page *head, int old_order, int new_order); unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 static inline struct mem_cgroup *folio_memcg(struct folio *folio) { return NULL; } static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; } static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { WARN_ON_ONCE(!rcu_read_lock_held()); return NULL; } static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) { return NULL; } static inline struct 
mem_cgroup *page_memcg_check(struct page *page) { return NULL; } static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) { return NULL; } static inline bool folio_memcg_kmem(struct folio *folio) { return false; } static inline bool PageMemcgKmem(struct page *page) { return false; } static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return true; } static inline bool mem_cgroup_disabled(void) { return true; } static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { } static inline void memcg_memory_event_mm(struct mm_struct *mm, enum memcg_memory_event event) { } static inline void mem_cgroup_protection(struct mem_cgroup *root, struct mem_cgroup *memcg, unsigned long *min, unsigned long *low) { *min = *low = 0; } static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, struct mem_cgroup *memcg) { } static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, struct mem_cgroup *memcg) { return true; } static inline bool mem_cgroup_below_low(struct mem_cgroup *target, struct mem_cgroup *memcg) { return false; } static inline bool mem_cgroup_below_min(struct mem_cgroup *target, struct mem_cgroup *memcg) { return false; } static inline void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg) { } static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { return 0; } static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, long nr_pages) { return 0; } static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { return 0; } static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) { } static inline void mem_cgroup_uncharge(struct folio *folio) { } static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) { } static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) { } static inline void mem_cgroup_replace_folio(struct folio *old, struct folio *new) { } static inline void mem_cgroup_migrate(struct folio *old, struct folio *new) { } static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, struct pglist_data *pgdat) { return &pgdat->__lruvec; } static inline struct lruvec *folio_lruvec(struct folio *folio) { struct pglist_data *pgdat = folio_pgdat(folio); return &pgdat->__lruvec; } static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { return NULL; } static inline bool mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg) { return true; } static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) { return NULL; } static inline struct mem_cgroup *get_mem_cgroup_from_current(void) { return NULL; } static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) { return NULL; } static inline void obj_cgroup_put(struct obj_cgroup *objcg) { } static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) { return true; } static inline bool mem_cgroup_tryget_online(struct mem_cgroup *memcg) { return true; } static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } static inline struct lruvec *folio_lruvec_lock(struct folio *folio) { struct pglist_data *pgdat = folio_pgdat(folio); spin_lock(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio) 
{ struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irq(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flagsp) { struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); return &pgdat->__lruvec; } static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, struct mem_cgroup_reclaim_cookie *reclaim) { return NULL; } static inline void mem_cgroup_iter_break(struct mem_cgroup *root, struct mem_cgroup *prev) { } static inline void mem_cgroup_scan_tasks(struct mem_cgroup *memcg, int (*fn)(struct task_struct *, void *), void *arg) { } static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { return 0; } static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) { WARN_ON_ONCE(id); /* XXX: This should always return root_mem_cgroup */ return NULL; } #ifdef CONFIG_SHRINKER_DEBUG static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg) { return 0; } static inline struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino) { return NULL; } #endif static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m) { return NULL; } static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) { return NULL; } static inline bool mem_cgroup_online(struct mem_cgroup *memcg) { return true; } static inline unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) { return 0; } static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg) { return 0; } static inline unsigned long mem_cgroup_size(struct mem_cgroup *memcg) { return 0; } static inline void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p) { } static inline void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) { } static inline void folio_memcg_lock(struct folio *folio) { } static inline void folio_memcg_unlock(struct folio *folio) { } static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) { /* to match folio_memcg_rcu() */ rcu_read_lock(); return true; } static inline void mem_cgroup_unlock_pages(void) { rcu_read_unlock(); } static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } static inline void mem_cgroup_enter_user_fault(void) { } static inline void mem_cgroup_exit_user_fault(void) { } static inline bool task_in_memcg_oom(struct task_struct *p) { return false; } static inline bool mem_cgroup_oom_synchronize(bool wait) { return false; } static inline struct mem_cgroup *mem_cgroup_get_oom_group( struct task_struct *victim, struct mem_cgroup *oom_domain) { return NULL; } static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) { } static inline void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int nr) { } static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int nr) { } static inline void mod_memcg_page_state(struct page *page, int idx, int val) { } static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { return 0; } static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { return node_page_state(lruvec_pgdat(lruvec), idx); } static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, enum node_stat_item idx) { return node_page_state(lruvec_pgdat(lruvec), idx); } static inline void mem_cgroup_flush_stats(struct mem_cgroup *memcg) { } static inline void 
mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg) { } static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { } static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { struct page *page = virt_to_head_page(p); __mod_node_page_state(page_pgdat(page), idx, val); } static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { struct page *page = virt_to_head_page(p); mod_node_page_state(page_pgdat(page), idx, val); } static inline void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) { } static inline void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) { } static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { } static inline void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } static inline void split_page_memcg(struct page *head, int old_order, int new_order) { } static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned) { return 0; } #endif /* CONFIG_MEMCG */ static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, 1); } static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, -1); } static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) { struct mem_cgroup *memcg; memcg = lruvec_memcg(lruvec); if (!memcg) return NULL; memcg = parent_mem_cgroup(memcg); if (!memcg) return NULL; return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec)); } static inline void unlock_page_lruvec(struct lruvec *lruvec) { spin_unlock(&lruvec->lru_lock); } static inline void unlock_page_lruvec_irq(struct lruvec *lruvec) { spin_unlock_irq(&lruvec->lru_lock); } static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, unsigned long flags) { spin_unlock_irqrestore(&lruvec->lru_lock, flags); } /* Test requires a stable page->memcg binding, see page_memcg() */ static inline bool folio_matches_lruvec(struct folio *folio, struct lruvec *lruvec) { return lruvec_pgdat(lruvec) == folio_pgdat(folio) && lruvec_memcg(lruvec) == folio_memcg(folio); } /* Don't lock again iff page's lruvec locked */ static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio, struct lruvec *locked_lruvec) { if (locked_lruvec) { if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irq(locked_lruvec); } return folio_lruvec_lock_irq(folio); } /* Don't lock again iff folio's lruvec locked */ static inline void folio_lruvec_relock_irqsave(struct folio *folio, struct lruvec **lruvecp, unsigned long *flags) { if (*lruvecp) { if (folio_matches_lruvec(folio, *lruvecp)) return; unlock_page_lruvec_irqrestore(*lruvecp, *flags); } *lruvecp = folio_lruvec_lock_irqsave(folio, flags); } #ifdef CONFIG_CGROUP_WRITEBACK struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback); void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb); static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { struct mem_cgroup *memcg; if (mem_cgroup_disabled()) return; memcg = folio_memcg(folio); if (unlikely(memcg && &memcg->css 
!= wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(folio, wb); } void mem_cgroup_flush_foreign(struct bdi_writeback *wb); #else /* CONFIG_CGROUP_WRITEBACK */ static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) { return NULL; } static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback) { } static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { } static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) { } #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, gfp_t gfp_mask); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return !!memcg->tcpmem_pressure; do { if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) return true; } while ((memcg = parent_mem_cgroup(memcg))); return false; } int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; static inline void mem_cgroup_sk_free(struct sock *sk) { }; static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; } static inline void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { } #endif #ifdef CONFIG_MEMCG_KMEM bool mem_cgroup_kmem_disabled(void); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); /* * The returned objcg pointer is safe to use without additional * protection within a scope. The scope is defined either by * the current task (similar to the "current" global variable) * or by set_active_memcg() pair. * Please, use obj_cgroup_get() to get a reference if the pointer * needs to be used outside of the local scope. 
*/ struct obj_cgroup *current_obj_cgroup(void); struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio); static inline struct obj_cgroup *get_obj_cgroup_from_current(void) { struct obj_cgroup *objcg = current_obj_cgroup(); if (objcg) obj_cgroup_get(objcg); return objcg; } int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); extern struct static_key_false memcg_bpf_enabled_key; static inline bool memcg_bpf_enabled(void) { return static_branch_likely(&memcg_bpf_enabled_key); } extern struct static_key_false memcg_kmem_online_key; static inline bool memcg_kmem_online(void) { return static_branch_likely(&memcg_kmem_online_key); } static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) { if (memcg_kmem_online()) return __memcg_kmem_charge_page(page, gfp, order); return 0; } static inline void memcg_kmem_uncharge_page(struct page *page, int order) { if (memcg_kmem_online()) __memcg_kmem_uncharge_page(page, order); } /* * A helper for accessing memcg's kmem_id, used for getting * corresponding LRU lists. */ static inline int memcg_kmem_id(struct mem_cgroup *memcg) { return memcg ? memcg->kmemcg_id : -1; } struct mem_cgroup *mem_cgroup_from_obj(void *p); struct mem_cgroup *mem_cgroup_from_slab_obj(void *p); static inline void count_objcg_event(struct obj_cgroup *objcg, enum vm_event_item idx) { struct mem_cgroup *memcg; if (!memcg_kmem_online()) return; rcu_read_lock(); memcg = obj_cgroup_memcg(objcg); count_memcg_events(memcg, idx, 1); rcu_read_unlock(); } #else static inline bool mem_cgroup_kmem_disabled(void) { return true; } static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) { return 0; } static inline void memcg_kmem_uncharge_page(struct page *page, int order) { } static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) { return 0; } static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } static inline struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio) { return NULL; } static inline bool memcg_bpf_enabled(void) { return false; } static inline bool memcg_kmem_online(void) { return false; } static inline int memcg_kmem_id(struct mem_cgroup *memcg) { return -1; } static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) { return NULL; } static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) { return NULL; } static inline void count_objcg_event(struct obj_cgroup *objcg, enum vm_event_item idx) { } #endif /* CONFIG_MEMCG_KMEM */ #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) bool obj_cgroup_may_zswap(struct obj_cgroup *objcg); void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size); void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size); bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg); #else static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg) { return true; } static inline void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size) { } static inline void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size) { } static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg) { /* if zswap is disabled, do not block pages going to the swapping device */ return true; } #endif #endif /* _LINUX_MEMCONTROL_H */
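/*
 * Editor's note: the block below is an illustrative sketch, not part of
 * memcontrol.h.  It shows the basic charge/uncharge pairing for the folio
 * interfaces declared above, assuming a hypothetical in-kernel caller that
 * allocates an order-0 folio on behalf of a task's mm.  The helper names
 * (demo_*) are invented for the example.
 */
#include <linux/gfp.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>

/*
 * Allocate a folio and charge it to @mm's memcg (or to the active memcg if
 * @mm is NULL), reclaiming under GFP_KERNEL rules if the cgroup is at its
 * limit.
 */
static struct folio *demo_alloc_charged_folio(struct mm_struct *mm)
{
	struct folio *folio = folio_alloc(GFP_KERNEL, 0);

	if (!folio)
		return NULL;

	if (mem_cgroup_charge(folio, mm, GFP_KERNEL)) {
		folio_put(folio);	/* charge failed: give the folio back */
		return NULL;
	}
	return folio;
}

/* Drop the charge explicitly, then release the last reference. */
static void demo_free_charged_folio(struct folio *folio)
{
	mem_cgroup_uncharge(folio);
	folio_put(folio);
}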
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PATH_H
#define _LINUX_PATH_H

struct dentry;
struct vfsmount;

struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
} __randomize_layout;

extern void path_get(const struct path *);
extern void path_put(const struct path *);

static inline int path_equal(const struct path *path1, const struct path *path2)
{
	return path1->mnt == path2->mnt && path1->dentry == path2->dentry;
}

static inline void path_put_init(struct path *path)
{
	path_put(path);
	*path = (struct path) { };
}

#endif  /* _LINUX_PATH_H */
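/*
 * Editor's usage sketch (not part of path.h): keeping a private copy of a
 * struct path.  A copy must take its own references via path_get() and
 * release them via path_put(); path_put_init() also re-zeroes the struct
 * so a later path_put() on it is harmless.  The helpers below are
 * hypothetical.
 */
#if 0	/* illustrative only */
static struct path saved_path;

static void example_save_path(const struct path *src)
{
	saved_path = *src;
	path_get(&saved_path);		/* pin the vfsmount and dentry */
}

static void example_drop_path(void)
{
	path_put_init(&saved_path);	/* drop refs, clear mnt/dentry */
}
#endif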
// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/symlink.c - kernfs symlink implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/namei.h>

#include "kernfs-internal.h"

/**
 * kernfs_create_link - create a symlink
 * @parent: directory to create the symlink in
 * @name: name of the symlink
 * @target: target node for the symlink to point to
 *
 * Return: the created node on success, ERR_PTR() value on error.
 * Ownership of the link matches ownership of the target.
 */
struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
				       const char *name,
				       struct kernfs_node *target)
{
	struct kernfs_node *kn;
	int error;
	kuid_t uid = GLOBAL_ROOT_UID;
	kgid_t gid = GLOBAL_ROOT_GID;

	if (target->iattr) {
		uid = target->iattr->ia_uid;
		gid = target->iattr->ia_gid;
	}

	kn = kernfs_new_node(parent, name, S_IFLNK|0777, uid, gid, KERNFS_LINK);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	if (kernfs_ns_enabled(parent))
		kn->ns = target->ns;
	kn->symlink.target_kn = target;
	kernfs_get(target);	/* ref owned by symlink */

	error = kernfs_add_one(kn);
	if (!error)
		return kn;

	kernfs_put(kn);
	return ERR_PTR(error);
}

static int kernfs_get_target_path(struct kernfs_node *parent,
				  struct kernfs_node *target, char *path)
{
	struct kernfs_node *base, *kn;
	char *s = path;
	int len = 0;

	/* go up to the root, stop at the base */
	base = parent;
	while (base->parent) {
		kn = target->parent;
		while (kn->parent && base != kn)
			kn = kn->parent;

		if (base == kn)
			break;

		if ((s - path) + 3 >= PATH_MAX)
			return -ENAMETOOLONG;

		strcpy(s, "../");
		s += 3;
		base = base->parent;
	}

	/* determine end of target string for reverse fillup */
	kn = target;
	while (kn->parent && kn != base) {
		len += strlen(kn->name) + 1;
		kn = kn->parent;
	}

	/* check limits */
	if (len < 2)
		return -EINVAL;
	len--;
	if ((s - path) + len >= PATH_MAX)
		return -ENAMETOOLONG;

	/* reverse fillup of target string from target to base */
	kn = target;
	while (kn->parent && kn != base) {
		int slen = strlen(kn->name);

		len -= slen;
		memcpy(s + len, kn->name, slen);
		if (len)
			s[--len] = '/';

		kn = kn->parent;
	}

	return 0;
}

static int kernfs_getlink(struct inode *inode, char *path)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_node *parent = kn->parent;
	struct kernfs_node *target = kn->symlink.target_kn;
	struct kernfs_root *root = kernfs_root(parent);
	int error;

	down_read(&root->kernfs_rwsem);
	error = kernfs_get_target_path(parent, target, path);
	up_read(&root->kernfs_rwsem);

	return error;
}

static const char *kernfs_iop_get_link(struct dentry *dentry,
				       struct inode *inode,
				       struct delayed_call *done)
{
	char *body;
	int error;

	if (!dentry)
		return ERR_PTR(-ECHILD);
	body = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!body)
		return ERR_PTR(-ENOMEM);
	error = kernfs_getlink(inode, body);
	if (unlikely(error < 0)) {
		kfree(body);
		return ERR_PTR(error);
	}
	set_delayed_call(done, kfree_link, body);
	return body;
}

const struct inode_operations kernfs_symlink_iops = {
	.listxattr	= kernfs_iop_listxattr,
	.get_link	= kernfs_iop_get_link,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.permission	= kernfs_iop_permission,
};
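/*
 * Editor's usage sketch (not part of this file): a hypothetical caller
 * creating a kernfs symlink "foo" under @dir that points at @target,
 * roughly the pattern sysfs uses to implement sysfs_create_link() on top
 * of kernfs_create_link().
 */
#if 0	/* illustrative only */
static int example_add_link(struct kernfs_node *dir,
			    struct kernfs_node *target)
{
	struct kernfs_node *kn;

	kn = kernfs_create_link(dir, "foo", target);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	/* tear down later with kernfs_remove(kn) */
	return 0;
}
#endif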
// SPDX-License-Identifier: GPL-2.0-or-later
//
// core.c -- Voltage/Current Regulator framework.
//
// Copyright 2007, 2008 Wolfson Microelectronics PLC.
// Copyright 2008 SlimLogic Ltd.
// // Author: Liam Girdwood <lrg@slimlogic.co.uk> #include <linux/kernel.h> #include <linux/init.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/slab.h> #include <linux/async.h> #include <linux/err.h> #include <linux/mutex.h> #include <linux/suspend.h> #include <linux/delay.h> #include <linux/gpio/consumer.h> #include <linux/of.h> #include <linux/reboot.h> #include <linux/regmap.h> #include <linux/regulator/of_regulator.h> #include <linux/regulator/consumer.h> #include <linux/regulator/coupler.h> #include <linux/regulator/driver.h> #include <linux/regulator/machine.h> #include <linux/module.h> #define CREATE_TRACE_POINTS #include <trace/events/regulator.h> #include "dummy.h" #include "internal.h" #include "regnl.h" static DEFINE_WW_CLASS(regulator_ww_class); static DEFINE_MUTEX(regulator_nesting_mutex); static DEFINE_MUTEX(regulator_list_mutex); static LIST_HEAD(regulator_map_list); static LIST_HEAD(regulator_ena_gpio_list); static LIST_HEAD(regulator_supply_alias_list); static LIST_HEAD(regulator_coupler_list); static bool has_full_constraints; static struct dentry *debugfs_root; /* * struct regulator_map * * Used to provide symbolic supply names to devices. */ struct regulator_map { struct list_head list; const char *dev_name; /* The dev_name() for the consumer */ const char *supply; struct regulator_dev *regulator; }; /* * struct regulator_enable_gpio * * Management for shared enable GPIO pin */ struct regulator_enable_gpio { struct list_head list; struct gpio_desc *gpiod; u32 enable_count; /* a number of enabled shared GPIO */ u32 request_count; /* a number of requested shared GPIO */ }; /* * struct regulator_supply_alias * * Used to map lookups for a supply onto an alternative device. */ struct regulator_supply_alias { struct list_head list; struct device *src_dev; const char *src_supply; struct device *alias_dev; const char *alias_supply; }; static int _regulator_is_enabled(struct regulator_dev *rdev); static int _regulator_disable(struct regulator *regulator); static int _regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); static int regulator_balance_voltage(struct regulator_dev *rdev, suspend_state_t state); static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, const char *supply_name); static void destroy_regulator(struct regulator *regulator); static void _regulator_put(struct regulator *regulator); const char *rdev_get_name(struct regulator_dev *rdev) { if (rdev->constraints && rdev->constraints->name) return rdev->constraints->name; else if (rdev->desc->name) return rdev->desc->name; else return ""; } EXPORT_SYMBOL_GPL(rdev_get_name); static bool have_full_constraints(void) { return has_full_constraints || of_have_populated_dt(); } static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops) { if (!rdev->constraints) { rdev_err(rdev, "no constraints\n"); return false; } if (rdev->constraints->valid_ops_mask & ops) return true; return false; } /** * regulator_lock_nested - lock a single regulator * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * This function can be called many times by one task on * a single regulator and its mutex will be locked only 
* once. If a task, which is calling this function is other * than the one, which initially locked the mutex, it will * wait on mutex. */ static inline int regulator_lock_nested(struct regulator_dev *rdev, struct ww_acquire_ctx *ww_ctx) { bool lock = false; int ret = 0; mutex_lock(&regulator_nesting_mutex); if (!ww_mutex_trylock(&rdev->mutex, ww_ctx)) { if (rdev->mutex_owner == current) rdev->ref_cnt++; else lock = true; if (lock) { mutex_unlock(&regulator_nesting_mutex); ret = ww_mutex_lock(&rdev->mutex, ww_ctx); mutex_lock(&regulator_nesting_mutex); } } else { lock = true; } if (lock && ret != -EDEADLK) { rdev->ref_cnt++; rdev->mutex_owner = current; } mutex_unlock(&regulator_nesting_mutex); return ret; } /** * regulator_lock - lock a single regulator * @rdev: regulator source * * This function can be called many times by one task on * a single regulator and its mutex will be locked only * once. If a task, which is calling this function is other * than the one, which initially locked the mutex, it will * wait on mutex. */ static void regulator_lock(struct regulator_dev *rdev) { regulator_lock_nested(rdev, NULL); } /** * regulator_unlock - unlock a single regulator * @rdev: regulator_source * * This function unlocks the mutex when the * reference counter reaches 0. */ static void regulator_unlock(struct regulator_dev *rdev) { mutex_lock(&regulator_nesting_mutex); if (--rdev->ref_cnt == 0) { rdev->mutex_owner = NULL; ww_mutex_unlock(&rdev->mutex); } WARN_ON_ONCE(rdev->ref_cnt < 0); mutex_unlock(&regulator_nesting_mutex); } /** * regulator_lock_two - lock two regulators * @rdev1: first regulator * @rdev2: second regulator * @ww_ctx: w/w mutex acquire context * * Locks both rdevs using the regulator_ww_class. */ static void regulator_lock_two(struct regulator_dev *rdev1, struct regulator_dev *rdev2, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *held, *contended; int ret; ww_acquire_init(ww_ctx, &regulator_ww_class); /* Try to just grab both of them */ ret = regulator_lock_nested(rdev1, ww_ctx); WARN_ON(ret); ret = regulator_lock_nested(rdev2, ww_ctx); if (ret != -EDEADLOCK) { WARN_ON(ret); goto exit; } held = rdev1; contended = rdev2; while (true) { regulator_unlock(held); ww_mutex_lock_slow(&contended->mutex, ww_ctx); contended->ref_cnt++; contended->mutex_owner = current; swap(held, contended); ret = regulator_lock_nested(contended, ww_ctx); if (ret != -EDEADLOCK) { WARN_ON(ret); break; } } exit: ww_acquire_done(ww_ctx); } /** * regulator_unlock_two - unlock two regulators * @rdev1: first regulator * @rdev2: second regulator * @ww_ctx: w/w mutex acquire context * * The inverse of regulator_lock_two(). 
*/ static void regulator_unlock_two(struct regulator_dev *rdev1, struct regulator_dev *rdev2, struct ww_acquire_ctx *ww_ctx) { regulator_unlock(rdev2); regulator_unlock(rdev1); ww_acquire_fini(ww_ctx); } static bool regulator_supply_is_couple(struct regulator_dev *rdev) { struct regulator_dev *c_rdev; int i; for (i = 1; i < rdev->coupling_desc.n_coupled; i++) { c_rdev = rdev->coupling_desc.coupled_rdevs[i]; if (rdev->supply->rdev == c_rdev) return true; } return false; } static void regulator_unlock_recursive(struct regulator_dev *rdev, unsigned int n_coupled) { struct regulator_dev *c_rdev, *supply_rdev; int i, supply_n_coupled; for (i = n_coupled; i > 0; i--) { c_rdev = rdev->coupling_desc.coupled_rdevs[i - 1]; if (!c_rdev) continue; if (c_rdev->supply && !regulator_supply_is_couple(c_rdev)) { supply_rdev = c_rdev->supply->rdev; supply_n_coupled = supply_rdev->coupling_desc.n_coupled; regulator_unlock_recursive(supply_rdev, supply_n_coupled); } regulator_unlock(c_rdev); } } static int regulator_lock_recursive(struct regulator_dev *rdev, struct regulator_dev **new_contended_rdev, struct regulator_dev **old_contended_rdev, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *c_rdev; int i, err; for (i = 0; i < rdev->coupling_desc.n_coupled; i++) { c_rdev = rdev->coupling_desc.coupled_rdevs[i]; if (!c_rdev) continue; if (c_rdev != *old_contended_rdev) { err = regulator_lock_nested(c_rdev, ww_ctx); if (err) { if (err == -EDEADLK) { *new_contended_rdev = c_rdev; goto err_unlock; } /* shouldn't happen */ WARN_ON_ONCE(err != -EALREADY); } } else { *old_contended_rdev = NULL; } if (c_rdev->supply && !regulator_supply_is_couple(c_rdev)) { err = regulator_lock_recursive(c_rdev->supply->rdev, new_contended_rdev, old_contended_rdev, ww_ctx); if (err) { regulator_unlock(c_rdev); goto err_unlock; } } } return 0; err_unlock: regulator_unlock_recursive(rdev, i); return err; } /** * regulator_unlock_dependent - unlock regulator's suppliers and coupled * regulators * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * Unlock all regulators related with rdev by coupling or supplying. */ static void regulator_unlock_dependent(struct regulator_dev *rdev, struct ww_acquire_ctx *ww_ctx) { regulator_unlock_recursive(rdev, rdev->coupling_desc.n_coupled); ww_acquire_fini(ww_ctx); } /** * regulator_lock_dependent - lock regulator's suppliers and coupled regulators * @rdev: regulator source * @ww_ctx: w/w mutex acquire context * * This function as a wrapper on regulator_lock_recursive(), which locks * all regulators related with rdev by coupling or supplying. */ static void regulator_lock_dependent(struct regulator_dev *rdev, struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *new_contended_rdev = NULL; struct regulator_dev *old_contended_rdev = NULL; int err; mutex_lock(&regulator_list_mutex); ww_acquire_init(ww_ctx, &regulator_ww_class); do { if (new_contended_rdev) { ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx); old_contended_rdev = new_contended_rdev; old_contended_rdev->ref_cnt++; old_contended_rdev->mutex_owner = current; } err = regulator_lock_recursive(rdev, &new_contended_rdev, &old_contended_rdev, ww_ctx); if (old_contended_rdev) regulator_unlock(old_contended_rdev); } while (err == -EDEADLK); ww_acquire_done(ww_ctx); mutex_unlock(&regulator_list_mutex); } /** * of_get_child_regulator - get a child regulator device node * based on supply name * @parent: Parent device node * @prop_name: Combination regulator supply name and "-supply" * * Traverse all child nodes. 
* Extract the child regulator device node corresponding to the supply name. * returns the device node corresponding to the regulator if found, else * returns NULL. */ static struct device_node *of_get_child_regulator(struct device_node *parent, const char *prop_name) { struct device_node *regnode = NULL; struct device_node *child = NULL; for_each_child_of_node(parent, child) { regnode = of_parse_phandle(child, prop_name, 0); if (!regnode) { regnode = of_get_child_regulator(child, prop_name); if (regnode) goto err_node_put; } else { goto err_node_put; } } return NULL; err_node_put: of_node_put(child); return regnode; } /** * of_get_regulator - get a regulator device node based on supply name * @dev: Device pointer for the consumer (of regulator) device * @supply: regulator supply name * * Extract the regulator device node corresponding to the supply name. * returns the device node corresponding to the regulator if found, else * returns NULL. */ static struct device_node *of_get_regulator(struct device *dev, const char *supply) { struct device_node *regnode = NULL; char prop_name[64]; /* 64 is max size of property name */ dev_dbg(dev, "Looking up %s-supply from device tree\n", supply); snprintf(prop_name, 64, "%s-supply", supply); regnode = of_parse_phandle(dev->of_node, prop_name, 0); if (!regnode) { regnode = of_get_child_regulator(dev->of_node, prop_name); if (regnode) return regnode; dev_dbg(dev, "Looking up %s property in node %pOF failed\n", prop_name, dev->of_node); return NULL; } return regnode; } /* Platform voltage constraint check */ int regulator_check_voltage(struct regulator_dev *rdev, int *min_uV, int *max_uV) { BUG_ON(*min_uV > *max_uV); if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { rdev_err(rdev, "voltage operation not allowed\n"); return -EPERM; } if (*max_uV > rdev->constraints->max_uV) *max_uV = rdev->constraints->max_uV; if (*min_uV < rdev->constraints->min_uV) *min_uV = rdev->constraints->min_uV; if (*min_uV > *max_uV) { rdev_err(rdev, "unsupportable voltage range: %d-%duV\n", *min_uV, *max_uV); return -EINVAL; } return 0; } /* return 0 if the state is valid */ static int regulator_check_states(suspend_state_t state) { return (state > PM_SUSPEND_MAX || state == PM_SUSPEND_TO_IDLE); } /* Make sure we select a voltage that suits the needs of all * regulator consumers */ int regulator_check_consumers(struct regulator_dev *rdev, int *min_uV, int *max_uV, suspend_state_t state) { struct regulator *regulator; struct regulator_voltage *voltage; list_for_each_entry(regulator, &rdev->consumer_list, list) { voltage = &regulator->voltage[state]; /* * Assume consumers that didn't say anything are OK * with anything in the constraint range. 
*/ if (!voltage->min_uV && !voltage->max_uV) continue; if (*max_uV > voltage->max_uV) *max_uV = voltage->max_uV; if (*min_uV < voltage->min_uV) *min_uV = voltage->min_uV; } if (*min_uV > *max_uV) { rdev_err(rdev, "Restricting voltage, %u-%uuV\n", *min_uV, *max_uV); return -EINVAL; } return 0; } /* current constraint check */ static int regulator_check_current_limit(struct regulator_dev *rdev, int *min_uA, int *max_uA) { BUG_ON(*min_uA > *max_uA); if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_CURRENT)) { rdev_err(rdev, "current operation not allowed\n"); return -EPERM; } if (*max_uA > rdev->constraints->max_uA) *max_uA = rdev->constraints->max_uA; if (*min_uA < rdev->constraints->min_uA) *min_uA = rdev->constraints->min_uA; if (*min_uA > *max_uA) { rdev_err(rdev, "unsupportable current range: %d-%duA\n", *min_uA, *max_uA); return -EINVAL; } return 0; } /* operating mode constraint check */ static int regulator_mode_constrain(struct regulator_dev *rdev, unsigned int *mode) { switch (*mode) { case REGULATOR_MODE_FAST: case REGULATOR_MODE_NORMAL: case REGULATOR_MODE_IDLE: case REGULATOR_MODE_STANDBY: break; default: rdev_err(rdev, "invalid mode %x specified\n", *mode); return -EINVAL; } if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_MODE)) { rdev_err(rdev, "mode operation not allowed\n"); return -EPERM; } /* The modes are bitmasks, the most power hungry modes having * the lowest values. If the requested mode isn't supported * try higher modes. */ while (*mode) { if (rdev->constraints->valid_modes_mask & *mode) return 0; *mode /= 2; } return -EINVAL; } static inline struct regulator_state * regulator_get_suspend_state(struct regulator_dev *rdev, suspend_state_t state) { if (rdev->constraints == NULL) return NULL; switch (state) { case PM_SUSPEND_STANDBY: return &rdev->constraints->state_standby; case PM_SUSPEND_MEM: return &rdev->constraints->state_mem; case PM_SUSPEND_MAX: return &rdev->constraints->state_disk; default: return NULL; } } static const struct regulator_state * regulator_get_suspend_state_check(struct regulator_dev *rdev, suspend_state_t state) { const struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return NULL; /* If we have no suspend mode configuration don't set anything; * only warn if the driver implements set_suspend_voltage or * set_suspend_mode callback. 
*/ if (rstate->enabled != ENABLE_IN_SUSPEND && rstate->enabled != DISABLE_IN_SUSPEND) { if (rdev->desc->ops->set_suspend_voltage || rdev->desc->ops->set_suspend_mode) rdev_warn(rdev, "No configuration\n"); return NULL; } return rstate; } static ssize_t microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); int uV; regulator_lock(rdev); uV = regulator_get_voltage_rdev(rdev); regulator_unlock(rdev); if (uV < 0) return uV; return sprintf(buf, "%d\n", uV); } static DEVICE_ATTR_RO(microvolts); static ssize_t microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", _regulator_get_current_limit(rdev)); } static DEVICE_ATTR_RO(microamps); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%s\n", rdev_get_name(rdev)); } static DEVICE_ATTR_RO(name); static const char *regulator_opmode_to_str(int mode) { switch (mode) { case REGULATOR_MODE_FAST: return "fast"; case REGULATOR_MODE_NORMAL: return "normal"; case REGULATOR_MODE_IDLE: return "idle"; case REGULATOR_MODE_STANDBY: return "standby"; } return "unknown"; } static ssize_t regulator_print_opmode(char *buf, int mode) { return sprintf(buf, "%s\n", regulator_opmode_to_str(mode)); } static ssize_t opmode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, _regulator_get_mode(rdev)); } static DEVICE_ATTR_RO(opmode); static ssize_t regulator_print_state(char *buf, int state) { if (state > 0) return sprintf(buf, "enabled\n"); else if (state == 0) return sprintf(buf, "disabled\n"); else return sprintf(buf, "unknown\n"); } static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); ssize_t ret; regulator_lock(rdev); ret = regulator_print_state(buf, _regulator_is_enabled(rdev)); regulator_unlock(rdev); return ret; } static DEVICE_ATTR_RO(state); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); int status; char *label; status = rdev->desc->ops->get_status(rdev); if (status < 0) return status; switch (status) { case REGULATOR_STATUS_OFF: label = "off"; break; case REGULATOR_STATUS_ON: label = "on"; break; case REGULATOR_STATUS_ERROR: label = "error"; break; case REGULATOR_STATUS_FAST: label = "fast"; break; case REGULATOR_STATUS_NORMAL: label = "normal"; break; case REGULATOR_STATUS_IDLE: label = "idle"; break; case REGULATOR_STATUS_STANDBY: label = "standby"; break; case REGULATOR_STATUS_BYPASS: label = "bypass"; break; case REGULATOR_STATUS_UNDEFINED: label = "undefined"; break; default: return -ERANGE; } return sprintf(buf, "%s\n", label); } static DEVICE_ATTR_RO(status); static ssize_t min_microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->min_uA); } static DEVICE_ATTR_RO(min_microamps); static ssize_t max_microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); 
return sprintf(buf, "%d\n", rdev->constraints->max_uA); } static DEVICE_ATTR_RO(max_microamps); static ssize_t min_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->min_uV); } static DEVICE_ATTR_RO(min_microvolts); static ssize_t max_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); if (!rdev->constraints) return sprintf(buf, "constraint not defined\n"); return sprintf(buf, "%d\n", rdev->constraints->max_uV); } static DEVICE_ATTR_RO(max_microvolts); static ssize_t requested_microamps_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); struct regulator *regulator; int uA = 0; regulator_lock(rdev); list_for_each_entry(regulator, &rdev->consumer_list, list) { if (regulator->enable_count) uA += regulator->uA_load; } regulator_unlock(rdev); return sprintf(buf, "%d\n", uA); } static DEVICE_ATTR_RO(requested_microamps); static ssize_t num_users_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->use_count); } static DEVICE_ATTR_RO(num_users); static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); switch (rdev->desc->type) { case REGULATOR_VOLTAGE: return sprintf(buf, "voltage\n"); case REGULATOR_CURRENT: return sprintf(buf, "current\n"); } return sprintf(buf, "unknown\n"); } static DEVICE_ATTR_RO(type); static ssize_t suspend_mem_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_mem.uV); } static DEVICE_ATTR_RO(suspend_mem_microvolts); static ssize_t suspend_disk_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_disk.uV); } static DEVICE_ATTR_RO(suspend_disk_microvolts); static ssize_t suspend_standby_microvolts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", rdev->constraints->state_standby.uV); } static DEVICE_ATTR_RO(suspend_standby_microvolts); static ssize_t suspend_mem_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_mem.mode); } static DEVICE_ATTR_RO(suspend_mem_mode); static ssize_t suspend_disk_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_disk.mode); } static DEVICE_ATTR_RO(suspend_disk_mode); static ssize_t suspend_standby_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_opmode(buf, rdev->constraints->state_standby.mode); } static DEVICE_ATTR_RO(suspend_standby_mode); static ssize_t suspend_mem_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return 
regulator_print_state(buf, rdev->constraints->state_mem.enabled); } static DEVICE_ATTR_RO(suspend_mem_state); static ssize_t suspend_disk_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_state(buf, rdev->constraints->state_disk.enabled); } static DEVICE_ATTR_RO(suspend_disk_state); static ssize_t suspend_standby_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); return regulator_print_state(buf, rdev->constraints->state_standby.enabled); } static DEVICE_ATTR_RO(suspend_standby_state); static ssize_t bypass_show(struct device *dev, struct device_attribute *attr, char *buf) { struct regulator_dev *rdev = dev_get_drvdata(dev); const char *report; bool bypass; int ret; ret = rdev->desc->ops->get_bypass(rdev, &bypass); if (ret != 0) report = "unknown"; else if (bypass) report = "enabled"; else report = "disabled"; return sprintf(buf, "%s\n", report); } static DEVICE_ATTR_RO(bypass); #define REGULATOR_ERROR_ATTR(name, bit) \ static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ int ret; \ unsigned int flags; \ struct regulator_dev *rdev = dev_get_drvdata(dev); \ ret = _regulator_get_error_flags(rdev, &flags); \ if (ret) \ return ret; \ return sysfs_emit(buf, "%d\n", !!(flags & (bit))); \ } \ static DEVICE_ATTR_RO(name) REGULATOR_ERROR_ATTR(under_voltage, REGULATOR_ERROR_UNDER_VOLTAGE); REGULATOR_ERROR_ATTR(over_current, REGULATOR_ERROR_OVER_CURRENT); REGULATOR_ERROR_ATTR(regulation_out, REGULATOR_ERROR_REGULATION_OUT); REGULATOR_ERROR_ATTR(fail, REGULATOR_ERROR_FAIL); REGULATOR_ERROR_ATTR(over_temp, REGULATOR_ERROR_OVER_TEMP); REGULATOR_ERROR_ATTR(under_voltage_warn, REGULATOR_ERROR_UNDER_VOLTAGE_WARN); REGULATOR_ERROR_ATTR(over_current_warn, REGULATOR_ERROR_OVER_CURRENT_WARN); REGULATOR_ERROR_ATTR(over_voltage_warn, REGULATOR_ERROR_OVER_VOLTAGE_WARN); REGULATOR_ERROR_ATTR(over_temp_warn, REGULATOR_ERROR_OVER_TEMP_WARN); /* Calculate the new optimum regulator operating mode based on the new total * consumer load. All locks held by caller */ static int drms_uA_update(struct regulator_dev *rdev) { struct regulator *sibling; int current_uA = 0, output_uV, input_uV, err; unsigned int mode; /* * first check to see if we can set modes at all, otherwise just * tell the consumer everything is OK. */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS)) { rdev_dbg(rdev, "DRMS operation not allowed\n"); return 0; } if (!rdev->desc->ops->get_optimum_mode && !rdev->desc->ops->set_load) return 0; if (!rdev->desc->ops->set_mode && !rdev->desc->ops->set_load) return -EINVAL; /* calc total requested load */ list_for_each_entry(sibling, &rdev->consumer_list, list) { if (sibling->enable_count) current_uA += sibling->uA_load; } current_uA += rdev->constraints->system_load; if (rdev->desc->ops->set_load) { /* set the optimum mode for our new total regulator load */ err = rdev->desc->ops->set_load(rdev, current_uA); if (err < 0) rdev_err(rdev, "failed to set load %d: %pe\n", current_uA, ERR_PTR(err)); } else { /* * Unfortunately in some cases the constraints->valid_ops has * REGULATOR_CHANGE_DRMS but there are no valid modes listed. * That's not really legit but we won't consider it a fatal * error here. We'll treat it as if REGULATOR_CHANGE_DRMS * wasn't set. 
*/ if (!rdev->constraints->valid_modes_mask) { rdev_dbg(rdev, "Can change modes; but no valid mode\n"); return 0; } /* get output voltage */ output_uV = regulator_get_voltage_rdev(rdev); /* * Don't return an error; if regulator driver cares about * output_uV then it's up to the driver to validate. */ if (output_uV <= 0) rdev_dbg(rdev, "invalid output voltage found\n"); /* get input voltage */ input_uV = 0; if (rdev->supply) input_uV = regulator_get_voltage_rdev(rdev->supply->rdev); if (input_uV <= 0) input_uV = rdev->constraints->input_uV; /* * Don't return an error; if regulator driver cares about * input_uV then it's up to the driver to validate. */ if (input_uV <= 0) rdev_dbg(rdev, "invalid input voltage found\n"); /* now get the optimum mode for our new total regulator load */ mode = rdev->desc->ops->get_optimum_mode(rdev, input_uV, output_uV, current_uA); /* check the new mode is allowed */ err = regulator_mode_constrain(rdev, &mode); if (err < 0) { rdev_err(rdev, "failed to get optimum mode @ %d uA %d -> %d uV: %pe\n", current_uA, input_uV, output_uV, ERR_PTR(err)); return err; } err = rdev->desc->ops->set_mode(rdev, mode); if (err < 0) rdev_err(rdev, "failed to set optimum mode %x: %pe\n", mode, ERR_PTR(err)); } return err; } static int __suspend_set_state(struct regulator_dev *rdev, const struct regulator_state *rstate) { int ret = 0; if (rstate->enabled == ENABLE_IN_SUSPEND && rdev->desc->ops->set_suspend_enable) ret = rdev->desc->ops->set_suspend_enable(rdev); else if (rstate->enabled == DISABLE_IN_SUSPEND && rdev->desc->ops->set_suspend_disable) ret = rdev->desc->ops->set_suspend_disable(rdev); else /* OK if set_suspend_enable or set_suspend_disable is NULL */ ret = 0; if (ret < 0) { rdev_err(rdev, "failed to enabled/disable: %pe\n", ERR_PTR(ret)); return ret; } if (rdev->desc->ops->set_suspend_voltage && rstate->uV > 0) { ret = rdev->desc->ops->set_suspend_voltage(rdev, rstate->uV); if (ret < 0) { rdev_err(rdev, "failed to set voltage: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->desc->ops->set_suspend_mode && rstate->mode > 0) { ret = rdev->desc->ops->set_suspend_mode(rdev, rstate->mode); if (ret < 0) { rdev_err(rdev, "failed to set mode: %pe\n", ERR_PTR(ret)); return ret; } } return ret; } static int suspend_set_initial_state(struct regulator_dev *rdev) { const struct regulator_state *rstate; rstate = regulator_get_suspend_state_check(rdev, rdev->constraints->initial_state); if (!rstate) return 0; return __suspend_set_state(rdev, rstate); } #if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) static void print_constraints_debug(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; char buf[160] = ""; size_t len = sizeof(buf) - 1; int count = 0; int ret; if (constraints->min_uV && constraints->max_uV) { if (constraints->min_uV == constraints->max_uV) count += scnprintf(buf + count, len - count, "%d mV ", constraints->min_uV / 1000); else count += scnprintf(buf + count, len - count, "%d <--> %d mV ", constraints->min_uV / 1000, constraints->max_uV / 1000); } if (!constraints->min_uV || constraints->min_uV != constraints->max_uV) { ret = regulator_get_voltage_rdev(rdev); if (ret > 0) count += scnprintf(buf + count, len - count, "at %d mV ", ret / 1000); } if (constraints->uV_offset) count += scnprintf(buf + count, len - count, "%dmV offset ", constraints->uV_offset / 1000); if (constraints->min_uA && constraints->max_uA) { if (constraints->min_uA == constraints->max_uA) count += scnprintf(buf + count, len - count, "%d mA ", 
constraints->min_uA / 1000); else count += scnprintf(buf + count, len - count, "%d <--> %d mA ", constraints->min_uA / 1000, constraints->max_uA / 1000); } if (!constraints->min_uA || constraints->min_uA != constraints->max_uA) { ret = _regulator_get_current_limit(rdev); if (ret > 0) count += scnprintf(buf + count, len - count, "at %d mA ", ret / 1000); } if (constraints->valid_modes_mask & REGULATOR_MODE_FAST) count += scnprintf(buf + count, len - count, "fast "); if (constraints->valid_modes_mask & REGULATOR_MODE_NORMAL) count += scnprintf(buf + count, len - count, "normal "); if (constraints->valid_modes_mask & REGULATOR_MODE_IDLE) count += scnprintf(buf + count, len - count, "idle "); if (constraints->valid_modes_mask & REGULATOR_MODE_STANDBY) count += scnprintf(buf + count, len - count, "standby "); if (!count) count = scnprintf(buf, len, "no parameters"); else --count; count += scnprintf(buf + count, len - count, ", %s", _regulator_is_enabled(rdev) ? "enabled" : "disabled"); rdev_dbg(rdev, "%s\n", buf); } #else /* !DEBUG && !CONFIG_DYNAMIC_DEBUG */ static inline void print_constraints_debug(struct regulator_dev *rdev) {} #endif /* !DEBUG && !CONFIG_DYNAMIC_DEBUG */ static void print_constraints(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; print_constraints_debug(rdev); if ((constraints->min_uV != constraints->max_uV) && !regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) rdev_warn(rdev, "Voltage range but no REGULATOR_CHANGE_VOLTAGE\n"); } static int machine_constraints_voltage(struct regulator_dev *rdev, struct regulation_constraints *constraints) { const struct regulator_ops *ops = rdev->desc->ops; int ret; /* do we need to apply the constraint voltage */ if (rdev->constraints->apply_uV && rdev->constraints->min_uV && rdev->constraints->max_uV) { int target_min, target_max; int current_uV = regulator_get_voltage_rdev(rdev); if (current_uV == -ENOTRECOVERABLE) { /* This regulator can't be read and must be initialized */ rdev_info(rdev, "Setting %d-%duV\n", rdev->constraints->min_uV, rdev->constraints->max_uV); _regulator_do_set_voltage(rdev, rdev->constraints->min_uV, rdev->constraints->max_uV); current_uV = regulator_get_voltage_rdev(rdev); } if (current_uV < 0) { if (current_uV != -EPROBE_DEFER) rdev_err(rdev, "failed to get the current voltage: %pe\n", ERR_PTR(current_uV)); return current_uV; } /* * If we're below the minimum voltage move up to the * minimum voltage, if we're above the maximum voltage * then move down to the maximum. */ target_min = current_uV; target_max = current_uV; if (current_uV < rdev->constraints->min_uV) { target_min = rdev->constraints->min_uV; target_max = rdev->constraints->min_uV; } if (current_uV > rdev->constraints->max_uV) { target_min = rdev->constraints->max_uV; target_max = rdev->constraints->max_uV; } if (target_min != current_uV || target_max != current_uV) { rdev_info(rdev, "Bringing %duV into %d-%duV\n", current_uV, target_min, target_max); ret = _regulator_do_set_voltage( rdev, target_min, target_max); if (ret < 0) { rdev_err(rdev, "failed to apply %d-%duV constraint: %pe\n", target_min, target_max, ERR_PTR(ret)); return ret; } } } /* constrain machine-level voltage specs to fit * the actual range supported by this regulator. 
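 *
 * Illustrative example (not from the original source): if the machine
 * constraints ask for 1700000-3600000uV but list_voltage() only reports
 * 1800000, 2500000 and 3300000uV, the loop below narrows the range so
 * constraints->min_uV becomes 1800000 and constraints->max_uV becomes
 * 3300000, i.e. the subset this regulator can actually deliver.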
*/ if (ops->list_voltage && rdev->desc->n_voltages) { int count = rdev->desc->n_voltages; int i; int min_uV = INT_MAX; int max_uV = INT_MIN; int cmin = constraints->min_uV; int cmax = constraints->max_uV; /* it's safe to autoconfigure fixed-voltage supplies * and the constraints are used by list_voltage. */ if (count == 1 && !cmin) { cmin = 1; cmax = INT_MAX; constraints->min_uV = cmin; constraints->max_uV = cmax; } /* voltage constraints are optional */ if ((cmin == 0) && (cmax == 0)) return 0; /* else require explicit machine-level constraints */ if (cmin <= 0 || cmax <= 0 || cmax < cmin) { rdev_err(rdev, "invalid voltage constraints\n"); return -EINVAL; } /* no need to loop voltages if range is continuous */ if (rdev->desc->continuous_voltage_range) return 0; /* initial: [cmin..cmax] valid, [min_uV..max_uV] not */ for (i = 0; i < count; i++) { int value; value = ops->list_voltage(rdev, i); if (value <= 0) continue; /* maybe adjust [min_uV..max_uV] */ if (value >= cmin && value < min_uV) min_uV = value; if (value <= cmax && value > max_uV) max_uV = value; } /* final: [min_uV..max_uV] valid iff constraints valid */ if (max_uV < min_uV) { rdev_err(rdev, "unsupportable voltage constraints %u-%uuV\n", min_uV, max_uV); return -EINVAL; } /* use regulator's subset of machine constraints */ if (constraints->min_uV < min_uV) { rdev_dbg(rdev, "override min_uV, %d -> %d\n", constraints->min_uV, min_uV); constraints->min_uV = min_uV; } if (constraints->max_uV > max_uV) { rdev_dbg(rdev, "override max_uV, %d -> %d\n", constraints->max_uV, max_uV); constraints->max_uV = max_uV; } } return 0; } static int machine_constraints_current(struct regulator_dev *rdev, struct regulation_constraints *constraints) { const struct regulator_ops *ops = rdev->desc->ops; int ret; if (!constraints->min_uA && !constraints->max_uA) return 0; if (constraints->min_uA > constraints->max_uA) { rdev_err(rdev, "Invalid current constraints\n"); return -EINVAL; } if (!ops->set_current_limit || !ops->get_current_limit) { rdev_warn(rdev, "Operation of current configuration missing\n"); return 0; } /* Set regulator current in constraints range */ ret = ops->set_current_limit(rdev, constraints->min_uA, constraints->max_uA); if (ret < 0) { rdev_err(rdev, "Failed to set current constraint, %d\n", ret); return ret; } return 0; } static int _regulator_do_enable(struct regulator_dev *rdev); static int notif_set_limit(struct regulator_dev *rdev, int (*set)(struct regulator_dev *, int, int, bool), int limit, int severity) { bool enable; if (limit == REGULATOR_NOTIF_LIMIT_DISABLE) { enable = false; limit = 0; } else { enable = true; } if (limit == REGULATOR_NOTIF_LIMIT_ENABLE) limit = 0; return set(rdev, limit, severity, enable); } static int handle_notify_limits(struct regulator_dev *rdev, int (*set)(struct regulator_dev *, int, int, bool), struct notification_limit *limits) { int ret = 0; if (!set) return -EOPNOTSUPP; if (limits->prot) ret = notif_set_limit(rdev, set, limits->prot, REGULATOR_SEVERITY_PROT); if (ret) return ret; if (limits->err) ret = notif_set_limit(rdev, set, limits->err, REGULATOR_SEVERITY_ERR); if (ret) return ret; if (limits->warn) ret = notif_set_limit(rdev, set, limits->warn, REGULATOR_SEVERITY_WARN); return ret; } /** * set_machine_constraints - sets regulator constraints * @rdev: regulator source * * Allows platform initialisation code to define and constrain * regulator circuits e.g. valid voltage/current ranges, etc. 
NOTE: * Constraints *must* be set by platform code in order for some * regulator operations to proceed i.e. set_voltage, set_current_limit, * set_mode. */ static int set_machine_constraints(struct regulator_dev *rdev) { int ret = 0; const struct regulator_ops *ops = rdev->desc->ops; ret = machine_constraints_voltage(rdev, rdev->constraints); if (ret != 0) return ret; ret = machine_constraints_current(rdev, rdev->constraints); if (ret != 0) return ret; if (rdev->constraints->ilim_uA && ops->set_input_current_limit) { ret = ops->set_input_current_limit(rdev, rdev->constraints->ilim_uA); if (ret < 0) { rdev_err(rdev, "failed to set input limit: %pe\n", ERR_PTR(ret)); return ret; } } /* do we need to setup our suspend state */ if (rdev->constraints->initial_state) { ret = suspend_set_initial_state(rdev); if (ret < 0) { rdev_err(rdev, "failed to set suspend state: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->initial_mode) { if (!ops->set_mode) { rdev_err(rdev, "no set_mode operation\n"); return -EINVAL; } ret = ops->set_mode(rdev, rdev->constraints->initial_mode); if (ret < 0) { rdev_err(rdev, "failed to set initial mode: %pe\n", ERR_PTR(ret)); return ret; } } else if (rdev->constraints->system_load) { /* * We'll only apply the initial system load if an * initial mode wasn't specified. */ drms_uA_update(rdev); } if ((rdev->constraints->ramp_delay || rdev->constraints->ramp_disable) && ops->set_ramp_delay) { ret = ops->set_ramp_delay(rdev, rdev->constraints->ramp_delay); if (ret < 0) { rdev_err(rdev, "failed to set ramp_delay: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->pull_down && ops->set_pull_down) { ret = ops->set_pull_down(rdev); if (ret < 0) { rdev_err(rdev, "failed to set pull down: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->soft_start && ops->set_soft_start) { ret = ops->set_soft_start(rdev); if (ret < 0) { rdev_err(rdev, "failed to set soft start: %pe\n", ERR_PTR(ret)); return ret; } } /* * Existing logic does not warn if over_current_protection is given as * a constraint but driver does not support that. I think we should * warn about this type of issues as it is possible someone changes * PMIC on board to another type - and the another PMIC's driver does * not support setting protection. Board composer may happily believe * the DT limits are respected - especially if the new PMIC HW also * supports protection but the driver does not. I won't change the logic * without hearing more experienced opinion on this though. * * If warning is seen as a good idea then we can merge handling the * over-curret protection and detection and get rid of this special * handling. 
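 *
 * For reference, a minimal driver-side sketch of the callback invoked
 * below; the 'foo' PMIC and its FOO_OCP_* register bits are invented for
 * illustration and are not part of this file:
 *
 *	static int foo_set_ocp(struct regulator_dev *rdev, int lim_uA,
 *			       int severity, bool enable)
 *	{
 *		if (severity != REGULATOR_SEVERITY_PROT)
 *			return -EOPNOTSUPP;	// only hard shutdown wired up
 *		// fixed hardware limit, lim_uA is not programmable here
 *		return regmap_update_bits(rdev->regmap, FOO_OCP_REG,
 *					  FOO_OCP_EN, enable ? FOO_OCP_EN : 0);
 *	}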
*/ if (rdev->constraints->over_current_protection && ops->set_over_current_protection) { int lim = rdev->constraints->over_curr_limits.prot; ret = ops->set_over_current_protection(rdev, lim, REGULATOR_SEVERITY_PROT, true); if (ret < 0) { rdev_err(rdev, "failed to set over current protection: %pe\n", ERR_PTR(ret)); return ret; } } if (rdev->constraints->over_current_detection) ret = handle_notify_limits(rdev, ops->set_over_current_protection, &rdev->constraints->over_curr_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set over current limits: %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested over-current limits\n"); } if (rdev->constraints->over_voltage_detection) ret = handle_notify_limits(rdev, ops->set_over_voltage_protection, &rdev->constraints->over_voltage_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set over voltage limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested over voltage limits\n"); } if (rdev->constraints->under_voltage_detection) ret = handle_notify_limits(rdev, ops->set_under_voltage_protection, &rdev->constraints->under_voltage_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set under voltage limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested under voltage limits\n"); } if (rdev->constraints->over_temp_detection) ret = handle_notify_limits(rdev, ops->set_thermal_protection, &rdev->constraints->temp_limits); if (ret) { if (ret != -EOPNOTSUPP) { rdev_err(rdev, "failed to set temperature limits %pe\n", ERR_PTR(ret)); return ret; } rdev_warn(rdev, "IC does not support requested temperature limits\n"); } if (rdev->constraints->active_discharge && ops->set_active_discharge) { bool ad_state = (rdev->constraints->active_discharge == REGULATOR_ACTIVE_DISCHARGE_ENABLE) ? true : false; ret = ops->set_active_discharge(rdev, ad_state); if (ret < 0) { rdev_err(rdev, "failed to set active discharge: %pe\n", ERR_PTR(ret)); return ret; } } /* * If there is no mechanism for controlling the regulator then * flag it as always_on so we don't end up duplicating checks * for this so much. Note that we could control the state of * a supply to control the output on a regulator that has no * direct control. */ if (!rdev->ena_pin && !ops->enable) { if (rdev->supply_name && !rdev->supply) return -EPROBE_DEFER; if (rdev->supply) rdev->constraints->always_on = rdev->supply->rdev->constraints->always_on; else rdev->constraints->always_on = true; } /* If the constraints say the regulator should be on at this point * and we have control then make sure it is enabled. */ if (rdev->constraints->always_on || rdev->constraints->boot_on) { /* If we want to enable this regulator, make sure that we know * the supplying regulator. */ if (rdev->supply_name && !rdev->supply) return -EPROBE_DEFER; /* If supplying regulator has already been enabled, * it's not intended to have use_count increment * when rdev is only boot-on. 
*/ if (rdev->supply && (rdev->constraints->always_on || !regulator_is_enabled(rdev->supply))) { ret = regulator_enable(rdev->supply); if (ret < 0) { _regulator_put(rdev->supply); rdev->supply = NULL; return ret; } } ret = _regulator_do_enable(rdev); if (ret < 0 && ret != -EINVAL) { rdev_err(rdev, "failed to enable: %pe\n", ERR_PTR(ret)); return ret; } if (rdev->constraints->always_on) rdev->use_count++; } else if (rdev->desc->off_on_delay) { rdev->last_off = ktime_get(); } print_constraints(rdev); return 0; } /** * set_supply - set regulator supply regulator * @rdev: regulator (locked) * @supply_rdev: supply regulator (locked)) * * Called by platform initialisation code to set the supply regulator for this * regulator. This ensures that a regulators supply will also be enabled by the * core if it's child is enabled. */ static int set_supply(struct regulator_dev *rdev, struct regulator_dev *supply_rdev) { int err; rdev_dbg(rdev, "supplied by %s\n", rdev_get_name(supply_rdev)); if (!try_module_get(supply_rdev->owner)) return -ENODEV; rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY"); if (rdev->supply == NULL) { module_put(supply_rdev->owner); err = -ENOMEM; return err; } supply_rdev->open_count++; return 0; } /** * set_consumer_device_supply - Bind a regulator to a symbolic supply * @rdev: regulator source * @consumer_dev_name: dev_name() string for device supply applies to * @supply: symbolic name for supply * * Allows platform initialisation code to map physical regulator * sources to symbolic names for supplies for use by devices. Devices * should use these symbolic names to request regulators, avoiding the * need to provide board-specific regulator names as platform data. */ static int set_consumer_device_supply(struct regulator_dev *rdev, const char *consumer_dev_name, const char *supply) { struct regulator_map *node, *new_node; int has_dev; if (supply == NULL) return -EINVAL; if (consumer_dev_name != NULL) has_dev = 1; else has_dev = 0; new_node = kzalloc(sizeof(struct regulator_map), GFP_KERNEL); if (new_node == NULL) return -ENOMEM; new_node->regulator = rdev; new_node->supply = supply; if (has_dev) { new_node->dev_name = kstrdup(consumer_dev_name, GFP_KERNEL); if (new_node->dev_name == NULL) { kfree(new_node); return -ENOMEM; } } mutex_lock(&regulator_list_mutex); list_for_each_entry(node, &regulator_map_list, list) { if (node->dev_name && consumer_dev_name) { if (strcmp(node->dev_name, consumer_dev_name) != 0) continue; } else if (node->dev_name || consumer_dev_name) { continue; } if (strcmp(node->supply, supply) != 0) continue; pr_debug("%s: %s/%s is '%s' supply; fail %s/%s\n", consumer_dev_name, dev_name(&node->regulator->dev), node->regulator->desc->name, supply, dev_name(&rdev->dev), rdev_get_name(rdev)); goto fail; } list_add(&new_node->list, &regulator_map_list); mutex_unlock(&regulator_list_mutex); return 0; fail: mutex_unlock(&regulator_list_mutex); kfree(new_node->dev_name); kfree(new_node); return -EBUSY; } static void unset_regulator_supplies(struct regulator_dev *rdev) { struct regulator_map *node, *n; list_for_each_entry_safe(node, n, &regulator_map_list, list) { if (rdev == node->regulator) { list_del(&node->list); kfree(node->dev_name); kfree(node); } } } #ifdef CONFIG_DEBUG_FS static ssize_t constraint_flags_read_file(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { const struct regulator *regulator = file->private_data; const struct regulation_constraints *c = regulator->rdev->constraints; char *buf; ssize_t ret; if (!c) 
return 0; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; ret = snprintf(buf, PAGE_SIZE, "always_on: %u\n" "boot_on: %u\n" "apply_uV: %u\n" "ramp_disable: %u\n" "soft_start: %u\n" "pull_down: %u\n" "over_current_protection: %u\n", c->always_on, c->boot_on, c->apply_uV, c->ramp_disable, c->soft_start, c->pull_down, c->over_current_protection); ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); kfree(buf); return ret; } #endif static const struct file_operations constraint_flags_fops = { #ifdef CONFIG_DEBUG_FS .open = simple_open, .read = constraint_flags_read_file, .llseek = default_llseek, #endif }; #define REG_STR_SIZE 64 static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, const char *supply_name) { struct regulator *regulator; int err = 0; lockdep_assert_held_once(&rdev->mutex.base); if (dev) { char buf[REG_STR_SIZE]; int size; size = snprintf(buf, REG_STR_SIZE, "%s-%s", dev->kobj.name, supply_name); if (size >= REG_STR_SIZE) return NULL; supply_name = kstrdup(buf, GFP_KERNEL); if (supply_name == NULL) return NULL; } else { supply_name = kstrdup_const(supply_name, GFP_KERNEL); if (supply_name == NULL) return NULL; } regulator = kzalloc(sizeof(*regulator), GFP_KERNEL); if (regulator == NULL) { kfree_const(supply_name); return NULL; } regulator->rdev = rdev; regulator->supply_name = supply_name; list_add(&regulator->list, &rdev->consumer_list); if (dev) { regulator->dev = dev; /* Add a link to the device sysfs entry */ err = sysfs_create_link_nowarn(&rdev->dev.kobj, &dev->kobj, supply_name); if (err) { rdev_dbg(rdev, "could not add device link %s: %pe\n", dev->kobj.name, ERR_PTR(err)); /* non-fatal */ } } if (err != -EEXIST) regulator->debugfs = debugfs_create_dir(supply_name, rdev->debugfs); if (IS_ERR(regulator->debugfs)) rdev_dbg(rdev, "Failed to create debugfs directory\n"); debugfs_create_u32("uA_load", 0444, regulator->debugfs, &regulator->uA_load); debugfs_create_u32("min_uV", 0444, regulator->debugfs, &regulator->voltage[PM_SUSPEND_ON].min_uV); debugfs_create_u32("max_uV", 0444, regulator->debugfs, &regulator->voltage[PM_SUSPEND_ON].max_uV); debugfs_create_file("constraint_flags", 0444, regulator->debugfs, regulator, &constraint_flags_fops); /* * Check now if the regulator is an always on regulator - if * it is then we don't need to do nearly so much work for * enable/disable calls. 
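 *
 * Illustrative case: a fixed LDO whose ops lack REGULATOR_CHANGE_STATUS
 * and which is already powered gets regulator->always_on set here, so
 * regulator_is_enabled() can return 1 immediately without taking the
 * rdev lock.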
*/ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS) && _regulator_is_enabled(rdev)) regulator->always_on = true; return regulator; } static int _regulator_get_enable_time(struct regulator_dev *rdev) { if (rdev->constraints && rdev->constraints->enable_time) return rdev->constraints->enable_time; if (rdev->desc->ops->enable_time) return rdev->desc->ops->enable_time(rdev); return rdev->desc->enable_time; } static struct regulator_supply_alias *regulator_find_supply_alias( struct device *dev, const char *supply) { struct regulator_supply_alias *map; list_for_each_entry(map, &regulator_supply_alias_list, list) if (map->src_dev == dev && strcmp(map->src_supply, supply) == 0) return map; return NULL; } static void regulator_supply_alias(struct device **dev, const char **supply) { struct regulator_supply_alias *map; map = regulator_find_supply_alias(*dev, *supply); if (map) { dev_dbg(*dev, "Mapping supply %s to %s,%s\n", *supply, map->alias_supply, dev_name(map->alias_dev)); *dev = map->alias_dev; *supply = map->alias_supply; } } static int regulator_match(struct device *dev, const void *data) { struct regulator_dev *r = dev_to_rdev(dev); return strcmp(rdev_get_name(r), data) == 0; } static struct regulator_dev *regulator_lookup_by_name(const char *name) { struct device *dev; dev = class_find_device(&regulator_class, NULL, name, regulator_match); return dev ? dev_to_rdev(dev) : NULL; } /** * regulator_dev_lookup - lookup a regulator device. * @dev: device for regulator "consumer". * @supply: Supply name or regulator ID. * * If successful, returns a struct regulator_dev that corresponds to the name * @supply and with the embedded struct device refcount incremented by one. * The refcount must be dropped by calling put_device(). * On failure one of the following ERR-PTR-encoded values is returned: * -ENODEV if lookup fails permanently, -EPROBE_DEFER if lookup could succeed * in the future. */ static struct regulator_dev *regulator_dev_lookup(struct device *dev, const char *supply) { struct regulator_dev *r = NULL; struct device_node *node; struct regulator_map *map; const char *devname = NULL; regulator_supply_alias(&dev, &supply); /* first do a dt based lookup */ if (dev && dev->of_node) { node = of_get_regulator(dev, supply); if (node) { r = of_find_regulator_by_node(node); of_node_put(node); if (r) return r; /* * We have a node, but there is no device. * assume it has not registered yet. */ return ERR_PTR(-EPROBE_DEFER); } } /* if not found, try doing it non-dt way */ if (dev) devname = dev_name(dev); mutex_lock(&regulator_list_mutex); list_for_each_entry(map, &regulator_map_list, list) { /* If the mapping has a device set up it must match */ if (map->dev_name && (!devname || strcmp(map->dev_name, devname))) continue; if (strcmp(map->supply, supply) == 0 && get_device(&map->regulator->dev)) { r = map->regulator; break; } } mutex_unlock(&regulator_list_mutex); if (r) return r; r = regulator_lookup_by_name(supply); if (r) return r; return ERR_PTR(-ENODEV); } static int regulator_resolve_supply(struct regulator_dev *rdev) { struct regulator_dev *r; struct device *dev = rdev->dev.parent; struct ww_acquire_ctx ww_ctx; int ret = 0; /* No supply to resolve? */ if (!rdev->supply_name) return 0; /* Supply already resolved? (fast-path without locking contention) */ if (rdev->supply) return 0; r = regulator_dev_lookup(dev, rdev->supply_name); if (IS_ERR(r)) { ret = PTR_ERR(r); /* Did the lookup explicitly defer for us? 
*/ if (ret == -EPROBE_DEFER) goto out; if (have_full_constraints()) { r = dummy_regulator_rdev; get_device(&r->dev); } else { dev_err(dev, "Failed to resolve %s-supply for %s\n", rdev->supply_name, rdev->desc->name); ret = -EPROBE_DEFER; goto out; } } if (r == rdev) { dev_err(dev, "Supply for %s (%s) resolved to itself\n", rdev->desc->name, rdev->supply_name); if (!have_full_constraints()) { ret = -EINVAL; goto out; } r = dummy_regulator_rdev; get_device(&r->dev); } /* * If the supply's parent device is not the same as the * regulator's parent device, then ensure the parent device * is bound before we resolve the supply, in case the parent * device get probe deferred and unregisters the supply. */ if (r->dev.parent && r->dev.parent != rdev->dev.parent) { if (!device_is_bound(r->dev.parent)) { put_device(&r->dev); ret = -EPROBE_DEFER; goto out; } } /* Recursively resolve the supply of the supply */ ret = regulator_resolve_supply(r); if (ret < 0) { put_device(&r->dev); goto out; } /* * Recheck rdev->supply with rdev->mutex lock held to avoid a race * between rdev->supply null check and setting rdev->supply in * set_supply() from concurrent tasks. */ regulator_lock_two(rdev, r, &ww_ctx); /* Supply just resolved by a concurrent task? */ if (rdev->supply) { regulator_unlock_two(rdev, r, &ww_ctx); put_device(&r->dev); goto out; } ret = set_supply(rdev, r); if (ret < 0) { regulator_unlock_two(rdev, r, &ww_ctx); put_device(&r->dev); goto out; } regulator_unlock_two(rdev, r, &ww_ctx); /* * In set_machine_constraints() we may have turned this regulator on * but we couldn't propagate to the supply if it hadn't been resolved * yet. Do it now. */ if (rdev->use_count) { ret = regulator_enable(rdev->supply); if (ret < 0) { _regulator_put(rdev->supply); rdev->supply = NULL; goto out; } } out: return ret; } /* Internal regulator request function */ struct regulator *_regulator_get(struct device *dev, const char *id, enum regulator_get_type get_type) { struct regulator_dev *rdev; struct regulator *regulator; struct device_link *link; int ret; if (get_type >= MAX_GET_TYPE) { dev_err(dev, "invalid type %d in %s\n", get_type, __func__); return ERR_PTR(-EINVAL); } if (id == NULL) { pr_err("get() with no identifier\n"); return ERR_PTR(-EINVAL); } rdev = regulator_dev_lookup(dev, id); if (IS_ERR(rdev)) { ret = PTR_ERR(rdev); /* * If regulator_dev_lookup() fails with error other * than -ENODEV our job here is done, we simply return it. */ if (ret != -ENODEV) return ERR_PTR(ret); if (!have_full_constraints()) { dev_warn(dev, "incomplete constraints, dummy supplies not allowed\n"); return ERR_PTR(-ENODEV); } switch (get_type) { case NORMAL_GET: /* * Assume that a regulator is physically present and * enabled, even if it isn't hooked up, and just * provide a dummy. 
*/ dev_warn(dev, "supply %s not found, using dummy regulator\n", id); rdev = dummy_regulator_rdev; get_device(&rdev->dev); break; case EXCLUSIVE_GET: dev_warn(dev, "dummy supplies not allowed for exclusive requests\n"); fallthrough; default: return ERR_PTR(-ENODEV); } } if (rdev->exclusive) { regulator = ERR_PTR(-EPERM); put_device(&rdev->dev); return regulator; } if (get_type == EXCLUSIVE_GET && rdev->open_count) { regulator = ERR_PTR(-EBUSY); put_device(&rdev->dev); return regulator; } mutex_lock(&regulator_list_mutex); ret = (rdev->coupling_desc.n_resolved != rdev->coupling_desc.n_coupled); mutex_unlock(&regulator_list_mutex); if (ret != 0) { regulator = ERR_PTR(-EPROBE_DEFER); put_device(&rdev->dev); return regulator; } ret = regulator_resolve_supply(rdev); if (ret < 0) { regulator = ERR_PTR(ret); put_device(&rdev->dev); return regulator; } if (!try_module_get(rdev->owner)) { regulator = ERR_PTR(-EPROBE_DEFER); put_device(&rdev->dev); return regulator; } regulator_lock(rdev); regulator = create_regulator(rdev, dev, id); regulator_unlock(rdev); if (regulator == NULL) { regulator = ERR_PTR(-ENOMEM); module_put(rdev->owner); put_device(&rdev->dev); return regulator; } rdev->open_count++; if (get_type == EXCLUSIVE_GET) { rdev->exclusive = 1; ret = _regulator_is_enabled(rdev); if (ret > 0) { rdev->use_count = 1; regulator->enable_count = 1; /* Propagate the regulator state to its supply */ if (rdev->supply) { ret = regulator_enable(rdev->supply); if (ret < 0) { destroy_regulator(regulator); module_put(rdev->owner); put_device(&rdev->dev); return ERR_PTR(ret); } } } else { rdev->use_count = 0; regulator->enable_count = 0; } } link = device_link_add(dev, &rdev->dev, DL_FLAG_STATELESS); if (!IS_ERR_OR_NULL(link)) regulator->device_link = true; return regulator; } /** * regulator_get - lookup and obtain a reference to a regulator. * @dev: device for regulator "consumer" * @id: Supply name or regulator ID. * * Returns a struct regulator corresponding to the regulator producer, * or IS_ERR() condition containing errno. * * Use of supply names configured via set_consumer_device_supply() is * strongly encouraged. It is recommended that the supply name used * should match the name used for the supply and/or the relevant * device pins in the datasheet. */ struct regulator *regulator_get(struct device *dev, const char *id) { return _regulator_get(dev, id, NORMAL_GET); } EXPORT_SYMBOL_GPL(regulator_get); /** * regulator_get_exclusive - obtain exclusive access to a regulator. * @dev: device for regulator "consumer" * @id: Supply name or regulator ID. * * Returns a struct regulator corresponding to the regulator producer, * or IS_ERR() condition containing errno. Other consumers will be * unable to obtain this regulator while this reference is held and the * use count for the regulator will be initialised to reflect the current * state of the regulator. * * This is intended for use by consumers which cannot tolerate shared * use of the regulator such as those which need to force the * regulator off for correct operation of the hardware they are * controlling. * * Use of supply names configured via set_consumer_device_supply() is * strongly encouraged. It is recommended that the supply name used * should match the name used for the supply and/or the relevant * device pins in the datasheet. 
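 *
 * Illustrative consumer-side sketch (hypothetical device and "vcc"
 * supply name; <linux/regulator/consumer.h> assumed). Because the use
 * count is seeded from the current hardware state, a consumer that must
 * start from a powered-down rail can simply do:
 *
 *	reg = regulator_get_exclusive(&pdev->dev, "vcc");
 *	if (IS_ERR(reg))
 *		return PTR_ERR(reg);
 *	if (regulator_is_enabled(reg))
 *		regulator_disable(reg);	// balanced: count was seeded to 1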
*/ struct regulator *regulator_get_exclusive(struct device *dev, const char *id) { return _regulator_get(dev, id, EXCLUSIVE_GET); } EXPORT_SYMBOL_GPL(regulator_get_exclusive); /** * regulator_get_optional - obtain optional access to a regulator. * @dev: device for regulator "consumer" * @id: Supply name or regulator ID. * * Returns a struct regulator corresponding to the regulator producer, * or IS_ERR() condition containing errno. * * This is intended for use by consumers for devices which can have * some supplies unconnected in normal use, such as some MMC devices. * It can allow the regulator core to provide stub supplies for other * supplies requested using normal regulator_get() calls without * disrupting the operation of drivers that can handle absent * supplies. * * Use of supply names configured via set_consumer_device_supply() is * strongly encouraged. It is recommended that the supply name used * should match the name used for the supply and/or the relevant * device pins in the datasheet. */ struct regulator *regulator_get_optional(struct device *dev, const char *id) { return _regulator_get(dev, id, OPTIONAL_GET); } EXPORT_SYMBOL_GPL(regulator_get_optional); static void destroy_regulator(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; debugfs_remove_recursive(regulator->debugfs); if (regulator->dev) { if (regulator->device_link) device_link_remove(regulator->dev, &rdev->dev); /* remove any sysfs entries */ sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); } regulator_lock(rdev); list_del(&regulator->list); rdev->open_count--; rdev->exclusive = 0; regulator_unlock(rdev); kfree_const(regulator->supply_name); kfree(regulator); } /* regulator_list_mutex lock held by regulator_put() */ static void _regulator_put(struct regulator *regulator) { struct regulator_dev *rdev; if (IS_ERR_OR_NULL(regulator)) return; lockdep_assert_held_once(&regulator_list_mutex); /* Docs say you must disable before calling regulator_put() */ WARN_ON(regulator->enable_count); rdev = regulator->rdev; destroy_regulator(regulator); module_put(rdev->owner); put_device(&rdev->dev); } /** * regulator_put - "free" the regulator source * @regulator: regulator source * * Note: drivers must ensure that all regulator_enable calls made on this * regulator source are balanced by regulator_disable calls prior to calling * this function. */ void regulator_put(struct regulator *regulator) { mutex_lock(&regulator_list_mutex); _regulator_put(regulator); mutex_unlock(&regulator_list_mutex); } EXPORT_SYMBOL_GPL(regulator_put); /** * regulator_register_supply_alias - Provide device alias for supply lookup * * @dev: device that will be given as the regulator "consumer" * @id: Supply name or regulator ID * @alias_dev: device that should be used to lookup the supply * @alias_id: Supply name or regulator ID that should be used to lookup the * supply * * All lookups for id on dev will instead be conducted for alias_id on * alias_dev. 
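 *
 * Illustrative sketch (hypothetical MFD devices and supply names): route
 * the child's "vdd" lookups to the parent's "vdd-core" supply:
 *
 *	ret = regulator_register_supply_alias(&child->dev, "vdd",
 *					      parent_dev, "vdd-core");
 *	if (ret && ret != -EEXIST)
 *		return ret;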
*/ int regulator_register_supply_alias(struct device *dev, const char *id, struct device *alias_dev, const char *alias_id) { struct regulator_supply_alias *map; map = regulator_find_supply_alias(dev, id); if (map) return -EEXIST; map = kzalloc(sizeof(struct regulator_supply_alias), GFP_KERNEL); if (!map) return -ENOMEM; map->src_dev = dev; map->src_supply = id; map->alias_dev = alias_dev; map->alias_supply = alias_id; list_add(&map->list, &regulator_supply_alias_list); pr_info("Adding alias for supply %s,%s -> %s,%s\n", id, dev_name(dev), alias_id, dev_name(alias_dev)); return 0; } EXPORT_SYMBOL_GPL(regulator_register_supply_alias); /** * regulator_unregister_supply_alias - Remove device alias * * @dev: device that will be given as the regulator "consumer" * @id: Supply name or regulator ID * * Remove a lookup alias if one exists for id on dev. */ void regulator_unregister_supply_alias(struct device *dev, const char *id) { struct regulator_supply_alias *map; map = regulator_find_supply_alias(dev, id); if (map) { list_del(&map->list); kfree(map); } } EXPORT_SYMBOL_GPL(regulator_unregister_supply_alias); /** * regulator_bulk_register_supply_alias - register multiple aliases * * @dev: device that will be given as the regulator "consumer" * @id: List of supply names or regulator IDs * @alias_dev: device that should be used to lookup the supply * @alias_id: List of supply names or regulator IDs that should be used to * lookup the supply * @num_id: Number of aliases to register * * @return 0 on success, an errno on failure. * * This helper function allows drivers to register several supply * aliases in one operation. If any of the aliases cannot be * registered any aliases that were registered will be removed * before returning to the caller. */ int regulator_bulk_register_supply_alias(struct device *dev, const char *const *id, struct device *alias_dev, const char *const *alias_id, int num_id) { int i; int ret; for (i = 0; i < num_id; ++i) { ret = regulator_register_supply_alias(dev, id[i], alias_dev, alias_id[i]); if (ret < 0) goto err; } return 0; err: dev_err(dev, "Failed to create supply alias %s,%s -> %s,%s\n", id[i], dev_name(dev), alias_id[i], dev_name(alias_dev)); while (--i >= 0) regulator_unregister_supply_alias(dev, id[i]); return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_register_supply_alias); /** * regulator_bulk_unregister_supply_alias - unregister multiple aliases * * @dev: device that will be given as the regulator "consumer" * @id: List of supply names or regulator IDs * @num_id: Number of aliases to unregister * * This helper function allows drivers to unregister several supply * aliases in one operation. */ void regulator_bulk_unregister_supply_alias(struct device *dev, const char *const *id, int num_id) { int i; for (i = 0; i < num_id; ++i) regulator_unregister_supply_alias(dev, id[i]); } EXPORT_SYMBOL_GPL(regulator_bulk_unregister_supply_alias); /* Manage enable GPIO list. 
Same GPIO pin can be shared among regulators */ static int regulator_ena_gpio_request(struct regulator_dev *rdev, const struct regulator_config *config) { struct regulator_enable_gpio *pin, *new_pin; struct gpio_desc *gpiod; gpiod = config->ena_gpiod; new_pin = kzalloc(sizeof(*new_pin), GFP_KERNEL); mutex_lock(&regulator_list_mutex); list_for_each_entry(pin, &regulator_ena_gpio_list, list) { if (pin->gpiod == gpiod) { rdev_dbg(rdev, "GPIO is already used\n"); goto update_ena_gpio_to_rdev; } } if (new_pin == NULL) { mutex_unlock(&regulator_list_mutex); return -ENOMEM; } pin = new_pin; new_pin = NULL; pin->gpiod = gpiod; list_add(&pin->list, &regulator_ena_gpio_list); update_ena_gpio_to_rdev: pin->request_count++; rdev->ena_pin = pin; mutex_unlock(&regulator_list_mutex); kfree(new_pin); return 0; } static void regulator_ena_gpio_free(struct regulator_dev *rdev) { struct regulator_enable_gpio *pin, *n; if (!rdev->ena_pin) return; /* Free the GPIO only in case of no use */ list_for_each_entry_safe(pin, n, &regulator_ena_gpio_list, list) { if (pin != rdev->ena_pin) continue; if (--pin->request_count) break; gpiod_put(pin->gpiod); list_del(&pin->list); kfree(pin); break; } rdev->ena_pin = NULL; } /** * regulator_ena_gpio_ctrl - balance enable_count of each GPIO and actual GPIO pin control * @rdev: regulator_dev structure * @enable: enable GPIO at initial use? * * GPIO is enabled in case of initial use. (enable_count is 0) * GPIO is disabled when it is not shared any more. (enable_count <= 1) */ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable) { struct regulator_enable_gpio *pin = rdev->ena_pin; if (!pin) return -EINVAL; if (enable) { /* Enable GPIO at initial use */ if (pin->enable_count == 0) gpiod_set_value_cansleep(pin->gpiod, 1); pin->enable_count++; } else { if (pin->enable_count > 1) { pin->enable_count--; return 0; } /* Disable GPIO if not used */ if (pin->enable_count <= 1) { gpiod_set_value_cansleep(pin->gpiod, 0); pin->enable_count = 0; } } return 0; } /** * _regulator_delay_helper - a delay helper function * @delay: time to delay in microseconds * * Delay for the requested amount of time as per the guidelines in: * * Documentation/timers/timers-howto.rst * * The assumption here is that these regulator operations will never used in * atomic context and therefore sleeping functions can be used. */ static void _regulator_delay_helper(unsigned int delay) { unsigned int ms = delay / 1000; unsigned int us = delay % 1000; if (ms > 0) { /* * For small enough values, handle super-millisecond * delays in the usleep_range() call below. */ if (ms < 20) us += ms * 1000; else msleep(ms); } /* * Give the scheduler some room to coalesce with any other * wakeup sources. For delays shorter than 10 us, don't even * bother setting up high-resolution timers and just busy- * loop. */ if (us >= 10) usleep_range(us, us + 100); else udelay(us); } /** * _regulator_check_status_enabled * * A helper function to check if the regulator status can be interpreted * as 'regulator is enabled'. 
* @rdev: the regulator device to check * * Return: * * 1 - if status shows regulator is in enabled state * * 0 - if not enabled state * * Error Value - as received from ops->get_status() */ static inline int _regulator_check_status_enabled(struct regulator_dev *rdev) { int ret = rdev->desc->ops->get_status(rdev); if (ret < 0) { rdev_info(rdev, "get_status returned error: %d\n", ret); return ret; } switch (ret) { case REGULATOR_STATUS_OFF: case REGULATOR_STATUS_ERROR: case REGULATOR_STATUS_UNDEFINED: return 0; default: return 1; } } static int _regulator_do_enable(struct regulator_dev *rdev) { int ret, delay; /* Query before enabling in case configuration dependent. */ ret = _regulator_get_enable_time(rdev); if (ret >= 0) { delay = ret; } else { rdev_warn(rdev, "enable_time() failed: %pe\n", ERR_PTR(ret)); delay = 0; } trace_regulator_enable(rdev_get_name(rdev)); if (rdev->desc->off_on_delay) { /* if needed, keep a distance of off_on_delay from last time * this regulator was disabled. */ ktime_t end = ktime_add_us(rdev->last_off, rdev->desc->off_on_delay); s64 remaining = ktime_us_delta(end, ktime_get_boottime()); if (remaining > 0) _regulator_delay_helper(remaining); } if (rdev->ena_pin) { if (!rdev->ena_gpio_state) { ret = regulator_ena_gpio_ctrl(rdev, true); if (ret < 0) return ret; rdev->ena_gpio_state = 1; } } else if (rdev->desc->ops->enable) { ret = rdev->desc->ops->enable(rdev); if (ret < 0) return ret; } else { return -EINVAL; } /* Allow the regulator to ramp; it would be useful to extend * this for bulk operations so that the regulators can ramp * together. */ trace_regulator_enable_delay(rdev_get_name(rdev)); /* If poll_enabled_time is set, poll upto the delay calculated * above, delaying poll_enabled_time uS to check if the regulator * actually got enabled. * If the regulator isn't enabled after our delay helper has expired, * return -ETIMEDOUT. */ if (rdev->desc->poll_enabled_time) { int time_remaining = delay; while (time_remaining > 0) { _regulator_delay_helper(rdev->desc->poll_enabled_time); if (rdev->desc->ops->get_status) { ret = _regulator_check_status_enabled(rdev); if (ret < 0) return ret; else if (ret) break; } else if (rdev->desc->ops->is_enabled(rdev)) break; time_remaining -= rdev->desc->poll_enabled_time; } if (time_remaining <= 0) { rdev_err(rdev, "Enabled check timed out\n"); return -ETIMEDOUT; } } else { _regulator_delay_helper(delay); } trace_regulator_enable_complete(rdev_get_name(rdev)); return 0; } /** * _regulator_handle_consumer_enable - handle that a consumer enabled * @regulator: regulator source * * Some things on a regulator consumer (like the contribution towards total * load on the regulator) only have an effect when the consumer wants the * regulator enabled. Explained in example with two consumers of the same * regulator: * consumer A: set_load(100); => total load = 0 * consumer A: regulator_enable(); => total load = 100 * consumer B: set_load(1000); => total load = 100 * consumer B: regulator_enable(); => total load = 1100 * consumer A: regulator_disable(); => total_load = 1000 * * This function (together with _regulator_handle_consumer_disable) is * responsible for keeping track of the refcount for a given regulator consumer * and applying / unapplying these things. * * Returns 0 upon no error; -error upon error. 
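 *
 * Consumer-side sketch of the sequence above (public API only,
 * hypothetical consumer):
 *
 *	regulator_set_load(reg, 100);	// stored, no effect on total load yet
 *	regulator_enable(reg);		// this consumer now contributes 100uA
 *	regulator_disable(reg);		// contribution is dropped again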
*/ static int _regulator_handle_consumer_enable(struct regulator *regulator) { int ret; struct regulator_dev *rdev = regulator->rdev; lockdep_assert_held_once(&rdev->mutex.base); regulator->enable_count++; if (regulator->uA_load && regulator->enable_count == 1) { ret = drms_uA_update(rdev); if (ret) regulator->enable_count--; return ret; } return 0; } /** * _regulator_handle_consumer_disable - handle that a consumer disabled * @regulator: regulator source * * The opposite of _regulator_handle_consumer_enable(). * * Returns 0 upon no error; -error upon error. */ static int _regulator_handle_consumer_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; lockdep_assert_held_once(&rdev->mutex.base); if (!regulator->enable_count) { rdev_err(rdev, "Underflow of regulator enable count\n"); return -EINVAL; } regulator->enable_count--; if (regulator->uA_load && regulator->enable_count == 0) return drms_uA_update(rdev); return 0; } /* locks held by regulator_enable() */ static int _regulator_enable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; int ret; lockdep_assert_held_once(&rdev->mutex.base); if (rdev->use_count == 0 && rdev->supply) { ret = _regulator_enable(rdev->supply); if (ret < 0) return ret; } /* balance only if there are regulators coupled */ if (rdev->coupling_desc.n_coupled > 1) { ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); if (ret < 0) goto err_disable_supply; } ret = _regulator_handle_consumer_enable(regulator); if (ret < 0) goto err_disable_supply; if (rdev->use_count == 0) { /* * The regulator may already be enabled if it's not switchable * or was left on */ ret = _regulator_is_enabled(rdev); if (ret == -EINVAL || ret == 0) { if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { ret = -EPERM; goto err_consumer_disable; } ret = _regulator_do_enable(rdev); if (ret < 0) goto err_consumer_disable; _notifier_call_chain(rdev, REGULATOR_EVENT_ENABLE, NULL); } else if (ret < 0) { rdev_err(rdev, "is_enabled() failed: %pe\n", ERR_PTR(ret)); goto err_consumer_disable; } /* Fallthrough on positive return values - already enabled */ } if (regulator->enable_count == 1) rdev->use_count++; return 0; err_consumer_disable: _regulator_handle_consumer_disable(regulator); err_disable_supply: if (rdev->use_count == 0 && rdev->supply) _regulator_disable(rdev->supply); return ret; } /** * regulator_enable - enable regulator output * @regulator: regulator source * * Request that the regulator be enabled with the regulator output at * the predefined voltage or current value. Calls to regulator_enable() * must be balanced with calls to regulator_disable(). * * NOTE: the output value can be set by other drivers, boot loader or may be * hardwired in the regulator. 
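 *
 * Typical consumer-side pattern (sketch with hypothetical names, error
 * paths shortened):
 *
 *	reg = devm_regulator_get(&pdev->dev, "vdd");
 *	if (IS_ERR(reg))
 *		return PTR_ERR(reg);
 *	ret = regulator_enable(reg);
 *	if (ret)
 *		return ret;
 *	// ... hardware in use ...
 *	regulator_disable(reg);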
*/ int regulator_enable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_enable(regulator); regulator_unlock_dependent(rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_enable); static int _regulator_do_disable(struct regulator_dev *rdev) { int ret; trace_regulator_disable(rdev_get_name(rdev)); if (rdev->ena_pin) { if (rdev->ena_gpio_state) { ret = regulator_ena_gpio_ctrl(rdev, false); if (ret < 0) return ret; rdev->ena_gpio_state = 0; } } else if (rdev->desc->ops->disable) { ret = rdev->desc->ops->disable(rdev); if (ret != 0) return ret; } if (rdev->desc->off_on_delay) rdev->last_off = ktime_get_boottime(); trace_regulator_disable_complete(rdev_get_name(rdev)); return 0; } /* locks held by regulator_disable() */ static int _regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; int ret = 0; lockdep_assert_held_once(&rdev->mutex.base); if (WARN(regulator->enable_count == 0, "unbalanced disables for %s\n", rdev_get_name(rdev))) return -EIO; if (regulator->enable_count == 1) { /* disabling last enable_count from this regulator */ /* are we the last user and permitted to disable ? */ if (rdev->use_count == 1 && (rdev->constraints && !rdev->constraints->always_on)) { /* we are last user */ if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_DISABLE, NULL); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = _regulator_do_disable(rdev); if (ret < 0) { rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret)); _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_DISABLE, NULL); return ret; } _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, NULL); } rdev->use_count = 0; } else if (rdev->use_count > 1) { rdev->use_count--; } } if (ret == 0) ret = _regulator_handle_consumer_disable(regulator); if (ret == 0 && rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); if (ret == 0 && rdev->use_count == 0 && rdev->supply) ret = _regulator_disable(rdev->supply); return ret; } /** * regulator_disable - disable regulator output * @regulator: regulator source * * Disable the regulator output voltage or current. Calls to * regulator_enable() must be balanced with calls to * regulator_disable(). * * NOTE: this will only disable the regulator output if no other consumer * devices have it enabled, the regulator device supports disabling and * machine constraints permit this operation. 
*/ int regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_disable(regulator); regulator_unlock_dependent(rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_disable); /* locks held by regulator_force_disable() */ static int _regulator_force_disable(struct regulator_dev *rdev) { int ret = 0; lockdep_assert_held_once(&rdev->mutex.base); ret = _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_PRE_DISABLE, NULL); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = _regulator_do_disable(rdev); if (ret < 0) { rdev_err(rdev, "failed to force disable: %pe\n", ERR_PTR(ret)); _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_ABORT_DISABLE, NULL); return ret; } _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | REGULATOR_EVENT_DISABLE, NULL); return 0; } /** * regulator_force_disable - force disable regulator output * @regulator: regulator source * * Forcibly disable the regulator output voltage or current. * NOTE: this *will* disable the regulator output even if other consumer * devices have it enabled. This should be used for situations when device * damage will likely occur if the regulator is not disabled (e.g. over temp). */ int regulator_force_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(rdev, &ww_ctx); ret = _regulator_force_disable(regulator->rdev); if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); if (regulator->uA_load) { regulator->uA_load = 0; ret = drms_uA_update(rdev); } if (rdev->use_count != 0 && rdev->supply) _regulator_disable(rdev->supply); regulator_unlock_dependent(rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_force_disable); static void regulator_disable_work(struct work_struct *work) { struct regulator_dev *rdev = container_of(work, struct regulator_dev, disable_work.work); struct ww_acquire_ctx ww_ctx; int count, i, ret; struct regulator *regulator; int total_count = 0; regulator_lock_dependent(rdev, &ww_ctx); /* * Workqueue functions queue the new work instance while the previous * work instance is being processed. Cancel the queued work instance * as the work instance under processing does the job of the queued * work instance. */ cancel_delayed_work(&rdev->disable_work); list_for_each_entry(regulator, &rdev->consumer_list, list) { count = regulator->deferred_disables; if (!count) continue; total_count += count; regulator->deferred_disables = 0; for (i = 0; i < count; i++) { ret = _regulator_disable(regulator); if (ret != 0) rdev_err(rdev, "Deferred disable failed: %pe\n", ERR_PTR(ret)); } } WARN_ON(!total_count); if (rdev->coupling_desc.n_coupled > 1) regulator_balance_voltage(rdev, PM_SUSPEND_ON); regulator_unlock_dependent(rdev, &ww_ctx); } /** * regulator_disable_deferred - disable regulator output with delay * @regulator: regulator source * @ms: milliseconds until the regulator is disabled * * Execute regulator_disable() on the regulator after a delay. This * is intended for use with devices that require some time to quiesce. * * NOTE: this will only disable the regulator output if no other consumer * devices have it enabled, the regulator device supports disabling and * machine constraints permit this operation. 
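 *
 * Illustrative use (hypothetical consumer): give attached hardware time
 * to finish its own power-down sequence before the rail is dropped:
 *
 *	regulator_disable_deferred(reg, 50);	// disable roughly 50ms later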
*/ int regulator_disable_deferred(struct regulator *regulator, int ms) { struct regulator_dev *rdev = regulator->rdev; if (!ms) return regulator_disable(regulator); regulator_lock(rdev); regulator->deferred_disables++; mod_delayed_work(system_power_efficient_wq, &rdev->disable_work, msecs_to_jiffies(ms)); regulator_unlock(rdev); return 0; } EXPORT_SYMBOL_GPL(regulator_disable_deferred); static int _regulator_is_enabled(struct regulator_dev *rdev) { /* A GPIO control always takes precedence */ if (rdev->ena_pin) return rdev->ena_gpio_state; /* If we don't know then assume that the regulator is always on */ if (!rdev->desc->ops->is_enabled) return 1; return rdev->desc->ops->is_enabled(rdev); } static int _regulator_list_voltage(struct regulator_dev *rdev, unsigned selector, int lock) { const struct regulator_ops *ops = rdev->desc->ops; int ret; if (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1 && !selector) return rdev->desc->fixed_uV; if (ops->list_voltage) { if (selector >= rdev->desc->n_voltages) return -EINVAL; if (selector < rdev->desc->linear_min_sel) return 0; if (lock) regulator_lock(rdev); ret = ops->list_voltage(rdev, selector); if (lock) regulator_unlock(rdev); } else if (rdev->is_switch && rdev->supply) { ret = _regulator_list_voltage(rdev->supply->rdev, selector, lock); } else { return -EINVAL; } if (ret > 0) { if (ret < rdev->constraints->min_uV) ret = 0; else if (ret > rdev->constraints->max_uV) ret = 0; } return ret; } /** * regulator_is_enabled - is the regulator output enabled * @regulator: regulator source * * Returns positive if the regulator driver backing the source/client * has requested that the device be enabled, zero if it hasn't, else a * negative errno code. * * Note that the device backing this regulator handle can have multiple * users, so it might be enabled even if regulator_enable() was never * called for this particular source. */ int regulator_is_enabled(struct regulator *regulator) { int ret; if (regulator->always_on) return 1; regulator_lock(regulator->rdev); ret = _regulator_is_enabled(regulator->rdev); regulator_unlock(regulator->rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_is_enabled); /** * regulator_count_voltages - count regulator_list_voltage() selectors * @regulator: regulator source * * Returns number of selectors, or negative errno. Selectors are * numbered starting at zero, and typically correspond to bitfields * in hardware registers. */ int regulator_count_voltages(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; if (rdev->desc->n_voltages) return rdev->desc->n_voltages; if (!rdev->is_switch || !rdev->supply) return -EINVAL; return regulator_count_voltages(rdev->supply); } EXPORT_SYMBOL_GPL(regulator_count_voltages); /** * regulator_list_voltage - enumerate supported voltages * @regulator: regulator source * @selector: identify voltage to list * Context: can sleep * * Returns a voltage that can be passed to @regulator_set_voltage(), * zero if this selector code can't be used on this system, or a * negative errno. */ int regulator_list_voltage(struct regulator *regulator, unsigned selector) { return _regulator_list_voltage(regulator->rdev, selector, 1); } EXPORT_SYMBOL_GPL(regulator_list_voltage); /** * regulator_get_regmap - get the regulator's register map * @regulator: regulator source * * Returns the register map for the given regulator, or an ERR_PTR value * if the regulator doesn't use regmap. 
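 *
 * Illustrative check (sketch): treat a missing regmap as "not supported"
 * instead of dereferencing the return value:
 *
 *	struct regmap *map = regulator_get_regmap(reg);
 *
 *	if (IS_ERR(map))
 *		return PTR_ERR(map);	// -EOPNOTSUPP: no regmap behind reg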
*/ struct regmap *regulator_get_regmap(struct regulator *regulator) { struct regmap *map = regulator->rdev->regmap; return map ? map : ERR_PTR(-EOPNOTSUPP); } /** * regulator_get_hardware_vsel_register - get the HW voltage selector register * @regulator: regulator source * @vsel_reg: voltage selector register, output parameter * @vsel_mask: mask for voltage selector bitfield, output parameter * * Returns the hardware register offset and bitmask used for setting the * regulator voltage. This might be useful when configuring voltage-scaling * hardware or firmware that can make I2C requests behind the kernel's back, * for example. * * On success, the output parameters @vsel_reg and @vsel_mask are filled in * and 0 is returned, otherwise a negative errno is returned. */ int regulator_get_hardware_vsel_register(struct regulator *regulator, unsigned *vsel_reg, unsigned *vsel_mask) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) return -EOPNOTSUPP; *vsel_reg = rdev->desc->vsel_reg; *vsel_mask = rdev->desc->vsel_mask; return 0; } EXPORT_SYMBOL_GPL(regulator_get_hardware_vsel_register); /** * regulator_list_hardware_vsel - get the HW-specific register value for a selector * @regulator: regulator source * @selector: identify voltage to list * * Converts the selector to a hardware-specific voltage selector that can be * directly written to the regulator registers. The address of the voltage * register can be determined by calling @regulator_get_hardware_vsel_register. * * On error a negative errno is returned. */ int regulator_list_hardware_vsel(struct regulator *regulator, unsigned selector) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; if (selector >= rdev->desc->n_voltages) return -EINVAL; if (selector < rdev->desc->linear_min_sel) return 0; if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) return -EOPNOTSUPP; return selector; } EXPORT_SYMBOL_GPL(regulator_list_hardware_vsel); /** * regulator_get_linear_step - return the voltage step size between VSEL values * @regulator: regulator source * * Returns the voltage step size between VSEL values for linear * regulators, or return 0 if the regulator isn't a linear regulator. */ unsigned int regulator_get_linear_step(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; return rdev->desc->uV_step; } EXPORT_SYMBOL_GPL(regulator_get_linear_step); /** * regulator_is_supported_voltage - check if a voltage range can be supported * * @regulator: Regulator to check. * @min_uV: Minimum required voltage in uV. * @max_uV: Maximum required voltage in uV. * * Returns a boolean. */ int regulator_is_supported_voltage(struct regulator *regulator, int min_uV, int max_uV) { struct regulator_dev *rdev = regulator->rdev; int i, voltages, ret; /* If we can't change voltage check the current voltage */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { ret = regulator_get_voltage(regulator); if (ret >= 0) return min_uV <= ret && ret <= max_uV; else return ret; } /* Any voltage within constrains range is fine? 
*/ if (rdev->desc->continuous_voltage_range) return min_uV >= rdev->constraints->min_uV && max_uV <= rdev->constraints->max_uV; ret = regulator_count_voltages(regulator); if (ret < 0) return 0; voltages = ret; for (i = 0; i < voltages; i++) { ret = regulator_list_voltage(regulator, i); if (ret >= min_uV && ret <= max_uV) return 1; } return 0; } EXPORT_SYMBOL_GPL(regulator_is_supported_voltage); static int regulator_map_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { const struct regulator_desc *desc = rdev->desc; if (desc->ops->map_voltage) return desc->ops->map_voltage(rdev, min_uV, max_uV); if (desc->ops->list_voltage == regulator_list_voltage_linear) return regulator_map_voltage_linear(rdev, min_uV, max_uV); if (desc->ops->list_voltage == regulator_list_voltage_linear_range) return regulator_map_voltage_linear_range(rdev, min_uV, max_uV); if (desc->ops->list_voltage == regulator_list_voltage_pickable_linear_range) return regulator_map_voltage_pickable_linear_range(rdev, min_uV, max_uV); return regulator_map_voltage_iterate(rdev, min_uV, max_uV); } static int _regulator_call_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV, unsigned *selector) { struct pre_voltage_change_data data; int ret; data.old_uV = regulator_get_voltage_rdev(rdev); data.min_uV = min_uV; data.max_uV = max_uV; ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, &data); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, selector); if (ret >= 0) return ret; _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, (void *)data.old_uV); return ret; } static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev, int uV, unsigned selector) { struct pre_voltage_change_data data; int ret; data.old_uV = regulator_get_voltage_rdev(rdev); data.min_uV = uV; data.max_uV = uV; ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, &data); if (ret & NOTIFY_STOP_MASK) return -EINVAL; ret = rdev->desc->ops->set_voltage_sel(rdev, selector); if (ret >= 0) return ret; _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, (void *)data.old_uV); return ret; } static int _regulator_set_voltage_sel_step(struct regulator_dev *rdev, int uV, int new_selector) { const struct regulator_ops *ops = rdev->desc->ops; int diff, old_sel, curr_sel, ret; /* Stepping is only needed if the regulator is enabled. */ if (!_regulator_is_enabled(rdev)) goto final_set; if (!ops->get_voltage_sel) return -EINVAL; old_sel = ops->get_voltage_sel(rdev); if (old_sel < 0) return old_sel; diff = new_selector - old_sel; if (diff == 0) return 0; /* No change needed. */ if (diff > 0) { /* Stepping up. */ for (curr_sel = old_sel + rdev->desc->vsel_step; curr_sel < new_selector; curr_sel += rdev->desc->vsel_step) { /* * Call the callback directly instead of using * _regulator_call_set_voltage_sel() as we don't * want to notify anyone yet. Same in the branch * below. */ ret = ops->set_voltage_sel(rdev, curr_sel); if (ret) goto try_revert; } } else { /* Stepping down. */ for (curr_sel = old_sel - rdev->desc->vsel_step; curr_sel > new_selector; curr_sel -= rdev->desc->vsel_step) { ret = ops->set_voltage_sel(rdev, curr_sel); if (ret) goto try_revert; } } final_set: /* The final selector will trigger the notifiers. */ return _regulator_call_set_voltage_sel(rdev, uV, new_selector); try_revert: /* * At least try to return to the previous voltage if setting a new * one failed. 
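 *
 * The revert is best-effort: its return value is deliberately ignored and
 * the error from the step that failed is what gets reported to the caller.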
*/ (void)ops->set_voltage_sel(rdev, old_sel); return ret; } static int _regulator_set_voltage_time(struct regulator_dev *rdev, int old_uV, int new_uV) { unsigned int ramp_delay = 0; if (rdev->constraints->ramp_delay) ramp_delay = rdev->constraints->ramp_delay; else if (rdev->desc->ramp_delay) ramp_delay = rdev->desc->ramp_delay; else if (rdev->constraints->settling_time) return rdev->constraints->settling_time; else if (rdev->constraints->settling_time_up && (new_uV > old_uV)) return rdev->constraints->settling_time_up; else if (rdev->constraints->settling_time_down && (new_uV < old_uV)) return rdev->constraints->settling_time_down; if (ramp_delay == 0) return 0; return DIV_ROUND_UP(abs(new_uV - old_uV), ramp_delay); } static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { int ret; int delay = 0; int best_val = 0; unsigned int selector; int old_selector = -1; const struct regulator_ops *ops = rdev->desc->ops; int old_uV = regulator_get_voltage_rdev(rdev); trace_regulator_set_voltage(rdev_get_name(rdev), min_uV, max_uV); min_uV += rdev->constraints->uV_offset; max_uV += rdev->constraints->uV_offset; /* * If we can't obtain the old selector there is not enough * info to call set_voltage_time_sel(). */ if (_regulator_is_enabled(rdev) && ops->set_voltage_time_sel && ops->get_voltage_sel) { old_selector = ops->get_voltage_sel(rdev); if (old_selector < 0) return old_selector; } if (ops->set_voltage) { ret = _regulator_call_set_voltage(rdev, min_uV, max_uV, &selector); if (ret >= 0) { if (ops->list_voltage) best_val = ops->list_voltage(rdev, selector); else best_val = regulator_get_voltage_rdev(rdev); } } else if (ops->set_voltage_sel) { ret = regulator_map_voltage(rdev, min_uV, max_uV); if (ret >= 0) { best_val = ops->list_voltage(rdev, ret); if (min_uV <= best_val && max_uV >= best_val) { selector = ret; if (old_selector == selector) ret = 0; else if (rdev->desc->vsel_step) ret = _regulator_set_voltage_sel_step( rdev, best_val, selector); else ret = _regulator_call_set_voltage_sel( rdev, best_val, selector); } else { ret = -EINVAL; } } } else { ret = -EINVAL; } if (ret) goto out; if (ops->set_voltage_time_sel) { /* * Call set_voltage_time_sel if successfully obtained * old_selector */ if (old_selector >= 0 && old_selector != selector) delay = ops->set_voltage_time_sel(rdev, old_selector, selector); } else { if (old_uV != best_val) { if (ops->set_voltage_time) delay = ops->set_voltage_time(rdev, old_uV, best_val); else delay = _regulator_set_voltage_time(rdev, old_uV, best_val); } } if (delay < 0) { rdev_warn(rdev, "failed to get delay: %pe\n", ERR_PTR(delay)); delay = 0; } /* Insert any necessary delays */ _regulator_delay_helper(delay); if (best_val >= 0) { unsigned long data = best_val; _notifier_call_chain(rdev, REGULATOR_EVENT_VOLTAGE_CHANGE, (void *)data); } out: trace_regulator_set_voltage_complete(rdev_get_name(rdev), best_val); return ret; } static int _regulator_do_set_suspend_voltage(struct regulator_dev *rdev, int min_uV, int max_uV, suspend_state_t state) { struct regulator_state *rstate; int uV, sel; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return -EINVAL; if (min_uV < rstate->min_uV) min_uV = rstate->min_uV; if (max_uV > rstate->max_uV) max_uV = rstate->max_uV; sel = regulator_map_voltage(rdev, min_uV, max_uV); if (sel < 0) return sel; uV = rdev->desc->ops->list_voltage(rdev, sel); if (uV >= min_uV && uV <= max_uV) rstate->uV = uV; return 0; } static int regulator_set_voltage_unlocked(struct regulator *regulator, 
int min_uV, int max_uV, suspend_state_t state) { struct regulator_dev *rdev = regulator->rdev; struct regulator_voltage *voltage = &regulator->voltage[state]; int ret = 0; int old_min_uV, old_max_uV; int current_uV; /* If we're setting the same range as last time the change * should be a noop (some cpufreq implementations use the same * voltage for multiple frequencies, for example). */ if (voltage->min_uV == min_uV && voltage->max_uV == max_uV) goto out; /* If we're trying to set a range that overlaps the current voltage, * return successfully even though the regulator does not support * changing the voltage. */ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { current_uV = regulator_get_voltage_rdev(rdev); if (min_uV <= current_uV && current_uV <= max_uV) { voltage->min_uV = min_uV; voltage->max_uV = max_uV; goto out; } } /* sanity check */ if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } /* constraints check */ ret = regulator_check_voltage(rdev, &min_uV, &max_uV); if (ret < 0) goto out; /* restore original values in case of error */ old_min_uV = voltage->min_uV; old_max_uV = voltage->max_uV; voltage->min_uV = min_uV; voltage->max_uV = max_uV; /* for not coupled regulators this will just set the voltage */ ret = regulator_balance_voltage(rdev, state); if (ret < 0) { voltage->min_uV = old_min_uV; voltage->max_uV = old_max_uV; } out: return ret; } int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV, int max_uV, suspend_state_t state) { int best_supply_uV = 0; int supply_change_uV = 0; int ret; if (rdev->supply && regulator_ops_is_valid(rdev->supply->rdev, REGULATOR_CHANGE_VOLTAGE) && (rdev->desc->min_dropout_uV || !(rdev->desc->ops->get_voltage || rdev->desc->ops->get_voltage_sel))) { int current_supply_uV; int selector; selector = regulator_map_voltage(rdev, min_uV, max_uV); if (selector < 0) { ret = selector; goto out; } best_supply_uV = _regulator_list_voltage(rdev, selector, 0); if (best_supply_uV < 0) { ret = best_supply_uV; goto out; } best_supply_uV += rdev->desc->min_dropout_uV; current_supply_uV = regulator_get_voltage_rdev(rdev->supply->rdev); if (current_supply_uV < 0) { ret = current_supply_uV; goto out; } supply_change_uV = best_supply_uV - current_supply_uV; } if (supply_change_uV > 0) { ret = regulator_set_voltage_unlocked(rdev->supply, best_supply_uV, INT_MAX, state); if (ret) { dev_err(&rdev->dev, "Failed to increase supply voltage: %pe\n", ERR_PTR(ret)); goto out; } } if (state == PM_SUSPEND_ON) ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); else ret = _regulator_do_set_suspend_voltage(rdev, min_uV, max_uV, state); if (ret < 0) goto out; if (supply_change_uV < 0) { ret = regulator_set_voltage_unlocked(rdev->supply, best_supply_uV, INT_MAX, state); if (ret) dev_warn(&rdev->dev, "Failed to decrease supply voltage: %pe\n", ERR_PTR(ret)); /* No need to fail here */ ret = 0; } out: return ret; } EXPORT_SYMBOL_GPL(regulator_set_voltage_rdev); static int regulator_limit_voltage_step(struct regulator_dev *rdev, int *current_uV, int *min_uV) { struct regulation_constraints *constraints = rdev->constraints; /* Limit voltage change only if necessary */ if (!constraints->max_uV_step || !_regulator_is_enabled(rdev)) return 1; if (*current_uV < 0) { *current_uV = regulator_get_voltage_rdev(rdev); if (*current_uV < 0) return *current_uV; } if (abs(*current_uV - *min_uV) <= constraints->max_uV_step) return 1; /* Clamp target voltage within the given step */ if (*current_uV < *min_uV) *min_uV = 
min(*current_uV + constraints->max_uV_step, *min_uV); else *min_uV = max(*current_uV - constraints->max_uV_step, *min_uV); return 0; } static int regulator_get_optimal_voltage(struct regulator_dev *rdev, int *current_uV, int *min_uV, int *max_uV, suspend_state_t state, int n_coupled) { struct coupling_desc *c_desc = &rdev->coupling_desc; struct regulator_dev **c_rdevs = c_desc->coupled_rdevs; struct regulation_constraints *constraints = rdev->constraints; int desired_min_uV = 0, desired_max_uV = INT_MAX; int max_current_uV = 0, min_current_uV = INT_MAX; int highest_min_uV = 0, target_uV, possible_uV; int i, ret, max_spread; bool done; *current_uV = -1; /* * If there are no coupled regulators, simply set the voltage * demanded by consumers. */ if (n_coupled == 1) { /* * If consumers don't provide any demands, set voltage * to min_uV */ desired_min_uV = constraints->min_uV; desired_max_uV = constraints->max_uV; ret = regulator_check_consumers(rdev, &desired_min_uV, &desired_max_uV, state); if (ret < 0) return ret; done = true; goto finish; } /* Find highest min desired voltage */ for (i = 0; i < n_coupled; i++) { int tmp_min = 0; int tmp_max = INT_MAX; lockdep_assert_held_once(&c_rdevs[i]->mutex.base); ret = regulator_check_consumers(c_rdevs[i], &tmp_min, &tmp_max, state); if (ret < 0) return ret; ret = regulator_check_voltage(c_rdevs[i], &tmp_min, &tmp_max); if (ret < 0) return ret; highest_min_uV = max(highest_min_uV, tmp_min); if (i == 0) { desired_min_uV = tmp_min; desired_max_uV = tmp_max; } } max_spread = constraints->max_spread[0]; /* * Let target_uV be equal to the desired one if possible. * If not, set it to minimum voltage, allowed by other coupled * regulators. */ target_uV = max(desired_min_uV, highest_min_uV - max_spread); /* * Find min and max voltages, which currently aren't violating * max_spread. */ for (i = 1; i < n_coupled; i++) { int tmp_act; if (!_regulator_is_enabled(c_rdevs[i])) continue; tmp_act = regulator_get_voltage_rdev(c_rdevs[i]); if (tmp_act < 0) return tmp_act; min_current_uV = min(tmp_act, min_current_uV); max_current_uV = max(tmp_act, max_current_uV); } /* There aren't any other regulators enabled */ if (max_current_uV == 0) { possible_uV = target_uV; } else { /* * Correct target voltage, so as it currently isn't * violating max_spread */ possible_uV = max(target_uV, max_current_uV - max_spread); possible_uV = min(possible_uV, min_current_uV + max_spread); } if (possible_uV > desired_max_uV) return -EINVAL; done = (possible_uV == target_uV); desired_min_uV = possible_uV; finish: /* Apply max_uV_step constraint if necessary */ if (state == PM_SUSPEND_ON) { ret = regulator_limit_voltage_step(rdev, current_uV, &desired_min_uV); if (ret < 0) return ret; if (ret == 0) done = false; } /* Set current_uV if wasn't done earlier in the code and if necessary */ if (n_coupled > 1 && *current_uV == -1) { if (_regulator_is_enabled(rdev)) { ret = regulator_get_voltage_rdev(rdev); if (ret < 0) return ret; *current_uV = ret; } else { *current_uV = desired_min_uV; } } *min_uV = desired_min_uV; *max_uV = desired_max_uV; return done; } int regulator_do_balance_voltage(struct regulator_dev *rdev, suspend_state_t state, bool skip_coupled) { struct regulator_dev **c_rdevs; struct regulator_dev *best_rdev; struct coupling_desc *c_desc = &rdev->coupling_desc; int i, ret, n_coupled, best_min_uV, best_max_uV, best_c_rdev; unsigned int delta, best_delta; unsigned long c_rdev_done = 0; bool best_c_rdev_done; c_rdevs = c_desc->coupled_rdevs; n_coupled = skip_coupled ? 
1 : c_desc->n_coupled; /* * Find the best possible voltage change on each loop. Leave the loop * if there isn't any possible change. */ do { best_c_rdev_done = false; best_delta = 0; best_min_uV = 0; best_max_uV = 0; best_c_rdev = 0; best_rdev = NULL; /* * Find highest difference between optimal voltage * and current voltage. */ for (i = 0; i < n_coupled; i++) { /* * optimal_uV is the best voltage that can be set for * i-th regulator at the moment without violating * max_spread constraint in order to balance * the coupled voltages. */ int optimal_uV = 0, optimal_max_uV = 0, current_uV = 0; if (test_bit(i, &c_rdev_done)) continue; ret = regulator_get_optimal_voltage(c_rdevs[i], &current_uV, &optimal_uV, &optimal_max_uV, state, n_coupled); if (ret < 0) goto out; delta = abs(optimal_uV - current_uV); if (delta && best_delta <= delta) { best_c_rdev_done = ret; best_delta = delta; best_rdev = c_rdevs[i]; best_min_uV = optimal_uV; best_max_uV = optimal_max_uV; best_c_rdev = i; } } /* Nothing to change, return successfully */ if (!best_rdev) { ret = 0; goto out; } ret = regulator_set_voltage_rdev(best_rdev, best_min_uV, best_max_uV, state); if (ret < 0) goto out; if (best_c_rdev_done) set_bit(best_c_rdev, &c_rdev_done); } while (n_coupled > 1); out: return ret; } static int regulator_balance_voltage(struct regulator_dev *rdev, suspend_state_t state) { struct coupling_desc *c_desc = &rdev->coupling_desc; struct regulator_coupler *coupler = c_desc->coupler; bool skip_coupled = false; /* * If system is in a state other than PM_SUSPEND_ON, don't check * other coupled regulators. */ if (state != PM_SUSPEND_ON) skip_coupled = true; if (c_desc->n_resolved < c_desc->n_coupled) { rdev_err(rdev, "Not all coupled regulators registered\n"); return -EPERM; } /* Invoke custom balancer for customized couplers */ if (coupler && coupler->balance_voltage) return coupler->balance_voltage(coupler, rdev, state); return regulator_do_balance_voltage(rdev, state, skip_coupled); } /** * regulator_set_voltage - set regulator output voltage * @regulator: regulator source * @min_uV: Minimum required voltage in uV * @max_uV: Maximum acceptable voltage in uV * * Sets a voltage regulator to the desired output voltage. This can be set * during any regulator state. IOW, regulator can be disabled or enabled. * * If the regulator is enabled then the voltage will change to the new value * immediately otherwise if the regulator is disabled the regulator will * output at the new voltage when enabled. * * NOTE: If the regulator is shared between several devices then the lowest * request voltage that meets the system constraints will be used. * Regulator system constraints must be set for this regulator before * calling this function otherwise this call will fail. */ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) { struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = regulator_set_voltage_unlocked(regulator, min_uV, max_uV, PM_SUSPEND_ON); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_set_voltage); static inline int regulator_suspend_toggle(struct regulator_dev *rdev, suspend_state_t state, bool en) { struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return -EINVAL; if (!rstate->changeable) return -EPERM; rstate->enabled = (en) ? 
ENABLE_IN_SUSPEND : DISABLE_IN_SUSPEND; return 0; } int regulator_suspend_enable(struct regulator_dev *rdev, suspend_state_t state) { return regulator_suspend_toggle(rdev, state, true); } EXPORT_SYMBOL_GPL(regulator_suspend_enable); int regulator_suspend_disable(struct regulator_dev *rdev, suspend_state_t state) { struct regulator *regulator; struct regulator_voltage *voltage; /* * if any consumer wants this regulator device keeping on in * suspend states, don't set it as disabled. */ list_for_each_entry(regulator, &rdev->consumer_list, list) { voltage = &regulator->voltage[state]; if (voltage->min_uV || voltage->max_uV) return 0; } return regulator_suspend_toggle(rdev, state, false); } EXPORT_SYMBOL_GPL(regulator_suspend_disable); static int _regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) { struct regulator_dev *rdev = regulator->rdev; struct regulator_state *rstate; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return -EINVAL; if (rstate->min_uV == rstate->max_uV) { rdev_err(rdev, "The suspend voltage can't be changed!\n"); return -EPERM; } return regulator_set_voltage_unlocked(regulator, min_uV, max_uV, state); } int regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) { struct ww_acquire_ctx ww_ctx; int ret; /* PM_SUSPEND_ON is handled by regulator_set_voltage() */ if (regulator_check_states(state) || state == PM_SUSPEND_ON) return -EINVAL; regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = _regulator_set_suspend_voltage(regulator, min_uV, max_uV, state); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_set_suspend_voltage); /** * regulator_set_voltage_time - get raise/fall time * @regulator: regulator source * @old_uV: starting voltage in microvolts * @new_uV: target voltage in microvolts * * Provided with the starting and ending voltage, this function attempts to * calculate the time in microseconds required to rise or fall to this new * voltage. 
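 *
 * Illustrative sketch for a consumer that changes the supply through some
 * out-of-band mechanism and only needs the settling time (the voltages
 * shown are hypothetical):
 *
 *   int us = regulator_set_voltage_time(reg, 1100000, 1200000);
 *
 *   if (us > 0)
 *       usleep_range(us, us + 100);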
*/ int regulator_set_voltage_time(struct regulator *regulator, int old_uV, int new_uV) { struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; int old_sel = -1; int new_sel = -1; int voltage; int i; if (ops->set_voltage_time) return ops->set_voltage_time(rdev, old_uV, new_uV); else if (!ops->set_voltage_time_sel) return _regulator_set_voltage_time(rdev, old_uV, new_uV); /* Currently requires operations to do this */ if (!ops->list_voltage || !rdev->desc->n_voltages) return -EINVAL; for (i = 0; i < rdev->desc->n_voltages; i++) { /* We only look for exact voltage matches here */ if (i < rdev->desc->linear_min_sel) continue; if (old_sel >= 0 && new_sel >= 0) break; voltage = regulator_list_voltage(regulator, i); if (voltage < 0) return -EINVAL; if (voltage == 0) continue; if (voltage == old_uV) old_sel = i; if (voltage == new_uV) new_sel = i; } if (old_sel < 0 || new_sel < 0) return -EINVAL; return ops->set_voltage_time_sel(rdev, old_sel, new_sel); } EXPORT_SYMBOL_GPL(regulator_set_voltage_time); /** * regulator_set_voltage_time_sel - get raise/fall time * @rdev: regulator source device * @old_selector: selector for starting voltage * @new_selector: selector for target voltage * * Provided with the starting and target voltage selectors, this function * returns time in microseconds required to rise or fall to this new voltage * * Drivers providing ramp_delay in regulation_constraints can use this as their * set_voltage_time_sel() operation. */ int regulator_set_voltage_time_sel(struct regulator_dev *rdev, unsigned int old_selector, unsigned int new_selector) { int old_volt, new_volt; /* sanity check */ if (!rdev->desc->ops->list_voltage) return -EINVAL; old_volt = rdev->desc->ops->list_voltage(rdev, old_selector); new_volt = rdev->desc->ops->list_voltage(rdev, new_selector); if (rdev->desc->ops->set_voltage_time) return rdev->desc->ops->set_voltage_time(rdev, old_volt, new_volt); else return _regulator_set_voltage_time(rdev, old_volt, new_volt); } EXPORT_SYMBOL_GPL(regulator_set_voltage_time_sel); int regulator_sync_voltage_rdev(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } /* balance only, if regulator is coupled */ if (rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); else ret = -EOPNOTSUPP; out: regulator_unlock(rdev); return ret; } /** * regulator_sync_voltage - re-apply last regulator output voltage * @regulator: regulator source * * Re-apply the last configured voltage. This is intended to be used * where some external control source the consumer is cooperating with * has caused the configured voltage to change. */ int regulator_sync_voltage(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; struct regulator_voltage *voltage = &regulator->voltage[PM_SUSPEND_ON]; int ret, min_uV, max_uV; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) return 0; regulator_lock(rdev); if (!rdev->desc->ops->set_voltage && !rdev->desc->ops->set_voltage_sel) { ret = -EINVAL; goto out; } /* This is only going to work if we've had a voltage configured. */ if (!voltage->min_uV && !voltage->max_uV) { ret = -EINVAL; goto out; } min_uV = voltage->min_uV; max_uV = voltage->max_uV; /* This should be a paranoia check... 
*/ ret = regulator_check_voltage(rdev, &min_uV, &max_uV); if (ret < 0) goto out; ret = regulator_check_consumers(rdev, &min_uV, &max_uV, 0); if (ret < 0) goto out; /* balance only, if regulator is coupled */ if (rdev->coupling_desc.n_coupled > 1) ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON); else ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_sync_voltage); int regulator_get_voltage_rdev(struct regulator_dev *rdev) { int sel, ret; bool bypassed; if (rdev->desc->ops->get_bypass) { ret = rdev->desc->ops->get_bypass(rdev, &bypassed); if (ret < 0) return ret; if (bypassed) { /* if bypassed the regulator must have a supply */ if (!rdev->supply) { rdev_err(rdev, "bypassed regulator has no supply!\n"); return -EPROBE_DEFER; } return regulator_get_voltage_rdev(rdev->supply->rdev); } } if (rdev->desc->ops->get_voltage_sel) { sel = rdev->desc->ops->get_voltage_sel(rdev); if (sel < 0) return sel; ret = rdev->desc->ops->list_voltage(rdev, sel); } else if (rdev->desc->ops->get_voltage) { ret = rdev->desc->ops->get_voltage(rdev); } else if (rdev->desc->ops->list_voltage) { ret = rdev->desc->ops->list_voltage(rdev, 0); } else if (rdev->desc->fixed_uV && (rdev->desc->n_voltages == 1)) { ret = rdev->desc->fixed_uV; } else if (rdev->supply) { ret = regulator_get_voltage_rdev(rdev->supply->rdev); } else if (rdev->supply_name) { return -EPROBE_DEFER; } else { return -EINVAL; } if (ret < 0) return ret; return ret - rdev->constraints->uV_offset; } EXPORT_SYMBOL_GPL(regulator_get_voltage_rdev); /** * regulator_get_voltage - get regulator output voltage * @regulator: regulator source * * This returns the current regulator voltage in uV. * * NOTE: If the regulator is disabled it will return the voltage value. This * function should not be used to determine regulator state. */ int regulator_get_voltage(struct regulator *regulator) { struct ww_acquire_ctx ww_ctx; int ret; regulator_lock_dependent(regulator->rdev, &ww_ctx); ret = regulator_get_voltage_rdev(regulator->rdev); regulator_unlock_dependent(regulator->rdev, &ww_ctx); return ret; } EXPORT_SYMBOL_GPL(regulator_get_voltage); /** * regulator_set_current_limit - set regulator output current limit * @regulator: regulator source * @min_uA: Minimum supported current in uA * @max_uA: Maximum supported current in uA * * Sets current sink to the desired output current. This can be set during * any regulator state. IOW, regulator can be disabled or enabled. * * If the regulator is enabled then the current will change to the new value * immediately otherwise if the regulator is disabled the regulator will * output at the new current when enabled. * * NOTE: Regulator system constraints must be set for this regulator before * calling this function otherwise this call will fail. 
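 *
 * Illustrative sketch (the 450-500 mA window and "chg->vbus" are
 * hypothetical):
 *
 *   err = regulator_set_current_limit(chg->vbus, 450000, 500000);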
*/ int regulator_set_current_limit(struct regulator *regulator, int min_uA, int max_uA) { struct regulator_dev *rdev = regulator->rdev; int ret; regulator_lock(rdev); /* sanity check */ if (!rdev->desc->ops->set_current_limit) { ret = -EINVAL; goto out; } /* constraints check */ ret = regulator_check_current_limit(rdev, &min_uA, &max_uA); if (ret < 0) goto out; ret = rdev->desc->ops->set_current_limit(rdev, min_uA, max_uA); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_current_limit); static int _regulator_get_current_limit_unlocked(struct regulator_dev *rdev) { /* sanity check */ if (!rdev->desc->ops->get_current_limit) return -EINVAL; return rdev->desc->ops->get_current_limit(rdev); } static int _regulator_get_current_limit(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); ret = _regulator_get_current_limit_unlocked(rdev); regulator_unlock(rdev); return ret; } /** * regulator_get_current_limit - get regulator output current * @regulator: regulator source * * This returns the current supplied by the specified current sink in uA. * * NOTE: If the regulator is disabled it will return the current value. This * function should not be used to determine regulator state. */ int regulator_get_current_limit(struct regulator *regulator) { return _regulator_get_current_limit(regulator->rdev); } EXPORT_SYMBOL_GPL(regulator_get_current_limit); /** * regulator_set_mode - set regulator operating mode * @regulator: regulator source * @mode: operating mode - one of the REGULATOR_MODE constants * * Set regulator operating mode to increase regulator efficiency or improve * regulation performance. * * NOTE: Regulator system constraints must be set for this regulator before * calling this function otherwise this call will fail. */ int regulator_set_mode(struct regulator *regulator, unsigned int mode) { struct regulator_dev *rdev = regulator->rdev; int ret; int regulator_curr_mode; regulator_lock(rdev); /* sanity check */ if (!rdev->desc->ops->set_mode) { ret = -EINVAL; goto out; } /* return if the same mode is requested */ if (rdev->desc->ops->get_mode) { regulator_curr_mode = rdev->desc->ops->get_mode(rdev); if (regulator_curr_mode == mode) { ret = 0; goto out; } } /* constraints check */ ret = regulator_mode_constrain(rdev, &mode); if (ret < 0) goto out; ret = rdev->desc->ops->set_mode(rdev, mode); out: regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_mode); static unsigned int _regulator_get_mode_unlocked(struct regulator_dev *rdev) { /* sanity check */ if (!rdev->desc->ops->get_mode) return -EINVAL; return rdev->desc->ops->get_mode(rdev); } static unsigned int _regulator_get_mode(struct regulator_dev *rdev) { int ret; regulator_lock(rdev); ret = _regulator_get_mode_unlocked(rdev); regulator_unlock(rdev); return ret; } /** * regulator_get_mode - get regulator operating mode * @regulator: regulator source * * Get the current regulator operating mode. 
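 *
 * Illustrative sketch:
 *
 *   if (regulator_get_mode(reg) != REGULATOR_MODE_NORMAL)
 *       dev_dbg(dev, "supply is running in a low-power mode\n");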
*/ unsigned int regulator_get_mode(struct regulator *regulator) { return _regulator_get_mode(regulator->rdev); } EXPORT_SYMBOL_GPL(regulator_get_mode); static int rdev_get_cached_err_flags(struct regulator_dev *rdev) { int ret = 0; if (rdev->use_cached_err) { spin_lock(&rdev->err_lock); ret = rdev->cached_err; spin_unlock(&rdev->err_lock); } return ret; } static int _regulator_get_error_flags(struct regulator_dev *rdev, unsigned int *flags) { int cached_flags, ret = 0; regulator_lock(rdev); cached_flags = rdev_get_cached_err_flags(rdev); if (rdev->desc->ops->get_error_flags) ret = rdev->desc->ops->get_error_flags(rdev, flags); else if (!rdev->use_cached_err) ret = -EINVAL; *flags |= cached_flags; regulator_unlock(rdev); return ret; } /** * regulator_get_error_flags - get regulator error information * @regulator: regulator source * @flags: pointer to store error flags * * Get the current regulator error information. */ int regulator_get_error_flags(struct regulator *regulator, unsigned int *flags) { return _regulator_get_error_flags(regulator->rdev, flags); } EXPORT_SYMBOL_GPL(regulator_get_error_flags); /** * regulator_set_load - set regulator load * @regulator: regulator source * @uA_load: load current * * Notifies the regulator core of a new device load. This is then used by * DRMS (if enabled by constraints) to set the most efficient regulator * operating mode for the new regulator loading. * * Consumer devices notify their supply regulator of the maximum power * they will require (can be taken from device datasheet in the power * consumption tables) when they change operational status and hence power * state. Examples of operational state changes that can affect power * consumption are :- * * o Device is opened / closed. * o Device I/O is about to begin or has just finished. * o Device is idling in between work. * * This information is also exported via sysfs to userspace. * * DRMS will sum the total requested load on the regulator and change * to the most efficient operating mode if platform constraints allow. * * NOTE: when a regulator consumer requests to have a regulator * disabled then any load that consumer requested no longer counts * toward the total requested load. If the regulator is re-enabled * then the previously requested load will start counting again. * * If a regulator is an always-on regulator then an individual consumer's * load will still be removed if that consumer is fully disabled. * * On error a negative errno is returned. */ int regulator_set_load(struct regulator *regulator, int uA_load) { struct regulator_dev *rdev = regulator->rdev; int old_uA_load; int ret = 0; regulator_lock(rdev); old_uA_load = regulator->uA_load; regulator->uA_load = uA_load; if (regulator->enable_count && old_uA_load != uA_load) { ret = drms_uA_update(rdev); if (ret < 0) regulator->uA_load = old_uA_load; } regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_set_load); /** * regulator_allow_bypass - allow the regulator to go into bypass mode * * @regulator: Regulator to configure * @enable: enable or disable bypass mode * * Allow the regulator to go into bypass mode if all other consumers * for the regulator also enable bypass mode and the machine * constraints allow this. Bypass mode means that the regulator is * simply passing the input directly to the output with no regulation. 
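 *
 * Illustrative sketch bracketing a low-load period (error handling
 * trimmed):
 *
 *   regulator_allow_bypass(reg, true);
 *   ... low-load operation ...
 *   regulator_allow_bypass(reg, false);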
*/ int regulator_allow_bypass(struct regulator *regulator, bool enable) { struct regulator_dev *rdev = regulator->rdev; const char *name = rdev_get_name(rdev); int ret = 0; if (!rdev->desc->ops->set_bypass) return 0; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_BYPASS)) return 0; regulator_lock(rdev); if (enable && !regulator->bypass) { rdev->bypass_count++; if (rdev->bypass_count == rdev->open_count) { trace_regulator_bypass_enable(name); ret = rdev->desc->ops->set_bypass(rdev, enable); if (ret != 0) rdev->bypass_count--; else trace_regulator_bypass_enable_complete(name); } } else if (!enable && regulator->bypass) { rdev->bypass_count--; if (rdev->bypass_count != rdev->open_count) { trace_regulator_bypass_disable(name); ret = rdev->desc->ops->set_bypass(rdev, enable); if (ret != 0) rdev->bypass_count++; else trace_regulator_bypass_disable_complete(name); } } if (ret == 0) regulator->bypass = enable; regulator_unlock(rdev); return ret; } EXPORT_SYMBOL_GPL(regulator_allow_bypass); /** * regulator_register_notifier - register regulator event notifier * @regulator: regulator source * @nb: notifier block * * Register notifier block to receive regulator events. */ int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb) { return blocking_notifier_chain_register(&regulator->rdev->notifier, nb); } EXPORT_SYMBOL_GPL(regulator_register_notifier); /** * regulator_unregister_notifier - unregister regulator event notifier * @regulator: regulator source * @nb: notifier block * * Unregister regulator event notifier block. */ int regulator_unregister_notifier(struct regulator *regulator, struct notifier_block *nb) { return blocking_notifier_chain_unregister(&regulator->rdev->notifier, nb); } EXPORT_SYMBOL_GPL(regulator_unregister_notifier); /* notify regulator consumers and downstream regulator consumers. * Note mutex must be held by caller. */ static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { /* call rdev chain first */ int ret = blocking_notifier_call_chain(&rdev->notifier, event, data); if (IS_REACHABLE(CONFIG_REGULATOR_NETLINK_EVENTS)) { struct device *parent = rdev->dev.parent; const char *rname = rdev_get_name(rdev); char name[32]; /* Avoid duplicate debugfs directory names */ if (parent && rname == rdev->desc->name) { snprintf(name, sizeof(name), "%s-%s", dev_name(parent), rname); rname = name; } reg_generate_netlink_event(rname, event); } return ret; } int _regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers, enum regulator_get_type get_type) { int i; int ret; for (i = 0; i < num_consumers; i++) consumers[i].consumer = NULL; for (i = 0; i < num_consumers; i++) { consumers[i].consumer = _regulator_get(dev, consumers[i].supply, get_type); if (IS_ERR(consumers[i].consumer)) { ret = dev_err_probe(dev, PTR_ERR(consumers[i].consumer), "Failed to get supply '%s'", consumers[i].supply); consumers[i].consumer = NULL; goto err; } if (consumers[i].init_load_uA > 0) { ret = regulator_set_load(consumers[i].consumer, consumers[i].init_load_uA); if (ret) { i++; goto err; } } } return 0; err: while (--i >= 0) regulator_put(consumers[i].consumer); return ret; } /** * regulator_bulk_get - get multiple regulator consumers * * @dev: Device to supply * @num_consumers: Number of consumers to register * @consumers: Configuration of consumers; clients are stored here. * * @return 0 on success, an errno on failure. 
* * This helper function allows drivers to get several regulator * consumers in one operation. If any of the regulators cannot be * acquired then any regulators that were allocated will be freed * before returning to the caller. */ int regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers) { return _regulator_bulk_get(dev, num_consumers, consumers, NORMAL_GET); } EXPORT_SYMBOL_GPL(regulator_bulk_get); static void regulator_bulk_enable_async(void *data, async_cookie_t cookie) { struct regulator_bulk_data *bulk = data; bulk->ret = regulator_enable(bulk->consumer); } /** * regulator_bulk_enable - enable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * @return 0 on success, an errno on failure * * This convenience API allows consumers to enable multiple regulator * clients in a single API call. If any consumers cannot be enabled * then any others that were enabled will be disabled again prior to * return. */ int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { ASYNC_DOMAIN_EXCLUSIVE(async_domain); int i; int ret = 0; for (i = 0; i < num_consumers; i++) { async_schedule_domain(regulator_bulk_enable_async, &consumers[i], &async_domain); } async_synchronize_full_domain(&async_domain); /* If any consumer failed we need to unwind any that succeeded */ for (i = 0; i < num_consumers; i++) { if (consumers[i].ret != 0) { ret = consumers[i].ret; goto err; } } return 0; err: for (i = 0; i < num_consumers; i++) { if (consumers[i].ret < 0) pr_err("Failed to enable %s: %pe\n", consumers[i].supply, ERR_PTR(consumers[i].ret)); else regulator_disable(consumers[i].consumer); } return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_enable); /** * regulator_bulk_disable - disable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * @return 0 on success, an errno on failure * * This convenience API allows consumers to disable multiple regulator * clients in a single API call. If any consumers cannot be disabled * then any others that were disabled will be enabled again prior to * return. */ int regulator_bulk_disable(int num_consumers, struct regulator_bulk_data *consumers) { int i; int ret, r; for (i = num_consumers - 1; i >= 0; --i) { ret = regulator_disable(consumers[i].consumer); if (ret != 0) goto err; } return 0; err: pr_err("Failed to disable %s: %pe\n", consumers[i].supply, ERR_PTR(ret)); for (++i; i < num_consumers; ++i) { r = regulator_enable(consumers[i].consumer); if (r != 0) pr_err("Failed to re-enable %s: %pe\n", consumers[i].supply, ERR_PTR(r)); } return ret; } EXPORT_SYMBOL_GPL(regulator_bulk_disable); /** * regulator_bulk_force_disable - force disable multiple regulator consumers * * @num_consumers: Number of consumers * @consumers: Consumer data; clients are stored here. * @return 0 on success, an errno on failure * * This convenience API allows consumers to forcibly disable multiple regulator * clients in a single API call. * NOTE: This should be used for situations when device damage will * likely occur if the regulators are not disabled (e.g. over temp). * Although regulator_force_disable function call for some consumers can * return error numbers, the function is called for all consumers. 
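 *
 * Illustrative sketch of an emergency path (the supply names are
 * hypothetical; the array is assumed to have been filled in earlier by
 * regulator_bulk_get()):
 *
 *   struct regulator_bulk_data supplies[] = {
 *       { .supply = "vdda" },
 *       { .supply = "vddio" },
 *   };
 *
 *   regulator_bulk_force_disable(ARRAY_SIZE(supplies), supplies);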
 */
int regulator_bulk_force_disable(int num_consumers,
				 struct regulator_bulk_data *consumers)
{
	int i;
	int ret = 0;

	for (i = 0; i < num_consumers; i++) {
		consumers[i].ret =
			    regulator_force_disable(consumers[i].consumer);

		/* Store first error for reporting */
		if (consumers[i].ret && !ret)
			ret = consumers[i].ret;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(regulator_bulk_force_disable);

/**
 * regulator_bulk_free - free multiple regulator consumers
 *
 * @num_consumers: Number of consumers
 * @consumers: Consumer data; clients are stored here.
 *
 * This convenience API allows consumers to free multiple regulator
 * clients in a single API call.
 */
void regulator_bulk_free(int num_consumers,
			 struct regulator_bulk_data *consumers)
{
	int i;

	for (i = 0; i < num_consumers; i++) {
		regulator_put(consumers[i].consumer);
		consumers[i].consumer = NULL;
	}
}
EXPORT_SYMBOL_GPL(regulator_bulk_free);

/**
 * regulator_handle_critical - Handle events for system-critical regulators.
 * @rdev: The regulator device.
 * @event: The event being handled.
 *
 * This function handles critical events such as under-voltage, over-current,
 * and unknown errors for regulators deemed system-critical. On detecting such
 * events, it triggers a hardware protection shutdown with a defined timeout.
 */
static void regulator_handle_critical(struct regulator_dev *rdev,
				      unsigned long event)
{
	const char *reason = NULL;

	if (!rdev->constraints->system_critical)
		return;

	switch (event) {
	case REGULATOR_EVENT_UNDER_VOLTAGE:
		reason = "System critical regulator: voltage drop detected";
		break;
	case REGULATOR_EVENT_OVER_CURRENT:
		reason = "System critical regulator: over-current detected";
		break;
	case REGULATOR_EVENT_FAIL:
		reason = "System critical regulator: unknown error";
	}

	if (!reason)
		return;

	hw_protection_shutdown(reason,
			       rdev->constraints->uv_less_critical_window_ms);
}

/**
 * regulator_notifier_call_chain - call regulator event notifier
 * @rdev: regulator source
 * @event: the event being notified (a REGULATOR_EVENT_* value)
 * @data: callback-specific data.
 *
 * Called by regulator drivers to notify clients that a regulator event has
 * occurred.
 */
int regulator_notifier_call_chain(struct regulator_dev *rdev,
				  unsigned long event, void *data)
{
	regulator_handle_critical(rdev, event);

	_notifier_call_chain(rdev, event, data);

	return NOTIFY_DONE;
}
EXPORT_SYMBOL_GPL(regulator_notifier_call_chain);

/**
 * regulator_mode_to_status - convert a regulator mode into a status
 *
 * @mode: Mode to convert
 *
 * Convert a regulator mode into a status.
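 *
 * Typical use is in a driver's get_status() callback built on top of its
 * get_mode() implementation (illustrative; "foo_get_mode" is hypothetical):
 *
 *   static int foo_get_status(struct regulator_dev *rdev)
 *   {
 *       return regulator_mode_to_status(foo_get_mode(rdev));
 *   }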
*/ int regulator_mode_to_status(unsigned int mode) { switch (mode) { case REGULATOR_MODE_FAST: return REGULATOR_STATUS_FAST; case REGULATOR_MODE_NORMAL: return REGULATOR_STATUS_NORMAL; case REGULATOR_MODE_IDLE: return REGULATOR_STATUS_IDLE; case REGULATOR_MODE_STANDBY: return REGULATOR_STATUS_STANDBY; default: return REGULATOR_STATUS_UNDEFINED; } } EXPORT_SYMBOL_GPL(regulator_mode_to_status); static struct attribute *regulator_dev_attrs[] = { &dev_attr_name.attr, &dev_attr_num_users.attr, &dev_attr_type.attr, &dev_attr_microvolts.attr, &dev_attr_microamps.attr, &dev_attr_opmode.attr, &dev_attr_state.attr, &dev_attr_status.attr, &dev_attr_bypass.attr, &dev_attr_requested_microamps.attr, &dev_attr_min_microvolts.attr, &dev_attr_max_microvolts.attr, &dev_attr_min_microamps.attr, &dev_attr_max_microamps.attr, &dev_attr_under_voltage.attr, &dev_attr_over_current.attr, &dev_attr_regulation_out.attr, &dev_attr_fail.attr, &dev_attr_over_temp.attr, &dev_attr_under_voltage_warn.attr, &dev_attr_over_current_warn.attr, &dev_attr_over_voltage_warn.attr, &dev_attr_over_temp_warn.attr, &dev_attr_suspend_standby_state.attr, &dev_attr_suspend_mem_state.attr, &dev_attr_suspend_disk_state.attr, &dev_attr_suspend_standby_microvolts.attr, &dev_attr_suspend_mem_microvolts.attr, &dev_attr_suspend_disk_microvolts.attr, &dev_attr_suspend_standby_mode.attr, &dev_attr_suspend_mem_mode.attr, &dev_attr_suspend_disk_mode.attr, NULL }; /* * To avoid cluttering sysfs (and memory) with useless state, only * create attributes that can be meaningfully displayed. */ static umode_t regulator_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct device *dev = kobj_to_dev(kobj); struct regulator_dev *rdev = dev_to_rdev(dev); const struct regulator_ops *ops = rdev->desc->ops; umode_t mode = attr->mode; /* these three are always present */ if (attr == &dev_attr_name.attr || attr == &dev_attr_num_users.attr || attr == &dev_attr_type.attr) return mode; /* some attributes need specific methods to be displayed */ if (attr == &dev_attr_microvolts.attr) { if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || (ops->get_voltage_sel && ops->get_voltage_sel(rdev) >= 0) || (ops->list_voltage && ops->list_voltage(rdev, 0) >= 0) || (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1)) return mode; return 0; } if (attr == &dev_attr_microamps.attr) return ops->get_current_limit ? mode : 0; if (attr == &dev_attr_opmode.attr) return ops->get_mode ? mode : 0; if (attr == &dev_attr_state.attr) return (rdev->ena_pin || ops->is_enabled) ? mode : 0; if (attr == &dev_attr_status.attr) return ops->get_status ? mode : 0; if (attr == &dev_attr_bypass.attr) return ops->get_bypass ? mode : 0; if (attr == &dev_attr_under_voltage.attr || attr == &dev_attr_over_current.attr || attr == &dev_attr_regulation_out.attr || attr == &dev_attr_fail.attr || attr == &dev_attr_over_temp.attr || attr == &dev_attr_under_voltage_warn.attr || attr == &dev_attr_over_current_warn.attr || attr == &dev_attr_over_voltage_warn.attr || attr == &dev_attr_over_temp_warn.attr) return ops->get_error_flags ? mode : 0; /* constraints need specific supporting methods */ if (attr == &dev_attr_min_microvolts.attr || attr == &dev_attr_max_microvolts.attr) return (ops->set_voltage || ops->set_voltage_sel) ? mode : 0; if (attr == &dev_attr_min_microamps.attr || attr == &dev_attr_max_microamps.attr) return ops->set_current_limit ? 
mode : 0; if (attr == &dev_attr_suspend_standby_state.attr || attr == &dev_attr_suspend_mem_state.attr || attr == &dev_attr_suspend_disk_state.attr) return mode; if (attr == &dev_attr_suspend_standby_microvolts.attr || attr == &dev_attr_suspend_mem_microvolts.attr || attr == &dev_attr_suspend_disk_microvolts.attr) return ops->set_suspend_voltage ? mode : 0; if (attr == &dev_attr_suspend_standby_mode.attr || attr == &dev_attr_suspend_mem_mode.attr || attr == &dev_attr_suspend_disk_mode.attr) return ops->set_suspend_mode ? mode : 0; return mode; } static const struct attribute_group regulator_dev_group = { .attrs = regulator_dev_attrs, .is_visible = regulator_attr_is_visible, }; static const struct attribute_group *regulator_dev_groups[] = { &regulator_dev_group, NULL }; static void regulator_dev_release(struct device *dev) { struct regulator_dev *rdev = dev_get_drvdata(dev); debugfs_remove_recursive(rdev->debugfs); kfree(rdev->constraints); of_node_put(rdev->dev.of_node); kfree(rdev); } static void rdev_init_debugfs(struct regulator_dev *rdev) { struct device *parent = rdev->dev.parent; const char *rname = rdev_get_name(rdev); char name[NAME_MAX]; /* Avoid duplicate debugfs directory names */ if (parent && rname == rdev->desc->name) { snprintf(name, sizeof(name), "%s-%s", dev_name(parent), rname); rname = name; } rdev->debugfs = debugfs_create_dir(rname, debugfs_root); if (IS_ERR(rdev->debugfs)) rdev_dbg(rdev, "Failed to create debugfs directory\n"); debugfs_create_u32("use_count", 0444, rdev->debugfs, &rdev->use_count); debugfs_create_u32("open_count", 0444, rdev->debugfs, &rdev->open_count); debugfs_create_u32("bypass_count", 0444, rdev->debugfs, &rdev->bypass_count); } static int regulator_register_resolve_supply(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); if (regulator_resolve_supply(rdev)) rdev_dbg(rdev, "unable to resolve supply\n"); return 0; } int regulator_coupler_register(struct regulator_coupler *coupler) { mutex_lock(&regulator_list_mutex); list_add_tail(&coupler->list, &regulator_coupler_list); mutex_unlock(&regulator_list_mutex); return 0; } static struct regulator_coupler * regulator_find_coupler(struct regulator_dev *rdev) { struct regulator_coupler *coupler; int err; /* * Note that regulators are appended to the list and the generic * coupler is registered first, hence it will be attached at last * if nobody cared. 
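 *
 * attach_regulator() returns 0 when the coupler adopts the regulator, 1
 * when the regulator is not handled by this coupler (keep searching), and
 * a negative errno on a hard failure.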
*/ list_for_each_entry_reverse(coupler, &regulator_coupler_list, list) { err = coupler->attach_regulator(coupler, rdev); if (!err) { if (!coupler->balance_voltage && rdev->coupling_desc.n_coupled > 2) goto err_unsupported; return coupler; } if (err < 0) return ERR_PTR(err); if (err == 1) continue; break; } return ERR_PTR(-EINVAL); err_unsupported: if (coupler->detach_regulator) coupler->detach_regulator(coupler, rdev); rdev_err(rdev, "Voltage balancing for multiple regulator couples is unimplemented\n"); return ERR_PTR(-EPERM); } static void regulator_resolve_coupling(struct regulator_dev *rdev) { struct regulator_coupler *coupler = rdev->coupling_desc.coupler; struct coupling_desc *c_desc = &rdev->coupling_desc; int n_coupled = c_desc->n_coupled; struct regulator_dev *c_rdev; int i; for (i = 1; i < n_coupled; i++) { /* already resolved */ if (c_desc->coupled_rdevs[i]) continue; c_rdev = of_parse_coupled_regulator(rdev, i - 1); if (!c_rdev) continue; if (c_rdev->coupling_desc.coupler != coupler) { rdev_err(rdev, "coupler mismatch with %s\n", rdev_get_name(c_rdev)); return; } c_desc->coupled_rdevs[i] = c_rdev; c_desc->n_resolved++; regulator_resolve_coupling(c_rdev); } } static void regulator_remove_coupling(struct regulator_dev *rdev) { struct regulator_coupler *coupler = rdev->coupling_desc.coupler; struct coupling_desc *__c_desc, *c_desc = &rdev->coupling_desc; struct regulator_dev *__c_rdev, *c_rdev; unsigned int __n_coupled, n_coupled; int i, k; int err; n_coupled = c_desc->n_coupled; for (i = 1; i < n_coupled; i++) { c_rdev = c_desc->coupled_rdevs[i]; if (!c_rdev) continue; regulator_lock(c_rdev); __c_desc = &c_rdev->coupling_desc; __n_coupled = __c_desc->n_coupled; for (k = 1; k < __n_coupled; k++) { __c_rdev = __c_desc->coupled_rdevs[k]; if (__c_rdev == rdev) { __c_desc->coupled_rdevs[k] = NULL; __c_desc->n_resolved--; break; } } regulator_unlock(c_rdev); c_desc->coupled_rdevs[i] = NULL; c_desc->n_resolved--; } if (coupler && coupler->detach_regulator) { err = coupler->detach_regulator(coupler, rdev); if (err) rdev_err(rdev, "failed to detach from coupler: %pe\n", ERR_PTR(err)); } kfree(rdev->coupling_desc.coupled_rdevs); rdev->coupling_desc.coupled_rdevs = NULL; } static int regulator_init_coupling(struct regulator_dev *rdev) { struct regulator_dev **coupled; int err, n_phandles; if (!IS_ENABLED(CONFIG_OF)) n_phandles = 0; else n_phandles = of_get_n_coupled(rdev); coupled = kcalloc(n_phandles + 1, sizeof(*coupled), GFP_KERNEL); if (!coupled) return -ENOMEM; rdev->coupling_desc.coupled_rdevs = coupled; /* * Every regulator should always have coupling descriptor filled with * at least pointer to itself. 
*/ rdev->coupling_desc.coupled_rdevs[0] = rdev; rdev->coupling_desc.n_coupled = n_phandles + 1; rdev->coupling_desc.n_resolved++; /* regulator isn't coupled */ if (n_phandles == 0) return 0; if (!of_check_coupling_data(rdev)) return -EPERM; mutex_lock(&regulator_list_mutex); rdev->coupling_desc.coupler = regulator_find_coupler(rdev); mutex_unlock(&regulator_list_mutex); if (IS_ERR(rdev->coupling_desc.coupler)) { err = PTR_ERR(rdev->coupling_desc.coupler); rdev_err(rdev, "failed to get coupler: %pe\n", ERR_PTR(err)); return err; } return 0; } static int generic_coupler_attach(struct regulator_coupler *coupler, struct regulator_dev *rdev) { if (rdev->coupling_desc.n_coupled > 2) { rdev_err(rdev, "Voltage balancing for multiple regulator couples is unimplemented\n"); return -EPERM; } if (!rdev->constraints->always_on) { rdev_err(rdev, "Coupling of a non always-on regulator is unimplemented\n"); return -ENOTSUPP; } return 0; } static struct regulator_coupler generic_regulator_coupler = { .attach_regulator = generic_coupler_attach, }; /** * regulator_register - register regulator * @dev: the device that drive the regulator * @regulator_desc: regulator to register * @cfg: runtime configuration for regulator * * Called by regulator drivers to register a regulator. * Returns a valid pointer to struct regulator_dev on success * or an ERR_PTR() on error. */ struct regulator_dev * regulator_register(struct device *dev, const struct regulator_desc *regulator_desc, const struct regulator_config *cfg) { const struct regulator_init_data *init_data; struct regulator_config *config = NULL; static atomic_t regulator_no = ATOMIC_INIT(-1); struct regulator_dev *rdev; bool dangling_cfg_gpiod = false; bool dangling_of_gpiod = false; int ret, i; bool resolved_early = false; if (cfg == NULL) return ERR_PTR(-EINVAL); if (cfg->ena_gpiod) dangling_cfg_gpiod = true; if (regulator_desc == NULL) { ret = -EINVAL; goto rinse; } WARN_ON(!dev || !cfg->dev); if (regulator_desc->name == NULL || regulator_desc->ops == NULL) { ret = -EINVAL; goto rinse; } if (regulator_desc->type != REGULATOR_VOLTAGE && regulator_desc->type != REGULATOR_CURRENT) { ret = -EINVAL; goto rinse; } /* Only one of each should be implemented */ WARN_ON(regulator_desc->ops->get_voltage && regulator_desc->ops->get_voltage_sel); WARN_ON(regulator_desc->ops->set_voltage && regulator_desc->ops->set_voltage_sel); /* If we're using selectors we must implement list_voltage. */ if (regulator_desc->ops->get_voltage_sel && !regulator_desc->ops->list_voltage) { ret = -EINVAL; goto rinse; } if (regulator_desc->ops->set_voltage_sel && !regulator_desc->ops->list_voltage) { ret = -EINVAL; goto rinse; } rdev = kzalloc(sizeof(struct regulator_dev), GFP_KERNEL); if (rdev == NULL) { ret = -ENOMEM; goto rinse; } device_initialize(&rdev->dev); dev_set_drvdata(&rdev->dev, rdev); rdev->dev.class = &regulator_class; spin_lock_init(&rdev->err_lock); /* * Duplicate the config so the driver could override it after * parsing init data. */ config = kmemdup(cfg, sizeof(*cfg), GFP_KERNEL); if (config == NULL) { ret = -ENOMEM; goto clean; } init_data = regulator_of_get_init_data(dev, regulator_desc, config, &rdev->dev.of_node); /* * Sometimes not all resources are probed already so we need to take * that into account. This happens most the time if the ena_gpiod comes * from a gpio extender or something else. 
*/ if (PTR_ERR(init_data) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; goto clean; } /* * We need to keep track of any GPIO descriptor coming from the * device tree until we have handled it over to the core. If the * config that was passed in to this function DOES NOT contain * a descriptor, and the config after this call DOES contain * a descriptor, we definitely got one from parsing the device * tree. */ if (!cfg->ena_gpiod && config->ena_gpiod) dangling_of_gpiod = true; if (!init_data) { init_data = config->init_data; rdev->dev.of_node = of_node_get(config->of_node); } ww_mutex_init(&rdev->mutex, &regulator_ww_class); rdev->reg_data = config->driver_data; rdev->owner = regulator_desc->owner; rdev->desc = regulator_desc; if (config->regmap) rdev->regmap = config->regmap; else if (dev_get_regmap(dev, NULL)) rdev->regmap = dev_get_regmap(dev, NULL); else if (dev->parent) rdev->regmap = dev_get_regmap(dev->parent, NULL); INIT_LIST_HEAD(&rdev->consumer_list); INIT_LIST_HEAD(&rdev->list); BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier); INIT_DELAYED_WORK(&rdev->disable_work, regulator_disable_work); if (init_data && init_data->supply_regulator) rdev->supply_name = init_data->supply_regulator; else if (regulator_desc->supply_name) rdev->supply_name = regulator_desc->supply_name; /* register with sysfs */ rdev->dev.parent = config->dev; dev_set_name(&rdev->dev, "regulator.%lu", (unsigned long) atomic_inc_return(&regulator_no)); /* set regulator constraints */ if (init_data) rdev->constraints = kmemdup(&init_data->constraints, sizeof(*rdev->constraints), GFP_KERNEL); else rdev->constraints = kzalloc(sizeof(*rdev->constraints), GFP_KERNEL); if (!rdev->constraints) { ret = -ENOMEM; goto wash; } if ((rdev->supply_name && !rdev->supply) && (rdev->constraints->always_on || rdev->constraints->boot_on)) { ret = regulator_resolve_supply(rdev); if (ret) rdev_dbg(rdev, "unable to resolve supply early: %pe\n", ERR_PTR(ret)); resolved_early = true; } /* perform any regulator specific init */ if (init_data && init_data->regulator_init) { ret = init_data->regulator_init(rdev->reg_data); if (ret < 0) goto wash; } if (config->ena_gpiod) { ret = regulator_ena_gpio_request(rdev, config); if (ret != 0) { rdev_err(rdev, "Failed to request enable GPIO: %pe\n", ERR_PTR(ret)); goto wash; } /* The regulator core took over the GPIO descriptor */ dangling_cfg_gpiod = false; dangling_of_gpiod = false; } ret = set_machine_constraints(rdev); if (ret == -EPROBE_DEFER && !resolved_early) { /* Regulator might be in bypass mode and so needs its supply * to set the constraints */ /* FIXME: this currently triggers a chicken-and-egg problem * when creating -SUPPLY symlink in sysfs to a regulator * that is just being created */ rdev_dbg(rdev, "will resolve supply early: %s\n", rdev->supply_name); ret = regulator_resolve_supply(rdev); if (!ret) ret = set_machine_constraints(rdev); else rdev_dbg(rdev, "unable to resolve supply early: %pe\n", ERR_PTR(ret)); } if (ret < 0) goto wash; ret = regulator_init_coupling(rdev); if (ret < 0) goto wash; /* add consumers devices */ if (init_data) { for (i = 0; i < init_data->num_consumer_supplies; i++) { ret = set_consumer_device_supply(rdev, init_data->consumer_supplies[i].dev_name, init_data->consumer_supplies[i].supply); if (ret < 0) { dev_err(dev, "Failed to set supply %s\n", init_data->consumer_supplies[i].supply); goto unset_supplies; } } } if (!rdev->desc->ops->get_voltage && !rdev->desc->ops->list_voltage && !rdev->desc->fixed_uV) rdev->is_switch = true; ret = device_add(&rdev->dev); if (ret != 
0) goto unset_supplies; rdev_init_debugfs(rdev); /* try to resolve regulators coupling since a new one was registered */ mutex_lock(&regulator_list_mutex); regulator_resolve_coupling(rdev); mutex_unlock(&regulator_list_mutex); /* try to resolve regulators supply since a new one was registered */ class_for_each_device(&regulator_class, NULL, NULL, regulator_register_resolve_supply); kfree(config); return rdev; unset_supplies: mutex_lock(&regulator_list_mutex); unset_regulator_supplies(rdev); regulator_remove_coupling(rdev); mutex_unlock(&regulator_list_mutex); wash: regulator_put(rdev->supply); kfree(rdev->coupling_desc.coupled_rdevs); mutex_lock(&regulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(&regulator_list_mutex); clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); kfree(config); put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(regulator_register); /** * regulator_unregister - unregister regulator * @rdev: regulator to unregister * * Called by regulator drivers to unregister a regulator. */ void regulator_unregister(struct regulator_dev *rdev) { if (rdev == NULL) return; if (rdev->supply) { while (rdev->use_count--) regulator_disable(rdev->supply); regulator_put(rdev->supply); } flush_work(&rdev->disable_work.work); mutex_lock(&regulator_list_mutex); WARN_ON(rdev->open_count); regulator_remove_coupling(rdev); unset_regulator_supplies(rdev); list_del(&rdev->list); regulator_ena_gpio_free(rdev); device_unregister(&rdev->dev); mutex_unlock(&regulator_list_mutex); } EXPORT_SYMBOL_GPL(regulator_unregister); #ifdef CONFIG_SUSPEND /** * regulator_suspend - prepare regulators for system wide suspend * @dev: ``&struct device`` pointer that is passed to _regulator_suspend() * * Configure each regulator with it's suspend operating parameters for state. */ static int regulator_suspend(struct device *dev) { struct regulator_dev *rdev = dev_to_rdev(dev); suspend_state_t state = pm_suspend_target_state; int ret; const struct regulator_state *rstate; rstate = regulator_get_suspend_state_check(rdev, state); if (!rstate) return 0; regulator_lock(rdev); ret = __suspend_set_state(rdev, rstate); regulator_unlock(rdev); return ret; } static int regulator_resume(struct device *dev) { suspend_state_t state = pm_suspend_target_state; struct regulator_dev *rdev = dev_to_rdev(dev); struct regulator_state *rstate; int ret = 0; rstate = regulator_get_suspend_state(rdev, state); if (rstate == NULL) return 0; /* Avoid grabbing the lock if we don't need to */ if (!rdev->desc->ops->resume) return 0; regulator_lock(rdev); if (rstate->enabled == ENABLE_IN_SUSPEND || rstate->enabled == DISABLE_IN_SUSPEND) ret = rdev->desc->ops->resume(rdev); regulator_unlock(rdev); return ret; } #else /* !CONFIG_SUSPEND */ #define regulator_suspend NULL #define regulator_resume NULL #endif /* !CONFIG_SUSPEND */ #ifdef CONFIG_PM static const struct dev_pm_ops __maybe_unused regulator_pm_ops = { .suspend = regulator_suspend, .resume = regulator_resume, }; #endif const struct class regulator_class = { .name = "regulator", .dev_release = regulator_dev_release, .dev_groups = regulator_dev_groups, #ifdef CONFIG_PM .pm = &regulator_pm_ops, #endif }; /** * regulator_has_full_constraints - the system has fully specified constraints * * Calling this function will cause the regulator API to disable all * regulators which have a zero use count and don't have an always_on * constraint in a late_initcall. 
* * The intention is that this will become the default behaviour in a * future kernel release so users are encouraged to use this facility * now. */ void regulator_has_full_constraints(void) { has_full_constraints = 1; } EXPORT_SYMBOL_GPL(regulator_has_full_constraints); /** * rdev_get_drvdata - get rdev regulator driver data * @rdev: regulator * * Get rdev regulator driver private data. This call can be used in the * regulator driver context. */ void *rdev_get_drvdata(struct regulator_dev *rdev) { return rdev->reg_data; } EXPORT_SYMBOL_GPL(rdev_get_drvdata); /** * regulator_get_drvdata - get regulator driver data * @regulator: regulator * * Get regulator driver private data. This call can be used in the consumer * driver context when non API regulator specific functions need to be called. */ void *regulator_get_drvdata(struct regulator *regulator) { return regulator->rdev->reg_data; } EXPORT_SYMBOL_GPL(regulator_get_drvdata); /** * regulator_set_drvdata - set regulator driver data * @regulator: regulator * @data: data */ void regulator_set_drvdata(struct regulator *regulator, void *data) { regulator->rdev->reg_data = data; } EXPORT_SYMBOL_GPL(regulator_set_drvdata); /** * rdev_get_id - get regulator ID * @rdev: regulator */ int rdev_get_id(struct regulator_dev *rdev) { return rdev->desc->id; } EXPORT_SYMBOL_GPL(rdev_get_id); struct device *rdev_get_dev(struct regulator_dev *rdev) { return &rdev->dev; } EXPORT_SYMBOL_GPL(rdev_get_dev); struct regmap *rdev_get_regmap(struct regulator_dev *rdev) { return rdev->regmap; } EXPORT_SYMBOL_GPL(rdev_get_regmap); void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data) { return reg_init_data->driver_data; } EXPORT_SYMBOL_GPL(regulator_get_init_drvdata); #ifdef CONFIG_DEBUG_FS static int supply_map_show(struct seq_file *sf, void *data) { struct regulator_map *map; list_for_each_entry(map, &regulator_map_list, list) { seq_printf(sf, "%s -> %s.%s\n", rdev_get_name(map->regulator), map->dev_name, map->supply); } return 0; } DEFINE_SHOW_ATTRIBUTE(supply_map); struct summary_data { struct seq_file *s; struct regulator_dev *parent; int level; }; static void regulator_summary_show_subtree(struct seq_file *s, struct regulator_dev *rdev, int level); static int regulator_summary_show_children(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct summary_data *summary_data = data; if (rdev->supply && rdev->supply->rdev == summary_data->parent) regulator_summary_show_subtree(summary_data->s, rdev, summary_data->level + 1); return 0; } static void regulator_summary_show_subtree(struct seq_file *s, struct regulator_dev *rdev, int level) { struct regulation_constraints *c; struct regulator *consumer; struct summary_data summary_data; unsigned int opmode; if (!rdev) return; opmode = _regulator_get_mode_unlocked(rdev); seq_printf(s, "%*s%-*s %3d %4d %6d %7s ", level * 3 + 1, "", 30 - level * 3, rdev_get_name(rdev), rdev->use_count, rdev->open_count, rdev->bypass_count, regulator_opmode_to_str(opmode)); seq_printf(s, "%5dmV ", regulator_get_voltage_rdev(rdev) / 1000); seq_printf(s, "%5dmA ", _regulator_get_current_limit_unlocked(rdev) / 1000); c = rdev->constraints; if (c) { switch (rdev->desc->type) { case REGULATOR_VOLTAGE: seq_printf(s, "%5dmV %5dmV ", c->min_uV / 1000, c->max_uV / 1000); break; case REGULATOR_CURRENT: seq_printf(s, "%5dmA %5dmA ", c->min_uA / 1000, c->max_uA / 1000); break; } } seq_puts(s, "\n"); list_for_each_entry(consumer, &rdev->consumer_list, list) { if (consumer->dev && 
consumer->dev->class == &regulator_class) continue; seq_printf(s, "%*s%-*s ", (level + 1) * 3 + 1, "", 30 - (level + 1) * 3, consumer->supply_name ? consumer->supply_name : consumer->dev ? dev_name(consumer->dev) : "deviceless"); switch (rdev->desc->type) { case REGULATOR_VOLTAGE: seq_printf(s, "%3d %33dmA%c%5dmV %5dmV", consumer->enable_count, consumer->uA_load / 1000, consumer->uA_load && !consumer->enable_count ? '*' : ' ', consumer->voltage[PM_SUSPEND_ON].min_uV / 1000, consumer->voltage[PM_SUSPEND_ON].max_uV / 1000); break; case REGULATOR_CURRENT: break; } seq_puts(s, "\n"); } summary_data.s = s; summary_data.level = level; summary_data.parent = rdev; class_for_each_device(&regulator_class, NULL, &summary_data, regulator_summary_show_children); } struct summary_lock_data { struct ww_acquire_ctx *ww_ctx; struct regulator_dev **new_contended_rdev; struct regulator_dev **old_contended_rdev; }; static int regulator_summary_lock_one(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct summary_lock_data *lock_data = data; int ret = 0; if (rdev != *lock_data->old_contended_rdev) { ret = regulator_lock_nested(rdev, lock_data->ww_ctx); if (ret == -EDEADLK) *lock_data->new_contended_rdev = rdev; else WARN_ON_ONCE(ret); } else { *lock_data->old_contended_rdev = NULL; } return ret; } static int regulator_summary_unlock_one(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct summary_lock_data *lock_data = data; if (lock_data) { if (rdev == *lock_data->new_contended_rdev) return -EDEADLK; } regulator_unlock(rdev); return 0; } static int regulator_summary_lock_all(struct ww_acquire_ctx *ww_ctx, struct regulator_dev **new_contended_rdev, struct regulator_dev **old_contended_rdev) { struct summary_lock_data lock_data; int ret; lock_data.ww_ctx = ww_ctx; lock_data.new_contended_rdev = new_contended_rdev; lock_data.old_contended_rdev = old_contended_rdev; ret = class_for_each_device(&regulator_class, NULL, &lock_data, regulator_summary_lock_one); if (ret) class_for_each_device(&regulator_class, NULL, &lock_data, regulator_summary_unlock_one); return ret; } static void regulator_summary_lock(struct ww_acquire_ctx *ww_ctx) { struct regulator_dev *new_contended_rdev = NULL; struct regulator_dev *old_contended_rdev = NULL; int err; mutex_lock(&regulator_list_mutex); ww_acquire_init(ww_ctx, &regulator_ww_class); do { if (new_contended_rdev) { ww_mutex_lock_slow(&new_contended_rdev->mutex, ww_ctx); old_contended_rdev = new_contended_rdev; old_contended_rdev->ref_cnt++; old_contended_rdev->mutex_owner = current; } err = regulator_summary_lock_all(ww_ctx, &new_contended_rdev, &old_contended_rdev); if (old_contended_rdev) regulator_unlock(old_contended_rdev); } while (err == -EDEADLK); ww_acquire_done(ww_ctx); } static void regulator_summary_unlock(struct ww_acquire_ctx *ww_ctx) { class_for_each_device(&regulator_class, NULL, NULL, regulator_summary_unlock_one); ww_acquire_fini(ww_ctx); mutex_unlock(&regulator_list_mutex); } static int regulator_summary_show_roots(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct seq_file *s = data; if (!rdev->supply) regulator_summary_show_subtree(s, rdev, 0); return 0; } static int regulator_summary_show(struct seq_file *s, void *data) { struct ww_acquire_ctx ww_ctx; seq_puts(s, " regulator use open bypass opmode voltage current min max\n"); seq_puts(s, "---------------------------------------------------------------------------------------\n"); 
regulator_summary_lock(&ww_ctx); class_for_each_device(&regulator_class, NULL, s, regulator_summary_show_roots); regulator_summary_unlock(&ww_ctx); return 0; } DEFINE_SHOW_ATTRIBUTE(regulator_summary); #endif /* CONFIG_DEBUG_FS */ static int __init regulator_init(void) { int ret; ret = class_register(&regulator_class); debugfs_root = debugfs_create_dir("regulator", NULL); if (IS_ERR(debugfs_root)) pr_debug("regulator: Failed to create debugfs directory\n"); #ifdef CONFIG_DEBUG_FS debugfs_create_file("supply_map", 0444, debugfs_root, NULL, &supply_map_fops); debugfs_create_file("regulator_summary", 0444, debugfs_root, NULL, &regulator_summary_fops); #endif regulator_dummy_init(); regulator_coupler_register(&generic_regulator_coupler); return ret; } /* init early to allow our consumers to complete system booting */ core_initcall(regulator_init); static int regulator_late_cleanup(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); struct regulation_constraints *c = rdev->constraints; int ret; if (c && c->always_on) return 0; if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) return 0; regulator_lock(rdev); if (rdev->use_count) goto unlock; /* If reading the status failed, assume that it's off. */ if (_regulator_is_enabled(rdev) <= 0) goto unlock; if (have_full_constraints()) { /* We log since this may kill the system if it goes * wrong. */ rdev_info(rdev, "disabling\n"); ret = _regulator_do_disable(rdev); if (ret != 0) rdev_err(rdev, "couldn't disable: %pe\n", ERR_PTR(ret)); } else { /* The intention is that in future we will * assume that full constraints are provided * so warn even if we aren't going to do * anything here. */ rdev_warn(rdev, "incomplete constraints, leaving on\n"); } unlock: regulator_unlock(rdev); return 0; } static bool regulator_ignore_unused; static int __init regulator_ignore_unused_setup(char *__unused) { regulator_ignore_unused = true; return 1; } __setup("regulator_ignore_unused", regulator_ignore_unused_setup); static void regulator_init_complete_work_function(struct work_struct *work) { /* * Regulators may had failed to resolve their input supplies * when were registered, either because the input supply was * not registered yet or because its parent device was not * bound yet. So attempt to resolve the input supplies for * pending regulators before trying to disable unused ones. */ class_for_each_device(&regulator_class, NULL, NULL, regulator_register_resolve_supply); /* * For debugging purposes, it may be useful to prevent unused * regulators from being disabled. */ if (regulator_ignore_unused) { pr_warn("regulator: Not disabling unused regulators\n"); return; } /* If we have a full configuration then disable any regulators * we have permission to change the status for and which are * not in use or always_on. This is effectively the default * for DT and ACPI as they have full constraints. */ class_for_each_device(&regulator_class, NULL, NULL, regulator_late_cleanup); } static DECLARE_DELAYED_WORK(regulator_init_complete_work, regulator_init_complete_work_function); static int __init regulator_init_complete(void) { /* * Since DT doesn't provide an idiomatic mechanism for * enabling full constraints and since it's much more natural * with DT to provide them just assume that a DT enabled * system has full constraints. 
*/ if (of_have_populated_dt()) has_full_constraints = true; /* * We punt completion for an arbitrary amount of time since * systems like distros will load many drivers from userspace * so consumers might not always be ready yet, this is * particularly an issue with laptops where this might bounce * the display off then on. Ideally we'd get a notification * from userspace when this happens but we don't so just wait * a bit and hope we waited long enough. It'd be better if * we'd only do this on systems that need it, and a kernel * command line option might be useful. */ schedule_delayed_work(&regulator_init_complete_work, msecs_to_jiffies(30000)); return 0; } late_initcall_sync(regulator_init_complete);
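/*
 * Illustrative sketch, not part of the kernel sources above: a minimal,
 * hypothetical "foo-ldo" platform driver showing how a regulator driver
 * hands a regulator_desc and regulator_config to the core path walked
 * through above (devm_regulator_register() ends up in regulator_register()).
 * The device name, register offsets and probe function are invented for
 * illustration; the desc/config fields and the regmap helper ops are the
 * real <linux/regulator/driver.h> interfaces.
 */
#include <linux/err.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/regulator/driver.h>

static const struct regulator_ops foo_ldo_ops = {
	.enable			= regulator_enable_regmap,
	.disable		= regulator_disable_regmap,
	.is_enabled		= regulator_is_enabled_regmap,
	.list_voltage		= regulator_list_voltage_linear,
	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
};

static const struct regulator_desc foo_ldo_desc = {
	.name		= "foo-ldo",		/* hypothetical device */
	.of_match	= "ldo1",
	.type		= REGULATOR_VOLTAGE,
	.owner		= THIS_MODULE,
	.n_voltages	= 16,
	.min_uV		= 800000,		/* 0.8 V ... */
	.uV_step	= 100000,		/* ... in 100 mV steps */
	.vsel_reg	= 0x10,			/* invented register layout */
	.vsel_mask	= 0x0f,
	.enable_reg	= 0x11,
	.enable_mask	= 0x01,
	.ops		= &foo_ldo_ops,
};

static int foo_ldo_probe(struct platform_device *pdev)
{
	struct regulator_config config = { };
	struct regulator_dev *rdev;

	config.dev = &pdev->dev;
	/* Reuse the parent MFD's regmap, as many PMIC regulator drivers do. */
	config.regmap = dev_get_regmap(pdev->dev.parent, NULL);

	rdev = devm_regulator_register(&pdev->dev, &foo_ldo_desc, &config);
	return PTR_ERR_OR_ZERO(rdev);
}

static struct platform_driver foo_ldo_driver = {
	.driver = {
		.name = "foo-ldo",
	},
	.probe = foo_ldo_probe,
};
module_platform_driver(foo_ldo_driver);

MODULE_LICENSE("GPL");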
// SPDX-License-Identifier: GPL-2.0-or-later /* * PCTV 452e DVB driver * * Copyright (c) 2006-2008 Dominik Kuhlen <dkuhlen@gmx.net> * * TT connect S2-3650-CI Common Interface support, MAC readout * Copyright (C) 2008 Michael H. Schimek <mschimek@gmx.at> */ /* dvb usb framework */ #define DVB_USB_LOG_PREFIX "pctv452e" #include "dvb-usb.h" /* Demodulator */ #include "stb0899_drv.h" #include "stb0899_reg.h" #include "stb0899_cfg.h" /* Tuner */ #include "stb6100.h" #include "stb6100_cfg.h" /* FE Power */ #include "isl6423.h" #include "lnbp22.h" #include <media/dvb_ca_en50221.h> #include "ttpci-eeprom.h" #include <linux/etherdevice.h> static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off)."); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); #define ISOC_INTERFACE_ALTERNATIVE 3 #define SYNC_BYTE_OUT 0xaa #define SYNC_BYTE_IN 0x55 /* guessed: (copied from ttusb-budget) */ #define PCTV_CMD_RESET 0x15 /* command to poll IR receiver */ #define PCTV_CMD_IR 0x1b /* command to send I2C */ #define PCTV_CMD_I2C 0x31 #define I2C_ADDR_STB0899 (0xd0 >> 1) #define I2C_ADDR_STB6100 (0xc0 >> 1) #define I2C_ADDR_LNBP22 (0x10 >> 1) #define I2C_ADDR_24C16 (0xa0 >> 1) #define I2C_ADDR_24C64 (0xa2 >> 1) /* pctv452e sends us this amount of data for each issued usb-command */ #define PCTV_ANSWER_LEN 64 /* Wait up to 1000ms for device */ #define PCTV_TIMEOUT 1000 #define PCTV_LED_GPIO STB0899_GPIO01 #define PCTV_LED_GREEN 0x82 #define PCTV_LED_ORANGE 0x02 #define ci_dbg(format, arg...) \ do { \ if (0) \ printk(KERN_DEBUG DVB_USB_LOG_PREFIX \ ": " format "\n" , ## arg); \ } while (0) enum { TT3650_CMD_CI_TEST = 0x40, TT3650_CMD_CI_RD_CTRL, TT3650_CMD_CI_WR_CTRL, TT3650_CMD_CI_RD_ATTR, TT3650_CMD_CI_WR_ATTR, TT3650_CMD_CI_RESET, TT3650_CMD_CI_SET_VIDEO_PORT }; static struct stb0899_postproc pctv45e_postproc[] = { { PCTV_LED_GPIO, STB0899_GPIOPULLUP }, { 0, 0 } }; static struct isl6423_config pctv452e_isl6423_config = { .current_max = SEC_CURRENT_515m, .curlim = SEC_CURRENT_LIM_ON, .mod_extern = 1, .addr = 0x08, }; /* * stores all private variables for communication with the PCTV452e DVB-S2 */ struct pctv452e_state { struct dvb_ca_en50221 ca; struct mutex ca_mutex; u8 c; /* transaction counter, wraps around...
*/ u8 initialized; /* set to 1 if 0x15 has been sent */ u16 last_rc_key; }; static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, unsigned int write_len, unsigned int read_len) { struct pctv452e_state *state = d->priv; u8 *buf; u8 id; unsigned int rlen; int ret; if (!data || (write_len > 64 - 4) || (read_len > 64 - 4)) { err("%s: transfer data invalid", __func__); return -EIO; } buf = kmalloc(64, GFP_KERNEL); if (!buf) return -ENOMEM; id = state->c++; buf[0] = SYNC_BYTE_OUT; buf[1] = id; buf[2] = cmd; buf[3] = write_len; memcpy(buf + 4, data, write_len); rlen = (read_len > 0) ? 64 : 0; ret = dvb_usb_generic_rw(d, buf, 4 + write_len, buf, rlen, /* delay_ms */ 0); if (0 != ret) goto failed; ret = -EIO; if (SYNC_BYTE_IN != buf[0] || id != buf[1]) goto failed; memcpy(data, buf + 4, read_len); kfree(buf); return 0; failed: err("CI error %d; %02X %02X %02X -> %*ph.", ret, SYNC_BYTE_OUT, id, cmd, 3, buf); kfree(buf); return ret; } static int tt3650_ci_msg_locked(struct dvb_ca_en50221 *ca, u8 cmd, u8 *data, unsigned int write_len, unsigned int read_len) { struct dvb_usb_device *d = ca->data; struct pctv452e_state *state = d->priv; int ret; mutex_lock(&state->ca_mutex); ret = tt3650_ci_msg(d, cmd, data, write_len, read_len); mutex_unlock(&state->ca_mutex); return ret; } static int tt3650_ci_read_attribute_mem(struct dvb_ca_en50221 *ca, int slot, int address) { u8 buf[3]; int ret; if (0 != slot) return -EINVAL; buf[0] = (address >> 8) & 0x0F; buf[1] = address; ret = tt3650_ci_msg_locked(ca, TT3650_CMD_CI_RD_ATTR, buf, 2, 3); ci_dbg("%s %04x -> %d 0x%02x", __func__, address, ret, buf[2]); if (ret < 0) return ret; return buf[2]; } static int tt3650_ci_write_attribute_mem(struct dvb_ca_en50221 *ca, int slot, int address, u8 value) { u8 buf[3]; ci_dbg("%s %d 0x%04x 0x%02x", __func__, slot, address, value); if (0 != slot) return -EINVAL; buf[0] = (address >> 8) & 0x0F; buf[1] = address; buf[2] = value; return tt3650_ci_msg_locked(ca, TT3650_CMD_CI_WR_ATTR, buf, 3, 3); } static int tt3650_ci_read_cam_control(struct dvb_ca_en50221 *ca, int slot, u8 address) { u8 buf[2]; int ret; if (0 != slot) return -EINVAL; buf[0] = address & 3; ret = tt3650_ci_msg_locked(ca, TT3650_CMD_CI_RD_CTRL, buf, 1, 2); ci_dbg("%s 0x%02x -> %d 0x%02x", __func__, address, ret, buf[1]); if (ret < 0) return ret; return buf[1]; } static int tt3650_ci_write_cam_control(struct dvb_ca_en50221 *ca, int slot, u8 address, u8 value) { u8 buf[2]; ci_dbg("%s %d 0x%02x 0x%02x", __func__, slot, address, value); if (0 != slot) return -EINVAL; buf[0] = address; buf[1] = value; return tt3650_ci_msg_locked(ca, TT3650_CMD_CI_WR_CTRL, buf, 2, 2); } static int tt3650_ci_set_video_port(struct dvb_ca_en50221 *ca, int slot, int enable) { u8 buf[1]; int ret; ci_dbg("%s %d %d", __func__, slot, enable); if (0 != slot) return -EINVAL; enable = !!enable; buf[0] = enable; ret = tt3650_ci_msg_locked(ca, TT3650_CMD_CI_SET_VIDEO_PORT, buf, 1, 1); if (ret < 0) return ret; if (enable != buf[0]) { err("CI not %sabled.", enable ? 
"en" : "dis"); return -EIO; } return 0; } static int tt3650_ci_slot_shutdown(struct dvb_ca_en50221 *ca, int slot) { return tt3650_ci_set_video_port(ca, slot, /* enable */ 0); } static int tt3650_ci_slot_ts_enable(struct dvb_ca_en50221 *ca, int slot) { return tt3650_ci_set_video_port(ca, slot, /* enable */ 1); } static int tt3650_ci_slot_reset(struct dvb_ca_en50221 *ca, int slot) { struct dvb_usb_device *d = ca->data; struct pctv452e_state *state = d->priv; u8 buf[1]; int ret; ci_dbg("%s %d", __func__, slot); if (0 != slot) return -EINVAL; buf[0] = 0; mutex_lock(&state->ca_mutex); ret = tt3650_ci_msg(d, TT3650_CMD_CI_RESET, buf, 1, 1); if (0 != ret) goto failed; msleep(500); buf[0] = 1; ret = tt3650_ci_msg(d, TT3650_CMD_CI_RESET, buf, 1, 1); if (0 != ret) goto failed; msleep(500); buf[0] = 0; /* FTA */ ret = tt3650_ci_msg(d, TT3650_CMD_CI_SET_VIDEO_PORT, buf, 1, 1); failed: mutex_unlock(&state->ca_mutex); return ret; } static int tt3650_ci_poll_slot_status(struct dvb_ca_en50221 *ca, int slot, int open) { u8 buf[1]; int ret; if (0 != slot) return -EINVAL; ret = tt3650_ci_msg_locked(ca, TT3650_CMD_CI_TEST, buf, 0, 1); if (0 != ret) return ret; if (1 == buf[0]) return DVB_CA_EN50221_POLL_CAM_PRESENT | DVB_CA_EN50221_POLL_CAM_READY; return 0; } static void tt3650_ci_uninit(struct dvb_usb_device *d) { struct pctv452e_state *state; ci_dbg("%s", __func__); if (NULL == d) return; state = d->priv; if (NULL == state) return; if (NULL == state->ca.data) return; /* Error ignored. */ tt3650_ci_set_video_port(&state->ca, /* slot */ 0, /* enable */ 0); dvb_ca_en50221_release(&state->ca); memset(&state->ca, 0, sizeof(state->ca)); } static int tt3650_ci_init(struct dvb_usb_adapter *a) { struct dvb_usb_device *d = a->dev; struct pctv452e_state *state = d->priv; int ret; ci_dbg("%s", __func__); mutex_init(&state->ca_mutex); state->ca.owner = THIS_MODULE; state->ca.read_attribute_mem = tt3650_ci_read_attribute_mem; state->ca.write_attribute_mem = tt3650_ci_write_attribute_mem; state->ca.read_cam_control = tt3650_ci_read_cam_control; state->ca.write_cam_control = tt3650_ci_write_cam_control; state->ca.slot_reset = tt3650_ci_slot_reset; state->ca.slot_shutdown = tt3650_ci_slot_shutdown; state->ca.slot_ts_enable = tt3650_ci_slot_ts_enable; state->ca.poll_slot_status = tt3650_ci_poll_slot_status; state->ca.data = d; ret = dvb_ca_en50221_init(&a->dvb_adap, &state->ca, /* flags */ 0, /* n_slots */ 1); if (0 != ret) { err("Cannot initialize CI: Error %d.", ret); memset(&state->ca, 0, sizeof(state->ca)); return ret; } info("CI initialized."); return 0; } #define CMD_BUFFER_SIZE 0x28 static int pctv452e_i2c_msg(struct dvb_usb_device *d, u8 addr, const u8 *snd_buf, u8 snd_len, u8 *rcv_buf, u8 rcv_len) { struct pctv452e_state *state = d->priv; u8 *buf; u8 id; int ret; buf = kmalloc(64, GFP_KERNEL); if (!buf) return -ENOMEM; id = state->c++; ret = -EINVAL; if (snd_len > 64 - 7 || rcv_len > 64 - 7) goto failed; buf[0] = SYNC_BYTE_OUT; buf[1] = id; buf[2] = PCTV_CMD_I2C; buf[3] = snd_len + 3; buf[4] = addr << 1; buf[5] = snd_len; buf[6] = rcv_len; memcpy(buf + 7, snd_buf, snd_len); ret = dvb_usb_generic_rw(d, buf, 7 + snd_len, buf, /* rcv_len */ 64, /* delay_ms */ 0); if (ret < 0) goto failed; /* TT USB protocol error. */ ret = -EIO; if (SYNC_BYTE_IN != buf[0] || id != buf[1]) goto failed; /* I2C device didn't respond as expected. 
*/ ret = -EREMOTEIO; if (buf[5] < snd_len || buf[6] < rcv_len) goto failed; memcpy(rcv_buf, buf + 7, rcv_len); kfree(buf); return rcv_len; failed: err("I2C error %d; %02X %02X %02X %02X %02X -> %*ph", ret, SYNC_BYTE_OUT, id, addr << 1, snd_len, rcv_len, 7, buf); kfree(buf); return ret; } static int pctv452e_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msg, int num) { struct dvb_usb_device *d = i2c_get_adapdata(adapter); int i; if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; for (i = 0; i < num; i++) { u8 addr, snd_len, rcv_len, *snd_buf, *rcv_buf; int ret; if (msg[i].flags & I2C_M_RD) { addr = msg[i].addr; snd_buf = NULL; snd_len = 0; rcv_buf = msg[i].buf; rcv_len = msg[i].len; } else { addr = msg[i].addr; snd_buf = msg[i].buf; snd_len = msg[i].len; rcv_buf = NULL; rcv_len = 0; } ret = pctv452e_i2c_msg(d, addr, snd_buf, snd_len, rcv_buf, rcv_len); if (ret < rcv_len) break; } mutex_unlock(&d->i2c_mutex); return i; } static u32 pctv452e_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) { struct pctv452e_state *state = d->priv; u8 *b0, *rx; int ret; info("%s: %d\n", __func__, i); if (!i) return 0; if (state->initialized) return 0; b0 = kmalloc(5 + PCTV_ANSWER_LEN, GFP_KERNEL); if (!b0) return -ENOMEM; rx = b0 + 5; /* hmm where should this should go? */ ret = usb_set_interface(d->udev, 0, ISOC_INTERFACE_ALTERNATIVE); if (ret != 0) info("%s: Warning set interface returned: %d\n", __func__, ret); /* this is a one-time initialization, don't know where to put */ b0[0] = 0xaa; b0[1] = state->c++; b0[2] = PCTV_CMD_RESET; b0[3] = 1; b0[4] = 0; /* reset board */ ret = dvb_usb_generic_rw(d, b0, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) goto ret; b0[1] = state->c++; b0[4] = 1; /* reset board (again?) */ ret = dvb_usb_generic_rw(d, b0, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) goto ret; state->initialized = 1; ret: kfree(b0); return ret; } static int pctv452e_rc_query(struct dvb_usb_device *d) { struct pctv452e_state *state = d->priv; u8 *b, *rx; int ret, i; u8 id; b = kmalloc(CMD_BUFFER_SIZE + PCTV_ANSWER_LEN, GFP_KERNEL); if (!b) return -ENOMEM; rx = b + CMD_BUFFER_SIZE; id = state->c++; /* prepare command header */ b[0] = SYNC_BYTE_OUT; b[1] = id; b[2] = PCTV_CMD_IR; b[3] = 0; /* send ir request */ ret = dvb_usb_generic_rw(d, b, 4, rx, PCTV_ANSWER_LEN, 0); if (ret != 0) goto ret; if (debug > 3) { info("%s: read: %2d: %*ph: ", __func__, ret, 3, rx); for (i = 0; (i < rx[3]) && ((i+3) < PCTV_ANSWER_LEN); i++) info(" %02x", rx[i+3]); info("\n"); } if ((rx[3] == 9) && (rx[12] & 0x01)) { /* got a "press" event */ state->last_rc_key = RC_SCANCODE_RC5(rx[7], rx[6]); if (debug > 2) info("%s: cmd=0x%02x sys=0x%02x\n", __func__, rx[6], rx[7]); rc_keydown(d->rc_dev, RC_PROTO_RC5, state->last_rc_key, 0); } else if (state->last_rc_key) { rc_keyup(d->rc_dev); state->last_rc_key = 0; } ret: kfree(b); return ret; } static int pctv452e_read_mac_address(struct dvb_usb_device *d, u8 mac[6]) { const u8 mem_addr[] = { 0x1f, 0xcc }; u8 encoded_mac[20]; int ret; ret = -EAGAIN; if (mutex_lock_interruptible(&d->i2c_mutex) < 0) goto failed; ret = pctv452e_i2c_msg(d, I2C_ADDR_24C16, mem_addr + 1, /* snd_len */ 1, encoded_mac, /* rcv_len */ 20); if (-EREMOTEIO == ret) /* Caution! A 24C16 interprets 0xA2 0x1F 0xCC as a byte write if /WC is low. 
*/ ret = pctv452e_i2c_msg(d, I2C_ADDR_24C64, mem_addr, 2, encoded_mac, 20); mutex_unlock(&d->i2c_mutex); if (20 != ret) goto failed; ret = ttpci_eeprom_decode_mac(mac, encoded_mac); if (0 != ret) goto failed; return 0; failed: eth_zero_addr(mac); return ret; } static const struct stb0899_s1_reg pctv452e_init_dev[] = { { STB0899_DISCNTRL1, 0x26 }, { STB0899_DISCNTRL2, 0x80 }, { STB0899_DISRX_ST0, 0x04 }, { STB0899_DISRX_ST1, 0x20 }, { STB0899_DISPARITY, 0x00 }, { STB0899_DISFIFO, 0x00 }, { STB0899_DISF22, 0x99 }, { STB0899_DISF22RX, 0x85 }, /* 0xa8 */ { STB0899_ACRPRESC, 0x11 }, { STB0899_ACRDIV1, 0x0a }, { STB0899_ACRDIV2, 0x05 }, { STB0899_DACR1 , 0x00 }, { STB0899_DACR2 , 0x00 }, { STB0899_OUTCFG, 0x00 }, { STB0899_MODECFG, 0x00 }, /* Inversion */ { STB0899_IRQMSK_3, 0xf3 }, { STB0899_IRQMSK_2, 0xfc }, { STB0899_IRQMSK_1, 0xff }, { STB0899_IRQMSK_0, 0xff }, { STB0899_I2CCFG, 0x88 }, { STB0899_I2CRPT, 0x58 }, { STB0899_GPIO00CFG, 0x82 }, { STB0899_GPIO01CFG, 0x82 }, /* LED: 0x02 green, 0x82 orange */ { STB0899_GPIO02CFG, 0x82 }, { STB0899_GPIO03CFG, 0x82 }, { STB0899_GPIO04CFG, 0x82 }, { STB0899_GPIO05CFG, 0x82 }, { STB0899_GPIO06CFG, 0x82 }, { STB0899_GPIO07CFG, 0x82 }, { STB0899_GPIO08CFG, 0x82 }, { STB0899_GPIO09CFG, 0x82 }, { STB0899_GPIO10CFG, 0x82 }, { STB0899_GPIO11CFG, 0x82 }, { STB0899_GPIO12CFG, 0x82 }, { STB0899_GPIO13CFG, 0x82 }, { STB0899_GPIO14CFG, 0x82 }, { STB0899_GPIO15CFG, 0x82 }, { STB0899_GPIO16CFG, 0x82 }, { STB0899_GPIO17CFG, 0x82 }, { STB0899_GPIO18CFG, 0x82 }, { STB0899_GPIO19CFG, 0x82 }, { STB0899_GPIO20CFG, 0x82 }, { STB0899_SDATCFG, 0xb8 }, { STB0899_SCLTCFG, 0xba }, { STB0899_AGCRFCFG, 0x1c }, /* 0x11 DVB-S; 0x1c DVB-S2 (1c, rjkm) */ { STB0899_GPIO22, 0x82 }, { STB0899_GPIO21, 0x91 }, { STB0899_DIRCLKCFG, 0x82 }, { STB0899_CLKOUT27CFG, 0x7e }, { STB0899_STDBYCFG, 0x82 }, { STB0899_CS0CFG, 0x82 }, { STB0899_CS1CFG, 0x82 }, { STB0899_DISEQCOCFG, 0x20 }, { STB0899_NCOARSE, 0x15 }, /* 0x15 27Mhz, F/3 198MHz, F/6 108MHz */ { STB0899_SYNTCTRL, 0x00 }, /* 0x00 CLKI, 0x02 XTALI */ { STB0899_FILTCTRL, 0x00 }, { STB0899_SYSCTRL, 0x00 }, { STB0899_STOPCLK1, 0x20 }, /* orig: 0x00 budget-ci: 0x20 */ { STB0899_STOPCLK2, 0x00 }, { STB0899_INTBUFCTRL, 0x0a }, { STB0899_AGC2I1, 0x00 }, { STB0899_AGC2I2, 0x00 }, { STB0899_AGCIQIN, 0x00 }, { STB0899_TSTRES, 0x40 }, /* rjkm */ { 0xffff, 0xff }, }; static const struct stb0899_s1_reg pctv452e_init_s1_demod[] = { { STB0899_DEMOD, 0x00 }, { STB0899_RCOMPC, 0xc9 }, { STB0899_AGC1CN, 0x01 }, { STB0899_AGC1REF, 0x10 }, { STB0899_RTC, 0x23 }, { STB0899_TMGCFG, 0x4e }, { STB0899_AGC2REF, 0x34 }, { STB0899_TLSR, 0x84 }, { STB0899_CFD, 0xf7 }, { STB0899_ACLC, 0x87 }, { STB0899_BCLC, 0x94 }, { STB0899_EQON, 0x41 }, { STB0899_LDT, 0xf1 }, { STB0899_LDT2, 0xe3 }, { STB0899_EQUALREF, 0xb4 }, { STB0899_TMGRAMP, 0x10 }, { STB0899_TMGTHD, 0x30 }, { STB0899_IDCCOMP, 0xfd }, { STB0899_QDCCOMP, 0xff }, { STB0899_POWERI, 0x0c }, { STB0899_POWERQ, 0x0f }, { STB0899_RCOMP, 0x6c }, { STB0899_AGCIQIN, 0x80 }, { STB0899_AGC2I1, 0x06 }, { STB0899_AGC2I2, 0x00 }, { STB0899_TLIR, 0x30 }, { STB0899_RTF, 0x7f }, { STB0899_DSTATUS, 0x00 }, { STB0899_LDI, 0xbc }, { STB0899_CFRM, 0xea }, { STB0899_CFRL, 0x31 }, { STB0899_NIRM, 0x2b }, { STB0899_NIRL, 0x80 }, { STB0899_ISYMB, 0x1d }, { STB0899_QSYMB, 0xa6 }, { STB0899_SFRH, 0x2f }, { STB0899_SFRM, 0x68 }, { STB0899_SFRL, 0x40 }, { STB0899_SFRUPH, 0x2f }, { STB0899_SFRUPM, 0x68 }, { STB0899_SFRUPL, 0x40 }, { STB0899_EQUAI1, 0x02 }, { STB0899_EQUAQ1, 0xff }, { STB0899_EQUAI2, 0x04 }, { STB0899_EQUAQ2, 0x05 }, { 
STB0899_EQUAI3, 0x02 }, { STB0899_EQUAQ3, 0xfd }, { STB0899_EQUAI4, 0x03 }, { STB0899_EQUAQ4, 0x07 }, { STB0899_EQUAI5, 0x08 }, { STB0899_EQUAQ5, 0xf5 }, { STB0899_DSTATUS2, 0x00 }, { STB0899_VSTATUS, 0x00 }, { STB0899_VERROR, 0x86 }, { STB0899_IQSWAP, 0x2a }, { STB0899_ECNT1M, 0x00 }, { STB0899_ECNT1L, 0x00 }, { STB0899_ECNT2M, 0x00 }, { STB0899_ECNT2L, 0x00 }, { STB0899_ECNT3M, 0x0a }, { STB0899_ECNT3L, 0xad }, { STB0899_FECAUTO1, 0x06 }, { STB0899_FECM, 0x01 }, { STB0899_VTH12, 0xb0 }, { STB0899_VTH23, 0x7a }, { STB0899_VTH34, 0x58 }, { STB0899_VTH56, 0x38 }, { STB0899_VTH67, 0x34 }, { STB0899_VTH78, 0x24 }, { STB0899_PRVIT, 0xff }, { STB0899_VITSYNC, 0x19 }, { STB0899_RSULC, 0xb1 }, /* DVB = 0xb1, DSS = 0xa1 */ { STB0899_TSULC, 0x42 }, { STB0899_RSLLC, 0x41 }, { STB0899_TSLPL, 0x12 }, { STB0899_TSCFGH, 0x0c }, { STB0899_TSCFGM, 0x00 }, { STB0899_TSCFGL, 0x00 }, { STB0899_TSOUT, 0x69 }, /* 0x0d for CAM */ { STB0899_RSSYNCDEL, 0x00 }, { STB0899_TSINHDELH, 0x02 }, { STB0899_TSINHDELM, 0x00 }, { STB0899_TSINHDELL, 0x00 }, { STB0899_TSLLSTKM, 0x1b }, { STB0899_TSLLSTKL, 0xb3 }, { STB0899_TSULSTKM, 0x00 }, { STB0899_TSULSTKL, 0x00 }, { STB0899_PCKLENUL, 0xbc }, { STB0899_PCKLENLL, 0xcc }, { STB0899_RSPCKLEN, 0xbd }, { STB0899_TSSTATUS, 0x90 }, { STB0899_ERRCTRL1, 0xb6 }, { STB0899_ERRCTRL2, 0x95 }, { STB0899_ERRCTRL3, 0x8d }, { STB0899_DMONMSK1, 0x27 }, { STB0899_DMONMSK0, 0x03 }, { STB0899_DEMAPVIT, 0x5c }, { STB0899_PLPARM, 0x19 }, { STB0899_PDELCTRL, 0x48 }, { STB0899_PDELCTRL2, 0x00 }, { STB0899_BBHCTRL1, 0x00 }, { STB0899_BBHCTRL2, 0x00 }, { STB0899_HYSTTHRESH, 0x77 }, { STB0899_MATCSTM, 0x00 }, { STB0899_MATCSTL, 0x00 }, { STB0899_UPLCSTM, 0x00 }, { STB0899_UPLCSTL, 0x00 }, { STB0899_DFLCSTM, 0x00 }, { STB0899_DFLCSTL, 0x00 }, { STB0899_SYNCCST, 0x00 }, { STB0899_SYNCDCSTM, 0x00 }, { STB0899_SYNCDCSTL, 0x00 }, { STB0899_ISI_ENTRY, 0x00 }, { STB0899_ISI_BIT_EN, 0x00 }, { STB0899_MATSTRM, 0xf0 }, { STB0899_MATSTRL, 0x02 }, { STB0899_UPLSTRM, 0x45 }, { STB0899_UPLSTRL, 0x60 }, { STB0899_DFLSTRM, 0xe3 }, { STB0899_DFLSTRL, 0x00 }, { STB0899_SYNCSTR, 0x47 }, { STB0899_SYNCDSTRM, 0x05 }, { STB0899_SYNCDSTRL, 0x18 }, { STB0899_CFGPDELSTATUS1, 0x19 }, { STB0899_CFGPDELSTATUS2, 0x2b }, { STB0899_BBFERRORM, 0x00 }, { STB0899_BBFERRORL, 0x01 }, { STB0899_UPKTERRORM, 0x00 }, { STB0899_UPKTERRORL, 0x00 }, { 0xffff, 0xff }, }; static struct stb0899_config stb0899_config = { .init_dev = pctv452e_init_dev, .init_s2_demod = stb0899_s2_init_2, .init_s1_demod = pctv452e_init_s1_demod, .init_s2_fec = stb0899_s2_init_4, .init_tst = stb0899_s1_init_5, .demod_address = I2C_ADDR_STB0899, /* I2C Address */ .block_sync_mode = STB0899_SYNC_FORCED, /* ? */ .xtal_freq = 27000000, /* Assume Hz ? 
*/ .inversion = IQ_SWAP_ON, .lo_clk = 76500000, .hi_clk = 99000000, .ts_output_mode = 0, /* Use parallel mode */ .clock_polarity = 0, .data_clk_parity = 0, .fec_mode = 0, .esno_ave = STB0899_DVBS2_ESNO_AVE, .esno_quant = STB0899_DVBS2_ESNO_QUANT, .avframes_coarse = STB0899_DVBS2_AVFRAMES_COARSE, .avframes_fine = STB0899_DVBS2_AVFRAMES_FINE, .miss_threshold = STB0899_DVBS2_MISS_THRESHOLD, .uwp_threshold_acq = STB0899_DVBS2_UWP_THRESHOLD_ACQ, .uwp_threshold_track = STB0899_DVBS2_UWP_THRESHOLD_TRACK, .uwp_threshold_sof = STB0899_DVBS2_UWP_THRESHOLD_SOF, .sof_search_timeout = STB0899_DVBS2_SOF_SEARCH_TIMEOUT, .btr_nco_bits = STB0899_DVBS2_BTR_NCO_BITS, .btr_gain_shift_offset = STB0899_DVBS2_BTR_GAIN_SHIFT_OFFSET, .crl_nco_bits = STB0899_DVBS2_CRL_NCO_BITS, .ldpc_max_iter = STB0899_DVBS2_LDPC_MAX_ITER, .tuner_get_frequency = stb6100_get_frequency, .tuner_set_frequency = stb6100_set_frequency, .tuner_set_bandwidth = stb6100_set_bandwidth, .tuner_get_bandwidth = stb6100_get_bandwidth, .tuner_set_rfsiggain = NULL, /* helper for switching LED green/orange */ .postproc = pctv45e_postproc }; static struct stb6100_config stb6100_config = { .tuner_address = I2C_ADDR_STB6100, .refclock = 27000000 }; static struct i2c_algorithm pctv452e_i2c_algo = { .master_xfer = pctv452e_i2c_xfer, .functionality = pctv452e_i2c_func }; static int pctv452e_frontend_attach(struct dvb_usb_adapter *a) { struct usb_device_id *id; a->fe_adap[0].fe = dvb_attach(stb0899_attach, &stb0899_config, &a->dev->i2c_adap); if (!a->fe_adap[0].fe) return -ENODEV; id = a->dev->desc->warm_ids[0]; if (id->idVendor == USB_VID_TECHNOTREND && id->idProduct == USB_PID_TECHNOTREND_CONNECT_S2_3650_CI) { if (dvb_attach(lnbp22_attach, a->fe_adap[0].fe, &a->dev->i2c_adap) == NULL) { err("Cannot attach lnbp22\n"); } /* Error ignored. */ tt3650_ci_init(a); } else if (dvb_attach(isl6423_attach, a->fe_adap[0].fe, &a->dev->i2c_adap, &pctv452e_isl6423_config) == NULL) { err("Cannot attach isl6423\n"); } return 0; } static int pctv452e_tuner_attach(struct dvb_usb_adapter *a) { if (!a->fe_adap[0].fe) return -ENODEV; if (dvb_attach(stb6100_attach, a->fe_adap[0].fe, &stb6100_config, &a->dev->i2c_adap) == NULL) { err("%s failed\n", __func__); return -ENODEV; } return 0; } enum { PINNACLE_PCTV_452E, TECHNOTREND_CONNECT_S2_3600, TECHNOTREND_CONNECT_S2_3650_CI, }; static struct usb_device_id pctv452e_usb_table[] = { DVB_USB_DEV(PINNACLE, PINNACLE_PCTV_452E), DVB_USB_DEV(TECHNOTREND, TECHNOTREND_CONNECT_S2_3600), DVB_USB_DEV(TECHNOTREND, TECHNOTREND_CONNECT_S2_3650_CI), { } }; MODULE_DEVICE_TABLE(usb, pctv452e_usb_table); static struct dvb_usb_device_properties pctv452e_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, /* more ? 
*/ .usb_ctrl = DEVICE_SPECIFIC, .size_of_priv = sizeof(struct pctv452e_state), .power_ctrl = pctv452e_power_ctrl, .rc.core = { .rc_codes = RC_MAP_DIB0700_RC5_TABLE, .allowed_protos = RC_PROTO_BIT_RC5, .rc_query = pctv452e_rc_query, .rc_interval = 100, }, .num_adapters = 1, .adapter = {{ .num_frontends = 1, .fe = {{ .frontend_attach = pctv452e_frontend_attach, .tuner_attach = pctv452e_tuner_attach, /* parameter for the MPEG2-data transfer */ .stream = { .type = USB_ISOC, .count = 4, .endpoint = 0x02, .u = { .isoc = { .framesperurb = 4, .framesize = 940, .interval = 1 } } }, } }, } }, .i2c_algo = &pctv452e_i2c_algo, .generic_bulk_ctrl_endpoint = 1, /* allow generice rw function */ .num_device_descs = 1, .devices = { { .name = "PCTV HDTV USB", .cold_ids = { NULL, NULL }, /* this is a warm only device */ .warm_ids = { &pctv452e_usb_table[PINNACLE_PCTV_452E], NULL } }, { NULL }, } }; static struct dvb_usb_device_properties tt_connect_s2_3600_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, /* more ? */ .usb_ctrl = DEVICE_SPECIFIC, .size_of_priv = sizeof(struct pctv452e_state), .power_ctrl = pctv452e_power_ctrl, .read_mac_address = pctv452e_read_mac_address, .rc.core = { .rc_codes = RC_MAP_TT_1500, .allowed_protos = RC_PROTO_BIT_RC5, .rc_query = pctv452e_rc_query, .rc_interval = 100, }, .num_adapters = 1, .adapter = {{ .num_frontends = 1, .fe = {{ .frontend_attach = pctv452e_frontend_attach, .tuner_attach = pctv452e_tuner_attach, /* parameter for the MPEG2-data transfer */ .stream = { .type = USB_ISOC, .count = 4, .endpoint = 0x02, .u = { .isoc = { .framesperurb = 64, .framesize = 940, .interval = 1 } } }, } }, } }, .i2c_algo = &pctv452e_i2c_algo, .generic_bulk_ctrl_endpoint = 1, /* allow generic rw function*/ .num_device_descs = 2, .devices = { { .name = "Technotrend TT Connect S2-3600", .cold_ids = { NULL, NULL }, /* this is a warm only device */ .warm_ids = { &pctv452e_usb_table[TECHNOTREND_CONNECT_S2_3600], NULL } }, { .name = "Technotrend TT Connect S2-3650-CI", .cold_ids = { NULL, NULL }, .warm_ids = { &pctv452e_usb_table[TECHNOTREND_CONNECT_S2_3650_CI], NULL } }, { NULL }, } }; static void pctv452e_usb_disconnect(struct usb_interface *intf) { struct dvb_usb_device *d = usb_get_intfdata(intf); tt3650_ci_uninit(d); dvb_usb_device_exit(intf); } static int pctv452e_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { if (0 == dvb_usb_device_init(intf, &pctv452e_properties, THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &tt_connect_s2_3600_properties, THIS_MODULE, NULL, adapter_nr)) return 0; return -ENODEV; } static struct usb_driver pctv452e_usb_driver = { .name = "pctv452e", .probe = pctv452e_usb_probe, .disconnect = pctv452e_usb_disconnect, .id_table = pctv452e_usb_table, }; module_usb_driver(pctv452e_usb_driver); MODULE_AUTHOR("Dominik Kuhlen <dkuhlen@gmx.net>"); MODULE_AUTHOR("Andre Weidemann <Andre.Weidemann@web.de>"); MODULE_AUTHOR("Michael H. Schimek <mschimek@gmx.at>"); MODULE_DESCRIPTION("Pinnacle PCTV HDTV USB DVB / TT connect S2-3600 Driver"); MODULE_LICENSE("GPL");
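/*
 * Illustrative sketch, not part of the driver above: the command framing
 * that tt3650_ci_msg() builds before handing the buffer to
 * dvb_usb_generic_rw() (pctv452e_i2c_msg() uses the same sync/id/cmd/length
 * header with an I2C-specific payload). Plain, kernel-independent C for
 * illustration only; the helper names here are invented.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define PCTV_FRAME_LEN	64	/* size of the driver's command/answer buffers */

/* Build "sync, transaction id, command, payload length, payload...". */
static size_t pctv_frame_cmd(uint8_t buf[PCTV_FRAME_LEN], uint8_t id,
			     uint8_t cmd, const uint8_t *data, uint8_t len)
{
	if (len > PCTV_FRAME_LEN - 4)
		return 0;	/* the driver rejects such requests with -EIO */

	buf[0] = 0xaa;		/* SYNC_BYTE_OUT */
	buf[1] = id;		/* per-device transaction counter (state->c) */
	buf[2] = cmd;		/* e.g. TT3650_CMD_CI_RD_ATTR or PCTV_CMD_IR */
	buf[3] = len;
	memcpy(buf + 4, data, len);
	return 4 + (size_t)len;
}

/* A reply is accepted only if it echoes SYNC_BYTE_IN and the same id. */
static int pctv_reply_matches(const uint8_t *buf, uint8_t id)
{
	return buf[0] == 0x55 && buf[1] == id;
}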
// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/wakeup.c - System wakeup events framework * * Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. */ #define pr_fmt(fmt) "PM: " fmt #include <linux/device.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/capability.h> #include <linux/export.h> #include <linux/suspend.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/pm_wakeirq.h> #include <trace/events/power.h> #include "power.h" #define list_for_each_entry_rcu_locked(pos, head, member) \ list_for_each_entry_rcu(pos, head, member, \ srcu_read_lock_held(&wakeup_srcu)) /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. */ bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ static unsigned int wakeup_irq[2] __read_mostly; static DEFINE_RAW_SPINLOCK(wakeup_irq_lock); /* If greater than 0 and the system is suspending, terminate the suspend. */ static atomic_t pm_abort_suspend __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. * They need to be modified together atomically, so it's better to use one * atomic variable to hold them both. */ static atomic_t combined_event_count = ATOMIC_INIT(0); #define IN_PROGRESS_BITS (sizeof(int) * 4) #define MAX_IN_PROGRESS ((1 << IN_PROGRESS_BITS) - 1) static void split_counters(unsigned int *cnt, unsigned int *inpr) { unsigned int comb = atomic_read(&combined_event_count); *cnt = (comb >> IN_PROGRESS_BITS); *inpr = comb & MAX_IN_PROGRESS; } /* A preserved old value of the events counter. */ static unsigned int saved_count; static DEFINE_RAW_SPINLOCK(events_lock); static void pm_wakeup_timer_fn(struct timer_list *t); static LIST_HEAD(wakeup_sources); static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); DEFINE_STATIC_SRCU(wakeup_srcu); static struct wakeup_source deleted_ws = { .name = "deleted", .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock), }; static DEFINE_IDA(wakeup_ida); /** * wakeup_source_create - Create a struct wakeup_source object. * @name: Name of the new wakeup source.
*/ struct wakeup_source *wakeup_source_create(const char *name) { struct wakeup_source *ws; const char *ws_name; int id; ws = kzalloc(sizeof(*ws), GFP_KERNEL); if (!ws) goto err_ws; ws_name = kstrdup_const(name, GFP_KERNEL); if (!ws_name) goto err_name; ws->name = ws_name; id = ida_alloc(&wakeup_ida, GFP_KERNEL); if (id < 0) goto err_id; ws->id = id; return ws; err_id: kfree_const(ws->name); err_name: kfree(ws); err_ws: return NULL; } EXPORT_SYMBOL_GPL(wakeup_source_create); /* * Record wakeup_source statistics being deleted into a dummy wakeup_source. */ static void wakeup_source_record(struct wakeup_source *ws) { unsigned long flags; spin_lock_irqsave(&deleted_ws.lock, flags); if (ws->event_count) { deleted_ws.total_time = ktime_add(deleted_ws.total_time, ws->total_time); deleted_ws.prevent_sleep_time = ktime_add(deleted_ws.prevent_sleep_time, ws->prevent_sleep_time); deleted_ws.max_time = ktime_compare(deleted_ws.max_time, ws->max_time) > 0 ? deleted_ws.max_time : ws->max_time; deleted_ws.event_count += ws->event_count; deleted_ws.active_count += ws->active_count; deleted_ws.relax_count += ws->relax_count; deleted_ws.expire_count += ws->expire_count; deleted_ws.wakeup_count += ws->wakeup_count; } spin_unlock_irqrestore(&deleted_ws.lock, flags); } static void wakeup_source_free(struct wakeup_source *ws) { ida_free(&wakeup_ida, ws->id); kfree_const(ws->name); kfree(ws); } /** * wakeup_source_destroy - Destroy a struct wakeup_source object. * @ws: Wakeup source to destroy. * * Use only for wakeup source objects created with wakeup_source_create(). */ void wakeup_source_destroy(struct wakeup_source *ws) { if (!ws) return; __pm_relax(ws); wakeup_source_record(ws); wakeup_source_free(ws); } EXPORT_SYMBOL_GPL(wakeup_source_destroy); /** * wakeup_source_add - Add given object to the list of wakeup sources. * @ws: Wakeup source object to add to the list. */ void wakeup_source_add(struct wakeup_source *ws) { unsigned long flags; if (WARN_ON(!ws)) return; spin_lock_init(&ws->lock); timer_setup(&ws->timer, pm_wakeup_timer_fn, 0); ws->active = false; raw_spin_lock_irqsave(&events_lock, flags); list_add_rcu(&ws->entry, &wakeup_sources); raw_spin_unlock_irqrestore(&events_lock, flags); } EXPORT_SYMBOL_GPL(wakeup_source_add); /** * wakeup_source_remove - Remove given object from the wakeup sources list. * @ws: Wakeup source object to remove from the list. */ void wakeup_source_remove(struct wakeup_source *ws) { unsigned long flags; if (WARN_ON(!ws)) return; raw_spin_lock_irqsave(&events_lock, flags); list_del_rcu(&ws->entry); raw_spin_unlock_irqrestore(&events_lock, flags); synchronize_srcu(&wakeup_srcu); del_timer_sync(&ws->timer); /* * Clear timer.function to make wakeup_source_not_registered() treat * this wakeup source as not registered. */ ws->timer.function = NULL; } EXPORT_SYMBOL_GPL(wakeup_source_remove); /** * wakeup_source_register - Create wakeup source and add it to the list. * @dev: Device this wakeup source is associated with (or NULL if virtual). * @name: Name of the wakeup source to register. */ struct wakeup_source *wakeup_source_register(struct device *dev, const char *name) { struct wakeup_source *ws; int ret; ws = wakeup_source_create(name); if (ws) { if (!dev || device_is_registered(dev)) { ret = wakeup_source_sysfs_add(dev, ws); if (ret) { wakeup_source_free(ws); return NULL; } } wakeup_source_add(ws); } return ws; } EXPORT_SYMBOL_GPL(wakeup_source_register); /** * wakeup_source_unregister - Remove wakeup source from the list and remove it. 
* @ws: Wakeup source object to unregister. */ void wakeup_source_unregister(struct wakeup_source *ws) { if (ws) { wakeup_source_remove(ws); if (ws->dev) wakeup_source_sysfs_remove(ws); wakeup_source_destroy(ws); } } EXPORT_SYMBOL_GPL(wakeup_source_unregister); /** * wakeup_sources_read_lock - Lock wakeup source list for read. * * Returns an index of srcu lock for struct wakeup_srcu. * This index must be passed to the matching wakeup_sources_read_unlock(). */ int wakeup_sources_read_lock(void) { return srcu_read_lock(&wakeup_srcu); } EXPORT_SYMBOL_GPL(wakeup_sources_read_lock); /** * wakeup_sources_read_unlock - Unlock wakeup source list. * @idx: return value from corresponding wakeup_sources_read_lock() */ void wakeup_sources_read_unlock(int idx) { srcu_read_unlock(&wakeup_srcu, idx); } EXPORT_SYMBOL_GPL(wakeup_sources_read_unlock); /** * wakeup_sources_walk_start - Begin a walk on wakeup source list * * Returns first object of the list of wakeup sources. * * Note that to be safe, wakeup sources list needs to be locked by calling * wakeup_source_read_lock() for this. */ struct wakeup_source *wakeup_sources_walk_start(void) { struct list_head *ws_head = &wakeup_sources; return list_entry_rcu(ws_head->next, struct wakeup_source, entry); } EXPORT_SYMBOL_GPL(wakeup_sources_walk_start); /** * wakeup_sources_walk_next - Get next wakeup source from the list * @ws: Previous wakeup source object * * Note that to be safe, wakeup sources list needs to be locked by calling * wakeup_source_read_lock() for this. */ struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws) { struct list_head *ws_head = &wakeup_sources; return list_next_or_null_rcu(ws_head, &ws->entry, struct wakeup_source, entry); } EXPORT_SYMBOL_GPL(wakeup_sources_walk_next); /** * device_wakeup_attach - Attach a wakeup source object to a device object. * @dev: Device to handle. * @ws: Wakeup source object to attach to @dev. * * This causes @dev to be treated as a wakeup device. */ static int device_wakeup_attach(struct device *dev, struct wakeup_source *ws) { spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { spin_unlock_irq(&dev->power.lock); return -EEXIST; } dev->power.wakeup = ws; if (dev->power.wakeirq) device_wakeup_attach_irq(dev, dev->power.wakeirq); spin_unlock_irq(&dev->power.lock); return 0; } /** * device_wakeup_enable - Enable given device to be a wakeup source. * @dev: Device to handle. * * Create a wakeup source object, register it and attach it to @dev. */ int device_wakeup_enable(struct device *dev) { struct wakeup_source *ws; int ret; if (!dev || !dev->power.can_wakeup) return -EINVAL; if (pm_suspend_target_state != PM_SUSPEND_ON) dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__); ws = wakeup_source_register(dev, dev_name(dev)); if (!ws) return -ENOMEM; ret = device_wakeup_attach(dev, ws); if (ret) wakeup_source_unregister(ws); return ret; } EXPORT_SYMBOL_GPL(device_wakeup_enable); /** * device_wakeup_attach_irq - Attach a wakeirq to a wakeup source * @dev: Device to handle * @wakeirq: Device specific wakeirq entry * * Attach a device wakeirq to the wakeup source so the device * wake IRQ can be configured automatically for suspend and * resume. * * Call under the device's power.lock lock. 
*/ void device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) { struct wakeup_source *ws; ws = dev->power.wakeup; if (!ws) return; if (ws->wakeirq) dev_err(dev, "Leftover wakeup IRQ found, overriding\n"); ws->wakeirq = wakeirq; } /** * device_wakeup_detach_irq - Detach a wakeirq from a wakeup source * @dev: Device to handle * * Removes a device wakeirq from the wakeup source. * * Call under the device's power.lock lock. */ void device_wakeup_detach_irq(struct device *dev) { struct wakeup_source *ws; ws = dev->power.wakeup; if (ws) ws->wakeirq = NULL; } /** * device_wakeup_arm_wake_irqs - * * Iterates over the list of device wakeirqs to arm them. */ void device_wakeup_arm_wake_irqs(void) { struct wakeup_source *ws; int srcuidx; srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) dev_pm_arm_wake_irq(ws->wakeirq); srcu_read_unlock(&wakeup_srcu, srcuidx); } /** * device_wakeup_disarm_wake_irqs - * * Iterates over the list of device wakeirqs to disarm them. */ void device_wakeup_disarm_wake_irqs(void) { struct wakeup_source *ws; int srcuidx; srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) dev_pm_disarm_wake_irq(ws->wakeirq); srcu_read_unlock(&wakeup_srcu, srcuidx); } /** * device_wakeup_detach - Detach a device's wakeup source object from it. * @dev: Device to detach the wakeup source object from. * * After it returns, @dev will not be treated as a wakeup device any more. */ static struct wakeup_source *device_wakeup_detach(struct device *dev) { struct wakeup_source *ws; spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; dev->power.wakeup = NULL; spin_unlock_irq(&dev->power.lock); return ws; } /** * device_wakeup_disable - Do not regard a device as a wakeup source any more. * @dev: Device to handle. * * Detach the @dev's wakeup source object from it, unregister this wakeup source * object and destroy it. */ int device_wakeup_disable(struct device *dev) { struct wakeup_source *ws; if (!dev || !dev->power.can_wakeup) return -EINVAL; ws = device_wakeup_detach(dev); wakeup_source_unregister(ws); return 0; } EXPORT_SYMBOL_GPL(device_wakeup_disable); /** * device_set_wakeup_capable - Set/reset device wakeup capability flag. * @dev: Device to handle. * @capable: Whether or not @dev is capable of waking up the system from sleep. * * If @capable is set, set the @dev's power.can_wakeup flag and add its * wakeup-related attributes to sysfs. Otherwise, unset the @dev's * power.can_wakeup flag and remove its wakeup-related attributes from sysfs. * * This function may sleep and it can't be called from any context where * sleeping is not allowed. */ void device_set_wakeup_capable(struct device *dev, bool capable) { if (!!dev->power.can_wakeup == !!capable) return; dev->power.can_wakeup = capable; if (device_is_registered(dev) && !list_empty(&dev->power.entry)) { if (capable) { int ret = wakeup_sysfs_add(dev); if (ret) dev_info(dev, "Wakeup sysfs attributes not added\n"); } else { wakeup_sysfs_remove(dev); } } } EXPORT_SYMBOL_GPL(device_set_wakeup_capable); /** * device_set_wakeup_enable - Enable or disable a device to wake up the system. * @dev: Device to handle. * @enable: enable/disable flag */ int device_set_wakeup_enable(struct device *dev, bool enable) { return enable ? device_wakeup_enable(dev) : device_wakeup_disable(dev); } EXPORT_SYMBOL_GPL(device_set_wakeup_enable); /** * wakeup_source_not_registered - validate the given wakeup source. * @ws: Wakeup source to be validated. 
*/ static bool wakeup_source_not_registered(struct wakeup_source *ws) { /* * Use timer struct to check if the given source is initialized * by wakeup_source_add. */ return ws->timer.function != pm_wakeup_timer_fn; } /* * The functions below use the observation that each wakeup event starts a * period in which the system should not be suspended. The moment this period * will end depends on how the wakeup event is going to be processed after being * detected and all of the possible cases can be divided into two distinct * groups. * * First, a wakeup event may be detected by the same functional unit that will * carry out the entire processing of it and possibly will pass it to user space * for further processing. In that case the functional unit that has detected * the event may later "close" the "no suspend" period associated with it * directly as soon as it has been dealt with. The pair of pm_stay_awake() and * pm_relax(), balanced with each other, is supposed to be used in such * situations. * * Second, a wakeup event may be detected by one functional unit and processed * by another one. In that case the unit that has detected it cannot really * "close" the "no suspend" period associated with it, unless it knows in * advance what's going to happen to the event during processing. This * knowledge, however, may not be available to it, so it can simply specify time * to wait before the system can be suspended and pass it as the second * argument of pm_wakeup_event(). * * It is valid to call pm_relax() after pm_wakeup_event(), in which case the * "no suspend" period will be ended either by the pm_relax(), or by the timer * function executed when the timer expires, whichever comes first. */ /** * wakeup_source_activate - Mark given wakeup source as active. * @ws: Wakeup source to handle. * * Update the @ws' statistics and, if @ws has just been activated, notify the PM * core of the event by incrementing the counter of the wakeup events being * processed. */ static void wakeup_source_activate(struct wakeup_source *ws) { unsigned int cec; if (WARN_ONCE(wakeup_source_not_registered(ws), "unregistered wakeup source\n")) return; ws->active = true; ws->active_count++; ws->last_time = ktime_get(); if (ws->autosleep_enabled) ws->start_prevent_time = ws->last_time; /* Increment the counter of events in progress. */ cec = atomic_inc_return(&combined_event_count); trace_wakeup_source_activate(ws->name, cec); } /** * wakeup_source_report_event - Report wakeup event using the given source. * @ws: Wakeup source to report the event for. * @hard: If set, abort suspends in progress and wake up from suspend-to-idle. */ static void wakeup_source_report_event(struct wakeup_source *ws, bool hard) { ws->event_count++; /* This is racy, but the counter is approximate anyway. */ if (events_check_enabled) ws->wakeup_count++; if (!ws->active) wakeup_source_activate(ws); if (hard) pm_system_wakeup(); } /** * __pm_stay_awake - Notify the PM core of a wakeup event. * @ws: Wakeup source object associated with the source of the event. * * It is safe to call this function from interrupt context. */ void __pm_stay_awake(struct wakeup_source *ws) { unsigned long flags; if (!ws) return; spin_lock_irqsave(&ws->lock, flags); wakeup_source_report_event(ws, false); del_timer(&ws->timer); ws->timer_expires = 0; spin_unlock_irqrestore(&ws->lock, flags); } EXPORT_SYMBOL_GPL(__pm_stay_awake); /** * pm_stay_awake - Notify the PM core that a wakeup event is being processed. * @dev: Device the wakeup event is related to. 
* * Notify the PM core of a wakeup event (signaled by @dev) by calling * __pm_stay_awake for the @dev's wakeup source object. * * Call this function after detecting of a wakeup event if pm_relax() is going * to be called directly after processing the event (and possibly passing it to * user space for further processing). */ void pm_stay_awake(struct device *dev) { unsigned long flags; if (!dev) return; spin_lock_irqsave(&dev->power.lock, flags); __pm_stay_awake(dev->power.wakeup); spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_stay_awake); #ifdef CONFIG_PM_AUTOSLEEP static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) { ktime_t delta = ktime_sub(now, ws->start_prevent_time); ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta); } #else static inline void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) {} #endif /** * wakeup_source_deactivate - Mark given wakeup source as inactive. * @ws: Wakeup source to handle. * * Update the @ws' statistics and notify the PM core that the wakeup source has * become inactive by decrementing the counter of wakeup events being processed * and incrementing the counter of registered wakeup events. */ static void wakeup_source_deactivate(struct wakeup_source *ws) { unsigned int cnt, inpr, cec; ktime_t duration; ktime_t now; ws->relax_count++; /* * __pm_relax() may be called directly or from a timer function. * If it is called directly right after the timer function has been * started, but before the timer function calls __pm_relax(), it is * possible that __pm_stay_awake() will be called in the meantime and * will set ws->active. Then, ws->active may be cleared immediately * by the __pm_relax() called from the timer function, but in such a * case ws->relax_count will be different from ws->active_count. */ if (ws->relax_count != ws->active_count) { ws->relax_count--; return; } ws->active = false; now = ktime_get(); duration = ktime_sub(now, ws->last_time); ws->total_time = ktime_add(ws->total_time, duration); if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time)) ws->max_time = duration; ws->last_time = now; del_timer(&ws->timer); ws->timer_expires = 0; if (ws->autosleep_enabled) update_prevent_sleep_time(ws, now); /* * Increment the counter of registered wakeup events and decrement the * counter of wakeup events in progress simultaneously. */ cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); trace_wakeup_source_deactivate(ws->name, cec); split_counters(&cnt, &inpr); if (!inpr && waitqueue_active(&wakeup_count_wait_queue)) wake_up(&wakeup_count_wait_queue); } /** * __pm_relax - Notify the PM core that processing of a wakeup event has ended. * @ws: Wakeup source object associated with the source of the event. * * Call this function for wakeup events whose processing started with calling * __pm_stay_awake(). * * It is safe to call it from interrupt context. */ void __pm_relax(struct wakeup_source *ws) { unsigned long flags; if (!ws) return; spin_lock_irqsave(&ws->lock, flags); if (ws->active) wakeup_source_deactivate(ws); spin_unlock_irqrestore(&ws->lock, flags); } EXPORT_SYMBOL_GPL(__pm_relax); /** * pm_relax - Notify the PM core that processing of a wakeup event has ended. * @dev: Device that signaled the event. * * Execute __pm_relax() for the @dev's wakeup source object. 
*/ void pm_relax(struct device *dev) { unsigned long flags; if (!dev) return; spin_lock_irqsave(&dev->power.lock, flags); __pm_relax(dev->power.wakeup); spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_relax); /** * pm_wakeup_timer_fn - Delayed finalization of a wakeup event. * @t: timer list * * Call wakeup_source_deactivate() for the wakeup source whose address is stored * in @data if it is currently active and its timer has not been canceled and * the expiration time of the timer is not in future. */ static void pm_wakeup_timer_fn(struct timer_list *t) { struct wakeup_source *ws = from_timer(ws, t, timer); unsigned long flags; spin_lock_irqsave(&ws->lock, flags); if (ws->active && ws->timer_expires && time_after_eq(jiffies, ws->timer_expires)) { wakeup_source_deactivate(ws); ws->expire_count++; } spin_unlock_irqrestore(&ws->lock, flags); } /** * pm_wakeup_ws_event - Notify the PM core of a wakeup event. * @ws: Wakeup source object associated with the event source. * @msec: Anticipated event processing time (in milliseconds). * @hard: If set, abort suspends in progress and wake up from suspend-to-idle. * * Notify the PM core of a wakeup event whose source is @ws that will take * approximately @msec milliseconds to be processed by the kernel. If @ws is * not active, activate it. If @msec is nonzero, set up the @ws' timer to * execute pm_wakeup_timer_fn() in future. * * It is safe to call this function from interrupt context. */ void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard) { unsigned long flags; unsigned long expires; if (!ws) return; spin_lock_irqsave(&ws->lock, flags); wakeup_source_report_event(ws, hard); if (!msec) { wakeup_source_deactivate(ws); goto unlock; } expires = jiffies + msecs_to_jiffies(msec); if (!expires) expires = 1; if (!ws->timer_expires || time_after(expires, ws->timer_expires)) { mod_timer(&ws->timer, expires); ws->timer_expires = expires; } unlock: spin_unlock_irqrestore(&ws->lock, flags); } EXPORT_SYMBOL_GPL(pm_wakeup_ws_event); /** * pm_wakeup_dev_event - Notify the PM core of a wakeup event. * @dev: Device the wakeup event is related to. * @msec: Anticipated event processing time (in milliseconds). * @hard: If set, abort suspends in progress and wake up from suspend-to-idle. * * Call pm_wakeup_ws_event() for the @dev's wakeup source object. */ void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard) { unsigned long flags; if (!dev) return; spin_lock_irqsave(&dev->power.lock, flags); pm_wakeup_ws_event(dev->power.wakeup, msec, hard); spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_wakeup_dev_event); void pm_print_active_wakeup_sources(void) { struct wakeup_source *ws; int srcuidx, active = 0; struct wakeup_source *last_activity_ws = NULL; srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) { if (ws->active) { pm_pr_dbg("active wakeup source: %s\n", ws->name); active = 1; } else if (!active && (!last_activity_ws || ktime_to_ns(ws->last_time) > ktime_to_ns(last_activity_ws->last_time))) { last_activity_ws = ws; } } if (!active && last_activity_ws) pm_pr_dbg("last active wakeup source: %s\n", last_activity_ws->name); srcu_read_unlock(&wakeup_srcu, srcuidx); } EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources); /** * pm_wakeup_pending - Check if power transition in progress should be aborted. 
* * Compare the current number of registered wakeup events with its preserved * value from the past and return true if new wakeup events have been registered * since the old value was stored. Also return true if the current number of * wakeup events being processed is different from zero. */ bool pm_wakeup_pending(void) { unsigned long flags; bool ret = false; raw_spin_lock_irqsave(&events_lock, flags); if (events_check_enabled) { unsigned int cnt, inpr; split_counters(&cnt, &inpr); ret = (cnt != saved_count || inpr > 0); events_check_enabled = !ret; } raw_spin_unlock_irqrestore(&events_lock, flags); if (ret) { pm_pr_dbg("Wakeup pending, aborting suspend\n"); pm_print_active_wakeup_sources(); } return ret || atomic_read(&pm_abort_suspend) > 0; } EXPORT_SYMBOL_GPL(pm_wakeup_pending); void pm_system_wakeup(void) { atomic_inc(&pm_abort_suspend); s2idle_wake(); } EXPORT_SYMBOL_GPL(pm_system_wakeup); void pm_system_cancel_wakeup(void) { atomic_dec_if_positive(&pm_abort_suspend); } void pm_wakeup_clear(unsigned int irq_number) { raw_spin_lock_irq(&wakeup_irq_lock); if (irq_number && wakeup_irq[0] == irq_number) wakeup_irq[0] = wakeup_irq[1]; else wakeup_irq[0] = 0; wakeup_irq[1] = 0; raw_spin_unlock_irq(&wakeup_irq_lock); if (!irq_number) atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) { unsigned long flags; raw_spin_lock_irqsave(&wakeup_irq_lock, flags); if (wakeup_irq[0] == 0) wakeup_irq[0] = irq_number; else if (wakeup_irq[1] == 0) wakeup_irq[1] = irq_number; else irq_number = 0; pm_pr_dbg("Triggering wakeup from IRQ %d\n", irq_number); raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags); if (irq_number) pm_system_wakeup(); } unsigned int pm_wakeup_irq(void) { return wakeup_irq[0]; } /** * pm_get_wakeup_count - Read the number of registered wakeup events. * @count: Address to store the value at. * @block: Whether or not to block. * * Store the number of registered wakeup events at the address in @count. If * @block is set, block until the current number of wakeup events being * processed is zero. * * Return 'false' if the current number of wakeup events being processed is * nonzero. Otherwise return 'true'. */ bool pm_get_wakeup_count(unsigned int *count, bool block) { unsigned int cnt, inpr; if (block) { DEFINE_WAIT(wait); for (;;) { prepare_to_wait(&wakeup_count_wait_queue, &wait, TASK_INTERRUPTIBLE); split_counters(&cnt, &inpr); if (inpr == 0 || signal_pending(current)) break; pm_print_active_wakeup_sources(); schedule(); } finish_wait(&wakeup_count_wait_queue, &wait); } split_counters(&cnt, &inpr); *count = cnt; return !inpr; } /** * pm_save_wakeup_count - Save the current number of registered wakeup events. * @count: Value to compare with the current number of registered wakeup events. * * If @count is equal to the current number of registered wakeup events and the * current number of wakeup events being processed is zero, store @count as the * old number of registered wakeup events for pm_check_wakeup_events(), enable * wakeup events detection and return 'true'. Otherwise disable wakeup events * detection and return 'false'. 
*/ bool pm_save_wakeup_count(unsigned int count) { unsigned int cnt, inpr; unsigned long flags; events_check_enabled = false; raw_spin_lock_irqsave(&events_lock, flags); split_counters(&cnt, &inpr); if (cnt == count && inpr == 0) { saved_count = count; events_check_enabled = true; } raw_spin_unlock_irqrestore(&events_lock, flags); return events_check_enabled; } #ifdef CONFIG_PM_AUTOSLEEP /** * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources. * @set: Whether to set or to clear the autosleep_enabled flags. */ void pm_wakep_autosleep_enabled(bool set) { struct wakeup_source *ws; ktime_t now = ktime_get(); int srcuidx; srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) { spin_lock_irq(&ws->lock); if (ws->autosleep_enabled != set) { ws->autosleep_enabled = set; if (ws->active) { if (set) ws->start_prevent_time = now; else update_prevent_sleep_time(ws, now); } } spin_unlock_irq(&ws->lock); } srcu_read_unlock(&wakeup_srcu, srcuidx); } #endif /* CONFIG_PM_AUTOSLEEP */ /** * print_wakeup_source_stats - Print wakeup source statistics information. * @m: seq_file to print the statistics into. * @ws: Wakeup source object to print the statistics for. */ static int print_wakeup_source_stats(struct seq_file *m, struct wakeup_source *ws) { unsigned long flags; ktime_t total_time; ktime_t max_time; unsigned long active_count; ktime_t active_time; ktime_t prevent_sleep_time; spin_lock_irqsave(&ws->lock, flags); total_time = ws->total_time; max_time = ws->max_time; prevent_sleep_time = ws->prevent_sleep_time; active_count = ws->active_count; if (ws->active) { ktime_t now = ktime_get(); active_time = ktime_sub(now, ws->last_time); total_time = ktime_add(total_time, active_time); if (active_time > max_time) max_time = active_time; if (ws->autosleep_enabled) prevent_sleep_time = ktime_add(prevent_sleep_time, ktime_sub(now, ws->start_prevent_time)); } else { active_time = 0; } seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", ws->name, active_count, ws->event_count, ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), ktime_to_ms(max_time), ktime_to_ms(ws->last_time), ktime_to_ms(prevent_sleep_time)); spin_unlock_irqrestore(&ws->lock, flags); return 0; } static void *wakeup_sources_stats_seq_start(struct seq_file *m, loff_t *pos) { struct wakeup_source *ws; loff_t n = *pos; int *srcuidx = m->private; if (n == 0) { seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" "last_change\tprevent_suspend_time\n"); } *srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) { if (n-- <= 0) return ws; } return NULL; } static void *wakeup_sources_stats_seq_next(struct seq_file *m, void *v, loff_t *pos) { struct wakeup_source *ws = v; struct wakeup_source *next_ws = NULL; ++(*pos); list_for_each_entry_continue_rcu(ws, &wakeup_sources, entry) { next_ws = ws; break; } if (!next_ws) print_wakeup_source_stats(m, &deleted_ws); return next_ws; } static void wakeup_sources_stats_seq_stop(struct seq_file *m, void *v) { int *srcuidx = m->private; srcu_read_unlock(&wakeup_srcu, *srcuidx); } /** * wakeup_sources_stats_seq_show - Print wakeup sources statistics information. * @m: seq_file to print the statistics into. 
* @v: wakeup_source of each iteration */ static int wakeup_sources_stats_seq_show(struct seq_file *m, void *v) { struct wakeup_source *ws = v; print_wakeup_source_stats(m, ws); return 0; } static const struct seq_operations wakeup_sources_stats_seq_ops = { .start = wakeup_sources_stats_seq_start, .next = wakeup_sources_stats_seq_next, .stop = wakeup_sources_stats_seq_stop, .show = wakeup_sources_stats_seq_show, }; static int wakeup_sources_stats_open(struct inode *inode, struct file *file) { return seq_open_private(file, &wakeup_sources_stats_seq_ops, sizeof(int)); } static const struct file_operations wakeup_sources_stats_fops = { .owner = THIS_MODULE, .open = wakeup_sources_stats_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, }; static int __init wakeup_sources_debugfs_init(void) { debugfs_create_file("wakeup_sources", 0444, NULL, NULL, &wakeup_sources_stats_fops); return 0; } postcore_initcall(wakeup_sources_debugfs_init);
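The block comment above wakeup_source_activate() describes the two intended usage patterns: a balanced pm_stay_awake()/pm_relax() pair when the code that detects a wakeup event also finishes processing it, and pm_wakeup_event() with a time budget when processing is handed off elsewhere. The sketch below is a minimal, hypothetical illustration of both patterns from an interrupt handler; the handler name, the use of dev_id as the device pointer and the 200 ms budget are assumptions made for the example, not part of the code above, and a real driver would normally use only one of the two patterns.

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/pm_wakeup.h>

/*
 * Hypothetical wakeup IRQ handler; dev_id is assumed to be a struct device
 * that was made wakeup-capable beforehand (e.g. with
 * device_init_wakeup(dev, true), which registers a wakeup source for it).
 */
static irqreturn_t example_wakeup_irq(int irq, void *dev_id)
{
	struct device *dev = dev_id;

	/*
	 * Pattern 1: this code detects and fully processes the event, so
	 * it opens and closes the "no suspend" period itself.
	 */
	pm_stay_awake(dev);
	/* ... handle the event ... */
	pm_relax(dev);

	/*
	 * Pattern 2: the event is handed off (e.g. to user space), so give
	 * the remaining processing an estimated 200 ms; the timer armed by
	 * pm_wakeup_ws_event() ends the period unless pm_relax() is called
	 * earlier.
	 */
	pm_wakeup_event(dev, 200);

	return IRQ_HANDLED;
}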
// SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> #include <linux/bug.h> #include <linux/atomic.h> #include <linux/errseq.h> #include <linux/log2.h> /* * An errseq_t is a way of recording errors in one place, and allowing any * number of "subscribers" to tell whether it has changed since a previous * point where it was sampled. * * It's implemented as an unsigned 32-bit value. The low order bits are * designated to hold an error code (between 0 and -MAX_ERRNO). The upper bits * are used as a counter. This is done with atomics instead of locking so that * these functions can be called from any context. * * The general idea is for consumers to sample an errseq_t value. That value * can later be used to tell whether any new errors have occurred since that * sampling was done. * * Note that there is a risk of collisions if new errors are being recorded * frequently, since we have so few bits to use as a counter. * * To mitigate this, one bit is used as a flag to tell whether the value has * been sampled since a new value was recorded. That allows us to avoid bumping * the counter if no one has sampled it since the last time an error was * recorded. * * A new errseq_t should always be zeroed out. An errseq_t value of all zeroes * is the special (but common) case where there has never been an error. An all * zero value thus serves as the "epoch" if one wishes to know whether there * has ever been an error set since it was first initialized. */ /* The low bits are designated for error code (max of MAX_ERRNO) */ #define ERRSEQ_SHIFT ilog2(MAX_ERRNO + 1) /* This bit is used as a flag to indicate whether the value has been seen */ #define ERRSEQ_SEEN (1 << ERRSEQ_SHIFT) /* The lowest bit of the counter */ #define ERRSEQ_CTR_INC (1 << (ERRSEQ_SHIFT + 1)) /** * errseq_set - set an errseq_t for later reporting * @eseq: errseq_t field that should be set * @err: error to set (must be between -1 and -MAX_ERRNO) * * This function sets the error in @eseq, and increments the sequence counter * if the last sequence was sampled at some point in the past. * * Any error set will always overwrite an existing error. * * Return: The previous value, primarily for debugging purposes. The * return value should not be used as a previously sampled value in later * calls as it will not have the SEEN flag set. */ errseq_t errseq_set(errseq_t *eseq, int err) { errseq_t cur, old; /* MAX_ERRNO must be able to serve as a mask */ BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1); /* * Ensure the error code actually fits where we want it to go. If it * doesn't then just throw a warning and don't record anything. We * also don't accept zero here as that would effectively clear a * previous error.
*/ old = READ_ONCE(*eseq); if (WARN(unlikely(err == 0 || (unsigned int)-err > MAX_ERRNO), "err = %d\n", err)) return old; for (;;) { errseq_t new; /* Clear out error bits and set new error */ new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err; /* Only increment if someone has looked at it */ if (old & ERRSEQ_SEEN) new += ERRSEQ_CTR_INC; /* If there would be no change, then call it done */ if (new == old) { cur = new; break; } /* Try to swap the new value into place */ cur = cmpxchg(eseq, old, new); /* * Call it success if we did the swap or someone else beat us * to it for the same value. */ if (likely(cur == old || cur == new)) break; /* Raced with an update, try again */ old = cur; } return cur; } EXPORT_SYMBOL(errseq_set); /** * errseq_sample() - Grab current errseq_t value. * @eseq: Pointer to errseq_t to be sampled. * * This function allows callers to initialise their errseq_t variable. * If the error has been "seen", new callers will not see an old error. * If there is an unseen error in @eseq, the caller of this function will * see it the next time it checks for an error. * * Context: Any context. * Return: The current errseq value. */ errseq_t errseq_sample(errseq_t *eseq) { errseq_t old = READ_ONCE(*eseq); /* If nobody has seen this error yet, then we can be the first. */ if (!(old & ERRSEQ_SEEN)) old = 0; return old; } EXPORT_SYMBOL(errseq_sample); /** * errseq_check() - Has an error occurred since a particular sample point? * @eseq: Pointer to errseq_t value to be checked. * @since: Previously-sampled errseq_t from which to check. * * Grab the value that eseq points to, and see if it has changed @since * the given value was sampled. The @since value is not advanced, so there * is no need to mark the value as seen. * * Return: The latest error set in the errseq_t or 0 if it hasn't changed. */ int errseq_check(errseq_t *eseq, errseq_t since) { errseq_t cur = READ_ONCE(*eseq); if (likely(cur == since)) return 0; return -(cur & MAX_ERRNO); } EXPORT_SYMBOL(errseq_check); /** * errseq_check_and_advance() - Check an errseq_t and advance to current value. * @eseq: Pointer to value being checked and reported. * @since: Pointer to previously-sampled errseq_t to check against and advance. * * Grab the eseq value, and see whether it matches the value that @since * points to. If it does, then just return 0. * * If it doesn't, then the value has changed. Set the "seen" flag, and try to * swap it into place as the new eseq value. Then, set that value as the new * "since" value, and return whatever the error portion is set to. * * Note that no locking is provided here for concurrent updates to the "since" * value. The caller must provide that if necessary. Because of this, callers * may want to do a lockless errseq_check before taking the lock and calling * this. * * Return: Negative errno if one has been stored, or 0 if no new error has * occurred. */ int errseq_check_and_advance(errseq_t *eseq, errseq_t *since) { int err = 0; errseq_t old, new; /* * Most callers will want to use the inline wrapper to check this, * so that the common case of no error is handled without needing * to take the lock that protects the "since" value. */ old = READ_ONCE(*eseq); if (old != *since) { /* * Set the flag and try to swap it into place if it has * changed. * * We don't care about the outcome of the swap here. 
If the * swap doesn't occur, then it has either been updated by a * writer who is altering the value in some way (updating * counter or resetting the error), or another reader who is * just setting the "seen" flag. Either outcome is OK, and we * can advance "since" and return an error based on what we * have. */ new = old | ERRSEQ_SEEN; if (new != old) cmpxchg(eseq, old, new); *since = new; err = -(new & MAX_ERRNO); } return err; } EXPORT_SYMBOL(errseq_check_and_advance);
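To make the sample/check/advance flow above concrete, here is a minimal sketch assuming a hypothetical my_object structure that embeds an errseq_t; the type, field and function names and the writeback-error scenario are invented for the example. A writer records an error with errseq_set(), and a subscriber whose cursor was initialised with errseq_sample() reports it once via errseq_check_and_advance() and then sees nothing new until another error is recorded.

#include <linux/errno.h>
#include <linux/errseq.h>

/* Hypothetical container embedding a shared error record (starts at 0). */
struct my_object {
	errseq_t wb_err;
};

/* Writer side: record a new error, e.g. -EIO from a failed writeback. */
static void my_object_note_error(struct my_object *obj, int err)
{
	errseq_set(&obj->wb_err, err);
}

/*
 * Subscriber side: @since is this subscriber's cursor, initialised with
 * errseq_sample(&obj->wb_err). Returns a negative errno if an error was
 * recorded since the cursor last advanced, 0 otherwise.
 */
static int my_object_check_error(struct my_object *obj, errseq_t *since)
{
	/*
	 * Cheap lockless check first, as the comment above suggests; callers
	 * sharing @since must serialise the advance themselves.
	 */
	if (!errseq_check(&obj->wb_err, *since))
		return 0;

	return errseq_check_and_advance(&obj->wb_err, since);
}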
// SPDX-License-Identifier:
GPL-2.0-or-later /* * net/sched/sch_generic.c Generic packet scheduler routines. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Jamal Hadi Salim, <hadi@cyberus.ca> 990601 * - Ingress support */ #include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/if_vlan.h> #include <linux/skb_array.h> #include <linux/if_macvlan.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> #include <net/hotdata.h> #include <trace/events/qdisc.h> #include <trace/events/net.h> #include <net/xfrm.h> /* Qdisc to use by default */ const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; EXPORT_SYMBOL(default_qdisc_ops); static void qdisc_maybe_clear_missed(struct Qdisc *q, const struct netdev_queue *txq) { clear_bit(__QDISC_STATE_MISSED, &q->state); /* Make sure the below netif_xmit_frozen_or_stopped() * checking happens after clearing STATE_MISSED. */ smp_mb__after_atomic(); /* Checking netif_xmit_frozen_or_stopped() again to * make sure STATE_MISSED is set if the STATE_MISSED * set by netif_tx_wake_queue()'s rescheduling of * net_tx_action() is cleared by the above clear_bit(). */ if (!netif_xmit_frozen_or_stopped(txq)) set_bit(__QDISC_STATE_MISSED, &q->state); else set_bit(__QDISC_STATE_DRAINING, &q->state); } /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with * qdisc_lock(qdisc) spinlock. * * The idea is the following: * - enqueue, dequeue are serialized via qdisc root lock * - ingress filtering is also serialized via qdisc root lock * - updates to tree and tree walking are only done under the rtnl mutex. 
*/ #define SKB_XOFF_MAGIC ((struct sk_buff *)1UL) static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) { const struct netdev_queue *txq = q->dev_queue; spinlock_t *lock = NULL; struct sk_buff *skb; if (q->flags & TCQ_F_NOLOCK) { lock = qdisc_lock(q); spin_lock(lock); } skb = skb_peek(&q->skb_bad_txq); if (skb) { /* check the reason of requeuing without tx lock first */ txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { skb = __skb_dequeue(&q->skb_bad_txq); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_dec(q, skb); qdisc_qstats_cpu_qlen_dec(q); } else { qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; } } else { skb = SKB_XOFF_MAGIC; qdisc_maybe_clear_missed(q, txq); } } if (lock) spin_unlock(lock); return skb; } static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q) { struct sk_buff *skb = skb_peek(&q->skb_bad_txq); if (unlikely(skb)) skb = __skb_dequeue_bad_txq(q); return skb; } static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, struct sk_buff *skb) { spinlock_t *lock = NULL; if (q->flags & TCQ_F_NOLOCK) { lock = qdisc_lock(q); spin_lock(lock); } __skb_queue_tail(&q->skb_bad_txq, skb); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_inc(q, skb); qdisc_qstats_cpu_qlen_inc(q); } else { qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; } if (lock) spin_unlock(lock); } static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { spinlock_t *lock = NULL; if (q->flags & TCQ_F_NOLOCK) { lock = qdisc_lock(q); spin_lock(lock); } while (skb) { struct sk_buff *next = skb->next; __skb_queue_tail(&q->gso_skb, skb); /* it's still part of the queue */ if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_requeues_inc(q); qdisc_qstats_cpu_backlog_inc(q, skb); qdisc_qstats_cpu_qlen_inc(q); } else { q->qstats.requeues++; qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; } skb = next; } if (lock) { spin_unlock(lock); set_bit(__QDISC_STATE_MISSED, &q->state); } else { __netif_schedule(q); } } static void try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, const struct netdev_queue *txq, int *packets) { int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; while (bytelimit > 0) { struct sk_buff *nskb = q->dequeue(q); if (!nskb) break; bytelimit -= nskb->len; /* covers GSO len */ skb->next = nskb; skb = nskb; (*packets)++; /* GSO counts as one pkt */ } skb_mark_not_on_list(skb); } /* This variant of try_bulk_dequeue_skb() makes sure * all skbs in the chain are for the same txq */ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, struct sk_buff *skb, int *packets) { int mapping = skb_get_queue_mapping(skb); struct sk_buff *nskb; int cnt = 0; do { nskb = q->dequeue(q); if (!nskb) break; if (unlikely(skb_get_queue_mapping(nskb) != mapping)) { qdisc_enqueue_skb_bad_txq(q, nskb); break; } skb->next = nskb; skb = nskb; } while (++cnt < 8); (*packets) += cnt; skb_mark_not_on_list(skb); } /* Note that dequeue_skb can possibly return a SKB list (via skb->next). * A requeued skb (via q->gso_skb) can also be a SKB list. */ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, int *packets) { const struct netdev_queue *txq = q->dev_queue; struct sk_buff *skb = NULL; *packets = 1; if (unlikely(!skb_queue_empty(&q->gso_skb))) { spinlock_t *lock = NULL; if (q->flags & TCQ_F_NOLOCK) { lock = qdisc_lock(q); spin_lock(lock); } skb = skb_peek(&q->gso_skb); /* skb may be null if another cpu pulls gso_skb off in between * empty check and lock. 
*/ if (!skb) { if (lock) spin_unlock(lock); goto validate; } /* skb in gso_skb were already validated */ *validate = false; if (xfrm_offload(skb)) *validate = true; /* check the reason of requeuing without tx lock first */ txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { skb = __skb_dequeue(&q->gso_skb); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_dec(q, skb); qdisc_qstats_cpu_qlen_dec(q); } else { qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; } } else { skb = NULL; qdisc_maybe_clear_missed(q, txq); } if (lock) spin_unlock(lock); goto trace; } validate: *validate = true; if ((q->flags & TCQ_F_ONETXQUEUE) && netif_xmit_frozen_or_stopped(txq)) { qdisc_maybe_clear_missed(q, txq); return skb; } skb = qdisc_dequeue_skb_bad_txq(q); if (unlikely(skb)) { if (skb == SKB_XOFF_MAGIC) return NULL; goto bulk; } skb = q->dequeue(q); if (skb) { bulk: if (qdisc_may_bulk(q)) try_bulk_dequeue_skb(q, skb, txq, packets); else try_bulk_dequeue_skb_slow(q, skb, packets); } trace: trace_qdisc_dequeue(q, txq, *packets, skb); return skb; } /* * Transmit possibly several skbs, and handle the return status as * required. Owning qdisc running bit guarantees that only one CPU * can execute this function. * * Returns to the caller: * false - hardware queue frozen backoff * true - feel free to send more pkts */ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq, spinlock_t *root_lock, bool validate) { int ret = NETDEV_TX_BUSY; bool again = false; /* And release qdisc */ if (root_lock) spin_unlock(root_lock); /* Note that we validate skb (GSO, checksum, ...) outside of locks */ if (validate) skb = validate_xmit_skb_list(skb, dev, &again); #ifdef CONFIG_XFRM_OFFLOAD if (unlikely(again)) { if (root_lock) spin_lock(root_lock); dev_requeue_skb(skb, q); return false; } #endif if (likely(skb)) { HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); else qdisc_maybe_clear_missed(q, txq); HARD_TX_UNLOCK(dev, txq); } else { if (root_lock) spin_lock(root_lock); return true; } if (root_lock) spin_lock(root_lock); if (!dev_xmit_complete(ret)) { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely(ret != NETDEV_TX_BUSY)) net_warn_ratelimited("BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); dev_requeue_skb(skb, q); return false; } return true; } /* * NOTE: Called under qdisc_lock(q) with locally disabled BH. * * running seqcount guarantees only one CPU can process * this qdisc at a time. qdisc_lock(q) serializes queue accesses for * this queue. * * netif_tx_lock serializes accesses to device driver. * * qdisc_lock(q) and netif_tx_lock are mutually exclusive, * if one is grabbed, another must be free. * * Note, that this procedure can be called by a watchdog timer * * Returns to the caller: * 0 - queue is empty or throttled. * >0 - queue is not empty. 
* */ static inline bool qdisc_restart(struct Qdisc *q, int *packets) { spinlock_t *root_lock = NULL; struct netdev_queue *txq; struct net_device *dev; struct sk_buff *skb; bool validate; /* Dequeue packet */ skb = dequeue_skb(q, &validate, packets); if (unlikely(!skb)) return false; if (!(q->flags & TCQ_F_NOLOCK)) root_lock = qdisc_lock(q); dev = qdisc_dev(q); txq = skb_get_tx_queue(dev, skb); return sch_direct_xmit(skb, q, dev, txq, root_lock, validate); } void __qdisc_run(struct Qdisc *q) { int quota = READ_ONCE(net_hotdata.dev_tx_weight); int packets; while (qdisc_restart(q, &packets)) { quota -= packets; if (quota <= 0) { if (q->flags & TCQ_F_NOLOCK) set_bit(__QDISC_STATE_MISSED, &q->state); else __netif_schedule(q); break; } } } unsigned long dev_trans_start(struct net_device *dev) { unsigned long res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start); unsigned long val; unsigned int i; for (i = 1; i < dev->num_tx_queues; i++) { val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start); if (val && time_after(val, res)) res = val; } return res; } EXPORT_SYMBOL(dev_trans_start); static void netif_freeze_queues(struct net_device *dev) { unsigned int i; int cpu; cpu = smp_processor_id(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); /* We are the only thread of execution doing a * freeze, but we have to grab the _xmit_lock in * order to synchronize with threads which are in * the ->hard_start_xmit() handler and already * checked the frozen bit. */ __netif_tx_lock(txq, cpu); set_bit(__QUEUE_STATE_FROZEN, &txq->state); __netif_tx_unlock(txq); } } void netif_tx_lock(struct net_device *dev) { spin_lock(&dev->tx_global_lock); netif_freeze_queues(dev); } EXPORT_SYMBOL(netif_tx_lock); static void netif_unfreeze_queues(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); /* No need to grab the _xmit_lock here. If the * queue is not stopped for another reason, we * force a schedule. 
*/ clear_bit(__QUEUE_STATE_FROZEN, &txq->state); netif_schedule_queue(txq); } } void netif_tx_unlock(struct net_device *dev) { netif_unfreeze_queues(dev); spin_unlock(&dev->tx_global_lock); } EXPORT_SYMBOL(netif_tx_unlock); static void dev_watchdog(struct timer_list *t) { struct net_device *dev = from_timer(dev, t, watchdog_timer); bool release = true; spin_lock(&dev->tx_global_lock); if (!qdisc_tx_is_noop(dev)) { if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { unsigned int timedout_ms = 0; unsigned int i; unsigned long trans_start; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); trans_start = READ_ONCE(txq->trans_start); if (netif_xmit_stopped(txq) && time_after(jiffies, (trans_start + dev->watchdog_timeo))) { timedout_ms = jiffies_to_msecs(jiffies - trans_start); atomic_long_inc(&txq->trans_timeout); break; } } if (unlikely(timedout_ms)) { trace_net_dev_xmit_timeout(dev, i); netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n", raw_smp_processor_id(), i, timedout_ms); netif_freeze_queues(dev); dev->netdev_ops->ndo_tx_timeout(dev, i); netif_unfreeze_queues(dev); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) release = false; } } spin_unlock(&dev->tx_global_lock); if (release) netdev_put(dev, &dev->watchdog_dev_tracker); } void __netdev_watchdog_up(struct net_device *dev) { if (dev->netdev_ops->ndo_tx_timeout) { if (dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) netdev_hold(dev, &dev->watchdog_dev_tracker, GFP_ATOMIC); } } EXPORT_SYMBOL_GPL(__netdev_watchdog_up); static void dev_watchdog_up(struct net_device *dev) { __netdev_watchdog_up(dev); } static void dev_watchdog_down(struct net_device *dev) { netif_tx_lock_bh(dev); if (del_timer(&dev->watchdog_timer)) netdev_put(dev, &dev->watchdog_dev_tracker); netif_tx_unlock_bh(dev); } /** * netif_carrier_on - set carrier * @dev: network device * * Device has detected acquisition of carrier. */ void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; atomic_inc(&dev->carrier_up_count); linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); } } EXPORT_SYMBOL(netif_carrier_on); /** * netif_carrier_off - clear carrier * @dev: network device * * Device has detected loss of carrier. */ void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { if (dev->reg_state == NETREG_UNINITIALIZED) return; atomic_inc(&dev->carrier_down_count); linkwatch_fire_event(dev); } } EXPORT_SYMBOL(netif_carrier_off); /** * netif_carrier_event - report carrier state event * @dev: network device * * Device has detected a carrier event but the carrier state wasn't changed. * Use in drivers when querying carrier state asynchronously, to avoid missing * events (link flaps) if link recovers before it's queried. */ void netif_carrier_event(struct net_device *dev) { if (dev->reg_state == NETREG_UNINITIALIZED) return; atomic_inc(&dev->carrier_up_count); atomic_inc(&dev->carrier_down_count); linkwatch_fire_event(dev); } EXPORT_SYMBOL_GPL(netif_carrier_event); /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. 
*/ static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { __qdisc_drop(skb, to_free); return NET_XMIT_CN; } static struct sk_buff *noop_dequeue(struct Qdisc *qdisc) { return NULL; } struct Qdisc_ops noop_qdisc_ops __read_mostly = { .id = "noop", .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, .peek = noop_dequeue, .owner = THIS_MODULE, }; static struct netdev_queue noop_netdev_queue = { RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc), RCU_POINTER_INITIALIZER(qdisc_sleeping, &noop_qdisc), }; struct Qdisc noop_qdisc = { .enqueue = noop_enqueue, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, .prev = (struct sk_buff *)&noop_qdisc.gso_skb, .qlen = 0, .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock), }, .skb_bad_txq = { .next = (struct sk_buff *)&noop_qdisc.skb_bad_txq, .prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq, .qlen = 0, .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock), }, }; EXPORT_SYMBOL(noop_qdisc); static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt, struct netlink_ext_ack *extack) { /* register_qdisc() assigns a default of noop_enqueue if unset, * but __dev_queue_xmit() treats noqueue only as such * if this is NULL - so clear it here. */ qdisc->enqueue = NULL; return 0; } struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .id = "noqueue", .priv_size = 0, .init = noqueue_init, .enqueue = noop_enqueue, .dequeue = noop_dequeue, .peek = noop_dequeue, .owner = THIS_MODULE, }; const u8 sch_default_prio2band[TC_PRIO_MAX + 1] = { 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }; EXPORT_SYMBOL(sch_default_prio2band); /* 3-band FIFO queue: old style, but should be a bit faster than generic prio+fifo combination. */ #define PFIFO_FAST_BANDS 3 /* * Private data for a pfifo_fast scheduler containing: * - rings for priority bands */ struct pfifo_fast_priv { struct skb_array q[PFIFO_FAST_BANDS]; }; static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, int band) { return &priv->q[band]; } static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { int band = sch_default_prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct skb_array *q = band2list(priv, band); unsigned int pkt_len = qdisc_pkt_len(skb); int err; err = skb_array_produce(q, skb); if (unlikely(err)) { if (qdisc_is_percpu_stats(qdisc)) return qdisc_drop_cpu(skb, qdisc, to_free); else return qdisc_drop(skb, qdisc, to_free); } qdisc_update_stats_at_enqueue(qdisc, pkt_len); return NET_XMIT_SUCCESS; } static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct sk_buff *skb = NULL; bool need_retry = true; int band; retry: for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { struct skb_array *q = band2list(priv, band); if (__skb_array_empty(q)) continue; skb = __skb_array_consume(q); } if (likely(skb)) { qdisc_update_stats_at_dequeue(qdisc, skb); } else if (need_retry && READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) { /* Delay clearing the STATE_MISSED here to reduce * the overhead of the second spin_trylock() in * qdisc_run_begin() and __netif_schedule() calling * in qdisc_run_end(). 
*/ clear_bit(__QDISC_STATE_MISSED, &qdisc->state); clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); /* Make sure dequeuing happens after clearing * STATE_MISSED. */ smp_mb__after_atomic(); need_retry = false; goto retry; } return skb; } static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct sk_buff *skb = NULL; int band; for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { struct skb_array *q = band2list(priv, band); skb = __skb_array_peek(q); } return skb; } static void pfifo_fast_reset(struct Qdisc *qdisc) { int i, band; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (band = 0; band < PFIFO_FAST_BANDS; band++) { struct skb_array *q = band2list(priv, band); struct sk_buff *skb; /* NULL ring is possible if destroy path is due to a failed * skb_array_init() in pfifo_fast_init() case. */ if (!q->ring.queue) continue; while ((skb = __skb_array_consume(q)) != NULL) kfree_skb(skb); } if (qdisc_is_percpu_stats(qdisc)) { for_each_possible_cpu(i) { struct gnet_stats_queue *q; q = per_cpu_ptr(qdisc->cpu_qstats, i); q->backlog = 0; q->qlen = 0; } } } static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) { struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; memcpy(&opt.priomap, sch_default_prio2band, TC_PRIO_MAX + 1); if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; return skb->len; nla_put_failure: return -1; } static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt, struct netlink_ext_ack *extack) { unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); int prio; /* guard against zero length rings */ if (!qlen) return -EINVAL; for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { struct skb_array *q = band2list(priv, prio); int err; err = skb_array_init(q, qlen, GFP_KERNEL); if (err) return -ENOMEM; } /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; return 0; } static void pfifo_fast_destroy(struct Qdisc *sch) { struct pfifo_fast_priv *priv = qdisc_priv(sch); int prio; for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { struct skb_array *q = band2list(priv, prio); /* NULL ring is possible if destroy path is due to a failed * skb_array_init() in pfifo_fast_init() case. */ if (!q->ring.queue) continue; /* Destroy ring but no need to kfree_skb because a call to * pfifo_fast_reset() has already done that work. 
*/ ptr_ring_cleanup(&q->ring, NULL); } } static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch, unsigned int new_len) { struct pfifo_fast_priv *priv = qdisc_priv(sch); struct skb_array *bands[PFIFO_FAST_BANDS]; int prio; for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { struct skb_array *q = band2list(priv, prio); bands[prio] = q; } return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len, GFP_KERNEL); } struct Qdisc_ops pfifo_fast_ops __read_mostly = { .id = "pfifo_fast", .priv_size = sizeof(struct pfifo_fast_priv), .enqueue = pfifo_fast_enqueue, .dequeue = pfifo_fast_dequeue, .peek = pfifo_fast_peek, .init = pfifo_fast_init, .destroy = pfifo_fast_destroy, .reset = pfifo_fast_reset, .dump = pfifo_fast_dump, .change_tx_queue_len = pfifo_fast_change_tx_queue_len, .owner = THIS_MODULE, .static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS, }; EXPORT_SYMBOL(pfifo_fast_ops); static struct lock_class_key qdisc_tx_busylock; struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack) { struct Qdisc *sch; unsigned int size = sizeof(*sch) + ops->priv_size; int err = -ENOBUFS; struct net_device *dev; if (!dev_queue) { NL_SET_ERR_MSG(extack, "No device queue given"); err = -EINVAL; goto errout; } dev = dev_queue->dev; sch = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue)); if (!sch) goto errout; __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); gnet_stats_basic_sync_init(&sch->bstats); spin_lock_init(&sch->q.lock); if (ops->static_flags & TCQ_F_CPUSTATS) { sch->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!sch->cpu_bstats) goto errout1; sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue); if (!sch->cpu_qstats) { free_percpu(sch->cpu_bstats); goto errout1; } } spin_lock_init(&sch->busylock); lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); sch->ops = ops; sch->flags = ops->static_flags; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; sch->owner = -1; netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL); refcount_set(&sch->refcnt, 1); return sch; errout1: kfree(sch); errout: return ERR_PTR(err); } struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, unsigned int parentid, struct netlink_ext_ack *extack) { struct Qdisc *sch; if (!try_module_get(ops->owner)) { NL_SET_ERR_MSG(extack, "Failed to increase module reference counter"); return NULL; } sch = qdisc_alloc(dev_queue, ops, extack); if (IS_ERR(sch)) { module_put(ops->owner); return NULL; } sch->parent = parentid; if (!ops->init || ops->init(sch, NULL, extack) == 0) { trace_qdisc_create(ops, dev_queue->dev, parentid); return sch; } qdisc_put(sch); return NULL; } EXPORT_SYMBOL(qdisc_create_dflt); /* Under qdisc_lock(qdisc) and BH! 
*/ void qdisc_reset(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; trace_qdisc_reset(qdisc); if (ops->reset) ops->reset(qdisc); __skb_queue_purge(&qdisc->gso_skb); __skb_queue_purge(&qdisc->skb_bad_txq); qdisc->q.qlen = 0; qdisc->qstats.backlog = 0; } EXPORT_SYMBOL(qdisc_reset); void qdisc_free(struct Qdisc *qdisc) { if (qdisc_is_percpu_stats(qdisc)) { free_percpu(qdisc->cpu_bstats); free_percpu(qdisc->cpu_qstats); } kfree(qdisc); } static void qdisc_free_cb(struct rcu_head *head) { struct Qdisc *q = container_of(head, struct Qdisc, rcu); qdisc_free(q); } static void __qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; struct net_device *dev = qdisc_dev(qdisc); #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc); qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif gen_kill_estimator(&qdisc->rate_est); qdisc_reset(qdisc); if (ops->destroy) ops->destroy(qdisc); module_put(ops->owner); netdev_put(dev, &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); call_rcu(&qdisc->rcu, qdisc_free_cb); } void qdisc_destroy(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return; __qdisc_destroy(qdisc); } void qdisc_put(struct Qdisc *qdisc) { if (!qdisc) return; if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_test(&qdisc->refcnt)) return; __qdisc_destroy(qdisc); } EXPORT_SYMBOL(qdisc_put); /* Version of qdisc_put() that is called with rtnl mutex unlocked. * Intended to be used as optimization, this function only takes rtnl lock if * qdisc reference counter reached zero. */ void qdisc_put_unlocked(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) return; __qdisc_destroy(qdisc); rtnl_unlock(); } EXPORT_SYMBOL(qdisc_put_unlocked); /* Attach toplevel qdisc to device queue. */ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc) { struct Qdisc *oqdisc = rtnl_dereference(dev_queue->qdisc_sleeping); spinlock_t *root_lock; root_lock = qdisc_lock(oqdisc); spin_lock_bh(root_lock); /* ... 
and graft new one */ if (qdisc == NULL) qdisc = &noop_qdisc; rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); spin_unlock_bh(root_lock); return oqdisc; } EXPORT_SYMBOL(dev_graft_qdisc); static void shutdown_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc_default) { struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { rcu_assign_pointer(dev_queue->qdisc, qdisc_default); rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc_default); qdisc_put(qdisc); } } static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) { struct Qdisc *qdisc; const struct Qdisc_ops *ops = default_qdisc_ops; if (dev->priv_flags & IFF_NO_QUEUE) ops = &noqueue_qdisc_ops; else if(dev->type == ARPHRD_CAN) ops = &pfifo_fast_ops; qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL); if (!qdisc) return; if (!netif_is_multiqueue(dev)) qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); } static void attach_default_qdiscs(struct net_device *dev) { struct netdev_queue *txq; struct Qdisc *qdisc; txq = netdev_get_tx_queue(dev, 0); if (!netif_is_multiqueue(dev) || dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); qdisc = rtnl_dereference(txq->qdisc_sleeping); rcu_assign_pointer(dev->qdisc, qdisc); qdisc_refcount_inc(qdisc); } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL); if (qdisc) { rcu_assign_pointer(dev->qdisc, qdisc); qdisc->ops->attach(qdisc); } } qdisc = rtnl_dereference(dev->qdisc); /* Detect default qdisc setup/init failed and fallback to "noqueue" */ if (qdisc == &noop_qdisc) { netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n", default_qdisc_ops->id, noqueue_qdisc_ops.id); netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); dev->priv_flags |= IFF_NO_QUEUE; netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); qdisc = rtnl_dereference(txq->qdisc_sleeping); rcu_assign_pointer(dev->qdisc, qdisc); qdisc_refcount_inc(qdisc); dev->priv_flags ^= IFF_NO_QUEUE; } #ifdef CONFIG_NET_SCHED if (qdisc != &noop_qdisc) qdisc_hash_add(qdisc, false); #endif } static void transition_one_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_need_watchdog) { struct Qdisc *new_qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); int *need_watchdog_p = _need_watchdog; if (!(new_qdisc->flags & TCQ_F_BUILTIN)) clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state); rcu_assign_pointer(dev_queue->qdisc, new_qdisc); if (need_watchdog_p) { WRITE_ONCE(dev_queue->trans_start, 0); *need_watchdog_p = 1; } } void dev_activate(struct net_device *dev) { int need_watchdog; /* No queueing discipline is attached to device; * create default one for devices, which need queueing * and noqueue_qdisc for virtual interfaces */ if (rtnl_dereference(dev->qdisc) == &noop_qdisc) attach_default_qdiscs(dev); if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; need_watchdog = 0; netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); if (dev_ingress_queue(dev)) transition_one_qdisc(dev, dev_ingress_queue(dev), NULL); if (need_watchdog) { netif_trans_update(dev); dev_watchdog_up(dev); } } EXPORT_SYMBOL(dev_activate); static void qdisc_deactivate(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return; 
set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); } static void dev_deactivate_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc_default) { struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { qdisc_deactivate(qdisc); rcu_assign_pointer(dev_queue->qdisc, qdisc_default); } } static void dev_reset_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) { struct Qdisc *qdisc; bool nolock; qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); if (!qdisc) return; nolock = qdisc->flags & TCQ_F_NOLOCK; if (nolock) spin_lock_bh(&qdisc->seqlock); spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); if (nolock) { clear_bit(__QDISC_STATE_MISSED, &qdisc->state); clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); spin_unlock_bh(&qdisc->seqlock); } } static bool some_qdisc_is_busy(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *dev_queue; spinlock_t *root_lock; struct Qdisc *q; int val; dev_queue = netdev_get_tx_queue(dev, i); q = rtnl_dereference(dev_queue->qdisc_sleeping); root_lock = qdisc_lock(q); spin_lock_bh(root_lock); val = (qdisc_is_running(q) || test_bit(__QDISC_STATE_SCHED, &q->state)); spin_unlock_bh(root_lock); if (val) return true; } return false; } /** * dev_deactivate_many - deactivate transmissions on several devices * @head: list of devices to deactivate * * This function returns only when all outstanding transmissions * have completed, unless all devices are in dismantle phase. */ void dev_deactivate_many(struct list_head *head) { struct net_device *dev; list_for_each_entry(dev, head, close_list) { netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); if (dev_ingress_queue(dev)) dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc); dev_watchdog_down(dev); } /* Wait for outstanding qdisc-less dev_queue_xmit calls or * outstanding qdisc enqueuing calls. * This is avoided if all devices are in dismantle phase : * Caller will call synchronize_net() for us */ synchronize_net(); list_for_each_entry(dev, head, close_list) { netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); if (dev_ingress_queue(dev)) dev_reset_queue(dev, dev_ingress_queue(dev), NULL); } /* Wait for outstanding qdisc_run calls. */ list_for_each_entry(dev, head, close_list) { while (some_qdisc_is_busy(dev)) { /* wait_event() would avoid this sleep-loop but would * require expensive checks in the fast paths of packet * processing which isn't worth it. 
*/ schedule_timeout_uninterruptible(1); } } } void dev_deactivate(struct net_device *dev) { LIST_HEAD(single); list_add(&dev->close_list, &single); dev_deactivate_many(&single); list_del(&single); } EXPORT_SYMBOL(dev_deactivate); static int qdisc_change_tx_queue_len(struct net_device *dev, struct netdev_queue *dev_queue) { struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); const struct Qdisc_ops *ops = qdisc->ops; if (ops->change_tx_queue_len) return ops->change_tx_queue_len(qdisc, dev->tx_queue_len); return 0; } void dev_qdisc_change_real_num_tx(struct net_device *dev, unsigned int new_real_tx) { struct Qdisc *qdisc = rtnl_dereference(dev->qdisc); if (qdisc->ops->change_real_num_tx) qdisc->ops->change_real_num_tx(qdisc, new_real_tx); } void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) { #ifdef CONFIG_NET_SCHED struct net_device *dev = qdisc_dev(sch); struct Qdisc *qdisc; unsigned int i; for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping); /* Only update the default qdiscs we created, * qdiscs with handles are always hashed. */ if (qdisc != &noop_qdisc && !qdisc->handle) qdisc_hash_del(qdisc); } for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping); if (qdisc != &noop_qdisc && !qdisc->handle) qdisc_hash_add(qdisc, false); } #endif } EXPORT_SYMBOL(mq_change_real_num_tx); int dev_qdisc_change_tx_queue_len(struct net_device *dev) { bool up = dev->flags & IFF_UP; unsigned int i; int ret = 0; if (up) dev_deactivate(dev); for (i = 0; i < dev->num_tx_queues; i++) { ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]); /* TODO: revert changes on a partial failure */ if (ret) break; } if (up) dev_activate(dev); return ret; } static void dev_init_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc) { struct Qdisc *qdisc = _qdisc; rcu_assign_pointer(dev_queue->qdisc, qdisc); rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); } void dev_init_scheduler(struct net_device *dev) { rcu_assign_pointer(dev->qdisc, &noop_qdisc); netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); timer_setup(&dev->watchdog_timer, dev_watchdog, 0); } void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); qdisc_put(rtnl_dereference(dev->qdisc)); rcu_assign_pointer(dev->qdisc, &noop_qdisc); WARN_ON(timer_pending(&dev->watchdog_timer)); } /** * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division * @rate: Rate to compute reciprocal division values of * @mult: Multiplier for reciprocal division * @shift: Shift for reciprocal division * * The multiplier and shift for reciprocal division by rate are stored * in mult and shift. * * The deal here is to replace a divide by a reciprocal one * in fast path (a reciprocal divide is a multiply and a shift) * * Normal formula would be : * time_in_ns = (NSEC_PER_SEC * len) / rate_bps * * We compute mult/shift to use instead : * time_in_ns = (len * mult) >> shift; * * We try to get the highest possible mult value for accuracy, * but have to make sure no overflows will ever happen. * * reciprocal_value() is not used here it doesn't handle 64-bit values. 
*/ static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift) { u64 factor = NSEC_PER_SEC; *mult = 1; *shift = 0; if (rate <= 0) return; for (;;) { *mult = div64_u64(factor, rate); if (*mult & (1U << 31) || factor & (1ULL << 63)) break; factor <<= 1; (*shift)++; } } void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf, u64 rate64) { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; r->mpu = conf->mpu; r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift); } EXPORT_SYMBOL(psched_ratecfg_precompute); void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64) { r->rate_pkts_ps = pktrate64; psched_ratecfg_precompute__(r->rate_pkts_ps, &r->mult, &r->shift); } EXPORT_SYMBOL(psched_ppscfg_precompute); void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head) { /* Protected with chain0->filter_chain_lock. * Can't access chain directly because tp_head can be NULL. */ struct mini_Qdisc *miniq_old = rcu_dereference_protected(*miniqp->p_miniq, 1); struct mini_Qdisc *miniq; if (!tp_head) { RCU_INIT_POINTER(*miniqp->p_miniq, NULL); } else { miniq = miniq_old != &miniqp->miniq1 ? &miniqp->miniq1 : &miniqp->miniq2; /* We need to make sure that readers won't see the miniq * we are about to modify. So ensure that at least one RCU * grace period has elapsed since the miniq was made * inactive. */ if (IS_ENABLED(CONFIG_PREEMPT_RT)) cond_synchronize_rcu(miniq->rcu_state); else if (!poll_state_synchronize_rcu(miniq->rcu_state)) synchronize_rcu_expedited(); miniq->filter_list = tp_head; rcu_assign_pointer(*miniqp->p_miniq, miniq); } if (miniq_old) /* This is counterpart of the rcu sync above. We need to * block potential new user of miniq_old until all readers * are not seeing it. */ miniq_old->rcu_state = start_poll_synchronize_rcu(); } EXPORT_SYMBOL(mini_qdisc_pair_swap); void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block) { miniqp->miniq1.block = block; miniqp->miniq2.block = block; } EXPORT_SYMBOL(mini_qdisc_pair_block_init); void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq) { miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats; miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats; miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats; miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats; miniqp->miniq1.rcu_state = get_state_synchronize_rcu(); miniqp->miniq2.rcu_state = miniqp->miniq1.rcu_state; miniqp->p_miniq = p_miniq; } EXPORT_SYMBOL(mini_qdisc_pair_init);
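/*
 * Illustrative sketch, not part of the kernel sources above. It shows, in
 * plain user-space C, how the mult/shift pair produced by
 * psched_ratecfg_precompute__() turns the division
 *   time_in_ns = (NSEC_PER_SEC * len) / rate_bps
 * into a multiply-and-shift on the fast path. The names demo_precompute()
 * and demo_l2t_ns() are made up for this example; the real helpers live in
 * net/sched/sch_generic.c and include/net/sch_generic.h.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_NSEC_PER_SEC 1000000000ULL

static void demo_precompute(uint64_t rate, uint32_t *mult, uint8_t *shift)
{
	uint64_t factor = DEMO_NSEC_PER_SEC;

	*mult = 1;
	*shift = 0;
	if (!rate)
		return;
	/* Keep doubling factor until mult is as large as possible without
	 * setting bit 31 of mult or bit 63 of factor, mirroring the loop in
	 * psched_ratecfg_precompute__() above.
	 */
	for (;;) {
		*mult = (uint32_t)(factor / rate);	/* div64_u64() in the kernel */
		if ((*mult & (1U << 31)) || (factor & (1ULL << 63)))
			break;
		factor <<= 1;
		(*shift)++;
	}
}

/* Bytes-to-nanoseconds conversion using the precomputed reciprocal. */
static uint64_t demo_l2t_ns(uint32_t mult, uint8_t shift, uint64_t len)
{
	return (len * mult) >> shift;
}

int main(void)
{
	uint64_t rate = 125000000ULL;	/* 1 Gbit/s expressed in bytes per second */
	uint32_t mult;
	uint8_t shift;

	demo_precompute(rate, &mult, &shift);
	/* Exact result: 10^9 * 1500 / 125000000 = 12000 ns; the
	 * multiply-and-shift approximation matches it here.
	 */
	printf("mult=%u shift=%u t(1500 bytes)=%llu ns\n",
	       (unsigned int)mult, (unsigned int)shift,
	       (unsigned long long)demo_l2t_ns(mult, shift, 1500));
	return 0;
}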
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Media entity * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> */ #ifndef _MEDIA_ENTITY_H #define _MEDIA_ENTITY_H #include <linux/bitmap.h> #include <linux/bug.h> #include <linux/container_of.h> #include <linux/fwnode.h> #include <linux/list.h> #include <linux/media.h> #include <linux/minmax.h> #include <linux/types.h> /* Enums used internally at the media controller to represent graphs */ /** * enum media_gobj_type - type of a graph object * * @MEDIA_GRAPH_ENTITY: Identify a media entity * @MEDIA_GRAPH_PAD: Identify a media pad * @MEDIA_GRAPH_LINK: Identify a media link * @MEDIA_GRAPH_INTF_DEVNODE: Identify a media Kernel API interface via * a device node */ enum media_gobj_type { MEDIA_GRAPH_ENTITY, MEDIA_GRAPH_PAD, MEDIA_GRAPH_LINK, MEDIA_GRAPH_INTF_DEVNODE, }; #define MEDIA_BITS_PER_TYPE 8 #define
MEDIA_BITS_PER_ID (32 - MEDIA_BITS_PER_TYPE) #define MEDIA_ID_MASK GENMASK_ULL(MEDIA_BITS_PER_ID - 1, 0) /* Structs to represent the objects that belong to a media graph */ /** * struct media_gobj - Define a graph object. * * @mdev: Pointer to the struct &media_device that owns the object * @id: Non-zero object ID identifier. The ID should be unique * inside a media_device, as it is composed by * %MEDIA_BITS_PER_TYPE to store the type plus * %MEDIA_BITS_PER_ID to store the ID * @list: List entry stored in one of the per-type mdev object lists * * All objects on the media graph should have this struct embedded */ struct media_gobj { struct media_device *mdev; u32 id; struct list_head list; }; #define MEDIA_ENTITY_ENUM_MAX_DEPTH 16 /** * struct media_entity_enum - An enumeration of media entities. * * @bmap: Bit map in which each bit represents one entity at struct * media_entity->internal_idx. * @idx_max: Number of bits in bmap */ struct media_entity_enum { unsigned long *bmap; int idx_max; }; /** * struct media_graph - Media graph traversal state * * @stack: Graph traversal stack; the stack contains information * on the path the media entities to be walked and the * links through which they were reached. * @stack.entity: pointer to &struct media_entity at the graph. * @stack.link: pointer to &struct list_head. * @ent_enum: Visited entities * @top: The top of the stack */ struct media_graph { struct { struct media_entity *entity; struct list_head *link; } stack[MEDIA_ENTITY_ENUM_MAX_DEPTH]; struct media_entity_enum ent_enum; int top; }; /** * struct media_pipeline - Media pipeline related information * * @allocated: Media pipeline allocated and freed by the framework * @mdev: The media device the pipeline is part of * @pads: List of media_pipeline_pad * @start_count: Media pipeline start - stop count */ struct media_pipeline { bool allocated; struct media_device *mdev; struct list_head pads; int start_count; }; /** * struct media_pipeline_pad - A pad part of a media pipeline * * @list: Entry in the media_pad pads list * @pipe: The media_pipeline that the pad is part of * @pad: The media pad * * This structure associate a pad with a media pipeline. Instances of * media_pipeline_pad are created by media_pipeline_start() when it builds the * pipeline, and stored in the &media_pad.pads list. media_pipeline_stop() * removes the entries from the list and deletes them. */ struct media_pipeline_pad { struct list_head list; struct media_pipeline *pipe; struct media_pad *pad; }; /** * struct media_pipeline_pad_iter - Iterator for media_pipeline_for_each_pad * * @cursor: The current element */ struct media_pipeline_pad_iter { struct list_head *cursor; }; /** * struct media_pipeline_entity_iter - Iterator for media_pipeline_for_each_entity * * @ent_enum: The entity enumeration tracker * @cursor: The current element */ struct media_pipeline_entity_iter { struct media_entity_enum ent_enum; struct list_head *cursor; }; /** * struct media_link - A link object part of a media graph. * * @graph_obj: Embedded structure containing the media object common data * @list: Linked list associated with an entity or an interface that * owns the link. * @gobj0: Part of a union. Used to get the pointer for the first * graph_object of the link. * @source: Part of a union. Used only if the first object (gobj0) is * a pad. In that case, it represents the source pad. * @intf: Part of a union. Used only if the first object (gobj0) is * an interface. * @gobj1: Part of a union. 
Used to get the pointer for the second * graph_object of the link. * @sink: Part of a union. Used only if the second object (gobj1) is * a pad. In that case, it represents the sink pad. * @entity: Part of a union. Used only if the second object (gobj1) is * an entity. * @reverse: Pointer to the link for the reverse direction of a pad to pad * link. * @flags: Link flags, as defined in uapi/media.h (MEDIA_LNK_FL_*) * @is_backlink: Indicate if the link is a backlink. */ struct media_link { struct media_gobj graph_obj; struct list_head list; union { struct media_gobj *gobj0; struct media_pad *source; struct media_interface *intf; }; union { struct media_gobj *gobj1; struct media_pad *sink; struct media_entity *entity; }; struct media_link *reverse; unsigned long flags; bool is_backlink; }; /** * enum media_pad_signal_type - type of the signal inside a media pad * * @PAD_SIGNAL_DEFAULT: * Default signal. Use this when all inputs or all outputs are * uniquely identified by the pad number. * @PAD_SIGNAL_ANALOG: * The pad contains an analog signal. It can be Radio Frequency, * Intermediate Frequency, a baseband signal or sub-carriers. * Tuner inputs, IF-PLL demodulators, composite and s-video signals * should use it. * @PAD_SIGNAL_DV: * Contains a digital video signal, with can be a bitstream of samples * taken from an analog TV video source. On such case, it usually * contains the VBI data on it. * @PAD_SIGNAL_AUDIO: * Contains an Intermediate Frequency analog signal from an audio * sub-carrier or an audio bitstream. IF signals are provided by tuners * and consumed by audio AM/FM decoders. Bitstream audio is provided by * an audio decoder. */ enum media_pad_signal_type { PAD_SIGNAL_DEFAULT = 0, PAD_SIGNAL_ANALOG, PAD_SIGNAL_DV, PAD_SIGNAL_AUDIO, }; /** * struct media_pad - A media pad graph object. * * @graph_obj: Embedded structure containing the media object common data * @entity: Entity this pad belongs to * @index: Pad index in the entity pads array, numbered from 0 to n * @num_links: Number of links connected to this pad * @sig_type: Type of the signal inside a media pad * @flags: Pad flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_PAD_FL_*``) * @pipe: Pipeline this pad belongs to. Use media_entity_pipeline() to * access this field. */ struct media_pad { struct media_gobj graph_obj; /* must be first field in struct */ struct media_entity *entity; u16 index; u16 num_links; enum media_pad_signal_type sig_type; unsigned long flags; /* * The fields below are private, and should only be accessed via * appropriate functions. */ struct media_pipeline *pipe; }; /** * struct media_entity_operations - Media entity operations * @get_fwnode_pad: Return the pad number based on a fwnode endpoint or * a negative value on error. This operation can be used * to map a fwnode to a media pad number. Optional. * @link_setup: Notify the entity of link changes. The operation can * return an error, in which case link setup will be * cancelled. Optional. * @link_validate: Return whether a link is valid from the entity point of * view. The media_pipeline_start() function * validates all links by calling this operation. Optional. * @has_pad_interdep: Return whether two pads of the entity are * interdependent. If two pads are interdependent they are * part of the same pipeline and enabling one of the pads * means that the other pad will become "locked" and * doesn't allow configuration changes. pad0 and pad1 are * guaranteed to not both be sinks or sources. 
Never call * the .has_pad_interdep() operation directly, always use * media_entity_has_pad_interdep(). * Optional: If the operation isn't implemented all pads * will be considered as interdependent. * * .. note:: * * Those these callbacks are called with struct &media_device.graph_mutex * mutex held. */ struct media_entity_operations { int (*get_fwnode_pad)(struct media_entity *entity, struct fwnode_endpoint *endpoint); int (*link_setup)(struct media_entity *entity, const struct media_pad *local, const struct media_pad *remote, u32 flags); int (*link_validate)(struct media_link *link); bool (*has_pad_interdep)(struct media_entity *entity, unsigned int pad0, unsigned int pad1); }; /** * enum media_entity_type - Media entity type * * @MEDIA_ENTITY_TYPE_BASE: * The entity isn't embedded in another subsystem structure. * @MEDIA_ENTITY_TYPE_VIDEO_DEVICE: * The entity is embedded in a struct video_device instance. * @MEDIA_ENTITY_TYPE_V4L2_SUBDEV: * The entity is embedded in a struct v4l2_subdev instance. * * Media entity objects are often not instantiated directly, but the media * entity structure is inherited by (through embedding) other subsystem-specific * structures. The media entity type identifies the type of the subclass * structure that implements a media entity instance. * * This allows runtime type identification of media entities and safe casting to * the correct object type. For instance, a media entity structure instance * embedded in a v4l2_subdev structure instance will have the type * %MEDIA_ENTITY_TYPE_V4L2_SUBDEV and can safely be cast to a &v4l2_subdev * structure using the container_of() macro. */ enum media_entity_type { MEDIA_ENTITY_TYPE_BASE, MEDIA_ENTITY_TYPE_VIDEO_DEVICE, MEDIA_ENTITY_TYPE_V4L2_SUBDEV, }; /** * struct media_entity - A media entity graph object. * * @graph_obj: Embedded structure containing the media object common data. * @name: Entity name. * @obj_type: Type of the object that implements the media_entity. * @function: Entity main function, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_ENT_F_*``) * @flags: Entity flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_ENT_FL_*``) * @num_pads: Number of sink and source pads. * @num_links: Total number of links, forward and back, enabled and disabled. * @num_backlinks: Number of backlinks * @internal_idx: An unique internal entity specific number. The numbers are * re-used if entities are unregistered or registered again. * @pads: Pads array with the size defined by @num_pads. * @links: List of data links. * @ops: Entity operations. * @use_count: Use count for the entity. * @info: Union with devnode information. Kept just for backward * compatibility. * @info.dev: Contains device major and minor info. * @info.dev.major: device node major, if the device is a devnode. * @info.dev.minor: device node minor, if the device is a devnode. * * .. note:: * * The @use_count reference count must never be negative, but is a signed * integer on purpose: a simple ``WARN_ON(<0)`` check can be used to detect * reference count bugs that would make it negative. 
*/ struct media_entity { struct media_gobj graph_obj; /* must be first field in struct */ const char *name; enum media_entity_type obj_type; u32 function; unsigned long flags; u16 num_pads; u16 num_links; u16 num_backlinks; int internal_idx; struct media_pad *pads; struct list_head links; const struct media_entity_operations *ops; int use_count; union { struct { u32 major; u32 minor; } dev; } info; }; /** * media_entity_for_each_pad - Iterate on all pads in an entity * @entity: The entity the pads belong to * @iter: The iterator pad * * Iterate on all pads in a media entity. */ #define media_entity_for_each_pad(entity, iter) \ for (iter = (entity)->pads; \ iter < &(entity)->pads[(entity)->num_pads]; \ ++iter) /** * struct media_interface - A media interface graph object. * * @graph_obj: embedded graph object * @links: List of links pointing to graph entities * @type: Type of the interface as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_INTF_T_*``) * @flags: Interface flags as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_INTF_FL_*``) * * .. note:: * * Currently, no flags for &media_interface is defined. */ struct media_interface { struct media_gobj graph_obj; struct list_head links; u32 type; u32 flags; }; /** * struct media_intf_devnode - A media interface via a device node. * * @intf: embedded interface object * @major: Major number of a device node * @minor: Minor number of a device node */ struct media_intf_devnode { struct media_interface intf; /* Should match the fields at media_v2_intf_devnode */ u32 major; u32 minor; }; /** * media_entity_id() - return the media entity graph object id * * @entity: pointer to &media_entity */ static inline u32 media_entity_id(struct media_entity *entity) { return entity->graph_obj.id; } /** * media_type() - return the media object type * * @gobj: Pointer to the struct &media_gobj graph object */ static inline enum media_gobj_type media_type(struct media_gobj *gobj) { return gobj->id >> MEDIA_BITS_PER_ID; } /** * media_id() - return the media object ID * * @gobj: Pointer to the struct &media_gobj graph object */ static inline u32 media_id(struct media_gobj *gobj) { return gobj->id & MEDIA_ID_MASK; } /** * media_gobj_gen_id() - encapsulates type and ID on at the object ID * * @type: object type as define at enum &media_gobj_type. * @local_id: next ID, from struct &media_device.id. */ static inline u32 media_gobj_gen_id(enum media_gobj_type type, u64 local_id) { u32 id; id = type << MEDIA_BITS_PER_ID; id |= local_id & MEDIA_ID_MASK; return id; } /** * is_media_entity_v4l2_video_device() - Check if the entity is a video_device * @entity: pointer to entity * * Return: %true if the entity is an instance of a video_device object and can * safely be cast to a struct video_device using the container_of() macro, or * %false otherwise. */ static inline bool is_media_entity_v4l2_video_device(struct media_entity *entity) { return entity && entity->obj_type == MEDIA_ENTITY_TYPE_VIDEO_DEVICE; } /** * is_media_entity_v4l2_subdev() - Check if the entity is a v4l2_subdev * @entity: pointer to entity * * Return: %true if the entity is an instance of a &v4l2_subdev object and can * safely be cast to a struct &v4l2_subdev using the container_of() macro, or * %false otherwise. 
*/ static inline bool is_media_entity_v4l2_subdev(struct media_entity *entity) { return entity && entity->obj_type == MEDIA_ENTITY_TYPE_V4L2_SUBDEV; } /** * media_entity_enum_init - Initialise an entity enumeration * * @ent_enum: Entity enumeration to be initialised * @mdev: The related media device * * Return: zero on success or a negative error code. */ __must_check int media_entity_enum_init(struct media_entity_enum *ent_enum, struct media_device *mdev); /** * media_entity_enum_cleanup - Release resources of an entity enumeration * * @ent_enum: Entity enumeration to be released */ void media_entity_enum_cleanup(struct media_entity_enum *ent_enum); /** * media_entity_enum_zero - Clear the entire enum * * @ent_enum: Entity enumeration to be cleared */ static inline void media_entity_enum_zero(struct media_entity_enum *ent_enum) { bitmap_zero(ent_enum->bmap, ent_enum->idx_max); } /** * media_entity_enum_set - Mark a single entity in the enum * * @ent_enum: Entity enumeration * @entity: Entity to be marked */ static inline void media_entity_enum_set(struct media_entity_enum *ent_enum, struct media_entity *entity) { if (WARN_ON(entity->internal_idx >= ent_enum->idx_max)) return; __set_bit(entity->internal_idx, ent_enum->bmap); } /** * media_entity_enum_clear - Unmark a single entity in the enum * * @ent_enum: Entity enumeration * @entity: Entity to be unmarked */ static inline void media_entity_enum_clear(struct media_entity_enum *ent_enum, struct media_entity *entity) { if (WARN_ON(entity->internal_idx >= ent_enum->idx_max)) return; __clear_bit(entity->internal_idx, ent_enum->bmap); } /** * media_entity_enum_test - Test whether the entity is marked * * @ent_enum: Entity enumeration * @entity: Entity to be tested * * Returns %true if the entity was marked. */ static inline bool media_entity_enum_test(struct media_entity_enum *ent_enum, struct media_entity *entity) { if (WARN_ON(entity->internal_idx >= ent_enum->idx_max)) return true; return test_bit(entity->internal_idx, ent_enum->bmap); } /** * media_entity_enum_test_and_set - Test whether the entity is marked, * and mark it * * @ent_enum: Entity enumeration * @entity: Entity to be tested * * Returns %true if the entity was marked, and mark it before doing so. */ static inline bool media_entity_enum_test_and_set(struct media_entity_enum *ent_enum, struct media_entity *entity) { if (WARN_ON(entity->internal_idx >= ent_enum->idx_max)) return true; return __test_and_set_bit(entity->internal_idx, ent_enum->bmap); } /** * media_entity_enum_empty - Test whether the entire enum is empty * * @ent_enum: Entity enumeration * * Return: %true if the entity was empty. */ static inline bool media_entity_enum_empty(struct media_entity_enum *ent_enum) { return bitmap_empty(ent_enum->bmap, ent_enum->idx_max); } /** * media_entity_enum_intersects - Test whether two enums intersect * * @ent_enum1: First entity enumeration * @ent_enum2: Second entity enumeration * * Return: %true if entity enumerations @ent_enum1 and @ent_enum2 intersect, * otherwise %false. */ static inline bool media_entity_enum_intersects( struct media_entity_enum *ent_enum1, struct media_entity_enum *ent_enum2) { WARN_ON(ent_enum1->idx_max != ent_enum2->idx_max); return bitmap_intersects(ent_enum1->bmap, ent_enum2->bmap, min(ent_enum1->idx_max, ent_enum2->idx_max)); } /** * gobj_to_entity - returns the struct &media_entity pointer from the * @gobj contained on it. 
* * @gobj: Pointer to the struct &media_gobj graph object */ #define gobj_to_entity(gobj) \ container_of(gobj, struct media_entity, graph_obj) /** * gobj_to_pad - returns the struct &media_pad pointer from the * @gobj contained on it. * * @gobj: Pointer to the struct &media_gobj graph object */ #define gobj_to_pad(gobj) \ container_of(gobj, struct media_pad, graph_obj) /** * gobj_to_link - returns the struct &media_link pointer from the * @gobj contained on it. * * @gobj: Pointer to the struct &media_gobj graph object */ #define gobj_to_link(gobj) \ container_of(gobj, struct media_link, graph_obj) /** * gobj_to_intf - returns the struct &media_interface pointer from the * @gobj contained on it. * * @gobj: Pointer to the struct &media_gobj graph object */ #define gobj_to_intf(gobj) \ container_of(gobj, struct media_interface, graph_obj) /** * intf_to_devnode - returns the struct media_intf_devnode pointer from the * @intf contained on it. * * @intf: Pointer to struct &media_intf_devnode */ #define intf_to_devnode(intf) \ container_of(intf, struct media_intf_devnode, intf) /** * media_gobj_create - Initialize a graph object * * @mdev: Pointer to the &media_device that contains the object * @type: Type of the object * @gobj: Pointer to the struct &media_gobj graph object * * This routine initializes the embedded struct &media_gobj inside a * media graph object. It is called automatically if ``media_*_create`` * function calls are used. However, if the object (entity, link, pad, * interface) is embedded on some other object, this function should be * called before registering the object at the media controller. */ void media_gobj_create(struct media_device *mdev, enum media_gobj_type type, struct media_gobj *gobj); /** * media_gobj_destroy - Stop using a graph object on a media device * * @gobj: Pointer to the struct &media_gobj graph object * * This should be called by all routines like media_device_unregister() * that remove/destroy media graph objects. */ void media_gobj_destroy(struct media_gobj *gobj); /** * media_entity_pads_init() - Initialize the entity pads * * @entity: entity where the pads belong * @num_pads: total number of sink and source pads * @pads: Array of @num_pads pads. * * The pads array is managed by the entity driver and passed to * media_entity_pads_init() where its pointer will be stored in the * &media_entity structure. * * If no pads are needed, drivers could either directly fill * &media_entity->num_pads with 0 and &media_entity->pads with %NULL or call * this function that will do the same. * * As the number of pads is known in advance, the pads array is not allocated * dynamically but is managed by the entity driver. Most drivers will embed the * pads array in a driver-specific structure, avoiding dynamic allocation. * * Drivers must set the direction of every pad in the pads array before calling * media_entity_pads_init(). The function will initialize the other pads fields. */ int media_entity_pads_init(struct media_entity *entity, u16 num_pads, struct media_pad *pads); /** * media_entity_cleanup() - free resources associated with an entity * * @entity: entity where the pads belong * * This function must be called during the cleanup phase after unregistering * the entity (currently, it does nothing). * * Calling media_entity_cleanup() on a media_entity whose memory has been * zeroed but that has not been initialized with media_entity_pad_init() is * valid and is a no-op. 
*/ #if IS_ENABLED(CONFIG_MEDIA_CONTROLLER) static inline void media_entity_cleanup(struct media_entity *entity) {} #else #define media_entity_cleanup(entity) do { } while (false) #endif /** * media_get_pad_index() - retrieves a pad index from an entity * * @entity: entity where the pads belong * @pad_type: the type of the pad, one of MEDIA_PAD_FL_* pad types * @sig_type: type of signal of the pad to be search * * This helper function finds the first pad index inside an entity that * satisfies both @is_sink and @sig_type conditions. * * Return: * * On success, return the pad number. If the pad was not found or the media * entity is a NULL pointer, return -EINVAL. */ int media_get_pad_index(struct media_entity *entity, u32 pad_type, enum media_pad_signal_type sig_type); /** * media_create_pad_link() - creates a link between two entities. * * @source: pointer to &media_entity of the source pad. * @source_pad: number of the source pad in the pads array * @sink: pointer to &media_entity of the sink pad. * @sink_pad: number of the sink pad in the pads array. * @flags: Link flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * ( seek for ``MEDIA_LNK_FL_*``) * * Valid values for flags: * * %MEDIA_LNK_FL_ENABLED * Indicates that the link is enabled and can be used to transfer media data. * When two or more links target a sink pad, only one of them can be * enabled at a time. * * %MEDIA_LNK_FL_IMMUTABLE * Indicates that the link enabled state can't be modified at runtime. If * %MEDIA_LNK_FL_IMMUTABLE is set, then %MEDIA_LNK_FL_ENABLED must also be * set, since an immutable link is always enabled. * * .. note:: * * Before calling this function, media_entity_pads_init() and * media_device_register_entity() should be called previously for both ends. */ __must_check int media_create_pad_link(struct media_entity *source, u16 source_pad, struct media_entity *sink, u16 sink_pad, u32 flags); /** * media_create_pad_links() - creates a link between two entities. * * @mdev: Pointer to the media_device that contains the object * @source_function: Function of the source entities. Used only if @source is * NULL. * @source: pointer to &media_entity of the source pad. If NULL, it will use * all entities that matches the @sink_function. * @source_pad: number of the source pad in the pads array * @sink_function: Function of the sink entities. Used only if @sink is NULL. * @sink: pointer to &media_entity of the sink pad. If NULL, it will use * all entities that matches the @sink_function. * @sink_pad: number of the sink pad in the pads array. * @flags: Link flags, as defined in include/uapi/linux/media.h. * @allow_both_undefined: if %true, then both @source and @sink can be NULL. * In such case, it will create a crossbar between all entities that * matches @source_function to all entities that matches @sink_function. * If %false, it will return 0 and won't create any link if both @source * and @sink are NULL. * * Valid values for flags: * * A %MEDIA_LNK_FL_ENABLED flag indicates that the link is enabled and can be * used to transfer media data. If multiple links are created and this * flag is passed as an argument, only the first created link will have * this flag. * * A %MEDIA_LNK_FL_IMMUTABLE flag indicates that the link enabled state can't * be modified at runtime. If %MEDIA_LNK_FL_IMMUTABLE is set, then * %MEDIA_LNK_FL_ENABLED must also be set since an immutable link is * always enabled. 
* * It is common for some devices to have multiple source and/or sink entities * of the same type that should be linked. While media_create_pad_link() * creates link by link, this function is meant to allow 1:n, n:1 and even * cross-bar (n:n) links. * * .. note:: * * Before calling this function, media_entity_pads_init() and * media_device_register_entity() should be called previously for the * entities to be linked. */ int media_create_pad_links(const struct media_device *mdev, const u32 source_function, struct media_entity *source, const u16 source_pad, const u32 sink_function, struct media_entity *sink, const u16 sink_pad, u32 flags, const bool allow_both_undefined); void __media_entity_remove_links(struct media_entity *entity); /** * media_entity_remove_links() - remove all links associated with an entity * * @entity: pointer to &media_entity * * .. note:: * * This is called automatically when an entity is unregistered via * media_device_register_entity(). */ void media_entity_remove_links(struct media_entity *entity); /** * __media_entity_setup_link - Configure a media link without locking * @link: The link being configured * @flags: Link configuration flags * * The bulk of link setup is handled by the two entities connected through the * link. This function notifies both entities of the link configuration change. * * If the link is immutable or if the current and new configuration are * identical, return immediately. * * The user is expected to hold link->source->parent->mutex. If not, * media_entity_setup_link() should be used instead. */ int __media_entity_setup_link(struct media_link *link, u32 flags); /** * media_entity_setup_link() - changes the link flags properties in runtime * * @link: pointer to &media_link * @flags: the requested new link flags * * The only configurable property is the %MEDIA_LNK_FL_ENABLED link flag * to enable/disable a link. Links marked with the * %MEDIA_LNK_FL_IMMUTABLE link flag can not be enabled or disabled. * * When a link is enabled or disabled, the media framework calls the * link_setup operation for the two entities at the source and sink of the * link, in that order. If the second link_setup call fails, another * link_setup call is made on the first entity to restore the original link * flags. * * Media device drivers can be notified of link setup operations by setting the * &media_device.link_notify pointer to a callback function. If provided, the * notification callback will be called before enabling and after disabling * links. * * Entity drivers must implement the link_setup operation if any of their links * is non-immutable. The operation must either configure the hardware or store * the configuration information to be applied later. * * Link configuration must not have any side effect on other links. If an * enabled link at a sink pad prevents another link at the same pad from * being enabled, the link_setup operation must return %-EBUSY and can't * implicitly disable the first enabled link. * * .. note:: * * The valid values of the flags for the link is the same as described * on media_create_pad_link(), for pad to pad links or the same as described * on media_create_intf_link(), for interface to entity links. */ int media_entity_setup_link(struct media_link *link, u32 flags); /** * media_entity_find_link - Find a link between two pads * @source: Source pad * @sink: Sink pad * * Return: returns a pointer to the link between the two entities. If no * such link exists, return %NULL. 
*/ struct media_link *media_entity_find_link(struct media_pad *source, struct media_pad *sink); /** * media_pad_remote_pad_first - Find the first pad at the remote end of a link * @pad: Pad at the local end of the link * * Search for a remote pad connected to the given pad by iterating over all * links originating or terminating at that pad until an enabled link is found. * * Return: returns a pointer to the pad at the remote end of the first found * enabled link, or %NULL if no enabled link has been found. */ struct media_pad *media_pad_remote_pad_first(const struct media_pad *pad); /** * media_pad_remote_pad_unique - Find a remote pad connected to a pad * @pad: The pad * * Search for and return a remote pad connected to @pad through an enabled * link. If multiple (or no) remote pads are found, an error is returned. * * The uniqueness constraint makes this helper function suitable for entities * that support a single active source at a time on a given pad. * * Return: A pointer to the remote pad, or one of the following error pointers * if an error occurs: * * * -ENOTUNIQ - Multiple links are enabled * * -ENOLINK - No connected pad found */ struct media_pad *media_pad_remote_pad_unique(const struct media_pad *pad); /** * media_entity_remote_pad_unique - Find a remote pad connected to an entity * @entity: The entity * @type: The type of pad to find (MEDIA_PAD_FL_SINK or MEDIA_PAD_FL_SOURCE) * * Search for and return a remote pad of @type connected to @entity through an * enabled link. If multiple (or no) remote pads match these criteria, an error * is returned. * * The uniqueness constraint makes this helper function suitable for entities * that support a single active source or sink at a time. * * Return: A pointer to the remote pad, or one of the following error pointers * if an error occurs: * * * -ENOTUNIQ - Multiple links are enabled * * -ENOLINK - No connected pad found */ struct media_pad * media_entity_remote_pad_unique(const struct media_entity *entity, unsigned int type); /** * media_entity_remote_source_pad_unique - Find a remote source pad connected to * an entity * @entity: The entity * * Search for and return a remote source pad connected to @entity through an * enabled link. If multiple (or no) remote pads match these criteria, an error * is returned. * * The uniqueness constraint makes this helper function suitable for entities * that support a single active source at a time. * * Return: A pointer to the remote pad, or one of the following error pointers * if an error occurs: * * * -ENOTUNIQ - Multiple links are enabled * * -ENOLINK - No connected pad found */ static inline struct media_pad * media_entity_remote_source_pad_unique(const struct media_entity *entity) { return media_entity_remote_pad_unique(entity, MEDIA_PAD_FL_SOURCE); } /** * media_pad_is_streaming - Test if a pad is part of a streaming pipeline * @pad: The pad * * Return: True if the pad is part of a pipeline started with the * media_pipeline_start() function, false otherwise. */ static inline bool media_pad_is_streaming(const struct media_pad *pad) { return pad->pipe; } /** * media_entity_is_streaming - Test if an entity is part of a streaming pipeline * @entity: The entity * * Return: True if the entity is part of a pipeline started with the * media_pipeline_start() function, false otherwise. 
*/ static inline bool media_entity_is_streaming(const struct media_entity *entity) { struct media_pad *pad; media_entity_for_each_pad(entity, pad) { if (media_pad_is_streaming(pad)) return true; } return false; } /** * media_entity_pipeline - Get the media pipeline an entity is part of * @entity: The entity * * DEPRECATED: use media_pad_pipeline() instead. * * This function returns the media pipeline that an entity has been associated * with when constructing the pipeline with media_pipeline_start(). The pointer * remains valid until media_pipeline_stop() is called. * * In general, entities can be part of multiple pipelines, when carrying * multiple streams (either on different pads, or on the same pad using * multiplexed streams). This function is to be used only for entities that * do not support multiple pipelines. * * Return: The media_pipeline the entity is part of, or NULL if the entity is * not part of any pipeline. */ struct media_pipeline *media_entity_pipeline(struct media_entity *entity); /** * media_pad_pipeline - Get the media pipeline a pad is part of * @pad: The pad * * This function returns the media pipeline that a pad has been associated * with when constructing the pipeline with media_pipeline_start(). The pointer * remains valid until media_pipeline_stop() is called. * * Return: The media_pipeline the pad is part of, or NULL if the pad is * not part of any pipeline. */ struct media_pipeline *media_pad_pipeline(struct media_pad *pad); /** * media_entity_get_fwnode_pad - Get pad number from fwnode * * @entity: The entity * @fwnode: Pointer to the fwnode_handle which should be used to find the pad * @direction_flags: Expected direction of the pad, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * (seek for ``MEDIA_PAD_FL_*``) * * This function can be used to resolve the media pad number from * a fwnode. This is useful for devices which use more complex * mappings of media pads. * * If the entity does not implement the get_fwnode_pad() operation * then this function searches the entity for the first pad that * matches the @direction_flags. * * Return: returns the pad number on success or a negative error code. */ int media_entity_get_fwnode_pad(struct media_entity *entity, const struct fwnode_handle *fwnode, unsigned long direction_flags); /** * media_graph_walk_init - Allocate resources used by graph walk. * * @graph: Media graph structure that will be used to walk the graph * @mdev: Pointer to the &media_device that contains the object * * This function is deprecated, use media_pipeline_for_each_pad() instead. * * The caller is required to hold the media_device graph_mutex during the graph * walk until the graph state is released. * * Returns zero on success or a negative error code otherwise. */ __must_check int media_graph_walk_init( struct media_graph *graph, struct media_device *mdev); /** * media_graph_walk_cleanup - Release resources used by graph walk. * * @graph: Media graph structure that will be used to walk the graph * * This function is deprecated, use media_pipeline_for_each_pad() instead. */ void media_graph_walk_cleanup(struct media_graph *graph); /** * media_graph_walk_start - Start walking the media graph at a * given entity * * @graph: Media graph structure that will be used to walk the graph * @entity: Starting entity * * This function is deprecated, use media_pipeline_for_each_pad() instead. * * Before using this function, media_graph_walk_init() must be * used to allocate resources used for walking the graph. 
This * function initializes the graph traversal structure to walk the * entities graph starting at the given entity. The traversal * structure must not be modified by the caller during graph * traversal. After the graph walk, the resources must be released * using media_graph_walk_cleanup(). */ void media_graph_walk_start(struct media_graph *graph, struct media_entity *entity); /** * media_graph_walk_next - Get the next entity in the graph * @graph: Media graph structure * * This function is deprecated, use media_pipeline_for_each_pad() instead. * * Perform a depth-first traversal of the given media entities graph. * * The graph structure must have been previously initialized with a call to * media_graph_walk_start(). * * Return: returns the next entity in the graph or %NULL if the whole graph * have been traversed. */ struct media_entity *media_graph_walk_next(struct media_graph *graph); /** * media_pipeline_start - Mark a pipeline as streaming * @pad: Starting pad * @pipe: Media pipeline to be assigned to all pads in the pipeline. * * Mark all pads connected to a given pad through enabled links, either * directly or indirectly, as streaming. The given pipeline object is assigned * to every pad in the pipeline and stored in the media_pad pipe field. * * Calls to this function can be nested, in which case the same number of * media_pipeline_stop() calls will be required to stop streaming. The * pipeline pointer must be identical for all nested calls to * media_pipeline_start(). */ __must_check int media_pipeline_start(struct media_pad *pad, struct media_pipeline *pipe); /** * __media_pipeline_start - Mark a pipeline as streaming * * @pad: Starting pad * @pipe: Media pipeline to be assigned to all pads in the pipeline. * * ..note:: This is the non-locking version of media_pipeline_start() */ __must_check int __media_pipeline_start(struct media_pad *pad, struct media_pipeline *pipe); /** * media_pipeline_stop - Mark a pipeline as not streaming * @pad: Starting pad * * Mark all pads connected to a given pad through enabled links, either * directly or indirectly, as not streaming. The media_pad pipe field is * reset to %NULL. * * If multiple calls to media_pipeline_start() have been made, the same * number of calls to this function are required to mark the pipeline as not * streaming. */ void media_pipeline_stop(struct media_pad *pad); /** * __media_pipeline_stop - Mark a pipeline as not streaming * * @pad: Starting pad * * .. note:: This is the non-locking version of media_pipeline_stop() */ void __media_pipeline_stop(struct media_pad *pad); struct media_pad * __media_pipeline_pad_iter_next(struct media_pipeline *pipe, struct media_pipeline_pad_iter *iter, struct media_pad *pad); /** * media_pipeline_for_each_pad - Iterate on all pads in a media pipeline * @pipe: The pipeline * @iter: The iterator (struct media_pipeline_pad_iter) * @pad: The iterator pad * * Iterate on all pads in a media pipeline. This is only valid after the * pipeline has been built with media_pipeline_start() and before it gets * destroyed with media_pipeline_stop(). */ #define media_pipeline_for_each_pad(pipe, iter, pad) \ for (pad = __media_pipeline_pad_iter_next((pipe), iter, NULL); \ pad != NULL; \ pad = __media_pipeline_pad_iter_next((pipe), iter, pad)) /** * media_pipeline_entity_iter_init - Initialize a pipeline entity iterator * @pipe: The pipeline * @iter: The iterator * * This function must be called to initialize the iterator before using it in a * media_pipeline_for_each_entity() loop. 
The iterator must be destroyed by a * call to media_pipeline_entity_iter_cleanup after the loop (including in code * paths that break from the loop). * * The same iterator can be used in multiple consecutive loops without being * destroyed and reinitialized. * * Return: 0 on success or a negative error code otherwise. */ int media_pipeline_entity_iter_init(struct media_pipeline *pipe, struct media_pipeline_entity_iter *iter); /** * media_pipeline_entity_iter_cleanup - Destroy a pipeline entity iterator * @iter: The iterator * * This function must be called to destroy iterators initialized with * media_pipeline_entity_iter_init(). */ void media_pipeline_entity_iter_cleanup(struct media_pipeline_entity_iter *iter); struct media_entity * __media_pipeline_entity_iter_next(struct media_pipeline *pipe, struct media_pipeline_entity_iter *iter, struct media_entity *entity); /** * media_pipeline_for_each_entity - Iterate on all entities in a media pipeline * @pipe: The pipeline * @iter: The iterator (struct media_pipeline_entity_iter) * @entity: The iterator entity * * Iterate on all entities in a media pipeline. This is only valid after the * pipeline has been built with media_pipeline_start() and before it gets * destroyed with media_pipeline_stop(). The iterator must be initialized with * media_pipeline_entity_iter_init() before iteration, and destroyed with * media_pipeline_entity_iter_cleanup() after (including in code paths that * break from the loop). */ #define media_pipeline_for_each_entity(pipe, iter, entity) \ for (entity = __media_pipeline_entity_iter_next((pipe), iter, NULL); \ entity != NULL; \ entity = __media_pipeline_entity_iter_next((pipe), iter, entity)) /** * media_pipeline_alloc_start - Mark a pipeline as streaming * @pad: Starting pad * * media_pipeline_alloc_start() is similar to media_pipeline_start() but instead * of working on a given pipeline the function will use an existing pipeline if * the pad is already part of a pipeline, or allocate a new pipeline. * * Calls to media_pipeline_alloc_start() must be matched with * media_pipeline_stop(). */ __must_check int media_pipeline_alloc_start(struct media_pad *pad); /** * media_devnode_create() - creates and initializes a device node interface * * @mdev: pointer to struct &media_device * @type: type of the interface, as given by * :ref:`include/uapi/linux/media.h <media_header>` * ( seek for ``MEDIA_INTF_T_*``) macros. * @flags: Interface flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * ( seek for ``MEDIA_INTF_FL_*``) * @major: Device node major number. * @minor: Device node minor number. * * Return: if succeeded, returns a pointer to the newly allocated * &media_intf_devnode pointer. * * .. note:: * * Currently, no flags for &media_interface is defined. */ struct media_intf_devnode * __must_check media_devnode_create(struct media_device *mdev, u32 type, u32 flags, u32 major, u32 minor); /** * media_devnode_remove() - removes a device node interface * * @devnode: pointer to &media_intf_devnode to be freed. * * When a device node interface is removed, all links to it are automatically * removed. 
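 *
 * Example (an illustrative sketch, not part of the original documentation;
 * @drv and its members are hypothetical)::
 *
 *	intf = media_devnode_create(&drv->mdev, MEDIA_INTF_T_V4L_VIDEO, 0,
 *				    VIDEO_MAJOR, drv->vdev.minor);
 *	if (!intf)
 *		return -ENOMEM;
 *	...
 *	media_devnode_remove(intf);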
*/ void media_devnode_remove(struct media_intf_devnode *devnode); /** * media_create_intf_link() - creates a link between an entity and an interface * * @entity: pointer to %media_entity * @intf: pointer to %media_interface * @flags: Link flags, as defined in * :ref:`include/uapi/linux/media.h <media_header>` * ( seek for ``MEDIA_LNK_FL_*``) * * * Valid values for flags: * * %MEDIA_LNK_FL_ENABLED * Indicates that the interface is connected to the entity hardware. * That's the default value for interfaces. An interface may be disabled if * the hardware is busy due to the usage of some other interface that it is * currently controlling the hardware. * * A typical example is an hybrid TV device that handle only one type of * stream on a given time. So, when the digital TV is streaming, * the V4L2 interfaces won't be enabled, as such device is not able to * also stream analog TV or radio. * * .. note:: * * Before calling this function, media_devnode_create() should be called for * the interface and media_device_register_entity() should be called for the * interface that will be part of the link. */ struct media_link * __must_check media_create_intf_link(struct media_entity *entity, struct media_interface *intf, u32 flags); /** * __media_remove_intf_link() - remove a single interface link * * @link: pointer to &media_link. * * .. note:: This is an unlocked version of media_remove_intf_link() */ void __media_remove_intf_link(struct media_link *link); /** * media_remove_intf_link() - remove a single interface link * * @link: pointer to &media_link. * * .. note:: Prefer to use this one, instead of __media_remove_intf_link() */ void media_remove_intf_link(struct media_link *link); /** * __media_remove_intf_links() - remove all links associated with an interface * * @intf: pointer to &media_interface * * .. note:: This is an unlocked version of media_remove_intf_links(). */ void __media_remove_intf_links(struct media_interface *intf); /** * media_remove_intf_links() - remove all links associated with an interface * * @intf: pointer to &media_interface * * .. note:: * * #) This is called automatically when an entity is unregistered via * media_device_register_entity() and by media_devnode_remove(). * * #) Prefer to use this one, instead of __media_remove_intf_links(). */ void media_remove_intf_links(struct media_interface *intf); /** * media_entity_call - Calls a struct media_entity_operations operation on * an entity * * @entity: entity where the @operation will be called * @operation: type of the operation. Should be the name of a member of * struct &media_entity_operations. * * This helper function will check if @operation is not %NULL. On such case, * it will issue a call to @operation\(@entity, @args\). */ #define media_entity_call(entity, operation, args...) \ (((entity)->ops && (entity)->ops->operation) ? \ (entity)->ops->operation((entity) , ##args) : -ENOIOCTLCMD) /** * media_create_ancillary_link() - create an ancillary link between two * instances of &media_entity * * @primary: pointer to the primary &media_entity * @ancillary: pointer to the ancillary &media_entity * * Create an ancillary link between two entities, indicating that they * represent two connected pieces of hardware that form a single logical unit. * A typical example is a camera lens controller being linked to the sensor that * it is supporting. * * The function sets both MEDIA_LNK_FL_ENABLED and MEDIA_LNK_FL_IMMUTABLE for * the new link. 
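 *
 * Example (an illustrative sketch, not part of the original documentation;
 * the sensor and lens entities are hypothetical)::
 *
 *	link = media_create_ancillary_link(&sensor->entity, &lens->entity);
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);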
*/ struct media_link * media_create_ancillary_link(struct media_entity *primary, struct media_entity *ancillary); /** * __media_entity_next_link() - Iterate through a &media_entity's links * * @entity: pointer to the &media_entity * @link: pointer to a &media_link to hold the iterated values * @link_type: one of the MEDIA_LNK_FL_LINK_TYPE flags * * Return the next link against an entity matching a specific link type. This * allows iteration through an entity's links whilst guaranteeing all of the * returned links are of the given type. */ struct media_link *__media_entity_next_link(struct media_entity *entity, struct media_link *link, unsigned long link_type); /** * for_each_media_entity_data_link() - Iterate through an entity's data links * * @entity: pointer to the &media_entity * @link: pointer to a &media_link to hold the iterated values * * Iterate over a &media_entity's data links */ #define for_each_media_entity_data_link(entity, link) \ for (link = __media_entity_next_link(entity, NULL, \ MEDIA_LNK_FL_DATA_LINK); \ link; \ link = __media_entity_next_link(entity, link, \ MEDIA_LNK_FL_DATA_LINK)) #endif
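/*
 * Illustrative usage sketch (not part of the original header): building and
 * tearing down a pipeline with the helpers declared above. The starting @pad
 * and the my_validate_pad() helper are hypothetical.
 *
 *	struct media_pipeline pipe = { };
 *	struct media_pipeline_pad_iter iter;
 *	struct media_pad *it;
 *	int ret;
 *
 *	ret = media_pipeline_start(pad, &pipe);
 *	if (ret)
 *		return ret;
 *
 *	media_pipeline_for_each_pad(&pipe, &iter, it) {
 *		ret = my_validate_pad(it);
 *		if (ret)
 *			break;
 *	}
 *
 *	media_pipeline_stop(pad);
 */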
// SPDX-License-Identifier: GPL-2.0 /* * property.c - Unified device property interface. * * Copyright (C) 2014, Intel Corporation * Authors: Rafael J. Wysocki <rafael.j.wysocki@intel.com> * Mika Westerberg <mika.westerberg@linux.intel.com> */ #include <linux/device.h> #include <linux/err.h> #include <linux/export.h> #include <linux/kconfig.h> #include <linux/of.h> #include <linux/property.h> #include <linux/phy.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> struct fwnode_handle *__dev_fwnode(struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? of_fwnode_handle(dev->of_node) : dev->fwnode; } EXPORT_SYMBOL_GPL(__dev_fwnode); const struct fwnode_handle *__dev_fwnode_const(const struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ?
of_fwnode_handle(dev->of_node) : dev->fwnode; } EXPORT_SYMBOL_GPL(__dev_fwnode_const); /** * device_property_present - check if a property of a device is present * @dev: Device whose property is being checked * @propname: Name of the property * * Check if property @propname is present in the device firmware description. * * Return: true if property @propname is present. Otherwise, returns false. */ bool device_property_present(const struct device *dev, const char *propname) { return fwnode_property_present(dev_fwnode(dev), propname); } EXPORT_SYMBOL_GPL(device_property_present); /** * fwnode_property_present - check if a property of a firmware node is present * @fwnode: Firmware node whose property to check * @propname: Name of the property * * Return: true if property @propname is present. Otherwise, returns false. */ bool fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { bool ret; if (IS_ERR_OR_NULL(fwnode)) return false; ret = fwnode_call_bool_op(fwnode, property_present, propname); if (ret) return ret; return fwnode_call_bool_op(fwnode->secondary, property_present, propname); } EXPORT_SYMBOL_GPL(fwnode_property_present); /** * device_property_read_u8_array - return a u8 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u8 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u8() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u8_array(const struct device *dev, const char *propname, u8 *val, size_t nval) { return fwnode_property_read_u8_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u8_array); /** * device_property_read_u16_array - return a u16 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u16 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u16() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. 
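 *
 * Example (an illustrative sketch, not part of the original documentation;
 * the "reg-values" property name and MY_MAX_REGS are made up)::
 *
 *	u16 vals[MY_MAX_REGS];
 *	int n;
 *
 *	n = device_property_count_u16(dev, "reg-values");
 *	if (n <= 0 || n > MY_MAX_REGS)
 *		return -EINVAL;
 *
 *	ret = device_property_read_u16_array(dev, "reg-values", vals, n);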
*/ int device_property_read_u16_array(const struct device *dev, const char *propname, u16 *val, size_t nval) { return fwnode_property_read_u16_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u16_array); /** * device_property_read_u32_array - return a u32 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u32 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u32() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u32_array(const struct device *dev, const char *propname, u32 *val, size_t nval) { return fwnode_property_read_u32_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u32_array); /** * device_property_read_u64_array - return a u64 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u64 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u64() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u64_array(const struct device *dev, const char *propname, u64 *val, size_t nval) { return fwnode_property_read_u64_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u64_array); /** * device_property_read_string_array - return a string array property of device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of string properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_string_array_count() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values read on success if @val is non-NULL, * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not an array of strings, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. 
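 *
 * Example (an illustrative sketch, not part of the original documentation;
 * "clock-names" is only a sample property name)::
 *
 *	const char *names[4];
 *	int n;
 *
 *	n = device_property_read_string_array(dev, "clock-names", names,
 *					      ARRAY_SIZE(names));
 *	if (n < 0)
 *		return n;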
*/ int device_property_read_string_array(const struct device *dev, const char *propname, const char **val, size_t nval) { return fwnode_property_read_string_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_string_array); /** * device_property_read_string - return a string property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The value is stored here * * Function reads property @propname from the device firmware description and * stores the value into @val if found. The value is checked to be a string. * * Return: %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property type is not a string. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_string(const struct device *dev, const char *propname, const char **val) { return fwnode_property_read_string(dev_fwnode(dev), propname, val); } EXPORT_SYMBOL_GPL(device_property_read_string); /** * device_property_match_string - find a string in an array and return index * @dev: Device to get the property of * @propname: Name of the property holding the array * @string: String to look for * * Find a given string in a string array and if it is found return the * index back. * * Return: index, starting from %0, if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of strings, * %-ENXIO if no suitable firmware interface is present. */ int device_property_match_string(const struct device *dev, const char *propname, const char *string) { return fwnode_property_match_string(dev_fwnode(dev), propname, string); } EXPORT_SYMBOL_GPL(device_property_match_string); static int fwnode_property_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -EINVAL; ret = fwnode_call_int_op(fwnode, property_read_int_array, propname, elem_size, val, nval); if (ret != -EINVAL) return ret; return fwnode_call_int_op(fwnode->secondary, property_read_int_array, propname, elem_size, val, nval); } /** * fwnode_property_read_u8_array - return a u8 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u8 properties with @propname from @fwnode and stores them to * @val if found. * * It's recommended to call fwnode_property_count_u8() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. 
*/ int fwnode_property_read_u8_array(const struct fwnode_handle *fwnode, const char *propname, u8 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u8), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array); /** * fwnode_property_read_u16_array - return a u16 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u16 properties with @propname from @fwnode and store them to * @val if found. * * It's recommended to call fwnode_property_count_u16() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_u16_array(const struct fwnode_handle *fwnode, const char *propname, u16 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u16), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array); /** * fwnode_property_read_u32_array - return a u32 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u32 properties with @propname from @fwnode store them to * @val if found. * * It's recommended to call fwnode_property_count_u32() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_u32_array(const struct fwnode_handle *fwnode, const char *propname, u32 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u32), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array); /** * fwnode_property_read_u64_array - return a u64 array property firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u64 properties with @propname from @fwnode and store them to * @val if found. * * It's recommended to call fwnode_property_count_u64() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. 
*/ int fwnode_property_read_u64_array(const struct fwnode_handle *fwnode, const char *propname, u64 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u64), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u64_array); /** * fwnode_property_read_string_array - return string array property of a node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an string list property @propname from the given firmware node and store * them to @val if found. * * It's recommended to call fwnode_property_string_array_count() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values read on success if @val is non-NULL, * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not an array of strings, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -EINVAL; ret = fwnode_call_int_op(fwnode, property_read_string_array, propname, val, nval); if (ret != -EINVAL) return ret; return fwnode_call_int_op(fwnode->secondary, property_read_string_array, propname, val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_string_array); /** * fwnode_property_read_string - return a string property of a firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The value is stored here * * Read property @propname from the given firmware node and store the value into * @val if found. The value is checked to be a string. * * Return: %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not a string, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_string(const struct fwnode_handle *fwnode, const char *propname, const char **val) { int ret = fwnode_property_read_string_array(fwnode, propname, val, 1); return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(fwnode_property_read_string); /** * fwnode_property_match_string - find a string in an array and return index * @fwnode: Firmware node to get the property of * @propname: Name of the property holding the array * @string: String to look for * * Find a given string in a string array and if it is found return the * index back. * * Return: index, starting from %0, if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of strings, * %-ENXIO if no suitable firmware interface is present. 
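 *
 * Example (an illustrative sketch, not part of the original documentation;
 * "pin-names" and "sda" are sample values)::
 *
 *	int idx = fwnode_property_match_string(fwnode, "pin-names", "sda");
 *
 *	if (idx < 0)
 *		return idx;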
*/ int fwnode_property_match_string(const struct fwnode_handle *fwnode, const char *propname, const char *string) { const char **values; int nval, ret; nval = fwnode_property_string_array_count(fwnode, propname); if (nval < 0) return nval; if (nval == 0) return -ENODATA; values = kcalloc(nval, sizeof(*values), GFP_KERNEL); if (!values) return -ENOMEM; ret = fwnode_property_read_string_array(fwnode, propname, values, nval); if (ret < 0) goto out_free; ret = match_string(values, nval, string); if (ret < 0) ret = -ENODATA; out_free: kfree(values); return ret; } EXPORT_SYMBOL_GPL(fwnode_property_match_string); /** * fwnode_property_match_property_string - find a property string value in an array and return index * @fwnode: Firmware node to get the property of * @propname: Name of the property holding the string value * @array: String array to search in * @n: Size of the @array * * Find a property string value in a given @array and if it is found return * the index back. * * Return: index, starting from %0, if the string value was found in the @array (success), * %-ENOENT when the string value was not found in the @array, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not a string, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_match_property_string(const struct fwnode_handle *fwnode, const char *propname, const char * const *array, size_t n) { const char *string; int ret; ret = fwnode_property_read_string(fwnode, propname, &string); if (ret) return ret; ret = match_string(array, n, string); if (ret < 0) ret = -ENOENT; return ret; } EXPORT_SYMBOL_GPL(fwnode_property_match_property_string); /** * fwnode_property_get_reference_args() - Find a reference with arguments * @fwnode: Firmware node where to look for the reference * @prop: The name of the property * @nargs_prop: The name of the property telling the number of * arguments in the referred node. NULL if @nargs is known, * otherwise @nargs is ignored. Only relevant on OF. * @nargs: Number of arguments. Ignored if @nargs_prop is non-NULL. * @index: Index of the reference, from zero onwards. * @args: Result structure with reference and integer arguments. * May be NULL. * * Obtain a reference based on a named property in an fwnode, with * integer arguments. * * The caller is responsible for calling fwnode_handle_put() on the returned * @args->fwnode pointer. * * Return: %0 on success * %-ENOENT when the index is out of bounds, the index has an empty * reference or the property was not found * %-EINVAL on parse error */ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -ENOENT; ret = fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop, nargs, index, args); if (ret == 0) return ret; if (IS_ERR_OR_NULL(fwnode->secondary)) return ret; return fwnode_call_int_op(fwnode->secondary, get_reference_args, prop, nargs_prop, nargs, index, args); } EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args); /** * fwnode_find_reference - Find named reference to a fwnode_handle * @fwnode: Firmware node where to look for the reference * @name: The name of the reference * @index: Index of the reference * * @index can be used when the named reference holds a table of references. 
* * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: a pointer to the reference fwnode, when found. Otherwise, * returns an error pointer. */ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, const char *name, unsigned int index) { struct fwnode_reference_args args; int ret; ret = fwnode_property_get_reference_args(fwnode, name, NULL, 0, index, &args); return ret ? ERR_PTR(ret) : args.fwnode; } EXPORT_SYMBOL_GPL(fwnode_find_reference); /** * fwnode_get_name - Return the name of a node * @fwnode: The firmware node * * Return: a pointer to the node name, or %NULL. */ const char *fwnode_get_name(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, get_name); } EXPORT_SYMBOL_GPL(fwnode_get_name); /** * fwnode_get_name_prefix - Return the prefix of node for printing purposes * @fwnode: The firmware node * * Return: the prefix of a node, intended to be printed right before the node. * The prefix works also as a separator between the nodes. */ const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, get_name_prefix); } /** * fwnode_name_eq - Return true if node name is equal * @fwnode: The firmware node * @name: The name to which to compare the node name * * Compare the name provided as an argument to the name of the node, stopping * the comparison at either NUL or '@' character, whichever comes first. This * function is generally used for comparing node names while ignoring the * possible unit address of the node. * * Return: true if the node name matches with the name provided in the @name * argument, false otherwise. */ bool fwnode_name_eq(const struct fwnode_handle *fwnode, const char *name) { const char *node_name; ptrdiff_t len; node_name = fwnode_get_name(fwnode); if (!node_name) return false; len = strchrnul(node_name, '@') - node_name; return str_has_prefix(node_name, name) == len; } EXPORT_SYMBOL_GPL(fwnode_name_eq); /** * fwnode_get_parent - Return parent firwmare node * @fwnode: Firmware whose parent is retrieved * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: parent firmware node of the given node if possible or %NULL if no * parent was available. */ struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, get_parent); } EXPORT_SYMBOL_GPL(fwnode_get_parent); /** * fwnode_get_next_parent - Iterate to the node's parent * @fwnode: Firmware whose parent is retrieved * * This is like fwnode_get_parent() except that it drops the refcount * on the passed node, making it suitable for iterating through a * node's parents. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @fwnode * unconditionally. * * Return: parent firmware node of the given node if possible or %NULL if no * parent was available. */ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode) { struct fwnode_handle *parent = fwnode_get_parent(fwnode); fwnode_handle_put(fwnode); return parent; } EXPORT_SYMBOL_GPL(fwnode_get_next_parent); /** * fwnode_count_parents - Return the number of parents a node has * @fwnode: The node the parents of which are to be counted * * Return: the number of parents a node has. 
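 *
 * Example (an illustrative sketch, not part of the original documentation):
 * obtaining the topmost ancestor of @fwnode by walking up all of its parents::
 *
 *	unsigned int depth = fwnode_count_parents(fwnode);
 *	struct fwnode_handle *root = fwnode_get_nth_parent(fwnode, depth);
 *
 *	...
 *	fwnode_handle_put(root);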
*/ unsigned int fwnode_count_parents(const struct fwnode_handle *fwnode) { struct fwnode_handle *parent; unsigned int count = 0; fwnode_for_each_parent_node(fwnode, parent) count++; return count; } EXPORT_SYMBOL_GPL(fwnode_count_parents); /** * fwnode_get_nth_parent - Return an nth parent of a node * @fwnode: The node the parent of which is requested * @depth: Distance of the parent from the node * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: the nth parent of a node. If there is no parent at the requested * @depth, %NULL is returned. If @depth is 0, the functionality is equivalent to * fwnode_handle_get(). For @depth == 1, it is fwnode_get_parent() and so on. */ struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwnode, unsigned int depth) { struct fwnode_handle *parent; if (depth == 0) return fwnode_handle_get(fwnode); fwnode_for_each_parent_node(fwnode, parent) { if (--depth == 0) return parent; } return NULL; } EXPORT_SYMBOL_GPL(fwnode_get_nth_parent); /** * fwnode_get_next_child_node - Return the next child node handle for a node * @fwnode: Firmware node to find the next child node for. * @child: Handle to one of the node's child nodes or a %NULL handle. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @child * unconditionally. */ struct fwnode_handle * fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return fwnode_call_ptr_op(fwnode, get_next_child_node, child); } EXPORT_SYMBOL_GPL(fwnode_get_next_child_node); /** * fwnode_get_next_available_child_node - Return the next available child node handle for a node * @fwnode: Firmware node to find the next child node for. * @child: Handle to one of the node's child nodes or a %NULL handle. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @child * unconditionally. */ struct fwnode_handle * fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { struct fwnode_handle *next_child = child; if (IS_ERR_OR_NULL(fwnode)) return NULL; do { next_child = fwnode_get_next_child_node(fwnode, next_child); if (!next_child) return NULL; } while (!fwnode_device_is_available(next_child)); return next_child; } EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node); /** * device_get_next_child_node - Return the next child node handle for a device * @dev: Device to find the next child node for. * @child: Handle to one of the device's child nodes or a %NULL handle. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @child * unconditionally. */ struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child) { const struct fwnode_handle *fwnode = dev_fwnode(dev); struct fwnode_handle *next; if (IS_ERR_OR_NULL(fwnode)) return NULL; /* Try to find a child in primary fwnode */ next = fwnode_get_next_child_node(fwnode, child); if (next) return next; /* When no more children in primary, continue with secondary */ return fwnode_get_next_child_node(fwnode->secondary, child); } EXPORT_SYMBOL_GPL(device_get_next_child_node); /** * fwnode_get_named_child_node - Return first matching named child node handle * @fwnode: Firmware node to find the named child node for. 
* @childname: String to match child node name against. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle * fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { return fwnode_call_ptr_op(fwnode, get_named_child_node, childname); } EXPORT_SYMBOL_GPL(fwnode_get_named_child_node); /** * device_get_named_child_node - Return first matching named child node handle * @dev: Device to find the named child node for. * @childname: String to match child node name against. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle *device_get_named_child_node(const struct device *dev, const char *childname) { return fwnode_get_named_child_node(dev_fwnode(dev), childname); } EXPORT_SYMBOL_GPL(device_get_named_child_node); /** * fwnode_handle_get - Obtain a reference to a device node * @fwnode: Pointer to the device node to obtain the reference to. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: the fwnode handle. */ struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode) { if (!fwnode_has_op(fwnode, get)) return fwnode; return fwnode_call_ptr_op(fwnode, get); } EXPORT_SYMBOL_GPL(fwnode_handle_get); /** * fwnode_handle_put - Drop reference to a device node * @fwnode: Pointer to the device node to drop the reference to. * * This has to be used when terminating device_for_each_child_node() iteration * with break or return to prevent stale device node references from being left * behind. */ void fwnode_handle_put(struct fwnode_handle *fwnode) { fwnode_call_void_op(fwnode, put); } EXPORT_SYMBOL_GPL(fwnode_handle_put); /** * fwnode_device_is_available - check if a device is available for use * @fwnode: Pointer to the fwnode of the device. * * Return: true if device is available for use. Otherwise, returns false. * * For fwnode node types that don't implement the .device_is_available() * operation, this function returns true. */ bool fwnode_device_is_available(const struct fwnode_handle *fwnode) { if (IS_ERR_OR_NULL(fwnode)) return false; if (!fwnode_has_op(fwnode, device_is_available)) return true; return fwnode_call_bool_op(fwnode, device_is_available); } EXPORT_SYMBOL_GPL(fwnode_device_is_available); /** * device_get_child_node_count - return the number of child nodes for device * @dev: Device to cound the child nodes for * * Return: the number of child nodes for a given device. */ unsigned int device_get_child_node_count(const struct device *dev) { struct fwnode_handle *child; unsigned int count = 0; device_for_each_child_node(dev, child) count++; return count; } EXPORT_SYMBOL_GPL(device_get_child_node_count); bool device_dma_supported(const struct device *dev) { return fwnode_call_bool_op(dev_fwnode(dev), device_dma_supported); } EXPORT_SYMBOL_GPL(device_dma_supported); enum dev_dma_attr device_get_dma_attr(const struct device *dev) { if (!fwnode_has_op(dev_fwnode(dev), device_get_dma_attr)) return DEV_DMA_NOT_SUPPORTED; return fwnode_call_int_op(dev_fwnode(dev), device_get_dma_attr); } EXPORT_SYMBOL_GPL(device_get_dma_attr); /** * fwnode_get_phy_mode - Get phy mode for given firmware node * @fwnode: Pointer to the given node * * The function gets phy interface string from property 'phy-mode' or * 'phy-connection-type', and return its index in phy_modes table, or errno in * error case. 
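 *
 * Example (an illustrative sketch, not part of the original documentation;
 * my_setup_rgmii_delays() is a hypothetical driver helper)::
 *
 *	int phy_mode = fwnode_get_phy_mode(fwnode);
 *
 *	if (phy_mode < 0)
 *		return phy_mode;
 *	if (phy_mode == PHY_INTERFACE_MODE_RGMII_ID)
 *		my_setup_rgmii_delays();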
*/ int fwnode_get_phy_mode(const struct fwnode_handle *fwnode) { const char *pm; int err, i; err = fwnode_property_read_string(fwnode, "phy-mode", &pm); if (err < 0) err = fwnode_property_read_string(fwnode, "phy-connection-type", &pm); if (err < 0) return err; for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) if (!strcasecmp(pm, phy_modes(i))) return i; return -ENODEV; } EXPORT_SYMBOL_GPL(fwnode_get_phy_mode); /** * device_get_phy_mode - Get phy mode for given device * @dev: Pointer to the given device * * The function gets phy interface string from property 'phy-mode' or * 'phy-connection-type', and return its index in phy_modes table, or errno in * error case. */ int device_get_phy_mode(struct device *dev) { return fwnode_get_phy_mode(dev_fwnode(dev)); } EXPORT_SYMBOL_GPL(device_get_phy_mode); /** * fwnode_iomap - Maps the memory mapped IO for a given fwnode * @fwnode: Pointer to the firmware node * @index: Index of the IO range * * Return: a pointer to the mapped memory. */ void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index) { return fwnode_call_ptr_op(fwnode, iomap, index); } EXPORT_SYMBOL(fwnode_iomap); /** * fwnode_irq_get - Get IRQ directly from a fwnode * @fwnode: Pointer to the firmware node * @index: Zero-based index of the IRQ * * Return: Linux IRQ number on success. Negative errno on failure. */ int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { int ret; ret = fwnode_call_int_op(fwnode, irq_get, index); /* We treat mapping errors as invalid case */ if (ret == 0) return -EINVAL; return ret; } EXPORT_SYMBOL(fwnode_irq_get); /** * fwnode_irq_get_byname - Get IRQ from a fwnode using its name * @fwnode: Pointer to the firmware node * @name: IRQ name * * Description: * Find a match to the string @name in the 'interrupt-names' string array * in _DSD for ACPI, or of_node for Device Tree. Then get the Linux IRQ * number of the IRQ resource corresponding to the index of the matched * string. * * Return: Linux IRQ number on success, or negative errno otherwise. */ int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name) { int index; if (!name) return -EINVAL; index = fwnode_property_match_string(fwnode, "interrupt-names", name); if (index < 0) return index; return fwnode_irq_get(fwnode, index); } EXPORT_SYMBOL(fwnode_irq_get_byname); /** * fwnode_graph_get_next_endpoint - Get next endpoint firmware node * @fwnode: Pointer to the parent firmware node * @prev: Previous endpoint node or %NULL to get the first * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @prev * unconditionally. * * Return: an endpoint firmware node pointer or %NULL if no more endpoints * are available. */ struct fwnode_handle * fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { struct fwnode_handle *ep, *port_parent = NULL; const struct fwnode_handle *parent; /* * If this function is in a loop and the previous iteration returned * an endpoint from fwnode->secondary, then we need to use the secondary * as parent rather than @fwnode. 
*/ if (prev) { port_parent = fwnode_graph_get_port_parent(prev); parent = port_parent; } else { parent = fwnode; } if (IS_ERR_OR_NULL(parent)) return NULL; ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev); if (ep) goto out_put_port_parent; ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL); out_put_port_parent: fwnode_handle_put(port_parent); return ep; } EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint); /** * fwnode_graph_get_port_parent - Return the device fwnode of a port endpoint * @endpoint: Endpoint firmware node of the port * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: the firmware node of the device the @endpoint belongs to. */ struct fwnode_handle * fwnode_graph_get_port_parent(const struct fwnode_handle *endpoint) { struct fwnode_handle *port, *parent; port = fwnode_get_parent(endpoint); parent = fwnode_call_ptr_op(port, graph_get_port_parent); fwnode_handle_put(port); return parent; } EXPORT_SYMBOL_GPL(fwnode_graph_get_port_parent); /** * fwnode_graph_get_remote_port_parent - Return fwnode of a remote device * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote device the @fwnode points to. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_port_parent(const struct fwnode_handle *fwnode) { struct fwnode_handle *endpoint, *parent; endpoint = fwnode_graph_get_remote_endpoint(fwnode); parent = fwnode_graph_get_port_parent(endpoint); fwnode_handle_put(endpoint); return parent; } EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port_parent); /** * fwnode_graph_get_remote_port - Return fwnode of a remote port * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote port the @fwnode points to. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_port(const struct fwnode_handle *fwnode) { return fwnode_get_next_parent(fwnode_graph_get_remote_endpoint(fwnode)); } EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port); /** * fwnode_graph_get_remote_endpoint - Return fwnode of a remote endpoint * @fwnode: Endpoint firmware node pointing to the remote endpoint * * Extracts firmware node of a remote endpoint the @fwnode points to. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. */ struct fwnode_handle * fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, graph_get_remote_endpoint); } EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint); static bool fwnode_graph_remote_available(struct fwnode_handle *ep) { struct fwnode_handle *dev_node; bool available; dev_node = fwnode_graph_get_remote_port_parent(ep); available = fwnode_device_is_available(dev_node); fwnode_handle_put(dev_node); return available; } /** * fwnode_graph_get_endpoint_by_id - get endpoint by port and endpoint numbers * @fwnode: parent fwnode_handle containing the graph * @port: identifier of the port node * @endpoint: identifier of the endpoint node under the port node * @flags: fwnode lookup flags * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: the fwnode handle of the local endpoint corresponding the port and * endpoint IDs or %NULL if not found. 
* * If FWNODE_GRAPH_ENDPOINT_NEXT is passed in @flags and the specified endpoint * has not been found, look for the closest endpoint ID greater than the * specified one and return the endpoint that corresponds to it, if present. * * Does not return endpoints that belong to disabled devices or endpoints that * are unconnected, unless FWNODE_GRAPH_DEVICE_DISABLED is passed in @flags. */ struct fwnode_handle * fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, u32 port, u32 endpoint, unsigned long flags) { struct fwnode_handle *ep, *best_ep = NULL; unsigned int best_ep_id = 0; bool endpoint_next = flags & FWNODE_GRAPH_ENDPOINT_NEXT; bool enabled_only = !(flags & FWNODE_GRAPH_DEVICE_DISABLED); fwnode_graph_for_each_endpoint(fwnode, ep) { struct fwnode_endpoint fwnode_ep = { 0 }; int ret; if (enabled_only && !fwnode_graph_remote_available(ep)) continue; ret = fwnode_graph_parse_endpoint(ep, &fwnode_ep); if (ret < 0) continue; if (fwnode_ep.port != port) continue; if (fwnode_ep.id == endpoint) return ep; if (!endpoint_next) continue; /* * If the endpoint that has just been found is not the first * matching one and the ID of the one found previously is closer * to the requested endpoint ID, skip it. */ if (fwnode_ep.id < endpoint || (best_ep && best_ep_id < fwnode_ep.id)) continue; fwnode_handle_put(best_ep); best_ep = fwnode_handle_get(ep); best_ep_id = fwnode_ep.id; } return best_ep; } EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_by_id); /** * fwnode_graph_get_endpoint_count - Count endpoints on a device node * @fwnode: The node related to a device * @flags: fwnode lookup flags * Count endpoints in a device node. * * If FWNODE_GRAPH_DEVICE_DISABLED flag is specified, also unconnected endpoints * and endpoints connected to disabled devices are counted. */ unsigned int fwnode_graph_get_endpoint_count(const struct fwnode_handle *fwnode, unsigned long flags) { struct fwnode_handle *ep; unsigned int count = 0; fwnode_graph_for_each_endpoint(fwnode, ep) { if (flags & FWNODE_GRAPH_DEVICE_DISABLED || fwnode_graph_remote_available(ep)) count++; } return count; } EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_count); /** * fwnode_graph_parse_endpoint - parse common endpoint node properties * @fwnode: pointer to endpoint fwnode_handle * @endpoint: pointer to the fwnode endpoint data structure * * Parse @fwnode representing a graph endpoint node and store the * information in @endpoint. The caller must hold a reference to * @fwnode. 
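 *
 * Example (an illustrative sketch, not part of the original documentation;
 * my_handle_sensor_endpoint() is a hypothetical driver function)::
 *
 *	struct fwnode_endpoint fwep;
 *	struct fwnode_handle *ep;
 *
 *	fwnode_graph_for_each_endpoint(fwnode, ep) {
 *		if (fwnode_graph_parse_endpoint(ep, &fwep))
 *			continue;
 *		if (fwep.port == 0)
 *			my_handle_sensor_endpoint(ep);
 *	}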
*/ int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint) { memset(endpoint, 0, sizeof(*endpoint)); return fwnode_call_int_op(fwnode, graph_parse_endpoint, endpoint); } EXPORT_SYMBOL(fwnode_graph_parse_endpoint); const void *device_get_match_data(const struct device *dev) { return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data, dev); } EXPORT_SYMBOL_GPL(device_get_match_data); static unsigned int fwnode_graph_devcon_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, unsigned int matches_len) { struct fwnode_handle *node; struct fwnode_handle *ep; unsigned int count = 0; void *ret; fwnode_graph_for_each_endpoint(fwnode, ep) { if (matches && count >= matches_len) { fwnode_handle_put(ep); break; } node = fwnode_graph_get_remote_port_parent(ep); if (!fwnode_device_is_available(node)) { fwnode_handle_put(node); continue; } ret = match(node, con_id, data); fwnode_handle_put(node); if (ret) { if (matches) matches[count] = ret; count++; } } return count; } static unsigned int fwnode_devcon_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, unsigned int matches_len) { struct fwnode_handle *node; unsigned int count = 0; unsigned int i; void *ret; for (i = 0; ; i++) { if (matches && count >= matches_len) break; node = fwnode_find_reference(fwnode, con_id, i); if (IS_ERR(node)) break; ret = match(node, NULL, data); fwnode_handle_put(node); if (ret) { if (matches) matches[count] = ret; count++; } } return count; } /** * fwnode_connection_find_match - Find connection from a device node * @fwnode: Device node with the connection * @con_id: Identifier for the connection * @data: Data for the match function * @match: Function to check and convert the connection description * * Find a connection with unique identifier @con_id between @fwnode and another * device node. @match will be used to convert the connection description to * data the caller is expecting to be returned. */ void *fwnode_connection_find_match(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match) { unsigned int count; void *ret; if (!fwnode || !match) return NULL; count = fwnode_graph_devcon_matches(fwnode, con_id, data, match, &ret, 1); if (count) return ret; count = fwnode_devcon_matches(fwnode, con_id, data, match, &ret, 1); return count ? ret : NULL; } EXPORT_SYMBOL_GPL(fwnode_connection_find_match); /** * fwnode_connection_find_matches - Find connections from a device node * @fwnode: Device node with the connection * @con_id: Identifier for the connection * @data: Data for the match function * @match: Function to check and convert the connection description * @matches: (Optional) array of pointers to fill with matches * @matches_len: Length of @matches * * Find up to @matches_len connections with unique identifier @con_id between * @fwnode and other device nodes. @match will be used to convert the * connection description to data the caller is expecting to be returned * through the @matches array. * * If @matches is %NULL @matches_len is ignored and the total number of resolved * matches is returned. * * Return: Number of matches resolved, or negative errno. 
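 *
 * Example (an illustrative sketch, not part of the original documentation;
 * my_match() is a hypothetical devcon_match_fn_t callback and "mode-switch"
 * only a sample connection identifier)::
 *
 *	void *muxes[2];
 *	int n;
 *
 *	n = fwnode_connection_find_matches(fwnode, "mode-switch", NULL,
 *					   my_match, muxes, ARRAY_SIZE(muxes));
 *	if (n < 0)
 *		return n;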
*/ int fwnode_connection_find_matches(const struct fwnode_handle *fwnode, const char *con_id, void *data, devcon_match_fn_t match, void **matches, unsigned int matches_len) { unsigned int count_graph; unsigned int count_ref; if (!fwnode || !match) return -EINVAL; count_graph = fwnode_graph_devcon_matches(fwnode, con_id, data, match, matches, matches_len); if (matches) { matches += count_graph; matches_len -= count_graph; } count_ref = fwnode_devcon_matches(fwnode, con_id, data, match, matches, matches_len); return count_graph + count_ref; } EXPORT_SYMBOL_GPL(fwnode_connection_find_matches);
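/*
 * Illustrative sketch, not part of the file above: how a caller might use
 * fwnode_graph_get_endpoint_by_id() from a driver probe path. The function
 * example_parse_link() and its error handling are hypothetical; only
 * dev_fwnode(), fwnode_graph_get_endpoint_by_id(), FWNODE_GRAPH_ENDPOINT_NEXT
 * and fwnode_handle_put() come from the API documented above.
 */
static int example_parse_link(struct device *dev)
{
	struct fwnode_handle *ep;

	/*
	 * Ask for port 0, endpoint 0; with FWNODE_GRAPH_ENDPOINT_NEXT the
	 * closest endpoint with a greater ID is returned if 0 is absent.
	 */
	ep = fwnode_graph_get_endpoint_by_id(dev_fwnode(dev), 0, 0,
					     FWNODE_GRAPH_ENDPOINT_NEXT);
	if (!ep)
		return -ENODEV;

	/* ... parse the endpoint properties and set up the remote link ... */

	fwnode_handle_put(ep);	/* the lookup returned a counted reference */
	return 0;
}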
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 2005 SGI, Christoph Lameter * Copyright (C) 2006 Nick Piggin * Copyright (C) 2012 Konstantin Khlebnikov * Copyright (C) 2016 Intel, Matthew Wilcox * Copyright (C) 2016 Intel, Ross Zwisler */ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/kmemleak.h> #include <linux/percpu.h> #include <linux/preempt.h> /* in_interrupt() */ #include <linux/radix-tree.h> #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/xarray.h> #include "radix-tree.h" /* * Radix tree node cache. */ struct kmem_cache *radix_tree_node_cachep; /* * The radix tree is variable-height, so an insert operation not only has * to build the branch to its corresponding item, it also has to build the * branch to existing items if the size has to be increased (by * radix_tree_extend). * * The worst case is a zero height tree with just a single item at index 0, * and then inserting an item at index ULONG_MAX. This requires 2 new branches * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. * Hence: */ #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) /* * The IDR does not have to be as high as the radix tree since it uses * signed integers, not unsigned longs. */ #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) /* * Per-cpu pool of preloaded nodes */ DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { .lock = INIT_LOCAL_LOCK(lock), }; EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads); static inline struct radix_tree_node *entry_to_node(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); } static inline void *node_to_entry(void *ptr) { return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); } #define RADIX_TREE_RETRY XA_RETRY_ENTRY static inline unsigned long get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) { return parent ?
slot - parent->slots : 0; } static unsigned int radix_tree_descend(const struct radix_tree_node *parent, struct radix_tree_node **nodep, unsigned long index) { unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); *nodep = (void *)entry; return offset; } static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) { return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK); } static inline void tag_set(struct radix_tree_node *node, unsigned int tag, int offset) { __set_bit(offset, node->tags[tag]); } static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, int offset) { __clear_bit(offset, node->tags[tag]); } static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, int offset) { return test_bit(offset, node->tags[tag]); } static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) { root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) { root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear_all(struct radix_tree_root *root) { root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1); } static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) { return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT)); } static inline unsigned root_tags_get(const struct radix_tree_root *root) { return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT; } static inline bool is_idr(const struct radix_tree_root *root) { return !!(root->xa_flags & ROOT_IS_IDR); } /* * Returns 1 if any slot in the node has this tag set. * Otherwise returns 0. */ static inline int any_tag_set(const struct radix_tree_node *node, unsigned int tag) { unsigned idx; for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { if (node->tags[tag][idx]) return 1; } return 0; } static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) { bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); } /** * radix_tree_find_next_bit - find the next set bit in a memory region * * @node: where to begin the search * @tag: the tag index * @offset: the bitnumber to start searching at * * Unrollable variant of find_next_bit() for constant size arrays. * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. * Returns next bit offset, or size if nothing found. 
*/ static __always_inline unsigned long radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, unsigned long offset) { const unsigned long *addr = node->tags[tag]; if (offset < RADIX_TREE_MAP_SIZE) { unsigned long tmp; addr += offset / BITS_PER_LONG; tmp = *addr >> (offset % BITS_PER_LONG); if (tmp) return __ffs(tmp) + offset; offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); while (offset < RADIX_TREE_MAP_SIZE) { tmp = *++addr; if (tmp) return __ffs(tmp) + offset; offset += BITS_PER_LONG; } } return RADIX_TREE_MAP_SIZE; } static unsigned int iter_offset(const struct radix_tree_iter *iter) { return iter->index & RADIX_TREE_MAP_MASK; } /* * The maximum index which can be stored in a radix tree */ static inline unsigned long shift_maxindex(unsigned int shift) { return (RADIX_TREE_MAP_SIZE << shift) - 1; } static inline unsigned long node_maxindex(const struct radix_tree_node *node) { return shift_maxindex(node->shift); } static unsigned long next_index(unsigned long index, const struct radix_tree_node *node, unsigned long offset) { return (index & ~node_maxindex(node)) + (offset << node->shift); } /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. */ static struct radix_tree_node * radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, struct radix_tree_root *root, unsigned int shift, unsigned int offset, unsigned int count, unsigned int nr_values) { struct radix_tree_node *ret = NULL; /* * Preload code isn't irq safe and it doesn't make sense to use * preloading during an interrupt anyway as all the allocations have * to be atomic. So just do normal allocation when in interrupt. */ if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { struct radix_tree_preload *rtp; /* * Even if the caller has preloaded, try to allocate from the * cache first for the new node to get accounted to the memory * cgroup. */ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask | __GFP_NOWARN); if (ret) goto out; /* * Provided the caller has preloaded here, we will always * succeed in getting a node here (and never reach * kmem_cache_alloc) */ rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr) { ret = rtp->nodes; rtp->nodes = ret->parent; rtp->nr--; } /* * Update the allocation stack trace as this is more useful * for debugging. */ kmemleak_update_trace(ret); goto out; } ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); out: BUG_ON(radix_tree_is_internal_node(ret)); if (ret) { ret->shift = shift; ret->offset = offset; ret->count = count; ret->nr_values = nr_values; ret->parent = parent; ret->array = root; } return ret; } void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); /* * Must only free zeroed nodes into the slab. We can be left with * non-NULL entries by radix_tree_free_nodes, so clear the entries * and tags here. */ memset(node->slots, 0, sizeof(node->slots)); memset(node->tags, 0, sizeof(node->tags)); INIT_LIST_HEAD(&node->private_list); kmem_cache_free(radix_tree_node_cachep, node); } static inline void radix_tree_node_free(struct radix_tree_node *node) { call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } /* * Load up this CPU's radix_tree_node buffer with sufficient objects to * ensure that the addition of a single element in the tree cannot fail. On * success, return zero, with preemption disabled. 
On error, return -ENOMEM * with preemption not disabled. * * To make use of this facility, the radix tree must be initialised without * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) { struct radix_tree_preload *rtp; struct radix_tree_node *node; int ret = -ENOMEM; /* * Nodes preloaded by one cgroup can be used by another cgroup, so * they should never be accounted to any particular memory cgroup. */ gfp_mask &= ~__GFP_ACCOUNT; local_lock(&radix_tree_preloads.lock); rtp = this_cpu_ptr(&radix_tree_preloads); while (rtp->nr < nr) { local_unlock(&radix_tree_preloads.lock); node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); if (node == NULL) goto out; local_lock(&radix_tree_preloads.lock); rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr < nr) { node->parent = rtp->nodes; rtp->nodes = node; rtp->nr++; } else { kmem_cache_free(radix_tree_node_cachep, node); } } ret = 0; out: return ret; } /* * Load up this CPU's radix_tree_node buffer with sufficient objects to * ensure that the addition of a single element in the tree cannot fail. On * success, return zero, with preemption disabled. On error, return -ENOMEM * with preemption not disabled. * * To make use of this facility, the radix tree must be initialised without * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ int radix_tree_preload(gfp_t gfp_mask) { /* Warn on non-sensical use... */ WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); } EXPORT_SYMBOL(radix_tree_preload); /* * The same as above function, except we don't guarantee preloading happens. * We do it, if we decide it helps. On success, return zero with preemption * disabled. On error, return -ENOMEM with preemption not disabled. */ int radix_tree_maybe_preload(gfp_t gfp_mask) { if (gfpflags_allow_blocking(gfp_mask)) return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); /* Preloading doesn't help anything with this gfp mask, skip it */ local_lock(&radix_tree_preloads.lock); return 0; } EXPORT_SYMBOL(radix_tree_maybe_preload); static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); *nodep = node; if (likely(radix_tree_is_internal_node(node))) { node = entry_to_node(node); *maxindex = node_maxindex(node); return node->shift + RADIX_TREE_MAP_SHIFT; } *maxindex = 0; return 0; } /* * Extend a radix tree so it can store key @index. */ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, unsigned long index, unsigned int shift) { void *entry; unsigned int maxshift; int tag; /* Figure out what the shift should be. 
*/ maxshift = shift; while (index > shift_maxindex(maxshift)) maxshift += RADIX_TREE_MAP_SHIFT; entry = rcu_dereference_raw(root->xa_head); if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) goto out; do { struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, root, shift, 0, 1, 0); if (!node) return -ENOMEM; if (is_idr(root)) { all_tag_set(node, IDR_FREE); if (!root_tag_get(root, IDR_FREE)) { tag_clear(node, IDR_FREE, 0); root_tag_set(root, IDR_FREE); } } else { /* Propagate the aggregated tag info to the new child */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { if (root_tag_get(root, tag)) tag_set(node, tag, 0); } } BUG_ON(shift > BITS_PER_LONG); if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; } else if (xa_is_value(entry)) { /* Moving a value entry root->xa_head to a node */ node->nr_values = 1; } /* * entry was already in the radix tree, so we do not need * rcu_assign_pointer here */ node->slots[0] = (void __rcu *)entry; entry = node_to_entry(node); rcu_assign_pointer(root->xa_head, entry); shift += RADIX_TREE_MAP_SHIFT; } while (shift <= maxshift); out: return maxshift + RADIX_TREE_MAP_SHIFT; } /** * radix_tree_shrink - shrink radix tree to minimum height * @root: radix tree root */ static inline bool radix_tree_shrink(struct radix_tree_root *root) { bool shrunk = false; for (;;) { struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); struct radix_tree_node *child; if (!radix_tree_is_internal_node(node)) break; node = entry_to_node(node); /* * The candidate node has more than one child, or its child * is not at the leftmost slot, we cannot shrink. */ if (node->count != 1) break; child = rcu_dereference_raw(node->slots[0]); if (!child) break; /* * For an IDR, we must not shrink entry 0 into the root in * case somebody calls idr_replace() with a pointer that * appears to be an internal entry */ if (!node->shift && is_idr(root)) break; if (radix_tree_is_internal_node(child)) entry_to_node(child)->parent = NULL; /* * We don't need rcu_assign_pointer(), since we are simply * moving the node from one part of the tree to another: if it * was safe to dereference the old pointer to it * (node->slots[0]), it will be safe to dereference the new * one (root->xa_head) as far as dependent read barriers go. */ root->xa_head = (void __rcu *)child; if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) root_tag_clear(root, IDR_FREE); /* * We have a dilemma here. The node's slot[0] must not be * NULLed in case there are concurrent lookups expecting to * find the item. However if this was a bottom-level node, * then it may be subject to the slot pointer being visible * to callers dereferencing it. If item corresponding to * slot[0] is subsequently deleted, these callers would expect * their slot to become empty sooner or later. * * For example, lockless pagecache will look up a slot, deref * the page pointer, and if the page has 0 refcount it means it * was concurrently deleted from pagecache so try the deref * again. Fortunately there is already a requirement for logic * to retry the entire slot lookup -- the indirect pointer * problem (replacing direct root node with an indirect pointer * also results in a stale slot). So tag the slot as indirect * to force callers to retry. 
*/ node->count = 0; if (!radix_tree_is_internal_node(child)) { node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; } WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); shrunk = true; } return shrunk; } static bool delete_node(struct radix_tree_root *root, struct radix_tree_node *node) { bool deleted = false; do { struct radix_tree_node *parent; if (node->count) { if (node_to_entry(node) == rcu_dereference_raw(root->xa_head)) deleted |= radix_tree_shrink(root); return deleted; } parent = node->parent; if (parent) { parent->slots[node->offset] = NULL; parent->count--; } else { /* * Shouldn't the tags already have all been cleared * by the caller? */ if (!is_idr(root)) root_tag_clear_all(root); root->xa_head = NULL; } WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); deleted = true; node = parent; } while (node); return deleted; } /** * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key * @nodep: returns node * @slotp: returns slot * * Create, if necessary, and return the node and slot for an item * at position @index in the radix tree @root. * * Until there is more than one item in the tree, no nodes are * allocated and @root->xa_head is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. * * Returns -ENOMEM, or 0 for success. */ static int __radix_tree_create(struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex; unsigned int shift, offset = 0; unsigned long max = index; gfp_t gfp = root_gfp_mask(root); shift = radix_tree_load_root(root, &child, &maxindex); /* Make sure the tree is high enough. */ if (max > maxindex) { int error = radix_tree_extend(root, gfp, max, shift); if (error < 0) return error; shift = error; child = rcu_dereference_raw(root->xa_head); } while (shift > 0) { shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. */ child = radix_tree_node_alloc(gfp, node, root, shift, offset, 0, 0); if (!child) return -ENOMEM; rcu_assign_pointer(*slot, node_to_entry(child)); if (node) node->count++; } else if (!radix_tree_is_internal_node(child)) break; /* Go a level down */ node = entry_to_node(child); offset = radix_tree_descend(node, &child, index); slot = &node->slots[offset]; } if (nodep) *nodep = node; if (slotp) *slotp = slot; return 0; } /* * Free any nodes below this node. The tree is presumed to not need * shrinking, and any user data in the tree is presumed to not need a * destructor called on it. If we need to add a destructor, we can * add that functionality later. Note that we may not clear tags or * slots from the tree as an RCU walker may still have a pointer into * this subtree. We could replace the entries with RADIX_TREE_RETRY, * but we'll still have to clear those in rcu_free. 
*/ static void radix_tree_free_nodes(struct radix_tree_node *node) { unsigned offset = 0; struct radix_tree_node *child = entry_to_node(node); for (;;) { void *entry = rcu_dereference_raw(child->slots[offset]); if (xa_is_node(entry) && child->shift) { child = entry_to_node(entry); offset = 0; continue; } offset++; while (offset == RADIX_TREE_MAP_SIZE) { struct radix_tree_node *old = child; offset = child->offset + 1; child = child->parent; WARN_ON_ONCE(!list_empty(&old->private_list)); radix_tree_node_free(old); if (old == entry_to_node(node)) return; } } } static inline int insert_entries(struct radix_tree_node *node, void __rcu **slot, void *item) { if (*slot) return -EEXIST; rcu_assign_pointer(*slot, item); if (node) { node->count++; if (xa_is_value(item)) node->nr_values++; } return 1; } /** * radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key * @item: item to insert * * Insert an item into the radix tree at position @index. */ int radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; void __rcu **slot; int error; BUG_ON(radix_tree_is_internal_node(item)); error = __radix_tree_create(root, index, &node, &slot); if (error) return error; error = insert_entries(node, slot, item); if (error < 0) return error; if (node) { unsigned offset = get_slot_offset(node, slot); BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, offset)); BUG_ON(tag_get(node, 2, offset)); } else { BUG_ON(root_tags_get(root)); } return 0; } EXPORT_SYMBOL(radix_tree_insert); /** * __radix_tree_lookup - lookup an item in a radix tree * @root: radix tree root * @index: index key * @nodep: returns node * @slotp: returns slot * * Lookup and return the item at position @index in the radix * tree @root. * * Until there is more than one item in the tree, no nodes are * allocated and @root->xa_head is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. */ void *__radix_tree_lookup(const struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void __rcu ***slotp) { struct radix_tree_node *node, *parent; unsigned long maxindex; void __rcu **slot; restart: parent = NULL; slot = (void __rcu **)&root->xa_head; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; while (radix_tree_is_internal_node(node)) { unsigned offset; parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); slot = parent->slots + offset; if (node == RADIX_TREE_RETRY) goto restart; if (parent->shift == 0) break; } if (nodep) *nodep = parent; if (slotp) *slotp = slot; return node; } /** * radix_tree_lookup_slot - lookup a slot in a radix tree * @root: radix tree root * @index: index key * * Returns: the slot corresponding to the position @index in the * radix tree @root. This is useful for update-if-exists operations. * * This function can be called under rcu_read_lock iff the slot is not * modified by radix_tree_replace_slot, otherwise it must be called * exclusive from other writers. Any dereference of the slot must be done * using radix_tree_deref_slot. 
*/ void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, unsigned long index) { void __rcu **slot; if (!__radix_tree_lookup(root, index, NULL, &slot)) return NULL; return slot; } EXPORT_SYMBOL(radix_tree_lookup_slot); /** * radix_tree_lookup - perform lookup operation on a radix tree * @root: radix tree root * @index: index key * * Lookup the item at the position @index in the radix tree @root. * * This function can be called under rcu_read_lock, however the caller * must manage lifetimes of leaf nodes (eg. RCU may also be used to free * them safely). No RCU barriers are required to access or modify the * returned item, however. */ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) { return __radix_tree_lookup(root, index, NULL, NULL); } EXPORT_SYMBOL(radix_tree_lookup); static void replace_slot(void __rcu **slot, void *item, struct radix_tree_node *node, int count, int values) { if (node && (count || values)) { node->count += count; node->nr_values += values; } rcu_assign_pointer(*slot, item); } static bool node_tag_get(const struct radix_tree_root *root, const struct radix_tree_node *node, unsigned int tag, unsigned int offset) { if (node) return tag_get(node, tag, offset); return root_tag_get(root, tag); } /* * IDR users want to be able to store NULL in the tree, so if the slot isn't * free, don't adjust the count, even if it's transitioning between NULL and * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still * have empty bits, but it only stores NULL in slots when they're being * deleted. */ static int calculate_count(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot, void *item, void *old) { if (is_idr(root)) { unsigned offset = get_slot_offset(node, slot); bool free = node_tag_get(root, node, IDR_FREE, offset); if (!free) return 0; if (!old) return 1; } return !!item - !!old; } /** * __radix_tree_replace - replace item in a slot * @root: radix tree root * @node: pointer to tree node * @slot: pointer to slot in @node * @item: new item to store in the slot. * * For use with __radix_tree_lookup(). Caller must hold tree write locked * across slot lookup and replacement. */ void __radix_tree_replace(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot, void *item) { void *old = rcu_dereference_raw(*slot); int values = !!xa_is_value(item) - !!xa_is_value(old); int count = calculate_count(root, node, slot, item, old); /* * This function supports replacing value entries and * deleting entries, but that needs accounting against the * node unless the slot is root->xa_head. */ WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && (count || values)); replace_slot(slot, item, node, count, values); if (!node) return; delete_node(root, node); } /** * radix_tree_replace_slot - replace item in a slot * @root: radix tree root * @slot: pointer to slot * @item: new item to store in the slot. * * For use with radix_tree_lookup_slot() and * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked * across slot lookup and replacement. * * NOTE: This cannot be used to switch between non-entries (empty slots), * regular entries, and value entries, as that requires accounting * inside the radix tree node. When switching from one type of entry or * deleting, use __radix_tree_lookup() and __radix_tree_replace() or * radix_tree_iter_replace(). 
*/ void radix_tree_replace_slot(struct radix_tree_root *root, void __rcu **slot, void *item) { __radix_tree_replace(root, NULL, slot, item); } EXPORT_SYMBOL(radix_tree_replace_slot); /** * radix_tree_iter_replace - replace item in a slot * @root: radix tree root * @iter: iterator state * @slot: pointer to slot * @item: new item to store in the slot. * * For use with radix_tree_for_each_slot(). * Caller must hold tree write locked. */ void radix_tree_iter_replace(struct radix_tree_root *root, const struct radix_tree_iter *iter, void __rcu **slot, void *item) { __radix_tree_replace(root, iter->node, slot, item); } static void node_tag_set(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) { while (node) { if (tag_get(node, tag, offset)) return; tag_set(node, tag, offset); offset = node->offset; node = node->parent; } if (!root_tag_get(root, tag)) root_tag_set(root, tag); } /** * radix_tree_tag_set - set a tag on a radix tree node * @root: radix tree root * @index: index key * @tag: tag index * * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) * corresponding to @index in the radix tree. From * the root all the way down to the leaf node. * * Returns the address of the tagged item. Setting a tag on a not-present * item is a bug. */ void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag) { struct radix_tree_node *node, *parent; unsigned long maxindex; radix_tree_load_root(root, &node, &maxindex); BUG_ON(index > maxindex); while (radix_tree_is_internal_node(node)) { unsigned offset; parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); BUG_ON(!node); if (!tag_get(parent, tag, offset)) tag_set(parent, tag, offset); } /* set the root's tag bit */ if (!root_tag_get(root, tag)) root_tag_set(root, tag); return node; } EXPORT_SYMBOL(radix_tree_tag_set); static void node_tag_clear(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) { while (node) { if (!tag_get(node, tag, offset)) return; tag_clear(node, tag, offset); if (any_tag_set(node, tag)) return; offset = node->offset; node = node->parent; } /* clear the root's tag bit */ if (root_tag_get(root, tag)) root_tag_clear(root, tag); } /** * radix_tree_tag_clear - clear a tag on a radix tree node * @root: radix tree root * @index: index key * @tag: tag index * * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) * corresponding to @index in the radix tree. If this causes * the leaf node to have no tags set then clear the tag in the * next-to-leaf node, etc. * * Returns the address of the tagged item on success, else NULL. ie: * has the same return value and semantics as radix_tree_lookup(). 
*/ void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag) { struct radix_tree_node *node, *parent; unsigned long maxindex; int offset = 0; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; parent = NULL; while (radix_tree_is_internal_node(node)) { parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); } if (node) node_tag_clear(root, parent, tag, offset); return node; } EXPORT_SYMBOL(radix_tree_tag_clear); /** * radix_tree_iter_tag_clear - clear a tag on the current iterator entry * @root: radix tree root * @iter: iterator state * @tag: tag to clear */ void radix_tree_iter_tag_clear(struct radix_tree_root *root, const struct radix_tree_iter *iter, unsigned int tag) { node_tag_clear(root, iter->node, tag, iter_offset(iter)); } /** * radix_tree_tag_get - get a tag on a radix tree node * @root: radix tree root * @index: index key * @tag: tag index (< RADIX_TREE_MAX_TAGS) * * Return values: * * 0: tag not present or not set * 1: tag set * * Note that the return value of this function may not be relied on, even if * the RCU lock is held, unless tag modification and node deletion are excluded * from concurrency. */ int radix_tree_tag_get(const struct radix_tree_root *root, unsigned long index, unsigned int tag) { struct radix_tree_node *node, *parent; unsigned long maxindex; if (!root_tag_get(root, tag)) return 0; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return 0; while (radix_tree_is_internal_node(node)) { unsigned offset; parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); if (!tag_get(parent, tag, offset)) return 0; if (node == RADIX_TREE_RETRY) break; } return 1; } EXPORT_SYMBOL(radix_tree_tag_get); /* Construct iter->tags bit-mask from node->tags[tag] array */ static void set_iter_tags(struct radix_tree_iter *iter, struct radix_tree_node *node, unsigned offset, unsigned tag) { unsigned tag_long = offset / BITS_PER_LONG; unsigned tag_bit = offset % BITS_PER_LONG; if (!node) { iter->tags = 1; return; } iter->tags = node->tags[tag][tag_long] >> tag_bit; /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ if (tag_long < RADIX_TREE_TAG_LONGS - 1) { /* Pick tags from next element */ if (tag_bit) iter->tags |= node->tags[tag][tag_long + 1] << (BITS_PER_LONG - tag_bit); /* Clip chunk size, here only BITS_PER_LONG tags */ iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); } } void __rcu **radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter) { iter->index = __radix_tree_iter_add(iter, 1); iter->next_index = iter->index; iter->tags = 0; return NULL; } EXPORT_SYMBOL(radix_tree_iter_resume); /** * radix_tree_next_chunk - find next chunk of slots for iteration * * @root: radix tree root * @iter: iterator state * @flags: RADIX_TREE_ITER_* flags and tag index * Returns: pointer to chunk first slot, or NULL if iteration is over */ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags) { unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *node, *child; unsigned long index, offset, maxindex; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) return NULL; /* * Catch next_index overflow after ~0UL. iter->index never overflows * during iterating; it can be zero only at the beginning. * And we cannot overflow iter->next_index in a single step, * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. 
* * This condition also used by radix_tree_next_slot() to stop * contiguous iterating, and forbid switching to the next chunk. */ index = iter->next_index; if (!index && iter->index) return NULL; restart: radix_tree_load_root(root, &child, &maxindex); if (index > maxindex) return NULL; if (!child) return NULL; if (!radix_tree_is_internal_node(child)) { /* Single-slot tree */ iter->index = index; iter->next_index = maxindex + 1; iter->tags = 1; iter->node = NULL; return (void __rcu **)&root->xa_head; } do { node = entry_to_node(child); offset = radix_tree_descend(node, &child, index); if ((flags & RADIX_TREE_ITER_TAGGED) ? !tag_get(node, tag, offset) : !child) { /* Hole detected */ if (flags & RADIX_TREE_ITER_CONTIG) return NULL; if (flags & RADIX_TREE_ITER_TAGGED) offset = radix_tree_find_next_bit(node, tag, offset + 1); else while (++offset < RADIX_TREE_MAP_SIZE) { void *slot = rcu_dereference_raw( node->slots[offset]); if (slot) break; } index &= ~node_maxindex(node); index += offset << node->shift; /* Overflow after ~0UL */ if (!index) return NULL; if (offset == RADIX_TREE_MAP_SIZE) goto restart; child = rcu_dereference_raw(node->slots[offset]); } if (!child) goto restart; if (child == RADIX_TREE_RETRY) break; } while (node->shift && radix_tree_is_internal_node(child)); /* Update the iterator state */ iter->index = (index &~ node_maxindex(node)) | offset; iter->next_index = (index | node_maxindex(node)) + 1; iter->node = node; if (flags & RADIX_TREE_ITER_TAGGED) set_iter_tags(iter, node, offset, tag); return node->slots + offset; } EXPORT_SYMBOL(radix_tree_next_chunk); /** * radix_tree_gang_lookup - perform multiple lookup on a radix tree * @root: radix tree root * @results: where the results of the lookup are placed * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * * Performs an index-ascending scan of the tree for present items. Places * them at *@results and returns the number of items which were placed at * *@results. * * The implementation is naive. * * Like radix_tree_lookup, radix_tree_gang_lookup may be called under * rcu_read_lock. In this case, rather than the returned results being * an atomic snapshot of the tree at a single point in time, the * semantics of an RCU protected gang lookup are as though multiple * radix_tree_lookups have been issued in individual locks, and results * stored in 'results'. */ unsigned int radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items) { struct radix_tree_iter iter; void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) return 0; radix_tree_for_each_slot(slot, root, &iter, first_index) { results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } if (++ret == max_items) break; } return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup); /** * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree * based on a tag * @root: radix tree root * @results: where the results of the lookup are placed * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @tag: the tag index (< RADIX_TREE_MAX_TAGS) * * Performs an index-ascending scan of the tree for present items which * have the tag indexed by @tag set. Places the items at *@results and * returns the number of items which were placed at *@results. 
*/ unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag) { struct radix_tree_iter iter; void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } if (++ret == max_items) break; } return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup_tag); /** * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a * radix tree based on a tag * @root: radix tree root * @results: where the results of the lookup are placed * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @tag: the tag index (< RADIX_TREE_MAX_TAGS) * * Performs an index-ascending scan of the tree for present items which * have the tag indexed by @tag set. Places the slots at *@results and * returns the number of slots which were placed at *@results. */ unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, void __rcu ***results, unsigned long first_index, unsigned int max_items, unsigned int tag) { struct radix_tree_iter iter; void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { results[ret] = slot; if (++ret == max_items) break; } return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { void *old = rcu_dereference_raw(*slot); int values = xa_is_value(old) ? -1 : 0; unsigned offset = get_slot_offset(node, slot); int tag; if (is_idr(root)) node_tag_set(root, node, IDR_FREE, offset); else for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) node_tag_clear(root, node, tag, offset); replace_slot(slot, NULL, node, -1, values); return node && delete_node(root, node); } /** * radix_tree_iter_delete - delete the entry at this iterator position * @root: radix tree root * @iter: iterator state * @slot: pointer to slot * * Delete the entry at the position currently pointed to by the iterator. * This may result in the current node being freed; if it is, the iterator * is advanced so that it will not reference the freed memory. This * function may be called without any locking if there are no other threads * which can access this tree. */ void radix_tree_iter_delete(struct radix_tree_root *root, struct radix_tree_iter *iter, void __rcu **slot) { if (__radix_tree_delete(root, iter->node, slot)) iter->index = iter->next_index; } EXPORT_SYMBOL(radix_tree_iter_delete); /** * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root * @index: index key * @item: expected item * * Remove @item at @index from the radix tree rooted at @root. * * Return: the deleted entry, or %NULL if it was not present * or the entry at the given @index was not @item. 
*/ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node = NULL; void __rcu **slot = NULL; void *entry; entry = __radix_tree_lookup(root, index, &node, &slot); if (!slot) return NULL; if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, get_slot_offset(node, slot)))) return NULL; if (item && entry != item) return NULL; __radix_tree_delete(root, node, slot); return entry; } EXPORT_SYMBOL(radix_tree_delete_item); /** * radix_tree_delete - delete an entry from a radix tree * @root: radix tree root * @index: index key * * Remove the entry at @index from the radix tree rooted at @root. * * Return: The deleted entry, or %NULL if it was not present. */ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { return radix_tree_delete_item(root, index, NULL); } EXPORT_SYMBOL(radix_tree_delete); /** * radix_tree_tagged - test whether any items in the tree are tagged * @root: radix tree root * @tag: tag to test */ int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) { return root_tag_get(root, tag); } EXPORT_SYMBOL(radix_tree_tagged); /** * idr_preload - preload for idr_alloc() * @gfp_mask: allocation mask to use for preloading * * Preallocate memory to use for the next call to idr_alloc(). This function * returns with preemption disabled. It will be enabled by idr_preload_end(). */ void idr_preload(gfp_t gfp_mask) { if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) local_lock(&radix_tree_preloads.lock); } EXPORT_SYMBOL(idr_preload); void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex, start = iter->next_index; unsigned int shift, offset = 0; grow: shift = radix_tree_load_root(root, &child, &maxindex); if (!radix_tree_tagged(root, IDR_FREE)) start = max(start, maxindex + 1); if (start > max) return ERR_PTR(-ENOSPC); if (start > maxindex) { int error = radix_tree_extend(root, gfp, start, shift); if (error < 0) return ERR_PTR(error); shift = error; child = rcu_dereference_raw(root->xa_head); } if (start == 0 && shift == 0) shift = RADIX_TREE_MAP_SHIFT; while (shift) { shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. */ child = radix_tree_node_alloc(gfp, node, root, shift, offset, 0, 0); if (!child) return ERR_PTR(-ENOMEM); all_tag_set(child, IDR_FREE); rcu_assign_pointer(*slot, node_to_entry(child)); if (node) node->count++; } else if (!radix_tree_is_internal_node(child)) break; node = entry_to_node(child); offset = radix_tree_descend(node, &child, start); if (!tag_get(node, IDR_FREE, offset)) { offset = radix_tree_find_next_bit(node, IDR_FREE, offset + 1); start = next_index(start, node, offset); if (start > max || start == 0) return ERR_PTR(-ENOSPC); while (offset == RADIX_TREE_MAP_SIZE) { offset = node->offset + 1; node = node->parent; if (!node) goto grow; shift = node->shift; } child = rcu_dereference_raw(node->slots[offset]); } slot = &node->slots[offset]; } iter->index = start; if (node) iter->next_index = 1 + min(max, (start | node_maxindex(node))); else iter->next_index = 1; iter->node = node; set_iter_tags(iter, node, offset, IDR_FREE); return slot; } /** * idr_destroy - release all internal memory from an IDR * @idr: idr handle * * After this function is called, the IDR is empty, and may be reused or * the data structure containing it may be freed. 
* * A typical clean-up sequence for objects stored in an idr tree will use * idr_for_each() to free all objects, if necessary, then idr_destroy() to * free the memory used to keep track of those objects. */ void idr_destroy(struct idr *idr) { struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); if (radix_tree_is_internal_node(node)) radix_tree_free_nodes(node); idr->idr_rt.xa_head = NULL; root_tag_set(&idr->idr_rt, IDR_FREE); } EXPORT_SYMBOL(idr_destroy); static void radix_tree_node_ctor(void *arg) { struct radix_tree_node *node = arg; memset(node, 0, sizeof(*node)); INIT_LIST_HEAD(&node->private_list); } static int radix_tree_cpu_dead(unsigned int cpu) { struct radix_tree_preload *rtp; struct radix_tree_node *node; /* Free per-cpu pool of preloaded nodes */ rtp = &per_cpu(radix_tree_preloads, cpu); while (rtp->nr) { node = rtp->nodes; rtp->nodes = node->parent; kmem_cache_free(radix_tree_node_cachep, node); rtp->nr--; } return 0; } void __init radix_tree_init(void) { int ret; BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); BUILD_BUG_ON(XA_CHUNK_SIZE > 255); radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, radix_tree_node_ctor); ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", NULL, radix_tree_cpu_dead); WARN_ON(ret < 0); }
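/*
 * Illustrative sketch, not part of the file above: the usual
 * preload/insert/lookup pattern for the radix tree API. The tree, lock and
 * function names are hypothetical; note the root gfp mask omits
 * __GFP_DIRECT_RECLAIM so that node allocation during the insert actually
 * consumes the preloaded nodes, as required by the __radix_tree_preload()
 * comment above.
 */
static RADIX_TREE(example_tree, GFP_ATOMIC);
static DEFINE_SPINLOCK(example_lock);

static int example_store(unsigned long index, void *item)
{
	int err;

	/* Preallocate nodes so the insert under the spinlock cannot fail. */
	err = radix_tree_preload(GFP_KERNEL);
	if (err)
		return err;

	spin_lock(&example_lock);
	err = radix_tree_insert(&example_tree, index, item);
	spin_unlock(&example_lock);

	radix_tree_preload_end();	/* re-enables preemption */
	return err;
}

static void *example_find(unsigned long index)
{
	void *item;

	/* Lookups may run under RCU; the caller manages item lifetime. */
	rcu_read_lock();
	item = radix_tree_lookup(&example_tree, index);
	rcu_read_unlock();

	return item;
}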
// SPDX-License-Identifier: GPL-2.0-only #include <linux/bitmap.h> #include <linux/bug.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/xarray.h> /** * idr_alloc_u32() - Allocate an ID. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @nextid: Pointer to an ID. * @max: The maximum ID to allocate (inclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @nextid and @max. * Note that @max is inclusive whereas the @end parameter to idr_alloc() * is exclusive. The new ID is assigned to @nextid before the pointer * is inserted into the IDR, so if @nextid points into the object pointed * to by @ptr, a concurrent lookup will not find an uninitialised ID. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers. 
* * Return: 0 if an ID was allocated, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. If an error occurred, * @nextid is unchanged. */ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, unsigned long max, gfp_t gfp) { struct radix_tree_iter iter; void __rcu **slot; unsigned int base = idr->idr_base; unsigned int id = *nextid; if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR))) idr->idr_rt.xa_flags |= IDR_RT_MARKER; id = (id < base) ? 0 : id - base; radix_tree_iter_init(&iter, id); slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base); if (IS_ERR(slot)) return PTR_ERR(slot); *nextid = iter.index + base; /* there is a memory barrier inside radix_tree_iter_replace() */ radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); return 0; } EXPORT_SYMBOL_GPL(idr_alloc_u32); /** * idr_alloc() - Allocate an ID. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @start: The minimum ID (inclusive). * @end: The maximum ID (exclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @start and @end. If * @end is <= 0, it is treated as one larger than %INT_MAX. This allows * callers to use @start + N as @end as long as N is within integer range. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers. * * Return: The newly allocated ID, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. */ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { u32 id = start; int ret; if (WARN_ON_ONCE(start < 0)) return -EINVAL; ret = idr_alloc_u32(idr, ptr, &id, end > 0 ? end - 1 : INT_MAX, gfp); if (ret) return ret; return id; } EXPORT_SYMBOL_GPL(idr_alloc); /** * idr_alloc_cyclic() - Allocate an ID cyclically. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @start: The minimum ID (inclusive). * @end: The maximum ID (exclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @start and @end. If * @end is <= 0, it is treated as one larger than %INT_MAX. This allows * callers to use @start + N as @end as long as N is within integer range. * The search for an unused ID will start at the last ID allocated and will * wrap around to @start if no free IDs are found before reaching @end. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers. * * Return: The newly allocated ID, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. */ int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { u32 id = idr->idr_next; int err, max = end > 0 ? end - 1 : INT_MAX; if ((int)id < start) id = start; err = idr_alloc_u32(idr, ptr, &id, max, gfp); if ((err == -ENOSPC) && (id > start)) { id = start; err = idr_alloc_u32(idr, ptr, &id, max, gfp); } if (err) return err; idr->idr_next = id + 1; return id; } EXPORT_SYMBOL(idr_alloc_cyclic); /** * idr_remove() - Remove an ID from the IDR. * @idr: IDR handle. * @id: Pointer ID. * * Removes this ID from the IDR. If the ID was not previously in the IDR, * this function returns %NULL. 
* * Since this function modifies the IDR, the caller should provide their * own locking to ensure that concurrent modification of the same IDR is * not possible. * * Return: The pointer formerly associated with this ID. */ void *idr_remove(struct idr *idr, unsigned long id) { return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL); } EXPORT_SYMBOL_GPL(idr_remove); /** * idr_find() - Return pointer for given ID. * @idr: IDR handle. * @id: Pointer ID. * * Looks up the pointer associated with this ID. A %NULL pointer may * indicate that @id is not allocated or that the %NULL pointer was * associated with this ID. * * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. * * Return: The pointer associated with this ID. */ void *idr_find(const struct idr *idr, unsigned long id) { return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base); } EXPORT_SYMBOL_GPL(idr_find); /** * idr_for_each() - Iterate through all stored pointers. * @idr: IDR handle. * @fn: Function to be called for each pointer. * @data: Data passed to callback function. * * The callback function will be called for each entry in @idr, passing * the ID, the entry and @data. * * If @fn returns anything other than %0, the iteration stops and that * value is returned from this function. * * idr_for_each() can be called concurrently with idr_alloc() and * idr_remove() if protected by RCU. Newly added entries may not be * seen and deleted entries may be seen, but adding and removing entries * will not cause other entries to be skipped, nor spurious ones to be seen. */ int idr_for_each(const struct idr *idr, int (*fn)(int id, void *p, void *data), void *data) { struct radix_tree_iter iter; void __rcu **slot; int base = idr->idr_base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { int ret; unsigned long id = iter.index + base; if (WARN_ON_ONCE(id > INT_MAX)) break; ret = fn(id, rcu_dereference_raw(*slot), data); if (ret) return ret; } return 0; } EXPORT_SYMBOL(idr_for_each); /** * idr_get_next_ul() - Find next populated entry. * @idr: IDR handle. * @nextid: Pointer to an ID. * * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated * to the ID of the found value. To use in a loop, the value pointed to by * nextid must be incremented by the user. */ void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) { struct radix_tree_iter iter; void __rcu **slot; void *entry = NULL; unsigned long base = idr->idr_base; unsigned long id = *nextid; id = (id < base) ? 0 : id - base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) { entry = rcu_dereference_raw(*slot); if (!entry) continue; if (!xa_is_internal(entry)) break; if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry)) break; slot = radix_tree_iter_retry(&iter); } if (!slot) return NULL; *nextid = iter.index + base; return entry; } EXPORT_SYMBOL(idr_get_next_ul); /** * idr_get_next() - Find next populated entry. * @idr: IDR handle. * @nextid: Pointer to an ID. * * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated * to the ID of the found value. To use in a loop, the value pointed to by * nextid must be incremented by the user. 
*/ void *idr_get_next(struct idr *idr, int *nextid) { unsigned long id = *nextid; void *entry = idr_get_next_ul(idr, &id); if (WARN_ON_ONCE(id > INT_MAX)) return NULL; *nextid = id; return entry; } EXPORT_SYMBOL(idr_get_next); /** * idr_replace() - replace pointer for given ID. * @idr: IDR handle. * @ptr: New pointer to associate with the ID. * @id: ID to change. * * Replace the pointer registered with an ID and return the old value. * This function can be called under the RCU read lock concurrently with * idr_alloc() and idr_remove() (as long as the ID being removed is not * the one being replaced!). * * Returns: the old value on success. %-ENOENT indicates that @id was not * found. %-EINVAL indicates that @ptr was not valid. */ void *idr_replace(struct idr *idr, void *ptr, unsigned long id) { struct radix_tree_node *node; void __rcu **slot = NULL; void *entry; id -= idr->idr_base; entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) return ERR_PTR(-ENOENT); __radix_tree_replace(&idr->idr_rt, node, slot, ptr); return entry; } EXPORT_SYMBOL(idr_replace); /** * DOC: IDA description * * The IDA is an ID allocator which does not provide the ability to * associate an ID with a pointer. As such, it only needs to store one * bit per ID, and so is more space efficient than an IDR. To use an IDA, * define it using DEFINE_IDA() (or embed a &struct ida in a data structure, * then initialise it using ida_init()). To allocate a new ID, call * ida_alloc(), ida_alloc_min(), ida_alloc_max() or ida_alloc_range(). * To free an ID, call ida_free(). * * ida_destroy() can be used to dispose of an IDA without needing to * free the individual IDs in it. You can use ida_is_empty() to find * out whether the IDA has any IDs currently allocated. * * The IDA handles its own locking. It is safe to call any of the IDA * functions without synchronisation in your code. * * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward * limitation, it should be quite straightforward to raise the maximum. */ /* * Developer's notes: * * The IDA uses the functionality provided by the XArray to store bitmaps in * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap * have been set. * * I considered telling the XArray that each slot is an order-10 node * and indexing by bit number, but the XArray can't allow a single multi-index * entry in the head, which would significantly increase memory consumption * for the IDA. So instead we divide the index by the number of bits in the * leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given * leaf, instead of allocating a 128-byte bitmap, we store the bits * as a value entry. Value entries never have the XA_FREE_MARK cleared * because we can always convert them into a bitmap entry. * * It would be possible to optimise further; once we've run out of a * single 128-byte bitmap, we currently switch to a 576-byte node, put * the 128-byte bitmap in the first entry and then start allocating extra * 128-byte entries. We could instead use the 512 bytes of the node's * data as a bitmap before moving to that scheme. I do not believe this * is a worthwhile optimisation; Rasmus Villemoes surveyed the current * users of the IDA and almost none of them use more than 1024 entries. * Those that do use more than the 8192 IDs that the 512 bytes would * provide. * * The IDA always uses a lock to alloc/free. 
If we add a 'test_bit' * equivalent, it will still need locking. Going to RCU lookup would require * using RCU to free bitmaps, and that's not trivial without embedding an * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte * bitmap, which is excessive. */ /** * ida_alloc_range() - Allocate an unused ID. * @ida: IDA handle. * @min: Lowest ID to allocate. * @max: Highest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between @min and @max, inclusive. The allocated ID will * not exceed %INT_MAX, even if @max is larger. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, gfp_t gfp) { XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS); unsigned bit = min % IDA_BITMAP_BITS; unsigned long flags; struct ida_bitmap *bitmap, *alloc = NULL; if ((int)min < 0) return -ENOSPC; if ((int)max < 0) max = INT_MAX; retry: xas_lock_irqsave(&xas, flags); next: bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK); if (xas.xa_index > min / IDA_BITMAP_BITS) bit = 0; if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (xa_is_value(bitmap)) { unsigned long tmp = xa_to_value(bitmap); if (bit < BITS_PER_XA_VALUE) { bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit); if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (bit < BITS_PER_XA_VALUE) { tmp |= 1UL << bit; xas_store(&xas, xa_mk_value(tmp)); goto out; } } bitmap = alloc; if (!bitmap) bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); if (!bitmap) goto alloc; bitmap->bitmap[0] = tmp; xas_store(&xas, bitmap); if (xas_error(&xas)) { bitmap->bitmap[0] = 0; goto out; } } if (bitmap) { bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (bit == IDA_BITMAP_BITS) goto next; __set_bit(bit, bitmap->bitmap); if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) xas_clear_mark(&xas, XA_FREE_MARK); } else { if (bit < BITS_PER_XA_VALUE) { bitmap = xa_mk_value(1UL << bit); } else { bitmap = alloc; if (!bitmap) bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); if (!bitmap) goto alloc; __set_bit(bit, bitmap->bitmap); } xas_store(&xas, bitmap); } out: xas_unlock_irqrestore(&xas, flags); if (xas_nomem(&xas, gfp)) { xas.xa_index = min / IDA_BITMAP_BITS; bit = min % IDA_BITMAP_BITS; goto retry; } if (bitmap != alloc) kfree(alloc); if (xas_error(&xas)) return xas_error(&xas); return xas.xa_index * IDA_BITMAP_BITS + bit; alloc: xas_unlock_irqrestore(&xas, flags); alloc = kzalloc(sizeof(*bitmap), gfp); if (!alloc) return -ENOMEM; xas_set(&xas, min / IDA_BITMAP_BITS); bit = min % IDA_BITMAP_BITS; goto retry; nospc: xas_unlock_irqrestore(&xas, flags); kfree(alloc); return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); /** * ida_free() - Release an allocated ID. * @ida: IDA handle. * @id: Previously allocated ID. * * Context: Any context. It is safe to call this function without * locking in your code. 
*/ void ida_free(struct ida *ida, unsigned int id) { XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS); unsigned bit = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; unsigned long flags; if ((int)id < 0) return; xas_lock_irqsave(&xas, flags); bitmap = xas_load(&xas); if (xa_is_value(bitmap)) { unsigned long v = xa_to_value(bitmap); if (bit >= BITS_PER_XA_VALUE) goto err; if (!(v & (1UL << bit))) goto err; v &= ~(1UL << bit); if (!v) goto delete; xas_store(&xas, xa_mk_value(v)); } else { if (!bitmap || !test_bit(bit, bitmap->bitmap)) goto err; __clear_bit(bit, bitmap->bitmap); xas_set_mark(&xas, XA_FREE_MARK); if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { kfree(bitmap); delete: xas_store(&xas, NULL); } } xas_unlock_irqrestore(&xas, flags); return; err: xas_unlock_irqrestore(&xas, flags); WARN(1, "ida_free called for id=%d which is not allocated.\n", id); } EXPORT_SYMBOL(ida_free); /** * ida_destroy() - Free all IDs. * @ida: IDA handle. * * Calling this function frees all IDs and releases all resources used * by an IDA. When this call returns, the IDA is empty and can be reused * or freed. If the IDA is already empty, there is no need to call this * function. * * Context: Any context. It is safe to call this function without * locking in your code. */ void ida_destroy(struct ida *ida) { XA_STATE(xas, &ida->xa, 0); struct ida_bitmap *bitmap; unsigned long flags; xas_lock_irqsave(&xas, flags); xas_for_each(&xas, bitmap, ULONG_MAX) { if (!xa_is_value(bitmap)) kfree(bitmap); xas_store(&xas, NULL); } xas_unlock_irqrestore(&xas, flags); } EXPORT_SYMBOL(ida_destroy); #ifndef __KERNEL__ extern void xa_dump_index(unsigned long index, unsigned int shift); #define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS) static void ida_dump_entry(void *entry, unsigned long index) { unsigned long i; if (!entry) return; if (xa_is_node(entry)) { struct xa_node *node = xa_to_node(entry); unsigned int shift = node->shift + IDA_CHUNK_SHIFT + XA_CHUNK_SHIFT; xa_dump_index(index * IDA_BITMAP_BITS, shift); xa_dump_node(node); for (i = 0; i < XA_CHUNK_SIZE; i++) ida_dump_entry(node->slots[i], index | (i << node->shift)); } else if (xa_is_value(entry)) { xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG)); pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry); } else { struct ida_bitmap *bitmap = entry; xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT); pr_cont("bitmap: %p data", bitmap); for (i = 0; i < IDA_BITMAP_LONGS; i++) pr_cont(" %lx", bitmap->bitmap[i]); pr_cont("\n"); } } static void ida_dump(struct ida *ida) { struct xarray *xa = &ida->xa; pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head, xa->xa_flags >> ROOT_TAG_SHIFT); ida_dump_entry(xa->xa_head, 0); } #endif
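/*
 * Example usage of the IDA interfaces documented above (an illustrative
 * sketch, not part of the IDR/IDA source itself): handing out and giving
 * back small instance numbers. The names example_ida, example_register()
 * and example_unregister() are invented for this sketch; DEFINE_IDA(),
 * ida_alloc_max() and ida_free() are the calls described in the
 * "IDA description" DOC above. The IDA does its own locking, so no lock
 * is taken here.
 */
#include <linux/gfp.h>
#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int example_register(void)
{
	/* Returns an unused ID in the range [0, 63], or a negative errno. */
	return ida_alloc_max(&example_ida, 63, GFP_KERNEL);
}

static void example_unregister(int id)
{
	/* Return the ID so a later ida_alloc_max() call can reuse it. */
	ida_free(&example_ida, id);
}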
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_JIFFIES_H
#define _LINUX_JIFFIES_H

#include <linux/cache.h>
#include <linux/limits.h>
#include <linux/math64.h>
#include <linux/minmax.h>
#include <linux/types.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <vdso/jiffies.h>
#include <asm/param.h>			/* for HZ */
#include <generated/timeconst.h>

/*
 * The following defines establish the engineering parameters of the PLL
 * model. The HZ variable establishes the timer interrupt frequency, 100 Hz
 * for the SunOS kernel, 256 Hz for the Ultrix kernel and 1024 Hz for the
 * OSF/1 kernel. The SHIFT_HZ define expresses the same value as the
 * nearest power of two in order to avoid hardware multiply operations.
 */
#if HZ >= 12 && HZ < 24
# define SHIFT_HZ	4
#elif HZ >= 24 && HZ < 48
# define SHIFT_HZ	5
#elif HZ >= 48 && HZ < 96
# define SHIFT_HZ	6
#elif HZ >= 96 && HZ < 192
# define SHIFT_HZ	7
#elif HZ >= 192 && HZ < 384
# define SHIFT_HZ	8
#elif HZ >= 384 && HZ < 768
# define SHIFT_HZ	9
#elif HZ >= 768 && HZ < 1536
# define SHIFT_HZ	10
#elif HZ >= 1536 && HZ < 3072
# define SHIFT_HZ	11
#elif HZ >= 3072 && HZ < 6144
# define SHIFT_HZ	12
#elif HZ >= 6144 && HZ < 12288
# define SHIFT_HZ	13
#else
# error Invalid value of HZ.
#endif /* Suppose we want to divide two numbers NOM and DEN: NOM/DEN, then we can * improve accuracy by shifting LSH bits, hence calculating: * (NOM << LSH) / DEN * This however means trouble for large NOM, because (NOM << LSH) may no * longer fit in 32 bits. The following way of calculating this gives us * some slack, under the following conditions: * - (NOM / DEN) fits in (32 - LSH) bits. * - (NOM % DEN) fits in (32 - LSH) bits. */ #define SH_DIV(NOM,DEN,LSH) ( (((NOM) / (DEN)) << (LSH)) \ + ((((NOM) % (DEN)) << (LSH)) + (DEN) / 2) / (DEN)) /* LATCH is used in the interval timer and ftape setup. */ #define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */ extern int register_refined_jiffies(long clock_tick_rate); /* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) /* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ #define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) #ifndef __jiffy_arch_data #define __jiffy_arch_data #endif /* * The 64-bit value is not atomic on 32-bit systems - you MUST NOT read it * without sampling the sequence number in jiffies_lock. * get_jiffies_64() will do this for you as appropriate. * * jiffies and jiffies_64 are at the same address for little-endian systems * and for 64-bit big-endian systems. * On 32-bit big-endian systems, jiffies is the lower 32 bits of jiffies_64 * (i.e., at address @jiffies_64 + 4). * See arch/ARCH/kernel/vmlinux.lds.S */ extern u64 __cacheline_aligned_in_smp jiffies_64; extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); #else /** * get_jiffies_64 - read the 64-bit non-atomic jiffies_64 value * * When BITS_PER_LONG < 64, this uses sequence number sampling using * jiffies_lock to protect the 64-bit read. * * Return: current 64-bit jiffies value */ static inline u64 get_jiffies_64(void) { return (u64)jiffies; } #endif /** * DOC: General information about time_* inlines * * These inlines deal with timer wrapping correctly. You are strongly encouraged * to use them: * * #. Because people otherwise forget * #. Because if the timer wrap changes in future you won't have to alter your * driver code. */ /** * time_after - returns true if the time a is after time b. * @a: first comparable as unsigned long * @b: second comparable as unsigned long * * Do this with "<0" and ">=0" to only test the sign of the result. A * good compiler would generate better code (and a really good compiler * wouldn't care). Gcc is currently neither. * * Return: %true is time a is after time b, otherwise %false. */ #define time_after(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ ((long)((b) - (a)) < 0)) /** * time_before - returns true if the time a is before time b. * @a: first comparable as unsigned long * @b: second comparable as unsigned long * * Return: %true is time a is before time b, otherwise %false. */ #define time_before(a,b) time_after(b,a) /** * time_after_eq - returns true if the time a is after or the same as time b. * @a: first comparable as unsigned long * @b: second comparable as unsigned long * * Return: %true is time a is after or the same as time b, otherwise %false. */ #define time_after_eq(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ ((long)((a) - (b)) >= 0)) /** * time_before_eq - returns true if the time a is before or the same as time b. 
* @a: first comparable as unsigned long * @b: second comparable as unsigned long * * Return: %true is time a is before or the same as time b, otherwise %false. */ #define time_before_eq(a,b) time_after_eq(b,a) /** * time_in_range - Calculate whether a is in the range of [b, c]. * @a: time to test * @b: beginning of the range * @c: end of the range * * Return: %true is time a is in the range [b, c], otherwise %false. */ #define time_in_range(a,b,c) \ (time_after_eq(a,b) && \ time_before_eq(a,c)) /** * time_in_range_open - Calculate whether a is in the range of [b, c). * @a: time to test * @b: beginning of the range * @c: end of the range * * Return: %true is time a is in the range [b, c), otherwise %false. */ #define time_in_range_open(a,b,c) \ (time_after_eq(a,b) && \ time_before(a,c)) /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64()). */ /** * time_after64 - returns true if the time a is after time b. * @a: first comparable as __u64 * @b: second comparable as __u64 * * This must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64()). * * Return: %true is time a is after time b, otherwise %false. */ #define time_after64(a,b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ ((__s64)((b) - (a)) < 0)) /** * time_before64 - returns true if the time a is before time b. * @a: first comparable as __u64 * @b: second comparable as __u64 * * This must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64()). * * Return: %true is time a is before time b, otherwise %false. */ #define time_before64(a,b) time_after64(b,a) /** * time_after_eq64 - returns true if the time a is after or the same as time b. * @a: first comparable as __u64 * @b: second comparable as __u64 * * This must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64()). * * Return: %true is time a is after or the same as time b, otherwise %false. */ #define time_after_eq64(a,b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ ((__s64)((a) - (b)) >= 0)) /** * time_before_eq64 - returns true if the time a is before or the same as time b. * @a: first comparable as __u64 * @b: second comparable as __u64 * * This must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64()). * * Return: %true is time a is before or the same as time b, otherwise %false. */ #define time_before_eq64(a,b) time_after_eq64(b,a) /** * time_in_range64 - Calculate whether a is in the range of [b, c]. * @a: time to test * @b: beginning of the range * @c: end of the range * * Return: %true is time a is in the range [b, c], otherwise %false. */ #define time_in_range64(a, b, c) \ (time_after_eq64(a, b) && \ time_before_eq64(a, c)) /* * These eight macros compare jiffies[_64] and 'a' for convenience. */ /** * time_is_before_jiffies - return true if a is before jiffies * @a: time (unsigned long) to compare to jiffies * * Return: %true is time a is before jiffies, otherwise %false. */ #define time_is_before_jiffies(a) time_after(jiffies, a) /** * time_is_before_jiffies64 - return true if a is before jiffies_64 * @a: time (__u64) to compare to jiffies_64 * * Return: %true is time a is before jiffies_64, otherwise %false. */ #define time_is_before_jiffies64(a) time_after64(get_jiffies_64(), a) /** * time_is_after_jiffies - return true if a is after jiffies * @a: time (unsigned long) to compare to jiffies * * Return: %true is time a is after jiffies, otherwise %false. 
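 *
 * Illustrative sketch only (not from the original header): with a deadline
 * recorded earlier as "deadline = jiffies + HZ" (one second from then),
 * this macro tests whether that deadline is still in the future;
 * keep_waiting() and handle_timeout() are placeholder names:
 *
 *	if (time_is_after_jiffies(deadline))
 *		keep_waiting();
 *	else
 *		handle_timeout();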
*/ #define time_is_after_jiffies(a) time_before(jiffies, a) /** * time_is_after_jiffies64 - return true if a is after jiffies_64 * @a: time (__u64) to compare to jiffies_64 * * Return: %true is time a is after jiffies_64, otherwise %false. */ #define time_is_after_jiffies64(a) time_before64(get_jiffies_64(), a) /** * time_is_before_eq_jiffies - return true if a is before or equal to jiffies * @a: time (unsigned long) to compare to jiffies * * Return: %true is time a is before or the same as jiffies, otherwise %false. */ #define time_is_before_eq_jiffies(a) time_after_eq(jiffies, a) /** * time_is_before_eq_jiffies64 - return true if a is before or equal to jiffies_64 * @a: time (__u64) to compare to jiffies_64 * * Return: %true is time a is before or the same jiffies_64, otherwise %false. */ #define time_is_before_eq_jiffies64(a) time_after_eq64(get_jiffies_64(), a) /** * time_is_after_eq_jiffies - return true if a is after or equal to jiffies * @a: time (unsigned long) to compare to jiffies * * Return: %true is time a is after or the same as jiffies, otherwise %false. */ #define time_is_after_eq_jiffies(a) time_before_eq(jiffies, a) /** * time_is_after_eq_jiffies64 - return true if a is after or equal to jiffies_64 * @a: time (__u64) to compare to jiffies_64 * * Return: %true is time a is after or the same as jiffies_64, otherwise %false. */ #define time_is_after_eq_jiffies64(a) time_before_eq64(get_jiffies_64(), a) /* * Have the 32-bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. */ #define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) /* * Change timeval to jiffies, trying to avoid the * most obvious overflows.. * * And some not so obvious. * * Note that we don't want to return LONG_MAX, because * for various timeout reasons we often end up having * to wait "jiffies+1" in order to guarantee that we wait * at _least_ "jiffies" - so "jiffies+1" had better still * be positive. */ #define MAX_JIFFY_OFFSET ((LONG_MAX >> 1)-1) extern unsigned long preset_lpj; /* * We want to do realistic conversions of time so we need to use the same * values the update wall clock code uses as the jiffies size. This value * is: TICK_NSEC (which is defined in timex.h). This * is a constant and is in nanoseconds. We will use scaled math * with a set of scales defined here as SEC_JIFFIE_SC, USEC_JIFFIE_SC and * NSEC_JIFFIE_SC. Note that these defines contain nothing but * constants and so are computed at compile time. SHIFT_HZ (computed in * timex.h) adjusts the scaling for different HZ values. * Scaled math??? What is that? * * Scaled math is a way to do integer math on values that would, * otherwise, either overflow, underflow, or cause undesired div * instructions to appear in the execution path. In short, we "scale" * up the operands so they take more bits (more precision, less * underflow), do the desired operation and then "scale" the result back * by the same amount. If we do the scaling by shifting we avoid the * costly mpy and the dastardly div instructions. * Suppose, for example, we want to convert from seconds to jiffies * where jiffies is defined in nanoseconds as NSEC_PER_JIFFIE. The * simple math is: jiff = (sec * NSEC_PER_SEC) / NSEC_PER_JIFFIE; We * observe that (NSEC_PER_SEC / NSEC_PER_JIFFIE) is a constant which we * might calculate at compile time, however, the result will only have * about 3-4 bits of precision (less for smaller values of HZ). 
 *
 * So, we scale as follows:
 * jiff = (sec) * (NSEC_PER_SEC / NSEC_PER_JIFFIE);
 * jiff = ((sec) * ((NSEC_PER_SEC * SCALE)/ NSEC_PER_JIFFIE)) / SCALE;
 * Then we make SCALE a power of two so:
 * jiff = ((sec) * ((NSEC_PER_SEC << SCALE)/ NSEC_PER_JIFFIE)) >> SCALE;
 * Now we define:
 * #define SEC_CONV = ((NSEC_PER_SEC << SCALE)/ NSEC_PER_JIFFIE))
 * jiff = (sec * SEC_CONV) >> SCALE;
 *
 * Often the math we use will expand beyond 32-bits so we tell C how to
 * do this and pass the 64-bit result of the mpy through the ">> SCALE"
 * which should take the result back to 32-bits. We want this expansion
 * to capture as much precision as possible. At the same time we don't
 * want to overflow so we pick the SCALE to avoid this. In this file,
 * that means using a different scale for each range of HZ values (as
 * defined in timex.h).
 *
 * For those who want to know, gcc will give a 64-bit result from a "*"
 * operator if the result is a long long AND at least one of the
 * operands is cast to long long (usually just prior to the "*" so as
 * not to confuse it into thinking it really has a 64-bit operand,
 * which, by the way, it can do, but it takes more code and at least 2
 * mpys).
 * We also need to be aware that one second in nanoseconds is only a
 * couple of bits away from overflowing a 32-bit word, so we MUST use
 * 64-bits to get the full range time in nanoseconds.
 */

/*
 * Here are the scales we will use. One for seconds, nanoseconds and
 * microseconds.
 *
 * Within the limits of cpp we do a rough cut at the SEC_JIFFIE_SC and
 * check if the sign bit is set. If not, we bump the shift count by 1.
 * (Gets an extra bit of precision where we can use it.)
 * We know it is set for HZ = 1024 and HZ = 100, not for 1000.
 * Haven't tested others.
 * cpp (for #if expressions) is limited to long (no long long), but
 * then we only need the most significant bit.
 */
#define SEC_JIFFIE_SC (31 - SHIFT_HZ)
#if !((((NSEC_PER_SEC << 2) / TICK_NSEC) << (SEC_JIFFIE_SC - 2)) & 0x80000000)
#undef SEC_JIFFIE_SC
#define SEC_JIFFIE_SC (32 - SHIFT_HZ)
#endif
#define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 29)
#define SEC_CONVERSION ((unsigned long)((((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) +\
				TICK_NSEC -1) / (u64)TICK_NSEC))
#define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\
					TICK_NSEC -1) / (u64)TICK_NSEC))
/*
 * The maximum jiffie value is (MAX_INT >> 1). Here we translate that
 * into seconds. The 64-bit case will overflow if we are not careful,
 * so use the messy SH_DIV macro to do it. Still all constants.
*/ #if BITS_PER_LONG < 64 # define MAX_SEC_IN_JIFFIES \ (long)((u64)((u64)MAX_JIFFY_OFFSET * TICK_NSEC) / NSEC_PER_SEC) #else /* take care of overflow on 64-bit machines */ # define MAX_SEC_IN_JIFFIES \ (SH_DIV((MAX_JIFFY_OFFSET >> SEC_JIFFIE_SC) * TICK_NSEC, NSEC_PER_SEC, 1) - 1) #endif /* * Convert various time units to each other: */ extern unsigned int jiffies_to_msecs(const unsigned long j); extern unsigned int jiffies_to_usecs(const unsigned long j); /** * jiffies_to_nsecs - Convert jiffies to nanoseconds * @j: jiffies value * * Return: nanoseconds value */ static inline u64 jiffies_to_nsecs(const unsigned long j) { return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; } extern u64 jiffies64_to_nsecs(u64 j); extern u64 jiffies64_to_msecs(u64 j); extern unsigned long __msecs_to_jiffies(const unsigned int m); #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) /* * HZ is equal to or smaller than 1000, and 1000 is a nice round * multiple of HZ, divide with the factor between them, but round * upwards: */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); } #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) /* * HZ is larger than 1000, and HZ is a nice round multiple of 1000 - * simply multiply with the factor between them. * * But first make sure the multiplication result cannot overflow: */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) return MAX_JIFFY_OFFSET; return m * (HZ / MSEC_PER_SEC); } #else /* * Generic case - multiply, round and divide. But first check that if * we are doing a net multiplication, that we wouldn't overflow: */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) return MAX_JIFFY_OFFSET; return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32; } #endif /** * msecs_to_jiffies: - convert milliseconds to jiffies * @m: time in milliseconds * * conversion is done as follows: * * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) * * - 'too large' values [that would result in larger than * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. * * - all other values are converted to jiffies by either multiplying * the input value by a factor or dividing it with a factor and * handling any 32-bit overflows. * for the details see __msecs_to_jiffies() * * msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the * code. __msecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. * The HZ range specific helpers _msecs_to_jiffies() are called both * directly here and from __msecs_to_jiffies() in the case where * constant folding is not possible. 
* * Return: jiffies value */ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) { if (__builtin_constant_p(m)) { if ((int)m < 0) return MAX_JIFFY_OFFSET; return _msecs_to_jiffies(m); } else { return __msecs_to_jiffies(m); } } extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) static inline unsigned long _usecs_to_jiffies(const unsigned int u) { return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); } #else static inline unsigned long _usecs_to_jiffies(const unsigned int u) { return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) >> USEC_TO_HZ_SHR32; } #endif /** * usecs_to_jiffies: - convert microseconds to jiffies * @u: time in microseconds * * conversion is done as follows: * * - 'too large' values [that would result in larger than * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. * * - all other values are converted to jiffies by either multiplying * the input value by a factor or dividing it with a factor and * handling any 32-bit overflows as for msecs_to_jiffies. * * usecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the * code. __usecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. * The HZ range specific helpers _usecs_to_jiffies() are called both * directly here and from __msecs_to_jiffies() in the case where * constant folding is not possible. * * Return: jiffies value */ static __always_inline unsigned long usecs_to_jiffies(const unsigned int u) { if (__builtin_constant_p(u)) { if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) return MAX_JIFFY_OFFSET; return _usecs_to_jiffies(u); } else { return __usecs_to_jiffies(u); } } extern unsigned long timespec64_to_jiffies(const struct timespec64 *value); extern void jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value); extern clock_t jiffies_to_clock_t(unsigned long x); static inline clock_t jiffies_delta_to_clock_t(long delta) { return jiffies_to_clock_t(max(0L, delta)); } static inline unsigned int jiffies_delta_to_msecs(long delta) { return jiffies_to_msecs(max(0L, delta)); } extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); extern u64 nsecs_to_jiffies64(u64 n); extern unsigned long nsecs_to_jiffies(u64 n); #define TIMESTAMP_SIZE 30 #endif
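/*
 * Example usage of the helpers declared above (an illustrative sketch, not
 * part of the original header): building a deadline with msecs_to_jiffies()
 * and testing it with the wrap-safe time_after() comparison, as the
 * comments above recommend. poll_ready() is an invented name and the
 * ready() callback is a placeholder for a real hardware check.
 */
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/types.h>

static int poll_ready(bool (*ready)(void))
{
	/* Deadline 100 ms from now, expressed in jiffies. */
	unsigned long timeout = jiffies + msecs_to_jiffies(100);

	while (!ready()) {
		/* Wrap-safe: true once jiffies has moved past the deadline. */
		if (time_after(jiffies, timeout))
			return -ETIMEDOUT;
		usleep_range(100, 200);
	}
	return 0;
}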
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef LINUX_RESUME_USER_MODE_H
#define LINUX_RESUME_USER_MODE_H

#include <linux/sched.h>
#include <linux/task_work.h>
#include <linux/memcontrol.h>
#include <linux/rseq.h>
#include <linux/blk-cgroup.h>

/**
 * set_notify_resume - cause resume_user_mode_work() to be called
 * @task: task that will call resume_user_mode_work()
 *
 * Calling this arranges that @task will call resume_user_mode_work()
 * before returning to user mode. If it's already running in user mode,
 * it will enter the kernel and call resume_user_mode_work() soon.
 * If it's blocked, it will not be woken.
 */
static inline void set_notify_resume(struct task_struct *task)
{
	if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))
		kick_process(task);
}

/**
 * resume_user_mode_work - Perform work before returning to user mode
 * @regs: user-mode registers of @current task
 *
 * This is called when %TIF_NOTIFY_RESUME has been set. Now we are
 * about to return to user mode, and the user state in @regs can be
 * inspected or adjusted. The caller in arch code has cleared
 * %TIF_NOTIFY_RESUME before the call. If the flag gets set again
 * asynchronously, this will be called again before we return to
 * user mode.
 *
 * Called without locks.
 */
static inline void resume_user_mode_work(struct pt_regs *regs)
{
	clear_thread_flag(TIF_NOTIFY_RESUME);
	/*
	 * This barrier pairs with task_work_add()->set_notify_resume() after
	 * hlist_add_head(task->task_works);
	 */
	smp_mb__after_atomic();
	if (unlikely(task_work_pending(current)))
		task_work_run();

#ifdef CONFIG_KEYS_REQUEST_CACHE
	if (unlikely(current->cached_requested_key)) {
		key_put(current->cached_requested_key);
		current->cached_requested_key = NULL;
	}
#endif

	mem_cgroup_handle_over_high(GFP_KERNEL);
	blkcg_maybe_throttle_current();

	rseq_handle_notify_resume(NULL, regs);
}

#endif /* LINUX_RESUME_USER_MODE_H */
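/*
 * Illustrative sketch (not part of the original header): queueing task work
 * so that it runs from resume_user_mode_work() above on the target task's
 * way back to user mode. example_cb, example_work and example_queue_work()
 * are invented names; task_work_add() with TWA_RESUME is the path that
 * ends up calling set_notify_resume(), which the barrier comment in
 * resume_user_mode_work() refers to.
 */
#include <linux/sched.h>
#include <linux/task_work.h>

static void example_cb(struct callback_head *head)
{
	/* Runs in the target task's context, just before it re-enters user mode. */
}

static struct callback_head example_work;

static int example_queue_work(struct task_struct *task)
{
	init_task_work(&example_work, example_cb);
	/* TWA_RESUME: notify via TIF_NOTIFY_RESUME / set_notify_resume(). */
	return task_work_add(task, &example_work, TWA_RESUME);
}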
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.
INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * ROUTE - implementation of the IP router. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Linus Torvalds, <Linus.Torvalds@helsinki.fi> * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Fixes: * Alan Cox : Verify area fixes. * Alan Cox : cli() protects routing changes * Rui Oliveira : ICMP routing table updates * (rco@di.uminho.pt) Routing table insertion and update * Linus Torvalds : Rewrote bits to be sensible * Alan Cox : Added BSD route gw semantics * Alan Cox : Super /proc >4K * Alan Cox : MTU in route table * Alan Cox : MSS actually. Also added the window * clamper. * Sam Lantinga : Fixed route matching in rt_del() * Alan Cox : Routing cache support. * Alan Cox : Removed compatibility cruft. * Alan Cox : RTF_REJECT support. * Alan Cox : TCP irtt support. * Jonathan Naylor : Added Metric support. * Miquel van Smoorenburg : BSD API fixes. * Miquel van Smoorenburg : Metrics. * Alan Cox : Use __u32 properly * Alan Cox : Aligned routing errors more closely with BSD * our system is still very different. * Alan Cox : Faster /proc handling * Alexey Kuznetsov : Massive rework to support tree based routing, * routing caches and better behaviour. * * Olaf Erb : irtt wasn't being copied right. * Bjorn Ekwall : Kerneld route support. * Alan Cox : Multicast fixed (I hope) * Pavel Krauz : Limited broadcast fixed * Mike McLagan : Routing by source * Alexey Kuznetsov : End of old history. Split to fib.c and * route.c and rewritten from scratch. * Andi Kleen : Load-limit warning messages. * Vitaly E. Lavrov : Transparent proxy revived after year coma. * Vitaly E. Lavrov : Race condition in ip_route_input_slow. * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. * Vladimir V. Ivanov : IP rule info (flowid) is really useful. * Marc Boucher : routing by fwmark * Robert Olsson : Added rt_cache statistics * Arnaldo C. Melo : Convert proc stuff to seq_file * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. 
* Ilia Sotnikov : Ignore TOS on PMTUD and Redirect * Ilia Sotnikov : Removed TOS from hash calculations */ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/module.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/memblock.h> #include <linux/socket.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/pkt_sched.h> #include <linux/mroute.h> #include <linux/netfilter_ipv4.h> #include <linux/random.h> #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/jhash.h> #include <net/dst.h> #include <net/dst_metadata.h> #include <net/inet_dscp.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> #include <net/inetpeer.h> #include <net/sock.h> #include <net/ip_fib.h> #include <net/nexthop.h> #include <net/tcp.h> #include <net/icmp.h> #include <net/xfrm.h> #include <net/lwtunnel.h> #include <net/netevent.h> #include <net/rtnetlink.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <net/secure_seq.h> #include <net/ip_tunnels.h> #include "fib_lookup.h" #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) #define RT_GC_TIMEOUT (300*HZ) #define DEFAULT_MIN_PMTU (512 + 20 + 20) #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ) #define DEFAULT_MIN_ADVMSS 256 static int ip_rt_max_size; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; /* * Interface to generic destination cache. 
*/ INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void ipv4_dst_destroy(struct dst_entry *dst); static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) { WARN_ON(1); return NULL; } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, .confirm_neigh = ipv4_confirm_neigh, }; #define ECN_OR_COST(class) TC_PRIO_##class const __u8 ip_tos2prio[16] = { TC_PRIO_BESTEFFORT, ECN_OR_COST(BESTEFFORT), TC_PRIO_BESTEFFORT, ECN_OR_COST(BESTEFFORT), TC_PRIO_BULK, ECN_OR_COST(BULK), TC_PRIO_BULK, ECN_OR_COST(BULK), TC_PRIO_INTERACTIVE, ECN_OR_COST(INTERACTIVE), TC_PRIO_INTERACTIVE, ECN_OR_COST(INTERACTIVE), TC_PRIO_INTERACTIVE_BULK, ECN_OR_COST(INTERACTIVE_BULK), TC_PRIO_INTERACTIVE_BULK, ECN_OR_COST(INTERACTIVE_BULK) }; EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos) return NULL; return SEQ_START_TOKEN; } static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return NULL; } static void rt_cache_seq_stop(struct seq_file *seq, void *v) { } static int rt_cache_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" "HHUptod\tSpecDst"); return 0; } static const struct seq_operations rt_cache_seq_ops = { .start = rt_cache_seq_start, .next = rt_cache_seq_next, .stop = rt_cache_seq_stop, .show = rt_cache_seq_show, }; static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) { int cpu; if (*pos == 0) return SEQ_START_TOKEN; for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; return &per_cpu(rt_cache_stat, cpu); } return NULL; } static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; return &per_cpu(rt_cache_stat, cpu); } (*pos)++; return NULL; } static void rt_cpu_seq_stop(struct seq_file *seq, void *v) { } static int rt_cpu_seq_show(struct seq_file *seq, void *v) { struct rt_cache_stat *st = v; if (v == SEQ_START_TOKEN) { seq_puts(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow 
in_hlist_search out_hlist_search\n"); return 0; } seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x\n", dst_entries_get_slow(&ipv4_dst_ops), 0, /* st->in_hit */ st->in_slow_tot, st->in_slow_mc, st->in_no_route, st->in_brd, st->in_martian_dst, st->in_martian_src, 0, /* st->out_hit */ st->out_slow_tot, st->out_slow_mc, 0, /* st->gc_total */ 0, /* st->gc_ignored */ 0, /* st->gc_goal_miss */ 0, /* st->gc_dst_overflow */ 0, /* st->in_hlist_search */ 0 /* st->out_hlist_search */ ); return 0; } static const struct seq_operations rt_cpu_seq_ops = { .start = rt_cpu_seq_start, .next = rt_cpu_seq_next, .stop = rt_cpu_seq_stop, .show = rt_cpu_seq_show, }; #ifdef CONFIG_IP_ROUTE_CLASSID static int rt_acct_proc_show(struct seq_file *m, void *v) { struct ip_rt_acct *dst, *src; unsigned int i, j; dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); if (!dst) return -ENOMEM; for_each_possible_cpu(i) { src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); for (j = 0; j < 256; j++) { dst[j].o_bytes += src[j].o_bytes; dst[j].o_packets += src[j].o_packets; dst[j].i_bytes += src[j].i_bytes; dst[j].i_packets += src[j].i_packets; } } seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); kfree(dst); return 0; } #endif static int __net_init ip_rt_do_proc_init(struct net *net) { struct proc_dir_entry *pde; pde = proc_create_seq("rt_cache", 0444, net->proc_net, &rt_cache_seq_ops); if (!pde) goto err1; pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat, &rt_cpu_seq_ops); if (!pde) goto err2; #ifdef CONFIG_IP_ROUTE_CLASSID pde = proc_create_single("rt_acct", 0, net->proc_net, rt_acct_proc_show); if (!pde) goto err3; #endif return 0; #ifdef CONFIG_IP_ROUTE_CLASSID err3: remove_proc_entry("rt_cache", net->proc_net_stat); #endif err2: remove_proc_entry("rt_cache", net->proc_net); err1: return -ENOMEM; } static void __net_exit ip_rt_do_proc_exit(struct net *net) { remove_proc_entry("rt_cache", net->proc_net_stat); remove_proc_entry("rt_cache", net->proc_net); #ifdef CONFIG_IP_ROUTE_CLASSID remove_proc_entry("rt_acct", net->proc_net); #endif } static struct pernet_operations ip_rt_proc_ops __net_initdata = { .init = ip_rt_do_proc_init, .exit = ip_rt_do_proc_exit, }; static int __init ip_rt_proc_init(void) { return register_pernet_subsys(&ip_rt_proc_ops); } #else static inline int ip_rt_proc_init(void) { return 0; } #endif /* CONFIG_PROC_FS */ static inline bool rt_is_expired(const struct rtable *rth) { return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); } void rt_cache_flush(struct net *net) { rt_genid_bump_ipv4(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; struct neighbour *n; rcu_read_lock(); if (likely(rt->rt_gw_family == AF_INET)) { n = ip_neigh_gw4(dev, rt->rt_gw4); } else if (rt->rt_gw_family == AF_INET6) { n = ip_neigh_gw6(dev, &rt->rt_gw6); } else { __be32 pkey; pkey = skb ? 
ip_hdr(skb)->daddr : *((__be32 *) daddr); n = ip_neigh_gw4(dev, pkey); } if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock(); return n; } static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; const __be32 *pkey = daddr; if (rt->rt_gw_family == AF_INET) { pkey = (const __be32 *)&rt->rt_gw4; } else if (rt->rt_gw_family == AF_INET6) { return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); } else if (!daddr || (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { return; } __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); } /* Hash tables of size 2048..262144 depending on RAM size. * Each bucket uses 8 bytes. */ static u32 ip_idents_mask __read_mostly; static atomic_t *ip_idents __read_mostly; static u32 *ip_tstamps __read_mostly; /* In order to protect privacy, we add a perturbation to identifiers * if one generator is seldom used. This makes hard for an attacker * to infer how many packets were sent between two points in time. */ static u32 ip_idents_reserve(u32 hash, int segs) { u32 bucket, old, now = (u32)jiffies; atomic_t *p_id; u32 *p_tstamp; u32 delta = 0; bucket = hash & ip_idents_mask; p_tstamp = ip_tstamps + bucket; p_id = ip_idents + bucket; old = READ_ONCE(*p_tstamp); if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = get_random_u32_below(now - old); /* If UBSAN reports an error there, please make sure your compiler * supports -fno-strict-overflow before reporting it that was a bug * in UBSAN, and it has been fixed in GCC-8. */ return atomic_add_return(segs + delta, p_id) - segs; } void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { u32 hash, id; /* Note the following code is not safe, but this is okay. */ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) get_random_bytes(&net->ipv4.ip_id_key, sizeof(net->ipv4.ip_id_key)); hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr, iph->protocol, &net->ipv4.ip_id_key); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } EXPORT_SYMBOL(__ip_select_ident); static void ip_rt_fix_tos(struct flowi4 *fl4) { __u8 tos = RT_FL_TOS(fl4); fl4->flowi4_tos = tos & IPTOS_RT_MASK; if (tos & RTO_ONLINK) fl4->flowi4_scope = RT_SCOPE_LINK; } static void __build_flow_key(const struct net *net, struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, int oif, __u8 tos, u8 prot, u32 mark, int flow_flags) { __u8 scope = RT_SCOPE_UNIVERSE; if (sk) { oif = sk->sk_bound_dev_if; mark = READ_ONCE(sk->sk_mark); tos = ip_sock_rt_tos(sk); scope = ip_sock_rt_scope(sk); prot = inet_test_bit(HDRINCL, sk) ? 
IPPROTO_RAW : sk->sk_protocol; } flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope, prot, flow_flags, iph->daddr, iph->saddr, 0, 0, sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, const struct sock *sk) { const struct net *net = dev_net(skb->dev); const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; u8 prot = iph->protocol; u32 mark = skb->mark; __u8 tos = iph->tos; __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ip_options_rcu *inet_opt; __be32 daddr = inet->inet_daddr; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk) & IPTOS_RT_MASK, ip_sock_rt_scope(sk), inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, const struct sk_buff *skb) { if (skb) build_skb_flow_key(fl4, skb, sk); else build_sk_flow_key(fl4, sk); } static DEFINE_SPINLOCK(fnhe_lock); static void fnhe_flush_routes(struct fib_nh_exception *fnhe) { struct rtable *rt; rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt) { RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); dst_dev_put(&rt->dst); dst_release(&rt->dst); } rt = rcu_dereference(fnhe->fnhe_rth_output); if (rt) { RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); dst_dev_put(&rt->dst); dst_release(&rt->dst); } } static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception __rcu **fnhe_p, **oldest_p; struct fib_nh_exception *fnhe, *oldest = NULL; for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) { fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); if (!fnhe) break; if (!oldest || time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) { oldest = fnhe; oldest_p = fnhe_p; } } fnhe_flush_routes(oldest); *oldest_p = oldest->fnhe_next; kfree_rcu(oldest, rcu); } static u32 fnhe_hashfun(__be32 daddr) { static siphash_aligned_key_t fnhe_hash_key; u64 hval; net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key); return hash_64(hval, FNHE_HASH_SHIFT); } static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) { rt->rt_pmtu = fnhe->fnhe_pmtu; rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; rt->dst.expires = fnhe->fnhe_expires; if (fnhe->fnhe_gw) { rt->rt_flags |= RTCF_REDIRECTED; rt->rt_uses_gateway = 1; rt->rt_gw_family = AF_INET; rt->rt_gw4 = fnhe->fnhe_gw; } } static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, __be32 gw, u32 pmtu, bool lock, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; struct rtable *rt; u32 genid, hval; unsigned int i; int depth; genid = fnhe_genid(dev_net(nhc->nhc_dev)); hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); hash = rcu_dereference(nhc->nhc_exceptions); if (!hash) { hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; rcu_assign_pointer(nhc->nhc_exceptions, hash); } hash += hval; depth = 0; for (fnhe = rcu_dereference(hash->chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { if (fnhe->fnhe_daddr == daddr) break; depth++; } if (fnhe) { if (fnhe->fnhe_genid != genid) 
fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; if (pmtu) { fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_mtu_locked = lock; } fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt) fill_route_from_fnhe(rt, fnhe); rt = rcu_dereference(fnhe->fnhe_rth_output); if (rt) fill_route_from_fnhe(rt, fnhe); } else { /* Randomize max depth to avoid some side channels attacks. */ int max_depth = FNHE_RECLAIM_DEPTH + get_random_u32_below(FNHE_RECLAIM_DEPTH); while (depth > max_depth) { fnhe_remove_oldest(hash); depth--; } fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); if (!fnhe) goto out_unlock; fnhe->fnhe_next = hash->chain; fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_mtu_locked = lock; fnhe->fnhe_expires = max(1UL, expires); rcu_assign_pointer(hash->chain, fnhe); /* Exception created; mark the cached routes for the nexthop * stale, so anyone caching it rechecks if this exception * applies to them. */ rt = rcu_dereference(nhc->nhc_rth_input); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; for_each_possible_cpu(i) { struct rtable __rcu **prt; prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; } } fnhe->fnhe_stamp = jiffies; out_unlock: spin_unlock_bh(&fnhe_lock); } static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, bool kill_route) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; struct net_device *dev = skb->dev; struct in_device *in_dev; struct fib_result res; struct neighbour *n; struct net *net; switch (icmp_hdr(skb)->code & 7) { case ICMP_REDIR_NET: case ICMP_REDIR_NETTOS: case ICMP_REDIR_HOST: case ICMP_REDIR_HOSTTOS: break; default: return; } if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw) return; in_dev = __in_dev_get_rcu(dev); if (!in_dev) return; net = dev_net(dev); if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || ipv4_is_zeronet(new_gw)) goto reject_redirect; if (!IN_DEV_SHARED_MEDIA(in_dev)) { if (!inet_addr_onlink(in_dev, new_gw, old_gw)) goto reject_redirect; if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) goto reject_redirect; } else { if (inet_addr_type(net, new_gw) != RTN_UNICAST) goto reject_redirect; } n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw); if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc; fib_select_path(net, &res, fl4, skb); nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } neigh_release(n); } return; reject_redirect: #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev)) { const struct iphdr *iph = (const struct iphdr *) skb->data; __be32 daddr = iph->daddr; __be32 saddr = iph->saddr; net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" " Advised path = %pI4 -> %pI4\n", &old_gw, dev->name, &new_gw, &saddr, &daddr); } #endif ; } static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; struct flowi4 fl4; const struct iphdr *iph = (const struct iphdr *) skb->data; struct net *net = 
dev_net(skb->dev); int oif = skb->dev->ifindex; u8 prot = iph->protocol; u32 mark = skb->mark; __u8 tos = iph->tos; rt = (struct rtable *) dst; __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; struct dst_entry *ret = dst; if (rt) { if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->dst.expires) { ip_rt_put(rt); ret = NULL; } } return ret; } /* * Algorithm: * 1. The first ip_rt_redirect_number redirects are sent * with exponential backoff, then we stop sending them at all, * assuming that the host ignores our redirects. * 2. If we did not see packets requiring redirects * during ip_rt_redirect_silence, we assume that the host * forgot redirected route and start to send redirects again. * * This algorithm is much cheaper and more intelligent than dumb load limiting * in icmp.c. * * NOTE. Do not forget to inhibit load limiting for redirects (redundant) * and "frag. need" (breaks PMTU discovery) in icmp.c. */ void ip_rt_send_redirect(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; struct inet_peer *peer; struct net *net; int log_martians; int vif; rcu_read_lock(); in_dev = __in_dev_get_rcu(rt->dst.dev); if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { rcu_read_unlock(); return; } log_martians = IN_DEV_LOG_MARTIANS(in_dev); vif = l3mdev_master_ifindex_rcu(rt->dst.dev); rcu_read_unlock(); net = dev_net(rt->dst.dev); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt_nexthop(rt, ip_hdr(skb)->daddr)); return; } /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; peer->n_redirects = 0; } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } /* Check for load limit; set rate_last to the latest sent * redirect. */ if (peer->n_redirects == 0 || time_after(jiffies, (peer->rate_last + (ip_rt_redirect_load << peer->n_redirects)))) { __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->n_redirects; if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); } out_put_peer: inet_putpeer(peer); } static int ip_error(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct net_device *dev = skb->dev; struct in_device *in_dev; struct inet_peer *peer; unsigned long now; struct net *net; SKB_DR(reason); bool send; int code; if (netif_is_l3_master(skb->dev)) { dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) goto out; } in_dev = __in_dev_get_rcu(dev); /* IP on this device is disabled. 
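 * If so there is nothing sensible we can report back, so the packet
 * is simply dropped.  Otherwise the dst error below is mapped to an
 * ICMP code and the reply is rate limited with a small token bucket
 * kept in the inet_peer entry for the source address: rate_tokens is
 * refilled with the jiffies elapsed since rate_last, capped at
 * ip_rt_error_burst, and every ICMP sent costs ip_rt_error_cost.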
*/ if (!in_dev) goto out; net = dev_net(rt->dst.dev); if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { case EHOSTUNREACH: SKB_DR_SET(reason, IP_INADDRERRORS); __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); break; case ENETUNREACH: SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; } goto out; } switch (rt->dst.error) { case EINVAL: default: goto out; case EHOSTUNREACH: code = ICMP_HOST_UNREACH; break; case ENETUNREACH: code = ICMP_NET_UNREACH; SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; case EACCES: code = ICMP_PKT_FILTERED; break; } peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, l3mdev_master_ifindex(skb->dev), 1); send = true; if (peer) { now = jiffies; peer->rate_tokens += now - peer->rate_last; if (peer->rate_tokens > ip_rt_error_burst) peer->rate_tokens = ip_rt_error_burst; peer->rate_last = now; if (peer->rate_tokens >= ip_rt_error_cost) peer->rate_tokens -= ip_rt_error_cost; else send = false; inet_putpeer(peer); } if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); out: kfree_skb_reason(skb, reason); return 0; } static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; struct net *net = dev_net(dst->dev); struct fib_result res; bool lock = false; u32 old_mtu; if (ip_mtu_locked(dst)) return; old_mtu = ipv4_mtu(dst); if (old_mtu < mtu) return; if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); } if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) return; rcu_read_lock(); if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc; fib_select_path(net, &res, fl4, NULL); nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } rcu_read_unlock(); } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { struct rtable *rt = (struct rtable *) dst; struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); /* Don't make lookup fail for bridged encapsulations */ if (skb && netif_is_any_bridge_port(skb->dev)) fl4.flowi4_oif = 0; __ip_rt_update_pmtu(rt, &fl4, mtu); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u8 protocol) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; u32 mark = IP4_REPLY_MARK(net, skb->mark); __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); ip_rt_put(rt); } } EXPORT_SYMBOL_GPL(ipv4_update_pmtu); static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); rt = __ip_route_output_key(sock_net(sk), &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); ip_rt_put(rt); } } void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; struct net *net = sock_net(sk); bh_lock_sock(sk); if (!ip_sk_accept_pmtu(sk)) goto out; odst = sk_dst_get(sk); if (sock_owned_by_user(sk) || 
!odst) { __ipv4_sk_update_pmtu(skb, sk, mtu); goto out; } __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; new = true; } __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); if (!dst_check(&rt->dst, 0)) { if (new) dst_release(&rt->dst); rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; new = true; } if (new) sk_dst_set(sk, &rt->dst); out: bh_unlock_sock(sk); dst_release(odst); } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); } } EXPORT_SYMBOL_GPL(ipv4_redirect); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; struct net *net = sock_net(sk); __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); } } EXPORT_SYMBOL_GPL(ipv4_sk_redirect); INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; /* All IPV4 dsts are created with ->obsolete set to the value * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. * * When a PMTU/redirect information update invalidates a route, * this is indicated by setting obsolete to DST_OBSOLETE_KILL or * DST_OBSOLETE_DEAD. */ if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; return dst; } EXPORT_INDIRECT_CALLABLE(ipv4_dst_check); static void ipv4_send_dest_unreach(struct sk_buff *skb) { struct net_device *dev; struct ip_options opt; int res; /* Recompile ip options since IPCB may not be valid anymore. * Also check we have a reasonable ipv4 header. */ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) return; memset(&opt, 0, sizeof(opt)); if (ip_hdr(skb)->ihl > 5) { if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) return; opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); rcu_read_unlock(); if (res) return; } __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); } static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; ipv4_send_dest_unreach(skb); rt = skb_rtable(skb); if (rt) dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb) { pr_debug("%s: %pI4 -> %pI4, %s\n", __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, skb->dev ? skb->dev->name : "?"); kfree_skb(skb); WARN_ON(1); return 0; } /* * We do not cache source address of outgoing interface, * because it is used only by IP RR, TS and SRR options, * so that it out of fast path. * * BTW remember: "addr" is allowed to be not aligned * in IP options! 
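 * That is why the result is written out below with a four byte
 * memcpy() rather than through a __be32 pointer; a direct store could
 * fault on architectures that cannot do unaligned accesses.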
*/ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) { __be32 src; if (rt_is_output_route(rt)) src = ip_hdr(skb)->saddr; else { struct fib_result res; struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, .flowi4_tos = RT_TOS(iph->tos), .flowi4_oif = rt->dst.dev->ifindex, .flowi4_iif = skb->dev->ifindex, .flowi4_mark = skb->mark, }; rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) src = fib_result_prefsrc(dev_net(rt->dst.dev), &res); else src = inet_select_addr(rt->dst.dev, rt_nexthop(rt, iph->daddr), RT_SCOPE_UNIVERSE); rcu_read_unlock(); } memcpy(addr, &src, 4); } #ifdef CONFIG_IP_ROUTE_CLASSID static void set_class_tag(struct rtable *rt, u32 tag) { if (!(rt->dst.tclassid & 0xFFFF)) rt->dst.tclassid |= tag & 0xFFFF; if (!(rt->dst.tclassid & 0xFFFF0000)) rt->dst.tclassid |= tag & 0xFFFF0000; } #endif static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { struct net *net = dev_net(dst->dev); unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, net->ipv4.ip_rt_min_advmss); return min(advmss, IPV4_MAX_PMTU - header_size); } INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) { return ip_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ipv4_mtu); static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe, __rcu **fnhe_p; u32 hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); hash = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&fnhe_lock)); hash += hval; fnhe_p = &hash->chain; fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); while (fnhe) { if (fnhe->fnhe_daddr == daddr) { rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); /* set fnhe_daddr to 0 to ensure it won't bind with * new dsts in rt_bind_exception(). */ fnhe->fnhe_daddr = 0; fnhe_flush_routes(fnhe); kfree_rcu(fnhe, rcu); break; } fnhe_p = &fnhe->fnhe_next; fnhe = rcu_dereference_protected(fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)); } spin_unlock_bh(&fnhe_lock); } static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); struct fib_nh_exception *fnhe; u32 hval; if (!hash) return NULL; hval = fnhe_hashfun(daddr); for (fnhe = rcu_dereference(hash[hval].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { if (fnhe->fnhe_daddr == daddr) { if (fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) { ip_del_fnhe(nhc, daddr); break; } return fnhe; } } return NULL; } /* MTU selection: * 1. mtu on route is locked - use it * 2. mtu from nexthop exception * 3. 
mtu from egress device */ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) { struct fib_nh_common *nhc = res->nhc; struct net_device *dev = nhc->nhc_dev; struct fib_info *fi = res->fi; u32 mtu = 0; if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) mtu = fi->fib_mtu; if (likely(!mtu)) { struct fib_nh_exception *fnhe; fnhe = find_exception(nhc, daddr); if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) mtu = fnhe->fnhe_pmtu; } if (likely(!mtu)) mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); } static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr, const bool do_cache) { bool ret = false; spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { struct rtable __rcu **porig; struct rtable *orig; int genid = fnhe_genid(dev_net(rt->dst.dev)); if (rt_is_input_route(rt)) porig = &fnhe->fnhe_rth_input; else porig = &fnhe->fnhe_rth_output; orig = rcu_dereference(*porig); if (fnhe->fnhe_genid != genid) { fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; fnhe->fnhe_mtu_locked = false; fnhe_flush_routes(fnhe); orig = NULL; } fill_route_from_fnhe(rt, fnhe); if (!rt->rt_gw4) { rt->rt_gw4 = daddr; rt->rt_gw_family = AF_INET; } if (do_cache) { dst_hold(&rt->dst); rcu_assign_pointer(*porig, rt); if (orig) { dst_dev_put(&orig->dst); dst_release(&orig->dst); } ret = true; } fnhe->fnhe_stamp = jiffies; } spin_unlock_bh(&fnhe_lock); return ret; } static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) { struct rtable *orig, *prev, **p; bool ret = true; if (rt_is_input_route(rt)) { p = (struct rtable **)&nhc->nhc_rth_input; } else { p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } orig = *p; /* hold dst before doing cmpxchg() to avoid race condition * on this dst */ dst_hold(&rt->dst); prev = cmpxchg(p, orig, rt); if (prev == orig) { if (orig) { rt_add_uncached_list(orig); dst_release(&orig->dst); } } else { dst_release(&rt->dst); ret = false; } return ret; } struct uncached_list { spinlock_t lock; struct list_head head; struct list_head quarantine; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); void rt_add_uncached_list(struct rtable *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); rt->dst.rt_uncached_list = ul; spin_lock_bh(&ul->lock); list_add_tail(&rt->dst.rt_uncached, &ul->head); spin_unlock_bh(&ul->lock); } void rt_del_uncached_list(struct rtable *rt) { if (!list_empty(&rt->dst.rt_uncached)) { struct uncached_list *ul = rt->dst.rt_uncached_list; spin_lock_bh(&ul->lock); list_del_init(&rt->dst.rt_uncached); spin_unlock_bh(&ul->lock); } } static void ipv4_dst_destroy(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; ip_dst_metrics_put(dst); rt_del_uncached_list(rt); } void rt_flush_dev(struct net_device *dev) { struct rtable *rt, *safe; int cpu; for_each_possible_cpu(cpu) { struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); if (list_empty(&ul->head)) continue; spin_lock_bh(&ul->lock); list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; netdev_ref_replace(dev, blackhole_netdev, &rt->dst.dev_tracker, GFP_ATOMIC); list_move(&rt->dst.rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); } } static bool rt_cache_valid(const struct rtable *rt) { return rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 
!rt_is_expired(rt); } static void rt_set_nexthop(struct rtable *rt, __be32 daddr, const struct fib_result *res, struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag, const bool do_cache) { bool cached = false; if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { rt->rt_uses_gateway = 1; rt->rt_gw_family = nhc->nhc_gw_family; /* only INET and INET6 are supported */ if (likely(nhc->nhc_gw_family == AF_INET)) rt->rt_gw4 = nhc->nhc_gw.ipv4; else rt->rt_gw6 = nhc->nhc_gw.ipv6; } ip_dst_init_metrics(&rt->dst, fi->fib_metrics); #ifdef CONFIG_IP_ROUTE_CLASSID if (nhc->nhc_family == AF_INET) { struct fib_nh *nh; nh = container_of(nhc, struct fib_nh, nh_common); rt->dst.tclassid = nh->nh_tclassid; } #endif rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr, do_cache); else if (do_cache) cached = rt_cache_route(nhc, rt); if (unlikely(!cached)) { /* Routes we intend to cache in nexthop exception or * FIB nexthop have the DST_NOCACHE bit clear. * However, if we are unsuccessful at storing this * route into the cache we really need to set it. */ if (!rt->rt_gw4) { rt->rt_gw_family = AF_INET; rt->rt_gw4 = daddr; } rt_add_uncached_list(rt); } } else rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES set_class_tag(rt, res->tclassid); #endif set_class_tag(rt, itag); #endif } struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, bool noxfrm) { struct rtable *rt; rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, (noxfrm ? DST_NOXFRM : 0)); if (rt) { rt->rt_genid = rt_genid_ipv4(dev_net(dev)); rt->rt_flags = flags; rt->rt_type = type; rt->rt_is_input = 0; rt->rt_iif = 0; rt->rt_pmtu = 0; rt->rt_mtu_locked = 0; rt->rt_uses_gateway = 0; rt->rt_gw_family = 0; rt->rt_gw4 = 0; rt->dst.output = ip_output; if (flags & RTCF_LOCAL) rt->dst.input = ip_local_deliver; } return rt; } EXPORT_SYMBOL(rt_dst_alloc); struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) { struct rtable *new_rt; new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, rt->dst.flags); if (new_rt) { new_rt->rt_genid = rt_genid_ipv4(dev_net(dev)); new_rt->rt_flags = rt->rt_flags; new_rt->rt_type = rt->rt_type; new_rt->rt_is_input = rt->rt_is_input; new_rt->rt_iif = rt->rt_iif; new_rt->rt_pmtu = rt->rt_pmtu; new_rt->rt_mtu_locked = rt->rt_mtu_locked; new_rt->rt_gw_family = rt->rt_gw_family; if (rt->rt_gw_family == AF_INET) new_rt->rt_gw4 = rt->rt_gw4; else if (rt->rt_gw_family == AF_INET6) new_rt->rt_gw6 = rt->rt_gw6; new_rt->dst.input = rt->dst.input; new_rt->dst.output = rt->dst.output; new_rt->dst.error = rt->dst.error; new_rt->dst.lastuse = jiffies; new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); } return new_rt; } EXPORT_SYMBOL(rt_dst_clone); /* called in rcu_read_lock() section */ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, struct in_device *in_dev, u32 *itag) { int err; /* Primary sanity checks. 
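 * The source must not be multicast or limited broadcast, loopback
 * sources are only accepted when route_localnet is enabled, and the
 * frame must actually carry an IPv4 packet.  A zero source address is
 * tolerated only for IGMP or for packets sent to a link-local
 * multicast group; every other source is handed to
 * fib_validate_source() for reverse path validation.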
*/ if (!in_dev) return -EINVAL; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || skb->protocol != htons(ETH_P_IP)) return -EINVAL; if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) return -EINVAL; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr) && ip_hdr(skb)->protocol != IPPROTO_IGMP) return -EINVAL; } else { err = fib_validate_source(skb, saddr, 0, tos, 0, dev, in_dev, itag); if (err < 0) return err; } return 0; } /* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { struct in_device *in_dev = __in_dev_get_rcu(dev); unsigned int flags = RTCF_MULTICAST; struct rtable *rth; u32 itag = 0; int err; err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); if (err) return err; if (our) flags |= RTCF_LOCAL; if (IN_DEV_ORCONF(in_dev, NOPOLICY)) IPCB(skb)->flags |= IPSKB_NOPOLICY; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, false); if (!rth) return -ENOBUFS; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif rth->dst.output = ip_rt_bug; rth->rt_is_input= 1; #ifdef CONFIG_IP_MROUTE if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) rth->dst.input = ip_mr_input; #endif RT_CACHE_STAT_INC(in_slow_mc); skb_dst_drop(skb); skb_dst_set(skb, &rth->dst); return 0; } static void ip_handle_martian_source(struct net_device *dev, struct in_device *in_dev, struct sk_buff *skb, __be32 daddr, __be32 saddr) { RT_CACHE_STAT_INC(in_martian_src); #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { /* * RFC1812 recommendation, if source is martian, * the only hint is MAC header. */ pr_warn("martian source %pI4 from %pI4, on dev %s\n", &daddr, &saddr, dev->name); if (dev->hard_header_len && skb_mac_header_was_set(skb)) { print_hex_dump(KERN_WARNING, "ll header: ", DUMP_PREFIX_OFFSET, 16, 1, skb_mac_header(skb), dev->hard_header_len, false); } } #endif } /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; bool do_cache; u32 itag = 0; /* get a working reference to the output device */ out_dev = __in_dev_get_rcu(dev); if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); return -EINVAL; } err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); goto cleanup; } do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && skb->protocol == htons(ETH_P_IP)) { __be32 gw; gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; if (IN_DEV_SHARED_MEDIA(out_dev) || inet_addr_onlink(out_dev, saddr, gw)) IPCB(skb)->flags |= IPSKB_DOREDIRECT; } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is * invalid for proxy arp. DNAT routes are always valid. * * Proxy arp feature have been extended to allow, ARP * replies back to the same interface, to support * Private VLAN switch technologies. See arp.c. 
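 * So a reply that would leave through the interface it arrived on is
 * only rejected here when proxy_arp_pvlan is disabled on that device;
 * with it enabled the hairpin path is let through.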
*/ if (out_dev == in_dev && IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { err = -EINVAL; goto cleanup; } } if (IN_DEV_ORCONF(in_dev, NOPOLICY)) IPCB(skb)->flags |= IPSKB_NOPOLICY; fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; } } rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; } rth->rt_is_input = 1; RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, do_cache); lwtunnel_set_redirect(&rth->dst); skb_dst_set(skb, &rth->dst); out: err = 0; cleanup: return err; } #ifdef CONFIG_IP_ROUTE_MULTIPATH /* To make ICMP packets follow the right flow, the multipath hash is * calculated from the inner IP addresses. */ static void ip_multipath_l3_keys(const struct sk_buff *skb, struct flow_keys *hash_keys) { const struct iphdr *outer_iph = ip_hdr(skb); const struct iphdr *key_iph = outer_iph; const struct iphdr *inner_iph; const struct icmphdr *icmph; struct iphdr _inner_iph; struct icmphdr _icmph; if (likely(outer_iph->protocol != IPPROTO_ICMP)) goto out; if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) goto out; icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), &_icmph); if (!icmph) goto out; if (!icmp_is_err(icmph->type)) goto out; inner_iph = skb_header_pointer(skb, outer_iph->ihl * 4 + sizeof(_icmph), sizeof(_inner_iph), &_inner_iph); if (!inner_iph) goto out; key_iph = inner_iph; out: hash_keys->addrs.v4addrs.src = key_iph->saddr; hash_keys->addrs.v4addrs.dst = key_iph->daddr; } static u32 fib_multipath_custom_hash_outer(const struct net *net, const struct sk_buff *skb, bool *p_has_inner) { u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = keys.basic.ip_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) hash_keys.ports.src = keys.ports.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); return flow_hash_from_keys(&hash_keys); } static u32 fib_multipath_custom_hash_inner(const struct net *net, const struct sk_buff *skb, bool has_inner) { u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; /* We assume the packet carries an encapsulation, but if none was * encountered during dissection of the outer flow, then there is no * point in calling the flow dissector again. 
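 * Which inner fields end up in the hash is selected by the
 * FIB_MULTIPATH_HASH_FIELD_INNER_* bits of the
 * fib_multipath_hash_fields sysctl, mirroring the outer-header bits
 * used by fib_multipath_custom_hash_outer() above.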
*/ if (!has_inner) return 0; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, 0); if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) return 0; if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) hash_keys.tags.flow_label = keys.tags.flow_label; } if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) hash_keys.basic.ip_proto = keys.basic.ip_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) hash_keys.ports.src = keys.ports.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; return flow_hash_from_keys(&hash_keys); } static u32 fib_multipath_custom_hash_skb(const struct net *net, const struct sk_buff *skb) { u32 mhash, mhash_inner; bool has_inner = true; mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); return jhash_2words(mhash, mhash_inner, 0); } static u32 fib_multipath_custom_hash_fl4(const struct net *net, const struct flowi4 *fl4) { u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) hash_keys.addrs.v4addrs.src = fl4->saddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) hash_keys.addrs.v4addrs.dst = fl4->daddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = fl4->flowi4_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) hash_keys.ports.src = fl4->fl4_sport; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl4->fl4_dport; return flow_hash_from_keys(&hash_keys); } /* if skb is set it will be used and fl4 can be NULL */ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) { u32 multipath_hash = fl4 ? 
fl4->flowi4_multipath_hash : 0; struct flow_keys hash_keys; u32 mhash = 0; switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (skb) { ip_multipath_l3_keys(skb, &hash_keys); } else { hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } mhash = flow_hash_from_keys(&hash_keys); break; case 1: /* skb is currently provided only when forwarding */ if (skb) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; struct flow_keys keys; /* short-circuit if we already have L4 hash present */ if (skb->l4_hash) return skb_get_hash_raw(skb) >> 1; memset(&hash_keys, 0, sizeof(hash_keys)); if (!flkeys) { skb_flow_dissect_flow_keys(skb, &keys, flag); flkeys = &keys; } hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; hash_keys.ports.src = flkeys->ports.src; hash_keys.ports.dst = flkeys->ports.dst; hash_keys.basic.ip_proto = flkeys->basic.ip_proto; } else { memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; hash_keys.ports.src = fl4->fl4_sport; hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } mhash = flow_hash_from_keys(&hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); /* skb is currently provided only when forwarding */ if (skb) { struct flow_keys keys; skb_flow_dissect_flow_keys(skb, &keys, 0); /* Inner can be v4 or v6 */ if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; hash_keys.tags.flow_label = keys.tags.flow_label; hash_keys.basic.ip_proto = keys.basic.ip_proto; } else { /* Same as case 0 */ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; ip_multipath_l3_keys(skb, &hash_keys); } } else { /* Same as case 0 */ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } mhash = flow_hash_from_keys(&hash_keys); break; case 3: if (skb) mhash = fib_multipath_custom_hash_skb(net, skb); else mhash = fib_multipath_custom_hash_fl4(net, fl4); break; } if (multipath_hash) mhash = jhash_2words(mhash, multipath_hash, 0); return mhash >> 1; } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); fib_select_multipath(res, h); IPCB(skb)->flags |= IPSKB_MULTIPATH; } #endif /* create a routing cache entry */ return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); } /* Implements all the saddr-related checks as ip_route_input_slow(), * assuming daddr is valid and the destination is not a local broadcast one. 
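 * The hint is an earlier packet from the same receive batch whose dst
 * has already been set up by the full input path; once the source
 * checks below pass, that dst is simply copied onto this skb.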
* Uses the provided hint instead of performing a route lookup. */ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, const struct sk_buff *hint) { struct in_device *in_dev = __in_dev_get_rcu(dev); struct rtable *rt = skb_rtable(hint); struct net *net = dev_net(dev); int err = -EINVAL; u32 tag = 0; if (!in_dev) return -EINVAL; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; if (ipv4_is_zeronet(saddr)) goto martian_source; if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_source; if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; tos &= IPTOS_RT_MASK; err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); if (err < 0) goto martian_source; skip_validate_source: skb_dst_copy(skb, hint); return 0; martian_source: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); return err; } /* get device for dst_alloc with local routes */ static struct net_device *ip_rt_get_dev(struct net *net, const struct fib_result *res) { struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; struct net_device *dev = NULL; if (nhc) dev = l3mdev_master_dev_rcu(nhc->nhc_dev); return dev ? : net->loopback_dev; } /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet * must have correct destination already attached by output routine. * Changes in the enforced policies must be applied also to * ip_route_use_hint(). * * Such approach solves two big problems: * 1. Not simplex devices are handled properly. * 2. IP spoofing attempts are filtered with 100% of guarantee. * called with rcu_read_lock() */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, struct fib_result *res) { struct in_device *in_dev = __in_dev_get_rcu(dev); struct flow_keys *flkeys = NULL, _flkeys; struct net *net = dev_net(dev); struct ip_tunnel_info *tun_info; int err = -EINVAL; unsigned int flags = 0; u32 itag = 0; struct rtable *rth; struct flowi4 fl4; bool do_cache = true; /* IP on this device is disabled. */ if (!in_dev) goto out; /* Check for the most weird martians, which can be not detected * by fib_lookup. */ tun_info = skb_tunnel_info(skb); if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id; else fl4.flowi4_tun_key.tun_id = 0; skb_dst_drop(skb); if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; res->fi = NULL; res->table = NULL; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) goto brd_input; /* Accept zero addresses only to limited broadcast; * I even do not know to fix it or not. Waiting for complains :-) */ if (ipv4_is_zeronet(saddr)) goto martian_source; if (ipv4_is_zeronet(daddr)) goto martian_destination; /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), * and call it once if daddr or/and saddr are loopback addresses */ if (ipv4_is_loopback(daddr)) { if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_destination; } else if (ipv4_is_loopback(saddr)) { if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_source; } /* * Now we are ready to route packet. 
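 * The flow key built below reflects the packet itself: incoming
 * interface, mark, TOS and the two addresses, with oif and l3mdev
 * left at zero for an input route.  When the FIB rules need layer 4
 * keys the flow dissector runs up front, and the resulting keys can
 * be reused for multipath hashing further down instead of dissecting
 * the packet a second time.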
*/ fl4.flowi4_l3mdev = 0; fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_multipath_hash = 0; if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; } else { fl4.flowi4_proto = 0; fl4.fl4_sport = 0; fl4.fl4_dport = 0; } err = fib_lookup(net, &fl4, res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) err = -EHOSTUNREACH; goto no_route; } if (res->type == RTN_BROADCAST) { if (IN_DEV_BFORWARD(in_dev)) goto make_route; /* not do cache if bc_forwarding is enabled */ if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING)) do_cache = false; goto brd_input; } if (res->type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &itag); if (err < 0) goto martian_source; goto local_input; } if (!IN_DEV_FORWARD(in_dev)) { err = -EHOSTUNREACH; goto no_route; } if (res->type != RTN_UNICAST) goto martian_destination; make_route: err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); out: return err; brd_input: if (skb->protocol != htons(ETH_P_IP)) goto e_inval; if (!ipv4_is_zeronet(saddr)) { err = fib_validate_source(skb, saddr, 0, tos, 0, dev, in_dev, &itag); if (err < 0) goto martian_source; } flags |= RTCF_BROADCAST; res->type = RTN_BROADCAST; RT_CACHE_STAT_INC(in_brd); local_input: if (IN_DEV_ORCONF(in_dev, NOPOLICY)) IPCB(skb)->flags |= IPSKB_NOPOLICY; do_cache &= res->fi && !itag; if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; goto out; } } rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, false); if (!rth) goto e_nobufs; rth->dst.output= ip_rt_bug; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif rth->rt_is_input = 1; RT_CACHE_STAT_INC(in_slow_tot); if (res->type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { WARN_ON(rth->dst.input == lwtunnel_input); rth->dst.lwtstate->orig_input = rth->dst.input; rth->dst.input = lwtunnel_input; } if (unlikely(!rt_cache_route(nhc, rth))) rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); err = 0; goto out; no_route: RT_CACHE_STAT_INC(in_no_route); res->type = RTN_UNREACHABLE; res->fi = NULL; res->table = NULL; goto local_input; /* * Do not cache martian addresses: they should be logged (RFC1812) */ martian_destination: RT_CACHE_STAT_INC(in_martian_dst); #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev)) net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", &daddr, &saddr, dev->name); #endif e_inval: err = -EINVAL; goto out; e_nobufs: err = -ENOBUFS; goto out; martian_source: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); goto out; } /* called with rcu_read_lock held */ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, struct fib_result *res) { /* Multicast recognition logic is moved from route cache to here. 
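 * (The route cache referred to here is the old per-flow IPv4 routing
 * cache, which is long gone; only the multicast classification part
 * survives in this function.)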
* The problem was that too many Ethernet cards have broken/missing * hardware multicast filters :-( As result the host on multicasting * network acquires a lot of useless route cache entries, sort of * SDR messages from all the world. Now we try to get rid of them. * Really, provided software IP multicast filter is organized * reasonably (at least, hashed), it does not result in a slowdown * comparing with route cache reject entries. * Note, that multicast routers are not affected, because * route cache entry is created eventually. */ if (ipv4_is_multicast(daddr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); int our = 0; int err = -EINVAL; if (!in_dev) return err; our = ip_check_mc_rcu(in_dev, daddr, saddr, ip_hdr(skb)->protocol); /* check l3 master if no match yet */ if (!our && netif_is_l3_slave(dev)) { struct in_device *l3_in_dev; l3_in_dev = __in_dev_get_rcu(skb->dev); if (l3_in_dev) our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, ip_hdr(skb)->protocol); } if (our #ifdef CONFIG_IP_MROUTE || (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) #endif ) { err = ip_route_input_mc(skb, daddr, saddr, tos, dev, our); } return err; } return ip_route_input_slow(skb, daddr, saddr, tos, dev, res); } int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct fib_result res; int err; tos &= IPTOS_RT_MASK; rcu_read_lock(); err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); rcu_read_unlock(); return err; } EXPORT_SYMBOL(ip_route_input_noref); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, const struct flowi4 *fl4, int orig_oif, struct net_device *dev_out, unsigned int flags) { struct fib_info *fi = res->fi; struct fib_nh_exception *fnhe; struct in_device *in_dev; u16 type = res->type; struct rtable *rth; bool do_cache; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) return ERR_PTR(-EINVAL); if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl4->daddr)) type = RTN_BROADCAST; else if (ipv4_is_multicast(fl4->daddr)) type = RTN_MULTICAST; else if (ipv4_is_zeronet(fl4->daddr)) return ERR_PTR(-EINVAL); if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; do_cache = true; if (type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; fi = NULL; } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, fl4->flowi4_proto)) flags &= ~RTCF_LOCAL; else do_cache = false; /* If multicast route do not exist use * default one, but do not gateway in this case. * Yes, it is hack. */ if (fi && res->prefixlen < 4) fi = NULL; } else if ((type == RTN_LOCAL) && (orig_oif != 0) && (orig_oif != dev_out->ifindex)) { /* For local routes that require a particular output interface * we do not want to cache the result. Caching the result * causes incorrect behaviour when there are multiple source * addresses on the interface, the end result being that if the * intended recipient is waiting on that interface for the * packet he won't receive it because it will be delivered on * the loopback interface and the IP_PKTINFO ipi_ifindex will * be set to the loopback interface as well. 
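 * Leaving do_cache false here means the route allocated below is not
 * stored in the nexthop cache at all; rt_set_nexthop() puts it on the
 * per-cpu uncached list instead, so every such lookup gets its own
 * short-lived dst.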
*/ do_cache = false; } fnhe = NULL; do_cache &= fi != NULL; if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct rtable __rcu **prth; fnhe = find_exception(nhc, fl4->daddr); if (!do_cache) goto add; if (fnhe) { prth = &fnhe->fnhe_rth_output; } else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && !(nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK))) { do_cache = false; goto add; } prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } rth = rcu_dereference(*prth); if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) return rth; } add: rth = rt_dst_alloc(dev_out, flags, type, IN_DEV_ORCONF(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); rth->rt_iif = orig_oif; RT_CACHE_STAT_INC(out_slow_tot); if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { rth->dst.output = ip_mc_output; RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE if (type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(fl4->daddr)) { rth->dst.input = ip_mr_input; rth->dst.output = ip_mc_output; } } #endif } rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); lwtunnel_set_redirect(&rth->dst); return rth; } /* * Major route resolver routine. */ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, const struct sk_buff *skb) { struct fib_result res = { .type = RTN_UNSPEC, .fi = NULL, .table = NULL, .tclassid = 0, }; struct rtable *rth; fl4->flowi4_iif = LOOPBACK_IFINDEX; ip_rt_fix_tos(fl4); rcu_read_lock(); rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); rcu_read_unlock(); return rth; } EXPORT_SYMBOL_GPL(ip_route_output_key_hash); struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, struct fib_result *res, const struct sk_buff *skb) { struct net_device *dev_out = NULL; int orig_oif = fl4->flowi4_oif; unsigned int flags = 0; struct rtable *rth; int err; if (fl4->saddr) { if (ipv4_is_multicast(fl4->saddr) || ipv4_is_lbcast(fl4->saddr) || ipv4_is_zeronet(fl4->saddr)) { rth = ERR_PTR(-EINVAL); goto out; } rth = ERR_PTR(-ENETUNREACH); /* I removed check for oif == dev_out->oif here. * It was wrong for two reasons: * 1. ip_dev_find(net, saddr) can return wrong iface, if saddr * is assigned to multiple interfaces. * 2. Moreover, we are allowed to send packets with saddr * of another iface. --ANK */ if (fl4->flowi4_oif == 0 && (ipv4_is_multicast(fl4->daddr) || ipv4_is_lbcast(fl4->daddr))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = __ip_dev_find(net, fl4->saddr, false); if (!dev_out) goto out; /* Special hack: user can direct multicasts * and limited broadcast via necessary interface * without fiddling with IP_MULTICAST_IF or IP_PKTINFO. * This hack is not just for fun, it allows * vic,vat and friends to work. * They bind socket to loopback, set ttl to zero * and expect that it will work. * From the viewpoint of routing cache they are broken, * because we are not allowed to build multicast path * with loopback source addr (look, routing cache * cannot know, that ttl is zero, so that packet * will not leave this host and route is valid). * Luckily, this hack is good workaround. 
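 * Concretely: when no oif was given and the destination is multicast
 * or limited broadcast, the device owning the requested source
 * address is looked up with __ip_dev_find() and quietly becomes the
 * output interface.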
*/ fl4->flowi4_oif = dev_out->ifindex; goto make_route; } if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ if (!__ip_dev_find(net, fl4->saddr, false)) goto out; } } if (fl4->flowi4_oif) { dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); rth = ERR_PTR(-ENODEV); if (!dev_out) goto out; /* RACE: Check return value of inet_select_addr instead. */ if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { rth = ERR_PTR(-ENETUNREACH); goto out; } if (ipv4_is_local_multicast(fl4->daddr) || ipv4_is_lbcast(fl4->daddr) || fl4->flowi4_proto == IPPROTO_IGMP) { if (!fl4->saddr) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); goto make_route; } if (!fl4->saddr) { if (ipv4_is_multicast(fl4->daddr)) fl4->saddr = inet_select_addr(dev_out, 0, fl4->flowi4_scope); else if (!fl4->daddr) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } } if (!fl4->daddr) { fl4->daddr = fl4->saddr; if (!fl4->daddr) fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; fl4->flowi4_oif = LOOPBACK_IFINDEX; res->type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } err = fib_lookup(net, fl4, res, 0); if (err) { res->fi = NULL; res->table = NULL; if (fl4->flowi4_oif && (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { /* Apparently, routing tables are wrong. Assume, * that the destination is on link. * * WHY? DW. * Because we are allowed to send to iface * even if it has NO routes and NO assigned * addresses. When oif is specified, routing * tables are looked up with only one purpose: * to catch if destination is gatewayed, rather than * direct. Moreover, if MSG_DONTROUTE is set, * we send packet, ignoring both routing tables * and ifaddr state. --ANK * * * We could make it even if oif is unknown, * likely IPv6, but we do not. */ if (fl4->saddr == 0) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); res->type = RTN_UNICAST; goto make_route; } rth = ERR_PTR(err); goto out; } if (res->type == RTN_LOCAL) { if (!fl4->saddr) { if (res->fi->fib_prefsrc) fl4->saddr = res->fi->fib_prefsrc; else fl4->saddr = fl4->daddr; } /* L3 master device is the loopback for that domain */ dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? 
: net->loopback_dev; /* make sure orig_oif points to fib result device even * though packet rx/tx happens over loopback or l3mdev */ orig_oif = FIB_RES_OIF(*res); fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; } fib_select_path(net, res, fl4, skb); dev_out = FIB_RES_DEV(*res); make_route: rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); out: return rth; } static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .default_advmss = ipv4_default_advmss, .neigh_lookup = ipv4_neigh_lookup, .check = dst_blackhole_check, .cow_metrics = dst_blackhole_cow_metrics, .update_pmtu = dst_blackhole_update_pmtu, .redirect = dst_blackhole_redirect, .mtu = dst_blackhole_mtu, }; struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; new->__use = 1; new->input = dst_discard; new->output = dst_discard_out; new->dev = net->loopback_dev; netdev_hold(new->dev, &new->dev_tracker, GFP_ATOMIC); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; rt->rt_pmtu = ort->rt_pmtu; rt->rt_mtu_locked = ort->rt_mtu_locked; rt->rt_genid = rt_genid_ipv4(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_uses_gateway = ort->rt_uses_gateway; rt->rt_gw_family = ort->rt_gw_family; if (rt->rt_gw_family == AF_INET) rt->rt_gw4 = ort->rt_gw4; else if (rt->rt_gw_family == AF_INET6) rt->rt_gw6 = ort->rt_gw6; } dst_release(dst_orig); return rt ? &rt->dst : ERR_PTR(-ENOMEM); } struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, const struct sock *sk) { struct rtable *rt = __ip_route_output_key(net, flp4); if (IS_ERR(rt)) return rt; if (flp4->flowi4_proto) { flp4->flowi4_oif = rt->dst.dev->ifindex; rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, flowi4_to_flowi(flp4), sk, 0); } return rt; } EXPORT_SYMBOL_GPL(ip_route_output_flow); /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags) { struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; u32 error; u32 metrics[RTAX_MAX]; nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4 ? fl4->flowi4_tos : 0; r->rtm_table = table_id < 256 ? 
table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; if (IPCB(skb)->flags & IPSKB_DOREDIRECT) r->rtm_flags |= RTCF_DOREDIRECT; if (nla_put_in_addr(skb, RTA_DST, dst)) goto nla_put_failure; if (src) { r->rtm_src_len = 32; if (nla_put_in_addr(skb, RTA_SRC, src)) goto nla_put_failure; } if (rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) goto nla_put_failure; if (rt->dst.lwtstate && lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (rt->dst.tclassid && nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) goto nla_put_failure; #endif if (fl4 && !rt_is_input_route(rt) && fl4->saddr != src) { if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } if (rt->rt_uses_gateway) { if (rt->rt_gw_family == AF_INET && nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) { goto nla_put_failure; } else if (rt->rt_gw_family == AF_INET6) { int alen = sizeof(struct in6_addr); struct nlattr *nla; struct rtvia *via; nla = nla_reserve(skb, RTA_VIA, alen + 2); if (!nla) goto nla_put_failure; via = nla_data(nla); via->rtvia_family = AF_INET6; memcpy(via->rtvia_addr, &rt->rt_gw6, alen); } } expires = rt->dst.expires; if (expires) { unsigned long now = jiffies; if (time_before(now, expires)) expires -= now; else expires = 0; } memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; if (rt->rt_mtu_locked && expires) metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (fl4) { if (fl4->flowi4_mark && nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && nla_put_u32(skb, RTA_UID, from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) goto nla_put_failure; if (rt_is_input_route(rt)) { #ifdef CONFIG_IP_MROUTE if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, r, portid); if (err <= 0) { if (err == 0) return 0; goto nla_put_failure; } } else #endif if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) goto nla_put_failure; } } error = rt->dst.error; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fnhe_hash_bucket *bucket, int genid, int *fa_index, int fa_start, unsigned int flags) { int i; for (i = 0; i < FNHE_HASH_SIZE; i++) { struct fib_nh_exception *fnhe; for (fnhe = rcu_dereference(bucket[i].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { struct rtable *rt; int err; if (*fa_index < fa_start) goto next; if (fnhe->fnhe_genid != genid) goto next; if (fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) goto next; rt = rcu_dereference(fnhe->fnhe_rth_input); if (!rt) rt = rcu_dereference(fnhe->fnhe_rth_output); if (!rt) goto next; err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt, table_id, NULL, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err) return err; next: (*fa_index)++; } } return 0; } int 
fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, int *fa_index, int fa_start, unsigned int flags) { struct net *net = sock_net(cb->skb->sk); int nhsel, genid = fnhe_genid(net); for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); struct fnhe_hash_bucket *bucket; int err; if (nhc->nhc_flags & RTNH_F_DEAD) continue; rcu_read_lock(); bucket = rcu_dereference(nhc->nhc_exceptions); err = 0; if (bucket) err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, genid, fa_index, fa_start, flags); rcu_read_unlock(); if (err) return err; } return 0; } static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, u8 ip_proto, __be16 sport, __be16 dport) { struct sk_buff *skb; struct iphdr *iph; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return NULL; /* Reserve room for dummy headers, this skb can pass * through good chunk of routing engine. */ skb_reset_mac_header(skb); skb_reset_network_header(skb); skb->protocol = htons(ETH_P_IP); iph = skb_put(skb, sizeof(struct iphdr)); iph->protocol = ip_proto; iph->saddr = src; iph->daddr = dst; iph->version = 0x4; iph->frag_off = 0; iph->ihl = 0x5; skb_set_transport_header(skb, skb->len); switch (iph->protocol) { case IPPROTO_UDP: { struct udphdr *udph; udph = skb_put_zero(skb, sizeof(struct udphdr)); udph->source = sport; udph->dest = dport; udph->len = htons(sizeof(struct udphdr)); udph->check = 0; break; } case IPPROTO_TCP: { struct tcphdr *tcph; tcph = skb_put_zero(skb, sizeof(struct tcphdr)); tcph->source = sport; tcph->dest = dport; tcph->doff = sizeof(struct tcphdr) / 4; tcph->rst = 1; tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), src, dst, 0); break; } case IPPROTO_ICMP: { struct icmphdr *icmph; icmph = skb_put_zero(skb, sizeof(struct icmphdr)); icmph->type = ICMP_ECHO; icmph->code = 0; } } return skb; } static int inet_rtm_valid_getroute_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct rtmsg *rtm; int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for route get request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) { NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); return -EINVAL; } if (rtm->rtm_flags & ~(RTM_F_NOTIFY | RTM_F_LOOKUP_TABLE | RTM_F_FIB_MATCH)) { NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); if (err) return err; if ((tb[RTA_SRC] && !rtm->rtm_src_len) || (tb[RTA_DST] && !rtm->rtm_dst_len)) { NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); return -EINVAL; } for (i = 0; i <= RTA_MAX; i++) { if (!tb[i]) continue; switch (i) { case RTA_IIF: case RTA_OIF: case RTA_SRC: case RTA_DST: case RTA_IP_PROTO: case RTA_SPORT: case RTA_DPORT: case RTA_MARK: case RTA_UID: break; default: NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); return -EINVAL; } } return 0; } static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { 
struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; u32 table_id = RT_TABLE_MAIN; __be16 sport = 0, dport = 0; struct fib_result res = {}; u8 ip_proto = IPPROTO_UDP; struct rtable *rt = NULL; struct sk_buff *skb; struct rtmsg *rtm; struct flowi4 fl4 = {}; __be32 dst = 0; __be32 src = 0; kuid_t uid; u32 iif; int err; int mark; err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) return err; rtm = nlmsg_data(nlh); src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; if (tb[RTA_UID]) uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); else uid = (iif ? INVALID_UID : current_uid()); if (tb[RTA_IP_PROTO]) { err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], &ip_proto, AF_INET, extack); if (err) return err; } if (tb[RTA_SPORT]) sport = nla_get_be16(tb[RTA_SPORT]); if (tb[RTA_DPORT]) dport = nla_get_be16(tb[RTA_DPORT]); skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); if (!skb) return -ENOBUFS; fl4.daddr = dst; fl4.saddr = src; fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; if (sport) fl4.fl4_sport = sport; if (dport) fl4.fl4_dport = dport; fl4.flowi4_proto = ip_proto; rcu_read_lock(); if (iif) { struct net_device *dev; dev = dev_get_by_index_rcu(net, iif); if (!dev) { err = -ENODEV; goto errout_rcu; } fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos & IPTOS_RT_MASK, dev, &res); rt = skb_rtable(skb); if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { fl4.flowi4_iif = LOOPBACK_IFINDEX; skb->dev = net->loopback_dev; rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); err = 0; if (IS_ERR(rt)) err = PTR_ERR(rt); else skb_dst_set(skb, &rt->dst); } if (err) goto errout_rcu; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = res.table ? 
res.table->tb_id : 0; /* reset skb for netlink reply msg */ skb_trim(skb, 0); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb_reset_mac_header(skb); if (rtm->rtm_flags & RTM_F_FIB_MATCH) { struct fib_rt_info fri; if (!res.fi) { err = fib_props[res.type].error; if (!err) err = -EHOSTUNREACH; goto errout_rcu; } fri.fi = res.fi; fri.tb_id = table_id; fri.dst = res.prefix; fri.dst_len = res.prefixlen; fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos); fri.type = rt->rt_type; fri.offload = 0; fri.trap = 0; fri.offload_failed = 0; if (res.fa_head) { struct fib_alias *fa; hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) { u8 slen = 32 - fri.dst_len; if (fa->fa_slen == slen && fa->tb_id == fri.tb_id && fa->fa_dscp == fri.dscp && fa->fa_info == res.fi && fa->fa_type == fri.type) { fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); fri.offload_failed = READ_ONCE(fa->offload_failed); break; } } } err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0); } else { err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0); } if (err < 0) goto errout_rcu; rcu_read_unlock(); err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout_free: return err; errout_rcu: rcu_read_unlock(); kfree_skb(skb); goto errout_free; } void ip_rt_multicast_event(struct in_device *in_dev) { rt_cache_flush(dev_net(in_dev->dev)); } #ifdef CONFIG_SYSCTL static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)__ctl->extra1; if (write) { rt_cache_flush(net); fnhe_genid_bump(net); return 0; } return -EINVAL; } static struct ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_size", .data = &ip_rt_max_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { /* Deprecated. 
Use gc_min_interval_ms */ .procname = "gc_min_interval", .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "gc_min_interval_ms", .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, { .procname = "gc_timeout", .data = &ip_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "gc_interval", .data = &ip_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "redirect_load", .data = &ip_rt_redirect_load, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "redirect_number", .data = &ip_rt_redirect_number, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "redirect_silence", .data = &ip_rt_redirect_silence, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "error_cost", .data = &ip_rt_error_cost, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "error_burst", .data = &ip_rt_error_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "gc_elasticity", .data = &ip_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { } }; static const char ipv4_route_flush_procname[] = "flush"; static struct ctl_table ipv4_route_netns_table[] = { { .procname = ipv4_route_flush_procname, .maxlen = sizeof(int), .mode = 0200, .proc_handler = ipv4_sysctl_rtcache_flush, }, { .procname = "min_pmtu", .data = &init_net.ipv4.ip_rt_min_pmtu, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &ip_min_valid_pmtu, }, { .procname = "mtu_expires", .data = &init_net.ipv4.ip_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "min_adv_mss", .data = &init_net.ipv4.ip_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { }, }; static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; size_t table_size = ARRAY_SIZE(ipv4_route_netns_table); tbl = ipv4_route_netns_table; if (!net_eq(net, &init_net)) { int i; tbl = kmemdup(tbl, sizeof(ipv4_route_netns_table), GFP_KERNEL); if (!tbl) goto err_dup; /* Don't export non-whitelisted sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) { if (tbl[0].procname != ipv4_route_flush_procname) { tbl[0].procname = NULL; table_size = 0; } } /* Update the variables to point into the current struct net * except for the first element flush */ for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++) tbl[i].data += (void *)net - (void *)&init_net; } tbl[0].extra1 = net; net->ipv4.route_hdr = register_net_sysctl_sz(net, "net/ipv4/route", tbl, table_size); if (!net->ipv4.route_hdr) goto err_reg; return 0; err_reg: if (tbl != ipv4_route_netns_table) kfree(tbl); err_dup: return -ENOMEM; } static __net_exit void sysctl_route_net_exit(struct net *net) { struct ctl_table *tbl; tbl = net->ipv4.route_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.route_hdr); BUG_ON(tbl == ipv4_route_netns_table); kfree(tbl); } static __net_initdata struct pernet_operations sysctl_route_ops = { .init = sysctl_route_net_init, .exit = sysctl_route_net_exit, }; #endif static __net_init int netns_ip_rt_init(struct net *net) { /* Set default value for namespaceified sysctls */ net->ipv4.ip_rt_min_pmtu = 
DEFAULT_MIN_PMTU; net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS; return 0; } static struct pernet_operations __net_initdata ip_rt_ops = { .init = netns_ip_rt_init, }; static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->ipv4.rt_genid, 0); atomic_set(&net->fnhe_genid, 0); atomic_set(&net->ipv4.dev_addr_genid, get_random_u32()); return 0; } static __net_initdata struct pernet_operations rt_genid_ops = { .init = rt_genid_init, }; static int __net_init ipv4_inetpeer_init(struct net *net) { struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); if (!bp) return -ENOMEM; inet_peer_base_init(bp); net->ipv4.peers = bp; return 0; } static void __net_exit ipv4_inetpeer_exit(struct net *net) { struct inet_peer_base *bp = net->ipv4.peers; net->ipv4.peers = NULL; inetpeer_invalidate_tree(bp); kfree(bp); } static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { .init = ipv4_inetpeer_init, .exit = ipv4_inetpeer_exit, }; #ifdef CONFIG_IP_ROUTE_CLASSID struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_IP_ROUTE_CLASSID */ int __init ip_rt_init(void) { void *idents_hash; int cpu; /* For modern hosts, this will use 2 MB of memory */ idents_hash = alloc_large_system_hash("IP idents", sizeof(*ip_idents) + sizeof(*ip_tstamps), 0, 16, /* one bucket per 64 KB */ HASH_ZERO, NULL, &ip_idents_mask, 2048, 256*1024); ip_idents = idents_hash; get_random_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents)); ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents); for_each_possible_cpu(cpu) { struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); INIT_LIST_HEAD(&ul->quarantine); spin_lock_init(&ul->lock); } #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) panic("IP: failed to allocate ip_rt_acct\n"); #endif ipv4_dst_ops.kmem_cachep = KMEM_CACHE(rtable, SLAB_HWCACHE_ALIGN | SLAB_PANIC); ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; if (dst_entries_init(&ipv4_dst_ops) < 0) panic("IP: failed to allocate ipv4_dst_ops counter\n"); if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); ipv4_dst_ops.gc_thresh = ~0; ip_rt_max_size = INT_MAX; devinet_init(); ip_fib_init(); if (ip_rt_proc_init()) pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRM xfrm_init(); xfrm4_init(); #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, RTNL_FLAG_DOIT_UNLOCKED); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); #endif register_pernet_subsys(&ip_rt_ops); register_pernet_subsys(&rt_genid_ops); register_pernet_subsys(&ipv4_inetpeer_ops); return 0; } #ifdef CONFIG_SYSCTL /* * We really need to sanitize the damn ipv4 init order, then all * this nonsense will go away. */ void __init ip_static_sysctl_init(void) { register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); } #endif
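/*
 * Illustrative sketch (not part of route.c): how a kernel caller might use
 * the output-route API built above.  The flowi4 setup mirrors what
 * ip_route_output_key_hash() consumes, and the IS_ERR() handling matches
 * __mkroute_output() returning ERR_PTR().  example_resolve_route() is a
 * hypothetical helper, shown only to make the lookup/release cycle concrete.
 */
#if 0	/* example only, not compiled */
#include <net/route.h>
#include <net/ip.h>

static int example_resolve_route(struct net *net, __be32 daddr, __be32 saddr)
{
	struct flowi4 fl4 = {
		.daddr		= daddr,
		.saddr		= saddr,
		.flowi4_proto	= IPPROTO_UDP,
	};
	struct rtable *rt;

	/* FIB lookup plus dst construction (or reuse of a cached dst). */
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	pr_debug("route via %s, gateway%s used\n",
		 rt->dst.dev->name, rt->rt_uses_gateway ? "" : " not");

	/* Drop the reference taken by the lookup. */
	ip_rt_put(rt);
	return 0;
}
#endif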
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * kref.h - library routines for handling generic reference counted objects
 *
 * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
 * Copyright (C) 2004 IBM Corp.
 *
 * based on kobject.h which was:
 * Copyright (C) 2002-2003 Patrick Mochel <mochel@osdl.org>
 * Copyright (C) 2002-2003 Open Source Development Labs
 */

#ifndef _KREF_H_
#define _KREF_H_

#include <linux/spinlock.h>
#include <linux/refcount.h>

struct kref {
	refcount_t refcount;
};

#define KREF_INIT(n)	{ .refcount = REFCOUNT_INIT(n), }

/**
 * kref_init - initialize object.
 * @kref: object in question.
 */
static inline void kref_init(struct kref *kref)
{
	refcount_set(&kref->refcount, 1);
}

static inline unsigned int kref_read(const struct kref *kref)
{
	return refcount_read(&kref->refcount);
}

/**
 * kref_get - increment refcount for object.
 * @kref: object.
 */
static inline void kref_get(struct kref *kref)
{
	refcount_inc(&kref->refcount);
}

/**
 * kref_put - decrement refcount for object.
 * @kref: object.
 * @release: pointer to the function that will clean up the object when the
 *	     last reference to the object is released.
 *	     This pointer is required, and it is not acceptable to pass kfree
 *	     in as this function.
 *
 * Decrement the refcount, and if 0, call release().
 * Return 1 if the object was removed, otherwise return 0.  Beware, if this
 * function returns 0, you still can not count on the kref from remaining in
 * memory.  Only use the return value if you want to see if the kref is now
 * gone, not present.
 */
static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
{
	if (refcount_dec_and_test(&kref->refcount)) {
		release(kref);
		return 1;
	}
	return 0;
}

static inline int kref_put_mutex(struct kref *kref,
				 void (*release)(struct kref *kref),
				 struct mutex *lock)
{
	if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) {
		release(kref);
		return 1;
	}
	return 0;
}

static inline int kref_put_lock(struct kref *kref,
				void (*release)(struct kref *kref),
				spinlock_t *lock)
{
	if (refcount_dec_and_lock(&kref->refcount, lock)) {
		release(kref);
		return 1;
	}
	return 0;
}

/**
 * kref_get_unless_zero - Increment refcount for object unless it is zero.
 * @kref: object.
 *
 * Return non-zero if the increment succeeded. Otherwise return 0.
 *
 * This function is intended to simplify locking around refcounting for
 * objects that can be looked up from a lookup structure, and which are
 * removed from that lookup structure in the object destructor.
 * Operations on such objects require at least a read lock around
 * lookup + kref_get, and a write lock around kref_put + remove from lookup
 * structure. Furthermore, RCU implementations become extremely tricky.
 * With a lookup followed by a kref_get_unless_zero *with return value check*
 * locking in the kref_put path can be deferred to the actual removal from
 * the lookup structure and RCU lookups become trivial.
 */
static inline int __must_check kref_get_unless_zero(struct kref *kref)
{
	return refcount_inc_not_zero(&kref->refcount);
}

#endif /* _KREF_H_ */
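/*
 * Illustrative sketch (not part of kref.h): the embedding pattern the
 * comments above describe -- a kref inside an object, a release() callback
 * that frees it, and get/put around users.  struct example_obj and its
 * helpers are hypothetical names.
 */
#if 0	/* example only, not compiled */
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/container_of.h>

struct example_obj {			/* hypothetical object */
	struct kref refcount;
	int payload;
};

static void example_obj_release(struct kref *kref)
{
	struct example_obj *obj = container_of(kref, struct example_obj, refcount);

	kfree(obj);
}

static struct example_obj *example_obj_create(void)
{
	struct example_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		kref_init(&obj->refcount);	/* refcount starts at 1 */
	return obj;
}

static void example_obj_get(struct example_obj *obj)
{
	kref_get(&obj->refcount);
}

static void example_obj_put(struct example_obj *obj)
{
	/* release() runs exactly once, when the last reference drops. */
	kref_put(&obj->refcount, example_obj_release);
}
#endif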
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2005-2010 IBM Corporation
 *
 * Authors:
 * Mimi Zohar <zohar@us.ibm.com>
 * Kylene Hall <kjhall@us.ibm.com>
 *
 * File: evm.h
 */

#ifndef __INTEGRITY_EVM_H
#define __INTEGRITY_EVM_H

#include <linux/xattr.h>
#include <linux/security.h>

#include "../integrity.h"

#define EVM_INIT_HMAC	0x0001
#define EVM_INIT_X509	0x0002
#define EVM_ALLOW_METADATA_WRITES	0x0004
#define EVM_SETUP_COMPLETE 0x80000000 /* userland has signaled key load */

#define EVM_KEY_MASK (EVM_INIT_HMAC | EVM_INIT_X509)
#define EVM_INIT_MASK (EVM_INIT_HMAC | EVM_INIT_X509 | EVM_SETUP_COMPLETE | \
		       EVM_ALLOW_METADATA_WRITES)

struct xattr_list {
	struct list_head list;
	char *name;
	bool enabled;
};

#define EVM_NEW_FILE		0x00000001
#define EVM_IMMUTABLE_DIGSIG	0x00000002

/* EVM integrity metadata associated with an inode */
struct evm_iint_cache {
	unsigned long flags;
	enum integrity_status evm_status:4;
};

extern struct lsm_blob_sizes evm_blob_sizes;

static inline struct evm_iint_cache *evm_iint_inode(const struct inode *inode)
{
	if (unlikely(!inode->i_security))
		return NULL;

	return inode->i_security + evm_blob_sizes.lbs_inode;
}

extern int evm_initialized;

#define EVM_ATTR_FSUUID		0x0001

extern int evm_hmac_attrs;

/* List of EVM protected security xattrs */
extern struct list_head evm_config_xattrnames;

struct evm_digest {
	struct ima_digest_data hdr;
	char digest[IMA_MAX_DIGEST_SIZE];
} __packed;

int evm_protected_xattr(const char *req_xattr_name);

int evm_init_key(void);
int evm_update_evmxattr(struct dentry *dentry,
			const char *req_xattr_name,
			const char *req_xattr_value,
			size_t req_xattr_value_len);
int evm_calc_hmac(struct dentry *dentry, const char *req_xattr_name,
		  const char *req_xattr_value,
		  size_t req_xattr_value_len, struct evm_digest *data);
int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name,
		  const char *req_xattr_value,
		  size_t req_xattr_value_len, char type,
		  struct evm_digest *data);
int evm_init_hmac(struct inode *inode, const struct xattr *xattrs,
		  char *hmac_val);
int evm_init_secfs(void);

#endif
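/*
 * Illustrative sketch (not part of evm.h): how EVM-internal code might
 * combine the pieces declared above -- the evm_initialized key mask and the
 * per-inode metadata returned by evm_iint_inode().  example_evm_inode_is_new()
 * is a hypothetical helper shown only for illustration.
 */
#if 0	/* example only, not compiled */
#include <linux/fs.h>

static bool example_evm_inode_is_new(const struct inode *inode)
{
	struct evm_iint_cache *iint;

	/* No HMAC or x509 key loaded yet: EVM cannot have labeled anything. */
	if (!(evm_initialized & EVM_KEY_MASK))
		return false;

	iint = evm_iint_inode(inode);	/* may be NULL if no LSM inode blob */
	return iint && (iint->flags & EVM_NEW_FILE);
}
#endif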
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ALSA sequencer Timing queue handling
 * Copyright (c) 1998-1999 by Frank van de Pol <fvdpol@coil.demon.nl>
 *
 * MAJOR CHANGES
 * Nov. 13, 1999 Takashi Iwai <iwai@ww.uni-erlangen.de>
 * - Queues are allocated dynamically via ioctl.
 * - When owner client is deleted, all owned queues are deleted, too.
 * - Owner of unlocked queue is kept unmodified even if it is
 *   manipulated by other clients.
 * - Owner field in SET_QUEUE_OWNER ioctl must be identical with the
 *   caller client. i.e. Changing owner to a third client is not
 *   allowed.
 *
 * Aug.
30, 2000 Takashi Iwai * - Queues are managed in static array again, but with better way. * The API itself is identical. * - The queue is locked when struct snd_seq_queue pointer is returned via * queueptr(). This pointer *MUST* be released afterward by * queuefree(ptr). * - Addition of experimental sync support. */ #include <linux/init.h> #include <linux/slab.h> #include <sound/core.h> #include "seq_memory.h" #include "seq_queue.h" #include "seq_clientmgr.h" #include "seq_fifo.h" #include "seq_timer.h" #include "seq_info.h" /* list of allocated queues */ static struct snd_seq_queue *queue_list[SNDRV_SEQ_MAX_QUEUES]; static DEFINE_SPINLOCK(queue_list_lock); /* number of queues allocated */ static int num_queues; int snd_seq_queue_get_cur_queues(void) { return num_queues; } /*----------------------------------------------------------------*/ /* assign queue id and insert to list */ static int queue_list_add(struct snd_seq_queue *q) { int i; guard(spinlock_irqsave)(&queue_list_lock); for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if (! queue_list[i]) { queue_list[i] = q; q->queue = i; num_queues++; return i; } } return -1; } static struct snd_seq_queue *queue_list_remove(int id, int client) { struct snd_seq_queue *q; guard(spinlock_irqsave)(&queue_list_lock); q = queue_list[id]; if (q) { guard(spinlock)(&q->owner_lock); if (q->owner == client) { /* found */ q->klocked = 1; queue_list[id] = NULL; num_queues--; return q; } } return NULL; } /*----------------------------------------------------------------*/ /* create new queue (constructor) */ static struct snd_seq_queue *queue_new(int owner, int locked) { struct snd_seq_queue *q; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) return NULL; spin_lock_init(&q->owner_lock); spin_lock_init(&q->check_lock); mutex_init(&q->timer_mutex); snd_use_lock_init(&q->use_lock); q->queue = -1; q->tickq = snd_seq_prioq_new(); q->timeq = snd_seq_prioq_new(); q->timer = snd_seq_timer_new(); if (q->tickq == NULL || q->timeq == NULL || q->timer == NULL) { snd_seq_prioq_delete(&q->tickq); snd_seq_prioq_delete(&q->timeq); snd_seq_timer_delete(&q->timer); kfree(q); return NULL; } q->owner = owner; q->locked = locked; q->klocked = 0; return q; } /* delete queue (destructor) */ static void queue_delete(struct snd_seq_queue *q) { /* stop and release the timer */ mutex_lock(&q->timer_mutex); snd_seq_timer_stop(q->timer); snd_seq_timer_close(q); mutex_unlock(&q->timer_mutex); /* wait until access free */ snd_use_lock_sync(&q->use_lock); /* release resources... */ snd_seq_prioq_delete(&q->tickq); snd_seq_prioq_delete(&q->timeq); snd_seq_timer_delete(&q->timer); kfree(q); } /*----------------------------------------------------------------*/ /* delete all existing queues */ void snd_seq_queues_delete(void) { int i; /* clear list */ for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if (queue_list[i]) queue_delete(queue_list[i]); } } static void queue_use(struct snd_seq_queue *queue, int client, int use); /* allocate a new queue - * return pointer to new queue or ERR_PTR(-errno) for error * The new queue's use_lock is set to 1. It is the caller's responsibility to * call snd_use_lock_free(&q->use_lock). 
*/ struct snd_seq_queue *snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) { struct snd_seq_queue *q; q = queue_new(client, locked); if (q == NULL) return ERR_PTR(-ENOMEM); q->info_flags = info_flags; queue_use(q, client, 1); snd_use_lock_use(&q->use_lock); if (queue_list_add(q) < 0) { snd_use_lock_free(&q->use_lock); queue_delete(q); return ERR_PTR(-ENOMEM); } return q; } /* delete a queue - queue must be owned by the client */ int snd_seq_queue_delete(int client, int queueid) { struct snd_seq_queue *q; if (queueid < 0 || queueid >= SNDRV_SEQ_MAX_QUEUES) return -EINVAL; q = queue_list_remove(queueid, client); if (q == NULL) return -EINVAL; queue_delete(q); return 0; } /* return pointer to queue structure for specified id */ struct snd_seq_queue *queueptr(int queueid) { struct snd_seq_queue *q; if (queueid < 0 || queueid >= SNDRV_SEQ_MAX_QUEUES) return NULL; guard(spinlock_irqsave)(&queue_list_lock); q = queue_list[queueid]; if (q) snd_use_lock_use(&q->use_lock); return q; } /* return the (first) queue matching with the specified name */ struct snd_seq_queue *snd_seq_queue_find_name(char *name) { int i; struct snd_seq_queue *q; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (q) { if (strncmp(q->name, name, sizeof(q->name)) == 0) return q; queuefree(q); } } return NULL; } /* -------------------------------------------------------- */ #define MAX_CELL_PROCESSES_IN_QUEUE 1000 void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { struct snd_seq_event_cell *cell; snd_seq_tick_time_t cur_tick; snd_seq_real_time_t cur_time; int processed = 0; if (q == NULL) return; /* make this function non-reentrant */ scoped_guard(spinlock_irqsave, &q->check_lock) { if (q->check_blocked) { q->check_again = 1; return; /* other thread is already checking queues */ } q->check_blocked = 1; } __again: /* Process tick queue... */ cur_tick = snd_seq_timer_get_cur_tick(q->timer); for (;;) { cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE) goto out; /* the rest processed at the next batch */ } /* Process time queue... 
*/ cur_time = snd_seq_timer_get_cur_time(q->timer, false); for (;;) { cell = snd_seq_prioq_cell_out(q->timeq, &cur_time); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE) goto out; /* the rest processed at the next batch */ } out: /* free lock */ scoped_guard(spinlock_irqsave, &q->check_lock) { if (q->check_again) { q->check_again = 0; if (processed < MAX_CELL_PROCESSES_IN_QUEUE) goto __again; } q->check_blocked = 0; } } /* enqueue a event to singe queue */ int snd_seq_enqueue_event(struct snd_seq_event_cell *cell, int atomic, int hop) { int dest, err; struct snd_seq_queue *q; if (snd_BUG_ON(!cell)) return -EINVAL; dest = cell->event.queue; /* destination queue */ q = queueptr(dest); if (q == NULL) return -EINVAL; /* handle relative time stamps, convert them into absolute */ if ((cell->event.flags & SNDRV_SEQ_TIME_MODE_MASK) == SNDRV_SEQ_TIME_MODE_REL) { switch (cell->event.flags & SNDRV_SEQ_TIME_STAMP_MASK) { case SNDRV_SEQ_TIME_STAMP_TICK: cell->event.time.tick += q->timer->tick.cur_tick; break; case SNDRV_SEQ_TIME_STAMP_REAL: snd_seq_inc_real_time(&cell->event.time.time, &q->timer->cur_time); break; } cell->event.flags &= ~SNDRV_SEQ_TIME_MODE_MASK; cell->event.flags |= SNDRV_SEQ_TIME_MODE_ABS; } /* enqueue event in the real-time or midi queue */ switch (cell->event.flags & SNDRV_SEQ_TIME_STAMP_MASK) { case SNDRV_SEQ_TIME_STAMP_TICK: err = snd_seq_prioq_cell_in(q->tickq, cell); break; case SNDRV_SEQ_TIME_STAMP_REAL: default: err = snd_seq_prioq_cell_in(q->timeq, cell); break; } if (err < 0) { queuefree(q); /* unlock */ return err; } /* trigger dispatching */ snd_seq_check_queue(q, atomic, hop); queuefree(q); /* unlock */ return 0; } /*----------------------------------------------------------------*/ static inline int check_access(struct snd_seq_queue *q, int client) { return (q->owner == client) || (!q->locked && !q->klocked); } /* check if the client has permission to modify queue parameters. * if it does, lock the queue */ static int queue_access_lock(struct snd_seq_queue *q, int client) { int access_ok; guard(spinlock_irqsave)(&q->owner_lock); access_ok = check_access(q, client); if (access_ok) q->klocked = 1; return access_ok; } /* unlock the queue */ static inline void queue_access_unlock(struct snd_seq_queue *q) { guard(spinlock_irqsave)(&q->owner_lock); q->klocked = 0; } /* exported - only checking permission */ int snd_seq_queue_check_access(int queueid, int client) { struct snd_seq_queue *q = queueptr(queueid); int access_ok; if (! q) return 0; scoped_guard(spinlock_irqsave, &q->owner_lock) access_ok = check_access(q, client); queuefree(q); return access_ok; } /*----------------------------------------------------------------*/ /* * change queue's owner and permission */ int snd_seq_queue_set_owner(int queueid, int client, int locked) { struct snd_seq_queue *q = queueptr(queueid); if (q == NULL) return -EINVAL; if (! queue_access_lock(q, client)) { queuefree(q); return -EPERM; } scoped_guard(spinlock_irqsave, &q->owner_lock) { q->locked = locked ? 
1 : 0; q->owner = client; } queue_access_unlock(q); queuefree(q); return 0; } /*----------------------------------------------------------------*/ /* open timer - * q->use mutex should be down before calling this function to avoid * confliction with snd_seq_queue_use() */ int snd_seq_queue_timer_open(int queueid) { int result = 0; struct snd_seq_queue *queue; struct snd_seq_timer *tmr; queue = queueptr(queueid); if (queue == NULL) return -EINVAL; tmr = queue->timer; result = snd_seq_timer_open(queue); if (result < 0) { snd_seq_timer_defaults(tmr); result = snd_seq_timer_open(queue); } queuefree(queue); return result; } /* close timer - * q->use mutex should be down before calling this function */ int snd_seq_queue_timer_close(int queueid) { struct snd_seq_queue *queue; int result = 0; queue = queueptr(queueid); if (queue == NULL) return -EINVAL; snd_seq_timer_close(queue); queuefree(queue); return result; } /* change queue tempo and ppq */ int snd_seq_queue_timer_set_tempo(int queueid, int client, struct snd_seq_queue_tempo *info) { struct snd_seq_queue *q = queueptr(queueid); int result; if (q == NULL) return -EINVAL; if (! queue_access_lock(q, client)) { queuefree(q); return -EPERM; } result = snd_seq_timer_set_tempo_ppq(q->timer, info->tempo, info->ppq); if (result >= 0 && info->skew_base > 0) result = snd_seq_timer_set_skew(q->timer, info->skew_value, info->skew_base); queue_access_unlock(q); queuefree(q); return result; } /* use or unuse this queue */ static void queue_use(struct snd_seq_queue *queue, int client, int use) { if (use) { if (!test_and_set_bit(client, queue->clients_bitmap)) queue->clients++; } else { if (test_and_clear_bit(client, queue->clients_bitmap)) queue->clients--; } if (queue->clients) { if (use && queue->clients == 1) snd_seq_timer_defaults(queue->timer); snd_seq_timer_open(queue); } else { snd_seq_timer_close(queue); } } /* use or unuse this queue - * if it is the first client, starts the timer. * if it is not longer used by any clients, stop the timer. */ int snd_seq_queue_use(int queueid, int client, int use) { struct snd_seq_queue *queue; queue = queueptr(queueid); if (queue == NULL) return -EINVAL; mutex_lock(&queue->timer_mutex); queue_use(queue, client, use); mutex_unlock(&queue->timer_mutex); queuefree(queue); return 0; } /* * check if queue is used by the client * return negative value if the queue is invalid. * return 0 if not used, 1 if used. */ int snd_seq_queue_is_used(int queueid, int client) { struct snd_seq_queue *q; int result; q = queueptr(queueid); if (q == NULL) return -EINVAL; /* invalid queue */ result = test_bit(client, q->clients_bitmap) ? 
1 : 0; queuefree(q); return result; } /*----------------------------------------------------------------*/ /* final stage notification - * remove cells for no longer exist client (for non-owned queue) * or delete this queue (for owned queue) */ void snd_seq_queue_client_leave(int client) { int i; struct snd_seq_queue *q; /* delete own queues from queue list */ for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queue_list_remove(i, client); if (q) queue_delete(q); } /* remove cells from existing queues - * they are not owned by this client */ for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (!q) continue; if (test_bit(client, q->clients_bitmap)) { snd_seq_prioq_leave(q->tickq, client, 0); snd_seq_prioq_leave(q->timeq, client, 0); snd_seq_queue_use(q->queue, client, 0); } queuefree(q); } } /*----------------------------------------------------------------*/ /* remove cells from all queues */ void snd_seq_queue_client_leave_cells(int client) { int i; struct snd_seq_queue *q; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (!q) continue; snd_seq_prioq_leave(q->tickq, client, 0); snd_seq_prioq_leave(q->timeq, client, 0); queuefree(q); } } /* remove cells based on flush criteria */ void snd_seq_queue_remove_cells(int client, struct snd_seq_remove_events *info) { int i; struct snd_seq_queue *q; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (!q) continue; if (test_bit(client, q->clients_bitmap) && (! (info->remove_mode & SNDRV_SEQ_REMOVE_DEST) || q->queue == info->queue)) { snd_seq_prioq_remove_events(q->tickq, client, info); snd_seq_prioq_remove_events(q->timeq, client, info); } queuefree(q); } } /*----------------------------------------------------------------*/ /* * send events to all subscribed ports */ static void queue_broadcast_event(struct snd_seq_queue *q, struct snd_seq_event *ev, int atomic, int hop) { struct snd_seq_event sev; sev = *ev; sev.flags = SNDRV_SEQ_TIME_STAMP_TICK|SNDRV_SEQ_TIME_MODE_ABS; sev.time.tick = q->timer->tick.cur_tick; sev.queue = q->queue; sev.data.queue.queue = q->queue; /* broadcast events from Timer port */ sev.source.client = SNDRV_SEQ_CLIENT_SYSTEM; sev.source.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; sev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; snd_seq_kernel_client_dispatch(SNDRV_SEQ_CLIENT_SYSTEM, &sev, atomic, hop); } /* * process a received queue-control event. * this function is exported for seq_sync.c. */ static void snd_seq_queue_process_event(struct snd_seq_queue *q, struct snd_seq_event *ev, int atomic, int hop) { switch (ev->type) { case SNDRV_SEQ_EVENT_START: snd_seq_prioq_leave(q->tickq, ev->source.client, 1); snd_seq_prioq_leave(q->timeq, ev->source.client, 1); if (! snd_seq_timer_start(q->timer)) queue_broadcast_event(q, ev, atomic, hop); break; case SNDRV_SEQ_EVENT_CONTINUE: if (! 
snd_seq_timer_continue(q->timer)) queue_broadcast_event(q, ev, atomic, hop); break; case SNDRV_SEQ_EVENT_STOP: snd_seq_timer_stop(q->timer); queue_broadcast_event(q, ev, atomic, hop); break; case SNDRV_SEQ_EVENT_TEMPO: snd_seq_timer_set_tempo(q->timer, ev->data.queue.param.value); queue_broadcast_event(q, ev, atomic, hop); break; case SNDRV_SEQ_EVENT_SETPOS_TICK: if (snd_seq_timer_set_position_tick(q->timer, ev->data.queue.param.time.tick) == 0) { queue_broadcast_event(q, ev, atomic, hop); } break; case SNDRV_SEQ_EVENT_SETPOS_TIME: if (snd_seq_timer_set_position_time(q->timer, ev->data.queue.param.time.time) == 0) { queue_broadcast_event(q, ev, atomic, hop); } break; case SNDRV_SEQ_EVENT_QUEUE_SKEW: if (snd_seq_timer_set_skew(q->timer, ev->data.queue.param.skew.value, ev->data.queue.param.skew.base) == 0) { queue_broadcast_event(q, ev, atomic, hop); } break; } } /* * Queue control via timer control port: * this function is exported as a callback of timer port. */ int snd_seq_control_queue(struct snd_seq_event *ev, int atomic, int hop) { struct snd_seq_queue *q; if (snd_BUG_ON(!ev)) return -EINVAL; q = queueptr(ev->data.queue.queue); if (q == NULL) return -EINVAL; if (! queue_access_lock(q, ev->source.client)) { queuefree(q); return -EPERM; } snd_seq_queue_process_event(q, ev, atomic, hop); queue_access_unlock(q); queuefree(q); return 0; } /*----------------------------------------------------------------*/ #ifdef CONFIG_SND_PROC_FS /* exported to seq_info.c */ void snd_seq_info_queues_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { int i, bpm; struct snd_seq_queue *q; struct snd_seq_timer *tmr; bool locked; int owner; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (!q) continue; tmr = q->timer; if (tmr->tempo) bpm = 60000000 / tmr->tempo; else bpm = 0; scoped_guard(spinlock_irq, &q->owner_lock) { locked = q->locked; owner = q->owner; } snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name); snd_iprintf(buffer, "owned by client : %d\n", owner); snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free"); snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq)); snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq)); snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped"); snd_iprintf(buffer, "timer PPQ : %d\n", tmr->ppq); snd_iprintf(buffer, "current tempo : %d\n", tmr->tempo); snd_iprintf(buffer, "current BPM : %d\n", bpm); snd_iprintf(buffer, "current time : %d.%09d s\n", tmr->cur_time.tv_sec, tmr->cur_time.tv_nsec); snd_iprintf(buffer, "current tick : %d\n", tmr->tick.cur_tick); snd_iprintf(buffer, "\n"); queuefree(q); } } #endif /* CONFIG_SND_PROC_FS */
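/*
 * Illustrative sketch (not part of seq_queue.c): the queueptr()/queuefree()
 * discipline required by the header comment above -- every successful lookup
 * holds the queue's use_lock and must release it again.  The helper
 * example_count_pending() is hypothetical, shown only to make that pattern
 * concrete.
 */
#if 0	/* example only, not compiled */
static int example_count_pending(int queueid)
{
	struct snd_seq_queue *q;
	int pending;

	q = queueptr(queueid);		/* takes q->use_lock */
	if (!q)
		return -EINVAL;

	pending = snd_seq_prioq_avail(q->tickq) +
		  snd_seq_prioq_avail(q->timeq);

	queuefree(q);			/* releases q->use_lock */
	return pending;
}
#endif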
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ADDRCONF_H
#define _ADDRCONF_H

#define MAX_RTR_SOLICITATIONS		-1	/* unlimited */
#define RTR_SOLICITATION_INTERVAL	(4*HZ)
#define RTR_SOLICITATION_MAX_INTERVAL	(3600*HZ)	/* 1 hour */

#define MIN_VALID_LIFETIME		(2*3600)	/* 2 hours */

#define TEMP_VALID_LIFETIME		(7*86400)	/* 1 week */
#define TEMP_PREFERRED_LIFETIME		(86400)		/* 24 hours */
#define REGEN_MIN_ADVANCE		(2)		/* 2 seconds */
#define REGEN_MAX_RETRY			(3)
#define MAX_DESYNC_FACTOR		(600)

#define ADDR_CHECK_FREQUENCY		(120*HZ)

#define IPV6_MAX_ADDRESSES		16

#define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ?
HZ / 50 : 1) #define ADDRCONF_TIMER_FUZZ (HZ / 4) #define ADDRCONF_TIMER_FUZZ_MAX (HZ) #define ADDRCONF_NOTIFY_PRIORITY 0 #include <linux/in.h> #include <linux/in6.h> struct prefix_info { __u8 type; __u8 length; __u8 prefix_len; union __packed { __u8 flags; struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) __u8 onlink : 1, autoconf : 1, reserved : 6; #elif defined(__LITTLE_ENDIAN_BITFIELD) __u8 reserved : 6, autoconf : 1, onlink : 1; #else #error "Please fix <asm/byteorder.h>" #endif }; }; __be32 valid; __be32 prefered; __be32 reserved2; struct in6_addr prefix; }; /* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ static_assert(sizeof(struct prefix_info) == 32); #include <linux/ipv6.h> #include <linux/netdevice.h> #include <net/if_inet6.h> #include <net/ipv6.h> struct in6_validator_info { struct in6_addr i6vi_addr; struct inet6_dev *i6vi_dev; struct netlink_ext_ack *extack; }; struct ifa6_config { const struct in6_addr *pfx; unsigned int plen; u8 ifa_proto; const struct in6_addr *peer_pfx; u32 rt_priority; u32 ifa_flags; u32 preferred_lft; u32 valid_lft; u16 scope; }; int addrconf_init(void); void addrconf_cleanup(void); int addrconf_add_ifaddr(struct net *net, void __user *arg); int addrconf_del_ifaddr(struct net *net, void __user *arg); int addrconf_set_dstaddr(struct net *net, void __user *arg); int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, const struct net_device *dev, bool skip_dev_check, int strict, u32 banned_flags); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); #endif int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, unsigned char nsegs); bool ipv6_chk_custom_prefix(const struct in6_addr *addr, const unsigned int prefix_len, struct net_device *dev); int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev); struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr, struct net_device *dev); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict); int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, bool match_wildcard); bool inet_rcv_saddr_any(const struct sock *sk); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr, u32 flags); int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, const struct prefix_info *pinfo, struct inet6_dev *in6_dev, const struct in6_addr *addr, int addr_type, u32 addr_flags, bool sllao, bool tokenized, __u32 valid_lft, u32 prefered_lft); static inline void addrconf_addr_eui48_base(u8 *eui, const char *const addr) { memcpy(eui, addr, 3); eui[3] = 0xFF; eui[4] = 0xFE; memcpy(eui + 5, addr + 3, 3); } static inline void addrconf_addr_eui48(u8 *eui, const char *const addr) { addrconf_addr_eui48_base(eui, addr); eui[0] ^= 2; } static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) return -1; /* * The zSeries OSA network 
cards can be shared among various * OS instances, but the OSA cards have only one MAC address. * This leads to duplicate address conflicts in conjunction * with IPv6 if more than one instance uses the same card. * * The driver for these cards can deliver a unique 16-bit * identifier for each instance sharing the same card. It is * placed instead of 0xFFFE in the interface identifier. The * "u" bit of the interface identifier is not inverted in this * case. Hence the resulting interface identifier has local * scope according to RFC2373. */ addrconf_addr_eui48_base(eui, dev->dev_addr); if (dev->dev_id) { eui[3] = (dev->dev_id >> 8) & 0xFF; eui[4] = dev->dev_id & 0xFF; } else { eui[0] ^= 2; } return 0; } static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned int unit) { if (timeout == 0xffffffff) return ~0UL; /* * Avoid arithmetic overflow. * Assuming unit is constant and non-zero, this "if" statement * will go away on 64bit archs. */ if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit) return LONG_MAX / unit; return timeout; } static inline int addrconf_finite_timeout(unsigned long timeout) { return ~timeout; } /* * IPv6 Address Label subsystem (addrlabel.c) */ int ipv6_addr_label_init(void); void ipv6_addr_label_cleanup(void); int ipv6_addr_label_rtnl_register(void); u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex); /* * multicast prototypes (mcast.c) */ static inline bool ipv6_mc_may_pull(struct sk_buff *skb, unsigned int len) { if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) return false; return pskb_may_pull(skb, len); } int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); void __ipv6_sock_mc_close(struct sock *sk); void ipv6_sock_mc_close(struct sock *sk); bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr); int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr); int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr); int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr); void ipv6_mc_up(struct inet6_dev *idev); void ipv6_mc_down(struct inet6_dev *idev); void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); int ipv6_mc_check_mld(struct sk_buff *skb); void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, const struct in6_addr *src_addr); void ipv6_mc_dad_complete(struct inet6_dev *idev); /* * identify MLD packets for MLD filter exceptions */ static inline bool ipv6_is_mld(struct sk_buff *skb, int nexthdr, int offset) { struct icmp6hdr *hdr; if (nexthdr != IPPROTO_ICMPV6 || !pskb_network_may_pull(skb, offset + sizeof(struct icmp6hdr))) return false; hdr = (struct icmp6hdr *)(skb_network_header(skb) + offset); switch (hdr->icmp6_type) { case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: case ICMPV6_MLD2_REPORT: return true; default: break; } return false; } void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao); /* * anycast prototypes (anycast.c) */ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); void 
__ipv6_sock_ac_close(struct sock *sk); void ipv6_sock_ac_close(struct sock *sk); int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr); int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr); void ipv6_ac_destroy_dev(struct inet6_dev *idev); bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); int ipv6_anycast_init(void); void ipv6_anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); int unregister_inet6addr_notifier(struct notifier_block *nb); int inet6addr_notifier_call_chain(unsigned long val, void *v); int register_inet6addr_validator_notifier(struct notifier_block *nb); int unregister_inet6addr_validator_notifier(struct notifier_block *nb); int inet6addr_validator_notifier_call_chain(unsigned long val, void *v); void inet6_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv6_devconf *devconf); /** * __in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. */ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) { return rcu_dereference_rtnl(dev->ip6_ptr); } /** * __in6_dev_stats_get - get inet6_dev pointer for stats * @dev: network device * @skb: skb for original incoming interface if needed * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. */ static inline struct inet6_dev *__in6_dev_stats_get(const struct net_device *dev, const struct sk_buff *skb) { if (netif_is_l3_master(dev)) dev = dev_get_by_index_rcu(dev_net(dev), inet6_iif(skb)); return __in6_dev_get(dev); } /** * __in6_dev_get_safely - get inet6_dev pointer from netdevice * @dev: network device * * This is a safer version of __in6_dev_get */ static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev) { if (likely(dev)) return rcu_dereference_rtnl(dev->ip6_ptr); else return NULL; } /** * in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device * * This version can be used in any context, and takes a reference * on the inet6_dev. Callers must use in6_dev_put() later to * release this reference. */ static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) { struct inet6_dev *idev; rcu_read_lock(); idev = rcu_dereference(dev->ip6_ptr); if (idev) refcount_inc(&idev->refcnt); rcu_read_unlock(); return idev; } static inline struct neigh_parms *__in6_dev_nd_parms_get_rcu(const struct net_device *dev) { struct inet6_dev *idev = __in6_dev_get(dev); return idev ?
idev->nd_parms : NULL; } void in6_dev_finish_destroy(struct inet6_dev *idev); static inline void in6_dev_put(struct inet6_dev *idev) { if (refcount_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } static inline void in6_dev_put_clear(struct inet6_dev **pidev) { struct inet6_dev *idev = *pidev; if (idev) { in6_dev_put(idev); *pidev = NULL; } } static inline void __in6_dev_put(struct inet6_dev *idev) { refcount_dec(&idev->refcnt); } static inline void in6_dev_hold(struct inet6_dev *idev) { refcount_inc(&idev->refcnt); } /* called with rcu_read_lock held */ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); if (unlikely(!idev)) return true; return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) { if (refcount_dec_and_test(&ifp->refcnt)) inet6_ifa_finish_destroy(ifp); } static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) { refcount_dec(&ifp->refcnt); } static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) { refcount_inc(&ifp->refcnt); } static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp) { return refcount_inc_not_zero(&ifp->refcnt); } /* * compute link-local solicited-node multicast address */ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr, struct in6_addr *solicited) { ipv6_addr_set(solicited, htonl(0xFF020000), 0, htonl(0x1), htonl(0xFF000000) | addr->s6_addr32[3]); } static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(1))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0; #endif } static inline bool ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(2))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; #endif } static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) { return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); } static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | ((p[1] ^ cpu_to_be64(0x00000001ff000000UL)) & cpu_to_be64(0xffffffffff000000UL))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | (addr->s6_addr32[2] ^ htonl(0x00000001)) | (addr->s6_addr[12] ^ 0xff)) == 0; #endif } static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(0x6a))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x0000006a))) == 0; #endif } #ifdef CONFIG_PROC_FS int if6_proc_init(void); void if6_proc_exit(void); #endif #endif
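/*
 * Illustrative sketch (not part of the header above): how a caller in process
 * context might use in6_dev_get()/in6_dev_put() as described in the kernel-doc
 * comments above. The function name and the choice of reading cnf.mtu6 are
 * assumptions made only for this example.
 */
#include <net/addrconf.h>

static int example_get_ipv6_mtu(struct net_device *dev)
{
	struct inet6_dev *idev;
	int mtu;

	idev = in6_dev_get(dev);	/* takes a reference; may return NULL */
	if (!idev)
		return -ENODEV;

	mtu = READ_ONCE(idev->cnf.mtu6);	/* safe while the reference is held */

	in6_dev_put(idev);		/* release the reference taken above */
	return mtu;
}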
// SPDX-License-Identifier: GPL-2.0-or-later /* * Roccat Arvo driver for Linux * * Copyright (c) 2011 Stefan Achatz <erazor_de@users.sourceforge.net> */ /* */ /* * Roccat Arvo is a gamer keyboard with 5 macro keys that can be configured in * 5 profiles.
*/ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" #include "hid-roccat-arvo.h" static ssize_t arvo_sysfs_show_mode_key(struct device *dev, struct device_attribute *attr, char *buf) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_mode_key temp_buf; int retval; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_receive(usb_dev, ARVO_COMMAND_MODE_KEY, &temp_buf, sizeof(struct arvo_mode_key)); mutex_unlock(&arvo->arvo_lock); if (retval) return retval; return sysfs_emit(buf, "%d\n", temp_buf.state); } static ssize_t arvo_sysfs_set_mode_key(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_mode_key temp_buf; unsigned long state; int retval; retval = kstrtoul(buf, 10, &state); if (retval) return retval; temp_buf.command = ARVO_COMMAND_MODE_KEY; temp_buf.state = state; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_send(usb_dev, ARVO_COMMAND_MODE_KEY, &temp_buf, sizeof(struct arvo_mode_key)); mutex_unlock(&arvo->arvo_lock); if (retval) return retval; return size; } static DEVICE_ATTR(mode_key, 0660, arvo_sysfs_show_mode_key, arvo_sysfs_set_mode_key); static ssize_t arvo_sysfs_show_key_mask(struct device *dev, struct device_attribute *attr, char *buf) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_key_mask temp_buf; int retval; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_receive(usb_dev, ARVO_COMMAND_KEY_MASK, &temp_buf, sizeof(struct arvo_key_mask)); mutex_unlock(&arvo->arvo_lock); if (retval) return retval; return sysfs_emit(buf, "%d\n", temp_buf.key_mask); } static ssize_t arvo_sysfs_set_key_mask(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_key_mask temp_buf; unsigned long key_mask; int retval; retval = kstrtoul(buf, 10, &key_mask); if (retval) return retval; temp_buf.command = ARVO_COMMAND_KEY_MASK; temp_buf.key_mask = key_mask; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_send(usb_dev, ARVO_COMMAND_KEY_MASK, &temp_buf, sizeof(struct arvo_key_mask)); mutex_unlock(&arvo->arvo_lock); if (retval) return retval; return size; } static DEVICE_ATTR(key_mask, 0660, arvo_sysfs_show_key_mask, arvo_sysfs_set_key_mask); /* retval is 1-5 on success, < 0 on error */ static int arvo_get_actual_profile(struct usb_device *usb_dev) { struct arvo_actual_profile temp_buf; int retval; retval = roccat_common2_receive(usb_dev, ARVO_COMMAND_ACTUAL_PROFILE, &temp_buf, sizeof(struct arvo_actual_profile)); if (retval) return retval; return temp_buf.actual_profile; } static ssize_t arvo_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", arvo->actual_profile); } static ssize_t 
arvo_sysfs_set_actual_profile(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev->parent->parent)); struct arvo_actual_profile temp_buf; unsigned long profile; int retval; retval = kstrtoul(buf, 10, &profile); if (retval) return retval; if (profile < 1 || profile > 5) return -EINVAL; temp_buf.command = ARVO_COMMAND_ACTUAL_PROFILE; temp_buf.actual_profile = profile; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_send(usb_dev, ARVO_COMMAND_ACTUAL_PROFILE, &temp_buf, sizeof(struct arvo_actual_profile)); if (!retval) { arvo->actual_profile = profile; retval = size; } mutex_unlock(&arvo->arvo_lock); return retval; } static DEVICE_ATTR(actual_profile, 0660, arvo_sysfs_show_actual_profile, arvo_sysfs_set_actual_profile); static ssize_t arvo_sysfs_write(struct file *fp, struct kobject *kobj, void const *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_send(usb_dev, command, buf, real_size); mutex_unlock(&arvo->arvo_lock); return (retval ? retval : real_size); } static ssize_t arvo_sysfs_read(struct file *fp, struct kobject *kobj, void *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct arvo_device *arvo = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off >= real_size) return 0; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&arvo->arvo_lock); retval = roccat_common2_receive(usb_dev, command, buf, real_size); mutex_unlock(&arvo->arvo_lock); return (retval ? 
retval : real_size); } static ssize_t arvo_sysfs_write_button(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return arvo_sysfs_write(fp, kobj, buf, off, count, sizeof(struct arvo_button), ARVO_COMMAND_BUTTON); } static BIN_ATTR(button, 0220, NULL, arvo_sysfs_write_button, sizeof(struct arvo_button)); static ssize_t arvo_sysfs_read_info(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return arvo_sysfs_read(fp, kobj, buf, off, count, sizeof(struct arvo_info), ARVO_COMMAND_INFO); } static BIN_ATTR(info, 0440, arvo_sysfs_read_info, NULL, sizeof(struct arvo_info)); static struct attribute *arvo_attrs[] = { &dev_attr_mode_key.attr, &dev_attr_key_mask.attr, &dev_attr_actual_profile.attr, NULL, }; static struct bin_attribute *arvo_bin_attributes[] = { &bin_attr_button, &bin_attr_info, NULL, }; static const struct attribute_group arvo_group = { .attrs = arvo_attrs, .bin_attrs = arvo_bin_attributes, }; static const struct attribute_group *arvo_groups[] = { &arvo_group, NULL, }; static const struct class arvo_class = { .name = "arvo", .dev_groups = arvo_groups, }; static int arvo_init_arvo_device_struct(struct usb_device *usb_dev, struct arvo_device *arvo) { int retval; mutex_init(&arvo->arvo_lock); retval = arvo_get_actual_profile(usb_dev); if (retval < 0) return retval; arvo->actual_profile = retval; return 0; } static int arvo_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct arvo_device *arvo; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_KEYBOARD) { hid_set_drvdata(hdev, NULL); return 0; } arvo = kzalloc(sizeof(*arvo), GFP_KERNEL); if (!arvo) { hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, arvo); retval = arvo_init_arvo_device_struct(usb_dev, arvo); if (retval) { hid_err(hdev, "couldn't init struct arvo_device\n"); goto exit_free; } retval = roccat_connect(&arvo_class, hdev, sizeof(struct arvo_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { arvo->chrdev_minor = retval; arvo->roccat_claimed = 1; } return 0; exit_free: kfree(arvo); return retval; } static void arvo_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct arvo_device *arvo; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_KEYBOARD) return; arvo = hid_get_drvdata(hdev); if (arvo->roccat_claimed) roccat_disconnect(arvo->chrdev_minor); kfree(arvo); } static int arvo_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = arvo_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install keyboard\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void arvo_remove(struct hid_device *hdev) { arvo_remove_specials(hdev); hid_hw_stop(hdev); } static void arvo_report_to_chrdev(struct arvo_device const *arvo, u8 const *data) { struct arvo_special_report const *special_report; struct arvo_roccat_report roccat_report; special_report = (struct arvo_special_report const *)data; roccat_report.profile 
= arvo->actual_profile; roccat_report.button = special_report->event & ARVO_SPECIAL_REPORT_EVENT_MASK_BUTTON; if ((special_report->event & ARVO_SPECIAL_REPORT_EVENT_MASK_ACTION) == ARVO_SPECIAL_REPORT_EVENT_ACTION_PRESS) roccat_report.action = ARVO_ROCCAT_REPORT_ACTION_PRESS; else roccat_report.action = ARVO_ROCCAT_REPORT_ACTION_RELEASE; roccat_report_event(arvo->chrdev_minor, (uint8_t const *)&roccat_report); } static int arvo_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct arvo_device *arvo = hid_get_drvdata(hdev); if (size != 3) return 0; if (arvo && arvo->roccat_claimed) arvo_report_to_chrdev(arvo, data); return 0; } static const struct hid_device_id arvo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) }, { } }; MODULE_DEVICE_TABLE(hid, arvo_devices); static struct hid_driver arvo_driver = { .name = "arvo", .id_table = arvo_devices, .probe = arvo_probe, .remove = arvo_remove, .raw_event = arvo_raw_event }; static int __init arvo_init(void) { int retval; retval = class_register(&arvo_class); if (retval) return retval; retval = hid_register_driver(&arvo_driver); if (retval) class_unregister(&arvo_class); return retval; } static void __exit arvo_exit(void) { hid_unregister_driver(&arvo_driver); class_unregister(&arvo_class); } module_init(arvo_init); module_exit(arvo_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Arvo driver"); MODULE_LICENSE("GPL v2");
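/*
 * Illustrative userspace sketch (not part of the driver above): selecting a
 * profile through the "actual_profile" attribute created by arvo_groups. The
 * sysfs path used here is an assumption made only for this example; the real
 * location depends on how the "arvo" class device is registered on a given
 * system.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/arvo/arvo0/actual_profile";	/* hypothetical path */
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* arvo_sysfs_set_actual_profile() accepts values 1..5 */
	fputs("3\n", f);
	fclose(f);
	return 0;
}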
// SPDX-License-Identifier: GPL-2.0 /* * udc.c - Core UDC Framework * * Copyright (C) 2016 Intel Corporation * Author: Felipe Balbi <felipe.balbi@linux.intel.com> */ #undef TRACE_SYSTEM #define TRACE_SYSTEM gadget #if !defined(__UDC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define __UDC_TRACE_H #include <linux/types.h> #include <linux/tracepoint.h> #include <asm/byteorder.h> #include <linux/usb/gadget.h> DECLARE_EVENT_CLASS(udc_log_gadget, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret), TP_STRUCT__entry( __field(enum usb_device_speed, speed) __field(enum usb_device_speed, max_speed) __field(enum usb_device_state, state) __field(unsigned, mA) __field(unsigned, sg_supported) __field(unsigned, is_otg) __field(unsigned, is_a_peripheral) __field(unsigned, b_hnp_enable) __field(unsigned, a_hnp_support) __field(unsigned, hnp_polling_support) __field(unsigned, host_request_flag) __field(unsigned, quirk_ep_out_aligned_size) __field(unsigned, quirk_altset_not_supp) __field(unsigned, quirk_stall_not_supp) __field(unsigned, quirk_zlp_not_supp) __field(unsigned, is_selfpowered) __field(unsigned, deactivated) __field(unsigned, connected) __field(int, ret) ), TP_fast_assign( __entry->speed = g->speed; __entry->max_speed = g->max_speed; __entry->state = g->state; __entry->mA = g->mA; __entry->sg_supported = g->sg_supported; __entry->is_otg = g->is_otg; __entry->is_a_peripheral = g->is_a_peripheral; __entry->b_hnp_enable = g->b_hnp_enable; __entry->a_hnp_support = g->a_hnp_support; __entry->hnp_polling_support = g->hnp_polling_support; __entry->host_request_flag = g->host_request_flag; __entry->quirk_ep_out_aligned_size = g->quirk_ep_out_aligned_size; __entry->quirk_altset_not_supp = g->quirk_altset_not_supp; __entry->quirk_stall_not_supp = g->quirk_stall_not_supp; __entry->quirk_zlp_not_supp = g->quirk_zlp_not_supp; __entry->is_selfpowered = g->is_selfpowered; __entry->deactivated = g->deactivated; __entry->connected = g->connected; __entry->ret = ret; ), TP_printk("speed %d/%d state %d %dmA [%s%s%s%s%s%s%s%s%s%s%s%s%s%s] --> %d", __entry->speed, __entry->max_speed, __entry->state, __entry->mA, __entry->sg_supported ? "sg:" : "", __entry->is_otg ? "OTG:" : "", __entry->is_a_peripheral ? "a_peripheral:" : "", __entry->b_hnp_enable ? "b_hnp:" : "", __entry->a_hnp_support ? "a_hnp:" : "", __entry->hnp_polling_support ? "hnp_poll:" : "", __entry->host_request_flag ? "hostreq:" : "", __entry->quirk_ep_out_aligned_size ?
"out_aligned:" : "", __entry->quirk_altset_not_supp ? "no_altset:" : "", __entry->quirk_stall_not_supp ? "no_stall:" : "", __entry->quirk_zlp_not_supp ? "no_zlp" : "", __entry->is_selfpowered ? "self-powered:" : "bus-powered:", __entry->deactivated ? "deactivated:" : "activated:", __entry->connected ? "connected" : "disconnected", __entry->ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_frame_number, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_clear_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_vbus_connect, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_vbus_draw, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_vbus_disconnect, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_connect, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_disconnect, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_deactivate, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DEFINE_EVENT(udc_log_gadget, usb_gadget_activate, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) ); DECLARE_EVENT_CLASS(udc_log_ep, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret), TP_STRUCT__entry( __string(name, ep->name) __field(unsigned, maxpacket) __field(unsigned, maxpacket_limit) __field(unsigned, max_streams) __field(unsigned, mult) __field(unsigned, maxburst) __field(u8, address) __field(bool, claimed) __field(bool, enabled) __field(int, ret) ), TP_fast_assign( __assign_str(name, ep->name); __entry->maxpacket = ep->maxpacket; __entry->maxpacket_limit = ep->maxpacket_limit; __entry->max_streams = ep->max_streams; __entry->mult = ep->mult; __entry->maxburst = ep->maxburst; __entry->address = ep->address, __entry->claimed = ep->claimed; __entry->enabled = ep->enabled; __entry->ret = ret; ), TP_printk("%s: mps %d/%d streams %d mult %d burst %d addr %02x %s%s --> %d", __get_str(name), __entry->maxpacket, __entry->maxpacket_limit, __entry->max_streams, __entry->mult, __entry->maxburst, __entry->address, __entry->claimed ? "claimed:" : "released:", __entry->enabled ? 
"enabled" : "disabled", ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_set_maxpacket_limit, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_enable, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_disable, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_set_halt, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_clear_halt, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_set_wedge, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_fifo_status, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DEFINE_EVENT(udc_log_ep, usb_ep_fifo_flush, TP_PROTO(struct usb_ep *ep, int ret), TP_ARGS(ep, ret) ); DECLARE_EVENT_CLASS(udc_log_req, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret), TP_STRUCT__entry( __string(name, ep->name) __field(unsigned, length) __field(unsigned, actual) __field(unsigned, num_sgs) __field(unsigned, num_mapped_sgs) __field(unsigned, stream_id) __field(unsigned, no_interrupt) __field(unsigned, zero) __field(unsigned, short_not_ok) __field(int, status) __field(int, ret) __field(struct usb_request *, req) ), TP_fast_assign( __assign_str(name, ep->name); __entry->length = req->length; __entry->actual = req->actual; __entry->num_sgs = req->num_sgs; __entry->num_mapped_sgs = req->num_mapped_sgs; __entry->stream_id = req->stream_id; __entry->no_interrupt = req->no_interrupt; __entry->zero = req->zero; __entry->short_not_ok = req->short_not_ok; __entry->status = req->status; __entry->ret = ret; __entry->req = req; ), TP_printk("%s: req %p length %d/%d sgs %d/%d stream %d %s%s%s status %d --> %d", __get_str(name),__entry->req, __entry->actual, __entry->length, __entry->num_mapped_sgs, __entry->num_sgs, __entry->stream_id, __entry->zero ? "Z" : "z", __entry->short_not_ok ? "S" : "s", __entry->no_interrupt ? "i" : "I", __entry->status, __entry->ret ) ); DEFINE_EVENT(udc_log_req, usb_ep_alloc_request, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); DEFINE_EVENT(udc_log_req, usb_ep_free_request, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); DEFINE_EVENT(udc_log_req, usb_ep_queue, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); DEFINE_EVENT(udc_log_req, usb_ep_dequeue, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); DEFINE_EVENT(udc_log_req, usb_gadget_giveback_request, TP_PROTO(struct usb_ep *ep, struct usb_request *req, int ret), TP_ARGS(ep, req, ret) ); #endif /* __UDC_TRACE_H */ /* this part has to be here */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
// SPDX-License-Identifier: GPL-2.0 #include <linux/acpi.h> #include <linux/bitmap.h> #include <linux/cleanup.h> #include <linux/compat.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/module.h> #include <linux/of.h> #include <linux/pinctrl/consumer.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/srcu.h> #include <linux/string.h> #include <linux/gpio.h> #include <linux/gpio/driver.h> #include <linux/gpio/machine.h> #include <uapi/linux/gpio.h> #include "gpiolib-acpi.h" #include "gpiolib-cdev.h" #include "gpiolib-of.h" #include "gpiolib-swnode.h" #include "gpiolib-sysfs.h" #include "gpiolib.h" #define CREATE_TRACE_POINTS #include <trace/events/gpio.h> /* Implementation infrastructure for GPIO interfaces. * * The GPIO programming interface allows for inlining speed-critical * get/set operations for common cases, so that access to SOC-integrated * GPIOs can sometimes cost only an instruction or two per bit.
*/ /* Device and char device-related information */ static DEFINE_IDA(gpio_ida); static dev_t gpio_devt; #define GPIO_DEV_MAX 256 /* 256 GPIO chip devices supported */ static int gpio_bus_match(struct device *dev, struct device_driver *drv) { struct fwnode_handle *fwnode = dev_fwnode(dev); /* * Only match if the fwnode doesn't already have a proper struct device * created for it. */ if (fwnode && fwnode->dev != dev) return 0; return 1; } static const struct bus_type gpio_bus_type = { .name = "gpio", .match = gpio_bus_match, }; /* * Number of GPIOs to use for the fast path in set array */ #define FASTPATH_NGPIO CONFIG_GPIOLIB_FASTPATH_LIMIT static DEFINE_MUTEX(gpio_lookup_lock); static LIST_HEAD(gpio_lookup_list); static LIST_HEAD(gpio_devices); /* Protects the GPIO device list against concurrent modifications. */ static DEFINE_MUTEX(gpio_devices_lock); /* Ensures coherence during read-only accesses to the list of GPIO devices. */ DEFINE_STATIC_SRCU(gpio_devices_srcu); static DEFINE_MUTEX(gpio_machine_hogs_mutex); static LIST_HEAD(gpio_machine_hogs); static void gpiochip_free_hogs(struct gpio_chip *gc); static int gpiochip_add_irqchip(struct gpio_chip *gc, struct lock_class_key *lock_key, struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gc); static int gpiochip_irqchip_init_hw(struct gpio_chip *gc); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc); static bool gpiolib_initialized; const char *gpiod_get_label(struct gpio_desc *desc) { unsigned long flags; flags = READ_ONCE(desc->flags); if (test_bit(FLAG_USED_AS_IRQ, &flags) && !test_bit(FLAG_REQUESTED, &flags)) return "interrupt"; return test_bit(FLAG_REQUESTED, &flags) ? srcu_dereference(desc->label, &desc->srcu) : NULL; } static int desc_set_label(struct gpio_desc *desc, const char *label) { const char *new = NULL, *old; if (label) { new = kstrdup_const(label, GFP_KERNEL); if (!new) return -ENOMEM; } old = rcu_replace_pointer(desc->label, new, 1); synchronize_srcu(&desc->srcu); kfree_const(old); return 0; } /** * gpio_to_desc - Convert a GPIO number to its descriptor * @gpio: global GPIO number * * Returns: * The GPIO descriptor associated with the given GPIO, or %NULL if no GPIO * with the given number exists in the system. */ struct gpio_desc *gpio_to_desc(unsigned gpio) { struct gpio_device *gdev; scoped_guard(srcu, &gpio_devices_srcu) { list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { if (gdev->base <= gpio && gdev->base + gdev->ngpio > gpio) return &gdev->descs[gpio - gdev->base]; } } if (!gpio_is_valid(gpio)) pr_warn("invalid GPIO %d\n", gpio); return NULL; } EXPORT_SYMBOL_GPL(gpio_to_desc); /* This function is deprecated and will be removed soon, don't use. */ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum) { return gpio_device_get_desc(gc->gpiodev, hwnum); } EXPORT_SYMBOL_GPL(gpiochip_get_desc); /** * gpio_device_get_desc() - get the GPIO descriptor corresponding to the given * hardware number for this GPIO device * @gdev: GPIO device to get the descriptor from * @hwnum: hardware number of the GPIO for this chip * * Returns: * A pointer to the GPIO descriptor or %EINVAL if no GPIO exists in the given * chip for the specified hardware number or %ENODEV if the underlying chip * already vanished. * * The reference count of struct gpio_device is *NOT* increased like when the * GPIO is being requested for exclusive usage. 
It's up to the caller to make * sure the GPIO device will stay alive together with the descriptor returned * by this function. */ struct gpio_desc * gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum) { if (hwnum >= gdev->ngpio) return ERR_PTR(-EINVAL); return &gdev->descs[hwnum]; } EXPORT_SYMBOL_GPL(gpio_device_get_desc); /** * desc_to_gpio - convert a GPIO descriptor to the integer namespace * @desc: GPIO descriptor * * This should disappear in the future but is needed since we still * use GPIO numbers for error messages and sysfs nodes. * * Returns: * The global GPIO number for the GPIO specified by its descriptor. */ int desc_to_gpio(const struct gpio_desc *desc) { return desc->gdev->base + (desc - &desc->gdev->descs[0]); } EXPORT_SYMBOL_GPL(desc_to_gpio); /** * gpiod_to_chip - Return the GPIO chip to which a GPIO descriptor belongs * @desc: descriptor to return the chip of * * *DEPRECATED* * This function is unsafe and should not be used. Using the chip address * without taking the SRCU read lock may result in dereferencing a dangling * pointer. */ struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) { if (!desc) return NULL; return gpio_device_get_chip(desc->gdev); } EXPORT_SYMBOL_GPL(gpiod_to_chip); /** * gpiod_to_gpio_device() - Return the GPIO device to which this descriptor * belongs. * @desc: Descriptor for which to return the GPIO device. * * This *DOES NOT* increase the reference count of the GPIO device as it's * expected that the descriptor is requested and the user already holds a * reference to the device. * * Returns: * Address of the GPIO device owning this descriptor. */ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) { if (!desc) return NULL; return desc->gdev; } EXPORT_SYMBOL_GPL(gpiod_to_gpio_device); /** * gpio_device_get_base() - Get the base GPIO number allocated by this device * @gdev: GPIO device * * Returns: * First GPIO number in the global GPIO numberspace for this device. */ int gpio_device_get_base(struct gpio_device *gdev) { return gdev->base; } EXPORT_SYMBOL_GPL(gpio_device_get_base); /** * gpio_device_get_label() - Get the label of this GPIO device * @gdev: GPIO device * * Returns: * Pointer to the string containing the GPIO device label. The string's * lifetime is tied to that of the underlying GPIO device. */ const char *gpio_device_get_label(struct gpio_device *gdev) { return gdev->label; } EXPORT_SYMBOL(gpio_device_get_label); /** * gpio_device_get_chip() - Get the gpio_chip implementation of this GPIO device * @gdev: GPIO device * * Returns: * Address of the GPIO chip backing this device. * * *DEPRECATED* * Until we can get rid of all non-driver users of struct gpio_chip, we must * provide a way of retrieving the pointer to it from struct gpio_device. This * is *NOT* safe as the GPIO API is considered to be hot-unpluggable and the * chip can disappear at any moment (unlike reference-counted struct * gpio_device). * * Use at your own risk. */ struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev) { return rcu_dereference_check(gdev->chip, 1); } EXPORT_SYMBOL_GPL(gpio_device_get_chip); /* dynamic allocation of GPIOs, e.g. on a hotplugged device */ static int gpiochip_find_base_unlocked(int ngpio) { struct gpio_device *gdev; int base = GPIO_DYNAMIC_BASE; list_for_each_entry_srcu(gdev, &gpio_devices, list, lockdep_is_held(&gpio_devices_lock)) { /* found a free space?
*/ if (gdev->base >= base + ngpio) break; /* nope, check the space right after the chip */ base = gdev->base + gdev->ngpio; if (base < GPIO_DYNAMIC_BASE) base = GPIO_DYNAMIC_BASE; } if (gpio_is_valid(base)) { pr_debug("%s: found new base at %d\n", __func__, base); return base; } else { pr_err("%s: cannot find free range\n", __func__); return -ENOSPC; } } /** * gpiod_get_direction - return the current direction of a GPIO * @desc: GPIO to get the direction of * * Returns 0 for output, 1 for input, or an error code in case of error. * * This function may sleep if gpiod_cansleep() is true. */ int gpiod_get_direction(struct gpio_desc *desc) { unsigned long flags; unsigned int offset; int ret; /* * We cannot use VALIDATE_DESC() as we must not return 0 for a NULL * descriptor like we usually do. */ if (!desc || IS_ERR(desc)) return -EINVAL; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; offset = gpio_chip_hwgpio(desc); flags = READ_ONCE(desc->flags); /* * Open drain emulation using input mode may incorrectly report * input here, fix that up. */ if (test_bit(FLAG_OPEN_DRAIN, &flags) && test_bit(FLAG_IS_OUT, &flags)) return 0; if (!guard.gc->get_direction) return -ENOTSUPP; ret = guard.gc->get_direction(guard.gc, offset); if (ret < 0) return ret; /* GPIOF_DIR_IN or other positive, otherwise GPIOF_DIR_OUT */ if (ret > 0) ret = 1; assign_bit(FLAG_IS_OUT, &flags, !ret); WRITE_ONCE(desc->flags, flags); return ret; } EXPORT_SYMBOL_GPL(gpiod_get_direction); /* * Add a new chip to the global chips list, keeping the list of chips sorted * by range(means [base, base + ngpio - 1]) order. * * Return -EBUSY if the new chip overlaps with some other chip's integer * space. */ static int gpiodev_add_to_list_unlocked(struct gpio_device *gdev) { struct gpio_device *prev, *next; lockdep_assert_held(&gpio_devices_lock); if (list_empty(&gpio_devices)) { /* initial entry in list */ list_add_tail_rcu(&gdev->list, &gpio_devices); return 0; } next = list_first_entry(&gpio_devices, struct gpio_device, list); if (gdev->base + gdev->ngpio <= next->base) { /* add before first entry */ list_add_rcu(&gdev->list, &gpio_devices); return 0; } prev = list_last_entry(&gpio_devices, struct gpio_device, list); if (prev->base + prev->ngpio <= gdev->base) { /* add behind last entry */ list_add_tail_rcu(&gdev->list, &gpio_devices); return 0; } list_for_each_entry_safe(prev, next, &gpio_devices, list) { /* at the end of the list */ if (&next->list == &gpio_devices) break; /* add between prev and next */ if (prev->base + prev->ngpio <= gdev->base && gdev->base + gdev->ngpio <= next->base) { list_add_rcu(&gdev->list, &prev->list); return 0; } } synchronize_srcu(&gpio_devices_srcu); return -EBUSY; } /* * Convert a GPIO name to its descriptor * Note that there is no guarantee that GPIO names are globally unique! * Hence this function will return, if it exists, a reference to the first GPIO * line found that matches the given name. */ static struct gpio_desc *gpio_name_to_desc(const char * const name) { struct gpio_device *gdev; struct gpio_desc *desc; struct gpio_chip *gc; if (!name) return NULL; guard(srcu)(&gpio_devices_srcu); list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) continue; for_each_gpio_desc(gc, desc) { if (desc->name && !strcmp(desc->name, name)) return desc; } } return NULL; } /* * Take the names from gc->names and assign them to their GPIO descriptors. 
* Warn if a name is already used for a GPIO line on a different GPIO chip. * * Note that: * 1. Non-unique names are still accepted, * 2. Name collisions within the same GPIO chip are not reported. */ static int gpiochip_set_desc_names(struct gpio_chip *gc) { struct gpio_device *gdev = gc->gpiodev; int i; /* First check all names if they are unique */ for (i = 0; i != gc->ngpio; ++i) { struct gpio_desc *gpio; gpio = gpio_name_to_desc(gc->names[i]); if (gpio) dev_warn(&gdev->dev, "Detected name collision for GPIO name '%s'\n", gc->names[i]); } /* Then add all names to the GPIO descriptors */ for (i = 0; i != gc->ngpio; ++i) gdev->descs[i].name = gc->names[i]; return 0; } /* * gpiochip_set_names - Set GPIO line names using device properties * @chip: GPIO chip whose lines should be named, if possible * * Looks for device property "gpio-line-names" and if it exists assigns * GPIO line names for the chip. The memory allocated for the assigned * names belong to the underlying firmware node and should not be released * by the caller. */ static int gpiochip_set_names(struct gpio_chip *chip) { struct gpio_device *gdev = chip->gpiodev; struct device *dev = &gdev->dev; const char **names; int ret, i; int count; count = device_property_string_array_count(dev, "gpio-line-names"); if (count < 0) return 0; /* * When offset is set in the driver side we assume the driver internally * is using more than one gpiochip per the same device. We have to stop * setting friendly names if the specified ones with 'gpio-line-names' * are less than the offset in the device itself. This means all the * lines are not present for every single pin within all the internal * gpiochips. */ if (count <= chip->offset) { dev_warn(dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n", count, chip->offset); return 0; } names = kcalloc(count, sizeof(*names), GFP_KERNEL); if (!names) return -ENOMEM; ret = device_property_read_string_array(dev, "gpio-line-names", names, count); if (ret < 0) { dev_warn(dev, "failed to read GPIO line names\n"); kfree(names); return ret; } /* * When more that one gpiochip per device is used, 'count' can * contain at most number gpiochips x chip->ngpio. We have to * correctly distribute all defined lines taking into account * chip->offset as starting point from where we will assign * the names to pins from the 'names' array. Since property * 'gpio-line-names' cannot contains gaps, we have to be sure * we only assign those pins that really exists since chip->ngpio * can be different of the chip->offset. */ count = (count > chip->offset) ? count - chip->offset : count; if (count > chip->ngpio) count = chip->ngpio; for (i = 0; i < count; i++) { /* * Allow overriding "fixed" names provided by the GPIO * provider. The "fixed" names are more often than not * generic and less informative than the names given in * device properties. */ if (names[chip->offset + i] && names[chip->offset + i][0]) gdev->descs[i].name = names[chip->offset + i]; } kfree(names); return 0; } static unsigned long *gpiochip_allocate_mask(struct gpio_chip *gc) { unsigned long *p; p = bitmap_alloc(gc->ngpio, GFP_KERNEL); if (!p) return NULL; /* Assume by default all GPIOs are valid */ bitmap_fill(p, gc->ngpio); return p; } static void gpiochip_free_mask(unsigned long **p) { bitmap_free(*p); *p = NULL; } static unsigned int gpiochip_count_reserved_ranges(struct gpio_chip *gc) { struct device *dev = &gc->gpiodev->dev; int size; /* Format is "start, count, ..." 
*/ size = device_property_count_u32(dev, "gpio-reserved-ranges"); if (size > 0 && size % 2 == 0) return size; return 0; } static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc) { struct device *dev = &gc->gpiodev->dev; unsigned int size; u32 *ranges; int ret; size = gpiochip_count_reserved_ranges(gc); if (size == 0) return 0; ranges = kmalloc_array(size, sizeof(*ranges), GFP_KERNEL); if (!ranges) return -ENOMEM; ret = device_property_read_u32_array(dev, "gpio-reserved-ranges", ranges, size); if (ret) { kfree(ranges); return ret; } while (size) { u32 count = ranges[--size]; u32 start = ranges[--size]; if (start >= gc->ngpio || start + count > gc->ngpio) continue; bitmap_clear(gc->valid_mask, start, count); } kfree(ranges); return 0; } static int gpiochip_init_valid_mask(struct gpio_chip *gc) { int ret; if (!(gpiochip_count_reserved_ranges(gc) || gc->init_valid_mask)) return 0; gc->valid_mask = gpiochip_allocate_mask(gc); if (!gc->valid_mask) return -ENOMEM; ret = gpiochip_apply_reserved_ranges(gc); if (ret) return ret; if (gc->init_valid_mask) return gc->init_valid_mask(gc, gc->valid_mask, gc->ngpio); return 0; } static void gpiochip_free_valid_mask(struct gpio_chip *gc) { gpiochip_free_mask(&gc->valid_mask); } static int gpiochip_add_pin_ranges(struct gpio_chip *gc) { /* * Device Tree platforms are supposed to use "gpio-ranges" * property. This check ensures that the ->add_pin_ranges() * won't be called for them. */ if (device_property_present(&gc->gpiodev->dev, "gpio-ranges")) return 0; if (gc->add_pin_ranges) return gc->add_pin_ranges(gc); return 0; } bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset) { /* No mask means all valid */ if (likely(!gc->valid_mask)) return true; return test_bit(offset, gc->valid_mask); } EXPORT_SYMBOL_GPL(gpiochip_line_is_valid); static void gpiodev_release(struct device *dev) { struct gpio_device *gdev = to_gpio_device(dev); unsigned int i; for (i = 0; i < gdev->ngpio; i++) cleanup_srcu_struct(&gdev->descs[i].srcu); ida_free(&gpio_ida, gdev->id); kfree_const(gdev->label); kfree(gdev->descs); cleanup_srcu_struct(&gdev->srcu); kfree(gdev); } static const struct device_type gpio_dev_type = { .name = "gpio_chip", .release = gpiodev_release, }; #ifdef CONFIG_GPIO_CDEV #define gcdev_register(gdev, devt) gpiolib_cdev_register((gdev), (devt)) #define gcdev_unregister(gdev) gpiolib_cdev_unregister((gdev)) #else /* * gpiolib_cdev_register() indirectly calls device_add(), which is still * required even when cdev is not selected. */ #define gcdev_register(gdev, devt) device_add(&(gdev)->dev) #define gcdev_unregister(gdev) device_del(&(gdev)->dev) #endif static int gpiochip_setup_dev(struct gpio_device *gdev) { struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev); int ret; device_initialize(&gdev->dev); /* * If fwnode doesn't belong to another device, it's safe to clear its * initialized flag. 
*/ if (fwnode && !fwnode->dev) fwnode_dev_initialized(fwnode, false); ret = gcdev_register(gdev, gpio_devt); if (ret) return ret; ret = gpiochip_sysfs_register(gdev); if (ret) goto err_remove_device; dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base, gdev->base + gdev->ngpio - 1, gdev->label); return 0; err_remove_device: gcdev_unregister(gdev); return ret; } static void gpiochip_machine_hog(struct gpio_chip *gc, struct gpiod_hog *hog) { struct gpio_desc *desc; int rv; desc = gpiochip_get_desc(gc, hog->chip_hwnum); if (IS_ERR(desc)) { chip_err(gc, "%s: unable to get GPIO desc: %ld\n", __func__, PTR_ERR(desc)); return; } rv = gpiod_hog(desc, hog->line_name, hog->lflags, hog->dflags); if (rv) gpiod_err(desc, "%s: unable to hog GPIO line (%s:%u): %d\n", __func__, gc->label, hog->chip_hwnum, rv); } static void machine_gpiochip_add(struct gpio_chip *gc) { struct gpiod_hog *hog; mutex_lock(&gpio_machine_hogs_mutex); list_for_each_entry(hog, &gpio_machine_hogs, list) { if (!strcmp(gc->label, hog->chip_label)) gpiochip_machine_hog(gc, hog); } mutex_unlock(&gpio_machine_hogs_mutex); } static void gpiochip_setup_devs(void) { struct gpio_device *gdev; int ret; guard(srcu)(&gpio_devices_srcu); list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { ret = gpiochip_setup_dev(gdev); if (ret) dev_err(&gdev->dev, "Failed to initialize gpio device (%d)\n", ret); } } static void gpiochip_set_data(struct gpio_chip *gc, void *data) { gc->gpiodev->data = data; } /** * gpiochip_get_data() - get per-subdriver data for the chip * @gc: GPIO chip * * Returns: * The per-subdriver data for the chip. */ void *gpiochip_get_data(struct gpio_chip *gc) { return gc->gpiodev->data; } EXPORT_SYMBOL_GPL(gpiochip_get_data); int gpiochip_get_ngpios(struct gpio_chip *gc, struct device *dev) { u32 ngpios = gc->ngpio; int ret; if (ngpios == 0) { ret = device_property_read_u32(dev, "ngpios", &ngpios); if (ret == -ENODATA) /* * -ENODATA means that there is no property found and * we want to issue the error message to the user. * Besides that, we want to return different error code * to state that supplied value is not valid. */ ngpios = 0; else if (ret) return ret; gc->ngpio = ngpios; } if (gc->ngpio == 0) { chip_err(gc, "tried to insert a GPIO chip with zero lines\n"); return -EINVAL; } if (gc->ngpio > FASTPATH_NGPIO) chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n", gc->ngpio, FASTPATH_NGPIO); return 0; } EXPORT_SYMBOL_GPL(gpiochip_get_ngpios); int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key) { struct gpio_device *gdev; unsigned int desc_index; int base = 0; int ret = 0; /* * First: allocate and populate the internal stat container, and * set up the struct device. */ gdev = kzalloc(sizeof(*gdev), GFP_KERNEL); if (!gdev) return -ENOMEM; gdev->dev.type = &gpio_dev_type; gdev->dev.bus = &gpio_bus_type; gdev->dev.parent = gc->parent; rcu_assign_pointer(gdev->chip, gc); gc->gpiodev = gdev; gpiochip_set_data(gc, data); /* * If the calling driver did not initialize firmware node, * do it here using the parent device, if any. 
*/ if (gc->fwnode) device_set_node(&gdev->dev, gc->fwnode); else if (gc->parent) device_set_node(&gdev->dev, dev_fwnode(gc->parent)); gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL); if (gdev->id < 0) { ret = gdev->id; goto err_free_gdev; } ret = dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id); if (ret) goto err_free_ida; if (gc->parent && gc->parent->driver) gdev->owner = gc->parent->driver->owner; else if (gc->owner) /* TODO: remove chip->owner */ gdev->owner = gc->owner; else gdev->owner = THIS_MODULE; ret = gpiochip_get_ngpios(gc, &gdev->dev); if (ret) goto err_free_dev_name; gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL); if (!gdev->descs) { ret = -ENOMEM; goto err_free_dev_name; } gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL); if (!gdev->label) { ret = -ENOMEM; goto err_free_descs; } gdev->ngpio = gc->ngpio; gdev->can_sleep = gc->can_sleep; scoped_guard(mutex, &gpio_devices_lock) { /* * TODO: this allocates a Linux GPIO number base in the global * GPIO numberspace for this chip. In the long run we want to * get *rid* of this numberspace and use only descriptors, but * it may be a pipe dream. It will not happen before we get rid * of the sysfs interface anyways. */ base = gc->base; if (base < 0) { base = gpiochip_find_base_unlocked(gc->ngpio); if (base < 0) { ret = base; base = 0; goto err_free_label; } /* * TODO: it should not be necessary to reflect the * assigned base outside of the GPIO subsystem. Go over * drivers and see if anyone makes use of this, else * drop this and assign a poison instead. */ gc->base = base; } else { dev_warn(&gdev->dev, "Static allocation of GPIO base is deprecated, use dynamic allocation.\n"); } gdev->base = base; ret = gpiodev_add_to_list_unlocked(gdev); if (ret) { chip_err(gc, "GPIO integer space overlap, cannot add chip\n"); goto err_free_label; } } for (desc_index = 0; desc_index < gc->ngpio; desc_index++) gdev->descs[desc_index].gdev = gdev; BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier); ret = init_srcu_struct(&gdev->srcu); if (ret) goto err_remove_from_list; #ifdef CONFIG_PINCTRL INIT_LIST_HEAD(&gdev->pin_ranges); #endif if (gc->names) { ret = gpiochip_set_desc_names(gc); if (ret) goto err_cleanup_gdev_srcu; } ret = gpiochip_set_names(gc); if (ret) goto err_cleanup_gdev_srcu; ret = gpiochip_init_valid_mask(gc); if (ret) goto err_cleanup_gdev_srcu; for (desc_index = 0; desc_index < gc->ngpio; desc_index++) { struct gpio_desc *desc = &gdev->descs[desc_index]; ret = init_srcu_struct(&desc->srcu); if (ret) goto err_cleanup_desc_srcu; if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) { assign_bit(FLAG_IS_OUT, &desc->flags, !gc->get_direction(gc, desc_index)); } else { assign_bit(FLAG_IS_OUT, &desc->flags, !gc->direction_input); } } ret = of_gpiochip_add(gc); if (ret) goto err_cleanup_desc_srcu; ret = gpiochip_add_pin_ranges(gc); if (ret) goto err_remove_of_chip; acpi_gpiochip_add(gc); machine_gpiochip_add(gc); ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) goto err_remove_irqchip_mask; /* * By first adding the chardev, and then adding the device, * we get a device node entry in sysfs under * /sys/bus/gpio/devices/gpiochipN/dev that can be used for * coldplug of device nodes and other udev business. * We can do this only if gpiolib has been initialized. 
* Otherwise, defer until later. */ if (gpiolib_initialized) { ret = gpiochip_setup_dev(gdev); if (ret) goto err_remove_irqchip; } return 0; err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); err_free_hogs: gpiochip_free_hogs(gc); acpi_gpiochip_remove(gc); gpiochip_remove_pin_ranges(gc); err_remove_of_chip: of_gpiochip_remove(gc); err_cleanup_desc_srcu: while (desc_index--) cleanup_srcu_struct(&gdev->descs[desc_index].srcu); gpiochip_free_valid_mask(gc); err_cleanup_gdev_srcu: cleanup_srcu_struct(&gdev->srcu); err_remove_from_list: scoped_guard(mutex, &gpio_devices_lock) list_del_rcu(&gdev->list); synchronize_srcu(&gpio_devices_srcu); if (gdev->dev.release) { /* release() has been registered by gpiochip_setup_dev() */ gpio_device_put(gdev); goto err_print_message; } err_free_label: kfree_const(gdev->label); err_free_descs: kfree(gdev->descs); err_free_dev_name: kfree(dev_name(&gdev->dev)); err_free_ida: ida_free(&gpio_ida, gdev->id); err_free_gdev: kfree(gdev); err_print_message: /* failures here can mean systems won't boot... */ if (ret != -EPROBE_DEFER) { pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__, base, base + (int)gc->ngpio - 1, gc->label ? : "generic", ret); } return ret; } EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key); /** * gpiochip_remove() - unregister a gpio_chip * @gc: the chip to unregister * * A gpio_chip with any GPIOs still requested may not be removed. */ void gpiochip_remove(struct gpio_chip *gc) { struct gpio_device *gdev = gc->gpiodev; /* FIXME: should the legacy sysfs handling be moved to gpio_device? */ gpiochip_sysfs_unregister(gdev); gpiochip_free_hogs(gc); scoped_guard(mutex, &gpio_devices_lock) list_del_rcu(&gdev->list); synchronize_srcu(&gpio_devices_srcu); /* Numb the device, cancelling all outstanding operations */ rcu_assign_pointer(gdev->chip, NULL); synchronize_srcu(&gdev->srcu); gpiochip_irqchip_remove(gc); acpi_gpiochip_remove(gc); of_gpiochip_remove(gc); gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); /* * We accept no more calls into the driver from this point, so * NULL the driver data pointer. */ gpiochip_set_data(gc, NULL); /* * The gpiochip side puts its use of the device to rest here: * if there are no userspace clients, the chardev and device will * be removed, else it will be dangling until the last user is * gone. */ gcdev_unregister(gdev); gpio_device_put(gdev); } EXPORT_SYMBOL_GPL(gpiochip_remove); /** * gpio_device_find() - find a specific GPIO device * @data: data to pass to match function * @match: Callback function to check gpio_chip * * Returns: * New reference to struct gpio_device. * * Similar to bus_find_device(). It returns a reference to a gpio_device as * determined by a user supplied @match callback. The callback should return * 0 if the device doesn't match and non-zero if it does. If the callback * returns non-zero, this function will return to the caller and not iterate * over any more gpio_devices. * * The callback takes the GPIO chip structure as argument. During the execution * of the callback function the chip is protected from being freed. TODO: This * actually has yet to be implemented. * * If the function returns non-NULL, the returned reference must be freed by * the caller using gpio_device_put(). */ struct gpio_device *gpio_device_find(const void *data, int (*match)(struct gpio_chip *gc, const void *data)) { struct gpio_device *gdev; struct gpio_chip *gc; /* * Not yet but in the future the spinlock below will become a mutex. 
* Annotate this function before anyone tries to use it in interrupt * context like it happened with gpiochip_find(). */ might_sleep(); guard(srcu)(&gpio_devices_srcu); list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { if (!device_is_registered(&gdev->dev)) continue; guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (gc && match(gc, data)) return gpio_device_get(gdev); } return NULL; } EXPORT_SYMBOL_GPL(gpio_device_find); static int gpio_chip_match_by_label(struct gpio_chip *gc, const void *label) { return gc->label && !strcmp(gc->label, label); } /** * gpio_device_find_by_label() - wrapper around gpio_device_find() finding the * GPIO device by its backing chip's label * @label: Label to lookup * * Returns: * Reference to the GPIO device or NULL. Reference must be released with * gpio_device_put(). */ struct gpio_device *gpio_device_find_by_label(const char *label) { return gpio_device_find((void *)label, gpio_chip_match_by_label); } EXPORT_SYMBOL_GPL(gpio_device_find_by_label); static int gpio_chip_match_by_fwnode(struct gpio_chip *gc, const void *fwnode) { return device_match_fwnode(&gc->gpiodev->dev, fwnode); } /** * gpio_device_find_by_fwnode() - wrapper around gpio_device_find() finding * the GPIO device by its fwnode * @fwnode: Firmware node to lookup * * Returns: * Reference to the GPIO device or NULL. Reference must be released with * gpio_device_put(). */ struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) { return gpio_device_find((void *)fwnode, gpio_chip_match_by_fwnode); } EXPORT_SYMBOL_GPL(gpio_device_find_by_fwnode); /** * gpio_device_get() - Increase the reference count of this GPIO device * @gdev: GPIO device to increase the refcount for * * Returns: * Pointer to @gdev. */ struct gpio_device *gpio_device_get(struct gpio_device *gdev) { return to_gpio_device(get_device(&gdev->dev)); } EXPORT_SYMBOL_GPL(gpio_device_get); /** * gpio_device_put() - Decrease the reference count of this GPIO device and * possibly free all resources associated with it. * @gdev: GPIO device to decrease the reference count for */ void gpio_device_put(struct gpio_device *gdev) { put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gpio_device_put); /** * gpio_device_to_device() - Retrieve the address of the underlying struct * device. * @gdev: GPIO device for which to return the address. * * This does not increase the reference count of the GPIO device nor the * underlying struct device. * * Returns: * Address of struct device backing this GPIO device. */ struct device *gpio_device_to_device(struct gpio_device *gdev) { return &gdev->dev; } EXPORT_SYMBOL_GPL(gpio_device_to_device); #ifdef CONFIG_GPIOLIB_IRQCHIP /* * The following is irqchip helper code for gpiochips. 
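 * As a rough driver-side illustration (nothing below is defined or used in
 * this file; the my_* and priv identifiers are hypothetical), a chip that
 * wants these helpers to instantiate an irqchip usually fills in
 * &struct gpio_irq_chip before registering:
 *
 *	struct gpio_irq_chip *girq = &gc->irq;
 *
 *	girq->chip = &my_irq_chip;		(the driver's irq_chip)
 *	girq->default_type = IRQ_TYPE_NONE;
 *	girq->handler = handle_bad_irq;
 *
 *	return gpiochip_add_data(gc, priv);
 *
 * Chained setups additionally fill girq->parent_handler, girq->num_parents
 * and girq->parents; the hierarchical case is sketched further below.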
*/ static int gpiochip_irqchip_init_hw(struct gpio_chip *gc) { struct gpio_irq_chip *girq = &gc->irq; if (!girq->init_hw) return 0; return girq->init_hw(gc); } static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc) { struct gpio_irq_chip *girq = &gc->irq; if (!girq->init_valid_mask) return 0; girq->valid_mask = gpiochip_allocate_mask(gc); if (!girq->valid_mask) return -ENOMEM; girq->init_valid_mask(gc, girq->valid_mask, gc->ngpio); return 0; } static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc) { gpiochip_free_mask(&gc->irq.valid_mask); } static bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, unsigned int offset) { if (!gpiochip_line_is_valid(gc, offset)) return false; /* No mask means all valid */ if (likely(!gc->irq.valid_mask)) return true; return test_bit(offset, gc->irq.valid_mask); } #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** * gpiochip_set_hierarchical_irqchip() - connects a hierarchical irqchip * to a gpiochip * @gc: the gpiochip to set the irqchip hierarchical handler to * @irqchip: the irqchip to handle this level of the hierarchy, the interrupt * will then percolate up to the parent */ static void gpiochip_set_hierarchical_irqchip(struct gpio_chip *gc, struct irq_chip *irqchip) { /* DT will deal with mapping each IRQ as we go along */ if (is_of_node(gc->irq.fwnode)) return; /* * This is for legacy and boardfile "irqchip" fwnodes: allocate * irqs upfront instead of dynamically since we don't have the * dynamic type of allocation that hardware description languages * provide. Once all GPIO drivers using board files are gone from * the kernel we can delete this code, but for a transitional period * it is necessary to keep this around. */ if (is_fwnode_irqchip(gc->irq.fwnode)) { int i; int ret; for (i = 0; i < gc->ngpio; i++) { struct irq_fwspec fwspec; unsigned int parent_hwirq; unsigned int parent_type; struct gpio_irq_chip *girq = &gc->irq; /* * We call the child to parent translation function * only to check if the child IRQ is valid or not. * Just pick the rising edge type here as that is what * we likely need to support. 
*/ ret = girq->child_to_parent_hwirq(gc, i, IRQ_TYPE_EDGE_RISING, &parent_hwirq, &parent_type); if (ret) { chip_err(gc, "skip set-up on hwirq %d\n", i); continue; } fwspec.fwnode = gc->irq.fwnode; /* This is the hwirq for the GPIO line side of things */ fwspec.param[0] = girq->child_offset_to_irq(gc, i); /* Just pick something */ fwspec.param[1] = IRQ_TYPE_EDGE_RISING; fwspec.param_count = 2; ret = irq_domain_alloc_irqs(gc->irq.domain, 1, NUMA_NO_NODE, &fwspec); if (ret < 0) { chip_err(gc, "can not allocate irq for GPIO line %d parent hwirq %d in hierarchy domain: %d\n", i, parent_hwirq, ret); } } } chip_err(gc, "%s unknown fwnode type proceed anyway\n", __func__); return; } static int gpiochip_hierarchy_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { /* We support standard DT translation */ if (is_of_node(fwspec->fwnode) && fwspec->param_count == 2) { return irq_domain_translate_twocell(d, fwspec, hwirq, type); } /* This is for board files and others not using DT */ if (is_fwnode_irqchip(fwspec->fwnode)) { int ret; ret = irq_domain_translate_twocell(d, fwspec, hwirq, type); if (ret) return ret; WARN_ON(*type == IRQ_TYPE_NONE); return 0; } return -EINVAL; } static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d, unsigned int irq, unsigned int nr_irqs, void *data) { struct gpio_chip *gc = d->host_data; irq_hw_number_t hwirq; unsigned int type = IRQ_TYPE_NONE; struct irq_fwspec *fwspec = data; union gpio_irq_fwspec gpio_parent_fwspec = {}; unsigned int parent_hwirq; unsigned int parent_type; struct gpio_irq_chip *girq = &gc->irq; int ret; /* * The nr_irqs parameter is always one except for PCI multi-MSI * so this should not happen. */ WARN_ON(nr_irqs != 1); ret = gc->irq.child_irq_domain_ops.translate(d, fwspec, &hwirq, &type); if (ret) return ret; chip_dbg(gc, "allocate IRQ %d, hwirq %lu\n", irq, hwirq); ret = girq->child_to_parent_hwirq(gc, hwirq, type, &parent_hwirq, &parent_type); if (ret) { chip_err(gc, "can't look up hwirq %lu\n", hwirq); return ret; } chip_dbg(gc, "found parent hwirq %u\n", parent_hwirq); /* * We set handle_bad_irq because the .set_type() should * always be invoked and set the right type of handler. */ irq_domain_set_info(d, irq, hwirq, gc->irq.chip, gc, girq->handler, NULL, NULL); irq_set_probe(irq); /* This parent only handles asserted level IRQs */ ret = girq->populate_parent_alloc_arg(gc, &gpio_parent_fwspec, parent_hwirq, parent_type); if (ret) return ret; chip_dbg(gc, "alloc_irqs_parent for %d parent hwirq %d\n", irq, parent_hwirq); irq_set_lockdep_class(irq, gc->irq.lock_key, gc->irq.request_key); ret = irq_domain_alloc_irqs_parent(d, irq, 1, &gpio_parent_fwspec); /* * If the parent irqdomain is msi, the interrupts have already * been allocated, so the EEXIST is good. */ if (irq_domain_is_msi(d->parent) && (ret == -EEXIST)) ret = 0; if (ret) chip_err(gc, "failed to allocate parent hwirq %d for hwirq %lu\n", parent_hwirq, hwirq); return ret; } static unsigned int gpiochip_child_offset_to_irq_noop(struct gpio_chip *gc, unsigned int offset) { return offset; } /** * gpiochip_irq_domain_activate() - Lock a GPIO to be used as an IRQ * @domain: The IRQ domain used by this IRQ chip * @data: Outermost irq_data associated with the IRQ * @reserve: If set, only reserve an interrupt vector instead of assigning one * * This function is a wrapper that calls gpiochip_lock_as_irq() and is to be * used as the activate function for the &struct irq_domain_ops. 
The host_data * for the IRQ domain must be the &struct gpio_chip. */ static int gpiochip_irq_domain_activate(struct irq_domain *domain, struct irq_data *data, bool reserve) { struct gpio_chip *gc = domain->host_data; unsigned int hwirq = irqd_to_hwirq(data); return gpiochip_lock_as_irq(gc, hwirq); } /** * gpiochip_irq_domain_deactivate() - Unlock a GPIO used as an IRQ * @domain: The IRQ domain used by this IRQ chip * @data: Outermost irq_data associated with the IRQ * * This function is a wrapper that will call gpiochip_unlock_as_irq() and is to * be used as the deactivate function for the &struct irq_domain_ops. The * host_data for the IRQ domain must be the &struct gpio_chip. */ static void gpiochip_irq_domain_deactivate(struct irq_domain *domain, struct irq_data *data) { struct gpio_chip *gc = domain->host_data; unsigned int hwirq = irqd_to_hwirq(data); return gpiochip_unlock_as_irq(gc, hwirq); } static void gpiochip_hierarchy_setup_domain_ops(struct irq_domain_ops *ops) { ops->activate = gpiochip_irq_domain_activate; ops->deactivate = gpiochip_irq_domain_deactivate; ops->alloc = gpiochip_hierarchy_irq_domain_alloc; /* * We only allow overriding the translate() and free() functions for * hierarchical chips, and this should only be done if the user * really need something other than 1:1 translation for translate() * callback and free if user wants to free up any resources which * were allocated during callbacks, for example populate_parent_alloc_arg. */ if (!ops->translate) ops->translate = gpiochip_hierarchy_irq_domain_translate; if (!ops->free) ops->free = irq_domain_free_irqs_common; } static struct irq_domain *gpiochip_hierarchy_create_domain(struct gpio_chip *gc) { struct irq_domain *domain; if (!gc->irq.child_to_parent_hwirq || !gc->irq.fwnode) { chip_err(gc, "missing irqdomain vital data\n"); return ERR_PTR(-EINVAL); } if (!gc->irq.child_offset_to_irq) gc->irq.child_offset_to_irq = gpiochip_child_offset_to_irq_noop; if (!gc->irq.populate_parent_alloc_arg) gc->irq.populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_twocell; gpiochip_hierarchy_setup_domain_ops(&gc->irq.child_irq_domain_ops); domain = irq_domain_create_hierarchy( gc->irq.parent_domain, 0, gc->ngpio, gc->irq.fwnode, &gc->irq.child_irq_domain_ops, gc); if (!domain) return ERR_PTR(-ENOMEM); gpiochip_set_hierarchical_irqchip(gc, gc->irq.chip); return domain; } static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc) { return !!gc->irq.parent_domain; } int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type) { struct irq_fwspec *fwspec = &gfwspec->fwspec; fwspec->fwnode = gc->irq.parent_domain->fwnode; fwspec->param_count = 2; fwspec->param[0] = parent_hwirq; fwspec->param[1] = parent_type; return 0; } EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_twocell); int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type) { struct irq_fwspec *fwspec = &gfwspec->fwspec; fwspec->fwnode = gc->irq.parent_domain->fwnode; fwspec->param_count = 4; fwspec->param[0] = 0; fwspec->param[1] = parent_hwirq; fwspec->param[2] = 0; fwspec->param[3] = parent_type; return 0; } EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_fourcell); #else static struct irq_domain *gpiochip_hierarchy_create_domain(struct gpio_chip *gc) { return ERR_PTR(-EINVAL); } static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc) { return false; } 
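/*
 * Driver-side sketch for the hierarchical case handled above (illustrative
 * only, not used by gpiolib itself; the my_* names are hypothetical). A chip
 * stacked on a parent interrupt controller typically provides:
 *
 *	struct gpio_irq_chip *girq = &gc->irq;
 *
 *	girq->chip = &my_irq_chip;
 *	girq->fwnode = dev_fwnode(dev);
 *	girq->parent_domain = my_parent_domain;
 *	girq->child_to_parent_hwirq = my_child_to_parent_hwirq;
 *	girq->populate_parent_alloc_arg =
 *		gpiochip_populate_parent_fwspec_twocell;
 *	girq->default_type = IRQ_TYPE_NONE;
 *	girq->handler = handle_bad_irq;
 *
 * gpiochip_add_data() then builds the domain hierarchy through
 * gpiochip_hierarchy_create_domain().
 */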
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ /** * gpiochip_irq_map() - maps an IRQ into a GPIO irqchip * @d: the irqdomain used by this irqchip * @irq: the global irq number used by this GPIO irqchip irq * @hwirq: the local IRQ/GPIO line offset on this gpiochip * * This function will set up the mapping for a certain IRQ line on a * gpiochip by assigning the gpiochip as chip data, and using the irqchip * stored inside the gpiochip. */ static int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { struct gpio_chip *gc = d->host_data; int ret = 0; if (!gpiochip_irqchip_irq_valid(gc, hwirq)) return -ENXIO; irq_set_chip_data(irq, gc); /* * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ irq_set_lockdep_class(irq, gc->irq.lock_key, gc->irq.request_key); irq_set_chip_and_handler(irq, gc->irq.chip, gc->irq.handler); /* Chips that use nested thread handlers have them marked */ if (gc->irq.threaded) irq_set_nested_thread(irq, 1); irq_set_noprobe(irq); if (gc->irq.num_parents == 1) ret = irq_set_parent(irq, gc->irq.parents[0]); else if (gc->irq.map) ret = irq_set_parent(irq, gc->irq.map[hwirq]); if (ret < 0) return ret; /* * No set-up of the hardware will happen if IRQ_TYPE_NONE * is passed as default type. */ if (gc->irq.default_type != IRQ_TYPE_NONE) irq_set_irq_type(irq, gc->irq.default_type); return 0; } static void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq) { struct gpio_chip *gc = d->host_data; if (gc->irq.threaded) irq_set_nested_thread(irq, 0); irq_set_chip_and_handler(irq, NULL, NULL); irq_set_chip_data(irq, NULL); } static const struct irq_domain_ops gpiochip_domain_ops = { .map = gpiochip_irq_map, .unmap = gpiochip_irq_unmap, /* Virtually all GPIO irqchips are twocell:ed */ .xlate = irq_domain_xlate_twocell, }; static struct irq_domain *gpiochip_simple_create_domain(struct gpio_chip *gc) { struct fwnode_handle *fwnode = dev_fwnode(&gc->gpiodev->dev); struct irq_domain *domain; domain = irq_domain_create_simple(fwnode, gc->ngpio, gc->irq.first, &gpiochip_domain_ops, gc); if (!domain) return ERR_PTR(-EINVAL); return domain; } static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) { struct irq_domain *domain = gc->irq.domain; #ifdef CONFIG_GPIOLIB_IRQCHIP /* * Avoid race condition with other code, which tries to lookup * an IRQ before the irqchip has been properly registered, * i.e. while gpiochip is still being brought up. 
*/ if (!gc->irq.initialized) return -EPROBE_DEFER; #endif if (!gpiochip_irqchip_irq_valid(gc, offset)) return -ENXIO; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (irq_domain_is_hierarchy(domain)) { struct irq_fwspec spec; spec.fwnode = domain->fwnode; spec.param_count = 2; spec.param[0] = gc->irq.child_offset_to_irq(gc, offset); spec.param[1] = IRQ_TYPE_NONE; return irq_create_fwspec_mapping(&spec); } #endif return irq_create_mapping(domain, offset); } int gpiochip_irq_reqres(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); return gpiochip_reqres_irq(gc, hwirq); } EXPORT_SYMBOL(gpiochip_irq_reqres); void gpiochip_irq_relres(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); gpiochip_relres_irq(gc, hwirq); } EXPORT_SYMBOL(gpiochip_irq_relres); static void gpiochip_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); if (gc->irq.irq_mask) gc->irq.irq_mask(d); gpiochip_disable_irq(gc, hwirq); } static void gpiochip_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); gpiochip_enable_irq(gc, hwirq); if (gc->irq.irq_unmask) gc->irq.irq_unmask(d); } static void gpiochip_irq_enable(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); gpiochip_enable_irq(gc, hwirq); gc->irq.irq_enable(d); } static void gpiochip_irq_disable(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); gc->irq.irq_disable(d); gpiochip_disable_irq(gc, hwirq); } static void gpiochip_set_irq_hooks(struct gpio_chip *gc) { struct irq_chip *irqchip = gc->irq.chip; if (irqchip->flags & IRQCHIP_IMMUTABLE) return; chip_warn(gc, "not an immutable chip, please consider fixing it!\n"); if (!irqchip->irq_request_resources && !irqchip->irq_release_resources) { irqchip->irq_request_resources = gpiochip_irq_reqres; irqchip->irq_release_resources = gpiochip_irq_relres; } if (WARN_ON(gc->irq.irq_enable)) return; /* Check if the irqchip already has this hook... */ if (irqchip->irq_enable == gpiochip_irq_enable || irqchip->irq_mask == gpiochip_irq_mask) { /* * ...and if so, give a gentle warning that this is bad * practice. */ chip_info(gc, "detected irqchip that is shared with multiple gpiochips: please fix the driver.\n"); return; } if (irqchip->irq_disable) { gc->irq.irq_disable = irqchip->irq_disable; irqchip->irq_disable = gpiochip_irq_disable; } else { gc->irq.irq_mask = irqchip->irq_mask; irqchip->irq_mask = gpiochip_irq_mask; } if (irqchip->irq_enable) { gc->irq.irq_enable = irqchip->irq_enable; irqchip->irq_enable = gpiochip_irq_enable; } else { gc->irq.irq_unmask = irqchip->irq_unmask; irqchip->irq_unmask = gpiochip_irq_unmask; } } static int gpiochip_irqchip_add_allocated_domain(struct gpio_chip *gc, struct irq_domain *domain, bool allocated_externally) { if (!domain) return -EINVAL; if (gc->to_irq) chip_warn(gc, "to_irq is redefined in %s and you shouldn't rely on it\n", __func__); gc->to_irq = gpiochip_to_irq; gc->irq.domain = domain; gc->irq.domain_is_allocated_externally = allocated_externally; /* * Using barrier() here to prevent compiler from reordering * gc->irq.initialized before adding irqdomain. 
*/ barrier(); gc->irq.initialized = true; return 0; } /** * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip * @gc: the GPIO chip to add the IRQ chip to * @lock_key: lockdep class for IRQ lock * @request_key: lockdep class for IRQ request */ static int gpiochip_add_irqchip(struct gpio_chip *gc, struct lock_class_key *lock_key, struct lock_class_key *request_key) { struct fwnode_handle *fwnode = dev_fwnode(&gc->gpiodev->dev); struct irq_chip *irqchip = gc->irq.chip; struct irq_domain *domain; unsigned int type; unsigned int i; int ret; if (!irqchip) return 0; if (gc->irq.parent_handler && gc->can_sleep) { chip_err(gc, "you cannot have chained interrupts on a chip that may sleep\n"); return -EINVAL; } type = gc->irq.default_type; /* * Specifying a default trigger is a terrible idea if DT or ACPI is * used to configure the interrupts, as you may end up with * conflicting triggers. Tell the user, and reset to NONE. */ if (WARN(fwnode && type != IRQ_TYPE_NONE, "%pfw: Ignoring %u default trigger\n", fwnode, type)) type = IRQ_TYPE_NONE; gc->irq.default_type = type; gc->irq.lock_key = lock_key; gc->irq.request_key = request_key; /* If a parent irqdomain is provided, let's build a hierarchy */ if (gpiochip_hierarchy_is_hierarchical(gc)) { domain = gpiochip_hierarchy_create_domain(gc); } else { domain = gpiochip_simple_create_domain(gc); } if (IS_ERR(domain)) return PTR_ERR(domain); if (gc->irq.parent_handler) { for (i = 0; i < gc->irq.num_parents; i++) { void *data; if (gc->irq.per_parent_data) data = gc->irq.parent_handler_data_array[i]; else data = gc->irq.parent_handler_data ?: gc; /* * The parent IRQ chip is already using the chip_data * for this IRQ chip, so our callbacks simply use the * handler_data. */ irq_set_chained_handler_and_data(gc->irq.parents[i], gc->irq.parent_handler, data); } } gpiochip_set_irq_hooks(gc); ret = gpiochip_irqchip_add_allocated_domain(gc, domain, false); if (ret) return ret; acpi_gpiochip_request_interrupts(gc); return 0; } /** * gpiochip_irqchip_remove() - removes an irqchip added to a gpiochip * @gc: the gpiochip to remove the irqchip from * * This is called only from gpiochip_remove() */ static void gpiochip_irqchip_remove(struct gpio_chip *gc) { struct irq_chip *irqchip = gc->irq.chip; unsigned int offset; acpi_gpiochip_free_interrupts(gc); if (irqchip && gc->irq.parent_handler) { struct gpio_irq_chip *irq = &gc->irq; unsigned int i; for (i = 0; i < irq->num_parents; i++) irq_set_chained_handler_and_data(irq->parents[i], NULL, NULL); } /* Remove all IRQ mappings and delete the domain */ if (!gc->irq.domain_is_allocated_externally && gc->irq.domain) { unsigned int irq; for (offset = 0; offset < gc->ngpio; offset++) { if (!gpiochip_irqchip_irq_valid(gc, offset)) continue; irq = irq_find_mapping(gc->irq.domain, offset); irq_dispose_mapping(irq); } irq_domain_remove(gc->irq.domain); } if (irqchip && !(irqchip->flags & IRQCHIP_IMMUTABLE)) { if (irqchip->irq_request_resources == gpiochip_irq_reqres) { irqchip->irq_request_resources = NULL; irqchip->irq_release_resources = NULL; } if (irqchip->irq_enable == gpiochip_irq_enable) { irqchip->irq_enable = gc->irq.irq_enable; irqchip->irq_disable = gc->irq.irq_disable; } } gc->irq.irq_enable = NULL; gc->irq.irq_disable = NULL; gc->irq.chip = NULL; gpiochip_irqchip_free_valid_mask(gc); } /** * gpiochip_irqchip_add_domain() - adds an irqdomain to a gpiochip * @gc: the gpiochip to add the irqchip to * @domain: the irqdomain to add to the gpiochip * * This function adds an IRQ domain to the gpiochip. 
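 *
 * A minimal usage sketch (hypothetical caller code, not taken from this
 * file): the driver allocates its own domain and then hands it over, e.g.
 *
 *	domain = irq_domain_create_simple(dev_fwnode(dev), gc->ngpio, 0,
 *					  &my_domain_ops, gc);
 *	if (!domain)
 *		return -ENOMEM;
 *	ret = gpiochip_irqchip_add_domain(gc, domain);
 *
 * where my_domain_ops is the caller's &struct irq_domain_ops.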
*/ int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain) { return gpiochip_irqchip_add_allocated_domain(gc, domain, true); } EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_domain); #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gc, struct lock_class_key *lock_key, struct lock_class_key *request_key) { return 0; } static void gpiochip_irqchip_remove(struct gpio_chip *gc) {} static inline int gpiochip_irqchip_init_hw(struct gpio_chip *gc) { return 0; } static inline int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc) { return 0; } static inline void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc) { } #endif /* CONFIG_GPIOLIB_IRQCHIP */ /** * gpiochip_generic_request() - request the gpio function for a pin * @gc: the gpiochip owning the GPIO * @offset: the offset of the GPIO to request for GPIO function */ int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset) { #ifdef CONFIG_PINCTRL if (list_empty(&gc->gpiodev->pin_ranges)) return 0; #endif return pinctrl_gpio_request(gc, offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_request); /** * gpiochip_generic_free() - free the gpio function from a pin * @gc: the gpiochip to request the gpio function for * @offset: the offset of the GPIO to free from GPIO function */ void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset) { #ifdef CONFIG_PINCTRL if (list_empty(&gc->gpiodev->pin_ranges)) return; #endif pinctrl_gpio_free(gc, offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_free); /** * gpiochip_generic_config() - apply configuration for a pin * @gc: the gpiochip owning the GPIO * @offset: the offset of the GPIO to apply the configuration * @config: the configuration to be applied */ int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config) { #ifdef CONFIG_PINCTRL if (list_empty(&gc->gpiodev->pin_ranges)) return -ENOTSUPP; #endif return pinctrl_gpio_set_config(gc, offset, config); } EXPORT_SYMBOL_GPL(gpiochip_generic_config); #ifdef CONFIG_PINCTRL /** * gpiochip_add_pingroup_range() - add a range for GPIO <-> pin mapping * @gc: the gpiochip to add the range for * @pctldev: the pin controller to map to * @gpio_offset: the start offset in the current gpio_chip number space * @pin_group: name of the pin group inside the pin controller * * Calling this function directly from a DeviceTree-supported * pinctrl driver is DEPRECATED. Please see Section 2.1 of * Documentation/devicetree/bindings/gpio/gpio.txt on how to * bind pinctrl and gpio drivers via the "gpio-ranges" property. 
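 *
 * Illustrative call (hypothetical names, shown only as a sketch):
 *
 *	ret = gpiochip_add_pingroup_range(gc, my_pctldev, 0, "my_pin_group");
 *	if (ret)
 *		return ret;
 *
 * This maps the chip's GPIOs starting at offset 0 onto the pins of the
 * "my_pin_group" group of my_pctldev.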
*/ int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group) { struct gpio_pin_range *pin_range; struct gpio_device *gdev = gc->gpiodev; int ret; pin_range = kzalloc(sizeof(*pin_range), GFP_KERNEL); if (!pin_range) { chip_err(gc, "failed to allocate pin ranges\n"); return -ENOMEM; } /* Use local offset as range ID */ pin_range->range.id = gpio_offset; pin_range->range.gc = gc; pin_range->range.name = gc->label; pin_range->range.base = gdev->base + gpio_offset; pin_range->pctldev = pctldev; ret = pinctrl_get_group_pins(pctldev, pin_group, &pin_range->range.pins, &pin_range->range.npins); if (ret < 0) { kfree(pin_range); return ret; } pinctrl_add_gpio_range(pctldev, &pin_range->range); chip_dbg(gc, "created GPIO range %d->%d ==> %s PINGRP %s\n", gpio_offset, gpio_offset + pin_range->range.npins - 1, pinctrl_dev_get_devname(pctldev), pin_group); list_add_tail(&pin_range->node, &gdev->pin_ranges); return 0; } EXPORT_SYMBOL_GPL(gpiochip_add_pingroup_range); /** * gpiochip_add_pin_range() - add a range for GPIO <-> pin mapping * @gc: the gpiochip to add the range for * @pinctl_name: the dev_name() of the pin controller to map to * @gpio_offset: the start offset in the current gpio_chip number space * @pin_offset: the start offset in the pin controller number space * @npins: the number of pins from the offset of each pin space (GPIO and * pin controller) to accumulate in this range * * Returns: * 0 on success, or a negative error-code on failure. * * Calling this function directly from a DeviceTree-supported * pinctrl driver is DEPRECATED. Please see Section 2.1 of * Documentation/devicetree/bindings/gpio/gpio.txt on how to * bind pinctrl and gpio drivers via the "gpio-ranges" property. */ int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, unsigned int npins) { struct gpio_pin_range *pin_range; struct gpio_device *gdev = gc->gpiodev; int ret; pin_range = kzalloc(sizeof(*pin_range), GFP_KERNEL); if (!pin_range) { chip_err(gc, "failed to allocate pin ranges\n"); return -ENOMEM; } /* Use local offset as range ID */ pin_range->range.id = gpio_offset; pin_range->range.gc = gc; pin_range->range.name = gc->label; pin_range->range.base = gdev->base + gpio_offset; pin_range->range.pin_base = pin_offset; pin_range->range.npins = npins; pin_range->pctldev = pinctrl_find_and_add_gpio_range(pinctl_name, &pin_range->range); if (IS_ERR(pin_range->pctldev)) { ret = PTR_ERR(pin_range->pctldev); chip_err(gc, "could not create pin range\n"); kfree(pin_range); return ret; } chip_dbg(gc, "created GPIO range %d->%d ==> %s PIN %d->%d\n", gpio_offset, gpio_offset + npins - 1, pinctl_name, pin_offset, pin_offset + npins - 1); list_add_tail(&pin_range->node, &gdev->pin_ranges); return 0; } EXPORT_SYMBOL_GPL(gpiochip_add_pin_range); /** * gpiochip_remove_pin_ranges() - remove all the GPIO <-> pin mappings * @gc: the chip to remove all the mappings for */ void gpiochip_remove_pin_ranges(struct gpio_chip *gc) { struct gpio_pin_range *pin_range, *tmp; struct gpio_device *gdev = gc->gpiodev; list_for_each_entry_safe(pin_range, tmp, &gdev->pin_ranges, node) { list_del(&pin_range->node); pinctrl_remove_gpio_range(pin_range->pctldev, &pin_range->range); kfree(pin_range); } } EXPORT_SYMBOL_GPL(gpiochip_remove_pin_ranges); #endif /* CONFIG_PINCTRL */ /* These "optional" allocation calls help prevent drivers from stomping * on each other, and help provide better diagnostics in debugfs. 
* They're called even less than the "set direction" calls. */ static int gpiod_request_commit(struct gpio_desc *desc, const char *label) { unsigned int offset; int ret; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (test_and_set_bit(FLAG_REQUESTED, &desc->flags)) return -EBUSY; /* NOTE: gpio_request() can be called in early boot, * before IRQs are enabled, for non-sleeping (SOC) GPIOs. */ if (guard.gc->request) { offset = gpio_chip_hwgpio(desc); if (gpiochip_line_is_valid(guard.gc, offset)) ret = guard.gc->request(guard.gc, offset); else ret = -EINVAL; if (ret) goto out_clear_bit; } if (guard.gc->get_direction) gpiod_get_direction(desc); ret = desc_set_label(desc, label ? : "?"); if (ret) goto out_clear_bit; return 0; out_clear_bit: clear_bit(FLAG_REQUESTED, &desc->flags); return ret; } /* * This descriptor validation needs to be inserted verbatim into each * function taking a descriptor, so we need to use a preprocessor * macro to avoid endless duplication. If the desc is NULL it is an * optional GPIO and calls should just bail out. */ static int validate_desc(const struct gpio_desc *desc, const char *func) { if (!desc) return 0; if (IS_ERR(desc)) { pr_warn("%s: invalid GPIO (errorpointer)\n", func); return PTR_ERR(desc); } return 1; } #define VALIDATE_DESC(desc) do { \ int __valid = validate_desc(desc, __func__); \ if (__valid <= 0) \ return __valid; \ } while (0) #define VALIDATE_DESC_VOID(desc) do { \ int __valid = validate_desc(desc, __func__); \ if (__valid <= 0) \ return; \ } while (0) int gpiod_request(struct gpio_desc *desc, const char *label) { int ret = -EPROBE_DEFER; VALIDATE_DESC(desc); if (try_module_get(desc->gdev->owner)) { ret = gpiod_request_commit(desc, label); if (ret) module_put(desc->gdev->owner); else gpio_device_get(desc->gdev); } if (ret) gpiod_dbg(desc, "%s: status %d\n", __func__, ret); return ret; } static void gpiod_free_commit(struct gpio_desc *desc) { unsigned long flags; might_sleep(); CLASS(gpio_chip_guard, guard)(desc); flags = READ_ONCE(desc->flags); if (guard.gc && test_bit(FLAG_REQUESTED, &flags)) { if (guard.gc->free) guard.gc->free(guard.gc, gpio_chip_hwgpio(desc)); clear_bit(FLAG_ACTIVE_LOW, &flags); clear_bit(FLAG_REQUESTED, &flags); clear_bit(FLAG_OPEN_DRAIN, &flags); clear_bit(FLAG_OPEN_SOURCE, &flags); clear_bit(FLAG_PULL_UP, &flags); clear_bit(FLAG_PULL_DOWN, &flags); clear_bit(FLAG_BIAS_DISABLE, &flags); clear_bit(FLAG_EDGE_RISING, &flags); clear_bit(FLAG_EDGE_FALLING, &flags); clear_bit(FLAG_IS_HOGGED, &flags); #ifdef CONFIG_OF_DYNAMIC WRITE_ONCE(desc->hog, NULL); #endif desc_set_label(desc, NULL); WRITE_ONCE(desc->flags, flags); gpiod_line_state_notify(desc, GPIOLINE_CHANGED_RELEASED); } } void gpiod_free(struct gpio_desc *desc) { VALIDATE_DESC_VOID(desc); gpiod_free_commit(desc); module_put(desc->gdev->owner); gpio_device_put(desc->gdev); } /** * gpiochip_dup_line_label - Get a copy of the consumer label. * @gc: GPIO chip controlling this line. * @offset: Hardware offset of the line. * * Returns: * Pointer to a copy of the consumer label if the line is requested or NULL * if it's not. If a valid pointer was returned, it must be freed using * kfree(). In case of a memory allocation error, the function returns %ENOMEM. * * Must not be called from atomic context. 
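 *
 * Typical caller pattern (hypothetical, shown only as a sketch):
 *
 *	char *label = gpiochip_dup_line_label(gc, offset);
 *	if (IS_ERR(label))
 *		return PTR_ERR(label);
 *	if (label)
 *		pr_debug("offset %u in use by %s\n", offset, label);
 *	kfree(label);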
*/ char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc; char *label; desc = gpiochip_get_desc(gc, offset); if (IS_ERR(desc)) return NULL; if (!test_bit(FLAG_REQUESTED, &desc->flags)) return NULL; guard(srcu)(&desc->srcu); label = kstrdup(gpiod_get_label(desc), GFP_KERNEL); if (!label) return ERR_PTR(-ENOMEM); return label; } EXPORT_SYMBOL_GPL(gpiochip_dup_line_label); static inline const char *function_name_or_default(const char *con_id) { return con_id ?: "(default)"; } /** * gpiochip_request_own_desc - Allow GPIO chip to request its own descriptor * @gc: GPIO chip * @hwnum: hardware number of the GPIO for which to request the descriptor * @label: label for the GPIO * @lflags: lookup flags for this GPIO or 0 if default, this can be used to * specify things like line inversion semantics with the machine flags * such as GPIO_OUT_LOW * @dflags: descriptor request flags for this GPIO or 0 if default, this * can be used to specify consumer semantics such as open drain * * Function allows GPIO chip drivers to request and use their own GPIO * descriptors via gpiolib API. Difference to gpiod_request() is that this * function will not increase reference count of the GPIO chip module. This * allows the GPIO chip module to be unloaded as needed (we assume that the * GPIO chip driver handles freeing the GPIOs it has requested). * * Returns: * A pointer to the GPIO descriptor, or an ERR_PTR()-encoded negative error * code on failure. */ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, unsigned int hwnum, const char *label, enum gpio_lookup_flags lflags, enum gpiod_flags dflags) { struct gpio_desc *desc = gpiochip_get_desc(gc, hwnum); const char *name = function_name_or_default(label); int ret; if (IS_ERR(desc)) { chip_err(gc, "failed to get GPIO %s descriptor\n", name); return desc; } ret = gpiod_request_commit(desc, label); if (ret < 0) return ERR_PTR(ret); ret = gpiod_configure_flags(desc, label, lflags, dflags); if (ret) { gpiod_free_commit(desc); chip_err(gc, "setup of own GPIO %s failed\n", name); return ERR_PTR(ret); } return desc; } EXPORT_SYMBOL_GPL(gpiochip_request_own_desc); /** * gpiochip_free_own_desc - Free GPIO requested by the chip driver * @desc: GPIO descriptor to free * * Function frees the given GPIO requested previously with * gpiochip_request_own_desc(). */ void gpiochip_free_own_desc(struct gpio_desc *desc) { if (desc) gpiod_free_commit(desc); } EXPORT_SYMBOL_GPL(gpiochip_free_own_desc); /* * Drivers MUST set GPIO direction before making get/set calls. In * some cases this is done in early boot, before IRQs are enabled. * * As a rule these aren't called more than once (except for drivers * using the open-drain emulation idiom) so these are natural places * to accumulate extra debugging checks. Note that we can't (yet) * rely on gpio_request() having been called beforehand. 
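 *
 * As an illustrative sketch only (hypothetical driver code with a made-up
 * offset and label), a chip driver claiming one of its own lines via
 * gpiochip_request_own_desc() likewise sets a direction before any
 * get/set call:
 *
 *	desc = gpiochip_request_own_desc(gc, 3, "mux-sel",
 *					 GPIO_ACTIVE_HIGH, GPIOD_ASIS);
 *	if (IS_ERR(desc))
 *		return PTR_ERR(desc);
 *	gpiod_direction_output(desc, 0);
 *	gpiod_set_value(desc, 1);
 *	...
 *	gpiochip_free_own_desc(desc);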
*/ static int gpio_do_set_config(struct gpio_chip *gc, unsigned int offset, unsigned long config) { if (!gc->set_config) return -ENOTSUPP; return gc->set_config(gc, offset, config); } static int gpio_set_config_with_argument(struct gpio_desc *desc, enum pin_config_param mode, u32 argument) { unsigned long config; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; config = pinconf_to_config_packed(mode, argument); return gpio_do_set_config(guard.gc, gpio_chip_hwgpio(desc), config); } static int gpio_set_config_with_argument_optional(struct gpio_desc *desc, enum pin_config_param mode, u32 argument) { struct device *dev = &desc->gdev->dev; int gpio = gpio_chip_hwgpio(desc); int ret; ret = gpio_set_config_with_argument(desc, mode, argument); if (ret != -ENOTSUPP) return ret; switch (mode) { case PIN_CONFIG_PERSIST_STATE: dev_dbg(dev, "Persistence not supported for GPIO %d\n", gpio); break; default: break; } return 0; } static int gpio_set_config(struct gpio_desc *desc, enum pin_config_param mode) { return gpio_set_config_with_argument(desc, mode, 0); } static int gpio_set_bias(struct gpio_desc *desc) { enum pin_config_param bias; unsigned long flags; unsigned int arg; flags = READ_ONCE(desc->flags); if (test_bit(FLAG_BIAS_DISABLE, &flags)) bias = PIN_CONFIG_BIAS_DISABLE; else if (test_bit(FLAG_PULL_UP, &flags)) bias = PIN_CONFIG_BIAS_PULL_UP; else if (test_bit(FLAG_PULL_DOWN, &flags)) bias = PIN_CONFIG_BIAS_PULL_DOWN; else return 0; switch (bias) { case PIN_CONFIG_BIAS_PULL_DOWN: case PIN_CONFIG_BIAS_PULL_UP: arg = 1; break; default: arg = 0; break; } return gpio_set_config_with_argument_optional(desc, bias, arg); } /** * gpio_set_debounce_timeout() - Set debounce timeout * @desc: GPIO descriptor to set the debounce timeout * @debounce: Debounce timeout in microseconds * * The function calls the certain GPIO driver to set debounce timeout * in the hardware. * * Returns 0 on success, or negative error code otherwise. */ int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce) { return gpio_set_config_with_argument_optional(desc, PIN_CONFIG_INPUT_DEBOUNCE, debounce); } /** * gpiod_direction_input - set the GPIO direction to input * @desc: GPIO to set to input * * Set the direction of the passed GPIO to input, such as gpiod_get_value() can * be called safely on it. * * Return 0 in case of success, else an error code. */ int gpiod_direction_input(struct gpio_desc *desc) { int ret = 0; VALIDATE_DESC(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; /* * It is legal to have no .get() and .direction_input() specified if * the chip is output-only, but you can't specify .direction_input() * and not support the .get() operation, that doesn't make sense. */ if (!guard.gc->get && guard.gc->direction_input) { gpiod_warn(desc, "%s: missing get() but have direction_input()\n", __func__); return -EIO; } /* * If we have a .direction_input() callback, things are simple, * just call it. Else we are some input-only chip so try to check the * direction (if .get_direction() is supported) else we silently * assume we are in input mode after this. 
*/ if (guard.gc->direction_input) { ret = guard.gc->direction_input(guard.gc, gpio_chip_hwgpio(desc)); } else if (guard.gc->get_direction && (guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc)) != 1)) { gpiod_warn(desc, "%s: missing direction_input() operation and line is output\n", __func__); return -EIO; } if (ret == 0) { clear_bit(FLAG_IS_OUT, &desc->flags); ret = gpio_set_bias(desc); } trace_gpio_direction(desc_to_gpio(desc), 1, ret); return ret; } EXPORT_SYMBOL_GPL(gpiod_direction_input); static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) { int val = !!value, ret = 0; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; /* * It's OK not to specify .direction_output() if the gpiochip is * output-only, but if there is then not even a .set() operation it * is pretty tricky to drive the output line. */ if (!guard.gc->set && !guard.gc->direction_output) { gpiod_warn(desc, "%s: missing set() and direction_output() operations\n", __func__); return -EIO; } if (guard.gc->direction_output) { ret = guard.gc->direction_output(guard.gc, gpio_chip_hwgpio(desc), val); } else { /* Check that we are in output mode if we can */ if (guard.gc->get_direction && guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc))) { gpiod_warn(desc, "%s: missing direction_output() operation\n", __func__); return -EIO; } /* * If we can't actively set the direction, we are some * output-only chip, so just drive the output as desired. */ guard.gc->set(guard.gc, gpio_chip_hwgpio(desc), val); } if (!ret) set_bit(FLAG_IS_OUT, &desc->flags); trace_gpio_value(desc_to_gpio(desc), 0, val); trace_gpio_direction(desc_to_gpio(desc), 0, ret); return ret; } /** * gpiod_direction_output_raw - set the GPIO direction to output * @desc: GPIO to set to output * @value: initial output value of the GPIO * * Set the direction of the passed GPIO to output, such as gpiod_set_value() can * be called safely on it. The initial value of the output must be specified * as raw value on the physical line without regard for the ACTIVE_LOW status. * * Return 0 in case of success, else an error code. */ int gpiod_direction_output_raw(struct gpio_desc *desc, int value) { VALIDATE_DESC(desc); return gpiod_direction_output_raw_commit(desc, value); } EXPORT_SYMBOL_GPL(gpiod_direction_output_raw); /** * gpiod_direction_output - set the GPIO direction to output * @desc: GPIO to set to output * @value: initial output value of the GPIO * * Set the direction of the passed GPIO to output, such as gpiod_set_value() can * be called safely on it. The initial value of the output must be specified * as the logical value of the GPIO, i.e. taking its ACTIVE_LOW status into * account. * * Return 0 in case of success, else an error code. 
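 *
 * Example (a minimal sketch; "reset" is a hypothetical active-low line
 * requested elsewhere with gpiod_get()):
 *
 *     ret = gpiod_direction_output(reset, 1);
 *     if (ret)
 *             return ret;
 *
 * If the line was looked up with GPIO_ACTIVE_LOW, the logical value 1 above
 * drives the physical line low; use gpiod_direction_output_raw() to set the
 * physical level directly.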
*/ int gpiod_direction_output(struct gpio_desc *desc, int value) { unsigned long flags; int ret; VALIDATE_DESC(desc); flags = READ_ONCE(desc->flags); if (test_bit(FLAG_ACTIVE_LOW, &flags)) value = !value; else value = !!value; /* GPIOs used for enabled IRQs shall not be set as output */ if (test_bit(FLAG_USED_AS_IRQ, &flags) && test_bit(FLAG_IRQ_IS_ENABLED, &flags)) { gpiod_err(desc, "%s: tried to set a GPIO tied to an IRQ as output\n", __func__); return -EIO; } if (test_bit(FLAG_OPEN_DRAIN, &flags)) { /* First see if we can enable open drain in hardware */ ret = gpio_set_config(desc, PIN_CONFIG_DRIVE_OPEN_DRAIN); if (!ret) goto set_output_value; /* Emulate open drain by not actively driving the line high */ if (value) { ret = gpiod_direction_input(desc); goto set_output_flag; } } else if (test_bit(FLAG_OPEN_SOURCE, &flags)) { ret = gpio_set_config(desc, PIN_CONFIG_DRIVE_OPEN_SOURCE); if (!ret) goto set_output_value; /* Emulate open source by not actively driving the line low */ if (!value) { ret = gpiod_direction_input(desc); goto set_output_flag; } } else { gpio_set_config(desc, PIN_CONFIG_DRIVE_PUSH_PULL); } set_output_value: ret = gpio_set_bias(desc); if (ret) return ret; return gpiod_direction_output_raw_commit(desc, value); set_output_flag: /* * When emulating open-source or open-drain functionalities by not * actively driving the line (setting mode to input) we still need to * set the IS_OUT flag or otherwise we won't be able to set the line * value anymore. */ if (ret == 0) set_bit(FLAG_IS_OUT, &desc->flags); return ret; } EXPORT_SYMBOL_GPL(gpiod_direction_output); /** * gpiod_enable_hw_timestamp_ns - Enable hardware timestamp in nanoseconds. * * @desc: GPIO to enable. * @flags: Flags related to GPIO edge. * * Return 0 in case of success, else negative error code. */ int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags) { int ret = 0; VALIDATE_DESC(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (!guard.gc->en_hw_timestamp) { gpiod_warn(desc, "%s: hw ts not supported\n", __func__); return -ENOTSUPP; } ret = guard.gc->en_hw_timestamp(guard.gc, gpio_chip_hwgpio(desc), flags); if (ret) gpiod_warn(desc, "%s: hw ts request failed\n", __func__); return ret; } EXPORT_SYMBOL_GPL(gpiod_enable_hw_timestamp_ns); /** * gpiod_disable_hw_timestamp_ns - Disable hardware timestamp. * * @desc: GPIO to disable. * @flags: Flags related to GPIO edge, same value as used during enable call. * * Return 0 in case of success, else negative error code. */ int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags) { int ret = 0; VALIDATE_DESC(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (!guard.gc->dis_hw_timestamp) { gpiod_warn(desc, "%s: hw ts not supported\n", __func__); return -ENOTSUPP; } ret = guard.gc->dis_hw_timestamp(guard.gc, gpio_chip_hwgpio(desc), flags); if (ret) gpiod_warn(desc, "%s: hw ts release failed\n", __func__); return ret; } EXPORT_SYMBOL_GPL(gpiod_disable_hw_timestamp_ns); /** * gpiod_set_config - sets @config for a GPIO * @desc: descriptor of the GPIO for which to set the configuration * @config: Same packed config format as generic pinconf * * Returns: * 0 on success, %-ENOTSUPP if the controller doesn't support setting the * configuration. 
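 *
 * Example (a sketch; @desc, @dev and ret are assumed to exist in the caller,
 * and the packing helper comes from <linux/pinctrl/pinconf-generic.h>).
 * Requesting a 10 ms debounce by packing the parameter and argument the same
 * way gpiod_set_debounce() does below:
 *
 *     unsigned long config;
 *
 *     config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, 10000);
 *     ret = gpiod_set_config(desc, config);
 *     if (ret == -ENOTSUPP)
 *             dev_warn(dev, "debounce not supported\n");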
*/ int gpiod_set_config(struct gpio_desc *desc, unsigned long config) { VALIDATE_DESC(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; return gpio_do_set_config(guard.gc, gpio_chip_hwgpio(desc), config); } EXPORT_SYMBOL_GPL(gpiod_set_config); /** * gpiod_set_debounce - sets @debounce time for a GPIO * @desc: descriptor of the GPIO for which to set debounce time * @debounce: debounce time in microseconds * * Returns: * 0 on success, %-ENOTSUPP if the controller doesn't support setting the * debounce time. */ int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce) { unsigned long config; config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce); return gpiod_set_config(desc, config); } EXPORT_SYMBOL_GPL(gpiod_set_debounce); /** * gpiod_set_transitory - Lose or retain GPIO state on suspend or reset * @desc: descriptor of the GPIO for which to configure persistence * @transitory: True to lose state on suspend or reset, false for persistence * * Returns: * 0 on success, otherwise a negative error code. */ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) { VALIDATE_DESC(desc); /* * Handle FLAG_TRANSITORY first, enabling queries to gpiolib for * persistence state. */ assign_bit(FLAG_TRANSITORY, &desc->flags, transitory); /* If the driver supports it, set the persistence state now */ return gpio_set_config_with_argument_optional(desc, PIN_CONFIG_PERSIST_STATE, !transitory); } /** * gpiod_is_active_low - test whether a GPIO is active-low or not * @desc: the gpio descriptor to test * * Returns 1 if the GPIO is active-low, 0 otherwise. */ int gpiod_is_active_low(const struct gpio_desc *desc) { VALIDATE_DESC(desc); return test_bit(FLAG_ACTIVE_LOW, &desc->flags); } EXPORT_SYMBOL_GPL(gpiod_is_active_low); /** * gpiod_toggle_active_low - toggle whether a GPIO is active-low or not * @desc: the gpio descriptor to change */ void gpiod_toggle_active_low(struct gpio_desc *desc) { VALIDATE_DESC_VOID(desc); change_bit(FLAG_ACTIVE_LOW, &desc->flags); } EXPORT_SYMBOL_GPL(gpiod_toggle_active_low); static int gpio_chip_get_value(struct gpio_chip *gc, const struct gpio_desc *desc) { return gc->get ? gc->get(gc, gpio_chip_hwgpio(desc)) : -EIO; } /* I/O calls are only valid after configuration completed; the relevant * "is this a valid GPIO" error checks should already have been done. * * "Get" operations are often inlinable as reading a pin value register, * and masking the relevant bit in that register. * * When "set" operations are inlinable, they involve writing that mask to * one register to set a low value, or a different register to set it high. * Otherwise locking is needed, so there may be little value to inlining. * *------------------------------------------------------------------------ * * IMPORTANT!!! The hot paths -- get/set value -- assume that callers * have requested the GPIO. That can include implicit requesting by * a direction setting call. Marking a gpio as requested locks its chip * in memory, guaranteeing that these table lookups need no more locking * and that gpiochip_remove() will fail. * * REVISIT when debugging, consider adding some instrumentation to ensure * that the GPIO was actually requested. */ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc) { struct gpio_device *gdev; struct gpio_chip *gc; int value; /* FIXME Unable to use gpio_chip_guard due to const desc. 
*/ gdev = desc->gdev; guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) return -ENODEV; value = gpio_chip_get_value(gc, desc); value = value < 0 ? value : !!value; trace_gpio_value(desc_to_gpio(desc), 1, value); return value; } static int gpio_chip_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { if (gc->get_multiple) return gc->get_multiple(gc, mask, bits); if (gc->get) { int i, value; for_each_set_bit(i, mask, gc->ngpio) { value = gc->get(gc, i); if (value < 0) return value; __assign_bit(i, bits, value); } return 0; } return -EIO; } /* The 'other' chip must be protected with its GPIO device's SRCU. */ static bool gpio_device_chip_cmp(struct gpio_device *gdev, struct gpio_chip *gc) { guard(srcu)(&gdev->srcu); return gc == srcu_dereference(gdev->chip, &gdev->srcu); } int gpiod_get_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { int ret, i = 0; /* * Validate array_info against desc_array and its size. * It should immediately follow desc_array if both * have been obtained from the same gpiod_get_array() call. */ if (array_info && array_info->desc == desc_array && array_size <= array_info->size && (void *)array_info == desc_array + array_info->size) { if (!can_sleep) WARN_ON(array_info->chip->can_sleep); ret = gpio_chip_get_multiple(array_info->chip, array_info->get_mask, value_bitmap); if (ret) return ret; if (!raw && !bitmap_empty(array_info->invert_mask, array_size)) bitmap_xor(value_bitmap, value_bitmap, array_info->invert_mask, array_size); i = find_first_zero_bit(array_info->get_mask, array_size); if (i == array_size) return 0; } else { array_info = NULL; } while (i < array_size) { DECLARE_BITMAP(fastpath_mask, FASTPATH_NGPIO); DECLARE_BITMAP(fastpath_bits, FASTPATH_NGPIO); unsigned long *mask, *bits; int first, j; CLASS(gpio_chip_guard, guard)(desc_array[i]); if (!guard.gc) return -ENODEV; if (likely(guard.gc->ngpio <= FASTPATH_NGPIO)) { mask = fastpath_mask; bits = fastpath_bits; } else { gfp_t flags = can_sleep ? GFP_KERNEL : GFP_ATOMIC; mask = bitmap_alloc(guard.gc->ngpio, flags); if (!mask) return -ENOMEM; bits = bitmap_alloc(guard.gc->ngpio, flags); if (!bits) { bitmap_free(mask); return -ENOMEM; } } bitmap_zero(mask, guard.gc->ngpio); if (!can_sleep) WARN_ON(guard.gc->can_sleep); /* collect all inputs belonging to the same chip */ first = i; do { const struct gpio_desc *desc = desc_array[i]; int hwgpio = gpio_chip_hwgpio(desc); __set_bit(hwgpio, mask); i++; if (array_info) i = find_next_zero_bit(array_info->get_mask, array_size, i); } while ((i < array_size) && gpio_device_chip_cmp(desc_array[i]->gdev, guard.gc)); ret = gpio_chip_get_multiple(guard.gc, mask, bits); if (ret) { if (mask != fastpath_mask) bitmap_free(mask); if (bits != fastpath_bits) bitmap_free(bits); return ret; } for (j = first; j < i; ) { const struct gpio_desc *desc = desc_array[j]; int hwgpio = gpio_chip_hwgpio(desc); int value = test_bit(hwgpio, bits); if (!raw && test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; __assign_bit(j, value_bitmap, value); trace_gpio_value(desc_to_gpio(desc), 1, value); j++; if (array_info) j = find_next_zero_bit(array_info->get_mask, i, j); } if (mask != fastpath_mask) bitmap_free(mask); if (bits != fastpath_bits) bitmap_free(bits); } return 0; } /** * gpiod_get_raw_value() - return a gpio's raw value * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. 
the value of the physical line disregarding * its ACTIVE_LOW status, or negative errno on failure. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ int gpiod_get_raw_value(const struct gpio_desc *desc) { VALIDATE_DESC(desc); /* Should be using gpiod_get_raw_value_cansleep() */ WARN_ON(desc->gdev->can_sleep); return gpiod_get_raw_value_commit(desc); } EXPORT_SYMBOL_GPL(gpiod_get_raw_value); /** * gpiod_get_value() - return a gpio's value * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into * account, or negative errno on failure. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ int gpiod_get_value(const struct gpio_desc *desc) { int value; VALIDATE_DESC(desc); /* Should be using gpiod_get_value_cansleep() */ WARN_ON(desc->gdev->can_sleep); value = gpiod_get_raw_value_commit(desc); if (value < 0) return value; if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; return value; } EXPORT_SYMBOL_GPL(gpiod_get_value); /** * gpiod_get_raw_array_value() - read raw values from an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be read * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap to store the read values * * Read the raw values of the GPIOs, i.e. the values of the physical lines * without regard for their ACTIVE_LOW status. Return 0 in case of success, * else an error code. * * This function can be called from contexts where we cannot sleep, * and it will complain if the GPIO chip functions potentially sleep. */ int gpiod_get_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { if (!desc_array) return -EINVAL; return gpiod_get_array_value_complex(true, false, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value); /** * gpiod_get_array_value() - read values from an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be read * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap to store the read values * * Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status * into account. Return 0 in case of success, else an error code. * * This function can be called from contexts where we cannot sleep, * and it will complain if the GPIO chip functions potentially sleep. */ int gpiod_get_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { if (!desc_array) return -EINVAL; return gpiod_get_array_value_complex(false, false, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_get_array_value); /* * gpio_set_open_drain_value_commit() - Set the open drain gpio's value. * @desc: gpio descriptor whose state need to be set. * @value: Non-zero for setting it HIGH otherwise it will set to LOW. 
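 *
 * The emulation flips the direction: a non-zero value releases the line by
 * switching it to input (an external pull-up provides the high level), while
 * zero actively drives it low. Consumers do not call this directly; it is
 * reached through gpiod_set_value() on a descriptor carrying the open-drain
 * flag, e.g. one requested (hypothetical con_id) as:
 *
 *     desc = gpiod_get(dev, "int", GPIOD_OUT_HIGH_OPEN_DRAIN);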
*/ static void gpio_set_open_drain_value_commit(struct gpio_desc *desc, bool value) { int ret = 0, offset = gpio_chip_hwgpio(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return; if (value) { ret = guard.gc->direction_input(guard.gc, offset); } else { ret = guard.gc->direction_output(guard.gc, offset, 0); if (!ret) set_bit(FLAG_IS_OUT, &desc->flags); } trace_gpio_direction(desc_to_gpio(desc), value, ret); if (ret < 0) gpiod_err(desc, "%s: Error in set_value for open drain err %d\n", __func__, ret); } /* * _gpio_set_open_source_value() - Set the open source gpio's value. * @desc: gpio descriptor whose state need to be set. * @value: Non-zero for setting it HIGH otherwise it will set to LOW. */ static void gpio_set_open_source_value_commit(struct gpio_desc *desc, bool value) { int ret = 0, offset = gpio_chip_hwgpio(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return; if (value) { ret = guard.gc->direction_output(guard.gc, offset, 1); if (!ret) set_bit(FLAG_IS_OUT, &desc->flags); } else { ret = guard.gc->direction_input(guard.gc, offset); } trace_gpio_direction(desc_to_gpio(desc), !value, ret); if (ret < 0) gpiod_err(desc, "%s: Error in set_value for open source err %d\n", __func__, ret); } static void gpiod_set_raw_value_commit(struct gpio_desc *desc, bool value) { CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return; trace_gpio_value(desc_to_gpio(desc), 0, value); guard.gc->set(guard.gc, gpio_chip_hwgpio(desc), value); } /* * set multiple outputs on the same chip; * use the chip's set_multiple function if available; * otherwise set the outputs sequentially; * @chip: the GPIO chip we operate on * @mask: bit mask array; one bit per output; BITS_PER_LONG bits per word * defines which outputs are to be changed * @bits: bit value array; one bit per output; BITS_PER_LONG bits per word * defines the values the outputs specified by mask are to be set to */ static void gpio_chip_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { if (gc->set_multiple) { gc->set_multiple(gc, mask, bits); } else { unsigned int i; /* set outputs if the corresponding mask bit is set */ for_each_set_bit(i, mask, gc->ngpio) gc->set(gc, i, test_bit(i, bits)); } } int gpiod_set_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { int i = 0; /* * Validate array_info against desc_array and its size. * It should immediately follow desc_array if both * have been obtained from the same gpiod_get_array() call. */ if (array_info && array_info->desc == desc_array && array_size <= array_info->size && (void *)array_info == desc_array + array_info->size) { if (!can_sleep) WARN_ON(array_info->chip->can_sleep); if (!raw && !bitmap_empty(array_info->invert_mask, array_size)) bitmap_xor(value_bitmap, value_bitmap, array_info->invert_mask, array_size); gpio_chip_set_multiple(array_info->chip, array_info->set_mask, value_bitmap); i = find_first_zero_bit(array_info->set_mask, array_size); if (i == array_size) return 0; } else { array_info = NULL; } while (i < array_size) { DECLARE_BITMAP(fastpath_mask, FASTPATH_NGPIO); DECLARE_BITMAP(fastpath_bits, FASTPATH_NGPIO); unsigned long *mask, *bits; int count = 0; CLASS(gpio_chip_guard, guard)(desc_array[i]); if (!guard.gc) return -ENODEV; if (likely(guard.gc->ngpio <= FASTPATH_NGPIO)) { mask = fastpath_mask; bits = fastpath_bits; } else { gfp_t flags = can_sleep ? 
GFP_KERNEL : GFP_ATOMIC; mask = bitmap_alloc(guard.gc->ngpio, flags); if (!mask) return -ENOMEM; bits = bitmap_alloc(guard.gc->ngpio, flags); if (!bits) { bitmap_free(mask); return -ENOMEM; } } bitmap_zero(mask, guard.gc->ngpio); if (!can_sleep) WARN_ON(guard.gc->can_sleep); do { struct gpio_desc *desc = desc_array[i]; int hwgpio = gpio_chip_hwgpio(desc); int value = test_bit(i, value_bitmap); /* * Pins applicable for fast input but not for * fast output processing may have been already * inverted inside the fast path, skip them. */ if (!raw && !(array_info && test_bit(i, array_info->invert_mask)) && test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; trace_gpio_value(desc_to_gpio(desc), 0, value); /* * collect all normal outputs belonging to the same chip * open drain and open source outputs are set individually */ if (test_bit(FLAG_OPEN_DRAIN, &desc->flags) && !raw) { gpio_set_open_drain_value_commit(desc, value); } else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags) && !raw) { gpio_set_open_source_value_commit(desc, value); } else { __set_bit(hwgpio, mask); __assign_bit(hwgpio, bits, value); count++; } i++; if (array_info) i = find_next_zero_bit(array_info->set_mask, array_size, i); } while ((i < array_size) && gpio_device_chip_cmp(desc_array[i]->gdev, guard.gc)); /* push collected bits to outputs */ if (count != 0) gpio_chip_set_multiple(guard.gc, mask, bits); if (mask != fastpath_mask) bitmap_free(mask); if (bits != fastpath_bits) bitmap_free(bits); } return 0; } /** * gpiod_set_raw_value() - assign a gpio's raw value * @desc: gpio whose value will be assigned * @value: value to assign * * Set the raw value of the GPIO, i.e. the value of its physical line without * regard for its ACTIVE_LOW status. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ void gpiod_set_raw_value(struct gpio_desc *desc, int value) { VALIDATE_DESC_VOID(desc); /* Should be using gpiod_set_raw_value_cansleep() */ WARN_ON(desc->gdev->can_sleep); gpiod_set_raw_value_commit(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_raw_value); /** * gpiod_set_value_nocheck() - set a GPIO line value without checking * @desc: the descriptor to set the value on * @value: value to set * * This sets the value of a GPIO line backing a descriptor, applying * different semantic quirks like active low and open drain/source * handling. */ static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value) { if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) gpio_set_open_drain_value_commit(desc, value); else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) gpio_set_open_source_value_commit(desc, value); else gpiod_set_raw_value_commit(desc, value); } /** * gpiod_set_value() - assign a gpio's value * @desc: gpio whose value will be assigned * @value: value to assign * * Set the logical value of the GPIO, i.e. taking its ACTIVE_LOW, * OPEN_DRAIN and OPEN_SOURCE flags into account. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. 
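 *
 * Example (a minimal sketch; @led is assumed to have been requested with
 * GPIOD_OUT_LOW on a chip that does not sleep):
 *
 *     gpiod_set_value(led, 1);
 *     udelay(100);
 *     gpiod_set_value(led, 0);
 *
 * The value passed is the logical one: if @led was looked up as
 * GPIO_ACTIVE_LOW, writing 1 drives the physical line low. For descriptors
 * backed by chips that can sleep (e.g. I2C expanders), use
 * gpiod_set_value_cansleep() instead.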
*/ void gpiod_set_value(struct gpio_desc *desc, int value) { VALIDATE_DESC_VOID(desc); /* Should be using gpiod_set_value_cansleep() */ WARN_ON(desc->gdev->can_sleep); gpiod_set_value_nocheck(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_value); /** * gpiod_set_raw_array_value() - assign values to an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be assigned * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap of values to assign * * Set the raw values of the GPIOs, i.e. the values of the physical lines * without regard for their ACTIVE_LOW status. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { if (!desc_array) return -EINVAL; return gpiod_set_array_value_complex(true, false, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value); /** * gpiod_set_array_value() - assign values to an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be assigned * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap of values to assign * * Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status * into account. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { if (!desc_array) return -EINVAL; return gpiod_set_array_value_complex(false, false, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_set_array_value); /** * gpiod_cansleep() - report whether gpio value access may sleep * @desc: gpio to check * */ int gpiod_cansleep(const struct gpio_desc *desc) { VALIDATE_DESC(desc); return desc->gdev->can_sleep; } EXPORT_SYMBOL_GPL(gpiod_cansleep); /** * gpiod_set_consumer_name() - set the consumer name for the descriptor * @desc: gpio to set the consumer name on * @name: the new consumer name */ int gpiod_set_consumer_name(struct gpio_desc *desc, const char *name) { VALIDATE_DESC(desc); return desc_set_label(desc, name); } EXPORT_SYMBOL_GPL(gpiod_set_consumer_name); /** * gpiod_to_irq() - return the IRQ corresponding to a GPIO * @desc: gpio whose IRQ will be returned (already requested) * * Return the IRQ corresponding to the passed GPIO, or an error code in case of * error. */ int gpiod_to_irq(const struct gpio_desc *desc) { struct gpio_device *gdev; struct gpio_chip *gc; int offset; /* * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics * requires this function to not return zero on an invalid descriptor * but rather a negative error number. */ if (!desc || IS_ERR(desc)) return -EINVAL; gdev = desc->gdev; /* FIXME Cannot use gpio_chip_guard due to const desc. 
*/ guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) return -ENODEV; offset = gpio_chip_hwgpio(desc); if (gc->to_irq) { int retirq = gc->to_irq(gc, offset); /* Zero means NO_IRQ */ if (!retirq) return -ENXIO; return retirq; } #ifdef CONFIG_GPIOLIB_IRQCHIP if (gc->irq.chip) { /* * Avoid race condition with other code, which tries to lookup * an IRQ before the irqchip has been properly registered, * i.e. while gpiochip is still being brought up. */ return -EPROBE_DEFER; } #endif return -ENXIO; } EXPORT_SYMBOL_GPL(gpiod_to_irq); /** * gpiochip_lock_as_irq() - lock a GPIO to be used as IRQ * @gc: the chip the GPIO to lock belongs to * @offset: the offset of the GPIO to lock as IRQ * * This is used directly by GPIO drivers that want to lock down * a certain GPIO line to be used for IRQs. */ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc; desc = gpiochip_get_desc(gc, offset); if (IS_ERR(desc)) return PTR_ERR(desc); /* * If it's fast: flush the direction setting if something changed * behind our back */ if (!gc->can_sleep && gc->get_direction) { int dir = gpiod_get_direction(desc); if (dir < 0) { chip_err(gc, "%s: cannot get GPIO direction\n", __func__); return dir; } } /* To be valid for IRQ the line needs to be input or open drain */ if (test_bit(FLAG_IS_OUT, &desc->flags) && !test_bit(FLAG_OPEN_DRAIN, &desc->flags)) { chip_err(gc, "%s: tried to flag a GPIO set as output for IRQ\n", __func__); return -EIO; } set_bit(FLAG_USED_AS_IRQ, &desc->flags); set_bit(FLAG_IRQ_IS_ENABLED, &desc->flags); return 0; } EXPORT_SYMBOL_GPL(gpiochip_lock_as_irq); /** * gpiochip_unlock_as_irq() - unlock a GPIO used as IRQ * @gc: the chip the GPIO to lock belongs to * @offset: the offset of the GPIO to lock as IRQ * * This is used directly by GPIO drivers that want to indicate * that a certain GPIO is no longer used exclusively for IRQ. */ void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc; desc = gpiochip_get_desc(gc, offset); if (IS_ERR(desc)) return; clear_bit(FLAG_USED_AS_IRQ, &desc->flags); clear_bit(FLAG_IRQ_IS_ENABLED, &desc->flags); } EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq); void gpiochip_disable_irq(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc = gpiochip_get_desc(gc, offset); if (!IS_ERR(desc) && !WARN_ON(!test_bit(FLAG_USED_AS_IRQ, &desc->flags))) clear_bit(FLAG_IRQ_IS_ENABLED, &desc->flags); } EXPORT_SYMBOL_GPL(gpiochip_disable_irq); void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc = gpiochip_get_desc(gc, offset); if (!IS_ERR(desc) && !WARN_ON(!test_bit(FLAG_USED_AS_IRQ, &desc->flags))) { /* * We must not be output when using IRQ UNLESS we are * open drain. 
*/ WARN_ON(test_bit(FLAG_IS_OUT, &desc->flags) && !test_bit(FLAG_OPEN_DRAIN, &desc->flags)); set_bit(FLAG_IRQ_IS_ENABLED, &desc->flags); } } EXPORT_SYMBOL_GPL(gpiochip_enable_irq); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset) { if (offset >= gc->ngpio) return false; return test_bit(FLAG_USED_AS_IRQ, &gc->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_line_is_irq); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset) { int ret; if (!try_module_get(gc->gpiodev->owner)) return -ENODEV; ret = gpiochip_lock_as_irq(gc, offset); if (ret) { chip_err(gc, "unable to lock HW IRQ %u for IRQ\n", offset); module_put(gc->gpiodev->owner); return ret; } return 0; } EXPORT_SYMBOL_GPL(gpiochip_reqres_irq); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset) { gpiochip_unlock_as_irq(gc, offset); module_put(gc->gpiodev->owner); } EXPORT_SYMBOL_GPL(gpiochip_relres_irq); bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset) { if (offset >= gc->ngpio) return false; return test_bit(FLAG_OPEN_DRAIN, &gc->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_line_is_open_drain); bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset) { if (offset >= gc->ngpio) return false; return test_bit(FLAG_OPEN_SOURCE, &gc->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_line_is_open_source); bool gpiochip_line_is_persistent(struct gpio_chip *gc, unsigned int offset) { if (offset >= gc->ngpio) return false; return !test_bit(FLAG_TRANSITORY, &gc->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_line_is_persistent); /** * gpiod_get_raw_value_cansleep() - return a gpio's raw value * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding * its ACTIVE_LOW status, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) { might_sleep(); VALIDATE_DESC(desc); return gpiod_get_raw_value_commit(desc); } EXPORT_SYMBOL_GPL(gpiod_get_raw_value_cansleep); /** * gpiod_get_value_cansleep() - return a gpio's value * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into * account, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ int gpiod_get_value_cansleep(const struct gpio_desc *desc) { int value; might_sleep(); VALIDATE_DESC(desc); value = gpiod_get_raw_value_commit(desc); if (value < 0) return value; if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; return value; } EXPORT_SYMBOL_GPL(gpiod_get_value_cansleep); /** * gpiod_get_raw_array_value_cansleep() - read raw values from an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be read * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap to store the read values * * Read the raw values of the GPIOs, i.e. the values of the physical lines * without regard for their ACTIVE_LOW status. Return 0 in case of success, * else an error code. * * This function is to be called from contexts that can sleep. 
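 *
 * Example (a sketch; the "data" con_id is hypothetical and the descriptors
 * come from gpiod_get_array(), which also provides the @array_info cookie
 * used by the fast bitmap path):
 *
 *     struct gpio_descs *data;
 *     DECLARE_BITMAP(values, 8);
 *
 *     data = gpiod_get_array(dev, "data", GPIOD_IN);
 *     if (IS_ERR(data))
 *             return PTR_ERR(data);
 *
 *     ret = gpiod_get_raw_array_value_cansleep(data->ndescs, data->desc,
 *                                              data->info, values);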
*/ int gpiod_get_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { might_sleep(); if (!desc_array) return -EINVAL; return gpiod_get_array_value_complex(true, true, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value_cansleep); /** * gpiod_get_array_value_cansleep() - read values from an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be read * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap to store the read values * * Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status * into account. Return 0 in case of success, else an error code. * * This function is to be called from contexts that can sleep. */ int gpiod_get_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { might_sleep(); if (!desc_array) return -EINVAL; return gpiod_get_array_value_complex(false, true, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_get_array_value_cansleep); /** * gpiod_set_raw_value_cansleep() - assign a gpio's raw value * @desc: gpio whose value will be assigned * @value: value to assign * * Set the raw value of the GPIO, i.e. the value of its physical line without * regard for its ACTIVE_LOW status. * * This function is to be called from contexts that can sleep. */ void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) { might_sleep(); VALIDATE_DESC_VOID(desc); gpiod_set_raw_value_commit(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_raw_value_cansleep); /** * gpiod_set_value_cansleep() - assign a gpio's value * @desc: gpio whose value will be assigned * @value: value to assign * * Set the logical value of the GPIO, i.e. taking its ACTIVE_LOW status into * account * * This function is to be called from contexts that can sleep. */ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { might_sleep(); VALIDATE_DESC_VOID(desc); gpiod_set_value_nocheck(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); /** * gpiod_set_raw_array_value_cansleep() - assign values to an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be assigned * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap of values to assign * * Set the raw values of the GPIOs, i.e. the values of the physical lines * without regard for their ACTIVE_LOW status. * * This function is to be called from contexts that can sleep. 
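 *
 * Example (a sketch; @data is a struct gpio_descs pointer obtained from
 * gpiod_get_array() with GPIOD_OUT_LOW and holding no more than
 * BITS_PER_LONG lines, so a single unsigned long can serve as the bitmap):
 *
 *     unsigned long values = 0x5;
 *
 *     ret = gpiod_set_raw_array_value_cansleep(data->ndescs, data->desc,
 *                                              data->info, &values);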
*/ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { might_sleep(); if (!desc_array) return -EINVAL; return gpiod_set_array_value_complex(true, true, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep); /** * gpiod_add_lookup_tables() - register GPIO device consumers * @tables: list of tables of consumers to register * @n: number of tables in the list */ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) { unsigned int i; mutex_lock(&gpio_lookup_lock); for (i = 0; i < n; i++) list_add_tail(&tables[i]->list, &gpio_lookup_list); mutex_unlock(&gpio_lookup_lock); } /** * gpiod_set_array_value_cansleep() - assign values to an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be assigned * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap of values to assign * * Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status * into account. * * This function is to be called from contexts that can sleep. */ int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { might_sleep(); if (!desc_array) return -EINVAL; return gpiod_set_array_value_complex(false, true, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep); void gpiod_line_state_notify(struct gpio_desc *desc, unsigned long action) { blocking_notifier_call_chain(&desc->gdev->line_state_notifier, action, desc); } /** * gpiod_add_lookup_table() - register GPIO device consumers * @table: table of consumers to register */ void gpiod_add_lookup_table(struct gpiod_lookup_table *table) { gpiod_add_lookup_tables(&table, 1); } EXPORT_SYMBOL_GPL(gpiod_add_lookup_table); /** * gpiod_remove_lookup_table() - unregister GPIO device consumers * @table: table of consumers to unregister */ void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) { /* Nothing to remove */ if (!table) return; mutex_lock(&gpio_lookup_lock); list_del(&table->list); mutex_unlock(&gpio_lookup_lock); } EXPORT_SYMBOL_GPL(gpiod_remove_lookup_table); /** * gpiod_add_hogs() - register a set of GPIO hogs from machine code * @hogs: table of gpio hog entries with a zeroed sentinel at the end */ void gpiod_add_hogs(struct gpiod_hog *hogs) { struct gpiod_hog *hog; mutex_lock(&gpio_machine_hogs_mutex); for (hog = &hogs[0]; hog->chip_label; hog++) { list_add_tail(&hog->list, &gpio_machine_hogs); /* * The chip may have been registered earlier, so check if it * exists and, if so, try to hog the line now. */ struct gpio_device *gdev __free(gpio_device_put) = gpio_device_find_by_label(hog->chip_label); if (gdev) gpiochip_machine_hog(gpio_device_get_chip(gdev), hog); } mutex_unlock(&gpio_machine_hogs_mutex); } EXPORT_SYMBOL_GPL(gpiod_add_hogs); void gpiod_remove_hogs(struct gpiod_hog *hogs) { struct gpiod_hog *hog; mutex_lock(&gpio_machine_hogs_mutex); for (hog = &hogs[0]; hog->chip_label; hog++) list_del(&hog->list); mutex_unlock(&gpio_machine_hogs_mutex); } EXPORT_SYMBOL_GPL(gpiod_remove_hogs); static struct gpiod_lookup_table *gpiod_find_lookup_table(struct device *dev) { const char *dev_id = dev ? 
dev_name(dev) : NULL; struct gpiod_lookup_table *table; list_for_each_entry(table, &gpio_lookup_list, list) { if (table->dev_id && dev_id) { /* * Valid strings on both ends, must be identical to have * a match */ if (!strcmp(table->dev_id, dev_id)) return table; } else { /* * One of the pointers is NULL, so both must be to have * a match */ if (dev_id == table->dev_id) return table; } } return NULL; } static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id, unsigned int idx, unsigned long *flags) { struct gpio_desc *desc = ERR_PTR(-ENOENT); struct gpiod_lookup_table *table; struct gpiod_lookup *p; struct gpio_chip *gc; guard(mutex)(&gpio_lookup_lock); table = gpiod_find_lookup_table(dev); if (!table) return desc; for (p = &table->table[0]; p->key; p++) { /* idx must always match exactly */ if (p->idx != idx) continue; /* If the lookup entry has a con_id, require exact match */ if (p->con_id && (!con_id || strcmp(p->con_id, con_id))) continue; if (p->chip_hwnum == U16_MAX) { desc = gpio_name_to_desc(p->key); if (desc) { *flags = p->flags; return desc; } dev_warn(dev, "cannot find GPIO line %s, deferring\n", p->key); return ERR_PTR(-EPROBE_DEFER); } struct gpio_device *gdev __free(gpio_device_put) = gpio_device_find_by_label(p->key); if (!gdev) { /* * As the lookup table indicates a chip with * p->key should exist, assume it may * still appear later and let the interested * consumer be probed again or let the Deferred * Probe infrastructure handle the error. */ dev_warn(dev, "cannot find GPIO chip %s, deferring\n", p->key); return ERR_PTR(-EPROBE_DEFER); } gc = gpio_device_get_chip(gdev); if (gc->ngpio <= p->chip_hwnum) { dev_err(dev, "requested GPIO %u (%u) is out of range [0..%u] for chip %s\n", idx, p->chip_hwnum, gc->ngpio - 1, gc->label); return ERR_PTR(-EINVAL); } desc = gpio_device_get_desc(gdev, p->chip_hwnum); *flags = p->flags; return desc; } return desc; } static int platform_gpio_count(struct device *dev, const char *con_id) { struct gpiod_lookup_table *table; struct gpiod_lookup *p; unsigned int count = 0; scoped_guard(mutex, &gpio_lookup_lock) { table = gpiod_find_lookup_table(dev); if (!table) return -ENOENT; for (p = &table->table[0]; p->key; p++) { if ((con_id && p->con_id && !strcmp(con_id, p->con_id)) || (!con_id && !p->con_id)) count++; } } if (!count) return -ENOENT; return count; } static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode, struct device *consumer, const char *con_id, unsigned int idx, enum gpiod_flags *flags, unsigned long *lookupflags) { const char *name = function_name_or_default(con_id); struct gpio_desc *desc = ERR_PTR(-ENOENT); if (is_of_node(fwnode)) { dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags); } else if (is_acpi_node(fwnode)) { dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags); } else if (is_software_node(fwnode)) { dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags); } return desc; } struct gpio_desc *gpiod_find_and_request(struct device *consumer, struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags flags, const char *label, bool platform_lookup_allowed) { unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT; const char *name = function_name_or_default(con_id); /* * scoped_guard() is implemented as a for 
loop, meaning static * analyzers will complain about these two not being initialized. */ struct gpio_desc *desc = NULL; int ret = 0; scoped_guard(srcu, &gpio_devices_srcu) { desc = gpiod_find_by_fwnode(fwnode, consumer, con_id, idx, &flags, &lookupflags); if (gpiod_not_found(desc) && platform_lookup_allowed) { /* * Either we are not using DT or ACPI, or their lookup * did not return a result. In that case, use platform * lookup as a fallback. */ dev_dbg(consumer, "using lookup tables for GPIO lookup\n"); desc = gpiod_find(consumer, con_id, idx, &lookupflags); } if (IS_ERR(desc)) { dev_dbg(consumer, "No GPIO consumer %s found\n", name); return desc; } /* * If a connection label was passed use that, else attempt to use * the device name as label */ ret = gpiod_request(desc, label); } if (ret) { if (!(ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) return ERR_PTR(ret); /* * This happens when there are several consumers for * the same GPIO line: we just return here without * further initialization. It is a bit of a hack. * This is necessary to support fixed regulators. * * FIXME: Make this more sane and safe. */ dev_info(consumer, "nonexclusive access to GPIO for %s\n", name); return desc; } ret = gpiod_configure_flags(desc, con_id, lookupflags, flags); if (ret < 0) { gpiod_put(desc); dev_dbg(consumer, "setup of GPIO %s failed\n", name); return ERR_PTR(ret); } gpiod_line_state_notify(desc, GPIOLINE_CHANGED_REQUESTED); return desc; } /** * fwnode_gpiod_get_index - obtain a GPIO from firmware node * @fwnode: handle of the firmware node * @con_id: function within the GPIO consumer * @index: index of the GPIO to obtain for the consumer * @flags: GPIO initialization flags * @label: label to attach to the requested GPIO * * This function can be used for drivers that get their configuration * from opaque firmware. * * The function properly finds the corresponding GPIO using whatever is the * underlying firmware interface and then makes sure that the GPIO * descriptor is requested before it is returned to the caller. * * Returns: * On successful request the GPIO pin is configured in accordance with * provided @flags. * * In case of error an ERR_PTR() is returned. */ struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, const char *con_id, int index, enum gpiod_flags flags, const char *label) { return gpiod_find_and_request(NULL, fwnode, con_id, index, flags, label, false); } EXPORT_SYMBOL_GPL(fwnode_gpiod_get_index); /** * gpiod_count - return the number of GPIOs associated with a device / function * or -ENOENT if no GPIO has been assigned to the requested function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer */ int gpiod_count(struct device *dev, const char *con_id) { const struct fwnode_handle *fwnode = dev ? 
dev_fwnode(dev) : NULL; int count = -ENOENT; if (is_of_node(fwnode)) count = of_gpio_count(fwnode, con_id); else if (is_acpi_node(fwnode)) count = acpi_gpio_count(fwnode, con_id); else if (is_software_node(fwnode)) count = swnode_gpio_count(fwnode, con_id); if (count < 0) count = platform_gpio_count(dev, con_id); return count; } EXPORT_SYMBOL_GPL(gpiod_count); /** * gpiod_get - obtain a GPIO for a given GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @flags: optional GPIO initialization flags * * Return the GPIO descriptor corresponding to the function con_id of device * dev, -ENOENT if no GPIO has been assigned to the requested function, or * another IS_ERR() code if an error occurred while trying to acquire the GPIO. */ struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags) { return gpiod_get_index(dev, con_id, 0, flags); } EXPORT_SYMBOL_GPL(gpiod_get); /** * gpiod_get_optional - obtain an optional GPIO for a given GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get(), except that when no GPIO was assigned to * the requested function it will return NULL. This is convenient for drivers * that need to handle optional GPIOs. */ struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { return gpiod_get_index_optional(dev, con_id, 0, flags); } EXPORT_SYMBOL_GPL(gpiod_get_optional); /** * gpiod_configure_flags - helper function to configure a given GPIO * @desc: gpio whose value will be assigned * @con_id: function within the GPIO consumer * @lflags: bitmask of gpio_lookup_flags GPIO_* values - returned from * of_find_gpio() or of_get_gpio_hog() * @dflags: gpiod_flags - optional GPIO initialization flags * * Return 0 on success, -ENOENT if no GPIO has been assigned to the * requested function and/or index, or another IS_ERR() code if an error * occurred while trying to acquire the GPIO. */ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, unsigned long lflags, enum gpiod_flags dflags) { const char *name = function_name_or_default(con_id); int ret; if (lflags & GPIO_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); if (lflags & GPIO_OPEN_DRAIN) set_bit(FLAG_OPEN_DRAIN, &desc->flags); else if (dflags & GPIOD_FLAGS_BIT_OPEN_DRAIN) { /* * This enforces open drain mode from the consumer side. * This is necessary for some busses like I2C, but the lookup * should *REALLY* have specified them as open drain in the * first place, so print a little warning here. 
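 *
 * The preferred fix is in the lookup itself, for instance a board file
 * entry such as (made-up chip label and consumer names, see
 * <linux/gpio/machine.h>):
 *
 *     GPIO_LOOKUP_IDX("gpio-foo", 3, "sda", 0,
 *                     GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
 *
 * or the equivalent open-drain annotation in the DT or ACPI description.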
*/ set_bit(FLAG_OPEN_DRAIN, &desc->flags); gpiod_warn(desc, "enforced open drain please flag it properly in DT/ACPI DSDT/board file\n"); } if (lflags & GPIO_OPEN_SOURCE) set_bit(FLAG_OPEN_SOURCE, &desc->flags); if (((lflags & GPIO_PULL_UP) && (lflags & GPIO_PULL_DOWN)) || ((lflags & GPIO_PULL_UP) && (lflags & GPIO_PULL_DISABLE)) || ((lflags & GPIO_PULL_DOWN) && (lflags & GPIO_PULL_DISABLE))) { gpiod_err(desc, "multiple pull-up, pull-down or pull-disable enabled, invalid configuration\n"); return -EINVAL; } if (lflags & GPIO_PULL_UP) set_bit(FLAG_PULL_UP, &desc->flags); else if (lflags & GPIO_PULL_DOWN) set_bit(FLAG_PULL_DOWN, &desc->flags); else if (lflags & GPIO_PULL_DISABLE) set_bit(FLAG_BIAS_DISABLE, &desc->flags); ret = gpiod_set_transitory(desc, (lflags & GPIO_TRANSITORY)); if (ret < 0) return ret; /* No particular flag request, return here... */ if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) { gpiod_dbg(desc, "no flags found for GPIO %s\n", name); return 0; } /* Process flags */ if (dflags & GPIOD_FLAGS_BIT_DIR_OUT) ret = gpiod_direction_output(desc, !!(dflags & GPIOD_FLAGS_BIT_DIR_VAL)); else ret = gpiod_direction_input(desc); return ret; } /** * gpiod_get_index - obtain a GPIO from a multi-index GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @idx: index of the GPIO to obtain in the consumer * @flags: optional GPIO initialization flags * * This variant of gpiod_get() allows to access GPIOs other than the first * defined one for functions that define several GPIOs. * * Return a valid GPIO descriptor, -ENOENT if no GPIO has been assigned to the * requested function and/or index, or another IS_ERR() code if an error * occurred while trying to acquire the GPIO. */ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags) { struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL; const char *devname = dev ? dev_name(dev) : "?"; const char *label = con_id ?: devname; return gpiod_find_and_request(dev, fwnode, con_id, idx, flags, label, true); } EXPORT_SYMBOL_GPL(gpiod_get_index); /** * gpiod_get_index_optional - obtain an optional GPIO from a multi-index GPIO * function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @index: index of the GPIO to obtain in the consumer * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get_index(), except that when no GPIO with the * specified index was assigned to the requested function it will return NULL. * This is convenient for drivers that need to handle optional GPIOs. 
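 *
 * Example (a sketch with a hypothetical "wp" write-protect line):
 *
 *     struct gpio_desc *wp;
 *
 *     wp = gpiod_get_index_optional(dev, "wp", 0, GPIOD_IN);
 *     if (IS_ERR(wp))
 *             return PTR_ERR(wp);
 *     if (!wp)
 *             dev_dbg(dev, "no write-protect line, assuming writable\n");
 *
 * Only real errors (including -EPROBE_DEFER) need to be propagated; a
 * missing optional line is reported as NULL rather than as an error.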
*/ struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags) { struct gpio_desc *desc; desc = gpiod_get_index(dev, con_id, index, flags); if (gpiod_not_found(desc)) return NULL; return desc; } EXPORT_SYMBOL_GPL(gpiod_get_index_optional); /** * gpiod_hog - Hog the specified GPIO desc given the provided flags * @desc: gpio whose value will be assigned * @name: gpio line name * @lflags: bitmask of gpio_lookup_flags GPIO_* values - returned from * of_find_gpio() or of_get_gpio_hog() * @dflags: gpiod_flags - optional GPIO initialization flags */ int gpiod_hog(struct gpio_desc *desc, const char *name, unsigned long lflags, enum gpiod_flags dflags) { struct gpio_device *gdev = desc->gdev; struct gpio_desc *local_desc; int hwnum; int ret; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (test_and_set_bit(FLAG_IS_HOGGED, &desc->flags)) return 0; hwnum = gpio_chip_hwgpio(desc); local_desc = gpiochip_request_own_desc(guard.gc, hwnum, name, lflags, dflags); if (IS_ERR(local_desc)) { clear_bit(FLAG_IS_HOGGED, &desc->flags); ret = PTR_ERR(local_desc); pr_err("requesting hog GPIO %s (chip %s, offset %d) failed, %d\n", name, gdev->label, hwnum, ret); return ret; } gpiod_dbg(desc, "hogged as %s%s\n", (dflags & GPIOD_FLAGS_BIT_DIR_OUT) ? "output" : "input", (dflags & GPIOD_FLAGS_BIT_DIR_OUT) ? (dflags & GPIOD_FLAGS_BIT_DIR_VAL) ? "/high" : "/low" : ""); return 0; } /** * gpiochip_free_hogs - Scan gpio-controller chip and release GPIO hog * @gc: gpio chip to act on */ static void gpiochip_free_hogs(struct gpio_chip *gc) { struct gpio_desc *desc; for_each_gpio_desc_with_flag(gc, desc, FLAG_IS_HOGGED) gpiochip_free_own_desc(desc); } /** * gpiod_get_array - obtain multiple GPIOs from a multi-index GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @flags: optional GPIO initialization flags * * This function acquires all the GPIOs defined under a given function. * * Return a struct gpio_descs containing an array of descriptors, -ENOENT if * no GPIO has been assigned to the requested function, or another IS_ERR() * code if an error occurred while trying to acquire the GPIOs. */ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, const char *con_id, enum gpiod_flags flags) { struct gpio_desc *desc; struct gpio_descs *descs; struct gpio_array *array_info = NULL; struct gpio_chip *gc; int count, bitmap_size; size_t descs_size; count = gpiod_count(dev, con_id); if (count < 0) return ERR_PTR(count); descs_size = struct_size(descs, desc, count); descs = kzalloc(descs_size, GFP_KERNEL); if (!descs) return ERR_PTR(-ENOMEM); for (descs->ndescs = 0; descs->ndescs < count; descs->ndescs++) { desc = gpiod_get_index(dev, con_id, descs->ndescs, flags); if (IS_ERR(desc)) { gpiod_put_array(descs); return ERR_CAST(desc); } descs->desc[descs->ndescs] = desc; gc = gpiod_to_chip(desc); /* * If pin hardware number of array member 0 is also 0, select * its chip as a candidate for fast bitmap processing path. */ if (descs->ndescs == 0 && gpio_chip_hwgpio(desc) == 0) { struct gpio_descs *array; bitmap_size = BITS_TO_LONGS(gc->ngpio > count ? 
gc->ngpio : count); array = krealloc(descs, descs_size + struct_size(array_info, invert_mask, 3 * bitmap_size), GFP_KERNEL | __GFP_ZERO); if (!array) { gpiod_put_array(descs); return ERR_PTR(-ENOMEM); } descs = array; array_info = (void *)descs + descs_size; array_info->get_mask = array_info->invert_mask + bitmap_size; array_info->set_mask = array_info->get_mask + bitmap_size; array_info->desc = descs->desc; array_info->size = count; array_info->chip = gc; bitmap_set(array_info->get_mask, descs->ndescs, count - descs->ndescs); bitmap_set(array_info->set_mask, descs->ndescs, count - descs->ndescs); descs->info = array_info; } /* If there is no cache for fast bitmap processing path, continue */ if (!array_info) continue; /* Unmark array members which don't belong to the 'fast' chip */ if (array_info->chip != gc) { __clear_bit(descs->ndescs, array_info->get_mask); __clear_bit(descs->ndescs, array_info->set_mask); } /* * Detect array members which belong to the 'fast' chip * but their pins are not in hardware order. */ else if (gpio_chip_hwgpio(desc) != descs->ndescs) { /* * Don't use fast path if all array members processed so * far belong to the same chip as this one but its pin * hardware number is different from its array index. */ if (bitmap_full(array_info->get_mask, descs->ndescs)) { array_info = NULL; } else { __clear_bit(descs->ndescs, array_info->get_mask); __clear_bit(descs->ndescs, array_info->set_mask); } } else { /* Exclude open drain or open source from fast output */ if (gpiochip_line_is_open_drain(gc, descs->ndescs) || gpiochip_line_is_open_source(gc, descs->ndescs)) __clear_bit(descs->ndescs, array_info->set_mask); /* Identify 'fast' pins which require invertion */ if (gpiod_is_active_low(desc)) __set_bit(descs->ndescs, array_info->invert_mask); } } if (array_info) dev_dbg(dev, "GPIO array info: chip=%s, size=%d, get_mask=%lx, set_mask=%lx, invert_mask=%lx\n", array_info->chip->label, array_info->size, *array_info->get_mask, *array_info->set_mask, *array_info->invert_mask); return descs; } EXPORT_SYMBOL_GPL(gpiod_get_array); /** * gpiod_get_array_optional - obtain multiple GPIOs from a multi-index GPIO * function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get_array(), except that when no GPIO was * assigned to the requested function it will return NULL. */ struct gpio_descs *__must_check gpiod_get_array_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { struct gpio_descs *descs; descs = gpiod_get_array(dev, con_id, flags); if (gpiod_not_found(descs)) return NULL; return descs; } EXPORT_SYMBOL_GPL(gpiod_get_array_optional); /** * gpiod_put - dispose of a GPIO descriptor * @desc: GPIO descriptor to dispose of * * No descriptor can be used after gpiod_put() has been called on it. */ void gpiod_put(struct gpio_desc *desc) { if (desc) gpiod_free(desc); } EXPORT_SYMBOL_GPL(gpiod_put); /** * gpiod_put_array - dispose of multiple GPIO descriptors * @descs: struct gpio_descs containing an array of descriptors */ void gpiod_put_array(struct gpio_descs *descs) { unsigned int i; for (i = 0; i < descs->ndescs; i++) gpiod_put(descs->desc[i]); kfree(descs); } EXPORT_SYMBOL_GPL(gpiod_put_array); static int gpio_stub_drv_probe(struct device *dev) { /* * The DT node of some GPIO chips have a "compatible" property, but * never have a struct device added and probed by a driver to register * the GPIO chip with gpiolib. 
In such cases, fw_devlink=on will cause * the consumers of the GPIO chip to get probe deferred forever because * they will be waiting for a device associated with the GPIO chip * firmware node to get added and bound to a driver. * * To allow these consumers to probe, we associate the struct * gpio_device of the GPIO chip with the firmware node and then simply * bind it to this stub driver. */ return 0; } static struct device_driver gpio_stub_drv = { .name = "gpio_stub_drv", .bus = &gpio_bus_type, .probe = gpio_stub_drv_probe, }; static int __init gpiolib_dev_init(void) { int ret; /* Register GPIO sysfs bus */ ret = bus_register(&gpio_bus_type); if (ret < 0) { pr_err("gpiolib: could not register GPIO bus type\n"); return ret; } ret = driver_register(&gpio_stub_drv); if (ret < 0) { pr_err("gpiolib: could not register GPIO stub driver\n"); bus_unregister(&gpio_bus_type); return ret; } ret = alloc_chrdev_region(&gpio_devt, 0, GPIO_DEV_MAX, GPIOCHIP_NAME); if (ret < 0) { pr_err("gpiolib: failed to allocate char dev region\n"); driver_unregister(&gpio_stub_drv); bus_unregister(&gpio_bus_type); return ret; } gpiolib_initialized = true; gpiochip_setup_devs(); #if IS_ENABLED(CONFIG_OF_DYNAMIC) && IS_ENABLED(CONFIG_OF_GPIO) WARN_ON(of_reconfig_notifier_register(&gpio_of_notifier)); #endif /* CONFIG_OF_DYNAMIC && CONFIG_OF_GPIO */ return ret; } core_initcall(gpiolib_dev_init); #ifdef CONFIG_DEBUG_FS static void gpiolib_dbg_show(struct seq_file *s, struct gpio_device *gdev) { bool active_low, is_irq, is_out; unsigned int gpio = gdev->base; struct gpio_desc *desc; struct gpio_chip *gc; int value; guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) { seq_puts(s, "Underlying GPIO chip is gone\n"); return; } for_each_gpio_desc(gc, desc) { guard(srcu)(&desc->srcu); if (test_bit(FLAG_REQUESTED, &desc->flags)) { gpiod_get_direction(desc); is_out = test_bit(FLAG_IS_OUT, &desc->flags); value = gpio_chip_get_value(gc, desc); is_irq = test_bit(FLAG_USED_AS_IRQ, &desc->flags); active_low = test_bit(FLAG_ACTIVE_LOW, &desc->flags); seq_printf(s, " gpio-%-3d (%-20.20s|%-20.20s) %s %s %s%s\n", gpio, desc->name ?: "", gpiod_get_label(desc), is_out ? "out" : "in ", value >= 0 ? (value ? "hi" : "lo") : "? ", is_irq ? "IRQ " : "", active_low ? "ACTIVE LOW" : ""); } else if (desc->name) { seq_printf(s, " gpio-%-3d (%-20.20s)\n", gpio, desc->name); } gpio++; } } struct gpiolib_seq_priv { bool newline; int idx; }; static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos) { struct gpiolib_seq_priv *priv; struct gpio_device *gdev; loff_t index = *pos; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return NULL; s->private = priv; priv->idx = srcu_read_lock(&gpio_devices_srcu); list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { if (index-- == 0) return gdev; } return NULL; } static void *gpiolib_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct gpiolib_seq_priv *priv = s->private; struct gpio_device *gdev = v, *next; next = list_entry_rcu(gdev->list.next, struct gpio_device, list); gdev = &next->list == &gpio_devices ? 
NULL : next; priv->newline = true; ++*pos; return gdev; } static void gpiolib_seq_stop(struct seq_file *s, void *v) { struct gpiolib_seq_priv *priv = s->private; srcu_read_unlock(&gpio_devices_srcu, priv->idx); kfree(priv); } static int gpiolib_seq_show(struct seq_file *s, void *v) { struct gpiolib_seq_priv *priv = s->private; struct gpio_device *gdev = v; struct gpio_chip *gc; struct device *parent; guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) { seq_printf(s, "%s%s: (dangling chip)", priv->newline ? "\n" : "", dev_name(&gdev->dev)); return 0; } seq_printf(s, "%s%s: GPIOs %d-%d", priv->newline ? "\n" : "", dev_name(&gdev->dev), gdev->base, gdev->base + gdev->ngpio - 1); parent = gc->parent; if (parent) seq_printf(s, ", parent: %s/%s", parent->bus ? parent->bus->name : "no-bus", dev_name(parent)); if (gc->label) seq_printf(s, ", %s", gc->label); if (gc->can_sleep) seq_printf(s, ", can sleep"); seq_printf(s, ":\n"); if (gc->dbg_show) gc->dbg_show(s, gc); else gpiolib_dbg_show(s, gdev); return 0; } static const struct seq_operations gpiolib_sops = { .start = gpiolib_seq_start, .next = gpiolib_seq_next, .stop = gpiolib_seq_stop, .show = gpiolib_seq_show, }; DEFINE_SEQ_ATTRIBUTE(gpiolib); static int __init gpiolib_debugfs_init(void) { /* /sys/kernel/debug/gpio */ debugfs_create_file("gpio", 0444, NULL, NULL, &gpiolib_fops); return 0; } subsys_initcall(gpiolib_debugfs_init); #endif /* DEBUG_FS */
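/*
 * Example (illustrative, not part of gpiolib): a minimal consumer-side sketch
 * of the gpiod_get_array() path documented above.  The "leds" con_id and the
 * my_drv_probe() function are assumptions made for the sketch; the gpiod_*
 * calls themselves are the consumer API from <linux/gpio/consumer.h> that is
 * implemented in this file.
 */
#include <linux/err.h>
#include <linux/gpio/consumer.h>

static int my_drv_probe(struct device *dev)
{
	struct gpio_descs *leds;
	unsigned long values = 0x5;	/* drive lines 0 and 2 high, if present */
	int ret;

	/* Returns NULL (not an error) when no "leds" GPIOs are mapped. */
	leds = gpiod_get_array_optional(dev, "leds", GPIOD_OUT_LOW);
	if (IS_ERR(leds))
		return PTR_ERR(leds);
	if (!leds)
		return 0;

	/*
	 * leds->info is the fast-path gpio_array cache built by
	 * gpiod_get_array() when all lines sit on one chip in hardware order.
	 */
	ret = gpiod_set_array_value(leds->ndescs, leds->desc, leds->info,
				    &values);

	gpiod_put_array(leds);
	return ret;
}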
// SPDX-License-Identifier: GPL-2.0-or-later /* * Sound core. This file is composed of two parts. sound_class * which is common to both OSS and ALSA and OSS sound core which * is used by OSS or an emulation of it. */ /* * First, the common part.
*/ #include <linux/module.h> #include <linux/device.h> #include <linux/err.h> #include <linux/kdev_t.h> #include <linux/major.h> #include <sound/core.h> #ifdef CONFIG_SOUND_OSS_CORE static int __init init_oss_soundcore(void); static void cleanup_oss_soundcore(void); #else static inline int init_oss_soundcore(void) { return 0; } static inline void cleanup_oss_soundcore(void) { } #endif MODULE_DESCRIPTION("Core sound module"); MODULE_AUTHOR("Alan Cox"); MODULE_LICENSE("GPL"); static char *sound_devnode(const struct device *dev, umode_t *mode) { if (MAJOR(dev->devt) == SOUND_MAJOR) return NULL; return kasprintf(GFP_KERNEL, "snd/%s", dev_name(dev)); } const struct class sound_class = { .name = "sound", .devnode = sound_devnode, }; EXPORT_SYMBOL(sound_class); static int __init init_soundcore(void) { int rc; rc = init_oss_soundcore(); if (rc) return rc; rc = class_register(&sound_class); if (rc) { cleanup_oss_soundcore(); return rc; } return 0; } static void __exit cleanup_soundcore(void) { cleanup_oss_soundcore(); class_unregister(&sound_class); } subsys_initcall(init_soundcore); module_exit(cleanup_soundcore); #ifdef CONFIG_SOUND_OSS_CORE /* * OSS sound core handling. Breaks out sound functions to submodules * * Author: Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * * -------------------- * * Top level handler for the sound subsystem. Various devices can * plug into this. The fact they don't all go via OSS doesn't mean * they don't have to implement the OSS API. There is a lot of logic * to keeping much of the OSS weight out of the code in a compatibility * module, but it's up to the driver to rember to load it... * * The code provides a set of functions for registration of devices * by type. This is done rather than providing a single call so that * we can hide any future changes in the internals (eg when we go to * 32bit dev_t) from the modules and their interface. * * Secondly we need to allocate the dsp, dsp16 and audio devices as * one. Thus we misuse the chains a bit to simplify this. * * Thirdly to make it more fun and for 2.3.x and above we do all * of this using fine grained locking. * * FIXME: we have to resolve modules and fine grained load/unload * locking at some point in 2.3.x. */ #include <linux/init.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sound.h> #include <linux/kmod.h> #define SOUND_STEP 16 struct sound_unit { int unit_minor; const struct file_operations *unit_fops; struct sound_unit *next; char name[32]; }; /* * By default, OSS sound_core claims full legacy minor range (0-255) * of SOUND_MAJOR to trap open attempts to any sound minor and * requests modules using custom sound-slot/service-* module aliases. * The only benefit of doing this is allowing use of custom module * aliases instead of the standard char-major-* ones. This behavior * prevents alternative OSS implementation and is scheduled to be * removed. * * CONFIG_SOUND_OSS_CORE_PRECLAIM and soundcore.preclaim_oss kernel * parameter are added to allow distros and developers to try and * switch to alternative implementations without needing to rebuild * the kernel in the meantime. If preclaim_oss is non-zero, the * kernel will behave the same as before. All SOUND_MAJOR minors are * preclaimed and the custom module aliases along with standard chrdev * ones are emitted if a missing device is opened. If preclaim_oss is * zero, sound_core only grabs what's actually in use and for missing * devices only the standard chrdev aliases are requested. 
* * All these clutters are scheduled to be removed along with * sound-slot/service-* module aliases. */ static int preclaim_oss = IS_ENABLED(CONFIG_SOUND_OSS_CORE_PRECLAIM); module_param(preclaim_oss, int, 0444); static int soundcore_open(struct inode *, struct file *); static const struct file_operations soundcore_fops = { /* We must have an owner or the module locking fails */ .owner = THIS_MODULE, .open = soundcore_open, .llseek = noop_llseek, }; /* * Low level list operator. Scan the ordered list, find a hole and * join into it. Called with the lock asserted */ static int __sound_insert_unit(struct sound_unit * s, struct sound_unit **list, const struct file_operations *fops, int index, int low, int top) { int n=low; if (index < 0) { /* first free */ while (*list && (*list)->unit_minor<n) list=&((*list)->next); while(n<top) { /* Found a hole ? */ if(*list==NULL || (*list)->unit_minor>n) break; list=&((*list)->next); n+=SOUND_STEP; } if(n>=top) return -ENOENT; } else { n = low+(index*16); while (*list) { if ((*list)->unit_minor==n) return -EBUSY; if ((*list)->unit_minor>n) break; list=&((*list)->next); } } /* * Fill it in */ s->unit_minor=n; s->unit_fops=fops; /* * Link it */ s->next=*list; *list=s; return n; } /* * Remove a node from the chain. Called with the lock asserted */ static struct sound_unit *__sound_remove_unit(struct sound_unit **list, int unit) { while(*list) { struct sound_unit *p=*list; if(p->unit_minor==unit) { *list=p->next; return p; } list=&(p->next); } printk(KERN_ERR "Sound device %d went missing!\n", unit); return NULL; } /* * This lock guards the sound loader list. */ static DEFINE_SPINLOCK(sound_loader_lock); /* * Allocate the controlling structure and add it to the sound driver * list. Acquires locks as needed */ static int sound_insert_unit(struct sound_unit **list, const struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev) { struct sound_unit *s = kmalloc(sizeof(*s), GFP_KERNEL); int r; if (!s) return -ENOMEM; spin_lock(&sound_loader_lock); retry: r = __sound_insert_unit(s, list, fops, index, low, top); spin_unlock(&sound_loader_lock); if (r < 0) goto fail; else if (r < SOUND_STEP) sprintf(s->name, "sound/%s", name); else sprintf(s->name, "sound/%s%d", name, r / SOUND_STEP); if (!preclaim_oss) { /* * Something else might have grabbed the minor. If * first free slot is requested, rescan with @low set * to the next unit; otherwise, -EBUSY. */ r = __register_chrdev(SOUND_MAJOR, s->unit_minor, 1, s->name, &soundcore_fops); if (r < 0) { spin_lock(&sound_loader_lock); __sound_remove_unit(list, s->unit_minor); if (index < 0) { low = s->unit_minor + SOUND_STEP; goto retry; } spin_unlock(&sound_loader_lock); r = -EBUSY; goto fail; } } device_create(&sound_class, dev, MKDEV(SOUND_MAJOR, s->unit_minor), NULL, "%s", s->name+6); return s->unit_minor; fail: kfree(s); return r; } /* * Remove a unit. Acquires locks as needed. The drivers MUST have * completed the removal before their file operations become * invalid. 
*/ static void sound_remove_unit(struct sound_unit **list, int unit) { struct sound_unit *p; spin_lock(&sound_loader_lock); p = __sound_remove_unit(list, unit); spin_unlock(&sound_loader_lock); if (p) { if (!preclaim_oss) __unregister_chrdev(SOUND_MAJOR, p->unit_minor, 1, p->name); device_destroy(&sound_class, MKDEV(SOUND_MAJOR, p->unit_minor)); kfree(p); } } /* * Allocations * * 0 *16 Mixers * 1 *8 Sequencers * 2 *16 Midi * 3 *16 DSP * 4 *16 SunDSP * 5 *16 DSP16 * 6 -- sndstat (obsolete) * 7 *16 unused * 8 -- alternate sequencer (see above) * 9 *16 raw synthesizer access * 10 *16 unused * 11 *16 unused * 12 *16 unused * 13 *16 unused * 14 *16 unused * 15 *16 unused */ static struct sound_unit *chains[SOUND_STEP]; /** * register_sound_special_device - register a special sound node * @fops: File operations for the driver * @unit: Unit number to allocate * @dev: device pointer * * Allocate a special sound device by minor number from the sound * subsystem. * * Return: The allocated number is returned on success. On failure, * a negative error code is returned. */ int register_sound_special_device(const struct file_operations *fops, int unit, struct device *dev) { const int chain = unit % SOUND_STEP; int max_unit = 256; const char *name; char _name[16]; switch (chain) { case 0: name = "mixer"; break; case 1: name = "sequencer"; if (unit >= SOUND_STEP) goto __unknown; max_unit = unit + 1; break; case 2: name = "midi"; break; case 3: name = "dsp"; break; case 4: name = "audio"; break; case 5: name = "dspW"; break; case 8: name = "sequencer2"; if (unit >= SOUND_STEP) goto __unknown; max_unit = unit + 1; break; case 9: name = "dmmidi"; break; case 10: name = "dmfm"; break; case 12: name = "adsp"; break; case 13: name = "amidi"; break; case 14: name = "admmidi"; break; default: { __unknown: sprintf(_name, "unknown%d", chain); if (unit >= SOUND_STEP) strcat(_name, "-"); name = _name; } break; } return sound_insert_unit(&chains[chain], fops, -1, unit, max_unit, name, 0600, dev); } EXPORT_SYMBOL(register_sound_special_device); int register_sound_special(const struct file_operations *fops, int unit) { return register_sound_special_device(fops, unit, NULL); } EXPORT_SYMBOL(register_sound_special); /** * register_sound_mixer - register a mixer device * @fops: File operations for the driver * @dev: Unit number to allocate * * Allocate a mixer device. Unit is the number of the mixer requested. * Pass -1 to request the next free mixer unit. * * Return: On success, the allocated number is returned. On failure, * a negative error code is returned. */ int register_sound_mixer(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[0], fops, dev, 0, 128, "mixer", 0600, NULL); } EXPORT_SYMBOL(register_sound_mixer); /* * DSP's are registered as a triple. Register only one and cheat * in open - see below. */ /** * register_sound_dsp - register a DSP device * @fops: File operations for the driver * @dev: Unit number to allocate * * Allocate a DSP device. Unit is the number of the DSP requested. * Pass -1 to request the next free DSP unit. * * This function allocates both the audio and dsp device entries together * and will always allocate them as a matching pair - eg dsp3/audio3 * * Return: On success, the allocated number is returned. On failure, * a negative error code is returned. 
*/ int register_sound_dsp(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[3], fops, dev, 3, 131, "dsp", 0600, NULL); } EXPORT_SYMBOL(register_sound_dsp); /** * unregister_sound_special - unregister a special sound device * @unit: unit number to allocate * * Release a sound device that was allocated with * register_sound_special(). The unit passed is the return value from * the register function. */ void unregister_sound_special(int unit) { sound_remove_unit(&chains[unit % SOUND_STEP], unit); } EXPORT_SYMBOL(unregister_sound_special); /** * unregister_sound_mixer - unregister a mixer * @unit: unit number to allocate * * Release a sound device that was allocated with register_sound_mixer(). * The unit passed is the return value from the register function. */ void unregister_sound_mixer(int unit) { sound_remove_unit(&chains[0], unit); } EXPORT_SYMBOL(unregister_sound_mixer); /** * unregister_sound_dsp - unregister a DSP device * @unit: unit number to allocate * * Release a sound device that was allocated with register_sound_dsp(). * The unit passed is the return value from the register function. * * Both of the allocated units are released together automatically. */ void unregister_sound_dsp(int unit) { sound_remove_unit(&chains[3], unit); } EXPORT_SYMBOL(unregister_sound_dsp); static struct sound_unit *__look_for_unit(int chain, int unit) { struct sound_unit *s; s=chains[chain]; while(s && s->unit_minor <= unit) { if(s->unit_minor==unit) return s; s=s->next; } return NULL; } static int soundcore_open(struct inode *inode, struct file *file) { int chain; int unit = iminor(inode); struct sound_unit *s; const struct file_operations *new_fops = NULL; chain=unit&0x0F; if(chain==4 || chain==5) /* dsp/audio/dsp16 */ { unit&=0xF0; unit|=3; chain=3; } spin_lock(&sound_loader_lock); s = __look_for_unit(chain, unit); if (s) new_fops = fops_get(s->unit_fops); if (preclaim_oss && !new_fops) { spin_unlock(&sound_loader_lock); /* * Please, don't change this order or code. * For ALSA slot means soundcard and OSS emulation code * comes as add-on modules which aren't depend on * ALSA toplevel modules for soundcards, thus we need * load them at first. [Jaroslav Kysela <perex@jcu.cz>] */ request_module("sound-slot-%i", unit>>4); request_module("sound-service-%i-%i", unit>>4, chain); /* * sound-slot/service-* module aliases are scheduled * for removal in favor of the standard char-major-* * module aliases. For the time being, generate both * the legacy and standard module aliases to ease * transition. */ if (request_module("char-major-%d-%d", SOUND_MAJOR, unit) > 0) request_module("char-major-%d", SOUND_MAJOR); spin_lock(&sound_loader_lock); s = __look_for_unit(chain, unit); if (s) new_fops = fops_get(s->unit_fops); } spin_unlock(&sound_loader_lock); if (!new_fops) return -ENODEV; /* * We rely upon the fact that we can't be unloaded while the * subdriver is there. */ replace_fops(file, new_fops); if (!file->f_op->open) return -ENODEV; return file->f_op->open(inode, file); } MODULE_ALIAS_CHARDEV_MAJOR(SOUND_MAJOR); static void cleanup_oss_soundcore(void) { /* We have nothing to really do here - we know the lists must be empty */ unregister_chrdev(SOUND_MAJOR, "sound"); } static int __init init_oss_soundcore(void) { if (preclaim_oss && register_chrdev(SOUND_MAJOR, "sound", &soundcore_fops) < 0) { printk(KERN_ERR "soundcore: sound device already in use.\n"); return -EBUSY; } return 0; } #endif /* CONFIG_SOUND_OSS_CORE */
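/*
 * Example (illustrative, not part of sound_core): how a legacy OSS driver
 * would typically use the registration API above.  The my_* names and the
 * empty file_operations are assumptions made for the sketch; only
 * register_sound_dsp()/register_sound_mixer() and their unregister
 * counterparts are the real <linux/sound.h> interface.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/sound.h>

static const struct file_operations my_dsp_fops;	/* .owner, .open, ... */
static const struct file_operations my_mixer_fops;
static int my_dsp_unit, my_mixer_unit;

static int __init my_oss_init(void)
{
	/* -1 requests the first free unit; minors advance in SOUND_STEP (16) steps. */
	my_dsp_unit = register_sound_dsp(&my_dsp_fops, -1);
	if (my_dsp_unit < 0)
		return my_dsp_unit;

	my_mixer_unit = register_sound_mixer(&my_mixer_fops, -1);
	if (my_mixer_unit < 0) {
		unregister_sound_dsp(my_dsp_unit);
		return my_mixer_unit;
	}

	return 0;
}

static void __exit my_oss_exit(void)
{
	unregister_sound_mixer(my_mixer_unit);
	unregister_sound_dsp(my_dsp_unit);
}

module_init(my_oss_init);
module_exit(my_oss_exit);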
// SPDX-License-Identifier: GPL-2.0-only #include <linux/stat.h> #include <linux/sysctl.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/hash.h> #include <linux/kmemleak.h> #include <linux/user_namespace.h> struct ucounts init_ucounts = { .ns = &init_user_ns, .uid = GLOBAL_ROOT_UID, .count = ATOMIC_INIT(1), }; #define UCOUNTS_HASHTABLE_BITS 10 static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)]; static DEFINE_SPINLOCK(ucounts_lock); #define ucounts_hashfn(ns, uid) \ hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \ UCOUNTS_HASHTABLE_BITS) #define ucounts_hashentry(ns, uid) \ (ucounts_hashtable + ucounts_hashfn(ns, uid)) #ifdef CONFIG_SYSCTL static struct ctl_table_set * set_lookup(struct ctl_table_root *root) { return &current_user_ns()->set; } static int set_is_seen(struct ctl_table_set *set) { return &current_user_ns()->set == set; } static int set_permissions(struct ctl_table_header *head, struct ctl_table *table) { struct user_namespace *user_ns = container_of(head->set, struct user_namespace, set); int mode; /* Allow users with CAP_SYS_RESOURCE unrestrained access */ if (ns_capable(user_ns, CAP_SYS_RESOURCE)) mode = (table->mode & S_IRWXU) >> 6; else /* Allow all others at most read-only access */ mode = table->mode & S_IROTH; return (mode << 6) | (mode << 3) | mode; } static struct ctl_table_root set_root = { .lookup = set_lookup, .permissions = set_permissions, }; static long ue_zero = 0; static long ue_int_max = INT_MAX; #define UCOUNT_ENTRY(name) \ { \ .procname = name, \ .maxlen = sizeof(long), \ .mode = 0644, \ .proc_handler = proc_doulongvec_minmax, \ .extra1 = &ue_zero, \ .extra2 = &ue_int_max, \ } static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_user_namespaces"), UCOUNT_ENTRY("max_pid_namespaces"), UCOUNT_ENTRY("max_uts_namespaces"), UCOUNT_ENTRY("max_ipc_namespaces"), UCOUNT_ENTRY("max_net_namespaces"), UCOUNT_ENTRY("max_mnt_namespaces"), UCOUNT_ENTRY("max_cgroup_namespaces"), UCOUNT_ENTRY("max_time_namespaces"), #ifdef CONFIG_INOTIFY_USER UCOUNT_ENTRY("max_inotify_instances"),
UCOUNT_ENTRY("max_inotify_watches"), #endif #ifdef CONFIG_FANOTIFY UCOUNT_ENTRY("max_fanotify_groups"), UCOUNT_ENTRY("max_fanotify_marks"), #endif { } }; #endif /* CONFIG_SYSCTL */ bool setup_userns_sysctls(struct user_namespace *ns) { #ifdef CONFIG_SYSCTL struct ctl_table *tbl; BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1); setup_sysctl_set(&ns->set, &set_root, set_is_seen); tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL); if (tbl) { int i; for (i = 0; i < UCOUNT_COUNTS; i++) { tbl[i].data = &ns->ucount_max[i]; } ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl, ARRAY_SIZE(user_table)); } if (!ns->sysctls) { kfree(tbl); retire_sysctl_set(&ns->set); return false; } #endif return true; } void retire_userns_sysctls(struct user_namespace *ns) { #ifdef CONFIG_SYSCTL struct ctl_table *tbl; tbl = ns->sysctls->ctl_table_arg; unregister_sysctl_table(ns->sysctls); retire_sysctl_set(&ns->set); kfree(tbl); #endif } static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent) { struct ucounts *ucounts; hlist_for_each_entry(ucounts, hashent, node) { if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) return ucounts; } return NULL; } static void hlist_add_ucounts(struct ucounts *ucounts) { struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid); spin_lock_irq(&ucounts_lock); hlist_add_head(&ucounts->node, hashent); spin_unlock_irq(&ucounts_lock); } static inline bool get_ucounts_or_wrap(struct ucounts *ucounts) { /* Returns true on a successful get, false if the count wraps. */ return !atomic_add_negative(1, &ucounts->count); } struct ucounts *get_ucounts(struct ucounts *ucounts) { if (!get_ucounts_or_wrap(ucounts)) { put_ucounts(ucounts); ucounts = NULL; } return ucounts; } struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; bool wrapped; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); if (!ucounts) { spin_unlock_irq(&ucounts_lock); new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; new->ns = ns; new->uid = uid; atomic_set(&new->count, 1); spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); if (ucounts) { kfree(new); } else { hlist_add_head(&new->node, hashent); get_user_ns(new->ns); spin_unlock_irq(&ucounts_lock); return new; } } wrapped = !get_ucounts_or_wrap(ucounts); spin_unlock_irq(&ucounts_lock); if (wrapped) { put_ucounts(ucounts); return NULL; } return ucounts; } void put_ucounts(struct ucounts *ucounts) { unsigned long flags; if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); put_user_ns(ucounts->ns); kfree(ucounts); } } static inline bool atomic_long_inc_below(atomic_long_t *v, int u) { long c, old; c = atomic_long_read(v); for (;;) { if (unlikely(c >= u)) return false; old = atomic_long_cmpxchg(v, c, c+1); if (likely(old == c)) return true; c = old; } } struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type) { struct ucounts *ucounts, *iter, *bad; struct user_namespace *tns; ucounts = alloc_ucounts(ns, uid); for (iter = ucounts; iter; iter = tns->ucounts) { long max; tns = iter->ns; max = READ_ONCE(tns->ucount_max[type]); if (!atomic_long_inc_below(&iter->ucount[type], max)) goto fail; } return ucounts; fail: bad = iter; for (iter = ucounts; iter != bad; iter = iter->ns->ucounts) 
atomic_long_dec(&iter->ucount[type]); put_ucounts(ucounts); return NULL; } void dec_ucount(struct ucounts *ucounts, enum ucount_type type) { struct ucounts *iter; for (iter = ucounts; iter; iter = iter->ns->ucounts) { long dec = atomic_long_dec_if_positive(&iter->ucount[type]); WARN_ON_ONCE(dec < 0); } put_ucounts(ucounts); } long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v) { struct ucounts *iter; long max = LONG_MAX; long ret = 0; for (iter = ucounts; iter; iter = iter->ns->ucounts) { long new = atomic_long_add_return(v, &iter->rlimit[type]); if (new < 0 || new > max) ret = LONG_MAX; else if (iter == ucounts) ret = new; max = get_userns_rlimit_max(iter->ns, type); } return ret; } bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v) { struct ucounts *iter; long new = -1; /* Silence compiler warning */ for (iter = ucounts; iter; iter = iter->ns->ucounts) { long dec = atomic_long_sub_return(v, &iter->rlimit[type]); WARN_ON_ONCE(dec < 0); if (iter == ucounts) new = dec; } return (new == 0); } static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, struct ucounts *last, enum rlimit_type type) { struct ucounts *iter, *next; for (iter = ucounts; iter != last; iter = next) { long dec = atomic_long_sub_return(1, &iter->rlimit[type]); WARN_ON_ONCE(dec < 0); next = iter->ns->ucounts; if (dec == 0) put_ucounts(iter); } } void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type) { do_dec_rlimit_put_ucounts(ucounts, NULL, type); } long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type) { /* Caller must hold a reference to ucounts */ struct ucounts *iter; long max = LONG_MAX; long dec, ret = 0; for (iter = ucounts; iter; iter = iter->ns->ucounts) { long new = atomic_long_add_return(1, &iter->rlimit[type]); if (new < 0 || new > max) goto unwind; if (iter == ucounts) ret = new; max = get_userns_rlimit_max(iter->ns, type); /* * Grab an extra ucount reference for the caller when * the rlimit count was previously 0. */ if (new != 1) continue; if (!get_ucounts(iter)) goto dec_unwind; } return ret; dec_unwind: dec = atomic_long_sub_return(1, &iter->rlimit[type]); WARN_ON_ONCE(dec < 0); unwind: do_dec_rlimit_put_ucounts(ucounts, iter, type); return 0; } bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long rlimit) { struct ucounts *iter; long max = rlimit; if (rlimit > LONG_MAX) max = LONG_MAX; for (iter = ucounts; iter; iter = iter->ns->ucounts) { long val = get_rlimit_value(iter, type); if (val < 0 || val > max) return true; max = get_userns_rlimit_max(iter->ns, type); } return false; } static __init int user_namespace_sysctl_init(void) { #ifdef CONFIG_SYSCTL static struct ctl_table_header *user_header; static struct ctl_table empty[1]; /* * It is necessary to register the user directory in the * default set so that registrations in the child sets work * properly. */ user_header = register_sysctl_sz("user", empty, 0); kmemleak_ignore(user_header); BUG_ON(!user_header); BUG_ON(!setup_userns_sysctls(&init_user_ns)); #endif hlist_add_ucounts(&init_ucounts); inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1); return 0; } subsys_initcall(user_namespace_sysctl_init);
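/*
 * Example (illustrative, not part of ucount.c): charging a per-user object
 * against the hierarchical limits maintained above, in the style of
 * inotify's instance accounting.  struct my_object and the my_object_*
 * helpers are assumptions made for the sketch; inc_ucount()/dec_ucount()
 * and UCOUNT_INOTIFY_INSTANCES (available with CONFIG_INOTIFY_USER) are the
 * real <linux/user_namespace.h> interface.
 */
#include <linux/cred.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>

struct my_object {
	struct ucounts *ucounts;	/* holds the charge and a reference */
};

static struct my_object *my_object_alloc(void)
{
	struct my_object *obj;
	struct ucounts *ucounts;

	/* Charge one instance in this and every ancestor user namespace. */
	ucounts = inc_ucount(current_user_ns(), current_euid(),
			     UCOUNT_INOTIFY_INSTANCES);
	if (!ucounts)	/* some level hit its max_inotify_instances sysctl */
		return ERR_PTR(-EMFILE);

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj) {
		dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES);
		return ERR_PTR(-ENOMEM);
	}

	obj->ucounts = ucounts;
	return obj;
}

static void my_object_free(struct my_object *obj)
{
	dec_ucount(obj->ucounts, UCOUNT_INOTIFY_INSTANCES);
	kfree(obj);
}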
// SPDX-License-Identifier: GPL-2.0 or MIT /* * Copyright 2018 Noralf Trønnes */ #include <linux/iosys-map.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <drm/drm_client.h> #include <drm/drm_debugfs.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem.h> #include <drm/drm_mode.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" #include "drm_internal.h" /** * DOC: overview * * This library provides support for clients running in the kernel like fbdev and bootsplash. * * GEM drivers which provide a GEM based dumb buffer with a virtual address are supported.
*/ static int drm_client_open(struct drm_client_dev *client) { struct drm_device *dev = client->dev; struct drm_file *file; file = drm_file_alloc(dev->primary); if (IS_ERR(file)) return PTR_ERR(file); mutex_lock(&dev->filelist_mutex); list_add(&file->lhead, &dev->filelist_internal); mutex_unlock(&dev->filelist_mutex); client->file = file; return 0; } static void drm_client_close(struct drm_client_dev *client) { struct drm_device *dev = client->dev; mutex_lock(&dev->filelist_mutex); list_del(&client->file->lhead); mutex_unlock(&dev->filelist_mutex); drm_file_free(client->file); } /** * drm_client_init - Initialise a DRM client * @dev: DRM device * @client: DRM client * @name: Client name * @funcs: DRM client functions (optional) * * This initialises the client and opens a &drm_file. * Use drm_client_register() to complete the process. * The caller needs to hold a reference on @dev before calling this function. * The client is freed when the &drm_device is unregistered. See drm_client_release(). * * Returns: * Zero on success or negative error code on failure. */ int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, const char *name, const struct drm_client_funcs *funcs) { int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET) || !dev->driver->dumb_create) return -EOPNOTSUPP; client->dev = dev; client->name = name; client->funcs = funcs; ret = drm_client_modeset_create(client); if (ret) return ret; ret = drm_client_open(client); if (ret) goto err_free; drm_dev_get(dev); return 0; err_free: drm_client_modeset_free(client); return ret; } EXPORT_SYMBOL(drm_client_init); /** * drm_client_register - Register client * @client: DRM client * * Add the client to the &drm_device client list to activate its callbacks. * @client must be initialized by a call to drm_client_init(). After * drm_client_register() it is no longer permissible to call drm_client_release() * directly (outside the unregister callback), instead cleanup will happen * automatically on driver unload. * * Registering a client generates a hotplug event that allows the client * to set up its display from pre-existing outputs. The client must have * initialized its state to able to handle the hotplug event successfully. */ void drm_client_register(struct drm_client_dev *client) { struct drm_device *dev = client->dev; int ret; mutex_lock(&dev->clientlist_mutex); list_add(&client->list, &dev->clientlist); if (client->funcs && client->funcs->hotplug) { /* * Perform an initial hotplug event to pick up the * display configuration for the client. This step * has to be performed *after* registering the client * in the list of clients, or a concurrent hotplug * event might be lost; leaving the display off. * * Hold the clientlist_mutex as for a regular hotplug * event. */ ret = client->funcs->hotplug(client); if (ret) drm_dbg_kms(dev, "client hotplug ret=%d\n", ret); } mutex_unlock(&dev->clientlist_mutex); } EXPORT_SYMBOL(drm_client_register); /** * drm_client_release - Release DRM client resources * @client: DRM client * * Releases resources by closing the &drm_file that was opened by drm_client_init(). * It is called automatically if the &drm_client_funcs.unregister callback is _not_ set. * * This function should only be called from the unregister callback. An exception * is fbdev which cannot free the buffer if userspace has open file descriptors. * * Note: * Clients cannot initiate a release by themselves. This is done to keep the code simple. * The driver has to be unloaded before the client can be unloaded. 
*/ void drm_client_release(struct drm_client_dev *client) { struct drm_device *dev = client->dev; drm_dbg_kms(dev, "%s\n", client->name); drm_client_modeset_free(client); drm_client_close(client); drm_dev_put(dev); } EXPORT_SYMBOL(drm_client_release); void drm_client_dev_unregister(struct drm_device *dev) { struct drm_client_dev *client, *tmp; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; mutex_lock(&dev->clientlist_mutex); list_for_each_entry_safe(client, tmp, &dev->clientlist, list) { list_del(&client->list); if (client->funcs && client->funcs->unregister) { client->funcs->unregister(client); } else { drm_client_release(client); kfree(client); } } mutex_unlock(&dev->clientlist_mutex); } /** * drm_client_dev_hotplug - Send hotplug event to clients * @dev: DRM device * * This function calls the &drm_client_funcs.hotplug callback on the attached clients. * * drm_kms_helper_hotplug_event() calls this function, so drivers that use it * don't need to call this function themselves. */ void drm_client_dev_hotplug(struct drm_device *dev) { struct drm_client_dev *client; int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; if (!dev->mode_config.num_connector) { drm_dbg_kms(dev, "No connectors found, will not send hotplug events!\n"); return; } mutex_lock(&dev->clientlist_mutex); list_for_each_entry(client, &dev->clientlist, list) { if (!client->funcs || !client->funcs->hotplug) continue; if (client->hotplug_failed) continue; ret = client->funcs->hotplug(client); drm_dbg_kms(dev, "%s: ret=%d\n", client->name, ret); if (ret) client->hotplug_failed = true; } mutex_unlock(&dev->clientlist_mutex); } EXPORT_SYMBOL(drm_client_dev_hotplug); void drm_client_dev_restore(struct drm_device *dev) { struct drm_client_dev *client; int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; mutex_lock(&dev->clientlist_mutex); list_for_each_entry(client, &dev->clientlist, list) { if (!client->funcs || !client->funcs->restore) continue; ret = client->funcs->restore(client); drm_dbg_kms(dev, "%s: ret=%d\n", client->name, ret); if (!ret) /* The first one to return zero gets the privilege to restore */ break; } mutex_unlock(&dev->clientlist_mutex); } static void drm_client_buffer_delete(struct drm_client_buffer *buffer) { if (buffer->gem) { drm_gem_vunmap_unlocked(buffer->gem, &buffer->map); drm_gem_object_put(buffer->gem); } kfree(buffer); } static struct drm_client_buffer * drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format, u32 *handle) { const struct drm_format_info *info = drm_format_info(format); struct drm_mode_create_dumb dumb_args = { }; struct drm_device *dev = client->dev; struct drm_client_buffer *buffer; struct drm_gem_object *obj; int ret; buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); if (!buffer) return ERR_PTR(-ENOMEM); buffer->client = client; dumb_args.width = width; dumb_args.height = height; dumb_args.bpp = drm_format_info_bpp(info, 0); ret = drm_mode_create_dumb(dev, &dumb_args, client->file); if (ret) goto err_delete; obj = drm_gem_object_lookup(client->file, dumb_args.handle); if (!obj) { ret = -ENOENT; goto err_delete; } buffer->pitch = dumb_args.pitch; buffer->gem = obj; *handle = dumb_args.handle; return buffer; err_delete: drm_client_buffer_delete(buffer); return ERR_PTR(ret); } /** * drm_client_buffer_vmap - Map DRM client buffer into address space * @buffer: DRM client buffer * @map_copy: Returns the mapped memory's address * * This function maps a client buffer into kernel address space. 
If the * buffer is already mapped, it returns the existing mapping's address. * * Client buffer mappings are not ref'counted. Each call to * drm_client_buffer_vmap() should be followed by a call to * drm_client_buffer_vunmap(); or the client buffer should be mapped * throughout its lifetime. * * The returned address is a copy of the internal value. In contrast to * other vmap interfaces, you don't need it for the client's vunmap * function. So you can modify it at will during blit and draw operations. * * Returns: * 0 on success, or a negative errno code otherwise. */ int drm_client_buffer_vmap(struct drm_client_buffer *buffer, struct iosys_map *map_copy) { struct iosys_map *map = &buffer->map; int ret; /* * FIXME: The dependency on GEM here isn't required, we could * convert the driver handle to a dma-buf instead and use the * backend-agnostic dma-buf vmap support instead. This would * require that the handle2fd prime ioctl is reworked to pull the * fd_install step out of the driver backend hooks, to make that * final step optional for internal users. */ ret = drm_gem_vmap_unlocked(buffer->gem, map); if (ret) return ret; *map_copy = *map; return 0; } EXPORT_SYMBOL(drm_client_buffer_vmap); /** * drm_client_buffer_vunmap - Unmap DRM client buffer * @buffer: DRM client buffer * * This function removes a client buffer's memory mapping. Calling this * function is only required by clients that manage their buffer mappings * by themselves. */ void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) { struct iosys_map *map = &buffer->map; drm_gem_vunmap_unlocked(buffer->gem, map); } EXPORT_SYMBOL(drm_client_buffer_vunmap); static void drm_client_buffer_rmfb(struct drm_client_buffer *buffer) { int ret; if (!buffer->fb) return; ret = drm_mode_rmfb(buffer->client->dev, buffer->fb->base.id, buffer->client->file); if (ret) drm_err(buffer->client->dev, "Error removing FB:%u (%d)\n", buffer->fb->base.id, ret); buffer->fb = NULL; } static int drm_client_buffer_addfb(struct drm_client_buffer *buffer, u32 width, u32 height, u32 format, u32 handle) { struct drm_client_dev *client = buffer->client; struct drm_mode_fb_cmd2 fb_req = { }; int ret; fb_req.width = width; fb_req.height = height; fb_req.pixel_format = format; fb_req.handles[0] = handle; fb_req.pitches[0] = buffer->pitch; ret = drm_mode_addfb2(client->dev, &fb_req, client->file); if (ret) return ret; buffer->fb = drm_framebuffer_lookup(client->dev, buffer->client->file, fb_req.fb_id); if (WARN_ON(!buffer->fb)) return -ENOENT; /* drop the reference we picked up in framebuffer lookup */ drm_framebuffer_put(buffer->fb); strscpy(buffer->fb->comm, client->name, TASK_COMM_LEN); return 0; } /** * drm_client_framebuffer_create - Create a client framebuffer * @client: DRM client * @width: Framebuffer width * @height: Framebuffer height * @format: Buffer format * * This function creates a &drm_client_buffer which consists of a * &drm_framebuffer backed by a dumb buffer. * Call drm_client_framebuffer_delete() to free the buffer. * * Returns: * Pointer to a client buffer or an error pointer on failure. 
*/ struct drm_client_buffer * drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format) { struct drm_client_buffer *buffer; u32 handle; int ret; buffer = drm_client_buffer_create(client, width, height, format, &handle); if (IS_ERR(buffer)) return buffer; ret = drm_client_buffer_addfb(buffer, width, height, format, handle); /* * The handle is only needed for creating the framebuffer, destroy it * again to solve a circular dependency should anybody export the GEM * object as DMA-buf. The framebuffer and our buffer structure are still * holding references to the GEM object to prevent its destruction. */ drm_mode_destroy_dumb(client->dev, handle, client->file); if (ret) { drm_client_buffer_delete(buffer); return ERR_PTR(ret); } return buffer; } EXPORT_SYMBOL(drm_client_framebuffer_create); /** * drm_client_framebuffer_delete - Delete a client framebuffer * @buffer: DRM client buffer (can be NULL) */ void drm_client_framebuffer_delete(struct drm_client_buffer *buffer) { if (!buffer) return; drm_client_buffer_rmfb(buffer); drm_client_buffer_delete(buffer); } EXPORT_SYMBOL(drm_client_framebuffer_delete); /** * drm_client_framebuffer_flush - Manually flush client framebuffer * @buffer: DRM client buffer (can be NULL) * @rect: Damage rectangle (if NULL flushes all) * * This calls &drm_framebuffer_funcs->dirty (if present) to flush buffer changes * for drivers that need it. * * Returns: * Zero on success or negative error code on failure. */ int drm_client_framebuffer_flush(struct drm_client_buffer *buffer, struct drm_rect *rect) { if (!buffer || !buffer->fb || !buffer->fb->funcs->dirty) return 0; if (rect) { struct drm_clip_rect clip = { .x1 = rect->x1, .y1 = rect->y1, .x2 = rect->x2, .y2 = rect->y2, }; return buffer->fb->funcs->dirty(buffer->fb, buffer->client->file, 0, 0, &clip, 1); } return buffer->fb->funcs->dirty(buffer->fb, buffer->client->file, 0, 0, NULL, 0); } EXPORT_SYMBOL(drm_client_framebuffer_flush); #ifdef CONFIG_DEBUG_FS static int drm_client_debugfs_internal_clients(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; struct drm_printer p = drm_seq_file_printer(m); struct drm_client_dev *client; mutex_lock(&dev->clientlist_mutex); list_for_each_entry(client, &dev->clientlist, list) drm_printf(&p, "%s\n", client->name); mutex_unlock(&dev->clientlist_mutex); return 0; } static const struct drm_debugfs_info drm_client_debugfs_list[] = { { "internal_clients", drm_client_debugfs_internal_clients, 0 }, }; void drm_client_debugfs_init(struct drm_device *dev) { drm_debugfs_add_files(dev, drm_client_debugfs_list, ARRAY_SIZE(drm_client_debugfs_list)); } #endif
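/*
 * Example (illustrative, not part of drm_client.c): the minimal life cycle of
 * an in-kernel client built from the helpers above.  my_client_draw_once(),
 * the 640x480 size and the XRGB8888 fill are assumptions made for the sketch;
 * every drm_client_*() call is the API defined in this file.  A real client
 * would also call drm_client_register() and implement &drm_client_funcs.
 */
#include <linux/err.h>
#include <linux/iosys-map.h>
#include <linux/slab.h>
#include <drm/drm_client.h>
#include <drm/drm_fourcc.h>

static int my_client_draw_once(struct drm_device *dev)
{
	struct drm_client_dev *client;
	struct drm_client_buffer *buffer;
	struct iosys_map map;
	int ret;

	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return -ENOMEM;

	ret = drm_client_init(dev, client, "my-client", NULL);
	if (ret)
		goto err_free;

	/* Dumb-buffer backed framebuffer, as described above. */
	buffer = drm_client_framebuffer_create(client, 640, 480,
					       DRM_FORMAT_XRGB8888);
	if (IS_ERR(buffer)) {
		ret = PTR_ERR(buffer);
		goto err_release;
	}

	ret = drm_client_buffer_vmap(buffer, &map);
	if (!ret) {
		/* Fill every line up to the pitch, then flush the damage. */
		iosys_map_memset(&map, 0, 0xff, 480 * buffer->pitch);
		drm_client_buffer_vunmap(buffer);
		drm_client_framebuffer_flush(buffer, NULL);
	}

	drm_client_framebuffer_delete(buffer);
err_release:
	drm_client_release(client);
err_free:
	kfree(client);
	return ret;
}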
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2006 - 2007 Ivo van Doorn * Copyright (C) 2007 Dmitry Torokhov * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/workqueue.h> #include <linux/capability.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/rfkill.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/device.h> #include <linux/miscdevice.h> #include <linux/wait.h> #include <linux/poll.h> #include <linux/fs.h> #include <linux/slab.h> #include "rfkill.h" #define POLL_INTERVAL (5 * HZ) #define RFKILL_BLOCK_HW BIT(0) #define RFKILL_BLOCK_SW BIT(1) #define RFKILL_BLOCK_SW_PREV BIT(2) #define RFKILL_BLOCK_ANY (RFKILL_BLOCK_HW |\ RFKILL_BLOCK_SW |\ RFKILL_BLOCK_SW_PREV) #define
RFKILL_BLOCK_SW_SETCALL BIT(31) struct rfkill { spinlock_t lock; enum rfkill_type type; unsigned long state; unsigned long hard_block_reasons; u32 idx; bool registered; bool persistent; bool polling_paused; bool suspended; bool need_sync; const struct rfkill_ops *ops; void *data; #ifdef CONFIG_RFKILL_LEDS struct led_trigger led_trigger; const char *ledtrigname; #endif struct device dev; struct list_head node; struct delayed_work poll_work; struct work_struct uevent_work; struct work_struct sync_work; char name[]; }; #define to_rfkill(d) container_of(d, struct rfkill, dev) struct rfkill_int_event { struct list_head list; struct rfkill_event_ext ev; }; struct rfkill_data { struct list_head list; struct list_head events; struct mutex mtx; wait_queue_head_t read_wait; bool input_handler; u8 max_size; }; MODULE_AUTHOR("Ivo van Doorn <IvDoorn@gmail.com>"); MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); MODULE_DESCRIPTION("RF switch support"); MODULE_LICENSE("GPL"); /* * The locking here should be made much smarter, we currently have * a bit of a stupid situation because drivers might want to register * the rfkill struct under their own lock, and take this lock during * rfkill method calls -- which will cause an AB-BA deadlock situation. * * To fix that, we need to rework this code here to be mostly lock-free * and only use the mutex for list manipulations, not to protect the * various other global variables. Then we can avoid holding the mutex * around driver operations, and all is happy. */ static LIST_HEAD(rfkill_list); /* list of registered rf switches */ static DEFINE_MUTEX(rfkill_global_mutex); static LIST_HEAD(rfkill_fds); /* list of open fds of /dev/rfkill */ static unsigned int rfkill_default_state = 1; module_param_named(default_state, rfkill_default_state, uint, 0444); MODULE_PARM_DESC(default_state, "Default initial state for all radio types, 0 = radio off"); static struct { bool cur, sav; } rfkill_global_states[NUM_RFKILL_TYPES]; static bool rfkill_epo_lock_active; #ifdef CONFIG_RFKILL_LEDS static void rfkill_led_trigger_event(struct rfkill *rfkill) { struct led_trigger *trigger; if (!rfkill->registered) return; trigger = &rfkill->led_trigger; if (rfkill->state & RFKILL_BLOCK_ANY) led_trigger_event(trigger, LED_OFF); else led_trigger_event(trigger, LED_FULL); } static int rfkill_led_trigger_activate(struct led_classdev *led) { struct rfkill *rfkill; rfkill = container_of(led->trigger, struct rfkill, led_trigger); rfkill_led_trigger_event(rfkill); return 0; } const char *rfkill_get_led_trigger_name(struct rfkill *rfkill) { return rfkill->led_trigger.name; } EXPORT_SYMBOL(rfkill_get_led_trigger_name); void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name) { BUG_ON(!rfkill); rfkill->ledtrigname = name; } EXPORT_SYMBOL(rfkill_set_led_trigger_name); static int rfkill_led_trigger_register(struct rfkill *rfkill) { rfkill->led_trigger.name = rfkill->ledtrigname ? 
: dev_name(&rfkill->dev); rfkill->led_trigger.activate = rfkill_led_trigger_activate; return led_trigger_register(&rfkill->led_trigger); } static void rfkill_led_trigger_unregister(struct rfkill *rfkill) { led_trigger_unregister(&rfkill->led_trigger); } static struct led_trigger rfkill_any_led_trigger; static struct led_trigger rfkill_none_led_trigger; static struct work_struct rfkill_global_led_trigger_work; static void rfkill_global_led_trigger_worker(struct work_struct *work) { enum led_brightness brightness = LED_OFF; struct rfkill *rfkill; mutex_lock(&rfkill_global_mutex); list_for_each_entry(rfkill, &rfkill_list, node) { if (!(rfkill->state & RFKILL_BLOCK_ANY)) { brightness = LED_FULL; break; } } mutex_unlock(&rfkill_global_mutex); led_trigger_event(&rfkill_any_led_trigger, brightness); led_trigger_event(&rfkill_none_led_trigger, brightness == LED_OFF ? LED_FULL : LED_OFF); } static void rfkill_global_led_trigger_event(void) { schedule_work(&rfkill_global_led_trigger_work); } static int rfkill_global_led_trigger_register(void) { int ret; INIT_WORK(&rfkill_global_led_trigger_work, rfkill_global_led_trigger_worker); rfkill_any_led_trigger.name = "rfkill-any"; ret = led_trigger_register(&rfkill_any_led_trigger); if (ret) return ret; rfkill_none_led_trigger.name = "rfkill-none"; ret = led_trigger_register(&rfkill_none_led_trigger); if (ret) led_trigger_unregister(&rfkill_any_led_trigger); else /* Delay activation until all global triggers are registered */ rfkill_global_led_trigger_event(); return ret; } static void rfkill_global_led_trigger_unregister(void) { led_trigger_unregister(&rfkill_none_led_trigger); led_trigger_unregister(&rfkill_any_led_trigger); cancel_work_sync(&rfkill_global_led_trigger_work); } #else static void rfkill_led_trigger_event(struct rfkill *rfkill) { } static inline int rfkill_led_trigger_register(struct rfkill *rfkill) { return 0; } static inline void rfkill_led_trigger_unregister(struct rfkill *rfkill) { } static void rfkill_global_led_trigger_event(void) { } static int rfkill_global_led_trigger_register(void) { return 0; } static void rfkill_global_led_trigger_unregister(void) { } #endif /* CONFIG_RFKILL_LEDS */ static void rfkill_fill_event(struct rfkill_event_ext *ev, struct rfkill *rfkill, enum rfkill_operation op) { unsigned long flags; ev->idx = rfkill->idx; ev->type = rfkill->type; ev->op = op; spin_lock_irqsave(&rfkill->lock, flags); ev->hard = !!(rfkill->state & RFKILL_BLOCK_HW); ev->soft = !!(rfkill->state & (RFKILL_BLOCK_SW | RFKILL_BLOCK_SW_PREV)); ev->hard_block_reasons = rfkill->hard_block_reasons; spin_unlock_irqrestore(&rfkill->lock, flags); } static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) { struct rfkill_data *data; struct rfkill_int_event *ev; list_for_each_entry(data, &rfkill_fds, list) { ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) continue; rfkill_fill_event(&ev->ev, rfkill, op); mutex_lock(&data->mtx); list_add_tail(&ev->list, &data->events); mutex_unlock(&data->mtx); wake_up_interruptible(&data->read_wait); } } static void rfkill_event(struct rfkill *rfkill) { if (!rfkill->registered) return; kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); /* also send event to /dev/rfkill */ rfkill_send_events(rfkill, RFKILL_OP_CHANGE); } /** * rfkill_set_block - wrapper for set_block method * * @rfkill: the rfkill struct to use * @blocked: the new software state * * Calls the set_block method (when applicable) and handles notifications * etc. as well. 
*/ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) { unsigned long flags; bool prev, curr; int err; if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) return; /* * Some platforms (...!) generate input events which affect the * _hard_ kill state -- whenever something tries to change the * current software state query the hardware state too. */ if (rfkill->ops->query) rfkill->ops->query(rfkill, rfkill->data); spin_lock_irqsave(&rfkill->lock, flags); prev = rfkill->state & RFKILL_BLOCK_SW; if (prev) rfkill->state |= RFKILL_BLOCK_SW_PREV; else rfkill->state &= ~RFKILL_BLOCK_SW_PREV; if (blocked) rfkill->state |= RFKILL_BLOCK_SW; else rfkill->state &= ~RFKILL_BLOCK_SW; rfkill->state |= RFKILL_BLOCK_SW_SETCALL; spin_unlock_irqrestore(&rfkill->lock, flags); err = rfkill->ops->set_block(rfkill->data, blocked); spin_lock_irqsave(&rfkill->lock, flags); if (err) { /* * Failed -- reset status to _PREV, which may be different * from what we have set _PREV to earlier in this function * if rfkill_set_sw_state was invoked. */ if (rfkill->state & RFKILL_BLOCK_SW_PREV) rfkill->state |= RFKILL_BLOCK_SW; else rfkill->state &= ~RFKILL_BLOCK_SW; } rfkill->state &= ~RFKILL_BLOCK_SW_SETCALL; rfkill->state &= ~RFKILL_BLOCK_SW_PREV; curr = rfkill->state & RFKILL_BLOCK_SW; spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); rfkill_global_led_trigger_event(); if (prev != curr) rfkill_event(rfkill); } static void rfkill_sync(struct rfkill *rfkill) { lockdep_assert_held(&rfkill_global_mutex); if (!rfkill->need_sync) return; rfkill_set_block(rfkill, rfkill_global_states[rfkill->type].cur); rfkill->need_sync = false; } static void rfkill_update_global_state(enum rfkill_type type, bool blocked) { int i; if (type != RFKILL_TYPE_ALL) { rfkill_global_states[type].cur = blocked; return; } for (i = 0; i < NUM_RFKILL_TYPES; i++) rfkill_global_states[i].cur = blocked; } #ifdef CONFIG_RFKILL_INPUT static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); /** * __rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected * @blocked: the new state * * This function sets the state of all switches of given type, * unless a specific switch is suspended. * * Caller must have acquired rfkill_global_mutex. */ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked) { struct rfkill *rfkill; rfkill_update_global_state(type, blocked); list_for_each_entry(rfkill, &rfkill_list, node) { if (rfkill->type != type && type != RFKILL_TYPE_ALL) continue; rfkill_set_block(rfkill, blocked); } } /** * rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected * @blocked: the new state * * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). * Please refer to __rfkill_switch_all() for details. * * Does nothing if the EPO lock is active. */ void rfkill_switch_all(enum rfkill_type type, bool blocked) { if (atomic_read(&rfkill_input_disabled)) return; mutex_lock(&rfkill_global_mutex); if (!rfkill_epo_lock_active) __rfkill_switch_all(type, blocked); mutex_unlock(&rfkill_global_mutex); } /** * rfkill_epo - emergency power off all transmitters * * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED, * ignoring everything in its path but rfkill_global_mutex and rfkill->mutex. * * The global state before the EPO is saved and can be restored later * using rfkill_restore_states(). 
*/ void rfkill_epo(void) { struct rfkill *rfkill; int i; if (atomic_read(&rfkill_input_disabled)) return; mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = true; list_for_each_entry(rfkill, &rfkill_list, node) rfkill_set_block(rfkill, true); for (i = 0; i < NUM_RFKILL_TYPES; i++) { rfkill_global_states[i].sav = rfkill_global_states[i].cur; rfkill_global_states[i].cur = true; } mutex_unlock(&rfkill_global_mutex); } /** * rfkill_restore_states - restore global states * * Restore (and sync switches to) the global state from the * states in rfkill_default_states. This can undo the effects of * a call to rfkill_epo(). */ void rfkill_restore_states(void) { int i; if (atomic_read(&rfkill_input_disabled)) return; mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = false; for (i = 0; i < NUM_RFKILL_TYPES; i++) __rfkill_switch_all(i, rfkill_global_states[i].sav); mutex_unlock(&rfkill_global_mutex); } /** * rfkill_remove_epo_lock - unlock state changes * * Used by rfkill-input manually unlock state changes, when * the EPO switch is deactivated. */ void rfkill_remove_epo_lock(void) { if (atomic_read(&rfkill_input_disabled)) return; mutex_lock(&rfkill_global_mutex); rfkill_epo_lock_active = false; mutex_unlock(&rfkill_global_mutex); } /** * rfkill_is_epo_lock_active - returns true EPO is active * * Returns 0 (false) if there is NOT an active EPO condition, * and 1 (true) if there is an active EPO condition, which * locks all radios in one of the BLOCKED states. * * Can be called in atomic context. */ bool rfkill_is_epo_lock_active(void) { return rfkill_epo_lock_active; } /** * rfkill_get_global_sw_state - returns global state for a type * @type: the type to get the global state of * * Returns the current global state for a given wireless * device type. 
*/ bool rfkill_get_global_sw_state(const enum rfkill_type type) { return rfkill_global_states[type].cur; } #endif bool rfkill_set_hw_state_reason(struct rfkill *rfkill, bool blocked, unsigned long reason) { unsigned long flags; bool ret, prev; BUG_ON(!rfkill); if (WARN(reason & ~(RFKILL_HARD_BLOCK_SIGNAL | RFKILL_HARD_BLOCK_NOT_OWNER), "hw_state reason not supported: 0x%lx", reason)) return blocked; spin_lock_irqsave(&rfkill->lock, flags); prev = !!(rfkill->hard_block_reasons & reason); if (blocked) { rfkill->state |= RFKILL_BLOCK_HW; rfkill->hard_block_reasons |= reason; } else { rfkill->hard_block_reasons &= ~reason; if (!rfkill->hard_block_reasons) rfkill->state &= ~RFKILL_BLOCK_HW; } ret = !!(rfkill->state & RFKILL_BLOCK_ANY); spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); rfkill_global_led_trigger_event(); if (rfkill->registered && prev != blocked) schedule_work(&rfkill->uevent_work); return ret; } EXPORT_SYMBOL(rfkill_set_hw_state_reason); static void __rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) { u32 bit = RFKILL_BLOCK_SW; /* if in a ops->set_block right now, use other bit */ if (rfkill->state & RFKILL_BLOCK_SW_SETCALL) bit = RFKILL_BLOCK_SW_PREV; if (blocked) rfkill->state |= bit; else rfkill->state &= ~bit; } bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) { unsigned long flags; bool prev, hwblock; BUG_ON(!rfkill); spin_lock_irqsave(&rfkill->lock, flags); prev = !!(rfkill->state & RFKILL_BLOCK_SW); __rfkill_set_sw_state(rfkill, blocked); hwblock = !!(rfkill->state & RFKILL_BLOCK_HW); blocked = blocked || hwblock; spin_unlock_irqrestore(&rfkill->lock, flags); if (!rfkill->registered) return blocked; if (prev != blocked && !hwblock) schedule_work(&rfkill->uevent_work); rfkill_led_trigger_event(rfkill); rfkill_global_led_trigger_event(); return blocked; } EXPORT_SYMBOL(rfkill_set_sw_state); void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked) { unsigned long flags; BUG_ON(!rfkill); BUG_ON(rfkill->registered); spin_lock_irqsave(&rfkill->lock, flags); __rfkill_set_sw_state(rfkill, blocked); rfkill->persistent = true; spin_unlock_irqrestore(&rfkill->lock, flags); } EXPORT_SYMBOL(rfkill_init_sw_state); void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) { unsigned long flags; bool swprev, hwprev; BUG_ON(!rfkill); spin_lock_irqsave(&rfkill->lock, flags); /* * No need to care about prev/setblock ... this is for uevent only * and that will get triggered by rfkill_set_block anyway. 
*/ swprev = !!(rfkill->state & RFKILL_BLOCK_SW); hwprev = !!(rfkill->state & RFKILL_BLOCK_HW); __rfkill_set_sw_state(rfkill, sw); if (hw) rfkill->state |= RFKILL_BLOCK_HW; else rfkill->state &= ~RFKILL_BLOCK_HW; spin_unlock_irqrestore(&rfkill->lock, flags); if (!rfkill->registered) { rfkill->persistent = true; } else { if (swprev != sw || hwprev != hw) schedule_work(&rfkill->uevent_work); rfkill_led_trigger_event(rfkill); rfkill_global_led_trigger_event(); } } EXPORT_SYMBOL(rfkill_set_states); static const char * const rfkill_types[] = { NULL, /* RFKILL_TYPE_ALL */ "wlan", "bluetooth", "ultrawideband", "wimax", "wwan", "gps", "fm", "nfc", }; enum rfkill_type rfkill_find_type(const char *name) { int i; BUILD_BUG_ON(ARRAY_SIZE(rfkill_types) != NUM_RFKILL_TYPES); if (!name) return RFKILL_TYPE_ALL; for (i = 1; i < NUM_RFKILL_TYPES; i++) if (!strcmp(name, rfkill_types[i])) return i; return RFKILL_TYPE_ALL; } EXPORT_SYMBOL(rfkill_find_type); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%s\n", rfkill->name); } static DEVICE_ATTR_RO(name); static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%s\n", rfkill_types[rfkill->type]); } static DEVICE_ATTR_RO(type); static ssize_t index_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%d\n", rfkill->idx); } static DEVICE_ATTR_RO(index); static ssize_t persistent_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%d\n", rfkill->persistent); } static DEVICE_ATTR_RO(persistent); static ssize_t hard_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0); } static DEVICE_ATTR_RO(hard); static ssize_t soft_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); mutex_unlock(&rfkill_global_mutex); return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 
1 : 0); } static ssize_t soft_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct rfkill *rfkill = to_rfkill(dev); unsigned long state; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; err = kstrtoul(buf, 0, &state); if (err) return err; if (state > 1 ) return -EINVAL; mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); rfkill_set_block(rfkill, state); mutex_unlock(&rfkill_global_mutex); return count; } static DEVICE_ATTR_RW(soft); static ssize_t hard_block_reasons_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); return sysfs_emit(buf, "0x%lx\n", rfkill->hard_block_reasons); } static DEVICE_ATTR_RO(hard_block_reasons); static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) return RFKILL_USER_STATE_HARD_BLOCKED; if (state & RFKILL_BLOCK_SW) return RFKILL_USER_STATE_SOFT_BLOCKED; return RFKILL_USER_STATE_UNBLOCKED; } static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rfkill *rfkill = to_rfkill(dev); mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); mutex_unlock(&rfkill_global_mutex); return sysfs_emit(buf, "%d\n", user_state_from_blocked(rfkill->state)); } static ssize_t state_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct rfkill *rfkill = to_rfkill(dev); unsigned long state; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; err = kstrtoul(buf, 0, &state); if (err) return err; if (state != RFKILL_USER_STATE_SOFT_BLOCKED && state != RFKILL_USER_STATE_UNBLOCKED) return -EINVAL; mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); rfkill_set_block(rfkill, state == RFKILL_USER_STATE_SOFT_BLOCKED); mutex_unlock(&rfkill_global_mutex); return count; } static DEVICE_ATTR_RW(state); static struct attribute *rfkill_dev_attrs[] = { &dev_attr_name.attr, &dev_attr_type.attr, &dev_attr_index.attr, &dev_attr_persistent.attr, &dev_attr_state.attr, &dev_attr_soft.attr, &dev_attr_hard.attr, &dev_attr_hard_block_reasons.attr, NULL, }; ATTRIBUTE_GROUPS(rfkill_dev); static void rfkill_release(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); kfree(rfkill); } static int rfkill_dev_uevent(const struct device *dev, struct kobj_uevent_env *env) { struct rfkill *rfkill = to_rfkill(dev); unsigned long flags; unsigned long reasons; u32 state; int error; error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name); if (error) return error; error = add_uevent_var(env, "RFKILL_TYPE=%s", rfkill_types[rfkill->type]); if (error) return error; spin_lock_irqsave(&rfkill->lock, flags); state = rfkill->state; reasons = rfkill->hard_block_reasons; spin_unlock_irqrestore(&rfkill->lock, flags); error = add_uevent_var(env, "RFKILL_STATE=%d", user_state_from_blocked(state)); if (error) return error; return add_uevent_var(env, "RFKILL_HW_BLOCK_REASON=0x%lx", reasons); } void rfkill_pause_polling(struct rfkill *rfkill) { BUG_ON(!rfkill); if (!rfkill->ops->poll) return; rfkill->polling_paused = true; cancel_delayed_work_sync(&rfkill->poll_work); } EXPORT_SYMBOL(rfkill_pause_polling); void rfkill_resume_polling(struct rfkill *rfkill) { BUG_ON(!rfkill); if (!rfkill->ops->poll) return; rfkill->polling_paused = false; if (rfkill->suspended) return; queue_delayed_work(system_power_efficient_wq, &rfkill->poll_work, 0); } EXPORT_SYMBOL(rfkill_resume_polling); #ifdef CONFIG_PM_SLEEP static int rfkill_suspend(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); rfkill->suspended = 
true; cancel_delayed_work_sync(&rfkill->poll_work); return 0; } static int rfkill_resume(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); bool cur; rfkill->suspended = false; if (!rfkill->registered) return 0; if (!rfkill->persistent) { cur = !!(rfkill->state & RFKILL_BLOCK_SW); rfkill_set_block(rfkill, cur); } if (rfkill->ops->poll && !rfkill->polling_paused) queue_delayed_work(system_power_efficient_wq, &rfkill->poll_work, 0); return 0; } static SIMPLE_DEV_PM_OPS(rfkill_pm_ops, rfkill_suspend, rfkill_resume); #define RFKILL_PM_OPS (&rfkill_pm_ops) #else #define RFKILL_PM_OPS NULL #endif static struct class rfkill_class = { .name = "rfkill", .dev_release = rfkill_release, .dev_groups = rfkill_dev_groups, .dev_uevent = rfkill_dev_uevent, .pm = RFKILL_PM_OPS, }; bool rfkill_blocked(struct rfkill *rfkill) { unsigned long flags; u32 state; spin_lock_irqsave(&rfkill->lock, flags); state = rfkill->state; spin_unlock_irqrestore(&rfkill->lock, flags); return !!(state & RFKILL_BLOCK_ANY); } EXPORT_SYMBOL(rfkill_blocked); bool rfkill_soft_blocked(struct rfkill *rfkill) { unsigned long flags; u32 state; spin_lock_irqsave(&rfkill->lock, flags); state = rfkill->state; spin_unlock_irqrestore(&rfkill->lock, flags); return !!(state & RFKILL_BLOCK_SW); } EXPORT_SYMBOL(rfkill_soft_blocked); struct rfkill * __must_check rfkill_alloc(const char *name, struct device *parent, const enum rfkill_type type, const struct rfkill_ops *ops, void *ops_data) { struct rfkill *rfkill; struct device *dev; if (WARN_ON(!ops)) return NULL; if (WARN_ON(!ops->set_block)) return NULL; if (WARN_ON(!name)) return NULL; if (WARN_ON(type == RFKILL_TYPE_ALL || type >= NUM_RFKILL_TYPES)) return NULL; rfkill = kzalloc(sizeof(*rfkill) + strlen(name) + 1, GFP_KERNEL); if (!rfkill) return NULL; spin_lock_init(&rfkill->lock); INIT_LIST_HEAD(&rfkill->node); rfkill->type = type; strcpy(rfkill->name, name); rfkill->ops = ops; rfkill->data = ops_data; dev = &rfkill->dev; dev->class = &rfkill_class; dev->parent = parent; device_initialize(dev); return rfkill; } EXPORT_SYMBOL(rfkill_alloc); static void rfkill_poll(struct work_struct *work) { struct rfkill *rfkill; rfkill = container_of(work, struct rfkill, poll_work.work); /* * Poll hardware state -- driver will use one of the * rfkill_set{,_hw,_sw}_state functions and use its * return value to update the current status. 
*/ rfkill->ops->poll(rfkill, rfkill->data); queue_delayed_work(system_power_efficient_wq, &rfkill->poll_work, round_jiffies_relative(POLL_INTERVAL)); } static void rfkill_uevent_work(struct work_struct *work) { struct rfkill *rfkill; rfkill = container_of(work, struct rfkill, uevent_work); mutex_lock(&rfkill_global_mutex); rfkill_event(rfkill); mutex_unlock(&rfkill_global_mutex); } static void rfkill_sync_work(struct work_struct *work) { struct rfkill *rfkill = container_of(work, struct rfkill, sync_work); mutex_lock(&rfkill_global_mutex); rfkill_sync(rfkill); mutex_unlock(&rfkill_global_mutex); } int __must_check rfkill_register(struct rfkill *rfkill) { static unsigned long rfkill_no; struct device *dev; int error; if (!rfkill) return -EINVAL; dev = &rfkill->dev; mutex_lock(&rfkill_global_mutex); if (rfkill->registered) { error = -EALREADY; goto unlock; } rfkill->idx = rfkill_no; dev_set_name(dev, "rfkill%lu", rfkill_no); rfkill_no++; list_add_tail(&rfkill->node, &rfkill_list); error = device_add(dev); if (error) goto remove; error = rfkill_led_trigger_register(rfkill); if (error) goto devdel; rfkill->registered = true; INIT_DELAYED_WORK(&rfkill->poll_work, rfkill_poll); INIT_WORK(&rfkill->uevent_work, rfkill_uevent_work); INIT_WORK(&rfkill->sync_work, rfkill_sync_work); if (rfkill->ops->poll) queue_delayed_work(system_power_efficient_wq, &rfkill->poll_work, round_jiffies_relative(POLL_INTERVAL)); if (!rfkill->persistent || rfkill_epo_lock_active) { rfkill->need_sync = true; schedule_work(&rfkill->sync_work); } else { #ifdef CONFIG_RFKILL_INPUT bool soft_blocked = !!(rfkill->state & RFKILL_BLOCK_SW); if (!atomic_read(&rfkill_input_disabled)) __rfkill_switch_all(rfkill->type, soft_blocked); #endif } rfkill_global_led_trigger_event(); rfkill_send_events(rfkill, RFKILL_OP_ADD); mutex_unlock(&rfkill_global_mutex); return 0; devdel: device_del(&rfkill->dev); remove: list_del_init(&rfkill->node); unlock: mutex_unlock(&rfkill_global_mutex); return error; } EXPORT_SYMBOL(rfkill_register); void rfkill_unregister(struct rfkill *rfkill) { BUG_ON(!rfkill); if (rfkill->ops->poll) cancel_delayed_work_sync(&rfkill->poll_work); cancel_work_sync(&rfkill->uevent_work); cancel_work_sync(&rfkill->sync_work); rfkill->registered = false; device_del(&rfkill->dev); mutex_lock(&rfkill_global_mutex); rfkill_send_events(rfkill, RFKILL_OP_DEL); list_del_init(&rfkill->node); rfkill_global_led_trigger_event(); mutex_unlock(&rfkill_global_mutex); rfkill_led_trigger_unregister(rfkill); } EXPORT_SYMBOL(rfkill_unregister); void rfkill_destroy(struct rfkill *rfkill) { if (rfkill) put_device(&rfkill->dev); } EXPORT_SYMBOL(rfkill_destroy); static int rfkill_fop_open(struct inode *inode, struct file *file) { struct rfkill_data *data; struct rfkill *rfkill; struct rfkill_int_event *ev, *tmp; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->max_size = RFKILL_EVENT_SIZE_V1; INIT_LIST_HEAD(&data->events); mutex_init(&data->mtx); init_waitqueue_head(&data->read_wait); mutex_lock(&rfkill_global_mutex); /* * start getting events from elsewhere but hold mtx to get * startup events added first */ list_for_each_entry(rfkill, &rfkill_list, node) { ev = kzalloc(sizeof(*ev), GFP_KERNEL); if (!ev) goto free; rfkill_sync(rfkill); rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD); mutex_lock(&data->mtx); list_add_tail(&ev->list, &data->events); mutex_unlock(&data->mtx); } list_add(&data->list, &rfkill_fds); mutex_unlock(&rfkill_global_mutex); file->private_data = data; return stream_open(inode, file); free: 
mutex_unlock(&rfkill_global_mutex); mutex_destroy(&data->mtx); list_for_each_entry_safe(ev, tmp, &data->events, list) kfree(ev); kfree(data); return -ENOMEM; } static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait) { struct rfkill_data *data = file->private_data; __poll_t res = EPOLLOUT | EPOLLWRNORM; poll_wait(file, &data->read_wait, wait); mutex_lock(&data->mtx); if (!list_empty(&data->events)) res = EPOLLIN | EPOLLRDNORM; mutex_unlock(&data->mtx); return res; } static ssize_t rfkill_fop_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct rfkill_data *data = file->private_data; struct rfkill_int_event *ev; unsigned long sz; int ret; mutex_lock(&data->mtx); while (list_empty(&data->events)) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; goto out; } mutex_unlock(&data->mtx); /* since we re-check and it just compares pointers, * using !list_empty() without locking isn't a problem */ ret = wait_event_interruptible(data->read_wait, !list_empty(&data->events)); mutex_lock(&data->mtx); if (ret) goto out; } ev = list_first_entry(&data->events, struct rfkill_int_event, list); sz = min_t(unsigned long, sizeof(ev->ev), count); sz = min_t(unsigned long, sz, data->max_size); ret = sz; if (copy_to_user(buf, &ev->ev, sz)) ret = -EFAULT; list_del(&ev->list); kfree(ev); out: mutex_unlock(&data->mtx); return ret; } static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { struct rfkill_data *data = file->private_data; struct rfkill *rfkill; struct rfkill_event_ext ev; int ret; /* we don't need the 'hard' variable but accept it */ if (count < RFKILL_EVENT_SIZE_V1 - 1) return -EINVAL; /* * Copy as much data as we can accept into our 'ev' buffer, * but tell userspace how much we've copied so it can determine * our API version even in a write() call, if it cares. 
*/ count = min(count, sizeof(ev)); count = min_t(size_t, count, data->max_size); if (copy_from_user(&ev, buf, count)) return -EFAULT; if (ev.type >= NUM_RFKILL_TYPES) return -EINVAL; mutex_lock(&rfkill_global_mutex); switch (ev.op) { case RFKILL_OP_CHANGE_ALL: rfkill_update_global_state(ev.type, ev.soft); list_for_each_entry(rfkill, &rfkill_list, node) if (rfkill->type == ev.type || ev.type == RFKILL_TYPE_ALL) rfkill_set_block(rfkill, ev.soft); ret = 0; break; case RFKILL_OP_CHANGE: list_for_each_entry(rfkill, &rfkill_list, node) if (rfkill->idx == ev.idx && (rfkill->type == ev.type || ev.type == RFKILL_TYPE_ALL)) rfkill_set_block(rfkill, ev.soft); ret = 0; break; default: ret = -EINVAL; break; } mutex_unlock(&rfkill_global_mutex); return ret ?: count; } static int rfkill_fop_release(struct inode *inode, struct file *file) { struct rfkill_data *data = file->private_data; struct rfkill_int_event *ev, *tmp; mutex_lock(&rfkill_global_mutex); list_del(&data->list); mutex_unlock(&rfkill_global_mutex); mutex_destroy(&data->mtx); list_for_each_entry_safe(ev, tmp, &data->events, list) kfree(ev); #ifdef CONFIG_RFKILL_INPUT if (data->input_handler) if (atomic_dec_return(&rfkill_input_disabled) == 0) printk(KERN_DEBUG "rfkill: input handler enabled\n"); #endif kfree(data); return 0; } static long rfkill_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct rfkill_data *data = file->private_data; int ret = -ENOTTY; u32 size; if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC) return -ENOTTY; mutex_lock(&data->mtx); switch (_IOC_NR(cmd)) { #ifdef CONFIG_RFKILL_INPUT case RFKILL_IOC_NOINPUT: if (!data->input_handler) { if (atomic_inc_return(&rfkill_input_disabled) == 1) printk(KERN_DEBUG "rfkill: input handler disabled\n"); data->input_handler = true; } ret = 0; break; #endif case RFKILL_IOC_MAX_SIZE: if (get_user(size, (__u32 __user *)arg)) { ret = -EFAULT; break; } if (size < RFKILL_EVENT_SIZE_V1 || size > U8_MAX) { ret = -EINVAL; break; } data->max_size = size; ret = 0; break; default: break; } mutex_unlock(&data->mtx); return ret; } static const struct file_operations rfkill_fops = { .owner = THIS_MODULE, .open = rfkill_fop_open, .read = rfkill_fop_read, .write = rfkill_fop_write, .poll = rfkill_fop_poll, .release = rfkill_fop_release, .unlocked_ioctl = rfkill_fop_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = no_llseek, }; #define RFKILL_NAME "rfkill" static struct miscdevice rfkill_miscdev = { .fops = &rfkill_fops, .name = RFKILL_NAME, .minor = RFKILL_MINOR, }; static int __init rfkill_init(void) { int error; rfkill_update_global_state(RFKILL_TYPE_ALL, !rfkill_default_state); error = class_register(&rfkill_class); if (error) goto error_class; error = misc_register(&rfkill_miscdev); if (error) goto error_misc; error = rfkill_global_led_trigger_register(); if (error) goto error_led_trigger; #ifdef CONFIG_RFKILL_INPUT error = rfkill_handler_init(); if (error) goto error_input; #endif return 0; #ifdef CONFIG_RFKILL_INPUT error_input: rfkill_global_led_trigger_unregister(); #endif error_led_trigger: misc_deregister(&rfkill_miscdev); error_misc: class_unregister(&rfkill_class); error_class: return error; } subsys_initcall(rfkill_init); static void __exit rfkill_exit(void) { #ifdef CONFIG_RFKILL_INPUT rfkill_handler_exit(); #endif rfkill_global_led_trigger_unregister(); misc_deregister(&rfkill_miscdev); class_unregister(&rfkill_class); } module_exit(rfkill_exit); MODULE_ALIAS_MISCDEV(RFKILL_MINOR); MODULE_ALIAS("devname:" RFKILL_NAME);
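/*
 * Illustrative sketch (added by the editor, not part of rfkill core.c):
 * roughly how a driver would consume the API exported above --
 * rfkill_alloc(), rfkill_register(), rfkill_set_hw_state_reason(),
 * rfkill_unregister() and rfkill_destroy(). The foo_* names and the
 * foo_radio structure are hypothetical.
 */
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/rfkill.h>

struct foo_radio {
	struct rfkill *rfkill;
	bool radio_enabled;
};

/* Invoked by the rfkill core whenever the soft-block state should change;
 * program the hardware and return 0 on success. */
static int foo_set_block(void *data, bool blocked)
{
	struct foo_radio *foo = data;

	foo->radio_enabled = !blocked;
	/* ... program the transmitter here ... */
	return 0;
}

static const struct rfkill_ops foo_rfkill_ops = {
	.set_block = foo_set_block,
};

static int foo_rfkill_probe(struct device *parent, struct foo_radio *foo)
{
	int err;

	foo->rfkill = rfkill_alloc("foo-wlan", parent, RFKILL_TYPE_WLAN,
				   &foo_rfkill_ops, foo);
	if (!foo->rfkill)
		return -ENOMEM;

	err = rfkill_register(foo->rfkill);
	if (err) {
		rfkill_destroy(foo->rfkill);
		return err;
	}
	return 0;
}

/* A hardware kill-switch interrupt handler would report its state with: */
static void foo_hw_switch_changed(struct foo_radio *foo, bool hw_blocked)
{
	rfkill_set_hw_state_reason(foo->rfkill, hw_blocked,
				   RFKILL_HARD_BLOCK_SIGNAL);
}

static void foo_rfkill_remove(struct foo_radio *foo)
{
	rfkill_unregister(foo->rfkill);
	rfkill_destroy(foo->rfkill);
}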
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sock

#if !defined(_TRACE_SOCK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SOCK_H

#include <net/sock.h>
#include <net/ipv6.h>
#include <linux/tracepoint.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>

#define family_names		\
	EM(AF_INET)		\
	EMe(AF_INET6)

/* The protocol traced by inet_sock_set_state */
#define inet_protocol_names	\
	EM(IPPROTO_TCP)		\
	EM(IPPROTO_DCCP)	\
	EM(IPPROTO_SCTP)	\
	EMe(IPPROTO_MPTCP)

#define tcp_state_names		\
	EM(TCP_ESTABLISHED)	\
	EM(TCP_SYN_SENT)	\
	EM(TCP_SYN_RECV)	\
	EM(TCP_FIN_WAIT1)	\
	EM(TCP_FIN_WAIT2)	\
	EM(TCP_TIME_WAIT)	\
	EM(TCP_CLOSE)		\
	EM(TCP_CLOSE_WAIT)	\
	EM(TCP_LAST_ACK)	\
	EM(TCP_LISTEN)		\
	EM(TCP_CLOSING)		\
	EMe(TCP_NEW_SYN_RECV)

#define skmem_kind_names	\
	EM(SK_MEM_SEND)		\
	EMe(SK_MEM_RECV)

/* enums need to be exported to user space */
#undef EM
#undef EMe
#define EM(a)	TRACE_DEFINE_ENUM(a);
#define EMe(a)	TRACE_DEFINE_ENUM(a);

family_names
inet_protocol_names
tcp_state_names
skmem_kind_names

#undef EM
#undef EMe
#define EM(a)	{ a, #a },
#define EMe(a)	{ a, #a }

#define show_family_name(val)		\
	__print_symbolic(val, family_names)

#define show_inet_protocol_name(val)	\
	__print_symbolic(val, inet_protocol_names)

#define show_tcp_state_name(val)	\
	__print_symbolic(val, tcp_state_names)

#define show_skmem_kind_names(val)	\
	__print_symbolic(val, skmem_kind_names)

TRACE_EVENT(sock_rcvqueue_full,

	TP_PROTO(struct sock *sk, struct sk_buff *skb),

	TP_ARGS(sk, skb),

	TP_STRUCT__entry(
		__field(int, rmem_alloc)
		__field(unsigned int, truesize)
		__field(int, sk_rcvbuf)
	),

	TP_fast_assign(
		__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
		__entry->truesize = skb->truesize;
		__entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf);
	),

	TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d",
		__entry->rmem_alloc, __entry->truesize, __entry->sk_rcvbuf)
);

TRACE_EVENT(sock_exceed_buf_limit,

	TP_PROTO(struct sock *sk, struct proto *prot, long allocated, int kind),

	TP_ARGS(sk, prot, allocated, kind),

	TP_STRUCT__entry(
		__array(char, name, 32)
		__array(long, sysctl_mem, 3)
		__field(long, allocated)
		__field(int, sysctl_rmem)
		__field(int, rmem_alloc)
		__field(int, sysctl_wmem)
		__field(int, wmem_alloc)
		__field(int, wmem_queued)
		__field(int, kind)
	),

	TP_fast_assign(
		strncpy(__entry->name,
prot->name, 32); __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->sysctl_wmem = sk_get_wmem0(sk, prot); __entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc); __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued); __entry->kind = kind; ), TP_printk("proto:%s sysctl_mem=%ld,%ld,%ld allocated=%ld sysctl_rmem=%d rmem_alloc=%d sysctl_wmem=%d wmem_alloc=%d wmem_queued=%d kind=%s", __entry->name, __entry->sysctl_mem[0], __entry->sysctl_mem[1], __entry->sysctl_mem[2], __entry->allocated, __entry->sysctl_rmem, __entry->rmem_alloc, __entry->sysctl_wmem, __entry->wmem_alloc, __entry->wmem_queued, show_skmem_kind_names(__entry->kind) ) ); TRACE_EVENT(inet_sock_set_state, TP_PROTO(const struct sock *sk, const int oldstate, const int newstate), TP_ARGS(sk, oldstate, newstate), TP_STRUCT__entry( __field(const void *, skaddr) __field(int, oldstate) __field(int, newstate) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u16, protocol) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; __entry->oldstate = oldstate; __entry->newstate = newstate; __entry->family = sk->sk_family; __entry->protocol = sk->sk_protocol; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { pin6 = (struct in6_addr *)__entry->saddr_v6; *pin6 = sk->sk_v6_rcv_saddr; pin6 = (struct in6_addr *)__entry->daddr_v6; *pin6 = sk->sk_v6_daddr; } else #endif { pin6 = (struct in6_addr *)__entry->saddr_v6; ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); pin6 = (struct in6_addr *)__entry->daddr_v6; ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); } ), TP_printk("family=%s protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", show_family_name(__entry->family), show_inet_protocol_name(__entry->protocol), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->oldstate), show_tcp_state_name(__entry->newstate)) ); TRACE_EVENT(inet_sk_error_report, TP_PROTO(const struct sock *sk), TP_ARGS(sk), TP_STRUCT__entry( __field(int, error) __field(__u16, sport) __field(__u16, dport) __field(__u16, family) __field(__u16, protocol) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) ), TP_fast_assign( const struct inet_sock *inet = inet_sk(sk); struct in6_addr *pin6; __be32 *p32; __entry->error = sk->sk_err; __entry->family = sk->sk_family; __entry->protocol = sk->sk_protocol; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { pin6 = (struct in6_addr *)__entry->saddr_v6; *pin6 = sk->sk_v6_rcv_saddr; pin6 = (struct in6_addr *)__entry->daddr_v6; *pin6 = sk->sk_v6_daddr; } else #endif { pin6 = (struct in6_addr 
*)__entry->saddr_v6; ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); pin6 = (struct in6_addr *)__entry->daddr_v6; ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); } ), TP_printk("family=%s protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c error=%d", show_family_name(__entry->family), show_inet_protocol_name(__entry->protocol), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, __entry->error) ); TRACE_EVENT(sk_data_ready, TP_PROTO(const struct sock *sk), TP_ARGS(sk), TP_STRUCT__entry( __field(const void *, skaddr) __field(__u16, family) __field(__u16, protocol) __field(unsigned long, ip) ), TP_fast_assign( __entry->skaddr = sk; __entry->family = sk->sk_family; __entry->protocol = sk->sk_protocol; __entry->ip = _RET_IP_; ), TP_printk("family=%u protocol=%u func=%ps", __entry->family, __entry->protocol, (void *)__entry->ip) ); /* * sock send/recv msg length */ DECLARE_EVENT_CLASS(sock_msg_length, TP_PROTO(struct sock *sk, int ret, int flags), TP_ARGS(sk, ret, flags), TP_STRUCT__entry( __field(void *, sk) __field(__u16, family) __field(__u16, protocol) __field(int, ret) __field(int, flags) ), TP_fast_assign( __entry->sk = sk; __entry->family = sk->sk_family; __entry->protocol = sk->sk_protocol; __entry->ret = ret; __entry->flags = flags; ), TP_printk("sk address = %p, family = %s protocol = %s, length = %d, error = %d, flags = 0x%x", __entry->sk, show_family_name(__entry->family), show_inet_protocol_name(__entry->protocol), !(__entry->flags & MSG_PEEK) ? (__entry->ret > 0 ? __entry->ret : 0) : 0, __entry->ret < 0 ? __entry->ret : 0, __entry->flags) ); DEFINE_EVENT(sock_msg_length, sock_send_length, TP_PROTO(struct sock *sk, int ret, int flags), TP_ARGS(sk, ret, flags) ); DEFINE_EVENT(sock_msg_length, sock_recv_length, TP_PROTO(struct sock *sk, int ret, int flags), TP_ARGS(sk, ret, flags) ); #endif /* _TRACE_SOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
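/*
 * Illustrative sketch (added by the editor, not part of the trace header
 * above): TRACE_EVENT() expands into trace_<name>() stubs that the
 * networking core calls at the matching code paths. The two wrappers below
 * are hypothetical; the real call sites live in the networking core, and
 * exactly one translation unit must define CREATE_TRACE_POINTS before
 * including the header so the tracepoints are instantiated.
 */
#include <net/sock.h>
#include <trace/events/sock.h>

static void foo_report_rcvqueue_full(struct sock *sk, struct sk_buff *skb)
{
	/* Fires the sock_rcvqueue_full event when the tracepoint is enabled,
	 * e.g. via tracefs: events/sock/sock_rcvqueue_full/enable. */
	trace_sock_rcvqueue_full(sk, skb);
}

static void foo_report_exceed_buf_limit(struct sock *sk, long allocated,
					int kind)
{
	/* Arguments mirror TP_PROTO(); kind is SK_MEM_SEND or SK_MEM_RECV. */
	trace_sock_exceed_buf_limit(sk, sk->sk_prot, allocated, kind);
}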
// SPDX-License-Identifier: GPL-2.0
/*
 * fs/sysfs/symlink.c - sysfs symlink implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
 *
 * Please see Documentation/filesystems/sysfs.rst for more information.
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/security.h>

#include "sysfs.h"

static int sysfs_do_create_link_sd(struct kernfs_node *parent,
				   struct kobject *target_kobj,
				   const char *name, int warn)
{
	struct kernfs_node *kn, *target = NULL;

	if (WARN_ON(!name || !parent))
		return -EINVAL;

	/*
	 * We don't own @target_kobj and it may be removed at any time.
	 * Synchronize using sysfs_symlink_target_lock. See
	 * sysfs_remove_dir() for details.
	 */
	spin_lock(&sysfs_symlink_target_lock);
	if (target_kobj->sd) {
		target = target_kobj->sd;
		kernfs_get(target);
	}
	spin_unlock(&sysfs_symlink_target_lock);

	if (!target)
		return -ENOENT;

	kn = kernfs_create_link(parent, name, target);
	kernfs_put(target);

	if (!IS_ERR(kn))
		return 0;

	if (warn && PTR_ERR(kn) == -EEXIST)
		sysfs_warn_dup(parent, name);
	return PTR_ERR(kn);
}

/**
 * sysfs_create_link_sd - create symlink to a given object.
 * @kn: directory we're creating the link in.
 * @target: object we're pointing to.
 * @name: name of the symlink.
 */
int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target,
			 const char *name)
{
	return sysfs_do_create_link_sd(kn, target, name, 1);
}

static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
				const char *name, int warn)
{
	struct kernfs_node *parent = NULL;

	if (!kobj)
		parent = sysfs_root_kn;
	else
		parent = kobj->sd;

	if (!parent)
		return -EFAULT;

	return sysfs_do_create_link_sd(parent, target, name, warn);
}

/**
 * sysfs_create_link - create symlink between two objects.
 * @kobj: object whose directory we're creating the link in.
 * @target: object we're pointing to.
 * @name: name of the symlink.
 */
int sysfs_create_link(struct kobject *kobj, struct kobject *target,
		      const char *name)
{
	return sysfs_do_create_link(kobj, target, name, 1);
}
EXPORT_SYMBOL_GPL(sysfs_create_link);

/**
 * sysfs_create_link_nowarn - create symlink between two objects.
 * @kobj: object whose directory we're creating the link in.
 * @target: object we're pointing to.
 * @name: name of the symlink.
 *
 * This function does the same as sysfs_create_link(), but it
 * doesn't warn if the link already exists.
 */
int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
			     const char *name)
{
	return sysfs_do_create_link(kobj, target, name, 0);
}
EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn);

/**
 * sysfs_delete_link - remove symlink in object's directory.
 * @kobj: object we're acting for.
 * @targ: object we're pointing to.
* @name: name of the symlink to remove. * * Unlike sysfs_remove_link sysfs_delete_link has enough information * to successfully delete symlinks in tagged directories. */ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, const char *name) { const void *ns = NULL; /* * We don't own @target and it may be removed at any time. * Synchronize using sysfs_symlink_target_lock. See * sysfs_remove_dir() for details. */ spin_lock(&sysfs_symlink_target_lock); if (targ->sd && kernfs_ns_enabled(kobj->sd)) ns = targ->sd->ns; spin_unlock(&sysfs_symlink_target_lock); kernfs_remove_by_name_ns(kobj->sd, name, ns); } /** * sysfs_remove_link - remove symlink in object's directory. * @kobj: object we're acting for. * @name: name of the symlink to remove. */ void sysfs_remove_link(struct kobject *kobj, const char *name) { struct kernfs_node *parent = NULL; if (!kobj) parent = sysfs_root_kn; else parent = kobj->sd; kernfs_remove_by_name(parent, name); } EXPORT_SYMBOL_GPL(sysfs_remove_link); /** * sysfs_rename_link_ns - rename symlink in object's directory. * @kobj: object we're acting for. * @targ: object we're pointing to. * @old: previous name of the symlink. * @new: new name of the symlink. * @new_ns: new namespace of the symlink. * * A helper function for the common rename symlink idiom. */ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, const char *old, const char *new, const void *new_ns) { struct kernfs_node *parent, *kn = NULL; const void *old_ns = NULL; int result; if (!kobj) parent = sysfs_root_kn; else parent = kobj->sd; if (targ->sd) old_ns = targ->sd->ns; result = -ENOENT; kn = kernfs_find_and_get_ns(parent, old, old_ns); if (!kn) goto out; result = -EINVAL; if (kernfs_type(kn) != KERNFS_LINK) goto out; if (kn->symlink.target_kn->priv != targ) goto out; result = kernfs_rename_ns(kn, parent, new, new_ns); out: kernfs_put(kn); return result; } EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
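/*
 * Illustrative sketch (added by the editor, not part of symlink.c): typical
 * driver-side use of the helpers exported above. The "peer" link name and
 * the foo_* wrappers are hypothetical.
 */
#include <linux/device.h>
#include <linux/sysfs.h>

static int foo_link_peer(struct device *dev, struct device *peer)
{
	/* Creates /sys/.../<dev>/peer pointing at <peer>'s directory.
	 * sysfs_create_link() warns on -EEXIST; use
	 * sysfs_create_link_nowarn() when duplicates are expected. */
	return sysfs_create_link(&dev->kobj, &peer->kobj, "peer");
}

static void foo_unlink_peer(struct device *dev)
{
	sysfs_remove_link(&dev->kobj, "peer");
}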
// SPDX-License-Identifier: GPL-2.0-only
/*
 * File: pn_dev.c
 *
 * Phonet network device
 *
 * Copyright (C) 2008 Nokia Corporation.
 *
 * Authors: Sakari Ailus <sakari.ailus@nokia.com>
 *          Rémi Denis-Courmont
 */

#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/phonet.h>
#include <linux/proc_fs.h>
#include <linux/if_arp.h>
#include <net/sock.h>
#include <net/netns/generic.h>
#include <net/phonet/pn_dev.h>

struct phonet_routes {
	struct mutex		lock;
	struct net_device __rcu	*table[64];
};

struct phonet_net {
	struct phonet_device_list pndevs;
	struct phonet_routes routes;
};

static unsigned int phonet_net_id __read_mostly;

static struct phonet_net *phonet_pernet(struct net *net)
{
	return net_generic(net, phonet_net_id);
}

struct phonet_device_list *phonet_device_list(struct net *net)
{
	struct phonet_net *pnn = phonet_pernet(net);
	return &pnn->pndevs;
}

/* Allocate new Phonet device.
*/ static struct phonet_device *__phonet_device_alloc(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); if (pnd == NULL) return NULL; pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); BUG_ON(!mutex_is_locked(&pndevs->lock)); list_add_rcu(&pnd->list, &pndevs->list); return pnd; } static struct phonet_device *__phonet_get(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; BUG_ON(!mutex_is_locked(&pndevs->lock)); list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; } return NULL; } static struct phonet_device *__phonet_get_rcu(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; list_for_each_entry_rcu(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; } return NULL; } static void phonet_device_destroy(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; ASSERT_RTNL(); mutex_lock(&pndevs->lock); pnd = __phonet_get(dev); if (pnd) list_del_rcu(&pnd->list); mutex_unlock(&pndevs->lock); if (pnd) { u8 addr; for_each_set_bit(addr, pnd->addrs, 64) phonet_address_notify(RTM_DELADDR, dev, addr); kfree(pnd); } } struct net_device *phonet_device_get(struct net *net) { struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; struct net_device *dev = NULL; rcu_read_lock(); list_for_each_entry_rcu(pnd, &pndevs->list, list) { dev = pnd->netdev; BUG_ON(!dev); if ((dev->reg_state == NETREG_REGISTERED) && ((pnd->netdev->flags & IFF_UP)) == IFF_UP) break; dev = NULL; } dev_hold(dev); rcu_read_unlock(); return dev; } int phonet_address_add(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; int err = 0; mutex_lock(&pndevs->lock); /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) pnd = __phonet_device_alloc(dev); if (unlikely(pnd == NULL)) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; mutex_unlock(&pndevs->lock); return err; } int phonet_address_del(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; int err = 0; mutex_lock(&pndevs->lock); pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) { err = -EADDRNOTAVAIL; pnd = NULL; } else if (bitmap_empty(pnd->addrs, 64)) list_del_rcu(&pnd->list); else pnd = NULL; mutex_unlock(&pndevs->lock); if (pnd) kfree_rcu(pnd, rcu); return err; } /* Gets a source address toward a destination, through a interface. 
*/ u8 phonet_address_get(struct net_device *dev, u8 daddr) { struct phonet_device *pnd; u8 saddr; rcu_read_lock(); pnd = __phonet_get_rcu(dev); if (pnd) { BUG_ON(bitmap_empty(pnd->addrs, 64)); /* Use same source address as destination, if possible */ if (test_bit(daddr >> 2, pnd->addrs)) saddr = daddr; else saddr = find_first_bit(pnd->addrs, 64) << 2; } else saddr = PN_NO_ADDR; rcu_read_unlock(); if (saddr == PN_NO_ADDR) { /* Fallback to another device */ struct net_device *def_dev; def_dev = phonet_device_get(dev_net(dev)); if (def_dev) { if (def_dev != dev) saddr = phonet_address_get(def_dev, daddr); dev_put(def_dev); } } return saddr; } int phonet_address_lookup(struct net *net, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; int err = -EADDRNOTAVAIL; rcu_read_lock(); list_for_each_entry_rcu(pnd, &pndevs->list, list) { /* Don't allow unregistering devices! */ if ((pnd->netdev->reg_state != NETREG_REGISTERED) || ((pnd->netdev->flags & IFF_UP)) != IFF_UP) continue; if (test_bit(addr >> 2, pnd->addrs)) { err = 0; goto found; } } found: rcu_read_unlock(); return err; } /* automatically configure a Phonet device, if supported */ static int phonet_device_autoconf(struct net_device *dev) { struct if_phonet_req req; int ret; if (!dev->netdev_ops->ndo_siocdevprivate) return -EOPNOTSUPP; ret = dev->netdev_ops->ndo_siocdevprivate(dev, (struct ifreq *)&req, NULL, SIOCPNGAUTOCONF); if (ret < 0) return ret; ASSERT_RTNL(); ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device); if (ret) return ret; phonet_address_notify(RTM_NEWADDR, dev, req.ifr_phonet_autoconf.device); return 0; } static void phonet_route_autodel(struct net_device *dev) { struct phonet_net *pnn = phonet_pernet(dev_net(dev)); unsigned int i; DECLARE_BITMAP(deleted, 64); /* Remove left-over Phonet routes */ bitmap_zero(deleted, 64); mutex_lock(&pnn->routes.lock); for (i = 0; i < 64; i++) if (rcu_access_pointer(pnn->routes.table[i]) == dev) { RCU_INIT_POINTER(pnn->routes.table[i], NULL); set_bit(i, deleted); } mutex_unlock(&pnn->routes.lock); if (bitmap_empty(deleted, 64)) return; /* short-circuit RCU */ synchronize_rcu(); for_each_set_bit(i, deleted, 64) { rtm_phonet_notify(RTM_DELROUTE, dev, i); dev_put(dev); } } /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (what) { case NETDEV_REGISTER: if (dev->type == ARPHRD_PHONET) phonet_device_autoconf(dev); break; case NETDEV_UNREGISTER: phonet_device_destroy(dev); phonet_route_autodel(dev); break; } return 0; } static struct notifier_block phonet_device_notifier = { .notifier_call = phonet_device_notify, .priority = 0, }; /* Per-namespace Phonet devices handling */ static int __net_init phonet_init_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); if (!proc_create_net("phonet", 0, net->proc_net, &pn_sock_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); mutex_init(&pnn->pndevs.lock); mutex_init(&pnn->routes.lock); return 0; } static void __net_exit phonet_exit_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); remove_proc_entry("phonet", net->proc_net); WARN_ON_ONCE(!list_empty(&pnn->pndevs.list)); } static struct pernet_operations phonet_net_ops = { .init = phonet_init_net, .exit = phonet_exit_net, .id = &phonet_net_id, .size = sizeof(struct phonet_net), }; /* Initialize Phonet devices list */ int __init 
phonet_device_init(void) { int err = register_pernet_subsys(&phonet_net_ops); if (err) return err; proc_create_net("pnresource", 0, init_net.proc_net, &pn_res_seq_ops, sizeof(struct seq_net_private)); register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); if (err) phonet_device_exit(); return err; } void phonet_device_exit(void) { rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); unregister_pernet_subsys(&phonet_net_ops); remove_proc_entry("pnresource", init_net.proc_net); } int phonet_route_add(struct net_device *dev, u8 daddr) { struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; int err = -EEXIST; daddr = daddr >> 2; mutex_lock(&routes->lock); if (routes->table[daddr] == NULL) { rcu_assign_pointer(routes->table[daddr], dev); dev_hold(dev); err = 0; } mutex_unlock(&routes->lock); return err; } int phonet_route_del(struct net_device *dev, u8 daddr) { struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; daddr = daddr >> 2; mutex_lock(&routes->lock); if (rcu_access_pointer(routes->table[daddr]) == dev) RCU_INIT_POINTER(routes->table[daddr], NULL); else dev = NULL; mutex_unlock(&routes->lock); if (!dev) return -ENOENT; synchronize_rcu(); dev_put(dev); return 0; } struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr) { struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; daddr >>= 2; dev = rcu_dereference(routes->table[daddr]); return dev; } struct net_device *phonet_route_output(struct net *net, u8 daddr) { struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; daddr >>= 2; rcu_read_lock(); dev = rcu_dereference(routes->table[daddr]); dev_hold(dev); rcu_read_unlock(); if (!dev) dev = phonet_device_get(net); /* Default route */ return dev; }
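/*
 * Illustrative sketch (added by the editor, not part of pn_dev.c): how the
 * routing and addressing helpers above combine when resolving an output
 * device and source address for a Phonet destination. foo_phonet_select()
 * is hypothetical; the real callers are in the Phonet socket/routing code.
 */
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/phonet.h>
#include <net/phonet/pn_dev.h>

static int foo_phonet_select(struct net *net, u8 daddr,
			     struct net_device **out_dev, u8 *out_saddr)
{
	struct net_device *dev;
	u8 saddr;

	/* Prefer an explicit route; phonet_route_output() falls back to the
	 * first usable Phonet device and returns it with a reference held. */
	dev = phonet_route_output(net, daddr);
	if (!dev)
		return -EHOSTUNREACH;

	saddr = phonet_address_get(dev, daddr);
	if (saddr == PN_NO_ADDR) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}

	*out_dev = dev;		/* caller must dev_put() when done */
	*out_saddr = saddr;
	return 0;
}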
// SPDX-License-Identifier: GPL-2.0+
/*****************************************************************************/
/*
 *	uss720.c -- USS720 USB Parport Cable.
* * Copyright (C) 1999, 2005, 2010 * Thomas Sailer (t.sailer@alumni.ethz.ch) * * Based on parport_pc.c * * History: * 0.1 04.08.1999 Created * 0.2 07.08.1999 Some fixes mainly suggested by Tim Waugh * Interrupt handling currently disabled because * usb_request_irq crashes somewhere within ohci.c * for no apparent reason (that is for me, anyway) * ECP currently untested * 0.3 10.08.1999 fixing merge errors * 0.4 13.08.1999 Added Vendor/Product ID of Brad Hard's cable * 0.5 20.09.1999 usb_control_msg wrapper used * Nov01.2000 usb_device_table support by Adam J. Richter * 08.04.2001 Identify version on module load. gb * 0.6 02.09.2005 Fix "scheduling in interrupt" problem by making save/restore * context asynchronous * */ /*****************************************************************************/ #include <linux/module.h> #include <linux/socket.h> #include <linux/parport.h> #include <linux/init.h> #include <linux/usb.h> #include <linux/delay.h> #include <linux/completion.h> #include <linux/kref.h> #include <linux/slab.h> #include <linux/sched/signal.h> #define DRIVER_AUTHOR "Thomas M. Sailer, t.sailer@alumni.ethz.ch" #define DRIVER_DESC "USB Parport Cable driver for Cables using the Lucent Technologies USS720 Chip" /* --------------------------------------------------------------------- */ struct parport_uss720_private { struct usb_device *usbdev; struct parport *pp; struct kref ref_count; __u8 reg[7]; /* USB registers */ struct list_head asynclist; spinlock_t asynclock; }; struct uss720_async_request { struct parport_uss720_private *priv; struct kref ref_count; struct list_head asynclist; struct completion compl; struct urb *urb; struct usb_ctrlrequest *dr; __u8 reg[7]; }; /* --------------------------------------------------------------------- */ static void destroy_priv(struct kref *kref) { struct parport_uss720_private *priv = container_of(kref, struct parport_uss720_private, ref_count); dev_dbg(&priv->usbdev->dev, "destroying priv datastructure\n"); usb_put_dev(priv->usbdev); priv->usbdev = NULL; kfree(priv); } static void destroy_async(struct kref *kref) { struct uss720_async_request *rq = container_of(kref, struct uss720_async_request, ref_count); struct parport_uss720_private *priv = rq->priv; unsigned long flags; if (likely(rq->urb)) usb_free_urb(rq->urb); kfree(rq->dr); spin_lock_irqsave(&priv->asynclock, flags); list_del_init(&rq->asynclist); spin_unlock_irqrestore(&priv->asynclock, flags); kfree(rq); kref_put(&priv->ref_count, destroy_priv); } /* --------------------------------------------------------------------- */ static void async_complete(struct urb *urb) { struct uss720_async_request *rq; struct parport *pp; struct parport_uss720_private *priv; int status = urb->status; rq = urb->context; priv = rq->priv; pp = priv->pp; if (status) { dev_err(&urb->dev->dev, "async_complete: urb error %d\n", status); } else if (rq->dr->bRequest == 3) { memcpy(priv->reg, rq->reg, sizeof(priv->reg)); #if 0 dev_dbg(&priv->usbdev->dev, "async_complete regs %7ph\n", priv->reg); #endif /* if nAck interrupts are enabled and we have an interrupt, call the interrupt procedure */ if (rq->reg[2] & rq->reg[1] & 0x10 && pp) parport_generic_irq(pp); } complete(&rq->compl); kref_put(&rq->ref_count, destroy_async); } static struct uss720_async_request *submit_async_request(struct parport_uss720_private *priv, __u8 request, __u8 requesttype, __u16 value, __u16 index, gfp_t mem_flags) { struct usb_device *usbdev; struct uss720_async_request *rq; unsigned long flags; int ret; if (!priv) return NULL; usbdev 
= priv->usbdev; if (!usbdev) return NULL; rq = kzalloc(sizeof(struct uss720_async_request), mem_flags); if (!rq) return NULL; kref_init(&rq->ref_count); INIT_LIST_HEAD(&rq->asynclist); init_completion(&rq->compl); kref_get(&priv->ref_count); rq->priv = priv; rq->urb = usb_alloc_urb(0, mem_flags); if (!rq->urb) { kref_put(&rq->ref_count, destroy_async); return NULL; } rq->dr = kmalloc(sizeof(*rq->dr), mem_flags); if (!rq->dr) { kref_put(&rq->ref_count, destroy_async); return NULL; } rq->dr->bRequestType = requesttype; rq->dr->bRequest = request; rq->dr->wValue = cpu_to_le16(value); rq->dr->wIndex = cpu_to_le16(index); rq->dr->wLength = cpu_to_le16((request == 3) ? sizeof(rq->reg) : 0); usb_fill_control_urb(rq->urb, usbdev, (requesttype & 0x80) ? usb_rcvctrlpipe(usbdev, 0) : usb_sndctrlpipe(usbdev, 0), (unsigned char *)rq->dr, (request == 3) ? rq->reg : NULL, (request == 3) ? sizeof(rq->reg) : 0, async_complete, rq); /* rq->urb->transfer_flags |= URB_ASYNC_UNLINK; */ spin_lock_irqsave(&priv->asynclock, flags); list_add_tail(&rq->asynclist, &priv->asynclist); spin_unlock_irqrestore(&priv->asynclock, flags); kref_get(&rq->ref_count); ret = usb_submit_urb(rq->urb, mem_flags); if (!ret) return rq; destroy_async(&rq->ref_count); dev_err(&usbdev->dev, "submit_async_request submit_urb failed with %d\n", ret); return NULL; } static unsigned int kill_all_async_requests_priv(struct parport_uss720_private *priv) { struct uss720_async_request *rq; unsigned long flags; unsigned int ret = 0; spin_lock_irqsave(&priv->asynclock, flags); list_for_each_entry(rq, &priv->asynclist, asynclist) { usb_unlink_urb(rq->urb); ret++; } spin_unlock_irqrestore(&priv->asynclock, flags); return ret; } /* --------------------------------------------------------------------- */ static int get_1284_register(struct parport *pp, unsigned char reg, unsigned char *val, gfp_t mem_flags) { struct parport_uss720_private *priv; struct uss720_async_request *rq; static const unsigned char regindex[9] = { 4, 0, 1, 5, 5, 0, 2, 3, 6 }; int ret; if (!pp) return -EIO; priv = pp->private_data; rq = submit_async_request(priv, 3, 0xc0, ((unsigned int)reg) << 8, 0, mem_flags); if (!rq) { dev_err(&priv->usbdev->dev, "get_1284_register(%u) failed", (unsigned int)reg); return -EIO; } if (!val) { kref_put(&rq->ref_count, destroy_async); return 0; } if (wait_for_completion_timeout(&rq->compl, HZ)) { ret = rq->urb->status; *val = priv->reg[(reg >= 9) ? 
0 : regindex[reg]]; if (ret) printk(KERN_WARNING "get_1284_register: " "usb error %d\n", ret); kref_put(&rq->ref_count, destroy_async); return ret; } printk(KERN_WARNING "get_1284_register timeout\n"); kill_all_async_requests_priv(priv); return -EIO; } static int set_1284_register(struct parport *pp, unsigned char reg, unsigned char val, gfp_t mem_flags) { struct parport_uss720_private *priv; struct uss720_async_request *rq; if (!pp) return -EIO; priv = pp->private_data; rq = submit_async_request(priv, 4, 0x40, (((unsigned int)reg) << 8) | val, 0, mem_flags); if (!rq) { dev_err(&priv->usbdev->dev, "set_1284_register(%u,%u) failed", (unsigned int)reg, (unsigned int)val); return -EIO; } kref_put(&rq->ref_count, destroy_async); return 0; } /* --------------------------------------------------------------------- */ /* ECR modes */ #define ECR_SPP 00 #define ECR_PS2 01 #define ECR_PPF 02 #define ECR_ECP 03 #define ECR_EPP 04 /* Safely change the mode bits in the ECR */ static int change_mode(struct parport *pp, int m) { struct parport_uss720_private *priv = pp->private_data; int mode; __u8 reg; if (get_1284_register(pp, 6, &reg, GFP_KERNEL)) return -EIO; /* Bits <7:5> contain the mode. */ mode = (priv->reg[2] >> 5) & 0x7; if (mode == m) return 0; /* We have to go through mode 000 or 001 */ if (mode > ECR_PS2 && m > ECR_PS2) if (change_mode(pp, ECR_PS2)) return -EIO; if (m <= ECR_PS2 && !(priv->reg[1] & 0x20)) { /* This mode resets the FIFO, so we may * have to wait for it to drain first. */ unsigned long expire = jiffies + pp->physport->cad->timeout; switch (mode) { case ECR_PPF: /* Parallel Port FIFO mode */ case ECR_ECP: /* ECP Parallel Port mode */ /* Poll slowly. */ for (;;) { if (get_1284_register(pp, 6, &reg, GFP_KERNEL)) return -EIO; if (priv->reg[2] & 0x01) break; if (time_after_eq (jiffies, expire)) /* The FIFO is stuck. */ return -EBUSY; msleep_interruptible(10); if (signal_pending (current)) break; } } } /* Set the mode. */ if (set_1284_register(pp, 6, m << 5, GFP_KERNEL)) return -EIO; if (get_1284_register(pp, 6, &reg, GFP_KERNEL)) return -EIO; return 0; } /* * Clear TIMEOUT BIT in EPP MODE */ static int clear_epp_timeout(struct parport *pp) { unsigned char stat; if (get_1284_register(pp, 1, &stat, GFP_KERNEL)) return 1; return stat & 1; } /* * Access functions. 
*/ #if 0 static int uss720_irq(int usbstatus, void *buffer, int len, void *dev_id) { struct parport *pp = (struct parport *)dev_id; struct parport_uss720_private *priv = pp->private_data; if (usbstatus != 0 || len < 4 || !buffer) return 1; memcpy(priv->reg, buffer, 4); /* if nAck interrupts are enabled and we have an interrupt, call the interrupt procedure */ if (priv->reg[2] & priv->reg[1] & 0x10) parport_generic_irq(pp); return 1; } #endif static void parport_uss720_write_data(struct parport *pp, unsigned char d) { set_1284_register(pp, 0, d, GFP_KERNEL); } static unsigned char parport_uss720_read_data(struct parport *pp) { unsigned char ret; if (get_1284_register(pp, 0, &ret, GFP_KERNEL)) return 0; return ret; } static void parport_uss720_write_control(struct parport *pp, unsigned char d) { struct parport_uss720_private *priv = pp->private_data; d = (d & 0xf) | (priv->reg[1] & 0xf0); if (set_1284_register(pp, 2, d, GFP_KERNEL)) return; priv->reg[1] = d; } static unsigned char parport_uss720_read_control(struct parport *pp) { struct parport_uss720_private *priv = pp->private_data; return priv->reg[1] & 0xf; /* Use soft copy */ } static unsigned char parport_uss720_frob_control(struct parport *pp, unsigned char mask, unsigned char val) { struct parport_uss720_private *priv = pp->private_data; unsigned char d; mask &= 0x0f; val &= 0x0f; d = (priv->reg[1] & (~mask)) ^ val; if (set_1284_register(pp, 2, d, GFP_ATOMIC)) return 0; priv->reg[1] = d; return d & 0xf; } static unsigned char parport_uss720_read_status(struct parport *pp) { unsigned char ret; if (get_1284_register(pp, 1, &ret, GFP_ATOMIC)) return 0; return ret & 0xf8; } static void parport_uss720_disable_irq(struct parport *pp) { struct parport_uss720_private *priv = pp->private_data; unsigned char d; d = priv->reg[1] & ~0x10; if (set_1284_register(pp, 2, d, GFP_KERNEL)) return; priv->reg[1] = d; } static void parport_uss720_enable_irq(struct parport *pp) { struct parport_uss720_private *priv = pp->private_data; unsigned char d; d = priv->reg[1] | 0x10; if (set_1284_register(pp, 2, d, GFP_KERNEL)) return; priv->reg[1] = d; } static void parport_uss720_data_forward (struct parport *pp) { struct parport_uss720_private *priv = pp->private_data; unsigned char d; d = priv->reg[1] & ~0x20; if (set_1284_register(pp, 2, d, GFP_KERNEL)) return; priv->reg[1] = d; } static void parport_uss720_data_reverse (struct parport *pp) { struct parport_uss720_private *priv = pp->private_data; unsigned char d; d = priv->reg[1] | 0x20; if (set_1284_register(pp, 2, d, GFP_KERNEL)) return; priv->reg[1] = d; } static void parport_uss720_init_state(struct pardevice *dev, struct parport_state *s) { s->u.pc.ctr = 0xc | (dev->irq_func ? 
0x10 : 0x0); s->u.pc.ecr = 0x24; } static void parport_uss720_save_state(struct parport *pp, struct parport_state *s) { struct parport_uss720_private *priv = pp->private_data; #if 0 if (get_1284_register(pp, 2, NULL, GFP_ATOMIC)) return; #endif s->u.pc.ctr = priv->reg[1]; s->u.pc.ecr = priv->reg[2]; } static void parport_uss720_restore_state(struct parport *pp, struct parport_state *s) { struct parport_uss720_private *priv = pp->private_data; set_1284_register(pp, 2, s->u.pc.ctr, GFP_ATOMIC); set_1284_register(pp, 6, s->u.pc.ecr, GFP_ATOMIC); get_1284_register(pp, 2, NULL, GFP_ATOMIC); priv->reg[1] = s->u.pc.ctr; priv->reg[2] = s->u.pc.ecr; } static size_t parport_uss720_epp_read_data(struct parport *pp, void *buf, size_t length, int flags) { struct parport_uss720_private *priv = pp->private_data; size_t got = 0; if (change_mode(pp, ECR_EPP)) return 0; for (; got < length; got++) { if (get_1284_register(pp, 4, (char *)buf, GFP_KERNEL)) break; buf++; if (priv->reg[0] & 0x01) { clear_epp_timeout(pp); break; } } change_mode(pp, ECR_PS2); return got; } static size_t parport_uss720_epp_write_data(struct parport *pp, const void *buf, size_t length, int flags) { #if 0 struct parport_uss720_private *priv = pp->private_data; size_t written = 0; if (change_mode(pp, ECR_EPP)) return 0; for (; written < length; written++) { if (set_1284_register(pp, 4, (char *)buf, GFP_KERNEL)) break; ((char*)buf)++; if (get_1284_register(pp, 1, NULL, GFP_KERNEL)) break; if (priv->reg[0] & 0x01) { clear_epp_timeout(pp); break; } } change_mode(pp, ECR_PS2); return written; #else struct parport_uss720_private *priv = pp->private_data; struct usb_device *usbdev = priv->usbdev; int rlen = 0; int i; if (!usbdev) return 0; if (change_mode(pp, ECR_EPP)) return 0; i = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, 1), (void *)buf, length, &rlen, 20000); if (i) printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %zu rlen %u\n", buf, length, rlen); change_mode(pp, ECR_PS2); return rlen; #endif } static size_t parport_uss720_epp_read_addr(struct parport *pp, void *buf, size_t length, int flags) { struct parport_uss720_private *priv = pp->private_data; size_t got = 0; if (change_mode(pp, ECR_EPP)) return 0; for (; got < length; got++) { if (get_1284_register(pp, 3, (char *)buf, GFP_KERNEL)) break; buf++; if (priv->reg[0] & 0x01) { clear_epp_timeout(pp); break; } } change_mode(pp, ECR_PS2); return got; } static size_t parport_uss720_epp_write_addr(struct parport *pp, const void *buf, size_t length, int flags) { struct parport_uss720_private *priv = pp->private_data; size_t written = 0; if (change_mode(pp, ECR_EPP)) return 0; for (; written < length; written++) { if (set_1284_register(pp, 3, *(char *)buf, GFP_KERNEL)) break; buf++; if (get_1284_register(pp, 1, NULL, GFP_KERNEL)) break; if (priv->reg[0] & 0x01) { clear_epp_timeout(pp); break; } } change_mode(pp, ECR_PS2); return written; } static size_t parport_uss720_ecp_write_data(struct parport *pp, const void *buffer, size_t len, int flags) { struct parport_uss720_private *priv = pp->private_data; struct usb_device *usbdev = priv->usbdev; int rlen = 0; int i; if (!usbdev) return 0; if (change_mode(pp, ECR_ECP)) return 0; i = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, 1), (void *)buffer, len, &rlen, 20000); if (i) printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %zu rlen %u\n", buffer, len, rlen); change_mode(pp, ECR_PS2); return rlen; } static size_t parport_uss720_ecp_read_data(struct parport *pp, void *buffer, size_t len, int flags) { struct parport_uss720_private *priv = 
pp->private_data; struct usb_device *usbdev = priv->usbdev; int rlen = 0; int i; if (!usbdev) return 0; if (change_mode(pp, ECR_ECP)) return 0; i = usb_bulk_msg(usbdev, usb_rcvbulkpipe(usbdev, 2), buffer, len, &rlen, 20000); if (i) printk(KERN_ERR "uss720: recvbulk ep 2 buf %p len %zu rlen %u\n", buffer, len, rlen); change_mode(pp, ECR_PS2); return rlen; } static size_t parport_uss720_ecp_write_addr(struct parport *pp, const void *buffer, size_t len, int flags) { size_t written = 0; if (change_mode(pp, ECR_ECP)) return 0; for (; written < len; written++) { if (set_1284_register(pp, 5, *(char *)buffer, GFP_KERNEL)) break; buffer++; } change_mode(pp, ECR_PS2); return written; } static size_t parport_uss720_write_compat(struct parport *pp, const void *buffer, size_t len, int flags) { struct parport_uss720_private *priv = pp->private_data; struct usb_device *usbdev = priv->usbdev; int rlen = 0; int i; if (!usbdev) return 0; if (change_mode(pp, ECR_PPF)) return 0; i = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, 1), (void *)buffer, len, &rlen, 20000); if (i) printk(KERN_ERR "uss720: sendbulk ep 1 buf %p len %zu rlen %u\n", buffer, len, rlen); change_mode(pp, ECR_PS2); return rlen; } /* --------------------------------------------------------------------- */ static struct parport_operations parport_uss720_ops = { .owner = THIS_MODULE, .write_data = parport_uss720_write_data, .read_data = parport_uss720_read_data, .write_control = parport_uss720_write_control, .read_control = parport_uss720_read_control, .frob_control = parport_uss720_frob_control, .read_status = parport_uss720_read_status, .enable_irq = parport_uss720_enable_irq, .disable_irq = parport_uss720_disable_irq, .data_forward = parport_uss720_data_forward, .data_reverse = parport_uss720_data_reverse, .init_state = parport_uss720_init_state, .save_state = parport_uss720_save_state, .restore_state = parport_uss720_restore_state, .epp_write_data = parport_uss720_epp_write_data, .epp_read_data = parport_uss720_epp_read_data, .epp_write_addr = parport_uss720_epp_write_addr, .epp_read_addr = parport_uss720_epp_read_addr, .ecp_write_data = parport_uss720_ecp_write_data, .ecp_read_data = parport_uss720_ecp_read_data, .ecp_write_addr = parport_uss720_ecp_write_addr, .compat_write_data = parport_uss720_write_compat, .nibble_read_data = parport_ieee1284_read_nibble, .byte_read_data = parport_ieee1284_read_byte, }; /* --------------------------------------------------------------------- */ static int uss720_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usbdev = usb_get_dev(interface_to_usbdev(intf)); struct usb_host_interface *interface; struct usb_endpoint_descriptor *epd; struct parport_uss720_private *priv; struct parport *pp; unsigned char reg; int ret; dev_dbg(&intf->dev, "probe: vendor id 0x%x, device id 0x%x\n", le16_to_cpu(usbdev->descriptor.idVendor), le16_to_cpu(usbdev->descriptor.idProduct)); /* our known interfaces have 3 alternate settings */ if (intf->num_altsetting != 3) { usb_put_dev(usbdev); return -ENODEV; } ret = usb_set_interface(usbdev, intf->altsetting->desc.bInterfaceNumber, 2); dev_dbg(&intf->dev, "set interface result %d\n", ret); interface = intf->cur_altsetting; if (interface->desc.bNumEndpoints < 2) { usb_put_dev(usbdev); return -ENODEV; } /* * Allocate parport interface */ priv = kzalloc(sizeof(struct parport_uss720_private), GFP_KERNEL); if (!priv) { usb_put_dev(usbdev); return -ENOMEM; } priv->pp = NULL; priv->usbdev = usbdev; kref_init(&priv->ref_count); 
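	/* asynclock/asynclist must be ready before the first 1284 register
	 * access below: set_1284_register() queues onto them via
	 * submit_async_request(). */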
spin_lock_init(&priv->asynclock); INIT_LIST_HEAD(&priv->asynclist); pp = parport_register_port(0, PARPORT_IRQ_NONE, PARPORT_DMA_NONE, &parport_uss720_ops); if (!pp) { printk(KERN_WARNING "uss720: could not register parport\n"); goto probe_abort; } priv->pp = pp; pp->private_data = priv; pp->modes = PARPORT_MODE_PCSPP | PARPORT_MODE_TRISTATE | PARPORT_MODE_EPP | PARPORT_MODE_COMPAT; if (interface->desc.bNumEndpoints >= 3) pp->modes |= PARPORT_MODE_ECP; pp->dev = &usbdev->dev; /* set the USS720 control register to manual mode, no ECP compression, enable all ints */ set_1284_register(pp, 7, 0x00, GFP_KERNEL); set_1284_register(pp, 6, 0x30, GFP_KERNEL); /* PS/2 mode */ set_1284_register(pp, 2, 0x0c, GFP_KERNEL); /* The Belkin F5U002 Rev 2 P80453-B USB parallel port adapter shares the * device ID 050d:0002 with some other device that works with this * driver, but it itself does not. Detect and handle the bad cable * here. */ ret = get_1284_register(pp, 0, &reg, GFP_KERNEL); dev_dbg(&intf->dev, "reg: %7ph\n", priv->reg); if (ret < 0) return ret; ret = usb_find_last_int_in_endpoint(interface, &epd); if (!ret) { dev_dbg(&intf->dev, "epaddr %d interval %d\n", epd->bEndpointAddress, epd->bInterval); } parport_announce_port(pp); usb_set_intfdata(intf, pp); return 0; probe_abort: kill_all_async_requests_priv(priv); kref_put(&priv->ref_count, destroy_priv); return -ENODEV; } static void uss720_disconnect(struct usb_interface *intf) { struct parport *pp = usb_get_intfdata(intf); struct parport_uss720_private *priv; dev_dbg(&intf->dev, "disconnect\n"); usb_set_intfdata(intf, NULL); if (pp) { priv = pp->private_data; priv->pp = NULL; dev_dbg(&intf->dev, "parport_remove_port\n"); parport_remove_port(pp); parport_put_port(pp); kill_all_async_requests_priv(priv); kref_put(&priv->ref_count, destroy_priv); } dev_dbg(&intf->dev, "disconnect done\n"); } /* table of cables that work through this driver */ static const struct usb_device_id uss720_table[] = { { USB_DEVICE(0x047e, 0x1001) }, /* Infowave 901-0030 */ { USB_DEVICE(0x04b8, 0x0002) }, /* Epson CAEUL0002 ISD-103 */ { USB_DEVICE(0x04b8, 0x0003) }, /* Epson ISD-101 */ { USB_DEVICE(0x050d, 0x0002) }, { USB_DEVICE(0x050d, 0x1202) }, /* Belkin F5U120-PC */ { USB_DEVICE(0x0557, 0x2001) }, { USB_DEVICE(0x05ab, 0x0002) }, /* Belkin F5U002 ISD-101 */ { USB_DEVICE(0x05ab, 0x1001) }, /* Belkin F5U002 P80453-A */ { USB_DEVICE(0x06c6, 0x0100) }, /* Infowave ISD-103 */ { USB_DEVICE(0x0729, 0x1284) }, { USB_DEVICE(0x1293, 0x0002) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE (usb, uss720_table); static struct usb_driver uss720_driver = { .name = "uss720", .probe = uss720_probe, .disconnect = uss720_disconnect, .id_table = uss720_table, }; /* --------------------------------------------------------------------- */ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static int __init uss720_init(void) { int retval; retval = usb_register(&uss720_driver); if (retval) goto out; printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_DESC "\n"); printk(KERN_INFO KBUILD_MODNAME ": NOTE: this is a special purpose " "driver to allow nonstandard\n"); printk(KERN_INFO KBUILD_MODNAME ": protocols (eg. 
bitbang) over " "USS720 usb to parallel cables\n"); printk(KERN_INFO KBUILD_MODNAME ": If you just want to connect to a " "printer, use usblp instead\n"); out: return retval; } static void __exit uss720_cleanup(void) { usb_deregister(&uss720_driver); } module_init(uss720_init); module_exit(uss720_cleanup); /* --------------------------------------------------------------------- */
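/*
 * Illustrative sketch only -- not part of the driver above.  It shows the
 * same USS720 "read 1284 register block" control transfer that
 * submit_async_request() builds (bRequest 3, bRequestType 0xc0,
 * wValue = reg << 8, 7 returned register bytes), but issued synchronously
 * with usb_control_msg().  The helper name is hypothetical; it may only be
 * called from sleepable context, which is why the driver itself submits the
 * transfer asynchronously and lets atomic callers (e.g. frob_control with
 * GFP_ATOMIC) avoid waiting on it.
 */
#if 0
static int uss720_sync_get_1284_regs(struct usb_device *usbdev,
				     unsigned char reg, __u8 regbuf[7])
{
	__u8 *buf;
	int ret;

	/* usb_control_msg() needs a kmalloc'ed (DMA-able) buffer, not stack */
	buf = kmalloc(7, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0),
			      3, 0xc0 /* USB_DIR_IN | USB_TYPE_VENDOR */,
			      ((unsigned int)reg) << 8, 0,
			      buf, 7, 1000 /* timeout in ms */);
	if (ret == 7) {
		memcpy(regbuf, buf, 7);
		ret = 0;
	} else if (ret >= 0) {
		ret = -EIO;	/* short transfer */
	}
	kfree(buf);
	return ret;		/* negative errno from the USB core otherwise */
}
#endif

/* --------------------------------------------------------------------- */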
/* * Performance events: * * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * * Data type definitions, declarations, prototypes. * * Started by: Thomas Gleixner and Ingo Molnar * * For licencing details see kernel-base/COPYING */ #ifndef _LINUX_PERF_EVENT_H #define _LINUX_PERF_EVENT_H #include <uapi/linux/perf_event.h> #include <uapi/linux/bpf_perf_event.h> /* * Kernel-internal data types and definitions: */ #ifdef CONFIG_PERF_EVENTS # include <asm/perf_event.h> # include <asm/local64.h> #endif #define PERF_GUEST_ACTIVE 0x01 #define PERF_GUEST_USER 0x02 struct perf_guest_info_callbacks { unsigned int (*state)(void); unsigned long (*get_ip)(void); unsigned int (*handle_intel_pt_intr)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT #include <linux/rhashtable-types.h> #include <asm/hw_breakpoint.h> #endif #include <linux/list.h> #include <linux/mutex.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/hrtimer.h> #include <linux/fs.h> #include <linux/pid_namespace.h> #include <linux/workqueue.h> #include <linux/ftrace.h> #include <linux/cpu.h> #include <linux/irq_work.h> #include <linux/static_key.h> #include <linux/jump_label_ratelimit.h> #include <linux/atomic.h> #include <linux/sysfs.h> #include <linux/perf_regs.h> #include <linux/cgroup.h> #include <linux/refcount.h> #include <linux/security.h> #include <linux/static_call.h> #include <linux/lockdep.h> #include <asm/local.h> struct perf_callchain_entry { __u64 nr; __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ }; struct perf_callchain_entry_ctx { struct perf_callchain_entry *entry; u32 max_stack; u32 nr; short contexts; bool contexts_maxed; }; typedef unsigned long (*perf_copy_f)(void *dst, const void *src, unsigned long off, unsigned long len); struct perf_raw_frag { union { struct perf_raw_frag *next; unsigned long pad; }; perf_copy_f copy; void *data; u32 size; } __packed; struct perf_raw_record { struct perf_raw_frag frag; u32 size;
}; static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) { return frag->pad < sizeof(u64); } /* * branch stack layout: * nr: number of taken branches stored in entries[] * hw_idx: The low level index of raw branch records * for the most recent branch. * -1ULL means invalid/unknown. * * Note that nr can vary from sample to sample * branches (to, from) are stored from most recent * to least recent, i.e., entries[0] contains the most * recent branch. * The entries[] is an abstraction of raw branch records, * which may not be stored in age order in HW, e.g. Intel LBR. * The hw_idx is to expose the low level index of raw * branch record for the most recent branch aka entries[0]. * The hw_idx index is between -1 (unknown) and max depth, * which can be retrieved in /sys/devices/cpu/caps/branches. * For the architectures whose raw branch records are * already stored in age order, the hw_idx should be 0. */ struct perf_branch_stack { __u64 nr; __u64 hw_idx; struct perf_branch_entry entries[]; }; struct task_struct; /* * extra PMU register associated with an event */ struct hw_perf_event_extra { u64 config; /* register value */ unsigned int reg; /* register address or index */ int alloc; /* extra register already allocated */ int idx; /* index in shared_regs->regs[] */ }; /** * hw_perf_event::flag values * * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * usage. */ #define PERF_EVENT_FLAG_ARCH 0x000fffff #define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0); /** * struct hw_perf_event - performance event hardware details: */ struct hw_perf_event { #ifdef CONFIG_PERF_EVENTS union { struct { /* hardware */ u64 config; u64 last_tag; unsigned long config_base; unsigned long event_base; int event_base_rdpmc; int idx; int last_cpu; int flags; struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; }; struct { /* software */ struct hrtimer hrtimer; }; struct { /* tracepoint */ /* for tp_event->class */ struct list_head tp_list; }; struct { /* amd_power */ u64 pwr_acc; u64 ptsc; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ /* * Crufty hack to avoid the chicken and egg * problem hw_breakpoint has with context * creation and event initalization. */ struct arch_hw_breakpoint info; struct rhlist_head bp_list; }; #endif struct { /* amd_iommu */ u8 iommu_bank; u8 iommu_cntr; u16 padding; u64 conf; u64 conf1; }; }; /* * If the event is a per task event, this will point to the task in * question. See the comment in perf_event_alloc(). */ struct task_struct *target; /* * PMU would store hardware filter configuration * here. */ void *addr_filters; /* Last sync'ed generation of filters */ unsigned long addr_filters_gen; /* * hw_perf_event::state flags; used to track the PERF_EF_* state. */ #define PERF_HES_STOPPED 0x01 /* the counter is stopped */ #define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ #define PERF_HES_ARCH 0x04 int state; /* * The last observed hardware counter value, updated with a * local64_cmpxchg() such that pmu::read() can be called nested. */ local64_t prev_count; /* * The period to start the next sample with. */ u64 sample_period; union { struct { /* Sampling */ /* * The period we started this sample with. */ u64 last_period; /* * However much is left of the current period; * note that this is a full 64bit value and * allows for generation of periods longer * than hardware might allow. 
*/ local64_t period_left; }; struct { /* Topdown events counting for context switch */ u64 saved_metric; u64 saved_slots; }; }; /* * State for throttling the event, see __perf_event_overflow() and * perf_adjust_freq_unthr_context(). */ u64 interrupts_seq; u64 interrupts; /* * State for freq target events, see __perf_event_overflow() and * perf_adjust_freq_unthr_context(). */ u64 freq_time_stamp; u64 freq_count_stamp; #endif }; struct perf_event; struct perf_event_pmu_context; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ #define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags */ #define PERF_PMU_CAP_NO_INTERRUPT 0x0001 #define PERF_PMU_CAP_NO_NMI 0x0002 #define PERF_PMU_CAP_AUX_NO_SG 0x0004 #define PERF_PMU_CAP_EXTENDED_REGS 0x0008 #define PERF_PMU_CAP_EXCLUSIVE 0x0010 #define PERF_PMU_CAP_ITRACE 0x0020 #define PERF_PMU_CAP_NO_EXCLUDE 0x0040 #define PERF_PMU_CAP_AUX_OUTPUT 0x0080 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 struct perf_output_handle; #define PMU_NULL_DEV ((void *)(~0UL)) /** * struct pmu - generic performance monitoring unit */ struct pmu { struct list_head entry; struct module *module; struct device *dev; struct device *parent; const struct attribute_group **attr_groups; const struct attribute_group **attr_update; const char *name; int type; /* * various common per-pmu feature flags */ int capabilities; int __percpu *pmu_disable_count; struct perf_cpu_pmu_context __percpu *cpu_pmu_context; atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ int task_ctx_nr; int hrtimer_interval_ms; /* number of address filters this PMU can do */ unsigned int nr_addr_filters; /* * Fully disable/enable this PMU, can be used to protect from the PMI * as well as for lazy/batch writing of the MSRs. */ void (*pmu_enable) (struct pmu *pmu); /* optional */ void (*pmu_disable) (struct pmu *pmu); /* optional */ /* * Try and initialize the event for this PMU. * * Returns: * -ENOENT -- @event is not for this PMU * * -ENODEV -- @event is for this PMU but PMU not present * -EBUSY -- @event is for this PMU but PMU temporarily unavailable * -EINVAL -- @event is for this PMU but @event is not valid * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported * -EACCES -- @event is for this PMU, @event is valid, but no privileges * * 0 -- @event is for this PMU and valid * * Other error return values are allowed. */ int (*event_init) (struct perf_event *event); /* * Notification that the event was mapped or unmapped. Called * in the context of the mapping task. */ void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ /* * Flags for ->add()/->del()/ ->start()/->stop(). There are * matching hw_perf_event::state flags. */ #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ /* * Adds/Removes a counter to/from the PMU, can be done inside a * transaction, see the ->*_txn() methods. * * The add/del callbacks will reserve all hardware resources required * to service the event, this includes any counter constraint * scheduling etc. * * Called with IRQs disabled and the PMU disabled on the CPU the event * is on. 
* * ->add() called without PERF_EF_START should result in the same state * as ->add() followed by ->stop(). * * ->del() must always PERF_EF_UPDATE stop an event. If it calls * ->stop() that must deal with already being stopped without * PERF_EF_UPDATE. */ int (*add) (struct perf_event *event, int flags); void (*del) (struct perf_event *event, int flags); /* * Starts/Stops a counter present on the PMU. * * The PMI handler should stop the counter when perf_event_overflow() * returns !0. ->start() will be used to continue. * * Also used to change the sample period. * * Called with IRQs disabled and the PMU disabled on the CPU the event * is on -- will be called from NMI context with the PMU generates * NMIs. * * ->stop() with PERF_EF_UPDATE will read the counter and update * period/count values like ->read() would. * * ->start() with PERF_EF_RELOAD will reprogram the counter * value, must be preceded by a ->stop() with PERF_EF_UPDATE. */ void (*start) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags); /* * Updates the counter value of the event. * * For sampling capable PMUs this will also update the software period * hw_perf_event::period_left field. */ void (*read) (struct perf_event *event); /* * Group events scheduling is treated as a transaction, add * group events as a whole and perform one schedulability test. * If the test fails, roll back the whole group * * Start the transaction, after this ->add() doesn't need to * do schedulability tests. * * Optional. */ void (*start_txn) (struct pmu *pmu, unsigned int txn_flags); /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. * * Optional. */ int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. * * Optional. */ void (*cancel_txn) (struct pmu *pmu); /* * Will return the value for perf_event_mmap_page::index for this event, * if no implementation is provided it will default to 0 (see * perf_event_idx_default). */ int (*event_idx) (struct perf_event *event); /*optional */ /* * context-switches callback */ void (*sched_task) (struct perf_event_pmu_context *pmu_ctx, bool sched_in); /* * Kmem cache of PMU specific data */ struct kmem_cache *task_ctx_cache; /* * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data) * can be synchronized using this function. See Intel LBR callstack support * implementation and Perf core context switch handling callbacks for usage * examples. */ void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc, struct perf_event_pmu_context *next_epc); /* optional */ /* * Set up pmu-private data structures for an AUX area */ void *(*setup_aux) (struct perf_event *event, void **pages, int nr_pages, bool overwrite); /* optional */ /* * Free pmu-private AUX data structures */ void (*free_aux) (void *aux); /* optional */ /* * Take a snapshot of the AUX buffer without touching the event * state, so that preempting ->start()/->stop() callbacks does * not interfere with their logic. Called in PMI context. * * Returns the size of AUX data copied to the output handle. * * Optional. 
*/ long (*snapshot_aux) (struct perf_event *event, struct perf_output_handle *handle, unsigned long size); /* * Validate address range filters: make sure the HW supports the * requested configuration and number of filters; return 0 if the * supplied filters are valid, -errno otherwise. * * Runs in the context of the ioctl()ing process and is not serialized * with the rest of the PMU callbacks. */ int (*addr_filters_validate) (struct list_head *filters); /* optional */ /* * Synchronize address range filter configuration: * translate hw-agnostic filters into hardware configuration in * event::hw::addr_filters. * * Runs as a part of filter sync sequence that is done in ->start() * callback by calling perf_event_addr_filters_sync(). * * May (and should) traverse event::addr_filters::list, for which its * caller provides necessary serialization. */ void (*addr_filters_sync) (struct perf_event *event); /* optional */ /* * Check if event can be used for aux_output purposes for * events of this PMU. * * Runs from perf_event_open(). Should return 0 for "no match" * or non-zero for "match". */ int (*aux_output_match) (struct perf_event *event); /* optional */ /* * Skip programming this PMU on the given CPU. Typically needed for * big.LITTLE things. */ bool (*filter) (struct pmu *pmu, int cpu); /* optional */ /* * Check period value for PERF_EVENT_IOC_PERIOD ioctl. */ int (*check_period) (struct perf_event *event, u64 value); /* optional */ }; enum perf_addr_filter_action_t { PERF_ADDR_FILTER_ACTION_STOP = 0, PERF_ADDR_FILTER_ACTION_START, PERF_ADDR_FILTER_ACTION_FILTER, }; /** * struct perf_addr_filter - address range filter definition * @entry: event's filter list linkage * @path: object file's path for file-based filters * @offset: filter range offset * @size: filter range size (size==0 means single address trigger) * @action: filter/start/stop * * This is a hardware-agnostic filter configuration as specified by the user. */ struct perf_addr_filter { struct list_head entry; struct path path; unsigned long offset; unsigned long size; enum perf_addr_filter_action_t action; }; /** * struct perf_addr_filters_head - container for address range filters * @list: list of filters for this event * @lock: spinlock that serializes accesses to the @list and event's * (and its children's) filter generations. * @nr_file_filters: number of file-based filters * * A child event will use parent's @list (and therefore @lock), so they are * bundled together; see perf_event_addr_filters(). */ struct perf_addr_filters_head { struct list_head list; raw_spinlock_t lock; unsigned int nr_file_filters; }; struct perf_addr_filter_range { unsigned long start; unsigned long size; }; /** * enum perf_event_state - the states of an event: */ enum perf_event_state { PERF_EVENT_STATE_DEAD = -4, PERF_EVENT_STATE_EXIT = -3, PERF_EVENT_STATE_ERROR = -2, PERF_EVENT_STATE_OFF = -1, PERF_EVENT_STATE_INACTIVE = 0, PERF_EVENT_STATE_ACTIVE = 1, }; struct file; struct perf_sample_data; typedef void (*perf_overflow_handler_t)(struct perf_event *, struct perf_sample_data *, struct pt_regs *regs); /* * Event capabilities. For event_caps and groups caps. * * PERF_EV_CAP_SOFTWARE: Is a software event. * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read * from any CPU in the package where it is active. * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and * cannot be a group leader. If an event with this flag is detached from the * group it is scheduled out and moved into an unrecoverable ERROR state. 
*/ #define PERF_EV_CAP_SOFTWARE BIT(0) #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) #define PERF_EV_CAP_SIBLING BIT(2) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) struct swevent_hlist { struct hlist_head heads[SWEVENT_HLIST_SIZE]; struct rcu_head rcu_head; }; #define PERF_ATTACH_CONTEXT 0x01 #define PERF_ATTACH_GROUP 0x02 #define PERF_ATTACH_TASK 0x04 #define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_ITRACE 0x10 #define PERF_ATTACH_SCHED_CB 0x20 #define PERF_ATTACH_CHILD 0x40 struct bpf_prog; struct perf_cgroup; struct perf_buffer; struct pmu_event_list { raw_spinlock_t lock; struct list_head list; }; /* * event->sibling_list is modified whole holding both ctx->lock and ctx->mutex * as such iteration must hold either lock. However, since ctx->lock is an IRQ * safe lock, and is only held by the CPU doing the modification, having IRQs * disabled is sufficient since it will hold-off the IPIs. */ #ifdef CONFIG_PROVE_LOCKING #define lockdep_assert_event_ctx(event) \ WARN_ON_ONCE(__lockdep_enabled && \ (this_cpu_read(hardirqs_enabled) && \ lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD)) #else #define lockdep_assert_event_ctx(event) #endif #define for_each_sibling_event(sibling, event) \ lockdep_assert_event_ctx(event); \ if ((event)->group_leader == (event)) \ list_for_each_entry((sibling), &(event)->sibling_list, sibling_list) /** * struct perf_event - performance event kernel representation: */ struct perf_event { #ifdef CONFIG_PERF_EVENTS /* * entry onto perf_event_context::event_list; * modifications require ctx->lock * RCU safe iterations. */ struct list_head event_entry; /* * Locked for modification by both ctx->mutex and ctx->lock; holding * either sufficies for read. */ struct list_head sibling_list; struct list_head active_list; /* * Node on the pinned or flexible tree located at the event context; */ struct rb_node group_node; u64 group_index; /* * We need storage to track the entries in perf_pmu_migrate_context; we * cannot use the event_entry because of RCU and we want to keep the * group in tact which avoids us using the other two entries. */ struct list_head migrate_entry; struct hlist_node hlist_entry; struct list_head active_entry; int nr_siblings; /* Not serialized. Only written during event initialization. */ int event_caps; /* The cumulative AND of all event_caps for events in this group. */ int group_caps; unsigned int group_generation; struct perf_event *group_leader; /* * event->pmu will always point to pmu in which this event belongs. * Whereas event->pmu_ctx->pmu may point to other pmu when group of * different pmu events is created. */ struct pmu *pmu; void *pmu_private; enum perf_event_state state; unsigned int attach_state; local64_t count; atomic64_t child_count; /* * These are the total time in nanoseconds that the event * has been enabled (i.e. eligible to run, and the task has * been scheduled in, if this is a per-task event) * and running (scheduled onto the CPU), respectively. */ u64 total_time_enabled; u64 total_time_running; u64 tstamp; struct perf_event_attr attr; u16 header_size; u16 id_header_size; u16 read_size; struct hw_perf_event hw; struct perf_event_context *ctx; /* * event->pmu_ctx points to perf_event_pmu_context in which the event * is added. This pmu_ctx can be of other pmu for sw event when that * sw event is part of a group which also contains non-sw events. 
*/ struct perf_event_pmu_context *pmu_ctx; atomic_long_t refcount; /* * These accumulate total time (in nanoseconds) that children * events have been enabled and running, respectively. */ atomic64_t child_total_time_enabled; atomic64_t child_total_time_running; /* * Protect attach/detach and child_list: */ struct mutex child_mutex; struct list_head child_list; struct perf_event *parent; int oncpu; int cpu; struct list_head owner_entry; struct task_struct *owner; /* mmap bits */ struct mutex mmap_mutex; atomic_t mmap_count; struct perf_buffer *rb; struct list_head rb_entry; unsigned long rcu_batches; int rcu_pending; /* poll related */ wait_queue_head_t waitq; struct fasync_struct *fasync; /* delayed work for NMIs and such */ unsigned int pending_wakeup; unsigned int pending_kill; unsigned int pending_disable; unsigned int pending_sigtrap; unsigned long pending_addr; /* SIGTRAP */ struct irq_work pending_irq; struct callback_head pending_task; unsigned int pending_work; atomic_t event_limit; /* address range filters */ struct perf_addr_filters_head addr_filters; /* vma address array for file-based filders */ struct perf_addr_filter_range *addr_filter_ranges; unsigned long addr_filters_gen; /* for aux_output events */ struct perf_event *aux_event; void (*destroy)(struct perf_event *); struct rcu_head rcu_head; struct pid_namespace *ns; u64 id; atomic64_t lost_samples; u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; #ifdef CONFIG_BPF_SYSCALL perf_overflow_handler_t orig_overflow_handler; struct bpf_prog *prog; u64 bpf_cookie; #endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; struct event_filter *filter; #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops ftrace_ops; #endif #endif #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; /* cgroup event is attach to */ #endif #ifdef CONFIG_SECURITY void *security; #endif struct list_head sb_list; /* * Certain events gets forwarded to another pmu internally by over- * writing kernel copy of event->attr.type without user being aware * of it. event->orig_type contains original 'type' requested by * user. */ __u32 orig_type; #endif /* CONFIG_PERF_EVENTS */ }; /* * ,-----------------------[1:n]------------------------. * V V * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event * | | * `--[n:1]-> pmu <-[1:n]--' * * * struct perf_event_pmu_context lifetime is refcount based and RCU freed * (similar to perf_event_context). Locking is as if it were a member of * perf_event_context; specifically: * * modification, both: ctx->mutex && ctx->lock * reading, either: ctx->mutex || ctx->lock * * There is one exception to this; namely put_pmu_ctx() isn't always called * with ctx->mutex held; this means that as long as we can guarantee the epc * has events the above rules hold. * * Specificially, sys_perf_event_open()'s group_leader case depends on * ctx->mutex pinning the configuration. Since we hold a reference on * group_leader (through the filedesc) it can't go away, therefore it's * associated pmu_ctx must exist and cannot change due to ctx->mutex. 
* * perf_event holds a refcount on perf_event_context * perf_event holds a refcount on perf_event_pmu_context */ struct perf_event_pmu_context { struct pmu *pmu; struct perf_event_context *ctx; struct list_head pmu_ctx_entry; struct list_head pinned_active; struct list_head flexible_active; /* Used to avoid freeing per-cpu perf_event_pmu_context */ unsigned int embedded : 1; unsigned int nr_events; unsigned int nr_cgroups; atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; void *task_ctx_data; /* pmu specific data */ /* * Set when one or more (plausibly active) event can't be scheduled * due to pmu overcommit or pmu constraints, except tolerant to * events not necessary to be active due to scheduling constraints, * such as cgroups. */ int rotate_necessary; }; struct perf_event_groups { struct rb_root tree; u64 index; }; /** * struct perf_event_context - event context structure * * Used as a container for task events and CPU events as well: */ struct perf_event_context { /* * Protect the states of the events in the list, * nr_active, and the list: */ raw_spinlock_t lock; /* * Protect the list of events. Locking either mutex or lock * is sufficient to ensure the list doesn't change; to change * the list you need to lock both the mutex and the spinlock. */ struct mutex mutex; struct list_head pmu_ctx_list; struct perf_event_groups pinned_groups; struct perf_event_groups flexible_groups; struct list_head event_list; int nr_events; int nr_user; int is_active; int nr_task_data; int nr_stat; int nr_freq; int rotate_disable; refcount_t refcount; /* event <-> ctx */ struct task_struct *task; /* * Context clock, runs when context enabled. */ u64 time; u64 timestamp; u64 timeoffset; /* * These fields let us detect when two contexts have both * been cloned (inherited) from a common ancestor. */ struct perf_event_context *parent_ctx; u64 parent_gen; u64 generation; int pin_count; #ifdef CONFIG_CGROUP_PERF int nr_cgroups; /* cgroup evts */ #endif struct rcu_head rcu_head; /* * Sum (event->pending_sigtrap + event->pending_work) * * The SIGTRAP is targeted at ctx->task, as such it won't do changing * that until the signal is delivered. */ local_t nr_pending; }; /* * Number of contexts where an event can trigger: * task, softirq, hardirq, nmi. */ #define PERF_NR_CONTEXTS 4 struct perf_cpu_pmu_context { struct perf_event_pmu_context epc; struct perf_event_pmu_context *task_epc; struct list_head sched_cb_entry; int sched_cb_usage; int active_oncpu; int exclusive; raw_spinlock_t hrtimer_lock; struct hrtimer hrtimer; ktime_t hrtimer_interval; unsigned int hrtimer_active; }; /** * struct perf_event_cpu_context - per cpu event context structure */ struct perf_cpu_context { struct perf_event_context ctx; struct perf_event_context *task_ctx; int online; #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; #endif /* * Per-CPU storage for iterators used in visit_groups_merge. The default * storage is of size 2 to hold the CPU and any CPU event iterators. */ int heap_size; struct perf_event **heap; struct perf_event *heap_default[2]; }; struct perf_output_handle { struct perf_event *event; struct perf_buffer *rb; unsigned long wakeup; unsigned long size; u64 aux_flags; union { void *addr; unsigned long head; }; int page; }; struct bpf_perf_event_data_kern { bpf_user_pt_regs_t *regs; struct perf_sample_data *data; struct perf_event *event; }; #ifdef CONFIG_CGROUP_PERF /* * perf_cgroup_info keeps track of time_enabled for a cgroup. * This is a per-cpu dynamically allocated data structure. 
*/ struct perf_cgroup_info { u64 time; u64 timestamp; u64 timeoffset; int active; }; struct perf_cgroup { struct cgroup_subsys_state css; struct perf_cgroup_info __percpu *info; }; /* * Must ensure cgroup is pinned (css_get) before calling * this function. In other words, we cannot call this function * if there is no cgroup event for the current CPU context. */ static inline struct perf_cgroup * perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) { return container_of(task_css_check(task, perf_event_cgrp_id, ctx ? lockdep_is_held(&ctx->lock) : true), struct perf_cgroup, css); } #endif /* CONFIG_CGROUP_PERF */ #ifdef CONFIG_PERF_EVENTS extern struct perf_event_context *perf_cpu_task_ctx(void); extern void *perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event); extern void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size); extern int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size); extern void *perf_get_aux(struct perf_output_handle *handle); extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); extern void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task); extern void __perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next); extern int perf_event_init_task(struct task_struct *child, u64 clone_flags); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); extern struct file *perf_event_get(unsigned int fd); extern const struct perf_event *perf_get_event(struct file *file); extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); extern void perf_sched_cb_dec(struct pmu *pmu); extern void perf_sched_cb_inc(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern void perf_pmu_resched(struct pmu *pmu); extern int perf_event_refresh(struct perf_event *event, int refresh); extern void perf_event_update_userpage(struct perf_event *event); extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, struct task_struct *task, perf_overflow_handler_t callback, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); int perf_event_read_local(struct perf_event *event, u64 *value, u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); static inline bool branch_sample_no_flags(const struct perf_event *event) { return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS; } static inline bool branch_sample_no_cycles(const struct perf_event *event) { return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES; } static inline bool branch_sample_type(const struct perf_event *event) { return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE; } static inline bool 
branch_sample_hw_index(const struct perf_event *event) { return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; } static inline bool branch_sample_priv(const struct perf_event *event) { return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; } static inline bool branch_sample_counters(const struct perf_event *event) { return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS; } static inline bool branch_sample_call_stack(const struct perf_event *event) { return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } struct perf_sample_data { /* * Fields set by perf_sample_data_init() unconditionally, * group so as to minimize the cachelines touched. */ u64 sample_flags; u64 period; u64 dyn_size; /* * Fields commonly set by __perf_event_header__init_id(), * group so as to minimize the cachelines touched. */ u64 type; struct { u32 pid; u32 tid; } tid_entry; u64 time; u64 id; struct { u32 cpu; u32 reserved; } cpu_entry; /* * The other fields, optionally {set,used} by * perf_{prepare,output}_sample(). */ u64 ip; struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; u64 *br_stack_cntr; union perf_sample_weight weight; union perf_mem_data_src data_src; u64 txn; struct perf_regs regs_user; struct perf_regs regs_intr; u64 stack_user_size; u64 stream_id; u64 cgroup; u64 addr; u64 phys_addr; u64 data_page_size; u64 code_page_size; u64 aux_size; } ____cacheline_aligned; /* default value for data source */ #define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ PERF_MEM_S(LVL, NA) |\ PERF_MEM_S(SNOOP, NA) |\ PERF_MEM_S(LOCK, NA) |\ PERF_MEM_S(TLB, NA) |\ PERF_MEM_S(LVLNUM, NA)) static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) { /* remaining struct members initialized in perf_prepare_sample() */ data->sample_flags = PERF_SAMPLE_PERIOD; data->period = period; data->dyn_size = 0; if (addr) { data->addr = addr; data->sample_flags |= PERF_SAMPLE_ADDR; } } static inline void perf_sample_save_callchain(struct perf_sample_data *data, struct perf_event *event, struct pt_regs *regs) { int size = 1; data->callchain = perf_callchain(event, regs); size += data->callchain->nr; data->dyn_size += size * sizeof(u64); data->sample_flags |= PERF_SAMPLE_CALLCHAIN; } static inline void perf_sample_save_raw_data(struct perf_sample_data *data, struct perf_raw_record *raw) { struct perf_raw_frag *frag = &raw->frag; u32 sum = 0; int size; do { sum += frag->size; if (perf_raw_frag_last(frag)) break; frag = frag->next; } while (1); size = round_up(sum + sizeof(u32), sizeof(u64)); raw->size = size - sizeof(u32); frag->pad = raw->size - sum; data->raw = raw; data->dyn_size += size; data->sample_flags |= PERF_SAMPLE_RAW; } static inline void perf_sample_save_brstack(struct perf_sample_data *data, struct perf_event *event, struct perf_branch_stack *brs, u64 *brs_cntr) { int size = sizeof(u64); /* nr */ if (branch_sample_hw_index(event)) size += sizeof(u64); size += brs->nr * sizeof(struct perf_branch_entry); /* * The extension space for counters is appended after the * struct perf_branch_stack. It is used to store the occurrences * of events of each branch. 
*/ if (brs_cntr) size += brs->nr * sizeof(u64); data->br_stack = brs; data->br_stack_cntr = brs_cntr; data->dyn_size += size; data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } static inline u32 perf_sample_data_size(struct perf_sample_data *data, struct perf_event *event) { u32 size = sizeof(struct perf_event_header); size += event->header_size + event->id_header_size; size += data->dyn_size; return size; } /* * Clear all bitfields in the perf_branch_entry. * The to and from fields are not cleared because they are * systematically modified by caller. */ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br) { br->mispred = 0; br->predicted = 0; br->in_tx = 0; br->abort = 0; br->cycles = 0; br->type = 0; br->spec = PERF_BR_SPEC_NA; br->reserved = 0; } extern void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event); extern void perf_prepare_sample(struct perf_sample_data *data, struct perf_event *event, struct pt_regs *regs); extern void perf_prepare_header(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, struct pt_regs *regs); extern int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); extern void perf_event_output_forward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); extern void perf_event_output_backward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); extern int perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); static inline bool __is_default_overflow_handler(perf_overflow_handler_t overflow_handler) { if (likely(overflow_handler == perf_event_output_forward)) return true; if (unlikely(overflow_handler == perf_event_output_backward)) return true; return false; } #define is_default_overflow_handler(event) \ __is_default_overflow_handler((event)->overflow_handler) #ifdef CONFIG_BPF_SYSCALL static inline bool uses_default_overflow_handler(struct perf_event *event) { if (likely(is_default_overflow_handler(event))) return true; return __is_default_overflow_handler(event->orig_overflow_handler); } #else #define uses_default_overflow_handler(event) \ is_default_overflow_handler(event) #endif extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event); extern void perf_event__output_id_sample(struct perf_event *event, struct perf_output_handle *handle, struct perf_sample_data *sample); extern void perf_log_lost_samples(struct perf_event *event, u64 lost); static inline bool event_has_any_exclude_flag(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; return attr->exclude_idle || attr->exclude_user || attr->exclude_kernel || attr->exclude_hv || attr->exclude_guest || attr->exclude_host; } static inline bool is_sampling_event(struct perf_event *event) { return event->attr.sample_period != 0; } /* * Return 1 for a software event, 0 for a hardware event */ static inline int is_software_event(struct perf_event *event) { return event->event_caps & PERF_EV_CAP_SOFTWARE; } /* * Return 1 for event in sw context, 0 for event in hw context */ static inline int in_software_context(struct perf_event *event) { return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context; } static inline int is_exclusive_pmu(struct pmu *pmu) { return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE; } extern struct 
static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } #endif /* * When generating a perf sample in-line, instead of from an interrupt / * exception, we lack a pt_regs. This is typically used from software events * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints. * * We typically don't need a full set, but (for x86) do require: * - ip for PERF_SAMPLE_IP * - cs for user_mode() tests * - sp for PERF_SAMPLE_CALLCHAIN * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs()) * * NOTE: assumes @regs is otherwise already 0 filled; this is important for * things like PERF_SAMPLE_REGS_INTR. */ static inline void perf_fetch_caller_regs(struct pt_regs *regs) { perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } static __always_inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { if (static_key_false(&perf_swevent_enabled[event_id])) __perf_sw_event(event_id, nr, regs, addr); } DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]); /* * 'Special' version for the scheduler, it hard assumes no recursion, * which is guaranteed by us not actually scheduling inside other swevents * because those disable preemption. */ static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); perf_fetch_caller_regs(regs); ___perf_sw_event(event_id, nr, regs, addr); } extern struct static_key_false perf_sched_events; static __always_inline bool __perf_sw_enabled(int swevt) { return static_key_false(&perf_swevent_enabled[swevt]); } static inline void perf_event_task_migrate(struct task_struct *task) { if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS)) task->sched_migrated = 1; } static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { if (static_branch_unlikely(&perf_sched_events)) __perf_event_task_sched_in(prev, task); if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) && task->sched_migrated) { __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); task->sched_migrated = 0; } } static inline void perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next) { if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES)) __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); #ifdef CONFIG_CGROUP_PERF if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) && perf_cgroup_from_task(prev, NULL) != perf_cgroup_from_task(next, NULL)) __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0); #endif if (static_branch_unlikely(&perf_sched_events)) __perf_event_task_sched_out(prev, next); } extern void perf_event_mmap(struct vm_area_struct *vma); extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, const char *sym); extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); #ifdef CONFIG_GUEST_PERF_EVENTS extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip); DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); static inline unsigned int perf_guest_state(void) { return static_call(__perf_guest_state)(); } static inline unsigned long 
perf_guest_get_ip(void) { return static_call(__perf_guest_get_ip)(); } static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return static_call(__perf_guest_handle_intel_pt_intr)(); } extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); #else static inline unsigned int perf_guest_state(void) { return 0; } static inline unsigned long perf_guest_get_ip(void) { return 0; } static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } #endif /* CONFIG_GUEST_PERF_EVENTS */ extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); extern void perf_event_text_poke(const void *addr, const void *old_bytes, size_t old_len, const void *new_bytes, size_t new_len); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern struct perf_callchain_entry *get_callchain_entry(int *rctx); extern void put_callchain_entry(int rctx); extern int sysctl_perf_event_max_stack; extern int sysctl_perf_event_max_contexts_per_stack; static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip) { if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; ++ctx->contexts; return 0; } else { ctx->contexts_maxed = true; return -1; /* no more room, stop walking the stack */ } } static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; ++ctx->nr; return 0; } else { return -1; /* no more room, stop walking the stack */ } } extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int perf_event_max_stack_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); /* Access to perf_event_open(2) syscall. */ #define PERF_SECURITY_OPEN 0 /* Finer grained perf_event_open(2) access control. 
*/ #define PERF_SECURITY_CPU 1 #define PERF_SECURITY_KERNEL 2 #define PERF_SECURITY_TRACEPOINT 3 static inline int perf_is_paranoid(void) { return sysctl_perf_event_paranoid > -1; } static inline int perf_allow_kernel(struct perf_event_attr *attr) { if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) return -EACCES; return security_perf_event_open(attr, PERF_SECURITY_KERNEL); } static inline int perf_allow_cpu(struct perf_event_attr *attr) { if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) return -EACCES; return security_perf_event_open(attr, PERF_SECURITY_CPU); } static inline int perf_allow_tracepoint(struct perf_event_attr *attr) { if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) return -EPERM; return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); } extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags # define perf_misc_flags(regs) \ (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) # define perf_instruction_pointer(regs) instruction_pointer(regs) #endif #ifndef perf_arch_bpf_user_pt_regs # define perf_arch_bpf_user_pt_regs(regs) regs #endif static inline bool has_branch_stack(struct perf_event *event) { return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; } static inline bool needs_branch_stack(struct perf_event *event) { return event->attr.branch_sample_type != 0; } static inline bool has_aux(struct perf_event *event) { return event->pmu->setup_aux; } static inline bool is_write_backward(struct perf_event *event) { return !!event->attr.write_backward; } static inline bool has_addr_filter(struct perf_event *event) { return event->pmu->nr_addr_filters; } /* * An inherited event uses parent's filters */ static inline struct perf_addr_filters_head * perf_event_addr_filters(struct perf_event *event) { struct perf_addr_filters_head *ifh = &event->addr_filters; if (event->parent) ifh = &event->parent->addr_filters; return ifh; } extern void perf_event_addr_filters_sync(struct perf_event *event); extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id); extern int perf_output_begin(struct perf_output_handle *handle, struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern int perf_output_begin_forward(struct perf_output_handle *handle, struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern int perf_output_begin_backward(struct perf_output_handle *handle, struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); extern unsigned int perf_output_skip(struct perf_output_handle *handle, unsigned int len); extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, struct perf_output_handle *handle, unsigned long from, unsigned long to); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern u64 perf_swevent_set_period(struct perf_event *event); extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); extern void perf_event_disable_local(struct perf_event *event); extern void perf_event_disable_inatomic(struct 
perf_event *event); extern void perf_event_task_tick(void); extern int perf_event_account_interrupt(struct perf_event *event); extern int perf_event_period(struct perf_event *event, u64 value); extern u64 perf_event_pause(struct perf_event *event, bool reset); #else /* !CONFIG_PERF_EVENTS: */ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } static inline void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) { } static inline int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { return -EINVAL; } static inline void * perf_get_aux(struct perf_output_handle *handle) { return NULL; } static inline void perf_event_task_migrate(struct task_struct *task) { } static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *prev, struct task_struct *next) { } static inline int perf_event_init_task(struct task_struct *child, u64 clone_flags) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); } static inline const struct perf_event *perf_get_event(struct file *file) { return ERR_PTR(-EINVAL); } static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event) { return ERR_PTR(-EINVAL); } static inline int perf_event_read_local(struct perf_event *event, u64 *value, u64 *enabled, u64 *running) { return -EINVAL; } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } static inline int perf_event_refresh(struct perf_event *event, int refresh) { return -EINVAL; } static inline void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, const char *sym) { } static inline void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_text_poke(const void *addr, const void *old_bytes, size_t old_len, const void *new_bytes, size_t new_len) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline u64 perf_swevent_set_period(struct perf_event *event) { return 0; } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } static inline 
int perf_event_period(struct perf_event *event, u64 value) { return -EINVAL; } static inline u64 perf_event_pause(struct perf_event *event, bool reset) { return 0; } #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); #else static inline void perf_restore_debug_store(void) { } #endif #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) struct perf_pmu_events_attr { struct device_attribute attr; u64 id; const char *event_str; }; struct perf_pmu_events_ht_attr { struct device_attribute attr; u64 id; const char *event_str_ht; const char *event_str_noht; }; struct perf_pmu_events_hybrid_attr { struct device_attribute attr; u64 id; const char *event_str; u64 pmu_type; }; struct perf_pmu_format_hybrid_attr { struct device_attribute attr; u64 pmu_type; }; ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); #define PMU_EVENT_ATTR(_name, _var, _id, _show) \ static struct perf_pmu_events_attr _var = { \ .attr = __ATTR(_name, 0444, _show, NULL), \ .id = _id, \ }; #define PMU_EVENT_ATTR_STRING(_name, _var, _str) \ static struct perf_pmu_events_attr _var = { \ .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ .id = 0, \ .event_str = _str, \ }; #define PMU_EVENT_ATTR_ID(_name, _show, _id) \ (&((struct perf_pmu_events_attr[]) { \ { .attr = __ATTR(_name, 0444, _show, NULL), \ .id = _id, } \ })[0].attr.attr) #define PMU_FORMAT_ATTR_SHOW(_name, _format) \ static ssize_t \ _name##_show(struct device *dev, \ struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ #define PMU_FORMAT_ATTR(_name, _format) \ PMU_FORMAT_ATTR_SHOW(_name, _format) \ \ static struct device_attribute format_attr_##_name = __ATTR_RO(_name) /* Performance counter hotplug functions */ #ifdef CONFIG_PERF_EVENTS int perf_event_init_cpu(unsigned int cpu); int perf_event_exit_cpu(unsigned int cpu); #else #define perf_event_init_cpu NULL #define perf_event_exit_cpu NULL #endif extern void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now); /* * Snapshot branch stack on software events. * * Branch stack can be very useful in understanding software events. For * example, when a long function, e.g. sys_perf_event_open, returns an * errno, it is not obvious why the function failed. Branch stack could * provide very helpful information in this type of scenarios. * * On software event, it is necessary to stop the hardware branch recorder * fast. Otherwise, the hardware register/buffer will be flushed with * entries of the triggering event. Therefore, static call is used to * stop the hardware recorder. */ /* * cnt is the number of entries allocated for entries. * Return number of entries copied to . */ typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries, unsigned int cnt); DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t); #ifndef PERF_NEEDS_LOPWR_CB static inline void perf_lopwr_cb(bool mode) { } #endif #endif /* _LINUX_PERF_EVENT_H */
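A minimal, hedged usage sketch for the in-kernel counter API declared above (perf_event_create_kernel_counter(), perf_event_read_value(), perf_event_enable/disable(), perf_event_release_kernel()). The demo_* names, the CPU-bound hardware instruction counter and the absence of an overflow callback are illustrative assumptions, not part of the header.

/* Illustrative sketch only; not part of <linux/perf_event.h>. */
#include <linux/perf_event.h>
#include <linux/err.h>

static struct perf_event *demo_event;	/* hypothetical module-local counter */

static int demo_counter_start(int cpu)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_INSTRUCTIONS,
		.size		= sizeof(attr),
		.disabled	= 1,		/* enabled explicitly below */
	};

	/* CPU-bound counter: no task, no overflow handler, no context. */
	demo_event = perf_event_create_kernel_counter(&attr, cpu, NULL,
						      NULL, NULL);
	if (IS_ERR(demo_event))
		return PTR_ERR(demo_event);

	perf_event_enable(demo_event);
	return 0;
}

static u64 demo_counter_read(void)
{
	u64 enabled, running;

	/* Also reports the enabled/running times used for scaling. */
	return perf_event_read_value(demo_event, &enabled, &running);
}

static void demo_counter_stop(void)
{
	perf_event_disable(demo_event);
	perf_event_release_kernel(demo_event);
	demo_event = NULL;
}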
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2011 Instituto Nokia de Tecnologia * * Authors: * Lauro Ramos Venancio <lauro.venancio@openbossa.org> * Aloisio Almeida Jr <aloisio.almeida@openbossa.org> */ #ifndef __LOCAL_NFC_H #define __LOCAL_NFC_H #include <net/nfc/nfc.h> #include <net/sock.h> #define NFC_TARGET_MODE_IDLE 0 #define NFC_TARGET_MODE_SLEEP 1 struct nfc_protocol { int id; struct proto *proto; struct module *owner; int (*create)(struct net *net, struct socket *sock, const struct nfc_protocol *nfc_proto, int kern); }; struct nfc_rawsock { struct sock sk; struct nfc_dev *dev; u32 target_idx; struct work_struct tx_work; bool tx_work_scheduled; }; struct nfc_sock_list { struct hlist_head head; rwlock_t lock; }; #define nfc_rawsock(sk) ((struct nfc_rawsock *) sk) #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) struct nfc_llcp_sdp_tlv; void nfc_llcp_mac_is_down(struct nfc_dev *dev); void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_llcp_register_device(struct nfc_dev *dev); void nfc_llcp_unregister_device(struct nfc_dev *dev); int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); int nfc_llcp_local_put(struct nfc_llcp_local *local); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head); int __init rawsock_init(void); void rawsock_exit(void); int __init af_nfc_init(void); void af_nfc_exit(void); int nfc_proto_register(const struct nfc_protocol *nfc_proto); void nfc_proto_unregister(const struct nfc_protocol *nfc_proto); extern int nfc_devlist_generation; extern struct mutex nfc_devlist_mutex; int __init nfc_genl_init(void); void nfc_genl_exit(void); void nfc_genl_data_init(struct nfc_genl_data *genl_data); void nfc_genl_data_exit(struct nfc_genl_data *genl_data); int nfc_genl_targets_found(struct nfc_dev *dev); int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx); int nfc_genl_device_added(struct nfc_dev *dev); int nfc_genl_device_removed(struct nfc_dev *dev); int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_genl_dep_link_down_event(struct nfc_dev *dev); int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol); int nfc_genl_tm_deactivated(struct nfc_dev *dev); int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction); int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx); struct nfc_dev *nfc_get_device(unsigned
int idx); static inline void nfc_put_device(struct nfc_dev *dev) { put_device(&dev->dev); } static inline void nfc_device_iter_init(struct class_dev_iter *iter) { class_dev_iter_init(iter, &nfc_class, NULL, NULL); } static inline struct nfc_dev *nfc_device_iter_next(struct class_dev_iter *iter) { struct device *d = class_dev_iter_next(iter); if (!d) return NULL; return to_nfc_dev(d); } static inline void nfc_device_iter_exit(struct class_dev_iter *iter) { class_dev_iter_exit(iter); } int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name); int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, u32 result); int nfc_dev_up(struct nfc_dev *dev); int nfc_dev_down(struct nfc_dev *dev); int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols); int nfc_stop_poll(struct nfc_dev *dev); int nfc_dep_link_up(struct nfc_dev *dev, int target_idx, u8 comm_mode); int nfc_dep_link_down(struct nfc_dev *dev); int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol); int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode); int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); int nfc_enable_se(struct nfc_dev *dev, u32 se_idx); int nfc_disable_se(struct nfc_dev *dev, u32 se_idx); #endif /* __LOCAL_NFC_H */
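A hedged sketch of how the device lookup and iteration helpers above might be driven from NFC core code. The demo_* functions are illustrative only, and the sketch assumes nfc_get_device() returns NULL when no device with the given index exists.

/* Illustrative sketch only; not part of the local nfc.h header. */
#include <linux/device.h>
#include <linux/errno.h>
#include "nfc.h"	/* the local header above */

static void demo_list_devices(void)
{
	struct class_dev_iter iter;
	struct nfc_dev *dev;

	nfc_device_iter_init(&iter);
	while ((dev = nfc_device_iter_next(&iter)) != NULL)
		pr_info("nfc: found %s\n", dev_name(&dev->dev));
	nfc_device_iter_exit(&iter);
}

static int demo_power_cycle(unsigned int idx)
{
	struct nfc_dev *dev = nfc_get_device(idx);
	int rc;

	if (!dev)
		return -ENODEV;

	rc = nfc_dev_up(dev);
	if (!rc)
		rc = nfc_dev_down(dev);

	nfc_put_device(dev);	/* drop the reference taken by nfc_get_device() */
	return rc;
}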
// SPDX-License-Identifier: GPL-2.0-only #include <linux/blkdev.h> #include <linux/wait.h> #include <linux/rbtree.h> #include <linux/kthread.h> #include <linux/backing-dev.h> #include <linux/blk-cgroup.h> #include <linux/freezer.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/sched.h> #include <linux/module.h> #include <linux/writeback.h> #include <linux/device.h> #include <trace/events/writeback.h> #include "internal.h" struct backing_dev_info noop_backing_dev_info; EXPORT_SYMBOL_GPL(noop_backing_dev_info); static const char *bdi_unknown_name = "(unknown)"; /* * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU * reader side locking. */ DEFINE_SPINLOCK(bdi_lock); static u64 bdi_id_cursor; static struct rb_root bdi_tree = RB_ROOT; LIST_HEAD(bdi_list); /* bdi_wq serves all asynchronous writeback tasks */ struct workqueue_struct *bdi_wq; #ifdef CONFIG_DEBUG_FS #include <linux/debugfs.h> #include <linux/seq_file.h> static struct dentry *bdi_debug_root; static void bdi_debug_init(void) { bdi_debug_root = debugfs_create_dir("bdi", NULL); } static int bdi_debug_stats_show(struct seq_file *m, void *v) { struct backing_dev_info *bdi = m->private; struct bdi_writeback *wb = &bdi->wb; unsigned long background_thresh; unsigned long dirty_thresh; unsigned long wb_thresh; unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time; struct inode *inode; nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0; spin_lock(&wb->list_lock); list_for_each_entry(inode, &wb->b_dirty, i_io_list) nr_dirty++; list_for_each_entry(inode, &wb->b_io, i_io_list) nr_io++; list_for_each_entry(inode, &wb->b_more_io, i_io_list) nr_more_io++; list_for_each_entry(inode, &wb->b_dirty_time, i_io_list) if (inode->i_state & I_DIRTY_TIME) nr_dirty_time++; spin_unlock(&wb->list_lock); global_dirty_limits(&background_thresh, &dirty_thresh); wb_thresh = wb_calc_thresh(wb, dirty_thresh); seq_printf(m, "BdiWriteback: %10lu kB\n" "BdiReclaimable: %10lu kB\n" "BdiDirtyThresh: %10lu kB\n" "DirtyThresh: %10lu kB\n" "BackgroundThresh: %10lu kB\n" "BdiDirtied: %10lu kB\n" "BdiWritten: %10lu kB\n" "BdiWriteBandwidth: %10lu kBps\n" "b_dirty: %10lu\n" "b_io: %10lu\n" "b_more_io: %10lu\n" "b_dirty_time: %10lu\n" "bdi_list: %10u\n" "state: %10lx\n", (unsigned long) K(wb_stat(wb, WB_WRITEBACK)), (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)), K(wb_thresh), K(dirty_thresh), K(background_thresh), (unsigned long) K(wb_stat(wb, WB_DIRTIED)), (unsigned long) K(wb_stat(wb, WB_WRITTEN)), (unsigned long) K(wb->write_bandwidth), nr_dirty, nr_io, nr_more_io, nr_dirty_time, !list_empty(&bdi->bdi_list),
bdi->wb.state); return 0; } DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats); static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) { bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); debugfs_create_file("stats", 0444, bdi->debug_dir, bdi, &bdi_debug_stats_fops); } static void bdi_debug_unregister(struct backing_dev_info *bdi) { debugfs_remove_recursive(bdi->debug_dir); } #else static inline void bdi_debug_init(void) { } static inline void bdi_debug_register(struct backing_dev_info *bdi, const char *name) { } static inline void bdi_debug_unregister(struct backing_dev_info *bdi) { } #endif static ssize_t read_ahead_kb_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct backing_dev_info *bdi = dev_get_drvdata(dev); unsigned long read_ahead_kb; ssize_t ret; ret = kstrtoul(buf, 10, &read_ahead_kb); if (ret < 0) return ret; bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10); return count; } #define BDI_SHOW(name, expr) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct backing_dev_info *bdi = dev_get_drvdata(dev); \ \ return sysfs_emit(buf, "%lld\n", (long long)expr); \ } \ static DEVICE_ATTR_RW(name); BDI_SHOW(read_ahead_kb, K(bdi->ra_pages)) static ssize_t min_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct backing_dev_info *bdi = dev_get_drvdata(dev); unsigned int ratio; ssize_t ret; ret = kstrtouint(buf, 10, &ratio); if (ret < 0) return ret; ret = bdi_set_min_ratio(bdi, ratio); if (!ret) ret = count; return ret; } BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) static ssize_t min_ratio_fine_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct backing_dev_info *bdi = dev_get_drvdata(dev); unsigned int ratio; ssize_t ret; ret = kstrtouint(buf, 10, &ratio); if (ret < 0) return ret; ret = bdi_set_min_ratio_no_scale(bdi, ratio); if (!ret) ret = count; return ret; } BDI_SHOW(min_ratio_fine, bdi->min_ratio) static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct backing_dev_info *bdi = dev_get_drvdata(dev); unsigned int ratio; ssize_t ret; ret = kstrtouint(buf, 10, &ratio); if (ret < 0) return ret; ret = bdi_set_max_ratio(bdi, ratio); if (!ret) ret = count; return ret; } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) static ssize_t max_ratio_fine_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct backing_dev_info *bdi = dev_get_drvdata(dev); unsigned int ratio; ssize_t ret; ret = kstrtouint(buf, 10, &ratio); if (ret < 0) return ret; ret = bdi_set_max_ratio_no_scale(bdi, ratio); if (!ret) ret = count; return ret; } BDI_SHOW(max_ratio_fine, bdi->max_ratio) static ssize_t min_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backing_dev_info *bdi = dev_get_drvdata(dev); return sysfs_emit(buf, "%llu\n", bdi_get_min_bytes(bdi)); } static ssize_t min_bytes_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct backing_dev_info *bdi = dev_get_drvdata(dev); u64 bytes; ssize_t ret; ret = kstrtoull(buf, 10, &bytes); if (ret < 0) return ret; ret = bdi_set_min_bytes(bdi, bytes); if (!ret) ret = count; return ret; } static DEVICE_ATTR_RW(min_bytes); static ssize_t max_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backing_dev_info *bdi = dev_get_drvdata(dev); 
return sysfs_emit(buf, "%llu\n", bdi_get_max_bytes(bdi)); } static ssize_t max_bytes_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct backing_dev_info *bdi = dev_get_drvdata(dev); u64 bytes; ssize_t ret; ret = kstrtoull(buf, 10, &bytes); if (ret < 0) return ret; ret = bdi_set_max_bytes(bdi, bytes); if (!ret) ret = count; return ret; } static DEVICE_ATTR_RW(max_bytes); static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, char *buf) { dev_warn_once(dev, "the stable_pages_required attribute has been removed. Use the stable_writes queue attribute instead.\n"); return sysfs_emit(buf, "%d\n", 0); } static DEVICE_ATTR_RO(stable_pages_required); static ssize_t strict_limit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct backing_dev_info *bdi = dev_get_drvdata(dev); unsigned int strict_limit; ssize_t ret; ret = kstrtouint(buf, 10, &strict_limit); if (ret < 0) return ret; ret = bdi_set_strict_limit(bdi, strict_limit); if (!ret) ret = count; return ret; } static ssize_t strict_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backing_dev_info *bdi = dev_get_drvdata(dev); return sysfs_emit(buf, "%d\n", !!(bdi->capabilities & BDI_CAP_STRICTLIMIT)); } static DEVICE_ATTR_RW(strict_limit); static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_min_ratio_fine.attr, &dev_attr_max_ratio.attr, &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, NULL, }; ATTRIBUTE_GROUPS(bdi_dev); static const struct class bdi_class = { .name = "bdi", .dev_groups = bdi_dev_groups, }; static __init int bdi_class_init(void) { int ret; ret = class_register(&bdi_class); if (ret) return ret; bdi_debug_init(); return 0; } postcore_initcall(bdi_class_init); static int __init default_bdi_init(void) { bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0); if (!bdi_wq) return -ENOMEM; return 0; } subsys_initcall(default_bdi_init); static void wb_update_bandwidth_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(to_delayed_work(work), struct bdi_writeback, bw_dwork); wb_update_bandwidth(wb); } /* * Initial write bandwidth: 100 MB/s */ #define INIT_BW (100 << (20 - PAGE_SHIFT)) static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, gfp_t gfp) { int i, err; memset(wb, 0, sizeof(*wb)); wb->bdi = bdi; wb->last_old_flush = jiffies; INIT_LIST_HEAD(&wb->b_dirty); INIT_LIST_HEAD(&wb->b_io); INIT_LIST_HEAD(&wb->b_more_io); INIT_LIST_HEAD(&wb->b_dirty_time); spin_lock_init(&wb->list_lock); atomic_set(&wb->writeback_inodes, 0); wb->bw_time_stamp = jiffies; wb->balanced_dirty_ratelimit = INIT_BW; wb->dirty_ratelimit = INIT_BW; wb->write_bandwidth = INIT_BW; wb->avg_write_bandwidth = INIT_BW; spin_lock_init(&wb->work_lock); INIT_LIST_HEAD(&wb->work_list); INIT_DELAYED_WORK(&wb->dwork, wb_workfn); INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn); err = fprop_local_init_percpu(&wb->completions, gfp); if (err) return err; for (i = 0; i < NR_WB_STAT_ITEMS; i++) { err = percpu_counter_init(&wb->stat[i], 0, gfp); if (err) goto out_destroy_stat; } return 0; out_destroy_stat: while (i--) percpu_counter_destroy(&wb->stat[i]); fprop_local_destroy_percpu(&wb->completions); return err; } static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb); /* * Remove 
bdi from the global list and shutdown any threads we have running */ static void wb_shutdown(struct bdi_writeback *wb) { /* Make sure nobody queues further work */ spin_lock_irq(&wb->work_lock); if (!test_and_clear_bit(WB_registered, &wb->state)) { spin_unlock_irq(&wb->work_lock); return; } spin_unlock_irq(&wb->work_lock); cgwb_remove_from_bdi_list(wb); /* * Drain work list and shutdown the delayed_work. !WB_registered * tells wb_workfn() that @wb is dying and its work_list needs to * be drained no matter what. */ mod_delayed_work(bdi_wq, &wb->dwork, 0); flush_delayed_work(&wb->dwork); WARN_ON(!list_empty(&wb->work_list)); flush_delayed_work(&wb->bw_dwork); } static void wb_exit(struct bdi_writeback *wb) { int i; WARN_ON(delayed_work_pending(&wb->dwork)); for (i = 0; i < NR_WB_STAT_ITEMS; i++) percpu_counter_destroy(&wb->stat[i]); fprop_local_destroy_percpu(&wb->completions); } #ifdef CONFIG_CGROUP_WRITEBACK #include <linux/memcontrol.h> /* * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, offline_cgwbs and * memcg->cgwb_list. bdi->cgwb_tree is also RCU protected. */ static DEFINE_SPINLOCK(cgwb_lock); static struct workqueue_struct *cgwb_release_wq; static LIST_HEAD(offline_cgwbs); static void cleanup_offline_cgwbs_workfn(struct work_struct *work); static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn); static void cgwb_free_rcu(struct rcu_head *rcu_head) { struct bdi_writeback *wb = container_of(rcu_head, struct bdi_writeback, rcu); percpu_ref_exit(&wb->refcnt); kfree(wb); } static void cgwb_release_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); struct backing_dev_info *bdi = wb->bdi; mutex_lock(&wb->bdi->cgwb_release_mutex); wb_shutdown(wb); css_put(wb->memcg_css); css_put(wb->blkcg_css); mutex_unlock(&wb->bdi->cgwb_release_mutex); /* triggers blkg destruction if no online users left */ blkcg_unpin_online(wb->blkcg_css); fprop_local_destroy_percpu(&wb->memcg_completions); spin_lock_irq(&cgwb_lock); list_del(&wb->offline_node); spin_unlock_irq(&cgwb_lock); wb_exit(wb); bdi_put(bdi); WARN_ON_ONCE(!list_empty(&wb->b_attached)); call_rcu(&wb->rcu, cgwb_free_rcu); } static void cgwb_release(struct percpu_ref *refcnt) { struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback, refcnt); queue_work(cgwb_release_wq, &wb->release_work); } static void cgwb_kill(struct bdi_writeback *wb) { lockdep_assert_held(&cgwb_lock); WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id)); list_del(&wb->memcg_node); list_del(&wb->blkcg_node); list_add(&wb->offline_node, &offline_cgwbs); percpu_ref_kill(&wb->refcnt); } static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) { spin_lock_irq(&cgwb_lock); list_del_rcu(&wb->bdi_node); spin_unlock_irq(&cgwb_lock); } static int cgwb_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp) { struct mem_cgroup *memcg; struct cgroup_subsys_state *blkcg_css; struct list_head *memcg_cgwb_list, *blkcg_cgwb_list; struct bdi_writeback *wb; unsigned long flags; int ret = 0; memcg = mem_cgroup_from_css(memcg_css); blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); memcg_cgwb_list = &memcg->cgwb_list; blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css); /* look up again under lock and discard on blkcg mismatch */ spin_lock_irqsave(&cgwb_lock, flags); wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); if (wb && wb->blkcg_css != blkcg_css) { cgwb_kill(wb); wb = NULL; } spin_unlock_irqrestore(&cgwb_lock, 
flags); if (wb) goto out_put; /* need to create a new one */ wb = kmalloc(sizeof(*wb), gfp); if (!wb) { ret = -ENOMEM; goto out_put; } ret = wb_init(wb, bdi, gfp); if (ret) goto err_free; ret = percpu_ref_init(&wb->refcnt, cgwb_release, 0, gfp); if (ret) goto err_wb_exit; ret = fprop_local_init_percpu(&wb->memcg_completions, gfp); if (ret) goto err_ref_exit; wb->memcg_css = memcg_css; wb->blkcg_css = blkcg_css; INIT_LIST_HEAD(&wb->b_attached); INIT_WORK(&wb->release_work, cgwb_release_workfn); set_bit(WB_registered, &wb->state); bdi_get(bdi); /* * The root wb determines the registered state of the whole bdi and * memcg_cgwb_list and blkcg_cgwb_list's next pointers indicate * whether they're still online. Don't link @wb if any is dead. * See wb_memcg_offline() and wb_blkcg_offline(). */ ret = -ENODEV; spin_lock_irqsave(&cgwb_lock, flags); if (test_bit(WB_registered, &bdi->wb.state) && blkcg_cgwb_list->next && memcg_cgwb_list->next) { /* we might have raced another instance of this function */ ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb); if (!ret) { list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); blkcg_pin_online(blkcg_css); css_get(memcg_css); css_get(blkcg_css); } } spin_unlock_irqrestore(&cgwb_lock, flags); if (ret) { if (ret == -EEXIST) ret = 0; goto err_fprop_exit; } goto out_put; err_fprop_exit: bdi_put(bdi); fprop_local_destroy_percpu(&wb->memcg_completions); err_ref_exit: percpu_ref_exit(&wb->refcnt); err_wb_exit: wb_exit(wb); err_free: kfree(wb); out_put: css_put(blkcg_css); return ret; } /** * wb_get_lookup - get wb for a given memcg * @bdi: target bdi * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref) * * Try to get the wb for @memcg_css on @bdi. The returned wb has its * refcount incremented. * * This function uses css_get() on @memcg_css and thus expects its refcnt * to be positive on invocation. IOW, rcu_read_lock() protection on * @memcg_css isn't enough. try_get it before calling this function. * * A wb is keyed by its associated memcg. As blkcg implicitly enables * memcg on the default hierarchy, memcg association is guaranteed to be * more specific (equal or descendant to the associated blkcg) and thus can * identify both the memcg and blkcg associations. * * Because the blkcg associated with a memcg may change as blkcg is enabled * and disabled closer to root in the hierarchy, each wb keeps track of * both the memcg and blkcg associated with it and verifies the blkcg on * each lookup. On mismatch, the existing wb is discarded and a new one is * created. */ struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css) { struct bdi_writeback *wb; if (!memcg_css->parent) return &bdi->wb; rcu_read_lock(); wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); if (wb) { struct cgroup_subsys_state *blkcg_css; /* see whether the blkcg association has changed */ blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); if (unlikely(wb->blkcg_css != blkcg_css || !wb_tryget(wb))) wb = NULL; css_put(blkcg_css); } rcu_read_unlock(); return wb; } /** * wb_get_create - get wb for a given memcg, create if necessary * @bdi: target bdi * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref) * @gfp: allocation mask to use * * Try to get the wb for @memcg_css on @bdi. If it doesn't exist, try to * create one. See wb_get_lookup() for more details. 
*/ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp) { struct bdi_writeback *wb; might_alloc(gfp); do { wb = wb_get_lookup(bdi, memcg_css); } while (!wb && !cgwb_create(bdi, memcg_css, gfp)); return wb; } static int cgwb_bdi_init(struct backing_dev_info *bdi) { int ret; INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); mutex_init(&bdi->cgwb_release_mutex); init_rwsem(&bdi->wb_switch_rwsem); ret = wb_init(&bdi->wb, bdi, GFP_KERNEL); if (!ret) { bdi->wb.memcg_css = &root_mem_cgroup->css; bdi->wb.blkcg_css = blkcg_root_css; } return ret; } static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { struct radix_tree_iter iter; void **slot; struct bdi_writeback *wb; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); spin_unlock_irq(&cgwb_lock); mutex_lock(&bdi->cgwb_release_mutex); spin_lock_irq(&cgwb_lock); while (!list_empty(&bdi->wb_list)) { wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, bdi_node); spin_unlock_irq(&cgwb_lock); wb_shutdown(wb); spin_lock_irq(&cgwb_lock); } spin_unlock_irq(&cgwb_lock); mutex_unlock(&bdi->cgwb_release_mutex); } /* * cleanup_offline_cgwbs_workfn - try to release dying cgwbs * * Try to release dying cgwbs by switching attached inodes to the nearest * living ancestor's writeback. Processed wbs are placed at the end * of the list to guarantee the forward progress. */ static void cleanup_offline_cgwbs_workfn(struct work_struct *work) { struct bdi_writeback *wb; LIST_HEAD(processed); spin_lock_irq(&cgwb_lock); while (!list_empty(&offline_cgwbs)) { wb = list_first_entry(&offline_cgwbs, struct bdi_writeback, offline_node); list_move(&wb->offline_node, &processed); /* * If wb is dirty, cleaning up the writeback by switching * attached inodes will result in an effective removal of any * bandwidth restrictions, which isn't the goal. Instead, * it can be postponed until the next time, when all io * will be likely completed. If in the meantime some inodes * will get re-dirtied, they should be eventually switched to * a new cgwb. */ if (wb_has_dirty_io(wb)) continue; if (!wb_tryget(wb)) continue; spin_unlock_irq(&cgwb_lock); while (cleanup_offline_cgwb(wb)) cond_resched(); spin_lock_irq(&cgwb_lock); wb_put(wb); } if (!list_empty(&processed)) list_splice_tail(&processed, &offline_cgwbs); spin_unlock_irq(&cgwb_lock); } /** * wb_memcg_offline - kill all wb's associated with a memcg being offlined * @memcg: memcg being offlined * * Also prevents creation of any new wb's associated with @memcg. */ void wb_memcg_offline(struct mem_cgroup *memcg) { struct list_head *memcg_cgwb_list = &memcg->cgwb_list; struct bdi_writeback *wb, *next; spin_lock_irq(&cgwb_lock); list_for_each_entry_safe(wb, next, memcg_cgwb_list, memcg_node) cgwb_kill(wb); memcg_cgwb_list->next = NULL; /* prevent new wb's */ spin_unlock_irq(&cgwb_lock); queue_work(system_unbound_wq, &cleanup_offline_cgwbs_work); } /** * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined * @css: blkcg being offlined * * Also prevents creation of any new wb's associated with @blkcg. 
*/ void wb_blkcg_offline(struct cgroup_subsys_state *css) { struct bdi_writeback *wb, *next; struct list_head *list = blkcg_get_cgwb_list(css); spin_lock_irq(&cgwb_lock); list_for_each_entry_safe(wb, next, list, blkcg_node) cgwb_kill(wb); list->next = NULL; /* prevent new wb's */ spin_unlock_irq(&cgwb_lock); } static void cgwb_bdi_register(struct backing_dev_info *bdi) { spin_lock_irq(&cgwb_lock); list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); spin_unlock_irq(&cgwb_lock); } static int __init cgwb_init(void) { /* * There can be many concurrent release work items overwhelming * system_wq. Put them in a separate wq and limit concurrency. * There's no point in executing many of these in parallel. */ cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1); if (!cgwb_release_wq) return -ENOMEM; return 0; } subsys_initcall(cgwb_init); #else /* CONFIG_CGROUP_WRITEBACK */ static int cgwb_bdi_init(struct backing_dev_info *bdi) { return wb_init(&bdi->wb, bdi, GFP_KERNEL); } static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { } static void cgwb_bdi_register(struct backing_dev_info *bdi) { list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); } static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) { list_del_rcu(&wb->bdi_node); } #endif /* CONFIG_CGROUP_WRITEBACK */ int bdi_init(struct backing_dev_info *bdi) { bdi->dev = NULL; kref_init(&bdi->refcnt); bdi->min_ratio = 0; bdi->max_ratio = 100 * BDI_RATIO_SCALE; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); init_waitqueue_head(&bdi->wb_waitq); bdi->last_bdp_sleep = jiffies; return cgwb_bdi_init(bdi); } struct backing_dev_info *bdi_alloc(int node_id) { struct backing_dev_info *bdi; bdi = kzalloc_node(sizeof(*bdi), GFP_KERNEL, node_id); if (!bdi) return NULL; if (bdi_init(bdi)) { kfree(bdi); return NULL; } bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT; bdi->ra_pages = VM_READAHEAD_PAGES; bdi->io_pages = VM_READAHEAD_PAGES; timer_setup(&bdi->laptop_mode_wb_timer, laptop_mode_timer_fn, 0); return bdi; } EXPORT_SYMBOL(bdi_alloc); static struct rb_node **bdi_lookup_rb_node(u64 id, struct rb_node **parentp) { struct rb_node **p = &bdi_tree.rb_node; struct rb_node *parent = NULL; struct backing_dev_info *bdi; lockdep_assert_held(&bdi_lock); while (*p) { parent = *p; bdi = rb_entry(parent, struct backing_dev_info, rb_node); if (bdi->id > id) p = &(*p)->rb_left; else if (bdi->id < id) p = &(*p)->rb_right; else break; } if (parentp) *parentp = parent; return p; } /** * bdi_get_by_id - lookup and get bdi from its id * @id: bdi id to lookup * * Find bdi matching @id and get it. Returns NULL if the matching bdi * doesn't exist or is already unregistered. 
*/ struct backing_dev_info *bdi_get_by_id(u64 id) { struct backing_dev_info *bdi = NULL; struct rb_node **p; spin_lock_bh(&bdi_lock); p = bdi_lookup_rb_node(id, NULL); if (*p) { bdi = rb_entry(*p, struct backing_dev_info, rb_node); bdi_get(bdi); } spin_unlock_bh(&bdi_lock); return bdi; } int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) { struct device *dev; struct rb_node *parent, **p; if (bdi->dev) /* The driver needs to use separate queues per device */ return 0; vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args); dev = device_create(&bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name); if (IS_ERR(dev)) return PTR_ERR(dev); cgwb_bdi_register(bdi); bdi->dev = dev; bdi_debug_register(bdi, dev_name(dev)); set_bit(WB_registered, &bdi->wb.state); spin_lock_bh(&bdi_lock); bdi->id = ++bdi_id_cursor; p = bdi_lookup_rb_node(bdi->id, &parent); rb_link_node(&bdi->rb_node, parent, p); rb_insert_color(&bdi->rb_node, &bdi_tree); list_add_tail_rcu(&bdi->bdi_list, &bdi_list); spin_unlock_bh(&bdi_lock); trace_writeback_bdi_register(bdi); return 0; } int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...) { va_list args; int ret; va_start(args, fmt); ret = bdi_register_va(bdi, fmt, args); va_end(args); return ret; } EXPORT_SYMBOL(bdi_register); void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner) { WARN_ON_ONCE(bdi->owner); bdi->owner = owner; get_device(owner); } /* * Remove bdi from bdi_list, and ensure that it is no longer visible */ static void bdi_remove_from_list(struct backing_dev_info *bdi) { spin_lock_bh(&bdi_lock); rb_erase(&bdi->rb_node, &bdi_tree); list_del_rcu(&bdi->bdi_list); spin_unlock_bh(&bdi_lock); synchronize_rcu_expedited(); } void bdi_unregister(struct backing_dev_info *bdi) { del_timer_sync(&bdi->laptop_mode_wb_timer); /* make sure nobody finds us on the bdi_list anymore */ bdi_remove_from_list(bdi); wb_shutdown(&bdi->wb); cgwb_bdi_unregister(bdi); /* * If this BDI's min ratio has been set, use bdi_set_min_ratio() to * update the global bdi_min_ratio. */ if (bdi->min_ratio) bdi_set_min_ratio(bdi, 0); if (bdi->dev) { bdi_debug_unregister(bdi); device_unregister(bdi->dev); bdi->dev = NULL; } if (bdi->owner) { put_device(bdi->owner); bdi->owner = NULL; } } EXPORT_SYMBOL(bdi_unregister); static void release_bdi(struct kref *ref) { struct backing_dev_info *bdi = container_of(ref, struct backing_dev_info, refcnt); WARN_ON_ONCE(test_bit(WB_registered, &bdi->wb.state)); WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); kfree(bdi); } void bdi_put(struct backing_dev_info *bdi) { kref_put(&bdi->refcnt, release_bdi); } EXPORT_SYMBOL(bdi_put); struct backing_dev_info *inode_to_bdi(struct inode *inode) { struct super_block *sb; if (!inode) return &noop_backing_dev_info; sb = inode->i_sb; #ifdef CONFIG_BLOCK if (sb_is_blkdev_sb(sb)) return I_BDEV(inode)->bd_disk->bdi; #endif return sb->s_bdi; } EXPORT_SYMBOL(inode_to_bdi); const char *bdi_dev_name(struct backing_dev_info *bdi) { if (!bdi || !bdi->dev) return bdi_unknown_name; return bdi->dev_name; } EXPORT_SYMBOL_GPL(bdi_dev_name);
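A hedged sketch of the allocate/register/unregister/put lifecycle that a block driver or filesystem might follow using the helpers defined above (bdi_alloc(), bdi_register(), bdi_unregister(), bdi_put(), bdi_dev_name()). The demo_* functions and the "demo-%d" device name are illustrative assumptions.

/* Illustrative sketch only; not part of mm/backing-dev.c. */
#include <linux/backing-dev.h>
#include <linux/numa.h>

static struct backing_dev_info *demo_bdi;

static int demo_bdi_setup(void)
{
	int err;

	demo_bdi = bdi_alloc(NUMA_NO_NODE);	/* reference count starts at 1 */
	if (!demo_bdi)
		return -ENOMEM;

	err = bdi_register(demo_bdi, "demo-%d", 0);
	if (err) {
		bdi_put(demo_bdi);		/* drop the initial reference */
		demo_bdi = NULL;
		return err;
	}

	pr_info("registered bdi %s\n", bdi_dev_name(demo_bdi));
	return 0;
}

static void demo_bdi_teardown(void)
{
	bdi_unregister(demo_bdi);	/* shuts down writeback, sysfs and debugfs */
	bdi_put(demo_bdi);		/* final reference; frees the bdi */
	demo_bdi = NULL;
}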
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 
// SPDX-License-Identifier: GPL-2.0 /* * Written for linux by 
Johan Myreen as a translation from * the assembly version by Linus (with diacriticals added) * * Some additional features added by Christoph Niemann (ChN), March 1993 * * Loadable keymaps by Risto Kankkunen, May 1993 * * Diacriticals redone & other small changes, aeb@cwi.nl, June 1993 * Added decr/incr_console, dynamic keymaps, Unicode support, * dynamic function/string keys, led setting, Sept 1994 * `Sticky' modifier keys, 951006. * * 11-11-96: SAK should now work in the raw mode (Martin Mares) * * Modified to provide 'generic' keyboard support by Hamish Macdonald * Merge with the m68k keyboard driver and split-off of the PC low-level * parts by Geert Uytterhoeven, May 1997 * * 27-05-97: Added support for the Magic SysRq Key (Martin Mares) * 30-07-98: Dead keys redone, aeb@cwi.nl. * 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/consolemap.h> #include <linux/init.h> #include <linux/input.h> #include <linux/jiffies.h> #include <linux/kbd_diacr.h> #include <linux/kbd_kern.h> #include <linux/leds.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/nospec.h> #include <linux/notifier.h> #include <linux/reboot.h> #include <linux/sched/debug.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/tty_flip.h> #include <linux/tty.h> #include <linux/uaccess.h> #include <linux/vt_kern.h> #include <asm/irq_regs.h> /* * Exported functions/variables */ #define KBD_DEFMODE (BIT(VC_REPEAT) | BIT(VC_META)) #if defined(CONFIG_X86) || defined(CONFIG_PARISC) #include <asm/kbdleds.h> #else static inline int kbd_defleds(void) { return 0; } #endif #define KBD_DEFLOCK 0 /* * Handler Tables. */ #define K_HANDLERS\ k_self, k_fn, k_spec, k_pad,\ k_dead, k_cons, k_cur, k_shift,\ k_meta, k_ascii, k_lock, k_lowercase,\ k_slock, k_dead2, k_brl, k_ignore typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value, char up_flag); static k_handler_fn K_HANDLERS; static k_handler_fn *k_handler[16] = { K_HANDLERS }; #define FN_HANDLERS\ fn_null, fn_enter, fn_show_ptregs, fn_show_mem,\ fn_show_state, fn_send_intr, fn_lastcons, fn_caps_toggle,\ fn_num, fn_hold, fn_scroll_forw, fn_scroll_back,\ fn_boot_it, fn_caps_on, fn_compose, fn_SAK,\ fn_dec_console, fn_inc_console, fn_spawn_con, fn_bare_num typedef void (fn_handler_fn)(struct vc_data *vc); static fn_handler_fn FN_HANDLERS; static fn_handler_fn *fn_handler[] = { FN_HANDLERS }; /* * Variables exported for vt_ioctl.c */ struct vt_spawn_console vt_spawn_con = { .lock = __SPIN_LOCK_UNLOCKED(vt_spawn_con.lock), .pid = NULL, .sig = 0, }; /* * Internal Data. 
*/ static struct kbd_struct kbd_table[MAX_NR_CONSOLES]; static struct kbd_struct *kbd = kbd_table; /* maximum values each key_handler can handle */ static const unsigned char max_vals[] = { [ KT_LATIN ] = 255, [ KT_FN ] = ARRAY_SIZE(func_table) - 1, [ KT_SPEC ] = ARRAY_SIZE(fn_handler) - 1, [ KT_PAD ] = NR_PAD - 1, [ KT_DEAD ] = NR_DEAD - 1, [ KT_CONS ] = 255, [ KT_CUR ] = 3, [ KT_SHIFT ] = NR_SHIFT - 1, [ KT_META ] = 255, [ KT_ASCII ] = NR_ASCII - 1, [ KT_LOCK ] = NR_LOCK - 1, [ KT_LETTER ] = 255, [ KT_SLOCK ] = NR_LOCK - 1, [ KT_DEAD2 ] = 255, [ KT_BRL ] = NR_BRL - 1, }; static const int NR_TYPES = ARRAY_SIZE(max_vals); static void kbd_bh(struct tasklet_struct *unused); static DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh); static struct input_handler kbd_handler; static DEFINE_SPINLOCK(kbd_event_lock); static DEFINE_SPINLOCK(led_lock); static DEFINE_SPINLOCK(func_buf_lock); /* guard 'func_buf' and friends */ static DECLARE_BITMAP(key_down, KEY_CNT); /* keyboard key bitmap */ static unsigned char shift_down[NR_SHIFT]; /* shift state counters.. */ static bool dead_key_next; /* Handles a number being assembled on the number pad */ static bool npadch_active; static unsigned int npadch_value; static unsigned int diacr; static bool rep; /* flag telling character repeat */ static int shift_state = 0; static unsigned int ledstate = -1U; /* undefined */ static unsigned char ledioctl; static bool vt_switch; /* * Notifier list for console keyboard events */ static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list); int register_keyboard_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&keyboard_notifier_list, nb); } EXPORT_SYMBOL_GPL(register_keyboard_notifier); int unregister_keyboard_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb); } EXPORT_SYMBOL_GPL(unregister_keyboard_notifier); /* * Translation of scancodes to keycodes. We set them on only the first * keyboard in the list that accepts the scancode and keycode. * Explanation for not choosing the first attached keyboard anymore: * USB keyboards for example have two event devices: one for all "normal" * keys and one for extra function keys (like "volume up", "make coffee", * etc.). So this means that scancodes for the extra function keys won't * be valid for the first event device, but will be for the second. 
*/ struct getset_keycode_data { struct input_keymap_entry ke; int error; }; static int getkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; d->error = input_get_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully get one */ } static int getkeycode(unsigned int scancode) { struct getset_keycode_data d = { .ke = { .flags = 0, .len = sizeof(scancode), .keycode = 0, }, .error = -ENODEV, }; memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper); return d.error ?: d.ke.keycode; } static int setkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; d->error = input_set_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully set one */ } static int setkeycode(unsigned int scancode, unsigned int keycode) { struct getset_keycode_data d = { .ke = { .flags = 0, .len = sizeof(scancode), .keycode = keycode, }, .error = -ENODEV, }; memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper); return d.error; } /* * Making beeps and bells. Note that we prefer beeps to bells, but when * shutting the sound off we do both. */ static int kd_sound_helper(struct input_handle *handle, void *data) { unsigned int *hz = data; struct input_dev *dev = handle->dev; if (test_bit(EV_SND, dev->evbit)) { if (test_bit(SND_TONE, dev->sndbit)) { input_inject_event(handle, EV_SND, SND_TONE, *hz); if (*hz) return 0; } if (test_bit(SND_BELL, dev->sndbit)) input_inject_event(handle, EV_SND, SND_BELL, *hz ? 1 : 0); } return 0; } static void kd_nosound(struct timer_list *unused) { static unsigned int zero; input_handler_for_each_handle(&kbd_handler, &zero, kd_sound_helper); } static DEFINE_TIMER(kd_mksound_timer, kd_nosound); void kd_mksound(unsigned int hz, unsigned int ticks) { del_timer_sync(&kd_mksound_timer); input_handler_for_each_handle(&kbd_handler, &hz, kd_sound_helper); if (hz && ticks) mod_timer(&kd_mksound_timer, jiffies + ticks); } EXPORT_SYMBOL(kd_mksound); /* * Setting the keyboard rate. */ static int kbd_rate_helper(struct input_handle *handle, void *data) { struct input_dev *dev = handle->dev; struct kbd_repeat *rpt = data; if (test_bit(EV_REP, dev->evbit)) { if (rpt[0].delay > 0) input_inject_event(handle, EV_REP, REP_DELAY, rpt[0].delay); if (rpt[0].period > 0) input_inject_event(handle, EV_REP, REP_PERIOD, rpt[0].period); rpt[1].delay = dev->rep[REP_DELAY]; rpt[1].period = dev->rep[REP_PERIOD]; } return 0; } int kbd_rate(struct kbd_repeat *rpt) { struct kbd_repeat data[2] = { *rpt }; input_handler_for_each_handle(&kbd_handler, data, kbd_rate_helper); *rpt = data[1]; /* Copy currently used settings */ return 0; } /* * Helper Functions. */ static void put_queue(struct vc_data *vc, int ch) { tty_insert_flip_char(&vc->port, ch, 0); tty_flip_buffer_push(&vc->port); } static void puts_queue(struct vc_data *vc, const char *cp) { tty_insert_flip_string(&vc->port, cp, strlen(cp)); tty_flip_buffer_push(&vc->port); } static void applkey(struct vc_data *vc, int key, char mode) { static char buf[] = { 0x1b, 'O', 0x00, 0x00 }; buf[1] = (mode ? 'O' : '['); buf[2] = key; puts_queue(vc, buf); } /* * Many other routines do put_queue, but I think either * they produce ASCII, or they produce some user-assigned * string, and in both cases we might assume that it is * in utf-8 already. 
*/ static void to_utf8(struct vc_data *vc, uint c) { if (c < 0x80) /* 0******* */ put_queue(vc, c); else if (c < 0x800) { /* 110***** 10****** */ put_queue(vc, 0xc0 | (c >> 6)); put_queue(vc, 0x80 | (c & 0x3f)); } else if (c < 0x10000) { if (c >= 0xD800 && c < 0xE000) return; if (c == 0xFFFF) return; /* 1110**** 10****** 10****** */ put_queue(vc, 0xe0 | (c >> 12)); put_queue(vc, 0x80 | ((c >> 6) & 0x3f)); put_queue(vc, 0x80 | (c & 0x3f)); } else if (c < 0x110000) { /* 11110*** 10****** 10****** 10****** */ put_queue(vc, 0xf0 | (c >> 18)); put_queue(vc, 0x80 | ((c >> 12) & 0x3f)); put_queue(vc, 0x80 | ((c >> 6) & 0x3f)); put_queue(vc, 0x80 | (c & 0x3f)); } } /* FIXME: review locking for vt.c callers */ static void set_leds(void) { tasklet_schedule(&keyboard_tasklet); } /* * Called after returning from RAW mode or when changing consoles - recompute * shift_down[] and shift_state from key_down[] maybe called when keymap is * undefined, so that shiftkey release is seen. The caller must hold the * kbd_event_lock. */ static void do_compute_shiftstate(void) { unsigned int k, sym, val; shift_state = 0; memset(shift_down, 0, sizeof(shift_down)); for_each_set_bit(k, key_down, min(NR_KEYS, KEY_CNT)) { sym = U(key_maps[0][k]); if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK) continue; val = KVAL(sym); if (val == KVAL(K_CAPSSHIFT)) val = KVAL(K_SHIFT); shift_down[val]++; shift_state |= BIT(val); } } /* We still have to export this method to vt.c */ void vt_set_leds_compute_shiftstate(void) { unsigned long flags; /* * When VT is switched, the keyboard led needs to be set once. * Ensure that after the switch is completed, the state of the * keyboard LED is consistent with the state of the keyboard lock. */ vt_switch = true; set_leds(); spin_lock_irqsave(&kbd_event_lock, flags); do_compute_shiftstate(); spin_unlock_irqrestore(&kbd_event_lock, flags); } /* * We have a combining character DIACR here, followed by the character CH. * If the combination occurs in the table, return the corresponding value. * Otherwise, if CH is a space or equals DIACR, return DIACR. * Otherwise, conclude that DIACR was not combining after all, * queue it and return CH. */ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch) { unsigned int d = diacr; unsigned int i; diacr = 0; if ((d & ~0xff) == BRL_UC_ROW) { if ((ch & ~0xff) == BRL_UC_ROW) return d | ch; } else { for (i = 0; i < accent_table_size; i++) if (accent_table[i].diacr == d && accent_table[i].base == ch) return accent_table[i].result; } if (ch == ' ' || ch == (BRL_UC_ROW|0) || ch == d) return d; if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, d); else { int c = conv_uni_to_8bit(d); if (c != -1) put_queue(vc, c); } return ch; } /* * Special function handlers */ static void fn_enter(struct vc_data *vc) { if (diacr) { if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, diacr); else { int c = conv_uni_to_8bit(diacr); if (c != -1) put_queue(vc, c); } diacr = 0; } put_queue(vc, '\r'); if (vc_kbd_mode(kbd, VC_CRLF)) put_queue(vc, '\n'); } static void fn_caps_toggle(struct vc_data *vc) { if (rep) return; chg_vc_kbd_led(kbd, VC_CAPSLOCK); } static void fn_caps_on(struct vc_data *vc) { if (rep) return; set_vc_kbd_led(kbd, VC_CAPSLOCK); } static void fn_show_ptregs(struct vc_data *vc) { struct pt_regs *regs = get_irq_regs(); if (regs) show_regs(regs); } static void fn_hold(struct vc_data *vc) { struct tty_struct *tty = vc->port.tty; if (rep || !tty) return; /* * Note: SCROLLOCK will be set (cleared) by stop_tty (start_tty); * these routines are also activated by ^S/^Q. 
* (And SCROLLOCK can also be set by the ioctl KDSKBLED.) */ if (tty->flow.stopped) start_tty(tty); else stop_tty(tty); } static void fn_num(struct vc_data *vc) { if (vc_kbd_mode(kbd, VC_APPLIC)) applkey(vc, 'P', 1); else fn_bare_num(vc); } /* * Bind this to Shift-NumLock if you work in application keypad mode * but want to be able to change the NumLock flag. * Bind this to NumLock if you prefer that the NumLock key always * changes the NumLock flag. */ static void fn_bare_num(struct vc_data *vc) { if (!rep) chg_vc_kbd_led(kbd, VC_NUMLOCK); } static void fn_lastcons(struct vc_data *vc) { /* switch to the last used console, ChN */ set_console(last_console); } static void fn_dec_console(struct vc_data *vc) { int i, cur = fg_console; /* Currently switching? Queue this next switch relative to that. */ if (want_console != -1) cur = want_console; for (i = cur - 1; i != cur; i--) { if (i == -1) i = MAX_NR_CONSOLES - 1; if (vc_cons_allocated(i)) break; } set_console(i); } static void fn_inc_console(struct vc_data *vc) { int i, cur = fg_console; /* Currently switching? Queue this next switch relative to that. */ if (want_console != -1) cur = want_console; for (i = cur+1; i != cur; i++) { if (i == MAX_NR_CONSOLES) i = 0; if (vc_cons_allocated(i)) break; } set_console(i); } static void fn_send_intr(struct vc_data *vc) { tty_insert_flip_char(&vc->port, 0, TTY_BREAK); tty_flip_buffer_push(&vc->port); } static void fn_scroll_forw(struct vc_data *vc) { scrollfront(vc, 0); } static void fn_scroll_back(struct vc_data *vc) { scrollback(vc); } static void fn_show_mem(struct vc_data *vc) { show_mem(); } static void fn_show_state(struct vc_data *vc) { show_state(); } static void fn_boot_it(struct vc_data *vc) { ctrl_alt_del(); } static void fn_compose(struct vc_data *vc) { dead_key_next = true; } static void fn_spawn_con(struct vc_data *vc) { spin_lock(&vt_spawn_con.lock); if (vt_spawn_con.pid) if (kill_pid(vt_spawn_con.pid, vt_spawn_con.sig, 1)) { put_pid(vt_spawn_con.pid); vt_spawn_con.pid = NULL; } spin_unlock(&vt_spawn_con.lock); } static void fn_SAK(struct vc_data *vc) { struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work; schedule_work(SAK_work); } static void fn_null(struct vc_data *vc) { do_compute_shiftstate(); } /* * Special key handlers */ static void k_ignore(struct vc_data *vc, unsigned char value, char up_flag) { } static void k_spec(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if (value >= ARRAY_SIZE(fn_handler)) return; if ((kbd->kbdmode == VC_RAW || kbd->kbdmode == VC_MEDIUMRAW || kbd->kbdmode == VC_OFF) && value != KVAL(K_SAK)) return; /* SAK is allowed even in raw mode */ fn_handler[value](vc); } static void k_lowercase(struct vc_data *vc, unsigned char value, char up_flag) { pr_err("k_lowercase was called - impossible\n"); } static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag) { if (up_flag) return; /* no action, if this is a key release */ if (diacr) value = handle_diacr(vc, value); if (dead_key_next) { dead_key_next = false; diacr = value; return; } if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, value); else { int c = conv_uni_to_8bit(value); if (c != -1) put_queue(vc, c); } } /* * Handle dead key. Note that we now may have several * dead keys modifying the same character. Very useful * for Vietnamese. */ static void k_deadunicode(struct vc_data *vc, unsigned int value, char up_flag) { if (up_flag) return; diacr = (diacr ? 
handle_diacr(vc, value) : value); } static void k_self(struct vc_data *vc, unsigned char value, char up_flag) { k_unicode(vc, conv_8bit_to_uni(value), up_flag); } static void k_dead2(struct vc_data *vc, unsigned char value, char up_flag) { k_deadunicode(vc, value, up_flag); } /* * Obsolete - for backwards compatibility only */ static void k_dead(struct vc_data *vc, unsigned char value, char up_flag) { static const unsigned char ret_diacr[NR_DEAD] = { '`', /* dead_grave */ '\'', /* dead_acute */ '^', /* dead_circumflex */ '~', /* dead_tilda */ '"', /* dead_diaeresis */ ',', /* dead_cedilla */ '_', /* dead_macron */ 'U', /* dead_breve */ '.', /* dead_abovedot */ '*', /* dead_abovering */ '=', /* dead_doubleacute */ 'c', /* dead_caron */ 'k', /* dead_ogonek */ 'i', /* dead_iota */ '#', /* dead_voiced_sound */ 'o', /* dead_semivoiced_sound */ '!', /* dead_belowdot */ '?', /* dead_hook */ '+', /* dead_horn */ '-', /* dead_stroke */ ')', /* dead_abovecomma */ '(', /* dead_abovereversedcomma */ ':', /* dead_doublegrave */ 'n', /* dead_invertedbreve */ ';', /* dead_belowcomma */ '$', /* dead_currency */ '@', /* dead_greek */ }; k_deadunicode(vc, ret_diacr[value], up_flag); } static void k_cons(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; set_console(value); } static void k_fn(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if ((unsigned)value < ARRAY_SIZE(func_table)) { unsigned long flags; spin_lock_irqsave(&func_buf_lock, flags); if (func_table[value]) puts_queue(vc, func_table[value]); spin_unlock_irqrestore(&func_buf_lock, flags); } else pr_err("k_fn called with value=%d\n", value); } static void k_cur(struct vc_data *vc, unsigned char value, char up_flag) { static const char cur_chars[] = "BDCA"; if (up_flag) return; applkey(vc, cur_chars[value], vc_kbd_mode(kbd, VC_CKMODE)); } static void k_pad(struct vc_data *vc, unsigned char value, char up_flag) { static const char pad_chars[] = "0123456789+-*/\015,.?()#"; static const char app_map[] = "pqrstuvwxylSRQMnnmPQS"; if (up_flag) return; /* no action, if this is a key release */ /* kludge... 
shift forces cursor/number keys */ if (vc_kbd_mode(kbd, VC_APPLIC) && !shift_down[KG_SHIFT]) { applkey(vc, app_map[value], 1); return; } if (!vc_kbd_led(kbd, VC_NUMLOCK)) { switch (value) { case KVAL(K_PCOMMA): case KVAL(K_PDOT): k_fn(vc, KVAL(K_REMOVE), 0); return; case KVAL(K_P0): k_fn(vc, KVAL(K_INSERT), 0); return; case KVAL(K_P1): k_fn(vc, KVAL(K_SELECT), 0); return; case KVAL(K_P2): k_cur(vc, KVAL(K_DOWN), 0); return; case KVAL(K_P3): k_fn(vc, KVAL(K_PGDN), 0); return; case KVAL(K_P4): k_cur(vc, KVAL(K_LEFT), 0); return; case KVAL(K_P6): k_cur(vc, KVAL(K_RIGHT), 0); return; case KVAL(K_P7): k_fn(vc, KVAL(K_FIND), 0); return; case KVAL(K_P8): k_cur(vc, KVAL(K_UP), 0); return; case KVAL(K_P9): k_fn(vc, KVAL(K_PGUP), 0); return; case KVAL(K_P5): applkey(vc, 'G', vc_kbd_mode(kbd, VC_APPLIC)); return; } } put_queue(vc, pad_chars[value]); if (value == KVAL(K_PENTER) && vc_kbd_mode(kbd, VC_CRLF)) put_queue(vc, '\n'); } static void k_shift(struct vc_data *vc, unsigned char value, char up_flag) { int old_state = shift_state; if (rep) return; /* * Mimic typewriter: * a CapsShift key acts like Shift but undoes CapsLock */ if (value == KVAL(K_CAPSSHIFT)) { value = KVAL(K_SHIFT); if (!up_flag) clr_vc_kbd_led(kbd, VC_CAPSLOCK); } if (up_flag) { /* * handle the case that two shift or control * keys are depressed simultaneously */ if (shift_down[value]) shift_down[value]--; } else shift_down[value]++; if (shift_down[value]) shift_state |= BIT(value); else shift_state &= ~BIT(value); /* kludge */ if (up_flag && shift_state != old_state && npadch_active) { if (kbd->kbdmode == VC_UNICODE) to_utf8(vc, npadch_value); else put_queue(vc, npadch_value & 0xff); npadch_active = false; } } static void k_meta(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag) return; if (vc_kbd_mode(kbd, VC_META)) { put_queue(vc, '\033'); put_queue(vc, value); } else put_queue(vc, value | BIT(7)); } static void k_ascii(struct vc_data *vc, unsigned char value, char up_flag) { unsigned int base; if (up_flag) return; if (value < 10) { /* decimal input of code, while Alt depressed */ base = 10; } else { /* hexadecimal input of code, while AltGr depressed */ value -= 10; base = 16; } if (!npadch_active) { npadch_value = 0; npadch_active = true; } npadch_value = npadch_value * base + value; } static void k_lock(struct vc_data *vc, unsigned char value, char up_flag) { if (up_flag || rep) return; chg_vc_kbd_lock(kbd, value); } static void k_slock(struct vc_data *vc, unsigned char value, char up_flag) { k_shift(vc, value, up_flag); if (up_flag || rep) return; chg_vc_kbd_slock(kbd, value); /* try to make Alt, oops, AltGr and such work */ if (!key_maps[kbd->lockstate ^ kbd->slockstate]) { kbd->slockstate = 0; chg_vc_kbd_slock(kbd, value); } } /* by default, 300ms interval for combination release */ static unsigned brl_timeout = 300; MODULE_PARM_DESC(brl_timeout, "Braille keys release delay in ms (0 for commit on first key release)"); module_param(brl_timeout, uint, 0644); static unsigned brl_nbchords = 1; MODULE_PARM_DESC(brl_nbchords, "Number of chords that produce a braille pattern (0 for dead chords)"); module_param(brl_nbchords, uint, 0644); static void k_brlcommit(struct vc_data *vc, unsigned int pattern, char up_flag) { static unsigned long chords; static unsigned committed; if (!brl_nbchords) k_deadunicode(vc, BRL_UC_ROW | pattern, up_flag); else { committed |= pattern; chords++; if (chords == brl_nbchords) { k_unicode(vc, BRL_UC_ROW | committed, up_flag); chords = 0; committed = 0; } } } static void k_brl(struct 
vc_data *vc, unsigned char value, char up_flag) { static unsigned pressed, committing; static unsigned long releasestart; if (kbd->kbdmode != VC_UNICODE) { if (!up_flag) pr_warn("keyboard mode must be unicode for braille patterns\n"); return; } if (!value) { k_unicode(vc, BRL_UC_ROW, up_flag); return; } if (value > 8) return; if (!up_flag) { pressed |= BIT(value - 1); if (!brl_timeout) committing = pressed; } else if (brl_timeout) { if (!committing || time_after(jiffies, releasestart + msecs_to_jiffies(brl_timeout))) { committing = pressed; releasestart = jiffies; } pressed &= ~BIT(value - 1); if (!pressed && committing) { k_brlcommit(vc, committing, 0); committing = 0; } } else { if (committing) { k_brlcommit(vc, committing, 0); committing = 0; } pressed &= ~BIT(value - 1); } } #if IS_ENABLED(CONFIG_INPUT_LEDS) && IS_ENABLED(CONFIG_LEDS_TRIGGERS) struct kbd_led_trigger { struct led_trigger trigger; unsigned int mask; }; static int kbd_led_trigger_activate(struct led_classdev *cdev) { struct kbd_led_trigger *trigger = container_of(cdev->trigger, struct kbd_led_trigger, trigger); tasklet_disable(&keyboard_tasklet); if (ledstate != -1U) led_trigger_event(&trigger->trigger, ledstate & trigger->mask ? LED_FULL : LED_OFF); tasklet_enable(&keyboard_tasklet); return 0; } #define KBD_LED_TRIGGER(_led_bit, _name) { \ .trigger = { \ .name = _name, \ .activate = kbd_led_trigger_activate, \ }, \ .mask = BIT(_led_bit), \ } #define KBD_LOCKSTATE_TRIGGER(_led_bit, _name) \ KBD_LED_TRIGGER((_led_bit) + 8, _name) static struct kbd_led_trigger kbd_led_triggers[] = { KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrolllock"), KBD_LED_TRIGGER(VC_NUMLOCK, "kbd-numlock"), KBD_LED_TRIGGER(VC_CAPSLOCK, "kbd-capslock"), KBD_LED_TRIGGER(VC_KANALOCK, "kbd-kanalock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"), KBD_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"), KBD_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"), KBD_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"), KBD_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"), }; static void kbd_propagate_led_state(unsigned int old_state, unsigned int new_state) { struct kbd_led_trigger *trigger; unsigned int changed = old_state ^ new_state; int i; for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); i++) { trigger = &kbd_led_triggers[i]; if (changed & trigger->mask) led_trigger_event(&trigger->trigger, new_state & trigger->mask ? 
LED_FULL : LED_OFF); } } static int kbd_update_leds_helper(struct input_handle *handle, void *data) { unsigned int led_state = *(unsigned int *)data; if (test_bit(EV_LED, handle->dev->evbit)) kbd_propagate_led_state(~led_state, led_state); return 0; } static void kbd_init_leds(void) { int error; int i; for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); i++) { error = led_trigger_register(&kbd_led_triggers[i].trigger); if (error) pr_err("error %d while registering trigger %s\n", error, kbd_led_triggers[i].trigger.name); } } #else static int kbd_update_leds_helper(struct input_handle *handle, void *data) { unsigned int leds = *(unsigned int *)data; if (test_bit(EV_LED, handle->dev->evbit)) { input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & BIT(0))); input_inject_event(handle, EV_LED, LED_NUML, !!(leds & BIT(1))); input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & BIT(2))); input_inject_event(handle, EV_SYN, SYN_REPORT, 0); } return 0; } static void kbd_propagate_led_state(unsigned int old_state, unsigned int new_state) { input_handler_for_each_handle(&kbd_handler, &new_state, kbd_update_leds_helper); } static void kbd_init_leds(void) { } #endif /* * The leds display either (i) the status of NumLock, CapsLock, ScrollLock, * or (ii) whatever pattern of lights people want to show using KDSETLED, * or (iii) specified bits of specified words in kernel memory. */ static unsigned char getledstate(void) { return ledstate & 0xff; } void setledstate(struct kbd_struct *kb, unsigned int led) { unsigned long flags; spin_lock_irqsave(&led_lock, flags); if (!(led & ~7)) { ledioctl = led; kb->ledmode = LED_SHOW_IOCTL; } else kb->ledmode = LED_SHOW_FLAGS; set_leds(); spin_unlock_irqrestore(&led_lock, flags); } static inline unsigned char getleds(void) { struct kbd_struct *kb = kbd_table + fg_console; if (kb->ledmode == LED_SHOW_IOCTL) return ledioctl; return kb->ledflagstate; } /** * vt_get_leds - helper for braille console * @console: console to read * @flag: flag we want to check * * Check the status of a keyboard led flag and report it back */ int vt_get_leds(unsigned int console, int flag) { struct kbd_struct *kb = &kbd_table[console]; int ret; unsigned long flags; spin_lock_irqsave(&led_lock, flags); ret = vc_kbd_led(kb, flag); spin_unlock_irqrestore(&led_lock, flags); return ret; } EXPORT_SYMBOL_GPL(vt_get_leds); /** * vt_set_led_state - set LED state of a console * @console: console to set * @leds: LED bits * * Set the LEDs on a console. This is a wrapper for the VT layer * so that we can keep kbd knowledge internal */ void vt_set_led_state(unsigned int console, int leds) { struct kbd_struct *kb = &kbd_table[console]; setledstate(kb, leds); } /** * vt_kbd_con_start - Keyboard side of console start * @console: console * * Handle console start. This is a wrapper for the VT layer * so that we can keep kbd knowledge internal * * FIXME: We eventually need to hold the kbd lock here to protect * the LED updating. We can't do it yet because fn_hold calls stop_tty * and start_tty under the kbd_event_lock, while normal tty paths * don't hold the lock. We probably need to split out an LED lock * but not during an -rc release! */ void vt_kbd_con_start(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&led_lock, flags); clr_vc_kbd_led(kb, VC_SCROLLOCK); set_leds(); spin_unlock_irqrestore(&led_lock, flags); } /** * vt_kbd_con_stop - Keyboard side of console stop * @console: console * * Handle console stop. 
This is a wrapper for the VT layer * so that we can keep kbd knowledge internal */ void vt_kbd_con_stop(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&led_lock, flags); set_vc_kbd_led(kb, VC_SCROLLOCK); set_leds(); spin_unlock_irqrestore(&led_lock, flags); } /* * This is the tasklet that updates LED state of LEDs using standard * keyboard triggers. The reason we use tasklet is that we need to * handle the scenario when keyboard handler is not registered yet * but we already getting updates from the VT to update led state. */ static void kbd_bh(struct tasklet_struct *unused) { unsigned int leds; unsigned long flags; spin_lock_irqsave(&led_lock, flags); leds = getleds(); leds |= (unsigned int)kbd->lockstate << 8; spin_unlock_irqrestore(&led_lock, flags); if (vt_switch) { ledstate = ~leds; vt_switch = false; } if (leds != ledstate) { kbd_propagate_led_state(ledstate, leds); ledstate = leds; } } #if defined(CONFIG_X86) || defined(CONFIG_ALPHA) ||\ defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) ||\ defined(CONFIG_PARISC) || defined(CONFIG_SUPERH) ||\ (defined(CONFIG_ARM) && defined(CONFIG_KEYBOARD_ATKBD) && !defined(CONFIG_ARCH_RPC)) static inline bool kbd_is_hw_raw(const struct input_dev *dev) { if (!test_bit(EV_MSC, dev->evbit) || !test_bit(MSC_RAW, dev->mscbit)) return false; return dev->id.bustype == BUS_I8042 && dev->id.vendor == 0x0001 && dev->id.product == 0x0001; } static const unsigned short x86_keycodes[256] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,118, 86, 87, 88,115,120,119,121,112,123, 92, 284,285,309, 0,312, 91,327,328,329,331,333,335,336,337,338,339, 367,288,302,304,350, 89,334,326,267,126,268,269,125,347,348,349, 360,261,262,263,268,376,100,101,321,316,373,286,289,102,351,355, 103,104,105,275,287,279,258,106,274,107,294,364,358,363,362,361, 291,108,381,281,290,272,292,305,280, 99,112,257,306,359,113,114, 264,117,271,374,379,265,266, 93, 94, 95, 85,259,375,260, 90,116, 377,109,111,277,278,282,283,295,296,297,299,300,301,293,303,307, 308,310,313,314,315,317,318,319,320,357,322,323,324,325,276,330, 332,340,365,342,343,344,345,346,356,270,341,368,369,370,371,372 }; #ifdef CONFIG_SPARC static int sparc_l1_a_state; extern void sun_do_break(void); #endif static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag) { int code; switch (keycode) { case KEY_PAUSE: put_queue(vc, 0xe1); put_queue(vc, 0x1d | up_flag); put_queue(vc, 0x45 | up_flag); break; case KEY_HANGEUL: if (!up_flag) put_queue(vc, 0xf2); break; case KEY_HANJA: if (!up_flag) put_queue(vc, 0xf1); break; case KEY_SYSRQ: /* * Real AT keyboards (that's what we're trying * to emulate here) emit 0xe0 0x2a 0xe0 0x37 when * pressing PrtSc/SysRq alone, but simply 0x54 * when pressing Alt+PrtSc/SysRq. 
*/ if (test_bit(KEY_LEFTALT, key_down) || test_bit(KEY_RIGHTALT, key_down)) { put_queue(vc, 0x54 | up_flag); } else { put_queue(vc, 0xe0); put_queue(vc, 0x2a | up_flag); put_queue(vc, 0xe0); put_queue(vc, 0x37 | up_flag); } break; default: if (keycode > 255) return -1; code = x86_keycodes[keycode]; if (!code) return -1; if (code & 0x100) put_queue(vc, 0xe0); put_queue(vc, (code & 0x7f) | up_flag); break; } return 0; } #else static inline bool kbd_is_hw_raw(const struct input_dev *dev) { return false; } static int emulate_raw(struct vc_data *vc, unsigned int keycode, unsigned char up_flag) { if (keycode > 127) return -1; put_queue(vc, keycode | up_flag); return 0; } #endif static void kbd_rawcode(unsigned char data) { struct vc_data *vc = vc_cons[fg_console].d; kbd = &kbd_table[vc->vc_num]; if (kbd->kbdmode == VC_RAW) put_queue(vc, data); } static void kbd_keycode(unsigned int keycode, int down, bool hw_raw) { struct vc_data *vc = vc_cons[fg_console].d; unsigned short keysym, *key_map; unsigned char type; bool raw_mode; struct tty_struct *tty; int shift_final; struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down }; int rc; tty = vc->port.tty; if (tty && (!tty->driver_data)) { /* No driver data? Strange. Okay we fix it then. */ tty->driver_data = vc; } kbd = &kbd_table[vc->vc_num]; #ifdef CONFIG_SPARC if (keycode == KEY_STOP) sparc_l1_a_state = down; #endif rep = (down == 2); raw_mode = (kbd->kbdmode == VC_RAW); if (raw_mode && !hw_raw) if (emulate_raw(vc, keycode, !down << 7)) if (keycode < BTN_MISC && printk_ratelimit()) pr_warn("can't emulate rawmode for keycode %d\n", keycode); #ifdef CONFIG_SPARC if (keycode == KEY_A && sparc_l1_a_state) { sparc_l1_a_state = false; sun_do_break(); } #endif if (kbd->kbdmode == VC_MEDIUMRAW) { /* * This is extended medium raw mode, with keys above 127 * encoded as 0, high 7 bits, low 7 bits, with the 0 bearing * the 'up' flag if needed. 0 is reserved, so this shouldn't * interfere with anything else. The two bytes after 0 will * always have the up flag set not to interfere with older * applications. This allows for 16384 different keycodes, * which should be enough. */ if (keycode < 128) { put_queue(vc, keycode | (!down << 7)); } else { put_queue(vc, !down << 7); put_queue(vc, (keycode >> 7) | BIT(7)); put_queue(vc, keycode | BIT(7)); } raw_mode = true; } assign_bit(keycode, key_down, down); if (rep && (!vc_kbd_mode(kbd, VC_REPEAT) || (tty && !L_ECHO(tty) && tty_chars_in_buffer(tty)))) { /* * Don't repeat a key if the input buffers are not empty and the * characters get aren't echoed locally. This makes key repeat * usable with slow applications and under heavy loads. 
*/ return; } param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate; param.ledstate = kbd->ledflagstate; key_map = key_maps[shift_final]; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYCODE, &param); if (rc == NOTIFY_STOP || !key_map) { atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNBOUND_KEYCODE, &param); do_compute_shiftstate(); kbd->slockstate = 0; return; } if (keycode < NR_KEYS) keysym = key_map[keycode]; else if (keycode >= KEY_BRL_DOT1 && keycode <= KEY_BRL_DOT8) keysym = U(K(KT_BRL, keycode - KEY_BRL_DOT1 + 1)); else return; type = KTYP(keysym); if (type < 0xf0) { param.value = keysym; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNICODE, &param); if (rc != NOTIFY_STOP) if (down && !raw_mode) k_unicode(vc, keysym, !down); return; } type -= 0xf0; if (type == KT_LETTER) { type = KT_LATIN; if (vc_kbd_led(kbd, VC_CAPSLOCK)) { key_map = key_maps[shift_final ^ BIT(KG_SHIFT)]; if (key_map) keysym = key_map[keycode]; } } param.value = keysym; rc = atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYSYM, &param); if (rc == NOTIFY_STOP) return; if ((raw_mode || kbd->kbdmode == VC_OFF) && type != KT_SPEC && type != KT_SHIFT) return; (*k_handler[type])(vc, keysym & 0xff, !down); param.ledstate = kbd->ledflagstate; atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, &param); if (type != KT_SLOCK) kbd->slockstate = 0; } static void kbd_event(struct input_handle *handle, unsigned int event_type, unsigned int event_code, int value) { /* We are called with interrupts disabled, just take the lock */ spin_lock(&kbd_event_lock); if (event_type == EV_MSC && event_code == MSC_RAW && kbd_is_hw_raw(handle->dev)) kbd_rawcode(value); if (event_type == EV_KEY && event_code <= KEY_MAX) kbd_keycode(event_code, value, kbd_is_hw_raw(handle->dev)); spin_unlock(&kbd_event_lock); tasklet_schedule(&keyboard_tasklet); do_poke_blanked_console = 1; schedule_console_callback(); } static bool kbd_match(struct input_handler *handler, struct input_dev *dev) { if (test_bit(EV_SND, dev->evbit)) return true; if (test_bit(EV_KEY, dev->evbit)) { if (find_next_bit(dev->keybit, BTN_MISC, KEY_RESERVED) < BTN_MISC) return true; if (find_next_bit(dev->keybit, KEY_BRL_DOT10 + 1, KEY_BRL_DOT1) <= KEY_BRL_DOT10) return true; } return false; } /* * When a keyboard (or other input device) is found, the kbd_connect * function is called. The function then looks at the device, and if it * likes it, it can open it and get events from it. In this (kbd_connect) * function, we should decide which VT to bind that keyboard to initially. */ static int kbd_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct input_handle *handle; int error; handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); if (!handle) return -ENOMEM; handle->dev = dev; handle->handler = handler; handle->name = "kbd"; error = input_register_handle(handle); if (error) goto err_free_handle; error = input_open_device(handle); if (error) goto err_unregister_handle; return 0; err_unregister_handle: input_unregister_handle(handle); err_free_handle: kfree(handle); return error; } static void kbd_disconnect(struct input_handle *handle) { input_close_device(handle); input_unregister_handle(handle); kfree(handle); } /* * Start keyboard handler on the new keyboard by refreshing LED state to * match the rest of the system. 
*/ static void kbd_start(struct input_handle *handle) { tasklet_disable(&keyboard_tasklet); if (ledstate != -1U) kbd_update_leds_helper(handle, &ledstate); tasklet_enable(&keyboard_tasklet); } static const struct input_device_id kbd_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_KEY) }, }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, .evbit = { BIT_MASK(EV_SND) }, }, { }, /* Terminating entry */ }; MODULE_DEVICE_TABLE(input, kbd_ids); static struct input_handler kbd_handler = { .event = kbd_event, .match = kbd_match, .connect = kbd_connect, .disconnect = kbd_disconnect, .start = kbd_start, .name = "kbd", .id_table = kbd_ids, }; int __init kbd_init(void) { int i; int error; for (i = 0; i < MAX_NR_CONSOLES; i++) { kbd_table[i].ledflagstate = kbd_defleds(); kbd_table[i].default_ledflagstate = kbd_defleds(); kbd_table[i].ledmode = LED_SHOW_FLAGS; kbd_table[i].lockstate = KBD_DEFLOCK; kbd_table[i].slockstate = 0; kbd_table[i].modeflags = KBD_DEFMODE; kbd_table[i].kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE; } kbd_init_leds(); error = input_register_handler(&kbd_handler); if (error) return error; tasklet_enable(&keyboard_tasklet); tasklet_schedule(&keyboard_tasklet); return 0; } /* Ioctl support code */ /** * vt_do_diacrit - diacritical table updates * @cmd: ioctl request * @udp: pointer to user data for ioctl * @perm: permissions check computed by caller * * Update the diacritical tables atomically and safely. Lock them * against simultaneous keypresses */ int vt_do_diacrit(unsigned int cmd, void __user *udp, int perm) { unsigned long flags; int asize; int ret = 0; switch (cmd) { case KDGKBDIACR: { struct kbdiacrs __user *a = udp; struct kbdiacr *dia; int i; dia = kmalloc_array(MAX_DIACR, sizeof(struct kbdiacr), GFP_KERNEL); if (!dia) return -ENOMEM; /* Lock the diacriticals table, make a copy and then copy it after we unlock */ spin_lock_irqsave(&kbd_event_lock, flags); asize = accent_table_size; for (i = 0; i < asize; i++) { dia[i].diacr = conv_uni_to_8bit( accent_table[i].diacr); dia[i].base = conv_uni_to_8bit( accent_table[i].base); dia[i].result = conv_uni_to_8bit( accent_table[i].result); } spin_unlock_irqrestore(&kbd_event_lock, flags); if (put_user(asize, &a->kb_cnt)) ret = -EFAULT; else if (copy_to_user(a->kbdiacr, dia, asize * sizeof(struct kbdiacr))) ret = -EFAULT; kfree(dia); return ret; } case KDGKBDIACRUC: { struct kbdiacrsuc __user *a = udp; void *buf; buf = kmalloc_array(MAX_DIACR, sizeof(struct kbdiacruc), GFP_KERNEL); if (buf == NULL) return -ENOMEM; /* Lock the diacriticals table, make a copy and then copy it after we unlock */ spin_lock_irqsave(&kbd_event_lock, flags); asize = accent_table_size; memcpy(buf, accent_table, asize * sizeof(struct kbdiacruc)); spin_unlock_irqrestore(&kbd_event_lock, flags); if (put_user(asize, &a->kb_cnt)) ret = -EFAULT; else if (copy_to_user(a->kbdiacruc, buf, asize*sizeof(struct kbdiacruc))) ret = -EFAULT; kfree(buf); return ret; } case KDSKBDIACR: { struct kbdiacrs __user *a = udp; struct kbdiacr *dia = NULL; unsigned int ct; int i; if (!perm) return -EPERM; if (get_user(ct, &a->kb_cnt)) return -EFAULT; if (ct >= MAX_DIACR) return -EINVAL; if (ct) { dia = memdup_array_user(a->kbdiacr, ct, sizeof(struct kbdiacr)); if (IS_ERR(dia)) return PTR_ERR(dia); } spin_lock_irqsave(&kbd_event_lock, flags); accent_table_size = ct; for (i = 0; i < ct; i++) { accent_table[i].diacr = conv_8bit_to_uni(dia[i].diacr); accent_table[i].base = conv_8bit_to_uni(dia[i].base); accent_table[i].result = conv_8bit_to_uni(dia[i].result); } 
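/* Note: the conversion loop above runs entirely under kbd_event_lock, so handle_diacr() on the keypress path (which runs under the same lock) never sees a partially updated accent_table. */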
spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(dia); return 0; } case KDSKBDIACRUC: { struct kbdiacrsuc __user *a = udp; unsigned int ct; void *buf = NULL; if (!perm) return -EPERM; if (get_user(ct, &a->kb_cnt)) return -EFAULT; if (ct >= MAX_DIACR) return -EINVAL; if (ct) { buf = memdup_array_user(a->kbdiacruc, ct, sizeof(struct kbdiacruc)); if (IS_ERR(buf)) return PTR_ERR(buf); } spin_lock_irqsave(&kbd_event_lock, flags); if (ct) memcpy(accent_table, buf, ct * sizeof(struct kbdiacruc)); accent_table_size = ct; spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(buf); return 0; } } return ret; } /** * vt_do_kdskbmode - set keyboard mode ioctl * @console: the console to use * @arg: the requested mode * * Update the keyboard mode bits while holding the correct locks. * Return 0 for success or an error code. */ int vt_do_kdskbmode(unsigned int console, unsigned int arg) { struct kbd_struct *kb = &kbd_table[console]; int ret = 0; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); switch(arg) { case K_RAW: kb->kbdmode = VC_RAW; break; case K_MEDIUMRAW: kb->kbdmode = VC_MEDIUMRAW; break; case K_XLATE: kb->kbdmode = VC_XLATE; do_compute_shiftstate(); break; case K_UNICODE: kb->kbdmode = VC_UNICODE; do_compute_shiftstate(); break; case K_OFF: kb->kbdmode = VC_OFF; break; default: ret = -EINVAL; } spin_unlock_irqrestore(&kbd_event_lock, flags); return ret; } /** * vt_do_kdskbmeta - set keyboard meta state * @console: the console to use * @arg: the requested meta state * * Update the keyboard meta bits while holding the correct locks. * Return 0 for success or an error code. */ int vt_do_kdskbmeta(unsigned int console, unsigned int arg) { struct kbd_struct *kb = &kbd_table[console]; int ret = 0; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); switch(arg) { case K_METABIT: clr_vc_kbd_mode(kb, VC_META); break; case K_ESCPREFIX: set_vc_kbd_mode(kb, VC_META); break; default: ret = -EINVAL; } spin_unlock_irqrestore(&kbd_event_lock, flags); return ret; } int vt_do_kbkeycode_ioctl(int cmd, struct kbkeycode __user *user_kbkc, int perm) { struct kbkeycode tmp; int kc = 0; if (copy_from_user(&tmp, user_kbkc, sizeof(struct kbkeycode))) return -EFAULT; switch (cmd) { case KDGETKEYCODE: kc = getkeycode(tmp.scancode); if (kc >= 0) kc = put_user(kc, &user_kbkc->keycode); break; case KDSETKEYCODE: if (!perm) return -EPERM; kc = setkeycode(tmp.scancode, tmp.keycode); break; } return kc; } static unsigned short vt_kdgkbent(unsigned char kbdmode, unsigned char idx, unsigned char map) { unsigned short *key_map, val; unsigned long flags; /* Ensure another thread doesn't free it under us */ spin_lock_irqsave(&kbd_event_lock, flags); key_map = key_maps[map]; if (key_map) { val = U(key_map[idx]); if (kbdmode != VC_UNICODE && KTYP(val) >= NR_TYPES) val = K_HOLE; } else val = idx ? 
K_HOLE : K_NOSUCHMAP; spin_unlock_irqrestore(&kbd_event_lock, flags); return val; } static int vt_kdskbent(unsigned char kbdmode, unsigned char idx, unsigned char map, unsigned short val) { unsigned long flags; unsigned short *key_map, *new_map, oldval; if (!idx && val == K_NOSUCHMAP) { spin_lock_irqsave(&kbd_event_lock, flags); /* deallocate map */ key_map = key_maps[map]; if (map && key_map) { key_maps[map] = NULL; if (key_map[0] == U(K_ALLOCATED)) { kfree(key_map); keymap_count--; } } spin_unlock_irqrestore(&kbd_event_lock, flags); return 0; } if (KTYP(val) < NR_TYPES) { if (KVAL(val) > max_vals[KTYP(val)]) return -EINVAL; } else if (kbdmode != VC_UNICODE) return -EINVAL; /* ++Geert: non-PC keyboards may generate keycode zero */ #if !defined(__mc68000__) && !defined(__powerpc__) /* assignment to entry 0 only tests validity of args */ if (!idx) return 0; #endif new_map = kmalloc(sizeof(plain_map), GFP_KERNEL); if (!new_map) return -ENOMEM; spin_lock_irqsave(&kbd_event_lock, flags); key_map = key_maps[map]; if (key_map == NULL) { int j; if (keymap_count >= MAX_NR_OF_USER_KEYMAPS && !capable(CAP_SYS_RESOURCE)) { spin_unlock_irqrestore(&kbd_event_lock, flags); kfree(new_map); return -EPERM; } key_maps[map] = new_map; key_map = new_map; key_map[0] = U(K_ALLOCATED); for (j = 1; j < NR_KEYS; j++) key_map[j] = U(K_HOLE); keymap_count++; } else kfree(new_map); oldval = U(key_map[idx]); if (val == oldval) goto out; /* Attention Key */ if ((oldval == K_SAK || val == K_SAK) && !capable(CAP_SYS_ADMIN)) { spin_unlock_irqrestore(&kbd_event_lock, flags); return -EPERM; } key_map[idx] = U(val); if (!map && (KTYP(oldval) == KT_SHIFT || KTYP(val) == KT_SHIFT)) do_compute_shiftstate(); out: spin_unlock_irqrestore(&kbd_event_lock, flags); return 0; } int vt_do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; struct kbentry kbe; if (copy_from_user(&kbe, user_kbe, sizeof(struct kbentry))) return -EFAULT; switch (cmd) { case KDGKBENT: return put_user(vt_kdgkbent(kb->kbdmode, kbe.kb_index, kbe.kb_table), &user_kbe->kb_value); case KDSKBENT: if (!perm || !capable(CAP_SYS_TTY_CONFIG)) return -EPERM; return vt_kdskbent(kb->kbdmode, kbe.kb_index, kbe.kb_table, kbe.kb_value); } return 0; } static char *vt_kdskbsent(char *kbs, unsigned char cur) { static DECLARE_BITMAP(is_kmalloc, MAX_NR_FUNC); char *cur_f = func_table[cur]; if (cur_f && strlen(cur_f) >= strlen(kbs)) { strcpy(cur_f, kbs); return kbs; } func_table[cur] = kbs; return __test_and_set_bit(cur, is_kmalloc) ? cur_f : NULL; } int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) { unsigned char kb_func; unsigned long flags; char *kbs; int ret; if (get_user(kb_func, &user_kdgkb->kb_func)) return -EFAULT; kb_func = array_index_nospec(kb_func, MAX_NR_FUNC); switch (cmd) { case KDGKBSENT: { /* size should have been a struct member */ ssize_t len = sizeof(user_kdgkb->kb_string); kbs = kmalloc(len, GFP_KERNEL); if (!kbs) return -ENOMEM; spin_lock_irqsave(&func_buf_lock, flags); len = strscpy(kbs, func_table[kb_func] ? : "", len); spin_unlock_irqrestore(&func_buf_lock, flags); if (len < 0) { ret = -ENOSPC; break; } ret = copy_to_user(user_kdgkb->kb_string, kbs, len + 1) ? 
-EFAULT : 0; break; } case KDSKBSENT: if (!perm || !capable(CAP_SYS_TTY_CONFIG)) return -EPERM; kbs = strndup_user(user_kdgkb->kb_string, sizeof(user_kdgkb->kb_string)); if (IS_ERR(kbs)) return PTR_ERR(kbs); spin_lock_irqsave(&func_buf_lock, flags); kbs = vt_kdskbsent(kbs, kb_func); spin_unlock_irqrestore(&func_buf_lock, flags); ret = 0; break; } kfree(kbs); return ret; } int vt_do_kdskled(unsigned int console, int cmd, unsigned long arg, int perm) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; unsigned char ucval; switch(cmd) { /* the ioctls below read/set the flags usually shown in the leds */ /* don't use them - they will go away without warning */ case KDGKBLED: spin_lock_irqsave(&kbd_event_lock, flags); ucval = kb->ledflagstate | (kb->default_ledflagstate << 4); spin_unlock_irqrestore(&kbd_event_lock, flags); return put_user(ucval, (char __user *)arg); case KDSKBLED: if (!perm) return -EPERM; if (arg & ~0x77) return -EINVAL; spin_lock_irqsave(&led_lock, flags); kb->ledflagstate = (arg & 7); kb->default_ledflagstate = ((arg >> 4) & 7); set_leds(); spin_unlock_irqrestore(&led_lock, flags); return 0; /* the ioctls below only set the lights, not the functions */ /* for those, see KDGKBLED and KDSKBLED above */ case KDGETLED: ucval = getledstate(); return put_user(ucval, (char __user *)arg); case KDSETLED: if (!perm) return -EPERM; setledstate(kb, arg); return 0; } return -ENOIOCTLCMD; } int vt_do_kdgkbmode(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; /* This is a spot read so needs no locking */ switch (kb->kbdmode) { case VC_RAW: return K_RAW; case VC_MEDIUMRAW: return K_MEDIUMRAW; case VC_UNICODE: return K_UNICODE; case VC_OFF: return K_OFF; default: return K_XLATE; } } /** * vt_do_kdgkbmeta - report meta status * @console: console to report * * Report the meta flag status of this console */ int vt_do_kdgkbmeta(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; /* Again a spot read so no locking */ return vc_kbd_mode(kb, VC_META) ? K_ESCPREFIX : K_METABIT; } /** * vt_reset_unicode - reset the unicode status * @console: console being reset * * Restore the unicode console state to its default */ void vt_reset_unicode(unsigned int console) { unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); kbd_table[console].kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE; spin_unlock_irqrestore(&kbd_event_lock, flags); } /** * vt_get_shift_state - shift bit state * * Report the shift bits from the keyboard state. We have to export * this to support some oddities in the vt layer. 
*/ int vt_get_shift_state(void) { /* Don't lock as this is a transient report */ return shift_state; } /** * vt_reset_keyboard - reset keyboard state * @console: console to reset * * Reset the keyboard bits for a console as part of a general console * reset event */ void vt_reset_keyboard(unsigned int console) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); set_vc_kbd_mode(kb, VC_REPEAT); clr_vc_kbd_mode(kb, VC_CKMODE); clr_vc_kbd_mode(kb, VC_APPLIC); clr_vc_kbd_mode(kb, VC_CRLF); kb->lockstate = 0; kb->slockstate = 0; spin_lock(&led_lock); kb->ledmode = LED_SHOW_FLAGS; kb->ledflagstate = kb->default_ledflagstate; spin_unlock(&led_lock); /* do not do set_leds here because this causes an endless tasklet loop when the keyboard hasn't been initialized yet */ spin_unlock_irqrestore(&kbd_event_lock, flags); } /** * vt_get_kbd_mode_bit - read keyboard status bits * @console: console to read from * @bit: mode bit to read * * Report back a vt mode bit. We do this without locking so the * caller must be sure that there are no synchronization needs */ int vt_get_kbd_mode_bit(unsigned int console, int bit) { struct kbd_struct *kb = &kbd_table[console]; return vc_kbd_mode(kb, bit); } /** * vt_set_kbd_mode_bit - read keyboard status bits * @console: console to read from * @bit: mode bit to read * * Set a vt mode bit. We do this without locking so the * caller must be sure that there are no synchronization needs */ void vt_set_kbd_mode_bit(unsigned int console, int bit) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); set_vc_kbd_mode(kb, bit); spin_unlock_irqrestore(&kbd_event_lock, flags); } /** * vt_clr_kbd_mode_bit - read keyboard status bits * @console: console to read from * @bit: mode bit to read * * Report back a vt mode bit. We do this without locking so the * caller must be sure that there are no synchronization needs */ void vt_clr_kbd_mode_bit(unsigned int console, int bit) { struct kbd_struct *kb = &kbd_table[console]; unsigned long flags; spin_lock_irqsave(&kbd_event_lock, flags); clr_vc_kbd_mode(kb, bit); spin_unlock_irqrestore(&kbd_event_lock, flags); }
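The keyboard driver above exports a notifier chain via register_keyboard_notifier()/unregister_keyboard_notifier() and publishes KBD_KEYCODE, KBD_UNBOUND_KEYCODE, KBD_UNICODE, KBD_KEYSYM and KBD_POST_KEYSYM events from kbd_keycode(). The following is a minimal, illustrative sketch of a consumer module (all example_* names are invented); the callback is invoked with kbd_event_lock held in atomic context, so it must not sleep.

#include <linux/keyboard.h>
#include <linux/module.h>
#include <linux/notifier.h>

static int example_kbd_notify(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct keyboard_notifier_param *param = data;

	/* React only to resolved keysyms on key press. */
	if (action == KBD_KEYSYM && param->down)
		pr_debug("keysym 0x%04x (shift state %#x)\n",
			 param->value, param->shift);

	return NOTIFY_OK;	/* NOTIFY_STOP would suppress further handling */
}

static struct notifier_block example_kbd_nb = {
	.notifier_call = example_kbd_notify,
};

static int __init example_kbd_init(void)
{
	return register_keyboard_notifier(&example_kbd_nb);
}

static void __exit example_kbd_exit(void)
{
	unregister_keyboard_notifier(&example_kbd_nb);
}

module_init(example_kbd_init);
module_exit(example_kbd_exit);
MODULE_DESCRIPTION("Illustrative keyboard notifier consumer");
MODULE_LICENSE("GPL");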
// SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for Linux input subsystem * * Copyright (c) 2006 Anssi Hannula <anssi.hannula@gmail.com> * Copyright (c) 2006 Dmitry Torokhov <dtor@mail.ru> */ /* #define DEBUG */ #include <linux/input.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/sched.h> #include <linux/slab.h> /* * Check that the effect_id is a valid effect and whether the user * is the owner */ static int check_effect_access(struct ff_device *ff, int effect_id, struct file *file) { if (effect_id < 0 || effect_id >= ff->max_effects || !ff->effect_owners[effect_id]) return -EINVAL; if (file && ff->effect_owners[effect_id] != file) return -EACCES; return 0; } /* * Checks whether 2 effects can be combined together */ static inline int check_effects_compatible(struct ff_effect *e1, struct ff_effect *e2) { return e1->type == e2->type && (e1->type != FF_PERIODIC || e1->u.periodic.waveform == e2->u.periodic.waveform); } /* * Convert an effect into compatible one */ static int compat_effect(struct ff_device *ff, struct ff_effect *effect) { int magnitude; switch (effect->type) { case FF_RUMBLE: if (!test_bit(FF_PERIODIC, ff->ffbit)) return -EINVAL; /* * calculate magnitude of sine wave as average of rumble's * 2/3 of strong magnitude and 1/3 of weak magnitude */ magnitude = effect->u.rumble.strong_magnitude / 3 + effect->u.rumble.weak_magnitude / 6; effect->type = FF_PERIODIC; effect->u.periodic.waveform = FF_SINE; effect->u.periodic.period = 50; effect->u.periodic.magnitude = magnitude; effect->u.periodic.offset = 0; effect->u.periodic.phase = 0; effect->u.periodic.envelope.attack_length = 0; effect->u.periodic.envelope.attack_level = 0; effect->u.periodic.envelope.fade_length = 0; effect->u.periodic.envelope.fade_level = 0; return 0; default: /* Let driver handle conversion */ return 0; } } /** * input_ff_upload() - upload effect into force-feedback device * @dev: input device * @effect: effect to be uploaded * @file: owner of the effect */ int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file) { struct
ff_device *ff = dev->ff; struct ff_effect *old; int ret = 0; int id; if (!test_bit(EV_FF, dev->evbit)) return -ENOSYS; if (effect->type < FF_EFFECT_MIN || effect->type > FF_EFFECT_MAX || !test_bit(effect->type, dev->ffbit)) { dev_dbg(&dev->dev, "invalid or not supported effect type in upload\n"); return -EINVAL; } if (effect->type == FF_PERIODIC && (effect->u.periodic.waveform < FF_WAVEFORM_MIN || effect->u.periodic.waveform > FF_WAVEFORM_MAX || !test_bit(effect->u.periodic.waveform, dev->ffbit))) { dev_dbg(&dev->dev, "invalid or not supported wave form in upload\n"); return -EINVAL; } if (!test_bit(effect->type, ff->ffbit)) { ret = compat_effect(ff, effect); if (ret) return ret; } mutex_lock(&ff->mutex); if (effect->id == -1) { for (id = 0; id < ff->max_effects; id++) if (!ff->effect_owners[id]) break; if (id >= ff->max_effects) { ret = -ENOSPC; goto out; } effect->id = id; old = NULL; } else { id = effect->id; ret = check_effect_access(ff, id, file); if (ret) goto out; old = &ff->effects[id]; if (!check_effects_compatible(effect, old)) { ret = -EINVAL; goto out; } } ret = ff->upload(dev, effect, old); if (ret) goto out; spin_lock_irq(&dev->event_lock); ff->effects[id] = *effect; ff->effect_owners[id] = file; spin_unlock_irq(&dev->event_lock); out: mutex_unlock(&ff->mutex); return ret; } EXPORT_SYMBOL_GPL(input_ff_upload); /* * Erases the effect if the requester is also the effect owner. The mutex * should already be locked before calling this function. */ static int erase_effect(struct input_dev *dev, int effect_id, struct file *file) { struct ff_device *ff = dev->ff; int error; error = check_effect_access(ff, effect_id, file); if (error) return error; spin_lock_irq(&dev->event_lock); ff->playback(dev, effect_id, 0); ff->effect_owners[effect_id] = NULL; spin_unlock_irq(&dev->event_lock); if (ff->erase) { error = ff->erase(dev, effect_id); if (error) { spin_lock_irq(&dev->event_lock); ff->effect_owners[effect_id] = file; spin_unlock_irq(&dev->event_lock); return error; } } return 0; } /** * input_ff_erase - erase a force-feedback effect from device * @dev: input device to erase effect from * @effect_id: id of the effect to be erased * @file: purported owner of the request * * This function erases a force-feedback effect from specified device. * The effect will only be erased if it was uploaded through the same * file handle that is requesting erase. */ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file) { struct ff_device *ff = dev->ff; int ret; if (!test_bit(EV_FF, dev->evbit)) return -ENOSYS; mutex_lock(&ff->mutex); ret = erase_effect(dev, effect_id, file); mutex_unlock(&ff->mutex); return ret; } EXPORT_SYMBOL_GPL(input_ff_erase); /* * input_ff_flush - erase all effects owned by a file handle * @dev: input device to erase effect from * @file: purported owner of the effects * * This function erases all force-feedback effects associated with * the given owner from specified device. Note that @file may be %NULL, * in which case all effects will be erased. 
*/ int input_ff_flush(struct input_dev *dev, struct file *file) { struct ff_device *ff = dev->ff; int i; dev_dbg(&dev->dev, "flushing now\n"); mutex_lock(&ff->mutex); for (i = 0; i < ff->max_effects; i++) erase_effect(dev, i, file); mutex_unlock(&ff->mutex); return 0; } EXPORT_SYMBOL_GPL(input_ff_flush); /** * input_ff_event() - generic handler for force-feedback events * @dev: input device to send the effect to * @type: event type (anything but EV_FF is ignored) * @code: event code * @value: event value */ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct ff_device *ff = dev->ff; if (type != EV_FF) return 0; switch (code) { case FF_GAIN: if (!test_bit(FF_GAIN, dev->ffbit) || value > 0xffffU) break; ff->set_gain(dev, value); break; case FF_AUTOCENTER: if (!test_bit(FF_AUTOCENTER, dev->ffbit) || value > 0xffffU) break; ff->set_autocenter(dev, value); break; default: if (check_effect_access(ff, code, NULL) == 0) ff->playback(dev, code, value); break; } return 0; } EXPORT_SYMBOL_GPL(input_ff_event); /** * input_ff_create() - create force-feedback device * @dev: input device supporting force-feedback * @max_effects: maximum number of effects supported by the device * * This function allocates all necessary memory for a force feedback * portion of an input device and installs all default handlers. * @dev->ffbit should be already set up before calling this function. * Once ff device is created you need to setup its upload, erase, * playback and other handlers before registering input device */ int input_ff_create(struct input_dev *dev, unsigned int max_effects) { struct ff_device *ff; size_t ff_dev_size; int i; if (!max_effects) { dev_err(&dev->dev, "cannot allocate device without any effects\n"); return -EINVAL; } if (max_effects > FF_MAX_EFFECTS) { dev_err(&dev->dev, "cannot allocate more than FF_MAX_EFFECTS effects\n"); return -EINVAL; } ff_dev_size = sizeof(struct ff_device) + max_effects * sizeof(struct file *); if (ff_dev_size < max_effects) /* overflow */ return -EINVAL; ff = kzalloc(ff_dev_size, GFP_KERNEL); if (!ff) return -ENOMEM; ff->effects = kcalloc(max_effects, sizeof(struct ff_effect), GFP_KERNEL); if (!ff->effects) { kfree(ff); return -ENOMEM; } ff->max_effects = max_effects; mutex_init(&ff->mutex); dev->ff = ff; dev->flush = input_ff_flush; dev->event = input_ff_event; __set_bit(EV_FF, dev->evbit); /* Copy "true" bits into ff device bitmap */ for_each_set_bit(i, dev->ffbit, FF_CNT) __set_bit(i, ff->ffbit); /* we can emulate RUMBLE with periodic effects */ if (test_bit(FF_PERIODIC, ff->ffbit)) __set_bit(FF_RUMBLE, dev->ffbit); return 0; } EXPORT_SYMBOL_GPL(input_ff_create); /** * input_ff_destroy() - frees force feedback portion of input device * @dev: input device supporting force feedback * * This function is only needed in error path as input core will * automatically free force feedback structures when device is * destroyed. */ void input_ff_destroy(struct input_dev *dev) { struct ff_device *ff = dev->ff; __clear_bit(EV_FF, dev->evbit); if (ff) { if (ff->destroy) ff->destroy(ff); kfree(ff->private); kfree(ff->effects); kfree(ff); dev->ff = NULL; } } EXPORT_SYMBOL_GPL(input_ff_destroy);
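/*
 * Illustrative userspace sketch, not part of ff-core.c above: it walks the
 * standard evdev force-feedback sequence that lands in the functions above.
 * EVIOCSFF ends up in input_ff_upload() (id == -1 asks for a free slot), the
 * EV_FF write is routed by input_ff_event() to ff->playback(), and EVIOCRMFF
 * ends up in input_ff_erase(). The device node, magnitudes and duration are
 * assumptions for the example; on hardware that only advertises FF_PERIODIC
 * the rumble request is converted by compat_effect().
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/input.h>

int main(void)
{
        struct ff_effect effect;
        struct input_event play;
        int fd = open("/dev/input/event0", O_RDWR);     /* hypothetical device node */

        if (fd < 0)
                return 1;

        memset(&effect, 0, sizeof(effect));
        effect.type = FF_RUMBLE;
        effect.id = -1;                         /* let input_ff_upload() pick a slot */
        effect.u.rumble.strong_magnitude = 0x8000;
        effect.u.rumble.weak_magnitude = 0x4000;
        effect.replay.length = 1000;            /* ms */

        if (ioctl(fd, EVIOCSFF, &effect) < 0)   /* upload; effect.id is filled in */
                return 1;

        memset(&play, 0, sizeof(play));
        play.type = EV_FF;
        play.code = effect.id;
        play.value = 1;                         /* start playback */
        if (write(fd, &play, sizeof(play)) != sizeof(play))
                return 1;

        sleep(2);
        ioctl(fd, EVIOCRMFF, effect.id);        /* erase the effect again */
        close(fd);
        return 0;
}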
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM writeback #if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_WRITEBACK_H #include
<linux/tracepoint.h> #include <linux/backing-dev.h> #include <linux/writeback.h> #define show_inode_state(state) \ __print_flags(state, "|", \ {I_DIRTY_SYNC, "I_DIRTY_SYNC"}, \ {I_DIRTY_DATASYNC, "I_DIRTY_DATASYNC"}, \ {I_DIRTY_PAGES, "I_DIRTY_PAGES"}, \ {I_NEW, "I_NEW"}, \ {I_WILL_FREE, "I_WILL_FREE"}, \ {I_FREEING, "I_FREEING"}, \ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ {I_REFERENCED, "I_REFERENCED"} \ ) /* enums need to be exported to user space */ #undef EM #undef EMe #define EM(a,b) TRACE_DEFINE_ENUM(a); #define EMe(a,b) TRACE_DEFINE_ENUM(a); #define WB_WORK_REASON \ EM( WB_REASON_BACKGROUND, "background") \ EM( WB_REASON_VMSCAN, "vmscan") \ EM( WB_REASON_SYNC, "sync") \ EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ EM( WB_REASON_FORKER_THREAD, "forker_thread") \ EMe(WB_REASON_FOREIGN_FLUSH, "foreign_flush") WB_WORK_REASON /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. */ #undef EM #undef EMe #define EM(a,b) { a, b }, #define EMe(a,b) { a, b } struct wb_writeback_work; DECLARE_EVENT_CLASS(writeback_folio_template, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(pgoff_t, index) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : NULL), 32); __entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0; __entry->index = folio->index; ), TP_printk("bdi %s: ino=%lu index=%lu", __entry->name, (unsigned long)__entry->ino, __entry->index ) ); DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping) ); DEFINE_EVENT(writeback_folio_template, folio_wait_writeback, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping) ); DECLARE_EVENT_CLASS(writeback_dirty_inode_template, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, flags) ), TP_fast_assign( struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; ), TP_printk("bdi %s: ino=%lu state=%s flags=%s", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), show_inode_state(__entry->flags) ) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); #ifdef CREATE_TRACE_POINTS #ifdef CONFIG_CGROUP_WRITEBACK static inline ino_t __trace_wb_assign_cgroup(struct bdi_writeback *wb) { return cgroup_ino(wb->memcg_css->cgroup); } static inline ino_t __trace_wbc_assign_cgroup(struct writeback_control *wbc) { if (wbc->wb) return __trace_wb_assign_cgroup(wbc->wb); else return 1; } #else /* CONFIG_CGROUP_WRITEBACK */ static inline ino_t __trace_wb_assign_cgroup(struct bdi_writeback *wb) { 
return 1; } static inline ino_t __trace_wbc_assign_cgroup(struct writeback_control *wbc) { return 1; } #endif /* CONFIG_CGROUP_WRITEBACK */ #endif /* CREATE_TRACE_POINTS */ #ifdef CONFIG_CGROUP_WRITEBACK TRACE_EVENT(inode_foreign_history, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned int history), TP_ARGS(inode, wbc, history), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(ino_t, cgroup_ino) __field(unsigned int, history) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); __entry->history = history; ), TP_printk("bdi %s: ino=%lu cgroup_ino=%lu history=0x%x", __entry->name, (unsigned long)__entry->ino, (unsigned long)__entry->cgroup_ino, __entry->history ) ); TRACE_EVENT(inode_switch_wbs, TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb, struct bdi_writeback *new_wb), TP_ARGS(inode, old_wb, new_wb), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(ino_t, old_cgroup_ino) __field(ino_t, new_cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32); __entry->ino = inode->i_ino; __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); ), TP_printk("bdi %s: ino=%lu old_cgroup_ino=%lu new_cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, (unsigned long)__entry->old_cgroup_ino, (unsigned long)__entry->new_cgroup_ino ) ); TRACE_EVENT(track_foreign_dirty, TP_PROTO(struct folio *folio, struct bdi_writeback *wb), TP_ARGS(folio, wb), TP_STRUCT__entry( __array(char, name, 32) __field(u64, bdi_id) __field(ino_t, ino) __field(unsigned int, memcg_id) __field(ino_t, cgroup_ino) __field(ino_t, page_cgroup_ino) ), TP_fast_assign( struct address_space *mapping = folio_mapping(folio); struct inode *inode = mapping ? mapping->host : NULL; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->bdi_id = wb->bdi->id; __entry->ino = inode ? 
inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", __entry->name, __entry->bdi_id, (unsigned long)__entry->ino, __entry->memcg_id, (unsigned long)__entry->cgroup_ino, (unsigned long)__entry->page_cgroup_ino ) ); TRACE_EVENT(flush_foreign, TP_PROTO(struct bdi_writeback *wb, unsigned int frn_bdi_id, unsigned int frn_memcg_id), TP_ARGS(wb, frn_bdi_id, frn_memcg_id), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, cgroup_ino) __field(unsigned int, frn_bdi_id) __field(unsigned int, frn_memcg_id) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->frn_bdi_id = frn_bdi_id; __entry->frn_memcg_id = frn_memcg_id; ), TP_printk("bdi %s: cgroup_ino=%lu frn_bdi_id=%u frn_memcg_id=%u", __entry->name, (unsigned long)__entry->cgroup_ino, __entry->frn_bdi_id, __entry->frn_memcg_id ) ); #endif DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(int, sync_mode) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, __entry->sync_mode, (unsigned long)__entry->cgroup_ino ) ); DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc) ); DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc) ); DECLARE_EVENT_CLASS(writeback_work_class, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), TP_ARGS(wb, work), TP_STRUCT__entry( __array(char, name, 32) __field(long, nr_pages) __field(dev_t, sb_dev) __field(int, sync_mode) __field(int, for_kupdate) __field(int, range_cyclic) __field(int, for_background) __field(int, reason) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? 
work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; __entry->for_kupdate = work->for_kupdate; __entry->range_cyclic = work->range_cyclic; __entry->for_background = work->for_background; __entry->reason = work->reason; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%lu", __entry->name, MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), __entry->nr_pages, __entry->sync_mode, __entry->for_kupdate, __entry->range_cyclic, __entry->for_background, __print_symbolic(__entry->reason, WB_WORK_REASON), (unsigned long)__entry->cgroup_ino ) ); #define DEFINE_WRITEBACK_WORK_EVENT(name) \ DEFINE_EVENT(writeback_work_class, name, \ TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), \ TP_ARGS(wb, work)) DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); DEFINE_WRITEBACK_WORK_EVENT(writeback_start); DEFINE_WRITEBACK_WORK_EVENT(writeback_written); DEFINE_WRITEBACK_WORK_EVENT(writeback_wait); TRACE_EVENT(writeback_pages_written, TP_PROTO(long pages_written), TP_ARGS(pages_written), TP_STRUCT__entry( __field(long, pages) ), TP_fast_assign( __entry->pages = pages_written; ), TP_printk("%ld", __entry->pages) ); DECLARE_EVENT_CLASS(writeback_class, TP_PROTO(struct bdi_writeback *wb), TP_ARGS(wb), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: cgroup_ino=%lu", __entry->name, (unsigned long)__entry->cgroup_ino ) ); #define DEFINE_WRITEBACK_EVENT(name) \ DEFINE_EVENT(writeback_class, name, \ TP_PROTO(struct bdi_writeback *wb), \ TP_ARGS(wb)) DEFINE_WRITEBACK_EVENT(writeback_wake_background); TRACE_EVENT(writeback_bdi_register, TP_PROTO(struct backing_dev_info *bdi), TP_ARGS(bdi), TP_STRUCT__entry( __array(char, name, 32) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name ) ); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), TP_ARGS(wbc, bdi), TP_STRUCT__entry( __array(char, name, 32) __field(long, nr_to_write) __field(long, pages_skipped) __field(int, sync_mode) __field(int, for_kupdate) __field(int, for_background) __field(int, for_reclaim) __field(int, range_cyclic) __field(long, range_start) __field(long, range_end) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; __entry->for_kupdate = wbc->for_kupdate; __entry->for_background = wbc->for_background; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; __entry->range_start = (long)wbc->range_start; __entry->range_end = (long)wbc->range_end; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " "bgrd=%d reclm=%d cyclic=%d " "start=0x%lx end=0x%lx cgroup_ino=%lu", __entry->name, __entry->nr_to_write, __entry->pages_skipped, __entry->sync_mode, __entry->for_kupdate, __entry->for_background, __entry->for_reclaim, __entry->range_cyclic, __entry->range_start, __entry->range_end, (unsigned long)__entry->cgroup_ino ) ) #define DEFINE_WBC_EVENT(name) \ DEFINE_EVENT(wbc_class, name, \ TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), 
\ TP_ARGS(wbc, bdi)) DEFINE_WBC_EVENT(wbc_writepage); TRACE_EVENT(writeback_queue_io, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work, unsigned long dirtied_before, int moved), TP_ARGS(wb, work, dirtied_before, moved), TP_STRUCT__entry( __array(char, name, 32) __field(unsigned long, older) __field(long, age) __field(int, moved) __field(int, reason) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->older = dirtied_before; __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; __entry->reason = work->reason; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%lu", __entry->name, __entry->older, /* dirtied_before in jiffies */ __entry->age, /* dirtied_before in relative milliseconds */ __entry->moved, __print_symbolic(__entry->reason, WB_WORK_REASON), (unsigned long)__entry->cgroup_ino ) ); TRACE_EVENT(global_dirty_state, TP_PROTO(unsigned long background_thresh, unsigned long dirty_thresh ), TP_ARGS(background_thresh, dirty_thresh ), TP_STRUCT__entry( __field(unsigned long, nr_dirty) __field(unsigned long, nr_writeback) __field(unsigned long, background_thresh) __field(unsigned long, dirty_thresh) __field(unsigned long, dirty_limit) __field(unsigned long, nr_dirtied) __field(unsigned long, nr_written) ), TP_fast_assign( __entry->nr_dirty = global_node_page_state(NR_FILE_DIRTY); __entry->nr_writeback = global_node_page_state(NR_WRITEBACK); __entry->nr_dirtied = global_node_page_state(NR_DIRTIED); __entry->nr_written = global_node_page_state(NR_WRITTEN); __entry->background_thresh = background_thresh; __entry->dirty_thresh = dirty_thresh; __entry->dirty_limit = global_wb_domain.dirty_limit; ), TP_printk("dirty=%lu writeback=%lu " "bg_thresh=%lu thresh=%lu limit=%lu " "dirtied=%lu written=%lu", __entry->nr_dirty, __entry->nr_writeback, __entry->background_thresh, __entry->dirty_thresh, __entry->dirty_limit, __entry->nr_dirtied, __entry->nr_written ) ); #define KBps(x) ((x) << (PAGE_SHIFT - 10)) TRACE_EVENT(bdi_dirty_ratelimit, TP_PROTO(struct bdi_writeback *wb, unsigned long dirty_rate, unsigned long task_ratelimit), TP_ARGS(wb, dirty_rate, task_ratelimit), TP_STRUCT__entry( __array(char, bdi, 32) __field(unsigned long, write_bw) __field(unsigned long, avg_write_bw) __field(unsigned long, dirty_rate) __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned long, balanced_dirty_ratelimit) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); __entry->dirty_ratelimit = KBps(wb->dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->balanced_dirty_ratelimit = KBps(wb->balanced_dirty_ratelimit); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: " "write_bw=%lu awrite_bw=%lu dirty_rate=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "balanced_dirty_ratelimit=%lu cgroup_ino=%lu", __entry->bdi, __entry->write_bw, /* write bandwidth */ __entry->avg_write_bw, /* avg write bandwidth */ __entry->dirty_rate, /* bdi dirty rate */ __entry->dirty_ratelimit, /* base ratelimit */ __entry->task_ratelimit, /* ratelimit with position control */ __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */ (unsigned long)__entry->cgroup_ino ) ); 
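/*
 * Illustrative userspace sketch, not part of this header: the events declared
 * here are normally consumed through tracefs. The snippet enables the
 * global_dirty_state event defined above and streams the formatted output,
 * whose lines follow the TP_printk() templates. The mount point
 * /sys/kernel/tracing is an assumption (the usual tracefs location) and
 * reading it requires root.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int ctl, trace;

        ctl = open("/sys/kernel/tracing/events/writeback/global_dirty_state/enable",
                   O_WRONLY);
        if (ctl < 0 || write(ctl, "1", 1) != 1)
                return 1;
        close(ctl);

        trace = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
        if (trace < 0)
                return 1;
        while ((n = read(trace, buf, sizeof(buf) - 1)) > 0) {
                buf[n] = '\0';
                fputs(buf, stdout);     /* one line per event, per TP_printk() */
        }
        return 0;
}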
TRACE_EVENT(balance_dirty_pages, TP_PROTO(struct bdi_writeback *wb, unsigned long thresh, unsigned long bg_thresh, unsigned long dirty, unsigned long bdi_thresh, unsigned long bdi_dirty, unsigned long dirty_ratelimit, unsigned long task_ratelimit, unsigned long dirtied, unsigned long period, long pause, unsigned long start_time), TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, dirty_ratelimit, task_ratelimit, dirtied, period, pause, start_time), TP_STRUCT__entry( __array( char, bdi, 32) __field(unsigned long, limit) __field(unsigned long, setpoint) __field(unsigned long, dirty) __field(unsigned long, bdi_setpoint) __field(unsigned long, bdi_dirty) __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned int, dirtied) __field(unsigned int, dirtied_pause) __field(unsigned long, paused) __field( long, pause) __field(unsigned long, period) __field( long, think) __field(ino_t, cgroup_ino) ), TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + freerun) / 2; __entry->dirty = dirty; __entry->bdi_setpoint = __entry->setpoint * bdi_thresh / (thresh + 1); __entry->bdi_dirty = bdi_dirty; __entry->dirty_ratelimit = KBps(dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->dirtied = dirtied; __entry->dirtied_pause = current->nr_dirtied_pause; __entry->think = current->dirty_paused_when == 0 ? 0 : (long)(jiffies - current->dirty_paused_when) * 1000/HZ; __entry->period = period * 1000 / HZ; __entry->pause = pause * 1000 / HZ; __entry->paused = (jiffies - start_time) * 1000 / HZ; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: " "limit=%lu setpoint=%lu dirty=%lu " "bdi_setpoint=%lu bdi_dirty=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "dirtied=%u dirtied_pause=%u " "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%lu", __entry->bdi, __entry->limit, __entry->setpoint, __entry->dirty, __entry->bdi_setpoint, __entry->bdi_dirty, __entry->dirty_ratelimit, __entry->task_ratelimit, __entry->dirtied, __entry->dirtied_pause, __entry->paused, /* ms */ __entry->pause, /* ms */ __entry->period, /* ms */ __entry->think, /* ms */ (unsigned long)__entry->cgroup_ino ) ); TRACE_EVENT(writeback_sb_inodes_requeue, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, dirtied_when) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode)); ), TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), __entry->dirtied_when, (jiffies - __entry->dirtied_when) / HZ, (unsigned long)__entry->cgroup_ino ) ); DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write ), TP_ARGS(inode, wbc, nr_to_write), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, dirtied_when) __field(unsigned long, writeback_index) __field(long, nr_to_write) __field(unsigned long, wrote) __field(ino_t, cgroup_ino) ), 
TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->nr_to_write = nr_to_write; __entry->wrote = nr_to_write - wbc->nr_to_write; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu " "index=%lu to_write=%ld wrote=%lu cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), __entry->dirtied_when, (jiffies - __entry->dirtied_when) / HZ, __entry->writeback_index, __entry->nr_to_write, __entry->wrote, (unsigned long)__entry->cgroup_ino ) ); DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write), TP_ARGS(inode, wbc, nr_to_write) ); DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write), TP_ARGS(inode, wbc, nr_to_write) ); DECLARE_EVENT_CLASS(writeback_inode_template, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field(unsigned long, state ) __field( __u16, mode ) __field(unsigned long, dirtied_when ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->mode = inode->i_mode; __entry->dirtied_when = inode->dirtied_when; ), TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long)__entry->ino, __entry->dirtied_when, show_inode_state(__entry->state), __entry->mode) ); DEFINE_EVENT(writeback_inode_template, writeback_lazytime, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, writeback_lazytime_iput, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, writeback_dirty_inode_enqueue, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); /* * Inode writeback list tracking. */ DEFINE_EVENT(writeback_inode_template, sb_mark_inode_writeback, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, sb_clear_inode_writeback, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); #endif /* _TRACE_WRITEBACK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
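/*
 * Minimal sketch of how a trace header like the one above is instantiated;
 * the function below is hypothetical and only shows the pattern. Exactly one
 * .c file defines CREATE_TRACE_POINTS before including the header -- that is
 * what makes <trace/define_trace.h> emit the event definitions and what the
 * CREATE_TRACE_POINTS guards above key off -- while every other user simply
 * includes the header and calls the generated trace_*() helpers.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

/* hypothetical caller firing one of the events generated above */
static void example_queue_work(struct bdi_writeback *wb,
                               struct wb_writeback_work *work)
{
        trace_writeback_queue(wb, work);        /* from DEFINE_WRITEBACK_WORK_EVENT(writeback_queue) */
}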
// SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/util.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/slab.h> #include <linux/rculist.h> #include "common.h" /* Lock for protecting policy. */ DEFINE_MUTEX(tomoyo_policy_lock); /* Has /sbin/init started? */ bool tomoyo_policy_loaded; /* * Mapping table from "enum tomoyo_mac_index" to * "enum tomoyo_mac_category_index". */ const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX] = { /* CONFIG::file group */ [TOMOYO_MAC_FILE_EXECUTE] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_OPEN] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_CREATE] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_UNLINK] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_GETATTR] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_MKDIR] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_RMDIR] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_MKFIFO] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_MKSOCK] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_TRUNCATE] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_SYMLINK] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_MKBLOCK] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_MKCHAR] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_LINK] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_RENAME] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_CHMOD] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_CHOWN] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_CHGRP] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_IOCTL] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_CHROOT] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_MOUNT] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_UMOUNT] = TOMOYO_MAC_CATEGORY_FILE, [TOMOYO_MAC_FILE_PIVOT_ROOT] = TOMOYO_MAC_CATEGORY_FILE, /* CONFIG::network group */ [TOMOYO_MAC_NETWORK_INET_STREAM_BIND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_INET_STREAM_LISTEN] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_INET_STREAM_CONNECT] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_INET_DGRAM_BIND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_INET_DGRAM_SEND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_INET_RAW_BIND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_INET_RAW_SEND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_STREAM_BIND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_STREAM_LISTEN] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_STREAM_CONNECT] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_DGRAM_BIND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_DGRAM_SEND] = TOMOYO_MAC_CATEGORY_NETWORK,
[TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_BIND] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_LISTEN] = TOMOYO_MAC_CATEGORY_NETWORK, [TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_CONNECT] = TOMOYO_MAC_CATEGORY_NETWORK, /* CONFIG::misc group */ [TOMOYO_MAC_ENVIRON] = TOMOYO_MAC_CATEGORY_MISC, }; /** * tomoyo_convert_time - Convert time_t to YYYY/MM/DD hh/mm/ss. * * @time64: Seconds since 1970/01/01 00:00:00. * @stamp: Pointer to "struct tomoyo_time". * * Returns nothing. */ void tomoyo_convert_time(time64_t time64, struct tomoyo_time *stamp) { struct tm tm; time64_to_tm(time64, 0, &tm); stamp->sec = tm.tm_sec; stamp->min = tm.tm_min; stamp->hour = tm.tm_hour; stamp->day = tm.tm_mday; stamp->month = tm.tm_mon + 1; stamp->year = tm.tm_year + 1900; } /** * tomoyo_permstr - Find permission keywords. * * @string: String representation for permissions in foo/bar/buz format. * @keyword: Keyword to find from @string/ * * Returns true if @keyword was found in @string, false otherwise. * * This function assumes that strncmp(w1, w2, strlen(w1)) != 0 if w1 != w2. */ bool tomoyo_permstr(const char *string, const char *keyword) { const char *cp = strstr(string, keyword); if (cp) return cp == string || *(cp - 1) == '/'; return false; } /** * tomoyo_read_token - Read a word from a line. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns a word on success, "" otherwise. * * To allow the caller to skip NULL check, this function returns "" rather than * NULL if there is no more words to read. */ char *tomoyo_read_token(struct tomoyo_acl_param *param) { char *pos = param->data; char *del = strchr(pos, ' '); if (del) *del++ = '\0'; else del = pos + strlen(pos); param->data = del; return pos; } static bool tomoyo_correct_path2(const char *filename, const size_t len); /** * tomoyo_get_domainname - Read a domainname from a line. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns a domainname on success, NULL otherwise. */ const struct tomoyo_path_info *tomoyo_get_domainname (struct tomoyo_acl_param *param) { char *start = param->data; char *pos = start; while (*pos) { if (*pos++ != ' ' || tomoyo_correct_path2(pos, strchrnul(pos, ' ') - pos)) continue; *(pos - 1) = '\0'; break; } param->data = pos; if (tomoyo_correct_domain(start)) return tomoyo_get_name(start); return NULL; } /** * tomoyo_parse_ulong - Parse an "unsigned long" value. * * @result: Pointer to "unsigned long". * @str: Pointer to string to parse. * * Returns one of values in "enum tomoyo_value_type". * * The @src is updated to point the first character after the value * on success. */ u8 tomoyo_parse_ulong(unsigned long *result, char **str) { const char *cp = *str; char *ep; int base = 10; if (*cp == '0') { char c = *(cp + 1); if (c == 'x' || c == 'X') { base = 16; cp += 2; } else if (c >= '0' && c <= '7') { base = 8; cp++; } } *result = simple_strtoul(cp, &ep, base); if (cp == ep) return TOMOYO_VALUE_TYPE_INVALID; *str = ep; switch (base) { case 16: return TOMOYO_VALUE_TYPE_HEXADECIMAL; case 8: return TOMOYO_VALUE_TYPE_OCTAL; default: return TOMOYO_VALUE_TYPE_DECIMAL; } } /** * tomoyo_print_ulong - Print an "unsigned long" value. * * @buffer: Pointer to buffer. * @buffer_len: Size of @buffer. * @value: An "unsigned long" value. * @type: Type of @value. * * Returns nothing. 
*/ void tomoyo_print_ulong(char *buffer, const int buffer_len, const unsigned long value, const u8 type) { if (type == TOMOYO_VALUE_TYPE_DECIMAL) snprintf(buffer, buffer_len, "%lu", value); else if (type == TOMOYO_VALUE_TYPE_OCTAL) snprintf(buffer, buffer_len, "0%lo", value); else if (type == TOMOYO_VALUE_TYPE_HEXADECIMAL) snprintf(buffer, buffer_len, "0x%lX", value); else snprintf(buffer, buffer_len, "type(%u)", type); } /** * tomoyo_parse_name_union - Parse a tomoyo_name_union. * * @param: Pointer to "struct tomoyo_acl_param". * @ptr: Pointer to "struct tomoyo_name_union". * * Returns true on success, false otherwise. */ bool tomoyo_parse_name_union(struct tomoyo_acl_param *param, struct tomoyo_name_union *ptr) { char *filename; if (param->data[0] == '@') { param->data++; ptr->group = tomoyo_get_group(param, TOMOYO_PATH_GROUP); return ptr->group != NULL; } filename = tomoyo_read_token(param); if (!tomoyo_correct_word(filename)) return false; ptr->filename = tomoyo_get_name(filename); return ptr->filename != NULL; } /** * tomoyo_parse_number_union - Parse a tomoyo_number_union. * * @param: Pointer to "struct tomoyo_acl_param". * @ptr: Pointer to "struct tomoyo_number_union". * * Returns true on success, false otherwise. */ bool tomoyo_parse_number_union(struct tomoyo_acl_param *param, struct tomoyo_number_union *ptr) { char *data; u8 type; unsigned long v; memset(ptr, 0, sizeof(*ptr)); if (param->data[0] == '@') { param->data++; ptr->group = tomoyo_get_group(param, TOMOYO_NUMBER_GROUP); return ptr->group != NULL; } data = tomoyo_read_token(param); type = tomoyo_parse_ulong(&v, &data); if (type == TOMOYO_VALUE_TYPE_INVALID) return false; ptr->values[0] = v; ptr->value_type[0] = type; if (!*data) { ptr->values[1] = v; ptr->value_type[1] = type; return true; } if (*data++ != '-') return false; type = tomoyo_parse_ulong(&v, &data); if (type == TOMOYO_VALUE_TYPE_INVALID || *data || ptr->values[0] > v) return false; ptr->values[1] = v; ptr->value_type[1] = type; return true; } /** * tomoyo_byte_range - Check whether the string is a \ooo style octal value. * * @str: Pointer to the string. * * Returns true if @str is a \ooo style octal value, false otherwise. * * TOMOYO uses \ooo style representation for 0x01 - 0x20 and 0x7F - 0xFF. * This function verifies that \ooo is in valid range. */ static inline bool tomoyo_byte_range(const char *str) { return *str >= '0' && *str++ <= '3' && *str >= '0' && *str++ <= '7' && *str >= '0' && *str <= '7'; } /** * tomoyo_alphabet_char - Check whether the character is an alphabet. * * @c: The character to check. * * Returns true if @c is an alphabet character, false otherwise. */ static inline bool tomoyo_alphabet_char(const char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } /** * tomoyo_make_byte - Make byte value from three octal characters. * * @c1: The first character. * @c2: The second character. * @c3: The third character. * * Returns byte value. */ static inline u8 tomoyo_make_byte(const u8 c1, const u8 c2, const u8 c3) { return ((c1 - '0') << 6) + ((c2 - '0') << 3) + (c3 - '0'); } /** * tomoyo_valid - Check whether the character is a valid char. * * @c: The character to check. * * Returns true if @c is a valid character, false otherwise. */ static inline bool tomoyo_valid(const unsigned char c) { return c > ' ' && c < 127; } /** * tomoyo_invalid - Check whether the character is an invalid char. * * @c: The character to check. * * Returns true if @c is an invalid character, false otherwise. 
*/ static inline bool tomoyo_invalid(const unsigned char c) { return c && (c <= ' ' || c >= 127); } /** * tomoyo_str_starts - Check whether the given string starts with the given keyword. * * @src: Pointer to pointer to the string. * @find: Pointer to the keyword. * * Returns true if @src starts with @find, false otherwise. * * The @src is updated to point the first character after the @find * if @src starts with @find. */ bool tomoyo_str_starts(char **src, const char *find) { const int len = strlen(find); char *tmp = *src; if (strncmp(tmp, find, len)) return false; tmp += len; *src = tmp; return true; } /** * tomoyo_normalize_line - Format string. * * @buffer: The line to normalize. * * Leading and trailing whitespaces are removed. * Multiple whitespaces are packed into single space. * * Returns nothing. */ void tomoyo_normalize_line(unsigned char *buffer) { unsigned char *sp = buffer; unsigned char *dp = buffer; bool first = true; while (tomoyo_invalid(*sp)) sp++; while (*sp) { if (!first) *dp++ = ' '; first = false; while (tomoyo_valid(*sp)) *dp++ = *sp++; while (tomoyo_invalid(*sp)) sp++; } *dp = '\0'; } /** * tomoyo_correct_word2 - Validate a string. * * @string: The string to check. Maybe non-'\0'-terminated. * @len: Length of @string. * * Check whether the given string follows the naming rules. * Returns true if @string follows the naming rules, false otherwise. */ static bool tomoyo_correct_word2(const char *string, size_t len) { u8 recursion = 20; const char *const start = string; bool in_repetition = false; if (!len) goto out; while (len--) { unsigned char c = *string++; if (c == '\\') { if (!len--) goto out; c = *string++; if (c >= '0' && c <= '3') { unsigned char d; unsigned char e; if (!len-- || !len--) goto out; d = *string++; e = *string++; if (d < '0' || d > '7' || e < '0' || e > '7') goto out; c = tomoyo_make_byte(c, d, e); if (c <= ' ' || c >= 127) continue; goto out; } switch (c) { case '\\': /* "\\" */ case '+': /* "\+" */ case '?': /* "\?" */ case 'x': /* "\x" */ case 'a': /* "\a" */ case '-': /* "\-" */ continue; } if (!recursion--) goto out; switch (c) { case '*': /* "\*" */ case '@': /* "\@" */ case '$': /* "\$" */ case 'X': /* "\X" */ case 'A': /* "\A" */ continue; case '{': /* "/\{" */ if (string - 3 < start || *(string - 3) != '/') goto out; in_repetition = true; continue; case '}': /* "\}/" */ if (*string != '/') goto out; if (!in_repetition) goto out; in_repetition = false; continue; } goto out; } else if (in_repetition && c == '/') { goto out; } else if (c <= ' ' || c >= 127) { goto out; } } if (in_repetition) goto out; return true; out: return false; } /** * tomoyo_correct_word - Validate a string. * * @string: The string to check. * * Check whether the given string follows the naming rules. * Returns true if @string follows the naming rules, false otherwise. */ bool tomoyo_correct_word(const char *string) { return tomoyo_correct_word2(string, strlen(string)); } /** * tomoyo_correct_path2 - Check whether the given pathname follows the naming rules. * * @filename: The pathname to check. * @len: Length of @filename. * * Returns true if @filename follows the naming rules, false otherwise. */ static bool tomoyo_correct_path2(const char *filename, const size_t len) { const char *cp1 = memchr(filename, '/', len); const char *cp2 = memchr(filename, '.', len); return cp1 && (!cp2 || (cp1 < cp2)) && tomoyo_correct_word2(filename, len); } /** * tomoyo_correct_path - Validate a pathname. * * @filename: The pathname to check. 
* * Check whether the given pathname follows the naming rules. * Returns true if @filename follows the naming rules, false otherwise. */ bool tomoyo_correct_path(const char *filename) { return tomoyo_correct_path2(filename, strlen(filename)); } /** * tomoyo_correct_domain - Check whether the given domainname follows the naming rules. * * @domainname: The domainname to check. * * Returns true if @domainname follows the naming rules, false otherwise. */ bool tomoyo_correct_domain(const unsigned char *domainname) { if (!domainname || !tomoyo_domain_def(domainname)) return false; domainname = strchr(domainname, ' '); if (!domainname++) return true; while (1) { const unsigned char *cp = strchr(domainname, ' '); if (!cp) break; if (!tomoyo_correct_path2(domainname, cp - domainname)) return false; domainname = cp + 1; } return tomoyo_correct_path(domainname); } /** * tomoyo_domain_def - Check whether the given token can be a domainname. * * @buffer: The token to check. * * Returns true if @buffer possibly be a domainname, false otherwise. */ bool tomoyo_domain_def(const unsigned char *buffer) { const unsigned char *cp; int len; if (*buffer != '<') return false; cp = strchr(buffer, ' '); if (!cp) len = strlen(buffer); else len = cp - buffer; if (buffer[len - 1] != '>' || !tomoyo_correct_word2(buffer + 1, len - 2)) return false; return true; } /** * tomoyo_find_domain - Find a domain by the given name. * * @domainname: The domainname to find. * * Returns pointer to "struct tomoyo_domain_info" if found, NULL otherwise. * * Caller holds tomoyo_read_lock(). */ struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname) { struct tomoyo_domain_info *domain; struct tomoyo_path_info name; name.name = domainname; tomoyo_fill_path_info(&name); list_for_each_entry_rcu(domain, &tomoyo_domain_list, list, srcu_read_lock_held(&tomoyo_ss)) { if (!domain->is_deleted && !tomoyo_pathcmp(&name, domain->domainname)) return domain; } return NULL; } /** * tomoyo_const_part_length - Evaluate the initial length without a pattern in a token. * * @filename: The string to evaluate. * * Returns the initial length without a pattern in @filename. */ static int tomoyo_const_part_length(const char *filename) { char c; int len = 0; if (!filename) return 0; while ((c = *filename++) != '\0') { if (c != '\\') { len++; continue; } c = *filename++; switch (c) { case '\\': /* "\\" */ len += 2; continue; case '0': /* "\ooo" */ case '1': case '2': case '3': c = *filename++; if (c < '0' || c > '7') break; c = *filename++; if (c < '0' || c > '7') break; len += 4; continue; } break; } return len; } /** * tomoyo_fill_path_info - Fill in "struct tomoyo_path_info" members. * * @ptr: Pointer to "struct tomoyo_path_info" to fill in. * * The caller sets "struct tomoyo_path_info"->name. */ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr) { const char *name = ptr->name; const int len = strlen(name); ptr->const_len = tomoyo_const_part_length(name); ptr->is_dir = len && (name[len - 1] == '/'); ptr->is_patterned = (ptr->const_len < len); ptr->hash = full_name_hash(NULL, name, len); } /** * tomoyo_file_matches_pattern2 - Pattern matching without '/' character and "\-" pattern. * * @filename: The start of string to check. * @filename_end: The end of string to check. * @pattern: The start of pattern to compare. * @pattern_end: The end of pattern to compare. * * Returns true if @filename matches @pattern, false otherwise. 
*/ static bool tomoyo_file_matches_pattern2(const char *filename, const char *filename_end, const char *pattern, const char *pattern_end) { while (filename < filename_end && pattern < pattern_end) { char c; int i; int j; if (*pattern != '\\') { if (*filename++ != *pattern++) return false; continue; } c = *filename; pattern++; switch (*pattern) { case '?': if (c == '/') { return false; } else if (c == '\\') { if (filename[1] == '\\') filename++; else if (tomoyo_byte_range(filename + 1)) filename += 3; else return false; } break; case '\\': if (c != '\\') return false; if (*++filename != '\\') return false; break; case '+': if (!isdigit(c)) return false; break; case 'x': if (!isxdigit(c)) return false; break; case 'a': if (!tomoyo_alphabet_char(c)) return false; break; case '0': case '1': case '2': case '3': if (c == '\\' && tomoyo_byte_range(filename + 1) && strncmp(filename + 1, pattern, 3) == 0) { filename += 3; pattern += 2; break; } return false; /* Not matched. */ case '*': case '@': for (i = 0; i <= filename_end - filename; i++) { if (tomoyo_file_matches_pattern2( filename + i, filename_end, pattern + 1, pattern_end)) return true; c = filename[i]; if (c == '.' && *pattern == '@') break; if (c != '\\') continue; if (filename[i + 1] == '\\') i++; else if (tomoyo_byte_range(filename + i + 1)) i += 3; else break; /* Bad pattern. */ } return false; /* Not matched. */ default: j = 0; c = *pattern; if (c == '$') { while (isdigit(filename[j])) j++; } else if (c == 'X') { while (isxdigit(filename[j])) j++; } else if (c == 'A') { while (tomoyo_alphabet_char(filename[j])) j++; } for (i = 1; i <= j; i++) { if (tomoyo_file_matches_pattern2( filename + i, filename_end, pattern + 1, pattern_end)) return true; } return false; /* Not matched or bad pattern. */ } filename++; pattern++; } while (*pattern == '\\' && (*(pattern + 1) == '*' || *(pattern + 1) == '@')) pattern += 2; return filename == filename_end && pattern == pattern_end; } /** * tomoyo_file_matches_pattern - Pattern matching without '/' character. * * @filename: The start of string to check. * @filename_end: The end of string to check. * @pattern: The start of pattern to compare. * @pattern_end: The end of pattern to compare. * * Returns true if @filename matches @pattern, false otherwise. */ static bool tomoyo_file_matches_pattern(const char *filename, const char *filename_end, const char *pattern, const char *pattern_end) { const char *pattern_start = pattern; bool first = true; bool result; while (pattern < pattern_end - 1) { /* Split at "\-" pattern. */ if (*pattern++ != '\\' || *pattern++ != '-') continue; result = tomoyo_file_matches_pattern2(filename, filename_end, pattern_start, pattern - 2); if (first) result = !result; if (result) return false; first = false; pattern_start = pattern; } result = tomoyo_file_matches_pattern2(filename, filename_end, pattern_start, pattern_end); return first ? result : !result; } /** * tomoyo_path_matches_pattern2 - Do pathname pattern matching. * * @f: The start of string to check. * @p: The start of pattern to compare. * * Returns true if @f matches @p, false otherwise. 
*/ static bool tomoyo_path_matches_pattern2(const char *f, const char *p) { const char *f_delimiter; const char *p_delimiter; while (*f && *p) { f_delimiter = strchr(f, '/'); if (!f_delimiter) f_delimiter = f + strlen(f); p_delimiter = strchr(p, '/'); if (!p_delimiter) p_delimiter = p + strlen(p); if (*p == '\\' && *(p + 1) == '{') goto recursive; if (!tomoyo_file_matches_pattern(f, f_delimiter, p, p_delimiter)) return false; f = f_delimiter; if (*f) f++; p = p_delimiter; if (*p) p++; } /* Ignore trailing "\*" and "\@" in @pattern. */ while (*p == '\\' && (*(p + 1) == '*' || *(p + 1) == '@')) p += 2; return !*f && !*p; recursive: /* * The "\{" pattern is permitted only after '/' character. * This guarantees that below "*(p - 1)" is safe. * Also, the "\}" pattern is permitted only before '/' character * so that "\{" + "\}" pair will not break the "\-" operator. */ if (*(p - 1) != '/' || p_delimiter <= p + 3 || *p_delimiter != '/' || *(p_delimiter - 1) != '}' || *(p_delimiter - 2) != '\\') return false; /* Bad pattern. */ do { /* Compare current component with pattern. */ if (!tomoyo_file_matches_pattern(f, f_delimiter, p + 2, p_delimiter - 2)) break; /* Proceed to next component. */ f = f_delimiter; if (!*f) break; f++; /* Continue comparison. */ if (tomoyo_path_matches_pattern2(f, p_delimiter + 1)) return true; f_delimiter = strchr(f, '/'); } while (f_delimiter); return false; /* Not matched. */ } /** * tomoyo_path_matches_pattern - Check whether the given filename matches the given pattern. * * @filename: The filename to check. * @pattern: The pattern to compare. * * Returns true if matches, false otherwise. * * The following patterns are available. * \\ \ itself. * \ooo Octal representation of a byte. * \* Zero or more repetitions of characters other than '/'. * \@ Zero or more repetitions of characters other than '/' or '.'. * \? 1 byte character other than '/'. * \$ One or more repetitions of decimal digits. * \+ 1 decimal digit. * \X One or more repetitions of hexadecimal digits. * \x 1 hexadecimal digit. * \A One or more repetitions of alphabet characters. * \a 1 alphabet character. * * \- Subtraction operator. * * /\{dir\}/ '/' + 'One or more repetitions of dir/' (e.g. /dir/ /dir/dir/ * /dir/dir/dir/ ). */ bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, const struct tomoyo_path_info *pattern) { const char *f = filename->name; const char *p = pattern->name; const int len = pattern->const_len; /* If @pattern doesn't contain pattern, I can use strcmp(). */ if (!pattern->is_patterned) return !tomoyo_pathcmp(filename, pattern); /* Don't compare directory and non-directory. */ if (filename->is_dir != pattern->is_dir) return false; /* Compare the initial length without patterns. */ if (strncmp(f, p, len)) return false; f += len; p += len; return tomoyo_path_matches_pattern2(f, p); } /** * tomoyo_get_exe - Get tomoyo_realpath() of current process. * * Returns the tomoyo_realpath() of current process on success, NULL otherwise. * * This function uses kzalloc(), so the caller must call kfree() * if this function didn't return NULL. */ const char *tomoyo_get_exe(void) { struct file *exe_file; const char *cp; struct mm_struct *mm = current->mm; if (!mm) return NULL; exe_file = get_mm_exe_file(mm); if (!exe_file) return NULL; cp = tomoyo_realpath_from_path(&exe_file->f_path); fput(exe_file); return cp; } /** * tomoyo_get_mode - Get MAC mode. * * @ns: Pointer to "struct tomoyo_policy_namespace". * @profile: Profile number. * @index: Index number of functionality. 
* * Returns mode. */ int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile, const u8 index) { u8 mode; struct tomoyo_profile *p; if (!tomoyo_policy_loaded) return TOMOYO_CONFIG_DISABLED; p = tomoyo_profile(ns, profile); mode = p->config[index]; if (mode == TOMOYO_CONFIG_USE_DEFAULT) mode = p->config[tomoyo_index2category[index] + TOMOYO_MAX_MAC_INDEX]; if (mode == TOMOYO_CONFIG_USE_DEFAULT) mode = p->default_config; return mode & 3; } /** * tomoyo_init_request_info - Initialize "struct tomoyo_request_info" members. * * @r: Pointer to "struct tomoyo_request_info" to initialize. * @domain: Pointer to "struct tomoyo_domain_info". NULL for tomoyo_domain(). * @index: Index number of functionality. * * Returns mode. */ int tomoyo_init_request_info(struct tomoyo_request_info *r, struct tomoyo_domain_info *domain, const u8 index) { u8 profile; memset(r, 0, sizeof(*r)); if (!domain) domain = tomoyo_domain(); r->domain = domain; profile = domain->profile; r->profile = profile; r->type = index; r->mode = tomoyo_get_mode(domain->ns, profile, index); return r->mode; } /** * tomoyo_domain_quota_is_ok - Check for domain's quota. * * @r: Pointer to "struct tomoyo_request_info". * * Returns true if the domain is not exceeded quota, false otherwise. * * Caller holds tomoyo_read_lock(). */ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) { unsigned int count = 0; struct tomoyo_domain_info *domain = r->domain; struct tomoyo_acl_info *ptr; if (r->mode != TOMOYO_CONFIG_LEARNING) return false; if (!domain) return true; if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED])) return false; list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; if (ptr->is_deleted) continue; /* * Reading perm bitmap might race with tomoyo_merge_*() because * caller does not hold tomoyo_policy_lock mutex. But exceeding * max_learning_entry parameter by a few entries does not harm. */ switch (ptr->type) { case TOMOYO_TYPE_PATH_ACL: perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm); break; case TOMOYO_TYPE_PATH2_ACL: perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm); break; case TOMOYO_TYPE_PATH_NUMBER_ACL: perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head) ->perm); break; case TOMOYO_TYPE_MKDEV_ACL: perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); break; case TOMOYO_TYPE_INET_ACL: perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm); break; case TOMOYO_TYPE_UNIX_ACL: perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm); break; case TOMOYO_TYPE_MANUAL_TASK_ACL: perm = 0; break; default: perm = 1; } count += hweight16(perm); } if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) return true; WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true); /* r->granted = false; */ tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); #ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", domain->domainname->name); #endif return false; }
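/*
 * Illustrative sketch (not part of the kernel sources): the kernel-doc for
 * tomoyo_path_matches_pattern() above documents the wildcard grammar, so the
 * small user-space program below simply prints one example pathname per
 * documented wildcard. The pattern/pathname pairs are assumptions derived
 * from that grammar description, not output of the in-kernel matcher.
 */
#include <stdio.h>

struct pattern_example {
	const char *pattern;   /* policy pattern as written in TOMOYO policy */
	const char *pathname;  /* a pathname the documented grammar accepts  */
	const char *wildcard;  /* which documented wildcard is exercised     */
};

static const struct pattern_example examples[] = {
	{ "/etc/\\*",                  "/etc/fstab",              "\\* : run of non-'/' bytes"       },
	{ "/proc/\\$/cmdline",         "/proc/4242/cmdline",      "\\$ : one or more decimal digits" },
	{ "/tmp/\\@.log",              "/tmp/error.log",          "\\@ : non-'/', non-'.' bytes"     },
	{ "/tmp/file.\\?\\?\\?",       "/tmp/file.txt",           "\\? : exactly one non-'/' byte"   },
	{ "/var/\\{www\\}/index.html", "/var/www/www/index.html", "/\\{dir\\}/ : one or more dir/"   },
};

int main(void)
{
	size_t i;

	for (i = 0; i < sizeof(examples) / sizeof(examples[0]); i++)
		printf("%-28s accepts %-26s (%s)\n",
		       examples[i].pattern, examples[i].pathname,
		       examples[i].wildcard);
	return 0;
}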
/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Copyright (C) 2011 ProFUSION Embedded Systems Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED.
*/ /* Bluetooth HCI core. */ #include <linux/export.h> #include <linux/rfkill.h> #include <linux/debugfs.h> #include <linux/crypto.h> #include <linux/kcov.h> #include <linux/property.h> #include <linux/suspend.h> #include <linux/wait.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> #include "hci_request.h" #include "hci_debugfs.h" #include "smp.h" #include "leds.h" #include "msft.h" #include "aosp.h" #include "hci_codec.h" static void hci_rx_work(struct work_struct *work); static void hci_cmd_work(struct work_struct *work); static void hci_tx_work(struct work_struct *work); /* HCI device list */ LIST_HEAD(hci_dev_list); DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); static int hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; BT_DBG("%s %x", req->hdev->name, scan); /* Inquiry and Page scans */ hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); return 0; } static int hci_auth_req(struct hci_request *req, unsigned long opt) { __u8 auth = opt; BT_DBG("%s %x", req->hdev->name, auth); /* Authentication */ hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); return 0; } static int hci_encrypt_req(struct hci_request *req, unsigned long opt) { __u8 encrypt = opt; BT_DBG("%s %x", req->hdev->name, encrypt); /* Encryption */ hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); return 0; } static int hci_linkpol_req(struct hci_request *req, unsigned long opt) { __le16 policy = cpu_to_le16(opt); BT_DBG("%s %x", req->hdev->name, policy); /* Default link policy */ hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); return 0; } /* Get HCI device by index. * Device is held on return. 
*/ struct hci_dev *hci_dev_get(int index) { struct hci_dev *hdev = NULL, *d; BT_DBG("%d", index); if (index < 0) return NULL; read_lock(&hci_dev_list_lock); list_for_each_entry(d, &hci_dev_list, list) { if (d->id == index) { hdev = hci_dev_hold(d); break; } } read_unlock(&hci_dev_list_lock); return hdev; } /* ---- Inquiry support ---- */ bool hci_discovery_active(struct hci_dev *hdev) { struct discovery_state *discov = &hdev->discovery; switch (discov->state) { case DISCOVERY_FINDING: case DISCOVERY_RESOLVING: return true; default: return false; } } void hci_discovery_set_state(struct hci_dev *hdev, int state) { int old_state = hdev->discovery.state; BT_DBG("%s state %u -> %u", hdev->name, hdev->discovery.state, state); if (old_state == state) return; hdev->discovery.state = state; switch (state) { case DISCOVERY_STOPPED: hci_update_passive_scan(hdev); if (old_state != DISCOVERY_STARTING) mgmt_discovering(hdev, 0); break; case DISCOVERY_STARTING: break; case DISCOVERY_FINDING: mgmt_discovering(hdev, 1); break; case DISCOVERY_RESOLVING: break; case DISCOVERY_STOPPING: break; } } void hci_inquiry_cache_flush(struct hci_dev *hdev) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *p, *n; list_for_each_entry_safe(p, n, &cache->all, all) { list_del(&p->all); kfree(p); } INIT_LIST_HEAD(&cache->unknown); INIT_LIST_HEAD(&cache->resolve); } struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *e; BT_DBG("cache %p, %pMR", cache, bdaddr); list_for_each_entry(e, &cache->all, all) { if (!bacmp(&e->data.bdaddr, bdaddr)) return e; } return NULL; } struct inquiry_entry *hci_inquiry_cache_lookup_unknown(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *e; BT_DBG("cache %p, %pMR", cache, bdaddr); list_for_each_entry(e, &cache->unknown, list) { if (!bacmp(&e->data.bdaddr, bdaddr)) return e; } return NULL; } struct inquiry_entry *hci_inquiry_cache_lookup_resolve(struct hci_dev *hdev, bdaddr_t *bdaddr, int state) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *e; BT_DBG("cache %p bdaddr %pMR state %d", cache, bdaddr, state); list_for_each_entry(e, &cache->resolve, list) { if (!bacmp(bdaddr, BDADDR_ANY) && e->name_state == state) return e; if (!bacmp(&e->data.bdaddr, bdaddr)) return e; } return NULL; } void hci_inquiry_cache_update_resolve(struct hci_dev *hdev, struct inquiry_entry *ie) { struct discovery_state *cache = &hdev->discovery; struct list_head *pos = &cache->resolve; struct inquiry_entry *p; list_del(&ie->list); list_for_each_entry(p, &cache->resolve, list) { if (p->name_state != NAME_PENDING && abs(p->data.rssi) >= abs(ie->data.rssi)) break; pos = &p->list; } list_add(&ie->list, pos); } u32 hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data, bool name_known) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *ie; u32 flags = 0; BT_DBG("cache %p, %pMR", cache, &data->bdaddr); hci_remove_remote_oob_data(hdev, &data->bdaddr, BDADDR_BREDR); if (!data->ssp_mode) flags |= MGMT_DEV_FOUND_LEGACY_PAIRING; ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr); if (ie) { if (!ie->data.ssp_mode) flags |= MGMT_DEV_FOUND_LEGACY_PAIRING; if (ie->name_state == NAME_NEEDED && data->rssi != ie->data.rssi) { ie->data.rssi = data->rssi; hci_inquiry_cache_update_resolve(hdev, ie); } goto update; } /* Entry not in the cache. Add new one. 
*/ ie = kzalloc(sizeof(*ie), GFP_KERNEL); if (!ie) { flags |= MGMT_DEV_FOUND_CONFIRM_NAME; goto done; } list_add(&ie->all, &cache->all); if (name_known) { ie->name_state = NAME_KNOWN; } else { ie->name_state = NAME_NOT_KNOWN; list_add(&ie->list, &cache->unknown); } update: if (name_known && ie->name_state != NAME_KNOWN && ie->name_state != NAME_PENDING) { ie->name_state = NAME_KNOWN; list_del(&ie->list); } memcpy(&ie->data, data, sizeof(*data)); ie->timestamp = jiffies; cache->timestamp = jiffies; if (ie->name_state == NAME_NOT_KNOWN) flags |= MGMT_DEV_FOUND_CONFIRM_NAME; done: return flags; } static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) { struct discovery_state *cache = &hdev->discovery; struct inquiry_info *info = (struct inquiry_info *) buf; struct inquiry_entry *e; int copied = 0; list_for_each_entry(e, &cache->all, all) { struct inquiry_data *data = &e->data; if (copied >= num) break; bacpy(&info->bdaddr, &data->bdaddr); info->pscan_rep_mode = data->pscan_rep_mode; info->pscan_period_mode = data->pscan_period_mode; info->pscan_mode = data->pscan_mode; memcpy(info->dev_class, data->dev_class, 3); info->clock_offset = data->clock_offset; info++; copied++; } BT_DBG("cache %p, copied %d", cache, copied); return copied; } static int hci_inq_req(struct hci_request *req, unsigned long opt) { struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; struct hci_dev *hdev = req->hdev; struct hci_cp_inquiry cp; BT_DBG("%s", hdev->name); if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; /* Start Inquiry */ memcpy(&cp.lap, &ir->lap, 3); cp.length = ir->length; cp.num_rsp = ir->num_rsp; hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); return 0; } int hci_inquiry(void __user *arg) { __u8 __user *ptr = arg; struct hci_inquiry_req ir; struct hci_dev *hdev; int err = 0, do_inquiry = 0, max_rsp; long timeo; __u8 *buf; if (copy_from_user(&ir, ptr, sizeof(ir))) return -EFAULT; hdev = hci_dev_get(ir.dev_id); if (!hdev) return -ENODEV; if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } if (hdev->dev_type != HCI_PRIMARY) { err = -EOPNOTSUPP; goto done; } if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; } /* Restrict maximum inquiry length to 60 seconds */ if (ir.length > 60) { err = -EINVAL; goto done; } hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { hci_inquiry_cache_flush(hdev); do_inquiry = 1; } hci_dev_unlock(hdev); timeo = ir.length * msecs_to_jiffies(2000); if (do_inquiry) { err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, timeo, NULL); if (err < 0) goto done; /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is * cleared). If it is interrupted by a signal, return -EINTR. */ if (wait_on_bit(&hdev->flags, HCI_INQUIRY, TASK_INTERRUPTIBLE)) { err = -EINTR; goto done; } } /* for unlimited number of responses we will use buffer with * 255 entries */ max_rsp = (ir.num_rsp == 0) ? 255 : ir.num_rsp; /* cache_dump can't sleep. Therefore we allocate temp buffer and then * copy it to the user space. 
*/ buf = kmalloc_array(max_rsp, sizeof(struct inquiry_info), GFP_KERNEL); if (!buf) { err = -ENOMEM; goto done; } hci_dev_lock(hdev); ir.num_rsp = inquiry_cache_dump(hdev, max_rsp, buf); hci_dev_unlock(hdev); BT_DBG("num_rsp %d", ir.num_rsp); if (!copy_to_user(ptr, &ir, sizeof(ir))) { ptr += sizeof(ir); if (copy_to_user(ptr, buf, sizeof(struct inquiry_info) * ir.num_rsp)) err = -EFAULT; } else err = -EFAULT; kfree(buf); done: hci_dev_put(hdev); return err; } static int hci_dev_do_open(struct hci_dev *hdev) { int ret = 0; BT_DBG("%s %p", hdev->name, hdev); hci_req_sync_lock(hdev); ret = hci_dev_open_sync(hdev); hci_req_sync_unlock(hdev); return ret; } /* ---- HCI ioctl helpers ---- */ int hci_dev_open(__u16 dev) { struct hci_dev *hdev; int err; hdev = hci_dev_get(dev); if (!hdev) return -ENODEV; /* Devices that are marked as unconfigured can only be powered * up as user channel. Trying to bring them up as normal devices * will result into a failure. Only user channel operation is * possible. * * When this function is called for a user channel, the flag * HCI_USER_CHANNEL will be set first before attempting to * open the device. */ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EOPNOTSUPP; goto done; } /* We need to ensure that no other power on/off work is pending * before proceeding to call hci_dev_do_open. This is * particularly important if the setup procedure has not yet * completed. */ if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) cancel_delayed_work(&hdev->power_off); /* After this call it is guaranteed that the setup procedure * has finished. This means that error conditions like RFKILL * or no valid public or static random address apply. */ flush_workqueue(hdev->req_workqueue); /* For controllers not using the management interface and that * are brought up using legacy ioctl, set the HCI_BONDABLE bit * so that pairing works for them. Once the management interface * is in use this bit will be cleared again and userspace has * to explicitly enable it. */ if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && !hci_dev_test_flag(hdev, HCI_MGMT)) hci_dev_set_flag(hdev, HCI_BONDABLE); err = hci_dev_do_open(hdev); done: hci_dev_put(hdev); return err; } int hci_dev_do_close(struct hci_dev *hdev) { int err; BT_DBG("%s %p", hdev->name, hdev); hci_req_sync_lock(hdev); err = hci_dev_close_sync(hdev); hci_req_sync_unlock(hdev); return err; } int hci_dev_close(__u16 dev) { struct hci_dev *hdev; int err; hdev = hci_dev_get(dev); if (!hdev) return -ENODEV; if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } cancel_work_sync(&hdev->power_on); if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) cancel_delayed_work(&hdev->power_off); err = hci_dev_do_close(hdev); done: hci_dev_put(hdev); return err; } static int hci_dev_do_reset(struct hci_dev *hdev) { int ret; BT_DBG("%s %p", hdev->name, hdev); hci_req_sync_lock(hdev); /* Drop queues */ skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); /* Cancel these to avoid queueing non-chained pending work */ hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); /* Wait for * * if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) * queue_delayed_work(&hdev->{cmd,ncmd}_timer) * * inside RCU section to see the flag or complete scheduling. */ synchronize_rcu(); /* Explicitly cancel works in case scheduled after setting the flag. 
*/ cancel_delayed_work(&hdev->cmd_timer); cancel_delayed_work(&hdev->ncmd_timer); /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ drain_workqueue(hdev->workqueue); hci_dev_lock(hdev); hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); if (hdev->flush) hdev->flush(hdev); hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); atomic_set(&hdev->cmd_cnt, 1); hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; hdev->iso_cnt = 0; ret = hci_reset_sync(hdev); hci_req_sync_unlock(hdev); return ret; } int hci_dev_reset(__u16 dev) { struct hci_dev *hdev; int err; hdev = hci_dev_get(dev); if (!hdev) return -ENODEV; if (!test_bit(HCI_UP, &hdev->flags)) { err = -ENETDOWN; goto done; } if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } err = hci_dev_do_reset(hdev); done: hci_dev_put(hdev); return err; } int hci_dev_reset_stat(__u16 dev) { struct hci_dev *hdev; int ret = 0; hdev = hci_dev_get(dev); if (!hdev) return -ENODEV; if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { ret = -EBUSY; goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { ret = -EOPNOTSUPP; goto done; } memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); done: hci_dev_put(hdev); return ret; } static void hci_update_passive_scan_state(struct hci_dev *hdev, u8 scan) { bool conn_changed, discov_changed; BT_DBG("%s scan 0x%02x", hdev->name, scan); if ((scan & SCAN_PAGE)) conn_changed = !hci_dev_test_and_set_flag(hdev, HCI_CONNECTABLE); else conn_changed = hci_dev_test_and_clear_flag(hdev, HCI_CONNECTABLE); if ((scan & SCAN_INQUIRY)) { discov_changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE); } else { hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); discov_changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE); } if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; if (conn_changed || discov_changed) { /* In case this was disabled through mgmt */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) hci_update_adv_data(hdev, hdev->cur_adv_instance); mgmt_new_settings(hdev); } } int hci_dev_cmd(unsigned int cmd, void __user *arg) { struct hci_dev *hdev; struct hci_dev_req dr; int err = 0; if (copy_from_user(&dr, arg, sizeof(dr))) return -EFAULT; hdev = hci_dev_get(dr.dev_id); if (!hdev) return -ENODEV; if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } if (hdev->dev_type != HCI_PRIMARY) { err = -EOPNOTSUPP; goto done; } if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; } switch (cmd) { case HCISETAUTH: err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, HCI_INIT_TIMEOUT, NULL); break; case HCISETENCRYPT: if (!lmp_encrypt_capable(hdev)) { err = -EOPNOTSUPP; break; } if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, HCI_INIT_TIMEOUT, NULL); if (err) break; } err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, HCI_INIT_TIMEOUT, NULL); break; case HCISETSCAN: err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, HCI_INIT_TIMEOUT, NULL); /* Ensure that the connectable and discoverable states * get correctly modified as this was a non-mgmt change. 
*/ if (!err) hci_update_passive_scan_state(hdev, dr.dev_opt); break; case HCISETLINKPOL: err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, HCI_INIT_TIMEOUT, NULL); break; case HCISETLINKMODE: hdev->link_mode = ((__u16) dr.dev_opt) & (HCI_LM_MASTER | HCI_LM_ACCEPT); break; case HCISETPTYPE: if (hdev->pkt_type == (__u16) dr.dev_opt) break; hdev->pkt_type = (__u16) dr.dev_opt; mgmt_phy_configuration_changed(hdev, NULL); break; case HCISETACLMTU: hdev->acl_mtu = *((__u16 *) &dr.dev_opt + 1); hdev->acl_pkts = *((__u16 *) &dr.dev_opt + 0); break; case HCISETSCOMTU: hdev->sco_mtu = *((__u16 *) &dr.dev_opt + 1); hdev->sco_pkts = *((__u16 *) &dr.dev_opt + 0); break; default: err = -EINVAL; break; } done: hci_dev_put(hdev); return err; } int hci_get_dev_list(void __user *arg) { struct hci_dev *hdev; struct hci_dev_list_req *dl; struct hci_dev_req *dr; int n = 0, size, err; __u16 dev_num; if (get_user(dev_num, (__u16 __user *) arg)) return -EFAULT; if (!dev_num || dev_num > (PAGE_SIZE * 2) / sizeof(*dr)) return -EINVAL; size = sizeof(*dl) + dev_num * sizeof(*dr); dl = kzalloc(size, GFP_KERNEL); if (!dl) return -ENOMEM; dr = dl->dev_req; read_lock(&hci_dev_list_lock); list_for_each_entry(hdev, &hci_dev_list, list) { unsigned long flags = hdev->flags; /* When the auto-off is configured it means the transport * is running, but in that case still indicate that the * device is actually down. */ if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) flags &= ~BIT(HCI_UP); (dr + n)->dev_id = hdev->id; (dr + n)->dev_opt = flags; if (++n >= dev_num) break; } read_unlock(&hci_dev_list_lock); dl->dev_num = n; size = sizeof(*dl) + n * sizeof(*dr); err = copy_to_user(arg, dl, size); kfree(dl); return err ? -EFAULT : 0; } int hci_get_dev_info(void __user *arg) { struct hci_dev *hdev; struct hci_dev_info di; unsigned long flags; int err = 0; if (copy_from_user(&di, arg, sizeof(di))) return -EFAULT; hdev = hci_dev_get(di.dev_id); if (!hdev) return -ENODEV; /* When the auto-off is configured it means the transport * is running, but in that case still indicate that the * device is actually down. 
*/ if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) flags = hdev->flags & ~BIT(HCI_UP); else flags = hdev->flags; strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); di.flags = flags; di.pkt_type = hdev->pkt_type; if (lmp_bredr_capable(hdev)) { di.acl_mtu = hdev->acl_mtu; di.acl_pkts = hdev->acl_pkts; di.sco_mtu = hdev->sco_mtu; di.sco_pkts = hdev->sco_pkts; } else { di.acl_mtu = hdev->le_mtu; di.acl_pkts = hdev->le_pkts; di.sco_mtu = 0; di.sco_pkts = 0; } di.link_policy = hdev->link_policy; di.link_mode = hdev->link_mode; memcpy(&di.stat, &hdev->stat, sizeof(di.stat)); memcpy(&di.features, &hdev->features, sizeof(di.features)); if (copy_to_user(arg, &di, sizeof(di))) err = -EFAULT; hci_dev_put(hdev); return err; } /* ---- Interface to HCI drivers ---- */ static int hci_dev_do_poweroff(struct hci_dev *hdev) { int err; BT_DBG("%s %p", hdev->name, hdev); hci_req_sync_lock(hdev); err = hci_set_powered_sync(hdev, false); hci_req_sync_unlock(hdev); return err; } static int hci_rfkill_set_block(void *data, bool blocked) { struct hci_dev *hdev = data; int err; BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; if (blocked == hci_dev_test_flag(hdev, HCI_RFKILLED)) return 0; if (blocked) { hci_dev_set_flag(hdev, HCI_RFKILLED); if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) { err = hci_dev_do_poweroff(hdev); if (err) { bt_dev_err(hdev, "Error when powering off device on rfkill (%d)", err); /* Make sure the device is still closed even if * anything during power off sequence (eg. * disconnecting devices) failed. */ hci_dev_do_close(hdev); } } } else { hci_dev_clear_flag(hdev, HCI_RFKILLED); } return 0; } static const struct rfkill_ops hci_rfkill_ops = { .set_block = hci_rfkill_set_block, }; static void hci_power_on(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, power_on); int err; BT_DBG("%s", hdev->name); if (test_bit(HCI_UP, &hdev->flags) && hci_dev_test_flag(hdev, HCI_MGMT) && hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { cancel_delayed_work(&hdev->power_off); err = hci_powered_update_sync(hdev); mgmt_power_on(hdev, err); return; } err = hci_dev_do_open(hdev); if (err < 0) { hci_dev_lock(hdev); mgmt_set_powered_failed(hdev, err); hci_dev_unlock(hdev); return; } /* During the HCI setup phase, a few error conditions are * ignored and they need to be checked now. If they are still * valid, it is important to turn the device back off. */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || (hdev->dev_type == HCI_PRIMARY && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_do_close(hdev); } else if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) { queue_delayed_work(hdev->req_workqueue, &hdev->power_off, HCI_AUTO_OFF_TIMEOUT); } if (hci_dev_test_and_clear_flag(hdev, HCI_SETUP)) { /* For unconfigured devices, set the HCI_RAW flag * so that userspace can easily identify them. */ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) set_bit(HCI_RAW, &hdev->flags); /* For fully configured devices, this will send * the Index Added event. For unconfigured devices, * it will send Unconfigued Index Added event. * * Devices with HCI_QUIRK_RAW_DEVICE are ignored * and no event will be send. 
*/ mgmt_index_added(hdev); } else if (hci_dev_test_and_clear_flag(hdev, HCI_CONFIG)) { /* When the controller is now configured, then it * is important to clear the HCI_RAW flag. */ if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) clear_bit(HCI_RAW, &hdev->flags); /* Powering on the controller with HCI_CONFIG set only * happens with the transition from unconfigured to * configured. This will send the Index Added event. */ mgmt_index_added(hdev); } } static void hci_power_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, power_off.work); BT_DBG("%s", hdev->name); hci_dev_do_close(hdev); } static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) hdev->hw_error(hdev, hdev->hw_error_code); else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); if (!hci_dev_do_close(hdev)) hci_dev_do_open(hdev); hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; list_for_each_entry_safe(uuid, tmp, &hdev->uuids, list) { list_del(&uuid->list); kfree(uuid); } } void hci_link_keys_clear(struct hci_dev *hdev) { struct link_key *key, *tmp; list_for_each_entry_safe(key, tmp, &hdev->link_keys, list) { list_del_rcu(&key->list); kfree_rcu(key, rcu); } } void hci_smp_ltks_clear(struct hci_dev *hdev) { struct smp_ltk *k, *tmp; list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } } void hci_smp_irks_clear(struct hci_dev *hdev) { struct smp_irk *k, *tmp; list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } } void hci_blocked_keys_clear(struct hci_dev *hdev) { struct blocked_key *b, *tmp; list_for_each_entry_safe(b, tmp, &hdev->blocked_keys, list) { list_del_rcu(&b->list); kfree_rcu(b, rcu); } } bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16]) { bool blocked = false; struct blocked_key *b; rcu_read_lock(); list_for_each_entry_rcu(b, &hdev->blocked_keys, list) { if (b->type == type && !memcmp(b->val, val, sizeof(b->val))) { blocked = true; break; } } rcu_read_unlock(); return blocked; } struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct link_key *k; rcu_read_lock(); list_for_each_entry_rcu(k, &hdev->link_keys, list) { if (bacmp(bdaddr, &k->bdaddr) == 0) { rcu_read_unlock(); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LINKKEY, k->val)) { bt_dev_warn_ratelimited(hdev, "Link key blocked for %pMR", &k->bdaddr); return NULL; } return k; } } rcu_read_unlock(); return NULL; } static bool hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn, u8 key_type, u8 old_key_type) { /* Legacy key */ if (key_type < 0x03) return true; /* Debug keys are insecure so don't store them persistently */ if (key_type == HCI_LK_DEBUG_COMBINATION) return false; /* Changed combination key and there's no previous one */ if (key_type == HCI_LK_CHANGED_COMBINATION && old_key_type == 0xff) return false; /* Security mode 3 case */ if (!conn) return true; /* BR/EDR key derived using SC from an LE link */ if (conn->type == LE_LINK) return true; /* Neither local nor remote side had no-bonding as requirement */ if (conn->auth_type > 0x01 && conn->remote_auth > 0x01) return true; /* Local side had dedicated bonding as requirement */ if (conn->auth_type == 0x02 || conn->auth_type == 0x03) return true; /* Remote side had dedicated bonding as requirement 
*/ if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) return true; /* If none of the above criteria match, then don't store the key * persistently */ return false; } static u8 ltk_role(u8 type) { if (type == SMP_LTK) return HCI_ROLE_MASTER; return HCI_ROLE_SLAVE; } struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 role) { struct smp_ltk *k; rcu_read_lock(); list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { if (addr_type != k->bdaddr_type || bacmp(bdaddr, &k->bdaddr)) continue; if (smp_ltk_is_sc(k) || ltk_role(k->type) == role) { rcu_read_unlock(); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, k->val)) { bt_dev_warn_ratelimited(hdev, "LTK blocked for %pMR", &k->bdaddr); return NULL; } return k; } } rcu_read_unlock(); return NULL; } struct smp_irk *hci_find_irk_by_rpa(struct hci_dev *hdev, bdaddr_t *rpa) { struct smp_irk *irk_to_return = NULL; struct smp_irk *irk; rcu_read_lock(); list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { if (!bacmp(&irk->rpa, rpa)) { irk_to_return = irk; goto done; } } list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { if (smp_irk_matches(hdev, irk->val, rpa)) { bacpy(&irk->rpa, rpa); irk_to_return = irk; goto done; } } done: if (irk_to_return && hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, irk_to_return->val)) { bt_dev_warn_ratelimited(hdev, "Identity key blocked for %pMR", &irk_to_return->bdaddr); irk_to_return = NULL; } rcu_read_unlock(); return irk_to_return; } struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { struct smp_irk *irk_to_return = NULL; struct smp_irk *irk; /* Identity Address must be public or static random */ if (addr_type == ADDR_LE_DEV_RANDOM && (bdaddr->b[5] & 0xc0) != 0xc0) return NULL; rcu_read_lock(); list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { if (addr_type == irk->addr_type && bacmp(bdaddr, &irk->bdaddr) == 0) { irk_to_return = irk; goto done; } } done: if (irk_to_return && hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, irk_to_return->val)) { bt_dev_warn_ratelimited(hdev, "Identity key blocked for %pMR", &irk_to_return->bdaddr); irk_to_return = NULL; } rcu_read_unlock(); return irk_to_return; } struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len, bool *persistent) { struct link_key *key, *old_key; u8 old_key_type; old_key = hci_find_link_key(hdev, bdaddr); if (old_key) { old_key_type = old_key->type; key = old_key; } else { old_key_type = conn ? 
conn->key_type : 0xff; key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) return NULL; list_add_rcu(&key->list, &hdev->link_keys); } BT_DBG("%s key for %pMR type %u", hdev->name, bdaddr, type); /* Some buggy controller combinations generate a changed * combination key for legacy pairing even when there's no * previous key */ if (type == HCI_LK_CHANGED_COMBINATION && (!conn || conn->remote_auth == 0xff) && old_key_type == 0xff) { type = HCI_LK_COMBINATION; if (conn) conn->key_type = type; } bacpy(&key->bdaddr, bdaddr); memcpy(key->val, val, HCI_LINK_KEY_SIZE); key->pin_len = pin_len; if (type == HCI_LK_CHANGED_COMBINATION) key->type = old_key_type; else key->type = type; if (persistent) *persistent = hci_persistent_key(hdev, conn, type, old_key_type); return key; } struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 type, u8 authenticated, u8 tk[16], u8 enc_size, __le16 ediv, __le64 rand) { struct smp_ltk *key, *old_key; u8 role = ltk_role(type); old_key = hci_find_ltk(hdev, bdaddr, addr_type, role); if (old_key) key = old_key; else { key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) return NULL; list_add_rcu(&key->list, &hdev->long_term_keys); } bacpy(&key->bdaddr, bdaddr); key->bdaddr_type = addr_type; memcpy(key->val, tk, sizeof(key->val)); key->authenticated = authenticated; key->ediv = ediv; key->rand = rand; key->enc_size = enc_size; key->type = type; return key; } struct smp_irk *hci_add_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 val[16], bdaddr_t *rpa) { struct smp_irk *irk; irk = hci_find_irk_by_addr(hdev, bdaddr, addr_type); if (!irk) { irk = kzalloc(sizeof(*irk), GFP_KERNEL); if (!irk) return NULL; bacpy(&irk->bdaddr, bdaddr); irk->addr_type = addr_type; list_add_rcu(&irk->list, &hdev->identity_resolving_keys); } memcpy(irk->val, val, 16); bacpy(&irk->rpa, rpa); return irk; } int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct link_key *key; key = hci_find_link_key(hdev, bdaddr); if (!key) return -ENOENT; BT_DBG("%s removing %pMR", hdev->name, bdaddr); list_del_rcu(&key->list); kfree_rcu(key, rcu); return 0; } int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct smp_ltk *k, *tmp; int removed = 0; list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type) continue; BT_DBG("%s removing %pMR", hdev->name, bdaddr); list_del_rcu(&k->list); kfree_rcu(k, rcu); removed++; } return removed ? 
0 : -ENOENT; } void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { struct smp_irk *k, *tmp; list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type) continue; BT_DBG("%s removing %pMR", hdev->name, bdaddr); list_del_rcu(&k->list); kfree_rcu(k, rcu); } } bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) { struct smp_ltk *k; struct smp_irk *irk; u8 addr_type; if (type == BDADDR_BREDR) { if (hci_find_link_key(hdev, bdaddr)) return true; return false; } /* Convert to HCI addr type which struct smp_ltk uses */ if (type == BDADDR_LE_PUBLIC) addr_type = ADDR_LE_DEV_PUBLIC; else addr_type = ADDR_LE_DEV_RANDOM; irk = hci_get_irk(hdev, bdaddr, addr_type); if (irk) { bdaddr = &irk->bdaddr; addr_type = irk->addr_type; } rcu_read_lock(); list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { if (k->bdaddr_type == addr_type && !bacmp(bdaddr, &k->bdaddr)) { rcu_read_unlock(); return true; } } rcu_read_unlock(); return false; } /* HCI command timer function */ static void hci_cmd_timeout(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_timer.work); if (hdev->req_skb) { u16 opcode = hci_skb_opcode(hdev->req_skb); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); hci_cmd_sync_cancel_sync(hdev, ETIMEDOUT); } else { bt_dev_err(hdev, "command tx timeout"); } if (hdev->cmd_timeout) hdev->cmd_timeout(hdev); atomic_set(&hdev->cmd_cnt, 1); queue_work(hdev->workqueue, &hdev->cmd_work); } /* HCI ncmd timer function */ static void hci_ncmd_timeout(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, ncmd_timer.work); bt_dev_err(hdev, "Controller not accepting commands anymore: ncmd = 0"); /* During HCI_INIT phase no events can be injected if the ncmd timer * triggers since the procedure has its own timeout handling. 
*/ if (test_bit(HCI_INIT, &hdev->flags)) return; /* This is an irrecoverable state, inject hardware error event */ hci_reset_dev(hdev); } struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct oob_data *data; list_for_each_entry(data, &hdev->remote_oob_data, list) { if (bacmp(bdaddr, &data->bdaddr) != 0) continue; if (data->bdaddr_type != bdaddr_type) continue; return data; } return NULL; } int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct oob_data *data; data = hci_find_remote_oob_data(hdev, bdaddr, bdaddr_type); if (!data) return -ENOENT; BT_DBG("%s removing %pMR (%u)", hdev->name, bdaddr, bdaddr_type); list_del(&data->list); kfree(data); return 0; } void hci_remote_oob_data_clear(struct hci_dev *hdev) { struct oob_data *data, *n; list_for_each_entry_safe(data, n, &hdev->remote_oob_data, list) { list_del(&data->list); kfree(data); } } int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 *hash192, u8 *rand192, u8 *hash256, u8 *rand256) { struct oob_data *data; data = hci_find_remote_oob_data(hdev, bdaddr, bdaddr_type); if (!data) { data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; bacpy(&data->bdaddr, bdaddr); data->bdaddr_type = bdaddr_type; list_add(&data->list, &hdev->remote_oob_data); } if (hash192 && rand192) { memcpy(data->hash192, hash192, sizeof(data->hash192)); memcpy(data->rand192, rand192, sizeof(data->rand192)); if (hash256 && rand256) data->present = 0x03; } else { memset(data->hash192, 0, sizeof(data->hash192)); memset(data->rand192, 0, sizeof(data->rand192)); if (hash256 && rand256) data->present = 0x02; else data->present = 0x00; } if (hash256 && rand256) { memcpy(data->hash256, hash256, sizeof(data->hash256)); memcpy(data->rand256, rand256, sizeof(data->rand256)); } else { memset(data->hash256, 0, sizeof(data->hash256)); memset(data->rand256, 0, sizeof(data->rand256)); if (hash192 && rand192) data->present = 0x01; } BT_DBG("%s for %pMR", hdev->name, bdaddr); return 0; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *adv_instance; list_for_each_entry(adv_instance, &hdev->adv_instances, list) { if (adv_instance->instance == instance) return adv_instance; } return NULL; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *cur_instance; cur_instance = hci_find_adv_instance(hdev, instance); if (!cur_instance) return NULL; if (cur_instance == list_last_entry(&hdev->adv_instances, struct adv_info, list)) return list_first_entry(&hdev->adv_instances, struct adv_info, list); else return list_next_entry(cur_instance, list); } /* This function requires the caller holds hdev->lock */ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *adv_instance; adv_instance = hci_find_adv_instance(hdev, instance); if (!adv_instance) return -ENOENT; BT_DBG("%s removing %dMR", hdev->name, instance); if (hdev->cur_adv_instance == instance) { if (hdev->adv_instance_timeout) { cancel_delayed_work(&hdev->adv_instance_expire); hdev->adv_instance_timeout = 0; } hdev->cur_adv_instance = 0x00; } cancel_delayed_work_sync(&adv_instance->rpa_expired_cb); list_del(&adv_instance->list); kfree(adv_instance); hdev->adv_instance_cnt--; return 0; } void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired) { struct 
adv_info *adv_instance, *n; list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) adv_instance->rpa_expired = rpa_expired; } /* This function requires the caller holds hdev->lock */ void hci_adv_instances_clear(struct hci_dev *hdev) { struct adv_info *adv_instance, *n; if (hdev->adv_instance_timeout) { cancel_delayed_work(&hdev->adv_instance_expire); hdev->adv_instance_timeout = 0; } list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { cancel_delayed_work_sync(&adv_instance->rpa_expired_cb); list_del(&adv_instance->list); kfree(adv_instance); } hdev->adv_instance_cnt = 0; hdev->cur_adv_instance = 0x00; } static void adv_instance_rpa_expired(struct work_struct *work) { struct adv_info *adv_instance = container_of(work, struct adv_info, rpa_expired_cb.work); BT_DBG(""); adv_instance->rpa_expired = true; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration, s8 tx_power, u32 min_interval, u32 max_interval, u8 mesh_handle) { struct adv_info *adv; adv = hci_find_adv_instance(hdev, instance); if (adv) { memset(adv->adv_data, 0, sizeof(adv->adv_data)); memset(adv->scan_rsp_data, 0, sizeof(adv->scan_rsp_data)); memset(adv->per_adv_data, 0, sizeof(adv->per_adv_data)); } else { if (hdev->adv_instance_cnt >= hdev->le_num_of_adv_sets || instance < 1 || instance > hdev->le_num_of_adv_sets + 1) return ERR_PTR(-EOVERFLOW); adv = kzalloc(sizeof(*adv), GFP_KERNEL); if (!adv) return ERR_PTR(-ENOMEM); adv->pending = true; adv->instance = instance; list_add(&adv->list, &hdev->adv_instances); hdev->adv_instance_cnt++; } adv->flags = flags; adv->min_interval = min_interval; adv->max_interval = max_interval; adv->tx_power = tx_power; /* Defining a mesh_handle changes the timing units to ms, * rather than seconds, and ties the instance to the requested * mesh_tx queue. 
*/ adv->mesh = mesh_handle; hci_set_adv_instance_data(hdev, instance, adv_data_len, adv_data, scan_rsp_len, scan_rsp_data); adv->timeout = timeout; adv->remaining_time = timeout; if (duration == 0) adv->duration = hdev->def_multi_adv_rotation_duration; else adv->duration = duration; INIT_DELAYED_WORK(&adv->rpa_expired_cb, adv_instance_rpa_expired); BT_DBG("%s for %dMR", hdev->name, instance); return adv; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval) { struct adv_info *adv; adv = hci_add_adv_instance(hdev, instance, flags, 0, NULL, 0, NULL, 0, 0, HCI_ADV_TX_POWER_NO_PREFERENCE, min_interval, max_interval, 0); if (IS_ERR(adv)) return adv; adv->periodic = true; adv->per_adv_data_len = data_len; if (data) memcpy(adv->per_adv_data, data, data_len); return adv; } /* This function requires the caller holds hdev->lock */ int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data) { struct adv_info *adv; adv = hci_find_adv_instance(hdev, instance); /* If advertisement doesn't exist, we can't modify its data */ if (!adv) return -ENOENT; if (adv_data_len && ADV_DATA_CMP(adv, adv_data, adv_data_len)) { memset(adv->adv_data, 0, sizeof(adv->adv_data)); memcpy(adv->adv_data, adv_data, adv_data_len); adv->adv_data_len = adv_data_len; adv->adv_data_changed = true; } if (scan_rsp_len && SCAN_RSP_CMP(adv, scan_rsp_data, scan_rsp_len)) { memset(adv->scan_rsp_data, 0, sizeof(adv->scan_rsp_data)); memcpy(adv->scan_rsp_data, scan_rsp_data, scan_rsp_len); adv->scan_rsp_len = scan_rsp_len; adv->scan_rsp_changed = true; } /* Mark as changed if there are flags which would affect it */ if (((adv->flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) || adv->flags & MGMT_ADV_FLAG_LOCAL_NAME) adv->scan_rsp_changed = true; return 0; } /* This function requires the caller holds hdev->lock */ u32 hci_adv_instance_flags(struct hci_dev *hdev, u8 instance) { u32 flags; struct adv_info *adv; if (instance == 0x00) { /* Instance 0 always manages the "Tx Power" and "Flags" * fields */ flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting * corresponds to the "connectable" instance flag. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) flags |= MGMT_ADV_FLAG_CONNECTABLE; if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) flags |= MGMT_ADV_FLAG_DISCOV; return flags; } adv = hci_find_adv_instance(hdev, instance); /* Return 0 when we got an invalid instance identifier. */ if (!adv) return 0; return adv->flags; } bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance) { struct adv_info *adv; /* Instance 0x00 always set local name */ if (instance == 0x00) return true; adv = hci_find_adv_instance(hdev, instance); if (!adv) return false; if (adv->flags & MGMT_ADV_FLAG_APPEARANCE || adv->flags & MGMT_ADV_FLAG_LOCAL_NAME) return true; return adv->scan_rsp_len ? true : false; } /* This function requires the caller holds hdev->lock */ void hci_adv_monitors_clear(struct hci_dev *hdev) { struct adv_monitor *monitor; int handle; idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) hci_free_adv_monitor(hdev, monitor); idr_destroy(&hdev->adv_monitors_idr); } /* Frees the monitor structure and do some bookkeepings. 
* This function requires the caller holds hdev->lock. */ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) { struct adv_pattern *pattern; struct adv_pattern *tmp; if (!monitor) return; list_for_each_entry_safe(pattern, tmp, &monitor->patterns, list) { list_del(&pattern->list); kfree(pattern); } if (monitor->handle) idr_remove(&hdev->adv_monitors_idr, monitor->handle); if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) { hdev->adv_monitors_cnt--; mgmt_adv_monitor_removed(hdev, monitor->handle); } kfree(monitor); } /* Assigns handle to a monitor, and if offloading is supported and power is on, * also attempts to forward the request to the controller. * This function requires the caller holds hci_req_sync_lock. */ int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) { int min, max, handle; int status = 0; if (!monitor) return -EINVAL; hci_dev_lock(hdev); min = HCI_MIN_ADV_MONITOR_HANDLE; max = HCI_MIN_ADV_MONITOR_HANDLE + HCI_MAX_ADV_MONITOR_NUM_HANDLES; handle = idr_alloc(&hdev->adv_monitors_idr, monitor, min, max, GFP_KERNEL); hci_dev_unlock(hdev); if (handle < 0) return handle; monitor->handle = handle; if (!hdev_is_powered(hdev)) return status; switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_NONE: bt_dev_dbg(hdev, "add monitor %d status %d", monitor->handle, status); /* Message was not forwarded to controller - not an error */ break; case HCI_ADV_MONITOR_EXT_MSFT: status = msft_add_monitor_pattern(hdev, monitor); bt_dev_dbg(hdev, "add monitor %d msft status %d", handle, status); break; } return status; } /* Attempts to tell the controller and free the monitor. If somehow the * controller doesn't have a corresponding handle, remove anyway. * This function requires the caller holds hci_req_sync_lock. 
*/ static int hci_remove_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) { int status = 0; int handle; switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */ bt_dev_dbg(hdev, "remove monitor %d status %d", monitor->handle, status); goto free_monitor; case HCI_ADV_MONITOR_EXT_MSFT: handle = monitor->handle; status = msft_remove_monitor(hdev, monitor); bt_dev_dbg(hdev, "remove monitor %d msft status %d", handle, status); break; } /* In case no matching handle registered, just free the monitor */ if (status == -ENOENT) goto free_monitor; return status; free_monitor: if (status == -ENOENT) bt_dev_warn(hdev, "Removing monitor with no matching handle %d", monitor->handle); hci_free_adv_monitor(hdev, monitor); return status; } /* This function requires the caller holds hci_req_sync_lock */ int hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle) { struct adv_monitor *monitor = idr_find(&hdev->adv_monitors_idr, handle); if (!monitor) return -EINVAL; return hci_remove_adv_monitor(hdev, monitor); } /* This function requires the caller holds hci_req_sync_lock */ int hci_remove_all_adv_monitor(struct hci_dev *hdev) { struct adv_monitor *monitor; int idr_next_id = 0; int status = 0; while (1) { monitor = idr_get_next(&hdev->adv_monitors_idr, &idr_next_id); if (!monitor) break; status = hci_remove_adv_monitor(hdev, monitor); if (status) return status; idr_next_id++; } return status; } /* This function requires the caller holds hdev->lock */ bool hci_is_adv_monitoring(struct hci_dev *hdev) { return !idr_is_empty(&hdev->adv_monitors_idr); } int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev) { if (msft_monitor_supported(hdev)) return HCI_ADV_MONITOR_EXT_MSFT; return HCI_ADV_MONITOR_EXT_NONE; } struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list *b; list_for_each_entry(b, bdaddr_list, list) { if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type) return b; } return NULL; } struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk( struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list_with_irk *b; list_for_each_entry(b, bdaddr_list, list) { if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type) return b; } return NULL; } struct bdaddr_list_with_flags * hci_bdaddr_list_lookup_with_flags(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list_with_flags *b; list_for_each_entry(b, bdaddr_list, list) { if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type) return b; } return NULL; } void hci_bdaddr_list_clear(struct list_head *bdaddr_list) { struct bdaddr_list *b, *n; list_for_each_entry_safe(b, n, bdaddr_list, list) { list_del(&b->list); kfree(b); } } int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list *entry; if (!bacmp(bdaddr, BDADDR_ANY)) return -EBADF; if (hci_bdaddr_list_lookup(list, bdaddr, type)) return -EEXIST; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; bacpy(&entry->bdaddr, bdaddr); entry->bdaddr_type = type; list_add(&entry->list, list); return 0; } int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr, u8 type, u8 *peer_irk, u8 *local_irk) { struct bdaddr_list_with_irk *entry; if (!bacmp(bdaddr, BDADDR_ANY)) return -EBADF; if (hci_bdaddr_list_lookup(list, bdaddr, type)) return -EEXIST; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; bacpy(&entry->bdaddr, 
bdaddr); entry->bdaddr_type = type; if (peer_irk) memcpy(entry->peer_irk, peer_irk, 16); if (local_irk) memcpy(entry->local_irk, local_irk, 16); list_add(&entry->list, list); return 0; } int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr, u8 type, u32 flags) { struct bdaddr_list_with_flags *entry; if (!bacmp(bdaddr, BDADDR_ANY)) return -EBADF; if (hci_bdaddr_list_lookup(list, bdaddr, type)) return -EEXIST; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; bacpy(&entry->bdaddr, bdaddr); entry->bdaddr_type = type; entry->flags = flags; list_add(&entry->list, list); return 0; } int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list *entry; if (!bacmp(bdaddr, BDADDR_ANY)) { hci_bdaddr_list_clear(list); return 0; } entry = hci_bdaddr_list_lookup(list, bdaddr, type); if (!entry) return -ENOENT; list_del(&entry->list); kfree(entry); return 0; } int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list_with_irk *entry; if (!bacmp(bdaddr, BDADDR_ANY)) { hci_bdaddr_list_clear(list); return 0; } entry = hci_bdaddr_list_lookup_with_irk(list, bdaddr, type); if (!entry) return -ENOENT; list_del(&entry->list); kfree(entry); return 0; } int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list_with_flags *entry; if (!bacmp(bdaddr, BDADDR_ANY)) { hci_bdaddr_list_clear(list); return 0; } entry = hci_bdaddr_list_lookup_with_flags(list, bdaddr, type); if (!entry) return -ENOENT; list_del(&entry->list); kfree(entry); return 0; } /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) { struct hci_conn_params *params; list_for_each_entry(params, &hdev->le_conn_params, list) { if (bacmp(&params->addr, addr) == 0 && params->addr_type == addr_type) { return params; } } return NULL; } /* This function requires the caller holds hdev->lock or rcu_read_lock */ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type) { struct hci_conn_params *param; rcu_read_lock(); list_for_each_entry_rcu(param, list, action) { if (bacmp(&param->addr, addr) == 0 && param->addr_type == addr_type) { rcu_read_unlock(); return param; } } rcu_read_unlock(); return NULL; } /* This function requires the caller holds hdev->lock */ void hci_pend_le_list_del_init(struct hci_conn_params *param) { if (list_empty(&param->action)) return; list_del_rcu(&param->action); synchronize_rcu(); INIT_LIST_HEAD(&param->action); } /* This function requires the caller holds hdev->lock */ void hci_pend_le_list_add(struct hci_conn_params *param, struct list_head *list) { list_add_rcu(&param->action, list); } /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) { struct hci_conn_params *params; params = hci_conn_params_lookup(hdev, addr, addr_type); if (params) return params; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) { bt_dev_err(hdev, "out of memory"); return NULL; } bacpy(&params->addr, addr); params->addr_type = addr_type; list_add(&params->list, &hdev->le_conn_params); INIT_LIST_HEAD(&params->action); params->conn_min_interval = hdev->le_conn_min_interval; params->conn_max_interval = hdev->le_conn_max_interval; params->conn_latency = hdev->le_conn_latency; params->supervision_timeout = hdev->le_supv_timeout; 
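/* New entries start from the controller-wide LE connection defaults set
 * above; auto-connect remains disabled until a different policy is
 * requested (for example via the MGMT Add Device path).
 */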
params->auto_connect = HCI_AUTO_CONN_DISABLED; BT_DBG("addr %pMR (type %u)", addr, addr_type); return params; } void hci_conn_params_free(struct hci_conn_params *params) { hci_pend_le_list_del_init(params); if (params->conn) { hci_conn_drop(params->conn); hci_conn_put(params->conn); } list_del(&params->list); kfree(params); } /* This function requires the caller holds hdev->lock */ void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) { struct hci_conn_params *params; params = hci_conn_params_lookup(hdev, addr, addr_type); if (!params) return; hci_conn_params_free(params); hci_update_passive_scan(hdev); BT_DBG("addr %pMR (type %u)", addr, addr_type); } /* This function requires the caller holds hdev->lock */ void hci_conn_params_clear_disabled(struct hci_dev *hdev) { struct hci_conn_params *params, *tmp; list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) { if (params->auto_connect != HCI_AUTO_CONN_DISABLED) continue; /* If trying to establish one time connection to disabled * device, leave the params, but mark them as just once. */ if (params->explicit_connect) { params->auto_connect = HCI_AUTO_CONN_EXPLICIT; continue; } hci_conn_params_free(params); } BT_DBG("All LE disabled connection parameters were removed"); } /* This function requires the caller holds hdev->lock */ static void hci_conn_params_clear_all(struct hci_dev *hdev) { struct hci_conn_params *params, *tmp; list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) hci_conn_params_free(params); BT_DBG("All LE connection parameters were removed"); } /* Copy the Identity Address of the controller. * * If the controller has a public BD_ADDR, then by default use that one. * If this is a LE only controller without a public address, default to * the static random address. * * For debugging purposes it is possible to force controllers with a * public address to use the static random address instead. * * In case BR/EDR has been disabled on a dual-mode controller and * userspace has configured a static address, then that address * becomes the identity address instead of the public BR/EDR address. */ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type) { if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { bacpy(bdaddr, &hdev->static_addr); *bdaddr_type = ADDR_LE_DEV_RANDOM; } else { bacpy(bdaddr, &hdev->bdaddr); *bdaddr_type = ADDR_LE_DEV_PUBLIC; } } static void hci_clear_wake_reason(struct hci_dev *hdev) { hci_dev_lock(hdev); hdev->wake_reason = 0; bacpy(&hdev->wake_addr, BDADDR_ANY); hdev->wake_addr_type = 0; hci_dev_unlock(hdev); } static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct hci_dev *hdev = container_of(nb, struct hci_dev, suspend_notifier); int ret = 0; /* Userspace has full control of this device. Do nothing. */ if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return NOTIFY_DONE; /* To avoid a potential race with hci_unregister_dev. 
*/ hci_dev_hold(hdev); if (action == PM_SUSPEND_PREPARE) ret = hci_suspend_dev(hdev); else if (action == PM_POST_SUSPEND) ret = hci_resume_dev(hdev); if (ret) bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d", action, ret); hci_dev_put(hdev); return NOTIFY_DONE; } /* Alloc HCI device */ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) { struct hci_dev *hdev; unsigned int alloc_size; alloc_size = sizeof(*hdev); if (sizeof_priv) { /* Fixme: May need ALIGN-ment? */ alloc_size += sizeof_priv; } hdev = kzalloc(alloc_size, GFP_KERNEL); if (!hdev) return NULL; hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); hdev->num_iac = 0x01; /* One IAC support is mandatory */ hdev->io_capability = 0x03; /* No Input No Output */ hdev->manufacturer = 0xffff; /* Default to internal use */ hdev->inq_tx_power = HCI_TX_POWER_INVALID; hdev->adv_tx_power = HCI_TX_POWER_INVALID; hdev->adv_instance_cnt = 0; hdev->cur_adv_instance = 0x00; hdev->adv_instance_timeout = 0; hdev->advmon_allowlist_duration = 300; hdev->advmon_no_filter_duration = 500; hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */ hdev->sniff_max_interval = 800; hdev->sniff_min_interval = 80; hdev->le_adv_channel_map = 0x07; hdev->le_adv_min_interval = 0x0800; hdev->le_adv_max_interval = 0x0800; hdev->le_scan_interval = 0x0060; hdev->le_scan_window = 0x0030; hdev->le_scan_int_suspend = 0x0400; hdev->le_scan_window_suspend = 0x0012; hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT; hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN; hdev->le_scan_int_adv_monitor = 0x0060; hdev->le_scan_window_adv_monitor = 0x0030; hdev->le_scan_int_connect = 0x0060; hdev->le_scan_window_connect = 0x0060; hdev->le_conn_min_interval = 0x0018; hdev->le_conn_max_interval = 0x0028; hdev->le_conn_latency = 0x0000; hdev->le_supv_timeout = 0x002a; hdev->le_def_tx_len = 0x001b; hdev->le_def_tx_time = 0x0148; hdev->le_max_tx_len = 0x001b; hdev->le_max_tx_time = 0x0148; hdev->le_max_rx_len = 0x001b; hdev->le_max_rx_time = 0x0148; hdev->le_max_key_size = SMP_MAX_ENC_KEY_SIZE; hdev->le_min_key_size = SMP_MIN_ENC_KEY_SIZE; hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES; hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION; hdev->def_le_autoconnect_timeout = HCI_LE_AUTOCONN_TIMEOUT; hdev->min_le_tx_power = HCI_TX_POWER_INVALID; hdev->max_le_tx_power = HCI_TX_POWER_INVALID; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE; hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE; hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT; hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE; /* default 1.28 sec page scan */ hdev->def_page_scan_type = PAGE_SCAN_TYPE_STANDARD; hdev->def_page_scan_int = 0x0800; hdev->def_page_scan_window = 0x0012; mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); ida_init(&hdev->unset_handle_ida); INIT_LIST_HEAD(&hdev->mesh_pending); INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->reject_list); INIT_LIST_HEAD(&hdev->accept_list); INIT_LIST_HEAD(&hdev->uuids); INIT_LIST_HEAD(&hdev->link_keys); INIT_LIST_HEAD(&hdev->long_term_keys); INIT_LIST_HEAD(&hdev->identity_resolving_keys); INIT_LIST_HEAD(&hdev->remote_oob_data); INIT_LIST_HEAD(&hdev->le_accept_list); INIT_LIST_HEAD(&hdev->le_resolv_list); INIT_LIST_HEAD(&hdev->le_conn_params); 
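/* pend_le_conns and pend_le_reports hold hci_conn_params entries, linked
 * through their action member, that the passive scan logic uses to decide
 * which devices to auto-connect or to report to userspace.
 */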
INIT_LIST_HEAD(&hdev->pend_le_conns); INIT_LIST_HEAD(&hdev->pend_le_reports); INIT_LIST_HEAD(&hdev->conn_hash.list); INIT_LIST_HEAD(&hdev->adv_instances); INIT_LIST_HEAD(&hdev->blocked_keys); INIT_LIST_HEAD(&hdev->monitored_devices); INIT_LIST_HEAD(&hdev->local_codecs); INIT_WORK(&hdev->rx_work, hci_rx_work); INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); INIT_WORK(&hdev->error_reset, hci_error_reset); hci_cmd_sync_init(hdev); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); init_waitqueue_head(&hdev->req_wait_q); INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout); hci_devcd_setup(hdev); hci_request_setup(hdev); hci_init_sysfs(hdev); discovery_init(hdev); return hdev; } EXPORT_SYMBOL(hci_alloc_dev_priv); /* Free HCI device */ void hci_free_dev(struct hci_dev *hdev) { /* will free via device release */ put_device(&hdev->dev); } EXPORT_SYMBOL(hci_free_dev); /* Register HCI device */ int hci_register_dev(struct hci_dev *hdev) { int id, error; if (!hdev->open || !hdev->close || !hdev->send) return -EINVAL; /* Do not allow HCI_AMP devices to register at index 0, * so the index can be used as the AMP controller ID. */ switch (hdev->dev_type) { case HCI_PRIMARY: id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); break; case HCI_AMP: id = ida_alloc_range(&hci_index_ida, 1, HCI_MAX_ID - 1, GFP_KERNEL); break; default: return -EINVAL; } if (id < 0) return id; error = dev_set_name(&hdev->dev, "hci%u", id); if (error) return error; hdev->name = dev_name(&hdev->dev); hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hdev->workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name); if (!hdev->workqueue) { error = -ENOMEM; goto err; } hdev->req_workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name); if (!hdev->req_workqueue) { destroy_workqueue(hdev->workqueue); error = -ENOMEM; goto err; } if (!IS_ERR_OR_NULL(bt_debugfs)) hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs); error = device_add(&hdev->dev); if (error < 0) goto err_wqueue; hci_leds_init(hdev); hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev, RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops, hdev); if (hdev->rfkill) { if (rfkill_register(hdev->rfkill) < 0) { rfkill_destroy(hdev->rfkill); hdev->rfkill = NULL; } } if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) hci_dev_set_flag(hdev, HCI_RFKILLED); hci_dev_set_flag(hdev, HCI_SETUP); hci_dev_set_flag(hdev, HCI_AUTO_OFF); if (hdev->dev_type == HCI_PRIMARY) { /* Assume BR/EDR support until proven otherwise (such as * through reading supported features during init. */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); } write_lock(&hci_dev_list_lock); list_add(&hdev->list, &hci_dev_list); write_unlock(&hci_dev_list_lock); /* Devices that are marked for raw-only usage are unconfigured * and should not be included in normal operation. */ if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); /* Mark Remote Wakeup connection flag as supported if driver has wakeup * callback. 
*/ if (hdev->wakeup) hdev->conn_flags |= HCI_CONN_FLAG_REMOTE_WAKEUP; hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); error = hci_register_suspend_notifier(hdev); if (error) BT_WARN("register suspend notifier failed error:%d\n", error); queue_work(hdev->req_workqueue, &hdev->power_on); idr_init(&hdev->adv_monitors_idr); msft_register(hdev); return id; err_wqueue: debugfs_remove_recursive(hdev->debugfs); destroy_workqueue(hdev->workqueue); destroy_workqueue(hdev->req_workqueue); err: ida_free(&hci_index_ida, hdev->id); return error; } EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); mutex_lock(&hdev->unregister_lock); hci_dev_set_flag(hdev, HCI_UNREGISTER); mutex_unlock(&hdev->unregister_lock); write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); cancel_work_sync(&hdev->power_on); hci_cmd_sync_clear(hdev); hci_unregister_suspend_notifier(hdev); msft_unregister(hdev); hci_dev_do_close(hdev); if (!test_bit(HCI_INIT, &hdev->flags) && !hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) { hci_dev_lock(hdev); mgmt_index_removed(hdev); hci_dev_unlock(hdev); } /* mgmt_index_removed should take care of emptying the * pending list */ BUG_ON(!list_empty(&hdev->mgmt_pending)); hci_sock_dev_event(hdev, HCI_DEV_UNREG); if (hdev->rfkill) { rfkill_unregister(hdev->rfkill); rfkill_destroy(hdev->rfkill); } device_del(&hdev->dev); /* Actual cleanup is deferred until hci_release_dev(). */ hci_dev_put(hdev); } EXPORT_SYMBOL(hci_unregister_dev); /* Release HCI device */ void hci_release_dev(struct hci_dev *hdev) { debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); kfree_const(hdev->fw_info); destroy_workqueue(hdev->workqueue); destroy_workqueue(hdev->req_workqueue); hci_dev_lock(hdev); hci_bdaddr_list_clear(&hdev->reject_list); hci_bdaddr_list_clear(&hdev->accept_list); hci_uuids_clear(hdev); hci_link_keys_clear(hdev); hci_smp_ltks_clear(hdev); hci_smp_irks_clear(hdev); hci_remote_oob_data_clear(hdev); hci_adv_instances_clear(hdev); hci_adv_monitors_clear(hdev); hci_bdaddr_list_clear(&hdev->le_accept_list); hci_bdaddr_list_clear(&hdev->le_resolv_list); hci_conn_params_clear_all(hdev); hci_discovery_filter_clear(hdev); hci_blocked_keys_clear(hdev); hci_codec_list_clear(&hdev->local_codecs); hci_dev_unlock(hdev); ida_destroy(&hdev->unset_handle_ida); ida_free(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); kfree_skb(hdev->req_skb); kfree_skb(hdev->recv_event); kfree(hdev); } EXPORT_SYMBOL(hci_release_dev); int hci_register_suspend_notifier(struct hci_dev *hdev) { int ret = 0; if (!hdev->suspend_notifier.notifier_call && !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { hdev->suspend_notifier.notifier_call = hci_suspend_notifier; ret = register_pm_notifier(&hdev->suspend_notifier); } return ret; } int hci_unregister_suspend_notifier(struct hci_dev *hdev) { int ret = 0; if (hdev->suspend_notifier.notifier_call) { ret = unregister_pm_notifier(&hdev->suspend_notifier); if (!ret) hdev->suspend_notifier.notifier_call = NULL; } return ret; } /* Cancel ongoing command synchronously: * * - Cancel command timer * - Reset command counter * - Cancel command request */ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); cancel_delayed_work_sync(&hdev->cmd_timer); cancel_delayed_work_sync(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); 
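/* Timers are stopped and the command credit restored; now fail the
 * pending request so its completion callback sees the given error.
 */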
hci_cmd_sync_cancel_sync(hdev, err); } /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { int ret; bt_dev_dbg(hdev, ""); /* Suspend should only act on when powered. */ if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; /* If powering down don't attempt to suspend */ if (mgmt_powering_down(hdev)) return 0; /* Cancel potentially blocking sync operation before suspend */ hci_cancel_cmd_sync(hdev, EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); hci_req_sync_unlock(hdev); hci_clear_wake_reason(hdev); mgmt_suspending(hdev, hdev->suspend_state); hci_sock_dev_event(hdev, HCI_DEV_SUSPEND); return ret; } EXPORT_SYMBOL(hci_suspend_dev); /* Resume HCI device */ int hci_resume_dev(struct hci_dev *hdev) { int ret; bt_dev_dbg(hdev, ""); /* Resume should only act on when powered. */ if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; /* If powering down don't attempt to resume */ if (mgmt_powering_down(hdev)) return 0; hci_req_sync_lock(hdev); ret = hci_resume_sync(hdev); hci_req_sync_unlock(hdev); mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr, hdev->wake_addr_type); hci_sock_dev_event(hdev, HCI_DEV_RESUME); return ret; } EXPORT_SYMBOL(hci_resume_dev); /* Reset HCI device */ int hci_reset_dev(struct hci_dev *hdev) { static const u8 hw_err[] = { HCI_EV_HARDWARE_ERROR, 0x01, 0x00 }; struct sk_buff *skb; skb = bt_skb_alloc(3, GFP_ATOMIC); if (!skb) return -ENOMEM; hci_skb_pkt_type(skb) = HCI_EVENT_PKT; skb_put_data(skb, hw_err, 3); bt_dev_err(hdev, "Injecting HCI hardware error event"); /* Send Hardware Error to upper stack */ return hci_recv_frame(hdev, skb); } EXPORT_SYMBOL(hci_reset_dev); /* Receive frame from HCI drivers */ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) { if (!hdev || (!test_bit(HCI_UP, &hdev->flags) && !test_bit(HCI_INIT, &hdev->flags))) { kfree_skb(skb); return -ENXIO; } switch (hci_skb_pkt_type(skb)) { case HCI_EVENT_PKT: break; case HCI_ACLDATA_PKT: /* Detect if ISO packet has been sent as ACL */ if (hci_conn_num(hdev, ISO_LINK)) { __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); __u8 type; type = hci_conn_lookup_type(hdev, hci_handle(handle)); if (type == ISO_LINK) hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; } break; case HCI_SCODATA_PKT: break; case HCI_ISODATA_PKT: break; default: kfree_skb(skb); return -EINVAL; } /* Incoming skb */ bt_cb(skb)->incoming = 1; /* Time stamp */ __net_timestamp(skb); skb_queue_tail(&hdev->rx_q, skb); queue_work(hdev->workqueue, &hdev->rx_work); return 0; } EXPORT_SYMBOL(hci_recv_frame); /* Receive diagnostic message from HCI drivers */ int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb) { /* Mark as diagnostic packet */ hci_skb_pkt_type(skb) = HCI_DIAG_PKT; /* Time stamp */ __net_timestamp(skb); skb_queue_tail(&hdev->rx_q, skb); queue_work(hdev->workqueue, &hdev->rx_work); return 0; } EXPORT_SYMBOL(hci_recv_diag); void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...) { va_list vargs; va_start(vargs, fmt); kfree_const(hdev->hw_info); hdev->hw_info = kvasprintf_const(GFP_KERNEL, fmt, vargs); va_end(vargs); } EXPORT_SYMBOL(hci_set_hw_info); void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...) 
{ va_list vargs; va_start(vargs, fmt); kfree_const(hdev->fw_info); hdev->fw_info = kvasprintf_const(GFP_KERNEL, fmt, vargs); va_end(vargs); } EXPORT_SYMBOL(hci_set_fw_info); /* ---- Interface to upper protocols ---- */ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); mutex_lock(&hci_cb_list_lock); list_add_tail(&cb->list, &hci_cb_list); mutex_unlock(&hci_cb_list_lock); return 0; } EXPORT_SYMBOL(hci_register_cb); int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); mutex_lock(&hci_cb_list_lock); list_del(&cb->list); mutex_unlock(&hci_cb_list_lock); return 0; } EXPORT_SYMBOL(hci_unregister_cb); static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { int err; BT_DBG("%s type %d len %d", hdev->name, hci_skb_pkt_type(skb), skb->len); /* Time stamp */ __net_timestamp(skb); /* Send copy to monitor */ hci_send_to_monitor(hdev, skb); if (atomic_read(&hdev->promisc)) { /* Send copy to the sockets */ hci_send_to_sock(hdev, skb); } /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); if (!test_bit(HCI_RUNNING, &hdev->flags)) { kfree_skb(skb); return -EINVAL; } err = hdev->send(hdev, skb); if (err < 0) { bt_dev_err(hdev, "sending frame failed (%d)", err); kfree_skb(skb); return err; } return 0; } /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) { struct sk_buff *skb; BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { bt_dev_err(hdev, "no memory for command"); return -ENOMEM; } /* Stand-alone HCI commands must be flagged as * single-command requests. */ bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); return 0; } int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param) { struct sk_buff *skb; if (hci_opcode_ogf(opcode) != 0x3f) { /* A controller receiving a command shall respond with either * a Command Status Event or a Command Complete Event. * Therefore, all standard HCI commands must be sent via the * standard API, using hci_send_cmd or hci_cmd_sync helpers. * Some vendors do not comply with this rule for vendor-specific * commands and do not return any event. We want to support * unresponded commands for such cases only. 
*/ bt_dev_err(hdev, "unresponded command not supported"); return -EINVAL; } skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", opcode); return -ENOMEM; } hci_send_frame(hdev, skb); return 0; } EXPORT_SYMBOL(__hci_cmd_send); /* Get data from the previously sent command */ static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode) { struct hci_command_hdr *hdr; if (!skb || skb->len < HCI_COMMAND_HDR_SIZE) return NULL; hdr = (void *)skb->data; if (hdr->opcode != cpu_to_le16(opcode)) return NULL; return skb->data + HCI_COMMAND_HDR_SIZE; } /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { void *data; /* Check if opcode matches last sent command */ data = hci_cmd_data(hdev->sent_cmd, opcode); if (!data) /* Check if opcode matches last request */ data = hci_cmd_data(hdev->req_skb, opcode); return data; } /* Get data from last received event */ void *hci_recv_event_data(struct hci_dev *hdev, __u8 event) { struct hci_event_hdr *hdr; int offset; if (!hdev->recv_event) return NULL; hdr = (void *)hdev->recv_event->data; offset = sizeof(*hdr); if (hdr->evt != event) { /* In case of LE metaevent check the subevent match */ if (hdr->evt == HCI_EV_LE_META) { struct hci_ev_le_meta *ev; ev = (void *)hdev->recv_event->data + offset; offset += sizeof(*ev); if (ev->subevent == event) goto found; } return NULL; } found: bt_dev_dbg(hdev, "event 0x%2.2x", event); return hdev->recv_event->data + offset; } /* Send ACL data */ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags) { struct hci_acl_hdr *hdr; int len = skb->len; skb_push(skb, HCI_ACL_HDR_SIZE); skb_reset_transport_header(skb); hdr = (struct hci_acl_hdr *)skb_transport_header(skb); hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags)); hdr->dlen = cpu_to_le16(len); } static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, struct sk_buff *skb, __u16 flags) { struct hci_conn *conn = chan->conn; struct hci_dev *hdev = conn->hdev; struct sk_buff *list; skb->len = skb_headlen(skb); skb->data_len = 0; hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; switch (hdev->dev_type) { case HCI_PRIMARY: hci_add_acl_hdr(skb, conn->handle, flags); break; case HCI_AMP: hci_add_acl_hdr(skb, chan->handle, flags); break; default: bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); return; } list = skb_shinfo(skb)->frag_list; if (!list) { /* Non fragmented */ BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len); skb_queue_tail(queue, skb); } else { /* Fragmented */ BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); skb_shinfo(skb)->frag_list = NULL; /* Queue all fragments atomically. We need to use spin_lock_bh * here because of 6LoWPAN links, as there this function is * called from softirq and using normal spin lock could cause * deadlocks. 
*/ spin_lock_bh(&queue->lock); __skb_queue_tail(queue, skb); flags &= ~ACL_START; flags |= ACL_CONT; do { skb = list; list = list->next; hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags); BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); __skb_queue_tail(queue, skb); } while (list); spin_unlock_bh(&queue->lock); } } void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags) { struct hci_dev *hdev = chan->conn->hdev; BT_DBG("%s chan %p flags 0x%4.4x", hdev->name, chan, flags); hci_queue_acl(chan, &chan->data_q, skb, flags); queue_work(hdev->workqueue, &hdev->tx_work); } /* Send SCO data */ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) { struct hci_dev *hdev = conn->hdev; struct hci_sco_hdr hdr; BT_DBG("%s len %d", hdev->name, skb->len); hdr.handle = cpu_to_le16(conn->handle); hdr.dlen = skb->len; skb_push(skb, HCI_SCO_HDR_SIZE); skb_reset_transport_header(skb); memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE); hci_skb_pkt_type(skb) = HCI_SCODATA_PKT; skb_queue_tail(&conn->data_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } /* Send ISO data */ static void hci_add_iso_hdr(struct sk_buff *skb, __u16 handle, __u8 flags) { struct hci_iso_hdr *hdr; int len = skb->len; skb_push(skb, HCI_ISO_HDR_SIZE); skb_reset_transport_header(skb); hdr = (struct hci_iso_hdr *)skb_transport_header(skb); hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags)); hdr->dlen = cpu_to_le16(len); } static void hci_queue_iso(struct hci_conn *conn, struct sk_buff_head *queue, struct sk_buff *skb) { struct hci_dev *hdev = conn->hdev; struct sk_buff *list; __u16 flags; skb->len = skb_headlen(skb); skb->data_len = 0; hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; list = skb_shinfo(skb)->frag_list; flags = hci_iso_flags_pack(list ? ISO_START : ISO_SINGLE, 0x00); hci_add_iso_hdr(skb, conn->handle, flags); if (!list) { /* Non fragmented */ BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len); skb_queue_tail(queue, skb); } else { /* Fragmented */ BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); skb_shinfo(skb)->frag_list = NULL; __skb_queue_tail(queue, skb); do { skb = list; list = list->next; hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; flags = hci_iso_flags_pack(list ? ISO_CONT : ISO_END, 0x00); hci_add_iso_hdr(skb, conn->handle, flags); BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); __skb_queue_tail(queue, skb); } while (list); } } void hci_send_iso(struct hci_conn *conn, struct sk_buff *skb) { struct hci_dev *hdev = conn->hdev; BT_DBG("%s len %d", hdev->name, skb->len); hci_queue_iso(conn, &conn->data_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } /* ---- HCI TX task (outgoing data) ---- */ /* HCI Connection scheduler */ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) { struct hci_dev *hdev; int cnt, q; if (!conn) { *quote = 0; return; } hdev = conn->hdev; switch (conn->type) { case ACL_LINK: cnt = hdev->acl_cnt; break; case AMP_LINK: cnt = hdev->block_cnt; break; case SCO_LINK: case ESCO_LINK: cnt = hdev->sco_cnt; break; case LE_LINK: cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; break; case ISO_LINK: cnt = hdev->iso_mtu ? hdev->iso_cnt : hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; break; default: cnt = 0; bt_dev_err(hdev, "unknown link type %d", conn->type); } q = cnt / num; *quote = q ? 
q : 1; } static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *conn = NULL, *c; unsigned int num = 0, min = ~0; /* We don't have to lock device here. Connections are always * added and removed with TX task disabled. */ rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != type || skb_queue_empty(&c->data_q)) continue; if (c->state != BT_CONNECTED && c->state != BT_CONFIG) continue; num++; if (c->sent < min) { min = c->sent; conn = c; } if (hci_conn_num(hdev, type) == num) break; } rcu_read_unlock(); hci_quote_sent(conn, num, quote); BT_DBG("conn %p quote %d", conn, *quote); return conn; } static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; bt_dev_err(hdev, "link tx timeout"); rcu_read_lock(); /* Kill stalled connections */ list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && c->sent) { bt_dev_err(hdev, "killing stalled connection %pMR", &c->dst); /* hci_disconnect might sleep, so, we have to release * the RCU read lock before calling it. */ rcu_read_unlock(); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); rcu_read_lock(); } } rcu_read_unlock(); } static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_chan *chan = NULL; unsigned int num = 0, min = ~0, cur_prio = 0; struct hci_conn *conn; int conn_num = 0; BT_DBG("%s", hdev->name); rcu_read_lock(); list_for_each_entry_rcu(conn, &h->list, list) { struct hci_chan *tmp; if (conn->type != type) continue; if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) continue; conn_num++; list_for_each_entry_rcu(tmp, &conn->chan_list, list) { struct sk_buff *skb; if (skb_queue_empty(&tmp->data_q)) continue; skb = skb_peek(&tmp->data_q); if (skb->priority < cur_prio) continue; if (skb->priority > cur_prio) { num = 0; min = ~0; cur_prio = skb->priority; } num++; if (conn->sent < min) { min = conn->sent; chan = tmp; } } if (hci_conn_num(hdev, type) == conn_num) break; } rcu_read_unlock(); if (!chan) return NULL; hci_quote_sent(chan->conn, num, quote); BT_DBG("chan %p quote %d", chan, *quote); return chan; } static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *conn; int num = 0; BT_DBG("%s", hdev->name); rcu_read_lock(); list_for_each_entry_rcu(conn, &h->list, list) { struct hci_chan *chan; if (conn->type != type) continue; if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) continue; num++; list_for_each_entry_rcu(chan, &conn->chan_list, list) { struct sk_buff *skb; if (chan->sent) { chan->sent = 0; continue; } if (skb_queue_empty(&chan->data_q)) continue; skb = skb_peek(&chan->data_q); if (skb->priority >= HCI_PRIO_MAX - 1) continue; skb->priority = HCI_PRIO_MAX - 1; BT_DBG("chan %p skb %p promoted to %d", chan, skb, skb->priority); } if (hci_conn_num(hdev, type) == num) break; } rcu_read_unlock(); } static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) { /* Calculate count of blocks used by this packet */ return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len); } static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type) { unsigned long last_tx; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return; switch (type) { case LE_LINK: last_tx = hdev->le_last_tx; break; default: last_tx = hdev->acl_last_tx; break; } /* tx timeout must be 
longer than maximum link supervision timeout * (40.9 seconds) */ if (!cnt && time_after(jiffies, last_tx + HCI_ACL_TX_TIMEOUT)) hci_link_tx_to(hdev, type); } /* Schedule SCO */ static void hci_sched_sco(struct hci_dev *hdev) { struct hci_conn *conn; struct sk_buff *skb; int quote; BT_DBG("%s", hdev->name); if (!hci_conn_num(hdev, SCO_LINK)) return; while (hdev->sco_cnt && (conn = hci_low_sent(hdev, SCO_LINK, &quote))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); hci_send_frame(hdev, skb); conn->sent++; if (conn->sent == ~0) conn->sent = 0; } } } static void hci_sched_esco(struct hci_dev *hdev) { struct hci_conn *conn; struct sk_buff *skb; int quote; BT_DBG("%s", hdev->name); if (!hci_conn_num(hdev, ESCO_LINK)) return; while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK, &quote))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); hci_send_frame(hdev, skb); conn->sent++; if (conn->sent == ~0) conn->sent = 0; } } } static void hci_sched_acl_pkt(struct hci_dev *hdev) { unsigned int cnt = hdev->acl_cnt; struct hci_chan *chan; struct sk_buff *skb; int quote; __check_timeout(hdev, cnt, ACL_LINK); while (hdev->acl_cnt && (chan = hci_chan_sent(hdev, ACL_LINK, &quote))) { u32 priority = (skb_peek(&chan->data_q))->priority; while (quote-- && (skb = skb_peek(&chan->data_q))) { BT_DBG("chan %p skb %p len %d priority %u", chan, skb, skb->len, skb->priority); /* Stop if priority has changed */ if (skb->priority < priority) break; skb = skb_dequeue(&chan->data_q); hci_conn_enter_active_mode(chan->conn, bt_cb(skb)->force_active); hci_send_frame(hdev, skb); hdev->acl_last_tx = jiffies; hdev->acl_cnt--; chan->sent++; chan->conn->sent++; /* Send pending SCO packets right away */ hci_sched_sco(hdev); hci_sched_esco(hdev); } } if (cnt != hdev->acl_cnt) hci_prio_recalculate(hdev, ACL_LINK); } static void hci_sched_acl_blk(struct hci_dev *hdev) { unsigned int cnt = hdev->block_cnt; struct hci_chan *chan; struct sk_buff *skb; int quote; u8 type; BT_DBG("%s", hdev->name); if (hdev->dev_type == HCI_AMP) type = AMP_LINK; else type = ACL_LINK; __check_timeout(hdev, cnt, type); while (hdev->block_cnt > 0 && (chan = hci_chan_sent(hdev, type, &quote))) { u32 priority = (skb_peek(&chan->data_q))->priority; while (quote > 0 && (skb = skb_peek(&chan->data_q))) { int blocks; BT_DBG("chan %p skb %p len %d priority %u", chan, skb, skb->len, skb->priority); /* Stop if priority has changed */ if (skb->priority < priority) break; skb = skb_dequeue(&chan->data_q); blocks = __get_blocks(hdev, skb); if (blocks > hdev->block_cnt) return; hci_conn_enter_active_mode(chan->conn, bt_cb(skb)->force_active); hci_send_frame(hdev, skb); hdev->acl_last_tx = jiffies; hdev->block_cnt -= blocks; quote -= blocks; chan->sent += blocks; chan->conn->sent += blocks; } } if (cnt != hdev->block_cnt) hci_prio_recalculate(hdev, type); } static void hci_sched_acl(struct hci_dev *hdev) { BT_DBG("%s", hdev->name); /* No ACL link over BR/EDR controller */ if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY) return; /* No AMP link over AMP controller */ if (!hci_conn_num(hdev, AMP_LINK) && hdev->dev_type == HCI_AMP) return; switch (hdev->flow_ctl_mode) { case HCI_FLOW_CTL_MODE_PACKET_BASED: hci_sched_acl_pkt(hdev); break; case HCI_FLOW_CTL_MODE_BLOCK_BASED: hci_sched_acl_blk(hdev); break; } } static void hci_sched_le(struct hci_dev *hdev) { struct hci_chan *chan; struct sk_buff *skb; int quote, cnt, tmp; BT_DBG("%s", hdev->name); if 
(!hci_conn_num(hdev, LE_LINK)) return; cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; __check_timeout(hdev, cnt, LE_LINK); tmp = cnt; while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) { u32 priority = (skb_peek(&chan->data_q))->priority; while (quote-- && (skb = skb_peek(&chan->data_q))) { BT_DBG("chan %p skb %p len %d priority %u", chan, skb, skb->len, skb->priority); /* Stop if priority has changed */ if (skb->priority < priority) break; skb = skb_dequeue(&chan->data_q); hci_send_frame(hdev, skb); hdev->le_last_tx = jiffies; cnt--; chan->sent++; chan->conn->sent++; /* Send pending SCO packets right away */ hci_sched_sco(hdev); hci_sched_esco(hdev); } } if (hdev->le_pkts) hdev->le_cnt = cnt; else hdev->acl_cnt = cnt; if (cnt != tmp) hci_prio_recalculate(hdev, LE_LINK); } /* Schedule CIS */ static void hci_sched_iso(struct hci_dev *hdev) { struct hci_conn *conn; struct sk_buff *skb; int quote, *cnt; BT_DBG("%s", hdev->name); if (!hci_conn_num(hdev, ISO_LINK)) return; cnt = hdev->iso_pkts ? &hdev->iso_cnt : hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt; while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, &quote))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); hci_send_frame(hdev, skb); conn->sent++; if (conn->sent == ~0) conn->sent = 0; (*cnt)--; } } } static void hci_tx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, tx_work); struct sk_buff *skb; BT_DBG("%s acl %d sco %d le %d iso %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt, hdev->le_cnt, hdev->iso_cnt); if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { /* Schedule queues and send stuff to HCI driver */ hci_sched_sco(hdev); hci_sched_esco(hdev); hci_sched_iso(hdev); hci_sched_acl(hdev); hci_sched_le(hdev); } /* Send next queued raw (unknown type) packet */ while ((skb = skb_dequeue(&hdev->raw_q))) hci_send_frame(hdev, skb); } /* ----- HCI RX task (incoming data processing) ----- */ /* ACL data packet */ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_acl_hdr *hdr = (void *) skb->data; struct hci_conn *conn; __u16 handle, flags; skb_pull(skb, HCI_ACL_HDR_SIZE); handle = __le16_to_cpu(hdr->handle); flags = hci_flags(handle); handle = hci_handle(handle); BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len, handle, flags); hdev->stat.acl_rx++; hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, handle); hci_dev_unlock(hdev); if (conn) { hci_conn_enter_active_mode(conn, BT_POWER_FORCE_ACTIVE_OFF); /* Send to upper protocol */ l2cap_recv_acldata(conn, skb, flags); return; } else { bt_dev_err(hdev, "ACL packet for unknown connection handle %d", handle); } kfree_skb(skb); } /* SCO data packet */ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_sco_hdr *hdr = (void *) skb->data; struct hci_conn *conn; __u16 handle, flags; skb_pull(skb, HCI_SCO_HDR_SIZE); handle = __le16_to_cpu(hdr->handle); flags = hci_flags(handle); handle = hci_handle(handle); BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len, handle, flags); hdev->stat.sco_rx++; hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, handle); hci_dev_unlock(hdev); if (conn) { /* Send to upper protocol */ hci_skb_pkt_status(skb) = flags & 0x03; sco_recv_scodata(conn, skb); return; } else { bt_dev_err_ratelimited(hdev, "SCO packet for unknown connection handle %d", handle); } kfree_skb(skb); } static void hci_isodata_packet(struct hci_dev *hdev, struct 
sk_buff *skb) { struct hci_iso_hdr *hdr; struct hci_conn *conn; __u16 handle, flags; hdr = skb_pull_data(skb, sizeof(*hdr)); if (!hdr) { bt_dev_err(hdev, "ISO packet too small"); goto drop; } handle = __le16_to_cpu(hdr->handle); flags = hci_flags(handle); handle = hci_handle(handle); bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len, handle, flags); hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, handle); hci_dev_unlock(hdev); if (!conn) { bt_dev_err(hdev, "ISO packet for unknown connection handle %d", handle); goto drop; } /* Send to upper protocol */ iso_recv(conn, skb, flags); return; drop: kfree_skb(skb); } static bool hci_req_is_complete(struct hci_dev *hdev) { struct sk_buff *skb; skb = skb_peek(&hdev->cmd_q); if (!skb) return true; return (bt_cb(skb)->hci.req_flags & HCI_REQ_START); } static void hci_resend_last(struct hci_dev *hdev) { struct hci_command_hdr *sent; struct sk_buff *skb; u16 opcode; if (!hdev->sent_cmd) return; sent = (void *) hdev->sent_cmd->data; opcode = __le16_to_cpu(sent->opcode); if (opcode == HCI_OP_RESET) return; skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); if (!skb) return; skb_queue_head(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); } void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, hci_req_complete_t *req_complete, hci_req_complete_skb_t *req_complete_skb) { struct sk_buff *skb; unsigned long flags; BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); /* If the completed command doesn't match the last one that was * sent we need to do special handling of it. */ if (!hci_sent_cmd_data(hdev, opcode)) { /* Some CSR based controllers generate a spontaneous * reset complete event during init and any pending * command will never be completed. In such a case we * need to resend whatever was the last sent * command. */ if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET) hci_resend_last(hdev); return; } /* If we reach this point this event matches the last command sent */ hci_dev_clear_flag(hdev, HCI_CMD_PENDING); /* If the command succeeded and there's still more commands in * this request the request is not yet complete. */ if (!status && !hci_req_is_complete(hdev)) return; skb = hdev->req_skb; /* If this was the last command in a request the complete * callback would be found in hdev->req_skb instead of the * command queue (hdev->cmd_q). */ if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) { *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; return; } if (skb && bt_cb(skb)->hci.req_complete) { *req_complete = bt_cb(skb)->hci.req_complete; return; } /* Remove all pending commands belonging to this request */ spin_lock_irqsave(&hdev->cmd_q.lock, flags); while ((skb = __skb_dequeue(&hdev->cmd_q))) { if (bt_cb(skb)->hci.req_flags & HCI_REQ_START) { __skb_queue_head(&hdev->cmd_q, skb); break; } if (bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; else *req_complete = bt_cb(skb)->hci.req_complete; dev_kfree_skb_irq(skb); } spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); } static void hci_rx_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); struct sk_buff *skb; BT_DBG("%s", hdev->name); /* The kcov_remote functions used for collecting packet parsing * coverage information from this background thread and associate * the coverage with the syscall's thread which originally injected * the packet. This helps fuzzing the kernel. 
*/ for (; (skb = skb_dequeue(&hdev->rx_q)); kcov_remote_stop()) { kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Send copy to monitor */ hci_send_to_monitor(hdev, skb); if (atomic_read(&hdev->promisc)) { /* Send copy to the sockets */ hci_send_to_sock(hdev, skb); } /* If the device has been opened in HCI_USER_CHANNEL, * the userspace has exclusive access to device. * When device is HCI_INIT, we still need to process * the data packets to the driver in order * to complete its setup(). */ if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && !test_bit(HCI_INIT, &hdev->flags)) { kfree_skb(skb); continue; } if (test_bit(HCI_INIT, &hdev->flags)) { /* Don't process data packets in this states. */ switch (hci_skb_pkt_type(skb)) { case HCI_ACLDATA_PKT: case HCI_SCODATA_PKT: case HCI_ISODATA_PKT: kfree_skb(skb); continue; } } /* Process frame */ switch (hci_skb_pkt_type(skb)) { case HCI_EVENT_PKT: BT_DBG("%s Event packet", hdev->name); hci_event_packet(hdev, skb); break; case HCI_ACLDATA_PKT: BT_DBG("%s ACL data packet", hdev->name); hci_acldata_packet(hdev, skb); break; case HCI_SCODATA_PKT: BT_DBG("%s SCO data packet", hdev->name); hci_scodata_packet(hdev, skb); break; case HCI_ISODATA_PKT: BT_DBG("%s ISO data packet", hdev->name); hci_isodata_packet(hdev, skb); break; default: kfree_skb(skb); break; } } } static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) { int err; bt_dev_dbg(hdev, "skb %p", skb); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); if (!hdev->sent_cmd) { skb_queue_head(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); return; } err = hci_send_frame(hdev, skb); if (err < 0) { hci_cmd_sync_cancel_sync(hdev, -err); return; } if (hci_req_status_pend(hdev) && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); } atomic_dec(&hdev->cmd_cnt); } static void hci_cmd_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work); struct sk_buff *skb; BT_DBG("%s cmd_cnt %d cmd queued %d", hdev->name, atomic_read(&hdev->cmd_cnt), skb_queue_len(&hdev->cmd_q)); /* Send queued commands */ if (atomic_read(&hdev->cmd_cnt)) { skb = skb_dequeue(&hdev->cmd_q); if (!skb) return; hci_send_cmd_sync(hdev, skb); rcu_read_lock(); if (test_bit(HCI_RESET, &hdev->flags) || hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) cancel_delayed_work(&hdev->cmd_timer); else queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, HCI_CMD_TIMEOUT); rcu_read_unlock(); } }
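/*
 * Editor's illustration (not part of hci_core.c above): a minimal,
 * self-contained userspace sketch of the credit-plus-quote scheduling
 * idea used by hci_sched_acl()/hci_sched_le()/hci_sched_iso() and by
 * hci_cmd_work(): the controller advertises a fixed number of buffers,
 * the host spends one credit per frame handed to the driver, and the
 * credits come back later via Number Of Completed Packets events. All
 * identifiers below (fake_conn, credits, demo_sched) are invented for
 * this sketch and do not exist in the kernel.
 */
#include <stdio.h>

struct fake_conn {
        const char *name;
        int queued;     /* frames waiting in this connection's queue */
        int sent;       /* frames currently in flight at the controller */
};

static int credits = 4;    /* controller buffer credits (cf. hdev->acl_cnt) */

/* Send at most "quote" frames from one connection while credits last. */
static void demo_sched(struct fake_conn *c, int quote)
{
        while (credits && quote-- && c->queued) {
                c->queued--;
                c->sent++;
                credits--;
                printf("tx %s: queued=%d in-flight=%d credits=%d\n",
                       c->name, c->queued, c->sent, credits);
        }
}

int main(void)
{
        struct fake_conn a = { "conn A", 3, 0 };
        struct fake_conn b = { "conn B", 5, 0 };

        /* One scheduling pass: each connection gets a bounded share. */
        demo_sched(&a, 2);
        demo_sched(&b, 2);

        /* Pretend a Number Of Completed Packets event returned 2 credits. */
        credits += 2;
        a.sent--;
        b.sent--;
        demo_sched(&b, 2);

        return 0;
}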
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
  Red Black Trees
  (C) 1999  Andrea Arcangeli <andrea@suse.de>
  (C) 2002  David Woodhouse <dwmw2@infradead.org>
  (C) 2012  Michel Lespinasse <walken@google.com>

  linux/include/linux/rbtree_augmented.h
*/

#ifndef _LINUX_RBTREE_AUGMENTED_H
#define _LINUX_RBTREE_AUGMENTED_H

#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/rcupdate.h>

/*
 * Please note - only struct rb_augment_callbacks and the prototypes for
 * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
 * The rest are implementation details you are not expected to depend on.
 *
 * See Documentation/core-api/rbtree.rst for documentation and samples.
 */

struct rb_augment_callbacks {
        void (*propagate)(struct rb_node *node, struct rb_node *stop);
        void (*copy)(struct rb_node *old, struct rb_node *new);
        void (*rotate)(struct rb_node *old, struct rb_node *new);
};

extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
        void (*augment_rotate)(struct rb_node *old, struct rb_node *new));

/*
 * Fixup the rbtree and update the augmented information when rebalancing.
 *
 * On insertion, the user must update the augmented information on the path
 * leading to the inserted node, then call rb_link_node() as usual and
 * rb_insert_augmented() instead of the usual rb_insert_color() call.
 * If rb_insert_augmented() rebalances the rbtree, it will callback into
 * a user provided function to update the augmented information on the
 * affected subtrees.
*/ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { __rb_insert_augmented(node, root, augment->rotate); } static inline void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *root, bool newleft, const struct rb_augment_callbacks *augment) { if (newleft) root->rb_leftmost = node; rb_insert_augmented(node, &root->rb_root, augment); } static __always_inline struct rb_node * rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree, bool (*less)(struct rb_node *, const struct rb_node *), const struct rb_augment_callbacks *augment) { struct rb_node **link = &tree->rb_root.rb_node; struct rb_node *parent = NULL; bool leftmost = true; while (*link) { parent = *link; if (less(node, parent)) { link = &parent->rb_left; } else { link = &parent->rb_right; leftmost = false; } } rb_link_node(node, parent, link); augment->propagate(parent, NULL); /* suboptimal */ rb_insert_augmented_cached(node, tree, leftmost, augment); return leftmost ? node : NULL; } /* * Template for declaring augmented rbtree callbacks (generic case) * * RBSTATIC: 'static' or empty * RBNAME: name of the rb_augment_callbacks structure * RBSTRUCT: struct type of the tree nodes * RBFIELD: name of struct rb_node field within RBSTRUCT * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree * RBCOMPUTE: name of function that recomputes the RBAUGMENTED data */ #define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE) \ static inline void \ RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop) \ { \ while (rb != stop) { \ RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD); \ if (RBCOMPUTE(node, true)) \ break; \ rb = rb_parent(&node->RBFIELD); \ } \ } \ static inline void \ RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ { \ RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD); \ RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD); \ new->RBAUGMENTED = old->RBAUGMENTED; \ } \ static void \ RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ { \ RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD); \ RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD); \ new->RBAUGMENTED = old->RBAUGMENTED; \ RBCOMPUTE(old, false); \ } \ RBSTATIC const struct rb_augment_callbacks RBNAME = { \ .propagate = RBNAME ## _propagate, \ .copy = RBNAME ## _copy, \ .rotate = RBNAME ## _rotate \ }; /* * Template for declaring augmented rbtree callbacks, * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes. 
* * RBSTATIC: 'static' or empty * RBNAME: name of the rb_augment_callbacks structure * RBSTRUCT: struct type of the tree nodes * RBFIELD: name of struct rb_node field within RBSTRUCT * RBTYPE: type of the RBAUGMENTED field * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree * RBCOMPUTE: name of function that returns the per-node RBTYPE scalar */ #define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD, \ RBTYPE, RBAUGMENTED, RBCOMPUTE) \ static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit) \ { \ RBSTRUCT *child; \ RBTYPE max = RBCOMPUTE(node); \ if (node->RBFIELD.rb_left) { \ child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD); \ if (child->RBAUGMENTED > max) \ max = child->RBAUGMENTED; \ } \ if (node->RBFIELD.rb_right) { \ child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD); \ if (child->RBAUGMENTED > max) \ max = child->RBAUGMENTED; \ } \ if (exit && node->RBAUGMENTED == max) \ return true; \ node->RBAUGMENTED = max; \ return false; \ } \ RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max) #define RB_RED 0 #define RB_BLACK 1 #define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) #define __rb_color(pc) ((pc) & 1) #define __rb_is_black(pc) __rb_color(pc) #define __rb_is_red(pc) (!__rb_color(pc)) #define rb_color(rb) __rb_color((rb)->__rb_parent_color) #define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) #define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) { rb->__rb_parent_color = rb_color(rb) + (unsigned long)p; } static inline void rb_set_parent_color(struct rb_node *rb, struct rb_node *p, int color) { rb->__rb_parent_color = (unsigned long)p + color; } static inline void __rb_change_child(struct rb_node *old, struct rb_node *new, struct rb_node *parent, struct rb_root *root) { if (parent) { if (parent->rb_left == old) WRITE_ONCE(parent->rb_left, new); else WRITE_ONCE(parent->rb_right, new); } else WRITE_ONCE(root->rb_node, new); } static inline void __rb_change_child_rcu(struct rb_node *old, struct rb_node *new, struct rb_node *parent, struct rb_root *root) { if (parent) { if (parent->rb_left == old) rcu_assign_pointer(parent->rb_left, new); else rcu_assign_pointer(parent->rb_right, new); } else rcu_assign_pointer(root->rb_node, new); } extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right; struct rb_node *tmp = node->rb_left; struct rb_node *parent, *rebalance; unsigned long pc; if (!tmp) { /* * Case 1: node to erase has no more than 1 child (easy!) * * Note that if there is one child it must be red due to 5) * and node must be black due to 4). We adjust colors locally * so as to bypass __rb_erase_color() later on. */ pc = node->__rb_parent_color; parent = __rb_parent(pc); __rb_change_child(node, child, parent, root); if (child) { child->__rb_parent_color = pc; rebalance = NULL; } else rebalance = __rb_is_black(pc) ? 
parent : NULL; tmp = parent; } else if (!child) { /* Still case 1, but this time the child is node->rb_left */ tmp->__rb_parent_color = pc = node->__rb_parent_color; parent = __rb_parent(pc); __rb_change_child(node, tmp, parent, root); rebalance = NULL; tmp = parent; } else { struct rb_node *successor = child, *child2; tmp = child->rb_left; if (!tmp) { /* * Case 2: node's successor is its right child * * (n) (s) * / \ / \ * (x) (s) -> (x) (c) * \ * (c) */ parent = successor; child2 = successor->rb_right; augment->copy(node, successor); } else { /* * Case 3: node's successor is leftmost under * node's right child subtree * * (n) (s) * / \ / \ * (x) (y) -> (x) (y) * / / * (p) (p) * / / * (s) (c) * \ * (c) */ do { parent = successor; successor = tmp; tmp = tmp->rb_left; } while (tmp); child2 = successor->rb_right; WRITE_ONCE(parent->rb_left, child2); WRITE_ONCE(successor->rb_right, child); rb_set_parent(child, successor); augment->copy(node, successor); augment->propagate(parent, successor); } tmp = node->rb_left; WRITE_ONCE(successor->rb_left, tmp); rb_set_parent(tmp, successor); pc = node->__rb_parent_color; tmp = __rb_parent(pc); __rb_change_child(node, successor, tmp, root); if (child2) { rb_set_parent_color(child2, parent, RB_BLACK); rebalance = NULL; } else { rebalance = rb_is_black(successor) ? parent : NULL; } successor->__rb_parent_color = pc; tmp = successor; } augment->propagate(tmp, NULL); return rebalance; } static __always_inline void rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } static __always_inline void rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root, const struct rb_augment_callbacks *augment) { if (root->rb_leftmost == node) root->rb_leftmost = rb_next(node); rb_erase_augmented(node, &root->rb_root, augment); } #endif /* _LINUX_RBTREE_AUGMENTED_H */
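/*
 * Editor's illustration (not part of rbtree_augmented.h above): a minimal
 * sketch of how the RB_DECLARE_CALLBACKS_MAX template is typically used,
 * in the spirit of the samples referenced in
 * Documentation/core-api/rbtree.rst. Each node stores a scalar "value"
 * plus the maximum value in its subtree, and the generated callbacks keep
 * that maximum correct across rotations. The identifiers demo_node,
 * demo_value, demo_cb and demo_insert are invented for this example; it
 * assumes the header above (<linux/rbtree_augmented.h>) is included.
 */
struct demo_node {
        struct rb_node rb;
        unsigned long value;            /* per-node scalar */
        unsigned long subtree_max;      /* max of "value" over this subtree */
};

static inline unsigned long demo_value(struct demo_node *node)
{
        return node->value;
}

RB_DECLARE_CALLBACKS_MAX(static, demo_cb,
                         struct demo_node, rb,
                         unsigned long, subtree_max, demo_value)

static void demo_insert(struct demo_node *node, struct rb_root *root)
{
        struct rb_node **link = &root->rb_node, *parent = NULL;

        node->subtree_max = node->value;
        while (*link) {
                struct demo_node *cur = rb_entry(*link, struct demo_node, rb);

                parent = *link;
                /* Update the augmented data on the way down, as the header
                 * comment above requires before rb_insert_augmented().
                 */
                if (cur->subtree_max < node->value)
                        cur->subtree_max = node->value;
                if (node->value < cur->value)
                        link = &cur->rb.rb_left;
                else
                        link = &cur->rb.rb_right;
        }

        rb_link_node(&node->rb, parent, link);
        rb_insert_augmented(&node->rb, root, &demo_cb);
}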
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c  Packet scheduler API.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_wrapper.h>

#include <trace/events/qdisc.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that is able
   to enqueue packets and to dequeue them (when the device is ready to
   send something) in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form more
   intelligible to the kernel, to perform some sanity checks and the part
   of the work that is common to all qdiscs, and to provide rtnetlink
   notifications.

   All real intelligent work is done inside qdisc modules.

   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything this time. The queue is
   really empty only if q->q.qlen == 0. For complicated disciplines with
   multiple queues, q->q is not the real packet queue, but q->q.qlen
   must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP        - this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN          - this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
*/ /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(qdisc_mod_lock); /************************************************ * Queueing disciplines manipulation. * ************************************************/ /* The list of all installed queueing disciplines. */ static struct Qdisc_ops *qdisc_base; /* Register/unregister queueing discipline */ int register_qdisc(struct Qdisc_ops *qops) { struct Qdisc_ops *q, **qp; int rc = -EEXIST; write_lock(&qdisc_mod_lock); for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) if (!strcmp(qops->id, q->id)) goto out; if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; if (qops->peek == NULL) { if (qops->dequeue == NULL) qops->peek = noop_qdisc_ops.peek; else goto out_einval; } if (qops->dequeue == NULL) qops->dequeue = noop_qdisc_ops.dequeue; if (qops->cl_ops) { const struct Qdisc_class_ops *cops = qops->cl_ops; if (!(cops->find && cops->walk && cops->leaf)) goto out_einval; if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) goto out_einval; } qops->next = NULL; *qp = qops; rc = 0; out: write_unlock(&qdisc_mod_lock); return rc; out_einval: rc = -EINVAL; goto out; } EXPORT_SYMBOL(register_qdisc); void unregister_qdisc(struct Qdisc_ops *qops) { struct Qdisc_ops *q, **qp; int err = -ENOENT; write_lock(&qdisc_mod_lock); for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) if (q == qops) break; if (q) { *qp = q->next; q->next = NULL; err = 0; } write_unlock(&qdisc_mod_lock); WARN(err, "unregister qdisc(%s) failed\n", qops->id); } EXPORT_SYMBOL(unregister_qdisc); /* Get default qdisc if not otherwise specified */ void qdisc_get_default(char *name, size_t len) { read_lock(&qdisc_mod_lock); strscpy(name, default_qdisc_ops->id, len); read_unlock(&qdisc_mod_lock); } static struct Qdisc_ops *qdisc_lookup_default(const char *name) { struct Qdisc_ops *q = NULL; for (q = qdisc_base; q; q = q->next) { if (!strcmp(name, q->id)) { if (!try_module_get(q->owner)) q = NULL; break; } } return q; } /* Set new default qdisc to use */ int qdisc_set_default(const char *name) { const struct Qdisc_ops *ops; if (!capable(CAP_NET_ADMIN)) return -EPERM; write_lock(&qdisc_mod_lock); ops = qdisc_lookup_default(name); if (!ops) { /* Not found, drop lock and try to load module */ write_unlock(&qdisc_mod_lock); request_module(NET_SCH_ALIAS_PREFIX "%s", name); write_lock(&qdisc_mod_lock); ops = qdisc_lookup_default(name); } if (ops) { /* Set new default */ module_put(default_qdisc_ops->owner); default_qdisc_ops = ops; } write_unlock(&qdisc_mod_lock); return ops ? 0 : -ENOENT; } #ifdef CONFIG_NET_SCH_DEFAULT /* Set default value from kernel config */ static int __init sch_default_qdisc(void) { return qdisc_set_default(CONFIG_DEFAULT_NET_SCH); } late_initcall(sch_default_qdisc); #endif /* We know handle. Find qdisc among all qdisc's attached to device * (root qdisc, all its children, children of children etc.) * Note: caller either uses rtnl or rcu_read_lock() */ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; if (!qdisc_dev(root)) return (root->handle == handle ? 
root : NULL); if (!(root->flags & TCQ_F_BUILTIN) && root->handle == handle) return root; hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle, lockdep_rtnl_is_held()) { if (q->handle == handle) return q; } return NULL; } void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); if (invisible) q->flags |= TCQ_F_INVISIBLE; } } EXPORT_SYMBOL(qdisc_hash_add); void qdisc_hash_del(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); hash_del_rcu(&q->hash); } } EXPORT_SYMBOL(qdisc_hash_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; if (!handle) return NULL; q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle); if (q) goto out; if (dev_ingress_queue(dev)) q = qdisc_match_from_root( rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping), handle); out: return q; } struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) { struct netdev_queue *nq; struct Qdisc *q; if (!handle) return NULL; q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle); if (q) goto out; nq = dev_ingress_queue_rcu(dev); if (nq) q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping), handle); out: return q; } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; if (cops == NULL) return NULL; cl = cops->find(p, classid); if (cl == 0) return NULL; return cops->leaf(p, cl); } /* Find queueing discipline by name */ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) { struct Qdisc_ops *q = NULL; if (kind) { read_lock(&qdisc_mod_lock); for (q = qdisc_base; q; q = q->next) { if (nla_strcmp(kind, q->id) == 0) { if (!try_module_get(q->owner)) q = NULL; break; } } read_unlock(&qdisc_mod_lock); } return q; } /* The linklayer setting were not transferred from iproute2, in older * versions, and the rate tables lookup systems have been dropped in * the kernel. To keep backward compatible with older iproute2 tc * utils, we detect the linklayer setting by detecting if the rate * table were modified. * * For linklayer ATM table entries, the rate table will be aligned to * 48 bytes, thus some table entries will contain the same value. The * mpu (min packet unit) is also encoded into the old rate table, thus * starting from the mpu, we find low and high table entries for * mapping this cell. If these entries contain the same value, when * the rate tables have been modified for linklayer ATM. * * This is done by rounding mpu to the nearest 48 bytes cell/entry, * and then roundup to the next cell, calc the table entry one below, * and compare. 
*/ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) { int low = roundup(r->mpu, 48); int high = roundup(low+1, 48); int cell_low = low >> r->cell_log; int cell_high = (high >> r->cell_log) - 1; /* rtab is too inaccurate at rates > 100Mbit/s */ if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { pr_debug("TC linklayer: Giving up ATM detection\n"); return TC_LINKLAYER_ETHERNET; } if ((cell_high > cell_low) && (cell_high < 256) && (rtab[cell_low] == rtab[cell_high])) { pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", cell_low, cell_high, rtab[cell_high]); return TC_LINKLAYER_ATM; } return TC_LINKLAYER_ETHERNET; } static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack) { struct qdisc_rate_table *rtab; if (tab == NULL || r->rate == 0 || r->cell_log == 0 || r->cell_log >= 32 || nla_len(tab) != TC_RTAB_SIZE) { NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); return NULL; } for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && !memcmp(&rtab->data, nla_data(tab), 1024)) { rtab->refcnt++; return rtab; } } rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); if (rtab) { rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); if (r->linklayer == TC_LINKLAYER_UNAWARE) r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } else { NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table"); } return rtab; } EXPORT_SYMBOL(qdisc_get_rtab); void qdisc_put_rtab(struct qdisc_rate_table *tab) { struct qdisc_rate_table *rtab, **rtabp; if (!tab || --tab->refcnt) return; for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) { if (rtab == tab) { *rtabp = rtab->next; kfree(rtab); return; } } } EXPORT_SYMBOL(qdisc_put_rtab); static LIST_HEAD(qdisc_stab_list); static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, [TCA_STAB_DATA] = { .type = NLA_BINARY }, }; static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_STAB_MAX + 1]; struct qdisc_size_table *stab; struct tc_sizespec *s; unsigned int tsize = 0; u16 *tab = NULL; int err; err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy, extack); if (err < 0) return ERR_PTR(err); if (!tb[TCA_STAB_BASE]) { NL_SET_ERR_MSG(extack, "Size table base attribute is missing"); return ERR_PTR(-EINVAL); } s = nla_data(tb[TCA_STAB_BASE]); if (s->tsize > 0) { if (!tb[TCA_STAB_DATA]) { NL_SET_ERR_MSG(extack, "Size table data attribute is missing"); return ERR_PTR(-EINVAL); } tab = nla_data(tb[TCA_STAB_DATA]); tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); } if (tsize != s->tsize || (!tab && tsize > 0)) { NL_SET_ERR_MSG(extack, "Invalid size of size table"); return ERR_PTR(-EINVAL); } list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) continue; if (tsize > 0 && memcmp(stab->data, tab, flex_array_size(stab, data, tsize))) continue; stab->refcnt++; return stab; } if (s->size_log > STAB_SIZE_LOG_MAX || s->cell_log > STAB_SIZE_LOG_MAX) { NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table"); return ERR_PTR(-EINVAL); } stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); stab->refcnt = 1; stab->szopts = *s; if (tsize > 0) memcpy(stab->data, 
tab, flex_array_size(stab, data, tsize)); list_add_tail(&stab->list, &qdisc_stab_list); return stab; } void qdisc_put_stab(struct qdisc_size_table *tab) { if (!tab) return; if (--tab->refcnt == 0) { list_del(&tab->list); kfree_rcu(tab, rcu); } } EXPORT_SYMBOL(qdisc_put_stab); static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, TCA_STAB); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) goto nla_put_failure; nla_nest_end(skb, nest); return skb->len; nla_put_failure: return -1; } void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) { int pkt_len, slot; pkt_len = skb->len + stab->szopts.overhead; if (unlikely(!stab->szopts.tsize)) goto out; slot = pkt_len + stab->szopts.cell_align; if (unlikely(slot < 0)) slot = 0; slot >>= stab->szopts.cell_log; if (likely(slot < stab->szopts.tsize)) pkt_len = stab->data[slot]; else pkt_len = stab->data[stab->szopts.tsize - 1] * (slot / stab->szopts.tsize) + stab->data[slot % stab->szopts.tsize]; pkt_len <<= stab->szopts.size_log; out: if (unlikely(pkt_len < 1)) pkt_len = 1; qdisc_skb_cb(skb)->pkt_len = pkt_len; } EXPORT_SYMBOL(__qdisc_calculate_pkt_len); void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) { if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", txt, qdisc->ops->id, qdisc->handle >> 16); qdisc->flags |= TCQ_F_WARN_NONWC; } } EXPORT_SYMBOL(qdisc_warn_nonwc); static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); rcu_read_lock(); __netif_schedule(qdisc_root(wd->qdisc)); rcu_read_unlock(); return HRTIMER_NORESTART; } void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, clockid_t clockid) { hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED); wd->timer.function = qdisc_watchdog; wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init_clockid); void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); } EXPORT_SYMBOL(qdisc_watchdog_init); void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, u64 delta_ns) { bool deactivated; rcu_read_lock(); deactivated = test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state); rcu_read_unlock(); if (deactivated) return; if (hrtimer_is_queued(&wd->timer)) { u64 softexpires; softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer)); /* If timer is already set in [expires, expires + delta_ns], * do not reprogram it. 
*/ if (softexpires - expires <= delta_ns) return; } hrtimer_start_range_ns(&wd->timer, ns_to_ktime(expires), delta_ns, HRTIMER_MODE_ABS_PINNED); } EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { hrtimer_cancel(&wd->timer); } EXPORT_SYMBOL(qdisc_watchdog_cancel); static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { struct hlist_head *h; unsigned int i; h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL); if (h != NULL) { for (i = 0; i < n; i++) INIT_HLIST_HEAD(&h[i]); } return h; } void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; struct hlist_node *next; struct hlist_head *nhash, *ohash; unsigned int nsize, nmask, osize; unsigned int i, h; /* Rehash when load factor exceeds 0.75 */ if (clhash->hashelems * 4 <= clhash->hashsize * 3) return; nsize = clhash->hashsize * 2; nmask = nsize - 1; nhash = qdisc_class_hash_alloc(nsize); if (nhash == NULL) return; ohash = clhash->hash; osize = clhash->hashsize; sch_tree_lock(sch); for (i = 0; i < osize; i++) { hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { h = qdisc_class_hash(cl->classid, nmask); hlist_add_head(&cl->hnode, &nhash[h]); } } clhash->hash = nhash; clhash->hashsize = nsize; clhash->hashmask = nmask; sch_tree_unlock(sch); kvfree(ohash); } EXPORT_SYMBOL(qdisc_class_hash_grow); int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) { unsigned int size = 4; clhash->hash = qdisc_class_hash_alloc(size); if (!clhash->hash) return -ENOMEM; clhash->hashsize = size; clhash->hashmask = size - 1; clhash->hashelems = 0; return 0; } EXPORT_SYMBOL(qdisc_class_hash_init); void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) { kvfree(clhash->hash); } EXPORT_SYMBOL(qdisc_class_hash_destroy); void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, struct Qdisc_class_common *cl) { unsigned int h; INIT_HLIST_NODE(&cl->hnode); h = qdisc_class_hash(cl->classid, clhash->hashmask); hlist_add_head(&cl->hnode, &clhash->hash[h]); clhash->hashelems++; } EXPORT_SYMBOL(qdisc_class_hash_insert); void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, struct Qdisc_class_common *cl) { hlist_del(&cl->hnode); clhash->hashelems--; } EXPORT_SYMBOL(qdisc_class_hash_remove); /* Allocate an unique handle from space managed by kernel * Possible range is [8000-FFFF]:0000 (0x8000 values) */ static u32 qdisc_alloc_handle(struct net_device *dev) { int i = 0x8000; static u32 autohandle = TC_H_MAKE(0x80000000U, 0); do { autohandle += TC_H_MAKE(0x10000U, 0); if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) autohandle = TC_H_MAKE(0x80000000U, 0); if (!qdisc_lookup(dev, autohandle)) return autohandle; cond_resched(); } while (--i > 0); return 0; } void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; bool notify; int drops; if (n == 0 && len == 0) return; drops = max_t(int, n, 0); rcu_read_lock(); while ((parentid = sch->parent)) { if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) break; if (sch->flags & TCQ_F_NOPARENT) break; /* Notify parent qdisc only if child qdisc becomes empty. * * If child was empty even before update then backlog * counter is screwed and we skip notification because * parent class is already passive. * * If the original child was offloaded then it is allowed * to be seem as empty, so the parent is notified anyway. 
*/ notify = !sch->q.qlen && !WARN_ON_ONCE(!n && !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON_ONCE(parentid != TC_H_ROOT); break; } cops = sch->ops->cl_ops; if (notify && cops->qlen_notify) { cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); } sch->q.qlen -= n; sch->qstats.backlog -= len; __qdisc_qstats_drop(sch, drops); } rcu_read_unlock(); } EXPORT_SYMBOL(qdisc_tree_reduce_backlog); int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type, void *type_data) { struct net_device *dev = qdisc_dev(sch); int err; sch->flags &= ~TCQ_F_OFFLOADED; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return 0; err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); if (err == -EOPNOTSUPP) return 0; if (!err) sch->flags |= TCQ_F_OFFLOADED; return err; } EXPORT_SYMBOL(qdisc_offload_dump_helper); void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, enum tc_setup_type type, void *type_data, struct netlink_ext_ack *extack) { bool any_qdisc_is_offloaded; int err; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); /* Don't report error if the graft is part of destroy operation. */ if (!err || !new || new == &noop_qdisc) return; /* Don't report error if the parent, the old child and the new * one are not offloaded. */ any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED; any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED; any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED; if (any_qdisc_is_offloaded) NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); } EXPORT_SYMBOL(qdisc_offload_graft_helper); void qdisc_offload_query_caps(struct net_device *dev, enum tc_setup_type type, void *caps, size_t caps_len) { const struct net_device_ops *ops = dev->netdev_ops; struct tc_query_caps_base base = { .type = type, .caps = caps, }; memset(caps, 0, caps_len); if (ops->ndo_setup_tc) ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base); } EXPORT_SYMBOL(qdisc_offload_query_caps); static void qdisc_offload_graft_root(struct net_device *dev, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) { struct tc_root_qopt_offload graft_offload = { .command = TC_ROOT_GRAFT, .handle = new ? 
new->handle : 0, .ingress = (new && new->flags & TCQ_F_INGRESS) || (old && old->flags & TCQ_F_INGRESS), }; qdisc_offload_graft_helper(dev, NULL, new, old, TC_SETUP_ROOT_QDISC, &graft_offload, extack); } static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 portid, u32 seq, u16 flags, int event, struct netlink_ext_ack *extack) { struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL; struct gnet_stats_queue __percpu *cpu_qstats = NULL; struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; struct qdisc_size_table *stab; u32 block_index; __u32 qlen; cond_resched(); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; tcm->tcm_info = refcount_read(&q->refcnt); if (nla_put_string(skb, TCA_KIND, q->ops->id)) goto nla_put_failure; if (q->ops->ingress_block_get) { block_index = q->ops->ingress_block_get(q); if (block_index && nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index)) goto nla_put_failure; } if (q->ops->egress_block_get) { block_index = q->ops->egress_block_get(q); if (block_index && nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index)) goto nla_put_failure; } if (q->ops->dump && q->ops->dump(q, skb) < 0) goto nla_put_failure; if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) goto nla_put_failure; qlen = qdisc_qlen_sum(q); stab = rtnl_dereference(q->stab); if (stab && qdisc_dump_stab(skb, stab) < 0) goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, NULL, &d, TCA_PAD) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) goto nla_put_failure; if (qdisc_is_percpu_stats(q)) { cpu_bstats = q->cpu_bstats; cpu_qstats = q->cpu_qstats; } if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 || gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; if (gnet_stats_finish_copy(&d) < 0) goto nla_put_failure; if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; } static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) { if (q->flags & TCQ_F_BUILTIN) return true; if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) return true; return false; } static int qdisc_get_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *q, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (!tc_qdisc_dump_ignore(q, false)) { if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0, RTM_NEWQDISC, extack) < 0) goto err_out; } if (skb->len) return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: kfree_skb(skb); return -EINVAL; } static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (old && !tc_qdisc_dump_ignore(old, false)) { if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC, extack) < 0) goto err_out; } if (new && !tc_qdisc_dump_ignore(new, false)) { if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0) goto err_out; } if (skb->len) return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: kfree_skb(skb); return -EINVAL; } static void notify_and_destroy(struct net *net, struct sk_buff *skb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new, struct netlink_ext_ack *extack) { if (new || old) qdisc_notify(net, skb, n, clid, old, new, extack); if (old) qdisc_put(old); } static void qdisc_clear_nolock(struct Qdisc *sch) { sch->flags &= ~TCQ_F_NOLOCK; if (!(sch->flags & TCQ_F_CPUSTATS)) return; free_percpu(sch->cpu_bstats); free_percpu(sch->cpu_qstats); sch->cpu_bstats = NULL; sch->cpu_qstats = NULL; sch->flags &= ~TCQ_F_CPUSTATS; } /* Graft qdisc "new" to class "classid" of qdisc "parent" or * to device "dev". * * When appropriate send a netlink notification using 'skb' * and "n". * * On success, destroy old qdisc. */ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, struct sk_buff *skb, struct nlmsghdr *n, u32 classid, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) { struct Qdisc *q = old; struct net *net = dev_net(dev); if (parent == NULL) { unsigned int i, num_q, ingress; struct netdev_queue *dev_queue; ingress = 0; num_q = dev->num_tx_queues; if ((q && q->flags & TCQ_F_INGRESS) || (new && new->flags & TCQ_F_INGRESS)) { ingress = 1; dev_queue = dev_ingress_queue(dev); if (!dev_queue) { NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); return -ENOENT; } q = rtnl_dereference(dev_queue->qdisc_sleeping); /* This is the counterpart of that qdisc_refcount_inc_nz() call in * __tcf_qdisc_find() for filter requests. */ if (!qdisc_refcount_dec_if_one(q)) { NL_SET_ERR_MSG(extack, "Current ingress or clsact Qdisc has ongoing filter requests"); return -EBUSY; } } if (dev->flags & IFF_UP) dev_deactivate(dev); qdisc_offload_graft_root(dev, new, old, extack); if (new && new->ops->attach && !ingress) goto skip; if (!ingress) { for (i = 0; i < num_q; i++) { dev_queue = netdev_get_tx_queue(dev, i); old = dev_graft_qdisc(dev_queue, new); if (new && i > 0) qdisc_refcount_inc(new); qdisc_put(old); } } else { old = dev_graft_qdisc(dev_queue, NULL); /* {ingress,clsact}_destroy() @old before grafting @new to avoid * unprotected concurrent accesses to net_device::miniq_{in,e}gress * pointer(s) in mini_qdisc_pair_swap(). */ qdisc_notify(net, skb, n, classid, old, new, extack); qdisc_destroy(old); dev_graft_qdisc(dev_queue, new); } skip: if (!ingress) { old = rtnl_dereference(dev->qdisc); if (new && !new->ops->attach) qdisc_refcount_inc(new); rcu_assign_pointer(dev->qdisc, new ? 
: &noop_qdisc); notify_and_destroy(net, skb, n, classid, old, new, extack); if (new && new->ops->attach) new->ops->attach(new); } if (dev->flags & IFF_UP) dev_activate(dev); } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; unsigned long cl; int err; /* Only support running class lockless if parent is lockless */ if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK)) qdisc_clear_nolock(new); if (!cops || !cops->graft) return -EOPNOTSUPP; cl = cops->find(parent, classid); if (!cl) { NL_SET_ERR_MSG(extack, "Specified class not found"); return -ENOENT; } if (new && new->ops == &noqueue_qdisc_ops) { NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); return -EINVAL; } err = cops->graft(parent, cl, new, &old, extack); if (err) return err; notify_and_destroy(net, skb, n, classid, old, new, extack); } return 0; } static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, struct netlink_ext_ack *extack) { u32 block_index; if (tca[TCA_INGRESS_BLOCK]) { block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]); if (!block_index) { NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0"); return -EINVAL; } if (!sch->ops->ingress_block_set) { NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported"); return -EOPNOTSUPP; } sch->ops->ingress_block_set(sch, block_index); } if (tca[TCA_EGRESS_BLOCK]) { block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]); if (!block_index) { NL_SET_ERR_MSG(extack, "Egress block index cannot be 0"); return -EINVAL; } if (!sch->ops->egress_block_set) { NL_SET_ERR_MSG(extack, "Egress block sharing is not supported"); return -EOPNOTSUPP; } sch->ops->egress_block_set(sch, block_index); } return 0; } /* Allocate and initialize new qdisc. Parameters are passed via opt. */ static struct Qdisc *qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, u32 parent, u32 handle, struct nlattr **tca, int *errp, struct netlink_ext_ack *extack) { int err; struct nlattr *kind = tca[TCA_KIND]; struct Qdisc *sch; struct Qdisc_ops *ops; struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); #ifdef CONFIG_MODULES if (ops == NULL && kind != NULL) { char name[IFNAMSIZ]; if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { /* We dropped the RTNL semaphore in order to * perform the module load. So, even if we * succeeded in loading the module we have to * tell the caller to replay the request. We * indicate this using -EAGAIN. * We replay the request because the device may * go away in the mean time. */ rtnl_unlock(); request_module(NET_SCH_ALIAS_PREFIX "%s", name); rtnl_lock(); ops = qdisc_lookup_ops(kind); if (ops != NULL) { /* We will try again qdisc_lookup_ops, * so don't keep a reference. 
*/ module_put(ops->owner); err = -EAGAIN; goto err_out; } } } #endif err = -ENOENT; if (!ops) { NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } sch = qdisc_alloc(dev_queue, ops, extack); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; } sch->parent = parent; if (handle == TC_H_INGRESS) { if (!(sch->flags & TCQ_F_INGRESS)) { NL_SET_ERR_MSG(extack, "Specified parent ID is reserved for ingress and clsact Qdiscs"); err = -EINVAL; goto err_out3; } handle = TC_H_MAKE(TC_H_INGRESS, 0); } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); if (handle == 0) { NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded"); err = -ENOSPC; goto err_out3; } } if (!netif_is_multiqueue(dev)) sch->flags |= TCQ_F_ONETXQUEUE; } sch->handle = handle; /* This exist to keep backward compatible with a userspace * loophole, what allowed userspace to get IFF_NO_QUEUE * facility on older kernels by setting tx_queue_len=0 (prior * to qdisc init), and then forgot to reinit tx_queue_len * before again attaching a qdisc. */ if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); } err = qdisc_block_indexes_set(sch, tca, extack); if (err) goto err_out3; if (tca[TCA_STAB]) { stab = qdisc_get_stab(tca[TCA_STAB], extack); if (IS_ERR(stab)) { err = PTR_ERR(stab); goto err_out3; } rcu_assign_pointer(sch->stab, stab); } if (ops->init) { err = ops->init(sch, tca[TCA_OPTIONS], extack); if (err != 0) goto err_out4; } if (tca[TCA_RATE]) { err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); goto err_out4; } err = gen_new_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, NULL, true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); goto err_out4; } } qdisc_hash_add(sch, false); trace_qdisc_create(ops, dev, parent); return sch; err_out4: /* Even if ops->init() failed, we call ops->destroy() * like qdisc_create_dflt(). */ if (ops->destroy) ops->destroy(sch); qdisc_put_stab(rtnl_dereference(sch->stab)); err_out3: netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: module_put(ops->owner); err_out: *errp = err; return NULL; } static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, struct netlink_ext_ack *extack) { struct qdisc_size_table *ostab, *stab = NULL; int err = 0; if (tca[TCA_OPTIONS]) { if (!sch->ops->change) { NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc"); return -EINVAL; } if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); return -EOPNOTSUPP; } err = sch->ops->change(sch, tca[TCA_OPTIONS], extack); if (err) return err; } if (tca[TCA_STAB]) { stab = qdisc_get_stab(tca[TCA_STAB], extack); if (IS_ERR(stab)) return PTR_ERR(stab); } ostab = rtnl_dereference(sch->stab); rcu_assign_pointer(sch->stab, stab); qdisc_put_stab(ostab); if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator because change can't be undone. 
*/ if (sch->flags & TCQ_F_MQROOT) goto out; gen_replace_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, NULL, true, tca[TCA_RATE]); } out: return 0; } struct check_loop_arg { struct qdisc_walker w; struct Qdisc *p; int depth; }; static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w); static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) { struct check_loop_arg arg; if (q->ops->cl_ops == NULL) return 0; arg.w.stop = arg.w.skip = arg.w.count = 0; arg.w.fn = check_loop_fn; arg.depth = depth; arg.p = p; q->ops->cl_ops->walk(q, &arg.w); return arg.w.stop ? -ELOOP : 0; } static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) { struct Qdisc *leaf; const struct Qdisc_class_ops *cops = q->ops->cl_ops; struct check_loop_arg *arg = (struct check_loop_arg *)w; leaf = cops->leaf(q, cl); if (leaf) { if (leaf == arg->p || arg->depth > 7) return -ELOOP; return check_loop(leaf, arg->p, arg->depth + 1); } return 0; } const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, [TCA_RATE] = { .type = NLA_BINARY, .len = sizeof(struct tc_estimator) }, [TCA_STAB] = { .type = NLA_NESTED }, [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, [TCA_CHAIN] = { .type = NLA_U32 }, [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, }; /* * Delete/get qdisc. */ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; u32 clid; struct Qdisc *q = NULL; struct Qdisc *p = NULL; int err; err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { p = qdisc_lookup(dev, TC_H_MAJ(clid)); if (!p) { NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); } if (!q) { NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; } if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Invalid handle"); return -EINVAL; } } else { q = qdisc_lookup(dev, tcm->tcm_handle); if (!q) { NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle"); return -ENOENT; } } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; } if (n->nlmsg_type == RTM_DELQDISC) { if (!clid) { NL_SET_ERR_MSG(extack, "Classid cannot be zero"); return -EINVAL; } if (q->handle == 0) { NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero"); return -ENOENT; } err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack); if (err != 0) return err; } else { qdisc_get_notify(net, skb, n, clid, q, NULL); } return 0; } static bool req_create_or_replace(struct nlmsghdr *n) { return (n->nlmsg_flags & NLM_F_CREATE && n->nlmsg_flags & NLM_F_REPLACE); } static bool req_create_exclusive(struct nlmsghdr *n) { return (n->nlmsg_flags & NLM_F_CREATE && n->nlmsg_flags & NLM_F_EXCL); } static bool req_change(struct nlmsghdr *n) { return (!(n->nlmsg_flags & NLM_F_CREATE) && !(n->nlmsg_flags & NLM_F_REPLACE) && 
!(n->nlmsg_flags & NLM_F_EXCL)); } /* * Create/change qdisc. */ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm; struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; u32 clid; struct Qdisc *q, *p; int err; replay: /* Reinit, just in case something touches this. */ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; tcm = nlmsg_data(n); clid = tcm->tcm_parent; q = p = NULL; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; if (clid) { if (clid != TC_H_ROOT) { if (clid != TC_H_INGRESS) { p = qdisc_lookup(dev, TC_H_MAJ(clid)); if (!p) { NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue_create(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); } /* It may be default qdisc, ignore it */ if (q && q->handle == 0) q = NULL; if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { if (tcm->tcm_handle) { if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) { NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override"); return -EEXIST; } if (TC_H_MIN(tcm->tcm_handle)) { NL_SET_ERR_MSG(extack, "Invalid minor handle"); return -EINVAL; } q = qdisc_lookup(dev, tcm->tcm_handle); if (!q) goto create_n_graft; if (n->nlmsg_flags & NLM_F_EXCL) { NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override"); return -EEXIST; } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; } if (q->flags & TCQ_F_INGRESS) { NL_SET_ERR_MSG(extack, "Cannot regraft ingress or clsact Qdiscs"); return -EINVAL; } if (q == p || (p && check_loop(q, p, 0))) { NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); return -ELOOP; } if (clid == TC_H_INGRESS) { NL_SET_ERR_MSG(extack, "Ingress cannot graft directly"); return -EINVAL; } qdisc_refcount_inc(q); goto graft; } else { if (!q) goto create_n_graft; /* This magic test requires explanation. * * We know, that some child q is already * attached to this parent and have choice: * 1) change it or 2) create/graft new one. * If the requested qdisc kind is different * than the existing one, then we choose graft. * If they are the same then this is "change" * operation - just let it fallthrough.. * * 1. We are allowed to create/graft only * if the request is explicitly stating * "please create if it doesn't exist". * * 2. If the request is to exclusive create * then the qdisc tcm_handle is not expected * to exist, so that we choose create/graft too. * * 3. The last case is when no flags are set. * This will happen when for example tc * utility issues a "change" command. * Alas, it is sort of hole in API, we * cannot decide what to do unambiguously. * For now we select create/graft. 
*/ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { if (req_create_or_replace(n) || req_create_exclusive(n)) goto create_n_graft; else if (req_change(n)) goto create_n_graft2; } } } } else { if (!tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Handle cannot be zero"); return -EINVAL; } q = qdisc_lookup(dev, tcm->tcm_handle); } /* Change qdisc parameters */ if (!q) { NL_SET_ERR_MSG(extack, "Specified qdisc not found"); return -ENOENT; } if (n->nlmsg_flags & NLM_F_EXCL) { NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify"); return -EEXIST; } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; } err = qdisc_change(q, tca, extack); if (err == 0) qdisc_notify(net, skb, n, clid, NULL, q, extack); return err; create_n_graft: if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); return -ENOENT; } create_n_graft2: if (clid == TC_H_INGRESS) { if (dev_ingress_queue(dev)) { q = qdisc_create(dev, dev_ingress_queue(dev), tcm->tcm_parent, tcm->tcm_parent, tca, &err, extack); } else { NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device"); err = -ENOENT; } } else { struct netdev_queue *dev_queue; if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) dev_queue = p->ops->cl_ops->select_queue(p, tcm); else if (p) dev_queue = p->dev_queue; else dev_queue = netdev_get_tx_queue(dev, 0); q = qdisc_create(dev, dev_queue, tcm->tcm_parent, tcm->tcm_handle, tca, &err, extack); } if (q == NULL) { if (err == -EAGAIN) goto replay; return err; } graft: err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); if (err) { if (q) qdisc_put(q); return err; } return 0; } static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx, bool recur, bool dump_invisible) { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; int b; if (!root) return 0; q = root; if (q_idx < s_q_idx) { q_idx++; } else { if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC, NULL) <= 0) goto done; q_idx++; } /* If dumping singletons, there is no qdisc_dev(root) and the singleton * itself has already been dumped. 
* * If we've already dumped the top-level (ingress) qdisc above and the global * qdisc hashtable, we don't want to hit it again */ if (!qdisc_dev(root) || !recur) goto out; hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (q_idx < s_q_idx) { q_idx++; continue; } if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC, NULL) <= 0) goto done; q_idx++; } out: *q_idx_p = q_idx; return ret; done: ret = -1; goto out; } static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; const struct nlmsghdr *nlh = cb->nlh; struct nlattr *tca[TCA_MAX + 1]; int err; s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; idx = 0; ASSERT_RTNL(); err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX, rtm_tca_policy, cb->extack); if (err < 0) return err; for_each_netdev(net, dev) { struct netdev_queue *dev_queue; if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc), skb, cb, &q_idx, s_q_idx, true, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping), skb, cb, &q_idx, s_q_idx, false, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; cont: idx++; } done: cb->args[0] = idx; cb->args[1] = q_idx; return skb->len; } /************************************************ * Traffic classes manipulation. * ************************************************/ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, unsigned long cl, u32 portid, u32 seq, u16 flags, int event, struct netlink_ext_ack *extack) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; cond_resched(); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; tcm->tcm_info = 0; if (nla_put_string(skb, TCA_KIND, q->ops->id)) goto nla_put_failure; if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, NULL, &d, TCA_PAD) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) goto nla_put_failure; if (gnet_stats_finish_copy(&d) < 0) goto nla_put_failure; if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; } static int tclass_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct Qdisc *q, unsigned long cl, int event, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) { kfree_skb(skb); return -EINVAL; } return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } static int tclass_get_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct Qdisc *q, unsigned long cl, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS, extack) < 0) { kfree_skb(skb); return -EINVAL; } return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, struct Qdisc *q, unsigned long cl, struct netlink_ext_ack *extack) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct sk_buff *skb; int err = 0; if (!cops->delete) return -EOPNOTSUPP; if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_DELTCLASS, extack) < 0) { kfree_skb(skb); return -EINVAL; } } else { skb = NULL; } err = cops->delete(q, cl, extack); if (err) { kfree_skb(skb); return err; } err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); return err; } #ifdef CONFIG_NET_CLS struct tcf_bind_args { struct tcf_walker w; unsigned long base; unsigned long cl; u32 classid; }; static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_bind_args *a = (void *)arg; if (n && tp->ops->bind_class) { struct Qdisc *q = tcf_block_q(tp->chain->block); sch_tree_lock(q); tp->ops->bind_class(n, a->classid, a->cl, q, a->base); sch_tree_unlock(q); } return 0; } struct tc_bind_class_args { struct qdisc_walker w; unsigned long new_cl; u32 portid; u32 clid; }; static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) { struct tc_bind_class_args *a = (struct tc_bind_class_args *)w; const struct Qdisc_class_ops *cops = q->ops->cl_ops; struct tcf_block *block; struct tcf_chain *chain; block = cops->tcf_block(q, cl, NULL); if (!block) return 0; for (chain = tcf_get_next_chain(block, NULL); chain; chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; for (tp = tcf_get_next_proto(chain, NULL); tp; tp = tcf_get_next_proto(chain, tp)) { struct tcf_bind_args arg = {}; arg.w.fn = tcf_node_bind; arg.classid = a->clid; arg.base = cl; arg.cl = a->new_cl; tp->ops->walk(tp, &arg.w, true); } } return 0; } static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, unsigned long new_cl) { const struct Qdisc_class_ops *cops = q->ops->cl_ops; struct tc_bind_class_args args = {}; if (!cops->tcf_block) return; args.portid = portid; args.clid = clid; args.new_cl = new_cl; args.w.fn = tc_bind_class_walker; q->ops->cl_ops->walk(q, &args.w); } #else static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, unsigned long new_cl) { } #endif static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct Qdisc *q = NULL; const 
struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; u32 portid; u32 clid; u32 qid; int err; err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. parent == X:0 - parent is root class. parent == X:Y - parent is a node in hierarchy. parent == 0:Y - parent is X:Y, where X:0 is qdisc. handle == 0:0 - generate handle from kernel pool. handle == 0:Y - class is X:Y, where X:0 is qdisc. handle == X:Y - clear. handle == X:0 - root class. */ /* Step 1. Determine qdisc handle X:0 */ portid = tcm->tcm_parent; clid = tcm->tcm_handle; qid = TC_H_MAJ(clid); if (portid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(portid); if (qid && qid1) { /* If both majors are known, they must be identical. */ if (qid != qid1) return -EINVAL; } else if (qid1) { qid = qid1; } else if (qid == 0) qid = rtnl_dereference(dev->qdisc)->handle; /* Now qid is genuine qdisc handle consistent * both with parent and child. * * TC_H_MAJ(portid) still may be unspecified, complete it now. */ if (portid) portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) qid = rtnl_dereference(dev->qdisc)->handle; } /* OK. Locate qdisc */ q = qdisc_lookup(dev, qid); if (!q) return -ENOENT; /* An check that it supports classes */ cops = q->ops->cl_ops; if (cops == NULL) return -EINVAL; /* Now try to get class */ if (clid == 0) { if (portid == TC_H_ROOT) clid = qid; } else clid = TC_H_MAKE(qid, clid); if (clid) cl = cops->find(q, clid); if (cl == 0) { err = -ENOENT; if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags & NLM_F_CREATE)) goto out; } else { switch (n->nlmsg_type) { case RTM_NEWTCLASS: err = -EEXIST; if (n->nlmsg_flags & NLM_F_EXCL) goto out; break; case RTM_DELTCLASS: err = tclass_del_notify(net, cops, skb, n, q, cl, extack); /* Unbind the class with flilters with 0 */ tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: err = tclass_get_notify(net, skb, n, q, cl, extack); goto out; default: err = -EINVAL; goto out; } } if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes"); return -EOPNOTSUPP; } new_cl = cl; err = -EOPNOTSUPP; if (cops->change) err = cops->change(q, clid, portid, tca, &new_cl, extack); if (err == 0) { tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack); /* We just create a new class, need to do reverse binding. 
*/ if (cl != new_cl) tc_bind_tclass(q, portid, clid, new_cl); } out: return err; } struct qdisc_dump_args { struct qdisc_walker w; struct sk_buff *skb; struct netlink_callback *cb; }; static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS, NULL); } static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, struct tcmsg *tcm, struct netlink_callback *cb, int *t_p, int s_t) { struct qdisc_dump_args arg; if (tc_qdisc_dump_ignore(q, false) || *t_p < s_t || !q->ops->cl_ops || (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)) { (*t_p)++; return 0; } if (*t_p > s_t) memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); arg.w.fn = qdisc_class_dump; arg.skb = skb; arg.cb = cb; arg.w.stop = 0; arg.w.skip = cb->args[1]; arg.w.count = 0; q->ops->cl_ops->walk(q, &arg.w); cb->args[1] = arg.w.count; if (arg.w.stop) return -1; (*t_p)++; return 0; } static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, struct tcmsg *tcm, struct netlink_callback *cb, int *t_p, int s_t, bool recur) { struct Qdisc *q; int b; if (!root) return 0; if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; if (!qdisc_dev(root) || !recur) return 0; if (tcm->tcm_parent) { q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); if (q && q != root && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; return 0; } hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; } return 0; } static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { struct tcmsg *tcm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); struct netdev_queue *dev_queue; struct net_device *dev; int t, s_t; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return 0; dev = dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return 0; s_t = cb->args[0]; t = 0; if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc), skb, tcm, cb, &t, s_t, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping), skb, tcm, cb, &t, s_t, false) < 0) goto done; done: cb->args[0] = t; dev_put(dev); return skb->len; } #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { seq_printf(seq, "%08x %08x %08x %08x\n", (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 1000000, (u32)NSEC_PER_SEC / hrtimer_resolution); return 0; } static int __net_init psched_net_init(struct net *net) { struct proc_dir_entry *e; e = proc_create_single("psched", 0, net->proc_net, psched_show); if (e == NULL) return -ENOMEM; return 0; } static void __net_exit psched_net_exit(struct net *net) { remove_proc_entry("psched", net->proc_net); } #else static int __net_init psched_net_init(struct net *net) { return 0; } static void __net_exit psched_net_exit(struct net *net) { } #endif static struct pernet_operations psched_net_ops = { .init = psched_net_init, .exit = psched_net_exit, }; #if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); #endif static int __init pktsched_init(void) { int err; err = register_pernet_subsys(&psched_net_ops); if (err) { pr_err("pktsched_init: " "cannot initialize per netns operations\n"); return err; } register_qdisc(&pfifo_fast_ops); register_qdisc(&pfifo_qdisc_ops); 
register_qdisc(&bfifo_qdisc_ops); register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); register_qdisc(&noqueue_qdisc_ops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, 0); rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, 0); tc_wrapper_init(); return 0; } subsys_initcall(pktsched_init);
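/*
 * Illustrative sketch (not part of sch_api.c): a toy FIFO qdisc module built on
 * the hooks that qdisc_create() above invokes (ops->init, ops->enqueue,
 * ops->dequeue) and registered through register_qdisc(), the same path the
 * built-in qdiscs take in pktsched_init().  The "toyfifo" name and this module
 * are hypothetical; the helpers (qdisc_enqueue_tail, qdisc_dequeue_head,
 * qdisc_peek_head, qdisc_reset_queue, qdisc_drop) are the generic ones from
 * <net/sch_generic.h>.  A minimal sketch only, not a complete qdisc.
 */
#include <linux/module.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>

static int toyfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			   struct sk_buff **to_free)
{
	/* Tail-drop once the configured limit is reached. */
	if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
		return qdisc_enqueue_tail(skb, sch);
	return qdisc_drop(skb, sch, to_free);
}

static int toyfifo_init(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	/* No private netlink options: derive a limit from the device. */
	sch->limit = qdisc_dev(sch)->tx_queue_len ? : 1;
	return 0;
}

static struct Qdisc_ops toyfifo_qdisc_ops __read_mostly = {
	.id		= "toyfifo",
	.priv_size	= 0,
	.enqueue	= toyfifo_enqueue,
	.dequeue	= qdisc_dequeue_head,
	.peek		= qdisc_peek_head,
	.init		= toyfifo_init,
	.reset		= qdisc_reset_queue,
	.owner		= THIS_MODULE,
};

static int __init toyfifo_module_init(void)
{
	/* Same registration call used above for pfifo/bfifo/mq/noqueue. */
	return register_qdisc(&toyfifo_qdisc_ops);
}

static void __exit toyfifo_module_exit(void)
{
	unregister_qdisc(&toyfifo_qdisc_ops);
}

module_init(toyfifo_module_init);
module_exit(toyfifo_module_exit);
MODULE_LICENSE("GPL");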
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Berkeley style UIO structures - Alan Cox 1994. */ #ifndef __LINUX_UIO_H #define __LINUX_UIO_H #include <linux/kernel.h> #include <linux/thread_info.h> #include <linux/mm_types.h> #include <uapi/linux/uio.h> struct page; typedef unsigned int __bitwise iov_iter_extraction_t; struct kvec { void *iov_base; /* and that should *never* hold a userland pointer */ size_t iov_len; }; enum iter_type { /* iter types */ ITER_UBUF, ITER_IOVEC, ITER_BVEC, ITER_KVEC, ITER_XARRAY, ITER_DISCARD, }; #define ITER_SOURCE 1 // == WRITE #define ITER_DEST 0 // == READ struct iov_iter_state { size_t iov_offset; size_t count; unsigned long nr_segs; }; struct iov_iter { u8 iter_type; bool nofault; bool data_source; size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so * that the members resolve correctly regardless of the type * of iterator used. This means that you can use: * * &iter->__ubuf_iovec or iter->__iov * * interchangeably for the user_backed cases, hence simplifying * some of the cases that need to deal with both. */ union { /* * This really should be a const, but we cannot do that without * also modifying any of the zero-filling iter init functions. * Leave it non-const for now, but it should be treated as such.
*/ struct iovec __ubuf_iovec; struct { union { /* use iter_iov() to get the current vec */ const struct iovec *__iov; const struct kvec *kvec; const struct bio_vec *bvec; struct xarray *xarray; void __user *ubuf; }; size_t count; }; }; union { unsigned long nr_segs; loff_t xarray_start; }; }; static inline const struct iovec *iter_iov(const struct iov_iter *iter) { if (iter->iter_type == ITER_UBUF) return (const struct iovec *) &iter->__ubuf_iovec; return iter->__iov; } #define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) #define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset) static inline enum iter_type iov_iter_type(const struct iov_iter *i) { return i->iter_type; } static inline void iov_iter_save_state(struct iov_iter *iter, struct iov_iter_state *state) { state->iov_offset = iter->iov_offset; state->count = iter->count; state->nr_segs = iter->nr_segs; } static inline bool iter_is_ubuf(const struct iov_iter *i) { return iov_iter_type(i) == ITER_UBUF; } static inline bool iter_is_iovec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_IOVEC; } static inline bool iov_iter_is_kvec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_KVEC; } static inline bool iov_iter_is_bvec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_BVEC; } static inline bool iov_iter_is_discard(const struct iov_iter *i) { return iov_iter_type(i) == ITER_DISCARD; } static inline bool iov_iter_is_xarray(const struct iov_iter *i) { return iov_iter_type(i) == ITER_XARRAY; } static inline unsigned char iov_iter_rw(const struct iov_iter *i) { return i->data_source ? WRITE : READ; } static inline bool user_backed_iter(const struct iov_iter *i) { return iter_is_ubuf(i) || iter_is_iovec(i); } /* * Total number of bytes covered by an iovec. * * NOTE that it is not safe to use this function until all the iovec's * segment lengths have been validated. Because the individual lengths can * overflow a size_t when added together. 
*/ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) { unsigned long seg; size_t ret = 0; for (seg = 0; seg < nr_segs; seg++) ret += iov[seg].iov_len; return ret; } size_t copy_page_from_iter_atomic(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { return copy_page_to_iter(&folio->page, offset, bytes, i); } static inline size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { return copy_page_from_iter_atomic(&folio->page, offset, bytes, i); } size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (check_copy_size(addr, bytes, true)) return _copy_to_iter(addr, bytes, i); return 0; } static __always_inline __must_check size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { if (check_copy_size(addr, bytes, false)) return _copy_from_iter(addr, bytes, i); return 0; } static __always_inline __must_check bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { size_t copied = copy_from_iter(addr, bytes, i); if (likely(copied == bytes)) return true; iov_iter_revert(i, copied); return false; } static __always_inline __must_check size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { if (check_copy_size(addr, bytes, false)) return _copy_from_iter_nocache(addr, bytes, i); return 0; } static __always_inline __must_check bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) { size_t copied = copy_from_iter_nocache(addr, bytes, i); if (likely(copied == bytes)) return true; iov_iter_revert(i, copied); return false; } #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /* * Note, users like pmem that depend on the stricter semantics of * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the * destination is flushed from the cache on return. 
*/ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #else #define _copy_from_iter_flushcache _copy_from_iter_nocache #endif #ifdef CONFIG_ARCH_HAS_COPY_MC size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #else #define _copy_mc_to_iter _copy_to_iter #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count); void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); static inline size_t iov_iter_count(const struct iov_iter *i) { return i->count; } /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value * greater than the amount of data in iov_iter is fine - it'll just do * nothing in that case. */ static inline void iov_iter_truncate(struct iov_iter *i, u64 count) { /* * count doesn't have to fit in size_t - comparison extends both * operands to u64 here and any value that would be truncated by * conversion in assignement is by definition greater than all * values of size_t, including old i->count. */ if (i->count > count) i->count = count; } /* * reexpand a previously truncated iterator; count must be no more than how much * we had shrunk it. 
*/ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } static inline int iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) { size_t shorted = 0; int npages; if (iov_iter_count(i) > max_bytes) { shorted = iov_iter_count(i) - max_bytes; iov_iter_truncate(i, max_bytes); } npages = iov_iter_npages(i, maxpages); if (shorted) iov_iter_reexpand(i, iov_iter_count(i) + shorted); return npages; } struct iovec *iovec_from_user(const struct iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat); ssize_t import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i); ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i, bool compat); int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i); static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, void __user *buf, size_t count) { WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_UBUF, .data_source = direction, .ubuf = buf, .count = count, .nr_segs = 1 }; } /* Flags for iov_iter_get/extract_pages*() */ /* Allow P2PDMA on the extracted pages */ #define ITER_ALLOW_P2PDMA ((__force iov_iter_extraction_t)0x01) ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0); /** * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained * @iter: The iterator * * Examine the iterator and indicate by returning true or false as to how, if * at all, pages extracted from the iterator will be retained by the extraction * function. * * %true indicates that the pages will have a pin placed in them that the * caller must unpin. This is must be done for DMA/async DIO to force fork() * to forcibly copy a page for the child (the parent must retain the original * page). * * %false indicates that no measures are taken and that it's up to the caller * to retain the pages. */ static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter) { return user_backed_iter(iter); } struct sg_table; ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len, struct sg_table *sgtable, unsigned int sg_max, iov_iter_extraction_t extraction_flags); #endif
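/*
 * Usage sketch (not part of this header): wrap a set of kernel buffers in an
 * ITER_KVEC iterator and drain it with copy_from_iter(), the way a driver's
 * write path might consume caller-supplied segments.  demo_drain_kvec() is a
 * hypothetical name; only helpers declared above (iov_iter_kvec,
 * iov_iter_count, copy_from_iter) and the ITER_SOURCE direction are assumed.
 */
#include <linux/uio.h>

static size_t demo_drain_kvec(void *dst, size_t dst_len,
			      const struct kvec *vec, unsigned long nr_segs)
{
	struct iov_iter iter;
	size_t total = 0;
	unsigned long seg;

	/* Sum the segment lengths; iov_length() is only for struct iovec. */
	for (seg = 0; seg < nr_segs; seg++)
		total += vec[seg].iov_len;

	/* ITER_SOURCE: data flows out of the iterator into @dst. */
	iov_iter_kvec(&iter, ITER_SOURCE, vec, nr_segs, total);

	/* copy_from_iter() returns how many bytes were actually copied. */
	return copy_from_iter(dst, min(dst_len, iov_iter_count(&iter)), &iter);
}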
// SPDX-License-Identifier: GPL-2.0-only /*************************************************************************** *
Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * * * * Based on Logitech G13 driver (v0.4) * * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * * * ***************************************************************************/ #include <linux/hid.h> #include <linux/hid-debug.h> #include <linux/fb.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/module.h> #include <linux/uaccess.h> #include "hid-picolcd.h" static int picolcd_debug_reset_show(struct seq_file *f, void *p) { if (picolcd_fbinfo((struct picolcd_data *)f->private)) seq_printf(f, "all fb\n"); else seq_printf(f, "all\n"); return 0; } static int picolcd_debug_reset_open(struct inode *inode, struct file *f) { return single_open(f, picolcd_debug_reset_show, inode->i_private); } static ssize_t picolcd_debug_reset_write(struct file *f, const char __user *user_buf, size_t count, loff_t *ppos) { struct picolcd_data *data = ((struct seq_file *)f->private_data)->private; char buf[32]; size_t cnt = min(count, sizeof(buf)-1); if (copy_from_user(buf, user_buf, cnt)) return -EFAULT; while (cnt > 0 && (buf[cnt-1] == ' ' || buf[cnt-1] == '\n')) cnt--; buf[cnt] = '\0'; if (strcmp(buf, "all") == 0) { picolcd_reset(data->hdev); picolcd_fb_reset(data, 1); } else if (strcmp(buf, "fb") == 0) { picolcd_fb_reset(data, 1); } else { return -EINVAL; } return count; } static const struct file_operations picolcd_debug_reset_fops = { .owner = THIS_MODULE, .open = picolcd_debug_reset_open, .read = seq_read, .llseek = seq_lseek, .write = picolcd_debug_reset_write, .release = single_release, }; /* * The "eeprom" file */ static ssize_t picolcd_debug_eeprom_read(struct file *f, char __user *u, size_t s, loff_t *off) { struct picolcd_data *data = f->private_data; struct picolcd_pending *resp; u8 raw_data[3]; ssize_t ret = -EIO; if (s == 0) return -EINVAL; if (*off > 0x0ff) return 0; /* prepare buffer with info about what we want to read (addr & len) */ raw_data[0] = *off & 0xff; raw_data[1] = (*off >> 8) & 0xff; raw_data[2] = s < 20 ? s : 20; if (*off + raw_data[2] > 0xff) raw_data[2] = 0x100 - *off; resp = picolcd_send_and_wait(data->hdev, REPORT_EE_READ, raw_data, sizeof(raw_data)); if (!resp) return -EIO; if (resp->in_report && resp->in_report->id == REPORT_EE_DATA) { /* successful read :) */ ret = resp->raw_data[2]; if (ret > s) ret = s; if (copy_to_user(u, resp->raw_data+3, ret)) ret = -EFAULT; else *off += ret; } /* anything else is some kind of IO error */ kfree(resp); return ret; } static ssize_t picolcd_debug_eeprom_write(struct file *f, const char __user *u, size_t s, loff_t *off) { struct picolcd_data *data = f->private_data; struct picolcd_pending *resp; ssize_t ret = -EIO; u8 raw_data[23]; if (s == 0) return -EINVAL; if (*off > 0x0ff) return -ENOSPC; memset(raw_data, 0, sizeof(raw_data)); raw_data[0] = *off & 0xff; raw_data[1] = (*off >> 8) & 0xff; raw_data[2] = min_t(size_t, 20, s); if (*off + raw_data[2] > 0xff) raw_data[2] = 0x100 - *off; if (copy_from_user(raw_data+3, u, min((u8)20, raw_data[2]))) return -EFAULT; resp = picolcd_send_and_wait(data->hdev, REPORT_EE_WRITE, raw_data, sizeof(raw_data)); if (!resp) return -EIO; if (resp->in_report && resp->in_report->id == REPORT_EE_DATA) { /* check if written data matches */ if (memcmp(raw_data, resp->raw_data, 3+raw_data[2]) == 0) { *off += raw_data[2]; ret = raw_data[2]; } } kfree(resp); return ret; } /* * Notes: * - read/write happens in chunks of at most 20 bytes, it's up to userspace * to loop in order to get more data. 
* - on write errors on otherwise correct write request the bytes * that should have been written are in undefined state. */ static const struct file_operations picolcd_debug_eeprom_fops = { .owner = THIS_MODULE, .open = simple_open, .read = picolcd_debug_eeprom_read, .write = picolcd_debug_eeprom_write, .llseek = generic_file_llseek, }; /* * The "flash" file */ /* record a flash address to buf (bounds check to be done by caller) */ static int _picolcd_flash_setaddr(struct picolcd_data *data, u8 *buf, long off) { buf[0] = off & 0xff; buf[1] = (off >> 8) & 0xff; if (data->addr_sz == 3) buf[2] = (off >> 16) & 0xff; return data->addr_sz == 2 ? 2 : 3; } /* read a given size of data (bounds check to be done by caller) */ static ssize_t _picolcd_flash_read(struct picolcd_data *data, int report_id, char __user *u, size_t s, loff_t *off) { struct picolcd_pending *resp; u8 raw_data[4]; ssize_t ret = 0; int len_off, err = -EIO; while (s > 0) { err = -EIO; len_off = _picolcd_flash_setaddr(data, raw_data, *off); raw_data[len_off] = s > 32 ? 32 : s; resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off+1); if (!resp || !resp->in_report) goto skip; if (resp->in_report->id == REPORT_MEMORY || resp->in_report->id == REPORT_BL_READ_MEMORY) { if (memcmp(raw_data, resp->raw_data, len_off+1) != 0) goto skip; if (copy_to_user(u+ret, resp->raw_data+len_off+1, raw_data[len_off])) { err = -EFAULT; goto skip; } *off += raw_data[len_off]; s -= raw_data[len_off]; ret += raw_data[len_off]; err = 0; } skip: kfree(resp); if (err) return ret > 0 ? ret : err; } return ret; } static ssize_t picolcd_debug_flash_read(struct file *f, char __user *u, size_t s, loff_t *off) { struct picolcd_data *data = f->private_data; if (s == 0) return -EINVAL; if (*off > 0x05fff) return 0; if (*off + s > 0x05fff) s = 0x06000 - *off; if (data->status & PICOLCD_BOOTLOADER) return _picolcd_flash_read(data, REPORT_BL_READ_MEMORY, u, s, off); else return _picolcd_flash_read(data, REPORT_READ_MEMORY, u, s, off); } /* erase block aligned to 64bytes boundary */ static ssize_t _picolcd_flash_erase64(struct picolcd_data *data, int report_id, loff_t *off) { struct picolcd_pending *resp; u8 raw_data[3]; int len_off; ssize_t ret = -EIO; if (*off & 0x3f) return -EINVAL; len_off = _picolcd_flash_setaddr(data, raw_data, *off); resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off); if (!resp || !resp->in_report) goto skip; if (resp->in_report->id == REPORT_MEMORY || resp->in_report->id == REPORT_BL_ERASE_MEMORY) { if (memcmp(raw_data, resp->raw_data, len_off) != 0) goto skip; ret = 0; } skip: kfree(resp); return ret; } /* write a given size of data (bounds check to be done by caller) */ static ssize_t _picolcd_flash_write(struct picolcd_data *data, int report_id, const char __user *u, size_t s, loff_t *off) { struct picolcd_pending *resp; u8 raw_data[36]; ssize_t ret = 0; int len_off, err = -EIO; while (s > 0) { err = -EIO; len_off = _picolcd_flash_setaddr(data, raw_data, *off); raw_data[len_off] = s > 32 ? 
32 : s; if (copy_from_user(raw_data+len_off+1, u, raw_data[len_off])) { err = -EFAULT; break; } resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off+1+raw_data[len_off]); if (!resp || !resp->in_report) goto skip; if (resp->in_report->id == REPORT_MEMORY || resp->in_report->id == REPORT_BL_WRITE_MEMORY) { if (memcmp(raw_data, resp->raw_data, len_off+1+raw_data[len_off]) != 0) goto skip; *off += raw_data[len_off]; s -= raw_data[len_off]; ret += raw_data[len_off]; err = 0; } skip: kfree(resp); if (err) break; } return ret > 0 ? ret : err; } static ssize_t picolcd_debug_flash_write(struct file *f, const char __user *u, size_t s, loff_t *off) { struct picolcd_data *data = f->private_data; ssize_t err, ret = 0; int report_erase, report_write; if (s == 0) return -EINVAL; if (*off > 0x5fff) return -ENOSPC; if (s & 0x3f) return -EINVAL; if (*off & 0x3f) return -EINVAL; if (data->status & PICOLCD_BOOTLOADER) { report_erase = REPORT_BL_ERASE_MEMORY; report_write = REPORT_BL_WRITE_MEMORY; } else { report_erase = REPORT_ERASE_MEMORY; report_write = REPORT_WRITE_MEMORY; } mutex_lock(&data->mutex_flash); while (s > 0) { err = _picolcd_flash_erase64(data, report_erase, off); if (err) break; err = _picolcd_flash_write(data, report_write, u, 64, off); if (err < 0) break; ret += err; *off += err; s -= err; if (err != 64) break; } mutex_unlock(&data->mutex_flash); return ret > 0 ? ret : err; } /* * Notes: * - concurrent writing is prevented by mutex and all writes must be * n*64 bytes and 64-byte aligned, each write being preceded by an * ERASE which erases a 64byte block. * If less than requested was written or an error is returned for an * otherwise correct write request the next 64-byte block which should * have been written is in undefined state (mostly: original, erased, * (half-)written with write error) * - reading can happen without special restriction */ static const struct file_operations picolcd_debug_flash_fops = { .owner = THIS_MODULE, .open = simple_open, .read = picolcd_debug_flash_read, .write = picolcd_debug_flash_write, .llseek = generic_file_llseek, }; /* * Helper code for HID report level dumping/debugging */ static const char * const error_codes[] = { "success", "parameter missing", "data_missing", "block readonly", "block not erasable", "block too big", "section overflow", "invalid command length", "invalid data length", }; static void dump_buff_as_hex(char *dst, size_t dst_sz, const u8 *data, const size_t data_len) { int i, j; for (i = j = 0; i < data_len && j + 4 < dst_sz; i++) { dst[j++] = hex_asc[(data[i] >> 4) & 0x0f]; dst[j++] = hex_asc[data[i] & 0x0f]; dst[j++] = ' '; } dst[j] = '\0'; if (j > 0) dst[j-1] = '\n'; if (i < data_len && j > 2) dst[j-2] = dst[j-3] = '.'; } void picolcd_debug_out_report(struct picolcd_data *data, struct hid_device *hdev, struct hid_report *report) { u8 *raw_data; int raw_size = (report->size >> 3) + 1; char *buff; #define BUFF_SZ 256 /* Avoid unnecessary overhead if debugfs is disabled */ if (list_empty(&hdev->debug_list)) return; buff = kmalloc(BUFF_SZ, GFP_ATOMIC); if (!buff) return; raw_data = hid_alloc_report_buf(report, GFP_ATOMIC); if (!raw_data) { kfree(buff); return; } snprintf(buff, BUFF_SZ, "\nout report %d (size %d) = ", report->id, raw_size); hid_debug_event(hdev, buff); raw_data[0] = report->id; hid_output_report(report, raw_data); dump_buff_as_hex(buff, BUFF_SZ, raw_data, raw_size); hid_debug_event(hdev, buff); switch (report->id) { case REPORT_LED_STATE: /* 1 data byte with GPO state */ snprintf(buff, BUFF_SZ, "out report 
%s (%d, size=%d)\n", "REPORT_LED_STATE", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tGPO state: 0x%02x\n", raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_BRIGHTNESS: /* 1 data byte with brightness */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_BRIGHTNESS", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tBrightness: 0x%02x\n", raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_CONTRAST: /* 1 data byte with contrast */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_CONTRAST", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tContrast: 0x%02x\n", raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_RESET: /* 2 data bytes with reset duration in ms */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_RESET", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tDuration: 0x%02x%02x (%dms)\n", raw_data[2], raw_data[1], raw_data[2] << 8 | raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_LCD_CMD: /* 63 data bytes with LCD commands */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_LCD_CMD", report->id, raw_size-1); hid_debug_event(hdev, buff); /* TODO: format decoding */ break; case REPORT_LCD_DATA: /* 63 data bytes with LCD data */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_LCD_CMD", report->id, raw_size-1); /* TODO: format decoding */ hid_debug_event(hdev, buff); break; case REPORT_LCD_CMD_DATA: /* 63 data bytes with LCD commands and data */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_LCD_CMD", report->id, raw_size-1); /* TODO: format decoding */ hid_debug_event(hdev, buff); break; case REPORT_EE_READ: /* 3 data bytes with read area description */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_EE_READ", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); break; case REPORT_EE_WRITE: /* 3+1..20 data bytes with write area description */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_EE_WRITE", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); if (raw_data[3] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[3] + 4 <= raw_size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } hid_debug_event(hdev, buff); break; case REPORT_ERASE_MEMORY: case REPORT_BL_ERASE_MEMORY: /* 3 data bytes with pointer inside erase block */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_ERASE_MEMORY", report->id, raw_size-1); hid_debug_event(hdev, buff); switch (data->addr_sz) { case 2: snprintf(buff, BUFF_SZ, "\tAddress inside 64 byte block: 0x%02x%02x\n", raw_data[2], raw_data[1]); break; case 3: snprintf(buff, BUFF_SZ, "\tAddress inside 64 byte block: 0x%02x%02x%02x\n", raw_data[3], raw_data[2], raw_data[1]); break; default: snprintf(buff, BUFF_SZ, "\tNot supported\n"); } hid_debug_event(hdev, buff); break; case 
REPORT_READ_MEMORY: case REPORT_BL_READ_MEMORY: /* 4 data bytes with read area description */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_READ_MEMORY", report->id, raw_size-1); hid_debug_event(hdev, buff); switch (data->addr_sz) { case 2: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); break; case 3: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", raw_data[3], raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); break; default: snprintf(buff, BUFF_SZ, "\tNot supported\n"); } hid_debug_event(hdev, buff); break; case REPORT_WRITE_MEMORY: case REPORT_BL_WRITE_MEMORY: /* 4+1..32 data bytes with write adrea description */ snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_WRITE_MEMORY", report->id, raw_size-1); hid_debug_event(hdev, buff); switch (data->addr_sz) { case 2: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); if (raw_data[3] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[3] + 4 <= raw_size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } break; case 3: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", raw_data[3], raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); hid_debug_event(hdev, buff); if (raw_data[4] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[4] + 5 <= raw_size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+5, raw_data[4]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } break; default: snprintf(buff, BUFF_SZ, "\tNot supported\n"); } hid_debug_event(hdev, buff); break; case REPORT_SPLASH_RESTART: /* TODO */ break; case REPORT_EXIT_KEYBOARD: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_EXIT_KEYBOARD", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tRestart delay: %dms (0x%02x%02x)\n", raw_data[1] | (raw_data[2] << 8), raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_VERSION: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_VERSION", report->id, raw_size-1); hid_debug_event(hdev, buff); break; case REPORT_DEVID: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_DEVID", report->id, raw_size-1); hid_debug_event(hdev, buff); break; case REPORT_SPLASH_SIZE: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_SPLASH_SIZE", report->id, raw_size-1); hid_debug_event(hdev, buff); break; case REPORT_HOOK_VERSION: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_HOOK_VERSION", report->id, raw_size-1); hid_debug_event(hdev, buff); break; case REPORT_EXIT_FLASHER: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "REPORT_VERSION", report->id, raw_size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tRestart delay: %dms (0x%02x%02x)\n", raw_data[1] | (raw_data[2] << 8), raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); break; default: snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", "<unknown>", report->id, 
raw_size-1); hid_debug_event(hdev, buff); break; } wake_up_interruptible(&hdev->debug_wait); kfree(raw_data); kfree(buff); } void picolcd_debug_raw_event(struct picolcd_data *data, struct hid_device *hdev, struct hid_report *report, u8 *raw_data, int size) { char *buff; #define BUFF_SZ 256 /* Avoid unnecessary overhead if debugfs is disabled */ if (list_empty(&hdev->debug_list)) return; buff = kmalloc(BUFF_SZ, GFP_ATOMIC); if (!buff) return; switch (report->id) { case REPORT_ERROR_CODE: /* 2 data bytes with affected report and error code */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_ERROR_CODE", report->id, size-1); hid_debug_event(hdev, buff); if (raw_data[2] < ARRAY_SIZE(error_codes)) snprintf(buff, BUFF_SZ, "\tError code 0x%02x (%s) in reply to report 0x%02x\n", raw_data[2], error_codes[raw_data[2]], raw_data[1]); else snprintf(buff, BUFF_SZ, "\tError code 0x%02x in reply to report 0x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_KEY_STATE: /* 2 data bytes with key state */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_KEY_STATE", report->id, size-1); hid_debug_event(hdev, buff); if (raw_data[1] == 0) snprintf(buff, BUFF_SZ, "\tNo key pressed\n"); else if (raw_data[2] == 0) snprintf(buff, BUFF_SZ, "\tOne key pressed: 0x%02x (%d)\n", raw_data[1], raw_data[1]); else snprintf(buff, BUFF_SZ, "\tTwo keys pressed: 0x%02x (%d), 0x%02x (%d)\n", raw_data[1], raw_data[1], raw_data[2], raw_data[2]); hid_debug_event(hdev, buff); break; case REPORT_IR_DATA: /* Up to 20 byes of IR scancode data */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_IR_DATA", report->id, size-1); hid_debug_event(hdev, buff); if (raw_data[1] == 0) { snprintf(buff, BUFF_SZ, "\tUnexpectedly 0 data length\n"); hid_debug_event(hdev, buff); } else if (raw_data[1] + 1 <= size) { snprintf(buff, BUFF_SZ, "\tData length: %d\n\tIR Data: ", raw_data[1]); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+2, raw_data[1]); hid_debug_event(hdev, buff); } else { snprintf(buff, BUFF_SZ, "\tOverflowing data length: %d\n", raw_data[1]-1); hid_debug_event(hdev, buff); } break; case REPORT_EE_DATA: /* Data buffer in response to REPORT_EE_READ or REPORT_EE_WRITE */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_EE_DATA", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); if (raw_data[3] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); hid_debug_event(hdev, buff); } else if (raw_data[3] + 4 <= size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); hid_debug_event(hdev, buff); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); hid_debug_event(hdev, buff); } break; case REPORT_MEMORY: /* Data buffer in response to REPORT_READ_MEMORY or REPORT_WRITE_MEMORY */ snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_MEMORY", report->id, size-1); hid_debug_event(hdev, buff); switch (data->addr_sz) { case 2: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); hid_debug_event(hdev, buff); if (raw_data[3] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[3] + 4 <= size) { snprintf(buff, BUFF_SZ, "\tData: "); 
hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } break; case 3: snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", raw_data[3], raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); hid_debug_event(hdev, buff); if (raw_data[4] == 0) { snprintf(buff, BUFF_SZ, "\tNo data\n"); } else if (raw_data[4] + 5 <= size) { snprintf(buff, BUFF_SZ, "\tData: "); hid_debug_event(hdev, buff); dump_buff_as_hex(buff, BUFF_SZ, raw_data+5, raw_data[4]); } else { snprintf(buff, BUFF_SZ, "\tData overflowed\n"); } break; default: snprintf(buff, BUFF_SZ, "\tNot supported\n"); } hid_debug_event(hdev, buff); break; case REPORT_VERSION: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_VERSION", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tFirmware version: %d.%d\n", raw_data[2], raw_data[1]); hid_debug_event(hdev, buff); break; case REPORT_BL_ERASE_MEMORY: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_BL_ERASE_MEMORY", report->id, size-1); hid_debug_event(hdev, buff); /* TODO */ break; case REPORT_BL_READ_MEMORY: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_BL_READ_MEMORY", report->id, size-1); hid_debug_event(hdev, buff); /* TODO */ break; case REPORT_BL_WRITE_MEMORY: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_BL_WRITE_MEMORY", report->id, size-1); hid_debug_event(hdev, buff); /* TODO */ break; case REPORT_DEVID: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_DEVID", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tSerial: 0x%02x%02x%02x%02x\n", raw_data[1], raw_data[2], raw_data[3], raw_data[4]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tType: 0x%02x\n", raw_data[5]); hid_debug_event(hdev, buff); break; case REPORT_SPLASH_SIZE: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_SPLASH_SIZE", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tTotal splash space: %d\n", (raw_data[2] << 8) | raw_data[1]); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tUsed splash space: %d\n", (raw_data[4] << 8) | raw_data[3]); hid_debug_event(hdev, buff); break; case REPORT_HOOK_VERSION: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "REPORT_HOOK_VERSION", report->id, size-1); hid_debug_event(hdev, buff); snprintf(buff, BUFF_SZ, "\tFirmware version: %d.%d\n", raw_data[1], raw_data[2]); hid_debug_event(hdev, buff); break; default: snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", "<unknown>", report->id, size-1); hid_debug_event(hdev, buff); break; } wake_up_interruptible(&hdev->debug_wait); kfree(buff); } void picolcd_init_devfs(struct picolcd_data *data, struct hid_report *eeprom_r, struct hid_report *eeprom_w, struct hid_report *flash_r, struct hid_report *flash_w, struct hid_report *reset) { struct hid_device *hdev = data->hdev; mutex_init(&data->mutex_flash); /* reset */ if (reset) data->debug_reset = debugfs_create_file("reset", 0600, hdev->debug_dir, data, &picolcd_debug_reset_fops); /* eeprom */ if (eeprom_r || eeprom_w) data->debug_eeprom = debugfs_create_file("eeprom", (eeprom_w ? S_IWUSR : 0) | (eeprom_r ? 
S_IRUSR : 0), hdev->debug_dir, data, &picolcd_debug_eeprom_fops); /* flash */ if (flash_r && flash_r->maxfield == 1 && flash_r->field[0]->report_size == 8) data->addr_sz = flash_r->field[0]->report_count - 1; else data->addr_sz = -1; if (data->addr_sz == 2 || data->addr_sz == 3) { data->debug_flash = debugfs_create_file("flash", (flash_w ? S_IWUSR : 0) | (flash_r ? S_IRUSR : 0), hdev->debug_dir, data, &picolcd_debug_flash_fops); } else if (flash_r || flash_w) hid_warn(hdev, "Unexpected FLASH access reports, please submit rdesc for review\n"); } void picolcd_exit_devfs(struct picolcd_data *data) { struct dentry *dent; dent = data->debug_reset; data->debug_reset = NULL; debugfs_remove(dent); dent = data->debug_eeprom; data->debug_eeprom = NULL; debugfs_remove(dent); dent = data->debug_flash; data->debug_flash = NULL; debugfs_remove(dent); mutex_destroy(&data->mutex_flash); }
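/*
 * Example (user space, hedged; not part of the driver): a minimal sketch of
 * reading the "eeprom" debugfs file created by picolcd_init_devfs() above.
 * The device directory name under /sys/kernel/debug/hid/ is a placeholder --
 * substitute the real <bus>:<vendor>:<product>.<n> name of the device.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical device directory name */
	const char *path = "/sys/kernel/debug/hid/0003:04D8:C002.0001/eeprom";
	unsigned char buf[32];
	ssize_t n, i;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	n = read(fd, buf, sizeof(buf));
	for (i = 0; i < n; i++)
		printf("%02x ", buf[i]);
	printf("\n");

	close(fd);
	return 0;
}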
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_IOPRIO_H #define _UAPI_LINUX_IOPRIO_H #include <linux/stddef.h> #include <linux/types.h> /* * Gives us 8 prio classes with 13-bits of data for each class */ #define IOPRIO_CLASS_SHIFT 13 #define IOPRIO_NR_CLASSES 8 #define IOPRIO_CLASS_MASK (IOPRIO_NR_CLASSES - 1) #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) #define IOPRIO_PRIO_CLASS(ioprio) \ (((ioprio) >> IOPRIO_CLASS_SHIFT) & IOPRIO_CLASS_MASK) #define IOPRIO_PRIO_DATA(ioprio) ((ioprio) & IOPRIO_PRIO_MASK) /* * These are the io priority classes as implemented by the BFQ and mq-deadline * schedulers. RT is the realtime class, it always gets premium service. For * ATA disks supporting NCQ IO priority, RT class IOs will be processed using * high priority NCQ commands. BE is the best-effort scheduling class, the * default for any process. IDLE is the idle scheduling class, it is only * served when no one else is using the disk. */ enum { IOPRIO_CLASS_NONE = 0, IOPRIO_CLASS_RT = 1, IOPRIO_CLASS_BE = 2, IOPRIO_CLASS_IDLE = 3, /* Special class to indicate an invalid ioprio value */ IOPRIO_CLASS_INVALID = 7, }; /* * The RT and BE priority classes both support up to 8 priority levels that * can be specified using the lower 3-bits of the priority data. */ #define IOPRIO_LEVEL_NR_BITS 3 #define IOPRIO_NR_LEVELS (1 << IOPRIO_LEVEL_NR_BITS) #define IOPRIO_LEVEL_MASK (IOPRIO_NR_LEVELS - 1) #define IOPRIO_PRIO_LEVEL(ioprio) ((ioprio) & IOPRIO_LEVEL_MASK) #define IOPRIO_BE_NR IOPRIO_NR_LEVELS /* * Possible values for the "which" argument of the ioprio_get() and * ioprio_set() system calls (see "man ioprio_set"). */ enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, IOPRIO_WHO_USER, }; /* * Fallback BE class priority level. */ #define IOPRIO_NORM 4 #define IOPRIO_BE_NORM IOPRIO_NORM /* * The 10 bits between the priority class and the priority level are used to * optionally define I/O hints for any combination of I/O priority class and * level. Depending on the kernel configuration, I/O scheduler being used and * the target I/O device being used, hints can influence how I/Os are processed * without affecting the I/O scheduling ordering defined by the I/O priority * class and level. */ #define IOPRIO_HINT_SHIFT IOPRIO_LEVEL_NR_BITS #define IOPRIO_HINT_NR_BITS 10 #define IOPRIO_NR_HINTS (1 << IOPRIO_HINT_NR_BITS) #define IOPRIO_HINT_MASK (IOPRIO_NR_HINTS - 1) #define IOPRIO_PRIO_HINT(ioprio) \ (((ioprio) >> IOPRIO_HINT_SHIFT) & IOPRIO_HINT_MASK) /* * I/O hints. */ enum { /* No hint */ IOPRIO_HINT_NONE = 0, /* * Device command duration limits: indicate to the device a desired * duration limit for the commands that will be used to process an I/O. * These will currently only be effective for SCSI and ATA devices that * support the command duration limits feature. If this feature is * enabled, then the commands issued to the device to process an I/O with * one of these hints set will have the duration limit index (dld field) * set to the value of the hint. 
*/ IOPRIO_HINT_DEV_DURATION_LIMIT_1 = 1, IOPRIO_HINT_DEV_DURATION_LIMIT_2 = 2, IOPRIO_HINT_DEV_DURATION_LIMIT_3 = 3, IOPRIO_HINT_DEV_DURATION_LIMIT_4 = 4, IOPRIO_HINT_DEV_DURATION_LIMIT_5 = 5, IOPRIO_HINT_DEV_DURATION_LIMIT_6 = 6, IOPRIO_HINT_DEV_DURATION_LIMIT_7 = 7, }; #define IOPRIO_BAD_VALUE(val, max) ((val) < 0 || (val) >= (max)) /* * Return an I/O priority value based on a class, a level and a hint. */ static __always_inline __u16 ioprio_value(int prioclass, int priolevel, int priohint) { if (IOPRIO_BAD_VALUE(prioclass, IOPRIO_NR_CLASSES) || IOPRIO_BAD_VALUE(priolevel, IOPRIO_NR_LEVELS) || IOPRIO_BAD_VALUE(priohint, IOPRIO_NR_HINTS)) return IOPRIO_CLASS_INVALID << IOPRIO_CLASS_SHIFT; return (prioclass << IOPRIO_CLASS_SHIFT) | (priohint << IOPRIO_HINT_SHIFT) | priolevel; } #define IOPRIO_PRIO_VALUE(prioclass, priolevel) \ ioprio_value(prioclass, priolevel, IOPRIO_HINT_NONE) #define IOPRIO_PRIO_VALUE_HINT(prioclass, priolevel, priohint) \ ioprio_value(prioclass, priolevel, priohint) #endif /* _UAPI_LINUX_IOPRIO_H */
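/*
 * Example (user space, hedged; not part of the header): combining a class,
 * a level and a command-duration-limit hint with the macros above, then
 * applying the value with the raw ioprio_set() syscall (glibc provides no
 * wrapper).  Assumes the toolchain ships a recent copy of this UAPI header.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/ioprio.h>

int main(void)
{
	/* best-effort class, level 3, duration limit hint 1 */
	int prio = IOPRIO_PRIO_VALUE_HINT(IOPRIO_CLASS_BE, 3,
					  IOPRIO_HINT_DEV_DURATION_LIMIT_1);

	if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, prio) < 0) {
		perror("ioprio_set");
		return 1;
	}

	printf("class=%d level=%d hint=%d\n",
	       IOPRIO_PRIO_CLASS(prio), IOPRIO_PRIO_LEVEL(prio),
	       IOPRIO_PRIO_HINT(prio));
	return 0;
}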
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
* Authors: David Chinner and Glauber Costa * * Generic LRU infrastructure */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/list_lru.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/memcontrol.h> #include "slab.h" #include "internal.h" #ifdef CONFIG_MEMCG_KMEM static LIST_HEAD(memcg_list_lrus); static DEFINE_MUTEX(list_lrus_mutex); static inline bool list_lru_memcg_aware(struct list_lru *lru) { return lru->memcg_aware; } static void list_lru_register(struct list_lru *lru) { if (!list_lru_memcg_aware(lru)) return; mutex_lock(&list_lrus_mutex); list_add(&lru->list, &memcg_list_lrus); mutex_unlock(&list_lrus_mutex); } static void list_lru_unregister(struct list_lru *lru) { if (!list_lru_memcg_aware(lru)) return; mutex_lock(&list_lrus_mutex); list_del(&lru->list); mutex_unlock(&list_lrus_mutex); } static int lru_shrinker_id(struct list_lru *lru) { return lru->shrinker_id; } static inline struct list_lru_one * list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) { if (list_lru_memcg_aware(lru) && idx >= 0) { struct list_lru_memcg *mlru = xa_load(&lru->xa, idx); return mlru ? &mlru->node[nid] : NULL; } return &lru->node[nid].lru; } #else static void list_lru_register(struct list_lru *lru) { } static void list_lru_unregister(struct list_lru *lru) { } static int lru_shrinker_id(struct list_lru *lru) { return -1; } static inline bool list_lru_memcg_aware(struct list_lru *lru) { return false; } static inline struct list_lru_one * list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) { return &lru->node[nid].lru; } #endif /* CONFIG_MEMCG_KMEM */ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { struct list_lru_node *nlru = &lru->node[nid]; struct list_lru_one *l; spin_lock(&nlru->lock); if (list_empty(item)) { l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); list_add_tail(item, &l->list); /* Set shrinker bit if the first element was added */ if (!l->nr_items++) set_shrinker_bit(memcg, nid, lru_shrinker_id(lru)); nlru->nr_items++; spin_unlock(&nlru->lock); return true; } spin_unlock(&nlru->lock); return false; } EXPORT_SYMBOL_GPL(list_lru_add); bool list_lru_add_obj(struct list_lru *lru, struct list_head *item) { int nid = page_to_nid(virt_to_page(item)); struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? mem_cgroup_from_slab_obj(item) : NULL; return list_lru_add(lru, item, nid, memcg); } EXPORT_SYMBOL_GPL(list_lru_add_obj); bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { struct list_lru_node *nlru = &lru->node[nid]; struct list_lru_one *l; spin_lock(&nlru->lock); if (!list_empty(item)) { l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); list_del_init(item); l->nr_items--; nlru->nr_items--; spin_unlock(&nlru->lock); return true; } spin_unlock(&nlru->lock); return false; } EXPORT_SYMBOL_GPL(list_lru_del); bool list_lru_del_obj(struct list_lru *lru, struct list_head *item) { int nid = page_to_nid(virt_to_page(item)); struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? 
mem_cgroup_from_slab_obj(item) : NULL; return list_lru_del(lru, item, nid, memcg); } EXPORT_SYMBOL_GPL(list_lru_del_obj); void list_lru_isolate(struct list_lru_one *list, struct list_head *item) { list_del_init(item); list->nr_items--; } EXPORT_SYMBOL_GPL(list_lru_isolate); void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, struct list_head *head) { list_move(item, head); list->nr_items--; } EXPORT_SYMBOL_GPL(list_lru_isolate_move); unsigned long list_lru_count_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg) { struct list_lru_one *l; long count; rcu_read_lock(); l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); count = l ? READ_ONCE(l->nr_items) : 0; rcu_read_unlock(); if (unlikely(count < 0)) count = 0; return count; } EXPORT_SYMBOL_GPL(list_lru_count_one); unsigned long list_lru_count_node(struct list_lru *lru, int nid) { struct list_lru_node *nlru; nlru = &lru->node[nid]; return nlru->nr_items; } EXPORT_SYMBOL_GPL(list_lru_count_node); static unsigned long __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk) { struct list_lru_node *nlru = &lru->node[nid]; struct list_lru_one *l; struct list_head *item, *n; unsigned long isolated = 0; restart: l = list_lru_from_memcg_idx(lru, nid, memcg_idx); if (!l) goto out; list_for_each_safe(item, n, &l->list) { enum lru_status ret; /* * decrement nr_to_walk first so that we don't livelock if we * get stuck on large numbers of LRU_RETRY items */ if (!*nr_to_walk) break; --*nr_to_walk; ret = isolate(item, l, &nlru->lock, cb_arg); switch (ret) { case LRU_REMOVED_RETRY: assert_spin_locked(&nlru->lock); fallthrough; case LRU_REMOVED: isolated++; nlru->nr_items--; /* * If the lru lock has been dropped, our list * traversal is now invalid and so we have to * restart from scratch. */ if (ret == LRU_REMOVED_RETRY) goto restart; break; case LRU_ROTATE: list_move_tail(item, &l->list); break; case LRU_SKIP: break; case LRU_RETRY: /* * The lru lock has been dropped, our list traversal is * now invalid and so we have to restart from scratch. 
*/ assert_spin_locked(&nlru->lock); goto restart; case LRU_STOP: assert_spin_locked(&nlru->lock); goto out; default: BUG(); } } out: return isolated; } unsigned long list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk) { struct list_lru_node *nlru = &lru->node[nid]; unsigned long ret; spin_lock(&nlru->lock); ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate, cb_arg, nr_to_walk); spin_unlock(&nlru->lock); return ret; } EXPORT_SYMBOL_GPL(list_lru_walk_one); unsigned long list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk) { struct list_lru_node *nlru = &lru->node[nid]; unsigned long ret; spin_lock_irq(&nlru->lock); ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate, cb_arg, nr_to_walk); spin_unlock_irq(&nlru->lock); return ret; } unsigned long list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk) { long isolated = 0; isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg, nr_to_walk); #ifdef CONFIG_MEMCG_KMEM if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { struct list_lru_memcg *mlru; unsigned long index; xa_for_each(&lru->xa, index, mlru) { struct list_lru_node *nlru = &lru->node[nid]; spin_lock(&nlru->lock); isolated += __list_lru_walk_one(lru, nid, index, isolate, cb_arg, nr_to_walk); spin_unlock(&nlru->lock); if (*nr_to_walk <= 0) break; } } #endif return isolated; } EXPORT_SYMBOL_GPL(list_lru_walk_node); static void init_one_lru(struct list_lru_one *l) { INIT_LIST_HEAD(&l->list); l->nr_items = 0; } #ifdef CONFIG_MEMCG_KMEM static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp) { int nid; struct list_lru_memcg *mlru; mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp); if (!mlru) return NULL; for_each_node(nid) init_one_lru(&mlru->node[nid]); return mlru; } static void memcg_list_lru_free(struct list_lru *lru, int src_idx) { struct list_lru_memcg *mlru = xa_erase_irq(&lru->xa, src_idx); /* * The __list_lru_walk_one() can walk the list of this node. * We need kvfree_rcu() here. And the walking of the list * is under lru->node[nid]->lock, which can serve as a RCU * read-side critical section. */ if (mlru) kvfree_rcu(mlru, rcu); } static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) { if (memcg_aware) xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ); lru->memcg_aware = memcg_aware; } static void memcg_destroy_list_lru(struct list_lru *lru) { XA_STATE(xas, &lru->xa, 0); struct list_lru_memcg *mlru; if (!list_lru_memcg_aware(lru)) return; xas_lock_irq(&xas); xas_for_each(&xas, mlru, ULONG_MAX) { kfree(mlru); xas_store(&xas, NULL); } xas_unlock_irq(&xas); } static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid, int src_idx, struct mem_cgroup *dst_memcg) { struct list_lru_node *nlru = &lru->node[nid]; int dst_idx = dst_memcg->kmemcg_id; struct list_lru_one *src, *dst; /* * Since list_lru_{add,del} may be called under an IRQ-safe lock, * we have to use IRQ-safe primitives here to avoid deadlock. 
*/ spin_lock_irq(&nlru->lock); src = list_lru_from_memcg_idx(lru, nid, src_idx); if (!src) goto out; dst = list_lru_from_memcg_idx(lru, nid, dst_idx); list_splice_init(&src->list, &dst->list); if (src->nr_items) { dst->nr_items += src->nr_items; set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru)); src->nr_items = 0; } out: spin_unlock_irq(&nlru->lock); } static void memcg_reparent_list_lru(struct list_lru *lru, int src_idx, struct mem_cgroup *dst_memcg) { int i; for_each_node(i) memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg); memcg_list_lru_free(lru, src_idx); } void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent) { struct cgroup_subsys_state *css; struct list_lru *lru; int src_idx = memcg->kmemcg_id; /* * Change kmemcg_id of this cgroup and all its descendants to the * parent's id, and then move all entries from this cgroup's list_lrus * to ones of the parent. * * After we have finished, all list_lrus corresponding to this cgroup * are guaranteed to remain empty. So we can safely free this cgroup's * list lrus in memcg_list_lru_free(). * * Changing ->kmemcg_id to the parent can prevent memcg_list_lru_alloc() * from allocating list lrus for this cgroup after memcg_list_lru_free() * call. */ rcu_read_lock(); css_for_each_descendant_pre(css, &memcg->css) { struct mem_cgroup *child; child = mem_cgroup_from_css(css); WRITE_ONCE(child->kmemcg_id, parent->kmemcg_id); } rcu_read_unlock(); mutex_lock(&list_lrus_mutex); list_for_each_entry(lru, &memcg_list_lrus, list) memcg_reparent_list_lru(lru, src_idx, parent); mutex_unlock(&list_lrus_mutex); } static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg, struct list_lru *lru) { int idx = memcg->kmemcg_id; return idx < 0 || xa_load(&lru->xa, idx); } int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp) { int i; unsigned long flags; struct list_lru_memcg_table { struct list_lru_memcg *mlru; struct mem_cgroup *memcg; } *table; XA_STATE(xas, &lru->xa, 0); if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru)) return 0; gfp &= GFP_RECLAIM_MASK; table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp); if (!table) return -ENOMEM; /* * Because the list_lru can be reparented to the parent cgroup's * list_lru, we should make sure that this cgroup and all its * ancestors have allocated list_lru_memcg. */ for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) { if (memcg_list_lru_allocated(memcg, lru)) break; table[i].memcg = memcg; table[i].mlru = memcg_init_list_lru_one(gfp); if (!table[i].mlru) { while (i--) kfree(table[i].mlru); kfree(table); return -ENOMEM; } } xas_lock_irqsave(&xas, flags); while (i--) { int index = READ_ONCE(table[i].memcg->kmemcg_id); struct list_lru_memcg *mlru = table[i].mlru; xas_set(&xas, index); retry: if (unlikely(index < 0 || xas_error(&xas) || xas_load(&xas))) { kfree(mlru); } else { xas_store(&xas, mlru); if (xas_error(&xas) == -ENOMEM) { xas_unlock_irqrestore(&xas, flags); if (xas_nomem(&xas, gfp)) xas_set_err(&xas, 0); xas_lock_irqsave(&xas, flags); /* * The xas lock has been released, this memcg * can be reparented before us. So reload * memcg id. More details see the comments * in memcg_reparent_list_lrus(). */ index = READ_ONCE(table[i].memcg->kmemcg_id); if (index < 0) xas_set_err(&xas, 0); else if (!xas_error(&xas) && index != xas.xa_index) xas_set(&xas, index); goto retry; } } } /* xas_nomem() is used to free memory instead of memory allocation. 
*/ if (xas.xa_alloc) xas_nomem(&xas, gfp); xas_unlock_irqrestore(&xas, flags); kfree(table); return xas_error(&xas); } #else static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) { } static void memcg_destroy_list_lru(struct list_lru *lru) { } #endif /* CONFIG_MEMCG_KMEM */ int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct lock_class_key *key, struct shrinker *shrinker) { int i; #ifdef CONFIG_MEMCG_KMEM if (shrinker) lru->shrinker_id = shrinker->id; else lru->shrinker_id = -1; if (mem_cgroup_kmem_disabled()) memcg_aware = false; #endif lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL); if (!lru->node) return -ENOMEM; for_each_node(i) { spin_lock_init(&lru->node[i].lock); if (key) lockdep_set_class(&lru->node[i].lock, key); init_one_lru(&lru->node[i].lru); } memcg_init_list_lru(lru, memcg_aware); list_lru_register(lru); return 0; } EXPORT_SYMBOL_GPL(__list_lru_init); void list_lru_destroy(struct list_lru *lru) { /* Already destroyed or not yet initialized? */ if (!lru->node) return; list_lru_unregister(lru); memcg_destroy_list_lru(lru); kfree(lru->node); lru->node = NULL; #ifdef CONFIG_MEMCG_KMEM lru->shrinker_id = -1; #endif } EXPORT_SYMBOL_GPL(list_lru_destroy);
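/*
 * Example (hedged sketch, not part of this file): how a cache might consume
 * the list_lru API implemented above.  All "demo_*" names are hypothetical,
 * error handling is trimmed, and the callback signature follows the
 * list_lru_walk_cb type used by __list_lru_walk_one().
 */
#include <linux/list_lru.h>
#include <linux/slab.h>

struct demo_obj {
	struct list_head lru;		/* linkage used by the list_lru */
	/* ... cached payload ... */
};

static struct list_lru demo_lru;

static int demo_init(void)
{
	/* memcg-unaware variant; objects are parked with
	 * list_lru_add_obj(&demo_lru, &obj->lru) once they become unused. */
	return list_lru_init(&demo_lru);
}

/* Move an object off the LRU onto a private dispose list. */
static enum lru_status demo_isolate(struct list_head *item,
				    struct list_lru_one *list,
				    spinlock_t *lock, void *cb_arg)
{
	list_lru_isolate_move(list, item, cb_arg);
	return LRU_REMOVED;
}

static void demo_shrink(int nid)
{
	unsigned long nr_to_walk = 128;
	LIST_HEAD(dispose);
	struct demo_obj *obj, *next;

	list_lru_walk_one(&demo_lru, nid, NULL, demo_isolate,
			  &dispose, &nr_to_walk);

	/* Free outside the per-node lru lock. */
	list_for_each_entry_safe(obj, next, &dispose, lru)
		kfree(obj);
}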
14 54 53 53 18 18 3 18 13 67 67 67 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PERCPU_RWSEM_H #define _LINUX_PERCPU_RWSEM_H #include <linux/atomic.h> #include <linux/percpu.h> #include <linux/rcuwait.h> #include <linux/wait.h> #include <linux/rcu_sync.h> #include <linux/lockdep.h> struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; struct rcuwait writer; wait_queue_head_t waiters; atomic_t block; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif }; #ifdef CONFIG_DEBUG_LOCK_ALLOC #define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }, #else #define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) #endif #define __DEFINE_PERCPU_RWSEM(name, is_static) \ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ is_static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss), \ .read_count = &__percpu_rwsem_rc_##name, \ .writer = __RCUWAIT_INITIALIZER(name.writer), \ .waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \ .block = ATOMIC_INIT(0), \ __PERCPU_RWSEM_DEP_MAP_INIT(name) \ } #define DEFINE_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, /* not static */) #define DEFINE_STATIC_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, static) extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); static inline void percpu_down_read(struct percpu_rw_semaphore *sem) { might_sleep(); rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); preempt_disable(); /* * We are in an RCU-sched read-side critical section, so the writer * cannot both change sem->state from readers_fast and start checking * counters while we are here. So if we see !sem->state, we know that * the writer won't be checking until we're past the preempt_enable() * and that once the synchronize_rcu() is done, the writer will see * anything we did within this RCU-sched read-size critical section. */ if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else __percpu_down_read(sem, false); /* Unconditional memory barrier */ /* * The preempt_enable() prevents the compiler from * bleeding the critical section out. */ preempt_enable(); } static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { bool ret = true; preempt_disable(); /* * Same as in percpu_down_read(). */ if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ preempt_enable(); /* * The barrier() from preempt_enable() prevents the compiler from * bleeding the critical section out. */ if (ret) rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); return ret; } static inline void percpu_up_read(struct percpu_rw_semaphore *sem) { rwsem_release(&sem->dep_map, _RET_IP_); preempt_disable(); /* * Same as in percpu_down_read(). */ if (likely(rcu_sync_is_idle(&sem->rss))) { this_cpu_dec(*sem->read_count); } else { /* * slowpath; reader will only ever wake a single blocked * writer. 
*/ smp_mb(); /* B matches C */ /* * In other words, if they see our decrement (presumably to * aggregate zero, as that is the only time it matters) they * will also see our critical section. */ this_cpu_dec(*sem->read_count); rcuwait_wake_up(&sem->writer); } preempt_enable(); } extern bool percpu_is_read_locked(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem) { return atomic_read(&sem->block); } extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, const char *, struct lock_class_key *); extern void percpu_free_rwsem(struct percpu_rw_semaphore *); #define percpu_init_rwsem(sem) \ ({ \ static struct lock_class_key rwsem_key; \ __percpu_init_rwsem(sem, #sem, &rwsem_key); \ }) #define percpu_rwsem_is_held(sem) lockdep_is_held(sem) #define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { lock_release(&sem->dep_map, ip); } static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip); } #endif
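/*
 * Example (hedged sketch, not part of this header): typical use of a
 * percpu_rw_semaphore -- many frequent readers, a rare writer.  The
 * "demo_*" names are hypothetical.
 */
#include <linux/percpu-rwsem.h>

static DEFINE_STATIC_PERCPU_RWSEM(demo_rwsem);
static int demo_state;

/* Hot path: readers only pay a per-CPU increment while no writer is active. */
static int demo_read_state(void)
{
	int val;

	percpu_down_read(&demo_rwsem);
	val = demo_state;
	percpu_up_read(&demo_rwsem);
	return val;
}

/* Cold path: the writer blocks new readers and waits for existing ones. */
static void demo_write_state(int val)
{
	percpu_down_write(&demo_rwsem);
	demo_state = val;
	percpu_up_write(&demo_rwsem);
}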
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 /* SPDX-License-Identifier: GPL-2.0-only */ /* * async.h: Asynchronous function calls for boot performance * * (C) Copyright 2009 Intel Corporation * Author: Arjan van de Ven <arjan@linux.intel.com> */ #ifndef __ASYNC_H__ #define __ASYNC_H__ #include <linux/types.h> #include <linux/list.h> #include <linux/numa.h> #include <linux/device.h> typedef u64 async_cookie_t; typedef void (*async_func_t) (void *data, async_cookie_t cookie); struct async_domain { struct list_head pending; unsigned registered:1; }; /* * domain participates in global async_synchronize_full */ #define ASYNC_DOMAIN(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 1 } /* * domain is free to go out of scope as soon as all pending work is * complete, this domain does not participate in async_synchronize_full */ #define ASYNC_DOMAIN_EXCLUSIVE(_name) \ struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \ .registered = 0 } async_cookie_t async_schedule_node(async_func_t func, void *data, int node); async_cookie_t async_schedule_node_domain(async_func_t func, void *data, int node, struct async_domain *domain); /** * async_schedule - schedule a function for asynchronous execution * @func: function to execute asynchronously * @data: data pointer to pass to the function * * Returns an async_cookie_t that may be used for checkpointing later. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule(async_func_t func, void *data) { return async_schedule_node(func, data, NUMA_NO_NODE); } /** * async_schedule_domain - schedule a function for asynchronous execution within a certain domain * @func: function to execute asynchronously * @data: data pointer to pass to the function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_domain(async_func_t func, void *data, struct async_domain *domain) { return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain); } /** * async_schedule_dev - A device specific version of async_schedule * @func: function to execute asynchronously * @dev: device argument to be passed to function * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * Note: This function may be called from atomic or non-atomic contexts. 
*/ static inline async_cookie_t async_schedule_dev(async_func_t func, struct device *dev) { return async_schedule_node(func, dev, dev_to_node(dev)); } bool async_schedule_dev_nocall(async_func_t func, struct device *dev); /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously * @dev: device argument to be passed to function * @domain: the domain * * Returns an async_cookie_t that may be used for checkpointing later. * @dev is used as both the argument for the function and to provide NUMA * context for where to run the function. By doing this we can try to * provide for the best possible outcome by operating on the device on the * CPUs closest to the device. * @domain may be used in the async_synchronize_*_domain() functions to * wait within a certain synchronization domain rather than globally. * Note: This function may be called from atomic or non-atomic contexts. */ static inline async_cookie_t async_schedule_dev_domain(async_func_t func, struct device *dev, struct async_domain *domain) { return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); } extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); extern bool current_is_async(void); extern void async_init(void); #endif
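/*
 * Example (hedged sketch, not part of this header): pushing slow setup work
 * into the async machinery and synchronizing before the result is needed.
 * "demo_*" names are hypothetical.
 */
#include <linux/async.h>
#include <linux/delay.h>

static void demo_slow_init(void *data, async_cookie_t cookie)
{
	/* runs in a worker thread, possibly in parallel with other work */
	msleep(100);	/* stand-in for slow hardware setup */
}

static void demo_start(void)
{
	async_schedule(demo_slow_init, NULL);
}

static void demo_finish(void)
{
	/* wait for all asynchronous work scheduled so far */
	async_synchronize_full();
}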
// SPDX-License-Identifier: GPL-2.0 /* * class.c - basic device class management * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2003-2004 Greg Kroah-Hartman * Copyright (c) 2003-2004 IBM Corp.
*/ #include <linux/device/class.h> #include <linux/device.h> #include <linux/module.h> #include <linux/init.h> #include <linux/string.h> #include <linux/kdev_t.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/mutex.h> #include "base.h" /* /sys/class */ static struct kset *class_kset; #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr) /** * class_to_subsys - Turn a struct class into a struct subsys_private * * @class: pointer to the struct bus_type to look up * * The driver core internals need to work on the subsys_private structure, not * the external struct class pointer. This function walks the list of * registered classes in the system and finds the matching one and returns the * internal struct subsys_private that relates to that class. * * Note, the reference count of the return value is INCREMENTED if it is not * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ struct subsys_private *class_to_subsys(const struct class *class) { struct subsys_private *sp = NULL; struct kobject *kobj; if (!class || !class_kset) return NULL; spin_lock(&class_kset->list_lock); if (list_empty(&class_kset->list)) goto done; list_for_each_entry(kobj, &class_kset->list, entry) { struct kset *kset = container_of(kobj, struct kset, kobj); sp = container_of_const(kset, struct subsys_private, subsys); if (sp->class == class) goto done; } sp = NULL; done: sp = subsys_get(sp); spin_unlock(&class_kset->list_lock); return sp; } static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct class_attribute *class_attr = to_class_attr(attr); struct subsys_private *cp = to_subsys_private(kobj); ssize_t ret = -EIO; if (class_attr->show) ret = class_attr->show(cp->class, class_attr, buf); return ret; } static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct class_attribute *class_attr = to_class_attr(attr); struct subsys_private *cp = to_subsys_private(kobj); ssize_t ret = -EIO; if (class_attr->store) ret = class_attr->store(cp->class, class_attr, buf, count); return ret; } static void class_release(struct kobject *kobj) { struct subsys_private *cp = to_subsys_private(kobj); const struct class *class = cp->class; pr_debug("class '%s': release.\n", class->name); if (class->class_release) class->class_release(class); else pr_debug("class '%s' does not have a release() function, " "be careful\n", class->name); lockdep_unregister_key(&cp->lock_key); kfree(cp); } static const struct kobj_ns_type_operations *class_child_ns_type(const struct kobject *kobj) { const struct subsys_private *cp = to_subsys_private(kobj); const struct class *class = cp->class; return class->ns_type; } static const struct sysfs_ops class_sysfs_ops = { .show = class_attr_show, .store = class_attr_store, }; static const struct kobj_type class_ktype = { .sysfs_ops = &class_sysfs_ops, .release = class_release, .child_ns_type = class_child_ns_type, }; int class_create_file_ns(const struct class *cls, const struct class_attribute *attr, const void *ns) { struct subsys_private *sp = class_to_subsys(cls); int error; if (!sp) return -EINVAL; error = sysfs_create_file_ns(&sp->subsys.kobj, &attr->attr, ns); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(class_create_file_ns); void class_remove_file_ns(const struct class *cls, const struct class_attribute *attr, const void *ns) { struct subsys_private *sp = class_to_subsys(cls); 
if (!sp) return; sysfs_remove_file_ns(&sp->subsys.kobj, &attr->attr, ns); subsys_put(sp); } EXPORT_SYMBOL_GPL(class_remove_file_ns); static struct device *klist_class_to_dev(struct klist_node *n) { struct device_private *p = to_device_private_class(n); return p->device; } static void klist_class_dev_get(struct klist_node *n) { struct device *dev = klist_class_to_dev(n); get_device(dev); } static void klist_class_dev_put(struct klist_node *n) { struct device *dev = klist_class_to_dev(n); put_device(dev); } int class_register(const struct class *cls) { struct subsys_private *cp; struct lock_class_key *key; int error; pr_debug("device class '%s': registering\n", cls->name); cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; klist_init(&cp->klist_devices, klist_class_dev_get, klist_class_dev_put); INIT_LIST_HEAD(&cp->interfaces); kset_init(&cp->glue_dirs); key = &cp->lock_key; lockdep_register_key(key); __mutex_init(&cp->mutex, "subsys mutex", key); error = kobject_set_name(&cp->subsys.kobj, "%s", cls->name); if (error) goto err_out; cp->subsys.kobj.kset = class_kset; cp->subsys.kobj.ktype = &class_ktype; cp->class = cls; error = kset_register(&cp->subsys); if (error) goto err_out; error = sysfs_create_groups(&cp->subsys.kobj, cls->class_groups); if (error) { kobject_del(&cp->subsys.kobj); kfree_const(cp->subsys.kobj.name); goto err_out; } return 0; err_out: lockdep_unregister_key(key); kfree(cp); return error; } EXPORT_SYMBOL_GPL(class_register); void class_unregister(const struct class *cls) { struct subsys_private *sp = class_to_subsys(cls); if (!sp) return; pr_debug("device class '%s': unregistering\n", cls->name); sysfs_remove_groups(&sp->subsys.kobj, cls->class_groups); kset_unregister(&sp->subsys); subsys_put(sp); } EXPORT_SYMBOL_GPL(class_unregister); static void class_create_release(const struct class *cls) { pr_debug("%s called for %s\n", __func__, cls->name); kfree(cls); } /** * class_create - create a struct class structure * @name: pointer to a string for the name of this class. * * This is used to create a struct class pointer that can then be used * in calls to device_create(). * * Returns &struct class pointer on success, or ERR_PTR() on error. * * Note, the pointer created here is to be destroyed when finished by * making a call to class_destroy(). */ struct class *class_create(const char *name) { struct class *cls; int retval; cls = kzalloc(sizeof(*cls), GFP_KERNEL); if (!cls) { retval = -ENOMEM; goto error; } cls->name = name; cls->class_release = class_create_release; retval = class_register(cls); if (retval) goto error; return cls; error: kfree(cls); return ERR_PTR(retval); } EXPORT_SYMBOL_GPL(class_create); /** * class_destroy - destroys a struct class structure * @cls: pointer to the struct class that is to be destroyed * * Note, the pointer to be destroyed must have been created with a call * to class_create(). */ void class_destroy(const struct class *cls) { if (IS_ERR_OR_NULL(cls)) return; class_unregister(cls); } EXPORT_SYMBOL_GPL(class_destroy); /** * class_dev_iter_init - initialize class device iterator * @iter: class iterator to initialize * @class: the class we wanna iterate over * @start: the device to start iterating from, if any * @type: device_type of the devices to iterate over, NULL for all * * Initialize class iterator @iter such that it iterates over devices * of @class. If @start is set, the list iteration will start there, * otherwise if it is NULL, the iteration starts at the beginning of * the list. 
*/ void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, const struct device *start, const struct device_type *type) { struct subsys_private *sp = class_to_subsys(class); struct klist_node *start_knode = NULL; if (!sp) return; if (start) start_knode = &start->p->knode_class; klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; iter->sp = sp; } EXPORT_SYMBOL_GPL(class_dev_iter_init); /** * class_dev_iter_next - iterate to the next device * @iter: class iterator to proceed * * Proceed @iter to the next device and return it. Returns NULL if * iteration is complete. * * The returned device is referenced and won't be released till * iterator is proceed to the next device or exited. The caller is * free to do whatever it wants to do with the device including * calling back into class code. */ struct device *class_dev_iter_next(struct class_dev_iter *iter) { struct klist_node *knode; struct device *dev; while (1) { knode = klist_next(&iter->ki); if (!knode) return NULL; dev = klist_class_to_dev(knode); if (!iter->type || iter->type == dev->type) return dev; } } EXPORT_SYMBOL_GPL(class_dev_iter_next); /** * class_dev_iter_exit - finish iteration * @iter: class iterator to finish * * Finish an iteration. Always call this function after iteration is * complete whether the iteration ran till the end or not. */ void class_dev_iter_exit(struct class_dev_iter *iter) { klist_iter_exit(&iter->ki); subsys_put(iter->sp); } EXPORT_SYMBOL_GPL(class_dev_iter_exit); /** * class_for_each_device - device iterator * @class: the class we're iterating * @start: the device to start with in the list, if any. * @data: data for the callback * @fn: function to be called for each device * * Iterate over @class's list of devices, and call @fn for each, * passing it @data. If @start is set, the list iteration will start * there, otherwise if it is NULL, the iteration starts at the * beginning of the list. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * * @fn is allowed to do anything including calling back into class * code. There's no locking restriction. */ int class_for_each_device(const struct class *class, const struct device *start, void *data, int (*fn)(struct device *, void *)) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; struct device *dev; int error = 0; if (!class) return -EINVAL; if (!sp) { WARN(1, "%s called for class '%s' before it was initialized", __func__, class->name); return -EINVAL; } class_dev_iter_init(&iter, class, start, NULL); while ((dev = class_dev_iter_next(&iter))) { error = fn(dev, data); if (error) break; } class_dev_iter_exit(&iter); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(class_for_each_device); /** * class_find_device - device iterator for locating a particular device * @class: the class we're iterating * @start: Device to begin with * @data: data for the match function * @match: function to check device * * This is similar to the class_for_each_dev() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. * * Note, you will need to drop the reference with put_device() after use. 
* * @match is allowed to do anything including calling back into class * code. There's no locking restriction. */ struct device *class_find_device(const struct class *class, const struct device *start, const void *data, int (*match)(struct device *, const void *)) { struct subsys_private *sp = class_to_subsys(class); struct class_dev_iter iter; struct device *dev; if (!class) return NULL; if (!sp) { WARN(1, "%s called for class '%s' before it was initialized", __func__, class->name); return NULL; } class_dev_iter_init(&iter, class, start, NULL); while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { get_device(dev); break; } } class_dev_iter_exit(&iter); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(class_find_device); int class_interface_register(struct class_interface *class_intf) { struct subsys_private *sp; const struct class *parent; struct class_dev_iter iter; struct device *dev; if (!class_intf || !class_intf->class) return -ENODEV; parent = class_intf->class; sp = class_to_subsys(parent); if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the interface is removed in the call to class_interface_unregister() */ mutex_lock(&sp->mutex); list_add_tail(&class_intf->node, &sp->interfaces); if (class_intf->add_dev) { class_dev_iter_init(&iter, parent, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) class_intf->add_dev(dev); class_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); return 0; } EXPORT_SYMBOL_GPL(class_interface_register); void class_interface_unregister(struct class_interface *class_intf) { struct subsys_private *sp; const struct class *parent = class_intf->class; struct class_dev_iter iter; struct device *dev; if (!parent) return; sp = class_to_subsys(parent); if (!sp) return; mutex_lock(&sp->mutex); list_del_init(&class_intf->node); if (class_intf->remove_dev) { class_dev_iter_init(&iter, parent, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) class_intf->remove_dev(dev); class_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); /* * Decrement the reference count twice, once for the class_to_subsys() * call in the start of this function, and the second one from the * reference increment in class_interface_register() */ subsys_put(sp); subsys_put(sp); } EXPORT_SYMBOL_GPL(class_interface_unregister); ssize_t show_class_attr_string(const struct class *class, const struct class_attribute *attr, char *buf) { struct class_attribute_string *cs; cs = container_of(attr, struct class_attribute_string, attr); return sysfs_emit(buf, "%s\n", cs->str); } EXPORT_SYMBOL_GPL(show_class_attr_string); struct class_compat { struct kobject *kobj; }; /** * class_compat_register - register a compatibility class * @name: the name of the class * * Compatibility class are meant as a temporary user-space compatibility * workaround when converting a family of class devices to a bus devices. 
*/ struct class_compat *class_compat_register(const char *name) { struct class_compat *cls; cls = kmalloc(sizeof(struct class_compat), GFP_KERNEL); if (!cls) return NULL; cls->kobj = kobject_create_and_add(name, &class_kset->kobj); if (!cls->kobj) { kfree(cls); return NULL; } return cls; } EXPORT_SYMBOL_GPL(class_compat_register); /** * class_compat_unregister - unregister a compatibility class * @cls: the class to unregister */ void class_compat_unregister(struct class_compat *cls) { kobject_put(cls->kobj); kfree(cls); } EXPORT_SYMBOL_GPL(class_compat_unregister); /** * class_compat_create_link - create a compatibility class device link to * a bus device * @cls: the compatibility class * @dev: the target bus device * @device_link: an optional device to which a "device" link should be created */ int class_compat_create_link(struct class_compat *cls, struct device *dev, struct device *device_link) { int error; error = sysfs_create_link(cls->kobj, &dev->kobj, dev_name(dev)); if (error) return error; /* * Optionally add a "device" link (typically to the parent), as a * class device would have one and we want to provide as much * backwards compatibility as possible. */ if (device_link) { error = sysfs_create_link(&dev->kobj, &device_link->kobj, "device"); if (error) sysfs_remove_link(cls->kobj, dev_name(dev)); } return error; } EXPORT_SYMBOL_GPL(class_compat_create_link); /** * class_compat_remove_link - remove a compatibility class device link to * a bus device * @cls: the compatibility class * @dev: the target bus device * @device_link: an optional device to which a "device" link was previously * created */ void class_compat_remove_link(struct class_compat *cls, struct device *dev, struct device *device_link) { if (device_link) sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(cls->kobj, dev_name(dev)); } EXPORT_SYMBOL_GPL(class_compat_remove_link); /** * class_is_registered - determine if at this moment in time, a class is * registered in the driver core or not. * @class: the class to check * * Returns a boolean to state if the class is registered in the driver core * or not. Note that the value could switch right after this call is made, * so only use this in places where you "know" it is safe to do so (usually * to determine if the specific class has been registered yet or not). * * Be careful in using this. */ bool class_is_registered(const struct class *class) { struct subsys_private *sp = class_to_subsys(class); bool is_initialized = false; if (sp) { is_initialized = true; subsys_put(sp); } return is_initialized; } EXPORT_SYMBOL_GPL(class_is_registered); int __init classes_init(void) { class_kset = kset_create_and_add("class", NULL, NULL); if (!class_kset) return -ENOMEM; return 0; }
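/*
 * Example (hedged sketch, not part of this file): the common way a driver
 * consumes the class API above -- create a class, hang a device off it so it
 * appears under /sys/class/, then tear both down.  "demo*" names and the
 * dev_t are hypothetical.
 */
#include <linux/device/class.h>
#include <linux/device.h>
#include <linux/err.h>

static struct class *demo_class;
static struct device *demo_dev;

static int demo_setup(dev_t devt)
{
	demo_class = class_create("demo");
	if (IS_ERR(demo_class))
		return PTR_ERR(demo_class);

	demo_dev = device_create(demo_class, NULL, devt, NULL, "demo0");
	if (IS_ERR(demo_dev)) {
		class_destroy(demo_class);
		return PTR_ERR(demo_dev);
	}
	return 0;
}

static void demo_teardown(dev_t devt)
{
	device_destroy(demo_class, devt);
	class_destroy(demo_class);
}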
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _MM_PERCPU_INTERNAL_H #define _MM_PERCPU_INTERNAL_H #include <linux/types.h> #include <linux/percpu.h> #include <linux/memcontrol.h> /* * pcpu_block_md is the metadata block struct. * Each chunk's bitmap is split into a number of full blocks. * All units are in terms of bits. * * The scan hint is the largest known contiguous area before the contig hint. * It is not necessarily the actual largest contig hint though. There is an * invariant that the scan_hint_start > contig_hint_start iff * scan_hint == contig_hint. This is necessary because when scanning forward, * we don't know if a new contig hint would be better than the current one. */ struct pcpu_block_md { int scan_hint; /* scan hint for block */ int scan_hint_start; /* block relative starting position of the scan hint */ int contig_hint; /* contig hint for block */ int contig_hint_start; /* block relative starting position of the contig hint */ int left_free; /* size of free space along the left side of the block */ int right_free; /* size of free space along the right side of the block */ int first_free; /* block position of first free */ int nr_bits; /* total bits responsible for */ }; struct pcpu_chunk { #ifdef CONFIG_PERCPU_STATS int nr_alloc; /* # of allocations */ size_t max_alloc_size; /* largest allocation size */ #endif struct list_head list; /* linked to pcpu_slot lists */ int free_bytes; /* free bytes in the chunk */ struct pcpu_block_md chunk_md; unsigned long *bound_map; /* boundary map */ /* * base_addr is the base address of this chunk. * To reduce false sharing, current layout is optimized to make sure * base_addr locate in the different cacheline with free_bytes and * chunk_md. 
*/ void *base_addr ____cacheline_aligned_in_smp; unsigned long *alloc_map; /* allocation map */ struct pcpu_block_md *md_blocks; /* metadata blocks */ void *data; /* chunk data */ bool immutable; /* no [de]population allowed */ bool isolated; /* isolated from active chunk slots */ int start_offset; /* the overlap with the previous region to have a page aligned base_addr */ int end_offset; /* additional area required to have the region end page aligned */ #ifdef CONFIG_MEMCG_KMEM struct obj_cgroup **obj_cgroups; /* vector of object cgroups */ #endif int nr_pages; /* # of pages served by this chunk */ int nr_populated; /* # of populated pages */ int nr_empty_pop_pages; /* # of empty populated pages */ unsigned long populated[]; /* populated bitmap */ }; extern spinlock_t pcpu_lock; extern struct list_head *pcpu_chunk_lists; extern int pcpu_nr_slots; extern int pcpu_sidelined_slot; extern int pcpu_to_depopulate_slot; extern int pcpu_nr_empty_pop_pages; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; /** * pcpu_chunk_nr_blocks - converts nr_pages to # of md_blocks * @chunk: chunk of interest * * This conversion is from the number of physical pages that the chunk * serves to the number of bitmap blocks used. */ static inline int pcpu_chunk_nr_blocks(struct pcpu_chunk *chunk) { return chunk->nr_pages * PAGE_SIZE / PCPU_BITMAP_BLOCK_SIZE; } /** * pcpu_nr_pages_to_map_bits - converts the pages to size of bitmap * @pages: number of physical pages * * This conversion is from physical pages to the number of bits * required in the bitmap. */ static inline int pcpu_nr_pages_to_map_bits(int pages) { return pages * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; } /** * pcpu_chunk_map_bits - helper to convert nr_pages to size of bitmap * @chunk: chunk of interest * * This conversion is from the number of physical pages that the chunk * serves to the number of bits in the bitmap. */ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk) { return pcpu_nr_pages_to_map_bits(chunk->nr_pages); } /** * pcpu_obj_full_size - helper to calculate size of each accounted object * @size: size of area to allocate in bytes * * For each accounted object there is an extra space which is used to store * obj_cgroup membership if kmemcg is not disabled. Charge it too. */ static inline size_t pcpu_obj_full_size(size_t size) { size_t extra_size = 0; #ifdef CONFIG_MEMCG_KMEM if (!mem_cgroup_kmem_disabled()) extra_size += size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *); #endif return size * num_possible_cpus() + extra_size; } #ifdef CONFIG_PERCPU_STATS #include <linux/spinlock.h> struct percpu_stats { u64 nr_alloc; /* lifetime # of allocations */ u64 nr_dealloc; /* lifetime # of deallocations */ u64 nr_cur_alloc; /* current # of allocations */ u64 nr_max_alloc; /* max # of live allocations */ u32 nr_chunks; /* current # of live chunks */ u32 nr_max_chunks; /* max # of live chunks */ size_t min_alloc_size; /* min allocation size */ size_t max_alloc_size; /* max allocation size */ }; extern struct percpu_stats pcpu_stats; extern struct pcpu_alloc_info pcpu_stats_ai; /* * For debug purposes. We don't care about the flexible array. 
*/ static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) { memcpy(&pcpu_stats_ai, ai, sizeof(struct pcpu_alloc_info)); /* initialize min_alloc_size to unit_size */ pcpu_stats.min_alloc_size = pcpu_stats_ai.unit_size; } /* * pcpu_stats_area_alloc - increment area allocation stats * @chunk: the location of the area being allocated * @size: size of area to allocate in bytes * * CONTEXT: * pcpu_lock. */ static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) { lockdep_assert_held(&pcpu_lock); pcpu_stats.nr_alloc++; pcpu_stats.nr_cur_alloc++; pcpu_stats.nr_max_alloc = max(pcpu_stats.nr_max_alloc, pcpu_stats.nr_cur_alloc); pcpu_stats.min_alloc_size = min(pcpu_stats.min_alloc_size, size); pcpu_stats.max_alloc_size = max(pcpu_stats.max_alloc_size, size); chunk->nr_alloc++; chunk->max_alloc_size = max(chunk->max_alloc_size, size); } /* * pcpu_stats_area_dealloc - decrement allocation stats * @chunk: the location of the area being deallocated * * CONTEXT: * pcpu_lock. */ static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) { lockdep_assert_held(&pcpu_lock); pcpu_stats.nr_dealloc++; pcpu_stats.nr_cur_alloc--; chunk->nr_alloc--; } /* * pcpu_stats_chunk_alloc - increment chunk stats */ static inline void pcpu_stats_chunk_alloc(void) { unsigned long flags; spin_lock_irqsave(&pcpu_lock, flags); pcpu_stats.nr_chunks++; pcpu_stats.nr_max_chunks = max(pcpu_stats.nr_max_chunks, pcpu_stats.nr_chunks); spin_unlock_irqrestore(&pcpu_lock, flags); } /* * pcpu_stats_chunk_dealloc - decrement chunk stats */ static inline void pcpu_stats_chunk_dealloc(void) { unsigned long flags; spin_lock_irqsave(&pcpu_lock, flags); pcpu_stats.nr_chunks--; spin_unlock_irqrestore(&pcpu_lock, flags); } #else static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) { } static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) { } static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) { } static inline void pcpu_stats_chunk_alloc(void) { } static inline void pcpu_stats_chunk_dealloc(void) { } #endif /* !CONFIG_PERCPU_STATS */ #endif
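/*
 * A minimal stand-alone sketch of the sizing math used by the helpers above,
 * assuming the common values PAGE_SIZE = 4096, PCPU_MIN_ALLOC_SIZE = 4,
 * PCPU_BITMAP_BLOCK_SIZE = PAGE_SIZE and 8 possible CPUs; all of these are
 * configuration dependent, so treat the numbers as illustrative only.
 */
#include <stdio.h>

#define PAGE_SIZE		4096	/* assumed */
#define PCPU_MIN_ALLOC_SIZE	4	/* assumed: one alloc-map bit per 4 bytes */
#define PCPU_BITMAP_BLOCK_SIZE	PAGE_SIZE /* assumed: one md block per page */
#define NR_POSSIBLE_CPUS	8	/* assumed */

static int nr_pages_to_map_bits(int pages)
{
	/* mirrors pcpu_nr_pages_to_map_bits() */
	return pages * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE;
}

static int chunk_nr_blocks(int nr_pages)
{
	/* mirrors pcpu_chunk_nr_blocks() */
	return nr_pages * PAGE_SIZE / PCPU_BITMAP_BLOCK_SIZE;
}

static size_t obj_full_size(size_t size)
{
	/* mirrors pcpu_obj_full_size() with kmemcg accounting enabled */
	size_t extra = size / PCPU_MIN_ALLOC_SIZE * sizeof(void *);

	return size * NR_POSSIBLE_CPUS + extra;
}

int main(void)
{
	printf("8-page chunk: %d alloc-map bits, %d metadata blocks\n",
	       nr_pages_to_map_bits(8), chunk_nr_blocks(8));
	printf("64-byte object charges %zu bytes in total\n",
	       obj_full_size(64));
	return 0;
}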
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_TRAPS_H
#define _ASM_X86_TRAPS_H

#include <linux/context_tracking_state.h>
#include <linux/kprobes.h>

#include <asm/debugreg.h>
#include <asm/idtentry.h>
#include <asm/siginfo.h>			/* TRAP_TRACE, ... */
#include <asm/trap_pf.h>

#ifdef CONFIG_X86_64
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif

extern int ibt_selftest(void);
extern int ibt_selftest_noendbr(void);

#ifdef CONFIG_X86_F00F_BUG
/* For handling the FOOF bug */
void handle_invalid_op(struct pt_regs *regs);
#endif

static inline int get_si_code(unsigned long condition)
{
	if (condition & DR_STEP)
		return TRAP_TRACE;
	else if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))
		return TRAP_HWBKPT;
	else
		return TRAP_BRKPT;
}

extern int panic_on_unrecovered_nmi;

void math_emulate(struct math_emu_info *);

bool fault_in_kernel_space(unsigned long address);

#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(struct pt_regs *regs,
				      unsigned long fault_address,
				      struct stack_info *info);
#endif

static inline void cond_local_irq_enable(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
}

static inline void cond_local_irq_disable(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_disable();
}

#endif /* _ASM_X86_TRAPS_H */
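/*
 * A stand-alone sketch of the get_si_code() decoding above, showing that a
 * single-step condition (DR_STEP) takes precedence and is reported as
 * TRAP_TRACE even when breakpoint bits are also set. The DR6 bit positions
 * and si_code values are assumptions for illustration; in the kernel they
 * come from <asm/debugreg.h> and the siginfo definitions.
 */
#include <stdio.h>

#define DR_TRAP0	0x1	/* assumed bit layout */
#define DR_TRAP1	0x2
#define DR_TRAP2	0x4
#define DR_TRAP3	0x8
#define DR_STEP		0x4000

#define TRAP_BRKPT	1	/* assumed si_code values */
#define TRAP_TRACE	2
#define TRAP_HWBKPT	4

static int get_si_code(unsigned long condition)
{
	if (condition & DR_STEP)
		return TRAP_TRACE;
	else if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))
		return TRAP_HWBKPT;
	else
		return TRAP_BRKPT;
}

int main(void)
{
	printf("step only           -> %d\n", get_si_code(DR_STEP));
	printf("step + breakpoint 1 -> %d\n", get_si_code(DR_STEP | DR_TRAP1));
	printf("breakpoint 1 only   -> %d\n", get_si_code(DR_TRAP1));
	printf("neither             -> %d\n", get_si_code(0));
	return 0;
}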
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H #include <net/addrconf.h> #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> #include <net/lwtunnel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> #include <net/nexthop.h> struct route_info { __u8 type; __u8 length; __u8 prefix_len; #if defined(__BIG_ENDIAN_BITFIELD) __u8 reserved_h:3, route_pref:2, reserved_l:3; #elif defined(__LITTLE_ENDIAN_BITFIELD) __u8 reserved_l:3, route_pref:2, reserved_h:3; #endif __be32 lifetime; __u8 prefix[]; /* 0,8 or 16 */ }; #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 #define RT6_LOOKUP_F_HAS_SADDR 0x00000004 #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 #define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 #define RT6_LOOKUP_F_DST_NOREF 0x00000080 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675) * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header */ #define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr)) /* * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate * between IPV6_ADDR_PREFERENCES socket option values * IPV6_PREFER_SRC_TMP = 0x1 * IPV6_PREFER_SRC_PUBLIC = 0x2 * IPV6_PREFER_SRC_COA = 0x4 * and above RT6_LOOKUP_F_SRCPREF_xxx flags. 
*/ static inline int rt6_srcprefs2flags(unsigned int srcprefs) { return (srcprefs & IPV6_PREFER_SRC_MASK) << 3; } static inline unsigned int rt6_flags2srcprefs(int flags) { return (flags >> 3) & IPV6_PREFER_SRC_MASK; } static inline bool rt6_need_strict(const struct in6_addr *daddr) { return ipv6_addr_type(daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } /* fib entries using a nexthop object can not be coalesced into * a multipath route */ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { /* the RTF_ADDRCONF flag filters out RA's */ return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh && f6i->fib6_nh->fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, struct flowi6 *fl6, const struct sk_buff *skb, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); static inline struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { return ip6_route_output_flags(net, sk, fl6, 0); } /* Only conditionally release dst if flags indicates * !RT6_LOOKUP_F_DST_NOREF or dst is in uncached_list. */ static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags) { if (!(flags & RT6_LOOKUP_F_DST_NOREF) || !list_empty(&rt->dst.rt_uncached)) ip6_rt_put(rt); } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags); struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int ifindex, struct flowi6 *fl6, const struct sk_buff *skb, int flags); void ip6_route_init_special_entries(void); int ip6_route_init(void); void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg); int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); int ip6_ins_rt(struct net *net, struct fib6_info *f6i); int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify); void rt6_flush_exceptions(struct fib6_info *f6i); void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now); static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { int err = 0; if (f6i && f6i->fib6_prefsrc.plen) { *saddr = f6i->fib6_prefsrc.addr; } else { struct net_device *dev = f6i ? 
fib6_info_nh_dev(f6i) : NULL; err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); } return err; } struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, const struct sk_buff *skb, int flags); u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *hkeys); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); void fib6_force_start_gc(struct net *net); struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev, const struct in6_addr *addr, bool anycast, gfp_t gfp_flags, struct netlink_ext_ack *extack); struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags); /* * support functions for ND * */ struct fib6_info *rt6_get_dflt_router(struct net *net, const struct in6_addr *addr, struct net_device *dev); struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref, u32 defrtr_usr_metric, int lifetime); void rt6_purge_dflt_routers(struct net *net); int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); struct netlink_callback; struct rt6_rtnl_dump_arg { struct sk_buff *skb; struct netlink_callback *cb; struct net *net; struct fib_dump_filter filter; }; int rt6_dump_route(struct fib6_info *f6i, void *p_arg, unsigned int skip); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); void rt6_sync_up(struct net_device *dev, unsigned char nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); void rt6_multipath_rebalance(struct fib6_info *f6i); void rt6_uncached_list_add(struct rt6_info *rt); void rt6_uncached_list_del(struct rt6_info *rt); static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { const struct dst_entry *dst = skb_dst(skb); const struct rt6_info *rt6 = NULL; if (dst) rt6 = container_of(dst, struct rt6_info, dst); return rt6; } /* * Store a destination cache entry in a socket */ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif } void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, const struct flowi6 *fl6); static inline bool ipv6_unicast_destination(const struct sk_buff *skb) { struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); return rt->rt6i_flags & RTF_LOCAL; } static inline bool ipv6_anycast_destination(const struct dst_entry *dst, const struct in6_addr *daddr) { struct rt6_info *rt = (struct rt6_info *)dst; return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen < 127 && !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && 
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) { const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(dst->dev->mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu); } else { mtu = dst_mtu(dst); } return mtu; } static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); return pmtudisc != IPV6_PMTUDISC_INTERFACE && pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); return pmtudisc < IPV6_PMTUDISC_DO || pmtudisc == IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, const struct in6_addr *daddr) { if (rt->rt6i_flags & RTF_GATEWAY) return &rt->rt6i_gateway; else if (unlikely(rt->rt6i_flags & RTF_CACHE)) return &rt->rt6i_dst.addr; else return daddr; } static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { struct fib6_nh *nha, *nhb; if (a->nh || b->nh) return nexthop_cmp(a->nh, b->nh); nha = a->fib6_nh; nhb = b->fib6_nh; return nha->fib_nh_dev == nhb->fib_nh_dev && ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { struct inet6_dev *idev; unsigned int mtu; if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) { mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; } mtu = IPV6_MIN_MTU; rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); out: return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } u32 ip6_mtu_from_fib6(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr); struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, struct net_device *dev, struct sk_buff *skb, const void *daddr); #endif
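/*
 * A minimal stand-alone sketch of the rt6_srcprefs2flags()/rt6_flags2srcprefs()
 * translation described above: the IPV6_PREFER_SRC_* bits (0x1/0x2/0x4) are
 * shifted left by three so they line up with the RT6_LOOKUP_F_SRCPREF_* lookup
 * flags (0x8/0x10/0x20). IPV6_PREFER_SRC_MASK is assumed to cover just those
 * three bits for the purposes of this sketch.
 */
#include <assert.h>
#include <stdio.h>

#define IPV6_PREFER_SRC_TMP		0x1
#define IPV6_PREFER_SRC_PUBLIC		0x2
#define IPV6_PREFER_SRC_COA		0x4
#define IPV6_PREFER_SRC_MASK		0x7	/* assumed for the sketch */

#define RT6_LOOKUP_F_SRCPREF_TMP	0x00000008
#define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
#define RT6_LOOKUP_F_SRCPREF_COA	0x00000020

static int srcprefs2flags(unsigned int srcprefs)
{
	return (srcprefs & IPV6_PREFER_SRC_MASK) << 3;
}

static unsigned int flags2srcprefs(int flags)
{
	return (flags >> 3) & IPV6_PREFER_SRC_MASK;
}

int main(void)
{
	unsigned int prefs = IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_COA;
	int flags = srcprefs2flags(prefs);

	assert(flags == (RT6_LOOKUP_F_SRCPREF_TMP | RT6_LOOKUP_F_SRCPREF_COA));
	assert(flags2srcprefs(flags) == prefs);
	printf("prefs 0x%x <-> lookup flags 0x%x\n", prefs, flags);
	return 0;
}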
19 19 19 19 19 17 17 17 17 19 19 19 17 17 17 16 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 // SPDX-License-Identifier: GPL-2.0-or-later /* * Device management routines * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/slab.h> #include <linux/time.h> #include <linux/export.h> #include <linux/errno.h> #include <sound/core.h> /** * snd_device_new - create an ALSA device component * @card: the card instance * @type: the device type, SNDRV_DEV_XXX * @device_data: the data pointer of this device * @ops: the operator table * * Creates a new device component for the given data pointer. * The device will be assigned to the card and managed together * by the card. * * The data pointer plays a role as the identifier, too, so the * pointer address must be unique and unchanged. * * Return: Zero if successful, or a negative error code on failure. */ int snd_device_new(struct snd_card *card, enum snd_device_type type, void *device_data, const struct snd_device_ops *ops) { struct snd_device *dev; struct list_head *p; if (snd_BUG_ON(!card || !device_data || !ops)) return -ENXIO; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; INIT_LIST_HEAD(&dev->list); dev->card = card; dev->type = type; dev->state = SNDRV_DEV_BUILD; dev->device_data = device_data; dev->ops = ops; /* insert the entry in an incrementally sorted list */ list_for_each_prev(p, &card->devices) { struct snd_device *pdev = list_entry(p, struct snd_device, list); if ((unsigned int)pdev->type <= (unsigned int)type) break; } list_add(&dev->list, p); return 0; } EXPORT_SYMBOL(snd_device_new); static void __snd_device_disconnect(struct snd_device *dev) { if (dev->state == SNDRV_DEV_REGISTERED) { if (dev->ops->dev_disconnect && dev->ops->dev_disconnect(dev)) dev_err(dev->card->dev, "device disconnect failure\n"); dev->state = SNDRV_DEV_DISCONNECTED; } } static void __snd_device_free(struct snd_device *dev) { /* unlink */ list_del(&dev->list); __snd_device_disconnect(dev); if (dev->ops->dev_free) { if (dev->ops->dev_free(dev)) dev_err(dev->card->dev, "device free failure\n"); } kfree(dev); } static struct snd_device *look_for_dev(struct snd_card *card, void *device_data) { struct snd_device *dev; list_for_each_entry(dev, &card->devices, list) if (dev->device_data == device_data) return dev; return NULL; } /** * snd_device_disconnect - disconnect the device * @card: the card instance * @device_data: the data pointer to disconnect * * Turns the device into the disconnection state, invoking * dev_disconnect callback, if the device was already registered. * * Usually called from snd_card_disconnect(). 
* * Return: Zero if successful, or a negative error code on failure or if the * device not found. */ void snd_device_disconnect(struct snd_card *card, void *device_data) { struct snd_device *dev; if (snd_BUG_ON(!card || !device_data)) return; dev = look_for_dev(card, device_data); if (dev) __snd_device_disconnect(dev); else dev_dbg(card->dev, "device disconnect %p (from %pS), not found\n", device_data, __builtin_return_address(0)); } EXPORT_SYMBOL_GPL(snd_device_disconnect); /** * snd_device_free - release the device from the card * @card: the card instance * @device_data: the data pointer to release * * Removes the device from the list on the card and invokes the * callbacks, dev_disconnect and dev_free, corresponding to the state. * Then release the device. */ void snd_device_free(struct snd_card *card, void *device_data) { struct snd_device *dev; if (snd_BUG_ON(!card || !device_data)) return; dev = look_for_dev(card, device_data); if (dev) __snd_device_free(dev); else dev_dbg(card->dev, "device free %p (from %pS), not found\n", device_data, __builtin_return_address(0)); } EXPORT_SYMBOL(snd_device_free); static int __snd_device_register(struct snd_device *dev) { if (dev->state == SNDRV_DEV_BUILD) { if (dev->ops->dev_register) { int err = dev->ops->dev_register(dev); if (err < 0) return err; } dev->state = SNDRV_DEV_REGISTERED; } return 0; } /** * snd_device_register - register the device * @card: the card instance * @device_data: the data pointer to register * * Registers the device which was already created via * snd_device_new(). Usually this is called from snd_card_register(), * but it can be called later if any new devices are created after * invocation of snd_card_register(). * * Return: Zero if successful, or a negative error code on failure or if the * device not found. */ int snd_device_register(struct snd_card *card, void *device_data) { struct snd_device *dev; if (snd_BUG_ON(!card || !device_data)) return -ENXIO; dev = look_for_dev(card, device_data); if (dev) return __snd_device_register(dev); snd_BUG(); return -ENXIO; } EXPORT_SYMBOL(snd_device_register); /* * register all the devices on the card. * called from init.c */ int snd_device_register_all(struct snd_card *card) { struct snd_device *dev; int err; if (snd_BUG_ON(!card)) return -ENXIO; list_for_each_entry(dev, &card->devices, list) { err = __snd_device_register(dev); if (err < 0) return err; } return 0; } /* * disconnect all the devices on the card. * called from init.c */ void snd_device_disconnect_all(struct snd_card *card) { struct snd_device *dev; if (snd_BUG_ON(!card)) return; list_for_each_entry_reverse(dev, &card->devices, list) __snd_device_disconnect(dev); } /* * release all the devices on the card. * called from init.c */ void snd_device_free_all(struct snd_card *card) { struct snd_device *dev, *next; if (snd_BUG_ON(!card)) return; list_for_each_entry_safe_reverse(dev, next, &card->devices, list) { /* exception: free ctl and lowlevel stuff later */ if (dev->type == SNDRV_DEV_CONTROL || dev->type == SNDRV_DEV_LOWLEVEL) continue; __snd_device_free(dev); } /* free all */ list_for_each_entry_safe_reverse(dev, next, &card->devices, list) __snd_device_free(dev); } /** * snd_device_get_state - Get the current state of the given device * @card: the card instance * @device_data: the data pointer to release * * Returns the current state of the given device object. For the valid * device, either @SNDRV_DEV_BUILD, @SNDRV_DEV_REGISTERED or * @SNDRV_DEV_DISCONNECTED is returned. 
* Or for a non-existing device, -1 is returned as an error. * * Return: the current state, or -1 if not found */ int snd_device_get_state(struct snd_card *card, void *device_data) { struct snd_device *dev; dev = look_for_dev(card, device_data); if (dev) return dev->state; return -1; } EXPORT_SYMBOL_GPL(snd_device_get_state);
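/*
 * A minimal sketch of how a driver-side component typically uses the API
 * above: allocate private data, hand it to snd_device_new() together with a
 * snd_device_ops table, and let the card release it through ->dev_free when
 * snd_device_free_all() runs. The "my_chip" structure and its fields are
 * hypothetical; only the snd_device_* calls, the ops callbacks and
 * SNDRV_DEV_LOWLEVEL come from the code above.
 */
#include <linux/slab.h>
#include <sound/core.h>

struct my_chip {			/* hypothetical driver-private data */
	struct snd_card *card;
	int irq;
};

static int my_chip_dev_free(struct snd_device *device)
{
	kfree(device->device_data);	/* release what my_chip_create() allocated */
	return 0;
}

static int my_chip_create(struct snd_card *card)
{
	static const struct snd_device_ops ops = {
		.dev_free = my_chip_dev_free,
	};
	struct my_chip *chip;

	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
	if (!chip)
		return -ENOMEM;
	chip->card = card;

	/* the card now owns the chip and frees it via ->dev_free */
	return snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
}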
2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 // SPDX-License-Identifier: GPL-2.0-or-later /* * fs/eventpoll.c (Efficient event retrieval implementation) * Copyright (C) 2001,...,2009 Davide Libenzi * * Davide Libenzi <davidel@xmailserver.org> */ #include <linux/init.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/signal.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/string.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/spinlock.h> #include <linux/syscalls.h> #include <linux/rbtree.h> #include <linux/wait.h> #include <linux/eventpoll.h> #include <linux/mount.h> #include <linux/bitops.h> #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> #include <linux/uaccess.h> #include <asm/io.h> #include <asm/mman.h> #include <linux/atomic.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/compat.h> #include <linux/rculist.h> #include <linux/capability.h> #include <net/busy_poll.h> /* * LOCKING: * There are three level of locking required by epoll : * * 1) epnested_mutex (mutex) * 2) ep->mtx (mutex) * 3) ep->lock (rwlock) * * The acquire order is the one listed above, from 1 to 3. * We need a rwlock (ep->lock) because we manipulate objects * from inside the poll callback, that might be triggered from * a wake_up() that in turn might be called from IRQ context. * So we can't sleep inside the poll callback and hence we need * a spinlock. During the event transfer loop (from kernel to * user space) we could end up sleeping due a copy_to_user(), so * we need a lock that will allow us to sleep. This lock is a * mutex (ep->mtx). It is acquired during the event transfer loop, * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). * The epnested_mutex is acquired when inserting an epoll fd onto another * epoll fd. We do this so that we walk the epoll tree and ensure that this * insertion does not create a cycle of epoll file descriptors, which * could lead to deadlock. 
We need a global mutex to prevent two * simultaneous inserts (A into B and B into A) from racing and * constructing a cycle without either insert observing that it is * going to. * It is necessary to acquire multiple "ep->mtx"es at once in the * case when one epoll fd is added to another. In this case, we * always acquire the locks in the order of nesting (i.e. after * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired * before e2->mtx). Since we disallow cycles of epoll file * descriptors, this ensures that the mutexes are well-ordered. In * order to communicate this nesting to lockdep, when walking a tree * of epoll file descriptors, we use the current recursion depth as * the lockdep subkey. * It is possible to drop the "ep->mtx" and to use the global * mutex "epnested_mutex" (together with "ep->lock") to have it working, * but having "ep->mtx" will make the interface more scalable. * Events that require holding "epnested_mutex" are very rare, while for * normal operations the epoll private "ep->mtx" will guarantee * a better scalability. */ /* Epoll private bits inside the event mask */ #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE) #define EPOLLINOUT_BITS (EPOLLIN | EPOLLOUT) #define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | EPOLLERR | EPOLLHUP | \ EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) #define EP_UNACTIVE_PTR ((void *) -1L) #define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry)) struct epoll_filefd { struct file *file; int fd; } __packed; /* Wait structure used by the poll hooks */ struct eppoll_entry { /* List header used to link this structure to the "struct epitem" */ struct eppoll_entry *next; /* The "base" pointer is set to the container "struct epitem" */ struct epitem *base; /* * Wait queue item that will be linked to the target file wait * queue head. */ wait_queue_entry_t wait; /* The wait queue head that linked the "wait" wait queue item */ wait_queue_head_t *whead; }; /* * Each file descriptor added to the eventpoll interface will * have an entry of this type linked to the "rbr" RB tree. * Avoid increasing the size of this struct, there can be many thousands * of these on a server and we do not want this to take another cache line. */ struct epitem { union { /* RB tree node links this structure to the eventpoll RB tree */ struct rb_node rbn; /* Used to free the struct epitem */ struct rcu_head rcu; }; /* List header used to link this structure to the eventpoll ready list */ struct list_head rdllink; /* * Works together "struct eventpoll"->ovflist in keeping the * single linked chain of items. 
*/ struct epitem *next; /* The file descriptor information this item refers to */ struct epoll_filefd ffd; /* * Protected by file->f_lock, true for to-be-released epitem already * removed from the "struct file" items list; together with * eventpoll->refcount orchestrates "struct eventpoll" disposal */ bool dying; /* List containing poll wait queues */ struct eppoll_entry *pwqlist; /* The "container" of this item */ struct eventpoll *ep; /* List header used to link this item to the "struct file" items list */ struct hlist_node fllink; /* wakeup_source used when EPOLLWAKEUP is set */ struct wakeup_source __rcu *ws; /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; /* * This structure is stored inside the "private_data" member of the file * structure and represents the main data structure for the eventpoll * interface. */ struct eventpoll { /* * This mutex is used to ensure that files are not removed * while epoll is using them. This is held during the event * collection loop, the file cleanup path, the epoll file exit * code and the ctl operations. */ struct mutex mtx; /* Wait queue used by sys_epoll_wait() */ wait_queue_head_t wq; /* Wait queue used by file->poll() */ wait_queue_head_t poll_wait; /* List of ready file descriptors */ struct list_head rdllist; /* Lock which protects rdllist and ovflist */ rwlock_t lock; /* RB tree root used to store monitored fd structs */ struct rb_root_cached rbr; /* * This is a single linked list that chains all the "struct epitem" that * happened while transferring ready events to userspace w/out * holding ->lock. */ struct epitem *ovflist; /* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */ struct wakeup_source *ws; /* The user that created the eventpoll descriptor */ struct user_struct *user; struct file *file; /* used to optimize loop detection check */ u64 gen; struct hlist_head refs; /* * usage count, used together with epitem->dying to * orchestrate the disposal of this struct */ refcount_t refcount; #ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ unsigned int napi_id; /* busy poll timeout */ u32 busy_poll_usecs; /* busy poll packet budget */ u16 busy_poll_budget; bool prefer_busy_poll; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC /* tracks wakeup nests for lockdep validation */ u8 nests; #endif }; /* Wrapper struct used by poll queueing */ struct ep_pqueue { poll_table pt; struct epitem *epi; }; /* * Configuration options available inside /proc/sys/fs/epoll/ */ /* Maximum number of epoll watched descriptors, per user */ static long max_user_watches __read_mostly; /* Used for cycles detection */ static DEFINE_MUTEX(epnested_mutex); static u64 loop_check_gen = 0; /* Used to check for epoll file descriptor inclusion loops */ static struct eventpoll *inserting_into; /* Slab cache used to allocate "struct epitem" */ static struct kmem_cache *epi_cache __ro_after_init; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __ro_after_init; /* * List of files with newly added links, where we may need to limit the number * of emanating paths. Protected by the epnested_mutex. 
*/ struct epitems_head { struct hlist_head epitems; struct epitems_head *next; }; static struct epitems_head *tfile_check_list = EP_UNACTIVE_PTR; static struct kmem_cache *ephead_cache __ro_after_init; static inline void free_ephead(struct epitems_head *head) { if (head) kmem_cache_free(ephead_cache, head); } static void list_file(struct file *file) { struct epitems_head *head; head = container_of(file->f_ep, struct epitems_head, epitems); if (!head->next) { head->next = tfile_check_list; tfile_check_list = head; } } static void unlist_file(struct epitems_head *head) { struct epitems_head *to_free = head; struct hlist_node *p = rcu_dereference(hlist_first_rcu(&head->epitems)); if (p) { struct epitem *epi= container_of(p, struct epitem, fllink); spin_lock(&epi->ffd.file->f_lock); if (!hlist_empty(&head->epitems)) to_free = NULL; head->next = NULL; spin_unlock(&epi->ffd.file->f_lock); } free_ephead(to_free); } #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static long long_zero; static long long_max = LONG_MAX; static struct ctl_table epoll_table[] = { { .procname = "max_user_watches", .data = &max_user_watches, .maxlen = sizeof(max_user_watches), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &long_zero, .extra2 = &long_max, }, }; static void __init epoll_sysctls_init(void) { register_sysctl("fs/epoll", epoll_table); } #else #define epoll_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ static const struct file_operations eventpoll_fops; static inline int is_file_epoll(struct file *f) { return f->f_op == &eventpoll_fops; } /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, struct file *file, int fd) { ffd->file = file; ffd->fd = fd; } /* Compare RB tree keys */ static inline int ep_cmp_ffd(struct epoll_filefd *p1, struct epoll_filefd *p2) { return (p1->file > p2->file ? +1: (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } /* Tells us if the item is currently linked */ static inline int ep_is_linked(struct epitem *epi) { return !list_empty(&epi->rdllink); } static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait); } /* Get the "struct epitem" from a wait queue pointer */ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait)->base; } /** * ep_events_available - Checks if ready events might be available. * * @ep: Pointer to the eventpoll context. * * Return: a value different than %zero if ready events are available, * or %zero otherwise. */ static inline int ep_events_available(struct eventpoll *ep) { return !list_empty_careful(&ep->rdllist) || READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; } #ifdef CONFIG_NET_RX_BUSY_POLL /** * busy_loop_ep_timeout - check if busy poll has timed out. The timeout value * from the epoll instance ep is preferred, but if it is not set fallback to * the system-wide global via busy_loop_timeout. * * @start_time: The start time used to compute the remaining time until timeout. * @ep: Pointer to the eventpoll context. * * Return: true if the timeout has expired, false otherwise. 
*/ static bool busy_loop_ep_timeout(unsigned long start_time, struct eventpoll *ep) { unsigned long bp_usec = READ_ONCE(ep->busy_poll_usecs); if (bp_usec) { unsigned long end_time = start_time + bp_usec; unsigned long now = busy_loop_current_time(); return time_after(now, end_time); } else { return busy_loop_timeout(start_time); } } static bool ep_busy_loop_on(struct eventpoll *ep) { return !!ep->busy_poll_usecs || net_busy_loop_on(); } static bool ep_busy_loop_end(void *p, unsigned long start_time) { struct eventpoll *ep = p; return ep_events_available(ep) || busy_loop_ep_timeout(start_time, ep); } /* * Busy poll if globally on and supporting sockets found && no events, * busy loop will return if need_resched or ep_events_available. * * we must do our busy polling with irqs enabled */ static bool ep_busy_loop(struct eventpoll *ep, int nonblock) { unsigned int napi_id = READ_ONCE(ep->napi_id); u16 budget = READ_ONCE(ep->busy_poll_budget); bool prefer_busy_poll = READ_ONCE(ep->prefer_busy_poll); if (!budget) budget = BUSY_POLL_BUDGET; if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) { napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, prefer_busy_poll, budget); if (ep_events_available(ep)) return true; /* * Busy poll timed out. Drop NAPI ID for now, we can add * it back in when we have moved a socket with a valid NAPI * ID onto the ready list. */ ep->napi_id = 0; return false; } return false; } /* * Set epoll busy poll NAPI ID from sk. */ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) { struct eventpoll *ep = epi->ep; unsigned int napi_id; struct socket *sock; struct sock *sk; if (!ep_busy_loop_on(ep)) return; sock = sock_from_file(epi->ffd.file); if (!sock) return; sk = sock->sk; if (!sk) return; napi_id = READ_ONCE(sk->sk_napi_id); /* Non-NAPI IDs can be rejected * or * Nothing to do if we already have this ID */ if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id) return; /* record NAPI ID for use in next busy poll */ ep->napi_id = napi_id; } static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct eventpoll *ep = file->private_data; void __user *uarg = (void __user *)arg; struct epoll_params epoll_params; switch (cmd) { case EPIOCSPARAMS: if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params))) return -EFAULT; /* pad byte must be zero */ if (epoll_params.__pad) return -EINVAL; if (epoll_params.busy_poll_usecs > S32_MAX) return -EINVAL; if (epoll_params.prefer_busy_poll > 1) return -EINVAL; if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT && !capable(CAP_NET_ADMIN)) return -EPERM; WRITE_ONCE(ep->busy_poll_usecs, epoll_params.busy_poll_usecs); WRITE_ONCE(ep->busy_poll_budget, epoll_params.busy_poll_budget); WRITE_ONCE(ep->prefer_busy_poll, epoll_params.prefer_busy_poll); return 0; case EPIOCGPARAMS: memset(&epoll_params, 0, sizeof(epoll_params)); epoll_params.busy_poll_usecs = READ_ONCE(ep->busy_poll_usecs); epoll_params.busy_poll_budget = READ_ONCE(ep->busy_poll_budget); epoll_params.prefer_busy_poll = READ_ONCE(ep->prefer_busy_poll); if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params))) return -EFAULT; return 0; default: return -ENOIOCTLCMD; } } #else static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock) { return false; } static inline void ep_set_busy_poll_napi_id(struct epitem *epi) { } static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return -EOPNOTSUPP; } #endif /* CONFIG_NET_RX_BUSY_POLL */ /* * As described in commit 0ccf831cb lockdep: 
annotate epoll * the use of wait queues used by epoll is done in a very controlled * manner. Wake ups can nest inside each other, but are never done * with the same locking. For example: * * dfd = socket(...); * efd1 = epoll_create(); * efd2 = epoll_create(); * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...); * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...); * * When a packet arrives to the device underneath "dfd", the net code will * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a * callback wakeup entry on that queue, and the wake_up() performed by the * "dfd" net code will end up in ep_poll_callback(). At this point epoll * (efd1) notices that it may have some event ready, so it needs to wake up * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake() * that ends up in another wake_up(), after having checked about the * recursion constraints. That are, no more than EP_MAX_NESTS, to avoid * stack blasting. * * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle * this special case of epoll. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, unsigned pollflags) { struct eventpoll *ep_src; unsigned long flags; u8 nests = 0; /* * To set the subclass or nesting level for spin_lock_irqsave_nested() * it might be natural to create a per-cpu nest count. However, since * we can recurse on ep->poll_wait.lock, and a non-raw spinlock can * schedule() in the -rt kernel, the per-cpu variable are no longer * protected. Thus, we are introducing a per eventpoll nest field. * If we are not being call from ep_poll_callback(), epi is NULL and * we are at the first level of nesting, 0. Otherwise, we are being * called from ep_poll_callback() and if a previous wakeup source is * not an epoll file itself, we are at depth 1 since the wakeup source * is depth 0. If the wakeup source is a previous epoll file in the * wakeup chain then we use its nests value and record ours as * nests + 1. The previous epoll file nests value is stable since its * already holding its own poll_wait.lock. */ if (epi) { if ((is_file_epoll(epi->ffd.file))) { ep_src = epi->ffd.file->private_data; nests = ep_src->nests; } else { nests = 1; } } spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); ep->nests = nests + 1; wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); ep->nests = 0; spin_unlock_irqrestore(&ep->poll_wait.lock, flags); } #else static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, __poll_t pollflags) { wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); } #endif static void ep_remove_wait_queue(struct eppoll_entry *pwq) { wait_queue_head_t *whead; rcu_read_lock(); /* * If it is cleared by POLLFREE, it should be rcu-safe. * If we read NULL we need a barrier paired with * smp_store_release() in ep_poll_callback(), otherwise * we rely on whead->lock. */ whead = smp_load_acquire(&pwq->whead); if (whead) remove_wait_queue(whead, &pwq->wait); rcu_read_unlock(); } /* * This function unregisters poll callbacks from the associated file * descriptor. Must be called with "mtx" held. 
*/ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) { struct eppoll_entry **p = &epi->pwqlist; struct eppoll_entry *pwq; while ((pwq = *p) != NULL) { *p = pwq->next; ep_remove_wait_queue(pwq); kmem_cache_free(pwq_cache, pwq); } } /* call only when ep->mtx is held */ static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi) { return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx)); } /* call only when ep->mtx is held */ static inline void ep_pm_stay_awake(struct epitem *epi) { struct wakeup_source *ws = ep_wakeup_source(epi); if (ws) __pm_stay_awake(ws); } static inline bool ep_has_wakeup_source(struct epitem *epi) { return rcu_access_pointer(epi->ws) ? true : false; } /* call when ep->mtx cannot be held (ep_poll_callback) */ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) { struct wakeup_source *ws; rcu_read_lock(); ws = rcu_dereference(epi->ws); if (ws) __pm_stay_awake(ws); rcu_read_unlock(); } /* * ep->mutex needs to be held because we could be hit by * eventpoll_release_file() and epoll_ctl(). */ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist) { /* * Steal the ready list, and re-init the original one to the * empty list. Also, set ep->ovflist to NULL so that events * happening while looping w/out locks, are not lost. We cannot * have the poll callback to queue directly on ep->rdllist, * because we want the "sproc" callback to be able to do it * in a lockless way. */ lockdep_assert_irqs_enabled(); write_lock_irq(&ep->lock); list_splice_init(&ep->rdllist, txlist); WRITE_ONCE(ep->ovflist, NULL); write_unlock_irq(&ep->lock); } static void ep_done_scan(struct eventpoll *ep, struct list_head *txlist) { struct epitem *epi, *nepi; write_lock_irq(&ep->lock); /* * During the time we spent inside the "sproc" callback, some * other events might have been queued by the poll callback. * We re-insert them inside the main ready-list here. */ for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { /* * We need to check if the item is already in the list. * During the "sproc" callback execution time, items are * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ if (!ep_is_linked(epi)) { /* * ->ovflist is LIFO, so we have to reverse it in order * to keep in FIFO. */ list_add(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after * releasing the lock, events will be queued in the normal way inside * ep->rdllist. */ WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); /* * Quickly re-inject items left on "txlist". */ list_splice(txlist, &ep->rdllist); __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); } write_unlock_irq(&ep->lock); } static void ep_get(struct eventpoll *ep) { refcount_inc(&ep->refcount); } /* * Returns true if the event poll can be disposed */ static bool ep_refcount_dec_and_test(struct eventpoll *ep) { if (!refcount_dec_and_test(&ep->refcount)) return false; WARN_ON_ONCE(!RB_EMPTY_ROOT(&ep->rbr.rb_root)); return true; } static void ep_free(struct eventpoll *ep) { mutex_destroy(&ep->mtx); free_uid(ep->user); wakeup_source_unregister(ep->ws); kfree(ep); } /* * Removes a "struct epitem" from the eventpoll RB tree and deallocates * all the associated resources. Must be called with "mtx" held. * If the dying flag is set, do the removal only if force is true. 
* This prevents ep_clear_and_put() from dropping all the ep references * while running concurrently with eventpoll_release_file(). * Returns true if the eventpoll can be disposed. */ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) { struct file *file = epi->ffd.file; struct epitems_head *to_free; struct hlist_head *head; lockdep_assert_irqs_enabled(); /* * Removes poll wait queue hooks. */ ep_unregister_pollwait(ep, epi); /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_lock); if (epi->dying && !force) { spin_unlock(&file->f_lock); return false; } to_free = NULL; head = file->f_ep; if (head->first == &epi->fllink && !epi->fllink.next) { file->f_ep = NULL; if (!is_file_epoll(file)) { struct epitems_head *v; v = container_of(head, struct epitems_head, epitems); if (!smp_load_acquire(&v->next)) to_free = v; } } hlist_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); free_ephead(to_free); rb_erase_cached(&epi->rbn, &ep->rbr); write_lock_irq(&ep->lock); if (ep_is_linked(epi)) list_del_init(&epi->rdllink); write_unlock_irq(&ep->lock); wakeup_source_unregister(ep_wakeup_source(epi)); /* * At this point it is safe to free the eventpoll item. Use the union * field epi->rcu, since we are trying to minimize the size of * 'struct epitem'. The 'rbn' field is no longer in use. Protected by * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make * use of the rbn field. */ kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); return ep_refcount_dec_and_test(ep); } /* * ep_remove variant for callers owing an additional reference to the ep */ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { WARN_ON_ONCE(__ep_remove(ep, epi, false)); } static void ep_clear_and_put(struct eventpoll *ep) { struct rb_node *rbp, *next; struct epitem *epi; bool dispose; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) ep_poll_safewake(ep, NULL, 0); mutex_lock(&ep->mtx); /* * Walks through the whole tree by unregistering poll callbacks. */ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); ep_unregister_pollwait(ep, epi); cond_resched(); } /* * Walks through the whole tree and try to free each "struct epitem". * Note that ep_remove_safe() will not remove the epitem in case of a * racing eventpoll_release_file(); the latter will do the removal. * At this point we are sure no poll callbacks will be lingering around. * Since we still own a reference to the eventpoll struct, the loop can't * dispose it. 
*/ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) { next = rb_next(rbp); epi = rb_entry(rbp, struct epitem, rbn); ep_remove_safe(ep, epi); cond_resched(); } dispose = ep_refcount_dec_and_test(ep); mutex_unlock(&ep->mtx); if (dispose) ep_free(ep); } static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret; if (!is_file_epoll(file)) return -EINVAL; switch (cmd) { case EPIOCSPARAMS: case EPIOCGPARAMS: ret = ep_eventpoll_bp_ioctl(file, cmd, arg); break; default: ret = -EINVAL; break; } return ret; } static int ep_eventpoll_release(struct inode *inode, struct file *file) { struct eventpoll *ep = file->private_data; if (ep) ep_clear_and_put(ep); return 0; } static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth); static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth) { struct eventpoll *ep = file->private_data; LIST_HEAD(txlist); struct epitem *epi, *tmp; poll_table pt; __poll_t res = 0; init_poll_funcptr(&pt, NULL); /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); /* * Proceed to find out if wanted events are really available inside * the ready list. */ mutex_lock_nested(&ep->mtx, depth); ep_start_scan(ep, &txlist); list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { if (ep_item_poll(epi, &pt, depth + 1)) { res = EPOLLIN | EPOLLRDNORM; break; } else { /* * Item has been dropped into the ready list by the poll * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ __pm_relax(ep_wakeup_source(epi)); list_del_init(&epi->rdllink); } } ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; } /* * The ffd.file pointer may be in the process of being torn down due to * being closed, but we may not have finished eventpoll_release() yet. * * Normally, even with the atomic_long_inc_not_zero, the file may have * been free'd and then gotten re-allocated to something else (since * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). * * But for epoll, users hold the ep->mtx mutex, and as such any file in * the process of being free'd will block in eventpoll_release_file() * and thus the underlying file allocation will not be free'd, and the * file re-use cannot happen. * * For the same reason we can avoid a rcu_read_lock() around the * operation - 'ffd.file' cannot go away even if the refcount has * reached zero (but we must still not call out to ->poll() functions * etc). */ static struct file *epi_fget(const struct epitem *epi) { struct file *file; file = epi->ffd.file; if (!atomic_long_inc_not_zero(&file->f_count)) file = NULL; return file; } /* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() * is correctly annotated. */ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { struct file *file = epi_fget(epi); __poll_t res; /* * We could return EPOLLERR | EPOLLHUP or something, but let's * treat this more as "file doesn't exist, poll didn't happen". 
*/ if (!file) return 0; pt->_key = epi->event.events; if (!is_file_epoll(file)) res = vfs_poll(file, pt); else res = __ep_eventpoll_poll(file, pt, depth); fput(file); return res & epi->event.events; } static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { return __ep_eventpoll_poll(file, wait, 0); } #ifdef CONFIG_PROC_FS static void ep_show_fdinfo(struct seq_file *m, struct file *f) { struct eventpoll *ep = f->private_data; struct rb_node *rbp; mutex_lock(&ep->mtx); for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { struct epitem *epi = rb_entry(rbp, struct epitem, rbn); struct inode *inode = file_inode(epi->ffd.file); seq_printf(m, "tfd: %8d events: %8x data: %16llx " " pos:%lli ino:%lx sdev:%x\n", epi->ffd.fd, epi->event.events, (long long)epi->event.data, (long long)epi->ffd.file->f_pos, inode->i_ino, inode->i_sb->s_dev); if (seq_has_overflowed(m)) break; } mutex_unlock(&ep->mtx); } #endif /* File callbacks that implement the eventpoll file behaviour */ static const struct file_operations eventpoll_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = ep_show_fdinfo, #endif .release = ep_eventpoll_release, .poll = ep_eventpoll_poll, .llseek = noop_llseek, .unlocked_ioctl = ep_eventpoll_ioctl, .compat_ioctl = compat_ptr_ioctl, }; /* * This is called from eventpoll_release() to unlink files from the eventpoll * interface. We need to have this facility to cleanup correctly files that are * closed without being removed from the eventpoll interface. */ void eventpoll_release_file(struct file *file) { struct eventpoll *ep; struct epitem *epi; bool dispose; /* * Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from * touching the epitems list before eventpoll_release_file() can access * the ep->mtx. */ again: spin_lock(&file->f_lock); if (file->f_ep && file->f_ep->first) { epi = hlist_entry(file->f_ep->first, struct epitem, fllink); epi->dying = true; spin_unlock(&file->f_lock); /* * ep access is safe as we still own a reference to the ep * struct */ ep = epi->ep; mutex_lock(&ep->mtx); dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); if (dispose) ep_free(ep); goto again; } spin_unlock(&file->f_lock); } static int ep_alloc(struct eventpoll **pep) { struct eventpoll *ep; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (unlikely(!ep)) return -ENOMEM; mutex_init(&ep->mtx); rwlock_init(&ep->lock); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT_CACHED; ep->ovflist = EP_UNACTIVE_PTR; ep->user = get_current_user(); refcount_set(&ep->refcount, 1); *pep = ep; return 0; } /* * Search the file inside the eventpoll tree. The RB tree operations * are protected by the "mtx" mutex, and ep_find() must be called with * "mtx" held. 
*/ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) { int kcmp; struct rb_node *rbp; struct epitem *epi, *epir = NULL; struct epoll_filefd ffd; ep_set_ffd(&ffd, file, fd); for (rbp = ep->rbr.rb_root.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); kcmp = ep_cmp_ffd(&ffd, &epi->ffd); if (kcmp > 0) rbp = rbp->rb_right; else if (kcmp < 0) rbp = rbp->rb_left; else { epir = epi; break; } } return epir; } #ifdef CONFIG_KCMP static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff) { struct rb_node *rbp; struct epitem *epi; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (epi->ffd.fd == tfd) { if (toff == 0) return epi; else toff--; } cond_resched(); } return NULL; } struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff) { struct file *file_raw; struct eventpoll *ep; struct epitem *epi; if (!is_file_epoll(file)) return ERR_PTR(-EINVAL); ep = file->private_data; mutex_lock(&ep->mtx); epi = ep_find_tfd(ep, tfd, toff); if (epi) file_raw = epi->ffd.file; else file_raw = ERR_PTR(-ENOENT); mutex_unlock(&ep->mtx); return file_raw; } #endif /* CONFIG_KCMP */ /* * Adds a new entry to the tail of the list in a lockless way, i.e. * multiple CPUs are allowed to call this function concurrently. * * Beware: it is necessary to prevent any other modifications of the * existing list until all changes are completed, in other words * concurrent list_add_tail_lockless() calls should be protected * with a read lock, where write lock acts as a barrier which * makes sure all list_add_tail_lockless() calls are fully * completed. * * Also an element can be locklessly added to the list only in one * direction i.e. either to the tail or to the head, otherwise * concurrent access will corrupt the list. * * Return: %false if element has been already added to the list, %true * otherwise. */ static inline bool list_add_tail_lockless(struct list_head *new, struct list_head *head) { struct list_head *prev; /* * This is simple 'new->next = head' operation, but cmpxchg() * is used in order to detect that same element has been just * added to the list from another CPU: the winner observes * new->next == new. */ if (!try_cmpxchg(&new->next, &new, head)) return false; /* * Initially ->next of a new element must be updated with the head * (we are inserting to the tail) and only then pointers are atomically * exchanged. XCHG guarantees memory ordering, thus ->next should be * updated before pointers are actually swapped and pointers are * swapped before prev->next is updated. */ prev = xchg(&head->prev, new); /* * It is safe to modify prev->next and new->prev, because a new element * is added only to the tail and new->next is updated before XCHG. */ prev->next = new; new->prev = prev; return true; } /* * Chains a new epi entry to the tail of the ep->ovflist in a lockless way, * i.e. multiple CPUs are allowed to call this function concurrently. * * Return: %false if epi element has been already chained, %true otherwise. 
*/ static inline bool chain_epi_lockless(struct epitem *epi) { struct eventpoll *ep = epi->ep; /* Fast preliminary check */ if (epi->next != EP_UNACTIVE_PTR) return false; /* Check that the same epi has not been just chained from another CPU */ if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) return false; /* Atomically exchange tail */ epi->next = xchg(&ep->ovflist, epi); return true; } /* * This is the callback that is passed to the wait queue wakeup * mechanism. It is called by the stored file descriptors when they * have events to report. * * This callback takes a read lock in order not to contend with concurrent * events from another file descriptor, thus all modifications to ->rdllist * or ->ovflist are lockless. Read lock is paired with the write lock from * ep_start/done_scan(), which stops all list modifications and guarantees * that lists state is seen correctly. * * Another thing worth to mention is that ep_poll_callback() can be called * concurrently for the same @epi from different CPUs if poll table was inited * with several wait queues entries. Plural wakeup from different CPUs of a * single wait queue is serialized by wq.lock, but the case when multiple wait * queues are used should be detected accordingly. This is detected using * cmpxchg() operation. */ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int pwake = 0; struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; __poll_t pollflags = key_to_poll(key); unsigned long flags; int ewake = 0; read_lock_irqsave(&ep->lock, flags); ep_set_busy_poll_napi_id(epi); /* * If the event mask does not contain any poll(2) event, we consider the * descriptor to be disabled. This condition is likely the effect of the * EPOLLONESHOT bit that disables the descriptor when an event is received, * until the next EPOLL_CTL_MOD will be issued. */ if (!(epi->event.events & ~EP_PRIVATE_BITS)) goto out_unlock; /* * Check the events coming with the callback. At this stage, not * every device reports the events in the "key" parameter of the * callback. We need to be able to handle both cases here, hence the * test for "key" != NULL before the event match test. */ if (pollflags && !(pollflags & epi->event.events)) goto out_unlock; /* * If we are transferring events to userspace, we can hold no locks * (because we're accessing user memory, and because of linux f_op->poll() * semantics). All the events that happen during that period of time are * chained in ep->ovflist and requeued later on. */ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { if (chain_epi_lockless(epi)) ep_pm_stay_awake_rcu(epi); } else if (!ep_is_linked(epi)) { /* In the usual case, add event to ready list. */ if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) ep_pm_stay_awake_rcu(epi); } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() * wait list. 
*/ if (waitqueue_active(&ep->wq)) { if ((epi->event.events & EPOLLEXCLUSIVE) && !(pollflags & POLLFREE)) { switch (pollflags & EPOLLINOUT_BITS) { case EPOLLIN: if (epi->event.events & EPOLLIN) ewake = 1; break; case EPOLLOUT: if (epi->event.events & EPOLLOUT) ewake = 1; break; case 0: ewake = 1; break; } } wake_up(&ep->wq); } if (waitqueue_active(&ep->poll_wait)) pwake++; out_unlock: read_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE); if (!(epi->event.events & EPOLLEXCLUSIVE)) ewake = 1; if (pollflags & POLLFREE) { /* * If we race with ep_remove_wait_queue() it can miss * ->whead = NULL and do another remove_wait_queue() after * us, so we can't use __remove_wait_queue(). */ list_del_init(&wait->entry); /* * ->whead != NULL protects us from the race with * ep_clear_and_put() or ep_remove(), ep_remove_wait_queue() * takes whead->lock held by the caller. Once we nullify it, * nothing protects ep/epi or even wait. */ smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); } return ewake; } /* * This is the callback that is used to add our wait queue to the * target file wakeup lists. */ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt) { struct ep_pqueue *epq = container_of(pt, struct ep_pqueue, pt); struct epitem *epi = epq->epi; struct eppoll_entry *pwq; if (unlikely(!epi)) // an earlier allocation has failed return; pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL); if (unlikely(!pwq)) { epq->epi = NULL; return; } init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; if (epi->event.events & EPOLLEXCLUSIVE) add_wait_queue_exclusive(whead, &pwq->wait); else add_wait_queue(whead, &pwq->wait); pwq->next = epi->pwqlist; epi->pwqlist = pwq; } static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) { int kcmp; struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL; struct epitem *epic; bool leftmost = true; while (*p) { parent = *p; epic = rb_entry(parent, struct epitem, rbn); kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd); if (kcmp > 0) { p = &parent->rb_right; leftmost = false; } else p = &parent->rb_left; } rb_link_node(&epi->rbn, parent, p); rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost); } #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate * from a single file of interest. For example, we allow 1000 paths of length * 1, to emanate from each file of interest. This essentially represents the * potential wakeup paths, which need to be limited in order to avoid massive * uncontrolled wakeup storms. The common use case should be a single ep which * is connected to n file sources. In this case each file source has 1 path * of length 1. Thus, the numbers below should be more than sufficient. These * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify * and delete can't add additional paths. Protected by the epnested_mutex. 
*/ static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 }; static int path_count[PATH_ARR_SIZE]; static int path_count_inc(int nests) { /* Allow an arbitrary number of depth 1 paths */ if (nests == 0) return 0; if (++path_count[nests] > path_limits[nests]) return -1; return 0; } static void path_count_init(void) { int i; for (i = 0; i < PATH_ARR_SIZE; i++) path_count[i] = 0; } static int reverse_path_check_proc(struct hlist_head *refs, int depth) { int error = 0; struct epitem *epi; if (depth > EP_MAX_NESTS) /* too deep nesting */ return -1; /* CTL_DEL can remove links here, but that can't increase our count */ hlist_for_each_entry_rcu(epi, refs, fllink) { struct hlist_head *refs = &epi->ep->refs; if (hlist_empty(refs)) error = path_count_inc(depth); else error = reverse_path_check_proc(refs, depth + 1); if (error != 0) break; } return error; } /** * reverse_path_check - The tfile_check_list is list of epitem_head, which have * links that are proposed to be newly added. We need to * make sure that those added links don't add too many * paths such that we will spend all our time waking up * eventpoll objects. * * Return: %zero if the proposed links don't create too many paths, * %-1 otherwise. */ static int reverse_path_check(void) { struct epitems_head *p; for (p = tfile_check_list; p != EP_UNACTIVE_PTR; p = p->next) { int error; path_count_init(); rcu_read_lock(); error = reverse_path_check_proc(&p->epitems, 0); rcu_read_unlock(); if (error) return error; } return 0; } static int ep_create_wakeup_source(struct epitem *epi) { struct name_snapshot n; struct wakeup_source *ws; if (!epi->ep->ws) { epi->ep->ws = wakeup_source_register(NULL, "eventpoll"); if (!epi->ep->ws) return -ENOMEM; } take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); ws = wakeup_source_register(NULL, n.name.name); release_dentry_name_snapshot(&n); if (!ws) return -ENOMEM; rcu_assign_pointer(epi->ws, ws); return 0; } /* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */ static noinline void ep_destroy_wakeup_source(struct epitem *epi) { struct wakeup_source *ws = ep_wakeup_source(epi); RCU_INIT_POINTER(epi->ws, NULL); /* * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is * used internally by wakeup_source_remove, too (called by * wakeup_source_unregister), so we cannot use call_rcu */ synchronize_rcu(); wakeup_source_unregister(ws); } static int attach_epitem(struct file *file, struct epitem *epi) { struct epitems_head *to_free = NULL; struct hlist_head *head = NULL; struct eventpoll *ep = NULL; if (is_file_epoll(file)) ep = file->private_data; if (ep) { head = &ep->refs; } else if (!READ_ONCE(file->f_ep)) { allocate: to_free = kmem_cache_zalloc(ephead_cache, GFP_KERNEL); if (!to_free) return -ENOMEM; head = &to_free->epitems; } spin_lock(&file->f_lock); if (!file->f_ep) { if (unlikely(!head)) { spin_unlock(&file->f_lock); goto allocate; } file->f_ep = head; to_free = NULL; } hlist_add_head_rcu(&epi->fllink, file->f_ep); spin_unlock(&file->f_lock); free_ephead(to_free); return 0; } /* * Must be called with "mtx" held. 
*/ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, struct file *tfile, int fd, int full_check) { int error, pwake = 0; __poll_t revents; struct epitem *epi; struct ep_pqueue epq; struct eventpoll *tep = NULL; if (is_file_epoll(tfile)) tep = tfile->private_data; lockdep_assert_irqs_enabled(); if (unlikely(percpu_counter_compare(&ep->user->epoll_watches, max_user_watches) >= 0)) return -ENOSPC; percpu_counter_inc(&ep->user->epoll_watches); if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) { percpu_counter_dec(&ep->user->epoll_watches); return -ENOMEM; } /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; epi->next = EP_UNACTIVE_PTR; if (tep) mutex_lock_nested(&tep->mtx, 1); /* Add the current item to the list of active epoll hook for this file */ if (unlikely(attach_epitem(tfile, epi) < 0)) { if (tep) mutex_unlock(&tep->mtx); kmem_cache_free(epi_cache, epi); percpu_counter_dec(&ep->user->epoll_watches); return -ENOMEM; } if (full_check && !tep) list_file(tfile); /* * Add the current item to the RB tree. All RB tree operations are * protected by "mtx", and ep_insert() is called with "mtx" held. */ ep_rbtree_insert(ep, epi); if (tep) mutex_unlock(&tep->mtx); /* * ep_remove_safe() calls in the later error paths can't lead to * ep_free() as the ep file itself still holds an ep reference. */ ep_get(ep); /* now check if we've created too many backpaths */ if (unlikely(full_check && reverse_path_check())) { ep_remove_safe(ep, epi); return -EINVAL; } if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); if (error) { ep_remove_safe(ep, epi); return error; } } /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); /* * Attach the item to the poll hooks and get current event bits. * We can safely use the file* here because its usage count has * been increased by the caller of this function. Note that after * this operation completes, the poll callback can start hitting * the new item. */ revents = ep_item_poll(epi, &epq.pt, 1); /* * We have to check if something went wrong during the poll wait queue * install process. Namely an allocation for a wait queue failed due * high memory pressure. */ if (unlikely(!epq.epi)) { ep_remove_safe(ep, epi); return -ENOMEM; } /* We have to drop the new item inside our item list to keep track of it */ write_lock_irq(&ep->lock); /* record NAPI ID of new item if present */ ep_set_busy_poll_napi_id(epi); /* If the file is already "ready" we drop it inside the ready list */ if (revents && !ep_is_linked(epi)) { list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } write_unlock_irq(&ep->lock); /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL, 0); return 0; } /* * Modify the interest event mask by dropping an event if the new mask * has a match in the current file status. Must be called with "mtx" held. */ static int ep_modify(struct eventpoll *ep, struct epitem *epi, const struct epoll_event *event) { int pwake = 0; poll_table pt; lockdep_assert_irqs_enabled(); init_poll_funcptr(&pt, NULL); /* * Set the new event interest mask before calling f_op->poll(); * otherwise we might miss an event that happens between the * f_op->poll() call and the new event set registering. 
*/ epi->event.events = event->events; /* need barrier below */ epi->event.data = event->data; /* protected by mtx */ if (epi->event.events & EPOLLWAKEUP) { if (!ep_has_wakeup_source(epi)) ep_create_wakeup_source(epi); } else if (ep_has_wakeup_source(epi)) { ep_destroy_wakeup_source(epi); } /* * The following barrier has two effects: * * 1) Flush epi changes above to other CPUs. This ensures * we do not miss events from ep_poll_callback if an * event occurs immediately after we call f_op->poll(). * We need this because we did not take ep->lock while * changing epi above (but ep_poll_callback does take * ep->lock). * * 2) We also need to ensure we do not miss _past_ events * when calling f_op->poll(). This barrier also * pairs with the barrier in wq_has_sleeper (see * comments for wq_has_sleeper). * * This barrier will now guarantee ep_poll_callback or f_op->poll * (or both) will notice the readiness of an item. */ smp_mb(); /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. * If the item is "hot" and it is not registered inside the ready * list, push it inside. */ if (ep_item_poll(epi, &pt, 1)) { write_lock_irq(&ep->lock); if (!ep_is_linked(epi)) { list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } write_unlock_irq(&ep->lock); } /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL, 0); return 0; } static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, int maxevents) { struct epitem *epi, *tmp; LIST_HEAD(txlist); poll_table pt; int res = 0; /* * Always short-circuit for fatal signals to allow threads to make a * timely exit without the chance of finding more events available and * fetching repeatedly. */ if (fatal_signal_pending(current)) return -EINTR; init_poll_funcptr(&pt, NULL); mutex_lock(&ep->mtx); ep_start_scan(ep, &txlist); /* * We can loop without lock because we are passed a task private list. * Items cannot vanish during the loop we are holding ep->mtx. */ list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { struct wakeup_source *ws; __poll_t revents; if (res >= maxevents) break; /* * Activate ep->ws before deactivating epi->ws to prevent * triggering auto-suspend here (in case we reactive epi->ws * below). * * This could be rearranged to delay the deactivation of epi->ws * instead, but then epi->ws would temporarily be out of sync * with ep_is_linked(). */ ws = ep_wakeup_source(epi); if (ws) { if (ws->active) __pm_stay_awake(ep->ws); __pm_relax(ws); } list_del_init(&epi->rdllink); /* * If the event mask intersect the caller-requested one, * deliver the event to userspace. Again, we are holding ep->mtx, * so no operations coming from userspace can change the item. */ revents = ep_item_poll(epi, &pt, 1); if (!revents) continue; events = epoll_put_uevent(revents, epi->event.data, events); if (!events) { list_add(&epi->rdllink, &txlist); ep_pm_stay_awake(epi); if (!res) res = -EFAULT; break; } res++; if (epi->event.events & EPOLLONESHOT) epi->event.events &= EP_PRIVATE_BITS; else if (!(epi->event.events & EPOLLET)) { /* * If this file has been added with Level * Trigger mode, we need to insert back inside * the ready list, so that the next call to * epoll_wait() will check again the events * availability. At this point, no one can insert * into ep->rdllist besides us. 
The epoll_ctl() * callers are locked out by * ep_send_events() holding "mtx" and the * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); } } ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; } static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) { struct timespec64 now; if (ms < 0) return NULL; if (!ms) { to->tv_sec = 0; to->tv_nsec = 0; return to; } to->tv_sec = ms / MSEC_PER_SEC; to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC); ktime_get_ts64(&now); *to = timespec64_add_safe(now, *to); return to; } /* * autoremove_wake_function, but remove even on failure to wake up, because we * know that default_wake_function/ttwu will only fail if the thread is already * woken, and in that case the ep_poll loop will remove the entry anyways, not * try to reuse it. */ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned int mode, int sync, void *key) { int ret = default_wake_function(wq_entry, mode, sync, key); /* * Pairs with list_empty_careful in ep_poll, and ensures future loop * iterations see the cause of this wakeup. */ list_del_init_careful(&wq_entry->entry); return ret; } /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. * * @ep: Pointer to the eventpoll context. * @events: Pointer to the userspace buffer where the ready events should be * stored. * @maxevents: Size (in terms of number of events) of the caller event buffer. * @timeout: Maximum timeout for the ready events fetch operation, in * timespec. If the timeout is zero, the function will not block, * while if the @timeout ptr is NULL, the function will block * until at least one event has been retrieved (or an error * occurred). * * Return: the number of ready events which have been fetched, or an * error code, in case of error. */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, struct timespec64 *timeout) { int res, eavail, timed_out = 0; u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; lockdep_assert_irqs_enabled(); if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { slack = select_estimate_accuracy(timeout); to = &expires; *to = timespec64_to_ktime(*timeout); } else if (timeout) { /* * Avoid the unnecessary trip to the wait queue loop, if the * caller specified a non blocking operation. */ timed_out = 1; } /* * This call is racy: We may or may not see events that are being added * to the ready list under the lock (e.g., in IRQ callbacks). For cases * with a non-zero timeout, this thread will check the ready list under * lock and will add to the wait queue. For cases with a zero * timeout, the user by definition should not care and will have to * recheck again. */ eavail = ep_events_available(ep); while (1) { if (eavail) { /* * Try to transfer events to user space. In case we get * 0 events and there's still timeout left over, we go * trying again in search of more luck. */ res = ep_send_events(ep, events, maxevents); if (res) return res; } if (timed_out) return 0; eavail = ep_busy_loop(ep, timed_out); if (eavail) continue; if (signal_pending(current)) return -EINTR; /* * Internally init_wait() uses autoremove_wake_function(), * thus wait entry is removed from the wait queue on each * wakeup. Why it is important? In case of several waiters * each new wakeup will hit the next waiter, giving it the * chance to harvest new event. Otherwise wakeup can be * lost. 
This is also good performance-wise, because on * normal wakeup path no need to call __remove_wait_queue() * explicitly, thus ep->lock is not taken, which halts the * event delivery. * * In fact, we now use an even more aggressive function that * unconditionally removes, because we don't reuse the wait * entry between loop iterations. This lets us also avoid the * performance issue if a process is killed, causing all of its * threads to wake up without being removed normally. */ init_wait(&wait); wait.func = ep_autoremove_wake_function; write_lock_irq(&ep->lock); /* * Barrierless variant, waitqueue_active() is called under * the same lock on wakeup ep_poll_callback() side, so it * is safe to avoid an explicit barrier. */ __set_current_state(TASK_INTERRUPTIBLE); /* * Do the final check under the lock. ep_start/done_scan() * plays with two lists (->rdllist and ->ovflist) and there * is always a race when both lists are empty for short * period of time although events are pending, so lock is * important. */ eavail = ep_events_available(ep); if (!eavail) __add_wait_queue_exclusive(&ep->wq, &wait); write_unlock_irq(&ep->lock); if (!eavail) timed_out = !schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* * We were woken up, thus go and try to harvest some events. * If timed out and still on the wait queue, recheck eavail * carefully under lock, below. */ eavail = 1; if (!list_empty_careful(&wait.entry)) { write_lock_irq(&ep->lock); /* * If the thread timed out and is not on the wait queue, * it means that the thread was woken up after its * timeout expired before it could reacquire the lock. * Thus, when wait.entry is empty, it needs to harvest * events. */ if (timed_out) eavail = list_empty(&wait.entry); __remove_wait_queue(&ep->wq, &wait); write_unlock_irq(&ep->lock); } } } /** * ep_loop_check_proc - verify that adding an epoll file inside another * epoll structure does not violate the constraints, in * terms of closed loops, or too deep chains (which can * result in excessive stack usage). * * @ep: the &struct eventpoll to be currently checked. * @depth: Current depth of the path being checked. * * Return: %zero if adding the epoll @file inside current epoll * structure @ep does not violate the constraints, or %-1 otherwise. */ static int ep_loop_check_proc(struct eventpoll *ep, int depth) { int error = 0; struct rb_node *rbp; struct epitem *epi; mutex_lock_nested(&ep->mtx, depth + 1); ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { struct eventpoll *ep_tovisit; ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->gen == loop_check_gen) continue; if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) error = -1; else error = ep_loop_check_proc(ep_tovisit, depth + 1); if (error != 0) break; } else { /* * If we've reached a file that is not associated with * an ep, then we need to check if the newly added * links are going to add too many wakeup paths. We do * this by adding it to the tfile_check_list, if it's * not already there, and calling reverse_path_check() * during ep_insert(). */ list_file(epi->ffd.file); } } mutex_unlock(&ep->mtx); return error; } /** * ep_loop_check - Performs a check to verify that adding an epoll file (@to) * into another epoll file (represented by @ep) does not create * closed loops or too deep chains. * * @ep: Pointer to the epoll we are inserting into. 
* @to: Pointer to the epoll to be inserted. * * Return: %zero if adding the epoll @to inside the epoll @from * does not violate the constraints, or %-1 otherwise. */ static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to) { inserting_into = ep; return ep_loop_check_proc(to, 0); } static void clear_tfile_check_list(void) { rcu_read_lock(); while (tfile_check_list != EP_UNACTIVE_PTR) { struct epitems_head *head = tfile_check_list; tfile_check_list = head->next; unlist_file(head); } rcu_read_unlock(); } /* * Open an eventpoll file descriptor. */ static int do_epoll_create(int flags) { int error, fd; struct eventpoll *ep = NULL; struct file *file; /* Check the EPOLL_* constant for consistency. */ BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); if (flags & ~EPOLL_CLOEXEC) return -EINVAL; /* * Create the internal data structure ("struct eventpoll"). */ error = ep_alloc(&ep); if (error < 0) return error; /* * Creates all the items needed to setup an eventpoll file. That is, * a file structure and a free file descriptor. */ fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC)); if (fd < 0) { error = fd; goto out_free_ep; } file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep, O_RDWR | (flags & O_CLOEXEC)); if (IS_ERR(file)) { error = PTR_ERR(file); goto out_free_fd; } #ifdef CONFIG_NET_RX_BUSY_POLL ep->busy_poll_usecs = 0; ep->busy_poll_budget = 0; ep->prefer_busy_poll = false; #endif ep->file = file; fd_install(fd, file); return fd; out_free_fd: put_unused_fd(fd); out_free_ep: ep_clear_and_put(ep); return error; } SYSCALL_DEFINE1(epoll_create1, int, flags) { return do_epoll_create(flags); } SYSCALL_DEFINE1(epoll_create, int, size) { if (size <= 0) return -EINVAL; return do_epoll_create(0); } #ifdef CONFIG_PM_SLEEP static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) { if ((epev->events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) epev->events &= ~EPOLLWAKEUP; } #else static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) { epev->events &= ~EPOLLWAKEUP; } #endif static inline int epoll_mutex_lock(struct mutex *mutex, int depth, bool nonblock) { if (!nonblock) { mutex_lock_nested(mutex, depth); return 0; } if (mutex_trylock(mutex)) return 0; return -EAGAIN; } int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, bool nonblock) { int error; int full_check = 0; struct fd f, tf; struct eventpoll *ep; struct epitem *epi; struct eventpoll *tep = NULL; error = -EBADF; f = fdget(epfd); if (!f.file) goto error_return; /* Get the "struct file *" for the target file */ tf = fdget(fd); if (!tf.file) goto error_fput; /* The target file descriptor must support poll */ error = -EPERM; if (!file_can_poll(tf.file)) goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ if (ep_op_has_event(op)) ep_take_care_of_epollwakeup(epds); /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit * adding an epoll file descriptor inside itself. */ error = -EINVAL; if (f.file == tf.file || !is_file_epoll(f.file)) goto error_tgt_fput; /* * epoll adds to the wakeup queue at EPOLL_CTL_ADD time only, * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation. * Also, we do not currently supported nested exclusive wakeups. 
*/ if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) { if (op == EPOLL_CTL_MOD) goto error_tgt_fput; if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) || (epds->events & ~EPOLLEXCLUSIVE_OK_BITS))) goto error_tgt_fput; } /* * At this point it is safe to assume that the "private_data" contains * our own data structure. */ ep = f.file->private_data; /* * When we insert an epoll file descriptor inside another epoll file * descriptor, there is the chance of creating closed loops, which are * better be handled here, than in more critical paths. While we are * checking for loops we also determine the list of files reachable * and hang them on the tfile_check_list, so we can check that we * haven't created too many possible wakeup paths. * * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when * the epoll file descriptor is attaching directly to a wakeup source, * unless the epoll file descriptor is nested. The purpose of taking the * 'epnested_mutex' on add is to prevent complex toplogies such as loops and * deep wakeup paths from forming in parallel through multiple * EPOLL_CTL_ADD operations. */ error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) goto error_tgt_fput; if (op == EPOLL_CTL_ADD) { if (READ_ONCE(f.file->f_ep) || ep->gen == loop_check_gen || is_file_epoll(tf.file)) { mutex_unlock(&ep->mtx); error = epoll_mutex_lock(&epnested_mutex, 0, nonblock); if (error) goto error_tgt_fput; loop_check_gen++; full_check = 1; if (is_file_epoll(tf.file)) { tep = tf.file->private_data; error = -ELOOP; if (ep_loop_check(ep, tep) != 0) goto error_tgt_fput; } error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) goto error_tgt_fput; } } /* * Try to lookup the file inside our RB tree. Since we grabbed "mtx" * above, we can be sure to be able to use the item looked up by * ep_find() till we release the mutex. */ epi = ep_find(ep, tf.file, fd); error = -EINVAL; switch (op) { case EPOLL_CTL_ADD: if (!epi) { epds->events |= EPOLLERR | EPOLLHUP; error = ep_insert(ep, epds, tf.file, fd, full_check); } else error = -EEXIST; break; case EPOLL_CTL_DEL: if (epi) { /* * The eventpoll itself is still alive: the refcount * can't go to zero here. */ ep_remove_safe(ep, epi); error = 0; } else { error = -ENOENT; } break; case EPOLL_CTL_MOD: if (epi) { if (!(epi->event.events & EPOLLEXCLUSIVE)) { epds->events |= EPOLLERR | EPOLLHUP; error = ep_modify(ep, epi, epds); } } else error = -ENOENT; break; } mutex_unlock(&ep->mtx); error_tgt_fput: if (full_check) { clear_tfile_check_list(); loop_check_gen++; mutex_unlock(&epnested_mutex); } fdput(tf); error_fput: fdput(f); error_return: return error; } /* * The following function implements the controller interface for * the eventpoll file that enables the insertion/removal/change of * file descriptors inside the interest set. */ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event) { struct epoll_event epds; if (ep_op_has_event(op) && copy_from_user(&epds, event, sizeof(struct epoll_event))) return -EFAULT; return do_epoll_ctl(epfd, op, fd, &epds, false); } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_wait(2). 
*/ static int do_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *to) { int error; struct fd f; struct eventpoll *ep; /* The maximum number of event must be greater than zero */ if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) return -EINVAL; /* Verify that the area passed by the user is writeable */ if (!access_ok(events, maxevents * sizeof(struct epoll_event))) return -EFAULT; /* Get the "struct file *" for the eventpoll file */ f = fdget(epfd); if (!f.file) return -EBADF; /* * We have to check that the file structure underneath the fd * the user passed to us _is_ an eventpoll file. */ error = -EINVAL; if (!is_file_epoll(f.file)) goto error_fput; /* * At this point it is safe to assume that the "private_data" contains * our own data structure. */ ep = f.file->private_data; /* Time to fish for events ... */ error = ep_poll(ep, events, maxevents, to); error_fput: fdput(f); return error; } SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { struct timespec64 to; return do_epoll_wait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout)); } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). */ static int do_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *to, const sigset_t __user *sigmask, size_t sigsetsize) { int error; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ error = set_user_sigmask(sigmask, sigsetsize); if (error) return error; error = do_epoll_wait(epfd, events, maxevents, to); restore_saved_sigmask_unless(error == -EINTR); return error; } SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { struct timespec64 to; return do_epoll_pwait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout), sigmask, sigsetsize); } SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, int, maxevents, const struct __kernel_timespec __user *, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { struct timespec64 ts, *to = NULL; if (timeout) { if (get_timespec64(&ts, timeout)) return -EFAULT; to = &ts; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } return do_epoll_pwait(epfd, events, maxevents, to, sigmask, sigsetsize); } #ifdef CONFIG_COMPAT static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize) { long err; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. 
*/ err = set_compat_user_sigmask(sigmask, sigsetsize); if (err) return err; err = do_epoll_wait(epfd, events, maxevents, timeout); restore_saved_sigmask_unless(err == -EINTR); return err; } COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 to; return do_compat_epoll_pwait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout), sigmask, sigsetsize); } COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, int, maxevents, const struct __kernel_timespec __user *, timeout, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 ts, *to = NULL; if (timeout) { if (get_timespec64(&ts, timeout)) return -EFAULT; to = &ts; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } return do_compat_epoll_pwait(epfd, events, maxevents, to, sigmask, sigsetsize); } #endif static int __init eventpoll_init(void) { struct sysinfo si; si_meminfo(&si); /* * Allows top 4% of lomem to be allocated for epoll watches (per user). */ max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) / EP_ITEM_COST; BUG_ON(max_user_watches < 0); /* * We can have many thousands of epitems, so prevent this from * using an extra cache line on 64-bit (and smaller) CPUs */ BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128); /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); /* Allocates slab cache used to allocate "struct eppoll_entry" */ pwq_cache = kmem_cache_create("eventpoll_pwq", sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); epoll_sysctls_init(); ephead_cache = kmem_cache_create("ep_head", sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); return 0; } fs_initcall(eventpoll_init);
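/*
 * Illustrative userspace sketch (editor's example, not part of the kernel
 * sources): the typical call sequence that exercises do_epoll_create(),
 * do_epoll_ctl() and do_epoll_wait() implemented above. "listen_fd" is an
 * assumed, already-open descriptor; error handling is abbreviated.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

static int wait_for_input(int listen_fd)
{
	struct epoll_event ev, events[16];
	int epfd, n, i;

	/* do_epoll_create() accepts no flags other than EPOLL_CLOEXEC */
	epfd = epoll_create1(EPOLL_CLOEXEC);
	if (epfd < 0)
		return -1;

	ev.events = EPOLLIN;		/* level-triggered readability */
	ev.data.fd = listen_fd;
	/* EPOLL_CTL_ADD ends up in ep_insert() with ep->mtx held */
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev) < 0) {
		close(epfd);
		return -1;
	}

	/* maxevents must be > 0 and <= EP_MAX_EVENTS (see do_epoll_wait()) */
	n = epoll_wait(epfd, events, 16, 1000 /* ms */);
	for (i = 0; i < n; i++)
		printf("fd %d ready, events 0x%x\n",
		       events[i].data.fd, events[i].events);

	close(epfd);
	return n;
}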
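/*
 * Second illustrative sketch (editor's example, not from the kernel sources):
 * the EPOLLONESHOT re-arm cycle implied by ep_send_events() and ep_modify()
 * above. After a one-shot event is delivered, the item's mask is reduced to
 * EP_PRIVATE_BITS, so userspace must issue EPOLL_CTL_MOD before further
 * events are reported. "epfd" and "fd" are assumed to be an existing epoll
 * instance and a descriptor that was added with EPOLLONESHOT.
 */
#include <sys/epoll.h>

static int rearm_oneshot(int epfd, int fd)
{
	struct epoll_event ev;

	ev.events = EPOLLIN | EPOLLONESHOT;
	ev.data.fd = fd;

	/*
	 * EPOLL_CTL_MOD takes ep->mtx and calls ep_modify(), which re-polls
	 * the target file and puts it back on ep->rdllist if it is already
	 * readable, so a still-pending event is not lost by the re-arm.
	 */
	return epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);
}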
// SPDX-License-Identifier: GPL-2.0 /* * NTP state machine interfaces and logic. * * This code was mainly moved from kernel/timer.c and kernel/time.c * Please see those files for relevant copyright info and historical * changelogs. */ #include <linux/capability.h> #include <linux/clocksource.h> #include <linux/workqueue.h> #include <linux/hrtimer.h> #include <linux/jiffies.h> #include <linux/math64.h> #include <linux/timex.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/rtc.h> #include <linux/audit.h> #include "ntp_internal.h" #include "timekeeping_internal.h" /* * NTP timekeeping variables: * * Note: All of the NTP state is protected by the timekeeping locks. */ /* USER_HZ period (usecs): */ unsigned long tick_usec = USER_TICK_USEC; /* SHIFTED_HZ period (nsecs): */ unsigned long tick_nsec; static u64 tick_length; static u64 tick_length_base; #define SECS_PER_DAY 86400 #define MAX_TICKADJ 500LL /* usecs */ #define MAX_TICKADJ_SCALED \ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) #define MAX_TAI_OFFSET 100000 /* * phase-lock loop variables */ /* * clock synchronization status * * (TIME_ERROR prevents overwriting the CMOS clock) */ static int time_state = TIME_OK; /* clock status bits: */ static int time_status = STA_UNSYNC; /* time adjustment (nsecs): */ static s64 time_offset; /* pll time constant: */ static long time_constant = 2; /* maximum error (usecs): */ static long time_maxerror = NTP_PHASE_LIMIT; /* estimated error (usecs): */ static long time_esterror = NTP_PHASE_LIMIT; /* frequency offset (scaled nsecs/secs): */ static s64 time_freq; /* time at last adjustment (secs): */ static time64_t time_reftime; static long time_adjust; /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ static s64 ntp_tick_adj; /* second value of the next pending leapsecond, or TIME64_MAX if no leap */ static time64_t ntp_next_leap_sec = TIME64_MAX; #ifdef CONFIG_NTP_PPS /* * The following variables are used when a pulse-per-second (PPS) signal * is available. They establish the engineering parameters of the clock * discipline loop when controlled by the PPS signal. 
*/ #define PPS_VALID 10 /* PPS signal watchdog max (s) */ #define PPS_POPCORN 4 /* popcorn spike threshold (shift) */ #define PPS_INTMIN 2 /* min freq interval (s) (shift) */ #define PPS_INTMAX 8 /* max freq interval (s) (shift) */ #define PPS_INTCOUNT 4 /* number of consecutive good intervals to increase pps_shift or consecutive bad intervals to decrease it */ #define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */ static int pps_valid; /* signal watchdog counter */ static long pps_tf[3]; /* phase median filter */ static long pps_jitter; /* current jitter (ns) */ static struct timespec64 pps_fbase; /* beginning of the last freq interval */ static int pps_shift; /* current interval duration (s) (shift) */ static int pps_intcnt; /* interval counter */ static s64 pps_freq; /* frequency offset (scaled ns/s) */ static long pps_stabil; /* current stability (scaled ns/s) */ /* * PPS signal quality monitors */ static long pps_calcnt; /* calibration intervals */ static long pps_jitcnt; /* jitter limit exceeded */ static long pps_stbcnt; /* stability limit exceeded */ static long pps_errcnt; /* calibration errors */ /* PPS kernel consumer compensates the whole phase error immediately. * Otherwise, reduce the offset by a fixed factor times the time constant. */ static inline s64 ntp_offset_chunk(s64 offset) { if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL) return offset; else return shift_right(offset, SHIFT_PLL + time_constant); } static inline void pps_reset_freq_interval(void) { /* the PPS calibration interval may end surprisingly early */ pps_shift = PPS_INTMIN; pps_intcnt = 0; } /** * pps_clear - Clears the PPS state variables */ static inline void pps_clear(void) { pps_reset_freq_interval(); pps_tf[0] = 0; pps_tf[1] = 0; pps_tf[2] = 0; pps_fbase.tv_sec = pps_fbase.tv_nsec = 0; pps_freq = 0; } /* Decrease pps_valid to indicate that another second has passed since * the last PPS signal. When it reaches 0, indicate that PPS signal is * missing. 
*/ static inline void pps_dec_valid(void) { if (pps_valid > 0) pps_valid--; else { time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); pps_clear(); } } static inline void pps_set_freq(s64 freq) { pps_freq = freq; } static inline int is_error_status(int status) { return (status & (STA_UNSYNC|STA_CLOCKERR)) /* PPS signal lost when either PPS time or * PPS frequency synchronization requested */ || ((status & (STA_PPSFREQ|STA_PPSTIME)) && !(status & STA_PPSSIGNAL)) /* PPS jitter exceeded when * PPS time synchronization requested */ || ((status & (STA_PPSTIME|STA_PPSJITTER)) == (STA_PPSTIME|STA_PPSJITTER)) /* PPS wander exceeded or calibration error when * PPS frequency synchronization requested */ || ((status & STA_PPSFREQ) && (status & (STA_PPSWANDER|STA_PPSERROR))); } static inline void pps_fill_timex(struct __kernel_timex *txc) { txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) * PPM_SCALE_INV, NTP_SCALE_SHIFT); txc->jitter = pps_jitter; if (!(time_status & STA_NANO)) txc->jitter = pps_jitter / NSEC_PER_USEC; txc->shift = pps_shift; txc->stabil = pps_stabil; txc->jitcnt = pps_jitcnt; txc->calcnt = pps_calcnt; txc->errcnt = pps_errcnt; txc->stbcnt = pps_stbcnt; } #else /* !CONFIG_NTP_PPS */ static inline s64 ntp_offset_chunk(s64 offset) { return shift_right(offset, SHIFT_PLL + time_constant); } static inline void pps_reset_freq_interval(void) {} static inline void pps_clear(void) {} static inline void pps_dec_valid(void) {} static inline void pps_set_freq(s64 freq) {} static inline int is_error_status(int status) { return status & (STA_UNSYNC|STA_CLOCKERR); } static inline void pps_fill_timex(struct __kernel_timex *txc) { /* PPS is not implemented, so these are zero */ txc->ppsfreq = 0; txc->jitter = 0; txc->shift = 0; txc->stabil = 0; txc->jitcnt = 0; txc->calcnt = 0; txc->errcnt = 0; txc->stbcnt = 0; } #endif /* CONFIG_NTP_PPS */ /** * ntp_synced - Returns 1 if the NTP status is not UNSYNC * */ static inline int ntp_synced(void) { return !(time_status & STA_UNSYNC); } /* * NTP methods: */ /* * Update (tick_length, tick_length_base, tick_nsec), based * on (tick_usec, ntp_tick_adj, time_freq): */ static void ntp_update_frequency(void) { u64 second_length; u64 new_base; second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << NTP_SCALE_SHIFT; second_length += ntp_tick_adj; second_length += time_freq; tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; new_base = div_u64(second_length, NTP_INTERVAL_FREQ); /* * Don't wait for the next second_overflow, apply * the change to the tick length immediately: */ tick_length += new_base - tick_length_base; tick_length_base = new_base; } static inline s64 ntp_update_offset_fll(s64 offset64, long secs) { time_status &= ~STA_MODE; if (secs < MINSEC) return 0; if (!(time_status & STA_FLL) && (secs <= MAXSEC)) return 0; time_status |= STA_MODE; return div64_long(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); } static void ntp_update_offset(long offset) { s64 freq_adj; s64 offset64; long secs; if (!(time_status & STA_PLL)) return; if (!(time_status & STA_NANO)) { /* Make sure the multiplication below won't overflow */ offset = clamp(offset, -USEC_PER_SEC, USEC_PER_SEC); offset *= NSEC_PER_USEC; } /* * Scale the phase adjustment and * clamp to the operating range. */ offset = clamp(offset, -MAXPHASE, MAXPHASE); /* * Select how the frequency is to be controlled * and in which mode (PLL or FLL). 
*/ secs = (long)(__ktime_get_real_seconds() - time_reftime); if (unlikely(time_status & STA_FREQHOLD)) secs = 0; time_reftime = __ktime_get_real_seconds(); offset64 = offset; freq_adj = ntp_update_offset_fll(offset64, secs); /* * Clamp update interval to reduce PLL gain with low * sampling rate (e.g. intermittent network connection) * to avoid instability. */ if (unlikely(secs > 1 << (SHIFT_PLL + 1 + time_constant))) secs = 1 << (SHIFT_PLL + 1 + time_constant); freq_adj += (offset64 * secs) << (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); time_freq = max(freq_adj, -MAXFREQ_SCALED); time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); } /** * ntp_clear - Clears the NTP state variables */ void ntp_clear(void) { time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; ntp_update_frequency(); tick_length = tick_length_base; time_offset = 0; ntp_next_leap_sec = TIME64_MAX; /* Clear PPS state variables */ pps_clear(); } u64 ntp_tick_length(void) { return tick_length; } /** * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t * * Provides the time of the next leapsecond against CLOCK_REALTIME in * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending. */ ktime_t ntp_get_next_leap(void) { ktime_t ret; if ((time_state == TIME_INS) && (time_status & STA_INS)) return ktime_set(ntp_next_leap_sec, 0); ret = KTIME_MAX; return ret; } /* * this routine handles the overflow of the microsecond field * * The tricky bits of code to handle the accurate clock support * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. * They were originally developed for SUN and DEC kernels. * All the kudos should go to Dave for this stuff. * * Also handles leap second processing, and returns leap offset */ int second_overflow(time64_t secs) { s64 delta; int leap = 0; s32 rem; /* * Leap second processing. If in leap-insert state at the end of the * day, the system clock is set back one second; if in leap-delete * state, the system clock is set ahead one second. 
*/ switch (time_state) { case TIME_OK: if (time_status & STA_INS) { time_state = TIME_INS; div_s64_rem(secs, SECS_PER_DAY, &rem); ntp_next_leap_sec = secs + SECS_PER_DAY - rem; } else if (time_status & STA_DEL) { time_state = TIME_DEL; div_s64_rem(secs + 1, SECS_PER_DAY, &rem); ntp_next_leap_sec = secs + SECS_PER_DAY - rem; } break; case TIME_INS: if (!(time_status & STA_INS)) { ntp_next_leap_sec = TIME64_MAX; time_state = TIME_OK; } else if (secs == ntp_next_leap_sec) { leap = -1; time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); } break; case TIME_DEL: if (!(time_status & STA_DEL)) { ntp_next_leap_sec = TIME64_MAX; time_state = TIME_OK; } else if (secs == ntp_next_leap_sec) { leap = 1; ntp_next_leap_sec = TIME64_MAX; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); } break; case TIME_OOP: ntp_next_leap_sec = TIME64_MAX; time_state = TIME_WAIT; break; case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; break; } /* Bump the maxerror field */ time_maxerror += MAXFREQ / NSEC_PER_USEC; if (time_maxerror > NTP_PHASE_LIMIT) { time_maxerror = NTP_PHASE_LIMIT; time_status |= STA_UNSYNC; } /* Compute the phase adjustment for the next second */ tick_length = tick_length_base; delta = ntp_offset_chunk(time_offset); time_offset -= delta; tick_length += delta; /* Check PPS signal */ pps_dec_valid(); if (!time_adjust) goto out; if (time_adjust > MAX_TICKADJ) { time_adjust -= MAX_TICKADJ; tick_length += MAX_TICKADJ_SCALED; goto out; } if (time_adjust < -MAX_TICKADJ) { time_adjust += MAX_TICKADJ; tick_length -= MAX_TICKADJ_SCALED; goto out; } tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; time_adjust = 0; out: return leap; } #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) static void sync_hw_clock(struct work_struct *work); static DECLARE_WORK(sync_work, sync_hw_clock); static struct hrtimer sync_hrtimer; #define SYNC_PERIOD_NS (11ULL * 60 * NSEC_PER_SEC) static enum hrtimer_restart sync_timer_callback(struct hrtimer *timer) { queue_work(system_freezable_power_efficient_wq, &sync_work); return HRTIMER_NORESTART; } static void sched_sync_hw_clock(unsigned long offset_nsec, bool retry) { ktime_t exp = ktime_set(ktime_get_real_seconds(), 0); if (retry) exp = ktime_add_ns(exp, 2ULL * NSEC_PER_SEC - offset_nsec); else exp = ktime_add_ns(exp, SYNC_PERIOD_NS - offset_nsec); hrtimer_start(&sync_hrtimer, exp, HRTIMER_MODE_ABS); } /* * Check whether @now is correct versus the required time to update the RTC * and calculate the value which needs to be written to the RTC so that the * next seconds increment of the RTC after the write is aligned with the next * seconds increment of clock REALTIME. * * tsched t1 write(t2.tv_sec - 1sec)) t2 RTC increments seconds * * t2.tv_nsec == 0 * tsched = t2 - set_offset_nsec * newval = t2 - NSEC_PER_SEC * * ==> neval = tsched + set_offset_nsec - NSEC_PER_SEC * * As the execution of this code is not guaranteed to happen exactly at * tsched this allows it to happen within a fuzzy region: * * abs(now - tsched) < FUZZ * * If @now is not inside the allowed window the function returns false. */ static inline bool rtc_tv_nsec_ok(unsigned long set_offset_nsec, struct timespec64 *to_set, const struct timespec64 *now) { /* Allowed error in tv_nsec, arbitrarily set to 5 jiffies in ns. 
*/ const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5; struct timespec64 delay = {.tv_sec = -1, .tv_nsec = set_offset_nsec}; *to_set = timespec64_add(*now, delay); if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) { to_set->tv_nsec = 0; return true; } if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) { to_set->tv_sec++; to_set->tv_nsec = 0; return true; } return false; } #ifdef CONFIG_GENERIC_CMOS_UPDATE int __weak update_persistent_clock64(struct timespec64 now64) { return -ENODEV; } #else static inline int update_persistent_clock64(struct timespec64 now64) { return -ENODEV; } #endif #ifdef CONFIG_RTC_SYSTOHC /* Save NTP synchronized time to the RTC */ static int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec) { struct rtc_device *rtc; struct rtc_time tm; int err = -ENODEV; rtc = rtc_class_open(CONFIG_RTC_SYSTOHC_DEVICE); if (!rtc) return -ENODEV; if (!rtc->ops || !rtc->ops->set_time) goto out_close; /* First call might not have the correct offset */ if (*offset_nsec == rtc->set_offset_nsec) { rtc_time64_to_tm(to_set->tv_sec, &tm); err = rtc_set_time(rtc, &tm); } else { /* Store the update offset and let the caller try again */ *offset_nsec = rtc->set_offset_nsec; err = -EAGAIN; } out_close: rtc_class_close(rtc); return err; } #else static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec) { return -ENODEV; } #endif /* * If we have an externally synchronized Linux clock, then update RTC clock * accordingly every ~11 minutes. Generally RTCs can only store second * precision, but many RTCs will adjust the phase of their second tick to * match the moment of update. This infrastructure arranges to call to the RTC * set at the correct moment to phase synchronize the RTC second tick over * with the kernel clock. */ static void sync_hw_clock(struct work_struct *work) { /* * The default synchronization offset is 500ms for the deprecated * update_persistent_clock64() under the assumption that it uses * the infamous CMOS clock (MC146818). */ static unsigned long offset_nsec = NSEC_PER_SEC / 2; struct timespec64 now, to_set; int res = -EAGAIN; /* * Don't update if STA_UNSYNC is set and if ntp_notify_cmos_timer() * managed to schedule the work between the timer firing and the * work being able to rearm the timer. Wait for the timer to expire. */ if (!ntp_synced() || hrtimer_is_queued(&sync_hrtimer)) return; ktime_get_real_ts64(&now); /* If @now is not in the allowed window, try again */ if (!rtc_tv_nsec_ok(offset_nsec, &to_set, &now)) goto rearm; /* Take timezone adjusted RTCs into account */ if (persistent_clock_is_local) to_set.tv_sec -= (sys_tz.tz_minuteswest * 60); /* Try the legacy RTC first. */ res = update_persistent_clock64(to_set); if (res != -ENODEV) goto rearm; /* Try the RTC class */ res = update_rtc(&to_set, &offset_nsec); if (res == -ENODEV) return; rearm: sched_sync_hw_clock(offset_nsec, res != 0); } void ntp_notify_cmos_timer(void) { /* * When the work is currently executed but has not yet the timer * rearmed this queues the work immediately again. No big issue, * just a pointless work scheduled. 
*/ if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer)) queue_work(system_freezable_power_efficient_wq, &sync_work); } static void __init ntp_init_cmos_sync(void) { hrtimer_init(&sync_hrtimer, CLOCK_REALTIME, HRTIMER_MODE_ABS); sync_hrtimer.function = sync_timer_callback; } #else /* CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */ static inline void __init ntp_init_cmos_sync(void) { } #endif /* !CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */ /* * Propagate a new txc->status value into the NTP state: */ static inline void process_adj_status(const struct __kernel_timex *txc) { if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { time_state = TIME_OK; time_status = STA_UNSYNC; ntp_next_leap_sec = TIME64_MAX; /* restart PPS frequency calibration */ pps_reset_freq_interval(); } /* * If we turn on PLL adjustments then reset the * reference time to current time. */ if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) time_reftime = __ktime_get_real_seconds(); /* only set allowed bits */ time_status &= STA_RONLY; time_status |= txc->status & ~STA_RONLY; } static inline void process_adjtimex_modes(const struct __kernel_timex *txc, s32 *time_tai) { if (txc->modes & ADJ_STATUS) process_adj_status(txc); if (txc->modes & ADJ_NANO) time_status |= STA_NANO; if (txc->modes & ADJ_MICRO) time_status &= ~STA_NANO; if (txc->modes & ADJ_FREQUENCY) { time_freq = txc->freq * PPM_SCALE; time_freq = min(time_freq, MAXFREQ_SCALED); time_freq = max(time_freq, -MAXFREQ_SCALED); /* update pps_freq */ pps_set_freq(time_freq); } if (txc->modes & ADJ_MAXERROR) time_maxerror = txc->maxerror; if (txc->modes & ADJ_ESTERROR) time_esterror = txc->esterror; if (txc->modes & ADJ_TIMECONST) { time_constant = txc->constant; if (!(time_status & STA_NANO)) time_constant += 4; time_constant = min(time_constant, (long)MAXTC); time_constant = max(time_constant, 0l); } if (txc->modes & ADJ_TAI && txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET) *time_tai = txc->constant; if (txc->modes & ADJ_OFFSET) ntp_update_offset(txc->offset); if (txc->modes & ADJ_TICK) tick_usec = txc->tick; if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) ntp_update_frequency(); } /* * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. 
*/ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai, struct audit_ntp_data *ad) { int result; if (txc->modes & ADJ_ADJTIME) { long save_adjust = time_adjust; if (!(txc->modes & ADJ_OFFSET_READONLY)) { /* adjtime() is independent from ntp_adjtime() */ time_adjust = txc->offset; ntp_update_frequency(); audit_ntp_set_old(ad, AUDIT_NTP_ADJUST, save_adjust); audit_ntp_set_new(ad, AUDIT_NTP_ADJUST, time_adjust); } txc->offset = save_adjust; } else { /* If there are input parameters, then process them: */ if (txc->modes) { audit_ntp_set_old(ad, AUDIT_NTP_OFFSET, time_offset); audit_ntp_set_old(ad, AUDIT_NTP_FREQ, time_freq); audit_ntp_set_old(ad, AUDIT_NTP_STATUS, time_status); audit_ntp_set_old(ad, AUDIT_NTP_TAI, *time_tai); audit_ntp_set_old(ad, AUDIT_NTP_TICK, tick_usec); process_adjtimex_modes(txc, time_tai); audit_ntp_set_new(ad, AUDIT_NTP_OFFSET, time_offset); audit_ntp_set_new(ad, AUDIT_NTP_FREQ, time_freq); audit_ntp_set_new(ad, AUDIT_NTP_STATUS, time_status); audit_ntp_set_new(ad, AUDIT_NTP_TAI, *time_tai); audit_ntp_set_new(ad, AUDIT_NTP_TICK, tick_usec); } txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); if (!(time_status & STA_NANO)) txc->offset = (u32)txc->offset / NSEC_PER_USEC; } result = time_state; /* mostly `TIME_OK' */ /* check for errors */ if (is_error_status(time_status)) result = TIME_ERROR; txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * PPM_SCALE_INV, NTP_SCALE_SHIFT); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; txc->constant = time_constant; txc->precision = 1; txc->tolerance = MAXFREQ_SCALED / PPM_SCALE; txc->tick = tick_usec; txc->tai = *time_tai; /* fill PPS status fields */ pps_fill_timex(txc); txc->time.tv_sec = ts->tv_sec; txc->time.tv_usec = ts->tv_nsec; if (!(time_status & STA_NANO)) txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC; /* Handle leapsec adjustments */ if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) { if ((time_state == TIME_INS) && (time_status & STA_INS)) { result = TIME_OOP; txc->tai++; txc->time.tv_sec--; } if ((time_state == TIME_DEL) && (time_status & STA_DEL)) { result = TIME_WAIT; txc->tai--; txc->time.tv_sec++; } if ((time_state == TIME_OOP) && (ts->tv_sec == ntp_next_leap_sec)) { result = TIME_WAIT; } } return result; } #ifdef CONFIG_NTP_PPS /* actually struct pps_normtime is good old struct timespec, but it is * semantically different (and it is the reason why it was invented): * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */ struct pps_normtime { s64 sec; /* seconds */ long nsec; /* nanoseconds */ }; /* normalize the timestamp so that nsec is in the ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */ static inline struct pps_normtime pps_normalize_ts(struct timespec64 ts) { struct pps_normtime norm = { .sec = ts.tv_sec, .nsec = ts.tv_nsec }; if (norm.nsec > (NSEC_PER_SEC >> 1)) { norm.nsec -= NSEC_PER_SEC; norm.sec++; } return norm; } /* get current phase correction and jitter */ static inline long pps_phase_filter_get(long *jitter) { *jitter = pps_tf[0] - pps_tf[1]; if (*jitter < 0) *jitter = -*jitter; /* TODO: test various filters */ return pps_tf[0]; } /* add the sample to the phase filter */ static inline void pps_phase_filter_add(long err) { pps_tf[2] = pps_tf[1]; pps_tf[1] = pps_tf[0]; pps_tf[0] = err; } /* decrease frequency calibration interval length. * It is halved after four consecutive unstable intervals. 
*/ static inline void pps_dec_freq_interval(void) { if (--pps_intcnt <= -PPS_INTCOUNT) { pps_intcnt = -PPS_INTCOUNT; if (pps_shift > PPS_INTMIN) { pps_shift--; pps_intcnt = 0; } } } /* increase frequency calibration interval length. * It is doubled after four consecutive stable intervals. */ static inline void pps_inc_freq_interval(void) { if (++pps_intcnt >= PPS_INTCOUNT) { pps_intcnt = PPS_INTCOUNT; if (pps_shift < PPS_INTMAX) { pps_shift++; pps_intcnt = 0; } } } /* update clock frequency based on MONOTONIC_RAW clock PPS signal * timestamps * * At the end of the calibration interval the difference between the * first and last MONOTONIC_RAW clock timestamps divided by the length * of the interval becomes the frequency update. If the interval was * too long, the data are discarded. * Returns the difference between old and new frequency values. */ static long hardpps_update_freq(struct pps_normtime freq_norm) { long delta, delta_mod; s64 ftemp; /* check if the frequency interval was too long */ if (freq_norm.sec > (2 << pps_shift)) { time_status |= STA_PPSERROR; pps_errcnt++; pps_dec_freq_interval(); printk_deferred(KERN_ERR "hardpps: PPSERROR: interval too long - %lld s\n", freq_norm.sec); return 0; } /* here the raw frequency offset and wander (stability) is * calculated. If the wander is less than the wander threshold * the interval is increased; otherwise it is decreased. */ ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT, freq_norm.sec); delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT); pps_freq = ftemp; if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) { printk_deferred(KERN_WARNING "hardpps: PPSWANDER: change=%ld\n", delta); time_status |= STA_PPSWANDER; pps_stbcnt++; pps_dec_freq_interval(); } else { /* good sample */ pps_inc_freq_interval(); } /* the stability metric is calculated as the average of recent * frequency changes, but is used only for performance * monitoring */ delta_mod = delta; if (delta_mod < 0) delta_mod = -delta_mod; pps_stabil += (div_s64(((s64)delta_mod) << (NTP_SCALE_SHIFT - SHIFT_USEC), NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN; /* if enabled, the system clock frequency is updated */ if ((time_status & STA_PPSFREQ) != 0 && (time_status & STA_FREQHOLD) == 0) { time_freq = pps_freq; ntp_update_frequency(); } return delta; } /* correct REALTIME clock phase error against PPS signal */ static void hardpps_update_phase(long error) { long correction = -error; long jitter; /* add the sample to the median filter */ pps_phase_filter_add(correction); correction = pps_phase_filter_get(&jitter); /* Nominal jitter is due to PPS signal noise. If it exceeds the * threshold, the sample is discarded; otherwise, if so enabled, * the time offset is updated. */ if (jitter > (pps_jitter << PPS_POPCORN)) { printk_deferred(KERN_WARNING "hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", jitter, (pps_jitter << PPS_POPCORN)); time_status |= STA_PPSJITTER; pps_jitcnt++; } else if (time_status & STA_PPSTIME) { /* correct the time using the phase offset */ time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); /* cancel running adjtime() */ time_adjust = 0; } /* update jitter */ pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN; } /* * __hardpps() - discipline CPU clock oscillator to external PPS signal * * This routine is called at each PPS signal arrival in order to * discipline the CPU clock oscillator to the PPS signal. It takes two * parameters: REALTIME and MONOTONIC_RAW clock timestamps. 
The former * is used to correct clock phase error and the latter is used to * correct the frequency. * * This code is based on David Mills's reference nanokernel * implementation. It was mostly rewritten but keeps the same idea. */ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) { struct pps_normtime pts_norm, freq_norm; pts_norm = pps_normalize_ts(*phase_ts); /* clear the error bits, they will be set again if needed */ time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); /* indicate signal presence */ time_status |= STA_PPSSIGNAL; pps_valid = PPS_VALID; /* when called for the first time, * just start the frequency interval */ if (unlikely(pps_fbase.tv_sec == 0)) { pps_fbase = *raw_ts; return; } /* ok, now we have a base for frequency calculation */ freq_norm = pps_normalize_ts(timespec64_sub(*raw_ts, pps_fbase)); /* check that the signal is in the range * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */ if ((freq_norm.sec == 0) || (freq_norm.nsec > MAXFREQ * freq_norm.sec) || (freq_norm.nsec < -MAXFREQ * freq_norm.sec)) { time_status |= STA_PPSJITTER; /* restart the frequency calibration interval */ pps_fbase = *raw_ts; printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n"); return; } /* signal is ok */ /* check if the current frequency interval is finished */ if (freq_norm.sec >= (1 << pps_shift)) { pps_calcnt++; /* restart the frequency calibration interval */ pps_fbase = *raw_ts; hardpps_update_freq(freq_norm); } hardpps_update_phase(pts_norm.nsec); } #endif /* CONFIG_NTP_PPS */ static int __init ntp_tick_adj_setup(char *str) { int rc = kstrtos64(str, 0, &ntp_tick_adj); if (rc) return rc; ntp_tick_adj <<= NTP_SCALE_SHIFT; return 1; } __setup("ntp_tick_adj=", ntp_tick_adj_setup); void __init ntp_init(void) { ntp_clear(); ntp_init_cmos_sync(); }
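/*
 * Illustrative user-space sketch (not part of the kernel sources above):
 * how the __do_adjtimex() path is typically reached. The adjtimex() call,
 * the specific fields printed and the suggested mode bits are assumptions
 * for demonstration only, not a statement of this file's API. Guarded with
 * #if 0 so it is never built along with the kernel code.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx;
	int state;

	memset(&tx, 0, sizeof(tx));

	/*
	 * modes == 0 is a pure read: the kernel fills in offset, freq,
	 * status, maxerror/esterror and the PPS fields (pps_fill_timex())
	 * and returns the TIME_* clock state computed above.
	 */
	state = adjtimex(&tx);
	if (state < 0) {
		perror("adjtimex");
		return 1;
	}

	printf("state=%d status=0x%x freq=%ld offset=%ld maxerror=%ld\n",
	       state, (unsigned int)tx.status, tx.freq, tx.offset,
	       tx.maxerror);

	/*
	 * A privileged time daemon would instead set tx.modes, e.g.
	 * ADJ_STATUS | ADJ_FREQUENCY, which lands in
	 * process_adjtimex_modes() above.
	 */
	return 0;
}
#endif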
// SPDX-License-Identifier: GPL-2.0+ /* * adutux - driver for ADU devices from Ontrak Control Systems * This is an experimental driver. Use at your own risk. * This driver is not supported by Ontrak Control Systems. 
* * Copyright (c) 2003 John Homppi (SCO, leave this notice here) * * derived from the Lego USB Tower driver 0.56: * Copyright (c) 2003 David Glance <davidgsf@sourceforge.net> * 2001 Juergen Stuber <stuber@loria.fr> * that was derived from USB Skeleton driver - 0.5 * Copyright (c) 2001 Greg Kroah-Hartman (greg@kroah.com) * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/mutex.h> #include <linux/uaccess.h> #define DRIVER_AUTHOR "John Homppi" #define DRIVER_DESC "adutux (see www.ontrak.net)" /* Define these values to match your device */ #define ADU_VENDOR_ID 0x0a07 #define ADU_PRODUCT_ID 0x0064 /* table of devices that work with this driver */ static const struct usb_device_id device_table[] = { { USB_DEVICE(ADU_VENDOR_ID, ADU_PRODUCT_ID) }, /* ADU100 */ { USB_DEVICE(ADU_VENDOR_ID, ADU_PRODUCT_ID+20) }, /* ADU120 */ { USB_DEVICE(ADU_VENDOR_ID, ADU_PRODUCT_ID+30) }, /* ADU130 */ { USB_DEVICE(ADU_VENDOR_ID, ADU_PRODUCT_ID+100) }, /* ADU200 */ { USB_DEVICE(ADU_VENDOR_ID, ADU_PRODUCT_ID+108) }, /* ADU208 */ { USB_DEVICE(ADU_VENDOR_ID, ADU_PRODUCT_ID+118) }, /* ADU218 */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, device_table); #ifdef CONFIG_USB_DYNAMIC_MINORS #define ADU_MINOR_BASE 0 #else #define ADU_MINOR_BASE 67 #endif /* we can have up to this number of device plugged in at once */ #define MAX_DEVICES 16 #define COMMAND_TIMEOUT (2*HZ) /* * The locking scheme is a vanilla 3-lock: * adu_device.buflock: A spinlock, covers what IRQs touch. * adutux_mutex: A Static lock to cover open_count. It would also cover * any globals, but we don't have them in 2.6. * adu_device.mtx: A mutex to hold across sleepers like copy_from_user. * It covers all of adu_device, except the open_count * and what .buflock covers. 
*/ /* Structure to hold all of our device specific stuff */ struct adu_device { struct mutex mtx; struct usb_device *udev; /* save off the usb device pointer */ struct usb_interface *interface; unsigned int minor; /* the starting minor number for this device */ char serial_number[8]; int open_count; /* number of times this port has been opened */ unsigned long disconnected:1; char *read_buffer_primary; int read_buffer_length; char *read_buffer_secondary; int secondary_head; int secondary_tail; spinlock_t buflock; wait_queue_head_t read_wait; wait_queue_head_t write_wait; char *interrupt_in_buffer; struct usb_endpoint_descriptor *interrupt_in_endpoint; struct urb *interrupt_in_urb; int read_urb_finished; char *interrupt_out_buffer; struct usb_endpoint_descriptor *interrupt_out_endpoint; struct urb *interrupt_out_urb; int out_urb_finished; }; static DEFINE_MUTEX(adutux_mutex); static struct usb_driver adu_driver; static inline void adu_debug_data(struct device *dev, const char *function, int size, const unsigned char *data) { dev_dbg(dev, "%s - length = %d, data = %*ph\n", function, size, size, data); } /* * adu_abort_transfers * aborts transfers and frees associated data structures */ static void adu_abort_transfers(struct adu_device *dev) { unsigned long flags; if (dev->disconnected) return; /* shutdown transfer */ /* XXX Anchor these instead */ spin_lock_irqsave(&dev->buflock, flags); if (!dev->read_urb_finished) { spin_unlock_irqrestore(&dev->buflock, flags); usb_kill_urb(dev->interrupt_in_urb); } else spin_unlock_irqrestore(&dev->buflock, flags); spin_lock_irqsave(&dev->buflock, flags); if (!dev->out_urb_finished) { spin_unlock_irqrestore(&dev->buflock, flags); wait_event_timeout(dev->write_wait, dev->out_urb_finished, COMMAND_TIMEOUT); usb_kill_urb(dev->interrupt_out_urb); } else spin_unlock_irqrestore(&dev->buflock, flags); } static void adu_delete(struct adu_device *dev) { /* free data structures */ usb_free_urb(dev->interrupt_in_urb); usb_free_urb(dev->interrupt_out_urb); kfree(dev->read_buffer_primary); kfree(dev->read_buffer_secondary); kfree(dev->interrupt_in_buffer); kfree(dev->interrupt_out_buffer); usb_put_dev(dev->udev); kfree(dev); } static void adu_interrupt_in_callback(struct urb *urb) { struct adu_device *dev = urb->context; int status = urb->status; unsigned long flags; adu_debug_data(&dev->udev->dev, __func__, urb->actual_length, urb->transfer_buffer); spin_lock_irqsave(&dev->buflock, flags); if (status != 0) { if ((status != -ENOENT) && (status != -ECONNRESET) && (status != -ESHUTDOWN)) { dev_dbg(&dev->udev->dev, "%s : nonzero status received: %d\n", __func__, status); } goto exit; } if (urb->actual_length > 0 && dev->interrupt_in_buffer[0] != 0x00) { if (dev->read_buffer_length < (4 * usb_endpoint_maxp(dev->interrupt_in_endpoint)) - (urb->actual_length)) { memcpy (dev->read_buffer_primary + dev->read_buffer_length, dev->interrupt_in_buffer, urb->actual_length); dev->read_buffer_length += urb->actual_length; dev_dbg(&dev->udev->dev, "%s reading %d\n", __func__, urb->actual_length); } else { dev_dbg(&dev->udev->dev, "%s : read_buffer overflow\n", __func__); } } exit: dev->read_urb_finished = 1; spin_unlock_irqrestore(&dev->buflock, flags); /* always wake up so we recover from errors */ wake_up_interruptible(&dev->read_wait); } static void adu_interrupt_out_callback(struct urb *urb) { struct adu_device *dev = urb->context; int status = urb->status; unsigned long flags; adu_debug_data(&dev->udev->dev, __func__, urb->actual_length, urb->transfer_buffer); if (status != 0) { 
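	/*
	 * Non-zero status: the OUT urb failed or was unlinked. The usual
	 * unlink/disconnect codes (-ENOENT, -ECONNRESET, -ESHUTDOWN) are
	 * ignored quietly; anything else is logged at debug level. Note
	 * that out_urb_finished stays clear on this path, so a writer
	 * blocked in adu_write() only resumes via a signal or its
	 * COMMAND_TIMEOUT.
	 */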
if ((status != -ENOENT) && (status != -ESHUTDOWN) && (status != -ECONNRESET)) { dev_dbg(&dev->udev->dev, "%s :nonzero status received: %d\n", __func__, status); } return; } spin_lock_irqsave(&dev->buflock, flags); dev->out_urb_finished = 1; wake_up(&dev->write_wait); spin_unlock_irqrestore(&dev->buflock, flags); } static int adu_open(struct inode *inode, struct file *file) { struct adu_device *dev = NULL; struct usb_interface *interface; int subminor; int retval; subminor = iminor(inode); retval = mutex_lock_interruptible(&adutux_mutex); if (retval) goto exit_no_lock; interface = usb_find_interface(&adu_driver, subminor); if (!interface) { pr_err("%s - error, can't find device for minor %d\n", __func__, subminor); retval = -ENODEV; goto exit_no_device; } dev = usb_get_intfdata(interface); if (!dev) { retval = -ENODEV; goto exit_no_device; } /* check that nobody else is using the device */ if (dev->open_count) { retval = -EBUSY; goto exit_no_device; } ++dev->open_count; dev_dbg(&dev->udev->dev, "%s: open count %d\n", __func__, dev->open_count); /* save device in the file's private structure */ file->private_data = dev; /* initialize in direction */ dev->read_buffer_length = 0; /* fixup first read by having urb waiting for it */ usb_fill_int_urb(dev->interrupt_in_urb, dev->udev, usb_rcvintpipe(dev->udev, dev->interrupt_in_endpoint->bEndpointAddress), dev->interrupt_in_buffer, usb_endpoint_maxp(dev->interrupt_in_endpoint), adu_interrupt_in_callback, dev, dev->interrupt_in_endpoint->bInterval); dev->read_urb_finished = 0; if (usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL)) dev->read_urb_finished = 1; /* we ignore failure */ /* end of fixup for first read */ /* initialize out direction */ dev->out_urb_finished = 1; retval = 0; exit_no_device: mutex_unlock(&adutux_mutex); exit_no_lock: return retval; } static void adu_release_internal(struct adu_device *dev) { /* decrement our usage count for the device */ --dev->open_count; dev_dbg(&dev->udev->dev, "%s : open count %d\n", __func__, dev->open_count); if (dev->open_count <= 0) { adu_abort_transfers(dev); dev->open_count = 0; } } static int adu_release(struct inode *inode, struct file *file) { struct adu_device *dev; int retval = 0; if (file == NULL) { retval = -ENODEV; goto exit; } dev = file->private_data; if (dev == NULL) { retval = -ENODEV; goto exit; } mutex_lock(&adutux_mutex); /* not interruptible */ if (dev->open_count <= 0) { dev_dbg(&dev->udev->dev, "%s : device not opened\n", __func__); retval = -ENODEV; goto unlock; } adu_release_internal(dev); if (dev->disconnected) { /* the device was unplugged before the file was released */ if (!dev->open_count) /* ... 
and we're the last user */ adu_delete(dev); } unlock: mutex_unlock(&adutux_mutex); exit: return retval; } static ssize_t adu_read(struct file *file, __user char *buffer, size_t count, loff_t *ppos) { struct adu_device *dev; size_t bytes_read = 0; size_t bytes_to_read = count; int retval = 0; int timeout = 0; int should_submit = 0; unsigned long flags; DECLARE_WAITQUEUE(wait, current); dev = file->private_data; if (mutex_lock_interruptible(&dev->mtx)) return -ERESTARTSYS; /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; pr_err("No device or device unplugged %d\n", retval); goto exit; } /* verify that some data was requested */ if (count == 0) { dev_dbg(&dev->udev->dev, "%s : read request of 0 bytes\n", __func__); goto exit; } timeout = COMMAND_TIMEOUT; dev_dbg(&dev->udev->dev, "%s : about to start looping\n", __func__); while (bytes_to_read) { size_t data_in_secondary = dev->secondary_tail - dev->secondary_head; dev_dbg(&dev->udev->dev, "%s : while, data_in_secondary=%zu, status=%d\n", __func__, data_in_secondary, dev->interrupt_in_urb->status); if (data_in_secondary) { /* drain secondary buffer */ size_t amount = min(bytes_to_read, data_in_secondary); if (copy_to_user(buffer, dev->read_buffer_secondary+dev->secondary_head, amount)) { retval = -EFAULT; goto exit; } dev->secondary_head += amount; bytes_read += amount; bytes_to_read -= amount; } else { /* we check the primary buffer */ spin_lock_irqsave (&dev->buflock, flags); if (dev->read_buffer_length) { /* we secure access to the primary */ dev_dbg(&dev->udev->dev, "%s : swap, read_buffer_length = %d\n", __func__, dev->read_buffer_length); swap(dev->read_buffer_primary, dev->read_buffer_secondary); dev->secondary_head = 0; dev->secondary_tail = dev->read_buffer_length; dev->read_buffer_length = 0; spin_unlock_irqrestore(&dev->buflock, flags); /* we have a free buffer so use it */ should_submit = 1; } else { /* even the primary was empty - we may need to do IO */ if (!dev->read_urb_finished) { /* somebody is doing IO */ spin_unlock_irqrestore(&dev->buflock, flags); dev_dbg(&dev->udev->dev, "%s : submitted already\n", __func__); } else { /* we must initiate input */ dev_dbg(&dev->udev->dev, "%s : initiate input\n", __func__); dev->read_urb_finished = 0; spin_unlock_irqrestore(&dev->buflock, flags); usb_fill_int_urb(dev->interrupt_in_urb, dev->udev, usb_rcvintpipe(dev->udev, dev->interrupt_in_endpoint->bEndpointAddress), dev->interrupt_in_buffer, usb_endpoint_maxp(dev->interrupt_in_endpoint), adu_interrupt_in_callback, dev, dev->interrupt_in_endpoint->bInterval); retval = usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL); if (retval) { dev->read_urb_finished = 1; if (retval == -ENOMEM) { retval = bytes_read ? bytes_read : -ENOMEM; } dev_dbg(&dev->udev->dev, "%s : submit failed\n", __func__); goto exit; } } /* we wait for I/O to complete */ set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&dev->read_wait, &wait); spin_lock_irqsave(&dev->buflock, flags); if (!dev->read_urb_finished) { spin_unlock_irqrestore(&dev->buflock, flags); timeout = schedule_timeout(COMMAND_TIMEOUT); } else { spin_unlock_irqrestore(&dev->buflock, flags); set_current_state(TASK_RUNNING); } remove_wait_queue(&dev->read_wait, &wait); if (timeout <= 0) { dev_dbg(&dev->udev->dev, "%s : timeout\n", __func__); retval = bytes_read ? bytes_read : -ETIMEDOUT; goto exit; } if (signal_pending(current)) { dev_dbg(&dev->udev->dev, "%s : signal pending\n", __func__); retval = bytes_read ? 
bytes_read : -EINTR; goto exit; } } } } retval = bytes_read; /* if the primary buffer is empty then use it */ spin_lock_irqsave(&dev->buflock, flags); if (should_submit && dev->read_urb_finished) { dev->read_urb_finished = 0; spin_unlock_irqrestore(&dev->buflock, flags); usb_fill_int_urb(dev->interrupt_in_urb, dev->udev, usb_rcvintpipe(dev->udev, dev->interrupt_in_endpoint->bEndpointAddress), dev->interrupt_in_buffer, usb_endpoint_maxp(dev->interrupt_in_endpoint), adu_interrupt_in_callback, dev, dev->interrupt_in_endpoint->bInterval); if (usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL) != 0) dev->read_urb_finished = 1; /* we ignore failure */ } else { spin_unlock_irqrestore(&dev->buflock, flags); } exit: /* unlock the device */ mutex_unlock(&dev->mtx); return retval; } static ssize_t adu_write(struct file *file, const __user char *buffer, size_t count, loff_t *ppos) { DECLARE_WAITQUEUE(waita, current); struct adu_device *dev; size_t bytes_written = 0; size_t bytes_to_write; size_t buffer_size; unsigned long flags; int retval; dev = file->private_data; retval = mutex_lock_interruptible(&dev->mtx); if (retval) goto exit_nolock; /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; pr_err("No device or device unplugged %d\n", retval); goto exit; } /* verify that we actually have some data to write */ if (count == 0) { dev_dbg(&dev->udev->dev, "%s : write request of 0 bytes\n", __func__); goto exit; } while (count > 0) { add_wait_queue(&dev->write_wait, &waita); set_current_state(TASK_INTERRUPTIBLE); spin_lock_irqsave(&dev->buflock, flags); if (!dev->out_urb_finished) { spin_unlock_irqrestore(&dev->buflock, flags); mutex_unlock(&dev->mtx); if (signal_pending(current)) { dev_dbg(&dev->udev->dev, "%s : interrupted\n", __func__); set_current_state(TASK_RUNNING); retval = -EINTR; goto exit_onqueue; } if (schedule_timeout(COMMAND_TIMEOUT) == 0) { dev_dbg(&dev->udev->dev, "%s - command timed out.\n", __func__); retval = -ETIMEDOUT; goto exit_onqueue; } remove_wait_queue(&dev->write_wait, &waita); retval = mutex_lock_interruptible(&dev->mtx); if (retval) { retval = bytes_written ? bytes_written : retval; goto exit_nolock; } dev_dbg(&dev->udev->dev, "%s : in progress, count = %zd\n", __func__, count); } else { spin_unlock_irqrestore(&dev->buflock, flags); set_current_state(TASK_RUNNING); remove_wait_queue(&dev->write_wait, &waita); dev_dbg(&dev->udev->dev, "%s : sending, count = %zd\n", __func__, count); /* write the data into interrupt_out_buffer from userspace */ buffer_size = usb_endpoint_maxp(dev->interrupt_out_endpoint); bytes_to_write = count > buffer_size ? 
buffer_size : count; dev_dbg(&dev->udev->dev, "%s : buffer_size = %zd, count = %zd, bytes_to_write = %zd\n", __func__, buffer_size, count, bytes_to_write); if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write) != 0) { retval = -EFAULT; goto exit; } /* send off the urb */ usb_fill_int_urb( dev->interrupt_out_urb, dev->udev, usb_sndintpipe(dev->udev, dev->interrupt_out_endpoint->bEndpointAddress), dev->interrupt_out_buffer, bytes_to_write, adu_interrupt_out_callback, dev, dev->interrupt_out_endpoint->bInterval); dev->interrupt_out_urb->actual_length = bytes_to_write; dev->out_urb_finished = 0; retval = usb_submit_urb(dev->interrupt_out_urb, GFP_KERNEL); if (retval < 0) { dev->out_urb_finished = 1; dev_err(&dev->udev->dev, "Couldn't submit " "interrupt_out_urb %d\n", retval); goto exit; } buffer += bytes_to_write; count -= bytes_to_write; bytes_written += bytes_to_write; } } mutex_unlock(&dev->mtx); return bytes_written; exit: mutex_unlock(&dev->mtx); exit_nolock: return retval; exit_onqueue: remove_wait_queue(&dev->write_wait, &waita); return retval; } /* file operations needed when we register this driver */ static const struct file_operations adu_fops = { .owner = THIS_MODULE, .read = adu_read, .write = adu_write, .open = adu_open, .release = adu_release, .llseek = noop_llseek, }; /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with devfs and the driver core */ static struct usb_class_driver adu_class = { .name = "usb/adutux%d", .fops = &adu_fops, .minor_base = ADU_MINOR_BASE, }; /* * adu_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. */ static int adu_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct adu_device *dev = NULL; int retval = -ENOMEM; int in_end_size; int out_end_size; int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct adu_device), GFP_KERNEL); if (!dev) return -ENOMEM; mutex_init(&dev->mtx); spin_lock_init(&dev->buflock); dev->udev = usb_get_dev(udev); init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); res = usb_find_common_endpoints_reverse(interface->cur_altsetting, NULL, NULL, &dev->interrupt_in_endpoint, &dev->interrupt_out_endpoint); if (res) { dev_err(&interface->dev, "interrupt endpoints not found\n"); retval = res; goto error; } in_end_size = usb_endpoint_maxp(dev->interrupt_in_endpoint); out_end_size = usb_endpoint_maxp(dev->interrupt_out_endpoint); dev->read_buffer_primary = kmalloc((4 * in_end_size), GFP_KERNEL); if (!dev->read_buffer_primary) goto error; /* debug code prime the buffer */ memset(dev->read_buffer_primary, 'a', in_end_size); memset(dev->read_buffer_primary + in_end_size, 'b', in_end_size); memset(dev->read_buffer_primary + (2 * in_end_size), 'c', in_end_size); memset(dev->read_buffer_primary + (3 * in_end_size), 'd', in_end_size); dev->read_buffer_secondary = kmalloc((4 * in_end_size), GFP_KERNEL); if (!dev->read_buffer_secondary) goto error; /* debug code prime the buffer */ memset(dev->read_buffer_secondary, 'e', in_end_size); memset(dev->read_buffer_secondary + in_end_size, 'f', in_end_size); memset(dev->read_buffer_secondary + (2 * in_end_size), 'g', in_end_size); memset(dev->read_buffer_secondary + (3 * in_end_size), 'h', in_end_size); dev->interrupt_in_buffer = kmalloc(in_end_size, GFP_KERNEL); if (!dev->interrupt_in_buffer) goto error; /* 
debug code prime the buffer */ memset(dev->interrupt_in_buffer, 'i', in_end_size); dev->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->interrupt_in_urb) goto error; dev->interrupt_out_buffer = kmalloc(out_end_size, GFP_KERNEL); if (!dev->interrupt_out_buffer) goto error; dev->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->interrupt_out_urb) goto error; if (!usb_string(udev, udev->descriptor.iSerialNumber, dev->serial_number, sizeof(dev->serial_number))) { dev_err(&interface->dev, "Could not retrieve serial number\n"); retval = -EIO; goto error; } dev_dbg(&interface->dev, "serial_number=%s", dev->serial_number); /* we can register the device now, as it is ready */ usb_set_intfdata(interface, dev); retval = usb_register_dev(interface, &adu_class); if (retval) { /* something prevented us from registering this driver */ dev_err(&interface->dev, "Not able to get a minor for this device.\n"); usb_set_intfdata(interface, NULL); goto error; } dev->minor = interface->minor; /* let the user know what node this device is now attached to */ dev_info(&interface->dev, "ADU%d %s now attached to /dev/usb/adutux%d\n", le16_to_cpu(udev->descriptor.idProduct), dev->serial_number, (dev->minor - ADU_MINOR_BASE)); return 0; error: adu_delete(dev); return retval; } /* * adu_disconnect * * Called by the usb core when the device is removed from the system. */ static void adu_disconnect(struct usb_interface *interface) { struct adu_device *dev; dev = usb_get_intfdata(interface); usb_deregister_dev(interface, &adu_class); usb_poison_urb(dev->interrupt_in_urb); usb_poison_urb(dev->interrupt_out_urb); mutex_lock(&adutux_mutex); usb_set_intfdata(interface, NULL); mutex_lock(&dev->mtx); /* not interruptible */ dev->disconnected = 1; mutex_unlock(&dev->mtx); /* if the device is not opened, then we clean up right now */ if (!dev->open_count) adu_delete(dev); mutex_unlock(&adutux_mutex); } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver adu_driver = { .name = "adutux", .probe = adu_probe, .disconnect = adu_disconnect, .id_table = device_table, }; module_usb_driver(adu_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
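/*
 * Illustrative user-space sketch (not part of the driver above): minimal
 * access to the adutux character device registered by adu_probe(). The
 * device node path and the "RK0" command string are assumptions for
 * demonstration only; consult the ADU documentation for real commands.
 * Guarded with #if 0 so it is never built along with the kernel code.
 */
#if 0
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd;

	/* adu_probe() announces the node as /dev/usb/adutux<minor> */
	fd = open("/dev/usb/adutux0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* adu_write() copies this buffer into the interrupt OUT urb */
	if (write(fd, "RK0", 3) < 0)
		perror("write");

	/*
	 * adu_read() drains the secondary/primary read buffers that
	 * adu_interrupt_in_callback() fills, waiting up to COMMAND_TIMEOUT
	 * for data to arrive.
	 */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("reply: %s\n", buf);
	}

	close(fd);
	return 0;
}
#endif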
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *	Alexey Kuznetsov	:	Untied from IPv4 stack.
 *	Cyrus Durgin		:	Fixed kerneld for kmod.
 *	Michal Ostrowski	:	Module initialization cleanup.
 *	Ulises Alonso		:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *	Eric Biederman		:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *	Johann Baudy		:	Added TX RING.
 *	Chetan Loke		:	Implemented TPACKET_V3 block abstraction
 *					layer.
 *	Copyright (C) 2011, <lokec@ccs.neu.edu>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <linux/percpu.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
#include <linux/bpf.h>
#include <net/compat.h>
#include <linux/netfilter_netdev.h>

#include "internal.h"

/*
   Assumptions:
   - If the device has no dev->header_ops->create, there is no LL header
     visible above the device. In this case, its hard_header_len should be 0.
     The device may prepend its own header internally. In this case, its
     needed_headroom should be set to the space needed for it to add its
     internal header.
     For example, a WiFi driver pretending to be an Ethernet driver should
     set its hard_header_len to be the Ethernet header length, and set its
     needed_headroom to be (the real WiFi header length - the fake Ethernet
     header length).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev_has_header(dev) == true
   mac_header -> ll header
   data       -> data

Outgoing, dev_has_header(dev) == true
   mac_header -> ll header
   data       -> ll header

Incoming, dev_has_header(dev) == false
   mac_header -> data
     However drivers often make it point to the ll header.
     This is incorrect because the ll header should be invisible to us.
   data       -> data

Outgoing, dev_has_header(dev) == false
   mac_header -> data. ll header is invisible to us.
   data       -> data

Resume
  If dev_has_header(dev) == false we are unable to restore the ll header,
  because it is invisible to us.


On transmit:
------------

dev_has_header(dev) == true
   mac_header -> ll header
   data       -> ll header

dev_has_header(dev) == false (ll header is invisible to us)
   mac_header -> data
   data       -> data

   We should set network_header on output to the correct position,
   packet classifier depends on it.
 */

/* Private packet socket structures. */

/* identical to struct packet_mreq except it has
 * a longer address field.
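 *
 * For orientation only, not part of the kernel build: a minimal userspace
 * sketch using the plain struct packet_mreq that this structure mirrors,
 * here with PACKET_MR_PROMISC (no address bytes needed); ifindex would
 * typically come from if_nametoindex().
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *
 *	static int enable_promisc(int fd, int ifindex)
 *	{
 *		struct packet_mreq mr;
 *
 *		memset(&mr, 0, sizeof(mr));
 *		mr.mr_ifindex = ifindex;
 *		mr.mr_type = PACKET_MR_PROMISC;
 *		return setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *				  &mr, sizeof(mr));
 *	}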
*/ struct packet_mreq_max { int mr_ifindex; unsigned short mr_type; unsigned short mr_alen; unsigned char mr_address[MAX_ADDR_LEN]; }; union tpacket_uhdr { struct tpacket_hdr *h1; struct tpacket2_hdr *h2; struct tpacket3_hdr *h3; void *raw; }; static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); #define V3_ALIGNMENT (8) #define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) #define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) #define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) #define BLOCK_O2PRIV(x) ((x)->offset_to_priv) struct packet_sock; static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status); static void packet_increment_head(struct packet_ring_buffer *buff); static int prb_curr_blk_in_use(struct tpacket_block_desc *); static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, struct packet_sock *); static void prb_retire_current_block(struct tpacket_kbdq_core *, struct packet_sock *, unsigned int status); static int prb_queue_frozen(struct tpacket_kbdq_core *); static void prb_open_block(struct tpacket_kbdq_core *, struct tpacket_block_desc *); static void prb_retire_rx_blk_timer_expired(struct timer_list *); static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void prb_clear_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); static u16 packet_pick_tx_queue(struct sk_buff *skb); struct packet_skb_cb { union { struct sockaddr_pkt pkt; union { /* Trick: alias skb original length with * ll.sll_family and ll.protocol in order * to save room. */ unsigned int origlen; struct sockaddr_ll ll; }; } sa; }; #define vio_le() virtio_legacy_is_little_endian() #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) #define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) #define GET_PBLOCK_DESC(x, bid) \ ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer)) #define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \ ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer)) #define GET_NEXT_PRB_BLK_NUM(x) \ (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? 
\ ((x)->kactive_blk_num+1) : 0) static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); #ifdef CONFIG_NETFILTER_EGRESS static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) { struct sk_buff *next, *head = NULL, *tail; int rc; rcu_read_lock(); for (; skb != NULL; skb = next) { next = skb->next; skb_mark_not_on_list(skb); if (!nf_hook_egress(skb, &rc, skb->dev)) continue; if (!head) head = skb; else tail->next = skb; tail = skb; } rcu_read_unlock(); return head; } #endif static int packet_xmit(const struct packet_sock *po, struct sk_buff *skb) { if (!packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS)) return dev_queue_xmit(skb); #ifdef CONFIG_NETFILTER_EGRESS if (nf_hook_egress_active()) { skb = nf_hook_direct_egress(skb); if (!skb) return NET_XMIT_DROP; } #endif return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); } static struct net_device *packet_cached_dev_get(struct packet_sock *po) { struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(po->cached_dev); dev_hold(dev); rcu_read_unlock(); return dev; } static void packet_cached_dev_assign(struct packet_sock *po, struct net_device *dev) { rcu_assign_pointer(po->cached_dev, dev); } static void packet_cached_dev_reset(struct packet_sock *po) { RCU_INIT_POINTER(po->cached_dev, NULL); } static u16 packet_pick_tx_queue(struct sk_buff *skb) { struct net_device *dev = skb->dev; const struct net_device_ops *ops = dev->netdev_ops; int cpu = raw_smp_processor_id(); u16 queue_index; #ifdef CONFIG_XPS skb->sender_cpu = cpu + 1; #endif skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb, NULL); queue_index = netdev_cap_txqueue(dev, queue_index); } else { queue_index = netdev_pick_tx(dev, skb, NULL); } return queue_index; } /* __register_prot_hook must be invoked through register_prot_hook * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */ static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); sock_hold(sk); packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1); } } static void register_prot_hook(struct sock *sk) { lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); __register_prot_hook(sk); } /* If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the * callers responsibility to take care of this. 
*/ static void __unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); lockdep_assert_held_once(&po->bind_lock); packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0); if (po->fanout) __fanout_unlink(sk, po); else __dev_remove_pack(&po->prot_hook); __sock_put(sk); if (sync) { spin_unlock(&po->bind_lock); synchronize_net(); spin_lock(&po->bind_lock); } } static void unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) __unregister_prot_hook(sk, sync); } static inline struct page * __pure pgv_to_page(void *addr) { if (is_vmalloc_addr(addr)) return vmalloc_to_page(addr); return virt_to_page(addr); } static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union tpacket_uhdr h; /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */ h.raw = frame; switch (po->tp_version) { case TPACKET_V1: WRITE_ONCE(h.h1->tp_status, status); flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: WRITE_ONCE(h.h2->tp_status, status); flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; case TPACKET_V3: WRITE_ONCE(h.h3->tp_status, status); flush_dcache_page(pgv_to_page(&h.h3->tp_status)); break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); } smp_wmb(); } static int __packet_get_status(const struct packet_sock *po, void *frame) { union tpacket_uhdr h; smp_rmb(); /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */ h.raw = frame; switch (po->tp_version) { case TPACKET_V1: flush_dcache_page(pgv_to_page(&h.h1->tp_status)); return READ_ONCE(h.h1->tp_status); case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return READ_ONCE(h.h2->tp_status); case TPACKET_V3: flush_dcache_page(pgv_to_page(&h.h3->tp_status)); return READ_ONCE(h.h3->tp_status); default: WARN(1, "TPACKET version not supported.\n"); BUG(); return 0; } } static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, unsigned int flags) { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (shhwtstamps && (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts)) return TP_STATUS_TS_RAW_HARDWARE; if ((flags & SOF_TIMESTAMPING_SOFTWARE) && ktime_to_timespec64_cond(skb_tstamp(skb), ts)) return TP_STATUS_TS_SOFTWARE; return 0; } static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, struct sk_buff *skb) { union tpacket_uhdr h; struct timespec64 ts; __u32 ts_status; if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp)))) return 0; h.raw = frame; /* * versions 1 through 3 overflow the timestamps in y2106, since they * all store the seconds in a 32-bit unsigned integer. * If we create a version 4, that should have a 64-bit timestamp, * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit * nanoseconds. 
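 *
 * (2^32 seconds is roughly 136 years, so a 32-bit tp_sec counted from the
 * 1970 epoch wraps in February 2106.)
 *
 * Illustrative userspace sketch, not part of this file: selecting which
 * timestamp source tpacket_get_timestamp() above will honour. Assumes fd is
 * an AF_PACKET socket; hardware timestamps additionally require the NIC to
 * be configured for them (e.g. via SIOCSHWTSTAMP).
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *	#include <linux/net_tstamp.h>
 *
 *	static int request_timestamps(int fd)
 *	{
 *		int req = SOF_TIMESTAMPING_RAW_HARDWARE;  // or SOF_TIMESTAMPING_SOFTWARE
 *
 *		return setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, &req, sizeof(req));
 *	}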
*/ switch (po->tp_version) { case TPACKET_V1: h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; break; case TPACKET_V2: h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; break; case TPACKET_V3: h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; break; default: WARN(1, "TPACKET version not supported.\n"); BUG(); } /* one flush is safe, as both fields always lie on the same cacheline */ flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); smp_wmb(); return ts_status; } static void *packet_lookup_frame(const struct packet_sock *po, const struct packet_ring_buffer *rb, unsigned int position, int status) { unsigned int pg_vec_pos, frame_offset; union tpacket_uhdr h; pg_vec_pos = position / rb->frames_per_block; frame_offset = position % rb->frames_per_block; h.raw = rb->pg_vec[pg_vec_pos].buffer + (frame_offset * rb->frame_size); if (status != __packet_get_status(po, h.raw)) return NULL; return h.raw; } static void *packet_current_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { return packet_lookup_frame(po, rb, rb->head, status); } static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) { del_timer_sync(&pkc->retire_blk_timer); } static void prb_shutdown_retire_blk_timer(struct packet_sock *po, struct sk_buff_head *rb_queue) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } static void prb_setup_retire_blk_timer(struct packet_sock *po) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired, 0); pkc->retire_blk_timer.expires = jiffies; } static int prb_calc_retire_blk_tmo(struct packet_sock *po, int blk_size_in_bytes) { struct net_device *dev; unsigned int mbits, div; struct ethtool_link_ksettings ecmd; int err; rtnl_lock(); dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); if (unlikely(!dev)) { rtnl_unlock(); return DEFAULT_PRB_RETIRE_TOV; } err = __ethtool_get_link_ksettings(dev, &ecmd); rtnl_unlock(); if (err) return DEFAULT_PRB_RETIRE_TOV; /* If the link speed is so slow you don't really * need to worry about perf anyways */ if (ecmd.base.speed < SPEED_1000 || ecmd.base.speed == SPEED_UNKNOWN) return DEFAULT_PRB_RETIRE_TOV; div = ecmd.base.speed / 1000; mbits = (blk_size_in_bytes * 8) / (1024 * 1024); if (div) mbits /= div; if (div) return mbits + 1; return mbits; } static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, union tpacket_req_u *req_u) { p1->feature_req_word = req_u->req3.tp_feature_req_word; } static void init_prb_bdqc(struct packet_sock *po, struct packet_ring_buffer *rb, struct pgv *pg_vec, union tpacket_req_u *req_u) { struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd; memset(p1, 0x0, sizeof(*p1)); p1->knxt_seq_num = 1; p1->pkbdq = pg_vec; pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; p1->pkblk_start = pg_vec[0].buffer; p1->kblk_size = req_u->req3.tp_block_size; p1->knum_blocks = req_u->req3.tp_block_nr; p1->hdrlen = po->tp_hdrlen; p1->version = po->tp_version; p1->last_kactive_blk_num = 0; po->stats.stats3.tp_freeze_q_cnt = 0; if (req_u->req3.tp_retire_blk_tov) p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; else p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, req_u->req3.tp_block_size); p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; 
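/*
 * Illustrative only, not part of the kernel build: a minimal userspace
 * sketch of the TPACKET_V3 request whose req3 fields are consumed here.
 * The block/frame sizes and the 60 ms timeout are arbitrary example values,
 * fd is an already-created AF_PACKET socket, and error handling is omitted.
 *
 *	#include <string.h>
 *	#include <sys/mman.h>
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *
 *	static void *setup_v3_rx_ring(int fd, struct tpacket_req3 *req)
 *	{
 *		int ver = TPACKET_V3;
 *
 *		setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *		memset(req, 0, sizeof(*req));
 *		req->tp_block_size = 1 << 22;		// 4 MiB, multiple of the page size
 *		req->tp_block_nr = 64;
 *		req->tp_frame_size = 2048;		// must divide tp_block_size
 *		req->tp_frame_nr = (req->tp_block_size / req->tp_frame_size) *
 *				   req->tp_block_nr;
 *		req->tp_retire_blk_tov = 60;		// ms; 0 lets prb_calc_retire_blk_tmo() pick
 *		req->tp_sizeof_priv = 0;
 *		setsockopt(fd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req));
 *		return mmap(NULL, (size_t)req->tp_block_size * req->tp_block_nr,
 *			    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	}
 */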
rwlock_init(&p1->blk_fill_in_prog_lock); p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); prb_setup_retire_blk_timer(po); prb_open_block(p1, pbd); } /* Do NOT update the last_blk_num first. * Assumes sk_buff_head lock is held. */ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) { mod_timer(&pkc->retire_blk_timer, jiffies + pkc->tov_in_jiffies); pkc->last_kactive_blk_num = pkc->kactive_blk_num; } /* * Timer logic: * 1) We refresh the timer only when we open a block. * By doing this we don't waste cycles refreshing the timer * on packet-by-packet basis. * * With a 1MB block-size, on a 1Gbps line, it will take * i) ~8 ms to fill a block + ii) memcpy etc. * In this cut we are not accounting for the memcpy time. * * So, if the user sets the 'tmo' to 10ms then the timer * will never fire while the block is still getting filled * (which is what we want). However, the user could choose * to close a block early and that's fine. * * But when the timer does fire, we check whether or not to refresh it. * Since the tmo granularity is in msecs, it is not too expensive * to refresh the timer, lets say every '8' msecs. * Either the user can set the 'tmo' or we can derive it based on * a) line-speed and b) block-size. * prb_calc_retire_blk_tmo() calculates the tmo. * */ static void prb_retire_rx_blk_timer_expired(struct timer_list *t) { struct packet_sock *po = from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer); struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; spin_lock(&po->sk.sk_receive_queue.lock); frozen = prb_queue_frozen(pkc); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); if (unlikely(pkc->delete_blk_timer)) goto out; /* We only need to plug the race when the block is partially filled. * tpacket_rcv: * lock(); increment BLOCK_NUM_PKTS; unlock() * copy_bits() is in progress ... * timer fires on other cpu: * we can't retire the current block because copy_bits * is in progress. * */ if (BLOCK_NUM_PKTS(pbd)) { /* Waiting for skb_copy_bits to finish... */ write_lock(&pkc->blk_fill_in_prog_lock); write_unlock(&pkc->blk_fill_in_prog_lock); } if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { if (!frozen) { if (!BLOCK_NUM_PKTS(pbd)) { /* An empty block. Just refresh the timer. */ goto refresh_timer; } prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); if (!prb_dispatch_next_block(pkc, po)) goto refresh_timer; else goto out; } else { /* Case 1. Queue was frozen because user-space was * lagging behind. */ if (prb_curr_blk_in_use(pbd)) { /* * Ok, user-space is still behind. * So just refresh the timer. */ goto refresh_timer; } else { /* Case 2. queue was frozen,user-space caught up, * now the link went idle && the timer fired. * We don't have a block to close.So we open this * block and restart the timer. * opening a block thaws the queue,restarts timer * Thawing/timer-refresh is a side effect. 
*/ prb_open_block(pkc, pbd); goto out; } } } refresh_timer: _prb_refresh_rx_retire_blk_timer(pkc); out: spin_unlock(&po->sk.sk_receive_queue.lock); } static void prb_flush_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, __u32 status) { /* Flush everything minus the block header */ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 u8 *start, *end; start = (u8 *)pbd1; /* Skip the block header(we know header WILL fit in 4K) */ start += PAGE_SIZE; end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); for (; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); smp_wmb(); #endif /* Now update the block status. */ BLOCK_STATUS(pbd1) = status; /* Flush the block header */ #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 start = (u8 *)pbd1; flush_dcache_page(pgv_to_page(start)); smp_wmb(); #endif } /* * Side effect: * * 1) flush the block * 2) Increment active_blk_num * * Note:We DONT refresh the timer on purpose. * Because almost always the next block will be opened. */ static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1, struct packet_sock *po, unsigned int stat) { __u32 status = TP_STATUS_USER | stat; struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; struct sock *sk = &po->sk; if (atomic_read(&po->tp_drops)) status |= TP_STATUS_LOSING; last_pkt = (struct tpacket3_hdr *)pkc1->prev; last_pkt->tp_next_offset = 0; /* Get the ts of the last pkt */ if (BLOCK_NUM_PKTS(pbd1)) { h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; } else { /* Ok, we tmo'd - so get the current time. * * It shouldn't really happen as we don't close empty * blocks. See prb_retire_rx_blk_timer_expired(). */ struct timespec64 ts; ktime_get_real_ts64(&ts); h1->ts_last_pkt.ts_sec = ts.tv_sec; h1->ts_last_pkt.ts_nsec = ts.tv_nsec; } smp_wmb(); /* Flush the block */ prb_flush_block(pkc1, pbd1, status); sk->sk_data_ready(sk); pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); } static void prb_thaw_queue(struct tpacket_kbdq_core *pkc) { pkc->reset_pending_on_curr_blk = 0; } /* * Side effect of opening a block: * * 1) prb_queue is thawed. * 2) retire_blk_timer is refreshed. * */ static void prb_open_block(struct tpacket_kbdq_core *pkc1, struct tpacket_block_desc *pbd1) { struct timespec64 ts; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; smp_rmb(); /* We could have just memset this but we will lose the * flexibility of making the priv area sticky */ BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; BLOCK_NUM_PKTS(pbd1) = 0; BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); ktime_get_real_ts64(&ts); h1->ts_first_pkt.ts_sec = ts.tv_sec; h1->ts_first_pkt.ts_nsec = ts.tv_nsec; pkc1->pkblk_start = (char *)pbd1; pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; pbd1->version = pkc1->version; pkc1->prev = pkc1->nxt_offset; pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; prb_thaw_queue(pkc1); _prb_refresh_rx_retire_blk_timer(pkc1); smp_wmb(); } /* * Queue freeze logic: * 1) Assume tp_block_nr = 8 blocks. * 2) At time 't0', user opens Rx ring. * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7 * 4) user-space is either sleeping or processing block '0'. * 5) tpacket_rcv is currently filling block '7', since there is no space left, * it will close block-7,loop around and try to fill block '0'. 
* call-flow: * __packet_lookup_frame_in_block * prb_retire_current_block() * prb_dispatch_next_block() * |->(BLOCK_STATUS == USER) evaluates to true * 5.1) Since block-0 is currently in-use, we just freeze the queue. * 6) Now there are two cases: * 6.1) Link goes idle right after the queue is frozen. * But remember, the last open_block() refreshed the timer. * When this timer expires,it will refresh itself so that we can * re-open block-0 in near future. * 6.2) Link is busy and keeps on receiving packets. This is a simple * case and __packet_lookup_frame_in_block will check if block-0 * is free and can now be re-used. */ static void prb_freeze_queue(struct tpacket_kbdq_core *pkc, struct packet_sock *po) { pkc->reset_pending_on_curr_blk = 1; po->stats.stats3.tp_freeze_q_cnt++; } #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) /* * If the next block is free then we will dispatch it * and return a good offset. * Else, we will freeze the queue. * So, caller must check the return value. */ static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc, struct packet_sock *po) { struct tpacket_block_desc *pbd; smp_rmb(); /* 1. Get current block num */ pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* 2. If this block is currently in_use then freeze the queue */ if (TP_STATUS_USER & BLOCK_STATUS(pbd)) { prb_freeze_queue(pkc, po); return NULL; } /* * 3. * open this block and return the offset where the first packet * needs to get stored. */ prb_open_block(pkc, pbd); return (void *)pkc->nxt_offset; } static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, struct packet_sock *po, unsigned int status) { struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* retire/close the current block */ if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) { /* * Plug the case where copy_bits() is in progress on * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't * have space to copy the pkt in the current block and * called prb_retire_current_block() * * We don't need to worry about the TMO case because * the timer-handler already handled this case. */ if (!(status & TP_STATUS_BLK_TMO)) { /* Waiting for skb_copy_bits to finish... 
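 *
 * prb_fill_curr_block() holds blk_fill_in_prog_lock for read while a
 * packet is still being copied into the block, and the reader side is
 * released in prb_clear_blk_fill_status(). Taking the write lock and
 * immediately dropping it below therefore just waits until every
 * in-flight fill has finished.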
*/ write_lock(&pkc->blk_fill_in_prog_lock); write_unlock(&pkc->blk_fill_in_prog_lock); } prb_close_block(pkc, pbd, po, status); return; } } static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd) { return TP_STATUS_USER & BLOCK_STATUS(pbd); } static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) { return pkc->reset_pending_on_curr_blk; } static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) __releases(&pkc->blk_fill_in_prog_lock) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); read_unlock(&pkc->blk_fill_in_prog_lock); } static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb); } static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_rxhash = 0; } static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { if (skb_vlan_tag_present(pkc->skb)) { ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { ppd->hv1.tp_vlan_tci = 0; ppd->hv1.tp_vlan_tpid = 0; ppd->tp_status = TP_STATUS_AVAILABLE; } } static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { ppd->hv1.tp_padding = 0; prb_fill_vlan_info(pkc, ppd); if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) prb_fill_rxhash(pkc, ppd); else prb_clear_rxhash(pkc, ppd); } static void prb_fill_curr_block(char *curr, struct tpacket_kbdq_core *pkc, struct tpacket_block_desc *pbd, unsigned int len) __acquires(&pkc->blk_fill_in_prog_lock) { struct tpacket3_hdr *ppd; ppd = (struct tpacket3_hdr *)curr; ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len); pkc->prev = curr; pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); BLOCK_NUM_PKTS(pbd) += 1; read_lock(&pkc->blk_fill_in_prog_lock); prb_run_all_ft_ops(pkc, ppd); } /* Assumes caller has the sk->rx_queue.lock */ static void *__packet_lookup_frame_in_block(struct packet_sock *po, struct sk_buff *skb, unsigned int len ) { struct tpacket_kbdq_core *pkc; struct tpacket_block_desc *pbd; char *curr, *end; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); /* Queue is frozen when user space is lagging behind */ if (prb_queue_frozen(pkc)) { /* * Check if that last block which caused the queue to freeze, * is still in_use by user-space. */ if (prb_curr_blk_in_use(pbd)) { /* Can't record this packet */ return NULL; } else { /* * Ok, the block was released by user-space. * Now let's open that block. * opening a block also thaws the queue. * Thawing is a side effect. */ prb_open_block(pkc, pbd); } } smp_mb(); curr = pkc->nxt_offset; pkc->skb = skb; end = (char *)pbd + pkc->kblk_size; /* first try the current block */ if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { prb_fill_curr_block(curr, pkc, pbd, len); return (void *)curr; } /* Ok, close the current block */ prb_retire_current_block(pkc, po, 0); /* Now, try to dispatch the next block */ curr = (char *)prb_dispatch_next_block(pkc, po); if (curr) { pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); prb_fill_curr_block(curr, pkc, pbd, len); return (void *)curr; } /* * No free blocks are available.user_space hasn't caught up yet. * Queue was just frozen and now this packet will get dropped. 
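 *
 * Such drops, and the freeze itself, are visible to the ring user through
 * getsockopt(PACKET_STATISTICS). Illustrative sketch, not part of this
 * file; assumes fd is a TPACKET_V3 packet socket:
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *
 *	static void dump_ring_stats(int fd)
 *	{
 *		struct tpacket_stats_v3 st;
 *		socklen_t len = sizeof(st);
 *
 *		if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
 *			printf("drops %u, queue freezes %u\n",
 *			       st.tp_drops, st.tp_freeze_q_cnt);
 *	}
 *
 * The counters are clear-on-read, matching the TP_STATUS_LOSING note
 * further down in tpacket_rcv().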
*/ return NULL; } static void *packet_current_rx_frame(struct packet_sock *po, struct sk_buff *skb, int status, unsigned int len) { char *curr = NULL; switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: curr = packet_lookup_frame(po, &po->rx_ring, po->rx_ring.head, status); return curr; case TPACKET_V3: return __packet_lookup_frame_in_block(po, skb, len); default: WARN(1, "TPACKET version not supported\n"); BUG(); return NULL; } } static void *prb_lookup_block(const struct packet_sock *po, const struct packet_ring_buffer *rb, unsigned int idx, int status) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); if (status != BLOCK_STATUS(pbd)) return NULL; return pbd; } static int prb_previous_blk_num(struct packet_ring_buffer *rb) { unsigned int prev; if (rb->prb_bdqc.kactive_blk_num) prev = rb->prb_bdqc.kactive_blk_num-1; else prev = rb->prb_bdqc.knum_blocks-1; return prev; } /* Assumes caller has held the rx_queue.lock */ static void *__prb_previous_block(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { unsigned int previous = prb_previous_blk_num(rb); return prb_lookup_block(po, rb, previous, status); } static void *packet_previous_rx_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { if (po->tp_version <= TPACKET_V2) return packet_previous_frame(po, rb, status); return __prb_previous_block(po, rb, status); } static void packet_increment_rx_head(struct packet_sock *po, struct packet_ring_buffer *rb) { switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: return packet_increment_head(rb); case TPACKET_V3: default: WARN(1, "TPACKET version not supported.\n"); BUG(); return; } } static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max; return packet_lookup_frame(po, rb, previous, status); } static void packet_increment_head(struct packet_ring_buffer *buff) { buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } static void packet_inc_pending(struct packet_ring_buffer *rb) { this_cpu_inc(*rb->pending_refcnt); } static void packet_dec_pending(struct packet_ring_buffer *rb) { this_cpu_dec(*rb->pending_refcnt); } static unsigned int packet_read_pending(const struct packet_ring_buffer *rb) { unsigned int refcnt = 0; int cpu; /* We don't use pending refcount in rx_ring. 
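 * Only the TX ring allocates it (packet_alloc_pending() below leaves
 * rx_ring.pending_refcnt NULL); it counts frames still in flight so that
 * tpacket_destruct_skb() can complete(&po->skb_completion) once the last
 * one has been freed.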
*/ if (rb->pending_refcnt == NULL) return 0; for_each_possible_cpu(cpu) refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu); return refcnt; } static int packet_alloc_pending(struct packet_sock *po) { po->rx_ring.pending_refcnt = NULL; po->tx_ring.pending_refcnt = alloc_percpu(unsigned int); if (unlikely(po->tx_ring.pending_refcnt == NULL)) return -ENOBUFS; return 0; } static void packet_free_pending(struct packet_sock *po) { free_percpu(po->tx_ring.pending_refcnt); } #define ROOM_POW_OFF 2 #define ROOM_NONE 0x0 #define ROOM_LOW 0x1 #define ROOM_NORMAL 0x2 static bool __tpacket_has_room(const struct packet_sock *po, int pow_off) { int idx, len; len = READ_ONCE(po->rx_ring.frame_max) + 1; idx = READ_ONCE(po->rx_ring.head); if (pow_off) idx += len >> pow_off; if (idx >= len) idx -= len; return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); } static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off) { int idx, len; len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks); idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num); if (pow_off) idx += len >> pow_off; if (idx >= len) idx -= len; return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); } static int __packet_rcv_has_room(const struct packet_sock *po, const struct sk_buff *skb) { const struct sock *sk = &po->sk; int ret = ROOM_NONE; if (po->prot_hook.func != tpacket_rcv) { int rcvbuf = READ_ONCE(sk->sk_rcvbuf); int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc) - (skb ? skb->truesize : 0); if (avail > (rcvbuf >> ROOM_POW_OFF)) return ROOM_NORMAL; else if (avail > 0) return ROOM_LOW; else return ROOM_NONE; } if (po->tp_version == TPACKET_V3) { if (__tpacket_v3_has_room(po, ROOM_POW_OFF)) ret = ROOM_NORMAL; else if (__tpacket_v3_has_room(po, 0)) ret = ROOM_LOW; } else { if (__tpacket_has_room(po, ROOM_POW_OFF)) ret = ROOM_NORMAL; else if (__tpacket_has_room(po, 0)) ret = ROOM_LOW; } return ret; } static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { bool pressure; int ret; ret = __packet_rcv_has_room(po, skb); pressure = ret != ROOM_NORMAL; if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure) packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure); return ret; } static void packet_rcv_try_clear_pressure(struct packet_sock *po) { if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false); } static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive packet socket: %p\n", sk); return; } } static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) { u32 *history = po->rollover->history; u32 victim, rxhash; int i, count = 0; rxhash = skb_get_hash(skb); for (i = 0; i < ROLLOVER_HLEN; i++) if (READ_ONCE(history[i]) == rxhash) count++; victim = get_random_u32_below(ROLLOVER_HLEN); /* Avoid dirtying the cache line if possible */ if (READ_ONCE(history[victim]) != rxhash) WRITE_ONCE(history[victim], rxhash); return count > (ROLLOVER_HLEN >> 1); } static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { unsigned int val = atomic_inc_return(&f->rr_cur); return val % num; } static unsigned 
int fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return smp_processor_id() % num; } static unsigned int fanout_demux_rnd(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return get_random_u32_below(num); } static unsigned int fanout_demux_rollover(struct packet_fanout *f, struct sk_buff *skb, unsigned int idx, bool try_self, unsigned int num) { struct packet_sock *po, *po_next, *po_skip = NULL; unsigned int i, j, room = ROOM_NONE; po = pkt_sk(rcu_dereference(f->arr[idx])); if (try_self) { room = packet_rcv_has_room(po, skb); if (room == ROOM_NORMAL || (room == ROOM_LOW && !fanout_flow_is_huge(po, skb))) return idx; po_skip = po; } i = j = min_t(int, po->rollover->sock, num - 1); do { po_next = pkt_sk(rcu_dereference(f->arr[i])); if (po_next != po_skip && !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) && packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) po->rollover->sock = i; atomic_long_inc(&po->rollover->num); if (room == ROOM_LOW) atomic_long_inc(&po->rollover->num_huge); return i; } if (++i == num) i = 0; } while (i != j); atomic_long_inc(&po->rollover->num_failed); return idx; } static unsigned int fanout_demux_qm(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { return skb_get_queue_mapping(skb) % num; } static unsigned int fanout_demux_bpf(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { struct bpf_prog *prog; unsigned int ret = 0; rcu_read_lock(); prog = rcu_dereference(f->bpf_prog); if (prog) ret = bpf_prog_run_clear_cb(prog, skb) % num; rcu_read_unlock(); return ret; } static bool fanout_has_flag(struct packet_fanout *f, u16 flag) { return f->flags & (flag >> 8); } static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; unsigned int num = READ_ONCE(f->num_members); struct net *net = read_pnet(&f->net); struct packet_sock *po; unsigned int idx; if (!net_eq(dev_net(dev), net) || !num) { kfree_skb(skb); return 0; } if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } switch (f->type) { case PACKET_FANOUT_HASH: default: idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: idx = fanout_demux_lb(f, skb, num); break; case PACKET_FANOUT_CPU: idx = fanout_demux_cpu(f, skb, num); break; case PACKET_FANOUT_RND: idx = fanout_demux_rnd(f, skb, num); break; case PACKET_FANOUT_QM: idx = fanout_demux_qm(f, skb, num); break; case PACKET_FANOUT_ROLLOVER: idx = fanout_demux_rollover(f, skb, 0, false, num); break; case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: idx = fanout_demux_bpf(f, skb, num); break; } if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) idx = fanout_demux_rollover(f, skb, idx, true, num); po = pkt_sk(rcu_dereference(f->arr[idx])); return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } DEFINE_MUTEX(fanout_mutex); EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); static u16 fanout_next_id; static void __fanout_link(struct sock *sk, struct packet_sock *po) { struct packet_fanout *f = po->fanout; spin_lock(&f->lock); rcu_assign_pointer(f->arr[f->num_members], sk); smp_wmb(); f->num_members++; if (f->num_members == 1) dev_add_pack(&f->prot_hook); spin_unlock(&f->lock); } static void __fanout_unlink(struct sock *sk, struct packet_sock *po) { struct packet_fanout *f = po->fanout; int i; spin_lock(&f->lock); for (i = 0; i < f->num_members; i++) { if 
(rcu_dereference_protected(f->arr[i], lockdep_is_held(&f->lock)) == sk) break; } BUG_ON(i >= f->num_members); rcu_assign_pointer(f->arr[i], rcu_dereference_protected(f->arr[f->num_members - 1], lockdep_is_held(&f->lock))); f->num_members--; if (f->num_members == 0) __dev_remove_pack(&f->prot_hook); spin_unlock(&f->lock); } static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) { if (sk->sk_family != PF_PACKET) return false; return ptype->af_packet_priv == pkt_sk(sk)->fanout; } static void fanout_init_data(struct packet_fanout *f) { switch (f->type) { case PACKET_FANOUT_LB: atomic_set(&f->rr_cur, 0); break; case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: RCU_INIT_POINTER(f->bpf_prog, NULL); break; } } static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new) { struct bpf_prog *old; spin_lock(&f->lock); old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock)); rcu_assign_pointer(f->bpf_prog, new); spin_unlock(&f->lock); if (old) { synchronize_net(); bpf_prog_destroy(old); } } static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, unsigned int len) { struct bpf_prog *new; struct sock_fprog fprog; int ret; if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) return -EPERM; ret = copy_bpf_fprog_from_user(&fprog, data, len); if (ret) return ret; ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); if (ret) return ret; __fanout_set_data_bpf(po->fanout, new); return 0; } static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data, unsigned int len) { struct bpf_prog *new; u32 fd; if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) return -EPERM; if (len != sizeof(fd)) return -EINVAL; if (copy_from_sockptr(&fd, data, len)) return -EFAULT; new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(new)) return PTR_ERR(new); __fanout_set_data_bpf(po->fanout, new); return 0; } static int fanout_set_data(struct packet_sock *po, sockptr_t data, unsigned int len) { switch (po->fanout->type) { case PACKET_FANOUT_CBPF: return fanout_set_data_cbpf(po, data, len); case PACKET_FANOUT_EBPF: return fanout_set_data_ebpf(po, data, len); default: return -EINVAL; } } static void fanout_release_data(struct packet_fanout *f) { switch (f->type) { case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: __fanout_set_data_bpf(f, NULL); } } static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id) { struct packet_fanout *f; list_for_each_entry(f, &fanout_list, list) { if (f->id == candidate_id && read_pnet(&f->net) == sock_net(sk)) { return false; } } return true; } static bool fanout_find_new_id(struct sock *sk, u16 *new_id) { u16 id = fanout_next_id; do { if (__fanout_id_is_free(sk, id)) { *new_id = id; fanout_next_id = id + 1; return true; } id++; } while (id != fanout_next_id); return false; } static int fanout_add(struct sock *sk, struct fanout_args *args) { struct packet_rollover *rollover = NULL; struct packet_sock *po = pkt_sk(sk); u16 type_flags = args->type_flags; struct packet_fanout *f, *match; u8 type = type_flags & 0xff; u8 flags = type_flags >> 8; u16 id = args->id; int err; switch (type) { case PACKET_FANOUT_ROLLOVER: if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) return -EINVAL; break; case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: case PACKET_FANOUT_RND: case PACKET_FANOUT_QM: case PACKET_FANOUT_CBPF: case PACKET_FANOUT_EBPF: break; default: return -EINVAL; } mutex_lock(&fanout_mutex); err = -EALREADY; if (po->fanout) goto out; if (type == PACKET_FANOUT_ROLLOVER || (type_flags & 
PACKET_FANOUT_FLAG_ROLLOVER)) { err = -ENOMEM; rollover = kzalloc(sizeof(*rollover), GFP_KERNEL); if (!rollover) goto out; atomic_long_set(&rollover->num, 0); atomic_long_set(&rollover->num_huge, 0); atomic_long_set(&rollover->num_failed, 0); } if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { if (id != 0) { err = -EINVAL; goto out; } if (!fanout_find_new_id(sk, &id)) { err = -ENOMEM; goto out; } /* ephemeral flag for the first socket in the group: drop it */ flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8); } match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && read_pnet(&f->net) == sock_net(sk)) { match = f; break; } } err = -EINVAL; if (match) { if (match->flags != flags) goto out; if (args->max_num_members && args->max_num_members != match->max_num_members) goto out; } else { if (args->max_num_members > PACKET_FANOUT_MAX) goto out; if (!args->max_num_members) /* legacy PACKET_FANOUT_MAX */ args->max_num_members = 256; err = -ENOMEM; match = kvzalloc(struct_size(match, arr, args->max_num_members), GFP_KERNEL); if (!match) goto out; write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; match->flags = flags; INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); refcount_set(&match->sk_ref, 0); fanout_init_data(match); match->prot_hook.type = po->prot_hook.type; match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; match->prot_hook.ignore_outgoing = type_flags & PACKET_FANOUT_FLAG_IGNORE_OUTGOING; list_add(&match->list, &fanout_list); } err = -EINVAL; spin_lock(&po->bind_lock); if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { __dev_remove_pack(&po->prot_hook); /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ WRITE_ONCE(po->fanout, match); po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); __fanout_link(sk, po); err = 0; } } spin_unlock(&po->bind_lock); if (err && !refcount_read(&match->sk_ref)) { list_del(&match->list); kvfree(match); } out: kfree(rollover); mutex_unlock(&fanout_mutex); return err; } /* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. * It is the responsibility of the caller to call fanout_release_data() and * free the returned packet_fanout (after synchronize_net()) */ static struct packet_fanout *fanout_release(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; mutex_lock(&fanout_mutex); f = po->fanout; if (f) { po->fanout = NULL; if (refcount_dec_and_test(&f->sk_ref)) list_del(&f->list); else f = NULL; } mutex_unlock(&fanout_mutex); return f; } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, struct sk_buff *skb) { /* Earlier code assumed this would be a VLAN pkt, double-check * this now that we have the actual packet in hand. We can only * do this check on Ethernet devices. 
*/ if (unlikely(dev->type != ARPHRD_ETHER)) return false; skb_reset_mac_header(skb); return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)); } static const struct proto_ops packet_ops; static const struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_pkt *spkt; /* * When we registered the protocol we saved the socket in the data * field for just this event. */ sk = pt->af_packet_priv; /* * Yank back the headers [hope the device set this * right or kerboom...] * * Incoming packets have ll header pulled, * push it back. * * For outgoing ones skb->data == skb_mac_header(skb) * so that this procedure is noop. */ if (skb->pkt_type == PACKET_LOOPBACK) goto out; if (!net_eq(dev_net(dev), sock_net(sk))) goto out; skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) goto oom; /* drop any routing info */ skb_dst_drop(skb); /* drop conntrack reference */ nf_reset_ct(skb); spkt = &PACKET_SKB_CB(skb)->sa.pkt; skb_push(skb, skb->data - skb_mac_header(skb)); /* * The SOCK_PACKET socket receives _all_ frames. */ spkt->spkt_family = dev->type; strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); spkt->spkt_protocol = skb->protocol; /* * Charge the memory to the socket. This is done specifically * to prevent sockets using all the memory up. */ if (sock_queue_rcv_skb(sk, skb) == 0) return 0; out: kfree_skb(skb); oom: return 0; } static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) { int depth; if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && sock->type == SOCK_RAW) { skb_reset_mac_header(skb); skb->protocol = dev_parse_header_protocol(skb); } /* Move network header to the right position for VLAN tagged packets */ if (likely(skb->dev->type == ARPHRD_ETHER) && eth_type_vlan(skb->protocol) && vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); skb_probe_transport_header(skb); } /* * Output a raw packet to a device layer. This bypasses all the other * protocol layers and you must therefore supply it with a complete frame */ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; struct sockcm_cookie sockc; __be16 proto = 0; int err; int extra_len = 0; /* * Get and verify the address. */ if (saddr) { if (msg->msg_namelen < sizeof(struct sockaddr)) return -EINVAL; if (msg->msg_namelen == sizeof(struct sockaddr_pkt)) proto = saddr->spkt_protocol; } else return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */ /* * Find the device first to size check it */ saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0; retry: rcu_read_lock(); dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; err = -ENETDOWN; if (!(dev->flags & IFF_UP)) goto out_unlock; /* * You may not queue a frame bigger than the mtu. This is the lowest level * raw protocol and you must do your own fragmentation at this level. 
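 *
 * Historical reference, not part of this file: the obsolete SOCK_PACKET
 * usage this path serves. The frame buffer must already contain the full
 * link-layer header; fd comes from socket(AF_PACKET, SOCK_PACKET,
 * htons(ETH_P_ALL)) and needs CAP_NET_RAW. The interface name and payload
 * protocol below are example values.
 *
 *	#include <string.h>
 *	#include <arpa/inet.h>
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <linux/if_ether.h>
 *	#include <linux/if_packet.h>
 *
 *	static ssize_t spkt_send(int fd, const void *frame, size_t len,
 *				 const char *ifname)
 *	{
 *		struct sockaddr_pkt sp;
 *
 *		memset(&sp, 0, sizeof(sp));
 *		sp.spkt_family = AF_PACKET;
 *		strncpy((char *)sp.spkt_device, ifname, sizeof(sp.spkt_device) - 1);
 *		sp.spkt_protocol = htons(ETH_P_IP);	// protocol carried in the frame
 *		return sendto(fd, frame, len, 0,
 *			      (struct sockaddr *)&sp, sizeof(sp));
 *	}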
*/ if (unlikely(sock_flag(sk, SOCK_NOFCS))) { if (!netif_supports_nofcs(dev)) { err = -EPROTONOSUPPORT; goto out_unlock; } extra_len = 4; /* We're doing our own CRC */ } err = -EMSGSIZE; if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) goto out_unlock; if (!skb) { size_t reserved = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; rcu_read_unlock(); skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL); if (skb == NULL) return -ENOBUFS; /* FIXME: Save some space for broken drivers that write a hard * header at transmission time by themselves. PPP is the notable * one here. This should really be fixed at the driver level. */ skb_reserve(skb, reserved); skb_reset_network_header(skb); /* Try to align data part correctly */ if (hhlen) { skb->data -= hhlen; skb->tail -= hhlen; if (len < hhlen) skb_reset_network_header(skb); } err = memcpy_from_msg(skb_put(skb, len), msg, len); if (err) goto out_free; goto retry; } if (!dev_validate_header(dev, skb->data, len) || !skb->len) { err = -EINVAL; goto out_unlock; } if (len > (dev->mtu + dev->hard_header_len + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_unlock; } sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto out_unlock; } skb->protocol = proto; skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; packet_parse_headers(skb, sock); dev_queue_xmit(skb); rcu_read_unlock(); return len; out_unlock: rcu_read_unlock(); out_free: kfree_skb(skb); return err; } static unsigned int run_filter(struct sk_buff *skb, const struct sock *sk, unsigned int res) { struct sk_filter *filter; rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) res = bpf_prog_run_clear_cb(filter->prog, skb); rcu_read_unlock(); return res; } static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, size_t *len, int vnet_hdr_sz) { struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { .num_buffers = 0 }; if (*len < vnet_hdr_sz) return -EINVAL; *len -= vnet_hdr_sz; if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)&vnet_hdr, vio_le(), true, 0)) return -EINVAL; return memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz); } /* * This function makes lazy skb cloning in hope that most of packets * are discarded by BPF. * * Note tricky part: we DO mangle shared skb! skb->data, skb->len * and skb->cb are mangled. It works because (and until) packets * falling here are owned by current CPU. Output packets are cloned * by dev_queue_xmit_nit(), input packets are processed by net_bh * sequentially, so that if we return skb to original state on exit, * we will not harm anyone. */ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { enum skb_drop_reason drop_reason = SKB_CONSUMED; struct sock *sk; struct sockaddr_ll *sll; struct packet_sock *po; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; skb->dev = dev; if (dev_has_header(dev)) { /* The device has an explicit notion of ll header, * exported to higher levels. 
* * Otherwise, the device hides details of its frame * structure, so that corresponding packet head is * never delivered to user. */ if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb_network_offset(skb)); } } snaplen = skb->len; res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; if (snaplen > res) snaplen = res; if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto drop_n_acct; if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); if (nskb == NULL) goto drop_n_acct; if (skb_head != skb->data) { skb->data = skb_head; skb->len = skb_len; } consume_skb(skb); skb = nskb; } sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8); sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; sll->sll_halen = dev_parse_header(skb, sll->sll_addr); /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg(). * Use their space for storing the original skb length. */ PACKET_SKB_CB(skb)->sa.origlen = skb->len; if (pskb_trim(skb, snaplen)) goto drop_n_acct; skb_set_owner_r(skb, sk); skb->dev = NULL; skb_dst_drop(skb); /* drop conntrack reference */ nf_reset_ct(skb); spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; sock_skb_set_dropcount(sk, skb); skb_clear_delivery_time(skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); return 0; drop_n_acct: atomic_inc(&po->tp_drops); atomic_inc(&sk->sk_drops); drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { skb->data = skb_head; skb->len = skb_len; } drop: kfree_skb_reason(skb, drop_reason); return 0; } static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { enum skb_drop_reason drop_reason = SKB_CONSUMED; struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; union tpacket_uhdr h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_USER; unsigned short macoff, hdrlen; unsigned int netoff; struct sk_buff *copy_skb = NULL; struct timespec64 ts; __u32 ts_status; unsigned int slot_id = 0; int vnet_hdr_sz = 0; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing * userspace to call getsockopt(..., PACKET_HDRLEN, ...). 
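 *
 * For reference, the PACKET_HDRLEN query that this guarantee makes optional
 * looks roughly like this from userspace (illustrative sketch, not part of
 * this file):
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *
 *	static int tpacket_hdrlen(int fd, int version)
 *	{
 *		int val = version;		// e.g. TPACKET_V3
 *		socklen_t len = sizeof(val);
 *
 *		if (getsockopt(fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0)
 *			return -1;
 *		return val;			// kernel's header length for that version
 *	}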
*/ BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; sk = pt->af_packet_priv; po = pkt_sk(sk); if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; if (dev_has_header(dev)) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb_network_offset(skb)); } } snaplen = skb->len; res = run_filter(skb, sk, snaplen); if (!res) goto drop_n_restore; /* If we are flooded, just give up */ if (__packet_rcv_has_room(po, skb) == ROOM_NONE) { atomic_inc(&po->tp_drops); goto drop_n_restore; } if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (skb_is_gso(skb) && skb_is_gso_tcp(skb)) status |= TP_STATUS_GSO_TCP; if (snaplen > res) snaplen = res; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + po->tp_reserve; } else { unsigned int maclen = skb_network_offset(skb); netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); if (vnet_hdr_sz) netoff += vnet_hdr_sz; macoff = netoff - maclen; } if (netoff > USHRT_MAX) { atomic_inc(&po->tp_drops); goto drop_n_restore; } if (po->tp_version <= TPACKET_V2) { if (macoff + snaplen > po->rx_ring.frame_size) { if (READ_ONCE(po->copy_thresh) && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { if (skb_shared(skb)) { copy_skb = skb_clone(skb, GFP_ATOMIC); } else { copy_skb = skb_get(skb); skb_head = skb->data; } if (copy_skb) { memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { snaplen = 0; vnet_hdr_sz = 0; } } } else if (unlikely(macoff + snaplen > GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { u32 nval; nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", snaplen, nval, macoff); snaplen = nval; if (unlikely((int)snaplen < 0)) { snaplen = 0; macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; vnet_hdr_sz = 0; } } spin_lock(&sk->sk_receive_queue.lock); h.raw = packet_current_rx_frame(po, skb, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; if (po->tp_version <= TPACKET_V2) { slot_id = po->rx_ring.head; if (test_bit(slot_id, po->rx_ring.rx_owner_map)) goto drop_n_account; __set_bit(slot_id, po->rx_ring.rx_owner_map); } if (vnet_hdr_sz && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), vio_le(), true, 0)) { if (po->tp_version == TPACKET_V3) prb_clear_blk_fill_status(&po->rx_ring); goto drop_n_account; } if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* * LOSING will be reported till you read the stats, * because it's COR - Clear On Read. * Anyways, moving it for V1/V2 only as V3 doesn't need this * at packet level. 
*/ if (atomic_read(&po->tp_drops)) status |= TP_STATUS_LOSING; } po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; skb_clear_delivery_time(copy_skb); __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); /* Always timestamp; prefer an existing software timestamp taken * closer to the time of capture. */ ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp) | SOF_TIMESTAMPING_SOFTWARE); if (!ts_status) ktime_get_real_ts64(&ts); status |= ts_status; switch (po->tp_version) { case TPACKET_V1: h.h1->tp_len = skb->len; h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; h.h1->tp_sec = ts.tv_sec; h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; hdrlen = sizeof(*h.h1); break; case TPACKET_V2: h.h2->tp_len = skb->len; h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; if (skb_vlan_tag_present(skb)) { h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; h.h2->tp_vlan_tpid = 0; } memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding)); hdrlen = sizeof(*h.h2); break; case TPACKET_V3: /* tp_nxt_offset,vlan are already populated above. * So DONT clear those fields here */ h.h3->tp_status |= status; h.h3->tp_len = skb->len; h.h3->tp_snaplen = snaplen; h.h3->tp_mac = macoff; h.h3->tp_net = netoff; h.h3->tp_sec = ts.tv_sec; h.h3->tp_nsec = ts.tv_nsec; memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding)); hdrlen = sizeof(*h.h3); break; default: BUG(); } sll = h.raw + TPACKET_ALIGN(hdrlen); sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; smp_mb(); #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 if (po->tp_version <= TPACKET_V2) { u8 *start, *end; end = (u8 *) PAGE_ALIGN((unsigned long) h.raw + macoff + snaplen); for (start = h.raw; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); } smp_wmb(); #endif if (po->tp_version <= TPACKET_V2) { spin_lock(&sk->sk_receive_queue.lock); __packet_set_status(po, h.raw, status); __clear_bit(slot_id, po->rx_ring.rx_owner_map); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); } else if (po->tp_version == TPACKET_V3) { prb_clear_blk_fill_status(&po->rx_ring); } drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { skb->data = skb_head; skb->len = skb_len; } drop: kfree_skb_reason(skb, drop_reason); return 0; drop_n_account: spin_unlock(&sk->sk_receive_queue.lock); atomic_inc(&po->tp_drops); drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; sk->sk_data_ready(sk); kfree_skb_reason(copy_skb, drop_reason); goto drop_n_restore; } static void tpacket_destruct_skb(struct sk_buff *skb) { struct packet_sock *po = pkt_sk(skb->sk); if (likely(po->tx_ring.pg_vec)) { void *ph; __u32 ts; ph = skb_zcopy_get_nouarg(skb); packet_dec_pending(&po->tx_ring); ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); if (!packet_read_pending(&po->tx_ring)) complete(&po->skb_completion); } sock_wfree(skb); } static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) { if ((vnet_hdr->flags & 
VIRTIO_NET_HDR_F_NEEDS_CSUM) && (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 > __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len))) vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(), __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2); if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len) return -EINVAL; return 0; } static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len, struct virtio_net_hdr *vnet_hdr, int vnet_hdr_sz) { int ret; if (*len < vnet_hdr_sz) return -EINVAL; *len -= vnet_hdr_sz; if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter)) return -EFAULT; ret = __packet_snd_vnet_parse(vnet_hdr, *len); if (ret) return ret; /* move iter to point to the start of mac header */ if (vnet_hdr_sz != sizeof(struct virtio_net_hdr)) iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(struct virtio_net_hdr)); return 0; } static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, __be16 proto, unsigned char *addr, int hlen, int copylen, const struct sockcm_cookie *sockc) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; struct socket *sock = po->sk.sk_socket; struct page *page; int err; ph.raw = frame; skb->protocol = proto; skb->dev = dev; skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); skb_reserve(skb, hlen); skb_reset_network_header(skb); to_write = tp_len; if (sock->type == SOCK_DGRAM) { err = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, tp_len); if (unlikely(err < 0)) return -EINVAL; } else if (copylen) { int hdrlen = min_t(int, copylen, tp_len); skb_push(skb, dev->hard_header_len); skb_put(skb, copylen - dev->hard_header_len); err = skb_store_bits(skb, 0, data, hdrlen); if (unlikely(err)) return err; if (!dev_validate_header(dev, skb->data, hdrlen)) return -EINVAL; data += hdrlen; to_write -= hdrlen; } offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); skb->data_len = to_write; skb->len += to_write; skb->truesize += to_write; refcount_add(to_write, &po->sk.sk_wmem_alloc); while (likely(to_write)) { nr_frags = skb_shinfo(skb)->nr_frags; if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { pr_err("Packet exceed the number of skb frags(%u)\n", (unsigned int)MAX_SKB_FRAGS); return -EFAULT; } page = pgv_to_page(data); data += len; flush_dcache_page(page); get_page(page); skb_fill_page_desc(skb, nr_frags, page, offset, len); to_write -= len; offset = 0; len_max = PAGE_SIZE; len = ((to_write > len_max) ? 
len_max : to_write); } packet_parse_headers(skb, sock); return tp_len; } static int tpacket_parse_header(struct packet_sock *po, void *frame, int size_max, void **data) { union tpacket_uhdr ph; int tp_len, off; ph.raw = frame; switch (po->tp_version) { case TPACKET_V3: if (ph.h3->tp_next_offset != 0) { pr_warn_once("variable sized slot not supported"); return -EINVAL; } tp_len = ph.h3->tp_len; break; case TPACKET_V2: tp_len = ph.h2->tp_len; break; default: tp_len = ph.h1->tp_len; break; } if (unlikely(tp_len > size_max)) { pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); return -EMSGSIZE; } if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) { int off_min, off_max; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); off_max = po->tx_ring.frame_size - tp_len; if (po->sk.sk_type == SOCK_DGRAM) { switch (po->tp_version) { case TPACKET_V3: off = ph.h3->tp_net; break; case TPACKET_V2: off = ph.h2->tp_net; break; default: off = ph.h1->tp_net; break; } } else { switch (po->tp_version) { case TPACKET_V3: off = ph.h3->tp_mac; break; case TPACKET_V2: off = ph.h2->tp_mac; break; default: off = ph.h1->tp_mac; break; } } if (unlikely((off < off_min) || (off_max < off))) return -EINVAL; } else { off = po->tp_hdrlen - sizeof(struct sockaddr_ll); } *data = frame + off; return tp_len; } static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb = NULL; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); unsigned char *addr = NULL; int tp_len, size_max; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; long timeo = 0; mutex_lock(&po->pg_vec_lock); /* packet_sendmsg() check on tx_ring.pg_vec was lockless, * we need to confirm it under protection of pg_vec_lock. */ if (unlikely(!po->tx_ring.pg_vec)) { err = -EBUSY; goto out; } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); if (po->sk.sk_socket->type == SOCK_DGRAM) { if (dev && msg->msg_namelen < dev->addr_len + offsetof(struct sockaddr_ll, sll_addr)) goto out_put; addr = saddr->sll_addr; } } err = -ENXIO; if (unlikely(dev == NULL)) goto out; err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; sockcm_init(&sockc, &po->sk); if (msg->msg_controllen) { err = sock_cmsg_send(&po->sk, msg, &sockc); if (unlikely(err)) goto out_put; } if (po->sk.sk_socket->type == SOCK_RAW) reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz) size_max = dev->mtu + reserve + VLAN_HLEN; reinit_completion(&po->skb_completion); do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { if (need_wait && skb) { timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); if (timeo <= 0) { err = !timeo ? 
-ETIMEDOUT : -ERESTARTSYS; goto out_put; } } /* check for additional frames */ continue; } skb = NULL; tp_len = tpacket_parse_header(po, ph, size_max, &data); if (tp_len < 0) goto tpacket_error; status = TP_STATUS_SEND_REQUEST; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; if (vnet_hdr_sz) { vnet_hdr = data; data += vnet_hdr_sz; tp_len -= vnet_hdr_sz; if (tp_len < 0 || __packet_snd_vnet_parse(vnet_hdr, tp_len)) { tp_len = -EINVAL; goto tpacket_error; } copylen = __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len); } copylen = max_t(int, copylen, dev->hard_header_len); skb = sock_alloc_send_skb(&po->sk, hlen + tlen + sizeof(struct sockaddr_ll) + (copylen - dev->hard_header_len), !need_wait, &err); if (unlikely(skb == NULL)) { /* we assume the socket was initially writeable ... */ if (likely(len_sum > 0)) err = len_sum; goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && !vnet_hdr_sz && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; if (unlikely(tp_len < 0)) { tpacket_error: if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) { __packet_set_status(po, ph, TP_STATUS_AVAILABLE); packet_increment_head(&po->tx_ring); kfree_skb(skb); continue; } else { status = TP_STATUS_WRONG_FORMAT; err = tp_len; goto out_status; } } if (vnet_hdr_sz) { if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { tp_len = -EINVAL; goto tpacket_error; } virtio_net_hdr_set_proto(skb, vnet_hdr); } skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; err = packet_xmit(po, skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == TP_STATUS_AVAILABLE) { /* skb was destructed already */ skb = NULL; goto out_status; } /* * skb was dropped but not destructed yet; * let's treat it like congestion or err < 0 */ err = 0; } packet_increment_head(&po->tx_ring); len_sum += tp_len; } while (likely((ph != NULL) || /* Note: packet_read_pending() might be slow if we have * to call it as it's per_cpu variable, but in fast-path * we already short-circuit the loop with the first * condition, and luckily don't have to go that path * anyway. */ (need_wait && packet_read_pending(&po->tx_ring)))); err = len_sum; goto out_put; out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; } static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, size_t reserve, size_t len, size_t linear, int noblock, int *err) { struct sk_buff *skb; /* Under a page? Don't bother with paged skb. 
*/ if (prepad + len < PAGE_SIZE || !linear) linear = len; if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, err, PAGE_ALLOC_COSTLY_ORDER); if (!skb) return NULL; skb_reserve(skb, reserve); skb_put(skb, linear); skb->data_len = len - linear; skb->len += len - linear; return skb; } static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); struct sk_buff *skb; struct net_device *dev; __be16 proto; unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); int hlen, tlen, linear; int extra_len = 0; /* * Get and verify the address. */ if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); if (sock->type == SOCK_DGRAM) { if (dev && msg->msg_namelen < dev->addr_len + offsetof(struct sockaddr_ll, sll_addr)) goto out_unlock; addr = saddr->sll_addr; } } err = -ENXIO; if (unlikely(dev == NULL)) goto out_unlock; err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; sockcm_init(&sockc, sk); sockc.mark = READ_ONCE(sk->sk_mark); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) goto out_unlock; } if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; if (vnet_hdr_sz) { err = packet_snd_vnet_parse(msg, &len, &vnet_hdr, vnet_hdr_sz); if (err) goto out_unlock; } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { if (!netif_supports_nofcs(dev)) { err = -EPROTONOSUPPORT; goto out_unlock; } extra_len = 4; /* We're doing our own CRC */ } err = -EMSGSIZE; if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); linear = max(linear, min_t(int, len, dev->hard_header_len)); skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; skb_reset_network_header(skb); err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; } else if (reserve) { skb_reserve(skb, -reserve); if (len < reserve + sizeof(struct ipv6hdr) && dev->min_header_len != dev->hard_header_len) skb_reset_network_header(skb); } /* Returns -EFAULT on error */ err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len); if (err) goto out_free; if ((sock->type == SOCK_RAW && !dev_validate_header(dev, skb->data, len)) || !skb->len) { err = -EINVAL; goto out_free; } skb_setup_tx_timestamp(skb, sockc.tsflags); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { err = -EMSGSIZE; goto out_free; } skb->protocol = proto; skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; if (unlikely(extra_len == 4)) skb->no_fcs = 1; 
packet_parse_headers(skb, sock); if (vnet_hdr_sz) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); if (err) goto out_free; len += vnet_hdr_sz; virtio_net_hdr_set_proto(skb, &vnet_hdr); } err = packet_xmit(po, skb); if (unlikely(err != 0)) { if (err > 0) err = net_xmit_errno(err); if (err) goto out_unlock; } dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: dev_put(dev); out: return err; } static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. * tpacket_snd() will redo the check safely. */ if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); return packet_snd(sock, msg, len); } /* * Close a PACKET socket. This is fairly simple. We immediately go * to 'closed' state and remove our protocol entry in the device list. */ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; if (!sk) return 0; net = sock_net(sk); po = pkt_sk(sk); mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); mutex_unlock(&net->packet.sklist_lock); sock_prot_inuse_add(net, sk->sk_prot, -1); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); packet_cached_dev_reset(po); if (po->prot_hook.dev) { netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); packet_flush_mclist(sk); lock_sock(sk); if (po->rx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); } if (po->tx_ring.pg_vec) { memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); } release_sock(sk); f = fanout_release(sk); synchronize_net(); kfree(po->rollover); if (f) { fanout_release_data(f); kvfree(f); } /* * Now the socket is dead. No more input will appear. */ sock_orphan(sk); sock->sk = NULL; /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); packet_free_pending(po); sock_put(sk); return 0; } /* * Attach a packet hook. */ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, __be16 proto) { struct packet_sock *po = pkt_sk(sk); struct net_device *dev = NULL; bool unlisted = false; bool need_rehook; int ret = 0; lock_sock(sk); spin_lock(&po->bind_lock); if (!proto) proto = po->num; rcu_read_lock(); if (po->fanout) { ret = -EINVAL; goto out_unlock; } if (name) { dev = dev_get_by_name_rcu(sock_net(sk), name); if (!dev) { ret = -ENODEV; goto out_unlock; } } else if (ifindex) { dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (!dev) { ret = -ENODEV; goto out_unlock; } } need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev; if (need_rehook) { dev_hold(dev); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { rcu_read_unlock(); /* prevents packet_notifier() from calling * register_prot_hook() */ WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); if (dev) unlisted = !dev_get_by_index_rcu(sock_net(sk), dev->ifindex); } BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING)); WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); if (unlikely(unlisted)) { po->prot_hook.dev = NULL; WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { netdev_hold(dev, &po->prot_hook.dev_tracker, GFP_ATOMIC); po->prot_hook.dev = dev; WRITE_ONCE(po->ifindex, dev ? 
dev->ifindex : 0); packet_cached_dev_assign(po, dev); } dev_put(dev); } if (proto == 0 || !need_rehook) goto out_unlock; if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } out_unlock: rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); return ret; } /* * Bind a packet socket to a device */ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality */ if (addr_len != sizeof(struct sockaddr)) return -EINVAL; /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; /* * Check legality */ if (addr_len < sizeof(struct sockaddr_ll)) return -EINVAL; if (sll->sll_family != AF_PACKET) return -EINVAL; return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol); } static struct proto packet_proto = { .name = "PACKET", .owner = THIS_MODULE, .obj_size = sizeof(struct packet_sock), }; /* * Create a packet of type SOCK_PACKET. */ static int packet_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct packet_sock *po; __be16 proto = (__force __be16)protocol; /* weird, but documented */ int err; if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && sock->type != SOCK_PACKET) return -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; err = -ENOBUFS; sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; sock->ops = &packet_ops; if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; sock_init_data(sock, sk); po = pkt_sk(sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; err = packet_alloc_pending(po); if (err) goto out2; packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; /* * Attach a protocol block */ spin_lock_init(&po->bind_lock); mutex_init(&po->pg_vec_lock); po->rollover = NULL; po->prot_hook.func = packet_rcv; if (sock->type == SOCK_PACKET) po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; __register_prot_hook(sk); } mutex_lock(&net->packet.sklist_lock); sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); sock_prot_inuse_add(net, &packet_proto, 1); return 0; out2: sk_free(sk); out: return err; } /* * Pull a packet from our receive queue and hand it to the user. * If necessary we block. */ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; int vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz); unsigned int origlen = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) goto out; #if 0 /* What error should we return now? EUNATTACH? */ if (pkt_sk(sk)->ifindex < 0) return -ENODEV; #endif if (flags & MSG_ERRQUEUE) { err = sock_recv_errqueue(sk, msg, len, SOL_PACKET, PACKET_TX_TIMESTAMP); goto out; } /* * Call the generic datagram receiver. 
This handles all sorts * of horrible races and re-entrancy so we can forget about it * in the protocol layers. * * Now it will return ENETDOWN, if device have just gone down, * but then it will block. */ skb = skb_recv_datagram(sk, flags, &err); /* * An error occurred so return it. Because skb_recv_datagram() * handles the blocking we don't see and worry about blocking * retries. */ if (skb == NULL) goto out; packet_rcv_try_clear_pressure(pkt_sk(sk)); if (vnet_hdr_len) { err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len); if (err) goto out_free; } /* You lose any data beyond the buffer you gave. If it worries * a user program they can ask the device for its MTU * anyway. */ copied = skb->len; if (copied > len) { copied = len; msg->msg_flags |= MSG_TRUNC; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free; if (sock->type != SOCK_PACKET) { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; /* Original length was stored in sockaddr_ll fields */ origlen = PACKET_SKB_CB(skb)->sa.origlen; sll->sll_family = AF_PACKET; sll->sll_protocol = skb->protocol; } sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { const size_t max_len = min(sizeof(skb->cb), sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); copy_len = msg->msg_namelen; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { memset(msg->msg_name + offsetof(struct sockaddr_ll, sll_addr), 0, sizeof(sll->sll_addr)); msg->msg_namelen = sizeof(struct sockaddr_ll); } } if (WARN_ON_ONCE(copy_len > max_len)) { copy_len = max_len; msg->msg_namelen = copy_len; } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; if (skb_is_gso(skb) && skb_is_gso_tcp(skb)) aux.tp_status |= TP_STATUS_GSO_TCP; aux.tp_len = origlen; aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); if (skb_vlan_tag_present(skb)) { aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { aux.tp_vlan_tci = 0; aux.tp_vlan_tpid = 0; } put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } /* * Free or return the buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. */ err = vnet_hdr_len + ((flags&MSG_TRUNC) ? 
skb->len : copied); out_free: skb_free_datagram(sk, skb); out: return err; } static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, int peer) { struct net_device *dev; struct sock *sk = sock->sk; if (peer) return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); } static int packet_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct net_device *dev; struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); int ifindex; if (peer) return -EOPNOTSUPP; ifindex = READ_ONCE(po->ifindex); sll->sll_family = AF_PACKET; sll->sll_ifindex = ifindex; sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; /* Let __fortify_memcpy_chk() know the actual buffer size. */ memcpy(((struct sockaddr_storage *)sll)->__data + offsetof(struct sockaddr_ll, sll_addr) - offsetofend(struct sockaddr_ll, sll_family), dev->dev_addr, dev->addr_len); } else { sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } rcu_read_unlock(); return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; } static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) { switch (i->type) { case PACKET_MR_MULTICAST: if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr); else return dev_mc_del(dev, i->addr); break; case PACKET_MR_PROMISC: return dev_set_promiscuity(dev, what); case PACKET_MR_ALLMULTI: return dev_set_allmulti(dev, what); case PACKET_MR_UNICAST: if (i->alen != dev->addr_len) return -EINVAL; if (what > 0) return dev_uc_add(dev, i->addr); else return dev_uc_del(dev, i->addr); break; default: break; } return 0; } static void packet_dev_mclist_delete(struct net_device *dev, struct packet_mclist **mlp) { struct packet_mclist *ml; while ((ml = *mlp) != NULL) { if (ml->ifindex == dev->ifindex) { packet_dev_mc(dev, ml, -1); *mlp = ml->next; kfree(ml); } else mlp = &ml->next; } } static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml, *i; struct net_device *dev; int err; rtnl_lock(); err = -ENODEV; dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); if (!dev) goto done; err = -EINVAL; if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; i = kmalloc(sizeof(*i), GFP_KERNEL); if (i == NULL) goto done; err = 0; for (ml = po->mclist; ml; ml = ml->next) { if (ml->ifindex == mreq->mr_ifindex && ml->type == mreq->mr_type && ml->alen == mreq->mr_alen && memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { ml->count++; /* Free the new element ... 
*/ kfree(i); goto done; } } i->type = mreq->mr_type; i->ifindex = mreq->mr_ifindex; i->alen = mreq->mr_alen; memcpy(i->addr, mreq->mr_address, i->alen); memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; i->next = po->mclist; po->mclist = i; err = packet_dev_mc(dev, i, 1); if (err) { po->mclist = i->next; kfree(i); } done: rtnl_unlock(); return err; } static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_mclist *ml, **mlp; rtnl_lock(); for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { if (ml->ifindex == mreq->mr_ifindex && ml->type == mreq->mr_type && ml->alen == mreq->mr_alen && memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; dev = __dev_get_by_index(sock_net(sk), ml->ifindex); if (dev) packet_dev_mc(dev, ml, -1); kfree(ml); } break; } } rtnl_unlock(); return 0; } static void packet_flush_mclist(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml; if (!po->mclist) return; rtnl_lock(); while ((ml = po->mclist) != NULL) { struct net_device *dev; po->mclist = ml->next; dev = __dev_get_by_index(sock_net(sk), ml->ifindex); if (dev != NULL) packet_dev_mc(dev, ml, -1); kfree(ml); } rtnl_unlock(); } static int packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); int ret; if (level != SOL_PACKET) return -ENOPROTOOPT; switch (optname) { case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { struct packet_mreq_max mreq; int len = optlen; memset(&mreq, 0, sizeof(mreq)); if (len < sizeof(struct packet_mreq)) return -EINVAL; if (len > sizeof(mreq)) len = sizeof(mreq); if (copy_from_sockptr(&mreq, optval, len)) return -EFAULT; if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) return -EINVAL; if (optname == PACKET_ADD_MEMBERSHIP) ret = packet_mc_add(sk, &mreq); else ret = packet_mc_drop(sk, &mreq); return ret; } case PACKET_RX_RING: case PACKET_TX_RING: { union tpacket_req_u req_u; int len; lock_sock(sk); switch (po->tp_version) { case TPACKET_V1: case TPACKET_V2: len = sizeof(req_u.req); break; case TPACKET_V3: default: len = sizeof(req_u.req3); break; } if (optlen < len) { ret = -EINVAL; } else { if (copy_from_sockptr(&req_u.req, optval, len)) ret = -EFAULT; else ret = packet_set_ring(sk, &req_u, 0, optname == PACKET_TX_RING); } release_sock(sk); return ret; } case PACKET_COPY_THRESH: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; WRITE_ONCE(pkt_sk(sk)->copy_thresh, val); return 0; } case PACKET_VERSION: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: break; default: return -EINVAL; } lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { po->tp_version = val; ret = 0; } release_sock(sk); return ret; } case PACKET_RESERVE: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { po->tp_reserve = val; ret = 0; } release_sock(sk); return ret; } case PACKET_LOSS: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, 
sizeof(val))) return -EFAULT; lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val); ret = 0; } release_sock(sk); return ret; } case PACKET_AUXDATA: { int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); return 0; } case PACKET_ORIGDEV: { int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); return 0; } case PACKET_VNET_HDR: case PACKET_VNET_HDR_SZ: { int val, hdr_len; if (sock->type != SOCK_RAW) return -EINVAL; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (optname == PACKET_VNET_HDR_SZ) { if (val && val != sizeof(struct virtio_net_hdr) && val != sizeof(struct virtio_net_hdr_mrg_rxbuf)) return -EINVAL; hdr_len = val; } else { hdr_len = val ? sizeof(struct virtio_net_hdr) : 0; } lock_sock(sk); if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { ret = -EBUSY; } else { WRITE_ONCE(po->vnet_hdr_sz, hdr_len); ret = 0; } release_sock(sk); return ret; } case PACKET_TIMESTAMP: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; WRITE_ONCE(po->tp_tstamp, val); return 0; } case PACKET_FANOUT: { struct fanout_args args = { 0 }; if (optlen != sizeof(int) && optlen != sizeof(args)) return -EINVAL; if (copy_from_sockptr(&args, optval, optlen)) return -EFAULT; return fanout_add(sk, &args); } case PACKET_FANOUT_DATA: { /* Paired with the WRITE_ONCE() in fanout_add() */ if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); } case PACKET_IGNORE_OUTGOING: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (val < 0 || val > 1) return -EINVAL; WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val); return 0; } case PACKET_TX_HAS_OFF: { unsigned int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val); release_sock(sk); return 0; } case PACKET_QDISC_BYPASS: { int val; if (optlen != sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; packet_sock_flag_set(po, PACKET_SOCK_QDISC_BYPASS, val); return 0; } default: return -ENOPROTOOPT; } } static int packet_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { int len; int val, lv = sizeof(val); struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; int drops; if (level != SOL_PACKET) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; switch (optname) { case PACKET_STATISTICS: spin_lock_bh(&sk->sk_receive_queue.lock); memcpy(&st, &po->stats, sizeof(st)); memset(&po->stats, 0, sizeof(po->stats)); spin_unlock_bh(&sk->sk_receive_queue.lock); drops = atomic_xchg(&po->tp_drops, 0); if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); st.stats3.tp_drops = drops; st.stats3.tp_packets += drops; data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); st.stats1.tp_drops = drops; st.stats1.tp_packets += drops; data = 
&st.stats1; } break; case PACKET_AUXDATA: val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); break; case PACKET_ORIGDEV: val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: val = !!READ_ONCE(po->vnet_hdr_sz); break; case PACKET_VNET_HDR_SZ: val = READ_ONCE(po->vnet_hdr_sz); break; case PACKET_COPY_THRESH: val = READ_ONCE(pkt_sk(sk)->copy_thresh); break; case PACKET_VERSION: val = po->tp_version; break; case PACKET_HDRLEN: if (len > sizeof(int)) len = sizeof(int); if (len < sizeof(int)) return -EINVAL; if (copy_from_user(&val, optval, len)) return -EFAULT; switch (val) { case TPACKET_V1: val = sizeof(struct tpacket_hdr); break; case TPACKET_V2: val = sizeof(struct tpacket2_hdr); break; case TPACKET_V3: val = sizeof(struct tpacket3_hdr); break; default: return -EINVAL; } break; case PACKET_RESERVE: val = po->tp_reserve; break; case PACKET_LOSS: val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS); break; case PACKET_TIMESTAMP: val = READ_ONCE(po->tp_tstamp); break; case PACKET_FANOUT: val = (po->fanout ? ((u32)po->fanout->id | ((u32)po->fanout->type << 16) | ((u32)po->fanout->flags << 24)) : 0); break; case PACKET_IGNORE_OUTGOING: val = READ_ONCE(po->prot_hook.ignore_outgoing); break; case PACKET_ROLLOVER_STATS: if (!po->rollover) return -EINVAL; rstats.tp_all = atomic_long_read(&po->rollover->num); rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); data = &rstats; lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF); break; case PACKET_QDISC_BYPASS: val = packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS); break; default: return -ENOPROTOOPT; } if (len > lv) len = lv; if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, data, len)) return -EFAULT; return 0; } static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { struct sock *sk; struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { case NETDEV_UNREGISTER: if (po->mclist) packet_dev_mclist_delete(dev, &po->mclist); fallthrough; case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); } break; case NETDEV_UP: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (po->num) register_prot_hook(sk); spin_unlock(&po->bind_lock); } break; } } rcu_read_unlock(); return NOTIFY_DONE; } static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; switch (cmd) { case SIOCOUTQ: { int amount = sk_wmem_alloc_get(sk); return put_user(amount, (int __user *)arg); } case SIOCINQ: { struct sk_buff *skb; int amount = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len; spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } #ifdef CONFIG_INET case SIOCADDRT: case SIOCDELRT: case SIOCDARP: case SIOCGARP: case SIOCSARP: case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFBRDADDR: case SIOCSIFBRDADDR: 
case SIOCGIFNETMASK: case SIOCSIFNETMASK: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: return inet_dgram_ops.ioctl(sock, cmd, arg); #endif default: return -ENOIOCTLCMD; } return 0; } static __poll_t packet_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); __poll_t mask = datagram_poll(file, sock, wait); spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { if (!packet_previous_rx_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) mask |= EPOLLIN | EPOLLRDNORM; } packet_rcv_try_clear_pressure(po); spin_unlock_bh(&sk->sk_receive_queue.lock); spin_lock_bh(&sk->sk_write_queue.lock); if (po->tx_ring.pg_vec) { if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE)) mask |= EPOLLOUT | EPOLLWRNORM; } spin_unlock_bh(&sk->sk_write_queue.lock); return mask; } /* Dirty? Well, I still did not learn better way to account * for user mmaps. */ static void packet_mm_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct socket *sock = file->private_data; struct sock *sk = sock->sk; if (sk) atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct socket *sock = file->private_data; struct sock *sk = sock->sk; if (sk) atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { .open = packet_mm_open, .close = packet_mm_close, }; static void free_pg_vec(struct pgv *pg_vec, unsigned int order, unsigned int len) { int i; for (i = 0; i < len; i++) { if (likely(pg_vec[i].buffer)) { if (is_vmalloc_addr(pg_vec[i].buffer)) vfree(pg_vec[i].buffer); else free_pages((unsigned long)pg_vec[i].buffer, order); pg_vec[i].buffer = NULL; } } kfree(pg_vec); } static char *alloc_one_pg_vec_page(unsigned long order) { char *buffer; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; /* __get_free_pages failed, fall back to vmalloc */ buffer = vzalloc(array_size((1 << order), PAGE_SIZE)); if (buffer) return buffer; /* vmalloc failed, lets dig into swap here */ gfp_flags &= ~__GFP_NORETRY; buffer = (char *) __get_free_pages(gfp_flags, order); if (buffer) return buffer; /* complete and utter failure */ return NULL; } static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) { unsigned int block_nr = req->tp_block_nr; struct pgv *pg_vec; int i; pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; for (i = 0; i < block_nr; i++) { pg_vec[i].buffer = alloc_one_pg_vec_page(order); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } out: return pg_vec; out_free_pgvec: free_pg_vec(pg_vec, order, block_nr); pg_vec = NULL; goto out; } static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring) { struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); unsigned long *rx_owner_map = NULL; int was_running, order = 0; struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; __be16 num; int err; /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; err = -EBUSY; if (!closing) { if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; } if (req->tp_block_nr) { unsigned int min_frame_size; /* Sanity tests and some calculations */ err = -EBUSY; if (unlikely(rb->pg_vec)) goto out; switch (po->tp_version) { case TPACKET_V1: po->tp_hdrlen = TPACKET_HDRLEN; break; case TPACKET_V2: po->tp_hdrlen = TPACKET2_HDRLEN; break; case TPACKET_V3: po->tp_hdrlen = TPACKET3_HDRLEN; break; } err = -EINVAL; if (unlikely((int)req->tp_block_size <= 0)) goto out; if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; min_frame_size = po->tp_hdrlen + po->tp_reserve; if (po->tp_version >= TPACKET_V3 && req->tp_block_size < BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) goto out; if (unlikely(req->tp_frame_size < min_frame_size)) goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) goto out; rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; err = -ENOMEM; order = get_order(req->tp_block_size); pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; switch (po->tp_version) { case TPACKET_V3: /* Block transmit is not supported yet */ if (!tx_ring) { init_prb_bdqc(po, rb, pg_vec, req_u); } else { struct tpacket_req3 *req3 = &req_u->req3; if (req3->tp_retire_blk_tov || req3->tp_sizeof_priv || req3->tp_feature_req_word) { err = -EINVAL; goto out_free_pg_vec; } } break; default: if (!tx_ring) { rx_owner_map = bitmap_alloc(req->tp_frame_nr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (!rx_owner_map) goto out_free_pg_vec; } break; } } /* Done */ else { err = -EINVAL; if (unlikely(req->tp_frame_nr)) goto out; } /* Detach socket from network */ spin_lock(&po->bind_lock); was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; if (was_running) { WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); synchronize_net(); err = -EBUSY; mutex_lock(&po->pg_vec_lock); if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); if (po->tp_version <= TPACKET_V2) swap(rb->rx_owner_map, rx_owner_map); rb->frame_max = (req->tp_frame_nr - 1); rb->head = 0; rb->frame_size = req->tp_frame_size; spin_unlock_bh(&rb_queue->lock); swap(rb->pg_vec_order, order); swap(rb->pg_vec_len, req->tp_block_nr); rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; po->prot_hook.func = (po->rx_ring.pg_vec) ? 
tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); if (atomic_long_read(&po->mapped)) pr_err("packet_mmap: vma is busy: %ld\n", atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); if (was_running) { WRITE_ONCE(po->num, num); register_prot_hook(sk); } spin_unlock(&po->bind_lock); if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); } out_free_pg_vec: if (pg_vec) { bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); } out: return err; } static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); unsigned long size, expected_size; struct packet_ring_buffer *rb; unsigned long start; int err = -EINVAL; int i; if (vma->vm_pgoff) return -EINVAL; mutex_lock(&po->pg_vec_lock); expected_size = 0; for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { if (rb->pg_vec) { expected_size += rb->pg_vec_len * rb->pg_vec_pages * PAGE_SIZE; } } if (expected_size == 0) goto out; size = vma->vm_end - vma->vm_start; if (size != expected_size) goto out; start = vma->vm_start; for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { if (rb->pg_vec == NULL) continue; for (i = 0; i < rb->pg_vec_len; i++) { struct page *page; void *kaddr = rb->pg_vec[i].buffer; int pg_num; for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { page = pgv_to_page(kaddr); err = vm_insert_page(vma, start, page); if (unlikely(err)) goto out; start += PAGE_SIZE; kaddr += PAGE_SIZE; } } } atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; out: mutex_unlock(&po->pg_vec_lock); return err; } static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, .bind = packet_bind_spkt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname_spkt, .poll = datagram_poll, .ioctl = packet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = packet_sendmsg_spkt, .recvmsg = packet_recvmsg, .mmap = sock_no_mmap, }; static const struct proto_ops packet_ops = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, .bind = packet_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname, .poll = packet_poll, .ioctl = packet_ioctl, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, }; static const struct net_proto_family packet_family_ops = { .family = PF_PACKET, .create = packet_create, .owner = THIS_MODULE, }; static struct notifier_block packet_netdev_notifier = { .notifier_call = packet_notifier, }; #ifdef CONFIG_PROC_FS static void *packet_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct net *net = seq_file_net(seq); rcu_read_lock(); return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); return seq_hlist_next_rcu(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int packet_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) 
seq_printf(seq, "%*sRefCnt Type Proto Iface R Rmem User Inode\n", IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk"); else { struct sock *s = sk_entry(v); const struct packet_sock *po = pkt_sk(s); seq_printf(seq, "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", s, refcount_read(&s->sk_refcnt), s->sk_type, ntohs(READ_ONCE(po->num)), READ_ONCE(po->ifindex), packet_sock_flag(po, PACKET_SOCK_RUNNING), atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); } return 0; } static const struct seq_operations packet_seq_ops = { .start = packet_seq_start, .next = packet_seq_next, .stop = packet_seq_stop, .show = packet_seq_show, }; #endif static int __net_init packet_net_init(struct net *net) { mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); #ifdef CONFIG_PROC_FS if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; #endif /* CONFIG_PROC_FS */ return 0; } static void __net_exit packet_net_exit(struct net *net) { remove_proc_entry("packet", net->proc_net); WARN_ON_ONCE(!hlist_empty(&net->packet.sklist)); } static struct pernet_operations packet_net_ops = { .init = packet_net_init, .exit = packet_net_exit, }; static void __exit packet_exit(void) { sock_unregister(PF_PACKET); proto_unregister(&packet_proto); unregister_netdevice_notifier(&packet_netdev_notifier); unregister_pernet_subsys(&packet_net_ops); } static int __init packet_init(void) { int rc; rc = register_pernet_subsys(&packet_net_ops); if (rc) goto out; rc = register_netdevice_notifier(&packet_netdev_notifier); if (rc) goto out_pernet; rc = proto_register(&packet_proto, 0); if (rc) goto out_notifier; rc = sock_register(&packet_family_ops); if (rc) goto out_proto; return 0; out_proto: proto_unregister(&packet_proto); out_notifier: unregister_netdevice_notifier(&packet_netdev_notifier); out_pernet: unregister_pernet_subsys(&packet_net_ops); out: return rc; } module_init(packet_init); module_exit(packet_exit); MODULE_DESCRIPTION("Packet socket support (AF_PACKET)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_PACKET);
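/*
 * Minimal userspace sketch (not part of af_packet.c) showing how the
 * packet_create(), packet_bind() and packet_recvmsg() paths above are
 * normally driven: open a PF_PACKET/SOCK_RAW socket, bind it to one
 * interface and read a single frame.  The interface name "eth0" is only
 * a placeholder.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <net/if.h>

int main(void)
{
	struct sockaddr_ll sll = { .sll_family = AF_PACKET };
	unsigned char frame[2048];
	socklen_t alen = sizeof(sll);
	ssize_t n;
	int fd;

	/* Needs CAP_NET_RAW, enforced by packet_create() */
	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* packet_bind() -> packet_do_bind(): attach the prot_hook to one device */
	sll.sll_protocol = htons(ETH_P_ALL);
	sll.sll_ifindex = if_nametoindex("eth0");
	if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
		perror("bind");
		return 1;
	}

	/* packet_recvmsg() refills sll with the originating device and address */
	n = recvfrom(fd, frame, sizeof(frame), 0, (struct sockaddr *)&sll, &alen);
	if (n >= 0)
		printf("received %zd bytes on ifindex %d\n", n, sll.sll_ifindex);

	close(fd);
	return 0;
}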
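/*
 * Companion transmit sketch for the packet_snd() path above: with SOCK_RAW
 * userspace supplies the complete link-layer header and the kernel only
 * validates it before handing the skb to dev_queue_xmit().  The broadcast
 * destination, the zero payload and the interface name are placeholders.
 */
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <net/if.h>

static int send_test_frame(int fd, const char *ifname)
{
	unsigned char dst[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	unsigned char frame[ETH_ZLEN] = { 0 };
	struct sockaddr_ll sll = {
		.sll_family   = AF_PACKET,
		.sll_protocol = htons(ETH_P_802_EX1),	/* local experimental ethertype */
		.sll_ifindex  = if_nametoindex(ifname),
		.sll_halen    = ETH_ALEN,
	};
	struct ethhdr *eth = (struct ethhdr *)frame;

	memcpy(sll.sll_addr, dst, ETH_ALEN);

	/* SOCK_RAW: build the Ethernet header ourselves */
	memcpy(eth->h_dest, dst, ETH_ALEN);
	eth->h_proto = htons(ETH_P_802_EX1);

	/* packet_snd() runs dev_validate_header() on this before transmit */
	return sendto(fd, frame, sizeof(frame), 0,
		      (struct sockaddr *)&sll, sizeof(sll)) < 0 ? -1 : 0;
}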
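/*
 * Sketch of the PACKET_ADD_MEMBERSHIP and PACKET_FANOUT branches of
 * packet_setsockopt() above.  The helper name and the fanout group id are
 * illustrative; the id/algorithm encoding mirrors the value composed by the
 * PACKET_FANOUT getsockopt case (id in the low 16 bits, type shifted by 16).
 */
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <net/if.h>

static int enable_promisc_and_fanout(int fd, const char *ifname, int group_id)
{
	struct packet_mreq mreq = {
		.mr_ifindex = (int)if_nametoindex(ifname),
		.mr_type    = PACKET_MR_PROMISC,  /* packet_mc_add() -> dev_set_promiscuity() */
	};
	/* Group id in the low 16 bits, algorithm in the high 16 bits (see fanout_add()) */
	int fanout_arg = (group_id & 0xffff) | (PACKET_FANOUT_HASH << 16);

	if (setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)))
		return -1;
	return setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &fanout_arg, sizeof(fanout_arg));
}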
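/*
 * The PACKET_VERSION, PACKET_RX_RING and mmap() paths above
 * (packet_set_ring(), packet_mmap(), tpacket_rcv()) are used together.
 * Condensed TPACKET_V3 block-walker sketch; the ring geometry is chosen
 * only for illustration and a real consumer would keep cycling forever.
 */
#include <poll.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

int main(void)
{
	struct tpacket_req3 req = {
		.tp_block_size = 1 << 22,		/* must be page aligned (packet_set_ring()) */
		.tp_block_nr   = 64,
		.tp_frame_size = 2048,			/* multiple of TPACKET_ALIGNMENT */
		.tp_frame_nr   = (1 << 22) / 2048 * 64,	/* frames_per_block * block_nr */
		.tp_retire_blk_tov = 60,		/* ms until a partial block is retired */
	};
	int ver = TPACKET_V3;
	unsigned char *ring;
	unsigned int b;
	int fd;

	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0)
		return 1;

	/* The version must be fixed before the ring exists (see PACKET_VERSION above) */
	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver)) ||
	    setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)))
		return 1;

	/* packet_mmap() exposes all blocks as one contiguous mapping */
	ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED)
		return 1;

	/* One pass over the ring; a long-running consumer loops indefinitely */
	for (b = 0; b < req.tp_block_nr; b++) {
		struct tpacket_block_desc *bd =
			(struct tpacket_block_desc *)(ring + (size_t)b * req.tp_block_size);
		struct tpacket3_hdr *ppd;
		unsigned int i;

		/* Wait until tpacket_rcv() or the retire timer hands the block to us */
		while (!(bd->hdr.bh1.block_status & TP_STATUS_USER)) {
			struct pollfd pfd = { .fd = fd, .events = POLLIN };
			poll(&pfd, 1, -1);
		}

		ppd = (struct tpacket3_hdr *)((unsigned char *)bd +
					      bd->hdr.bh1.offset_to_first_pkt);
		for (i = 0; i < bd->hdr.bh1.num_pkts; i++) {
			printf("pkt %u: snaplen=%u\n", i, ppd->tp_snaplen);
			ppd = (struct tpacket3_hdr *)((unsigned char *)ppd +
						      ppd->tp_next_offset);
		}

		/* Make sure all reads are done before handing the block back */
		__sync_synchronize();
		bd->hdr.bh1.block_status = TP_STATUS_KERNEL;
	}
	return 0;
}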
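/*
 * The PACKET_STATISTICS getsockopt case above folds tp_drops into the
 * counters and clears them on read; for a TPACKET_V1/V2 socket it is read
 * like this (the helper name is illustrative).
 */
#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

static void print_packet_stats(int fd)
{
	struct tpacket_stats st;
	socklen_t len = sizeof(st);

	/* The kernel zeroes its copy after reporting (clear-on-read) */
	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("packets=%u drops=%u\n", st.tp_packets, st.tp_drops);
}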
// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Intel Corporation. */ /* A common module to handle registrations and notifications for paravirtual * drivers to enable accelerated datapath and support VF live migration. * * The notifier and event handling code is based on netvsc driver. */ #include <linux/module.h> #include <linux/etherdevice.h> #include <uapi/linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/if_vlan.h> #include <net/failover.h> static LIST_HEAD(failover_list); static DEFINE_SPINLOCK(failover_lock); static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops) { struct net_device *failover_dev; struct failover *failover; spin_lock(&failover_lock); list_for_each_entry(failover, &failover_list, list) { failover_dev = rtnl_dereference(failover->failover_dev); if (ether_addr_equal(failover_dev->perm_addr, mac)) { *ops = rtnl_dereference(failover->ops); spin_unlock(&failover_lock); return failover_dev; } } spin_unlock(&failover_lock); return NULL; } /** * failover_slave_register - Register a slave netdev * * @slave_dev: slave netdev that is being registered * * Registers a slave device to a failover instance. Only ethernet devices * are supported.
*/ static int failover_slave_register(struct net_device *slave_dev) { struct netdev_lag_upper_info lag_upper_info; struct net_device *failover_dev; struct failover_ops *fops; int err; if (slave_dev->type != ARPHRD_ETHER) goto done; ASSERT_RTNL(); failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); if (!failover_dev) goto done; if (fops && fops->slave_pre_register && fops->slave_pre_register(slave_dev, failover_dev)) goto done; err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame, failover_dev); if (err) { netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n", err); goto done; } lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP; err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL, &lag_upper_info, NULL); if (err) { netdev_err(slave_dev, "can not set failover device %s (err = %d)\n", failover_dev->name, err); goto err_upper_link; } slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); if (fops && fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; netdev_upper_dev_unlink(slave_dev, failover_dev); slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); err_upper_link: netdev_rx_handler_unregister(slave_dev); done: return NOTIFY_DONE; } /** * failover_slave_unregister - Unregister a slave netdev * * @slave_dev: slave netdev that is being unregistered * * Unregisters a slave device from a failover instance. */ int failover_slave_unregister(struct net_device *slave_dev) { struct net_device *failover_dev; struct failover_ops *fops; if (!netif_is_failover_slave(slave_dev)) goto done; ASSERT_RTNL(); failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); if (!failover_dev) goto done; if (fops && fops->slave_pre_unregister && fops->slave_pre_unregister(slave_dev, failover_dev)) goto done; netdev_rx_handler_unregister(slave_dev); netdev_upper_dev_unlink(slave_dev, failover_dev); slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); if (fops && fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) return NOTIFY_OK; done: return NOTIFY_DONE; } EXPORT_SYMBOL_GPL(failover_slave_unregister); static int failover_slave_link_change(struct net_device *slave_dev) { struct net_device *failover_dev; struct failover_ops *fops; if (!netif_is_failover_slave(slave_dev)) goto done; ASSERT_RTNL(); failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); if (!failover_dev) goto done; if (!netif_running(failover_dev)) goto done; if (fops && fops->slave_link_change && !fops->slave_link_change(slave_dev, failover_dev)) return NOTIFY_OK; done: return NOTIFY_DONE; } static int failover_slave_name_change(struct net_device *slave_dev) { struct net_device *failover_dev; struct failover_ops *fops; if (!netif_is_failover_slave(slave_dev)) goto done; ASSERT_RTNL(); failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); if (!failover_dev) goto done; if (!netif_running(failover_dev)) goto done; if (fops && fops->slave_name_change && !fops->slave_name_change(slave_dev, failover_dev)) return NOTIFY_OK; done: return NOTIFY_DONE; } static int failover_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); /* Skip parent events */ if (netif_is_failover(event_dev)) return NOTIFY_DONE; switch (event) { case NETDEV_REGISTER: return failover_slave_register(event_dev); case NETDEV_UNREGISTER: return failover_slave_unregister(event_dev); case NETDEV_UP: case NETDEV_DOWN: case 
NETDEV_CHANGE: return failover_slave_link_change(event_dev); case NETDEV_CHANGENAME: return failover_slave_name_change(event_dev); default: return NOTIFY_DONE; } } static struct notifier_block failover_notifier = { .notifier_call = failover_event, }; static void failover_existing_slave_register(struct net_device *failover_dev) { struct net *net = dev_net(failover_dev); struct net_device *dev; rtnl_lock(); for_each_netdev(net, dev) { if (netif_is_failover(dev)) continue; if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) failover_slave_register(dev); } rtnl_unlock(); } /** * failover_register - Register a failover instance * * @dev: failover netdev * @ops: failover ops * * Allocate and register a failover instance for a failover netdev. ops * provides handlers for slave device register/unregister/link change/ * name change events. * * Return: pointer to failover instance */ struct failover *failover_register(struct net_device *dev, struct failover_ops *ops) { struct failover *failover; if (dev->type != ARPHRD_ETHER) return ERR_PTR(-EINVAL); failover = kzalloc(sizeof(*failover), GFP_KERNEL); if (!failover) return ERR_PTR(-ENOMEM); rcu_assign_pointer(failover->ops, ops); netdev_hold(dev, &failover->dev_tracker, GFP_KERNEL); dev->priv_flags |= IFF_FAILOVER; rcu_assign_pointer(failover->failover_dev, dev); spin_lock(&failover_lock); list_add_tail(&failover->list, &failover_list); spin_unlock(&failover_lock); netdev_info(dev, "failover master:%s registered\n", dev->name); failover_existing_slave_register(dev); return failover; } EXPORT_SYMBOL_GPL(failover_register); /** * failover_unregister - Unregister a failover instance * * @failover: pointer to failover instance * * Unregisters and frees a failover instance. */ void failover_unregister(struct failover *failover) { struct net_device *failover_dev; failover_dev = rcu_dereference(failover->failover_dev); netdev_info(failover_dev, "failover master:%s unregistered\n", failover_dev->name); failover_dev->priv_flags &= ~IFF_FAILOVER; netdev_put(failover_dev, &failover->dev_tracker); spin_lock(&failover_lock); list_del(&failover->list); spin_unlock(&failover_lock); kfree(failover); } EXPORT_SYMBOL_GPL(failover_unregister); static __init int failover_init(void) { register_netdevice_notifier(&failover_notifier); return 0; } module_init(failover_init); static __exit void failover_exit(void) { unregister_netdevice_notifier(&failover_notifier); } module_exit(failover_exit); MODULE_DESCRIPTION("Generic failover infrastructure/interface"); MODULE_LICENSE("GPL v2");
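Illustrative sketch (hypothetical, not from the file above): a consumer of this API, such as a paravirtual NIC driver, fills in a struct failover_ops with the slave_* callbacks that failover.c invokes and pairs failover_register() with failover_unregister(). The my_* names below are placeholders, only callbacks visible in the code above are used, and error handling is kept minimal.

#include <linux/netdevice.h>
#include <linux/err.h>
#include <net/failover.h>

/* Hypothetical consumer of the failover API. */
static int my_slave_register(struct net_device *slave_dev,
			     struct net_device *failover_dev)
{
	/* driver-specific datapath switch-over would go here */
	return 0;
}

static rx_handler_result_t my_handle_frame(struct sk_buff **pskb)
{
	/* a real driver would steer the skb to the failover master here */
	return RX_HANDLER_PASS;
}

static struct failover_ops my_failover_ops = {
	.slave_register		= my_slave_register,
	.slave_handle_frame	= my_handle_frame,
	/* the remaining callbacks (pre_register, unregister, link/name
	 * change) are optional; failover.c checks each pointer before
	 * calling it. */
};

static struct failover *my_failover;

static int my_probe(struct net_device *master_dev)
{
	my_failover = failover_register(master_dev, &my_failover_ops);
	return PTR_ERR_OR_ZERO(my_failover);
}

static void my_remove(void)
{
	failover_unregister(my_failover);
}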
// SPDX-License-Identifier: GPL-2.0 /* * USB Serial Console driver * * Copyright (C) 2001 - 2002 Greg Kroah-Hartman (greg@kroah.com) * * Thanks to Randy Dunlap for the original version of this code. * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/console.h> #include <linux/serial.h> #include <linux/usb.h> #include <linux/usb/serial.h> struct usbcons_info { int magic; int break_flag; struct usb_serial_port *port; }; static struct usbcons_info usbcons_info; static struct console usbcons; /* * ------------------------------------------------------------ * USB Serial console driver * * Much of the code here is copied from drivers/char/serial.c * and implements a phony serial console in the same way that * serial.c does so that in case some software queries it, * it will get the same results. * * Things that are different from the way the serial port code * does things, is that we call the lower level usb-serial * driver code to initialize the device, and we set the initial * console speeds based on the command line arguments. * ------------------------------------------------------------ */ static const struct tty_operations usb_console_fake_tty_ops = { }; /* * The parsing of the command line works exactly like the * serial.c code, except that the specifier is "ttyUSB" instead * of "ttyS".
*/ static int usb_console_setup(struct console *co, char *options) { struct usbcons_info *info = &usbcons_info; int baud = 9600; int bits = 8; int parity = 'n'; int doflow = 0; int cflag = CREAD | HUPCL | CLOCAL; char *s; struct usb_serial *serial; struct usb_serial_port *port; int retval; struct tty_struct *tty = NULL; struct ktermios dummy; if (options) { baud = simple_strtoul(options, NULL, 10); s = options; while (*s >= '0' && *s <= '9') s++; if (*s) parity = *s++; if (*s) bits = *s++ - '0'; if (*s) doflow = (*s++ == 'r'); } /* Sane default */ if (baud == 0) baud = 9600; switch (bits) { case 7: cflag |= CS7; break; default: case 8: cflag |= CS8; break; } switch (parity) { case 'o': case 'O': cflag |= PARODD; break; case 'e': case 'E': cflag |= PARENB; break; } if (doflow) cflag |= CRTSCTS; /* * no need to check the index here: if the index is wrong, console * code won't call us */ port = usb_serial_port_get_by_minor(co->index); if (port == NULL) { /* no device is connected yet, sorry :( */ pr_err("No USB device connected to ttyUSB%i\n", co->index); return -ENODEV; } serial = port->serial; retval = usb_autopm_get_interface(serial->interface); if (retval) goto error_get_interface; tty_port_tty_set(&port->port, NULL); info->port = port; ++port->port.count; if (!tty_port_initialized(&port->port)) { if (serial->type->set_termios) { /* * allocate a fake tty so the driver can initialize * the termios structure, then later call set_termios to * configure according to command line arguments */ tty = kzalloc(sizeof(*tty), GFP_KERNEL); if (!tty) { retval = -ENOMEM; goto reset_open_count; } kref_init(&tty->kref); tty->driver = usb_serial_tty_driver; tty->index = co->index; init_ldsem(&tty->ldisc_sem); spin_lock_init(&tty->files_lock); INIT_LIST_HEAD(&tty->tty_files); kref_get(&tty->driver->kref); __module_get(tty->driver->owner); tty->ops = &usb_console_fake_tty_ops; tty_init_termios(tty); tty_port_tty_set(&port->port, tty); } /* only call the device specific open if this * is the first time the port is opened */ retval = serial->type->open(NULL, port); if (retval) { dev_err(&port->dev, "could not open USB console port\n"); goto fail; } if (serial->type->set_termios) { tty->termios.c_cflag = cflag; tty_termios_encode_baud_rate(&tty->termios, baud, baud); memset(&dummy, 0, sizeof(struct ktermios)); serial->type->set_termios(tty, port, &dummy); tty_port_tty_set(&port->port, NULL); tty_save_termios(tty); tty_kref_put(tty); } tty_port_set_initialized(&port->port, true); } /* Now that any required fake tty operations are completed restore * the tty port count */ --port->port.count; /* The console is special in terms of closing the device so * indicate this port is now acting as a system console. 
*/ port->port.console = 1; mutex_unlock(&serial->disc_mutex); return retval; fail: tty_port_tty_set(&port->port, NULL); tty_kref_put(tty); reset_open_count: port->port.count = 0; info->port = NULL; usb_autopm_put_interface(serial->interface); error_get_interface: mutex_unlock(&serial->disc_mutex); usb_serial_put(serial); return retval; } static void usb_console_write(struct console *co, const char *buf, unsigned count) { static struct usbcons_info *info = &usbcons_info; struct usb_serial_port *port = info->port; struct usb_serial *serial; int retval = -ENODEV; if (!port || port->serial->dev->state == USB_STATE_NOTATTACHED) return; serial = port->serial; if (count == 0) return; dev_dbg(&port->dev, "%s - %d byte(s)\n", __func__, count); if (!port->port.console) { dev_dbg(&port->dev, "%s - port not opened\n", __func__); return; } while (count) { unsigned int i; unsigned int lf; /* search for LF so we can insert CR if necessary */ for (i = 0, lf = 0 ; i < count ; i++) { if (*(buf + i) == 10) { lf = 1; i++; break; } } /* pass on to the driver specific version of this function if it is available */ retval = serial->type->write(NULL, port, buf, i); dev_dbg(&port->dev, "%s - write: %d\n", __func__, retval); if (lf) { /* append CR after LF */ unsigned char cr = 13; retval = serial->type->write(NULL, port, &cr, 1); dev_dbg(&port->dev, "%s - write cr: %d\n", __func__, retval); } buf += i; count -= i; } } static struct tty_driver *usb_console_device(struct console *co, int *index) { struct tty_driver **p = (struct tty_driver **)co->data; if (!*p) return NULL; *index = co->index; return *p; } static struct console usbcons = { .name = "ttyUSB", .write = usb_console_write, .device = usb_console_device, .setup = usb_console_setup, .flags = CON_PRINTBUFFER, .index = -1, .data = &usb_serial_tty_driver, }; void usb_serial_console_disconnect(struct usb_serial *serial) { if (serial->port[0] && serial->port[0] == usbcons_info.port) { usb_serial_console_exit(); usb_serial_put(serial); } } void usb_serial_console_init(int minor) { if (minor == 0) { /* * Call register_console() if this is the first device plugged * in. If we call it earlier, then the callback to * console_setup() will fail, as there is not a device seen by * the USB subsystem yet. */ /* * Register console. * NOTES: * console_setup() is called (back) immediately (from * register_console). console_write() is called immediately * from register_console iff CON_PRINTBUFFER is set in flags. */ pr_debug("registering the USB serial console.\n"); register_console(&usbcons); } } void usb_serial_console_exit(void) { if (usbcons_info.port) { unregister_console(&usbcons); usbcons_info.port->port.console = 0; usbcons_info.port = NULL; } }
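Usage note (assumption based on the standard console= parameter handling, not stated in the file above): usb_console_setup() receives the text after the comma of the console= boot parameter as its options string, parsed as baud rate, parity character, data bits, and an optional trailing 'r' for flow control. A typical kernel command line selecting this console would therefore be:

console=ttyUSB0,115200n8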
// SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/scsi/scsi_proc.c * * The functions in this file provide an interface between * the PROC file system and the SCSI device drivers * It is mainly used for debugging, statistics and to pass * information directly to the lowlevel driver. * * (c) 1995 Michael Neuffer neuffer@goofy.zdv.uni-mainz.de * Version: 0.99.8 last change: 95/09/13 * * generic command parser provided by: * Andreas Heilwagen <crashcar@informatik.uni-koblenz.de> * * generic_proc_info() support of xxxx_info() by: * Michael A.
Griffith <grif@acm.org> */ #include <linux/module.h> #include <linux/init.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/errno.h> #include <linux/blkdev.h> #include <linux/seq_file.h> #include <linux/mutex.h> #include <linux/gfp.h> #include <linux/uaccess.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> #include "scsi_priv.h" #include "scsi_logging.h" /* 4K page size, but our output routines, use some slack for overruns */ #define PROC_BLOCK_SIZE (3*1024) static struct proc_dir_entry *proc_scsi; /* Protects scsi_proc_list */ static DEFINE_MUTEX(global_host_template_mutex); static LIST_HEAD(scsi_proc_list); /** * struct scsi_proc_entry - (host template, SCSI proc dir) association * @entry: entry in scsi_proc_list. * @sht: SCSI host template associated with the procfs directory. * @proc_dir: procfs directory associated with the SCSI host template. * @present: Number of SCSI hosts instantiated for @sht. */ struct scsi_proc_entry { struct list_head entry; const struct scsi_host_template *sht; struct proc_dir_entry *proc_dir; unsigned int present; }; static ssize_t proc_scsi_host_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct Scsi_Host *shost = pde_data(file_inode(file)); ssize_t ret = -ENOMEM; char *page; if (count > PROC_BLOCK_SIZE) return -EOVERFLOW; if (!shost->hostt->write_info) return -EINVAL; page = (char *)__get_free_page(GFP_KERNEL); if (page) { ret = -EFAULT; if (copy_from_user(page, buf, count)) goto out; ret = shost->hostt->write_info(shost, page, count); } out: free_page((unsigned long)page); return ret; } static int proc_scsi_show(struct seq_file *m, void *v) { struct Scsi_Host *shost = m->private; return shost->hostt->show_info(m, shost); } static int proc_scsi_host_open(struct inode *inode, struct file *file) { return single_open_size(file, proc_scsi_show, pde_data(inode), 4 * PAGE_SIZE); } static struct scsi_proc_entry * __scsi_lookup_proc_entry(const struct scsi_host_template *sht) { struct scsi_proc_entry *e; lockdep_assert_held(&global_host_template_mutex); list_for_each_entry(e, &scsi_proc_list, entry) if (e->sht == sht) return e; return NULL; } static struct scsi_proc_entry * scsi_lookup_proc_entry(const struct scsi_host_template *sht) { struct scsi_proc_entry *e; mutex_lock(&global_host_template_mutex); e = __scsi_lookup_proc_entry(sht); mutex_unlock(&global_host_template_mutex); return e; } /** * scsi_template_proc_dir() - returns the procfs dir for a SCSI host template * @sht: SCSI host template pointer. */ struct proc_dir_entry * scsi_template_proc_dir(const struct scsi_host_template *sht) { struct scsi_proc_entry *e = scsi_lookup_proc_entry(sht); return e ? e->proc_dir : NULL; } EXPORT_SYMBOL_GPL(scsi_template_proc_dir); static const struct proc_ops proc_scsi_ops = { .proc_open = proc_scsi_host_open, .proc_release = single_release, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_write = proc_scsi_host_write }; /** * scsi_proc_hostdir_add - Create directory in /proc for a scsi host * @sht: owner of this directory * * Sets sht->proc_dir to the new directory. 
*/ int scsi_proc_hostdir_add(const struct scsi_host_template *sht) { struct scsi_proc_entry *e; int ret; if (!sht->show_info) return 0; mutex_lock(&global_host_template_mutex); e = __scsi_lookup_proc_entry(sht); if (!e) { e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) { ret = -ENOMEM; goto unlock; } } if (e->present++) goto success; e->proc_dir = proc_mkdir(sht->proc_name, proc_scsi); if (!e->proc_dir) { printk(KERN_ERR "%s: proc_mkdir failed for %s\n", __func__, sht->proc_name); ret = -ENOMEM; goto unlock; } e->sht = sht; list_add_tail(&e->entry, &scsi_proc_list); success: e = NULL; ret = 0; unlock: mutex_unlock(&global_host_template_mutex); kfree(e); return ret; } /** * scsi_proc_hostdir_rm - remove directory in /proc for a scsi host * @sht: owner of directory */ void scsi_proc_hostdir_rm(const struct scsi_host_template *sht) { struct scsi_proc_entry *e; if (!sht->show_info) return; mutex_lock(&global_host_template_mutex); e = __scsi_lookup_proc_entry(sht); if (e && !--e->present) { remove_proc_entry(sht->proc_name, proc_scsi); list_del(&e->entry); kfree(e); } mutex_unlock(&global_host_template_mutex); } /** * scsi_proc_host_add - Add entry for this host to appropriate /proc dir * @shost: host to add */ void scsi_proc_host_add(struct Scsi_Host *shost) { const struct scsi_host_template *sht = shost->hostt; struct scsi_proc_entry *e; struct proc_dir_entry *p; char name[10]; if (!sht->show_info) return; e = scsi_lookup_proc_entry(sht); if (!e) goto err; sprintf(name,"%d", shost->host_no); p = proc_create_data(name, S_IRUGO | S_IWUSR, e->proc_dir, &proc_scsi_ops, shost); if (!p) goto err; return; err: shost_printk(KERN_ERR, shost, "%s: Failed to register host (%s failed)\n", __func__, e ? "proc_create_data()" : "scsi_proc_hostdir_add()"); } /** * scsi_proc_host_rm - remove this host's entry from /proc * @shost: which host */ void scsi_proc_host_rm(struct Scsi_Host *shost) { const struct scsi_host_template *sht = shost->hostt; struct scsi_proc_entry *e; char name[10]; if (!sht->show_info) return; e = scsi_lookup_proc_entry(sht); if (!e) return; sprintf(name,"%d", shost->host_no); remove_proc_entry(name, e->proc_dir); } /** * proc_print_scsidevice - return data about this host * @dev: A scsi device * @data: &struct seq_file to output to. * * Description: prints Host, Channel, Id, Lun, Vendor, Model, Rev, Type, * and revision. 
*/ static int proc_print_scsidevice(struct device *dev, void *data) { struct scsi_device *sdev; struct seq_file *s = data; int i; if (!scsi_is_sdev_device(dev)) goto out; sdev = to_scsi_device(dev); seq_printf(s, "Host: scsi%d Channel: %02d Id: %02d Lun: %02llu\n Vendor: ", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); for (i = 0; i < 8; i++) { if (sdev->vendor[i] >= 0x20) seq_putc(s, sdev->vendor[i]); else seq_putc(s, ' '); } seq_puts(s, " Model: "); for (i = 0; i < 16; i++) { if (sdev->model[i] >= 0x20) seq_putc(s, sdev->model[i]); else seq_putc(s, ' '); } seq_puts(s, " Rev: "); for (i = 0; i < 4; i++) { if (sdev->rev[i] >= 0x20) seq_putc(s, sdev->rev[i]); else seq_putc(s, ' '); } seq_putc(s, '\n'); seq_printf(s, " Type: %s ", scsi_device_type(sdev->type)); seq_printf(s, " ANSI SCSI revision: %02x", sdev->scsi_level - (sdev->scsi_level > 1)); if (sdev->scsi_level == 2) seq_puts(s, " CCS\n"); else seq_putc(s, '\n'); out: return 0; } /** * scsi_add_single_device - Respond to user request to probe for/add device * @host: user-supplied decimal integer * @channel: user-supplied decimal integer * @id: user-supplied decimal integer * @lun: user-supplied decimal integer * * Description: called by writing "scsi add-single-device" to /proc/scsi/scsi. * * does scsi_host_lookup() and either user_scan() if that transport * type supports it, or else scsi_scan_host_selected() * * Note: this seems to be aimed exclusively at SCSI parallel busses. */ static int scsi_add_single_device(uint host, uint channel, uint id, uint lun) { struct Scsi_Host *shost; int error = -ENXIO; shost = scsi_host_lookup(host); if (!shost) return error; if (shost->transportt->user_scan) error = shost->transportt->user_scan(shost, channel, id, lun); else error = scsi_scan_host_selected(shost, channel, id, lun, SCSI_SCAN_MANUAL); scsi_host_put(shost); return error; } /** * scsi_remove_single_device - Respond to user request to remove a device * @host: user-supplied decimal integer * @channel: user-supplied decimal integer * @id: user-supplied decimal integer * @lun: user-supplied decimal integer * * Description: called by writing "scsi remove-single-device" to * /proc/scsi/scsi. Does a scsi_device_lookup() and scsi_remove_device() */ static int scsi_remove_single_device(uint host, uint channel, uint id, uint lun) { struct scsi_device *sdev; struct Scsi_Host *shost; int error = -ENXIO; shost = scsi_host_lookup(host); if (!shost) return error; sdev = scsi_device_lookup(shost, channel, id, lun); if (sdev) { scsi_remove_device(sdev); scsi_device_put(sdev); error = 0; } scsi_host_put(shost); return error; } /** * proc_scsi_write - handle writes to /proc/scsi/scsi * @file: not used * @buf: buffer to write * @length: length of buf, at most PAGE_SIZE * @ppos: not used * * Description: this provides a legacy mechanism to add or remove devices by * Host, Channel, ID, and Lun. To use, * "echo 'scsi add-single-device 0 1 2 3' > /proc/scsi/scsi" or * "echo 'scsi remove-single-device 0 1 2 3' > /proc/scsi/scsi" with * "0 1 2 3" replaced by the Host, Channel, Id, and Lun. * * Note: this seems to be aimed at parallel SCSI. Most modern busses (USB, * SATA, Firewire, Fibre Channel, etc) dynamically assign these values to * provide a unique identifier and nothing more. 
*/ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, size_t length, loff_t *ppos) { int host, channel, id, lun; char *buffer, *end, *p; int err; if (!buf || length > PAGE_SIZE) return -EINVAL; buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) return -ENOMEM; err = -EFAULT; if (copy_from_user(buffer, buf, length)) goto out; err = -EINVAL; if (length < PAGE_SIZE) { end = buffer + length; *end = '\0'; } else { end = buffer + PAGE_SIZE - 1; if (*end) goto out; } /* * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi * with "0 1 2 3" replaced by your "Host Channel Id Lun". */ if (!strncmp("scsi add-single-device", buffer, 22)) { p = buffer + 23; host = (p < end) ? simple_strtoul(p, &p, 0) : 0; channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; err = scsi_add_single_device(host, channel, id, lun); /* * Usage: echo "scsi remove-single-device 0 1 2 3" >/proc/scsi/scsi * with "0 1 2 3" replaced by your "Host Channel Id Lun". */ } else if (!strncmp("scsi remove-single-device", buffer, 25)) { p = buffer + 26; host = (p < end) ? simple_strtoul(p, &p, 0) : 0; channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; err = scsi_remove_single_device(host, channel, id, lun); } /* * convert success returns so that we return the * number of bytes consumed. */ if (!err) err = length; out: free_page((unsigned long)buffer); return err; } static inline struct device *next_scsi_device(struct device *start) { struct device *next = bus_find_next_device(&scsi_bus_type, start); put_device(start); return next; } static void *scsi_seq_start(struct seq_file *sfile, loff_t *pos) { struct device *dev = NULL; loff_t n = *pos; while ((dev = next_scsi_device(dev))) { if (!n--) break; sfile->private++; } return dev; } static void *scsi_seq_next(struct seq_file *sfile, void *v, loff_t *pos) { (*pos)++; sfile->private++; return next_scsi_device(v); } static void scsi_seq_stop(struct seq_file *sfile, void *v) { put_device(v); } static int scsi_seq_show(struct seq_file *sfile, void *dev) { if (!sfile->private) seq_puts(sfile, "Attached devices:\n"); return proc_print_scsidevice(dev, sfile); } static const struct seq_operations scsi_seq_ops = { .start = scsi_seq_start, .next = scsi_seq_next, .stop = scsi_seq_stop, .show = scsi_seq_show }; /** * proc_scsi_open - glue function * @inode: not used * @file: passed to single_open() * * Associates proc_scsi_show with this file */ static int proc_scsi_open(struct inode *inode, struct file *file) { /* * We don't really need this for the write case but it doesn't * harm either. */ return seq_open(file, &scsi_seq_ops); } static const struct proc_ops scsi_scsi_proc_ops = { .proc_open = proc_scsi_open, .proc_read = seq_read, .proc_write = proc_scsi_write, .proc_lseek = seq_lseek, .proc_release = seq_release, }; /** * scsi_init_procfs - create scsi and scsi/scsi in procfs */ int __init scsi_init_procfs(void) { struct proc_dir_entry *pde; proc_scsi = proc_mkdir("scsi", NULL); if (!proc_scsi) goto err1; pde = proc_create("scsi/scsi", 0, NULL, &scsi_scsi_proc_ops); if (!pde) goto err2; return 0; err2: remove_proc_entry("scsi", NULL); err1: return -ENOMEM; } /** * scsi_exit_procfs - Remove scsi/scsi and scsi from procfs */ void scsi_exit_procfs(void) { remove_proc_entry("scsi/scsi", NULL); remove_proc_entry("scsi", NULL); }
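Illustrative sketch (not part of the kernel source above): proc_scsi_write() parses the same plain-text commands shown in the echo examples in the comments, so the add-single-device request can also be issued programmatically. The "0 1 2 3" values are placeholders for Host, Channel, Id, and Lun.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Same effect as:
	 *   echo "scsi add-single-device 0 1 2 3" > /proc/scsi/scsi
	 * with "0 1 2 3" replaced by the target Host Channel Id Lun. */
	const char cmd[] = "scsi add-single-device 0 1 2 3";
	int fd = open("/proc/scsi/scsi", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/scsi/scsi");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write");
	close(fd);
	return 0;
}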
// SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/spinlock.h> #include <linux/mm.h> #include <linux/memremap.h> #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/secretmem.h> #include <linux/sched/signal.h> #include <linux/rwsem.h> #include <linux/hugetlb.h> #include <linux/migrate.h> #include <linux/mm_inline.h> #include <linux/sched/mm.h> #include <linux/shmem_fs.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> #include "internal.h" struct follow_page_context { struct dev_pagemap *pgmap; unsigned int page_mask; }; static inline void sanity_check_pinned_pages(struct page **pages, unsigned long npages) { if (!IS_ENABLED(CONFIG_DEBUG_VM)) return; /* * We only pin anonymous pages if they are exclusive. Once pinned, we * can no longer turn them possibly shared and PageAnonExclusive() will * stick around until the page is freed. * * We'd like to verify that our pinned anonymous pages are still mapped * exclusively. The issue with anon THP is that we don't know how * they are/were mapped when pinning them. However, for anon * THP we can assume that either the given page (PTE-mapped THP) or * the head page (PMD-mapped THP) should be PageAnonExclusive(). If * neither is the case, there is certainly something wrong. */ for (; npages; npages--, pages++) { struct page *page = *pages; struct folio *folio = page_folio(page); if (is_zero_page(page) || !folio_test_anon(folio)) continue; if (!folio_test_large(folio) || folio_test_hugetlb(folio)) VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page); else /* Either a PTE-mapped or a PMD-mapped THP.
*/ VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) && !PageAnonExclusive(page), page); } } /* * Return the folio with ref appropriately incremented, * or NULL if that failed. */ static inline struct folio *try_get_folio(struct page *page, int refs) { struct folio *folio; retry: folio = page_folio(page); if (WARN_ON_ONCE(folio_ref_count(folio) < 0)) return NULL; if (unlikely(!folio_ref_try_add_rcu(folio, refs))) return NULL; /* * At this point we have a stable reference to the folio; but it * could be that between calling page_folio() and the refcount * increment, the folio was split, in which case we'd end up * holding a reference on a folio that has nothing to do with the page * we were given anymore. * So now that the folio is stable, recheck that the page still * belongs to this folio. */ if (unlikely(page_folio(page) != folio)) { if (!put_devmap_managed_page_refs(&folio->page, refs)) folio_put_refs(folio, refs); goto retry; } return folio; } /** * try_grab_folio() - Attempt to get or pin a folio. * @page: pointer to page to be grabbed * @refs: the value to (effectively) add to the folio's refcount * @flags: gup flags: these are the FOLL_* flag values. * * "grab" names in this file mean, "look at flags to decide whether to use * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. * * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the * same time. (That's true throughout the get_user_pages*() and * pin_user_pages*() APIs.) Cases: * * FOLL_GET: folio's refcount will be incremented by @refs. * * FOLL_PIN on large folios: folio's refcount will be incremented by * @refs, and its pincount will be incremented by @refs. * * FOLL_PIN on single-page folios: folio's refcount will be incremented by * @refs * GUP_PIN_COUNTING_BIAS. * * Return: The folio containing @page (with refcount appropriately * incremented) for success, or NULL upon failure. If neither FOLL_GET * nor FOLL_PIN was set, that's considered failure, and furthermore, * a likely bug in the caller, so a warning is also emitted. */ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) { struct folio *folio; if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) return NULL; if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) return NULL; if (flags & FOLL_GET) return try_get_folio(page, refs); /* FOLL_PIN is set */ /* * Don't take a pin on the zero page - it's not going anywhere * and it is used in a *lot* of places. */ if (is_zero_page(page)) return page_folio(page); folio = try_get_folio(page, refs); if (!folio) return NULL; /* * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a * right zone, so fail and let the caller fall back to the slow * path. */ if (unlikely((flags & FOLL_LONGTERM) && !folio_is_longterm_pinnable(folio))) { if (!put_devmap_managed_page_refs(&folio->page, refs)) folio_put_refs(folio, refs); return NULL; } /* * When pinning a large folio, use an exact count to track it. * * However, be sure to *also* increment the normal folio * refcount field at least once, so that the folio really * is pinned. That's why the refcount from the earlier * try_get_folio() is left intact. */ if (folio_test_large(folio)) atomic_add(refs, &folio->_pincount); else folio_ref_add(folio, refs * (GUP_PIN_COUNTING_BIAS - 1)); /* * Adjust the pincount before re-checking the PTE for changes. * This is essentially a smp_mb() and is paired with a memory * barrier in folio_try_share_anon_rmap_*(). 
*/ smp_mb__after_atomic(); node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); return folio; } static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { if (is_zero_folio(folio)) return; node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); if (folio_test_large(folio)) atomic_sub(refs, &folio->_pincount); else refs *= GUP_PIN_COUNTING_BIAS; } if (!put_devmap_managed_page_refs(&folio->page, refs)) folio_put_refs(folio, refs); } /** * try_grab_page() - elevate a page's refcount by a flag-dependent amount * @page: pointer to page to be grabbed * @flags: gup flags: these are the FOLL_* flag values. * * This might not do anything at all, depending on the flags argument. * * "grab" names in this file mean, "look at flags to decide whether to use * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. * * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same * time. Cases: please see the try_grab_folio() documentation, with * "refs=1". * * Return: 0 for success, or if no action was required (if neither FOLL_PIN * nor FOLL_GET was set, nothing is done). A negative error code for failure: * * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not * be grabbed. */ int __must_check try_grab_page(struct page *page, unsigned int flags) { struct folio *folio = page_folio(page); if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) return -ENOMEM; if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) return -EREMOTEIO; if (flags & FOLL_GET) folio_ref_inc(folio); else if (flags & FOLL_PIN) { /* * Don't take a pin on the zero page - it's not going anywhere * and it is used in a *lot* of places. */ if (is_zero_page(page)) return 0; /* * Similar to try_grab_folio(): be sure to *also* * increment the normal page refcount field at least once, * so that the page really is pinned. */ if (folio_test_large(folio)) { folio_ref_add(folio, 1); atomic_add(1, &folio->_pincount); } else { folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); } node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); } return 0; } /** * unpin_user_page() - release a dma-pinned page * @page: pointer to page to be released * * Pages that were pinned via pin_user_pages*() must be released via either * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so * that such pages can be separately tracked and uniquely handled. In * particular, interactions with RDMA and filesystems need special handling. */ void unpin_user_page(struct page *page) { sanity_check_pinned_pages(&page, 1); gup_put_folio(page_folio(page), 1, FOLL_PIN); } EXPORT_SYMBOL(unpin_user_page); /** * folio_add_pin - Try to get an additional pin on a pinned folio * @folio: The folio to be pinned * * Get an additional pin on a folio we already have a pin on. Makes no change * if the folio is a zero_page. */ void folio_add_pin(struct folio *folio) { if (is_zero_folio(folio)) return; /* * Similar to try_grab_folio(): be sure to *also* increment the normal * page refcount field at least once, so that the page really is * pinned. 
*/ if (folio_test_large(folio)) { WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1); folio_ref_inc(folio); atomic_inc(&folio->_pincount); } else { WARN_ON_ONCE(folio_ref_count(folio) < GUP_PIN_COUNTING_BIAS); folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); } } static inline struct folio *gup_folio_range_next(struct page *start, unsigned long npages, unsigned long i, unsigned int *ntails) { struct page *next = nth_page(start, i); struct folio *folio = page_folio(next); unsigned int nr = 1; if (folio_test_large(folio)) nr = min_t(unsigned int, npages - i, folio_nr_pages(folio) - folio_page_idx(folio, next)); *ntails = nr; return folio; } static inline struct folio *gup_folio_next(struct page **list, unsigned long npages, unsigned long i, unsigned int *ntails) { struct folio *folio = page_folio(list[i]); unsigned int nr; for (nr = i + 1; nr < npages; nr++) { if (page_folio(list[nr]) != folio) break; } *ntails = nr - i; return folio; } /** * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages * @pages: array of pages to be maybe marked dirty, and definitely released. * @npages: number of pages in the @pages array. * @make_dirty: whether to mark the pages dirty * * "gup-pinned page" refers to a page that has had one of the get_user_pages() * variants called on that page. * * For each page in the @pages array, make that page (or its head page, if a * compound page) dirty, if @make_dirty is true, and if the page was previously * listed as clean. In any case, releases all pages using unpin_user_page(), * possibly via unpin_user_pages(), for the non-dirty case. * * Please see the unpin_user_page() documentation for details. * * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is * required, then the caller should a) verify that this is really correct, * because _lock() is usually required, and b) hand code it: * set_page_dirty_lock(), unpin_user_page(). * */ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty) { unsigned long i; struct folio *folio; unsigned int nr; if (!make_dirty) { unpin_user_pages(pages, npages); return; } sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); /* * Checking PageDirty at this point may race with * clear_page_dirty_for_io(), but that's OK. Two key * cases: * * 1) This code sees the page as already dirty, so it * skips the call to set_page_dirty(). That could happen * because clear_page_dirty_for_io() called * page_mkclean(), followed by set_page_dirty(). * However, now the page is going to get written back, * which meets the original intention of setting it * dirty, so all is well: clear_page_dirty_for_io() goes * on to call TestClearPageDirty(), and write the page * back. * * 2) This code sees the page as clean, so it calls * set_page_dirty(). The page stays dirty, despite being * written back, so it gets written back again in the * next writeback cycle. This is harmless. */ if (!folio_test_dirty(folio)) { folio_lock(folio); folio_mark_dirty(folio); folio_unlock(folio); } gup_put_folio(folio, nr, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_pages_dirty_lock); /** * unpin_user_page_range_dirty_lock() - release and optionally dirty * gup-pinned page range * * @page: the starting page of a range maybe marked dirty, and definitely released. * @npages: number of consecutive pages to release. 
* @make_dirty: whether to mark the pages dirty * * "gup-pinned page range" refers to a range of pages that has had one of the * pin_user_pages() variants called on that page. * * For the page ranges defined by [page .. page+npages], make that range (or * its head pages, if a compound page) dirty, if @make_dirty is true, and if the * page range was previously listed as clean. * * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is * required, then the caller should a) verify that this is really correct, * because _lock() is usually required, and b) hand code it: * set_page_dirty_lock(), unpin_user_page(). * */ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty) { unsigned long i; struct folio *folio; unsigned int nr; for (i = 0; i < npages; i += nr) { folio = gup_folio_range_next(page, npages, i, &nr); if (make_dirty && !folio_test_dirty(folio)) { folio_lock(folio); folio_mark_dirty(folio); folio_unlock(folio); } gup_put_folio(folio, nr, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_page_range_dirty_lock); static void unpin_user_pages_lockless(struct page **pages, unsigned long npages) { unsigned long i; struct folio *folio; unsigned int nr; /* * Don't perform any sanity checks because we might have raced with * fork() and some anonymous pages might now actually be shared -- * which is why we're unpinning after all. */ for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); gup_put_folio(folio, nr, FOLL_PIN); } } /** * unpin_user_pages() - release an array of gup-pinned pages. * @pages: array of pages to be marked dirty and released. * @npages: number of pages in the @pages array. * * For each page in the @pages array, release the page using unpin_user_page(). * * Please see the unpin_user_page() documentation for details. */ void unpin_user_pages(struct page **pages, unsigned long npages) { unsigned long i; struct folio *folio; unsigned int nr; /* * If this WARN_ON() fires, then the system *might* be leaking pages (by * leaving them pinned), but probably not. More likely, gup/pup returned * a hard -ERRNO error to the caller, who erroneously passed it here. */ if (WARN_ON(IS_ERR_VALUE(npages))) return; sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); gup_put_folio(folio, nr, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_pages); /* * Set the MMF_HAS_PINNED if not set yet; after set it'll be there for the mm's * lifecycle. Avoid setting the bit unless necessary, or it might cause write * cache bouncing on large SMP machines for concurrent pinned gups. */ static inline void mm_set_has_pinned_flag(unsigned long *mm_flags) { if (!test_bit(MMF_HAS_PINNED, mm_flags)) set_bit(MMF_HAS_PINNED, mm_flags); } #ifdef CONFIG_MMU static struct page *no_page_table(struct vm_area_struct *vma, unsigned int flags) { /* * When core dumping an enormous anonymous area that nobody * has touched so far, we don't want to allocate unnecessary pages or * page tables. Return error instead of NULL to skip handle_mm_fault, * then get_dump_page() will return NULL to leave a hole in the dump. * But we can only make this optimization where a hole would surely * be zero-filled if handle_mm_fault() actually did handle it. 
*/ if ((flags & FOLL_DUMP) && (vma_is_anonymous(vma) || !vma->vm_ops->fault)) return ERR_PTR(-EFAULT); return NULL; } static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, pte_t *pte, unsigned int flags) { if (flags & FOLL_TOUCH) { pte_t orig_entry = ptep_get(pte); pte_t entry = orig_entry; if (flags & FOLL_WRITE) entry = pte_mkdirty(entry); entry = pte_mkyoung(entry); if (!pte_same(orig_entry, entry)) { set_pte_at(vma->vm_mm, address, pte, entry); update_mmu_cache(vma, address, pte); } } /* Proper page table entry exists, but no corresponding struct page */ return -EEXIST; } /* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */ static inline bool can_follow_write_pte(pte_t pte, struct page *page, struct vm_area_struct *vma, unsigned int flags) { /* If the pte is writable, we can write to the page. */ if (pte_write(pte)) return true; /* Maybe FOLL_FORCE is set to override it? */ if (!(flags & FOLL_FORCE)) return false; /* But FOLL_FORCE has no effect on shared mappings */ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) return false; /* ... or read-only private ones */ if (!(vma->vm_flags & VM_MAYWRITE)) return false; /* ... or already writable ones that just need to take a write fault */ if (vma->vm_flags & VM_WRITE) return false; /* * See can_change_pte_writable(): we broke COW and could map the page * writable if we have an exclusive anonymous page ... */ if (!page || !PageAnon(page) || !PageAnonExclusive(page)) return false; /* ... and a write-fault isn't required for other reasons. */ if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte)) return false; return !userfaultfd_pte_wp(vma, pte); } static struct page *follow_page_pte(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, unsigned int flags, struct dev_pagemap **pgmap) { struct mm_struct *mm = vma->vm_mm; struct page *page; spinlock_t *ptl; pte_t *ptep, pte; int ret; /* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return ERR_PTR(-EINVAL); ptep = pte_offset_map_lock(mm, pmd, address, &ptl); if (!ptep) return no_page_table(vma, flags); pte = ptep_get(ptep); if (!pte_present(pte)) goto no_page; if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags)) goto no_page; page = vm_normal_page(vma, address, pte); /* * We only care about anon pages in can_follow_write_pte() and don't * have to worry about pte_devmap() because they are never anon. */ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, vma, flags)) { page = NULL; goto out; } if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) { /* * Only return device mapping pages in the FOLL_GET or FOLL_PIN * case since they are only valid while holding the pgmap * reference. */ *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap); if (*pgmap) page = pte_page(pte); else goto no_page; } else if (unlikely(!page)) { if (flags & FOLL_DUMP) { /* Avoid special (like zero) pages in core dumps */ page = ERR_PTR(-EFAULT); goto out; } if (is_zero_pfn(pte_pfn(pte))) { page = pte_page(pte); } else { ret = follow_pfn_pte(vma, address, ptep, flags); page = ERR_PTR(ret); goto out; } } if (!pte_write(pte) && gup_must_unshare(vma, flags, page)) { page = ERR_PTR(-EMLINK); goto out; } VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && !PageAnonExclusive(page), page); /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. 
*/ ret = try_grab_page(page, flags); if (unlikely(ret)) { page = ERR_PTR(ret); goto out; } /* * We need to make the page accessible if and only if we are going * to access its content (the FOLL_PIN case). Please see * Documentation/core-api/pin_user_pages.rst for details. */ if (flags & FOLL_PIN) { ret = arch_make_page_accessible(page); if (ret) { unpin_user_page(page); page = ERR_PTR(ret); goto out; } } if (flags & FOLL_TOUCH) { if ((flags & FOLL_WRITE) && !pte_dirty(pte) && !PageDirty(page)) set_page_dirty(page); /* * pte_mkyoung() would be more correct here, but atomic care * is needed to avoid losing the dirty bit: it is easier to use * mark_page_accessed(). */ mark_page_accessed(page); } out: pte_unmap_unlock(ptep, ptl); return page; no_page: pte_unmap_unlock(ptep, ptl); if (!pte_none(pte)) return NULL; return no_page_table(vma, flags); } static struct page *follow_pmd_mask(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, unsigned int flags, struct follow_page_context *ctx) { pmd_t *pmd, pmdval; spinlock_t *ptl; struct page *page; struct mm_struct *mm = vma->vm_mm; pmd = pmd_offset(pudp, address); pmdval = pmdp_get_lockless(pmd); if (pmd_none(pmdval)) return no_page_table(vma, flags); if (!pmd_present(pmdval)) return no_page_table(vma, flags); if (pmd_devmap(pmdval)) { ptl = pmd_lock(mm, pmd); page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap); spin_unlock(ptl); if (page) return page; return no_page_table(vma, flags); } if (likely(!pmd_trans_huge(pmdval))) return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags)) return no_page_table(vma, flags); ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_present(*pmd))) { spin_unlock(ptl); return no_page_table(vma, flags); } if (unlikely(!pmd_trans_huge(*pmd))) { spin_unlock(ptl); return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); } if (flags & FOLL_SPLIT_PMD) { spin_unlock(ptl); split_huge_pmd(vma, pmd, address); /* If pmd was left empty, stuff a page table in there quickly */ return pte_alloc(mm, pmd) ? 
ERR_PTR(-ENOMEM) : follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); } page = follow_trans_huge_pmd(vma, address, pmd, flags); spin_unlock(ptl); ctx->page_mask = HPAGE_PMD_NR - 1; return page; } static struct page *follow_pud_mask(struct vm_area_struct *vma, unsigned long address, p4d_t *p4dp, unsigned int flags, struct follow_page_context *ctx) { pud_t *pud; spinlock_t *ptl; struct page *page; struct mm_struct *mm = vma->vm_mm; pud = pud_offset(p4dp, address); if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_devmap(*pud)) { ptl = pud_lock(mm, pud); page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap); spin_unlock(ptl); if (page) return page; return no_page_table(vma, flags); } if (unlikely(pud_bad(*pud))) return no_page_table(vma, flags); return follow_pmd_mask(vma, address, pud, flags, ctx); } static struct page *follow_p4d_mask(struct vm_area_struct *vma, unsigned long address, pgd_t *pgdp, unsigned int flags, struct follow_page_context *ctx) { p4d_t *p4d; p4d = p4d_offset(pgdp, address); if (p4d_none(*p4d)) return no_page_table(vma, flags); BUILD_BUG_ON(p4d_huge(*p4d)); if (unlikely(p4d_bad(*p4d))) return no_page_table(vma, flags); return follow_pud_mask(vma, address, p4d, flags, ctx); } /** * follow_page_mask - look up a page descriptor from a user-virtual address * @vma: vm_area_struct mapping @address * @address: virtual address to look up * @flags: flags modifying lookup behaviour * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a * pointer to output page_mask * * @flags can have FOLL_ flags set, defined in <linux/mm.h> * * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches * the device's dev_pagemap metadata to avoid repeating expensive lookups. * * When getting an anonymous page and the caller has to trigger unsharing * of a shared anonymous page first, -EMLINK is returned. The caller should * trigger a fault with FAULT_FLAG_UNSHARE set. Note that unsharing is only * relevant with FOLL_PIN and !FOLL_WRITE. * * On output, the @ctx->page_mask is set according to the size of the page. * * Return: the mapped (struct page *), %NULL if no mapping exists, or * an error pointer if there is a mapping to something not represented * by a page descriptor (see also vm_normal_page()). */ static struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct follow_page_context *ctx) { pgd_t *pgd; struct mm_struct *mm = vma->vm_mm; ctx->page_mask = 0; /* * Call hugetlb_follow_page_mask for hugetlb vmas as it will use * special hugetlb page table walking code. This eliminates the * need to check for hugetlb entries in the general walking code. */ if (is_vm_hugetlb_page(vma)) return hugetlb_follow_page_mask(vma, address, flags, &ctx->page_mask); pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); return follow_p4d_mask(vma, address, pgd, flags, ctx); } struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags) { struct follow_page_context ctx = { NULL }; struct page *page; if (vma_is_secretmem(vma)) return NULL; if (WARN_ON_ONCE(foll_flags & FOLL_PIN)) return NULL; /* * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect * to fail on PROT_NONE-mapped pages. 
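 *
 * Illustrative caller pattern for follow_page() (a sketch only, not taken
 * from an in-tree user; error handling trimmed):
 *
 *      mmap_read_lock(mm);
 *      vma = vma_lookup(mm, addr);
 *      if (vma) {
 *              page = follow_page(vma, addr, FOLL_GET);
 *              if (!IS_ERR_OR_NULL(page)) {
 *                      ... use the page ...
 *                      put_page(page);
 *              }
 *      }
 *      mmap_read_unlock(mm);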
*/ page = follow_page_mask(vma, address, foll_flags, &ctx); if (ctx.pgmap) put_dev_pagemap(ctx.pgmap); return page; } static int get_gate_page(struct mm_struct *mm, unsigned long address, unsigned int gup_flags, struct vm_area_struct **vma, struct page **page) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; pte_t entry; int ret = -EFAULT; /* user gate pages are read-only */ if (gup_flags & FOLL_WRITE) return -EFAULT; if (address > TASK_SIZE) pgd = pgd_offset_k(address); else pgd = pgd_offset_gate(mm, address); if (pgd_none(*pgd)) return -EFAULT; p4d = p4d_offset(pgd, address); if (p4d_none(*p4d)) return -EFAULT; pud = pud_offset(p4d, address); if (pud_none(*pud)) return -EFAULT; pmd = pmd_offset(pud, address); if (!pmd_present(*pmd)) return -EFAULT; pte = pte_offset_map(pmd, address); if (!pte) return -EFAULT; entry = ptep_get(pte); if (pte_none(entry)) goto unmap; *vma = get_gate_vma(mm); if (!page) goto out; *page = vm_normal_page(*vma, address, entry); if (!*page) { if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(entry))) goto unmap; *page = pte_page(entry); } ret = try_grab_page(*page, gup_flags); if (unlikely(ret)) goto unmap; out: ret = 0; unmap: pte_unmap(pte); return ret; } /* * mmap_lock must be held on entry. If @flags has FOLL_UNLOCKABLE but not * FOLL_NOWAIT, the mmap_lock may be released. If it is, *@locked will be set * to 0 and -EBUSY returned. */ static int faultin_page(struct vm_area_struct *vma, unsigned long address, unsigned int *flags, bool unshare, int *locked) { unsigned int fault_flags = 0; vm_fault_t ret; if (*flags & FOLL_NOFAULT) return -EFAULT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) fault_flags |= FAULT_FLAG_REMOTE; if (*flags & FOLL_UNLOCKABLE) { fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * FAULT_FLAG_INTERRUPTIBLE is opt-in. GUP callers must set * FOLL_INTERRUPTIBLE to enable FAULT_FLAG_INTERRUPTIBLE. * That's because some callers may not be prepared to * handle early exits caused by non-fatal signals. */ if (*flags & FOLL_INTERRUPTIBLE) fault_flags |= FAULT_FLAG_INTERRUPTIBLE; } if (*flags & FOLL_NOWAIT) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; if (*flags & FOLL_TRIED) { /* * Note: FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_TRIED * can co-exist */ fault_flags |= FAULT_FLAG_TRIED; } if (unshare) { fault_flags |= FAULT_FLAG_UNSHARE; /* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */ VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE); } ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_COMPLETED) { /* * With FAULT_FLAG_RETRY_NOWAIT we'll never release the * mmap lock in the page fault handler. Sanity check this. */ WARN_ON_ONCE(fault_flags & FAULT_FLAG_RETRY_NOWAIT); *locked = 0; /* * We should do the same as VM_FAULT_RETRY, but let's not * return -EBUSY since that's not reflecting the reality of * what has happened - we've just fully completed a page * fault, with the mmap lock released. Use -EAGAIN to show * that we want to take the mmap lock _again_. */ return -EAGAIN; } if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); if (err) return err; BUG(); } if (ret & VM_FAULT_RETRY) { if (!(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *locked = 0; return -EBUSY; } return 0; } /* * Writing to file-backed mappings which require folio dirty tracking using GUP * is a fundamentally broken operation, as kernel write access to GUP mappings * do not adhere to the semantics expected by a file system. 
* * Consider the following scenario:- * * 1. A folio is written to via GUP which write-faults the memory, notifying * the file system and dirtying the folio. * 2. Later, writeback is triggered, resulting in the folio being cleaned and * the PTE being marked read-only. * 3. The GUP caller writes to the folio, as it is mapped read/write via the * direct mapping. * 4. The GUP caller, now done with the page, unpins it and sets it dirty * (though it does not have to). * * This results in both data being written to a folio without writenotify, and * the folio being dirtied unexpectedly (if the caller decides to do so). */ static bool writable_file_mapping_allowed(struct vm_area_struct *vma, unsigned long gup_flags) { /* * If we aren't pinning then no problematic write can occur. A long term * pin is the most egregious case so this is the case we disallow. */ if ((gup_flags & (FOLL_PIN | FOLL_LONGTERM)) != (FOLL_PIN | FOLL_LONGTERM)) return true; /* * If the VMA does not require dirty tracking then no problematic write * can occur either. */ return !vma_needs_dirty_tracking(vma); } static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) { vm_flags_t vm_flags = vma->vm_flags; int write = (gup_flags & FOLL_WRITE); int foreign = (gup_flags & FOLL_REMOTE); bool vma_anon = vma_is_anonymous(vma); if (vm_flags & (VM_IO | VM_PFNMAP)) return -EFAULT; if ((gup_flags & FOLL_ANON) && !vma_anon) return -EFAULT; if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) return -EOPNOTSUPP; if (vma_is_secretmem(vma)) return -EFAULT; if (write) { if (!vma_anon && !writable_file_mapping_allowed(vma, gup_flags)) return -EFAULT; if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */ if (is_vm_hugetlb_page(vma)) return -EFAULT; /* * We used to let the write,force case do COW in a * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could * set a breakpoint in a read-only mapping of an * executable, without corrupting the file (yet only * when that file had been opened for writing!). * Anon pages in shared mappings are surprising: now * just reject it. */ if (!is_cow_mapping(vm_flags)) return -EFAULT; } } else if (!(vm_flags & VM_READ)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; /* * Is there actually any vma we can reach here which does not * have VM_MAYREAD set? */ if (!(vm_flags & VM_MAYREAD)) return -EFAULT; } /* * gups are always data accesses, not instruction * fetches, so execute=false here */ if (!arch_vma_access_permitted(vma, write, false, foreign)) return -EFAULT; return 0; } /* * This is "vma_lookup()", but with a warning if we would have * historically expanded the stack in the GUP code. */ static struct vm_area_struct *gup_vma_lookup(struct mm_struct *mm, unsigned long addr) { #ifdef CONFIG_STACK_GROWSUP return vma_lookup(mm, addr); #else static volatile unsigned long next_warn; struct vm_area_struct *vma; unsigned long now, next; vma = find_vma(mm, addr); if (!vma || (addr >= vma->vm_start)) return vma; /* Only warn for half-way relevant accesses */ if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; if (vma->vm_start - addr > 65536) return NULL; /* Let's not warn more than once an hour.. */ now = jiffies; next = next_warn; if (next && time_before(now, next)) return NULL; next_warn = now + 60*60*HZ; /* Let people know things may have changed. 
*/ pr_warn("GUP no longer grows the stack in %s (%d): %lx-%lx (%lx)\n", current->comm, task_pid_nr(current), vma->vm_start, vma->vm_end, addr); dump_stack(); return NULL; #endif } /** * __get_user_pages() - pin user pages in memory * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. Or NULL, if caller * only intends to ensure the pages are faulted in. * @locked: whether we're still with the mmap_lock held * * Returns either number of pages pinned (which may be less than the * number requested), or an error. Details about the return value: * * -- If nr_pages is 0, returns 0. * -- If nr_pages is >0, but no pages were pinned, returns -errno. * -- If nr_pages is >0, and some pages were pinned, returns the number of * pages pinned. Again, this may be less than nr_pages. * -- 0 return value is possible when the fault would need to be retried. * * The caller is responsible for releasing returned @pages, via put_page(). * * Must be called with mmap_lock held. It may be released. See below. * * __get_user_pages walks a process's page tables and takes a reference to * each struct page that each user address corresponds to at a given * instant. That is, it takes the page that would be accessed if a user * thread accesses the given user virtual address at that instant. * * This does not guarantee that the page exists in the user mappings when * __get_user_pages returns, and there may even be a completely different * page there in some cases (eg. if mmapped pagecache has been invalidated * and subsequently re-faulted). However it does guarantee that the page * won't be freed completely. And mostly callers simply care that the page * contains data that was valid *at some point in time*. Typically, an IO * or similar operation cannot guarantee anything stronger anyway because * locks can't be held over the syscall boundary. * * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If * the page is written to, set_page_dirty (or set_page_dirty_lock, as * appropriate) must be called after the page is finished with, and * before put_page is called. * * If FOLL_UNLOCKABLE is set without FOLL_NOWAIT then the mmap_lock may * be released. If this happens *@locked will be set to 0 on return. * * A caller using such a combination of @gup_flags must therefore hold the * mmap_lock for reading only, and recognize when it's been released. Otherwise, * it must be held for either reading or writing and will not be released. * * In most cases, get_user_pages or get_user_pages_fast should be used * instead of __get_user_pages. __get_user_pages should be used only if * you need some special @gup_flags. */ static long __get_user_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { long ret = 0, i = 0; struct vm_area_struct *vma = NULL; struct follow_page_context ctx = { NULL }; if (!nr_pages) return 0; start = untagged_addr_remote(mm, start); VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN))); do { struct page *page; unsigned int foll_flags = gup_flags; unsigned int page_increm; /* first iteration or cross vma bound */ if (!vma || start >= vma->vm_end) { /* * MADV_POPULATE_(READ|WRITE) wants to handle VMA * lookups+error reporting differently. 
*/ if (gup_flags & FOLL_MADV_POPULATE) { vma = vma_lookup(mm, start); if (!vma) { ret = -ENOMEM; goto out; } if (check_vma_flags(vma, gup_flags)) { ret = -EINVAL; goto out; } goto retry; } vma = gup_vma_lookup(mm, start); if (!vma && in_gate_area(mm, start)) { ret = get_gate_page(mm, start & PAGE_MASK, gup_flags, &vma, pages ? &page : NULL); if (ret) goto out; ctx.page_mask = 0; goto next_page; } if (!vma) { ret = -EFAULT; goto out; } ret = check_vma_flags(vma, gup_flags); if (ret) goto out; } retry: /* * If we have a pending SIGKILL, don't keep faulting pages and * potentially allocating memory. */ if (fatal_signal_pending(current)) { ret = -EINTR; goto out; } cond_resched(); page = follow_page_mask(vma, start, foll_flags, &ctx); if (!page || PTR_ERR(page) == -EMLINK) { ret = faultin_page(vma, start, &foll_flags, PTR_ERR(page) == -EMLINK, locked); switch (ret) { case 0: goto retry; case -EBUSY: case -EAGAIN: ret = 0; fallthrough; case -EFAULT: case -ENOMEM: case -EHWPOISON: goto out; } BUG(); } else if (PTR_ERR(page) == -EEXIST) { /* * Proper page table entry exists, but no corresponding * struct page. If the caller expects **pages to be * filled in, bail out now, because that can't be done * for this page. */ if (pages) { ret = PTR_ERR(page); goto out; } } else if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; } next_page: page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask); if (page_increm > nr_pages) page_increm = nr_pages; if (pages) { struct page *subpage; unsigned int j; /* * This must be a large folio (and doesn't need to * be the whole folio; it can be part of it), do * the refcount work for all the subpages too. * * NOTE: here the page may not be the head page * e.g. when start addr is not thp-size aligned. * try_grab_folio() should have taken care of tail * pages. */ if (page_increm > 1) { struct folio *folio; /* * Since we already hold refcount on the * large folio, this should never fail. */ folio = try_grab_folio(page, page_increm - 1, foll_flags); if (WARN_ON_ONCE(!folio)) { /* * Release the 1st page ref if the * folio is problematic, fail hard. */ gup_put_folio(page_folio(page), 1, foll_flags); ret = -EFAULT; goto out; } } for (j = 0; j < page_increm; j++) { subpage = nth_page(page, j); pages[i + j] = subpage; flush_anon_page(vma, subpage, start + j * PAGE_SIZE); flush_dcache_page(subpage); } } i += page_increm; start += page_increm * PAGE_SIZE; nr_pages -= page_increm; } while (nr_pages); out: if (ctx.pgmap) put_dev_pagemap(ctx.pgmap); return i ? i : ret; } static bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) { bool write = !!(fault_flags & FAULT_FLAG_WRITE); bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE); vm_flags_t vm_flags = write ? VM_WRITE : VM_READ; if (!(vm_flags & vma->vm_flags)) return false; /* * The architecture might have a hardware protection * mechanism other than read/write that can deny access. * * gup always represents data access, not instruction * fetches, so execute=false here: */ if (!arch_vma_access_permitted(vma, write, false, foreign)) return false; return true; } /** * fixup_user_fault() - manually resolve a user page fault * @mm: mm_struct of target mm * @address: user address * @fault_flags:flags to pass down to handle_mm_fault() * @unlocked: did we unlock the mmap_lock while retrying, maybe NULL if caller * does not allow retry. If NULL, the caller must guarantee * that fault_flags does not contain FAULT_FLAG_ALLOW_RETRY. 
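 *
 * Illustrative use (a sketch loosely modelled on the futex code; the
 * intended scenario is spelled out below): an access under
 * pagefault_disable() has just failed with -EFAULT, so resolve the fault
 * and retry the access:
 *
 *      mmap_read_lock(mm);
 *      ret = fixup_user_fault(mm, (unsigned long)uaddr,
 *                             FAULT_FLAG_WRITE, NULL);
 *      mmap_read_unlock(mm);
 *      if (!ret)
 *              ... retry the original access ...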
* * This is meant to be called in the specific scenario where for locking reasons * we try to access user memory in atomic context (within a pagefault_disable() * section), this returns -EFAULT, and we want to resolve the user fault before * trying again. * * Typically this is meant to be used by the futex code. * * The main difference with get_user_pages() is that this function will * unconditionally call handle_mm_fault() which will in turn perform all the * necessary SW fixup of the dirty and young bits in the PTE, while * get_user_pages() only guarantees to update these in the struct page. * * This is important for some architectures where those bits also gate the * access permission to the page because they are maintained in software. On * such architectures, gup() will not be enough to make a subsequent access * succeed. * * This function will not return with an unlocked mmap_lock. So it has not the * same semantics wrt the @mm->mmap_lock as does filemap_fault(). */ int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { struct vm_area_struct *vma; vm_fault_t ret; address = untagged_addr_remote(mm, address); if (unlocked) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; retry: vma = gup_vma_lookup(mm, address); if (!vma) return -EFAULT; if (!vma_permits_fault(vma, fault_flags)) return -EFAULT; if ((fault_flags & FAULT_FLAG_KILLABLE) && fatal_signal_pending(current)) return -EINTR; ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_COMPLETED) { /* * NOTE: it's a pity that we need to retake the lock here * to pair with the unlock() in the callers. Ideally we * could tell the callers so they do not need to unlock. */ mmap_read_lock(mm); *unlocked = true; return 0; } if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, 0); if (err) return err; BUG(); } if (ret & VM_FAULT_RETRY) { mmap_read_lock(mm); *unlocked = true; fault_flags |= FAULT_FLAG_TRIED; goto retry; } return 0; } EXPORT_SYMBOL_GPL(fixup_user_fault); /* * GUP always responds to fatal signals. When FOLL_INTERRUPTIBLE is * specified, it'll also respond to generic signals. The caller of GUP * that has FOLL_INTERRUPTIBLE should take care of the GUP interruption. */ static bool gup_signal_pending(unsigned int flags) { if (fatal_signal_pending(current)) return true; if (!(flags & FOLL_INTERRUPTIBLE)) return false; return signal_pending(current); } /* * Locking: (*locked == 1) means that the mmap_lock has already been acquired by * the caller. This function may drop the mmap_lock. If it does so, then it will * set (*locked = 0). * * (*locked == 0) means that the caller expects this function to acquire and * drop the mmap_lock. Therefore, the value of *locked will still be zero when * the function returns, even though it may have changed temporarily during * function execution. * * Please note that this function, unlike __get_user_pages(), will not return 0 * for nr_pages > 0, unless FOLL_NOWAIT is used. */ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, int *locked, unsigned int flags) { long ret, pages_done; bool must_unlock = false; if (!nr_pages) return 0; /* * The internal caller expects GUP to manage the lock internally and the * lock must be released when this returns. 
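 *
 * Illustrative caller-side protocol for the (*locked == 1) case (a sketch,
 * not an in-tree caller):
 *
 *      int locked = 1;
 *
 *      mmap_read_lock(mm);
 *      ret = __get_user_pages_locked(mm, start, nr_pages, pages, &locked,
 *                                    gup_flags | FOLL_UNLOCKABLE);
 *      if (locked)
 *              mmap_read_unlock(mm);
 *
 * If *locked came back as 0, the lock was already dropped on the caller's
 * behalf and must not be released again.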
*/ if (!*locked) { if (mmap_read_lock_killable(mm)) return -EAGAIN; must_unlock = true; *locked = 1; } else mmap_assert_locked(mm); if (flags & FOLL_PIN) mm_set_has_pinned_flag(&mm->flags); /* * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior * is to set FOLL_GET if the caller wants pages[] filled in (but has * carelessly failed to specify FOLL_GET), so keep doing that, but only * for FOLL_GET, not for the newer FOLL_PIN. * * FOLL_PIN always expects pages to be non-null, but no need to assert * that here, as any failures will be obvious enough. */ if (pages && !(flags & FOLL_PIN)) flags |= FOLL_GET; pages_done = 0; for (;;) { ret = __get_user_pages(mm, start, nr_pages, flags, pages, locked); if (!(flags & FOLL_UNLOCKABLE)) { /* VM_FAULT_RETRY couldn't trigger, bypass */ pages_done = ret; break; } /* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */ if (!*locked) { BUG_ON(ret < 0); BUG_ON(ret >= nr_pages); } if (ret > 0) { nr_pages -= ret; pages_done += ret; if (!nr_pages) break; } if (*locked) { /* * VM_FAULT_RETRY didn't trigger or it was a * FOLL_NOWAIT. */ if (!pages_done) pages_done = ret; break; } /* * VM_FAULT_RETRY triggered, so seek to the faulting offset. * For the prefault case (!pages) we only update counts. */ if (likely(pages)) pages += ret; start += ret << PAGE_SHIFT; /* The lock was temporarily dropped, so we must unlock later */ must_unlock = true; retry: /* * Repeat on the address that fired VM_FAULT_RETRY * with both FAULT_FLAG_ALLOW_RETRY and * FAULT_FLAG_TRIED. Note that GUP can be interrupted * by fatal signals of even common signals, depending on * the caller's request. So we need to check it before we * start trying again otherwise it can loop forever. */ if (gup_signal_pending(flags)) { if (!pages_done) pages_done = -EINTR; break; } ret = mmap_read_lock_killable(mm); if (ret) { BUG_ON(ret > 0); if (!pages_done) pages_done = ret; break; } *locked = 1; ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED, pages, locked); if (!*locked) { /* Continue to retry until we succeeded */ BUG_ON(ret != 0); goto retry; } if (ret != 1) { BUG_ON(ret > 1); if (!pages_done) pages_done = ret; break; } nr_pages--; pages_done++; if (!nr_pages) break; if (likely(pages)) pages++; start += PAGE_SIZE; } if (must_unlock && *locked) { /* * We either temporarily dropped the lock, or the caller * requested that we both acquire and drop the lock. Either way, * we must now unlock, and notify the caller of that state. */ mmap_read_unlock(mm); *locked = 0; } /* * Failing to pin anything implies something has gone wrong (except when * FOLL_NOWAIT is specified). */ if (WARN_ON_ONCE(pages_done == 0 && !(flags & FOLL_NOWAIT))) return -EFAULT; return pages_done; } /** * populate_vma_page_range() - populate a range of pages in the vma. * @vma: target vma * @start: start address * @end: end address * @locked: whether the mmap_lock is still held * * This takes care of mlocking the pages too if VM_LOCKED is set. * * Return either number of pages pinned in the vma, or a negative error * code on error. * * vma->vm_mm->mmap_lock must be held. * * If @locked is NULL, it may be held for read or write and will * be unperturbed. * * If @locked is non-NULL, it must held for read only and may be * released. If it's released, *@locked will be set to 0. 
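 *
 * Illustrative use (a sketch modelled on the mlock/populate paths, not
 * copied from them; @start and @end are assumed page aligned):
 *
 *      int locked = 1;
 *
 *      mmap_read_lock(mm);
 *      vma = find_vma_intersection(mm, start, end);
 *      if (vma && !(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 *              ret = populate_vma_page_range(vma,
 *                                            max(start, vma->vm_start),
 *                                            min(end, vma->vm_end), &locked);
 *      if (locked)
 *              mmap_read_unlock(mm);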
*/ long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked) { struct mm_struct *mm = vma->vm_mm; unsigned long nr_pages = (end - start) / PAGE_SIZE; int local_locked = 1; int gup_flags; long ret; VM_BUG_ON(!PAGE_ALIGNED(start)); VM_BUG_ON(!PAGE_ALIGNED(end)); VM_BUG_ON_VMA(start < vma->vm_start, vma); VM_BUG_ON_VMA(end > vma->vm_end, vma); mmap_assert_locked(mm); /* * Rightly or wrongly, the VM_LOCKONFAULT case has never used * faultin_page() to break COW, so it has no work to do here. */ if (vma->vm_flags & VM_LOCKONFAULT) return nr_pages; /* ... similarly, we've never faulted in PROT_NONE pages */ if (!vma_is_accessible(vma)) return -EFAULT; gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW * and we would not want to dirty them for nothing. * * Otherwise, do a read fault, and use FOLL_FORCE in case it's not * readable (ie write-only or executable). */ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) gup_flags |= FOLL_WRITE; else gup_flags |= FOLL_FORCE; if (locked) gup_flags |= FOLL_UNLOCKABLE; /* * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. */ ret = __get_user_pages(mm, start, nr_pages, gup_flags, NULL, locked ? locked : &local_locked); lru_add_drain(); return ret; } /* * faultin_page_range() - populate (prefault) page tables inside the * given range readable/writable * * This takes care of mlocking the pages, too, if VM_LOCKED is set. * * @mm: the mm to populate page tables in * @start: start address * @end: end address * @write: whether to prefault readable or writable * @locked: whether the mmap_lock is still held * * Returns either number of processed pages in the MM, or a negative error * code on error (see __get_user_pages()). Note that this function reports * errors related to VMAs, such as incompatible mappings, as expected by * MADV_POPULATE_(READ|WRITE). * * The range must be page-aligned. * * mm->mmap_lock must be held. If it's released, *@locked will be set to 0. */ long faultin_page_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool write, int *locked) { unsigned long nr_pages = (end - start) / PAGE_SIZE; int gup_flags; long ret; VM_BUG_ON(!PAGE_ALIGNED(start)); VM_BUG_ON(!PAGE_ALIGNED(end)); mmap_assert_locked(mm); /* * FOLL_TOUCH: Mark page accessed and thereby young; will also mark * the page dirty with FOLL_WRITE -- which doesn't make a * difference with !FOLL_FORCE, because the page is writable * in the page table. * FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit * a poisoned page. * !FOLL_FORCE: Require proper access permissions. */ gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE | FOLL_MADV_POPULATE; if (write) gup_flags |= FOLL_WRITE; ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked, gup_flags); lru_add_drain(); return ret; } /* * __mm_populate - populate and/or mlock pages within a range of address space. * * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap * flags. VMAs must be already marked with the desired vm_flags, and * mmap_lock must not be held. 
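 *
 * Illustrative use (a sketch; the mm_populate() helper in <linux/mm.h>
 * wraps this call for the MAP_POPULATE case): once the VMAs carry the
 * desired vm_flags and mmap_lock has been dropped again, populate the
 * range and ignore per-VMA errors:
 *
 *      (void)__mm_populate(addr, len, 1);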
*/ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) { struct mm_struct *mm = current->mm; unsigned long end, nstart, nend; struct vm_area_struct *vma = NULL; int locked = 0; long ret = 0; end = start + len; for (nstart = start; nstart < end; nstart = nend) { /* * We want to fault in pages for [nstart; end) address range. * Find first corresponding VMA. */ if (!locked) { locked = 1; mmap_read_lock(mm); vma = find_vma_intersection(mm, nstart, end); } else if (nstart >= vma->vm_end) vma = find_vma_intersection(mm, vma->vm_end, end); if (!vma) break; /* * Set [nstart; nend) to intersection of desired address * range with the first VMA. Also, skip undesirable VMA types. */ nend = min(end, vma->vm_end); if (vma->vm_flags & (VM_IO | VM_PFNMAP)) continue; if (nstart < vma->vm_start) nstart = vma->vm_start; /* * Now fault in a range of pages. populate_vma_page_range() * double checks the vma flags, so that it won't mlock pages * if the vma was already munlocked. */ ret = populate_vma_page_range(vma, nstart, nend, &locked); if (ret < 0) { if (ignore_errors) { ret = 0; continue; /* continue at next VMA */ } break; } nend = nstart + ret * PAGE_SIZE; ret = 0; } if (locked) mmap_read_unlock(mm); return ret; /* 0 or negative error code */ } #else /* CONFIG_MMU */ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, int *locked, unsigned int foll_flags) { struct vm_area_struct *vma; bool must_unlock = false; unsigned long vm_flags; long i; if (!nr_pages) return 0; /* * The internal caller expects GUP to manage the lock internally and the * lock must be released when this returns. */ if (!*locked) { if (mmap_read_lock_killable(mm)) return -EAGAIN; must_unlock = true; *locked = 1; } /* calculate required read or write permissions. * If FOLL_FORCE is set, we only require the "MAY" flags. */ vm_flags = (foll_flags & FOLL_WRITE) ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); vm_flags &= (foll_flags & FOLL_FORCE) ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); for (i = 0; i < nr_pages; i++) { vma = find_vma(mm, start); if (!vma) break; /* protect what we can, including chardevs */ if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) || !(vm_flags & vma->vm_flags)) break; if (pages) { pages[i] = virt_to_page((void *)start); if (pages[i]) get_page(pages[i]); } start = (start + PAGE_SIZE) & PAGE_MASK; } if (must_unlock && *locked) { mmap_read_unlock(mm); *locked = 0; } return i ? : -EFAULT; } #endif /* !CONFIG_MMU */ /** * fault_in_writeable - fault in userspace address range for writing * @uaddr: start of address range * @size: size of address range * * Returns the number of bytes not faulted in (like copy_to_user() and * copy_from_user()). 
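 *
 * Illustrative caller pattern (a sketch, not lifted from any particular
 * caller): a copy that runs with page faults disabled fails, so fault the
 * destination in and retry, giving up if part of the buffer cannot be
 * faulted in:
 *
 *      while (copy_to_user_nofault(ubuf, kbuf, len)) {
 *              if (fault_in_writeable(ubuf, len))
 *                      return -EFAULT;
 *      }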
*/ size_t fault_in_writeable(char __user *uaddr, size_t size) { char __user *start = uaddr, *end; if (unlikely(size == 0)) return 0; if (!user_write_access_begin(uaddr, size)) return size; if (!PAGE_ALIGNED(uaddr)) { unsafe_put_user(0, uaddr, out); uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr); } end = (char __user *)PAGE_ALIGN((unsigned long)start + size); if (unlikely(end < start)) end = NULL; while (uaddr != end) { unsafe_put_user(0, uaddr, out); uaddr += PAGE_SIZE; } out: user_write_access_end(); if (size > uaddr - start) return size - (uaddr - start); return 0; } EXPORT_SYMBOL(fault_in_writeable); /** * fault_in_subpage_writeable - fault in an address range for writing * @uaddr: start of address range * @size: size of address range * * Fault in a user address range for writing while checking for permissions at * sub-page granularity (e.g. arm64 MTE). This function should be used when * the caller cannot guarantee forward progress of a copy_to_user() loop. * * Returns the number of bytes not faulted in (like copy_to_user() and * copy_from_user()). */ size_t fault_in_subpage_writeable(char __user *uaddr, size_t size) { size_t faulted_in; /* * Attempt faulting in at page granularity first for page table * permission checking. The arch-specific probe_subpage_writeable() * functions may not check for this. */ faulted_in = size - fault_in_writeable(uaddr, size); if (faulted_in) faulted_in -= probe_subpage_writeable(uaddr, faulted_in); return size - faulted_in; } EXPORT_SYMBOL(fault_in_subpage_writeable); /* * fault_in_safe_writeable - fault in an address range for writing * @uaddr: start of address range * @size: length of address range * * Faults in an address range for writing. This is primarily useful when we * already know that some or all of the pages in the address range aren't in * memory. * * Unlike fault_in_writeable(), this function is non-destructive. * * Note that we don't pin or otherwise hold the pages referenced that we fault * in. There's no guarantee that they'll stay in memory for any duration of * time. * * Returns the number of bytes not faulted in, like copy_to_user() and * copy_from_user(). */ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) { unsigned long start = (unsigned long)uaddr, end; struct mm_struct *mm = current->mm; bool unlocked = false; if (unlikely(size == 0)) return 0; end = PAGE_ALIGN(start + size); if (end < start) end = 0; mmap_read_lock(mm); do { if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked)) break; start = (start + PAGE_SIZE) & PAGE_MASK; } while (start != end); mmap_read_unlock(mm); if (size > (unsigned long)uaddr - start) return size - ((unsigned long)uaddr - start); return 0; } EXPORT_SYMBOL(fault_in_safe_writeable); /** * fault_in_readable - fault in userspace address range for reading * @uaddr: start of user address range * @size: size of user address range * * Returns the number of bytes not faulted in (like copy_to_user() and * copy_from_user()). 
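 *
 * Illustrative use (a sketch): prefault the source buffer before entering
 * a section that copies from it with page faults disabled, and treat a
 * completely unreadable buffer as an error:
 *
 *      if (fault_in_readable(ubuf, len) == len)
 *              return -EFAULT;
 *      ... take locks, copy with the _nofault helpers, and fall back
 *          here again if the copy still fails ...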
*/ size_t fault_in_readable(const char __user *uaddr, size_t size) { const char __user *start = uaddr, *end; volatile char c; if (unlikely(size == 0)) return 0; if (!user_read_access_begin(uaddr, size)) return size; if (!PAGE_ALIGNED(uaddr)) { unsafe_get_user(c, uaddr, out); uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr); } end = (const char __user *)PAGE_ALIGN((unsigned long)start + size); if (unlikely(end < start)) end = NULL; while (uaddr != end) { unsafe_get_user(c, uaddr, out); uaddr += PAGE_SIZE; } out: user_read_access_end(); (void)c; if (size > uaddr - start) return size - (uaddr - start); return 0; } EXPORT_SYMBOL(fault_in_readable); /** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address * * Returns struct page pointer of user page pinned for dump, * to be freed afterwards by put_page(). * * Returns NULL on any kind of failure - a hole must then be inserted into * the corefile, to preserve alignment with its headers; and also returns * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found - * allowing a hole to be left in the corefile to save disk space. * * Called without mmap_lock (takes and releases the mmap_lock by itself). */ #ifdef CONFIG_ELF_CORE struct page *get_dump_page(unsigned long addr) { struct page *page; int locked = 0; int ret; ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked, FOLL_FORCE | FOLL_DUMP | FOLL_GET); return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */ #ifdef CONFIG_MIGRATION /* * Returns the number of collected pages. Return value is always >= 0. */ static unsigned long collect_longterm_unpinnable_pages( struct list_head *movable_page_list, unsigned long nr_pages, struct page **pages) { unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; for (i = 0; i < nr_pages; i++) { struct folio *folio = page_folio(pages[i]); if (folio == prev_folio) continue; prev_folio = folio; if (folio_is_longterm_pinnable(folio)) continue; collected++; if (folio_is_device_coherent(folio)) continue; if (folio_test_hugetlb(folio)) { isolate_hugetlb(folio, movable_page_list); continue; } if (!folio_test_lru(folio) && drain_allow) { lru_add_drain_all(); drain_allow = false; } if (!folio_isolate_lru(folio)) continue; list_add_tail(&folio->lru, movable_page_list); node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } return collected; } /* * Unpins all pages and migrates device coherent pages and movable_page_list. * Returns -EAGAIN if all pages were successfully migrated or -errno for failure * (or partial success). */ static int migrate_longterm_unpinnable_pages( struct list_head *movable_page_list, unsigned long nr_pages, struct page **pages) { int ret; unsigned long i; for (i = 0; i < nr_pages; i++) { struct folio *folio = page_folio(pages[i]); if (folio_is_device_coherent(folio)) { /* * Migration will fail if the page is pinned, so convert * the pin on the source page to a normal reference. */ pages[i] = NULL; folio_get(folio); gup_put_folio(folio, 1, FOLL_PIN); if (migrate_device_coherent_page(&folio->page)) { ret = -EBUSY; goto err; } continue; } /* * We can't migrate pages with unexpected references, so drop * the reference obtained by __get_user_pages_locked(). * Migrating pages have been added to movable_page_list after * calling folio_isolate_lru() which takes a reference so the * page won't be freed if it's migrating. 
*/ unpin_user_page(pages[i]); pages[i] = NULL; } if (!list_empty(movable_page_list)) { struct migration_target_control mtc = { .nid = NUMA_NO_NODE, .gfp_mask = GFP_USER | __GFP_NOWARN, }; if (migrate_pages(movable_page_list, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_LONGTERM_PIN, NULL)) { ret = -ENOMEM; goto err; } } putback_movable_pages(movable_page_list); return -EAGAIN; err: for (i = 0; i < nr_pages; i++) if (pages[i]) unpin_user_page(pages[i]); putback_movable_pages(movable_page_list); return ret; } /* * Check whether all pages are *allowed* to be pinned. Rather confusingly, all * pages in the range are required to be pinned via FOLL_PIN, before calling * this routine. * * If any pages in the range are not allowed to be pinned, then this routine * will migrate those pages away, unpin all the pages in the range and return * -EAGAIN. The caller should re-pin the entire range with FOLL_PIN and then * call this routine again. * * If an error other than -EAGAIN occurs, this indicates a migration failure. * The caller should give up, and propagate the error back up the call stack. * * If everything is OK and all pages in the range are allowed to be pinned, then * this routine leaves all pages pinned and returns zero for success. */ static long check_and_migrate_movable_pages(unsigned long nr_pages, struct page **pages) { unsigned long collected; LIST_HEAD(movable_page_list); collected = collect_longterm_unpinnable_pages(&movable_page_list, nr_pages, pages); if (!collected) return 0; return migrate_longterm_unpinnable_pages(&movable_page_list, nr_pages, pages); } #else static long check_and_migrate_movable_pages(unsigned long nr_pages, struct page **pages) { return 0; } #endif /* CONFIG_MIGRATION */ /* * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which * allows us to process the FOLL_LONGTERM flag. */ static long __gup_longterm_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, int *locked, unsigned int gup_flags) { unsigned int flags; long rc, nr_pinned_pages; if (!(gup_flags & FOLL_LONGTERM)) return __get_user_pages_locked(mm, start, nr_pages, pages, locked, gup_flags); flags = memalloc_pin_save(); do { nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages, pages, locked, gup_flags); if (nr_pinned_pages <= 0) { rc = nr_pinned_pages; break; } /* FOLL_LONGTERM implies FOLL_PIN */ rc = check_and_migrate_movable_pages(nr_pinned_pages, pages); } while (rc == -EAGAIN); memalloc_pin_restore(flags); return rc ? rc : nr_pinned_pages; } /* * Check that the given flags are valid for the exported gup/pup interface, and * update them with the required flags that the caller must have set. */ static bool is_valid_gup_args(struct page **pages, int *locked, unsigned int *gup_flags_p, unsigned int to_set) { unsigned int gup_flags = *gup_flags_p; /* * These flags not allowed to be specified externally to the gup * interfaces: * - FOLL_TOUCH/FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY are internal only * - FOLL_REMOTE is internal only and used on follow_page() * - FOLL_UNLOCKABLE is internal only and used if locked is !NULL */ if (WARN_ON_ONCE(gup_flags & INTERNAL_GUP_FLAGS)) return false; gup_flags |= to_set; if (locked) { /* At the external interface locked must be set */ if (WARN_ON_ONCE(*locked != 1)) return false; gup_flags |= FOLL_UNLOCKABLE; } /* FOLL_GET and FOLL_PIN are mutually exclusive. 
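 *
 * For example (illustrative only): pin_user_pages_fast(addr, 1,
 * FOLL_WRITE | FOLL_LONGTERM, &page) is accepted because the pin_user_*
 * wrappers add FOLL_PIN via @to_set, whereas passing FOLL_LONGTERM to a
 * plain get_user_pages() variant, or combining FOLL_GET with FOLL_PIN,
 * is rejected below with a WARN and the caller returns -EINVAL.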
*/ if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return false; /* LONGTERM can only be specified when pinning */ if (WARN_ON_ONCE(!(gup_flags & FOLL_PIN) && (gup_flags & FOLL_LONGTERM))) return false; /* Pages input must be given if using GET/PIN */ if (WARN_ON_ONCE((gup_flags & (FOLL_GET | FOLL_PIN)) && !pages)) return false; /* We want to allow the pgmap to be hot-unplugged at all times */ if (WARN_ON_ONCE((gup_flags & FOLL_LONGTERM) && (gup_flags & FOLL_PCI_P2PDMA))) return false; *gup_flags_p = gup_flags; return true; } #ifdef CONFIG_MMU /** * get_user_pages_remote() - pin user pages in memory * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. Or NULL, if caller * only intends to ensure the pages are faulted in. * @locked: pointer to lock flag indicating whether lock is held and * subsequently whether VM_FAULT_RETRY functionality can be * utilised. Lock must initially be held. * * Returns either number of pages pinned (which may be less than the * number requested), or an error. Details about the return value: * * -- If nr_pages is 0, returns 0. * -- If nr_pages is >0, but no pages were pinned, returns -errno. * -- If nr_pages is >0, and some pages were pinned, returns the number of * pages pinned. Again, this may be less than nr_pages. * * The caller is responsible for releasing returned @pages, via put_page(). * * Must be called with mmap_lock held for read or write. * * get_user_pages_remote walks a process's page tables and takes a reference * to each struct page that each user address corresponds to at a given * instant. That is, it takes the page that would be accessed if a user * thread accesses the given user virtual address at that instant. * * This does not guarantee that the page exists in the user mappings when * get_user_pages_remote returns, and there may even be a completely different * page there in some cases (eg. if mmapped pagecache has been invalidated * and subsequently re-faulted). However it does guarantee that the page * won't be freed completely. And mostly callers simply care that the page * contains data that was valid *at some point in time*. Typically, an IO * or similar operation cannot guarantee anything stronger anyway because * locks can't be held over the syscall boundary. * * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must * be called after the page is finished with, and before put_page is called. * * get_user_pages_remote is typically used for fewer-copy IO operations, * to get a handle on the memory by some means other than accesses * via the user virtual addresses. The pages may be submitted for * DMA to devices or accessed via their kernel linear mapping (via the * kmap APIs). Care should be taken to use the correct cache flushing APIs. * * See also get_user_pages_fast, for performance critical applications. * * get_user_pages_remote should be phased out in favor of * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing * should use get_user_pages_remote because it cannot pass * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. 
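 *
 * Illustrative use (a sketch of a typical remote access; error handling
 * trimmed, not copied from an in-tree caller):
 *
 *      int locked = 1;
 *
 *      mmap_read_lock(mm);
 *      pinned = get_user_pages_remote(mm, addr, 1, FOLL_WRITE, &page,
 *                                     &locked);
 *      if (locked)
 *              mmap_read_unlock(mm);
 *      if (pinned == 1) {
 *              ... access the page, e.g. via kmap_local_page() ...
 *              set_page_dirty_lock(page);
 *              put_page(page);
 *      }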
*/ long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { int local_locked = 1; if (!is_valid_gup_args(pages, locked, &gup_flags, FOLL_TOUCH | FOLL_REMOTE)) return -EINVAL; return __get_user_pages_locked(mm, start, nr_pages, pages, locked ? locked : &local_locked, gup_flags); } EXPORT_SYMBOL(get_user_pages_remote); #else /* CONFIG_MMU */ long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { return 0; } #endif /* !CONFIG_MMU */ /** * get_user_pages() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. Or NULL, if caller * only intends to ensure the pages are faulted in. * * This is the same as get_user_pages_remote(), just with a less-flexible * calling convention where we assume that the mm being operated on belongs to * the current task, and doesn't allow passing of a locked parameter. We also * obviously don't pass FOLL_REMOTE in here. */ long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { int locked = 1; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_TOUCH)) return -EINVAL; return __get_user_pages_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); } EXPORT_SYMBOL(get_user_pages); /* * get_user_pages_unlocked() is suitable to replace the form: * * mmap_read_lock(mm); * get_user_pages(mm, ..., pages, NULL); * mmap_read_unlock(mm); * * with: * * get_user_pages_unlocked(mm, ..., pages); * * It is functionally equivalent to get_user_pages_fast so * get_user_pages_fast should be used instead if specific gup_flags * (e.g. FOLL_FORCE) are not required. */ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags) { int locked = 0; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_TOUCH | FOLL_UNLOCKABLE)) return -EINVAL; return __get_user_pages_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); } EXPORT_SYMBOL(get_user_pages_unlocked); /* * Fast GUP * * get_user_pages_fast attempts to pin user pages by walking the page * tables directly and avoids taking locks. Thus the walker needs to be * protected from page table pages being freed from under it, and should * block any THP splits. * * One way to achieve this is to have the walker disable interrupts, and * rely on IPIs from the TLB flushing code blocking before the page table * pages are freed. This is unsuitable for architectures that do not need * to broadcast an IPI when invalidating TLBs. * * Another way to achieve this is to batch up page table containing pages * belonging to more than one mm_user, then rcu_sched a callback to free those * pages. Disabling interrupts will allow the fast_gup walker to both block * the rcu_sched callback, and an IPI that we broadcast for splitting THPs * (which is a relatively rare event). The code below adopts this strategy. * * Before activating this code, please be aware that the following assumptions * are currently made: * * *) Either MMU_GATHER_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to * free pages containing page tables or TLB flushing requires IPI broadcast. * * *) ptes can be read atomically by the architecture. 
* * *) access_ok is sufficient to validate userspace address ranges. * * The last two assumptions can be relaxed by the addition of helper functions. * * This code is based heavily on the PowerPC implementation by Nick Piggin. */ #ifdef CONFIG_HAVE_FAST_GUP /* * Used in the GUP-fast path to determine whether a pin is permitted for a * specific folio. * * This call assumes the caller has pinned the folio, that the lowest page table * level still points to this folio, and that interrupts have been disabled. * * Writing to pinned file-backed dirty tracked folios is inherently problematic * (see comment describing the writable_file_mapping_allowed() function). We * therefore try to avoid the most egregious case of a long-term mapping doing * so. * * This function cannot be as thorough as that one as the VMA is not available * in the fast path, so instead we whitelist known good cases and if in doubt, * fall back to the slow path. */ static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags) { struct address_space *mapping; unsigned long mapping_flags; /* * If we aren't pinning then no problematic write can occur. A long term * pin is the most egregious case so this is the one we disallow. */ if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) != (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) return true; /* The folio is pinned, so we can safely access folio fields. */ if (WARN_ON_ONCE(folio_test_slab(folio))) return false; /* hugetlb mappings do not require dirty-tracking. */ if (folio_test_hugetlb(folio)) return true; /* * GUP-fast disables IRQs. When IRQS are disabled, RCU grace periods * cannot proceed, which means no actions performed under RCU can * proceed either. * * inodes and thus their mappings are freed under RCU, which means the * mapping cannot be freed beneath us and thus we can safely dereference * it. */ lockdep_assert_irqs_disabled(); /* * However, there may be operations which _alter_ the mapping, so ensure * we read it once and only once. */ mapping = READ_ONCE(folio->mapping); /* * The mapping may have been truncated, in any case we cannot determine * if this mapping is safe - fall back to slow path to determine how to * proceed. */ if (!mapping) return false; /* Anonymous folios pose no problem. */ mapping_flags = (unsigned long)mapping & PAGE_MAPPING_FLAGS; if (mapping_flags) return mapping_flags & PAGE_MAPPING_ANON; /* * At this point, we know the mapping is non-null and points to an * address_space object. The only remaining whitelisted file system is * shmem. */ return shmem_mapping(mapping); } static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start, unsigned int flags, struct page **pages) { while ((*nr) - nr_start) { struct page *page = pages[--(*nr)]; ClearPageReferenced(page); if (flags & FOLL_PIN) unpin_user_page(page); else put_page(page); } } #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL /* * Fast-gup relies on pte change detection to avoid concurrent pgtable * operations. * * To pin the page, fast-gup needs to do below in order: * (1) pin the page (by prefetching pte), then (2) check pte not changed. * * For the rest of pgtable operations where pgtable updates can be racy * with fast-gup, we need to do (1) clear pte, then (2) check whether page * is pinned. * * Above will work for all pte-level operations, including THP split. * * For THP collapse, it's a bit more complicated because fast-gup may be * walking a pgtable page that is being freed (pte is still valid but pmd * can be cleared already). 
To avoid race in such condition, we need to * also check pmd here to make sure pmd doesn't change (corresponds to * pmdp_collapse_flush() in the THP collapse code path). */ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; int nr_start = *nr, ret = 0; pte_t *ptep, *ptem; ptem = ptep = pte_offset_map(&pmd, addr); if (!ptep) return 0; do { pte_t pte = ptep_get_lockless(ptep); struct page *page; struct folio *folio; /* * Always fallback to ordinary GUP on PROT_NONE-mapped pages: * pte_access_permitted() better should reject these pages * either way: otherwise, GUP-fast might succeed in * cases where ordinary GUP would fail due to VMA access * permissions. */ if (pte_protnone(pte)) goto pte_unmap; if (!pte_access_permitted(pte, flags & FOLL_WRITE)) goto pte_unmap; if (pte_devmap(pte)) { if (unlikely(flags & FOLL_LONGTERM)) goto pte_unmap; pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, flags, pages); goto pte_unmap; } } else if (pte_special(pte)) goto pte_unmap; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); folio = try_grab_folio(page, 1, flags); if (!folio) goto pte_unmap; if (unlikely(folio_is_secretmem(folio))) { gup_put_folio(folio, 1, flags); goto pte_unmap; } if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) || unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, 1, flags); goto pte_unmap; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, 1, flags); goto pte_unmap; } if (!pte_write(pte) && gup_must_unshare(NULL, flags, page)) { gup_put_folio(folio, 1, flags); goto pte_unmap; } /* * We need to make the page accessible if and only if we are * going to access its content (the FOLL_PIN case). Please * see Documentation/core-api/pin_user_pages.rst for * details. */ if (flags & FOLL_PIN) { ret = arch_make_page_accessible(page); if (ret) { gup_put_folio(folio, 1, flags); goto pte_unmap; } } folio_set_referenced(folio); pages[*nr] = page; (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); ret = 1; pte_unmap: if (pgmap) put_dev_pagemap(pgmap); pte_unmap(ptem); return ret; } #else /* * If we can't determine whether or not a pte is special, then fail immediately * for ptes. Note, we can still pin HugeTLB and THP as these are guaranteed not * to be special. * * For a futex to be placed on a THP tail page, get_futex_key requires a * get_user_pages_fast_only implementation that can pin pages. Thus it's still * useful to have gup_huge_pmd even if we can't operate on ptes. 
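 *
 * The stub below therefore reports zero pages pinned, so GUP-fast gives up
 * on pte-mapped memory and callers fall back to the slow path.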
*/ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { return 0; } #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static int __gup_device_huge(unsigned long pfn, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { int nr_start = *nr; struct dev_pagemap *pgmap = NULL; do { struct page *page = pfn_to_page(pfn); pgmap = get_dev_pagemap(pfn, pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, flags, pages); break; } if (!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { undo_dev_pagemap(nr, nr_start, flags, pages); break; } SetPageReferenced(page); pages[*nr] = page; if (unlikely(try_grab_page(page, flags))) { undo_dev_pagemap(nr, nr_start, flags, pages); break; } (*nr)++; pfn++; } while (addr += PAGE_SIZE, addr != end); put_dev_pagemap(pgmap); return addr == end; } static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long fault_pfn; int nr_start = *nr; fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr)) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { undo_dev_pagemap(nr, nr_start, flags, pages); return 0; } return 1; } static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long fault_pfn; int nr_start = *nr; fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr)) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { undo_dev_pagemap(nr, nr_start, flags, pages); return 0; } return 1; } #else static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { BUILD_BUG(); return 0; } static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { BUILD_BUG(); return 0; } #endif static int record_subpages(struct page *page, unsigned long addr, unsigned long end, struct page **pages) { int nr; for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) pages[nr] = nth_page(page, nr); return nr; } #ifdef CONFIG_ARCH_HAS_HUGEPD static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz) { unsigned long __boundary = (addr + sz) & ~(sz-1); return (__boundary - 1 < end - 1) ? 
__boundary : end; } static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long pte_end; struct page *page; struct folio *folio; pte_t pte; int refs; pte_end = (addr + sz) & ~(sz-1); if (pte_end < end) end = pte_end; pte = huge_ptep_get(ptep); if (!pte_access_permitted(pte, flags & FOLL_WRITE)) return 0; /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { pte_t *ptep; unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); do { next = hugepte_addr_end(addr, end, sz); if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr)) return 0; } while (ptep++, addr = next, addr != end); return 1; } #else static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { return 0; } #endif /* CONFIG_ARCH_HAS_HUGEPD */ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *page; struct folio *folio; int refs; if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pmd_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; return __gup_device_huge_pmd(orig, pmdp, addr, end, flags, pages, nr); } page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *page; struct folio *folio; int refs; if (!pud_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pud_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; return __gup_device_huge_pud(orig, pudp, addr, end, flags, pages, nr); } page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; 
folio_set_referenced(folio); return 1; } static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { int refs; struct page *page; struct folio *folio; if (!pgd_access_permitted(orig, flags & FOLL_WRITE)) return 0; BUILD_BUG_ON(pgd_devmap(orig)); page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { gup_put_folio(folio, refs, flags); return 0; } if (!pgd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; pmdp = pmd_offset_lockless(pudp, pud, addr); do { pmd_t pmd = pmdp_get_lockless(pmdp); next = pmd_addr_end(addr, end); if (!pmd_present(pmd)) return 0; if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))) { /* See gup_pte_range() */ if (pmd_protnone(pmd)) return 0; if (!gup_huge_pmd(pmd, pmdp, addr, next, flags, pages, nr)) return 0; } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) { /* * architecture have different format for hugetlbfs * pmd format and THP pmd format */ if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, PMD_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); return 1; } static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; pudp = pud_offset_lockless(p4dp, p4d, addr); do { pud_t pud = READ_ONCE(*pudp); next = pud_addr_end(addr, end); if (unlikely(!pud_present(pud))) return 0; if (unlikely(pud_huge(pud) || pud_devmap(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, flags, pages, nr)) return 0; } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, PUD_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr)) return 0; } while (pudp++, addr = next, addr != end); return 1; } static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; p4d_t *p4dp; p4dp = p4d_offset_lockless(pgdp, pgd, addr); do { p4d_t p4d = READ_ONCE(*p4dp); next = p4d_addr_end(addr, end); if (p4d_none(p4d)) return 0; BUILD_BUG_ON(p4d_huge(p4d)); if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) return 0; } while (p4dp++, addr = next, addr != end); return 1; } static void gup_pgd_range(unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pgd_t *pgdp; pgdp = pgd_offset(current->mm, addr); do { pgd_t pgd = READ_ONCE(*pgdp); next = pgd_addr_end(addr, end); if (pgd_none(pgd)) return; if (unlikely(pgd_huge(pgd))) { if (!gup_huge_pgd(pgd, pgdp, addr, next, flags, pages, nr)) return; } else if 
(unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, flags, pages, nr)) return; } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) return; } while (pgdp++, addr = next, addr != end); } #else static inline void gup_pgd_range(unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { } #endif /* CONFIG_HAVE_FAST_GUP */ #ifndef gup_fast_permitted /* * Check if it's allowed to use get_user_pages_fast_only() for the range, or * we need to fall back to the slow version: */ static bool gup_fast_permitted(unsigned long start, unsigned long end) { return true; } #endif static unsigned long lockless_pages_from_mm(unsigned long start, unsigned long end, unsigned int gup_flags, struct page **pages) { unsigned long flags; int nr_pinned = 0; unsigned seq; if (!IS_ENABLED(CONFIG_HAVE_FAST_GUP) || !gup_fast_permitted(start, end)) return 0; if (gup_flags & FOLL_PIN) { seq = raw_read_seqcount(&current->mm->write_protect_seq); if (seq & 1) return 0; } /* * Disable interrupts. The nested form is used, in order to allow full, * general purpose use of this routine. * * With interrupts disabled, we block page table pages from being freed * from under us. See struct mmu_table_batch comments in * include/asm-generic/tlb.h for more details. * * We do not adopt an rcu_read_lock() here as we also want to block IPIs * that come from THPs splitting. */ local_irq_save(flags); gup_pgd_range(start, end, gup_flags, pages, &nr_pinned); local_irq_restore(flags); /* * When pinning pages for DMA there could be a concurrent write protect * from fork() via copy_page_range(), in this case always fail fast GUP. */ if (gup_flags & FOLL_PIN) { if (read_seqcount_retry(&current->mm->write_protect_seq, seq)) { unpin_user_pages_lockless(pages, nr_pinned); return 0; } else { sanity_check_pinned_pages(pages, nr_pinned); } } return nr_pinned; } static int internal_get_user_pages_fast(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { unsigned long len, end; unsigned long nr_pinned; int locked = 0; int ret; if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | FOLL_FORCE | FOLL_PIN | FOLL_GET | FOLL_FAST_ONLY | FOLL_NOFAULT | FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT))) return -EINVAL; if (gup_flags & FOLL_PIN) mm_set_has_pinned_flag(&current->mm->flags); if (!(gup_flags & FOLL_FAST_ONLY)) might_lock_read(&current->mm->mmap_lock); start = untagged_addr(start) & PAGE_MASK; len = nr_pages << PAGE_SHIFT; if (check_add_overflow(start, len, &end)) return -EOVERFLOW; if (end > TASK_SIZE_MAX) return -EFAULT; if (unlikely(!access_ok((void __user *)start, len))) return -EFAULT; nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages); if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY) return nr_pinned; /* Slow path: try to get the remaining pages with get_user_pages */ start += nr_pinned << PAGE_SHIFT; pages += nr_pinned; ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned, pages, &locked, gup_flags | FOLL_TOUCH | FOLL_UNLOCKABLE); if (ret < 0) { /* * The caller has to unpin the pages we already pinned so * returning -errno is not an option */ if (nr_pinned) return nr_pinned; return ret; } return ret + nr_pinned; } /** * get_user_pages_fast_only() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. 
* Should be at least nr_pages long. * * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * the regular GUP. * * If the architecture does not support this function, simply return with no * pages pinned. * * Careful, careful! COW breaking can go either way, so a non-write * access can get ambiguous page results. If you call this function without * 'write' set, you'd better be sure that you're ok with that ambiguity. */ int get_user_pages_fast_only(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { /* * Internally (within mm/gup.c), gup fast variants must set FOLL_GET, * because gup fast is always a "pin with a +1 page refcount" request. * * FOLL_FAST_ONLY is required in order to match the API description of * this routine: no fall back to regular ("slow") GUP. */ if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET | FOLL_FAST_ONLY)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast_only); /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Attempt to pin user pages in memory without taking mm->mmap_lock. * If not successful, it will fall back to taking the lock and * calling get_user_pages(). * * Returns number of pages pinned. This may be fewer than the number requested. * If nr_pages is 0 or negative, returns 0. If no pages were pinned, returns * -errno. */ int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { /* * The caller may or may not have explicitly set FOLL_GET; either way is * OK. However, internally (within mm/gup.c), gup fast variants must set * FOLL_GET, because gup fast is always a "pin with a +1 page refcount" * request. */ if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast); /** * pin_user_pages_fast() - pin user pages in memory without taking locks * * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Nearly the same as get_user_pages_fast(), except that FOLL_PIN is set. See * get_user_pages_fast() for documentation on the function arguments, because * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for further details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page() will not remove pins from it. */ int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(pin_user_pages_fast); /** * pin_user_pages_remote() - pin pages of a remote process * * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. 
* Should be at least nr_pages long. * @locked: pointer to lock flag indicating whether lock is held and * subsequently whether VM_FAULT_RETRY functionality can be * utilised. Lock must initially be held. * * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set. See * get_user_pages_remote() for documentation on the function arguments, because * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { int local_locked = 1; if (!is_valid_gup_args(pages, locked, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE)) return 0; return __gup_longterm_locked(mm, start, nr_pages, pages, locked ? locked : &local_locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages_remote); /** * pin_user_pages() - pin user pages in memory for use by other devices * * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set, and * FOLL_PIN is set. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { int locked = 1; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages); /* * pin_user_pages_unlocked() is the FOLL_PIN variant of * get_user_pages_unlocked(). Behavior is the same, except that this one sets * FOLL_PIN and rejects FOLL_GET. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags) { int locked = 0; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_UNLOCKABLE)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages_unlocked);
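/*
 * Illustrative sketch (not part of this file): the typical caller-side
 * pattern for the FOLL_PIN API exported above. A driver that needs a user
 * buffer pinned for long-lived DMA might do something like the following.
 * example_pin_user_buffer() and its parameters are hypothetical; only
 * pin_user_pages_fast(), unpin_user_pages() and the FOLL_* flags come from
 * this file.
 */
#if 0	/* example only, not compiled */
#include <linux/mm.h>
#include <linux/slab.h>

static int example_pin_user_buffer(unsigned long uaddr, int nr_pages,
				   struct page ***pagesp)
{
	struct page **pages;
	int pinned;

	pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* FOLL_LONGTERM: the pin may be held indefinitely (e.g. for DMA). */
	pinned = pin_user_pages_fast(uaddr, nr_pages,
				     FOLL_WRITE | FOLL_LONGTERM, pages);
	if (pinned < 0) {
		kfree(pages);
		return pinned;		/* -errno, nothing was pinned */
	}
	if (pinned != nr_pages) {
		/* Partial pin: release what we got and report failure. */
		unpin_user_pages(pages, pinned);
		kfree(pages);
		return -EFAULT;
	}

	*pagesp = pages;		/* release later with unpin_user_pages() */
	return 0;
}
#endif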
// SPDX-License-Identifier: GPL-2.0-only
/*
 * The NFC Controller Interface is the communication protocol between an
 * NFC Controller (NFCC) and a Device Host (DH).
 *
 * Copyright (C) 2011 Texas Instruments, Inc.
* Copyright (C) 2014 Marvell International Ltd. * * Written by Ilan Elias <ilane@ti.com> * * Acknowledgements: * This file is based on hci_core.c, which was written * by Maxim Krasnyansky. */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/completion.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/kcov.h> #include "../nfc.h" #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include <linux/nfc.h> struct core_conn_create_data { int length; struct nci_core_conn_create_cmd *cmd; }; static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, int conn_id) { struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { if (conn_info->conn_id == conn_id) return conn_info; } return NULL; } int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, const struct dest_spec_params *params) { const struct nci_conn_info *conn_info; list_for_each_entry(conn_info, &ndev->conn_info_list, list) { if (conn_info->dest_type == dest_type) { if (!params) return conn_info->conn_id; if (params->id == conn_info->dest_params->id && params->protocol == conn_info->dest_params->protocol) return conn_info->conn_id; } } return -EINVAL; } EXPORT_SYMBOL(nci_get_conn_info_by_dest_type_params); /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) { if (ndev->req_status == NCI_REQ_PEND) { ndev->req_result = result; ndev->req_status = NCI_REQ_DONE; complete(&ndev->req_completion); } } EXPORT_SYMBOL(nci_req_complete); static void nci_req_cancel(struct nci_dev *ndev, int err) { if (ndev->req_status == NCI_REQ_PEND) { ndev->req_result = err; ndev->req_status = NCI_REQ_CANCELED; complete(&ndev->req_completion); } } /* Execute request and wait for completion. */ static int __nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, const void *opt), const void *opt, __u32 timeout) { int rc = 0; long completion_rc; ndev->req_status = NCI_REQ_PEND; reinit_completion(&ndev->req_completion); req(ndev, opt); completion_rc = wait_for_completion_interruptible_timeout(&ndev->req_completion, timeout); pr_debug("wait_for_completion return %ld\n", completion_rc); if (completion_rc > 0) { switch (ndev->req_status) { case NCI_REQ_DONE: rc = nci_to_errno(ndev->req_result); break; case NCI_REQ_CANCELED: rc = -ndev->req_result; break; default: rc = -ETIMEDOUT; break; } } else { pr_err("wait_for_completion_interruptible_timeout failed %ld\n", completion_rc); rc = ((completion_rc == 0) ? (-ETIMEDOUT) : (completion_rc)); } ndev->req_status = ndev->req_result = 0; return rc; } inline int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, const void *opt), const void *opt, __u32 timeout) { int rc; /* Serialize all requests */ mutex_lock(&ndev->req_lock); /* check the state after obtaing the lock against any races * from nci_close_device when the device gets removed. 
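 * If the device has already been brought down, fail with -ENETDOWN
 * instead of issuing the request.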
*/ if (test_bit(NCI_UP, &ndev->flags)) rc = __nci_request(ndev, req, opt, timeout); else rc = -ENETDOWN; mutex_unlock(&ndev->req_lock); return rc; } static void nci_reset_req(struct nci_dev *ndev, const void *opt) { struct nci_core_reset_cmd cmd; cmd.reset_type = NCI_RESET_TYPE_RESET_CONFIG; nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd); } static void nci_init_req(struct nci_dev *ndev, const void *opt) { u8 plen = 0; if (opt) plen = sizeof(struct nci_core_init_v2_cmd); nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt); } static void nci_init_complete_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_disc_map_cmd cmd; struct disc_map_config *cfg = cmd.mapping_configs; __u8 *num = &cmd.num_mapping_configs; int i; /* set rf mapping configurations */ *num = 0; /* by default mapping is set to NCI_RF_INTERFACE_FRAME */ for (i = 0; i < ndev->num_supported_rf_interfaces; i++) { if (ndev->supported_rf_interfaces[i] == NCI_RF_INTERFACE_ISO_DEP) { cfg[*num].rf_protocol = NCI_RF_PROTOCOL_ISO_DEP; cfg[*num].mode = NCI_DISC_MAP_MODE_POLL | NCI_DISC_MAP_MODE_LISTEN; cfg[*num].rf_interface = NCI_RF_INTERFACE_ISO_DEP; (*num)++; } else if (ndev->supported_rf_interfaces[i] == NCI_RF_INTERFACE_NFC_DEP) { cfg[*num].rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; cfg[*num].mode = NCI_DISC_MAP_MODE_POLL | NCI_DISC_MAP_MODE_LISTEN; cfg[*num].rf_interface = NCI_RF_INTERFACE_NFC_DEP; (*num)++; } if (*num == NCI_MAX_NUM_MAPPING_CONFIGS) break; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_MAP_CMD, (1 + ((*num) * sizeof(struct disc_map_config))), &cmd); } struct nci_set_config_param { __u8 id; size_t len; const __u8 *val; }; static void nci_set_config_req(struct nci_dev *ndev, const void *opt) { const struct nci_set_config_param *param = opt; struct nci_core_set_config_cmd cmd; BUG_ON(param->len > NCI_MAX_PARAM_LEN); cmd.num_params = 1; cmd.param.id = param->id; cmd.param.len = param->len; memcpy(cmd.param.val, param->val, param->len); nci_send_cmd(ndev, NCI_OP_CORE_SET_CONFIG_CMD, (3 + param->len), &cmd); } struct nci_rf_discover_param { __u32 im_protocols; __u32 tm_protocols; }; static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt) { const struct nci_rf_discover_param *param = opt; struct nci_rf_disc_cmd cmd; cmd.num_disc_configs = 0; if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_JEWEL_MASK || param->im_protocols & NFC_PROTO_MIFARE_MASK || param->im_protocols & NFC_PROTO_ISO14443_MASK || param->im_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_A_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_ISO14443_B_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_B_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_FELICA_MASK || param->im_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_F_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && (param->im_protocols & NFC_PROTO_ISO15693_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_V_PASSIVE_POLL_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } if 
((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS - 1) && (param->tm_protocols & NFC_PROTO_NFC_DEP_MASK)) { cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_A_PASSIVE_LISTEN_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = NCI_NFC_F_PASSIVE_LISTEN_MODE; cmd.disc_configs[cmd.num_disc_configs].frequency = 1; cmd.num_disc_configs++; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_CMD, (1 + (cmd.num_disc_configs * sizeof(struct disc_config))), &cmd); } struct nci_rf_discover_select_param { __u8 rf_discovery_id; __u8 rf_protocol; }; static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt) { const struct nci_rf_discover_select_param *param = opt; struct nci_rf_discover_select_cmd cmd; cmd.rf_discovery_id = param->rf_discovery_id; cmd.rf_protocol = param->rf_protocol; switch (cmd.rf_protocol) { case NCI_RF_PROTOCOL_ISO_DEP: cmd.rf_interface = NCI_RF_INTERFACE_ISO_DEP; break; case NCI_RF_PROTOCOL_NFC_DEP: cmd.rf_interface = NCI_RF_INTERFACE_NFC_DEP; break; default: cmd.rf_interface = NCI_RF_INTERFACE_FRAME; break; } nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_SELECT_CMD, sizeof(struct nci_rf_discover_select_cmd), &cmd); } static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_deactivate_cmd cmd; cmd.type = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD, sizeof(struct nci_rf_deactivate_cmd), &cmd); } struct nci_cmd_param { __u16 opcode; size_t len; const __u8 *payload; }; static void nci_generic_req(struct nci_dev *ndev, const void *opt) { const struct nci_cmd_param *param = opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload) { struct nci_cmd_param param; param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid); param.len = len; param.payload = payload; return __nci_request(ndev, nci_generic_req, &param, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_prop_cmd); int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, const __u8 *payload) { struct nci_cmd_param param; param.opcode = opcode; param.len = len; param.payload = payload; return __nci_request(ndev, nci_generic_req, &param, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_cmd); int nci_core_reset(struct nci_dev *ndev) { return __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } EXPORT_SYMBOL(nci_core_reset); int nci_core_init(struct nci_dev *ndev) { return __nci_request(ndev, nci_init_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } EXPORT_SYMBOL(nci_core_init); struct nci_loopback_data { u8 conn_id; struct sk_buff *data; }; static void nci_send_data_req(struct nci_dev *ndev, const void *opt) { const struct nci_loopback_data *data = opt; nci_send_data(ndev, data->conn_id, data->data); } static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err) { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_conn_info *conn_info; conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); if (!conn_info) { nci_req_complete(ndev, NCI_STATUS_REJECTED); return; } conn_info->rx_skb = skb; nci_req_complete(ndev, NCI_STATUS_OK); } int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, struct sk_buff **resp) { int r; struct nci_loopback_data loopback_data; struct nci_conn_info *conn_info; struct sk_buff *skb; int conn_id = nci_get_conn_info_by_dest_type_params(ndev, 
NCI_DESTINATION_NFCC_LOOPBACK, NULL); if (conn_id < 0) { r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCC_LOOPBACK, 0, 0, NULL); if (r != NCI_STATUS_OK) return r; conn_id = nci_get_conn_info_by_dest_type_params(ndev, NCI_DESTINATION_NFCC_LOOPBACK, NULL); } conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); if (!conn_info) return -EPROTO; /* store cb and context to be used on receiving data */ conn_info->data_exchange_cb = nci_nfcc_loopback_cb; conn_info->data_exchange_cb_context = ndev; skb = nci_skb_alloc(ndev, NCI_DATA_HDR_SIZE + data_len, GFP_KERNEL); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE); skb_put_data(skb, data, data_len); loopback_data.conn_id = conn_id; loopback_data.data = skb; ndev->cur_conn_id = conn_id; r = nci_request(ndev, nci_send_data_req, &loopback_data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK && resp) *resp = conn_info->rx_skb; return r; } EXPORT_SYMBOL(nci_nfcc_loopback); static int nci_open_device(struct nci_dev *ndev) { int rc = 0; mutex_lock(&ndev->req_lock); if (test_bit(NCI_UNREG, &ndev->flags)) { rc = -ENODEV; goto done; } if (test_bit(NCI_UP, &ndev->flags)) { rc = -EALREADY; goto done; } if (ndev->ops->open(ndev)) { rc = -EIO; goto done; } atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); if (ndev->ops->init) rc = ndev->ops->init(ndev); if (!rc) { rc = __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } if (!rc && ndev->ops->setup) { rc = ndev->ops->setup(ndev); } if (!rc) { struct nci_core_init_v2_cmd nci_init_v2_cmd = { .feature1 = NCI_FEATURE_DISABLE, .feature2 = NCI_FEATURE_DISABLE }; const void *opt = NULL; if (ndev->nci_ver & NCI_VER_2_MASK) opt = &nci_init_v2_cmd; rc = __nci_request(ndev, nci_init_req, opt, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } if (!rc && ndev->ops->post_setup) rc = ndev->ops->post_setup(ndev); if (!rc) { rc = __nci_request(ndev, nci_init_complete_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } clear_bit(NCI_INIT, &ndev->flags); if (!rc) { set_bit(NCI_UP, &ndev->flags); nci_clear_target_list(ndev); atomic_set(&ndev->state, NCI_IDLE); } else { /* Init failed, cleanup */ skb_queue_purge(&ndev->cmd_q); skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); ndev->ops->close(ndev); ndev->flags &= BIT(NCI_UNREG); } done: mutex_unlock(&ndev->req_lock); return rc; } static int nci_close_device(struct nci_dev *ndev) { nci_req_cancel(ndev, ENODEV); /* This mutex needs to be held as a barrier for * caller nci_unregister_device */ mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { /* Need to flush the cmd wq in case * there is a queued/running cmd_work */ flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); return 0; } /* Drop RX and TX queues */ skb_queue_purge(&ndev->rx_q); skb_queue_purge(&ndev->tx_q); /* Flush RX and TX wq */ flush_workqueue(ndev->rx_wq); flush_workqueue(ndev->tx_wq); /* Reset device */ skb_queue_purge(&ndev->cmd_q); atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty * and no works are scheduled. 
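 * It is now safe to let the driver's ->close() callback shut the device down.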
*/ ndev->ops->close(ndev); clear_bit(NCI_INIT, &ndev->flags); /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); /* Clear flags except NCI_UNREG */ ndev->flags &= BIT(NCI_UNREG); mutex_unlock(&ndev->req_lock); return 0; } /* NCI command timer function */ static void nci_cmd_timer(struct timer_list *t) { struct nci_dev *ndev = from_timer(ndev, t, cmd_timer); atomic_set(&ndev->cmd_cnt, 1); queue_work(ndev->cmd_wq, &ndev->cmd_work); } /* NCI data exchange timer function */ static void nci_data_timer(struct timer_list *t) { struct nci_dev *ndev = from_timer(ndev, t, data_timer); set_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); queue_work(ndev->rx_wq, &ndev->rx_work); } static int nci_dev_up(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); return nci_open_device(ndev); } static int nci_dev_down(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); return nci_close_device(ndev); } int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val) { struct nci_set_config_param param; if (!val || !len) return 0; param.id = id; param.len = len; param.val = val; return __nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } EXPORT_SYMBOL(nci_set_config); static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt) { struct nci_nfcee_discover_cmd cmd; __u8 action = (unsigned long)opt; cmd.discovery_action = action; nci_send_cmd(ndev, NCI_OP_NFCEE_DISCOVER_CMD, 1, &cmd); } int nci_nfcee_discover(struct nci_dev *ndev, u8 action) { unsigned long opt = action; return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_discover); static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt) { const struct nci_nfcee_mode_set_cmd *cmd = opt; nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, sizeof(struct nci_nfcee_mode_set_cmd), cmd); } int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) { struct nci_nfcee_mode_set_cmd cmd; cmd.nfcee_id = nfcee_id; cmd.nfcee_mode = nfcee_mode; return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_mode_set); static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt) { const struct core_conn_create_data *data = opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); } int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, u8 number_destination_params, size_t params_len, const struct core_conn_create_dest_spec_params *params) { int r; struct nci_core_conn_create_cmd *cmd; struct core_conn_create_data data; data.length = params_len + sizeof(struct nci_core_conn_create_cmd); cmd = kzalloc(data.length, GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->destination_type = destination_type; cmd->number_destination_params = number_destination_params; data.cmd = cmd; if (params) { memcpy(cmd->params, params, params_len); if (params->length > 0) memcpy(&ndev->cur_params, &params->value[DEST_SPEC_PARAMS_ID_INDEX], sizeof(struct dest_spec_params)); else ndev->cur_params.id = 0; } else { ndev->cur_params.id = 0; } ndev->cur_dest_type = destination_type; r = __nci_request(ndev, nci_core_conn_create_req, &data, msecs_to_jiffies(NCI_CMD_TIMEOUT)); kfree(cmd); return r; } EXPORT_SYMBOL(nci_core_conn_create); static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt) { __u8 conn_id = (unsigned long)opt; nci_send_cmd(ndev, 
NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); } int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { unsigned long opt = conn_id; ndev->cur_conn_id = conn_id; return __nci_request(ndev, nci_core_conn_close_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_conn_close); static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_set_config_param param; int rc; param.val = nfc_get_local_general_bytes(nfc_dev, &param.len); if ((param.val == NULL) || (param.len == 0)) return 0; if (param.len > NFC_MAX_GT_LEN) return -EINVAL; param.id = NCI_PN_ATR_REQ_GEN_BYTES; rc = nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); if (rc) return rc; param.id = NCI_LN_ATR_RES_GEN_BYTES; return nci_request(ndev, nci_set_config_req, &param, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } static int nci_set_listen_parameters(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; __u8 val; val = NCI_LA_SEL_INFO_NFC_DEP_MASK; rc = nci_set_config(ndev, NCI_LA_SEL_INFO, 1, &val); if (rc) return rc; val = NCI_LF_PROTOCOL_TYPE_NFC_DEP_MASK; rc = nci_set_config(ndev, NCI_LF_PROTOCOL_TYPE, 1, &val); if (rc) return rc; val = NCI_LF_CON_BITR_F_212 | NCI_LF_CON_BITR_F_424; return nci_set_config(ndev, NCI_LF_CON_BITR_F, 1, &val); } static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, __u32 tm_protocols) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_rf_discover_param param; int rc; if ((atomic_read(&ndev->state) == NCI_DISCOVERY) || (atomic_read(&ndev->state) == NCI_W4_ALL_DISCOVERIES)) { pr_err("unable to start poll, since poll is already active\n"); return -EBUSY; } if (ndev->target_active_prot) { pr_err("there is an active target\n"); return -EBUSY; } if ((atomic_read(&ndev->state) == NCI_W4_HOST_SELECT) || (atomic_read(&ndev->state) == NCI_POLL_ACTIVE)) { pr_debug("target active or w4 select, implicitly deactivate\n"); rc = nci_request(ndev, nci_rf_deactivate_req, (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); if (rc) return -EBUSY; } if ((im_protocols | tm_protocols) & NFC_PROTO_NFC_DEP_MASK) { rc = nci_set_local_general_bytes(nfc_dev); if (rc) { pr_err("failed to set local general bytes\n"); return rc; } } if (tm_protocols & NFC_PROTO_NFC_DEP_MASK) { rc = nci_set_listen_parameters(nfc_dev); if (rc) pr_err("failed to set listen parameters\n"); } param.im_protocols = im_protocols; param.tm_protocols = tm_protocols; rc = nci_request(ndev, nci_rf_discover_req, &param, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); if (!rc) ndev->poll_prots = im_protocols; return rc; } static void nci_stop_poll(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if ((atomic_read(&ndev->state) != NCI_DISCOVERY) && (atomic_read(&ndev->state) != NCI_W4_ALL_DISCOVERIES)) { pr_err("unable to stop poll, since poll is not active\n"); return; } nci_request(ndev, nci_rf_deactivate_req, (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } static int nci_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u32 protocol) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); struct nci_rf_discover_select_param param; const struct nfc_target *nci_target = NULL; int i; int rc = 0; pr_debug("target_idx %d, protocol 0x%x\n", target->idx, protocol); if ((atomic_read(&ndev->state) != NCI_W4_HOST_SELECT) && (atomic_read(&ndev->state) != NCI_POLL_ACTIVE)) { 
pr_err("there is no available target to activate\n"); return -EINVAL; } if (ndev->target_active_prot) { pr_err("there is already an active target\n"); return -EBUSY; } for (i = 0; i < ndev->n_targets; i++) { if (ndev->targets[i].idx == target->idx) { nci_target = &ndev->targets[i]; break; } } if (!nci_target) { pr_err("unable to find the selected target\n"); return -EINVAL; } if (protocol >= NFC_PROTO_MAX) { pr_err("the requested nfc protocol is invalid\n"); return -EINVAL; } if (!(nci_target->supported_protocols & (1 << protocol))) { pr_err("target does not support the requested protocol 0x%x\n", protocol); return -EINVAL; } if (atomic_read(&ndev->state) == NCI_W4_HOST_SELECT) { param.rf_discovery_id = nci_target->logical_idx; if (protocol == NFC_PROTO_JEWEL) param.rf_protocol = NCI_RF_PROTOCOL_T1T; else if (protocol == NFC_PROTO_MIFARE) param.rf_protocol = NCI_RF_PROTOCOL_T2T; else if (protocol == NFC_PROTO_FELICA) param.rf_protocol = NCI_RF_PROTOCOL_T3T; else if (protocol == NFC_PROTO_ISO14443 || protocol == NFC_PROTO_ISO14443_B) param.rf_protocol = NCI_RF_PROTOCOL_ISO_DEP; else param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; rc = nci_request(ndev, nci_rf_discover_select_req, &param, msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT)); } if (!rc) ndev->target_active_prot = protocol; return rc; } static void nci_deactivate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 mode) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; if (!ndev->target_active_prot) { pr_err("unable to deactivate target, no active target\n"); return; } ndev->target_active_prot = 0; switch (mode) { case NFC_TARGET_MODE_SLEEP: nci_mode = NCI_DEACTIVATE_TYPE_SLEEP_MODE; break; } if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } } static int nci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 comm_mode, __u8 *gb, size_t gb_len) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; pr_debug("target_idx %d, comm_mode %d\n", target->idx, comm_mode); rc = nci_activate_target(nfc_dev, target, NFC_PROTO_NFC_DEP); if (rc) return rc; rc = nfc_set_remote_general_bytes(nfc_dev, ndev->remote_gb, ndev->remote_gb_len); if (!rc) rc = nfc_dep_link_is_up(nfc_dev, target->idx, NFC_COMM_PASSIVE, NFC_RF_INITIATOR); return rc; } static int nci_dep_link_down(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE); } else { if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || atomic_read(&ndev->state) == NCI_DISCOVERY) { nci_request(ndev, nci_rf_deactivate_req, (void *)0, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } rc = nfc_tm_deactivated(nfc_dev); if (rc) pr_err("error when signaling tm deactivation\n"); } return 0; } static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; struct nci_conn_info *conn_info; conn_info = ndev->rf_conn_info; if (!conn_info) return -EPROTO; pr_debug("target_idx %d, len %d\n", target->idx, skb->len); if (!ndev->target_active_prot) { pr_err("unable to exchange data, no active target\n"); return -EINVAL; } if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) return -EBUSY; /* store cb and context to be used on receiving data */ 
conn_info->data_exchange_cb = cb; conn_info->data_exchange_cb_context = cb_context; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); return rc; } static int nci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) pr_err("unable to send data\n"); return rc; } static int nci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->enable_se) return ndev->ops->enable_se(ndev, se_idx); return 0; } static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->disable_se) return ndev->ops->disable_se(ndev, se_idx); return 0; } static int nci_discover_se(struct nfc_dev *nfc_dev) { int r; struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->discover_se) { r = nci_nfcee_discover(ndev, NCI_NFCEE_DISCOVERY_ACTION_ENABLE); if (r != NCI_STATUS_OK) return -EPROTO; return ndev->ops->discover_se(ndev); } return 0; } static int nci_se_io(struct nfc_dev *nfc_dev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (ndev->ops->se_io) return ndev->ops->se_io(ndev, se_idx, apdu, apdu_length, cb, cb_context); return 0; } static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); if (!ndev->ops->fw_download) return -ENOTSUPP; return ndev->ops->fw_download(ndev, firmware_name); } static const struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, .start_poll = nci_start_poll, .stop_poll = nci_stop_poll, .dep_link_up = nci_dep_link_up, .dep_link_down = nci_dep_link_down, .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, .im_transceive = nci_transceive, .tm_send = nci_tm_send, .enable_se = nci_enable_se, .disable_se = nci_disable_se, .discover_se = nci_discover_se, .se_io = nci_se_io, .fw_download = nci_fw_download, }; /* ---- Interface to NCI drivers ---- */ /** * nci_allocate_device - allocate a new nci device * * @ops: device operations * @supported_protocols: NFC protocols supported by the device * @tx_headroom: Reserved space at beginning of skb * @tx_tailroom: Reserved space at end of skb */ struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; pr_debug("supported_protocols 0x%x\n", supported_protocols); if (!ops->open || !ops->close || !ops->send) return NULL; if (!supported_protocols) return NULL; ndev = kzalloc(sizeof(struct nci_dev), GFP_KERNEL); if (!ndev) return NULL; ndev->ops = ops; if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) { pr_err("Too many proprietary commands: %zd\n", ops->n_prop_ops); goto free_nci; } ndev->tx_headroom = tx_headroom; ndev->tx_tailroom = tx_tailroom; init_completion(&ndev->req_completion); ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) goto free_nci; ndev->hci_dev = nci_hci_allocate(ndev); if (!ndev->hci_dev) goto free_nfc; nfc_set_drvdata(ndev->nfc_dev, ndev); return ndev; free_nfc: nfc_free_device(ndev->nfc_dev); free_nci: kfree(ndev); return NULL; } EXPORT_SYMBOL(nci_allocate_device); /** * nci_free_device - deallocate nci device * * @ndev: The nci device to deallocate */ void 
nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); /* drop partial rx data packet if present */ if (ndev->rx_data_reassembly) kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); /** * nci_register_device - register a nci device in the nfc subsystem * * @ndev: The nci device to register */ int nci_register_device(struct nci_dev *ndev) { int rc; struct device *dev = &ndev->nfc_dev->dev; char name[32]; ndev->flags = 0; INIT_WORK(&ndev->cmd_work, nci_cmd_work); snprintf(name, sizeof(name), "%s_nci_cmd_wq", dev_name(dev)); ndev->cmd_wq = create_singlethread_workqueue(name); if (!ndev->cmd_wq) { rc = -ENOMEM; goto exit; } INIT_WORK(&ndev->rx_work, nci_rx_work); snprintf(name, sizeof(name), "%s_nci_rx_wq", dev_name(dev)); ndev->rx_wq = create_singlethread_workqueue(name); if (!ndev->rx_wq) { rc = -ENOMEM; goto destroy_cmd_wq_exit; } INIT_WORK(&ndev->tx_work, nci_tx_work); snprintf(name, sizeof(name), "%s_nci_tx_wq", dev_name(dev)); ndev->tx_wq = create_singlethread_workqueue(name); if (!ndev->tx_wq) { rc = -ENOMEM; goto destroy_rx_wq_exit; } skb_queue_head_init(&ndev->cmd_q); skb_queue_head_init(&ndev->rx_q); skb_queue_head_init(&ndev->tx_q); timer_setup(&ndev->cmd_timer, nci_cmd_timer, 0); timer_setup(&ndev->data_timer, nci_data_timer, 0); mutex_init(&ndev->req_lock); INIT_LIST_HEAD(&ndev->conn_info_list); rc = nfc_register_device(ndev->nfc_dev); if (rc) goto destroy_tx_wq_exit; goto exit; destroy_tx_wq_exit: destroy_workqueue(ndev->tx_wq); destroy_rx_wq_exit: destroy_workqueue(ndev->rx_wq); destroy_cmd_wq_exit: destroy_workqueue(ndev->cmd_wq); exit: return rc; } EXPORT_SYMBOL(nci_register_device); /** * nci_unregister_device - unregister a nci device in the nfc subsystem * * @ndev: The nci device to unregister */ void nci_unregister_device(struct nci_dev *ndev) { struct nci_conn_info *conn_info, *n; /* This set_bit is not protected with specialized barrier, * However, it is fine because the mutex_lock(&ndev->req_lock); * in nci_close_device() will help to emit one. */ set_bit(NCI_UNREG, &ndev->flags); nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); destroy_workqueue(ndev->rx_wq); destroy_workqueue(ndev->tx_wq); list_for_each_entry_safe(conn_info, n, &ndev->conn_info_list, list) { list_del(&conn_info->list); /* conn_info is allocated with devm_kzalloc */ } nfc_unregister_device(ndev->nfc_dev); } EXPORT_SYMBOL(nci_unregister_device); /** * nci_recv_frame - receive frame from NCI drivers * * @ndev: The nci device * @skb: The sk_buff to receive */ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) { pr_debug("len %d\n", skb->len); if (!ndev || (!test_bit(NCI_UP, &ndev->flags) && !test_bit(NCI_INIT, &ndev->flags))) { kfree_skb(skb); return -ENXIO; } /* Queue frame for rx worker thread */ skb_queue_tail(&ndev->rx_q, skb); queue_work(ndev->rx_wq, &ndev->rx_work); return 0; } EXPORT_SYMBOL(nci_recv_frame); int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) { pr_debug("len %d\n", skb->len); if (!ndev) { kfree_skb(skb); return -ENODEV; } /* Get rid of skb owner, prior to sending to the driver. 
*/ skb_orphan(skb); /* Send copy to sniffer */ nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_TX); return ndev->ops->send(ndev, skb); } EXPORT_SYMBOL(nci_send_frame); /* Send NCI command */ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload) { struct nci_ctrl_hdr *hdr; struct sk_buff *skb; pr_debug("opcode 0x%x, plen %d\n", opcode, plen); skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + plen), GFP_KERNEL); if (!skb) { pr_err("no memory for command\n"); return -ENOMEM; } hdr = skb_put(skb, NCI_CTRL_HDR_SIZE); hdr->gid = nci_opcode_gid(opcode); hdr->oid = nci_opcode_oid(opcode); hdr->plen = plen; nci_mt_set((__u8 *)hdr, NCI_MT_CMD_PKT); nci_pbf_set((__u8 *)hdr, NCI_PBF_LAST); if (plen) skb_put_data(skb, payload, plen); skb_queue_tail(&ndev->cmd_q, skb); queue_work(ndev->cmd_wq, &ndev->cmd_work); return 0; } EXPORT_SYMBOL(nci_send_cmd); /* Proprietary commands API */ static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops, size_t n_ops, __u16 opcode) { size_t i; const struct nci_driver_ops *op; if (!ops || !n_ops) return NULL; for (i = 0; i < n_ops; i++) { op = &ops[i]; if (op->opcode == opcode) return op; } return NULL; } static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, rsp_opcode); if (!op || !op->rsp) return -ENOTSUPP; return op->rsp(ndev, skb); } static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, struct sk_buff *skb, const struct nci_driver_ops *ops, size_t n_ops) { const struct nci_driver_ops *op; op = ops_cmd_lookup(ops, n_ops, ntf_opcode); if (!op || !op->ntf) return -ENOTSUPP; return op->ntf(ndev, skb); } int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->prop_ops, ndev->ops->n_prop_ops); } int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->prop_ops, ndev->ops->n_prop_ops); } int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->core_ops, ndev->ops->n_core_ops); } int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb) { return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->core_ops, ndev->ops->n_core_ops); } /* ---- NCI TX Data worker thread ---- */ static void nci_tx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); struct nci_conn_info *conn_info; struct sk_buff *skb; conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); if (!conn_info) return; pr_debug("credits_cnt %d\n", atomic_read(&conn_info->credits_cnt)); /* Send queued tx data */ while (atomic_read(&conn_info->credits_cnt)) { skb = skb_dequeue(&ndev->tx_q); if (!skb) return; kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Check if data flow control is used */ if (atomic_read(&conn_info->credits_cnt) != NCI_DATA_FLOW_CONTROL_NOT_USED) atomic_dec(&conn_info->credits_cnt); pr_debug("NCI TX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", nci_pbf(skb->data), nci_conn_id(skb->data), nci_plen(skb->data)); nci_send_frame(ndev, skb); mod_timer(&ndev->data_timer, jiffies + msecs_to_jiffies(NCI_DATA_TIMEOUT)); kcov_remote_stop(); } } /* ----- NCI RX worker thread (data & control) ----- */ static void nci_rx_work(struct work_struct *work) { 
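	/*
	 * RX worker: drain ndev->rx_q, mirror each frame to any raw
	 * sniffer sockets, then dispatch on the NCI message type
	 * (response, notification or data packet); zero-length packets
	 * are discarded.  Once the queue is drained, a pending
	 * NCI_DATA_EXCHANGE_TO flag completes the outstanding data
	 * exchange with -ETIMEDOUT.
	 */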
struct nci_dev *ndev = container_of(work, struct nci_dev, rx_work); struct sk_buff *skb; for (; (skb = skb_dequeue(&ndev->rx_q)); kcov_remote_stop()) { kcov_remote_start_common(skb_get_kcov_handle(skb)); /* Send copy to sniffer */ nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); if (!nci_plen(skb->data)) { kfree_skb(skb); break; } /* Process frame */ switch (nci_mt(skb->data)) { case NCI_MT_RSP_PKT: nci_rsp_packet(ndev, skb); break; case NCI_MT_NTF_PKT: nci_ntf_packet(ndev, skb); break; case NCI_MT_DATA_PKT: nci_rx_data_packet(ndev, skb); break; default: pr_err("unknown MT 0x%x\n", nci_mt(skb->data)); kfree_skb(skb); break; } } /* check if a data exchange timeout has occurred */ if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) { /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) nci_data_exchange_complete(ndev, NULL, ndev->cur_conn_id, -ETIMEDOUT); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); } } /* ----- NCI TX CMD worker thread ----- */ static void nci_cmd_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, cmd_work); struct sk_buff *skb; pr_debug("cmd_cnt %d\n", atomic_read(&ndev->cmd_cnt)); /* Send queued command */ if (atomic_read(&ndev->cmd_cnt)) { skb = skb_dequeue(&ndev->cmd_q); if (!skb) return; kcov_remote_start_common(skb_get_kcov_handle(skb)); atomic_dec(&ndev->cmd_cnt); pr_debug("NCI TX: MT=cmd, PBF=%d, GID=0x%x, OID=0x%x, plen=%d\n", nci_pbf(skb->data), nci_opcode_gid(nci_opcode(skb->data)), nci_opcode_oid(nci_opcode(skb->data)), nci_plen(skb->data)); nci_send_frame(ndev, skb); mod_timer(&ndev->cmd_timer, jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT)); kcov_remote_stop(); } } MODULE_DESCRIPTION("NFC Controller Interface"); MODULE_LICENSE("GPL");
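/*
 * Illustrative sketch (not part of the file above): roughly how a transport
 * driver would sit on top of the exported interface, using
 * nci_allocate_device() and nci_register_device() at probe time, an
 * nci_ops.send callback for outbound packets, and nci_recv_frame() for
 * inbound ones.  All "foo_*" names are hypothetical; error handling and the
 * actual bus plumbing are elided.
 */
#include <linux/skbuff.h>
#include <net/nfc/nci_core.h>

static int foo_nci_open(struct nci_dev *ndev)
{
	/* power up the controller / bring up the transport link */
	return 0;
}

static int foo_nci_close(struct nci_dev *ndev)
{
	/* quiesce the controller */
	return 0;
}

static int foo_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
{
	/*
	 * Hand the raw NCI packet to the bus (I2C/SPI/UART/...).  The core
	 * has already orphaned the skb and mirrored it to raw sockets in
	 * nci_send_frame(); the driver owns it from here on.  This sketch
	 * simply drops it.
	 */
	consume_skb(skb);
	return 0;
}

/* .open, .close and .send are the three mandatory callbacks */
static const struct nci_ops foo_nci_ops = {
	.open	= foo_nci_open,
	.close	= foo_nci_close,
	.send	= foo_nci_send,
};

static int foo_probe(void)
{
	struct nci_dev *ndev;
	int rc;

	ndev = nci_allocate_device(&foo_nci_ops, 1 << NFC_PROTO_ISO14443,
				   0 /* tx_headroom */, 0 /* tx_tailroom */);
	if (!ndev)
		return -ENOMEM;

	rc = nci_register_device(ndev);
	if (rc)
		nci_free_device(ndev);
	return rc;
}

/*
 * Frames arriving from the controller would be wrapped in an skb by the bus
 * code and passed to nci_recv_frame(), which queues them for nci_rx_work();
 * teardown is nci_unregister_device() followed by nci_free_device().
 */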
/* SPDX-License-Identifier: GPL-2.0-only */ /* * fwnode.h - Firmware device node object handle type definition. * * Copyright (C) 2015, Intel Corporation * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> */ #ifndef _LINUX_FWNODE_H_ #define _LINUX_FWNODE_H_ #include <linux/bits.h> #include <linux/err.h> #include <linux/list.h> #include <linux/types.h> enum dev_dma_attr { DEV_DMA_NOT_SUPPORTED, DEV_DMA_NON_COHERENT, DEV_DMA_COHERENT, }; struct fwnode_operations; struct device; /* * fwnode flags * * LINKS_ADDED: The fwnode has already been parsed to add fwnode links. * NOT_DEVICE: The fwnode will never be populated as a struct device. * INITIALIZED: The hardware corresponding to fwnode has been initialized. * NEEDS_CHILD_BOUND_ON_ADD: For this fwnode/device to probe successfully, its * driver needs its child devices to be bound with * their respective drivers as soon as they are * added. * BEST_EFFORT: The fwnode/device needs to probe early and might be missing some * suppliers. Only enforce ordering with suppliers that have * drivers. */ #define FWNODE_FLAG_LINKS_ADDED BIT(0) #define FWNODE_FLAG_NOT_DEVICE BIT(1) #define FWNODE_FLAG_INITIALIZED BIT(2) #define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) #define FWNODE_FLAG_BEST_EFFORT BIT(4) #define FWNODE_FLAG_VISITED BIT(5) struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; /* The below is used solely by device links, don't use otherwise */ struct device *dev; struct list_head suppliers; struct list_head consumers; u8 flags; }; /* * fwnode link flags * * CYCLE: The fwnode link is part of a cycle. Don't defer probe. * IGNORE: Completely ignore this link, even during cycle detection. */ #define FWLINK_FLAG_CYCLE BIT(0) #define FWLINK_FLAG_IGNORE BIT(1) struct fwnode_link { struct fwnode_handle *supplier; struct list_head s_hook; struct fwnode_handle *consumer; struct list_head c_hook; u8 flags; }; /** * struct fwnode_endpoint - Fwnode graph endpoint * @port: Port number * @id: Endpoint id * @local_fwnode: reference to the related fwnode */ struct fwnode_endpoint { unsigned int port; unsigned int id; const struct fwnode_handle *local_fwnode; }; /* * ports and endpoints defined as software_nodes should all follow a common * naming scheme; use these macros to ensure commonality.
*/ #define SWNODE_GRAPH_PORT_NAME_FMT "port@%u" #define SWNODE_GRAPH_ENDPOINT_NAME_FMT "endpoint@%u" #define NR_FWNODE_REFERENCE_ARGS 8 /** * struct fwnode_reference_args - Fwnode reference with additional arguments * @fwnode:- A reference to the base fwnode * @nargs: Number of elements in @args array * @args: Integer arguments on the fwnode */ struct fwnode_reference_args { struct fwnode_handle *fwnode; unsigned int nargs; u64 args[NR_FWNODE_REFERENCE_ARGS]; }; /** * struct fwnode_operations - Operations for fwnode interface * @get: Get a reference to an fwnode. * @put: Put a reference to an fwnode. * @device_is_available: Return true if the device is available. * @device_get_match_data: Return the device driver match data. * @property_present: Return true if a property is present. * @property_read_int_array: Read an array of integer properties. Return zero on * success, a negative error code otherwise. * @property_read_string_array: Read an array of string properties. Return zero * on success, a negative error code otherwise. * @get_name: Return the name of an fwnode. * @get_name_prefix: Get a prefix for a node (for printing purposes). * @get_parent: Return the parent of an fwnode. * @get_next_child_node: Return the next child node in an iteration. * @get_named_child_node: Return a child node with a given name. * @get_reference_args: Return a reference pointed to by a property, with args * @graph_get_next_endpoint: Return an endpoint node in an iteration. * @graph_get_remote_endpoint: Return the remote endpoint node of a local * endpoint node. * @graph_get_port_parent: Return the parent node of a port node. * @graph_parse_endpoint: Parse endpoint for port and endpoint id. * @add_links: Create fwnode links to all the suppliers of the fwnode. Return * zero on success, a negative error code otherwise. 
*/ struct fwnode_operations { struct fwnode_handle *(*get)(struct fwnode_handle *fwnode); void (*put)(struct fwnode_handle *fwnode); bool (*device_is_available)(const struct fwnode_handle *fwnode); const void *(*device_get_match_data)(const struct fwnode_handle *fwnode, const struct device *dev); bool (*device_dma_supported)(const struct fwnode_handle *fwnode); enum dev_dma_attr (*device_get_dma_attr)(const struct fwnode_handle *fwnode); bool (*property_present)(const struct fwnode_handle *fwnode, const char *propname); int (*property_read_int_array)(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval); int (*property_read_string_array)(const struct fwnode_handle *fwnode_handle, const char *propname, const char **val, size_t nval); const char *(*get_name)(const struct fwnode_handle *fwnode); const char *(*get_name_prefix)(const struct fwnode_handle *fwnode); struct fwnode_handle *(*get_parent)(const struct fwnode_handle *fwnode); struct fwnode_handle * (*get_next_child_node)(const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle * (*get_named_child_node)(const struct fwnode_handle *fwnode, const char *name); int (*get_reference_args)(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args); struct fwnode_handle * (*graph_get_next_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_handle *prev); struct fwnode_handle * (*graph_get_remote_endpoint)(const struct fwnode_handle *fwnode); struct fwnode_handle * (*graph_get_port_parent)(struct fwnode_handle *fwnode); int (*graph_parse_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); void __iomem *(*iomap)(struct fwnode_handle *fwnode, int index); int (*irq_get)(const struct fwnode_handle *fwnode, unsigned int index); int (*add_links)(struct fwnode_handle *fwnode); }; #define fwnode_has_op(fwnode, op) \ (!IS_ERR_OR_NULL(fwnode) && (fwnode)->ops && (fwnode)->ops->op) #define fwnode_call_int_op(fwnode, op, ...) \ (fwnode_has_op(fwnode, op) ? \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : (IS_ERR_OR_NULL(fwnode) ? -EINVAL : -ENXIO)) #define fwnode_call_bool_op(fwnode, op, ...) \ (fwnode_has_op(fwnode, op) ? \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : false) #define fwnode_call_ptr_op(fwnode, op, ...) \ (fwnode_has_op(fwnode, op) ? \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL) #define fwnode_call_void_op(fwnode, op, ...) \ do { \ if (fwnode_has_op(fwnode, op)) \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__); \ } while (false) static inline void fwnode_init(struct fwnode_handle *fwnode, const struct fwnode_operations *ops) { fwnode->ops = ops; INIT_LIST_HEAD(&fwnode->consumers); INIT_LIST_HEAD(&fwnode->suppliers); } static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, bool initialized) { if (IS_ERR_OR_NULL(fwnode)) return; if (initialized) fwnode->flags |= FWNODE_FLAG_INITIALIZED; else fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; } int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, u8 flags); void fwnode_links_purge(struct fwnode_handle *fwnode); void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); bool fw_devlink_is_strict(void); #endif
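/*
 * Illustrative sketch (not part of the header above): how a hypothetical
 * firmware-node provider embeds a struct fwnode_handle, implements a couple
 * of fwnode_operations callbacks and initialises the handle with
 * fwnode_init().  The "foo_*" identifiers are invented for the example, and
 * the usual kernel headers (e.g. <linux/container_of.h>) are assumed.
 */
struct foo_fw_node {
	const char		*name;
	struct fwnode_handle	fwnode;		/* handed out to consumers */
};

static bool foo_fwnode_device_is_available(const struct fwnode_handle *fwnode)
{
	return true;	/* nodes of this provider are always usable */
}

static const char *foo_fwnode_get_name(const struct fwnode_handle *fwnode)
{
	const struct foo_fw_node *node =
		container_of(fwnode, struct foo_fw_node, fwnode);

	return node->name;
}

/*
 * Callbacks left NULL make the fwnode_call_*_op() macros above fall back to
 * -ENXIO, false or NULL as appropriate.
 */
static const struct fwnode_operations foo_fwnode_ops = {
	.device_is_available	= foo_fwnode_device_is_available,
	.get_name		= foo_fwnode_get_name,
};

static void foo_fw_node_setup(struct foo_fw_node *node, const char *name)
{
	node->name = name;
	fwnode_init(&node->fwnode, &foo_fwnode_ops);	/* sets ops, inits link lists */
}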
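/*
 * A second illustrative sketch (also not part of the header above), this
 * time from the consumer side: generic helpers dispatch through the
 * fwnode_call_*_op() macros, so a NULL or IS_ERR() handle yields -EINVAL, an
 * unimplemented callback yields -ENXIO, and the bool/ptr variants fall back
 * to false/NULL.  This mirrors, in simplified form, what the helpers in
 * drivers/base/property.c do (the real ones also try fwnode->secondary);
 * the "foo_*" wrappers here are hypothetical.
 */
static bool foo_property_present(const struct fwnode_handle *fwnode,
				 const char *propname)
{
	return fwnode_call_bool_op(fwnode, property_present, propname);
}

static int foo_property_read_u32(const struct fwnode_handle *fwnode,
				 const char *propname, u32 *val)
{
	/* elem_size and nval as defined by ->property_read_int_array() */
	return fwnode_call_int_op(fwnode, property_read_int_array, propname,
				  sizeof(u32), val, 1);
}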
// SPDX-License-Identifier: GPL-2.0+ /* * Driver for SanDisk SDDR-09 SmartMedia reader * * (c) 2000, 2001 Robert Baruch (autophile@starband.net) * (c) 2002 Andries Brouwer (aeb@cwi.nl) * Developed with the assistance of: * (c) 2002 Alan Stern <stern@rowland.org> * * The SanDisk SDDR-09 SmartMedia reader uses the Shuttle EUSB-01 chip. * This chip is a programmable USB controller. In the SDDR-09, it has * been programmed to obey a certain limited set of SCSI commands. * This driver translates the "real" SCSI commands to the SDDR-09 SCSI * commands. */ /* * Known vendor commands: 12 bytes, first byte is opcode * * E7: read scatter gather * E8: read * E9: write * EA: erase * EB: reset * EC: read status * ED: read ID * EE: write CIS (?) * EF: compute checksum (?) */ #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-sddr09" MODULE_DESCRIPTION("Driver for SanDisk SDDR-09 SmartMedia reader"); MODULE_AUTHOR("Andries Brouwer <aeb@cwi.nl>, Robert Baruch <autophile@starband.net>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(USB_STORAGE); static int usb_stor_sddr09_dpcm_init(struct us_data *us); static int sddr09_transport(struct scsi_cmnd *srb, struct us_data *us); static int usb_stor_sddr09_init(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static struct usb_device_id sddr09_usb_ids[] = { # include "unusual_sddr09.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, sddr09_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static struct us_unusual_dev sddr09_unusual_dev_list[] = { # include "unusual_sddr09.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV #define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) ) #define LSB_of(s) ((s)&0xFF) #define MSB_of(s) ((s)>>8) /* * First some stuff that does not belong here: * data on SmartMedia and other cards, completely * unrelated to this driver. * Similar stuff occurs in <linux/mtd/nand_ids.h>.
*/ struct nand_flash_dev { int model_id; int chipshift; /* 1<<cs bytes total capacity */ char pageshift; /* 1<<ps bytes in a page */ char blockshift; /* 1<<bs pages in an erase block */ char zoneshift; /* 1<<zs blocks in a zone */ /* # of logical blocks is 125/128 of this */ char pageadrlen; /* length of an address in bytes - 1 */ }; /* * NAND Flash Manufacturer ID Codes */ #define NAND_MFR_AMD 0x01 #define NAND_MFR_NATSEMI 0x8f #define NAND_MFR_TOSHIBA 0x98 #define NAND_MFR_SAMSUNG 0xec static inline char *nand_flash_manufacturer(int manuf_id) { switch(manuf_id) { case NAND_MFR_AMD: return "AMD"; case NAND_MFR_NATSEMI: return "NATSEMI"; case NAND_MFR_TOSHIBA: return "Toshiba"; case NAND_MFR_SAMSUNG: return "Samsung"; default: return "unknown"; } } /* * It looks like it is unnecessary to attach manufacturer to the * remaining data: SSFDC prescribes manufacturer-independent id codes. * * 256 MB NAND flash has a 5-byte ID with 2nd byte 0xaa, 0xba, 0xca or 0xda. */ static struct nand_flash_dev nand_flash_ids[] = { /* NAND flash */ { 0x6e, 20, 8, 4, 8, 2}, /* 1 MB */ { 0xe8, 20, 8, 4, 8, 2}, /* 1 MB */ { 0xec, 20, 8, 4, 8, 2}, /* 1 MB */ { 0x64, 21, 8, 4, 9, 2}, /* 2 MB */ { 0xea, 21, 8, 4, 9, 2}, /* 2 MB */ { 0x6b, 22, 9, 4, 9, 2}, /* 4 MB */ { 0xe3, 22, 9, 4, 9, 2}, /* 4 MB */ { 0xe5, 22, 9, 4, 9, 2}, /* 4 MB */ { 0xe6, 23, 9, 4, 10, 2}, /* 8 MB */ { 0x73, 24, 9, 5, 10, 2}, /* 16 MB */ { 0x75, 25, 9, 5, 10, 2}, /* 32 MB */ { 0x76, 26, 9, 5, 10, 3}, /* 64 MB */ { 0x79, 27, 9, 5, 10, 3}, /* 128 MB */ /* MASK ROM */ { 0x5d, 21, 9, 4, 8, 2}, /* 2 MB */ { 0xd5, 22, 9, 4, 9, 2}, /* 4 MB */ { 0xd6, 23, 9, 4, 10, 2}, /* 8 MB */ { 0x57, 24, 9, 4, 11, 2}, /* 16 MB */ { 0x58, 25, 9, 4, 12, 2}, /* 32 MB */ { 0,} }; static struct nand_flash_dev * nand_find_id(unsigned char id) { int i; for (i = 0; i < ARRAY_SIZE(nand_flash_ids); i++) if (nand_flash_ids[i].model_id == id) return &(nand_flash_ids[i]); return NULL; } /* * ECC computation. */ static unsigned char parity[256]; static unsigned char ecc2[256]; static void nand_init_ecc(void) { int i, j, a; parity[0] = 0; for (i = 1; i < 256; i++) parity[i] = (parity[i&(i-1)] ^ 1); for (i = 0; i < 256; i++) { a = 0; for (j = 0; j < 8; j++) { if (i & (1<<j)) { if ((j & 1) == 0) a ^= 0x04; if ((j & 2) == 0) a ^= 0x10; if ((j & 4) == 0) a ^= 0x40; } } ecc2[i] = ~(a ^ (a<<1) ^ (parity[i] ? 0xa8 : 0)); } } /* compute 3-byte ecc on 256 bytes */ static void nand_compute_ecc(unsigned char *data, unsigned char *ecc) { int i, j, a; unsigned char par = 0, bit, bits[8] = {0}; /* collect 16 checksum bits */ for (i = 0; i < 256; i++) { par ^= data[i]; bit = parity[data[i]]; for (j = 0; j < 8; j++) if ((i & (1<<j)) == 0) bits[j] ^= bit; } /* put 4+4+4 = 12 bits in the ecc */ a = (bits[3] << 6) + (bits[2] << 4) + (bits[1] << 2) + bits[0]; ecc[0] = ~(a ^ (a<<1) ^ (parity[par] ? 0xaa : 0)); a = (bits[7] << 6) + (bits[6] << 4) + (bits[5] << 2) + bits[4]; ecc[1] = ~(a ^ (a<<1) ^ (parity[par] ? 0xaa : 0)); ecc[2] = ecc2[par]; } static int nand_compare_ecc(unsigned char *data, unsigned char *ecc) { return (data[0] == ecc[0] && data[1] == ecc[1] && data[2] == ecc[2]); } static void nand_store_ecc(unsigned char *data, unsigned char *ecc) { memcpy(data, ecc, 3); } /* * The actual driver starts here. 
*/ struct sddr09_card_info { unsigned long capacity; /* Size of card in bytes */ int pagesize; /* Size of page in bytes */ int pageshift; /* log2 of pagesize */ int blocksize; /* Size of block in pages */ int blockshift; /* log2 of blocksize */ int blockmask; /* 2^blockshift - 1 */ int *lba_to_pba; /* logical to physical map */ int *pba_to_lba; /* physical to logical map */ int lbact; /* number of available pages */ int flags; #define SDDR09_WP 1 /* write protected */ }; /* * On my 16MB card, control blocks have size 64 (16 real control bytes, * and 48 junk bytes). In reality of course the card uses 16 control bytes, * so the reader makes up the remaining 48. Don't know whether these numbers * depend on the card. For now a constant. */ #define CONTROL_SHIFT 6 /* * On my Combo CF/SM reader, the SM reader has LUN 1. * (and things fail with LUN 0). * It seems LUN is irrelevant for others. */ #define LUN 1 #define LUNBITS (LUN << 5) /* * LBA and PBA are unsigned ints. Special values. */ #define UNDEF 0xffffffff #define SPARE 0xfffffffe #define UNUSABLE 0xfffffffd static const int erase_bad_lba_entries = 0; /* send vendor interface command (0x41) */ /* called for requests 0, 1, 8 */ static int sddr09_send_command(struct us_data *us, unsigned char request, unsigned char direction, unsigned char *xfer_data, unsigned int xfer_len) { unsigned int pipe; unsigned char requesttype = (0x41 | direction); int rc; // Get the receive or send control pipe number if (direction == USB_DIR_IN) pipe = us->recv_ctrl_pipe; else pipe = us->send_ctrl_pipe; rc = usb_stor_ctrl_transfer(us, pipe, request, requesttype, 0, 0, xfer_data, xfer_len); switch (rc) { case USB_STOR_XFER_GOOD: return 0; case USB_STOR_XFER_STALLED: return -EPIPE; default: return -EIO; } } static int sddr09_send_scsi_command(struct us_data *us, unsigned char *command, unsigned int command_len) { return sddr09_send_command(us, 0, USB_DIR_OUT, command, command_len); } #if 0 /* * Test Unit Ready Command: 12 bytes. * byte 0: opcode: 00 */ static int sddr09_test_unit_ready(struct us_data *us) { unsigned char *command = us->iobuf; int result; memset(command, 0, 6); command[1] = LUNBITS; result = sddr09_send_scsi_command(us, command, 6); usb_stor_dbg(us, "sddr09_test_unit_ready returns %d\n", result); return result; } #endif /* * Request Sense Command: 12 bytes. * byte 0: opcode: 03 * byte 4: data length */ static int sddr09_request_sense(struct us_data *us, unsigned char *sensebuf, int buflen) { unsigned char *command = us->iobuf; int result; memset(command, 0, 12); command[0] = 0x03; command[1] = LUNBITS; command[4] = buflen; result = sddr09_send_scsi_command(us, command, 12); if (result) return result; result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, sensebuf, buflen, NULL); return (result == USB_STOR_XFER_GOOD ? 0 : -EIO); } /* * Read Command: 12 bytes. * byte 0: opcode: E8 * byte 1: last two bits: 00: read data, 01: read blockwise control, * 10: read both, 11: read pagewise control. * It turns out we need values 20, 21, 22, 23 here (LUN 1). * bytes 2-5: address (interpretation depends on byte 1, see below) * bytes 10-11: count (idem) * * A page has 512 data bytes and 64 control bytes (16 control and 48 junk). * A read data command gets data in 512-byte pages. * A read control command gets control in 64-byte chunks. * A read both command gets data+control in 576-byte chunks. 
* * Blocks are groups of 32 pages, and read blockwise control jumps to the * next block, while read pagewise control jumps to the next page after * reading a group of 64 control bytes. * [Here 512 = 1<<pageshift, 32 = 1<<blockshift, 64 is constant?] * * (1 MB and 2 MB cards are a bit different, but I have only a 16 MB card.) */ static int sddr09_readX(struct us_data *us, int x, unsigned long fromaddress, int nr_of_pages, int bulklen, unsigned char *buf, int use_sg) { unsigned char *command = us->iobuf; int result; command[0] = 0xE8; command[1] = LUNBITS | x; command[2] = MSB_of(fromaddress>>16); command[3] = LSB_of(fromaddress>>16); command[4] = MSB_of(fromaddress & 0xFFFF); command[5] = LSB_of(fromaddress & 0xFFFF); command[6] = 0; command[7] = 0; command[8] = 0; command[9] = 0; command[10] = MSB_of(nr_of_pages); command[11] = LSB_of(nr_of_pages); result = sddr09_send_scsi_command(us, command, 12); if (result) { usb_stor_dbg(us, "Result for send_control in sddr09_read2%d %d\n", x, result); return result; } result = usb_stor_bulk_transfer_sg(us, us->recv_bulk_pipe, buf, bulklen, use_sg, NULL); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Result for bulk_transfer in sddr09_read2%d %d\n", x, result); return -EIO; } return 0; } /* * Read Data * * fromaddress counts data shorts: * increasing it by 256 shifts the bytestream by 512 bytes; * the last 8 bits are ignored. * * nr_of_pages counts pages of size (1 << pageshift). */ static int sddr09_read20(struct us_data *us, unsigned long fromaddress, int nr_of_pages, int pageshift, unsigned char *buf, int use_sg) { int bulklen = nr_of_pages << pageshift; /* The last 8 bits of fromaddress are ignored. */ return sddr09_readX(us, 0, fromaddress, nr_of_pages, bulklen, buf, use_sg); } /* * Read Blockwise Control * * fromaddress gives the starting position (as in read data; * the last 8 bits are ignored); increasing it by 32*256 shifts * the output stream by 64 bytes. * * count counts control groups of size (1 << controlshift). * For me, controlshift = 6. Is this constant? * * After getting one control group, jump to the next block * (fromaddress += 8192). */ static int sddr09_read21(struct us_data *us, unsigned long fromaddress, int count, int controlshift, unsigned char *buf, int use_sg) { int bulklen = (count << controlshift); return sddr09_readX(us, 1, fromaddress, count, bulklen, buf, use_sg); } /* * Read both Data and Control * * fromaddress counts data shorts, ignoring control: * increasing it by 256 shifts the bytestream by 576 = 512+64 bytes; * the last 8 bits are ignored. * * nr_of_pages counts pages of size (1 << pageshift) + (1 << controlshift). */ static int sddr09_read22(struct us_data *us, unsigned long fromaddress, int nr_of_pages, int pageshift, unsigned char *buf, int use_sg) { int bulklen = (nr_of_pages << pageshift) + (nr_of_pages << CONTROL_SHIFT); usb_stor_dbg(us, "reading %d pages, %d bytes\n", nr_of_pages, bulklen); return sddr09_readX(us, 2, fromaddress, nr_of_pages, bulklen, buf, use_sg); } #if 0 /* * Read Pagewise Control * * fromaddress gives the starting position (as in read data; * the last 8 bits are ignored); increasing it by 256 shifts * the output stream by 64 bytes. * * count counts control groups of size (1 << controlshift). * For me, controlshift = 6. Is this constant? * * After getting one control group, jump to the next page * (fromaddress += 256). 
*/ static int sddr09_read23(struct us_data *us, unsigned long fromaddress, int count, int controlshift, unsigned char *buf, int use_sg) { int bulklen = (count << controlshift); return sddr09_readX(us, 3, fromaddress, count, bulklen, buf, use_sg); } #endif /* * Erase Command: 12 bytes. * byte 0: opcode: EA * bytes 6-9: erase address (big-endian, counting shorts, sector aligned). * * Always precisely one block is erased; bytes 2-5 and 10-11 are ignored. * The byte address being erased is 2*Eaddress. * The CIS cannot be erased. */ static int sddr09_erase(struct us_data *us, unsigned long Eaddress) { unsigned char *command = us->iobuf; int result; usb_stor_dbg(us, "erase address %lu\n", Eaddress); memset(command, 0, 12); command[0] = 0xEA; command[1] = LUNBITS; command[6] = MSB_of(Eaddress>>16); command[7] = LSB_of(Eaddress>>16); command[8] = MSB_of(Eaddress & 0xFFFF); command[9] = LSB_of(Eaddress & 0xFFFF); result = sddr09_send_scsi_command(us, command, 12); if (result) usb_stor_dbg(us, "Result for send_control in sddr09_erase %d\n", result); return result; } /* * Write CIS Command: 12 bytes. * byte 0: opcode: EE * bytes 2-5: write address in shorts * bytes 10-11: sector count * * This writes at the indicated address. Don't know how it differs * from E9. Maybe it does not erase? However, it will also write to * the CIS. * * When two such commands on the same page follow each other directly, * the second one is not done. */ /* * Write Command: 12 bytes. * byte 0: opcode: E9 * bytes 2-5: write address (big-endian, counting shorts, sector aligned). * bytes 6-9: erase address (big-endian, counting shorts, sector aligned). * bytes 10-11: sector count (big-endian, in 512-byte sectors). * * If write address equals erase address, the erase is done first, * otherwise the write is done first. When erase address equals zero * no erase is done? */ static int sddr09_writeX(struct us_data *us, unsigned long Waddress, unsigned long Eaddress, int nr_of_pages, int bulklen, unsigned char *buf, int use_sg) { unsigned char *command = us->iobuf; int result; command[0] = 0xE9; command[1] = LUNBITS; command[2] = MSB_of(Waddress>>16); command[3] = LSB_of(Waddress>>16); command[4] = MSB_of(Waddress & 0xFFFF); command[5] = LSB_of(Waddress & 0xFFFF); command[6] = MSB_of(Eaddress>>16); command[7] = LSB_of(Eaddress>>16); command[8] = MSB_of(Eaddress & 0xFFFF); command[9] = LSB_of(Eaddress & 0xFFFF); command[10] = MSB_of(nr_of_pages); command[11] = LSB_of(nr_of_pages); result = sddr09_send_scsi_command(us, command, 12); if (result) { usb_stor_dbg(us, "Result for send_control in sddr09_writeX %d\n", result); return result; } result = usb_stor_bulk_transfer_sg(us, us->send_bulk_pipe, buf, bulklen, use_sg, NULL); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Result for bulk_transfer in sddr09_writeX %d\n", result); return -EIO; } return 0; } /* erase address, write same address */ static int sddr09_write_inplace(struct us_data *us, unsigned long address, int nr_of_pages, int pageshift, unsigned char *buf, int use_sg) { int bulklen = (nr_of_pages << pageshift) + (nr_of_pages << CONTROL_SHIFT); return sddr09_writeX(us, address, address, nr_of_pages, bulklen, buf, use_sg); } #if 0 /* * Read Scatter Gather Command: 3+4n bytes. * byte 0: opcode E7 * byte 2: n * bytes 4i-1,4i,4i+1: page address * byte 4i+2: page count * (i=1..n) * * This reads several pages from the card to a single memory buffer. * The last two bits of byte 1 have the same meaning as for E8. 
*/ static int sddr09_read_sg_test_only(struct us_data *us) { unsigned char *command = us->iobuf; int result, bulklen, nsg, ct; unsigned char *buf; unsigned long address; nsg = bulklen = 0; command[0] = 0xE7; command[1] = LUNBITS; command[2] = 0; address = 040000; ct = 1; nsg++; bulklen += (ct << 9); command[4*nsg+2] = ct; command[4*nsg+1] = ((address >> 9) & 0xFF); command[4*nsg+0] = ((address >> 17) & 0xFF); command[4*nsg-1] = ((address >> 25) & 0xFF); address = 0340000; ct = 1; nsg++; bulklen += (ct << 9); command[4*nsg+2] = ct; command[4*nsg+1] = ((address >> 9) & 0xFF); command[4*nsg+0] = ((address >> 17) & 0xFF); command[4*nsg-1] = ((address >> 25) & 0xFF); address = 01000000; ct = 2; nsg++; bulklen += (ct << 9); command[4*nsg+2] = ct; command[4*nsg+1] = ((address >> 9) & 0xFF); command[4*nsg+0] = ((address >> 17) & 0xFF); command[4*nsg-1] = ((address >> 25) & 0xFF); command[2] = nsg; result = sddr09_send_scsi_command(us, command, 4*nsg+3); if (result) { usb_stor_dbg(us, "Result for send_control in sddr09_read_sg %d\n", result); return result; } buf = kmalloc(bulklen, GFP_NOIO); if (!buf) return -ENOMEM; result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, buf, bulklen, NULL); kfree(buf); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Result for bulk_transfer in sddr09_read_sg %d\n", result); return -EIO; } return 0; } #endif /* * Read Status Command: 12 bytes. * byte 0: opcode: EC * * Returns 64 bytes, all zero except for the first. * bit 0: 1: Error * bit 5: 1: Suspended * bit 6: 1: Ready * bit 7: 1: Not write-protected */ static int sddr09_read_status(struct us_data *us, unsigned char *status) { unsigned char *command = us->iobuf; unsigned char *data = us->iobuf; int result; usb_stor_dbg(us, "Reading status...\n"); memset(command, 0, 12); command[0] = 0xEC; command[1] = LUNBITS; result = sddr09_send_scsi_command(us, command, 12); if (result) return result; result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, data, 64, NULL); *status = data[0]; return (result == USB_STOR_XFER_GOOD ? 0 : -EIO); } static int sddr09_read_data(struct us_data *us, unsigned long address, unsigned int sectors) { struct sddr09_card_info *info = (struct sddr09_card_info *) us->extra; unsigned char *buffer; unsigned int lba, maxlba, pba; unsigned int page, pages; unsigned int len, offset; struct scatterlist *sg; int result; // Figure out the initial LBA and page lba = address >> info->blockshift; page = (address & info->blockmask); maxlba = info->capacity >> (info->pageshift + info->blockshift); if (lba >= maxlba) return -EIO; // Since we only read in one block at a time, we have to create // a bounce buffer and move the data a piece at a time between the // bounce buffer and the actual transfer buffer. len = min(sectors, (unsigned int) info->blocksize) * info->pagesize; buffer = kmalloc(len, GFP_NOIO); if (!buffer) return -ENOMEM; // This could be made much more efficient by checking for // contiguous LBA's. Another exercise left to the student. result = 0; offset = 0; sg = NULL; while (sectors > 0) { /* Find number of pages we can read in this block */ pages = min(sectors, info->blocksize - page); len = pages << info->pageshift; /* Not overflowing capacity? 
*/ if (lba >= maxlba) { usb_stor_dbg(us, "Error: Requested lba %u exceeds maximum %u\n", lba, maxlba); result = -EIO; break; } /* Find where this lba lives on disk */ pba = info->lba_to_pba[lba]; if (pba == UNDEF) { /* this lba was never written */ usb_stor_dbg(us, "Read %d zero pages (LBA %d) page %d\n", pages, lba, page); /* * This is not really an error. It just means * that the block has never been written. * Instead of returning an error * it is better to return all zero data. */ memset(buffer, 0, len); } else { usb_stor_dbg(us, "Read %d pages, from PBA %d (LBA %d) page %d\n", pages, pba, lba, page); address = ((pba << info->blockshift) + page) << info->pageshift; result = sddr09_read20(us, address>>1, pages, info->pageshift, buffer, 0); if (result) break; } // Store the data in the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &offset, TO_XFER_BUF); page = 0; lba++; sectors -= pages; } kfree(buffer); return result; } static unsigned int sddr09_find_unused_pba(struct sddr09_card_info *info, unsigned int lba) { static unsigned int lastpba = 1; int zonestart, end, i; zonestart = (lba/1000) << 10; end = info->capacity >> (info->blockshift + info->pageshift); end -= zonestart; if (end > 1024) end = 1024; for (i = lastpba+1; i < end; i++) { if (info->pba_to_lba[zonestart+i] == UNDEF) { lastpba = i; return zonestart+i; } } for (i = 0; i <= lastpba; i++) { if (info->pba_to_lba[zonestart+i] == UNDEF) { lastpba = i; return zonestart+i; } } return 0; } static int sddr09_write_lba(struct us_data *us, unsigned int lba, unsigned int page, unsigned int pages, unsigned char *ptr, unsigned char *blockbuffer) { struct sddr09_card_info *info = (struct sddr09_card_info *) us->extra; unsigned long address; unsigned int pba, lbap; unsigned int pagelen; unsigned char *bptr, *cptr, *xptr; unsigned char ecc[3]; int i, result; lbap = ((lba % 1000) << 1) | 0x1000; if (parity[MSB_of(lbap) ^ LSB_of(lbap)]) lbap ^= 1; pba = info->lba_to_pba[lba]; if (pba == UNDEF) { pba = sddr09_find_unused_pba(info, lba); if (!pba) { printk(KERN_WARNING "sddr09_write_lba: Out of unused blocks\n"); return -ENOSPC; } info->pba_to_lba[pba] = lba; info->lba_to_pba[lba] = pba; } if (pba == 1) { /* * Maybe it is impossible to write to PBA 1. * Fake success, but don't do anything. 
*/ printk(KERN_WARNING "sddr09: avoid writing to pba 1\n"); return 0; } pagelen = (1 << info->pageshift) + (1 << CONTROL_SHIFT); /* read old contents */ address = (pba << (info->pageshift + info->blockshift)); result = sddr09_read22(us, address>>1, info->blocksize, info->pageshift, blockbuffer, 0); if (result) return result; /* check old contents and fill lba */ for (i = 0; i < info->blocksize; i++) { bptr = blockbuffer + i*pagelen; cptr = bptr + info->pagesize; nand_compute_ecc(bptr, ecc); if (!nand_compare_ecc(cptr+13, ecc)) { usb_stor_dbg(us, "Warning: bad ecc in page %d- of pba %d\n", i, pba); nand_store_ecc(cptr+13, ecc); } nand_compute_ecc(bptr+(info->pagesize / 2), ecc); if (!nand_compare_ecc(cptr+8, ecc)) { usb_stor_dbg(us, "Warning: bad ecc in page %d+ of pba %d\n", i, pba); nand_store_ecc(cptr+8, ecc); } cptr[6] = cptr[11] = MSB_of(lbap); cptr[7] = cptr[12] = LSB_of(lbap); } /* copy in new stuff and compute ECC */ xptr = ptr; for (i = page; i < page+pages; i++) { bptr = blockbuffer + i*pagelen; cptr = bptr + info->pagesize; memcpy(bptr, xptr, info->pagesize); xptr += info->pagesize; nand_compute_ecc(bptr, ecc); nand_store_ecc(cptr+13, ecc); nand_compute_ecc(bptr+(info->pagesize / 2), ecc); nand_store_ecc(cptr+8, ecc); } usb_stor_dbg(us, "Rewrite PBA %d (LBA %d)\n", pba, lba); result = sddr09_write_inplace(us, address>>1, info->blocksize, info->pageshift, blockbuffer, 0); usb_stor_dbg(us, "sddr09_write_inplace returns %d\n", result); #if 0 { unsigned char status = 0; int result2 = sddr09_read_status(us, &status); if (result2) usb_stor_dbg(us, "cannot read status\n"); else if (status != 0xc0) usb_stor_dbg(us, "status after write: 0x%x\n", status); } #endif #if 0 { int result2 = sddr09_test_unit_ready(us); } #endif return result; } static int sddr09_write_data(struct us_data *us, unsigned long address, unsigned int sectors) { struct sddr09_card_info *info = (struct sddr09_card_info *) us->extra; unsigned int lba, maxlba, page, pages; unsigned int pagelen, blocklen; unsigned char *blockbuffer; unsigned char *buffer; unsigned int len, offset; struct scatterlist *sg; int result; /* Figure out the initial LBA and page */ lba = address >> info->blockshift; page = (address & info->blockmask); maxlba = info->capacity >> (info->pageshift + info->blockshift); if (lba >= maxlba) return -EIO; /* * blockbuffer is used for reading in the old data, overwriting * with the new data, and performing ECC calculations */ /* * TODO: instead of doing kmalloc/kfree for each write, * add a bufferpointer to the info structure */ pagelen = (1 << info->pageshift) + (1 << CONTROL_SHIFT); blocklen = (pagelen << info->blockshift); blockbuffer = kmalloc(blocklen, GFP_NOIO); if (!blockbuffer) return -ENOMEM; /* * Since we don't write the user data directly to the device, * we have to create a bounce buffer and move the data a piece * at a time between the bounce buffer and the actual transfer buffer. */ len = min(sectors, (unsigned int) info->blocksize) * info->pagesize; buffer = kmalloc(len, GFP_NOIO); if (!buffer) { kfree(blockbuffer); return -ENOMEM; } result = 0; offset = 0; sg = NULL; while (sectors > 0) { /* Write as many sectors as possible in this block */ pages = min(sectors, info->blocksize - page); len = (pages << info->pageshift); /* Not overflowing capacity? 
*/ if (lba >= maxlba) { usb_stor_dbg(us, "Error: Requested lba %u exceeds maximum %u\n", lba, maxlba); result = -EIO; break; } /* Get the data from the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &offset, FROM_XFER_BUF); result = sddr09_write_lba(us, lba, page, pages, buffer, blockbuffer); if (result) break; page = 0; lba++; sectors -= pages; } kfree(buffer); kfree(blockbuffer); return result; } static int sddr09_read_control(struct us_data *us, unsigned long address, unsigned int blocks, unsigned char *content, int use_sg) { usb_stor_dbg(us, "Read control address %lu, blocks %d\n", address, blocks); return sddr09_read21(us, address, blocks, CONTROL_SHIFT, content, use_sg); } /* * Read Device ID Command: 12 bytes. * byte 0: opcode: ED * * Returns 2 bytes: Manufacturer ID and Device ID. * On more recent cards 3 bytes: the third byte is an option code A5 * signifying that the secret command to read an 128-bit ID is available. * On still more recent cards 4 bytes: the fourth byte C0 means that * a second read ID cmd is available. */ static int sddr09_read_deviceID(struct us_data *us, unsigned char *deviceID) { unsigned char *command = us->iobuf; unsigned char *content = us->iobuf; int result, i; memset(command, 0, 12); command[0] = 0xED; command[1] = LUNBITS; result = sddr09_send_scsi_command(us, command, 12); if (result) return result; result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, content, 64, NULL); for (i = 0; i < 4; i++) deviceID[i] = content[i]; return (result == USB_STOR_XFER_GOOD ? 0 : -EIO); } static int sddr09_get_wp(struct us_data *us, struct sddr09_card_info *info) { int result; unsigned char status; const char *wp_fmt; result = sddr09_read_status(us, &status); if (result) { usb_stor_dbg(us, "read_status fails\n"); return result; } if ((status & 0x80) == 0) { info->flags |= SDDR09_WP; /* write protected */ wp_fmt = " WP"; } else { wp_fmt = ""; } usb_stor_dbg(us, "status 0x%02X%s%s%s%s\n", status, wp_fmt, status & 0x40 ? " Ready" : "", status & LUNBITS ? " Suspended" : "", status & 0x01 ? " Error" : ""); return 0; } #if 0 /* * Reset Command: 12 bytes. * byte 0: opcode: EB */ static int sddr09_reset(struct us_data *us) { unsigned char *command = us->iobuf; memset(command, 0, 12); command[0] = 0xEB; command[1] = LUNBITS; return sddr09_send_scsi_command(us, command, 12); } #endif static struct nand_flash_dev * sddr09_get_cardinfo(struct us_data *us, unsigned char flags) { struct nand_flash_dev *cardinfo; unsigned char deviceID[4]; char blurbtxt[256]; int result; usb_stor_dbg(us, "Reading capacity...\n"); result = sddr09_read_deviceID(us, deviceID); if (result) { usb_stor_dbg(us, "Result of read_deviceID is %d\n", result); printk(KERN_WARNING "sddr09: could not read card info\n"); return NULL; } sprintf(blurbtxt, "sddr09: Found Flash card, ID = %4ph", deviceID); /* Byte 0 is the manufacturer */ sprintf(blurbtxt + strlen(blurbtxt), ": Manuf. %s", nand_flash_manufacturer(deviceID[0])); /* Byte 1 is the device type */ cardinfo = nand_find_id(deviceID[1]); if (cardinfo) { /* * MB or MiB? It is neither. A 16 MB card has * 17301504 raw bytes, of which 16384000 are * usable for user data. 
*/ sprintf(blurbtxt + strlen(blurbtxt), ", %d MB", 1<<(cardinfo->chipshift - 20)); } else { sprintf(blurbtxt + strlen(blurbtxt), ", type unrecognized"); } /* Byte 2 is code to signal availability of 128-bit ID */ if (deviceID[2] == 0xa5) { sprintf(blurbtxt + strlen(blurbtxt), ", 128-bit ID"); } /* Byte 3 announces the availability of another read ID command */ if (deviceID[3] == 0xc0) { sprintf(blurbtxt + strlen(blurbtxt), ", extra cmd"); } if (flags & SDDR09_WP) sprintf(blurbtxt + strlen(blurbtxt), ", WP"); printk(KERN_WARNING "%s\n", blurbtxt); return cardinfo; } static int sddr09_read_map(struct us_data *us) { struct sddr09_card_info *info = (struct sddr09_card_info *) us->extra; int numblocks, alloc_len, alloc_blocks; int i, j, result; unsigned char *buffer, *buffer_end, *ptr; unsigned int lba, lbact; if (!info->capacity) return -1; /* * size of a block is 1 << (blockshift + pageshift) bytes * divide into the total capacity to get the number of blocks */ numblocks = info->capacity >> (info->blockshift + info->pageshift); /* * read 64 bytes for every block (actually 1 << CONTROL_SHIFT) * but only use a 64 KB buffer * buffer size used must be a multiple of (1 << CONTROL_SHIFT) */ #define SDDR09_READ_MAP_BUFSZ 65536 alloc_blocks = min(numblocks, SDDR09_READ_MAP_BUFSZ >> CONTROL_SHIFT); alloc_len = (alloc_blocks << CONTROL_SHIFT); buffer = kmalloc(alloc_len, GFP_NOIO); if (!buffer) { result = -1; goto done; } buffer_end = buffer + alloc_len; #undef SDDR09_READ_MAP_BUFSZ kfree(info->lba_to_pba); kfree(info->pba_to_lba); info->lba_to_pba = kmalloc_array(numblocks, sizeof(int), GFP_NOIO); info->pba_to_lba = kmalloc_array(numblocks, sizeof(int), GFP_NOIO); if (info->lba_to_pba == NULL || info->pba_to_lba == NULL) { printk(KERN_WARNING "sddr09_read_map: out of memory\n"); result = -1; goto done; } for (i = 0; i < numblocks; i++) info->lba_to_pba[i] = info->pba_to_lba[i] = UNDEF; /* * Define lba-pba translation table */ ptr = buffer_end; for (i = 0; i < numblocks; i++) { ptr += (1 << CONTROL_SHIFT); if (ptr >= buffer_end) { unsigned long address; address = i << (info->pageshift + info->blockshift); result = sddr09_read_control( us, address>>1, min(alloc_blocks, numblocks - i), buffer, 0); if (result) { result = -1; goto done; } ptr = buffer; } if (i == 0 || i == 1) { info->pba_to_lba[i] = UNUSABLE; continue; } /* special PBAs have control field 0^16 */ for (j = 0; j < 16; j++) if (ptr[j] != 0) goto nonz; info->pba_to_lba[i] = UNUSABLE; printk(KERN_WARNING "sddr09: PBA %d has no logical mapping\n", i); continue; nonz: /* unwritten PBAs have control field FF^16 */ for (j = 0; j < 16; j++) if (ptr[j] != 0xff) goto nonff; continue; nonff: /* normal PBAs start with six FFs */ if (j < 6) { printk(KERN_WARNING "sddr09: PBA %d has no logical mapping: " "reserved area = %02X%02X%02X%02X " "data status %02X block status %02X\n", i, ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5]); info->pba_to_lba[i] = UNUSABLE; continue; } if ((ptr[6] >> 4) != 0x01) { printk(KERN_WARNING "sddr09: PBA %d has invalid address field " "%02X%02X/%02X%02X\n", i, ptr[6], ptr[7], ptr[11], ptr[12]); info->pba_to_lba[i] = UNUSABLE; continue; } /* check even parity */ if (parity[ptr[6] ^ ptr[7]]) { printk(KERN_WARNING "sddr09: Bad parity in LBA for block %d" " (%02X %02X)\n", i, ptr[6], ptr[7]); info->pba_to_lba[i] = UNUSABLE; continue; } lba = short_pack(ptr[7], ptr[6]); lba = (lba & 0x07FF) >> 1; /* * Every 1024 physical blocks ("zone"), the LBA numbers * go back to zero, but are within a higher block of LBA's. 
* Also, there is a maximum of 1000 LBA's per zone. * In other words, in PBA 1024-2047 you will find LBA 0-999 * which are really LBA 1000-1999. This allows for 24 bad * or special physical blocks per zone. */ if (lba >= 1000) { printk(KERN_WARNING "sddr09: Bad low LBA %d for block %d\n", lba, i); goto possibly_erase; } lba += 1000*(i/0x400); if (info->lba_to_pba[lba] != UNDEF) { printk(KERN_WARNING "sddr09: LBA %d seen for PBA %d and %d\n", lba, info->lba_to_pba[lba], i); goto possibly_erase; } info->pba_to_lba[i] = lba; info->lba_to_pba[lba] = i; continue; possibly_erase: if (erase_bad_lba_entries) { unsigned long address; address = (i << (info->pageshift + info->blockshift)); sddr09_erase(us, address>>1); info->pba_to_lba[i] = UNDEF; } else info->pba_to_lba[i] = UNUSABLE; } /* * Approximate capacity. This is not entirely correct yet, * since a zone with less than 1000 usable pages leads to * missing LBAs. Especially if it is the last zone, some * LBAs can be past capacity. */ lbact = 0; for (i = 0; i < numblocks; i += 1024) { int ct = 0; for (j = 0; j < 1024 && i+j < numblocks; j++) { if (info->pba_to_lba[i+j] != UNUSABLE) { if (ct >= 1000) info->pba_to_lba[i+j] = SPARE; else ct++; } } lbact += ct; } info->lbact = lbact; usb_stor_dbg(us, "Found %d LBA's\n", lbact); result = 0; done: if (result != 0) { kfree(info->lba_to_pba); kfree(info->pba_to_lba); info->lba_to_pba = NULL; info->pba_to_lba = NULL; } kfree(buffer); return result; } static void sddr09_card_info_destructor(void *extra) { struct sddr09_card_info *info = (struct sddr09_card_info *)extra; if (!info) return; kfree(info->lba_to_pba); kfree(info->pba_to_lba); } static int sddr09_common_init(struct us_data *us) { int result; /* set the configuration -- STALL is an acceptable response here */ if (us->pusb_dev->actconfig->desc.bConfigurationValue != 1) { usb_stor_dbg(us, "active config #%d != 1 ??\n", us->pusb_dev->actconfig->desc.bConfigurationValue); return -EINVAL; } result = usb_reset_configuration(us->pusb_dev); usb_stor_dbg(us, "Result of usb_reset_configuration is %d\n", result); if (result == -EPIPE) { usb_stor_dbg(us, "-- stall on control interface\n"); } else if (result != 0) { /* it's not a stall, but another error -- time to bail */ usb_stor_dbg(us, "-- Unknown error. Rejecting device\n"); return -EINVAL; } us->extra = kzalloc(sizeof(struct sddr09_card_info), GFP_NOIO); if (!us->extra) return -ENOMEM; us->extra_destructor = sddr09_card_info_destructor; nand_init_ecc(); return 0; } /* * This is needed at a very early stage. If this is not listed in the * unusual devices list but called from here then LUN 0 of the combo reader * is not recognized. But I do not know what precisely these calls do. 
*/ static int usb_stor_sddr09_dpcm_init(struct us_data *us) { int result; unsigned char *data = us->iobuf; result = sddr09_common_init(us); if (result) return result; result = sddr09_send_command(us, 0x01, USB_DIR_IN, data, 2); if (result) { usb_stor_dbg(us, "send_command fails\n"); return result; } usb_stor_dbg(us, "%02X %02X\n", data[0], data[1]); // get 07 02 result = sddr09_send_command(us, 0x08, USB_DIR_IN, data, 2); if (result) { usb_stor_dbg(us, "2nd send_command fails\n"); return result; } usb_stor_dbg(us, "%02X %02X\n", data[0], data[1]); // get 07 00 result = sddr09_request_sense(us, data, 18); if (result == 0 && data[2] != 0) { int j; for (j=0; j<18; j++) printk(" %02X", data[j]); printk("\n"); // get 70 00 00 00 00 00 00 * 00 00 00 00 00 00 // 70: current command // sense key 0, sense code 0, extd sense code 0 // additional transfer length * = sizeof(data) - 7 // Or: 70 00 06 00 00 00 00 0b 00 00 00 00 28 00 00 00 00 00 // sense key 06, sense code 28: unit attention, // not ready to ready transition } // test unit ready return 0; /* not result */ } /* * Transport for the Microtech DPCM-USB */ static int dpcm_transport(struct scsi_cmnd *srb, struct us_data *us) { int ret; usb_stor_dbg(us, "LUN=%d\n", (u8)srb->device->lun); switch (srb->device->lun) { case 0: /* * LUN 0 corresponds to the CompactFlash card reader. */ ret = usb_stor_CB_transport(srb, us); break; case 1: /* * LUN 1 corresponds to the SmartMedia card reader. */ /* * Set the LUN to 0 (just in case). */ srb->device->lun = 0; ret = sddr09_transport(srb, us); srb->device->lun = 1; break; default: usb_stor_dbg(us, "Invalid LUN %d\n", (u8)srb->device->lun); ret = USB_STOR_TRANSPORT_ERROR; break; } return ret; } /* * Transport for the Sandisk SDDR-09 */ static int sddr09_transport(struct scsi_cmnd *srb, struct us_data *us) { static unsigned char sensekey = 0, sensecode = 0; static unsigned char havefakesense = 0; int result, i; unsigned char *ptr = us->iobuf; unsigned long capacity; unsigned int page, pages; struct sddr09_card_info *info; static unsigned char inquiry_response[8] = { 0x00, 0x80, 0x00, 0x02, 0x1F, 0x00, 0x00, 0x00 }; /* note: no block descriptor support */ static unsigned char mode_page_01[19] = { 0x00, 0x0F, 0x00, 0x0, 0x0, 0x0, 0x00, 0x01, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; info = (struct sddr09_card_info *)us->extra; if (srb->cmnd[0] == REQUEST_SENSE && havefakesense) { /* for a faked command, we have to follow with a faked sense */ memset(ptr, 0, 18); ptr[0] = 0x70; ptr[2] = sensekey; ptr[7] = 11; ptr[12] = sensecode; usb_stor_set_xfer_buf(ptr, 18, srb); sensekey = sensecode = havefakesense = 0; return USB_STOR_TRANSPORT_GOOD; } havefakesense = 1; /* * Dummy up a response for INQUIRY since SDDR09 doesn't * respond to INQUIRY commands */ if (srb->cmnd[0] == INQUIRY) { memcpy(ptr, inquiry_response, 8); fill_inquiry_response(us, ptr, 36); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == READ_CAPACITY) { struct nand_flash_dev *cardinfo; sddr09_get_wp(us, info); /* read WP bit */ cardinfo = sddr09_get_cardinfo(us, info->flags); if (!cardinfo) { /* probably no media */ init_error: sensekey = 0x02; /* not ready */ sensecode = 0x3a; /* medium not present */ return USB_STOR_TRANSPORT_FAILED; } info->capacity = (1 << cardinfo->chipshift); info->pageshift = cardinfo->pageshift; info->pagesize = (1 << info->pageshift); info->blockshift = cardinfo->blockshift; info->blocksize = (1 << info->blockshift); info->blockmask = info->blocksize - 1; // map initialization, must follow 
get_cardinfo() if (sddr09_read_map(us)) { /* probably out of memory */ goto init_error; } // Report capacity capacity = (info->lbact << info->blockshift) - 1; ((__be32 *) ptr)[0] = cpu_to_be32(capacity); // Report page size ((__be32 *) ptr)[1] = cpu_to_be32(info->pagesize); usb_stor_set_xfer_buf(ptr, 8, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == MODE_SENSE_10) { int modepage = (srb->cmnd[2] & 0x3F); /* * They ask for the Read/Write error recovery page, * or for all pages. */ /* %% We should check DBD %% */ if (modepage == 0x01 || modepage == 0x3F) { usb_stor_dbg(us, "Dummy up request for mode page 0x%x\n", modepage); memcpy(ptr, mode_page_01, sizeof(mode_page_01)); ((__be16*)ptr)[0] = cpu_to_be16(sizeof(mode_page_01) - 2); ptr[3] = (info->flags & SDDR09_WP) ? 0x80 : 0; usb_stor_set_xfer_buf(ptr, sizeof(mode_page_01), srb); return USB_STOR_TRANSPORT_GOOD; } sensekey = 0x05; /* illegal request */ sensecode = 0x24; /* invalid field in CDB */ return USB_STOR_TRANSPORT_FAILED; } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) return USB_STOR_TRANSPORT_GOOD; havefakesense = 0; if (srb->cmnd[0] == READ_10) { page = short_pack(srb->cmnd[3], srb->cmnd[2]); page <<= 16; page |= short_pack(srb->cmnd[5], srb->cmnd[4]); pages = short_pack(srb->cmnd[8], srb->cmnd[7]); usb_stor_dbg(us, "READ_10: read page %d pagect %d\n", page, pages); result = sddr09_read_data(us, page, pages); return (result == 0 ? USB_STOR_TRANSPORT_GOOD : USB_STOR_TRANSPORT_ERROR); } if (srb->cmnd[0] == WRITE_10) { page = short_pack(srb->cmnd[3], srb->cmnd[2]); page <<= 16; page |= short_pack(srb->cmnd[5], srb->cmnd[4]); pages = short_pack(srb->cmnd[8], srb->cmnd[7]); usb_stor_dbg(us, "WRITE_10: write page %d pagect %d\n", page, pages); result = sddr09_write_data(us, page, pages); return (result == 0 ? USB_STOR_TRANSPORT_GOOD : USB_STOR_TRANSPORT_ERROR); } /* * catch-all for all other commands, except * pass TEST_UNIT_READY and REQUEST_SENSE through */ if (srb->cmnd[0] != TEST_UNIT_READY && srb->cmnd[0] != REQUEST_SENSE) { sensekey = 0x05; /* illegal request */ sensecode = 0x20; /* invalid command */ havefakesense = 1; return USB_STOR_TRANSPORT_FAILED; } for (; srb->cmd_len<12; srb->cmd_len++) srb->cmnd[srb->cmd_len] = 0; srb->cmnd[1] = LUNBITS; ptr[0] = 0; for (i=0; i<12; i++) sprintf(ptr+strlen(ptr), "%02X ", srb->cmnd[i]); usb_stor_dbg(us, "Send control for command %s\n", ptr); result = sddr09_send_scsi_command(us, srb->cmnd, 12); if (result) { usb_stor_dbg(us, "sddr09_send_scsi_command returns %d\n", result); return USB_STOR_TRANSPORT_ERROR; } if (scsi_bufflen(srb) == 0) return USB_STOR_TRANSPORT_GOOD; if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_FROM_DEVICE) { unsigned int pipe = (srb->sc_data_direction == DMA_TO_DEVICE) ? us->send_bulk_pipe : us->recv_bulk_pipe; usb_stor_dbg(us, "%s %d bytes\n", (srb->sc_data_direction == DMA_TO_DEVICE) ? "sending" : "receiving", scsi_bufflen(srb)); result = usb_stor_bulk_srb(us, pipe, srb); return (result == USB_STOR_XFER_GOOD ? 
USB_STOR_TRANSPORT_GOOD : USB_STOR_TRANSPORT_ERROR); } return USB_STOR_TRANSPORT_GOOD; } /* * Initialization routine for the sddr09 subdriver */ static int usb_stor_sddr09_init(struct us_data *us) { return sddr09_common_init(us); } static struct scsi_host_template sddr09_host_template; static int sddr09_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - sddr09_usb_ids) + sddr09_unusual_dev_list, &sddr09_host_template); if (result) return result; if (us->protocol == USB_PR_DPCM_USB) { us->transport_name = "Control/Bulk-EUSB/SDDR09"; us->transport = dpcm_transport; us->transport_reset = usb_stor_CB_reset; us->max_lun = 1; } else { us->transport_name = "EUSB/SDDR09"; us->transport = sddr09_transport; us->transport_reset = usb_stor_CB_reset; us->max_lun = 0; } result = usb_stor_probe2(us); return result; } static struct usb_driver sddr09_driver = { .name = DRV_NAME, .probe = sddr09_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = sddr09_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(sddr09_driver, sddr09_host_template, DRV_NAME);
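/*
 * Editor's illustrative sketch (not part of sddr09.c): a standalone,
 * user-space rendering of the zone-based LBA decoding that
 * sddr09_read_map() performs on control bytes 6 and 7 of each physical
 * block. It assumes the layout described in the comments above: an
 * address-field marker of 0x1 in the high nibble of byte 6, an even-parity
 * check over byte6 XOR byte7, and 1000 logical blocks per 1024-block zone.
 * All names below are hypothetical and exist only for illustration.
 */
#include <stdio.h>

/* 1 if the byte has an odd number of set bits (bad for an even-parity field) */
static int odd_parity(unsigned char b)
{
	int ones = 0;

	while (b) {
		ones += b & 1;
		b >>= 1;
	}
	return ones & 1;
}

/*
 * Decode the zone-local LBA stored in control bytes b6/b7 of a block in
 * zone 'zone' (zone == pba / 1024) and return the global LBA, or -1 if the
 * field is invalid, mirroring the checks in sddr09_read_map().
 */
static int decode_lba(unsigned char b6, unsigned char b7, int zone)
{
	int lba;

	if ((b6 >> 4) != 0x01)		/* address-field marker missing */
		return -1;
	if (odd_parity(b6 ^ b7))	/* bad parity */
		return -1;
	lba = (((b6 << 8) | b7) & 0x07FF) >> 1;	/* zone-local LBA, 0..999 */
	if (lba >= 1000)
		return -1;
	return lba + 1000 * zone;	/* e.g. zone 2, local LBA 18 -> 2018 */
}

int main(void)
{
	/* control bytes 0x10 0x25 in zone 2 decode to global LBA 2018 */
	printf("global LBA = %d\n", decode_lba(0x10, 0x25, 2));
	return 0;
}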
// SPDX-License-Identifier: GPL-2.0+ /* * 2002-10-15 Posix Clocks & timers * by George Anzinger george@mvista.com * Copyright (C) 2002 2003 by MontaVista Software. * * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
* Copyright (C) 2004 Boris Hu * * These are all the functions necessary to implement POSIX clocks & timers */ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/mutex.h> #include <linux/sched/task.h> #include <linux/uaccess.h> #include <linux/list.h> #include <linux/init.h> #include <linux/compiler.h> #include <linux/hash.h> #include <linux/posix-clock.h> #include <linux/posix-timers.h> #include <linux/syscalls.h> #include <linux/wait.h> #include <linux/workqueue.h> #include <linux/export.h> #include <linux/hashtable.h> #include <linux/compat.h> #include <linux/nospec.h> #include <linux/time_namespace.h> #include "timekeeping.h" #include "posix-timers.h" static struct kmem_cache *posix_timers_cache; /* * Timers are managed in a hash table for lockless lookup. The hash key is * constructed from current::signal and the timer ID and the timer is * matched against current::signal and the timer ID when walking the hash * bucket list. * * This allows checkpoint/restore to reconstruct the exact timer IDs for * a process. */ static DEFINE_HASHTABLE(posix_timers_hashtable, 9); static DEFINE_SPINLOCK(hash_lock); static const struct k_clock * const posix_clocks[]; static const struct k_clock *clockid_to_kclock(const clockid_t id); static const struct k_clock clock_realtime, clock_monotonic; /* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */ #if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \ ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); #define lock_timer(tid, flags) \ ({ struct k_itimer *__timr; \ __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \ __timr; \ }) static int hash(struct signal_struct *sig, unsigned int nr) { return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable)); } static struct k_itimer *__posix_timers_find(struct hlist_head *head, struct signal_struct *sig, timer_t id) { struct k_itimer *timer; hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&hash_lock)) { /* timer->it_signal can be set concurrently */ if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id)) return timer; } return NULL; } static struct k_itimer *posix_timer_by_id(timer_t id) { struct signal_struct *sig = current->signal; struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)]; return __posix_timers_find(head, sig, id); } static int posix_timer_add(struct k_itimer *timer) { struct signal_struct *sig = current->signal; struct hlist_head *head; unsigned int cnt, id; /* * FIXME: Replace this by a per signal struct xarray once there is * a plan to handle the resulting CRIU regression gracefully. */ for (cnt = 0; cnt <= INT_MAX; cnt++) { spin_lock(&hash_lock); id = sig->next_posix_timer_id; /* Write the next ID back. 
Clamp it to the positive space */ sig->next_posix_timer_id = (id + 1) & INT_MAX; head = &posix_timers_hashtable[hash(sig, id)]; if (!__posix_timers_find(head, sig, id)) { hlist_add_head_rcu(&timer->t_hash, head); spin_unlock(&hash_lock); return id; } spin_unlock(&hash_lock); } /* POSIX return code when no timer ID could be allocated */ return -EAGAIN; } static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) { spin_unlock_irqrestore(&timr->it_lock, flags); } static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_real_ts64(tp); return 0; } static ktime_t posix_get_realtime_ktime(clockid_t which_clock) { return ktime_get_real(); } static int posix_clock_realtime_set(const clockid_t which_clock, const struct timespec64 *tp) { return do_sys_settimeofday64(tp, NULL); } static int posix_clock_realtime_adj(const clockid_t which_clock, struct __kernel_timex *t) { return do_adjtimex(t); } static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_ts64(tp); timens_add_monotonic(tp); return 0; } static ktime_t posix_get_monotonic_ktime(clockid_t which_clock) { return ktime_get(); } static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { ktime_get_raw_ts64(tp); timens_add_monotonic(tp); return 0; } static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_real_ts64(tp); return 0; } static int posix_get_monotonic_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_ts64(tp); timens_add_monotonic(tp); return 0; } static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp) { *tp = ktime_to_timespec64(KTIME_LOW_RES); return 0; } static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp) { ktime_get_boottime_ts64(tp); timens_add_boottime(tp); return 0; } static ktime_t posix_get_boottime_ktime(const clockid_t which_clock) { return ktime_get_boottime(); } static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_clocktai_ts64(tp); return 0; } static ktime_t posix_get_tai_ktime(clockid_t which_clock) { return ktime_get_clocktai(); } static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) { tp->tv_sec = 0; tp->tv_nsec = hrtimer_resolution; return 0; } static __init int init_posix_timers(void) { posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof(struct k_itimer), 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); return 0; } __initcall(init_posix_timers); /* * The siginfo si_overrun field and the return value of timer_getoverrun(2) * are of type int. Clamp the overrun value to INT_MAX */ static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval) { s64 sum = timr->it_overrun_last + (s64)baseval; return sum > (s64)INT_MAX ? INT_MAX : (int)sum; } static void common_hrtimer_rearm(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), timr->it_interval); hrtimer_restart(timer); } /* * This function is called from the signal delivery code if * info->si_sys_private is not zero, which indicates that the timer has to * be rearmed. Restart the timer and update info::si_overrun. 
*/ void posixtimer_rearm(struct kernel_siginfo *info) { struct k_itimer *timr; unsigned long flags; timr = lock_timer(info->si_tid, &flags); if (!timr) return; if (timr->it_interval && timr->it_requeue_pending == info->si_sys_private) { timr->kclock->timer_rearm(timr); timr->it_active = 1; timr->it_overrun_last = timr->it_overrun; timr->it_overrun = -1LL; ++timr->it_requeue_pending; info->si_overrun = timer_overrun_to_int(timr, info->si_overrun); } unlock_timer(timr, flags); } int posix_timer_event(struct k_itimer *timr, int si_private) { enum pid_type type; int ret; /* * FIXME: if ->sigq is queued we can race with * dequeue_signal()->posixtimer_rearm(). * * If dequeue_signal() sees the "right" value of * si_sys_private it calls posixtimer_rearm(). * We re-queue ->sigq and drop ->it_lock(). * posixtimer_rearm() locks the timer * and re-schedules it while ->sigq is pending. * Not really bad, but not that we want. */ timr->sigq->info.si_sys_private = si_private; type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID; ret = send_sigqueue(timr->sigq, timr->it_pid, type); /* If we failed to send the signal the timer stops. */ return ret > 0; } /* * This function gets called when a POSIX.1b interval timer expires from * the HRTIMER interrupt (soft interrupt on RT kernels). * * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI * based timers. */ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { enum hrtimer_restart ret = HRTIMER_NORESTART; struct k_itimer *timr; unsigned long flags; int si_private = 0; timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); timr->it_active = 0; if (timr->it_interval != 0) si_private = ++timr->it_requeue_pending; if (posix_timer_event(timr, si_private)) { /* * The signal was not queued due to SIG_IGN. As a * consequence the timer is not going to be rearmed from * the signal delivery path. But as a real signal handler * can be installed later the timer must be rearmed here. */ if (timr->it_interval != 0) { ktime_t now = hrtimer_cb_get_time(timer); /* * FIXME: What we really want, is to stop this * timer completely and restart it in case the * SIG_IGN is removed. This is a non trivial * change to the signal handling code. * * For now let timers with an interval less than a * jiffie expire every jiffie and recheck for a * valid signal handler. * * This avoids interrupt starvation in case of a * very small interval, which would expire the * timer immediately again. * * Moving now ahead of time by one jiffie tricks * hrtimer_forward() to expire the timer later, * while it still maintains the overrun accuracy * for the price of a slight inconsistency in the * timer_gettime() case. This is at least better * than a timer storm. * * Only required when high resolution timers are * enabled as the periodic tick based timers are * automatically aligned to the next tick. 
*/ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) { ktime_t kj = TICK_NSEC; if (timr->it_interval < kj) now = ktime_add(now, kj); } timr->it_overrun += hrtimer_forward(timer, now, timr->it_interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; timr->it_active = 1; } } unlock_timer(timr, flags); return ret; } static struct pid *good_sigevent(sigevent_t * event) { struct pid *pid = task_tgid(current); struct task_struct *rtn; switch (event->sigev_notify) { case SIGEV_SIGNAL | SIGEV_THREAD_ID: pid = find_vpid(event->sigev_notify_thread_id); rtn = pid_task(pid, PIDTYPE_PID); if (!rtn || !same_thread_group(rtn, current)) return NULL; fallthrough; case SIGEV_SIGNAL: case SIGEV_THREAD: if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) return NULL; fallthrough; case SIGEV_NONE: return pid; default: return NULL; } } static struct k_itimer * alloc_posix_timer(void) { struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); if (!tmr) return tmr; if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { kmem_cache_free(posix_timers_cache, tmr); return NULL; } clear_siginfo(&tmr->sigq->info); return tmr; } static void k_itimer_rcu_free(struct rcu_head *head) { struct k_itimer *tmr = container_of(head, struct k_itimer, rcu); kmem_cache_free(posix_timers_cache, tmr); } static void posix_timer_free(struct k_itimer *tmr) { put_pid(tmr->it_pid); sigqueue_free(tmr->sigq); call_rcu(&tmr->rcu, k_itimer_rcu_free); } static void posix_timer_unhash_and_free(struct k_itimer *tmr) { spin_lock(&hash_lock); hlist_del_rcu(&tmr->t_hash); spin_unlock(&hash_lock); posix_timer_free(tmr); } static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); return 0; } /* Create a POSIX.1b interval timer. */ static int do_timer_create(clockid_t which_clock, struct sigevent *event, timer_t __user *created_timer_id) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct k_itimer *new_timer; int error, new_timer_id; if (!kc) return -EINVAL; if (!kc->timer_create) return -EOPNOTSUPP; new_timer = alloc_posix_timer(); if (unlikely(!new_timer)) return -EAGAIN; spin_lock_init(&new_timer->it_lock); /* * Add the timer to the hash table. The timer is not yet valid * because new_timer::it_signal is still NULL. The timer id is also * not yet visible to user space. */ new_timer_id = posix_timer_add(new_timer); if (new_timer_id < 0) { posix_timer_free(new_timer); return new_timer_id; } new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; new_timer->kclock = kc; new_timer->it_overrun = -1LL; if (event) { rcu_read_lock(); new_timer->it_pid = get_pid(good_sigevent(event)); rcu_read_unlock(); if (!new_timer->it_pid) { error = -EINVAL; goto out; } new_timer->it_sigev_notify = event->sigev_notify; new_timer->sigq->info.si_signo = event->sigev_signo; new_timer->sigq->info.si_value = event->sigev_value; } else { new_timer->it_sigev_notify = SIGEV_SIGNAL; new_timer->sigq->info.si_signo = SIGALRM; memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t)); new_timer->sigq->info.si_value.sival_int = new_timer->it_id; new_timer->it_pid = get_pid(task_tgid(current)); } new_timer->sigq->info.si_tid = new_timer->it_id; new_timer->sigq->info.si_code = SI_TIMER; if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) { error = -EFAULT; goto out; } /* * After succesful copy out, the timer ID is visible to user space * now but not yet valid because new_timer::signal is still NULL. 
* * Complete the initialization with the clock specific create * callback. */ error = kc->timer_create(new_timer); if (error) goto out; spin_lock_irq(&current->sighand->siglock); /* This makes the timer valid in the hash table */ WRITE_ONCE(new_timer->it_signal, current->signal); list_add(&new_timer->list, &current->signal->posix_timers); spin_unlock_irq(&current->sighand->siglock); /* * After unlocking sighand::siglock @new_timer is subject to * concurrent removal and cannot be touched anymore */ return 0; out: posix_timer_unhash_and_free(new_timer); return error; } SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, struct sigevent __user *, timer_event_spec, timer_t __user *, created_timer_id) { if (timer_event_spec) { sigevent_t event; if (copy_from_user(&event, timer_event_spec, sizeof (event))) return -EFAULT; return do_timer_create(which_clock, &event, created_timer_id); } return do_timer_create(which_clock, NULL, created_timer_id); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock, struct compat_sigevent __user *, timer_event_spec, timer_t __user *, created_timer_id) { if (timer_event_spec) { sigevent_t event; if (get_compat_sigevent(&event, timer_event_spec)) return -EFAULT; return do_timer_create(which_clock, &event, created_timer_id); } return do_timer_create(which_clock, NULL, created_timer_id); } #endif static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; /* * timer_t could be any type >= int and we want to make sure any * @timer_id outside positive int range fails lookup. */ if ((unsigned long long)timer_id > INT_MAX) return NULL; /* * The hash lookup and the timers are RCU protected. * * Timers are added to the hash in invalid state where * timr::it_signal == NULL. timer::it_signal is only set after the * rest of the initialization succeeded. * * Timer destruction happens in steps: * 1) Set timr::it_signal to NULL with timr::it_lock held * 2) Release timr::it_lock * 3) Remove from the hash under hash_lock * 4) Call RCU for removal after the grace period * * Holding rcu_read_lock() accross the lookup ensures that * the timer cannot be freed. * * The lookup validates locklessly that timr::it_signal == * current::it_signal and timr::it_id == @timer_id. timr::it_id * can't change, but timr::it_signal becomes NULL during * destruction. */ rcu_read_lock(); timr = posix_timer_by_id(timer_id); if (timr) { spin_lock_irqsave(&timr->it_lock, *flags); /* * Validate under timr::it_lock that timr::it_signal is * still valid. Pairs with #1 above. */ if (timr->it_signal == current->signal) { rcu_read_unlock(); return timr; } spin_unlock_irqrestore(&timr->it_lock, *flags); } rcu_read_unlock(); return NULL; } static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now) { struct hrtimer *timer = &timr->it.real.timer; return __hrtimer_expires_remaining_adjusted(timer, now); } static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now) { struct hrtimer *timer = &timr->it.real.timer; return hrtimer_forward(timer, now, timr->it_interval); } /* * Get the time remaining on a POSIX.1b interval timer. * * Two issues to handle here: * * 1) The timer has a requeue pending. The return value must appear as * if the timer has been requeued right now. * * 2) The timer is a SIGEV_NONE timer. These timers are never enqueued * into the hrtimer queue and therefore never expired. Emulate expiry * here taking #1 into account. 
*/ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { const struct k_clock *kc = timr->kclock; ktime_t now, remaining, iv; bool sig_none; sig_none = timr->it_sigev_notify == SIGEV_NONE; iv = timr->it_interval; /* interval timer ? */ if (iv) { cur_setting->it_interval = ktime_to_timespec64(iv); } else if (!timr->it_active) { /* * SIGEV_NONE oneshot timers are never queued and therefore * timr->it_active is always false. The check below * vs. remaining time will handle this case. * * For all other timers there is nothing to update here, so * return. */ if (!sig_none) return; } now = kc->clock_get_ktime(timr->it_clock); /* * If this is an interval timer and either has requeue pending or * is a SIGEV_NONE timer move the expiry time forward by intervals, * so expiry is > now. */ if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none)) timr->it_overrun += kc->timer_forward(timr, now); remaining = kc->timer_remaining(timr, now); /* * As @now is retrieved before a possible timer_forward() and * cannot be reevaluated by the compiler @remaining is based on the * same @now value. Therefore @remaining is consistent vs. @now. * * Consequently all interval timers, i.e. @iv > 0, cannot have a * remaining time <= 0 because timer_forward() guarantees to move * them forward so that the next timer expiry is > @now. */ if (remaining <= 0) { /* * A single shot SIGEV_NONE timer must return 0, when it is * expired! Timers which have a real signal delivery mode * must return a remaining time greater than 0 because the * signal has not yet been delivered. */ if (!sig_none) cur_setting->it_value.tv_nsec = 1; } else { cur_setting->it_value = ktime_to_timespec64(remaining); } } static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) { const struct k_clock *kc; struct k_itimer *timr; unsigned long flags; int ret = 0; timr = lock_timer(timer_id, &flags); if (!timr) return -EINVAL; memset(setting, 0, sizeof(*setting)); kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_get)) ret = -EINVAL; else kc->timer_get(timr, setting); unlock_timer(timr, flags); return ret; } /* Get the time remaining on a POSIX.1b interval timer. */ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct __kernel_itimerspec __user *, setting) { struct itimerspec64 cur_setting; int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { if (put_itimerspec64(&cur_setting, setting)) ret = -EFAULT; } return ret; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, struct old_itimerspec32 __user *, setting) { struct itimerspec64 cur_setting; int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { if (put_old_itimerspec32(&cur_setting, setting)) ret = -EFAULT; } return ret; } #endif /** * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer * @timer_id: The timer ID which identifies the timer * * The "overrun count" of a timer is one plus the number of expiration * intervals which have elapsed between the first expiry, which queues the * signal and the actual signal delivery. On signal delivery the "overrun * count" is calculated and cached, so it can be returned directly here. * * As this is relative to the last queued signal the returned overrun count * is meaningless outside of the signal delivery path and even there it * does not accurately reflect the current state when user space evaluates * it. 
* * Returns: * -EINVAL @timer_id is invalid * 1..INT_MAX The number of overruns related to the last delivered signal */ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { struct k_itimer *timr; unsigned long flags; int overrun; timr = lock_timer(timer_id, &flags); if (!timr) return -EINVAL; overrun = timer_overrun_to_int(timr, 0); unlock_timer(timr, flags); return overrun; } static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, bool absolute, bool sigev_none) { struct hrtimer *timer = &timr->it.real.timer; enum hrtimer_mode mode; mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; /* * Posix magic: Relative CLOCK_REALTIME timers are not affected by * clock modifications, so they become CLOCK_MONOTONIC based under the * hood. See hrtimer_init(). Update timr->kclock, so the generic * functions which use timr->kclock->clock_get_*() work. * * Note: it_clock stays unmodified, because the next timer_set() might * use ABSTIME, so it needs to switch back. */ if (timr->it_clock == CLOCK_REALTIME) timr->kclock = absolute ? &clock_realtime : &clock_monotonic; hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); timr->it.real.timer.function = posix_timer_fn; if (!absolute) expires = ktime_add_safe(expires, timer->base->get_time()); hrtimer_set_expires(timer, expires); if (!sigev_none) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } static int common_hrtimer_try_to_cancel(struct k_itimer *timr) { return hrtimer_try_to_cancel(&timr->it.real.timer); } static void common_timer_wait_running(struct k_itimer *timer) { hrtimer_cancel_wait_running(&timer->it.real.timer); } /* * On PREEMPT_RT this prevents priority inversion and a potential livelock * against the ksoftirqd thread in case that ksoftirqd gets preempted while * executing a hrtimer callback. * * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this * just results in a cpu_relax(). * * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this * prevents spinning on an eventually scheduled out task and a livelock * when the task which tries to delete or disarm the timer has preempted * the task which runs the expiry in task work context. */ static struct k_itimer *timer_wait_running(struct k_itimer *timer, unsigned long *flags) { const struct k_clock *kc = READ_ONCE(timer->kclock); timer_t timer_id = READ_ONCE(timer->it_id); /* Prevent kfree(timer) after dropping the lock */ rcu_read_lock(); unlock_timer(timer, *flags); /* * kc->timer_wait_running() might drop RCU lock. So @timer * cannot be touched anymore after the function returns! */ if (!WARN_ON_ONCE(!kc->timer_wait_running)) kc->timer_wait_running(timer); rcu_read_unlock(); /* Relock the timer. It might be not longer hashed. */ return lock_timer(timer_id, flags); } /* Set a POSIX.1b interval timer. */ int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec64 *new_setting, struct itimerspec64 *old_setting) { const struct k_clock *kc = timr->kclock; bool sigev_none; ktime_t expires; if (old_setting) common_timer_get(timr, old_setting); /* Prevent rearming by clearing the interval */ timr->it_interval = 0; /* * Careful here. On SMP systems the timer expiry function could be * active and spinning on timr->it_lock. 
*/ if (kc->timer_try_to_cancel(timr) < 0) return TIMER_RETRY; timr->it_active = 0; timr->it_requeue_pending = (timr->it_requeue_pending + 2) & ~REQUEUE_PENDING; timr->it_overrun_last = 0; /* Switch off the timer when it_value is zero */ if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) return 0; timr->it_interval = timespec64_to_ktime(new_setting->it_interval); expires = timespec64_to_ktime(new_setting->it_value); if (flags & TIMER_ABSTIME) expires = timens_ktime_to_host(timr->it_clock, expires); sigev_none = timr->it_sigev_notify == SIGEV_NONE; kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); timr->it_active = !sigev_none; return 0; } static int do_timer_settime(timer_t timer_id, int tmr_flags, struct itimerspec64 *new_spec64, struct itimerspec64 *old_spec64) { const struct k_clock *kc; struct k_itimer *timr; unsigned long flags; int error = 0; if (!timespec64_valid(&new_spec64->it_interval) || !timespec64_valid(&new_spec64->it_value)) return -EINVAL; if (old_spec64) memset(old_spec64, 0, sizeof(*old_spec64)); timr = lock_timer(timer_id, &flags); retry: if (!timr) return -EINVAL; kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_set)) error = -EINVAL; else error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64); if (error == TIMER_RETRY) { // We already got the old time... old_spec64 = NULL; /* Unlocks and relocks the timer if it still exists */ timr = timer_wait_running(timr, &flags); goto retry; } unlock_timer(timr, flags); return error; } /* Set a POSIX.1b interval timer */ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, const struct __kernel_itimerspec __user *, new_setting, struct __kernel_itimerspec __user *, old_setting) { struct itimerspec64 new_spec, old_spec, *rtn; int error = 0; if (!new_setting) return -EINVAL; if (get_itimerspec64(&new_spec, new_setting)) return -EFAULT; rtn = old_setting ? &old_spec : NULL; error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old_setting) { if (put_itimerspec64(&old_spec, old_setting)) error = -EFAULT; } return error; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags, struct old_itimerspec32 __user *, new, struct old_itimerspec32 __user *, old) { struct itimerspec64 new_spec, old_spec; struct itimerspec64 *rtn = old ? &old_spec : NULL; int error = 0; if (!new) return -EINVAL; if (get_old_itimerspec32(&new_spec, new)) return -EFAULT; error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old) { if (put_old_itimerspec32(&old_spec, old)) error = -EFAULT; } return error; } #endif int common_timer_del(struct k_itimer *timer) { const struct k_clock *kc = timer->kclock; timer->it_interval = 0; if (kc->timer_try_to_cancel(timer) < 0) return TIMER_RETRY; timer->it_active = 0; return 0; } static inline int timer_delete_hook(struct k_itimer *timer) { const struct k_clock *kc = timer->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_del)) return -EINVAL; return kc->timer_del(timer); } /* Delete a POSIX.1b interval timer. 
*/ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) { struct k_itimer *timer; unsigned long flags; timer = lock_timer(timer_id, &flags); retry_delete: if (!timer) return -EINVAL; if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) { /* Unlocks and relocks the timer if it still exists */ timer = timer_wait_running(timer, &flags); goto retry_delete; } spin_lock(&current->sighand->siglock); list_del(&timer->list); spin_unlock(&current->sighand->siglock); /* * A concurrent lookup could check timer::it_signal lockless. It * will reevaluate with timer::it_lock held and observe the NULL. */ WRITE_ONCE(timer->it_signal, NULL); unlock_timer(timer, flags); posix_timer_unhash_and_free(timer); return 0; } /* * Delete a timer if it is armed, remove it from the hash and schedule it * for RCU freeing. */ static void itimer_delete(struct k_itimer *timer) { unsigned long flags; /* * irqsave is required to make timer_wait_running() work. */ spin_lock_irqsave(&timer->it_lock, flags); retry_delete: /* * Even if the timer is not longer accessible from other tasks * it still might be armed and queued in the underlying timer * mechanism. Worse, that timer mechanism might run the expiry * function concurrently. */ if (timer_delete_hook(timer) == TIMER_RETRY) { /* * Timer is expired concurrently, prevent livelocks * and pointless spinning on RT. * * timer_wait_running() drops timer::it_lock, which opens * the possibility for another task to delete the timer. * * That's not possible here because this is invoked from * do_exit() only for the last thread of the thread group. * So no other task can access and delete that timer. */ if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) return; goto retry_delete; } list_del(&timer->list); /* * Setting timer::it_signal to NULL is technically not required * here as nothing can access the timer anymore legitimately via * the hash table. Set it to NULL nevertheless so that all deletion * paths are consistent. */ WRITE_ONCE(timer->it_signal, NULL); spin_unlock_irqrestore(&timer->it_lock, flags); posix_timer_unhash_and_free(timer); } /* * Invoked from do_exit() when the last thread of a thread group exits. * At that point no other task can access the timers of the dying * task anymore. */ void exit_itimers(struct task_struct *tsk) { struct list_head timers; struct k_itimer *tmr; if (list_empty(&tsk->signal->posix_timers)) return; /* Protect against concurrent read via /proc/$PID/timers */ spin_lock_irq(&tsk->sighand->siglock); list_replace_init(&tsk->signal->posix_timers, &timers); spin_unlock_irq(&tsk->sighand->siglock); /* The timers are not longer accessible via tsk::signal */ while (!list_empty(&timers)) { tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); } } SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 new_tp; if (!kc || !kc->clock_set) return -EINVAL; if (get_timespec64(&new_tp, tp)) return -EFAULT; /* * Permission checks have to be done inside the clock specific * setter callback. 
*/ return kc->clock_set(which_clock, &new_tp); } SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 kernel_tp; int error; if (!kc) return -EINVAL; error = kc->clock_get_timespec(which_clock, &kernel_tp); if (!error && put_timespec64(&kernel_tp, tp)) error = -EFAULT; return error; } int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) { const struct k_clock *kc = clockid_to_kclock(which_clock); if (!kc) return -EINVAL; if (!kc->clock_adj) return -EOPNOTSUPP; return kc->clock_adj(which_clock, ktx); } SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, struct __kernel_timex __user *, utx) { struct __kernel_timex ktx; int err; if (copy_from_user(&ktx, utx, sizeof(ktx))) return -EFAULT; err = do_clock_adjtime(which_clock, &ktx); if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx))) return -EFAULT; return err; } /** * sys_clock_getres - Get the resolution of a clock * @which_clock: The clock to get the resolution for * @tp: Pointer to a a user space timespec64 for storage * * POSIX defines: * * "The clock_getres() function shall return the resolution of any * clock. Clock resolutions are implementation-defined and cannot be set by * a process. If the argument res is not NULL, the resolution of the * specified clock shall be stored in the location pointed to by res. If * res is NULL, the clock resolution is not returned. If the time argument * of clock_settime() is not a multiple of res, then the value is truncated * to a multiple of res." * * Due to the various hardware constraints the real resolution can vary * wildly and even change during runtime when the underlying devices are * replaced. The kernel also can use hardware devices with different * resolutions for reading the time and for arming timers. * * The kernel therefore deviates from the POSIX spec in various aspects: * * 1) The resolution returned to user space * * For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI, * CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALAREM and CLOCK_MONOTONIC_RAW * the kernel differentiates only two cases: * * I) Low resolution mode: * * When high resolution timers are disabled at compile or runtime * the resolution returned is nanoseconds per tick, which represents * the precision at which timers expire. * * II) High resolution mode: * * When high resolution timers are enabled the resolution returned * is always one nanosecond independent of the actual resolution of * the underlying hardware devices. * * For CLOCK_*_ALARM the actual resolution depends on system * state. When system is running the resolution is the same as the * resolution of the other clocks. During suspend the actual * resolution is the resolution of the underlying RTC device which * might be way less precise than the clockevent device used during * running state. * * For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution * returned is always nanoseconds per tick. * * For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution * returned is always one nanosecond under the assumption that the * underlying scheduler clock has a better resolution than nanoseconds * per tick. * * For dynamic POSIX clocks (PTP devices) the resolution returned is * always one nanosecond. * * 2) Affect on sys_clock_settime() * * The kernel does not truncate the time which is handed in to * sys_clock_settime(). 
The kernel internal timekeeping is always using * nanoseconds precision independent of the clocksource device which is * used to read the time from. The resolution of that device only * affects the presicion of the time returned by sys_clock_gettime(). * * Returns: * 0 Success. @tp contains the resolution * -EINVAL @which_clock is not a valid clock ID * -EFAULT Copying the resolution to @tp faulted * -ENODEV Dynamic POSIX clock is not backed by a device * -EOPNOTSUPP Dynamic POSIX clock does not support getres() */ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 rtn_tp; int error; if (!kc) return -EINVAL; error = kc->clock_getres(which_clock, &rtn_tp); if (!error && tp && put_timespec64(&rtn_tp, tp)) error = -EFAULT; return error; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; if (!kc || !kc->clock_set) return -EINVAL; if (get_old_timespec32(&ts, tp)) return -EFAULT; return kc->clock_set(which_clock, &ts); } SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; int err; if (!kc) return -EINVAL; err = kc->clock_get_timespec(which_clock, &ts); if (!err && put_old_timespec32(&ts, tp)) err = -EFAULT; return err; } SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock, struct old_timex32 __user *, utp) { struct __kernel_timex ktx; int err; err = get_old_timex32(&ktx, utp); if (err) return err; err = do_clock_adjtime(which_clock, &ktx); if (err >= 0 && put_old_timex32(utp, &ktx)) return -EFAULT; return err; } SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; int err; if (!kc) return -EINVAL; err = kc->clock_getres(which_clock, &ts); if (!err && tp && put_old_timespec32(&ts, tp)) return -EFAULT; return err; } #endif /* * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI */ static int common_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { ktime_t texp = timespec64_to_ktime(*rqtp); return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } /* * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME * * Absolute nanosleeps for these clocks are time-namespace adjusted. */ static int common_nsleep_timens(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { ktime_t texp = timespec64_to_ktime(*rqtp); if (flags & TIMER_ABSTIME) texp = timens_ktime_to_host(which_clock, texp); return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct __kernel_timespec __user *, rqtp, struct __kernel_timespec __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -EOPNOTSUPP; if (get_timespec64(&t, rqtp)) return -EFAULT; if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? 
TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; return kc->nsleep(which_clock, flags, &t); } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, struct old_timespec32 __user *, rqtp, struct old_timespec32 __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -EOPNOTSUPP; if (get_old_timespec32(&t, rqtp)) return -EFAULT; if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; return kc->nsleep(which_clock, flags, &t); } #endif static const struct k_clock clock_realtime = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_realtime_timespec, .clock_get_ktime = posix_get_realtime_ktime, .clock_set = posix_clock_realtime_set, .clock_adj = posix_clock_realtime_adj, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_monotonic = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_monotonic_timespec, .clock_get_ktime = posix_get_monotonic_ktime, .nsleep = common_nsleep_timens, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_monotonic_raw = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_monotonic_raw, }; static const struct k_clock clock_realtime_coarse = { .clock_getres = posix_get_coarse_res, .clock_get_timespec = posix_get_realtime_coarse, }; static const struct k_clock clock_monotonic_coarse = { .clock_getres = posix_get_coarse_res, .clock_get_timespec = posix_get_monotonic_coarse, }; static const struct k_clock clock_tai = { .clock_getres = posix_get_hrtimer_res, .clock_get_ktime = posix_get_tai_ktime, .clock_get_timespec = posix_get_tai_timespec, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_boottime = { .clock_getres = posix_get_hrtimer_res, .clock_get_ktime = posix_get_boottime_ktime, .clock_get_timespec = posix_get_boottime_timespec, .nsleep = common_nsleep_timens, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = 
common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock * const posix_clocks[] = { [CLOCK_REALTIME] = &clock_realtime, [CLOCK_MONOTONIC] = &clock_monotonic, [CLOCK_PROCESS_CPUTIME_ID] = &clock_process, [CLOCK_THREAD_CPUTIME_ID] = &clock_thread, [CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw, [CLOCK_REALTIME_COARSE] = &clock_realtime_coarse, [CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse, [CLOCK_BOOTTIME] = &clock_boottime, [CLOCK_REALTIME_ALARM] = &alarm_clock, [CLOCK_BOOTTIME_ALARM] = &alarm_clock, [CLOCK_TAI] = &clock_tai, }; static const struct k_clock *clockid_to_kclock(const clockid_t id) { clockid_t idx = id; if (id < 0) { return (id & CLOCKFD_MASK) == CLOCKFD ? &clock_posix_dynamic : &clock_posix_cpu; } if (id >= ARRAY_SIZE(posix_clocks)) return NULL; return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))]; }
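/*
 * Editor's illustrative sketch (not part of posix-timers.c): a minimal
 * user-space consumer of the syscalls implemented above -- timer_create(),
 * timer_settime(), timer_getoverrun() and timer_delete() -- using
 * CLOCK_MONOTONIC with SIGEV_SIGNAL delivery (the path accepted by
 * good_sigevent()). Older glibc may need -lrt at link time.
 */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

static volatile sig_atomic_t ticks;

static void on_tick(int sig, siginfo_t *si, void *ctx)
{
	(void)sig; (void)si; (void)ctx;
	/* si->si_overrun reflects the count computed by timer_overrun_to_int() */
	ticks++;
}

int main(void)
{
	struct sigaction sa;
	struct sigevent sev;
	struct itimerspec its;
	timer_t tid;

	memset(&sa, 0, sizeof(sa));
	sa.sa_flags = SA_SIGINFO;
	sa.sa_sigaction = on_tick;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGRTMIN, &sa, NULL);

	memset(&sev, 0, sizeof(sev));
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = SIGRTMIN;

	if (timer_create(CLOCK_MONOTONIC, &sev, &tid))	/* sys_timer_create */
		exit(1);

	memset(&its, 0, sizeof(its));
	its.it_value.tv_nsec = 100 * 1000 * 1000;	/* first expiry in 100 ms */
	its.it_interval.tv_nsec = 100 * 1000 * 1000;	/* then every 100 ms */
	if (timer_settime(tid, 0, &its, NULL))		/* sys_timer_settime */
		exit(1);

	while (ticks < 10)
		pause();	/* each expiry delivers SIGRTMIN and wakes us */

	printf("caught %d expirations, overrun of last signal = %d\n",
	       (int)ticks, timer_getoverrun(tid));	/* sys_timer_getoverrun */
	timer_delete(tid);				/* sys_timer_delete */
	return 0;
}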
// SPDX-License-Identifier: GPL-2.0-or-later /* * USB Synaptics device driver * * Copyright (c) 2002 Rob Miller (rob@inpharmatica . co . uk) * Copyright (c) 2003 Ron Lee (ron@debian.org) * cPad driver for kernel 2.4 * * Copyright (c) 2004 Jan Steinhoff (cpad@jan-steinhoff . de) * Copyright (c) 2004 Ron Lee (ron@debian.org) * rewritten for kernel 2.6 * * cPad display character device part is not included. It can be found at * http://jan-steinhoff.de/linux/synaptics-usb.html * * Based on: usb_skeleton.c v2.2 by Greg Kroah-Hartman * drivers/hid/usbhid/usbmouse.c by Vojtech Pavlik * drivers/input/mouse/synaptics.c by Peter Osterlund * * Trademarks are the property of their respective owners. */ /* * There are three different types of Synaptics USB devices: Touchpads, * touchsticks (or trackpoints), and touchscreens. Touchpads are well supported * by this driver, touchstick support has not been tested much yet, and * touchscreens have not been tested at all. * * Up to three alternate settings are possible: * setting 0: one int endpoint for relative movement (used by usbhid.ko) * setting 1: one int endpoint for absolute finger position * setting 2 (cPad only): one int endpoint for absolute finger position and * two bulk endpoints for the display (in/out) * This driver uses setting 1.
*/ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/usb.h> #include <linux/input.h> #include <linux/usb/input.h> #define USB_VENDOR_ID_SYNAPTICS 0x06cb #define USB_DEVICE_ID_SYNAPTICS_TP 0x0001 /* Synaptics USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_INT_TP 0x0002 /* Integrated USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_CPAD 0x0003 /* Synaptics cPad */ #define USB_DEVICE_ID_SYNAPTICS_TS 0x0006 /* Synaptics TouchScreen */ #define USB_DEVICE_ID_SYNAPTICS_STICK 0x0007 /* Synaptics USB Styk */ #define USB_DEVICE_ID_SYNAPTICS_WP 0x0008 /* Synaptics USB WheelPad */ #define USB_DEVICE_ID_SYNAPTICS_COMP_TP 0x0009 /* Composite USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_WTP 0x0010 /* Wireless TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_DPAD 0x0013 /* DisplayPad */ #define SYNUSB_TOUCHPAD (1 << 0) #define SYNUSB_STICK (1 << 1) #define SYNUSB_TOUCHSCREEN (1 << 2) #define SYNUSB_AUXDISPLAY (1 << 3) /* For cPad */ #define SYNUSB_COMBO (1 << 4) /* Composite device (TP + stick) */ #define SYNUSB_IO_ALWAYS (1 << 5) #define USB_DEVICE_SYNAPTICS(prod, kind) \ USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, \ USB_DEVICE_ID_SYNAPTICS_##prod), \ .driver_info = (kind), #define SYNUSB_RECV_SIZE 8 #define XMIN_NOMINAL 1472 #define XMAX_NOMINAL 5472 #define YMIN_NOMINAL 1408 #define YMAX_NOMINAL 4448 struct synusb { struct usb_device *udev; struct usb_interface *intf; struct urb *urb; unsigned char *data; /* serialize access to open/suspend */ struct mutex pm_mutex; bool is_open; /* input device related data structures */ struct input_dev *input; char name[128]; char phys[64]; /* characteristics of the device */ unsigned long flags; }; static void synusb_report_buttons(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; input_report_key(input_dev, BTN_LEFT, synusb->data[1] & 0x04); input_report_key(input_dev, BTN_RIGHT, synusb->data[1] & 0x01); input_report_key(input_dev, BTN_MIDDLE, synusb->data[1] & 0x02); } static void synusb_report_stick(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; int x, y; unsigned int pressure; pressure = synusb->data[6]; x = (s16)(be16_to_cpup((__be16 *)&synusb->data[2]) << 3) >> 7; y = (s16)(be16_to_cpup((__be16 *)&synusb->data[4]) << 3) >> 7; if (pressure > 0) { input_report_rel(input_dev, REL_X, x); input_report_rel(input_dev, REL_Y, -y); } input_report_abs(input_dev, ABS_PRESSURE, pressure); synusb_report_buttons(synusb); input_sync(input_dev); } static void synusb_report_touchpad(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; unsigned int num_fingers, tool_width; unsigned int x, y; unsigned int pressure, w; pressure = synusb->data[6]; x = be16_to_cpup((__be16 *)&synusb->data[2]); y = be16_to_cpup((__be16 *)&synusb->data[4]); w = synusb->data[0] & 0x0f; if (pressure > 0) { num_fingers = 1; tool_width = 5; switch (w) { case 0 ... 1: num_fingers = 2 + w; break; case 2: /* pen, pretend its a finger */ break; case 4 ... 
15: tool_width = w; break; } } else { num_fingers = 0; tool_width = 0; } /* * Post events * BTN_TOUCH has to be first as mousedev relies on it when doing * absolute -> relative conversion */ if (pressure > 30) input_report_key(input_dev, BTN_TOUCH, 1); if (pressure < 25) input_report_key(input_dev, BTN_TOUCH, 0); if (num_fingers > 0) { input_report_abs(input_dev, ABS_X, x); input_report_abs(input_dev, ABS_Y, YMAX_NOMINAL + YMIN_NOMINAL - y); } input_report_abs(input_dev, ABS_PRESSURE, pressure); input_report_abs(input_dev, ABS_TOOL_WIDTH, tool_width); input_report_key(input_dev, BTN_TOOL_FINGER, num_fingers == 1); input_report_key(input_dev, BTN_TOOL_DOUBLETAP, num_fingers == 2); input_report_key(input_dev, BTN_TOOL_TRIPLETAP, num_fingers == 3); synusb_report_buttons(synusb); if (synusb->flags & SYNUSB_AUXDISPLAY) input_report_key(input_dev, BTN_MIDDLE, synusb->data[1] & 0x08); input_sync(input_dev); } static void synusb_irq(struct urb *urb) { struct synusb *synusb = urb->context; int error; /* Check our status in case we need to bail out early. */ switch (urb->status) { case 0: usb_mark_last_busy(synusb->udev); break; /* Device went away so don't keep trying to read from it. */ case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: goto resubmit; break; } if (synusb->flags & SYNUSB_STICK) synusb_report_stick(synusb); else synusb_report_touchpad(synusb); resubmit: error = usb_submit_urb(urb, GFP_ATOMIC); if (error && error != -EPERM) dev_err(&synusb->intf->dev, "%s - usb_submit_urb failed with result: %d", __func__, error); } static struct usb_endpoint_descriptor * synusb_get_in_endpoint(struct usb_host_interface *iface) { struct usb_endpoint_descriptor *endpoint; int i; for (i = 0; i < iface->desc.bNumEndpoints; ++i) { endpoint = &iface->endpoint[i].desc; if (usb_endpoint_is_int_in(endpoint)) { /* we found our interrupt in endpoint */ return endpoint; } } return NULL; } static int synusb_open(struct input_dev *dev) { struct synusb *synusb = input_get_drvdata(dev); int retval; retval = usb_autopm_get_interface(synusb->intf); if (retval) { dev_err(&synusb->intf->dev, "%s - usb_autopm_get_interface failed, error: %d\n", __func__, retval); return retval; } mutex_lock(&synusb->pm_mutex); retval = usb_submit_urb(synusb->urb, GFP_KERNEL); if (retval) { dev_err(&synusb->intf->dev, "%s - usb_submit_urb failed, error: %d\n", __func__, retval); retval = -EIO; goto out; } synusb->intf->needs_remote_wakeup = 1; synusb->is_open = true; out: mutex_unlock(&synusb->pm_mutex); usb_autopm_put_interface(synusb->intf); return retval; } static void synusb_close(struct input_dev *dev) { struct synusb *synusb = input_get_drvdata(dev); int autopm_error; autopm_error = usb_autopm_get_interface(synusb->intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); synusb->intf->needs_remote_wakeup = 0; synusb->is_open = false; mutex_unlock(&synusb->pm_mutex); if (!autopm_error) usb_autopm_put_interface(synusb->intf); } static int synusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *ep; struct synusb *synusb; struct input_dev *input_dev; unsigned int intf_num = intf->cur_altsetting->desc.bInterfaceNumber; unsigned int altsetting = min(intf->num_altsetting, 1U); int error; error = usb_set_interface(udev, intf_num, altsetting); if (error) { dev_err(&udev->dev, "Can not set alternate setting to %i, error: %i", altsetting, error); return error; } ep = synusb_get_in_endpoint(intf->cur_altsetting); if 
(!ep) return -ENODEV; synusb = kzalloc(sizeof(*synusb), GFP_KERNEL); input_dev = input_allocate_device(); if (!synusb || !input_dev) { error = -ENOMEM; goto err_free_mem; } synusb->udev = udev; synusb->intf = intf; synusb->input = input_dev; mutex_init(&synusb->pm_mutex); synusb->flags = id->driver_info; if (synusb->flags & SYNUSB_COMBO) { /* * This is a combo device, we need to set proper * capability, depending on the interface. */ synusb->flags |= intf_num == 1 ? SYNUSB_STICK : SYNUSB_TOUCHPAD; } synusb->urb = usb_alloc_urb(0, GFP_KERNEL); if (!synusb->urb) { error = -ENOMEM; goto err_free_mem; } synusb->data = usb_alloc_coherent(udev, SYNUSB_RECV_SIZE, GFP_KERNEL, &synusb->urb->transfer_dma); if (!synusb->data) { error = -ENOMEM; goto err_free_urb; } usb_fill_int_urb(synusb->urb, udev, usb_rcvintpipe(udev, ep->bEndpointAddress), synusb->data, SYNUSB_RECV_SIZE, synusb_irq, synusb, ep->bInterval); synusb->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (udev->manufacturer) strscpy(synusb->name, udev->manufacturer, sizeof(synusb->name)); if (udev->product) { if (udev->manufacturer) strlcat(synusb->name, " ", sizeof(synusb->name)); strlcat(synusb->name, udev->product, sizeof(synusb->name)); } if (!strlen(synusb->name)) snprintf(synusb->name, sizeof(synusb->name), "USB Synaptics Device %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); if (synusb->flags & SYNUSB_STICK) strlcat(synusb->name, " (Stick)", sizeof(synusb->name)); usb_make_path(udev, synusb->phys, sizeof(synusb->phys)); strlcat(synusb->phys, "/input0", sizeof(synusb->phys)); input_dev->name = synusb->name; input_dev->phys = synusb->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &synusb->intf->dev; if (!(synusb->flags & SYNUSB_IO_ALWAYS)) { input_dev->open = synusb_open; input_dev->close = synusb_close; } input_set_drvdata(input_dev, synusb); __set_bit(EV_ABS, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); if (synusb->flags & SYNUSB_STICK) { __set_bit(EV_REL, input_dev->evbit); __set_bit(REL_X, input_dev->relbit); __set_bit(REL_Y, input_dev->relbit); __set_bit(INPUT_PROP_POINTING_STICK, input_dev->propbit); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 127, 0, 0); } else { input_set_abs_params(input_dev, ABS_X, XMIN_NOMINAL, XMAX_NOMINAL, 0, 0); input_set_abs_params(input_dev, ABS_Y, YMIN_NOMINAL, YMAX_NOMINAL, 0, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 255, 0, 0); input_set_abs_params(input_dev, ABS_TOOL_WIDTH, 0, 15, 0, 0); __set_bit(BTN_TOUCH, input_dev->keybit); __set_bit(BTN_TOOL_FINGER, input_dev->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); } if (synusb->flags & SYNUSB_TOUCHSCREEN) __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); else __set_bit(INPUT_PROP_POINTER, input_dev->propbit); __set_bit(BTN_LEFT, input_dev->keybit); __set_bit(BTN_RIGHT, input_dev->keybit); __set_bit(BTN_MIDDLE, input_dev->keybit); usb_set_intfdata(intf, synusb); if (synusb->flags & SYNUSB_IO_ALWAYS) { error = synusb_open(input_dev); if (error) goto err_free_dma; } error = input_register_device(input_dev); if (error) { dev_err(&udev->dev, "Failed to register input device, error %d\n", error); goto err_stop_io; } return 0; err_stop_io: if (synusb->flags & SYNUSB_IO_ALWAYS) synusb_close(synusb->input); err_free_dma: usb_free_coherent(udev, SYNUSB_RECV_SIZE, synusb->data, synusb->urb->transfer_dma); err_free_urb: usb_free_urb(synusb->urb); err_free_mem: input_free_device(input_dev); kfree(synusb); 
usb_set_intfdata(intf, NULL); return error; } static void synusb_disconnect(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); struct usb_device *udev = interface_to_usbdev(intf); if (synusb->flags & SYNUSB_IO_ALWAYS) synusb_close(synusb->input); input_unregister_device(synusb->input); usb_free_coherent(udev, SYNUSB_RECV_SIZE, synusb->data, synusb->urb->transfer_dma); usb_free_urb(synusb->urb); kfree(synusb); usb_set_intfdata(intf, NULL); } static int synusb_suspend(struct usb_interface *intf, pm_message_t message) { struct synusb *synusb = usb_get_intfdata(intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); mutex_unlock(&synusb->pm_mutex); return 0; } static int synusb_resume(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); int retval = 0; mutex_lock(&synusb->pm_mutex); if ((synusb->is_open || (synusb->flags & SYNUSB_IO_ALWAYS)) && usb_submit_urb(synusb->urb, GFP_NOIO) < 0) { retval = -EIO; } mutex_unlock(&synusb->pm_mutex); return retval; } static int synusb_pre_reset(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); return 0; } static int synusb_post_reset(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); int retval = 0; if ((synusb->is_open || (synusb->flags & SYNUSB_IO_ALWAYS)) && usb_submit_urb(synusb->urb, GFP_NOIO) < 0) { retval = -EIO; } mutex_unlock(&synusb->pm_mutex); return retval; } static int synusb_reset_resume(struct usb_interface *intf) { return synusb_resume(intf); } static const struct usb_device_id synusb_idtable[] = { { USB_DEVICE_SYNAPTICS(TP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(INT_TP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(CPAD, SYNUSB_TOUCHPAD | SYNUSB_AUXDISPLAY | SYNUSB_IO_ALWAYS) }, { USB_DEVICE_SYNAPTICS(TS, SYNUSB_TOUCHSCREEN) }, { USB_DEVICE_SYNAPTICS(STICK, SYNUSB_STICK) }, { USB_DEVICE_SYNAPTICS(WP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(COMP_TP, SYNUSB_COMBO) }, { USB_DEVICE_SYNAPTICS(WTP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(DPAD, SYNUSB_TOUCHPAD) }, { } }; MODULE_DEVICE_TABLE(usb, synusb_idtable); static struct usb_driver synusb_driver = { .name = "synaptics_usb", .probe = synusb_probe, .disconnect = synusb_disconnect, .id_table = synusb_idtable, .suspend = synusb_suspend, .resume = synusb_resume, .pre_reset = synusb_pre_reset, .post_reset = synusb_post_reset, .reset_resume = synusb_reset_resume, .supports_autosuspend = 1, }; module_usb_driver(synusb_driver); MODULE_AUTHOR("Rob Miller <rob@inpharmatica.co.uk>, " "Ron Lee <ron@debian.org>, " "Jan Steinhoff <cpad@jan-steinhoff.de>"); MODULE_DESCRIPTION("Synaptics USB device driver"); MODULE_LICENSE("GPL");
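For context, here is a minimal userspace sketch of consuming the events this driver emits (ABS_X/ABS_Y/ABS_PRESSURE plus BTN_TOUCH and the tool/button keys) through the evdev interface. The device node name is an assumption; the actual eventN index varies per system.

/* Illustrative userspace sketch; /dev/input/event5 is an assumed node. */
#include <fcntl.h>
#include <linux/input.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct input_event ev;
	int fd = open("/dev/input/event5", O_RDONLY);

	if (fd < 0)
		return 1;

	while (read(fd, &ev, sizeof(ev)) == sizeof(ev)) {
		if (ev.type == EV_ABS && ev.code == ABS_PRESSURE)
			printf("pressure %d\n", ev.value);
		else if (ev.type == EV_KEY && ev.code == BTN_TOUCH)
			printf("touch %s\n", ev.value ? "down" : "up");
	}

	close(fd);
	return 0;
}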
// SPDX-License-Identifier: GPL-2.0 /* * cdc-wdm.c * * This driver supports USB CDC WCM Device Management. * * Copyright (c) 2007-2009 Oliver Neukum * * Some code taken from cdc-acm.c * * Released under the GPLv2.
* * Many thanks to Carl Nordbeck */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/ioctl.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/poll.h> #include <linux/skbuff.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/wwan.h> #include <asm/byteorder.h> #include <asm/unaligned.h> #include <linux/usb/cdc-wdm.h> #define DRIVER_AUTHOR "Oliver Neukum" #define DRIVER_DESC "USB Abstract Control Model driver for USB WCM Device Management" static const struct usb_device_id wdm_ids[] = { { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS, .bInterfaceClass = USB_CLASS_COMM, .bInterfaceSubClass = USB_CDC_SUBCLASS_DMM }, { } }; MODULE_DEVICE_TABLE (usb, wdm_ids); #define WDM_MINOR_BASE 176 #define WDM_IN_USE 1 #define WDM_DISCONNECTING 2 #define WDM_RESULT 3 #define WDM_READ 4 #define WDM_INT_STALL 5 #define WDM_POLL_RUNNING 6 #define WDM_RESPONDING 7 #define WDM_SUSPENDING 8 #define WDM_RESETTING 9 #define WDM_OVERFLOW 10 #define WDM_WWAN_IN_USE 11 #define WDM_MAX 16 /* we cannot wait forever at flush() */ #define WDM_FLUSH_TIMEOUT (30 * HZ) /* CDC-WMC r1.1 requires wMaxCommand to be "at least 256 decimal (0x100)" */ #define WDM_DEFAULT_BUFSIZE 256 static DEFINE_MUTEX(wdm_mutex); static DEFINE_SPINLOCK(wdm_device_list_lock); static LIST_HEAD(wdm_device_list); /* --- method tables --- */ struct wdm_device { u8 *inbuf; /* buffer for response */ u8 *outbuf; /* buffer for command */ u8 *sbuf; /* buffer for status */ u8 *ubuf; /* buffer for copy to user space */ struct urb *command; struct urb *response; struct urb *validity; struct usb_interface *intf; struct usb_ctrlrequest *orq; struct usb_ctrlrequest *irq; spinlock_t iuspin; unsigned long flags; u16 bufsize; u16 wMaxCommand; u16 wMaxPacketSize; __le16 inum; int reslength; int length; int read; int count; dma_addr_t shandle; dma_addr_t ihandle; struct mutex wlock; struct mutex rlock; wait_queue_head_t wait; struct work_struct rxwork; struct work_struct service_outs_intr; int werr; int rerr; int resp_count; struct list_head device_list; int (*manage_power)(struct usb_interface *, int); enum wwan_port_type wwanp_type; struct wwan_port *wwanp; }; static struct usb_driver wdm_driver; /* return intfdata if we own the interface, else look up intf in the list */ static struct wdm_device *wdm_find_device(struct usb_interface *intf) { struct wdm_device *desc; spin_lock(&wdm_device_list_lock); list_for_each_entry(desc, &wdm_device_list, device_list) if (desc->intf == intf) goto found; desc = NULL; found: spin_unlock(&wdm_device_list_lock); return desc; } static struct wdm_device *wdm_find_device_by_minor(int minor) { struct wdm_device *desc; spin_lock(&wdm_device_list_lock); list_for_each_entry(desc, &wdm_device_list, device_list) if (desc->intf->minor == minor) goto found; desc = NULL; found: spin_unlock(&wdm_device_list_lock); return desc; } /* --- callbacks --- */ static void wdm_out_callback(struct urb *urb) { struct wdm_device *desc; unsigned long flags; desc = urb->context; spin_lock_irqsave(&desc->iuspin, flags); desc->werr = urb->status; spin_unlock_irqrestore(&desc->iuspin, flags); kfree(desc->outbuf); desc->outbuf = NULL; clear_bit(WDM_IN_USE, &desc->flags); wake_up_all(&desc->wait); } static void wdm_wwan_rx(struct wdm_device *desc, int length); static void wdm_in_callback(struct urb *urb) { unsigned long flags; struct wdm_device *desc = urb->context; int status = urb->status; int 
length = urb->actual_length; spin_lock_irqsave(&desc->iuspin, flags); clear_bit(WDM_RESPONDING, &desc->flags); if (status) { switch (status) { case -ENOENT: dev_dbg(&desc->intf->dev, "nonzero urb status received: -ENOENT\n"); goto skip_error; case -ECONNRESET: dev_dbg(&desc->intf->dev, "nonzero urb status received: -ECONNRESET\n"); goto skip_error; case -ESHUTDOWN: dev_dbg(&desc->intf->dev, "nonzero urb status received: -ESHUTDOWN\n"); goto skip_error; case -EPIPE: dev_err(&desc->intf->dev, "nonzero urb status received: -EPIPE\n"); break; default: dev_err(&desc->intf->dev, "Unexpected error %d\n", status); break; } } if (test_bit(WDM_WWAN_IN_USE, &desc->flags)) { wdm_wwan_rx(desc, length); goto out; } /* * only set a new error if there is no previous error. * Errors are only cleared during read/open * Avoid propagating -EPIPE (stall) to userspace since it is * better handled as an empty read */ if (desc->rerr == 0 && status != -EPIPE) desc->rerr = status; if (length + desc->length > desc->wMaxCommand) { /* The buffer would overflow */ set_bit(WDM_OVERFLOW, &desc->flags); } else { /* we may already be in overflow */ if (!test_bit(WDM_OVERFLOW, &desc->flags)) { memmove(desc->ubuf + desc->length, desc->inbuf, length); desc->length += length; desc->reslength = length; } } skip_error: if (desc->rerr) { /* * Since there was an error, userspace may decide to not read * any data after poll'ing. * We should respond to further attempts from the device to send * data, so that we can get unstuck. */ schedule_work(&desc->service_outs_intr); } else { set_bit(WDM_READ, &desc->flags); wake_up(&desc->wait); } out: spin_unlock_irqrestore(&desc->iuspin, flags); } static void wdm_int_callback(struct urb *urb) { unsigned long flags; int rv = 0; int responding; int status = urb->status; struct wdm_device *desc; struct usb_cdc_notification *dr; desc = urb->context; dr = (struct usb_cdc_notification *)desc->sbuf; if (status) { switch (status) { case -ESHUTDOWN: case -ENOENT: case -ECONNRESET: return; /* unplug */ case -EPIPE: set_bit(WDM_INT_STALL, &desc->flags); dev_err(&desc->intf->dev, "Stall on int endpoint\n"); goto sw; /* halt is cleared in work */ default: dev_err(&desc->intf->dev, "nonzero urb status received: %d\n", status); break; } } if (urb->actual_length < sizeof(struct usb_cdc_notification)) { dev_err(&desc->intf->dev, "wdm_int_callback - %d bytes\n", urb->actual_length); goto exit; } switch (dr->bNotificationType) { case USB_CDC_NOTIFY_RESPONSE_AVAILABLE: dev_dbg(&desc->intf->dev, "NOTIFY_RESPONSE_AVAILABLE received: index %d len %d\n", le16_to_cpu(dr->wIndex), le16_to_cpu(dr->wLength)); break; case USB_CDC_NOTIFY_NETWORK_CONNECTION: dev_dbg(&desc->intf->dev, "NOTIFY_NETWORK_CONNECTION %s network\n", dr->wValue ? 
"connected to" : "disconnected from"); goto exit; case USB_CDC_NOTIFY_SPEED_CHANGE: dev_dbg(&desc->intf->dev, "SPEED_CHANGE received (len %u)\n", urb->actual_length); goto exit; default: clear_bit(WDM_POLL_RUNNING, &desc->flags); dev_err(&desc->intf->dev, "unknown notification %d received: index %d len %d\n", dr->bNotificationType, le16_to_cpu(dr->wIndex), le16_to_cpu(dr->wLength)); goto exit; } spin_lock_irqsave(&desc->iuspin, flags); responding = test_and_set_bit(WDM_RESPONDING, &desc->flags); if (!desc->resp_count++ && !responding && !test_bit(WDM_DISCONNECTING, &desc->flags) && !test_bit(WDM_SUSPENDING, &desc->flags)) { rv = usb_submit_urb(desc->response, GFP_ATOMIC); dev_dbg(&desc->intf->dev, "submit response URB %d\n", rv); } spin_unlock_irqrestore(&desc->iuspin, flags); if (rv < 0) { clear_bit(WDM_RESPONDING, &desc->flags); if (rv == -EPERM) return; if (rv == -ENOMEM) { sw: rv = schedule_work(&desc->rxwork); if (rv) dev_err(&desc->intf->dev, "Cannot schedule work\n"); } } exit: rv = usb_submit_urb(urb, GFP_ATOMIC); if (rv) dev_err(&desc->intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, rv); } static void poison_urbs(struct wdm_device *desc) { /* the order here is essential */ usb_poison_urb(desc->command); usb_poison_urb(desc->validity); usb_poison_urb(desc->response); } static void unpoison_urbs(struct wdm_device *desc) { /* * the order here is not essential * it is symmetrical just to be nice */ usb_unpoison_urb(desc->response); usb_unpoison_urb(desc->validity); usb_unpoison_urb(desc->command); } static void free_urbs(struct wdm_device *desc) { usb_free_urb(desc->validity); usb_free_urb(desc->response); usb_free_urb(desc->command); } static void cleanup(struct wdm_device *desc) { kfree(desc->sbuf); kfree(desc->inbuf); kfree(desc->orq); kfree(desc->irq); kfree(desc->ubuf); free_urbs(desc); kfree(desc); } static ssize_t wdm_write (struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { u8 *buf; int rv = -EMSGSIZE, r, we; struct wdm_device *desc = file->private_data; struct usb_ctrlrequest *req; if (count > desc->wMaxCommand) count = desc->wMaxCommand; spin_lock_irq(&desc->iuspin); we = desc->werr; desc->werr = 0; spin_unlock_irq(&desc->iuspin); if (we < 0) return usb_translate_errors(we); buf = memdup_user(buffer, count); if (IS_ERR(buf)) return PTR_ERR(buf); /* concurrent writes and disconnect */ r = mutex_lock_interruptible(&desc->wlock); rv = -ERESTARTSYS; if (r) goto out_free_mem; if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto out_free_mem_lock; } r = usb_autopm_get_interface(desc->intf); if (r < 0) { rv = usb_translate_errors(r); goto out_free_mem_lock; } if (!(file->f_flags & O_NONBLOCK)) r = wait_event_interruptible(desc->wait, !test_bit(WDM_IN_USE, &desc->flags)); else if (test_bit(WDM_IN_USE, &desc->flags)) r = -EAGAIN; if (test_bit(WDM_RESETTING, &desc->flags)) r = -EIO; if (test_bit(WDM_DISCONNECTING, &desc->flags)) r = -ENODEV; if (r < 0) { rv = r; goto out_free_mem_pm; } req = desc->orq; usb_fill_control_urb( desc->command, interface_to_usbdev(desc->intf), /* using common endpoint 0 */ usb_sndctrlpipe(interface_to_usbdev(desc->intf), 0), (unsigned char *)req, buf, count, wdm_out_callback, desc ); req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; req->wIndex = desc->inum; /* already converted */ req->wLength = cpu_to_le16(count); set_bit(WDM_IN_USE, &desc->flags); desc->outbuf = buf; rv = usb_submit_urb(desc->command, 
GFP_KERNEL); if (rv < 0) { desc->outbuf = NULL; clear_bit(WDM_IN_USE, &desc->flags); wake_up_all(&desc->wait); /* for wdm_wait_for_response() */ dev_err(&desc->intf->dev, "Tx URB error: %d\n", rv); rv = usb_translate_errors(rv); goto out_free_mem_pm; } else { dev_dbg(&desc->intf->dev, "Tx URB has been submitted index=%d\n", le16_to_cpu(req->wIndex)); } usb_autopm_put_interface(desc->intf); mutex_unlock(&desc->wlock); return count; out_free_mem_pm: usb_autopm_put_interface(desc->intf); out_free_mem_lock: mutex_unlock(&desc->wlock); out_free_mem: kfree(buf); return rv; } /* * Submit the read urb if resp_count is non-zero. * * Called with desc->iuspin locked */ static int service_outstanding_interrupt(struct wdm_device *desc) { int rv = 0; /* submit read urb only if the device is waiting for it */ if (!desc->resp_count || !--desc->resp_count) goto out; if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto out; } if (test_bit(WDM_RESETTING, &desc->flags)) { rv = -EIO; goto out; } set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) dev_err(&desc->intf->dev, "usb_submit_urb failed with result %d\n", rv); /* make sure the next notification trigger a submit */ clear_bit(WDM_RESPONDING, &desc->flags); desc->resp_count = 0; } out: return rv; } static ssize_t wdm_read (struct file *file, char __user *buffer, size_t count, loff_t *ppos) { int rv, cntr; int i = 0; struct wdm_device *desc = file->private_data; rv = mutex_lock_interruptible(&desc->rlock); /*concurrent reads */ if (rv < 0) return -ERESTARTSYS; cntr = READ_ONCE(desc->length); if (cntr == 0) { desc->read = 0; retry: if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto err; } if (test_bit(WDM_OVERFLOW, &desc->flags)) { clear_bit(WDM_OVERFLOW, &desc->flags); rv = -ENOBUFS; goto err; } i++; if (file->f_flags & O_NONBLOCK) { if (!test_bit(WDM_READ, &desc->flags)) { rv = -EAGAIN; goto err; } rv = 0; } else { rv = wait_event_interruptible(desc->wait, test_bit(WDM_READ, &desc->flags)); } /* may have happened while we slept */ if (test_bit(WDM_DISCONNECTING, &desc->flags)) { rv = -ENODEV; goto err; } if (test_bit(WDM_RESETTING, &desc->flags)) { rv = -EIO; goto err; } usb_mark_last_busy(interface_to_usbdev(desc->intf)); if (rv < 0) { rv = -ERESTARTSYS; goto err; } spin_lock_irq(&desc->iuspin); if (desc->rerr) { /* read completed, error happened */ rv = usb_translate_errors(desc->rerr); desc->rerr = 0; spin_unlock_irq(&desc->iuspin); goto err; } /* * recheck whether we've lost the race * against the completion handler */ if (!test_bit(WDM_READ, &desc->flags)) { /* lost race */ spin_unlock_irq(&desc->iuspin); goto retry; } if (!desc->reslength) { /* zero length read */ dev_dbg(&desc->intf->dev, "zero length - clearing WDM_READ\n"); clear_bit(WDM_READ, &desc->flags); rv = service_outstanding_interrupt(desc); spin_unlock_irq(&desc->iuspin); if (rv < 0) goto err; goto retry; } cntr = desc->length; spin_unlock_irq(&desc->iuspin); } if (cntr > count) cntr = count; rv = copy_to_user(buffer, desc->ubuf, cntr); if (rv > 0) { rv = -EFAULT; goto err; } spin_lock_irq(&desc->iuspin); for (i = 0; i < desc->length - cntr; i++) desc->ubuf[i] = desc->ubuf[i + cntr]; desc->length -= cntr; /* in case we had outstanding data */ if (!desc->length) { clear_bit(WDM_READ, &desc->flags); service_outstanding_interrupt(desc); } spin_unlock_irq(&desc->iuspin); rv = cntr; err: 
mutex_unlock(&desc->rlock); return rv; } static int wdm_wait_for_response(struct file *file, long timeout) { struct wdm_device *desc = file->private_data; long rv; /* Use long here because (int) MAX_SCHEDULE_TIMEOUT < 0. */ /* * Needs both flags. We cannot do with one because resetting it would * cause a race with write() yet we need to signal a disconnect. */ rv = wait_event_interruptible_timeout(desc->wait, !test_bit(WDM_IN_USE, &desc->flags) || test_bit(WDM_DISCONNECTING, &desc->flags), timeout); /* * To report the correct error. This is best effort. * We are inevitably racing with the hardware. */ if (test_bit(WDM_DISCONNECTING, &desc->flags)) return -ENODEV; if (!rv) return -EIO; if (rv < 0) return -EINTR; spin_lock_irq(&desc->iuspin); rv = desc->werr; desc->werr = 0; spin_unlock_irq(&desc->iuspin); return usb_translate_errors(rv); } /* * You need to send a signal when you react to malicious or defective hardware. * Also, don't abort when fsync() returned -EINVAL, for older kernels which do * not implement wdm_flush() will return -EINVAL. */ static int wdm_fsync(struct file *file, loff_t start, loff_t end, int datasync) { return wdm_wait_for_response(file, MAX_SCHEDULE_TIMEOUT); } /* * Same with wdm_fsync(), except it uses finite timeout in order to react to * malicious or defective hardware which ceased communication after close() was * implicitly called due to process termination. */ static int wdm_flush(struct file *file, fl_owner_t id) { return wdm_wait_for_response(file, WDM_FLUSH_TIMEOUT); } static __poll_t wdm_poll(struct file *file, struct poll_table_struct *wait) { struct wdm_device *desc = file->private_data; unsigned long flags; __poll_t mask = 0; spin_lock_irqsave(&desc->iuspin, flags); if (test_bit(WDM_DISCONNECTING, &desc->flags)) { mask = EPOLLHUP | EPOLLERR; spin_unlock_irqrestore(&desc->iuspin, flags); goto desc_out; } if (test_bit(WDM_READ, &desc->flags)) mask = EPOLLIN | EPOLLRDNORM; if (desc->rerr || desc->werr) mask |= EPOLLERR; if (!test_bit(WDM_IN_USE, &desc->flags)) mask |= EPOLLOUT | EPOLLWRNORM; spin_unlock_irqrestore(&desc->iuspin, flags); poll_wait(file, &desc->wait, wait); desc_out: return mask; } static int wdm_open(struct inode *inode, struct file *file) { int minor = iminor(inode); int rv = -ENODEV; struct usb_interface *intf; struct wdm_device *desc; mutex_lock(&wdm_mutex); desc = wdm_find_device_by_minor(minor); if (!desc) goto out; intf = desc->intf; if (test_bit(WDM_DISCONNECTING, &desc->flags)) goto out; file->private_data = desc; if (test_bit(WDM_WWAN_IN_USE, &desc->flags)) { rv = -EBUSY; goto out; } rv = usb_autopm_get_interface(desc->intf); if (rv < 0) { dev_err(&desc->intf->dev, "Error autopm - %d\n", rv); goto out; } /* using write lock to protect desc->count */ mutex_lock(&desc->wlock); if (!desc->count++) { desc->werr = 0; desc->rerr = 0; rv = usb_submit_urb(desc->validity, GFP_KERNEL); if (rv < 0) { desc->count--; dev_err(&desc->intf->dev, "Error submitting int urb - %d\n", rv); rv = usb_translate_errors(rv); } } else { rv = 0; } mutex_unlock(&desc->wlock); if (desc->count == 1) desc->manage_power(intf, 1); usb_autopm_put_interface(desc->intf); out: mutex_unlock(&wdm_mutex); return rv; } static int wdm_release(struct inode *inode, struct file *file) { struct wdm_device *desc = file->private_data; mutex_lock(&wdm_mutex); /* using write lock to protect desc->count */ mutex_lock(&desc->wlock); desc->count--; mutex_unlock(&desc->wlock); if (!desc->count) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) { dev_dbg(&desc->intf->dev, "wdm_release: 
cleanup\n"); poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; clear_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); unpoison_urbs(desc); } else { /* must avoid dev_printk here as desc->intf is invalid */ pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__); cleanup(desc); } } mutex_unlock(&wdm_mutex); return 0; } static long wdm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct wdm_device *desc = file->private_data; int rv = 0; switch (cmd) { case IOCTL_WDM_MAX_COMMAND: if (copy_to_user((void __user *)arg, &desc->wMaxCommand, sizeof(desc->wMaxCommand))) rv = -EFAULT; break; default: rv = -ENOTTY; } return rv; } static const struct file_operations wdm_fops = { .owner = THIS_MODULE, .read = wdm_read, .write = wdm_write, .fsync = wdm_fsync, .open = wdm_open, .flush = wdm_flush, .release = wdm_release, .poll = wdm_poll, .unlocked_ioctl = wdm_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; static struct usb_class_driver wdm_class = { .name = "cdc-wdm%d", .fops = &wdm_fops, .minor_base = WDM_MINOR_BASE, }; /* --- WWAN framework integration --- */ #ifdef CONFIG_WWAN static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); /* The interface is both exposed via the WWAN framework and as a * legacy usbmisc chardev. If chardev is already open, just fail * to prevent concurrent usage. Otherwise, switch to WWAN mode. */ mutex_lock(&wdm_mutex); if (desc->count) { mutex_unlock(&wdm_mutex); return -EBUSY; } set_bit(WDM_WWAN_IN_USE, &desc->flags); mutex_unlock(&wdm_mutex); desc->manage_power(desc->intf, 1); /* tx is allowed */ wwan_port_txon(port); /* Start getting events */ return usb_submit_urb(desc->validity, GFP_KERNEL); } static void wdm_wwan_port_stop(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); /* Stop all transfers and disable WWAN mode */ poison_urbs(desc); desc->manage_power(desc->intf, 0); clear_bit(WDM_READ, &desc->flags); clear_bit(WDM_WWAN_IN_USE, &desc->flags); unpoison_urbs(desc); } static void wdm_wwan_port_tx_complete(struct urb *urb) { struct sk_buff *skb = urb->context; struct wdm_device *desc = skb_shinfo(skb)->destructor_arg; usb_autopm_put_interface(desc->intf); wwan_port_txon(desc->wwanp); kfree_skb(skb); } static int wdm_wwan_port_tx(struct wwan_port *port, struct sk_buff *skb) { struct wdm_device *desc = wwan_port_get_drvdata(port); struct usb_interface *intf = desc->intf; struct usb_ctrlrequest *req = desc->orq; int rv; rv = usb_autopm_get_interface(intf); if (rv) return rv; usb_fill_control_urb( desc->command, interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), (unsigned char *)req, skb->data, skb->len, wdm_wwan_port_tx_complete, skb ); req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; req->wIndex = desc->inum; req->wLength = cpu_to_le16(skb->len); skb_shinfo(skb)->destructor_arg = desc; rv = usb_submit_urb(desc->command, GFP_KERNEL); if (rv) usb_autopm_put_interface(intf); else /* One transfer at a time, stop TX until URB completion */ wwan_port_txoff(port); return rv; } static const struct wwan_port_ops wdm_wwan_port_ops = { .start = wdm_wwan_port_start, .stop = wdm_wwan_port_stop, .tx = wdm_wwan_port_tx, }; static void wdm_wwan_init(struct wdm_device *desc) { struct usb_interface *intf = desc->intf; struct wwan_port *port; /* Only register to 
WWAN core if protocol/type is known */ if (desc->wwanp_type == WWAN_PORT_UNKNOWN) { dev_info(&intf->dev, "Unknown control protocol\n"); return; } port = wwan_create_port(&intf->dev, desc->wwanp_type, &wdm_wwan_port_ops, NULL, desc); if (IS_ERR(port)) { dev_err(&intf->dev, "%s: Unable to create WWAN port\n", dev_name(intf->usb_dev)); return; } desc->wwanp = port; } static void wdm_wwan_deinit(struct wdm_device *desc) { if (!desc->wwanp) return; wwan_remove_port(desc->wwanp); desc->wwanp = NULL; } static void wdm_wwan_rx(struct wdm_device *desc, int length) { struct wwan_port *port = desc->wwanp; struct sk_buff *skb; /* Forward data to WWAN port */ skb = alloc_skb(length, GFP_ATOMIC); if (!skb) return; skb_put_data(skb, desc->inbuf, length); wwan_port_rx(port, skb); /* inbuf has been copied, it is safe to check for outstanding data */ schedule_work(&desc->service_outs_intr); } #else /* CONFIG_WWAN */ static void wdm_wwan_init(struct wdm_device *desc) {} static void wdm_wwan_deinit(struct wdm_device *desc) {} static void wdm_wwan_rx(struct wdm_device *desc, int length) {} #endif /* CONFIG_WWAN */ /* --- error handling --- */ static void wdm_rxwork(struct work_struct *work) { struct wdm_device *desc = container_of(work, struct wdm_device, rxwork); unsigned long flags; int rv = 0; int responding; spin_lock_irqsave(&desc->iuspin, flags); if (test_bit(WDM_DISCONNECTING, &desc->flags)) { spin_unlock_irqrestore(&desc->iuspin, flags); } else { responding = test_and_set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irqrestore(&desc->iuspin, flags); if (!responding) rv = usb_submit_urb(desc->response, GFP_KERNEL); if (rv < 0 && rv != -EPERM) { spin_lock_irqsave(&desc->iuspin, flags); clear_bit(WDM_RESPONDING, &desc->flags); if (!test_bit(WDM_DISCONNECTING, &desc->flags)) schedule_work(&desc->rxwork); spin_unlock_irqrestore(&desc->iuspin, flags); } } } static void service_interrupt_work(struct work_struct *work) { struct wdm_device *desc; desc = container_of(work, struct wdm_device, service_outs_intr); spin_lock_irq(&desc->iuspin); service_outstanding_interrupt(desc); if (!desc->resp_count) { set_bit(WDM_READ, &desc->flags); wake_up(&desc->wait); } spin_unlock_irq(&desc->iuspin); } /* --- hotplug --- */ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor *ep, u16 bufsize, enum wwan_port_type type, int (*manage_power)(struct usb_interface *, int)) { int rv = -ENOMEM; struct wdm_device *desc; desc = kzalloc(sizeof(struct wdm_device), GFP_KERNEL); if (!desc) goto out; INIT_LIST_HEAD(&desc->device_list); mutex_init(&desc->rlock); mutex_init(&desc->wlock); spin_lock_init(&desc->iuspin); init_waitqueue_head(&desc->wait); desc->wMaxCommand = bufsize; /* this will be expanded and needed in hardware endianness */ desc->inum = cpu_to_le16((u16)intf->cur_altsetting->desc.bInterfaceNumber); desc->intf = intf; desc->wwanp_type = type; INIT_WORK(&desc->rxwork, wdm_rxwork); INIT_WORK(&desc->service_outs_intr, service_interrupt_work); if (!usb_endpoint_is_int_in(ep)) { rv = -EINVAL; goto err; } desc->wMaxPacketSize = usb_endpoint_maxp(ep); desc->orq = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!desc->orq) goto err; desc->irq = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!desc->irq) goto err; desc->validity = usb_alloc_urb(0, GFP_KERNEL); if (!desc->validity) goto err; desc->response = usb_alloc_urb(0, GFP_KERNEL); if (!desc->response) goto err; desc->command = usb_alloc_urb(0, GFP_KERNEL); if (!desc->command) goto err; desc->ubuf = kmalloc(desc->wMaxCommand, 
GFP_KERNEL); if (!desc->ubuf) goto err; desc->sbuf = kmalloc(desc->wMaxPacketSize, GFP_KERNEL); if (!desc->sbuf) goto err; desc->inbuf = kmalloc(desc->wMaxCommand, GFP_KERNEL); if (!desc->inbuf) goto err; usb_fill_int_urb( desc->validity, interface_to_usbdev(intf), usb_rcvintpipe(interface_to_usbdev(intf), ep->bEndpointAddress), desc->sbuf, desc->wMaxPacketSize, wdm_int_callback, desc, ep->bInterval ); desc->irq->bRequestType = (USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE); desc->irq->bRequest = USB_CDC_GET_ENCAPSULATED_RESPONSE; desc->irq->wValue = 0; desc->irq->wIndex = desc->inum; /* already converted */ desc->irq->wLength = cpu_to_le16(desc->wMaxCommand); usb_fill_control_urb( desc->response, interface_to_usbdev(intf), /* using common endpoint 0 */ usb_rcvctrlpipe(interface_to_usbdev(desc->intf), 0), (unsigned char *)desc->irq, desc->inbuf, desc->wMaxCommand, wdm_in_callback, desc ); desc->manage_power = manage_power; spin_lock(&wdm_device_list_lock); list_add(&desc->device_list, &wdm_device_list); spin_unlock(&wdm_device_list_lock); rv = usb_register_dev(intf, &wdm_class); if (rv < 0) goto err; else dev_info(&intf->dev, "%s: USB WDM device\n", dev_name(intf->usb_dev)); wdm_wwan_init(desc); out: return rv; err: spin_lock(&wdm_device_list_lock); list_del(&desc->device_list); spin_unlock(&wdm_device_list_lock); cleanup(desc); return rv; } static int wdm_manage_power(struct usb_interface *intf, int on) { /* need autopm_get/put here to ensure the usbcore sees the new value */ int rv = usb_autopm_get_interface(intf); intf->needs_remote_wakeup = on; if (!rv) usb_autopm_put_interface(intf); return 0; } static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id) { int rv = -EINVAL; struct usb_host_interface *iface; struct usb_endpoint_descriptor *ep; struct usb_cdc_parsed_header hdr; u8 *buffer = intf->altsetting->extra; int buflen = intf->altsetting->extralen; u16 maxcom = WDM_DEFAULT_BUFSIZE; if (!buffer) goto err; cdc_parse_cdc_header(&hdr, intf, buffer, buflen); if (hdr.usb_cdc_dmm_desc) maxcom = le16_to_cpu(hdr.usb_cdc_dmm_desc->wMaxCommand); iface = intf->cur_altsetting; if (iface->desc.bNumEndpoints != 1) goto err; ep = &iface->endpoint[0].desc; rv = wdm_create(intf, ep, maxcom, WWAN_PORT_UNKNOWN, &wdm_manage_power); err: return rv; } /** * usb_cdc_wdm_register - register a WDM subdriver * @intf: usb interface the subdriver will associate with * @ep: interrupt endpoint to monitor for notifications * @bufsize: maximum message size to support for read/write * @type: Type/protocol of the transported data (MBIM, QMI...) * @manage_power: call-back invoked during open and release to * manage the device's power * Create WDM usb class character device and associate it with intf * without binding, allowing another driver to manage the interface. * * The subdriver will manage the given interrupt endpoint exclusively * and will issue control requests referring to the given intf. It * will otherwise avoid interferring, and in particular not do * usb_set_intfdata/usb_get_intfdata on intf. * * The return value is a pointer to the subdriver's struct usb_driver. * The registering driver is responsible for calling this subdriver's * disconnect, suspend, resume, pre_reset and post_reset methods from * its own. 
*/ struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf, struct usb_endpoint_descriptor *ep, int bufsize, enum wwan_port_type type, int (*manage_power)(struct usb_interface *, int)) { int rv; rv = wdm_create(intf, ep, bufsize, type, manage_power); if (rv < 0) goto err; return &wdm_driver; err: return ERR_PTR(rv); } EXPORT_SYMBOL(usb_cdc_wdm_register); static void wdm_disconnect(struct usb_interface *intf) { struct wdm_device *desc; unsigned long flags; usb_deregister_dev(intf, &wdm_class); desc = wdm_find_device(intf); mutex_lock(&wdm_mutex); wdm_wwan_deinit(desc); /* the spinlock makes sure no new urbs are generated in the callbacks */ spin_lock_irqsave(&desc->iuspin, flags); set_bit(WDM_DISCONNECTING, &desc->flags); set_bit(WDM_READ, &desc->flags); spin_unlock_irqrestore(&desc->iuspin, flags); wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); /* the desc->intf pointer used as list key is now invalid */ spin_lock(&wdm_device_list_lock); list_del(&desc->device_list); spin_unlock(&wdm_device_list_lock); if (!desc->count) cleanup(desc); else dev_dbg(&intf->dev, "%d open files - postponing cleanup\n", desc->count); mutex_unlock(&wdm_mutex); } #ifdef CONFIG_PM static int wdm_suspend(struct usb_interface *intf, pm_message_t message) { struct wdm_device *desc = wdm_find_device(intf); int rv = 0; dev_dbg(&desc->intf->dev, "wdm%d_suspend\n", intf->minor); /* if this is an autosuspend the caller does the locking */ if (!PMSG_IS_AUTO(message)) { mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); } spin_lock_irq(&desc->iuspin); if (PMSG_IS_AUTO(message) && (test_bit(WDM_IN_USE, &desc->flags) || test_bit(WDM_RESPONDING, &desc->flags))) { spin_unlock_irq(&desc->iuspin); rv = -EBUSY; } else { set_bit(WDM_SUSPENDING, &desc->flags); spin_unlock_irq(&desc->iuspin); /* callback submits work - order is essential */ poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); unpoison_urbs(desc); } if (!PMSG_IS_AUTO(message)) { mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); } return rv; } #endif static int recover_from_urb_loss(struct wdm_device *desc) { int rv = 0; if (desc->count) { rv = usb_submit_urb(desc->validity, GFP_NOIO); if (rv < 0) dev_err(&desc->intf->dev, "Error resume submitting int urb - %d\n", rv); } return rv; } #ifdef CONFIG_PM static int wdm_resume(struct usb_interface *intf) { struct wdm_device *desc = wdm_find_device(intf); int rv; dev_dbg(&desc->intf->dev, "wdm%d_resume\n", intf->minor); clear_bit(WDM_SUSPENDING, &desc->flags); rv = recover_from_urb_loss(desc); return rv; } #endif static int wdm_pre_reset(struct usb_interface *intf) { struct wdm_device *desc = wdm_find_device(intf); /* * we notify everybody using poll of * an exceptional situation * must be done before recovery lest a spontaneous * message from the device is lost */ spin_lock_irq(&desc->iuspin); set_bit(WDM_RESETTING, &desc->flags); /* inform read/write */ set_bit(WDM_READ, &desc->flags); /* unblock read */ clear_bit(WDM_IN_USE, &desc->flags); /* unblock write */ desc->rerr = -EINTR; spin_unlock_irq(&desc->iuspin); wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); return 0; } static int wdm_post_reset(struct usb_interface *intf) { struct wdm_device *desc = 
wdm_find_device(intf); int rv; unpoison_urbs(desc); clear_bit(WDM_OVERFLOW, &desc->flags); clear_bit(WDM_RESETTING, &desc->flags); rv = recover_from_urb_loss(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); return rv; } static struct usb_driver wdm_driver = { .name = "cdc_wdm", .probe = wdm_probe, .disconnect = wdm_disconnect, #ifdef CONFIG_PM .suspend = wdm_suspend, .resume = wdm_resume, .reset_resume = wdm_resume, #endif .pre_reset = wdm_pre_reset, .post_reset = wdm_post_reset, .id_table = wdm_ids, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; module_usb_driver(wdm_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
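As a usage illustration only: the character device registered above is driven from userspace with plain open/read/write plus the IOCTL_WDM_MAX_COMMAND ioctl exported through <linux/usb/cdc-wdm.h>. The sketch below assumes a /dev/cdc-wdm0 node; the payload handed to write() is whatever management protocol the device speaks (QMI, MBIM, ...) and is not shown.

/* Illustrative userspace sketch; /dev/cdc-wdm0 is an assumed node. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/usb/cdc-wdm.h>	/* IOCTL_WDM_MAX_COMMAND */

int main(void)
{
	__u16 max_cmd = 0;
	int fd = open("/dev/cdc-wdm0", O_RDWR);

	if (fd < 0)
		return 1;

	/* Upper bound for a single encapsulated command (wMaxCommand). */
	if (ioctl(fd, IOCTL_WDM_MAX_COMMAND, &max_cmd) == 0)
		printf("wMaxCommand: %u bytes\n", max_cmd);

	/*
	 * A write() maps to SEND_ENCAPSULATED_COMMAND; a later read()
	 * returns the response once the device raises RESPONSE_AVAILABLE
	 * on the interrupt endpoint.
	 */
	close(fd);
	return 0;
}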
// SPDX-License-Identifier: GPL-2.0-only /* * Marvell NFC driver: major functions * * Copyright (C) 2014-2015 Marvell International Ltd. */ #include <linux/module.h> #include <linux/gpio.h> #include <linux/delay.h> #include <linux/of_gpio.h> #include <linux/nfc.h> #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include "nfcmrvl.h" static int nfcmrvl_nci_open(struct nci_dev *ndev) { struct nfcmrvl_private *priv = nci_get_drvdata(ndev); int err; if (test_and_set_bit(NFCMRVL_NCI_RUNNING, &priv->flags)) return 0; /* Reset possible fault of previous session */ clear_bit(NFCMRVL_PHY_ERROR, &priv->flags); err = priv->if_ops->nci_open(priv); if (err) clear_bit(NFCMRVL_NCI_RUNNING, &priv->flags); return err; } static int nfcmrvl_nci_close(struct nci_dev *ndev) { struct nfcmrvl_private *priv = nci_get_drvdata(ndev); if (!test_and_clear_bit(NFCMRVL_NCI_RUNNING, &priv->flags)) return 0; priv->if_ops->nci_close(priv); return 0; } static int nfcmrvl_nci_send(struct nci_dev *ndev, struct sk_buff *skb) { struct nfcmrvl_private *priv = nci_get_drvdata(ndev); nfc_info(priv->dev, "send entry, len %d\n", skb->len); skb->dev = (void *)ndev; if (priv->config.hci_muxed) { unsigned char *hdr; unsigned char len = skb->len; hdr = skb_push(skb, NFCMRVL_HCI_EVENT_HEADER_SIZE); hdr[0] = NFCMRVL_HCI_COMMAND_CODE; hdr[1] = NFCMRVL_HCI_OGF; hdr[2] = NFCMRVL_HCI_OCF; hdr[3] = len; } return priv->if_ops->nci_send(priv, skb); } static int nfcmrvl_nci_setup(struct nci_dev *ndev) { __u8 val = 1; nci_set_config(ndev, NFCMRVL_PB_BAIL_OUT, 1, &val); return 0; } static int nfcmrvl_nci_fw_download(struct nci_dev *ndev, const char *firmware_name) { return nfcmrvl_fw_dnld_start(ndev, firmware_name); } static const struct nci_ops nfcmrvl_nci_ops = { .open = nfcmrvl_nci_open, .close = nfcmrvl_nci_close, .send = nfcmrvl_nci_send, .setup = nfcmrvl_nci_setup, .fw_download = nfcmrvl_nci_fw_download, }; struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy, void *drv_data, const struct nfcmrvl_if_ops *ops, struct device *dev, const struct nfcmrvl_platform_data *pdata) { struct nfcmrvl_private *priv; int rc; int headroom; int tailroom; u32 protocols; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return ERR_PTR(-ENOMEM); priv->drv_data = drv_data; priv->if_ops = ops; priv->dev = dev; priv->phy = phy; memcpy(&priv->config, pdata, sizeof(*pdata)); if (gpio_is_valid(priv->config.reset_n_io)) { rc = gpio_request_one(priv->config.reset_n_io, GPIOF_OUT_INIT_LOW, "nfcmrvl_reset_n"); if (rc < 0) { priv->config.reset_n_io = -EINVAL; nfc_err(dev,
"failed to request reset_n io\n"); } } if (phy == NFCMRVL_PHY_SPI) { headroom = NCI_SPI_HDR_LEN; tailroom = 1; } else headroom = tailroom = 0; if (priv->config.hci_muxed) headroom += NFCMRVL_HCI_EVENT_HEADER_SIZE; protocols = NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_FELICA_MASK | NFC_PROTO_ISO14443_MASK | NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_ISO15693_MASK | NFC_PROTO_NFC_DEP_MASK; priv->ndev = nci_allocate_device(&nfcmrvl_nci_ops, protocols, headroom, tailroom); if (!priv->ndev) { nfc_err(dev, "nci_allocate_device failed\n"); rc = -ENOMEM; goto error_free_gpio; } rc = nfcmrvl_fw_dnld_init(priv); if (rc) { nfc_err(dev, "failed to initialize FW download %d\n", rc); goto error_free_dev; } nci_set_drvdata(priv->ndev, priv); rc = nci_register_device(priv->ndev); if (rc) { nfc_err(dev, "nci_register_device failed %d\n", rc); goto error_fw_dnld_deinit; } /* Ensure that controller is powered off */ nfcmrvl_chip_halt(priv); nfc_info(dev, "registered with nci successfully\n"); return priv; error_fw_dnld_deinit: nfcmrvl_fw_dnld_deinit(priv); error_free_dev: nci_free_device(priv->ndev); error_free_gpio: if (gpio_is_valid(priv->config.reset_n_io)) gpio_free(priv->config.reset_n_io); kfree(priv); return ERR_PTR(rc); } EXPORT_SYMBOL_GPL(nfcmrvl_nci_register_dev); void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv) { struct nci_dev *ndev = priv->ndev; nci_unregister_device(ndev); if (priv->ndev->nfc_dev->fw_download_in_progress) nfcmrvl_fw_dnld_abort(priv); nfcmrvl_fw_dnld_deinit(priv); if (gpio_is_valid(priv->config.reset_n_io)) gpio_free(priv->config.reset_n_io); nci_free_device(ndev); kfree(priv); } EXPORT_SYMBOL_GPL(nfcmrvl_nci_unregister_dev); int nfcmrvl_nci_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb) { if (priv->config.hci_muxed) { if (skb->data[0] == NFCMRVL_HCI_EVENT_CODE && skb->data[1] == NFCMRVL_HCI_NFC_EVENT_CODE) { /* Data packet, let's extract NCI payload */ skb_pull(skb, NFCMRVL_HCI_EVENT_HEADER_SIZE); } else { /* Skip this packet */ kfree_skb(skb); return 0; } } if (priv->ndev->nfc_dev->fw_download_in_progress) { nfcmrvl_fw_dnld_recv_frame(priv, skb); return 0; } if (test_bit(NFCMRVL_NCI_RUNNING, &priv->flags)) nci_recv_frame(priv->ndev, skb); else { /* Drop this packet since nobody wants it */ kfree_skb(skb); return 0; } return 0; } EXPORT_SYMBOL_GPL(nfcmrvl_nci_recv_frame); void nfcmrvl_chip_reset(struct nfcmrvl_private *priv) { /* Reset possible fault of previous session */ clear_bit(NFCMRVL_PHY_ERROR, &priv->flags); if (gpio_is_valid(priv->config.reset_n_io)) { nfc_info(priv->dev, "reset the chip\n"); gpio_set_value(priv->config.reset_n_io, 0); usleep_range(5000, 10000); gpio_set_value(priv->config.reset_n_io, 1); } else nfc_info(priv->dev, "no reset available on this interface\n"); } void nfcmrvl_chip_halt(struct nfcmrvl_private *priv) { if (gpio_is_valid(priv->config.reset_n_io)) gpio_set_value(priv->config.reset_n_io, 0); } int nfcmrvl_parse_dt(struct device_node *node, struct nfcmrvl_platform_data *pdata) { int reset_n_io; reset_n_io = of_get_named_gpio(node, "reset-n-io", 0); if (reset_n_io < 0) { pr_info("no reset-n-io config\n"); } else if (!gpio_is_valid(reset_n_io)) { pr_err("invalid reset-n-io GPIO\n"); return reset_n_io; } pdata->reset_n_io = reset_n_io; pdata->hci_muxed = of_property_read_bool(node, "hci-muxed"); return 0; } EXPORT_SYMBOL_GPL(nfcmrvl_parse_dt); MODULE_AUTHOR("Marvell International Ltd."); MODULE_DESCRIPTION("Marvell NFC driver"); MODULE_LICENSE("GPL v2");
// SPDX-License-Identifier: GPL-2.0-only /* * Backlight Lowlevel Control Abstraction * * Copyright (C) 2003,2004 Hewlett-Packard Company * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> #include <linux/backlight.h> #include <linux/notifier.h> #include <linux/ctype.h> #include <linux/err.h> #include <linux/fb.h> #include <linux/slab.h> #ifdef CONFIG_PMAC_BACKLIGHT #include <asm/backlight.h> #endif /** * DOC: overview * * The backlight core supports implementing
backlight drivers. * * A backlight driver registers itself using * devm_backlight_device_register(). The properties of the backlight * driver, such as type and max_brightness, must be specified. * When the core detects changes in, for example, brightness or power state, * the update_status() operation is called. The backlight driver shall * implement this operation and use it to adjust the backlight. * * Several sysfs attributes are provided by the backlight core:: * * - brightness R/W, set the requested brightness level * - actual_brightness RO, the brightness level used by the HW * - max_brightness RO, the maximum brightness level supported * * See Documentation/ABI/stable/sysfs-class-backlight for the full list. * * The backlight can be adjusted using the sysfs interface, and * the backlight driver may also support adjusting the backlight using * a hot-key or some other platform- or firmware-specific way. * * The driver must implement the get_brightness() operation if * the HW does not support all the levels that can be specified in * brightness, thus providing user-space access to the actual level * via the actual_brightness attribute. * * When the backlight changes, this is reported to user-space using * an uevent connected to the actual_brightness attribute. * When brightness is set by platform-specific means, for example * a hot-key to adjust the backlight, the driver must notify the backlight * core that brightness has changed using backlight_force_update(). * * The backlight core receives notifications from fbdev: if the event is * FB_EVENT_BLANK and the blank value, from the FBIOBLANK ioctl, results * in a change in the backlight state, the update_status() operation is * called. */ static struct list_head backlight_dev_list; static struct mutex backlight_dev_list_mutex; static struct blocking_notifier_head backlight_notifier; static const char *const backlight_types[] = { [BACKLIGHT_RAW] = "raw", [BACKLIGHT_PLATFORM] = "platform", [BACKLIGHT_FIRMWARE] = "firmware", }; static const char *const backlight_scale_types[] = { [BACKLIGHT_SCALE_UNKNOWN] = "unknown", [BACKLIGHT_SCALE_LINEAR] = "linear", [BACKLIGHT_SCALE_NON_LINEAR] = "non-linear", }; #if defined(CONFIG_FB_CORE) || (defined(CONFIG_FB_CORE_MODULE) && \ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)) /* * fb_notifier_callback * * This callback gets called when something important happens inside a * framebuffer driver. The backlight core only cares about FB_BLANK_UNBLANK, * which is reported to the driver using backlight_update_status() * as a state change. * * There may be several fbdevs connected to the backlight device, * in which case they are all tracked. A state change is only reported * if it changes the backlight state for the specified fbdev. */ static int fb_notifier_callback(struct notifier_block *self, unsigned long event, void *data) { struct backlight_device *bd; struct fb_event *evdata = data; int node = evdata->info->node; int fb_blank = 0; /* If we aren't interested in this event, skip it immediately ...
*/ if (event != FB_EVENT_BLANK) return 0; bd = container_of(self, struct backlight_device, fb_notif); mutex_lock(&bd->ops_lock); if (!bd->ops) goto out; if (bd->ops->check_fb && !bd->ops->check_fb(bd, evdata->info)) goto out; fb_blank = *(int *)evdata->data; if (fb_blank == FB_BLANK_UNBLANK && !bd->fb_bl_on[node]) { bd->fb_bl_on[node] = true; if (!bd->use_count++) { bd->props.state &= ~BL_CORE_FBBLANK; bd->props.fb_blank = FB_BLANK_UNBLANK; backlight_update_status(bd); } } else if (fb_blank != FB_BLANK_UNBLANK && bd->fb_bl_on[node]) { bd->fb_bl_on[node] = false; if (!(--bd->use_count)) { bd->props.state |= BL_CORE_FBBLANK; bd->props.fb_blank = fb_blank; backlight_update_status(bd); } } out: mutex_unlock(&bd->ops_lock); return 0; } static int backlight_register_fb(struct backlight_device *bd) { memset(&bd->fb_notif, 0, sizeof(bd->fb_notif)); bd->fb_notif.notifier_call = fb_notifier_callback; return fb_register_client(&bd->fb_notif); } static void backlight_unregister_fb(struct backlight_device *bd) { fb_unregister_client(&bd->fb_notif); } #else static inline int backlight_register_fb(struct backlight_device *bd) { return 0; } static inline void backlight_unregister_fb(struct backlight_device *bd) { } #endif /* CONFIG_FB_CORE */ static void backlight_generate_event(struct backlight_device *bd, enum backlight_update_reason reason) { char *envp[2]; switch (reason) { case BACKLIGHT_UPDATE_SYSFS: envp[0] = "SOURCE=sysfs"; break; case BACKLIGHT_UPDATE_HOTKEY: envp[0] = "SOURCE=hotkey"; break; default: envp[0] = "SOURCE=unknown"; break; } envp[1] = NULL; kobject_uevent_env(&bd->dev.kobj, KOBJ_CHANGE, envp); sysfs_notify(&bd->dev.kobj, NULL, "actual_brightness"); } static ssize_t bl_power_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.power); } static ssize_t bl_power_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; struct backlight_device *bd = to_backlight_device(dev); unsigned long power, old_power; rc = kstrtoul(buf, 0, &power); if (rc) return rc; rc = -ENXIO; mutex_lock(&bd->ops_lock); if (bd->ops) { pr_debug("set power to %lu\n", power); if (bd->props.power != power) { old_power = bd->props.power; bd->props.power = power; rc = backlight_update_status(bd); if (rc) bd->props.power = old_power; else rc = count; } else { rc = count; } } mutex_unlock(&bd->ops_lock); return rc; } static DEVICE_ATTR_RW(bl_power); static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.brightness); } int backlight_device_set_brightness(struct backlight_device *bd, unsigned long brightness) { int rc = -ENXIO; mutex_lock(&bd->ops_lock); if (bd->ops) { if (brightness > bd->props.max_brightness) rc = -EINVAL; else { pr_debug("set brightness to %lu\n", brightness); bd->props.brightness = brightness; rc = backlight_update_status(bd); } } mutex_unlock(&bd->ops_lock); backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS); return rc; } EXPORT_SYMBOL(backlight_device_set_brightness); static ssize_t brightness_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; struct backlight_device *bd = to_backlight_device(dev); unsigned long brightness; rc = kstrtoul(buf, 0, &brightness); if (rc) return rc; rc = backlight_device_set_brightness(bd, brightness); return rc ? 
rc : count; } static DEVICE_ATTR_RW(brightness); static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%s\n", backlight_types[bd->props.type]); } static DEVICE_ATTR_RO(type); static ssize_t max_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.max_brightness); } static DEVICE_ATTR_RO(max_brightness); static ssize_t actual_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { int rc = -ENXIO; struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->get_brightness) { rc = bd->ops->get_brightness(bd); if (rc >= 0) rc = sprintf(buf, "%d\n", rc); } else { rc = sprintf(buf, "%d\n", bd->props.brightness); } mutex_unlock(&bd->ops_lock); return rc; } static DEVICE_ATTR_RO(actual_brightness); static ssize_t scale_show(struct device *dev, struct device_attribute *attr, char *buf) { struct backlight_device *bd = to_backlight_device(dev); if (WARN_ON(bd->props.scale > BACKLIGHT_SCALE_NON_LINEAR)) return sprintf(buf, "unknown\n"); return sprintf(buf, "%s\n", backlight_scale_types[bd->props.scale]); } static DEVICE_ATTR_RO(scale); static struct class *backlight_class; #ifdef CONFIG_PM_SLEEP static int backlight_suspend(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->options & BL_CORE_SUSPENDRESUME) { bd->props.state |= BL_CORE_SUSPENDED; backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); return 0; } static int backlight_resume(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->options & BL_CORE_SUSPENDRESUME) { bd->props.state &= ~BL_CORE_SUSPENDED; backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); return 0; } #endif static SIMPLE_DEV_PM_OPS(backlight_class_dev_pm_ops, backlight_suspend, backlight_resume); static void bl_device_release(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); kfree(bd); } static struct attribute *bl_device_attrs[] = { &dev_attr_bl_power.attr, &dev_attr_brightness.attr, &dev_attr_actual_brightness.attr, &dev_attr_max_brightness.attr, &dev_attr_scale.attr, &dev_attr_type.attr, NULL, }; ATTRIBUTE_GROUPS(bl_device); /** * backlight_force_update - tell the backlight subsystem that hardware state * has changed * @bd: the backlight device to update * @reason: reason for update * * Updates the internal state of the backlight in response to a hardware event, * and generates an uevent to notify userspace. A backlight driver shall call * backlight_force_update() when the backlight is changed using, for example, * a hot-key. The updated brightness is read using get_brightness() and the * brightness value is reported using an uevent. 
*/ void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason) { int brightness; mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->get_brightness) { brightness = bd->ops->get_brightness(bd); if (brightness >= 0) bd->props.brightness = brightness; else dev_err(&bd->dev, "Could not update brightness from device: %pe\n", ERR_PTR(brightness)); } mutex_unlock(&bd->ops_lock); backlight_generate_event(bd, reason); } EXPORT_SYMBOL(backlight_force_update); /* deprecated - use devm_backlight_device_register() */ struct backlight_device *backlight_device_register(const char *name, struct device *parent, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props) { struct backlight_device *new_bd; int rc; pr_debug("backlight_device_register: name=%s\n", name); new_bd = kzalloc(sizeof(struct backlight_device), GFP_KERNEL); if (!new_bd) return ERR_PTR(-ENOMEM); mutex_init(&new_bd->update_lock); mutex_init(&new_bd->ops_lock); new_bd->dev.class = backlight_class; new_bd->dev.parent = parent; new_bd->dev.release = bl_device_release; dev_set_name(&new_bd->dev, "%s", name); dev_set_drvdata(&new_bd->dev, devdata); /* Set default properties */ if (props) { memcpy(&new_bd->props, props, sizeof(struct backlight_properties)); if (props->type <= 0 || props->type >= BACKLIGHT_TYPE_MAX) { WARN(1, "%s: invalid backlight type", name); new_bd->props.type = BACKLIGHT_RAW; } } else { new_bd->props.type = BACKLIGHT_RAW; } rc = device_register(&new_bd->dev); if (rc) { put_device(&new_bd->dev); return ERR_PTR(rc); } rc = backlight_register_fb(new_bd); if (rc) { device_unregister(&new_bd->dev); return ERR_PTR(rc); } new_bd->ops = ops; #ifdef CONFIG_PMAC_BACKLIGHT mutex_lock(&pmac_backlight_mutex); if (!pmac_backlight) pmac_backlight = new_bd; mutex_unlock(&pmac_backlight_mutex); #endif mutex_lock(&backlight_dev_list_mutex); list_add(&new_bd->entry, &backlight_dev_list); mutex_unlock(&backlight_dev_list_mutex); blocking_notifier_call_chain(&backlight_notifier, BACKLIGHT_REGISTERED, new_bd); return new_bd; } EXPORT_SYMBOL(backlight_device_register); /** backlight_device_get_by_type - find first backlight device of a type * @type: the type of backlight device * * Look up the first backlight device of the specified type * * RETURNS: * * Pointer to backlight device if any was found. Otherwise NULL. */ struct backlight_device *backlight_device_get_by_type(enum backlight_type type) { bool found = false; struct backlight_device *bd; mutex_lock(&backlight_dev_list_mutex); list_for_each_entry(bd, &backlight_dev_list, entry) { if (bd->props.type == type) { found = true; break; } } mutex_unlock(&backlight_dev_list_mutex); return found ? bd : NULL; } EXPORT_SYMBOL(backlight_device_get_by_type); /** * backlight_device_get_by_name - Get backlight device by name * @name: Device name * * This function looks up a backlight device by its name. It obtains a reference * on the backlight device and it is the caller's responsibility to drop the * reference by calling put_device(). * * Returns: * A pointer to the backlight device if found, otherwise NULL. */ struct backlight_device *backlight_device_get_by_name(const char *name) { struct device *dev; dev = class_find_device_by_name(backlight_class, name); return dev ? 
to_backlight_device(dev) : NULL; } EXPORT_SYMBOL(backlight_device_get_by_name); /* deprecated - use devm_backlight_device_unregister() */ void backlight_device_unregister(struct backlight_device *bd) { if (!bd) return; mutex_lock(&backlight_dev_list_mutex); list_del(&bd->entry); mutex_unlock(&backlight_dev_list_mutex); #ifdef CONFIG_PMAC_BACKLIGHT mutex_lock(&pmac_backlight_mutex); if (pmac_backlight == bd) pmac_backlight = NULL; mutex_unlock(&pmac_backlight_mutex); #endif blocking_notifier_call_chain(&backlight_notifier, BACKLIGHT_UNREGISTERED, bd); mutex_lock(&bd->ops_lock); bd->ops = NULL; mutex_unlock(&bd->ops_lock); backlight_unregister_fb(bd); device_unregister(&bd->dev); } EXPORT_SYMBOL(backlight_device_unregister); static void devm_backlight_device_release(struct device *dev, void *res) { struct backlight_device *backlight = *(struct backlight_device **)res; backlight_device_unregister(backlight); } static int devm_backlight_device_match(struct device *dev, void *res, void *data) { struct backlight_device **r = res; return *r == data; } /** * backlight_register_notifier - get notified of backlight (un)registration * @nb: notifier block with the notifier to call on backlight (un)registration * * Register a notifier to get notified when backlight devices get registered * or unregistered. * * RETURNS: * * 0 on success, otherwise a negative error code */ int backlight_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&backlight_notifier, nb); } EXPORT_SYMBOL(backlight_register_notifier); /** * backlight_unregister_notifier - unregister a backlight notifier * @nb: notifier block to unregister * * Register a notifier to get notified when backlight devices get registered * or unregistered. * * RETURNS: * * 0 on success, otherwise a negative error code */ int backlight_unregister_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&backlight_notifier, nb); } EXPORT_SYMBOL(backlight_unregister_notifier); /** * devm_backlight_device_register - register a new backlight device * @dev: the device to register * @name: the name of the device * @parent: a pointer to the parent device (often the same as @dev) * @devdata: an optional pointer to be stored for private driver use * @ops: the backlight operations structure * @props: the backlight properties * * Creates and registers new backlight device. When a backlight device * is registered the configuration must be specified in the @props * parameter. See description of &backlight_properties. * * RETURNS: * * struct backlight on success, or an ERR_PTR on error */ struct backlight_device *devm_backlight_device_register(struct device *dev, const char *name, struct device *parent, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props) { struct backlight_device **ptr, *backlight; ptr = devres_alloc(devm_backlight_device_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); backlight = backlight_device_register(name, parent, devdata, ops, props); if (!IS_ERR(backlight)) { *ptr = backlight; devres_add(dev, ptr); } else { devres_free(ptr); } return backlight; } EXPORT_SYMBOL(devm_backlight_device_register); /** * devm_backlight_device_unregister - unregister backlight device * @dev: the device to unregister * @bd: the backlight device to unregister * * Deallocates a backlight allocated with devm_backlight_device_register(). 
* Normally this function will not need to be called and the resource management * code will ensure that the resources are freed. */ void devm_backlight_device_unregister(struct device *dev, struct backlight_device *bd) { int rc; rc = devres_release(dev, devm_backlight_device_release, devm_backlight_device_match, bd); WARN_ON(rc); } EXPORT_SYMBOL(devm_backlight_device_unregister); #ifdef CONFIG_OF static int of_parent_match(struct device *dev, const void *data) { return dev->parent && dev->parent->of_node == data; } /** * of_find_backlight_by_node() - find backlight device by device-tree node * @node: device-tree node of the backlight device * * Returns a pointer to the backlight device corresponding to the given DT * node or NULL if no such backlight device exists or if the device hasn't * been probed yet. * * This function obtains a reference on the backlight device and it is the * caller's responsibility to drop the reference by calling put_device() on * the backlight device's .dev field. */ struct backlight_device *of_find_backlight_by_node(struct device_node *node) { struct device *dev; dev = class_find_device(backlight_class, NULL, node, of_parent_match); return dev ? to_backlight_device(dev) : NULL; } EXPORT_SYMBOL(of_find_backlight_by_node); #endif static struct backlight_device *of_find_backlight(struct device *dev) { struct backlight_device *bd = NULL; struct device_node *np; if (!dev) return NULL; if (IS_ENABLED(CONFIG_OF) && dev->of_node) { np = of_parse_phandle(dev->of_node, "backlight", 0); if (np) { bd = of_find_backlight_by_node(np); of_node_put(np); if (!bd) return ERR_PTR(-EPROBE_DEFER); } } return bd; } static void devm_backlight_release(void *data) { struct backlight_device *bd = data; put_device(&bd->dev); } /** * devm_of_find_backlight - find backlight for a device * @dev: the device * * This function looks for a property named 'backlight' on the DT node * connected to @dev and looks up the backlight device. The lookup is * device managed so the reference to the backlight device is automatically * dropped on driver detach. * * RETURNS: * * A pointer to the backlight device if found. * Error pointer -EPROBE_DEFER if the DT property is set, but no backlight * device is found. NULL if there's no backlight property. */ struct backlight_device *devm_of_find_backlight(struct device *dev) { struct backlight_device *bd; int ret; bd = of_find_backlight(dev); if (IS_ERR_OR_NULL(bd)) return bd; ret = devm_add_action_or_reset(dev, devm_backlight_release, bd); if (ret) return ERR_PTR(ret); return bd; } EXPORT_SYMBOL(devm_of_find_backlight); static void __exit backlight_class_exit(void) { class_destroy(backlight_class); } static int __init backlight_class_init(void) { backlight_class = class_create("backlight"); if (IS_ERR(backlight_class)) { pr_warn("Unable to create backlight class; errno = %ld\n", PTR_ERR(backlight_class)); return PTR_ERR(backlight_class); } backlight_class->dev_groups = bl_device_groups; backlight_class->pm = &backlight_class_dev_pm_ops; INIT_LIST_HEAD(&backlight_dev_list); mutex_init(&backlight_dev_list_mutex); BLOCKING_INIT_NOTIFIER_HEAD(&backlight_notifier); return 0; } /* * if this is compiled into the kernel, we need to ensure that the * class is registered before users of the class try to register lcd's */ postcore_initcall(backlight_class_init); module_exit(backlight_class_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jamey Hicks <jamey.hicks@hp.com>, Andrew Zabolotny <zap@homelink.ru>"); MODULE_DESCRIPTION("Backlight Lowlevel Control Abstraction");
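/*
 * Hedged sketch (not part of the file above) of the driver side of the API
 * described in the DOC: overview: a minimal client that registers with
 * devm_backlight_device_register() and implements update_status() and
 * get_brightness(). The my_hw_* helpers, my_bl_* names and the
 * "my-backlight" device name are hypothetical placeholders.
 */
#include <linux/backlight.h>
#include <linux/err.h>
#include <linux/platform_device.h>

/* Hypothetical hardware accessors standing in for real register access. */
static int my_hw_set_level(struct device *dev, int level) { return 0; }
static int my_hw_get_level(struct device *dev) { return 0; }

static int my_bl_update_status(struct backlight_device *bd)
{
	int level = bd->props.brightness;

	/* Honour the state bits maintained by the core (see
	 * fb_notifier_callback() and backlight_suspend() above).
	 */
	if (bd->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK))
		level = 0;

	return my_hw_set_level(bd->dev.parent, level);
}

static int my_bl_get_brightness(struct backlight_device *bd)
{
	/* Exposed to user-space via the actual_brightness attribute. */
	return my_hw_get_level(bd->dev.parent);
}

static const struct backlight_ops my_bl_ops = {
	.options	= BL_CORE_SUSPENDRESUME,
	.update_status	= my_bl_update_status,
	.get_brightness	= my_bl_get_brightness,
};

static int my_bl_probe(struct platform_device *pdev)
{
	struct backlight_properties props = {
		.type		= BACKLIGHT_RAW,
		.max_brightness	= 255,
		.brightness	= 255,
	};
	struct backlight_device *bd;

	bd = devm_backlight_device_register(&pdev->dev, "my-backlight",
					    &pdev->dev, NULL, &my_bl_ops,
					    &props);
	if (IS_ERR(bd))
		return PTR_ERR(bd);

	/* Push the initial level to the hardware. */
	backlight_update_status(bd);
	return 0;
}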
2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 // SPDX-License-Identifier: GPL-2.0+ /* * drivers/usb/class/usbtmc.c - USB Test & Measurement class driver * * Copyright (C) 2007 Stefan Kopp, Gechingen, Germany * Copyright (C) 2008 Novell, Inc. * Copyright (C) 2008 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (C) 2018 IVI Foundation, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/kref.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/mutex.h> #include <linux/usb.h> #include <linux/compat.h> #include <linux/usb/tmc.h> /* Increment API VERSION when changing tmc.h with new flags or ioctls * or when changing a significant behavior of the driver. */ #define USBTMC_API_VERSION (3) #define USBTMC_HEADER_SIZE 12 #define USBTMC_MINOR_BASE 176 /* Minimum USB timeout (in milliseconds) */ #define USBTMC_MIN_TIMEOUT 100 /* Default USB timeout (in milliseconds) */ #define USBTMC_TIMEOUT 5000 /* Max number of urbs used in write transfers */ #define MAX_URBS_IN_FLIGHT 16 /* I/O buffer size used in generic read/write functions */ #define USBTMC_BUFSIZE (4096) /* * Maximum number of read cycles to empty bulk in endpoint during CLEAR and * ABORT_BULK_IN requests. Ends the loop if (for whatever reason) a short * packet is never read. */ #define USBTMC_MAX_READS_TO_CLEAR_BULK_IN 100 static const struct usb_device_id usbtmc_devices[] = { { USB_INTERFACE_INFO(USB_CLASS_APP_SPEC, 3, 0), }, { USB_INTERFACE_INFO(USB_CLASS_APP_SPEC, 3, 1), }, { 0, } /* terminating entry */ }; MODULE_DEVICE_TABLE(usb, usbtmc_devices); /* * This structure is the capabilities for the device * See section 4.2.1.8 of the USBTMC specification, * and section 4.2.2 of the USBTMC usb488 subclass * specification for details. */ struct usbtmc_dev_capabilities { __u8 interface_capabilities; __u8 device_capabilities; __u8 usb488_interface_capabilities; __u8 usb488_device_capabilities; }; /* This structure holds private data for each USBTMC device. One copy is * allocated for each USBTMC device in the driver's probe function. 
*/ struct usbtmc_device_data { const struct usb_device_id *id; struct usb_device *usb_dev; struct usb_interface *intf; struct list_head file_list; unsigned int bulk_in; unsigned int bulk_out; u8 bTag; u8 bTag_last_write; /* needed for abort */ u8 bTag_last_read; /* needed for abort */ /* packet size of IN bulk */ u16 wMaxPacketSize; /* data for interrupt in endpoint handling */ u8 bNotify1; u8 bNotify2; u16 ifnum; u8 iin_bTag; u8 *iin_buffer; atomic_t iin_data_valid; unsigned int iin_ep; int iin_ep_present; int iin_interval; struct urb *iin_urb; u16 iin_wMaxPacketSize; /* coalesced usb488_caps from usbtmc_dev_capabilities */ __u8 usb488_caps; bool zombie; /* fd of disconnected device */ struct usbtmc_dev_capabilities capabilities; struct kref kref; struct mutex io_mutex; /* only one i/o function running at a time */ wait_queue_head_t waitq; struct fasync_struct *fasync; spinlock_t dev_lock; /* lock for file_list */ }; #define to_usbtmc_data(d) container_of(d, struct usbtmc_device_data, kref) /* * This structure holds private data for each USBTMC file handle. */ struct usbtmc_file_data { struct usbtmc_device_data *data; struct list_head file_elem; u32 timeout; u8 srq_byte; atomic_t srq_asserted; atomic_t closing; u8 bmTransferAttributes; /* member of DEV_DEP_MSG_IN */ u8 eom_val; u8 term_char; bool term_char_enabled; bool auto_abort; spinlock_t err_lock; /* lock for errors */ struct usb_anchor submitted; /* data for generic_write */ struct semaphore limit_write_sem; u32 out_transfer_size; int out_status; /* data for generic_read */ u32 in_transfer_size; int in_status; int in_urbs_used; struct usb_anchor in_anchor; wait_queue_head_t wait_bulk_in; }; /* Forward declarations */ static struct usb_driver usbtmc_driver; static void usbtmc_draw_down(struct usbtmc_file_data *file_data); static void usbtmc_delete(struct kref *kref) { struct usbtmc_device_data *data = to_usbtmc_data(kref); usb_put_dev(data->usb_dev); kfree(data); } static int usbtmc_open(struct inode *inode, struct file *filp) { struct usb_interface *intf; struct usbtmc_device_data *data; struct usbtmc_file_data *file_data; intf = usb_find_interface(&usbtmc_driver, iminor(inode)); if (!intf) { pr_err("can not find device for minor %d", iminor(inode)); return -ENODEV; } file_data = kzalloc(sizeof(*file_data), GFP_KERNEL); if (!file_data) return -ENOMEM; spin_lock_init(&file_data->err_lock); sema_init(&file_data->limit_write_sem, MAX_URBS_IN_FLIGHT); init_usb_anchor(&file_data->submitted); init_usb_anchor(&file_data->in_anchor); init_waitqueue_head(&file_data->wait_bulk_in); data = usb_get_intfdata(intf); /* Protect reference to data from file structure until release */ kref_get(&data->kref); mutex_lock(&data->io_mutex); file_data->data = data; atomic_set(&file_data->closing, 0); file_data->timeout = USBTMC_TIMEOUT; file_data->term_char = '\n'; file_data->term_char_enabled = 0; file_data->auto_abort = 0; file_data->eom_val = 1; INIT_LIST_HEAD(&file_data->file_elem); spin_lock_irq(&data->dev_lock); list_add_tail(&file_data->file_elem, &data->file_list); spin_unlock_irq(&data->dev_lock); mutex_unlock(&data->io_mutex); /* Store pointer in file structure's private data field */ filp->private_data = file_data; return 0; } /* * usbtmc_flush - called before file handle is closed */ static int usbtmc_flush(struct file *file, fl_owner_t id) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; file_data = file->private_data; if (file_data == NULL) return -ENODEV; atomic_set(&file_data->closing, 1); data = file_data->data; /* 
wait for io to stop */ mutex_lock(&data->io_mutex); usbtmc_draw_down(file_data); spin_lock_irq(&file_data->err_lock); file_data->in_status = 0; file_data->in_transfer_size = 0; file_data->in_urbs_used = 0; file_data->out_status = 0; file_data->out_transfer_size = 0; spin_unlock_irq(&file_data->err_lock); wake_up_interruptible_all(&data->waitq); mutex_unlock(&data->io_mutex); return 0; } static int usbtmc_release(struct inode *inode, struct file *file) { struct usbtmc_file_data *file_data = file->private_data; /* prevent IO _AND_ usbtmc_interrupt */ mutex_lock(&file_data->data->io_mutex); spin_lock_irq(&file_data->data->dev_lock); list_del(&file_data->file_elem); spin_unlock_irq(&file_data->data->dev_lock); mutex_unlock(&file_data->data->io_mutex); kref_put(&file_data->data->kref, usbtmc_delete); file_data->data = NULL; kfree(file_data); return 0; } static int usbtmc_ioctl_abort_bulk_in_tag(struct usbtmc_device_data *data, u8 tag) { u8 *buffer; struct device *dev; int rv; int n; int actual; dev = &data->intf->dev; buffer = kmalloc(USBTMC_BUFSIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INITIATE_ABORT_BULK_IN, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, tag, data->bulk_in, buffer, 2, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INITIATE_ABORT_BULK_IN returned %x with tag %02x\n", buffer[0], buffer[1]); if (buffer[0] == USBTMC_STATUS_FAILED) { /* No transfer in progress and the Bulk-OUT FIFO is empty. */ rv = 0; goto exit; } if (buffer[0] == USBTMC_STATUS_TRANSFER_NOT_IN_PROGRESS) { /* The device returns this status if either: * - There is a transfer in progress, but the specified bTag * does not match. * - There is no transfer in progress, but the Bulk-OUT FIFO * is not empty. */ rv = -ENOMSG; goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_ABORT_BULK_IN returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_abort_bulk_in_status: dev_dbg(dev, "Reading from bulk in EP\n"); /* Data must be present. So use low timeout 300 ms */ actual = 0; rv = usb_bulk_msg(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, USBTMC_BUFSIZE, &actual, 300); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); n++; if (rv < 0) { dev_err(dev, "usb_bulk_msg returned %d\n", rv); if (rv != -ETIMEDOUT) goto exit; } if (actual == USBTMC_BUFSIZE) goto usbtmc_abort_bulk_in_status; if (n >= USBTMC_MAX_READS_TO_CLEAR_BULK_IN) { dev_err(dev, "Couldn't clear device buffer within %d cycles\n", USBTMC_MAX_READS_TO_CLEAR_BULK_IN); rv = -EPERM; goto exit; } rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_ABORT_BULK_IN_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, 0, data->bulk_in, buffer, 0x08, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_ABORT_BULK_IN returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) { rv = 0; goto exit; } if (buffer[0] != USBTMC_STATUS_PENDING) { dev_err(dev, "CHECK_ABORT_BULK_IN returned %x\n", buffer[0]); rv = -EPERM; goto exit; } if ((buffer[1] & 1) > 0) { /* The device has 1 or more queued packets the Host can read */ goto usbtmc_abort_bulk_in_status; } /* The Host must send CHECK_ABORT_BULK_IN_STATUS at a later time. 
*/ rv = -EAGAIN; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_abort_bulk_in(struct usbtmc_device_data *data) { return usbtmc_ioctl_abort_bulk_in_tag(data, data->bTag_last_read); } static int usbtmc_ioctl_abort_bulk_out_tag(struct usbtmc_device_data *data, u8 tag) { struct device *dev; u8 *buffer; int rv; int n; dev = &data->intf->dev; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INITIATE_ABORT_BULK_OUT, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, tag, data->bulk_out, buffer, 2, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INITIATE_ABORT_BULK_OUT returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_ABORT_BULK_OUT returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_abort_bulk_out_check_status: /* do not stress device with subsequent requests */ msleep(50); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_ABORT_BULK_OUT_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, 0, data->bulk_out, buffer, 0x08, USB_CTRL_GET_TIMEOUT); n++; if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_ABORT_BULK_OUT returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) goto usbtmc_abort_bulk_out_clear_halt; if ((buffer[0] == USBTMC_STATUS_PENDING) && (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN)) goto usbtmc_abort_bulk_out_check_status; rv = -EPERM; goto exit; usbtmc_abort_bulk_out_clear_halt: rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_abort_bulk_out(struct usbtmc_device_data *data) { return usbtmc_ioctl_abort_bulk_out_tag(data, data->bTag_last_write); } static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; u8 *buffer; u8 tag; int rv; dev_dbg(dev, "Enter ioctl_read_stb iin_ep_present: %d\n", data->iin_ep_present); buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; atomic_set(&data->iin_data_valid, 0); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC488_REQUEST_READ_STATUS_BYTE, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, data->iin_bTag, data->ifnum, buffer, 0x03, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "stb usb_control_msg returned %d\n", rv); goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "control status returned %x\n", buffer[0]); rv = -EIO; goto exit; } if (data->iin_ep_present) { rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&data->iin_data_valid) != 0, file_data->timeout); if (rv < 0) { dev_dbg(dev, "wait interrupted %d\n", rv); goto exit; } if (rv == 0) { dev_dbg(dev, "wait timed out\n"); rv = -ETIMEDOUT; goto exit; } tag = data->bNotify1 & 0x7f; if (tag != data->iin_bTag) { dev_err(dev, "expected bTag %x got %x\n", data->iin_bTag, tag); } *stb = data->bNotify2; } else { *stb = buffer[2]; } dev_dbg(dev, "stb:0x%02x received %d\n", (unsigned int)*stb, rv); exit: /* bump interrupt bTag */ data->iin_bTag += 1; if (data->iin_bTag > 127) /* 1 is for SRQ see USBTMC-USB488 subclass spec section 4.3.1 */ data->iin_bTag = 2; kfree(buffer); return rv; } static int 
usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data, void __user *arg) { int srq_asserted = 0; __u8 stb; int rv; rv = usbtmc_get_stb(file_data, &stb); if (rv > 0) { srq_asserted = atomic_xchg(&file_data->srq_asserted, srq_asserted); if (srq_asserted) stb |= 0x40; /* Set RQS bit */ rv = put_user(stb, (__u8 __user *)arg); } return rv; } static int usbtmc_ioctl_get_srq_stb(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; int srq_asserted = 0; __u8 stb = 0; int rv; spin_lock_irq(&data->dev_lock); srq_asserted = atomic_xchg(&file_data->srq_asserted, srq_asserted); if (srq_asserted) { stb = file_data->srq_byte; spin_unlock_irq(&data->dev_lock); rv = put_user(stb, (__u8 __user *)arg); } else { spin_unlock_irq(&data->dev_lock); rv = -ENOMSG; } dev_dbg(dev, "stb:0x%02x with srq received %d\n", (unsigned int)stb, rv); return rv; } static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, __u32 __user *arg) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; int rv; u32 timeout; unsigned long expire; if (!data->iin_ep_present) { dev_dbg(dev, "no interrupt endpoint present\n"); return -EFAULT; } if (get_user(timeout, arg)) return -EFAULT; expire = msecs_to_jiffies(timeout); mutex_unlock(&data->io_mutex); rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&file_data->srq_asserted) != 0 || atomic_read(&file_data->closing), expire); mutex_lock(&data->io_mutex); /* Note! disconnect or close could be called in the meantime */ if (atomic_read(&file_data->closing) || data->zombie) rv = -ENODEV; if (rv < 0) { /* dev can be invalid now! */ pr_debug("%s - wait interrupted %d\n", __func__, rv); return rv; } if (rv == 0) { dev_dbg(dev, "%s - wait timed out\n", __func__); return -ETIMEDOUT; } dev_dbg(dev, "%s - srq asserted\n", __func__); return 0; } static int usbtmc488_ioctl_simple(struct usbtmc_device_data *data, void __user *arg, unsigned int cmd) { struct device *dev = &data->intf->dev; __u8 val; u8 *buffer; u16 wValue; int rv; if (!(data->usb488_caps & USBTMC488_CAPABILITY_SIMPLE)) return -EINVAL; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return -ENOMEM; if (cmd == USBTMC488_REQUEST_REN_CONTROL) { rv = copy_from_user(&val, arg, sizeof(val)); if (rv) { rv = -EFAULT; goto exit; } wValue = val ? 1 : 0; } else { wValue = 0; } rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), cmd, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, wValue, data->ifnum, buffer, 0x01, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "simple usb_control_msg failed %d\n", rv); goto exit; } else if (rv != 1) { dev_warn(dev, "simple usb_control_msg returned %d\n", rv); rv = -EIO; goto exit; } if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "simple control status returned %x\n", buffer[0]); rv = -EIO; goto exit; } rv = 0; exit: kfree(buffer); return rv; } /* * Sends a TRIGGER Bulk-OUT command message * See the USBTMC-USB488 specification, Table 2. * * Also updates bTag_last_write. 
*/ static int usbtmc488_ioctl_trigger(struct usbtmc_file_data *file_data) { struct usbtmc_device_data *data = file_data->data; int retval; u8 *buffer; int actual; buffer = kzalloc(USBTMC_HEADER_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; buffer[0] = 128; buffer[1] = data->bTag; buffer[2] = ~data->bTag; retval = usb_bulk_msg(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, &actual, file_data->timeout); /* Store bTag (in case we need to abort) */ data->bTag_last_write = data->bTag; /* Increment bTag -- and increment again if zero */ data->bTag++; if (!data->bTag) data->bTag++; kfree(buffer); if (retval < 0) { dev_err(&data->intf->dev, "%s returned %d\n", __func__, retval); return retval; } return 0; } static struct urb *usbtmc_create_urb(void) { const size_t bufsize = USBTMC_BUFSIZE; u8 *dmabuf = NULL; struct urb *urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return NULL; dmabuf = kmalloc(bufsize, GFP_KERNEL); if (!dmabuf) { usb_free_urb(urb); return NULL; } urb->transfer_buffer = dmabuf; urb->transfer_buffer_length = bufsize; urb->transfer_flags |= URB_FREE_BUFFER; return urb; } static void usbtmc_read_bulk_cb(struct urb *urb) { struct usbtmc_file_data *file_data = urb->context; int status = urb->status; unsigned long flags; /* sync/async unlink faults aren't errors */ if (status) { if (!(/* status == -ENOENT || */ status == -ECONNRESET || status == -EREMOTEIO || /* Short packet */ status == -ESHUTDOWN)) dev_err(&file_data->data->intf->dev, "%s - nonzero read bulk status received: %d\n", __func__, status); spin_lock_irqsave(&file_data->err_lock, flags); if (!file_data->in_status) file_data->in_status = status; spin_unlock_irqrestore(&file_data->err_lock, flags); } spin_lock_irqsave(&file_data->err_lock, flags); file_data->in_transfer_size += urb->actual_length; dev_dbg(&file_data->data->intf->dev, "%s - total size: %u current: %d status: %d\n", __func__, file_data->in_transfer_size, urb->actual_length, status); spin_unlock_irqrestore(&file_data->err_lock, flags); usb_anchor_urb(urb, &file_data->in_anchor); wake_up_interruptible(&file_data->wait_bulk_in); wake_up_interruptible(&file_data->data->waitq); } static inline bool usbtmc_do_transfer(struct usbtmc_file_data *file_data) { bool data_or_error; spin_lock_irq(&file_data->err_lock); data_or_error = !usb_anchor_empty(&file_data->in_anchor) || file_data->in_status; spin_unlock_irq(&file_data->err_lock); dev_dbg(&file_data->data->intf->dev, "%s: returns %d\n", __func__, data_or_error); return data_or_error; } static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, void __user *user_buffer, u32 transfer_size, u32 *transferred, u32 flags) { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; u32 done = 0; u32 remaining; const u32 bufsize = USBTMC_BUFSIZE; int retval = 0; u32 max_transfer_size; unsigned long expire; int bufcount = 1; int again = 0; /* mutex already locked */ *transferred = done; max_transfer_size = transfer_size; if (flags & USBTMC_FLAG_IGNORE_TRAILER) { /* The device may send extra alignment bytes (up to * wMaxPacketSize – 1) to avoid sending a zero-length * packet */ remaining = transfer_size; if ((max_transfer_size % data->wMaxPacketSize) == 0) max_transfer_size += (data->wMaxPacketSize - 1); } else { /* round down to bufsize to avoid truncated data left */ if (max_transfer_size > bufsize) { max_transfer_size = roundup(max_transfer_size + 1 - bufsize, bufsize); } remaining = max_transfer_size; } 
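	/*
	 * At this point max_transfer_size has either been padded with up to
	 * wMaxPacketSize - 1 extra bytes (USBTMC_FLAG_IGNORE_TRAILER, so a
	 * trailing alignment packet can be absorbed) or, when larger than
	 * USBTMC_BUFSIZE, rounded down to a whole number of USBTMC_BUFSIZE
	 * buffers. The code below converts it into a count of bulk-in URBs,
	 * capped so that no more than MAX_URBS_IN_FLIGHT are in flight.
	 */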
spin_lock_irq(&file_data->err_lock); if (file_data->in_status) { /* return the very first error */ retval = file_data->in_status; spin_unlock_irq(&file_data->err_lock); goto error; } if (flags & USBTMC_FLAG_ASYNC) { if (usb_anchor_empty(&file_data->in_anchor)) again = 1; if (file_data->in_urbs_used == 0) { file_data->in_transfer_size = 0; file_data->in_status = 0; } } else { file_data->in_transfer_size = 0; file_data->in_status = 0; } if (max_transfer_size == 0) { bufcount = 0; } else { bufcount = roundup(max_transfer_size, bufsize) / bufsize; if (bufcount > file_data->in_urbs_used) bufcount -= file_data->in_urbs_used; else bufcount = 0; if (bufcount + file_data->in_urbs_used > MAX_URBS_IN_FLIGHT) { bufcount = MAX_URBS_IN_FLIGHT - file_data->in_urbs_used; } } spin_unlock_irq(&file_data->err_lock); dev_dbg(dev, "%s: requested=%u flags=0x%X size=%u bufs=%d used=%d\n", __func__, transfer_size, flags, max_transfer_size, bufcount, file_data->in_urbs_used); while (bufcount > 0) { u8 *dmabuf = NULL; struct urb *urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; goto error; } dmabuf = urb->transfer_buffer; usb_fill_bulk_urb(urb, data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), dmabuf, bufsize, usbtmc_read_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); /* urb is anchored. We can release our reference. */ usb_free_urb(urb); if (unlikely(retval)) { usb_unanchor_urb(urb); goto error; } file_data->in_urbs_used++; bufcount--; } if (again) { dev_dbg(dev, "%s: ret=again\n", __func__); return -EAGAIN; } if (user_buffer == NULL) return -EINVAL; expire = msecs_to_jiffies(file_data->timeout); while (max_transfer_size > 0) { u32 this_part; struct urb *urb = NULL; if (!(flags & USBTMC_FLAG_ASYNC)) { dev_dbg(dev, "%s: before wait time %lu\n", __func__, expire); retval = wait_event_interruptible_timeout( file_data->wait_bulk_in, usbtmc_do_transfer(file_data), expire); dev_dbg(dev, "%s: wait returned %d\n", __func__, retval); if (retval <= 0) { if (retval == 0) retval = -ETIMEDOUT; goto error; } } urb = usb_get_from_anchor(&file_data->in_anchor); if (!urb) { if (!(flags & USBTMC_FLAG_ASYNC)) { /* synchronous case: must not happen */ retval = -EFAULT; goto error; } /* asynchronous case: ready, do not block or wait */ *transferred = done; dev_dbg(dev, "%s: (async) done=%u ret=0\n", __func__, done); return 0; } file_data->in_urbs_used--; if (max_transfer_size > urb->actual_length) max_transfer_size -= urb->actual_length; else max_transfer_size = 0; if (remaining > urb->actual_length) this_part = urb->actual_length; else this_part = remaining; print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, urb->transfer_buffer, urb->actual_length, true); if (copy_to_user(user_buffer + done, urb->transfer_buffer, this_part)) { usb_free_urb(urb); retval = -EFAULT; goto error; } remaining -= this_part; done += this_part; spin_lock_irq(&file_data->err_lock); if (urb->status) { /* return the very first error */ retval = file_data->in_status; spin_unlock_irq(&file_data->err_lock); usb_free_urb(urb); goto error; } spin_unlock_irq(&file_data->err_lock); if (urb->actual_length < bufsize) { /* short packet or ZLP received => ready */ usb_free_urb(urb); retval = 1; break; } if (!(flags & USBTMC_FLAG_ASYNC) && max_transfer_size > (bufsize * file_data->in_urbs_used)) { /* resubmit, since other buffers still not enough */ usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); 
usb_free_urb(urb); goto error; } file_data->in_urbs_used++; } usb_free_urb(urb); retval = 0; } error: *transferred = done; dev_dbg(dev, "%s: before kill\n", __func__); /* Attention: killing urbs can take long time (2 ms) */ usb_kill_anchored_urbs(&file_data->submitted); dev_dbg(dev, "%s: after kill\n", __func__); usb_scuttle_anchored_urbs(&file_data->in_anchor); file_data->in_urbs_used = 0; file_data->in_status = 0; /* no spinlock needed here */ dev_dbg(dev, "%s: done=%u ret=%d\n", __func__, done, retval); return retval; } static ssize_t usbtmc_ioctl_generic_read(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_message msg; ssize_t retval = 0; /* mutex already locked */ if (copy_from_user(&msg, arg, sizeof(struct usbtmc_message))) return -EFAULT; retval = usbtmc_generic_read(file_data, msg.message, msg.transfer_size, &msg.transferred, msg.flags); if (put_user(msg.transferred, &((struct usbtmc_message __user *)arg)->transferred)) return -EFAULT; return retval; } static void usbtmc_write_bulk_cb(struct urb *urb) { struct usbtmc_file_data *file_data = urb->context; int wakeup = 0; unsigned long flags; spin_lock_irqsave(&file_data->err_lock, flags); file_data->out_transfer_size += urb->actual_length; /* sync/async unlink faults aren't errors */ if (urb->status) { if (!(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN)) dev_err(&file_data->data->intf->dev, "%s - nonzero write bulk status received: %d\n", __func__, urb->status); if (!file_data->out_status) { file_data->out_status = urb->status; wakeup = 1; } } spin_unlock_irqrestore(&file_data->err_lock, flags); dev_dbg(&file_data->data->intf->dev, "%s - write bulk total size: %u\n", __func__, file_data->out_transfer_size); up(&file_data->limit_write_sem); if (usb_anchor_empty(&file_data->submitted) || wakeup) wake_up_interruptible(&file_data->data->waitq); } static ssize_t usbtmc_generic_write(struct usbtmc_file_data *file_data, const void __user *user_buffer, u32 transfer_size, u32 *transferred, u32 flags) { struct usbtmc_device_data *data = file_data->data; struct device *dev; u32 done = 0; u32 remaining; unsigned long expire; const u32 bufsize = USBTMC_BUFSIZE; struct urb *urb = NULL; int retval = 0; u32 timeout; *transferred = 0; /* Get pointer to private data structure */ dev = &data->intf->dev; dev_dbg(dev, "%s: size=%u flags=0x%X sema=%u\n", __func__, transfer_size, flags, file_data->limit_write_sem.count); if (flags & USBTMC_FLAG_APPEND) { spin_lock_irq(&file_data->err_lock); retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (retval < 0) return retval; } else { spin_lock_irq(&file_data->err_lock); file_data->out_transfer_size = 0; file_data->out_status = 0; spin_unlock_irq(&file_data->err_lock); } remaining = transfer_size; if (remaining > INT_MAX) remaining = INT_MAX; timeout = file_data->timeout; expire = msecs_to_jiffies(timeout); while (remaining > 0) { u32 this_part, aligned; u8 *buffer = NULL; if (flags & USBTMC_FLAG_ASYNC) { if (down_trylock(&file_data->limit_write_sem)) { retval = (done)?(0):(-EAGAIN); goto exit; } } else { retval = down_timeout(&file_data->limit_write_sem, expire); if (retval < 0) { retval = -ETIMEDOUT; goto error; } } spin_lock_irq(&file_data->err_lock); retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (retval < 0) { up(&file_data->limit_write_sem); goto error; } /* prepare next urb to send */ urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; up(&file_data->limit_write_sem); goto error; } buffer = 
urb->transfer_buffer; if (remaining > bufsize) this_part = bufsize; else this_part = remaining; if (copy_from_user(buffer, user_buffer + done, this_part)) { retval = -EFAULT; up(&file_data->limit_write_sem); goto error; } print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, this_part, true); /* fill bulk with 32 bit alignment to meet USBTMC specification * (size + 3 & ~3) rounds up and simplifies user code */ aligned = (this_part + 3) & ~3; dev_dbg(dev, "write(size:%u align:%u done:%u)\n", (unsigned int)this_part, (unsigned int)aligned, (unsigned int)done); usb_fill_bulk_urb(urb, data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), urb->transfer_buffer, aligned, usbtmc_write_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); up(&file_data->limit_write_sem); goto error; } usb_free_urb(urb); urb = NULL; /* urb will be finally released by usb driver */ remaining -= this_part; done += this_part; } /* All urbs are on the fly */ if (!(flags & USBTMC_FLAG_ASYNC)) { if (!usb_wait_anchor_empty_timeout(&file_data->submitted, timeout)) { retval = -ETIMEDOUT; goto error; } } retval = 0; goto exit; error: usb_kill_anchored_urbs(&file_data->submitted); exit: usb_free_urb(urb); spin_lock_irq(&file_data->err_lock); if (!(flags & USBTMC_FLAG_ASYNC)) done = file_data->out_transfer_size; if (!retval && file_data->out_status) retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); *transferred = done; dev_dbg(dev, "%s: done=%u, retval=%d, urbstat=%d\n", __func__, done, retval, file_data->out_status); return retval; } static ssize_t usbtmc_ioctl_generic_write(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_message msg; ssize_t retval = 0; /* mutex already locked */ if (copy_from_user(&msg, arg, sizeof(struct usbtmc_message))) return -EFAULT; retval = usbtmc_generic_write(file_data, msg.message, msg.transfer_size, &msg.transferred, msg.flags); if (put_user(msg.transferred, &((struct usbtmc_message __user *)arg)->transferred)) return -EFAULT; return retval; } /* * Get the generic write result */ static ssize_t usbtmc_ioctl_write_result(struct usbtmc_file_data *file_data, void __user *arg) { u32 transferred; int retval; spin_lock_irq(&file_data->err_lock); transferred = file_data->out_transfer_size; retval = file_data->out_status; spin_unlock_irq(&file_data->err_lock); if (put_user(transferred, (__u32 __user *)arg)) return -EFAULT; return retval; } /* * Sends a REQUEST_DEV_DEP_MSG_IN message on the Bulk-OUT endpoint. * @transfer_size: number of bytes to request from the device. * * See the USBTMC specification, Table 4. * * Also updates bTag_last_write. */ static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data, u32 transfer_size) { struct usbtmc_device_data *data = file_data->data; int retval; u8 *buffer; int actual; buffer = kmalloc(USBTMC_HEADER_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; /* Setup IO buffer for REQUEST_DEV_DEP_MSG_IN message * Refer to class specs for details */ buffer[0] = 2; buffer[1] = data->bTag; buffer[2] = ~data->bTag; buffer[3] = 0; /* Reserved */ buffer[4] = transfer_size >> 0; buffer[5] = transfer_size >> 8; buffer[6] = transfer_size >> 16; buffer[7] = transfer_size >> 24; buffer[8] = file_data->term_char_enabled * 2; /* Use term character? 
*/ buffer[9] = file_data->term_char; buffer[10] = 0; /* Reserved */ buffer[11] = 0; /* Reserved */ /* Send bulk URB */ retval = usb_bulk_msg(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), buffer, USBTMC_HEADER_SIZE, &actual, file_data->timeout); /* Store bTag (in case we need to abort) */ data->bTag_last_write = data->bTag; /* Increment bTag -- and increment again if zero */ data->bTag++; if (!data->bTag) data->bTag++; kfree(buffer); if (retval < 0) dev_err(&data->intf->dev, "%s returned %d\n", __func__, retval); return retval; } static ssize_t usbtmc_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; struct device *dev; const u32 bufsize = USBTMC_BUFSIZE; u32 n_characters; u8 *buffer; int actual; u32 done = 0; u32 remaining; int retval; /* Get pointer to private data structure */ file_data = filp->private_data; data = file_data->data; dev = &data->intf->dev; buffer = kmalloc(bufsize, GFP_KERNEL); if (!buffer) return -ENOMEM; mutex_lock(&data->io_mutex); if (data->zombie) { retval = -ENODEV; goto exit; } if (count > INT_MAX) count = INT_MAX; dev_dbg(dev, "%s(count:%zu)\n", __func__, count); retval = send_request_dev_dep_msg_in(file_data, count); if (retval < 0) { if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_out(data); goto exit; } /* Loop until we have fetched everything we requested */ remaining = count; actual = 0; /* Send bulk URB */ retval = usb_bulk_msg(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, bufsize, &actual, file_data->timeout); dev_dbg(dev, "%s: bulk_msg retval(%u), actual(%d)\n", __func__, retval, actual); /* Store bTag (in case we need to abort) */ data->bTag_last_read = data->bTag; if (retval < 0) { if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } /* Sanity checks for the header */ if (actual < USBTMC_HEADER_SIZE) { dev_err(dev, "Device sent too small first packet: %u < %u\n", actual, USBTMC_HEADER_SIZE); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } if (buffer[0] != 2) { dev_err(dev, "Device sent reply with wrong MsgID: %u != 2\n", buffer[0]); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } if (buffer[1] != data->bTag_last_write) { dev_err(dev, "Device sent reply with wrong bTag: %u != %u\n", buffer[1], data->bTag_last_write); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } /* How many characters did the instrument send? 
*/ n_characters = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24); file_data->bmTransferAttributes = buffer[8]; dev_dbg(dev, "Bulk-IN header: N_characters(%u), bTransAttr(%u)\n", n_characters, buffer[8]); if (n_characters > remaining) { dev_err(dev, "Device wants to return more data than requested: %u > %zu\n", n_characters, count); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_in(data); goto exit; } print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); remaining = n_characters; /* Remove the USBTMC header */ actual -= USBTMC_HEADER_SIZE; /* Remove padding if it exists */ if (actual > remaining) actual = remaining; remaining -= actual; /* Copy buffer to user space */ if (copy_to_user(buf, &buffer[USBTMC_HEADER_SIZE], actual)) { /* There must have been an addressing problem */ retval = -EFAULT; goto exit; } if ((actual + USBTMC_HEADER_SIZE) == bufsize) { retval = usbtmc_generic_read(file_data, buf + actual, remaining, &done, USBTMC_FLAG_IGNORE_TRAILER); if (retval < 0) goto exit; } done += actual; /* Update file position value */ *f_pos = *f_pos + done; retval = done; exit: mutex_unlock(&data->io_mutex); kfree(buffer); return retval; } static ssize_t usbtmc_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; struct urb *urb = NULL; ssize_t retval = 0; u8 *buffer; u32 remaining, done; u32 transfersize, aligned, buflen; file_data = filp->private_data; data = file_data->data; mutex_lock(&data->io_mutex); if (data->zombie) { retval = -ENODEV; goto exit; } done = 0; spin_lock_irq(&file_data->err_lock); file_data->out_transfer_size = 0; file_data->out_status = 0; spin_unlock_irq(&file_data->err_lock); if (!count) goto exit; if (down_trylock(&file_data->limit_write_sem)) { /* previous calls were async */ retval = -EBUSY; goto exit; } urb = usbtmc_create_urb(); if (!urb) { retval = -ENOMEM; up(&file_data->limit_write_sem); goto exit; } buffer = urb->transfer_buffer; buflen = urb->transfer_buffer_length; if (count > INT_MAX) { transfersize = INT_MAX; buffer[8] = 0; } else { transfersize = count; buffer[8] = file_data->eom_val; } /* Setup IO buffer for DEV_DEP_MSG_OUT message */ buffer[0] = 1; buffer[1] = data->bTag; buffer[2] = ~data->bTag; buffer[3] = 0; /* Reserved */ buffer[4] = transfersize >> 0; buffer[5] = transfersize >> 8; buffer[6] = transfersize >> 16; buffer[7] = transfersize >> 24; /* buffer[8] is set above... 
*/ buffer[9] = 0; /* Reserved */ buffer[10] = 0; /* Reserved */ buffer[11] = 0; /* Reserved */ remaining = transfersize; if (transfersize + USBTMC_HEADER_SIZE > buflen) { transfersize = buflen - USBTMC_HEADER_SIZE; aligned = buflen; } else { aligned = (transfersize + (USBTMC_HEADER_SIZE + 3)) & ~3; } if (copy_from_user(&buffer[USBTMC_HEADER_SIZE], buf, transfersize)) { retval = -EFAULT; up(&file_data->limit_write_sem); goto exit; } dev_dbg(&data->intf->dev, "%s(size:%u align:%u)\n", __func__, (unsigned int)transfersize, (unsigned int)aligned); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, aligned, true); usb_fill_bulk_urb(urb, data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out), urb->transfer_buffer, aligned, usbtmc_write_bulk_cb, file_data); usb_anchor_urb(urb, &file_data->submitted); retval = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(retval)) { usb_unanchor_urb(urb); up(&file_data->limit_write_sem); goto exit; } remaining -= transfersize; data->bTag_last_write = data->bTag; data->bTag++; if (!data->bTag) data->bTag++; /* call generic_write even when remaining = 0 */ retval = usbtmc_generic_write(file_data, buf + transfersize, remaining, &done, USBTMC_FLAG_APPEND); /* truncate alignment bytes */ if (done > remaining) done = remaining; /*add size of first urb*/ done += transfersize; if (retval < 0) { usb_kill_anchored_urbs(&file_data->submitted); dev_err(&data->intf->dev, "Unable to send data, error %d\n", (int)retval); if (file_data->auto_abort) usbtmc_ioctl_abort_bulk_out(data); goto exit; } retval = done; exit: usb_free_urb(urb); mutex_unlock(&data->io_mutex); return retval; } static int usbtmc_ioctl_clear(struct usbtmc_device_data *data) { struct device *dev; u8 *buffer; int rv; int n; int actual = 0; dev = &data->intf->dev; dev_dbg(dev, "Sending INITIATE_CLEAR request\n"); buffer = kmalloc(USBTMC_BUFSIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INITIATE_CLEAR, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 1, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INITIATE_CLEAR returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INITIATE_CLEAR returned %x\n", buffer[0]); rv = -EPERM; goto exit; } n = 0; usbtmc_clear_check_status: dev_dbg(dev, "Sending CHECK_CLEAR_STATUS request\n"); rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_CHECK_CLEAR_STATUS, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 2, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "CHECK_CLEAR_STATUS returned %x\n", buffer[0]); if (buffer[0] == USBTMC_STATUS_SUCCESS) goto usbtmc_clear_bulk_out_halt; if (buffer[0] != USBTMC_STATUS_PENDING) { dev_err(dev, "CHECK_CLEAR_STATUS returned %x\n", buffer[0]); rv = -EPERM; goto exit; } if ((buffer[1] & 1) != 0) { do { dev_dbg(dev, "Reading from bulk in EP\n"); actual = 0; rv = usb_bulk_msg(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in), buffer, USBTMC_BUFSIZE, &actual, USB_CTRL_GET_TIMEOUT); print_hex_dump_debug("usbtmc ", DUMP_PREFIX_NONE, 16, 1, buffer, actual, true); n++; if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } } while ((actual == USBTMC_BUFSIZE) && (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN)); } else { /* do not stress device with subsequent requests */ msleep(50); n++; } if (n >= 
USBTMC_MAX_READS_TO_CLEAR_BULK_IN) { dev_err(dev, "Couldn't clear device buffer within %d cycles\n", USBTMC_MAX_READS_TO_CLEAR_BULK_IN); rv = -EPERM; goto exit; } goto usbtmc_clear_check_status; usbtmc_clear_bulk_out_halt: rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) { dev_err(dev, "usb_clear_halt returned %d\n", rv); goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_clear_out_halt(struct usbtmc_device_data *data) { int rv; rv = usb_clear_halt(data->usb_dev, usb_sndbulkpipe(data->usb_dev, data->bulk_out)); if (rv < 0) dev_err(&data->usb_dev->dev, "%s returned %d\n", __func__, rv); return rv; } static int usbtmc_ioctl_clear_in_halt(struct usbtmc_device_data *data) { int rv; rv = usb_clear_halt(data->usb_dev, usb_rcvbulkpipe(data->usb_dev, data->bulk_in)); if (rv < 0) dev_err(&data->usb_dev->dev, "%s returned %d\n", __func__, rv); return rv; } static int usbtmc_ioctl_cancel_io(struct usbtmc_file_data *file_data) { spin_lock_irq(&file_data->err_lock); file_data->in_status = -ECANCELED; file_data->out_status = -ECANCELED; spin_unlock_irq(&file_data->err_lock); usb_kill_anchored_urbs(&file_data->submitted); return 0; } static int usbtmc_ioctl_cleanup_io(struct usbtmc_file_data *file_data) { usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); spin_lock_irq(&file_data->err_lock); file_data->in_status = 0; file_data->in_transfer_size = 0; file_data->out_status = 0; file_data->out_transfer_size = 0; spin_unlock_irq(&file_data->err_lock); file_data->in_urbs_used = 0; return 0; } static int get_capabilities(struct usbtmc_device_data *data) { struct device *dev = &data->usb_dev->dev; char *buffer; int rv = 0; buffer = kmalloc(0x18, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_GET_CAPABILITIES, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 0x18, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto err_out; } dev_dbg(dev, "GET_CAPABILITIES returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "GET_CAPABILITIES returned %x\n", buffer[0]); rv = -EPERM; goto err_out; } dev_dbg(dev, "Interface capabilities are %x\n", buffer[4]); dev_dbg(dev, "Device capabilities are %x\n", buffer[5]); dev_dbg(dev, "USB488 interface capabilities are %x\n", buffer[14]); dev_dbg(dev, "USB488 device capabilities are %x\n", buffer[15]); data->capabilities.interface_capabilities = buffer[4]; data->capabilities.device_capabilities = buffer[5]; data->capabilities.usb488_interface_capabilities = buffer[14]; data->capabilities.usb488_device_capabilities = buffer[15]; data->usb488_caps = (buffer[14] & 0x07) | ((buffer[15] & 0x0f) << 4); rv = 0; err_out: kfree(buffer); return rv; } #define capability_attribute(name) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ struct usbtmc_device_data *data = usb_get_intfdata(intf); \ \ return sprintf(buf, "%d\n", data->capabilities.name); \ } \ static DEVICE_ATTR_RO(name) capability_attribute(interface_capabilities); capability_attribute(device_capabilities); capability_attribute(usb488_interface_capabilities); capability_attribute(usb488_device_capabilities); static struct attribute *usbtmc_attrs[] = { &dev_attr_interface_capabilities.attr, &dev_attr_device_capabilities.attr, 
&dev_attr_usb488_interface_capabilities.attr, &dev_attr_usb488_device_capabilities.attr, NULL, }; ATTRIBUTE_GROUPS(usbtmc); static int usbtmc_ioctl_indicator_pulse(struct usbtmc_device_data *data) { struct device *dev; u8 *buffer; int rv; dev = &data->intf->dev; buffer = kmalloc(2, GFP_KERNEL); if (!buffer) return -ENOMEM; rv = usb_control_msg(data->usb_dev, usb_rcvctrlpipe(data->usb_dev, 0), USBTMC_REQUEST_INDICATOR_PULSE, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, buffer, 0x01, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "usb_control_msg returned %d\n", rv); goto exit; } dev_dbg(dev, "INDICATOR_PULSE returned %x\n", buffer[0]); if (buffer[0] != USBTMC_STATUS_SUCCESS) { dev_err(dev, "INDICATOR_PULSE returned %x\n", buffer[0]); rv = -EPERM; goto exit; } rv = 0; exit: kfree(buffer); return rv; } static int usbtmc_ioctl_request(struct usbtmc_device_data *data, void __user *arg) { struct device *dev = &data->intf->dev; struct usbtmc_ctrlrequest request; u8 *buffer = NULL; int rv; unsigned int is_in, pipe; unsigned long res; res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest)); if (res) return -EFAULT; if (request.req.wLength > USBTMC_BUFSIZE) return -EMSGSIZE; if (request.req.wLength == 0) /* Length-0 requests are never IN */ request.req.bRequestType &= ~USB_DIR_IN; is_in = request.req.bRequestType & USB_DIR_IN; if (request.req.wLength) { buffer = kmalloc(request.req.wLength, GFP_KERNEL); if (!buffer) return -ENOMEM; if (!is_in) { /* Send control data to device */ res = copy_from_user(buffer, request.data, request.req.wLength); if (res) { rv = -EFAULT; goto exit; } } } if (is_in) pipe = usb_rcvctrlpipe(data->usb_dev, 0); else pipe = usb_sndctrlpipe(data->usb_dev, 0); rv = usb_control_msg(data->usb_dev, pipe, request.req.bRequest, request.req.bRequestType, request.req.wValue, request.req.wIndex, buffer, request.req.wLength, USB_CTRL_GET_TIMEOUT); if (rv < 0) { dev_err(dev, "%s failed %d\n", __func__, rv); goto exit; } if (rv && is_in) { /* Read control data from device */ res = copy_to_user(request.data, buffer, rv); if (res) rv = -EFAULT; } exit: kfree(buffer); return rv; } /* * Get the usb timeout value */ static int usbtmc_ioctl_get_timeout(struct usbtmc_file_data *file_data, void __user *arg) { u32 timeout; timeout = file_data->timeout; return put_user(timeout, (__u32 __user *)arg); } /* * Set the usb timeout value */ static int usbtmc_ioctl_set_timeout(struct usbtmc_file_data *file_data, void __user *arg) { u32 timeout; if (get_user(timeout, (__u32 __user *)arg)) return -EFAULT; /* Note that timeout = 0 means * MAX_SCHEDULE_TIMEOUT in usb_control_msg */ if (timeout < USBTMC_MIN_TIMEOUT) return -EINVAL; file_data->timeout = timeout; return 0; } /* * enables/disables sending EOM on write */ static int usbtmc_ioctl_eom_enable(struct usbtmc_file_data *file_data, void __user *arg) { u8 eom_enable; if (copy_from_user(&eom_enable, arg, sizeof(eom_enable))) return -EFAULT; if (eom_enable > 1) return -EINVAL; file_data->eom_val = eom_enable; return 0; } /* * Configure termination character for read() */ static int usbtmc_ioctl_config_termc(struct usbtmc_file_data *file_data, void __user *arg) { struct usbtmc_termchar termc; if (copy_from_user(&termc, arg, sizeof(termc))) return -EFAULT; if ((termc.term_char_enabled > 1) || (termc.term_char_enabled && !(file_data->data->capabilities.device_capabilities & 1))) return -EINVAL; file_data->term_char = termc.term_char; file_data->term_char_enabled = termc.term_char_enabled; return 0; } static long 
usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct usbtmc_file_data *file_data; struct usbtmc_device_data *data; int retval = -EBADRQC; __u8 tmp_byte; file_data = file->private_data; data = file_data->data; mutex_lock(&data->io_mutex); if (data->zombie) { retval = -ENODEV; goto skip_io_on_zombie; } switch (cmd) { case USBTMC_IOCTL_CLEAR_OUT_HALT: retval = usbtmc_ioctl_clear_out_halt(data); break; case USBTMC_IOCTL_CLEAR_IN_HALT: retval = usbtmc_ioctl_clear_in_halt(data); break; case USBTMC_IOCTL_INDICATOR_PULSE: retval = usbtmc_ioctl_indicator_pulse(data); break; case USBTMC_IOCTL_CLEAR: retval = usbtmc_ioctl_clear(data); break; case USBTMC_IOCTL_ABORT_BULK_OUT: retval = usbtmc_ioctl_abort_bulk_out(data); break; case USBTMC_IOCTL_ABORT_BULK_IN: retval = usbtmc_ioctl_abort_bulk_in(data); break; case USBTMC_IOCTL_CTRL_REQUEST: retval = usbtmc_ioctl_request(data, (void __user *)arg); break; case USBTMC_IOCTL_GET_TIMEOUT: retval = usbtmc_ioctl_get_timeout(file_data, (void __user *)arg); break; case USBTMC_IOCTL_SET_TIMEOUT: retval = usbtmc_ioctl_set_timeout(file_data, (void __user *)arg); break; case USBTMC_IOCTL_EOM_ENABLE: retval = usbtmc_ioctl_eom_enable(file_data, (void __user *)arg); break; case USBTMC_IOCTL_CONFIG_TERMCHAR: retval = usbtmc_ioctl_config_termc(file_data, (void __user *)arg); break; case USBTMC_IOCTL_WRITE: retval = usbtmc_ioctl_generic_write(file_data, (void __user *)arg); break; case USBTMC_IOCTL_READ: retval = usbtmc_ioctl_generic_read(file_data, (void __user *)arg); break; case USBTMC_IOCTL_WRITE_RESULT: retval = usbtmc_ioctl_write_result(file_data, (void __user *)arg); break; case USBTMC_IOCTL_API_VERSION: retval = put_user(USBTMC_API_VERSION, (__u32 __user *)arg); break; case USBTMC488_IOCTL_GET_CAPS: retval = put_user(data->usb488_caps, (unsigned char __user *)arg); break; case USBTMC488_IOCTL_READ_STB: retval = usbtmc488_ioctl_read_stb(file_data, (void __user *)arg); break; case USBTMC488_IOCTL_REN_CONTROL: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_REN_CONTROL); break; case USBTMC488_IOCTL_GOTO_LOCAL: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_GOTO_LOCAL); break; case USBTMC488_IOCTL_LOCAL_LOCKOUT: retval = usbtmc488_ioctl_simple(data, (void __user *)arg, USBTMC488_REQUEST_LOCAL_LOCKOUT); break; case USBTMC488_IOCTL_TRIGGER: retval = usbtmc488_ioctl_trigger(file_data); break; case USBTMC488_IOCTL_WAIT_SRQ: retval = usbtmc488_ioctl_wait_srq(file_data, (__u32 __user *)arg); break; case USBTMC_IOCTL_MSG_IN_ATTR: retval = put_user(file_data->bmTransferAttributes, (__u8 __user *)arg); break; case USBTMC_IOCTL_AUTO_ABORT: retval = get_user(tmp_byte, (unsigned char __user *)arg); if (retval == 0) file_data->auto_abort = !!tmp_byte; break; case USBTMC_IOCTL_GET_STB: retval = usbtmc_get_stb(file_data, &tmp_byte); if (retval > 0) retval = put_user(tmp_byte, (__u8 __user *)arg); break; case USBTMC_IOCTL_GET_SRQ_STB: retval = usbtmc_ioctl_get_srq_stb(file_data, (void __user *)arg); break; case USBTMC_IOCTL_CANCEL_IO: retval = usbtmc_ioctl_cancel_io(file_data); break; case USBTMC_IOCTL_CLEANUP_IO: retval = usbtmc_ioctl_cleanup_io(file_data); break; } skip_io_on_zombie: mutex_unlock(&data->io_mutex); return retval; } static int usbtmc_fasync(int fd, struct file *file, int on) { struct usbtmc_file_data *file_data = file->private_data; return fasync_helper(fd, file, on, &file_data->data->fasync); } static __poll_t usbtmc_poll(struct file *file, poll_table *wait) { struct usbtmc_file_data 
*file_data = file->private_data; struct usbtmc_device_data *data = file_data->data; __poll_t mask; mutex_lock(&data->io_mutex); if (data->zombie) { mask = EPOLLHUP | EPOLLERR; goto no_poll; } poll_wait(file, &data->waitq, wait); /* Note that EPOLLPRI is now assigned to SRQ, and * EPOLLIN|EPOLLRDNORM to normal read data. */ mask = 0; if (atomic_read(&file_data->srq_asserted)) mask |= EPOLLPRI; /* Note that the anchor submitted includes all urbs for BULK IN * and OUT. So EPOLLOUT is signaled when BULK OUT is empty and * all BULK IN urbs are completed and moved to in_anchor. */ if (usb_anchor_empty(&file_data->submitted)) mask |= (EPOLLOUT | EPOLLWRNORM); if (!usb_anchor_empty(&file_data->in_anchor)) mask |= (EPOLLIN | EPOLLRDNORM); spin_lock_irq(&file_data->err_lock); if (file_data->in_status || file_data->out_status) mask |= EPOLLERR; spin_unlock_irq(&file_data->err_lock); dev_dbg(&data->intf->dev, "poll mask = %x\n", mask); no_poll: mutex_unlock(&data->io_mutex); return mask; } static const struct file_operations fops = { .owner = THIS_MODULE, .read = usbtmc_read, .write = usbtmc_write, .open = usbtmc_open, .release = usbtmc_release, .flush = usbtmc_flush, .unlocked_ioctl = usbtmc_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = usbtmc_fasync, .poll = usbtmc_poll, .llseek = default_llseek, }; static struct usb_class_driver usbtmc_class = { .name = "usbtmc%d", .fops = &fops, .minor_base = USBTMC_MINOR_BASE, }; static void usbtmc_interrupt(struct urb *urb) { struct usbtmc_device_data *data = urb->context; struct device *dev = &data->intf->dev; int status = urb->status; int rv; dev_dbg(&data->intf->dev, "int status: %d len %d\n", status, urb->actual_length); switch (status) { case 0: /* SUCCESS */ /* check for valid STB notification */ if (data->iin_buffer[0] > 0x81) { data->bNotify1 = data->iin_buffer[0]; data->bNotify2 = data->iin_buffer[1]; atomic_set(&data->iin_data_valid, 1); wake_up_interruptible(&data->waitq); goto exit; } /* check for SRQ notification */ if (data->iin_buffer[0] == 0x81) { unsigned long flags; struct list_head *elem; if (data->fasync) kill_fasync(&data->fasync, SIGIO, POLL_PRI); spin_lock_irqsave(&data->dev_lock, flags); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); file_data->srq_byte = data->iin_buffer[1]; atomic_set(&file_data->srq_asserted, 1); } spin_unlock_irqrestore(&data->dev_lock, flags); dev_dbg(dev, "srq received bTag %x stb %x\n", (unsigned int)data->iin_buffer[0], (unsigned int)data->iin_buffer[1]); wake_up_interruptible_all(&data->waitq); goto exit; } dev_warn(dev, "invalid notification: %x\n", data->iin_buffer[0]); break; case -EOVERFLOW: dev_err(dev, "overflow with length %d, actual length is %d\n", data->iin_wMaxPacketSize, urb->actual_length); fallthrough; default: /* urb terminated, clean up */ dev_dbg(dev, "urb terminated, status: %d\n", status); return; } exit: rv = usb_submit_urb(urb, GFP_ATOMIC); if (rv) dev_err(dev, "usb_submit_urb failed: %d\n", rv); } static void usbtmc_free_int(struct usbtmc_device_data *data) { if (!data->iin_ep_present || !data->iin_urb) return; usb_kill_urb(data->iin_urb); kfree(data->iin_buffer); data->iin_buffer = NULL; usb_free_urb(data->iin_urb); data->iin_urb = NULL; kref_put(&data->kref, usbtmc_delete); } static int usbtmc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usbtmc_device_data *data; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in; 
int retcode; dev_dbg(&intf->dev, "%s called\n", __func__); data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->intf = intf; data->id = id; data->usb_dev = usb_get_dev(interface_to_usbdev(intf)); usb_set_intfdata(intf, data); kref_init(&data->kref); mutex_init(&data->io_mutex); init_waitqueue_head(&data->waitq); atomic_set(&data->iin_data_valid, 0); INIT_LIST_HEAD(&data->file_list); spin_lock_init(&data->dev_lock); data->zombie = 0; /* Initialize USBTMC bTag and other fields */ data->bTag = 1; /* 2 <= bTag <= 127 USBTMC-USB488 subclass specification 4.3.1 */ data->iin_bTag = 2; /* USBTMC devices have only one setting, so use that */ iface_desc = data->intf->cur_altsetting; data->ifnum = iface_desc->desc.bInterfaceNumber; /* Find bulk endpoints */ retcode = usb_find_common_endpoints(iface_desc, &bulk_in, &bulk_out, NULL, NULL); if (retcode) { dev_err(&intf->dev, "bulk endpoints not found\n"); goto err_put; } retcode = -EINVAL; data->bulk_in = bulk_in->bEndpointAddress; data->wMaxPacketSize = usb_endpoint_maxp(bulk_in); if (!data->wMaxPacketSize) goto err_put; dev_dbg(&intf->dev, "Found bulk in endpoint at %u\n", data->bulk_in); data->bulk_out = bulk_out->bEndpointAddress; dev_dbg(&intf->dev, "Found Bulk out endpoint at %u\n", data->bulk_out); /* Find int endpoint */ retcode = usb_find_int_in_endpoint(iface_desc, &int_in); if (!retcode) { data->iin_ep_present = 1; data->iin_ep = int_in->bEndpointAddress; data->iin_wMaxPacketSize = usb_endpoint_maxp(int_in); data->iin_interval = int_in->bInterval; dev_dbg(&intf->dev, "Found Int in endpoint at %u\n", data->iin_ep); } retcode = get_capabilities(data); if (retcode) dev_err(&intf->dev, "can't read capabilities\n"); if (data->iin_ep_present) { /* allocate int urb */ data->iin_urb = usb_alloc_urb(0, GFP_KERNEL); if (!data->iin_urb) { retcode = -ENOMEM; goto error_register; } /* Protect interrupt in endpoint data until iin_urb is freed */ kref_get(&data->kref); /* allocate buffer for interrupt in */ data->iin_buffer = kmalloc(data->iin_wMaxPacketSize, GFP_KERNEL); if (!data->iin_buffer) { retcode = -ENOMEM; goto error_register; } /* fill interrupt urb */ usb_fill_int_urb(data->iin_urb, data->usb_dev, usb_rcvintpipe(data->usb_dev, data->iin_ep), data->iin_buffer, data->iin_wMaxPacketSize, usbtmc_interrupt, data, data->iin_interval); retcode = usb_submit_urb(data->iin_urb, GFP_KERNEL); if (retcode) { dev_err(&intf->dev, "Failed to submit iin_urb\n"); goto error_register; } } retcode = usb_register_dev(intf, &usbtmc_class); if (retcode) { dev_err(&intf->dev, "Not able to get a minor (base %u, slice default): %d\n", USBTMC_MINOR_BASE, retcode); goto error_register; } dev_dbg(&intf->dev, "Using minor number %d\n", intf->minor); return 0; error_register: usbtmc_free_int(data); err_put: kref_put(&data->kref, usbtmc_delete); return retcode; } static void usbtmc_disconnect(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; usb_deregister_dev(intf, &usbtmc_class); mutex_lock(&data->io_mutex); data->zombie = 1; wake_up_interruptible_all(&data->waitq); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); } mutex_unlock(&data->io_mutex); usbtmc_free_int(data); kref_put(&data->kref, usbtmc_delete); } static void usbtmc_draw_down(struct usbtmc_file_data *file_data) { int time; time = 
usb_wait_anchor_empty_timeout(&file_data->submitted, 1000); if (!time) usb_kill_anchored_urbs(&file_data->submitted); usb_scuttle_anchored_urbs(&file_data->in_anchor); } static int usbtmc_suspend(struct usb_interface *intf, pm_message_t message) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; if (!data) return 0; mutex_lock(&data->io_mutex); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usbtmc_draw_down(file_data); } if (data->iin_ep_present && data->iin_urb) usb_kill_urb(data->iin_urb); mutex_unlock(&data->io_mutex); return 0; } static int usbtmc_resume(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); int retcode = 0; if (data->iin_ep_present && data->iin_urb) retcode = usb_submit_urb(data->iin_urb, GFP_KERNEL); if (retcode) dev_err(&intf->dev, "Failed to submit iin_urb\n"); return retcode; } static int usbtmc_pre_reset(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); struct list_head *elem; if (!data) return 0; mutex_lock(&data->io_mutex); list_for_each(elem, &data->file_list) { struct usbtmc_file_data *file_data; file_data = list_entry(elem, struct usbtmc_file_data, file_elem); usbtmc_ioctl_cancel_io(file_data); } return 0; } static int usbtmc_post_reset(struct usb_interface *intf) { struct usbtmc_device_data *data = usb_get_intfdata(intf); mutex_unlock(&data->io_mutex); return 0; } static struct usb_driver usbtmc_driver = { .name = "usbtmc", .id_table = usbtmc_devices, .probe = usbtmc_probe, .disconnect = usbtmc_disconnect, .suspend = usbtmc_suspend, .resume = usbtmc_resume, .pre_reset = usbtmc_pre_reset, .post_reset = usbtmc_post_reset, .dev_groups = usbtmc_groups, }; module_usb_driver(usbtmc_driver); MODULE_LICENSE("GPL");
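
/*
 * Minimal userspace sketch of talking to an instrument through this driver.
 * It relies only on the character device registered via usbtmc_class
 * ("usbtmc%d"), the read()/write() paths and the USBTMC_IOCTL_SET_TIMEOUT
 * ioctl handled above; the device node name /dev/usbtmc0 and the "*IDN?"
 * query are illustrative assumptions, not part of the driver itself.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usb/tmc.h>

int main(void)
{
	char buf[256];
	ssize_t n;
	__u32 timeout_ms = 3000;	/* forwarded to file_data->timeout */
	int fd = open("/dev/usbtmc0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* per-file timeout used by the bulk transfers in usbtmc_read/write */
	if (ioctl(fd, USBTMC_IOCTL_SET_TIMEOUT, &timeout_ms) < 0)
		perror("USBTMC_IOCTL_SET_TIMEOUT");

	/* write() wraps the bytes in a DEV_DEP_MSG_OUT bulk transfer */
	if (write(fd, "*IDN?\n", 6) < 0) {
		perror("write");
		close(fd);
		return 1;
	}

	/* read() sends REQUEST_DEV_DEP_MSG_IN and returns the reply payload */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n < 0) {
		perror("read");
		close(fd);
		return 1;
	}
	buf[n] = '\0';
	printf("instrument identification: %s", buf);

	close(fd);
	return 0;
}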
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Christian Brauner <brauner@kernel.org> */

#include <linux/cred.h>
#include <linux/fs.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>

#include "internal.h"

/*
 * Outside of this file vfs{g,u}id_t are always created from k{g,u}id_t,
 * never from raw values. These are just internal helpers.
 */
#define VFSUIDT_INIT_RAW(val) (vfsuid_t){ val }
#define VFSGIDT_INIT_RAW(val) (vfsgid_t){ val }

struct mnt_idmap {
	struct uid_gid_map uid_map;
	struct uid_gid_map gid_map;
	refcount_t count;
};

/*
 * Carries the initial idmapping of 0:0:4294967295 which is an identity
 * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
 * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
 */
struct mnt_idmap nop_mnt_idmap = {
	.count = REFCOUNT_INIT(1),
};
EXPORT_SYMBOL_GPL(nop_mnt_idmap);

/**
 * initial_idmapping - check whether this is the initial mapping
 * @ns: idmapping to check
 *
 * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1,
 * [...], 1000 to 1000 [...].
 *
 * Return: true if this is the initial mapping, false if not.
 */
static inline bool initial_idmapping(const struct user_namespace *ns)
{
	return ns == &init_user_ns;
}

/**
 * make_vfsuid - map a filesystem kuid according to an idmapping
 * @idmap: the mount's idmapping
 * @fs_userns: the filesystem's idmapping
 * @kuid : kuid to be mapped
 *
 * Take a @kuid and remap it from @fs_userns into @idmap. Use this
 * function when preparing a @kuid to be reported to userspace.
 *
 * If initial_idmapping() determines that this is not an idmapped mount
 * we can simply return @kuid unchanged.
 * If initial_idmapping() tells us that the filesystem is not mounted with an
 * idmapping we know the value of @kuid won't change when calling
 * from_kuid() so we can simply retrieve the value via __kuid_val()
 * directly.
 *
 * Return: @kuid mapped according to @idmap.
 * If @kuid has no mapping in either @idmap or @fs_userns INVALID_UID is
 * returned.
*/ vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid) { uid_t uid; if (idmap == &nop_mnt_idmap) return VFSUIDT_INIT(kuid); if (initial_idmapping(fs_userns)) uid = __kuid_val(kuid); else uid = from_kuid(fs_userns, kuid); if (uid == (uid_t)-1) return INVALID_VFSUID; return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid)); } EXPORT_SYMBOL_GPL(make_vfsuid); /** * make_vfsgid - map a filesystem kgid according to an idmapping * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @kgid : kgid to be mapped * * Take a @kgid and remap it from @fs_userns into @idmap. Use this * function when preparing a @kgid to be reported to userspace. * * If initial_idmapping() determines that this is not an idmapped mount * we can simply return @kgid unchanged. * If initial_idmapping() tells us that the filesystem is not mounted with an * idmapping we know the value of @kgid won't change when calling * from_kgid() so we can simply retrieve the value via __kgid_val() * directly. * * Return: @kgid mapped according to @idmap. * If @kgid has no mapping in either @idmap or @fs_userns INVALID_GID is * returned. */ vfsgid_t make_vfsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kgid_t kgid) { gid_t gid; if (idmap == &nop_mnt_idmap) return VFSGIDT_INIT(kgid); if (initial_idmapping(fs_userns)) gid = __kgid_val(kgid); else gid = from_kgid(fs_userns, kgid); if (gid == (gid_t)-1) return INVALID_VFSGID; return VFSGIDT_INIT_RAW(map_id_down(&idmap->gid_map, gid)); } EXPORT_SYMBOL_GPL(make_vfsgid); /** * from_vfsuid - map a vfsuid into the filesystem idmapping * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsuid : vfsuid to be mapped * * Map @vfsuid into the filesystem idmapping. This function has to be used in * order to e.g. write @vfsuid to inode->i_uid. * * Return: @vfsuid mapped into the filesystem idmapping */ kuid_t from_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsuid_t vfsuid) { uid_t uid; if (idmap == &nop_mnt_idmap) return AS_KUIDT(vfsuid); uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid)); if (uid == (uid_t)-1) return INVALID_UID; if (initial_idmapping(fs_userns)) return KUIDT_INIT(uid); return make_kuid(fs_userns, uid); } EXPORT_SYMBOL_GPL(from_vfsuid); /** * from_vfsgid - map a vfsgid into the filesystem idmapping * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsgid : vfsgid to be mapped * * Map @vfsgid into the filesystem idmapping. This function has to be used in * order to e.g. write @vfsgid to inode->i_gid. * * Return: @vfsgid mapped into the filesystem idmapping */ kgid_t from_vfsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsgid_t vfsgid) { gid_t gid; if (idmap == &nop_mnt_idmap) return AS_KGIDT(vfsgid); gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid)); if (gid == (gid_t)-1) return INVALID_GID; if (initial_idmapping(fs_userns)) return KGIDT_INIT(gid); return make_kgid(fs_userns, gid); } EXPORT_SYMBOL_GPL(from_vfsgid); #ifdef CONFIG_MULTIUSER /** * vfsgid_in_group_p() - check whether a vfsuid matches the caller's groups * @vfsgid: the mnt gid to match * * This function can be used to determine whether @vfsuid matches any of the * caller's groups. * * Return: 1 if vfsuid matches caller's groups, 0 if not. 
*/ int vfsgid_in_group_p(vfsgid_t vfsgid) { return in_group_p(AS_KGIDT(vfsgid)); } #else int vfsgid_in_group_p(vfsgid_t vfsgid) { return 1; } #endif EXPORT_SYMBOL_GPL(vfsgid_in_group_p); static int copy_mnt_idmap(struct uid_gid_map *map_from, struct uid_gid_map *map_to) { struct uid_gid_extent *forward, *reverse; u32 nr_extents = READ_ONCE(map_from->nr_extents); /* Pairs with smp_wmb() when writing the idmapping. */ smp_rmb(); /* * Don't blindly copy @map_to into @map_from if nr_extents is * smaller or equal to UID_GID_MAP_MAX_BASE_EXTENTS. Since we * read @nr_extents someone could have written an idmapping and * then we might end up with inconsistent data. So just don't do * anything at all. */ if (nr_extents == 0) return -EINVAL; /* * Here we know that nr_extents is greater than zero which means * a map has been written. Since idmappings can't be changed * once they have been written we know that we can safely copy * from @map_to into @map_from. */ if (nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) { *map_to = *map_from; return 0; } forward = kmemdup(map_from->forward, nr_extents * sizeof(struct uid_gid_extent), GFP_KERNEL_ACCOUNT); if (!forward) return -ENOMEM; reverse = kmemdup(map_from->reverse, nr_extents * sizeof(struct uid_gid_extent), GFP_KERNEL_ACCOUNT); if (!reverse) { kfree(forward); return -ENOMEM; } /* * The idmapping isn't exposed anywhere so we don't need to care * about ordering between extent pointers and @nr_extents * initialization. */ map_to->forward = forward; map_to->reverse = reverse; map_to->nr_extents = nr_extents; return 0; } static void free_mnt_idmap(struct mnt_idmap *idmap) { if (idmap->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(idmap->uid_map.forward); kfree(idmap->uid_map.reverse); } if (idmap->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(idmap->gid_map.forward); kfree(idmap->gid_map.reverse); } kfree(idmap); } struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns) { struct mnt_idmap *idmap; int ret; idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT); if (!idmap) return ERR_PTR(-ENOMEM); refcount_set(&idmap->count, 1); ret = copy_mnt_idmap(&mnt_userns->uid_map, &idmap->uid_map); if (!ret) ret = copy_mnt_idmap(&mnt_userns->gid_map, &idmap->gid_map); if (ret) { free_mnt_idmap(idmap); idmap = ERR_PTR(ret); } return idmap; } /** * mnt_idmap_get - get a reference to an idmapping * @idmap: the idmap to bump the reference on * * If @idmap is not the @nop_mnt_idmap bump the reference count. * * Return: @idmap with reference count bumped if @not_mnt_idmap isn't passed. */ struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) { if (idmap != &nop_mnt_idmap) refcount_inc(&idmap->count); return idmap; } EXPORT_SYMBOL_GPL(mnt_idmap_get); /** * mnt_idmap_put - put a reference to an idmapping * @idmap: the idmap to put the reference on * * If this is a non-initial idmapping, put the reference count when a mount is * released and free it if we're the last user. */ void mnt_idmap_put(struct mnt_idmap *idmap) { if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) free_mnt_idmap(idmap); } EXPORT_SYMBOL_GPL(mnt_idmap_put);
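
/*
 * Toy model of the extent translation that map_id_down()/map_id_up() perform
 * on the uid_gid_map copies held in struct mnt_idmap above. This is a
 * simplified userspace sketch under the assumption that one extent maps the
 * range [first, first + count) onto [lower_first, lower_first + count); the
 * linear scan and the names map_down()/map_up() are illustrative only, not
 * the kernel implementation.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct extent {
	uint32_t first;		/* start of the range on the "upper" side */
	uint32_t lower_first;	/* start of the range on the "lower" side */
	uint32_t count;		/* number of consecutive ids mapped */
};

/* upper -> lower, (uint32_t)-1 when the id has no mapping */
static uint32_t map_down(const struct extent *e, size_t n, uint32_t id)
{
	for (size_t i = 0; i < n; i++)
		if (id >= e[i].first && id - e[i].first < e[i].count)
			return (id - e[i].first) + e[i].lower_first;
	return (uint32_t)-1;
}

/* lower -> upper, (uint32_t)-1 when the id has no mapping */
static uint32_t map_up(const struct extent *e, size_t n, uint32_t id)
{
	for (size_t i = 0; i < n; i++)
		if (id >= e[i].lower_first && id - e[i].lower_first < e[i].count)
			return (id - e[i].lower_first) + e[i].first;
	return (uint32_t)-1;
}

int main(void)
{
	/* one shifted range, purely for demonstration */
	struct extent map[] = {
		{ .first = 0, .lower_first = 100000, .count = 65536 },
	};

	printf("down(1000)  = %u\n", map_down(map, 1, 1000));	/* 101000 */
	printf("up(101000)  = %u\n", map_up(map, 1, 101000));	/* 1000 */
	printf("down(70000) = %u\n", map_down(map, 1, 70000));	/* unmapped */
	return 0;
}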
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_APIC_H
#define _ASM_X86_APIC_H

#include <linux/cpumask.h>
#include <linux/static_call.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
#include <asm/io.h>

#define ARCH_APICTIMER_STOPS_ON_C3	1

/*
 * Debugging macros
 */
#define APIC_QUIET   0
#define APIC_VERBOSE 1
#define APIC_DEBUG   2

/* Macros for apic_extnmi which controls external NMI masking */
#define APIC_EXTNMI_BSP		0 /* Default */
#define APIC_EXTNMI_ALL		1
#define APIC_EXTNMI_NONE	2

/*
 * Define the default level of output to be very little
 * This can be turned up by using apic=verbose for more
 * information and apic=debug for _lots_ of information.
 * apic_verbosity is defined in apic.c
 */
#define apic_printk(v, s, a...)				\
do { \ if ((v) <= apic_verbosity) \ printk(s, ##a); \ } while (0) #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) extern void x86_32_probe_apic(void); #else static inline void x86_32_probe_apic(void) { } #endif extern u32 cpuid_to_apicid[]; #define CPU_ACPIID_INVALID U32_MAX #ifdef CONFIG_X86_LOCAL_APIC extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern bool apic_is_disabled; extern unsigned int lapic_timer_period; extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { APIC_PIC, APIC_VIRTUAL_WIRE, APIC_VIRTUAL_WIRE_NO_CONFIG, APIC_SYMMETRIC_IO, APIC_SYMMETRIC_IO_NO_ROUTING }; /* * With 82489DX we can't rely on apic feature bit * retrieved via cpuid but still have to deal with * such an apic chip so we assume that SMP configuration * is found from MP table (64bit case uses ACPI mostly * which set smp presence flag as well so we are safe * to use this helper too). */ static inline bool apic_from_smp_config(void) { return smp_found_config && !apic_is_disabled; } /* * Basic functions accessing APICs. */ #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #endif static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, ASM_OUTPUT2("=r" (v), "=m" (*addr)), ASM_OUTPUT2("0" (v), "m" (*addr))); } static inline u32 native_apic_mem_read(u32 reg) { return readl((void __iomem *)(APIC_BASE + reg)); } static inline void native_apic_mem_eoi(void) { native_apic_mem_write(APIC_EOI, APIC_EOI_ACK); } extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); static inline bool apic_is_x2apic_enabled(void) { u64 msr; if (rdmsrl_safe(MSR_IA32_APICBASE, &msr)) return false; return msr & X2APIC_ENABLE; } extern void enable_IR_x2apic(void); extern int get_physical_broadcast(void); extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void apic_soft_disable(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); extern void apic_intr_mode_select(void); extern void apic_intr_mode_init(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); extern void setup_secondary_APIC_clock(void); extern void lapic_update_tsc_freq(void); #ifdef CONFIG_X86_64 static inline bool apic_force_enable(unsigned long addr) { return false; } #else extern bool apic_force_enable(unsigned long addr); #endif extern void apic_ap_setup(void); /* * On 32bit this is mach-xxx local */ #ifdef CONFIG_X86_64 extern int apic_is_clustered_box(void); #else static inline int apic_is_clustered_box(void) { return 0; } #endif extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); extern bool apic_needs_pit(void); extern void apic_send_IPI_allbutself(unsigned int vector); extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present); extern void topology_register_boot_apic(u32 apic_id); extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id); extern void topology_hotunplug_apic(unsigned int cpu); extern void topology_apply_cmdline_limits_early(void); extern 
void topology_init_possible_cpus(void); extern void topology_reset_possible_cpus_up(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 static inline void init_apic_mappings(void) { } static inline void disable_local_APIC(void) { } # define setup_boot_APIC_clock x86_init_noop # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } static inline void init_bsp_APIC(void) { } static inline void apic_intr_mode_select(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } static inline bool apic_needs_pit(void) { return true; } static inline void topology_apply_cmdline_limits_early(void) { } static inline void topology_init_possible_cpus(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || reg == APIC_LVR) return; wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); } static inline void native_apic_msr_eoi(void) { __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); } static inline u32 native_apic_msr_read(u32 reg) { u64 msr; if (reg == APIC_DFR) return -1; rdmsrl(APIC_BASE_MSR + (reg >> 4), msr); return (u32)msr; } static inline void native_x2apic_icr_write(u32 low, u32 id) { wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); } static inline u64 native_x2apic_icr_read(void) { unsigned long val; rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); return val; } extern int x2apic_mode; extern int x2apic_phys; extern void __init x2apic_set_max_apicid(u32 apicid); extern void x2apic_setup(void); static inline int x2apic_enabled(void) { return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled(); } #define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC)) #else /* !CONFIG_X86_X2APIC */ static inline void x2apic_setup(void) { } static inline int x2apic_enabled(void) { return 0; } static inline u32 native_apic_msr_read(u32 reg) { BUG(); } #define x2apic_mode (0) #define x2apic_supported() (0) #endif /* !CONFIG_X86_X2APIC */ extern void __init check_x2apic(void); struct irq_data; /* * Copyright 2004 James Cleverdon, IBM. * * Generic APIC sub-arch data struct. * * Hacked for x86-64 by James Cleverdon from i386 architecture code by * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ struct apic { /* Hotpath functions first */ void (*eoi)(void); void (*native_eoi)(void); void (*write)(u32 reg, u32 v); u32 (*read)(u32 reg); /* IPI related functions */ void (*wait_icr_idle)(void); u32 (*safe_wait_icr_idle)(void); void (*send_IPI)(int cpu, int vector); void (*send_IPI_mask)(const struct cpumask *mask, int vector); void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); u32 disable_esr : 1, dest_mode_logical : 1, x2apic_set_max_apicid : 1, nmi_to_offline_cpu : 1; u32 (*calc_dest_apicid)(unsigned int cpu); /* ICR related functions */ u64 (*icr_read)(void); void (*icr_write)(u32 low, u32 high); /* The limit of the APIC ID space. 
*/ u32 max_apic_id; /* Probe, setup and smpboot functions */ int (*probe)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); void (*init_apic_ldr)(void); u32 (*cpu_present_to_apicid)(int mps_cpu); u32 (*get_apic_id)(u32 id); /* wakeup_secondary_cpu */ int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip); /* wakeup secondary CPU using 64-bit wakeup point */ int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip); char *name; }; struct apic_override { void (*eoi)(void); void (*native_eoi)(void); void (*write)(u32 reg, u32 v); u32 (*read)(u32 reg); void (*send_IPI)(int cpu, int vector); void (*send_IPI_mask)(const struct cpumask *mask, int vector); void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); u64 (*icr_read)(void); void (*icr_write)(u32 low, u32 high); int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip); int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip); }; /* * Pointer to the local APIC driver in use on this system (there's * always just one such driver in use - the kernel decides via an * early probing process which one it picks - and then sticks to it): */ extern struct apic *apic; /* * APIC drivers are probed based on how they are listed in the .apicdrivers * section. So the order is important and enforced by the ordering * of different apic driver files in the Makefile. * * For the files having two apic drivers, we use apic_drivers() * to enforce the order with in them. */ #define apic_driver(sym) \ static const struct apic *__apicdrivers_##sym __used \ __aligned(sizeof(struct apic *)) \ __section(".apicdrivers") = { &sym } #define apic_drivers(sym1, sym2) \ static struct apic *__apicdrivers_##sym1##sym2[2] __used \ __aligned(sizeof(struct apic *)) \ __section(".apicdrivers") = { &sym1, &sym2 } extern struct apic *__apicdrivers[], *__apicdrivers_end[]; /* * APIC functionality to boot other CPUs - only used on SMP: */ #ifdef CONFIG_SMP extern int lapic_can_unplug_cpu(void); #endif #ifdef CONFIG_X86_LOCAL_APIC extern struct apic_override __x86_apic_override; void __init apic_setup_apic_calls(void); void __init apic_install_driver(struct apic *driver); #define apic_update_callback(_callback, _fn) { \ __x86_apic_override._callback = _fn; \ apic->_callback = _fn; \ static_call_update(apic_call_##_callback, _fn); \ pr_info("APIC: %s() replaced with %ps()\n", #_callback, _fn); \ } #define DECLARE_APIC_CALL(__cb) \ DECLARE_STATIC_CALL(apic_call_##__cb, *apic->__cb) DECLARE_APIC_CALL(eoi); DECLARE_APIC_CALL(native_eoi); DECLARE_APIC_CALL(icr_read); DECLARE_APIC_CALL(icr_write); DECLARE_APIC_CALL(read); DECLARE_APIC_CALL(send_IPI); DECLARE_APIC_CALL(send_IPI_mask); DECLARE_APIC_CALL(send_IPI_mask_allbutself); DECLARE_APIC_CALL(send_IPI_allbutself); DECLARE_APIC_CALL(send_IPI_all); DECLARE_APIC_CALL(send_IPI_self); DECLARE_APIC_CALL(wait_icr_idle); DECLARE_APIC_CALL(wakeup_secondary_cpu); DECLARE_APIC_CALL(wakeup_secondary_cpu_64); DECLARE_APIC_CALL(write); static __always_inline u32 apic_read(u32 reg) { return static_call(apic_call_read)(reg); } static __always_inline void apic_write(u32 reg, u32 val) { static_call(apic_call_write)(reg, val); } static __always_inline void apic_eoi(void) { static_call(apic_call_eoi)(); } static __always_inline void apic_native_eoi(void) { static_call(apic_call_native_eoi)(); } static __always_inline u64 apic_icr_read(void) { return 
static_call(apic_call_icr_read)(); } static __always_inline void apic_icr_write(u32 low, u32 high) { static_call(apic_call_icr_write)(low, high); } static __always_inline void __apic_send_IPI(int cpu, int vector) { static_call(apic_call_send_IPI)(cpu, vector); } static __always_inline void __apic_send_IPI_mask(const struct cpumask *mask, int vector) { static_call_mod(apic_call_send_IPI_mask)(mask, vector); } static __always_inline void __apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { static_call(apic_call_send_IPI_mask_allbutself)(mask, vector); } static __always_inline void __apic_send_IPI_allbutself(int vector) { static_call(apic_call_send_IPI_allbutself)(vector); } static __always_inline void __apic_send_IPI_all(int vector) { static_call(apic_call_send_IPI_all)(vector); } static __always_inline void __apic_send_IPI_self(int vector) { static_call_mod(apic_call_send_IPI_self)(vector); } static __always_inline void apic_wait_icr_idle(void) { static_call_cond(apic_call_wait_icr_idle)(); } static __always_inline u32 safe_apic_wait_icr_idle(void) { return apic->safe_wait_icr_idle ? apic->safe_wait_icr_idle() : 0; } static __always_inline bool apic_id_valid(u32 apic_id) { return apic_id <= apic->max_apic_id; } #else /* CONFIG_X86_LOCAL_APIC */ static inline u32 apic_read(u32 reg) { return 0; } static inline void apic_write(u32 reg, u32 val) { } static inline void apic_eoi(void) { } static inline u64 apic_icr_read(void) { return 0; } static inline void apic_icr_write(u32 low, u32 high) { } static inline void apic_wait_icr_idle(void) { } static inline u32 safe_apic_wait_icr_idle(void) { return 0; } static inline void apic_set_eoi_cb(void (*eoi)(void)) {} static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); } static inline void apic_setup_apic_calls(void) { } #define apic_update_callback(_callback, _fn) do { } while (0) #endif /* CONFIG_X86_LOCAL_APIC */ extern void apic_ack_irq(struct irq_data *data); static inline bool lapic_vector_set_in_irr(unsigned int vector) { u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); return !!(irr & (1U << (vector % 32))); } /* * Warm reset vector position: */ #define TRAMPOLINE_PHYS_LOW 0x467 #define TRAMPOLINE_PHYS_HIGH 0x469 extern void generic_bigsmp_probe(void); #ifdef CONFIG_X86_LOCAL_APIC #include <asm/smp.h> extern struct apic apic_noop; static inline u32 read_apic_id(void) { u32 reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } #ifdef CONFIG_X86_64 typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip); extern int default_acpi_madt_oem_check(char *, char *); extern void x86_64_probe_apic(void); #else static inline int default_acpi_madt_oem_check(char *a, char *b) { return 0; } static inline void x86_64_probe_apic(void) { } #endif extern int default_apic_id_valid(u32 apicid); extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); extern u32 default_cpu_present_to_apicid(int mps_cpu); void apic_send_nmi_to_offline_cpu(unsigned int cpu); #else /* CONFIG_X86_LOCAL_APIC */ static inline u32 read_apic_id(void) { return 0; } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_SMP void apic_smt_update(void); #else static inline void apic_smt_update(void) { } #endif struct msi_msg; struct irq_cfg; extern void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, bool dmar); extern void ioapic_zap_locks(void); #endif /* _ASM_X86_APIC_H */
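/*
 * Editor's note: a minimal, self-contained userspace sketch of the driver
 * selection and callback-override pattern described in the header above --
 * not kernel code. The real header walks the .apicdrivers section built by
 * apic_driver()/apic_drivers(), installs the first driver whose probe()
 * succeeds, and additionally patches static_call trampolines in
 * apic_update_callback(); the static_call machinery is omitted here. All
 * names below (fake_apic, apic_bigsmp_stub, ...) are hypothetical.
 */
#include <stdio.h>

struct fake_apic {
	const char *name;
	int  (*probe)(void);
	void (*eoi)(void);
};

static void default_eoi(void)  { puts("default eoi"); }
static void override_eoi(void) { puts("overridden eoi"); }

static int probe_fail(void)  { return 0; }
static int probe_match(void) { return 1; }

static struct fake_apic apic_bigsmp_stub  = { "bigsmp-stub",  probe_fail,  default_eoi };
static struct fake_apic apic_default_stub = { "default-stub", probe_match, default_eoi };

/* Probe order matters, like the link order of the .apicdrivers section. */
static struct fake_apic *fake_apicdrivers[] = { &apic_bigsmp_stub, &apic_default_stub };

static struct fake_apic *fake_apic;	/* cf. the global 'apic' pointer */

static void fake_install_driver(void)
{
	for (unsigned int i = 0; i < sizeof(fake_apicdrivers) / sizeof(fake_apicdrivers[0]); i++) {
		if (fake_apicdrivers[i]->probe()) {
			fake_apic = fake_apicdrivers[i];
			printf("installed %s\n", fake_apic->name);
			return;
		}
	}
}

int main(void)
{
	fake_install_driver();
	fake_apic->eoi();		/* driver's default callback */
	fake_apic->eoi = override_eoi;	/* cf. apic_update_callback(eoi, ...) */
	fake_apic->eoi();
	return 0;
}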
// SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/init.h> #include <linux/export.h> #include <linux/timer.h> #include <linux/acpi_pmtmr.h> #include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/clocksource.h> #include <linux/percpu.h> #include <linux/timex.h> #include <linux/static_key.h> #include <linux/static_call.h> #include <asm/hpet.h> #include <asm/timer.h> #include <asm/vgtod.h> #include <asm/time.h> #include <asm/delay.h> #include <asm/hypervisor.h> #include <asm/nmi.h> #include <asm/x86_init.h> #include <asm/geode.h> #include <asm/apic.h> #include <asm/intel-family.h> #include <asm/i8259.h> #include <asm/uv/uv.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); unsigned int __read_mostly tsc_khz; EXPORT_SYMBOL(tsc_khz); #define KHZ 1000 /* * TSC can be unstable due to cpufreq or due to unsynced TSCs */ static int __read_mostly tsc_unstable; static unsigned int __initdata tsc_early_khz; static DEFINE_STATIC_KEY_FALSE(__use_tsc); int tsc_clocksource_reliable; static int __read_mostly tsc_force_recalibrate; static u32 art_to_tsc_numerator; static u32 art_to_tsc_denominator; static u64 art_to_tsc_offset; static bool have_art; struct cyc2ns { struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */ seqcount_latch_t seq; /* 32 + 4 = 36 */ }; /* fits one cacheline */ static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); static int __init tsc_early_khz_setup(char *buf) { return kstrtouint(buf, 0, &tsc_early_khz); } early_param("tsc_early_khz", tsc_early_khz_setup); __always_inline void __cyc2ns_read(struct cyc2ns_data *data) { int seq, idx; do { seq = this_cpu_read(cyc2ns.seq.seqcount.sequence); idx = seq & 1; data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset); data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul); data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift); } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence))); } __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) { preempt_disable_notrace(); __cyc2ns_read(data); } __always_inline void cyc2ns_read_end(void) { preempt_enable_notrace(); } /* * Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) * ns = cycles * (10^9 / (cpu_khz * 10^3)) * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div * into a shift. The larger SC is, the more accurate the conversion, but * cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication * (64-bit result) can be used. * * We can use khz divisor instead of mhz to keep a better precision. * (mathieu.desnoyers@polymtl.ca) * * -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/ static __always_inline unsigned long long __cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; __cyc2ns_read(&data); ns = data.cyc2ns_offset; ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift); return ns; } static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { unsigned long long ns; preempt_disable_notrace(); ns = __cycles_2_ns(cyc); preempt_enable_notrace(); return ns; } static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) { unsigned long long ns_now; struct cyc2ns_data data; struct cyc2ns *c2n; ns_now = cycles_2_ns(tsc_now); /* * Compute a new multiplier as per the above comment and ensure our * time function is continuous; see the comment near struct * cyc2ns_data. */ clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz, NSEC_PER_MSEC, 0); /* * cyc2ns_shift is exported via arch_perf_update_userpage() where it is * not expected to be greater than 31 due to the original published * conversion algorithm shifting a 32-bit value (now specifies a 64-bit * value) - refer perf_event_mmap_page documentation in perf_event.h. */ if (data.cyc2ns_shift == 32) { data.cyc2ns_shift = 31; data.cyc2ns_mul >>= 1; } data.cyc2ns_offset = ns_now - mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift); c2n = per_cpu_ptr(&cyc2ns, cpu); raw_write_seqcount_latch(&c2n->seq); c2n->data[0] = data; raw_write_seqcount_latch(&c2n->seq); c2n->data[1] = data; } static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) { unsigned long flags; local_irq_save(flags); sched_clock_idle_sleep_event(); if (khz) __set_cyc2ns_scale(khz, cpu, tsc_now); sched_clock_idle_wakeup_event(); local_irq_restore(flags); } /* * Initialize cyc2ns for boot cpu */ static void __init cyc2ns_init_boot_cpu(void) { struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns); seqcount_latch_init(&c2n->seq); __set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc()); } /* * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same * speed as the bootup CPU. */ static void __init cyc2ns_init_secondary_cpus(void) { unsigned int cpu, this_cpu = smp_processor_id(); struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns); struct cyc2ns_data *data = c2n->data; for_each_possible_cpu(cpu) { if (cpu != this_cpu) { seqcount_latch_init(&c2n->seq); c2n = per_cpu_ptr(&cyc2ns, cpu); c2n->data[0] = data[0]; c2n->data[1] = data[1]; } } } /* * Scheduler clock - returns current time in nanosec units. */ noinstr u64 native_sched_clock(void) { if (static_branch_likely(&__use_tsc)) { u64 tsc_now = rdtsc(); /* return the value in ns */ return __cycles_2_ns(tsc_now); } /* * Fall back to jiffies if there's no TSC available: * ( But note that we still use it if the TSC is marked * unstable. We do this because unlike Time Of Day, * the scheduler clock tolerates small errors and it's * very important for it to be as fast as the platform * can achieve it. ) */ /* No locking but a rare wrong value is not a big deal: */ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); } /* * Generate a sched_clock if you already have a TSC value. 
*/ u64 native_sched_clock_from_tsc(u64 tsc) { return cycles_2_ns(tsc); } /* We need to define a real function for sched_clock, to override the weak default version */ #ifdef CONFIG_PARAVIRT noinstr u64 sched_clock_noinstr(void) { return paravirt_sched_clock(); } bool using_native_sched_clock(void) { return static_call_query(pv_sched_clock) == native_sched_clock; } #else u64 sched_clock_noinstr(void) __attribute__((alias("native_sched_clock"))); bool using_native_sched_clock(void) { return true; } #endif notrace u64 sched_clock(void) { u64 now; preempt_disable_notrace(); now = sched_clock_noinstr(); preempt_enable_notrace(); return now; } int check_tsc_unstable(void) { return tsc_unstable; } EXPORT_SYMBOL_GPL(check_tsc_unstable); #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { mark_tsc_unstable("boot parameter notsc"); return 1; } #else /* * disable flag for tsc. Takes effect by clearing the TSC cpu flag * in cpu/common.c */ int __init notsc_setup(char *str) { setup_clear_cpu_cap(X86_FEATURE_TSC); return 1; } #endif __setup("notsc", notsc_setup); static int no_sched_irq_time; static int no_tsc_watchdog; static int tsc_as_watchdog; static int __init tsc_setup(char *str) { if (!strcmp(str, "reliable")) tsc_clocksource_reliable = 1; if (!strncmp(str, "noirqtime", 9)) no_sched_irq_time = 1; if (!strcmp(str, "unstable")) mark_tsc_unstable("boot parameter"); if (!strcmp(str, "nowatchdog")) { no_tsc_watchdog = 1; if (tsc_as_watchdog) pr_alert("%s: Overriding earlier tsc=watchdog with tsc=nowatchdog\n", __func__); tsc_as_watchdog = 0; } if (!strcmp(str, "recalibrate")) tsc_force_recalibrate = 1; if (!strcmp(str, "watchdog")) { if (no_tsc_watchdog) pr_alert("%s: tsc=watchdog overridden by earlier tsc=nowatchdog\n", __func__); else tsc_as_watchdog = 1; } return 1; } __setup("tsc=", tsc_setup); #define MAX_RETRIES 5 #define TSC_DEFAULT_THRESHOLD 0x20000 /* * Read TSC and the reference counters. Take care of any disturbances */ static u64 tsc_read_refs(u64 *p, int hpet) { u64 t1, t2; u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD; int i; for (i = 0; i < MAX_RETRIES; i++) { t1 = get_cycles(); if (hpet) *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; else *p = acpi_pm_read_early(); t2 = get_cycles(); if ((t2 - t1) < thresh) return t2; } return ULLONG_MAX; } /* * Calculate the TSC frequency from HPET reference */ static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) { u64 tmp; if (hpet2 < hpet1) hpet2 += 0x100000000ULL; hpet2 -= hpet1; tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); do_div(tmp, 1000000); deltatsc = div64_u64(deltatsc, tmp); return (unsigned long) deltatsc; } /* * Calculate the TSC frequency from PMTimer reference */ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) { u64 tmp; if (!pm1 && !pm2) return ULONG_MAX; if (pm2 < pm1) pm2 += (u64)ACPI_PM_OVRRUN; pm2 -= pm1; tmp = pm2 * 1000000000LL; do_div(tmp, PMTMR_TICKS_PER_SEC); do_div(deltatsc, tmp); return (unsigned long) deltatsc; } #define CAL_MS 10 #define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS)) #define CAL_PIT_LOOPS 1000 #define CAL2_MS 50 #define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS)) #define CAL2_PIT_LOOPS 5000 /* * Try to calibrate the TSC against the Programmable * Interrupt Timer and return the frequency of the TSC * in kHz. * * Return ULONG_MAX on failure to calibrate. 
*/ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) { u64 tsc, t1, t2, delta; unsigned long tscmin, tscmax; int pitcnt; if (!has_legacy_pic()) { /* * Relies on tsc_early_delay_calibrate() to have given us semi * usable udelay(), wait for the same 50ms we would have with * the PIT loop below. */ udelay(10 * USEC_PER_MSEC); udelay(10 * USEC_PER_MSEC); udelay(10 * USEC_PER_MSEC); udelay(10 * USEC_PER_MSEC); udelay(10 * USEC_PER_MSEC); return ULONG_MAX; } /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); /* * Setup CTC channel 2* for mode 0, (interrupt on terminal * count mode), binary count. Set the latch register to 50ms * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); outb(latch & 0xff, 0x42); outb(latch >> 8, 0x42); tsc = t1 = t2 = get_cycles(); pitcnt = 0; tscmax = 0; tscmin = ULONG_MAX; while ((inb(0x61) & 0x20) == 0) { t2 = get_cycles(); delta = t2 - tsc; tsc = t2; if ((unsigned long) delta < tscmin) tscmin = (unsigned int) delta; if ((unsigned long) delta > tscmax) tscmax = (unsigned int) delta; pitcnt++; } /* * Sanity checks: * * If we were not able to read the PIT more than loopmin * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ if (pitcnt < loopmin || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; do_div(delta, ms); return delta; } /* * This reads the current MSB of the PIT counter, and * checks if we are running on sufficiently fast and * non-virtualized hardware. * * Our expectations are: * * - the PIT is running at roughly 1.19MHz * * - each IO is going to take about 1us on real hardware, * but we allow it to be much faster (by a factor of 10) or * _slightly_ slower (ie we allow up to a 2us read+counter * update - anything else implies a unacceptably slow CPU * or PIT for the fast calibration to work. * * - with 256 PIT ticks to read the value, we have 214us to * see the same MSB (and overhead like doing a single TSC * read per MSB value etc). * * - We're doing 2 reads per loop (LSB, MSB), and we expect * them each to take about a microsecond on real hardware. * So we expect a count value of around 100. But we'll be * generous, and accept anything over 50. * * - if the PIT is stuck, and we see *many* more reads, we * return early (and the next caller of pit_expect_msb() * then consider it a failure when they don't see the * next expected value). * * These expectations mean that we know that we have seen the * transition from one expected value to another with a fairly * high accuracy, and we didn't miss any events. We can thus * use the TSC value at the transitions to calculate a pretty * good value for the TSC frequency. */ static inline int pit_verify_msb(unsigned char val) { /* Ignore LSB */ inb(0x42); return inb(0x42) == val; } static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) { int count; u64 tsc = 0, prev_tsc = 0; for (count = 0; count < 50000; count++) { if (!pit_verify_msb(val)) break; prev_tsc = tsc; tsc = get_cycles(); } *deltap = get_cycles() - prev_tsc; *tscp = tsc; /* * We require _some_ success, but the quality control * will be based on the error terms on the TSC values. */ return count > 5; } /* * How many MSB values do we want to see? We aim for * a maximum error rate of 500ppm (in practice the * real error is much smaller), but refuse to spend * more than 50ms on it. 
*/ #define MAX_QUICK_PIT_MS 50 #define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) static unsigned long quick_pit_calibrate(void) { int i; u64 tsc, delta; unsigned long d1, d2; if (!has_legacy_pic()) return 0; /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); /* * Counter 2, mode 0 (one-shot), binary count * * NOTE! Mode 2 decrements by two (and then the * output is flipped each time, giving the same * final output frequency as a decrement-by-one), * so mode 0 is much better when looking at the * individual counts. */ outb(0xb0, 0x43); /* Start at 0xffff */ outb(0xff, 0x42); outb(0xff, 0x42); /* * The PIT starts counting at the next edge, so we * need to delay for a microsecond. The easiest way * to do that is to just read back the 16-bit counter * once from the PIT. */ pit_verify_msb(0); if (pit_expect_msb(0xff, &tsc, &d1)) { for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { if (!pit_expect_msb(0xff-i, &delta, &d2)) break; delta -= tsc; /* * Extrapolate the error and fail fast if the error will * never be below 500 ppm. */ if (i == 1 && d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11) return 0; /* * Iterate until the error is less than 500 ppm */ if (d1+d2 >= delta >> 11) continue; /* * Check the PIT one more time to verify that * all TSC reads were stable wrt the PIT. * * This also guarantees serialization of the * last cycle read ('d2') in pit_expect_msb. */ if (!pit_verify_msb(0xfe - i)) break; goto success; } } pr_info("Fast TSC calibration failed\n"); return 0; success: /* * Ok, if we get here, then we've seen the * MSB of the PIT decrement 'i' times, and the * error has shrunk to less than 500 ppm. * * As a result, we can depend on there not being * any odd delays anywhere, and the TSC reads are * reliable (within the error). * * kHz = ticks / time-in-seconds / 1000; * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) */ delta *= PIT_TICK_RATE; do_div(delta, i*256*1000); pr_info("Fast TSC calibration using PIT\n"); return delta; } /** * native_calibrate_tsc - determine TSC frequency * Determine TSC frequency via CPUID, else return 0. */ unsigned long native_calibrate_tsc(void) { unsigned int eax_denominator, ebx_numerator, ecx_hz, edx; unsigned int crystal_khz; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return 0; if (boot_cpu_data.cpuid_level < 0x15) return 0; eax_denominator = ebx_numerator = ecx_hz = edx = 0; /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */ cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx); if (ebx_numerator == 0 || eax_denominator == 0) return 0; crystal_khz = ecx_hz / 1000; /* * Denverton SoCs don't report crystal clock, and also don't support * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal * clock. */ if (crystal_khz == 0 && boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_D) crystal_khz = 25000; /* * TSC frequency reported directly by CPUID is a "hardware reported" * frequency and is the most accurate one so far we have. This * is considered a known frequency. */ if (crystal_khz != 0) setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); /* * Some Intel SoCs like Skylake and Kabylake don't report the crystal * clock, but we can easily calculate it to a high degree of accuracy * by considering the crystal ratio and the CPU speed. 
*/ if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) { unsigned int eax_base_mhz, ebx, ecx, edx; cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx); crystal_khz = eax_base_mhz * 1000 * eax_denominator / ebx_numerator; } if (crystal_khz == 0) return 0; /* * For Atom SoCs TSC is the only reliable clocksource. * Mark TSC reliable so no watchdog on it. */ if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT) setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); #ifdef CONFIG_X86_LOCAL_APIC /* * The local APIC appears to be fed by the core crystal clock * (which sounds entirely sensible). We can set the global * lapic_timer_period here to avoid having to calibrate the APIC * timer later. */ lapic_timer_period = crystal_khz * 1000 / HZ; #endif return crystal_khz * ebx_numerator / eax_denominator; } static unsigned long cpu_khz_from_cpuid(void) { unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return 0; if (boot_cpu_data.cpuid_level < 0x16) return 0; eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0; cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx); return eax_base_mhz * 1000; } /* * calibrate cpu using pit, hpet, and ptimer methods. They are available * later in boot after acpi is initialized. */ static unsigned long pit_hpet_ptimer_calibrate_cpu(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags, latch, ms; int hpet = is_hpet_enabled(), i, loopmin; /* * Run 5 calibration loops to get the lowest frequency value * (the best estimate). We use two different calibration modes * here: * * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and * load a timeout of 50ms. We read the time right after we * started the timer and wait until the PIT count down reaches * zero. In each wait loop iteration we read the TSC and check * the delta to the previous read. We keep track of the min * and max values of that delta. The delta is mostly defined * by the IO time of the PIT access, so we can detect when * any disturbance happened between the two reads. If the * maximum time is significantly larger than the minimum time, * then we discard the result and have another try. * * 2) Reference counter. If available we use the HPET or the * PMTIMER as a reference to check the sanity of that value. * We use separate TSC readouts and check inside of the * reference read for any possible disturbance. We discard * disturbed values here as well. We do that around the PIT * calibration delay loop as we have to wait for a certain * amount of time anyway. */ /* Preset PIT loop values */ latch = CAL_LATCH; ms = CAL_MS; loopmin = CAL_PIT_LOOPS; for (i = 0; i < 3; i++) { unsigned long tsc_pit_khz; /* * Read the start value and the reference count of * hpet/pmtimer when available. Then do the PIT * calibration, which will take at least 50ms, and * read the end value. */ local_irq_save(flags); tsc1 = tsc_read_refs(&ref1, hpet); tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); /* Pick the lowest PIT TSC calibration so far */ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? 
*/ if (ref1 == ref2) continue; /* Check, whether the sampling was disturbed */ if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) continue; tsc2 = (tsc2 - tsc1) * 1000000LL; if (hpet) tsc2 = calc_hpet_ref(tsc2, ref1, ref2); else tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); /* Check the reference deviation */ delta = ((u64) tsc_pit_min) * 100; do_div(delta, tsc_ref_min); /* * If both calibration results are inside a 10% window * then we can be sure, that the calibration * succeeded. We break out of the loop right away. We * use the reference value, as it is more precise. */ if (delta >= 90 && delta <= 110) { pr_info("PIT calibration matches %s. %d loops\n", hpet ? "HPET" : "PMTIMER", i + 1); return tsc_ref_min; } /* * Check whether PIT failed more than once. This * happens in virtualized environments. We need to * give the virtual PC a slightly longer timeframe for * the HPET/PMTIMER to make the result precise. */ if (i == 1 && tsc_pit_min == ULONG_MAX) { latch = CAL2_LATCH; ms = CAL2_MS; loopmin = CAL2_PIT_LOOPS; } } /* * Now check the results. */ if (tsc_pit_min == ULONG_MAX) { /* PIT gave no useful value */ pr_warn("Unable to calibrate against PIT\n"); /* We don't have an alternative source, disable TSC */ if (!hpet && !ref1 && !ref2) { pr_notice("No reference (HPET/PMTIMER) available\n"); return 0; } /* The alternative source failed as well, disable TSC */ if (tsc_ref_min == ULONG_MAX) { pr_warn("HPET/PMTIMER calibration failed\n"); return 0; } /* Use the alternative source */ pr_info("using %s reference calibration\n", hpet ? "HPET" : "PMTIMER"); return tsc_ref_min; } /* We don't have an alternative source, use the PIT calibration value */ if (!hpet && !ref1 && !ref2) { pr_info("Using PIT calibration value\n"); return tsc_pit_min; } /* The alternative source failed, use the PIT calibration value */ if (tsc_ref_min == ULONG_MAX) { pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n"); return tsc_pit_min; } /* * The calibration values differ too much. In doubt, we use * the PIT value as we know that there are PMTIMERs around * running at double speed. At least we let the user know: */ pr_warn("PIT calibration deviates from %s: %lu %lu\n", hpet ? 
"HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); pr_info("Using PIT calibration value\n"); return tsc_pit_min; } /** * native_calibrate_cpu_early - can calibrate the cpu early in boot */ unsigned long native_calibrate_cpu_early(void) { unsigned long flags, fast_calibrate = cpu_khz_from_cpuid(); if (!fast_calibrate) fast_calibrate = cpu_khz_from_msr(); if (!fast_calibrate) { local_irq_save(flags); fast_calibrate = quick_pit_calibrate(); local_irq_restore(flags); } return fast_calibrate; } /** * native_calibrate_cpu - calibrate the cpu */ static unsigned long native_calibrate_cpu(void) { unsigned long tsc_freq = native_calibrate_cpu_early(); if (!tsc_freq) tsc_freq = pit_hpet_ptimer_calibrate_cpu(); return tsc_freq; } void recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP unsigned long cpu_khz_old = cpu_khz; if (!boot_cpu_has(X86_FEATURE_TSC)) return; cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); if (tsc_khz == 0) tsc_khz = cpu_khz; else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); #endif } EXPORT_SYMBOL_GPL(recalibrate_cpu_khz); static unsigned long long cyc2ns_suspend; void tsc_save_sched_clock_state(void) { if (!sched_clock_stable()) return; cyc2ns_suspend = sched_clock(); } /* * Even on processors with invariant TSC, TSC gets reset in some the * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to * arbitrary value (still sync'd across cpu's) during resume from such sleep * states. To cope up with this, recompute the cyc2ns_offset for each cpu so * that sched_clock() continues from the point where it was left off during * suspend. */ void tsc_restore_sched_clock_state(void) { unsigned long long offset; unsigned long flags; int cpu; if (!sched_clock_stable()) return; local_irq_save(flags); /* * We're coming out of suspend, there's no concurrency yet; don't * bother being nice about the RCU stuff, just write to both * data fields. */ this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0); this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0); offset = cyc2ns_suspend - sched_clock(); for_each_possible_cpu(cpu) { per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset; per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset; } local_irq_restore(flags); } #ifdef CONFIG_CPU_FREQ /* * Frequency scaling support. Adjust the TSC based timer when the CPU frequency * changes. * * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC * as unstable and give up in those cases. * * Should fix up last_tsc too. Currently gettimeofday in the * first tick after the change will be slightly wrong. 
*/ static unsigned int ref_freq; static unsigned long loops_per_jiffy_ref; static unsigned long tsc_khz_ref; static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; if (num_online_cpus() > 1) { mark_tsc_unstable("cpufreq changes on SMP"); return 0; } if (!ref_freq) { ref_freq = freq->old; loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; tsc_khz_ref = tsc_khz; } if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { boot_cpu_data.loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) mark_tsc_unstable("cpufreq changes"); set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc()); } return 0; } static struct notifier_block time_cpufreq_notifier_block = { .notifier_call = time_cpufreq_notifier }; static int __init cpufreq_register_tsc_scaling(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return 0; if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return 0; cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); return 0; } core_initcall(cpufreq_register_tsc_scaling); #endif /* CONFIG_CPU_FREQ */ #define ART_CPUID_LEAF (0x15) #define ART_MIN_DENOMINATOR (1) /* * If ART is present detect the numerator:denominator to convert to TSC */ static void __init detect_art(void) { unsigned int unused[2]; if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) return; /* * Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required, * and the TSC counter resets must not occur asynchronously. */ if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || !boot_cpu_has(X86_FEATURE_TSC_ADJUST) || tsc_async_resets) return; cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, &art_to_tsc_numerator, unused, unused+1); if (art_to_tsc_denominator < ART_MIN_DENOMINATOR) return; rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset); /* Make this sticky over multiple CPU init calls */ setup_force_cpu_cap(X86_FEATURE_ART); } /* clocksource code */ static void tsc_resume(struct clocksource *cs) { tsc_verify_tsc_adjust(true); } /* * We used to compare the TSC to the cycle_last value in the clocksource * structure to avoid a nasty time-warp. This can be observed in a * very small window right after one CPU updated cycle_last under * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which * is smaller than the cycle_last reference value due to a TSC which * is slightly behind. This delta is nowhere else observable, but in * that case it results in a forward time jump in the range of hours * due to the unsigned delta calculation of the time keeping core * code, which is necessary to support wrapping clocksources like pm * timer. * * This sanity check is now done in the core timekeeping code. * checking the result of read_tsc() - cycle_last for being negative. * That works because CLOCKSOURCE_MASK(64) does not mask out any bit. 
*/ static u64 read_tsc(struct clocksource *cs) { return (u64)rdtsc_ordered(); } static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; tsc_unstable = 1; if (using_native_sched_clock()) clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } static void tsc_cs_tick_stable(struct clocksource *cs) { if (tsc_unstable) return; if (using_native_sched_clock()) sched_clock_tick_stable(); } static int tsc_cs_enable(struct clocksource *cs) { vclocks_set_used(VDSO_CLOCKMODE_TSC); return 0; } /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ static struct clocksource clocksource_tsc_early = { .name = "tsc-early", .rating = 299, .uncertainty_margin = 32 * NSEC_PER_MSEC, .read = read_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_MUST_VERIFY, .id = CSID_X86_TSC_EARLY, .vdso_clock_mode = VDSO_CLOCKMODE_TSC, .enable = tsc_cs_enable, .resume = tsc_resume, .mark_unstable = tsc_cs_mark_unstable, .tick_stable = tsc_cs_tick_stable, .list = LIST_HEAD_INIT(clocksource_tsc_early.list), }; /* * Must mark VALID_FOR_HRES early such that when we unregister tsc_early * this one will immediately take over. We will only register if TSC has * been found good. */ static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_MUST_VERIFY | CLOCK_SOURCE_VERIFY_PERCPU, .id = CSID_X86_TSC, .vdso_clock_mode = VDSO_CLOCKMODE_TSC, .enable = tsc_cs_enable, .resume = tsc_resume, .mark_unstable = tsc_cs_mark_unstable, .tick_stable = tsc_cs_tick_stable, .list = LIST_HEAD_INIT(clocksource_tsc.list), }; void mark_tsc_unstable(char *reason) { if (tsc_unstable) return; tsc_unstable = 1; if (using_native_sched_clock()) clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to %s\n", reason); clocksource_mark_unstable(&clocksource_tsc_early); clocksource_mark_unstable(&clocksource_tsc); } EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init tsc_disable_clocksource_watchdog(void) { clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; } bool tsc_clocksource_watchdog_disabled(void) { return !(clocksource_tsc.flags & CLOCK_SOURCE_MUST_VERIFY) && tsc_as_watchdog && !no_tsc_watchdog; } static void __init check_system_tsc_reliable(void) { #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) if (is_geode_lx()) { /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 unsigned long res_low, res_high; rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); /* Geode_LX - the OLPC CPU has a very reliable TSC */ if (res_low & RTSC_SUSP) tsc_clocksource_reliable = 1; } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; /* * Disable the clocksource watchdog when the system has: * - TSC running at constant frequency * - TSC which does not stop in C-States * - the TSC_ADJUST register which allows to detect even minimal * modifications * - not more than two sockets. As the number of sockets cannot be * evaluated at the early boot stage where this has to be * invoked, check the number of online memory nodes as a * fallback solution which is an reasonable estimate. 
*/ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && boot_cpu_has(X86_FEATURE_TSC_ADJUST) && nr_online_nodes <= 4) tsc_disable_clocksource_watchdog(); } /* * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. */ int unsynchronized_tsc(void) { if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable) return 1; #ifdef CONFIG_SMP if (apic_is_clustered_box()) return 1; #endif if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return 0; if (tsc_clocksource_reliable) return 0; /* * Intel systems are normally all synchronized. * Exceptions must mark TSC as unstable: */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { /* assume multi socket systems are not synchronized: */ if (num_possible_cpus() > 1) return 1; } return 0; } /* * Convert ART to TSC given numerator/denominator found in detect_art() */ struct system_counterval_t convert_art_to_tsc(u64 art) { u64 tmp, res, rem; rem = do_div(art, art_to_tsc_denominator); res = art * art_to_tsc_numerator; tmp = rem * art_to_tsc_numerator; do_div(tmp, art_to_tsc_denominator); res += tmp + art_to_tsc_offset; return (struct system_counterval_t) { .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC, .cycles = res, }; } EXPORT_SYMBOL(convert_art_to_tsc); /** * convert_art_ns_to_tsc() - Convert ART in nanoseconds to TSC. * @art_ns: ART (Always Running Timer) in unit of nanoseconds * * PTM requires all timestamps to be in units of nanoseconds. When user * software requests a cross-timestamp, this function converts system timestamp * to TSC. * * This is valid when CPU feature flag X86_FEATURE_TSC_KNOWN_FREQ is set * indicating the tsc_khz is derived from CPUID[15H]. Drivers should check * that this flag is set before conversion to TSC is attempted. * * Return: * struct system_counterval_t - system counter value with the ID of the * corresponding clocksource: * cycles: System counter value * cs_id: The clocksource ID for validating comparability */ struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns) { u64 tmp, res, rem; rem = do_div(art_ns, USEC_PER_SEC); res = art_ns * tsc_khz; tmp = rem * tsc_khz; do_div(tmp, USEC_PER_SEC); res += tmp; return (struct system_counterval_t) { .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC, .cycles = res, }; } EXPORT_SYMBOL(convert_art_ns_to_tsc); static void tsc_refine_calibration_work(struct work_struct *work); static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); /** * tsc_refine_calibration_work - Further refine tsc freq calibration * @work: ignored. * * This functions uses delayed work over a period of a * second to further refine the TSC freq value. Since this is * timer based, instead of loop based, we don't block the boot * process while this longer calibration is done. * * If there are any calibration anomalies (too many SMIs, etc), * or the refined calibration is off by 1% of the fast early * calibration, we throw out the new calibration and use the * early calibration. */ static void tsc_refine_calibration_work(struct work_struct *work) { static u64 tsc_start = ULLONG_MAX, ref_start; static int hpet; u64 tsc_stop, ref_stop, delta; unsigned long freq; int cpu; /* Don't bother refining TSC on unstable systems */ if (tsc_unstable) goto unreg; /* * Since the work is started early in boot, we may be * delayed the first time we expire. So set the workqueue * again once we know timers are working. */ if (tsc_start == ULLONG_MAX) { restart: /* * Only set hpet once, to avoid mixing hardware * if the hpet becomes enabled later. 
*/ hpet = is_hpet_enabled(); tsc_start = tsc_read_refs(&ref_start, hpet); schedule_delayed_work(&tsc_irqwork, HZ); return; } tsc_stop = tsc_read_refs(&ref_stop, hpet); /* hpet or pmtimer available ? */ if (ref_start == ref_stop) goto out; /* Check, whether the sampling was disturbed */ if (tsc_stop == ULLONG_MAX) goto restart; delta = tsc_stop - tsc_start; delta *= 1000000LL; if (hpet) freq = calc_hpet_ref(delta, ref_start, ref_stop); else freq = calc_pmtimer_ref(delta, ref_start, ref_stop); /* Will hit this only if tsc_force_recalibrate has been set */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { /* Warn if the deviation exceeds 500 ppm */ if (abs(tsc_khz - freq) > (tsc_khz >> 11)) { pr_warn("Warning: TSC freq calibrated by CPUID/MSR differs from what is calibrated by HW timer, please check with vendor!!\n"); pr_info("Previous calibrated TSC freq:\t %lu.%03lu MHz\n", (unsigned long)tsc_khz / 1000, (unsigned long)tsc_khz % 1000); } pr_info("TSC freq recalibrated by [%s]:\t %lu.%03lu MHz\n", hpet ? "HPET" : "PM_TIMER", (unsigned long)freq / 1000, (unsigned long)freq % 1000); return; } /* Make sure we're within 1% */ if (abs(tsc_khz - freq) > tsc_khz/100) goto out; tsc_khz = freq; pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", (unsigned long)tsc_khz / 1000, (unsigned long)tsc_khz % 1000); /* Inform the TSC deadline clockevent devices about the recalibration */ lapic_update_tsc_freq(); /* Update the sched_clock() rate to match the clocksource one */ for_each_possible_cpu(cpu) set_cyc2ns_scale(tsc_khz, cpu, tsc_stop); out: if (tsc_unstable) goto unreg; if (boot_cpu_has(X86_FEATURE_ART)) have_art = true; clocksource_register_khz(&clocksource_tsc, tsc_khz); unreg: clocksource_unregister(&clocksource_tsc_early); } static int __init init_tsc_clocksource(void) { if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz) return 0; if (tsc_unstable) { clocksource_unregister(&clocksource_tsc_early); return 0; } if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; /* * When TSC frequency is known (retrieved via MSR or CPUID), we skip * the refined calibration and directly register it as a clocksource. */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { if (boot_cpu_has(X86_FEATURE_ART)) have_art = true; clocksource_register_khz(&clocksource_tsc, tsc_khz); clocksource_unregister(&clocksource_tsc_early); if (!tsc_force_recalibrate) return 0; } schedule_delayed_work(&tsc_irqwork, 0); return 0; } /* * We use device_initcall here, to ensure we run after the hpet * is fully initialized, which may occur at fs_initcall time. */ device_initcall(init_tsc_clocksource); static bool __init determine_cpu_tsc_frequencies(bool early) { /* Make sure that cpu and tsc are not already calibrated */ WARN_ON(cpu_khz || tsc_khz); if (early) { cpu_khz = x86_platform.calibrate_cpu(); if (tsc_early_khz) tsc_khz = tsc_early_khz; else tsc_khz = x86_platform.calibrate_tsc(); } else { /* We should not be here with non-native cpu calibration */ WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu); cpu_khz = pit_hpet_ptimer_calibrate_cpu(); } /* * Trust non-zero tsc_khz as authoritative, * and use it to sanity check cpu_khz, * which will be off if system timer is off. 
*/ if (tsc_khz == 0) tsc_khz = cpu_khz; else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) cpu_khz = tsc_khz; if (tsc_khz == 0) return false; pr_info("Detected %lu.%03lu MHz processor\n", (unsigned long)cpu_khz / KHZ, (unsigned long)cpu_khz % KHZ); if (cpu_khz != tsc_khz) { pr_info("Detected %lu.%03lu MHz TSC", (unsigned long)tsc_khz / KHZ, (unsigned long)tsc_khz % KHZ); } return true; } static unsigned long __init get_loops_per_jiffy(void) { u64 lpj = (u64)tsc_khz * KHZ; do_div(lpj, HZ); return lpj; } static void __init tsc_enable_sched_clock(void) { loops_per_jiffy = get_loops_per_jiffy(); use_tsc_delay(); /* Sanitize TSC ADJUST before cyc2ns gets initialized */ tsc_store_and_check_tsc_adjust(true); cyc2ns_init_boot_cpu(); static_branch_enable(&__use_tsc); } void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return; /* Don't change UV TSC multi-chassis synchronization */ if (is_early_uv_system()) return; if (!determine_cpu_tsc_frequencies(true)) return; tsc_enable_sched_clock(); } void __init tsc_init(void) { if (!cpu_feature_enabled(X86_FEATURE_TSC)) { setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } /* * native_calibrate_cpu_early can only calibrate using methods that are * available early in boot. */ if (x86_platform.calibrate_cpu == native_calibrate_cpu_early) x86_platform.calibrate_cpu = native_calibrate_cpu; if (!tsc_khz) { /* We failed to determine frequencies earlier, try again */ if (!determine_cpu_tsc_frequencies(false)) { mark_tsc_unstable("could not calculate TSC khz"); setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } tsc_enable_sched_clock(); } cyc2ns_init_secondary_cpus(); if (!no_sched_irq_time) enable_sched_clock_irqtime(); lpj_fine = get_loops_per_jiffy(); check_system_tsc_reliable(); if (unsynchronized_tsc()) { mark_tsc_unstable("TSCs unsynchronized"); return; } if (tsc_clocksource_reliable || no_tsc_watchdog) tsc_disable_clocksource_watchdog(); clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); } #ifdef CONFIG_SMP /* * Check whether existing calibration data can be reused. */ unsigned long calibrate_delay_is_known(void) { int sibling, cpu = smp_processor_id(); int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); const struct cpumask *mask = topology_core_cpumask(cpu); /* * If TSC has constant frequency and TSC is synchronized across * sockets then reuse CPU0 calibration. */ if (constant_tsc && !tsc_unstable) return cpu_data(0).loops_per_jiffy; /* * If TSC has constant frequency and TSC is not synchronized across * sockets and this is not the first CPU in the socket, then reuse * the calibration value of an already online CPU on that socket. * * This assumes that CONSTANT_TSC is consistent for all CPUs in a * socket. */ if (!constant_tsc || !mask) return 0; sibling = cpumask_any_but(mask, cpu); if (sibling < nr_cpu_ids) return cpu_data(sibling).loops_per_jiffy; return 0; } #endif
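/*
 * Editor's note: a minimal, self-contained userspace sketch of the
 * cycles -> nanoseconds scaling documented above (ns = cyc * mul >> shift,
 * with mul ~= 10^6 * 2^shift / tsc_khz), to make the arithmetic concrete.
 * This is not the kernel's clocks_calc_mult_shift()/mul_u64_u32_shr()
 * implementation; the helper names and the example frequency are made up,
 * and the widened multiply relies on the GCC/Clang __int128 extension.
 */
#include <stdio.h>
#include <stdint.h>

/* Pick the largest shift for which (10^6 << shift) / khz still fits in u32 (khz > 0 assumed). */
static void sketch_calc_mult_shift(uint32_t khz, uint32_t *mul, uint32_t *shift)
{
	uint32_t sft = 32;
	uint64_t m;

	do {
		sft--;
		m = ((uint64_t)1000000 << sft) / khz;
	} while (m > 0xffffffffULL);

	*mul = (uint32_t)m;
	*shift = sft;
}

/* Widened multiply then shift, in the spirit of mul_u64_u32_shr(). */
static uint64_t sketch_cyc_to_ns(uint64_t cyc, uint32_t mul, uint32_t shift)
{
	return (uint64_t)(((unsigned __int128)cyc * mul) >> shift);
}

int main(void)
{
	uint32_t mul, shift, khz = 2496000;	/* e.g. a 2.496 GHz TSC */
	uint64_t cyc = 12345678901ULL;

	sketch_calc_mult_shift(khz, &mul, &shift);
	printf("mul=%u shift=%u\n", mul, shift);
	printf("scaled: %llu ns\n", (unsigned long long)sketch_cyc_to_ns(cyc, mul, shift));
	printf("direct: %llu ns\n", (unsigned long long)(cyc * 1000000ULL / khz));
	return 0;
}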
/* BlueZ - Bluetooth protocol stack for 
Linux Copyright (C) 2014 Intel Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ #include <linux/sched/signal.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/mgmt.h> #include "smp.h" #include "hci_request.h" #include "msft.h" #include "eir.h" void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); req->hdev = hdev; req->err = 0; } void hci_req_purge(struct hci_request *req) { skb_queue_purge(&req->cmd_q); } bool hci_req_status_pend(struct hci_dev *hdev) { return hdev->req_status == HCI_REQ_PEND; } static int req_run(struct hci_request *req, hci_req_complete_t complete, hci_req_complete_skb_t complete_skb) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; unsigned long flags; bt_dev_dbg(hdev, "length %u", skb_queue_len(&req->cmd_q)); /* If an error occurred during request building, remove all HCI * commands queued on the HCI request queue. */ if (req->err) { skb_queue_purge(&req->cmd_q); return req->err; } /* Do not allow empty requests */ if (skb_queue_empty(&req->cmd_q)) return -ENODATA; skb = skb_peek_tail(&req->cmd_q); if (complete) { bt_cb(skb)->hci.req_complete = complete; } else if (complete_skb) { bt_cb(skb)->hci.req_complete_skb = complete_skb; bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB; } spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); queue_work(hdev->workqueue, &hdev->cmd_work); return 0; } int hci_req_run(struct hci_request *req, hci_req_complete_t complete) { return req_run(req, complete, NULL); } int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete) { return req_run(req, NULL, complete); } void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, struct sk_buff *skb) { bt_dev_dbg(hdev, "result 0x%2.2x", result); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; if (skb) { kfree_skb(hdev->req_skb); hdev->req_skb = skb_get(skb); } wake_up_interruptible(&hdev->req_wait_q); } } /* Execute request and wait for completion. */ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status) { struct hci_request req; int err = 0; bt_dev_dbg(hdev, "start"); hci_req_init(&req, hdev); hdev->req_status = HCI_REQ_PEND; err = func(&req, opt); if (err) { if (hci_status) *hci_status = HCI_ERROR_UNSPECIFIED; return err; } err = hci_req_run_skb(&req, hci_req_sync_complete); if (err < 0) { hdev->req_status = 0; /* ENODATA means the HCI request command queue is empty. 
* This can happen when a request with conditionals doesn't * trigger any commands to be sent. This is normal behavior * and should not trigger an error return. */ if (err == -ENODATA) { if (hci_status) *hci_status = 0; return 0; } if (hci_status) *hci_status = HCI_ERROR_UNSPECIFIED; return err; } err = wait_event_interruptible_timeout(hdev->req_wait_q, hdev->req_status != HCI_REQ_PEND, timeout); if (err == -ERESTARTSYS) return -EINTR; switch (hdev->req_status) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); if (hci_status) *hci_status = hdev->req_result; break; case HCI_REQ_CANCELED: err = -hdev->req_result; if (hci_status) *hci_status = HCI_ERROR_UNSPECIFIED; break; default: err = -ETIMEDOUT; if (hci_status) *hci_status = HCI_ERROR_UNSPECIFIED; break; } kfree_skb(hdev->req_skb); hdev->req_skb = NULL; hdev->req_status = hdev->req_result = 0; bt_dev_dbg(hdev, "end: err %d", err); return err; } int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status) { int ret; /* Serialize all requests */ hci_req_sync_lock(hdev); /* check the state after obtaing the lock to protect the HCI_UP * against any races from hci_dev_do_close when the controller * gets removed. */ if (test_bit(HCI_UP, &hdev->flags)) ret = __hci_req_sync(hdev, req, opt, timeout, hci_status); else ret = -ENETDOWN; hci_req_sync_unlock(hdev); return ret; } struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; struct sk_buff *skb; skb = bt_skb_alloc(len, GFP_ATOMIC); if (!skb) return NULL; hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE); hdr->opcode = cpu_to_le16(opcode); hdr->plen = plen; if (plen) skb_put_data(skb, param, plen); bt_dev_dbg(hdev, "skb len %d", skb->len); hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; hci_skb_opcode(skb) = opcode; return skb; } /* Queue a command to an asynchronous HCI request */ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, const void *param, u8 event) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); /* If an error occurred during request building, there is no point in * queueing the HCI command. We can simply return. 
*/ if (req->err) return; skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", opcode); req->err = -ENOMEM; return; } if (skb_queue_empty(&req->cmd_q)) bt_cb(skb)->hci.req_flags |= HCI_REQ_START; hci_skb_event(skb) = event; skb_queue_tail(&req->cmd_q, skb); } void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, const void *param) { bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode); hci_req_add_ev(req, opcode, plen, param, 0); } static void start_interleave_scan(struct hci_dev *hdev) { hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; queue_delayed_work(hdev->req_workqueue, &hdev->interleave_scan, 0); } static bool is_interleave_scanning(struct hci_dev *hdev) { return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; } static void cancel_interleave_scan(struct hci_dev *hdev) { bt_dev_dbg(hdev, "cancelling interleave scan"); cancel_delayed_work_sync(&hdev->interleave_scan); hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE; } /* Return true if interleave_scan wasn't started until exiting this function, * otherwise, return false */ static bool __hci_update_interleaved_scan(struct hci_dev *hdev) { /* Do interleaved scan only if all of the following are true: * - There is at least one ADV monitor * - At least one pending LE connection or one device to be scanned for * - Monitor offloading is not supported * If so, we should alternate between allowlist scan and one without * any filters to save power. */ bool use_interleaving = hci_is_adv_monitoring(hdev) && !(list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports)) && hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE; bool is_interleaving = is_interleave_scanning(hdev); if (use_interleaving && !is_interleaving) { start_interleave_scan(hdev); bt_dev_dbg(hdev, "starting interleave scan"); return true; } if (!use_interleaving && is_interleaving) cancel_interleave_scan(hdev); return false; } void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) { struct hci_dev *hdev = req->hdev; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return; } if (use_ext_scan(hdev)) { struct hci_cp_le_set_ext_scan_enable cp; memset(&cp, 0, sizeof(cp)); cp.enable = LE_SCAN_DISABLE; hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp), &cp); } else { struct hci_cp_le_set_scan_enable cp; memset(&cp, 0, sizeof(cp)); cp.enable = LE_SCAN_DISABLE; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } /* Disable address resolution */ if (hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) { __u8 enable = 0x00; hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); } } static void del_from_accept_list(struct hci_request *req, bdaddr_t *bdaddr, u8 bdaddr_type) { struct hci_cp_le_del_from_accept_list cp; cp.bdaddr_type = bdaddr_type; bacpy(&cp.bdaddr, bdaddr); bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from accept list", &cp.bdaddr, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp); if (use_ll_privacy(req->hdev)) { struct smp_irk *irk; irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type); if (irk) { struct hci_cp_le_del_from_resolv_list cp; cp.bdaddr_type = bdaddr_type; bacpy(&cp.bdaddr, bdaddr); hci_req_add(req, HCI_OP_LE_DEL_FROM_RESOLV_LIST, sizeof(cp), &cp); } } } /* Adds connection to accept list if needed. On error, returns -1. 
*/ static int add_to_accept_list(struct hci_request *req, struct hci_conn_params *params, u8 *num_entries, bool allow_rpa) { struct hci_cp_le_add_to_accept_list cp; struct hci_dev *hdev = req->hdev; /* Already in accept list */ if (hci_bdaddr_list_lookup(&hdev->le_accept_list, &params->addr, params->addr_type)) return 0; /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) return -1; /* Accept list can not be used with RPAs */ if (!allow_rpa && !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) { return -1; } /* During suspend, only wakeable devices can be in accept list */ if (hdev->suspended && !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) return 0; *num_entries += 1; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, &params->addr); bt_dev_dbg(hdev, "Add %pMR (0x%x) to accept list", &cp.bdaddr, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp); if (use_ll_privacy(hdev)) { struct smp_irk *irk; irk = hci_find_irk_by_addr(hdev, &params->addr, params->addr_type); if (irk) { struct hci_cp_le_add_to_resolv_list cp; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, &params->addr); memcpy(cp.peer_irk, irk->val, 16); if (hci_dev_test_flag(hdev, HCI_PRIVACY)) memcpy(cp.local_irk, hdev->irk, 16); else memset(cp.local_irk, 0, 16); hci_req_add(req, HCI_OP_LE_ADD_TO_RESOLV_LIST, sizeof(cp), &cp); } } return 0; } static u8 update_accept_list(struct hci_request *req) { struct hci_dev *hdev = req->hdev; struct hci_conn_params *params; struct bdaddr_list *b; u8 num_entries = 0; bool pend_conn, pend_report; /* We allow usage of accept list even with RPAs in suspend. In the worst * case, we won't be able to wake from devices that use the privacy1.2 * features. Additionally, once we support privacy1.2 and IRK * offloading, we can update this to also check for those conditions. */ bool allow_rpa = hdev->suspended; if (use_ll_privacy(hdev)) allow_rpa = true; /* Go through the current accept list programmed into the * controller one by one and check if that address is still * in the list of pending connections or list of devices to * report. If not present in either list, then queue the * command to remove it from the controller. */ list_for_each_entry(b, &hdev->le_accept_list, list) { pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, &b->bdaddr, b->bdaddr_type); pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports, &b->bdaddr, b->bdaddr_type); /* If the device is not likely to connect or report, * remove it from the accept list. */ if (!pend_conn && !pend_report) { del_from_accept_list(req, &b->bdaddr, b->bdaddr_type); continue; } /* Accept list can not be used with RPAs */ if (!allow_rpa && !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { return 0x00; } num_entries++; } /* Since all no longer valid accept list entries have been * removed, walk through the list of pending connections * and ensure that any new device gets programmed into * the controller. * * If the list of the devices is larger than the list of * available accept list entries in the controller, then * just abort and return filer policy value to not use the * accept list. 
*/ list_for_each_entry(params, &hdev->pend_le_conns, action) { if (add_to_accept_list(req, params, &num_entries, allow_rpa)) return 0x00; } /* After adding all new pending connections, walk through * the list of pending reports and also add these to the * accept list if there is still space. Abort if space runs out. */ list_for_each_entry(params, &hdev->pend_le_reports, action) { if (add_to_accept_list(req, params, &num_entries, allow_rpa)) return 0x00; } /* Use the allowlist unless the following conditions are all true: * - We are not currently suspending * - There are 1 or more ADV monitors registered and it's not offloaded * - Interleaved scanning is not currently using the allowlist */ if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended && hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE && hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST) return 0x00; /* Select filter policy to use accept list */ return 0x01; } static bool scan_use_rpa(struct hci_dev *hdev) { return hci_dev_test_flag(hdev, HCI_PRIVACY); } static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, u16 window, u8 own_addr_type, u8 filter_policy, bool filter_dup, bool addr_resolv) { struct hci_dev *hdev = req->hdev; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return; } if (use_ll_privacy(hdev) && addr_resolv) { u8 enable = 0x01; hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); } /* Use ext scanning if set ext scan param and ext scan enable is * supported */ if (use_ext_scan(hdev)) { struct hci_cp_le_set_ext_scan_params *ext_param_cp; struct hci_cp_le_set_ext_scan_enable ext_enable_cp; struct hci_cp_le_scan_phy_params *phy_params; u8 data[sizeof(*ext_param_cp) + sizeof(*phy_params) * 2]; u32 plen; ext_param_cp = (void *)data; phy_params = (void *)ext_param_cp->data; memset(ext_param_cp, 0, sizeof(*ext_param_cp)); ext_param_cp->own_addr_type = own_addr_type; ext_param_cp->filter_policy = filter_policy; plen = sizeof(*ext_param_cp); if (scan_1m(hdev) || scan_2m(hdev)) { ext_param_cp->scanning_phys |= LE_SCAN_PHY_1M; memset(phy_params, 0, sizeof(*phy_params)); phy_params->type = type; phy_params->interval = cpu_to_le16(interval); phy_params->window = cpu_to_le16(window); plen += sizeof(*phy_params); phy_params++; } if (scan_coded(hdev)) { ext_param_cp->scanning_phys |= LE_SCAN_PHY_CODED; memset(phy_params, 0, sizeof(*phy_params)); phy_params->type = type; phy_params->interval = cpu_to_le16(interval); phy_params->window = cpu_to_le16(window); plen += sizeof(*phy_params); phy_params++; } hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_PARAMS, plen, ext_param_cp); memset(&ext_enable_cp, 0, sizeof(ext_enable_cp)); ext_enable_cp.enable = LE_SCAN_ENABLE; ext_enable_cp.filter_dup = filter_dup; hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(ext_enable_cp), &ext_enable_cp); } else { struct hci_cp_le_set_scan_param param_cp; struct hci_cp_le_set_scan_enable enable_cp; memset(&param_cp, 0, sizeof(param_cp)); param_cp.type = type; param_cp.interval = cpu_to_le16(interval); param_cp.window = cpu_to_le16(window); param_cp.own_address_type = own_addr_type; param_cp.filter_policy = filter_policy; hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), &param_cp); memset(&enable_cp, 0, sizeof(enable_cp)); enable_cp.enable = LE_SCAN_ENABLE; enable_cp.filter_dup = filter_dup; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), &enable_cp); } } static void set_random_addr(struct hci_request *req, bdaddr_t *rpa); static int 
hci_update_random_address(struct hci_request *req, bool require_privacy, bool use_rpa, u8 *own_addr_type) { struct hci_dev *hdev = req->hdev; int err; /* If privacy is enabled use a resolvable private address. If * current RPA has expired or there is something else than * the current RPA in use, then generate a new one. */ if (use_rpa) { /* If Controller supports LL Privacy use own address type is * 0x03 */ if (use_ll_privacy(hdev)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; if (rpa_valid(hdev)) return 0; err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { bt_dev_err(hdev, "failed to generate new RPA"); return err; } set_random_addr(req, &hdev->rpa); return 0; } /* In case of required privacy without resolvable private address, * use an non-resolvable private address. This is useful for active * scanning and non-connectable advertising. */ if (require_privacy) { bdaddr_t nrpa; while (true) { /* The non-resolvable private address is generated * from random six bytes with the two most significant * bits cleared. */ get_random_bytes(&nrpa, 6); nrpa.b[5] &= 0x3f; /* The non-resolvable private address shall not be * equal to the public address. */ if (bacmp(&hdev->bdaddr, &nrpa)) break; } *own_addr_type = ADDR_LE_DEV_RANDOM; set_random_addr(req, &nrpa); return 0; } /* If forcing static address is in use or there is no public * address use the static address as random address (but skip * the HCI command if the current random address is already the * static one. * * In case BR/EDR has been disabled on a dual-mode controller * and a static address has been configured, then use that * address instead of the public BR/EDR address. */ if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { *own_addr_type = ADDR_LE_DEV_RANDOM; if (bacmp(&hdev->static_addr, &hdev->random_addr)) hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, &hdev->static_addr); return 0; } /* Neither privacy nor static address is being used so use a * public address. */ *own_addr_type = ADDR_LE_DEV_PUBLIC; return 0; } /* Ensure to call hci_req_add_le_scan_disable() first to disable the * controller based address resolution to be able to reconfigure * resolving list. */ void hci_req_add_le_passive_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 own_addr_type; u8 filter_policy; u16 window, interval; /* Default is to enable duplicates filter */ u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE; /* Background scanning should run with address resolution */ bool addr_resolv = true; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return; } /* Set require_privacy to false since no SCAN_REQ are send * during passive scanning. Not using an non-resolvable address * here is important so that peer devices using direct * advertising with our address will be correctly reported * by the controller. */ if (hci_update_random_address(req, false, scan_use_rpa(hdev), &own_addr_type)) return; if (hdev->enable_advmon_interleave_scan && __hci_update_interleaved_scan(hdev)) return; bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state); /* Adding or removing entries from the accept list must * happen before enabling scanning. The controller does * not allow accept list modification while scanning. 
*/ filter_policy = update_accept_list(req); /* When the controller is using random resolvable addresses and * with that having LE privacy enabled, then controllers with * Extended Scanner Filter Policies support can now enable support * for handling directed advertising. * * So instead of using filter polices 0x00 (no accept list) * and 0x01 (accept list enabled) use the new filter policies * 0x02 (no accept list) and 0x03 (accept list enabled). */ if (hci_dev_test_flag(hdev, HCI_PRIVACY) && (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) filter_policy |= 0x02; if (hdev->suspended) { window = hdev->le_scan_window_suspend; interval = hdev->le_scan_int_suspend; } else if (hci_is_le_conn_scanning(hdev)) { window = hdev->le_scan_window_connect; interval = hdev->le_scan_int_connect; } else if (hci_is_adv_monitoring(hdev)) { window = hdev->le_scan_window_adv_monitor; interval = hdev->le_scan_int_adv_monitor; /* Disable duplicates filter when scanning for advertisement * monitor for the following reasons. * * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm * controllers ignore RSSI_Sampling_Period when the duplicates * filter is enabled. * * For SW pattern filtering, when we're not doing interleaved * scanning, it is necessary to disable duplicates filter, * otherwise hosts can only receive one advertisement and it's * impossible to know if a peer is still in range. */ filter_dup = LE_SCAN_FILTER_DUP_DISABLE; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; } bt_dev_dbg(hdev, "LE passive scan with accept list = %d", filter_policy); hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window, own_addr_type, filter_policy, filter_dup, addr_resolv); } static int hci_req_add_le_interleaved_scan(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; int ret = 0; hci_dev_lock(hdev); if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) hci_req_add_le_scan_disable(req, false); hci_req_add_le_passive_scan(req); switch (hdev->interleave_scan_state) { case INTERLEAVE_SCAN_ALLOWLIST: bt_dev_dbg(hdev, "next state: allowlist"); hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; break; case INTERLEAVE_SCAN_NO_FILTER: bt_dev_dbg(hdev, "next state: no filter"); hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST; break; case INTERLEAVE_SCAN_NONE: BT_ERR("unexpected error"); ret = -1; } hci_dev_unlock(hdev); return ret; } static void interleave_scan_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, interleave_scan.work); u8 status; unsigned long timeout; if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) { timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration); } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) { timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration); } else { bt_dev_err(hdev, "unexpected error"); return; } hci_req_sync(hdev, hci_req_add_le_interleaved_scan, 0, HCI_CMD_TIMEOUT, &status); /* Don't continue interleaving if it was canceled */ if (is_interleave_scanning(hdev)) queue_delayed_work(hdev->req_workqueue, &hdev->interleave_scan, timeout); } static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) { struct hci_dev *hdev = req->hdev; /* If we're advertising or initiating an LE connection we can't * go ahead and change the random address at this time. 
This is * because the eventual initiator address used for the * subsequently created connection will be undefined (some * controllers use the new address and others the one we had * when the operation started). * * In this kind of scenario skip the update and let the random * address be updated at the next cycle. */ if (hci_dev_test_flag(hdev, HCI_LE_ADV) || hci_lookup_le_connect(hdev)) { bt_dev_dbg(hdev, "Deferring random address update"); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); return; } hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); } void hci_request_setup(struct hci_dev *hdev) { INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); } void hci_request_cancel_all(struct hci_dev *hdev) { hci_cmd_sync_cancel_sync(hdev, ENODEV); cancel_interleave_scan(hdev); }
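The request API above is normally driven by the Bluetooth core rather than called ad hoc, but a short usage sketch makes the intended pattern (init, queue commands, run with a completion callback) easier to follow. The example_* names below are hypothetical and exist only for this sketch; hci_req_init(), hci_req_add(), hci_req_run(), HCI_OP_LE_SET_SCAN_ENABLE and struct hci_cp_le_set_scan_enable all come from the code above and the headers it includes.

/*
 * Minimal usage sketch (not part of hci_request.c); the example_* names
 * are invented for illustration only.
 */
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include "hci_request.h"

static void example_scan_complete(struct hci_dev *hdev, u8 status, u16 opcode)
{
	bt_dev_dbg(hdev, "opcode 0x%4.4x completed with status 0x%2.2x",
		   opcode, status);
}

static int example_disable_le_scan(struct hci_dev *hdev)
{
	struct hci_cp_le_set_scan_enable cp;
	struct hci_request req;

	/* Bind an empty command queue to the controller. */
	hci_req_init(&req, hdev);

	/* Queue one or more HCI commands on the request. */
	memset(&cp, 0, sizeof(cp));
	cp.enable = LE_SCAN_DISABLE;
	hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);

	/*
	 * Splice the queued commands onto hdev->cmd_q and kick cmd_work;
	 * example_scan_complete() runs once the last command completes.
	 */
	return hci_req_run(&req, example_scan_complete);
}

Callers that must wait for the controller's answer go through hci_req_sync() instead, which serializes requests with hci_req_sync_lock() and blocks on hdev->req_wait_q via __hci_req_sync().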
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM sched #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H #include <linux/kthread.h> #include <linux/sched/numa_balancing.h> #include <linux/tracepoint.h> #include <linux/binfmts.h> /* * Tracepoint for calling kthread_stop, performed to end a kthread: */ TRACE_EVENT(sched_kthread_stop, TP_PROTO(struct task_struct *t), TP_ARGS(t), TP_STRUCT__entry( __array(
char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) ), TP_fast_assign( memcpy(__entry->comm, t->comm, TASK_COMM_LEN); __entry->pid = t->pid; ), TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* * Tracepoint for the return value of the kthread stopping: */ TRACE_EVENT(sched_kthread_stop_ret, TP_PROTO(int ret), TP_ARGS(ret), TP_STRUCT__entry( __field( int, ret ) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret=%d", __entry->ret) ); /** * sched_kthread_work_queue_work - called when a work gets queued * @worker: pointer to the kthread_worker * @work: pointer to struct kthread_work * * This event occurs when a work is queued immediately or once a * delayed work is actually queued (ie: once the delay has been * reached). */ TRACE_EVENT(sched_kthread_work_queue_work, TP_PROTO(struct kthread_worker *worker, struct kthread_work *work), TP_ARGS(worker, work), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) __field( void *, worker) ), TP_fast_assign( __entry->work = work; __entry->function = work->func; __entry->worker = worker; ), TP_printk("work struct=%p function=%ps worker=%p", __entry->work, __entry->function, __entry->worker) ); /** * sched_kthread_work_execute_start - called immediately before the work callback * @work: pointer to struct kthread_work * * Allows to track kthread work execution. */ TRACE_EVENT(sched_kthread_work_execute_start, TP_PROTO(struct kthread_work *work), TP_ARGS(work), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) ), TP_fast_assign( __entry->work = work; __entry->function = work->func; ), TP_printk("work struct %p: function %ps", __entry->work, __entry->function) ); /** * sched_kthread_work_execute_end - called immediately after the work callback * @work: pointer to struct work_struct * @function: pointer to worker function * * Allows to track workqueue execution. */ TRACE_EVENT(sched_kthread_work_execute_end, TP_PROTO(struct kthread_work *work, kthread_work_func_t function), TP_ARGS(work, function), TP_STRUCT__entry( __field( void *, work ) __field( void *, function) ), TP_fast_assign( __entry->work = work; __entry->function = function; ), TP_printk("work struct %p: function %ps", __entry->work, __entry->function) ); /* * Tracepoint for waking up a task: */ DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct task_struct *p), TP_ARGS(__perf_task(p)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) __field( int, target_cpu ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->target_cpu = task_cpu(p); ), TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, __entry->target_cpu) ); /* * Tracepoint called when waking a task; this tracepoint is guaranteed to be * called from the waking context. */ DEFINE_EVENT(sched_wakeup_template, sched_waking, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING. * It is not always called from the waking context. 
*/ DEFINE_EVENT(sched_wakeup_template, sched_wakeup, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint for waking up a new task: */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, TP_PROTO(struct task_struct *p), TP_ARGS(p)); #ifdef CREATE_TRACE_POINTS static inline long __trace_sched_switch_state(bool preempt, unsigned int prev_state, struct task_struct *p) { unsigned int state; #ifdef CONFIG_SCHED_DEBUG BUG_ON(p != current); #endif /* CONFIG_SCHED_DEBUG */ /* * Preemption ignores task state, therefore preempted tasks are always * RUNNING (we will not have dequeued if state != RUNNING). */ if (preempt) return TASK_REPORT_MAX; /* * task_state_index() uses fls() and returns a value from 0-8 range. * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using * it for left shift operation to get the correct task->state * mapping. */ state = __task_state_index(prev_state, p->exit_state); return state ? (1 << (state - 1)) : state; } #endif /* CREATE_TRACE_POINTS */ /* * Tracepoint for task switches, performed by the scheduler: */ TRACE_EVENT(sched_switch, TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next, unsigned int prev_state), TP_ARGS(preempt, prev, next, prev_state), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __field( long, prev_state ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev); memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", { TASK_INTERRUPTIBLE, "S" }, { TASK_UNINTERRUPTIBLE, "D" }, { __TASK_STOPPED, "T" }, { __TASK_TRACED, "t" }, { EXIT_DEAD, "X" }, { EXIT_ZOMBIE, "Z" }, { TASK_PARKED, "P" }, { TASK_DEAD, "I" }) : "R", __entry->prev_state & TASK_REPORT_MAX ? 
"+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); /* * Tracepoint for a task being migrated: */ TRACE_EVENT(sched_migrate_task, TP_PROTO(struct task_struct *p, int dest_cpu), TP_ARGS(p, dest_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) __field( int, orig_cpu ) __field( int, dest_cpu ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), TP_ARGS(p), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); /* * Tracepoint for freeing a task: */ DEFINE_EVENT(sched_process_template, sched_process_free, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint for a task exiting: */ DEFINE_EVENT(sched_process_template, sched_process_exit, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint for waiting on task to unschedule: */ DEFINE_EVENT(sched_process_template, sched_wait_task, TP_PROTO(struct task_struct *p), TP_ARGS(p)); /* * Tracepoint for a waiting task: */ TRACE_EVENT(sched_process_wait, TP_PROTO(struct pid *pid), TP_ARGS(pid), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) ), TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); __entry->pid = pid_nr(pid); __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); /* * Tracepoint for kernel_clone: */ TRACE_EVENT(sched_process_fork, TP_PROTO(struct task_struct *parent, struct task_struct *child), TP_ARGS(parent, child), TP_STRUCT__entry( __array( char, parent_comm, TASK_COMM_LEN ) __field( pid_t, parent_pid ) __array( char, child_comm, TASK_COMM_LEN ) __field( pid_t, child_pid ) ), TP_fast_assign( memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); __entry->parent_pid = parent->pid; memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); __entry->child_pid = child->pid; ), TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); /* * Tracepoint for exec: */ TRACE_EVENT(sched_process_exec, TP_PROTO(struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm), TP_ARGS(p, old_pid, bprm), TP_STRUCT__entry( __string( filename, bprm->filename ) __field( pid_t, pid ) __field( pid_t, old_pid ) ), TP_fast_assign( __assign_str(filename, bprm->filename); __entry->pid = p->pid; __entry->old_pid = old_pid; ), TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), __entry->pid, __entry->old_pid) ); #ifdef CONFIG_SCHEDSTATS #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT #define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS #else #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT_NOP #define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS_NOP #endif /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be 
welcome. */ DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(__perf_task(tsk), __perf_count(delay)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( u64, delay ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->delay = delay; ), TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); /* * Tracepoint for accounting wait time (time the task is runnable * but not actually running due to scheduler contention). */ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_wait, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_sleep, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for accounting iowait time (time the task is not runnable * due to waiting on IO to complete). */ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_iowait, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for accounting blocked time (time the task is in uninterruptible). */ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). */ DECLARE_EVENT_CLASS(sched_stat_runtime, TP_PROTO(struct task_struct *tsk, u64 runtime), TP_ARGS(tsk, __perf_count(runtime)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( u64, runtime ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->runtime = runtime; ), TP_printk("comm=%s pid=%d runtime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime) ); DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, TP_PROTO(struct task_struct *tsk, u64 runtime), TP_ARGS(tsk, runtime)); /* * Tracepoint for showing priority inheritance modifying a tasks * priority. */ TRACE_EVENT(sched_pi_setprio, TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), TP_ARGS(tsk, pi_task), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, oldprio ) __field( int, newprio ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; __entry->newprio = pi_task ? min(tsk->normal_prio, pi_task->prio) : tsk->normal_prio; /* XXX SCHED_DEADLINE bits missing */ ), TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", __entry->comm, __entry->pid, __entry->oldprio, __entry->newprio) ); #ifdef CONFIG_DETECT_HUNG_TASK TRACE_EVENT(sched_process_hang, TP_PROTO(struct task_struct *tsk), TP_ARGS(tsk), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; ), TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); #endif /* CONFIG_DETECT_HUNG_TASK */ /* * Tracks migration of tasks from one runqueue to another. Can be used to * detect if automatic NUMA balancing is bouncing between nodes. 
*/ TRACE_EVENT(sched_move_numa, TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu), TP_ARGS(tsk, src_cpu, dst_cpu), TP_STRUCT__entry( __field( pid_t, pid ) __field( pid_t, tgid ) __field( pid_t, ngid ) __field( int, src_cpu ) __field( int, src_nid ) __field( int, dst_cpu ) __field( int, dst_nid ) ), TP_fast_assign( __entry->pid = task_pid_nr(tsk); __entry->tgid = task_tgid_nr(tsk); __entry->ngid = task_numa_group_id(tsk); __entry->src_cpu = src_cpu; __entry->src_nid = cpu_to_node(src_cpu); __entry->dst_cpu = dst_cpu; __entry->dst_nid = cpu_to_node(dst_cpu); ), TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d", __entry->pid, __entry->tgid, __entry->ngid, __entry->src_cpu, __entry->src_nid, __entry->dst_cpu, __entry->dst_nid) ); DECLARE_EVENT_CLASS(sched_numa_pair_template, TP_PROTO(struct task_struct *src_tsk, int src_cpu, struct task_struct *dst_tsk, int dst_cpu), TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu), TP_STRUCT__entry( __field( pid_t, src_pid ) __field( pid_t, src_tgid ) __field( pid_t, src_ngid ) __field( int, src_cpu ) __field( int, src_nid ) __field( pid_t, dst_pid ) __field( pid_t, dst_tgid ) __field( pid_t, dst_ngid ) __field( int, dst_cpu ) __field( int, dst_nid ) ), TP_fast_assign( __entry->src_pid = task_pid_nr(src_tsk); __entry->src_tgid = task_tgid_nr(src_tsk); __entry->src_ngid = task_numa_group_id(src_tsk); __entry->src_cpu = src_cpu; __entry->src_nid = cpu_to_node(src_cpu); __entry->dst_pid = dst_tsk ? task_pid_nr(dst_tsk) : 0; __entry->dst_tgid = dst_tsk ? task_tgid_nr(dst_tsk) : 0; __entry->dst_ngid = dst_tsk ? task_numa_group_id(dst_tsk) : 0; __entry->dst_cpu = dst_cpu; __entry->dst_nid = dst_cpu >= 0 ? cpu_to_node(dst_cpu) : -1; ), TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d", __entry->src_pid, __entry->src_tgid, __entry->src_ngid, __entry->src_cpu, __entry->src_nid, __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid, __entry->dst_cpu, __entry->dst_nid) ); DEFINE_EVENT(sched_numa_pair_template, sched_stick_numa, TP_PROTO(struct task_struct *src_tsk, int src_cpu, struct task_struct *dst_tsk, int dst_cpu), TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) ); DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, TP_PROTO(struct task_struct *src_tsk, int src_cpu, struct task_struct *dst_tsk, int dst_cpu), TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) ); #ifdef CONFIG_NUMA_BALANCING #define NUMAB_SKIP_REASON \ EM( NUMAB_SKIP_UNSUITABLE, "unsuitable" ) \ EM( NUMAB_SKIP_SHARED_RO, "shared_ro" ) \ EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \ EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \ EM( NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) \ EMe(NUMAB_SKIP_SEQ_COMPLETED, "seq_completed" ) /* Redefine for export. */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); NUMAB_SKIP_REASON /* Redefine for symbolic printing. 
*/ #undef EM #undef EMe #define EM(a, b) { a, b }, #define EMe(a, b) { a, b } TRACE_EVENT(sched_skip_vma_numa, TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma, enum numa_vmaskip_reason reason), TP_ARGS(mm, vma, reason), TP_STRUCT__entry( __field(unsigned long, numa_scan_offset) __field(unsigned long, vm_start) __field(unsigned long, vm_end) __field(enum numa_vmaskip_reason, reason) ), TP_fast_assign( __entry->numa_scan_offset = mm->numa_scan_offset; __entry->vm_start = vma->vm_start; __entry->vm_end = vma->vm_end; __entry->reason = reason; ), TP_printk("numa_scan_offset=%lX vm_start=%lX vm_end=%lX reason=%s", __entry->numa_scan_offset, __entry->vm_start, __entry->vm_end, __print_symbolic(__entry->reason, NUMAB_SKIP_REASON)) ); #endif /* CONFIG_NUMA_BALANCING */ /* * Tracepoint for waking a polling cpu without an IPI. */ TRACE_EVENT(sched_wake_idle_without_ipi, TP_PROTO(int cpu), TP_ARGS(cpu), TP_STRUCT__entry( __field( int, cpu ) ), TP_fast_assign( __entry->cpu = cpu; ), TP_printk("cpu=%d", __entry->cpu) ); /* * Following tracepoints are not exported in tracefs and provide hooking * mechanisms only for testing and debugging purposes. * * Postfixed with _tp to make them easily identifiable in the code. */ DECLARE_TRACE(pelt_cfs_tp, TP_PROTO(struct cfs_rq *cfs_rq), TP_ARGS(cfs_rq)); DECLARE_TRACE(pelt_rt_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); DECLARE_TRACE(pelt_dl_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); DECLARE_TRACE(pelt_thermal_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); DECLARE_TRACE(pelt_irq_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); DECLARE_TRACE(pelt_se_tp, TP_PROTO(struct sched_entity *se), TP_ARGS(se)); DECLARE_TRACE(sched_cpu_capacity_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); DECLARE_TRACE(sched_overutilized_tp, TP_PROTO(struct root_domain *rd, bool overutilized), TP_ARGS(rd, overutilized)); DECLARE_TRACE(sched_util_est_cfs_tp, TP_PROTO(struct cfs_rq *cfs_rq), TP_ARGS(cfs_rq)); DECLARE_TRACE(sched_util_est_se_tp, TP_PROTO(struct sched_entity *se), TP_ARGS(se)); DECLARE_TRACE(sched_update_nr_running_tp, TP_PROTO(struct rq *rq, int change), TP_ARGS(rq, change)); DECLARE_TRACE(sched_compute_energy_tp, TP_PROTO(struct task_struct *p, int dst_cpu, unsigned long energy, unsigned long max_util, unsigned long busy_time), TP_ARGS(p, dst_cpu, energy, max_util, busy_time)); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
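The TRACE_EVENT()/DEFINE_EVENT() blocks above expand into trace_<name>() inline helpers that instrumented kernel code calls directly; exactly one translation unit defines CREATE_TRACE_POINTS before including this header so the event bodies are generated once, and every other caller simply includes the header. The sketch below is a hedged illustration of that calling convention using the kthread-stop pair of events defined at the top of this header; example_kthread_stop() and its placeholder return value are hypothetical stand-ins for the real stop path.

/*
 * Illustrative sketch, not part of this header: a simplified stand-in for
 * the code path that emits sched_kthread_stop / sched_kthread_stop_ret.
 */
#include <linux/kthread.h>
#include <trace/events/sched.h>	/* CREATE_TRACE_POINTS is defined once, elsewhere */

static int example_kthread_stop(struct task_struct *k)
{
	int ret;

	/* Records the comm and pid of the kthread being stopped. */
	trace_sched_kthread_stop(k);

	/* ... wake the kthread and wait for it to exit (elided) ... */
	ret = 0;	/* placeholder for the kthread's real exit code */

	/* Records the return value of the stop operation. */
	trace_sched_kthread_stop_ret(ret);

	return ret;
}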
4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 
5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 
5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 
6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 
7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 
8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 
8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 
9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 
10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433 10434 10435 10436 10437 10438 10439 10440 10441 10442 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454 10455 10456 10457 10458 10459 10460 10461 10462 10463 10464 10465 10466 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490 10491 10492 10493 10494 10495 10496 10497 10498 10499 10500 10501 10502 10503 10504 10505 10506 10507 10508 10509 10510 10511 10512 10513 10514 10515 10516 10517 10518 10519 10520 10521 10522 10523 10524 10525 10526 10527 10528 10529 10530 10531 10532 10533 10534 10535 10536 10537 10538 10539 10540 10541 10542 10543 10544 10545 10546 10547 10548 10549 10550 10551 10552 10553 10554 10555 10556 10557 10558 10559 10560 10561 10562 10563 10564 10565 10566 10567 10568 10569 10570 10571 10572 10573 10574 10575 10576 10577 10578 10579 10580 10581 10582 10583 10584 10585 10586 10587 10588 10589 10590 10591 10592 10593 10594 10595 10596 10597 10598 10599 10600 10601 10602 10603 10604 10605 10606 10607 10608 10609 10610 10611 10612 10613 10614 10615 10616 10617 10618 10619 10620 10621 10622 10623 10624 10625 10626 10627 10628 10629 10630 10631 10632 10633 10634 10635 10636 10637 10638 10639 10640 10641 10642 10643 10644 10645 10646 10647 10648 10649 10650 10651 10652 10653 10654 10655 10656 10657 10658 10659 10660 10661 10662 10663 10664 10665 10666 10667 10668 10669 10670 10671 10672 10673 10674 10675 10676 10677 10678 10679 10680 10681 10682 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 10707 10708 10709 10710 10711 10712 10713 10714 10715 10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730 10731 10732 
10733 10734 10735 10736 10737 10738 10739 10740 10741 10742 10743 10744 10745 10746 10747 10748 10749 10750 10751 10752 10753 10754 10755 10756 10757 10758 10759 10760 10761 10762 10763 10764 10765 10766 10767 10768 10769 10770 10771 10772 10773 10774 10775 10776 10777 10778 10779 10780 10781 10782 10783 10784 10785 10786 10787 10788 10789 10790 10791 10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802 10803 10804 10805 10806 10807 10808 10809 10810 10811 10812 10813 10814 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826 10827 10828 10829 10830 10831 10832 10833 10834 10835 10836 10837 10838 10839 10840 10841 10842 10843 10844 10845 10846 10847 10848 10849 10850 10851 10852 10853 10854 10855 10856 10857 10858 10859 10860 10861 10862 10863 10864 10865 10866 10867 10868 10869 10870 10871 10872 10873 10874 10875 10876 10877 10878 10879 10880 10881 10882 10883 10884 10885 10886 10887 10888 10889 10890 10891 10892 10893 10894 10895 10896 10897 10898 10899 10900 10901 10902 10903 10904 10905 10906 10907 10908 10909 10910 10911 10912 10913 10914 10915 10916 10917 10918 10919 10920 10921 10922 10923 10924 10925 10926 10927 10928 10929 10930 10931 10932 10933 10934 10935 10936 10937 10938 10939 10940 10941 10942 10943 10944 10945 10946 10947 10948 10949 10950 10951 10952 10953 10954 10955 10956 10957 10958 10959 10960 10961 10962 10963 10964 10965 10966 10967 10968 10969 10970 10971 10972 10973 10974 10975 10976 10977 10978 10979 10980 10981 10982 10983 10984 10985 10986 10987 10988 10989 10990 10991 10992 10993 10994 10995 10996 10997 10998 10999 11000 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 11013 11014 11015 11016 11017 11018 11019 11020 11021 11022 11023 11024 11025 11026 11027 11028 11029 11030 11031 11032 11033 11034 11035 11036 11037 11038 11039 11040 11041 11042 11043 11044 11045 11046 11047 11048 11049 11050 11051 11052 11053 11054 11055 11056 11057 11058 11059 11060 11061 11062 11063 11064 11065 11066 11067 11068 11069 11070 11071 11072 11073 11074 11075 11076 11077 11078 11079 11080 11081 11082 11083 11084 11085 11086 11087 11088 11089 11090 11091 11092 11093 11094 11095 11096 11097 11098 11099 11100 11101 11102 11103 11104 11105 11106 11107 11108 11109 11110 11111 11112 11113 11114 11115 11116 11117 11118 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 11134 11135 11136 11137 11138 11139 11140 11141 11142 11143 11144 11145 11146 11147 11148 11149 11150 11151 11152 11153 11154 11155 11156 11157 11158 11159 11160 11161 11162 11163 11164 11165 11166 11167 11168 11169 11170 11171 11172 11173 11174 11175 11176 11177 11178 11179 11180 11181 11182 11183 11184 11185 11186 11187 11188 11189 11190 11191 11192 11193 11194 11195 11196 11197 11198 11199 11200 11201 11202 11203 11204 11205 11206 11207 11208 11209 11210 11211 11212 11213 11214 11215 11216 11217 11218 11219 11220 11221 11222 11223 11224 11225 11226 11227 11228 11229 11230 11231 11232 11233 11234 11235 11236 11237 11238 11239 11240 11241 11242 11243 11244 11245 11246 11247 11248 11249 11250 11251 11252 11253 11254 11255 11256 11257 11258 11259 11260 11261 11262 11263 11264 11265 11266 11267 11268 11269 11270 11271 11272 11273 11274 11275 11276 11277 11278 11279 11280 11281 11282 11283 11284 11285 11286 11287 11288 11289 11290 11291 11292 11293 11294 11295 11296 11297 11298 11299 11300 11301 11302 11303 11304 11305 11306 11307 11308 11309 11310 11311 11312 11313 11314 11315 11316 11317 11318 11319 11320 11321 11322 11323 11324 
11325 11326 11327 11328 11329 11330 11331 11332 11333 11334 11335 11336 11337 11338 11339 11340 11341 11342 11343 11344 11345 11346 11347 11348 11349 11350 11351 11352 11353 11354 11355 11356 11357 11358 11359 11360 11361 11362 11363 11364 11365 11366 11367 11368 11369 11370 11371 11372 11373 11374 11375 11376 11377 11378 11379 11380 11381 11382 11383 11384 11385 11386 11387 11388 11389 11390 11391 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402 11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414 11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426 11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438 11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450 11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462 11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474 11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486 11487 11488 11489 11490 11491 11492 11493 11494 11495 11496 11497 11498 11499 11500 11501 11502 11503 11504 11505 11506 11507 11508 11509 11510 11511 11512 11513 11514 11515 11516 11517 11518 11519 11520 11521 11522 11523 11524 11525 11526 11527 11528 11529 11530 11531 11532 11533 11534 11535 11536 11537 11538 11539 11540 11541 11542 11543 11544 11545 11546 11547 11548 11549 11550 11551 11552 11553 11554 11555 11556 11557 11558 11559 11560 11561 11562 11563 11564 11565 11566 11567 11568 11569 11570 11571 11572 11573 11574 11575 11576 11577 11578 11579 11580 11581 11582 11583 11584 11585 11586 11587 11588 11589 11590 11591 11592 11593 11594 11595 11596 11597 11598 11599 11600 11601 11602 11603 11604 11605 11606 11607 11608 11609 11610 11611 11612 11613 11614 11615 11616 11617 11618 11619 11620 11621 11622 11623 11624 11625 11626 11627 11628 11629 11630 11631 11632 11633 11634 11635 11636 11637 11638 11639 11640 11641 11642 11643 11644 11645 11646 11647 11648 11649 11650 11651 11652 11653 11654 11655 11656 11657 11658 11659 11660 11661 11662 11663 11664 11665 11666 11667 11668 11669 11670 11671 11672 11673 11674 11675 11676 11677 11678 11679 11680 11681 11682 11683 11684 11685 11686 11687 11688 11689 11690 11691 11692 11693 11694 11695 11696 11697 11698 11699 11700 11701 11702 11703 11704 11705 11706 11707 11708 11709 11710 11711 11712 11713 11714 11715 11716 11717 11718 11719 11720 11721 11722 11723 11724 11725 11726 11727 11728 11729 11730 11731 11732 11733 11734 11735 11736 11737 11738 11739 11740 11741 11742 11743 11744 11745 11746 11747 11748 11749 11750 11751 11752 11753 11754 11755 11756 11757 11758 11759 11760 11761 11762 11763 11764 11765 11766 11767 11768 11769 11770 11771 11772 11773 11774 11775 11776 11777 11778 11779 11780 11781 11782 11783 11784 11785 11786 11787 11788 11789 11790 11791 11792 11793 11794 11795 11796 11797 11798 11799 11800 11801 11802 11803 11804 11805 11806 11807 11808 11809 11810 11811 11812 11813 11814 11815 11816 11817 11818 11819 11820 11821 11822 11823 11824 11825 11826 11827 11828 11829 11830 11831 11832 11833 11834 11835 11836 11837 11838 11839 11840 11841 11842 11843 11844 11845 11846 11847 11848 11849 11850 11851 11852 11853 11854 11855 11856 11857 11858 11859 11860 11861 11862 11863 11864 11865 11866 11867 11868 11869 11870 11871 11872 11873 11874 11875 11876 11877 11878 11879 11880 11881 11882 11883 11884 11885 11886 11887 11888 11889 11890 11891 11892 11893 11894 11895 11896 11897 11898 11899 11900 11901 11902 11903 11904 11905 11906 11907 11908 11909 11910 11911 11912 11913 11914 11915 11916 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/atomic.h>
#include <linux/bpf_verifier.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <linux/skmsg.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <net/net_namespace.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
#include <net/tls.h>
#include <net/xdp.h>
#include <net/mptcp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netkit.h>
#include <linux/un.h>
#include <net/xdp_sock_drv.h>

#include "dev.h"

static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);

int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
	if (in_compat_syscall()) {
		struct compat_sock_fprog f32;

		if (len != sizeof(f32))
			return -EINVAL;
		if (copy_from_sockptr(&f32, src, sizeof(f32)))
			return -EFAULT;
		memset(dst, 0, sizeof(*dst));
		dst->len = f32.len;
		dst->filter = compat_ptr(f32.filter);
	} else {
		if (len != sizeof(*dst))
			return -EINVAL;
		if (copy_from_sockptr(dst, src, sizeof(*dst)))
			return -EFAULT;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);

/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to bpf_prog_run. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);
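/* Editor's note: copy_bpf_fprog_from_user() and sk_filter_trim_cap() above are
 * the kernel side of classic socket filters attached from user space. The
 * following is a minimal, illustrative user-space sketch of that attach path,
 * assuming an ordinary libc/UAPI build environment; it is not part of this
 * file and is kept compiled out.
 */
#if 0
#include <sys/socket.h>
#include <linux/filter.h>

static int attach_trim_filter(int fd)
{
	/* One-instruction classic BPF program: accept every packet and ask
	 * the kernel to trim it to 64 bytes; sk_filter_trim_cap() uses this
	 * return value as pkt_len when trimming the skb.
	 */
	struct sock_filter code[] = {
		BPF_STMT(BPF_RET | BPF_K, 64),
	};
	struct sock_fprog fprog = {
		.len	= sizeof(code) / sizeof(code[0]),
		.filter	= code,
	};

	/* The kernel copies this via copy_bpf_fprog_from_user(). */
	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &fprog, sizeof(fprog));
}
#endif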
BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (!nla_ok(nla, skb->len - a))
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u8 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return *(u8 *)(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return tmp;
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return *(u8 *)ptr;
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_8(skb, skb->data,
					 skb->len - skb->data_len, offset);
}

BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	__be16 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return get_unaligned_be16(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be16_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be16(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_16(skb, skb->data,
					  skb->len - skb->data_len, offset);
}

BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	__be32 tmp, *ptr;
	const int len = sizeof(tmp);

	if (likely(offset >= 0)) {
		if (headlen - offset >= len)
			return get_unaligned_be32(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be32_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be32(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_32(skb, skb->data,
					  skb->len - skb->data_len, offset);
}
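/* Editor's note: the load helpers above back classic BPF packet loads that
 * either fall outside the linear skb data or use the special negative
 * offsets (SKF_LL_OFF/SKF_NET_OFF from the <linux/filter.h> UAPI). A hedged,
 * compiled-out sketch of a classic filter relying on that negative-offset
 * path; illustrative only, not part of this file:
 */
#if 0
	struct sock_filter net_off_filter[] = {
		/* A = byte at network header + 9 (the IPv4 protocol field),
		 * independent of the link-layer header length. Such a load
		 * reaches bpf_skb_load_helper_8() with offset < 0 and is
		 * resolved via bpf_internal_load_pointer_neg_helper().
		 */
		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 9),
		/* Accept the whole packet if it is UDP, otherwise drop it. */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, IPPROTO_UDP, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffff),
		BPF_STMT(BPF_RET | BPF_K, 0),
	};
#endif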
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET);
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		break;
	case SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_all) != 4);
		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_all));
		*insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
		*insn++ = BPF_ALU32_IMM(BPF_MOV, dst_reg, 1);
		break;
	}

	return insn - insn_buf;
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;

	return true;
}
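/* Editor's note: convert_bpf_extensions() rewrites the classic "ancillary
 * data" loads (absolute loads at SKF_AD_OFF and above) into direct eBPF field
 * accesses. A minimal, compiled-out sketch of the SKF_AD_MARK case handled
 * above, showing one input instruction and the single instruction that
 * convert_skb_access() emits for it; illustrative only, not part of this file.
 */
#if 0
	/* Classic BPF as written by user space: A = skb->mark. */
	struct sock_filter classic_mark = BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
						   SKF_AD_OFF + SKF_AD_MARK);

	/* Equivalent eBPF emitted via convert_skb_access(SKF_AD_MARK, ...):
	 * a word load from the sk_buff held in BPF_REG_CTX.
	 */
	struct bpf_insn ebpf_mark = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
						offsetof(struct sk_buff, mark));
#endif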
static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
{
	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
	bool endian = BPF_SIZE(fp->code) == BPF_H ||
		      BPF_SIZE(fp->code) == BPF_W;
	bool indirect = BPF_MODE(fp->code) == BPF_IND;
	const int ip_align = NET_IP_ALIGN;
	struct bpf_insn *insn = *insnp;
	int offset = fp->k;

	if (!indirect &&
	    ((unaligned_ok && offset >= 0) ||
	     (!unaligned_ok && offset >= 0 &&
	      offset + ip_align >= 0 &&
	      offset + ip_align % size == 0))) {
		bool ldx_off_ok = offset <= S16_MAX;

		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
		if (offset)
			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
				      size, 2 + endian + (!ldx_off_ok * 2));
		if (ldx_off_ok) {
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_D, offset);
		} else {
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_TMP, 0);
		}
		if (endian)
			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
		*insn++ = BPF_JMP_A(8);
	}

	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
	if (!indirect) {
		*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
	} else {
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
		if (fp->k)
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
	}

	switch (BPF_SIZE(fp->code)) {
	case BPF_B:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
		break;
	case BPF_H:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
		break;
	case BPF_W:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
		break;
	default:
		return false;
	}

	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
	*insn   = BPF_EXIT_INSN();

	*insnp = insn;
	return true;
}
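/* Editor's note: convert_bpf_ld_abs() above turns a classic absolute or
 * indirect packet load into a bounds-checked direct read from the cached
 * skb->data (fast path) plus a bpf_skb_load_helper_*() call (slow path). A
 * compiled-out sketch of the input side only, with the resulting sequence
 * summarised in comments; illustrative, not emitted verbatim by this file.
 */
#if 0
	/* Classic load of the EtherType halfword at packet offset 12. */
	struct sock_filter classic_ldh = BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12);

	/* Fast path emitted above: copy the cached headlen (BPF_REG_H) to a
	 * temporary, subtract the offset, bounds-check it, then do a BPF_H
	 * load from BPF_REG_D (cached skb->data) at offset 12 followed by a
	 * BPF_FROM_BE swap. Slow path: a call to
	 * bpf_skb_load_helper_16(skb, data, headlen, 12).
	 */
#endif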
* Conversion workflow: * * 1) First pass for calculating the new program length: * bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs) * * 2) 2nd pass to remap in two passes: 1st pass finds new * jump offsets, 2nd pass remapping: * bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs) */ static int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_prog *new_prog, int *new_len, bool *seen_ld_abs) { int new_flen = 0, pass = 0, target, i, stack_off; struct bpf_insn *new_insn, *first_insn = NULL; struct sock_filter *fp; int *addrs = NULL; u8 bpf_src; BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); if (len <= 0 || len > BPF_MAXINSNS) return -EINVAL; if (new_prog) { first_insn = new_prog->insnsi; addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL | __GFP_NOWARN); if (!addrs) return -ENOMEM; } do_pass: new_insn = first_insn; fp = prog; /* Classic BPF related prologue emission. */ if (new_prog) { /* Classic BPF expects A and X to be reset first. These need * to be guaranteed to be the first two instructions. */ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); /* All programs must keep CTX in callee saved BPF_REG_CTX. * In eBPF case it's done by the compiler, here we need to * do this ourself. Initial CTX is present in BPF_REG_ARG1. */ *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); if (*seen_ld_abs) { /* For packet access in classic BPF, cache skb->data * in callee-saved BPF R8 and skb->len - skb->data_len * (headlen) in BPF R9. Since classic BPF is read-only * on CTX, we only need to cache it once. */ *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), BPF_REG_D, BPF_REG_CTX, offsetof(struct sk_buff, data)); *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX, offsetof(struct sk_buff, len)); *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX, offsetof(struct sk_buff, data_len)); *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP); } } else { new_insn += 3; } for (i = 0; i < len; fp++, i++) { struct bpf_insn tmp_insns[32] = { }; struct bpf_insn *insn = tmp_insns; if (addrs) addrs[i] = new_insn - first_insn; switch (fp->code) { /* All arithmetic insns and skb loads map as-is. */ case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_NEG: case BPF_LD | BPF_ABS | BPF_W: case BPF_LD | BPF_ABS | BPF_H: case BPF_LD | BPF_ABS | BPF_B: case BPF_LD | BPF_IND | BPF_W: case BPF_LD | BPF_IND | BPF_H: case BPF_LD | BPF_IND | BPF_B: /* Check for overloaded BPF extension and * directly convert it if found, otherwise * just move on with mapping. 
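			 * E.g. a classic "ld #(SKF_AD_OFF + SKF_AD_IFINDEX)"
			 * is not a packet load at all and is rewritten by
			 * convert_bpf_extensions() into the skb->dev and
			 * dev->ifindex dereference, whereas an in-packet
			 * LD|ABS/LD|IND goes through convert_bpf_ld_abs().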
*/ if (BPF_CLASS(fp->code) == BPF_LD && BPF_MODE(fp->code) == BPF_ABS && convert_bpf_extensions(fp, &insn)) break; if (BPF_CLASS(fp->code) == BPF_LD && convert_bpf_ld_abs(fp, &insn)) { *seen_ld_abs = true; break; } if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); /* Error with exception code on div/mod by 0. * For cBPF programs, this was always return 0. */ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2); *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); *insn++ = BPF_EXIT_INSN(); } *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; /* Jump transformation cannot use BPF block macros * everywhere as offset calculation and target updates * require a bit more work than the rest, i.e. jump * opcodes map as-is, but offsets need adjustment. */ #define BPF_EMIT_JMP \ do { \ const s32 off_min = S16_MIN, off_max = S16_MAX; \ s32 off; \ \ if (target >= len || target < 0) \ goto err; \ off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ /* Adjust pc relative offset for 2nd or 3rd insn. */ \ off -= insn - tmp_insns; \ /* Reject anything not fitting into insn->off. */ \ if (off < off_min || off > off_max) \ goto err; \ insn->off = off; \ } while (0) case BPF_JMP | BPF_JA: target = i + fp->k + 1; insn->code = fp->code; BPF_EMIT_JMP; break; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JGE | BPF_X: if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) { /* BPF immediates are signed, zero extend * immediate into tmp register and use it * in compare insn. */ *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); insn->dst_reg = BPF_REG_A; insn->src_reg = BPF_REG_TMP; bpf_src = BPF_X; } else { insn->dst_reg = BPF_REG_A; insn->imm = fp->k; bpf_src = BPF_SRC(fp->code); insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0; } /* Common case where 'jump_false' is next insn. */ if (fp->jf == 0) { insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; target = i + fp->jt + 1; BPF_EMIT_JMP; break; } /* Convert some jumps when 'jump_true' is next insn. */ if (fp->jt == 0) { switch (BPF_OP(fp->code)) { case BPF_JEQ: insn->code = BPF_JMP | BPF_JNE | bpf_src; break; case BPF_JGT: insn->code = BPF_JMP | BPF_JLE | bpf_src; break; case BPF_JGE: insn->code = BPF_JMP | BPF_JLT | bpf_src; break; default: goto jmp_rest; } target = i + fp->jf + 1; BPF_EMIT_JMP; break; } jmp_rest: /* Other jumps are mapped into two insns: Jxx and JA. */ target = i + fp->jt + 1; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; BPF_EMIT_JMP; insn++; insn->code = BPF_JMP | BPF_JA; target = i + fp->jf + 1; BPF_EMIT_JMP; break; /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: { struct sock_filter tmp = { .code = BPF_LD | BPF_ABS | BPF_B, .k = fp->k, }; *seen_ld_abs = true; /* X = A */ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); /* A = BPF_R0 = *(u8 *) (skb->data + K) */ convert_bpf_ld_abs(&tmp, &insn); insn++; /* A &= 0xf */ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); /* A <<= 2 */ *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); /* tmp = X */ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X); /* X = A */ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); /* A = tmp */ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); break; } /* RET_K is remapped into 2 insns. 
RET_A case doesn't need an * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. */ case BPF_RET | BPF_A: case BPF_RET | BPF_K: if (BPF_RVAL(fp->code) == BPF_K) *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0, 0, fp->k); *insn = BPF_EXIT_INSN(); break; /* Store to stack. */ case BPF_ST: case BPF_STX: stack_off = fp->k * 4 + 4; *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == BPF_ST ? BPF_REG_A : BPF_REG_X, -stack_off); /* check_load_and_stores() verifies that classic BPF can * load from stack only after write, so tracking * stack_depth for ST|STX insns is enough */ if (new_prog && new_prog->aux->stack_depth < stack_off) new_prog->aux->stack_depth = stack_off; break; /* Load from stack. */ case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: stack_off = fp->k * 4 + 4; *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? BPF_REG_A : BPF_REG_X, BPF_REG_FP, -stack_off); break; /* A = K or X = K */ case BPF_LD | BPF_IMM: case BPF_LDX | BPF_IMM: *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ? BPF_REG_A : BPF_REG_X, fp->k); break; /* X = A */ case BPF_MISC | BPF_TAX: *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); break; /* A = X */ case BPF_MISC | BPF_TXA: *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X); break; /* A = skb->len or X = skb->len */ case BPF_LD | BPF_W | BPF_LEN: case BPF_LDX | BPF_W | BPF_LEN: *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? BPF_REG_A : BPF_REG_X, BPF_REG_CTX, offsetof(struct sk_buff, len)); break; /* Access seccomp_data fields. */ case BPF_LDX | BPF_ABS | BPF_W: /* A = *(u32 *) (ctx + K) */ *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k); break; /* Unknown instruction. */ default: goto err; } insn++; if (new_prog) memcpy(new_insn, tmp_insns, sizeof(*insn) * (insn - tmp_insns)); new_insn += insn - tmp_insns; } if (!new_prog) { /* Only calculating new length. */ *new_len = new_insn - first_insn; if (*seen_ld_abs) *new_len += 4; /* Prologue bits. */ return 0; } pass++; if (new_flen != new_insn - first_insn) { new_flen = new_insn - first_insn; if (pass > 2) goto err; goto do_pass; } kfree(addrs); BUG_ON(*new_len != new_flen); return 0; err: kfree(addrs); return -EINVAL; } /* Security: * * As we dont want to clear mem[] array for each packet going through * __bpf_prog_run(), we check that filter loaded by user never try to read * a cell if not previously written, and we check all branches to be sure * a malicious user doesn't try to abuse us. 
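 *
 * For example, a filter that does BPF_LD|BPF_MEM on M[3] without a
 * preceding BPF_ST/BPF_STX to M[3] on every path reaching it is
 * rejected with -EINVAL below, since the corresponding memvalid bit
 * is only ever set by a store.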
*/ static int check_load_and_stores(const struct sock_filter *filter, int flen) { u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ int pc, ret = 0; BUILD_BUG_ON(BPF_MEMWORDS > 16); masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL); if (!masks) return -ENOMEM; memset(masks, 0xff, flen * sizeof(*masks)); for (pc = 0; pc < flen; pc++) { memvalid &= masks[pc]; switch (filter[pc].code) { case BPF_ST: case BPF_STX: memvalid |= (1 << filter[pc].k); break; case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: if (!(memvalid & (1 << filter[pc].k))) { ret = -EINVAL; goto error; } break; case BPF_JMP | BPF_JA: /* A jump must set masks on target */ masks[pc + 1 + filter[pc].k] &= memvalid; memvalid = ~0; break; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: /* A jump must set masks on targets */ masks[pc + 1 + filter[pc].jt] &= memvalid; masks[pc + 1 + filter[pc].jf] &= memvalid; memvalid = ~0; break; } } error: kfree(masks); return ret; } static bool chk_code_allowed(u16 code_to_probe) { static const bool codes[] = { /* 32 bit ALU operations */ [BPF_ALU | BPF_ADD | BPF_K] = true, [BPF_ALU | BPF_ADD | BPF_X] = true, [BPF_ALU | BPF_SUB | BPF_K] = true, [BPF_ALU | BPF_SUB | BPF_X] = true, [BPF_ALU | BPF_MUL | BPF_K] = true, [BPF_ALU | BPF_MUL | BPF_X] = true, [BPF_ALU | BPF_DIV | BPF_K] = true, [BPF_ALU | BPF_DIV | BPF_X] = true, [BPF_ALU | BPF_MOD | BPF_K] = true, [BPF_ALU | BPF_MOD | BPF_X] = true, [BPF_ALU | BPF_AND | BPF_K] = true, [BPF_ALU | BPF_AND | BPF_X] = true, [BPF_ALU | BPF_OR | BPF_K] = true, [BPF_ALU | BPF_OR | BPF_X] = true, [BPF_ALU | BPF_XOR | BPF_K] = true, [BPF_ALU | BPF_XOR | BPF_X] = true, [BPF_ALU | BPF_LSH | BPF_K] = true, [BPF_ALU | BPF_LSH | BPF_X] = true, [BPF_ALU | BPF_RSH | BPF_K] = true, [BPF_ALU | BPF_RSH | BPF_X] = true, [BPF_ALU | BPF_NEG] = true, /* Load instructions */ [BPF_LD | BPF_W | BPF_ABS] = true, [BPF_LD | BPF_H | BPF_ABS] = true, [BPF_LD | BPF_B | BPF_ABS] = true, [BPF_LD | BPF_W | BPF_LEN] = true, [BPF_LD | BPF_W | BPF_IND] = true, [BPF_LD | BPF_H | BPF_IND] = true, [BPF_LD | BPF_B | BPF_IND] = true, [BPF_LD | BPF_IMM] = true, [BPF_LD | BPF_MEM] = true, [BPF_LDX | BPF_W | BPF_LEN] = true, [BPF_LDX | BPF_B | BPF_MSH] = true, [BPF_LDX | BPF_IMM] = true, [BPF_LDX | BPF_MEM] = true, /* Store instructions */ [BPF_ST] = true, [BPF_STX] = true, /* Misc instructions */ [BPF_MISC | BPF_TAX] = true, [BPF_MISC | BPF_TXA] = true, /* Return instructions */ [BPF_RET | BPF_K] = true, [BPF_RET | BPF_A] = true, /* Jump instructions */ [BPF_JMP | BPF_JA] = true, [BPF_JMP | BPF_JEQ | BPF_K] = true, [BPF_JMP | BPF_JEQ | BPF_X] = true, [BPF_JMP | BPF_JGE | BPF_K] = true, [BPF_JMP | BPF_JGE | BPF_X] = true, [BPF_JMP | BPF_JGT | BPF_K] = true, [BPF_JMP | BPF_JGT | BPF_X] = true, [BPF_JMP | BPF_JSET | BPF_K] = true, [BPF_JMP | BPF_JSET | BPF_X] = true, }; if (code_to_probe >= ARRAY_SIZE(codes)) return false; return codes[code_to_probe]; } static bool bpf_check_basics_ok(const struct sock_filter *filter, unsigned int flen) { if (filter == NULL) return false; if (flen == 0 || flen > BPF_MAXINSNS) return false; return true; } /** * bpf_check_classic - verify socket filter code * @filter: filter to verify * @flen: length of filter * * Check the user's filter code. If we let some ugly * filter code slip through kaboom! 
The filter must contain * no references or jumps that are out of range, no illegal * instructions, and must end with a RET instruction. * * All jumps are forward as they are not signed. * * Returns 0 if the rule set is legal or -EINVAL if not. */ static int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) { bool anc_found; int pc; /* Check the filter code now */ for (pc = 0; pc < flen; pc++) { const struct sock_filter *ftest = &filter[pc]; /* May we actually operate on this code? */ if (!chk_code_allowed(ftest->code)) return -EINVAL; /* Some instructions need special checks */ switch (ftest->code) { case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU | BPF_MOD | BPF_K: /* Check for division by zero */ if (ftest->k == 0) return -EINVAL; break; case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_K: if (ftest->k >= 32) return -EINVAL; break; case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: case BPF_ST: case BPF_STX: /* Check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; break; case BPF_JMP | BPF_JA: /* Note, the large ftest->k might cause loops. * Compare this with conditional jumps below, * where offsets are limited. --ANK (981016) */ if (ftest->k >= (unsigned int)(flen - pc - 1)) return -EINVAL; break; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: /* Both conditionals must be safe */ if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; break; case BPF_LD | BPF_W | BPF_ABS: case BPF_LD | BPF_H | BPF_ABS: case BPF_LD | BPF_B | BPF_ABS: anc_found = false; if (bpf_anc_helper(ftest) & BPF_ANC) anc_found = true; /* Ancillary operation unknown or unsupported */ if (anc_found == false && ftest->k >= SKF_AD_OFF) return -EINVAL; } } /* Last instruction must be a RET code */ switch (filter[flen - 1].code) { case BPF_RET | BPF_K: case BPF_RET | BPF_A: return check_load_and_stores(filter, flen); } return -EINVAL; } static int bpf_prog_store_orig_filter(struct bpf_prog *fp, const struct sock_fprog *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); struct sock_fprog_kern *fkprog; fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL); if (!fp->orig_prog) return -ENOMEM; fkprog = fp->orig_prog; fkprog->len = fprog->len; fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL | __GFP_NOWARN); if (!fkprog->filter) { kfree(fp->orig_prog); return -ENOMEM; } return 0; } static void bpf_release_orig_filter(struct bpf_prog *fp) { struct sock_fprog_kern *fprog = fp->orig_prog; if (fprog) { kfree(fprog->filter); kfree(fprog); } } static void __bpf_prog_release(struct bpf_prog *prog) { if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); } else { bpf_release_orig_filter(prog); bpf_prog_free(prog); } } static void __sk_filter_release(struct sk_filter *fp) { __bpf_prog_release(fp->prog); kfree(fp); } /** * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free */ static void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); __sk_filter_release(fp); } /** * sk_filter_release - release a socket filter * @fp: filter to remove * * Remove a filter from a socket and release its resources. 
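 *
 * The actual freeing is deferred: once the last reference is dropped,
 * sk_filter_release_rcu() runs after a grace period, so lockless
 * readers dereferencing sk->sk_filter under rcu_read_lock() are not
 * disturbed.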
*/ static void sk_filter_release(struct sk_filter *fp) { if (refcount_dec_and_test(&fp->refcnt)) call_rcu(&fp->rcu, sk_filter_release_rcu); } void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { u32 filter_size = bpf_prog_size(fp->prog->len); atomic_sub(filter_size, &sk->sk_omem_alloc); sk_filter_release(fp); } /* try to charge the socket memory if there is space available * return true on success */ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) { int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); u32 filter_size = bpf_prog_size(fp->prog->len); /* same check as in sock_kmalloc() */ if (filter_size <= optmem_max && atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) { atomic_add(filter_size, &sk->sk_omem_alloc); return true; } return false; } bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { if (!refcount_inc_not_zero(&fp->refcnt)) return false; if (!__sk_filter_charge(sk, fp)) { sk_filter_release(fp); return false; } return true; } static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) { struct sock_filter *old_prog; struct bpf_prog *old_fp; int err, new_len, old_len = fp->len; bool seen_ld_abs = false; /* We are free to overwrite insns et al right here as it won't be used at * this point in time anymore internally after the migration to the eBPF * instruction representation. */ BUILD_BUG_ON(sizeof(struct sock_filter) != sizeof(struct bpf_insn)); /* Conversion cannot happen on overlapping memory areas, * so we need to keep the user BPF around until the 2nd * pass. At this time, the user BPF is stored in fp->insns. */ old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter), GFP_KERNEL | __GFP_NOWARN); if (!old_prog) { err = -ENOMEM; goto out_err; } /* 1st pass: calculate the new program length. */ err = bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs); if (err) goto out_err_free; /* Expand fp for appending the new filter representation. */ old_fp = fp; fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. */ fp = old_fp; err = -ENOMEM; goto out_err_free; } fp->len = new_len; /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ err = bpf_convert_filter(old_prog, old_len, fp, &new_len, &seen_ld_abs); if (err) /* 2nd bpf_convert_filter() can fail only if it fails * to allocate memory, remapping must succeed. Note, * that at this time old_fp has already been released * by krealloc(). */ goto out_err_free; fp = bpf_prog_select_runtime(fp, &err); if (err) goto out_err_free; kfree(old_prog); return fp; out_err_free: kfree(old_prog); out_err: __bpf_prog_release(fp); return ERR_PTR(err); } static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, bpf_aux_classic_check_t trans) { int err; fp->bpf_func = NULL; fp->jited = 0; err = bpf_check_classic(fp->insns, fp->len); if (err) { __bpf_prog_release(fp); return ERR_PTR(err); } /* There might be additional checks and transformations * needed on classic filters, f.e. in case of seccomp. */ if (trans) { err = trans(fp->insns, fp->len); if (err) { __bpf_prog_release(fp); return ERR_PTR(err); } } /* Probe if we can JIT compile the filter and if so, do * the compilation of the filter. */ bpf_jit_compile(fp); /* JIT compiler couldn't process this filter, so do the eBPF translation * for the optimized interpreter. 
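	 * Note that bpf_migrate_filter() consumes @fp on failure (it
	 * releases the program and returns an ERR_PTR), so callers of
	 * bpf_prepare_filter() only ever see a usable program or an
	 * error pointer.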
*/ if (!fp->jited) fp = bpf_migrate_filter(fp); return fp; } /** * bpf_prog_create - create an unattached filter * @pfp: the unattached filter that is created * @fprog: the filter program * * Create a filter independent of any socket. We first run some * sanity checks on it to make sure it does not explode on us later. * If an error occurs or there is insufficient memory for the filter * a negative errno code is returned. On success the return is zero. */ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); struct bpf_prog *fp; /* Make sure new filter is there and in the right amounts. */ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) return -EINVAL; fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); fp->len = fprog->len; /* Since unattached filters are not copied back to user * space through sk_get_filter(), we do not need to hold * a copy here, and can spare us the work. */ fp->orig_prog = NULL; /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ fp = bpf_prepare_filter(fp, NULL); if (IS_ERR(fp)) return PTR_ERR(fp); *pfp = fp; return 0; } EXPORT_SYMBOL_GPL(bpf_prog_create); /** * bpf_prog_create_from_user - create an unattached filter from user buffer * @pfp: the unattached filter that is created * @fprog: the filter program * @trans: post-classic verifier transformation handler * @save_orig: save classic BPF program * * This function effectively does the same as bpf_prog_create(), only * that it builds up its insns buffer from user space provided buffer. * It also allows for passing a bpf_aux_classic_check_t handler. */ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, bpf_aux_classic_check_t trans, bool save_orig) { unsigned int fsize = bpf_classic_proglen(fprog); struct bpf_prog *fp; int err; /* Make sure new filter is there and in the right amounts. */ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) return -EINVAL; fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); if (!fp) return -ENOMEM; if (copy_from_user(fp->insns, fprog->filter, fsize)) { __bpf_prog_free(fp); return -EFAULT; } fp->len = fprog->len; fp->orig_prog = NULL; if (save_orig) { err = bpf_prog_store_orig_filter(fp, fprog); if (err) { __bpf_prog_free(fp); return -ENOMEM; } } /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ fp = bpf_prepare_filter(fp, trans); if (IS_ERR(fp)) return PTR_ERR(fp); *pfp = fp; return 0; } EXPORT_SYMBOL_GPL(bpf_prog_create_from_user); void bpf_prog_destroy(struct bpf_prog *fp) { __bpf_prog_release(fp); } EXPORT_SYMBOL_GPL(bpf_prog_destroy); static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) { struct sk_filter *fp, *old_fp; fp = kmalloc(sizeof(*fp), GFP_KERNEL); if (!fp) return -ENOMEM; fp->prog = prog; if (!__sk_filter_charge(sk, fp)) { kfree(fp); return -ENOMEM; } refcount_set(&fp->refcnt, 1); old_fp = rcu_dereference_protected(sk->sk_filter, lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_filter, fp); if (old_fp) sk_filter_uncharge(sk, old_fp); return 0; } static struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) { unsigned int fsize = bpf_classic_proglen(fprog); struct bpf_prog *prog; int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) return ERR_PTR(-EPERM); /* Make sure new filter is there and in the right amounts. 
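	 * (i.e. a non-NULL filter pointer and a length in the range
	 * 1..BPF_MAXINSNS, see bpf_check_basics_ok()).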
*/ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) return ERR_PTR(-EINVAL); prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); if (!prog) return ERR_PTR(-ENOMEM); if (copy_from_user(prog->insns, fprog->filter, fsize)) { __bpf_prog_free(prog); return ERR_PTR(-EFAULT); } prog->len = fprog->len; err = bpf_prog_store_orig_filter(prog, fprog); if (err) { __bpf_prog_free(prog); return ERR_PTR(-ENOMEM); } /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ return bpf_prepare_filter(prog, NULL); } /** * sk_attach_filter - attach a socket filter * @fprog: the filter program * @sk: the socket to use * * Attach the user's filter code. We first run some sanity checks on * it to make sure it does not explode on us later. If an error * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct bpf_prog *prog = __get_filter(fprog, sk); int err; if (IS_ERR(prog)) return PTR_ERR(prog); err = __sk_attach_prog(prog, sk); if (err < 0) { __bpf_prog_release(prog); return err; } return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct bpf_prog *prog = __get_filter(fprog, sk); int err, optmem_max; if (IS_ERR(prog)) return PTR_ERR(prog); optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); if (bpf_prog_size(prog->len) > optmem_max) err = -ENOMEM; else err = reuseport_attach_prog(sk, prog); if (err) __bpf_prog_release(prog); return err; } static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) { if (sock_flag(sk, SOCK_FILTER_LOCKED)) return ERR_PTR(-EPERM); return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); } int sk_attach_bpf(u32 ufd, struct sock *sk) { struct bpf_prog *prog = __get_bpf(ufd, sk); int err; if (IS_ERR(prog)) return PTR_ERR(prog); err = __sk_attach_prog(prog, sk); if (err < 0) { bpf_prog_put(prog); return err; } return 0; } int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) { struct bpf_prog *prog; int err, optmem_max; if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); if (PTR_ERR(prog) == -EINVAL) prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT); if (IS_ERR(prog)) return PTR_ERR(prog); if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) { /* Like other non BPF_PROG_TYPE_SOCKET_FILTER * bpf prog (e.g. sockmap). It depends on the * limitation imposed by bpf_prog_load(). * Hence, sysctl_optmem_max is not checked. 
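		 *
		 * Only TCP or UDP sockets over AF_INET/AF_INET6 may attach
		 * a BPF_PROG_TYPE_SK_REUSEPORT program here; anything else
		 * bails out with -ENOTSUPP below.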
*/ if ((sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_DGRAM) || (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_TCP) || (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { err = -ENOTSUPP; goto err_prog_put; } } else { /* BPF_PROG_TYPE_SOCKET_FILTER */ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); if (bpf_prog_size(prog->len) > optmem_max) { err = -ENOMEM; goto err_prog_put; } } err = reuseport_attach_prog(sk, prog); err_prog_put: if (err) bpf_prog_put(prog); return err; } void sk_reuseport_prog_free(struct bpf_prog *prog) { if (!prog) return; if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) bpf_prog_put(prog); else bpf_prog_destroy(prog); } struct bpf_scratchpad { union { __be32 diff[MAX_BPF_STACK / sizeof(__be32)]; u8 buff[MAX_BPF_STACK]; }; }; static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); static inline int __bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { return skb_ensure_writable(skb, write_len); } static inline int bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { int err = __bpf_try_make_writable(skb, write_len); bpf_compute_data_pointers(skb); return err; } static int bpf_try_make_head_writable(struct sk_buff *skb) { return bpf_try_make_writable(skb, skb_headlen(skb)); } static inline void bpf_push_mac_rcsum(struct sk_buff *skb) { if (skb_at_tc_ingress(skb)) skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); } static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) { if (skb_at_tc_ingress(skb)) skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); } BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, const void *, from, u32, len, u64, flags) { void *ptr; if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) return -EINVAL; if (unlikely(offset > INT_MAX)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; ptr = skb->data + offset; if (flags & BPF_F_RECOMPUTE_CSUM) __skb_postpull_rcsum(skb, ptr, len, offset); memcpy(ptr, from, len); if (flags & BPF_F_RECOMPUTE_CSUM) __skb_postpush_rcsum(skb, ptr, len, offset); if (flags & BPF_F_INVALIDATE_HASH) skb_clear_hash(skb); return 0; } static const struct bpf_func_proto bpf_skb_store_bytes_proto = { .func = bpf_skb_store_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE, .arg5_type = ARG_ANYTHING, }; int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) { return ____bpf_skb_store_bytes(skb, offset, from, len, flags); } BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, void *, to, u32, len) { void *ptr; if (unlikely(offset > INT_MAX)) goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); if (unlikely(!ptr)) goto err_clear; if (ptr != to) memcpy(to, ptr, len); return 0; err_clear: memset(to, 0, len); return -EFAULT; } static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .func = bpf_skb_load_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, }; int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) { return ____bpf_skb_load_bytes(skb, offset, to, len); } BPF_CALL_4(bpf_flow_dissector_load_bytes, const struct bpf_flow_dissector *, ctx, u32, offset, void *, to, u32, len) { void *ptr; if (unlikely(offset > 0xffff)) goto 
err_clear; if (unlikely(!ctx->skb)) goto err_clear; ptr = skb_header_pointer(ctx->skb, offset, len, to); if (unlikely(!ptr)) goto err_clear; if (ptr != to) memcpy(to, ptr, len); return 0; err_clear: memset(to, 0, len); return -EFAULT; } static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = { .func = bpf_flow_dissector_load_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, }; BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, u32, offset, void *, to, u32, len, u32, start_header) { u8 *end = skb_tail_pointer(skb); u8 *start, *ptr; if (unlikely(offset > 0xffff)) goto err_clear; switch (start_header) { case BPF_HDR_START_MAC: if (unlikely(!skb_mac_header_was_set(skb))) goto err_clear; start = skb_mac_header(skb); break; case BPF_HDR_START_NET: start = skb_network_header(skb); break; default: goto err_clear; } ptr = start + offset; if (likely(ptr + len <= end)) { memcpy(to, ptr, len); return 0; } err_clear: memset(to, 0, len); return -EFAULT; } static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = { .func = bpf_skb_load_bytes_relative, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, .arg5_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) { /* Idea is the following: should the needed direct read/write * test fail during runtime, we can pull in more data and redo * again, since implicitly, we invalidate previous checks here. * * Or, since we know how much we need to make read/writeable, * this can be done once at the program beginning for direct * access case. By this we overcome limitations of only current * headroom being accessible. */ return bpf_try_make_writable(skb, len ? : skb_headlen(skb)); } static const struct bpf_func_proto bpf_skb_pull_data_proto = { .func = bpf_skb_pull_data, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) { return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; } static const struct bpf_func_proto bpf_sk_fullsock_proto = { .func = bpf_sk_fullsock, .gpl_only = false, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; static inline int sk_skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { return __bpf_try_make_writable(skb, write_len); } BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len) { /* Idea is the following: should the needed direct read/write * test fail during runtime, we can pull in more data and redo * again, since implicitly, we invalidate previous checks here. * * Or, since we know how much we need to make read/writeable, * this can be done once at the program beginning for direct * access case. By this we overcome limitations of only current * headroom being accessible. */ return sk_skb_try_make_writable(skb, len ? 
: skb_headlen(skb)); } static const struct bpf_func_proto sk_skb_pull_data_proto = { .func = sk_skb_pull_data, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, u64, from, u64, to, u64, flags) { __sum16 *ptr; if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) return -EINVAL; if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) return -EFAULT; ptr = (__sum16 *)(skb->data + offset); switch (flags & BPF_F_HDR_FIELD_MASK) { case 0: if (unlikely(from != 0)) return -EINVAL; csum_replace_by_diff(ptr, to); break; case 2: csum_replace2(ptr, from, to); break; case 4: csum_replace4(ptr, from, to); break; default: return -EINVAL; } return 0; } static const struct bpf_func_proto bpf_l3_csum_replace_proto = { .func = bpf_l3_csum_replace, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, u64, from, u64, to, u64, flags) { bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; bool do_mforce = flags & BPF_F_MARK_ENFORCE; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) return -EINVAL; if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) return -EFAULT; ptr = (__sum16 *)(skb->data + offset); if (is_mmzero && !do_mforce && !*ptr) return 0; switch (flags & BPF_F_HDR_FIELD_MASK) { case 0: if (unlikely(from != 0)) return -EINVAL; inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); break; case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); break; case 4: inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo); break; default: return -EINVAL; } if (is_mmzero && !*ptr) *ptr = CSUM_MANGLED_0; return 0; } static const struct bpf_func_proto bpf_l4_csum_replace_proto = { .func = bpf_l4_csum_replace, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, __be32 *, to, u32, to_size, __wsum, seed) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); u32 diff_size = from_size + to_size; int i, j = 0; /* This is quite flexible, some examples: * * from_size == 0, to_size > 0, seed := csum --> pushing data * from_size > 0, to_size == 0, seed := csum --> pulling data * from_size > 0, to_size > 0, seed := 0 --> diffing data * * Even for diffing, from_size and to_size don't need to be equal. 
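	 *
	 * A typical pattern from a program rewriting a 4-byte field
	 * (sketch only, with hypothetical old_val/new_val/csum_off):
	 *
	 *   diff = bpf_csum_diff(&old_val, 4, &new_val, 4, 0);
	 *   bpf_l4_csum_replace(skb, csum_off, 0, diff, 0);
	 *
	 * where the zero header-field size in the replace call selects
	 * the inet_proto_csum_replace_by_diff() handling of the
	 * precomputed difference in bpf_l4_csum_replace() above.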
*/ if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) || diff_size > sizeof(sp->diff))) return -EINVAL; for (i = 0; i < from_size / sizeof(__be32); i++, j++) sp->diff[j] = ~from[i]; for (i = 0; i < to_size / sizeof(__be32); i++, j++) sp->diff[j] = to[i]; return csum_partial(sp->diff, diff_size, seed); } static const struct bpf_func_proto bpf_csum_diff_proto = { .func = bpf_csum_diff, .gpl_only = false, .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum) { /* The interface is to be used in combination with bpf_csum_diff() * for direct packet writes. csum rotation for alignment as well * as emulating csum_sub() can be done from the eBPF program. */ if (skb->ip_summed == CHECKSUM_COMPLETE) return (skb->csum = csum_add(skb->csum, csum)); return -ENOTSUPP; } static const struct bpf_func_proto bpf_csum_update_proto = { .func = bpf_csum_update, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level) { /* The interface is to be used in combination with bpf_skb_adjust_room() * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET * is passed as flags, for example. */ switch (level) { case BPF_CSUM_LEVEL_INC: __skb_incr_checksum_unnecessary(skb); break; case BPF_CSUM_LEVEL_DEC: __skb_decr_checksum_unnecessary(skb); break; case BPF_CSUM_LEVEL_RESET: __skb_reset_checksum_unnecessary(skb); break; case BPF_CSUM_LEVEL_QUERY: return skb->ip_summed == CHECKSUM_UNNECESSARY ? skb->csum_level : -EACCES; default: return -EINVAL; } return 0; } static const struct bpf_func_proto bpf_csum_level_proto = { .func = bpf_csum_level, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { return dev_forward_skb_nomtu(dev, skb); } static inline int __bpf_rx_skb_no_mac(struct net_device *dev, struct sk_buff *skb) { int ret = ____dev_forward_skb(dev, skb, false); if (likely(!ret)) { skb->dev = dev; ret = netif_rx(skb); } return ret; } static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) { int ret; if (dev_xmit_recursion()) { net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); kfree_skb(skb); return -ENETDOWN; } skb->dev = dev; skb_set_redirected_noclear(skb, skb_at_tc_ingress(skb)); skb_clear_tstamp(skb); dev_xmit_recursion_inc(); ret = dev_queue_xmit(skb); dev_xmit_recursion_dec(); return ret; } static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, u32 flags) { unsigned int mlen = skb_network_offset(skb); if (unlikely(skb->len <= mlen)) { kfree_skb(skb); return -ERANGE; } if (mlen) { __skb_pull(skb, mlen); /* At ingress, the mac header has already been pulled once. * At egress, skb_pospull_rcsum has to be done in case that * the skb is originated from ingress (i.e. a forwarded skb) * to ensure that rcsum starts at net header. */ if (!skb_at_tc_ingress(skb)) skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); } skb_pop_mac_header(skb); skb_reset_mac_len(skb); return flags & BPF_F_INGRESS ? 
__bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); } static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, u32 flags) { /* Verify that a link layer header is carried */ if (unlikely(skb->mac_header >= skb->network_header || skb->len == 0)) { kfree_skb(skb); return -ERANGE; } bpf_push_mac_rcsum(skb); return flags & BPF_F_INGRESS ? __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); } static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, u32 flags) { if (dev_is_mac_header_xmit(dev)) return __bpf_redirect_common(skb, dev, flags); else return __bpf_redirect_no_mac(skb, dev, flags); } #if IS_ENABLED(CONFIG_IPV6) static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, struct net_device *dev, struct bpf_nh_params *nh) { u32 hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *nexthop; struct dst_entry *dst = NULL; struct neighbour *neigh; if (dev_xmit_recursion()) { net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); goto out_drop; } skb->dev = dev; skb_clear_tstamp(skb); if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); if (!skb) return -ENOMEM; } rcu_read_lock(); if (!nh) { dst = skb_dst(skb); nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), &ipv6_hdr(skb)->daddr); } else { nexthop = &nh->ipv6_nh; } neigh = ip_neigh_gw6(dev, nexthop); if (likely(!IS_ERR(neigh))) { int ret; sock_confirm_neigh(skb, neigh); local_bh_disable(); dev_xmit_recursion_inc(); ret = neigh_output(neigh, skb, false); dev_xmit_recursion_dec(); local_bh_enable(); rcu_read_unlock(); return ret; } rcu_read_unlock_bh(); if (dst) IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: kfree_skb(skb); return -ENETDOWN; } static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, struct bpf_nh_params *nh) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; if (!nh) { struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_flags = FLOWI_FLAG_ANYSRC, .flowi6_mark = skb->mark, .flowlabel = ip6_flowinfo(ip6h), .flowi6_oif = dev->ifindex, .flowi6_proto = ip6h->nexthdr, .daddr = ip6h->daddr, .saddr = ip6h->saddr, }; dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst)) goto out_drop; skb_dst_set(skb, dst); } else if (nh->nh_family != AF_INET6) { goto out_drop; } err = bpf_out_neigh_v6(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; goto out_xmit; out_drop: dev->stats.tx_errors++; kfree_skb(skb); out_xmit: return ret; } #else static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; } #endif /* CONFIG_IPV6 */ #if IS_ENABLED(CONFIG_INET) static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, struct net_device *dev, struct bpf_nh_params *nh) { u32 hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; if (dev_xmit_recursion()) { net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); goto out_drop; } skb->dev = dev; skb_clear_tstamp(skb); if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); if (!skb) return -ENOMEM; } rcu_read_lock(); if (!nh) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = container_of(dst, struct rtable, dst); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); } else if (nh->nh_family == 
AF_INET6) { neigh = ip_neigh_gw6(dev, &nh->ipv6_nh); is_v6gw = true; } else if (nh->nh_family == AF_INET) { neigh = ip_neigh_gw4(dev, nh->ipv4_nh); } else { rcu_read_unlock(); goto out_drop; } if (likely(!IS_ERR(neigh))) { int ret; sock_confirm_neigh(skb, neigh); local_bh_disable(); dev_xmit_recursion_inc(); ret = neigh_output(neigh, skb, is_v6gw); dev_xmit_recursion_dec(); local_bh_enable(); rcu_read_unlock(); return ret; } rcu_read_unlock(); out_drop: kfree_skb(skb); return -ENETDOWN; } static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, struct bpf_nh_params *nh) { const struct iphdr *ip4h = ip_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; if (!nh) { struct flowi4 fl4 = { .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_oif = dev->ifindex, .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, .saddr = ip4h->saddr, }; struct rtable *rt; rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto out_drop; if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); goto out_drop; } skb_dst_set(skb, &rt->dst); } err = bpf_out_neigh_v4(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; goto out_xmit; out_drop: dev->stats.tx_errors++; kfree_skb(skb); out_xmit: return ret; } #else static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; } #endif /* CONFIG_INET */ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev, struct bpf_nh_params *nh) { struct ethhdr *ethh = eth_hdr(skb); if (unlikely(skb->mac_header >= skb->network_header)) goto out; bpf_push_mac_rcsum(skb); if (is_multicast_ether_addr(ethh->h_dest)) goto out; skb_pull(skb, sizeof(*ethh)); skb_unset_mac_header(skb); skb_reset_network_header(skb); if (skb->protocol == htons(ETH_P_IP)) return __bpf_redirect_neigh_v4(skb, dev, nh); else if (skb->protocol == htons(ETH_P_IPV6)) return __bpf_redirect_neigh_v6(skb, dev, nh); out: kfree_skb(skb); return -ENOTSUPP; } /* Internal, non-exposed redirect flags. */ enum { BPF_F_NEIGH = (1ULL << 1), BPF_F_PEER = (1ULL << 2), BPF_F_NEXTHOP = (1ULL << 3), #define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP) }; BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; struct sk_buff *clone; int ret; if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) return -EINVAL; dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); if (unlikely(!dev)) return -EINVAL; clone = skb_clone(skb, GFP_ATOMIC); if (unlikely(!clone)) return -ENOMEM; /* For direct write, we need to keep the invariant that the skbs * we're dealing with need to be uncloned. Should uncloning fail * here, we need to free the just generated clone to unclone once * again. 
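	 *
	 * Note that it is the clone which gets redirected to @ifindex;
	 * the original skb continues on its normal path in the caller.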
*/ ret = bpf_try_make_head_writable(skb); if (unlikely(ret)) { kfree_skb(clone); return -ENOMEM; } return __bpf_redirect(clone, dev, flags); } static const struct bpf_func_proto bpf_clone_redirect_proto = { .func = bpf_clone_redirect, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); static struct net_device *skb_get_peer_dev(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; if (likely(ops->ndo_get_peer_dev)) return INDIRECT_CALL_1(ops->ndo_get_peer_dev, netkit_peer_dev, dev); return NULL; } int skb_do_redirect(struct sk_buff *skb) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct net *net = dev_net(skb->dev); struct net_device *dev; u32 flags = ri->flags; dev = dev_get_by_index_rcu(net, ri->tgt_index); ri->tgt_index = 0; ri->flags = 0; if (unlikely(!dev)) goto out_drop; if (flags & BPF_F_PEER) { if (unlikely(!skb_at_tc_ingress(skb))) goto out_drop; dev = skb_get_peer_dev(dev); if (unlikely(!dev || !(dev->flags & IFF_UP) || net_eq(net, dev_net(dev)))) goto out_drop; skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); return -EAGAIN; } return flags & BPF_F_NEIGH ? __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? &ri->nh : NULL) : __bpf_redirect(skb, dev, flags); out_drop: kfree_skb(skb); return -EINVAL; } BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) return TC_ACT_SHOT; ri->flags = flags; ri->tgt_index = ifindex; return TC_ACT_REDIRECT; } static const struct bpf_func_proto bpf_redirect_proto = { .func = bpf_redirect, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); if (unlikely(flags)) return TC_ACT_SHOT; ri->flags = BPF_F_PEER; ri->tgt_index = ifindex; return TC_ACT_REDIRECT; } static const struct bpf_func_proto bpf_redirect_peer_proto = { .func = bpf_redirect_peer, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params, int, plen, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); if (unlikely((plen && plen < sizeof(*params)) || flags)) return TC_ACT_SHOT; ri->flags = BPF_F_NEIGH | (plen ? 
BPF_F_NEXTHOP : 0); ri->tgt_index = ifindex; BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params)); if (plen) memcpy(&ri->nh, params, sizeof(ri->nh)); return TC_ACT_REDIRECT; } static const struct bpf_func_proto bpf_redirect_neigh_proto = { .func = bpf_redirect_neigh, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, .arg2_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes) { msg->apply_bytes = bytes; return 0; } static const struct bpf_func_proto bpf_msg_apply_bytes_proto = { .func = bpf_msg_apply_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) { msg->cork_bytes = bytes; return 0; } static void sk_msg_reset_curr(struct sk_msg *msg) { u32 i = msg->sg.start; u32 len = 0; do { len += sk_msg_elem(msg, i)->length; sk_msg_iter_var_next(i); if (len >= msg->sg.size) break; } while (i != msg->sg.end); msg->sg.curr = i; msg->sg.copybreak = 0; } static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { .func = bpf_msg_cork_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, u32, end, u64, flags) { u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start; u32 first_sge, last_sge, i, shift, bytes_sg_total; struct scatterlist *sge; u8 *raw, *to, *from; struct page *page; if (unlikely(flags || end <= start)) return -EINVAL; /* First find the starting scatterlist element */ i = msg->sg.start; do { offset += len; len = sk_msg_elem(msg, i)->length; if (start < offset + len) break; sk_msg_iter_var_next(i); } while (i != msg->sg.end); if (unlikely(start >= offset + len)) return -EINVAL; first_sge = i; /* The start may point into the sg element so we need to also * account for the headroom. */ bytes_sg_total = start - offset + bytes; if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len) goto out; /* At this point we need to linearize multiple scatterlist * elements or a single shared page. Either way we need to * copy into a linear buffer exclusively owned by BPF. Then * place the buffer in the scatterlist and fixup the original * entries by removing the entries now in the linear buffer * and shifting the remaining entries. For now we do not try * to copy partial entries to avoid complexity of running out * of sg_entry slots. The downside is reading a single byte * will copy the entire sg entry. */ do { copy += sk_msg_elem(msg, i)->length; sk_msg_iter_var_next(i); if (bytes_sg_total <= copy) break; } while (i != msg->sg.end); last_sge = i; if (unlikely(bytes_sg_total > copy)) return -EINVAL; page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, get_order(copy)); if (unlikely(!page)) return -ENOMEM; raw = page_address(page); i = first_sge; do { sge = sk_msg_elem(msg, i); from = sg_virt(sge); len = sge->length; to = raw + poffset; memcpy(to, from, len); poffset += len; sge->length = 0; put_page(sg_page(sge)); sk_msg_iter_var_next(i); } while (i != last_sge); sg_set_page(&msg->sg.data[first_sge], page, copy, 0); /* To repair sg ring we need to shift entries. If we only * had a single entry though we can just replace it and * be done. Otherwise walk the ring and shift the entries. */ WARN_ON_ONCE(last_sge == first_sge); shift = last_sge > first_sge ? 
last_sge - first_sge - 1 : NR_MSG_FRAG_IDS - first_sge + last_sge - 1; if (!shift) goto out; i = first_sge; sk_msg_iter_var_next(i); do { u32 move_from; if (i + shift >= NR_MSG_FRAG_IDS) move_from = i + shift - NR_MSG_FRAG_IDS; else move_from = i + shift; if (move_from == msg->sg.end) break; msg->sg.data[i] = msg->sg.data[move_from]; msg->sg.data[move_from].length = 0; msg->sg.data[move_from].page_link = 0; msg->sg.data[move_from].offset = 0; sk_msg_iter_var_next(i); } while (1); msg->sg.end = msg->sg.end - shift > msg->sg.end ? msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; out: sk_msg_reset_curr(msg); msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; msg->data_end = msg->data + bytes; return 0; } static const struct bpf_func_proto bpf_msg_pull_data_proto = { .func = bpf_msg_pull_data, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, u32, len, u64, flags) { struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; u32 new, i = 0, l = 0, space, copy = 0, offset = 0; u8 *raw, *to, *from; struct page *page; if (unlikely(flags)) return -EINVAL; if (unlikely(len == 0)) return 0; /* First find the starting scatterlist element */ i = msg->sg.start; do { offset += l; l = sk_msg_elem(msg, i)->length; if (start < offset + l) break; sk_msg_iter_var_next(i); } while (i != msg->sg.end); if (start >= offset + l) return -EINVAL; space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); /* If no space available will fallback to copy, we need at * least one scatterlist elem available to push data into * when start aligns to the beginning of an element or two * when it falls inside an element. We handle the start equals * offset case because its the common case for inserting a * header. 
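	 *
	 * E.g. with one free slot, pushing at an element boundary
	 * (start == offset) only needs that slot for the new page,
	 * whereas pushing into the middle of an element would also need
	 * a slot for the split-off tail, so we fall back to copying
	 * that element.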
*/ if (!space || (space == 1 && start != offset)) copy = msg->sg.data[i].length; page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, get_order(copy + len)); if (unlikely(!page)) return -ENOMEM; if (copy) { int front, back; raw = page_address(page); psge = sk_msg_elem(msg, i); front = start - offset; back = psge->length - front; from = sg_virt(psge); if (front) memcpy(raw, from, front); if (back) { from += front; to = raw + front + len; memcpy(to, from, back); } put_page(sg_page(psge)); } else if (start - offset) { psge = sk_msg_elem(msg, i); rsge = sk_msg_elem_cpy(msg, i); psge->length = start - offset; rsge.length -= psge->length; rsge.offset += start; sk_msg_iter_var_next(i); sg_unmark_end(psge); sg_unmark_end(&rsge); sk_msg_iter_next(msg, end); } /* Slot(s) to place newly allocated data */ new = i; /* Shift one or two slots as needed */ if (!copy) { sge = sk_msg_elem_cpy(msg, i); sk_msg_iter_var_next(i); sg_unmark_end(&sge); sk_msg_iter_next(msg, end); nsge = sk_msg_elem_cpy(msg, i); if (rsge.length) { sk_msg_iter_var_next(i); nnsge = sk_msg_elem_cpy(msg, i); } while (i != msg->sg.end) { msg->sg.data[i] = sge; sge = nsge; sk_msg_iter_var_next(i); if (rsge.length) { nsge = nnsge; nnsge = sk_msg_elem_cpy(msg, i); } else { nsge = sk_msg_elem_cpy(msg, i); } } } /* Place newly allocated data buffer */ sk_mem_charge(msg->sk, len); msg->sg.size += len; __clear_bit(new, msg->sg.copy); sg_set_page(&msg->sg.data[new], page, len + copy, 0); if (rsge.length) { get_page(sg_page(&rsge)); sk_msg_iter_var_next(new); msg->sg.data[new] = rsge; } sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } static const struct bpf_func_proto bpf_msg_push_data_proto = { .func = bpf_msg_push_data, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, }; static void sk_msg_shift_left(struct sk_msg *msg, int i) { int prev; do { prev = i; sk_msg_iter_var_next(i); msg->sg.data[prev] = msg->sg.data[i]; } while (i != msg->sg.end); sk_msg_iter_prev(msg, end); } static void sk_msg_shift_right(struct sk_msg *msg, int i) { struct scatterlist tmp, sge; sk_msg_iter_next(msg, end); sge = sk_msg_elem_cpy(msg, i); sk_msg_iter_var_next(i); tmp = sk_msg_elem_cpy(msg, i); while (i != msg->sg.end) { msg->sg.data[i] = sge; sk_msg_iter_var_next(i); sge = tmp; tmp = sk_msg_elem_cpy(msg, i); } } BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, u32, len, u64, flags) { u32 i = 0, l = 0, space, offset = 0; u64 last = start + len; int pop; if (unlikely(flags)) return -EINVAL; /* First find the starting scatterlist element */ i = msg->sg.start; do { offset += l; l = sk_msg_elem(msg, i)->length; if (start < offset + l) break; sk_msg_iter_var_next(i); } while (i != msg->sg.end); /* Bounds checks: start and pop must be inside message */ if (start >= offset + l || last >= msg->sg.size) return -EINVAL; space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); pop = len; /* --------------| offset * -| start |-------- len -------| * * |----- a ----|-------- pop -------|----- b ----| * |______________________________________________| length * * * a: region at front of scatter element to save * b: region at back of scatter element to save when length > A + pop * pop: region to pop from element, same as input 'pop' here will be * decremented below per iteration. * * Two top-level cases to handle when start != offset, first B is non * zero and second B is zero corresponding to when a pop includes more * than one element. 
* * Then if B is non-zero AND there is no space allocate space and * compact A, B regions into page. If there is space shift ring to * the right free'ing the next element in ring to place B, leaving * A untouched except to reduce length. */ if (start != offset) { struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); int a = start; int b = sge->length - pop - a; sk_msg_iter_var_next(i); if (pop < sge->length - a) { if (space) { sge->length = a; sk_msg_shift_right(msg, i); nsge = sk_msg_elem(msg, i); get_page(sg_page(sge)); sg_set_page(nsge, sg_page(sge), b, sge->offset + pop + a); } else { struct page *page, *orig; u8 *to, *from; page = alloc_pages(__GFP_NOWARN | __GFP_COMP | GFP_ATOMIC, get_order(a + b)); if (unlikely(!page)) return -ENOMEM; sge->length = a; orig = sg_page(sge); from = sg_virt(sge); to = page_address(page); memcpy(to, from, a); memcpy(to + a, from + a + pop, b); sg_set_page(sge, page, a + b, 0); put_page(orig); } pop = 0; } else if (pop >= sge->length - a) { pop -= (sge->length - a); sge->length = a; } } /* From above the current layout _must_ be as follows, * * -| offset * -| start * * |---- pop ---|---------------- b ------------| * |____________________________________________| length * * Offset and start of the current msg elem are equal because in the * previous case we handled offset != start and either consumed the * entire element and advanced to the next element OR pop == 0. * * Two cases to handle here are first pop is less than the length * leaving some remainder b above. Simply adjust the element's layout * in this case. Or pop >= length of the element so that b = 0. In this * case advance to next element decrementing pop. */ while (pop) { struct scatterlist *sge = sk_msg_elem(msg, i); if (pop < sge->length) { sge->length -= pop; sge->offset += pop; pop = 0; } else { pop -= sge->length; sk_msg_shift_left(msg, i); } sk_msg_iter_var_next(i); } sk_mem_uncharge(msg->sk, len - pop); msg->sg.size -= (len - pop); sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } static const struct bpf_func_proto bpf_msg_pop_data_proto = { .func = bpf_msg_pop_data, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, }; #ifdef CONFIG_CGROUP_NET_CLASSID BPF_CALL_0(bpf_get_cgroup_classid_curr) { return __task_get_classid(current); } const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = { .func = bpf_get_cgroup_classid_curr, .gpl_only = false, .ret_type = RET_INTEGER, }; BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb) { struct sock *sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) return 0; return sock_cgroup_classid(&sk->sk_cgrp_data); } static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = { .func = bpf_skb_cgroup_classid, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; #endif BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); } static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .func = bpf_get_cgroup_classid, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb) { return dst_tclassid(skb); } static const struct bpf_func_proto bpf_get_route_realm_proto = { .func = bpf_get_route_realm, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb) { /* If skb_clear_hash() was called due 
to mangling, we can * trigger SW recalculation here. Later access to hash * can then use the inline skb->hash via context directly * instead of calling this helper again. */ return skb_get_hash(skb); } static const struct bpf_func_proto bpf_get_hash_recalc_proto = { .func = bpf_get_hash_recalc, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) { /* After all direct packet write, this can be used once for * triggering a lazy recalc on next skb_get_hash() invocation. */ skb_clear_hash(skb); return 0; } static const struct bpf_func_proto bpf_set_hash_invalid_proto = { .func = bpf_set_hash_invalid, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash) { /* Set user specified hash as L4(+), so that it gets returned * on skb_get_hash() call unless BPF prog later on triggers a * skb_clear_hash(). */ __skb_set_sw_hash(skb, hash, true); return 0; } static const struct bpf_func_proto bpf_set_hash_proto = { .func = bpf_set_hash, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, u16, vlan_tci) { int ret; if (unlikely(vlan_proto != htons(ETH_P_8021Q) && vlan_proto != htons(ETH_P_8021AD))) vlan_proto = htons(ETH_P_8021Q); bpf_push_mac_rcsum(skb); ret = skb_vlan_push(skb, vlan_proto, vlan_tci); bpf_pull_mac_rcsum(skb); bpf_compute_data_pointers(skb); return ret; } static const struct bpf_func_proto bpf_skb_vlan_push_proto = { .func = bpf_skb_vlan_push, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) { int ret; bpf_push_mac_rcsum(skb); ret = skb_vlan_pop(skb); bpf_pull_mac_rcsum(skb); bpf_compute_data_pointers(skb); return ret; } static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .func = bpf_skb_vlan_pop, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { /* Caller already did skb_cow() with len as headroom, * so no need to do it here. */ skb_push(skb, len); memmove(skb->data, skb->data + len, off); memset(skb->data + off, 0, len); /* No skb_postpush_rcsum(skb, skb->data + off, len) * needed here as it does not change the skb->csum * result for checksum complete when summing over * zeroed blocks. */ return 0; } static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) { void *old_data; /* skb_ensure_writable() is not needed here, as we're * already working on an uncloned skb. */ if (unlikely(!pskb_may_pull(skb, off + len))) return -ENOMEM; old_data = skb->data; __skb_pull(skb, len); skb_postpull_rcsum(skb, old_data + off, len); memmove(skb->data, old_data, off); return 0; } static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len) { bool trans_same = skb->transport_header == skb->network_header; int ret; /* There's no need for __skb_push()/__skb_pull() pair to * get to the start of the mac header as we're guaranteed * to always start from here under eBPF. 
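	 *
	 * E.g. for the IPv4 -> IPv6 conversion below, off is the mac header
	 * length (typically ETH_HLEN) and len is the 20 byte difference
	 * between struct ipv6hdr and struct iphdr: the mac header is moved
	 * len bytes towards the head and a zeroed len byte gap opens right
	 * in front of the old network header, which the BPF program is then
	 * expected to rewrite as the new, larger header.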
*/ ret = bpf_skb_generic_push(skb, off, len); if (likely(!ret)) { skb->mac_header -= len; skb->network_header -= len; if (trans_same) skb->transport_header = skb->network_header; } return ret; } static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) { bool trans_same = skb->transport_header == skb->network_header; int ret; /* Same here, __skb_push()/__skb_pull() pair not needed. */ ret = bpf_skb_generic_pop(skb, off, len); if (likely(!ret)) { skb->mac_header += len; skb->network_header += len; if (trans_same) skb->transport_header = skb->network_header; } return ret; } static int bpf_skb_proto_4_to_6(struct sk_buff *skb) { const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); u32 off = skb_mac_header_len(skb); int ret; ret = skb_cow(skb, len_diff); if (unlikely(ret < 0)) return ret; ret = bpf_skb_net_hdr_push(skb, off, len_diff); if (unlikely(ret < 0)) return ret; if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); /* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */ if (shinfo->gso_type & SKB_GSO_TCPV4) { shinfo->gso_type &= ~SKB_GSO_TCPV4; shinfo->gso_type |= SKB_GSO_TCPV6; } } skb->protocol = htons(ETH_P_IPV6); skb_clear_hash(skb); return 0; } static int bpf_skb_proto_6_to_4(struct sk_buff *skb) { const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); u32 off = skb_mac_header_len(skb); int ret; ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) return ret; ret = bpf_skb_net_hdr_pop(skb, off, len_diff); if (unlikely(ret < 0)) return ret; if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); /* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */ if (shinfo->gso_type & SKB_GSO_TCPV6) { shinfo->gso_type &= ~SKB_GSO_TCPV6; shinfo->gso_type |= SKB_GSO_TCPV4; } } skb->protocol = htons(ETH_P_IP); skb_clear_hash(skb); return 0; } static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) { __be16 from_proto = skb->protocol; if (from_proto == htons(ETH_P_IP) && to_proto == htons(ETH_P_IPV6)) return bpf_skb_proto_4_to_6(skb); if (from_proto == htons(ETH_P_IPV6) && to_proto == htons(ETH_P_IP)) return bpf_skb_proto_6_to_4(skb); return -ENOTSUPP; } BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, u64, flags) { int ret; if (unlikely(flags)) return -EINVAL; /* General idea is that this helper does the basic groundwork * needed for changing the protocol, and eBPF program fills the * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace() * and other helpers, rather than passing a raw buffer here. * * The rationale is to keep this minimal and without a need to * deal with raw packet data. F.e. even if we would pass buffers * here, the program still needs to call the bpf_lX_csum_replace() * helpers anyway. Plus, this way we keep also separation of * concerns, since f.e. bpf_skb_store_bytes() should only take * care of stores. * * Currently, additional options and extension header space are * not supported, but flags register is reserved so we can adapt * that. For offloads, we mark packet as dodgy, so that headers * need to be verified first. 
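	 *
	 * A rough usage sketch (not taken from this file): a NAT64-style
	 * tc program would call
	 *
	 *   bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0);
	 *
	 * and then rewrite the freshly grown header area with
	 * bpf_skb_store_bytes() and patch the L4 checksum with
	 * bpf_l4_csum_replace() before passing or redirecting the packet.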
*/ ret = bpf_skb_proto_xlat(skb, proto); bpf_compute_data_pointers(skb); return ret; } static const struct bpf_func_proto bpf_skb_change_proto_proto = { .func = bpf_skb_change_proto, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) { /* We only allow a restricted subset to be changed for now. */ if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || !skb_pkt_type_ok(pkt_type))) return -EINVAL; skb->pkt_type = pkt_type; return 0; } static const struct bpf_func_proto bpf_skb_change_type_proto = { .func = bpf_skb_change_type, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; static u32 bpf_skb_net_base_len(const struct sk_buff *skb) { switch (skb->protocol) { case htons(ETH_P_IP): return sizeof(struct iphdr); case htons(ETH_P_IPV6): return sizeof(struct ipv6hdr); default: return ~0U; } } #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) #define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \ BPF_F_ADJ_ROOM_DECAP_L3_IPV6) #define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \ BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK) | \ BPF_F_ADJ_ROOM_DECAP_L3_MASK) static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, u64 flags) { u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT; bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK; u16 mac_len = 0, inner_net = 0, inner_trans = 0; unsigned int gso_type = SKB_GSO_DODGY; int ret; if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { /* udp gso_size delineates datagrams, only allow if fixed */ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) return -ENOTSUPP; } ret = skb_cow_head(skb, len_diff); if (unlikely(ret < 0)) return ret; if (encap) { if (skb->protocol != htons(ETH_P_IP) && skb->protocol != htons(ETH_P_IPV6)) return -ENOTSUPP; if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) return -EINVAL; if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE && flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL; if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && inner_mac_len < ETH_HLEN) return -EINVAL; if (skb->encapsulation) return -EALREADY; mac_len = skb->network_header - skb->mac_header; inner_net = skb->network_header; if (inner_mac_len > len_diff) return -EINVAL; inner_trans = skb->transport_header; } ret = bpf_skb_net_hdr_push(skb, off, len_diff); if (unlikely(ret < 0)) return ret; if (encap) { skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) skb_set_inner_protocol(skb, htons(ETH_P_TEB)); else skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_set_network_header(skb, mac_len); if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) gso_type |= SKB_GSO_UDP_TUNNEL; else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE) gso_type |= SKB_GSO_GRE; else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) gso_type |= SKB_GSO_IPXIP6; else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) gso_type |= SKB_GSO_IPXIP4; if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE || flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) { int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ? 
sizeof(struct ipv6hdr) : sizeof(struct iphdr); skb_set_transport_header(skb, mac_len + nh_len); } /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) skb->protocol = htons(ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) skb->protocol = htons(ETH_P_IP); } if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); /* Due to header grow, MSS needs to be downgraded. */ if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) skb_decrease_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. */ shinfo->gso_type |= gso_type; shinfo->gso_segs = 0; } return 0; } static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, u64 flags) { int ret; if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_DECAP_L3_MASK | BPF_F_ADJ_ROOM_NO_CSUM_RESET))) return -EINVAL; if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { /* udp gso_size delineates datagrams, only allow if fixed */ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) return -ENOTSUPP; } ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) return ret; ret = bpf_skb_net_hdr_pop(skb, off, len_diff); if (unlikely(ret < 0)) return ret; /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) skb->protocol = htons(ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) skb->protocol = htons(ETH_P_IP); if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); /* Due to header shrink, MSS can be upgraded. */ if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) skb_increase_gso_size(shinfo, len_diff); /* Header must be checked, and gso_segs recomputed. 
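		 * Setting gso_segs to 0 lets the segmentation path re-derive
		 * it from gso_size, and SKB_GSO_DODGY tells that path to
		 * re-validate the (now rewritten) headers first.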
*/ shinfo->gso_type |= SKB_GSO_DODGY; shinfo->gso_segs = 0; } return 0; } #define BPF_SKB_MAX_LEN SKB_MAX_ALLOC BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, u32, mode, u64, flags) { u32 len_diff_abs = abs(len_diff); bool shrink = len_diff < 0; int ret = 0; if (unlikely(flags || mode)) return -EINVAL; if (unlikely(len_diff_abs > 0xfffU)) return -EFAULT; if (!shrink) { ret = skb_cow(skb, len_diff); if (unlikely(ret < 0)) return ret; __skb_push(skb, len_diff_abs); memset(skb->data, 0, len_diff_abs); } else { if (unlikely(!pskb_may_pull(skb, len_diff_abs))) return -ENOMEM; __skb_pull(skb, len_diff_abs); } if (tls_sw_has_ctx_rx(skb->sk)) { struct strp_msg *rxm = strp_msg(skb); rxm->full_len += len_diff; } return ret; } static const struct bpf_func_proto sk_skb_adjust_room_proto = { .func = sk_skb_adjust_room, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, u32, mode, u64, flags) { u32 len_cur, len_diff_abs = abs(len_diff); u32 len_min = bpf_skb_net_base_len(skb); u32 len_max = BPF_SKB_MAX_LEN; __be16 proto = skb->protocol; bool shrink = len_diff < 0; u32 off; int ret; if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK | BPF_F_ADJ_ROOM_NO_CSUM_RESET))) return -EINVAL; if (unlikely(len_diff_abs > 0xfffU)) return -EFAULT; if (unlikely(proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6))) return -ENOTSUPP; off = skb_mac_header_len(skb); switch (mode) { case BPF_ADJ_ROOM_NET: off += bpf_skb_net_base_len(skb); break; case BPF_ADJ_ROOM_MAC: break; default: return -ENOTSUPP; } if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) { if (!shrink) return -EINVAL; switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) { case BPF_F_ADJ_ROOM_DECAP_L3_IPV4: len_min = sizeof(struct iphdr); break; case BPF_F_ADJ_ROOM_DECAP_L3_IPV6: len_min = sizeof(struct ipv6hdr); break; default: return -EINVAL; } } len_cur = skb->len - skb_network_offset(skb); if ((shrink && (len_diff_abs >= len_cur || len_cur - len_diff_abs < len_min)) || (!shrink && (skb->len + len_diff_abs > len_max && !skb_is_gso(skb)))) return -ENOTSUPP; ret = shrink ? 
bpf_skb_net_shrink(skb, off, len_diff_abs, flags) : bpf_skb_net_grow(skb, off, len_diff_abs, flags); if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET)) __skb_reset_checksum_unnecessary(skb); bpf_compute_data_pointers(skb); return ret; } static const struct bpf_func_proto bpf_skb_adjust_room_proto = { .func = bpf_skb_adjust_room, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_ANYTHING, }; static u32 __bpf_skb_min_len(const struct sk_buff *skb) { u32 min_len = skb_network_offset(skb); if (skb_transport_header_was_set(skb)) min_len = skb_transport_offset(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) min_len = skb_checksum_start_offset(skb) + skb->csum_offset + sizeof(__sum16); return min_len; } static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) { unsigned int old_len = skb->len; int ret; ret = __skb_grow_rcsum(skb, new_len); if (!ret) memset(skb->data + old_len, 0, new_len - old_len); return ret; } static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) { return __skb_trim_rcsum(skb, new_len); } static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len, u64 flags) { u32 max_len = BPF_SKB_MAX_LEN; u32 min_len = __bpf_skb_min_len(skb); int ret; if (unlikely(flags || new_len > max_len || new_len < min_len)) return -EINVAL; if (skb->encapsulation) return -ENOTSUPP; /* The basic idea of this helper is that it's performing the * needed work to either grow or trim an skb, and eBPF program * rewrites the rest via helpers like bpf_skb_store_bytes(), * bpf_lX_csum_replace() and others rather than passing a raw * buffer here. This one is a slow path helper and intended * for replies with control messages. * * Like in bpf_skb_change_proto(), we want to keep this rather * minimal and without protocol specifics so that we are able * to separate concerns as in bpf_skb_store_bytes() should only * be the one responsible for writing buffers. * * It's really expected to be a slow path operation here for * control message replies, so we're implicitly linearizing, * uncloning and drop offloads from the skb by this. 
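	 *
	 * E.g. a program answering a request in place would trim the skb
	 * down to the reply length with bpf_skb_change_tail() (or grow it,
	 * with the new tail zero-filled), rewrite the payload via
	 * bpf_skb_store_bytes() and fix up checksums before transmit.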
*/ ret = __bpf_try_make_writable(skb, skb->len); if (!ret) { if (new_len > skb->len) ret = bpf_skb_grow_rcsum(skb, new_len); else if (new_len < skb->len) ret = bpf_skb_trim_rcsum(skb, new_len); if (!ret && skb_is_gso(skb)) skb_gso_reset(skb); } return ret; } BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, u64, flags) { int ret = __bpf_skb_change_tail(skb, new_len, flags); bpf_compute_data_pointers(skb); return ret; } static const struct bpf_func_proto bpf_skb_change_tail_proto = { .func = bpf_skb_change_tail, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len, u64, flags) { return __bpf_skb_change_tail(skb, new_len, flags); } static const struct bpf_func_proto sk_skb_change_tail_proto = { .func = sk_skb_change_tail, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, u64 flags) { u32 max_len = BPF_SKB_MAX_LEN; u32 new_len = skb->len + head_room; int ret; if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) || new_len < skb->len)) return -EINVAL; ret = skb_cow(skb, head_room); if (likely(!ret)) { /* Idea for this helper is that we currently only * allow to expand on mac header. This means that * skb->protocol network header, etc, stay as is. * Compared to bpf_skb_change_tail(), we're more * flexible due to not needing to linearize or * reset GSO. Intention for this helper is to be * used by an L3 skb that needs to push mac header * for redirection into L2 device. */ __skb_push(skb, head_room); memset(skb->data, 0, head_room); skb_reset_mac_header(skb); skb_reset_mac_len(skb); } return ret; } BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, u64, flags) { int ret = __bpf_skb_change_head(skb, head_room, flags); bpf_compute_data_pointers(skb); return ret; } static const struct bpf_func_proto bpf_skb_change_head_proto = { .func = bpf_skb_change_head, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room, u64, flags) { return __bpf_skb_change_head(skb, head_room, flags); } static const struct bpf_func_proto sk_skb_change_head_proto = { .func = sk_skb_change_head, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) { return xdp_get_buff_len(xdp); } static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = { .func = bpf_xdp_get_buff_len, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff) const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = { .func = bpf_xdp_get_buff_len, .gpl_only = false, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0], }; static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) { return xdp_data_meta_unsupported(xdp) ? 
0 : xdp->data - xdp->data_meta; } BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) { void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); unsigned long metalen = xdp_get_metalen(xdp); void *data_start = xdp_frame_end + metalen; void *data = xdp->data + offset; if (unlikely(data < data_start || data > xdp->data_end - ETH_HLEN)) return -EINVAL; if (metalen) memmove(xdp->data_meta + offset, xdp->data_meta, metalen); xdp->data_meta += offset; xdp->data = data; return 0; } static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .func = bpf_xdp_adjust_head, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush) { unsigned long ptr_len, ptr_off = 0; skb_frag_t *next_frag, *end_frag; struct skb_shared_info *sinfo; void *src, *dst; u8 *ptr_buf; if (likely(xdp->data_end - xdp->data >= off + len)) { src = flush ? buf : xdp->data + off; dst = flush ? xdp->data + off : buf; memcpy(dst, src, len); return; } sinfo = xdp_get_shared_info_from_buff(xdp); end_frag = &sinfo->frags[sinfo->nr_frags]; next_frag = &sinfo->frags[0]; ptr_len = xdp->data_end - xdp->data; ptr_buf = xdp->data; while (true) { if (off < ptr_off + ptr_len) { unsigned long copy_off = off - ptr_off; unsigned long copy_len = min(len, ptr_len - copy_off); src = flush ? buf : ptr_buf + copy_off; dst = flush ? ptr_buf + copy_off : buf; memcpy(dst, src, copy_len); off += copy_len; len -= copy_len; buf += copy_len; } if (!len || next_frag == end_frag) break; ptr_off += ptr_len; ptr_buf = skb_frag_address(next_frag); ptr_len = skb_frag_size(next_frag); next_frag++; } } void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) { u32 size = xdp->data_end - xdp->data; struct skb_shared_info *sinfo; void *addr = xdp->data; int i; if (unlikely(offset > 0xffff || len > 0xffff)) return ERR_PTR(-EFAULT); if (unlikely(offset + len > xdp_get_buff_len(xdp))) return ERR_PTR(-EINVAL); if (likely(offset < size)) /* linear area */ goto out; sinfo = xdp_get_shared_info_from_buff(xdp); offset -= size; for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */ u32 frag_size = skb_frag_size(&sinfo->frags[i]); if (offset < frag_size) { addr = skb_frag_address(&sinfo->frags[i]); size = frag_size; break; } offset -= frag_size; } out: return offset + len <= size ? 
addr + offset : NULL; } BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, void *, buf, u32, len) { void *ptr; ptr = bpf_xdp_pointer(xdp, offset, len); if (IS_ERR(ptr)) return PTR_ERR(ptr); if (!ptr) bpf_xdp_copy_buf(xdp, offset, buf, len, false); else memcpy(buf, ptr, len); return 0; } static const struct bpf_func_proto bpf_xdp_load_bytes_proto = { .func = bpf_xdp_load_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, }; int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len) { return ____bpf_xdp_load_bytes(xdp, offset, buf, len); } BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset, void *, buf, u32, len) { void *ptr; ptr = bpf_xdp_pointer(xdp, offset, len); if (IS_ERR(ptr)) return PTR_ERR(ptr); if (!ptr) bpf_xdp_copy_buf(xdp, offset, buf, len, true); else memcpy(ptr, buf, len); return 0; } static const struct bpf_func_proto bpf_xdp_store_bytes_proto = { .func = bpf_xdp_store_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, }; int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len) { return ____bpf_xdp_store_bytes(xdp, offset, buf, len); } static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; struct xdp_rxq_info *rxq = xdp->rxq; unsigned int tailroom; if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) return -EOPNOTSUPP; tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag); if (unlikely(offset > tailroom)) return -EINVAL; memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset); skb_frag_size_add(frag, offset); sinfo->xdp_frags_size += offset; if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) xsk_buff_get_tail(xdp)->data_end += offset; return 0; } static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, struct xdp_mem_info *mem_info, bool release) { struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp); if (release) { xsk_buff_del_tail(zc_frag); __xdp_return(NULL, mem_info, false, zc_frag); } else { zc_frag->data_end -= shrink; } } static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag, int shrink) { struct xdp_mem_info *mem_info = &xdp->rxq->mem; bool release = skb_frag_size(frag) == shrink; if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) { bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release); goto out; } if (release) { struct page *page = skb_frag_page(frag); __xdp_return(page_address(page), mem_info, false, NULL); } out: return release; } static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); int i, n_frags_free = 0, len_free = 0; if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN)) return -EINVAL; for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) { skb_frag_t *frag = &sinfo->frags[i]; int shrink = min_t(int, offset, skb_frag_size(frag)); len_free += shrink; offset -= shrink; if (bpf_xdp_shrink_data(xdp, frag, shrink)) { n_frags_free++; } else { skb_frag_size_sub(frag, shrink); break; } } sinfo->nr_frags -= n_frags_free; sinfo->xdp_frags_size -= len_free; if (unlikely(!sinfo->nr_frags)) { xdp_buff_clear_frags_flag(xdp); xdp->data_end -= offset; } return 0; } 
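/* bpf_xdp_adjust_tail() moves xdp->data_end by offset bytes: a positive
 * offset grows the linear tail (the new area is zeroed since it may hold
 * uninitialized kernel memory), a negative offset shrinks it. For
 * multi-buffer (fragmented) xdp_buffs the frags helpers above grow or
 * shrink the tail fragments instead. At least ETH_HLEN bytes of packet
 * data must remain, and growth is bounded by the tailroom reserved via
 * xdp->frame_sz.
 */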
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
	void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
	void *data_end = xdp->data_end + offset;

	if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
		if (offset < 0)
			return bpf_xdp_frags_shrink_tail(xdp, -offset);

		return bpf_xdp_frags_increase_tail(xdp, offset);
	}

	/* Notice that xdp_data_hard_end has reserved some tailroom */
	if (unlikely(data_end > data_hard_end))
		return -EINVAL;

	if (unlikely(data_end < xdp->data + ETH_HLEN))
		return -EINVAL;

	/* Clear memory area on grow, can contain uninit kernel memory */
	if (offset > 0)
		memset(xdp->data_end, 0, offset);

	xdp->data_end = data_end;

	return 0;
}

static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
	.func = bpf_xdp_adjust_tail,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_ANYTHING,
};

BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
{
	void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
	void *meta = xdp->data_meta + offset;
	unsigned long metalen = xdp->data - meta;

	if (xdp_data_meta_unsupported(xdp))
		return -ENOTSUPP;
	if (unlikely(meta < xdp_frame_end ||
		     meta > xdp->data))
		return -EINVAL;
	if (unlikely(xdp_metalen_invalid(metalen)))
		return -EACCES;

	xdp->data_meta = meta;

	return 0;
}

static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
	.func = bpf_xdp_adjust_meta,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_ANYTHING,
};

/**
 * DOC: xdp redirect
 *
 * XDP_REDIRECT works by a three-step process, implemented in the functions
 * below:
 *
 * 1. The bpf_redirect() and bpf_redirect_map() helpers will look up the
 *    target of the redirect and store it (along with some other metadata) in
 *    a per-CPU struct bpf_redirect_info.
 *
 * 2. When the program returns the XDP_REDIRECT return code, the driver will
 *    call xdp_do_redirect() which will use the information in struct
 *    bpf_redirect_info to actually enqueue the frame into a map type-specific
 *    bulk queue structure.
 *
 * 3. Before exiting its NAPI poll loop, the driver will call
 *    xdp_do_flush(), which will flush all the different bulk queues,
 *    thus completing the redirect. Note that xdp_do_flush() must be
 *    called before napi_complete_done() in the driver, as the
 *    XDP_REDIRECT logic relies on being inside a single NAPI instance
 *    through to the xdp_do_flush() call for RCU protection of all
 *    in-kernel data structures.
 */
/*
 * Pointers to the map entries will be kept around for this whole sequence of
 * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
 * the core code; instead, the RCU protection relies on everything happening
 * inside a single NAPI poll sequence, which means it's between a pair of calls
 * to local_bh_disable()/local_bh_enable().
 *
 * The map entries are marked as __rcu and the map code makes sure to
 * dereference those pointers with rcu_dereference_check() in a way that works
 * for both sections that hold an rcu_read_lock() and sections that are
 * called from NAPI without a separate rcu_read_lock(). The code below does not
 * use RCU annotations, but relies on those in the map code.
*/ void xdp_do_flush(void) { __dev_flush(); __cpu_map_flush(); __xsk_map_flush(); } EXPORT_SYMBOL_GPL(xdp_do_flush); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) void xdp_do_check_flushed(struct napi_struct *napi) { bool ret; ret = dev_check_flush(); ret |= cpu_map_check_flush(); ret |= xsk_map_check_flush(); WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n", napi->poll); } #endif void bpf_clear_redirect_map(struct bpf_map *map) { struct bpf_redirect_info *ri; int cpu; for_each_possible_cpu(cpu) { ri = per_cpu_ptr(&bpf_redirect_info, cpu); /* Avoid polluting remote cacheline due to writes if * not needed. Once we pass this test, we need the * cmpxchg() to make sure it hasn't been changed in * the meantime by remote CPU. */ if (unlikely(READ_ONCE(ri->map) == map)) cmpxchg(&ri->map, map, NULL); } } DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key); u32 xdp_master_redirect(struct xdp_buff *xdp) { struct net_device *master, *slave; struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); if (slave && slave != xdp->rxq->dev) { /* The target device is different from the receiving device, so * redirect it to the new device. * Using XDP_REDIRECT gets the correct behaviour from XDP enabled * drivers to unmap the packet from their rx ring. */ ri->tgt_index = slave->ifindex; ri->map_id = INT_MAX; ri->map_type = BPF_MAP_TYPE_UNSPEC; return XDP_REDIRECT; } return XDP_TX; } EXPORT_SYMBOL_GPL(xdp_master_redirect); static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ ri->map_type = BPF_MAP_TYPE_UNSPEC; err = __xsk_map_redirect(fwd, xdp); if (unlikely(err)) goto err; _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); return 0; err: _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; } static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, struct net_device *dev, struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) { enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; u32 flags = ri->flags; struct bpf_map *map; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (unlikely(!xdpf)) { err = -EOVERFLOW; goto err; } switch (map_type) { case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: if (unlikely(flags & BPF_F_BROADCAST)) { map = READ_ONCE(ri->map); /* The map pointer is cleared when the map is being torn * down by bpf_clear_redirect_map() */ if (unlikely(!map)) { err = -ENOENT; break; } WRITE_ONCE(ri->map, NULL); err = dev_map_enqueue_multi(xdpf, dev, map, flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_enqueue(fwd, xdpf, dev); } break; case BPF_MAP_TYPE_CPUMAP: err = cpu_map_enqueue(fwd, xdpf, dev); break; case BPF_MAP_TYPE_UNSPEC: if (map_id == INT_MAX) { fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index); if (unlikely(!fwd)) { err = -EINVAL; break; } err = dev_xdp_enqueue(fwd, xdpf, dev); break; } fallthrough; default: err = -EBADRQC; } if (unlikely(err)) goto err; _trace_xdp_redirect_map(dev, 
xdp_prog, fwd, map_type, map_id, ri->tgt_index); return 0; err: _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; } int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; if (map_type == BPF_MAP_TYPE_XSKMAP) return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp), xdp_prog); } EXPORT_SYMBOL_GPL(xdp_do_redirect); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; if (map_type == BPF_MAP_TYPE_XSKMAP) return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog); } EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, void *fwd, enum bpf_map_type map_type, u32 map_id, u32 flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_map *map; int err; switch (map_type) { case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: if (unlikely(flags & BPF_F_BROADCAST)) { map = READ_ONCE(ri->map); /* The map pointer is cleared when the map is being torn * down by bpf_clear_redirect_map() */ if (unlikely(!map)) { err = -ENOENT; break; } WRITE_ONCE(ri->map, NULL); err = dev_map_redirect_multi(dev, skb, xdp_prog, map, flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_generic_redirect(fwd, skb, xdp_prog); } if (unlikely(err)) goto err; break; case BPF_MAP_TYPE_XSKMAP: err = xsk_generic_rcv(fwd, xdp); if (err) goto err; consume_skb(skb); break; case BPF_MAP_TYPE_CPUMAP: err = cpu_map_generic_redirect(fwd, skb); if (unlikely(err)) goto err; break; default: err = -EBADRQC; goto err; } _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); return 0; err: _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; } int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; u32 flags = ri->flags; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index); if (unlikely(!fwd)) { err = -EINVAL; goto err; } err = xdp_ok_fwd_dev(fwd, skb->len); if (unlikely(err)) goto err; skb->dev = fwd; _trace_xdp_redirect(dev, xdp_prog, ri->tgt_index); generic_xdp_tx(skb, xdp_prog); return 0; } return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags); err: _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; } BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); if (unlikely(flags)) return XDP_ABORTED; /* NB! Map type UNSPEC and map_id == INT_MAX (never generated * by map_idr) is used for ifindex based XDP redirect. 
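	 *
	 * I.e. bpf_redirect(ifindex, 0) only records the target here;
	 * xdp_do_redirect() later spots map_type == BPF_MAP_TYPE_UNSPEC
	 * with map_id == INT_MAX, resolves tgt_index via
	 * dev_get_by_index_rcu() and enqueues the frame to that device.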
*/ ri->tgt_index = ifindex; ri->map_id = INT_MAX; ri->map_type = BPF_MAP_TYPE_UNSPEC; return XDP_REDIRECT; } static const struct bpf_func_proto bpf_xdp_redirect_proto = { .func = bpf_xdp_redirect, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u64, key, u64, flags) { return map->ops->map_redirect(map, key, flags); } static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { .func = bpf_xdp_redirect_map, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, unsigned long off, unsigned long len) { void *ptr = skb_header_pointer(skb, off, len, dst_buff); if (unlikely(!ptr)) return len; if (ptr != dst_buff) memcpy(dst_buff, ptr, len); return 0; } BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, u64, flags, void *, meta, u64, meta_size) { u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; if (unlikely(!skb || skb_size > skb->len)) return -EFAULT; return bpf_event_output(map, flags, meta, meta_size, skb, skb_size, bpf_skb_copy); } static const struct bpf_func_proto bpf_skb_event_output_proto = { .func = bpf_skb_event_output, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff) const struct bpf_func_proto bpf_skb_output_proto = { .func = bpf_skb_event_output, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_skb_output_btf_ids[0], .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; static unsigned short bpf_tunnel_key_af(u64 flags) { return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; } BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to, u32, size, u64, flags) { const struct ip_tunnel_info *info = skb_tunnel_info(skb); u8 compat[sizeof(struct bpf_tunnel_key)]; void *to_orig = to; int err; if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_TUNINFO_FLAGS)))) { err = -EINVAL; goto err_clear; } if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) { err = -EPROTO; goto err_clear; } if (unlikely(size != sizeof(struct bpf_tunnel_key))) { err = -EINVAL; switch (size) { case offsetof(struct bpf_tunnel_key, local_ipv6[0]): case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): goto set_compat; case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): /* Fixup deprecated structure layouts here, so we have * a common path later on. 
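			 * Older programs may pass a struct that ends at
			 * remote_ipv6[1], tunnel_label, tunnel_ext or
			 * local_ipv6[0]; those sizes are served from the
			 * on-stack compat buffer, and only the first 'size'
			 * bytes are copied back to the caller at the end.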
*/ if (ip_tunnel_info_af(info) != AF_INET) goto err_clear; set_compat: to = (struct bpf_tunnel_key *)compat; break; default: goto err_clear; } } to->tunnel_id = be64_to_cpu(info->key.tun_id); to->tunnel_tos = info->key.tos; to->tunnel_ttl = info->key.ttl; if (flags & BPF_F_TUNINFO_FLAGS) to->tunnel_flags = info->key.tun_flags; else to->tunnel_ext = 0; if (flags & BPF_F_TUNINFO_IPV6) { memcpy(to->remote_ipv6, &info->key.u.ipv6.src, sizeof(to->remote_ipv6)); memcpy(to->local_ipv6, &info->key.u.ipv6.dst, sizeof(to->local_ipv6)); to->tunnel_label = be32_to_cpu(info->key.label); } else { to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst); memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3); to->tunnel_label = 0; } if (unlikely(size != sizeof(struct bpf_tunnel_key))) memcpy(to_orig, to, size); return 0; err_clear: memset(to_orig, 0, size); return err; } static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { .func = bpf_skb_get_tunnel_key, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) { const struct ip_tunnel_info *info = skb_tunnel_info(skb); int err; if (unlikely(!info || !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { err = -ENOENT; goto err_clear; } if (unlikely(size < info->options_len)) { err = -ENOMEM; goto err_clear; } ip_tunnel_info_opts_get(to, info); if (size > info->options_len) memset(to + info->options_len, 0, size - info->options_len); return info->options_len; err_clear: memset(to, 0, size); return err; } static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { .func = bpf_skb_get_tunnel_opt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_UNINIT_MEM, .arg3_type = ARG_CONST_SIZE, }; static struct metadata_dst __percpu *md_dst; BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, const struct bpf_tunnel_key *, from, u32, size, u64, flags) { struct metadata_dst *md = this_cpu_ptr(md_dst); u8 compat[sizeof(struct bpf_tunnel_key)]; struct ip_tunnel_info *info; if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER | BPF_F_NO_TUNNEL_KEY))) return -EINVAL; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { case offsetof(struct bpf_tunnel_key, local_ipv6[0]): case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): /* Fixup deprecated structure layouts here, so we have * a common path later on. 
*/ memcpy(compat, from, size); memset(compat + size, 0, sizeof(compat) - size); from = (const struct bpf_tunnel_key *) compat; break; default: return -EINVAL; } } if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) || from->tunnel_ext)) return -EINVAL; skb_dst_drop(skb); dst_hold((struct dst_entry *) md); skb_dst_set(skb, (struct dst_entry *) md); info = &md->u.tun_info; memset(info, 0, sizeof(*info)); info->mode = IP_TUNNEL_INFO_TX; info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; if (flags & BPF_F_DONT_FRAGMENT) info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; if (flags & BPF_F_ZERO_CSUM_TX) info->key.tun_flags &= ~TUNNEL_CSUM; if (flags & BPF_F_SEQ_NUMBER) info->key.tun_flags |= TUNNEL_SEQ; if (flags & BPF_F_NO_TUNNEL_KEY) info->key.tun_flags &= ~TUNNEL_KEY; info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.tos = from->tunnel_tos; info->key.ttl = from->tunnel_ttl; if (flags & BPF_F_TUNINFO_IPV6) { info->mode |= IP_TUNNEL_INFO_IPV6; memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, sizeof(from->remote_ipv6)); memcpy(&info->key.u.ipv6.src, from->local_ipv6, sizeof(from->local_ipv6)); info->key.label = cpu_to_be32(from->tunnel_label) & IPV6_FLOWLABEL_MASK; } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); info->key.flow_flags = FLOWI_FLAG_ANYSRC; } return 0; } static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { .func = bpf_skb_set_tunnel_key, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, const u8 *, from, u32, size) { struct ip_tunnel_info *info = skb_tunnel_info(skb); const struct metadata_dst *md = this_cpu_ptr(md_dst); if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1)))) return -EINVAL; if (unlikely(size > IP_TUNNEL_OPTS_MAX)) return -ENOMEM; ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT); return 0; } static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { .func = bpf_skb_set_tunnel_opt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, }; static const struct bpf_func_proto * bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) { if (!md_dst) { struct metadata_dst __percpu *tmp; tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, METADATA_IP_TUNNEL, GFP_KERNEL); if (!tmp) return NULL; if (cmpxchg(&md_dst, NULL, tmp)) metadata_dst_free_percpu(tmp); } switch (which) { case BPF_FUNC_skb_set_tunnel_key: return &bpf_skb_set_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_opt: return &bpf_skb_set_tunnel_opt_proto; default: return NULL; } } BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, u32, idx) { struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; struct sock *sk; sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) return -ENOENT; if (unlikely(idx >= array->map.max_entries)) return -E2BIG; cgrp = READ_ONCE(array->ptrs[idx]); if (unlikely(!cgrp)) return -EAGAIN; return sk_under_cgroup_hierarchy(sk, cgrp); } static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { .func = bpf_skb_under_cgroup, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; #ifdef CONFIG_SOCK_CGROUP_DATA static inline u64 
__bpf_sk_cgroup_id(struct sock *sk) { struct cgroup *cgrp; sk = sk_to_full_sk(sk); if (!sk || !sk_fullsock(sk)) return 0; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); return cgroup_id(cgrp); } BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb) { return __bpf_sk_cgroup_id(skb->sk); } static const struct bpf_func_proto bpf_skb_cgroup_id_proto = { .func = bpf_skb_cgroup_id, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk, int ancestor_level) { struct cgroup *ancestor; struct cgroup *cgrp; sk = sk_to_full_sk(sk); if (!sk || !sk_fullsock(sk)) return 0; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ancestor = cgroup_ancestor(cgrp, ancestor_level); if (!ancestor) return 0; return cgroup_id(ancestor); } BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int, ancestor_level) { return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level); } static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = { .func = bpf_skb_ancestor_cgroup_id, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk) { return __bpf_sk_cgroup_id(sk); } static const struct bpf_func_proto bpf_sk_cgroup_id_proto = { .func = bpf_sk_cgroup_id, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, }; BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level) { return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level); } static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = { .func = bpf_sk_ancestor_cgroup_id, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_ANYTHING, }; #endif static unsigned long bpf_xdp_copy(void *dst, const void *ctx, unsigned long off, unsigned long len) { struct xdp_buff *xdp = (struct xdp_buff *)ctx; bpf_xdp_copy_buf(xdp, off, dst, len, false); return 0; } BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, u64, flags, void *, meta, u64, meta_size) { u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp))) return -EFAULT; return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size, bpf_xdp_copy); } static const struct bpf_func_proto bpf_xdp_event_output_proto = { .func = bpf_xdp_event_output, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff) const struct bpf_func_proto bpf_xdp_output_proto = { .func = bpf_xdp_event_output, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_xdp_output_btf_ids[0], .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) { return skb->sk ? 
__sock_gen_cookie(skb->sk) : 0; } static const struct bpf_func_proto bpf_get_socket_cookie_proto = { .func = bpf_get_socket_cookie, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) { return __sock_gen_cookie(ctx->sk); } static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = { .func = bpf_get_socket_cookie_sock_addr, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx) { return __sock_gen_cookie(ctx); } static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = { .func = bpf_get_socket_cookie_sock, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk) { return sk ? sock_gen_cookie(sk) : 0; } const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = { .func = bpf_get_socket_ptr_cookie, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | PTR_MAYBE_NULL, }; BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) { return __sock_gen_cookie(ctx->sk); } static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { .func = bpf_get_socket_cookie_sock_ops, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; static u64 __bpf_get_netns_cookie(struct sock *sk) { const struct net *net = sk ? sock_net(sk) : &init_net; return net->net_cookie; } BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx) { return __bpf_get_netns_cookie(ctx); } static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = { .func = bpf_get_netns_cookie_sock, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX_OR_NULL, }; BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) { return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL); } static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = { .func = bpf_get_netns_cookie_sock_addr, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX_OR_NULL, }; BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) { return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL); } static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = { .func = bpf_get_netns_cookie_sock_ops, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX_OR_NULL, }; BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx) { return __bpf_get_netns_cookie(ctx ? 
ctx->sk : NULL); } static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = { .func = bpf_get_netns_cookie_sk_msg, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX_OR_NULL, }; BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) { struct sock *sk = sk_to_full_sk(skb->sk); kuid_t kuid; if (!sk || !sk_fullsock(sk)) return overflowuid; kuid = sock_net_uid(sock_net(sk), sk); return from_kuid_munged(sock_net(sk)->user_ns, kuid); } static const struct bpf_func_proto bpf_get_socket_uid_proto = { .func = bpf_get_socket_uid, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; static int sol_socket_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { switch (optname) { case SO_REUSEADDR: case SO_SNDBUF: case SO_RCVBUF: case SO_KEEPALIVE: case SO_PRIORITY: case SO_REUSEPORT: case SO_RCVLOWAT: case SO_MARK: case SO_MAX_PACING_RATE: case SO_BINDTOIFINDEX: case SO_TXREHASH: if (*optlen != sizeof(int)) return -EINVAL; break; case SO_BINDTODEVICE: break; default: return -EINVAL; } if (getopt) { if (optname == SO_BINDTODEVICE) return -EINVAL; return sk_getsockopt(sk, SOL_SOCKET, optname, KERNEL_SOCKPTR(optval), KERNEL_SOCKPTR(optlen)); } return sk_setsockopt(sk, SOL_SOCKET, optname, KERNEL_SOCKPTR(optval), *optlen); } static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, char *optval, int optlen) { struct tcp_sock *tp = tcp_sk(sk); unsigned long timeout; int val; if (optlen != sizeof(int)) return -EINVAL; val = *(int *)optval; /* Only some options are supported */ switch (optname) { case TCP_BPF_IW: if (val <= 0 || tp->data_segs_out > tp->syn_data) return -EINVAL; tcp_snd_cwnd_set(tp, val); break; case TCP_BPF_SNDCWND_CLAMP: if (val <= 0) return -EINVAL; tp->snd_cwnd_clamp = val; tp->snd_ssthresh = val; break; case TCP_BPF_DELACK_MAX: timeout = usecs_to_jiffies(val); if (timeout > TCP_DELACK_MAX || timeout < TCP_TIMEOUT_MIN) return -EINVAL; inet_csk(sk)->icsk_delack_max = timeout; break; case TCP_BPF_RTO_MIN: timeout = usecs_to_jiffies(val); if (timeout > TCP_RTO_MIN || timeout < TCP_TIMEOUT_MIN) return -EINVAL; inet_csk(sk)->icsk_rto_min = timeout; break; default: return -EINVAL; } return 0; } static int sol_tcp_sockopt_congestion(struct sock *sk, char *optval, int *optlen, bool getopt) { struct tcp_sock *tp; int ret; if (*optlen < 2) return -EINVAL; if (getopt) { if (!inet_csk(sk)->icsk_ca_ops) return -EINVAL; /* BPF expects NULL-terminated tcp-cc string */ optval[--(*optlen)] = '\0'; return do_tcp_getsockopt(sk, SOL_TCP, TCP_CONGESTION, KERNEL_SOCKPTR(optval), KERNEL_SOCKPTR(optlen)); } /* "cdg" is the only cc that alloc a ptr * in inet_csk_ca area. The bpf-tcp-cc may * overwrite this ptr after switching to cdg. */ if (*optlen >= sizeof("cdg") - 1 && !strncmp("cdg", optval, *optlen)) return -ENOTSUPP; /* It stops this looping * * .init => bpf_setsockopt(tcp_cc) => .init => * bpf_setsockopt(tcp_cc)" => .init => .... * * The second bpf_setsockopt(tcp_cc) is not allowed * in order to break the loop when both .init * are the same bpf prog. * * This applies even the second bpf_setsockopt(tcp_cc) * does not cause a loop. This limits only the first * '.init' can call bpf_setsockopt(TCP_CONGESTION) to * pick a fallback cc (eg. peer does not support ECN) * and the second '.init' cannot fallback to * another. 
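	 *
	 * Concretely: bpf_chg_cc_inprogress is set around the
	 * do_tcp_setsockopt(TCP_CONGESTION) call below, so if the newly
	 * installed bpf-tcp-cc's .init itself calls
	 * bpf_setsockopt(TCP_CONGESTION), the nested attempt sees the
	 * flag and bails out with -EBUSY instead of recursing.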
*/ tp = tcp_sk(sk); if (tp->bpf_chg_cc_inprogress) return -EBUSY; tp->bpf_chg_cc_inprogress = 1; ret = do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION, KERNEL_SOCKPTR(optval), *optlen); tp->bpf_chg_cc_inprogress = 0; return ret; } static int sol_tcp_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { if (sk->sk_protocol != IPPROTO_TCP) return -EINVAL; switch (optname) { case TCP_NODELAY: case TCP_MAXSEG: case TCP_KEEPIDLE: case TCP_KEEPINTVL: case TCP_KEEPCNT: case TCP_SYNCNT: case TCP_WINDOW_CLAMP: case TCP_THIN_LINEAR_TIMEOUTS: case TCP_USER_TIMEOUT: case TCP_NOTSENT_LOWAT: case TCP_SAVE_SYN: if (*optlen != sizeof(int)) return -EINVAL; break; case TCP_CONGESTION: return sol_tcp_sockopt_congestion(sk, optval, optlen, getopt); case TCP_SAVED_SYN: if (*optlen < 1) return -EINVAL; break; default: if (getopt) return -EINVAL; return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen); } if (getopt) { if (optname == TCP_SAVED_SYN) { struct tcp_sock *tp = tcp_sk(sk); if (!tp->saved_syn || *optlen > tcp_saved_syn_len(tp->saved_syn)) return -EINVAL; memcpy(optval, tp->saved_syn->data, *optlen); /* It cannot free tp->saved_syn here because it * does not know if the user space still needs it. */ return 0; } return do_tcp_getsockopt(sk, SOL_TCP, optname, KERNEL_SOCKPTR(optval), KERNEL_SOCKPTR(optlen)); } return do_tcp_setsockopt(sk, SOL_TCP, optname, KERNEL_SOCKPTR(optval), *optlen); } static int sol_ip_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { if (sk->sk_family != AF_INET) return -EINVAL; switch (optname) { case IP_TOS: if (*optlen != sizeof(int)) return -EINVAL; break; default: return -EINVAL; } if (getopt) return do_ip_getsockopt(sk, SOL_IP, optname, KERNEL_SOCKPTR(optval), KERNEL_SOCKPTR(optlen)); return do_ip_setsockopt(sk, SOL_IP, optname, KERNEL_SOCKPTR(optval), *optlen); } static int sol_ipv6_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { if (sk->sk_family != AF_INET6) return -EINVAL; switch (optname) { case IPV6_TCLASS: case IPV6_AUTOFLOWLABEL: if (*optlen != sizeof(int)) return -EINVAL; break; default: return -EINVAL; } if (getopt) return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname, KERNEL_SOCKPTR(optval), KERNEL_SOCKPTR(optlen)); return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname, KERNEL_SOCKPTR(optval), *optlen); } static int __bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { if (!sk_fullsock(sk)) return -EINVAL; if (level == SOL_SOCKET) return sol_socket_sockopt(sk, optname, optval, &optlen, false); else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) return sol_ip_sockopt(sk, optname, optval, &optlen, false); else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) return sol_ipv6_sockopt(sk, optname, optval, &optlen, false); else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) return sol_tcp_sockopt(sk, optname, optval, &optlen, false); return -EINVAL; } static int _bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { if (sk_fullsock(sk)) sock_owned_by_me(sk); return __bpf_setsockopt(sk, level, optname, optval, optlen); } static int __bpf_getsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { int err, saved_optlen = optlen; if (!sk_fullsock(sk)) { err = -EINVAL; goto done; } if (level == SOL_SOCKET) err = sol_socket_sockopt(sk, optname, optval, &optlen, true); else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) err = sol_tcp_sockopt(sk, optname, optval, &optlen, true); else if 
(IS_ENABLED(CONFIG_INET) && level == SOL_IP) err = sol_ip_sockopt(sk, optname, optval, &optlen, true); else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) err = sol_ipv6_sockopt(sk, optname, optval, &optlen, true); else err = -EINVAL; done: if (err) optlen = 0; if (optlen < saved_optlen) memset(optval + optlen, 0, saved_optlen - optlen); return err; } static int _bpf_getsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { if (sk_fullsock(sk)) sock_owned_by_me(sk); return __bpf_getsockopt(sk, level, optname, optval, optlen); } BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { return _bpf_setsockopt(sk, level, optname, optval, optlen); } const struct bpf_func_proto bpf_sk_setsockopt_proto = { .func = bpf_sk_setsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { return _bpf_getsockopt(sk, level, optname, optval, optlen); } const struct bpf_func_proto bpf_sk_getsockopt_proto = { .func = bpf_sk_getsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_UNINIT_MEM, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { return __bpf_setsockopt(sk, level, optname, optval, optlen); } const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = { .func = bpf_unlocked_sk_setsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { return __bpf_getsockopt(sk, level, optname, optval, optlen); } const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = { .func = bpf_unlocked_sk_getsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_UNINIT_MEM, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, int, level, int, optname, char *, optval, int, optlen) { return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen); } static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = { .func = bpf_sock_addr_setsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx, int, level, int, optname, char *, optval, int, optlen) { return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen); } static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = { .func = bpf_sock_addr_getsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_UNINIT_MEM, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, 
int, optname, char *, optval, int, optlen) { return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen); } static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = { .func = bpf_sock_ops_setsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE, }; static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock, int optname, const u8 **start) { struct sk_buff *syn_skb = bpf_sock->syn_skb; const u8 *hdr_start; int ret; if (syn_skb) { /* sk is a request_sock here */ if (optname == TCP_BPF_SYN) { hdr_start = syn_skb->data; ret = tcp_hdrlen(syn_skb); } else if (optname == TCP_BPF_SYN_IP) { hdr_start = skb_network_header(syn_skb); ret = skb_network_header_len(syn_skb) + tcp_hdrlen(syn_skb); } else { /* optname == TCP_BPF_SYN_MAC */ hdr_start = skb_mac_header(syn_skb); ret = skb_mac_header_len(syn_skb) + skb_network_header_len(syn_skb) + tcp_hdrlen(syn_skb); } } else { struct sock *sk = bpf_sock->sk; struct saved_syn *saved_syn; if (sk->sk_state == TCP_NEW_SYN_RECV) /* synack retransmit. bpf_sock->syn_skb will * not be available. It has to resort to * saved_syn (if it is saved). */ saved_syn = inet_reqsk(sk)->saved_syn; else saved_syn = tcp_sk(sk)->saved_syn; if (!saved_syn) return -ENOENT; if (optname == TCP_BPF_SYN) { hdr_start = saved_syn->data + saved_syn->mac_hdrlen + saved_syn->network_hdrlen; ret = saved_syn->tcp_hdrlen; } else if (optname == TCP_BPF_SYN_IP) { hdr_start = saved_syn->data + saved_syn->mac_hdrlen; ret = saved_syn->network_hdrlen + saved_syn->tcp_hdrlen; } else { /* optname == TCP_BPF_SYN_MAC */ /* TCP_SAVE_SYN may not have saved the mac hdr */ if (!saved_syn->mac_hdrlen) return -ENOENT; hdr_start = saved_syn->data; ret = saved_syn->mac_hdrlen + saved_syn->network_hdrlen + saved_syn->tcp_hdrlen; } } *start = hdr_start; return ret; } BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP && optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) { int ret, copy_len = 0; const u8 *start; ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start); if (ret > 0) { copy_len = ret; if (optlen < copy_len) { copy_len = optlen; ret = -ENOSPC; } memcpy(optval, start, copy_len); } /* Zero out unused buffer at the end */ memset(optval + copy_len, 0, optlen - copy_len); return ret; } return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen); } static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = { .func = bpf_sock_ops_getsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_UNINIT_MEM, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, int, argval) { struct sock *sk = bpf_sock->sk; int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk)) return -EINVAL; tcp_sk(sk)->bpf_sock_ops_cb_flags = val; return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); } static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { .func = bpf_sock_ops_cb_flags_set, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; EXPORT_SYMBOL_GPL(ipv6_bpf_stub); BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern 
*, ctx, struct sockaddr *, addr, int, addr_len) { #ifdef CONFIG_INET struct sock *sk = ctx->sk; u32 flags = BIND_FROM_BPF; int err; err = -EINVAL; if (addr_len < offsetofend(struct sockaddr, sa_family)) return err; if (addr->sa_family == AF_INET) { if (addr_len < sizeof(struct sockaddr_in)) return err; if (((struct sockaddr_in *)addr)->sin_port == htons(0)) flags |= BIND_FORCE_ADDRESS_NO_PORT; return __inet_bind(sk, addr, addr_len, flags); #if IS_ENABLED(CONFIG_IPV6) } else if (addr->sa_family == AF_INET6) { if (addr_len < SIN6_LEN_RFC2133) return err; if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0)) flags |= BIND_FORCE_ADDRESS_NO_PORT; /* ipv6_bpf_stub cannot be NULL, since it's called from * bpf_cgroup_inet6_connect hook and ipv6 is already loaded */ return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags); #endif /* CONFIG_IPV6 */ } #endif /* CONFIG_INET */ return -EAFNOSUPPORT; } static const struct bpf_func_proto bpf_bind_proto = { .func = bpf_bind, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, }; #ifdef CONFIG_XFRM #if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) struct metadata_dst __percpu *xfrm_bpf_md_dst; EXPORT_SYMBOL_GPL(xfrm_bpf_md_dst); #endif BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index, struct bpf_xfrm_state *, to, u32, size, u64, flags) { const struct sec_path *sp = skb_sec_path(skb); const struct xfrm_state *x; if (!sp || unlikely(index >= sp->len || flags)) goto err_clear; x = sp->xvec[index]; if (unlikely(size != sizeof(struct bpf_xfrm_state))) goto err_clear; to->reqid = x->props.reqid; to->spi = x->id.spi; to->family = x->props.family; to->ext = 0; if (to->family == AF_INET6) { memcpy(to->remote_ipv6, x->props.saddr.a6, sizeof(to->remote_ipv6)); } else { to->remote_ipv4 = x->props.saddr.a4; memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); } return 0; err_clear: memset(to, 0, size); return -EINVAL; } static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { .func = bpf_skb_get_xfrm_state, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, .arg5_type = ARG_ANYTHING, }; #endif #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu) { params->h_vlan_TCI = 0; params->h_vlan_proto = 0; if (mtu) params->mtu_result = mtu; /* union with tot_len */ return 0; } #endif #if IS_ENABLED(CONFIG_INET) static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 flags, bool check_mtu) { struct fib_nh_common *nhc; struct in_device *in_dev; struct neighbour *neigh; struct net_device *dev; struct fib_result res; struct flowi4 fl4; u32 mtu = 0; int err; dev = dev_get_by_index_rcu(net, params->ifindex); if (unlikely(!dev)) return -ENODEV; /* verify forwarding is enabled on this interface */ in_dev = __in_dev_get_rcu(dev); if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { fl4.flowi4_iif = 1; fl4.flowi4_oif = params->ifindex; } else { fl4.flowi4_iif = params->ifindex; fl4.flowi4_oif = 0; } fl4.flowi4_tos = params->tos & IPTOS_RT_MASK; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = 0; fl4.flowi4_proto = params->l4_protocol; fl4.daddr = 
params->ipv4_dst; fl4.saddr = params->ipv4_src; fl4.fl4_sport = params->sport; fl4.fl4_dport = params->dport; fl4.flowi4_multipath_hash = 0; if (flags & BPF_FIB_LOOKUP_DIRECT) { u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib_table *tb; if (flags & BPF_FIB_LOOKUP_TBID) { tbid = params->tbid; /* zero out for vlan output */ params->tbid = 0; } tb = fib_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); } else { fl4.flowi4_mark = 0; fl4.flowi4_secid = 0; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); } if (err) { /* map fib lookup errors to RTN_ type */ if (err == -EINVAL) return BPF_FIB_LKUP_RET_BLACKHOLE; if (err == -EHOSTUNREACH) return BPF_FIB_LKUP_RET_UNREACHABLE; if (err == -EACCES) return BPF_FIB_LKUP_RET_PROHIBIT; return BPF_FIB_LKUP_RET_NOT_FWDED; } if (res.type != RTN_UNICAST) return BPF_FIB_LKUP_RET_NOT_FWDED; if (fib_info_num_path(res.fi) > 1) fib_select_path(net, &res, &fl4, NULL); if (check_mtu) { mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); if (params->tot_len > mtu) { params->mtu_result = mtu; /* union with tot_len */ return BPF_FIB_LKUP_RET_FRAG_NEEDED; } } nhc = res.nhc; /* do not handle lwt encaps right now */ if (nhc->nhc_lwtstate) return BPF_FIB_LKUP_RET_UNSUPP_LWT; dev = nhc->nhc_dev; params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; if (flags & BPF_FIB_LOOKUP_SRC) params->ipv4_src = fib_result_prefsrc(net, &res); /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ if (likely(nhc->nhc_gw_family != AF_INET6)) { if (nhc->nhc_gw_family) params->ipv4_dst = nhc->nhc_gw.ipv4; } else { struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst; params->family = AF_INET6; *dst = nhc->nhc_gw.ipv6; } if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; if (likely(nhc->nhc_gw_family != AF_INET6)) neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); else neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst); if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); set_fwd_params: return bpf_fib_set_fwd_params(params, mtu); } #endif #if IS_ENABLED(CONFIG_IPV6) static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 flags, bool check_mtu) { struct in6_addr *src = (struct in6_addr *) params->ipv6_src; struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst; struct fib6_result res = {}; struct neighbour *neigh; struct net_device *dev; struct inet6_dev *idev; struct flowi6 fl6; int strict = 0; int oif, err; u32 mtu = 0; /* link local addresses are never forwarded */ if (rt6_need_strict(dst) || rt6_need_strict(src)) return BPF_FIB_LKUP_RET_NOT_FWDED; dev = dev_get_by_index_rcu(net, params->ifindex); if (unlikely(!dev)) return -ENODEV; idev = __in6_dev_get_safely(dev); if (unlikely(!idev || !READ_ONCE(idev->cnf.forwarding))) return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { fl6.flowi6_iif = 1; oif = fl6.flowi6_oif = params->ifindex; } else { oif = fl6.flowi6_iif = params->ifindex; fl6.flowi6_oif = 0; strict = RT6_LOOKUP_F_HAS_SADDR; } fl6.flowlabel = params->flowinfo; fl6.flowi6_scope = 0; fl6.flowi6_flags = 0; fl6.mp_hash = 0; fl6.flowi6_proto = params->l4_protocol; fl6.daddr = *dst; fl6.saddr = *src; fl6.fl6_sport = params->sport; 
fl6.fl6_dport = params->dport; if (flags & BPF_FIB_LOOKUP_DIRECT) { u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib6_table *tb; if (flags & BPF_FIB_LOOKUP_TBID) { tbid = params->tbid; /* zero out for vlan output */ params->tbid = 0; } tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res, strict); } else { fl6.flowi6_mark = 0; fl6.flowi6_secid = 0; fl6.flowi6_tun_key.tun_id = 0; fl6.flowi6_uid = sock_net_uid(net, NULL); err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict); } if (unlikely(err || IS_ERR_OR_NULL(res.f6i) || res.f6i == net->ipv6.fib6_null_entry)) return BPF_FIB_LKUP_RET_NOT_FWDED; switch (res.fib6_type) { /* only unicast is forwarded */ case RTN_UNICAST: break; case RTN_BLACKHOLE: return BPF_FIB_LKUP_RET_BLACKHOLE; case RTN_UNREACHABLE: return BPF_FIB_LKUP_RET_UNREACHABLE; case RTN_PROHIBIT: return BPF_FIB_LKUP_RET_PROHIBIT; default: return BPF_FIB_LKUP_RET_NOT_FWDED; } ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif, fl6.flowi6_oif != 0, NULL, strict); if (check_mtu) { mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src); if (params->tot_len > mtu) { params->mtu_result = mtu; /* union with tot_len */ return BPF_FIB_LKUP_RET_FRAG_NEEDED; } } if (res.nh->fib_nh_lws) return BPF_FIB_LKUP_RET_UNSUPP_LWT; if (res.nh->fib_nh_gw_family) *dst = res.nh->fib_nh_gw6; dev = res.nh->fib_nh_dev; params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; if (flags & BPF_FIB_LOOKUP_SRC) { if (res.f6i->fib6_prefsrc.plen) { *src = res.f6i->fib6_prefsrc.addr; } else { err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, &fl6.daddr, 0, src); if (err) return BPF_FIB_LKUP_RET_NO_SRC_ADDR; } } if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is * not needed here. 
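 * The neighbour entry returned by the noref lookup below is only
 * dereferenced under that RCU read side; no reference is taken on it.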
*/ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); set_fwd_params: return bpf_fib_set_fwd_params(params, mtu); } #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) { if (plen < sizeof(*params)) return -EINVAL; if (flags & ~BPF_FIB_LOOKUP_MASK) return -EINVAL; switch (params->family) { #if IS_ENABLED(CONFIG_INET) case AF_INET: return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params, flags, true); #endif #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params, flags, true); #endif } return -EAFNOSUPPORT; } static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = { .func = bpf_xdp_fib_lookup, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, struct bpf_fib_lookup *, params, int, plen, u32, flags) { struct net *net = dev_net(skb->dev); int rc = -EAFNOSUPPORT; bool check_mtu = false; if (plen < sizeof(*params)) return -EINVAL; if (flags & ~BPF_FIB_LOOKUP_MASK) return -EINVAL; if (params->tot_len) check_mtu = true; switch (params->family) { #if IS_ENABLED(CONFIG_INET) case AF_INET: rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu); break; #endif #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu); break; #endif } if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) { struct net_device *dev; /* When tot_len isn't provided by user, check skb * against MTU of FIB lookup resulting net_device */ dev = dev_get_by_index_rcu(net, params->ifindex); if (!is_skb_forwardable(dev, skb)) rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; params->mtu_result = dev->mtu; /* union with tot_len */ } return rc; } static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { .func = bpf_skb_fib_lookup, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; static struct net_device *__dev_via_ifindex(struct net_device *dev_curr, u32 ifindex) { struct net *netns = dev_net(dev_curr); /* Non-redirect use-cases can use ifindex=0 and save ifindex lookup */ if (ifindex == 0) return dev_curr; return dev_get_by_index_rcu(netns, ifindex); } BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb, u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags) { int ret = BPF_MTU_CHK_RET_FRAG_NEEDED; struct net_device *dev = skb->dev; int skb_len, dev_len; int mtu; if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) return -EINVAL; if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) return -EINVAL; dev = __dev_via_ifindex(dev, ifindex); if (unlikely(!dev)) return -ENODEV; mtu = READ_ONCE(dev->mtu); dev_len = mtu + dev->hard_header_len; /* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ skb_len = *mtu_len ? 
*mtu_len + dev->hard_header_len : skb->len; skb_len += len_diff; /* minus result pass check */ if (skb_len <= dev_len) { ret = BPF_MTU_CHK_RET_SUCCESS; goto out; } /* At this point, skb->len exceed MTU, but as it include length of all * segments, it can still be below MTU. The SKB can possibly get * re-segmented in transmit path (see validate_xmit_skb). Thus, user * must choose if segs are to be MTU checked. */ if (skb_is_gso(skb)) { ret = BPF_MTU_CHK_RET_SUCCESS; if (flags & BPF_MTU_CHK_SEGS && !skb_gso_validate_network_len(skb, mtu)) ret = BPF_MTU_CHK_RET_SEGS_TOOBIG; } out: /* BPF verifier guarantees valid pointer */ *mtu_len = mtu; return ret; } BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags) { struct net_device *dev = xdp->rxq->dev; int xdp_len = xdp->data_end - xdp->data; int ret = BPF_MTU_CHK_RET_SUCCESS; int mtu, dev_len; /* XDP variant doesn't support multi-buffer segment check (yet) */ if (unlikely(flags)) return -EINVAL; dev = __dev_via_ifindex(dev, ifindex); if (unlikely(!dev)) return -ENODEV; mtu = READ_ONCE(dev->mtu); /* Add L2-header as dev MTU is L3 size */ dev_len = mtu + dev->hard_header_len; /* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ if (*mtu_len) xdp_len = *mtu_len + dev->hard_header_len; xdp_len += len_diff; /* minus result pass check */ if (xdp_len > dev_len) ret = BPF_MTU_CHK_RET_FRAG_NEEDED; /* BPF verifier guarantees valid pointer */ *mtu_len = mtu; return ret; } static const struct bpf_func_proto bpf_skb_check_mtu_proto = { .func = bpf_skb_check_mtu, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_INT, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; static const struct bpf_func_proto bpf_xdp_check_mtu_proto = { .func = bpf_xdp_check_mtu, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_INT, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) { int err; struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr; if (!seg6_validate_srh(srh, len, false)) return -EINVAL; switch (type) { case BPF_LWT_ENCAP_SEG6_INLINE: if (skb->protocol != htons(ETH_P_IPV6)) return -EBADMSG; err = seg6_do_srh_inline(skb, srh); break; case BPF_LWT_ENCAP_SEG6: skb_reset_inner_headers(skb); skb->encapsulation = 1; err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6); break; default: return -EINVAL; } bpf_compute_data_pointers(skb); if (err) return err; skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); } #endif /* CONFIG_IPV6_SEG6_BPF */ #if IS_ENABLED(CONFIG_LWTUNNEL_BPF) static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) { return bpf_lwt_push_ip_encap(skb, hdr, len, ingress); } #endif BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, u32, len) { switch (type) { #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) case BPF_LWT_ENCAP_SEG6: case BPF_LWT_ENCAP_SEG6_INLINE: return bpf_push_seg6_encap(skb, type, hdr, len); #endif #if IS_ENABLED(CONFIG_LWTUNNEL_BPF) case BPF_LWT_ENCAP_IP: return bpf_push_ip_encap(skb, hdr, len, true /* ingress */); #endif default: return -EINVAL; } } BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, u32, len) { switch (type) { #if IS_ENABLED(CONFIG_LWTUNNEL_BPF) case BPF_LWT_ENCAP_IP: 
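		/* Same encap helper as the lwt_in hook above, but on the
		 * xmit path the header is pushed with ingress == false.
		 */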
return bpf_push_ip_encap(skb, hdr, len, false /* egress */); #endif default: return -EINVAL; } } static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { .func = bpf_lwt_in_push_encap, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE }; static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { .func = bpf_lwt_xmit_push_encap, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE }; #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset, const void *, from, u32, len) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); struct ipv6_sr_hdr *srh = srh_state->srh; void *srh_tlvs, *srh_end, *ptr; int srhoff = 0; if (srh == NULL) return -EINVAL; srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4)); srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen); ptr = skb->data + offset; if (ptr >= srh_tlvs && ptr + len <= srh_end) srh_state->valid = false; else if (ptr < (void *)&srh->flags || ptr + len > (void *)&srh->segments) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) return -EINVAL; srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); memcpy(skb->data + offset, from, len); return 0; } static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { .func = bpf_lwt_seg6_store_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE }; static void bpf_update_srh_state(struct sk_buff *skb) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); int srhoff = 0; if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) { srh_state->srh = NULL; } else { srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_state->hdrlen = srh_state->srh->hdrlen << 3; srh_state->valid = true; } } BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, u32, action, void *, param, u32, param_len) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); int hdroff = 0; int err; switch (action) { case SEG6_LOCAL_ACTION_END_X: if (!seg6_bpf_has_valid_srh(skb)) return -EBADMSG; if (param_len != sizeof(struct in6_addr)) return -EINVAL; return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0); case SEG6_LOCAL_ACTION_END_T: if (!seg6_bpf_has_valid_srh(skb)) return -EBADMSG; if (param_len != sizeof(int)) return -EINVAL; return seg6_lookup_nexthop(skb, NULL, *(int *)param); case SEG6_LOCAL_ACTION_END_DT6: if (!seg6_bpf_has_valid_srh(skb)) return -EBADMSG; if (param_len != sizeof(int)) return -EINVAL; if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0) return -EBADMSG; if (!pskb_pull(skb, hdroff)) return -EBADMSG; skb_postpull_rcsum(skb, skb_network_header(skb), hdroff); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->encapsulation = 0; bpf_compute_data_pointers(skb); bpf_update_srh_state(skb); return seg6_lookup_nexthop(skb, NULL, *(int *)param); case SEG6_LOCAL_ACTION_END_B6: if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) return -EBADMSG; err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE, param, param_len); if (!err) 
bpf_update_srh_state(skb); return err; case SEG6_LOCAL_ACTION_END_B6_ENCAP: if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) return -EBADMSG; err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6, param, param_len); if (!err) bpf_update_srh_state(skb); return err; default: return -EINVAL; } } static const struct bpf_func_proto bpf_lwt_seg6_action_proto = { .func = bpf_lwt_seg6_action, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE }; BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, s32, len) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); struct ipv6_sr_hdr *srh = srh_state->srh; void *srh_end, *srh_tlvs, *ptr; struct ipv6hdr *hdr; int srhoff = 0; int ret; if (unlikely(srh == NULL)) return -EINVAL; srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) + ((srh->first_segment + 1) << 4)); srh_end = (void *)((unsigned char *)srh + sizeof(*srh) + srh_state->hdrlen); ptr = skb->data + offset; if (unlikely(ptr < srh_tlvs || ptr > srh_end)) return -EFAULT; if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end)) return -EFAULT; if (len > 0) { ret = skb_cow_head(skb, len); if (unlikely(ret < 0)) return ret; ret = bpf_skb_net_hdr_push(skb, offset, len); } else { ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len); } bpf_compute_data_pointers(skb); if (unlikely(ret < 0)) return ret; hdr = (struct ipv6hdr *)skb->data; hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) return -EINVAL; srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_state->hdrlen += len; srh_state->valid = false; return 0; } static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { .func = bpf_lwt_seg6_adjust_srh, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; #endif /* CONFIG_IPV6_SEG6_BPF */ #ifdef CONFIG_INET static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, int dif, int sdif, u8 family, u8 proto) { struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; bool refcounted = false; struct sock *sk = NULL; if (family == AF_INET) { __be32 src4 = tuple->ipv4.saddr; __be32 dst4 = tuple->ipv4.daddr; if (proto == IPPROTO_TCP) sk = __inet_lookup(net, hinfo, NULL, 0, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, dif, sdif, &refcounted); else sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, dif, sdif, net->ipv4.udp_table, NULL); #if IS_ENABLED(CONFIG_IPV6) } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; if (proto == IPPROTO_TCP) sk = __inet6_lookup(net, hinfo, NULL, 0, src6, tuple->ipv6.sport, dst6, ntohs(tuple->ipv6.dport), dif, sdif, &refcounted); else if (likely(ipv6_bpf_stub)) sk = ipv6_bpf_stub->udp6_lib_lookup(net, src6, tuple->ipv6.sport, dst6, tuple->ipv6.dport, dif, sdif, net->ipv4.udp_table, NULL); #endif } if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) { WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); sk = NULL; } return sk; } /* bpf_skc_lookup performs the core lookup for different types of sockets, * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. 
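 * A refcounted socket returned to a BPF program must later be released
 * with bpf_sk_release().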
*/ static struct sock * __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, u64 flags, int sdif) { struct sock *sk = NULL; struct net *net; u8 family; if (len == sizeof(tuple->ipv4)) family = AF_INET; else if (len == sizeof(tuple->ipv6)) family = AF_INET6; else return NULL; if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; if (sdif < 0) { if (family == AF_INET) sdif = inet_sdif(skb); else sdif = inet6_sdif(skb); } if ((s32)netns_id < 0) { net = caller_net; sk = sk_lookup(net, tuple, ifindex, sdif, family, proto); } else { net = get_net_ns_by_id(caller_net, netns_id); if (unlikely(!net)) goto out; sk = sk_lookup(net, tuple, ifindex, sdif, family, proto); put_net(net); } out: return sk; } static struct sock * __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, u64 flags, int sdif) { struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, netns_id, flags, sdif); if (sk) { struct sock *sk2 = sk_to_full_sk(sk); /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk * sock refcnt is decremented to prevent a request_sock leak. */ if (!sk_fullsock(sk2)) sk2 = NULL; if (sk2 != sk) { sock_gen_put(sk); /* Ensure there is no need to bump sk2 refcnt */ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); return NULL; } sk = sk2; } } return sk; } static struct sock * bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, u8 proto, u64 netns_id, u64 flags) { struct net *caller_net; int ifindex; if (skb->dev) { caller_net = dev_net(skb->dev); ifindex = skb->dev->ifindex; } else { caller_net = sock_net(skb->sk); ifindex = 0; } return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, netns_id, flags, -1); } static struct sock * bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, u8 proto, u64 netns_id, u64 flags) { struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id, flags); if (sk) { struct sock *sk2 = sk_to_full_sk(sk); /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk * sock refcnt is decremented to prevent a request_sock leak. 
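 * Only the full socket is handed back; since no extra reference is
 * taken on the listener, it is only returned when it is SOCK_RCU_FREE.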
*/ if (!sk_fullsock(sk2)) sk2 = NULL; if (sk2 != sk) { sock_gen_put(sk); /* Ensure there is no need to bump sk2 refcnt */ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); return NULL; } sk = sk2; } } return sk; } BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags); } static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { .func = bpf_skc_lookup_tcp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags); } static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { .func = bpf_sk_lookup_tcp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags); } static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { .func = bpf_sk_lookup_udp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { struct net_device *dev = skb->dev; int ifindex = dev->ifindex, sdif = dev_sdif(dev); struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, IPPROTO_TCP, netns_id, flags, sdif); } static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = { .func = bpf_tc_skc_lookup_tcp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { struct net_device *dev = skb->dev; int ifindex = dev->ifindex, sdif = dev_sdif(dev); struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net, ifindex, IPPROTO_TCP, netns_id, flags, sdif); } static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = { .func = bpf_tc_sk_lookup_tcp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { struct net_device *dev = skb->dev; int ifindex = dev->ifindex, sdif = dev_sdif(dev); struct net *caller_net = dev_net(dev); return 
(unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net, ifindex, IPPROTO_UDP, netns_id, flags, sdif); } static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = { .func = bpf_tc_sk_lookup_udp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_sk_release, struct sock *, sk) { if (sk && sk_is_refcounted(sk)) sock_gen_put(sk); return 0; } static const struct bpf_func_proto bpf_sk_release_proto = { .func = bpf_sk_release, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE, }; BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { struct net_device *dev = ctx->rxq->dev; int ifindex = dev->ifindex, sdif = dev_sdif(dev); struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_UDP, netns_id, flags, sdif); } static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { .func = bpf_xdp_sk_lookup_udp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { struct net_device *dev = ctx->rxq->dev; int ifindex = dev->ifindex, sdif = dev_sdif(dev); struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_TCP, netns_id, flags, sdif); } static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { .func = bpf_xdp_skc_lookup_tcp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { struct net_device *dev = ctx->rxq->dev; int ifindex = dev->ifindex, sdif = dev_sdif(dev); struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_TCP, netns_id, flags, sdif); } static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .func = bpf_xdp_sk_lookup_tcp, .gpl_only = false, .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, IPPROTO_TCP, netns_id, flags, -1); } static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { .func = bpf_sock_addr_skc_lookup_tcp, .gpl_only = false, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct 
bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, IPPROTO_TCP, netns_id, flags, -1); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { .func = bpf_sock_addr_sk_lookup_tcp, .gpl_only = false, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, IPPROTO_UDP, netns_id, flags, -1); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { .func = bpf_sock_addr_sk_lookup_udp, .gpl_only = false, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, icsk_retransmits)) return false; if (off % size != 0) return false; switch (off) { case offsetof(struct bpf_tcp_sock, bytes_received): case offsetof(struct bpf_tcp_sock, bytes_acked): return size == sizeof(__u64); default: return size == sizeof(__u32); } } u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; #define BPF_TCP_SOCK_GET_COMMON(FIELD) \ do { \ BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \ sizeof_field(struct bpf_tcp_sock, FIELD)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ si->dst_reg, si->src_reg, \ offsetof(struct tcp_sock, FIELD)); \ } while (0) #define BPF_INET_SOCK_GET_COMMON(FIELD) \ do { \ BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \ FIELD) > \ sizeof_field(struct bpf_tcp_sock, FIELD)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct inet_connection_sock, \ FIELD), \ si->dst_reg, si->src_reg, \ offsetof( \ struct inet_connection_sock, \ FIELD)); \ } while (0) BTF_TYPE_EMIT(struct bpf_tcp_sock); switch (si->off) { case offsetof(struct bpf_tcp_sock, rtt_min): BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) != sizeof(struct minmax)); BUILD_BUG_ON(sizeof(struct minmax) < sizeof(struct minmax_sample)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct tcp_sock, rtt_min) + offsetof(struct minmax_sample, v)); break; case offsetof(struct bpf_tcp_sock, snd_cwnd): BPF_TCP_SOCK_GET_COMMON(snd_cwnd); break; case offsetof(struct bpf_tcp_sock, srtt_us): BPF_TCP_SOCK_GET_COMMON(srtt_us); break; case offsetof(struct bpf_tcp_sock, snd_ssthresh): BPF_TCP_SOCK_GET_COMMON(snd_ssthresh); break; case offsetof(struct bpf_tcp_sock, rcv_nxt): BPF_TCP_SOCK_GET_COMMON(rcv_nxt); break; case offsetof(struct bpf_tcp_sock, snd_nxt): BPF_TCP_SOCK_GET_COMMON(snd_nxt); break; case offsetof(struct bpf_tcp_sock, snd_una): BPF_TCP_SOCK_GET_COMMON(snd_una); break; case offsetof(struct bpf_tcp_sock, mss_cache): BPF_TCP_SOCK_GET_COMMON(mss_cache); break; case offsetof(struct bpf_tcp_sock, ecn_flags): BPF_TCP_SOCK_GET_COMMON(ecn_flags); break; case offsetof(struct bpf_tcp_sock, rate_delivered): BPF_TCP_SOCK_GET_COMMON(rate_delivered); break; case offsetof(struct 
bpf_tcp_sock, rate_interval_us): BPF_TCP_SOCK_GET_COMMON(rate_interval_us); break; case offsetof(struct bpf_tcp_sock, packets_out): BPF_TCP_SOCK_GET_COMMON(packets_out); break; case offsetof(struct bpf_tcp_sock, retrans_out): BPF_TCP_SOCK_GET_COMMON(retrans_out); break; case offsetof(struct bpf_tcp_sock, total_retrans): BPF_TCP_SOCK_GET_COMMON(total_retrans); break; case offsetof(struct bpf_tcp_sock, segs_in): BPF_TCP_SOCK_GET_COMMON(segs_in); break; case offsetof(struct bpf_tcp_sock, data_segs_in): BPF_TCP_SOCK_GET_COMMON(data_segs_in); break; case offsetof(struct bpf_tcp_sock, segs_out): BPF_TCP_SOCK_GET_COMMON(segs_out); break; case offsetof(struct bpf_tcp_sock, data_segs_out): BPF_TCP_SOCK_GET_COMMON(data_segs_out); break; case offsetof(struct bpf_tcp_sock, lost_out): BPF_TCP_SOCK_GET_COMMON(lost_out); break; case offsetof(struct bpf_tcp_sock, sacked_out): BPF_TCP_SOCK_GET_COMMON(sacked_out); break; case offsetof(struct bpf_tcp_sock, bytes_received): BPF_TCP_SOCK_GET_COMMON(bytes_received); break; case offsetof(struct bpf_tcp_sock, bytes_acked): BPF_TCP_SOCK_GET_COMMON(bytes_acked); break; case offsetof(struct bpf_tcp_sock, dsack_dups): BPF_TCP_SOCK_GET_COMMON(dsack_dups); break; case offsetof(struct bpf_tcp_sock, delivered): BPF_TCP_SOCK_GET_COMMON(delivered); break; case offsetof(struct bpf_tcp_sock, delivered_ce): BPF_TCP_SOCK_GET_COMMON(delivered_ce); break; case offsetof(struct bpf_tcp_sock, icsk_retransmits): BPF_INET_SOCK_GET_COMMON(icsk_retransmits); break; } return insn - insn_buf; } BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) { if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) return (unsigned long)sk; return (unsigned long)NULL; } const struct bpf_func_proto bpf_tcp_sock_proto = { .func = bpf_tcp_sock, .gpl_only = false, .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) { sk = sk_to_full_sk(sk); if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) return (unsigned long)sk; return (unsigned long)NULL; } static const struct bpf_func_proto bpf_get_listener_sock_proto = { .func = bpf_get_listener_sock, .gpl_only = false, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): iphdr_len = sizeof(struct iphdr); break; case cpu_to_be16(ETH_P_IPV6): iphdr_len = sizeof(struct ipv6hdr); break; default: return 0; } if (skb_headlen(skb) < iphdr_len) return 0; if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len)) return 0; return INET_ECN_set_ce(skb); } bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id)) return false; if (off % size != 0) return false; switch (off) { default: return size == sizeof(__u32); } } u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; #define BPF_XDP_SOCK_GET(FIELD) \ do { \ BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \ sizeof_field(struct bpf_xdp_sock, FIELD)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\ si->dst_reg, si->src_reg, \ offsetof(struct xdp_sock, FIELD)); \ } while (0) switch (si->off) { case offsetof(struct bpf_xdp_sock, queue_id): BPF_XDP_SOCK_GET(queue_id); break; } return insn - 
insn_buf; } static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = { .func = bpf_skb_ecn_set_ce, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len, struct tcphdr *, th, u32, th_len) { #ifdef CONFIG_SYN_COOKIES int ret; if (unlikely(!sk || th_len < sizeof(*th))) return -EINVAL; /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */ if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -EINVAL; if (!th->ack || th->rst || th->syn) return -ENOENT; if (unlikely(iph_len < sizeof(struct iphdr))) return -EINVAL; if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; /* Both struct iphdr and struct ipv6hdr have the version field at the * same offset so we can cast to the shorter header (struct iphdr). */ switch (((struct iphdr *)iph)->version) { case 4: if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; ret = __cookie_v4_check((struct iphdr *)iph, th); break; #if IS_BUILTIN(CONFIG_IPV6) case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; if (sk->sk_family != AF_INET6) return -EINVAL; ret = __cookie_v6_check((struct ipv6hdr *)iph, th); break; #endif /* CONFIG_IPV6 */ default: return -EPROTONOSUPPORT; } if (ret > 0) return 0; return -ENOENT; #else return -ENOTSUPP; #endif } static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { .func = bpf_tcp_check_syncookie, .gpl_only = true, .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, struct tcphdr *, th, u32, th_len) { #ifdef CONFIG_SYN_COOKIES u32 cookie; u16 mss; if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4)) return -EINVAL; if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) return -EINVAL; if (unlikely(iph_len < sizeof(struct iphdr))) return -EINVAL; /* Both struct iphdr and struct ipv6hdr have the version field at the * same offset so we can cast to the shorter header (struct iphdr). 
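 * (The 4-bit version field sits in the first byte of both headers.)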
*/ switch (((struct iphdr *)iph)->version) { case 4: if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); break; #if IS_BUILTIN(CONFIG_IPV6) case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; if (sk->sk_family != AF_INET6) return -EINVAL; mss = tcp_v6_get_syncookie(sk, iph, th, &cookie); break; #endif /* CONFIG_IPV6 */ default: return -EPROTONOSUPPORT; } if (mss == 0) return -ENOENT; return cookie | ((u64)mss << 32); #else return -EOPNOTSUPP; #endif /* CONFIG_SYN_COOKIES */ } static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { .func = bpf_tcp_gen_syncookie, .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */ .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg5_type = ARG_CONST_SIZE, }; BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags) { if (!sk || flags != 0) return -EINVAL; if (!skb_at_tc_ingress(skb)) return -EOPNOTSUPP; if (unlikely(dev_net(skb->dev) != sock_net(sk))) return -ENETUNREACH; if (sk_unhashed(sk)) return -EOPNOTSUPP; if (sk_is_refcounted(sk) && unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) return -ENOENT; skb_orphan(skb); skb->sk = sk; skb->destructor = sock_pfree; return 0; } static const struct bpf_func_proto bpf_sk_assign_proto = { .func = bpf_sk_assign, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg3_type = ARG_ANYTHING, }; static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend, u8 search_kind, const u8 *magic, u8 magic_len, bool *eol) { u8 kind, kind_len; *eol = false; while (op < opend) { kind = op[0]; if (kind == TCPOPT_EOL) { *eol = true; return ERR_PTR(-ENOMSG); } else if (kind == TCPOPT_NOP) { op++; continue; } if (opend - op < 2 || opend - op < op[1] || op[1] < 2) /* Something is wrong in the received header. * Follow the TCP stack's tcp_parse_options() * and just bail here. */ return ERR_PTR(-EFAULT); kind_len = op[1]; if (search_kind == kind) { if (!magic_len) return op; if (magic_len > kind_len - 2) return ERR_PTR(-ENOMSG); if (!memcmp(&op[2], magic, magic_len)) return op; } op += kind_len; } return ERR_PTR(-ENOMSG); } BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, void *, search_res, u32, len, u64, flags) { bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN; const u8 *op, *opend, *magic, *search = search_res; u8 search_kind, search_len, copy_len, magic_len; int ret; /* 2 byte is the minimal option len except TCPOPT_NOP and * TCPOPT_EOL which are useless for the bpf prog to learn * and this helper disallow loading them also. */ if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN) return -EINVAL; search_kind = search[0]; search_len = search[1]; if (search_len > len || search_kind == TCPOPT_NOP || search_kind == TCPOPT_EOL) return -EINVAL; if (search_kind == TCPOPT_EXP || search_kind == 253) { /* 16 or 32 bit magic. 
+2 for kind and kind length */ if (search_len != 4 && search_len != 6) return -EINVAL; magic = &search[2]; magic_len = search_len - 2; } else { if (search_len) return -EINVAL; magic = NULL; magic_len = 0; } if (load_syn) { ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op); if (ret < 0) return ret; opend = op + ret; op += sizeof(struct tcphdr); } else { if (!bpf_sock->skb || bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB) /* This bpf_sock->op cannot call this helper */ return -EPERM; opend = bpf_sock->skb_data_end; op = bpf_sock->skb->data + sizeof(struct tcphdr); } op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len, &eol); if (IS_ERR(op)) return PTR_ERR(op); copy_len = op[1]; ret = copy_len; if (copy_len > len) { ret = -ENOSPC; copy_len = len; } memcpy(search_res, op, copy_len); return ret; } static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = { .func = bpf_sock_ops_load_hdr_opt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, const void *, from, u32, len, u64, flags) { u8 new_kind, new_kind_len, magic_len = 0, *opend; const u8 *op, *new_op, *magic = NULL; struct sk_buff *skb; bool eol; if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB) return -EPERM; if (len < 2 || flags) return -EINVAL; new_op = from; new_kind = new_op[0]; new_kind_len = new_op[1]; if (new_kind_len > len || new_kind == TCPOPT_NOP || new_kind == TCPOPT_EOL) return -EINVAL; if (new_kind_len > bpf_sock->remaining_opt_len) return -ENOSPC; /* 253 is another experimental kind */ if (new_kind == TCPOPT_EXP || new_kind == 253) { if (new_kind_len < 4) return -EINVAL; /* Match for the 2 byte magic also. * RFC 6994: the magic could be 2 or 4 bytes. * Hence, matching by 2 byte only is on the * conservative side but it is the right * thing to do for the 'search-for-duplication' * purpose. */ magic = &new_op[2]; magic_len = 2; } /* Check for duplication */ skb = bpf_sock->skb; op = skb->data + sizeof(struct tcphdr); opend = bpf_sock->skb_data_end; op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len, &eol); if (!IS_ERR(op)) return -EEXIST; if (PTR_ERR(op) != -ENOMSG) return PTR_ERR(op); if (eol) /* The option has been ended. Treat it as no more * header option can be written. */ return -ENOSPC; /* No duplication found. Store the header option. 
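 * The option bytes are appended at the current skb_data_end and the
 * remaining_opt_len budget (see bpf_sock_ops_reserve_hdr_opt()) shrinks
 * by new_kind_len.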
*/ memcpy(opend, from, new_kind_len); bpf_sock->remaining_opt_len -= new_kind_len; bpf_sock->skb_data_end += new_kind_len; return 0; } static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = { .func = bpf_sock_ops_store_hdr_opt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, u32, len, u64, flags) { if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB) return -EPERM; if (flags || len < 2) return -EINVAL; if (len > bpf_sock->remaining_opt_len) return -ENOSPC; bpf_sock->remaining_opt_len -= len; return 0; } static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = { .func = bpf_sock_ops_reserve_hdr_opt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, u64, tstamp, u32, tstamp_type) { /* skb_clear_delivery_time() is done for inet protocol */ if (skb->protocol != htons(ETH_P_IP) && skb->protocol != htons(ETH_P_IPV6)) return -EOPNOTSUPP; switch (tstamp_type) { case BPF_SKB_TSTAMP_DELIVERY_MONO: if (!tstamp) return -EINVAL; skb->tstamp = tstamp; skb->mono_delivery_time = 1; break; case BPF_SKB_TSTAMP_UNSPEC: if (tstamp) return -EINVAL; skb->tstamp = 0; skb->mono_delivery_time = 0; break; default: return -EINVAL; } return 0; } static const struct bpf_func_proto bpf_skb_set_tstamp_proto = { .func = bpf_skb_set_tstamp, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, }; #ifdef CONFIG_SYN_COOKIES BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph, struct tcphdr *, th, u32, th_len) { u32 cookie; u16 mss; if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) return -EINVAL; mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT; cookie = __cookie_v4_init_sequence(iph, th, &mss); return cookie | ((u64)mss << 32); } static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = { .func = bpf_tcp_raw_gen_syncookie_ipv4, .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */ .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, .arg1_size = sizeof(struct iphdr), .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph, struct tcphdr *, th, u32, th_len) { #if IS_BUILTIN(CONFIG_IPV6) const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); u32 cookie; u16 mss; if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) return -EINVAL; mss = tcp_parse_mss_option(th, 0) ?: mss_clamp; cookie = __cookie_v6_init_sequence(iph, th, &mss); return cookie | ((u64)mss << 32); #else return -EPROTONOSUPPORT; #endif } static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { .func = bpf_tcp_raw_gen_syncookie_ipv6, .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */ .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, .arg1_size = sizeof(struct ipv6hdr), .arg2_type = ARG_PTR_TO_MEM, .arg3_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, struct tcphdr *, th) { if (__cookie_v4_check(iph, th) > 0) return 0; return -EACCES; } static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = { .func = 
BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
       u64, tstamp, u32, tstamp_type)
{
    /* skb_clear_delivery_time() is done for inet protocol */
    if (skb->protocol != htons(ETH_P_IP) &&
        skb->protocol != htons(ETH_P_IPV6))
        return -EOPNOTSUPP;

    switch (tstamp_type) {
    case BPF_SKB_TSTAMP_DELIVERY_MONO:
        if (!tstamp)
            return -EINVAL;
        skb->tstamp = tstamp;
        skb->mono_delivery_time = 1;
        break;
    case BPF_SKB_TSTAMP_UNSPEC:
        if (tstamp)
            return -EINVAL;
        skb->tstamp = 0;
        skb->mono_delivery_time = 0;
        break;
    default:
        return -EINVAL;
    }

    return 0;
}

static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
    .func      = bpf_skb_set_tstamp,
    .gpl_only  = false,
    .ret_type  = RET_INTEGER,
    .arg1_type = ARG_PTR_TO_CTX,
    .arg2_type = ARG_ANYTHING,
    .arg3_type = ARG_ANYTHING,
};

#ifdef CONFIG_SYN_COOKIES
BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
       struct tcphdr *, th, u32, th_len)
{
    u32 cookie;
    u16 mss;

    if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
        return -EINVAL;

    mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT;
    cookie = __cookie_v4_init_sequence(iph, th, &mss);

    return cookie | ((u64)mss << 32);
}

static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
    .func       = bpf_tcp_raw_gen_syncookie_ipv4,
    .gpl_only   = true, /* __cookie_v4_init_sequence() is GPL */
    .pkt_access = true,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_FIXED_SIZE_MEM,
    .arg1_size  = sizeof(struct iphdr),
    .arg2_type  = ARG_PTR_TO_MEM,
    .arg3_type  = ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
       struct tcphdr *, th, u32, th_len)
{
#if IS_BUILTIN(CONFIG_IPV6)
    const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
        sizeof(struct ipv6hdr);
    u32 cookie;
    u16 mss;

    if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
        return -EINVAL;

    mss = tcp_parse_mss_option(th, 0) ?: mss_clamp;
    cookie = __cookie_v6_init_sequence(iph, th, &mss);

    return cookie | ((u64)mss << 32);
#else
    return -EPROTONOSUPPORT;
#endif
}

static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
    .func       = bpf_tcp_raw_gen_syncookie_ipv6,
    .gpl_only   = true, /* __cookie_v6_init_sequence() is GPL */
    .pkt_access = true,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_FIXED_SIZE_MEM,
    .arg1_size  = sizeof(struct ipv6hdr),
    .arg2_type  = ARG_PTR_TO_MEM,
    .arg3_type  = ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
       struct tcphdr *, th)
{
    if (__cookie_v4_check(iph, th) > 0)
        return 0;

    return -EACCES;
}

static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
    .func       = bpf_tcp_raw_check_syncookie_ipv4,
    .gpl_only   = true, /* __cookie_v4_check is GPL */
    .pkt_access = true,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_FIXED_SIZE_MEM,
    .arg1_size  = sizeof(struct iphdr),
    .arg2_type  = ARG_PTR_TO_FIXED_SIZE_MEM,
    .arg2_size  = sizeof(struct tcphdr),
};

BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
       struct tcphdr *, th)
{
#if IS_BUILTIN(CONFIG_IPV6)
    if (__cookie_v6_check(iph, th) > 0)
        return 0;

    return -EACCES;
#else
    return -EPROTONOSUPPORT;
#endif
}

static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
    .func       = bpf_tcp_raw_check_syncookie_ipv6,
    .gpl_only   = true, /* __cookie_v6_check is GPL */
    .pkt_access = true,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_FIXED_SIZE_MEM,
    .arg1_size  = sizeof(struct ipv6hdr),
    .arg2_type  = ARG_PTR_TO_FIXED_SIZE_MEM,
    .arg2_size  = sizeof(struct tcphdr),
};
#endif /* CONFIG_SYN_COOKIES */
#endif /* CONFIG_INET */

bool bpf_helper_changes_pkt_data(void *func)
{
    if (func == bpf_skb_vlan_push ||
        func == bpf_skb_vlan_pop ||
        func == bpf_skb_store_bytes ||
        func == bpf_skb_change_proto ||
        func == bpf_skb_change_head ||
        func == sk_skb_change_head ||
        func == bpf_skb_change_tail ||
        func == sk_skb_change_tail ||
        func == bpf_skb_adjust_room ||
        func == sk_skb_adjust_room ||
        func == bpf_skb_pull_data ||
        func == sk_skb_pull_data ||
        func == bpf_clone_redirect ||
        func == bpf_l3_csum_replace ||
        func == bpf_l4_csum_replace ||
        func == bpf_xdp_adjust_head ||
        func == bpf_xdp_adjust_meta ||
        func == bpf_msg_pull_data ||
        func == bpf_msg_push_data ||
        func == bpf_msg_pop_data ||
        func == bpf_xdp_adjust_tail ||
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
        func == bpf_lwt_seg6_store_bytes ||
        func == bpf_lwt_seg6_adjust_srh ||
        func == bpf_lwt_seg6_action ||
#endif
#ifdef CONFIG_INET
        func == bpf_sock_ops_store_hdr_opt ||
#endif
        func == bpf_lwt_in_push_encap ||
        func == bpf_lwt_xmit_push_encap)
        return true;

    return false;
}

const struct bpf_func_proto bpf_event_output_data_proto __weak;
const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;

static const struct bpf_func_proto *
sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
    const struct bpf_func_proto *func_proto;

    func_proto = cgroup_common_func_proto(func_id, prog);
    if (func_proto)
        return func_proto;

    func_proto = cgroup_current_func_proto(func_id, prog);
    if (func_proto)
        return func_proto;

    switch (func_id) {
    case BPF_FUNC_get_socket_cookie:
        return &bpf_get_socket_cookie_sock_proto;
    case BPF_FUNC_get_netns_cookie:
        return &bpf_get_netns_cookie_sock_proto;
    case BPF_FUNC_perf_event_output:
        return &bpf_event_output_data_proto;
    case BPF_FUNC_sk_storage_get:
        return &bpf_sk_storage_get_cg_sock_proto;
    case BPF_FUNC_ktime_get_coarse_ns:
        return &bpf_ktime_get_coarse_ns_proto;
    default:
        return bpf_base_func_proto(func_id, prog);
    }
}

static const struct bpf_func_proto *
sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
    const struct bpf_func_proto *func_proto;

    func_proto = cgroup_common_func_proto(func_id, prog);
    if (func_proto)
        return func_proto;

    func_proto = cgroup_current_func_proto(func_id, prog);
    if (func_proto)
        return func_proto;

    switch (func_id) {
    case BPF_FUNC_bind:
        switch (prog->expected_attach_type) {
        case BPF_CGROUP_INET4_CONNECT:
        case BPF_CGROUP_INET6_CONNECT:
            return &bpf_bind_proto;
        default:
            return NULL;
        }
    case BPF_FUNC_get_socket_cookie:
        return &bpf_get_socket_cookie_sock_addr_proto;
    case BPF_FUNC_get_netns_cookie:
        return &bpf_get_netns_cookie_sock_addr_proto;
    case BPF_FUNC_perf_event_output:
        return
&bpf_event_output_data_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sock_addr_sk_lookup_tcp_proto; case BPF_FUNC_sk_lookup_udp: return &bpf_sock_addr_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; case BPF_FUNC_skc_lookup_tcp: return &bpf_sock_addr_skc_lookup_tcp_proto; #endif /* CONFIG_INET */ case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; case BPF_FUNC_setsockopt: switch (prog->expected_attach_type) { case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_setsockopt_proto; default: return NULL; } case BPF_FUNC_getsockopt: switch (prog->expected_attach_type) { case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_getsockopt_proto; default: return NULL; } default: return bpf_sk_base_func_proto(func_id, prog); } } static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; case BPF_FUNC_skb_load_bytes_relative: return &bpf_skb_load_bytes_relative_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; default: return bpf_sk_base_func_proto(func_id, prog); } } const struct bpf_func_proto bpf_sk_storage_get_proto __weak; const struct bpf_func_proto bpf_sk_storage_delete_proto __weak; static const struct bpf_func_proto * cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { const struct bpf_func_proto *func_proto; func_proto = cgroup_common_func_proto(func_id, prog); if (func_proto) return func_proto; switch (func_id) { case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; #ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_cgroup_id: return &bpf_skb_cgroup_id_proto; case BPF_FUNC_skb_ancestor_cgroup_id: return &bpf_skb_ancestor_cgroup_id_proto; case BPF_FUNC_sk_cgroup_id: return &bpf_sk_cgroup_id_proto; case BPF_FUNC_sk_ancestor_cgroup_id: return &bpf_sk_ancestor_cgroup_id_proto; #endif #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; case BPF_FUNC_sk_lookup_udp: return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return 
&bpf_sk_release_proto; case BPF_FUNC_skc_lookup_tcp: return &bpf_skc_lookup_tcp_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; case BPF_FUNC_get_listener_sock: return &bpf_get_listener_sock_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; #endif default: return sk_filter_func_proto(func_id, prog); } } static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_store_bytes: return &bpf_skb_store_bytes_proto; case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; case BPF_FUNC_skb_load_bytes_relative: return &bpf_skb_load_bytes_relative_proto; case BPF_FUNC_skb_pull_data: return &bpf_skb_pull_data_proto; case BPF_FUNC_csum_diff: return &bpf_csum_diff_proto; case BPF_FUNC_csum_update: return &bpf_csum_update_proto; case BPF_FUNC_csum_level: return &bpf_csum_level_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: return &bpf_l4_csum_replace_proto; case BPF_FUNC_clone_redirect: return &bpf_clone_redirect_proto; case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_proto; case BPF_FUNC_skb_vlan_push: return &bpf_skb_vlan_push_proto; case BPF_FUNC_skb_vlan_pop: return &bpf_skb_vlan_pop_proto; case BPF_FUNC_skb_change_proto: return &bpf_skb_change_proto_proto; case BPF_FUNC_skb_change_type: return &bpf_skb_change_type_proto; case BPF_FUNC_skb_adjust_room: return &bpf_skb_adjust_room_proto; case BPF_FUNC_skb_change_tail: return &bpf_skb_change_tail_proto; case BPF_FUNC_skb_change_head: return &bpf_skb_change_head_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: return bpf_get_skb_set_tunnel_proto(func_id); case BPF_FUNC_skb_get_tunnel_opt: return &bpf_skb_get_tunnel_opt_proto; case BPF_FUNC_skb_set_tunnel_opt: return bpf_get_skb_set_tunnel_proto(func_id); case BPF_FUNC_redirect: return &bpf_redirect_proto; case BPF_FUNC_redirect_neigh: return &bpf_redirect_neigh_proto; case BPF_FUNC_redirect_peer: return &bpf_redirect_peer_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: return &bpf_get_hash_recalc_proto; case BPF_FUNC_set_hash_invalid: return &bpf_set_hash_invalid_proto; case BPF_FUNC_set_hash: return &bpf_set_hash_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; case BPF_FUNC_fib_lookup: return &bpf_skb_fib_lookup_proto; case BPF_FUNC_check_mtu: return &bpf_skb_check_mtu_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; #ifdef CONFIG_XFRM case BPF_FUNC_skb_get_xfrm_state: return &bpf_skb_get_xfrm_state_proto; #endif #ifdef CONFIG_CGROUP_NET_CLASSID case BPF_FUNC_skb_cgroup_classid: return &bpf_skb_cgroup_classid_proto; #endif #ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_cgroup_id: return &bpf_skb_cgroup_id_proto; case BPF_FUNC_skb_ancestor_cgroup_id: return &bpf_skb_ancestor_cgroup_id_proto; #endif #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_tc_sk_lookup_tcp_proto; case BPF_FUNC_sk_lookup_udp: return 
&bpf_tc_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; case BPF_FUNC_get_listener_sock: return &bpf_get_listener_sock_proto; case BPF_FUNC_skc_lookup_tcp: return &bpf_tc_skc_lookup_tcp_proto; case BPF_FUNC_tcp_check_syncookie: return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; case BPF_FUNC_sk_assign: return &bpf_sk_assign_proto; case BPF_FUNC_skb_set_tstamp: return &bpf_skb_set_tstamp_proto; #ifdef CONFIG_SYN_COOKIES case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: return &bpf_tcp_raw_gen_syncookie_ipv4_proto; case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: return &bpf_tcp_raw_gen_syncookie_ipv6_proto; case BPF_FUNC_tcp_raw_check_syncookie_ipv4: return &bpf_tcp_raw_check_syncookie_ipv4_proto; case BPF_FUNC_tcp_raw_check_syncookie_ipv6: return &bpf_tcp_raw_check_syncookie_ipv6_proto; #endif #endif default: return bpf_sk_base_func_proto(func_id, prog); } } static const struct bpf_func_proto * xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_xdp_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; case BPF_FUNC_csum_diff: return &bpf_csum_diff_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; case BPF_FUNC_xdp_adjust_meta: return &bpf_xdp_adjust_meta_proto; case BPF_FUNC_redirect: return &bpf_xdp_redirect_proto; case BPF_FUNC_redirect_map: return &bpf_xdp_redirect_map_proto; case BPF_FUNC_xdp_adjust_tail: return &bpf_xdp_adjust_tail_proto; case BPF_FUNC_xdp_get_buff_len: return &bpf_xdp_get_buff_len_proto; case BPF_FUNC_xdp_load_bytes: return &bpf_xdp_load_bytes_proto; case BPF_FUNC_xdp_store_bytes: return &bpf_xdp_store_bytes_proto; case BPF_FUNC_fib_lookup: return &bpf_xdp_fib_lookup_proto; case BPF_FUNC_check_mtu: return &bpf_xdp_check_mtu_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_udp: return &bpf_xdp_sk_lookup_udp_proto; case BPF_FUNC_sk_lookup_tcp: return &bpf_xdp_sk_lookup_tcp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; case BPF_FUNC_skc_lookup_tcp: return &bpf_xdp_skc_lookup_tcp_proto; case BPF_FUNC_tcp_check_syncookie: return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; #ifdef CONFIG_SYN_COOKIES case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: return &bpf_tcp_raw_gen_syncookie_ipv4_proto; case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: return &bpf_tcp_raw_gen_syncookie_ipv6_proto; case BPF_FUNC_tcp_raw_check_syncookie_ipv4: return &bpf_tcp_raw_check_syncookie_ipv4_proto; case BPF_FUNC_tcp_raw_check_syncookie_ipv6: return &bpf_tcp_raw_check_syncookie_ipv6_proto; #endif #endif default: return bpf_sk_base_func_proto(func_id, prog); } #if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) /* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The * kfuncs are defined in two different modules, and we want to be able * to use them interchangeably with the same BTF type ID. Because modules * can't de-duplicate BTF IDs between each other, we need the type to be * referenced in the vmlinux BTF or the verifier will get confused about * the different types. So we add this dummy type reference which will * be included in vmlinux BTF, allowing both modules to refer to the * same type ID. 
*/ BTF_TYPE_EMIT(struct nf_conn___init); #endif } const struct bpf_func_proto bpf_sock_map_update_proto __weak; const struct bpf_func_proto bpf_sock_hash_update_proto __weak; static const struct bpf_func_proto * sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { const struct bpf_func_proto *func_proto; func_proto = cgroup_common_func_proto(func_id, prog); if (func_proto) return func_proto; switch (func_id) { case BPF_FUNC_setsockopt: return &bpf_sock_ops_setsockopt_proto; case BPF_FUNC_getsockopt: return &bpf_sock_ops_getsockopt_proto; case BPF_FUNC_sock_ops_cb_flags_set: return &bpf_sock_ops_cb_flags_set_proto; case BPF_FUNC_sock_map_update: return &bpf_sock_map_update_proto; case BPF_FUNC_sock_hash_update: return &bpf_sock_hash_update_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_sock_ops_proto; case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; case BPF_FUNC_get_netns_cookie: return &bpf_get_netns_cookie_sock_ops_proto; #ifdef CONFIG_INET case BPF_FUNC_load_hdr_opt: return &bpf_sock_ops_load_hdr_opt_proto; case BPF_FUNC_store_hdr_opt: return &bpf_sock_ops_store_hdr_opt_proto; case BPF_FUNC_reserve_hdr_opt: return &bpf_sock_ops_reserve_hdr_opt_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ default: return bpf_sk_base_func_proto(func_id, prog); } } const struct bpf_func_proto bpf_msg_redirect_map_proto __weak; const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak; static const struct bpf_func_proto * sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_msg_redirect_map: return &bpf_msg_redirect_map_proto; case BPF_FUNC_msg_redirect_hash: return &bpf_msg_redirect_hash_proto; case BPF_FUNC_msg_apply_bytes: return &bpf_msg_apply_bytes_proto; case BPF_FUNC_msg_cork_bytes: return &bpf_msg_cork_bytes_proto; case BPF_FUNC_msg_pull_data: return &bpf_msg_pull_data_proto; case BPF_FUNC_msg_push_data: return &bpf_msg_push_data_proto; case BPF_FUNC_msg_pop_data: return &bpf_msg_pop_data_proto; case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_current_pid_tgid: return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; case BPF_FUNC_get_netns_cookie: return &bpf_get_netns_cookie_sk_msg_proto; #ifdef CONFIG_CGROUP_NET_CLASSID case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; #endif default: return bpf_sk_base_func_proto(func_id, prog); } } const struct bpf_func_proto bpf_sk_redirect_map_proto __weak; const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak; static const struct bpf_func_proto * sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_store_bytes: return &bpf_skb_store_bytes_proto; case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; case BPF_FUNC_skb_pull_data: return &sk_skb_pull_data_proto; case BPF_FUNC_skb_change_tail: return &sk_skb_change_tail_proto; case BPF_FUNC_skb_change_head: return &sk_skb_change_head_proto; case BPF_FUNC_skb_adjust_room: return &sk_skb_adjust_room_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_proto; case 
BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; case BPF_FUNC_sk_redirect_map: return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; case BPF_FUNC_sk_lookup_udp: return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; case BPF_FUNC_skc_lookup_tcp: return &bpf_skc_lookup_tcp_proto; #endif default: return bpf_sk_base_func_proto(func_id, prog); } } static const struct bpf_func_proto * flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_load_bytes: return &bpf_flow_dissector_load_bytes_proto; default: return bpf_sk_base_func_proto(func_id, prog); } } static const struct bpf_func_proto * lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; case BPF_FUNC_skb_pull_data: return &bpf_skb_pull_data_proto; case BPF_FUNC_csum_diff: return &bpf_csum_diff_proto; case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: return &bpf_get_hash_recalc_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; default: return bpf_sk_base_func_proto(func_id, prog); } } static const struct bpf_func_proto * lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_lwt_push_encap: return &bpf_lwt_in_push_encap_proto; default: return lwt_out_func_proto(func_id, prog); } } static const struct bpf_func_proto * lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: return bpf_get_skb_set_tunnel_proto(func_id); case BPF_FUNC_skb_get_tunnel_opt: return &bpf_skb_get_tunnel_opt_proto; case BPF_FUNC_skb_set_tunnel_opt: return bpf_get_skb_set_tunnel_proto(func_id); case BPF_FUNC_redirect: return &bpf_redirect_proto; case BPF_FUNC_clone_redirect: return &bpf_clone_redirect_proto; case BPF_FUNC_skb_change_tail: return &bpf_skb_change_tail_proto; case BPF_FUNC_skb_change_head: return &bpf_skb_change_head_proto; case BPF_FUNC_skb_store_bytes: return &bpf_skb_store_bytes_proto; case BPF_FUNC_csum_update: return &bpf_csum_update_proto; case BPF_FUNC_csum_level: return &bpf_csum_level_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: return &bpf_l4_csum_replace_proto; case BPF_FUNC_set_hash_invalid: return &bpf_set_hash_invalid_proto; case BPF_FUNC_lwt_push_encap: return &bpf_lwt_xmit_push_encap_proto; default: return lwt_out_func_proto(func_id, prog); } } static const struct bpf_func_proto * lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) case BPF_FUNC_lwt_seg6_store_bytes: return &bpf_lwt_seg6_store_bytes_proto; case BPF_FUNC_lwt_seg6_action: return &bpf_lwt_seg6_action_proto; case BPF_FUNC_lwt_seg6_adjust_srh: return &bpf_lwt_seg6_adjust_srh_proto; #endif default: return lwt_out_func_proto(func_id, prog); } } static 
bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); if (off < 0 || off >= sizeof(struct __sk_buff)) return false; /* The verifier guarantees that size > 0. */ if (off % size != 0) return false; switch (off) { case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): if (off + size > offsetofend(struct __sk_buff, cb[4])) return false; break; case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]): case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]): case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): if (size != size_default) return false; break; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; case bpf_ctx_range(struct __sk_buff, hwtstamp): if (type == BPF_WRITE || size != sizeof(__u64)) return false; break; case bpf_ctx_range(struct __sk_buff, tstamp): if (size != sizeof(__u64)) return false; break; case offsetof(struct __sk_buff, sk): if (type == BPF_WRITE || size != sizeof(__u64)) return false; info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; break; case offsetof(struct __sk_buff, tstamp_type): return false; case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - 1: /* Explicitly prohibit access to padding in __sk_buff. */ return false; default: /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { if (size != size_default) return false; } else { bpf_ctx_record_field_size(info, size_default); if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; } } return true; } static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } return bpf_skb_is_valid_access(off, size, type, prog, info); } static bool cg_skb_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, wire_len): return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): if (!bpf_token_capable(prog->aux->token, CAP_BPF)) return false; break; } if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; case bpf_ctx_range(struct __sk_buff, tstamp): if (!bpf_token_capable(prog->aux->token, CAP_BPF)) return false; break; default: return false; } } switch (off) { case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; 
case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; } return bpf_skb_is_valid_access(off, size, type, prog, info); } static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } switch (off) { case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; } return bpf_skb_is_valid_access(off, size, type, prog, info); } /* Attach type specific accesses */ static bool __sock_filter_check_attach_type(int off, enum bpf_access_type access_type, enum bpf_attach_type attach_type) { switch (off) { case offsetof(struct bpf_sock, bound_dev_if): case offsetof(struct bpf_sock, mark): case offsetof(struct bpf_sock, priority): switch (attach_type) { case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_INET_SOCK_RELEASE: goto full_access; default: return false; } case bpf_ctx_range(struct bpf_sock, src_ip4): switch (attach_type) { case BPF_CGROUP_INET4_POST_BIND: goto read_only; default: return false; } case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): switch (attach_type) { case BPF_CGROUP_INET6_POST_BIND: goto read_only; default: return false; } case bpf_ctx_range(struct bpf_sock, src_port): switch (attach_type) { case BPF_CGROUP_INET4_POST_BIND: case BPF_CGROUP_INET6_POST_BIND: goto read_only; default: return false; } } read_only: return access_type == BPF_READ; full_access: return true; } bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { switch (off) { case bpf_ctx_range_till(struct bpf_sock, type, priority): return false; default: return bpf_sock_is_valid_access(off, size, type, info); } } bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); int field_size; if (off < 0 || off >= sizeof(struct bpf_sock)) return false; if (off % size != 0) return false; switch (off) { case offsetof(struct bpf_sock, state): case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): case offsetof(struct bpf_sock, src_port): case offsetof(struct bpf_sock, rx_queue_mapping): case bpf_ctx_range(struct bpf_sock, src_ip4): case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): case bpf_ctx_range(struct bpf_sock, dst_ip4): case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); case bpf_ctx_range(struct bpf_sock, dst_port): field_size = size == size_default ? size_default : sizeof_field(struct bpf_sock, dst_port); bpf_ctx_record_field_size(info, field_size); return bpf_ctx_narrow_access_ok(off, size, field_size); case offsetofend(struct bpf_sock, dst_port) ... 
offsetof(struct bpf_sock, dst_ip4) - 1: return false; } return size == size_default; } static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (!bpf_sock_is_valid_access(off, size, type, info)) return false; return __sock_filter_check_attach_type(off, type, prog->expected_attach_type); } static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog) { /* Neither direct read nor direct write requires any preliminary * action. */ return 0; } static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog, int drop_verdict) { struct bpf_insn *insn = insn_buf; if (!direct_write) return 0; /* if (!skb->cloned) * goto start; * * (Fast-path, otherwise approximation that we might be * a clone, do the rest in helper.) */ *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET); *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); /* ret = bpf_skb_pull_data(skb, 0); */ *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_pull_data); /* if (!ret) * goto restore; * return TC_ACT_SHOT; */ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict); *insn++ = BPF_EXIT_INSN(); /* restore: */ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); /* start: */ *insn++ = prog->insnsi[0]; return insn - insn_buf; } static int bpf_gen_ld_abs(const struct bpf_insn *orig, struct bpf_insn *insn_buf) { bool indirect = BPF_MODE(orig->code) == BPF_IND; struct bpf_insn *insn = insn_buf; if (!indirect) { *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); } else { *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg); if (orig->imm) *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); } /* We're guaranteed here that CTX is in R6. 
*/ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); switch (BPF_SIZE(orig->code)) { case BPF_B: *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache); break; case BPF_H: *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache); break; case BPF_W: *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache); break; } *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2); *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); *insn++ = BPF_EXIT_INSN(); return insn - insn_buf; } static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog) { return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT); } static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_index): case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, queue_mapping): break; default: return false; } } switch (off) { case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; case bpf_ctx_range(struct __sk_buff, data_meta): info->reg_type = PTR_TO_PACKET_META; break; case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; case offsetof(struct __sk_buff, tstamp_type): /* The convert_ctx_access() on reading and writing * __sk_buff->tstamp depends on whether the bpf prog * has used __sk_buff->tstamp_type or not. * Thus, we need to set prog->tstamp_type_access * earlier during is_valid_access() here. 
*/ ((struct bpf_prog *)prog)->tstamp_type_access = 1; return size == sizeof(__u8); } return bpf_skb_is_valid_access(off, size, type, prog, info); } DEFINE_MUTEX(nf_conn_btf_access_lock); EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock); int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size); EXPORT_SYMBOL_GPL(nfct_btf_struct_access); static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size) { int ret = -EACCES; mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) ret = nfct_btf_struct_access(log, reg, off, size); mutex_unlock(&nf_conn_btf_access_lock); return ret; } static bool __is_valid_xdp_access(int off, int size) { if (off < 0 || off >= sizeof(struct xdp_md)) return false; if (off % size != 0) return false; if (size != sizeof(__u32)) return false; return true; } static bool xdp_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (prog->expected_attach_type != BPF_XDP_DEVMAP) { switch (off) { case offsetof(struct xdp_md, egress_ifindex): return false; } } if (type == BPF_WRITE) { if (bpf_prog_is_offloaded(prog->aux)) { switch (off) { case offsetof(struct xdp_md, rx_queue_index): return __is_valid_xdp_access(off, size); } } return false; } switch (off) { case offsetof(struct xdp_md, data): info->reg_type = PTR_TO_PACKET; break; case offsetof(struct xdp_md, data_meta): info->reg_type = PTR_TO_PACKET_META; break; case offsetof(struct xdp_md, data_end): info->reg_type = PTR_TO_PACKET_END; break; } return __is_valid_xdp_access(off, size); } void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act) { const u32 act_max = XDP_REDIRECT; pr_warn_once("%s XDP return value %u on prog %s (id %d) dev %s, expect packet loss!\n", act > act_max ? "Illegal" : "Driver unsupported", act, prog->aux->name, prog->aux->id, dev ? dev->name : "N/A"); } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); static int xdp_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size) { int ret = -EACCES; mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) ret = nfct_btf_struct_access(log, reg, off, size); mutex_unlock(&nf_conn_btf_access_lock); return ret; } static bool sock_addr_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); if (off < 0 || off >= sizeof(struct bpf_sock_addr)) return false; if (off % size != 0) return false; /* Disallow access to fields not belonging to the attach type's address * family. 
*/ switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): switch (prog->expected_attach_type) { case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: break; default: return false; } break; case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): switch (prog->expected_attach_type) { case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_INET6_GETPEERNAME: case BPF_CGROUP_INET6_GETSOCKNAME: case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_UDP6_RECVMSG: break; default: return false; } break; case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): switch (prog->expected_attach_type) { case BPF_CGROUP_UDP4_SENDMSG: break; default: return false; } break; case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], msg_src_ip6[3]): switch (prog->expected_attach_type) { case BPF_CGROUP_UDP6_SENDMSG: break; default: return false; } break; } switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], msg_src_ip6[3]): case bpf_ctx_range(struct bpf_sock_addr, user_port): if (type == BPF_READ) { bpf_ctx_record_field_size(info, size_default); if (bpf_ctx_wide_access_ok(off, size, struct bpf_sock_addr, user_ip6)) return true; if (bpf_ctx_wide_access_ok(off, size, struct bpf_sock_addr, msg_src_ip6)) return true; if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; } else { if (bpf_ctx_wide_access_ok(off, size, struct bpf_sock_addr, user_ip6)) return true; if (bpf_ctx_wide_access_ok(off, size, struct bpf_sock_addr, msg_src_ip6)) return true; if (size != size_default) return false; } break; case offsetof(struct bpf_sock_addr, sk): if (type != BPF_READ) return false; if (size != sizeof(__u64)) return false; info->reg_type = PTR_TO_SOCKET; break; default: if (type == BPF_READ) { if (size != size_default) return false; } else { return false; } } return true; } static bool sock_ops_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); if (off < 0 || off >= sizeof(struct bpf_sock_ops)) return false; /* The verifier guarantees that size > 0. 
*/ if (off % size != 0) return false; if (type == BPF_WRITE) { switch (off) { case offsetof(struct bpf_sock_ops, reply): case offsetof(struct bpf_sock_ops, sk_txhash): if (size != size_default) return false; break; default: return false; } } else { switch (off) { case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, bytes_acked): if (size != sizeof(__u64)) return false; break; case offsetof(struct bpf_sock_ops, sk): if (size != sizeof(__u64)) return false; info->reg_type = PTR_TO_SOCKET_OR_NULL; break; case offsetof(struct bpf_sock_ops, skb_data): if (size != sizeof(__u64)) return false; info->reg_type = PTR_TO_PACKET; break; case offsetof(struct bpf_sock_ops, skb_data_end): if (size != sizeof(__u64)) return false; info->reg_type = PTR_TO_PACKET_END; break; case offsetof(struct bpf_sock_ops, skb_tcp_flags): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); case offsetof(struct bpf_sock_ops, skb_hwtstamp): if (size != sizeof(__u64)) return false; break; default: if (size != size_default) return false; break; } } return true; } static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog) { return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP); } static bool sk_skb_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, tc_index): case bpf_ctx_range(struct __sk_buff, priority): break; default: return false; } } switch (off) { case bpf_ctx_range(struct __sk_buff, mark): return false; case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; } return bpf_skb_is_valid_access(off, size, type, prog, info); } static bool sk_msg_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) return false; if (off % size != 0) return false; switch (off) { case offsetof(struct sk_msg_md, data): info->reg_type = PTR_TO_PACKET; if (size != sizeof(__u64)) return false; break; case offsetof(struct sk_msg_md, data_end): info->reg_type = PTR_TO_PACKET_END; if (size != sizeof(__u64)) return false; break; case offsetof(struct sk_msg_md, sk): if (size != sizeof(__u64)) return false; info->reg_type = PTR_TO_SOCKET; break; case bpf_ctx_range(struct sk_msg_md, family): case bpf_ctx_range(struct sk_msg_md, remote_ip4): case bpf_ctx_range(struct sk_msg_md, local_ip4): case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]): case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]): case bpf_ctx_range(struct sk_msg_md, remote_port): case bpf_ctx_range(struct sk_msg_md, local_port): case bpf_ctx_range(struct sk_msg_md, size): if (size != sizeof(__u32)) return false; break; default: return false; } return true; } static bool flow_dissector_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); if (off < 0 || off >= sizeof(struct __sk_buff)) return false; if 
(type == BPF_WRITE) return false; switch (off) { case bpf_ctx_range(struct __sk_buff, data): if (size != size_default) return false; info->reg_type = PTR_TO_PACKET; return true; case bpf_ctx_range(struct __sk_buff, data_end): if (size != size_default) return false; info->reg_type = PTR_TO_PACKET_END; return true; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): if (size != sizeof(__u64)) return false; info->reg_type = PTR_TO_FLOW_KEYS; return true; default: return false; } } static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct __sk_buff, data): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data), si->dst_reg, si->src_reg, offsetof(struct bpf_flow_dissector, data)); break; case offsetof(struct __sk_buff, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end), si->dst_reg, si->src_reg, offsetof(struct bpf_flow_dissector, data_end)); break; case offsetof(struct __sk_buff, flow_keys): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys), si->dst_reg, si->src_reg, offsetof(struct bpf_flow_dissector, flow_keys)); break; } return insn - insn_buf; } static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, struct bpf_insn *insn) { __u8 value_reg = si->dst_reg; __u8 skb_reg = si->src_reg; /* AX is needed because src_reg and dst_reg could be the same */ __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_MONO_DELIVERY_TIME_MASK, 2); *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); *insn++ = BPF_JMP_A(1); *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO); return insn; } static struct bpf_insn *bpf_convert_shinfo_access(__u8 dst_reg, __u8 skb_reg, struct bpf_insn *insn) { /* si->dst_reg = skb_shinfo(SKB); */ #ifdef NET_SKBUFF_DATA_USES_OFFSET *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), BPF_REG_AX, skb_reg, offsetof(struct sk_buff, end)); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), dst_reg, skb_reg, offsetof(struct sk_buff, head)); *insn++ = BPF_ALU64_REG(BPF_ADD, dst_reg, BPF_REG_AX); #else *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), dst_reg, skb_reg, offsetof(struct sk_buff, end)); #endif return insn; } static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, const struct bpf_insn *si, struct bpf_insn *insn) { __u8 value_reg = si->dst_reg; __u8 skb_reg = si->src_reg; #ifdef CONFIG_NET_XGRESS /* If the tstamp_type is read, * the bpf prog is aware the tstamp could have delivery time. * Thus, read skb->tstamp as is if tstamp_type_access is true. */ if (!prog->tstamp_type_access) { /* AX is needed because src_reg and dst_reg could be the same */ __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); /* skb->tc_at_ingress && skb->mono_delivery_time, * read 0 as the (rcv) timestamp. 
*/ *insn++ = BPF_MOV64_IMM(value_reg, 0); *insn++ = BPF_JMP_A(1); } #endif *insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg, offsetof(struct sk_buff, tstamp)); return insn; } static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, const struct bpf_insn *si, struct bpf_insn *insn) { __u8 value_reg = si->src_reg; __u8 skb_reg = si->dst_reg; #ifdef CONFIG_NET_XGRESS /* If the tstamp_type is read, * the bpf prog is aware the tstamp could have delivery time. * Thus, write skb->tstamp as is if tstamp_type_access is true. * Otherwise, writing at ingress will have to clear the * mono_delivery_time bit also. */ if (!prog->tstamp_type_access) { __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); /* Writing __sk_buff->tstamp as ingress, goto <clear> */ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); /* goto <store> */ *insn++ = BPF_JMP_A(2); /* <clear>: mono_delivery_time */ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } #endif /* <store>: skb->tstamp = tstamp */ *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM, skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm); return insn; } #define BPF_EMIT_STORE(size, si, off) \ BPF_RAW_INSN(BPF_CLASS((si)->code) | (size) | BPF_MEM, \ (si)->dst_reg, (si)->src_reg, (off), (si)->imm) static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; switch (si->off) { case offsetof(struct __sk_buff, len): *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, len, 4, target_size)); break; case offsetof(struct __sk_buff, protocol): *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, protocol, 2, target_size)); break; case offsetof(struct __sk_buff, vlan_proto): *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, vlan_proto, 2, target_size)); break; case offsetof(struct __sk_buff, priority): if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_W, si, bpf_target_off(struct sk_buff, priority, 4, target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, priority, 4, target_size)); break; case offsetof(struct __sk_buff, ingress_ifindex): *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, skb_iif, 4, target_size)); break; case offsetof(struct __sk_buff, ifindex): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, bpf_target_off(struct net_device, ifindex, 4, target_size)); break; case offsetof(struct __sk_buff, hash): *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, hash, 4, target_size)); break; case offsetof(struct __sk_buff, mark): if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_W, si, bpf_target_off(struct sk_buff, mark, 4, target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, mark, 4, target_size)); break; case offsetof(struct __sk_buff, pkt_type): *target_size = 1; *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, PKT_TYPE_OFFSET); *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 
PKT_TYPE_MAX); #ifdef __BIG_ENDIAN_BITFIELD *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); #endif break; case offsetof(struct __sk_buff, queue_mapping): if (type == BPF_WRITE) { u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) { *insn++ = BPF_JMP_A(0); /* noop */ break; } if (BPF_CLASS(si->code) == BPF_STX) *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); *insn++ = BPF_EMIT_STORE(BPF_H, si, off); } else { *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, queue_mapping, 2, target_size)); } break; case offsetof(struct __sk_buff, vlan_present): *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, vlan_all, 4, target_size)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_ALU32_IMM(BPF_MOV, si->dst_reg, 1); break; case offsetof(struct __sk_buff, vlan_tci): *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, vlan_tci, 2, target_size)); break; case offsetof(struct __sk_buff, cb[0]) ... offsetofend(struct __sk_buff, cb[4]) - 1: BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20); BUILD_BUG_ON((offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data)) % sizeof(__u64)); prog->cb_access = 1; off = si->off; off -= offsetof(struct __sk_buff, cb[0]); off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, data); if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off); else *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); break; case offsetof(struct __sk_buff, tc_classid): BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2); off = si->off; off -= offsetof(struct __sk_buff, tc_classid); off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, tc_classid); *target_size = 2; if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_H, si, off); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, off); break; case offsetof(struct __sk_buff, data): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), si->dst_reg, si->src_reg, offsetof(struct sk_buff, data)); break; case offsetof(struct __sk_buff, data_meta): off = si->off; off -= offsetof(struct __sk_buff, data_meta); off += offsetof(struct sk_buff, cb); off += offsetof(struct bpf_skb_data_end, data_meta); *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, off); break; case offsetof(struct __sk_buff, data_end): off = si->off; off -= offsetof(struct __sk_buff, data_end); off += offsetof(struct sk_buff, cb); off += offsetof(struct bpf_skb_data_end, data_end); *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, off); break; case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_H, si, bpf_target_off(struct sk_buff, tc_index, 2, target_size)); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, tc_index, 2, target_size)); #else *target_size = 2; if (type == BPF_WRITE) *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); else *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; case offsetof(struct __sk_buff, napi_id): #if defined(CONFIG_NET_RX_BUSY_POLL) *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, napi_id, 4, target_size)); *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #else *target_size 
= 4; *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; case offsetof(struct __sk_buff, family): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, bpf_target_off(struct sock_common, skc_family, 2, target_size)); break; case offsetof(struct __sk_buff, remote_ip4): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, bpf_target_off(struct sock_common, skc_daddr, 4, target_size)); break; case offsetof(struct __sk_buff, local_ip4): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_rcv_saddr) != 4); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, bpf_target_off(struct sock_common, skc_rcv_saddr, 4, target_size)); break; case offsetof(struct __sk_buff, remote_ip6[0]) ... offsetof(struct __sk_buff, remote_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) BUILD_BUG_ON(sizeof_field(struct sock_common, skc_v6_daddr.s6_addr32[0]) != 4); off = si->off; off -= offsetof(struct __sk_buff, remote_ip6[0]); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_v6_daddr.s6_addr32[0]) + off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; case offsetof(struct __sk_buff, local_ip6[0]) ... offsetof(struct __sk_buff, local_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) BUILD_BUG_ON(sizeof_field(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) != 4); off = si->off; off -= offsetof(struct __sk_buff, local_ip6[0]); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) + off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; case offsetof(struct __sk_buff, remote_port): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, bpf_target_off(struct sock_common, skc_dport, 2, target_size)); #ifndef __BIG_ENDIAN_BITFIELD *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); #endif break; case offsetof(struct __sk_buff, local_port): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, bpf_target_off(struct sock_common, skc_num, 2, target_size)); break; case offsetof(struct __sk_buff, tstamp): BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8); if (type == BPF_WRITE) insn = bpf_convert_tstamp_write(prog, si, insn); else insn = bpf_convert_tstamp_read(prog, si, insn); break; case offsetof(struct __sk_buff, tstamp_type): insn = bpf_convert_tstamp_type_read(si, insn); break; case offsetof(struct __sk_buff, gso_segs): insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, 
gso_segs), si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, gso_segs, 2, target_size)); break; case offsetof(struct __sk_buff, gso_size): insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size), si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, gso_size, 2, target_size)); break; case offsetof(struct __sk_buff, wire_len): BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4); off = si->off; off -= offsetof(struct __sk_buff, wire_len); off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, pkt_len); *target_size = 4; *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); break; case offsetof(struct __sk_buff, sk): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); break; case offsetof(struct __sk_buff, hwtstamp): BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8); BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0); insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn); *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, hwtstamps, 8, target_size)); break; } return insn - insn_buf; } u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; switch (si->off) { case offsetof(struct bpf_sock, bound_dev_if): BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4); if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_W, si, offsetof(struct sock, sk_bound_dev_if)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_bound_dev_if)); break; case offsetof(struct bpf_sock, mark): BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4); if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_W, si, offsetof(struct sock, sk_mark)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_mark)); break; case offsetof(struct bpf_sock, priority): BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4); if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_W, si, offsetof(struct sock, sk_priority)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_priority)); break; case offsetof(struct bpf_sock, family): *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock_common, skc_family), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_family, sizeof_field(struct sock_common, skc_family), target_size)); break; case offsetof(struct bpf_sock, type): *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock, sk_type), si->dst_reg, si->src_reg, bpf_target_off(struct sock, sk_type, sizeof_field(struct sock, sk_type), target_size)); break; case offsetof(struct bpf_sock, protocol): *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock, sk_protocol), si->dst_reg, si->src_reg, bpf_target_off(struct sock, sk_protocol, sizeof_field(struct sock, sk_protocol), target_size)); break; case offsetof(struct bpf_sock, src_ip4): *insn++ = BPF_LDX_MEM( BPF_SIZE(si->code), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_rcv_saddr, sizeof_field(struct sock_common, skc_rcv_saddr), target_size)); break; case offsetof(struct bpf_sock, dst_ip4): *insn++ = BPF_LDX_MEM( BPF_SIZE(si->code), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_daddr, sizeof_field(struct sock_common, 
skc_daddr), target_size)); break; case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) off = si->off; off -= offsetof(struct bpf_sock, src_ip6[0]); *insn++ = BPF_LDX_MEM( BPF_SIZE(si->code), si->dst_reg, si->src_reg, bpf_target_off( struct sock_common, skc_v6_rcv_saddr.s6_addr32[0], sizeof_field(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]), target_size) + off); #else (void)off; *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) off = si->off; off -= offsetof(struct bpf_sock, dst_ip6[0]); *insn++ = BPF_LDX_MEM( BPF_SIZE(si->code), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_v6_daddr.s6_addr32[0], sizeof_field(struct sock_common, skc_v6_daddr.s6_addr32[0]), target_size) + off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); *target_size = 4; #endif break; case offsetof(struct bpf_sock, src_port): *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock_common, skc_num), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_num, sizeof_field(struct sock_common, skc_num), target_size)); break; case offsetof(struct bpf_sock, dst_port): *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock_common, skc_dport), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_dport, sizeof_field(struct sock_common, skc_dport), target_size)); break; case offsetof(struct bpf_sock, state): *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock_common, skc_state), si->dst_reg, si->src_reg, bpf_target_off(struct sock_common, skc_state, sizeof_field(struct sock_common, skc_state), target_size)); break; case offsetof(struct bpf_sock, rx_queue_mapping): #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping), si->dst_reg, si->src_reg, bpf_target_off(struct sock, sk_rx_queue_mapping, sizeof_field(struct sock, sk_rx_queue_mapping), target_size)); *insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING, 1); *insn++ = BPF_MOV64_IMM(si->dst_reg, -1); #else *insn++ = BPF_MOV64_IMM(si->dst_reg, -1); *target_size = 2; #endif break; } return insn - insn_buf; } static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct __sk_buff, ifindex): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, bpf_target_off(struct net_device, ifindex, 4, target_size)); break; default: return bpf_convert_ctx_access(type, si, insn_buf, prog, target_size); } return insn - insn_buf; } static u32 xdp_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct xdp_md, data): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data), si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data)); break; case offsetof(struct xdp_md, data_meta): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta), si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data_meta)); break; case offsetof(struct xdp_md, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data_end)); break; case 
offsetof(struct xdp_md, ingress_ifindex): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq), si->dst_reg, si->src_reg, offsetof(struct xdp_buff, rxq)); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev), si->dst_reg, si->dst_reg, offsetof(struct xdp_rxq_info, dev)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct net_device, ifindex)); break; case offsetof(struct xdp_md, rx_queue_index): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq), si->dst_reg, si->src_reg, offsetof(struct xdp_buff, rxq)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct xdp_rxq_info, queue_index)); break; case offsetof(struct xdp_md, egress_ifindex): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq), si->dst_reg, si->src_reg, offsetof(struct xdp_buff, txq)); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev), si->dst_reg, si->dst_reg, offsetof(struct xdp_txq_info, dev)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct net_device, ifindex)); break; } return insn - insn_buf; } /* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of * context Structure, F is Field in context structure that contains a pointer * to Nested Structure of type NS that has the field NF. * * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make * sure that SIZE is not greater than actual size of S.F.NF. * * If offset OFF is provided, the load happens from that offset relative to * offset of NF. */ #define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \ do { \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \ si->src_reg, offsetof(S, F)); \ *insn++ = BPF_LDX_MEM( \ SIZE, si->dst_reg, si->dst_reg, \ bpf_target_off(NS, NF, sizeof_field(NS, NF), \ target_size) \ + OFF); \ } while (0) #define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \ BPF_FIELD_SIZEOF(NS, NF), 0) /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation. * * In addition it uses Temporary Field TF (member of struct S) as the 3rd * "register" since two registers available in convert_ctx_access are not * enough: we can't override neither SRC, since it contains value to store, nor * DST since it contains pointer to context that may be used by later * instructions. But we need a temporary place to save pointer to nested * structure whose field we want to store to. 
*/ #define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \ do { \ int tmp_reg = BPF_REG_9; \ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ --tmp_reg; \ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ --tmp_reg; \ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \ offsetof(S, TF)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ si->dst_reg, offsetof(S, F)); \ *insn++ = BPF_RAW_INSN(SIZE | BPF_MEM | BPF_CLASS(si->code), \ tmp_reg, si->src_reg, \ bpf_target_off(NS, NF, sizeof_field(NS, NF), \ target_size) \ + OFF, \ si->imm); \ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ offsetof(S, TF)); \ } while (0) #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \ TF) \ do { \ if (type == BPF_WRITE) { \ SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \ OFF, TF); \ } else { \ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \ S, NS, F, NF, SIZE, OFF); \ } \ } while (0) #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \ S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF) static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port); struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct bpf_sock_addr, user_family): SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, struct sockaddr, uaddr, sa_family); break; case offsetof(struct bpf_sock_addr, user_ip4): SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( struct bpf_sock_addr_kern, struct sockaddr_in, uaddr, sin_addr, BPF_SIZE(si->code), 0, tmp_reg); break; case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): off = si->off; off -= offsetof(struct bpf_sock_addr, user_ip6[0]); SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr, sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg); break; case offsetof(struct bpf_sock_addr, user_port): /* To get port we need to know sa_family first and then treat * sockaddr as either sockaddr_in or sockaddr_in6. * Though we can simplify since port field has same offset and * size in both structures. * Here we check this invariant and use just one of the * structures if it's true. */ BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) != offsetof(struct sockaddr_in6, sin6_port)); BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) != sizeof_field(struct sockaddr_in6, sin6_port)); /* Account for sin6_port being smaller than user_port. */ port_size = min(port_size, BPF_LDST_BYTES(si)); SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr, sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg); break; case offsetof(struct bpf_sock_addr, family): SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, struct sock, sk, sk_family); break; case offsetof(struct bpf_sock_addr, type): SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, struct sock, sk, sk_type); break; case offsetof(struct bpf_sock_addr, protocol): SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, struct sock, sk, sk_protocol); break; case offsetof(struct bpf_sock_addr, msg_src_ip4): /* Treat t_ctx as struct in_addr for msg_src_ip4. 
*/ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( struct bpf_sock_addr_kern, struct in_addr, t_ctx, s_addr, BPF_SIZE(si->code), 0, tmp_reg); break; case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], msg_src_ip6[3]): off = si->off; off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]); /* Treat t_ctx as struct in6_addr for msg_src_ip6. */ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( struct bpf_sock_addr_kern, struct in6_addr, t_ctx, s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg); break; case offsetof(struct bpf_sock_addr, sk): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_addr_kern, sk)); break; } return insn - insn_buf; } static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; /* Helper macro for adding read access to tcp_sock or sock fields. */ #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ do { \ int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \ if (si->dst_reg == reg || si->src_reg == reg) \ reg--; \ if (si->dst_reg == reg || si->src_reg == reg) \ reg--; \ if (si->dst_reg == si->src_reg) { \ *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ fullsock_reg = reg; \ jmp += 2; \ } \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ is_fullsock), \ fullsock_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ is_fullsock)); \ *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ if (si->dst_reg == si->src_reg) \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, sk),\ si->dst_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, sk));\ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ OBJ_FIELD), \ si->dst_reg, si->dst_reg, \ offsetof(OBJ, OBJ_FIELD)); \ if (si->dst_reg == si->src_reg) { \ *insn++ = BPF_JMP_A(1); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ } \ } while (0) #define SOCK_OPS_GET_SK() \ do { \ int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \ if (si->dst_reg == reg || si->src_reg == reg) \ reg--; \ if (si->dst_reg == reg || si->src_reg == reg) \ reg--; \ if (si->dst_reg == si->src_reg) { \ *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ fullsock_reg = reg; \ jmp += 2; \ } \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ is_fullsock), \ fullsock_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ is_fullsock)); \ *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ if (si->dst_reg == si->src_reg) \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, sk),\ si->dst_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, sk));\ if (si->dst_reg == si->src_reg) { \ *insn++ = BPF_JMP_A(1); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ } \ } while (0) #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) /* Helper macro for adding write access to tcp_sock or sock fields. 
* The macro is called with two registers, dst_reg which contains a pointer * to ctx (context) and src_reg which contains the value that should be * stored. However, we need an additional register since we cannot overwrite * dst_reg because it may be used later in the program. * Instead we "borrow" one of the other register. We first save its value * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore * it at the end of the macro. */ #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ do { \ int reg = BPF_REG_9; \ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \ if (si->dst_reg == reg || si->src_reg == reg) \ reg--; \ if (si->dst_reg == reg || si->src_reg == reg) \ reg--; \ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ is_fullsock), \ reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, \ is_fullsock)); \ *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, sk),\ reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, sk));\ *insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) | \ BPF_MEM | BPF_CLASS(si->code), \ reg, si->src_reg, \ offsetof(OBJ, OBJ_FIELD), \ si->imm); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ } while (0) #define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ do { \ if (TYPE == BPF_WRITE) \ SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ else \ SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ } while (0) switch (si->off) { case offsetof(struct bpf_sock_ops, op): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, op), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, op)); break; case offsetof(struct bpf_sock_ops, replylong[0]) ... 
offsetof(struct bpf_sock_ops, replylong[3]): BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) != sizeof_field(struct bpf_sock_ops_kern, reply)); BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) != sizeof_field(struct bpf_sock_ops_kern, replylong)); off = si->off; off -= offsetof(struct bpf_sock_ops, replylong[0]); off += offsetof(struct bpf_sock_ops_kern, replylong[0]); if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_W, si, off); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); break; case offsetof(struct bpf_sock_ops, family): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_family)); break; case offsetof(struct bpf_sock_ops, remote_ip4): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_daddr)); break; case offsetof(struct bpf_sock_ops, local_ip4): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_rcv_saddr) != 4); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_rcv_saddr)); break; case offsetof(struct bpf_sock_ops, remote_ip6[0]) ... offsetof(struct bpf_sock_ops, remote_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) BUILD_BUG_ON(sizeof_field(struct sock_common, skc_v6_daddr.s6_addr32[0]) != 4); off = si->off; off -= offsetof(struct bpf_sock_ops, remote_ip6[0]); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_v6_daddr.s6_addr32[0]) + off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; case offsetof(struct bpf_sock_ops, local_ip6[0]) ... 
offsetof(struct bpf_sock_ops, local_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) BUILD_BUG_ON(sizeof_field(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) != 4); off = si->off; off -= offsetof(struct bpf_sock_ops, local_ip6[0]); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) + off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; case offsetof(struct bpf_sock_ops, remote_port): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_dport)); #ifndef __BIG_ENDIAN_BITFIELD *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); #endif break; case offsetof(struct bpf_sock_ops, local_port): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_num)); break; case offsetof(struct bpf_sock_ops, is_fullsock): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, is_fullsock), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, is_fullsock)); break; case offsetof(struct bpf_sock_ops, state): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_state)); break; case offsetof(struct bpf_sock_ops, rtt_min): BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) != sizeof(struct minmax)); BUILD_BUG_ON(sizeof(struct minmax) < sizeof(struct minmax_sample)); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct bpf_sock_ops_kern, sk), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct tcp_sock, rtt_min) + sizeof_field(struct minmax_sample, t)); break; case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags): SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags, struct tcp_sock); break; case offsetof(struct bpf_sock_ops, sk_txhash): SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, struct sock, type); break; case offsetof(struct bpf_sock_ops, snd_cwnd): SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd); break; case offsetof(struct bpf_sock_ops, srtt_us): SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us); break; case offsetof(struct bpf_sock_ops, snd_ssthresh): SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh); break; case offsetof(struct bpf_sock_ops, rcv_nxt): SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt); break; case offsetof(struct bpf_sock_ops, snd_nxt): SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt); break; case offsetof(struct bpf_sock_ops, snd_una): SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una); break; case offsetof(struct bpf_sock_ops, mss_cache): SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache); break; case offsetof(struct bpf_sock_ops, ecn_flags): SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags); break; case offsetof(struct bpf_sock_ops, rate_delivered): SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered); break; case offsetof(struct bpf_sock_ops, rate_interval_us): 
SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us); break; case offsetof(struct bpf_sock_ops, packets_out): SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out); break; case offsetof(struct bpf_sock_ops, retrans_out): SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out); break; case offsetof(struct bpf_sock_ops, total_retrans): SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans); break; case offsetof(struct bpf_sock_ops, segs_in): SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in); break; case offsetof(struct bpf_sock_ops, data_segs_in): SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in); break; case offsetof(struct bpf_sock_ops, segs_out): SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out); break; case offsetof(struct bpf_sock_ops, data_segs_out): SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out); break; case offsetof(struct bpf_sock_ops, lost_out): SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out); break; case offsetof(struct bpf_sock_ops, sacked_out): SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out); break; case offsetof(struct bpf_sock_ops, bytes_received): SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received); break; case offsetof(struct bpf_sock_ops, bytes_acked): SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked); break; case offsetof(struct bpf_sock_ops, sk): SOCK_OPS_GET_SK(); break; case offsetof(struct bpf_sock_ops, skb_data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, skb_data_end), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, skb_data_end)); break; case offsetof(struct bpf_sock_ops, skb_data): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, skb), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, skb)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), si->dst_reg, si->dst_reg, offsetof(struct sk_buff, data)); break; case offsetof(struct bpf_sock_ops, skb_len): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, skb), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, skb)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len), si->dst_reg, si->dst_reg, offsetof(struct sk_buff, len)); break; case offsetof(struct bpf_sock_ops, skb_tcp_flags): off = offsetof(struct sk_buff, cb); off += offsetof(struct tcp_skb_cb, tcp_flags); *target_size = sizeof_field(struct tcp_skb_cb, tcp_flags); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, skb), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, skb)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb, tcp_flags), si->dst_reg, si->dst_reg, off); break; case offsetof(struct bpf_sock_ops, skb_hwtstamp): { struct bpf_insn *jmp_on_null_skb; *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, skb), si->dst_reg, si->src_reg, offsetof(struct bpf_sock_ops_kern, skb)); /* Reserve one insn to test skb == NULL */ jmp_on_null_skb = insn++; insn = bpf_convert_shinfo_access(si->dst_reg, si->dst_reg, insn); *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, hwtstamps, 8, target_size)); *jmp_on_null_skb = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, insn - jmp_on_null_skb - 1); break; } } return insn - insn_buf; } /* data_end = skb->data + skb_headlen() */ static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si, struct bpf_insn *insn) { int reg; int temp_reg_off = offsetof(struct sk_buff, cb) + offsetof(struct sk_skb_cb, temp_reg); if (si->src_reg == si->dst_reg) { /* We need an extra register, choose and save a 
register. */ reg = BPF_REG_9; if (si->src_reg == reg || si->dst_reg == reg) reg--; if (si->src_reg == reg || si->dst_reg == reg) reg--; *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off); } else { reg = si->dst_reg; } /* reg = skb->data */ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), reg, si->src_reg, offsetof(struct sk_buff, data)); /* AX = skb->len */ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len), BPF_REG_AX, si->src_reg, offsetof(struct sk_buff, len)); /* reg = skb->data + skb->len */ *insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX); /* AX = skb->data_len */ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len), BPF_REG_AX, si->src_reg, offsetof(struct sk_buff, data_len)); /* reg = skb->data + skb->len - skb->data_len */ *insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX); if (si->src_reg == si->dst_reg) { /* Restore the saved register */ *insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg); *insn++ = BPF_MOV64_REG(si->dst_reg, reg); *insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off); } return insn; } static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; switch (si->off) { case offsetof(struct __sk_buff, data_end): insn = bpf_convert_data_end_access(si, insn); break; case offsetof(struct __sk_buff, cb[0]) ... offsetofend(struct __sk_buff, cb[4]) - 1: BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20); BUILD_BUG_ON((offsetof(struct sk_buff, cb) + offsetof(struct sk_skb_cb, data)) % sizeof(__u64)); prog->cb_access = 1; off = si->off; off -= offsetof(struct __sk_buff, cb[0]); off += offsetof(struct sk_buff, cb); off += offsetof(struct sk_skb_cb, data); if (type == BPF_WRITE) *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off); else *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); break; default: return bpf_convert_ctx_access(type, si, insn_buf, prog, target_size); } return insn - insn_buf; } static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; #if IS_ENABLED(CONFIG_IPV6) int off; #endif /* convert ctx uses the fact sg element is first in struct */ BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0); switch (si->off) { case offsetof(struct sk_msg_md, data): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data), si->dst_reg, si->src_reg, offsetof(struct sk_msg, data)); break; case offsetof(struct sk_msg_md, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end), si->dst_reg, si->src_reg, offsetof(struct sk_msg, data_end)); break; case offsetof(struct sk_msg_md, family): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), si->dst_reg, si->src_reg, offsetof(struct sk_msg, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_family)); break; case offsetof(struct sk_msg_md, remote_ip4): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), si->dst_reg, si->src_reg, offsetof(struct sk_msg, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_daddr)); break; case offsetof(struct sk_msg_md, local_ip4): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_rcv_saddr) != 4); *insn++ = 
BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), si->dst_reg, si->src_reg, offsetof(struct sk_msg, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_rcv_saddr)); break; case offsetof(struct sk_msg_md, remote_ip6[0]) ... offsetof(struct sk_msg_md, remote_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) BUILD_BUG_ON(sizeof_field(struct sock_common, skc_v6_daddr.s6_addr32[0]) != 4); off = si->off; off -= offsetof(struct sk_msg_md, remote_ip6[0]); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), si->dst_reg, si->src_reg, offsetof(struct sk_msg, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_v6_daddr.s6_addr32[0]) + off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; case offsetof(struct sk_msg_md, local_ip6[0]) ... offsetof(struct sk_msg_md, local_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) BUILD_BUG_ON(sizeof_field(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) != 4); off = si->off; off -= offsetof(struct sk_msg_md, local_ip6[0]); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), si->dst_reg, si->src_reg, offsetof(struct sk_msg, sk)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_v6_rcv_saddr.s6_addr32[0]) + off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; case offsetof(struct sk_msg_md, remote_port): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), si->dst_reg, si->src_reg, offsetof(struct sk_msg, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_dport)); #ifndef __BIG_ENDIAN_BITFIELD *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); #endif break; case offsetof(struct sk_msg_md, local_port): BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( struct sk_msg, sk), si->dst_reg, si->src_reg, offsetof(struct sk_msg, sk)); *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_num)); break; case offsetof(struct sk_msg_md, size): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size), si->dst_reg, si->src_reg, offsetof(struct sk_msg_sg, size)); break; case offsetof(struct sk_msg_md, sk): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk), si->dst_reg, si->src_reg, offsetof(struct sk_msg, sk)); break; } return insn - insn_buf; } const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_ld_abs = bpf_gen_ld_abs, }; const struct bpf_prog_ops sk_filter_prog_ops = { .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, .gen_ld_abs = bpf_gen_ld_abs, .btf_struct_access = tc_cls_act_btf_struct_access, }; const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, .gen_prologue = bpf_noop_prologue, .btf_struct_access = xdp_btf_struct_access, }; const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; const struct bpf_verifier_ops cg_skb_verifier_ops = { 
.get_func_proto = cg_skb_func_proto, .is_valid_access = cg_skb_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops lwt_in_verifier_ops = { .get_func_proto = lwt_in_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; const struct bpf_prog_ops lwt_in_prog_ops = { .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops lwt_out_verifier_ops = { .get_func_proto = lwt_out_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; const struct bpf_prog_ops lwt_out_prog_ops = { .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops lwt_xmit_verifier_ops = { .get_func_proto = lwt_xmit_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, }; const struct bpf_prog_ops lwt_xmit_prog_ops = { .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops lwt_seg6local_verifier_ops = { .get_func_proto = lwt_seg6local_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; const struct bpf_prog_ops lwt_seg6local_prog_ops = { .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops cg_sock_verifier_ops = { .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = bpf_sock_convert_ctx_access, }; const struct bpf_prog_ops cg_sock_prog_ops = { }; const struct bpf_verifier_ops cg_sock_addr_verifier_ops = { .get_func_proto = sock_addr_func_proto, .is_valid_access = sock_addr_is_valid_access, .convert_ctx_access = sock_addr_convert_ctx_access, }; const struct bpf_prog_ops cg_sock_addr_prog_ops = { }; const struct bpf_verifier_ops sock_ops_verifier_ops = { .get_func_proto = sock_ops_func_proto, .is_valid_access = sock_ops_is_valid_access, .convert_ctx_access = sock_ops_convert_ctx_access, }; const struct bpf_prog_ops sock_ops_prog_ops = { }; const struct bpf_verifier_ops sk_skb_verifier_ops = { .get_func_proto = sk_skb_func_proto, .is_valid_access = sk_skb_is_valid_access, .convert_ctx_access = sk_skb_convert_ctx_access, .gen_prologue = sk_skb_prologue, }; const struct bpf_prog_ops sk_skb_prog_ops = { }; const struct bpf_verifier_ops sk_msg_verifier_ops = { .get_func_proto = sk_msg_func_proto, .is_valid_access = sk_msg_is_valid_access, .convert_ctx_access = sk_msg_convert_ctx_access, .gen_prologue = bpf_noop_prologue, }; const struct bpf_prog_ops sk_msg_prog_ops = { }; const struct bpf_verifier_ops flow_dissector_verifier_ops = { .get_func_proto = flow_dissector_func_proto, .is_valid_access = flow_dissector_is_valid_access, .convert_ctx_access = flow_dissector_convert_ctx_access, }; const struct bpf_prog_ops flow_dissector_prog_ops = { .test_run = bpf_prog_test_run_flow_dissector, }; int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; struct sk_filter *filter; if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; filter = rcu_dereference_protected(sk->sk_filter, lockdep_sock_is_held(sk)); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); ret = 0; } return ret; } EXPORT_SYMBOL_GPL(sk_detach_filter); int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len) { struct sock_fprog_kern *fprog; struct sk_filter *filter; int ret = 0; sockopt_lock_sock(sk); filter = rcu_dereference_protected(sk->sk_filter, lockdep_sock_is_held(sk)); if 
(!filter) goto out; /* We're copying the filter that has been originally attached, * so no conversion/decode needed anymore. eBPF programs that * have no original program cannot be dumped through this. */ ret = -EACCES; fprog = filter->prog->orig_prog; if (!fprog) goto out; ret = fprog->len; if (!len) /* User space only enquires number of filter blocks. */ goto out; ret = -EINVAL; if (len < fprog->len) goto out; ret = -EFAULT; if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog))) goto out; /* Instead of bytes, the API requests to return the number * of filter blocks. */ ret = fprog->len; out: sockopt_release_sock(sk); return ret; } #ifdef CONFIG_INET static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern, struct sock_reuseport *reuse, struct sock *sk, struct sk_buff *skb, struct sock *migrating_sk, u32 hash) { reuse_kern->skb = skb; reuse_kern->sk = sk; reuse_kern->selected_sk = NULL; reuse_kern->migrating_sk = migrating_sk; reuse_kern->data_end = skb->data + skb_headlen(skb); reuse_kern->hash = hash; reuse_kern->reuseport_id = reuse->reuseport_id; reuse_kern->bind_inany = reuse->bind_inany; } struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, struct bpf_prog *prog, struct sk_buff *skb, struct sock *migrating_sk, u32 hash) { struct sk_reuseport_kern reuse_kern; enum sk_action action; bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash); action = bpf_prog_run(prog, &reuse_kern); if (action == SK_PASS) return reuse_kern.selected_sk; else return ERR_PTR(-ECONNREFUSED); } BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, struct bpf_map *, map, void *, key, u32, flags) { bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; struct sock_reuseport *reuse; struct sock *selected_sk; selected_sk = map->ops->map_lookup_elem(map, key); if (!selected_sk) return -ENOENT; reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ if (sk_is_refcounted(selected_sk)) sock_put(selected_sk); /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. * * Other maps (e.g. sock_map) do not provide this guarantee and * the sk may never be in the reuseport group to begin with. */ return is_sockarray ? -ENOENT : -EINVAL; } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { struct sock *sk = reuse_kern->sk; if (sk->sk_protocol != selected_sk->sk_protocol) return -EPROTOTYPE; else if (sk->sk_family != selected_sk->sk_family) return -EAFNOSUPPORT; /* Catch all. Likely bound to a different sockaddr. 
*/ return -EBADFD; } reuse_kern->selected_sk = selected_sk; return 0; } static const struct bpf_func_proto sk_select_reuseport_proto = { .func = sk_select_reuseport, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_PTR_TO_MAP_KEY, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(sk_reuseport_load_bytes, const struct sk_reuseport_kern *, reuse_kern, u32, offset, void *, to, u32, len) { return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len); } static const struct bpf_func_proto sk_reuseport_load_bytes_proto = { .func = sk_reuseport_load_bytes, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, }; BPF_CALL_5(sk_reuseport_load_bytes_relative, const struct sk_reuseport_kern *, reuse_kern, u32, offset, void *, to, u32, len, u32, start_header) { return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to, len, start_header); } static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = { .func = sk_reuseport_load_bytes_relative, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, .arg5_type = ARG_ANYTHING, }; static const struct bpf_func_proto * sk_reuseport_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_sk_select_reuseport: return &sk_select_reuseport_proto; case BPF_FUNC_skb_load_bytes: return &sk_reuseport_load_bytes_proto; case BPF_FUNC_skb_load_bytes_relative: return &sk_reuseport_load_bytes_relative_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_ptr_cookie_proto; case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: return bpf_base_func_proto(func_id, prog); } } static bool sk_reuseport_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const u32 size_default = sizeof(__u32); if (off < 0 || off >= sizeof(struct sk_reuseport_md) || off % size || type != BPF_READ) return false; switch (off) { case offsetof(struct sk_reuseport_md, data): info->reg_type = PTR_TO_PACKET; return size == sizeof(__u64); case offsetof(struct sk_reuseport_md, data_end): info->reg_type = PTR_TO_PACKET_END; return size == sizeof(__u64); case offsetof(struct sk_reuseport_md, hash): return size == size_default; case offsetof(struct sk_reuseport_md, sk): info->reg_type = PTR_TO_SOCKET; return size == sizeof(__u64); case offsetof(struct sk_reuseport_md, migrating_sk): info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; return size == sizeof(__u64); /* Fields that allow narrowing */ case bpf_ctx_range(struct sk_reuseport_md, eth_protocol): if (size < sizeof_field(struct sk_buff, protocol)) return false; fallthrough; case bpf_ctx_range(struct sk_reuseport_md, ip_protocol): case bpf_ctx_range(struct sk_reuseport_md, bind_inany): case bpf_ctx_range(struct sk_reuseport_md, len): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); default: return false; } } #define SK_REUSEPORT_LOAD_FIELD(F) ({ \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \ si->dst_reg, si->src_reg, \ bpf_target_off(struct sk_reuseport_kern, F, \ sizeof_field(struct sk_reuseport_kern, F), \ target_size)); \ }) #define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \ SOCK_ADDR_LOAD_NESTED_FIELD(struct 
sk_reuseport_kern, \ struct sk_buff, \ skb, \ SKB_FIELD) #define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \ SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \ struct sock, \ sk, \ SK_FIELD) static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct sk_reuseport_md, data): SK_REUSEPORT_LOAD_SKB_FIELD(data); break; case offsetof(struct sk_reuseport_md, len): SK_REUSEPORT_LOAD_SKB_FIELD(len); break; case offsetof(struct sk_reuseport_md, eth_protocol): SK_REUSEPORT_LOAD_SKB_FIELD(protocol); break; case offsetof(struct sk_reuseport_md, ip_protocol): SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol); break; case offsetof(struct sk_reuseport_md, data_end): SK_REUSEPORT_LOAD_FIELD(data_end); break; case offsetof(struct sk_reuseport_md, hash): SK_REUSEPORT_LOAD_FIELD(hash); break; case offsetof(struct sk_reuseport_md, bind_inany): SK_REUSEPORT_LOAD_FIELD(bind_inany); break; case offsetof(struct sk_reuseport_md, sk): SK_REUSEPORT_LOAD_FIELD(sk); break; case offsetof(struct sk_reuseport_md, migrating_sk): SK_REUSEPORT_LOAD_FIELD(migrating_sk); break; } return insn - insn_buf; } const struct bpf_verifier_ops sk_reuseport_verifier_ops = { .get_func_proto = sk_reuseport_func_proto, .is_valid_access = sk_reuseport_is_valid_access, .convert_ctx_access = sk_reuseport_convert_ctx_access, }; const struct bpf_prog_ops sk_reuseport_prog_ops = { }; DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled); EXPORT_SYMBOL(bpf_sk_lookup_enabled); BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, struct sock *, sk, u64, flags) { if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE | BPF_SK_LOOKUP_F_NO_REUSEPORT))) return -EINVAL; if (unlikely(sk && sk_is_refcounted(sk))) return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN)) return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */ if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE)) return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */ /* Check if socket is suitable for packet L3/L4 protocol */ if (sk && sk->sk_protocol != ctx->protocol) return -EPROTOTYPE; if (sk && sk->sk_family != ctx->family && (sk->sk_family == AF_INET || ipv6_only_sock(sk))) return -EAFNOSUPPORT; if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE)) return -EEXIST; /* Select socket as lookup result */ ctx->selected_sk = sk; ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT; return 0; } static const struct bpf_func_proto bpf_sk_lookup_assign_proto = { .func = bpf_sk_lookup_assign, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL, .arg3_type = ARG_ANYTHING, }; static const struct bpf_func_proto * sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; case BPF_FUNC_sk_assign: return &bpf_sk_lookup_assign_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; default: return bpf_sk_base_func_proto(func_id, prog); } } static bool sk_lookup_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (off < 0 || off >= sizeof(struct bpf_sk_lookup)) return false; if (off % size != 0) return false; if (type != BPF_READ) return false; switch (off) { case offsetof(struct 
bpf_sk_lookup, sk): info->reg_type = PTR_TO_SOCKET_OR_NULL; return size == sizeof(__u64); case bpf_ctx_range(struct bpf_sk_lookup, family): case bpf_ctx_range(struct bpf_sk_lookup, protocol): case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4): case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): case bpf_ctx_range(struct bpf_sk_lookup, local_port): case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): bpf_ctx_record_field_size(info, sizeof(__u32)); return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); case bpf_ctx_range(struct bpf_sk_lookup, remote_port): /* Allow 4-byte access to 2-byte field for backward compatibility */ if (size == sizeof(__u32)) return true; bpf_ctx_record_field_size(info, sizeof(__be16)); return bpf_ctx_narrow_access_ok(off, size, sizeof(__be16)); case offsetofend(struct bpf_sk_lookup, remote_port) ... offsetof(struct bpf_sk_lookup, local_ip4) - 1: /* Allow access to zero padding for backward compatibility */ bpf_ctx_record_field_size(info, sizeof(__u16)); return bpf_ctx_narrow_access_ok(off, size, sizeof(__u16)); default: return false; } } static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct bpf_sk_lookup, sk): *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, offsetof(struct bpf_sk_lookup_kern, selected_sk)); break; case offsetof(struct bpf_sk_lookup, family): *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct bpf_sk_lookup_kern, family, 2, target_size)); break; case offsetof(struct bpf_sk_lookup, protocol): *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct bpf_sk_lookup_kern, protocol, 2, target_size)); break; case offsetof(struct bpf_sk_lookup, remote_ip4): *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct bpf_sk_lookup_kern, v4.saddr, 4, target_size)); break; case offsetof(struct bpf_sk_lookup, local_ip4): *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct bpf_sk_lookup_kern, v4.daddr, 4, target_size)); break; case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): { #if IS_ENABLED(CONFIG_IPV6) int off = si->off; off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]); off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, offsetof(struct bpf_sk_lookup_kern, v6.saddr)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; } case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): { #if IS_ENABLED(CONFIG_IPV6) int off = si->off; off -= offsetof(struct bpf_sk_lookup, local_ip6[0]); off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, offsetof(struct bpf_sk_lookup_kern, v6.daddr)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); #else *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); #endif break; } case offsetof(struct bpf_sk_lookup, remote_port): *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct 
bpf_sk_lookup_kern, sport, 2, target_size)); break; case offsetofend(struct bpf_sk_lookup, remote_port): *target_size = 2; *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); break; case offsetof(struct bpf_sk_lookup, local_port): *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct bpf_sk_lookup_kern, dport, 2, target_size)); break; case offsetof(struct bpf_sk_lookup, ingress_ifindex): *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct bpf_sk_lookup_kern, ingress_ifindex, 4, target_size)); break; } return insn - insn_buf; } const struct bpf_prog_ops sk_lookup_prog_ops = { .test_run = bpf_prog_test_run_sk_lookup, }; const struct bpf_verifier_ops sk_lookup_verifier_ops = { .get_func_proto = sk_lookup_func_proto, .is_valid_access = sk_lookup_is_valid_access, .convert_ctx_access = sk_lookup_convert_ctx_access, }; #endif /* CONFIG_INET */ DEFINE_BPF_DISPATCHER(xdp) void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) { bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); } BTF_ID_LIST_GLOBAL(btf_sock_ids, MAX_BTF_SOCK_TYPE) #define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) BTF_SOCK_TYPE_xxx #undef BTF_SOCK_TYPE BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) { /* tcp6_sock type is not generated in dwarf and hence btf, * trigger an explicit type generation here. */ BTF_TYPE_EMIT(struct tcp6_sock); if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk->sk_family == AF_INET6) return (unsigned long)sk; return (unsigned long)NULL; } const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { .func = bpf_skc_to_tcp6_sock, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6], }; BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) { if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) return (unsigned long)sk; return (unsigned long)NULL; } const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { .func = bpf_skc_to_tcp_sock, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP], }; BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) { /* BTF types for tcp_timewait_sock and inet_timewait_sock are not * generated if CONFIG_INET=n. Trigger an explicit generation here. 
*/ BTF_TYPE_EMIT(struct inet_timewait_sock); BTF_TYPE_EMIT(struct tcp_timewait_sock); #ifdef CONFIG_INET if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif #if IS_BUILTIN(CONFIG_IPV6) if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif return (unsigned long)NULL; } const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = { .func = bpf_skc_to_tcp_timewait_sock, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW], }; BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) { #ifdef CONFIG_INET if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif #if IS_BUILTIN(CONFIG_IPV6) if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif return (unsigned long)NULL; } const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = { .func = bpf_skc_to_tcp_request_sock, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ], }; BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk) { /* udp6_sock type is not generated in dwarf and hence btf, * trigger an explicit type generation here. */ BTF_TYPE_EMIT(struct udp6_sock); if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6) return (unsigned long)sk; return (unsigned long)NULL; } const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { .func = bpf_skc_to_udp6_sock, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], }; BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk) { /* unix_sock type is not generated in dwarf and hence btf, * trigger an explicit type generation here. 
*/ BTF_TYPE_EMIT(struct unix_sock); if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX) return (unsigned long)sk; return (unsigned long)NULL; } const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { .func = bpf_skc_to_unix_sock, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX], }; BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk) { BTF_TYPE_EMIT(struct mptcp_sock); return (unsigned long)bpf_mptcp_sock_from_subflow(sk); } const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = { .func = bpf_skc_to_mptcp_sock, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .arg1_type = ARG_PTR_TO_SOCK_COMMON, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP], }; BPF_CALL_1(bpf_sock_from_file, struct file *, file) { return (unsigned long)sock_from_file(file); } BTF_ID_LIST(bpf_sock_from_file_btf_ids) BTF_ID(struct, socket) BTF_ID(struct, file) const struct bpf_func_proto bpf_sock_from_file_proto = { .func = bpf_sock_from_file, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .ret_btf_id = &bpf_sock_from_file_btf_ids[0], .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_sock_from_file_btf_ids[1], }; static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { const struct bpf_func_proto *func; switch (func_id) { case BPF_FUNC_skc_to_tcp6_sock: func = &bpf_skc_to_tcp6_sock_proto; break; case BPF_FUNC_skc_to_tcp_sock: func = &bpf_skc_to_tcp_sock_proto; break; case BPF_FUNC_skc_to_tcp_timewait_sock: func = &bpf_skc_to_tcp_timewait_sock_proto; break; case BPF_FUNC_skc_to_tcp_request_sock: func = &bpf_skc_to_tcp_request_sock_proto; break; case BPF_FUNC_skc_to_udp6_sock: func = &bpf_skc_to_udp6_sock_proto; break; case BPF_FUNC_skc_to_unix_sock: func = &bpf_skc_to_unix_sock_proto; break; case BPF_FUNC_skc_to_mptcp_sock: func = &bpf_skc_to_mptcp_sock_proto; break; case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: return bpf_base_func_proto(func_id, prog); } if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) return NULL; return func; } __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) { if (flags) { bpf_dynptr_set_null(ptr__uninit); return -EINVAL; } bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); return 0; } __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, struct bpf_dynptr_kern *ptr__uninit) { if (flags) { bpf_dynptr_set_null(ptr__uninit); return -EINVAL; } bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); return 0; } __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, const u8 *sun_path, u32 sun_path__sz) { struct sockaddr_un *un; if (sa_kern->sk->sk_family != AF_UNIX) return -EINVAL; /* We do not allow changing the address to unnamed or larger than the * maximum allowed address size for a unix sockaddr. 
*/ if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX) return -EINVAL; un = (struct sockaddr_un *)sa_kern->uaddr; memcpy(un->sun_path, sun_path, sun_path__sz); sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz; return 0; } __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, struct bpf_tcp_req_attrs *attrs, int attrs__sz) { #if IS_ENABLED(CONFIG_SYN_COOKIES) const struct request_sock_ops *ops; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct request_sock *req; struct net *net; __u16 min_mss; u32 tsoff = 0; if (attrs__sz != sizeof(*attrs) || attrs->reserved[0] || attrs->reserved[1] || attrs->reserved[2]) return -EINVAL; if (!skb_at_tc_ingress(skb)) return -EINVAL; net = dev_net(skb->dev); if (net != sock_net(sk)) return -ENETUNREACH; switch (skb->protocol) { case htons(ETH_P_IP): ops = &tcp_request_sock_ops; min_mss = 536; break; #if IS_BUILTIN(CONFIG_IPV6) case htons(ETH_P_IPV6): ops = &tcp6_request_sock_ops; min_mss = IPV6_MIN_MTU - 60; break; #endif default: return -EINVAL; } if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_LISTEN || sk_is_mptcp(sk)) return -EINVAL; if (attrs->mss < min_mss) return -EINVAL; if (attrs->wscale_ok) { if (!READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) return -EINVAL; if (attrs->snd_wscale > TCP_MAX_WSCALE || attrs->rcv_wscale > TCP_MAX_WSCALE) return -EINVAL; } if (attrs->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) return -EINVAL; if (attrs->tstamp_ok) { if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) return -EINVAL; tsoff = attrs->rcv_tsecr - tcp_ns_to_ts(attrs->usec_ts_ok, tcp_clock_ns()); } req = inet_reqsk_alloc(ops, sk, false); if (!req) return -ENOMEM; ireq = inet_rsk(req); treq = tcp_rsk(req); req->rsk_listener = sk; req->syncookie = 1; req->mss = attrs->mss; req->ts_recent = attrs->rcv_tsval; ireq->snd_wscale = attrs->snd_wscale; ireq->rcv_wscale = attrs->rcv_wscale; ireq->tstamp_ok = !!attrs->tstamp_ok; ireq->sack_ok = !!attrs->sack_ok; ireq->wscale_ok = !!attrs->wscale_ok; ireq->ecn_ok = !!attrs->ecn_ok; treq->req_usec_ts = !!attrs->usec_ts_ok; treq->ts_off = tsoff; skb_orphan(skb); skb->sk = req_to_sk(req); skb->destructor = sock_pfree; return 0; #else return -EOPNOTSUPP; #endif } __bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) { int err; err = bpf_dynptr_from_skb(skb, flags, ptr__uninit); if (err) return err; bpf_dynptr_set_rdonly(ptr__uninit); return 0; } BTF_KFUNCS_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb) BTF_KFUNCS_END(bpf_kfunc_check_set_skb) BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_KFUNCS_END(bpf_kfunc_check_set_xdp) BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr) BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr) BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk) BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk) static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, }; static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_xdp, }; static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_sock_addr, }; static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_tcp_reqsk, }; static int __init 
bpf_kfunc_init(void) { int ret; ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCKET_FILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_OUT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, &bpf_kfunc_set_sock_addr); return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk); } late_initcall(bpf_kfunc_init); __bpf_kfunc_start_defs(); /* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. * * The function expects a non-NULL pointer to a socket, and invokes the * protocol specific socket destroy handlers. * * The helper can only be called from BPF contexts that have acquired the socket * locks. * * Parameters: * @sock: Pointer to socket to be destroyed * * Return: * On error, may return EPROTONOSUPPORT, EINVAL. * EPROTONOSUPPORT if protocol specific destroy handler is not supported. * 0 otherwise */ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) { struct sock *sk = (struct sock *)sock; /* The locking semantics that allow for synchronous execution of the * destroy handlers are only supported for TCP and UDP. * Supporting protocols will need to acquire sock lock in the BPF context * prior to invoking this kfunc. */ if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_UDP)) return -EOPNOTSUPP; return sk->sk_prot->diag_destroy(sk, ECONNABORTED); } __bpf_kfunc_end_defs(); BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids) static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) { if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) && prog->expected_attach_type != BPF_TRACE_ITER) return -EACCES; return 0; } static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = { .owner = THIS_MODULE, .set = &bpf_sk_iter_kfunc_ids, .filter = tracing_iter_filter, }; static int init_subsystem(void) { return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set); } late_initcall(init_subsystem);
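/*
 * Illustrative sketch (not part of the file above): one way a BPF socket
 * iterator program might call the bpf_sock_destroy() kfunc registered for
 * BPF_PROG_TYPE_TRACING/BPF_TRACE_ITER above. The tcp iterator runs the
 * program with the socket lock held, which is what bpf_sock_destroy()
 * requires. The section name, the port filter and the build setup
 * (vmlinux.h, libbpf) are assumptions made for this example only.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

int bpf_sock_destroy(struct sock_common *sk) __ksym;

SEC("iter/tcp")
int destroy_tcp_8080(struct bpf_iter__tcp *ctx)
{
	struct sock_common *sk_common = ctx->sk_common;

	if (!sk_common)
		return 0;

	/* skc_num is the local port in host byte order. */
	if (sk_common->skc_num == 8080)
		bpf_sock_destroy(sk_common);

	return 0;
}

char _license[] SEC("license") = "GPL";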
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/char_dev.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/init.h> #include <linux/fs.h> #include <linux/kdev_t.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/major.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/kobject.h> #include <linux/kobj_map.h> #include <linux/cdev.h> #include <linux/mutex.h> #include <linux/backing-dev.h> #include <linux/tty.h> #include "internal.h" static struct kobj_map *cdev_map __ro_after_init; static DEFINE_MUTEX(chrdevs_lock); #define CHRDEV_MAJOR_HASH_SIZE 255 static struct char_device_struct { struct char_device_struct *next; unsigned int major; unsigned int baseminor; int minorct; char name[64]; struct cdev *cdev; /* will die */ }
*chrdevs[CHRDEV_MAJOR_HASH_SIZE]; /* index in the above */ static inline int major_to_index(unsigned major) { return major % CHRDEV_MAJOR_HASH_SIZE; } #ifdef CONFIG_PROC_FS void chrdev_show(struct seq_file *f, off_t offset) { struct char_device_struct *cd; mutex_lock(&chrdevs_lock); for (cd = chrdevs[major_to_index(offset)]; cd; cd = cd->next) { if (cd->major == offset) seq_printf(f, "%3d %s\n", cd->major, cd->name); } mutex_unlock(&chrdevs_lock); } #endif /* CONFIG_PROC_FS */ static int find_dynamic_major(void) { int i; struct char_device_struct *cd; for (i = ARRAY_SIZE(chrdevs)-1; i >= CHRDEV_MAJOR_DYN_END; i--) { if (chrdevs[i] == NULL) return i; } for (i = CHRDEV_MAJOR_DYN_EXT_START; i >= CHRDEV_MAJOR_DYN_EXT_END; i--) { for (cd = chrdevs[major_to_index(i)]; cd; cd = cd->next) if (cd->major == i) break; if (cd == NULL) return i; } return -EBUSY; } /* * Register a single major with a specified minor range. * * If major == 0 this function will dynamically allocate an unused major. * If major > 0 this function will attempt to reserve the range of minors * with given major. * */ static struct char_device_struct * __register_chrdev_region(unsigned int major, unsigned int baseminor, int minorct, const char *name) { struct char_device_struct *cd, *curr, *prev = NULL; int ret; int i; if (major >= CHRDEV_MAJOR_MAX) { pr_err("CHRDEV \"%s\" major requested (%u) is greater than the maximum (%u)\n", name, major, CHRDEV_MAJOR_MAX-1); return ERR_PTR(-EINVAL); } if (minorct > MINORMASK + 1 - baseminor) { pr_err("CHRDEV \"%s\" minor range requested (%u-%u) is out of range of maximum range (%u-%u) for a single major\n", name, baseminor, baseminor + minorct - 1, 0, MINORMASK); return ERR_PTR(-EINVAL); } cd = kzalloc(sizeof(struct char_device_struct), GFP_KERNEL); if (cd == NULL) return ERR_PTR(-ENOMEM); mutex_lock(&chrdevs_lock); if (major == 0) { ret = find_dynamic_major(); if (ret < 0) { pr_err("CHRDEV \"%s\" dynamic allocation region is full\n", name); goto out; } major = ret; } ret = -EBUSY; i = major_to_index(major); for (curr = chrdevs[i]; curr; prev = curr, curr = curr->next) { if (curr->major < major) continue; if (curr->major > major) break; if (curr->baseminor + curr->minorct <= baseminor) continue; if (curr->baseminor >= baseminor + minorct) break; goto out; } cd->major = major; cd->baseminor = baseminor; cd->minorct = minorct; strscpy(cd->name, name, sizeof(cd->name)); if (!prev) { cd->next = curr; chrdevs[i] = cd; } else { cd->next = prev->next; prev->next = cd; } mutex_unlock(&chrdevs_lock); return cd; out: mutex_unlock(&chrdevs_lock); kfree(cd); return ERR_PTR(ret); } static struct char_device_struct * __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct) { struct char_device_struct *cd = NULL, **cp; int i = major_to_index(major); mutex_lock(&chrdevs_lock); for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) if ((*cp)->major == major && (*cp)->baseminor == baseminor && (*cp)->minorct == minorct) break; if (*cp) { cd = *cp; *cp = cd->next; } mutex_unlock(&chrdevs_lock); return cd; } /** * register_chrdev_region() - register a range of device numbers * @from: the first in the desired range of device numbers; must include * the major number. * @count: the number of consecutive device numbers required * @name: the name of the device or driver. * * Return value is zero on success, a negative error code on failure. 
*/ int register_chrdev_region(dev_t from, unsigned count, const char *name) { struct char_device_struct *cd; dev_t to = from + count; dev_t n, next; for (n = from; n < to; n = next) { next = MKDEV(MAJOR(n)+1, 0); if (next > to) next = to; cd = __register_chrdev_region(MAJOR(n), MINOR(n), next - n, name); if (IS_ERR(cd)) goto fail; } return 0; fail: to = n; for (n = from; n < to; n = next) { next = MKDEV(MAJOR(n)+1, 0); kfree(__unregister_chrdev_region(MAJOR(n), MINOR(n), next - n)); } return PTR_ERR(cd); } /** * alloc_chrdev_region() - register a range of char device numbers * @dev: output parameter for first assigned number * @baseminor: first of the requested range of minor numbers * @count: the number of minor numbers required * @name: the name of the associated device or driver * * Allocates a range of char device numbers. The major number will be * chosen dynamically, and returned (along with the first minor number) * in @dev. Returns zero or a negative error code. */ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, const char *name) { struct char_device_struct *cd; cd = __register_chrdev_region(0, baseminor, count, name); if (IS_ERR(cd)) return PTR_ERR(cd); *dev = MKDEV(cd->major, cd->baseminor); return 0; } /** * __register_chrdev() - create and register a cdev occupying a range of minors * @major: major device number or 0 for dynamic allocation * @baseminor: first of the requested range of minor numbers * @count: the number of minor numbers required * @name: name of this range of devices * @fops: file operations associated with these devices * * If @major == 0 this function will dynamically allocate a major and return * its number. * * If @major > 0 this function will attempt to reserve a device with the given * major number and will return zero on success. * * Returns a -ve errno on failure. * * The name of this device has nothing to do with the name of the device in * /dev. It only helps to keep track of the different owners of devices. If * your module name has only one type of devices it's ok to use e.g. the name * of the module here. */ int __register_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops) { struct char_device_struct *cd; struct cdev *cdev; int err = -ENOMEM; cd = __register_chrdev_region(major, baseminor, count, name); if (IS_ERR(cd)) return PTR_ERR(cd); cdev = cdev_alloc(); if (!cdev) goto out2; cdev->owner = fops->owner; cdev->ops = fops; kobject_set_name(&cdev->kobj, "%s", name); err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); if (err) goto out; cd->cdev = cdev; return major ? 0 : cd->major; out: kobject_put(&cdev->kobj); out2: kfree(__unregister_chrdev_region(cd->major, baseminor, count)); return err; } /** * unregister_chrdev_region() - unregister a range of device numbers * @from: the first in the range of numbers to unregister * @count: the number of device numbers to unregister * * This function will unregister a range of @count device numbers, * starting with @from. The caller should normally be the one who * allocated those numbers in the first place...
*/ void unregister_chrdev_region(dev_t from, unsigned count) { dev_t to = from + count; dev_t n, next; for (n = from; n < to; n = next) { next = MKDEV(MAJOR(n)+1, 0); if (next > to) next = to; kfree(__unregister_chrdev_region(MAJOR(n), MINOR(n), next - n)); } } /** * __unregister_chrdev - unregister and destroy a cdev * @major: major device number * @baseminor: first of the range of minor numbers * @count: the number of minor numbers this cdev is occupying * @name: name of this range of devices * * Unregister and destroy the cdev occupying the region described by * @major, @baseminor and @count. This function undoes what * __register_chrdev() did. */ void __unregister_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name) { struct char_device_struct *cd; cd = __unregister_chrdev_region(major, baseminor, count); if (cd && cd->cdev) cdev_del(cd->cdev); kfree(cd); } static DEFINE_SPINLOCK(cdev_lock); static struct kobject *cdev_get(struct cdev *p) { struct module *owner = p->owner; struct kobject *kobj; if (!try_module_get(owner)) return NULL; kobj = kobject_get_unless_zero(&p->kobj); if (!kobj) module_put(owner); return kobj; } void cdev_put(struct cdev *p) { if (p) { struct module *owner = p->owner; kobject_put(&p->kobj); module_put(owner); } } /* * Called every time a character special file is opened */ static int chrdev_open(struct inode *inode, struct file *filp) { const struct file_operations *fops; struct cdev *p; struct cdev *new = NULL; int ret = 0; spin_lock(&cdev_lock); p = inode->i_cdev; if (!p) { struct kobject *kobj; int idx; spin_unlock(&cdev_lock); kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); if (!kobj) return -ENXIO; new = container_of(kobj, struct cdev, kobj); spin_lock(&cdev_lock); /* Check i_cdev again in case somebody beat us to it while we dropped the lock. */ p = inode->i_cdev; if (!p) { inode->i_cdev = p = new; list_add(&inode->i_devices, &p->list); new = NULL; } else if (!cdev_get(p)) ret = -ENXIO; } else if (!cdev_get(p)) ret = -ENXIO; spin_unlock(&cdev_lock); cdev_put(new); if (ret) return ret; ret = -ENXIO; fops = fops_get(p->ops); if (!fops) goto out_cdev_put; replace_fops(filp, fops); if (filp->f_op->open) { ret = filp->f_op->open(inode, filp); if (ret) goto out_cdev_put; } return 0; out_cdev_put: cdev_put(p); return ret; } void cd_forget(struct inode *inode) { spin_lock(&cdev_lock); list_del_init(&inode->i_devices); inode->i_cdev = NULL; inode->i_mapping = &inode->i_data; spin_unlock(&cdev_lock); } static void cdev_purge(struct cdev *cdev) { spin_lock(&cdev_lock); while (!list_empty(&cdev->list)) { struct inode *inode; inode = container_of(cdev->list.next, struct inode, i_devices); list_del_init(&inode->i_devices); inode->i_cdev = NULL; } spin_unlock(&cdev_lock); } /* * Dummy default file-operations: the only thing this does * is contain the open that then fills in the correct operations * depending on the special file... */ const struct file_operations def_chr_fops = { .open = chrdev_open, .llseek = noop_llseek, }; static struct kobject *exact_match(dev_t dev, int *part, void *data) { struct cdev *p = data; return &p->kobj; } static int exact_lock(dev_t dev, void *data) { struct cdev *p = data; return cdev_get(p) ? 
0 : -1; } /** * cdev_add() - add a char device to the system * @p: the cdev structure for the device * @dev: the first device number for which this device is responsible * @count: the number of consecutive minor numbers corresponding to this * device * * cdev_add() adds the device represented by @p to the system, making it * live immediately. A negative error code is returned on failure. */ int cdev_add(struct cdev *p, dev_t dev, unsigned count) { int error; p->dev = dev; p->count = count; if (WARN_ON(dev == WHITEOUT_DEV)) { error = -EBUSY; goto err; } error = kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p); if (error) goto err; kobject_get(p->kobj.parent); return 0; err: kfree_const(p->kobj.name); p->kobj.name = NULL; return error; } /** * cdev_set_parent() - set the parent kobject for a char device * @p: the cdev structure * @kobj: the kobject to take a reference to * * cdev_set_parent() sets a parent kobject which will be referenced * appropriately so the parent is not freed before the cdev. This * should be called before cdev_add. */ void cdev_set_parent(struct cdev *p, struct kobject *kobj) { WARN_ON(!kobj->state_initialized); p->kobj.parent = kobj; } /** * cdev_device_add() - add a char device and its corresponding * struct device, linking them together * @dev: the device structure * @cdev: the cdev structure * * cdev_device_add() adds the char device represented by @cdev to the system, * just as cdev_add() does. It then adds @dev to the system using device_add(). * The dev_t for the char device will be taken from the struct device which * needs to be initialized first. This helper function correctly takes a * reference to the parent device so the parent will not get released until * all references to the cdev are released. * * This helper uses dev->devt for the device number. If it is not set * it will not add the cdev and it will be equivalent to device_add. * * This function should be used whenever the struct cdev and the * struct device are members of the same structure whose lifetime is * managed by the struct device. * * NOTE: Callers must assume that userspace was able to open the cdev and * can call cdev fops callbacks at any time, even if this function fails. */ int cdev_device_add(struct cdev *cdev, struct device *dev) { int rc = 0; if (dev->devt) { cdev_set_parent(cdev, &dev->kobj); rc = cdev_add(cdev, dev->devt, 1); if (rc) return rc; } rc = device_add(dev); if (rc && dev->devt) cdev_del(cdev); return rc; } /** * cdev_device_del() - inverse of cdev_device_add * @dev: the device structure * @cdev: the cdev structure * * cdev_device_del() is a helper function to call cdev_del and device_del. * It should be used whenever cdev_device_add is used. * * If dev->devt is not set it will not remove the cdev and will be equivalent * to device_del. * * NOTE: This guarantees that associated sysfs callbacks are not running * or runnable, however any cdevs already open will remain and their fops * will still be callable even after this function returns. */ void cdev_device_del(struct cdev *cdev, struct device *dev) { device_del(dev); if (dev->devt) cdev_del(cdev); } static void cdev_unmap(dev_t dev, unsigned count) { kobj_unmap(cdev_map, dev, count); } /** * cdev_del() - remove a cdev from the system * @p: the cdev structure to be removed * * cdev_del() removes @p from the system, possibly freeing the structure * itself.
* * NOTE: This guarantees that cdev device will no longer be able to be * opened, however any cdevs already open will remain and their fops will * still be callable even after cdev_del returns. */ void cdev_del(struct cdev *p) { cdev_unmap(p->dev, p->count); kobject_put(&p->kobj); } static void cdev_default_release(struct kobject *kobj) { struct cdev *p = container_of(kobj, struct cdev, kobj); struct kobject *parent = kobj->parent; cdev_purge(p); kobject_put(parent); } static void cdev_dynamic_release(struct kobject *kobj) { struct cdev *p = container_of(kobj, struct cdev, kobj); struct kobject *parent = kobj->parent; cdev_purge(p); kfree(p); kobject_put(parent); } static struct kobj_type ktype_cdev_default = { .release = cdev_default_release, }; static struct kobj_type ktype_cdev_dynamic = { .release = cdev_dynamic_release, }; /** * cdev_alloc() - allocate a cdev structure * * Allocates and returns a cdev structure, or NULL on failure. */ struct cdev *cdev_alloc(void) { struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); if (p) { INIT_LIST_HEAD(&p->list); kobject_init(&p->kobj, &ktype_cdev_dynamic); } return p; } /** * cdev_init() - initialize a cdev structure * @cdev: the structure to initialize * @fops: the file_operations for this device * * Initializes @cdev, remembering @fops, making it ready to add to the * system with cdev_add(). */ void cdev_init(struct cdev *cdev, const struct file_operations *fops) { memset(cdev, 0, sizeof *cdev); INIT_LIST_HEAD(&cdev->list); kobject_init(&cdev->kobj, &ktype_cdev_default); cdev->ops = fops; } static struct kobject *base_probe(dev_t dev, int *part, void *data) { if (request_module("char-major-%d-%d", MAJOR(dev), MINOR(dev)) > 0) /* Make old-style 2.4 aliases work */ request_module("char-major-%d", MAJOR(dev)); return NULL; } void __init chrdev_init(void) { cdev_map = kobj_map_init(base_probe, &chrdevs_lock); } /* Let modules do char dev stuff */ EXPORT_SYMBOL(register_chrdev_region); EXPORT_SYMBOL(unregister_chrdev_region); EXPORT_SYMBOL(alloc_chrdev_region); EXPORT_SYMBOL(cdev_init); EXPORT_SYMBOL(cdev_alloc); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); EXPORT_SYMBOL(cdev_set_parent); EXPORT_SYMBOL(cdev_device_add); EXPORT_SYMBOL(cdev_device_del); EXPORT_SYMBOL(__register_chrdev); EXPORT_SYMBOL(__unregister_chrdev);
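/*
 * Illustrative sketch (not part of the file above): the typical calling
 * sequence for the chrdev API in a hypothetical driver module. The names
 * "demo", demo_fops, demo_init/demo_exit and DEMO_MINORS are invented for
 * this example.
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>

#define DEMO_MINORS 4

static dev_t demo_devt;
static struct cdev demo_cdev;

static const struct file_operations demo_fops = {
	.owner	= THIS_MODULE,
	.llseek	= noop_llseek,
	/* the .open/.read/.write hooks of a real driver would go here */
};

static int __init demo_init(void)
{
	int ret;

	/* Dynamically allocate a major plus DEMO_MINORS minor numbers. */
	ret = alloc_chrdev_region(&demo_devt, 0, DEMO_MINORS, "demo");
	if (ret)
		return ret;

	/* Initialize the statically embedded cdev and make it live. */
	cdev_init(&demo_cdev, &demo_fops);
	demo_cdev.owner = THIS_MODULE;
	ret = cdev_add(&demo_cdev, demo_devt, DEMO_MINORS);
	if (ret)
		unregister_chrdev_region(demo_devt, DEMO_MINORS);
	return ret;
}

static void __exit demo_exit(void)
{
	cdev_del(&demo_cdev);
	unregister_chrdev_region(demo_devt, DEMO_MINORS);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_DESCRIPTION("chrdev API usage sketch");
MODULE_LICENSE("GPL");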
/* * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * Copyright (c) 2009-2010, Code Aurora Forum. * Copyright 2016 Intel Corp. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE.
*/ #ifndef _DRM_DRV_H_ #define _DRM_DRV_H_ #include <linux/list.h> #include <linux/irqreturn.h> #include <video/nomodeset.h> #include <drm/drm_device.h> struct drm_file; struct drm_gem_object; struct drm_master; struct drm_minor; struct dma_buf; struct dma_buf_attachment; struct drm_display_mode; struct drm_mode_create_dumb; struct drm_printer; struct sg_table; /** * enum drm_driver_feature - feature flags * * See &drm_driver.driver_features, drm_device.driver_features and * drm_core_check_feature(). */ enum drm_driver_feature { /** * @DRIVER_GEM: * * Driver use the GEM memory manager. This should be set for all modern * drivers. */ DRIVER_GEM = BIT(0), /** * @DRIVER_MODESET: * * Driver supports mode setting interfaces (KMS). */ DRIVER_MODESET = BIT(1), /** * @DRIVER_RENDER: * * Driver supports dedicated render nodes. See also the :ref:`section on * render nodes <drm_render_node>` for details. */ DRIVER_RENDER = BIT(3), /** * @DRIVER_ATOMIC: * * Driver supports the full atomic modesetting userspace API. Drivers * which only use atomic internally, but do not support the full * userspace API (e.g. not all properties converted to atomic, or * multi-plane updates are not guaranteed to be tear-free) should not * set this flag. */ DRIVER_ATOMIC = BIT(4), /** * @DRIVER_SYNCOBJ: * * Driver supports &drm_syncobj for explicit synchronization of command * submission. */ DRIVER_SYNCOBJ = BIT(5), /** * @DRIVER_SYNCOBJ_TIMELINE: * * Driver supports the timeline flavor of &drm_syncobj for explicit * synchronization of command submission. */ DRIVER_SYNCOBJ_TIMELINE = BIT(6), /** * @DRIVER_COMPUTE_ACCEL: * * Driver supports compute acceleration devices. This flag is mutually exclusive with * @DRIVER_RENDER and @DRIVER_MODESET. Devices that support both graphics and compute * acceleration should be handled by two drivers that are connected using auxiliary bus. */ DRIVER_COMPUTE_ACCEL = BIT(7), /** * @DRIVER_GEM_GPUVA: * * Driver supports user defined GPU VA bindings for GEM objects. */ DRIVER_GEM_GPUVA = BIT(8), /** * @DRIVER_CURSOR_HOTSPOT: * * Driver supports and requires cursor hotspot information in the * cursor plane (e.g. cursor plane has to actually track the mouse * cursor and the clients are required to set hotspot in order for * the cursor planes to work correctly). */ DRIVER_CURSOR_HOTSPOT = BIT(9), /* IMPORTANT: Below are all the legacy flags, add new ones above. */ /** * @DRIVER_USE_AGP: * * Set up DRM AGP support, see drm_agp_init(), the DRM core will manage * AGP resources. New drivers don't need this. */ DRIVER_USE_AGP = BIT(25), /** * @DRIVER_LEGACY: * * Denote a legacy driver using shadow attach. Do not use. */ DRIVER_LEGACY = BIT(26), /** * @DRIVER_PCI_DMA: * * Driver is capable of PCI DMA, mapping of PCI DMA buffers to userspace * will be enabled. Only for legacy drivers. Do not use. */ DRIVER_PCI_DMA = BIT(27), /** * @DRIVER_SG: * * Driver can perform scatter/gather DMA, allocation and mapping of * scatter/gather buffers will be enabled. Only for legacy drivers. Do * not use. */ DRIVER_SG = BIT(28), /** * @DRIVER_HAVE_DMA: * * Driver supports DMA, the userspace DMA API will be supported. Only * for legacy drivers. Do not use. */ DRIVER_HAVE_DMA = BIT(29), /** * @DRIVER_HAVE_IRQ: * * Legacy irq support. Only for legacy drivers. Do not use. */ DRIVER_HAVE_IRQ = BIT(30), }; /** * struct drm_driver - DRM driver structure * * This structure represent the common code for a family of cards. There will be * one &struct drm_device for each card present in this family. 
It contains lots * of vfunc entries, and a pile of those probably should be moved to more * appropriate places like &drm_mode_config_funcs or into a new operations * structure for GEM drivers. */ struct drm_driver { /** * @load: * * Backward-compatible driver callback to complete initialization steps * after the driver is registered. For this reason, may suffer from * race conditions and its use is deprecated for new drivers. It is * therefore only supported for existing drivers not yet converted to * the new scheme. See devm_drm_dev_alloc() and drm_dev_register() for * proper and race-free way to set up a &struct drm_device. * * This is deprecated, do not use! * * Returns: * * Zero on success, non-zero value on failure. */ int (*load) (struct drm_device *, unsigned long flags); /** * @open: * * Driver callback when a new &struct drm_file is opened. Useful for * setting up driver-private data structures like buffer allocators, * execution contexts or similar things. Such driver-private resources * must be released again in @postclose. * * Since the display/modeset side of DRM can only be owned by exactly * one &struct drm_file (see &drm_file.is_master and &drm_device.master) * there should never be a need to set up any modeset related resources * in this callback. Doing so would be a driver design bug. * * Returns: * * 0 on success, a negative error code on failure, which will be * promoted to userspace as the result of the open() system call. */ int (*open) (struct drm_device *, struct drm_file *); /** * @postclose: * * One of the driver callbacks when a new &struct drm_file is closed. * Useful for tearing down driver-private data structures allocated in * @open like buffer allocators, execution contexts or similar things. * * Since the display/modeset side of DRM can only be owned by exactly * one &struct drm_file (see &drm_file.is_master and &drm_device.master) * there should never be a need to tear down any modeset related * resources in this callback. Doing so would be a driver design bug. */ void (*postclose) (struct drm_device *, struct drm_file *); /** * @lastclose: * * Called when the last &struct drm_file has been closed and there's * currently no userspace client for the &struct drm_device. * * Modern drivers should only use this to force-restore the fbdev * framebuffer using drm_fb_helper_restore_fbdev_mode_unlocked(). * Anything else would indicate there's something seriously wrong. * Modern drivers can also use this to execute delayed power switching * state changes, e.g. in conjunction with the :ref:`vga_switcheroo` * infrastructure. * * This is called after @postclose hook has been called. * * NOTE: * * All legacy drivers use this callback to de-initialize the hardware. * This is purely because of the shadow-attach model, where the DRM * kernel driver does not really own the hardware. Instead ownership is * handled with the help of userspace through an inherently racy dance * to set/unset the VT into raw mode. * * Legacy drivers initialize the hardware in the @firstopen callback, * which isn't even called for modern drivers. */ void (*lastclose) (struct drm_device *); /** * @unload: * * Reverse the effects of the driver load callback. Ideally, * the clean up performed by the driver should happen in the * reverse order of the initialization. Similarly to the load * hook, this handler is deprecated and its usage should be * dropped in favor of an open-coded teardown function at the * driver layer.
See drm_dev_unregister() and drm_dev_put() * for the proper way to remove a &struct drm_device. * * The unload() hook is called right after unregistering * the device. * */ void (*unload) (struct drm_device *); /** * @release: * * Optional callback for destroying device data after the final * reference is released, i.e. the device is being destroyed. * * This is deprecated, clean up all memory allocations associated with a * &drm_device using drmm_add_action(), drmm_kmalloc() and related * managed resources functions. */ void (*release) (struct drm_device *); /** * @master_set: * * Called whenever the minor master is set. Only used by vmwgfx. */ void (*master_set)(struct drm_device *dev, struct drm_file *file_priv, bool from_open); /** * @master_drop: * * Called whenever the minor master is dropped. Only used by vmwgfx. */ void (*master_drop)(struct drm_device *dev, struct drm_file *file_priv); /** * @debugfs_init: * * Allows drivers to create driver-specific debugfs files. */ void (*debugfs_init)(struct drm_minor *minor); /** * @gem_create_object: constructor for gem objects * * Hook for allocating the GEM object struct, for use by the CMA * and SHMEM GEM helpers. Returns a GEM object on success, or an * ERR_PTR()-encoded error code otherwise. */ struct drm_gem_object *(*gem_create_object)(struct drm_device *dev, size_t size); /** * @prime_handle_to_fd: * * PRIME export function. Only used by vmwgfx. */ int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd); /** * @prime_fd_to_handle: * * PRIME import function. Only used by vmwgfx. */ int (*prime_fd_to_handle)(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle); /** * @gem_prime_import: * * Import hook for GEM drivers. * * This defaults to drm_gem_prime_import() if not set. */ struct drm_gem_object * (*gem_prime_import)(struct drm_device *dev, struct dma_buf *dma_buf); /** * @gem_prime_import_sg_table: * * Optional hook used by the PRIME helper functions * drm_gem_prime_import() respectively drm_gem_prime_import_dev(). */ struct drm_gem_object *(*gem_prime_import_sg_table)( struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); /** * @dumb_create: * * This creates a new dumb buffer in the driver's backing storage manager (GEM, * TTM or something else entirely) and returns the resulting buffer handle. This * handle can then be wrapped up into a framebuffer modeset object. * * Note that userspace is not allowed to use such objects for render * acceleration - drivers must create their own private ioctls for such a use * case. * * Width, height and depth are specified in the &drm_mode_create_dumb * argument. The callback needs to fill the handle, pitch and size for * the created buffer. * * Called by the user via ioctl. * * Returns: * * Zero on success, negative errno on failure. */ int (*dumb_create)(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); /** * @dumb_map_offset: * * Allocate an offset in the drm device node's address space to be able to * memory map a dumb buffer. * * The default implementation is drm_gem_create_mmap_offset(). GEM based * drivers must not overwrite this. * * Called by the user via ioctl. * * Returns: * * Zero on success, negative errno on failure. */ int (*dumb_map_offset)(struct drm_file *file_priv, struct drm_device *dev, uint32_t handle, uint64_t *offset); /** * @show_fdinfo: * * Print device specific fdinfo. 
See Documentation/gpu/drm-usage-stats.rst. */ void (*show_fdinfo)(struct drm_printer *p, struct drm_file *f); /** @major: driver major number */ int major; /** @minor: driver minor number */ int minor; /** @patchlevel: driver patch level */ int patchlevel; /** @name: driver name */ char *name; /** @desc: driver description */ char *desc; /** @date: driver date */ char *date; /** * @driver_features: * Driver features, see &enum drm_driver_feature. Drivers can disable * some features on a per-instance basis using * &drm_device.driver_features. */ u32 driver_features; /** * @ioctls: * * Array of driver-private IOCTL description entries. See the chapter on * :ref:`IOCTL support in the userland interfaces * chapter<drm_driver_ioctl>` for the full details. */ const struct drm_ioctl_desc *ioctls; /** @num_ioctls: Number of entries in @ioctls. */ int num_ioctls; /** * @fops: * * File operations for the DRM device node. See the discussion in * :ref:`file operations<drm_driver_fops>` for in-depth coverage and * some examples. */ const struct file_operations *fops; }; void *__devm_drm_dev_alloc(struct device *parent, const struct drm_driver *driver, size_t size, size_t offset); /** * devm_drm_dev_alloc - Resource managed allocation of a &drm_device instance * @parent: Parent device object * @driver: DRM driver * @type: the type of the struct which contains struct &drm_device * @member: the name of the &drm_device within @type. * * This allocates and initializes a new DRM device. No device registration is done. * Call drm_dev_register() to advertise the device to user space and register it * with other core subsystems. This should be done last in the device * initialization sequence to make sure userspace can't access an inconsistent * state. * * The initial ref-count of the object is 1. Use drm_dev_get() and * drm_dev_put() to take and drop further ref-counts. * * It is recommended that drivers embed &struct drm_device into their own device * structure. * * Note that this manages the lifetime of the resulting &drm_device * automatically using devres. The DRM device initialized with this function is * automatically put on driver detach using drm_dev_put(). * * RETURNS: * Pointer to new DRM device, or ERR_PTR on failure. */ #define devm_drm_dev_alloc(parent, driver, type, member) \ ((type *) __devm_drm_dev_alloc(parent, driver, sizeof(type), \ offsetof(type, member))) struct drm_device *drm_dev_alloc(const struct drm_driver *driver, struct device *parent); int drm_dev_register(struct drm_device *dev, unsigned long flags); void drm_dev_unregister(struct drm_device *dev); void drm_dev_get(struct drm_device *dev); void drm_dev_put(struct drm_device *dev); void drm_put_dev(struct drm_device *dev); bool drm_dev_enter(struct drm_device *dev, int *idx); void drm_dev_exit(int idx); void drm_dev_unplug(struct drm_device *dev); /** * drm_dev_is_unplugged - is a DRM device unplugged * @dev: DRM device * * This function can be called to check whether a hotpluggable device is * unplugged. Unplugging itself is signalled through drm_dev_unplug(). If a * device is unplugged, these two functions guarantee that any store before * calling drm_dev_unplug() is visible to callers of this function after it * completes. * * WARNING: This function fundamentally races against drm_dev_unplug(). It is * recommended that drivers instead use the underlying drm_dev_enter() and * drm_dev_exit() function pairs.
*/ static inline bool drm_dev_is_unplugged(struct drm_device *dev) { int idx; if (drm_dev_enter(dev, &idx)) { drm_dev_exit(idx); return false; } return true; } /** * drm_core_check_all_features - check driver feature flags mask * @dev: DRM device to check * @features: feature flag(s) mask * * This checks @dev for driver features, see &drm_driver.driver_features, * &drm_device.driver_features, and the various &enum drm_driver_feature flags. * * Returns true if all features in the @features mask are supported, false * otherwise. */ static inline bool drm_core_check_all_features(const struct drm_device *dev, u32 features) { u32 supported = dev->driver->driver_features & dev->driver_features; return features && (supported & features) == features; } /** * drm_core_check_feature - check driver feature flags * @dev: DRM device to check * @feature: feature flag * * This checks @dev for driver features, see &drm_driver.driver_features, * &drm_device.driver_features, and the various &enum drm_driver_feature flags. * * Returns true if the @feature is supported, false otherwise. */ static inline bool drm_core_check_feature(const struct drm_device *dev, enum drm_driver_feature feature) { return drm_core_check_all_features(dev, feature); } /** * drm_drv_uses_atomic_modeset - check if the driver implements * atomic_commit() * @dev: DRM device * * This check is useful if drivers do not have DRIVER_ATOMIC set but * have atomic modesetting internally implemented. */ static inline bool drm_drv_uses_atomic_modeset(struct drm_device *dev) { return drm_core_check_feature(dev, DRIVER_ATOMIC) || (dev->mode_config.funcs && dev->mode_config.funcs->atomic_commit != NULL); } /* TODO: Inline drm_firmware_drivers_only() in all its callers. */ static inline bool drm_firmware_drivers_only(void) { return video_firmware_drivers_only(); } #if defined(CONFIG_DEBUG_FS) void drm_debugfs_dev_init(struct drm_device *dev, struct dentry *root); #else static inline void drm_debugfs_dev_init(struct drm_device *dev, struct dentry *root) { } #endif #endif
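/*
 * Illustrative sketch (not part of the header above): the recommended
 * devm_drm_dev_alloc() + drm_dev_register() pattern described in the
 * kerneldoc above, for a hypothetical driver. struct demo_device,
 * demo_drm_driver and demo_probe() are invented names; fops, mode setting
 * setup and most error handling are omitted for brevity.
 */
#include <linux/device.h>
#include <linux/err.h>
#include <drm/drm_drv.h>
#include <drm/drm_device.h>

struct demo_device {
	struct drm_device drm;	/* must be embedded, see devm_drm_dev_alloc() */
	void __iomem *mmio;	/* hypothetical driver-private state */
};

static const struct drm_driver demo_drm_driver = {
	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
	.name	= "demo",
	.desc	= "devm_drm_dev_alloc() usage sketch",
	.date	= "20240101",
	.major	= 1,
	.minor	= 0,
	/* a real driver also sets .fops and, usually, GEM/dumb-buffer hooks */
};

static int demo_probe(struct device *dev)
{
	struct demo_device *demo;

	/* Allocation is devres managed: put automatically on driver detach. */
	demo = devm_drm_dev_alloc(dev, &demo_drm_driver, struct demo_device, drm);
	if (IS_ERR(demo))
		return PTR_ERR(demo);

	/* ... hardware and mode setting initialization would go here ... */

	/* Register last, once the device state is fully set up. */
	return drm_dev_register(&demo->drm, 0);
}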
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar * Copyright (C) 2005-2006, Thomas Gleixner, Russell King * * This file contains the interrupt descriptor management code. Detailed * information is available in Documentation/core-api/genericirq.rst * */ #include <linux/irq.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/maple_tree.h> #include <linux/irqdomain.h> #include <linux/sysfs.h> #include "internals.h" /* * lockdep: we want to handle all irq_desc locks as a single lock-class: */ static struct lock_class_key irq_desc_lock_class; #if defined(CONFIG_SMP) static int __init irq_affinity_setup(char *str) { alloc_bootmem_cpumask_var(&irq_default_affinity); cpulist_parse(str, irq_default_affinity); /* * Set at least the boot cpu. We don't want to end up with * bugreports caused by random commandline masks */ cpumask_set_cpu(smp_processor_id(), irq_default_affinity); return 1; } __setup("irqaffinity=", irq_affinity_setup); static void __init init_irq_default_affinity(void) { if (!cpumask_available(irq_default_affinity)) zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); if (cpumask_empty(irq_default_affinity)) cpumask_setall(irq_default_affinity); } #else static void __init init_irq_default_affinity(void) { } #endif #ifdef CONFIG_SMP static int alloc_masks(struct irq_desc *desc, int node) { if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity, GFP_KERNEL, node)) return -ENOMEM; #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity, GFP_KERNEL, node)) { free_cpumask_var(desc->irq_common_data.affinity); return -ENOMEM; } #endif #ifdef CONFIG_GENERIC_PENDING_IRQ if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node)) { #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK free_cpumask_var(desc->irq_common_data.effective_affinity); #endif free_cpumask_var(desc->irq_common_data.affinity); return -ENOMEM; } #endif return 0; } static void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { if (!affinity) affinity = irq_default_affinity; cpumask_copy(desc->irq_common_data.affinity, affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_clear(desc->pending_mask); #endif #ifdef CONFIG_NUMA desc->irq_common_data.node = node; #endif } static void free_masks(struct irq_desc *desc) { #ifdef CONFIG_GENERIC_PENDING_IRQ free_cpumask_var(desc->pending_mask); #endif free_cpumask_var(desc->irq_common_data.affinity); #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK free_cpumask_var(desc->irq_common_data.effective_affinity); #endif } #else static inline int alloc_masks(struct irq_desc *desc, int node) { return 0; } static inline void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { } static inline void free_masks(struct irq_desc *desc) { } #endif static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, const struct cpumask *affinity, struct module
*owner) { int cpu; desc->irq_common_data.handler_data = NULL; desc->irq_common_data.msi_desc = NULL; desc->irq_data.common = &desc->irq_common_data; desc->irq_data.irq = irq; desc->irq_data.chip = &no_irq_chip; desc->irq_data.chip_data = NULL; irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); desc->handle_irq = handle_bad_irq; desc->depth = 1; desc->irq_count = 0; desc->irqs_unhandled = 0; desc->tot_count = 0; desc->name = NULL; desc->owner = owner; for_each_possible_cpu(cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; desc_smp_init(desc, node, affinity); } int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); static DEFINE_MUTEX(sparse_irq_lock); static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | MT_FLAGS_USE_RCU, sparse_irq_lock); static int irq_find_free_area(unsigned int from, unsigned int cnt) { MA_STATE(mas, &sparse_irqs, 0, 0); if (mas_empty_area(&mas, from, MAX_SPARSE_IRQS, cnt)) return -ENOSPC; return mas.index; } static unsigned int irq_find_at_or_after(unsigned int offset) { unsigned long index = offset; struct irq_desc *desc = mt_find(&sparse_irqs, &index, nr_irqs); return desc ? irq_desc_get_irq(desc) : nr_irqs; } static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) { MA_STATE(mas, &sparse_irqs, irq, irq); WARN_ON(mas_store_gfp(&mas, desc, GFP_KERNEL) != 0); } static void delete_irq_desc(unsigned int irq) { MA_STATE(mas, &sparse_irqs, irq, irq); mas_erase(&mas); } #ifdef CONFIG_SPARSE_IRQ static const struct kobj_type irq_kobj_type; #endif static int init_desc(struct irq_desc *desc, int irq, int node, unsigned int flags, const struct cpumask *affinity, struct module *owner) { desc->kstat_irqs = alloc_percpu(unsigned int); if (!desc->kstat_irqs) return -ENOMEM; if (alloc_masks(desc, node)) { free_percpu(desc->kstat_irqs); return -ENOMEM; } raw_spin_lock_init(&desc->lock); lockdep_set_class(&desc->lock, &irq_desc_lock_class); mutex_init(&desc->request_mutex); init_waitqueue_head(&desc->wait_for_threads); desc_set_defaults(irq, desc, node, affinity, owner); irqd_set(&desc->irq_data, flags); irq_resend_init(desc); #ifdef CONFIG_SPARSE_IRQ kobject_init(&desc->kobj, &irq_kobj_type); init_rcu_head(&desc->rcu); #endif return 0; } #ifdef CONFIG_SPARSE_IRQ static void irq_kobj_release(struct kobject *kobj); #ifdef CONFIG_SYSFS static struct kobject *irq_kobj_base; #define IRQ_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) static ssize_t per_cpu_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); ssize_t ret = 0; char *p = ""; int cpu; for_each_possible_cpu(cpu) { unsigned int c = irq_desc_kstat_cpu(desc, cpu); ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c); p = ","; } ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); return ret; } IRQ_ATTR_RO(per_cpu_count); static ssize_t chip_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); if (desc->irq_data.chip && desc->irq_data.chip->name) { ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->irq_data.chip->name); } raw_spin_unlock_irq(&desc->lock); return ret; } IRQ_ATTR_RO(chip_name); static ssize_t hwirq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = 
container_of(kobj, struct irq_desc, kobj); ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); if (desc->irq_data.domain) ret = sprintf(buf, "%lu\n", desc->irq_data.hwirq); raw_spin_unlock_irq(&desc->lock); return ret; } IRQ_ATTR_RO(hwirq); static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); ret = sprintf(buf, "%s\n", irqd_is_level_type(&desc->irq_data) ? "level" : "edge"); raw_spin_unlock_irq(&desc->lock); return ret; } IRQ_ATTR_RO(type); static ssize_t wakeup_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); ret = sprintf(buf, "%s\n", irqd_is_wakeup_set(&desc->irq_data) ? "enabled" : "disabled"); raw_spin_unlock_irq(&desc->lock); return ret; } IRQ_ATTR_RO(wakeup); static ssize_t name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); if (desc->name) ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name); raw_spin_unlock_irq(&desc->lock); return ret; } IRQ_ATTR_RO(name); static ssize_t actions_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); struct irqaction *action; ssize_t ret = 0; char *p = ""; raw_spin_lock_irq(&desc->lock); for_each_action_of_desc(desc, action) { ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s", p, action->name); p = ","; } raw_spin_unlock_irq(&desc->lock); if (ret) ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); return ret; } IRQ_ATTR_RO(actions); static struct attribute *irq_attrs[] = { &per_cpu_count_attr.attr, &chip_name_attr.attr, &hwirq_attr.attr, &type_attr.attr, &wakeup_attr.attr, &name_attr.attr, &actions_attr.attr, NULL }; ATTRIBUTE_GROUPS(irq); static const struct kobj_type irq_kobj_type = { .release = irq_kobj_release, .sysfs_ops = &kobj_sysfs_ops, .default_groups = irq_groups, }; static void irq_sysfs_add(int irq, struct irq_desc *desc) { if (irq_kobj_base) { /* * Continue even in case of failure as this is nothing * crucial and failures in the late irq_sysfs_init() * cannot be rolled back. */ if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq)) pr_warn("Failed to add kobject for irq %d\n", irq); else desc->istate |= IRQS_SYSFS; } } static void irq_sysfs_del(struct irq_desc *desc) { /* * Only invoke kobject_del() when kobject_add() was successfully * invoked for the descriptor. This covers both early boot, where * sysfs is not initialized yet, and the case of a failed * kobject_add() invocation. 
*/ if (desc->istate & IRQS_SYSFS) kobject_del(&desc->kobj); } static int __init irq_sysfs_init(void) { struct irq_desc *desc; int irq; /* Prevent concurrent irq alloc/free */ irq_lock_sparse(); irq_kobj_base = kobject_create_and_add("irq", kernel_kobj); if (!irq_kobj_base) { irq_unlock_sparse(); return -ENOMEM; } /* Add the already allocated interrupts */ for_each_irq_desc(irq, desc) irq_sysfs_add(irq, desc); irq_unlock_sparse(); return 0; } postcore_initcall(irq_sysfs_init); #else /* !CONFIG_SYSFS */ static const struct kobj_type irq_kobj_type = { .release = irq_kobj_release, }; static void irq_sysfs_add(int irq, struct irq_desc *desc) {} static void irq_sysfs_del(struct irq_desc *desc) {} #endif /* CONFIG_SYSFS */ struct irq_desc *irq_to_desc(unsigned int irq) { return mtree_load(&sparse_irqs, irq); } #ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE EXPORT_SYMBOL_GPL(irq_to_desc); #endif void irq_lock_sparse(void) { mutex_lock(&sparse_irq_lock); } void irq_unlock_sparse(void) { mutex_unlock(&sparse_irq_lock); } static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, const struct cpumask *affinity, struct module *owner) { struct irq_desc *desc; int ret; desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node); if (!desc) return NULL; ret = init_desc(desc, irq, node, flags, affinity, owner); if (unlikely(ret)) { kfree(desc); return NULL; } return desc; } static void irq_kobj_release(struct kobject *kobj) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); free_masks(desc); free_percpu(desc->kstat_irqs); kfree(desc); } static void delayed_free_desc(struct rcu_head *rhp) { struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu); kobject_put(&desc->kobj); } static void free_desc(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); irq_remove_debugfs_entry(desc); unregister_irq_proc(irq, desc); /* * sparse_irq_lock protects also show_interrupts() and * kstat_irq_usr(). Once we deleted the descriptor from the * sparse tree we can free it. Access in proc will fail to * lookup the descriptor. * * The sysfs entry must be serialized against a concurrent * irq_sysfs_init() as well. */ irq_sysfs_del(desc); delete_irq_desc(irq); /* * We free the descriptor, masks and stat fields via RCU. That * allows demultiplex interrupts to do rcu based management of * the child interrupts. * This also allows us to use rcu in kstat_irqs_usr(). 
*/ call_rcu(&desc->rcu, delayed_free_desc); } static int alloc_descs(unsigned int start, unsigned int cnt, int node, const struct irq_affinity_desc *affinity, struct module *owner) { struct irq_desc *desc; int i; /* Validate affinity mask(s) */ if (affinity) { for (i = 0; i < cnt; i++) { if (cpumask_empty(&affinity[i].mask)) return -EINVAL; } } for (i = 0; i < cnt; i++) { const struct cpumask *mask = NULL; unsigned int flags = 0; if (affinity) { if (affinity->is_managed) { flags = IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN; } mask = &affinity->mask; node = cpu_to_node(cpumask_first(mask)); affinity++; } desc = alloc_desc(start + i, node, flags, mask, owner); if (!desc) goto err; irq_insert_desc(start + i, desc); irq_sysfs_add(start + i, desc); irq_add_debugfs_entry(start + i, desc); } return start; err: for (i--; i >= 0; i--) free_desc(start + i); return -ENOMEM; } static int irq_expand_nr_irqs(unsigned int nr) { if (nr > MAX_SPARSE_IRQS) return -ENOMEM; nr_irqs = nr; return 0; } int __init early_irq_init(void) { int i, initcnt, node = first_online_node; struct irq_desc *desc; init_irq_default_affinity(); /* Let arch update nr_irqs and return the nr of preallocated irqs */ initcnt = arch_probe_nr_irqs(); printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n", NR_IRQS, nr_irqs, initcnt); if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS)) nr_irqs = MAX_SPARSE_IRQS; if (WARN_ON(initcnt > MAX_SPARSE_IRQS)) initcnt = MAX_SPARSE_IRQS; if (initcnt > nr_irqs) nr_irqs = initcnt; for (i = 0; i < initcnt; i++) { desc = alloc_desc(i, node, 0, NULL, NULL); irq_insert_desc(i, desc); } return arch_early_irq_init(); } #else /* !CONFIG_SPARSE_IRQ */ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { [0 ... NR_IRQS-1] = { .handle_irq = handle_bad_irq, .depth = 1, .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), } }; int __init early_irq_init(void) { int count, i, node = first_online_node; int ret; init_irq_default_affinity(); printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS); count = ARRAY_SIZE(irq_desc); for (i = 0; i < count; i++) { ret = init_desc(irq_desc + i, i, node, 0, NULL, NULL); if (unlikely(ret)) goto __free_desc_res; } return arch_early_irq_init(); __free_desc_res: while (--i >= 0) { free_masks(irq_desc + i); free_percpu(irq_desc[i].kstat_irqs); } return ret; } struct irq_desc *irq_to_desc(unsigned int irq) { return (irq < NR_IRQS) ? 
irq_desc + irq : NULL; } EXPORT_SYMBOL(irq_to_desc); static void free_desc(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; raw_spin_lock_irqsave(&desc->lock, flags); desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL); raw_spin_unlock_irqrestore(&desc->lock, flags); delete_irq_desc(irq); } static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, const struct irq_affinity_desc *affinity, struct module *owner) { u32 i; for (i = 0; i < cnt; i++) { struct irq_desc *desc = irq_to_desc(start + i); desc->owner = owner; irq_insert_desc(start + i, desc); } return start; } static int irq_expand_nr_irqs(unsigned int nr) { return -ENOMEM; } void irq_mark_irq(unsigned int irq) { mutex_lock(&sparse_irq_lock); irq_insert_desc(irq, irq_desc + irq); mutex_unlock(&sparse_irq_lock); } #ifdef CONFIG_GENERIC_IRQ_LEGACY void irq_init_desc(unsigned int irq) { free_desc(irq); } #endif #endif /* !CONFIG_SPARSE_IRQ */ int handle_irq_desc(struct irq_desc *desc) { struct irq_data *data; if (!desc) return -EINVAL; data = irq_desc_get_irq_data(desc); if (WARN_ON_ONCE(!in_hardirq() && handle_enforce_irqctx(data))) return -EPERM; generic_handle_irq_desc(desc); return 0; } /** * generic_handle_irq - Invoke the handler for a particular irq * @irq: The irq number to handle * * Returns: 0 on success, or -EINVAL if conversion has failed * * This function must be called from an IRQ context with irq regs * initialized. */ int generic_handle_irq(unsigned int irq) { return handle_irq_desc(irq_to_desc(irq)); } EXPORT_SYMBOL_GPL(generic_handle_irq); /** * generic_handle_irq_safe - Invoke the handler for a particular irq from any * context. * @irq: The irq number to handle * * Returns: 0 on success, a negative value on error. * * This function can be called from any context (IRQ or process context). It * will report an error if not invoked from IRQ context and the irq has been * marked to enforce IRQ-context only. */ int generic_handle_irq_safe(unsigned int irq) { unsigned long flags; int ret; local_irq_save(flags); ret = handle_irq_desc(irq_to_desc(irq)); local_irq_restore(flags); return ret; } EXPORT_SYMBOL_GPL(generic_handle_irq_safe); #ifdef CONFIG_IRQ_DOMAIN /** * generic_handle_domain_irq - Invoke the handler for a HW irq belonging * to a domain. * @domain: The domain where to perform the lookup * @hwirq: The HW irq number to convert to a logical one * * Returns: 0 on success, or -EINVAL if conversion has failed * * This function must be called from an IRQ context with irq regs * initialized. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) { return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); /** * generic_handle_irq_safe - Invoke the handler for a HW irq belonging * to a domain from any context. * @domain: The domain where to perform the lookup * @hwirq: The HW irq number to convert to a logical one * * Returns: 0 on success, a negative value on error. * * This function can be called from any context (IRQ or process * context). If the interrupt is marked as 'enforce IRQ-context only' then * the function must be invoked from hard interrupt context. 
*/ int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq) { unsigned long flags; int ret; local_irq_save(flags); ret = handle_irq_desc(irq_resolve_mapping(domain, hwirq)); local_irq_restore(flags); return ret; } EXPORT_SYMBOL_GPL(generic_handle_domain_irq_safe); /** * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging * to a domain. * @domain: The domain where to perform the lookup * @hwirq: The HW irq number to convert to a logical one * * Returns: 0 on success, or -EINVAL if conversion has failed * * This function must be called from an NMI context with irq regs * initialized. **/ int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq) { WARN_ON_ONCE(!in_nmi()); return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); } #endif /* Dynamic interrupt handling */ /** * irq_free_descs - free irq descriptors * @from: Start of descriptor range * @cnt: Number of consecutive irqs to free */ void irq_free_descs(unsigned int from, unsigned int cnt) { int i; if (from >= nr_irqs || (from + cnt) > nr_irqs) return; mutex_lock(&sparse_irq_lock); for (i = 0; i < cnt; i++) free_desc(from + i); mutex_unlock(&sparse_irq_lock); } EXPORT_SYMBOL_GPL(irq_free_descs); /** * __irq_alloc_descs - allocate and initialize a range of irq descriptors * @irq: Allocate for specific irq number if irq >= 0 * @from: Start the search from this irq number * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated * @owner: Owning module (can be NULL) * @affinity: Optional pointer to an affinity mask array of size @cnt which * hints where the irq descriptors should be allocated and which * default affinities to use * * Returns the first irq number or error code */ int __ref __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, struct module *owner, const struct irq_affinity_desc *affinity) { int start, ret; if (!cnt) return -EINVAL; if (irq >= 0) { if (from > irq) return -EINVAL; from = irq; } else { /* * For interrupts which are freely allocated the * architecture can force a lower bound to the @from * argument. x86 uses this to exclude the GSI space. */ from = arch_dynirq_lower_bound(from); } mutex_lock(&sparse_irq_lock); start = irq_find_free_area(from, cnt); ret = -EEXIST; if (irq >=0 && start != irq) goto unlock; if (start + cnt > nr_irqs) { ret = irq_expand_nr_irqs(start + cnt); if (ret) goto unlock; } ret = alloc_descs(start, cnt, node, affinity, owner); unlock: mutex_unlock(&sparse_irq_lock); return ret; } EXPORT_SYMBOL_GPL(__irq_alloc_descs); /** * irq_get_next_irq - get next allocated irq number * @offset: where to start the search * * Returns next irq number after offset or nr_irqs if none is found. 
*/ unsigned int irq_get_next_irq(unsigned int offset) { return irq_find_at_or_after(offset); } struct irq_desc * __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, unsigned int check) { struct irq_desc *desc = irq_to_desc(irq); if (desc) { if (check & _IRQ_DESC_CHECK) { if ((check & _IRQ_DESC_PERCPU) && !irq_settings_is_per_cpu_devid(desc)) return NULL; if (!(check & _IRQ_DESC_PERCPU) && irq_settings_is_per_cpu_devid(desc)) return NULL; } if (bus) chip_bus_lock(desc); raw_spin_lock_irqsave(&desc->lock, *flags); } return desc; } void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus) __releases(&desc->lock) { raw_spin_unlock_irqrestore(&desc->lock, flags); if (bus) chip_bus_sync_unlock(desc); } int irq_set_percpu_devid_partition(unsigned int irq, const struct cpumask *affinity) { struct irq_desc *desc = irq_to_desc(irq); if (!desc) return -EINVAL; if (desc->percpu_enabled) return -EINVAL; desc->percpu_enabled = kzalloc(sizeof(*desc->percpu_enabled), GFP_KERNEL); if (!desc->percpu_enabled) return -ENOMEM; if (affinity) desc->percpu_affinity = affinity; else desc->percpu_affinity = cpu_possible_mask; irq_set_percpu_devid_flags(irq); return 0; } int irq_set_percpu_devid(unsigned int irq) { return irq_set_percpu_devid_partition(irq, NULL); } int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity) { struct irq_desc *desc = irq_to_desc(irq); if (!desc || !desc->percpu_enabled) return -EINVAL; if (affinity) cpumask_copy(affinity, desc->percpu_affinity); return 0; } EXPORT_SYMBOL_GPL(irq_get_percpu_devid_partition); void kstat_incr_irq_this_cpu(unsigned int irq) { kstat_incr_irqs_this_cpu(irq_to_desc(irq)); } /** * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu * @irq: The interrupt number * @cpu: The cpu number * * Returns the sum of interrupt counts on @cpu since boot for * @irq. The caller must ensure that the interrupt is not removed * concurrently. */ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); return desc && desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } static bool irq_is_nmi(struct irq_desc *desc) { return desc->istate & IRQS_NMI; } static unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned int sum = 0; int cpu; if (!desc || !desc->kstat_irqs) return 0; if (!irq_settings_is_per_cpu_devid(desc) && !irq_settings_is_per_cpu(desc) && !irq_is_nmi(desc)) return data_race(desc->tot_count); for_each_possible_cpu(cpu) sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu)); return sum; } /** * kstat_irqs_usr - Get the statistics for an interrupt from thread context * @irq: The interrupt number * * Returns the sum of interrupt counts on all cpus since boot for @irq. * * It uses rcu to protect the access since a concurrent removal of an * interrupt descriptor is observing an rcu grace period before * delayed_free_desc()/irq_kobj_release(). */ unsigned int kstat_irqs_usr(unsigned int irq) { unsigned int sum; rcu_read_lock(); sum = kstat_irqs(irq); rcu_read_unlock(); return sum; } #ifdef CONFIG_LOCKDEP void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class) { struct irq_desc *desc = irq_to_desc(irq); if (desc) { lockdep_set_class(&desc->lock, lock_class); lockdep_set_class(&desc->request_mutex, request_class); } } EXPORT_SYMBOL_GPL(__irq_set_lockdep_class); #endif
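/*
 * Editor's note (illustrative sketch, not part of the kernel source): the code
 * above exposes generic_handle_domain_irq() for demultiplexing interrupt
 * controllers. The fragment below shows the usual pattern: a chained parent
 * handler, running in hard irq context (so the "must be called from an IRQ
 * context" rule holds), reads a pending-bit register and resolves each
 * hardware interrupt number through its irq_domain. The foo_* names and the
 * FOO_PENDING register layout are hypothetical; only the APIs are real.
 */
#include <linux/bitops.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>

#define FOO_PENDING	0x00	/* hypothetical MMIO pending-bits register */

struct foo_intc {
	void __iomem *regs;		/* hypothetical MMIO base */
	struct irq_domain *domain;	/* created at probe time */
};

static void foo_intc_handle(struct irq_desc *desc)
{
	struct foo_intc *intc = irq_desc_get_handler_data(desc);
	struct irq_chip *chip = irq_desc_get_chip(desc);
	unsigned long pending;
	unsigned int hwirq;

	chained_irq_enter(chip, desc);

	/* One bit per child interrupt; hand each one to the core, which
	 * looks up the descriptor via the domain and invokes its handler. */
	pending = readl_relaxed(intc->regs + FOO_PENDING);
	for_each_set_bit(hwirq, &pending, 32)
		generic_handle_domain_irq(intc->domain, hwirq);

	chained_irq_exit(chip, desc);
}

/*
 * A driver would install this at probe time with something like
 * irq_set_chained_handler_and_data(parent_irq, foo_intc_handle, intc),
 * after creating the domain that maps hwirq numbers to virqs.
 */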
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * This file holds USB constants and structures that are needed for * USB device APIs. These are used by the USB device model, which is * defined in chapter 9 of the USB 2.0 specification and in the * Wireless USB 1.0 spec (now defunct). Linux has several APIs in C that * need these: * * - the master/host side Linux-USB kernel driver API; * - the "usbfs" user space API; and * - the Linux "gadget" slave/device/peripheral side driver API. * * USB 2.0 adds an additional "On The Go" (OTG) mode, which lets systems * act either as a USB master/host or as a USB slave/device. That means * the master and slave side APIs benefit from working well together. * * Note all descriptors are declared '__attribute__((packed))' so that: * * [a] they never get padded, either internally (USB spec writers * probably handled that) or externally; * * [b] so that accessing bigger-than-a-bytes fields will never * generate bus errors on any platform, even when the location of * its descriptor inside a bundle isn't "naturally aligned", and * * [c] for consistency, removing all doubt even when it appears to * someone that the two other points are non-issues for that * particular descriptor type. */ #ifndef _UAPI__LINUX_USB_CH9_H #define _UAPI__LINUX_USB_CH9_H #include <linux/types.h> /* __u8 etc */ #include <asm/byteorder.h> /* le16_to_cpu */ /*-------------------------------------------------------------------------*/ /* CONTROL REQUEST SUPPORT */ /* * USB directions * * This bit flag is used in endpoint descriptors' bEndpointAddress field. * It's also one of three fields in control requests bRequestType.
*/ #define USB_DIR_OUT 0 /* to device */ #define USB_DIR_IN 0x80 /* to host */ /* * USB types, the second of three bRequestType fields */ #define USB_TYPE_MASK (0x03 << 5) #define USB_TYPE_STANDARD (0x00 << 5) #define USB_TYPE_CLASS (0x01 << 5) #define USB_TYPE_VENDOR (0x02 << 5) #define USB_TYPE_RESERVED (0x03 << 5) /* * USB recipients, the third of three bRequestType fields */ #define USB_RECIP_MASK 0x1f #define USB_RECIP_DEVICE 0x00 #define USB_RECIP_INTERFACE 0x01 #define USB_RECIP_ENDPOINT 0x02 #define USB_RECIP_OTHER 0x03 /* From Wireless USB 1.0 */ #define USB_RECIP_PORT 0x04 #define USB_RECIP_RPIPE 0x05 /* * Standard requests, for the bRequest field of a SETUP packet. * * These are qualified by the bRequestType field, so that for example * TYPE_CLASS or TYPE_VENDOR specific feature flags could be retrieved * by a GET_STATUS request. */ #define USB_REQ_GET_STATUS 0x00 #define USB_REQ_CLEAR_FEATURE 0x01 #define USB_REQ_SET_FEATURE 0x03 #define USB_REQ_SET_ADDRESS 0x05 #define USB_REQ_GET_DESCRIPTOR 0x06 #define USB_REQ_SET_DESCRIPTOR 0x07 #define USB_REQ_GET_CONFIGURATION 0x08 #define USB_REQ_SET_CONFIGURATION 0x09 #define USB_REQ_GET_INTERFACE 0x0A #define USB_REQ_SET_INTERFACE 0x0B #define USB_REQ_SYNCH_FRAME 0x0C #define USB_REQ_SET_SEL 0x30 #define USB_REQ_SET_ISOCH_DELAY 0x31 #define USB_REQ_SET_ENCRYPTION 0x0D /* Wireless USB */ #define USB_REQ_GET_ENCRYPTION 0x0E #define USB_REQ_RPIPE_ABORT 0x0E #define USB_REQ_SET_HANDSHAKE 0x0F #define USB_REQ_RPIPE_RESET 0x0F #define USB_REQ_GET_HANDSHAKE 0x10 #define USB_REQ_SET_CONNECTION 0x11 #define USB_REQ_SET_SECURITY_DATA 0x12 #define USB_REQ_GET_SECURITY_DATA 0x13 #define USB_REQ_SET_WUSB_DATA 0x14 #define USB_REQ_LOOPBACK_DATA_WRITE 0x15 #define USB_REQ_LOOPBACK_DATA_READ 0x16 #define USB_REQ_SET_INTERFACE_DS 0x17 /* specific requests for USB Power Delivery */ #define USB_REQ_GET_PARTNER_PDO 20 #define USB_REQ_GET_BATTERY_STATUS 21 #define USB_REQ_SET_PDO 22 #define USB_REQ_GET_VDM 23 #define USB_REQ_SEND_VDM 24 /* The Link Power Management (LPM) ECN defines USB_REQ_TEST_AND_SET command, * used by hubs to put ports into a new L1 suspend state, except that it * forgot to define its number ... */ /* * USB feature flags are written using USB_REQ_{CLEAR,SET}_FEATURE, and * are read as a bit array returned by USB_REQ_GET_STATUS. (So there * are at most sixteen features of each type.) Hubs may also support a * new USB_REQ_TEST_AND_SET_FEATURE to put ports into L1 suspend. 
*/ #define USB_DEVICE_SELF_POWERED 0 /* (read only) */ #define USB_DEVICE_REMOTE_WAKEUP 1 /* dev may initiate wakeup */ #define USB_DEVICE_TEST_MODE 2 /* (wired high speed only) */ #define USB_DEVICE_BATTERY 2 /* (wireless) */ #define USB_DEVICE_B_HNP_ENABLE 3 /* (otg) dev may initiate HNP */ #define USB_DEVICE_WUSB_DEVICE 3 /* (wireless)*/ #define USB_DEVICE_A_HNP_SUPPORT 4 /* (otg) RH port supports HNP */ #define USB_DEVICE_A_ALT_HNP_SUPPORT 5 /* (otg) other RH port does */ #define USB_DEVICE_DEBUG_MODE 6 /* (special devices only) */ /* * Test Mode Selectors * See USB 2.0 spec Table 9-7 */ #define USB_TEST_J 1 #define USB_TEST_K 2 #define USB_TEST_SE0_NAK 3 #define USB_TEST_PACKET 4 #define USB_TEST_FORCE_ENABLE 5 /* Status Type */ #define USB_STATUS_TYPE_STANDARD 0 #define USB_STATUS_TYPE_PTM 1 /* * New Feature Selectors as added by USB 3.0 * See USB 3.0 spec Table 9-7 */ #define USB_DEVICE_U1_ENABLE 48 /* dev may initiate U1 transition */ #define USB_DEVICE_U2_ENABLE 49 /* dev may initiate U2 transition */ #define USB_DEVICE_LTM_ENABLE 50 /* dev may send LTM */ #define USB_INTRF_FUNC_SUSPEND 0 /* function suspend */ #define USB_INTR_FUNC_SUSPEND_OPT_MASK 0xFF00 /* * Suspend Options, Table 9-8 USB 3.0 spec */ #define USB_INTRF_FUNC_SUSPEND_LP (1 << (8 + 0)) #define USB_INTRF_FUNC_SUSPEND_RW (1 << (8 + 1)) /* * Interface status, Figure 9-5 USB 3.0 spec */ #define USB_INTRF_STAT_FUNC_RW_CAP 1 #define USB_INTRF_STAT_FUNC_RW 2 #define USB_ENDPOINT_HALT 0 /* IN/OUT will STALL */ /* Bit array elements as returned by the USB_REQ_GET_STATUS request. */ #define USB_DEV_STAT_U1_ENABLED 2 /* transition into U1 state */ #define USB_DEV_STAT_U2_ENABLED 3 /* transition into U2 state */ #define USB_DEV_STAT_LTM_ENABLED 4 /* Latency tolerance messages */ /* * Feature selectors from Table 9-8 USB Power Delivery spec */ #define USB_DEVICE_BATTERY_WAKE_MASK 40 #define USB_DEVICE_OS_IS_PD_AWARE 41 #define USB_DEVICE_POLICY_MODE 42 #define USB_PORT_PR_SWAP 43 #define USB_PORT_GOTO_MIN 44 #define USB_PORT_RETURN_POWER 45 #define USB_PORT_ACCEPT_PD_REQUEST 46 #define USB_PORT_REJECT_PD_REQUEST 47 #define USB_PORT_PORT_PD_RESET 48 #define USB_PORT_C_PORT_PD_CHANGE 49 #define USB_PORT_CABLE_PD_RESET 50 #define USB_DEVICE_CHARGING_POLICY 54 /** * struct usb_ctrlrequest - SETUP data for a USB device control request * @bRequestType: matches the USB bmRequestType field * @bRequest: matches the USB bRequest field * @wValue: matches the USB wValue field (le16 byte order) * @wIndex: matches the USB wIndex field (le16 byte order) * @wLength: matches the USB wLength field (le16 byte order) * * This structure is used to send control requests to a USB device. It matches * the different fields of the USB 2.0 Spec section 9.3, table 9-2. See the * USB spec for a fuller description of the different fields, and what they are * used for. * * Note that the driver for any interface can issue control requests. * For most devices, interfaces don't coordinate with each other, so * such requests may be made at any time. */ struct usb_ctrlrequest { __u8 bRequestType; __u8 bRequest; __le16 wValue; __le16 wIndex; __le16 wLength; } __attribute__ ((packed)); /*-------------------------------------------------------------------------*/ /* * STANDARD DESCRIPTORS ... as returned by GET_DESCRIPTOR, or * (rarely) accepted by SET_DESCRIPTOR. * * Note that all multi-byte values here are encoded in little endian * byte order "on the wire". 
Within the kernel and when exposed * through the Linux-USB APIs, they are not converted to cpu byte * order; it is the responsibility of the client code to do this. * The single exception is when device and configuration descriptors (but * not other descriptors) are read from character devices * (i.e. /dev/bus/usb/BBB/DDD); * in this case the fields are converted to host endianness by the kernel. */ /* * Descriptor types ... USB 2.0 spec table 9.5 */ #define USB_DT_DEVICE 0x01 #define USB_DT_CONFIG 0x02 #define USB_DT_STRING 0x03 #define USB_DT_INTERFACE 0x04 #define USB_DT_ENDPOINT 0x05 #define USB_DT_DEVICE_QUALIFIER 0x06 #define USB_DT_OTHER_SPEED_CONFIG 0x07 #define USB_DT_INTERFACE_POWER 0x08 /* these are from a minor usb 2.0 revision (ECN) */ #define USB_DT_OTG 0x09 #define USB_DT_DEBUG 0x0a #define USB_DT_INTERFACE_ASSOCIATION 0x0b /* these are from the Wireless USB spec */ #define USB_DT_SECURITY 0x0c #define USB_DT_KEY 0x0d #define USB_DT_ENCRYPTION_TYPE 0x0e #define USB_DT_BOS 0x0f #define USB_DT_DEVICE_CAPABILITY 0x10 #define USB_DT_WIRELESS_ENDPOINT_COMP 0x11 #define USB_DT_WIRE_ADAPTER 0x21 #define USB_DT_RPIPE 0x22 #define USB_DT_CS_RADIO_CONTROL 0x23 /* From the T10 UAS specification */ #define USB_DT_PIPE_USAGE 0x24 /* From the USB 3.0 spec */ #define USB_DT_SS_ENDPOINT_COMP 0x30 /* From the USB 3.1 spec */ #define USB_DT_SSP_ISOC_ENDPOINT_COMP 0x31 /* Conventional codes for class-specific descriptors. The convention is * defined in the USB "Common Class" Spec (3.11). Individual class specs * are authoritative for their usage, not the "common class" writeup. */ #define USB_DT_CS_DEVICE (USB_TYPE_CLASS | USB_DT_DEVICE) #define USB_DT_CS_CONFIG (USB_TYPE_CLASS | USB_DT_CONFIG) #define USB_DT_CS_STRING (USB_TYPE_CLASS | USB_DT_STRING) #define USB_DT_CS_INTERFACE (USB_TYPE_CLASS | USB_DT_INTERFACE) #define USB_DT_CS_ENDPOINT (USB_TYPE_CLASS | USB_DT_ENDPOINT) /* All standard descriptors have these 2 fields at the beginning */ struct usb_descriptor_header { __u8 bLength; __u8 bDescriptorType; } __attribute__ ((packed)); /*-------------------------------------------------------------------------*/ /* USB_DT_DEVICE: Device descriptor */ struct usb_device_descriptor { __u8 bLength; __u8 bDescriptorType; __le16 bcdUSB; __u8 bDeviceClass; __u8 bDeviceSubClass; __u8 bDeviceProtocol; __u8 bMaxPacketSize0; __le16 idVendor; __le16 idProduct; __le16 bcdDevice; __u8 iManufacturer; __u8 iProduct; __u8 iSerialNumber; __u8 bNumConfigurations; } __attribute__ ((packed)); #define USB_DT_DEVICE_SIZE 18 /* * Device and/or Interface Class codes * as found in bDeviceClass or bInterfaceClass * and defined by www.usb.org documents */ #define USB_CLASS_PER_INTERFACE 0 /* for DeviceClass */ #define USB_CLASS_AUDIO 1 #define USB_CLASS_COMM 2 #define USB_CLASS_HID 3 #define USB_CLASS_PHYSICAL 5 #define USB_CLASS_STILL_IMAGE 6 #define USB_CLASS_PRINTER 7 #define USB_CLASS_MASS_STORAGE 8 #define USB_CLASS_HUB 9 #define USB_CLASS_CDC_DATA 0x0a #define USB_CLASS_CSCID 0x0b /* chip+ smart card */ #define USB_CLASS_CONTENT_SEC 0x0d /* content security */ #define USB_CLASS_VIDEO 0x0e #define USB_CLASS_WIRELESS_CONTROLLER 0xe0 #define USB_CLASS_PERSONAL_HEALTHCARE 0x0f #define USB_CLASS_AUDIO_VIDEO 0x10 #define USB_CLASS_BILLBOARD 0x11 #define USB_CLASS_USB_TYPE_C_BRIDGE 0x12 #define USB_CLASS_MISC 0xef #define USB_CLASS_APP_SPEC 0xfe #define USB_CLASS_VENDOR_SPEC 0xff #define USB_SUBCLASS_VENDOR_SPEC 0xff /*-------------------------------------------------------------------------*/ /* USB_DT_CONFIG: 
Configuration descriptor information. * * USB_DT_OTHER_SPEED_CONFIG is the same descriptor, except that the * descriptor type is different. Highspeed-capable devices can look * different depending on what speed they're currently running. Only * devices with a USB_DT_DEVICE_QUALIFIER have any OTHER_SPEED_CONFIG * descriptors. */ struct usb_config_descriptor { __u8 bLength; __u8 bDescriptorType; __le16 wTotalLength; __u8 bNumInterfaces; __u8 bConfigurationValue; __u8 iConfiguration; __u8 bmAttributes; __u8 bMaxPower; } __attribute__ ((packed)); #define USB_DT_CONFIG_SIZE 9 /* from config descriptor bmAttributes */ #define USB_CONFIG_ATT_ONE (1 << 7) /* must be set */ #define USB_CONFIG_ATT_SELFPOWER (1 << 6) /* self powered */ #define USB_CONFIG_ATT_WAKEUP (1 << 5) /* can wakeup */ #define USB_CONFIG_ATT_BATTERY (1 << 4) /* battery powered */ /*-------------------------------------------------------------------------*/ /* USB String descriptors can contain at most 126 characters. */ #define USB_MAX_STRING_LEN 126 /* USB_DT_STRING: String descriptor */ struct usb_string_descriptor { __u8 bLength; __u8 bDescriptorType; union { __le16 legacy_padding; __DECLARE_FLEX_ARRAY(__le16, wData); /* UTF-16LE encoded */ }; } __attribute__ ((packed)); /* note that "string" zero is special, it holds language codes that * the device supports, not Unicode characters. */ /*-------------------------------------------------------------------------*/ /* USB_DT_INTERFACE: Interface descriptor */ struct usb_interface_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bInterfaceNumber; __u8 bAlternateSetting; __u8 bNumEndpoints; __u8 bInterfaceClass; __u8 bInterfaceSubClass; __u8 bInterfaceProtocol; __u8 iInterface; } __attribute__ ((packed)); #define USB_DT_INTERFACE_SIZE 9 /*-------------------------------------------------------------------------*/ /* USB_DT_ENDPOINT: Endpoint descriptor */ struct usb_endpoint_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bEndpointAddress; __u8 bmAttributes; __le16 wMaxPacketSize; __u8 bInterval; /* NOTE: these two are _only_ in audio endpoints. */ /* use USB_DT_ENDPOINT*_SIZE in bLength, not sizeof. */ __u8 bRefresh; __u8 bSynchAddress; } __attribute__ ((packed)); #define USB_DT_ENDPOINT_SIZE 7 #define USB_DT_ENDPOINT_AUDIO_SIZE 9 /* Audio extension */ /* * Endpoints */ #define USB_ENDPOINT_NUMBER_MASK 0x0f /* in bEndpointAddress */ #define USB_ENDPOINT_DIR_MASK 0x80 #define USB_ENDPOINT_XFERTYPE_MASK 0x03 /* in bmAttributes */ #define USB_ENDPOINT_XFER_CONTROL 0 #define USB_ENDPOINT_XFER_ISOC 1 #define USB_ENDPOINT_XFER_BULK 2 #define USB_ENDPOINT_XFER_INT 3 #define USB_ENDPOINT_MAX_ADJUSTABLE 0x80 #define USB_ENDPOINT_MAXP_MASK 0x07ff #define USB_EP_MAXP_MULT_SHIFT 11 #define USB_EP_MAXP_MULT_MASK (3 << USB_EP_MAXP_MULT_SHIFT) #define USB_EP_MAXP_MULT(m) \ (((m) & USB_EP_MAXP_MULT_MASK) >> USB_EP_MAXP_MULT_SHIFT) /* The USB 3.0 spec redefines bits 5:4 of bmAttributes as interrupt ep type. 
*/ #define USB_ENDPOINT_INTRTYPE 0x30 #define USB_ENDPOINT_INTR_PERIODIC (0 << 4) #define USB_ENDPOINT_INTR_NOTIFICATION (1 << 4) #define USB_ENDPOINT_SYNCTYPE 0x0c #define USB_ENDPOINT_SYNC_NONE (0 << 2) #define USB_ENDPOINT_SYNC_ASYNC (1 << 2) #define USB_ENDPOINT_SYNC_ADAPTIVE (2 << 2) #define USB_ENDPOINT_SYNC_SYNC (3 << 2) #define USB_ENDPOINT_USAGE_MASK 0x30 #define USB_ENDPOINT_USAGE_DATA 0x00 #define USB_ENDPOINT_USAGE_FEEDBACK 0x10 #define USB_ENDPOINT_USAGE_IMPLICIT_FB 0x20 /* Implicit feedback Data endpoint */ /*-------------------------------------------------------------------------*/ /** * usb_endpoint_num - get the endpoint's number * @epd: endpoint to be checked * * Returns @epd's number: 0 to 15. */ static inline int usb_endpoint_num(const struct usb_endpoint_descriptor *epd) { return epd->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; } /** * usb_endpoint_type - get the endpoint's transfer type * @epd: endpoint to be checked * * Returns one of USB_ENDPOINT_XFER_{CONTROL, ISOC, BULK, INT} according * to @epd's transfer type. */ static inline int usb_endpoint_type(const struct usb_endpoint_descriptor *epd) { return epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK; } /** * usb_endpoint_dir_in - check if the endpoint has IN direction * @epd: endpoint to be checked * * Returns true if the endpoint is of type IN, otherwise it returns false. */ static inline int usb_endpoint_dir_in(const struct usb_endpoint_descriptor *epd) { return ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_IN); } /** * usb_endpoint_dir_out - check if the endpoint has OUT direction * @epd: endpoint to be checked * * Returns true if the endpoint is of type OUT, otherwise it returns false. */ static inline int usb_endpoint_dir_out( const struct usb_endpoint_descriptor *epd) { return ((epd->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT); } /** * usb_endpoint_xfer_bulk - check if the endpoint has bulk transfer type * @epd: endpoint to be checked * * Returns true if the endpoint is of type bulk, otherwise it returns false. */ static inline int usb_endpoint_xfer_bulk( const struct usb_endpoint_descriptor *epd) { return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_BULK); } /** * usb_endpoint_xfer_control - check if the endpoint has control transfer type * @epd: endpoint to be checked * * Returns true if the endpoint is of type control, otherwise it returns false. */ static inline int usb_endpoint_xfer_control( const struct usb_endpoint_descriptor *epd) { return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_CONTROL); } /** * usb_endpoint_xfer_int - check if the endpoint has interrupt transfer type * @epd: endpoint to be checked * * Returns true if the endpoint is of type interrupt, otherwise it returns * false. */ static inline int usb_endpoint_xfer_int( const struct usb_endpoint_descriptor *epd) { return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT); } /** * usb_endpoint_xfer_isoc - check if the endpoint has isochronous transfer type * @epd: endpoint to be checked * * Returns true if the endpoint is of type isochronous, otherwise it returns * false. 
*/ static inline int usb_endpoint_xfer_isoc( const struct usb_endpoint_descriptor *epd) { return ((epd->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_ISOC); } /** * usb_endpoint_is_bulk_in - check if the endpoint is bulk IN * @epd: endpoint to be checked * * Returns true if the endpoint has bulk transfer type and IN direction, * otherwise it returns false. */ static inline int usb_endpoint_is_bulk_in( const struct usb_endpoint_descriptor *epd) { return usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_in(epd); } /** * usb_endpoint_is_bulk_out - check if the endpoint is bulk OUT * @epd: endpoint to be checked * * Returns true if the endpoint has bulk transfer type and OUT direction, * otherwise it returns false. */ static inline int usb_endpoint_is_bulk_out( const struct usb_endpoint_descriptor *epd) { return usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_out(epd); } /** * usb_endpoint_is_int_in - check if the endpoint is interrupt IN * @epd: endpoint to be checked * * Returns true if the endpoint has interrupt transfer type and IN direction, * otherwise it returns false. */ static inline int usb_endpoint_is_int_in( const struct usb_endpoint_descriptor *epd) { return usb_endpoint_xfer_int(epd) && usb_endpoint_dir_in(epd); } /** * usb_endpoint_is_int_out - check if the endpoint is interrupt OUT * @epd: endpoint to be checked * * Returns true if the endpoint has interrupt transfer type and OUT direction, * otherwise it returns false. */ static inline int usb_endpoint_is_int_out( const struct usb_endpoint_descriptor *epd) { return usb_endpoint_xfer_int(epd) && usb_endpoint_dir_out(epd); } /** * usb_endpoint_is_isoc_in - check if the endpoint is isochronous IN * @epd: endpoint to be checked * * Returns true if the endpoint has isochronous transfer type and IN direction, * otherwise it returns false. */ static inline int usb_endpoint_is_isoc_in( const struct usb_endpoint_descriptor *epd) { return usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_in(epd); } /** * usb_endpoint_is_isoc_out - check if the endpoint is isochronous OUT * @epd: endpoint to be checked * * Returns true if the endpoint has isochronous transfer type and OUT direction, * otherwise it returns false. 
*/ static inline int usb_endpoint_is_isoc_out( const struct usb_endpoint_descriptor *epd) { return usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_out(epd); } /** * usb_endpoint_maxp - get endpoint's max packet size * @epd: endpoint to be checked * * Returns @epd's max packet bits [10:0] */ static inline int usb_endpoint_maxp(const struct usb_endpoint_descriptor *epd) { return __le16_to_cpu(epd->wMaxPacketSize) & USB_ENDPOINT_MAXP_MASK; } /** * usb_endpoint_maxp_mult - get endpoint's transactional opportunities * @epd: endpoint to be checked * * Return @epd's wMaxPacketSize[12:11] + 1 */ static inline int usb_endpoint_maxp_mult(const struct usb_endpoint_descriptor *epd) { int maxp = __le16_to_cpu(epd->wMaxPacketSize); return USB_EP_MAXP_MULT(maxp) + 1; } static inline int usb_endpoint_interrupt_type( const struct usb_endpoint_descriptor *epd) { return epd->bmAttributes & USB_ENDPOINT_INTRTYPE; } /*-------------------------------------------------------------------------*/ /* USB_DT_SSP_ISOC_ENDPOINT_COMP: SuperSpeedPlus Isochronous Endpoint Companion * descriptor */ struct usb_ssp_isoc_ep_comp_descriptor { __u8 bLength; __u8 bDescriptorType; __le16 wReseved; __le32 dwBytesPerInterval; } __attribute__ ((packed)); #define USB_DT_SSP_ISOC_EP_COMP_SIZE 8 /*-------------------------------------------------------------------------*/ /* USB_DT_SS_ENDPOINT_COMP: SuperSpeed Endpoint Companion descriptor */ struct usb_ss_ep_comp_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bMaxBurst; __u8 bmAttributes; __le16 wBytesPerInterval; } __attribute__ ((packed)); #define USB_DT_SS_EP_COMP_SIZE 6 /* Bits 4:0 of bmAttributes if this is a bulk endpoint */ static inline int usb_ss_max_streams(const struct usb_ss_ep_comp_descriptor *comp) { int max_streams; if (!comp) return 0; max_streams = comp->bmAttributes & 0x1f; if (!max_streams) return 0; max_streams = 1 << max_streams; return max_streams; } /* Bits 1:0 of bmAttributes if this is an isoc endpoint */ #define USB_SS_MULT(p) (1 + ((p) & 0x3)) /* Bit 7 of bmAttributes if a SSP isoc endpoint companion descriptor exists */ #define USB_SS_SSP_ISOC_COMP(p) ((p) & (1 << 7)) /*-------------------------------------------------------------------------*/ /* USB_DT_DEVICE_QUALIFIER: Device Qualifier descriptor */ struct usb_qualifier_descriptor { __u8 bLength; __u8 bDescriptorType; __le16 bcdUSB; __u8 bDeviceClass; __u8 bDeviceSubClass; __u8 bDeviceProtocol; __u8 bMaxPacketSize0; __u8 bNumConfigurations; __u8 bRESERVED; } __attribute__ ((packed)); /*-------------------------------------------------------------------------*/ /* USB_DT_OTG (from OTG 1.0a supplement) */ struct usb_otg_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bmAttributes; /* support for HNP, SRP, etc */ } __attribute__ ((packed)); /* USB_DT_OTG (from OTG 2.0 supplement) */ struct usb_otg20_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bmAttributes; /* support for HNP, SRP and ADP, etc */ __le16 bcdOTG; /* OTG and EH supplement release number * in binary-coded decimal(i.e. 
2.0 is 0200H) */ } __attribute__ ((packed)); /* from usb_otg_descriptor.bmAttributes */ #define USB_OTG_SRP (1 << 0) #define USB_OTG_HNP (1 << 1) /* swap host/device roles */ #define USB_OTG_ADP (1 << 2) /* support ADP */ /* OTG 3.0 */ #define USB_OTG_RSP (1 << 3) /* support RSP */ #define OTG_STS_SELECTOR 0xF000 /* OTG status selector */ /*-------------------------------------------------------------------------*/ /* USB_DT_DEBUG: for special highspeed devices, replacing serial console */ struct usb_debug_descriptor { __u8 bLength; __u8 bDescriptorType; /* bulk endpoints with 8 byte maxpacket */ __u8 bDebugInEndpoint; __u8 bDebugOutEndpoint; } __attribute__((packed)); /*-------------------------------------------------------------------------*/ /* USB_DT_INTERFACE_ASSOCIATION: groups interfaces */ struct usb_interface_assoc_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bFirstInterface; __u8 bInterfaceCount; __u8 bFunctionClass; __u8 bFunctionSubClass; __u8 bFunctionProtocol; __u8 iFunction; } __attribute__ ((packed)); #define USB_DT_INTERFACE_ASSOCIATION_SIZE 8 /*-------------------------------------------------------------------------*/ /* USB_DT_SECURITY: group of wireless security descriptors, including * encryption types available for setting up a CC/association. */ struct usb_security_descriptor { __u8 bLength; __u8 bDescriptorType; __le16 wTotalLength; __u8 bNumEncryptionTypes; } __attribute__((packed)); /*-------------------------------------------------------------------------*/ /* USB_DT_KEY: used with {GET,SET}_SECURITY_DATA; only public keys * may be retrieved. */ struct usb_key_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 tTKID[3]; __u8 bReserved; __u8 bKeyData[]; } __attribute__((packed)); /*-------------------------------------------------------------------------*/ /* USB_DT_ENCRYPTION_TYPE: bundled in DT_SECURITY groups */ struct usb_encryption_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bEncryptionType; #define USB_ENC_TYPE_UNSECURE 0 #define USB_ENC_TYPE_WIRED 1 /* non-wireless mode */ #define USB_ENC_TYPE_CCM_1 2 /* aes128/cbc session */ #define USB_ENC_TYPE_RSA_1 3 /* rsa3072/sha1 auth */ __u8 bEncryptionValue; /* use in SET_ENCRYPTION */ __u8 bAuthKeyIndex; } __attribute__((packed)); /*-------------------------------------------------------------------------*/ /* USB_DT_BOS: group of device-level capabilities */ struct usb_bos_descriptor { __u8 bLength; __u8 bDescriptorType; __le16 wTotalLength; __u8 bNumDeviceCaps; } __attribute__((packed)); #define USB_DT_BOS_SIZE 5 /*-------------------------------------------------------------------------*/ /* USB_DT_DEVICE_CAPABILITY: grouped with BOS */ struct usb_dev_cap_header { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; } __attribute__((packed)); #define USB_CAP_TYPE_WIRELESS_USB 1 struct usb_wireless_cap_descriptor { /* Ultra Wide Band */ __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; __u8 bmAttributes; #define USB_WIRELESS_P2P_DRD (1 << 1) #define USB_WIRELESS_BEACON_MASK (3 << 2) #define USB_WIRELESS_BEACON_SELF (1 << 2) #define USB_WIRELESS_BEACON_DIRECTED (2 << 2) #define USB_WIRELESS_BEACON_NONE (3 << 2) __le16 wPHYRates; /* bit rates, Mbps */ #define USB_WIRELESS_PHY_53 (1 << 0) /* always set */ #define USB_WIRELESS_PHY_80 (1 << 1) #define USB_WIRELESS_PHY_107 (1 << 2) /* always set */ #define USB_WIRELESS_PHY_160 (1 << 3) #define USB_WIRELESS_PHY_200 (1 << 4) /* always set */ #define USB_WIRELESS_PHY_320 (1 << 5) #define USB_WIRELESS_PHY_400 (1 << 6) #define 
USB_WIRELESS_PHY_480 (1 << 7) __u8 bmTFITXPowerInfo; /* TFI power levels */ __u8 bmFFITXPowerInfo; /* FFI power levels */ __le16 bmBandGroup; __u8 bReserved; } __attribute__((packed)); #define USB_DT_USB_WIRELESS_CAP_SIZE 11 /* USB 2.0 Extension descriptor */ #define USB_CAP_TYPE_EXT 2 struct usb_ext_cap_descriptor { /* Link Power Management */ __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; __le32 bmAttributes; #define USB_LPM_SUPPORT (1 << 1) /* supports LPM */ #define USB_BESL_SUPPORT (1 << 2) /* supports BESL */ #define USB_BESL_BASELINE_VALID (1 << 3) /* Baseline BESL valid*/ #define USB_BESL_DEEP_VALID (1 << 4) /* Deep BESL valid */ #define USB_SET_BESL_BASELINE(p) (((p) & 0xf) << 8) #define USB_SET_BESL_DEEP(p) (((p) & 0xf) << 12) #define USB_GET_BESL_BASELINE(p) (((p) & (0xf << 8)) >> 8) #define USB_GET_BESL_DEEP(p) (((p) & (0xf << 12)) >> 12) } __attribute__((packed)); #define USB_DT_USB_EXT_CAP_SIZE 7 /* * SuperSpeed USB Capability descriptor: Defines the set of SuperSpeed USB * specific device level capabilities */ #define USB_SS_CAP_TYPE 3 struct usb_ss_cap_descriptor { /* Link Power Management */ __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; __u8 bmAttributes; #define USB_LTM_SUPPORT (1 << 1) /* supports LTM */ __le16 wSpeedSupported; #define USB_LOW_SPEED_OPERATION (1) /* Low speed operation */ #define USB_FULL_SPEED_OPERATION (1 << 1) /* Full speed operation */ #define USB_HIGH_SPEED_OPERATION (1 << 2) /* High speed operation */ #define USB_5GBPS_OPERATION (1 << 3) /* Operation at 5Gbps */ __u8 bFunctionalitySupport; __u8 bU1devExitLat; __le16 bU2DevExitLat; } __attribute__((packed)); #define USB_DT_USB_SS_CAP_SIZE 10 /* * Container ID Capability descriptor: Defines the instance unique ID used to * identify the instance across all operating modes */ #define CONTAINER_ID_TYPE 4 struct usb_ss_container_id_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; __u8 bReserved; __u8 ContainerID[16]; /* 128-bit number */ } __attribute__((packed)); #define USB_DT_USB_SS_CONTN_ID_SIZE 20 /* * Platform Device Capability descriptor: Defines platform specific device * capabilities */ #define USB_PLAT_DEV_CAP_TYPE 5 struct usb_plat_dev_cap_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; __u8 bReserved; __u8 UUID[16]; __u8 CapabilityData[]; } __attribute__((packed)); #define USB_DT_USB_PLAT_DEV_CAP_SIZE(capability_data_size) (20 + capability_data_size) /* * SuperSpeed Plus USB Capability descriptor: Defines the set of * SuperSpeed Plus USB specific device level capabilities */ #define USB_SSP_CAP_TYPE 0xa struct usb_ssp_cap_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; __u8 bReserved; __le32 bmAttributes; #define USB_SSP_SUBLINK_SPEED_ATTRIBS (0x1f << 0) /* sublink speed entries */ #define USB_SSP_SUBLINK_SPEED_IDS (0xf << 5) /* speed ID entries */ __le16 wFunctionalitySupport; #define USB_SSP_MIN_SUBLINK_SPEED_ATTRIBUTE_ID (0xf) #define USB_SSP_MIN_RX_LANE_COUNT (0xf << 8) #define USB_SSP_MIN_TX_LANE_COUNT (0xf << 12) __le16 wReserved; union { __le32 legacy_padding; /* list of sublink speed attrib entries */ __DECLARE_FLEX_ARRAY(__le32, bmSublinkSpeedAttr); }; #define USB_SSP_SUBLINK_SPEED_SSID (0xf) /* sublink speed ID */ #define USB_SSP_SUBLINK_SPEED_LSE (0x3 << 4) /* Lanespeed exponent */ #define USB_SSP_SUBLINK_SPEED_LSE_BPS 0 #define USB_SSP_SUBLINK_SPEED_LSE_KBPS 1 #define USB_SSP_SUBLINK_SPEED_LSE_MBPS 2 #define USB_SSP_SUBLINK_SPEED_LSE_GBPS 3 #define USB_SSP_SUBLINK_SPEED_ST 
(0x3 << 6) /* Sublink type */ #define USB_SSP_SUBLINK_SPEED_ST_SYM_RX 0 #define USB_SSP_SUBLINK_SPEED_ST_ASYM_RX 1 #define USB_SSP_SUBLINK_SPEED_ST_SYM_TX 2 #define USB_SSP_SUBLINK_SPEED_ST_ASYM_TX 3 #define USB_SSP_SUBLINK_SPEED_RSVD (0x3f << 8) /* Reserved */ #define USB_SSP_SUBLINK_SPEED_LP (0x3 << 14) /* Link protocol */ #define USB_SSP_SUBLINK_SPEED_LP_SS 0 #define USB_SSP_SUBLINK_SPEED_LP_SSP 1 #define USB_SSP_SUBLINK_SPEED_LSM (0xff << 16) /* Lanespeed mantissa */ } __attribute__((packed)); /* * USB Power Delivery Capability Descriptor: * Defines capabilities for PD */ /* Defines the various PD Capabilities of this device */ #define USB_PD_POWER_DELIVERY_CAPABILITY 0x06 /* Provides information on each battery supported by the device */ #define USB_PD_BATTERY_INFO_CAPABILITY 0x07 /* The Consumer characteristics of a Port on the device */ #define USB_PD_PD_CONSUMER_PORT_CAPABILITY 0x08 /* The provider characteristics of a Port on the device */ #define USB_PD_PD_PROVIDER_PORT_CAPABILITY 0x09 struct usb_pd_cap_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; /* set to USB_PD_POWER_DELIVERY_CAPABILITY */ __u8 bReserved; __le32 bmAttributes; #define USB_PD_CAP_BATTERY_CHARGING (1 << 1) /* supports Battery Charging specification */ #define USB_PD_CAP_USB_PD (1 << 2) /* supports USB Power Delivery specification */ #define USB_PD_CAP_PROVIDER (1 << 3) /* can provide power */ #define USB_PD_CAP_CONSUMER (1 << 4) /* can consume power */ #define USB_PD_CAP_CHARGING_POLICY (1 << 5) /* supports CHARGING_POLICY feature */ #define USB_PD_CAP_TYPE_C_CURRENT (1 << 6) /* supports power capabilities defined in the USB Type-C Specification */ #define USB_PD_CAP_PWR_AC (1 << 8) #define USB_PD_CAP_PWR_BAT (1 << 9) #define USB_PD_CAP_PWR_USE_V_BUS (1 << 14) __le16 bmProviderPorts; /* Bit zero refers to the UFP of the device */ __le16 bmConsumerPorts; __le16 bcdBCVersion; __le16 bcdPDVersion; __le16 bcdUSBTypeCVersion; } __attribute__((packed)); struct usb_pd_cap_battery_info_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; /* Index of string descriptor shall contain the user friendly name for this battery */ __u8 iBattery; /* Index of string descriptor shall contain the Serial Number String for this battery */ __u8 iSerial; __u8 iManufacturer; __u8 bBatteryId; /* uniquely identifies this battery in status Messages */ __u8 bReserved; /* * Shall contain the Battery Charge value above which this * battery is considered to be fully charged but not necessarily * “topped off.” */ __le32 dwChargedThreshold; /* in mWh */ /* * Shall contain the minimum charge level of this battery such * that above this threshold, a device can be assured of being * able to power up successfully (see Battery Charging 1.2). 
*/ __le32 dwWeakThreshold; /* in mWh */ __le32 dwBatteryDesignCapacity; /* in mWh */ __le32 dwBatteryLastFullchargeCapacity; /* in mWh */ } __attribute__((packed)); struct usb_pd_cap_consumer_port_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; __u8 bReserved; __u8 bmCapabilities; /* port will oerate under: */ #define USB_PD_CAP_CONSUMER_BC (1 << 0) /* BC */ #define USB_PD_CAP_CONSUMER_PD (1 << 1) /* PD */ #define USB_PD_CAP_CONSUMER_TYPE_C (1 << 2) /* USB Type-C Current */ __le16 wMinVoltage; /* in 50mV units */ __le16 wMaxVoltage; /* in 50mV units */ __u16 wReserved; __le32 dwMaxOperatingPower; /* in 10 mW - operating at steady state */ __le32 dwMaxPeakPower; /* in 10mW units - operating at peak power */ __le32 dwMaxPeakPowerTime; /* in 100ms units - duration of peak */ #define USB_PD_CAP_CONSUMER_UNKNOWN_PEAK_POWER_TIME 0xffff } __attribute__((packed)); struct usb_pd_cap_provider_port_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; __u8 bReserved1; __u8 bmCapabilities; /* port will oerate under: */ #define USB_PD_CAP_PROVIDER_BC (1 << 0) /* BC */ #define USB_PD_CAP_PROVIDER_PD (1 << 1) /* PD */ #define USB_PD_CAP_PROVIDER_TYPE_C (1 << 2) /* USB Type-C Current */ __u8 bNumOfPDObjects; __u8 bReserved2; __le32 wPowerDataObject[]; } __attribute__((packed)); /* * Precision time measurement capability descriptor: advertised by devices and * hubs that support PTM */ #define USB_PTM_CAP_TYPE 0xb struct usb_ptm_cap_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDevCapabilityType; } __attribute__((packed)); #define USB_DT_USB_PTM_ID_SIZE 3 /* * The size of the descriptor for the Sublink Speed Attribute Count * (SSAC) specified in bmAttributes[4:0]. SSAC is zero-based */ #define USB_DT_USB_SSP_CAP_SIZE(ssac) (12 + (ssac + 1) * 4) /*-------------------------------------------------------------------------*/ /* USB_DT_WIRELESS_ENDPOINT_COMP: companion descriptor associated with * each endpoint descriptor for a wireless device */ struct usb_wireless_ep_comp_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bMaxBurst; __u8 bMaxSequence; __le16 wMaxStreamDelay; __le16 wOverTheAirPacketSize; __u8 bOverTheAirInterval; __u8 bmCompAttributes; #define USB_ENDPOINT_SWITCH_MASK 0x03 /* in bmCompAttributes */ #define USB_ENDPOINT_SWITCH_NO 0 #define USB_ENDPOINT_SWITCH_SWITCH 1 #define USB_ENDPOINT_SWITCH_SCALE 2 } __attribute__((packed)); /*-------------------------------------------------------------------------*/ /* USB_REQ_SET_HANDSHAKE is a four-way handshake used between a wireless * host and a device for connection set up, mutual authentication, and * exchanging short lived session keys. The handshake depends on a CC. */ struct usb_handshake { __u8 bMessageNumber; __u8 bStatus; __u8 tTKID[3]; __u8 bReserved; __u8 CDID[16]; __u8 nonce[16]; __u8 MIC[8]; } __attribute__((packed)); /*-------------------------------------------------------------------------*/ /* USB_REQ_SET_CONNECTION modifies or revokes a connection context (CC). * A CC may also be set up using non-wireless secure channels (including * wired USB!), and some devices may support CCs with multiple hosts. 
*/ struct usb_connection_context { __u8 CHID[16]; /* persistent host id */ __u8 CDID[16]; /* device id (unique w/in host context) */ __u8 CK[16]; /* connection key */ } __attribute__((packed)); /*-------------------------------------------------------------------------*/ /* USB 2.0 defines three speeds, here's how Linux identifies them */ enum usb_device_speed { USB_SPEED_UNKNOWN = 0, /* enumerating */ USB_SPEED_LOW, USB_SPEED_FULL, /* usb 1.1 */ USB_SPEED_HIGH, /* usb 2.0 */ USB_SPEED_WIRELESS, /* wireless (usb 2.5) */ USB_SPEED_SUPER, /* usb 3.0 */ USB_SPEED_SUPER_PLUS, /* usb 3.1 */ }; enum usb_device_state { /* NOTATTACHED isn't in the USB spec, and this state acts * the same as ATTACHED ... but it's clearer this way. */ USB_STATE_NOTATTACHED = 0, /* chapter 9 and authentication (wireless) device states */ USB_STATE_ATTACHED, USB_STATE_POWERED, /* wired */ USB_STATE_RECONNECTING, /* auth */ USB_STATE_UNAUTHENTICATED, /* auth */ USB_STATE_DEFAULT, /* limited function */ USB_STATE_ADDRESS, USB_STATE_CONFIGURED, /* most functions */ USB_STATE_SUSPENDED /* NOTE: there are actually four different SUSPENDED * states, returning to POWERED, DEFAULT, ADDRESS, or * CONFIGURED respectively when SOF tokens flow again. * At this level there's no difference between L1 and L2 * suspend states. (L2 being original USB 1.1 suspend.) */ }; enum usb3_link_state { USB3_LPM_U0 = 0, USB3_LPM_U1, USB3_LPM_U2, USB3_LPM_U3 }; /* * A U1 timeout of 0x0 means the parent hub will reject any transitions to U1. * 0xff means the parent hub will accept transitions to U1, but will not * initiate a transition. * * A U1 timeout of 0x1 to 0x7F also causes the hub to initiate a transition to * U1 after that many microseconds. Timeouts of 0x80 to 0xFE are reserved * values. * * A U2 timeout of 0x0 means the parent hub will reject any transitions to U2. * 0xff means the parent hub will accept transitions to U2, but will not * initiate a transition. * * A U2 timeout of 0x1 to 0xFE also causes the hub to initiate a transition to * U2 after N*256 microseconds. Therefore a U2 timeout value of 0x1 means a U2 * idle timer of 256 microseconds, 0x2 means 512 microseconds, 0xFE means * 65.024ms. */ #define USB3_LPM_DISABLED 0x0 #define USB3_LPM_U1_MAX_TIMEOUT 0x7F #define USB3_LPM_U2_MAX_TIMEOUT 0xFE #define USB3_LPM_DEVICE_INITIATED 0xFF struct usb_set_sel_req { __u8 u1_sel; __u8 u1_pel; __le16 u2_sel; __le16 u2_pel; } __attribute__ ((packed)); /* * The Set System Exit Latency control transfer provides one byte each for * U1 SEL and U1 PEL, so the max exit latency is 0xFF. U2 SEL and U2 PEL each * are two bytes long. */ #define USB3_LPM_MAX_U1_SEL_PEL 0xFF #define USB3_LPM_MAX_U2_SEL_PEL 0xFFFF /*-------------------------------------------------------------------------*/ /* * As per USB compliance update, a device that is actively drawing * more than 100mA from USB must report itself as bus-powered in * the GetStatus(DEVICE) call. * https://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 */ #define USB_SELF_POWER_VBUS_MAX_DRAW 100 #endif /* _UAPI__LINUX_USB_CH9_H */
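/*
 * Editor's note (illustrative sketch, not part of this UAPI header): the
 * endpoint helpers defined above (usb_endpoint_num(), usb_endpoint_dir_in(),
 * usb_endpoint_xfer_*(), usb_endpoint_maxp(), usb_endpoint_maxp_mult()) can
 * be used directly from user space, for example when walking a configuration
 * blob read through usbfs. The endpoint values below are made up for
 * illustration: a hypothetical high-speed bulk IN endpoint with 512-byte
 * packets.
 */
#include <stdio.h>
#include <linux/usb/ch9.h>

static void describe_endpoint(const struct usb_endpoint_descriptor *epd)
{
	/* wMaxPacketSize carries the packet size in bits 10:0 and, for
	 * high-speed periodic endpoints, extra transactions in bits 12:11. */
	int maxp = usb_endpoint_maxp(epd);
	int mult = usb_endpoint_maxp_mult(epd);

	printf("ep%d %s %s maxp=%d mult=%d\n",
	       usb_endpoint_num(epd),
	       usb_endpoint_dir_in(epd) ? "IN" : "OUT",
	       usb_endpoint_xfer_bulk(epd) ? "bulk" :
	       usb_endpoint_xfer_int(epd)  ? "int"  :
	       usb_endpoint_xfer_isoc(epd) ? "isoc" : "control",
	       maxp, mult);
}

int main(void)
{
	struct usb_endpoint_descriptor epd = {
		.bLength          = USB_DT_ENDPOINT_SIZE,
		.bDescriptorType  = USB_DT_ENDPOINT,
		.bEndpointAddress = USB_DIR_IN | 1,
		.bmAttributes     = USB_ENDPOINT_XFER_BULK,
		.wMaxPacketSize   = __cpu_to_le16(512),
	};

	describe_endpoint(&epd);
	return 0;
}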
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> */ #ifndef _NET_IPV6_H #define _NET_IPV6_H #include <linux/ipv6.h> #include <linux/hardirq.h> #include <linux/jhash.h> #include <linux/refcount.h> #include <linux/jump_label_ratelimit.h> #include <net/if_inet6.h> #include <net/flow.h> #include <net/flow_dissector.h> #include <net/inet_dscp.h> #include <net/snmp.h> #include <net/netns/hash.h> struct ip_tunnel_info; #define SIN6_LEN_RFC2133 24 #define IPV6_MAXPLEN 65535 /* * NextHeader field of IPv6 header */ #define NEXTHDR_HOP 0 /* Hop-by-hop option header. */ #define NEXTHDR_IPV4 4 /* IPv4 in IPv6 */ #define NEXTHDR_TCP 6 /* TCP segment. */ #define NEXTHDR_UDP 17 /* UDP message. */ #define NEXTHDR_IPV6 41 /* IPv6 in IPv6 */ #define NEXTHDR_ROUTING 43 /* Routing header. */ #define NEXTHDR_FRAGMENT 44 /* Fragmentation/reassembly header. */ #define NEXTHDR_GRE 47 /* GRE header. */ #define NEXTHDR_ESP 50 /* Encapsulating security payload. */ #define NEXTHDR_AUTH 51 /* Authentication header. */ #define NEXTHDR_ICMP 58 /* ICMP for IPv6. */ #define NEXTHDR_NONE 59 /* No next header */ #define NEXTHDR_DEST 60 /* Destination options header. */ #define NEXTHDR_SCTP 132 /* SCTP message. */ #define NEXTHDR_MOBILITY 135 /* Mobility header. 
*/ #define NEXTHDR_MAX 255 #define IPV6_DEFAULT_HOPLIMIT 64 #define IPV6_DEFAULT_MCASTHOPS 1 /* Limits on Hop-by-Hop and Destination options. * * Per RFC8200 there is no limit on the maximum number or lengths of options in * Hop-by-Hop or Destination options other then the packet must fit in an MTU. * We allow configurable limits in order to mitigate potential denial of * service attacks. * * There are three limits that may be set: * - Limit the number of options in a Hop-by-Hop or Destination options * extension header * - Limit the byte length of a Hop-by-Hop or Destination options extension * header * - Disallow unknown options * * The limits are expressed in corresponding sysctls: * * ipv6.sysctl.max_dst_opts_cnt * ipv6.sysctl.max_hbh_opts_cnt * ipv6.sysctl.max_dst_opts_len * ipv6.sysctl.max_hbh_opts_len * * max_*_opts_cnt is the number of TLVs that are allowed for Destination * options or Hop-by-Hop options. If the number is less than zero then unknown * TLVs are disallowed and the number of known options that are allowed is the * absolute value. Setting the value to INT_MAX indicates no limit. * * max_*_opts_len is the length limit in bytes of a Destination or * Hop-by-Hop options extension header. Setting the value to INT_MAX * indicates no length limit. * * If a limit is exceeded when processing an extension header the packet is * silently discarded. */ /* Default limits for Hop-by-Hop and Destination options */ #define IP6_DEFAULT_MAX_DST_OPTS_CNT 8 #define IP6_DEFAULT_MAX_HBH_OPTS_CNT 8 #define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ #define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ /* * Addr type * * type - unicast | multicast * scope - local | site | global * v4 - compat * v4mapped * any * loopback */ #define IPV6_ADDR_ANY 0x0000U #define IPV6_ADDR_UNICAST 0x0001U #define IPV6_ADDR_MULTICAST 0x0002U #define IPV6_ADDR_LOOPBACK 0x0010U #define IPV6_ADDR_LINKLOCAL 0x0020U #define IPV6_ADDR_SITELOCAL 0x0040U #define IPV6_ADDR_COMPATv4 0x0080U #define IPV6_ADDR_SCOPE_MASK 0x00f0U #define IPV6_ADDR_MAPPED 0x1000U /* * Addr scopes */ #define IPV6_ADDR_MC_SCOPE(a) \ ((a)->s6_addr[1] & 0x0f) /* nonstandard */ #define __IPV6_ADDR_SCOPE_INVALID -1 #define IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 #define IPV6_ADDR_SCOPE_GLOBAL 0x0e /* * Addr flags */ #define IPV6_ADDR_MC_FLAG_TRANSIENT(a) \ ((a)->s6_addr[1] & 0x10) #define IPV6_ADDR_MC_FLAG_PREFIX(a) \ ((a)->s6_addr[1] & 0x20) #define IPV6_ADDR_MC_FLAG_RENDEZVOUS(a) \ ((a)->s6_addr[1] & 0x40) /* * fragmentation header */ struct frag_hdr { __u8 nexthdr; __u8 reserved; __be16 frag_off; __be32 identification; }; /* * Jumbo payload option, as described in RFC 2675 2. 
*/ struct hop_jumbo_hdr { u8 nexthdr; u8 hdrlen; u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ u8 tlv_len; /* 4 */ __be32 jumbo_payload_len; }; #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 struct ip6_fraglist_iter { struct ipv6hdr *tmp_hdr; struct sk_buff *frag; int offset; unsigned int hlen; __be32 frag_id; u8 nexthdr; }; int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, u8 nexthdr, __be32 frag_id, struct ip6_fraglist_iter *iter); void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter); static inline struct sk_buff *ip6_fraglist_next(struct ip6_fraglist_iter *iter) { struct sk_buff *skb = iter->frag; iter->frag = skb->next; skb_mark_not_on_list(skb); return skb; } struct ip6_frag_state { u8 *prevhdr; unsigned int hlen; unsigned int mtu; unsigned int left; int offset; int ptr; int hroom; int troom; __be32 frag_id; u8 nexthdr; }; void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state); struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state); #define IP6_REPLY_MARK(net, mark) \ ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) #include <net/sock.h> /* sysctls */ extern int sysctl_mld_max_msf; extern int sysctl_mld_qrv; #define _DEVINC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ mod##SNMP_INC_STATS64((_idev)->stats.statname, (field));\ mod##SNMP_INC_STATS64((net)->mib.statname##_statistics, (field));\ }) /* per device counters are atomic_long_t */ #define _DEVINCATOMIC(net, statname, mod, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, (field)); \ mod##SNMP_INC_STATS((net)->mib.statname##_statistics, (field));\ }) /* per device and per net counters are atomic_long_t */ #define _DEVINC_ATOMIC_ATOMIC(net, statname, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, (field)); \ SNMP_INC_STATS_ATOMIC_LONG((net)->mib.statname##_statistics, (field));\ }) #define _DEVADD(net, statname, mod, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ mod##SNMP_ADD_STATS((_idev)->stats.statname, (field), (val)); \ mod##SNMP_ADD_STATS((net)->mib.statname##_statistics, (field), (val));\ }) #define _DEVUPD(net, statname, mod, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ if (likely(_idev != NULL)) \ mod##SNMP_UPD_PO_STATS((_idev)->stats.statname, field, (val)); \ mod##SNMP_UPD_PO_STATS((net)->mib.statname##_statistics, field, (val));\ }) /* MIBs */ #define IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, , idev, field) #define __IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, __, idev, field) #define IP6_ADD_STATS(net, idev,field,val) \ _DEVADD(net, ipv6, , idev, field, val) #define __IP6_ADD_STATS(net, idev,field,val) \ _DEVADD(net, ipv6, __, idev, field, val) #define IP6_UPD_PO_STATS(net, idev,field,val) \ _DEVUPD(net, ipv6, , idev, field, val) #define __IP6_UPD_PO_STATS(net, idev,field,val) \ _DEVUPD(net, ipv6, __, idev, field, val) #define ICMP6_INC_STATS(net, idev, field) \ _DEVINCATOMIC(net, icmpv6, , idev, field) #define __ICMP6_INC_STATS(net, idev, field) \ _DEVINCATOMIC(net, icmpv6, __, idev, field) #define ICMP6MSGOUT_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field 
+256) #define ICMP6MSGIN_INC_STATS(net, idev, field) \ _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field) struct ip6_ra_chain { struct ip6_ra_chain *next; struct sock *sk; int sel; void (*destructor)(struct sock *); }; extern struct ip6_ra_chain *ip6_ra_chain; extern rwlock_t ip6_ra_lock; /* This structure is prepared by protocol, when parsing ancillary data and passed to IPv6. */ struct ipv6_txoptions { refcount_t refcnt; /* Length of this structure */ int tot_len; /* length of extension headers */ __u16 opt_flen; /* after fragment hdr */ __u16 opt_nflen; /* before fragment hdr */ struct ipv6_opt_hdr *hopopt; struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; /* flowlabel_reflect sysctl values */ enum flowlabel_reflect { FLOWLABEL_REFLECT_ESTABLISHED = 1, FLOWLABEL_REFLECT_TCP_RESET = 2, FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES = 4, }; struct ip6_flowlabel { struct ip6_flowlabel __rcu *next; __be32 label; atomic_t users; struct in6_addr dst; struct ipv6_txoptions *opt; unsigned long linger; struct rcu_head rcu; u8 share; union { struct pid *pid; kuid_t uid; } owner; unsigned long lastuse; unsigned long expires; struct net *fl_net; }; #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) #define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) #define IPV6_FLOWLABEL_STATELESS_FLAG cpu_to_be32(0x00080000) #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 struct ipv6_fl_socklist { struct ipv6_fl_socklist __rcu *next; struct ip6_flowlabel *fl; struct rcu_head rcu; }; struct ipcm6_cookie { struct sockcm_cookie sockc; __s16 hlimit; __s16 tclass; __u16 gso_size; __s8 dontfrag; struct ipv6_txoptions *opt; }; static inline void ipcm6_init(struct ipcm6_cookie *ipc6) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, .tclass = -1, .dontfrag = -1, }; } static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, const struct sock *sk) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, .tclass = inet6_sk(sk)->tclass, .dontfrag = inet6_test_bit(DONTFRAG, sk), }; } static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) { struct ipv6_txoptions *opt; rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) { if (!refcount_inc_not_zero(&opt->refcnt)) opt = NULL; else opt = rcu_pointer_handoff(opt); } rcu_read_unlock(); return opt; } static inline void txopt_put(struct ipv6_txoptions *opt) { if (opt && refcount_dec_and_test(&opt->refcnt)) kfree_rcu(opt, rcu); } #if IS_ENABLED(CONFIG_IPV6) struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label); extern struct static_key_false_deferred ipv6_flowlabel_exclusive; static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) { if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) && READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl)) return __fl6_sock_lookup(sk, label) ? 
: ERR_PTR(-ENOENT); return NULL; } #endif struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, struct ipv6_txoptions *fopt); void fl6_free_socklist(struct sock *sk); int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen); int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); bool ip6_autoflowlabel(struct net *net, const struct sock *sk); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { if (fl) atomic_dec(&fl->users); } enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len); int ip6_ra_control(struct sock *sk, int sel); int ipv6_parse_hopopts(struct sk_buff *skb); struct ipv6_txoptions *ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); struct ipv6_txoptions *ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, struct ipv6_opt_hdr *newopt); struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt); static inline struct ipv6_txoptions * ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) { if (!opt) return NULL; return __ipv6_fixup_options(opt_space, opt); } bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt); struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); /* This helper is specialized for BIG TCP needs. * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. * It assumes headers are already in skb->head. * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there. */ static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) { const struct hop_jumbo_hdr *jhdr; const struct ipv6hdr *nhdr; if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) return 0; if (skb->protocol != htons(ETH_P_IPV6)) return 0; if (skb_network_offset(skb) + sizeof(struct ipv6hdr) + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) return 0; nhdr = ipv6_hdr(skb); if (nhdr->nexthdr != NEXTHDR_HOP) return 0; jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || jhdr->nexthdr != IPPROTO_TCP) return 0; return jhdr->nexthdr; } /* Return 0 if HBH header is successfully removed * Or if HBH removal is unnecessary (packet is not big TCP) * Return error to indicate dropping the packet */ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) { const int hophdr_len = sizeof(struct hop_jumbo_hdr); int nexthdr = ipv6_has_hopopt_jumbo(skb); struct ipv6hdr *h6; if (!nexthdr) return 0; if (skb_cow_head(skb, 0)) return -1; /* Remove the HBH header. * Layout: [Ethernet header][IPv6 header][HBH][L4 Header] */ memmove(skb_mac_header(skb) + hophdr_len, skb_mac_header(skb), skb_network_header(skb) - skb_mac_header(skb) + sizeof(struct ipv6hdr)); __skb_pull(skb, hophdr_len); skb->network_header += hophdr_len; skb->mac_header += hophdr_len; h6 = ipv6_hdr(skb); h6->nexthdr = nexthdr; return 0; } static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); /* If forwarding is enabled, RA are not accepted unless the special * hybrid mode (accept_ra=2) is enabled. */ return READ_ONCE(idev->cnf.forwarding) ? 
accept_ra == 2 : accept_ra; } #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */ #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */ int __ipv6_addr_type(const struct in6_addr *addr); static inline int ipv6_addr_type(const struct in6_addr *addr) { return __ipv6_addr_type(addr) & 0xffff; } static inline int ipv6_addr_scope(const struct in6_addr *addr) { return __ipv6_addr_type(addr) & IPV6_ADDR_SCOPE_MASK; } static inline int __ipv6_addr_src_scope(int type) { return (type == IPV6_ADDR_ANY) ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16); } static inline int ipv6_addr_src_scope(const struct in6_addr *addr) { return __ipv6_addr_src_scope(__ipv6_addr_type(addr)); } static inline bool __ipv6_addr_needs_scope_id(int type) { return type & IPV6_ADDR_LINKLOCAL || (type & IPV6_ADDR_MULTICAST && (type & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL))); } static inline __u32 ipv6_iface_scope_id(const struct in6_addr *addr, int iface) { return __ipv6_addr_needs_scope_id(__ipv6_addr_type(addr)) ? iface : 0; } static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2) { return memcmp(a1, a2, sizeof(struct in6_addr)); } static inline bool ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ulm = (const unsigned long *)m; const unsigned long *ul2 = (const unsigned long *)a2; return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | ((ul1[1] ^ ul2[1]) & ulm[1])); #else return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); #endif } static inline void ipv6_addr_prefix(struct in6_addr *pfx, const struct in6_addr *addr, int plen) { /* caller must guarantee 0 <= plen <= 128 */ int o = plen >> 3, b = plen & 0x7; memset(pfx->s6_addr, 0, sizeof(pfx->s6_addr)); memcpy(pfx->s6_addr, addr, o); if (b != 0) pfx->s6_addr[o] = addr->s6_addr[o] & (0xff00 >> b); } static inline void ipv6_addr_prefix_copy(struct in6_addr *addr, const struct in6_addr *pfx, int plen) { /* caller must guarantee 0 <= plen <= 128 */ int o = plen >> 3, b = plen & 0x7; memcpy(addr->s6_addr, pfx, o); if (b != 0) { addr->s6_addr[o] &= ~(0xff00 >> b); addr->s6_addr[o] |= (pfx->s6_addr[o] & (0xff00 >> b)); } } static inline void __ipv6_addr_set_half(__be32 *addr, __be32 wh, __be32 wl) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 #if defined(__BIG_ENDIAN) if (__builtin_constant_p(wh) && __builtin_constant_p(wl)) { *(__force u64 *)addr = ((__force u64)(wh) << 32 | (__force u64)(wl)); return; } #elif defined(__LITTLE_ENDIAN) if (__builtin_constant_p(wl) && __builtin_constant_p(wh)) { *(__force u64 *)addr = ((__force u64)(wl) << 32 | (__force u64)(wh)); return; } #endif #endif addr[0] = wh; addr[1] = wl; } static inline void ipv6_addr_set(struct in6_addr *addr, __be32 w1, __be32 w2, __be32 w3, __be32 w4) { __ipv6_addr_set_half(&addr->s6_addr32[0], w1, w2); __ipv6_addr_set_half(&addr->s6_addr32[2], w3, w4); } static inline bool ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ul2 = (const unsigned 
long *)a2; return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; #else return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; #endif } #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 static inline bool __ipv6_prefix_equal64_half(const __be64 *a1, const __be64 *a2, unsigned int len) { if (len && ((*a1 ^ *a2) & cpu_to_be64((~0UL) << (64 - len)))) return false; return true; } static inline bool ipv6_prefix_equal(const struct in6_addr *addr1, const struct in6_addr *addr2, unsigned int prefixlen) { const __be64 *a1 = (const __be64 *)addr1; const __be64 *a2 = (const __be64 *)addr2; if (prefixlen >= 64) { if (a1[0] ^ a2[0]) return false; return __ipv6_prefix_equal64_half(a1 + 1, a2 + 1, prefixlen - 64); } return __ipv6_prefix_equal64_half(a1, a2, prefixlen); } #else static inline bool ipv6_prefix_equal(const struct in6_addr *addr1, const struct in6_addr *addr2, unsigned int prefixlen) { const __be32 *a1 = addr1->s6_addr32; const __be32 *a2 = addr2->s6_addr32; unsigned int pdw, pbi; /* check complete u32 in prefix */ pdw = prefixlen >> 5; if (pdw && memcmp(a1, a2, pdw << 2)) return false; /* check incomplete u32 in prefix */ pbi = prefixlen & 0x1f; if (pbi && ((a1[pdw] ^ a2[pdw]) & htonl((0xffffffff) << (32 - pbi)))) return false; return true; } #endif static inline bool ipv6_addr_any(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul = (const unsigned long *)a; return (ul[0] | ul[1]) == 0UL; #else return (a->s6_addr32[0] | a->s6_addr32[1] | a->s6_addr32[2] | a->s6_addr32[3]) == 0; #endif } static inline u32 ipv6_addr_hash(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul = (const unsigned long *)a; unsigned long x = ul[0] ^ ul[1]; return (u32)(x ^ (x >> 32)); #else return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^ a->s6_addr32[2] ^ a->s6_addr32[3]); #endif } /* more secured version of ipv6_addr_hash() */ static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) { return jhash2((__force const u32 *)a->s6_addr32, ARRAY_SIZE(a->s6_addr32), initval); } static inline bool ipv6_addr_loopback(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const __be64 *be = (const __be64 *)a; return (be[0] | (be[1] ^ cpu_to_be64(1))) == 0UL; #else return (a->s6_addr32[0] | a->s6_addr32[1] | a->s6_addr32[2] | (a->s6_addr32[3] ^ cpu_to_be32(1))) == 0; #endif } /* * Note that we must __force cast these to unsigned long to make sparse happy, * since all of the endian-annotated types are fixed size regardless of arch. 
*/ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) { return ( #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 *(unsigned long *)a | #else (__force unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | #endif (__force unsigned long)(a->s6_addr32[2] ^ cpu_to_be32(0x0000ffff))) == 0UL; } static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a) { return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]); } static inline u32 ipv6_portaddr_hash(const struct net *net, const struct in6_addr *addr6, unsigned int port) { unsigned int hash, mix = net_hash_mix(net); if (ipv6_addr_any(addr6)) hash = jhash_1word(0, mix); else if (ipv6_addr_v4mapped(addr6)) hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); else hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); return hash ^ port; } /* * Check for a RFC 4843 ORCHID address * (Overlay Routable Cryptographic Hash Identifiers) */ static inline bool ipv6_addr_orchid(const struct in6_addr *a) { return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010); } static inline bool ipv6_addr_is_multicast(const struct in6_addr *addr) { return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000); } static inline void ipv6_addr_set_v4mapped(const __be32 addr, struct in6_addr *v4mapped) { ipv6_addr_set(v4mapped, 0, 0, htonl(0x0000FFFF), addr); } /* * find the first different bit between two addresses * length of address must be a multiple of 32bits */ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int addrlen) { const __be32 *a1 = token1, *a2 = token2; int i; addrlen >>= 2; for (i = 0; i < addrlen; i++) { __be32 xb = a1[i] ^ a2[i]; if (xb) return i * 32 + 31 - __fls(ntohl(xb)); } /* * we should *never* get to this point since that * would mean the addrs are equal * * However, we do get to it 8) And exacly, when * addresses are equal 8) * * ip route add 1111::/128 via ... * ip route add 1111::/64 via ... * and we are here. * * Ideally, this function should stop comparison * at prefix length. It does not, but it is still OK, * if returned value is greater than prefix length. 
* --ANK (980803) */ return addrlen << 5; } #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 static inline int __ipv6_addr_diff64(const void *token1, const void *token2, int addrlen) { const __be64 *a1 = token1, *a2 = token2; int i; addrlen >>= 3; for (i = 0; i < addrlen; i++) { __be64 xb = a1[i] ^ a2[i]; if (xb) return i * 64 + 63 - __fls(be64_to_cpu(xb)); } return addrlen << 6; } #endif static inline int __ipv6_addr_diff(const void *token1, const void *token2, int addrlen) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 if (__builtin_constant_p(addrlen) && !(addrlen & 7)) return __ipv6_addr_diff64(token1, token2, addrlen); #endif return __ipv6_addr_diff32(token1, token2, addrlen); } static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) { return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr); __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, struct dst_entry *dst) { int hlimit; if (ipv6_addr_is_multicast(&fl6->daddr)) hlimit = READ_ONCE(np->mcast_hops); else hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); return hlimit; } /* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store * Equivalent to : flow->v6addrs.src = iph->saddr; * flow->v6addrs.dst = iph->daddr; */ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, const struct ipv6hdr *iph) { BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != offsetof(typeof(flow->addrs), v6addrs.src) + sizeof(flow->addrs.v6addrs.src)); memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } #if IS_ENABLED(CONFIG_IPV6) static inline bool ipv6_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { return net->ipv6.sysctl.ip_nonlocal_bind || test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) || test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags); } /* Sysctl settings for net ipv6.auto_flowlabels */ #define IP6_AUTO_FLOW_LABEL_OFF 0 #define IP6_AUTO_FLOW_LABEL_OPTOUT 1 #define IP6_AUTO_FLOW_LABEL_OPTIN 2 #define IP6_AUTO_FLOW_LABEL_FORCED 3 #define IP6_AUTO_FLOW_LABEL_MAX IP6_AUTO_FLOW_LABEL_FORCED #define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OPTOUT static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { u32 hash; /* @flowlabel may include more than a flow label, eg, the traffic class. * Here we want only the flow label value. */ flowlabel &= IPV6_FLOWLABEL_MASK; if (flowlabel || net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || (!autolabel && net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) return flowlabel; hash = skb_get_hash_flowi6(skb, fl6); /* Since this is being sent on the wire obfuscate hash a bit * to minimize possbility that any useful information to an * attacker is leaked. Only lower 20 bits are relevant. 
*/ hash = rol32(hash, 16); flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; if (net->ipv6.sysctl.flowlabel_state_ranges) flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; return flowlabel; } static inline int ip6_default_np_autolabel(struct net *net) { switch (net->ipv6.sysctl.auto_flowlabels) { case IP6_AUTO_FLOW_LABEL_OFF: case IP6_AUTO_FLOW_LABEL_OPTIN: default: return 0; case IP6_AUTO_FLOW_LABEL_OPTOUT: case IP6_AUTO_FLOW_LABEL_FORCED: return 1; } } #else static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { return flowlabel; } static inline int ip6_default_np_autolabel(struct net *net) { return 0; } #endif #if IS_ENABLED(CONFIG_IPV6) static inline int ip6_multipath_hash_policy(const struct net *net) { return net->ipv6.sysctl.multipath_hash_policy; } static inline u32 ip6_multipath_hash_fields(const struct net *net) { return net->ipv6.sysctl.multipath_hash_fields; } #else static inline int ip6_multipath_hash_policy(const struct net *net) { return 0; } static inline u32 ip6_multipath_hash_fields(const struct net *net) { return 0; } #endif /* * Header manipulation */ static inline void ip6_flow_hdr(struct ipv6hdr *hdr, unsigned int tclass, __be32 flowlabel) { *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | flowlabel; } static inline __be32 ip6_flowinfo(const struct ipv6hdr *hdr) { return *(__be32 *)hdr & IPV6_FLOWINFO_MASK; } static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr) { return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; } static inline u8 ip6_tclass(__be32 flowinfo) { return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } static inline dscp_t ip6_dscp(__be32 flowinfo) { return inet_dsfield_to_dscp(ip6_tclass(flowinfo)); } static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) { return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; } static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6) { return fl6->flowlabel & IPV6_FLOWLABEL_MASK; } /* * Prototypes exported by ipv6 */ /* * rcv function (called from netdevice level) */ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb); /* * upper-layer output functions */ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags); int ip6_push_pending_frames(struct sock *sk); void ip6_flush_pending_frames(struct sock *sk); int ip6_send_skb(struct sk_buff *skb); struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, struct inet_cork_full *cork, struct inet6_cork *v6_cork); struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { return __ip6_make_skb(sk, &sk->sk_write_queue, 
&inet_sk(sk)->cork, &inet6_sk(sk)->cork); } int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); /* * skb processing functions */ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_forward(struct sk_buff *skb); int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, bool have_final); int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); /* * Extension header (options) processing */ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, struct in6_addr **daddr_p, struct in6_addr *saddr); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto); int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, __be16 *frag_offp); bool ipv6_ext_hdr(u8 nexthdr); enum { IP6_FH_F_FRAG = (1 << 0), IP6_FH_F_AUTH = (1 << 1), IP6_FH_F_SKIP_RH = (1 << 2), }; /* find specified header and get offset to it */ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff, int *fragflg); int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type); struct in6_addr *fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, struct in6_addr *orig); /* * socket options (ipv6_sockglue.c) */ DECLARE_STATIC_KEY_FALSE(ip6_min_hopcount); int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int do_ipv6_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); void ip6_datagram_release_cb(struct sock *sk); int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); void inet6_cleanup_sock(struct sock *sk); void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned 
long arg); int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); /* * reassembly.c */ extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; extern const struct proto_ops inet6_sockraw_ops; struct group_source_req; struct group_filter; int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr); int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage *list); int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, sockptr_t optval, size_t ss_offset); #ifdef CONFIG_PROC_FS int ac6_proc_init(struct net *net); void ac6_proc_exit(struct net *net); int raw6_proc_init(void); void raw6_proc_exit(void); int tcp6_proc_init(struct net *net); void tcp6_proc_exit(struct net *net); int udp6_proc_init(struct net *net); void udp6_proc_exit(struct net *net); int udplite6_proc_init(void); void udplite6_proc_exit(void); int ipv6_misc_proc_init(void); void ipv6_misc_proc_exit(void); int snmp6_register_dev(struct inet6_dev *idev); int snmp6_unregister_dev(struct inet6_dev *idev); #else static inline int ac6_proc_init(struct net *net) { return 0; } static inline void ac6_proc_exit(struct net *net) { } static inline int snmp6_register_dev(struct inet6_dev *idev) { return 0; } static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; } #endif #ifdef CONFIG_SYSCTL struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); size_t ipv6_icmp_sysctl_table_size(void); struct ctl_table *ipv6_route_sysctl_init(struct net *net); size_t ipv6_route_sysctl_table_size(struct net *net); int ipv6_sysctl_register(void); void ipv6_sysctl_unregister(void); #endif int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex, const struct in6_addr *addr, unsigned int mode); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); static inline int ip6_sock_set_v6only(struct sock *sk) { if (inet_sk(sk)->inet_num) return -EINVAL; lock_sock(sk); sk->sk_ipv6only = true; release_sock(sk); return 0; } static inline void ip6_sock_set_recverr(struct sock *sk) { inet6_set_bit(RECVERR6, sk); } #define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \ IPV6_PREFER_SRC_COA) static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) { unsigned int prefmask = ~IPV6_PREFER_SRC_MASK; unsigned int pref = 0; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { case IPV6_PREFER_SRC_PUBLIC: pref |= IPV6_PREFER_SRC_PUBLIC; prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP); break; case IPV6_PREFER_SRC_TMP: pref |= IPV6_PREFER_SRC_TMP; prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP); break; case IPV6_PREFER_SRC_PUBTMP_DEFAULT: prefmask &= ~(IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP); break; case 0: break; default: return -EINVAL; } /* check HOME/COA conflicts */ switch (val & (IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA)) { case IPV6_PREFER_SRC_HOME: prefmask &= ~IPV6_PREFER_SRC_COA; break; case IPV6_PREFER_SRC_COA: pref |= IPV6_PREFER_SRC_COA; break; case 0: break; default: return -EINVAL; } /* check CGA/NONCGA conflicts */ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { case 
IPV6_PREFER_SRC_CGA: case IPV6_PREFER_SRC_NONCGA: case 0: break; default: return -EINVAL; } WRITE_ONCE(inet6_sk(sk)->srcprefs, (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref); return 0; } static inline void ip6_sock_set_recvpktinfo(struct sock *sk) { lock_sock(sk); inet6_sk(sk)->rxopt.bits.rxinfo = true; release_sock(sk); } #endif /* _NET_IPV6_H */
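As a hedged, self-contained illustration (not part of the header) of the flowinfo layout behind IPV6_TCLASS_MASK, IPV6_TCLASS_SHIFT, ip6_make_flowinfo() and ip6_tclass() above: the first 32 bits of an IPv6 header carry version (4 bits), traffic class (8 bits) and flow label (20 bits), so the traffic class sits 20 bits above the label. The masks are re-declared locally so the sketch builds as ordinary userspace C.

#include <arpa/inet.h>	/* htonl/ntohl */
#include <assert.h>
#include <stdint.h>

/* Local copies of the header's masks, for a standalone build. */
#define FLOWINFO_MASK	0x0FFFFFFFu	/* traffic class + flow label */
#define FLOWLABEL_MASK	0x000FFFFFu	/* low 20 bits */
#define TCLASS_SHIFT	20

int main(void)
{
	uint32_t flowlabel = htonl(0x12345);	/* already within the 20-bit label */
	unsigned int tclass = 0xB8;		/* e.g. DSCP EF with ECN bits clear */

	/* Same packing as ip6_make_flowinfo(): tclass above the label bits. */
	uint32_t flowinfo = htonl(tclass << TCLASS_SHIFT) | flowlabel;

	/* Same unpacking as ip6_tclass() and ip6_flowlabel(). */
	assert((ntohl(flowinfo & htonl(FLOWINFO_MASK & ~FLOWLABEL_MASK)) >> TCLASS_SHIFT) == tclass);
	assert((flowinfo & htonl(FLOWLABEL_MASK)) == flowlabel);
	return 0;
}

The only piece this sketch leaves out is the version nibble, which ip6_flow_hdr() above adds by OR-ing in htonl(0x60000000) when it builds the first word of the header.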
5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 
5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 
6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 Address [auto]configuration
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 */

/*
 *	Changes:
 *
 *	Janos Farkas			:	delete timer on ifdown
 *	<chexum@bankinf.banki.hu>
 *	Andi Kleen			:	kill double kfree on module
 *						unload.
 *	Maciej W. Rozycki		:	FDDI support
 *	sekiya@USAGI			:	Don't send too many RS
 *						packets.
 *	yoshfuji@USAGI			:	Fixed interval between DAD
 *						packets.
 *	YOSHIFUJI Hideaki @USAGI	:	improved accuracy of
 *						address validation timer.
 *	YOSHIFUJI Hideaki @USAGI	:	Privacy Extensions (RFC3041)
 *						support.
 *	Yuji SEKIYA @USAGI		:	Don't assign a same IPv6
 *						address on a same interface.
 *	YOSHIFUJI Hideaki @USAGI	:	ARCnet support
 *	YOSHIFUJI Hideaki @USAGI	:	convert /proc/net/if_inet6 to
 *						seq_file.
 *	YOSHIFUJI Hideaki @USAGI	:	improved source address
 *						selection; consider scope,
 *						status etc.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/inet.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/if_arcnet.h>
#include <linux/if_infiniband.h>
#include <linux/route.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/slab.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/string.h>
#include <linux/hash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/6lowpan.h>
#include <net/firewire.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/l3mdev.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
#include <linux/random.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/export.h>
#include <linux/ioam6.h>

#define	INFINITY_LIFE_TIME	0xFFFFFFFF

#define IPV6_MAX_STRLEN \
	sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")

static inline u32 cstamp_delta(unsigned long cstamp)
{
	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}

static inline s32 rfc3315_s14_backoff_init(s32 irt)
{
	/* multiply 'initial retransmission time' by 0.9 .. 1.1 */
	u64 tmp = get_random_u32_inclusive(900000, 1100000) * (u64)irt;
	do_div(tmp, 1000000);
	return (s32)tmp;
}

static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt)
{
	/* multiply 'retransmission timeout' by 1.9 .. 2.1 */
	u64 tmp = get_random_u32_inclusive(1900000, 2100000) * (u64)rt;
	do_div(tmp, 1000000);
	if ((s32)tmp > mrt) {
		/* multiply 'maximum retransmission time' by 0.9 .. 1.1 */
		tmp = get_random_u32_inclusive(900000, 1100000) * (u64)mrt;
		do_div(tmp, 1000000);
	}
	return (s32)tmp;
}

#ifdef CONFIG_SYSCTL
static int addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
#else
static inline int addrconf_sysctl_register(struct inet6_dev *idev)
{
	return 0;
}

static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
{
}
#endif

static void ipv6_gen_rnd_iid(struct in6_addr *addr);
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
static int ipv6_count_addresses(const struct inet6_dev *idev);
static int ipv6_generate_stable_address(struct in6_addr *addr,
					u8 dad_count,
					const struct inet6_dev *idev);

#define IN6_ADDR_HSIZE_SHIFT	8
#define IN6_ADDR_HSIZE		(1 << IN6_ADDR_HSIZE_SHIFT)

static void addrconf_verify(struct net *net);
static void addrconf_verify_rtnl(struct net *net);

static struct workqueue_struct *addrconf_wq;

static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);

static void addrconf_type_change(struct net_device *dev,
				 unsigned long event);
static int addrconf_ifdown(struct net_device *dev, bool unregister);

static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
						   int plen,
						   const struct net_device *dev,
						   u32 flags, u32 noflags,
						   bool no_gw);

static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
				   bool send_na);
static void addrconf_dad_run(struct inet6_dev *idev, bool restart);
static void addrconf_rs_timer(struct timer_list *t);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);

static void inet6_prefix_notify(int event, struct inet6_dev *idev,
				struct prefix_info *pinfo);

static struct ipv6_devconf ipv6_devconf __read_mostly = {
	.forwarding		= 0,
	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
	.mtu6			= IPV6_MIN_MTU,
	.accept_ra		= 1,
	.accept_redirects	= 1,
	.autoconf		= 1,
	.force_mld_version	= 0,
	.mldv1_unsolicited_report_interval = 10 * HZ,
	.mldv2_unsolicited_report_interval = HZ,
	.dad_transmits		= 1,
	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
	.rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
	.use_tempaddr		= 0,
	.temp_valid_lft		= TEMP_VALID_LIFETIME,
	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,
	.regen_min_advance	= REGEN_MIN_ADVANCE,
	.regen_max_retry	= REGEN_MAX_RETRY,
	.max_desync_factor	= MAX_DESYNC_FACTOR,
	.max_addresses		= IPV6_MAX_ADDRESSES,
	.accept_ra_defrtr	= 1,
	.ra_defrtr_metric	= IP6_RT_PRIO_USER,
	.accept_ra_from_local	= 0,
	.accept_ra_min_hop_limit = 1,
	.accept_ra_min_lft	= 0,
	.accept_ra_pinfo	= 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
	.accept_ra_rtr_pref	= 1,
	.rtr_probe_interval	= 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
	.accept_ra_rt_info_min_plen = 0,
	.accept_ra_rt_info_max_plen = 0,
#endif
#endif
	.proxy_ndp		= 0,
	.accept_source_route	= 0,	/* we do not accept RH0 by default.
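					 * (Type 0 Routing Headers were
					 *  deprecated by RFC 5095.)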
*/ .disable_ipv6 = 0, .accept_dad = 0, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, .stable_secret = { .initialized = false, }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, .seg6_enabled = 0, #ifdef CONFIG_IPV6_SEG6_HMAC .seg6_require_hmac = 0, #endif .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, .ioam6_enabled = 0, .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, .accept_ra = 1, .accept_redirects = 1, .autoconf = 1, .force_mld_version = 0, .mldv1_unsolicited_report_interval = 10 * HZ, .mldv2_unsolicited_report_interval = HZ, .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_min_advance = REGEN_MIN_ADVANCE, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, .accept_ra_min_lft = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, .stable_secret = { .initialized = false, }, .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, .seg6_enabled = 0, #ifdef CONFIG_IPV6_SEG6_HMAC .seg6_require_hmac = 0, #endif .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, .ioam6_enabled = 0, .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ static inline bool addrconf_link_ready(const struct net_device *dev) { return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); } static void addrconf_del_rs_timer(struct inet6_dev *idev) { if (del_timer(&idev->rs_timer)) __in6_dev_put(idev); } static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) { if (cancel_delayed_work(&ifp->dad_work)) __in6_ifa_put(ifp); } static void addrconf_mod_rs_timer(struct inet6_dev *idev, unsigned long when) { if (!mod_timer(&idev->rs_timer, jiffies + when)) in6_dev_hold(idev); } static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, unsigned long delay) { in6_ifa_hold(ifp); if (mod_delayed_work(addrconf_wq, &ifp->dad_work, delay)) in6_ifa_put(ifp); } static int snmp6_alloc_dev(struct inet6_dev *idev) { int i; idev->stats.ipv6 = alloc_percpu_gfp(struct ipstats_mib, GFP_KERNEL_ACCOUNT); if (!idev->stats.ipv6) goto err_ip; for_each_possible_cpu(i) { struct ipstats_mib *addrconf_stats; addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i); u64_stats_init(&addrconf_stats->syncp); } idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device), GFP_KERNEL); if (!idev->stats.icmpv6dev) goto err_icmp; idev->stats.icmpv6msgdev = 
kzalloc(sizeof(struct icmpv6msg_mib_device), GFP_KERNEL_ACCOUNT); if (!idev->stats.icmpv6msgdev) goto err_icmpmsg; return 0; err_icmpmsg: kfree(idev->stats.icmpv6dev); err_icmp: free_percpu(idev->stats.ipv6); err_ip: return -ENOMEM; } static struct inet6_dev *ipv6_add_dev(struct net_device *dev) { struct inet6_dev *ndev; int err = -ENOMEM; ASSERT_RTNL(); if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL); ndev = kzalloc(sizeof(*ndev), GFP_KERNEL_ACCOUNT); if (!ndev) return ERR_PTR(err); rwlock_init(&ndev->lock); ndev->dev = dev; INIT_LIST_HEAD(&ndev->addr_list); timer_setup(&ndev->rs_timer, addrconf_rs_timer, 0); memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); if (ndev->cnf.stable_secret.initialized) ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; ndev->cnf.mtu6 = dev->mtu; ndev->ra_mtu = 0; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (!ndev->nd_parms) { kfree(ndev); return ERR_PTR(err); } if (ndev->cnf.forwarding) dev_disable_lro(dev); /* We refer to the device */ netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL); if (snmp6_alloc_dev(ndev) < 0) { netdev_dbg(dev, "%s: cannot allocate memory for statistics\n", __func__); neigh_parms_release(&nd_tbl, ndev->nd_parms); netdev_put(dev, &ndev->dev_tracker); kfree(ndev); return ERR_PTR(err); } if (dev != blackhole_netdev) { if (snmp6_register_dev(ndev) < 0) { netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n", __func__, dev->name); goto err_release; } } /* One reference from device. */ refcount_set(&ndev->refcnt, 1); if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) ndev->cnf.accept_dad = -1; #if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { pr_info("%s: Disabled Multicast RS\n", dev->name); ndev->cnf.rtr_solicits = 0; } #endif INIT_LIST_HEAD(&ndev->tempaddr_list); ndev->desync_factor = U32_MAX; if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { ndev->cnf.use_tempaddr = -1; } ndev->token = in6addr_any; if (netif_running(dev) && addrconf_link_ready(dev)) ndev->if_flags |= IF_READY; ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; if (dev != blackhole_netdev) { err = addrconf_sysctl_register(ndev); if (err) { ipv6_mc_destroy_dev(ndev); snmp6_unregister_dev(ndev); goto err_release; } } /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); if (dev != blackhole_netdev) { /* Join interface-local all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); /* Join all-router multicast group if forwarding is set */ if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST)) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); } return ndev; err_release: neigh_parms_release(&nd_tbl, ndev->nd_parms); ndev->dead = 1; in6_dev_finish_destroy(ndev); return ERR_PTR(err); } static struct inet6_dev *ipv6_find_idev(struct net_device *dev) { struct inet6_dev *idev; ASSERT_RTNL(); idev = __in6_dev_get(dev); if (!idev) { idev = ipv6_add_dev(dev); if (IS_ERR(idev)) return idev; } if (dev->flags&IFF_UP) ipv6_mc_up(idev); return idev; } static int inet6_netconf_msgsize_devconf(int type) { int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + nla_total_size(4); /* NETCONFA_IFINDEX */ bool all = false; if (type == NETCONFA_ALL) all = true; if (all || type == NETCONFA_FORWARDING) size += nla_total_size(4); #ifdef CONFIG_IPV6_MROUTE if 
(all || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); #endif if (all || type == NETCONFA_PROXY_NEIGH) size += nla_total_size(4); if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) size += nla_total_size(4); return size; } static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, struct ipv6_devconf *devconf, u32 portid, u32 seq, int event, unsigned int flags, int type) { struct nlmsghdr *nlh; struct netconfmsg *ncm; bool all = false; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); if (!nlh) return -EMSGSIZE; if (type == NETCONFA_ALL) all = true; ncm = nlmsg_data(nlh); ncm->ncm_family = AF_INET6; if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; if (!devconf) goto out; if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, READ_ONCE(devconf->forwarding)) < 0) goto nla_put_failure; #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, atomic_read(&devconf->mc_forwarding)) < 0) goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && nla_put_s32(skb, NETCONFA_PROXY_NEIGH, READ_ONCE(devconf->proxy_ndp)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, READ_ONCE(devconf->ignore_routes_with_linkdown)) < 0) goto nla_put_failure; out: nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } void inet6_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv6_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_KERNEL); if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); } static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; static int inet6_netconf_valid_get_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), tb, NETCONFA_MAX, devconf_ipv6_policy, extack); err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), tb, NETCONFA_MAX, devconf_ipv6_policy, extack); if (err) return err; for (i = 0; i <= NETCONFA_MAX; i++) { if (!tb[i]) continue; switch (i) { case NETCONFA_IFINDEX: break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); return -EINVAL; } } return 0; } static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; struct inet6_dev *in6_dev = NULL; struct net_device *dev = NULL; struct sk_buff *skb; struct ipv6_devconf *devconf; int ifindex; int err; err = 
inet6_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; if (!tb[NETCONFA_IFINDEX]) return -EINVAL; err = -EINVAL; ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); switch (ifindex) { case NETCONFA_IFINDEX_ALL: devconf = net->ipv6.devconf_all; break; case NETCONFA_IFINDEX_DEFAULT: devconf = net->ipv6.devconf_dflt; break; default: dev = dev_get_by_index(net, ifindex); if (!dev) return -EINVAL; in6_dev = in6_dev_get(dev); if (!in6_dev) goto errout; devconf = &in6_dev->cnf; break; } err = -ENOBUFS; skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, 0, NETCONFA_ALL); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: if (in6_dev) in6_dev_put(in6_dev); dev_put(dev); return err; } /* Combine dev_addr_genid and dev_base_seq to detect changes. */ static u32 inet6_base_seq(const struct net *net) { u32 res = atomic_read(&net->ipv6.dev_addr_genid) + READ_ONCE(net->dev_base_seq); /* Must not return 0 (see nl_dump_check_consistent()). * Chose a value far away from 0. */ if (!res) res = 0x80000000; return res; } static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct { unsigned long ifindex; unsigned int all_default; } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; struct netconfmsg *ncm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*ncm))) { NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request"); return -EINVAL; } } rcu_read_lock(); for_each_netdev_dump(net, dev, ctx->ifindex) { idev = __in6_dev_get(dev); if (!idev) continue; err = inet6_netconf_fill_devconf(skb, dev->ifindex, &idev->cnf, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; } if (ctx->all_default == 0) { err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; ctx->all_default++; } if (ctx->all_default == 1) { err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, NETCONFA_ALL); if (err < 0) goto done; ctx->all_default++; } done: rcu_read_unlock(); return err; } #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { struct net_device *dev; struct inet6_ifaddr *ifa; LIST_HEAD(tmp_addr_list); if (!idev) return; dev = idev->dev; if (idev->cnf.forwarding) dev_disable_lro(dev); if (dev->flags & IFF_MULTICAST) { if (idev->cnf.forwarding) { ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters); ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters); } else { ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters); ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters); } } read_lock_bh(&idev->lock); list_for_each_entry(ifa, 
&idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; list_add_tail(&ifa->if_list_aux, &tmp_addr_list); } read_unlock_bh(&idev->lock); while (!list_empty(&tmp_addr_list)) { ifa = list_first_entry(&tmp_addr_list, struct inet6_ifaddr, if_list_aux); list_del(&ifa->if_list_aux); if (idev->cnf.forwarding) addrconf_join_anycast(ifa); else addrconf_leave_anycast(ifa); } inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); } static void addrconf_forward_change(struct net *net, __s32 newf) { struct net_device *dev; struct inet6_dev *idev; for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); WRITE_ONCE(idev->cnf.forwarding, newf); if (changed) dev_forward_change(idev); } } } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) { struct net *net; int old; if (!rtnl_trylock()) return restart_syscall(); net = (struct net *)table->extra2; old = *p; WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->forwarding) { if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); rtnl_unlock(); return 0; } if (p == &net->ipv6.devconf_all->forwarding) { int old_dflt = net->ipv6.devconf_dflt->forwarding; WRITE_ONCE(net->ipv6.devconf_dflt->forwarding, newf); if ((!newf) ^ (!old_dflt)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); addrconf_forward_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); rtnl_unlock(); if (newf) rt6_purge_dflt_routers(net); return 1; } static void addrconf_linkdown_change(struct net *net, __s32 newf) { struct net_device *dev; struct inet6_dev *idev; for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf); WRITE_ONCE(idev->cnf.ignore_routes_with_linkdown, newf); if (changed) inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, dev->ifindex, &idev->cnf); } } } static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) { struct net *net; int old; if (!rtnl_trylock()) return restart_syscall(); net = (struct net *)table->extra2; old = *p; WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); rtnl_unlock(); return 0; } if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) { WRITE_ONCE(net->ipv6.devconf_dflt->ignore_routes_with_linkdown, newf); addrconf_linkdown_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); } rtnl_unlock(); return 1; } #endif /* Nobody refers to this ifaddr, destroy it */ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) { WARN_ON(!hlist_unhashed(&ifp->addr_lst)); #ifdef NET_REFCNT_DEBUG pr_debug("%s\n", __func__); #endif in6_dev_put(ifp->idev); if (cancel_delayed_work(&ifp->dad_work)) pr_notice("delayed DAD work was pending while freeing ifa=%p\n", ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { 
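		/* Leak rather than free an address that was never marked
		 * dead; this should not normally happen.
		 */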
pr_warn("Freeing alive inet6 address %p\n", ifp); return; } kfree_rcu(ifp, rcu); } static void ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { struct list_head *p; int ifp_scope = ipv6_addr_src_scope(&ifp->addr); /* * Each device address list is sorted in order of scope - * global before linklocal. */ list_for_each(p, &idev->addr_list) { struct inet6_ifaddr *ifa = list_entry(p, struct inet6_ifaddr, if_list); if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) break; } list_add_tail_rcu(&ifp->if_list, p); } static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr) { u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); return hash_32(val, IN6_ADDR_HSIZE_SHIFT); } static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev, unsigned int hash) { struct inet6_ifaddr *ifp; hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev) return true; } } return false; } static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { struct net *net = dev_net(dev); unsigned int hash = inet6_addr_hash(net, &ifa->addr); int err = 0; spin_lock_bh(&net->ipv6.addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) { netdev_dbg(dev, "ipv6_add_addr: already assigned\n"); err = -EEXIST; } else { hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]); } spin_unlock_bh(&net->ipv6.addrconf_hash_lock); return err; } /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, bool can_block, struct netlink_ext_ack *extack) { gfp_t gfp_flags = can_block ? 
GFP_KERNEL : GFP_ATOMIC; int addr_type = ipv6_addr_type(cfg->pfx); struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; struct fib6_info *f6i = NULL; int err = 0; if (addr_type == IPV6_ADDR_ANY) { NL_SET_ERR_MSG_MOD(extack, "Invalid address"); return ERR_PTR(-EADDRNOTAVAIL); } else if (addr_type & IPV6_ADDR_MULTICAST && !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) { NL_SET_ERR_MSG_MOD(extack, "Cannot assign multicast address without \"IFA_F_MCAUTOJOIN\" flag"); return ERR_PTR(-EADDRNOTAVAIL); } else if (!(idev->dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(idev->dev) && addr_type & IPV6_ADDR_LOOPBACK) { NL_SET_ERR_MSG_MOD(extack, "Cannot assign loopback address on this device"); return ERR_PTR(-EADDRNOTAVAIL); } if (idev->dead) { NL_SET_ERR_MSG_MOD(extack, "device is going away"); err = -ENODEV; goto out; } if (idev->cnf.disable_ipv6) { NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device"); err = -EACCES; goto out; } /* validator notifier needs to be blocking; * do not call in atomic context */ if (can_block) { struct in6_validator_info i6vi = { .i6vi_addr = *cfg->pfx, .i6vi_dev = idev, .extack = extack, }; err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); err = notifier_to_errno(err); if (err < 0) goto out; } ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT); if (!ifa) { err = -ENOBUFS; goto out; } f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags, extack); if (IS_ERR(f6i)) { err = PTR_ERR(f6i); f6i = NULL; goto out; } neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *cfg->pfx; if (cfg->peer_pfx) ifa->peer_addr = *cfg->peer_pfx; spin_lock_init(&ifa->lock); INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = cfg->scope; ifa->prefix_len = cfg->plen; ifa->rt_priority = cfg->rt_priority; ifa->flags = cfg->ifa_flags; ifa->ifa_proto = cfg->ifa_proto; /* No need to add the TENTATIVE flag for addresses with NODAD */ if (!(cfg->ifa_flags & IFA_F_NODAD)) ifa->flags |= IFA_F_TENTATIVE; ifa->valid_lft = cfg->valid_lft; ifa->prefered_lft = cfg->preferred_lft; ifa->cstamp = ifa->tstamp = jiffies; ifa->tokenized = false; ifa->rt = f6i; ifa->idev = idev; in6_dev_hold(idev); /* For caller */ refcount_set(&ifa->refcnt, 1); rcu_read_lock(); err = ipv6_add_addr_hash(idev->dev, ifa); if (err < 0) { rcu_read_unlock(); goto out; } write_lock_bh(&idev->lock); /* Add to inet6_dev unicast addr list. */ ipv6_link_dev_addr(idev, ifa); if (ifa->flags&IFA_F_TEMPORARY) { list_add(&ifa->tmp_list, &idev->tempaddr_list); in6_ifa_hold(ifa); } in6_ifa_hold(ifa); write_unlock_bh(&idev->lock); rcu_read_unlock(); inet6addr_notifier_call_chain(NETDEV_UP, ifa); out: if (unlikely(err < 0)) { fib6_info_release(f6i); if (ifa) { if (ifa->idev) in6_dev_put(ifa->idev); kfree(ifa); } ifa = ERR_PTR(err); } return ifa; } enum cleanup_prefix_rt_t { CLEANUP_PREFIX_RT_NOP, /* no cleanup action for prefix route */ CLEANUP_PREFIX_RT_DEL, /* delete the prefix route */ CLEANUP_PREFIX_RT_EXPIRE, /* update the lifetime of the prefix route */ }; /* * Check, whether the prefix for ifp would still need a prefix route * after deleting ifp. The function returns one of the CLEANUP_PREFIX_RT_* * constants. * * 1) we don't purge prefix if address was not permanent. * prefix is managed by its own lifetime. * 2) we also don't purge, if the address was IFA_F_NOPREFIXROUTE. * 3) if there are no addresses, delete prefix. * 4) if there are still other permanent address(es), * corresponding prefix is still permanent. 
* 5) if there are still other addresses with IFA_F_NOPREFIXROUTE, * don't purge the prefix, assume user space is managing it. * 6) otherwise, update prefix lifetime to the * longest valid lifetime among the corresponding * addresses on the device. * Note: subsequent RA will update lifetime. **/ static enum cleanup_prefix_rt_t check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) { struct inet6_ifaddr *ifa; struct inet6_dev *idev = ifp->idev; unsigned long lifetime; enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_DEL; *expires = jiffies; list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; if (ifa->prefix_len != ifp->prefix_len || !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) return CLEANUP_PREFIX_RT_NOP; action = CLEANUP_PREFIX_RT_EXPIRE; spin_lock(&ifa->lock); lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); /* * Note: Because this address is * not permanent, lifetime < * LONG_MAX / HZ here. */ if (time_before(*expires, ifa->tstamp + lifetime * HZ)) *expires = ifa->tstamp + lifetime * HZ; spin_unlock(&ifa->lock); } return action; } static void cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt, bool del_peer) { struct fib6_table *table; struct fib6_info *f6i; f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr, ifp->prefix_len, ifp->idev->dev, 0, RTF_DEFAULT, true); if (f6i) { if (del_rt) ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); else { if (!(f6i->fib6_flags & RTF_EXPIRES)) { table = f6i->fib6_table; spin_lock_bh(&table->tb6_lock); fib6_set_expires(f6i, expires); fib6_add_gc_list(f6i); spin_unlock_bh(&table->tb6_lock); } fib6_info_release(f6i); } } } /* This function wants to get referenced ifp and releases it before return */ static void ipv6_del_addr(struct inet6_ifaddr *ifp) { enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; struct net *net = dev_net(ifp->idev->dev); unsigned long expires; int state; ASSERT_RTNL(); spin_lock_bh(&ifp->lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; spin_unlock_bh(&ifp->lock); if (state == INET6_IFADDR_STATE_DEAD) goto out; spin_lock_bh(&net->ipv6.addrconf_hash_lock); hlist_del_init_rcu(&ifp->addr_lst); spin_unlock_bh(&net->ipv6.addrconf_hash_lock); write_lock_bh(&ifp->idev->lock); if (ifp->flags&IFA_F_TEMPORARY) { list_del(&ifp->tmp_list); if (ifp->ifpub) { in6_ifa_put(ifp->ifpub); ifp->ifpub = NULL; } __in6_ifa_put(ifp); } if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) action = check_cleanup_prefix_route(ifp, &expires); list_del_rcu(&ifp->if_list); __in6_ifa_put(ifp); write_unlock_bh(&ifp->idev->lock); addrconf_del_dad_work(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); if (action != CLEANUP_PREFIX_RT_NOP) { cleanup_prefix_route(ifp, expires, action == CLEANUP_PREFIX_RT_DEL, false); } /* clean up prefsrc entries */ rt6_remove_prefsrc(ifp); out: in6_ifa_put(ifp); } static unsigned long ipv6_get_regen_advance(const struct inet6_dev *idev) { return READ_ONCE(idev->cnf.regen_min_advance) + READ_ONCE(idev->cnf.regen_max_retry) * READ_ONCE(idev->cnf.dad_transmits) * max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; } static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) { struct inet6_dev *idev = ifp->idev; unsigned long tmp_tstamp, age; unsigned long regen_advance; unsigned long now = jiffies; u32 if_public_preferred_lft; s32 
cnf_temp_preferred_lft; struct inet6_ifaddr *ift; struct ifa6_config cfg; long max_desync_factor; struct in6_addr addr; int ret = 0; write_lock_bh(&idev->lock); retry: in6_dev_hold(idev); if (READ_ONCE(idev->cnf.use_tempaddr) <= 0) { write_unlock_bh(&idev->lock); pr_info("%s: use_tempaddr is disabled\n", __func__); in6_dev_put(idev); ret = -1; goto out; } spin_lock_bh(&ifp->lock); if (ifp->regen_count++ >= READ_ONCE(idev->cnf.regen_max_retry)) { WRITE_ONCE(idev->cnf.use_tempaddr, -1); /*XXX*/ spin_unlock_bh(&ifp->lock); write_unlock_bh(&idev->lock); pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", __func__); in6_dev_put(idev); ret = -1; goto out; } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); ipv6_gen_rnd_iid(&addr); age = (now - ifp->tstamp) / HZ; regen_advance = ipv6_get_regen_advance(idev); /* recalculate max_desync_factor each time and update * idev->desync_factor if it's larger */ cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); max_desync_factor = min_t(long, READ_ONCE(idev->cnf.max_desync_factor), cnf_temp_preferred_lft - regen_advance); if (unlikely(idev->desync_factor > max_desync_factor)) { if (max_desync_factor > 0) { get_random_bytes(&idev->desync_factor, sizeof(idev->desync_factor)); idev->desync_factor %= max_desync_factor; } else { idev->desync_factor = 0; } } if_public_preferred_lft = ifp->prefered_lft; memset(&cfg, 0, sizeof(cfg)); cfg.valid_lft = min_t(__u32, ifp->valid_lft, READ_ONCE(idev->cnf.temp_valid_lft) + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft); cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); cfg.plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); write_unlock_bh(&idev->lock); /* From RFC 4941: * * A temporary address is created only if this calculated Preferred * Lifetime is greater than REGEN_ADVANCE time units. In * particular, an implementation must not create a temporary address * with a zero Preferred Lifetime. * * ... * * When creating a temporary address, the lifetime values MUST be * derived from the corresponding prefix as follows: * * ... * * * Its Preferred Lifetime is the lower of the Preferred Lifetime * of the public address or TEMP_PREFERRED_LIFETIME - * DESYNC_FACTOR. * * To comply with the RFC's requirements, clamp the preferred lifetime * to a minimum of regen_advance, unless that would exceed valid_lft or * ifp->prefered_lft. * * Use age calculation as in addrconf_verify to avoid unnecessary * temporary addresses being generated. 
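	 *
	 * (Here REGEN_ADVANCE corresponds to the regen_advance value
	 *  computed by ipv6_get_regen_advance() above.)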
*/ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (cfg.preferred_lft <= regen_advance + age) { cfg.preferred_lft = regen_advance + age + 1; if (cfg.preferred_lft > cfg.valid_lft || cfg.preferred_lft > if_public_preferred_lft) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; goto out; } } cfg.ifa_flags = IFA_F_TEMPORARY; /* set in addrconf_prefix_rcv() */ if (ifp->flags & IFA_F_OPTIMISTIC) cfg.ifa_flags |= IFA_F_OPTIMISTIC; cfg.pfx = &addr; cfg.scope = ipv6_addr_scope(cfg.pfx); ift = ipv6_add_addr(idev, &cfg, block, NULL); if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); write_lock_bh(&idev->lock); goto retry; } spin_lock_bh(&ift->lock); ift->ifpub = ifp; ift->cstamp = now; ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); addrconf_dad_start(ift); in6_ifa_put(ift); in6_dev_put(idev); out: return ret; } /* * Choose an appropriate source address (RFC3484) */ enum { IPV6_SADDR_RULE_INIT = 0, IPV6_SADDR_RULE_LOCAL, IPV6_SADDR_RULE_SCOPE, IPV6_SADDR_RULE_PREFERRED, #ifdef CONFIG_IPV6_MIP6 IPV6_SADDR_RULE_HOA, #endif IPV6_SADDR_RULE_OIF, IPV6_SADDR_RULE_LABEL, IPV6_SADDR_RULE_PRIVACY, IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD IPV6_SADDR_RULE_NOT_OPTIMISTIC, #endif IPV6_SADDR_RULE_MAX }; struct ipv6_saddr_score { int rule; int addr_type; struct inet6_ifaddr *ifa; DECLARE_BITMAP(scorebits, IPV6_SADDR_RULE_MAX); int scopedist; int matchlen; }; struct ipv6_saddr_dst { const struct in6_addr *addr; int ifindex; int scope; int label; unsigned int prefs; }; static inline int ipv6_saddr_preferred(int type) { if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|IPV6_ADDR_LOOPBACK)) return 1; return 0; } static bool ipv6_use_optimistic_addr(const struct net *net, const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && !READ_ONCE(idev->cnf.optimistic_dad)) return false; if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) && !READ_ONCE(idev->cnf.use_optimistic)) return false; return true; #else return false; #endif } static bool ipv6_allow_optimistic_dad(const struct net *net, const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && !READ_ONCE(idev->cnf.optimistic_dad)) return false; return true; #else return false; #endif } static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, int i) { int ret; if (i <= score->rule) { switch (i) { case IPV6_SADDR_RULE_SCOPE: ret = score->scopedist; break; case IPV6_SADDR_RULE_PREFIX: ret = score->matchlen; break; default: ret = !!test_bit(i, score->scorebits); } goto out; } switch (i) { case IPV6_SADDR_RULE_INIT: /* Rule 0: remember if hiscore is not ready yet */ ret = !!score->ifa; break; case IPV6_SADDR_RULE_LOCAL: /* Rule 1: Prefer same address */ ret = ipv6_addr_equal(&score->ifa->addr, dst->addr); break; case IPV6_SADDR_RULE_SCOPE: /* Rule 2: Prefer appropriate scope * * ret * ^ * -1 | d 15 * ---+--+-+---> scope * | * | d is scope of the destination. * B-d | \ * | \ <- smaller scope is better if * B-15 | \ if scope is enough for destination. * | ret = B - scope (-1 <= scope >= d <= 15). * d-C-1 | / * |/ <- greater is better * -C / if scope is not enough for destination. * /| ret = scope - C (-1 <= d < scope <= 15). * * d - C - 1 < B -15 (for all -1 <= d <= 15). * C > d + 14 - B >= 15 + 14 - B = 29 - B. 
* Assume B = 0 and we get C > 29. */ ret = __ipv6_addr_src_scope(score->addr_type); if (ret >= dst->scope) ret = -ret; else ret -= 128; /* 30 is enough */ score->scopedist = ret; break; case IPV6_SADDR_RULE_PREFERRED: { /* Rule 3: Avoid deprecated and optimistic addresses */ u8 avoid = IFA_F_DEPRECATED; if (!ipv6_use_optimistic_addr(net, score->ifa->idev)) avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || !(score->ifa->flags & avoid); break; } #ifdef CONFIG_IPV6_MIP6 case IPV6_SADDR_RULE_HOA: { /* Rule 4: Prefer home address */ int prefhome = !(dst->prefs & IPV6_PREFER_SRC_COA); ret = !(score->ifa->flags & IFA_F_HOMEADDRESS) ^ prefhome; break; } #endif case IPV6_SADDR_RULE_OIF: /* Rule 5: Prefer outgoing interface */ ret = (!dst->ifindex || dst->ifindex == score->ifa->idev->dev->ifindex); break; case IPV6_SADDR_RULE_LABEL: /* Rule 6: Prefer matching label */ ret = ipv6_addr_label(net, &score->ifa->addr, score->addr_type, score->ifa->idev->dev->ifindex) == dst->label; break; case IPV6_SADDR_RULE_PRIVACY: { /* Rule 7: Prefer public address * Note: prefer temporary address if use_tempaddr >= 2 */ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? !!(dst->prefs & IPV6_PREFER_SRC_TMP) : READ_ONCE(score->ifa->idev->cnf.use_tempaddr) >= 2; ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; } case IPV6_SADDR_RULE_ORCHID: /* Rule 8-: Prefer ORCHID vs ORCHID or * non-ORCHID vs non-ORCHID */ ret = !(ipv6_addr_orchid(&score->ifa->addr) ^ ipv6_addr_orchid(dst->addr)); break; case IPV6_SADDR_RULE_PREFIX: /* Rule 8: Use longest matching prefix */ ret = ipv6_addr_diff(&score->ifa->addr, dst->addr); if (ret > score->ifa->prefix_len) ret = score->ifa->prefix_len; score->matchlen = ret; break; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD case IPV6_SADDR_RULE_NOT_OPTIMISTIC: /* Optimistic addresses still have lower precedence than other * preferred addresses. */ ret = !(score->ifa->flags & IFA_F_OPTIMISTIC); break; #endif default: ret = 0; } if (ret) __set_bit(i, score->scorebits); score->rule = i; out: return ret; } static int __ipv6_dev_get_saddr(struct net *net, struct ipv6_saddr_dst *dst, struct inet6_dev *idev, struct ipv6_saddr_score *scores, int hiscore_idx) { struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx]; list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) { int i; /* * - Tentative Address (RFC2462 section 5.4) * - A tentative address is not considered * "assigned to an interface" in the traditional * sense, unless it is also flagged as optimistic. * - Candidate Source Address (section 4) * - In any case, anycast addresses, multicast * addresses, and the unspecified address MUST * NOT be included in a candidate set. 
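		 *
		 * The check below therefore skips tentative addresses
		 * unless they are also flagged IFA_F_OPTIMISTIC.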
*/ if ((score->ifa->flags & IFA_F_TENTATIVE) && (!(score->ifa->flags & IFA_F_OPTIMISTIC))) continue; score->addr_type = __ipv6_addr_type(&score->ifa->addr); if (unlikely(score->addr_type == IPV6_ADDR_ANY || score->addr_type & IPV6_ADDR_MULTICAST)) { net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", idev->dev->name); continue; } score->rule = -1; bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { int minihiscore, miniscore; minihiscore = ipv6_get_saddr_eval(net, hiscore, dst, i); miniscore = ipv6_get_saddr_eval(net, score, dst, i); if (minihiscore > miniscore) { if (i == IPV6_SADDR_RULE_SCOPE && score->scopedist > 0) { /* * special case: * each remaining entry * has too small (not enough) * scope, because ifa entries * are sorted by their scope * values. */ goto out; } break; } else if (minihiscore < miniscore) { swap(hiscore, score); hiscore_idx = 1 - hiscore_idx; /* restore our iterator */ score->ifa = hiscore->ifa; break; } } } out: return hiscore_idx; } static int ipv6_get_saddr_master(struct net *net, const struct net_device *dst_dev, const struct net_device *master, struct ipv6_saddr_dst *dst, struct ipv6_saddr_score *scores, int hiscore_idx) { struct inet6_dev *idev; idev = __in6_dev_get(dst_dev); if (idev) hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev, scores, hiscore_idx); idev = __in6_dev_get(master); if (idev) hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev, scores, hiscore_idx); return hiscore_idx; } int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { struct ipv6_saddr_score scores[2], *hiscore; struct ipv6_saddr_dst dst; struct inet6_dev *idev; struct net_device *dev; int dst_type; bool use_oif_addr = false; int hiscore_idx = 0; int ret = 0; dst_type = __ipv6_addr_type(daddr); dst.addr = daddr; dst.ifindex = dst_dev ? dst_dev->ifindex : 0; dst.scope = __ipv6_addr_src_scope(dst_type); dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex); dst.prefs = prefs; scores[hiscore_idx].rule = -1; scores[hiscore_idx].ifa = NULL; rcu_read_lock(); /* Candidate Source Address (section 4) * - multicast and link-local destination address, * the set of candidate source address MUST only * include addresses assigned to interfaces * belonging to the same link as the outgoing * interface. * (- For site-local destination addresses, the * set of candidate source addresses MUST only * include addresses assigned to interfaces * belonging to the same site as the outgoing * interface.) * - "It is RECOMMENDED that the candidate source addresses * be the set of unicast addresses assigned to the * interface that will be used to send to the destination * (the 'outgoing' interface)." (RFC 6724) */ if (dst_dev) { idev = __in6_dev_get(dst_dev); if ((dst_type & IPV6_ADDR_MULTICAST) || dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) { use_oif_addr = true; } } if (use_oif_addr) { if (idev) hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); } else { const struct net_device *master; int master_idx = 0; /* if dst_dev exists and is enslaved to an L3 device, then * prefer addresses from dst_dev and then the master over * any other enslaved devices in the L3 domain. 
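 * Only if neither dst_dev nor the master yields a usable address
 * does the for_each_netdev_rcu() walk below consider the remaining
 * devices enslaved to the same master (matching master_idx).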
*/ master = l3mdev_master_dev_rcu(dst_dev); if (master) { master_idx = master->ifindex; hiscore_idx = ipv6_get_saddr_master(net, dst_dev, master, &dst, scores, hiscore_idx); if (scores[hiscore_idx].ifa) goto out; } for_each_netdev_rcu(net, dev) { /* only consider addresses on devices in the * same L3 domain */ if (l3mdev_master_ifindex_rcu(dev) != master_idx) continue; idev = __in6_dev_get(dev); if (!idev) continue; hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); } } out: hiscore = &scores[hiscore_idx]; if (!hiscore->ifa) ret = -EADDRNOTAVAIL; else *saddr = hiscore->ifa->addr; rcu_read_unlock(); return ret; } EXPORT_SYMBOL(ipv6_dev_get_saddr); static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags) { struct inet6_ifaddr *ifp; int err = -EADDRNOTAVAIL; list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) { if (ifp->scope > IFA_LINK) break; if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { *addr = ifp->addr; err = 0; break; } } return err; } int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags) { struct inet6_dev *idev; int err = -EADDRNOTAVAIL; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); err = __ipv6_get_lladdr(idev, addr, banned_flags); read_unlock_bh(&idev->lock); } rcu_read_unlock(); return err; } static int ipv6_count_addresses(const struct inet6_dev *idev) { const struct inet6_ifaddr *ifp; int cnt = 0; rcu_read_lock(); list_for_each_entry_rcu(ifp, &idev->addr_list, if_list) cnt++; rcu_read_unlock(); return cnt; } int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { return ipv6_chk_addr_and_flags(net, addr, dev, !dev, strict, IFA_F_TENTATIVE); } EXPORT_SYMBOL(ipv6_chk_addr); /* device argument is used to find the L3 domain of interest. If * skip_dev_check is set, then the ifp device is not checked against * the passed in dev argument. So the 2 cases for addresses checks are: * 1. does the address exist in the L3 domain that dev is part of * (skip_dev_check = true), or * * 2. does the address exist on the specific device * (skip_dev_check = false) */ static struct net_device * __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, const struct net_device *dev, bool skip_dev_check, int strict, u32 banned_flags) { unsigned int hash = inet6_addr_hash(net, addr); struct net_device *l3mdev, *ndev; struct inet6_ifaddr *ifp; u32 ifp_flags; rcu_read_lock(); l3mdev = l3mdev_master_dev_rcu(dev); if (skip_dev_check) dev = NULL; hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { ndev = ifp->idev->dev; if (l3mdev_master_dev_rcu(ndev) != l3mdev) continue; /* Decouple optimistic from tentative for evaluation here. * Ban optimistic addresses explicitly, when required. */ ifp_flags = (ifp->flags&IFA_F_OPTIMISTIC) ? (ifp->flags&~IFA_F_TENTATIVE) : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp_flags&banned_flags) && (!dev || ndev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock(); return ndev; } } rcu_read_unlock(); return NULL; } int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, const struct net_device *dev, bool skip_dev_check, int strict, u32 banned_flags) { return __ipv6_chk_addr_and_flags(net, addr, dev, skip_dev_check, strict, banned_flags) ? 1 : 0; } EXPORT_SYMBOL(ipv6_chk_addr_and_flags); /* Compares an address/prefix_len with addresses on device @dev. * If one is found it returns true. 
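 * Unlike ipv6_chk_prefix(), the prefix length to compare is
 * supplied by the caller rather than taken from each address's own
 * prefix_len.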
*/ bool ipv6_chk_custom_prefix(const struct in6_addr *addr, const unsigned int prefix_len, struct net_device *dev) { const struct inet6_ifaddr *ifa; const struct inet6_dev *idev; bool ret = false; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); if (ret) break; } } rcu_read_unlock(); return ret; } EXPORT_SYMBOL(ipv6_chk_custom_prefix); int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { const struct inet6_ifaddr *ifa; const struct inet6_dev *idev; int onlink; onlink = 0; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { onlink = ipv6_prefix_equal(addr, &ifa->addr, ifa->prefix_len); if (onlink) break; } } rcu_read_unlock(); return onlink; } EXPORT_SYMBOL(ipv6_chk_prefix); /** * ipv6_dev_find - find the first device with a given source address. * @net: the net namespace * @addr: the source address * @dev: used to find the L3 domain of interest * * The caller should be protected by RCU, or RTNL. */ struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr, struct net_device *dev) { return __ipv6_chk_addr_and_flags(net, addr, dev, !dev, 1, IFA_F_TENTATIVE); } EXPORT_SYMBOL(ipv6_dev_find); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp, *result = NULL; rcu_read_lock(); hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { if (in6_ifa_hold_safe(ifp)) { result = ifp; break; } } } } rcu_read_unlock(); return result; } /* Gets referenced address, destroys ifaddr */ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (dad_failed) ifp->flags |= IFA_F_DADFAILED; if (ifp->flags&IFA_F_TEMPORARY) { struct inet6_ifaddr *ifpub; spin_lock_bh(&ifp->lock); ifpub = ifp->ifpub; if (ifpub) { in6_ifa_hold(ifpub); spin_unlock_bh(&ifp->lock); ipv6_create_tempaddr(ifpub, true); in6_ifa_put(ifpub); } else { spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); } else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) { spin_lock_bh(&ifp->lock); addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags &= ~IFA_F_OPTIMISTIC; spin_unlock_bh(&ifp->lock); if (dad_failed) ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); } else { ipv6_del_addr(ifp); } } static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } spin_unlock_bh(&ifp->lock); return err; } void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(idev->dev); int max_addresses; if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); return; } net_info_ratelimited("%s: IPv6 duplicate address %pI6c used by %pM detected!\n", ifp->idev->dev->name, &ifp->addr, eth_hdr(skb)->h_source); spin_lock_bh(&ifp->lock); if (ifp->flags & IFA_F_STABLE_PRIVACY) { struct in6_addr new_addr; struct inet6_ifaddr *ifp2; int retries = ifp->stable_privacy_retry + 1; struct ifa6_config cfg = { .pfx = &new_addr, .plen = ifp->prefix_len, .ifa_flags = ifp->flags, .valid_lft = ifp->valid_lft, .preferred_lft = ifp->prefered_lft, .scope = 
ifp->scope, }; if (retries > net->ipv6.sysctl.idgen_retries) { net_info_ratelimited("%s: privacy stable address generation failed because of DAD conflicts!\n", ifp->idev->dev->name); goto errdad; } new_addr = ifp->addr; if (ipv6_generate_stable_address(&new_addr, retries, idev)) goto errdad; spin_unlock_bh(&ifp->lock); max_addresses = READ_ONCE(idev->cnf.max_addresses); if (max_addresses && ipv6_count_addresses(idev) >= max_addresses) goto lock_errdad; net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n", ifp->idev->dev->name); ifp2 = ipv6_add_addr(idev, &cfg, false, NULL); if (IS_ERR(ifp2)) goto lock_errdad; spin_lock_bh(&ifp2->lock); ifp2->stable_privacy_retry = retries; ifp2->state = INET6_IFADDR_STATE_PREDAD; spin_unlock_bh(&ifp2->lock); addrconf_mod_dad_work(ifp2, net->ipv6.sysctl.idgen_delay); in6_ifa_put(ifp2); lock_errdad: spin_lock_bh(&ifp->lock); } errdad: /* transition from _POSTDAD to _ERRDAD */ ifp->state = INET6_IFADDR_STATE_ERRDAD; spin_unlock_bh(&ifp->lock); addrconf_mod_dad_work(ifp, 0); in6_ifa_put(ifp); } /* Join to solicited addr multicast group. * caller must hold RTNL */ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); } /* caller must hold RTNL */ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); __ipv6_dev_mc_dec(idev, &maddr); } /* caller must hold RTNL */ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) return; __ipv6_dev_ac_inc(ifp->idev, &addr); } /* caller must hold RTNL */ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) return; __ipv6_dev_ac_dec(ifp->idev, &addr); } static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev) { switch (dev->addr_len) { case ETH_ALEN: memcpy(eui, dev->dev_addr, 3); eui[3] = 0xFF; eui[4] = 0xFE; memcpy(eui + 5, dev->dev_addr + 3, 3); break; case EUI64_ADDR_LEN: memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN); eui[0] ^= 2; break; default: return -1; } return 0; } static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev) { const union fwnet_hwaddr *ha; if (dev->addr_len != FWNET_ALEN) return -1; ha = (const union fwnet_hwaddr *)dev->dev_addr; memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id)); eui[0] ^= 2; return 0; } static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) { /* XXX: inherit EUI-64 from other interface -- yoshfuji */ if (dev->addr_len != ARCNET_ALEN) return -1; memset(eui, 0, 7); eui[7] = *(u8 *)dev->dev_addr; return 0; } static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev) { if (dev->addr_len != INFINIBAND_ALEN) return -1; memcpy(eui, dev->dev_addr + 12, 8); eui[0] |= 2; return 0; } static int __ipv6_isatap_ifid(u8 *eui, __be32 addr) { if (addr == 0) return -1; eui[0] = (ipv4_is_zeronet(addr) || ipv4_is_private_10(addr) || ipv4_is_loopback(addr) || ipv4_is_linklocal_169(addr) || ipv4_is_private_172(addr) || ipv4_is_test_192(addr) || ipv4_is_anycast_6to4(addr) || ipv4_is_private_192(addr) || 
ipv4_is_test_198(addr) || ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)) ? 0x00 : 0x02; eui[1] = 0; eui[2] = 0x5E; eui[3] = 0xFE; memcpy(eui + 4, &addr, 4); return 0; } static int addrconf_ifid_sit(u8 *eui, struct net_device *dev) { if (dev->priv_flags & IFF_ISATAP) return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); return -1; } static int addrconf_ifid_gre(u8 *eui, struct net_device *dev) { return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); } static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev) { memcpy(eui, dev->perm_addr, 3); memcpy(eui + 5, dev->perm_addr + 3, 3); eui[3] = 0xFF; eui[4] = 0xFE; eui[0] ^= 2; return 0; } static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) { switch (dev->type) { case ARPHRD_ETHER: case ARPHRD_FDDI: return addrconf_ifid_eui48(eui, dev); case ARPHRD_ARCNET: return addrconf_ifid_arcnet(eui, dev); case ARPHRD_INFINIBAND: return addrconf_ifid_infiniband(eui, dev); case ARPHRD_SIT: return addrconf_ifid_sit(eui, dev); case ARPHRD_IPGRE: case ARPHRD_TUNNEL: return addrconf_ifid_gre(eui, dev); case ARPHRD_6LOWPAN: return addrconf_ifid_6lowpan(eui, dev); case ARPHRD_IEEE1394: return addrconf_ifid_ieee1394(eui, dev); case ARPHRD_TUNNEL6: case ARPHRD_IP6GRE: case ARPHRD_RAWIP: return addrconf_ifid_ip6tnl(eui, dev); } return -1; } static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) { int err = -1; struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) { if (ifp->scope > IFA_LINK) break; if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { memcpy(eui, ifp->addr.s6_addr+8, 8); err = 0; break; } } read_unlock_bh(&idev->lock); return err; } /* Generation of a randomized Interface Identifier * draft-ietf-6man-rfc4941bis, Section 3.3.1 */ static void ipv6_gen_rnd_iid(struct in6_addr *addr) { regen: get_random_bytes(&addr->s6_addr[8], 8); /* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1: * check if generated address is not inappropriate: * * - Reserved IPv6 Interface Identifiers * - XXX: already assigned to an address on the device */ /* Subnet-router anycast: 0000:0000:0000:0000 */ if (!(addr->s6_addr32[2] | addr->s6_addr32[3])) goto regen; /* IANA Ethernet block: 0200:5EFF:FE00:0000-0200:5EFF:FE00:5212 * Proxy Mobile IPv6: 0200:5EFF:FE00:5213 * IANA Ethernet block: 0200:5EFF:FE00:5214-0200:5EFF:FEFF:FFFF */ if (ntohl(addr->s6_addr32[2]) == 0x02005eff && (ntohl(addr->s6_addr32[3]) & 0Xff000000) == 0xfe000000) goto regen; /* Reserved subnet anycast addresses */ if (ntohl(addr->s6_addr32[2]) == 0xfdffffff && ntohl(addr->s6_addr32[3]) >= 0Xffffff80) goto regen; } /* * Add prefix route. */ static void addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric, struct net_device *dev, unsigned long expires, u32 flags, gfp_t gfp_flags) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX, .fc_metric = metric ? : IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, .fc_dst_len = plen, .fc_flags = RTF_UP | flags, .fc_nlinfo.nl_net = dev_net(dev), .fc_protocol = RTPROT_KERNEL, .fc_type = RTN_UNICAST, }; cfg.fc_dst = *pfx; /* Prevent useless cloning on PtP SIT. This thing is done here expecting that the whole class of non-broadcast devices need not cloning. 
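 *
 * Note: the route is installed with RTPROT_KERNEL and, unless the
 * caller passed a metric, IP6_RT_PRIO_ADDRCONF (256), which is how
 * autoconfigured prefix routes show up in "ip -6 route".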
*/ #if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) cfg.fc_flags |= RTF_NONEXTHOP; #endif ip6_route_add(&cfg, gfp_flags, NULL); } static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, u32 flags, u32 noflags, bool no_gw) { struct fib6_node *fn; struct fib6_info *rt = NULL; struct fib6_table *table; u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; rcu_read_lock(); fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true); if (!fn) goto out; for_each_fib6_node_rt_rcu(fn) { /* prefix routes only use builtin fib6_nh */ if (rt->nh) continue; if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex) continue; if (no_gw && rt->fib6_nh->fib_nh_gw_family) continue; if ((rt->fib6_flags & flags) != flags) continue; if ((rt->fib6_flags & noflags) != 0) continue; if (!fib6_info_hold_safe(rt)) continue; break; } out: rcu_read_unlock(); return rt; } /* Create "default" multicast route to the interface */ static void addrconf_add_mroute(struct net_device *dev) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, .fc_type = RTN_MULTICAST, .fc_nlinfo.nl_net = dev_net(dev), .fc_protocol = RTPROT_KERNEL, }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); ip6_route_add(&cfg, GFP_KERNEL, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) { struct inet6_dev *idev; ASSERT_RTNL(); idev = ipv6_find_idev(dev); if (IS_ERR(idev)) return idev; if (idev->cnf.disable_ipv6) return ERR_PTR(-EACCES); /* Add default multicast route */ if (!(dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev)) addrconf_add_mroute(dev); return idev; } static void manage_tempaddrs(struct inet6_dev *idev, struct inet6_ifaddr *ifp, __u32 valid_lft, __u32 prefered_lft, bool create, unsigned long now) { u32 flags; struct inet6_ifaddr *ift; read_lock_bh(&idev->lock); /* update all temporary addresses in the list */ list_for_each_entry(ift, &idev->tempaddr_list, tmp_list) { int age, max_valid, max_prefered; if (ifp != ift->ifpub) continue; /* RFC 4941 section 3.3: * If a received option will extend the lifetime of a public * address, the lifetimes of temporary addresses should * be extended, subject to the overall constraint that no * temporary addresses should ever remain "valid" or "preferred" * for a time longer than (TEMP_VALID_LIFETIME) or * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively. */ age = (now - ift->cstamp) / HZ; max_valid = READ_ONCE(idev->cnf.temp_valid_lft) - age; if (max_valid < 0) max_valid = 0; max_prefered = READ_ONCE(idev->cnf.temp_prefered_lft) - idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; if (valid_lft > max_valid) valid_lft = max_valid; if (prefered_lft > max_prefered) prefered_lft = max_prefered; spin_lock(&ift->lock); flags = ift->flags; ift->valid_lft = valid_lft; ift->prefered_lft = prefered_lft; ift->tstamp = now; if (prefered_lft > 0) ift->flags &= ~IFA_F_DEPRECATED; spin_unlock(&ift->lock); if (!(flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ift); } /* Also create a temporary address if it's enabled but no temporary * address currently exists. * However, we get called with valid_lft == 0, prefered_lft == 0, create == false * as part of cleanup (ie. deleting the mngtmpaddr). * We don't want that to result in creating a new temporary ip address. 
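 * So below, "create" is only forced when the temporary-address
 * list is empty and at least one non-zero lifetime was received,
 * and the actual creation is still gated on use_tempaddr > 0.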
*/ if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft)) create = true; if (create && READ_ONCE(idev->cnf.use_tempaddr) > 0) { /* When a new public address is created as described * in [ADDRCONF], also create a new temporary address. */ read_unlock_bh(&idev->lock); ipv6_create_tempaddr(ifp, false); } else { read_unlock_bh(&idev->lock); } } static bool is_addr_mode_generate_stable(struct inet6_dev *idev) { return idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; } int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, const struct prefix_info *pinfo, struct inet6_dev *in6_dev, const struct in6_addr *addr, int addr_type, u32 addr_flags, bool sllao, bool tokenized, __u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1); int create = 0, update_lft = 0; if (!ifp && valid_lft) { int max_addresses = READ_ONCE(in6_dev->cnf.max_addresses); struct ifa6_config cfg = { .pfx = addr, .plen = pinfo->prefix_len, .ifa_flags = addr_flags, .valid_lft = valid_lft, .preferred_lft = prefered_lft, .scope = addr_type & IPV6_ADDR_SCOPE_MASK, .ifa_proto = IFAPROT_KERNEL_RA }; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) || READ_ONCE(in6_dev->cnf.optimistic_dad)) && !net->ipv6.devconf_all->forwarding && sllao) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif /* Do not allow to create too much of autoconfigured * addresses; this would be too easy way to crash kernel. */ if (!max_addresses || ipv6_count_addresses(in6_dev) < max_addresses) ifp = ipv6_add_addr(in6_dev, &cfg, false, NULL); if (IS_ERR_OR_NULL(ifp)) return -1; create = 1; spin_lock_bh(&ifp->lock); ifp->flags |= IFA_F_MANAGETEMPADDR; ifp->cstamp = jiffies; ifp->tokenized = tokenized; spin_unlock_bh(&ifp->lock); addrconf_dad_start(ifp); } if (ifp) { u32 flags; unsigned long now; u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; /* RFC4862 Section 5.5.3e: * "Note that the preferred lifetime of the * corresponding address is always reset to * the Preferred Lifetime in the received * Prefix Information option, regardless of * whether the valid lifetime is also reset or * ignored." * * So we should always update prefered_lft here. 
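 *
 * The valid lifetime, by contrast, is clamped below: unless
 * ra_honor_pio_life is set it is never lowered beneath
 * min(stored_lft, MIN_VALID_LIFETIME), the RFC 4862 section 5.5.3e
 * "two hours" rule, so a spoofed RA cannot instantly expire an
 * existing address.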
*/ update_lft = !create && stored_lft; if (update_lft && !READ_ONCE(in6_dev->cnf.ra_honor_pio_life)) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); } if (update_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; WRITE_ONCE(ifp->tstamp, now); flags = ifp->flags; ifp->flags &= ~IFA_F_DEPRECATED; spin_unlock_bh(&ifp->lock); if (!(flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); } else spin_unlock_bh(&ifp->lock); manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, create, now); in6_ifa_put(ifp); addrconf_verify(net); } return 0; } EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr); void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; struct fib6_table *table; __u32 valid_lft; __u32 prefered_lft; int addr_type, err; u32 addr_flags = 0; struct inet6_dev *in6_dev; struct net *net = dev_net(dev); pinfo = (struct prefix_info *) opt; if (len < sizeof(struct prefix_info)) { netdev_dbg(dev, "addrconf: prefix option too short\n"); return; } /* * Validation checks ([ADDRCONF], page 19) */ addr_type = ipv6_addr_type(&pinfo->prefix); if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)) return; valid_lft = ntohl(pinfo->valid); prefered_lft = ntohl(pinfo->prefered); if (prefered_lft > valid_lft) { net_warn_ratelimited("addrconf: prefix option has invalid lifetime\n"); return; } in6_dev = in6_dev_get(dev); if (!in6_dev) { net_dbg_ratelimited("addrconf: device %s not configured\n", dev->name); return; } if (valid_lft != 0 && valid_lft < in6_dev->cnf.accept_ra_min_lft) goto put; /* * Two things going on here: * 1) Add routes for on-link prefixes * 2) Configure prefixes with the auto flag set */ if (pinfo->onlink) { struct fib6_info *rt; unsigned long rt_expires; /* Avoid arithmetic overflow. Really, we could * save rt_expires in seconds, likely valid_lft, * but it would require division in fib gc, that it * not good. 
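 *
 * Hence valid_lft is first capped via addrconf_timeout_fixup()
 * (using the larger of HZ and USER_HZ as the unit) so that the
 * conversion to jiffies below cannot overflow, and only finite
 * timeouts are actually scaled by HZ.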
*/ if (HZ > USER_HZ) rt_expires = addrconf_timeout_fixup(valid_lft, HZ); else rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ); if (addrconf_finite_timeout(rt_expires)) rt_expires *= HZ; rt = addrconf_get_prefix_route(&pinfo->prefix, pinfo->prefix_len, dev, RTF_ADDRCONF | RTF_PREFIX_RT, RTF_DEFAULT, true); if (rt) { /* Autoconf prefix route */ if (valid_lft == 0) { ip6_del_rt(net, rt, false); rt = NULL; } else { table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ fib6_set_expires(rt, jiffies + rt_expires); fib6_add_gc_list(rt); } else { fib6_clean_expires(rt); fib6_remove_gc_list(rt); } spin_unlock_bh(&table->tb6_lock); } } else if (valid_lft) { clock_t expires = 0; int flags = RTF_ADDRCONF | RTF_PREFIX_RT; if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ flags |= RTF_EXPIRES; expires = jiffies_to_clock_t(rt_expires); } addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, 0, dev, expires, flags, GFP_ATOMIC); } fib6_info_release(rt); } /* Try to figure out our local address for this prefix */ if (pinfo->autoconf && in6_dev->cnf.autoconf) { struct in6_addr addr; bool tokenized = false, dev_addr_generated = false; if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); if (!ipv6_addr_any(&in6_dev->token)) { read_lock_bh(&in6_dev->lock); memcpy(addr.s6_addr + 8, in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); tokenized = true; } else if (is_addr_mode_generate_stable(in6_dev) && !ipv6_generate_stable_address(&addr, 0, in6_dev)) { addr_flags |= IFA_F_STABLE_PRIVACY; goto ok; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { goto put; } else { dev_addr_generated = true; } goto ok; } net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n", pinfo->prefix_len); goto put; ok: err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, &addr, addr_type, addr_flags, sllao, tokenized, valid_lft, prefered_lft); if (err) goto put; /* Ignore error case here because previous prefix add addr was * successful which will be notified. */ ndisc_ops_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, &addr, addr_type, addr_flags, sllao, tokenized, valid_lft, prefered_lft, dev_addr_generated); } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); put: in6_dev_put(in6_dev); } static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev, struct in6_ifreq *ireq) { struct ip_tunnel_parm p = { }; int err; if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4)) return -EADDRNOTAVAIL; p.iph.daddr = ireq->ifr6_addr.s6_addr32[3]; p.iph.version = 4; p.iph.ihl = 5; p.iph.protocol = IPPROTO_IPV6; p.iph.ttl = 64; if (!dev->netdev_ops->ndo_tunnel_ctl) return -EOPNOTSUPP; err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, SIOCADDTUNNEL); if (err) return err; dev = __dev_get_by_name(net, p.name); if (!dev) return -ENOBUFS; return dev_open(dev, NULL); } /* * Set destination address. * Special case for SIT interfaces where we create a new "virtual" * device. 
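 *
 * This is the backend for the SIOCSIFDSTADDR ioctl; it only acts
 * on ARPHRD_SIT devices and expects an IPv4-compatible IPv6
 * destination address.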
*/ int addrconf_set_dstaddr(struct net *net, void __user *arg) { struct net_device *dev; struct in6_ifreq ireq; int err = -ENODEV; if (!IS_ENABLED(CONFIG_IPV6_SIT)) return -ENODEV; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) return -EFAULT; rtnl_lock(); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); if (dev && dev->type == ARPHRD_SIT) err = addrconf_set_sit_dstaddr(net, dev, &ireq); rtnl_unlock(); return err; } static int ipv6_mc_config(struct sock *sk, bool join, const struct in6_addr *addr, int ifindex) { int ret; ASSERT_RTNL(); lock_sock(sk); if (join) ret = ipv6_sock_mc_join(sk, ifindex, addr); else ret = ipv6_sock_mc_drop(sk, ifindex, addr); release_sock(sk); return ret; } /* * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, struct ifa6_config *cfg, struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; unsigned long timeout; clock_t expires; u32 flags; ASSERT_RTNL(); if (cfg->plen > 128) { NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length"); return -EINVAL; } /* check the lifetime */ if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) { NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid"); return -EINVAL; } if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) { NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64"); return -EINVAL; } dev = __dev_get_by_index(net, ifindex); if (!dev) return -ENODEV; idev = addrconf_add_dev(dev); if (IS_ERR(idev)) { NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device"); return PTR_ERR(idev); } if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk, true, cfg->pfx, ifindex); if (ret < 0) { NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed"); return ret; } } cfg->scope = ipv6_addr_scope(cfg->pfx); timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ); if (addrconf_finite_timeout(timeout)) { expires = jiffies_to_clock_t(timeout * HZ); cfg->valid_lft = timeout; flags = RTF_EXPIRES; } else { expires = 0; flags = 0; cfg->ifa_flags |= IFA_F_PERMANENT; } timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ); if (addrconf_finite_timeout(timeout)) { if (timeout == 0) cfg->ifa_flags |= IFA_F_DEPRECATED; cfg->preferred_lft = timeout; } ifp = ipv6_add_addr(idev, cfg, true, extack); if (!IS_ERR(ifp)) { if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->rt_priority, dev, expires, flags, GFP_KERNEL); } /* Send a netlink notification if DAD is enabled and * optimistic flag is not set */ if (!(ifp->flags & (IFA_F_OPTIMISTIC | IFA_F_NODAD))) ipv6_ifa_notify(0, ifp); /* * Note that section 3.1 of RFC 4429 indicates * that the Optimistic flag should not be set for * manually configured addresses */ addrconf_dad_start(ifp); if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR) manage_tempaddrs(idev, ifp, cfg->valid_lft, cfg->preferred_lft, true, jiffies); in6_ifa_put(ifp); addrconf_verify_rtnl(net); return 0; } else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, cfg->pfx, ifindex); } return PTR_ERR(ifp); } static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, const struct in6_addr *pfx, unsigned int plen, struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; if (plen > 128) { NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length"); return -EINVAL; } dev = __dev_get_by_index(net, ifindex); if 
(!dev) { NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface"); return -ENODEV; } idev = __in6_dev_get(dev); if (!idev) { NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device"); return -ENXIO; } read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { if (ifp->prefix_len == plen && ipv6_addr_equal(pfx, &ifp->addr)) { in6_ifa_hold(ifp); read_unlock_bh(&idev->lock); if (!(ifp->flags & IFA_F_TEMPORARY) && (ifa_flags & IFA_F_MANAGETEMPADDR)) manage_tempaddrs(idev, ifp, 0, 0, false, jiffies); ipv6_del_addr(ifp); addrconf_verify_rtnl(net); if (ipv6_addr_is_multicast(pfx)) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, pfx, dev->ifindex); } return 0; } } read_unlock_bh(&idev->lock); NL_SET_ERR_MSG_MOD(extack, "address not found"); return -EADDRNOTAVAIL; } int addrconf_add_ifaddr(struct net *net, void __user *arg) { struct ifa6_config cfg = { .ifa_flags = IFA_F_PERMANENT, .preferred_lft = INFINITY_LIFE_TIME, .valid_lft = INFINITY_LIFE_TIME, }; struct in6_ifreq ireq; int err; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) return -EFAULT; cfg.pfx = &ireq.ifr6_addr; cfg.plen = ireq.ifr6_prefixlen; rtnl_lock(); err = inet6_addr_add(net, ireq.ifr6_ifindex, &cfg, NULL); rtnl_unlock(); return err; } int addrconf_del_ifaddr(struct net *net, void __user *arg) { struct in6_ifreq ireq; int err; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) return -EFAULT; rtnl_lock(); err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr, ireq.ifr6_prefixlen, NULL); rtnl_unlock(); return err; } static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int plen, int scope, u8 proto) { struct inet6_ifaddr *ifp; struct ifa6_config cfg = { .pfx = addr, .plen = plen, .ifa_flags = IFA_F_PERMANENT, .valid_lft = INFINITY_LIFE_TIME, .preferred_lft = INFINITY_LIFE_TIME, .scope = scope, .ifa_proto = proto }; ifp = ipv6_add_addr(idev, &cfg, true, NULL); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); rt_genid_bump_ipv6(dev_net(idev->dev)); ipv6_ifa_notify(RTM_NEWADDR, ifp); in6_ifa_put(ifp); } } #if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) static void add_v4_addrs(struct inet6_dev *idev) { struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); int scope, plen, offset = 0; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ if (idev->dev->addr_len == sizeof(struct in6_addr)) offset = sizeof(struct in6_addr) - 4; memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; plen = 96; pflags |= RTF_NONEXTHOP; } else { if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE) return; addr.s6_addr32[0] = htonl(0xfe800000); scope = IFA_LINK; plen = 64; } if (addr.s6_addr32[3]) { add_addr(idev, &addr, plen, scope, IFAPROT_UNSPEC); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, GFP_KERNEL); return; } for_each_netdev(net, dev) { struct in_device *in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr *ifa; int flag = scope; in_dev_for_each_ifa_rtnl(ifa, in_dev) { addr.s6_addr32[3] = ifa->ifa_local; if (ifa->ifa_scope == RT_SCOPE_LINK) continue; if 
(ifa->ifa_scope >= RT_SCOPE_HOST) { if (idev->dev->flags&IFF_POINTOPOINT) continue; flag |= IFA_HOST; } add_addr(idev, &addr, plen, flag, IFAPROT_UNSPEC); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, GFP_KERNEL); } } } } #endif static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; /* ::1 */ ASSERT_RTNL(); idev = ipv6_find_idev(dev); if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFAPROT_KERNEL_LO); } void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr, u32 flags) { struct ifa6_config cfg = { .pfx = addr, .plen = 64, .ifa_flags = flags | IFA_F_PERMANENT, .valid_lft = INFINITY_LIFE_TIME, .preferred_lft = INFINITY_LIFE_TIME, .scope = IFA_LINK, .ifa_proto = IFAPROT_KERNEL_LL }; struct inet6_ifaddr *ifp; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) || READ_ONCE(idev->cnf.optimistic_dad)) && !dev_net(idev->dev)->ipv6.devconf_all->forwarding) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif ifp = ipv6_add_addr(idev, &cfg, true, NULL); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, 0, idev->dev, 0, 0, GFP_ATOMIC); addrconf_dad_start(ifp); in6_ifa_put(ifp); } } EXPORT_SYMBOL_GPL(addrconf_add_linklocal); static bool ipv6_reserved_interfaceid(struct in6_addr address) { if ((address.s6_addr32[2] | address.s6_addr32[3]) == 0) return true; if (address.s6_addr32[2] == htonl(0x02005eff) && ((address.s6_addr32[3] & htonl(0xfe000000)) == htonl(0xfe000000))) return true; if (address.s6_addr32[2] == htonl(0xfdffffff) && ((address.s6_addr32[3] & htonl(0xffffff80)) == htonl(0xffffff80))) return true; return false; } static int ipv6_generate_stable_address(struct in6_addr *address, u8 dad_count, const struct inet6_dev *idev) { static DEFINE_SPINLOCK(lock); static __u32 digest[SHA1_DIGEST_WORDS]; static __u32 workspace[SHA1_WORKSPACE_WORDS]; static union { char __data[SHA1_BLOCK_SIZE]; struct { struct in6_addr secret; __be32 prefix[2]; unsigned char hwaddr[MAX_ADDR_LEN]; u8 dad_count; } __packed; } data; struct in6_addr secret; struct in6_addr temp; struct net *net = dev_net(idev->dev); BUILD_BUG_ON(sizeof(data.__data) != sizeof(data)); if (idev->cnf.stable_secret.initialized) secret = idev->cnf.stable_secret.secret; else if (net->ipv6.devconf_dflt->stable_secret.initialized) secret = net->ipv6.devconf_dflt->stable_secret.secret; else return -1; retry: spin_lock_bh(&lock); sha1_init(digest); memset(&data, 0, sizeof(data)); memset(workspace, 0, sizeof(workspace)); memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len); data.prefix[0] = address->s6_addr32[0]; data.prefix[1] = address->s6_addr32[1]; data.secret = secret; data.dad_count = dad_count; sha1_transform(digest, data.__data, workspace); temp = *address; temp.s6_addr32[2] = (__force __be32)digest[0]; temp.s6_addr32[3] = (__force __be32)digest[1]; spin_unlock_bh(&lock); if (ipv6_reserved_interfaceid(temp)) { dad_count++; if (dad_count > dev_net(idev->dev)->ipv6.sysctl.idgen_retries) return -1; goto retry; } *address = temp; return 0; } static void ipv6_gen_mode_random_init(struct inet6_dev *idev) { struct ipv6_stable_secret *s = &idev->cnf.stable_secret; if (s->initialized) return; s = &idev->cnf.stable_secret; get_random_bytes(&s->secret, sizeof(s->secret)); s->initialized = true; } static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { struct in6_addr addr; /* no link local addresses on L3 master devices */ if 
(netif_is_l3_master(idev->dev)) return; /* no link local addresses on devices flagged as slaves */ if (idev->dev->priv_flags & IFF_NO_ADDRCONF) return; ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); switch (idev->cnf.addr_gen_mode) { case IN6_ADDR_GEN_MODE_RANDOM: ipv6_gen_mode_random_init(idev); fallthrough; case IN6_ADDR_GEN_MODE_STABLE_PRIVACY: if (!ipv6_generate_stable_address(&addr, 0, idev)) addrconf_add_linklocal(idev, &addr, IFA_F_STABLE_PRIVACY); else if (prefix_route) addrconf_prefix_route(&addr, 64, 0, idev->dev, 0, 0, GFP_KERNEL); break; case IN6_ADDR_GEN_MODE_EUI64: /* addrconf_add_linklocal also adds a prefix_route and we * only need to care about prefix routes if ipv6_generate_eui64 * couldn't generate one. */ if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) addrconf_prefix_route(&addr, 64, 0, idev->dev, 0, 0, GFP_KERNEL); break; case IN6_ADDR_GEN_MODE_NONE: default: /* will not add any link local address */ break; } } static void addrconf_dev_config(struct net_device *dev) { struct inet6_dev *idev; ASSERT_RTNL(); if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_ARCNET) && (dev->type != ARPHRD_INFINIBAND) && (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN) && (dev->type != ARPHRD_TUNNEL) && (dev->type != ARPHRD_NONE) && (dev->type != ARPHRD_RAWIP)) { /* Alas, we support only Ethernet autoconfiguration. */ idev = __in6_dev_get(dev); if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && dev->flags & IFF_MULTICAST) ipv6_mc_up(idev); return; } idev = addrconf_add_dev(dev); if (IS_ERR(idev)) return; /* this device type has no EUI support */ if (dev->type == ARPHRD_NONE && idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) WRITE_ONCE(idev->cnf.addr_gen_mode, IN6_ADDR_GEN_MODE_RANDOM); addrconf_addr_gen(idev, false); } #if IS_ENABLED(CONFIG_IPV6_SIT) static void addrconf_sit_config(struct net_device *dev) { struct inet6_dev *idev; ASSERT_RTNL(); /* * Configure the tunnel with one of our IPv4 * addresses... we should configure all of * our v4 addrs in the tunnel */ idev = ipv6_find_idev(dev); if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } if (dev->priv_flags & IFF_ISATAP) { addrconf_addr_gen(idev, false); return; } add_v4_addrs(idev); if (dev->flags&IFF_POINTOPOINT) addrconf_add_mroute(dev); } #endif #if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; ASSERT_RTNL(); idev = ipv6_find_idev(dev); if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } if (dev->type == ARPHRD_ETHER) { addrconf_addr_gen(idev, true); return; } add_v4_addrs(idev); if (dev->flags & IFF_POINTOPOINT) addrconf_add_mroute(dev); } #endif static void addrconf_init_auto_addrs(struct net_device *dev) { switch (dev->type) { #if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: addrconf_sit_config(dev); break; #endif #if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) case ARPHRD_IP6GRE: case ARPHRD_IPGRE: addrconf_gre_config(dev); break; #endif case ARPHRD_LOOPBACK: init_loopback(dev); break; default: addrconf_dev_config(dev); break; } } static int fixup_permanent_addr(struct net *net, struct inet6_dev *idev, struct inet6_ifaddr *ifp) { /* !fib6_node means the host route was removed from the * FIB, for example, if 'lo' device is taken down. In that * case regenerate the host route. 
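 *
 * The prefix route is also re-added (unless IFA_F_NOPREFIXROUTE is
 * set) and DAD is restarted for addresses that were parked in
 * INET6_IFADDR_STATE_PREDAD while the device was down.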
*/ if (!ifp->rt || !ifp->rt->fib6_node) { struct fib6_info *f6i, *prev; f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false, GFP_ATOMIC, NULL); if (IS_ERR(f6i)) return PTR_ERR(f6i); /* ifp->rt can be accessed outside of rtnl */ spin_lock(&ifp->lock); prev = ifp->rt; ifp->rt = f6i; spin_unlock(&ifp->lock); fib6_info_release(prev); } if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->rt_priority, idev->dev, 0, 0, GFP_ATOMIC); } if (ifp->state == INET6_IFADDR_STATE_PREDAD) addrconf_dad_start(ifp); return 0; } static void addrconf_permanent_addr(struct net *net, struct net_device *dev) { struct inet6_ifaddr *ifp, *tmp; struct inet6_dev *idev; idev = __in6_dev_get(dev); if (!idev) return; write_lock_bh(&idev->lock); list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { if ((ifp->flags & IFA_F_PERMANENT) && fixup_permanent_addr(net, idev, ifp) < 0) { write_unlock_bh(&idev->lock); in6_ifa_hold(ifp); ipv6_del_addr(ifp); write_lock_bh(&idev->lock); net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", idev->dev->name, &ifp->addr); } } write_unlock_bh(&idev->lock); } static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_change_info *change_info; struct netdev_notifier_changeupper_info *info; struct inet6_dev *idev = __in6_dev_get(dev); struct net *net = dev_net(dev); int run_pending = 0; int err; switch (event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); if (IS_ERR(idev)) return notifier_from_errno(PTR_ERR(idev)); } break; case NETDEV_CHANGEMTU: /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) { addrconf_ifdown(dev, dev != net->loopback_dev); break; } if (idev) { rt6_mtu_change(dev, dev->mtu); WRITE_ONCE(idev->cnf.mtu6, dev->mtu); break; } /* allocate new idev */ idev = ipv6_add_dev(dev); if (IS_ERR(idev)) break; /* device is still not ready */ if (!(idev->if_flags & IF_READY)) break; run_pending = 1; fallthrough; case NETDEV_UP: case NETDEV_CHANGE: if (idev && idev->cnf.disable_ipv6) break; if (dev->priv_flags & IFF_NO_ADDRCONF) { if (event == NETDEV_UP && !IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && dev->flags & IFF_MULTICAST) ipv6_mc_up(idev); break; } if (event == NETDEV_UP) { /* restore routes for permanent addresses */ addrconf_permanent_addr(net, dev); if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ pr_debug("ADDRCONF(NETDEV_UP): %s: link is not ready\n", dev->name); break; } if (!idev && dev->mtu >= IPV6_MIN_MTU) idev = ipv6_add_dev(dev); if (!IS_ERR_OR_NULL(idev)) { idev->if_flags |= IF_READY; run_pending = 1; } } else if (event == NETDEV_CHANGE) { if (!addrconf_link_ready(dev)) { /* device is still not ready. 
*/ rt6_sync_down_dev(dev, event); break; } if (!IS_ERR_OR_NULL(idev)) { if (idev->if_flags & IF_READY) { /* device is already configured - * but resend MLD reports, we might * have roamed and need to update * multicast snooping switches */ ipv6_mc_up(idev); change_info = ptr; if (change_info->flags_changed & IFF_NOARP) addrconf_dad_run(idev, true); rt6_sync_up(dev, RTNH_F_LINKDOWN); break; } idev->if_flags |= IF_READY; } pr_debug("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n", dev->name); run_pending = 1; } addrconf_init_auto_addrs(dev); if (!IS_ERR_OR_NULL(idev)) { if (run_pending) addrconf_dad_run(idev, false); /* Device has an address by now */ rt6_sync_up(dev, RTNH_F_DEAD); /* * If the MTU changed during the interface down, * when the interface up, the changed MTU must be * reflected in the idev as well as routers. */ if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); WRITE_ONCE(idev->cnf.mtu6, dev->mtu); } WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); /* * If the changed mtu during down is lower than * IPV6_MIN_MTU stop IPv6 on this interface. */ if (dev->mtu < IPV6_MIN_MTU) addrconf_ifdown(dev, dev != net->loopback_dev); } break; case NETDEV_DOWN: case NETDEV_UNREGISTER: /* * Remove all addresses from this interface. */ addrconf_ifdown(dev, event != NETDEV_DOWN); break; case NETDEV_CHANGENAME: if (idev) { snmp6_unregister_dev(idev); addrconf_sysctl_unregister(idev); err = addrconf_sysctl_register(idev); if (err) return notifier_from_errno(err); err = snmp6_register_dev(idev); if (err) { addrconf_sysctl_unregister(idev); return notifier_from_errno(err); } } break; case NETDEV_PRE_TYPE_CHANGE: case NETDEV_POST_TYPE_CHANGE: if (idev) addrconf_type_change(dev, event); break; case NETDEV_CHANGEUPPER: info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ if (info->upper_dev && netif_is_l3_master(info->upper_dev)) addrconf_ifdown(dev, false); } return NOTIFY_OK; } /* * addrconf module should be notified of a device going up */ static struct notifier_block ipv6_dev_notf = { .notifier_call = addrconf_notify, .priority = ADDRCONF_NOTIFY_PRIORITY, }; static void addrconf_type_change(struct net_device *dev, unsigned long event) { struct inet6_dev *idev; ASSERT_RTNL(); idev = __in6_dev_get(dev); if (event == NETDEV_POST_TYPE_CHANGE) ipv6_mc_remap(idev); else if (event == NETDEV_PRE_TYPE_CHANGE) ipv6_mc_unmap(idev); } static bool addr_is_local(const struct in6_addr *addr) { return ipv6_addr_type(addr) & (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } static int addrconf_ifdown(struct net_device *dev, bool unregister) { unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa; LIST_HEAD(tmp_addr_list); bool keep_addr = false; bool was_ready; int state, i; ASSERT_RTNL(); rt6_disable_ip(dev, event); idev = __in6_dev_get(dev); if (!idev) return -ENODEV; /* * Step 1: remove reference to ipv6 device from parent device. * Do not dev_put! 
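 * The idev reference itself is only dropped at the very end of
 * this function (in6_dev_put() in the unregister case), after the
 * sysctl and neighbour state have been torn down.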
*/ if (unregister) { idev->dead = 1; /* protected by rtnl_lock */ RCU_INIT_POINTER(dev->ip6_ptr, NULL); /* Step 1.5: remove snmp6 entry */ snmp6_unregister_dev(idev); } /* combine the user config with event to determine if permanent * addresses are to be removed from address hash table */ if (!unregister && !idev->cnf.disable_ipv6) { /* aggregate the system setting and interface setting */ int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down); if (!_keep_addr) _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down); keep_addr = (_keep_addr > 0); } /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &net->ipv6.inet6_addr_lst[i]; spin_lock_bh(&net->ipv6.addrconf_hash_lock); restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { addrconf_del_dad_work(ifa); /* combined flag + permanent flag decide if * address is retained on a down event */ if (!keep_addr || !(ifa->flags & IFA_F_PERMANENT) || addr_is_local(&ifa->addr)) { hlist_del_init_rcu(&ifa->addr_lst); goto restart; } } } spin_unlock_bh(&net->ipv6.addrconf_hash_lock); } write_lock_bh(&idev->lock); addrconf_del_rs_timer(idev); /* Step 2: clear flags for stateless addrconf, repeated down * detection */ was_ready = idev->if_flags & IF_READY; if (!unregister) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); /* Step 3: clear tempaddr list */ while (!list_empty(&idev->tempaddr_list)) { ifa = list_first_entry(&idev->tempaddr_list, struct inet6_ifaddr, tmp_list); list_del(&ifa->tmp_list); write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); if (ifa->ifpub) { in6_ifa_put(ifa->ifpub); ifa->ifpub = NULL; } spin_unlock_bh(&ifa->lock); in6_ifa_put(ifa); write_lock_bh(&idev->lock); } list_for_each_entry(ifa, &idev->addr_list, if_list) list_add_tail(&ifa->if_list_aux, &tmp_addr_list); write_unlock_bh(&idev->lock); while (!list_empty(&tmp_addr_list)) { struct fib6_info *rt = NULL; bool keep; ifa = list_first_entry(&tmp_addr_list, struct inet6_ifaddr, if_list_aux); list_del(&ifa->if_list_aux); addrconf_del_dad_work(ifa); keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr); spin_lock_bh(&ifa->lock); if (keep) { /* set state to skip the notifier below */ state = INET6_IFADDR_STATE_DEAD; ifa->state = INET6_IFADDR_STATE_PREDAD; if (!(ifa->flags & IFA_F_NODAD)) ifa->flags |= IFA_F_TENTATIVE; rt = ifa->rt; ifa->rt = NULL; } else { state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; } spin_unlock_bh(&ifa->lock); if (rt) ip6_del_rt(net, rt, false); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } else { if (idev->cnf.forwarding) addrconf_leave_anycast(ifa); addrconf_leave_solict(ifa->idev, &ifa->addr); } if (!keep) { write_lock_bh(&idev->lock); list_del_rcu(&ifa->if_list); write_unlock_bh(&idev->lock); in6_ifa_put(ifa); } } /* Step 5: Discard anycast and multicast list */ if (unregister) { ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); } else if (was_ready) { ipv6_mc_down(idev); } WRITE_ONCE(idev->tstamp, jiffies); idev->ra_mtu = 0; /* Last: Shot the device (if unregistered) */ if (unregister) { addrconf_sysctl_unregister(idev); neigh_parms_release(&nd_tbl, idev->nd_parms); neigh_ifdown(&nd_tbl, dev); in6_dev_put(idev); } return 0; } static void addrconf_rs_timer(struct timer_list *t) { struct inet6_dev *idev = from_timer(idev, t, rs_timer); struct net_device *dev = idev->dev; struct in6_addr lladdr; int rtr_solicits; write_lock(&idev->lock); if (idev->dead || 
!(idev->if_flags & IF_READY)) goto out; if (!ipv6_accept_ra(idev)) goto out; /* Announcement received after solicitation was sent */ if (idev->if_flags & IF_RA_RCVD) goto out; rtr_solicits = READ_ONCE(idev->cnf.rtr_solicits); if (idev->rs_probes++ < rtr_solicits || rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters); else goto put; write_lock(&idev->lock); idev->rs_interval = rfc3315_s14_backoff_update( idev->rs_interval, READ_ONCE(idev->cnf.rtr_solicit_max_interval)); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == READ_ONCE(idev->cnf.rtr_solicits)) ? READ_ONCE(idev->cnf.rtr_solicit_delay) : idev->rs_interval); } else { /* * Note: we do not support deprecated "all on-link" * assumption any longer. */ pr_debug("%s: no IPv6 routers present\n", idev->dev->name); } out: write_unlock(&idev->lock); put: in6_dev_put(idev); } /* * Duplicate Address Detection */ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; unsigned long rand_num; u64 nonce; if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else rand_num = get_random_u32_below( READ_ONCE(idev->cnf.rtr_solicit_delay) ? : 1); nonce = 0; if (READ_ONCE(idev->cnf.enhanced_dad) || READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad)) { do get_random_bytes(&nonce, 6); while (nonce == 0); } ifp->dad_nonce = nonce; ifp->dad_probes = READ_ONCE(idev->cnf.dad_transmits); addrconf_mod_dad_work(ifp, rand_num); } static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; bool bump_id, notify = false; struct net *net; addrconf_join_solict(dev, &ifp->addr); read_lock_bh(&idev->lock); spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) goto out; net = dev_net(dev); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 && READ_ONCE(idev->cnf.accept_dad) < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bool send_na = false; if (ifp->flags & IFA_F_TENTATIVE && !(ifp->flags & IFA_F_OPTIMISTIC)) send_na = true; bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); addrconf_dad_completed(ifp, bump_id, send_na); return; } if (!(idev->if_flags & IF_READY)) { spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); /* * If the device is not ready: * - keep it tentative if it is a permanent address. * - otherwise, kill it. */ in6_ifa_hold(ifp); addrconf_dad_stop(ifp, 0); return; } /* * Optimistic nodes can start receiving * Frames right away */ if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(net, ifp->rt); if (ipv6_use_optimistic_addr(net, idev)) { /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. 
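 * The RTM_NEWADDR notification itself is deferred until after the
 * locks are dropped at the end of this function.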
*/ notify = true; } } addrconf_dad_kick(ifp); out: spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); if (notify) ipv6_ifa_notify(RTM_NEWADDR, ifp); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) { bool begin_dad = false; spin_lock_bh(&ifp->lock); if (ifp->state != INET6_IFADDR_STATE_DEAD) { ifp->state = INET6_IFADDR_STATE_PREDAD; begin_dad = true; } spin_unlock_bh(&ifp->lock); if (begin_dad) addrconf_mod_dad_work(ifp, 0); } static void addrconf_dad_work(struct work_struct *w) { struct inet6_ifaddr *ifp = container_of(to_delayed_work(w), struct inet6_ifaddr, dad_work); struct inet6_dev *idev = ifp->idev; bool bump_id, disable_ipv6 = false; struct in6_addr mcaddr; enum { DAD_PROCESS, DAD_BEGIN, DAD_ABORT, } action = DAD_PROCESS; rtnl_lock(); spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_PREDAD) { action = DAD_BEGIN; ifp->state = INET6_IFADDR_STATE_DAD; } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 || READ_ONCE(idev->cnf.accept_dad) > 1) && !idev->cnf.disable_ipv6 && !(ifp->flags & IFA_F_STABLE_PRIVACY)) { struct in6_addr addr; addr.s6_addr32[0] = htonl(0xfe800000); addr.s6_addr32[1] = 0; if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && ipv6_addr_equal(&ifp->addr, &addr)) { /* DAD failed for link-local based on MAC */ WRITE_ONCE(idev->cnf.disable_ipv6, 1); pr_info("%s: IPv6 being disabled!\n", ifp->idev->dev->name); disable_ipv6 = true; } } } spin_unlock_bh(&ifp->lock); if (action == DAD_BEGIN) { addrconf_dad_begin(ifp); goto out; } else if (action == DAD_ABORT) { in6_ifa_hold(ifp); addrconf_dad_stop(ifp, 1); if (disable_ipv6) addrconf_ifdown(idev->dev, false); goto out; } if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; write_lock_bh(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { write_unlock_bh(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); write_unlock_bh(&idev->lock); goto out; } if (ifp->dad_probes == 0) { bool send_na = false; /* * DAD was successful */ if (ifp->flags & IFA_F_TENTATIVE && !(ifp->flags & IFA_F_OPTIMISTIC)) send_na = true; bump_id = ifp->flags & IFA_F_TENTATIVE; ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); write_unlock_bh(&idev->lock); addrconf_dad_completed(ifp, bump_id, send_na); goto out; } ifp->dad_probes--; addrconf_mod_dad_work(ifp, max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100)); spin_unlock(&ifp->lock); write_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, ifp->dad_nonce); out: in6_ifa_put(ifp); rtnl_unlock(); } /* ifp->idev must be at least read locked */ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) { struct inet6_ifaddr *ifpiter; struct inet6_dev *idev = ifp->idev; list_for_each_entry_reverse(ifpiter, &idev->addr_list, if_list) { if (ifpiter->scope > IFA_LINK) break; if (ifp != ifpiter && ifpiter->scope == IFA_LINK && (ifpiter->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE| IFA_F_OPTIMISTIC|IFA_F_DADFAILED)) == IFA_F_PERMANENT) return false; } return true; } static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, bool send_na) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; bool send_rs, send_mld; addrconf_del_dad_work(ifp); /* * Configure the address for 
reception. Now it is valid. */ ipv6_ifa_notify(RTM_NEWADDR, ifp); /* If added prefix is link local and we are prepared to process router advertisements, start sending router solicitations. */ read_lock_bh(&ifp->idev->lock); send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && READ_ONCE(ifp->idev->cnf.rtr_solicits) != 0 && (dev->flags & IFF_LOOPBACK) == 0 && (dev->type != ARPHRD_TUNNEL) && !netif_is_team_port(dev); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. * Resend with proper ll now. */ if (send_mld) ipv6_mc_dad_complete(ifp->idev); /* send unsolicited NA if enabled */ if (send_na && (READ_ONCE(ifp->idev->cnf.ndisc_notify) || READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr, /*router=*/ !!ifp->idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, /*inc_opt=*/ true); } if (send_rs) { /* * If a host as already performed a random delay * [...] as part of DAD [...] there is no need * to delay again before sending the first RS */ if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) return; ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters); write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); ifp->idev->rs_interval = rfc3315_s14_backoff_init( READ_ONCE(ifp->idev->cnf.rtr_solicit_interval)); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } if (bump_id) rt_genid_bump_ipv6(dev_net(dev)); /* Make sure that a new temporary address will be created * before this temporary address becomes deprecated. */ if (ifp->flags & IFA_F_TEMPORARY) addrconf_verify_rtnl(dev_net(dev)); } static void addrconf_dad_run(struct inet6_dev *idev, bool restart) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); if ((ifp->flags & IFA_F_TENTATIVE && ifp->state == INET6_IFADDR_STATE_DAD) || restart) { if (restart) ifp->state = INET6_IFADDR_STATE_PREDAD; addrconf_dad_kick(ifp); } spin_unlock(&ifp->lock); } read_unlock_bh(&idev->lock); } #ifdef CONFIG_PROC_FS struct if6_iter_state { struct seq_net_private p; int bucket; int offset; }; static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); struct inet6_ifaddr *ifa = NULL; int p = 0; /* initial bucket if pos is 0 */ if (pos == 0) { state->bucket = 0; state->offset = 0; } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { /* sync with offset */ if (p < state->offset) { p++; continue; } return ifa; } /* prepare for next bucket */ state->offset = 0; p = 0; } return NULL; } static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); hlist_for_each_entry_continue_rcu(ifa, addr_lst) { state->offset++; return ifa; } state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { return ifa; } } return NULL; } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) __acquires(rcu) { rcu_read_lock(); return if6_get_first(seq, *pos); } static void *if6_seq_next(struct seq_file *seq, 
void *v, loff_t *pos) { struct inet6_ifaddr *ifa; ifa = if6_get_next(seq, v); ++*pos; return ifa; } static void if6_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { rcu_read_unlock(); } static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", &ifp->addr, ifp->idev->dev->ifindex, ifp->prefix_len, ifp->scope, (u8) ifp->flags, ifp->idev->dev->name); return 0; } static const struct seq_operations if6_seq_ops = { .start = if6_seq_start, .next = if6_seq_next, .show = if6_seq_show, .stop = if6_seq_stop, }; static int __net_init if6_proc_net_init(struct net *net) { if (!proc_create_net("if_inet6", 0444, net->proc_net, &if6_seq_ops, sizeof(struct if6_iter_state))) return -ENOMEM; return 0; } static void __net_exit if6_proc_net_exit(struct net *net) { remove_proc_entry("if_inet6", net->proc_net); } static struct pernet_operations if6_proc_net_ops = { .init = if6_proc_net_init, .exit = if6_proc_net_exit, }; int __init if6_proc_init(void) { return register_pernet_subsys(&if6_proc_net_ops); } void if6_proc_exit(void) { unregister_pernet_subsys(&if6_proc_net_ops); } #endif /* CONFIG_PROC_FS */ #if IS_ENABLED(CONFIG_IPV6_MIP6) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp = NULL; int ret = 0; rcu_read_lock(); hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr) && (ifp->flags & IFA_F_HOMEADDRESS)) { ret = 1; break; } } rcu_read_unlock(); return ret; } #endif /* RFC6554 has some algorithm to avoid loops in segment routing by * checking if the segments contains any of a local interface address. * * Quote: * * To detect loops in the SRH, a router MUST determine if the SRH * includes multiple addresses assigned to any interface on that router. * If such addresses appear more than once and are separated by at least * one address not assigned to that router. */ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, unsigned char nsegs) { const struct in6_addr *addr; int i, ret = 0, found = 0; struct inet6_ifaddr *ifp; bool separated = false; unsigned int hash; bool hash_found; rcu_read_lock(); for (i = 0; i < nsegs; i++) { addr = &segs[i]; hash = inet6_addr_hash(net, addr); hash_found = false; hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { hash_found = true; break; } } if (hash_found) { if (found > 1 && separated) { ret = 1; break; } separated = false; found++; } else { separated = true; } } rcu_read_unlock(); return ret; } /* * Periodic address status verification */ static void addrconf_verify_rtnl(struct net *net) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; int i; ASSERT_RTNL(); rcu_read_lock_bh(); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); cancel_delayed_work(&net->ipv6.addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or * infinity, while valid_lft is infinity * IFA_F_PERMANENT has a non-infinity life time. */ if ((ifp->flags & IFA_F_PERMANENT) && (ifp->prefered_lft == INFINITY_LIFE_TIME)) continue; spin_lock(&ifp->lock); /* We try to batch several events at once. 
*/ age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if ((ifp->flags&IFA_F_TEMPORARY) && !(ifp->flags&IFA_F_TENTATIVE) && ifp->prefered_lft != INFINITY_LIFE_TIME && !ifp->regen_count && ifp->ifpub) { /* This is a non-regenerated temporary addr. */ unsigned long regen_advance = ipv6_get_regen_advance(ifp->idev); if (age + regen_advance >= ifp->prefered_lft) { struct inet6_ifaddr *ifpub = ifp->ifpub; if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) next = ifp->tstamp + ifp->prefered_lft * HZ; ifp->regen_count++; in6_ifa_hold(ifp); in6_ifa_hold(ifpub); spin_unlock(&ifp->lock); spin_lock(&ifpub->lock); ifpub->regen_count = 0; spin_unlock(&ifpub->lock); rcu_read_unlock_bh(); ipv6_create_tempaddr(ifpub, true); in6_ifa_put(ifpub); in6_ifa_put(ifp); rcu_read_lock_bh(); goto restart; } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; } if (ifp->valid_lft != INFINITY_LIFE_TIME && age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); rcu_read_unlock_bh(); ipv6_del_addr(ifp); rcu_read_lock_bh(); goto restart; } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { spin_unlock(&ifp->lock); continue; } else if (age >= ifp->prefered_lft) { /* jiffies - ifp->tstamp > age >= ifp->prefered_lft */ int deprecate = 0; if (!(ifp->flags&IFA_F_DEPRECATED)) { deprecate = 1; ifp->flags |= IFA_F_DEPRECATED; } if ((ifp->valid_lft != INFINITY_LIFE_TIME) && (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))) next = ifp->tstamp + ifp->valid_lft * HZ; spin_unlock(&ifp->lock); if (deprecate) { in6_ifa_hold(ifp); ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); goto restart; } } else { /* ifp->prefered_lft <= ifp->valid_lft */ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) next = ifp->tstamp + ifp->prefered_lft * HZ; spin_unlock(&ifp->lock); } } } next_sec = round_jiffies_up(next); next_sched = next; /* If rounded timeout is accurate enough, accept it. */ if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) next_sched = next_sec; /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. 
*/ if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX)) next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX; pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } static void addrconf_verify_work(struct work_struct *w) { struct net *net = container_of(to_delayed_work(w), struct net, ipv6.addr_chk_work); rtnl_lock(); addrconf_verify_rtnl(net); rtnl_unlock(); } static void addrconf_verify(struct net *net) { mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, 0); } static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, struct in6_addr **peer_pfx) { struct in6_addr *pfx = NULL; *peer_pfx = NULL; if (addr) pfx = nla_data(addr); if (local) { if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) *peer_pfx = pfx; pfx = nla_data(local); } return pfx; } static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, [IFA_FLAGS] = { .len = sizeof(u32) }, [IFA_RT_PRIORITY] = { .len = sizeof(u32) }, [IFA_TARGET_NETNSID] = { .type = NLA_S32 }, [IFA_PROTO] = { .type = NLA_U8 }, }; static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx, *peer_pfx; u32 ifa_flags; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); if (err < 0) return err; ifm = nlmsg_data(nlh); pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (!pfx) return -EINVAL; ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; /* We ignore other flags so far. */ ifa_flags &= IFA_F_MANAGETEMPADDR; return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx, ifm->ifa_prefixlen, extack); } static int modify_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, u32 flags, bool modify_peer) { struct fib6_table *table; struct fib6_info *f6i; u32 prio; f6i = addrconf_get_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr, ifp->prefix_len, ifp->idev->dev, 0, RTF_DEFAULT, true); if (!f6i) return -ENOENT; prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF; if (f6i->fib6_metric != prio) { /* delete old one */ ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); /* add new one */ addrconf_prefix_route(modify_peer ? 
&ifp->peer_addr : &ifp->addr, ifp->prefix_len, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } else { table = f6i->fib6_table; spin_lock_bh(&table->tb6_lock); if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); fib6_remove_gc_list(f6i); } else { fib6_set_expires(f6i, expires); fib6_add_gc_list(f6i); } spin_unlock_bh(&table->tb6_lock); fib6_info_release(f6i); } return 0; } static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, struct ifa6_config *cfg) { u32 flags; clock_t expires; unsigned long timeout; bool was_managetempaddr; bool had_prefixroute; bool new_peer = false; ASSERT_RTNL(); if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) return -EINVAL; if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64)) return -EINVAL; if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED) cfg->ifa_flags &= ~IFA_F_OPTIMISTIC; timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ); if (addrconf_finite_timeout(timeout)) { expires = jiffies_to_clock_t(timeout * HZ); cfg->valid_lft = timeout; flags = RTF_EXPIRES; } else { expires = 0; flags = 0; cfg->ifa_flags |= IFA_F_PERMANENT; } timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ); if (addrconf_finite_timeout(timeout)) { if (timeout == 0) cfg->ifa_flags |= IFA_F_DEPRECATED; cfg->preferred_lft = timeout; } if (cfg->peer_pfx && memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) { if (!ipv6_addr_any(&ifp->peer_addr)) cleanup_prefix_route(ifp, expires, true, true); new_peer = true; } spin_lock_bh(&ifp->lock); was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR; had_prefixroute = ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE); ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE); ifp->flags |= cfg->ifa_flags; WRITE_ONCE(ifp->tstamp, jiffies); WRITE_ONCE(ifp->valid_lft, cfg->valid_lft); WRITE_ONCE(ifp->prefered_lft, cfg->preferred_lft); WRITE_ONCE(ifp->ifa_proto, cfg->ifa_proto); if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority) WRITE_ONCE(ifp->rt_priority, cfg->rt_priority); if (new_peer) ifp->peer_addr = *cfg->peer_pfx; spin_unlock_bh(&ifp->lock); if (!(ifp->flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) { int rc = -ENOENT; if (had_prefixroute) rc = modify_prefix_route(ifp, expires, flags, false); /* prefix route could have been deleted; if so restore it */ if (rc == -ENOENT) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr)) rc = modify_prefix_route(ifp, expires, flags, true); if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) { addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } } else if (had_prefixroute) { enum cleanup_prefix_rt_t action; unsigned long rt_expires; write_lock_bh(&ifp->idev->lock); action = check_cleanup_prefix_route(ifp, &rt_expires); write_unlock_bh(&ifp->idev->lock); if (action != CLEANUP_PREFIX_RT_NOP) { cleanup_prefix_route(ifp, rt_expires, action == CLEANUP_PREFIX_RT_DEL, false); } } if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) { if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR)) { cfg->valid_lft = 0; cfg->preferred_lft = 0; } manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft, cfg->preferred_lft, !was_managetempaddr, jiffies); } 
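	/* Lifetimes, flags and (possibly) the peer address were updated above;
	 * kicking addrconf_verify_rtnl() here lets the new valid/preferred
	 * lifetimes be acted on right away instead of waiting for the next
	 * scheduled addr_chk_work run.
	 */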
addrconf_verify_rtnl(net); return 0; } static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *peer_pfx; struct inet6_ifaddr *ifa; struct net_device *dev; struct inet6_dev *idev; struct ifa6_config cfg; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); if (err < 0) return err; memset(&cfg, 0, sizeof(cfg)); ifm = nlmsg_data(nlh); cfg.pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (!cfg.pfx) return -EINVAL; cfg.peer_pfx = peer_pfx; cfg.plen = ifm->ifa_prefixlen; if (tb[IFA_RT_PRIORITY]) cfg.rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]); if (tb[IFA_PROTO]) cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]); cfg.valid_lft = INFINITY_LIFE_TIME; cfg.preferred_lft = INFINITY_LIFE_TIME; if (tb[IFA_CACHEINFO]) { struct ifa_cacheinfo *ci; ci = nla_data(tb[IFA_CACHEINFO]); cfg.valid_lft = ci->ifa_valid; cfg.preferred_lft = ci->ifa_prefered; } dev = __dev_get_by_index(net, ifm->ifa_index); if (!dev) { NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface"); return -ENODEV; } if (tb[IFA_FLAGS]) cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]); else cfg.ifa_flags = ifm->ifa_flags; /* We ignore other flags so far. */ cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; idev = ipv6_find_idev(dev); if (IS_ERR(idev)) return PTR_ERR(idev); if (!ipv6_allow_optimistic_dad(net, idev)) cfg.ifa_flags &= ~IFA_F_OPTIMISTIC; if (cfg.ifa_flags & IFA_F_NODAD && cfg.ifa_flags & IFA_F_OPTIMISTIC) { NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive"); return -EINVAL; } ifa = ipv6_get_ifaddr(net, cfg.pfx, dev, 1); if (!ifa) { /* * It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. 
*/ return inet6_addr_add(net, ifm->ifa_index, &cfg, extack); } if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "address already assigned"); err = -EEXIST; } else { err = inet6_addr_modify(net, ifa, &cfg); } in6_ifa_put(ifa); return err; } static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u32 flags, u8 scope, int ifindex) { struct ifaddrmsg *ifm; ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET6; ifm->ifa_prefixlen = prefixlen; ifm->ifa_flags = flags; ifm->ifa_scope = scope; ifm->ifa_index = ifindex; } static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, unsigned long tstamp, u32 preferred, u32 valid) { struct ifa_cacheinfo ci; ci.cstamp = cstamp_delta(cstamp); ci.tstamp = cstamp_delta(tstamp); ci.ifa_prefered = preferred; ci.ifa_valid = valid; return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); } static inline int rt_scope(int ifa_scope) { if (ifa_scope & IFA_HOST) return RT_SCOPE_HOST; else if (ifa_scope & IFA_LINK) return RT_SCOPE_LINK; else if (ifa_scope & IFA_SITE) return RT_SCOPE_SITE; else return RT_SCOPE_UNIVERSE; } static inline int inet6_ifaddr_msgsize(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(16) /* IFA_LOCAL */ + nla_total_size(16) /* IFA_ADDRESS */ + nla_total_size(sizeof(struct ifa_cacheinfo)) + nla_total_size(4) /* IFA_FLAGS */ + nla_total_size(1) /* IFA_PROTO */ + nla_total_size(4) /* IFA_RT_PRIORITY */; } enum addr_type_t { UNICAST_ADDR, MULTICAST_ADDR, ANYCAST_ADDR, }; struct inet6_fill_args { u32 portid; u32 seq; int event; unsigned int flags; int netnsid; int ifindex; enum addr_type_t type; }; static int inet6_fill_ifaddr(struct sk_buff *skb, const struct inet6_ifaddr *ifa, struct inet6_fill_args *args) { struct nlmsghdr *nlh; u32 preferred, valid; u32 flags, priority; u8 proto; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; flags = READ_ONCE(ifa->flags); put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); if (args->netnsid >= 0 && nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; preferred = READ_ONCE(ifa->prefered_lft); valid = READ_ONCE(ifa->valid_lft); if (!((flags & IFA_F_PERMANENT) && (preferred == INFINITY_LIFE_TIME))) { if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - READ_ONCE(ifa->tstamp)) / HZ; if (preferred > tval) preferred -= tval; else preferred = 0; if (valid != INFINITY_LIFE_TIME) { if (valid > tval) valid -= tval; else valid = 0; } } } else { preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0) goto error; } else { if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0) goto error; } priority = READ_ONCE(ifa->rt_priority); if (priority && nla_put_u32(skb, IFA_RT_PRIORITY, priority)) goto error; if (put_cacheinfo(skb, ifa->cstamp, READ_ONCE(ifa->tstamp), preferred, valid) < 0) goto error; if (nla_put_u32(skb, IFA_FLAGS, flags) < 0) goto error; proto = READ_ONCE(ifa->ifa_proto); if (proto && nla_put_u8(skb, IFA_PROTO, proto)) goto error; nlmsg_end(skb, nlh); return 0; error: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int inet6_fill_ifmcaddr(struct sk_buff *skb, const struct ifmcaddr6 *ifmca, struct inet6_fill_args *args) { int ifindex = ifmca->idev->dev->ifindex; u8 scope = RT_SCOPE_UNIVERSE; struct nlmsghdr *nlh; if 
(ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; if (args->netnsid >= 0 && nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; } put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || put_cacheinfo(skb, ifmca->mca_cstamp, READ_ONCE(ifmca->mca_tstamp), INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; } nlmsg_end(skb, nlh); return 0; } static int inet6_fill_ifacaddr(struct sk_buff *skb, const struct ifacaddr6 *ifaca, struct inet6_fill_args *args) { struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt); int ifindex = dev ? dev->ifindex : 1; u8 scope = RT_SCOPE_UNIVERSE; struct nlmsghdr *nlh; if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; if (args->netnsid >= 0 && nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; } put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || put_cacheinfo(skb, ifaca->aca_cstamp, READ_ONCE(ifaca->aca_tstamp), INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; } nlmsg_end(skb, nlh); return 0; } /* called with rcu_read_lock() */ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb, struct netlink_callback *cb, int *s_ip_idx, struct inet6_fill_args *fillargs) { const struct ifmcaddr6 *ifmca; const struct ifacaddr6 *ifaca; int ip_idx = 0; int err = 0; switch (fillargs->type) { case UNICAST_ADDR: { const struct inet6_ifaddr *ifa; fillargs->event = RTM_NEWADDR; /* unicast address incl. temp addr */ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { if (ip_idx < *s_ip_idx) goto next; err = inet6_fill_ifaddr(skb, ifa, fillargs); if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); next: ip_idx++; } break; } case MULTICAST_ADDR: fillargs->event = RTM_GETMULTICAST; /* multicast address */ for (ifmca = rcu_dereference(idev->mc_list); ifmca; ifmca = rcu_dereference(ifmca->next), ip_idx++) { if (ip_idx < *s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); if (err < 0) break; } break; case ANYCAST_ADDR: fillargs->event = RTM_GETANYCAST; /* anycast address */ for (ifaca = rcu_dereference(idev->ac_list); ifaca; ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) { if (ip_idx < *s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, fillargs); if (err < 0) break; } break; default: break; } *s_ip_idx = err ? 
ip_idx : 0; return err; } static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, struct inet6_fill_args *fillargs, struct net **tgt_net, struct sock *sk, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[IFA_MAX+1]; struct ifaddrmsg *ifm; int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for address dump request"); return -EINVAL; } ifm = nlmsg_data(nlh); if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address dump request"); return -EINVAL; } fillargs->ifindex = ifm->ifa_index; if (fillargs->ifindex) { cb->answer_flags |= NLM_F_DUMP_FILTERED; fillargs->flags |= NLM_F_DUMP_FILTERED; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); if (err < 0) return err; for (i = 0; i <= IFA_MAX; ++i) { if (!tb[i]) continue; if (i == IFA_TARGET_NETNSID) { struct net *net; fillargs->netnsid = nla_get_s32(tb[i]); net = rtnl_get_net_ns_capable(sk, fillargs->netnsid); if (IS_ERR(net)) { fillargs->netnsid = -1; NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id"); return PTR_ERR(net); } *tgt_net = net; } else { NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request"); return -EINVAL; } } return 0; } static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type) { struct net *tgt_net = sock_net(skb->sk); const struct nlmsghdr *nlh = cb->nlh; struct inet6_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = cb->nlh->nlmsg_seq, .flags = NLM_F_MULTI, .netnsid = -1, .type = type, }; struct { unsigned long ifindex; int ip_idx; } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; int err = 0; rcu_read_lock(); if (cb->strict_check) { err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) goto done; err = 0; if (fillargs.ifindex) { dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); if (!dev) { err = -ENODEV; goto done; } idev = __in6_dev_get(dev); if (idev) err = in6_dump_addrs(idev, skb, cb, &ctx->ip_idx, &fillargs); goto done; } } cb->seq = inet6_base_seq(tgt_net); for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { idev = __in6_dev_get(dev); if (!idev) continue; err = in6_dump_addrs(idev, skb, cb, &ctx->ip_idx, &fillargs); if (err < 0) goto done; } done: rcu_read_unlock(); if (fillargs.netnsid >= 0) put_net(tgt_net); return err; } static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { enum addr_type_t type = UNICAST_ADDR; return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { enum addr_type_t type = MULTICAST_ADDR; return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) { enum addr_type_t type = ANYCAST_ADDR; return inet6_dump_addr(skb, cb, type); } static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct ifaddrmsg *ifm; int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); ifm = nlmsg_data(nlh); if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { NL_SET_ERR_MSG_MOD(extack, "Invalid 
values in header for get address request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); if (err) return err; for (i = 0; i <= IFA_MAX; i++) { if (!tb[i]) continue; switch (i) { case IFA_TARGET_NETNSID: case IFA_ADDRESS: case IFA_LOCAL: break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get address request"); return -EINVAL; } } return 0; } static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *tgt_net = sock_net(in_skb->sk); struct inet6_fill_args fillargs = { .portid = NETLINK_CB(in_skb).portid, .seq = nlh->nlmsg_seq, .event = RTM_NEWADDR, .flags = 0, .netnsid = -1, }; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL, *peer; struct net_device *dev = NULL; struct inet6_ifaddr *ifa; struct sk_buff *skb; int err; err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack); if (err < 0) return err; if (tb[IFA_TARGET_NETNSID]) { fillargs.netnsid = nla_get_s32(tb[IFA_TARGET_NETNSID]); tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(in_skb).sk, fillargs.netnsid); if (IS_ERR(tgt_net)) return PTR_ERR(tgt_net); } addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); if (!addr) { err = -EINVAL; goto errout; } ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = dev_get_by_index(tgt_net, ifm->ifa_index); ifa = ipv6_get_ifaddr(tgt_net, addr, dev, 1); if (!ifa) { err = -EADDRNOTAVAIL; goto errout; } skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL); if (!skb) { err = -ENOBUFS; goto errout_ifa; } err = inet6_fill_ifaddr(skb, ifa, &fillargs); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout_ifa; } err = rtnl_unicast(skb, tgt_net, NETLINK_CB(in_skb).portid); errout_ifa: in6_ifa_put(ifa); errout: dev_put(dev); if (fillargs.netnsid >= 0) put_net(tgt_net); return err; } static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; struct net *net = dev_net(ifa->idev->dev); struct inet6_fill_args fillargs = { .portid = 0, .seq = 0, .event = event, .flags = 0, .netnsid = -1, }; int err = -ENOBUFS; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); if (!skb) goto errout; err = inet6_fill_ifaddr(skb, ifa, &fillargs); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); } static void ipv6_store_devconf(const struct ipv6_devconf *cnf, __s32 *array, int bytes) { BUG_ON(bytes < (DEVCONF_MAX * 4)); memset(array, 0, bytes); array[DEVCONF_FORWARDING] = READ_ONCE(cnf->forwarding); array[DEVCONF_HOPLIMIT] = READ_ONCE(cnf->hop_limit); array[DEVCONF_MTU6] = READ_ONCE(cnf->mtu6); array[DEVCONF_ACCEPT_RA] = READ_ONCE(cnf->accept_ra); array[DEVCONF_ACCEPT_REDIRECTS] = READ_ONCE(cnf->accept_redirects); array[DEVCONF_AUTOCONF] = READ_ONCE(cnf->autoconf); array[DEVCONF_DAD_TRANSMITS] = READ_ONCE(cnf->dad_transmits); array[DEVCONF_RTR_SOLICITS] = READ_ONCE(cnf->rtr_solicits); array[DEVCONF_RTR_SOLICIT_INTERVAL] = jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_interval)); array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_max_interval)); array[DEVCONF_RTR_SOLICIT_DELAY] = jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_delay)); array[DEVCONF_FORCE_MLD_VERSION] = READ_ONCE(cnf->force_mld_version); 
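	/* Settings kept internally in jiffies (such as the MLD unsolicited
	 * report intervals below) are exported to userspace in milliseconds
	 * via jiffies_to_msecs(); plain integer knobs are copied as-is,
	 * using READ_ONCE() since they may be updated concurrently.
	 */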
array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] = jiffies_to_msecs(READ_ONCE(cnf->mldv1_unsolicited_report_interval)); array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = jiffies_to_msecs(READ_ONCE(cnf->mldv2_unsolicited_report_interval)); array[DEVCONF_USE_TEMPADDR] = READ_ONCE(cnf->use_tempaddr); array[DEVCONF_TEMP_VALID_LFT] = READ_ONCE(cnf->temp_valid_lft); array[DEVCONF_TEMP_PREFERED_LFT] = READ_ONCE(cnf->temp_prefered_lft); array[DEVCONF_REGEN_MAX_RETRY] = READ_ONCE(cnf->regen_max_retry); array[DEVCONF_MAX_DESYNC_FACTOR] = READ_ONCE(cnf->max_desync_factor); array[DEVCONF_MAX_ADDRESSES] = READ_ONCE(cnf->max_addresses); array[DEVCONF_ACCEPT_RA_DEFRTR] = READ_ONCE(cnf->accept_ra_defrtr); array[DEVCONF_RA_DEFRTR_METRIC] = READ_ONCE(cnf->ra_defrtr_metric); array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = READ_ONCE(cnf->accept_ra_min_hop_limit); array[DEVCONF_ACCEPT_RA_PINFO] = READ_ONCE(cnf->accept_ra_pinfo); #ifdef CONFIG_IPV6_ROUTER_PREF array[DEVCONF_ACCEPT_RA_RTR_PREF] = READ_ONCE(cnf->accept_ra_rtr_pref); array[DEVCONF_RTR_PROBE_INTERVAL] = jiffies_to_msecs(READ_ONCE(cnf->rtr_probe_interval)); #ifdef CONFIG_IPV6_ROUTE_INFO array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = READ_ONCE(cnf->accept_ra_rt_info_min_plen); array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = READ_ONCE(cnf->accept_ra_rt_info_max_plen); #endif #endif array[DEVCONF_PROXY_NDP] = READ_ONCE(cnf->proxy_ndp); array[DEVCONF_ACCEPT_SOURCE_ROUTE] = READ_ONCE(cnf->accept_source_route); #ifdef CONFIG_IPV6_OPTIMISTIC_DAD array[DEVCONF_OPTIMISTIC_DAD] = READ_ONCE(cnf->optimistic_dad); array[DEVCONF_USE_OPTIMISTIC] = READ_ONCE(cnf->use_optimistic); #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif array[DEVCONF_DISABLE_IPV6] = READ_ONCE(cnf->disable_ipv6); array[DEVCONF_ACCEPT_DAD] = READ_ONCE(cnf->accept_dad); array[DEVCONF_FORCE_TLLAO] = READ_ONCE(cnf->force_tllao); array[DEVCONF_NDISC_NOTIFY] = READ_ONCE(cnf->ndisc_notify); array[DEVCONF_SUPPRESS_FRAG_NDISC] = READ_ONCE(cnf->suppress_frag_ndisc); array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = READ_ONCE(cnf->accept_ra_from_local); array[DEVCONF_ACCEPT_RA_MTU] = READ_ONCE(cnf->accept_ra_mtu); array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = READ_ONCE(cnf->ignore_routes_with_linkdown); /* we omit DEVCONF_STABLE_SECRET for now */ array[DEVCONF_USE_OIF_ADDRS_ONLY] = READ_ONCE(cnf->use_oif_addrs_only); array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = READ_ONCE(cnf->drop_unicast_in_l2_multicast); array[DEVCONF_DROP_UNSOLICITED_NA] = READ_ONCE(cnf->drop_unsolicited_na); array[DEVCONF_KEEP_ADDR_ON_DOWN] = READ_ONCE(cnf->keep_addr_on_down); array[DEVCONF_SEG6_ENABLED] = READ_ONCE(cnf->seg6_enabled); #ifdef CONFIG_IPV6_SEG6_HMAC array[DEVCONF_SEG6_REQUIRE_HMAC] = READ_ONCE(cnf->seg6_require_hmac); #endif array[DEVCONF_ENHANCED_DAD] = READ_ONCE(cnf->enhanced_dad); array[DEVCONF_ADDR_GEN_MODE] = READ_ONCE(cnf->addr_gen_mode); array[DEVCONF_DISABLE_POLICY] = READ_ONCE(cnf->disable_policy); array[DEVCONF_NDISC_TCLASS] = READ_ONCE(cnf->ndisc_tclass); array[DEVCONF_RPL_SEG_ENABLED] = READ_ONCE(cnf->rpl_seg_enabled); array[DEVCONF_IOAM6_ENABLED] = READ_ONCE(cnf->ioam6_enabled); array[DEVCONF_IOAM6_ID] = READ_ONCE(cnf->ioam6_id); array[DEVCONF_IOAM6_ID_WIDE] = READ_ONCE(cnf->ioam6_id_wide); array[DEVCONF_NDISC_EVICT_NOCARRIER] = READ_ONCE(cnf->ndisc_evict_nocarrier); array[DEVCONF_ACCEPT_UNTRACKED_NA] = READ_ONCE(cnf->accept_untracked_na); array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft); } static inline size_t inet6_ifla6_size(void) { return 
nla_total_size(4) /* IFLA_INET6_FLAGS */ + nla_total_size(sizeof(struct ifla_cacheinfo)) + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */ + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */ + nla_total_size(4) /* IFLA_INET6_RA_MTU */ + 0; } static inline size_t inet6_if_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */ } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, int bytes) { int i; int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ put_unaligned(ICMP6_MIB_MAX, &stats[0]); for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read(&mib[i]), &stats[i]); memset(&stats[ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, int bytes, size_t syncpoff) { int i, c; u64 buff[IPSTATS_MIB_MAX]; int pad = bytes - sizeof(u64) * IPSTATS_MIB_MAX; BUG_ON(pad < 0); memset(buff, 0, sizeof(buff)); buff[0] = IPSTATS_MIB_MAX; for_each_possible_cpu(c) { for (i = 1; i < IPSTATS_MIB_MAX; i++) buff[i] += snmp_get_cpu_field64(mib, c, i, syncpoff); } memcpy(stats, buff, IPSTATS_MIB_MAX * sizeof(u64)); memset(&stats[IPSTATS_MIB_MAX], 0, pad); } static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes) { switch (attrtype) { case IFLA_INET6_STATS: __snmp6_fill_stats64(stats, idev->stats.ipv6, bytes, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, u32 ext_filter_mask) { struct ifla_cacheinfo ci; struct nlattr *nla; u32 ra_mtu; if (nla_put_u32(skb, IFLA_INET6_FLAGS, READ_ONCE(idev->if_flags))) goto nla_put_failure; ci.max_reasm_len = IPV6_MAXPLEN; ci.tstamp = cstamp_delta(READ_ONCE(idev->tstamp)); ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME)); if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); if (!nla) goto nla_put_failure; ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); /* XXX - MC not implemented */ if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS) return 0; nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); if (!nla) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); if (!nla) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); if (!nla) goto nla_put_failure; read_lock_bh(&idev->lock); memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); read_unlock_bh(&idev->lock); if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, READ_ONCE(idev->cnf.addr_gen_mode))) goto nla_put_failure; ra_mtu = READ_ONCE(idev->ra_mtu); if (ra_mtu && 
nla_put_u32(skb, IFLA_INET6_RA_MTU, ra_mtu)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static size_t inet6_get_link_af_size(const struct net_device *dev, u32 ext_filter_mask) { if (!__in6_dev_get(dev)) return 0; return inet6_ifla6_size(); } static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev, u32 ext_filter_mask) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev) return -ENODATA; if (inet6_fill_ifla6_attrs(skb, idev, ext_filter_mask) < 0) return -EMSGSIZE; return 0; } static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct net_device *dev = idev->dev; bool clear_token, update_rs = false; struct in6_addr ll_addr; ASSERT_RTNL(); if (!token) return -EINVAL; if (dev->flags & IFF_LOOPBACK) { NL_SET_ERR_MSG_MOD(extack, "Device is loopback"); return -EINVAL; } if (dev->flags & IFF_NOARP) { NL_SET_ERR_MSG_MOD(extack, "Device does not do neighbour discovery"); return -EINVAL; } if (!ipv6_accept_ra(idev)) { NL_SET_ERR_MSG_MOD(extack, "Router advertisement is disabled on device"); return -EINVAL; } if (READ_ONCE(idev->cnf.rtr_solicits) == 0) { NL_SET_ERR_MSG(extack, "Router solicitation is disabled on device"); return -EINVAL; } write_lock_bh(&idev->lock); BUILD_BUG_ON(sizeof(token->s6_addr) != 16); memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8); write_unlock_bh(&idev->lock); clear_token = ipv6_addr_any(token); if (clear_token) goto update_lft; if (!idev->dead && (idev->if_flags & IF_READY) && !ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)) { /* If we're not ready, then normal ifup will take care * of this. Otherwise, we need to request our rs here. */ ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters); update_rs = true; } update_lft: write_lock_bh(&idev->lock); if (update_rs) { idev->if_flags |= IF_RS_SENT; idev->rs_interval = rfc3315_s14_backoff_init( READ_ONCE(idev->cnf.rtr_solicit_interval)); idev->rs_probes = 1; addrconf_mod_rs_timer(idev, idev->rs_interval); } /* Well, that's kinda nasty ... 
*/ list_for_each_entry(ifp, &idev->addr_list, if_list) { spin_lock(&ifp->lock); if (ifp->tokenized) { ifp->valid_lft = 0; ifp->prefered_lft = 0; } spin_unlock(&ifp->lock); } write_unlock_bh(&idev->lock); inet6_ifinfo_notify(RTM_NEWLINK, idev); addrconf_verify_rtnl(dev_net(dev)); return 0; } static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = { [IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 }, [IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) }, [IFLA_INET6_RA_MTU] = { .type = NLA_REJECT, .reject_message = "IFLA_INET6_RA_MTU can not be set" }, }; static int check_addr_gen_mode(int mode) { if (mode != IN6_ADDR_GEN_MODE_EUI64 && mode != IN6_ADDR_GEN_MODE_NONE && mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && mode != IN6_ADDR_GEN_MODE_RANDOM) return -EINVAL; return 1; } static int check_stable_privacy(struct inet6_dev *idev, struct net *net, int mode) { if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && !idev->cnf.stable_secret.initialized && !net->ipv6.devconf_dflt->stable_secret.initialized) return -EINVAL; return 1; } static int inet6_validate_link_af(const struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_INET6_MAX + 1]; struct inet6_dev *idev = NULL; int err; if (dev) { idev = __in6_dev_get(dev); if (!idev) return -EAFNOSUPPORT; } err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, inet6_af_policy, extack); if (err) return err; if (!tb[IFLA_INET6_TOKEN] && !tb[IFLA_INET6_ADDR_GEN_MODE]) return -EINVAL; if (tb[IFLA_INET6_ADDR_GEN_MODE]) { u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); if (check_addr_gen_mode(mode) < 0) return -EINVAL; if (dev && check_stable_privacy(idev, dev_net(dev), mode) < 0) return -EINVAL; } return 0; } static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct inet6_dev *idev = __in6_dev_get(dev); struct nlattr *tb[IFLA_INET6_MAX + 1]; int err; if (!idev) return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) return -EINVAL; if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), extack); if (err) return err; } if (tb[IFLA_INET6_ADDR_GEN_MODE]) { u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); WRITE_ONCE(idev->cnf.addr_gen_mode, mode); } return 0; } static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 portid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; struct ifinfomsg *hdr; struct nlmsghdr *nlh; int ifindex, iflink; void *protoinfo; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); if (!nlh) return -EMSGSIZE; hdr = nlmsg_data(nlh); hdr->ifi_family = AF_INET6; hdr->__ifi_pad = 0; hdr->ifi_type = dev->type; ifindex = READ_ONCE(dev->ifindex); hdr->ifi_index = ifindex; hdr->ifi_flags = dev_get_flags(dev); hdr->ifi_change = 0; iflink = dev_get_iflink(dev); if (nla_put_string(skb, IFLA_IFNAME, dev->name) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) || (ifindex != iflink && nla_put_u32(skb, IFLA_LINK, iflink)) || nla_put_u8(skb, IFLA_OPERSTATE, netif_running(dev) ? 
READ_ONCE(dev->operstate) : IF_OPER_DOWN)) goto nla_put_failure; protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO); if (!protoinfo) goto nla_put_failure; if (inet6_fill_ifla6_attrs(skb, idev, 0) < 0) goto nla_put_failure; nla_nest_end(skb, protoinfo); nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct ifinfomsg *ifm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for link dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*ifm))) { NL_SET_ERR_MSG_MOD(extack, "Invalid data after header"); return -EINVAL; } ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for dump request"); return -EINVAL; } return 0; } static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct { unsigned long ifindex; } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; int err; /* only requests using strict checking can pass data to * influence the dump */ if (cb->strict_check) { err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack); if (err < 0) return err; } err = 0; rcu_read_lock(); for_each_netdev_dump(net, dev, ctx->ifindex) { idev = __in6_dev_get(dev); if (!idev) continue; err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); if (err < 0) break; } rcu_read_unlock(); return err; } void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; struct net *net = dev_net(idev->dev); int err = -ENOBUFS; skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err); } static inline size_t inet6_prefix_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct prefixmsg)) + nla_total_size(sizeof(struct in6_addr)) + nla_total_size(sizeof(struct prefix_cacheinfo)); } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, struct prefix_info *pinfo, u32 portid, u32 seq, int event, unsigned int flags) { struct prefixmsg *pmsg; struct nlmsghdr *nlh; struct prefix_cacheinfo ci; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); if (!nlh) return -EMSGSIZE; pmsg = nlmsg_data(nlh); pmsg->prefix_family = AF_INET6; pmsg->prefix_pad1 = 0; pmsg->prefix_pad2 = 0; pmsg->prefix_ifindex = idev->dev->ifindex; pmsg->prefix_len = pinfo->prefix_len; pmsg->prefix_type = pinfo->type; pmsg->prefix_pad3 = 0; pmsg->prefix_flags = pinfo->flags; if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix)) goto nla_put_failure; ci.preferred_time = ntohl(pinfo->prefered); ci.valid_time = ntohl(pinfo->valid); if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; struct net *net = dev_net(idev->dev); int err = -ENOBUFS; skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); if (!skb) 
goto errout; err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); } static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); if (event) ASSERT_RTNL(); inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { case RTM_NEWADDR: /* * If the address was optimistic we inserted the route at the * start of our DAD process, so we don't need to do it again. * If the device was taken down in the middle of the DAD * cycle there is a race where we could get here without a * host route, so nothing to insert. That will be fixed when * the device is brought up. */ if (ifp->rt && !rcu_access_pointer(ifp->rt->fib6_node)) { ip6_ins_rt(net, ifp->rt); } else if (!ifp->rt && (ifp->idev->dev->flags & IFF_UP)) { pr_warn("BUG: Address %pI6c on device %s is missing its host route.\n", &ifp->addr, ifp->idev->dev->name); } if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) addrconf_prefix_route(&ifp->peer_addr, 128, ifp->rt_priority, ifp->idev->dev, 0, 0, GFP_ATOMIC); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); if (!ipv6_addr_any(&ifp->peer_addr)) { struct fib6_info *rt; rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, ifp->idev->dev, 0, 0, false); if (rt) ip6_del_rt(net, rt, false); } if (ifp->rt) { ip6_del_rt(net, ifp->rt, false); ifp->rt = NULL; } rt_genid_bump_ipv6(net); break; } atomic_inc(&net->ipv6.dev_addr_genid); } static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { if (likely(ifp->idev->dead == 0)) __ipv6_ifa_notify(event, ifp); } #ifdef CONFIG_SYSCTL static int addrconf_sysctl_forward(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; struct ctl_table lctl; int ret; /* * ctl->data points to idev->cnf.forwarding, we should * not modify it until we get the rtnl lock. */ lctl = *ctl; lctl.data = &val; ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_forwarding(ctl, valp, val); if (ret) *ppos = pos; return ret; } static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct inet6_dev *idev = ctl->extra1; int min_mtu = IPV6_MIN_MTU; struct ctl_table lctl; lctl = *ctl; lctl.extra1 = &min_mtu; lctl.extra2 = idev ? 
&idev->dev->mtu : NULL; return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); } static void dev_disable_change(struct inet6_dev *idev) { struct netdev_notifier_info info; if (!idev || !idev->dev) return; netdev_notifier_info_init(&info, idev->dev); if (idev->cnf.disable_ipv6) addrconf_notify(NULL, NETDEV_DOWN, &info); else addrconf_notify(NULL, NETDEV_UP, &info); } static void addrconf_disable_change(struct net *net, __s32 newf) { struct net_device *dev; struct inet6_dev *idev; for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); WRITE_ONCE(idev->cnf.disable_ipv6, newf); if (changed) dev_disable_change(idev); } } } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) { struct net *net = (struct net *)table->extra2; int old; if (p == &net->ipv6.devconf_dflt->disable_ipv6) { WRITE_ONCE(*p, newf); return 0; } if (!rtnl_trylock()) return restart_syscall(); old = *p; WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_all->disable_ipv6) { WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); } else if ((!newf) ^ (!old)) dev_disable_change((struct inet6_dev *)table->extra1); rtnl_unlock(); return 0; } static int addrconf_sysctl_disable(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; struct ctl_table lctl; int ret; /* * ctl->data points to idev->cnf.disable_ipv6, we should * not modify it until we get the rtnl lock. */ lctl = *ctl; lctl.data = &val; ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); if (write) ret = addrconf_disable_ipv6(ctl, valp, val); if (ret) *ppos = pos; return ret; } static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int ret; int old, new; old = *valp; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); new = *valp; if (write && old != new) { struct net *net = ctl->extra2; if (!rtnl_trylock()) return restart_syscall(); if (valp == &net->ipv6.devconf_dflt->proxy_ndp) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); else if (valp == &net->ipv6.devconf_all->proxy_ndp) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); else { struct inet6_dev *idev = ctl->extra1; inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_PROXY_NEIGH, idev->dev->ifindex, &idev->cnf); } rtnl_unlock(); } return ret; } static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; u32 new_val; struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; struct net *net = (struct net *)ctl->extra2; struct ctl_table tmp = { .data = &new_val, .maxlen = sizeof(new_val), .mode = ctl->mode, }; if (!rtnl_trylock()) return restart_syscall(); new_val = *((u32 *)ctl->data); ret = proc_douintvec(&tmp, write, buffer, lenp, ppos); if (ret != 0) goto out; if (write) { if (check_addr_gen_mode(new_val) < 0) { ret = -EINVAL; goto out; } if (idev) { if (check_stable_privacy(idev, net, new_val) < 0) { ret = -EINVAL; goto out; } if (idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); addrconf_init_auto_addrs(idev->dev); } } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val); 
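	/* A write to the "all" setting is propagated to the default (just
	 * above) and, in the loop below, to every existing device,
	 * re-running automatic address generation wherever the mode
	 * actually changes.
	 */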
for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev && idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); addrconf_init_auto_addrs(idev->dev); } } } WRITE_ONCE(*((u32 *)ctl->data), new_val); } out: rtnl_unlock(); return ret; } static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err; struct in6_addr addr; char str[IPV6_MAX_STRLEN]; struct ctl_table lctl = *ctl; struct net *net = ctl->extra2; struct ipv6_stable_secret *secret = ctl->data; if (&net->ipv6.devconf_all->stable_secret == ctl->data) return -EIO; lctl.maxlen = IPV6_MAX_STRLEN; lctl.data = str; if (!rtnl_trylock()) return restart_syscall(); if (!write && !secret->initialized) { err = -EIO; goto out; } err = snprintf(str, sizeof(str), "%pI6", &secret->secret); if (err >= sizeof(str)) { err = -EIO; goto out; } err = proc_dostring(&lctl, write, buffer, lenp, ppos); if (err || !write) goto out; if (in6_pton(str, -1, addr.in6_u.u6_addr8, -1, NULL) != 1) { err = -EIO; goto out; } secret->initialized = true; secret->secret = addr; if (&net->ipv6.devconf_dflt->stable_secret == ctl->data) { struct net_device *dev; for_each_netdev(net, dev) { struct inet6_dev *idev = __in6_dev_get(dev); if (idev) { WRITE_ONCE(idev->cnf.addr_gen_mode, IN6_ADDR_GEN_MODE_STABLE_PRIVACY); } } } else { struct inet6_dev *idev = ctl->extra1; WRITE_ONCE(idev->cnf.addr_gen_mode, IN6_ADDR_GEN_MODE_STABLE_PRIVACY); } out: rtnl_unlock(); return err; } static int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; struct ctl_table lctl; int ret; /* ctl->data points to idev->cnf.ignore_routes_when_linkdown * we should not modify it until we get the rtnl lock. */ lctl = *ctl; lctl.data = &val; ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_linkdown(ctl, valp, val); if (ret) *ppos = pos; return ret; } static void addrconf_set_nopolicy(struct rt6_info *rt, int action) { if (rt) { if (action) rt->dst.flags |= DST_NOPOLICY; else rt->dst.flags &= ~DST_NOPOLICY; } } static void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) { struct inet6_ifaddr *ifa; read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { spin_lock(&ifa->lock); if (ifa->rt) { /* host routes only use builtin fib6_nh */ struct fib6_nh *nh = ifa->rt->fib6_nh; int cpu; rcu_read_lock(); ifa->rt->dst_nopolicy = val ? 
true : false; if (nh->rt6i_pcpu) { for_each_possible_cpu(cpu) { struct rt6_info **rtp; rtp = per_cpu_ptr(nh->rt6i_pcpu, cpu); addrconf_set_nopolicy(*rtp, val); } } rcu_read_unlock(); } spin_unlock(&ifa->lock); } read_unlock_bh(&idev->lock); } static int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) { struct net *net = (struct net *)ctl->extra2; struct inet6_dev *idev; if (valp == &net->ipv6.devconf_dflt->disable_policy) { WRITE_ONCE(*valp, val); return 0; } if (!rtnl_trylock()) return restart_syscall(); WRITE_ONCE(*valp, val); if (valp == &net->ipv6.devconf_all->disable_policy) { struct net_device *dev; for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) addrconf_disable_policy_idev(idev, val); } } else { idev = (struct inet6_dev *)ctl->extra1; addrconf_disable_policy_idev(idev, val); } rtnl_unlock(); return 0; } static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; struct ctl_table lctl; int ret; lctl = *ctl; lctl.data = &val; ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); if (write && (*valp != val)) ret = addrconf_disable_policy(ctl, valp, val); if (ret) *ppos = pos; return ret; } static int minus_one = -1; static const int two_five_five = 255; static u32 ioam6_if_id_max = U16_MAX; static const struct ctl_table addrconf_sysctl[] = { { .procname = "forwarding", .data = &ipv6_devconf.forwarding, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_forward, }, { .procname = "hop_limit", .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&two_five_five, }, { .procname = "mtu", .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_mtu, }, { .procname = "accept_ra", .data = &ipv6_devconf.accept_ra, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "accept_redirects", .data = &ipv6_devconf.accept_redirects, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "autoconf", .data = &ipv6_devconf.autoconf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "dad_transmits", .data = &ipv6_devconf.dad_transmits, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "router_solicitations", .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &minus_one, }, { .procname = "router_solicitation_interval", .data = &ipv6_devconf.rtr_solicit_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "router_solicitation_max_interval", .data = &ipv6_devconf.rtr_solicit_max_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "force_mld_version", .data = &ipv6_devconf.force_mld_version, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "mldv1_unsolicited_report_interval", .data = &ipv6_devconf.mldv1_unsolicited_report_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, { .procname = "mldv2_unsolicited_report_interval", .data = 
&ipv6_devconf.mldv2_unsolicited_report_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, { .procname = "use_tempaddr", .data = &ipv6_devconf.use_tempaddr, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "temp_valid_lft", .data = &ipv6_devconf.temp_valid_lft, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "temp_prefered_lft", .data = &ipv6_devconf.temp_prefered_lft, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "regen_min_advance", .data = &ipv6_devconf.regen_min_advance, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "regen_max_retry", .data = &ipv6_devconf.regen_max_retry, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_desync_factor", .data = &ipv6_devconf.max_desync_factor, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_addresses", .data = &ipv6_devconf.max_addresses, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "accept_ra_defrtr", .data = &ipv6_devconf.accept_ra_defrtr, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ra_defrtr_metric", .data = &ipv6_devconf.ra_defrtr_metric, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_douintvec_minmax, .extra1 = (void *)SYSCTL_ONE, }, { .procname = "accept_ra_min_hop_limit", .data = &ipv6_devconf.accept_ra_min_hop_limit, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "accept_ra_min_lft", .data = &ipv6_devconf.accept_ra_min_lft, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "accept_ra_pinfo", .data = &ipv6_devconf.accept_ra_pinfo, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ra_honor_pio_life", .data = &ipv6_devconf.ra_honor_pio_life, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_IPV6_ROUTER_PREF { .procname = "accept_ra_rtr_pref", .data = &ipv6_devconf.accept_ra_rtr_pref, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "router_probe_interval", .data = &ipv6_devconf.rtr_probe_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { .procname = "accept_ra_rt_info_min_plen", .data = &ipv6_devconf.accept_ra_rt_info_min_plen, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "accept_ra_rt_info_max_plen", .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif #endif { .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_proxy_ndp, }, { .procname = "accept_source_route", .data = &ipv6_devconf.accept_source_route, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { .procname = "optimistic_dad", .data = &ipv6_devconf.optimistic_dad, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "use_optimistic", .data = &ipv6_devconf.use_optimistic, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif #ifdef CONFIG_IPV6_MROUTE { .procname = "mc_forwarding", .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), 
.mode = 0444, .proc_handler = proc_dointvec, }, #endif { .procname = "disable_ipv6", .data = &ipv6_devconf.disable_ipv6, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_disable, }, { .procname = "accept_dad", .data = &ipv6_devconf.accept_dad, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "force_tllao", .data = &ipv6_devconf.force_tllao, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "ndisc_notify", .data = &ipv6_devconf.ndisc_notify, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "suppress_frag_ndisc", .data = &ipv6_devconf.suppress_frag_ndisc, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "accept_ra_from_local", .data = &ipv6_devconf.accept_ra_from_local, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "accept_ra_mtu", .data = &ipv6_devconf.accept_ra_mtu, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "stable_secret", .data = &ipv6_devconf.stable_secret, .maxlen = IPV6_MAX_STRLEN, .mode = 0600, .proc_handler = addrconf_sysctl_stable_secret, }, { .procname = "use_oif_addrs_only", .data = &ipv6_devconf.use_oif_addrs_only, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ignore_routes_with_linkdown", .data = &ipv6_devconf.ignore_routes_with_linkdown, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, }, { .procname = "drop_unicast_in_l2_multicast", .data = &ipv6_devconf.drop_unicast_in_l2_multicast, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "drop_unsolicited_na", .data = &ipv6_devconf.drop_unsolicited_na, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "keep_addr_on_down", .data = &ipv6_devconf.keep_addr_on_down, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "seg6_enabled", .data = &ipv6_devconf.seg6_enabled, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #ifdef CONFIG_IPV6_SEG6_HMAC { .procname = "seg6_require_hmac", .data = &ipv6_devconf.seg6_require_hmac, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif { .procname = "enhanced_dad", .data = &ipv6_devconf.enhanced_dad, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addr_gen_mode", .data = &ipv6_devconf.addr_gen_mode, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_addr_gen_mode, }, { .procname = "disable_policy", .data = &ipv6_devconf.disable_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_disable_policy, }, { .procname = "ndisc_tclass", .data = &ipv6_devconf.ndisc_tclass, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)SYSCTL_ZERO, .extra2 = (void *)&two_five_five, }, { .procname = "rpl_seg_enabled", .data = &ipv6_devconf.rpl_seg_enabled, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ioam6_enabled", .data = &ipv6_devconf.ioam6_enabled, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = (void *)SYSCTL_ZERO, .extra2 = (void *)SYSCTL_ONE, }, { .procname = "ioam6_id", .data = &ipv6_devconf.ioam6_id, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_douintvec_minmax, .extra1 = (void *)SYSCTL_ZERO, .extra2 = (void *)&ioam6_if_id_max, }, { 
.procname = "ioam6_id_wide", .data = &ipv6_devconf.ioam6_id_wide, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_douintvec, }, { .procname = "ndisc_evict_nocarrier", .data = &ipv6_devconf.ndisc_evict_nocarrier, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = (void *)SYSCTL_ZERO, .extra2 = (void *)SYSCTL_ONE, }, { .procname = "accept_untracked_na", .data = &ipv6_devconf.accept_untracked_na, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, { /* sentinel */ } }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct inet6_dev *idev, struct ipv6_devconf *p) { int i, ifindex; struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL_ACCOUNT); if (!table) goto out; for (i = 0; table[i].data; i++) { table[i].data += (char *)p - (char *)&ipv6_devconf; /* If one of these is already set, then it is not safe to * overwrite either of them: this makes proc_dointvec_minmax * usable. */ if (!table[i].extra1 && !table[i].extra2) { table[i].extra1 = idev; /* embedded; no ref */ table[i].extra2 = net; } } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); p->sysctl_header = register_net_sysctl_sz(net, path, table, ARRAY_SIZE(addrconf_sysctl)); if (!p->sysctl_header) goto free; if (!strcmp(dev_name, "all")) ifindex = NETCONFA_IFINDEX_ALL; else if (!strcmp(dev_name, "default")) ifindex = NETCONFA_IFINDEX_DEFAULT; else ifindex = idev->dev->ifindex; inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, ifindex, p); return 0; free: kfree(table); out: return -ENOBUFS; } static void __addrconf_sysctl_unregister(struct net *net, struct ipv6_devconf *p, int ifindex) { struct ctl_table *table; if (!p->sysctl_header) return; table = p->sysctl_header->ctl_table_arg; unregister_net_sysctl_table(p->sysctl_header); p->sysctl_header = NULL; kfree(table); inet6_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int addrconf_sysctl_register(struct inet6_dev *idev) { int err; if (!sysctl_dev_name_is_allowed(idev->dev->name)) return -EINVAL; err = neigh_sysctl_register(idev->dev, idev->nd_parms, &ndisc_ifinfo_sysctl_change); if (err) return err; err = __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev, &idev->cnf); if (err) neigh_sysctl_unregister(idev->nd_parms); return err; } static void addrconf_sysctl_unregister(struct inet6_dev *idev) { __addrconf_sysctl_unregister(dev_net(idev->dev), &idev->cnf, idev->dev->ifindex); neigh_sysctl_unregister(idev->nd_parms); } #endif static int __net_init addrconf_init_net(struct net *net) { int err = -ENOMEM; struct ipv6_devconf *all, *dflt; spin_lock_init(&net->ipv6.addrconf_hash_lock); INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work); net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!net->ipv6.inet6_addr_lst) goto err_alloc_addr; all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); if (!dflt) goto err_alloc_dflt; if (!net_eq(net, &init_net)) { switch (net_inherit_devconf()) { case 1: /* copy from init_net */ memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); break; case 3: /* copy from the current netns */ memcpy(all, 
current->nsproxy->net_ns->ipv6.devconf_all, sizeof(ipv6_devconf)); memcpy(dflt, current->nsproxy->net_ns->ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); break; case 0: case 2: /* use compiled values */ break; } } /* these will be inherited by all namespaces */ dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; dflt->stable_secret.initialized = false; all->stable_secret.initialized = false; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; #ifdef CONFIG_SYSCTL err = __addrconf_sysctl_register(net, "all", NULL, all); if (err < 0) goto err_reg_all; err = __addrconf_sysctl_register(net, "default", NULL, dflt); if (err < 0) goto err_reg_dflt; #endif return 0; #ifdef CONFIG_SYSCTL err_reg_dflt: __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: kfree(dflt); net->ipv6.devconf_dflt = NULL; #endif err_alloc_dflt: kfree(all); net->ipv6.devconf_all = NULL; err_alloc_all: kfree(net->ipv6.inet6_addr_lst); err_alloc_addr: return err; } static void __net_exit addrconf_exit_net(struct net *net) { int i; #ifdef CONFIG_SYSCTL __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt, NETCONFA_IFINDEX_DEFAULT); __addrconf_sysctl_unregister(net, net->ipv6.devconf_all, NETCONFA_IFINDEX_ALL); #endif kfree(net->ipv6.devconf_dflt); net->ipv6.devconf_dflt = NULL; kfree(net->ipv6.devconf_all); net->ipv6.devconf_all = NULL; cancel_delayed_work_sync(&net->ipv6.addr_chk_work); /* * Check hash table, then free it. */ for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&net->ipv6.inet6_addr_lst[i])); kfree(net->ipv6.inet6_addr_lst); net->ipv6.inet6_addr_lst = NULL; } static struct pernet_operations addrconf_ops = { .init = addrconf_init_net, .exit = addrconf_exit_net, }; static struct rtnl_af_ops inet6_ops __read_mostly = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, .validate_link_af = inet6_validate_link_af, .set_link_af = inet6_set_link_af, }; /* * Init / cleanup code */ int __init addrconf_init(void) { struct inet6_dev *idev; int err; err = ipv6_addr_label_init(); if (err < 0) { pr_crit("%s: cannot initialize default policy table: %d\n", __func__, err); goto out; } err = register_pernet_subsys(&addrconf_ops); if (err < 0) goto out_addrlabel; /* All works using addrconf_wq need to lock rtnl. 
*/ addrconf_wq = create_singlethread_workqueue("ipv6_addrconf"); if (!addrconf_wq) { err = -ENOMEM; goto out_nowq; } rtnl_lock(); idev = ipv6_add_dev(blackhole_netdev); rtnl_unlock(); if (IS_ERR(idev)) { err = PTR_ERR(idev); goto errlo; } ip6_route_init_special_entries(); register_netdevice_notifier(&ipv6_dev_notf); addrconf_verify(&init_net); rtnl_af_register(&inet6_ops); err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr, RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr, RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = ipv6_addr_label_rtnl_register(); if (err < 0) goto errout; return 0; errout: rtnl_unregister_all(PF_INET6); rtnl_af_unregister(&inet6_ops); unregister_netdevice_notifier(&ipv6_dev_notf); errlo: destroy_workqueue(addrconf_wq); out_nowq: unregister_pernet_subsys(&addrconf_ops); out_addrlabel: ipv6_addr_label_cleanup(); out: return err; } void addrconf_cleanup(void) { struct net_device *dev; unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); ipv6_addr_label_cleanup(); rtnl_af_unregister(&inet6_ops); rtnl_lock(); /* clean dev list */ for_each_netdev(&init_net, dev) { if (__in6_dev_get(dev) == NULL) continue; addrconf_ifdown(dev, true); } addrconf_ifdown(init_net.loopback_dev, true); rtnl_unlock(); destroy_workqueue(addrconf_wq); }
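/*
 * Illustrative userspace sketch (not part of the kernel sources above): it
 * exercises the RTM_GETADDR dump path that addrconf_init() registers
 * (inet6_dump_ifaddr) by sending a NETLINK_ROUTE dump request and printing
 * the interface index and prefix length of each IPv6 address returned.
 * Attribute parsing (IFA_ADDRESS etc.) and most error handling are omitted
 * to keep the sketch short; all names below are local to this example.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
        struct {
                struct nlmsghdr nlh;
                struct ifaddrmsg ifa;
        } req;
        char buf[16384];
        ssize_t len;
        int fd;

        fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
        if (fd < 0)
                return 1;

        memset(&req, 0, sizeof(req));
        req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
        req.nlh.nlmsg_type = RTM_GETADDR;
        req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
        req.ifa.ifa_family = AF_INET6;

        if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0)
                return 1;

        while ((len = recv(fd, buf, sizeof(buf), 0)) > 0) {
                struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
                int rem = (int)len;

                for (; NLMSG_OK(nlh, rem); nlh = NLMSG_NEXT(nlh, rem)) {
                        struct ifaddrmsg *ifa;

                        if (nlh->nlmsg_type == NLMSG_DONE ||
                            nlh->nlmsg_type == NLMSG_ERROR)
                                goto out;

                        ifa = (struct ifaddrmsg *)NLMSG_DATA(nlh);
                        printf("ifindex %u prefixlen %u\n",
                               (unsigned int)ifa->ifa_index,
                               (unsigned int)ifa->ifa_prefixlen);
                }
        }
out:
        close(fd);
        return 0;
}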
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_RTNETLINK_H #define __LINUX_RTNETLINK_H #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/wait.h> #include <linux/refcount.h> #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); static inline int rtnetlink_maybe_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo) { return !skb ? 0 : rtnetlink_send(skb, net, pid, group, echo); } extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, const struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, int new_ifindex, u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); /* RTNL is used as a global lock for all changes to network configuration */ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else static inline bool lockdep_rtnl_is_held(void) { return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ /** * rcu_dereference_rtnl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference() */ #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL.
*/ #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) /** * rcu_replace_pointer_rtnl - replace an RCU pointer under rtnl_lock, returning * its old value * @rp: RCU pointer, whose value is returned * @p: regular pointer * * Perform a replacement under rtnl_lock, where @rp is an RCU-annotated * pointer. The old value of @rp is returned, and @rp is set to @p */ #define rcu_replace_pointer_rtnl(rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); } static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) { return rcu_dereference(dev->ingress_queue); } struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif #ifdef CONFIG_NET_EGRESS void net_inc_egress_queue(void); void net_dec_egress_queue(void); void netdev_xmit_skip_txqueue(bool skip); #endif void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); #define ASSERT_RTNL() \ WARN_ONCE(!rtnl_is_locked(), \ "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags); extern int ndo_dflt_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, u32 flags, u32 mask, int nlflags, u32 filter_mask, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)); extern void rtnl_offload_xstats_notify(struct net_device *dev); static inline int rtnl_has_listeners(const struct net *net, u32 group) { struct sock *rtnl = net->rtnl; return netlink_has_listeners(rtnl, group); } /** * rtnl_notify_needed - check if notification is needed * @net: Pointer to the net namespace * @nlflags: netlink ingress message flags * @group: rtnl group * * Based on the ingress message flags and rtnl group, returns true * if a notification is needed, false otherwise. */ static inline bool rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) { return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } void netdev_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */
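/*
 * Kernel-context sketch (not part of the header above; it builds only inside
 * a kernel tree, and the "my_*" names are invented for illustration): the
 * common pattern for a pointer that is updated under RTNL and read either
 * under RTNL or under RCU, using the helpers declared in this header.
 */
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_cfg {
        int mtu_override;
        struct rcu_head rcu;
};

static struct my_cfg __rcu *my_cfg_ptr;

/* Writer side: callers must hold RTNL. */
static void my_cfg_replace(struct my_cfg *new_cfg)
{
        struct my_cfg *old;

        ASSERT_RTNL();

        old = rtnl_dereference(my_cfg_ptr);     /* plain load; RTNL excludes writers */
        rcu_assign_pointer(my_cfg_ptr, new_cfg);
        if (old)
                kfree_rcu(old, rcu);            /* free once readers are done */
}

/* Reader side: rcu_dereference_rtnl() accepts rcu_read_lock() or RTNL. */
static int my_cfg_mtu_override(void)
{
        struct my_cfg *cfg;
        int ret = 0;

        rcu_read_lock();
        cfg = rcu_dereference_rtnl(my_cfg_ptr);
        if (cfg)
                ret = cfg->mtu_override;
        rcu_read_unlock();

        return ret;
}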
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SEQLOCK_H #define __LINUX_SEQLOCK_H /* * seqcount_t / seqlock_t - a reader-writer consistency mechanism with * lockless readers (read-only retry loops), and no writer starvation. * * See Documentation/locking/seqlock.rst * * Copyrights: * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH */ #include <linux/compiler.h> #include <linux/kcsan-checks.h> #include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/preempt.h> #include <linux/seqlock_types.h> #include <linux/spinlock.h> #include <asm/processor.h> /* * The seqlock seqcount_t interface does not prescribe a precise sequence of * read begin/retry/end. For readers, typically there is a call to * read_seqcount_begin() and read_seqcount_retry(), however, there are more * esoteric cases which do not follow this pattern. * * As a consequence, we take the following best-effort approach for raw usage * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following * memory operations are considered atomic. Usage of the seqlock_t interface * is not affected.
*/ #define KCSAN_SEQLOCK_REGION_MAX 1000 static inline void __seqcount_init(seqcount_t *s, const char *name, struct lock_class_key *key) { /* * Make sure we are not reinitializing a held lock: */ lockdep_init_map(&s->dep_map, name, key, 0); s->sequence = 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC # define SEQCOUNT_DEP_MAP_INIT(lockname) \ .dep_map = { .name = #lockname } /** * seqcount_init() - runtime initializer for seqcount_t * @s: Pointer to the seqcount_t instance */ # define seqcount_init(s) \ do { \ static struct lock_class_key __key; \ __seqcount_init((s), #s, &__key); \ } while (0) static inline void seqcount_lockdep_reader_access(const seqcount_t *s) { seqcount_t *l = (seqcount_t *)s; unsigned long flags; local_irq_save(flags); seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); seqcount_release(&l->dep_map, _RET_IP_); local_irq_restore(flags); } #else # define SEQCOUNT_DEP_MAP_INIT(lockname) # define seqcount_init(s) __seqcount_init(s, NULL, NULL) # define seqcount_lockdep_reader_access(x) #endif /** * SEQCNT_ZERO() - static initializer for seqcount_t * @name: Name of the seqcount_t instance */ #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } /* * Sequence counters with associated locks (seqcount_LOCKNAME_t) * * A sequence counter which associates the lock used for writer * serialization at initialization time. This enables lockdep to validate * that the write side critical section is properly serialized. * * For associated locks which do not implicitly disable preemption, * preemption protection is enforced in the write side function. * * Lockdep is never used in any for the raw write variants. * * See Documentation/locking/seqlock.rst */ /* * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated * @seqcount: The real sequence counter * @lock: Pointer to the associated lock * * A plain sequence counter with external writer synchronization by * LOCKNAME @lock. The lock is associated to the sequence counter in the * static initializer or init function. This enables lockdep to validate * that the write side critical section is properly serialized. 
* * LOCKNAME: raw_spinlock, spinlock, rwlock or mutex */ /* * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t * @s: Pointer to the seqcount_LOCKNAME_t instance * @lock: Pointer to the associated lock */ #define seqcount_LOCKNAME_init(s, _lock, lockname) \ do { \ seqcount_##lockname##_t *____s = (s); \ seqcount_init(&____s->seqcount); \ __SEQ_LOCK(____s->lock = (_lock)); \ } while (0) #define seqcount_raw_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, raw_spinlock) #define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) #define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock) #define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex) /* * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t * * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype * @lockbase: prefix for associated lock/unlock */ #define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ static __always_inline seqcount_t * \ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ static __always_inline const seqcount_t * \ __seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ unsigned seq = READ_ONCE(s->seqcount.sequence); \ \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ __SEQ_LOCK(lockbase##_lock(s->lock)); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ * Re-read the sequence counter since the (possibly \ * preempted) writer made progress. 
\ */ \ seq = READ_ONCE(s->seqcount.sequence); \ } \ \ return seq; \ } \ \ static __always_inline bool \ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ { \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return preemptible; \ \ /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \ return false; \ } \ \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ __SEQ_LOCK(lockdep_assert_held(s->lock)); \ } /* * __seqprop() for seqcount_t */ static inline seqcount_t *__seqprop_ptr(seqcount_t *s) { return s; } static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) { return s; } static inline unsigned __seqprop_sequence(const seqcount_t *s) { return READ_ONCE(s->sequence); } static inline bool __seqprop_preemptible(const seqcount_t *s) { return false; } static inline void __seqprop_assert(const seqcount_t *s) { lockdep_assert_preemption_disabled(); } #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) #undef SEQCOUNT_LOCKNAME /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t * @name: Name of the seqcount_LOCKNAME_t instance * @lock: Pointer to the associated LOCKNAME */ #define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ __SEQ_LOCK(.lock = (assoc_lock)) \ } #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ seqcount_##lockname##_t: __seqprop_##lockname##_##prop #define __seqprop(s, prop) _Generic(*(s), \ seqcount_t: __seqprop_##prop, \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop)) #define seqprop_ptr(s) __seqprop(s, ptr)(s) #define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s) #define seqprop_sequence(s) __seqprop(s, sequence)(s) #define seqprop_preemptible(s) __seqprop(s, preemptible)(s) #define seqprop_assert(s) __seqprop(s, assert)(s) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is * provided before actually loading any of the variables that are to be * protected in this critical section. * * Use carefully, only in critical code, and comment how the barrier is * provided. 
* * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ ({ \ unsigned __seq; \ \ while ((__seq = seqprop_sequence(s)) & 1) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ __seq; \ }) /** * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount_begin(s) \ ({ \ unsigned _seq = __read_seqcount_begin(s); \ \ smp_rmb(); \ _seq; \ }) /** * read_seqcount_begin() - begin a seqcount_t read critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ #define read_seqcount_begin(s) \ ({ \ seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \ raw_read_seqcount_begin(s); \ }) /** * raw_read_seqcount() - read the raw seqcount_t counter value * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * raw_read_seqcount opens a read critical section of the given * seqcount_t, without any lockdep checking, and without checking or * masking the sequence counter LSB. Calling code is responsible for * handling that. * * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount(s) \ ({ \ unsigned __seq = seqprop_sequence(s); \ \ smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ __seq; \ }) /** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * raw_seqcount_begin opens a read critical section of the given * seqcount_t. Unlike read_seqcount_begin(), this function will not wait * for the count to stabilize. If a writer is active when it begins, it * will fail the read_seqcount_retry() at the end of the read critical * section instead of stabilizing at the beginning of it. * * Use this only in special kernel hot paths where the read section is * small and has a high probability of success through other external * means. It will save a single branching instruction. * * Return: count to be passed to read_seqcount_retry() */ #define raw_seqcount_begin(s) \ ({ \ /* \ * If the counter is odd, let read_seqcount_retry() fail \ * by decrementing the counter. \ */ \ raw_read_seqcount(s) & ~1; \ }) /** * __read_seqcount_retry() - end a seqcount_t read section w/o barrier * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is * provided before actually loading any of the variables that are to be * protected in this critical section. * * Use carefully, only in critical code, and comment how the barrier is * provided. * * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ do___read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { kcsan_atomic_next(0); return unlikely(READ_ONCE(s->sequence) != start); } /** * read_seqcount_retry() - end a seqcount_t read critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count, from read_seqcount_begin() * * read_seqcount_retry closes the read critical section of given * seqcount_t. 
If the critical section was invalid, it must be ignored * (and typically retried). * * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ do_read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { smp_rmb(); return do___read_seqcount_retry(s, start); } /** * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Context: check write_seqcount_begin() */ #define raw_write_seqcount_begin(s) \ do { \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ do_raw_write_seqcount_begin(seqprop_ptr(s)); \ } while (0) static inline void do_raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); } /** * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Context: check write_seqcount_end() */ #define raw_write_seqcount_end(s) \ do { \ do_raw_write_seqcount_end(seqprop_ptr(s)); \ \ if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) static inline void do_raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; kcsan_nestable_atomic_end(); } /** * write_seqcount_begin_nested() - start a seqcount_t write section with * custom lockdep nesting level * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @subclass: lockdep nesting level * * See Documentation/locking/lockdep-design.rst * Context: check write_seqcount_begin() */ #define write_seqcount_begin_nested(s, subclass) \ do { \ seqprop_assert(s); \ \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ do_write_seqcount_begin_nested(seqprop_ptr(s), subclass); \ } while (0) static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); do_raw_write_seqcount_begin(s); } /** * write_seqcount_begin() - start a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Context: sequence counter write side sections must be serialized and * non-preemptible. Preemption will be automatically disabled if and * only if the seqcount write serialization lock is associated, and * preemptible. If readers can be invoked from hardirq or softirq * context, interrupts or bottom halves must be respectively disabled. */ #define write_seqcount_begin(s) \ do { \ seqprop_assert(s); \ \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ do_write_seqcount_begin(seqprop_ptr(s)); \ } while (0) static inline void do_write_seqcount_begin(seqcount_t *s) { do_write_seqcount_begin_nested(s, 0); } /** * write_seqcount_end() - end a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Context: Preemption will be automatically re-enabled if and only if * the seqcount write serialization lock is associated, and preemptible. 
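 *
 * A minimal illustrative sketch (the "foo" names are invented): a
 * seqcount_spinlock_t write side section paired with its associated
 * spinlock, and a lockless retry-loop reader::
 *
 *	static DEFINE_SPINLOCK(foo_lock);
 *	static seqcount_spinlock_t foo_seq =
 *		SEQCNT_SPINLOCK_ZERO(foo_seq, &foo_lock);
 *	static u64 foo_a, foo_b;
 *
 *	void foo_write(u64 a, u64 b)
 *	{
 *		spin_lock(&foo_lock);
 *		write_seqcount_begin(&foo_seq);
 *		foo_a = a;
 *		foo_b = b;
 *		write_seqcount_end(&foo_seq);
 *		spin_unlock(&foo_lock);
 *	}
 *
 *	u64 foo_read_sum(void)
 *	{
 *		unsigned int seq;
 *		u64 a, b;
 *
 *		do {
 *			seq = read_seqcount_begin(&foo_seq);
 *			a = foo_a;
 *			b = foo_b;
 *		} while (read_seqcount_retry(&foo_seq, seq));
 *
 *		return a + b;
 *	}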
*/ #define write_seqcount_end(s) \ do { \ do_write_seqcount_end(seqprop_ptr(s)); \ \ if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) static inline void do_write_seqcount_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); do_raw_write_seqcount_end(s); } /** * raw_write_seqcount_barrier() - do a seqcount_t write barrier * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * This can be used to provide an ordering guarantee instead of the usual * consistency guarantee. It is one wmb cheaper, because it can collapse * the two back-to-back wmb()s. * * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because * neither writes before nor after the barrier are enclosed in a seq-writer * critical section that would ensure readers are aware of ongoing writes:: * * seqcount_t seq; * bool X = true, Y = false; * * void read(void) * { * bool x, y; * * do { * int s = read_seqcount_begin(&seq); * * x = X; y = Y; * * } while (read_seqcount_retry(&seq, s)); * * BUG_ON(!x && !y); * } * * void write(void) * { * WRITE_ONCE(Y, true); * * raw_write_seqcount_barrier(seq); * * WRITE_ONCE(X, false); * } */ #define raw_write_seqcount_barrier(s) \ do_raw_write_seqcount_barrier(seqprop_ptr(s)) static inline void do_raw_write_seqcount_barrier(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); s->sequence++; kcsan_nestable_atomic_end(); } /** * write_seqcount_invalidate() - invalidate in-progress seqcount_t read * side operations * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * After write_seqcount_invalidate, no seqcount_t read side operations * will complete successfully and see data older than this. */ #define write_seqcount_invalidate(s) \ do_write_seqcount_invalidate(seqprop_ptr(s)) static inline void do_write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); kcsan_nestable_atomic_begin(); s->sequence+=2; kcsan_nestable_atomic_end(); } /* * Latch sequence counters (seqcount_latch_t) * * A sequence counter variant where the counter even/odd value is used to * switch between two copies of protected data. This allows the read path, * typically NMIs, to safely interrupt the write side critical section. * * As the write sections are fully preemptible, no special handling for * PREEMPT_RT is needed. */ typedef struct { seqcount_t seqcount; } seqcount_latch_t; /** * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t * @seq_name: Name of the seqcount_latch_t instance */ #define SEQCNT_LATCH_ZERO(seq_name) { \ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ } /** * seqcount_latch_init() - runtime initializer for seqcount_latch_t * @s: Pointer to the seqcount_latch_t instance */ #define seqcount_latch_init(s) seqcount_init(&(s)->seqcount) /** * raw_read_seqcount_latch() - pick even/odd latch data copy * @s: Pointer to seqcount_latch_t * * See raw_write_seqcount_latch() for details and a full reader/writer * usage example. * * Return: sequence counter raw value. Use the lowest bit as an index for * picking which data copy to read. The full counter must then be checked * with raw_read_seqcount_latch_retry(). */ static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s) { /* * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). 
* Due to the dependent load, a full smp_rmb() is not needed. */ return READ_ONCE(s->seqcount.sequence); } /** * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section * @s: Pointer to seqcount_latch_t * @start: count, from raw_read_seqcount_latch() * * Return: true if a read section retry is required, else false */ static __always_inline int raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) { smp_rmb(); return unlikely(READ_ONCE(s->seqcount.sequence) != start); } /** * raw_write_seqcount_latch() - redirect latch readers to even/odd copy * @s: Pointer to seqcount_latch_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never * interrupt the modification -- e.g. the concurrency is strictly between CPUs * -- you most likely do not need this. * * Where the traditional RCU/lockless data structures rely on atomic * modifications to ensure queries observe either the old or the new state the * latch allows the same for non-atomic updates. The trade-off is doubling the * cost of storage; we have to maintain two copies of the entire data * structure. * * Very simply put: we first modify one copy and then the other. This ensures * there is always one copy in a stable state, ready to give us an answer. * * The basic form is a data structure like:: * * struct latch_struct { * seqcount_latch_t seq; * struct data_struct data[2]; * }; * * Where a modification, which is assumed to be externally serialized, does the * following:: * * void latch_modify(struct latch_struct *latch, ...) * { * smp_wmb(); // Ensure that the last data[1] update is visible * latch->seq.sequence++; * smp_wmb(); // Ensure that the seqcount update is visible * * modify(latch->data[0], ...); * * smp_wmb(); // Ensure that the data[0] update is visible * latch->seq.sequence++; * smp_wmb(); // Ensure that the seqcount update is visible * * modify(latch->data[1], ...); * } * * The query will have a form like:: * * struct entry *latch_query(struct latch_struct *latch, ...) * { * struct entry *entry; * unsigned seq, idx; * * do { * seq = raw_read_seqcount_latch(&latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * * // This includes needed smp_rmb() * } while (raw_read_seqcount_latch_retry(&latch->seq, seq)); * * return entry; * } * * So during the modification, queries are first redirected to data[1]. Then we * modify data[0]. When that is complete, we redirect queries back to data[0] * and we can modify data[1]. * * NOTE: * * The non-requirement for atomic modifications does _NOT_ include * the publishing of new entries in the case where data is a dynamic * data structure. * * An iteration might start in data[0] and get suspended long enough * to miss an entire modification sequence, once it resumes it might * observe the new entry. * * NOTE2: * * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. 
*/ static inline void raw_write_seqcount_latch(seqcount_latch_t *s) { smp_wmb(); /* prior stores before incrementing "sequence" */ s->seqcount.sequence++; smp_wmb(); /* increment "sequence" before following stores */ } #define __SEQLOCK_UNLOCKED(lockname) \ { \ .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } /** * seqlock_init() - dynamic initializer for seqlock_t * @sl: Pointer to the seqlock_t instance */ #define seqlock_init(sl) \ do { \ spin_lock_init(&(sl)->lock); \ seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock); \ } while (0) /** * DEFINE_SEQLOCK(sl) - Define a statically allocated seqlock_t * @sl: Name of the seqlock_t instance */ #define DEFINE_SEQLOCK(sl) \ seqlock_t sl = __SEQLOCK_UNLOCKED(sl) /** * read_seqbegin() - start a seqlock_t read side critical section * @sl: Pointer to seqlock_t * * Return: count, to be passed to read_seqretry() */ static inline unsigned read_seqbegin(const seqlock_t *sl) { unsigned ret = read_seqcount_begin(&sl->seqcount); kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */ kcsan_flat_atomic_begin(); return ret; } /** * read_seqretry() - end a seqlock_t read side section * @sl: Pointer to seqlock_t * @start: count, from read_seqbegin() * * read_seqretry closes the read side critical section of given seqlock_t. * If the critical section was invalid, it must be ignored (and typically * retried). * * Return: true if a read section retry is required, else false */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { /* * Assume not nested: read_seqretry() may be called multiple times when * completing read critical section. */ kcsan_flat_atomic_end(); return read_seqcount_retry(&sl->seqcount, start); } /* * For all seqlock_t write side functions, use the internal * do_write_seqcount_begin() instead of generic write_seqcount_begin(). * This way, no redundant lockdep_assert_held() checks are added. */ /** * write_seqlock() - start a seqlock_t write side critical section * @sl: Pointer to seqlock_t * * write_seqlock opens a write side critical section for the given * seqlock_t. It also implicitly acquires the spinlock_t embedded inside * that sequential lock. All seqlock_t write side sections are thus * automatically serialized and non-preemptible. * * Context: if the seqlock_t read section, or other write side critical * sections, can be invoked from hardirq or softirq contexts, use the * _irqsave or _bh variants of this function instead. */ static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); } /** * write_sequnlock() - end a seqlock_t write side critical section * @sl: Pointer to seqlock_t * * write_sequnlock closes the (serialized and non-preemptible) write side * critical section of given seqlock_t. */ static inline void write_sequnlock(seqlock_t *sl) { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock(&sl->lock); } /** * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section * @sl: Pointer to seqlock_t * * _bh variant of write_seqlock(). Use only if the read side section, or * other write side sections, can be invoked from softirq contexts. 
*/ static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); } /** * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section * @sl: Pointer to seqlock_t * * write_sequnlock_bh closes the serialized, non-preemptible, and * softirqs-disabled, seqlock_t write side critical section opened with * write_seqlock_bh(). */ static inline void write_sequnlock_bh(seqlock_t *sl) { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_bh(&sl->lock); } /** * write_seqlock_irq() - start a non-interruptible seqlock_t write section * @sl: Pointer to seqlock_t * * _irq variant of write_seqlock(). Use only if the read side section, or * other write sections, can be invoked from hardirq contexts. */ static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); } /** * write_sequnlock_irq() - end a non-interruptible seqlock_t write section * @sl: Pointer to seqlock_t * * write_sequnlock_irq closes the serialized and non-interruptible * seqlock_t write side section opened with write_seqlock_irq(). */ static inline void write_sequnlock_irq(seqlock_t *sl) { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irq(&sl->lock); } static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) { unsigned long flags; spin_lock_irqsave(&sl->lock, flags); do_write_seqcount_begin(&sl->seqcount.seqcount); return flags; } /** * write_seqlock_irqsave() - start a non-interruptible seqlock_t write * section * @lock: Pointer to seqlock_t * @flags: Stack-allocated storage for saving caller's local interrupt * state, to be passed to write_sequnlock_irqrestore(). * * _irqsave variant of write_seqlock(). Use it only if the read side * section, or other write sections, can be invoked from hardirq context. */ #define write_seqlock_irqsave(lock, flags) \ do { flags = __write_seqlock_irqsave(lock); } while (0) /** * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write * section * @sl: Pointer to seqlock_t * @flags: Caller's saved interrupt state, from write_seqlock_irqsave() * * write_sequnlock_irqrestore closes the serialized and non-interruptible * seqlock_t write section previously opened with write_seqlock_irqsave(). */ static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irqrestore(&sl->lock, flags); } /** * read_seqlock_excl() - begin a seqlock_t locking reader section * @sl: Pointer to seqlock_t * * read_seqlock_excl opens a seqlock_t locking reader critical section. A * locking reader exclusively locks out *both* other writers *and* other * locking readers, but it does not update the embedded sequence number. * * Locking readers act like a normal spin_lock()/spin_unlock(). * * Context: if the seqlock_t write section, *or other read sections*, can * be invoked from hardirq or softirq contexts, use the _irqsave or _bh * variant of this function instead. * * The opened read section must be closed with read_sequnlock_excl(). */ static inline void read_seqlock_excl(seqlock_t *sl) { spin_lock(&sl->lock); } /** * read_sequnlock_excl() - end a seqlock_t locking reader critical section * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl(seqlock_t *sl) { spin_unlock(&sl->lock); } /** * read_seqlock_excl_bh() - start a seqlock_t locking reader section with * softirqs disabled * @sl: Pointer to seqlock_t * * _bh variant of read_seqlock_excl(). 
Use this variant only if the * seqlock_t write side section, *or other read sections*, can be invoked * from softirq contexts. */ static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); } /** * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking * reader section * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl_bh(seqlock_t *sl) { spin_unlock_bh(&sl->lock); } /** * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking * reader section * @sl: Pointer to seqlock_t * * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t * write side section, *or other read sections*, can be invoked from a * hardirq context. */ static inline void read_seqlock_excl_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); } /** * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t * locking reader section * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl_irq(seqlock_t *sl) { spin_unlock_irq(&sl->lock); } static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) { unsigned long flags; spin_lock_irqsave(&sl->lock, flags); return flags; } /** * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t * locking reader section * @lock: Pointer to seqlock_t * @flags: Stack-allocated storage for saving caller's local interrupt * state, to be passed to read_sequnlock_excl_irqrestore(). * * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t * write side section, *or other read sections*, can be invoked from a * hardirq context. */ #define read_seqlock_excl_irqsave(lock, flags) \ do { flags = __read_seqlock_excl_irqsave(lock); } while (0) /** * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t * locking reader section * @sl: Pointer to seqlock_t * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave() */ static inline void read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) { spin_unlock_irqrestore(&sl->lock, flags); } /** * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader * @lock: Pointer to seqlock_t * @seq : Marker and return parameter. If the passed value is even, the * reader will become a *lockless* seqlock_t reader as in read_seqbegin(). * If the passed value is odd, the reader will become a *locking* reader * as in read_seqlock_excl(). In the first call to this function, the * caller *must* initialize and pass an even value to @seq; this way, a * lockless read can be optimistically tried first. * * read_seqbegin_or_lock is an API designed to optimistically try a normal * lockless seqlock_t read section first. If an odd counter is found, the * lockless read trial has failed, and the next read iteration transforms * itself into a full seqlock_t locking reader. * * This is typically used to avoid seqlock_t lockless readers starvation * (too much retry loops) in the case of a sharp spike in write side * activity. * * Context: if the seqlock_t write section, *or other read sections*, can * be invoked from hardirq or softirq contexts, use the _irqsave or _bh * variant of this function instead. * * Check Documentation/locking/seqlock.rst for template example code. * * Return: the encountered sequence counter value, through the @seq * parameter, which is overloaded as a return parameter. This returned * value must be checked with need_seqretry(). 
If the read section needs to * be retried, this returned value must also be passed as the @seq * parameter of the next read_seqbegin_or_lock() iteration. */ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) { if (!(*seq & 1)) /* Even */ *seq = read_seqbegin(lock); else /* Odd */ read_seqlock_excl(lock); } /** * need_seqretry() - validate seqlock_t "locking or lockless" read section * @lock: Pointer to seqlock_t * @seq: sequence count, from read_seqbegin_or_lock() * * Return: true if a read section retry is required, false otherwise */ static inline int need_seqretry(seqlock_t *lock, int seq) { return !(seq & 1) && read_seqretry(lock, seq); } /** * done_seqretry() - end seqlock_t "locking or lockless" reader section * @lock: Pointer to seqlock_t * @seq: count, from read_seqbegin_or_lock() * * done_seqretry finishes the seqlock_t read side critical section started * with read_seqbegin_or_lock() and validated by need_seqretry(). */ static inline void done_seqretry(seqlock_t *lock, int seq) { if (seq & 1) read_sequnlock_excl(lock); } /** * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or * a non-interruptible locking reader * @lock: Pointer to seqlock_t * @seq: Marker and return parameter. Check read_seqbegin_or_lock(). * * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if * the seqlock_t write section, *or other read sections*, can be invoked * from hardirq context. * * Note: Interrupts will be disabled only for "locking reader" mode. * * Return: * * 1. The saved local interrupt state in case of a locking reader, to * be passed to done_seqretry_irqrestore(). * * 2. The encountered sequence counter value, returned through @seq * overloaded as a return parameter. Check read_seqbegin_or_lock(). */ static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) { unsigned long flags = 0; if (!(*seq & 1)) /* Even */ *seq = read_seqbegin(lock); else /* Odd */ read_seqlock_excl_irqsave(lock, flags); return flags; } /** * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a * non-interruptible locking reader section * @lock: Pointer to seqlock_t * @seq: Count, from read_seqbegin_or_lock_irqsave() * @flags: Caller's saved local interrupt state in case of a locking * reader, also from read_seqbegin_or_lock_irqsave() * * This is the _irqrestore variant of done_seqretry(). The read section * must have been opened with read_seqbegin_or_lock_irqsave(), and validated * by need_seqretry(). */ static inline void done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) { if (seq & 1) read_sequnlock_excl_irqrestore(lock, flags); } #endif /* __LINUX_SEQLOCK_H */
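/*
 * Editor's illustrative sketch (not part of seqlock.h): the "lockless or
 * locking" reader pattern that read_seqbegin_or_lock(), need_seqretry()
 * and done_seqretry() above are designed for, along the lines of the
 * template in Documentation/locking/seqlock.rst. The structure and field
 * names below are hypothetical; only the seqlock_t calls come from the
 * header above.
 */
struct example_state {
	seqlock_t lock;
	u64 a, b;
};

static u64 example_read(struct example_state *st)
{
	u64 a, b;
	int seq = 0;		/* even value: try a lockless pass first */

retry:
	read_seqbegin_or_lock(&st->lock, &seq);
	a = st->a;
	b = st->b;
	if (need_seqretry(&st->lock, seq)) {
		/* The lockless pass raced with a writer. */
		seq = 1;	/* odd value: take the spinlock on the next pass */
		goto retry;
	}
	done_seqretry(&st->lock, seq);

	return a + b;
}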
// SPDX-License-Identifier: GPL-2.0 /* * Implement CPU time clocks for the POSIX clock interface. */ #include <linux/sched/signal.h> #include <linux/sched/cputime.h> #include <linux/posix-timers.h> #include <linux/errno.h> #include <linux/math64.h> #include <linux/uaccess.h> #include <linux/kernel_stat.h> #include <trace/events/timer.h> #include <linux/tick.h> #include <linux/workqueue.h> #include <linux/compat.h> #include <linux/sched/deadline.h> #include <linux/task_work.h> #include "posix-timers.h" static void posix_cpu_timer_rearm(struct k_itimer *timer); void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { posix_cputimers_init(pct); if (cpu_limit != RLIM_INFINITY) { pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC; pct->timers_active = true; } } /* * Called after updating RLIMIT_CPU to run the cpu timer and update * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if * necessary. Needs siglock protection since other code may update the * expiration cache as well. * * Returns 0 on success, -ESRCH on failure. Can fail if the task is exiting and * we cannot lock_task_sighand. Cannot fail if task is current. */ int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { u64 nsecs = rlim_new * NSEC_PER_SEC; unsigned long irq_fl; if (!lock_task_sighand(task, &irq_fl)) return -ESRCH; set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL); unlock_task_sighand(task, &irq_fl); return 0; } /* * Functions for validating access to tasks. */ static struct pid *pid_for_clock(const clockid_t clock, bool gettime) { const bool thread = !!CPUCLOCK_PERTHREAD(clock); const pid_t upid = CPUCLOCK_PID(clock); struct pid *pid; if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX) return NULL; /* * If the encoded PID is 0, then the timer is targeted at current * or the process to which current belongs. */ if (upid == 0) return thread ? task_pid(current) : task_tgid(current); pid = find_vpid(upid); if (!pid) return NULL; if (thread) { struct task_struct *tsk = pid_task(pid, PIDTYPE_PID); return (tsk && same_thread_group(tsk, current)) ? pid : NULL; } /* * For clock_gettime(PROCESS) allow finding the process * with the pid of the current task. The code needs the tgid * of the process so that pid_task(pid, PIDTYPE_TGID) can be * used to find the process. */ if (gettime && (pid == task_pid(current))) return task_tgid(current); /* * For processes, require that the pid identifies a process. */ return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL; } static inline int validate_clock_permissions(const clockid_t clock) { int ret; rcu_read_lock(); ret = pid_for_clock(clock, false) ? 0 : -EINVAL; rcu_read_unlock(); return ret; } static inline enum pid_type clock_pid_type(const clockid_t clock) { return CPUCLOCK_PERTHREAD(clock) ? PIDTYPE_PID : PIDTYPE_TGID; } static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer) { return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock)); } /* * Update expiry time from increment, and increase overrun count, * given the current clock sample.
*/ static u64 bump_cpu_timer(struct k_itimer *timer, u64 now) { u64 delta, incr, expires = timer->it.cpu.node.expires; int i; if (!timer->it_interval) return expires; if (now < expires) return expires; incr = timer->it_interval; delta = now + incr - expires; /* Don't use (incr*2 < delta), incr*2 might overflow. */ for (i = 0; incr < delta - incr; i++) incr = incr << 1; for (; i >= 0; incr >>= 1, i--) { if (delta < incr) continue; timer->it.cpu.node.expires += incr; timer->it_overrun += 1LL << i; delta -= incr; } return timer->it.cpu.node.expires; } /* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */ static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct) { return !(~pct->bases[CPUCLOCK_PROF].nextevt | ~pct->bases[CPUCLOCK_VIRT].nextevt | ~pct->bases[CPUCLOCK_SCHED].nextevt); } static int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { int error = validate_clock_permissions(which_clock); if (!error) { tp->tv_sec = 0; tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { /* * If sched_clock is using a cycle counter, we * don't have any idea of its true resolution * exported, but it is much more than 1s/HZ. */ tp->tv_nsec = 1; } } return error; } static int posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp) { int error = validate_clock_permissions(clock); /* * You can never reset a CPU clock, but we check for other errors * in the call before failing with EPERM. */ return error ? : -EPERM; } /* * Sample a per-thread clock for the given task. clkid is validated. */ static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p) { u64 utime, stime; if (clkid == CPUCLOCK_SCHED) return task_sched_runtime(p); task_cputime(p, &utime, &stime); switch (clkid) { case CPUCLOCK_PROF: return utime + stime; case CPUCLOCK_VIRT: return utime; default: WARN_ON_ONCE(1); } return 0; } static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime) { samples[CPUCLOCK_PROF] = stime + utime; samples[CPUCLOCK_VIRT] = utime; samples[CPUCLOCK_SCHED] = rtime; } static void task_sample_cputime(struct task_struct *p, u64 *samples) { u64 stime, utime; task_cputime(p, &utime, &stime); store_samples(samples, stime, utime, p->se.sum_exec_runtime); } static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, u64 *samples) { u64 stime, utime, rtime; utime = atomic64_read(&at->utime); stime = atomic64_read(&at->stime); rtime = atomic64_read(&at->sum_exec_runtime); store_samples(samples, stime, utime, rtime); } /* * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg * to avoid race conditions with concurrent updates to cputime. */ static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime) { u64 curr_cputime = atomic64_read(cputime); do { if (sum_cputime <= curr_cputime) return; } while (!atomic64_try_cmpxchg(cputime, &curr_cputime, sum_cputime)); } static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum) { __update_gt_cputime(&cputime_atomic->utime, sum->utime); __update_gt_cputime(&cputime_atomic->stime, sum->stime); __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); } /** * thread_group_sample_cputime - Sample cputime for a given task * @tsk: Task for which cputime needs to be started * @samples: Storage for time samples * * Called from sys_getitimer() to calculate the expiry time of an active * timer. 
That means group cputime accounting is already active. Called * with task sighand lock held. * * Updates @times with an uptodate sample of the thread group cputimes. */ void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; struct posix_cputimers *pct = &tsk->signal->posix_cputimers; WARN_ON_ONCE(!pct->timers_active); proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } /** * thread_group_start_cputime - Start cputime and return a sample * @tsk: Task for which cputime needs to be started * @samples: Storage for time samples * * The thread group cputime accounting is avoided when there are no posix * CPU timers armed. Before starting a timer it's required to check whether * the time accounting is active. If not, a full update of the atomic * accounting store needs to be done and the accounting enabled. * * Updates @times with an uptodate sample of the thread group cputimes. */ static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; struct posix_cputimers *pct = &tsk->signal->posix_cputimers; lockdep_assert_task_sighand_held(tsk); /* Check if cputimer isn't running. This is accessed without locking. */ if (!READ_ONCE(pct->timers_active)) { struct task_cputime sum; /* * The POSIX timer interface allows for absolute time expiry * values through the TIMER_ABSTIME flag, therefore we have * to synchronize the timer to the clock every time we start it. */ thread_group_cputime(tsk, &sum); update_gt_cputime(&cputimer->cputime_atomic, &sum); /* * We're setting timers_active without a lock. Ensure this * only gets written to in one operation. We set it after * update_gt_cputime() as a small optimization, but * barriers are not required because update_gt_cputime() * can handle concurrent updates. */ WRITE_ONCE(pct->timers_active, true); } proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } static void __thread_group_cputime(struct task_struct *tsk, u64 *samples) { struct task_cputime ct; thread_group_cputime(tsk, &ct); store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime); } /* * Sample a process (thread group) clock for the given task clkid. If the * group's cputime accounting is already enabled, read the atomic * store. Otherwise a full update is required. clkid is already validated. */ static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p, bool start) { struct thread_group_cputimer *cputimer = &p->signal->cputimer; struct posix_cputimers *pct = &p->signal->posix_cputimers; u64 samples[CPUCLOCK_MAX]; if (!READ_ONCE(pct->timers_active)) { if (start) thread_group_start_cputime(p, samples); else __thread_group_cputime(p, samples); } else { proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } return samples[clkid]; } static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp) { const clockid_t clkid = CPUCLOCK_WHICH(clock); struct task_struct *tsk; u64 t; rcu_read_lock(); tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock)); if (!tsk) { rcu_read_unlock(); return -EINVAL; } if (CPUCLOCK_PERTHREAD(clock)) t = cpu_clock_sample(clkid, tsk); else t = cpu_clock_sample_group(clkid, tsk, false); rcu_read_unlock(); *tp = ns_to_timespec64(t); return 0; } /* * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. 
* This is called from sys_timer_create() and do_cpu_nanosleep() with the * new timer already all-zeros initialized. */ static int posix_cpu_timer_create(struct k_itimer *new_timer) { static struct lock_class_key posix_cpu_timers_key; struct pid *pid; rcu_read_lock(); pid = pid_for_clock(new_timer->it_clock, false); if (!pid) { rcu_read_unlock(); return -EINVAL; } /* * If posix timer expiry is handled in task work context then * timer::it_lock can be taken without disabling interrupts as all * other locking happens in task context. This requires a separate * lock class key otherwise regular posix timer expiry would record * the lock class being taken in interrupt context and generate a * false positive warning. */ if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK)) lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key); new_timer->kclock = &clock_posix_cpu; timerqueue_init(&new_timer->it.cpu.node); new_timer->it.cpu.pid = get_pid(pid); rcu_read_unlock(); return 0; } static struct posix_cputimer_base *timer_base(struct k_itimer *timer, struct task_struct *tsk) { int clkidx = CPUCLOCK_WHICH(timer->it_clock); if (CPUCLOCK_PERTHREAD(timer->it_clock)) return tsk->posix_cputimers.bases + clkidx; else return tsk->signal->posix_cputimers.bases + clkidx; } /* * Force recalculating the base earliest expiration on the next tick. * This will also re-evaluate the need to keep around the process wide * cputime counter and tick dependency and eventually shut these down * if necessary. */ static void trigger_base_recalc_expires(struct k_itimer *timer, struct task_struct *tsk) { struct posix_cputimer_base *base = timer_base(timer, tsk); base->nextevt = 0; } /* * Dequeue the timer and reset the base if it was its earliest expiration. * It makes sure the next tick recalculates the base next expiration so we * don't keep the costly process wide cputime counter around for a random * amount of time, along with the tick dependency. * * If another timer gets queued between this and the next tick, its * expiration will update the base next event if necessary on the next * tick. */ static void disarm_timer(struct k_itimer *timer, struct task_struct *p) { struct cpu_timer *ctmr = &timer->it.cpu; struct posix_cputimer_base *base; if (!cpu_timer_dequeue(ctmr)) return; base = timer_base(timer, p); if (cpu_timer_getexpires(ctmr) == base->nextevt) trigger_base_recalc_expires(timer, p); } /* * Clean up a CPU-clock timer that is about to be destroyed. * This is called from timer deletion with the timer already locked. * If we return TIMER_RETRY, it's necessary to release the timer's lock * and try again. (This happens when the timer is in the middle of firing.) */ static int posix_cpu_timer_del(struct k_itimer *timer) { struct cpu_timer *ctmr = &timer->it.cpu; struct sighand_struct *sighand; struct task_struct *p; unsigned long flags; int ret = 0; rcu_read_lock(); p = cpu_timer_task_rcu(timer); if (!p) goto out; /* * Protect against sighand release/switch in exit/exec and process/ * thread timer list entry concurrent read/writes. */ sighand = lock_task_sighand(p, &flags); if (unlikely(sighand == NULL)) { /* * This raced with the reaping of the task. The exit cleanup * should have removed this timer from the timer queue. 
*/ WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node)); } else { if (timer->it.cpu.firing) ret = TIMER_RETRY; else disarm_timer(timer, p); unlock_task_sighand(p, &flags); } out: rcu_read_unlock(); if (!ret) put_pid(ctmr->pid); return ret; } static void cleanup_timerqueue(struct timerqueue_head *head) { struct timerqueue_node *node; struct cpu_timer *ctmr; while ((node = timerqueue_getnext(head))) { timerqueue_del(head, node); ctmr = container_of(node, struct cpu_timer, node); ctmr->head = NULL; } } /* * Clean out CPU timers which are still armed when a thread exits. The * timers are only removed from the list. No other updates are done. The * corresponding posix timers are still accessible, but cannot be rearmed. * * This must be called with the siglock held. */ static void cleanup_timers(struct posix_cputimers *pct) { cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead); cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead); cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead); } /* * These are both called with the siglock held, when the current thread * is being reaped. When the final (leader) thread in the group is reaped, * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit. */ void posix_cpu_timers_exit(struct task_struct *tsk) { cleanup_timers(&tsk->posix_cputimers); } void posix_cpu_timers_exit_group(struct task_struct *tsk) { cleanup_timers(&tsk->signal->posix_cputimers); } /* * Insert the timer on the appropriate list before any timers that * expire later. This must be called with the sighand lock held. */ static void arm_timer(struct k_itimer *timer, struct task_struct *p) { struct posix_cputimer_base *base = timer_base(timer, p); struct cpu_timer *ctmr = &timer->it.cpu; u64 newexp = cpu_timer_getexpires(ctmr); if (!cpu_timer_enqueue(&base->tqhead, ctmr)) return; /* * We are the new earliest-expiring POSIX 1.b timer, hence * need to update expiration cache. Take into account that * for process timers we share expiration cache with itimers * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. */ if (newexp < base->nextevt) base->nextevt = newexp; if (CPUCLOCK_PERTHREAD(timer->it_clock)) tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); else tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER); } /* * The timer is locked, fire it and arrange for its reload. */ static void cpu_timer_fire(struct k_itimer *timer) { struct cpu_timer *ctmr = &timer->it.cpu; if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { /* * User don't want any signal. */ cpu_timer_setexpires(ctmr, 0); } else if (unlikely(timer->sigq == NULL)) { /* * This a special case for clock_nanosleep, * not a normal timer from sys_timer_create. */ wake_up_process(timer->it_process); cpu_timer_setexpires(ctmr, 0); } else if (!timer->it_interval) { /* * One-shot timer. Clear it as soon as it's fired. */ posix_timer_event(timer, 0); cpu_timer_setexpires(ctmr, 0); } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { /* * The signal did not get queued because the signal * was ignored, so we won't get any callback to * reload the timer. But we need to keep it * ticking in case the signal is deliverable next time. */ posix_cpu_timer_rearm(timer); ++timer->it_requeue_pending; } } /* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. * If we return TIMER_RETRY, it's necessary to release the timer's lock * and try again. (This happens when the timer is in the middle of firing.) 
*/ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, struct itimerspec64 *new, struct itimerspec64 *old) { clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); u64 old_expires, new_expires, old_incr, val; struct cpu_timer *ctmr = &timer->it.cpu; struct sighand_struct *sighand; struct task_struct *p; unsigned long flags; int ret = 0; rcu_read_lock(); p = cpu_timer_task_rcu(timer); if (!p) { /* * If p has just been reaped, we can no * longer get any information about it at all. */ rcu_read_unlock(); return -ESRCH; } /* * Use the to_ktime conversion because that clamps the maximum * value to KTIME_MAX and avoid multiplication overflows. */ new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value)); /* * Protect against sighand release/switch in exit/exec and p->cpu_timers * and p->signal->cpu_timers read/write in arm_timer() */ sighand = lock_task_sighand(p, &flags); /* * If p has just been reaped, we can no * longer get any information about it at all. */ if (unlikely(sighand == NULL)) { rcu_read_unlock(); return -ESRCH; } /* * Disarm any old timer after extracting its expiry time. */ old_incr = timer->it_interval; old_expires = cpu_timer_getexpires(ctmr); if (unlikely(timer->it.cpu.firing)) { timer->it.cpu.firing = -1; ret = TIMER_RETRY; } else { cpu_timer_dequeue(ctmr); } /* * We need to sample the current value to convert the new * value from to relative and absolute, and to convert the * old value from absolute to relative. To set a process * timer, we need a sample to balance the thread expiry * times (in arm_timer). With an absolute time, we must * check if it's already passed. In short, we need a sample. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) val = cpu_clock_sample(clkid, p); else val = cpu_clock_sample_group(clkid, p, true); if (old) { if (old_expires == 0) { old->it_value.tv_sec = 0; old->it_value.tv_nsec = 0; } else { /* * Update the timer in case it has overrun already. * If it has, we'll report it as having overrun and * with the next reloaded timer already ticking, * though we are swallowing that pending * notification here to install the new setting. */ u64 exp = bump_cpu_timer(timer, val); if (val < exp) { old_expires = exp - val; old->it_value = ns_to_timespec64(old_expires); } else { old->it_value.tv_nsec = 1; old->it_value.tv_sec = 0; } } } if (unlikely(ret)) { /* * We are colliding with the timer actually firing. * Punt after filling in the timer's old value, and * disable this firing since we are already reporting * it as an overrun (thanks to bump_cpu_timer above). */ unlock_task_sighand(p, &flags); goto out; } if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) { new_expires += val; } /* * Install the new expiry time (or zero). * For a timer with no notification action, we don't actually * arm the timer (we'll just fake it for timer_gettime). */ cpu_timer_setexpires(ctmr, new_expires); if (new_expires != 0 && val < new_expires) { arm_timer(timer, p); } unlock_task_sighand(p, &flags); /* * Install the new reload setting, and * set up the signal and overrun bookkeeping. */ timer->it_interval = timespec64_to_ktime(new->it_interval); /* * This acts as a modification timestamp for the timer, * so any automatic reload attempt will punt on seeing * that we have reset the timer manually. 
*/ timer->it_requeue_pending = (timer->it_requeue_pending + 2) & ~REQUEUE_PENDING; timer->it_overrun_last = 0; timer->it_overrun = -1; if (val >= new_expires) { if (new_expires != 0) { /* * The designated time already passed, so we notify * immediately, even if the thread never runs to * accumulate more time on this clock. */ cpu_timer_fire(timer); } /* * Make sure we don't keep around the process wide cputime * counter or the tick dependency if they are not necessary. */ sighand = lock_task_sighand(p, &flags); if (!sighand) goto out; if (!cpu_timer_queued(ctmr)) trigger_base_recalc_expires(timer, p); unlock_task_sighand(p, &flags); } out: rcu_read_unlock(); if (old) old->it_interval = ns_to_timespec64(old_incr); return ret; } static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) { clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); struct cpu_timer *ctmr = &timer->it.cpu; u64 now, expires = cpu_timer_getexpires(ctmr); struct task_struct *p; rcu_read_lock(); p = cpu_timer_task_rcu(timer); if (!p) goto out; /* * Easy part: convert the reload time. */ itp->it_interval = ktime_to_timespec64(timer->it_interval); if (!expires) goto out; /* * Sample the clock to take the difference with the expiry time. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) now = cpu_clock_sample(clkid, p); else now = cpu_clock_sample_group(clkid, p, false); if (now < expires) { itp->it_value = ns_to_timespec64(expires - now); } else { /* * The timer should have expired already, but the firing * hasn't taken place yet. Say it's just about to expire. */ itp->it_value.tv_nsec = 1; itp->it_value.tv_sec = 0; } out: rcu_read_unlock(); } #define MAX_COLLECTED 20 static u64 collect_timerqueue(struct timerqueue_head *head, struct list_head *firing, u64 now) { struct timerqueue_node *next; int i = 0; while ((next = timerqueue_getnext(head))) { struct cpu_timer *ctmr; u64 expires; ctmr = container_of(next, struct cpu_timer, node); expires = cpu_timer_getexpires(ctmr); /* Limit the number of timers to expire at once */ if (++i == MAX_COLLECTED || now < expires) return expires; ctmr->firing = 1; /* See posix_cpu_timer_wait_running() */ rcu_assign_pointer(ctmr->handling, current); cpu_timer_dequeue(ctmr); list_add_tail(&ctmr->elist, firing); } return U64_MAX; } static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, struct list_head *firing) { struct posix_cputimer_base *base = pct->bases; int i; for (i = 0; i < CPUCLOCK_MAX; i++, base++) { base->nextevt = collect_timerqueue(&base->tqhead, firing, samples[i]); } } static inline void check_dl_overrun(struct task_struct *tsk) { if (tsk->dl.dl_overrun) { tsk->dl.dl_overrun = 0; send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); } } static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) { if (time < limit) return false; if (print_fatal_signals) { pr_info("%s Watchdog Timeout (%s): %s[%d]\n", rt ? "RT" : "CPU", hard ? "hard" : "soft", current->comm, task_pid_nr(current)); } send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID); return true; } /* * Check for any per-thread CPU timers that have fired and move them off * the tsk->cpu_timers[N] list onto the firing list. Here we update the * tsk->it_*_expires values to reflect the remaining thread CPU timers. 
*/ static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { struct posix_cputimers *pct = &tsk->posix_cputimers; u64 samples[CPUCLOCK_MAX]; unsigned long soft; if (dl_task(tsk)) check_dl_overrun(tsk); if (expiry_cache_is_inactive(pct)) return; task_sample_cputime(tsk, samples); collect_posix_cputimers(pct, samples, firing); /* * Check for the special case thread timers. */ soft = task_rlimit(tsk, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { /* Task RT timeout is accounted in jiffies. RTTIME is usec */ unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); /* At the hard limit, send SIGKILL. No further action. */ if (hard != RLIM_INFINITY && check_rlimit(rttime, hard, SIGKILL, true, true)) return; /* At the soft limit, send a SIGXCPU every second */ if (check_rlimit(rttime, soft, SIGXCPU, true, false)) { soft += USEC_PER_SEC; tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft; } } if (expiry_cache_is_inactive(pct)) tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); } static inline void stop_process_timers(struct signal_struct *sig) { struct posix_cputimers *pct = &sig->posix_cputimers; /* Turn off the active flag. This is done without locking. */ WRITE_ONCE(pct->timers_active, false); tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); } static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, u64 *expires, u64 cur_time, int signo) { if (!it->expires) return; if (cur_time >= it->expires) { if (it->incr) it->expires += it->incr; else it->expires = 0; trace_itimer_expire(signo == SIGPROF ? ITIMER_PROF : ITIMER_VIRTUAL, task_tgid(tsk), cur_time); send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); } if (it->expires && it->expires < *expires) *expires = it->expires; } /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers * have already been taken off. */ static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { struct signal_struct *const sig = tsk->signal; struct posix_cputimers *pct = &sig->posix_cputimers; u64 samples[CPUCLOCK_MAX]; unsigned long soft; /* * If there are no active process wide timers (POSIX 1.b, itimers, * RLIMIT_CPU) nothing to check. Also skip the process wide timer * processing when there is already another task handling them. */ if (!READ_ONCE(pct->timers_active) || pct->expiry_active) return; /* * Signify that a thread is checking for process timers. * Write access to this field is protected by the sighand lock. */ pct->expiry_active = true; /* * Collect the current process totals. Group accounting is active * so the sample can be taken directly. */ proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples); collect_posix_cputimers(pct, samples, firing); /* * Check for the special case process timers. */ check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &pct->bases[CPUCLOCK_PROF].nextevt, samples[CPUCLOCK_PROF], SIGPROF); check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &pct->bases[CPUCLOCK_VIRT].nextevt, samples[CPUCLOCK_VIRT], SIGVTALRM); soft = task_rlimit(tsk, RLIMIT_CPU); if (soft != RLIM_INFINITY) { /* RLIMIT_CPU is in seconds. Samples are nanoseconds */ unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU); u64 ptime = samples[CPUCLOCK_PROF]; u64 softns = (u64)soft * NSEC_PER_SEC; u64 hardns = (u64)hard * NSEC_PER_SEC; /* At the hard limit, send SIGKILL. No further action. 
*/ if (hard != RLIM_INFINITY && check_rlimit(ptime, hardns, SIGKILL, false, true)) return; /* At the soft limit, send a SIGXCPU every second */ if (check_rlimit(ptime, softns, SIGXCPU, false, false)) { sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1; softns += NSEC_PER_SEC; } /* Update the expiry cache */ if (softns < pct->bases[CPUCLOCK_PROF].nextevt) pct->bases[CPUCLOCK_PROF].nextevt = softns; } if (expiry_cache_is_inactive(pct)) stop_process_timers(sig); pct->expiry_active = false; } /* * This is called from the signal code (via posixtimer_rearm) * when the last timer signal was delivered and we have to reload the timer. */ static void posix_cpu_timer_rearm(struct k_itimer *timer) { clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); struct task_struct *p; struct sighand_struct *sighand; unsigned long flags; u64 now; rcu_read_lock(); p = cpu_timer_task_rcu(timer); if (!p) goto out; /* Protect timer list r/w in arm_timer() */ sighand = lock_task_sighand(p, &flags); if (unlikely(sighand == NULL)) goto out; /* * Fetch the current sample and update the timer's expiry time. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) now = cpu_clock_sample(clkid, p); else now = cpu_clock_sample_group(clkid, p, true); bump_cpu_timer(timer, now); /* * Now re-arm for the new expiry time. */ arm_timer(timer, p); unlock_task_sighand(p, &flags); out: rcu_read_unlock(); } /** * task_cputimers_expired - Check whether posix CPU timers are expired * * @samples: Array of current samples for the CPUCLOCK clocks * @pct: Pointer to a posix_cputimers container * * Returns true if any member of @samples is greater than the corresponding * member of @pct->bases[CLK].nextevt. False otherwise */ static inline bool task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct) { int i; for (i = 0; i < CPUCLOCK_MAX; i++) { if (samples[i] >= pct->bases[i].nextevt) return true; } return false; } /** * fastpath_timer_check - POSIX CPU timers fast path. * * @tsk: The task (thread) being checked. * * Check the task and thread group timers. If both are zero (there are no * timers set) return false. Otherwise snapshot the task and thread group * timers and compare them with the corresponding expiration times. Return * true if a timer has expired, else return false. */ static inline bool fastpath_timer_check(struct task_struct *tsk) { struct posix_cputimers *pct = &tsk->posix_cputimers; struct signal_struct *sig; if (!expiry_cache_is_inactive(pct)) { u64 samples[CPUCLOCK_MAX]; task_sample_cputime(tsk, samples); if (task_cputimers_expired(samples, pct)) return true; } sig = tsk->signal; pct = &sig->posix_cputimers; /* * Check if thread group timers expired when timers are active and * no other thread in the group is already handling expiry for * thread group cputimers. These fields are read without the * sighand lock. However, this is fine because this is meant to be * a fastpath heuristic to determine whether we should try to * acquire the sighand lock to handle timer expiry. * * In the worst case scenario, if concurrently timers_active is set * or expiry_active is cleared, but the current thread doesn't see * the change yet, the timer checks are delayed until the next * thread in the group gets a scheduler interrupt to handle the * timer. This isn't an issue in practice because these types of * delays with signals actually getting sent are expected. 
*/ if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) { u64 samples[CPUCLOCK_MAX]; proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples); if (task_cputimers_expired(samples, pct)) return true; } if (dl_task(tsk) && tsk->dl.dl_overrun) return true; return false; } static void handle_posix_cpu_timers(struct task_struct *tsk); #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK static void posix_cpu_timers_work(struct callback_head *work) { struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work); mutex_lock(&cw->mutex); handle_posix_cpu_timers(current); mutex_unlock(&cw->mutex); } /* * Invoked from the posix-timer core when a cancel operation failed because * the timer is marked firing. The caller holds rcu_read_lock(), which * protects the timer and the task which is expiring it from being freed. */ static void posix_cpu_timer_wait_running(struct k_itimer *timr) { struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling); /* Has the handling task completed expiry already? */ if (!tsk) return; /* Ensure that the task cannot go away */ get_task_struct(tsk); /* Now drop the RCU protection so the mutex can be locked */ rcu_read_unlock(); /* Wait on the expiry mutex */ mutex_lock(&tsk->posix_cputimers_work.mutex); /* Release it immediately again. */ mutex_unlock(&tsk->posix_cputimers_work.mutex); /* Drop the task reference. */ put_task_struct(tsk); /* Relock RCU so the callsite is balanced */ rcu_read_lock(); } static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr) { /* Ensure that timr->it.cpu.handling task cannot go away */ rcu_read_lock(); spin_unlock_irq(&timr->it_lock); posix_cpu_timer_wait_running(timr); rcu_read_unlock(); /* @timr is on stack and is valid */ spin_lock_irq(&timr->it_lock); } /* * Clear existing posix CPU timers task work. */ void clear_posix_cputimers_work(struct task_struct *p) { /* * A copied work entry from the old task is not meaningful, clear it. * N.B. init_task_work will not do this. */ memset(&p->posix_cputimers_work.work, 0, sizeof(p->posix_cputimers_work.work)); init_task_work(&p->posix_cputimers_work.work, posix_cpu_timers_work); mutex_init(&p->posix_cputimers_work.mutex); p->posix_cputimers_work.scheduled = false; } /* * Initialize posix CPU timers task work in init task. Out of line to * keep the callback static and to avoid header recursion hell. */ void __init posix_cputimers_init_work(void) { clear_posix_cputimers_work(current); } /* * Note: All operations on tsk->posix_cputimer_work.scheduled happen either * in hard interrupt context or in task context with interrupts * disabled. Aside of that the writer/reader interaction is always in the * context of the current task, which means they are strict per CPU. */ static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) { return tsk->posix_cputimers_work.scheduled; } static inline void __run_posix_cpu_timers(struct task_struct *tsk) { if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled)) return; /* Schedule task work to actually expire the timers */ tsk->posix_cputimers_work.scheduled = true; task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME); } static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, unsigned long start) { bool ret = true; /* * On !RT kernels interrupts are disabled while collecting expired * timers, so no tick can happen and the fast path check can be * reenabled without further checks. 
*/ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { tsk->posix_cputimers_work.scheduled = false; return true; } /* * On RT enabled kernels ticks can happen while the expired timers * are collected under sighand lock. But any tick which observes * the CPUTIMERS_WORK_SCHEDULED bit set, does not run the fastpath * checks. So reenabling the tick work has do be done carefully: * * Disable interrupts and run the fast path check if jiffies have * advanced since the collecting of expired timers started. If * jiffies have not advanced or the fast path check did not find * newly expired timers, reenable the fast path check in the timer * interrupt. If there are newly expired timers, return false and * let the collection loop repeat. */ local_irq_disable(); if (start != jiffies && fastpath_timer_check(tsk)) ret = false; else tsk->posix_cputimers_work.scheduled = false; local_irq_enable(); return ret; } #else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ static inline void __run_posix_cpu_timers(struct task_struct *tsk) { lockdep_posixtimer_enter(); handle_posix_cpu_timers(tsk); lockdep_posixtimer_exit(); } static void posix_cpu_timer_wait_running(struct k_itimer *timr) { cpu_relax(); } static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr) { spin_unlock_irq(&timr->it_lock); cpu_relax(); spin_lock_irq(&timr->it_lock); } static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) { return false; } static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, unsigned long start) { return true; } #endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ static void handle_posix_cpu_timers(struct task_struct *tsk) { struct k_itimer *timer, *next; unsigned long flags, start; LIST_HEAD(firing); if (!lock_task_sighand(tsk, &flags)) return; do { /* * On RT locking sighand lock does not disable interrupts, * so this needs to be careful vs. ticks. Store the current * jiffies value. */ start = READ_ONCE(jiffies); barrier(); /* * Here we take off tsk->signal->cpu_timers[N] and * tsk->cpu_timers[N] all the timers that are firing, and * put them on the firing list. */ check_thread_timers(tsk, &firing); check_process_timers(tsk, &firing); /* * The above timer checks have updated the expiry cache and * because nothing can have queued or modified timers after * sighand lock was taken above it is guaranteed to be * consistent. So the next timer interrupt fastpath check * will find valid data. * * If timer expiry runs in the timer interrupt context then * the loop is not relevant as timers will be directly * expired in interrupt context. The stub function below * returns always true which allows the compiler to * optimize the loop out. * * If timer expiry is deferred to task work context then * the following rules apply: * * - On !RT kernels no tick can have happened on this CPU * after sighand lock was acquired because interrupts are * disabled. So reenabling task work before dropping * sighand lock and reenabling interrupts is race free. * * - On RT kernels ticks might have happened but the tick * work ignored posix CPU timer handling because the * CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work * must be done very carefully including a check whether * ticks have happened since the start of the timer * expiry checks. posix_cpu_timers_enable_work() takes * care of that and eventually lets the expiry checks * run again. */ } while (!posix_cpu_timers_enable_work(tsk, start)); /* * We must release sighand lock before taking any timer's lock. 
* There is a potential race with timer deletion here, as the * siglock now protects our private firing list. We have set * the firing flag in each timer, so that a deletion attempt * that gets the timer lock before we do will give it up and * spin until we've taken care of that timer below. */ unlock_task_sighand(tsk, &flags); /* * Now that all the timers on our list have the firing flag, * no one will touch their list entries but us. We'll take * each timer's lock before clearing its firing flag, so no * timer call will interfere. */ list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) { int cpu_firing; /* * spin_lock() is sufficient here even independent of the * expiry context. If expiry happens in hard interrupt * context it's obvious. For task work context it's safe * because all other operations on timer::it_lock happen in * task context (syscall or exit). */ spin_lock(&timer->it_lock); list_del_init(&timer->it.cpu.elist); cpu_firing = timer->it.cpu.firing; timer->it.cpu.firing = 0; /* * The firing flag is -1 if we collided with a reset * of the timer, which already reported this * almost-firing as an overrun. So don't generate an event. */ if (likely(cpu_firing >= 0)) cpu_timer_fire(timer); /* See posix_cpu_timer_wait_running() */ rcu_assign_pointer(timer->it.cpu.handling, NULL); spin_unlock(&timer->it_lock); } } /* * This is called from the timer interrupt handler. The irq handler has * already updated our counts. We need to check if any timers fire now. * Interrupts are disabled. */ void run_posix_cpu_timers(void) { struct task_struct *tsk = current; lockdep_assert_irqs_disabled(); /* * If the actual expiry is deferred to task work context and the * work is already scheduled there is no point to do anything here. */ if (posix_cpu_timers_work_scheduled(tsk)) return; /* * The fast path checks that there are no expired thread or thread * group timers. If that's so, just return. */ if (!fastpath_timer_check(tsk)) return; __run_posix_cpu_timers(tsk); } /* * Set one of the process-wide special case CPU timers or RLIMIT_CPU. * The tsk->sighand->siglock must be held by the caller. */ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid, u64 *newval, u64 *oldval) { u64 now, *nextevt; if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED)) return; nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt; now = cpu_clock_sample_group(clkid, tsk, true); if (oldval) { /* * We are setting itimer. The *oldval is absolute and we update * it to be relative, *newval argument is relative and we update * it to be absolute. */ if (*oldval) { if (*oldval <= now) { /* Just about to fire. */ *oldval = TICK_NSEC; } else { *oldval -= now; } } if (*newval) *newval += now; } /* * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF * expiry cache is also used by RLIMIT_CPU!. */ if (*newval < *nextevt) *nextevt = *newval; tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER); } static int do_cpu_nanosleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { struct itimerspec64 it; struct k_itimer timer; u64 expires; int error; /* * Set up a temporary timer and then wait for it to go off. 
*/ memset(&timer, 0, sizeof timer); spin_lock_init(&timer.it_lock); timer.it_clock = which_clock; timer.it_overrun = -1; error = posix_cpu_timer_create(&timer); timer.it_process = current; if (!error) { static struct itimerspec64 zero_it; struct restart_block *restart; memset(&it, 0, sizeof(it)); it.it_value = *rqtp; spin_lock_irq(&timer.it_lock); error = posix_cpu_timer_set(&timer, flags, &it, NULL); if (error) { spin_unlock_irq(&timer.it_lock); return error; } while (!signal_pending(current)) { if (!cpu_timer_getexpires(&timer.it.cpu)) { /* * Our timer fired and was reset, below * deletion can not fail. */ posix_cpu_timer_del(&timer); spin_unlock_irq(&timer.it_lock); return 0; } /* * Block until cpu_timer_fire (or a signal) wakes us. */ __set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&timer.it_lock); schedule(); spin_lock_irq(&timer.it_lock); } /* * We were interrupted by a signal. */ expires = cpu_timer_getexpires(&timer.it.cpu); error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); if (!error) { /* Timer is now unarmed, deletion can not fail. */ posix_cpu_timer_del(&timer); } else { while (error == TIMER_RETRY) { posix_cpu_timer_wait_running_nsleep(&timer); error = posix_cpu_timer_del(&timer); } } spin_unlock_irq(&timer.it_lock); if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) { /* * It actually did fire already. */ return 0; } error = -ERESTART_RESTARTBLOCK; /* * Report back to the user the time still remaining. */ restart = &current->restart_block; restart->nanosleep.expires = expires; if (restart->nanosleep.type != TT_NONE) error = nanosleep_copyout(restart, &it.it_value); } return error; } static long posix_cpu_nsleep_restart(struct restart_block *restart_block); static int posix_cpu_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { struct restart_block *restart_block = &current->restart_block; int error; /* * Diagnose required errors first. 
*/ if (CPUCLOCK_PERTHREAD(which_clock) && (CPUCLOCK_PID(which_clock) == 0 || CPUCLOCK_PID(which_clock) == task_pid_vnr(current))) return -EINVAL; error = do_cpu_nanosleep(which_clock, flags, rqtp); if (error == -ERESTART_RESTARTBLOCK) { if (flags & TIMER_ABSTIME) return -ERESTARTNOHAND; restart_block->nanosleep.clockid = which_clock; set_restart_fn(restart_block, posix_cpu_nsleep_restart); } return error; } static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->nanosleep.clockid; struct timespec64 t; t = ns_to_timespec64(restart_block->nanosleep.expires); return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t); } #define PROCESS_CLOCK make_process_cpuclock(0, CPUCLOCK_SCHED) #define THREAD_CLOCK make_thread_cpuclock(0, CPUCLOCK_SCHED) static int process_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { return posix_cpu_clock_getres(PROCESS_CLOCK, tp); } static int process_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) { return posix_cpu_clock_get(PROCESS_CLOCK, tp); } static int process_cpu_timer_create(struct k_itimer *timer) { timer->it_clock = PROCESS_CLOCK; return posix_cpu_timer_create(timer); } static int process_cpu_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); } static int thread_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { return posix_cpu_clock_getres(THREAD_CLOCK, tp); } static int thread_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) { return posix_cpu_clock_get(THREAD_CLOCK, tp); } static int thread_cpu_timer_create(struct k_itimer *timer) { timer->it_clock = THREAD_CLOCK; return posix_cpu_timer_create(timer); } const struct k_clock clock_posix_cpu = { .clock_getres = posix_cpu_clock_getres, .clock_set = posix_cpu_clock_set, .clock_get_timespec = posix_cpu_clock_get, .timer_create = posix_cpu_timer_create, .nsleep = posix_cpu_nsleep, .timer_set = posix_cpu_timer_set, .timer_del = posix_cpu_timer_del, .timer_get = posix_cpu_timer_get, .timer_rearm = posix_cpu_timer_rearm, .timer_wait_running = posix_cpu_timer_wait_running, }; const struct k_clock clock_process = { .clock_getres = process_cpu_clock_getres, .clock_get_timespec = process_cpu_clock_get, .timer_create = process_cpu_timer_create, .nsleep = process_cpu_nsleep, }; const struct k_clock clock_thread = { .clock_getres = thread_cpu_clock_getres, .clock_get_timespec = thread_cpu_clock_get, .timer_create = thread_cpu_timer_create, };
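/*
 * Editor's illustration (not part of this kernel source file): a minimal
 * userspace sketch of how the CPU-time clocks implemented above are
 * reached through the standard POSIX interface. timer_create() against
 * CLOCK_PROCESS_CPUTIME_ID lands in the clock_process ops above, and
 * clock_gettime() on the same clock is served by posix_cpu_clock_get().
 * The handler name and the 10 ms expiry value are arbitrary choices for
 * the example; only the POSIX calls and clock IDs are standard.
 * (May need -lrt when building against older glibc.)
 */
#include <signal.h>
#include <stdio.h>
#include <time.h>

static volatile sig_atomic_t fired;

static void on_sigprof(int sig)
{
	(void)sig;
	fired = 1;	/* delivered once the process has consumed the requested CPU time */
}

int main(void)
{
	struct sigevent sev = { .sigev_notify = SIGEV_SIGNAL, .sigev_signo = SIGPROF };
	struct itimerspec its = { .it_value = { .tv_sec = 0, .tv_nsec = 10 * 1000 * 1000 } };
	struct timespec now;
	timer_t tid;

	signal(SIGPROF, on_sigprof);

	/* Arm a one-shot timer against the process-wide CPU clock. */
	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid))
		return 1;
	if (timer_settime(tid, 0, &its, NULL))
		return 1;

	/* Burn CPU; wall-clock sleeping would not advance a CPU-time clock. */
	while (!fired)
		;

	/* Sample the same clock after the timer has fired. */
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);
	printf("process CPU time: %ld.%09ld s\n", (long)now.tv_sec, (long)now.tv_nsec);

	timer_delete(tid);
	return 0;
}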
// SPDX-License-Identifier: GPL-2.0 /* * This file contains the procedures for the handling of select and poll * * Created for Linux based loosely upon Mathius Lattner's minix * patches by Peter MacDonald. Heavily edited by Linus. * * 4 February 1994 * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS * flag set in its personality we do *not* modify the given timeout * parameter to reflect time remaining. * * 24 January 2000 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). 
*/ #include <linux/compat.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/sched/rt.h> #include <linux/syscalls.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/personality.h> /* for STICKY_TIMEOUTS */ #include <linux/file.h> #include <linux/fdtable.h> #include <linux/fs.h> #include <linux/rcupdate.h> #include <linux/hrtimer.h> #include <linux/freezer.h> #include <net/busy_poll.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> /* * Estimate expected accuracy in ns from a timeval. * * After quite a bit of churning around, we've settled on * a simple thing of taking 0.1% of the timeout as the * slack, with a cap of 100 msec. * "nice" tasks get a 0.5% slack instead. * * Consider this comment an open invitation to come up with even * better solutions.. */ #define MAX_SLACK (100 * NSEC_PER_MSEC) static long __estimate_accuracy(struct timespec64 *tv) { long slack; int divfactor = 1000; if (tv->tv_sec < 0) return 0; if (task_nice(current) > 0) divfactor = divfactor / 5; if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor)) return MAX_SLACK; slack = tv->tv_nsec / divfactor; slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); if (slack > MAX_SLACK) return MAX_SLACK; return slack; } u64 select_estimate_accuracy(struct timespec64 *tv) { u64 ret; struct timespec64 now; /* * Realtime tasks get a slack of 0 for obvious reasons. */ if (rt_task(current)) return 0; ktime_get_ts64(&now); now = timespec64_sub(*tv, now); ret = __estimate_accuracy(&now); if (ret < current->timer_slack_ns) return current->timer_slack_ns; return ret; } struct poll_table_page { struct poll_table_page * next; struct poll_table_entry * entry; struct poll_table_entry entries[]; }; #define POLL_TABLE_FULL(table) \ ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) /* * Ok, Peter made a complicated, but straightforward multiple_wait() function. * I have rewritten this, taking some shortcuts: This code may not be easy to * follow, but it should be free of race-conditions, and it's practical. If you * understand what I'm doing here, then you understand how the linux * sleep/wakeup mechanism works. * * Two very simple procedures, poll_wait() and poll_freewait() make all the * work. poll_wait() is an inline-function defined in <linux/poll.h>, * as all select/poll functions have to call it to add an entry to the * poll table. 
*/ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p); void poll_initwait(struct poll_wqueues *pwq) { init_poll_funcptr(&pwq->pt, __pollwait); pwq->polling_task = current; pwq->triggered = 0; pwq->error = 0; pwq->table = NULL; pwq->inline_index = 0; } EXPORT_SYMBOL(poll_initwait); static void free_poll_entry(struct poll_table_entry *entry) { remove_wait_queue(entry->wait_address, &entry->wait); fput(entry->filp); } void poll_freewait(struct poll_wqueues *pwq) { struct poll_table_page * p = pwq->table; int i; for (i = 0; i < pwq->inline_index; i++) free_poll_entry(pwq->inline_entries + i); while (p) { struct poll_table_entry * entry; struct poll_table_page *old; entry = p->entry; do { entry--; free_poll_entry(entry); } while (entry > p->entries); old = p; p = p->next; free_page((unsigned long) old); } } EXPORT_SYMBOL(poll_freewait); static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) { struct poll_table_page *table = p->table; if (p->inline_index < N_INLINE_POLL_ENTRIES) return p->inline_entries + p->inline_index++; if (!table || POLL_TABLE_FULL(table)) { struct poll_table_page *new_table; new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); if (!new_table) { p->error = -ENOMEM; return NULL; } new_table->entry = new_table->entries; new_table->next = table; p->table = new_table; table = new_table; } return table->entry++; } static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct poll_wqueues *pwq = wait->private; DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); /* * Although this function is called under waitqueue lock, LOCK * doesn't imply write barrier and the users expect write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() * and is paired with smp_store_mb() in poll_schedule_timeout. */ smp_wmb(); pwq->triggered = 1; /* * Perform the default wake up operation using a dummy * waitqueue. * * TODO: This is hacky but there currently is no interface to * pass in @sync. @sync is scheduled to be removed and once * that happens, wake_up_process() can be used directly. */ return default_wake_function(&dummy_wait, mode, sync, key); } static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct poll_table_entry *entry; entry = container_of(wait, struct poll_table_entry, wait); if (key && !(key_to_poll(key) & entry->key)) return 0; return __pollwake(wait, mode, sync, key); } /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; entry->filp = get_file(filp); entry->wait_address = wait_address; entry->key = p->_key; init_waitqueue_func_entry(&entry->wait, pollwake); entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); } static int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack) { int rc = -EINTR; set_current_state(state); if (!pwq->triggered) rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* * Prepare for the next iteration. * * The following smp_store_mb() serves two purposes. First, it's * the counterpart rmb of the wmb in pollwake() such that data * written before wake up is always visible after wake up. 
* Second, the full barrier guarantees that triggered clearing * doesn't pass event check of the next iteration. Note that * this problem doesn't exist for the first iteration as * add_wait_queue() has full barrier semantics. */ smp_store_mb(pwq->triggered, 0); return rc; } /** * poll_select_set_timeout - helper function to setup the timeout value * @to: pointer to timespec64 variable for the final timeout * @sec: seconds (from user space) * @nsec: nanoseconds (from user space) * * Note, we do not use a timespec for the user space value here, That * way we can use the function for timeval and compat interfaces as well. * * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0. */ int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec) { struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec}; if (!timespec64_valid(&ts)) return -EINVAL; /* Optimize for the zero timeout value here */ if (!sec && !nsec) { to->tv_sec = to->tv_nsec = 0; } else { ktime_get_ts64(to); *to = timespec64_add_safe(*to, ts); } return 0; } enum poll_time_type { PT_TIMEVAL = 0, PT_OLD_TIMEVAL = 1, PT_TIMESPEC = 2, PT_OLD_TIMESPEC = 3, }; static int poll_select_finish(struct timespec64 *end_time, void __user *p, enum poll_time_type pt_type, int ret) { struct timespec64 rts; restore_saved_sigmask_unless(ret == -ERESTARTNOHAND); if (!p) return ret; if (current->personality & STICKY_TIMEOUTS) goto sticky; /* No update for zero timeout */ if (!end_time->tv_sec && !end_time->tv_nsec) return ret; ktime_get_ts64(&rts); rts = timespec64_sub(*end_time, rts); if (rts.tv_sec < 0) rts.tv_sec = rts.tv_nsec = 0; switch (pt_type) { case PT_TIMEVAL: { struct __kernel_old_timeval rtv; if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec)) memset(&rtv, 0, sizeof(rtv)); rtv.tv_sec = rts.tv_sec; rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; if (!copy_to_user(p, &rtv, sizeof(rtv))) return ret; } break; case PT_OLD_TIMEVAL: { struct old_timeval32 rtv; rtv.tv_sec = rts.tv_sec; rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; if (!copy_to_user(p, &rtv, sizeof(rtv))) return ret; } break; case PT_TIMESPEC: if (!put_timespec64(&rts, p)) return ret; break; case PT_OLD_TIMESPEC: if (!put_old_timespec32(&rts, p)) return ret; break; default: BUG(); } /* * If an application puts its timeval in read-only memory, we * don't want the Linux-specific update to the timeval to * cause a fault after the select has completed * successfully. However, because we're not updating the * timeval, we can't restart the system call. */ sticky: if (ret == -ERESTARTNOHAND) ret = -EINTR; return ret; } /* * Scalable version of the fd_set. */ typedef struct { unsigned long *in, *out, *ex; unsigned long *res_in, *res_out, *res_ex; } fd_set_bits; /* * How many longwords for "nr" bits? */ #define FDS_BITPERLONG (8*sizeof(long)) #define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) #define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) /* * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. */ static inline int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) { nr = FDS_BYTES(nr); if (ufdset) return copy_from_user(fdset, ufdset, nr) ? 
-EFAULT : 0; memset(fdset, 0, nr); return 0; } static inline unsigned long __must_check set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) { if (ufdset) return __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); return 0; } static inline void zero_fd_set(unsigned long nr, unsigned long *fdset) { memset(fdset, 0, FDS_BYTES(nr)); } #define FDS_IN(fds, n) (fds->in + n) #define FDS_OUT(fds, n) (fds->out + n) #define FDS_EX(fds, n) (fds->ex + n) #define BITS(fds, n) (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n)) static int max_select_fd(unsigned long n, fd_set_bits *fds) { unsigned long *open_fds; unsigned long set; int max; struct fdtable *fdt; /* handle last in-complete long-word first */ set = ~(~0UL << (n & (BITS_PER_LONG-1))); n /= BITS_PER_LONG; fdt = files_fdtable(current->files); open_fds = fdt->open_fds + n; max = 0; if (set) { set &= BITS(fds, n); if (set) { if (!(set & ~*open_fds)) goto get_max; return -EBADF; } } while (n) { open_fds--; n--; set = BITS(fds, n); if (!set) continue; if (set & ~*open_fds) return -EBADF; if (max) continue; get_max: do { max++; set >>= 1; } while (set); max += n * BITS_PER_LONG; } return max; } #define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\ EPOLLNVAL) #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\ EPOLLNVAL) #define POLLEX_SET (EPOLLPRI | EPOLLNVAL) static inline void wait_key_set(poll_table *wait, unsigned long in, unsigned long out, unsigned long bit, __poll_t ll_flag) { wait->_key = POLLEX_SET | ll_flag; if (in & bit) wait->_key |= POLLIN_SET; if (out & bit) wait->_key |= POLLOUT_SET; } static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) { ktime_t expire, *to = NULL; struct poll_wqueues table; poll_table *wait; int retval, i, timed_out = 0; u64 slack = 0; __poll_t busy_flag = net_busy_loop_on() ? 
POLL_BUSY_LOOP : 0; unsigned long busy_start = 0; rcu_read_lock(); retval = max_select_fd(n, fds); rcu_read_unlock(); if (retval < 0) return retval; n = retval; poll_initwait(&table); wait = &table.pt; if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { wait->_qproc = NULL; timed_out = 1; } if (end_time && !timed_out) slack = select_estimate_accuracy(end_time); retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; bool can_busy_loop = false; inp = fds->in; outp = fds->out; exp = fds->ex; rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; for (i = 0; i < n; ++rinp, ++routp, ++rexp) { unsigned long in, out, ex, all_bits, bit = 1, j; unsigned long res_in = 0, res_out = 0, res_ex = 0; __poll_t mask; in = *inp++; out = *outp++; ex = *exp++; all_bits = in | out | ex; if (all_bits == 0) { i += BITS_PER_LONG; continue; } for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { struct fd f; if (i >= n) break; if (!(bit & all_bits)) continue; mask = EPOLLNVAL; f = fdget(i); if (f.file) { wait_key_set(wait, in, out, bit, busy_flag); mask = vfs_poll(f.file, wait); fdput(f); } if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; wait->_qproc = NULL; } if ((mask & POLLOUT_SET) && (out & bit)) { res_out |= bit; retval++; wait->_qproc = NULL; } if ((mask & POLLEX_SET) && (ex & bit)) { res_ex |= bit; retval++; wait->_qproc = NULL; } /* got something, stop busy polling */ if (retval) { can_busy_loop = false; busy_flag = 0; /* * only remember a returned * POLL_BUSY_LOOP if we asked for it */ } else if (busy_flag & mask) can_busy_loop = true; } if (res_in) *rinp = res_in; if (res_out) *routp = res_out; if (res_ex) *rexp = res_ex; cond_resched(); } wait->_qproc = NULL; if (retval || timed_out || signal_pending(current)) break; if (table.error) { retval = table.error; break; } /* only if found POLL_BUSY_LOOP sockets && not out of time */ if (can_busy_loop && !need_resched()) { if (!busy_start) { busy_start = busy_loop_current_time(); continue; } if (!busy_loop_timeout(busy_start)) continue; } busy_flag = 0; /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to * pointer to the expiry value. */ if (end_time && !to) { expire = timespec64_to_ktime(*end_time); to = &expire; } if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE, to, slack)) timed_out = 1; } poll_freewait(&table); return retval; } /* * We can actually return ERESTARTSYS instead of EINTR, but I'd * like to be certain this leads to no problems. So I return * EINTR just for safety. * * Update: ERESTARTSYS breaks at least the xview clock binary, so * I'm trying ERESTARTNOHAND which restart only when you want to. */ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec64 *end_time) { fd_set_bits fds; void *bits; int ret, max_fds; size_t size, alloc_size; struct fdtable *fdt; /* Allocate small arguments on the stack to save memory and be faster */ long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; ret = -EINVAL; if (n < 0) goto out_nofds; /* max_fds can increase, so grab it once to avoid race */ rcu_read_lock(); fdt = files_fdtable(current->files); max_fds = fdt->max_fds; rcu_read_unlock(); if (n > max_fds) n = max_fds; /* * We need 6 bitmaps (in/out/ex for both incoming and outgoing), * since we used fdset we need to allocate memory in units of * long-words. 
*/ size = FDS_BYTES(n); bits = stack_fds; if (size > sizeof(stack_fds) / 6) { /* Not enough space in on-stack array; must use kmalloc */ ret = -ENOMEM; if (size > (SIZE_MAX / 6)) goto out_nofds; alloc_size = 6 * size; bits = kvmalloc(alloc_size, GFP_KERNEL); if (!bits) goto out_nofds; } fds.in = bits; fds.out = bits + size; fds.ex = bits + 2*size; fds.res_in = bits + 3*size; fds.res_out = bits + 4*size; fds.res_ex = bits + 5*size; if ((ret = get_fd_set(n, inp, fds.in)) || (ret = get_fd_set(n, outp, fds.out)) || (ret = get_fd_set(n, exp, fds.ex))) goto out; zero_fd_set(n, fds.res_in); zero_fd_set(n, fds.res_out); zero_fd_set(n, fds.res_ex); ret = do_select(n, &fds, end_time); if (ret < 0) goto out; if (!ret) { ret = -ERESTARTNOHAND; if (signal_pending(current)) goto out; ret = 0; } if (set_fd_set(n, inp, fds.res_in) || set_fd_set(n, outp, fds.res_out) || set_fd_set(n, exp, fds.res_ex)) ret = -EFAULT; out: if (bits != stack_fds) kvfree(bits); out_nofds: return ret; } static int kern_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp) { struct timespec64 end_time, *to = NULL; struct __kernel_old_timeval tv; int ret; if (tvp) { if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) return -EINVAL; } ret = core_sys_select(n, inp, outp, exp, to); return poll_select_finish(&end_time, tvp, PT_TIMEVAL, ret); } SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp) { return kern_select(n, inp, outp, exp, tvp); } static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, void __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize, enum poll_time_type type) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { switch (type) { case PT_TIMESPEC: if (get_timespec64(&ts, tsp)) return -EFAULT; break; case PT_OLD_TIMESPEC: if (get_old_timespec32(&ts, tsp)) return -EFAULT; break; default: BUG(); } to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = core_sys_select(n, inp, outp, exp, to); return poll_select_finish(&end_time, tsp, type, ret); } /* * Most architectures can't handle 7-argument syscalls. So we provide a * 6-argument version where the sixth argument is a pointer to a structure * which has a pointer to the sigset_t itself followed by a size_t containing * the sigset size. 
*/ struct sigset_argpack { sigset_t __user *p; size_t size; }; static inline int get_sigset_argpack(struct sigset_argpack *to, struct sigset_argpack __user *from) { // the path is hot enough for overhead of copy_from_user() to matter if (from) { if (!user_read_access_begin(from, sizeof(*from))) return -EFAULT; unsafe_get_user(to->p, &from->p, Efault); unsafe_get_user(to->size, &from->size, Efault); user_read_access_end(); } return 0; Efault: user_access_end(); return -EFAULT; } SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, fd_set __user *, exp, struct __kernel_timespec __user *, tsp, void __user *, sig) { struct sigset_argpack x = {NULL, 0}; if (get_sigset_argpack(&x, sig)) return -EFAULT; return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_TIMESPEC); } #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp, fd_set __user *, exp, struct old_timespec32 __user *, tsp, void __user *, sig) { struct sigset_argpack x = {NULL, 0}; if (get_sigset_argpack(&x, sig)) return -EFAULT; return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_OLD_TIMESPEC); } #endif #ifdef __ARCH_WANT_SYS_OLD_SELECT struct sel_arg_struct { unsigned long n; fd_set __user *inp, *outp, *exp; struct __kernel_old_timeval __user *tvp; }; SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg) { struct sel_arg_struct a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp); } #endif struct poll_list { struct poll_list *next; unsigned int len; struct pollfd entries[]; }; #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) /* * Fish for pollable events on the pollfd->fd file descriptor. We're only * interested in events matching the pollfd->events mask, and the result * matching that mask is both recorded in pollfd->revents and returned. The * pwait poll_table will be used by the fd-provided poll handler for waiting, * if pwait->_qproc is non-NULL. */ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, bool *can_busy_poll, __poll_t busy_flag) { int fd = pollfd->fd; __poll_t mask = 0, filter; struct fd f; if (fd < 0) goto out; mask = EPOLLNVAL; f = fdget(fd); if (!f.file) goto out; /* userland u16 ->events contains POLL... bitmap */ filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP; pwait->_key = filter | busy_flag; mask = vfs_poll(f.file, pwait); if (mask & busy_flag) *can_busy_poll = true; mask &= filter; /* Mask out unneeded events. */ fdput(f); out: /* ... and so does ->revents */ pollfd->revents = mangle_poll(mask); return mask; } static int do_poll(struct poll_list *list, struct poll_wqueues *wait, struct timespec64 *end_time) { poll_table* pt = &wait->pt; ktime_t expire, *to = NULL; int timed_out = 0, count = 0; u64 slack = 0; __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; unsigned long busy_start = 0; /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { pt->_qproc = NULL; timed_out = 1; } if (end_time && !timed_out) slack = select_estimate_accuracy(end_time); for (;;) { struct poll_list *walk; bool can_busy_loop = false; for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; pfd = walk->entries; pfd_end = pfd + walk->len; for (; pfd != pfd_end; pfd++) { /* * Fish for events. 
If we found one, record it * and kill poll_table->_qproc, so we don't * needlessly register any other waiters after * this. They'll get immediately deregistered * when we break out and return. */ if (do_pollfd(pfd, pt, &can_busy_loop, busy_flag)) { count++; pt->_qproc = NULL; /* found something, stop busy polling */ busy_flag = 0; can_busy_loop = false; } } } /* * All waiters have already been registered, so don't provide * a poll_table->_qproc to them on the next loop iteration. */ pt->_qproc = NULL; if (!count) { count = wait->error; if (signal_pending(current)) count = -ERESTARTNOHAND; } if (count || timed_out) break; /* only if found POLL_BUSY_LOOP sockets && not out of time */ if (can_busy_loop && !need_resched()) { if (!busy_start) { busy_start = busy_loop_current_time(); continue; } if (!busy_loop_timeout(busy_start)) continue; } busy_flag = 0; /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to * pointer to the expiry value. */ if (end_time && !to) { expire = timespec64_to_ktime(*end_time); to = &expire; } if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack)) timed_out = 1; } return count; } #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ sizeof(struct pollfd)) static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, struct timespec64 *end_time) { struct poll_wqueues table; int err = -EFAULT, fdcount; /* Allocate small arguments on the stack to save memory and be faster - use long to make sure the buffer is aligned properly on 64 bit archs to avoid unaligned access */ long stack_pps[POLL_STACK_ALLOC/sizeof(long)]; struct poll_list *const head = (struct poll_list *)stack_pps; struct poll_list *walk = head; unsigned int todo = nfds; unsigned int len; if (nfds > rlimit(RLIMIT_NOFILE)) return -EINVAL; len = min_t(unsigned int, nfds, N_STACK_PPS); for (;;) { walk->next = NULL; walk->len = len; if (!len) break; if (copy_from_user(walk->entries, ufds + nfds-todo, sizeof(struct pollfd) * walk->len)) goto out_fds; if (walk->len >= todo) break; todo -= walk->len; len = min(todo, POLLFD_PER_PAGE); walk = walk->next = kmalloc(struct_size(walk, entries, len), GFP_KERNEL); if (!walk) { err = -ENOMEM; goto out_fds; } } poll_initwait(&table); fdcount = do_poll(head, &table, end_time); poll_freewait(&table); if (!user_write_access_begin(ufds, nfds * sizeof(*ufds))) goto out_fds; for (walk = head; walk; walk = walk->next) { struct pollfd *fds = walk->entries; unsigned int j; for (j = walk->len; j; fds++, ufds++, j--) unsafe_put_user(fds->revents, &ufds->revents, Efault); } user_write_access_end(); err = fdcount; out_fds: walk = head->next; while (walk) { struct poll_list *pos = walk; walk = walk->next; kfree(pos); } return err; Efault: user_write_access_end(); err = -EFAULT; goto out_fds; } static long do_restart_poll(struct restart_block *restart_block) { struct pollfd __user *ufds = restart_block->poll.ufds; int nfds = restart_block->poll.nfds; struct timespec64 *to = NULL, end_time; int ret; if (restart_block->poll.has_timeout) { end_time.tv_sec = restart_block->poll.tv_sec; end_time.tv_nsec = restart_block->poll.tv_nsec; to = &end_time; } ret = do_sys_poll(ufds, nfds, to); if (ret == -ERESTARTNOHAND) ret = set_restart_fn(restart_block, do_restart_poll); return ret; } SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout_msecs) { struct timespec64 end_time, *to = NULL; int ret; if (timeout_msecs >= 0) { to = &end_time; poll_select_set_timeout(to, timeout_msecs / 
MSEC_PER_SEC, NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC)); } ret = do_sys_poll(ufds, nfds, to); if (ret == -ERESTARTNOHAND) { struct restart_block *restart_block; restart_block = &current->restart_block; restart_block->poll.ufds = ufds; restart_block->poll.nfds = nfds; if (timeout_msecs >= 0) { restart_block->poll.tv_sec = end_time.tv_sec; restart_block->poll.tv_nsec = end_time.tv_nsec; restart_block->poll.has_timeout = 1; } else restart_block->poll.has_timeout = 0; ret = set_restart_fn(restart_block, do_restart_poll); } return ret; } SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { if (get_timespec64(&ts, tsp)) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret); } #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { if (get_old_timespec32(&ts, tsp)) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret); } #endif #ifdef CONFIG_COMPAT #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) /* * Ooo, nasty. We need here to frob 32-bit unsigned longs to * 64-bit unsigned longs. */ static int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, unsigned long *fdset) { if (ufdset) { return compat_get_bitmap(fdset, ufdset, nr); } else { zero_fd_set(nr, fdset); return 0; } } static int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, unsigned long *fdset) { if (!ufdset) return 0; return compat_put_bitmap(ufdset, fdset, nr); } /* * This is a virtual copy of sys_select from fs/select.c and probably * should be compared to it from time to time */ /* * We can actually return ERESTARTSYS instead of EINTR, but I'd * like to be certain this leads to no problems. So I return * EINTR just for safety. * * Update: ERESTARTSYS breaks at least the xview clock binary, so * I'm trying ERESTARTNOHAND which restart only when you want to. */ static int compat_core_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct timespec64 *end_time) { fd_set_bits fds; void *bits; int size, max_fds, ret = -EINVAL; struct fdtable *fdt; long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; if (n < 0) goto out_nofds; /* max_fds can increase, so grab it once to avoid race */ rcu_read_lock(); fdt = files_fdtable(current->files); max_fds = fdt->max_fds; rcu_read_unlock(); if (n > max_fds) n = max_fds; /* * We need 6 bitmaps (in/out/ex for both incoming and outgoing), * since we used fdset we need to allocate memory in units of * long-words. 
*/ size = FDS_BYTES(n); bits = stack_fds; if (size > sizeof(stack_fds) / 6) { bits = kmalloc_array(6, size, GFP_KERNEL); ret = -ENOMEM; if (!bits) goto out_nofds; } fds.in = (unsigned long *) bits; fds.out = (unsigned long *) (bits + size); fds.ex = (unsigned long *) (bits + 2*size); fds.res_in = (unsigned long *) (bits + 3*size); fds.res_out = (unsigned long *) (bits + 4*size); fds.res_ex = (unsigned long *) (bits + 5*size); if ((ret = compat_get_fd_set(n, inp, fds.in)) || (ret = compat_get_fd_set(n, outp, fds.out)) || (ret = compat_get_fd_set(n, exp, fds.ex))) goto out; zero_fd_set(n, fds.res_in); zero_fd_set(n, fds.res_out); zero_fd_set(n, fds.res_ex); ret = do_select(n, &fds, end_time); if (ret < 0) goto out; if (!ret) { ret = -ERESTARTNOHAND; if (signal_pending(current)) goto out; ret = 0; } if (compat_set_fd_set(n, inp, fds.res_in) || compat_set_fd_set(n, outp, fds.res_out) || compat_set_fd_set(n, exp, fds.res_ex)) ret = -EFAULT; out: if (bits != stack_fds) kfree(bits); out_nofds: return ret; } static int do_compat_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct old_timeval32 __user *tvp) { struct timespec64 end_time, *to = NULL; struct old_timeval32 tv; int ret; if (tvp) { if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) return -EINVAL; } ret = compat_core_sys_select(n, inp, outp, exp, to); return poll_select_finish(&end_time, tvp, PT_OLD_TIMEVAL, ret); } COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct old_timeval32 __user *, tvp) { return do_compat_select(n, inp, outp, exp, tvp); } struct compat_sel_arg_struct { compat_ulong_t n; compat_uptr_t inp; compat_uptr_t outp; compat_uptr_t exp; compat_uptr_t tvp; }; COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) { struct compat_sel_arg_struct a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), compat_ptr(a.exp), compat_ptr(a.tvp)); } static long do_compat_pselect(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, void __user *tsp, compat_sigset_t __user *sigmask, compat_size_t sigsetsize, enum poll_time_type type) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { switch (type) { case PT_OLD_TIMESPEC: if (get_old_timespec32(&ts, tsp)) return -EFAULT; break; case PT_TIMESPEC: if (get_timespec64(&ts, tsp)) return -EFAULT; break; default: BUG(); } to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); return poll_select_finish(&end_time, tsp, type, ret); } struct compat_sigset_argpack { compat_uptr_t p; compat_size_t size; }; static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to, struct compat_sigset_argpack __user *from) { if (from) { if (!user_read_access_begin(from, sizeof(*from))) return -EFAULT; unsafe_get_user(to->p, &from->p, Efault); unsafe_get_user(to->size, &from->size, Efault); user_read_access_end(); } return 0; Efault: user_access_end(); return -EFAULT; } COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct 
__kernel_timespec __user *, tsp, void __user *, sig) { struct compat_sigset_argpack x = {0, 0}; if (get_compat_sigset_argpack(&x, sig)) return -EFAULT; return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p), x.size, PT_TIMESPEC); } #if defined(CONFIG_COMPAT_32BIT_TIME) COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp, compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, struct old_timespec32 __user *, tsp, void __user *, sig) { struct compat_sigset_argpack x = {0, 0}; if (get_compat_sigset_argpack(&x, sig)) return -EFAULT; return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p), x.size, PT_OLD_TIMESPEC); } #endif #if defined(CONFIG_COMPAT_32BIT_TIME) COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { if (get_old_timespec32(&ts, tsp)) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret); } #endif /* New compat syscall for 64 bit time_t*/ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, unsigned int, nfds, struct __kernel_timespec __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 ts, end_time, *to = NULL; int ret; if (tsp) { if (get_timespec64(&ts, tsp)) return -EFAULT; to = &end_time; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret); } #endif
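/*
 * Illustrative aside, not part of fs/select.c: the driver's side of the
 * machinery above. do_select() and do_poll() call vfs_poll(), which invokes
 * a file's ->poll method with the poll_table whose _qproc was pointed at
 * __pollwait() by poll_initwait(). The driver only hands its wait queue to
 * poll_wait() and reports the currently ready events; __pollwait() takes
 * care of queueing, and pollwake() of waking the poller. All mydev_* names
 * below are hypothetical; this is a minimal sketch, not code from the file.
 */
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(mydev_waitq);	/* woken from the device's IRQ path */
static bool mydev_has_data;			/* set when data arrives */

static __poll_t mydev_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = 0;

	/* Registers a poll_table_entry on mydev_waitq via __pollwait(),
	 * but only while _qproc is non-NULL (i.e. on the first pass). */
	poll_wait(file, &mydev_waitq, wait);

	if (READ_ONCE(mydev_has_data))
		mask |= EPOLLIN | EPOLLRDNORM;	/* readable */
	mask |= EPOLLOUT | EPOLLWRNORM;		/* always writable in this sketch */

	return mask;
}

static const struct file_operations mydev_fops = {
	.poll = mydev_poll,
	/* .owner, .read, .write, .open, ... elided */
};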
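/*
 * And the userspace view of the same syscalls, again only a hedged sketch:
 * ppoll() takes a relative timeout and installs @sigmask atomically for the
 * duration of the wait (set_user_sigmask() above, undone via
 * restore_saved_sigmask_unless() in poll_select_finish()); on return the
 * kernel also writes the remaining time back through poll_select_finish(),
 * unless the task has STICKY_TIMEOUTS in its personality. Below, stdin is
 * polled for readability for at most five seconds with SIGINT blocked
 * during the wait.
 */
#define _GNU_SOURCE		/* for ppoll() in glibc */
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };
	struct timespec timeout = { .tv_sec = 5, .tv_nsec = 0 };
	sigset_t block;
	int ret;

	/* Block SIGINT for the duration of the wait; the original mask is
	 * restored when ppoll() returns. */
	sigemptyset(&block);
	sigaddset(&block, SIGINT);

	ret = ppoll(&pfd, 1, &timeout, &block);
	if (ret > 0 && (pfd.revents & POLLIN))
		printf("stdin is readable\n");
	else if (ret == 0)
		printf("timed out after 5 seconds\n");
	else
		perror("ppoll");
	return 0;
}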
// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IPv4 Forwarding Information Base: FIB frontend. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #include <linux/module.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/capability.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_addr.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/cache.h> #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> #include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> #include <net/tcp.h> #include <net/sock.h> #include <net/arp.h> #include <net/ip_fib.h> #include <net/nexthop.h> #include <net/rtnetlink.h> #include <net/xfrm.h> #include <net/l3mdev.h> #include <net/lwtunnel.h> #include <trace/events/fib.h> #ifndef CONFIG_IP_MULTIPLE_TABLES static int __net_init fib4_rules_init(struct net *net) { struct fib_table *local_table, *main_table; main_table = fib_trie_table(RT_TABLE_MAIN, NULL); if (!main_table) return -ENOMEM; local_table = fib_trie_table(RT_TABLE_LOCAL, main_table); if (!local_table) goto fail; hlist_add_head_rcu(&local_table->tb_hlist, &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); hlist_add_head_rcu(&main_table->tb_hlist, &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); return 0; fail: fib_free_table(main_table); return -ENOMEM; } #else struct fib_table *fib_new_table(struct net *net, u32 id) { struct fib_table *tb, *alias = NULL; unsigned int h; if (id == 0) id = RT_TABLE_MAIN; tb = fib_get_table(net, id); if (tb) return tb; if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); tb = fib_trie_table(id, alias); if (!tb) return NULL; switch (id) { case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, tb); break; case RT_TABLE_DEFAULT: rcu_assign_pointer(net->ipv4.fib_default, tb); break; default: break; } h = id & (FIB_TABLE_HASHSZ - 1); hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); return tb; } EXPORT_SYMBOL_GPL(fib_new_table); /* caller must hold either rtnl or rcu read lock */ struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; struct hlist_head *head; unsigned int h; if (id == 0) id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb_hlist, lockdep_rtnl_is_held()) { if (tb->tb_id == id) return tb; } return NULL; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ static void fib_replace_table(struct net *net, struct fib_table *old, struct fib_table *new) { #ifdef CONFIG_IP_MULTIPLE_TABLES switch (new->tb_id) { case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, new); break; case RT_TABLE_DEFAULT: rcu_assign_pointer(net->ipv4.fib_default, new); break; default: break; } #endif /* replace the old table in the hlist */ hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist); } int fib_unmerge(struct net *net) { struct fib_table *old, *new, 
*main_table; /* attempt to fetch local table if it has been allocated */ old = fib_get_table(net, RT_TABLE_LOCAL); if (!old) return 0; new = fib_trie_unmerge(old); if (!new) return -ENOMEM; /* table is already unmerged */ if (new == old) return 0; /* replace merged table with clean table */ fib_replace_table(net, old, new); fib_free_table(old); /* attempt to fetch main table if it has been allocated */ main_table = fib_get_table(net, RT_TABLE_MAIN); if (!main_table) return 0; /* flush local entries from main table */ fib_table_flush_external(main_table); return 0; } void fib_flush(struct net *net) { int flushed = 0; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct hlist_node *tmp; struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) flushed += fib_table_flush(net, tb, false); } if (flushed) rt_cache_flush(net); } /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. */ static inline unsigned int __inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr, u32 tb_id) { struct flowi4 fl4 = { .daddr = addr }; struct fib_result res; unsigned int ret = RTN_BROADCAST; struct fib_table *table; if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) return RTN_BROADCAST; if (ipv4_is_multicast(addr)) return RTN_MULTICAST; rcu_read_lock(); table = fib_get_table(net, tb_id); if (table) { ret = RTN_UNICAST; if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) { struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0); if (!dev || dev == nhc->nhc_dev) ret = res.type; } } rcu_read_unlock(); return ret; } unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id) { return __inet_dev_addr_type(net, NULL, addr, tb_id); } EXPORT_SYMBOL(inet_addr_type_table); unsigned int inet_addr_type(struct net *net, __be32 addr) { return __inet_dev_addr_type(net, NULL, addr, RT_TABLE_LOCAL); } EXPORT_SYMBOL(inet_addr_type); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; return __inet_dev_addr_type(net, dev, addr, rt_table); } EXPORT_SYMBOL(inet_dev_addr_type); /* inet_addr_type with dev == NULL but using the table from a dev * if one is associated */ unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr) { u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; return __inet_dev_addr_type(net, NULL, addr, rt_table); } EXPORT_SYMBOL(inet_addr_type_dev_table); __be32 fib_compute_spec_dst(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev; struct fib_result res; struct rtable *rt; struct net *net; int scope; rt = skb_rtable(skb); if ((rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL)) == RTCF_LOCAL) return ip_hdr(skb)->daddr; in_dev = __in_dev_get_rcu(dev); net = dev_net(dev); scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, .flowi4_scope = scope, .flowi4_mark = vmark ? 
skb->mark : 0, }; if (!fib_lookup(net, &fl4, &res, 0)) return fib_result_prefsrc(net, &res); } else { scope = RT_SCOPE_LINK; } return inet_select_addr(dev, ip_hdr(skb)->saddr, scope); } bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) { bool dev_match = false; #ifdef CONFIG_IP_ROUTE_MULTIPATH if (unlikely(fi->nh)) { dev_match = nexthop_uses_dev(fi->nh, dev); } else { int ret; for (ret = 0; ret < fib_info_num_path(fi); ret++) { const struct fib_nh_common *nhc = fib_info_nhc(fi, ret); if (nhc_l3mdev_matches_dev(nhc, dev)) { dev_match = true; break; } } } #else if (fib_info_nhc(fi, 0)->nhc_dev == dev) dev_match = true; #endif return dev_match; } EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev); /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. * - figure out what "logical" interface this packet arrived * and calculate "specific destination" address. * - check, that packet arrived from expected physical interface. * called with rcu_read_lock() */ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { struct net *net = dev_net(dev); struct flow_keys flkeys; int ret, no_addr; struct fib_result res; struct flowi4 fl4; bool dev_match; fl4.flowi4_oif = 0; fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_multipath_hash = 0; no_addr = idev->ifa_list == NULL; fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; if (!fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys)) { fl4.flowi4_proto = 0; fl4.fl4_sport = 0; fl4.fl4_dport = 0; } else { swap(fl4.fl4_sport, fl4.fl4_dport); } if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) goto e_inval; fib_combine_itag(itag, &res); dev_match = fib_info_nh_uses_dev(res.fi, dev); /* This is not common, loopback packets retain skb_dst so normally they * would not even hit this slow path. */ dev_match = dev_match || (res.type == RTN_LOCAL && dev == net->loopback_dev); if (dev_match) { ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; return ret; } if (no_addr) goto last_resort; if (rpf == 1) goto e_rpf; fl4.flowi4_oif = dev->ifindex; ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; } return ret; last_resort: if (rpf) goto e_rpf; *itag = 0; return 0; e_inval: return -EINVAL; e_rpf: return -EXDEV; } /* Ignore rp_filter for packets protected by IPsec. */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); struct net *net = dev_net(dev); if (!r && !fib_num_tclassid_users(net) && (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { if (IN_DEV_ACCEPT_LOCAL(idev)) goto ok; /* with custom local routes in place, checking local addresses * only will be too optimistic, with custom rules, checking * local addresses only can be too strict, e.g. 
due to vrf */ if (net->ipv4.fib_has_custom_local_routes || fib4_has_custom_rules(net)) goto full_check; /* Within the same container, it is regarded as a martian source, * and the same host but different containers are not. */ if (inet_lookup_ifaddr_rcu(net, src)) return -EINVAL; ok: *itag = 0; return 0; } full_check: return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); } static inline __be32 sk_extract_addr(struct sockaddr *addr) { return ((struct sockaddr_in *) addr)->sin_addr.s_addr; } static int put_rtax(struct nlattr *mx, int len, int type, u32 value) { struct nlattr *nla; nla = (struct nlattr *) ((char *) mx + len); nla->nla_type = type; nla->nla_len = nla_attr_size(4); *(u32 *) nla_data(nla) = value; return len + nla_total_size(4); } static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, struct fib_config *cfg) { __be32 addr; int plen; memset(cfg, 0, sizeof(*cfg)); cfg->fc_nlinfo.nl_net = net; if (rt->rt_dst.sa_family != AF_INET) return -EAFNOSUPPORT; /* * Check mask for validity: * a) it must be contiguous. * b) destination must have all host bits clear. * c) if application forgot to set correct family (AF_INET), * reject request unless it is absolutely clear i.e. * both family and mask are zero. */ plen = 32; addr = sk_extract_addr(&rt->rt_dst); if (!(rt->rt_flags & RTF_HOST)) { __be32 mask = sk_extract_addr(&rt->rt_genmask); if (rt->rt_genmask.sa_family != AF_INET) { if (mask || rt->rt_genmask.sa_family) return -EAFNOSUPPORT; } if (bad_mask(mask, addr)) return -EINVAL; plen = inet_mask_len(mask); } cfg->fc_dst_len = plen; cfg->fc_dst = addr; if (cmd != SIOCDELRT) { cfg->fc_nlflags = NLM_F_CREATE; cfg->fc_protocol = RTPROT_BOOT; } if (rt->rt_metric) cfg->fc_priority = rt->rt_metric - 1; if (rt->rt_flags & RTF_REJECT) { cfg->fc_scope = RT_SCOPE_HOST; cfg->fc_type = RTN_UNREACHABLE; return 0; } cfg->fc_scope = RT_SCOPE_NOWHERE; cfg->fc_type = RTN_UNICAST; if (rt->rt_dev) { char *colon; struct net_device *dev; char devname[IFNAMSIZ]; if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) return -EFAULT; devname[IFNAMSIZ-1] = 0; colon = strchr(devname, ':'); if (colon) *colon = 0; dev = __dev_get_by_name(net, devname); if (!dev) return -ENODEV; cfg->fc_oif = dev->ifindex; cfg->fc_table = l3mdev_fib_table(dev); if (colon) { const struct in_ifaddr *ifa; struct in_device *in_dev; in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return -ENODEV; *colon = ':'; rcu_read_lock(); in_dev_for_each_ifa_rcu(ifa, in_dev) { if (strcmp(ifa->ifa_label, devname) == 0) break; } rcu_read_unlock(); if (!ifa) return -ENODEV; cfg->fc_prefsrc = ifa->ifa_local; } } addr = sk_extract_addr(&rt->rt_gateway); if (rt->rt_gateway.sa_family == AF_INET && addr) { unsigned int addr_type; cfg->fc_gw4 = addr; cfg->fc_gw_family = AF_INET; addr_type = inet_addr_type_table(net, addr, cfg->fc_table); if (rt->rt_flags & RTF_GATEWAY && addr_type == RTN_UNICAST) cfg->fc_scope = RT_SCOPE_UNIVERSE; } if (!cfg->fc_table) cfg->fc_table = RT_TABLE_MAIN; if (cmd == SIOCDELRT) return 0; if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family) return -EINVAL; if (cfg->fc_scope == RT_SCOPE_NOWHERE) cfg->fc_scope = RT_SCOPE_LINK; if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { struct nlattr *mx; int len = 0; mx = kcalloc(3, nla_total_size(4), GFP_KERNEL); if (!mx) return -ENOMEM; if (rt->rt_flags & RTF_MTU) len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); if (rt->rt_flags & RTF_WINDOW) len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); if (rt->rt_flags & RTF_IRTT) len = put_rtax(mx, 
len, RTAX_RTT, rt->rt_irtt << 3); cfg->fc_mx = mx; cfg->fc_mx_len = len; } return 0; } /* * Handle IP routing ioctl calls. * These are used to manipulate the routing tables */ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) { struct fib_config cfg; int err; switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; rtnl_lock(); err = rtentry_to_fib_config(net, cmd, rt, &cfg); if (err == 0) { struct fib_table *tb; if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) err = fib_table_delete(net, tb, &cfg, NULL); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) err = fib_table_insert(net, tb, &cfg, NULL); else err = -ENOBUFS; } /* allocated by rtentry_to_fib_config() */ kfree(cfg.fc_mx); } rtnl_unlock(); return err; } return -EINVAL; } const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 }, [RTA_DST] = { .type = NLA_U32 }, [RTA_SRC] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_GATEWAY] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_FLOW] = { .type = NLA_U32 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, [RTA_TABLE] = { .type = NLA_U32 }, [RTA_IP_PROTO] = { .type = NLA_U8 }, [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, [RTA_NH_ID] = { .type = NLA_U32 }, }; int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, struct netlink_ext_ack *extack) { struct rtvia *via; int alen; if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) { NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA"); return -EINVAL; } via = nla_data(nla); alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr); switch (via->rtvia_family) { case AF_INET: if (alen != sizeof(__be32)) { NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA"); return -EINVAL; } cfg->fc_gw_family = AF_INET; cfg->fc_gw4 = *((__be32 *)via->rtvia_addr); break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) if (alen != sizeof(struct in6_addr)) { NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA"); return -EINVAL; } cfg->fc_gw_family = AF_INET6; cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr); #else NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); return -EINVAL; #endif break; default: NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA"); return -EINVAL; } return 0; } static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_config *cfg, struct netlink_ext_ack *extack) { bool has_gw = false, has_via = false; struct nlattr *attr; int err, remaining; struct rtmsg *rtm; err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy, extack); if (err < 0) goto errout; memset(cfg, 0, sizeof(*cfg)); rtm = nlmsg_data(nlh); if (!inet_validate_dscp(rtm->rtm_tos)) { NL_SET_ERR_MSG(extack, "Invalid dsfield (tos): ECN bits must be 0"); err = -EINVAL; goto errout; } cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos); cfg->fc_dst_len = rtm->rtm_dst_len; cfg->fc_table = rtm->rtm_table; cfg->fc_protocol = rtm->rtm_protocol; cfg->fc_scope = rtm->rtm_scope; cfg->fc_type = rtm->rtm_type; cfg->fc_flags = rtm->rtm_flags; cfg->fc_nlflags = 
nlh->nlmsg_flags; cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = net; if (cfg->fc_type > RTN_MAX) { NL_SET_ERR_MSG(extack, "Invalid route type"); err = -EINVAL; goto errout; } nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { switch (nla_type(attr)) { case RTA_DST: cfg->fc_dst = nla_get_be32(attr); break; case RTA_OIF: cfg->fc_oif = nla_get_u32(attr); break; case RTA_GATEWAY: has_gw = true; cfg->fc_gw4 = nla_get_be32(attr); if (cfg->fc_gw4) cfg->fc_gw_family = AF_INET; break; case RTA_VIA: has_via = true; err = fib_gw_from_via(cfg, attr, extack); if (err) goto errout; break; case RTA_PRIORITY: cfg->fc_priority = nla_get_u32(attr); break; case RTA_PREFSRC: cfg->fc_prefsrc = nla_get_be32(attr); break; case RTA_METRICS: cfg->fc_mx = nla_data(attr); cfg->fc_mx_len = nla_len(attr); break; case RTA_MULTIPATH: err = lwtunnel_valid_encap_type_attr(nla_data(attr), nla_len(attr), extack); if (err < 0) goto errout; cfg->fc_mp = nla_data(attr); cfg->fc_mp_len = nla_len(attr); break; case RTA_FLOW: cfg->fc_flow = nla_get_u32(attr); break; case RTA_TABLE: cfg->fc_table = nla_get_u32(attr); break; case RTA_ENCAP: cfg->fc_encap = attr; break; case RTA_ENCAP_TYPE: cfg->fc_encap_type = nla_get_u16(attr); err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); if (err < 0) goto errout; break; case RTA_NH_ID: cfg->fc_nh_id = nla_get_u32(attr); break; } } if (cfg->fc_nh_id) { if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_encap || cfg->fc_mp) { NL_SET_ERR_MSG(extack, "Nexthop specification and nexthop id are mutually exclusive"); return -EINVAL; } } if (has_gw && has_via) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); return -EINVAL; } if (!cfg->fc_table) cfg->fc_table = RT_TABLE_MAIN; return 0; errout: return err; } static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_config cfg; struct fib_table *tb; int err; err = rtm_to_fib_config(net, skb, nlh, &cfg, extack); if (err < 0) goto errout; if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) { NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); err = -EINVAL; goto errout; } tb = fib_get_table(net, cfg.fc_table); if (!tb) { NL_SET_ERR_MSG(extack, "FIB table does not exist"); err = -ESRCH; goto errout; } err = fib_table_delete(net, tb, &cfg, extack); errout: return err; } static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_config cfg; struct fib_table *tb; int err; err = rtm_to_fib_config(net, skb, nlh, &cfg, extack); if (err < 0) goto errout; tb = fib_new_table(net, cfg.fc_table); if (!tb) { err = -ENOBUFS; goto errout; } err = fib_table_insert(net, tb, &cfg, extack); if (!err && cfg.fc_type == RTN_LOCAL) net->ipv4.fib_has_custom_local_routes = true; errout: return err; } int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[RTA_MAX + 1]; struct rtmsg *rtm; int err, i; if (filter->rtnl_held) ASSERT_RTNL(); if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request"); return -EINVAL; } rtm = nlmsg_data(nlh); if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_scope) { NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump 
request"); return -EINVAL; } if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) { NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request"); return -EINVAL; } if (rtm->rtm_flags & RTM_F_CLONED) filter->dump_routes = false; else filter->dump_exceptions = false; filter->flags = rtm->rtm_flags; filter->protocol = rtm->rtm_protocol; filter->rt_type = rtm->rtm_type; filter->table_id = rtm->rtm_table; err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); if (err < 0) return err; for (i = 0; i <= RTA_MAX; ++i) { int ifindex; if (!tb[i]) continue; switch (i) { case RTA_TABLE: filter->table_id = nla_get_u32(tb[i]); break; case RTA_OIF: ifindex = nla_get_u32(tb[i]); if (filter->rtnl_held) filter->dev = __dev_get_by_index(net, ifindex); else filter->dev = dev_get_by_index_rcu(net, ifindex); if (!filter->dev) return -ENODEV; break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); return -EINVAL; } } if (filter->flags || filter->protocol || filter->rt_type || filter->table_id || filter->dev) { filter->filter_set = 1; cb->answer_flags = NLM_F_DUMP_FILTERED; } return 0; } EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req); static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { struct fib_dump_filter filter = { .dump_routes = true, .dump_exceptions = true, .rtnl_held = false, }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_head *head; int dumped = 0, err = 0; rcu_read_lock(); if (cb->strict_check) { err = ip_valid_fib_dump_req(net, nlh, &filter, cb); if (err < 0) goto unlock; } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(nlh); filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED); } /* ipv4 does not use prefix flag */ if (filter.flags & RTM_F_PREFIX) goto unlock; if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { if (rtnl_msg_family(cb->nlh) != PF_INET) goto unlock; NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); err = -ENOENT; goto unlock; } err = fib_table_dump(tb, skb, cb, &filter); goto unlock; } s_h = cb->args[0]; s_e = cb->args[1]; err = 0; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb_hlist) { if (e < s_e) goto next; if (dumped) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); err = fib_table_dump(tb, skb, cb, &filter); if (err < 0) goto out; dumped = 1; next: e++; } } /* Don't let NLM_DONE coalesce into a message, even if it could. * Some user space expects NLM_DONE in a separate recv(). */ err = skb->len; out: cb->args[1] = e; cb->args[0] = h; unlock: rcu_read_unlock(); return err; } /* Prepare and feed intra-kernel routing request. * Really, it should be netlink message, but :-( netlink * can be not configured, so that we feed it directly * to fib engine. It is legal, because all events occur * only when netlink is already locked. 
*/ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa, u32 rt_priority) { struct net *net = dev_net(ifa->ifa_dev->dev); u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev); struct fib_table *tb; struct fib_config cfg = { .fc_protocol = RTPROT_KERNEL, .fc_type = type, .fc_dst = dst, .fc_dst_len = dst_len, .fc_priority = rt_priority, .fc_prefsrc = ifa->ifa_local, .fc_oif = ifa->ifa_dev->dev->ifindex, .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, .fc_nlinfo = { .nl_net = net, }, }; if (!tb_id) tb_id = (type == RTN_UNICAST) ? RT_TABLE_MAIN : RT_TABLE_LOCAL; tb = fib_new_table(net, tb_id); if (!tb) return; cfg.fc_table = tb->tb_id; if (type != RTN_LOCAL) cfg.fc_scope = RT_SCOPE_LINK; else cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) fib_table_insert(net, tb, &cfg, NULL); else fib_table_delete(net, tb, &cfg, NULL); } void fib_add_ifaddr(struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; struct in_ifaddr *prim = ifa; __be32 mask = ifa->ifa_mask; __be32 addr = ifa->ifa_local; __be32 prefix = ifa->ifa_address & mask; if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, prefix, mask); if (!prim) { pr_warn("%s: bug: prim == NULL\n", __func__); return; } } fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim, 0); if (!(dev->flags & IFF_UP)) return; /* Add broadcast address, if it is explicitly assigned. */ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); arp_invalidate(dev, ifa->ifa_broadcast, false); } if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE)) fib_magic(RTM_NEWROUTE, dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim, ifa->ifa_rt_priority); /* Add the network broadcast address, when it makes sense */ if (ifa->ifa_prefixlen < 31) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); arp_invalidate(dev, prefix | ~mask, false); } } } void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric) { __be32 prefix = ifa->ifa_address & ifa->ifa_mask; struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; if (!(dev->flags & IFF_UP) || ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) || ipv4_is_zeronet(prefix) || (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32)) return; /* add the new */ fib_magic(RTM_NEWROUTE, dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, prefix, ifa->ifa_prefixlen, ifa, new_metric); /* delete the old */ fib_magic(RTM_DELROUTE, dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, prefix, ifa->ifa_prefixlen, ifa, ifa->ifa_rt_priority); } /* Delete primary or secondary address. * Optionally, on secondary address promotion consider the addresses * from subnet iprim as deleted, even if they are in device list. * In this case the secondary ifa can be in device list. 
*/ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) { struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; struct in_ifaddr *ifa1; struct in_ifaddr *prim = ifa, *prim1 = NULL; __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; __be32 any = ifa->ifa_address & ifa->ifa_mask; #define LOCAL_OK 1 #define BRD_OK 2 #define BRD0_OK 4 #define BRD1_OK 8 unsigned int ok = 0; int subnet = 0; /* Primary network */ int gone = 1; /* Address is missing */ int same_prefsrc = 0; /* Another primary with same IP */ if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (!prim) { /* if the device has been deleted, we don't perform * address promotion */ if (!in_dev->dead) pr_warn("%s: bug: prim == NULL\n", __func__); return; } if (iprim && iprim != prim) { pr_warn("%s: bug: iprim != prim\n", __func__); return; } } else if (!ipv4_is_zeronet(any) && (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) { if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE)) fib_magic(RTM_DELROUTE, dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, any, ifa->ifa_prefixlen, prim, 0); subnet = 1; } if (in_dev->dead) goto no_promotions; /* Deletion is more complicated than add. * We should take care of not to delete too much :-) * * Scan address list to be sure that addresses are really gone. */ rcu_read_lock(); in_dev_for_each_ifa_rcu(ifa1, in_dev) { if (ifa1 == ifa) { /* promotion, keep the IP */ gone = 0; continue; } /* Ignore IFAs from our subnet */ if (iprim && ifa1->ifa_mask == iprim->ifa_mask && inet_ifa_match(ifa1->ifa_address, iprim)) continue; /* Ignore ifa1 if it uses different primary IP (prefsrc) */ if (ifa1->ifa_flags & IFA_F_SECONDARY) { /* Another address from our subnet? */ if (ifa1->ifa_mask == prim->ifa_mask && inet_ifa_match(ifa1->ifa_address, prim)) prim1 = prim; else { /* We reached the secondaries, so * same_prefsrc should be determined. */ if (!same_prefsrc) continue; /* Search new prim1 if ifa1 is not * using the current prim1 */ if (!prim1 || ifa1->ifa_mask != prim1->ifa_mask || !inet_ifa_match(ifa1->ifa_address, prim1)) prim1 = inet_ifa_byprefix(in_dev, ifa1->ifa_address, ifa1->ifa_mask); if (!prim1) continue; if (prim1->ifa_local != prim->ifa_local) continue; } } else { if (prim->ifa_local != ifa1->ifa_local) continue; prim1 = ifa1; if (prim != prim1) same_prefsrc = 1; } if (ifa->ifa_local == ifa1->ifa_local) ok |= LOCAL_OK; if (ifa->ifa_broadcast == ifa1->ifa_broadcast) ok |= BRD_OK; if (brd == ifa1->ifa_broadcast) ok |= BRD1_OK; if (any == ifa1->ifa_broadcast) ok |= BRD0_OK; /* primary has network specific broadcasts */ if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) { __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask; __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask; if (!ipv4_is_zeronet(any1)) { if (ifa->ifa_broadcast == brd1 || ifa->ifa_broadcast == any1) ok |= BRD_OK; if (brd == brd1 || brd == any1) ok |= BRD1_OK; if (any == brd1 || any == any1) ok |= BRD0_OK; } } } rcu_read_unlock(); no_promotions: if (!(ok & BRD_OK)) fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); if (subnet && ifa->ifa_prefixlen < 31) { if (!(ok & BRD1_OK)) fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim, 0); if (!(ok & BRD0_OK)) fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim, 0); } if (!(ok & LOCAL_OK)) { unsigned int addr_type; fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim, 0); /* Check, that this local address finally disappeared. 
*/ addr_type = inet_addr_type_dev_table(dev_net(dev), dev, ifa->ifa_local); if (gone && addr_type != RTN_LOCAL) { /* And the last, but not the least thing. * We must flush stray FIB entries. * * First of all, we scan fib_info list searching * for stray nexthop entries, then ignite fib_flush. */ if (fib_sync_down_addr(dev, ifa->ifa_local)) fib_flush(dev_net(dev)); } } #undef LOCAL_OK #undef BRD_OK #undef BRD0_OK #undef BRD1_OK } static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) { struct fib_result res; struct flowi4 fl4 = { .flowi4_mark = frn->fl_mark, .daddr = frn->fl_addr, .flowi4_tos = frn->fl_tos, .flowi4_scope = frn->fl_scope, }; struct fib_table *tb; rcu_read_lock(); tb = fib_get_table(net, frn->tb_id_in); frn->err = -ENOENT; if (tb) { local_bh_disable(); frn->tb_id = tb->tb_id; frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); if (!frn->err) { frn->prefixlen = res.prefixlen; frn->nh_sel = res.nh_sel; frn->type = res.type; frn->scope = res.scope; } local_bh_enable(); } rcu_read_unlock(); } static void nl_fib_input(struct sk_buff *skb) { struct net *net; struct fib_result_nl *frn; struct nlmsghdr *nlh; u32 portid; net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); if (skb->len < nlmsg_total_size(sizeof(*frn)) || skb->len < nlh->nlmsg_len || nlmsg_len(nlh) < sizeof(*frn)) return; skb = netlink_skb_clone(skb, GFP_KERNEL); if (!skb) return; nlh = nlmsg_hdr(skb); frn = nlmsg_data(nlh); nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ nlmsg_unicast(net->ipv4.fibnl, skb, portid); } static int __net_init nl_fib_lookup_init(struct net *net) { struct sock *sk; struct netlink_kernel_cfg cfg = { .input = nl_fib_input, }; sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg); if (!sk) return -EAFNOSUPPORT; net->ipv4.fibnl = sk; return 0; } static void nl_fib_lookup_exit(struct net *net) { netlink_kernel_release(net->ipv4.fibnl); net->ipv4.fibnl = NULL; } static void fib_disable_ip(struct net_device *dev, unsigned long event, bool force) { if (fib_sync_down_dev(dev, event, force)) fib_flush(dev_net(dev)); else rt_cache_flush(dev_net(dev)); arp_ifdown(dev); } static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_ifaddr *ifa = ptr; struct net_device *dev = ifa->ifa_dev->dev; struct net *net = dev_net(dev); switch (event) { case NETDEV_UP: fib_add_ifaddr(ifa); #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); atomic_inc(&net->ipv4.dev_addr_genid); if (!ifa->ifa_dev->ifa_list) { /* Last address was deleted from this interface. * Disable IP. 
*/ fib_disable_ip(dev, event, true); } else { rt_cache_flush(dev_net(dev)); } break; } return NOTIFY_DONE; } static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *upper_info = ptr; struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); struct in_ifaddr *ifa; unsigned int flags; if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, event, true); rt_flush_dev(dev); return NOTIFY_DONE; } in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return NOTIFY_DONE; switch (event) { case NETDEV_UP: in_dev_for_each_ifa_rtnl(ifa, in_dev) { fib_add_ifaddr(ifa); } #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(net); break; case NETDEV_DOWN: fib_disable_ip(dev, event, false); break; case NETDEV_CHANGE: flags = dev_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); rt_cache_flush(net); break; case NETDEV_CHANGEMTU: fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ if (upper_info->upper_dev && netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } return NOTIFY_DONE; } static struct notifier_block fib_inetaddr_notifier = { .notifier_call = fib_inetaddr_event, }; static struct notifier_block fib_netdev_notifier = { .notifier_call = fib_netdev_event, }; static int __net_init ip_fib_net_init(struct net *net) { int err; size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ; err = fib4_notifier_init(net); if (err) return err; #ifdef CONFIG_IP_ROUTE_MULTIPATH /* Default to 3-tuple */ net->ipv4.sysctl_fib_multipath_hash_fields = FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK; #endif /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL); if (!net->ipv4.fib_table_hash) { err = -ENOMEM; goto err_table_hash_alloc; } err = fib4_rules_init(net); if (err < 0) goto err_rules_init; return 0; err_rules_init: kfree(net->ipv4.fib_table_hash); err_table_hash_alloc: fib4_notifier_exit(net); return err; } static void ip_fib_net_exit(struct net *net) { int i; ASSERT_RTNL(); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif /* Destroy the tables in reverse order to guarantee that the * local table, ID 255, is destroyed before the main table, ID * 254. This is necessary as the local table may contain * references to data contained in the main table. 
*/ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) { struct hlist_head *head = &net->ipv4.fib_table_hash[i]; struct hlist_node *tmp; struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); fib_table_flush(net, tb, true); fib_free_table(tb); } } #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif kfree(net->ipv4.fib_table_hash); fib4_notifier_exit(net); } static int __net_init fib_net_init(struct net *net) { int error; #ifdef CONFIG_IP_ROUTE_CLASSID atomic_set(&net->ipv4.fib_num_tclassid_users, 0); #endif error = ip_fib_net_init(net); if (error < 0) goto out; error = nl_fib_lookup_init(net); if (error < 0) goto out_nlfl; error = fib_proc_init(net); if (error < 0) goto out_proc; out: return error; out_proc: nl_fib_lookup_exit(net); out_nlfl: rtnl_lock(); ip_fib_net_exit(net); rtnl_unlock(); goto out; } static void __net_exit fib_net_exit(struct net *net) { fib_proc_exit(net); nl_fib_lookup_exit(net); } static void __net_exit fib_net_exit_batch(struct list_head *net_list) { struct net *net; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) ip_fib_net_exit(net); rtnl_unlock(); } static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, .exit_batch = fib_net_exit_batch, }; void __init ip_fib_init(void) { fib_trie_init(); register_pernet_subsys(&fib_net_ops); register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, RTNL_FLAG_DUMP_UNLOCKED); }
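/*
 * Illustrative userspace sketch (not part of the kernel file above): adding a
 * route through the legacy SIOCADDRT path served by ip_rt_ioctl().  The
 * fields filled in here are the ones rtentry_to_fib_config() translates into
 * a struct fib_config (destination, netmask, gateway, flags, metric, device).
 * All addresses, the metric and the "eth0" name are arbitrary example values;
 * modern tools such as iproute2 use the RTM_NEWROUTE netlink path handled by
 * inet_rtm_newroute() instead.  Needs CAP_NET_ADMIN, mirroring the
 * ns_capable() check in ip_rt_ioctl().
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/route.h>
#include <net/if.h>

static void set_sin(struct sockaddr *sa, const char *ip)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)sa;

	sin->sin_family = AF_INET;
	inet_pton(AF_INET, ip, &sin->sin_addr);
}

int main(void)
{
	char ifname[IFNAMSIZ] = "eth0";
	struct rtentry rt;
	int fd, ret;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&rt, 0, sizeof(rt));
	set_sin(&rt.rt_dst,     "192.0.2.0");		/* cfg->fc_dst */
	set_sin(&rt.rt_genmask, "255.255.255.0");	/* plen = inet_mask_len(mask) */
	set_sin(&rt.rt_gateway, "198.51.100.1");	/* cfg->fc_gw4 */
	rt.rt_flags = RTF_UP | RTF_GATEWAY;		/* scope becomes RT_SCOPE_UNIVERSE */
	rt.rt_metric = 101;				/* fc_priority = rt_metric - 1 = 100 */
	rt.rt_dev = ifname;				/* resolved to cfg->fc_oif */

	ret = ioctl(fd, SIOCADDRT, &rt);
	if (ret < 0)
		perror("SIOCADDRT");

	close(fd);
	return ret < 0;
}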
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>

/*
 * This is an implementation of the notion of "decrement a
 * reference count, and return locked if it decremented to zero".
 *
 * NOTE NOTE NOTE! This is _not_ equivalent to
 *
 *	if (atomic_dec_and_test(&atomic)) {
 *		spin_lock(&lock);
 *		return 1;
 *	}
 *	return 0;
 *
 * because the spin-lock and the decrement must be
 * "atomic".
 */
int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
{
	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
	if (atomic_add_unless(atomic, -1, 1))
		return 0;

	/* Otherwise do it the slow way */
	spin_lock(lock);
	if (atomic_dec_and_test(atomic))
		return 1;
	spin_unlock(lock);
	return 0;
}
EXPORT_SYMBOL(_atomic_dec_and_lock);

int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
				 unsigned long *flags)
{
	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
	if (atomic_add_unless(atomic, -1, 1))
		return 0;

	/* Otherwise do it the slow way */
	spin_lock_irqsave(lock, *flags);
	if (atomic_dec_and_test(atomic))
		return 1;
	spin_unlock_irqrestore(lock, *flags);
	return 0;
}
EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);

int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock)
{
	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
	if (atomic_add_unless(atomic, -1, 1))
		return 0;

	/* Otherwise do it the slow way */
	raw_spin_lock(lock);
	if (atomic_dec_and_test(atomic))
		return 1;
	raw_spin_unlock(lock);
	return 0;
}
EXPORT_SYMBOL(_atomic_dec_and_raw_lock);

int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock,
				     unsigned long *flags)
{
	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
	if (atomic_add_unless(atomic, -1, 1))
		return 0;

	/* Otherwise do it the slow way */
	raw_spin_lock_irqsave(lock, *flags);
	if (atomic_dec_and_test(atomic))
		return 1;
	raw_spin_unlock_irqrestore(lock, *flags);
	return 0;
}
EXPORT_SYMBOL(_atomic_dec_and_raw_lock_irqsave);
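/*
 * Minimal userspace analog (an illustration, not kernel code): the same
 * "decrement and return locked if it hit zero" idea built from C11 atomics
 * and a pthread mutex.  It mirrors the fast path (atomic_add_unless) and the
 * slow path (final decrement done under the lock) of _atomic_dec_and_lock()
 * above; the struct and the object_put() name are made up for the example.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct object {
	atomic_int refcount;
	pthread_mutex_t lock;		/* protects whatever the object lives on */
};

/* Return true with ->lock held iff the refcount dropped to zero. */
static bool dec_and_lock(struct object *obj)
{
	int old = atomic_load(&obj->refcount);

	/* Fast path: decrement unless that would reach zero. */
	while (old > 1) {
		if (atomic_compare_exchange_weak(&obj->refcount, &old, old - 1))
			return false;
	}

	/* Slow path: the decrement to zero must happen under the lock. */
	pthread_mutex_lock(&obj->lock);
	if (atomic_fetch_sub(&obj->refcount, 1) == 1)
		return true;		/* caller unlocks after tearing down */
	pthread_mutex_unlock(&obj->lock);
	return false;
}

static void object_put(struct object *obj)
{
	if (dec_and_lock(obj)) {
		/* Last reference: unlink/free while the lock is still held. */
		printf("freeing object\n");
		pthread_mutex_unlock(&obj->lock);
		pthread_mutex_destroy(&obj->lock);
		free(obj);
	}
}

int main(void)
{
	struct object *obj = malloc(sizeof(*obj));

	if (!obj)
		return 1;
	atomic_init(&obj->refcount, 2);
	pthread_mutex_init(&obj->lock, NULL);

	object_put(obj);	/* 2 -> 1, lock never taken */
	object_put(obj);	/* 1 -> 0, freed under the lock */
	return 0;
}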
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Wireless configuration interface internals.
 *
 * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
 * Copyright (C) 2018-2024 Intel Corporation
 */
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/rbtree.h>
#include <linux/debugfs.h>
#include <linux/rfkill.h>
#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <net/genetlink.h>
#include <net/cfg80211.h>
#include "reg.h"

#define WIPHY_IDX_INVALID	-1

struct cfg80211_registered_device {
	const struct cfg80211_ops *ops;
	struct list_head list;

	/* rfkill support */
	struct rfkill_ops rfkill_ops;
	struct work_struct rfkill_block;

	/* ISO / IEC 3166 alpha2 for which this device is receiving
	 * country IEs on, this can help disregard country IEs from APs
	 * on the same alpha2 quickly. The alpha2 may differ from
	 * cfg80211_regdomain's alpha2 when an intersection has occurred.
	 * If the AP is reconfigured this can also be used to tell us if
	 * the country on the country IE changed. */
	char country_ie_alpha2[2];

	/*
	 * the driver requests the regulatory core to set this regulatory
	 * domain as the wiphy's.
Only used for %REGULATORY_WIPHY_SELF_MANAGED * devices using the regulatory_set_wiphy_regd() API */ const struct ieee80211_regdomain *requested_regd; /* If a Country IE has been received this tells us the environment * which its telling us its in. This defaults to ENVIRON_ANY */ enum environment_cap env; /* wiphy index, internal only */ int wiphy_idx; /* protected by RTNL */ int devlist_generation, wdev_id; int opencount; wait_queue_head_t dev_wait; struct list_head beacon_registrations; spinlock_t beacon_registrations_lock; /* protected by RTNL only */ int num_running_ifaces; int num_running_monitor_ifaces; u64 cookie_counter; /* BSSes/scanning */ spinlock_t bss_lock; struct list_head bss_list; struct rb_root bss_tree; u32 bss_generation; u32 bss_entries; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct cfg80211_scan_request *int_scan_req; struct sk_buff *scan_msg; struct list_head sched_scan_req_list; time64_t suspend_at; struct wiphy_work scan_done_wk; struct genl_info *cur_cmd_info; struct work_struct conn_work; struct work_struct event_work; struct delayed_work dfs_update_channels_wk; struct wireless_dev *background_radar_wdev; struct cfg80211_chan_def background_radar_chandef; struct delayed_work background_cac_done_wk; struct work_struct background_cac_abort_wk; /* netlink port which started critical protocol (0 means not started) */ u32 crit_proto_nlportid; struct cfg80211_coalesce *coalesce; struct work_struct destroy_work; struct wiphy_work sched_scan_stop_wk; struct work_struct sched_scan_res_wk; struct cfg80211_chan_def radar_chandef; struct work_struct propagate_radar_detect_wk; struct cfg80211_chan_def cac_done_chandef; struct work_struct propagate_cac_done_wk; struct work_struct mgmt_registrations_update_wk; /* lock for all wdev lists */ spinlock_t mgmt_registrations_lock; struct work_struct wiphy_work; struct list_head wiphy_work_list; /* protects the list above */ spinlock_t wiphy_work_lock; bool suspended; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); }; static inline struct cfg80211_registered_device *wiphy_to_rdev(struct wiphy *wiphy) { BUG_ON(!wiphy); return container_of(wiphy, struct cfg80211_registered_device, wiphy); } static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { #ifdef CONFIG_PM int i; if (!rdev->wiphy.wowlan_config) return; for (i = 0; i < rdev->wiphy.wowlan_config->n_patterns; i++) kfree(rdev->wiphy.wowlan_config->patterns[i].mask); kfree(rdev->wiphy.wowlan_config->patterns); if (rdev->wiphy.wowlan_config->tcp && rdev->wiphy.wowlan_config->tcp->sock) sock_release(rdev->wiphy.wowlan_config->tcp->sock); kfree(rdev->wiphy.wowlan_config->tcp); kfree(rdev->wiphy.wowlan_config->nd_config); kfree(rdev->wiphy.wowlan_config); #endif } static inline u64 cfg80211_assign_cookie(struct cfg80211_registered_device *rdev) { u64 r = ++rdev->cookie_counter; if (WARN_ON(r == 0)) r = ++rdev->cookie_counter; return r; } extern struct workqueue_struct *cfg80211_wq; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; /* This is constructed like this so it can be used in if/else */ static inline int for_each_rdev_check_rtnl(void) { ASSERT_RTNL(); return 0; } #define for_each_rdev(rdev) \ if (for_each_rdev_check_rtnl()) {} else \ list_for_each_entry(rdev, &cfg80211_rdev_list, list) struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; struct rb_node rbn; 
u64 ts_boottime; unsigned long ts; unsigned long refcount; atomic_t hold; /* time at the start of the reception of the first octet of the * timestamp field of the last beacon/probe received for this BSS. * The time is the TSF of the BSS specified by %parent_bssid. */ u64 parent_tsf; /* the BSS according to which %parent_tsf is set. This is set to * the BSS that the interface that requested the scan was connected to * when the beacon/probe was received. */ u8 parent_bssid[ETH_ALEN] __aligned(2); /* must be last because of priv member */ struct cfg80211_bss pub; }; static inline struct cfg80211_internal_bss *bss_from_pub(struct cfg80211_bss *pub) { return container_of(pub, struct cfg80211_internal_bss, pub); } static inline void cfg80211_hold_bss(struct cfg80211_internal_bss *bss) { atomic_inc(&bss->hold); if (bss->pub.transmitted_bss) { bss = container_of(bss->pub.transmitted_bss, struct cfg80211_internal_bss, pub); atomic_inc(&bss->hold); } } static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss) { int r = atomic_dec_return(&bss->hold); WARN_ON(r < 0); if (bss->pub.transmitted_bss) { bss = container_of(bss->pub.transmitted_bss, struct cfg80211_internal_bss, pub); r = atomic_dec_return(&bss->hold); WARN_ON(r < 0); } } struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); void cfg80211_init_wdev(struct wireless_dev *wdev); void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) { lockdep_assert_held(&rdev->wiphy.mtx); return rdev->num_running_ifaces == rdev->num_running_monitor_ifaces && rdev->num_running_ifaces > 0; } enum cfg80211_event_type { EVENT_CONNECT_RESULT, EVENT_ROAMED, EVENT_DISCONNECTED, EVENT_IBSS_JOINED, EVENT_STOPPED, EVENT_PORT_AUTHORIZED, }; struct cfg80211_event { struct list_head list; enum cfg80211_event_type type; union { struct cfg80211_connect_resp_params cr; struct cfg80211_roam_info rm; struct { const u8 *ie; size_t ie_len; u16 reason; bool locally_generated; } dc; struct { u8 bssid[ETH_ALEN]; struct ieee80211_channel *channel; } ij; struct { u8 peer_addr[ETH_ALEN]; const u8 *td_bitmap; u8 td_bitmap_len; } pa; }; }; struct cfg80211_cached_keys { struct key_params params[4]; u8 data[4][WLAN_KEY_LEN_WEP104]; int def; }; struct cfg80211_beacon_registration { struct list_head list; u32 nlportid; }; struct cfg80211_cqm_config { struct rcu_head rcu_head; u32 rssi_hyst; s32 last_rssi_event_value; enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; bool use_range_api; int n_rssi_thresholds; s32 rssi_thresholds[] __counted_by(n_rssi_thresholds); }; void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work); void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, unsigned int link, struct ieee80211_channel *channel); /* IBSS */ int __cfg80211_join_ibss(struct 
cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ibss_params *params, struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel); int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); /* mesh */ extern const struct mesh_config default_mesh_config; extern const struct mesh_setup default_mesh_setup; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef); /* OCB */ int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ocb_setup *setup); int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev); /* AP */ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, int link, bool notify); /* MLME */ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_auth_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_assoc_request *req, struct netlink_ext_ack *extack); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, bool local_state_change); int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *ap_addr, const u8 *ie, int ie_len, u16 reason, bool local_state_change); void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, int match_len, bool multicast_rx, struct netlink_ext_ack *extack); void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk); void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); /* SME events */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, struct cfg80211_cached_keys *connkeys, const u8 *prev_bssid); void __cfg80211_connect_result(struct net_device *dev, struct cfg80211_connect_resp_params *params, bool wextev); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr, const u8 *td_bitmap, u8 
td_bitmap_len); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_autodisconnect_wk(struct work_struct *work); /* SME implementation */ void cfg80211_conn_work(struct work_struct *work); void cfg80211_sme_scan_done(struct net_device *dev); bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status); void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len); void cfg80211_sme_disassoc(struct wireless_dev *wdev); void cfg80211_sme_deauth(struct wireless_dev *wdev); void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev, int key_idx, bool pairwise); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message); void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req); int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev, bool want_multi); void cfg80211_sched_scan_results_wk(struct work_struct *work); int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req, bool driver_initiated); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, u64 reqid, bool driver_initiated); void cfg80211_upload_connect_keys(struct wireless_dev *wdev); int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev, struct wiphy_work *end); void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); int cfg80211_scan(struct cfg80211_registered_device *rdev); extern struct work_struct cfg80211_disconnect_work; #define NL80211_BSS_USE_FOR_ALL (NL80211_BSS_USE_FOR_NORMAL | \ NL80211_BSS_USE_FOR_MLD_LINK) void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); void cfg80211_dfs_channels_update_work(struct work_struct *work); void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); int cfg80211_start_background_radar_detection(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_chan_def *chandef); void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev); void cfg80211_background_cac_done_wk(struct work_struct *work); void cfg80211_background_cac_abort_wk(struct work_struct *work); bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan); bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev); bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan, bool primary_only); bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, struct ieee80211_channel *chan, bool primary_only); bool 
_cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags, bool monitor); static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { unsigned long end = jiffies; if (end >= start) return jiffies_to_msecs(end - start); return jiffies_to_msecs(end + (ULONG_MAX - start) + 1); } int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, const u8 *rates, unsigned int n_rates, u32 *mask); int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, u32 beacon_int); void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *tmp, bool signal_valid, unsigned long ts); #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else /* * Trick to enable using it as a condition, * and also not give a warning when it's * not used that way. */ #define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; }) #endif void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid); void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev); void cfg80211_pmsr_free_wk(struct work_struct *work); void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id); void cfg80211_remove_links(struct wireless_dev *wdev); int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask); /** * struct cfg80211_colocated_ap - colocated AP information * * @list: linked list to all colocated APs * @bssid: BSSID of the reported AP * @ssid: SSID of the reported AP * @ssid_len: length of the ssid * @center_freq: frequency the reported AP is on * @unsolicited_probe: the reported AP is part of an ESS, where all the APs * that operate in the same channel as the reported AP and that might be * detected by a STA receiving this frame, are transmitting unsolicited * Probe Response frames every 20 TUs * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP * @same_ssid: the reported AP has the same SSID as the reporting AP * @multi_bss: the reported AP is part of a multiple BSSID set * @transmitted_bssid: the reported AP is the transmitting BSSID * @colocated_ess: all the APs that share the same ESS as the reported AP are * colocated and can be discovered via legacy bands. 
* @short_ssid_valid: short_ssid is valid and can be used * @short_ssid: the short SSID for this SSID * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP */ struct cfg80211_colocated_ap { struct list_head list; u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; size_t ssid_len; u32 short_ssid; u32 center_freq; u8 unsolicited_probe:1, oct_recommended:1, same_ssid:1, multi_bss:1, transmitted_bssid:1, colocated_ess:1, short_ssid_valid:1; s8 psd_20; }; #if IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST) #define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym) #define VISIBLE_IF_CFG80211_KUNIT void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list); int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, struct list_head *list); size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, const u8 *subie, size_t subie_len, u8 *new_ie, size_t new_ie_len); #else #define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) #define VISIBLE_IF_CFG80211_KUNIT static #endif /* IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST) */ #endif /* __NET_WIRELESS_CORE_H */
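/*
 * Illustrative userspace sketch (not taken from cfg80211): the container_of
 * pattern behind wiphy_to_rdev() and bss_from_pub() above.  A private wrapper
 * embeds the public object as its last member and recovers the wrapper from
 * the public pointer; "struct pub" and "struct priv" are invented stand-ins
 * for struct wiphy / struct cfg80211_registered_device, and this container_of
 * is a simplified version of the kernel macro.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct pub {				/* what the rest of the world sees */
	int id;
};

struct priv {				/* bookkeeping wrapped around it */
	int refcount;
	struct pub pub;			/* must be last, like 'wiphy' above */
};

static struct priv *priv_from_pub(struct pub *p)
{
	return container_of(p, struct priv, pub);
}

int main(void)
{
	struct priv obj = { .refcount = 1, .pub = { .id = 42 } };
	struct pub *handle = &obj.pub;	/* handed out to users */

	printf("refcount via public handle: %d\n",
	       priv_from_pub(handle)->refcount);
	return 0;
}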
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM irq_vectors

#if !defined(_TRACE_IRQ_VECTORS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_IRQ_VECTORS_H

#include <linux/tracepoint.h>
#include <asm/trace/common.h>

#ifdef CONFIG_X86_LOCAL_APIC

DECLARE_EVENT_CLASS(x86_irq_vector,

	TP_PROTO(int vector),

	TP_ARGS(vector),

	TP_STRUCT__entry(
		__field(	int,	vector	)
	),

	TP_fast_assign(
		__entry->vector = vector;
	),

	TP_printk("vector=%d", __entry->vector) );

#define DEFINE_IRQ_VECTOR_EVENT(name)		\
DEFINE_EVENT_FN(x86_irq_vector, name##_entry,	\
	TP_PROTO(int vector),			\
	TP_ARGS(vector), NULL, NULL);		\
DEFINE_EVENT_FN(x86_irq_vector, name##_exit,	\
	TP_PROTO(int vector),			\
	TP_ARGS(vector), NULL, NULL);

/*
 * local_timer - called when entering/exiting a local timer interrupt
 * vector handler
 */
DEFINE_IRQ_VECTOR_EVENT(local_timer);

/*
 * spurious_apic - called when entering/exiting a spurious apic vector handler
 */
DEFINE_IRQ_VECTOR_EVENT(spurious_apic);

/*
 * error_apic - called when entering/exiting an error apic vector handler
 */
DEFINE_IRQ_VECTOR_EVENT(error_apic);

/*
 * x86_platform_ipi - called when entering/exiting a x86 platform ipi interrupt
 * vector handler
 */
DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);

#ifdef CONFIG_IRQ_WORK
/*
 * irq_work - called when entering/exiting a irq work interrupt
 * vector handler
 */
DEFINE_IRQ_VECTOR_EVENT(irq_work);

/*
 * We must dis-allow sampling irq_work_exit() because perf event sampling
 * itself can cause irq_work, which would lead to an infinite loop;
 *
 * 1) irq_work_exit happens
 * 2) generates perf sample
 * 3) generates irq_work
 * 4) goto 1
 */
TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0);
#endif

/*
 * The ifdef is required because that tracepoint macro hell emits tracepoint
 * code in files which include this header even if the tracepoint is not
 * enabled. Brilliant stuff that.
*/ #ifdef CONFIG_SMP /* * reschedule - called when entering/exiting a reschedule vector handler */ DEFINE_IRQ_VECTOR_EVENT(reschedule); /* * call_function - called when entering/exiting a call function interrupt * vector handler */ DEFINE_IRQ_VECTOR_EVENT(call_function); /* * call_function_single - called when entering/exiting a call function * single interrupt vector handler */ DEFINE_IRQ_VECTOR_EVENT(call_function_single); #endif #ifdef CONFIG_X86_MCE_THRESHOLD /* * threshold_apic - called when entering/exiting a threshold apic interrupt * vector handler */ DEFINE_IRQ_VECTOR_EVENT(threshold_apic); #endif #ifdef CONFIG_X86_MCE_AMD /* * deferred_error_apic - called when entering/exiting a deferred apic interrupt * vector handler */ DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); #endif #ifdef CONFIG_X86_THERMAL_VECTOR /* * thermal_apic - called when entering/exiting a thermal apic interrupt * vector handler */ DEFINE_IRQ_VECTOR_EVENT(thermal_apic); #endif TRACE_EVENT(vector_config, TP_PROTO(unsigned int irq, unsigned int vector, unsigned int cpu, unsigned int apicdest), TP_ARGS(irq, vector, cpu, apicdest), TP_STRUCT__entry( __field( unsigned int, irq ) __field( unsigned int, vector ) __field( unsigned int, cpu ) __field( unsigned int, apicdest ) ), TP_fast_assign( __entry->irq = irq; __entry->vector = vector; __entry->cpu = cpu; __entry->apicdest = apicdest; ), TP_printk("irq=%u vector=%u cpu=%u apicdest=0x%08x", __entry->irq, __entry->vector, __entry->cpu, __entry->apicdest) ); DECLARE_EVENT_CLASS(vector_mod, TP_PROTO(unsigned int irq, unsigned int vector, unsigned int cpu, unsigned int prev_vector, unsigned int prev_cpu), TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), TP_STRUCT__entry( __field( unsigned int, irq ) __field( unsigned int, vector ) __field( unsigned int, cpu ) __field( unsigned int, prev_vector ) __field( unsigned int, prev_cpu ) ), TP_fast_assign( __entry->irq = irq; __entry->vector = vector; __entry->cpu = cpu; __entry->prev_vector = prev_vector; __entry->prev_cpu = prev_cpu; ), TP_printk("irq=%u vector=%u cpu=%u prev_vector=%u prev_cpu=%u", __entry->irq, __entry->vector, __entry->cpu, __entry->prev_vector, __entry->prev_cpu) ); #define DEFINE_IRQ_VECTOR_MOD_EVENT(name) \ DEFINE_EVENT_FN(vector_mod, name, \ TP_PROTO(unsigned int irq, unsigned int vector, \ unsigned int cpu, unsigned int prev_vector, \ unsigned int prev_cpu), \ TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), NULL, NULL); \ DEFINE_IRQ_VECTOR_MOD_EVENT(vector_update); DEFINE_IRQ_VECTOR_MOD_EVENT(vector_clear); DECLARE_EVENT_CLASS(vector_reserve, TP_PROTO(unsigned int irq, int ret), TP_ARGS(irq, ret), TP_STRUCT__entry( __field( unsigned int, irq ) __field( int, ret ) ), TP_fast_assign( __entry->irq = irq; __entry->ret = ret; ), TP_printk("irq=%u ret=%d", __entry->irq, __entry->ret) ); #define DEFINE_IRQ_VECTOR_RESERVE_EVENT(name) \ DEFINE_EVENT_FN(vector_reserve, name, \ TP_PROTO(unsigned int irq, int ret), \ TP_ARGS(irq, ret), NULL, NULL); \ DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve_managed); DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve); TRACE_EVENT(vector_alloc, TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, int ret), TP_ARGS(irq, vector, reserved, ret), TP_STRUCT__entry( __field( unsigned int, irq ) __field( unsigned int, vector ) __field( bool, reserved ) __field( int, ret ) ), TP_fast_assign( __entry->irq = irq; __entry->vector = ret < 0 ? 0 : vector; __entry->reserved = reserved; __entry->ret = ret > 0 ? 
0 : ret; ), TP_printk("irq=%u vector=%u reserved=%d ret=%d", __entry->irq, __entry->vector, __entry->reserved, __entry->ret) ); TRACE_EVENT(vector_alloc_managed, TP_PROTO(unsigned int irq, unsigned int vector, int ret), TP_ARGS(irq, vector, ret), TP_STRUCT__entry( __field( unsigned int, irq ) __field( unsigned int, vector ) __field( int, ret ) ), TP_fast_assign( __entry->irq = irq; __entry->vector = ret < 0 ? 0 : vector; __entry->ret = ret > 0 ? 0 : ret; ), TP_printk("irq=%u vector=%u ret=%d", __entry->irq, __entry->vector, __entry->ret) ); DECLARE_EVENT_CLASS(vector_activate, TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, bool reserve), TP_ARGS(irq, is_managed, can_reserve, reserve), TP_STRUCT__entry( __field( unsigned int, irq ) __field( bool, is_managed ) __field( bool, can_reserve ) __field( bool, reserve ) ), TP_fast_assign( __entry->irq = irq; __entry->is_managed = is_managed; __entry->can_reserve = can_reserve; __entry->reserve = reserve; ), TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d", __entry->irq, __entry->is_managed, __entry->can_reserve, __entry->reserve) ); #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ DEFINE_EVENT_FN(vector_activate, name, \ TP_PROTO(unsigned int irq, bool is_managed, \ bool can_reserve, bool reserve), \ TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \ DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); TRACE_EVENT(vector_teardown, TP_PROTO(unsigned int irq, bool is_managed, bool has_reserved), TP_ARGS(irq, is_managed, has_reserved), TP_STRUCT__entry( __field( unsigned int, irq ) __field( bool, is_managed ) __field( bool, has_reserved ) ), TP_fast_assign( __entry->irq = irq; __entry->is_managed = is_managed; __entry->has_reserved = has_reserved; ), TP_printk("irq=%u is_managed=%d has_reserved=%d", __entry->irq, __entry->is_managed, __entry->has_reserved) ); TRACE_EVENT(vector_setup, TP_PROTO(unsigned int irq, bool is_legacy, int ret), TP_ARGS(irq, is_legacy, ret), TP_STRUCT__entry( __field( unsigned int, irq ) __field( bool, is_legacy ) __field( int, ret ) ), TP_fast_assign( __entry->irq = irq; __entry->is_legacy = is_legacy; __entry->ret = ret; ), TP_printk("irq=%u is_legacy=%d ret=%d", __entry->irq, __entry->is_legacy, __entry->ret) ); TRACE_EVENT(vector_free_moved, TP_PROTO(unsigned int irq, unsigned int cpu, unsigned int vector, bool is_managed), TP_ARGS(irq, cpu, vector, is_managed), TP_STRUCT__entry( __field( unsigned int, irq ) __field( unsigned int, cpu ) __field( unsigned int, vector ) __field( bool, is_managed ) ), TP_fast_assign( __entry->irq = irq; __entry->cpu = cpu; __entry->vector = vector; __entry->is_managed = is_managed; ), TP_printk("irq=%u cpu=%u vector=%u is_managed=%d", __entry->irq, __entry->cpu, __entry->vector, __entry->is_managed) ); #endif /* CONFIG_X86_LOCAL_APIC */ #undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE irq_vectors #endif /* _TRACE_IRQ_VECTORS_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
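/*
 * Illustrative userspace consumer (an assumption-laden sketch, not part of
 * the header above): the events declared by DEFINE_IRQ_VECTOR_EVENT() show up
 * under tracefs as irq_vectors/<name>_entry and irq_vectors/<name>_exit.  The
 * paths below assume tracefs is mounted at /sys/kernel/tracing (older setups
 * use /sys/kernel/debug/tracing), an x86 kernel with CONFIG_X86_LOCAL_APIC,
 * and root privileges.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define TRACEFS "/sys/kernel/tracing"

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	if (write_str(TRACEFS "/events/irq_vectors/local_timer_entry/enable", "1")) {
		perror("enable local_timer_entry");
		return 1;
	}

	/* trace_pipe blocks until events arrive; print one chunk and stop. */
	fd = open(TRACEFS "/trace_pipe", O_RDONLY);
	if (fd >= 0) {
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			fputs(buf, stdout);
		}
		close(fd);
	}

	write_str(TRACEFS "/events/irq_vectors/local_timer_entry/enable", "0");
	return 0;
}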
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Media device node * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> * * -- * * Common functions for media-related drivers to register and unregister media * device nodes. */ #ifndef _MEDIA_DEVNODE_H #define _MEDIA_DEVNODE_H #include <linux/poll.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/cdev.h> struct media_device; /* * Flag to mark the media_devnode struct as registered. Drivers must not touch * this flag directly, it will be set and cleared by media_devnode_register and * media_devnode_unregister. */ #define MEDIA_FLAG_REGISTERED 0 /** * struct media_file_operations - Media device file operations * * @owner: should be filled with %THIS_MODULE * @read: pointer to the function that implements read() syscall * @write: pointer to the function that implements write() syscall * @poll: pointer to the function that implements poll() syscall * @ioctl: pointer to the function that implements ioctl() syscall * @compat_ioctl: pointer to the function that will handle 32 bits userspace * calls to the ioctl() syscall on a Kernel compiled with 64 bits. * @open: pointer to the function that implements open() syscall * @release: pointer to the function that will release the resources allocated * by the @open function. */ struct media_file_operations { struct module *owner; ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*open) (struct file *); int (*release) (struct file *); }; /** * struct media_devnode - Media device node * @media_dev: pointer to struct &media_device * @fops: pointer to struct &media_file_operations with media device ops * @dev: pointer to struct &device containing the media controller device * @cdev: struct cdev pointer character device * @parent: parent device * @minor: device node minor number * @flags: flags, combination of the ``MEDIA_FLAG_*`` constants * @release: release callback called at the end of ``media_devnode_release()`` * routine at media-device.c. * * This structure represents a media-related device node. * * The @parent is a physical device. It must be set by core or device drivers * before registering the node.
*/ struct media_devnode { struct media_device *media_dev; /* device ops */ const struct media_file_operations *fops; /* sysfs */ struct device dev; /* media device */ struct cdev cdev; /* character device */ struct device *parent; /* device parent */ /* device info */ int minor; unsigned long flags; /* Use bitops to access flags */ /* callbacks */ void (*release)(struct media_devnode *devnode); }; /* dev to media_devnode */ #define to_media_devnode(cd) container_of(cd, struct media_devnode, dev) /** * media_devnode_register - register a media device node * * @mdev: struct media_device we want to register a device node * @devnode: media device node structure we want to register * @owner: should be filled with %THIS_MODULE * * The registration code assigns minor numbers and registers the new device node * with the kernel. An error is returned if no free minor number can be found, * or if the registration of the device node fails. * * Zero is returned on success. * * Note that if the media_devnode_register call fails, the release() callback of * the media_devnode structure is *not* called, so the caller is responsible for * freeing any data. */ int __must_check media_devnode_register(struct media_device *mdev, struct media_devnode *devnode, struct module *owner); /** * media_devnode_unregister_prepare - clear the media device node register bit * @devnode: the device node to prepare for unregister * * This clears the passed device register bit. Future open calls will be met * with errors. Should be called before media_devnode_unregister() to avoid * races with unregister and device file open calls. * * This function can safely be called if the device node has never been * registered or has already been unregistered. */ void media_devnode_unregister_prepare(struct media_devnode *devnode); /** * media_devnode_unregister - unregister a media device node * @devnode: the device node to unregister * * This unregisters the passed device. Future open calls will be met with * errors. * * Should be called after media_devnode_unregister_prepare() */ void media_devnode_unregister(struct media_devnode *devnode); /** * media_devnode_data - returns a pointer to the &media_devnode * * @filp: pointer to struct &file */ static inline struct media_devnode *media_devnode_data(struct file *filp) { return filp->private_data; } /** * media_devnode_is_registered - returns true if &media_devnode is registered; * false otherwise. * * @devnode: pointer to struct &media_devnode. * * Note: If mdev is NULL, it also returns false. */ static inline int media_devnode_is_registered(struct media_devnode *devnode) { if (!devnode) return false; return test_bit(MEDIA_FLAG_REGISTERED, &devnode->flags); } #endif /* _MEDIA_DEVNODE_H */
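As a rough usage sketch of the API documented above (in-tree drivers normally reach this code through the media_device core rather than calling it directly; every my_* name below is invented for illustration): allocate the node, set @fops, @parent and @release, register it, and on teardown clear the register bit before unregistering.

#include <linux/module.h>
#include <linux/slab.h>
#include <media/media-devnode.h>	/* assumed include path for this header */

static void my_devnode_release(struct media_devnode *devnode)
{
	kfree(devnode);			/* invoked from media_devnode_release() */
}

static const struct media_file_operations my_media_fops = {
	.owner = THIS_MODULE,
	/* .open/.release/.ioctl would be filled in by a real driver */
};

static int my_devnode_create(struct media_device *mdev, struct device *parent,
			     struct media_devnode **out)
{
	struct media_devnode *devnode;
	int ret;

	devnode = kzalloc(sizeof(*devnode), GFP_KERNEL);
	if (!devnode)
		return -ENOMEM;

	devnode->fops = &my_media_fops;
	devnode->parent = parent;	/* must be set before registration */
	devnode->release = my_devnode_release;

	ret = media_devnode_register(mdev, devnode, THIS_MODULE);
	if (ret) {
		kfree(devnode);		/* release() is not called on failure */
		return ret;
	}
	*out = devnode;
	return 0;
}

static void my_devnode_destroy(struct media_devnode *devnode)
{
	media_devnode_unregister_prepare(devnode);	/* block new open()s */
	media_devnode_unregister(devnode);
}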
// SPDX-License-Identifier: GPL-2.0 /* * Provides code common for host and device side USB. * * If either host side (ie. CONFIG_USB=y) or device side USB stack * (ie. CONFIG_USB_GADGET=y) is compiled in the kernel, this module is * compiled-in as well. Otherwise, if either of the two stacks is * compiled as module, this file is compiled as module as well. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> #include <linux/usb/ch9.h> #include <linux/usb/of.h> #include <linux/usb/otg.h> #include <linux/of_platform.h> #include <linux/debugfs.h> #include "common.h" static const char *const ep_type_names[] = { [USB_ENDPOINT_XFER_CONTROL] = "ctrl", [USB_ENDPOINT_XFER_ISOC] = "isoc", [USB_ENDPOINT_XFER_BULK] = "bulk", [USB_ENDPOINT_XFER_INT] = "intr", }; /** * usb_ep_type_string() - Returns human readable-name of the endpoint type. * @ep_type: The endpoint type to return human-readable name for. If it's not * any of the types: USB_ENDPOINT_XFER_{CONTROL, ISOC, BULK, INT}, * usually got by usb_endpoint_type(), the string 'unknown' will be returned.
*/ const char *usb_ep_type_string(int ep_type) { if (ep_type < 0 || ep_type >= ARRAY_SIZE(ep_type_names)) return "unknown"; return ep_type_names[ep_type]; } EXPORT_SYMBOL_GPL(usb_ep_type_string); const char *usb_otg_state_string(enum usb_otg_state state) { static const char *const names[] = { [OTG_STATE_A_IDLE] = "a_idle", [OTG_STATE_A_WAIT_VRISE] = "a_wait_vrise", [OTG_STATE_A_WAIT_BCON] = "a_wait_bcon", [OTG_STATE_A_HOST] = "a_host", [OTG_STATE_A_SUSPEND] = "a_suspend", [OTG_STATE_A_PERIPHERAL] = "a_peripheral", [OTG_STATE_A_WAIT_VFALL] = "a_wait_vfall", [OTG_STATE_A_VBUS_ERR] = "a_vbus_err", [OTG_STATE_B_IDLE] = "b_idle", [OTG_STATE_B_SRP_INIT] = "b_srp_init", [OTG_STATE_B_PERIPHERAL] = "b_peripheral", [OTG_STATE_B_WAIT_ACON] = "b_wait_acon", [OTG_STATE_B_HOST] = "b_host", }; if (state < 0 || state >= ARRAY_SIZE(names)) return "UNDEFINED"; return names[state]; } EXPORT_SYMBOL_GPL(usb_otg_state_string); static const char *const speed_names[] = { [USB_SPEED_UNKNOWN] = "UNKNOWN", [USB_SPEED_LOW] = "low-speed", [USB_SPEED_FULL] = "full-speed", [USB_SPEED_HIGH] = "high-speed", [USB_SPEED_WIRELESS] = "wireless", [USB_SPEED_SUPER] = "super-speed", [USB_SPEED_SUPER_PLUS] = "super-speed-plus", }; static const char *const ssp_rate[] = { [USB_SSP_GEN_UNKNOWN] = "UNKNOWN", [USB_SSP_GEN_2x1] = "super-speed-plus-gen2x1", [USB_SSP_GEN_1x2] = "super-speed-plus-gen1x2", [USB_SSP_GEN_2x2] = "super-speed-plus-gen2x2", }; /** * usb_speed_string() - Returns human readable-name of the speed. * @speed: The speed to return human-readable name for. If it's not * any of the speeds defined in usb_device_speed enum, string for * USB_SPEED_UNKNOWN will be returned. */ const char *usb_speed_string(enum usb_device_speed speed) { if (speed < 0 || speed >= ARRAY_SIZE(speed_names)) speed = USB_SPEED_UNKNOWN; return speed_names[speed]; } EXPORT_SYMBOL_GPL(usb_speed_string); /** * usb_get_maximum_speed - Get maximum requested speed for a given USB * controller. * @dev: Pointer to the given USB controller device * * The function gets the maximum speed string from property "maximum-speed", * and returns the corresponding enum usb_device_speed. */ enum usb_device_speed usb_get_maximum_speed(struct device *dev) { const char *maximum_speed; int ret; ret = device_property_read_string(dev, "maximum-speed", &maximum_speed); if (ret < 0) return USB_SPEED_UNKNOWN; ret = match_string(ssp_rate, ARRAY_SIZE(ssp_rate), maximum_speed); if (ret > 0) return USB_SPEED_SUPER_PLUS; ret = match_string(speed_names, ARRAY_SIZE(speed_names), maximum_speed); return (ret < 0) ? USB_SPEED_UNKNOWN : ret; } EXPORT_SYMBOL_GPL(usb_get_maximum_speed); /** * usb_get_maximum_ssp_rate - Get the signaling rate generation and lane count * of a SuperSpeed Plus capable device. * @dev: Pointer to the given USB controller device * * If the string from "maximum-speed" property is super-speed-plus-genXxY where * 'X' is the generation number and 'Y' is the number of lanes, then this * function returns the corresponding enum usb_ssp_rate. */ enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev) { const char *maximum_speed; int ret; ret = device_property_read_string(dev, "maximum-speed", &maximum_speed); if (ret < 0) return USB_SSP_GEN_UNKNOWN; ret = match_string(ssp_rate, ARRAY_SIZE(ssp_rate), maximum_speed); return (ret < 0) ? USB_SSP_GEN_UNKNOWN : ret; } EXPORT_SYMBOL_GPL(usb_get_maximum_ssp_rate); /** * usb_state_string - Returns human readable name for the state. * @state: The state to return a human-readable name for. 
If it's not * any of the states devices in usb_device_state_string enum, * the string UNKNOWN will be returned. */ const char *usb_state_string(enum usb_device_state state) { static const char *const names[] = { [USB_STATE_NOTATTACHED] = "not attached", [USB_STATE_ATTACHED] = "attached", [USB_STATE_POWERED] = "powered", [USB_STATE_RECONNECTING] = "reconnecting", [USB_STATE_UNAUTHENTICATED] = "unauthenticated", [USB_STATE_DEFAULT] = "default", [USB_STATE_ADDRESS] = "addressed", [USB_STATE_CONFIGURED] = "configured", [USB_STATE_SUSPENDED] = "suspended", }; if (state < 0 || state >= ARRAY_SIZE(names)) return "UNKNOWN"; return names[state]; } EXPORT_SYMBOL_GPL(usb_state_string); static const char *const usb_dr_modes[] = { [USB_DR_MODE_UNKNOWN] = "", [USB_DR_MODE_HOST] = "host", [USB_DR_MODE_PERIPHERAL] = "peripheral", [USB_DR_MODE_OTG] = "otg", }; static enum usb_dr_mode usb_get_dr_mode_from_string(const char *str) { int ret; ret = match_string(usb_dr_modes, ARRAY_SIZE(usb_dr_modes), str); return (ret < 0) ? USB_DR_MODE_UNKNOWN : ret; } enum usb_dr_mode usb_get_dr_mode(struct device *dev) { const char *dr_mode; int err; err = device_property_read_string(dev, "dr_mode", &dr_mode); if (err < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(dr_mode); } EXPORT_SYMBOL_GPL(usb_get_dr_mode); /** * usb_get_role_switch_default_mode - Get default mode for given device * @dev: Pointer to the given device * * The function gets string from property 'role-switch-default-mode', * and returns the corresponding enum usb_dr_mode. */ enum usb_dr_mode usb_get_role_switch_default_mode(struct device *dev) { const char *str; int ret; ret = device_property_read_string(dev, "role-switch-default-mode", &str); if (ret < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(str); } EXPORT_SYMBOL_GPL(usb_get_role_switch_default_mode); /** * usb_decode_interval - Decode bInterval into the time expressed in 1us unit * @epd: The descriptor of the endpoint * @speed: The speed that the endpoint works as * * Function returns the interval expressed in 1us unit for servicing * endpoint for data transfers. */ unsigned int usb_decode_interval(const struct usb_endpoint_descriptor *epd, enum usb_device_speed speed) { unsigned int interval = 0; switch (usb_endpoint_type(epd)) { case USB_ENDPOINT_XFER_CONTROL: /* uframes per NAK */ if (speed == USB_SPEED_HIGH) interval = epd->bInterval; break; case USB_ENDPOINT_XFER_ISOC: interval = 1 << (epd->bInterval - 1); break; case USB_ENDPOINT_XFER_BULK: /* uframes per NAK */ if (speed == USB_SPEED_HIGH && usb_endpoint_dir_out(epd)) interval = epd->bInterval; break; case USB_ENDPOINT_XFER_INT: if (speed >= USB_SPEED_HIGH) interval = 1 << (epd->bInterval - 1); else interval = epd->bInterval; break; } interval *= (speed >= USB_SPEED_HIGH) ? 125 : 1000; return interval; } EXPORT_SYMBOL_GPL(usb_decode_interval); #ifdef CONFIG_OF /** * of_usb_get_dr_mode_by_phy - Get dual role mode for the controller device * which is associated with the given phy device_node * @np: Pointer to the given phy device_node * @arg0: phandle args[0] for phy's with #phy-cells >= 1, or -1 for * phys which do not have phy-cells * * In dts a usb controller associates with phy devices. The function gets * the string from property 'dr_mode' of the controller associated with the * given phy device node, and returns the correspondig enum usb_dr_mode. 
*/ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0) { struct device_node *controller = NULL; struct of_phandle_args args; const char *dr_mode; int index; int err; do { controller = of_find_node_with_property(controller, "phys"); if (!of_device_is_available(controller)) continue; index = 0; do { if (arg0 == -1) { args.np = of_parse_phandle(controller, "phys", index); args.args_count = 0; } else { err = of_parse_phandle_with_args(controller, "phys", "#phy-cells", index, &args); if (err) break; } of_node_put(args.np); if (args.np == np && (args.args_count == 0 || args.args[0] == arg0)) goto finish; index++; } while (args.np); } while (controller); finish: err = of_property_read_string(controller, "dr_mode", &dr_mode); of_node_put(controller); if (err < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(dr_mode); } EXPORT_SYMBOL_GPL(of_usb_get_dr_mode_by_phy); /** * of_usb_host_tpl_support - to get if Targeted Peripheral List is supported * for given targeted hosts (non-PC hosts) * @np: Pointer to the given device_node * * The function gets if the targeted hosts support TPL or not */ bool of_usb_host_tpl_support(struct device_node *np) { return of_property_read_bool(np, "tpl-support"); } EXPORT_SYMBOL_GPL(of_usb_host_tpl_support); /** * of_usb_update_otg_caps - to update usb otg capabilities according to * the passed properties in DT. * @np: Pointer to the given device_node * @otg_caps: Pointer to the target usb_otg_caps to be set * * The function updates the otg capabilities */ int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps) { u32 otg_rev; if (!otg_caps) return -EINVAL; if (!of_property_read_u32(np, "otg-rev", &otg_rev)) { switch (otg_rev) { case 0x0100: case 0x0120: case 0x0130: case 0x0200: /* Choose the lesser one if it's already been set */ if (otg_caps->otg_rev) otg_caps->otg_rev = min_t(u16, otg_rev, otg_caps->otg_rev); else otg_caps->otg_rev = otg_rev; break; default: pr_err("%pOF: unsupported otg-rev: 0x%x\n", np, otg_rev); return -EINVAL; } } else { /* * otg-rev is mandatory for otg properties, if not passed * we set it to be 0 and assume it's a legacy otg device. * Non-dt platform can set it afterwards. */ otg_caps->otg_rev = 0; } if (of_property_read_bool(np, "hnp-disable")) otg_caps->hnp_support = false; if (of_property_read_bool(np, "srp-disable")) otg_caps->srp_support = false; if (of_property_read_bool(np, "adp-disable") || (otg_caps->otg_rev < 0x0200)) otg_caps->adp_support = false; return 0; } EXPORT_SYMBOL_GPL(of_usb_update_otg_caps); /** * usb_of_get_companion_dev - Find the companion device * @dev: the device pointer to find a companion * * Find the companion device from platform bus. * * Takes a reference to the returned struct device which needs to be dropped * after use. * * Return: On success, a pointer to the companion device, %NULL on failure. */ struct device *usb_of_get_companion_dev(struct device *dev) { struct device_node *node; struct platform_device *pdev = NULL; node = of_parse_phandle(dev->of_node, "companion", 0); if (node) pdev = of_find_device_by_node(node); of_node_put(node); return pdev ? 
&pdev->dev : NULL; } EXPORT_SYMBOL_GPL(usb_of_get_companion_dev); #endif struct dentry *usb_debug_root; EXPORT_SYMBOL_GPL(usb_debug_root); static int __init usb_common_init(void) { usb_debug_root = debugfs_create_dir("usb", NULL); ledtrig_usb_init(); return 0; } static void __exit usb_common_exit(void) { ledtrig_usb_exit(); debugfs_remove_recursive(usb_debug_root); } subsys_initcall(usb_common_init); module_exit(usb_common_exit); MODULE_LICENSE("GPL");
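A hedged sketch of how a controller driver might consume the helpers exported above during probe; my_probe() and the fallback policy are invented, while usb_get_dr_mode(), usb_get_maximum_speed() and usb_speed_string() are the exported functions from this file.

#include <linux/platform_device.h>
#include <linux/usb/ch9.h>
#include <linux/usb/otg.h>

static int my_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	enum usb_dr_mode dr_mode = usb_get_dr_mode(dev);
	enum usb_device_speed speed = usb_get_maximum_speed(dev);

	/* "dr_mode" and "maximum-speed" come from DT/ACPI properties */
	if (dr_mode == USB_DR_MODE_UNKNOWN)
		dr_mode = USB_DR_MODE_OTG;	/* example driver default */

	dev_info(dev, "dr_mode %d, maximum speed %s\n",
		 dr_mode, usb_speed_string(speed));
	return 0;
}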
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _X86_IRQFLAGS_H_ #define _X86_IRQFLAGS_H_ #include <asm/processor-flags.h> #ifndef __ASSEMBLY__ #include <asm/nospec-branch.h> /* * Interrupt control: */ /* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ extern inline unsigned long native_save_fl(void); extern __always_inline unsigned long native_save_fl(void) { unsigned long flags; /* * "=rm" is safe here, because "pop" adjusts the stack before * it evaluates its effective address -- this is part of the * documented behavior of the "pop" instruction. */ asm volatile("# __raw_save_flags\n\t" "pushf ; pop %0" : "=rm" (flags) : /* no input */ : "memory"); return flags; } static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); } static __always_inline void native_irq_enable(void) { asm volatile("sti": : :"memory"); } static __always_inline void native_safe_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static __always_inline void native_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } #endif #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #ifndef __ASSEMBLY__ #include <linux/types.h> static __always_inline unsigned long arch_local_save_flags(void) { return native_save_fl(); } static __always_inline void arch_local_irq_disable(void) { native_irq_disable(); } static __always_inline void arch_local_irq_enable(void) { native_irq_enable(); } /* * Used in the idle loop; sti takes one instruction cycle * to complete: */ static __always_inline void arch_safe_halt(void) { native_safe_halt(); } /* * Used when interrupts are already enabled or to * shutdown the processor: */ static __always_inline void halt(void) { native_halt(); } /* * For spinlocks, etc: */ static __always_inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); arch_local_irq_disable(); return flags; } #else #ifdef CONFIG_X86_64 #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS pushfq; popq %rax #endif #endif #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); } static __always_inline int arch_irqs_disabled(void) { unsigned long flags = arch_local_save_flags(); return arch_irqs_disabled_flags(flags); } static __always_inline void arch_local_irq_restore(unsigned long flags) { if (!arch_irqs_disabled_flags(flags)) arch_local_irq_enable(); } #endif /* !__ASSEMBLY__ */ #endif
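For context, a short consumer-side sketch (the counter and function are invented): generic code uses the local_irq_save()/local_irq_restore() wrappers from <linux/irqflags.h>, which on x86 resolve to the arch_local_irq_save()/arch_local_irq_restore() primitives above.

#include <linux/irqflags.h>

static unsigned long shared_counter;

static void bump_counter(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* pushf/pop + cli underneath on x86 */
	shared_counter++;		/* not interruptible on this CPU */
	local_irq_restore(flags);	/* sti only if IF was set before */
}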
/* SPDX-License-Identifier: GPL-2.0-only */ /* The industrial I/O core * * Copyright (c) 2008 Jonathan Cameron */ #ifndef _INDUSTRIAL_IO_H_ #define _INDUSTRIAL_IO_H_ #include <linux/device.h> #include <linux/cdev.h> #include <linux/cleanup.h> #include <linux/slab.h> #include <linux/iio/types.h> /* IIO TODO LIST */ /* * Provide means of
adjusting timer accuracy. * Currently assumes nano seconds. */ struct fwnode_reference_args; enum iio_shared_by { IIO_SEPARATE, IIO_SHARED_BY_TYPE, IIO_SHARED_BY_DIR, IIO_SHARED_BY_ALL }; enum iio_endian { IIO_CPU, IIO_BE, IIO_LE, }; struct iio_chan_spec; struct iio_dev; /** * struct iio_chan_spec_ext_info - Extended channel info attribute * @name: Info attribute name * @shared: Whether this attribute is shared between all channels. * @read: Read callback for this info attribute, may be NULL. * @write: Write callback for this info attribute, may be NULL. * @private: Data private to the driver. */ struct iio_chan_spec_ext_info { const char *name; enum iio_shared_by shared; ssize_t (*read)(struct iio_dev *, uintptr_t private, struct iio_chan_spec const *, char *buf); ssize_t (*write)(struct iio_dev *, uintptr_t private, struct iio_chan_spec const *, const char *buf, size_t len); uintptr_t private; }; /** * struct iio_enum - Enum channel info attribute * @items: An array of strings. * @num_items: Length of the item array. * @set: Set callback function, may be NULL. * @get: Get callback function, may be NULL. * * The iio_enum struct can be used to implement enum style channel attributes. * Enum style attributes are those which have a set of strings which map to * unsigned integer values. The IIO enum helper code takes care of mapping * between value and string as well as generating a "_available" file which * contains a list of all available items. The set callback will be called when * the attribute is updated. The last parameter is the index to the newly * activated item. The get callback will be used to query the currently active * item and is supposed to return the index for it. */ struct iio_enum { const char * const *items; unsigned int num_items; int (*set)(struct iio_dev *, const struct iio_chan_spec *, unsigned int); int (*get)(struct iio_dev *, const struct iio_chan_spec *); }; ssize_t iio_enum_available_read(struct iio_dev *indio_dev, uintptr_t priv, const struct iio_chan_spec *chan, char *buf); ssize_t iio_enum_read(struct iio_dev *indio_dev, uintptr_t priv, const struct iio_chan_spec *chan, char *buf); ssize_t iio_enum_write(struct iio_dev *indio_dev, uintptr_t priv, const struct iio_chan_spec *chan, const char *buf, size_t len); /** * IIO_ENUM() - Initialize enum extended channel attribute * @_name: Attribute name * @_shared: Whether the attribute is shared between all channels * @_e: Pointer to an iio_enum struct * * This should usually be used together with IIO_ENUM_AVAILABLE() */ #define IIO_ENUM(_name, _shared, _e) \ { \ .name = (_name), \ .shared = (_shared), \ .read = iio_enum_read, \ .write = iio_enum_write, \ .private = (uintptr_t)(_e), \ } /** * IIO_ENUM_AVAILABLE() - Initialize enum available extended channel attribute * @_name: Attribute name ("_available" will be appended to the name) * @_shared: Whether the attribute is shared between all channels * @_e: Pointer to an iio_enum struct * * Creates a read only attribute which lists all the available enum items in a * space separated list. 
This should usually be used together with IIO_ENUM() */ #define IIO_ENUM_AVAILABLE(_name, _shared, _e) \ { \ .name = (_name "_available"), \ .shared = _shared, \ .read = iio_enum_available_read, \ .private = (uintptr_t)(_e), \ } /** * struct iio_mount_matrix - iio mounting matrix * @rotation: 3 dimensional space rotation matrix defining sensor alignment with * main hardware */ struct iio_mount_matrix { const char *rotation[9]; }; ssize_t iio_show_mount_matrix(struct iio_dev *indio_dev, uintptr_t priv, const struct iio_chan_spec *chan, char *buf); int iio_read_mount_matrix(struct device *dev, struct iio_mount_matrix *matrix); typedef const struct iio_mount_matrix * (iio_get_mount_matrix_t)(const struct iio_dev *indio_dev, const struct iio_chan_spec *chan); /** * IIO_MOUNT_MATRIX() - Initialize mount matrix extended channel attribute * @_shared: Whether the attribute is shared between all channels * @_get: Pointer to an iio_get_mount_matrix_t accessor */ #define IIO_MOUNT_MATRIX(_shared, _get) \ { \ .name = "mount_matrix", \ .shared = (_shared), \ .read = iio_show_mount_matrix, \ .private = (uintptr_t)(_get), \ } /** * struct iio_event_spec - specification for a channel event * @type: Type of the event * @dir: Direction of the event * @mask_separate: Bit mask of enum iio_event_info values. Attributes * set in this mask will be registered per channel. * @mask_shared_by_type: Bit mask of enum iio_event_info values. Attributes * set in this mask will be shared by channel type. * @mask_shared_by_dir: Bit mask of enum iio_event_info values. Attributes * set in this mask will be shared by channel type and * direction. * @mask_shared_by_all: Bit mask of enum iio_event_info values. Attributes * set in this mask will be shared by all channels. */ struct iio_event_spec { enum iio_event_type type; enum iio_event_direction dir; unsigned long mask_separate; unsigned long mask_shared_by_type; unsigned long mask_shared_by_dir; unsigned long mask_shared_by_all; }; /** * struct iio_chan_spec - specification of a single channel * @type: What type of measurement is the channel making. * @channel: What number do we wish to assign the channel. * @channel2: If there is a second number for a differential * channel then this is it. If modified is set then the * value here specifies the modifier. * @address: Driver specific identifier. * @scan_index: Monotonic index to give ordering in scans when read * from a buffer. * @scan_type: struct describing the scan type * @scan_type.sign: 's' or 'u' to specify signed or unsigned * @scan_type.realbits: Number of valid bits of data * @scan_type.storagebits: Realbits + padding * @scan_type.shift: Shift right by this before masking out * realbits. * @scan_type.repeat: Number of times real/storage bits repeats. * When the repeat element is more than 1, then * the type element in sysfs will show a repeat * value. Otherwise, the number of repetitions * is omitted. * @scan_type.endianness: little or big endian * @info_mask_separate: What information is to be exported that is specific to * this channel. * @info_mask_separate_available: What availability information is to be * exported that is specific to this channel. * @info_mask_shared_by_type: What information is to be exported that is shared * by all channels of the same type. * @info_mask_shared_by_type_available: What availability information is to be * exported that is shared by all channels of the same * type. 
* @info_mask_shared_by_dir: What information is to be exported that is shared * by all channels of the same direction. * @info_mask_shared_by_dir_available: What availability information is to be * exported that is shared by all channels of the same * direction. * @info_mask_shared_by_all: What information is to be exported that is shared * by all channels. * @info_mask_shared_by_all_available: What availability information is to be * exported that is shared by all channels. * @event_spec: Array of events which should be registered for this * channel. * @num_event_specs: Size of the event_spec array. * @ext_info: Array of extended info attributes for this channel. * The array is NULL terminated, the last element should * have its name field set to NULL. * @extend_name: Allows labeling of channel attributes with an * informative name. Note this has no effect codes etc, * unlike modifiers. * This field is deprecated in favour of providing * iio_info->read_label() to override the label, which * unlike @extend_name does not affect sysfs filenames. * @datasheet_name: A name used in in-kernel mapping of channels. It should * correspond to the first name that the channel is referred * to by in the datasheet (e.g. IND), or the nearest * possible compound name (e.g. IND-INC). * @modified: Does a modifier apply to this channel. What these are * depends on the channel type. Modifier is set in * channel2. Examples are IIO_MOD_X for axial sensors about * the 'x' axis. * @indexed: Specify the channel has a numerical index. If not, * the channel index number will be suppressed for sysfs * attributes but not for event codes. * @output: Channel is output. * @differential: Channel is differential. */ struct iio_chan_spec { enum iio_chan_type type; int channel; int channel2; unsigned long address; int scan_index; struct { char sign; u8 realbits; u8 storagebits; u8 shift; u8 repeat; enum iio_endian endianness; } scan_type; long info_mask_separate; long info_mask_separate_available; long info_mask_shared_by_type; long info_mask_shared_by_type_available; long info_mask_shared_by_dir; long info_mask_shared_by_dir_available; long info_mask_shared_by_all; long info_mask_shared_by_all_available; const struct iio_event_spec *event_spec; unsigned int num_event_specs; const struct iio_chan_spec_ext_info *ext_info; const char *extend_name; const char *datasheet_name; unsigned modified:1; unsigned indexed:1; unsigned output:1; unsigned differential:1; }; /** * iio_channel_has_info() - Checks whether a channel supports a info attribute * @chan: The channel to be queried * @type: Type of the info attribute to be checked * * Returns true if the channels supports reporting values for the given info * attribute type, false otherwise. */ static inline bool iio_channel_has_info(const struct iio_chan_spec *chan, enum iio_chan_info_enum type) { return (chan->info_mask_separate & BIT(type)) | (chan->info_mask_shared_by_type & BIT(type)) | (chan->info_mask_shared_by_dir & BIT(type)) | (chan->info_mask_shared_by_all & BIT(type)); } /** * iio_channel_has_available() - Checks if a channel has an available attribute * @chan: The channel to be queried * @type: Type of the available attribute to be checked * * Returns true if the channel supports reporting available values for the * given attribute type, false otherwise. 
*/ static inline bool iio_channel_has_available(const struct iio_chan_spec *chan, enum iio_chan_info_enum type) { return (chan->info_mask_separate_available & BIT(type)) | (chan->info_mask_shared_by_type_available & BIT(type)) | (chan->info_mask_shared_by_dir_available & BIT(type)) | (chan->info_mask_shared_by_all_available & BIT(type)); } #define IIO_CHAN_SOFT_TIMESTAMP(_si) { \ .type = IIO_TIMESTAMP, \ .channel = -1, \ .scan_index = _si, \ .scan_type = { \ .sign = 's', \ .realbits = 64, \ .storagebits = 64, \ }, \ } s64 iio_get_time_ns(const struct iio_dev *indio_dev); /* * Device operating modes * @INDIO_DIRECT_MODE: There is an access to either: * a) The last single value available for devices that do not provide * on-demand reads. * b) A new value after performing an on-demand read otherwise. * On most devices, this is a single-shot read. On some devices with data * streams without an 'on-demand' function, this might also be the 'last value' * feature. Above all, this mode internally means that we are not in any of the * other modes, and sysfs reads should work. * Device drivers should inform the core if they support this mode. * @INDIO_BUFFER_TRIGGERED: Common mode when dealing with kfifo buffers. * It indicates that an explicit trigger is required. This requests the core to * attach a poll function when enabling the buffer, which is indicated by the * _TRIGGERED suffix. * The core will ensure this mode is set when registering a triggered buffer * with iio_triggered_buffer_setup(). * @INDIO_BUFFER_SOFTWARE: Another kfifo buffer mode, but not event triggered. * No poll function can be attached because there is no triggered infrastructure * we can use to cause capture. There is a kfifo that the driver will fill, but * not "only one scan at a time". Typically, hardware will have a buffer that * can hold multiple scans. Software may read one or more scans at a single time * and push the available data to a Kfifo. This means the core will not attach * any poll function when enabling the buffer. * The core will ensure this mode is set when registering a simple kfifo buffer * with devm_iio_kfifo_buffer_setup(). * @INDIO_BUFFER_HARDWARE: For specific hardware, if unsure do not use this mode. * Same as above but this time the buffer is not a kfifo where we have direct * access to the data. Instead, the consumer driver must access the data through * non software visible channels (or DMA when there is no demux possible in * software) * The core will ensure this mode is set when registering a dmaengine buffer * with devm_iio_dmaengine_buffer_setup(). * @INDIO_EVENT_TRIGGERED: Very unusual mode. * Triggers usually refer to an external event which will start data capture. * Here it is kind of the opposite as, a particular state of the data might * produce an event which can be considered as an event. We don't necessarily * have access to the data itself, but to the event produced. For example, this * can be a threshold detector. The internal path of this mode is very close to * the INDIO_BUFFER_TRIGGERED mode. * The core will ensure this mode is set when registering a triggered event. * @INDIO_HARDWARE_TRIGGERED: Very unusual mode. * Here, triggers can result in data capture and can be routed to multiple * hardware components, which make them close to regular triggers in the way * they must be managed by the core, but without the entire interrupts/poll * functions burden. Interrupts are irrelevant as the data flow is hardware * mediated and distributed. 
*/ #define INDIO_DIRECT_MODE 0x01 #define INDIO_BUFFER_TRIGGERED 0x02 #define INDIO_BUFFER_SOFTWARE 0x04 #define INDIO_BUFFER_HARDWARE 0x08 #define INDIO_EVENT_TRIGGERED 0x10 #define INDIO_HARDWARE_TRIGGERED 0x20 #define INDIO_ALL_BUFFER_MODES \ (INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE | INDIO_BUFFER_SOFTWARE) #define INDIO_ALL_TRIGGERED_MODES \ (INDIO_BUFFER_TRIGGERED \ | INDIO_EVENT_TRIGGERED \ | INDIO_HARDWARE_TRIGGERED) #define INDIO_MAX_RAW_ELEMENTS 4 struct iio_val_int_plus_micro { int integer; int micro; }; struct iio_trigger; /* forward declaration */ /** * struct iio_info - constant information about device * @event_attrs: event control attributes * @attrs: general purpose device attributes * @read_raw: function to request a value from the device. * mask specifies which value. Note 0 means a reading of * the channel in question. Return value will specify the * type of value returned by the device. val and val2 will * contain the elements making up the returned value. * @read_raw_multi: function to return values from the device. * mask specifies which value. Note 0 means a reading of * the channel in question. Return value will specify the * type of value returned by the device. vals pointer * contain the elements making up the returned value. * max_len specifies maximum number of elements * vals pointer can contain. val_len is used to return * length of valid elements in vals. * @read_avail: function to return the available values from the device. * mask specifies which value. Note 0 means the available * values for the channel in question. Return value * specifies if a IIO_AVAIL_LIST or a IIO_AVAIL_RANGE is * returned in vals. The type of the vals are returned in * type and the number of vals is returned in length. For * ranges, there are always three vals returned; min, step * and max. For lists, all possible values are enumerated. * @write_raw: function to write a value to the device. * Parameters are the same as for read_raw. * @read_label: function to request label name for a specified label, * for better channel identification. * @write_raw_get_fmt: callback function to query the expected * format/precision. If not set by the driver, write_raw * returns IIO_VAL_INT_PLUS_MICRO. * @read_event_config: find out if the event is enabled. * @write_event_config: set if the event is enabled. * @read_event_value: read a configuration value associated with the event. * @write_event_value: write a configuration value for the event. * @read_event_label: function to request label name for a specified label, * for better event identification. * @validate_trigger: function to validate the trigger when the * current trigger gets changed. * @update_scan_mode: function to configure device and scan buffer when * channels have changed * @debugfs_reg_access: function to read or write register value of device * @fwnode_xlate: fwnode based function pointer to obtain channel specifier index. * @hwfifo_set_watermark: function pointer to set the current hardware * fifo watermark level; see hwfifo_* entries in * Documentation/ABI/testing/sysfs-bus-iio for details on * how the hardware fifo operates * @hwfifo_flush_to_buffer: function pointer to flush the samples stored * in the hardware fifo to the device buffer. The driver * should not flush more than count samples. The function * must return the number of samples flushed, 0 if no * samples were flushed or a negative integer if no samples * were flushed and there was an error. 
**/ struct iio_info { const struct attribute_group *event_attrs; const struct attribute_group *attrs; int (*read_raw)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask); int (*read_raw_multi)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int max_len, int *vals, int *val_len, long mask); int (*read_avail)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, const int **vals, int *type, int *length, long mask); int (*write_raw)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long mask); int (*read_label)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, char *label); int (*write_raw_get_fmt)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, long mask); int (*read_event_config)(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir); int (*write_event_config)(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir, int state); int (*read_event_value)(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir, enum iio_event_info info, int *val, int *val2); int (*write_event_value)(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir, enum iio_event_info info, int val, int val2); int (*read_event_label)(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, enum iio_event_type type, enum iio_event_direction dir, char *label); int (*validate_trigger)(struct iio_dev *indio_dev, struct iio_trigger *trig); int (*update_scan_mode)(struct iio_dev *indio_dev, const unsigned long *scan_mask); int (*debugfs_reg_access)(struct iio_dev *indio_dev, unsigned reg, unsigned writeval, unsigned *readval); int (*fwnode_xlate)(struct iio_dev *indio_dev, const struct fwnode_reference_args *iiospec); int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val); int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev, unsigned count); }; /** * struct iio_buffer_setup_ops - buffer setup related callbacks * @preenable: [DRIVER] function to run prior to marking buffer enabled * @postenable: [DRIVER] function to run after marking buffer enabled * @predisable: [DRIVER] function to run prior to marking buffer * disabled * @postdisable: [DRIVER] function to run after marking buffer disabled * @validate_scan_mask: [DRIVER] function callback to check whether a given * scan mask is valid for the device. */ struct iio_buffer_setup_ops { int (*preenable)(struct iio_dev *); int (*postenable)(struct iio_dev *); int (*predisable)(struct iio_dev *); int (*postdisable)(struct iio_dev *); bool (*validate_scan_mask)(struct iio_dev *indio_dev, const unsigned long *scan_mask); }; /** * struct iio_dev - industrial I/O device * @modes: [DRIVER] bitmask listing all the operating modes * supported by the IIO device. This list should be * initialized before registering the IIO device. It can * also be filed up by the IIO core, as a result of * enabling particular features in the driver * (see iio_triggered_event_setup()). * @dev: [DRIVER] device structure, should be assigned a parent * and owner * @buffer: [DRIVER] any buffer present * @scan_bytes: [INTERN] num bytes captured to be fed to buffer demux * @available_scan_masks: [DRIVER] optional array of allowed bitmasks. Sort the * array in order of preference, the most preferred * masks first. 
* @masklength: [INTERN] the length of the mask established from * channels * @active_scan_mask: [INTERN] union of all scan masks requested by buffers * @scan_timestamp: [INTERN] set if any buffers have requested timestamp * @trig: [INTERN] current device trigger (buffer modes) * @pollfunc: [DRIVER] function run on trigger being received * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table * @num_channels: [DRIVER] number of channels specified in @channels. * @name: [DRIVER] name of the device. * @label: [DRIVER] unique name to identify which device this is * @info: [DRIVER] callbacks and constant info from driver * @setup_ops: [DRIVER] callbacks to call before and after buffer * enable/disable * @priv: [DRIVER] reference to driver's private information * **MUST** be accessed **ONLY** via iio_priv() helper */ struct iio_dev { int modes; struct device dev; struct iio_buffer *buffer; int scan_bytes; const unsigned long *available_scan_masks; unsigned masklength; const unsigned long *active_scan_mask; bool scan_timestamp; struct iio_trigger *trig; struct iio_poll_func *pollfunc; struct iio_poll_func *pollfunc_event; struct iio_chan_spec const *channels; int num_channels; const char *name; const char *label; const struct iio_info *info; const struct iio_buffer_setup_ops *setup_ops; void *priv; }; int iio_device_id(struct iio_dev *indio_dev); int iio_device_get_current_mode(struct iio_dev *indio_dev); bool iio_buffer_enabled(struct iio_dev *indio_dev); const struct iio_chan_spec *iio_find_channel_from_si(struct iio_dev *indio_dev, int si); /** * iio_device_register() - register a device with the IIO subsystem * @indio_dev: Device structure filled by the device driver **/ #define iio_device_register(indio_dev) \ __iio_device_register((indio_dev), THIS_MODULE) int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod); void iio_device_unregister(struct iio_dev *indio_dev); /** * devm_iio_device_register - Resource-managed iio_device_register() * @dev: Device to allocate iio_dev for * @indio_dev: Device structure filled by the device driver * * Managed iio_device_register. The IIO device registered with this * function is automatically unregistered on driver detach. This function * calls iio_device_register() internally. Refer to that function for more * information. * * RETURNS: * 0 on success, negative error number on failure. */ #define devm_iio_device_register(dev, indio_dev) \ __devm_iio_device_register((dev), (indio_dev), THIS_MODULE) int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, struct module *this_mod); int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); int iio_device_claim_direct_mode(struct iio_dev *indio_dev); void iio_device_release_direct_mode(struct iio_dev *indio_dev); /* * This autocleanup logic is normally used via * iio_device_claim_direct_scoped(). */ DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T), iio_device_release_direct_mode(_T)) DEFINE_GUARD_COND(iio_claim_direct, _try, ({ struct iio_dev *dev; int d = iio_device_claim_direct_mode(_T); if (d < 0) dev = NULL; else dev = _T; dev; })) /** * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct. * @fail: What to do on failure to claim device. 
* @iio_dev: Pointer to the IIO devices structure */ #define iio_device_claim_direct_scoped(fail, iio_dev) \ scoped_cond_guard(iio_claim_direct_try, fail, iio_dev) int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); void iio_device_release_buffer_mode(struct iio_dev *indio_dev); extern const struct bus_type iio_bus_type; /** * iio_device_put() - reference counted deallocation of struct device * @indio_dev: IIO device structure containing the device **/ static inline void iio_device_put(struct iio_dev *indio_dev) { if (indio_dev) put_device(&indio_dev->dev); } clockid_t iio_device_get_clock(const struct iio_dev *indio_dev); int iio_device_set_clock(struct iio_dev *indio_dev, clockid_t clock_id); /** * dev_to_iio_dev() - Get IIO device struct from a device struct * @dev: The device embedded in the IIO device * * Note: The device must be a IIO device, otherwise the result is undefined. */ static inline struct iio_dev *dev_to_iio_dev(struct device *dev) { return container_of(dev, struct iio_dev, dev); } /** * iio_device_get() - increment reference count for the device * @indio_dev: IIO device structure * * Returns: The passed IIO device **/ static inline struct iio_dev *iio_device_get(struct iio_dev *indio_dev) { return indio_dev ? dev_to_iio_dev(get_device(&indio_dev->dev)) : NULL; } /** * iio_device_set_parent() - assign parent device to the IIO device object * @indio_dev: IIO device structure * @parent: reference to parent device object * * This utility must be called between IIO device allocation * (via devm_iio_device_alloc()) & IIO device registration * (via iio_device_register() and devm_iio_device_register())). * By default, the device allocation will also assign a parent device to * the IIO device object. In cases where devm_iio_device_alloc() is used, * sometimes the parent device must be different than the device used to * manage the allocation. * In that case, this helper should be used to change the parent, hence the * requirement to call this between allocation & registration. **/ static inline void iio_device_set_parent(struct iio_dev *indio_dev, struct device *parent) { indio_dev->dev.parent = parent; } /** * iio_device_set_drvdata() - Set device driver data * @indio_dev: IIO device structure * @data: Driver specific data * * Allows to attach an arbitrary pointer to an IIO device, which can later be * retrieved by iio_device_get_drvdata(). */ static inline void iio_device_set_drvdata(struct iio_dev *indio_dev, void *data) { dev_set_drvdata(&indio_dev->dev, data); } /** * iio_device_get_drvdata() - Get device driver data * @indio_dev: IIO device structure * * Returns the data previously set with iio_device_set_drvdata() */ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) { return dev_get_drvdata(&indio_dev->dev); } /* * Used to ensure the iio_priv() structure is aligned to allow that structure * to in turn include IIO_DMA_MINALIGN'd elements such as buffers which * must not share cachelines with the rest of the structure, thus making * them safe for use with non-coherent DMA. */ #define IIO_DMA_MINALIGN ARCH_DMA_MINALIGN struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ static inline void *iio_priv(const struct iio_dev *indio_dev) { return indio_dev->priv; } void iio_device_free(struct iio_dev *indio_dev); struct iio_dev *devm_iio_device_alloc(struct device *parent, int sizeof_priv); #define devm_iio_trigger_alloc(parent, fmt, ...) 
\ __devm_iio_trigger_alloc((parent), THIS_MODULE, (fmt), ##__VA_ARGS__) __printf(3, 4) struct iio_trigger *__devm_iio_trigger_alloc(struct device *parent, struct module *this_mod, const char *fmt, ...); /** * iio_get_debugfs_dentry() - helper function to get the debugfs_dentry * @indio_dev: IIO device structure for device **/ #if defined(CONFIG_DEBUG_FS) struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev); #else static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev) { return NULL; } #endif ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals); int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, int *fract); /** * IIO_DEGREE_TO_RAD() - Convert degree to rad * @deg: A value in degree * * Returns the given value converted from degree to rad */ #define IIO_DEGREE_TO_RAD(deg) (((deg) * 314159ULL + 9000000ULL) / 18000000ULL) /** * IIO_RAD_TO_DEGREE() - Convert rad to degree * @rad: A value in rad * * Returns the given value converted from rad to degree */ #define IIO_RAD_TO_DEGREE(rad) \ (((rad) * 18000000ULL + 314159ULL / 2) / 314159ULL) /** * IIO_G_TO_M_S_2() - Convert g to meter / second**2 * @g: A value in g * * Returns the given value converted from g to meter / second**2 */ #define IIO_G_TO_M_S_2(g) ((g) * 980665ULL / 100000ULL) /** * IIO_M_S_2_TO_G() - Convert meter / second**2 to g * @ms2: A value in meter / second**2 * * Returns the given value converted from meter / second**2 to g */ #define IIO_M_S_2_TO_G(ms2) (((ms2) * 100000ULL + 980665ULL / 2) / 980665ULL) #endif /* _INDUSTRIAL_IO_H_ */
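A compact, hypothetical driver skeleton built only from the interfaces declared above (devm_iio_device_alloc(), iio_priv(), struct iio_info.read_raw and devm_iio_device_register()); every my_adc_* name is invented and the hardware access is stubbed out.

#include <linux/bits.h>
#include <linux/device.h>
#include <linux/iio/iio.h>

struct my_adc_state {
	int last_sample;		/* stand-in for real hardware state */
};

static const struct iio_chan_spec my_adc_channels[] = {
	{
		.type = IIO_VOLTAGE,
		.indexed = 1,
		.channel = 0,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
	},
};

static int my_adc_read_raw(struct iio_dev *indio_dev,
			   struct iio_chan_spec const *chan,
			   int *val, int *val2, long mask)
{
	struct my_adc_state *st = iio_priv(indio_dev);

	if (mask != IIO_CHAN_INFO_RAW)
		return -EINVAL;
	*val = st->last_sample;		/* a real driver would read the chip */
	return IIO_VAL_INT;
}

static const struct iio_info my_adc_info = {
	.read_raw = my_adc_read_raw,
};

static int my_adc_probe(struct device *dev)
{
	struct iio_dev *indio_dev;

	indio_dev = devm_iio_device_alloc(dev, sizeof(struct my_adc_state));
	if (!indio_dev)
		return -ENOMEM;

	indio_dev->name = "my_adc";
	indio_dev->info = &my_adc_info;
	indio_dev->modes = INDIO_DIRECT_MODE;
	indio_dev->channels = my_adc_channels;
	indio_dev->num_channels = ARRAY_SIZE(my_adc_channels);

	return devm_iio_device_register(dev, indio_dev);
}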
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BH_H #define _LINUX_BH_H #include <linux/instruction_pointer.h> #include <linux/preempt.h> #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { preempt_count_add(cnt); barrier(); } #endif static inline void local_bh_disable(void) { __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } extern void _local_bh_enable(void); extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt); static inline void local_bh_enable_ip(unsigned long ip) { __local_bh_enable_ip(ip, SOFTIRQ_DISABLE_OFFSET); } static inline void local_bh_enable(void) { __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } #ifdef CONFIG_PREEMPT_RT extern bool local_bh_blocked(void); #else static inline bool local_bh_blocked(void) { return false; } #endif #endif /* _LINUX_BH_H */
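A minimal usage sketch (the per-CPU counter and function are invented): local_bh_disable()/local_bh_enable() keep softirq handlers off the current CPU while process context touches data it shares with them.

#include <linux/bh.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, my_stat);

static void my_stat_inc(void)
{
	local_bh_disable();		/* no softirqs on this CPU from here */
	__this_cpu_inc(my_stat);	/* safe against a softirq updater */
	local_bh_enable();		/* may run any pending softirqs */
}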
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HIGHMEM_H #define _LINUX_HIGHMEM_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/bug.h> #include <linux/cacheflush.h> #include <linux/kmsan.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include "highmem-internal.h" /** * kmap - Map a page for long term usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * Can only be invoked from preemptible task context because on 32bit * systems with CONFIG_HIGHMEM enabled this function might sleep. * * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area * this returns the virtual address of the direct kernel mapping. * * The returned virtual address is globally visible and valid up to the * point where it is unmapped via kunmap(). The pointer can be handed to * other contexts.
* * For highmem pages on 32bit systems this can be slow as the mapping space * is limited and protected by a global lock. In case that there is no * mapping slot available the function blocks until a slot is released via * kunmap(). */ static inline void *kmap(struct page *page); /** * kunmap - Unmap the virtual address mapped by kmap() * @page: Pointer to the page which was mapped by kmap() * * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of * pages in the low memory area. */ static inline void kunmap(struct page *page); /** * kmap_to_page - Get the page for a kmap'ed address * @addr: The address to look up * * Returns: The page which is mapped to @addr. */ static inline struct page *kmap_to_page(void *addr); /** * kmap_flush_unused - Flush all unused kmap mappings in order to * remove stray mappings */ static inline void kmap_flush_unused(void); /** * kmap_local_page - Map a page for temporary usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * Can be invoked from any context, including interrupts. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation: * * addr1 = kmap_local_page(page1); * addr2 = kmap_local_page(page2); * ... * kunmap_local(addr2); * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * * Contrary to kmap() mappings the mapping is only valid in the context of * the caller and cannot be handed to other contexts. * * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * * While kmap_local_page() is significantly faster than kmap() for the highmem * case it comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_page() can rely on this side effect. */ static inline void *kmap_local_page(struct page *page); /** * kmap_local_folio - Map a page in this folio for temporary usage * @folio: The folio containing the page. * @offset: The byte offset within the folio which identifies the page. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation:: * * addr1 = kmap_local_folio(folio1, offset1); * addr2 = kmap_local_folio(folio2, offset2); * ... * kunmap_local(addr2); * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * * Contrary to kmap() mappings the mapping is only valid in the context of * the caller and cannot be handed to other contexts. * * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * * While it is significantly faster than kmap() for the highmem case it * comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_folio() can rely on this side effect. * * Context: Can be invoked from any context. * Return: The virtual address of @offset. 
*/ static inline void *kmap_local_folio(struct folio *folio, size_t offset); /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * In fact a wrapper around kmap_local_page() which also disables pagefaults * and, depending on PREEMPT_RT configuration, also CPU migration and * preemption. Therefore users should not count on the latter two side effects. * * Mappings should always be released by kunmap_atomic(). * * Do not use in new code. Use kmap_local_page() instead. * * It is used in atomic context when code wants to access the contents of a * page that might be allocated from high memory (see __GFP_HIGHMEM), for * example a page in the pagecache. The API has two functions, and they * can be used in a manner similar to the following:: * * // Find the page of interest. * struct page *page = find_get_page(mapping, offset); * * // Gain access to the contents of that page. * void *vaddr = kmap_atomic(page); * * // Do something to the contents of that page. * memset(vaddr, 0, PAGE_SIZE); * * // Unmap that page. * kunmap_atomic(vaddr); * * Note that the kunmap_atomic() call takes the result of the kmap_atomic() * call, not the argument. * * If you need to map two pages because you want to copy from one page to * another you need to keep the kmap_atomic calls strictly nested, like: * * vaddr1 = kmap_atomic(page1); * vaddr2 = kmap_atomic(page2); * * memcpy(vaddr1, vaddr2, PAGE_SIZE); * * kunmap_atomic(vaddr2); * kunmap_atomic(vaddr1); */ static inline void *kmap_atomic(struct page *page); /* Highmem related interfaces for management code */ static inline unsigned int nr_free_highpages(void); static inline unsigned long totalhigh_pages(void); #ifndef ARCH_HAS_FLUSH_ANON_PAGE static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { } #endif #ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE static inline void flush_kernel_vmap_range(void *vaddr, int size) { } static inline void invalidate_kernel_vmap_range(void *vaddr, int size) { } #endif /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_local_page(page); clear_user_page(addr, vaddr, page); kunmap_local(addr); } #endif #ifndef vma_alloc_zeroed_movable_folio /** * vma_alloc_zeroed_movable_folio - Allocate a zeroed page for a VMA. * @vma: The VMA the page is to be allocated for. * @vaddr: The virtual address the page will be inserted into. * * This function will allocate a page suitable for inserting into this * VMA at this virtual address. It may be allocated from highmem or * the movable zone. An architecture may provide its own implementation. * * Return: A folio containing one allocated and zeroed page or NULL if * we are out of memory. 
*/ static inline struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr) { struct folio *folio; folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr, false); if (folio) clear_user_highpage(&folio->page, vaddr); return folio; } #endif static inline void clear_highpage(struct page *page) { void *kaddr = kmap_local_page(page); clear_page(kaddr); kunmap_local(kaddr); } static inline void clear_highpage_kasan_tagged(struct page *page) { void *kaddr = kmap_local_page(page); clear_page(kasan_reset_tag(kaddr)); kunmap_local(kaddr); } #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE static inline void tag_clear_highpage(struct page *page) { } #endif /* * If we pass in a base or tail page, we can zero up to PAGE_SIZE. * If we pass in a head page, we can zero up to the size of the compound page. */ #ifdef CONFIG_HIGHMEM void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2); #else static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { void *kaddr = kmap_local_page(page); unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); if (end1 > start1) memset(kaddr + start1, 0, end1 - start1); if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); kunmap_local(kaddr); for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } #endif static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) { zero_user_segments(page, start, end, 0, 0); } static inline void zero_user(struct page *page, unsigned start, unsigned size) { zero_user_segments(page, start, start + size, 0, 0); } #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_user_page(vto, vfrom, vaddr, to); kmsan_unpoison_memory(page_address(to), PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); } #endif #ifndef __HAVE_ARCH_COPY_HIGHPAGE static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_page(vto, vfrom); kmsan_copy_page_meta(to, from); kunmap_local(vto); kunmap_local(vfrom); } #endif #ifdef copy_mc_to_kernel /* * If architecture supports machine check exception handling, define the * #MC versions of copy_user_highpage and copy_highpage. They copy a memory * page with #MC in source page (@from) handled, and return the number * of bytes not copied if there was a #MC, otherwise 0 for success. 
*/ static inline int copy_mc_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { unsigned long ret; char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); if (!ret) kmsan_unpoison_memory(page_address(to), PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); return ret; } static inline int copy_mc_highpage(struct page *to, struct page *from) { unsigned long ret; char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); if (!ret) kmsan_copy_page_meta(to, from); kunmap_local(vto); kunmap_local(vfrom); return ret; } #else static inline int copy_mc_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { copy_user_highpage(to, from, vaddr, vma); return 0; } static inline int copy_mc_highpage(struct page *to, struct page *from) { copy_highpage(to, from); return 0; } #endif static inline void memcpy_page(struct page *dst_page, size_t dst_off, struct page *src_page, size_t src_off, size_t len) { char *dst = kmap_local_page(dst_page); char *src = kmap_local_page(src_page); VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); memcpy(dst + dst_off, src + src_off, len); kunmap_local(src); kunmap_local(dst); } static inline void memset_page(struct page *page, size_t offset, int val, size_t len) { char *addr = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memset(addr + offset, val, len); kunmap_local(addr); } static inline void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) { char *from = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to, from + offset, len); kunmap_local(from); } static inline void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) { char *to = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); flush_dcache_page(page); kunmap_local(to); } static inline void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memset(addr + offset, 0, len); flush_dcache_page(page); kunmap_local(addr); } /** * memcpy_from_folio - Copy a range of bytes from a folio. * @to: The memory to copy to. * @folio: The folio to read from. * @offset: The first byte in the folio to read. * @len: The number of bytes to copy. */ static inline void memcpy_from_folio(char *to, struct folio *folio, size_t offset, size_t len) { VM_BUG_ON(offset + len > folio_size(folio)); do { const char *from = kmap_local_folio(folio, offset); size_t chunk = len; if (folio_test_highmem(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); kunmap_local(from); to += chunk; offset += chunk; len -= chunk; } while (len > 0); } /** * memcpy_to_folio - Copy a range of bytes to a folio. * @folio: The folio to write to. * @offset: The first byte in the folio to store to. * @from: The memory to copy from. * @len: The number of bytes to copy. 
*/ static inline void memcpy_to_folio(struct folio *folio, size_t offset, const char *from, size_t len) { VM_BUG_ON(offset + len > folio_size(folio)); do { char *to = kmap_local_folio(folio, offset); size_t chunk = len; if (folio_test_highmem(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); kunmap_local(to); from += chunk; offset += chunk; len -= chunk; } while (len > 0); flush_dcache_folio(folio); } /** * folio_zero_tail - Zero the tail of a folio. * @folio: The folio to zero. * @offset: The byte offset in the folio to start zeroing at. * @kaddr: The address the folio is currently mapped to. * * If you have already used kmap_local_folio() to map a folio, written * some data to it and now need to zero the end of the folio (and flush * the dcache), you can use this function. If you do not have the * folio kmapped (eg the folio has been partially populated by DMA), * use folio_zero_range() or folio_zero_segment() instead. * * Return: An address which can be passed to kunmap_local(). */ static inline __must_check void *folio_zero_tail(struct folio *folio, size_t offset, void *kaddr) { size_t len = folio_size(folio) - offset; if (folio_test_highmem(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { memset(kaddr, 0, max); kunmap_local(kaddr); len -= max; offset += max; max = PAGE_SIZE; kaddr = kmap_local_folio(folio, offset); } } memset(kaddr, 0, len); flush_dcache_folio(folio); return kaddr; } /** * folio_fill_tail - Copy some data to a folio and pad with zeroes. * @folio: The destination folio. * @offset: The offset into @folio at which to start copying. * @from: The data to copy. * @len: How many bytes of data to copy. * * This function is most useful for filesystems which support inline data. * When they want to copy data from the inode into the page cache, this * function does everything for them. It supports large folios even on * HIGHMEM configurations. */ static inline void folio_fill_tail(struct folio *folio, size_t offset, const char *from, size_t len) { char *to = kmap_local_folio(folio, offset); VM_BUG_ON(offset + len > folio_size(folio)); if (folio_test_highmem(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { memcpy(to, from, max); kunmap_local(to); len -= max; from += max; offset += max; max = PAGE_SIZE; to = kmap_local_folio(folio, offset); } } memcpy(to, from, len); to = folio_zero_tail(folio, offset + len, to + len); kunmap_local(to); } /** * memcpy_from_file_folio - Copy some bytes from a file folio. * @to: The destination buffer. * @folio: The folio to copy from. * @pos: The position in the file. * @len: The maximum number of bytes to copy. * * Copy up to @len bytes from this folio. This may be limited by PAGE_SIZE * if the folio comes from HIGHMEM, and by the size of the folio. * * Return: The number of bytes copied from the folio. */ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, loff_t pos, size_t len) { size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); if (folio_test_highmem(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else len = min(len, folio_size(folio) - offset); memcpy(to, from, len); kunmap_local(from); return len; } /** * folio_zero_segments() - Zero two byte ranges in a folio. * @folio: The folio to write to. * @start1: The first byte to zero. * @xend1: One more than the last byte in the first range. 
* @start2: The first byte to zero in the second range. * @xend2: One more than the last byte in the second range. */ static inline void folio_zero_segments(struct folio *folio, size_t start1, size_t xend1, size_t start2, size_t xend2) { zero_user_segments(&folio->page, start1, xend1, start2, xend2); } /** * folio_zero_segment() - Zero a byte range in a folio. * @folio: The folio to write to. * @start: The first byte to zero. * @xend: One more than the last byte to zero. */ static inline void folio_zero_segment(struct folio *folio, size_t start, size_t xend) { zero_user_segments(&folio->page, start, xend, 0, 0); } /** * folio_zero_range() - Zero a byte range in a folio. * @folio: The folio to write to. * @start: The first byte to zero. * @length: The number of bytes to zero. */ static inline void folio_zero_range(struct folio *folio, size_t start, size_t length) { zero_user_segments(&folio->page, start, start + length, 0, 0); } /** * folio_release_kmap - Unmap a folio and drop a refcount. * @folio: The folio to release. * @addr: The address previously returned by a call to kmap_local_folio(). * * It is common, eg in directory handling to kmap a folio. This function * unmaps the folio and drops the refcount that was being held to keep the * folio alive while we accessed it. */ static inline void folio_release_kmap(struct folio *folio, void *addr) { kunmap_local(addr); folio_put(folio); } static inline void unmap_and_put_page(struct page *page, void *addr) { folio_release_kmap(page_folio(page), addr); } #endif /* _LINUX_HIGHMEM_H */
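To illustrate the local-mapping discipline documented above, a minimal sketch (not part of the header) that copies a caller buffer into a possibly-highmem page and zeroes the remainder; demo_fill_page() is a hypothetical function.

/* Hypothetical sketch: write into a page that may come from highmem. */
#include <linux/highmem.h>
#include <linux/string.h>

static void demo_fill_page(struct page *page, const void *src, size_t len)
{
	char *vaddr;

	if (WARN_ON(len > PAGE_SIZE))
		return;

	vaddr = kmap_local_page(page);	/* pointer is only valid in this context */
	memcpy(vaddr, src, len);
	memset(vaddr + len, 0, PAGE_SIZE - len);
	kunmap_local(vaddr);		/* unmap in reverse order when nesting */
	flush_dcache_page(page);
}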
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_SPECIAL_INSNS_H
#define _ASM_X86_SPECIAL_INSNS_H

#ifdef __KERNEL__

#include <asm/nops.h>
#include <asm/processor-flags.h>
#include <linux/errno.h>
#include <linux/irqflags.h>
#include <linux/jump_label.h>

/*
 * The compiler should not reorder volatile asm statements with respect to each
 * other: they should execute in program order. However GCC 4.9.x and 5.x have
 * a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder
 * volatile asm. The write functions are not affected since they have memory
 * clobbers preventing reordering. To prevent reads from being reordered with
 * respect to writes, use a dummy memory operand.
 */

#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)

void native_write_cr0(unsigned long val);

static inline unsigned long native_read_cr0(void)
{
	unsigned long val;
	asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER);
	return val;
}

static __always_inline unsigned long native_read_cr2(void)
{
	unsigned long val;
	asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER);
	return val;
}

static __always_inline void native_write_cr2(unsigned long val)
{
	asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
}

static inline unsigned long __native_read_cr3(void)
{
	unsigned long val;
	asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
	return val;
}

static inline void native_write_cr3(unsigned long val)
{
	asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
}

static inline unsigned long native_read_cr4(void)
{
	unsigned long val;
#ifdef CONFIG_X86_32
	/*
	 * This could fault if CR4 does not exist. Non-existent CR4
	 * is functionally equivalent to CR4 == 0. Keep it simple and pretend
	 * that CR4 == 0 on CPUs that don't have CR4.
	 */
	asm volatile("1: mov %%cr4, %0\n"
		     "2:\n"
		     _ASM_EXTABLE(1b, 2b)
		     : "=r" (val) : "0" (0), __FORCE_ORDER);
#else
	/* CR4 always exists on x86_64. */
	asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER);
#endif
	return val;
}

void native_write_cr4(unsigned long val);

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
static inline u32 rdpkru(void)
{
	u32 ecx = 0;
	u32 edx, pkru;

	/*
	 * "rdpkru" instruction. Places PKRU contents in to EAX,
	 * clears EDX and requires that ecx=0.
	 */
	asm volatile(".byte 0x0f,0x01,0xee\n\t"
		     : "=a" (pkru), "=d" (edx)
		     : "c" (ecx));

	return pkru;
}

static inline void wrpkru(u32 pkru)
{
	u32 ecx = 0, edx = 0;

	/*
	 * "wrpkru" instruction.
Loads contents in EAX to PKRU, * requires that ecx = edx = 0. */ asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (pkru), "c"(ecx), "d"(edx)); } #else static inline u32 rdpkru(void) { return 0; } static inline void wrpkru(u32 pkru) { } #endif static __always_inline void native_wbinvd(void) { asm volatile("wbinvd": : :"memory"); } static inline unsigned long __read_cr4(void) { return native_read_cr4(); } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else static inline unsigned long read_cr0(void) { return native_read_cr0(); } static inline void write_cr0(unsigned long x) { native_write_cr0(x); } static __always_inline unsigned long read_cr2(void) { return native_read_cr2(); } static __always_inline void write_cr2(unsigned long x) { native_write_cr2(x); } /* * Careful! CR3 contains more than just an address. You probably want * read_cr3_pa() instead. */ static inline unsigned long __read_cr3(void) { return __native_read_cr3(); } static inline void write_cr3(unsigned long x) { native_write_cr3(x); } static inline void __write_cr4(unsigned long x) { native_write_cr4(x); } static __always_inline void wbinvd(void) { native_wbinvd(); } #endif /* CONFIG_PARAVIRT_XXL */ static __always_inline void clflush(volatile void *__p) { asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); } static inline void clflushopt(volatile void *__p) { alternative_io(".byte 0x3e; clflush %P0", ".byte 0x66; clflush %P0", X86_FEATURE_CLFLUSHOPT, "+m" (*(volatile char __force *)__p)); } static inline void clwb(volatile void *__p) { volatile struct { char x[64]; } *p = __p; asm volatile(ALTERNATIVE_2( ".byte 0x3e; clflush (%[pax])", ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ X86_FEATURE_CLFLUSHOPT, ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ X86_FEATURE_CLWB) : [p] "+m" (*p) : [pax] "a" (p)); } #ifdef CONFIG_X86_USER_SHADOW_STACK static inline int write_user_shstk_64(u64 __user *addr, u64 val) { asm goto("1: wrussq %[val], (%[addr])\n" _ASM_EXTABLE(1b, %l[fail]) :: [addr] "r" (addr), [val] "r" (val) :: fail); return 0; fail: return -EFAULT; } #endif /* CONFIG_X86_USER_SHADOW_STACK */ #define nop() asm volatile ("nop") static inline void serialize(void) { /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory"); } /* The dst parameter must be 64-bytes aligned */ static inline void movdir64b(void *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } *__dst = dst; /* * MOVDIR64B %(rdx), rax. * * Both __src and __dst must be memory constraints in order to tell the * compiler that no other memory accesses should be reordered around * this one. * * Also, both must be supplied as lvalues because this tells * the compiler what the object is (its size) the instruction accesses. * I.e., not the pointers but what they point to, thus the deref'ing '*'. */ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" : "+m" (*__dst) : "m" (*__src), "a" (__dst), "d" (__src)); } static inline void movdir64b_io(void __iomem *dst, const void *src) { movdir64b((void __force *)dst, src); } /** * enqcmds - Enqueue a command in supervisor (CPL0) mode * @dst: destination, in MMIO space (must be 512-bit aligned) * @src: 512 bits memory operand * * The ENQCMDS instruction allows software to write a 512-bit command to * a 512-bit-aligned special MMIO region that supports the instruction. * A return status is loaded into the ZF flag in the RFLAGS register. 
* ZF = 0 equates to success, and ZF = 1 indicates retry or error. * * This function issues the ENQCMDS instruction to submit data from * kernel space to MMIO space, in a unit of 512 bits. Order of data access * is not guaranteed, nor is a memory barrier performed afterwards. It * returns 0 on success and -EAGAIN on failure. * * Warning: Do not use this helper unless your driver has checked that the * ENQCMDS instruction is supported on the platform and the device accepts * ENQCMDS. */ static inline int enqcmds(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } __iomem *__dst = dst; bool zf; /* * ENQCMDS %(rdx), rax * * See movdir64b()'s comment on operand specification. */ asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90" CC_SET(z) : CC_OUT(z) (zf), "+m" (*__dst) : "m" (*__src), "a" (__dst), "d" (__src)); /* Submission failure is indicated via EFLAGS.ZF=1 */ if (zf) return -EAGAIN; return 0; } static __always_inline void tile_release(void) { /* * Instruction opcode for TILERELEASE; supported in binutils * version >= 2.36. */ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0"); } #endif /* __KERNEL__ */ #endif /* _ASM_X86_SPECIAL_INSNS_H */
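As an illustration of the ENQCMDS return convention documented above, a minimal sketch (not taken from any real driver) that retries submission a few times; the portal/descriptor names and retry policy are hypothetical.

/* Hypothetical sketch: submit one 64-byte descriptor via an ENQCMDS portal. */
#include <asm/special_insns.h>
#include <linux/errno.h>

static int demo_submit_desc(void __iomem *portal, const void *desc)
{
	int retries = 10;
	int ret;

	do {
		ret = enqcmds(portal, desc);	/* -EAGAIN: device asked for a retry */
		if (ret != -EAGAIN)
			return ret;		/* 0 on success */
		cpu_relax();
	} while (--retries);

	return -EBUSY;
}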
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/ns_common.h>
#include <linux/fs_pin.h>

struct mnt_namespace {
	struct ns_common	ns;
	struct mount *		root;
	struct rb_root		mounts; /* Protected by namespace_sem */
	struct user_namespace	*user_ns;
	struct ucounts		*ucounts;
	u64			seq;	/* Sequence number to prevent loops */
	wait_queue_head_t	poll;
	u64			event;
	unsigned int		nr_mounts; /* # of mounts in the namespace */
	unsigned int		pending_mounts;
} __randomize_layout;

struct mnt_pcp {
	int mnt_count;
	int mnt_writers;
};

struct mountpoint {
	struct hlist_node m_hash;
	struct dentry *m_dentry;
	struct hlist_head m_list;
	int m_count;
};

struct mount {
	struct hlist_node mnt_hash;
	struct mount *mnt_parent;
	struct dentry *mnt_mountpoint;
	struct vfsmount mnt;
	union {
		struct rcu_head mnt_rcu;
		struct llist_node mnt_llist;
	};
#ifdef CONFIG_SMP
	struct mnt_pcp __percpu *mnt_pcp;
#else
	int mnt_count;
	int mnt_writers;
#endif
	struct list_head mnt_mounts;	/* list of children, anchored here */
	struct list_head mnt_child;	/* and going through their mnt_child */
	struct list_head mnt_instance;	/* mount instance on sb->s_mounts */
	const char *mnt_devname;	/* Name of device e.g. /dev/dsk/hda1 */
	union {
		struct rb_node mnt_node;	/* Under ns->mounts */
		struct list_head mnt_list;
	};
	struct list_head mnt_expire;	/* link in fs-specific expiry list */
	struct list_head mnt_share;	/* circular list of shared mounts */
	struct list_head mnt_slave_list;/* list of slave mounts */
	struct list_head mnt_slave;	/* slave list entry */
	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
	struct mnt_namespace *mnt_ns;	/* containing namespace */
	struct mountpoint *mnt_mp;	/* where is it mounted */
	union {
		struct hlist_node mnt_mp_list;	/* list mounts with the same mountpoint */
		struct hlist_node mnt_umount;
	};
	struct list_head mnt_umounting; /* list entry for umount propagation */
#ifdef CONFIG_FSNOTIFY
	struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
	__u32 mnt_fsnotify_mask;
#endif
	int mnt_id;			/* mount identifier, reused */
	u64 mnt_id_unique;		/* mount ID unique until reboot */
	int mnt_group_id;		/* peer group identifier */
	int mnt_expiry_mark;		/* true if marked for expiry */
	struct hlist_head mnt_pins;
	struct hlist_head mnt_stuck_children;
} __randomize_layout;

#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */

static inline struct mount *real_mount(struct vfsmount *mnt)
{
	return container_of(mnt, struct mount, mnt);
}

static inline int mnt_has_parent(struct mount *mnt)
{
	return mnt != mnt->mnt_parent;
}

static inline int is_mounted(struct vfsmount *mnt)
{
	/* neither detached nor internal?
*/ return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns); } extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); extern int __legitimize_mnt(struct vfsmount *, unsigned); static inline bool __path_is_mountpoint(const struct path *path) { struct mount *m = __lookup_mnt(path->mnt, path->dentry); return m && likely(!(m->mnt.mnt_flags & MNT_SYNC_UMOUNT)); } extern void __detach_mounts(struct dentry *dentry); static inline void detach_mounts(struct dentry *dentry) { if (!d_mountpoint(dentry)) return; __detach_mounts(dentry); } static inline void get_mnt_ns(struct mnt_namespace *ns) { refcount_inc(&ns->ns.count); } extern seqlock_t mount_lock; struct proc_mounts { struct mnt_namespace *ns; struct path root; int (*show)(struct seq_file *, struct vfsmount *); }; extern const struct seq_operations mounts_op; extern bool __is_local_mountpoint(struct dentry *dentry); static inline bool is_local_mountpoint(struct dentry *dentry) { if (!d_mountpoint(dentry)) return false; return __is_local_mountpoint(dentry); } static inline bool is_anon_ns(struct mnt_namespace *ns) { return ns->seq == 0; } static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list) { WARN_ON(!(mnt->mnt.mnt_flags & MNT_ONRB)); mnt->mnt.mnt_flags &= ~MNT_ONRB; rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts); list_add_tail(&mnt->mnt_list, dt_list); } extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
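For illustration only, a small sketch (not in the header) that uses real_mount() and mnt_has_parent() to walk to the top of a mount tree; it assumes the caller already holds the locks (namespace_sem or mount_lock) that keep the parent pointers stable, and demo_topmost_mount() is hypothetical.

/* Hypothetical sketch: find the topmost mount above a given vfsmount. */
static struct mount *demo_topmost_mount(struct vfsmount *v)
{
	struct mount *m = real_mount(v);

	/* Assumes mount_lock/namespace_sem is held by the caller. */
	while (mnt_has_parent(m))
		m = m->mnt_parent;

	return m;
}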
#ifndef _LINUX_JHASH_H
#define _LINUX_JHASH_H

/* jhash.h: Jenkins hash support.
 *
 * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
 *
 * https://burtleburtle.net/bob/hash/
 *
 * These are the credits from Bob's sources:
 *
 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * These are functions for producing 32-bit hashes for hash table lookup.
 * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
 * are externally useful functions.  Routines to test the hash are included
 * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
 * the public domain.  It has no warranty.
 *
 * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@netfilter.org)
 *
 * I've modified Bob's hash to be useful in the Linux kernel, and
 * any bugs present are my fault.
 * Jozsef
 */
#include <linux/bitops.h>
#include <linux/unaligned/packed_struct.h>

/* Best hash sizes are of power of two */
#define jhash_size(n)   ((u32)1<<(n))
/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
#define jhash_mask(n)   (jhash_size(n)-1)

/* __jhash_mix -- mix 3 32-bit values reversibly. */
#define __jhash_mix(a, b, c)			\
{						\
	a -= c;  a ^= rol32(c, 4);  c += b;	\
	b -= a;  b ^= rol32(a, 6);  a += c;	\
	c -= b;  c ^= rol32(b, 8);  b += a;	\
	a -= c;  a ^= rol32(c, 16); c += b;	\
	b -= a;  b ^= rol32(a, 19); a += c;	\
	c -= b;  c ^= rol32(b, 4);  b += a;	\
}

/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
#define __jhash_final(a, b, c)			\
{						\
	c ^= b; c -= rol32(b, 14);		\
	a ^= c; a -= rol32(c, 11);		\
	b ^= a; b -= rol32(a, 25);		\
	c ^= b; c -= rol32(b, 16);		\
	a ^= c; a -= rol32(c, 4);		\
	b ^= a; b -= rol32(a, 14);		\
	c ^= b; c -= rol32(b, 24);		\
}

/* An arbitrary initial parameter */
#define JHASH_INITVAL		0xdeadbeef

/* jhash - hash an arbitrary key
 * @k: sequence of bytes as key
 * @length: the length of the key
 * @initval: the previous hash, or an arbitrary value
 *
 * The generic version, hashes an arbitrary sequence of bytes.
 * No alignment or length assumptions are made about the input key.
 *
 * Returns the hash value of the key. The result depends on endianness.
*/ static inline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; const u8 *k = key; /* Set up the internal state */ a = b = c = JHASH_INITVAL + length + initval; /* All but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += __get_unaligned_cpu32(k); b += __get_unaligned_cpu32(k + 4); c += __get_unaligned_cpu32(k + 8); __jhash_mix(a, b, c); length -= 12; k += 12; } /* Last block: affect all 32 bits of (c) */ switch (length) { case 12: c += (u32)k[11]<<24; fallthrough; case 11: c += (u32)k[10]<<16; fallthrough; case 10: c += (u32)k[9]<<8; fallthrough; case 9: c += k[8]; fallthrough; case 8: b += (u32)k[7]<<24; fallthrough; case 7: b += (u32)k[6]<<16; fallthrough; case 6: b += (u32)k[5]<<8; fallthrough; case 5: b += k[4]; fallthrough; case 4: a += (u32)k[3]<<24; fallthrough; case 3: a += (u32)k[2]<<16; fallthrough; case 2: a += (u32)k[1]<<8; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); break; case 0: /* Nothing left to add */ break; } return c; } /* jhash2 - hash an array of u32's * @k: the key which must be an array of u32's * @length: the number of u32's in the key * @initval: the previous hash, or an arbitray value * * Returns the hash value of the key. */ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) { u32 a, b, c; /* Set up the internal state */ a = b = c = JHASH_INITVAL + (length<<2) + initval; /* Handle most of the key */ while (length > 3) { a += k[0]; b += k[1]; c += k[2]; __jhash_mix(a, b, c); length -= 3; k += 3; } /* Handle the last 3 u32's */ switch (length) { case 3: c += k[2]; fallthrough; case 2: b += k[1]; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); break; case 0: /* Nothing left to add */ break; } return c; } /* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; b += initval; c += initval; __jhash_final(a, b, c); return c; } static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) { return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); } static inline u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } static inline u32 jhash_1word(u32 a, u32 initval) { return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); } #endif /* _LINUX_JHASH_H */
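A short usage sketch (not part of the header) showing the intended pairing of jhash()/jhash_2words() with jhash_mask() to index a power-of-two sized hash table; the names and table size are made up.

/* Hypothetical sketch: bucket selection for a 2^8 entry hash table. */
#include <linux/jhash.h>

#define DEMO_HASH_BITS	8

static u32 demo_bucket(const void *key, u32 keylen, u32 seed)
{
	/* mask instead of '%' because the table size is a power of two */
	return jhash(key, keylen, seed) & jhash_mask(DEMO_HASH_BITS);
}

static u32 demo_bucket_pair(u32 saddr, u32 daddr, u32 seed)
{
	return jhash_2words(saddr, daddr, seed) & jhash_mask(DEMO_HASH_BITS);
}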
// SPDX-License-Identifier: GPL-2.0
/*
 * LED Triggers for USB Activity
 *
 * Copyright 2014 Michal Sojka <sojka@merica.cz>
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/leds.h>
#include <linux/usb.h>
#include "common.h"

#define BLINK_DELAY 30

DEFINE_LED_TRIGGER(ledtrig_usb_gadget);
DEFINE_LED_TRIGGER(ledtrig_usb_host);

void usb_led_activity(enum usb_led_event ev)
{
	struct led_trigger *trig = NULL;

	switch (ev) {
	case USB_LED_EVENT_GADGET:
		trig = ledtrig_usb_gadget;
		break;
	case USB_LED_EVENT_HOST:
		trig = ledtrig_usb_host;
		break;
	}
	/* led_trigger_blink_oneshot() handles trig == NULL gracefully */
	led_trigger_blink_oneshot(trig, BLINK_DELAY, BLINK_DELAY, 0);
}
EXPORT_SYMBOL_GPL(usb_led_activity);

void __init ledtrig_usb_init(void)
{
	led_trigger_register_simple("usb-gadget", &ledtrig_usb_gadget);
	led_trigger_register_simple("usb-host", &ledtrig_usb_host);
}

void __exit ledtrig_usb_exit(void)
{
	led_trigger_unregister_simple(ledtrig_usb_gadget);
	led_trigger_unregister_simple(ledtrig_usb_host);
}
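A usage sketch (not part of this file): host-side code would typically report activity from a completion path. The completion handler below is hypothetical.

/* Hypothetical sketch: blink the "usb-host" trigger on successful I/O. */
#include <linux/usb.h>

static void demo_urb_complete(struct urb *urb)
{
	if (urb->status == 0)
		usb_led_activity(USB_LED_EVENT_HOST);

	/* ... rest of the completion handling ... */
}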
// SPDX-License-Identifier: GPL-2.0
/*
 * kernel userspace event delivery
 *
 * Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
 * Copyright (C) 2004 Novell, Inc.
All rights reserved. * Copyright (C) 2004 IBM, Inc. All rights reserved. * * Authors: * Robert Love <rml@novell.com> * Kay Sievers <kay.sievers@vrfy.org> * Arjan van de Ven <arjanv@redhat.com> * Greg Kroah-Hartman <greg@kroah.com> */ #include <linux/spinlock.h> #include <linux/string.h> #include <linux/kobject.h> #include <linux/export.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/uidgid.h> #include <linux/uuid.h> #include <linux/ctype.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> atomic64_t uevent_seqnum; #ifdef CONFIG_UEVENT_HELPER char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; #endif struct uevent_sock { struct list_head list; struct sock *sk; }; #ifdef CONFIG_NET static LIST_HEAD(uevent_sock_list); /* This lock protects uevent_sock_list */ static DEFINE_MUTEX(uevent_sock_mutex); #endif /* the strings here must match the enum in include/linux/kobject.h */ static const char *kobject_actions[] = { [KOBJ_ADD] = "add", [KOBJ_REMOVE] = "remove", [KOBJ_CHANGE] = "change", [KOBJ_MOVE] = "move", [KOBJ_ONLINE] = "online", [KOBJ_OFFLINE] = "offline", [KOBJ_BIND] = "bind", [KOBJ_UNBIND] = "unbind", }; static int kobject_action_type(const char *buf, size_t count, enum kobject_action *type, const char **args) { enum kobject_action action; size_t count_first; const char *args_start; int ret = -EINVAL; if (count && (buf[count-1] == '\n' || buf[count-1] == '\0')) count--; if (!count) goto out; args_start = strnchr(buf, count, ' '); if (args_start) { count_first = args_start - buf; args_start = args_start + 1; } else count_first = count; for (action = 0; action < ARRAY_SIZE(kobject_actions); action++) { if (strncmp(kobject_actions[action], buf, count_first) != 0) continue; if (kobject_actions[action][count_first] != '\0') continue; if (args) *args = args_start; *type = action; ret = 0; break; } out: return ret; } static const char *action_arg_word_end(const char *buf, const char *buf_end, char delim) { const char *next = buf; while (next <= buf_end && *next != delim) if (!isalnum(*next++)) return NULL; if (next == buf) return NULL; return next; } static int kobject_action_args(const char *buf, size_t count, struct kobj_uevent_env **ret_env) { struct kobj_uevent_env *env = NULL; const char *next, *buf_end, *key; int key_len; int r = -EINVAL; if (count && (buf[count - 1] == '\n' || buf[count - 1] == '\0')) count--; if (!count) return -EINVAL; env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) return -ENOMEM; /* first arg is UUID */ if (count < UUID_STRING_LEN || !uuid_is_valid(buf) || add_uevent_var(env, "SYNTH_UUID=%.*s", UUID_STRING_LEN, buf)) goto out; /* * the rest are custom environment variables in KEY=VALUE * format with ' ' delimiter between each KEY=VALUE pair */ next = buf + UUID_STRING_LEN; buf_end = buf + count - 1; while (next <= buf_end) { if (*next != ' ') goto out; /* skip the ' ', key must follow */ key = ++next; if (key > buf_end) goto out; buf = next; next = action_arg_word_end(buf, buf_end, '='); if (!next || next > buf_end || *next != '=') goto out; key_len = next - buf; /* skip the '=', value must follow */ if (++next > buf_end) goto out; buf = next; next = action_arg_word_end(buf, buf_end, ' '); if (!next) goto out; if (add_uevent_var(env, "SYNTH_ARG_%.*s=%.*s", key_len, key, (int) (next - buf), buf)) goto out; } r = 0; out: if (r) kfree(env); else *ret_env = env; return r; } /** * kobject_synth_uevent - send synthetic uevent 
with arguments * * @kobj: struct kobject for which synthetic uevent is to be generated * @buf: buffer containing action type and action args, newline is ignored * @count: length of buffer * * Returns 0 if kobject_synthetic_uevent() is completed with success or the * corresponding error when it fails. */ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) { char *no_uuid_envp[] = { "SYNTH_UUID=0", NULL }; enum kobject_action action; const char *action_args; struct kobj_uevent_env *env; const char *msg = NULL, *devpath; int r; r = kobject_action_type(buf, count, &action, &action_args); if (r) { msg = "unknown uevent action string"; goto out; } if (!action_args) { r = kobject_uevent_env(kobj, action, no_uuid_envp); goto out; } r = kobject_action_args(action_args, count - (action_args - buf), &env); if (r == -EINVAL) { msg = "incorrect uevent action arguments"; goto out; } if (r) goto out; r = kobject_uevent_env(kobj, action, env->envp); kfree(env); out: if (r) { devpath = kobject_get_path(kobj, GFP_KERNEL); pr_warn("synth uevent: %s: %s\n", devpath ?: "unknown device", msg ?: "failed to send uevent"); kfree(devpath); } return r; } #ifdef CONFIG_UEVENT_HELPER static int kobj_usermode_filter(struct kobject *kobj) { const struct kobj_ns_type_operations *ops; ops = kobj_ns_ops(kobj); if (ops) { const void *init_ns, *ns; ns = kobj->ktype->namespace(kobj); init_ns = ops->initial_ns(); return ns != init_ns; } return 0; } static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem) { int buffer_size = sizeof(env->buf) - env->buflen; int len; len = strscpy(&env->buf[env->buflen], subsystem, buffer_size); if (len < 0) { pr_warn("%s: insufficient buffer space (%u left) for %s\n", __func__, buffer_size, subsystem); return -ENOMEM; } env->argv[0] = uevent_helper; env->argv[1] = &env->buf[env->buflen]; env->argv[2] = NULL; env->buflen += len + 1; return 0; } static void cleanup_uevent_env(struct subprocess_info *info) { kfree(info->data); } #endif #ifdef CONFIG_NET static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env, const char *action_string, const char *devpath) { struct netlink_skb_parms *parms; struct sk_buff *skb = NULL; char *scratch; size_t len; /* allocate message with maximum possible size */ len = strlen(action_string) + strlen(devpath) + 2; skb = alloc_skb(len + env->buflen, GFP_KERNEL); if (!skb) return NULL; /* add header */ scratch = skb_put(skb, len); sprintf(scratch, "%s@%s", action_string, devpath); skb_put_data(skb, env->buf, env->buflen); parms = &NETLINK_CB(skb); parms->creds.uid = GLOBAL_ROOT_UID; parms->creds.gid = GLOBAL_ROOT_GID; parms->dst_group = 1; parms->portid = 0; return skb; } static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env, const char *action_string, const char *devpath) { struct sk_buff *skb = NULL; struct uevent_sock *ue_sk; int retval = 0; /* send netlink message */ mutex_lock(&uevent_sock_mutex); list_for_each_entry(ue_sk, &uevent_sock_list, list) { struct sock *uevent_sock = ue_sk->sk; if (!netlink_has_listeners(uevent_sock, 1)) continue; if (!skb) { retval = -ENOMEM; skb = alloc_uevent_skb(env, action_string, devpath); if (!skb) continue; } retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS || retval == -ESRCH) retval = 0; } mutex_unlock(&uevent_sock_mutex); consume_skb(skb); return retval; } static int uevent_net_broadcast_tagged(struct sock *usk, struct kobj_uevent_env *env, const char 
*action_string, const char *devpath) { struct user_namespace *owning_user_ns = sock_net(usk)->user_ns; struct sk_buff *skb = NULL; int ret = 0; skb = alloc_uevent_skb(env, action_string, devpath); if (!skb) return -ENOMEM; /* fix credentials */ if (owning_user_ns != &init_user_ns) { struct netlink_skb_parms *parms = &NETLINK_CB(skb); kuid_t root_uid; kgid_t root_gid; /* fix uid */ root_uid = make_kuid(owning_user_ns, 0); if (uid_valid(root_uid)) parms->creds.uid = root_uid; /* fix gid */ root_gid = make_kgid(owning_user_ns, 0); if (gid_valid(root_gid)) parms->creds.gid = root_gid; } ret = netlink_broadcast(usk, skb, 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (ret == -ENOBUFS || ret == -ESRCH) ret = 0; return ret; } #endif static int kobject_uevent_net_broadcast(struct kobject *kobj, struct kobj_uevent_env *env, const char *action_string, const char *devpath) { int ret = 0; #ifdef CONFIG_NET const struct kobj_ns_type_operations *ops; const struct net *net = NULL; ops = kobj_ns_ops(kobj); if (!ops && kobj->kset) { struct kobject *ksobj = &kobj->kset->kobj; if (ksobj->parent != NULL) ops = kobj_ns_ops(ksobj->parent); } /* kobjects currently only carry network namespace tags and they * are the only tag relevant here since we want to decide which * network namespaces to broadcast the uevent into. */ if (ops && ops->netlink_ns && kobj->ktype->namespace) if (ops->type == KOBJ_NS_TYPE_NET) net = kobj->ktype->namespace(kobj); if (!net) ret = uevent_net_broadcast_untagged(env, action_string, devpath); else ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env, action_string, devpath); #endif return ret; } static void zap_modalias_env(struct kobj_uevent_env *env) { static const char modalias_prefix[] = "MODALIAS="; size_t len; int i, j; for (i = 0; i < env->envp_idx;) { if (strncmp(env->envp[i], modalias_prefix, sizeof(modalias_prefix) - 1)) { i++; continue; } len = strlen(env->envp[i]) + 1; if (i != env->envp_idx - 1) { memmove(env->envp[i], env->envp[i + 1], env->buflen - len); for (j = i; j < env->envp_idx - 1; j++) env->envp[j] = env->envp[j + 1] - len; } env->envp_idx--; env->buflen -= len; } } /** * kobject_uevent_env - send an uevent with environmental data * * @kobj: struct kobject that the action is happening to * @action: action that is happening * @envp_ext: pointer to environmental data * * Returns 0 if kobject_uevent_env() is completed with success or the * corresponding error when it fails. */ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, char *envp_ext[]) { struct kobj_uevent_env *env; const char *action_string = kobject_actions[action]; const char *devpath = NULL; const char *subsystem; struct kobject *top_kobj; struct kset *kset; const struct kset_uevent_ops *uevent_ops; int i = 0; int retval = 0; /* * Mark "remove" event done regardless of result, for some subsystems * do not want to re-trigger "remove" event via automatic cleanup. 
*/ if (action == KOBJ_REMOVE) kobj->state_remove_uevent_sent = 1; pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); /* search the kset we belong to */ top_kobj = kobj; while (!top_kobj->kset && top_kobj->parent) top_kobj = top_kobj->parent; if (!top_kobj->kset) { pr_debug("kobject: '%s' (%p): %s: attempted to send uevent " "without kset!\n", kobject_name(kobj), kobj, __func__); return -EINVAL; } kset = top_kobj->kset; uevent_ops = kset->uevent_ops; /* skip the event, if uevent_suppress is set*/ if (kobj->uevent_suppress) { pr_debug("kobject: '%s' (%p): %s: uevent_suppress " "caused the event to drop!\n", kobject_name(kobj), kobj, __func__); return 0; } /* skip the event, if the filter returns zero. */ if (uevent_ops && uevent_ops->filter) if (!uevent_ops->filter(kobj)) { pr_debug("kobject: '%s' (%p): %s: filter function " "caused the event to drop!\n", kobject_name(kobj), kobj, __func__); return 0; } /* originating subsystem */ if (uevent_ops && uevent_ops->name) subsystem = uevent_ops->name(kobj); else subsystem = kobject_name(&kset->kobj); if (!subsystem) { pr_debug("kobject: '%s' (%p): %s: unset subsystem caused the " "event to drop!\n", kobject_name(kobj), kobj, __func__); return 0; } /* environment buffer */ env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); if (!env) return -ENOMEM; /* complete object path */ devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { retval = -ENOENT; goto exit; } /* default keys */ retval = add_uevent_var(env, "ACTION=%s", action_string); if (retval) goto exit; retval = add_uevent_var(env, "DEVPATH=%s", devpath); if (retval) goto exit; retval = add_uevent_var(env, "SUBSYSTEM=%s", subsystem); if (retval) goto exit; /* keys passed in from the caller */ if (envp_ext) { for (i = 0; envp_ext[i]; i++) { retval = add_uevent_var(env, "%s", envp_ext[i]); if (retval) goto exit; } } /* let the kset specific function add its stuff */ if (uevent_ops && uevent_ops->uevent) { retval = uevent_ops->uevent(kobj, env); if (retval) { pr_debug("kobject: '%s' (%p): %s: uevent() returned " "%d\n", kobject_name(kobj), kobj, __func__, retval); goto exit; } } switch (action) { case KOBJ_ADD: /* * Mark "add" event so we can make sure we deliver "remove" * event to userspace during automatic cleanup. If * the object did send an "add" event, "remove" will * automatically generated by the core, if not already done * by the caller. 
*/ kobj->state_add_uevent_sent = 1; break; case KOBJ_UNBIND: zap_modalias_env(env); break; default: break; } /* we will send an event, so request a new sequence number */ retval = add_uevent_var(env, "SEQNUM=%llu", atomic64_inc_return(&uevent_seqnum)); if (retval) goto exit; retval = kobject_uevent_net_broadcast(kobj, env, action_string, devpath); #ifdef CONFIG_UEVENT_HELPER /* call uevent_helper, usually only enabled during early boot */ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { struct subprocess_info *info; retval = add_uevent_var(env, "HOME=/"); if (retval) goto exit; retval = add_uevent_var(env, "PATH=/sbin:/bin:/usr/sbin:/usr/bin"); if (retval) goto exit; retval = init_uevent_argv(env, subsystem); if (retval) goto exit; retval = -ENOMEM; info = call_usermodehelper_setup(env->argv[0], env->argv, env->envp, GFP_KERNEL, NULL, cleanup_uevent_env, env); if (info) { retval = call_usermodehelper_exec(info, UMH_NO_WAIT); env = NULL; /* freed by cleanup_uevent_env */ } } #endif exit: kfree(devpath); kfree(env); return retval; } EXPORT_SYMBOL_GPL(kobject_uevent_env); /** * kobject_uevent - notify userspace by sending an uevent * * @kobj: struct kobject that the action is happening to * @action: action that is happening * * Returns 0 if kobject_uevent() is completed with success or the * corresponding error when it fails. */ int kobject_uevent(struct kobject *kobj, enum kobject_action action) { return kobject_uevent_env(kobj, action, NULL); } EXPORT_SYMBOL_GPL(kobject_uevent); /** * add_uevent_var - add key value string to the environment buffer * @env: environment buffer structure * @format: printf format for the key=value pair * * Returns 0 if environment variable was added successfully or -ENOMEM * if no space was available. */ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) 
{ va_list args; int len; if (env->envp_idx >= ARRAY_SIZE(env->envp)) { WARN(1, KERN_ERR "add_uevent_var: too many keys\n"); return -ENOMEM; } va_start(args, format); len = vsnprintf(&env->buf[env->buflen], sizeof(env->buf) - env->buflen, format, args); va_end(args); if (len >= (sizeof(env->buf) - env->buflen)) { WARN(1, KERN_ERR "add_uevent_var: buffer size too small\n"); return -ENOMEM; } env->envp[env->envp_idx++] = &env->buf[env->buflen]; env->buflen += len + 1; return 0; } EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb, struct netlink_ext_ack *extack) { /* u64 to chars: 2^64 - 1 = 21 chars */ char buf[sizeof("SEQNUM=") + 21]; struct sk_buff *skbc; int ret; /* bump and prepare sequence number */ ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", atomic64_inc_return(&uevent_seqnum)); if (ret < 0 || (size_t)ret >= sizeof(buf)) return -ENOMEM; ret++; /* verify message does not overflow */ if ((skb->len + ret) > UEVENT_BUFFER_SIZE) { NL_SET_ERR_MSG(extack, "uevent message too big"); return -EINVAL; } /* copy skb and extend to accommodate sequence number */ skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL); if (!skbc) return -ENOMEM; /* append sequence number */ skb_put_data(skbc, buf, ret); /* remove msg header */ skb_pull(skbc, NLMSG_HDRLEN); /* set portid 0 to inform userspace message comes from kernel */ NETLINK_CB(skbc).portid = 0; NETLINK_CB(skbc).dst_group = 1; ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (ret == -ENOBUFS || ret == -ESRCH) ret = 0; return ret; } static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net; int ret; if (!nlmsg_data(nlh)) return -EINVAL; /* * Verify that we are allowed to send messages to the target * network namespace. The caller must have CAP_SYS_ADMIN in the * owning user namespace of the target network namespace. */ net = sock_net(NETLINK_CB(skb).sk); if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) { NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability"); return -EPERM; } ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack); return ret; } static void uevent_net_rcv(struct sk_buff *skb) { netlink_rcv_skb(skb, &uevent_net_rcv_skb); } static int uevent_net_init(struct net *net) { struct uevent_sock *ue_sk; struct netlink_kernel_cfg cfg = { .groups = 1, .input = uevent_net_rcv, .flags = NL_CFG_F_NONROOT_RECV }; ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); if (!ue_sk) return -ENOMEM; ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, &cfg); if (!ue_sk->sk) { pr_err("kobject_uevent: unable to create netlink socket!\n"); kfree(ue_sk); return -ENODEV; } net->uevent_sock = ue_sk; /* Restrict uevents to initial user namespace. */ if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { mutex_lock(&uevent_sock_mutex); list_add_tail(&ue_sk->list, &uevent_sock_list); mutex_unlock(&uevent_sock_mutex); } return 0; } static void uevent_net_exit(struct net *net) { struct uevent_sock *ue_sk = net->uevent_sock; if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { mutex_lock(&uevent_sock_mutex); list_del(&ue_sk->list); mutex_unlock(&uevent_sock_mutex); } netlink_kernel_release(ue_sk->sk); kfree(ue_sk); } static struct pernet_operations uevent_net_ops = { .init = uevent_net_init, .exit = uevent_net_exit, }; static int __init kobject_uevent_init(void) { return register_pernet_subsys(&uevent_net_ops); } postcore_initcall(kobject_uevent_init); #endif
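As a consumer-side illustration (not part of this file), a minimal sketch of sending a KOBJ_CHANGE uevent with one extra environment variable via kobject_uevent_env(); the DEMO_STATE key and the helper are hypothetical.

/* Hypothetical sketch: tell userspace that a device-specific state changed. */
#include <linux/kobject.h>

static int demo_notify_state(struct kobject *kobj, const char *state)
{
	char buf[32];
	char *envp[] = { buf, NULL };

	snprintf(buf, sizeof(buf), "DEMO_STATE=%s", state);
	return kobject_uevent_env(kobj, KOBJ_CHANGE, envp);
}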
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_TASK_STACK_H
#define _LINUX_SCHED_TASK_STACK_H

/*
 * task->stack (kernel stack) handling interfaces:
 */

#include <linux/sched.h>
#include <linux/magic.h>
#include <linux/refcount.h>

#ifdef CONFIG_THREAD_INFO_IN_TASK

/*
 * When accessing the stack of a non-current task that might exit, use
 * try_get_task_stack() instead.  task_stack_page will return a pointer
 * that could get freed out from under you.
 */
static __always_inline void *task_stack_page(const struct task_struct *task)
{
	return task->stack;
}

#define setup_thread_stack(new,old)	do { } while(0)

static __always_inline unsigned long *end_of_stack(const struct task_struct *task)
{
#ifdef CONFIG_STACK_GROWSUP
	return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1;
#else
	return task->stack;
#endif
}

#elif !defined(__HAVE_THREAD_FUNCTIONS)

#define task_stack_page(task)	((void *)(task)->stack)

static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
{
	*task_thread_info(p) = *task_thread_info(org);
	task_thread_info(p)->task = p;
}

/*
 * Return the address of the last usable long on the stack.
 *
 * When the stack grows down, this is just above the thread
 * info struct. Going any lower will corrupt the threadinfo.
 *
 * When the stack grows up, this is the highest address.
 * Beyond that position, we corrupt data on the next page.
 */
static inline unsigned long *end_of_stack(struct task_struct *p)
{
#ifdef CONFIG_STACK_GROWSUP
	return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
#else
	return (unsigned long *)(task_thread_info(p) + 1);
#endif
}

#endif

#ifdef CONFIG_THREAD_INFO_IN_TASK
static inline void *try_get_task_stack(struct task_struct *tsk)
{
	return refcount_inc_not_zero(&tsk->stack_refcount) ?
		task_stack_page(tsk) : NULL;
}

extern void put_task_stack(struct task_struct *tsk);
#else
static inline void *try_get_task_stack(struct task_struct *tsk)
{
	return task_stack_page(tsk);
}

static inline void put_task_stack(struct task_struct *tsk) {}
#endif

void exit_task_stack_account(struct task_struct *tsk);

#define task_stack_end_corrupted(task) \
		(*(end_of_stack(task)) != STACK_END_MAGIC)

static inline int object_is_on_stack(const void *obj)
{
	void *stack = task_stack_page(current);

	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
}

extern void thread_stack_cache_init(void);

#ifdef CONFIG_DEBUG_STACK_USAGE
static inline unsigned long stack_not_used(struct task_struct *p)
{
	unsigned long *n = end_of_stack(p);

	do {	/* Skip over canary */
# ifdef CONFIG_STACK_GROWSUP
		n--;
# else
		n++;
# endif
	} while (!*n);

# ifdef CONFIG_STACK_GROWSUP
	return (unsigned long)end_of_stack(p) - (unsigned long)n;
# else
	return (unsigned long)n - (unsigned long)end_of_stack(p);
# endif
}
#endif
extern void set_task_stack_end_magic(struct task_struct *tsk);

#ifndef __HAVE_ARCH_KSTACK_END
static inline int kstack_end(void *addr)
{
	/* Reliable end of stack detection:
	 * Some APM bios versions misalign the stack
	 */
	return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
}
#endif

#endif /* _LINUX_SCHED_TASK_STACK_H */
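/*
 * Minimal usage sketch (hypothetical caller, not part of this header):
 * inspecting the stack of a task other than current.  Per the comment above,
 * try_get_task_stack()/put_task_stack() pin the stack so it cannot be freed
 * while it is examined.  Only the helpers and the task_stack_end_corrupted()
 * macro come from this header; foo_task_stack_ok() is invented for the
 * example.
 */
static inline bool foo_task_stack_ok(struct task_struct *tsk)
{
	void *stack = try_get_task_stack(tsk);
	bool ok;

	if (!stack)
		return false;	/* task already exited, its stack is gone */

	/* same check the task_stack_end_corrupted() macro performs */
	ok = !task_stack_end_corrupted(tsk);

	put_task_stack(tsk);
	return ok;
}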
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET     An implementation of the TCP/IP protocol suite for the LINUX
 *          operating system. INET is implemented using the BSD Socket
 *          interface as the means of communication with the user level.
 *
 *          IPv4 Forwarding Information Base: semantics.
* * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/netlink.h> #include <linux/hash.h> #include <linux/nospec.h> #include <net/arp.h> #include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/nexthop.h> #include <net/netlink.h> #include <net/rtnh.h> #include <net/lwtunnel.h> #include <net/fib_notifier.h> #include <net/addrconf.h> #include "fib_lookup.h" static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_info_hash_size; static unsigned int fib_info_hash_bits; static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; /* for_nexthops and change_nexthops only used when nexthop object * is not set in a fib_info. The logic within can reference fib_nh. */ #ifdef CONFIG_IP_ROUTE_MULTIPATH #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh; \ for (nhsel = 0, nh = (fi)->fib_nh; \ nhsel < fib_info_num_path((fi)); \ nh++, nhsel++) #define change_nexthops(fi) { \ int nhsel; struct fib_nh *nexthop_nh; \ for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ nhsel < fib_info_num_path((fi)); \ nexthop_nh++, nhsel++) #else /* CONFIG_IP_ROUTE_MULTIPATH */ /* Hope, that gcc will optimize it to get rid of dummy loop */ #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ for (nhsel = 0; nhsel < 1; nhsel++) #define change_nexthops(fi) { \ int nhsel; \ struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ for (nhsel = 0; nhsel < 1; nhsel++) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ #define endfor_nexthops(fi) } const struct fib_prop fib_props[RTN_MAX + 1] = { [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE, }, [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE, }, [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST, }, [RTN_BROADCAST] = { .error = 0, .scope = RT_SCOPE_LINK, }, [RTN_ANYCAST] = { .error = 0, .scope = RT_SCOPE_LINK, }, [RTN_MULTICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE, }, [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE, }, [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE, }, [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE, }, [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE, }, [RTN_NAT] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE, }, [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE, }, }; static void rt_fibinfo_free(struct rtable __rcu **rtp) { struct rtable *rt = rcu_dereference_protected(*rtp, 1); if (!rt) return; /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); * because we waited an RCU grace period before calling * free_fib_info_rcu() */ dst_dev_put(&rt->dst); dst_release_immediate(&rt->dst); } static void free_nh_exceptions(struct fib_nh_common *nhc) { struct 
fnhe_hash_bucket *hash; int i; hash = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!hash) return; for (i = 0; i < FNHE_HASH_SIZE; i++) { struct fib_nh_exception *fnhe; fnhe = rcu_dereference_protected(hash[i].chain, 1); while (fnhe) { struct fib_nh_exception *next; next = rcu_dereference_protected(fnhe->fnhe_next, 1); rt_fibinfo_free(&fnhe->fnhe_rth_input); rt_fibinfo_free(&fnhe->fnhe_rth_output); kfree(fnhe); fnhe = next; } } kfree(hash); } static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) { int cpu; if (!rtp) return; for_each_possible_cpu(cpu) { struct rtable *rt; rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); if (rt) { dst_dev_put(&rt->dst); dst_release_immediate(&rt->dst); } } free_percpu(rtp); } void fib_nh_common_release(struct fib_nh_common *nhc) { netdev_put(nhc->nhc_dev, &nhc->nhc_dev_tracker); lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); free_nh_exceptions(nhc); } EXPORT_SYMBOL_GPL(fib_nh_common_release); void fib_nh_release(struct net *net, struct fib_nh *fib_nh) { #ifdef CONFIG_IP_ROUTE_CLASSID if (fib_nh->nh_tclassid) atomic_dec(&net->ipv4.fib_num_tclassid_users); #endif fib_nh_common_release(&fib_nh->nh_common); } /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); if (fi->nh) { nexthop_put(fi->nh); } else { change_nexthops(fi) { fib_nh_release(fi->fib_net, nexthop_nh); } endfor_nexthops(fi); } ip_fib_metrics_put(fi->fib_metrics); kfree(fi); } void free_fib_info(struct fib_info *fi) { if (fi->fib_dead == 0) { pr_warn("Freeing alive fib_info %p\n", fi); return; } call_rcu(&fi->rcu, free_fib_info_rcu); } EXPORT_SYMBOL_GPL(free_fib_info); void fib_release_info(struct fib_info *fi) { spin_lock_bh(&fib_info_lock); if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); /* Paired with READ_ONCE() in fib_create_info(). */ WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); if (fi->nh) { list_del(&fi->nh_list); } else { change_nexthops(fi) { if (!nexthop_nh->fib_nh_dev) continue; hlist_del(&nexthop_nh->nh_hash); } endfor_nexthops(fi) } /* Paired with READ_ONCE() from fib_table_lookup() */ WRITE_ONCE(fi->fib_dead, 1); fib_info_put(fi); } spin_unlock_bh(&fib_info_lock); } static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) { const struct fib_nh *onh; if (fi->nh || ofi->nh) return nexthop_cmp(fi->nh, ofi->nh) ? 
0 : -1; if (ofi->fib_nhs == 0) return 0; for_nexthops(fi) { onh = fib_info_nh(ofi, nhsel); if (nh->fib_nh_oif != onh->fib_nh_oif || nh->fib_nh_gw_family != onh->fib_nh_gw_family || nh->fib_nh_scope != onh->fib_nh_scope || #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->fib_nh_weight != onh->fib_nh_weight || #endif #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || #endif lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) || ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK)) return -1; if (nh->fib_nh_gw_family == AF_INET && nh->fib_nh_gw4 != onh->fib_nh_gw4) return -1; if (nh->fib_nh_gw_family == AF_INET6 && ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6)) return -1; } endfor_nexthops(fi); return 0; } static inline unsigned int fib_devindex_hashfn(unsigned int val) { return hash_32(val, DEVINDEX_HASHBITS); } static struct hlist_head * fib_info_devhash_bucket(const struct net_device *dev) { u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; return &fib_info_devhash[fib_devindex_hashfn(val)]; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, u32 prefsrc, u32 priority) { unsigned int val = init_val; val ^= (protocol << 8) | scope; val ^= prefsrc; val ^= priority; return val; } static unsigned int fib_info_hashfn_result(unsigned int val) { unsigned int mask = (fib_info_hash_size - 1); return (val ^ (val >> 7) ^ (val >> 12)) & mask; } static inline unsigned int fib_info_hashfn(struct fib_info *fi) { unsigned int val; val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol, fi->fib_scope, (__force u32)fi->fib_prefsrc, fi->fib_priority); if (fi->nh) { val ^= fib_devindex_hashfn(fi->nh->id); } else { for_nexthops(fi) { val ^= fib_devindex_hashfn(nh->fib_nh_oif); } endfor_nexthops(fi) } return fib_info_hashfn_result(val); } /* no metrics, only nexthop id */ static struct fib_info *fib_find_info_nh(struct net *net, const struct fib_config *cfg) { struct hlist_head *head; struct fib_info *fi; unsigned int hash; hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id), cfg->fc_protocol, cfg->fc_scope, (__force u32)cfg->fc_prefsrc, cfg->fc_priority); hash = fib_info_hashfn_result(hash); head = &fib_info_hash[hash]; hlist_for_each_entry(fi, head, fib_hash) { if (!net_eq(fi->fib_net, net)) continue; if (!fi->nh || fi->nh->id != cfg->fc_nh_id) continue; if (cfg->fc_protocol == fi->fib_protocol && cfg->fc_scope == fi->fib_scope && cfg->fc_prefsrc == fi->fib_prefsrc && cfg->fc_priority == fi->fib_priority && cfg->fc_type == fi->fib_type && cfg->fc_table == fi->fib_tb_id && !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK)) return fi; } return NULL; } static struct fib_info *fib_find_info(struct fib_info *nfi) { struct hlist_head *head; struct fib_info *fi; unsigned int hash; hash = fib_info_hashfn(nfi); head = &fib_info_hash[hash]; hlist_for_each_entry(fi, head, fib_hash) { if (!net_eq(fi->fib_net, nfi->fib_net)) continue; if (fi->fib_nhs != nfi->fib_nhs) continue; if (nfi->fib_protocol == fi->fib_protocol && nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && nfi->fib_type == fi->fib_type && nfi->fib_tb_id == fi->fib_tb_id && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && nh_comp(fi, nfi) == 0) return fi; } return NULL; } /* Check, that the gateway is already configured. * Used only by redirect accept routine. 
*/ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; spin_lock(&fib_info_lock); head = fib_info_devhash_bucket(dev); hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev && nh->fib_nh_gw4 == gw && !(nh->fib_nh_flags & RTNH_F_DEAD)) { spin_unlock(&fib_info_lock); return 0; } } spin_unlock(&fib_info_lock); return -1; } size_t fib_nlmsg_size(struct fib_info *fi) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_DST */ + nla_total_size(4) /* RTA_PRIORITY */ + nla_total_size(4) /* RTA_PREFSRC */ + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ unsigned int nhs = fib_info_num_path(fi); /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); if (fi->nh) payload += nla_total_size(4); /* RTA_NH_ID */ if (nhs) { size_t nh_encapsize = 0; /* Also handles the special case nhs == 1 */ /* each nexthop is packed in an attribute */ size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); unsigned int i; /* may contain flow and gateway attribute */ nhsize += 2 * nla_total_size(4); /* grab encap info */ for (i = 0; i < fib_info_num_path(fi); i++) { struct fib_nh_common *nhc = fib_info_nhc(fi, i); if (nhc->nhc_lwtstate) { /* RTA_ENCAP_TYPE */ nh_encapsize += lwtunnel_get_encap_size( nhc->nhc_lwtstate); /* RTA_ENCAP */ nh_encapsize += nla_total_size(2); } } /* all nexthops are packed in a nested attribute */ payload += nla_total_size((nhs * nhsize) + nh_encapsize); } return payload; } void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags) { struct fib_rt_info fri; struct sk_buff *skb; u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; int err = -ENOBUFS; skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); if (!skb) goto errout; fri.fi = fa->fa_info; fri.tb_id = tb_id; fri.dst = key; fri.dst_len = dst_len; fri.dscp = fa->fa_dscp; fri.type = fa->fa_type; fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); fri.offload_failed = READ_ONCE(fa->offload_failed); err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); return; errout: if (err < 0) rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); } static int fib_detect_death(struct fib_info *fi, int order, struct fib_info **last_resort, int *last_idx, int dflt) { const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); struct neighbour *n; int state = NUD_NONE; if (likely(nhc->nhc_gw_family == AF_INET)) n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev); else if (nhc->nhc_gw_family == AF_INET6) n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6, nhc->nhc_dev); else n = NULL; if (n) { state = READ_ONCE(n->nud_state); neigh_release(n); } else { return 0; } if (state == NUD_REACHABLE) return 0; if ((state & NUD_VALID) && order != dflt) return 0; if ((state & NUD_VALID) || (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) { *last_resort = fi; *last_idx = order; } return 1; } int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, struct nlattr *encap, u16 encap_type, void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { int err; nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, gfp_flags); if (!nhc->nhc_pcpu_rth_output) 
return -ENOMEM; if (encap) { struct lwtunnel_state *lwtstate; if (encap_type == LWTUNNEL_ENCAP_NONE) { NL_SET_ERR_MSG(extack, "LWT encap type not specified"); err = -EINVAL; goto lwt_failure; } err = lwtunnel_build_state(net, encap_type, encap, nhc->nhc_family, cfg, &lwtstate, extack); if (err) goto lwt_failure; nhc->nhc_lwtstate = lwtstate_get(lwtstate); } return 0; lwt_failure: rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); nhc->nhc_pcpu_rth_output = NULL; return err; } EXPORT_SYMBOL_GPL(fib_nh_common_init); int fib_nh_init(struct net *net, struct fib_nh *nh, struct fib_config *cfg, int nh_weight, struct netlink_ext_ack *extack) { int err; nh->fib_nh_family = AF_INET; err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, GFP_KERNEL, extack); if (err) return err; nh->fib_nh_oif = cfg->fc_oif; nh->fib_nh_gw_family = cfg->fc_gw_family; if (cfg->fc_gw_family == AF_INET) nh->fib_nh_gw4 = cfg->fc_gw4; else if (cfg->fc_gw_family == AF_INET6) nh->fib_nh_gw6 = cfg->fc_gw6; nh->fib_nh_flags = cfg->fc_flags; #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; if (nh->nh_tclassid) atomic_inc(&net->ipv4.fib_num_tclassid_users); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->fib_nh_weight = nh_weight; #endif return 0; } #ifdef CONFIG_IP_ROUTE_MULTIPATH static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, struct netlink_ext_ack *extack) { int nhs = 0; while (rtnh_ok(rtnh, remaining)) { nhs++; rtnh = rtnh_next(rtnh, &remaining); } /* leftover implies invalid nexthop configuration, discard it */ if (remaining > 0) { NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - extra data after nexthops"); nhs = 0; } return nhs; } static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla, struct netlink_ext_ack *extack) { if (nla_len(nla) < sizeof(*gw)) { NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY"); return -EINVAL; } *gw = nla_get_in_addr(nla); return 0; } /* only called when fib_nh is integrated into fib_info */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, struct netlink_ext_ack *extack) { struct net *net = fi->fib_net; struct fib_config fib_cfg; struct fib_nh *nh; int ret; change_nexthops(fi) { int attrlen; memset(&fib_cfg, 0, sizeof(fib_cfg)); if (!rtnh_ok(rtnh, remaining)) { NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - extra data after nexthop"); return -EINVAL; } if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { NL_SET_ERR_MSG(extack, "Invalid flags for nexthop - can not contain DEAD or LINKDOWN"); return -EINVAL; } fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; fib_cfg.fc_oif = rtnh->rtnh_ifindex; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); nlav = nla_find(attrs, attrlen, RTA_VIA); if (nla && nlav) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); return -EINVAL; } if (nla) { ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla, extack); if (ret) goto errout; if (fib_cfg.fc_gw4) fib_cfg.fc_gw_family = AF_INET; } else if (nlav) { ret = fib_gw_from_via(&fib_cfg, nlav, extack); if (ret) goto errout; } nla = nla_find(attrs, attrlen, RTA_FLOW); if (nla) { if (nla_len(nla) < sizeof(u32)) { NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); return -EINVAL; } fib_cfg.fc_flow = nla_get_u32(nla); } fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); /* RTA_ENCAP_TYPE length checked in * lwtunnel_valid_encap_type_attr 
*/ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) fib_cfg.fc_encap_type = nla_get_u16(nla); } ret = fib_nh_init(net, nexthop_nh, &fib_cfg, rtnh->rtnh_hops + 1, extack); if (ret) goto errout; rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); ret = -EINVAL; nh = fib_info_nh(fi, 0); if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) { NL_SET_ERR_MSG(extack, "Nexthop device index does not match RTA_OIF"); goto errout; } if (cfg->fc_gw_family) { if (cfg->fc_gw_family != nh->fib_nh_gw_family || (cfg->fc_gw_family == AF_INET && nh->fib_nh_gw4 != cfg->fc_gw4) || (cfg->fc_gw_family == AF_INET6 && ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) { NL_SET_ERR_MSG(extack, "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA"); goto errout; } } #ifdef CONFIG_IP_ROUTE_CLASSID if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) { NL_SET_ERR_MSG(extack, "Nexthop class id does not match RTA_FLOW"); goto errout; } #endif ret = 0; errout: return ret; } /* only called when fib_nh is integrated into fib_info */ static void fib_rebalance(struct fib_info *fi) { int total; int w; if (fib_info_num_path(fi) < 2) return; total = 0; for_nexthops(fi) { if (nh->fib_nh_flags & RTNH_F_DEAD) continue; if (ip_ignore_linkdown(nh->fib_nh_dev) && nh->fib_nh_flags & RTNH_F_LINKDOWN) continue; total += nh->fib_nh_weight; } endfor_nexthops(fi); w = 0; change_nexthops(fi) { int upper_bound; if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) { upper_bound = -1; } else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) && nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) { upper_bound = -1; } else { w += nexthop_nh->fib_nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; } atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound); } endfor_nexthops(fi); } #else /* CONFIG_IP_ROUTE_MULTIPATH */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel"); return -EINVAL; } #define fib_rebalance(fi) do { } while (0) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ static int fib_encap_match(struct net *net, u16 encap_type, struct nlattr *encap, const struct fib_nh *nh, const struct fib_config *cfg, struct netlink_ext_ack *extack) { struct lwtunnel_state *lwtstate; int ret, result = 0; if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; ret = lwtunnel_build_state(net, encap_type, encap, AF_INET, cfg, &lwtstate, extack); if (!ret) { result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); lwtstate_free(lwtstate); } return result; } int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack) { #ifdef CONFIG_IP_ROUTE_MULTIPATH struct rtnexthop *rtnh; int remaining; #endif if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) return 1; if (cfg->fc_nh_id) { if (fi->nh && cfg->fc_nh_id == fi->nh->id) return 0; return 1; } if (fi->nh) { if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) return 1; return 0; } if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) return 1; } #ifdef CONFIG_IP_ROUTE_CLASSID if (cfg->fc_flow && cfg->fc_flow != nh->nh_tclassid) return 1; #endif if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) || (cfg->fc_gw_family && cfg->fc_gw_family != nh->fib_nh_gw_family)) return 1; if (cfg->fc_gw_family == AF_INET && cfg->fc_gw4 != nh->fib_nh_gw4) return 1; if (cfg->fc_gw_family == 
AF_INET6 && ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6)) return 1; return 0; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (!cfg->fc_mp) return 0; rtnh = cfg->fc_mp; remaining = cfg->fc_mp_len; for_nexthops(fi) { int attrlen; if (!rtnh_ok(rtnh, remaining)) return -EINVAL; if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif) return 1; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); int err; nla = nla_find(attrs, attrlen, RTA_GATEWAY); nlav = nla_find(attrs, attrlen, RTA_VIA); if (nla && nlav) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); return -EINVAL; } if (nla) { __be32 gw; err = fib_gw_from_attr(&gw, nla, extack); if (err) return err; if (nh->fib_nh_gw_family != AF_INET || gw != nh->fib_nh_gw4) return 1; } else if (nlav) { struct fib_config cfg2; err = fib_gw_from_via(&cfg2, nlav, extack); if (err) return err; switch (nh->fib_nh_gw_family) { case AF_INET: if (cfg2.fc_gw_family != AF_INET || cfg2.fc_gw4 != nh->fib_nh_gw4) return 1; break; case AF_INET6: if (cfg2.fc_gw_family != AF_INET6 || ipv6_addr_cmp(&cfg2.fc_gw6, &nh->fib_nh_gw6)) return 1; break; } } #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); if (nla) { if (nla_len(nla) < sizeof(u32)) { NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); return -EINVAL; } if (nla_get_u32(nla) != nh->nh_tclassid) return 1; } #endif } rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); #endif return 0; } bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) { struct nlattr *nla; int remaining; if (!cfg->fc_mx) return true; nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { int type = nla_type(nla); u32 fi_val, val; if (!type) continue; if (type > RTAX_MAX) return false; type = array_index_nospec(type, RTAX_MAX + 1); if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; bool ecn_ca = false; nla_strscpy(tmp, nla, sizeof(tmp)); val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); } else { if (nla_len(nla) != sizeof(u32)) return false; val = nla_get_u32(nla); } fi_val = fi->fib_metrics->metrics[type - 1]; if (type == RTAX_FEATURES) fi_val &= ~DST_FEATURE_ECN_CA; if (fi_val != val) return false; } return true; } static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, u32 table, struct netlink_ext_ack *extack) { struct fib6_config cfg = { .fc_table = table, .fc_flags = nh->fib_nh_flags | RTF_GATEWAY, .fc_ifindex = nh->fib_nh_oif, .fc_gateway = nh->fib_nh_gw6, }; struct fib6_nh fib6_nh = {}; int err; err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); if (!err) { nh->fib_nh_dev = fib6_nh.fib_nh_dev; netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_KERNEL); nh->fib_nh_oif = nh->fib_nh_dev->ifindex; nh->fib_nh_scope = RT_SCOPE_LINK; ipv6_stub->fib6_nh_release(&fib6_nh); } return err; } /* * Picture * ------- * * Semantics of nexthop is very messy by historical reasons. * We have to take into account, that: * a) gateway can be actually local interface address, * so that gatewayed route is direct. * b) gateway must be on-link address, possibly * described not by an ifaddr, but also by a direct route. * c) If both gateway and interface are specified, they should not * contradict. * d) If we use tunnel routes, gateway could be not on-link. * * Attempt to reconcile all of these (alas, self-contradictory) conditions * results in pretty ugly and hairy code with obscure logic. 
* * I chose to generalized it instead, so that the size * of code does not increase practically, but it becomes * much more general. * Every prefix is assigned a "scope" value: "host" is local address, * "link" is direct route, * [ ... "site" ... "interior" ... ] * and "universe" is true gateway route with global meaning. * * Every prefix refers to a set of "nexthop"s (gw, oif), * where gw must have narrower scope. This recursion stops * when gw has LOCAL scope or if "nexthop" is declared ONLINK, * which means that gw is forced to be on link. * * Code is still hairy, but now it is apparently logically * consistent and very flexible. F.e. as by-product it allows * to co-exists in peace independent exterior and interior * routing processes. * * Normally it looks as following. * * {universe prefix} -> (gw, oif) [scope link] * | * |-> {link prefix} -> (gw, oif) [scope local] * | * |-> {local prefix} (terminal node) */ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack) { struct net_device *dev; struct fib_result res; int err = 0; if (nh->fib_nh_flags & RTNH_F_ONLINK) { unsigned int addr_type; if (scope >= RT_SCOPE_LINK) { NL_SET_ERR_MSG(extack, "Nexthop has invalid scope"); return -EINVAL; } dev = __dev_get_by_index(net, nh->fib_nh_oif); if (!dev) { NL_SET_ERR_MSG(extack, "Nexthop device required for onlink"); return -ENODEV; } if (!(dev->flags & IFF_UP)) { NL_SET_ERR_MSG(extack, "Nexthop device is not up"); return -ENETDOWN; } addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4); if (addr_type != RTN_UNICAST) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); return -EINVAL; } if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; nh->fib_nh_dev = dev; netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); nh->fib_nh_scope = RT_SCOPE_LINK; return 0; } rcu_read_lock(); { struct fib_table *tbl = NULL; struct flowi4 fl4 = { .daddr = nh->fib_nh_gw4, .flowi4_scope = scope + 1, .flowi4_oif = nh->fib_nh_oif, .flowi4_iif = LOOPBACK_IFINDEX, }; /* It is not necessary, but requires a bit of thinking */ if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; if (table && table != RT_TABLE_MAIN) tbl = fib_get_table(net, table); if (tbl) err = fib_table_lookup(tbl, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE | FIB_LOOKUP_NOREF); /* on error or if no table given do full lookup. This * is needed for example when nexthops are in the local * table rather than the given table */ if (!tbl || err) { err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE); } if (err) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); goto out; } } err = -EINVAL; if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); goto out; } nh->fib_nh_scope = res.scope; nh->fib_nh_oif = FIB_RES_OIF(res); nh->fib_nh_dev = dev = FIB_RES_DEV(res); if (!dev) { NL_SET_ERR_MSG(extack, "No egress device for nexthop gateway"); goto out; } netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = (dev->flags & IFF_UP) ? 
0 : -ENETDOWN; out: rcu_read_unlock(); return err; } static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, struct netlink_ext_ack *extack) { struct in_device *in_dev; int err; if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { NL_SET_ERR_MSG(extack, "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); return -EINVAL; } rcu_read_lock(); err = -ENODEV; in_dev = inetdev_by_index(net, nh->fib_nh_oif); if (!in_dev) goto out; err = -ENETDOWN; if (!(in_dev->dev->flags & IFF_UP)) { NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); goto out; } nh->fib_nh_dev = in_dev->dev; netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); nh->fib_nh_scope = RT_SCOPE_HOST; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; out: rcu_read_unlock(); return err; } int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack) { int err; if (nh->fib_nh_gw_family == AF_INET) err = fib_check_nh_v4_gw(net, nh, table, scope, extack); else if (nh->fib_nh_gw_family == AF_INET6) err = fib_check_nh_v6_gw(net, nh, table, extack); else err = fib_check_nh_nongw(net, nh, extack); return err; } static struct hlist_head * fib_info_laddrhash_bucket(const struct net *net, __be32 val) { u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val, fib_info_hash_bits); return &fib_info_laddrhash[slot]; } static void fib_info_hash_move(struct hlist_head *new_info_hash, struct hlist_head *new_laddrhash, unsigned int new_size) { struct hlist_head *old_info_hash, *old_laddrhash; unsigned int old_size = fib_info_hash_size; unsigned int i; spin_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_info_hash_size = new_size; fib_info_hash_bits = ilog2(new_size); for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; struct hlist_node *n; struct fib_info *fi; hlist_for_each_entry_safe(fi, n, head, fib_hash) { struct hlist_head *dest; unsigned int new_hash; new_hash = fib_info_hashfn(fi); dest = &new_info_hash[new_hash]; hlist_add_head(&fi->fib_hash, dest); } } fib_info_hash = new_info_hash; fib_info_laddrhash = new_laddrhash; for (i = 0; i < old_size; i++) { struct hlist_head *lhead = &old_laddrhash[i]; struct hlist_node *n; struct fib_info *fi; hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { struct hlist_head *ldest; ldest = fib_info_laddrhash_bucket(fi->fib_net, fi->fib_prefsrc); hlist_add_head(&fi->fib_lhash, ldest); } } spin_unlock_bh(&fib_info_lock); kvfree(old_info_hash); kvfree(old_laddrhash); } __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, unsigned char scope) { struct fib_nh *nh; __be32 saddr; if (nhc->nhc_family != AF_INET) return inet_select_addr(nhc->nhc_dev, 0, scope); nh = container_of(nhc, struct fib_nh, nh_common); saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope); WRITE_ONCE(nh->nh_saddr, saddr); WRITE_ONCE(nh->nh_saddr_genid, atomic_read(&net->ipv4.dev_addr_genid)); return saddr; } __be32 fib_result_prefsrc(struct net *net, struct fib_result *res) { struct fib_nh_common *nhc = res->nhc; if (res->fi->fib_prefsrc) return res->fi->fib_prefsrc; if (nhc->nhc_family == AF_INET) { struct fib_nh *nh; nh = container_of(nhc, struct fib_nh, nh_common); if (READ_ONCE(nh->nh_saddr_genid) == atomic_read(&net->ipv4.dev_addr_genid)) return READ_ONCE(nh->nh_saddr); } return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope); } static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) 
{ if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || fib_prefsrc != cfg->fc_dst) { u32 tb_id = cfg->fc_table; int rc; if (tb_id == RT_TABLE_MAIN) tb_id = RT_TABLE_LOCAL; rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, fib_prefsrc, tb_id); if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, fib_prefsrc, RT_TABLE_LOCAL); } if (rc != RTN_LOCAL) return false; } return true; } struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack) { int err; struct fib_info *fi = NULL; struct nexthop *nh = NULL; struct fib_info *ofi; int nhs = 1; struct net *net = cfg->fc_nlinfo.nl_net; if (cfg->fc_type > RTN_MAX) goto err_inval; /* Fast check to catch the most weird cases */ if (fib_props[cfg->fc_type].scope > cfg->fc_scope) { NL_SET_ERR_MSG(extack, "Invalid scope"); goto err_inval; } if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { NL_SET_ERR_MSG(extack, "Invalid rtm_flags - can not contain DEAD or LINKDOWN"); goto err_inval; } if (cfg->fc_nh_id) { if (!cfg->fc_mx) { fi = fib_find_info_nh(net, cfg); if (fi) { refcount_inc(&fi->fib_treeref); return fi; } } nh = nexthop_find_by_id(net, cfg->fc_nh_id); if (!nh) { NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); goto err_inval; } nhs = 0; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (cfg->fc_mp) { nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); if (nhs == 0) goto err_inval; } #endif err = -ENOBUFS; /* Paired with WRITE_ONCE() in fib_release_info() */ if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; size_t bytes; if (!new_size) new_size = 16; bytes = (size_t)new_size * sizeof(struct hlist_head *); new_info_hash = kvzalloc(bytes, GFP_KERNEL); new_laddrhash = kvzalloc(bytes, GFP_KERNEL); if (!new_info_hash || !new_laddrhash) { kvfree(new_info_hash); kvfree(new_laddrhash); } else { fib_info_hash_move(new_info_hash, new_laddrhash, new_size); } if (!fib_info_hash_size) goto failure; } fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); if (!fi) goto failure; fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(fi->fib_metrics)) { err = PTR_ERR(fi->fib_metrics); kfree(fi); return ERR_PTR(err); } fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; fi->fib_prefsrc = cfg->fc_prefsrc; fi->fib_type = cfg->fc_type; fi->fib_tb_id = cfg->fc_table; fi->fib_nhs = nhs; if (nh) { if (!nexthop_get(nh)) { NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); err = -EINVAL; } else { err = 0; fi->nh = nh; } } else { change_nexthops(fi) { nexthop_nh->nh_parent = fi; } endfor_nexthops(fi) if (cfg->fc_mp) err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); else err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack); } if (err != 0) goto failure; if (fib_props[cfg->fc_type].error) { if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) { NL_SET_ERR_MSG(extack, "Gateway, device and multipath can not be specified for this route type"); goto err_inval; } goto link_it; } else { switch (cfg->fc_type) { case RTN_UNICAST: case RTN_LOCAL: case RTN_BROADCAST: case RTN_ANYCAST: case RTN_MULTICAST: break; default: NL_SET_ERR_MSG(extack, "Invalid route type"); goto err_inval; } } if (cfg->fc_scope > RT_SCOPE_HOST) { NL_SET_ERR_MSG(extack, "Invalid scope"); goto err_inval; } if (fi->nh) { err = fib_check_nexthop(fi->nh, cfg->fc_scope, 
extack); if (err) goto failure; } else if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ if (nhs != 1) { NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); goto err_inval; } if (nh->fib_nh_gw_family) { NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway"); goto err_inval; } nh->fib_nh_scope = RT_SCOPE_NOWHERE; nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif); err = -ENODEV; if (!nh->fib_nh_dev) goto failure; netdev_tracker_alloc(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_KERNEL); } else { int linkdown = 0; change_nexthops(fi) { err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh, cfg->fc_table, cfg->fc_scope, extack); if (err != 0) goto failure; if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) linkdown++; } endfor_nexthops(fi) if (linkdown == fi->fib_nhs) fi->fib_flags |= RTNH_F_LINKDOWN; } if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) { NL_SET_ERR_MSG(extack, "Invalid prefsrc address"); goto err_inval; } if (!fi->nh) { change_nexthops(fi) { fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common, fi->fib_scope); if (nexthop_nh->fib_nh_gw_family == AF_INET6) fi->fib_nh_is_v6 = true; } endfor_nexthops(fi) fib_rebalance(fi); } link_it: ofi = fib_find_info(fi); if (ofi) { /* fib_table_lookup() should not see @fi yet. */ fi->fib_dead = 1; free_fib_info(fi); refcount_inc(&ofi->fib_treeref); return ofi; } refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); fib_info_cnt++; hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { struct hlist_head *head; head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc); hlist_add_head(&fi->fib_lhash, head); } if (fi->nh) { list_add(&fi->nh_list, &nh->fi_list); } else { change_nexthops(fi) { struct hlist_head *head; if (!nexthop_nh->fib_nh_dev) continue; head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } spin_unlock_bh(&fib_info_lock); return fi; err_inval: err = -EINVAL; failure: if (fi) { /* fib_table_lookup() should not see @fi yet. 
*/ fi->fib_dead = 1; free_fib_info(fi); } return ERR_PTR(err); } int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, u8 rt_family, unsigned char *flags, bool skip_oif) { if (nhc->nhc_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; if (nhc->nhc_flags & RTNH_F_LINKDOWN) { *flags |= RTNH_F_LINKDOWN; rcu_read_lock(); switch (nhc->nhc_family) { case AF_INET: if (ip_ignore_linkdown(nhc->nhc_dev)) *flags |= RTNH_F_DEAD; break; case AF_INET6: if (ip6_ignore_linkdown(nhc->nhc_dev)) *flags |= RTNH_F_DEAD; break; } rcu_read_unlock(); } switch (nhc->nhc_gw_family) { case AF_INET: if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) goto nla_put_failure; break; case AF_INET6: /* if gateway family does not match nexthop family * gateway is encoded as RTA_VIA */ if (rt_family != nhc->nhc_gw_family) { int alen = sizeof(struct in6_addr); struct nlattr *nla; struct rtvia *via; nla = nla_reserve(skb, RTA_VIA, alen + 2); if (!nla) goto nla_put_failure; via = nla_data(nla); via->rtvia_family = AF_INET6; memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen); } else if (nla_put_in6_addr(skb, RTA_GATEWAY, &nhc->nhc_gw.ipv6) < 0) { goto nla_put_failure; } break; } *flags |= (nhc->nhc_flags & (RTNH_F_ONLINK | RTNH_F_OFFLOAD | RTNH_F_TRAP)); if (!skip_oif && nhc->nhc_dev && nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) goto nla_put_failure; if (nhc->nhc_lwtstate && lwtunnel_fill_encap(skb, nhc->nhc_lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } EXPORT_SYMBOL_GPL(fib_nexthop_info); #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, int nh_weight, u8 rt_family, u32 nh_tclassid) { const struct net_device *dev = nhc->nhc_dev; struct rtnexthop *rtnh; unsigned char flags = 0; rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; rtnh->rtnh_hops = nh_weight - 1; rtnh->rtnh_ifindex = dev ? 
dev->ifindex : 0; if (fib_nexthop_info(skb, nhc, rt_family, &flags, true) < 0) goto nla_put_failure; rtnh->rtnh_flags = flags; if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid)) goto nla_put_failure; /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; return 0; nla_put_failure: return -EMSGSIZE; } EXPORT_SYMBOL_GPL(fib_add_nexthop); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) { struct nlattr *mp; mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; if (unlikely(fi->nh)) { if (nexthop_mpath_fill_node(skb, fi->nh, AF_INET) < 0) goto nla_put_failure; goto mp_end; } for_nexthops(fi) { u32 nh_tclassid = 0; #ifdef CONFIG_IP_ROUTE_CLASSID nh_tclassid = nh->nh_tclassid; #endif if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, AF_INET, nh_tclassid) < 0) goto nla_put_failure; } endfor_nexthops(fi); mp_end: nla_nest_end(skb, mp); return 0; nla_put_failure: return -EMSGSIZE; } #else static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) { return 0; } #endif int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct fib_rt_info *fri, unsigned int flags) { unsigned int nhs = fib_info_num_path(fri->fi); struct fib_info *fi = fri->fi; u32 tb_id = fri->tb_id; struct nlmsghdr *nlh; struct rtmsg *rtm; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET; rtm->rtm_dst_len = fri->dst_len; rtm->rtm_src_len = 0; rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp); if (tb_id < 256) rtm->rtm_table = tb_id; else rtm->rtm_table = RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, tb_id)) goto nla_put_failure; rtm->rtm_type = fri->type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = fi->fib_scope; rtm->rtm_protocol = fi->fib_protocol; if (rtm->rtm_dst_len && nla_put_in_addr(skb, RTA_DST, fri->dst)) goto nla_put_failure; if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) goto nla_put_failure; if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) goto nla_put_failure; if (fi->fib_prefsrc && nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->nh) { if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id)) goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload; } if (nhs == 1) { const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); unsigned char flags = 0; if (fib_nexthop_info(skb, nhc, AF_INET, &flags, false) < 0) goto nla_put_failure; rtm->rtm_flags = flags; #ifdef CONFIG_IP_ROUTE_CLASSID if (nhc->nhc_family == AF_INET) { struct fib_nh *nh; nh = container_of(nhc, struct fib_nh, nh_common); if (nh->nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; } #endif } else { if (fib_add_multipath(skb, fi) < 0) goto nla_put_failure; } offload: if (fri->offload) rtm->rtm_flags |= RTM_F_OFFLOAD; if (fri->trap) rtm->rtm_flags |= RTM_F_TRAP; if (fri->offload_failed) rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } /* * Update FIB if: * - local address disappeared -> we must delete all the entries * referring to it. * - device went down -> we must shutdown all nexthops going via it. */ int fib_sync_down_addr(struct net_device *dev, __be32 local) { int tb_id = l3mdev_fib_table(dev) ? 
: RT_TABLE_MAIN; struct net *net = dev_net(dev); struct hlist_head *head; struct fib_info *fi; int ret = 0; if (!fib_info_laddrhash || local == 0) return 0; head = fib_info_laddrhash_bucket(net, local); hlist_for_each_entry(fi, head, fib_lhash) { if (!net_eq(fi->fib_net, net) || fi->fib_tb_id != tb_id) continue; if (fi->fib_prefsrc == local) { fi->fib_flags |= RTNH_F_DEAD; fi->pfsrc_removed = true; ret++; } } return ret; } static int call_fib_nh_notifiers(struct fib_nh *nh, enum fib_event_type event_type) { bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev); struct fib_nh_notifier_info info = { .fib_nh = nh, }; switch (event_type) { case FIB_EVENT_NH_ADD: if (nh->fib_nh_flags & RTNH_F_DEAD) break; if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) break; return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info); case FIB_EVENT_NH_DEL: if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) || (nh->fib_nh_flags & RTNH_F_DEAD)) return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info); break; default: break; } return NOTIFY_DONE; } /* Update the PMTU of exceptions when: * - the new MTU of the first hop becomes smaller than the PMTU * - the old MTU was the same as the PMTU, and it limited discovery of * larger MTUs on the path. With that limit raised, we can now * discover larger MTUs * A special case is locked exceptions, for which the PMTU is smaller * than the minimal accepted PMTU: * - if the new MTU is greater than the PMTU, don't make any change * - otherwise, unlock and set PMTU */ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) { struct fnhe_hash_bucket *bucket; int i; bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!bucket) return; for (i = 0; i < FNHE_HASH_SIZE; i++) { struct fib_nh_exception *fnhe; for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); fnhe; fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { if (fnhe->fnhe_mtu_locked) { if (new <= fnhe->fnhe_pmtu) { fnhe->fnhe_pmtu = new; fnhe->fnhe_mtu_locked = false; } } else if (new < fnhe->fnhe_pmtu || orig == fnhe->fnhe_pmtu) { fnhe->fnhe_pmtu = new; } } } } void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev) fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); } } /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed * * only used when fib_nh is built into fib_info */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_info *prev_fi = NULL; int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; int ret = 0; if (force) scope = -1; hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int dead; BUG_ON(!fi->fib_nhs); if (nh->fib_nh_dev != dev || fi == prev_fi) continue; prev_fi = fi; dead = 0; change_nexthops(fi) { if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) dead++; else if (nexthop_nh->fib_nh_dev == dev && nexthop_nh->fib_nh_scope != scope) { switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; fallthrough; case NETDEV_CHANGE: nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; break; } call_fib_nh_notifiers(nexthop_nh, 
FIB_EVENT_NH_DEL); dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (event == NETDEV_UNREGISTER && nexthop_nh->fib_nh_dev == dev) { dead = fi->fib_nhs; break; } #endif } endfor_nexthops(fi) if (dead == fi->fib_nhs) { switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: fi->fib_flags |= RTNH_F_DEAD; fallthrough; case NETDEV_CHANGE: fi->fib_flags |= RTNH_F_LINKDOWN; break; } ret++; } fib_rebalance(fi); } return ret; } /* Must be invoked inside of an RCU protected region. */ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; struct hlist_head *fa_head = res->fa_head; struct fib_table *tb = res->table; u8 slen = 32 - res->prefixlen; int order = -1, last_idx = -1; struct fib_alias *fa, *fa1 = NULL; u32 last_prio = res->fi->fib_priority; dscp_t last_dscp = 0; hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; struct fib_nh_common *nhc; if (fa->fa_slen != slen) continue; if (fa->fa_dscp && fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos)) continue; if (fa->tb_id != tb->tb_id) continue; if (next_fi->fib_priority > last_prio && fa->fa_dscp == last_dscp) { if (last_dscp) continue; break; } if (next_fi->fib_flags & RTNH_F_DEAD) continue; last_dscp = fa->fa_dscp; last_prio = next_fi->fib_priority; if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; nhc = fib_info_nhc(next_fi, 0); if (!nhc->nhc_gw_family || nhc->nhc_scope != RT_SCOPE_LINK) continue; fib_alias_accessed(fa); if (!fi) { if (next_fi != res->fi) break; fa1 = fa; } else if (!fib_detect_death(fi, order, &last_resort, &last_idx, fa1->fa_default)) { fib_result_assign(res, fi); fa1->fa_default = order; goto out; } fi = next_fi; order++; } if (order <= 0 || !fi) { if (fa1) fa1->fa_default = -1; goto out; } if (!fib_detect_death(fi, order, &last_resort, &last_idx, fa1->fa_default)) { fib_result_assign(res, fi); fa1->fa_default = order; goto out; } if (last_idx >= 0) fib_result_assign(res, last_resort); fa1->fa_default = last_idx; out: return; } /* * Dead device goes up. We wake up dead nexthops. * It takes sense only on multipath routes. 
* * only used when fib_nh is built into fib_info */ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; struct hlist_head *head; struct fib_nh *nh; int ret; if (!(dev->flags & IFF_UP)) return 0; if (nh_flags & RTNH_F_DEAD) { unsigned int flags = dev_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) nh_flags |= RTNH_F_LINKDOWN; } prev_fi = NULL; head = fib_info_devhash_bucket(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int alive; BUG_ON(!fi->fib_nhs); if (nh->fib_nh_dev != dev || fi == prev_fi) continue; prev_fi = fi; alive = 0; change_nexthops(fi) { if (!(nexthop_nh->fib_nh_flags & nh_flags)) { alive++; continue; } if (!nexthop_nh->fib_nh_dev || !(nexthop_nh->fib_nh_dev->flags & IFF_UP)) continue; if (nexthop_nh->fib_nh_dev != dev || !__in_dev_get_rtnl(dev)) continue; alive++; nexthop_nh->fib_nh_flags &= ~nh_flags; call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); } endfor_nexthops(fi) if (alive > 0) { fi->fib_flags &= ~nh_flags; ret++; } fib_rebalance(fi); } return ret; } #ifdef CONFIG_IP_ROUTE_MULTIPATH static bool fib_good_nh(const struct fib_nh *nh) { int state = NUD_REACHABLE; if (nh->fib_nh_scope == RT_SCOPE_LINK) { struct neighbour *n; rcu_read_lock(); if (likely(nh->fib_nh_gw_family == AF_INET)) n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, (__force u32)nh->fib_nh_gw4); else if (nh->fib_nh_gw_family == AF_INET6) n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); else n = NULL; if (n) state = READ_ONCE(n->nud_state); rcu_read_unlock(); } return !!(state & NUD_VALID); } void fib_select_multipath(struct fib_result *res, int hash) { struct fib_info *fi = res->fi; struct net *net = fi->fib_net; bool first = false; if (unlikely(res->fi->nh)) { nexthop_path_fib_result(res, hash); return; } change_nexthops(fi) { if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { if (!fib_good_nh(nexthop_nh)) continue; if (!first) { res->nh_sel = nhsel; res->nhc = &nexthop_nh->nh_common; first = true; } } if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) continue; res->nh_sel = nhsel; res->nhc = &nexthop_nh->nh_common; return; } endfor_nexthops(fi); } #endif void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb) { if (fl4->flowi4_oif) goto check_saddr; #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(net, fl4, skb, NULL); fib_select_multipath(res, h); } else #endif if (!res->prefixlen && res->table->tb_num_default > 1 && res->type == RTN_UNICAST) fib_select_default(fl4, res); check_saddr: if (!fl4->saddr) fl4->saddr = fib_result_prefsrc(net, res); }
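/*
 * Illustrative sketch, not kernel code: a minimal user-space model of the
 * hash-threshold selection that fib_select_multipath() performs above, where
 * a flow hash is compared against per-nexthop upper bounds derived from the
 * nexthop weights (as fib_rebalance() computes for fib_nh_upper_bound).
 * The demo_* names, the fixed-size array and the INT_MAX scaling are
 * assumptions made for this example only.
 */
#include <limits.h>
#include <stdio.h>

struct demo_nh {
	int weight;      /* relative weight of the nexthop */
	int upper_bound; /* cumulative threshold scaled to [0, INT_MAX] */
};

/* Scale cumulative weights into the hash range, as fib_rebalance() does. */
static void demo_rebalance(struct demo_nh *nhs, int n)
{
	long long total = 0, acc = 0;
	int i;

	for (i = 0; i < n; i++)
		total += nhs[i].weight;
	for (i = 0; i < n; i++) {
		acc += nhs[i].weight;
		nhs[i].upper_bound = (int)((acc * INT_MAX) / total);
	}
}

/* Pick the first nexthop whose upper bound covers the flow hash. */
static int demo_select(const struct demo_nh *nhs, int n, int hash)
{
	int i;

	for (i = 0; i < n; i++)
		if (hash <= nhs[i].upper_bound)
			return i;
	return n - 1;
}

int main(void)
{
	struct demo_nh nhs[] = { { .weight = 1 }, { .weight = 3 } };

	demo_rebalance(nhs, 2);
	/* A hash in the lower quarter of the range lands on nexthop 0 ... */
	printf("hash=%d -> nh%d\n", INT_MAX / 8, demo_select(nhs, 2, INT_MAX / 8));
	/* ... while a larger hash lands on the more heavily weighted nexthop 1. */
	printf("hash=%d -> nh%d\n", INT_MAX / 2, demo_select(nhs, 2, INT_MAX / 2));
	return 0;
}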
/* SPDX-License-Identifier: GPL-2.0 */ /* interrupt.h */ #ifndef _LINUX_INTERRUPT_H #define _LINUX_INTERRUPT_H #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/cpumask.h> #include <linux/irqreturn.h> #include <linux/irqnr.h> #include <linux/hardirq.h> #include
<linux/irqflags.h> #include <linux/hrtimer.h> #include <linux/kref.h> #include <linux/workqueue.h> #include <linux/jump_label.h> #include <linux/atomic.h> #include <asm/ptrace.h> #include <asm/irq.h> #include <asm/sections.h> /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When * requesting an interrupt without specifying a IRQF_TRIGGER, the * setting should be assumed to be "as already configured", which * may be as per machine or firmware initialisation. */ #define IRQF_TRIGGER_NONE 0x00000000 #define IRQF_TRIGGER_RISING 0x00000001 #define IRQF_TRIGGER_FALLING 0x00000002 #define IRQF_TRIGGER_HIGH 0x00000004 #define IRQF_TRIGGER_LOW 0x00000008 #define IRQF_TRIGGER_MASK (IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW | \ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING) #define IRQF_TRIGGER_PROBE 0x00000010 /* * These flags used only by the kernel as part of the * irq handling routines. * * IRQF_SHARED - allow sharing the irq among several devices * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur * IRQF_TIMER - Flag to mark this interrupt as timer interrupt * IRQF_PERCPU - Interrupt is per cpu * IRQF_NOBALANCING - Flag to exclude this interrupt from irq balancing * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is * registered first in a shared interrupt is considered for * performance reasons) * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. * Used by threaded interrupts which need to keep the * irq line disabled until the threaded handler has been run. * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee * that this interrupt will wake the system from a suspended * state. See Documentation/power/suspend-and-interrupts.rst * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device * resume time. * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. * IRQF_NO_AUTOEN - Don't enable IRQ or NMI automatically when users request it. * Users will enable it explicitly by enable_irq() or enable_nmi() * later. * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers, * depends on IRQF_PERCPU. * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared * interrupt. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 #define __IRQF_TIMER 0x00000200 #define IRQF_PERCPU 0x00000400 #define IRQF_NOBALANCING 0x00000800 #define IRQF_IRQPOLL 0x00001000 #define IRQF_ONESHOT 0x00002000 #define IRQF_NO_SUSPEND 0x00004000 #define IRQF_FORCE_RESUME 0x00008000 #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 #define IRQF_COND_ONESHOT 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) /* * These values can be returned by request_any_context_irq() and * describe the context the interrupt will be run in. 
* * IRQC_IS_HARDIRQ - interrupt runs in hardirq context * IRQC_IS_NESTED - interrupt runs in a nested threaded context */ enum { IRQC_IS_HARDIRQ = 0, IRQC_IS_NESTED, }; typedef irqreturn_t (*irq_handler_t)(int, void *); /** * struct irqaction - per interrupt action descriptor * @handler: interrupt handler function * @name: name of the device * @dev_id: cookie to identify the device * @percpu_dev_id: cookie to identify the device * @next: pointer to the next irqaction for shared interrupts * @irq: interrupt number * @flags: flags (see IRQF_* above) * @thread_fn: interrupt handler function for threaded interrupts * @thread: thread pointer for threaded interrupts * @secondary: pointer to secondary irqaction (force threading) * @thread_flags: flags related to @thread * @thread_mask: bitmask for keeping track of @thread activity * @dir: pointer to the proc/irq/NN/name entry */ struct irqaction { irq_handler_t handler; void *dev_id; void __percpu *percpu_dev_id; struct irqaction *next; irq_handler_t thread_fn; struct task_struct *thread; struct irqaction *secondary; unsigned int irq; unsigned int flags; unsigned long thread_flags; unsigned long thread_mask; const char *name; struct proc_dir_entry *dir; } ____cacheline_internodealigned_in_smp; extern irqreturn_t no_action(int cpl, void *dev_id); /* * If a (PCI) device interrupt is not connected we set dev->irq to * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we * can distingiush that case from other error returns. * * 0x80000000 is guaranteed to be outside the available range of interrupts * and easy to distinguish from other possible incorrect values. */ #define IRQ_NOTCONNECTED (1U << 31) extern int __must_check request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long flags, const char *name, void *dev); /** * request_irq - Add a handler for an interrupt line * @irq: The interrupt line to allocate * @handler: Function to be called when the IRQ occurs. * Primary handler for threaded interrupts * If NULL, the default primary handler is installed * @flags: Handling flags * @name: Name of the device generating this interrupt * @dev: A cookie passed to the handler function * * This call allocates an interrupt and establishes a handler; see * the documentation for request_threaded_irq() for details. 
*/ static inline int __must_check request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev) { return request_threaded_irq(irq, handler, NULL, flags, name, dev); } extern int __must_check request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id); extern int __must_check __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, void __percpu *percpu_dev_id); extern int __must_check request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev); static inline int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id) { return __request_percpu_irq(irq, handler, 0, devname, percpu_dev_id); } extern int __must_check request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *dev); extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); extern const void *free_nmi(unsigned int irq, void *dev_id); extern void free_percpu_nmi(unsigned int irq, void __percpu *percpu_dev_id); struct device; extern int __must_check devm_request_threaded_irq(struct device *dev, unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long irqflags, const char *devname, void *dev_id); static inline int __must_check devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags, devname, dev_id); } extern int __must_check devm_request_any_context_irq(struct device *dev, unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); bool irq_has_action(unsigned int irq); extern void disable_irq_nosync(unsigned int irq); extern bool disable_hardirq(unsigned int irq); extern void disable_irq(unsigned int irq); extern void disable_percpu_irq(unsigned int irq); extern void enable_irq(unsigned int irq); extern void enable_percpu_irq(unsigned int irq, unsigned int type); extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); extern void disable_nmi_nosync(unsigned int irq); extern void disable_percpu_nmi(unsigned int irq); extern void enable_nmi(unsigned int irq); extern void enable_percpu_nmi(unsigned int irq, unsigned int type); extern int prepare_percpu_nmi(unsigned int irq); extern void teardown_percpu_nmi(unsigned int irq); extern int irq_inject_interrupt(unsigned int irq); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); extern void rearm_wake_irq(unsigned int irq); /** * struct irq_affinity_notify - context for notification of IRQ affinity changes * @irq: Interrupt to which notification applies * @kref: Reference count, for internal use * @work: Work item, for internal use * @notify: Function to be called on change. This will be * called in process context. * @release: Function to be called on release. This will be * called in process context. Once registered, the * structure must only be freed when this function is * called or later. 
*/ struct irq_affinity_notify { unsigned int irq; struct kref kref; struct work_struct work; void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); void (*release)(struct kref *ref); }; #define IRQ_AFFINITY_MAX_SETS 4 /** * struct irq_affinity - Description for automatic irq affinity assignements * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of * the MSI(-X) vector space * @nr_sets: The number of interrupt sets for which affinity * spreading is required * @set_size: Array holding the size of each interrupt set * @calc_sets: Callback for calculating the number and size * of interrupt sets * @priv: Private data for usage by @calc_sets, usually a * pointer to driver/device specific data. */ struct irq_affinity { unsigned int pre_vectors; unsigned int post_vectors; unsigned int nr_sets; unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; void (*calc_sets)(struct irq_affinity *, unsigned int nvecs); void *priv; }; /** * struct irq_affinity_desc - Interrupt affinity descriptor * @mask: cpumask to hold the affinity assignment * @is_managed: 1 if the interrupt is managed internally */ struct irq_affinity_desc { struct cpumask mask; unsigned int is_managed : 1; }; #if defined(CONFIG_SMP) extern cpumask_var_t irq_default_affinity; extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); extern int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, bool setaffinity); /** * irq_update_affinity_hint - Update the affinity hint * @irq: Interrupt to update * @m: cpumask pointer (NULL to clear the hint) * * Updates the affinity hint, but does not change the affinity of the interrupt. */ static inline int irq_update_affinity_hint(unsigned int irq, const struct cpumask *m) { return __irq_apply_affinity_hint(irq, m, false); } /** * irq_set_affinity_and_hint - Update the affinity hint and apply the provided * cpumask to the interrupt * @irq: Interrupt to update * @m: cpumask pointer (NULL to clear the hint) * * Updates the affinity hint and if @m is not NULL it applies it as the * affinity of that interrupt. */ static inline int irq_set_affinity_and_hint(unsigned int irq, const struct cpumask *m) { return __irq_apply_affinity_hint(irq, m, true); } /* * Deprecated. Use irq_update_affinity_hint() or irq_set_affinity_and_hint() * instead. 
*/ static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { return irq_set_affinity_and_hint(irq, m); } extern int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct irq_affinity_desc * irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd); unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, const struct irq_affinity *affd); #else /* CONFIG_SMP */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) { return -EINVAL; } static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) { return 0; } static inline int irq_can_set_affinity(unsigned int irq) { return 0; } static inline int irq_select_affinity(unsigned int irq) { return 0; } static inline int irq_update_affinity_hint(unsigned int irq, const struct cpumask *m) { return -EINVAL; } static inline int irq_set_affinity_and_hint(unsigned int irq, const struct cpumask *m) { return -EINVAL; } static inline int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) { return -EINVAL; } static inline int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity) { return -EINVAL; } static inline int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { return 0; } static inline struct irq_affinity_desc * irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd) { return NULL; } static inline unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, const struct irq_affinity *affd) { return maxvec; } #endif /* CONFIG_SMP */ /* * Special lockdep variants of irq disabling/enabling. * These should be used for locking constructs that * know that a particular irq context which is disabled, * and which is the only irq-context user of a lock, * that it's safe to take the lock in the irq-disabled * section without disabling hardirqs. * * On !CONFIG_LOCKDEP they are equivalent to the normal * irq disable/enable methods. */ static inline void disable_irq_nosync_lockdep(unsigned int irq) { disable_irq_nosync(irq); #ifdef CONFIG_LOCKDEP local_irq_disable(); #endif } static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags) { disable_irq_nosync(irq); #ifdef CONFIG_LOCKDEP local_irq_save(*flags); #endif } static inline void disable_irq_lockdep(unsigned int irq) { disable_irq(irq); #ifdef CONFIG_LOCKDEP local_irq_disable(); #endif } static inline void enable_irq_lockdep(unsigned int irq) { #ifdef CONFIG_LOCKDEP local_irq_enable(); #endif enable_irq(irq); } static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags) { #ifdef CONFIG_LOCKDEP local_irq_restore(*flags); #endif enable_irq(irq); } /* IRQ wakeup (PM) control: */ extern int irq_set_irq_wake(unsigned int irq, unsigned int on); static inline int enable_irq_wake(unsigned int irq) { return irq_set_irq_wake(irq, 1); } static inline int disable_irq_wake(unsigned int irq) { return irq_set_irq_wake(irq, 0); } /* * irq_get_irqchip_state/irq_set_irqchip_state specific flags */ enum irqchip_irq_state { IRQCHIP_STATE_PENDING, /* Is interrupt pending? */ IRQCHIP_STATE_ACTIVE, /* Is interrupt in progress? */ IRQCHIP_STATE_MASKED, /* Is interrupt masked? */ IRQCHIP_STATE_LINE_LEVEL, /* Is IRQ line high? 
*/ }; extern int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool *state); extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool state); #ifdef CONFIG_IRQ_FORCED_THREADING # ifdef CONFIG_PREEMPT_RT # define force_irqthreads() (true) # else DECLARE_STATIC_KEY_FALSE(force_irqthreads_key); # define force_irqthreads() (static_branch_unlikely(&force_irqthreads_key)) # endif #else #define force_irqthreads() (false) #endif #ifndef local_softirq_pending #ifndef local_softirq_pending_ref #define local_softirq_pending_ref irq_stat.__softirq_pending #endif #define local_softirq_pending() (__this_cpu_read(local_softirq_pending_ref)) #define set_softirq_pending(x) (__this_cpu_write(local_softirq_pending_ref, (x))) #define or_softirq_pending(x) (__this_cpu_or(local_softirq_pending_ref, (x))) #endif /* local_softirq_pending */ /* Some architectures might implement lazy enabling/disabling of * interrupts. In some cases, such as stop_machine, we might want * to ensure that after a local_irq_disable(), interrupts have * really been disabled in hardware. Such architectures need to * implement the following hook. */ #ifndef hard_irq_disable #define hard_irq_disable() do { } while(0) #endif /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. For almost all the purposes tasklets are more than enough. F.e. all serial device BHs et al. should be converted to tasklets, not to softirqs. */ enum { HI_SOFTIRQ=0, TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; /* * The following vectors can be safely ignored after ksoftirqd is parked: * * _ RCU: * 1) rcutree_migrate_callbacks() migrates the queue. * 2) rcutree_report_cpu_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue * * _ (HR)TIMER_SOFTIRQ: (hr)timers_dead_cpu() migrates the queue */ #define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\ BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. */ extern const char * const softirq_to_name[NR_SOFTIRQS]; /* softirq mask and active fields moved to irq_cpustat_t in * asm/hardirq.h to get better cache usage. KAO */ struct softirq_action { void (*action)(struct softirq_action *); }; asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); #ifdef CONFIG_PREEMPT_RT extern void do_softirq_post_smp_call_flush(unsigned int was_pending); #else static inline void do_softirq_post_smp_call_flush(unsigned int unused) { do_softirq(); } #endif extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) { return this_cpu_read(ksoftirqd); } /* Tasklets --- multithreaded analogue of BHs. This API is deprecated. Please consider using threaded IRQs instead: https://lore.kernel.org/lkml/20200716081538.2sivhkj4hcyrusem@linutronix.de Main feature differing them of generic softirqs: tasklet is running only on one CPU simultaneously. 
Main feature differing them of BHs: different tasklets may be run simultaneously on different CPUs. Properties: * If tasklet_schedule() is called, then tasklet is guaranteed to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its execution is still not started, it will be executed only once. * If this tasklet is already running on another CPU (or schedule is called from tasklet itself), it is rescheduled for later. * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. */ struct tasklet_struct { struct tasklet_struct *next; unsigned long state; atomic_t count; bool use_callback; union { void (*func)(unsigned long data); void (*callback)(struct tasklet_struct *t); }; unsigned long data; }; #define DECLARE_TASKLET(name, _callback) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(0), \ .callback = _callback, \ .use_callback = true, \ } #define DECLARE_TASKLET_DISABLED(name, _callback) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(1), \ .callback = _callback, \ .use_callback = true, \ } #define from_tasklet(var, callback_tasklet, tasklet_fieldname) \ container_of(callback_tasklet, typeof(*var), tasklet_fieldname) #define DECLARE_TASKLET_OLD(name, _func) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(0), \ .func = _func, \ } #define DECLARE_TASKLET_DISABLED_OLD(name, _func) \ struct tasklet_struct name = { \ .count = ATOMIC_INIT(1), \ .func = _func, \ } enum { TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ }; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } void tasklet_unlock(struct tasklet_struct *t); void tasklet_unlock_wait(struct tasklet_struct *t); void tasklet_unlock_spin_wait(struct tasklet_struct *t); #else static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } static inline void tasklet_unlock(struct tasklet_struct *t) { } static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) { } #endif extern void __tasklet_schedule(struct tasklet_struct *t); static inline void tasklet_schedule(struct tasklet_struct *t) { if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) __tasklet_schedule(t); } extern void __tasklet_hi_schedule(struct tasklet_struct *t); static inline void tasklet_hi_schedule(struct tasklet_struct *t) { if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) __tasklet_hi_schedule(t); } static inline void tasklet_disable_nosync(struct tasklet_struct *t) { atomic_inc(&t->count); smp_mb__after_atomic(); } /* * Do not use in new code. Disabling tasklets from atomic contexts is * error prone and should be avoided. 
*/ static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) { tasklet_disable_nosync(t); tasklet_unlock_spin_wait(t); smp_mb(); } static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); tasklet_unlock_wait(t); smp_mb(); } static inline void tasklet_enable(struct tasklet_struct *t) { smp_mb__before_atomic(); atomic_dec(&t->count); } extern void tasklet_kill(struct tasklet_struct *t); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); extern void tasklet_setup(struct tasklet_struct *t, void (*callback)(struct tasklet_struct *)); /* * Autoprobing for irqs: * * probe_irq_on() and probe_irq_off() provide robust primitives * for accurate IRQ probing during kernel initialization. They are * reasonably simple to use, are not "fooled" by spurious interrupts, * and, unlike other attempts at IRQ probing, they do not get hung on * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards). * * For reasonably foolproof probing, use them as follows: * * 1. clear and/or mask the device's internal interrupt. * 2. sti(); * 3. irqs = probe_irq_on(); // "take over" all unassigned idle IRQs * 4. enable the device and cause it to trigger an interrupt. * 5. wait for the device to interrupt, using non-intrusive polling or a delay. * 6. irq = probe_irq_off(irqs); // get IRQ number, 0=none, negative=multiple * 7. service the device to clear its pending interrupt. * 8. loop again if paranoia is required. * * probe_irq_on() returns a mask of allocated irq's. * * probe_irq_off() takes the mask as a parameter, * and returns the irq number which occurred, * or zero if none occurred, or a negative irq number * if more than one irq occurred. */ #if !defined(CONFIG_GENERIC_IRQ_PROBE) static inline unsigned long probe_irq_on(void) { return 0; } static inline int probe_irq_off(unsigned long val) { return 0; } static inline unsigned int probe_irq_mask(unsigned long val) { return 0; } #else extern unsigned long probe_irq_on(void); /* returns 0 on failure */ extern int probe_irq_off(unsigned long); /* returns 0 or negative on failure */ extern unsigned int probe_irq_mask(unsigned long); /* returns mask of ISA interrupts */ #endif #ifdef CONFIG_PROC_FS /* Initialize /proc/irq/ */ extern void init_irq_proc(void); #else static inline void init_irq_proc(void) { } #endif #ifdef CONFIG_IRQ_TIMINGS void irq_timings_enable(void); void irq_timings_disable(void); u64 irq_timings_next_event(u64 now); #endif struct seq_file; int show_interrupts(struct seq_file *p, void *v); int arch_show_interrupts(struct seq_file *p, int prec); extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ #ifndef __irq_entry # define __irq_entry __section(".irqentry.text") #endif #define __softirq_entry __section(".softirqentry.text") #endif
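/*
 * Illustrative sketch, not part of interrupt.h: one way a driver might use
 * the threaded-IRQ API declared above. The IRQ number, the demo_* names and
 * the module boilerplate are assumptions for this example; a real driver
 * would take its IRQ from its bus or platform device and would usually
 * prefer devm_request_threaded_irq() so the line is released automatically.
 */
#include <linux/interrupt.h>
#include <linux/module.h>

static int demo_irq = 42;          /* hypothetical IRQ line */
static unsigned long demo_events;  /* per-device cookie passed as dev_id */

/* Primary (hard IRQ) handler: do the minimum and defer to the thread. */
static irqreturn_t demo_hardirq(int irq, void *dev_id)
{
	unsigned long *events = dev_id;

	(*events)++;
	return IRQ_WAKE_THREAD;
}

/* Threaded handler: runs in process context and may sleep. */
static irqreturn_t demo_thread_fn(int irq, void *dev_id)
{
	unsigned long *events = dev_id;

	pr_info("demo irq %d: %lu events so far\n", irq, *events);
	return IRQ_HANDLED;
}

static int __init demo_init(void)
{
	/* IRQF_ONESHOT keeps the line masked until the thread has run. */
	return request_threaded_irq(demo_irq, demo_hardirq, demo_thread_fn,
				    IRQF_ONESHOT, "demo-irq", &demo_events);
}

static void __exit demo_exit(void)
{
	free_irq(demo_irq, &demo_events);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");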
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h>		/* init_rootfs */
#include <linux/fs_struct.h>	/* get_fs_root et.al. */
#include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
#include <linux/shmem_fs.h>
#include <linux/mnt_idmapping.h>
#include <linux/nospec.h>

#include "pnode.h"
#include "internal.h"

/* Maximum number of mounts in a mount namespace */
static unsigned int sysctl_mount_max __read_mostly = 100000;

static unsigned int m_hash_mask __ro_after_init;
static unsigned int m_hash_shift __ro_after_init;
static unsigned int mp_hash_mask __ro_after_init;
static unsigned int mp_hash_shift __ro_after_init;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);

/* Don't allow confusion with old 32bit mount ID */
static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32);

static struct hlist_head *mount_hashtable __ro_after_init;
static struct hlist_head *mountpoint_hashtable __ro_after_init;
static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */

struct mount_kattr {
	unsigned int attr_set;
	unsigned int attr_clr;
	unsigned int propagation;
	unsigned int lookup_flags;
	bool recurse;
	struct user_namespace *mnt_userns;
	struct mnt_idmap *mnt_idmap;
};

/* /sys/fs */
struct kobject *fs_kobj __ro_after_init;
EXPORT_SYMBOL_GPL(fs_kobj);

/*
 * vfsmount lock may be taken for read to prevent changes to the
 * vfsmount hash, ie. during mountpoint lookups or walking back
 * up the tree.
 *
 * It should be taken for write in all cases where the vfsmount
 * tree or hash is modified or when a vfsmount structure is modified.
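 *
 * A minimal illustrative sketch of the read side (not part of the original
 * file), mirroring what lookup_mnt() and path_is_mountpoint() below do:
 * retry whenever the seqcount changes underneath us. lookup_mnt()
 * additionally legitimizes the result before returning it.
 *
 *	rcu_read_lock();
 *	do {
 *		seq = read_seqbegin(&mount_lock);
 *		child = __lookup_mnt(path->mnt, path->dentry);
 *	} while (read_seqretry(&mount_lock, seq));
 *	rcu_read_unlock();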
*/ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); static inline void lock_mount_hash(void) { write_seqlock(&mount_lock); } static inline void unlock_mount_hash(void) { write_sequnlock(&mount_lock); } static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); tmp = tmp + (tmp >> m_hash_shift); return &mount_hashtable[tmp & m_hash_mask]; } static inline struct hlist_head *mp_hash(struct dentry *dentry) { unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES); tmp = tmp + (tmp >> mp_hash_shift); return &mountpoint_hashtable[tmp & mp_hash_mask]; } static int mnt_alloc_id(struct mount *mnt) { int res = ida_alloc(&mnt_id_ida, GFP_KERNEL); if (res < 0) return res; mnt->mnt_id = res; mnt->mnt_id_unique = atomic64_inc_return(&mnt_id_ctr); return 0; } static void mnt_free_id(struct mount *mnt) { ida_free(&mnt_id_ida, mnt->mnt_id); } /* * Allocate a new peer group ID */ static int mnt_alloc_group_id(struct mount *mnt) { int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL); if (res < 0) return res; mnt->mnt_group_id = res; return 0; } /* * Release a peer group ID */ void mnt_release_group_id(struct mount *mnt) { ida_free(&mnt_group_ida, mnt->mnt_group_id); mnt->mnt_group_id = 0; } /* * vfsmount lock must be held for read */ static inline void mnt_add_count(struct mount *mnt, int n) { #ifdef CONFIG_SMP this_cpu_add(mnt->mnt_pcp->mnt_count, n); #else preempt_disable(); mnt->mnt_count += n; preempt_enable(); #endif } /* * vfsmount lock must be held for write */ int mnt_get_count(struct mount *mnt) { #ifdef CONFIG_SMP int count = 0; int cpu; for_each_possible_cpu(cpu) { count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; } return count; #else return mnt->mnt_count; #endif } static struct mount *alloc_vfsmnt(const char *name) { struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); if (mnt) { int err; err = mnt_alloc_id(mnt); if (err) goto out_free_cache; if (name) { mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL_ACCOUNT); if (!mnt->mnt_devname) goto out_free_id; } #ifdef CONFIG_SMP mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); if (!mnt->mnt_pcp) goto out_free_devname; this_cpu_add(mnt->mnt_pcp->mnt_count, 1); #else mnt->mnt_count = 1; mnt->mnt_writers = 0; #endif INIT_HLIST_NODE(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); INIT_LIST_HEAD(&mnt->mnt_expire); INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); INIT_LIST_HEAD(&mnt->mnt_umounting); INIT_HLIST_HEAD(&mnt->mnt_stuck_children); mnt->mnt.mnt_idmap = &nop_mnt_idmap; } return mnt; #ifdef CONFIG_SMP out_free_devname: kfree_const(mnt->mnt_devname); #endif out_free_id: mnt_free_id(mnt); out_free_cache: kmem_cache_free(mnt_cache, mnt); return NULL; } /* * Most r/o checks on a fs are for operations that take * discrete amounts of time, like a write() or unlink(). * We must keep track of when those operations start * (for permission checks) and when they end, so that * we can determine when writes are able to occur to * a filesystem. */ /* * __mnt_is_readonly: check whether a mount is read-only * @mnt: the mount to check for its write status * * This shouldn't be used directly ouside of the VFS. * It does not guarantee that the filesystem will stay * r/w, just that it is right *now*. 
This can not and * should not be used in place of IS_RDONLY(inode). * mnt_want/drop_write() will _keep_ the filesystem * r/w. */ bool __mnt_is_readonly(struct vfsmount *mnt) { return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); } EXPORT_SYMBOL_GPL(__mnt_is_readonly); static inline void mnt_inc_writers(struct mount *mnt) { #ifdef CONFIG_SMP this_cpu_inc(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers++; #endif } static inline void mnt_dec_writers(struct mount *mnt) { #ifdef CONFIG_SMP this_cpu_dec(mnt->mnt_pcp->mnt_writers); #else mnt->mnt_writers--; #endif } static unsigned int mnt_get_writers(struct mount *mnt) { #ifdef CONFIG_SMP unsigned int count = 0; int cpu; for_each_possible_cpu(cpu) { count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; } return count; #else return mnt->mnt_writers; #endif } static int mnt_is_readonly(struct vfsmount *mnt) { if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) return 1; /* * The barrier pairs with the barrier in sb_start_ro_state_change() * making sure if we don't see s_readonly_remount set yet, we also will * not see any superblock / mount flag changes done by remount. * It also pairs with the barrier in sb_end_ro_state_change() * assuring that if we see s_readonly_remount already cleared, we will * see the values of superblock / mount flags updated by remount. */ smp_rmb(); return __mnt_is_readonly(mnt); } /* * Most r/o & frozen checks on a fs are for operations that take discrete * amounts of time, like a write() or unlink(). We must keep track of when * those operations start (for permission checks) and when they end, so that we * can determine when writes are able to occur to a filesystem. */ /** * mnt_get_write_access - get write access to a mount without freeze protection * @m: the mount on which to take a write * * This tells the low-level filesystem that a write is about to be performed to * it, and makes sure that writes are allowed (mnt it read-write) before * returning success. This operation does not protect against filesystem being * frozen. When the write operation is finished, mnt_put_write_access() must be * called. This is effectively a refcount. */ int mnt_get_write_access(struct vfsmount *m) { struct mount *mnt = real_mount(m); int ret = 0; preempt_disable(); mnt_inc_writers(mnt); /* * The store to mnt_inc_writers must be visible before we pass * MNT_WRITE_HOLD loop below, so that the slowpath can see our * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); might_lock(&mount_lock.lock); while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { cpu_relax(); } else { /* * This prevents priority inversion, if the task * setting MNT_WRITE_HOLD got preempted on a remote * CPU, and it prevents life lock if the task setting * MNT_WRITE_HOLD has a lower priority and is bound to * the same CPU as the task that is spinning here. */ preempt_enable(); lock_mount_hash(); unlock_mount_hash(); preempt_disable(); } } /* * The barrier pairs with the barrier sb_start_ro_state_change() making * sure that if we see MNT_WRITE_HOLD cleared, we will also see * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in * mnt_is_readonly() and bail in case we are racing with remount * read-only. 
*/ smp_rmb(); if (mnt_is_readonly(m)) { mnt_dec_writers(mnt); ret = -EROFS; } preempt_enable(); return ret; } EXPORT_SYMBOL_GPL(mnt_get_write_access); /** * mnt_want_write - get write access to a mount * @m: the mount on which to take a write * * This tells the low-level filesystem that a write is about to be performed to * it, and makes sure that writes are allowed (mount is read-write, filesystem * is not frozen) before returning success. When the write operation is * finished, mnt_drop_write() must be called. This is effectively a refcount. */ int mnt_want_write(struct vfsmount *m) { int ret; sb_start_write(m->mnt_sb); ret = mnt_get_write_access(m); if (ret) sb_end_write(m->mnt_sb); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write); /** * mnt_get_write_access_file - get write access to a file's mount * @file: the file who's mount on which to take a write * * This is like mnt_get_write_access, but if @file is already open for write it * skips incrementing mnt_writers (since the open file already has a reference) * and instead only does the check for emergency r/o remounts. This must be * paired with mnt_put_write_access_file. */ int mnt_get_write_access_file(struct file *file) { if (file->f_mode & FMODE_WRITER) { /* * Superblock may have become readonly while there are still * writable fd's, e.g. due to a fs error with errors=remount-ro */ if (__mnt_is_readonly(file->f_path.mnt)) return -EROFS; return 0; } return mnt_get_write_access(file->f_path.mnt); } /** * mnt_want_write_file - get write access to a file's mount * @file: the file who's mount on which to take a write * * This is like mnt_want_write, but if the file is already open for writing it * skips incrementing mnt_writers (since the open file already has a reference) * and instead only does the freeze protection and the check for emergency r/o * remounts. This must be paired with mnt_drop_write_file. */ int mnt_want_write_file(struct file *file) { int ret; sb_start_write(file_inode(file)->i_sb); ret = mnt_get_write_access_file(file); if (ret) sb_end_write(file_inode(file)->i_sb); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write_file); /** * mnt_put_write_access - give up write access to a mount * @mnt: the mount on which to give up write access * * Tells the low-level filesystem that we are done * performing writes to it. Must be matched with * mnt_get_write_access() call above. */ void mnt_put_write_access(struct vfsmount *mnt) { preempt_disable(); mnt_dec_writers(real_mount(mnt)); preempt_enable(); } EXPORT_SYMBOL_GPL(mnt_put_write_access); /** * mnt_drop_write - give up write access to a mount * @mnt: the mount on which to give up write access * * Tells the low-level filesystem that we are done performing writes to it and * also allows filesystem to be frozen again. Must be matched with * mnt_want_write() call above. */ void mnt_drop_write(struct vfsmount *mnt) { mnt_put_write_access(mnt); sb_end_write(mnt->mnt_sb); } EXPORT_SYMBOL_GPL(mnt_drop_write); void mnt_put_write_access_file(struct file *file) { if (!(file->f_mode & FMODE_WRITER)) mnt_put_write_access(file->f_path.mnt); } void mnt_drop_write_file(struct file *file) { mnt_put_write_access_file(file); sb_end_write(file_inode(file)->i_sb); } EXPORT_SYMBOL(mnt_drop_write_file); /** * mnt_hold_writers - prevent write access to the given mount * @mnt: mnt to prevent write access to * * Prevents write access to @mnt if there are no active writers for @mnt. 
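 *
 * A hedged, illustrative pairing only (this is essentially what
 * mnt_make_readonly() below does, with the hash lock held by its caller):
 *
 *	lock_mount_hash();
 *	ret = mnt_hold_writers(mnt);
 *	if (!ret)
 *		mnt->mnt.mnt_flags |= MNT_READONLY;
 *	mnt_unhold_writers(mnt);
 *	unlock_mount_hash();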
* This function needs to be called and return successfully before changing * properties of @mnt that need to remain stable for callers with write access * to @mnt. * * After this functions has been called successfully callers must pair it with * a call to mnt_unhold_writers() in order to stop preventing write access to * @mnt. * * Context: This function expects lock_mount_hash() to be held serializing * setting MNT_WRITE_HOLD. * Return: On success 0 is returned. * On error, -EBUSY is returned. */ static inline int mnt_hold_writers(struct mount *mnt) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store * should be visible before we do. */ smp_mb(); /* * With writers on hold, if this value is zero, then there are * definitely no active writers (although held writers may subsequently * increment the count, they'll have to wait, and decrement it after * seeing MNT_READONLY). * * It is OK to have counter incremented on one CPU and decremented on * another: the sum will add up correctly. The danger would be when we * sum up each counter, if we read a counter before it is incremented, * but then read another CPU's count which it has been subsequently * decremented from -- we would see more decrements than we should. * MNT_WRITE_HOLD protects against this scenario, because * mnt_want_write first increments count, then smp_mb, then spins on * MNT_WRITE_HOLD, so it can't be decremented by another CPU while * we're counting up here. */ if (mnt_get_writers(mnt) > 0) return -EBUSY; return 0; } /** * mnt_unhold_writers - stop preventing write access to the given mount * @mnt: mnt to stop preventing write access to * * Stop preventing write access to @mnt allowing callers to gain write access * to @mnt again. * * This function can only be called after a successful call to * mnt_hold_writers(). * * Context: This function expects lock_mount_hash() to be held. */ static inline void mnt_unhold_writers(struct mount *mnt) { /* * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers * that become unheld will see MNT_READONLY. */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } static int mnt_make_readonly(struct mount *mnt) { int ret; ret = mnt_hold_writers(mnt); if (!ret) mnt->mnt.mnt_flags |= MNT_READONLY; mnt_unhold_writers(mnt); return ret; } int sb_prepare_remount_readonly(struct super_block *sb) { struct mount *mnt; int err = 0; /* Racy optimization. 
Recheck the counter under MNT_WRITE_HOLD */ if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; lock_mount_hash(); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { err = mnt_hold_writers(mnt); if (err) break; } } if (!err && atomic_long_read(&sb->s_remove_count)) err = -EBUSY; if (!err) sb_start_ro_state_change(sb); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } unlock_mount_hash(); return err; } static void free_vfsmnt(struct mount *mnt) { mnt_idmap_put(mnt_idmap(&mnt->mnt)); kfree_const(mnt->mnt_devname); #ifdef CONFIG_SMP free_percpu(mnt->mnt_pcp); #endif kmem_cache_free(mnt_cache, mnt); } static void delayed_free_vfsmnt(struct rcu_head *head) { free_vfsmnt(container_of(head, struct mount, mnt_rcu)); } /* call under rcu_read_lock */ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) { struct mount *mnt; if (read_seqretry(&mount_lock, seq)) return 1; if (bastard == NULL) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return 1; } lock_mount_hash(); if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { mnt_add_count(mnt, -1); unlock_mount_hash(); return 1; } unlock_mount_hash(); /* caller will mntput() */ return -1; } /* call under rcu_read_lock */ static bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) { int res = __legitimize_mnt(bastard, seq); if (likely(!res)) return true; if (unlikely(res < 0)) { rcu_read_unlock(); mntput(bastard); rcu_read_lock(); } return false; } /** * __lookup_mnt - find first child mount * @mnt: parent mount * @dentry: mountpoint * * If @mnt has a child mount @c mounted @dentry find and return it. * * Note that the child mount @c need not be unique. There are cases * where shadow mounts are created. For example, during mount * propagation when a source mount @mnt whose root got overmounted by a * mount @o after path lookup but before @namespace_sem could be * acquired gets copied and propagated. So @mnt gets copied including * @o. When @mnt is propagated to a destination mount @d that already * has another mount @n mounted at the same mountpoint then the source * mount @mnt will be tucked beneath @n, i.e., @n will be mounted on * @mnt and @mnt mounted on @d. Now both @n and @o are mounted at @mnt * on @dentry. * * Return: The first child of @mnt mounted @dentry or NULL. */ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { struct hlist_head *head = m_hash(mnt, dentry); struct mount *p; hlist_for_each_entry_rcu(p, head, mnt_hash) if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) return p; return NULL; } /* * lookup_mnt - Return the first child mount mounted at path * * "First" means first mounted chronologically. If you create the * following mounts: * * mount /dev/sda1 /mnt * mount /dev/sda2 /mnt * mount /dev/sda3 /mnt * * Then lookup_mnt() on the base /mnt dentry in the root mount will * return successively the root dentry and vfsmount of /dev/sda1, then * /dev/sda2, then /dev/sda3, then NULL. * * lookup_mnt takes a reference to the found vfsmount. */ struct vfsmount *lookup_mnt(const struct path *path) { struct mount *child_mnt; struct vfsmount *m; unsigned seq; rcu_read_lock(); do { seq = read_seqbegin(&mount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry); m = child_mnt ? 
&child_mnt->mnt : NULL; } while (!legitimize_mnt(m, seq)); rcu_read_unlock(); return m; } /* * __is_local_mountpoint - Test to see if dentry is a mountpoint in the * current mount namespace. * * The common case is dentries are not mountpoints at all and that * test is handled inline. For the slow case when we are actually * dealing with a mountpoint of some kind, walk through all of the * mounts in the current mount namespace and test to see if the dentry * is a mountpoint. * * The mount_hashtable is not usable in the context because we * need to identify all mounts that may be in the current mount * namespace not just a mount that happens to have some specified * parent mount. */ bool __is_local_mountpoint(struct dentry *dentry) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct mount *mnt, *n; bool is_covered = false; down_read(&namespace_sem); rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { is_covered = (mnt->mnt_mountpoint == dentry); if (is_covered) break; } up_read(&namespace_sem); return is_covered; } static struct mountpoint *lookup_mountpoint(struct dentry *dentry) { struct hlist_head *chain = mp_hash(dentry); struct mountpoint *mp; hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { mp->m_count++; return mp; } } return NULL; } static struct mountpoint *get_mountpoint(struct dentry *dentry) { struct mountpoint *mp, *new = NULL; int ret; if (d_mountpoint(dentry)) { /* might be worth a WARN_ON() */ if (d_unlinked(dentry)) return ERR_PTR(-ENOENT); mountpoint: read_seqlock_excl(&mount_lock); mp = lookup_mountpoint(dentry); read_sequnlock_excl(&mount_lock); if (mp) goto done; } if (!new) new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); if (!new) return ERR_PTR(-ENOMEM); /* Exactly one processes may set d_mounted */ ret = d_set_mounted(dentry); /* Someone else set d_mounted? */ if (ret == -EBUSY) goto mountpoint; /* The dentry is not available as a mountpoint? */ mp = ERR_PTR(ret); if (ret) goto done; /* Add the new mountpoint to the hash table */ read_seqlock_excl(&mount_lock); new->m_dentry = dget(dentry); new->m_count = 1; hlist_add_head(&new->m_hash, mp_hash(dentry)); INIT_HLIST_HEAD(&new->m_list); read_sequnlock_excl(&mount_lock); mp = new; new = NULL; done: kfree(new); return mp; } /* * vfsmount lock must be held. Additionally, the caller is responsible * for serializing calls for given disposal list. 
*/ static void __put_mountpoint(struct mountpoint *mp, struct list_head *list) { if (!--mp->m_count) { struct dentry *dentry = mp->m_dentry; BUG_ON(!hlist_empty(&mp->m_list)); spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); dput_to_list(dentry, list); hlist_del(&mp->m_hash); kfree(mp); } } /* called with namespace_lock and vfsmount lock */ static void put_mountpoint(struct mountpoint *mp) { __put_mountpoint(mp, &ex_mountpoints); } static inline int check_mnt(struct mount *mnt) { return mnt->mnt_ns == current->nsproxy->mnt_ns; } /* * vfsmount lock must be held for write */ static void touch_mnt_namespace(struct mnt_namespace *ns) { if (ns) { ns->event = ++event; wake_up_interruptible(&ns->poll); } } /* * vfsmount lock must be held for write */ static void __touch_mnt_namespace(struct mnt_namespace *ns) { if (ns && ns->event != event) { ns->event = event; wake_up_interruptible(&ns->poll); } } /* * vfsmount lock must be held for write */ static struct mountpoint *unhash_mnt(struct mount *mnt) { struct mountpoint *mp; mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); hlist_del_init_rcu(&mnt->mnt_hash); hlist_del_init(&mnt->mnt_mp_list); mp = mnt->mnt_mp; mnt->mnt_mp = NULL; return mp; } /* * vfsmount lock must be held for write */ static void umount_mnt(struct mount *mnt) { put_mountpoint(unhash_mnt(mnt)); } /* * vfsmount lock must be held for write */ void mnt_set_mountpoint(struct mount *mnt, struct mountpoint *mp, struct mount *child_mnt) { mp->m_count++; mnt_add_count(mnt, 1); /* essentially, that's mntget */ child_mnt->mnt_mountpoint = mp->m_dentry; child_mnt->mnt_parent = mnt; child_mnt->mnt_mp = mp; hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); } /** * mnt_set_mountpoint_beneath - mount a mount beneath another one * * @new_parent: the source mount * @top_mnt: the mount beneath which @new_parent is mounted * @new_mp: the new mountpoint of @top_mnt on @new_parent * * Remove @top_mnt from its current mountpoint @top_mnt->mnt_mp and * parent @top_mnt->mnt_parent and mount it on top of @new_parent at * @new_mp. And mount @new_parent on the old parent and old * mountpoint of @top_mnt. * * Context: This function expects namespace_lock() and lock_mount_hash() * to have been acquired in that order. */ static void mnt_set_mountpoint_beneath(struct mount *new_parent, struct mount *top_mnt, struct mountpoint *new_mp) { struct mount *old_top_parent = top_mnt->mnt_parent; struct mountpoint *old_top_mp = top_mnt->mnt_mp; mnt_set_mountpoint(old_top_parent, old_top_mp, new_parent); mnt_change_mountpoint(new_parent, new_mp, top_mnt); } static void __attach_mnt(struct mount *mnt, struct mount *parent) { hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } /** * attach_mnt - mount a mount, attach to @mount_hashtable and parent's * list of child mounts * @parent: the parent * @mnt: the new mount * @mp: the new mountpoint * @beneath: whether to mount @mnt beneath or on top of @parent * * If @beneath is false, mount @mnt at @mp on @parent. Then attach @mnt * to @parent's child mount list and to @mount_hashtable. * * If @beneath is true, remove @mnt from its current parent and * mountpoint and mount it on @mp on @parent, and mount @parent on the * old parent and old mountpoint of @mnt. Finally, attach @parent to * @mnt_hashtable and @parent->mnt_parent->mnt_mounts. 
* * Note, when __attach_mnt() is called @mnt->mnt_parent already points * to the correct parent. * * Context: This function expects namespace_lock() and lock_mount_hash() * to have been acquired in that order. */ static void attach_mnt(struct mount *mnt, struct mount *parent, struct mountpoint *mp, bool beneath) { if (beneath) mnt_set_mountpoint_beneath(mnt, parent, mp); else mnt_set_mountpoint(parent, mp, mnt); /* * Note, @mnt->mnt_parent has to be used. If @mnt was mounted * beneath @parent then @mnt will need to be attached to * @parent's old parent, not @parent. IOW, @mnt->mnt_parent * isn't the same mount as @parent. */ __attach_mnt(mnt, mnt->mnt_parent); } void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt) { struct mountpoint *old_mp = mnt->mnt_mp; struct mount *old_parent = mnt->mnt_parent; list_del_init(&mnt->mnt_child); hlist_del_init(&mnt->mnt_mp_list); hlist_del_init_rcu(&mnt->mnt_hash); attach_mnt(mnt, parent, mp, false); put_mountpoint(old_mp); mnt_add_count(old_parent, -1); } static inline struct mount *node_to_mount(struct rb_node *node) { return node ? rb_entry(node, struct mount, mnt_node) : NULL; } static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) { struct rb_node **link = &ns->mounts.rb_node; struct rb_node *parent = NULL; WARN_ON(mnt->mnt.mnt_flags & MNT_ONRB); mnt->mnt_ns = ns; while (*link) { parent = *link; if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) link = &parent->rb_left; else link = &parent->rb_right; } rb_link_node(&mnt->mnt_node, parent, link); rb_insert_color(&mnt->mnt_node, &ns->mounts); mnt->mnt.mnt_flags |= MNT_ONRB; } /* * vfsmount lock must be held for write */ static void commit_tree(struct mount *mnt) { struct mount *parent = mnt->mnt_parent; struct mount *m; LIST_HEAD(head); struct mnt_namespace *n = parent->mnt_ns; BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); while (!list_empty(&head)) { m = list_first_entry(&head, typeof(*m), mnt_list); list_del(&m->mnt_list); mnt_add_to_ns(n, m); } n->nr_mounts += n->pending_mounts; n->pending_mounts = 0; __attach_mnt(mnt, parent); touch_mnt_namespace(n); } static struct mount *next_mnt(struct mount *p, struct mount *root) { struct list_head *next = p->mnt_mounts.next; if (next == &p->mnt_mounts) { while (1) { if (p == root) return NULL; next = p->mnt_child.next; if (next != &p->mnt_parent->mnt_mounts) break; p = p->mnt_parent; } } return list_entry(next, struct mount, mnt_child); } static struct mount *skip_mnt_tree(struct mount *p) { struct list_head *prev = p->mnt_mounts.prev; while (prev != &p->mnt_mounts) { p = list_entry(prev, struct mount, mnt_child); prev = p->mnt_mounts.prev; } return p; } /** * vfs_create_mount - Create a mount for a configured superblock * @fc: The configuration context with the superblock attached * * Create a mount to an already configured superblock. If necessary, the * caller should invoke vfs_get_tree() before calling this. * * Note that this does not attach the mount to anything. 
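 *
 * A rough usage sketch, assumed from what fc_mount() and vfs_kern_mount()
 * below do rather than a prescribed sequence:
 *
 *	fc = fs_context_for_mount(type, sb_flags);
 *	if (!IS_ERR(fc)) {
 *		err = vfs_get_tree(fc);
 *		if (!err) {
 *			up_write(&fc->root->d_sb->s_umount);
 *			mnt = vfs_create_mount(fc);
 *		}
 *		put_fs_context(fc);
 *	}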
*/ struct vfsmount *vfs_create_mount(struct fs_context *fc) { struct mount *mnt; if (!fc->root) return ERR_PTR(-EINVAL); mnt = alloc_vfsmnt(fc->source ?: "none"); if (!mnt) return ERR_PTR(-ENOMEM); if (fc->sb_flags & SB_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; atomic_inc(&fc->root->d_sb->s_active); mnt->mnt.mnt_sb = fc->root->d_sb; mnt->mnt.mnt_root = dget(fc->root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); unlock_mount_hash(); return &mnt->mnt; } EXPORT_SYMBOL(vfs_create_mount); struct vfsmount *fc_mount(struct fs_context *fc) { int err = vfs_get_tree(fc); if (!err) { up_write(&fc->root->d_sb->s_umount); return vfs_create_mount(fc); } return ERR_PTR(err); } EXPORT_SYMBOL(fc_mount); struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct fs_context *fc; struct vfsmount *mnt; int ret = 0; if (!type) return ERR_PTR(-EINVAL); fc = fs_context_for_mount(type, flags); if (IS_ERR(fc)) return ERR_CAST(fc); if (name) ret = vfs_parse_fs_string(fc, "source", name, strlen(name)); if (!ret) ret = parse_monolithic_mount_data(fc, data); if (!ret) mnt = fc_mount(fc); else mnt = ERR_PTR(ret); put_fs_context(fc); return mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); struct vfsmount * vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, const char *name, void *data) { /* Until it is worked out how to pass the user namespace * through from the parent mount to the submount don't support * unprivileged mounts with submounts. */ if (mountpoint->d_sb->s_user_ns != &init_user_ns) return ERR_PTR(-EPERM); return vfs_kern_mount(type, SB_SUBMOUNT, name, data); } EXPORT_SYMBOL_GPL(vfs_submount); static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { struct super_block *sb = old->mnt.mnt_sb; struct mount *mnt; int err; mnt = alloc_vfsmnt(old->mnt_devname); if (!mnt) return ERR_PTR(-ENOMEM); if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) mnt->mnt_group_id = 0; /* not a peer of original */ else mnt->mnt_group_id = old->mnt_group_id; if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { err = mnt_alloc_group_id(mnt); if (err) goto out_free; } mnt->mnt.mnt_flags = old->mnt.mnt_flags; mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_ONRB); atomic_inc(&sb->s_active); mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); unlock_mount_hash(); if ((flag & CL_SLAVE) || ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); mnt->mnt_master = old; CLEAR_MNT_SHARED(mnt); } else if (!(flag & CL_PRIVATE)) { if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) list_add(&mnt->mnt_share, &old->mnt_share); if (IS_MNT_SLAVE(old)) list_add(&mnt->mnt_slave, &old->mnt_slave); mnt->mnt_master = old->mnt_master; } else { CLEAR_MNT_SHARED(mnt); } if (flag & CL_MAKE_SHARED) set_mnt_shared(mnt); /* stick the duplicate mount on the same expiry list * as the original if that was on one */ if (flag & CL_EXPIRE) { if (!list_empty(&old->mnt_expire)) list_add(&mnt->mnt_expire, &old->mnt_expire); } return mnt; out_free: mnt_free_id(mnt); free_vfsmnt(mnt); return ERR_PTR(err); } static void cleanup_mnt(struct mount *mnt) { struct hlist_node *p; struct mount *m; /* * The warning here 
probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this happens, the * filesystem was probably unable to make r/w->r/o transitions. * The locking used to deal with mnt_count decrement provides barriers, * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); if (unlikely(mnt->mnt_pins.first)) mnt_pin_kill(mnt); hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) { hlist_del(&m->mnt_umount); mntput(&m->mnt); } fsnotify_vfsmount_delete(&mnt->mnt); dput(mnt->mnt.mnt_root); deactivate_super(mnt->mnt.mnt_sb); mnt_free_id(mnt); call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); } static void __cleanup_mnt(struct rcu_head *head) { cleanup_mnt(container_of(head, struct mount, mnt_rcu)); } static LLIST_HEAD(delayed_mntput_list); static void delayed_mntput(struct work_struct *unused) { struct llist_node *node = llist_del_all(&delayed_mntput_list); struct mount *m, *t; llist_for_each_entry_safe(m, t, node, mnt_llist) cleanup_mnt(m); } static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { LIST_HEAD(list); int count; rcu_read_lock(); if (likely(READ_ONCE(mnt->mnt_ns))) { /* * Since we don't do lock_mount_hash() here, * ->mnt_ns can change under us. However, if it's * non-NULL, then there's a reference that won't * be dropped until after an RCU delay done after * turning ->mnt_ns NULL. So if we observe it * non-NULL under rcu_read_lock(), the reference * we are dropping is not the final one. */ mnt_add_count(mnt, -1); rcu_read_unlock(); return; } lock_mount_hash(); /* * make sure that if __legitimize_mnt() has not seen us grab * mount_lock, we'll see their refcount increment here. */ smp_mb(); mnt_add_count(mnt, -1); count = mnt_get_count(mnt); if (count != 0) { WARN_ON(count < 0); rcu_read_unlock(); unlock_mount_hash(); return; } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { rcu_read_unlock(); unlock_mount_hash(); return; } mnt->mnt.mnt_flags |= MNT_DOOMED; rcu_read_unlock(); list_del(&mnt->mnt_instance); if (unlikely(!list_empty(&mnt->mnt_mounts))) { struct mount *p, *tmp; list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { __put_mountpoint(unhash_mnt(p), &list); hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children); } } unlock_mount_hash(); shrink_dentry_list(&list); if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { struct task_struct *task = current; if (likely(!(task->flags & PF_KTHREAD))) { init_task_work(&mnt->mnt_rcu, __cleanup_mnt); if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME)) return; } if (llist_add(&mnt->mnt_llist, &delayed_mntput_list)) schedule_delayed_work(&delayed_mntput_work, 1); return; } cleanup_mnt(mnt); } void mntput(struct vfsmount *mnt) { if (mnt) { struct mount *m = real_mount(mnt); /* avoid cacheline pingpong */ if (unlikely(m->mnt_expiry_mark)) WRITE_ONCE(m->mnt_expiry_mark, 0); mntput_no_expire(m); } } EXPORT_SYMBOL(mntput); struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) mnt_add_count(real_mount(mnt), 1); return mnt; } EXPORT_SYMBOL(mntget); /* * Make a mount point inaccessible to new lookups. * Because there may still be current users, the caller MUST WAIT * for an RCU grace period before destroying the mount point. */ void mnt_make_shortterm(struct vfsmount *mnt) { if (mnt) real_mount(mnt)->mnt_ns = NULL; } /** * path_is_mountpoint() - Check if path is a mount in the current namespace. 
* @path: path to check * * d_mountpoint() can only be used reliably to establish if a dentry is * not mounted in any namespace and that common case is handled inline. * d_mountpoint() isn't aware of the possibility there may be multiple * mounts using a given dentry in a different namespace. This function * checks if the passed in path is a mountpoint rather than the dentry * alone. */ bool path_is_mountpoint(const struct path *path) { unsigned seq; bool res; if (!d_mountpoint(path->dentry)) return false; rcu_read_lock(); do { seq = read_seqbegin(&mount_lock); res = __path_is_mountpoint(path); } while (read_seqretry(&mount_lock, seq)); rcu_read_unlock(); return res; } EXPORT_SYMBOL(path_is_mountpoint); struct vfsmount *mnt_clone_internal(const struct path *path) { struct mount *p; p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); if (IS_ERR(p)) return ERR_CAST(p); p->mnt.mnt_flags |= MNT_INTERNAL; return &p->mnt; } /* * Returns the mount which either has the specified mnt_id, or has the next * smallest id afer the specified one. */ static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id) { struct rb_node *node = ns->mounts.rb_node; struct mount *ret = NULL; while (node) { struct mount *m = node_to_mount(node); if (mnt_id <= m->mnt_id_unique) { ret = node_to_mount(node); if (mnt_id == m->mnt_id_unique) break; node = node->rb_left; } else { node = node->rb_right; } } return ret; } #ifdef CONFIG_PROC_FS /* iterator; we want it to have access to namespace_sem, thus here... */ static void *m_start(struct seq_file *m, loff_t *pos) { struct proc_mounts *p = m->private; down_read(&namespace_sem); return mnt_find_id_at(p->ns, *pos); } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { struct mount *next = NULL, *mnt = v; struct rb_node *node = rb_next(&mnt->mnt_node); ++*pos; if (node) { next = node_to_mount(node); *pos = next->mnt_id_unique; } return next; } static void m_stop(struct seq_file *m, void *v) { up_read(&namespace_sem); } static int m_show(struct seq_file *m, void *v) { struct proc_mounts *p = m->private; struct mount *r = v; return p->show(m, &r->mnt); } const struct seq_operations mounts_op = { .start = m_start, .next = m_next, .stop = m_stop, .show = m_show, }; #endif /* CONFIG_PROC_FS */ /** * may_umount_tree - check if a mount tree is busy * @m: root of mount tree * * This is called to check if a tree of mounts has any * open files, pwds, chroots or sub mounts that are * busy. */ int may_umount_tree(struct vfsmount *m) { struct mount *mnt = real_mount(m); int actual_refs = 0; int minimum_refs = 0; struct mount *p; BUG_ON(!m); /* write lock needed for mnt_get_count */ lock_mount_hash(); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } unlock_mount_hash(); if (actual_refs > minimum_refs) return 0; return 1; } EXPORT_SYMBOL(may_umount_tree); /** * may_umount - check if a mount point is busy * @mnt: root of mount * * This is called to check if a mount point has any * open files, pwds, chroots or sub mounts. If the * mount has sub mounts this will return busy * regardless of whether the sub mounts are busy. * * Doesn't take quota and stuff into account. IOW, in some cases it will * give false negatives. The main reason why it's here is that we need * a non-destructive way to look for easily umountable filesystems. 
*/ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); lock_mount_hash(); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; unlock_mount_hash(); up_read(&namespace_sem); return ret; } EXPORT_SYMBOL(may_umount); static void namespace_unlock(void) { struct hlist_head head; struct hlist_node *p; struct mount *m; LIST_HEAD(list); hlist_move_list(&unmounted, &head); list_splice_init(&ex_mountpoints, &list); up_write(&namespace_sem); shrink_dentry_list(&list); if (likely(hlist_empty(&head))) return; synchronize_rcu_expedited(); hlist_for_each_entry_safe(m, p, &head, mnt_umount) { hlist_del(&m->mnt_umount); mntput(&m->mnt); } } static inline void namespace_lock(void) { down_write(&namespace_sem); } enum umount_tree_flags { UMOUNT_SYNC = 1, UMOUNT_PROPAGATE = 2, UMOUNT_CONNECTED = 4, }; static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how) { /* Leaving mounts connected is only valid for lazy umounts */ if (how & UMOUNT_SYNC) return true; /* A mount without a parent has nothing to be connected to */ if (!mnt_has_parent(mnt)) return true; /* Because the reference counting rules change when mounts are * unmounted and connected, umounted mounts may not be * connected to mounted mounts. */ if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) return true; /* Has it been requested that the mount remain connected? */ if (how & UMOUNT_CONNECTED) return false; /* Is the mount locked such that it needs to remain connected? */ if (IS_MNT_LOCKED(mnt)) return false; /* By default disconnect the mount */ return true; } /* * mount_lock must be held * namespace_sem must be held for write */ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) { LIST_HEAD(tmp_list); struct mount *p; if (how & UMOUNT_PROPAGATE) propagate_mount_unlock(mnt); /* Gather the mounts to umount */ for (p = mnt; p; p = next_mnt(p, mnt)) { p->mnt.mnt_flags |= MNT_UMOUNT; if (p->mnt.mnt_flags & MNT_ONRB) move_from_ns(p, &tmp_list); else list_move(&p->mnt_list, &tmp_list); } /* Hide the mounts from mnt_mounts */ list_for_each_entry(p, &tmp_list, mnt_list) { list_del_init(&p->mnt_child); } /* Add propogated mounts to the tmp_list */ if (how & UMOUNT_PROPAGATE) propagate_umount(&tmp_list); while (!list_empty(&tmp_list)) { struct mnt_namespace *ns; bool disconnect; p = list_first_entry(&tmp_list, struct mount, mnt_list); list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); ns = p->mnt_ns; if (ns) { ns->nr_mounts--; __touch_mnt_namespace(ns); } p->mnt_ns = NULL; if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; disconnect = disconnect_mount(p, how); if (mnt_has_parent(p)) { mnt_add_count(p->mnt_parent, -1); if (!disconnect) { /* Don't forget about p */ list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); } else { umount_mnt(p); } } change_mnt_propagation(p, MS_PRIVATE); if (disconnect) hlist_add_head(&p->mnt_umount, &unmounted); } } static void shrink_submounts(struct mount *mnt); static int do_umount_root(struct super_block *sb) { int ret = 0; down_write(&sb->s_umount); if (!sb_rdonly(sb)) { struct fs_context *fc; fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, SB_RDONLY); if (IS_ERR(fc)) { ret = PTR_ERR(fc); } else { ret = parse_monolithic_mount_data(fc, NULL); if (!ret) ret = reconfigure_super(fc); put_fs_context(fc); } } up_write(&sb->s_umount); return ret; } static int do_umount(struct mount *mnt, int flags) { struct super_block *sb = mnt->mnt.mnt_sb; int retval; retval = security_sb_umount(&mnt->mnt, flags); if (retval) return retval; /* * 
Allow userspace to request a mountpoint be expired rather than * unmounting unconditionally. Unmount only happens if: * (1) the mark is already set (the mark is cleared by mntput()) * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] */ if (flags & MNT_EXPIRE) { if (&mnt->mnt == current->fs->root.mnt || flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; /* * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. */ lock_mount_hash(); if (mnt_get_count(mnt) != 2) { unlock_mount_hash(); return -EBUSY; } unlock_mount_hash(); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; } /* * If we may have to abort operations to get out of this * mount, and they will themselves hold resources we must * allow the fs to do things. In the Unix tradition of * 'Gee thats tricky lets do it in userspace' the umount_begin * might fail to complete on the first run through as other tasks * must return, and the like. Thats for the mount program to worry * about for the moment. */ if (flags & MNT_FORCE && sb->s_op->umount_begin) { sb->s_op->umount_begin(sb); } /* * No sense to grab the lock for this test, but test itself looks * somewhat bogus. Suggestions for better replacement? * Ho-hum... In principle, we might treat that as umount + switch * to rootfs. GC would eventually take care of the old vfsmount. * Actually it makes sense, especially if rootfs would contain a * /reboot - static binary that would close all descriptors and * call reboot(9). Then init(8) could umount root and exec /reboot. */ if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { /* * Special case for "unmounting" root ... * we just try to remount it readonly. */ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; return do_umount_root(sb); } namespace_lock(); lock_mount_hash(); /* Recheck MNT_LOCKED with the locks held */ retval = -EINVAL; if (mnt->mnt.mnt_flags & MNT_LOCKED) goto out; event++; if (flags & MNT_DETACH) { if (mnt->mnt.mnt_flags & MNT_ONRB || !list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { if (mnt->mnt.mnt_flags & MNT_ONRB || !list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); retval = 0; } } out: unlock_mount_hash(); namespace_unlock(); return retval; } /* * __detach_mounts - lazily unmount all mounts on the specified dentry * * During unlink, rmdir, and d_drop it is possible to loose the path * to an existing mountpoint, and wind up leaking the mount. * detach_mounts allows lazily unmounting those mounts instead of * leaking them. * * The caller may hold dentry->d_inode->i_mutex. */ void __detach_mounts(struct dentry *dentry) { struct mountpoint *mp; struct mount *mnt; namespace_lock(); lock_mount_hash(); mp = lookup_mountpoint(dentry); if (!mp) goto out_unlock; event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { umount_mnt(mnt); hlist_add_head(&mnt->mnt_umount, &unmounted); } else umount_tree(mnt, UMOUNT_CONNECTED); } put_mountpoint(mp); out_unlock: unlock_mount_hash(); namespace_unlock(); } /* * Is the caller allowed to modify his namespace? */ bool may_mount(void) { return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); } /** * path_mounted - check whether path is mounted * @path: path to check * * Determine whether @path refers to the root of a mount. 
* * Return: true if @path is the root of a mount, false if not. */ static inline bool path_mounted(const struct path *path) { return path->mnt->mnt_root == path->dentry; } static void warn_mandlock(void) { pr_warn_once("=======================================================\n" "WARNING: The mand mount option has been deprecated and\n" " and is ignored by this kernel. Remove the mand\n" " option from the mount to silence this warning.\n" "=======================================================\n"); } static int can_umount(const struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); if (!may_mount()) return -EPERM; if (!path_mounted(path)) return -EINVAL; if (!check_mnt(mnt)) return -EINVAL; if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ return -EINVAL; if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) return -EPERM; return 0; } // caller is responsible for flags being sane int path_umount(struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); int ret; ret = can_umount(path, flags); if (!ret) ret = do_umount(mnt, flags); /* we mustn't call path_put() as that would clear mnt_expiry_mark */ dput(path->dentry); mntput_no_expire(mnt); return ret; } static int ksys_umount(char __user *name, int flags) { int lookup_flags = LOOKUP_MOUNTPOINT; struct path path; int ret; // basic validity checks done first if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) return -EINVAL; if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); if (ret) return ret; return path_umount(&path, flags); } SYSCALL_DEFINE2(umount, char __user *, name, int, flags) { return ksys_umount(name, flags); } #ifdef __ARCH_WANT_SYS_OLDUMOUNT /* * The 2.0 compatible umount. No flags. */ SYSCALL_DEFINE1(oldumount, char __user *, name) { return ksys_umount(name, 0); } #endif static bool is_mnt_ns_file(struct dentry *dentry) { /* Is this a proxy for a mount namespace? */ return dentry->d_op == &ns_dentry_operations && dentry->d_fsdata == &mntns_operations; } static struct mnt_namespace *to_mnt_ns(struct ns_common *ns) { return container_of(ns, struct mnt_namespace, ns); } struct ns_common *from_mnt_ns(struct mnt_namespace *mnt) { return &mnt->ns; } static bool mnt_ns_loop(struct dentry *dentry) { /* Could bind mounting the mount namespace inode cause a * mount namespace loop? */ struct mnt_namespace *mnt_ns; if (!is_mnt_ns_file(dentry)) return false; mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, int flag) { struct mount *res, *p, *q, *r, *parent; if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt)) return ERR_PTR(-EINVAL); if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry)) return ERR_PTR(-EINVAL); res = q = clone_mnt(mnt, dentry, flag); if (IS_ERR(q)) return q; q->mnt_mountpoint = mnt->mnt_mountpoint; p = mnt; list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { struct mount *s; if (!is_subdir(r->mnt_mountpoint, dentry)) continue; for (s = r; s; s = next_mnt(s, r)) { if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { if (s->mnt.mnt_flags & MNT_LOCKED) { /* Both unbindable and locked. 
*/ q = ERR_PTR(-EPERM); goto out; } else { s = skip_mnt_tree(s); continue; } } if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(s->mnt.mnt_root)) { s = skip_mnt_tree(s); continue; } while (p != s->mnt_parent) { p = p->mnt_parent; q = q->mnt_parent; } p = s; parent = q; q = clone_mnt(p, p->mnt.mnt_root, flag); if (IS_ERR(q)) goto out; lock_mount_hash(); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, parent, p->mnt_mp, false); unlock_mount_hash(); } } return res; out: if (res) { lock_mount_hash(); umount_tree(res, UMOUNT_SYNC); unlock_mount_hash(); } return q; } /* Caller should check returned pointer for errors */ struct vfsmount *collect_mounts(const struct path *path) { struct mount *tree; namespace_lock(); if (!check_mnt(real_mount(path->mnt))) tree = ERR_PTR(-EINVAL); else tree = copy_tree(real_mount(path->mnt), path->dentry, CL_COPY_ALL | CL_PRIVATE); namespace_unlock(); if (IS_ERR(tree)) return ERR_CAST(tree); return &tree->mnt; } static void free_mnt_ns(struct mnt_namespace *); static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool); void dissolve_on_fput(struct vfsmount *mnt) { struct mnt_namespace *ns; namespace_lock(); lock_mount_hash(); ns = real_mount(mnt)->mnt_ns; if (ns) { if (is_anon_ns(ns)) umount_tree(real_mount(mnt), UMOUNT_CONNECTED); else ns = NULL; } unlock_mount_hash(); namespace_unlock(); if (ns) free_mnt_ns(ns); } void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); umount_tree(real_mount(mnt), 0); unlock_mount_hash(); namespace_unlock(); } static bool has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { if (!is_subdir(child->mnt_mountpoint, dentry)) continue; if (child->mnt.mnt_flags & MNT_LOCKED) return true; } return false; } /** * clone_private_mount - create a private clone of a path * @path: path to clone * * This creates a new vfsmount, which will be the clone of @path. The new mount * will not be attached anywhere in the namespace and will be private (i.e. * changes to the originating mount won't be propagated into this). * * Release with mntput(). 
*/ struct vfsmount *clone_private_mount(const struct path *path) { struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; down_read(&namespace_sem); if (IS_MNT_UNBINDABLE(old_mnt)) goto invalid; if (!check_mnt(old_mnt)) goto invalid; if (has_locked_children(old_mnt, path->dentry)) goto invalid; new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); up_read(&namespace_sem); if (IS_ERR(new_mnt)) return ERR_CAST(new_mnt); /* Longterm mount to be removed by kern_unmount*() */ new_mnt->mnt_ns = MNT_NS_INTERNAL; return &new_mnt->mnt; invalid: up_read(&namespace_sem); return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(clone_private_mount); int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, struct vfsmount *root) { struct mount *mnt; int res = f(root, arg); if (res) return res; list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) { res = f(&mnt->mnt, arg); if (res) return res; } return 0; } static void lock_mnt_tree(struct mount *mnt) { struct mount *p; for (p = mnt; p; p = next_mnt(p, mnt)) { int flags = p->mnt.mnt_flags; /* Don't allow unprivileged users to change mount flags */ flags |= MNT_LOCK_ATIME; if (flags & MNT_READONLY) flags |= MNT_LOCK_READONLY; if (flags & MNT_NODEV) flags |= MNT_LOCK_NODEV; if (flags & MNT_NOSUID) flags |= MNT_LOCK_NOSUID; if (flags & MNT_NOEXEC) flags |= MNT_LOCK_NOEXEC; /* Don't allow unprivileged users to reveal what is under a mount */ if (list_empty(&p->mnt_expire)) flags |= MNT_LOCKED; p->mnt.mnt_flags = flags; } } static void cleanup_group_ids(struct mount *mnt, struct mount *end) { struct mount *p; for (p = mnt; p != end; p = next_mnt(p, mnt)) { if (p->mnt_group_id && !IS_MNT_SHARED(p)) mnt_release_group_id(p); } } static int invent_group_ids(struct mount *mnt, bool recurse) { struct mount *p; for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) { if (!p->mnt_group_id && !IS_MNT_SHARED(p)) { int err = mnt_alloc_group_id(p); if (err) { cleanup_group_ids(mnt, p); return err; } } } return 0; } int count_mounts(struct mnt_namespace *ns, struct mount *mnt) { unsigned int max = READ_ONCE(sysctl_mount_max); unsigned int mounts = 0; struct mount *p; if (ns->nr_mounts >= max) return -ENOSPC; max -= ns->nr_mounts; if (ns->pending_mounts >= max) return -ENOSPC; max -= ns->pending_mounts; for (p = mnt; p; p = next_mnt(p, mnt)) mounts++; if (mounts > max) return -ENOSPC; ns->pending_mounts += mounts; return 0; } enum mnt_tree_flags_t { MNT_TREE_MOVE = BIT(0), MNT_TREE_BENEATH = BIT(1), }; /** * attach_recursive_mnt - attach a source mount tree * @source_mnt: mount tree to be attached * @top_mnt: mount that @source_mnt will be mounted on or mounted beneath * @dest_mp: the mountpoint @source_mnt will be mounted at * @flags: modify how @source_mnt is supposed to be attached * * NOTE: in the table below explains the semantics when a source mount * of a given type is attached to a destination mount of a given type. 
* --------------------------------------------------------------------------- * | BIND MOUNT OPERATION | * |************************************************************************** * | source-->| shared | private | slave | unbindable | * | dest | | | | | * | | | | | | | * | v | | | | | * |************************************************************************** * | shared | shared (++) | shared (+) | shared(+++)| invalid | * | | | | | | * |non-shared| shared (+) | private | slave (*) | invalid | * *************************************************************************** * A bind operation clones the source mount and mounts the clone on the * destination mount. * * (++) the cloned mount is propagated to all the mounts in the propagation * tree of the destination mount and the cloned mount is added to * the peer group of the source mount. * (+) the cloned mount is created under the destination mount and is marked * as shared. The cloned mount is added to the peer group of the source * mount. * (+++) the mount is propagated to all the mounts in the propagation tree * of the destination mount and the cloned mount is made slave * of the same master as that of the source mount. The cloned mount * is marked as 'shared and slave'. * (*) the cloned mount is made a slave of the same master as that of the * source mount. * * --------------------------------------------------------------------------- * | MOVE MOUNT OPERATION | * |************************************************************************** * | source-->| shared | private | slave | unbindable | * | dest | | | | | * | | | | | | | * | v | | | | | * |************************************************************************** * | shared | shared (+) | shared (+) | shared(+++) | invalid | * | | | | | | * |non-shared| shared (+*) | private | slave (*) | unbindable | * *************************************************************************** * * (+) the mount is moved to the destination. And is then propagated to * all the mounts in the propagation tree of the destination mount. * (+*) the mount is moved to the destination. * (+++) the mount is moved to the destination and is then propagated to * all the mounts belonging to the destination mount's propagation tree. * the mount is marked as 'shared and slave'. * (*) the mount continues to be a slave at the new location. * * if the source mount is a tree, the operations explained above is * applied to each mount in the tree. * Must be called without spinlocks held, since this function can sleep * in allocations. * * Context: The function expects namespace_lock() to be held. * Return: If @source_mnt was successfully attached 0 is returned. * Otherwise a negative error code is returned. */ static int attach_recursive_mnt(struct mount *source_mnt, struct mount *top_mnt, struct mountpoint *dest_mp, enum mnt_tree_flags_t flags) { struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; HLIST_HEAD(tree_list); struct mnt_namespace *ns = top_mnt->mnt_ns; struct mountpoint *smp; struct mount *child, *dest_mnt, *p; struct hlist_node *n; int err = 0; bool moving = flags & MNT_TREE_MOVE, beneath = flags & MNT_TREE_BENEATH; /* * Preallocate a mountpoint in case the new mounts need to be * mounted beneath mounts on the same mountpoint. */ smp = get_mountpoint(source_mnt->mnt.mnt_root); if (IS_ERR(smp)) return PTR_ERR(smp); /* Is there space to add these mounts to the mount namespace? 
*/ if (!moving) { err = count_mounts(ns, source_mnt); if (err) goto out; } if (beneath) dest_mnt = top_mnt->mnt_parent; else dest_mnt = top_mnt; if (IS_MNT_SHARED(dest_mnt)) { err = invent_group_ids(source_mnt, true); if (err) goto out; err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); } lock_mount_hash(); if (err) goto out_cleanup_ids; if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) set_mnt_shared(p); } if (moving) { if (beneath) dest_mp = smp; unhash_mnt(source_mnt); attach_mnt(source_mnt, top_mnt, dest_mp, beneath); touch_mnt_namespace(source_mnt->mnt_ns); } else { if (source_mnt->mnt_ns) { LIST_HEAD(head); /* move from anon - the caller will destroy */ for (p = source_mnt; p; p = next_mnt(p, source_mnt)) move_from_ns(p, &head); list_del_init(&head); } if (beneath) mnt_set_mountpoint_beneath(source_mnt, top_mnt, smp); else mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); commit_tree(source_mnt); } hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; hlist_del_init(&child->mnt_hash); q = __lookup_mnt(&child->mnt_parent->mnt, child->mnt_mountpoint); if (q) mnt_change_mountpoint(child, smp, q); /* Notice when we are propagating across user namespaces */ if (child->mnt_parent->mnt_ns->user_ns != user_ns) lock_mnt_tree(child); child->mnt.mnt_flags &= ~MNT_LOCKED; commit_tree(child); } put_mountpoint(smp); unlock_mount_hash(); return 0; out_cleanup_ids: while (!hlist_empty(&tree_list)) { child = hlist_entry(tree_list.first, struct mount, mnt_hash); child->mnt_parent->mnt_ns->pending_mounts = 0; umount_tree(child, UMOUNT_SYNC); } unlock_mount_hash(); cleanup_group_ids(source_mnt, NULL); out: ns->pending_mounts = 0; read_seqlock_excl(&mount_lock); put_mountpoint(smp); read_sequnlock_excl(&mount_lock); return err; } /** * do_lock_mount - lock mount and mountpoint * @path: target path * @beneath: whether the intention is to mount beneath @path * * Follow the mount stack on @path until the top mount @mnt is found. If * the initial @path->{mnt,dentry} is a mountpoint lookup the first * mount stacked on top of it. Then simply follow @{mnt,mnt->mnt_root} * until nothing is stacked on top of it anymore. * * Acquire the inode_lock() on the top mount's ->mnt_root to protect * against concurrent removal of the new mountpoint from another mount * namespace. * * If @beneath is requested, acquire inode_lock() on @mnt's mountpoint * @mp on @mnt->mnt_parent must be acquired. This protects against a * concurrent unlink of @mp->mnt_dentry from another mount namespace * where @mnt doesn't have a child mount mounted @mp. A concurrent * removal of @mnt->mnt_root doesn't matter as nothing will be mounted * on top of it for @beneath. * * In addition, @beneath needs to make sure that @mnt hasn't been * unmounted or moved from its current mountpoint in between dropping * @mount_lock and acquiring @namespace_sem. For the !@beneath case @mnt * being unmounted would be detected later by e.g., calling * check_mnt(mnt) in the function it's called from. For the @beneath * case however, it's useful to detect it directly in do_lock_mount(). * If @mnt hasn't been unmounted then @mnt->mnt_mountpoint still points * to @mnt->mnt_mp->m_dentry. But if @mnt has been unmounted it will * point to @mnt->mnt_root and @mnt->mnt_mp will be NULL. * * Return: Either the target mountpoint on the top mount or the top * mount's mountpoint. 
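 *
 * For orientation, a typical caller pairs this with unlock_mount(),
 * roughly (an illustrative sketch, not copied from any one caller):
 *
 *	mp = lock_mount(path);
 *	if (IS_ERR(mp))
 *		return PTR_ERR(mp);
 *	err = graft_tree(mnt, real_mount(path->mnt), mp);
 *	unlock_mount(mp);
 *
 * do_loopback() and do_new_mount_fc() below follow this pattern.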
*/ static struct mountpoint *do_lock_mount(struct path *path, bool beneath) { struct vfsmount *mnt = path->mnt; struct dentry *dentry; struct mountpoint *mp = ERR_PTR(-ENOENT); for (;;) { struct mount *m; if (beneath) { m = real_mount(mnt); read_seqlock_excl(&mount_lock); dentry = dget(m->mnt_mountpoint); read_sequnlock_excl(&mount_lock); } else { dentry = path->dentry; } inode_lock(dentry->d_inode); if (unlikely(cant_mount(dentry))) { inode_unlock(dentry->d_inode); goto out; } namespace_lock(); if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { namespace_unlock(); inode_unlock(dentry->d_inode); goto out; } mnt = lookup_mnt(path); if (likely(!mnt)) break; namespace_unlock(); inode_unlock(dentry->d_inode); if (beneath) dput(dentry); path_put(path); path->mnt = mnt; path->dentry = dget(mnt->mnt_root); } mp = get_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); inode_unlock(dentry->d_inode); } out: if (beneath) dput(dentry); return mp; } static inline struct mountpoint *lock_mount(struct path *path) { return do_lock_mount(path, false); } static void unlock_mount(struct mountpoint *where) { struct dentry *dentry = where->m_dentry; read_seqlock_excl(&mount_lock); put_mountpoint(where); read_sequnlock_excl(&mount_lock); namespace_unlock(); inode_unlock(dentry->d_inode); } static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) { if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER) return -EINVAL; if (d_is_dir(mp->m_dentry) != d_is_dir(mnt->mnt.mnt_root)) return -ENOTDIR; return attach_recursive_mnt(mnt, p, mp, 0); } /* * Sanity check the flags to change_mnt_propagation. */ static int flags_to_propagation_type(int ms_flags) { int type = ms_flags & ~(MS_REC | MS_SILENT); /* Fail if any non-propagation flags are set */ if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) return 0; /* Only one propagation flag should be set */ if (!is_power_of_2(type)) return 0; return type; } /* * recursively change the type of the mountpoint. */ static int do_change_type(struct path *path, int ms_flags) { struct mount *m; struct mount *mnt = real_mount(path->mnt); int recurse = ms_flags & MS_REC; int type; int err = 0; if (!path_mounted(path)) return -EINVAL; type = flags_to_propagation_type(ms_flags); if (!type) return -EINVAL; namespace_lock(); if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) goto out_unlock; } lock_mount_hash(); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); unlock_mount_hash(); out_unlock: namespace_unlock(); return err; } static struct mount *__do_loopback(struct path *old_path, int recurse) { struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); if (IS_MNT_UNBINDABLE(old)) return mnt; if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations) return mnt; if (!recurse && has_locked_children(old, old_path->dentry)) return mnt; if (recurse) mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); else mnt = clone_mnt(old, old_path->dentry, 0); if (!IS_ERR(mnt)) mnt->mnt.mnt_flags &= ~MNT_LOCKED; return mnt; } /* * do loopback mount. 
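 *
 * For reference, this is the path taken by a classic bind mount from
 * userspace (illustrative example, made-up paths, error handling
 * omitted):
 *
 *	#include <sys/mount.h>
 *
 *	mount("/srv/data", "/mnt/data", NULL, MS_BIND | MS_REC, NULL);
 *
 * MS_REC selects the recursive copy (copy_tree()) in __do_loopback(),
 * plain MS_BIND clones only the one mount (clone_mnt()).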
*/ static int do_loopback(struct path *path, const char *old_name, int recurse) { struct path old_path; struct mount *mnt = NULL, *parent; struct mountpoint *mp; int err; if (!old_name || !*old_name) return -EINVAL; err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); if (err) return err; err = -EINVAL; if (mnt_ns_loop(old_path.dentry)) goto out; mp = lock_mount(path); if (IS_ERR(mp)) { err = PTR_ERR(mp); goto out; } parent = real_mount(path->mnt); if (!check_mnt(parent)) goto out2; mnt = __do_loopback(&old_path, recurse); if (IS_ERR(mnt)) { err = PTR_ERR(mnt); goto out2; } err = graft_tree(mnt, parent, mp); if (err) { lock_mount_hash(); umount_tree(mnt, UMOUNT_SYNC); unlock_mount_hash(); } out2: unlock_mount(mp); out: path_put(&old_path); return err; } static struct file *open_detached_copy(struct path *path, bool recursive) { struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true); struct mount *mnt, *p; struct file *file; if (IS_ERR(ns)) return ERR_CAST(ns); namespace_lock(); mnt = __do_loopback(path, recursive); if (IS_ERR(mnt)) { namespace_unlock(); free_mnt_ns(ns); return ERR_CAST(mnt); } lock_mount_hash(); for (p = mnt; p; p = next_mnt(p, mnt)) { mnt_add_to_ns(ns, p); ns->nr_mounts++; } ns->root = mnt; mntget(&mnt->mnt); unlock_mount_hash(); namespace_unlock(); mntput(path->mnt); path->mnt = &mnt->mnt; file = dentry_open(path, O_PATH, current_cred()); if (IS_ERR(file)) dissolve_on_fput(path->mnt); else file->f_mode |= FMODE_NEED_UNMOUNT; return file; } SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) { struct file *file; struct path path; int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; bool detached = flags & OPEN_TREE_CLONE; int error; int fd; BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC)) return -EINVAL; if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) return -EINVAL; if (flags & AT_NO_AUTOMOUNT) lookup_flags &= ~LOOKUP_AUTOMOUNT; if (flags & AT_SYMLINK_NOFOLLOW) lookup_flags &= ~LOOKUP_FOLLOW; if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; if (detached && !may_mount()) return -EPERM; fd = get_unused_fd_flags(flags & O_CLOEXEC); if (fd < 0) return fd; error = user_path_at(dfd, filename, lookup_flags, &path); if (unlikely(error)) { file = ERR_PTR(error); } else { if (detached) file = open_detached_copy(&path, flags & AT_RECURSIVE); else file = dentry_open(&path, O_PATH, current_cred()); path_put(&path); } if (IS_ERR(file)) { put_unused_fd(fd); return PTR_ERR(file); } fd_install(fd, file); return fd; } /* * Don't allow locked mount flags to be cleared. * * No locks need to be held here while testing the various MNT_LOCK * flags because those flags can never be cleared once they are set. 
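 *
 * The MNT_LOCK_* bits themselves are set by lock_mnt_tree() above when a
 * tree is copied or propagated into a mount namespace owned by a
 * different, less privileged user namespace; allowing them to be cleared
 * here would let such a namespace undo restrictions imposed by a more
 * privileged one.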
*/ static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags) { unsigned int fl = mnt->mnt.mnt_flags; if ((fl & MNT_LOCK_READONLY) && !(mnt_flags & MNT_READONLY)) return false; if ((fl & MNT_LOCK_NODEV) && !(mnt_flags & MNT_NODEV)) return false; if ((fl & MNT_LOCK_NOSUID) && !(mnt_flags & MNT_NOSUID)) return false; if ((fl & MNT_LOCK_NOEXEC) && !(mnt_flags & MNT_NOEXEC)) return false; if ((fl & MNT_LOCK_ATIME) && ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) return false; return true; } static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags) { bool readonly_request = (mnt_flags & MNT_READONLY); if (readonly_request == __mnt_is_readonly(&mnt->mnt)) return 0; if (readonly_request) return mnt_make_readonly(mnt); mnt->mnt.mnt_flags &= ~MNT_READONLY; return 0; } static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags) { mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; mnt->mnt.mnt_flags = mnt_flags; touch_mnt_namespace(mnt->mnt_ns); } static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; if (!__mnt_is_readonly(mnt) && (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) && (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { char *buf = (char *)__get_free_page(GFP_KERNEL); char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM); pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n", sb->s_type->name, is_mounted(mnt) ? "remounted" : "mounted", mntpath, &sb->s_time_max, (unsigned long long)sb->s_time_max); free_page((unsigned long)buf); sb->s_iflags |= SB_I_TS_EXPIRY_WARNED; } } /* * Handle reconfiguration of the mountpoint only without alteration of the * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND * to mount(2). */ static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags) { struct super_block *sb = path->mnt->mnt_sb; struct mount *mnt = real_mount(path->mnt); int ret; if (!check_mnt(mnt)) return -EINVAL; if (!path_mounted(path)) return -EINVAL; if (!can_change_locked_flags(mnt, mnt_flags)) return -EPERM; /* * We're only checking whether the superblock is read-only not * changing it, so only take down_read(&sb->s_umount). */ down_read(&sb->s_umount); lock_mount_hash(); ret = change_mount_ro_state(mnt, mnt_flags); if (ret == 0) set_mount_attributes(mnt, mnt_flags); unlock_mount_hash(); up_read(&sb->s_umount); mnt_warn_timestamp_expiry(path, &mnt->mnt); return ret; } /* * change filesystem flags. dir should be a physical root of filesystem. * If you've mounted a non-root directory somewhere and want to do remount * on it - tough luck. */ static int do_remount(struct path *path, int ms_flags, int sb_flags, int mnt_flags, void *data) { int err; struct super_block *sb = path->mnt->mnt_sb; struct mount *mnt = real_mount(path->mnt); struct fs_context *fc; if (!check_mnt(mnt)) return -EINVAL; if (!path_mounted(path)) return -EINVAL; if (!can_change_locked_flags(mnt, mnt_flags)) return -EPERM; fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); if (IS_ERR(fc)) return PTR_ERR(fc); /* * Indicate to the filesystem that the remount request is coming * from the legacy mount system call. 
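 *
 * For reference, the two legacy remount flavours from userspace are
 * roughly (illustrative, error handling omitted):
 *
 *	// superblock remount, handled here via reconfigure_super()
 *	mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_RDONLY, NULL);
 *
 *	// per-mountpoint remount, routed to do_reconfigure_mnt() instead
 *	mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_BIND | MS_RDONLY, NULL);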
*/ fc->oldapi = true; err = parse_monolithic_mount_data(fc, data); if (!err) { down_write(&sb->s_umount); err = -EPERM; if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { err = reconfigure_super(fc); if (!err) { lock_mount_hash(); set_mount_attributes(mnt, mnt_flags); unlock_mount_hash(); } } up_write(&sb->s_umount); } mnt_warn_timestamp_expiry(path, &mnt->mnt); put_fs_context(fc); return err; } static inline int tree_contains_unbindable(struct mount *mnt) { struct mount *p; for (p = mnt; p; p = next_mnt(p, mnt)) { if (IS_MNT_UNBINDABLE(p)) return 1; } return 0; } /* * Check that there aren't references to earlier/same mount namespaces in the * specified subtree. Such references can act as pins for mount namespaces * that aren't checked by the mount-cycle checking code, thereby allowing * cycles to be made. */ static bool check_for_nsfs_mounts(struct mount *subtree) { struct mount *p; bool ret = false; lock_mount_hash(); for (p = subtree; p; p = next_mnt(p, subtree)) if (mnt_ns_loop(p->mnt.mnt_root)) goto out; ret = true; out: unlock_mount_hash(); return ret; } static int do_set_group(struct path *from_path, struct path *to_path) { struct mount *from, *to; int err; from = real_mount(from_path->mnt); to = real_mount(to_path->mnt); namespace_lock(); err = -EINVAL; /* To and From must be mounted */ if (!is_mounted(&from->mnt)) goto out; if (!is_mounted(&to->mnt)) goto out; err = -EPERM; /* We should be allowed to modify mount namespaces of both mounts */ if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN)) goto out; if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN)) goto out; err = -EINVAL; /* To and From paths should be mount roots */ if (!path_mounted(from_path)) goto out; if (!path_mounted(to_path)) goto out; /* Setting sharing groups is only allowed across same superblock */ if (from->mnt.mnt_sb != to->mnt.mnt_sb) goto out; /* From mount root should be wider than To mount root */ if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root)) goto out; /* From mount should not have locked children in place of To's root */ if (has_locked_children(from, to->mnt.mnt_root)) goto out; /* Setting sharing groups is only allowed on private mounts */ if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to)) goto out; /* From should not be private */ if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from)) goto out; if (IS_MNT_SLAVE(from)) { struct mount *m = from->mnt_master; list_add(&to->mnt_slave, &m->mnt_slave_list); to->mnt_master = m; } if (IS_MNT_SHARED(from)) { to->mnt_group_id = from->mnt_group_id; list_add(&to->mnt_share, &from->mnt_share); lock_mount_hash(); set_mnt_shared(to); unlock_mount_hash(); } err = 0; out: namespace_unlock(); return err; } /** * path_overmounted - check if path is overmounted * @path: path to check * * Check if path is overmounted, i.e., if there's a mount on top of * @path->mnt with @path->dentry as mountpoint. * * Context: This function expects namespace_lock() to be held. * Return: If path is overmounted true is returned, false if not. */ static inline bool path_overmounted(const struct path *path) { rcu_read_lock(); if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { rcu_read_unlock(); return true; } rcu_read_unlock(); return false; } /** * can_move_mount_beneath - check that we can mount beneath the top mount * @from: mount to mount beneath * @to: mount under which to mount * @mp: mountpoint of @to * * - Make sure that @to->dentry is actually the root of a mount under * which we can mount another mount. 
* - Make sure that nothing can be mounted beneath the caller's current * root or the rootfs of the namespace. * - Make sure that the caller can unmount the topmost mount ensuring * that the caller could reveal the underlying mountpoint. * - Ensure that nothing has been mounted on top of @from before we * grabbed @namespace_sem to avoid creating pointless shadow mounts. * - Prevent mounting beneath a mount if the propagation relationship * between the source mount, parent mount, and top mount would lead to * nonsensical mount trees. * * Context: This function expects namespace_lock() to be held. * Return: On success 0, and on error a negative error code is returned. */ static int can_move_mount_beneath(const struct path *from, const struct path *to, const struct mountpoint *mp) { struct mount *mnt_from = real_mount(from->mnt), *mnt_to = real_mount(to->mnt), *parent_mnt_to = mnt_to->mnt_parent; if (!mnt_has_parent(mnt_to)) return -EINVAL; if (!path_mounted(to)) return -EINVAL; if (IS_MNT_LOCKED(mnt_to)) return -EINVAL; /* Avoid creating shadow mounts during mount propagation. */ if (path_overmounted(from)) return -EINVAL; /* * Mounting beneath the rootfs only makes sense when the * semantics of pivot_root(".", ".") are used. */ if (&mnt_to->mnt == current->fs->root.mnt) return -EINVAL; if (parent_mnt_to == current->nsproxy->mnt_ns->root) return -EINVAL; for (struct mount *p = mnt_from; mnt_has_parent(p); p = p->mnt_parent) if (p == mnt_to) return -EINVAL; /* * If the parent mount propagates to the child mount this would * mean mounting @mnt_from on @mnt_to->mnt_parent and then * propagating a copy @c of @mnt_from on top of @mnt_to. This * defeats the whole purpose of mounting beneath another mount. */ if (propagation_would_overmount(parent_mnt_to, mnt_to, mp)) return -EINVAL; /* * If @mnt_to->mnt_parent propagates to @mnt_from this would * mean propagating a copy @c of @mnt_from on top of @mnt_from. * Afterwards @mnt_from would be mounted on top of * @mnt_to->mnt_parent and @mnt_to would be unmounted from * @mnt->mnt_parent and remounted on @mnt_from. But since @c is * already mounted on @mnt_from, @mnt_to would ultimately be * remounted on top of @c. Afterwards, @mnt_from would be * covered by a copy @c of @mnt_from and @c would be covered by * @mnt_from itself. This defeats the whole purpose of mounting * @mnt_from beneath @mnt_to. */ if (propagation_would_overmount(parent_mnt_to, mnt_from, mp)) return -EINVAL; return 0; } static int do_move_mount(struct path *old_path, struct path *new_path, bool beneath) { struct mnt_namespace *ns; struct mount *p; struct mount *old; struct mount *parent; struct mountpoint *mp, *old_mp; int err; bool attached; enum mnt_tree_flags_t flags = 0; mp = do_lock_mount(new_path, beneath); if (IS_ERR(mp)) return PTR_ERR(mp); old = real_mount(old_path->mnt); p = real_mount(new_path->mnt); parent = old->mnt_parent; attached = mnt_has_parent(old); if (attached) flags |= MNT_TREE_MOVE; old_mp = old->mnt_mp; ns = old->mnt_ns; err = -EINVAL; /* The mountpoint must be in our namespace. */ if (!check_mnt(p)) goto out; /* The thing moved must be mounted... */ if (!is_mounted(&old->mnt)) goto out; /* ... and either ours or the root of anon namespace */ if (!(attached ? check_mnt(old) : is_anon_ns(ns))) goto out; if (old->mnt.mnt_flags & MNT_LOCKED) goto out; if (!path_mounted(old_path)) goto out; if (d_is_dir(new_path->dentry) != d_is_dir(old_path->dentry)) goto out; /* * Don't move a mount residing in a shared parent. 
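 * Copies of it may already have been propagated to the parent's peers,
 * and those copies would be left behind, so the move is refused.
 *
 * For reference, mounting beneath the top mount of a mount stack is
 * requested from userspace roughly like this (illustrative, raw
 * syscall, made-up paths, assuming headers that define
 * MOVE_MOUNT_BENEATH):
 *
 *	syscall(SYS_move_mount, AT_FDCWD, "/staging/newroot",
 *		AT_FDCWD, "/mnt", MOVE_MOUNT_BENEATH);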
 */
	if (attached && IS_MNT_SHARED(parent))
		goto out;

	if (beneath) {
		err = can_move_mount_beneath(old_path, new_path, mp);
		if (err)
			goto out;

		err = -EINVAL;
		p = p->mnt_parent;
		flags |= MNT_TREE_BENEATH;
	}

	/*
	 * Don't move a mount tree containing unbindable mounts to a destination
	 * mount which is shared.
	 */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out;
	err = -ELOOP;
	if (!check_for_nsfs_mounts(old))
		goto out;
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out;

	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, flags);
	if (err)
		goto out;

	/* if the mount is moved, it should no longer expire
	 * automatically */
	list_del_init(&old->mnt_expire);
	if (attached)
		put_mountpoint(old_mp);
out:
	unlock_mount(mp);
	if (!err) {
		if (attached)
			mntput_no_expire(parent);
		else
			free_mnt_ns(ns);
	}
	return err;
}

static int do_move_mount_old(struct path *path, const char *old_name)
{
	struct path old_path;
	int err;

	if (!old_name || !*old_name)
		return -EINVAL;

	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	err = do_move_mount(&old_path, path, false);
	path_put(&old_path);
	return err;
}

/*
 * add a mount into a namespace's mount tree
 */
static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
			const struct path *path, int mnt_flags)
{
	struct mount *parent = real_mount(path->mnt);

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	if (unlikely(!check_mnt(parent))) {
		/* that's acceptable only for automounts done in private ns */
		if (!(mnt_flags & MNT_SHRINKABLE))
			return -EINVAL;
		/* ... and for those we'd better have mountpoint still alive */
		if (!parent->mnt_ns)
			return -EINVAL;
	}

	/* Refuse the same filesystem on the same mount point */
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path_mounted(path))
		return -EBUSY;

	if (d_is_symlink(newmnt->mnt.mnt_root))
		return -EINVAL;

	newmnt->mnt.mnt_flags = mnt_flags;
	return graft_tree(newmnt, parent, mp);
}

static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);

/*
 * Create a new mount using a superblock configuration and request it
 * be added to the namespace tree.
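 *
 * This is the tail end of the legacy mount(2) path: do_new_mount()
 * below sets up the fs_context and calls vfs_get_tree() before handing
 * the prepared superblock to this helper. From userspace the whole
 * sequence is triggered by e.g. (illustrative):
 *
 *	mount("none", "/mnt/scratch", "tmpfs", 0, "size=64m");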
 */
static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
			   unsigned int mnt_flags)
{
	struct vfsmount *mnt;
	struct mountpoint *mp;
	struct super_block *sb = fc->root->d_sb;
	int error;

	error = security_sb_kern_mount(sb);
	if (!error && mount_too_revealing(sb, &mnt_flags))
		error = -EPERM;

	if (unlikely(error)) {
		fc_drop_locked(fc);
		return error;
	}

	up_write(&sb->s_umount);

	mnt = vfs_create_mount(fc);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	mnt_warn_timestamp_expiry(mountpoint, mnt);

	mp = lock_mount(mountpoint);
	if (IS_ERR(mp)) {
		mntput(mnt);
		return PTR_ERR(mp);
	}
	error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
	unlock_mount(mp);
	if (error < 0)
		mntput(mnt);
	return error;
}

/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct fs_context *fc;
	const char *subtype = NULL;
	int err = 0;

	if (!fstype)
		return -EINVAL;

	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	if (type->fs_flags & FS_HAS_SUBTYPE) {
		subtype = strchr(fstype, '.');
		if (subtype) {
			subtype++;
			if (!*subtype) {
				put_filesystem(type);
				return -EINVAL;
			}
		}
	}

	fc = fs_context_for_mount(type, sb_flags);
	put_filesystem(type);
	if (IS_ERR(fc))
		return PTR_ERR(fc);

	/*
	 * Indicate to the filesystem that the mount request is coming
	 * from the legacy mount system call.
	 */
	fc->oldapi = true;

	if (subtype)
		err = vfs_parse_fs_string(fc, "subtype",
					  subtype, strlen(subtype));
	if (!err && name)
		err = vfs_parse_fs_string(fc, "source", name, strlen(name));
	if (!err)
		err = parse_monolithic_mount_data(fc, data);
	if (!err && !mount_capable(fc))
		err = -EPERM;
	if (!err)
		err = vfs_get_tree(fc);
	if (!err)
		err = do_new_mount_fc(fc, path, mnt_flags);

	put_fs_context(fc);
	return err;
}

int finish_automount(struct vfsmount *m, const struct path *path)
{
	struct dentry *dentry = path->dentry;
	struct mountpoint *mp;
	struct mount *mnt;
	int err;

	if (!m)
		return 0;
	if (IS_ERR(m))
		return PTR_ERR(m);

	mnt = real_mount(m);
	/* The new mount record should have at least 2 refs to prevent it being
	 * expired before we get a chance to add it
	 */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == dentry) {
		err = -ELOOP;
		goto discard;
	}

	/*
	 * we don't want to use lock_mount() - in this case finding something
	 * that overmounts our mountpoint means "quietly drop what we've
	 * got", not "try to mount it on top".
	 */
	inode_lock(dentry->d_inode);
	namespace_lock();
	if (unlikely(cant_mount(dentry))) {
		err = -ENOENT;
		goto discard_locked;
	}
	if (path_overmounted(path)) {
		err = 0;
		goto discard_locked;
	}
	mp = get_mountpoint(dentry);
	if (IS_ERR(mp)) {
		err = PTR_ERR(mp);
		goto discard_locked;
	}

	err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	unlock_mount(mp);
	if (unlikely(err))
		goto discard;
	mntput(m);
	return 0;

discard_locked:
	namespace_unlock();
	inode_unlock(dentry->d_inode);
discard:
	/* remove m from any expiration list it may be on */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		list_del_init(&mnt->mnt_expire);
		namespace_unlock();
	}

	mntput(m);
	mntput(m);
	return err;
}

/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to list.
 * @expiry_list: The list to add the mount to.
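 *
 * Filesystems that create submounts on demand (e.g. from their
 * ->d_automount() implementation) typically add each new submount to a
 * private expiry list with this helper and later prune that list by
 * calling mark_mounts_for_expiry() from periodic work, roughly (a
 * hypothetical sketch, not copied from any particular filesystem):
 *
 *	static LIST_HEAD(example_expiry_list);
 *
 *	// when the submount is created:
 *	mnt_set_expiry(mnt, &example_expiry_list);
 *
 *	// from a periodic worker:
 *	mark_mounts_for_expiry(&example_expiry_list);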
*/ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { namespace_lock(); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); namespace_unlock(); } EXPORT_SYMBOL(mnt_set_expiry); /* * process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came * here */ void mark_mounts_for_expiry(struct list_head *mounts) { struct mount *mnt, *next; LIST_HEAD(graveyard); if (list_empty(mounts)) return; namespace_lock(); lock_mount_hash(); /* extract from the expiration list every vfsmount that matches the * following criteria: * - only referenced by its parent vfsmount * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { if (!xchg(&mnt->mnt_expiry_mark, 1) || propagate_mount_busy(mnt, 1)) continue; list_move(&mnt->mnt_expire, &graveyard); } while (!list_empty(&graveyard)) { mnt = list_first_entry(&graveyard, struct mount, mnt_expire); touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); } unlock_mount_hash(); namespace_unlock(); } EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); /* * Ripoff of 'select_parent()' * * search the list of submounts for a given mountpoint, and move any * shrinkable submounts to the 'graveyard' list. */ static int select_submounts(struct mount *parent, struct list_head *graveyard) { struct mount *this_parent = parent; struct list_head *next; int found = 0; repeat: next = this_parent->mnt_mounts.next; resume: while (next != &this_parent->mnt_mounts) { struct list_head *tmp = next; struct mount *mnt = list_entry(tmp, struct mount, mnt_child); next = tmp->next; if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE)) continue; /* * Descend a level if the d_mounts list is non-empty. */ if (!list_empty(&mnt->mnt_mounts)) { this_parent = mnt; goto repeat; } if (!propagate_mount_busy(mnt, 1)) { list_move_tail(&mnt->mnt_expire, graveyard); found++; } } /* * All done at this level ... ascend and resume the search */ if (this_parent != parent) { next = this_parent->mnt_child.next; this_parent = this_parent->mnt_parent; goto resume; } return found; } /* * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint * * mount_lock must be held for write */ static void shrink_submounts(struct mount *mnt) { LIST_HEAD(graveyard); struct mount *m; /* extract submounts of 'mountpoint' from the expiration list */ while (select_submounts(mnt, &graveyard)) { while (!list_empty(&graveyard)) { m = list_first_entry(&graveyard, struct mount, mnt_expire); touch_mnt_namespace(m->mnt_ns); umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC); } } } static void *copy_mount_options(const void __user * data) { char *copy; unsigned left, offset; if (!data) return NULL; copy = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!copy) return ERR_PTR(-ENOMEM); left = copy_from_user(copy, data, PAGE_SIZE); /* * Not all architectures have an exact copy_from_user(). Resort to * byte at a time. */ offset = PAGE_SIZE - left; while (left) { char c; if (get_user(c, (const char __user *)data + offset)) break; copy[offset] = c; left--; offset++; } if (left == PAGE_SIZE) { kfree(copy); return ERR_PTR(-EFAULT); } return copy; } static char *copy_mount_string(const void __user *data) { return data ? 
strndup_user(data, PATH_MAX) : NULL; } /* * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to * be given to the mount() call (ie: read-only, no-dev, no-suid etc). * * data is a (void *) that can point to any structure up to * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent * information (or be NULL). * * Pre-0.97 versions of mount() didn't have a flags word. * When the flags word was introduced its top half was required * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9. * Therefore, if this magic number is present, it carries no information * and must be discarded. */ int path_mount(const char *dev_name, struct path *path, const char *type_page, unsigned long flags, void *data_page) { unsigned int mnt_flags = 0, sb_flags; int ret; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; /* Basic sanity checks */ if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; if (flags & MS_NOUSER) return -EINVAL; ret = security_sb_mount(dev_name, path, type_page, flags, data_page); if (ret) return ret; if (!may_mount()) return -EPERM; if (flags & SB_MANDLOCK) warn_mandlock(); /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) mnt_flags |= MNT_RELATIME; /* Separate the per-mountpoint flags */ if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; if (flags & MS_NOATIME) mnt_flags |= MNT_NOATIME; if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; if (flags & MS_NOSYMFOLLOW) mnt_flags |= MNT_NOSYMFOLLOW; /* The default atime for remount is preservation */ if ((flags & MS_REMOUNT) && ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_STRICTATIME)) == 0)) { mnt_flags &= ~MNT_ATIME_MASK; mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK; } sb_flags = flags & (SB_RDONLY | SB_SYNCHRONOUS | SB_MANDLOCK | SB_DIRSYNC | SB_SILENT | SB_POSIXACL | SB_LAZYTIME | SB_I_VERSION); if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND)) return do_reconfigure_mnt(path, mnt_flags); if (flags & MS_REMOUNT) return do_remount(path, flags, sb_flags, mnt_flags, data_page); if (flags & MS_BIND) return do_loopback(path, dev_name, flags & MS_REC); if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) return do_change_type(path, flags); if (flags & MS_MOVE) return do_move_mount_old(path, dev_name); return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name, data_page); } long do_mount(const char *dev_name, const char __user *dir_name, const char *type_page, unsigned long flags, void *data_page) { struct path path; int ret; ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); if (ret) return ret; ret = path_mount(dev_name, &path, type_page, flags, data_page); path_put(&path); return ret; } static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) { return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES); } static void dec_mnt_namespaces(struct ucounts *ucounts) { dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES); } static void free_mnt_ns(struct mnt_namespace *ns) { if (!is_anon_ns(ns)) ns_free_inum(&ns->ns); dec_mnt_namespaces(ns->ucounts); put_user_ns(ns->user_ns); kfree(ns); } /* * Assign a sequence number so we can detect when we attempt to bind * mount a reference to an older mount namespace into the current * mount namespace, preventing reference counting loops. 
A 64bit * number incrementing at 10Ghz will take 12,427 years to wrap which * is effectively never, so we can ignore the possibility. */ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon) { struct mnt_namespace *new_ns; struct ucounts *ucounts; int ret; ucounts = inc_mnt_namespaces(user_ns); if (!ucounts) return ERR_PTR(-ENOSPC); new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT); if (!new_ns) { dec_mnt_namespaces(ucounts); return ERR_PTR(-ENOMEM); } if (!anon) { ret = ns_alloc_inum(&new_ns->ns); if (ret) { kfree(new_ns); dec_mnt_namespaces(ucounts); return ERR_PTR(ret); } } new_ns->ns.ops = &mntns_operations; if (!anon) new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); refcount_set(&new_ns->ns.count, 1); new_ns->mounts = RB_ROOT; init_waitqueue_head(&new_ns->poll); new_ns->user_ns = get_user_ns(user_ns); new_ns->ucounts = ucounts; return new_ns; } __latent_entropy struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, struct user_namespace *user_ns, struct fs_struct *new_fs) { struct mnt_namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct mount *p, *q; struct mount *old; struct mount *new; int copy_flags; BUG_ON(!ns); if (likely(!(flags & CLONE_NEWNS))) { get_mnt_ns(ns); return ns; } old = ns->root; new_ns = alloc_mnt_ns(user_ns, false); if (IS_ERR(new_ns)) return new_ns; namespace_lock(); /* First pass: copy the tree topology */ copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; if (user_ns != ns->user_ns) copy_flags |= CL_SHARED_TO_SLAVE; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { namespace_unlock(); free_mnt_ns(new_ns); return ERR_CAST(new); } if (user_ns != ns->user_ns) { lock_mount_hash(); lock_mnt_tree(new); unlock_mount_hash(); } new_ns->root = new; /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts * as belonging to new namespace. We have already acquired a private * fs_struct, so tsk->fs->lock is not needed. */ p = old; q = new; while (p) { mnt_add_to_ns(new_ns, q); new_ns->nr_mounts++; if (new_fs) { if (&p->mnt == new_fs->root.mnt) { new_fs->root.mnt = mntget(&q->mnt); rootmnt = &p->mnt; } if (&p->mnt == new_fs->pwd.mnt) { new_fs->pwd.mnt = mntget(&q->mnt); pwdmnt = &p->mnt; } } p = next_mnt(p, old); q = next_mnt(q, new); if (!q) break; // an mntns binding we'd skipped? while (p->mnt.mnt_root != q->mnt.mnt_root) p = next_mnt(skip_mnt_tree(p), old); } namespace_unlock(); if (rootmnt) mntput(rootmnt); if (pwdmnt) mntput(pwdmnt); return new_ns; } struct dentry *mount_subtree(struct vfsmount *m, const char *name) { struct mount *mnt = real_mount(m); struct mnt_namespace *ns; struct super_block *s; struct path path; int err; ns = alloc_mnt_ns(&init_user_ns, true); if (IS_ERR(ns)) { mntput(m); return ERR_CAST(ns); } ns->root = mnt; ns->nr_mounts++; mnt_add_to_ns(ns, mnt); err = vfs_path_lookup(m->mnt_root, m, name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); put_mnt_ns(ns); if (err) return ERR_PTR(err); /* trade a vfsmount reference for active sb one */ s = path.mnt->mnt_sb; atomic_inc(&s->s_active); mntput(path.mnt); /* lock the sucker */ down_write(&s->s_umount); /* ... 
and return the root of (sub)tree on it */ return path.dentry; } EXPORT_SYMBOL(mount_subtree); SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) { int ret; char *kernel_type; char *kernel_dev; void *options; kernel_type = copy_mount_string(type); ret = PTR_ERR(kernel_type); if (IS_ERR(kernel_type)) goto out_type; kernel_dev = copy_mount_string(dev_name); ret = PTR_ERR(kernel_dev); if (IS_ERR(kernel_dev)) goto out_dev; options = copy_mount_options(data); ret = PTR_ERR(options); if (IS_ERR(options)) goto out_data; ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options); kfree(options); out_data: kfree(kernel_dev); out_dev: kfree(kernel_type); out_type: return ret; } #define FSMOUNT_VALID_FLAGS \ (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \ MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME | \ MOUNT_ATTR_NOSYMFOLLOW) #define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP) #define MOUNT_SETATTR_PROPAGATION_FLAGS \ (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED) static unsigned int attr_flags_to_mnt_flags(u64 attr_flags) { unsigned int mnt_flags = 0; if (attr_flags & MOUNT_ATTR_RDONLY) mnt_flags |= MNT_READONLY; if (attr_flags & MOUNT_ATTR_NOSUID) mnt_flags |= MNT_NOSUID; if (attr_flags & MOUNT_ATTR_NODEV) mnt_flags |= MNT_NODEV; if (attr_flags & MOUNT_ATTR_NOEXEC) mnt_flags |= MNT_NOEXEC; if (attr_flags & MOUNT_ATTR_NODIRATIME) mnt_flags |= MNT_NODIRATIME; if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW) mnt_flags |= MNT_NOSYMFOLLOW; return mnt_flags; } /* * Create a kernel mount representation for a new, prepared superblock * (specified by fs_fd) and attach to an open_tree-like file descriptor. */ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, attr_flags) { struct mnt_namespace *ns; struct fs_context *fc; struct file *file; struct path newmount; struct mount *mnt; struct fd f; unsigned int mnt_flags = 0; long ret; if (!may_mount()) return -EPERM; if ((flags & ~(FSMOUNT_CLOEXEC)) != 0) return -EINVAL; if (attr_flags & ~FSMOUNT_VALID_FLAGS) return -EINVAL; mnt_flags = attr_flags_to_mnt_flags(attr_flags); switch (attr_flags & MOUNT_ATTR__ATIME) { case MOUNT_ATTR_STRICTATIME: break; case MOUNT_ATTR_NOATIME: mnt_flags |= MNT_NOATIME; break; case MOUNT_ATTR_RELATIME: mnt_flags |= MNT_RELATIME; break; default: return -EINVAL; } f = fdget(fs_fd); if (!f.file) return -EBADF; ret = -EINVAL; if (f.file->f_op != &fscontext_fops) goto err_fsfd; fc = f.file->private_data; ret = mutex_lock_interruptible(&fc->uapi_mutex); if (ret < 0) goto err_fsfd; /* There must be a valid superblock or we can't mount it */ ret = -EINVAL; if (!fc->root) goto err_unlock; ret = -EPERM; if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { pr_warn("VFS: Mount too revealing\n"); goto err_unlock; } ret = -EBUSY; if (fc->phase != FS_CONTEXT_AWAITING_MOUNT) goto err_unlock; if (fc->sb_flags & SB_MANDLOCK) warn_mandlock(); newmount.mnt = vfs_create_mount(fc); if (IS_ERR(newmount.mnt)) { ret = PTR_ERR(newmount.mnt); goto err_unlock; } newmount.dentry = dget(fc->root); newmount.mnt->mnt_flags = mnt_flags; /* We've done the mount bit - now move the file context into more or * less the same state as if we'd done an fspick(). We don't want to * do any memory allocation or anything like that at this point as we * don't want to have to handle any errors incurred. 
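 *
 * For reference, the full new-API sequence from userspace is roughly
 * (illustrative, raw syscalls, error handling omitted):
 *
 *	int fsfd = syscall(SYS_fsopen, "tmpfs", FSOPEN_CLOEXEC);
 *	syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "size", "64m", 0);
 *	syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *	int mfd = syscall(SYS_fsmount, fsfd, FSMOUNT_CLOEXEC, 0);
 *	syscall(SYS_move_mount, mfd, "", AT_FDCWD, "/mnt/scratch",
 *		MOVE_MOUNT_F_EMPTY_PATH);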
*/ vfs_clean_context(fc); ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true); if (IS_ERR(ns)) { ret = PTR_ERR(ns); goto err_path; } mnt = real_mount(newmount.mnt); ns->root = mnt; ns->nr_mounts = 1; mnt_add_to_ns(ns, mnt); mntget(newmount.mnt); /* Attach to an apparent O_PATH fd with a note that we need to unmount * it, not just simply put it. */ file = dentry_open(&newmount, O_PATH, fc->cred); if (IS_ERR(file)) { dissolve_on_fput(newmount.mnt); ret = PTR_ERR(file); goto err_path; } file->f_mode |= FMODE_NEED_UNMOUNT; ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0); if (ret >= 0) fd_install(ret, file); else fput(file); err_path: path_put(&newmount); err_unlock: mutex_unlock(&fc->uapi_mutex); err_fsfd: fdput(f); return ret; } /* * Move a mount from one place to another. In combination with * fsopen()/fsmount() this is used to install a new mount and in combination * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy * a mount subtree. * * Note the flags value is a combination of MOVE_MOUNT_* flags. */ SYSCALL_DEFINE5(move_mount, int, from_dfd, const char __user *, from_pathname, int, to_dfd, const char __user *, to_pathname, unsigned int, flags) { struct path from_path, to_path; unsigned int lflags; int ret = 0; if (!may_mount()) return -EPERM; if (flags & ~MOVE_MOUNT__MASK) return -EINVAL; if ((flags & (MOVE_MOUNT_BENEATH | MOVE_MOUNT_SET_GROUP)) == (MOVE_MOUNT_BENEATH | MOVE_MOUNT_SET_GROUP)) return -EINVAL; /* If someone gives a pathname, they aren't permitted to move * from an fd that requires unmount as we can't get at the flag * to clear it afterwards. */ lflags = 0; if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY; ret = user_path_at(from_dfd, from_pathname, lflags, &from_path); if (ret < 0) return ret; lflags = 0; if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY; ret = user_path_at(to_dfd, to_pathname, lflags, &to_path); if (ret < 0) goto out_from; ret = security_move_mount(&from_path, &to_path); if (ret < 0) goto out_to; if (flags & MOVE_MOUNT_SET_GROUP) ret = do_set_group(&from_path, &to_path); else ret = do_move_mount(&from_path, &to_path, (flags & MOVE_MOUNT_BENEATH)); out_to: path_put(&to_path); out_from: path_put(&from_path); return ret; } /* * Return true if path is reachable from root * * namespace_sem or mount_lock is held */ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, const struct path *root) { while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) { dentry = mnt->mnt_mountpoint; mnt = mnt->mnt_parent; } return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry); } bool path_is_under(const struct path *path1, const struct path *path2) { bool res; read_seqlock_excl(&mount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); read_sequnlock_excl(&mount_lock); return res; } EXPORT_SYMBOL(path_is_under); /* * pivot_root Semantics: * Moves the root file system of the current process to the directory put_old, * makes new_root as the new root file system of the current process, and sets * root/cwd of all processes which had them on the current root to new_root. * * Restrictions: * The new_root and put_old must be directories, and must not be on the * same file system as the current process root. 
The put_old must be * underneath new_root, i.e. adding a non-zero number of /.. to the string * pointed to by put_old must yield the same directory as new_root. No other * file system may be mounted on put_old. After all, new_root is a mountpoint. * * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem. * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives * in this situation. * * Notes: * - we don't move root/cwd if they are not at the root (reason: if something * cared enough to change them, it's probably wrong to force them elsewhere) * - it's okay to pick a root that isn't the root of a file system, e.g. * /nfs/my_root where /nfs is the mount point. It must be a mountpoint, * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root * first. */ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, const char __user *, put_old) { struct path new, old, root; struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent; struct mountpoint *old_mp, *root_mp; int error; if (!may_mount()) return -EPERM; error = user_path_at(AT_FDCWD, new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new); if (error) goto out0; error = user_path_at(AT_FDCWD, put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old); if (error) goto out1; error = security_sb_pivotroot(&old, &new); if (error) goto out2; get_fs_root(current->fs, &root); old_mp = lock_mount(&old); error = PTR_ERR(old_mp); if (IS_ERR(old_mp)) goto out3; error = -EINVAL; new_mnt = real_mount(new.mnt); root_mnt = real_mount(root.mnt); old_mnt = real_mount(old.mnt); ex_parent = new_mnt->mnt_parent; root_parent = root_mnt->mnt_parent; if (IS_MNT_SHARED(old_mnt) || IS_MNT_SHARED(ex_parent) || IS_MNT_SHARED(root_parent)) goto out4; if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) goto out4; if (new_mnt->mnt.mnt_flags & MNT_LOCKED) goto out4; error = -ENOENT; if (d_unlinked(new.dentry)) goto out4; error = -EBUSY; if (new_mnt == root_mnt || old_mnt == root_mnt) goto out4; /* loop, on the same file system */ error = -EINVAL; if (!path_mounted(&root)) goto out4; /* not a mountpoint */ if (!mnt_has_parent(root_mnt)) goto out4; /* not attached */ if (!path_mounted(&new)) goto out4; /* not a mountpoint */ if (!mnt_has_parent(new_mnt)) goto out4; /* not attached */ /* make sure we can reach put_old from new_root */ if (!is_path_reachable(old_mnt, old.dentry, &new)) goto out4; /* make certain new is below the root */ if (!is_path_reachable(new_mnt, new.dentry, &root)) goto out4; lock_mount_hash(); umount_mnt(new_mnt); root_mp = unhash_mnt(root_mnt); /* we'll need its mountpoint */ if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { new_mnt->mnt.mnt_flags |= MNT_LOCKED; root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; } /* mount old root on put_old */ attach_mnt(root_mnt, old_mnt, old_mp, false); /* mount new_root on / */ attach_mnt(new_mnt, root_parent, root_mp, false); mnt_add_count(root_parent, -1); touch_mnt_namespace(current->nsproxy->mnt_ns); /* A moved mount should not expire automatically */ list_del_init(&new_mnt->mnt_expire); put_mountpoint(root_mp); unlock_mount_hash(); chroot_fs_refs(&root, &new); error = 0; out4: unlock_mount(old_mp); if (!error) mntput_no_expire(ex_parent); out3: path_put(&root); out2: path_put(&old); out1: path_put(&new); out0: return error; } static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) { unsigned int flags = mnt->mnt.mnt_flags; /* flags to clear */ flags &= ~kattr->attr_clr; /* flags to raise */ flags |= kattr->attr_set; return flags; } static int 
can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { struct vfsmount *m = &mnt->mnt; struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; if (!kattr->mnt_idmap) return 0; /* * Creating an idmapped mount with the filesystem wide idmapping * doesn't make sense so block that. We don't allow mushy semantics. */ if (kattr->mnt_userns == m->mnt_sb->s_user_ns) return -EINVAL; /* * Once a mount has been idmapped we don't allow it to change its * mapping. It makes things simpler and callers can just create * another bind-mount they can idmap if they want to. */ if (is_idmapped_mnt(m)) return -EPERM; /* The underlying filesystem doesn't support idmapped mounts yet. */ if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) return -EINVAL; /* We're not controlling the superblock. */ if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) return -EPERM; /* Mount has already been visible in the filesystem hierarchy. */ if (!is_anon_ns(mnt->mnt_ns)) return -EINVAL; return 0; } /** * mnt_allow_writers() - check whether the attribute change allows writers * @kattr: the new mount attributes * @mnt: the mount to which @kattr will be applied * * Check whether thew new mount attributes in @kattr allow concurrent writers. * * Return: true if writers need to be held, false if not */ static inline bool mnt_allow_writers(const struct mount_kattr *kattr, const struct mount *mnt) { return (!(kattr->attr_set & MNT_READONLY) || (mnt->mnt.mnt_flags & MNT_READONLY)) && !kattr->mnt_idmap; } static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt) { struct mount *m; int err; for (m = mnt; m; m = next_mnt(m, mnt)) { if (!can_change_locked_flags(m, recalc_flags(kattr, m))) { err = -EPERM; break; } err = can_idmap_mount(kattr, m); if (err) break; if (!mnt_allow_writers(kattr, m)) { err = mnt_hold_writers(m); if (err) break; } if (!kattr->recurse) return 0; } if (err) { struct mount *p; /* * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all * mounts and needs to take care to include the first mount. */ for (p = mnt; p; p = next_mnt(p, mnt)) { /* If we had to hold writers unblock them. */ if (p->mnt.mnt_flags & MNT_WRITE_HOLD) mnt_unhold_writers(p); /* * We're done once the first mount we changed got * MNT_WRITE_HOLD unset. */ if (p == m) break; } } return err; } static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { if (!kattr->mnt_idmap) return; /* * Pairs with smp_load_acquire() in mnt_idmap(). * * Since we only allow a mount to change the idmapping once and * verified this in can_idmap_mount() we know that the mount has * @nop_mnt_idmap attached to it. So there's no need to drop any * references. */ smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); } static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt) { struct mount *m; for (m = mnt; m; m = next_mnt(m, mnt)) { unsigned int flags; do_idmap_mount(kattr, m); flags = recalc_flags(kattr, m); WRITE_ONCE(m->mnt.mnt_flags, flags); /* If we had to hold writers unblock them. 
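 * mount_setattr_prepare() is what raised MNT_WRITE_HOLD (via
 * mnt_hold_writers()) on these mounts; its error path and this commit
 * path are the two places that drop it again with mnt_unhold_writers().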
*/ if (m->mnt.mnt_flags & MNT_WRITE_HOLD) mnt_unhold_writers(m); if (kattr->propagation) change_mnt_propagation(m, kattr->propagation); if (!kattr->recurse) break; } touch_mnt_namespace(mnt->mnt_ns); } static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) { struct mount *mnt = real_mount(path->mnt); int err = 0; if (!path_mounted(path)) return -EINVAL; if (kattr->mnt_userns) { struct mnt_idmap *mnt_idmap; mnt_idmap = alloc_mnt_idmap(kattr->mnt_userns); if (IS_ERR(mnt_idmap)) return PTR_ERR(mnt_idmap); kattr->mnt_idmap = mnt_idmap; } if (kattr->propagation) { /* * Only take namespace_lock() if we're actually changing * propagation. */ namespace_lock(); if (kattr->propagation == MS_SHARED) { err = invent_group_ids(mnt, kattr->recurse); if (err) { namespace_unlock(); return err; } } } err = -EINVAL; lock_mount_hash(); /* Ensure that this isn't anything purely vfs internal. */ if (!is_mounted(&mnt->mnt)) goto out; /* * If this is an attached mount make sure it's located in the callers * mount namespace. If it's not don't let the caller interact with it. * * If this mount doesn't have a parent it's most often simply a * detached mount with an anonymous mount namespace. IOW, something * that's simply not attached yet. But there are apparently also users * that do change mount properties on the rootfs itself. That obviously * neither has a parent nor is it a detached mount so we cannot * unconditionally check for detached mounts. */ if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) goto out; /* * First, we get the mount tree in a shape where we can change mount * properties without failure. If we succeeded to do so we commit all * changes and if we failed we clean up. */ err = mount_setattr_prepare(kattr, mnt); if (!err) mount_setattr_commit(kattr, mnt); out: unlock_mount_hash(); if (kattr->propagation) { if (err) cleanup_group_ids(mnt, NULL); namespace_unlock(); } return err; } static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, struct mount_kattr *kattr, unsigned int flags) { int err = 0; struct ns_common *ns; struct user_namespace *mnt_userns; struct fd f; if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) return 0; /* * We currently do not support clearing an idmapped mount. If this ever * is a use-case we can revisit this but for now let's keep it simple * and not allow it. */ if (attr->attr_clr & MOUNT_ATTR_IDMAP) return -EINVAL; if (attr->userns_fd > INT_MAX) return -EINVAL; f = fdget(attr->userns_fd); if (!f.file) return -EBADF; if (!proc_ns_file(f.file)) { err = -EINVAL; goto out_fput; } ns = get_proc_ns(file_inode(f.file)); if (ns->ops->type != CLONE_NEWUSER) { err = -EINVAL; goto out_fput; } /* * The initial idmapping cannot be used to create an idmapped * mount. We use the initial idmapping as an indicator of a mount * that is not idmapped. It can simply be passed into helpers that * are aware of idmapped mounts as a convenient shortcut. A user * can just create a dedicated identity mapping to achieve the same * result. */ mnt_userns = container_of(ns, struct user_namespace, ns); if (mnt_userns == &init_user_ns) { err = -EPERM; goto out_fput; } /* We're not controlling the target namespace. 
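 * Creating the idmapping requires CAP_SYS_ADMIN over the user namespace
 * supplied via @attr->userns_fd; refuse the request otherwise.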
*/ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { err = -EPERM; goto out_fput; } kattr->mnt_userns = get_user_ns(mnt_userns); out_fput: fdput(f); return err; } static int build_mount_kattr(const struct mount_attr *attr, size_t usize, struct mount_kattr *kattr, unsigned int flags) { unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; if (flags & AT_NO_AUTOMOUNT) lookup_flags &= ~LOOKUP_AUTOMOUNT; if (flags & AT_SYMLINK_NOFOLLOW) lookup_flags &= ~LOOKUP_FOLLOW; if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; *kattr = (struct mount_kattr) { .lookup_flags = lookup_flags, .recurse = !!(flags & AT_RECURSIVE), }; if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) return -EINVAL; if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) return -EINVAL; kattr->propagation = attr->propagation; if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS) return -EINVAL; kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set); kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr); /* * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap, * users wanting to transition to a different atime setting cannot * simply specify the atime setting in @attr_set, but must also * specify MOUNT_ATTR__ATIME in the @attr_clr field. * So ensure that MOUNT_ATTR__ATIME can't be partially set in * @attr_clr and that @attr_set can't have any atime bits set if * MOUNT_ATTR__ATIME isn't set in @attr_clr. */ if (attr->attr_clr & MOUNT_ATTR__ATIME) { if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME) return -EINVAL; /* * Clear all previous time settings as they are mutually * exclusive. */ kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME; switch (attr->attr_set & MOUNT_ATTR__ATIME) { case MOUNT_ATTR_RELATIME: kattr->attr_set |= MNT_RELATIME; break; case MOUNT_ATTR_NOATIME: kattr->attr_set |= MNT_NOATIME; break; case MOUNT_ATTR_STRICTATIME: break; default: return -EINVAL; } } else { if (attr->attr_set & MOUNT_ATTR__ATIME) return -EINVAL; } return build_mount_idmapped(attr, usize, kattr, flags); } static void finish_mount_kattr(struct mount_kattr *kattr) { put_user_ns(kattr->mnt_userns); kattr->mnt_userns = NULL; if (kattr->mnt_idmap) mnt_idmap_put(kattr->mnt_idmap); } SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, unsigned int, flags, struct mount_attr __user *, uattr, size_t, usize) { int err; struct path target; struct mount_attr attr; struct mount_kattr kattr; BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0); if (flags & ~(AT_EMPTY_PATH | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) return -EINVAL; if (unlikely(usize > PAGE_SIZE)) return -E2BIG; if (unlikely(usize < MOUNT_ATTR_SIZE_VER0)) return -EINVAL; if (!may_mount()) return -EPERM; err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); if (err) return err; /* Don't bother walking through the mounts if this is a nop. 
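 *
 * For reference, the usual idmapped-mount recipe from userspace is
 * roughly (illustrative, raw syscalls, made-up paths, error handling
 * omitted):
 *
 *	struct mount_attr attr = {
 *		.attr_set = MOUNT_ATTR_IDMAP,
 *		.userns_fd = userns_fd,	// fd of the user namespace
 *	};
 *	int fd_tree = syscall(SYS_open_tree, AT_FDCWD, "/mnt/src",
 *			      OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
 *	syscall(SYS_mount_setattr, fd_tree, "", AT_EMPTY_PATH,
 *		&attr, sizeof(attr));
 *	syscall(SYS_move_mount, fd_tree, "", AT_FDCWD, "/mnt/idmapped",
 *		MOVE_MOUNT_F_EMPTY_PATH);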
*/ if (attr.attr_set == 0 && attr.attr_clr == 0 && attr.propagation == 0) return 0; err = build_mount_kattr(&attr, usize, &kattr, flags); if (err) return err; err = user_path_at(dfd, path, kattr.lookup_flags, &target); if (!err) { err = do_mount_setattr(&target, &kattr); path_put(&target); } finish_mount_kattr(&kattr); return err; } int show_path(struct seq_file *m, struct dentry *root) { if (root->d_sb->s_op->show_path) return root->d_sb->s_op->show_path(m, root); seq_dentry(m, root, " \t\n\\"); return 0; } static struct vfsmount *lookup_mnt_in_ns(u64 id, struct mnt_namespace *ns) { struct mount *mnt = mnt_find_id_at(ns, id); if (!mnt || mnt->mnt_id_unique != id) return NULL; return &mnt->mnt; } struct kstatmount { struct statmount __user *buf; size_t bufsize; struct vfsmount *mnt; u64 mask; struct path root; struct statmount sm; struct seq_file seq; }; static u64 mnt_to_attr_flags(struct vfsmount *mnt) { unsigned int mnt_flags = READ_ONCE(mnt->mnt_flags); u64 attr_flags = 0; if (mnt_flags & MNT_READONLY) attr_flags |= MOUNT_ATTR_RDONLY; if (mnt_flags & MNT_NOSUID) attr_flags |= MOUNT_ATTR_NOSUID; if (mnt_flags & MNT_NODEV) attr_flags |= MOUNT_ATTR_NODEV; if (mnt_flags & MNT_NOEXEC) attr_flags |= MOUNT_ATTR_NOEXEC; if (mnt_flags & MNT_NODIRATIME) attr_flags |= MOUNT_ATTR_NODIRATIME; if (mnt_flags & MNT_NOSYMFOLLOW) attr_flags |= MOUNT_ATTR_NOSYMFOLLOW; if (mnt_flags & MNT_NOATIME) attr_flags |= MOUNT_ATTR_NOATIME; else if (mnt_flags & MNT_RELATIME) attr_flags |= MOUNT_ATTR_RELATIME; else attr_flags |= MOUNT_ATTR_STRICTATIME; if (is_idmapped_mnt(mnt)) attr_flags |= MOUNT_ATTR_IDMAP; return attr_flags; } static u64 mnt_to_propagation_flags(struct mount *m) { u64 propagation = 0; if (IS_MNT_SHARED(m)) propagation |= MS_SHARED; if (IS_MNT_SLAVE(m)) propagation |= MS_SLAVE; if (IS_MNT_UNBINDABLE(m)) propagation |= MS_UNBINDABLE; if (!propagation) propagation |= MS_PRIVATE; return propagation; } static void statmount_sb_basic(struct kstatmount *s) { struct super_block *sb = s->mnt->mnt_sb; s->sm.mask |= STATMOUNT_SB_BASIC; s->sm.sb_dev_major = MAJOR(sb->s_dev); s->sm.sb_dev_minor = MINOR(sb->s_dev); s->sm.sb_magic = sb->s_magic; s->sm.sb_flags = sb->s_flags & (SB_RDONLY|SB_SYNCHRONOUS|SB_DIRSYNC|SB_LAZYTIME); } static void statmount_mnt_basic(struct kstatmount *s) { struct mount *m = real_mount(s->mnt); s->sm.mask |= STATMOUNT_MNT_BASIC; s->sm.mnt_id = m->mnt_id_unique; s->sm.mnt_parent_id = m->mnt_parent->mnt_id_unique; s->sm.mnt_id_old = m->mnt_id; s->sm.mnt_parent_id_old = m->mnt_parent->mnt_id; s->sm.mnt_attr = mnt_to_attr_flags(&m->mnt); s->sm.mnt_propagation = mnt_to_propagation_flags(m); s->sm.mnt_peer_group = IS_MNT_SHARED(m) ? m->mnt_group_id : 0; s->sm.mnt_master = IS_MNT_SLAVE(m) ? m->mnt_master->mnt_group_id : 0; } static void statmount_propagate_from(struct kstatmount *s) { struct mount *m = real_mount(s->mnt); s->sm.mask |= STATMOUNT_PROPAGATE_FROM; if (IS_MNT_SLAVE(m)) s->sm.propagate_from = get_dominating_id(m, &current->fs->root); } static int statmount_mnt_root(struct kstatmount *s, struct seq_file *seq) { int ret; size_t start = seq->count; ret = show_path(seq, s->mnt->mnt_root); if (ret) return ret; if (unlikely(seq_has_overflowed(seq))) return -EAGAIN; /* * Unescape the result. It would be better if supplied string was not * escaped in the first place, but that's a pretty invasive change. 
*/ seq->buf[seq->count] = '\0'; seq->count = start; seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL)); return 0; } static int statmount_mnt_point(struct kstatmount *s, struct seq_file *seq) { struct vfsmount *mnt = s->mnt; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; int err; err = seq_path_root(seq, &mnt_path, &s->root, ""); return err == SEQ_SKIP ? 0 : err; } static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq) { struct super_block *sb = s->mnt->mnt_sb; seq_puts(seq, sb->s_type->name); return 0; } static int statmount_string(struct kstatmount *s, u64 flag) { int ret; size_t kbufsize; struct seq_file *seq = &s->seq; struct statmount *sm = &s->sm; switch (flag) { case STATMOUNT_FS_TYPE: sm->fs_type = seq->count; ret = statmount_fs_type(s, seq); break; case STATMOUNT_MNT_ROOT: sm->mnt_root = seq->count; ret = statmount_mnt_root(s, seq); break; case STATMOUNT_MNT_POINT: sm->mnt_point = seq->count; ret = statmount_mnt_point(s, seq); break; default: WARN_ON_ONCE(true); return -EINVAL; } if (unlikely(check_add_overflow(sizeof(*sm), seq->count, &kbufsize))) return -EOVERFLOW; if (kbufsize >= s->bufsize) return -EOVERFLOW; /* signal a retry */ if (unlikely(seq_has_overflowed(seq))) return -EAGAIN; if (ret) return ret; seq->buf[seq->count++] = '\0'; sm->mask |= flag; return 0; } static int copy_statmount_to_user(struct kstatmount *s) { struct statmount *sm = &s->sm; struct seq_file *seq = &s->seq; char __user *str = ((char __user *)s->buf) + sizeof(*sm); size_t copysize = min_t(size_t, s->bufsize, sizeof(*sm)); if (seq->count && copy_to_user(str, seq->buf, seq->count)) return -EFAULT; /* Return the number of bytes copied to the buffer */ sm->size = copysize + seq->count; if (copy_to_user(s->buf, sm, copysize)) return -EFAULT; return 0; } static int do_statmount(struct kstatmount *s) { struct mount *m = real_mount(s->mnt); int err; /* * Don't trigger audit denials. We just want to determine what * mounts to show users. 
*/ if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) return -EPERM; err = security_sb_statfs(s->mnt->mnt_root); if (err) return err; if (s->mask & STATMOUNT_SB_BASIC) statmount_sb_basic(s); if (s->mask & STATMOUNT_MNT_BASIC) statmount_mnt_basic(s); if (s->mask & STATMOUNT_PROPAGATE_FROM) statmount_propagate_from(s); if (s->mask & STATMOUNT_FS_TYPE) err = statmount_string(s, STATMOUNT_FS_TYPE); if (!err && s->mask & STATMOUNT_MNT_ROOT) err = statmount_string(s, STATMOUNT_MNT_ROOT); if (!err && s->mask & STATMOUNT_MNT_POINT) err = statmount_string(s, STATMOUNT_MNT_POINT); if (err) return err; return 0; } static inline bool retry_statmount(const long ret, size_t *seq_size) { if (likely(ret != -EAGAIN)) return false; if (unlikely(check_mul_overflow(*seq_size, 2, seq_size))) return false; if (unlikely(*seq_size > MAX_RW_COUNT)) return false; return true; } static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, struct statmount __user *buf, size_t bufsize, size_t seq_size) { if (!access_ok(buf, bufsize)) return -EFAULT; memset(ks, 0, sizeof(*ks)); ks->mask = kreq->param; ks->buf = buf; ks->bufsize = bufsize; ks->seq.size = seq_size; ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT); if (!ks->seq.buf) return -ENOMEM; return 0; } static int copy_mnt_id_req(const struct mnt_id_req __user *req, struct mnt_id_req *kreq) { int ret; size_t usize; BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0); ret = get_user(usize, &req->size); if (ret) return -EFAULT; if (unlikely(usize > PAGE_SIZE)) return -E2BIG; if (unlikely(usize < MNT_ID_REQ_SIZE_VER0)) return -EINVAL; memset(kreq, 0, sizeof(*kreq)); ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize); if (ret) return ret; if (kreq->spare != 0) return -EINVAL; return 0; } SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, struct statmount __user *, buf, size_t, bufsize, unsigned int, flags) { struct vfsmount *mnt; struct mnt_id_req kreq; struct kstatmount ks; /* We currently support retrieval of 3 strings. */ size_t seq_size = 3 * PATH_MAX; int ret; if (flags) return -EINVAL; ret = copy_mnt_id_req(req, &kreq); if (ret) return ret; retry: ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size); if (ret) return ret; down_read(&namespace_sem); mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns); if (!mnt) { up_read(&namespace_sem); kvfree(ks.seq.buf); return -ENOENT; } ks.mnt = mnt; get_fs_root(current->fs, &ks.root); ret = do_statmount(&ks); path_put(&ks.root); up_read(&namespace_sem); if (!ret) ret = copy_statmount_to_user(&ks); kvfree(ks.seq.buf); if (retry_statmount(ret, &seq_size)) goto retry; return ret; } static struct mount *listmnt_next(struct mount *curr) { return node_to_mount(rb_next(&curr->mnt_node)); } static ssize_t do_listmount(struct mount *first, struct path *orig, u64 mnt_parent_id, u64 __user *mnt_ids, size_t nr_mnt_ids, const struct path *root) { struct mount *r; ssize_t ret; /* * Don't trigger audit denials. We just want to determine what * mounts to show users. 
*/ if (!is_path_reachable(real_mount(orig->mnt), orig->dentry, root) && !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) return -EPERM; ret = security_sb_statfs(orig->dentry); if (ret) return ret; for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r)) { if (r->mnt_id_unique == mnt_parent_id) continue; if (!is_path_reachable(r, r->mnt.mnt_root, orig)) continue; if (put_user(r->mnt_id_unique, mnt_ids)) return -EFAULT; mnt_ids++; nr_mnt_ids--; ret++; } return ret; } SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct mnt_id_req kreq; struct mount *first; struct path root, orig; u64 mnt_parent_id, last_mnt_id; const size_t maxcount = (size_t)-1 >> 3; ssize_t ret; if (flags) return -EINVAL; if (unlikely(nr_mnt_ids > maxcount)) return -EFAULT; if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids))) return -EFAULT; ret = copy_mnt_id_req(req, &kreq); if (ret) return ret; mnt_parent_id = kreq.mnt_id; last_mnt_id = kreq.param; down_read(&namespace_sem); get_fs_root(current->fs, &root); if (mnt_parent_id == LSMT_ROOT) { orig = root; } else { ret = -ENOENT; orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns); if (!orig.mnt) goto err; orig.dentry = orig.mnt->mnt_root; } if (!last_mnt_id) first = node_to_mount(rb_first(&ns->mounts)); else first = mnt_find_id_at(ns, last_mnt_id + 1); ret = do_listmount(first, &orig, mnt_parent_id, mnt_ids, nr_mnt_ids, &root); err: path_put(&root); up_read(&namespace_sem); return ret; } static void __init init_mount_tree(void) { struct vfsmount *mnt; struct mount *m; struct mnt_namespace *ns; struct path root; mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); ns = alloc_mnt_ns(&init_user_ns, false); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); m = real_mount(mnt); ns->root = m; ns->nr_mounts = 1; mnt_add_to_ns(ns, m); init_task.nsproxy->mnt_ns = ns; get_mnt_ns(ns); root.mnt = mnt; root.dentry = mnt->mnt_root; mnt->mnt_flags |= MNT_LOCKED; set_fs_pwd(current->fs, &root); set_fs_root(current->fs, &root); } void __init mnt_init(void) { int err; mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); mount_hashtable = alloc_large_system_hash("Mount-cache", sizeof(struct hlist_head), mhash_entries, 19, HASH_ZERO, &m_hash_shift, &m_hash_mask, 0, 0); mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", sizeof(struct hlist_head), mphash_entries, 19, HASH_ZERO, &mp_hash_shift, &mp_hash_mask, 0, 0); if (!mount_hashtable || !mountpoint_hashtable) panic("Failed to allocate mount hash table\n"); kernfs_init(); err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", __func__, err); fs_kobj = kobject_create_and_add("fs", NULL); if (!fs_kobj) printk(KERN_WARNING "%s: kobj create error\n", __func__); shmem_init(); init_rootfs(); init_mount_tree(); } void put_mnt_ns(struct mnt_namespace *ns) { if (!refcount_dec_and_test(&ns->ns.count)) return; drop_collected_mounts(&ns->root->mnt); free_mnt_ns(ns); } struct vfsmount *kern_mount(struct file_system_type *type) { struct vfsmount *mnt; mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); if (!IS_ERR(mnt)) { /* * it is a longterm mount, don't release mnt until * we unmount before file sys is unregistered */ real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; } return mnt; } EXPORT_SYMBOL_GPL(kern_mount); void kern_unmount(struct vfsmount 
*mnt) { /* release long term mount so mount point can be released */ if (!IS_ERR(mnt)) { mnt_make_shortterm(mnt); synchronize_rcu(); /* yecchhh... */ mntput(mnt); } } EXPORT_SYMBOL(kern_unmount); void kern_unmount_array(struct vfsmount *mnt[], unsigned int num) { unsigned int i; for (i = 0; i < num; i++) mnt_make_shortterm(mnt[i]); synchronize_rcu_expedited(); for (i = 0; i < num; i++) mntput(mnt[i]); } EXPORT_SYMBOL(kern_unmount_array); bool our_mnt(struct vfsmount *mnt) { return check_mnt(real_mount(mnt)); } bool current_chrooted(void) { /* Does the current process have a non-standard root */ struct path ns_root; struct path fs_root; bool chrooted; /* Find the namespace root */ ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt; ns_root.dentry = ns_root.mnt->mnt_root; path_get(&ns_root); while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) ; get_fs_root(current->fs, &fs_root); chrooted = !path_equal(&fs_root, &ns_root); path_put(&fs_root); path_put(&ns_root); return chrooted; } static bool mnt_already_visible(struct mnt_namespace *ns, const struct super_block *sb, int *new_mnt_flags) { int new_flags = *new_mnt_flags; struct mount *mnt, *n; bool visible = false; down_read(&namespace_sem); rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) { struct mount *child; int mnt_flags; if (mnt->mnt.mnt_sb->s_type != sb->s_type) continue; /* This mount is not fully visible if it's root directory * is not the root directory of the filesystem. */ if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; /* A local view of the mount flags */ mnt_flags = mnt->mnt.mnt_flags; /* Don't miss readonly hidden in the superblock flags */ if (sb_rdonly(mnt->mnt.mnt_sb)) mnt_flags |= MNT_LOCK_READONLY; /* Verify the mount flags are equal to or more permissive * than the proposed new mount. */ if ((mnt_flags & MNT_LOCK_READONLY) && !(new_flags & MNT_READONLY)) continue; if ((mnt_flags & MNT_LOCK_ATIME) && ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) continue; /* This mount is not fully visible if there are any * locked child mounts that cover anything except for * empty directories. */ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; /* Only worry about locked mounts */ if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; /* Is the directory permanetly empty? */ if (!is_empty_dir_inode(inode)) goto next; } /* Preserve the locked attributes */ *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \ MNT_LOCK_ATIME); visible = true; goto found; next: ; } found: up_read(&namespace_sem); return visible; } static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags) { const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV; struct mnt_namespace *ns = current->nsproxy->mnt_ns; unsigned long s_iflags; if (ns->user_ns == &init_user_ns) return false; /* Can this filesystem be too revealing? */ s_iflags = sb->s_iflags; if (!(s_iflags & SB_I_USERNS_VISIBLE)) return false; if ((s_iflags & required_iflags) != required_iflags) { WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n", required_iflags); return true; } return !mnt_already_visible(ns, sb, new_mnt_flags); } bool mnt_may_suid(struct vfsmount *mnt) { /* * Foreign mounts (accessed via fchdir or through /proc * symlinks) are always treated as if they are nosuid. This * prevents namespaces from trusting potentially unsafe * suid/sgid bits, file caps, or security labels that originate * in other namespaces. 
*/ return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) && current_in_userns(mnt->mnt_sb->s_user_ns); } static struct ns_common *mntns_get(struct task_struct *task) { struct ns_common *ns = NULL; struct nsproxy *nsproxy; task_lock(task); nsproxy = task->nsproxy; if (nsproxy) { ns = &nsproxy->mnt_ns->ns; get_mnt_ns(to_mnt_ns(ns)); } task_unlock(task); return ns; } static void mntns_put(struct ns_common *ns) { put_mnt_ns(to_mnt_ns(ns)); } static int mntns_install(struct nsset *nsset, struct ns_common *ns) { struct nsproxy *nsproxy = nsset->nsproxy; struct fs_struct *fs = nsset->fs; struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns; struct user_namespace *user_ns = nsset->cred->user_ns; struct path root; int err; if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || !ns_capable(user_ns, CAP_SYS_CHROOT) || !ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; if (is_anon_ns(mnt_ns)) return -EINVAL; if (fs->users != 1) return -EINVAL; get_mnt_ns(mnt_ns); old_mnt_ns = nsproxy->mnt_ns; nsproxy->mnt_ns = mnt_ns; /* Find the root */ err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt, "/", LOOKUP_DOWN, &root); if (err) { /* revert to old namespace */ nsproxy->mnt_ns = old_mnt_ns; put_mnt_ns(mnt_ns); return err; } put_mnt_ns(old_mnt_ns); /* Update the pwd and root */ set_fs_pwd(fs, &root); set_fs_root(fs, &root); path_put(&root); return 0; } static struct user_namespace *mntns_owner(struct ns_common *ns) { return to_mnt_ns(ns)->user_ns; } const struct proc_ns_operations mntns_operations = { .name = "mnt", .type = CLONE_NEWNS, .get = mntns_get, .put = mntns_put, .install = mntns_install, .owner = mntns_owner, }; #ifdef CONFIG_SYSCTL static struct ctl_table fs_namespace_sysctls[] = { { .procname = "mount-max", .data = &sysctl_mount_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, }, }; static int __init init_fs_namespace_sysctls(void) { register_sysctl_init("fs", fs_namespace_sysctls); return 0; } fs_initcall(init_fs_namespace_sysctls); #endif /* CONFIG_SYSCTL */
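/*
 * Editor's note: the following is an illustrative userspace sketch, not part
 * of fs/namespace.c. It shows one way a caller might exercise the
 * mount_setattr() syscall implemented above, assuming a kernel whose uapi
 * <linux/mount.h> provides struct mount_attr and the MOUNT_ATTR_* flags and
 * a libc that defines SYS_mount_setattr; the target path is a placeholder.
 */
#define _GNU_SOURCE
#include <fcntl.h>		/* AT_FDCWD */
#include <linux/mount.h>	/* struct mount_attr, MOUNT_ATTR_* (uapi) */
#include <stdio.h>
#include <sys/syscall.h>	/* SYS_mount_setattr, assumed to be defined */
#include <unistd.h>

#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000	/* apply the change to the whole subtree */
#endif

int main(int argc, char **argv)
{
	/*
	 * Request a recursive read-only + nosuid change; attr_clr,
	 * propagation and userns_fd stay zero, so the structure matches
	 * MOUNT_ATTR_SIZE_VER0 as checked by the syscall above.
	 */
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
	};
	const char *path = argc > 1 ? argv[1] : "/mnt/example"; /* placeholder */

	if (syscall(SYS_mount_setattr, AT_FDCWD, path, AT_RECURSIVE,
		    &attr, sizeof(attr)) < 0) {
		perror("mount_setattr");
		return 1;
	}
	return 0;
}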
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/lib/cmdline.c
 * Helper functions generally used for parsing kernel command line
 * and module options.
 *
 * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c.
 *
 * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/ctype.h>

/*
 * If a hyphen was found in get_option, this will handle the
 * range of numbers, M-N. This will expand the range and insert
 * the values[M, M+1, ..., N] into the ints array in get_options.
 */
static int get_range(char **str, int *pint, int n)
{
	int x, inc_counter, upper_range;

	(*str)++;
	upper_range = simple_strtol((*str), NULL, 0);
	inc_counter = upper_range - *pint;

	for (x = *pint; n && x < upper_range; x++, n--)
		*pint++ = x;
	return inc_counter;
}

/**
 * get_option - Parse integer from an option string
 * @str: option string
 * @pint: (optional output) integer value parsed from @str
 *
 * Read an int from an option string; if available accept a subsequent
 * comma as well.
 *
 * When @pint is NULL the function can be used as a validator of
 * the current option in the string.
 *
 * Return values:
 * 0 - no int in string
 * 1 - int found, no subsequent comma
 * 2 - int found including a subsequent comma
 * 3 - hyphen found to denote a range
 *
 * Leading hyphen without integer is no integer case, but we consume it
 * for the sake of simplification.
 */
int get_option(char **str, int *pint)
{
	char *cur = *str;
	int value;

	if (!cur || !(*cur))
		return 0;
	if (*cur == '-')
		value = -simple_strtoull(++cur, str, 0);
	else
		value = simple_strtoull(cur, str, 0);
	if (pint)
		*pint = value;
	if (cur == *str)
		return 0;
	if (**str == ',') {
		(*str)++;
		return 2;
	}
	if (**str == '-')
		return 3;

	return 1;
}
EXPORT_SYMBOL(get_option);

/**
 * get_options - Parse a string into a list of integers
 * @str: String to be parsed
 * @nints: size of integer array
 * @ints: integer array (must have room for at least one element)
 *
 * This function parses a string containing a comma-separated
 * list of integers, a hyphen-separated range of _positive_ integers,
 * or a combination of both. The parse halts when the array is
 * full, or when no more numbers can be retrieved from the
 * string.
 *
 * When @nints is 0, the function just validates the given @str and
 * returns the amount of parseable integers as described below.
 *
 * Returns:
 *
 * The first element is filled by the number of collected integers
 * in the range. The rest is what was parsed from the @str.
 *
 * Return value is the character in the string which caused
 * the parse to end (typically a null terminator, if @str is
 * completely parseable).
 */
char *get_options(const char *str, int nints, int *ints)
{
	bool validate = (nints == 0);
	int res, i = 1;

	while (i < nints || validate) {
		int *pint = validate ? ints : ints + i;

		res = get_option((char **)&str, pint);
		if (res == 0)
			break;
		if (res == 3) {
			int n = validate ? 0 : nints - i;
			int range_nums;

			range_nums = get_range((char **)&str, pint, n);
			if (range_nums < 0)
				break;
			/*
			 * Decrement the result by one to leave out the
			 * last number in the range. The next iteration
			 * will handle the upper number in the range.
			 */
			i += (range_nums - 1);
		}
		i++;
		if (res == 1)
			break;
	}
	ints[0] = i - 1;
	return (char *)str;
}
EXPORT_SYMBOL(get_options);

/**
 * memparse - parse a string with mem suffixes into a number
 * @ptr: Where parse begins
 * @retptr: (output) Optional pointer to next char after parse completes
 *
 * Parses a string into a number. The number stored at @ptr is
 * potentially suffixed with K, M, G, T, P, E (upper or lower case).
 */
unsigned long long memparse(const char *ptr, char **retptr)
{
	char *endptr;	/* local pointer to end of parsed string */

	unsigned long long ret = simple_strtoull(ptr, &endptr, 0);

	switch (*endptr) {
	case 'E':
	case 'e':
		ret <<= 10;
		fallthrough;
	case 'P':
	case 'p':
		ret <<= 10;
		fallthrough;
	case 'T':
	case 't':
		ret <<= 10;
		fallthrough;
	case 'G':
	case 'g':
		ret <<= 10;
		fallthrough;
	case 'M':
	case 'm':
		ret <<= 10;
		fallthrough;
	case 'K':
	case 'k':
		ret <<= 10;
		endptr++;
		fallthrough;
	default:
		break;
	}

	if (retptr)
		*retptr = endptr;

	return ret;
}
EXPORT_SYMBOL(memparse);

/**
 * parse_option_str - Parse a string and check whether an option is set
 * @str: String to be parsed
 * @option: option name
 *
 * This function parses a string containing a comma-separated list of
 * strings like a=b,c.
 *
 * Return true if such an option is present in the string, false otherwise.
 */
bool parse_option_str(const char *str, const char *option)
{
	while (*str) {
		if (!strncmp(str, option, strlen(option))) {
			str += strlen(option);
			if (!*str || *str == ',')
				return true;
		}

		while (*str && *str != ',')
			str++;

		if (*str == ',')
			str++;
	}

	return false;
}

/*
 * Parse a string to get a param value pair.
 * You can use " around spaces, but can't escape ".
 * Hyphens and underscores are equivalent in parameter names.
 */
char *next_arg(char *args, char **param, char **val)
{
	unsigned int i, equals = 0;
	int in_quote = 0, quoted = 0;

	if (*args == '"') {
		args++;
		in_quote = 1;
		quoted = 1;
	}

	for (i = 0; args[i]; i++) {
		if (isspace(args[i]) && !in_quote)
			break;
		if (equals == 0) {
			if (args[i] == '=')
				equals = i;
		}
		if (args[i] == '"')
			in_quote = !in_quote;
	}

	*param = args;
	if (!equals)
		*val = NULL;
	else {
		args[equals] = '\0';
		*val = args + equals + 1;

		/* Don't include quotes in value. */
		if (**val == '"') {
			(*val)++;
			if (args[i-1] == '"')
				args[i-1] = '\0';
		}
	}
	if (quoted && i > 0 && args[i-1] == '"')
		args[i-1] = '\0';

	if (args[i]) {
		args[i] = '\0';
		args += i + 1;
	} else
		args += i;

	/* Chew up trailing spaces. */
	return skip_spaces(args);
}
EXPORT_SYMBOL(next_arg);
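/*
 * Editor's note: an illustrative userspace analogue of memparse() above, not
 * kernel code. It reuses the same cumulative "shift by 10 per suffix, fall
 * through to the smaller suffixes" pattern, with strtoull() standing in for
 * simple_strtoull(), purely to make the suffix arithmetic concrete.
 */
#include <stdio.h>
#include <stdlib.h>

static unsigned long long parse_size(const char *s, char **end)
{
	char *p;
	unsigned long long v = strtoull(s, &p, 0);

	/* Each recognised suffix contributes one factor of 1024; falling
	 * through means e.g. 'G' also picks up the 'M' and 'K' shifts. */
	switch (*p) {
	case 'E': case 'e': v <<= 10; /* fall through */
	case 'P': case 'p': v <<= 10; /* fall through */
	case 'T': case 't': v <<= 10; /* fall through */
	case 'G': case 'g': v <<= 10; /* fall through */
	case 'M': case 'm': v <<= 10; /* fall through */
	case 'K': case 'k': v <<= 10; p++; /* fall through */
	default: break;
	}

	if (end)
		*end = p;
	return v;
}

int main(void)
{
	printf("%llu\n", parse_size("64K", NULL));	/* 65536 */
	printf("%llu\n", parse_size("2M", NULL));	/* 2097152 */
	printf("%llu\n", parse_size("0x10", NULL));	/* 16: base prefixes work as with simple_strtoull */
	return 0;
}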
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_DEADLINE_H
#define _LINUX_SCHED_DEADLINE_H

/*
 * SCHED_DEADLINE tasks have negative priorities, reflecting
 * the fact that any of them has higher prio than RT and
 * NORMAL/BATCH tasks.
 */

#include <linux/sched.h>

#define MAX_DL_PRIO	0

static inline int dl_prio(int prio)
{
	if (unlikely(prio < MAX_DL_PRIO))
		return 1;
	return 0;
}

static inline int dl_task(struct task_struct *p)
{
	return dl_prio(p->prio);
}

static inline bool dl_time_before(u64 a, u64 b)
{
	return (s64)(a - b) < 0;
}

#ifdef CONFIG_SMP

struct root_domain;
extern void dl_add_task_root_domain(struct task_struct *p);
extern void dl_clear_root_domain(struct root_domain *rd);

#endif /* CONFIG_SMP */

#endif /* _LINUX_SCHED_DEADLINE_H */
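/*
 * Editor's note: an illustrative, standalone demonstration of why
 * dl_time_before()'s "(s64)(a - b) < 0" test above stays correct when the
 * u64 timeline wraps. Plain <stdint.h> types stand in for the kernel's
 * u64/s64; two's-complement wraparound is assumed, as the kernel does.
 */
#include <stdint.h>
#include <stdio.h>

static int time_before64(uint64_t a, uint64_t b)
{
	/* Unsigned subtraction wraps modulo 2^64; reinterpreting the result
	 * as signed orders the two instants by the "shorter way around". */
	return (int64_t)(a - b) < 0;
}

int main(void)
{
	uint64_t near_wrap = UINT64_MAX - 5;	/* just before the clock wraps */
	uint64_t wrapped = 10;			/* 16 ticks later, after wrapping */

	printf("%d\n", time_before64(near_wrap, wrapped));	/* 1: still "before" */
	printf("%d\n", time_before64(wrapped, near_wrap));	/* 0 */
	return 0;
}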
2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2000-2001 Vojtech Pavlik * Copyright (c) 2006-2010 Jiri Kosina * * HID to Linux Input mapping */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail: * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic */ #include <linux/module.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/hid.h> #include <linux/hid-debug.h> #include "hid-ids.h" #define unk KEY_UNKNOWN static const unsigned char hid_keyboard[256] = { 0, 0, 0, 0, 30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38, 50, 49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 1, 14, 15, 57, 12, 13, 26, 27, 43, 43, 39, 40, 41, 51, 52, 53, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 87, 88, 99, 70,119,110,102,104,111,107,109,106, 105,108,103, 69, 98, 55, 74, 78, 96, 79, 80, 81, 75, 76, 77, 71, 72, 73, 82, 83, 86,127,116,117,183,184,185,186,187,188,189,190, 191,192,193,194,134,138,130,132,128,129,131,137,133,135,136,113, 115,114,unk,unk,unk,121,unk, 89, 93,124, 92, 94, 95,unk,unk,unk, 122,123, 90, 91, 85,unk,unk,unk,unk,unk,unk,unk,111,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,179,180,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk,unk, unk,unk,unk,unk,unk,unk,unk,unk,111,unk,unk,unk,unk,unk,unk,unk, 29, 42, 56,125, 97, 54,100,126,164,166,165,163,161,115,114,113, 150,158,159,128,136,177,178,176,142,152,173,140,unk,unk,unk,unk }; static const struct { __s32 x; __s32 y; } hid_hat_to_axis[] = {{ 0, 0}, { 0,-1}, { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1}, {-1, 1}, {-1, 0}, {-1,-1}}; struct usage_priority { __u32 usage; /* the HID usage associated */ bool global; /* we assume all usages to be slotted, * unless global */ unsigned int slot_overwrite; /* for globals: allows to set the usage * before or after the slots */ }; /* * hid-input will convert this list into priorities: * the first element will have the highest priority * (the length of the following array) and the last * element the lowest (1). * * hid-input will then shift the priority by 8 bits to leave some space * in case drivers want to interleave other fields. * * To accommodate slotted devices, the slot priority is * defined in the next 8 bits (defined by 0xff - slot). * * If drivers want to add fields before those, hid-input will * leave out the first 8 bits of the priority value. * * This still leaves us 65535 individual priority values. */ static const struct usage_priority hidinput_usages_priorities[] = { { /* Eraser (eraser touching) must always come before tipswitch */ .usage = HID_DG_ERASER, }, { /* Invert must always come before In Range */ .usage = HID_DG_INVERT, }, { /* Is the tip of the tool touching? 
*/ .usage = HID_DG_TIPSWITCH, }, { /* Tip Pressure might emulate tip switch */ .usage = HID_DG_TIPPRESSURE, }, { /* In Range needs to come after the other tool states */ .usage = HID_DG_INRANGE, }, }; #define map_abs(c) hid_map_usage(hidinput, usage, &bit, &max, EV_ABS, (c)) #define map_rel(c) hid_map_usage(hidinput, usage, &bit, &max, EV_REL, (c)) #define map_key(c) hid_map_usage(hidinput, usage, &bit, &max, EV_KEY, (c)) #define map_led(c) hid_map_usage(hidinput, usage, &bit, &max, EV_LED, (c)) #define map_msc(c) hid_map_usage(hidinput, usage, &bit, &max, EV_MSC, (c)) #define map_abs_clear(c) hid_map_usage_clear(hidinput, usage, &bit, \ &max, EV_ABS, (c)) #define map_key_clear(c) hid_map_usage_clear(hidinput, usage, &bit, \ &max, EV_KEY, (c)) static bool match_scancode(struct hid_usage *usage, unsigned int cur_idx, unsigned int scancode) { return (usage->hid & (HID_USAGE_PAGE | HID_USAGE)) == scancode; } static bool match_keycode(struct hid_usage *usage, unsigned int cur_idx, unsigned int keycode) { /* * We should exclude unmapped usages when doing lookup by keycode. */ return (usage->type == EV_KEY && usage->code == keycode); } static bool match_index(struct hid_usage *usage, unsigned int cur_idx, unsigned int idx) { return cur_idx == idx; } typedef bool (*hid_usage_cmp_t)(struct hid_usage *usage, unsigned int cur_idx, unsigned int val); static struct hid_usage *hidinput_find_key(struct hid_device *hid, hid_usage_cmp_t match, unsigned int value, unsigned int *usage_idx) { unsigned int i, j, k, cur_idx = 0; struct hid_report *report; struct hid_usage *usage; for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) { for (j = 0; j < report->field[i]->maxusage; j++) { usage = report->field[i]->usage + j; if (usage->type == EV_KEY || usage->type == 0) { if (match(usage, cur_idx, value)) { if (usage_idx) *usage_idx = cur_idx; return usage; } cur_idx++; } } } } } return NULL; } static struct hid_usage *hidinput_locate_usage(struct hid_device *hid, const struct input_keymap_entry *ke, unsigned int *index) { struct hid_usage *usage; unsigned int scancode; if (ke->flags & INPUT_KEYMAP_BY_INDEX) usage = hidinput_find_key(hid, match_index, ke->index, index); else if (input_scancode_to_scalar(ke, &scancode) == 0) usage = hidinput_find_key(hid, match_scancode, scancode, index); else usage = NULL; return usage; } static int hidinput_getkeycode(struct input_dev *dev, struct input_keymap_entry *ke) { struct hid_device *hid = input_get_drvdata(dev); struct hid_usage *usage; unsigned int scancode, index; usage = hidinput_locate_usage(hid, ke, &index); if (usage) { ke->keycode = usage->type == EV_KEY ? usage->code : KEY_RESERVED; ke->index = index; scancode = usage->hid & (HID_USAGE_PAGE | HID_USAGE); ke->len = sizeof(scancode); memcpy(ke->scancode, &scancode, sizeof(scancode)); return 0; } return -EINVAL; } static int hidinput_setkeycode(struct input_dev *dev, const struct input_keymap_entry *ke, unsigned int *old_keycode) { struct hid_device *hid = input_get_drvdata(dev); struct hid_usage *usage; usage = hidinput_locate_usage(hid, ke, NULL); if (usage) { *old_keycode = usage->type == EV_KEY ? 
usage->code : KEY_RESERVED; usage->type = EV_KEY; usage->code = ke->keycode; clear_bit(*old_keycode, dev->keybit); set_bit(usage->code, dev->keybit); dbg_hid("Assigned keycode %d to HID usage code %x\n", usage->code, usage->hid); /* * Set the keybit for the old keycode if the old keycode is used * by another key */ if (hidinput_find_key(hid, match_keycode, *old_keycode, NULL)) set_bit(*old_keycode, dev->keybit); return 0; } return -EINVAL; } /** * hidinput_calc_abs_res - calculate an absolute axis resolution * @field: the HID report field to calculate resolution for * @code: axis code * * The formula is: * (logical_maximum - logical_minimum) * resolution = ---------------------------------------------------------- * (physical_maximum - physical_minimum) * 10 ^ unit_exponent * * as seen in the HID specification v1.11 6.2.2.7 Global Items. * * Only exponent 1 length units are processed. Centimeters and inches are * converted to millimeters. Degrees are converted to radians. */ __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code) { __s32 unit_exponent = field->unit_exponent; __s32 logical_extents = field->logical_maximum - field->logical_minimum; __s32 physical_extents = field->physical_maximum - field->physical_minimum; __s32 prev; /* Check if the extents are sane */ if (logical_extents <= 0 || physical_extents <= 0) return 0; /* * Verify and convert units. * See HID specification v1.11 6.2.2.7 Global Items for unit decoding */ switch (code) { case ABS_X: case ABS_Y: case ABS_Z: case ABS_MT_POSITION_X: case ABS_MT_POSITION_Y: case ABS_MT_TOOL_X: case ABS_MT_TOOL_Y: case ABS_MT_TOUCH_MAJOR: case ABS_MT_TOUCH_MINOR: if (field->unit == 0x11) { /* If centimeters */ /* Convert to millimeters */ unit_exponent += 1; } else if (field->unit == 0x13) { /* If inches */ /* Convert to millimeters */ prev = physical_extents; physical_extents *= 254; if (physical_extents < prev) return 0; unit_exponent -= 1; } else { return 0; } break; case ABS_RX: case ABS_RY: case ABS_RZ: case ABS_WHEEL: case ABS_TILT_X: case ABS_TILT_Y: if (field->unit == 0x14) { /* If degrees */ /* Convert to radians */ prev = logical_extents; logical_extents *= 573; if (logical_extents < prev) return 0; unit_exponent += 1; } else if (field->unit != 0x12) { /* If not radians */ return 0; } break; default: return 0; } /* Apply negative unit exponent */ for (; unit_exponent < 0; unit_exponent++) { prev = logical_extents; logical_extents *= 10; if (logical_extents < prev) return 0; } /* Apply positive unit exponent */ for (; unit_exponent > 0; unit_exponent--) { prev = physical_extents; physical_extents *= 10; if (physical_extents < prev) return 0; } /* Calculate resolution */ return DIV_ROUND_CLOSEST(logical_extents, physical_extents); } EXPORT_SYMBOL_GPL(hidinput_calc_abs_res); #ifdef CONFIG_HID_BATTERY_STRENGTH static enum power_supply_property hidinput_battery_props[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_SCOPE, }; #define HID_BATTERY_QUIRK_PERCENT (1 << 0) /* always reports percent */ #define HID_BATTERY_QUIRK_FEATURE (1 << 1) /* ask for feature report */ #define HID_BATTERY_QUIRK_IGNORE (1 << 2) /* completely ignore the battery */ #define HID_BATTERY_QUIRK_AVOID_QUERY (1 << 3) /* do not query the battery */ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), HID_BATTERY_QUIRK_PERCENT | 
HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_3), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), HID_BATTERY_QUIRK_IGNORE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_ASUS_TP420IA_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_ASUS_GV301RA_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_MW), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_SW), HID_BATTERY_QUIRK_AVOID_QUERY }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_EU0009NV), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_13_AW0020NG), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_13T_AW100), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V1), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15_EU0556NG), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_CHROMEBOOK_TROGDOR_POMPOM), HID_BATTERY_QUIRK_AVOID_QUERY }, {} }; static unsigned find_battery_quirk(struct hid_device *hdev) { unsigned quirks = 0; const struct hid_device_id *match; match = hid_match_id(hdev, hid_battery_quirks); if (match != NULL) quirks = match->driver_data; return quirks; } static int 
hidinput_scale_battery_capacity(struct hid_device *dev, int value) { if (dev->battery_min < dev->battery_max && value >= dev->battery_min && value <= dev->battery_max) value = ((value - dev->battery_min) * 100) / (dev->battery_max - dev->battery_min); return value; } static int hidinput_query_battery_capacity(struct hid_device *dev) { u8 *buf; int ret; buf = kmalloc(4, GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(dev, dev->battery_report_id, buf, 4, dev->battery_report_type, HID_REQ_GET_REPORT); if (ret < 2) { kfree(buf); return -ENODATA; } ret = hidinput_scale_battery_capacity(dev, buf[1]); kfree(buf); return ret; } static int hidinput_get_battery_property(struct power_supply *psy, enum power_supply_property prop, union power_supply_propval *val) { struct hid_device *dev = power_supply_get_drvdata(psy); int value; int ret = 0; switch (prop) { case POWER_SUPPLY_PROP_PRESENT: case POWER_SUPPLY_PROP_ONLINE: val->intval = 1; break; case POWER_SUPPLY_PROP_CAPACITY: if (dev->battery_status != HID_BATTERY_REPORTED && !dev->battery_avoid_query) { value = hidinput_query_battery_capacity(dev); if (value < 0) return value; } else { value = dev->battery_capacity; } val->intval = value; break; case POWER_SUPPLY_PROP_MODEL_NAME: val->strval = dev->name; break; case POWER_SUPPLY_PROP_STATUS: if (dev->battery_status != HID_BATTERY_REPORTED && !dev->battery_avoid_query) { value = hidinput_query_battery_capacity(dev); if (value < 0) return value; dev->battery_capacity = value; dev->battery_status = HID_BATTERY_QUERIED; } if (dev->battery_status == HID_BATTERY_UNKNOWN) val->intval = POWER_SUPPLY_STATUS_UNKNOWN; else val->intval = dev->battery_charge_status; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; default: ret = -EINVAL; break; } return ret; } static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field, bool is_percentage) { struct power_supply_desc *psy_desc; struct power_supply_config psy_cfg = { .drv_data = dev, }; unsigned quirks; s32 min, max; int error; if (dev->battery) return 0; /* already initialized? */ quirks = find_battery_quirk(dev); hid_dbg(dev, "device %x:%x:%x %d quirks %d\n", dev->bus, dev->vendor, dev->product, dev->version, quirks); if (quirks & HID_BATTERY_QUIRK_IGNORE) return 0; psy_desc = kzalloc(sizeof(*psy_desc), GFP_KERNEL); if (!psy_desc) return -ENOMEM; psy_desc->name = kasprintf(GFP_KERNEL, "hid-%s-battery", strlen(dev->uniq) ? dev->uniq : dev_name(&dev->dev)); if (!psy_desc->name) { error = -ENOMEM; goto err_free_mem; } psy_desc->type = POWER_SUPPLY_TYPE_BATTERY; psy_desc->properties = hidinput_battery_props; psy_desc->num_properties = ARRAY_SIZE(hidinput_battery_props); psy_desc->use_for_apm = 0; psy_desc->get_property = hidinput_get_battery_property; min = field->logical_minimum; max = field->logical_maximum; if (is_percentage || (quirks & HID_BATTERY_QUIRK_PERCENT)) { min = 0; max = 100; } if (quirks & HID_BATTERY_QUIRK_FEATURE) report_type = HID_FEATURE_REPORT; dev->battery_min = min; dev->battery_max = max; dev->battery_report_type = report_type; dev->battery_report_id = field->report->id; dev->battery_charge_status = POWER_SUPPLY_STATUS_DISCHARGING; /* * Stylus is normally not connected to the device and thus we * can't query the device and get meaningful battery strength. * We have to wait for the device to report it on its own. 
*/ dev->battery_avoid_query = report_type == HID_INPUT_REPORT && field->physical == HID_DG_STYLUS; if (quirks & HID_BATTERY_QUIRK_AVOID_QUERY) dev->battery_avoid_query = true; dev->battery = power_supply_register(&dev->dev, psy_desc, &psy_cfg); if (IS_ERR(dev->battery)) { error = PTR_ERR(dev->battery); hid_warn(dev, "can't register power supply: %d\n", error); goto err_free_name; } power_supply_powers(dev->battery, &dev->dev); return 0; err_free_name: kfree(psy_desc->name); err_free_mem: kfree(psy_desc); dev->battery = NULL; return error; } static void hidinput_cleanup_battery(struct hid_device *dev) { const struct power_supply_desc *psy_desc; if (!dev->battery) return; psy_desc = dev->battery->desc; power_supply_unregister(dev->battery); kfree(psy_desc->name); kfree(psy_desc); dev->battery = NULL; } static void hidinput_update_battery(struct hid_device *dev, int value) { int capacity; if (!dev->battery) return; if (value == 0 || value < dev->battery_min || value > dev->battery_max) return; capacity = hidinput_scale_battery_capacity(dev, value); if (dev->battery_status != HID_BATTERY_REPORTED || capacity != dev->battery_capacity || ktime_after(ktime_get_coarse(), dev->battery_ratelimit_time)) { dev->battery_capacity = capacity; dev->battery_status = HID_BATTERY_REPORTED; dev->battery_ratelimit_time = ktime_add_ms(ktime_get_coarse(), 30 * 1000); power_supply_changed(dev->battery); } } static bool hidinput_set_battery_charge_status(struct hid_device *dev, unsigned int usage, int value) { switch (usage) { case HID_BAT_CHARGING: dev->battery_charge_status = value ? POWER_SUPPLY_STATUS_CHARGING : POWER_SUPPLY_STATUS_DISCHARGING; return true; } return false; } #else /* !CONFIG_HID_BATTERY_STRENGTH */ static int hidinput_setup_battery(struct hid_device *dev, unsigned report_type, struct hid_field *field, bool is_percentage) { return 0; } static void hidinput_cleanup_battery(struct hid_device *dev) { } static void hidinput_update_battery(struct hid_device *dev, int value) { } static bool hidinput_set_battery_charge_status(struct hid_device *dev, unsigned int usage, int value) { return false; } #endif /* CONFIG_HID_BATTERY_STRENGTH */ static bool hidinput_field_in_collection(struct hid_device *device, struct hid_field *field, unsigned int type, unsigned int usage) { struct hid_collection *collection; collection = &device->collection[field->usage->collection_index]; return collection->type == type && collection->usage == usage; } static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned int usage_index) { struct input_dev *input = hidinput->input; struct hid_device *device = input_get_drvdata(input); const struct usage_priority *usage_priority = NULL; int max = 0, code; unsigned int i = 0; unsigned long *bit = NULL; field->hidinput = hidinput; if (field->flags & HID_MAIN_ITEM_CONSTANT) goto ignore; /* Ignore if report count is out of bounds. 
*/ if (field->report_count < 1) goto ignore; /* only LED usages are supported in output fields */ if (field->report_type == HID_OUTPUT_REPORT && (usage->hid & HID_USAGE_PAGE) != HID_UP_LED) { goto ignore; } /* assign a priority based on the static list declared here */ for (i = 0; i < ARRAY_SIZE(hidinput_usages_priorities); i++) { if (usage->hid == hidinput_usages_priorities[i].usage) { usage_priority = &hidinput_usages_priorities[i]; field->usages_priorities[usage_index] = (ARRAY_SIZE(hidinput_usages_priorities) - i) << 8; break; } } /* * For slotted devices, we need to also add the slot index * in the priority. */ if (usage_priority && usage_priority->global) field->usages_priorities[usage_index] |= usage_priority->slot_overwrite; else field->usages_priorities[usage_index] |= (0xff - field->slot_idx) << 16; if (device->driver->input_mapping) { int ret = device->driver->input_mapping(device, hidinput, field, usage, &bit, &max); if (ret > 0) goto mapped; if (ret < 0) goto ignore; } switch (usage->hid & HID_USAGE_PAGE) { case HID_UP_UNDEFINED: goto ignore; case HID_UP_KEYBOARD: set_bit(EV_REP, input->evbit); if ((usage->hid & HID_USAGE) < 256) { if (!hid_keyboard[usage->hid & HID_USAGE]) goto ignore; map_key_clear(hid_keyboard[usage->hid & HID_USAGE]); } else map_key(KEY_UNKNOWN); break; case HID_UP_BUTTON: code = ((usage->hid - 1) & HID_USAGE); switch (field->application) { case HID_GD_MOUSE: case HID_GD_POINTER: code += BTN_MOUSE; break; case HID_GD_JOYSTICK: if (code <= 0xf) code += BTN_JOYSTICK; else code += BTN_TRIGGER_HAPPY - 0x10; break; case HID_GD_GAMEPAD: if (code <= 0xf) code += BTN_GAMEPAD; else code += BTN_TRIGGER_HAPPY - 0x10; break; case HID_CP_CONSUMER_CONTROL: if (hidinput_field_in_collection(device, field, HID_COLLECTION_NAMED_ARRAY, HID_CP_PROGRAMMABLEBUTTONS)) { if (code <= 0x1d) code += KEY_MACRO1; else code += BTN_TRIGGER_HAPPY - 0x1e; break; } fallthrough; default: switch (field->physical) { case HID_GD_MOUSE: case HID_GD_POINTER: code += BTN_MOUSE; break; case HID_GD_JOYSTICK: code += BTN_JOYSTICK; break; case HID_GD_GAMEPAD: code += BTN_GAMEPAD; break; default: code += BTN_MISC; } } map_key(code); break; case HID_UP_SIMULATION: switch (usage->hid & 0xffff) { case 0xba: map_abs(ABS_RUDDER); break; case 0xbb: map_abs(ABS_THROTTLE); break; case 0xc4: map_abs(ABS_GAS); break; case 0xc5: map_abs(ABS_BRAKE); break; case 0xc8: map_abs(ABS_WHEEL); break; default: goto ignore; } break; case HID_UP_GENDESK: if ((usage->hid & 0xf0) == 0x80) { /* SystemControl */ switch (usage->hid & 0xf) { case 0x1: map_key_clear(KEY_POWER); break; case 0x2: map_key_clear(KEY_SLEEP); break; case 0x3: map_key_clear(KEY_WAKEUP); break; case 0x4: map_key_clear(KEY_CONTEXT_MENU); break; case 0x5: map_key_clear(KEY_MENU); break; case 0x6: map_key_clear(KEY_PROG1); break; case 0x7: map_key_clear(KEY_HELP); break; case 0x8: map_key_clear(KEY_EXIT); break; case 0x9: map_key_clear(KEY_SELECT); break; case 0xa: map_key_clear(KEY_RIGHT); break; case 0xb: map_key_clear(KEY_LEFT); break; case 0xc: map_key_clear(KEY_UP); break; case 0xd: map_key_clear(KEY_DOWN); break; case 0xe: map_key_clear(KEY_POWER2); break; case 0xf: map_key_clear(KEY_RESTART); break; default: goto unknown; } break; } if ((usage->hid & 0xf0) == 0xa0) { /* SystemControl */ switch (usage->hid & 0xf) { case 0x9: map_key_clear(KEY_MICMUTE); break; default: goto ignore; } break; } if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */ switch (usage->hid & 0xf) { case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break; default: goto ignore; } 
break; } /* * Some lazy vendors declare 255 usages for System Control, * leading to the creation of ABS_X|Y axis and too many others. * It wouldn't be a problem if joydev doesn't consider the * device as a joystick then. */ if (field->application == HID_GD_SYSTEM_CONTROL) goto ignore; if ((usage->hid & 0xf0) == 0x90) { /* D-pad */ switch (usage->hid) { case HID_GD_UP: usage->hat_dir = 1; break; case HID_GD_DOWN: usage->hat_dir = 5; break; case HID_GD_RIGHT: usage->hat_dir = 3; break; case HID_GD_LEFT: usage->hat_dir = 7; break; default: goto unknown; } if (field->dpad) { map_abs(field->dpad); goto ignore; } map_abs(ABS_HAT0X); break; } switch (usage->hid) { /* These usage IDs map directly to the usage codes. */ case HID_GD_X: case HID_GD_Y: case HID_GD_Z: case HID_GD_RX: case HID_GD_RY: case HID_GD_RZ: if (field->flags & HID_MAIN_ITEM_RELATIVE) map_rel(usage->hid & 0xf); else map_abs_clear(usage->hid & 0xf); break; case HID_GD_WHEEL: if (field->flags & HID_MAIN_ITEM_RELATIVE) { set_bit(REL_WHEEL, input->relbit); map_rel(REL_WHEEL_HI_RES); } else { map_abs(usage->hid & 0xf); } break; case HID_GD_SLIDER: case HID_GD_DIAL: if (field->flags & HID_MAIN_ITEM_RELATIVE) map_rel(usage->hid & 0xf); else map_abs(usage->hid & 0xf); break; case HID_GD_HATSWITCH: usage->hat_min = field->logical_minimum; usage->hat_max = field->logical_maximum; map_abs(ABS_HAT0X); break; case HID_GD_START: map_key_clear(BTN_START); break; case HID_GD_SELECT: map_key_clear(BTN_SELECT); break; case HID_GD_RFKILL_BTN: /* MS wireless radio ctl extension, also check CA */ if (field->application == HID_GD_WIRELESS_RADIO_CTLS) { map_key_clear(KEY_RFKILL); /* We need to simulate the btn release */ field->flags |= HID_MAIN_ITEM_RELATIVE; break; } goto unknown; default: goto unknown; } break; case HID_UP_LED: switch (usage->hid & 0xffff) { /* HID-Value: */ case 0x01: map_led (LED_NUML); break; /* "Num Lock" */ case 0x02: map_led (LED_CAPSL); break; /* "Caps Lock" */ case 0x03: map_led (LED_SCROLLL); break; /* "Scroll Lock" */ case 0x04: map_led (LED_COMPOSE); break; /* "Compose" */ case 0x05: map_led (LED_KANA); break; /* "Kana" */ case 0x27: map_led (LED_SLEEP); break; /* "Stand-By" */ case 0x4c: map_led (LED_SUSPEND); break; /* "System Suspend" */ case 0x09: map_led (LED_MUTE); break; /* "Mute" */ case 0x4b: map_led (LED_MISC); break; /* "Generic Indicator" */ case 0x19: map_led (LED_MAIL); break; /* "Message Waiting" */ case 0x4d: map_led (LED_CHARGING); break; /* "External Power Connected" */ default: goto ignore; } break; case HID_UP_DIGITIZER: if ((field->application & 0xff) == 0x01) /* Digitizer */ __set_bit(INPUT_PROP_POINTER, input->propbit); else if ((field->application & 0xff) == 0x02) /* Pen */ __set_bit(INPUT_PROP_DIRECT, input->propbit); switch (usage->hid & 0xff) { case 0x00: /* Undefined */ goto ignore; case 0x30: /* TipPressure */ if (!test_bit(BTN_TOUCH, input->keybit)) { device->quirks |= HID_QUIRK_NOTOUCH; set_bit(EV_KEY, input->evbit); set_bit(BTN_TOUCH, input->keybit); } map_abs_clear(ABS_PRESSURE); break; case 0x32: /* InRange */ switch (field->physical) { case HID_DG_PUCK: map_key(BTN_TOOL_MOUSE); break; case HID_DG_FINGER: map_key(BTN_TOOL_FINGER); break; default: /* * If the physical is not given, * rely on the application. 
*/ if (!field->physical) { switch (field->application) { case HID_DG_TOUCHSCREEN: case HID_DG_TOUCHPAD: map_key_clear(BTN_TOOL_FINGER); break; default: map_key_clear(BTN_TOOL_PEN); } } else { map_key(BTN_TOOL_PEN); } break; } break; case 0x3b: /* Battery Strength */ hidinput_setup_battery(device, HID_INPUT_REPORT, field, false); usage->type = EV_PWR; return; case 0x3c: /* Invert */ device->quirks &= ~HID_QUIRK_NOINVERT; map_key_clear(BTN_TOOL_RUBBER); break; case 0x3d: /* X Tilt */ map_abs_clear(ABS_TILT_X); break; case 0x3e: /* Y Tilt */ map_abs_clear(ABS_TILT_Y); break; case 0x33: /* Touch */ case 0x42: /* TipSwitch */ case 0x43: /* TipSwitch2 */ device->quirks &= ~HID_QUIRK_NOTOUCH; map_key_clear(BTN_TOUCH); break; case 0x44: /* BarrelSwitch */ map_key_clear(BTN_STYLUS); break; case 0x45: /* ERASER */ /* * This event is reported when eraser tip touches the surface. * Actual eraser (BTN_TOOL_RUBBER) is set and released either * by Invert if tool reports proximity or by Eraser directly. */ if (!test_bit(BTN_TOOL_RUBBER, input->keybit)) { device->quirks |= HID_QUIRK_NOINVERT; set_bit(BTN_TOOL_RUBBER, input->keybit); } map_key_clear(BTN_TOUCH); break; case 0x46: /* TabletPick */ case 0x5a: /* SecondaryBarrelSwitch */ map_key_clear(BTN_STYLUS2); break; case 0x5b: /* TransducerSerialNumber */ case 0x6e: /* TransducerSerialNumber2 */ map_msc(MSC_SERIAL); break; default: goto unknown; } break; case HID_UP_TELEPHONY: switch (usage->hid & HID_USAGE) { case 0x2f: map_key_clear(KEY_MICMUTE); break; case 0xb0: map_key_clear(KEY_NUMERIC_0); break; case 0xb1: map_key_clear(KEY_NUMERIC_1); break; case 0xb2: map_key_clear(KEY_NUMERIC_2); break; case 0xb3: map_key_clear(KEY_NUMERIC_3); break; case 0xb4: map_key_clear(KEY_NUMERIC_4); break; case 0xb5: map_key_clear(KEY_NUMERIC_5); break; case 0xb6: map_key_clear(KEY_NUMERIC_6); break; case 0xb7: map_key_clear(KEY_NUMERIC_7); break; case 0xb8: map_key_clear(KEY_NUMERIC_8); break; case 0xb9: map_key_clear(KEY_NUMERIC_9); break; case 0xba: map_key_clear(KEY_NUMERIC_STAR); break; case 0xbb: map_key_clear(KEY_NUMERIC_POUND); break; case 0xbc: map_key_clear(KEY_NUMERIC_A); break; case 0xbd: map_key_clear(KEY_NUMERIC_B); break; case 0xbe: map_key_clear(KEY_NUMERIC_C); break; case 0xbf: map_key_clear(KEY_NUMERIC_D); break; default: goto ignore; } break; case HID_UP_CONSUMER: /* USB HUT v1.12, pages 75-84 */ switch (usage->hid & HID_USAGE) { case 0x000: goto ignore; case 0x030: map_key_clear(KEY_POWER); break; case 0x031: map_key_clear(KEY_RESTART); break; case 0x032: map_key_clear(KEY_SLEEP); break; case 0x034: map_key_clear(KEY_SLEEP); break; case 0x035: map_key_clear(KEY_KBDILLUMTOGGLE); break; case 0x036: map_key_clear(BTN_MISC); break; case 0x040: map_key_clear(KEY_MENU); break; /* Menu */ case 0x041: map_key_clear(KEY_SELECT); break; /* Menu Pick */ case 0x042: map_key_clear(KEY_UP); break; /* Menu Up */ case 0x043: map_key_clear(KEY_DOWN); break; /* Menu Down */ case 0x044: map_key_clear(KEY_LEFT); break; /* Menu Left */ case 0x045: map_key_clear(KEY_RIGHT); break; /* Menu Right */ case 0x046: map_key_clear(KEY_ESC); break; /* Menu Escape */ case 0x047: map_key_clear(KEY_KPPLUS); break; /* Menu Value Increase */ case 0x048: map_key_clear(KEY_KPMINUS); break; /* Menu Value Decrease */ case 0x060: map_key_clear(KEY_INFO); break; /* Data On Screen */ case 0x061: map_key_clear(KEY_SUBTITLE); break; /* Closed Caption */ case 0x063: map_key_clear(KEY_VCR); break; /* VCR/TV */ case 0x065: map_key_clear(KEY_CAMERA); break; /* Snapshot */ case 0x069: 
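/* 0x069-0x06c: the colored (Red/Green/Blue/Yellow) menu buttons */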
map_key_clear(KEY_RED); break; case 0x06a: map_key_clear(KEY_GREEN); break; case 0x06b: map_key_clear(KEY_BLUE); break; case 0x06c: map_key_clear(KEY_YELLOW); break; case 0x06d: map_key_clear(KEY_ASPECT_RATIO); break; case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break; case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break; case 0x072: map_key_clear(KEY_BRIGHTNESS_TOGGLE); break; case 0x073: map_key_clear(KEY_BRIGHTNESS_MIN); break; case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break; case 0x076: map_key_clear(KEY_CAMERA_ACCESS_ENABLE); break; case 0x077: map_key_clear(KEY_CAMERA_ACCESS_DISABLE); break; case 0x078: map_key_clear(KEY_CAMERA_ACCESS_TOGGLE); break; case 0x079: map_key_clear(KEY_KBDILLUMUP); break; case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break; case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break; case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break; case 0x088: map_key_clear(KEY_PC); break; case 0x089: map_key_clear(KEY_TV); break; case 0x08a: map_key_clear(KEY_WWW); break; case 0x08b: map_key_clear(KEY_DVD); break; case 0x08c: map_key_clear(KEY_PHONE); break; case 0x08d: map_key_clear(KEY_PROGRAM); break; case 0x08e: map_key_clear(KEY_VIDEOPHONE); break; case 0x08f: map_key_clear(KEY_GAMES); break; case 0x090: map_key_clear(KEY_MEMO); break; case 0x091: map_key_clear(KEY_CD); break; case 0x092: map_key_clear(KEY_VCR); break; case 0x093: map_key_clear(KEY_TUNER); break; case 0x094: map_key_clear(KEY_EXIT); break; case 0x095: map_key_clear(KEY_HELP); break; case 0x096: map_key_clear(KEY_TAPE); break; case 0x097: map_key_clear(KEY_TV2); break; case 0x098: map_key_clear(KEY_SAT); break; case 0x09a: map_key_clear(KEY_PVR); break; case 0x09c: map_key_clear(KEY_CHANNELUP); break; case 0x09d: map_key_clear(KEY_CHANNELDOWN); break; case 0x0a0: map_key_clear(KEY_VCR2); break; case 0x0b0: map_key_clear(KEY_PLAY); break; case 0x0b1: map_key_clear(KEY_PAUSE); break; case 0x0b2: map_key_clear(KEY_RECORD); break; case 0x0b3: map_key_clear(KEY_FASTFORWARD); break; case 0x0b4: map_key_clear(KEY_REWIND); break; case 0x0b5: map_key_clear(KEY_NEXTSONG); break; case 0x0b6: map_key_clear(KEY_PREVIOUSSONG); break; case 0x0b7: map_key_clear(KEY_STOPCD); break; case 0x0b8: map_key_clear(KEY_EJECTCD); break; case 0x0bc: map_key_clear(KEY_MEDIA_REPEAT); break; case 0x0b9: map_key_clear(KEY_SHUFFLE); break; case 0x0bf: map_key_clear(KEY_SLOW); break; case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break; case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break; case 0x0d8: map_key_clear(KEY_DICTATE); break; case 0x0d9: map_key_clear(KEY_EMOJI_PICKER); break; case 0x0e0: map_abs_clear(ABS_VOLUME); break; case 0x0e2: map_key_clear(KEY_MUTE); break; case 0x0e5: map_key_clear(KEY_BASSBOOST); break; case 0x0e9: map_key_clear(KEY_VOLUMEUP); break; case 0x0ea: map_key_clear(KEY_VOLUMEDOWN); break; case 0x0f5: map_key_clear(KEY_SLOW); break; case 0x181: map_key_clear(KEY_BUTTONCONFIG); break; case 0x182: map_key_clear(KEY_BOOKMARKS); break; case 0x183: map_key_clear(KEY_CONFIG); break; case 0x184: map_key_clear(KEY_WORDPROCESSOR); break; case 0x185: map_key_clear(KEY_EDITOR); break; case 0x186: map_key_clear(KEY_SPREADSHEET); break; case 0x187: map_key_clear(KEY_GRAPHICSEDITOR); break; case 0x188: map_key_clear(KEY_PRESENTATION); break; case 0x189: map_key_clear(KEY_DATABASE); break; case 0x18a: map_key_clear(KEY_MAIL); break; case 0x18b: map_key_clear(KEY_NEWS); break; case 
0x18c: map_key_clear(KEY_VOICEMAIL); break; case 0x18d: map_key_clear(KEY_ADDRESSBOOK); break; case 0x18e: map_key_clear(KEY_CALENDAR); break; case 0x18f: map_key_clear(KEY_TASKMANAGER); break; case 0x190: map_key_clear(KEY_JOURNAL); break; case 0x191: map_key_clear(KEY_FINANCE); break; case 0x192: map_key_clear(KEY_CALC); break; case 0x193: map_key_clear(KEY_PLAYER); break; case 0x194: map_key_clear(KEY_FILE); break; case 0x196: map_key_clear(KEY_WWW); break; case 0x199: map_key_clear(KEY_CHAT); break; case 0x19c: map_key_clear(KEY_LOGOFF); break; case 0x19e: map_key_clear(KEY_COFFEE); break; case 0x19f: map_key_clear(KEY_CONTROLPANEL); break; case 0x1a2: map_key_clear(KEY_APPSELECT); break; case 0x1a3: map_key_clear(KEY_NEXT); break; case 0x1a4: map_key_clear(KEY_PREVIOUS); break; case 0x1a6: map_key_clear(KEY_HELP); break; case 0x1a7: map_key_clear(KEY_DOCUMENTS); break; case 0x1ab: map_key_clear(KEY_SPELLCHECK); break; case 0x1ae: map_key_clear(KEY_KEYBOARD); break; case 0x1b1: map_key_clear(KEY_SCREENSAVER); break; case 0x1b4: map_key_clear(KEY_FILE); break; case 0x1b6: map_key_clear(KEY_IMAGES); break; case 0x1b7: map_key_clear(KEY_AUDIO); break; case 0x1b8: map_key_clear(KEY_VIDEO); break; case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; case 0x1cb: map_key_clear(KEY_ASSISTANT); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; case 0x204: map_key_clear(KEY_EXIT); break; case 0x207: map_key_clear(KEY_SAVE); break; case 0x208: map_key_clear(KEY_PRINT); break; case 0x209: map_key_clear(KEY_PROPS); break; case 0x21a: map_key_clear(KEY_UNDO); break; case 0x21b: map_key_clear(KEY_COPY); break; case 0x21c: map_key_clear(KEY_CUT); break; case 0x21d: map_key_clear(KEY_PASTE); break; case 0x21f: map_key_clear(KEY_FIND); break; case 0x221: map_key_clear(KEY_SEARCH); break; case 0x222: map_key_clear(KEY_GOTO); break; case 0x223: map_key_clear(KEY_HOMEPAGE); break; case 0x224: map_key_clear(KEY_BACK); break; case 0x225: map_key_clear(KEY_FORWARD); break; case 0x226: map_key_clear(KEY_STOP); break; case 0x227: map_key_clear(KEY_REFRESH); break; case 0x22a: map_key_clear(KEY_BOOKMARKS); break; case 0x22d: map_key_clear(KEY_ZOOMIN); break; case 0x22e: map_key_clear(KEY_ZOOMOUT); break; case 0x22f: map_key_clear(KEY_ZOOMRESET); break; case 0x232: map_key_clear(KEY_FULL_SCREEN); break; case 0x233: map_key_clear(KEY_SCROLLUP); break; case 0x234: map_key_clear(KEY_SCROLLDOWN); break; case 0x238: /* AC Pan */ set_bit(REL_HWHEEL, input->relbit); map_rel(REL_HWHEEL_HI_RES); break; case 0x23d: map_key_clear(KEY_EDIT); break; case 0x25f: map_key_clear(KEY_CANCEL); break; case 0x269: map_key_clear(KEY_INSERT); break; case 0x26a: map_key_clear(KEY_DELETE); break; case 0x279: map_key_clear(KEY_REDO); break; case 0x289: map_key_clear(KEY_REPLY); break; case 0x28b: map_key_clear(KEY_FORWARDMAIL); break; case 0x28c: map_key_clear(KEY_SEND); break; case 0x29d: map_key_clear(KEY_KBD_LAYOUT_NEXT); break; case 0x2a2: map_key_clear(KEY_ALL_APPLICATIONS); break; case 0x2c7: map_key_clear(KEY_KBDINPUTASSIST_PREV); break; case 0x2c8: map_key_clear(KEY_KBDINPUTASSIST_NEXT); break; case 0x2c9: map_key_clear(KEY_KBDINPUTASSIST_PREVGROUP); break; case 0x2ca: map_key_clear(KEY_KBDINPUTASSIST_NEXTGROUP); break; case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break; case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break; case 0x29f: map_key_clear(KEY_SCALE); break; default: 
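/* any other Consumer page usage is still exported, as KEY_UNKNOWN */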
map_key_clear(KEY_UNKNOWN); } break; case HID_UP_GENDEVCTRLS: switch (usage->hid) { case HID_DC_BATTERYSTRENGTH: hidinput_setup_battery(device, HID_INPUT_REPORT, field, false); usage->type = EV_PWR; return; } goto unknown; case HID_UP_BATTERY: switch (usage->hid) { case HID_BAT_ABSOLUTESTATEOFCHARGE: hidinput_setup_battery(device, HID_INPUT_REPORT, field, true); usage->type = EV_PWR; return; case HID_BAT_CHARGING: usage->type = EV_PWR; return; } goto unknown; case HID_UP_CAMERA: switch (usage->hid & HID_USAGE) { case 0x020: map_key_clear(KEY_CAMERA_FOCUS); break; case 0x021: map_key_clear(KEY_CAMERA); break; default: goto ignore; } break; case HID_UP_HPVENDOR: /* Reported on a Dutch layout HP5308 */ set_bit(EV_REP, input->evbit); switch (usage->hid & HID_USAGE) { case 0x021: map_key_clear(KEY_PRINT); break; case 0x070: map_key_clear(KEY_HP); break; case 0x071: map_key_clear(KEY_CAMERA); break; case 0x072: map_key_clear(KEY_SOUND); break; case 0x073: map_key_clear(KEY_QUESTION); break; case 0x080: map_key_clear(KEY_EMAIL); break; case 0x081: map_key_clear(KEY_CHAT); break; case 0x082: map_key_clear(KEY_SEARCH); break; case 0x083: map_key_clear(KEY_CONNECT); break; case 0x084: map_key_clear(KEY_FINANCE); break; case 0x085: map_key_clear(KEY_SPORT); break; case 0x086: map_key_clear(KEY_SHOP); break; default: goto ignore; } break; case HID_UP_HPVENDOR2: set_bit(EV_REP, input->evbit); switch (usage->hid & HID_USAGE) { case 0x001: map_key_clear(KEY_MICMUTE); break; case 0x003: map_key_clear(KEY_BRIGHTNESSDOWN); break; case 0x004: map_key_clear(KEY_BRIGHTNESSUP); break; default: goto ignore; } break; case HID_UP_MSVENDOR: goto ignore; case HID_UP_CUSTOM: /* Reported on Logitech and Apple USB keyboards */ set_bit(EV_REP, input->evbit); goto ignore; case HID_UP_LOGIVENDOR: /* intentional fallback */ case HID_UP_LOGIVENDOR2: /* intentional fallback */ case HID_UP_LOGIVENDOR3: goto ignore; case HID_UP_PID: switch (usage->hid & HID_USAGE) { case 0xa4: map_key_clear(BTN_DEAD); break; default: goto ignore; } break; default: unknown: if (field->report_size == 1) { if (field->report->type == HID_OUTPUT_REPORT) { map_led(LED_MISC); break; } map_key(BTN_MISC); break; } if (field->flags & HID_MAIN_ITEM_RELATIVE) { map_rel(REL_MISC); break; } map_abs(ABS_MISC); break; } mapped: /* Mapping failed, bail out */ if (!bit) return; if (device->driver->input_mapped && device->driver->input_mapped(device, hidinput, field, usage, &bit, &max) < 0) { /* * The driver indicated that no further generic handling * of the usage is desired. */ return; } set_bit(usage->type, input->evbit); /* * This part is *really* controversial: * - HID aims at being generic so we should do our best to export * all incoming events * - HID describes what events are, so there is no reason for ABS_X * to be mapped to ABS_Y * - HID is using *_MISC+N as a default value, but nothing prevents * *_MISC+N to overwrite a legitimate even, which confuses userspace * (for instance ABS_MISC + 7 is ABS_MT_SLOT, which has a different * processing) * * If devices still want to use this (at their own risk), they will * have to use the quirk HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE, but * the default should be a reliable mapping. 
*/ while (usage->code <= max && test_and_set_bit(usage->code, bit)) { if (device->quirks & HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE) { usage->code = find_next_zero_bit(bit, max + 1, usage->code); } else { device->status |= HID_STAT_DUP_DETECTED; goto ignore; } } if (usage->code > max) goto ignore; if (usage->type == EV_ABS) { int a = field->logical_minimum; int b = field->logical_maximum; if ((device->quirks & HID_QUIRK_BADPAD) && (usage->code == ABS_X || usage->code == ABS_Y)) { a = field->logical_minimum = 0; b = field->logical_maximum = 255; } if (field->application == HID_GD_GAMEPAD || field->application == HID_GD_JOYSTICK) input_set_abs_params(input, usage->code, a, b, (b - a) >> 8, (b - a) >> 4); else input_set_abs_params(input, usage->code, a, b, 0, 0); input_abs_set_res(input, usage->code, hidinput_calc_abs_res(field, usage->code)); /* use a larger default input buffer for MT devices */ if (usage->code == ABS_MT_POSITION_X && input->hint_events_per_packet == 0) input_set_events_per_packet(input, 60); } if (usage->type == EV_ABS && (usage->hat_min < usage->hat_max || usage->hat_dir)) { int i; for (i = usage->code; i < usage->code + 2 && i <= max; i++) { input_set_abs_params(input, i, -1, 1, 0, 0); set_bit(i, input->absbit); } if (usage->hat_dir && !field->dpad) field->dpad = usage->code; } /* for those devices which produce Consumer volume usage as relative, * we emulate pressing volumeup/volumedown appropriate number of times * in hidinput_hid_event() */ if ((usage->type == EV_ABS) && (field->flags & HID_MAIN_ITEM_RELATIVE) && (usage->code == ABS_VOLUME)) { set_bit(KEY_VOLUMEUP, input->keybit); set_bit(KEY_VOLUMEDOWN, input->keybit); } if (usage->type == EV_KEY) { set_bit(EV_MSC, input->evbit); set_bit(MSC_SCAN, input->mscbit); } return; ignore: usage->type = 0; usage->code = 0; } static void hidinput_handle_scroll(struct hid_usage *usage, struct input_dev *input, __s32 value) { int code; int hi_res, lo_res; if (value == 0) return; if (usage->code == REL_WHEEL_HI_RES) code = REL_WHEEL; else code = REL_HWHEEL; /* * Windows reports one wheel click as value 120. Where a high-res * scroll wheel is present, a fraction of 120 is reported instead. * Our REL_WHEEL_HI_RES axis does the same because all HW must * adhere to the 120 expectation. 
*/ hi_res = value * 120/usage->resolution_multiplier; usage->wheel_accumulated += hi_res; lo_res = usage->wheel_accumulated/120; if (lo_res) usage->wheel_accumulated -= lo_res * 120; input_event(input, EV_REL, code, lo_res); input_event(input, EV_REL, usage->code, hi_res); } static void hid_report_release_tool(struct hid_report *report, struct input_dev *input, unsigned int tool) { /* if the given tool is not currently reported, ignore */ if (!test_bit(tool, input->key)) return; /* * if the given tool was previously set, release it, * release any TOUCH and send an EV_SYN */ input_event(input, EV_KEY, BTN_TOUCH, 0); input_event(input, EV_KEY, tool, 0); input_event(input, EV_SYN, SYN_REPORT, 0); report->tool = 0; } static void hid_report_set_tool(struct hid_report *report, struct input_dev *input, unsigned int new_tool) { if (report->tool != new_tool) hid_report_release_tool(report, input, report->tool); input_event(input, EV_KEY, new_tool, 1); report->tool = new_tool; } void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct input_dev *input; struct hid_report *report = field->report; unsigned *quirks = &hid->quirks; if (!usage->type) return; if (usage->type == EV_PWR) { bool handled = hidinput_set_battery_charge_status(hid, usage->hid, value); if (!handled) hidinput_update_battery(hid, value); return; } if (!field->hidinput) return; input = field->hidinput->input; if (usage->hat_min < usage->hat_max || usage->hat_dir) { int hat_dir = usage->hat_dir; if (!hat_dir) hat_dir = (value - usage->hat_min) * 8 / (usage->hat_max - usage->hat_min + 1) + 1; if (hat_dir < 0 || hat_dir > 8) hat_dir = 0; input_event(input, usage->type, usage->code , hid_hat_to_axis[hat_dir].x); input_event(input, usage->type, usage->code + 1, hid_hat_to_axis[hat_dir].y); return; } /* * Ignore out-of-range values as per HID specification, * section 5.10 and 6.2.25, when NULL state bit is present. * When it's not, clamp the value to match Microsoft's input * driver as mentioned in "Required HID usages for digitizers": * https://msdn.microsoft.com/en-us/library/windows/hardware/dn672278(v=vs.85).asp * * The logical_minimum < logical_maximum check is done so that we * don't unintentionally discard values sent by devices which * don't specify logical min and max. */ if ((field->flags & HID_MAIN_ITEM_VARIABLE) && field->logical_minimum < field->logical_maximum) { if (field->flags & HID_MAIN_ITEM_NULL_STATE && (value < field->logical_minimum || value > field->logical_maximum)) { dbg_hid("Ignoring out-of-range value %x\n", value); return; } value = clamp(value, field->logical_minimum, field->logical_maximum); } switch (usage->hid) { case HID_DG_ERASER: report->tool_active |= !!value; /* * if eraser is set, we must enforce BTN_TOOL_RUBBER * to accommodate for devices not following the spec. */ if (value) hid_report_set_tool(report, input, BTN_TOOL_RUBBER); else if (report->tool != BTN_TOOL_RUBBER) /* value is off, tool is not rubber, ignore */ return; else if (*quirks & HID_QUIRK_NOINVERT && !test_bit(BTN_TOUCH, input->key)) { /* * There is no invert to release the tool, let hid_input * send BTN_TOUCH with scancode and release the tool after. */ hid_report_release_tool(report, input, BTN_TOOL_RUBBER); return; } /* let hid-input set BTN_TOUCH */ break; case HID_DG_INVERT: report->tool_active |= !!value; /* * If invert is set, we store BTN_TOOL_RUBBER. 
*/ if (value) hid_report_set_tool(report, input, BTN_TOOL_RUBBER); else if (!report->tool_active) /* tool_active not set means Invert and Eraser are not set */ hid_report_release_tool(report, input, BTN_TOOL_RUBBER); /* no further processing */ return; case HID_DG_INRANGE: report->tool_active |= !!value; if (report->tool_active) { /* * if tool is not set but is marked as active, * assume ours */ if (!report->tool) report->tool = usage->code; /* drivers may have changed the value behind our back, resend it */ hid_report_set_tool(report, input, report->tool); } else { hid_report_release_tool(report, input, usage->code); } /* reset tool_active for the next event */ report->tool_active = false; /* no further processing */ return; case HID_DG_TIPSWITCH: report->tool_active |= !!value; /* if tool is set to RUBBER we should ignore the current value */ if (report->tool == BTN_TOOL_RUBBER) return; break; case HID_DG_TIPPRESSURE: if (*quirks & HID_QUIRK_NOTOUCH) { int a = field->logical_minimum; int b = field->logical_maximum; if (value > a + ((b - a) >> 3)) { input_event(input, EV_KEY, BTN_TOUCH, 1); report->tool_active = true; } } break; case HID_UP_PID | 0x83UL: /* Simultaneous Effects Max */ dbg_hid("Maximum Effects - %d\n",value); return; case HID_UP_PID | 0x7fUL: dbg_hid("PID Pool Report\n"); return; } switch (usage->type) { case EV_KEY: if (usage->code == 0) /* Key 0 is "unassigned", not KEY_UNKNOWN */ return; break; case EV_REL: if (usage->code == REL_WHEEL_HI_RES || usage->code == REL_HWHEEL_HI_RES) { hidinput_handle_scroll(usage, input, value); return; } break; case EV_ABS: if ((field->flags & HID_MAIN_ITEM_RELATIVE) && usage->code == ABS_VOLUME) { int count = abs(value); int direction = value > 0 ? KEY_VOLUMEUP : KEY_VOLUMEDOWN; int i; for (i = 0; i < count; i++) { input_event(input, EV_KEY, direction, 1); input_sync(input); input_event(input, EV_KEY, direction, 0); input_sync(input); } return; } else if (((*quirks & HID_QUIRK_X_INVERT) && usage->code == ABS_X) || ((*quirks & HID_QUIRK_Y_INVERT) && usage->code == ABS_Y)) value = field->logical_maximum - value; break; } /* * Ignore reports for absolute data if the data didn't change. This is * not only an optimization but also fixes 'dead' key reports. Some * RollOver implementations for localized keys (like BACKSLASH/PIPE; HID * 0x31 and 0x32) report multiple keys, even though a localized keyboard * can only have one of them physically available. The 'dead' keys * report constant 0. As all map to the same keycode, they'd confuse * the input layer. If we filter the 'dead' keys on the HID level, we * skip the keycode translation and only forward real events. 
*/ if (!(field->flags & (HID_MAIN_ITEM_RELATIVE | HID_MAIN_ITEM_BUFFERED_BYTE)) && (field->flags & HID_MAIN_ITEM_VARIABLE) && usage->usage_index < field->maxusage && value == field->value[usage->usage_index]) return; /* report the usage code as scancode if the key status has changed */ if (usage->type == EV_KEY && (!test_bit(usage->code, input->key)) == value) input_event(input, EV_MSC, MSC_SCAN, usage->hid); input_event(input, usage->type, usage->code, value); if ((field->flags & HID_MAIN_ITEM_RELATIVE) && usage->type == EV_KEY && value) { input_sync(input); input_event(input, usage->type, usage->code, 0); } } void hidinput_report_event(struct hid_device *hid, struct hid_report *report) { struct hid_input *hidinput; if (hid->quirks & HID_QUIRK_NO_INPUT_SYNC) return; list_for_each_entry(hidinput, &hid->inputs, list) input_sync(hidinput->input); } EXPORT_SYMBOL_GPL(hidinput_report_event); static int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field) { struct hid_report *report; int i, j; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { *field = report->field[i]; for (j = 0; j < (*field)->maxusage; j++) if ((*field)->usage[j].type == type && (*field)->usage[j].code == code) return j; } } return -1; } struct hid_field *hidinput_get_led_field(struct hid_device *hid) { struct hid_report *report; struct hid_field *field; int i, j; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) if (field->usage[j].type == EV_LED) return field; } } return NULL; } EXPORT_SYMBOL_GPL(hidinput_get_led_field); unsigned int hidinput_count_leds(struct hid_device *hid) { struct hid_report *report; struct hid_field *field; int i, j; unsigned int count = 0; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) if (field->usage[j].type == EV_LED && field->value[j]) count += 1; } } return count; } EXPORT_SYMBOL_GPL(hidinput_count_leds); static void hidinput_led_worker(struct work_struct *work) { struct hid_device *hid = container_of(work, struct hid_device, led_work); struct hid_field *field; struct hid_report *report; int ret; u32 len; __u8 *buf; field = hidinput_get_led_field(hid); if (!field) return; /* * field->report is accessed unlocked regarding HID core. So there might * be another incoming SET-LED request from user-space, which changes * the LED state while we assemble our outgoing buffer. However, this * doesn't matter as hid_output_report() correctly converts it into a * boolean value no matter what information is currently set on the LED * field (even garbage). So the remote device will always get a valid * request. * And in case we send a wrong value, a next led worker is spawned * for every SET-LED request so the following worker will send the * correct value, guaranteed! 
*/ report = field->report; /* use custom SET_REPORT request if possible (asynchronous) */ if (hid->ll_driver->request) return hid->ll_driver->request(hid, report, HID_REQ_SET_REPORT); /* fall back to generic raw-output-report */ len = hid_report_len(report); buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) return; hid_output_report(report, buf); /* synchronous output report */ ret = hid_hw_output_report(hid, buf, len); if (ret == -ENOSYS) hid_hw_raw_request(hid, report->id, buf, len, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); kfree(buf); } static int hidinput_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct hid_device *hid = input_get_drvdata(dev); struct hid_field *field; int offset; if (type == EV_FF) return input_ff_event(dev, type, code, value); if (type != EV_LED) return -1; if ((offset = hidinput_find_field(hid, type, code, &field)) == -1) { hid_warn(dev, "event field not found\n"); return -1; } hid_set_field(field, offset, value); schedule_work(&hid->led_work); return 0; } static int hidinput_open(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); return hid_hw_open(hid); } static void hidinput_close(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); hid_hw_close(hid); } static bool __hidinput_change_resolution_multipliers(struct hid_device *hid, struct hid_report *report, bool use_logical_max) { struct hid_usage *usage; bool update_needed = false; bool get_report_completed = false; int i, j; if (report->maxfield == 0) return false; for (i = 0; i < report->maxfield; i++) { __s32 value = use_logical_max ? report->field[i]->logical_maximum : report->field[i]->logical_minimum; /* There is no good reason for a Resolution * Multiplier to have a count other than 1. * Ignore that case. */ if (report->field[i]->report_count != 1) continue; for (j = 0; j < report->field[i]->maxusage; j++) { usage = &report->field[i]->usage[j]; if (usage->hid != HID_GD_RESOLUTION_MULTIPLIER) continue; /* * If we have more than one feature within this * report we need to fill in the bits from the * others before we can overwrite the ones for the * Resolution Multiplier. * * But if we're not allowed to read from the device, * we just bail. Such a device should not exist * anyway. */ if (!get_report_completed && report->maxfield > 1) { if (hid->quirks & HID_QUIRK_NO_INIT_REPORTS) return update_needed; hid_hw_request(hid, report, HID_REQ_GET_REPORT); hid_hw_wait(hid); get_report_completed = true; } report->field[i]->value[j] = value; update_needed = true; } } return update_needed; } static void hidinput_change_resolution_multipliers(struct hid_device *hid) { struct hid_report_enum *rep_enum; struct hid_report *rep; int ret; rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { bool update_needed = __hidinput_change_resolution_multipliers(hid, rep, true); if (update_needed) { ret = __hid_request(hid, rep, HID_REQ_SET_REPORT); if (ret) { __hidinput_change_resolution_multipliers(hid, rep, false); return; } } } /* refresh our structs */ hid_setup_resolution_multiplier(hid); } static void report_features(struct hid_device *hid) { struct hid_driver *drv = hid->driver; struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_usage *usage; int i, j; rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) for (i = 0; i < rep->maxfield; i++) { /* Ignore if report count is out of bounds. 
*/ if (rep->field[i]->report_count < 1) continue; for (j = 0; j < rep->field[i]->maxusage; j++) { usage = &rep->field[i]->usage[j]; /* Verify if Battery Strength feature is available */ if (usage->hid == HID_DC_BATTERYSTRENGTH) hidinput_setup_battery(hid, HID_FEATURE_REPORT, rep->field[i], false); if (drv->feature_mapping) drv->feature_mapping(hid, rep->field[i], usage); } } } static struct hid_input *hidinput_allocate(struct hid_device *hid, unsigned int application) { struct hid_input *hidinput = kzalloc(sizeof(*hidinput), GFP_KERNEL); struct input_dev *input_dev = input_allocate_device(); const char *suffix = NULL; size_t suffix_len, name_len; if (!hidinput || !input_dev) goto fail; if ((hid->quirks & HID_QUIRK_INPUT_PER_APP) && hid->maxapplication > 1) { switch (application) { case HID_GD_KEYBOARD: suffix = "Keyboard"; break; case HID_GD_KEYPAD: suffix = "Keypad"; break; case HID_GD_MOUSE: suffix = "Mouse"; break; case HID_DG_PEN: /* * yes, there is an issue here: * DG_PEN -> "Stylus" * DG_STYLUS -> "Pen" * But changing this now means users with config snippets * will have to change it and the test suite will not be happy. */ suffix = "Stylus"; break; case HID_DG_STYLUS: suffix = "Pen"; break; case HID_DG_TOUCHSCREEN: suffix = "Touchscreen"; break; case HID_DG_TOUCHPAD: suffix = "Touchpad"; break; case HID_GD_SYSTEM_CONTROL: suffix = "System Control"; break; case HID_CP_CONSUMER_CONTROL: suffix = "Consumer Control"; break; case HID_GD_WIRELESS_RADIO_CTLS: suffix = "Wireless Radio Control"; break; case HID_GD_SYSTEM_MULTIAXIS: suffix = "System Multi Axis"; break; default: break; } } if (suffix) { name_len = strlen(hid->name); suffix_len = strlen(suffix); if ((name_len < suffix_len) || strcmp(hid->name + name_len - suffix_len, suffix)) { hidinput->name = kasprintf(GFP_KERNEL, "%s %s", hid->name, suffix); if (!hidinput->name) goto fail; } } input_set_drvdata(input_dev, hid); input_dev->event = hidinput_input_event; input_dev->open = hidinput_open; input_dev->close = hidinput_close; input_dev->setkeycode = hidinput_setkeycode; input_dev->getkeycode = hidinput_getkeycode; input_dev->name = hidinput->name ? 
hidinput->name : hid->name; input_dev->phys = hid->phys; input_dev->uniq = hid->uniq; input_dev->id.bustype = hid->bus; input_dev->id.vendor = hid->vendor; input_dev->id.product = hid->product; input_dev->id.version = hid->version; input_dev->dev.parent = &hid->dev; hidinput->input = input_dev; hidinput->application = application; list_add_tail(&hidinput->list, &hid->inputs); INIT_LIST_HEAD(&hidinput->reports); return hidinput; fail: kfree(hidinput); input_free_device(input_dev); hid_err(hid, "Out of memory during hid input probe\n"); return NULL; } static bool hidinput_has_been_populated(struct hid_input *hidinput) { int i; unsigned long r = 0; for (i = 0; i < BITS_TO_LONGS(EV_CNT); i++) r |= hidinput->input->evbit[i]; for (i = 0; i < BITS_TO_LONGS(KEY_CNT); i++) r |= hidinput->input->keybit[i]; for (i = 0; i < BITS_TO_LONGS(REL_CNT); i++) r |= hidinput->input->relbit[i]; for (i = 0; i < BITS_TO_LONGS(ABS_CNT); i++) r |= hidinput->input->absbit[i]; for (i = 0; i < BITS_TO_LONGS(MSC_CNT); i++) r |= hidinput->input->mscbit[i]; for (i = 0; i < BITS_TO_LONGS(LED_CNT); i++) r |= hidinput->input->ledbit[i]; for (i = 0; i < BITS_TO_LONGS(SND_CNT); i++) r |= hidinput->input->sndbit[i]; for (i = 0; i < BITS_TO_LONGS(FF_CNT); i++) r |= hidinput->input->ffbit[i]; for (i = 0; i < BITS_TO_LONGS(SW_CNT); i++) r |= hidinput->input->swbit[i]; return !!r; } static void hidinput_cleanup_hidinput(struct hid_device *hid, struct hid_input *hidinput) { struct hid_report *report; int i, k; list_del(&hidinput->list); input_free_device(hidinput->input); kfree(hidinput->name); for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { if (k == HID_OUTPUT_REPORT && hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORTS) continue; list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) if (report->field[i]->hidinput == hidinput) report->field[i]->hidinput = NULL; } } kfree(hidinput); } static struct hid_input *hidinput_match(struct hid_report *report) { struct hid_device *hid = report->device; struct hid_input *hidinput; list_for_each_entry(hidinput, &hid->inputs, list) { if (hidinput->report && hidinput->report->id == report->id) return hidinput; } return NULL; } static struct hid_input *hidinput_match_application(struct hid_report *report) { struct hid_device *hid = report->device; struct hid_input *hidinput; list_for_each_entry(hidinput, &hid->inputs, list) { if (hidinput->application == report->application) return hidinput; /* * Keep SystemControl and ConsumerControl applications together * with the main keyboard, if present. 
*/ if ((report->application == HID_GD_SYSTEM_CONTROL || report->application == HID_CP_CONSUMER_CONTROL) && hidinput->application == HID_GD_KEYBOARD) { return hidinput; } } return NULL; } static inline void hidinput_configure_usages(struct hid_input *hidinput, struct hid_report *report) { int i, j, k; int first_field_index = 0; int slot_collection_index = -1; int prev_collection_index = -1; unsigned int slot_idx = 0; struct hid_field *field; /* * First tag all the fields that are part of a slot, * a slot needs to have one Contact ID in the collection */ for (i = 0; i < report->maxfield; i++) { field = report->field[i]; /* ignore fields without usage */ if (field->maxusage < 1) continue; /* * janitoring when collection_index changes */ if (prev_collection_index != field->usage->collection_index) { prev_collection_index = field->usage->collection_index; first_field_index = i; } /* * if we already found a Contact ID in the collection, * tag and continue to the next. */ if (slot_collection_index == field->usage->collection_index) { field->slot_idx = slot_idx; continue; } /* check if the current field has Contact ID */ for (j = 0; j < field->maxusage; j++) { if (field->usage[j].hid == HID_DG_CONTACTID) { slot_collection_index = field->usage->collection_index; slot_idx++; /* * mark all previous fields and this one in the * current collection to be slotted. */ for (k = first_field_index; k <= i; k++) report->field[k]->slot_idx = slot_idx; break; } } } for (i = 0; i < report->maxfield; i++) for (j = 0; j < report->field[i]->maxusage; j++) hidinput_configure_usage(hidinput, report->field[i], report->field[i]->usage + j, j); } /* * Register the input device; print a message. * Configure the input layer interface * Read all reports and initialize the absolute field values. */ int hidinput_connect(struct hid_device *hid, unsigned int force) { struct hid_driver *drv = hid->driver; struct hid_report *report; struct hid_input *next, *hidinput = NULL; unsigned int application; int i, k; INIT_LIST_HEAD(&hid->inputs); INIT_WORK(&hid->led_work, hidinput_led_worker); hid->status &= ~HID_STAT_DUP_DETECTED; if (!force) { for (i = 0; i < hid->maxcollection; i++) { struct hid_collection *col = &hid->collection[i]; if (col->type == HID_COLLECTION_APPLICATION || col->type == HID_COLLECTION_PHYSICAL) if (IS_INPUT_APPLICATION(col->usage)) break; } if (i == hid->maxcollection) return -1; } report_features(hid); for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { if (k == HID_OUTPUT_REPORT && hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORTS) continue; list_for_each_entry(report, &hid->report_enum[k].report_list, list) { if (!report->maxfield) continue; application = report->application; /* * Find the previous hidinput report attached * to this report id. 
*/ if (hid->quirks & HID_QUIRK_MULTI_INPUT) hidinput = hidinput_match(report); else if (hid->maxapplication > 1 && (hid->quirks & HID_QUIRK_INPUT_PER_APP)) hidinput = hidinput_match_application(report); if (!hidinput) { hidinput = hidinput_allocate(hid, application); if (!hidinput) goto out_unwind; } hidinput_configure_usages(hidinput, report); if (hid->quirks & HID_QUIRK_MULTI_INPUT) hidinput->report = report; list_add_tail(&report->hidinput_list, &hidinput->reports); } } hidinput_change_resolution_multipliers(hid); list_for_each_entry_safe(hidinput, next, &hid->inputs, list) { if (drv->input_configured && drv->input_configured(hid, hidinput)) goto out_unwind; if (!hidinput_has_been_populated(hidinput)) { /* no need to register an input device not populated */ hidinput_cleanup_hidinput(hid, hidinput); continue; } if (input_register_device(hidinput->input)) goto out_unwind; hidinput->registered = true; } if (list_empty(&hid->inputs)) { hid_err(hid, "No inputs registered, leaving\n"); goto out_unwind; } if (hid->status & HID_STAT_DUP_DETECTED) hid_dbg(hid, "Some usages could not be mapped, please use HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE if this is legitimate.\n"); return 0; out_unwind: /* unwind the ones we already registered */ hidinput_disconnect(hid); return -1; } EXPORT_SYMBOL_GPL(hidinput_connect); void hidinput_disconnect(struct hid_device *hid) { struct hid_input *hidinput, *next; hidinput_cleanup_battery(hid); list_for_each_entry_safe(hidinput, next, &hid->inputs, list) { list_del(&hidinput->list); if (hidinput->registered) input_unregister_device(hidinput->input); else input_free_device(hidinput->input); kfree(hidinput->name); kfree(hidinput); } /* led_work is spawned by input_dev callbacks, but doesn't access the * parent input_dev at all. Once all input devices are removed, we * know that led_work will never get restarted, so we can cancel it * synchronously and are safe. */ cancel_work_sync(&hid->led_work); } EXPORT_SYMBOL_GPL(hidinput_disconnect); #ifdef CONFIG_HID_KUNIT_TEST #include "hid-input-test.c" #endif
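/*
 * A minimal, hypothetical sketch of the driver-side ->input_mapping()
 * callback consumed by hidinput_configure_usage() above: a positive return
 * claims the usage (the core jumps to "mapped"), a negative return drops it
 * ("ignore"), and zero falls through to the generic tables. The function
 * name and the vendor usage value below are illustrative assumptions, not
 * taken from any real driver.
 */
static int sketch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
				struct hid_field *field, struct hid_usage *usage,
				unsigned long **bit, int *max)
{
	/* remap a hypothetical vendor-page usage 0x0001 to KEY_PROG1 */
	if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR &&
	    (usage->hid & HID_USAGE) == 0x0001) {
		hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_PROG1);
		return 1;
	}

	return 0; /* let the generic mapping handle everything else */
}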
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for the Prodikeys PC-MIDI Keyboard * providing midi & extra multimedia keys functionality * * Copyright (c) 2009 Don Prince
<dhprince.devel@yahoo.co.uk> * * Controls for Octave Shift Up/Down, Channel, and * Sustain Duration available via sysfs. */ /* */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/mutex.h> #include <linux/hid.h> #include <sound/core.h> #include <sound/initval.h> #include <sound/rawmidi.h> #include "hid-ids.h" #define pk_debug(format, arg...) \ pr_debug("hid-prodikeys: " format "\n" , ## arg) #define pk_error(format, arg...) \ pr_err("hid-prodikeys: " format "\n" , ## arg) struct pcmidi_snd; struct pcmidi_sustain { unsigned long in_use; struct pcmidi_snd *pm; struct timer_list timer; unsigned char status; unsigned char note; unsigned char velocity; }; #define PCMIDI_SUSTAINED_MAX 32 struct pcmidi_snd { struct hid_device *hdev; unsigned short ifnum; struct hid_report *pcmidi_report6; struct input_dev *input_ep82; unsigned short midi_mode; unsigned short midi_sustain_mode; unsigned short midi_sustain; unsigned short midi_channel; short midi_octave; struct pcmidi_sustain sustained_notes[PCMIDI_SUSTAINED_MAX]; unsigned short fn_state; unsigned short last_key[24]; spinlock_t rawmidi_in_lock; struct snd_card *card; struct snd_rawmidi *rwmidi; struct snd_rawmidi_substream *in_substream; unsigned long in_triggered; }; #define PK_QUIRK_NOGET 0x00010000 #define PCMIDI_MIDDLE_C 60 #define PCMIDI_CHANNEL_MIN 0 #define PCMIDI_CHANNEL_MAX 15 #define PCMIDI_OCTAVE_MIN (-2) #define PCMIDI_OCTAVE_MAX 2 #define PCMIDI_SUSTAIN_MIN 0 #define PCMIDI_SUSTAIN_MAX 5000 static const char shortname[] = "PC-MIDI"; static const char longname[] = "Prodikeys PC-MIDI Keyboard"; static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; module_param_array(index, int, NULL, 0444); module_param_array(id, charp, NULL, 0444); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the PC-MIDI virtual audio driver"); MODULE_PARM_DESC(id, "ID string for the PC-MIDI virtual audio driver"); MODULE_PARM_DESC(enable, "Enable for the PC-MIDI virtual audio driver"); /* Output routine for the sysfs channel file */ static ssize_t show_channel(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read channel=%u\n", pm->midi_channel); return sprintf(buf, "%u (min:%u, max:%u)\n", pm->midi_channel, PCMIDI_CHANNEL_MIN, PCMIDI_CHANNEL_MAX); } /* Input routine for the sysfs channel file */ static ssize_t store_channel(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); unsigned channel = 0; if (sscanf(buf, "%u", &channel) > 0 && channel <= PCMIDI_CHANNEL_MAX) { dbg_hid("pcmidi sysfs write channel=%u\n", channel); pm->midi_channel = channel; return strlen(buf); } return -EINVAL; } static DEVICE_ATTR(channel, S_IRUGO | S_IWUSR | S_IWGRP , show_channel, store_channel); static struct device_attribute *sysfs_device_attr_channel = { &dev_attr_channel, }; /* Output routine for the sysfs sustain file */ static ssize_t show_sustain(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read sustain=%u\n", pm->midi_sustain); return sprintf(buf, "%u (off:%u, max:%u 
(ms))\n", pm->midi_sustain, PCMIDI_SUSTAIN_MIN, PCMIDI_SUSTAIN_MAX); } /* Input routine for the sysfs sustain file */ static ssize_t store_sustain(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); unsigned sustain = 0; if (sscanf(buf, "%u", &sustain) > 0 && sustain <= PCMIDI_SUSTAIN_MAX) { dbg_hid("pcmidi sysfs write sustain=%u\n", sustain); pm->midi_sustain = sustain; pm->midi_sustain_mode = (0 == sustain || !pm->midi_mode) ? 0 : 1; return strlen(buf); } return -EINVAL; } static DEVICE_ATTR(sustain, S_IRUGO | S_IWUSR | S_IWGRP, show_sustain, store_sustain); static struct device_attribute *sysfs_device_attr_sustain = { &dev_attr_sustain, }; /* Output routine for the sysfs octave file */ static ssize_t show_octave(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read octave=%d\n", pm->midi_octave); return sprintf(buf, "%d (min:%d, max:%d)\n", pm->midi_octave, PCMIDI_OCTAVE_MIN, PCMIDI_OCTAVE_MAX); } /* Input routine for the sysfs octave file */ static ssize_t store_octave(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct pcmidi_snd *pm = hid_get_drvdata(hdev); int octave = 0; if (sscanf(buf, "%d", &octave) > 0 && octave >= PCMIDI_OCTAVE_MIN && octave <= PCMIDI_OCTAVE_MAX) { dbg_hid("pcmidi sysfs write octave=%d\n", octave); pm->midi_octave = octave; return strlen(buf); } return -EINVAL; } static DEVICE_ATTR(octave, S_IRUGO | S_IWUSR | S_IWGRP, show_octave, store_octave); static struct device_attribute *sysfs_device_attr_octave = { &dev_attr_octave, }; static void pcmidi_send_note(struct pcmidi_snd *pm, unsigned char status, unsigned char note, unsigned char velocity) { unsigned long flags; unsigned char buffer[3]; buffer[0] = status; buffer[1] = note; buffer[2] = velocity; spin_lock_irqsave(&pm->rawmidi_in_lock, flags); if (!pm->in_substream) goto drop_note; if (!test_bit(pm->in_substream->number, &pm->in_triggered)) goto drop_note; snd_rawmidi_receive(pm->in_substream, buffer, 3); drop_note: spin_unlock_irqrestore(&pm->rawmidi_in_lock, flags); return; } static void pcmidi_sustained_note_release(struct timer_list *t) { struct pcmidi_sustain *pms = from_timer(pms, t, timer); pcmidi_send_note(pms->pm, pms->status, pms->note, pms->velocity); pms->in_use = 0; } static void init_sustain_timers(struct pcmidi_snd *pm) { struct pcmidi_sustain *pms; unsigned i; for (i = 0; i < PCMIDI_SUSTAINED_MAX; i++) { pms = &pm->sustained_notes[i]; pms->in_use = 0; pms->pm = pm; timer_setup(&pms->timer, pcmidi_sustained_note_release, 0); } } static void stop_sustain_timers(struct pcmidi_snd *pm) { struct pcmidi_sustain *pms; unsigned i; for (i = 0; i < PCMIDI_SUSTAINED_MAX; i++) { pms = &pm->sustained_notes[i]; pms->in_use = 1; del_timer_sync(&pms->timer); } } static int pcmidi_get_output_report(struct pcmidi_snd *pm) { struct hid_device *hdev = pm->hdev; struct hid_report *report; list_for_each_entry(report, &hdev->report_enum[HID_OUTPUT_REPORT].report_list, list) { if (!(6 == report->id)) continue; if (report->maxfield < 1) { hid_err(hdev, "output report is empty\n"); break; } if (report->field[0]->report_count != 2) { hid_err(hdev, "field count too low\n"); break; } pm->pcmidi_report6 = report; return 0; } /* should never get here */ return -ENODEV; } static void 
pcmidi_submit_output_report(struct pcmidi_snd *pm, int state) { struct hid_device *hdev = pm->hdev; struct hid_report *report = pm->pcmidi_report6; report->field[0]->value[0] = 0x01; report->field[0]->value[1] = state; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); } static int pcmidi_handle_report1(struct pcmidi_snd *pm, u8 *data) { u32 bit_mask; bit_mask = data[1]; bit_mask = (bit_mask << 8) | data[2]; bit_mask = (bit_mask << 8) | data[3]; dbg_hid("pcmidi mode: %d\n", pm->midi_mode); /*KEY_MAIL or octave down*/ if (pm->midi_mode && bit_mask == 0x004000) { /* octave down */ pm->midi_octave--; if (pm->midi_octave < -2) pm->midi_octave = -2; dbg_hid("pcmidi mode: %d octave: %d\n", pm->midi_mode, pm->midi_octave); return 1; } /*KEY_WWW or sustain*/ else if (pm->midi_mode && bit_mask == 0x000004) { /* sustain on/off*/ pm->midi_sustain_mode ^= 0x1; return 1; } return 0; /* continue key processing */ } static int pcmidi_handle_report3(struct pcmidi_snd *pm, u8 *data, int size) { struct pcmidi_sustain *pms; unsigned i, j; unsigned char status, note, velocity; unsigned num_notes = (size-1)/2; for (j = 0; j < num_notes; j++) { note = data[j*2+1]; velocity = data[j*2+2]; if (note < 0x81) { /* note on */ status = 128 + 16 + pm->midi_channel; /* 1001nnnn */ note = note - 0x54 + PCMIDI_MIDDLE_C + (pm->midi_octave * 12); if (0 == velocity) velocity = 1; /* force note on */ } else { /* note off */ status = 128 + pm->midi_channel; /* 1000nnnn */ note = note - 0x94 + PCMIDI_MIDDLE_C + (pm->midi_octave*12); if (pm->midi_sustain_mode) { for (i = 0; i < PCMIDI_SUSTAINED_MAX; i++) { pms = &pm->sustained_notes[i]; if (!pms->in_use) { pms->status = status; pms->note = note; pms->velocity = velocity; pms->in_use = 1; mod_timer(&pms->timer, jiffies + msecs_to_jiffies(pm->midi_sustain)); return 1; } } } } pcmidi_send_note(pm, status, note, velocity); } return 1; } static int pcmidi_handle_report4(struct pcmidi_snd *pm, u8 *data) { unsigned key; u32 bit_mask; u32 bit_index; bit_mask = data[1]; bit_mask = (bit_mask << 8) | data[2]; bit_mask = (bit_mask << 8) | data[3]; /* break keys */ for (bit_index = 0; bit_index < 24; bit_index++) { if (!((0x01 << bit_index) & bit_mask)) { input_event(pm->input_ep82, EV_KEY, pm->last_key[bit_index], 0); pm->last_key[bit_index] = 0; } } /* make keys */ for (bit_index = 0; bit_index < 24; bit_index++) { key = 0; switch ((0x01 << bit_index) & bit_mask) { case 0x000010: /* Fn lock*/ pm->fn_state ^= 0x000010; if (pm->fn_state) pcmidi_submit_output_report(pm, 0xc5); else pcmidi_submit_output_report(pm, 0xc6); continue; case 0x020000: /* midi launcher..send a key (qwerty) or not? 
*/ pcmidi_submit_output_report(pm, 0xc1); pm->midi_mode ^= 0x01; dbg_hid("pcmidi mode: %d\n", pm->midi_mode); continue; case 0x100000: /* KEY_MESSENGER or octave up */ dbg_hid("pcmidi mode: %d\n", pm->midi_mode); if (pm->midi_mode) { pm->midi_octave++; if (pm->midi_octave > 2) pm->midi_octave = 2; dbg_hid("pcmidi mode: %d octave: %d\n", pm->midi_mode, pm->midi_octave); continue; } else key = KEY_MESSENGER; break; case 0x400000: key = KEY_CALENDAR; break; case 0x080000: key = KEY_ADDRESSBOOK; break; case 0x040000: key = KEY_DOCUMENTS; break; case 0x800000: key = KEY_WORDPROCESSOR; break; case 0x200000: key = KEY_SPREADSHEET; break; case 0x010000: key = KEY_COFFEE; break; case 0x000100: key = KEY_HELP; break; case 0x000200: key = KEY_SEND; break; case 0x000400: key = KEY_REPLY; break; case 0x000800: key = KEY_FORWARDMAIL; break; case 0x001000: key = KEY_NEW; break; case 0x002000: key = KEY_OPEN; break; case 0x004000: key = KEY_CLOSE; break; case 0x008000: key = KEY_SAVE; break; case 0x000001: key = KEY_UNDO; break; case 0x000002: key = KEY_REDO; break; case 0x000004: key = KEY_SPELLCHECK; break; case 0x000008: key = KEY_PRINT; break; } if (key) { input_event(pm->input_ep82, EV_KEY, key, 1); pm->last_key[bit_index] = key; } } return 1; } static int pcmidi_handle_report( struct pcmidi_snd *pm, unsigned report_id, u8 *data, int size) { int ret = 0; switch (report_id) { case 0x01: /* midi keys (qwerty)*/ ret = pcmidi_handle_report1(pm, data); break; case 0x03: /* midi keyboard (musical)*/ ret = pcmidi_handle_report3(pm, data, size); break; case 0x04: /* multimedia/midi keys (qwerty)*/ ret = pcmidi_handle_report4(pm, data); break; } return ret; } static void pcmidi_setup_extra_keys( struct pcmidi_snd *pm, struct input_dev *input) { /* reassigned functionality for N/A keys MY PICTURES => KEY_WORDPROCESSOR MY MUSIC=> KEY_SPREADSHEET */ static const unsigned int keys[] = { KEY_FN, KEY_MESSENGER, KEY_CALENDAR, KEY_ADDRESSBOOK, KEY_DOCUMENTS, KEY_WORDPROCESSOR, KEY_SPREADSHEET, KEY_COFFEE, KEY_HELP, KEY_SEND, KEY_REPLY, KEY_FORWARDMAIL, KEY_NEW, KEY_OPEN, KEY_CLOSE, KEY_SAVE, KEY_UNDO, KEY_REDO, KEY_SPELLCHECK, KEY_PRINT, 0 }; const unsigned int *pkeys = &keys[0]; unsigned short i; if (pm->ifnum != 1) /* only set up ONCE for interace 1 */ return; pm->input_ep82 = input; for (i = 0; i < 24; i++) pm->last_key[i] = 0; while (*pkeys != 0) { set_bit(*pkeys, pm->input_ep82->keybit); ++pkeys; } } static int pcmidi_set_operational(struct pcmidi_snd *pm) { int rc; if (pm->ifnum != 1) return 0; /* only set up ONCE for interace 1 */ rc = pcmidi_get_output_report(pm); if (rc < 0) return rc; pcmidi_submit_output_report(pm, 0xc1); return 0; } static int pcmidi_snd_free(struct snd_device *dev) { return 0; } static int pcmidi_in_open(struct snd_rawmidi_substream *substream) { struct pcmidi_snd *pm = substream->rmidi->private_data; dbg_hid("pcmidi in open\n"); pm->in_substream = substream; return 0; } static int pcmidi_in_close(struct snd_rawmidi_substream *substream) { dbg_hid("pcmidi in close\n"); return 0; } static void pcmidi_in_trigger(struct snd_rawmidi_substream *substream, int up) { struct pcmidi_snd *pm = substream->rmidi->private_data; dbg_hid("pcmidi in trigger %d\n", up); pm->in_triggered = up; } static const struct snd_rawmidi_ops pcmidi_in_ops = { .open = pcmidi_in_open, .close = pcmidi_in_close, .trigger = pcmidi_in_trigger }; static int pcmidi_snd_initialise(struct pcmidi_snd *pm) { static int dev; struct snd_card *card; struct snd_rawmidi *rwmidi; int err; static struct snd_device_ops ops = { 
.dev_free = pcmidi_snd_free, }; if (pm->ifnum != 1) return 0; /* only set up midi device ONCE for interace 1 */ if (dev >= SNDRV_CARDS) return -ENODEV; if (!enable[dev]) { dev++; return -ENOENT; } /* Setup sound card */ err = snd_card_new(&pm->hdev->dev, index[dev], id[dev], THIS_MODULE, 0, &card); if (err < 0) { pk_error("failed to create pc-midi sound card\n"); err = -ENOMEM; goto fail; } pm->card = card; /* Setup sound device */ err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, pm, &ops); if (err < 0) { pk_error("failed to create pc-midi sound device: error %d\n", err); goto fail; } strscpy(card->driver, shortname, sizeof(card->driver)); strscpy(card->shortname, shortname, sizeof(card->shortname)); strscpy(card->longname, longname, sizeof(card->longname)); /* Set up rawmidi */ err = snd_rawmidi_new(card, card->shortname, 0, 0, 1, &rwmidi); if (err < 0) { pk_error("failed to create pc-midi rawmidi device: error %d\n", err); goto fail; } pm->rwmidi = rwmidi; strscpy(rwmidi->name, card->shortname, sizeof(rwmidi->name)); rwmidi->info_flags = SNDRV_RAWMIDI_INFO_INPUT; rwmidi->private_data = pm; snd_rawmidi_set_ops(rwmidi, SNDRV_RAWMIDI_STREAM_INPUT, &pcmidi_in_ops); /* create sysfs variables */ err = device_create_file(&pm->hdev->dev, sysfs_device_attr_channel); if (err < 0) { pk_error("failed to create sysfs attribute channel: error %d\n", err); goto fail; } err = device_create_file(&pm->hdev->dev, sysfs_device_attr_sustain); if (err < 0) { pk_error("failed to create sysfs attribute sustain: error %d\n", err); goto fail_attr_sustain; } err = device_create_file(&pm->hdev->dev, sysfs_device_attr_octave); if (err < 0) { pk_error("failed to create sysfs attribute octave: error %d\n", err); goto fail_attr_octave; } spin_lock_init(&pm->rawmidi_in_lock); init_sustain_timers(pm); err = pcmidi_set_operational(pm); if (err < 0) { pk_error("failed to find output report\n"); goto fail_register; } /* register it */ err = snd_card_register(card); if (err < 0) { pk_error("failed to register pc-midi sound card: error %d\n", err); goto fail_register; } dbg_hid("pcmidi_snd_initialise finished ok\n"); return 0; fail_register: stop_sustain_timers(pm); device_remove_file(&pm->hdev->dev, sysfs_device_attr_octave); fail_attr_octave: device_remove_file(&pm->hdev->dev, sysfs_device_attr_sustain); fail_attr_sustain: device_remove_file(&pm->hdev->dev, sysfs_device_attr_channel); fail: if (pm->card) { snd_card_free(pm->card); pm->card = NULL; } return err; } static int pcmidi_snd_terminate(struct pcmidi_snd *pm) { if (pm->card) { stop_sustain_timers(pm); device_remove_file(&pm->hdev->dev, sysfs_device_attr_channel); device_remove_file(&pm->hdev->dev, sysfs_device_attr_sustain); device_remove_file(&pm->hdev->dev, sysfs_device_attr_octave); snd_card_disconnect(pm->card); snd_card_free_when_closed(pm->card); } return 0; } /* * PC-MIDI report descriptor for report id is wrong. 
*/ static __u8 *pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize == 178 && rdesc[111] == 0x06 && rdesc[112] == 0x00 && rdesc[113] == 0xff) { hid_info(hdev, "fixing up pc-midi keyboard report descriptor\n"); rdesc[144] = 0x18; /* report 4: was 0x10 report count */ } return rdesc; } static int pk_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct pcmidi_snd *pm = hid_get_drvdata(hdev); if (HID_UP_MSVENDOR == (usage->hid & HID_USAGE_PAGE) && 1 == pm->ifnum) { pcmidi_setup_extra_keys(pm, hi->input); return 0; } return 0; } static int pk_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct pcmidi_snd *pm = hid_get_drvdata(hdev); int ret = 0; if (1 == pm->ifnum) { if (report->id == data[0]) switch (report->id) { case 0x01: /* midi keys (qwerty)*/ case 0x03: /* midi keyboard (musical)*/ case 0x04: /* extra/midi keys (qwerty)*/ ret = pcmidi_handle_report(pm, report->id, data, size); break; } } return ret; } static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; struct usb_interface *intf; unsigned short ifnum; unsigned long quirks = id->driver_data; struct pcmidi_snd *pm; if (!hid_is_usb(hdev)) return -EINVAL; intf = to_usb_interface(hdev->dev.parent); ifnum = intf->cur_altsetting->desc.bInterfaceNumber; pm = kzalloc(sizeof(*pm), GFP_KERNEL); if (pm == NULL) { hid_err(hdev, "can't alloc descriptor\n"); return -ENOMEM; } pm->hdev = hdev; pm->ifnum = ifnum; hid_set_drvdata(hdev, pm); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "hid parse failed\n"); goto err_free; } if (quirks & PK_QUIRK_NOGET) { /* hid_parse cleared all the quirks */ hdev->quirks |= HID_QUIRK_NOGET; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } ret = pcmidi_snd_initialise(pm); if (ret < 0) goto err_stop; return 0; err_stop: hid_hw_stop(hdev); err_free: kfree(pm); return ret; } static void pk_remove(struct hid_device *hdev) { struct pcmidi_snd *pm = hid_get_drvdata(hdev); pcmidi_snd_terminate(pm); hid_hw_stop(hdev); kfree(pm); } static const struct hid_device_id pk_devices[] = { {HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI), .driver_data = PK_QUIRK_NOGET}, { } }; MODULE_DEVICE_TABLE(hid, pk_devices); static struct hid_driver pk_driver = { .name = "prodikeys", .id_table = pk_devices, .report_fixup = pk_report_fixup, .input_mapping = pk_input_mapping, .raw_event = pk_raw_event, .probe = pk_probe, .remove = pk_remove, }; module_hid_driver(pk_driver); MODULE_LICENSE("GPL");
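/*
 * Illustrative sketch only, not part of the driver above: conceptually,
 * module_hid_driver(pk_driver) expands to module init/exit boilerplate
 * roughly like the following, which registers and unregisters the
 * hid_driver.  The real macro in <linux/hid.h> also takes care of module
 * ownership bookkeeping.
 */
static int __init pk_driver_init(void)
{
	return hid_register_driver(&pk_driver);
}
module_init(pk_driver_init);

static void __exit pk_driver_exit(void)
{
	hid_unregister_driver(&pk_driver);
}
module_exit(pk_driver_exit);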
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Device core Trace Support
 * Copyright (C) 2021, Intel Corporation
 *
 * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 */

#undef TRACE_SYSTEM
#define TRACE_SYSTEM dev

#if !defined(__DEV_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define __DEV_TRACE_H

#include <linux/device.h>
#include <linux/tracepoint.h>
#include <linux/types.h>

DECLARE_EVENT_CLASS(devres,
	TP_PROTO(struct device *dev, const char *op, void *node, const char *name, size_t size),
	TP_ARGS(dev, op, node, name, size),
	TP_STRUCT__entry(
		__string(devname, dev_name(dev))
		__field(struct device *, dev)
		__field(const char *, op)
		__field(void *, node)
		__field(const char *, name)
		__field(size_t, size)
	),
	TP_fast_assign(
		__assign_str(devname, dev_name(dev));
		__entry->op = op;
		__entry->node = node;
		__entry->name = name;
		__entry->size = size;
	),
	TP_printk("%s %3s %p %s (%zu bytes)", __get_str(devname),
		  __entry->op, __entry->node, __entry->name, __entry->size)
);

DEFINE_EVENT(devres, devres_log,
	TP_PROTO(struct device *dev, const char *op, void *node, const char *name, size_t size),
	TP_ARGS(dev, op, node, name, size)
);

#endif /* __DEV_TRACE_H */

/* this part has to be here */

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
#include <trace/define_trace.h>
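/*
 * Minimal usage sketch (not part of the header above), assuming the
 * standard tracepoint build convention: exactly one .c file defines
 * CREATE_TRACE_POINTS before including this header, after which the
 * generated trace_devres_log() helper can be called with the arguments
 * declared in TP_PROTO above.  The "ADD" operation string is purely
 * illustrative.
 */
#define CREATE_TRACE_POINTS
#include "trace.h"

static void report_devres_add(struct device *dev, void *node,
			      const char *name, size_t size)
{
	trace_devres_log(dev, "ADD", node, name, size);
}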
14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux NET3: Internet Group Management Protocol [IGMP] * * Authors: * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Extended to talk the BSD extended IGMP protocol of mrouted 3.6 */ #ifndef _LINUX_IGMP_H #define _LINUX_IGMP_H #include <linux/skbuff.h> #include <linux/timer.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/refcount.h> #include <linux/sockptr.h> #include <uapi/linux/igmp.h> static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb) { return (struct igmphdr *)skb_transport_header(skb); } static inline struct igmpv3_report * igmpv3_report_hdr(const struct sk_buff *skb) { return (struct igmpv3_report *)skb_transport_header(skb); } static inline struct igmpv3_query * igmpv3_query_hdr(const struct sk_buff *skb) { return (struct igmpv3_query *)skb_transport_header(skb); } struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; __be32 sl_addr[] __counted_by(sl_max); }; #define IP_SFBLOCK 10 /* allocate this many at once */ /* ip_mc_socklist is real list now. Speed is not argument; this list never used in fast path code */ struct ip_mc_socklist { struct ip_mc_socklist __rcu *next_rcu; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist __rcu *sflist; struct rcu_head rcu; }; struct ip_sf_list { struct ip_sf_list *sf_next; unsigned long sf_count[2]; /* include/exclude counts */ __be32 sf_inaddr; unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ unsigned char sf_crcount; /* retrans. left to send */ }; struct ip_mc_list { struct in_device *interface; __be32 multiaddr; unsigned int sfmode; struct ip_sf_list *sources; struct ip_sf_list *tomb; unsigned long sfcount[2]; union { struct ip_mc_list *next; struct ip_mc_list __rcu *next_rcu; }; struct ip_mc_list __rcu *next_hash; struct timer_list timer; int users; refcount_t refcnt; spinlock_t lock; char tm_running; char reporter; char unsolicit_count; char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; struct rcu_head rcu; }; /* V3 exponential field decoding */ #define IGMPV3_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) #define IGMPV3_EXP(thresh, nbmant, nbexp, value) \ ((value) < (thresh) ? 
(value) : \ ((IGMPV3_MASK(value, nbmant) | (1<<(nbmant))) << \ (IGMPV3_MASK((value) >> (nbmant), nbexp) + (nbexp)))) #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) static inline int ip_mc_may_pull(struct sk_buff *skb, unsigned int len) { if (skb_transport_offset(skb) + ip_transport_len(skb) < len) return 0; return pskb_may_pull(skb, len); } extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr, unsigned int mode); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mreq_source *mreqs, int ifindex); extern int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf,int ifindex); extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, sockptr_t optval, sockptr_t optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, sockptr_t optval, size_t offset); extern int ip_mc_sf_allow(const struct sock *sk, __be32 local, __be32 rmt, int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); extern void ip_mc_up(struct in_device *); extern void ip_mc_down(struct in_device *); extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp); static inline void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { return __ip_mc_dec_group(in_dev, addr, GFP_KERNEL); } extern void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, gfp_t gfp); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); int ip_mc_check_igmp(struct sk_buff *skb); #endif
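/*
 * Worked example (illustrative, not part of the header above) of the
 * exponential field decoding: a code below the 0x80 threshold is taken
 * literally, otherwise the low 4 bits form a mantissa and bits 6:4 an
 * exponent, decoded as (mant | 0x10) << (exp + 3).  Hence
 * IGMPV3_MRC(0x8A) = (0xA | 0x10) << (0 + 3) = 208, i.e. 20.8 seconds in
 * the protocol's 1/10 second units.
 */
static inline unsigned int igmpv3_mrc_decode_example(void)
{
	return IGMPV3_MRC(0x8A);	/* evaluates to 208 */
}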
250 251 167 29 62 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2001-2003 Patrick Mochel <mochel@osdl.org> * Copyright (c) 2004-2009 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2008-2012 Novell Inc. * Copyright (c) 2012-2019 Greg Kroah-Hartman <gregkh@linuxfoundation.org> * Copyright (c) 2012-2019 Linux Foundation * * Core driver model functions and structures that should not be * shared outside of the drivers/base/ directory. * */ #include <linux/notifier.h> /** * struct subsys_private - structure to hold the private to the driver core portions of the bus_type/class structure. * * @subsys - the struct kset that defines this subsystem * @devices_kset - the subsystem's 'devices' directory * @interfaces - list of subsystem interfaces associated * @mutex - protect the devices, and interfaces lists. * * @drivers_kset - the list of drivers associated * @klist_devices - the klist to iterate over the @devices_kset * @klist_drivers - the klist to iterate over the @drivers_kset * @bus_notifier - the bus notifier list for anything that cares about things * on this bus. * @bus - pointer back to the struct bus_type that this structure is associated * with. * @dev_root: Default device to use as the parent. * * @glue_dirs - "glue" directory to put in-between the parent device to * avoid namespace conflicts * @class - pointer back to the struct class that this structure is associated * with. * @lock_key: Lock class key for use by the lock validator * * This structure is the one that is the actual kobject allowing struct * bus_type/class to be statically allocated safely. Nothing outside of the * driver core should ever touch these fields. 
*/ struct subsys_private { struct kset subsys; struct kset *devices_kset; struct list_head interfaces; struct mutex mutex; struct kset *drivers_kset; struct klist klist_devices; struct klist klist_drivers; struct blocking_notifier_head bus_notifier; unsigned int drivers_autoprobe:1; const struct bus_type *bus; struct device *dev_root; struct kset glue_dirs; const struct class *class; struct lock_class_key lock_key; }; #define to_subsys_private(obj) container_of_const(obj, struct subsys_private, subsys.kobj) static inline struct subsys_private *subsys_get(struct subsys_private *sp) { if (sp) kset_get(&sp->subsys); return sp; } static inline void subsys_put(struct subsys_private *sp) { if (sp) kset_put(&sp->subsys); } struct subsys_private *class_to_subsys(const struct class *class); struct driver_private { struct kobject kobj; struct klist klist_devices; struct klist_node knode_bus; struct module_kobject *mkobj; struct device_driver *driver; }; #define to_driver(obj) container_of(obj, struct driver_private, kobj) /** * struct device_private - structure to hold the private to the driver core portions of the device structure. * * @klist_children - klist containing all children of this device * @knode_parent - node in sibling list * @knode_driver - node in driver list * @knode_bus - node in bus list * @knode_class - node in class list * @deferred_probe - entry in deferred_probe_list which is used to retry the * binding of drivers which were unable to get all the resources needed by * the device; typically because it depends on another driver getting * probed first. * @async_driver - pointer to device driver awaiting probe via async_probe * @device - pointer back to the struct device that this structure is * associated with. * @dead - This device is currently either in the process of or has been * removed from the system. Any asynchronous events scheduled for this * device should exit without taking any action. * * Nothing outside of the driver core should ever touch these fields. 
*/ struct device_private { struct klist klist_children; struct klist_node knode_parent; struct klist_node knode_driver; struct klist_node knode_bus; struct klist_node knode_class; struct list_head deferred_probe; struct device_driver *async_driver; char *deferred_probe_reason; struct device *device; u8 dead:1; }; #define to_device_private_parent(obj) \ container_of(obj, struct device_private, knode_parent) #define to_device_private_driver(obj) \ container_of(obj, struct device_private, knode_driver) #define to_device_private_bus(obj) \ container_of(obj, struct device_private, knode_bus) #define to_device_private_class(obj) \ container_of(obj, struct device_private, knode_class) /* initialisation functions */ int devices_init(void); int buses_init(void); int classes_init(void); int firmware_init(void); #ifdef CONFIG_SYS_HYPERVISOR int hypervisor_init(void); #else static inline int hypervisor_init(void) { return 0; } #endif int platform_bus_init(void); void cpu_dev_init(void); void container_dev_init(void); #ifdef CONFIG_AUXILIARY_BUS void auxiliary_bus_init(void); #else static inline void auxiliary_bus_init(void) { } #endif struct kobject *virtual_device_parent(struct device *dev); int bus_add_device(struct device *dev); void bus_probe_device(struct device *dev); void bus_remove_device(struct device *dev); void bus_notify(struct device *dev, enum bus_notifier_event value); bool bus_is_registered(const struct bus_type *bus); int bus_add_driver(struct device_driver *drv); void bus_remove_driver(struct device_driver *drv); void device_release_driver_internal(struct device *dev, struct device_driver *drv, struct device *parent); void driver_detach(struct device_driver *drv); void driver_deferred_probe_del(struct device *dev); void device_set_deferred_probe_reason(const struct device *dev, struct va_format *vaf); static inline int driver_match_device(struct device_driver *drv, struct device *dev) { return drv->bus->match ? 
drv->bus->match(dev, drv) : 1; } static inline void dev_sync_state(struct device *dev) { if (dev->bus->sync_state) dev->bus->sync_state(dev); else if (dev->driver && dev->driver->sync_state) dev->driver->sync_state(dev); } int driver_add_groups(struct device_driver *drv, const struct attribute_group **groups); void driver_remove_groups(struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev); int devres_release_all(struct device *dev); void device_block_probing(void); void device_unblock_probing(void); void deferred_probe_extend_timeout(void); void driver_deferred_probe_trigger(void); const char *device_get_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp); /* /sys/devices directory */ extern struct kset *devices_kset; void devices_kset_move_last(struct device *dev); #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS) void module_add_driver(struct module *mod, struct device_driver *drv); void module_remove_driver(struct device_driver *drv); #else static inline void module_add_driver(struct module *mod, struct device_driver *drv) { } static inline void module_remove_driver(struct device_driver *drv) { } #endif #ifdef CONFIG_DEVTMPFS int devtmpfs_init(void); #else static inline int devtmpfs_init(void) { return 0; } #endif #ifdef CONFIG_BLOCK extern const struct class block_class; static inline bool is_blockdev(struct device *dev) { return dev->class == &block_class; } #else static inline bool is_blockdev(struct device *dev) { return false; } #endif /* Device links support */ int device_links_read_lock(void); void device_links_read_unlock(int idx); int device_links_read_lock_held(void); int device_links_check_suppliers(struct device *dev); void device_links_force_bind(struct device *dev); void device_links_driver_bound(struct device *dev); void device_links_driver_cleanup(struct device *dev); void device_links_no_driver(struct device *dev); bool device_links_busy(struct device *dev); void device_links_unbind_consumers(struct device *dev); void fw_devlink_drivers_done(void); void fw_devlink_probing_done(void); /* device pm support */ void device_pm_move_to_tail(struct device *dev); #ifdef CONFIG_DEVTMPFS int devtmpfs_create_node(struct device *dev); int devtmpfs_delete_node(struct device *dev); #else static inline int devtmpfs_create_node(struct device *dev) { return 0; } static inline int devtmpfs_delete_node(struct device *dev) { return 0; } #endif void software_node_notify(struct device *dev); void software_node_notify_remove(struct device *dev);
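/*
 * Illustrative sketch (hypothetical helper, not part of the header above)
 * of how the to_device_private_*() accessors are used: a klist_node taken
 * from a bus's device list is mapped back to its struct device_private
 * with container_of() and from there to the owning struct device.
 */
static struct device *example_klist_node_to_device(struct klist_node *n)
{
	struct device_private *dev_prv = to_device_private_bus(n);

	return dev_prv->device;
}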
81 81 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 // SPDX-License-Identifier: GPL-2.0 /* * Device physical location support * * Author: Won Chung <wonchung@google.com> */ #include <linux/acpi.h> #include <linux/sysfs.h> #include "physical_location.h" bool dev_add_physical_location(struct device *dev) { struct acpi_pld_info *pld; acpi_status status; if (!has_acpi_companion(dev)) return false; status = acpi_get_physical_device_location(ACPI_HANDLE(dev), &pld); if (ACPI_FAILURE(status)) return false; dev->physical_location = kzalloc(sizeof(*dev->physical_location), GFP_KERNEL); if (!dev->physical_location) { ACPI_FREE(pld); return false; } dev->physical_location->panel = pld->panel; dev->physical_location->vertical_position = pld->vertical_position; dev->physical_location->horizontal_position = pld->horizontal_position; dev->physical_location->dock = pld->dock; dev->physical_location->lid = pld->lid; ACPI_FREE(pld); return true; } static ssize_t panel_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *panel; switch (dev->physical_location->panel) { case DEVICE_PANEL_TOP: panel = "top"; break; case DEVICE_PANEL_BOTTOM: panel = "bottom"; break; case DEVICE_PANEL_LEFT: panel = "left"; break; case DEVICE_PANEL_RIGHT: panel = "right"; break; case DEVICE_PANEL_FRONT: panel = "front"; break; case DEVICE_PANEL_BACK: panel = "back"; break; default: panel = "unknown"; } return sysfs_emit(buf, "%s\n", panel); } static DEVICE_ATTR_RO(panel); static ssize_t vertical_position_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *vertical_position; switch (dev->physical_location->vertical_position) { case DEVICE_VERT_POS_UPPER: vertical_position = "upper"; break; case DEVICE_VERT_POS_CENTER: vertical_position = "center"; break; case DEVICE_VERT_POS_LOWER: vertical_position = "lower"; break; default: vertical_position = "unknown"; } return sysfs_emit(buf, "%s\n", vertical_position); } static DEVICE_ATTR_RO(vertical_position); static ssize_t horizontal_position_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *horizontal_position; switch (dev->physical_location->horizontal_position) { case DEVICE_HORI_POS_LEFT: horizontal_position = "left"; break; case DEVICE_HORI_POS_CENTER: horizontal_position = "center"; break; case DEVICE_HORI_POS_RIGHT: horizontal_position = "right"; break; default: horizontal_position = "unknown"; } return sysfs_emit(buf, "%s\n", horizontal_position); } static DEVICE_ATTR_RO(horizontal_position); static ssize_t dock_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", dev->physical_location->dock ? "yes" : "no"); } static DEVICE_ATTR_RO(dock); static ssize_t lid_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", dev->physical_location->lid ? 
"yes" : "no"); } static DEVICE_ATTR_RO(lid); static struct attribute *dev_attr_physical_location[] = { &dev_attr_panel.attr, &dev_attr_vertical_position.attr, &dev_attr_horizontal_position.attr, &dev_attr_dock.attr, &dev_attr_lid.attr, NULL, }; const struct attribute_group dev_attr_physical_location_group = { .name = "physical_location", .attrs = dev_attr_physical_location, };
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * acpi_bus.h - ACPI Bus Driver ($Revision: 22 $) * * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> */ #ifndef __ACPI_BUS_H__ #define __ACPI_BUS_H__ #include <linux/device.h> #include <linux/property.h> struct acpi_handle_list { u32 count; acpi_handle *handles; }; /* acpi_utils.h */ acpi_status acpi_extract_package(union acpi_object *package, struct acpi_buffer *format, struct acpi_buffer *buffer); acpi_status acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, struct acpi_object_list *arguments, unsigned long long *data); bool acpi_evaluate_reference(acpi_handle handle, acpi_string pathname, struct acpi_object_list *arguments, struct acpi_handle_list *list); bool acpi_handle_list_equal(struct acpi_handle_list *list1, struct acpi_handle_list *list2); void acpi_handle_list_replace(struct acpi_handle_list *dst, struct acpi_handle_list *src); void acpi_handle_list_free(struct acpi_handle_list *list); bool acpi_device_dep(acpi_handle target, acpi_handle match); acpi_status acpi_evaluate_ost(acpi_handle handle, u32 source_event, u32 status_code, struct acpi_buffer *status_buf); acpi_status acpi_get_physical_device_location(acpi_handle handle, struct acpi_pld_info **pld); bool acpi_has_method(acpi_handle handle, char *name); acpi_status acpi_execute_simple_method(acpi_handle handle, char *method, u64 arg); acpi_status acpi_evaluate_ej0(acpi_handle handle); acpi_status acpi_evaluate_lck(acpi_handle handle, int lock); acpi_status acpi_evaluate_reg(acpi_handle handle, u8 space_id, u32 function); bool acpi_ata_match(acpi_handle handle); bool acpi_bay_match(acpi_handle handle); bool acpi_dock_match(acpi_handle handle); bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs); union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4); #ifdef CONFIG_ACPI static inline union acpi_object * acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4, acpi_object_type type) { union acpi_object *obj; obj = acpi_evaluate_dsm(handle, guid, rev, func, argv4); if (obj && obj->type != type) { ACPI_FREE(obj); obj = NULL; } return obj; } #endif #define ACPI_INIT_DSM_ARGV4(cnt, eles) \ { \ .package.type = ACPI_TYPE_PACKAGE, \ .package.count = (cnt), \ .package.elements = (eles) \ } bool acpi_dev_found(const char *hid); bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); bool acpi_reduced_hardware(void); #ifdef CONFIG_ACPI struct proc_dir_entry; #define ACPI_BUS_FILE_ROOT "acpi" extern struct proc_dir_entry *acpi_root_dir; enum acpi_bus_device_type { ACPI_BUS_TYPE_DEVICE = 0, ACPI_BUS_TYPE_POWER, ACPI_BUS_TYPE_PROCESSOR, ACPI_BUS_TYPE_THERMAL, ACPI_BUS_TYPE_POWER_BUTTON, ACPI_BUS_TYPE_SLEEP_BUTTON, ACPI_BUS_TYPE_ECDT_EC, ACPI_BUS_DEVICE_TYPE_COUNT }; struct acpi_driver; struct acpi_device; /* * ACPI Scan Handler * ----------------- */ struct acpi_hotplug_profile { struct kobject kobj; int (*scan_dependent)(struct acpi_device *adev); void (*notify_online)(struct acpi_device *adev); bool enabled:1; bool demand_offline:1; }; static 
inline struct acpi_hotplug_profile *to_acpi_hotplug_profile( struct kobject *kobj) { return container_of(kobj, struct acpi_hotplug_profile, kobj); } struct acpi_scan_handler { const struct acpi_device_id *ids; struct list_head list_node; bool (*match)(const char *idstr, const struct acpi_device_id **matchid); int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); void (*detach)(struct acpi_device *dev); void (*bind)(struct device *phys_dev); void (*unbind)(struct device *phys_dev); struct acpi_hotplug_profile hotplug; }; /* * ACPI Hotplug Context * -------------------- */ struct acpi_hotplug_context { struct acpi_device *self; int (*notify)(struct acpi_device *, u32); void (*uevent)(struct acpi_device *, u32); void (*fixup)(struct acpi_device *); }; /* * ACPI Driver * ----------- */ typedef int (*acpi_op_add) (struct acpi_device * device); typedef void (*acpi_op_remove) (struct acpi_device *device); typedef void (*acpi_op_notify) (struct acpi_device * device, u32 event); struct acpi_device_ops { acpi_op_add add; acpi_op_remove remove; acpi_op_notify notify; }; #define ACPI_DRIVER_ALL_NOTIFY_EVENTS 0x1 /* system AND device events */ struct acpi_driver { char name[80]; char class[80]; const struct acpi_device_id *ids; /* Supported Hardware IDs */ unsigned int flags; struct acpi_device_ops ops; struct device_driver drv; struct module *owner; }; /* * ACPI Device * ----------- */ /* Status (_STA) */ struct acpi_device_status { u32 present:1; u32 enabled:1; u32 show_in_ui:1; u32 functional:1; u32 battery_present:1; u32 reserved:27; }; /* Flags */ struct acpi_device_flags { u32 dynamic_status:1; u32 removable:1; u32 ejectable:1; u32 power_manageable:1; u32 match_driver:1; u32 initialized:1; u32 visited:1; u32 hotplug_notify:1; u32 is_dock_station:1; u32 of_compatible_ok:1; u32 coherent_dma:1; u32 cca_seen:1; u32 enumeration_by_parent:1; u32 honor_deps:1; u32 reserved:18; }; /* File System */ struct acpi_device_dir { struct proc_dir_entry *entry; }; #define acpi_device_dir(d) ((d)->dir.entry) /* Plug and Play */ typedef char acpi_bus_id[8]; typedef u64 acpi_bus_address; typedef char acpi_device_name[40]; typedef char acpi_device_class[20]; struct acpi_hardware_id { struct list_head list; const char *id; }; struct acpi_pnp_type { u32 hardware_id:1; u32 bus_address:1; u32 platform_id:1; u32 backlight:1; u32 reserved:28; }; struct acpi_device_pnp { acpi_bus_id bus_id; /* Object name */ int instance_no; /* Instance number of this object */ struct acpi_pnp_type type; /* ID type */ acpi_bus_address bus_address; /* _ADR */ char *unique_id; /* _UID */ struct list_head ids; /* _HID and _CIDs */ acpi_device_name device_name; /* Driver-determined */ acpi_device_class device_class; /* " */ union acpi_object *str_obj; /* unicode string for _STR method */ }; #define acpi_device_bid(d) ((d)->pnp.bus_id) #define acpi_device_adr(d) ((d)->pnp.bus_address) const char *acpi_device_hid(struct acpi_device *device); #define acpi_device_uid(d) ((d)->pnp.unique_id) #define acpi_device_name(d) ((d)->pnp.device_name) #define acpi_device_class(d) ((d)->pnp.device_class) /* Power Management */ struct acpi_device_power_flags { u32 explicit_get:1; /* _PSC present? */ u32 power_resources:1; /* Power resources */ u32 inrush_current:1; /* Serialize Dx->D0 */ u32 power_removed:1; /* Optimize Dx->D0 */ u32 ignore_parent:1; /* Power is independent of parent power state */ u32 dsw_present:1; /* _DSW present? 
*/ u32 reserved:26; }; struct acpi_device_power_state { struct { u8 valid:1; u8 explicit_set:1; /* _PSx present? */ u8 reserved:6; } flags; int power; /* % Power (compared to D0) */ int latency; /* Dx->D0 time (microseconds) */ struct list_head resources; /* Power resources referenced */ }; struct acpi_device_power { int state; /* Current state */ struct acpi_device_power_flags flags; struct acpi_device_power_state states[ACPI_D_STATE_COUNT]; /* Power states (D0-D3Cold) */ u8 state_for_enumeration; /* Deepest power state for enumeration */ }; struct acpi_dep_data { struct list_head node; acpi_handle supplier; acpi_handle consumer; bool honor_dep; bool met; bool free_when_met; }; /* Performance Management */ struct acpi_device_perf_flags { u8 reserved:8; }; struct acpi_device_perf_state { struct { u8 valid:1; u8 reserved:7; } flags; u8 power; /* % Power (compared to P0) */ u8 performance; /* % Performance ( " ) */ int latency; /* Px->P0 time (microseconds) */ }; struct acpi_device_perf { int state; struct acpi_device_perf_flags flags; int state_count; struct acpi_device_perf_state *states; }; /* Wakeup Management */ struct acpi_device_wakeup_flags { u8 valid:1; /* Can successfully enable wakeup? */ u8 notifier_present:1; /* Wake-up notify handler has been installed */ }; struct acpi_device_wakeup_context { void (*func)(struct acpi_device_wakeup_context *context); struct device *dev; }; struct acpi_device_wakeup { acpi_handle gpe_device; u64 gpe_number; u64 sleep_state; struct list_head resources; struct acpi_device_wakeup_flags flags; struct acpi_device_wakeup_context context; struct wakeup_source *ws; int prepare_count; int enable_count; }; struct acpi_device_physical_node { unsigned int node_id; struct list_head node; struct device *dev; bool put_online:1; }; struct acpi_device_properties { const guid_t *guid; union acpi_object *properties; struct list_head list; void **bufs; }; /* ACPI Device Specific Data (_DSD) */ struct acpi_device_data { const union acpi_object *pointer; struct list_head properties; const union acpi_object *of_compatible; struct list_head subnodes; }; struct acpi_gpio_mapping; #define ACPI_DEVICE_SWNODE_ROOT 0 /* * The maximum expected number of CSI-2 data lanes. * * This number is not expected to ever have to be equal to or greater than the * number of bits in an unsigned long variable, but if it needs to be increased * above that limit, code will need to be adjusted accordingly. */ #define ACPI_DEVICE_CSI2_DATA_LANES 8 #define ACPI_DEVICE_SWNODE_PORT_NAME_LENGTH 8 enum acpi_device_swnode_dev_props { ACPI_DEVICE_SWNODE_DEV_ROTATION, ACPI_DEVICE_SWNODE_DEV_CLOCK_FREQUENCY, ACPI_DEVICE_SWNODE_DEV_LED_MAX_MICROAMP, ACPI_DEVICE_SWNODE_DEV_FLASH_MAX_MICROAMP, ACPI_DEVICE_SWNODE_DEV_FLASH_MAX_TIMEOUT_US, ACPI_DEVICE_SWNODE_DEV_NUM_OF, ACPI_DEVICE_SWNODE_DEV_NUM_ENTRIES }; enum acpi_device_swnode_port_props { ACPI_DEVICE_SWNODE_PORT_REG, ACPI_DEVICE_SWNODE_PORT_NUM_OF, ACPI_DEVICE_SWNODE_PORT_NUM_ENTRIES }; enum acpi_device_swnode_ep_props { ACPI_DEVICE_SWNODE_EP_REMOTE_EP, ACPI_DEVICE_SWNODE_EP_BUS_TYPE, ACPI_DEVICE_SWNODE_EP_REG, ACPI_DEVICE_SWNODE_EP_CLOCK_LANES, ACPI_DEVICE_SWNODE_EP_DATA_LANES, ACPI_DEVICE_SWNODE_EP_LANE_POLARITIES, /* TX only */ ACPI_DEVICE_SWNODE_EP_LINK_FREQUENCIES, ACPI_DEVICE_SWNODE_EP_NUM_OF, ACPI_DEVICE_SWNODE_EP_NUM_ENTRIES }; /* * Each device has a root software node plus two times as many nodes as the * number of CSI-2 ports. 
*/ #define ACPI_DEVICE_SWNODE_PORT(port) (2 * (port) + 1) #define ACPI_DEVICE_SWNODE_EP(endpoint) \ (ACPI_DEVICE_SWNODE_PORT(endpoint) + 1) /** * struct acpi_device_software_node_port - MIPI DisCo for Imaging CSI-2 port * @port_name: Port name. * @data_lanes: "data-lanes" property values. * @lane_polarities: "lane-polarities" property values. * @link_frequencies: "link_frequencies" property values. * @port_nr: Port number. * @crs_crs2_local: _CRS CSI2 record present (i.e. this is a transmitter one). * @port_props: Port properties. * @ep_props: Endpoint properties. * @remote_ep: Reference to the remote endpoint. */ struct acpi_device_software_node_port { char port_name[ACPI_DEVICE_SWNODE_PORT_NAME_LENGTH + 1]; u32 data_lanes[ACPI_DEVICE_CSI2_DATA_LANES]; u32 lane_polarities[ACPI_DEVICE_CSI2_DATA_LANES + 1 /* clock lane */]; u64 link_frequencies[ACPI_DEVICE_CSI2_DATA_LANES]; unsigned int port_nr; bool crs_csi2_local; struct property_entry port_props[ACPI_DEVICE_SWNODE_PORT_NUM_ENTRIES]; struct property_entry ep_props[ACPI_DEVICE_SWNODE_EP_NUM_ENTRIES]; struct software_node_ref_args remote_ep[1]; }; /** * struct acpi_device_software_nodes - Software nodes for an ACPI device * @dev_props: Device properties. * @nodes: Software nodes for root as well as ports and endpoints. * @nodeprts: Array of software node pointers, for (un)registering them. * @ports: Information related to each port and endpoint within a port. * @num_ports: The number of ports. */ struct acpi_device_software_nodes { struct property_entry dev_props[ACPI_DEVICE_SWNODE_DEV_NUM_ENTRIES]; struct software_node *nodes; const struct software_node **nodeptrs; struct acpi_device_software_node_port *ports; unsigned int num_ports; }; /* Device */ struct acpi_device { u32 pld_crc; int device_type; acpi_handle handle; /* no handle for fixed hardware */ struct fwnode_handle fwnode; struct list_head wakeup_list; struct list_head del_list; struct acpi_device_status status; struct acpi_device_flags flags; struct acpi_device_pnp pnp; struct acpi_device_power power; struct acpi_device_wakeup wakeup; struct acpi_device_perf performance; struct acpi_device_dir dir; struct acpi_device_data data; struct acpi_scan_handler *handler; struct acpi_hotplug_context *hp; struct acpi_device_software_nodes *swnodes; const struct acpi_gpio_mapping *driver_gpios; void *driver_data; struct device dev; unsigned int physical_node_count; unsigned int dep_unmet; struct list_head physical_node_list; struct mutex physical_node_lock; void (*remove)(struct acpi_device *); }; /* Non-device subnode */ struct acpi_data_node { const char *name; acpi_handle handle; struct fwnode_handle fwnode; struct fwnode_handle *parent; struct acpi_device_data data; struct list_head sibling; struct kobject kobj; struct completion kobj_done; }; extern const struct fwnode_operations acpi_device_fwnode_ops; extern const struct fwnode_operations acpi_data_fwnode_ops; extern const struct fwnode_operations acpi_static_fwnode_ops; bool is_acpi_device_node(const struct fwnode_handle *fwnode); bool is_acpi_data_node(const struct fwnode_handle *fwnode); static inline bool is_acpi_node(const struct fwnode_handle *fwnode) { return (is_acpi_device_node(fwnode) || is_acpi_data_node(fwnode)); } #define to_acpi_device_node(__fwnode) \ ({ \ typeof(__fwnode) __to_acpi_device_node_fwnode = __fwnode; \ \ is_acpi_device_node(__to_acpi_device_node_fwnode) ? 
\ container_of(__to_acpi_device_node_fwnode, \ struct acpi_device, fwnode) : \ NULL; \ }) #define to_acpi_data_node(__fwnode) \ ({ \ typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode; \ \ is_acpi_data_node(__to_acpi_data_node_fwnode) ? \ container_of(__to_acpi_data_node_fwnode, \ struct acpi_data_node, fwnode) : \ NULL; \ }) static inline bool is_acpi_static_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_static_fwnode_ops; } static inline bool acpi_data_node_match(const struct fwnode_handle *fwnode, const char *name) { return is_acpi_data_node(fwnode) ? (!strcmp(to_acpi_data_node(fwnode)->name, name)) : false; } static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) { return &adev->fwnode; } static inline void *acpi_driver_data(struct acpi_device *d) { return d->driver_data; } #define to_acpi_device(d) container_of(d, struct acpi_device, dev) #define to_acpi_driver(d) container_of(d, struct acpi_driver, drv) static inline struct acpi_device *acpi_dev_parent(struct acpi_device *adev) { if (adev->dev.parent) return to_acpi_device(adev->dev.parent); return NULL; } static inline void acpi_set_device_status(struct acpi_device *adev, u32 sta) { *((u32 *)&adev->status) = sta; } static inline void acpi_set_hp_context(struct acpi_device *adev, struct acpi_hotplug_context *hp) { hp->self = adev; adev->hp = hp; } void acpi_initialize_hp_context(struct acpi_device *adev, struct acpi_hotplug_context *hp, int (*notify)(struct acpi_device *, u32), void (*uevent)(struct acpi_device *, u32)); /* acpi_device.dev.bus == &acpi_bus_type */ extern const struct bus_type acpi_bus_type; int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data); int acpi_dev_for_each_child(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data); int acpi_dev_for_each_child_reverse(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data); /* * Events * ------ */ struct acpi_bus_event { struct list_head node; acpi_device_class device_class; acpi_bus_id bus_id; u32 type; u32 data; }; extern struct kobject *acpi_kobj; extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int); void acpi_bus_private_data_handler(acpi_handle, void *); int acpi_bus_get_private_data(acpi_handle, void **); int acpi_bus_attach_private_data(acpi_handle, void *); void acpi_bus_detach_private_data(acpi_handle); int acpi_dev_install_notify_handler(struct acpi_device *adev, u32 handler_type, acpi_notify_handler handler, void *context); void acpi_dev_remove_notify_handler(struct acpi_device *adev, u32 handler_type, acpi_notify_handler handler); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); extern int unregister_acpi_notifier(struct notifier_block *); /* * External Functions */ acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta); int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_set_power(acpi_handle handle, int state); const char *acpi_power_state_string(int state); int acpi_device_set_power(struct acpi_device *device, int state); int acpi_bus_init_power(struct acpi_device *device); int acpi_device_fix_up_power(struct acpi_device *device); void acpi_device_fix_up_power_extended(struct acpi_device *adev); void acpi_device_fix_up_power_children(struct acpi_device *adev); int acpi_bus_update_power(acpi_handle handle, int *state_p); int acpi_device_update_power(struct acpi_device 
*device, int *state_p); bool acpi_bus_power_manageable(acpi_handle handle); void acpi_dev_power_up_children_with_adr(struct acpi_device *adev); u8 acpi_dev_power_state_for_wake(struct acpi_device *adev); int acpi_device_power_add_dependent(struct acpi_device *adev, struct device *dev); void acpi_device_power_remove_dependent(struct acpi_device *adev, struct device *dev); #ifdef CONFIG_PM bool acpi_bus_can_wakeup(acpi_handle handle); #else static inline bool acpi_bus_can_wakeup(acpi_handle handle) { return false; } #endif void acpi_scan_lock_acquire(void); void acpi_scan_lock_release(void); void acpi_lock_hp_context(void); void acpi_unlock_hp_context(void); int acpi_scan_add_handler(struct acpi_scan_handler *handler); int acpi_bus_register_driver(struct acpi_driver *driver); void acpi_bus_unregister_driver(struct acpi_driver *driver); int acpi_bus_scan(acpi_handle handle); void acpi_bus_trim(struct acpi_device *start); acpi_status acpi_bus_get_ejd(acpi_handle handle, acpi_handle * ejd); int acpi_match_device_ids(struct acpi_device *device, const struct acpi_device_id *ids); void acpi_set_modalias(struct acpi_device *adev, const char *default_id, char *modalias, size_t len); static inline bool acpi_device_enumerated(struct acpi_device *adev) { return adev && adev->flags.initialized && adev->flags.visited; } /** * module_acpi_driver(acpi_driver) - Helper macro for registering an ACPI driver * @__acpi_driver: acpi_driver struct * * Helper macro for ACPI drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_acpi_driver(__acpi_driver) \ module_driver(__acpi_driver, acpi_bus_register_driver, \ acpi_bus_unregister_driver) /* * Bind physical devices with ACPI devices */ struct acpi_bus_type { struct list_head list; const char *name; bool (*match)(struct device *dev); struct acpi_device * (*find_companion)(struct device *); void (*setup)(struct device *); }; int register_acpi_bus_type(struct acpi_bus_type *); int unregister_acpi_bus_type(struct acpi_bus_type *); int acpi_bind_one(struct device *dev, struct acpi_device *adev); int acpi_unbind_one(struct device *dev); enum acpi_bridge_type { ACPI_BRIDGE_TYPE_PCIE = 1, ACPI_BRIDGE_TYPE_CXL, }; struct acpi_pci_root { struct acpi_device * device; struct pci_bus *bus; u16 segment; int bridge_type; struct resource secondary; /* downstream bus range */ u32 osc_support_set; /* _OSC state of support bits */ u32 osc_control_set; /* _OSC state of control bits */ u32 osc_ext_support_set; /* _OSC state of extended support bits */ u32 osc_ext_control_set; /* _OSC state of extended control bits */ phys_addr_t mcfg_addr; }; /* helper */ struct iommu_ops; bool acpi_dma_supported(const struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, struct fwnode_handle *fwnode, const struct iommu_ops *ops); int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); static inline int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { return acpi_dma_configure_id(dev, attr, NULL); } struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children); struct acpi_device *acpi_find_child_by_adr(struct acpi_device *adev, acpi_bus_address adr); int 
acpi_is_root_bridge(acpi_handle); struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); #ifdef CONFIG_X86 bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status); bool acpi_quirk_skip_acpi_ac_and_battery(void); int acpi_install_cmos_rtc_space_handler(acpi_handle handle); void acpi_remove_cmos_rtc_space_handler(acpi_handle handle); int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip); #else static inline bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status) { return false; } static inline bool acpi_quirk_skip_acpi_ac_and_battery(void) { return false; } static inline int acpi_install_cmos_rtc_space_handler(acpi_handle handle) { return 1; } static inline void acpi_remove_cmos_rtc_space_handler(acpi_handle handle) { } static inline int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip) { *skip = false; return 0; } #endif #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev); bool acpi_quirk_skip_gpio_event_handlers(void); #else static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev) { return false; } static inline bool acpi_quirk_skip_gpio_event_handlers(void) { return false; } #endif #ifdef CONFIG_PM void acpi_pm_wakeup_event(struct device *dev); acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, void (*func)(struct acpi_device_wakeup_context *context)); acpi_status acpi_remove_pm_notifier(struct acpi_device *adev); bool acpi_pm_device_can_wakeup(struct device *dev); int acpi_pm_device_sleep_state(struct device *, int *, int); int acpi_pm_set_device_wakeup(struct device *dev, bool enable); #else static inline void acpi_pm_wakeup_event(struct device *dev) { } static inline acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, void (*func)(struct acpi_device_wakeup_context *context)) { return AE_SUPPORT; } static inline acpi_status acpi_remove_pm_notifier(struct acpi_device *adev) { return AE_SUPPORT; } static inline bool acpi_pm_device_can_wakeup(struct device *dev) { return false; } static inline int acpi_pm_device_sleep_state(struct device *d, int *p, int m) { if (p) *p = ACPI_STATE_D0; return (m >= ACPI_STATE_D0 && m <= ACPI_STATE_D3_COLD) ? 
m : ACPI_STATE_D0; } static inline int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { return -ENODEV; } #endif #ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT bool acpi_sleep_state_supported(u8 sleep_state); #else static inline bool acpi_sleep_state_supported(u8 sleep_state) { return false; } #endif #ifdef CONFIG_ACPI_SLEEP u32 acpi_target_system_state(void); #else static inline u32 acpi_target_system_state(void) { return ACPI_STATE_S0; } #endif static inline bool acpi_device_power_manageable(struct acpi_device *adev) { return adev->flags.power_manageable; } static inline bool acpi_device_can_wakeup(struct acpi_device *adev) { return adev->wakeup.flags.valid; } static inline bool acpi_device_can_poweroff(struct acpi_device *adev) { return adev->power.states[ACPI_STATE_D3_COLD].flags.valid || ((acpi_gbl_FADT.header.revision < 6) && adev->power.states[ACPI_STATE_D3_HOT].flags.explicit_set); } int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer); static inline bool acpi_dev_hid_match(struct acpi_device *adev, const char *hid2) { const char *hid1 = acpi_device_hid(adev); return hid1 && hid2 && !strcmp(hid1, hid2); } static inline bool acpi_str_uid_match(struct acpi_device *adev, const char *uid2) { const char *uid1 = acpi_device_uid(adev); return uid1 && uid2 && !strcmp(uid1, uid2); } static inline bool acpi_int_uid_match(struct acpi_device *adev, u64 uid2) { u64 uid1; return !acpi_dev_uid_to_integer(adev, &uid1) && uid1 == uid2; } #define TYPE_ENTRY(type, x) \ const type: x, \ type: x #define ACPI_STR_TYPES(match) \ TYPE_ENTRY(unsigned char *, match), \ TYPE_ENTRY(signed char *, match), \ TYPE_ENTRY(char *, match), \ TYPE_ENTRY(void *, match) /** * acpi_dev_uid_match - Match device by supplied UID * @adev: ACPI device to match. * @uid2: Unique ID of the device. * * Matches UID in @adev with given @uid2. * * Returns: %true if matches, %false otherwise. */ #define acpi_dev_uid_match(adev, uid2) \ _Generic(uid2, \ /* Treat @uid2 as a string for acpi string types */ \ ACPI_STR_TYPES(acpi_str_uid_match), \ /* Treat as an integer otherwise */ \ default: acpi_int_uid_match)(adev, uid2) /** * acpi_dev_hid_uid_match - Match device by supplied HID and UID * @adev: ACPI device to match. * @hid2: Hardware ID of the device. * @uid2: Unique ID of the device, pass NULL to not check _UID. * * Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2 * will be treated as a match. If user wants to validate @uid2, it should be * done before calling this function. * * Returns: %true if matches or @uid2 is NULL, %false otherwise. 
*/ #define acpi_dev_hid_uid_match(adev, hid2, uid2) \ (acpi_dev_hid_match(adev, hid2) && \ /* Distinguish integer 0 from NULL @uid2 */ \ (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \ acpi_dev_uid_match(adev, uid2))) void acpi_dev_clear_dependencies(struct acpi_device *supplier); bool acpi_dev_ready_for_enumeration(const struct acpi_device *device); struct acpi_device *acpi_dev_get_next_consumer_dev(struct acpi_device *supplier, struct acpi_device *start); /** * for_each_acpi_consumer_dev - iterate over the consumer ACPI devices for a * given supplier * @supplier: Pointer to the supplier's ACPI device * @consumer: Pointer to &struct acpi_device to hold the consumer, initially NULL */ #define for_each_acpi_consumer_dev(supplier, consumer) \ for (consumer = acpi_dev_get_next_consumer_dev(supplier, NULL); \ consumer; \ consumer = acpi_dev_get_next_consumer_dev(supplier, consumer)) struct acpi_device * acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv); struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); /** * for_each_acpi_dev_match - iterate over ACPI devices that matching the criteria * @adev: pointer to the matching ACPI device, NULL at the end of the loop * @hid: Hardware ID of the device. * @uid: Unique ID of the device, pass NULL to not check _UID * @hrv: Hardware Revision of the device, pass -1 to not check _HRV * * The caller is responsible for invoking acpi_dev_put() on the returned device. */ #define for_each_acpi_dev_match(adev, hid, uid, hrv) \ for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv); \ adev; \ adev = acpi_dev_get_next_match_dev(adev, hid, uid, hrv)) static inline struct acpi_device *acpi_dev_get(struct acpi_device *adev) { return adev ? to_acpi_device(get_device(&adev->dev)) : NULL; } static inline void acpi_dev_put(struct acpi_device *adev) { if (adev) put_device(&adev->dev); } struct acpi_device *acpi_fetch_acpi_dev(acpi_handle handle); struct acpi_device *acpi_get_acpi_dev(acpi_handle handle); static inline void acpi_put_acpi_dev(struct acpi_device *adev) { acpi_dev_put(adev); } #else /* CONFIG_ACPI */ static inline int register_acpi_bus_type(void *bus) { return 0; } static inline int unregister_acpi_bus_type(void *bus) { return 0; } #endif /* CONFIG_ACPI */ #endif /*__ACPI_BUS_H__*/
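/*
 * Usage sketch for the matching helpers above ("EXMP0001" is a made-up
 * _HID): walk all ACPI devices with that hardware ID and return the first
 * one that is ready for enumeration.  As the kernel-doc above notes, the
 * iterator takes a reference on the device it yields, so a caller that
 * breaks out of the loop eventually drops it with acpi_dev_put().
 */
static struct acpi_device *example_find_ready_device(void)
{
	struct acpi_device *adev;

	for_each_acpi_dev_match(adev, "EXMP0001", NULL, -1) {
		if (acpi_dev_ready_for_enumeration(adev))
			break;	/* reference held; caller must acpi_dev_put() */
	}

	return adev;	/* NULL when no suitable device was found */
}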
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 // SPDX-License-Identifier: GPL-2.0-or-later /* * Nano River Technologies viperboard driver * * This is the core driver for the viperboard. There are cell drivers * available for I2C, ADC and both GPIOs. SPI is not yet supported. * The drivers do not support all features the board exposes. See user * manual of the viperboard. * * (C) 2012 by Lemonage GmbH * Author: Lars Poeschel <poeschel@lemonage.de> * All rights reserved. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/mutex.h> #include <linux/mfd/core.h> #include <linux/mfd/viperboard.h> #include <linux/usb.h> static const struct usb_device_id vprbrd_table[] = { { USB_DEVICE(0x2058, 0x1005) }, /* Nano River Technologies */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, vprbrd_table); static const struct mfd_cell vprbrd_devs[] = { { .name = "viperboard-gpio", }, { .name = "viperboard-i2c", }, { .name = "viperboard-adc", }, }; static int vprbrd_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct vprbrd *vb; u16 version = 0; int pipe, ret; /* allocate memory for our device state and initialize it */ vb = kzalloc(sizeof(*vb), GFP_KERNEL); if (!vb) return -ENOMEM; mutex_init(&vb->lock); vb->usb_dev = usb_get_dev(interface_to_usbdev(interface)); /* save our data pointer in this interface device */ usb_set_intfdata(interface, vb); dev_set_drvdata(&vb->pdev.dev, vb); /* get version information, major first, minor then */ pipe = usb_rcvctrlpipe(vb->usb_dev, 0); ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MAJOR, VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, vb->buf, 1, VPRBRD_USB_TIMEOUT_MS); if (ret == 1) version = vb->buf[0]; ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MINOR, VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, vb->buf, 1, VPRBRD_USB_TIMEOUT_MS); if (ret == 1) { version <<= 8; version = version | vb->buf[0]; } dev_info(&interface->dev, "version %x.%02x found at bus %03d address %03d\n", version >> 8, version & 0xff, vb->usb_dev->bus->busnum, vb->usb_dev->devnum); ret = mfd_add_hotplug_devices(&interface->dev, vprbrd_devs, ARRAY_SIZE(vprbrd_devs)); if (ret != 0) { dev_err(&interface->dev, "Failed to add mfd devices to core."); goto error; } return 0; error: if (vb) { usb_put_dev(vb->usb_dev); kfree(vb); } return ret; } static void vprbrd_disconnect(struct usb_interface *interface) { struct vprbrd *vb = usb_get_intfdata(interface); mfd_remove_devices(&interface->dev); usb_set_intfdata(interface, NULL); usb_put_dev(vb->usb_dev); kfree(vb); dev_dbg(&interface->dev, "disconnected\n"); } static struct usb_driver vprbrd_driver = { .name = "viperboard", .probe = vprbrd_probe, .disconnect = vprbrd_disconnect, .id_table = vprbrd_table, }; module_usb_driver(vprbrd_driver); MODULE_DESCRIPTION("Nano River Technologies viperboard mfd core driver"); MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>"); MODULE_LICENSE("GPL");
/* BlueZ -
Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. Copyright 2023-2024 NXP Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ #ifndef __HCI_CORE_H #define __HCI_CORE_H #include <linux/idr.h> #include <linux/leds.h> #include <linux/rculist.h> #include <net/bluetooth/hci.h> #include <net/bluetooth/hci_sync.h> #include <net/bluetooth/hci_sock.h> #include <net/bluetooth/coredump.h> /* HCI priority */ #define HCI_PRIO_MAX 7 /* HCI maximum id value */ #define HCI_MAX_ID 10000 /* HCI Core structures */ struct inquiry_data { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; __u8 pscan_mode; __u8 dev_class[3]; __le16 clock_offset; __s8 rssi; __u8 ssp_mode; }; struct inquiry_entry { struct list_head all; /* inq_cache.all */ struct list_head list; /* unknown or resolve */ enum { NAME_NOT_KNOWN, NAME_NEEDED, NAME_PENDING, NAME_KNOWN, } name_state; __u32 timestamp; struct inquiry_data data; }; struct discovery_state { int type; enum { DISCOVERY_STOPPED, DISCOVERY_STARTING, DISCOVERY_FINDING, DISCOVERY_RESOLVING, DISCOVERY_STOPPING, } state; struct list_head all; /* All devices found during inquiry */ struct list_head unknown; /* Name state not known */ struct list_head resolve; /* Name needs to be resolved */ __u32 timestamp; bdaddr_t last_adv_addr; u8 last_adv_addr_type; s8 last_adv_rssi; u32 last_adv_flags; u8 last_adv_data[HCI_MAX_EXT_AD_LENGTH]; u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; bool limited; s8 rssi; u16 uuid_count; u8 (*uuids)[16]; unsigned long scan_start; unsigned long scan_duration; unsigned long name_resolve_timeout; }; #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ enum suspend_tasks { SUSPEND_PAUSE_DISCOVERY, SUSPEND_UNPAUSE_DISCOVERY, SUSPEND_PAUSE_ADVERTISING, SUSPEND_UNPAUSE_ADVERTISING, SUSPEND_SCAN_DISABLE, SUSPEND_SCAN_ENABLE, SUSPEND_DISCONNECTING, SUSPEND_POWERING_DOWN, SUSPEND_PREPARE_NOTIFIER, SUSPEND_SET_ADV_FILTER, __SUSPEND_NUM_TASKS }; enum suspended_state { BT_RUNNING = 0, BT_SUSPEND_DISCONNECT, BT_SUSPEND_CONFIGURE_WAKE, }; struct hci_conn_hash { struct list_head list; unsigned int acl_num; unsigned int amp_num; unsigned int sco_num; unsigned int iso_num; unsigned int le_num; unsigned int le_num_peripheral; }; struct bdaddr_list { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; }; struct codec_list { struct list_head list; u8 id; __u16 cid; __u16 vid; u8 transport; u8 num_caps; u32 len; struct hci_codec_caps caps[]; }; struct bdaddr_list_with_irk { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; u8 peer_irk[16]; u8 local_irk[16]; 
}; /* Bitmask of connection flags */ enum hci_conn_flags { HCI_CONN_FLAG_REMOTE_WAKEUP = 1, HCI_CONN_FLAG_DEVICE_PRIVACY = 2, }; typedef u8 hci_conn_flags_t; struct bdaddr_list_with_flags { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; hci_conn_flags_t flags; }; struct bt_uuid { struct list_head list; u8 uuid[16]; u8 size; u8 svc_hint; }; struct blocked_key { struct list_head list; struct rcu_head rcu; u8 type; u8 val[16]; }; struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; u8 link_type; u8 type; u8 val[16]; }; struct smp_ltk { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; u8 link_type; u8 authenticated; u8 type; u8 enc_size; __le16 ediv; __le64 rand; u8 val[16]; }; struct smp_irk { struct list_head list; struct rcu_head rcu; bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; u8 link_type; u8 val[16]; }; struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; }; struct oob_data { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; u8 present; u8 hash192[16]; u8 rand192[16]; u8 hash256[16]; u8 rand256[16]; }; struct adv_info { struct list_head list; bool enabled; bool pending; bool periodic; __u8 mesh; __u8 instance; __u32 flags; __u16 timeout; __u16 remaining_time; __u16 duration; __u16 adv_data_len; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; bool adv_data_changed; __u16 scan_rsp_len; __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH]; bool scan_rsp_changed; __u16 per_adv_data_len; __u8 per_adv_data[HCI_MAX_PER_AD_LENGTH]; __s8 tx_power; __u32 min_interval; __u32 max_interval; bdaddr_t random_addr; bool rpa_expired; struct delayed_work rpa_expired_cb; }; #define HCI_MAX_ADV_INSTANCES 5 #define HCI_DEFAULT_ADV_DURATION 2 #define HCI_ADV_TX_POWER_NO_PREFERENCE 0x7F #define DATA_CMP(_d1, _l1, _d2, _l2) \ (_l1 == _l2 ? 
memcmp(_d1, _d2, _l1) : _l1 - _l2) #define ADV_DATA_CMP(_adv, _data, _len) \ DATA_CMP((_adv)->adv_data, (_adv)->adv_data_len, _data, _len) #define SCAN_RSP_CMP(_adv, _data, _len) \ DATA_CMP((_adv)->scan_rsp_data, (_adv)->scan_rsp_len, _data, _len) struct monitored_device { struct list_head list; bdaddr_t bdaddr; __u8 addr_type; __u16 handle; bool notified; }; struct adv_pattern { struct list_head list; __u8 ad_type; __u8 offset; __u8 length; __u8 value[HCI_MAX_EXT_AD_LENGTH]; }; struct adv_rssi_thresholds { __s8 low_threshold; __s8 high_threshold; __u16 low_threshold_timeout; __u16 high_threshold_timeout; __u8 sampling_period; }; struct adv_monitor { struct list_head patterns; struct adv_rssi_thresholds rssi; __u16 handle; enum { ADV_MONITOR_STATE_NOT_REGISTERED, ADV_MONITOR_STATE_REGISTERED, ADV_MONITOR_STATE_OFFLOADED } state; }; #define HCI_MIN_ADV_MONITOR_HANDLE 1 #define HCI_MAX_ADV_MONITOR_NUM_HANDLES 32 #define HCI_MAX_ADV_MONITOR_NUM_PATTERNS 16 #define HCI_ADV_MONITOR_EXT_NONE 1 #define HCI_ADV_MONITOR_EXT_MSFT 2 #define HCI_MAX_SHORT_NAME_LENGTH 10 #define HCI_CONN_HANDLE_MAX 0x0eff #define HCI_CONN_HANDLE_UNSET(_handle) (_handle > HCI_CONN_HANDLE_MAX) /* Min encryption key size to match with SMP */ #define HCI_MIN_ENC_KEY_SIZE 7 /* Default LE RPA expiry time, 15 minutes */ #define HCI_DEFAULT_RPA_TIMEOUT (15 * 60) /* Default min/max age of connection information (1s/3s) */ #define DEFAULT_CONN_INFO_MIN_AGE 1000 #define DEFAULT_CONN_INFO_MAX_AGE 3000 /* Default authenticated payload timeout 30s */ #define DEFAULT_AUTH_PAYLOAD_TIMEOUT 0x0bb8 struct amp_assoc { __u16 len; __u16 offset; __u16 rem_len; __u16 len_so_far; __u8 data[HCI_MAX_AMP_ASSOC_SIZE]; }; #define HCI_MAX_PAGES 3 struct hci_dev { struct list_head list; struct mutex lock; struct ida unset_handle_ida; const char *name; unsigned long flags; __u16 id; __u8 bus; __u8 dev_type; bdaddr_t bdaddr; bdaddr_t setup_addr; bdaddr_t public_addr; bdaddr_t random_addr; bdaddr_t static_addr; __u8 adv_addr_type; __u8 dev_name[HCI_MAX_NAME_LENGTH]; __u8 short_name[HCI_MAX_SHORT_NAME_LENGTH]; __u8 eir[HCI_MAX_EIR_LENGTH]; __u16 appearance; __u8 dev_class[3]; __u8 major_class; __u8 minor_class; __u8 max_page; __u8 features[HCI_MAX_PAGES][8]; __u8 le_features[8]; __u8 le_accept_list_size; __u8 le_resolv_list_size; __u8 le_num_of_adv_sets; __u8 le_states[8]; __u8 mesh_ad_types[16]; __u8 mesh_send_ref; __u8 commands[64]; __u8 hci_ver; __u16 hci_rev; __u8 lmp_ver; __u16 manufacturer; __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; __u16 stored_max_keys; __u16 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u8 err_data_reporting; __u16 page_scan_interval; __u16 page_scan_window; __u8 page_scan_type; __u8 le_adv_channel_map; __u16 le_adv_min_interval; __u16 le_adv_max_interval; __u8 le_scan_type; __u16 le_scan_interval; __u16 le_scan_window; __u16 le_scan_int_suspend; __u16 le_scan_window_suspend; __u16 le_scan_int_discovery; __u16 le_scan_window_discovery; __u16 le_scan_int_adv_monitor; __u16 le_scan_window_adv_monitor; __u16 le_scan_int_connect; __u16 le_scan_window_connect; __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 le_conn_latency; __u16 le_supv_timeout; __u16 le_def_tx_len; __u16 le_def_tx_time; __u16 le_max_tx_len; __u16 le_max_tx_time; __u16 le_max_rx_len; __u16 le_max_rx_time; __u8 le_max_key_size; __u8 le_min_key_size; __u16 discov_interleaved_timeout; __u16 conn_info_min_age; __u16 conn_info_max_age; __u16 auth_payload_timeout; __u8 min_enc_key_size; __u8 max_enc_key_size; __u8 pairing_opts; __u8 
ssp_debug_mode; __u8 hw_error_code; __u32 clock; __u16 advmon_allowlist_duration; __u16 advmon_no_filter_duration; __u8 enable_advmon_interleave_scan; __u16 devid_source; __u16 devid_vendor; __u16 devid_product; __u16 devid_version; __u8 def_page_scan_type; __u16 def_page_scan_int; __u16 def_page_scan_window; __u8 def_inq_scan_type; __u16 def_inq_scan_int; __u16 def_inq_scan_window; __u16 def_br_lsto; __u16 def_page_timeout; __u16 def_multi_adv_rotation_duration; __u16 def_le_autoconnect_timeout; __s8 min_le_tx_power; __s8 max_le_tx_power; __u16 pkt_type; __u16 esco_type; __u16 link_policy; __u16 link_mode; __u32 idle_timeout; __u16 sniff_min_interval; __u16 sniff_max_interval; __u8 amp_status; __u32 amp_total_bw; __u32 amp_max_bw; __u32 amp_min_latency; __u32 amp_max_pdu; __u8 amp_type; __u16 amp_pal_cap; __u16 amp_assoc_size; __u32 amp_max_flush_to; __u32 amp_be_flush_to; struct amp_assoc loc_assoc; __u8 flow_ctl_mode; unsigned int auto_accept_delay; unsigned long quirks; atomic_t cmd_cnt; unsigned int acl_cnt; unsigned int sco_cnt; unsigned int le_cnt; unsigned int iso_cnt; unsigned int acl_mtu; unsigned int sco_mtu; unsigned int le_mtu; unsigned int iso_mtu; unsigned int acl_pkts; unsigned int sco_pkts; unsigned int le_pkts; unsigned int iso_pkts; __u16 block_len; __u16 block_mtu; __u16 num_blocks; __u16 block_cnt; unsigned long acl_last_tx; unsigned long sco_last_tx; unsigned long le_last_tx; __u8 le_tx_def_phys; __u8 le_rx_def_phys; struct workqueue_struct *workqueue; struct workqueue_struct *req_workqueue; struct work_struct power_on; struct delayed_work power_off; struct work_struct error_reset; struct work_struct cmd_sync_work; struct list_head cmd_sync_work_list; struct mutex cmd_sync_work_lock; struct mutex unregister_lock; struct work_struct cmd_sync_cancel_work; struct work_struct reenable_adv_work; __u16 discov_timeout; struct delayed_work discov_off; struct delayed_work service_cache; struct delayed_work cmd_timer; struct delayed_work ncmd_timer; struct work_struct rx_work; struct work_struct cmd_work; struct work_struct tx_work; struct delayed_work le_scan_disable; struct sk_buff_head rx_q; struct sk_buff_head raw_q; struct sk_buff_head cmd_q; struct sk_buff *sent_cmd; struct sk_buff *recv_event; struct mutex req_lock; wait_queue_head_t req_wait_q; __u32 req_status; __u32 req_result; struct sk_buff *req_skb; struct sk_buff *req_rsp; void *smp_data; void *smp_bredr_data; struct discovery_state discovery; int discovery_old_state; bool discovery_paused; int advertising_old_state; bool advertising_paused; struct notifier_block suspend_notifier; enum suspended_state suspend_state_next; enum suspended_state suspend_state; bool scanning_paused; bool suspended; u8 wake_reason; bdaddr_t wake_addr; u8 wake_addr_type; struct hci_conn_hash conn_hash; struct list_head mesh_pending; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; struct list_head uuids; struct list_head link_keys; struct list_head long_term_keys; struct list_head identity_resolving_keys; struct list_head remote_oob_data; struct list_head le_accept_list; struct list_head le_resolv_list; struct list_head le_conn_params; struct list_head pend_le_conns; struct list_head pend_le_reports; struct list_head blocked_keys; struct list_head local_codecs; struct hci_dev_stats stat; atomic_t promisc; const char *hw_info; const char *fw_info; struct dentry *debugfs; struct hci_devcoredump dump; struct device dev; struct rfkill *rfkill; DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); 
hci_conn_flags_t conn_flags; __s8 adv_tx_power; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 adv_data_len; __u8 scan_rsp_data[HCI_MAX_EXT_AD_LENGTH]; __u8 scan_rsp_data_len; __u8 per_adv_data[HCI_MAX_PER_AD_LENGTH]; __u8 per_adv_data_len; struct list_head adv_instances; unsigned int adv_instance_cnt; __u8 cur_adv_instance; __u16 adv_instance_timeout; struct delayed_work adv_instance_expire; struct idr adv_monitors_idr; unsigned int adv_monitors_cnt; __u8 irk[16]; __u32 rpa_timeout; struct delayed_work rpa_expired; bdaddr_t rpa; struct delayed_work mesh_send_done; enum { INTERLEAVE_SCAN_NONE, INTERLEAVE_SCAN_NO_FILTER, INTERLEAVE_SCAN_ALLOWLIST } interleave_scan_state; struct delayed_work interleave_scan; struct list_head monitored_devices; bool advmon_pend_notify; #if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; #endif #if IS_ENABLED(CONFIG_BT_MSFTEXT) __u16 msft_opcode; void *msft_data; bool msft_curve_validity; #endif #if IS_ENABLED(CONFIG_BT_AOSPEXT) bool aosp_capable; bool aosp_quality_report; #endif int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); int (*flush)(struct hci_dev *hdev); int (*setup)(struct hci_dev *hdev); int (*shutdown)(struct hci_dev *hdev); int (*send)(struct hci_dev *hdev, struct sk_buff *skb); void (*notify)(struct hci_dev *hdev, unsigned int evt); void (*hw_error)(struct hci_dev *hdev, u8 code); int (*post_init)(struct hci_dev *hdev); int (*set_diag)(struct hci_dev *hdev, bool enable); int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr); void (*cmd_timeout)(struct hci_dev *hdev); void (*reset)(struct hci_dev *hdev); bool (*wakeup)(struct hci_dev *hdev); int (*set_quality_report)(struct hci_dev *hdev, bool enable); int (*get_data_path_id)(struct hci_dev *hdev, __u8 *data_path); int (*get_codec_config_data)(struct hci_dev *hdev, __u8 type, struct bt_codec *codec, __u8 *vnd_len, __u8 **vnd_data); }; #define HCI_PHY_HANDLE(handle) (handle & 0xff) enum conn_reasons { CONN_REASON_PAIR_DEVICE, CONN_REASON_L2CAP_CHAN, CONN_REASON_SCO_CONNECT, CONN_REASON_ISO_CONNECT, }; struct hci_conn { struct list_head list; atomic_t refcnt; bdaddr_t dst; __u8 dst_type; bdaddr_t src; __u8 src_type; bdaddr_t init_addr; __u8 init_addr_type; bdaddr_t resp_addr; __u8 resp_addr_type; __u8 adv_instance; __u16 handle; __u16 sync_handle; __u16 state; __u8 mode; __u8 type; __u8 role; bool out; __u8 attempt; __u8 dev_class[3]; __u8 features[HCI_MAX_PAGES][8]; __u16 pkt_type; __u16 link_policy; __u8 key_type; __u8 auth_type; __u8 sec_level; __u8 pending_sec_level; __u8 pin_length; __u8 enc_key_size; __u8 io_capability; __u32 passkey_notify; __u8 passkey_entered; __u16 disc_timeout; __u16 conn_timeout; __u16 setting; __u16 auth_payload_timeout; __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 le_conn_interval; __u16 le_conn_latency; __u16 le_supv_timeout; __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 le_adv_data_len; __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN]; __u16 le_per_adv_data_len; __u16 le_per_adv_data_offset; __u8 le_adv_phy; __u8 le_adv_sec_phy; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; __s8 tx_power; __s8 max_tx_power; struct bt_iso_qos iso_qos; unsigned long flags; enum conn_reasons conn_reason; __u8 abort_reason; __u32 clock; __u16 clock_accuracy; unsigned long conn_info_timestamp; __u8 remote_cap; __u8 remote_auth; __u8 remote_id; unsigned int sent; struct sk_buff_head data_q; struct list_head chan_list; struct delayed_work disc_work; struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work 
le_conn_timeout; struct device dev; struct dentry *debugfs; struct hci_dev *hdev; void *l2cap_data; void *sco_data; void *iso_data; struct amp_mgr *amp_mgr; struct list_head link_list; struct hci_conn *parent; struct hci_link *link; struct bt_codec codec; void (*connect_cfm_cb) (struct hci_conn *conn, u8 status); void (*security_cfm_cb) (struct hci_conn *conn, u8 status); void (*disconn_cfm_cb) (struct hci_conn *conn, u8 reason); void (*cleanup)(struct hci_conn *conn); }; struct hci_link { struct list_head list; struct hci_conn *conn; }; struct hci_chan { struct list_head list; __u16 handle; struct hci_conn *conn; struct sk_buff_head data_q; unsigned int sent; __u8 state; bool amp; }; struct hci_conn_params { struct list_head list; struct list_head action; bdaddr_t addr; u8 addr_type; u16 conn_min_interval; u16 conn_max_interval; u16 conn_latency; u16 supervision_timeout; enum { HCI_AUTO_CONN_DISABLED, HCI_AUTO_CONN_REPORT, HCI_AUTO_CONN_DIRECT, HCI_AUTO_CONN_ALWAYS, HCI_AUTO_CONN_LINK_LOSS, HCI_AUTO_CONN_EXPLICIT, } auto_connect; struct hci_conn *conn; bool explicit_connect; /* Accessed without hdev->lock: */ hci_conn_flags_t flags; u8 privacy_mode; }; extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) #define hci_dev_change_flag(hdev, nr) change_bit((nr), (hdev)->dev_flags) #define hci_dev_test_flag(hdev, nr) test_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_set_flag(hdev, nr) test_and_set_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_volatile_flags(hdev) \ do { \ hci_dev_clear_flag(hdev, HCI_LE_SCAN); \ hci_dev_clear_flag(hdev, HCI_LE_ADV); \ hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\ hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ } while (0) #define hci_dev_le_state_simultaneous(hdev) \ (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ (hdev->le_states[4] & 0x08) && /* Central */ \ (hdev->le_states[4] & 0x40) && /* Peripheral */ \ (hdev->le_states[3] & 0x10)) /* Simultaneous */ /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); #if IS_ENABLED(CONFIG_BT_BREDR) int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb); #else static inline int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) { return 0; } static inline void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) { } #endif #if IS_ENABLED(CONFIG_BT_LE) int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags); void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); #else static inline int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) { return 0; } static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { } #endif /* ----- Inquiry cache ----- */ #define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ #define INQUIRY_ENTRY_AGE_MAX (HZ*60) /* 60 seconds */ static inline void 
discovery_init(struct hci_dev *hdev) { hdev->discovery.state = DISCOVERY_STOPPED; INIT_LIST_HEAD(&hdev->discovery.all); INIT_LIST_HEAD(&hdev->discovery.unknown); INIT_LIST_HEAD(&hdev->discovery.resolve); hdev->discovery.report_invalid_rssi = true; hdev->discovery.rssi = HCI_RSSI_INVALID; } static inline void hci_discovery_filter_clear(struct hci_dev *hdev) { hdev->discovery.result_filtering = false; hdev->discovery.report_invalid_rssi = true; hdev->discovery.rssi = HCI_RSSI_INVALID; hdev->discovery.uuid_count = 0; kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; hdev->discovery.scan_start = 0; hdev->discovery.scan_duration = 0; } bool hci_discovery_active(struct hci_dev *hdev); void hci_discovery_set_state(struct hci_dev *hdev, int state); static inline int inquiry_cache_empty(struct hci_dev *hdev) { return list_empty(&hdev->discovery.all); } static inline long inquiry_cache_age(struct hci_dev *hdev) { struct discovery_state *c = &hdev->discovery; return jiffies - c->timestamp; } static inline long inquiry_entry_age(struct inquiry_entry *e) { return jiffies - e->timestamp; } struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr); struct inquiry_entry *hci_inquiry_cache_lookup_unknown(struct hci_dev *hdev, bdaddr_t *bdaddr); struct inquiry_entry *hci_inquiry_cache_lookup_resolve(struct hci_dev *hdev, bdaddr_t *bdaddr, int state); void hci_inquiry_cache_update_resolve(struct hci_dev *hdev, struct inquiry_entry *ie); u32 hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data, bool name_known); void hci_inquiry_cache_flush(struct hci_dev *hdev); /* ----- HCI Connections ----- */ enum { HCI_CONN_AUTH_PEND, HCI_CONN_ENCRYPT_PEND, HCI_CONN_RSWITCH_PEND, HCI_CONN_MODE_CHANGE_PEND, HCI_CONN_SCO_SETUP_PEND, HCI_CONN_MGMT_CONNECTED, HCI_CONN_SSP_ENABLED, HCI_CONN_SC_ENABLED, HCI_CONN_AES_CCM, HCI_CONN_POWER_SAVE, HCI_CONN_FLUSH_KEY, HCI_CONN_ENCRYPT, HCI_CONN_AUTH, HCI_CONN_SECURE, HCI_CONN_FIPS, HCI_CONN_STK_ENCRYPT, HCI_CONN_AUTH_INITIATOR, HCI_CONN_DROP, HCI_CONN_CANCEL, HCI_CONN_PARAM_REMOVAL_PEND, HCI_CONN_NEW_LINK_KEY, HCI_CONN_SCANNING, HCI_CONN_AUTH_FAILURE, HCI_CONN_PER_ADV, HCI_CONN_BIG_CREATED, HCI_CONN_CREATE_CIS, HCI_CONN_BIG_SYNC, HCI_CONN_BIG_SYNC_FAILED, HCI_CONN_PA_SYNC, HCI_CONN_PA_SYNC_FAILED, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; return hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && test_bit(HCI_CONN_SSP_ENABLED, &conn->flags); } static inline bool hci_conn_sc_enabled(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; return hci_dev_test_flag(hdev, HCI_SC_ENABLED) && test_bit(HCI_CONN_SC_ENABLED, &conn->flags); } static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) { struct hci_conn_hash *h = &hdev->conn_hash; list_add_tail_rcu(&c->list, &h->list); switch (c->type) { case ACL_LINK: h->acl_num++; break; case AMP_LINK: h->amp_num++; break; case LE_LINK: h->le_num++; if (c->role == HCI_ROLE_SLAVE) h->le_num_peripheral++; break; case SCO_LINK: case ESCO_LINK: h->sco_num++; break; case ISO_LINK: h->iso_num++; break; } } static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) { struct hci_conn_hash *h = &hdev->conn_hash; list_del_rcu(&c->list); synchronize_rcu(); switch (c->type) { case ACL_LINK: h->acl_num--; break; case AMP_LINK: h->amp_num--; break; case LE_LINK: h->le_num--; if (c->role == HCI_ROLE_SLAVE) h->le_num_peripheral--; break; case SCO_LINK: case ESCO_LINK: h->sco_num--; break; case ISO_LINK: 
h->iso_num--; break; } } static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; switch (type) { case ACL_LINK: return h->acl_num; case AMP_LINK: return h->amp_num; case LE_LINK: return h->le_num; case SCO_LINK: case ESCO_LINK: return h->sco_num; case ISO_LINK: return h->iso_num; default: return 0; } } static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num; } static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c == conn) { rcu_read_unlock(); return true; } } rcu_read_unlock(); return false; } static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; __u8 type = INVALID_LINK; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->handle == handle) { type = c->type; break; } } rcu_read_unlock(); return type; } static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, bdaddr_t *ba, __u8 bis) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (bacmp(&c->dst, ba) || c->type != ISO_LINK) continue; if (c->iso_qos.bcast.bis == bis) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn * hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev, bdaddr_t *ba, __u8 big, __u8 bis) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (bacmp(&c->dst, ba) || c->type != ISO_LINK || !test_bit(HCI_CONN_PER_ADV, &c->flags)) continue; if (c->iso_qos.bcast.big == big && c->iso_qos.bcast.bis == bis) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_handle(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->handle == handle) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_ba(struct hci_dev *hdev, __u8 type, bdaddr_t *ba) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && !bacmp(&c->dst, ba)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev, bdaddr_t *ba, __u8 ba_type) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != LE_LINK) continue; if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, bdaddr_t *ba, __u8 ba_type, __u8 cig, __u8 id) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY)) continue; /* Match CIG ID if set */ if (cig != c->iso_qos.ucast.cig) continue; /* Match CIS ID if set */ if (id != c->iso_qos.ucast.cis) continue; /* Match 
destination address if set */ if (!ba || (ba_type == c->dst_type && !bacmp(&c->dst, ba))) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev, __u8 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY)) continue; if (handle == c->iso_qos.ucast.cig) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, __u8 handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK) continue; if (handle == c->iso_qos.bcast.big) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn * hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK || c->state != state) continue; if (handle == c->iso_qos.bcast.big) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn * hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != ISO_LINK || !test_bit(HCI_CONN_PA_SYNC, &c->flags)) continue; if (c->iso_qos.bcast.big == big) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn * hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != ISO_LINK || !test_bit(HCI_CONN_PA_SYNC, &c->flags)) continue; if (c->sync_handle == sync_handle) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, __u8 type, __u16 state) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && c->state == state) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } typedef void (*hci_conn_func_t)(struct hci_conn *conn, void *data); static inline void hci_conn_hash_list_state(struct hci_dev *hdev, hci_conn_func_t func, __u8 type, __u16 state, void *data) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; if (!func) return; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && c->state == state) func(c, data); } rcu_read_unlock(); } static inline void hci_conn_hash_list_flag(struct hci_dev *hdev, hci_conn_func_t func, __u8 type, __u8 flag, void *data) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; if (!func) return; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && test_bit(flag, &c->flags)) func(c, data); } rcu_read_unlock(); } static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == LE_LINK && c->state 
== BT_CONNECT && !test_bit(HCI_CONN_SCANNING, &c->flags)) { rcu_read_unlock(); return c; } } rcu_read_unlock(); return NULL; } /* Returns true if an le connection is in the scanning state */ static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type == LE_LINK && c->state == BT_CONNECT && test_bit(HCI_CONN_SCANNING, &c->flags)) { rcu_read_unlock(); return true; } } rcu_read_unlock(); return false; } int hci_disconnect(struct hci_conn *conn, __u8 reason); bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role, u16 handle); struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role); void hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); struct hci_chan *hci_chan_create(struct hci_conn *conn); void hci_chan_del(struct hci_chan *chan); void hci_chan_list_flush(struct hci_conn *conn); struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, u16 conn_timeout, u8 role, u8 phy, u8 sec_phy); void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason, u16 timeout); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, __u16 setting, struct bt_codec *codec, u16 timeout); struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, struct bt_iso_qos *qos, __u8 base_len, __u8 *base); struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, struct bt_iso_qos *qos, __u16 sync_handle, __u8 num_bis, __u8 bis[]); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, bool initiator); int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); void hci_conn_failed(struct hci_conn *conn, u8 status); u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle); /* * hci_conn_get() and hci_conn_put() are used to control the life-time of an * "hci_conn" object. They do not guarantee that the hci_conn object is running, * working or anything else. They just guarantee that the object is available * and can be dereferenced. 
So you can use its locks, local variables and any * other constant data. * Before accessing runtime data, you _must_ lock the object and then check that * it is still running. As soon as you release the locks, the connection might * get dropped, though. * * On the other hand, hci_conn_hold() and hci_conn_drop() are used to control * how long the underlying connection is held. So every channel that runs on the * hci_conn object calls this to prevent the connection from disappearing. As * long as you hold a device, you must also guarantee that you have a valid * reference to the device via hci_conn_get() (or the initial reference from * hci_conn_add()). * The hold()/drop() ref-count is known to drop below 0 sometimes, which doesn't * break because nobody cares for that. But this means, we cannot use * _get()/_drop() in it, but require the caller to have a valid ref (FIXME). */ static inline struct hci_conn *hci_conn_get(struct hci_conn *conn) { get_device(&conn->dev); return conn; } static inline void hci_conn_put(struct hci_conn *conn) { put_device(&conn->dev); } static inline struct hci_conn *hci_conn_hold(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); atomic_inc(&conn->refcnt); cancel_delayed_work(&conn->disc_work); return conn; } static inline void hci_conn_drop(struct hci_conn *conn) { BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt)); if (atomic_dec_and_test(&conn->refcnt)) { unsigned long timeo; switch (conn->type) { case ACL_LINK: case LE_LINK: cancel_delayed_work(&conn->idle_work); if (conn->state == BT_CONNECTED) { timeo = conn->disc_timeout; if (!conn->out) timeo *= 2; } else { timeo = 0; } break; case AMP_LINK: timeo = conn->disc_timeout; break; default: timeo = 0; break; } cancel_delayed_work(&conn->disc_work); queue_delayed_work(conn->hdev->workqueue, &conn->disc_work, timeo); } } /* ----- HCI Devices ----- */ static inline void hci_dev_put(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, kref_read(&d->dev.kobj.kref)); put_device(&d->dev); } static inline struct hci_dev *hci_dev_hold(struct hci_dev *d) { BT_DBG("%s orig refcnt %d", d->name, kref_read(&d->dev.kobj.kref)); get_device(&d->dev); return d; } #define hci_dev_lock(d) mutex_lock(&d->lock) #define hci_dev_unlock(d) mutex_unlock(&d->lock) #define to_hci_dev(d) container_of(d, struct hci_dev, dev) #define to_hci_conn(c) container_of(c, struct hci_conn, dev) static inline void *hci_get_drvdata(struct hci_dev *hdev) { return dev_get_drvdata(&hdev->dev); } static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) { dev_set_drvdata(&hdev->dev, data); } static inline void *hci_get_priv(struct hci_dev *hdev) { return (char *)hdev + sizeof(*hdev); } struct hci_dev *hci_dev_get(int index); struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); struct hci_dev *hci_alloc_dev_priv(int sizeof_priv); static inline struct hci_dev *hci_alloc_dev(void) { return hci_alloc_dev_priv(0); } void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); void hci_release_dev(struct hci_dev *hdev); int hci_register_suspend_notifier(struct hci_dev *hdev); int hci_unregister_suspend_notifier(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); __printf(2, 3) void 
hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); __printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); static inline void hci_set_msft_opcode(struct hci_dev *hdev, __u16 opcode) { #if IS_ENABLED(CONFIG_BT_MSFTEXT) hdev->msft_opcode = opcode; #endif } static inline void hci_set_aosp_capable(struct hci_dev *hdev) { #if IS_ENABLED(CONFIG_BT_AOSPEXT) hdev->aosp_capable = true; #endif } static inline void hci_devcd_setup(struct hci_dev *hdev) { #ifdef CONFIG_DEV_COREDUMP INIT_WORK(&hdev->dump.dump_rx, hci_devcd_rx); INIT_DELAYED_WORK(&hdev->dump.dump_timeout, hci_devcd_timeout); skb_queue_head_init(&hdev->dump.dump_q); #endif } int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); int hci_dev_reset(__u16 dev); int hci_dev_reset_stat(__u16 dev); int hci_dev_cmd(unsigned int cmd, void __user *arg); int hci_get_dev_list(void __user *arg); int hci_get_dev_info(void __user *arg); int hci_get_conn_list(void __user *arg); int hci_get_conn_info(struct hci_dev *hdev, void __user *arg); int hci_get_auth_info(struct hci_dev *hdev, void __user *arg); int hci_inquiry(void __user *arg); struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *list, bdaddr_t *bdaddr, u8 type); struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk( struct list_head *list, bdaddr_t *bdaddr, u8 type); struct bdaddr_list_with_flags * hci_bdaddr_list_lookup_with_flags(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr, u8 type, u8 *peer_irk, u8 *local_irk); int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr, u8 type, u32 flags); int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr, u8 type); int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr, u8 type); void hci_bdaddr_list_clear(struct list_head *list); struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_clear_disabled(struct hci_dev *hdev); void hci_conn_params_free(struct hci_conn_params *param); void hci_pend_le_list_del_init(struct hci_conn_params *param); void hci_pend_le_list_add(struct hci_conn_params *param, struct list_head *list); struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type); void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len, bool *persistent); struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 type, u8 authenticated, u8 tk[16], u8 enc_size, __le16 ediv, __le64 rand); struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 role); int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); void hci_smp_ltks_clear(struct hci_dev *hdev); int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct smp_irk *hci_find_irk_by_rpa(struct hci_dev *hdev, bdaddr_t 
*rpa); struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type); struct smp_irk *hci_add_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 val[16], bdaddr_t *rpa); void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type); bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16]); void hci_blocked_keys_clear(struct hci_dev *hdev); void hci_smp_irks_clear(struct hci_dev *hdev); bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type); void hci_remote_oob_data_clear(struct hci_dev *hdev); struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 *hash192, u8 *rand192, u8 *hash256, u8 *rand256); int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type); void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration, s8 tx_power, u32 min_interval, u32 max_interval, u8 mesh_handle); struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval); int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data); int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); u32 hci_adv_instance_flags(struct hci_dev *hdev, u8 instance); bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance); void hci_adv_monitors_clear(struct hci_dev *hdev); void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); int hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle); int hci_remove_all_adv_monitor(struct hci_dev *hdev); bool hci_is_adv_monitoring(struct hci_dev *hdev); int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); void hci_init_sysfs(struct hci_dev *hdev); void hci_conn_init_sysfs(struct hci_conn *conn); void hci_conn_add_sysfs(struct hci_conn *conn); void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->dev.parent = (pdev)) #define GET_HCIDEV_DEV(hdev) ((hdev)->dev.parent) /* ----- LMP capabilities ----- */ #define lmp_encrypt_capable(dev) ((dev)->features[0][0] & LMP_ENCRYPT) #define lmp_rswitch_capable(dev) ((dev)->features[0][0] & LMP_RSWITCH) #define lmp_hold_capable(dev) ((dev)->features[0][0] & LMP_HOLD) #define lmp_sniff_capable(dev) ((dev)->features[0][0] & LMP_SNIFF) #define lmp_park_capable(dev) ((dev)->features[0][1] & LMP_PARK) #define lmp_inq_rssi_capable(dev) ((dev)->features[0][3] & LMP_RSSI_INQ) #define lmp_esco_capable(dev) ((dev)->features[0][3] & LMP_ESCO) #define lmp_bredr_capable(dev) (!((dev)->features[0][4] & LMP_NO_BREDR)) #define lmp_le_capable(dev) ((dev)->features[0][4] & LMP_LE) #define lmp_sniffsubr_capable(dev) ((dev)->features[0][5] & LMP_SNIFF_SUBR) #define lmp_pause_enc_capable(dev) ((dev)->features[0][5] & LMP_PAUSE_ENC) #define lmp_esco_2m_capable(dev) 
((dev)->features[0][5] & LMP_EDR_ESCO_2M) #define lmp_ext_inq_capable(dev) ((dev)->features[0][6] & LMP_EXT_INQ) #define lmp_le_br_capable(dev) (!!((dev)->features[0][6] & LMP_SIMUL_LE_BR)) #define lmp_ssp_capable(dev) ((dev)->features[0][6] & LMP_SIMPLE_PAIR) #define lmp_no_flush_capable(dev) ((dev)->features[0][6] & LMP_NO_FLUSH) #define lmp_lsto_capable(dev) ((dev)->features[0][7] & LMP_LSTO) #define lmp_inq_tx_pwr_capable(dev) ((dev)->features[0][7] & LMP_INQ_TX_PWR) #define lmp_ext_feat_capable(dev) ((dev)->features[0][7] & LMP_EXTFEATURES) #define lmp_transp_capable(dev) ((dev)->features[0][2] & LMP_TRANSPARENT) #define lmp_edr_2m_capable(dev) ((dev)->features[0][3] & LMP_EDR_2M) #define lmp_edr_3m_capable(dev) ((dev)->features[0][3] & LMP_EDR_3M) #define lmp_edr_3slot_capable(dev) ((dev)->features[0][4] & LMP_EDR_3SLOT) #define lmp_edr_5slot_capable(dev) ((dev)->features[0][5] & LMP_EDR_5SLOT) /* ----- Extended LMP capabilities ----- */ #define lmp_cpb_central_capable(dev) ((dev)->features[2][0] & LMP_CPB_CENTRAL) #define lmp_cpb_peripheral_capable(dev) ((dev)->features[2][0] & LMP_CPB_PERIPHERAL) #define lmp_sync_train_capable(dev) ((dev)->features[2][0] & LMP_SYNC_TRAIN) #define lmp_sync_scan_capable(dev) ((dev)->features[2][0] & LMP_SYNC_SCAN) #define lmp_sc_capable(dev) ((dev)->features[2][1] & LMP_SC) #define lmp_ping_capable(dev) ((dev)->features[2][1] & LMP_PING) /* ----- Host capabilities ----- */ #define lmp_host_ssp_capable(dev) ((dev)->features[1][0] & LMP_HOST_SSP) #define lmp_host_sc_capable(dev) ((dev)->features[1][0] & LMP_HOST_SC) #define lmp_host_le_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE)) #define lmp_host_le_br_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE_BREDR)) #define hdev_is_powered(dev) (test_bit(HCI_UP, &(dev)->flags) && \ !hci_dev_test_flag(dev, HCI_AUTO_OFF)) #define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \ hci_dev_test_flag(dev, HCI_SC_ENABLED)) #define rpa_valid(dev) (bacmp(&dev->rpa, BDADDR_ANY) && \ !hci_dev_test_flag(dev, HCI_RPA_EXPIRED)) #define adv_rpa_valid(adv) (bacmp(&adv->random_addr, BDADDR_ANY) && \ !adv->rpa_expired) #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M)) #define le_2m_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_2M)) #define scan_2m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_2M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M)) #define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \ !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \ &(dev)->quirks)) #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) #define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) /* Use LL Privacy based address resolution if supported */ #define use_ll_privacy(dev) (ll_privacy_capable(dev) && \ hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY)) #define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ (hdev->commands[39] & 0x04)) #define read_key_size_capable(dev) \ ((dev)->commands[20] & 0x10 && \ !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. 
*/ #define enhanced_sync_conn_capable(dev) \ (((dev)->commands[29] & 0x08) && \ !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks)) /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ ((dev)->commands[37] & 0x40) && \ !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks)) /* Use ext create connection if command is supported */ #define use_ext_conn(dev) ((dev)->commands[37] & 0x80) /* Extended advertising support */ #define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV)) /* Maximum advertising length */ #define max_adv_len(dev) \ (ext_adv_capable(dev) ? HCI_MAX_EXT_AD_LENGTH : HCI_MAX_AD_LENGTH) /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789: * * C24: Mandatory if the LE Controller supports Connection State and either * LE Feature (LL Privacy) or LE Feature (Extended Advertising) is supported */ #define use_enhanced_conn_complete(dev) (ll_privacy_capable(dev) || \ ext_adv_capable(dev)) /* Periodic advertising support */ #define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV)) /* CIS Master/Slave and BIS support */ #define iso_capable(dev) (cis_capable(dev) || bis_capable(dev)) #define cis_capable(dev) \ (cis_central_capable(dev) || cis_peripheral_capable(dev)) #define cis_central_capable(dev) \ ((dev)->le_features[3] & HCI_LE_CIS_CENTRAL) #define cis_peripheral_capable(dev) \ ((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL) #define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER) #define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks))) /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type, __u8 *flags) { switch (type) { case ACL_LINK: return l2cap_connect_ind(hdev, bdaddr); case SCO_LINK: case ESCO_LINK: return sco_connect_ind(hdev, bdaddr, flags); case ISO_LINK: return iso_connect_ind(hdev, bdaddr, flags); default: BT_ERR("unknown link type %d", type); return -EINVAL; } } static inline int hci_proto_disconn_ind(struct hci_conn *conn) { if (conn->type != ACL_LINK && conn->type != LE_LINK) return HCI_ERROR_REMOTE_USER_TERM; return l2cap_disconn_ind(conn); } /* ----- HCI callbacks ----- */ struct hci_cb { struct list_head list; char *name; void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); }; static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); } mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); } static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); } mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) { 
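	/*
	 * Deliver the authentication result to every registered hci_cb
	 * ->security_cfm() callback. If an encryption change is still
	 * pending, return early; the confirmation is reported once the
	 * encryption change completes (see hci_encrypt_cfm() below).
	 */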
struct hci_cb *cb; __u8 encrypt; if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) return; encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->security_cfm) cb->security_cfm(conn, status, encrypt); } mutex_unlock(&hci_cb_list_lock); if (conn->security_cfm_cb) conn->security_cfm_cb(conn, status); } static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { if (!status) conn->state = BT_CONNECTED; hci_connect_cfm(conn, status); hci_conn_drop(conn); return; } if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) encrypt = 0x00; else if (test_bit(HCI_CONN_AES_CCM, &conn->flags)) encrypt = 0x02; else encrypt = 0x01; if (!status) { if (conn->sec_level == BT_SECURITY_SDP) conn->sec_level = BT_SECURITY_LOW; if (conn->pending_sec_level > conn->sec_level) conn->sec_level = conn->pending_sec_level; } mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->security_cfm) cb->security_cfm(conn, status, encrypt); } mutex_unlock(&hci_cb_list_lock); if (conn->security_cfm_cb) conn->security_cfm_cb(conn, status); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); } mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { struct hci_cb *cb; mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); } mutex_unlock(&hci_cb_list_lock); } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) { if (addr_type != ADDR_LE_DEV_RANDOM) return false; if ((bdaddr->b[5] & 0xc0) == 0x40) return true; return false; } static inline bool hci_is_identity_address(bdaddr_t *addr, u8 addr_type) { if (addr_type == ADDR_LE_DEV_PUBLIC) return true; /* Check for Random Static address type */ if ((addr->b[5] & 0xc0) == 0xc0) return true; return false; } static inline struct smp_irk *hci_get_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { if (!hci_bdaddr_is_rpa(bdaddr, addr_type)) return NULL; return hci_find_irk_by_rpa(hdev, bdaddr); } static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, u16 to_multiplier) { u16 max_latency; if (min > max || min < 6 || max > 3200) return -EINVAL; if (to_multiplier < 10 || to_multiplier > 3200) return -EINVAL; if (max >= to_multiplier * 8) return -EINVAL; max_latency = (to_multiplier * 4 / max) - 1; if (latency > 499 || latency > max_latency) return -EINVAL; return 0; } int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param); void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags); void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); void hci_send_iso(struct hci_conn *conn, struct sk_buff *skb); void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); void *hci_recv_event_data(struct hci_dev *hdev, __u8 event); u32 hci_conn_get_phy(struct hci_conn *conn); /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff 
*skb, int flag, struct sock *skip_sk); void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, void *data, u16 data_len, ktime_t tstamp, int flag, struct sock *skip_sk); void hci_sock_dev_event(struct hci_dev *hdev, int event); #define HCI_MGMT_VAR_LEN BIT(0) #define HCI_MGMT_NO_HDEV BIT(1) #define HCI_MGMT_UNTRUSTED BIT(2) #define HCI_MGMT_UNCONFIGURED BIT(3) #define HCI_MGMT_HDEV_OPTIONAL BIT(4) struct hci_mgmt_handler { int (*func) (struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len); size_t data_len; unsigned long flags; }; struct hci_mgmt_chan { struct list_head list; unsigned short channel; size_t handler_count; const struct hci_mgmt_handler *handlers; void (*hdev_init) (struct sock *sk, struct hci_dev *hdev); }; int hci_mgmt_chan_register(struct hci_mgmt_chan *c); void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); /* Management interface */ #define DISCOV_TYPE_BREDR (BIT(BDADDR_BREDR)) #define DISCOV_TYPE_LE (BIT(BDADDR_LE_PUBLIC) | \ BIT(BDADDR_LE_RANDOM)) #define DISCOV_TYPE_INTERLEAVED (BIT(BDADDR_BREDR) | \ BIT(BDADDR_LE_PUBLIC) | \ BIT(BDADDR_LE_RANDOM)) /* These LE scan and inquiry parameters were chosen according to LE General * Discovery Procedure specification. */ #define DISCOV_LE_SCAN_WIN 0x12 #define DISCOV_LE_SCAN_INT 0x12 #define DISCOV_LE_TIMEOUT 10240 /* msec */ #define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */ #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ #define DISCOV_LE_FAST_ADV_INT_MIN 0x00A0 /* 100 msec */ #define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */ #define DISCOV_LE_PER_ADV_INT_MIN 0x00A0 /* 200 msec */ #define DISCOV_LE_PER_ADV_INT_MAX 0x00A0 /* 200 msec */ #define DISCOV_LE_ADV_MESH_MIN 0x00A0 /* 100 msec */ #define DISCOV_LE_ADV_MESH_MAX 0x00A0 /* 100 msec */ #define INTERVAL_TO_MS(x) (((x) * 10) / 0x10) #define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */ void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); void mgmt_power_on(struct hci_dev *hdev, int err); void __mgmt_power_off(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u8 *name, u8 name_len); void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 reason, bool mgmt_connected); void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); void mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure); void mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); void mgmt_pin_code_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status); int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u32 value, u8 confirm_hint); int mgmt_user_confirm_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_confirm_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int 
mgmt_user_passkey_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type); int mgmt_user_passkey_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 status); int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u32 passkey, u8 entered); void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, u64 instant); void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, s8 rssi, u8 *name, u8 name_len); void mgmt_discovering(struct hci_dev *hdev, u8 discovering); void mgmt_suspending(struct hci_dev *hdev, u8 state); void mgmt_resuming(struct hci_dev *hdev, u8 reason, bdaddr_t *bdaddr, u8 addr_type); bool mgmt_powering_down(struct hci_dev *hdev); void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent); void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent); void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, bool persistent); void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 store_hint, u16 min_interval, u16 max_interval, u16 latency, u16 timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); int hci_abort_conn(struct hci_conn *conn, u8 reason); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16], __u8 key_size); void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type); #define SCO_AIRMODE_MASK 0x0003 #define SCO_AIRMODE_CVSD 0x0000 #define SCO_AIRMODE_TRANSP 0x0003 #define LOCAL_CODEC_ACL_MASK BIT(0) #define LOCAL_CODEC_SCO_MASK BIT(1) #define TRANSPORT_TYPE_MAX 0x04 #endif /* __HCI_CORE_H */
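/*
 * Illustrative sketch (not taken from hci_core.h): how a higher-layer
 * protocol can hook into the confirmation callbacks declared above.
 * Only struct hci_cb, hci_register_cb() and hci_unregister_cb() come
 * from the header; the "example" module and its callbacks are
 * hypothetical and assume a normal kernel-module build environment.
 */
#include <linux/module.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

static void example_connect_cfm(struct hci_conn *conn, __u8 status)
{
	/* Runs for every completed connection; @status is the HCI status
	 * code (0x00 on success). */
}

static void example_security_cfm(struct hci_conn *conn, __u8 status,
				 __u8 encrypt)
{
	/* @encrypt mirrors hci_encrypt_cfm(): 0x00 = off, 0x01 = on,
	 * 0x02 = AES-CCM. */
}

static struct hci_cb example_cb = {
	.name		= "example",
	.connect_cfm	= example_connect_cfm,
	.security_cfm	= example_security_cfm,
};

static int __init example_cb_init(void)
{
	return hci_register_cb(&example_cb);
}

static void __exit example_cb_exit(void)
{
	hci_unregister_cb(&example_cb);
}

module_init(example_cb_init);
module_exit(example_cb_exit);
MODULE_LICENSE("GPL");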
// SPDX-License-Identifier: GPL-2.0
#include <linux/spinlock.h>
#include <linux/task_work.h>
#include <linux/resume_user_mode.h>

static struct callback_head work_exited; /* all we need is ->next == NULL */

/**
 * task_work_add - ask the @task to execute @work->func()
 * @task: the task which should run the callback
 * @work: the callback to run
 * @notify: how to notify the targeted task
 *
 * Queue @work for task_work_run() below and notify the @task if @notify
 * is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI.
 *
 * @TWA_SIGNAL works like signals, in that it will interrupt the targeted
 * task and run the task_work, regardless of whether the task is currently
 * running in the kernel or userspace.
 * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a
 * reschedule IPI to force the targeted task to reschedule and run task_work.
 * This can be advantageous if there's no strict requirement that the
 * task_work be run as soon as possible, just whenever the task enters the
 * kernel anyway.
 * @TWA_RESUME work is run only when the task exits the kernel and returns to
 * user mode, or before entering guest mode.
 *
 * Fails if the @task is exiting/exited and thus it can't process this @work.
 * Otherwise @work->func() will be called when the @task goes through one of
 * the aforementioned transitions, or exits.
 *
 * If the targeted task is exiting, then an error is returned and the work item
 * is not queued. It's up to the caller to arrange for an alternative mechanism
 * in that case.
 *
 * Note: there is no ordering guarantee on works queued here. The task_work
 * list is LIFO.
 *
 * RETURNS:
 * 0 if succeeds or -ESRCH.
 */
int task_work_add(struct task_struct *task, struct callback_head *work,
		  enum task_work_notify_mode notify)
{
	struct callback_head *head;

	/* record the work call stack in order to print it in KASAN reports */
	kasan_record_aux_stack(work);

	head = READ_ONCE(task->task_works);
	do {
		if (unlikely(head == &work_exited))
			return -ESRCH;
		work->next = head;
	} while (!try_cmpxchg(&task->task_works, &head, work));

	switch (notify) {
	case TWA_NONE:
		break;
	case TWA_RESUME:
		set_notify_resume(task);
		break;
	case TWA_SIGNAL:
		set_notify_signal(task);
		break;
	case TWA_SIGNAL_NO_IPI:
		__set_notify_signal(task);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return 0;
}

/**
 * task_work_cancel_match - cancel a pending work added by task_work_add()
 * @task: the task which should execute the work
 * @match: match function to call
 * @data: data to be passed in to match function
 *
 * RETURNS:
 * The found work or NULL if not found.
 */
struct callback_head *
task_work_cancel_match(struct task_struct *task,
		       bool (*match)(struct callback_head *, void *data),
		       void *data)
{
	struct callback_head **pprev = &task->task_works;
	struct callback_head *work;
	unsigned long flags;

	if (likely(!task_work_pending(task)))
		return NULL;
	/*
	 * If cmpxchg() fails we continue without updating pprev.
	 * Either we raced with task_work_add() which added the
	 * new entry before this work, we will find it again. Or
	 * we raced with task_work_run(), *pprev == NULL/exited.
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	work = READ_ONCE(*pprev);
	while (work) {
		if (!match(work, data)) {
			pprev = &work->next;
			work = READ_ONCE(*pprev);
		} else if (try_cmpxchg(pprev, &work, work->next))
			break;
	}
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	return work;
}

static bool task_work_func_match(struct callback_head *cb, void *data)
{
	return cb->func == data;
}

/**
 * task_work_cancel - cancel a pending work added by task_work_add()
 * @task: the task which should execute the work
 * @func: identifies the work to remove
 *
 * Find the last queued pending work with ->func == @func and remove
 * it from queue.
 *
 * RETURNS:
 * The found work or NULL if not found.
 */
struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func)
{
	return task_work_cancel_match(task, task_work_func_match, func);
}

/**
 * task_work_run - execute the works added by task_work_add()
 *
 * Flush the pending works. Should be used by the core kernel code.
 * Called before the task returns to the user-mode or stops, or when
 * it exits. In the latter case task_work_add() can no longer add the
 * new work after task_work_run() returns.
 */
void task_work_run(void)
{
	struct task_struct *task = current;
	struct callback_head *work, *head, *next;

	for (;;) {
		/*
		 * work->func() can do task_work_add(), do not set
		 * work_exited unless the list is empty.
		 */
		work = READ_ONCE(task->task_works);
		do {
			head = NULL;
			if (!work) {
				if (task->flags & PF_EXITING)
					head = &work_exited;
				else
					break;
			}
		} while (!try_cmpxchg(&task->task_works, &work, head));

		if (!work)
			break;
		/*
		 * Synchronize with task_work_cancel(). It can not remove
		 * the first entry == work, cmpxchg(task_works) must fail.
		 * But it can remove another entry from the ->next list.
		 */
		raw_spin_lock_irq(&task->pi_lock);
		raw_spin_unlock_irq(&task->pi_lock);

		do {
			next = work->next;
			work->func(work);
			work = next;
			cond_resched();
		} while (work);
	}
}
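/*
 * Illustrative sketch (not part of task_work.c): queueing a deferred
 * callback against the current task. Only task_work_add(), TWA_RESUME
 * and struct callback_head come from the code above; init_task_work()
 * is the helper from <linux/task_work.h>, and the "example" structure
 * and functions are hypothetical.
 */
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/task_work.h>

struct example_deferred {
	struct callback_head twork;
	int payload;
};

static void example_deferred_func(struct callback_head *head)
{
	struct example_deferred *d =
		container_of(head, struct example_deferred, twork);

	/* With TWA_RESUME this runs when the targeted task next returns
	 * to user mode (or exits), in that task's own context. */
	kfree(d);
}

static int example_queue_deferred(int payload)
{
	struct example_deferred *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (!d)
		return -ENOMEM;

	d->payload = payload;
	init_task_work(&d->twork, example_deferred_func);

	/* task_work_add() only fails with -ESRCH once the task has gone
	 * through its final task_work_run(), i.e. it is exiting. */
	if (task_work_add(current, &d->twork, TWA_RESUME)) {
		kfree(d);
		return -ESRCH;
	}

	return 0;
}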
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for Sony / PS2 / PS3 / PS4 BD devices.
* * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2012 David Dillow <dave@thedillows.org> * Copyright (c) 2006-2013 Jiri Kosina * Copyright (c) 2013 Colin Leitner <colin.leitner@gmail.com> * Copyright (c) 2014-2016 Frank Praznik <frank.praznik@gmail.com> * Copyright (c) 2018 Todd Kelner * Copyright (c) 2020-2021 Pascal Giard <pascal.giard@etsmtl.ca> * Copyright (c) 2020 Sanjay Govind <sanjay.govind9@gmail.com> * Copyright (c) 2021 Daniel Nguyen <daniel.nguyen.1@ens.etsmtl.ca> */ /* */ /* * NOTE: in order for the Sony PS3 BD Remote Control to be found by * a Bluetooth host, the key combination Start+Enter has to be kept pressed * for about 7 seconds with the Bluetooth Host Controller in discovering mode. * * There will be no PIN request from the device. */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/leds.h> #include <linux/power_supply.h> #include <linux/spinlock.h> #include <linux/list.h> #include <linux/idr.h> #include <linux/input/mt.h> #include <linux/crc32.h> #include <linux/usb.h> #include <linux/timer.h> #include <asm/unaligned.h> #include "hid-ids.h" #define VAIO_RDESC_CONSTANT BIT(0) #define SIXAXIS_CONTROLLER_USB BIT(1) #define SIXAXIS_CONTROLLER_BT BIT(2) #define BUZZ_CONTROLLER BIT(3) #define PS3REMOTE BIT(4) #define MOTION_CONTROLLER_USB BIT(5) #define MOTION_CONTROLLER_BT BIT(6) #define NAVIGATION_CONTROLLER_USB BIT(7) #define NAVIGATION_CONTROLLER_BT BIT(8) #define SINO_LITE_CONTROLLER BIT(9) #define FUTUREMAX_DANCE_MAT BIT(10) #define NSG_MR5U_REMOTE_BT BIT(11) #define NSG_MR7U_REMOTE_BT BIT(12) #define SHANWAN_GAMEPAD BIT(13) #define GH_GUITAR_CONTROLLER BIT(14) #define GHL_GUITAR_PS3WIIU BIT(15) #define GHL_GUITAR_PS4 BIT(16) #define SIXAXIS_CONTROLLER (SIXAXIS_CONTROLLER_USB | SIXAXIS_CONTROLLER_BT) #define MOTION_CONTROLLER (MOTION_CONTROLLER_USB | MOTION_CONTROLLER_BT) #define NAVIGATION_CONTROLLER (NAVIGATION_CONTROLLER_USB |\ NAVIGATION_CONTROLLER_BT) #define SONY_LED_SUPPORT (SIXAXIS_CONTROLLER | BUZZ_CONTROLLER |\ MOTION_CONTROLLER | NAVIGATION_CONTROLLER) #define SONY_BATTERY_SUPPORT (SIXAXIS_CONTROLLER | MOTION_CONTROLLER_BT | NAVIGATION_CONTROLLER) #define SONY_FF_SUPPORT (SIXAXIS_CONTROLLER | MOTION_CONTROLLER) #define SONY_BT_DEVICE (SIXAXIS_CONTROLLER_BT | MOTION_CONTROLLER_BT | NAVIGATION_CONTROLLER_BT) #define NSG_MRXU_REMOTE (NSG_MR5U_REMOTE_BT | NSG_MR7U_REMOTE_BT) #define MAX_LEDS 4 #define NSG_MRXU_MAX_X 1667 #define NSG_MRXU_MAX_Y 1868 /* The PS3/Wii U dongles require a poke every 10 seconds, but the PS4 * requires one every 8 seconds. Using 8 seconds for all for simplicity. */ #define GHL_GUITAR_POKE_INTERVAL 8 /* In seconds */ #define GUITAR_TILT_USAGE 44 /* Magic data taken from GHLtarUtility: * https://github.com/ghlre/GHLtarUtility/blob/master/PS3Guitar.cs * Note: The Wii U and PS3 dongles happen to share the same! */ static const char ghl_ps3wiiu_magic_data[] = { 0x02, 0x08, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* Magic data for the PS4 dongles sniffed with a USB protocol * analyzer. 
*/ static const char ghl_ps4_magic_data[] = { 0x30, 0x02, 0x08, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* PS/3 Motion controller */ static u8 motion_rdesc[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystick), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x85, 0x01, /* Report ID (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x15, /* Report Count (21), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x45, 0x01, /* Physical Maximum (1), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x15, /* Usage Maximum (15h), */ 0x81, 0x02, /* Input (Variable), * Buttons */ 0x95, 0x0B, /* Report Count (11), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x81, 0x03, /* Input (Constant, Variable), * Padding */ 0x15, 0x00, /* Logical Minimum (0), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0xA1, 0x00, /* Collection (Physical), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x01, /* Report Count (1), */ 0x35, 0x00, /* Physical Minimum (0), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x09, 0x30, /* Usage (X), */ 0x81, 0x02, /* Input (Variable), * Trigger */ 0xC0, /* End Collection, */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x07, /* Report Count (7), * skip 7 bytes */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x75, 0x10, /* Report Size (16), */ 0x46, 0xFF, 0xFF, /* Physical Maximum (65535), */ 0x27, 0xFF, 0xFF, 0x00, 0x00, /* Logical Maximum (65535), */ 0x95, 0x03, /* Report Count (3), * 3x Accels */ 0x09, 0x33, /* Usage (rX), */ 0x09, 0x34, /* Usage (rY), */ 0x09, 0x35, /* Usage (rZ), */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x95, 0x03, /* Report Count (3), * Skip Accels 2nd frame */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x01, /* Usage (Pointer), */ 0x95, 0x03, /* Report Count (3), * 3x Gyros */ 0x81, 0x02, /* Input (Variable), */ 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x95, 0x03, /* Report Count (3), * Skip Gyros 2nd frame */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x0C, /* Report Size (12), */ 0x46, 0xFF, 0x0F, /* Physical Maximum (4095), */ 0x26, 0xFF, 0x0F, /* Logical Maximum (4095), */ 0x95, 0x04, /* Report Count (4), * Skip Temp and Magnetometers */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x08, /* Report Size (8), */ 0x46, 0xFF, 0x00, /* Physical Maximum (255), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x95, 0x06, /* Report Count (6), * Skip Timestamp and Extension Bytes */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0x91, 0x02, /* Output (Variable), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x85, 0x02, /* Report ID (2), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x85, 0xEE, /* Report ID (238), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End 
Collection, */ 0xA1, 0x02, /* Collection (Logical), */ 0x85, 0xEF, /* Report ID (239), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x30, /* Report Count (48), */ 0x09, 0x01, /* Usage (Pointer), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static u8 ps3remote_rdesc[] = { 0x05, 0x01, /* GUsagePage Generic Desktop */ 0x09, 0x05, /* LUsage 0x05 [Game Pad] */ 0xA1, 0x01, /* MCollection Application (mouse, keyboard) */ /* Use collection 1 for joypad buttons */ 0xA1, 0x02, /* MCollection Logical (interrelated data) */ /* * Ignore the 1st byte, maybe it is used for a controller * number but it's not needed for correct operation */ 0x75, 0x08, /* GReportSize 0x08 [8] */ 0x95, 0x01, /* GReportCount 0x01 [1] */ 0x81, 0x01, /* MInput 0x01 (Const[0] Arr[1] Abs[2]) */ /* * Bytes from 2nd to 4th are a bitmap for joypad buttons, for these * buttons multiple keypresses are allowed */ 0x05, 0x09, /* GUsagePage Button */ 0x19, 0x01, /* LUsageMinimum 0x01 [Button 1 (primary/trigger)] */ 0x29, 0x18, /* LUsageMaximum 0x18 [Button 24] */ 0x14, /* GLogicalMinimum [0] */ 0x25, 0x01, /* GLogicalMaximum 0x01 [1] */ 0x75, 0x01, /* GReportSize 0x01 [1] */ 0x95, 0x18, /* GReportCount 0x18 [24] */ 0x81, 0x02, /* MInput 0x02 (Data[0] Var[1] Abs[2]) */ 0xC0, /* MEndCollection */ /* Use collection 2 for remote control buttons */ 0xA1, 0x02, /* MCollection Logical (interrelated data) */ /* 5th byte is used for remote control buttons */ 0x05, 0x09, /* GUsagePage Button */ 0x18, /* LUsageMinimum [No button pressed] */ 0x29, 0xFE, /* LUsageMaximum 0xFE [Button 254] */ 0x14, /* GLogicalMinimum [0] */ 0x26, 0xFE, 0x00, /* GLogicalMaximum 0x00FE [254] */ 0x75, 0x08, /* GReportSize 0x08 [8] */ 0x95, 0x01, /* GReportCount 0x01 [1] */ 0x80, /* MInput */ /* * Ignore bytes from 6th to 11th, 6th to 10th are always constant at * 0xff and 11th is for press indication */ 0x75, 0x08, /* GReportSize 0x08 [8] */ 0x95, 0x06, /* GReportCount 0x06 [6] */ 0x81, 0x01, /* MInput 0x01 (Const[0] Arr[1] Abs[2]) */ /* 12th byte is for battery strength */ 0x05, 0x06, /* GUsagePage Generic Device Controls */ 0x09, 0x20, /* LUsage 0x20 [Battery Strength] */ 0x14, /* GLogicalMinimum [0] */ 0x25, 0x05, /* GLogicalMaximum 0x05 [5] */ 0x75, 0x08, /* GReportSize 0x08 [8] */ 0x95, 0x01, /* GReportCount 0x01 [1] */ 0x81, 0x02, /* MInput 0x02 (Data[0] Var[1] Abs[2]) */ 0xC0, /* MEndCollection */ 0xC0 /* MEndCollection [Game Pad] */ }; static const unsigned int ps3remote_keymap_joypad_buttons[] = { [0x01] = KEY_SELECT, [0x02] = BTN_THUMBL, /* L3 */ [0x03] = BTN_THUMBR, /* R3 */ [0x04] = BTN_START, [0x05] = KEY_UP, [0x06] = KEY_RIGHT, [0x07] = KEY_DOWN, [0x08] = KEY_LEFT, [0x09] = BTN_TL2, /* L2 */ [0x0a] = BTN_TR2, /* R2 */ [0x0b] = BTN_TL, /* L1 */ [0x0c] = BTN_TR, /* R1 */ [0x0d] = KEY_OPTION, /* options/triangle */ [0x0e] = KEY_BACK, /* back/circle */ [0x0f] = BTN_0, /* cross */ [0x10] = KEY_SCREEN, /* view/square */ [0x11] = KEY_HOMEPAGE, /* PS button */ [0x14] = KEY_ENTER, }; static const unsigned int ps3remote_keymap_remote_buttons[] = { [0x00] = KEY_1, [0x01] = KEY_2, [0x02] = KEY_3, [0x03] = KEY_4, [0x04] = KEY_5, [0x05] = KEY_6, [0x06] = KEY_7, [0x07] = KEY_8, [0x08] = KEY_9, [0x09] = KEY_0, [0x0e] = KEY_ESC, /* return */ [0x0f] = KEY_CLEAR, [0x16] = KEY_EJECTCD, [0x1a] = KEY_MENU, /* top menu */ [0x28] = KEY_TIME, [0x30] = KEY_PREVIOUS, [0x31] = KEY_NEXT, [0x32] = KEY_PLAY, [0x33] = KEY_REWIND, /* scan back */ [0x34] = KEY_FORWARD, /* scan forward */ [0x38] = KEY_STOP, [0x39] = KEY_PAUSE, [0x40] = 
KEY_CONTEXT_MENU, /* pop up/menu */ [0x60] = KEY_FRAMEBACK, /* slow/step back */ [0x61] = KEY_FRAMEFORWARD, /* slow/step forward */ [0x63] = KEY_SUBTITLE, [0x64] = KEY_AUDIO, [0x65] = KEY_ANGLE, [0x70] = KEY_INFO, /* display */ [0x80] = KEY_BLUE, [0x81] = KEY_RED, [0x82] = KEY_GREEN, [0x83] = KEY_YELLOW, }; static const unsigned int buzz_keymap[] = { /* * The controller has 4 remote buzzers, each with one LED and 5 * buttons. * * We use the mapping chosen by the controller, which is: * * Key Offset * ------------------- * Buzz 1 * Blue 5 * Orange 4 * Green 3 * Yellow 2 * * So, for example, the orange button on the third buzzer is mapped to * BTN_TRIGGER_HAPPY14 */ [1] = BTN_TRIGGER_HAPPY1, [2] = BTN_TRIGGER_HAPPY2, [3] = BTN_TRIGGER_HAPPY3, [4] = BTN_TRIGGER_HAPPY4, [5] = BTN_TRIGGER_HAPPY5, [6] = BTN_TRIGGER_HAPPY6, [7] = BTN_TRIGGER_HAPPY7, [8] = BTN_TRIGGER_HAPPY8, [9] = BTN_TRIGGER_HAPPY9, [10] = BTN_TRIGGER_HAPPY10, [11] = BTN_TRIGGER_HAPPY11, [12] = BTN_TRIGGER_HAPPY12, [13] = BTN_TRIGGER_HAPPY13, [14] = BTN_TRIGGER_HAPPY14, [15] = BTN_TRIGGER_HAPPY15, [16] = BTN_TRIGGER_HAPPY16, [17] = BTN_TRIGGER_HAPPY17, [18] = BTN_TRIGGER_HAPPY18, [19] = BTN_TRIGGER_HAPPY19, [20] = BTN_TRIGGER_HAPPY20, }; /* The Navigation controller is a partial DS3 and uses the same HID report * and hence the same keymap indices, however not all axes/buttons * are physically present. We use the same axis and button mapping as * the DS3, which uses the Linux gamepad spec. */ static const unsigned int navigation_absmap[] = { [0x30] = ABS_X, [0x31] = ABS_Y, [0x33] = ABS_Z, /* L2 */ }; /* Buttons not physically available on the device, but still available * in the reports are explicitly set to 0 for documentation purposes. */ static const unsigned int navigation_keymap[] = { [0x01] = 0, /* Select */ [0x02] = BTN_THUMBL, /* L3 */ [0x03] = 0, /* R3 */ [0x04] = 0, /* Start */ [0x05] = BTN_DPAD_UP, /* Up */ [0x06] = BTN_DPAD_RIGHT, /* Right */ [0x07] = BTN_DPAD_DOWN, /* Down */ [0x08] = BTN_DPAD_LEFT, /* Left */ [0x09] = BTN_TL2, /* L2 */ [0x0a] = 0, /* R2 */ [0x0b] = BTN_TL, /* L1 */ [0x0c] = 0, /* R1 */ [0x0d] = BTN_NORTH, /* Triangle */ [0x0e] = BTN_EAST, /* Circle */ [0x0f] = BTN_SOUTH, /* Cross */ [0x10] = BTN_WEST, /* Square */ [0x11] = BTN_MODE, /* PS */ }; static const unsigned int sixaxis_absmap[] = { [0x30] = ABS_X, [0x31] = ABS_Y, [0x32] = ABS_RX, /* right stick X */ [0x35] = ABS_RY, /* right stick Y */ }; static const unsigned int sixaxis_keymap[] = { [0x01] = BTN_SELECT, /* Select */ [0x02] = BTN_THUMBL, /* L3 */ [0x03] = BTN_THUMBR, /* R3 */ [0x04] = BTN_START, /* Start */ [0x05] = BTN_DPAD_UP, /* Up */ [0x06] = BTN_DPAD_RIGHT, /* Right */ [0x07] = BTN_DPAD_DOWN, /* Down */ [0x08] = BTN_DPAD_LEFT, /* Left */ [0x09] = BTN_TL2, /* L2 */ [0x0a] = BTN_TR2, /* R2 */ [0x0b] = BTN_TL, /* L1 */ [0x0c] = BTN_TR, /* R1 */ [0x0d] = BTN_NORTH, /* Triangle */ [0x0e] = BTN_EAST, /* Circle */ [0x0f] = BTN_SOUTH, /* Cross */ [0x10] = BTN_WEST, /* Square */ [0x11] = BTN_MODE, /* PS */ }; static enum power_supply_property sony_battery_props[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_STATUS, }; struct sixaxis_led { u8 time_enabled; /* the total time the led is active (0xff means forever) */ u8 duty_length; /* how long a cycle is in deciseconds (0 means "really fast") */ u8 enabled; u8 duty_off; /* % of duty_length the led is off (0xff means 100%) */ u8 duty_on; /* % of duty_length the led is on (0xff mean 100%) */ } __packed; struct sixaxis_rumble { u8 padding; u8 
right_duration; /* Right motor duration (0xff means forever) */ u8 right_motor_on; /* Right (small) motor on/off, only supports values of 0 or 1 (off/on) */ u8 left_duration; /* Left motor duration (0xff means forever) */ u8 left_motor_force; /* left (large) motor, supports force values from 0 to 255 */ } __packed; struct sixaxis_output_report { u8 report_id; struct sixaxis_rumble rumble; u8 padding[4]; u8 leds_bitmap; /* bitmap of enabled LEDs: LED_1 = 0x02, LED_2 = 0x04, ... */ struct sixaxis_led led[4]; /* LEDx at (4 - x) */ struct sixaxis_led _reserved; /* LED5, not actually soldered */ } __packed; union sixaxis_output_report_01 { struct sixaxis_output_report data; u8 buf[36]; }; struct motion_output_report_02 { u8 type, zero; u8 r, g, b; u8 zero2; u8 rumble; }; #define SIXAXIS_REPORT_0xF2_SIZE 17 #define SIXAXIS_REPORT_0xF5_SIZE 8 #define MOTION_REPORT_0x02_SIZE 49 #define SENSOR_SUFFIX " Motion Sensors" #define TOUCHPAD_SUFFIX " Touchpad" #define SIXAXIS_INPUT_REPORT_ACC_X_OFFSET 41 #define SIXAXIS_ACC_RES_PER_G 113 static DEFINE_SPINLOCK(sony_dev_list_lock); static LIST_HEAD(sony_device_list); static DEFINE_IDA(sony_device_id_allocator); enum sony_worker { SONY_WORKER_STATE }; struct sony_sc { spinlock_t lock; struct list_head list_node; struct hid_device *hdev; struct input_dev *touchpad; struct input_dev *sensor_dev; struct led_classdev *leds[MAX_LEDS]; unsigned long quirks; struct work_struct state_worker; void (*send_output_report)(struct sony_sc *); struct power_supply *battery; struct power_supply_desc battery_desc; int device_id; u8 *output_report_dmabuf; #ifdef CONFIG_SONY_FF u8 left; u8 right; #endif u8 mac_address[6]; u8 state_worker_initialized; u8 defer_initialization; u8 battery_capacity; int battery_status; u8 led_state[MAX_LEDS]; u8 led_delay_on[MAX_LEDS]; u8 led_delay_off[MAX_LEDS]; u8 led_count; /* GH Live */ struct urb *ghl_urb; struct timer_list ghl_poke_timer; }; static void sony_set_leds(struct sony_sc *sc); static inline void sony_schedule_work(struct sony_sc *sc, enum sony_worker which) { unsigned long flags; switch (which) { case SONY_WORKER_STATE: spin_lock_irqsave(&sc->lock, flags); if (!sc->defer_initialization && sc->state_worker_initialized) schedule_work(&sc->state_worker); spin_unlock_irqrestore(&sc->lock, flags); break; } } static void ghl_magic_poke_cb(struct urb *urb) { struct sony_sc *sc = urb->context; if (urb->status < 0) hid_err(sc->hdev, "URB transfer failed : %d", urb->status); mod_timer(&sc->ghl_poke_timer, jiffies + GHL_GUITAR_POKE_INTERVAL*HZ); } static void ghl_magic_poke(struct timer_list *t) { int ret; struct sony_sc *sc = from_timer(sc, t, ghl_poke_timer); ret = usb_submit_urb(sc->ghl_urb, GFP_ATOMIC); if (ret < 0) hid_err(sc->hdev, "usb_submit_urb failed: %d", ret); } static int ghl_init_urb(struct sony_sc *sc, struct usb_device *usbdev, const char ghl_magic_data[], u16 poke_size) { struct usb_ctrlrequest *cr; u8 *databuf; unsigned int pipe; u16 ghl_magic_value = (((HID_OUTPUT_REPORT + 1) << 8) | ghl_magic_data[0]); pipe = usb_sndctrlpipe(usbdev, 0); cr = devm_kzalloc(&sc->hdev->dev, sizeof(*cr), GFP_ATOMIC); if (cr == NULL) return -ENOMEM; databuf = devm_kzalloc(&sc->hdev->dev, poke_size, GFP_ATOMIC); if (databuf == NULL) return -ENOMEM; cr->bRequestType = USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT; cr->bRequest = USB_REQ_SET_CONFIGURATION; cr->wValue = cpu_to_le16(ghl_magic_value); cr->wIndex = 0; cr->wLength = cpu_to_le16(poke_size); memcpy(databuf, ghl_magic_data, poke_size); usb_fill_control_urb( sc->ghl_urb, usbdev, pipe, 
(unsigned char *) cr, databuf, poke_size, ghl_magic_poke_cb, sc); return 0; } static int guitar_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR) { unsigned int abs = usage->hid & HID_USAGE; if (abs == GUITAR_TILT_USAGE) { hid_map_usage_clear(hi, usage, bit, max, EV_ABS, ABS_RY); return 1; } } return 0; } static u8 *motion_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *rsize) { *rsize = sizeof(motion_rdesc); return motion_rdesc; } static u8 *ps3remote_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *rsize) { *rsize = sizeof(ps3remote_rdesc); return ps3remote_rdesc; } static int ps3remote_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { unsigned int key = usage->hid & HID_USAGE; if ((usage->hid & HID_USAGE_PAGE) != HID_UP_BUTTON) return -1; switch (usage->collection_index) { case 1: if (key >= ARRAY_SIZE(ps3remote_keymap_joypad_buttons)) return -1; key = ps3remote_keymap_joypad_buttons[key]; if (!key) return -1; break; case 2: if (key >= ARRAY_SIZE(ps3remote_keymap_remote_buttons)) return -1; key = ps3remote_keymap_remote_buttons[key]; if (!key) return -1; break; default: return -1; } hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); return 1; } static int navigation_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) { unsigned int key = usage->hid & HID_USAGE; if (key >= ARRAY_SIZE(sixaxis_keymap)) return -1; key = navigation_keymap[key]; if (!key) return -1; hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); return 1; } else if (usage->hid == HID_GD_POINTER) { /* See comment in sixaxis_mapping, basically the L2 (and R2) * triggers are reported through GD Pointer. * In addition we ignore any analog button 'axes' and only * support digital buttons. */ switch (usage->usage_index) { case 8: /* L2 */ usage->hid = HID_GD_Z; break; default: return -1; } hid_map_usage_clear(hi, usage, bit, max, EV_ABS, usage->hid & 0xf); return 1; } else if ((usage->hid & HID_USAGE_PAGE) == HID_UP_GENDESK) { unsigned int abs = usage->hid & HID_USAGE; if (abs >= ARRAY_SIZE(navigation_absmap)) return -1; abs = navigation_absmap[abs]; hid_map_usage_clear(hi, usage, bit, max, EV_ABS, abs); return 1; } return -1; } static int sixaxis_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) { unsigned int key = usage->hid & HID_USAGE; if (key >= ARRAY_SIZE(sixaxis_keymap)) return -1; key = sixaxis_keymap[key]; hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); return 1; } else if (usage->hid == HID_GD_POINTER) { /* The DS3 provides analog values for most buttons and even * for HAT axes through GD Pointer. L2 and R2 are reported * among these as well instead of as GD Z / RZ. Remap L2 * and R2 and ignore other analog 'button axes' as there is * no good way for reporting them. 
*/ switch (usage->usage_index) { case 8: /* L2 */ usage->hid = HID_GD_Z; break; case 9: /* R2 */ usage->hid = HID_GD_RZ; break; default: return -1; } hid_map_usage_clear(hi, usage, bit, max, EV_ABS, usage->hid & 0xf); return 1; } else if ((usage->hid & HID_USAGE_PAGE) == HID_UP_GENDESK) { unsigned int abs = usage->hid & HID_USAGE; if (abs >= ARRAY_SIZE(sixaxis_absmap)) return -1; abs = sixaxis_absmap[abs]; hid_map_usage_clear(hi, usage, bit, max, EV_ABS, abs); return 1; } return -1; } static u8 *sony_report_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *rsize) { struct sony_sc *sc = hid_get_drvdata(hdev); if (sc->quirks & (SINO_LITE_CONTROLLER | FUTUREMAX_DANCE_MAT)) return rdesc; /* * Some Sony RF receivers wrongly declare the mouse pointer as a * a constant non-data variable. */ if ((sc->quirks & VAIO_RDESC_CONSTANT) && *rsize >= 56 && /* usage page: generic desktop controls */ /* rdesc[0] == 0x05 && rdesc[1] == 0x01 && */ /* usage: mouse */ rdesc[2] == 0x09 && rdesc[3] == 0x02 && /* input (usage page for x,y axes): constant, variable, relative */ rdesc[54] == 0x81 && rdesc[55] == 0x07) { hid_info(hdev, "Fixing up Sony RF Receiver report descriptor\n"); /* input: data, variable, relative */ rdesc[55] = 0x06; } if (sc->quirks & MOTION_CONTROLLER) return motion_fixup(hdev, rdesc, rsize); if (sc->quirks & PS3REMOTE) return ps3remote_fixup(hdev, rdesc, rsize); /* * Some knock-off USB dongles incorrectly report their button count * as 13 instead of 16 causing three non-functional buttons. */ if ((sc->quirks & SIXAXIS_CONTROLLER_USB) && *rsize >= 45 && /* Report Count (13) */ rdesc[23] == 0x95 && rdesc[24] == 0x0D && /* Usage Maximum (13) */ rdesc[37] == 0x29 && rdesc[38] == 0x0D && /* Report Count (3) */ rdesc[43] == 0x95 && rdesc[44] == 0x03) { hid_info(hdev, "Fixing up USB dongle report descriptor\n"); rdesc[24] = 0x10; rdesc[38] = 0x10; rdesc[44] = 0x00; } return rdesc; } static void sixaxis_parse_report(struct sony_sc *sc, u8 *rd, int size) { static const u8 sixaxis_battery_capacity[] = { 0, 1, 25, 50, 75, 100 }; unsigned long flags; int offset; u8 battery_capacity; int battery_status; /* * The sixaxis is charging if the battery value is 0xee * and it is fully charged if the value is 0xef. * It does not report the actual level while charging so it * is set to 100% while charging is in progress. */ offset = (sc->quirks & MOTION_CONTROLLER) ? 12 : 30; if (rd[offset] >= 0xee) { battery_capacity = 100; battery_status = (rd[offset] & 0x01) ? POWER_SUPPLY_STATUS_FULL : POWER_SUPPLY_STATUS_CHARGING; } else { u8 index = rd[offset] <= 5 ? rd[offset] : 5; battery_capacity = sixaxis_battery_capacity[index]; battery_status = POWER_SUPPLY_STATUS_DISCHARGING; } spin_lock_irqsave(&sc->lock, flags); sc->battery_capacity = battery_capacity; sc->battery_status = battery_status; spin_unlock_irqrestore(&sc->lock, flags); if (sc->quirks & SIXAXIS_CONTROLLER) { int val; offset = SIXAXIS_INPUT_REPORT_ACC_X_OFFSET; val = ((rd[offset+1] << 8) | rd[offset]) - 511; input_report_abs(sc->sensor_dev, ABS_X, val); /* Y and Z are swapped and inversed */ val = 511 - ((rd[offset+5] << 8) | rd[offset+4]); input_report_abs(sc->sensor_dev, ABS_Y, val); val = 511 - ((rd[offset+3] << 8) | rd[offset+2]); input_report_abs(sc->sensor_dev, ABS_Z, val); input_sync(sc->sensor_dev); } } static void nsg_mrxu_parse_report(struct sony_sc *sc, u8 *rd, int size) { int n, offset, relx, rely; u8 active; /* * The NSG-MRxU multi-touch trackpad data starts at offset 1 and * the touch-related data starts at offset 2. 
* For the first byte, bit 0 is set when touchpad button is pressed. * Bit 2 is set when a touch is active and the drag (Fn) key is pressed. * This drag key is mapped to BTN_LEFT. It is operational only when a * touch point is active. * Bit 4 is set when only the first touch point is active. * Bit 6 is set when only the second touch point is active. * Bits 5 and 7 are set when both touch points are active. * The next 3 bytes are two 12 bit X/Y coordinates for the first touch. * The following byte, offset 5, has the touch width and length. * Bits 0-4=X (width), bits 5-7=Y (length). * A signed relative X coordinate is at offset 6. * The bytes at offset 7-9 are the second touch X/Y coordinates. * Offset 10 has the second touch width and length. * Offset 11 has the relative Y coordinate. */ offset = 1; input_report_key(sc->touchpad, BTN_LEFT, rd[offset] & 0x0F); active = (rd[offset] >> 4); relx = (s8) rd[offset+5]; rely = ((s8) rd[offset+10]) * -1; offset++; for (n = 0; n < 2; n++) { u16 x, y; u8 contactx, contacty; x = rd[offset] | ((rd[offset+1] & 0x0F) << 8); y = ((rd[offset+1] & 0xF0) >> 4) | (rd[offset+2] << 4); input_mt_slot(sc->touchpad, n); input_mt_report_slot_state(sc->touchpad, MT_TOOL_FINGER, active & 0x03); if (active & 0x03) { contactx = rd[offset+3] & 0x0F; contacty = rd[offset+3] >> 4; input_report_abs(sc->touchpad, ABS_MT_TOUCH_MAJOR, max(contactx, contacty)); input_report_abs(sc->touchpad, ABS_MT_TOUCH_MINOR, min(contactx, contacty)); input_report_abs(sc->touchpad, ABS_MT_ORIENTATION, (bool) (contactx > contacty)); input_report_abs(sc->touchpad, ABS_MT_POSITION_X, x); input_report_abs(sc->touchpad, ABS_MT_POSITION_Y, NSG_MRXU_MAX_Y - y); /* * The relative coordinates belong to the first touch * point, when present, or to the second touch point * when the first is not active. */ if ((n == 0) || ((n == 1) && (active & 0x01))) { input_report_rel(sc->touchpad, REL_X, relx); input_report_rel(sc->touchpad, REL_Y, rely); } } offset += 5; active >>= 2; } input_mt_sync_frame(sc->touchpad); input_sync(sc->touchpad); } static int sony_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *rd, int size) { struct sony_sc *sc = hid_get_drvdata(hdev); /* * Sixaxis HID report has acclerometers/gyro with MSByte first, this * has to be BYTE_SWAPPED before passing up to joystick interface */ if ((sc->quirks & SIXAXIS_CONTROLLER) && rd[0] == 0x01 && size == 49) { /* * When connected via Bluetooth the Sixaxis occasionally sends * a report with the second byte 0xff and the rest zeroed. * * This report does not reflect the actual state of the * controller must be ignored to avoid generating false input * events. 
*/ if (rd[1] == 0xff) return -EINVAL; swap(rd[41], rd[42]); swap(rd[43], rd[44]); swap(rd[45], rd[46]); swap(rd[47], rd[48]); sixaxis_parse_report(sc, rd, size); } else if ((sc->quirks & MOTION_CONTROLLER_BT) && rd[0] == 0x01 && size == 49) { sixaxis_parse_report(sc, rd, size); } else if ((sc->quirks & NAVIGATION_CONTROLLER) && rd[0] == 0x01 && size == 49) { sixaxis_parse_report(sc, rd, size); } else if ((sc->quirks & NSG_MRXU_REMOTE) && rd[0] == 0x02) { nsg_mrxu_parse_report(sc, rd, size); return 1; } if (sc->defer_initialization) { sc->defer_initialization = 0; sony_schedule_work(sc, SONY_WORKER_STATE); } return 0; } static int sony_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct sony_sc *sc = hid_get_drvdata(hdev); if (sc->quirks & BUZZ_CONTROLLER) { unsigned int key = usage->hid & HID_USAGE; if ((usage->hid & HID_USAGE_PAGE) != HID_UP_BUTTON) return -1; switch (usage->collection_index) { case 1: if (key >= ARRAY_SIZE(buzz_keymap)) return -1; key = buzz_keymap[key]; if (!key) return -1; break; default: return -1; } hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); return 1; } if (sc->quirks & PS3REMOTE) return ps3remote_mapping(hdev, hi, field, usage, bit, max); if (sc->quirks & NAVIGATION_CONTROLLER) return navigation_mapping(hdev, hi, field, usage, bit, max); if (sc->quirks & SIXAXIS_CONTROLLER) return sixaxis_mapping(hdev, hi, field, usage, bit, max); if (sc->quirks & GH_GUITAR_CONTROLLER) return guitar_mapping(hdev, hi, field, usage, bit, max); /* Let hid-core decide for the others */ return 0; } static int sony_register_touchpad(struct sony_sc *sc, int touch_count, int w, int h, int touch_major, int touch_minor, int orientation) { size_t name_sz; char *name; int ret; sc->touchpad = devm_input_allocate_device(&sc->hdev->dev); if (!sc->touchpad) return -ENOMEM; input_set_drvdata(sc->touchpad, sc); sc->touchpad->dev.parent = &sc->hdev->dev; sc->touchpad->phys = sc->hdev->phys; sc->touchpad->uniq = sc->hdev->uniq; sc->touchpad->id.bustype = sc->hdev->bus; sc->touchpad->id.vendor = sc->hdev->vendor; sc->touchpad->id.product = sc->hdev->product; sc->touchpad->id.version = sc->hdev->version; /* This suffix was originally apended when hid-sony also * supported DS4 devices. The DS4 was implemented using multiple * evdev nodes and hence had the need to separete them out using * a suffix. Other devices which were added later like Sony TV remotes * inhirited this suffix. */ name_sz = strlen(sc->hdev->name) + sizeof(TOUCHPAD_SUFFIX); name = devm_kzalloc(&sc->hdev->dev, name_sz, GFP_KERNEL); if (!name) return -ENOMEM; snprintf(name, name_sz, "%s" TOUCHPAD_SUFFIX, sc->hdev->name); sc->touchpad->name = name; /* We map the button underneath the touchpad to BTN_LEFT. 
*/ __set_bit(EV_KEY, sc->touchpad->evbit); __set_bit(BTN_LEFT, sc->touchpad->keybit); __set_bit(INPUT_PROP_BUTTONPAD, sc->touchpad->propbit); input_set_abs_params(sc->touchpad, ABS_MT_POSITION_X, 0, w, 0, 0); input_set_abs_params(sc->touchpad, ABS_MT_POSITION_Y, 0, h, 0, 0); if (touch_major > 0) { input_set_abs_params(sc->touchpad, ABS_MT_TOUCH_MAJOR, 0, touch_major, 0, 0); if (touch_minor > 0) input_set_abs_params(sc->touchpad, ABS_MT_TOUCH_MINOR, 0, touch_minor, 0, 0); if (orientation > 0) input_set_abs_params(sc->touchpad, ABS_MT_ORIENTATION, 0, orientation, 0, 0); } if (sc->quirks & NSG_MRXU_REMOTE) { __set_bit(EV_REL, sc->touchpad->evbit); } ret = input_mt_init_slots(sc->touchpad, touch_count, INPUT_MT_POINTER); if (ret < 0) return ret; ret = input_register_device(sc->touchpad); if (ret < 0) return ret; return 0; } static int sony_register_sensors(struct sony_sc *sc) { size_t name_sz; char *name; int ret; sc->sensor_dev = devm_input_allocate_device(&sc->hdev->dev); if (!sc->sensor_dev) return -ENOMEM; input_set_drvdata(sc->sensor_dev, sc); sc->sensor_dev->dev.parent = &sc->hdev->dev; sc->sensor_dev->phys = sc->hdev->phys; sc->sensor_dev->uniq = sc->hdev->uniq; sc->sensor_dev->id.bustype = sc->hdev->bus; sc->sensor_dev->id.vendor = sc->hdev->vendor; sc->sensor_dev->id.product = sc->hdev->product; sc->sensor_dev->id.version = sc->hdev->version; /* Append a suffix to the controller name as there are various * DS4 compatible non-Sony devices with different names. */ name_sz = strlen(sc->hdev->name) + sizeof(SENSOR_SUFFIX); name = devm_kzalloc(&sc->hdev->dev, name_sz, GFP_KERNEL); if (!name) return -ENOMEM; snprintf(name, name_sz, "%s" SENSOR_SUFFIX, sc->hdev->name); sc->sensor_dev->name = name; if (sc->quirks & SIXAXIS_CONTROLLER) { /* For the DS3 we only support the accelerometer, which works * quite well even without calibration. The device also has * a 1-axis gyro, but it is very difficult to manage from within * the driver even to get data, the sensor is inaccurate and * the behavior is very different between hardware revisions. */ input_set_abs_params(sc->sensor_dev, ABS_X, -512, 511, 4, 0); input_set_abs_params(sc->sensor_dev, ABS_Y, -512, 511, 4, 0); input_set_abs_params(sc->sensor_dev, ABS_Z, -512, 511, 4, 0); input_abs_set_res(sc->sensor_dev, ABS_X, SIXAXIS_ACC_RES_PER_G); input_abs_set_res(sc->sensor_dev, ABS_Y, SIXAXIS_ACC_RES_PER_G); input_abs_set_res(sc->sensor_dev, ABS_Z, SIXAXIS_ACC_RES_PER_G); } __set_bit(INPUT_PROP_ACCELEROMETER, sc->sensor_dev->propbit); ret = input_register_device(sc->sensor_dev); if (ret < 0) return ret; return 0; } /* * Sending HID_REQ_GET_REPORT changes the operation mode of the ps3 controller * to "operational". Without this, the ps3 controller will not report any * events. */ static int sixaxis_set_operational_usb(struct hid_device *hdev) { struct sony_sc *sc = hid_get_drvdata(hdev); const int buf_size = max(SIXAXIS_REPORT_0xF2_SIZE, SIXAXIS_REPORT_0xF5_SIZE); u8 *buf; int ret; buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(hdev, 0xf2, buf, SIXAXIS_REPORT_0xF2_SIZE, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { hid_err(hdev, "can't set operational mode: step 1\n"); goto out; } /* * Some compatible controllers like the Speedlink Strike FX and * Gasia need another query plus an USB interrupt to get operational. 
 */
	ret = hid_hw_raw_request(hdev, 0xf5, buf, SIXAXIS_REPORT_0xF5_SIZE,
				 HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
	if (ret < 0) {
		hid_err(hdev, "can't set operational mode: step 2\n");
		goto out;
	}

	/*
	 * But the USB interrupt would cause SHANWAN controllers to
	 * start rumbling non-stop, so skip step 3 for these controllers.
	 */
	if (sc->quirks & SHANWAN_GAMEPAD)
		goto out;

	ret = hid_hw_output_report(hdev, buf, 1);
	if (ret < 0) {
		hid_info(hdev, "can't set operational mode: step 3, ignoring\n");
		ret = 0;
	}

out:
	kfree(buf);
	return ret;
}

static int sixaxis_set_operational_bt(struct hid_device *hdev)
{
	static const u8 report[] = { 0xf4, 0x42, 0x03, 0x00, 0x00 };
	u8 *buf;
	int ret;

	buf = kmemdup(report, sizeof(report), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(report),
				 HID_FEATURE_REPORT, HID_REQ_SET_REPORT);

	kfree(buf);

	return ret;
}

static void sixaxis_set_leds_from_id(struct sony_sc *sc)
{
	static const u8 sixaxis_leds[10][4] = {
				{ 0x01, 0x00, 0x00, 0x00 },
				{ 0x00, 0x01, 0x00, 0x00 },
				{ 0x00, 0x00, 0x01, 0x00 },
				{ 0x00, 0x00, 0x00, 0x01 },
				{ 0x01, 0x00, 0x00, 0x01 },
				{ 0x00, 0x01, 0x00, 0x01 },
				{ 0x00, 0x00, 0x01, 0x01 },
				{ 0x01, 0x00, 0x01, 0x01 },
				{ 0x00, 0x01, 0x01, 0x01 },
				{ 0x01, 0x01, 0x01, 0x01 }
	};

	int id = sc->device_id;

	BUILD_BUG_ON(MAX_LEDS < ARRAY_SIZE(sixaxis_leds[0]));

	if (id < 0)
		return;

	id %= 10;
	memcpy(sc->led_state, sixaxis_leds[id], sizeof(sixaxis_leds[id]));
}

static void buzz_set_leds(struct sony_sc *sc)
{
	struct hid_device *hdev = sc->hdev;
	struct list_head *report_list =
		&hdev->report_enum[HID_OUTPUT_REPORT].report_list;
	struct hid_report *report = list_entry(report_list->next,
		struct hid_report, list);
	s32 *value = report->field[0]->value;

	BUILD_BUG_ON(MAX_LEDS < 4);

	value[0] = 0x00;
	value[1] = sc->led_state[0] ? 0xff : 0x00;
	value[2] = sc->led_state[1] ? 0xff : 0x00;
	value[3] = sc->led_state[2] ? 0xff : 0x00;
	value[4] = sc->led_state[3] ? 0xff : 0x00;
	value[5] = 0x00;
	value[6] = 0x00;
	hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
}

static void sony_set_leds(struct sony_sc *sc)
{
	if (!(sc->quirks & BUZZ_CONTROLLER))
		sony_schedule_work(sc, SONY_WORKER_STATE);
	else
		buzz_set_leds(sc);
}

static void sony_led_set_brightness(struct led_classdev *led,
				    enum led_brightness value)
{
	struct device *dev = led->dev->parent;
	struct hid_device *hdev = to_hid_device(dev);
	struct sony_sc *drv_data;

	int n;
	int force_update;

	drv_data = hid_get_drvdata(hdev);
	if (!drv_data) {
		hid_err(hdev, "No device data\n");
		return;
	}

	/*
	 * The Sixaxis on USB will override any LED settings sent to it
	 * and keep flashing all of the LEDs until the PS button is pressed.
	 * Updates, even if redundant, must always be sent to the
	 * controller to avoid having to toggle the state of an LED just to
	 * stop the flashing later on.
*/ force_update = !!(drv_data->quirks & SIXAXIS_CONTROLLER_USB); for (n = 0; n < drv_data->led_count; n++) { if (led == drv_data->leds[n] && (force_update || (value != drv_data->led_state[n] || drv_data->led_delay_on[n] || drv_data->led_delay_off[n]))) { drv_data->led_state[n] = value; /* Setting the brightness stops the blinking */ drv_data->led_delay_on[n] = 0; drv_data->led_delay_off[n] = 0; sony_set_leds(drv_data); break; } } } static enum led_brightness sony_led_get_brightness(struct led_classdev *led) { struct device *dev = led->dev->parent; struct hid_device *hdev = to_hid_device(dev); struct sony_sc *drv_data; int n; drv_data = hid_get_drvdata(hdev); if (!drv_data) { hid_err(hdev, "No device data\n"); return LED_OFF; } for (n = 0; n < drv_data->led_count; n++) { if (led == drv_data->leds[n]) return drv_data->led_state[n]; } return LED_OFF; } static int sony_led_blink_set(struct led_classdev *led, unsigned long *delay_on, unsigned long *delay_off) { struct device *dev = led->dev->parent; struct hid_device *hdev = to_hid_device(dev); struct sony_sc *drv_data = hid_get_drvdata(hdev); int n; u8 new_on, new_off; if (!drv_data) { hid_err(hdev, "No device data\n"); return -EINVAL; } /* Max delay is 255 deciseconds or 2550 milliseconds */ if (*delay_on > 2550) *delay_on = 2550; if (*delay_off > 2550) *delay_off = 2550; /* Blink at 1 Hz if both values are zero */ if (!*delay_on && !*delay_off) *delay_on = *delay_off = 500; new_on = *delay_on / 10; new_off = *delay_off / 10; for (n = 0; n < drv_data->led_count; n++) { if (led == drv_data->leds[n]) break; } /* This LED is not registered on this device */ if (n >= drv_data->led_count) return -EINVAL; /* Don't schedule work if the values didn't change */ if (new_on != drv_data->led_delay_on[n] || new_off != drv_data->led_delay_off[n]) { drv_data->led_delay_on[n] = new_on; drv_data->led_delay_off[n] = new_off; sony_schedule_work(drv_data, SONY_WORKER_STATE); } return 0; } static int sony_leds_init(struct sony_sc *sc) { struct hid_device *hdev = sc->hdev; int n, ret = 0; int use_color_names; struct led_classdev *led; size_t name_sz; char *name; size_t name_len; const char *name_fmt; static const char * const color_name_str[] = { "red", "green", "blue", "global" }; u8 max_brightness[MAX_LEDS] = { [0 ... (MAX_LEDS - 1)] = 1 }; u8 use_hw_blink[MAX_LEDS] = { 0 }; BUG_ON(!(sc->quirks & SONY_LED_SUPPORT)); if (sc->quirks & BUZZ_CONTROLLER) { sc->led_count = 4; use_color_names = 0; name_len = strlen("::buzz#"); name_fmt = "%s::buzz%d"; /* Validate expected report characteristics. */ if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 0, 0, 7)) return -ENODEV; } else if (sc->quirks & MOTION_CONTROLLER) { sc->led_count = 3; memset(max_brightness, 255, 3); use_color_names = 1; name_len = 0; name_fmt = "%s:%s"; } else if (sc->quirks & NAVIGATION_CONTROLLER) { static const u8 navigation_leds[4] = {0x01, 0x00, 0x00, 0x00}; memcpy(sc->led_state, navigation_leds, sizeof(navigation_leds)); sc->led_count = 1; memset(use_hw_blink, 1, 4); use_color_names = 0; name_len = strlen("::sony#"); name_fmt = "%s::sony%d"; } else { sixaxis_set_leds_from_id(sc); sc->led_count = 4; memset(use_hw_blink, 1, 4); use_color_names = 0; name_len = strlen("::sony#"); name_fmt = "%s::sony%d"; } /* * Clear LEDs as we have no way of reading their initial state. 
This is * only relevant if the driver is loaded after somebody actively set the * LEDs to on */ sony_set_leds(sc); name_sz = strlen(dev_name(&hdev->dev)) + name_len + 1; for (n = 0; n < sc->led_count; n++) { if (use_color_names) name_sz = strlen(dev_name(&hdev->dev)) + strlen(color_name_str[n]) + 2; led = devm_kzalloc(&hdev->dev, sizeof(struct led_classdev) + name_sz, GFP_KERNEL); if (!led) { hid_err(hdev, "Couldn't allocate memory for LED %d\n", n); return -ENOMEM; } name = (void *)(&led[1]); if (use_color_names) snprintf(name, name_sz, name_fmt, dev_name(&hdev->dev), color_name_str[n]); else snprintf(name, name_sz, name_fmt, dev_name(&hdev->dev), n + 1); led->name = name; led->brightness = sc->led_state[n]; led->max_brightness = max_brightness[n]; led->flags = LED_CORE_SUSPENDRESUME; led->brightness_get = sony_led_get_brightness; led->brightness_set = sony_led_set_brightness; if (use_hw_blink[n]) led->blink_set = sony_led_blink_set; sc->leds[n] = led; ret = devm_led_classdev_register(&hdev->dev, led); if (ret) { hid_err(hdev, "Failed to register LED %d\n", n); return ret; } } return 0; } static void sixaxis_send_output_report(struct sony_sc *sc) { static const union sixaxis_output_report_01 default_report = { .buf = { 0x01, 0x01, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x27, 0x10, 0x00, 0x32, 0xff, 0x27, 0x10, 0x00, 0x32, 0xff, 0x27, 0x10, 0x00, 0x32, 0xff, 0x27, 0x10, 0x00, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00 } }; struct sixaxis_output_report *report = (struct sixaxis_output_report *)sc->output_report_dmabuf; int n; /* Initialize the report with default values */ memcpy(report, &default_report, sizeof(struct sixaxis_output_report)); #ifdef CONFIG_SONY_FF report->rumble.right_motor_on = sc->right ? 1 : 0; report->rumble.left_motor_force = sc->left; #endif report->leds_bitmap |= sc->led_state[0] << 1; report->leds_bitmap |= sc->led_state[1] << 2; report->leds_bitmap |= sc->led_state[2] << 3; report->leds_bitmap |= sc->led_state[3] << 4; /* Set flag for all leds off, required for 3rd party INTEC controller */ if ((report->leds_bitmap & 0x1E) == 0) report->leds_bitmap |= 0x20; /* * The LEDs in the report are indexed in reverse order to their * corresponding light on the controller. * Index 0 = LED 4, index 1 = LED 3, etc... * * In the case of both delay values being zero (blinking disabled) the * default report values should be used or the controller LED will be * always off. 
*/ for (n = 0; n < 4; n++) { if (sc->led_delay_on[n] || sc->led_delay_off[n]) { report->led[3 - n].duty_off = sc->led_delay_off[n]; report->led[3 - n].duty_on = sc->led_delay_on[n]; } } /* SHANWAN controllers require output reports via intr channel */ if (sc->quirks & SHANWAN_GAMEPAD) hid_hw_output_report(sc->hdev, (u8 *)report, sizeof(struct sixaxis_output_report)); else hid_hw_raw_request(sc->hdev, report->report_id, (u8 *)report, sizeof(struct sixaxis_output_report), HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); } static void motion_send_output_report(struct sony_sc *sc) { struct hid_device *hdev = sc->hdev; struct motion_output_report_02 *report = (struct motion_output_report_02 *)sc->output_report_dmabuf; memset(report, 0, MOTION_REPORT_0x02_SIZE); report->type = 0x02; /* set leds */ report->r = sc->led_state[0]; report->g = sc->led_state[1]; report->b = sc->led_state[2]; #ifdef CONFIG_SONY_FF report->rumble = max(sc->right, sc->left); #endif hid_hw_output_report(hdev, (u8 *)report, MOTION_REPORT_0x02_SIZE); } #ifdef CONFIG_SONY_FF static inline void sony_send_output_report(struct sony_sc *sc) { if (sc->send_output_report) sc->send_output_report(sc); } #endif static void sony_state_worker(struct work_struct *work) { struct sony_sc *sc = container_of(work, struct sony_sc, state_worker); sc->send_output_report(sc); } static int sony_allocate_output_report(struct sony_sc *sc) { if ((sc->quirks & SIXAXIS_CONTROLLER) || (sc->quirks & NAVIGATION_CONTROLLER)) sc->output_report_dmabuf = devm_kmalloc(&sc->hdev->dev, sizeof(union sixaxis_output_report_01), GFP_KERNEL); else if (sc->quirks & MOTION_CONTROLLER) sc->output_report_dmabuf = devm_kmalloc(&sc->hdev->dev, MOTION_REPORT_0x02_SIZE, GFP_KERNEL); else return 0; if (!sc->output_report_dmabuf) return -ENOMEM; return 0; } #ifdef CONFIG_SONY_FF static int sony_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct sony_sc *sc = hid_get_drvdata(hid); if (effect->type != FF_RUMBLE) return 0; sc->left = effect->u.rumble.strong_magnitude / 256; sc->right = effect->u.rumble.weak_magnitude / 256; sony_schedule_work(sc, SONY_WORKER_STATE); return 0; } static int sony_init_ff(struct sony_sc *sc) { struct hid_input *hidinput; struct input_dev *input_dev; if (list_empty(&sc->hdev->inputs)) { hid_err(sc->hdev, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(sc->hdev->inputs.next, struct hid_input, list); input_dev = hidinput->input; input_set_capability(input_dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(input_dev, NULL, sony_play_effect); } #else static int sony_init_ff(struct sony_sc *sc) { return 0; } #endif static int sony_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct sony_sc *sc = power_supply_get_drvdata(psy); unsigned long flags; int ret = 0; u8 battery_capacity; int battery_status; spin_lock_irqsave(&sc->lock, flags); battery_capacity = sc->battery_capacity; battery_status = sc->battery_status; spin_unlock_irqrestore(&sc->lock, flags); switch (psp) { case POWER_SUPPLY_PROP_PRESENT: val->intval = 1; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = battery_capacity; break; case POWER_SUPPLY_PROP_STATUS: val->intval = battery_status; break; default: ret = -EINVAL; break; } return ret; } static int sony_battery_probe(struct sony_sc *sc, int append_dev_id) { const char *battery_str_fmt = append_dev_id ? 
"sony_controller_battery_%pMR_%i" : "sony_controller_battery_%pMR"; struct power_supply_config psy_cfg = { .drv_data = sc, }; struct hid_device *hdev = sc->hdev; int ret; /* * Set the default battery level to 100% to avoid low battery warnings * if the battery is polled before the first device report is received. */ sc->battery_capacity = 100; sc->battery_desc.properties = sony_battery_props; sc->battery_desc.num_properties = ARRAY_SIZE(sony_battery_props); sc->battery_desc.get_property = sony_battery_get_property; sc->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY; sc->battery_desc.use_for_apm = 0; sc->battery_desc.name = devm_kasprintf(&hdev->dev, GFP_KERNEL, battery_str_fmt, sc->mac_address, sc->device_id); if (!sc->battery_desc.name) return -ENOMEM; sc->battery = devm_power_supply_register(&hdev->dev, &sc->battery_desc, &psy_cfg); if (IS_ERR(sc->battery)) { ret = PTR_ERR(sc->battery); hid_err(hdev, "Unable to register battery device\n"); return ret; } power_supply_powers(sc->battery, &hdev->dev); return 0; } /* * If a controller is plugged in via USB while already connected via Bluetooth * it will show up as two devices. A global list of connected controllers and * their MAC addresses is maintained to ensure that a device is only connected * once. * * Some USB-only devices masquerade as Sixaxis controllers and all have the * same dummy Bluetooth address, so a comparison of the connection type is * required. Devices are only rejected in the case where two devices have * matching Bluetooth addresses on different bus types. */ static inline int sony_compare_connection_type(struct sony_sc *sc0, struct sony_sc *sc1) { const int sc0_not_bt = !(sc0->quirks & SONY_BT_DEVICE); const int sc1_not_bt = !(sc1->quirks & SONY_BT_DEVICE); return sc0_not_bt == sc1_not_bt; } static int sony_check_add_dev_list(struct sony_sc *sc) { struct sony_sc *entry; unsigned long flags; int ret; spin_lock_irqsave(&sony_dev_list_lock, flags); list_for_each_entry(entry, &sony_device_list, list_node) { ret = memcmp(sc->mac_address, entry->mac_address, sizeof(sc->mac_address)); if (!ret) { if (sony_compare_connection_type(sc, entry)) { ret = 1; } else { ret = -EEXIST; hid_info(sc->hdev, "controller with MAC address %pMR already connected\n", sc->mac_address); } goto unlock; } } ret = 0; list_add(&(sc->list_node), &sony_device_list); unlock: spin_unlock_irqrestore(&sony_dev_list_lock, flags); return ret; } static void sony_remove_dev_list(struct sony_sc *sc) { unsigned long flags; if (sc->list_node.next) { spin_lock_irqsave(&sony_dev_list_lock, flags); list_del(&(sc->list_node)); spin_unlock_irqrestore(&sony_dev_list_lock, flags); } } static int sony_get_bt_devaddr(struct sony_sc *sc) { int ret; /* HIDP stores the device MAC address as a string in the uniq field. */ ret = strlen(sc->hdev->uniq); if (ret != 17) return -EINVAL; ret = sscanf(sc->hdev->uniq, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", &sc->mac_address[5], &sc->mac_address[4], &sc->mac_address[3], &sc->mac_address[2], &sc->mac_address[1], &sc->mac_address[0]); if (ret != 6) return -EINVAL; return 0; } static int sony_check_add(struct sony_sc *sc) { u8 *buf = NULL; int n, ret; if ((sc->quirks & MOTION_CONTROLLER_BT) || (sc->quirks & NAVIGATION_CONTROLLER_BT) || (sc->quirks & SIXAXIS_CONTROLLER_BT)) { /* * sony_get_bt_devaddr() attempts to parse the Bluetooth MAC * address from the uniq string where HIDP stores it. * As uniq cannot be guaranteed to be a MAC address in all cases * a failure of this function should not prevent the connection. 
*/ if (sony_get_bt_devaddr(sc) < 0) { hid_warn(sc->hdev, "UNIQ does not contain a MAC address; duplicate check skipped\n"); return 0; } } else if ((sc->quirks & SIXAXIS_CONTROLLER_USB) || (sc->quirks & NAVIGATION_CONTROLLER_USB)) { buf = kmalloc(SIXAXIS_REPORT_0xF2_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; /* * The MAC address of a Sixaxis controller connected via USB can * be retrieved with feature report 0xf2. The address begins at * offset 4. */ ret = hid_hw_raw_request(sc->hdev, 0xf2, buf, SIXAXIS_REPORT_0xF2_SIZE, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret != SIXAXIS_REPORT_0xF2_SIZE) { hid_err(sc->hdev, "failed to retrieve feature report 0xf2 with the Sixaxis MAC address\n"); ret = ret < 0 ? ret : -EINVAL; goto out_free; } /* * The Sixaxis device MAC in the report is big-endian and must * be byte-swapped. */ for (n = 0; n < 6; n++) sc->mac_address[5-n] = buf[4+n]; snprintf(sc->hdev->uniq, sizeof(sc->hdev->uniq), "%pMR", sc->mac_address); } else { return 0; } ret = sony_check_add_dev_list(sc); out_free: kfree(buf); return ret; } static int sony_set_device_id(struct sony_sc *sc) { int ret; /* * Only Sixaxis controllers get an id. * All others are set to -1. */ if (sc->quirks & SIXAXIS_CONTROLLER) { ret = ida_simple_get(&sony_device_id_allocator, 0, 0, GFP_KERNEL); if (ret < 0) { sc->device_id = -1; return ret; } sc->device_id = ret; } else { sc->device_id = -1; } return 0; } static void sony_release_device_id(struct sony_sc *sc) { if (sc->device_id >= 0) { ida_simple_remove(&sony_device_id_allocator, sc->device_id); sc->device_id = -1; } } static inline void sony_init_output_report(struct sony_sc *sc, void (*send_output_report)(struct sony_sc *)) { sc->send_output_report = send_output_report; if (!sc->state_worker_initialized) INIT_WORK(&sc->state_worker, sony_state_worker); sc->state_worker_initialized = 1; } static inline void sony_cancel_work_sync(struct sony_sc *sc) { unsigned long flags; if (sc->state_worker_initialized) { spin_lock_irqsave(&sc->lock, flags); sc->state_worker_initialized = 0; spin_unlock_irqrestore(&sc->lock, flags); cancel_work_sync(&sc->state_worker); } } static int sony_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct sony_sc *sc = hid_get_drvdata(hdev); int append_dev_id; int ret; ret = sony_set_device_id(sc); if (ret < 0) { hid_err(hdev, "failed to allocate the device id\n"); goto err_stop; } ret = append_dev_id = sony_check_add(sc); if (ret < 0) goto err_stop; ret = sony_allocate_output_report(sc); if (ret < 0) { hid_err(hdev, "failed to allocate the output report buffer\n"); goto err_stop; } if (sc->quirks & NAVIGATION_CONTROLLER_USB) { /* * The Sony Sixaxis does not handle HID Output Reports on the * Interrupt EP like it could, so we need to force HID Output * Reports to use HID_REQ_SET_REPORT on the Control EP. * * There is also another issue about HID Output Reports via USB, * the Sixaxis does not want the report_id as part of the data * packet, so we have to discard buf[0] when sending the actual * control message, even for numbered reports, humpf! * * Additionally, the Sixaxis on USB isn't properly initialized * until the PS logo button is pressed and as such won't retain * any state set by an output report, so the initial * configuration report is deferred until the first input * report arrives. 
*/ hdev->quirks |= HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP; hdev->quirks |= HID_QUIRK_SKIP_OUTPUT_REPORT_ID; sc->defer_initialization = 1; ret = sixaxis_set_operational_usb(hdev); if (ret < 0) { hid_err(hdev, "Failed to set controller into operational mode\n"); goto err_stop; } sony_init_output_report(sc, sixaxis_send_output_report); } else if (sc->quirks & NAVIGATION_CONTROLLER_BT) { /* * The Navigation controller wants output reports sent on the ctrl * endpoint when connected via Bluetooth. */ hdev->quirks |= HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP; ret = sixaxis_set_operational_bt(hdev); if (ret < 0) { hid_err(hdev, "Failed to set controller into operational mode\n"); goto err_stop; } sony_init_output_report(sc, sixaxis_send_output_report); } else if (sc->quirks & SIXAXIS_CONTROLLER_USB) { /* * The Sony Sixaxis does not handle HID Output Reports on the * Interrupt EP and the device only becomes active when the * PS button is pressed. See comment for Navigation controller * above for more details. */ hdev->quirks |= HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP; hdev->quirks |= HID_QUIRK_SKIP_OUTPUT_REPORT_ID; sc->defer_initialization = 1; ret = sixaxis_set_operational_usb(hdev); if (ret < 0) { hid_err(hdev, "Failed to set controller into operational mode\n"); goto err_stop; } ret = sony_register_sensors(sc); if (ret) { hid_err(sc->hdev, "Unable to initialize motion sensors: %d\n", ret); goto err_stop; } sony_init_output_report(sc, sixaxis_send_output_report); } else if (sc->quirks & SIXAXIS_CONTROLLER_BT) { /* * The Sixaxis wants output reports sent on the ctrl endpoint * when connected via Bluetooth. */ hdev->quirks |= HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP; ret = sixaxis_set_operational_bt(hdev); if (ret < 0) { hid_err(hdev, "Failed to set controller into operational mode\n"); goto err_stop; } ret = sony_register_sensors(sc); if (ret) { hid_err(sc->hdev, "Unable to initialize motion sensors: %d\n", ret); goto err_stop; } sony_init_output_report(sc, sixaxis_send_output_report); } else if (sc->quirks & NSG_MRXU_REMOTE) { /* * The NSG-MRxU touchpad supports 2 touches and has a * resolution of 1667x1868 */ ret = sony_register_touchpad(sc, 2, NSG_MRXU_MAX_X, NSG_MRXU_MAX_Y, 15, 15, 1); if (ret) { hid_err(sc->hdev, "Unable to initialize multi-touch slots: %d\n", ret); goto err_stop; } } else if (sc->quirks & MOTION_CONTROLLER) { sony_init_output_report(sc, motion_send_output_report); } else { ret = 0; } if (sc->quirks & SONY_LED_SUPPORT) { ret = sony_leds_init(sc); if (ret < 0) goto err_stop; } if (sc->quirks & SONY_BATTERY_SUPPORT) { ret = sony_battery_probe(sc, append_dev_id); if (ret < 0) goto err_stop; /* Open the device to receive reports with battery info */ ret = hid_hw_open(hdev); if (ret < 0) { hid_err(hdev, "hw open failed\n"); goto err_stop; } } if (sc->quirks & SONY_FF_SUPPORT) { ret = sony_init_ff(sc); if (ret < 0) goto err_close; } return 0; err_close: hid_hw_close(hdev); err_stop: sony_cancel_work_sync(sc); sony_remove_dev_list(sc); sony_release_device_id(sc); return ret; } static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; unsigned long quirks = id->driver_data; struct sony_sc *sc; struct usb_device *usbdev; unsigned int connect_mask = HID_CONNECT_DEFAULT; if (!strcmp(hdev->name, "FutureMax Dance Mat")) quirks |= FUTUREMAX_DANCE_MAT; if (!strcmp(hdev->name, "SHANWAN PS3 GamePad") || !strcmp(hdev->name, "ShanWan PS(R) Ga`epad")) quirks |= SHANWAN_GAMEPAD; sc = devm_kzalloc(&hdev->dev, sizeof(*sc), GFP_KERNEL); if (sc == NULL) { hid_err(hdev, 
"can't alloc sony descriptor\n"); return -ENOMEM; } spin_lock_init(&sc->lock); sc->quirks = quirks; hid_set_drvdata(hdev, sc); sc->hdev = hdev; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } if (sc->quirks & VAIO_RDESC_CONSTANT) connect_mask |= HID_CONNECT_HIDDEV_FORCE; else if (sc->quirks & SIXAXIS_CONTROLLER) connect_mask |= HID_CONNECT_HIDDEV_FORCE; /* Patch the hw version on DS3 compatible devices, so applications can * distinguish between the default HID mappings and the mappings defined * by the Linux game controller spec. This is important for the SDL2 * library, which has a game controller database, which uses device ids * in combination with version as a key. */ if (sc->quirks & SIXAXIS_CONTROLLER) hdev->version |= 0x8000; ret = hid_hw_start(hdev, connect_mask); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } /* sony_input_configured can fail, but this doesn't result * in hid_hw_start failures (intended). Check whether * the HID layer claimed the device else fail. * We don't know the actual reason for the failure, most * likely it is due to EEXIST in case of double connection * of USB and Bluetooth, but could have been due to ENOMEM * or other reasons as well. */ if (!(hdev->claimed & HID_CLAIMED_INPUT)) { hid_err(hdev, "failed to claim input\n"); ret = -ENODEV; goto err; } if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { if (!hid_is_usb(hdev)) { ret = -EINVAL; goto err; } usbdev = to_usb_device(sc->hdev->dev.parent->parent); sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC); if (!sc->ghl_urb) { ret = -ENOMEM; goto err; } if (sc->quirks & GHL_GUITAR_PS3WIIU) ret = ghl_init_urb(sc, usbdev, ghl_ps3wiiu_magic_data, ARRAY_SIZE(ghl_ps3wiiu_magic_data)); else if (sc->quirks & GHL_GUITAR_PS4) ret = ghl_init_urb(sc, usbdev, ghl_ps4_magic_data, ARRAY_SIZE(ghl_ps4_magic_data)); if (ret) { hid_err(hdev, "error preparing URB\n"); goto err; } timer_setup(&sc->ghl_poke_timer, ghl_magic_poke, 0); mod_timer(&sc->ghl_poke_timer, jiffies + GHL_GUITAR_POKE_INTERVAL*HZ); } return ret; err: usb_free_urb(sc->ghl_urb); hid_hw_stop(hdev); return ret; } static void sony_remove(struct hid_device *hdev) { struct sony_sc *sc = hid_get_drvdata(hdev); if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { del_timer_sync(&sc->ghl_poke_timer); usb_free_urb(sc->ghl_urb); } hid_hw_close(hdev); sony_cancel_work_sync(sc); sony_remove_dev_list(sc); sony_release_device_id(sc); hid_hw_stop(hdev); } #ifdef CONFIG_PM static int sony_suspend(struct hid_device *hdev, pm_message_t message) { #ifdef CONFIG_SONY_FF /* On suspend stop any running force-feedback events */ if (SONY_FF_SUPPORT) { struct sony_sc *sc = hid_get_drvdata(hdev); sc->left = sc->right = 0; sony_send_output_report(sc); } #endif return 0; } static int sony_resume(struct hid_device *hdev) { struct sony_sc *sc = hid_get_drvdata(hdev); /* * The Sixaxis and navigation controllers on USB need to be * reinitialized on resume or they won't behave properly. 
*/ if ((sc->quirks & SIXAXIS_CONTROLLER_USB) || (sc->quirks & NAVIGATION_CONTROLLER_USB)) { sixaxis_set_operational_usb(sc->hdev); sc->defer_initialization = 1; } return 0; } #endif static const struct hid_device_id sony_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER), .driver_data = SIXAXIS_CONTROLLER_USB }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER), .driver_data = NAVIGATION_CONTROLLER_USB }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER), .driver_data = NAVIGATION_CONTROLLER_BT }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_MOTION_CONTROLLER), .driver_data = MOTION_CONTROLLER_USB }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_MOTION_CONTROLLER), .driver_data = MOTION_CONTROLLER_BT }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER), .driver_data = SIXAXIS_CONTROLLER_BT }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE), .driver_data = VAIO_RDESC_CONSTANT }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE), .driver_data = VAIO_RDESC_CONSTANT }, /* * Wired Buzz Controller. Reported as Sony Hub from its USB ID and as * Logitech joystick from the device descriptor. */ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_BUZZ_CONTROLLER), .driver_data = BUZZ_CONTROLLER }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER), .driver_data = BUZZ_CONTROLLER }, /* PS3 BD Remote Control */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_BDREMOTE), .driver_data = PS3REMOTE }, /* Logitech Harmony Adapter for PS3 */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3), .driver_data = PS3REMOTE }, /* SMK-Link PS3 BD Remote Control */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_PS3_BDREMOTE), .driver_data = PS3REMOTE }, /* Nyko Core Controller for PS3 */ { HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER), .driver_data = SIXAXIS_CONTROLLER_USB | SINO_LITE_CONTROLLER }, /* SMK-Link NSG-MR5U Remote Control */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_NSG_MR5U_REMOTE), .driver_data = NSG_MR5U_REMOTE_BT }, /* SMK-Link NSG-MR7U Remote Control */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_NSG_MR7U_REMOTE), .driver_data = NSG_MR7U_REMOTE_BT }, /* Guitar Hero Live PS3 and Wii U guitar dongles */ { HID_USB_DEVICE(USB_VENDOR_ID_SONY_RHYTHM, USB_DEVICE_ID_SONY_PS3WIIU_GHLIVE_DONGLE), .driver_data = GHL_GUITAR_PS3WIIU | GH_GUITAR_CONTROLLER }, /* Guitar Hero PC Guitar Dongle */ { HID_USB_DEVICE(USB_VENDOR_ID_REDOCTANE, USB_DEVICE_ID_REDOCTANE_GUITAR_DONGLE), .driver_data = GH_GUITAR_CONTROLLER }, /* Guitar Hero PS3 World Tour Guitar Dongle */ { HID_USB_DEVICE(USB_VENDOR_ID_SONY_RHYTHM, USB_DEVICE_ID_SONY_PS3_GUITAR_DONGLE), .driver_data = GH_GUITAR_CONTROLLER }, /* Guitar Hero Live PS4 guitar dongles */ { HID_USB_DEVICE(USB_VENDOR_ID_REDOCTANE, USB_DEVICE_ID_REDOCTANE_PS4_GHLIVE_DONGLE), .driver_data = GHL_GUITAR_PS4 | GH_GUITAR_CONTROLLER }, { } }; MODULE_DEVICE_TABLE(hid, sony_devices); static struct hid_driver sony_driver = { .name = "sony", .id_table = sony_devices, .input_mapping = sony_mapping, .input_configured = sony_input_configured, .probe = sony_probe, .remove = sony_remove, .report_fixup = sony_report_fixup, .raw_event = sony_raw_event, #ifdef CONFIG_PM .suspend = sony_suspend, .resume = sony_resume, .reset_resume = sony_resume, 
#endif
};

static int __init sony_init(void)
{
	dbg_hid("Sony:%s\n", __func__);

	return hid_register_driver(&sony_driver);
}

static void __exit sony_exit(void)
{
	dbg_hid("Sony:%s\n", __func__);

	hid_unregister_driver(&sony_driver);
	ida_destroy(&sony_device_id_allocator);
}

module_init(sony_init);
module_exit(sony_exit);

MODULE_LICENSE("GPL");
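For reference, the Bluetooth "operational mode" handshake performed by sixaxis_set_operational_bt() above amounts to a single feature report with a fixed payload. The standalone sketch below, which is not part of the driver, sends the same 0xf4 report from userspace through the hidraw interface; the /dev/hidraw0 path is a placeholder assumption and must be replaced with the hidraw node that belongs to the controller.

/*
 * Minimal userspace sketch: push the Sixaxis 0xf4 "enable operational
 * mode" feature report over hidraw.  Assumes /dev/hidraw0 is the
 * controller's node.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hidraw.h>

int main(void)
{
	/* Same payload the driver uses: report id 0xf4, magic 0x42 0x03. */
	unsigned char report[] = { 0xf4, 0x42, 0x03, 0x00, 0x00 };
	int fd = open("/dev/hidraw0", O_RDWR);	/* placeholder path */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* First byte of the buffer is the report number. */
	if (ioctl(fd, HIDIOCSFEATURE(sizeof(report)), report) < 0)
		perror("HIDIOCSFEATURE");

	close(fd);
	return 0;
}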
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ALSA sequencer Ports
 * Copyright (c) 1998 by Frank van de Pol <fvdpol@coil.demon.nl>
 *                       Jaroslav Kysela <perex@perex.cz>
 */

#include <sound/core.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "seq_system.h"
#include "seq_ports.h"
#include "seq_clientmgr.h"

/* registration of client ports */

/* NOTE: the current implementation of the port structure as a linked list
   is not optimal for clients that have many ports. For sending messages to
   all subscribers of a port we first need to find the address of the port
   structure, which means we have to traverse the list.
   A direct access table (array) would be better, but big preallocated
   arrays waste memory.

   Possible actions:

   1) leave it this way, a client normally does not have more than a few
      ports

   2) replace the linked list of ports by an array of pointers which is
      dynamically kmalloced. When a port is added or deleted we can simply
      allocate a new array, copy the corresponding pointers, and delete the
      old one. We then only need a pointer to this array, and an integer
      that tells us how many elements are in the array.

 */

/* return pointer to port structure - port is locked if found */
struct snd_seq_client_port *snd_seq_port_use_ptr(struct snd_seq_client *client,
						 int num)
{
	struct snd_seq_client_port *port;

	if (client == NULL)
		return NULL;
	guard(read_lock)(&client->ports_lock);
	list_for_each_entry(port, &client->ports_list_head, list) {
		if (port->addr.port == num) {
			if (port->closing)
				break; /* deleting now */
			snd_use_lock_use(&port->use_lock);
			return port;
		}
	}
	return NULL;		/* not found */
}

/* search for the next port - port is locked if found */
struct snd_seq_client_port *snd_seq_port_query_nearest(struct snd_seq_client *client,
						       struct snd_seq_port_info *pinfo)
{
	int num;
	struct snd_seq_client_port *port, *found;
	bool check_inactive = (pinfo->capability & SNDRV_SEQ_PORT_CAP_INACTIVE);

	num = pinfo->addr.port;
	found = NULL;
	guard(read_lock)(&client->ports_lock);
	list_for_each_entry(port, &client->ports_list_head, list) {
		if ((port->capability & SNDRV_SEQ_PORT_CAP_INACTIVE) &&
		    !check_inactive)
			continue; /* skip inactive ports */
		if (port->addr.port < num)
			continue;
		if (port->addr.port == num) {
			found = port;
			break;
		}
		if (found == NULL || port->addr.port < found->addr.port)
			found = port;
	}
	if (found) {
		if (found->closing)
			found = NULL;
		else
			snd_use_lock_use(&found->use_lock);
	}
	return found;
}

/* initialize snd_seq_port_subs_info */
static void port_subs_info_init(struct snd_seq_port_subs_info *grp)
{
	INIT_LIST_HEAD(&grp->list_head);
	grp->count = 0;
	grp->exclusive = 0;
	rwlock_init(&grp->list_lock);
	init_rwsem(&grp->list_mutex);
	grp->open = NULL;
	grp->close = NULL;
}

/* create a port, port number or a negative error code is returned
 * the caller needs to unref the port via snd_seq_port_unlock() appropriately
 */
int snd_seq_create_port(struct snd_seq_client *client, int port,
			struct snd_seq_client_port **port_ret)
{
	struct snd_seq_client_port *new_port, *p;
	int num;

	*port_ret = NULL;

	/* sanity check */
	if (snd_BUG_ON(!client))
		return -EINVAL;

	if (client->num_ports >= SNDRV_SEQ_MAX_PORTS) {
		pr_warn("ALSA: seq: too many ports for client %d\n",
			client->number);
		return -EINVAL;
	}

	/* create a new port */
	new_port = kzalloc(sizeof(*new_port), GFP_KERNEL);
	if (!new_port)
		return -ENOMEM;	/* failure, out of memory */
	/* init port data */
	new_port->addr.client = client->number;
	new_port->addr.port = -1;
	new_port->owner = THIS_MODULE;
	snd_use_lock_init(&new_port->use_lock);
	port_subs_info_init(&new_port->c_src);
	port_subs_info_init(&new_port->c_dest);
	snd_use_lock_use(&new_port->use_lock);

	num = max(port, 0);
	guard(mutex)(&client->ports_mutex);
	guard(write_lock_irq)(&client->ports_lock);
	list_for_each_entry(p, &client->ports_list_head, list) {
		if (p->addr.port == port) {
			kfree(new_port);
			return -EBUSY;
		}
		if (p->addr.port > num)
			break;
		if (port < 0) /* auto-probe mode */
			num = p->addr.port + 1;
	}
	/* insert the new port */
	list_add_tail(&new_port->list, &p->list);
	client->num_ports++;
	new_port->addr.port = num;	/* store the port number in the port */
	sprintf(new_port->name, "port-%d", num);
	*port_ret = new_port;

	return num;
} /* */ static int subscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, struct snd_seq_port_subscribe *info, int send_ack); static int unsubscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, struct snd_seq_port_subscribe *info, int send_ack); static struct snd_seq_client_port *get_client_port(struct snd_seq_addr *addr, struct snd_seq_client **cp) { struct snd_seq_client_port *p; *cp = snd_seq_client_use_ptr(addr->client); if (*cp) { p = snd_seq_port_use_ptr(*cp, addr->port); if (! p) { snd_seq_client_unlock(*cp); *cp = NULL; } return p; } return NULL; } static void delete_and_unsubscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_subscribers *subs, bool is_src, bool ack); static inline struct snd_seq_subscribers * get_subscriber(struct list_head *p, bool is_src) { if (is_src) return list_entry(p, struct snd_seq_subscribers, src_list); else return list_entry(p, struct snd_seq_subscribers, dest_list); } /* * remove all subscribers on the list * this is called from port_delete, for each src and dest list. */ static void clear_subscriber_list(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, int is_src) { struct list_head *p, *n; list_for_each_safe(p, n, &grp->list_head) { struct snd_seq_subscribers *subs; struct snd_seq_client *c; struct snd_seq_client_port *aport; subs = get_subscriber(p, is_src); if (is_src) aport = get_client_port(&subs->info.dest, &c); else aport = get_client_port(&subs->info.sender, &c); delete_and_unsubscribe_port(client, port, subs, is_src, false); if (!aport) { /* looks like the connected port is being deleted. * we decrease the counter, and when both ports are deleted * remove the subscriber info */ if (atomic_dec_and_test(&subs->ref_count)) kfree(subs); continue; } /* ok we got the connected port */ delete_and_unsubscribe_port(c, aport, subs, !is_src, true); kfree(subs); snd_seq_port_unlock(aport); snd_seq_client_unlock(c); } } /* delete port data */ static int port_delete(struct snd_seq_client *client, struct snd_seq_client_port *port) { /* set closing flag and wait for all port access are gone */ port->closing = 1; snd_use_lock_sync(&port->use_lock); /* clear subscribers info */ clear_subscriber_list(client, port, &port->c_src, true); clear_subscriber_list(client, port, &port->c_dest, false); if (port->private_free) port->private_free(port->private_data); snd_BUG_ON(port->c_src.count != 0); snd_BUG_ON(port->c_dest.count != 0); kfree(port); return 0; } /* delete a port with the given port id */ int snd_seq_delete_port(struct snd_seq_client *client, int port) { struct snd_seq_client_port *found = NULL, *p; scoped_guard(mutex, &client->ports_mutex) { guard(write_lock_irq)(&client->ports_lock); list_for_each_entry(p, &client->ports_list_head, list) { if (p->addr.port == port) { /* ok found. delete from the list at first */ list_del(&p->list); client->num_ports--; found = p; break; } } } if (found) return port_delete(client, found); else return -ENOENT; } /* delete the all ports belonging to the given client */ int snd_seq_delete_all_ports(struct snd_seq_client *client) { struct list_head deleted_list; struct snd_seq_client_port *port, *tmp; /* move the port list to deleted_list, and * clear the port list in the client data. 
*/ guard(mutex)(&client->ports_mutex); scoped_guard(write_lock_irq, &client->ports_lock) { if (!list_empty(&client->ports_list_head)) { list_add(&deleted_list, &client->ports_list_head); list_del_init(&client->ports_list_head); } else { INIT_LIST_HEAD(&deleted_list); } client->num_ports = 0; } /* remove each port in deleted_list */ list_for_each_entry_safe(port, tmp, &deleted_list, list) { list_del(&port->list); snd_seq_system_client_ev_port_exit(port->addr.client, port->addr.port); port_delete(client, port); } return 0; } /* set port info fields */ int snd_seq_set_port_info(struct snd_seq_client_port * port, struct snd_seq_port_info * info) { if (snd_BUG_ON(!port || !info)) return -EINVAL; /* set port name */ if (info->name[0]) strscpy(port->name, info->name, sizeof(port->name)); /* set capabilities */ port->capability = info->capability; /* get port type */ port->type = info->type; /* information about supported channels/voices */ port->midi_channels = info->midi_channels; port->midi_voices = info->midi_voices; port->synth_voices = info->synth_voices; /* timestamping */ port->timestamping = (info->flags & SNDRV_SEQ_PORT_FLG_TIMESTAMP) ? 1 : 0; port->time_real = (info->flags & SNDRV_SEQ_PORT_FLG_TIME_REAL) ? 1 : 0; port->time_queue = info->time_queue; /* UMP direction and group */ port->direction = info->direction; port->ump_group = info->ump_group; if (port->ump_group > SNDRV_UMP_MAX_GROUPS) port->ump_group = 0; /* fill default port direction */ if (!port->direction) { if (info->capability & SNDRV_SEQ_PORT_CAP_READ) port->direction |= SNDRV_SEQ_PORT_DIR_INPUT; if (info->capability & SNDRV_SEQ_PORT_CAP_WRITE) port->direction |= SNDRV_SEQ_PORT_DIR_OUTPUT; } return 0; } /* get port info fields */ int snd_seq_get_port_info(struct snd_seq_client_port * port, struct snd_seq_port_info * info) { if (snd_BUG_ON(!port || !info)) return -EINVAL; /* get port name */ strscpy(info->name, port->name, sizeof(info->name)); /* get capabilities */ info->capability = port->capability; /* get port type */ info->type = port->type; /* information about supported channels/voices */ info->midi_channels = port->midi_channels; info->midi_voices = port->midi_voices; info->synth_voices = port->synth_voices; /* get subscriber counts */ info->read_use = port->c_src.count; info->write_use = port->c_dest.count; /* timestamping */ info->flags = 0; if (port->timestamping) { info->flags |= SNDRV_SEQ_PORT_FLG_TIMESTAMP; if (port->time_real) info->flags |= SNDRV_SEQ_PORT_FLG_TIME_REAL; info->time_queue = port->time_queue; } /* UMP direction and group */ info->direction = port->direction; info->ump_group = port->ump_group; return 0; } /* * call callback functions (if any): * the callbacks are invoked only when the first (for connection) or * the last subscription (for disconnection) is done. Second or later * subscription results in increment of counter, but no callback is * invoked. * This feature is useful if these callbacks are associated with * initialization or termination of devices (see seq_midi.c). 
*/ static int subscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, struct snd_seq_port_subscribe *info, int send_ack) { int err = 0; if (!try_module_get(port->owner)) return -EFAULT; grp->count++; if (grp->open && grp->count == 1) { err = grp->open(port->private_data, info); if (err < 0) { module_put(port->owner); grp->count--; } } if (err >= 0 && send_ack && client->type == USER_CLIENT) snd_seq_client_notify_subscription(port->addr.client, port->addr.port, info, SNDRV_SEQ_EVENT_PORT_SUBSCRIBED); return err; } static int unsubscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_port_subs_info *grp, struct snd_seq_port_subscribe *info, int send_ack) { int err = 0; if (! grp->count) return -EINVAL; grp->count--; if (grp->close && grp->count == 0) err = grp->close(port->private_data, info); if (send_ack && client->type == USER_CLIENT) snd_seq_client_notify_subscription(port->addr.client, port->addr.port, info, SNDRV_SEQ_EVENT_PORT_UNSUBSCRIBED); module_put(port->owner); return err; } /* check if both addresses are identical */ static inline int addr_match(struct snd_seq_addr *r, struct snd_seq_addr *s) { return (r->client == s->client) && (r->port == s->port); } /* check the two subscribe info match */ /* if flags is zero, checks only sender and destination addresses */ static int match_subs_info(struct snd_seq_port_subscribe *r, struct snd_seq_port_subscribe *s) { if (addr_match(&r->sender, &s->sender) && addr_match(&r->dest, &s->dest)) { if (r->flags && r->flags == s->flags) return r->queue == s->queue; else if (! r->flags) return 1; } return 0; } static int check_and_subscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_subscribers *subs, bool is_src, bool exclusive, bool ack) { struct snd_seq_port_subs_info *grp; struct list_head *p; struct snd_seq_subscribers *s; int err; grp = is_src ? &port->c_src : &port->c_dest; guard(rwsem_write)(&grp->list_mutex); if (exclusive) { if (!list_empty(&grp->list_head)) return -EBUSY; } else { if (grp->exclusive) return -EBUSY; /* check whether already exists */ list_for_each(p, &grp->list_head) { s = get_subscriber(p, is_src); if (match_subs_info(&subs->info, &s->info)) return -EBUSY; } } err = subscribe_port(client, port, grp, &subs->info, ack); if (err < 0) { grp->exclusive = 0; return err; } /* add to list */ guard(write_lock_irq)(&grp->list_lock); if (is_src) list_add_tail(&subs->src_list, &grp->list_head); else list_add_tail(&subs->dest_list, &grp->list_head); grp->exclusive = exclusive; atomic_inc(&subs->ref_count); return 0; } /* called with grp->list_mutex held */ static void __delete_and_unsubscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_subscribers *subs, bool is_src, bool ack) { struct snd_seq_port_subs_info *grp; struct list_head *list; bool empty; grp = is_src ? &port->c_src : &port->c_dest; list = is_src ? &subs->src_list : &subs->dest_list; scoped_guard(write_lock_irq, &grp->list_lock) { empty = list_empty(list); if (!empty) list_del_init(list); grp->exclusive = 0; } if (!empty) unsubscribe_port(client, port, grp, &subs->info, ack); } static void delete_and_unsubscribe_port(struct snd_seq_client *client, struct snd_seq_client_port *port, struct snd_seq_subscribers *subs, bool is_src, bool ack) { struct snd_seq_port_subs_info *grp; grp = is_src ? 
&port->c_src : &port->c_dest; guard(rwsem_write)(&grp->list_mutex); __delete_and_unsubscribe_port(client, port, subs, is_src, ack); } /* connect two ports */ int snd_seq_port_connect(struct snd_seq_client *connector, struct snd_seq_client *src_client, struct snd_seq_client_port *src_port, struct snd_seq_client *dest_client, struct snd_seq_client_port *dest_port, struct snd_seq_port_subscribe *info) { struct snd_seq_subscribers *subs; bool exclusive; int err; subs = kzalloc(sizeof(*subs), GFP_KERNEL); if (!subs) return -ENOMEM; subs->info = *info; atomic_set(&subs->ref_count, 0); INIT_LIST_HEAD(&subs->src_list); INIT_LIST_HEAD(&subs->dest_list); exclusive = !!(info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE); err = check_and_subscribe_port(src_client, src_port, subs, true, exclusive, connector->number != src_client->number); if (err < 0) goto error; err = check_and_subscribe_port(dest_client, dest_port, subs, false, exclusive, connector->number != dest_client->number); if (err < 0) goto error_dest; return 0; error_dest: delete_and_unsubscribe_port(src_client, src_port, subs, true, connector->number != src_client->number); error: kfree(subs); return err; } /* remove the connection */ int snd_seq_port_disconnect(struct snd_seq_client *connector, struct snd_seq_client *src_client, struct snd_seq_client_port *src_port, struct snd_seq_client *dest_client, struct snd_seq_client_port *dest_port, struct snd_seq_port_subscribe *info) { struct snd_seq_port_subs_info *dest = &dest_port->c_dest; struct snd_seq_subscribers *subs; int err = -ENOENT; /* always start from deleting the dest port for avoiding concurrent * deletions */ scoped_guard(rwsem_write, &dest->list_mutex) { /* look for the connection */ list_for_each_entry(subs, &dest->list_head, dest_list) { if (match_subs_info(info, &subs->info)) { __delete_and_unsubscribe_port(dest_client, dest_port, subs, false, connector->number != dest_client->number); err = 0; break; } } } if (err < 0) return err; delete_and_unsubscribe_port(src_client, src_port, subs, true, connector->number != src_client->number); kfree(subs); return 0; } /* get matched subscriber */ int snd_seq_port_get_subscription(struct snd_seq_port_subs_info *src_grp, struct snd_seq_addr *dest_addr, struct snd_seq_port_subscribe *subs) { struct snd_seq_subscribers *s; int err = -ENOENT; guard(rwsem_read)(&src_grp->list_mutex); list_for_each_entry(s, &src_grp->list_head, src_list) { if (addr_match(dest_addr, &s->info.dest)) { *subs = s->info; err = 0; break; } } return err; } /* * Attach a device driver that wants to receive events from the * sequencer. Returns the new port number on success. * A driver that wants to receive the events converted to midi, will * use snd_seq_midisynth_register_port(). */ /* exported */ int snd_seq_event_port_attach(int client, struct snd_seq_port_callback *pcbp, int cap, int type, int midi_channels, int midi_voices, char *portname) { struct snd_seq_port_info portinfo; int ret; /* Set up the port */ memset(&portinfo, 0, sizeof(portinfo)); portinfo.addr.client = client; strscpy(portinfo.name, portname ? portname : "Unnamed port", sizeof(portinfo.name)); portinfo.capability = cap; portinfo.type = type; portinfo.kernel = pcbp; portinfo.midi_channels = midi_channels; portinfo.midi_voices = midi_voices; /* Create it */ ret = snd_seq_kernel_client_ctl(client, SNDRV_SEQ_IOCTL_CREATE_PORT, &portinfo); if (ret >= 0) ret = portinfo.addr.port; return ret; } EXPORT_SYMBOL(snd_seq_event_port_attach); /* * Detach the driver from a port. 
 */
/* exported */
int snd_seq_event_port_detach(int client, int port)
{
	struct snd_seq_port_info portinfo;
	int err;

	memset(&portinfo, 0, sizeof(portinfo));
	portinfo.addr.client = client;
	portinfo.addr.port = port;
	err = snd_seq_kernel_client_ctl(client,
					SNDRV_SEQ_IOCTL_DELETE_PORT,
					&portinfo);
	return err;
}
EXPORT_SYMBOL(snd_seq_event_port_detach);
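The two exported helpers above are the usual entry points for kernel code that needs a sequencer port without touching the client's port list directly. Below is a minimal sketch, not taken from the sources above, of how such a driver might attach and later detach a port; the client id is assumed to have been obtained from snd_seq_create_kernel_client(), and every identifier prefixed with example_ is hypothetical.

/*
 * Sketch of a kernel-side user of snd_seq_event_port_attach()/_detach().
 * example_client is assumed to be a valid id returned by
 * snd_seq_create_kernel_client(); the callback struct is left mostly
 * empty for brevity.
 */
#include <linux/module.h>
#include <sound/seq_kernel.h>

static int example_port = -1;

static int example_attach(int example_client)
{
	struct snd_seq_port_callback cb = {
		.owner = THIS_MODULE,
	};

	/* Create a write-capable, subscribable MIDI port. */
	example_port = snd_seq_event_port_attach(example_client, &cb,
						 SNDRV_SEQ_PORT_CAP_WRITE |
						 SNDRV_SEQ_PORT_CAP_SUBS_WRITE,
						 SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC,
						 16, 0, "example port");
	return example_port < 0 ? example_port : 0;
}

static void example_detach(int example_client)
{
	if (example_port >= 0)
		snd_seq_event_port_detach(example_client, example_port);
}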
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/export.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>

#define CREATE_TRACE_POINTS
#include <trace/events/notifier.h>

/*
 * Notifier list for kernel code which wants to be called
 * at shutdown. This is used to stop any idling DMA operations
 * and the like.
 */
BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);

/*
 * Notifier chain core routines.  The exported routines below
 * are layered on top of these, with appropriate locking added.
*/ static int notifier_chain_register(struct notifier_block **nl, struct notifier_block *n, bool unique_priority) { while ((*nl) != NULL) { if (unlikely((*nl) == n)) { WARN(1, "notifier callback %ps already registered", n->notifier_call); return -EEXIST; } if (n->priority > (*nl)->priority) break; if (n->priority == (*nl)->priority && unique_priority) return -EBUSY; nl = &((*nl)->next); } n->next = *nl; rcu_assign_pointer(*nl, n); trace_notifier_register((void *)n->notifier_call); return 0; } static int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) { while ((*nl) != NULL) { if ((*nl) == n) { rcu_assign_pointer(*nl, n->next); trace_notifier_unregister((void *)n->notifier_call); return 0; } nl = &((*nl)->next); } return -ENOENT; } /** * notifier_call_chain - Informs the registered notifiers about an event. * @nl: Pointer to head of the blocking notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function * @nr_to_call: Number of notifier functions to be called. Don't care * value of this parameter is -1. * @nr_calls: Records the number of notifications sent. Don't care * value of this field is NULL. * Return: notifier_call_chain returns the value returned by the * last notifier function called. */ static int notifier_call_chain(struct notifier_block **nl, unsigned long val, void *v, int nr_to_call, int *nr_calls) { int ret = NOTIFY_DONE; struct notifier_block *nb, *next_nb; nb = rcu_dereference_raw(*nl); while (nb && nr_to_call) { next_nb = rcu_dereference_raw(nb->next); #ifdef CONFIG_DEBUG_NOTIFIERS if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { WARN(1, "Invalid notifier called!"); nb = next_nb; continue; } #endif trace_notifier_run((void *)nb->notifier_call); ret = nb->notifier_call(nb, val, v); if (nr_calls) (*nr_calls)++; if (ret & NOTIFY_STOP_MASK) break; nb = next_nb; nr_to_call--; } return ret; } NOKPROBE_SYMBOL(notifier_call_chain); /** * notifier_call_chain_robust - Inform the registered notifiers about an event * and rollback on error. * @nl: Pointer to head of the blocking notifier chain * @val_up: Value passed unmodified to the notifier function * @val_down: Value passed unmodified to the notifier function when recovering * from an error on @val_up * @v: Pointer passed unmodified to the notifier function * * NOTE: It is important the @nl chain doesn't change between the two * invocations of notifier_call_chain() such that we visit the * exact same notifier callbacks; this rules out any RCU usage. * * Return: the return value of the @val_up call. */ static int notifier_call_chain_robust(struct notifier_block **nl, unsigned long val_up, unsigned long val_down, void *v) { int ret, nr = 0; ret = notifier_call_chain(nl, val_up, v, -1, &nr); if (ret & NOTIFY_STOP_MASK) notifier_call_chain(nl, val_down, v, nr-1, NULL); return ret; } /* * Atomic notifier chain routines. Registration and unregistration * use a spinlock, and call_chain is synchronized by RCU (no locks). */ /** * atomic_notifier_chain_register - Add notifier to an atomic notifier chain * @nh: Pointer to head of the atomic notifier chain * @n: New entry in notifier chain * * Adds a notifier to an atomic notifier chain. * * Returns 0 on success, %-EEXIST on error. 
*/ int atomic_notifier_chain_register(struct atomic_notifier_head *nh, struct notifier_block *n) { unsigned long flags; int ret; spin_lock_irqsave(&nh->lock, flags); ret = notifier_chain_register(&nh->head, n, false); spin_unlock_irqrestore(&nh->lock, flags); return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); /** * atomic_notifier_chain_register_unique_prio - Add notifier to an atomic notifier chain * @nh: Pointer to head of the atomic notifier chain * @n: New entry in notifier chain * * Adds a notifier to an atomic notifier chain if there is no other * notifier registered using the same priority. * * Returns 0 on success, %-EEXIST or %-EBUSY on error. */ int atomic_notifier_chain_register_unique_prio(struct atomic_notifier_head *nh, struct notifier_block *n) { unsigned long flags; int ret; spin_lock_irqsave(&nh->lock, flags); ret = notifier_chain_register(&nh->head, n, true); spin_unlock_irqrestore(&nh->lock, flags); return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_chain_register_unique_prio); /** * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain * @nh: Pointer to head of the atomic notifier chain * @n: Entry to remove from notifier chain * * Removes a notifier from an atomic notifier chain. * * Returns zero on success or %-ENOENT on failure. */ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *n) { unsigned long flags; int ret; spin_lock_irqsave(&nh->lock, flags); ret = notifier_chain_unregister(&nh->head, n); spin_unlock_irqrestore(&nh->lock, flags); synchronize_rcu(); return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); /** * atomic_notifier_call_chain - Call functions in an atomic notifier chain * @nh: Pointer to head of the atomic notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function * * Calls each function in a notifier chain in turn. The functions * run in an atomic context, so they must not block. * This routine uses RCU to synchronize with changes to the chain. * * If the return value of the notifier can be and'ed * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain() * will return immediately, with the return value of * the notifier function which halted execution. * Otherwise the return value is the return value * of the last notifier function called. */ int atomic_notifier_call_chain(struct atomic_notifier_head *nh, unsigned long val, void *v) { int ret; rcu_read_lock(); ret = notifier_call_chain(&nh->head, val, v, -1, NULL); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); NOKPROBE_SYMBOL(atomic_notifier_call_chain); /** * atomic_notifier_call_chain_is_empty - Check whether notifier chain is empty * @nh: Pointer to head of the atomic notifier chain * * Checks whether notifier chain is empty. * * Returns true is notifier chain is empty, false otherwise. */ bool atomic_notifier_call_chain_is_empty(struct atomic_notifier_head *nh) { return !rcu_access_pointer(nh->head); } /* * Blocking notifier chain routines. All access to the chain is * synchronized by an rwsem. */ static int __blocking_notifier_chain_register(struct blocking_notifier_head *nh, struct notifier_block *n, bool unique_priority) { int ret; /* * This code gets used during boot-up, when task switching is * not yet working and interrupts must remain disabled. At * such times we must not call down_write(). 
*/ if (unlikely(system_state == SYSTEM_BOOTING)) return notifier_chain_register(&nh->head, n, unique_priority); down_write(&nh->rwsem); ret = notifier_chain_register(&nh->head, n, unique_priority); up_write(&nh->rwsem); return ret; } /** * blocking_notifier_chain_register - Add notifier to a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @n: New entry in notifier chain * * Adds a notifier to a blocking notifier chain. * Must be called in process context. * * Returns 0 on success, %-EEXIST on error. */ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, struct notifier_block *n) { return __blocking_notifier_chain_register(nh, n, false); } EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); /** * blocking_notifier_chain_register_unique_prio - Add notifier to a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @n: New entry in notifier chain * * Adds a notifier to an blocking notifier chain if there is no other * notifier registered using the same priority. * * Returns 0 on success, %-EEXIST or %-EBUSY on error. */ int blocking_notifier_chain_register_unique_prio(struct blocking_notifier_head *nh, struct notifier_block *n) { return __blocking_notifier_chain_register(nh, n, true); } EXPORT_SYMBOL_GPL(blocking_notifier_chain_register_unique_prio); /** * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @n: Entry to remove from notifier chain * * Removes a notifier from a blocking notifier chain. * Must be called from process context. * * Returns zero on success or %-ENOENT on failure. */ int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, struct notifier_block *n) { int ret; /* * This code gets used during boot-up, when task switching is * not yet working and interrupts must remain disabled. At * such times we must not call down_write(). */ if (unlikely(system_state == SYSTEM_BOOTING)) return notifier_chain_unregister(&nh->head, n); down_write(&nh->rwsem); ret = notifier_chain_unregister(&nh->head, n); up_write(&nh->rwsem); return ret; } EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v) { int ret = NOTIFY_DONE; /* * We check the head outside the lock, but if this access is * racy then it does not matter what the result of the test * is, we re-check the list after having taken the lock anyway: */ if (rcu_access_pointer(nh->head)) { down_read(&nh->rwsem); ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); up_read(&nh->rwsem); } return ret; } EXPORT_SYMBOL_GPL(blocking_notifier_call_chain_robust); /** * blocking_notifier_call_chain - Call functions in a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function * * Calls each function in a notifier chain in turn. The functions * run in a process context, so they are allowed to block. * * If the return value of the notifier can be and'ed * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain() * will return immediately, with the return value of * the notifier function which halted execution. * Otherwise the return value is the return value * of the last notifier function called. 
*/ int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v) { int ret = NOTIFY_DONE; /* * We check the head outside the lock, but if this access is * racy then it does not matter what the result of the test * is, we re-check the list after having taken the lock anyway: */ if (rcu_access_pointer(nh->head)) { down_read(&nh->rwsem); ret = notifier_call_chain(&nh->head, val, v, -1, NULL); up_read(&nh->rwsem); } return ret; } EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); /* * Raw notifier chain routines. There is no protection; * the caller must provide it. Use at your own risk! */ /** * raw_notifier_chain_register - Add notifier to a raw notifier chain * @nh: Pointer to head of the raw notifier chain * @n: New entry in notifier chain * * Adds a notifier to a raw notifier chain. * All locking must be provided by the caller. * * Returns 0 on success, %-EEXIST on error. */ int raw_notifier_chain_register(struct raw_notifier_head *nh, struct notifier_block *n) { return notifier_chain_register(&nh->head, n, false); } EXPORT_SYMBOL_GPL(raw_notifier_chain_register); /** * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain * @nh: Pointer to head of the raw notifier chain * @n: Entry to remove from notifier chain * * Removes a notifier from a raw notifier chain. * All locking must be provided by the caller. * * Returns zero on success or %-ENOENT on failure. */ int raw_notifier_chain_unregister(struct raw_notifier_head *nh, struct notifier_block *n) { return notifier_chain_unregister(&nh->head, n); } EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v) { return notifier_call_chain_robust(&nh->head, val_up, val_down, v); } EXPORT_SYMBOL_GPL(raw_notifier_call_chain_robust); /** * raw_notifier_call_chain - Call functions in a raw notifier chain * @nh: Pointer to head of the raw notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function * * Calls each function in a notifier chain in turn. The functions * run in an undefined context. * All locking must be provided by the caller. * * If the return value of the notifier can be and'ed * with %NOTIFY_STOP_MASK then raw_notifier_call_chain() * will return immediately, with the return value of * the notifier function which halted execution. * Otherwise the return value is the return value * of the last notifier function called. */ int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v) { return notifier_call_chain(&nh->head, val, v, -1, NULL); } EXPORT_SYMBOL_GPL(raw_notifier_call_chain); /* * SRCU notifier chain routines. Registration and unregistration * use a mutex, and call_chain is synchronized by SRCU (no locks). */ /** * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain * @nh: Pointer to head of the SRCU notifier chain * @n: New entry in notifier chain * * Adds a notifier to an SRCU notifier chain. * Must be called in process context. * * Returns 0 on success, %-EEXIST on error. */ int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *n) { int ret; /* * This code gets used during boot-up, when task switching is * not yet working and interrupts must remain disabled. At * such times we must not call mutex_lock(). 
*/ if (unlikely(system_state == SYSTEM_BOOTING)) return notifier_chain_register(&nh->head, n, false); mutex_lock(&nh->mutex); ret = notifier_chain_register(&nh->head, n, false); mutex_unlock(&nh->mutex); return ret; } EXPORT_SYMBOL_GPL(srcu_notifier_chain_register); /** * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain * @nh: Pointer to head of the SRCU notifier chain * @n: Entry to remove from notifier chain * * Removes a notifier from an SRCU notifier chain. * Must be called from process context. * * Returns zero on success or %-ENOENT on failure. */ int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, struct notifier_block *n) { int ret; /* * This code gets used during boot-up, when task switching is * not yet working and interrupts must remain disabled. At * such times we must not call mutex_lock(). */ if (unlikely(system_state == SYSTEM_BOOTING)) return notifier_chain_unregister(&nh->head, n); mutex_lock(&nh->mutex); ret = notifier_chain_unregister(&nh->head, n); mutex_unlock(&nh->mutex); synchronize_srcu(&nh->srcu); return ret; } EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); /** * srcu_notifier_call_chain - Call functions in an SRCU notifier chain * @nh: Pointer to head of the SRCU notifier chain * @val: Value passed unmodified to notifier function * @v: Pointer passed unmodified to notifier function * * Calls each function in a notifier chain in turn. The functions * run in a process context, so they are allowed to block. * * If the return value of the notifier can be and'ed * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain() * will return immediately, with the return value of * the notifier function which halted execution. * Otherwise the return value is the return value * of the last notifier function called. */ int srcu_notifier_call_chain(struct srcu_notifier_head *nh, unsigned long val, void *v) { int ret; int idx; idx = srcu_read_lock(&nh->srcu); ret = notifier_call_chain(&nh->head, val, v, -1, NULL); srcu_read_unlock(&nh->srcu, idx); return ret; } EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); /** * srcu_init_notifier_head - Initialize an SRCU notifier head * @nh: Pointer to head of the srcu notifier chain * * Unlike other sorts of notifier heads, SRCU notifier heads require * dynamic initialization. Be sure to call this routine before * calling any of the other SRCU notifier routines for this head. * * If an SRCU notifier head is deallocated, it must first be cleaned * up by calling srcu_cleanup_notifier_head(). Otherwise the head's * per-cpu data (used by the SRCU mechanism) will leak. */ void srcu_init_notifier_head(struct srcu_notifier_head *nh) { mutex_init(&nh->mutex); if (init_srcu_struct(&nh->srcu) < 0) BUG(); nh->head = NULL; } EXPORT_SYMBOL_GPL(srcu_init_notifier_head); static ATOMIC_NOTIFIER_HEAD(die_chain); int notrace notify_die(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) { struct die_args args = { .regs = regs, .str = str, .err = err, .trapnr = trap, .signr = sig, }; RCU_LOCKDEP_WARN(!rcu_is_watching(), "notify_die called but RCU thinks we're quiescent"); return atomic_notifier_call_chain(&die_chain, val, &args); } NOKPROBE_SYMBOL(notify_die); int register_die_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); int unregister_die_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&die_chain, nb); } EXPORT_SYMBOL_GPL(unregister_die_notifier);
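/*
 * Illustrative sketch (not part of the kernel sources above or below): how a
 * client of the notifier API implemented above typically uses a blocking
 * chain. The "foo" chain, event value and callback are hypothetical; only
 * BLOCKING_NOTIFIER_HEAD(), blocking_notifier_chain_register(),
 * blocking_notifier_call_chain() and blocking_notifier_chain_unregister()
 * come from the code above.
 */
#include <linux/notifier.h>
#include <linux/printk.h>

#define FOO_EVENT_RESET	1	/* hypothetical event published by "foo" */

static BLOCKING_NOTIFIER_HEAD(foo_chain);

static int foo_listener_cb(struct notifier_block *nb, unsigned long event,
			   void *data)
{
	if (event == FOO_EVENT_RESET)
		pr_info("foo: reset notification received\n");

	/* NOTIFY_OK/NOTIFY_DONE keep the chain walking; NOTIFY_STOP halts it. */
	return NOTIFY_OK;
}

static struct notifier_block foo_listener = {
	.notifier_call	= foo_listener_cb,
	.priority	= 10,	/* higher-priority callbacks are called first */
};

static int foo_example(void)
{
	int ret;

	/* Returns 0, or -EEXIST if this block is already on the chain. */
	ret = blocking_notifier_chain_register(&foo_chain, &foo_listener);
	if (ret)
		return ret;

	/* Runs every registered callback in priority order, in process context. */
	blocking_notifier_call_chain(&foo_chain, FOO_EVENT_RESET, NULL);

	return blocking_notifier_chain_unregister(&foo_chain, &foo_listener);
}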
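/*
 * Illustrative sketch (hypothetical, not from the sources): intended use of
 * the *_call_chain_robust() helpers implemented above. A subsystem sends a
 * "prepare" event; if any callback refuses, the callbacks that already ran
 * are re-notified with a matching "abort" event so they can undo their work.
 * The "bar" chain and both event values are made up; the rollback itself is
 * what notifier_call_chain_robust() above provides.
 */
#include <linux/notifier.h>

#define BAR_PREPARE	1	/* hypothetical @val_up event */
#define BAR_ABORT	2	/* hypothetical @val_down event */

static BLOCKING_NOTIFIER_HEAD(bar_chain);

static int bar_do_transition(void *state)
{
	int ret;

	/*
	 * Every callback sees BAR_PREPARE. If one returns a value with
	 * NOTIFY_STOP_MASK set (e.g. NOTIFY_BAD), the callbacks that were
	 * already called are invoked again with BAR_ABORT.
	 */
	ret = blocking_notifier_call_chain_robust(&bar_chain, BAR_PREPARE,
						  BAR_ABORT, state);

	/* notifier_to_errno() maps NOTIFY_BAD-style returns to -errno. */
	return notifier_to_errno(ret);
}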
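/*
 * Illustrative sketch (hypothetical callback): hooking the die_chain that the
 * code above exports through register_die_notifier()/unregister_die_notifier().
 * Architecture trap handlers feed the chain via notify_die(); debuggers and
 * crash handlers listen roughly like this.
 */
#include <linux/kdebug.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int example_die_handler(struct notifier_block *nb, unsigned long val,
			       void *data)
{
	struct die_args *args = data;

	/* Called from atomic/exception context: inspect, do not sleep. */
	pr_emerg("die event %lu: %s (trap %d, err %ld)\n",
		 val, args->str, args->trapnr, args->err);

	return NOTIFY_DONE;	/* let the remaining die notifiers run */
}

static struct notifier_block example_die_nb = {
	.notifier_call = example_die_handler,
};

/* Pair register_die_notifier(&example_die_nb) with unregister_die_notifier(). */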
// SPDX-License-Identifier: GPL-2.0-or-later /* * Xbox gamepad driver * * Copyright (c) 2002 Marko Friedemann <mfr@bmx-chemnitz.de> * 2004 Oliver Schwartz <Oliver.Schwartz@gmx.de>, * Steven Toth <steve@toth.demon.co.uk>, * Franz Lehner <franz@caos.at>, * Ivan Hawkes <blackhawk@ivanhawkes.com> * 2005 Dominic Cerquetti <binary1230@yahoo.com> *
2006 Adam Buchbinder <adam.buchbinder@gmail.com> * 2007 Jan Kratochvil <honza@jikos.cz> * 2010 Christoph Fritz <chf.fritz@googlemail.com> * * This driver is based on: * - information from http://euc.jp/periphs/xbox-controller.ja.html * - the iForce driver drivers/char/joystick/iforce.c * - the skeleton-driver drivers/usb/usb-skeleton.c * - Xbox 360 information http://www.free60.org/wiki/Gamepad * - Xbox One information https://github.com/quantus/xbox-one-controller-protocol * * Thanks to: * - ITO Takayuki for providing essential xpad information on his website * - Vojtech Pavlik - iforce driver / input subsystem * - Greg Kroah-Hartman - usb-skeleton driver * - Xbox Linux project - extra USB IDs * - Pekka Pöyry (quantus) - Xbox One controller reverse-engineering * * TODO: * - fine tune axes (especially trigger axes) * - fix "analog" buttons (reported as digital now) * - get rumble working * - need USB IDs for other dance pads * * History: * * 2002-06-27 - 0.0.1 : first version, just said "XBOX HID controller" * * 2002-07-02 - 0.0.2 : basic working version * - all axes and 9 of the 10 buttons work (German InterAct device) * - the black button does not work * * 2002-07-14 - 0.0.3 : rework by Vojtech Pavlik * - indentation fixes * - usb + input init sequence fixes * * 2002-07-16 - 0.0.4 : minor changes, merge with Vojtech's v0.0.3 * - verified the lack of HID and report descriptors * - verified that ALL buttons WORK * - fixed d-pad to axes mapping * * 2002-07-17 - 0.0.5 : simplified d-pad handling * * 2004-10-02 - 0.0.6 : DDR pad support * - borrowed from the Xbox Linux kernel * - USB IDs for commonly used dance pads are present * - dance pads will map D-PAD to buttons, not axes * - pass the module parameter 'dpad_to_buttons' to force * the D-PAD to map to buttons if your pad is not detected * * Later changes can be tracked in SCM.
*/ #include <linux/bits.h> #include <linux/kernel.h> #include <linux/input.h> #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/usb/input.h> #include <linux/usb/quirks.h> #define XPAD_PKT_LEN 64 /* * xbox d-pads should map to buttons, as is required for DDR pads * but we map them to axes when possible to simplify things */ #define MAP_DPAD_TO_BUTTONS (1 << 0) #define MAP_TRIGGERS_TO_BUTTONS (1 << 1) #define MAP_STICKS_TO_NULL (1 << 2) #define MAP_SELECT_BUTTON (1 << 3) #define MAP_PADDLES (1 << 4) #define MAP_PROFILE_BUTTON (1 << 5) #define DANCEPAD_MAP_CONFIG (MAP_DPAD_TO_BUTTONS | \ MAP_TRIGGERS_TO_BUTTONS | MAP_STICKS_TO_NULL) #define XTYPE_XBOX 0 #define XTYPE_XBOX360 1 #define XTYPE_XBOX360W 2 #define XTYPE_XBOXONE 3 #define XTYPE_UNKNOWN 4 /* Send power-off packet to xpad360w after holding the mode button for this many * seconds */ #define XPAD360W_POWEROFF_TIMEOUT 5 #define PKT_XB 0 #define PKT_XBE1 1 #define PKT_XBE2_FW_OLD 2 #define PKT_XBE2_FW_5_EARLY 3 #define PKT_XBE2_FW_5_11 4 static bool dpad_to_buttons; module_param(dpad_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(dpad_to_buttons, "Map D-PAD to buttons rather than axes for unknown pads"); static bool triggers_to_buttons; module_param(triggers_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(triggers_to_buttons, "Map triggers to buttons rather than axes for unknown pads"); static bool sticks_to_null; module_param(sticks_to_null, bool, S_IRUGO); MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads"); static bool auto_poweroff = true; module_param(auto_poweroff, bool, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(auto_poweroff, "Power off wireless controllers on suspend"); static const struct xpad_device { u16 idVendor; u16 idProduct; char *name; u8 mapping; u8 xtype; } xpad_device[] = { /* Please keep this list sorted by vendor and product ID. */ { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 }, { 0x03f0, 0x038D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wired */ { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */ { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE }, { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, /* v2 */ { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. 
Controller", 0, XTYPE_XBOX }, { 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX }, { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, { 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX }, { 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX }, { 0x045e, 0x0288, "Microsoft Xbox Controller S v2", 0, XTYPE_XBOX }, { 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, { 0x045e, 0x028f, "Microsoft X-Box 360 pad v2", 0, XTYPE_XBOX360 }, { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE }, { 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x02ea, "Microsoft X-Box One S pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x0b0a, "Microsoft X-Box Adaptive Controller", MAP_PROFILE_BUTTON, XTYPE_XBOXONE }, { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 }, { 0x046d, 0xc242, "Logitech Chillstream Controller", 0, XTYPE_XBOX360 }, { 0x046d, 0xca84, "Logitech Xbox Cordless Controller", 0, XTYPE_XBOX }, { 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX }, { 0x046d, 0xca8a, "Logitech Precision Vibration Feedback Wheel", 0, XTYPE_XBOX }, { 0x046d, 0xcaa3, "Logitech DriveFx Racing Wheel", 0, XTYPE_XBOX360 }, { 0x056e, 0x2004, "Elecom JC-U3613M", 0, XTYPE_XBOX360 }, { 0x05fd, 0x1007, "Mad Catz Controller (unverified)", 0, XTYPE_XBOX }, { 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX }, { 0x05fe, 0x3030, "Chic Controller", 0, XTYPE_XBOX }, { 0x05fe, 0x3031, "Chic Controller", 0, XTYPE_XBOX }, { 0x062a, 0x0020, "Logic3 Xbox GamePad", 0, XTYPE_XBOX }, { 0x062a, 0x0033, "Competition Pro Steering Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0201, "Saitek Adrenalin", 0, XTYPE_XBOX }, { 0x06a3, 0xf51a, "Saitek P3600", 0, XTYPE_XBOX360 }, { 0x0738, 0x4506, "Mad Catz 4506 Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX }, { 0x0738, 0x4520, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, { 0x0738, 0x4522, "Mad Catz LumiCON", 0, XTYPE_XBOX }, { 0x0738, 0x4526, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, { 0x0738, 0x4530, "Mad Catz Universal MC2 Racing Wheel and Pedals", 0, XTYPE_XBOX }, { 0x0738, 0x4536, "Mad Catz MicroCON", 0, XTYPE_XBOX }, { 0x0738, 0x4540, "Mad Catz Beat Pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4556, "Mad Catz Lynx Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4586, "Mad Catz MicroCon Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4588, "Mad Catz Blaster", 0, XTYPE_XBOX }, { 0x0738, 0x45ff, "Mad Catz Beat Pad (w/ Handle)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4716, "Mad Catz Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4718, "Mad Catz Street Fighter IV FightStick SE", 0, XTYPE_XBOX360 }, { 0x0738, 0x4726, "Mad Catz 
Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4728, "Mad Catz Street Fighter IV FightPad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4736, "Mad Catz MicroCon Gamepad", 0, XTYPE_XBOX360 }, { 0x0738, 0x4738, "Mad Catz Wired Xbox 360 Controller (SFIV)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4740, "Mad Catz Beat Pad", 0, XTYPE_XBOX360 }, { 0x0738, 0x4743, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4758, "Mad Catz Arcade Game Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4a01, "Mad Catz FightStick TE 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0738, 0x6040, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x9871, "Mad Catz Portable Drum", 0, XTYPE_XBOX360 }, { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xb738, "Mad Catz MVC2TE Stick 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", XTYPE_XBOX360 }, { 0x0738, 0xcb02, "Saitek Cyborg Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb03, "Saitek P3200 Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8809, "RedOctane Xbox Dance Pad", DANCEPAD_MAP_CONFIG, XTYPE_XBOX }, { 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX }, { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX }, { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x1103, "Radica Gamester Reflex", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX }, { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x3510, "Radica Gamester", 0, XTYPE_XBOX }, { 0x0e6f, 0x0003, "Logic3 Freebird wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0005, "Eclipse wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0006, "Edge wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0008, "After Glow Pro Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x011f, "Rock Candy Gamepad Wired Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0131, "PDP EA Sports Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0133, "Xbox 360 Wired Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0139, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x013a, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0164, "PDP Battlefield One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0165, "PDP Titanfall 2", 0, XTYPE_XBOXONE }, 
{ 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a0, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a1, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a2, "PDP Wired Controller for Xbox One - Crimson Red", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a7, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a8, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ad, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02b3, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02b8, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0413, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0501, "PDP Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0xf900, "PDP Afterglow AX.1", 0, XTYPE_XBOX360 }, { 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX }, { 0x0e8f, 0x3008, "Generic xbox control (dealextreme)", 0, XTYPE_XBOX }, { 0x0f0d, 0x000a, "Hori Co. DOA4 FightStick", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000c, "Hori PadEX Turbo", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x001b, "Hori Real Arcade Pro VX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0063, "Hori Real Arcade Pro Hayabusa (USA) Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x0067, "HORIPAD ONE", 0, XTYPE_XBOXONE }, { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0303, "Mortal Kombat Klassic FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 }, { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, 
XTYPE_XBOX }, { 0x1430, 0xf801, "RedOctane Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0604, "Bigben Interactive DAIJA Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1532, 0x0a00, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE }, { 0x1532, 0x0a29, "Razer Wolverine V2", 0, XTYPE_XBOXONE }, { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 }, { 0x162e, 0xbeef, "Joytech Neo-Se Take2", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf018, "Mad Catz Street Fighter IV SE Fighting Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf019, "Mad Catz Brawlstick for Xbox 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf021, "Mad Cats Ghost Recon FS GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf023, "MLG Pro Circuit Controller (Xbox)", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf025, "Mad Catz Call Of Duty", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf027, "Mad Catz FPS Pro", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf02e, "Mad Catz Fightpad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf030, "Mad Catz Xbox 360 MC2 MicroCon Racing Wheel", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf036, "Mad Catz MicroCon GamePad Pro", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf038, "Street Fighter IV FightStick TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf039, "Mad Catz MvC2 TE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03a, "Mad Catz SFxT Fightstick Pro", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03d, "Street Fighter IV Arcade Stick TE - Chun Li", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03e, "Mad Catz MLG FightStick TE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03f, "Mad Catz FightStick SoulCaliber", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf042, "Mad Catz FightStick TES+", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf080, "Mad Catz FightStick TE2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf501, "HoriPad EX2 Turbo", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf502, "Hori Real Arcade Pro.VX SA", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf503, "Hori Fighting Stick VX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf504, "Hori Real Arcade Pro. 
EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf505, "Hori Fighting Stick EX2B", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf506, "Hori Real Arcade Pro.EX Premium VLX", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf900, "Harmonix Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf904, "PDP Versus Fighting Pad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf906, "MortalKombat FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x530a, "Xbox 360 Pro EX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x531a, "PowerA Pro Ex", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5397, "FUS1ON Tournament Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x541a, "PowerA Xbox One Mini Wired Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE }, { 0x24c6, 0x543a, "PowerA Xbox One wired controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5502, "Hori Fighting Stick VX Alt", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5503, "Hori Fighting Edge", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 }, { 0x24c6, 0x550d, "Hori GEM Xbox controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x550e, "Hori Real Arcade Pro V Kai 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x561a, "PowerA FUSION Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5b00, "ThrustMaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b02, "Thrustmaster, Inc. 
GPX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x24c6, 0xfafe, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 }, { 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE }, { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1220, "Wooting Two HE", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } }; /* buttons shared with xbox and xbox360 */ static const signed short xpad_common_btn[] = { BTN_A, BTN_B, BTN_X, BTN_Y, /* "analog" buttons */ BTN_START, BTN_SELECT, BTN_THUMBL, BTN_THUMBR, /* start/back/sticks */ -1 /* terminating entry */ }; /* original xbox controllers only */ static const signed short xpad_btn[] = { BTN_C, BTN_Z, /* "analog" buttons */ -1 /* terminating entry */ }; /* used when dpad is mapped to buttons */ static const signed short xpad_btn_pad[] = { BTN_TRIGGER_HAPPY1, BTN_TRIGGER_HAPPY2, /* d-pad left, right */ BTN_TRIGGER_HAPPY3, BTN_TRIGGER_HAPPY4, /* d-pad up, down */ -1 /* terminating entry */ }; /* used when triggers are mapped to buttons */ static const signed short xpad_btn_triggers[] = { BTN_TL2, BTN_TR2, /* triggers left/right */ -1 }; static const signed short xpad360_btn[] = { /* buttons for x360 controller */ BTN_TL, BTN_TR, /* Button LB/RB */ BTN_MODE, /* The big X button */ -1 }; static const signed short xpad_abs[] = { ABS_X, ABS_Y, /* left stick */ ABS_RX, ABS_RY, /* right stick */ -1 /* terminating entry */ }; /* used when dpad is mapped to axes */ static const signed short xpad_abs_pad[] = { ABS_HAT0X, ABS_HAT0Y, /* d-pad axes */ -1 /* terminating entry */ }; /* used when triggers are mapped to axes */ static const signed short xpad_abs_triggers[] = { ABS_Z, ABS_RZ, /* triggers left/right */ -1 }; /* used when the controller has extra paddle buttons */ static const signed short xpad_btn_paddles[] = { BTN_TRIGGER_HAPPY5, BTN_TRIGGER_HAPPY6, /* paddle upper right, lower right */ BTN_TRIGGER_HAPPY7, BTN_TRIGGER_HAPPY8, /* paddle upper left, lower left */ -1 /* terminating entry */ }; /* * Xbox 360 has a vendor-specific class, so we cannot match it with only * USB_INTERFACE_INFO (also specifically refused by USB subsystem), so we * match against vendor id as well. Wired Xbox 360 devices have protocol 1, * wireless controllers have protocol 129. 
*/ #define XPAD_XBOX360_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 93, \ .bInterfaceProtocol = (pr) #define XPAD_XBOX360_VENDOR(vend) \ { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 1) }, \ { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 129) } /* The Xbox One controller uses subclass 71 and protocol 208. */ #define XPAD_XBOXONE_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 71, \ .bInterfaceProtocol = (pr) #define XPAD_XBOXONE_VENDOR(vend) \ { XPAD_XBOXONE_VENDOR_PROTOCOL((vend), 208) } static const struct usb_device_id xpad_table[] = { /* * Please keep this list sorted by vendor ID. Note that there are 2 * macros - XPAD_XBOX360_VENDOR and XPAD_XBOXONE_VENDOR. */ { USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */ XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ XPAD_XBOX360_VENDOR(0x056e), /* Elecom JC-U3613M */ XPAD_XBOX360_VENDOR(0x06a3), /* Saitek P3600 */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz Xbox 360 controllers */ { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */ XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */ XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */ XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f Xbox One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori controllers */ XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori controllers */ XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries controllers */ XPAD_XBOXONE_VENDOR(0x10f5), /* Turtle Beach Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ XPAD_XBOX360_VENDOR(0x11ff), /* PXN V900 */ XPAD_XBOX360_VENDOR(0x1209), /* Ardwiino Controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* Xbox 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* Bigben Interactive controllers */ XPAD_XBOX360_VENDOR(0x1532), /* Razer Sabertooth */ XPAD_XBOXONE_VENDOR(0x1532), /* Razer Wildcat */ XPAD_XBOX360_VENDOR(0x15e4), /* Numark Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */ XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */ 
XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller */ XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller for Xbox */ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke Xbox One pad */ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir controllers */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ { } }; MODULE_DEVICE_TABLE(usb, xpad_table); struct xboxone_init_packet { u16 idVendor; u16 idProduct; const u8 *data; u8 len; }; #define XBOXONE_INIT_PKT(_vid, _pid, _data) \ { \ .idVendor = (_vid), \ .idProduct = (_pid), \ .data = (_data), \ .len = ARRAY_SIZE(_data), \ } /* * starting with xbox one, the game input protocol is used * magic numbers are taken from * - https://github.com/xpadneo/gip-dissector/blob/main/src/gip-dissector.lua * - https://github.com/medusalix/xone/blob/master/bus/protocol.c */ #define GIP_CMD_ACK 0x01 #define GIP_CMD_IDENTIFY 0x04 #define GIP_CMD_POWER 0x05 #define GIP_CMD_AUTHENTICATE 0x06 #define GIP_CMD_VIRTUAL_KEY 0x07 #define GIP_CMD_RUMBLE 0x09 #define GIP_CMD_LED 0x0a #define GIP_CMD_FIRMWARE 0x0c #define GIP_CMD_INPUT 0x20 #define GIP_SEQ0 0x00 #define GIP_OPT_ACK 0x10 #define GIP_OPT_INTERNAL 0x20 /* * length of the command payload encoded with * https://en.wikipedia.org/wiki/LEB128 * which is a no-op for N < 128 */ #define GIP_PL_LEN(N) (N) /* * payload specific defines */ #define GIP_PWR_ON 0x00 #define GIP_LED_ON 0x01 #define GIP_MOTOR_R BIT(0) #define GIP_MOTOR_L BIT(1) #define GIP_MOTOR_RT BIT(2) #define GIP_MOTOR_LT BIT(3) #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) #define GIP_WIRED_INTF_DATA 0 #define GIP_WIRED_INTF_AUDIO 1 /* * This packet is required for all Xbox One pads with 2015 * or later firmware installed (or present from the factory). */ static const u8 xboxone_power_on[] = { GIP_CMD_POWER, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(1), GIP_PWR_ON }; /* * This packet is required for Xbox One S (0x045e:0x02ea) * and Xbox One Elite Series 2 (0x045e:0x0b00) pads to * initialize the controller that was previously used in * Bluetooth mode. */ static const u8 xboxone_s_init[] = { GIP_CMD_POWER, GIP_OPT_INTERNAL, GIP_SEQ0, 0x0f, 0x06 }; /* * This packet is required to get additional input data * from Xbox One Elite Series 2 (0x045e:0x0b00) pads. * We mostly do this right now to get paddle data */ static const u8 extra_input_packet_init[] = { 0x4d, 0x10, 0x01, 0x02, 0x07, 0x00 }; /* * This packet is required for the Titanfall 2 Xbox One pads * (0x0e6f:0x0165) to finish initialization and for Hori pads * (0x0f0d:0x0067) to make the analog sticks work. */ static const u8 xboxone_hori_ack_id[] = { GIP_CMD_ACK, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_CMD_IDENTIFY, GIP_OPT_INTERNAL, 0x3a, 0x00, 0x00, 0x00, 0x80, 0x00 }; /* * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_pdp_led_on[] = { GIP_CMD_LED, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(3), 0x00, GIP_LED_ON, 0x14 }; /* * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), * (0x0e6f:0x02a4), (0x0e6f:0x02a6). 
*/ static const u8 xboxone_pdp_auth[] = { GIP_CMD_AUTHENTICATE, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(2), 0x01, 0x00 }; /* * A specific rumble packet is required for some PowerA pads to start * sending input reports. One of those pads is (0x24c6:0x543a). */ static const u8 xboxone_rumblebegin_init[] = { GIP_CMD_RUMBLE, 0x00, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_MOTOR_ALL, 0x00, 0x00, 0x1D, 0x1D, 0xFF, 0x00, 0x00 }; /* * A rumble packet with zero FF intensity will immediately * terminate the rumbling required to init PowerA pads. * This should happen fast enough that the motors don't * spin up to enough speed to actually vibrate the gamepad. */ static const u8 xboxone_rumbleend_init[] = { GIP_CMD_RUMBLE, 0x00, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_MOTOR_ALL, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* * This specifies the selection of init packets that a gamepad * will be sent on init *and* the order in which they will be * sent. The correct sequence number will be added when the * packet is going to be sent. */ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x0165, xboxone_hori_ack_id), XBOXONE_INIT_PKT(0x0f0d, 0x0067, xboxone_hori_ack_id), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_power_on), XBOXONE_INIT_PKT(0x045e, 0x02ea, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumbleend_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumbleend_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumbleend_init), }; struct xpad_output_packet { u8 data[XPAD_PKT_LEN]; u8 len; bool pending; }; #define XPAD_OUT_CMD_IDX 0 #define XPAD_OUT_FF_IDX 1 #define XPAD_OUT_LED_IDX (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF)) #define XPAD_NUM_OUT_PACKETS (1 + \ IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \ IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS)) struct usb_xpad { struct input_dev *dev; /* input device interface */ struct input_dev __rcu *x360w_dev; struct usb_device *udev; /* usb device */ struct usb_interface *intf; /* usb interface */ bool pad_present; bool input_created; struct urb *irq_in; /* urb for interrupt in report */ unsigned char *idata; /* input data */ dma_addr_t idata_dma; struct urb *irq_out; /* urb for interrupt out report */ struct usb_anchor irq_out_anchor; bool irq_out_active; /* we must not use an active URB */ u8 odata_serial; /* serial number for xbox one protocol */ unsigned char *odata; /* output data */ dma_addr_t odata_dma; spinlock_t odata_lock; struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS]; int last_out_packet; int init_seq; #if defined(CONFIG_JOYSTICK_XPAD_LEDS) struct xpad_led *led; #endif char phys[64]; /* physical device path */ int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ int packet_type; /* type of the extended packet */ int pad_nr; /* the order x360 pads were attached */ const char *name; /* name of the device */ struct work_struct work; /* init/remove device from callback */ time64_t mode_btn_down_ts; }; static int xpad_init_input(struct usb_xpad *xpad); static void xpad_deinit_input(struct usb_xpad *xpad); static void xpadone_ack_mode_report(struct usb_xpad *xpad, u8 seq_num); static void 
xpad360w_poweroff_controller(struct usb_xpad *xpad); /* * xpad_process_packet * * Completes a request by converting the data into events for the * input subsystem. * * The used report descriptor was taken from ITO Takayuki's website: * http://euc.jp/periphs/xbox-controller.ja.html */ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 12))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 14))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 16))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 18))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, data[10]); input_report_key(dev, BTN_TR2, data[11]); } else { input_report_abs(dev, ABS_Z, data[10]); input_report_abs(dev, ABS_RZ, data[11]); } /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[2] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } /* start/back buttons and stick press left/right */ input_report_key(dev, BTN_START, data[2] & BIT(4)); input_report_key(dev, BTN_SELECT, data[2] & BIT(5)); input_report_key(dev, BTN_THUMBL, data[2] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[2] & BIT(7)); /* "analog" buttons A, B, X, Y */ input_report_key(dev, BTN_A, data[4]); input_report_key(dev, BTN_B, data[5]); input_report_key(dev, BTN_X, data[6]); input_report_key(dev, BTN_Y, data[7]); /* "analog" buttons black, white */ input_report_key(dev, BTN_C, data[8]); input_report_key(dev, BTN_Z, data[9]); input_sync(dev); } /* * xpad360_process_packet * * Completes a request by converting the data into events for the * input subsystem. It is version for xbox 360 controller * * The used report descriptor was taken from: * http://www.free60.org/wiki/Gamepad */ static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev, u16 cmd, unsigned char *data) { /* valid pad data */ if (data[0] != 0x00) return; /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[2] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & BIT(1)); } /* * This should be a simple else block. However historically * xbox360w has mapped DPAD to buttons while xbox360 did not. This * made no sense, but now we can not just switch back and have to * support both behaviors. 
*/ if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || xpad->xtype == XTYPE_XBOX360W) { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } /* start/back buttons */ input_report_key(dev, BTN_START, data[2] & BIT(4)); input_report_key(dev, BTN_SELECT, data[2] & BIT(5)); /* stick press left/right */ input_report_key(dev, BTN_THUMBL, data[2] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[2] & BIT(7)); /* buttons A,B,X,Y,TL,TR and MODE */ input_report_key(dev, BTN_A, data[3] & BIT(4)); input_report_key(dev, BTN_B, data[3] & BIT(5)); input_report_key(dev, BTN_X, data[3] & BIT(6)); input_report_key(dev, BTN_Y, data[3] & BIT(7)); input_report_key(dev, BTN_TL, data[3] & BIT(0)); input_report_key(dev, BTN_TR, data[3] & BIT(1)); input_report_key(dev, BTN_MODE, data[3] & BIT(2)); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 6))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 8))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 10))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 12))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, data[4]); input_report_key(dev, BTN_TR2, data[5]); } else { input_report_abs(dev, ABS_Z, data[4]); input_report_abs(dev, ABS_RZ, data[5]); } input_sync(dev); /* XBOX360W controllers can't be turned off without driver assistance */ if (xpad->xtype == XTYPE_XBOX360W) { if (xpad->mode_btn_down_ts > 0 && xpad->pad_present && ((ktime_get_seconds() - xpad->mode_btn_down_ts) >= XPAD360W_POWEROFF_TIMEOUT)) { xpad360w_poweroff_controller(xpad); xpad->mode_btn_down_ts = 0; return; } /* mode button down/up */ if (data[3] & BIT(2)) xpad->mode_btn_down_ts = ktime_get_seconds(); else xpad->mode_btn_down_ts = 0; } } static void xpad_presence_work(struct work_struct *work) { struct usb_xpad *xpad = container_of(work, struct usb_xpad, work); int error; if (xpad->pad_present) { error = xpad_init_input(xpad); if (error) { /* complain only, not much else we can do here */ dev_err(&xpad->dev->dev, "unable to init device: %d\n", error); } else { rcu_assign_pointer(xpad->x360w_dev, xpad->dev); } } else { RCU_INIT_POINTER(xpad->x360w_dev, NULL); synchronize_rcu(); /* * Now that we are sure xpad360w_process_packet is not * using input device we can get rid of it. */ xpad_deinit_input(xpad); } } /* * xpad360w_process_packet * * Completes a request by converting the data into events for the * input subsystem. It is version for xbox 360 wireless controller. * * Byte.Bit * 00.1 - Status change: The controller or headset has connected/disconnected * Bits 01.7 and 01.6 are valid * 01.7 - Controller present * 01.6 - Headset present * 01.1 - Pad state (Bytes 4+) valid * */ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev; bool present; /* Presence change */ if (data[0] & 0x08) { present = (data[1] & 0x80) != 0; if (xpad->pad_present != present) { xpad->pad_present = present; schedule_work(&xpad->work); } } /* Valid pad data */ if (data[1] != 0x1) return; rcu_read_lock(); dev = rcu_dereference(xpad->x360w_dev); if (dev) xpad360_process_packet(xpad, dev, cmd, &data[4]); rcu_read_unlock(); } /* * xpadone_process_packet * * Completes a request by converting the data into events for the * input subsystem. 
This version is for the Xbox One controller. * * The report format was gleaned from * https://github.com/kylelemons/xbox/blob/master/xbox.go */ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; bool do_sync = false; /* the xbox button has its own special report */ if (data[0] == GIP_CMD_VIRTUAL_KEY) { /* * The Xbox One S controller requires these reports to be * acked otherwise it continues sending them forever and * won't report further mode button events. */ if (data[1] == (GIP_OPT_ACK | GIP_OPT_INTERNAL)) xpadone_ack_mode_report(xpad, data[2]); input_report_key(dev, BTN_MODE, data[4] & GENMASK(1, 0)); input_sync(dev); do_sync = true; } else if (data[0] == GIP_CMD_FIRMWARE) { /* Some packet formats force us to use this separate to poll paddle inputs */ if (xpad->packet_type == PKT_XBE2_FW_5_11) { /* Mute paddles if controller is in a custom profile slot * Checked by looking at the active profile slot to * verify it's the default slot */ if (data[19] != 0) data[18] = 0; /* Elite Series 2 split packet paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[18] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[18] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[18] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[18] & BIT(3)); do_sync = true; } } else if (data[0] == GIP_CMD_INPUT) { /* The main valid packet type for inputs */ /* menu/view buttons */ input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); if (xpad->mapping & MAP_SELECT_BUTTON) input_report_key(dev, KEY_RECORD, data[22] & BIT(0)); /* buttons A,B,X,Y */ input_report_key(dev, BTN_A, data[4] & BIT(4)); input_report_key(dev, BTN_B, data[4] & BIT(5)); input_report_key(dev, BTN_X, data[4] & BIT(6)); input_report_key(dev, BTN_Y, data[4] & BIT(7)); /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[5] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[5] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[5] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[5] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[5] & 0x08) - !!(data[5] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[5] & 0x02) - !!(data[5] & 0x01)); } /* TL/TR */ input_report_key(dev, BTN_TL, data[5] & BIT(4)); input_report_key(dev, BTN_TR, data[5] & BIT(5)); /* stick press left/right */ input_report_key(dev, BTN_THUMBL, data[5] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[5] & BIT(7)); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 10))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 12))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 14))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 16))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, (__u16) le16_to_cpup((__le16 *)(data + 6))); input_report_key(dev, BTN_TR2, (__u16) le16_to_cpup((__le16 *)(data + 8))); } else { input_report_abs(dev, ABS_Z, (__u16) le16_to_cpup((__le16 *)(data + 6))); input_report_abs(dev, ABS_RZ, (__u16) le16_to_cpup((__le16 *)(data + 8))); } /* Profile button has a value of 0-3, so it is reported as an axis */ if (xpad->mapping & MAP_PROFILE_BUTTON) input_report_abs(dev, 
ABS_PROFILE, data[34]); /* paddle handling */ /* based on SDL's SDL_hidapi_xboxone.c */ if (xpad->mapping & MAP_PADDLES) { if (xpad->packet_type == PKT_XBE1) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (memcmp(&data[4], &data[18], 2) != 0) data[32] = 0; /* OG Elite Series Controller paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[32] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[32] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[32] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[32] & BIT(2)); } else if (xpad->packet_type == PKT_XBE2_FW_OLD) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (data[19] != 0) data[18] = 0; /* Elite Series 2 4.x firmware paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[18] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[18] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[18] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[18] & BIT(3)); } else if (xpad->packet_type == PKT_XBE2_FW_5_EARLY) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (data[23] != 0) data[22] = 0; /* Elite Series 2 5.x firmware paddle bits * (before the packet was split) */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[22] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[22] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[22] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[22] & BIT(3)); } } do_sync = true; } if (do_sync) input_sync(dev); } static void xpad_irq_in(struct urb *urb) { struct usb_xpad *xpad = urb->context; struct device *dev = &xpad->intf->dev; int retval, status; status = urb->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } switch (xpad->xtype) { case XTYPE_XBOX360: xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata); break; case XTYPE_XBOX360W: xpad360w_process_packet(xpad, 0, xpad->idata); break; case XTYPE_XBOXONE: xpadone_process_packet(xpad, 0, xpad->idata); break; default: xpad_process_packet(xpad, 0, xpad->idata); } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* Callers must hold xpad->odata_lock spinlock */ static bool xpad_prepare_next_init_packet(struct usb_xpad *xpad) { const struct xboxone_init_packet *init_packet; if (xpad->xtype != XTYPE_XBOXONE) return false; /* Perform initialization sequence for Xbox One pads that require it */ while (xpad->init_seq < ARRAY_SIZE(xboxone_init_packets)) { init_packet = &xboxone_init_packets[xpad->init_seq++]; if (init_packet->idVendor != 0 && init_packet->idVendor != xpad->dev->id.vendor) continue; if (init_packet->idProduct != 0 && init_packet->idProduct != xpad->dev->id.product) continue; /* This packet applies to our device, so prepare to send it */ memcpy(xpad->odata, init_packet->data, init_packet->len); xpad->irq_out->transfer_buffer_length = init_packet->len; /* Update packet with current sequence number */ 
xpad->odata[2] = xpad->odata_serial++; return true; } return false; } /* Callers must hold xpad->odata_lock spinlock */ static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad) { struct xpad_output_packet *pkt, *packet = NULL; int i; /* We may have init packets to send before we can send user commands */ if (xpad_prepare_next_init_packet(xpad)) return true; for (i = 0; i < XPAD_NUM_OUT_PACKETS; i++) { if (++xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS) xpad->last_out_packet = 0; pkt = &xpad->out_packets[xpad->last_out_packet]; if (pkt->pending) { dev_dbg(&xpad->intf->dev, "%s - found pending output packet %d\n", __func__, xpad->last_out_packet); packet = pkt; break; } } if (packet) { memcpy(xpad->odata, packet->data, packet->len); xpad->irq_out->transfer_buffer_length = packet->len; packet->pending = false; return true; } return false; } /* Callers must hold xpad->odata_lock spinlock */ static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad) { int error; if (!xpad->irq_out_active && xpad_prepare_next_out_packet(xpad)) { usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor); error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC); if (error) { dev_err(&xpad->intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, error); usb_unanchor_urb(xpad->irq_out); return -EIO; } xpad->irq_out_active = true; } return 0; } static void xpad_irq_out(struct urb *urb) { struct usb_xpad *xpad = urb->context; struct device *dev = &xpad->intf->dev; int status = urb->status; int error; unsigned long flags; spin_lock_irqsave(&xpad->odata_lock, flags); switch (status) { case 0: /* success */ xpad->irq_out_active = xpad_prepare_next_out_packet(xpad); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); xpad->irq_out_active = false; break; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); break; } if (xpad->irq_out_active) { usb_anchor_urb(urb, &xpad->irq_out_anchor); error = usb_submit_urb(urb, GFP_ATOMIC); if (error) { dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, error); usb_unanchor_urb(urb); xpad->irq_out_active = false; } } spin_unlock_irqrestore(&xpad->odata_lock, flags); } static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad, struct usb_endpoint_descriptor *ep_irq_out) { int error; if (xpad->xtype == XTYPE_UNKNOWN) return 0; init_usb_anchor(&xpad->irq_out_anchor); xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN, GFP_KERNEL, &xpad->odata_dma); if (!xpad->odata) return -ENOMEM; spin_lock_init(&xpad->odata_lock); xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL); if (!xpad->irq_out) { error = -ENOMEM; goto err_free_coherent; } usb_fill_int_urb(xpad->irq_out, xpad->udev, usb_sndintpipe(xpad->udev, ep_irq_out->bEndpointAddress), xpad->odata, XPAD_PKT_LEN, xpad_irq_out, xpad, ep_irq_out->bInterval); xpad->irq_out->transfer_dma = xpad->odata_dma; xpad->irq_out->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; return 0; err_free_coherent: usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); return error; } static void xpad_stop_output(struct usb_xpad *xpad) { if (xpad->xtype != XTYPE_UNKNOWN) { if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor, 5000)) { dev_warn(&xpad->intf->dev, "timed out waiting for output URB to complete, killing\n"); usb_kill_anchored_urbs(&xpad->irq_out_anchor); } } } static void xpad_deinit_output(struct usb_xpad *xpad) { if (xpad->xtype != 
XTYPE_UNKNOWN) { usb_free_urb(xpad->irq_out); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); } } static int xpad_inquiry_pad_presence(struct usb_xpad *xpad) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; unsigned long flags; int retval; spin_lock_irqsave(&xpad->odata_lock, flags); packet->data[0] = 0x08; packet->data[1] = 0x00; packet->data[2] = 0x0F; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; /* Reset the sequence so we send out presence first */ xpad->last_out_packet = -1; retval = xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); return retval; } static int xpad_start_xbox_one(struct usb_xpad *xpad) { unsigned long flags; int retval; if (usb_ifnum_to_if(xpad->udev, GIP_WIRED_INTF_AUDIO)) { /* * Explicitly disable the audio interface. This is needed * for some controllers, such as the PowerA Enhanced Wired * Controller for Series X|S (0x20d6:0x200e) to report the * guide button. */ retval = usb_set_interface(xpad->udev, GIP_WIRED_INTF_AUDIO, 0); if (retval) dev_warn(&xpad->dev->dev, "unable to disable audio interface: %d\n", retval); } spin_lock_irqsave(&xpad->odata_lock, flags); /* * Begin the init sequence by attempting to send a packet. * We will cycle through the init packet sequence before * sending any packets from the output ring. */ xpad->init_seq = 0; retval = xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); return retval; } static void xpadone_ack_mode_report(struct usb_xpad *xpad, u8 seq_num) { unsigned long flags; struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; static const u8 mode_report_ack[] = { GIP_CMD_ACK, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_CMD_VIRTUAL_KEY, GIP_OPT_INTERNAL, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 }; spin_lock_irqsave(&xpad->odata_lock, flags); packet->len = sizeof(mode_report_ack); memcpy(packet->data, mode_report_ack, packet->len); packet->data[2] = seq_num; packet->pending = true; /* Reset the sequence so we send out the ack now */ xpad->last_out_packet = -1; xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); } #ifdef CONFIG_JOYSTICK_XPAD_FF static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct usb_xpad *xpad = input_get_drvdata(dev); struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX]; __u16 strong; __u16 weak; int retval; unsigned long flags; if (effect->type != FF_RUMBLE) return 0; strong = effect->u.rumble.strong_magnitude; weak = effect->u.rumble.weak_magnitude; spin_lock_irqsave(&xpad->odata_lock, flags); switch (xpad->xtype) { case XTYPE_XBOX: packet->data[0] = 0x00; packet->data[1] = 0x06; packet->data[2] = 0x00; packet->data[3] = strong / 256; /* left actuator */ packet->data[4] = 0x00; packet->data[5] = weak / 256; /* right actuator */ packet->len = 6; packet->pending = true; break; case XTYPE_XBOX360: packet->data[0] = 0x00; packet->data[1] = 0x08; packet->data[2] = 0x00; packet->data[3] = strong / 256; /* left actuator? */ packet->data[4] = weak / 256; /* right actuator? 
*/ packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->len = 8; packet->pending = true; break; case XTYPE_XBOX360W: packet->data[0] = 0x00; packet->data[1] = 0x01; packet->data[2] = 0x0F; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = strong / 256; packet->data[6] = weak / 256; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; break; case XTYPE_XBOXONE: packet->data[0] = GIP_CMD_RUMBLE; /* activate rumble */ packet->data[1] = 0x00; packet->data[2] = xpad->odata_serial++; packet->data[3] = GIP_PL_LEN(9); packet->data[4] = 0x00; packet->data[5] = GIP_MOTOR_ALL; packet->data[6] = 0x00; /* left trigger */ packet->data[7] = 0x00; /* right trigger */ packet->data[8] = strong / 512; /* left actuator */ packet->data[9] = weak / 512; /* right actuator */ packet->data[10] = 0xFF; /* on period */ packet->data[11] = 0x00; /* off period */ packet->data[12] = 0xFF; /* repeat count */ packet->len = 13; packet->pending = true; break; default: dev_dbg(&xpad->dev->dev, "%s - rumble command sent to unsupported xpad type: %d\n", __func__, xpad->xtype); retval = -EINVAL; goto out; } retval = xpad_try_sending_next_out_packet(xpad); out: spin_unlock_irqrestore(&xpad->odata_lock, flags); return retval; } static int xpad_init_ff(struct usb_xpad *xpad) { if (xpad->xtype == XTYPE_UNKNOWN) return 0; input_set_capability(xpad->dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(xpad->dev, NULL, xpad_play_effect); } #else static int xpad_init_ff(struct usb_xpad *xpad) { return 0; } #endif #if defined(CONFIG_JOYSTICK_XPAD_LEDS) #include <linux/leds.h> #include <linux/idr.h> static DEFINE_IDA(xpad_pad_seq); struct xpad_led { char name[16]; struct led_classdev led_cdev; struct usb_xpad *xpad; }; /* * set the LEDs on Xbox 360 / Wireless Controllers * @param command * 0: off * 1: all blink, then previous setting * 2: 1/top-left blink, then on * 3: 2/top-right blink, then on * 4: 3/bottom-left blink, then on * 5: 4/bottom-right blink, then on * 6: 1/top-left on * 7: 2/top-right on * 8: 3/bottom-left on * 9: 4/bottom-right on * 10: rotate * 11: blink, based on previous setting * 12: slow blink, based on previous setting * 13: rotate with two lights * 14: persistent slow all blink * 15: blink once, then previous setting */ static void xpad_send_led_command(struct usb_xpad *xpad, int command) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_LED_IDX]; unsigned long flags; command %= 16; spin_lock_irqsave(&xpad->odata_lock, flags); switch (xpad->xtype) { case XTYPE_XBOX360: packet->data[0] = 0x01; packet->data[1] = 0x03; packet->data[2] = command; packet->len = 3; packet->pending = true; break; case XTYPE_XBOX360W: packet->data[0] = 0x00; packet->data[1] = 0x00; packet->data[2] = 0x08; packet->data[3] = 0x40 + command; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; break; } xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); } /* * Light up the segment corresponding to the pad number on * Xbox 360 Controllers. 
*/ static void xpad_identify_controller(struct usb_xpad *xpad) { led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2); } static void xpad_led_set(struct led_classdev *led_cdev, enum led_brightness value) { struct xpad_led *xpad_led = container_of(led_cdev, struct xpad_led, led_cdev); xpad_send_led_command(xpad_led->xpad, value); } static int xpad_led_probe(struct usb_xpad *xpad) { struct xpad_led *led; struct led_classdev *led_cdev; int error; if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX360W) return 0; xpad->led = led = kzalloc(sizeof(struct xpad_led), GFP_KERNEL); if (!led) return -ENOMEM; xpad->pad_nr = ida_alloc(&xpad_pad_seq, GFP_KERNEL); if (xpad->pad_nr < 0) { error = xpad->pad_nr; goto err_free_mem; } snprintf(led->name, sizeof(led->name), "xpad%d", xpad->pad_nr); led->xpad = xpad; led_cdev = &led->led_cdev; led_cdev->name = led->name; led_cdev->brightness_set = xpad_led_set; led_cdev->flags = LED_CORE_SUSPENDRESUME; error = led_classdev_register(&xpad->udev->dev, led_cdev); if (error) goto err_free_id; xpad_identify_controller(xpad); return 0; err_free_id: ida_free(&xpad_pad_seq, xpad->pad_nr); err_free_mem: kfree(led); xpad->led = NULL; return error; } static void xpad_led_disconnect(struct usb_xpad *xpad) { struct xpad_led *xpad_led = xpad->led; if (xpad_led) { led_classdev_unregister(&xpad_led->led_cdev); ida_free(&xpad_pad_seq, xpad->pad_nr); kfree(xpad_led); } } #else static int xpad_led_probe(struct usb_xpad *xpad) { return 0; } static void xpad_led_disconnect(struct usb_xpad *xpad) { } #endif static int xpad_start_input(struct usb_xpad *xpad) { int error; if (usb_submit_urb(xpad->irq_in, GFP_KERNEL)) return -EIO; if (xpad->xtype == XTYPE_XBOXONE) { error = xpad_start_xbox_one(xpad); if (error) { usb_kill_urb(xpad->irq_in); return error; } } if (xpad->xtype == XTYPE_XBOX360) { /* * Some third-party controllers Xbox 360-style controllers * require this message to finish initialization. */ u8 dummy[20]; error = usb_control_msg_recv(xpad->udev, 0, /* bRequest */ 0x01, /* bmRequestType */ USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_INTERFACE, /* wValue */ 0x100, /* wIndex */ 0x00, dummy, sizeof(dummy), 25, GFP_KERNEL); if (error) dev_warn(&xpad->dev->dev, "unable to receive magic message: %d\n", error); } return 0; } static void xpad_stop_input(struct usb_xpad *xpad) { usb_kill_urb(xpad->irq_in); } static void xpad360w_poweroff_controller(struct usb_xpad *xpad) { unsigned long flags; struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; spin_lock_irqsave(&xpad->odata_lock, flags); packet->data[0] = 0x00; packet->data[1] = 0x00; packet->data[2] = 0x08; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; /* Reset the sequence so we send out poweroff now */ xpad->last_out_packet = -1; xpad_try_sending_next_out_packet(xpad); spin_unlock_irqrestore(&xpad->odata_lock, flags); } static int xpad360w_start_input(struct usb_xpad *xpad) { int error; error = usb_submit_urb(xpad->irq_in, GFP_KERNEL); if (error) return -EIO; /* * Send presence packet. * This will force the controller to resend connection packets. * This is useful in the case we activate the module after the * adapter has been plugged in, as it won't automatically * send us info about the controllers. 
*/ error = xpad_inquiry_pad_presence(xpad); if (error) { usb_kill_urb(xpad->irq_in); return error; } return 0; } static void xpad360w_stop_input(struct usb_xpad *xpad) { usb_kill_urb(xpad->irq_in); /* Make sure we are done with presence work if it was scheduled */ flush_work(&xpad->work); } static int xpad_open(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); return xpad_start_input(xpad); } static void xpad_close(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); xpad_stop_input(xpad); } static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) { struct usb_xpad *xpad = input_get_drvdata(input_dev); switch (abs) { case ABS_X: case ABS_Y: case ABS_RX: case ABS_RY: /* the two sticks */ input_set_abs_params(input_dev, abs, -32768, 32767, 16, 128); break; case ABS_Z: case ABS_RZ: /* the triggers (if mapped to axes) */ if (xpad->xtype == XTYPE_XBOXONE) input_set_abs_params(input_dev, abs, 0, 1023, 0, 0); else input_set_abs_params(input_dev, abs, 0, 255, 0, 0); break; case ABS_HAT0X: case ABS_HAT0Y: /* the d-pad (only if dpad is mapped to axes */ input_set_abs_params(input_dev, abs, -1, 1, 0, 0); break; case ABS_PROFILE: /* 4 value profile button (such as on XAC) */ input_set_abs_params(input_dev, abs, 0, 4, 0, 0); break; default: input_set_abs_params(input_dev, abs, 0, 0, 0, 0); break; } } static void xpad_deinit_input(struct usb_xpad *xpad) { if (xpad->input_created) { xpad->input_created = false; xpad_led_disconnect(xpad); input_unregister_device(xpad->dev); } } static int xpad_init_input(struct usb_xpad *xpad) { struct input_dev *input_dev; int i, error; input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; xpad->dev = input_dev; input_dev->name = xpad->name; input_dev->phys = xpad->phys; usb_to_input_id(xpad->udev, &input_dev->id); if (xpad->xtype == XTYPE_XBOX360W) { /* x360w controllers and the receiver have different ids */ input_dev->id.product = 0x02a1; } input_dev->dev.parent = &xpad->intf->dev; input_set_drvdata(input_dev, xpad); if (xpad->xtype != XTYPE_XBOX360W) { input_dev->open = xpad_open; input_dev->close = xpad_close; } if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* set up axes */ for (i = 0; xpad_abs[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs[i]); } /* set up standard buttons */ for (i = 0; xpad_common_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_common_btn[i]); /* set up model-specific ones */ if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX360W || xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); if (xpad->mapping & MAP_SELECT_BUTTON) input_set_capability(input_dev, EV_KEY, KEY_RECORD); } else { for (i = 0; xpad_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn[i]); } if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { for (i = 0; xpad_btn_pad[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_pad[i]); } /* set up paddles if the controller has them */ if (xpad->mapping & MAP_PADDLES) { for (i = 0; xpad_btn_paddles[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_paddles[i]); } /* * This should be a simple else block. However historically * xbox360w has mapped DPAD to buttons while xbox360 did not. This * made no sense, but now we can not just switch back and have to * support both behaviors. 
*/ if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || xpad->xtype == XTYPE_XBOX360W) { for (i = 0; xpad_abs_pad[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_pad[i]); } if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { for (i = 0; xpad_btn_triggers[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_triggers[i]); } else { for (i = 0; xpad_abs_triggers[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_triggers[i]); } /* setup profile button as an axis with 4 possible values */ if (xpad->mapping & MAP_PROFILE_BUTTON) xpad_set_up_abs(input_dev, ABS_PROFILE); error = xpad_init_ff(xpad); if (error) goto err_free_input; error = xpad_led_probe(xpad); if (error) goto err_destroy_ff; error = input_register_device(xpad->dev); if (error) goto err_disconnect_led; xpad->input_created = true; return 0; err_disconnect_led: xpad_led_disconnect(xpad); err_destroy_ff: input_ff_destroy(input_dev); err_free_input: input_free_device(input_dev); return error; } static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_xpad *xpad; struct usb_endpoint_descriptor *ep_irq_in, *ep_irq_out; int i, error; if (intf->cur_altsetting->desc.bNumEndpoints != 2) return -ENODEV; for (i = 0; xpad_device[i].idVendor; i++) { if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) && (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct)) break; } xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL); if (!xpad) return -ENOMEM; usb_make_path(udev, xpad->phys, sizeof(xpad->phys)); strlcat(xpad->phys, "/input0", sizeof(xpad->phys)); xpad->idata = usb_alloc_coherent(udev, XPAD_PKT_LEN, GFP_KERNEL, &xpad->idata_dma); if (!xpad->idata) { error = -ENOMEM; goto err_free_mem; } xpad->irq_in = usb_alloc_urb(0, GFP_KERNEL); if (!xpad->irq_in) { error = -ENOMEM; goto err_free_idata; } xpad->udev = udev; xpad->intf = intf; xpad->mapping = xpad_device[i].mapping; xpad->xtype = xpad_device[i].xtype; xpad->name = xpad_device[i].name; xpad->packet_type = PKT_XB; INIT_WORK(&xpad->work, xpad_presence_work); if (xpad->xtype == XTYPE_UNKNOWN) { if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) { if (intf->cur_altsetting->desc.bInterfaceProtocol == 129) xpad->xtype = XTYPE_XBOX360W; else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208) xpad->xtype = XTYPE_XBOXONE; else xpad->xtype = XTYPE_XBOX360; } else { xpad->xtype = XTYPE_XBOX; } if (dpad_to_buttons) xpad->mapping |= MAP_DPAD_TO_BUTTONS; if (triggers_to_buttons) xpad->mapping |= MAP_TRIGGERS_TO_BUTTONS; if (sticks_to_null) xpad->mapping |= MAP_STICKS_TO_NULL; } if (xpad->xtype == XTYPE_XBOXONE && intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { /* * The Xbox One controller lists three interfaces all with the * same interface class, subclass and protocol. Differentiate by * interface number. 
*/ error = -ENODEV; goto err_free_in_urb; } ep_irq_in = ep_irq_out = NULL; for (i = 0; i < 2; i++) { struct usb_endpoint_descriptor *ep = &intf->cur_altsetting->endpoint[i].desc; if (usb_endpoint_xfer_int(ep)) { if (usb_endpoint_dir_in(ep)) ep_irq_in = ep; else ep_irq_out = ep; } } if (!ep_irq_in || !ep_irq_out) { error = -ENODEV; goto err_free_in_urb; } error = xpad_init_output(intf, xpad, ep_irq_out); if (error) goto err_free_in_urb; usb_fill_int_urb(xpad->irq_in, udev, usb_rcvintpipe(udev, ep_irq_in->bEndpointAddress), xpad->idata, XPAD_PKT_LEN, xpad_irq_in, xpad, ep_irq_in->bInterval); xpad->irq_in->transfer_dma = xpad->idata_dma; xpad->irq_in->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_set_intfdata(intf, xpad); /* Packet type detection */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x045e) { /* Microsoft controllers */ if (le16_to_cpu(udev->descriptor.idProduct) == 0x02e3) { /* The original elite controller always uses the oldest * type of extended packet */ xpad->packet_type = PKT_XBE1; } else if (le16_to_cpu(udev->descriptor.idProduct) == 0x0b00) { /* The elite 2 controller has seen multiple packet * revisions. These are tied to specific firmware * versions */ if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x0500) { /* This is the format that the Elite 2 used * prior to the BLE update */ xpad->packet_type = PKT_XBE2_FW_OLD; } else if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x050b) { /* This is the format that the Elite 2 used * prior to the update that split the packet */ xpad->packet_type = PKT_XBE2_FW_5_EARLY; } else { /* The split packet format that was introduced * in firmware v5.11 */ xpad->packet_type = PKT_XBE2_FW_5_11; } } } if (xpad->xtype == XTYPE_XBOX360W) { /* * Submit the int URB immediately rather than waiting for open * because we get status messages from the device whether * or not any controllers are attached. In fact, it's * exactly the message that a controller has arrived that * we're waiting for. */ error = xpad360w_start_input(xpad); if (error) goto err_deinit_output; /* * Wireless controllers require RESET_RESUME to work properly * after suspend. Ideally this quirk should be in usb core * quirk list, but we have too many vendors producing these * controllers and we'd need to maintain 2 identical lists * here in this driver and in usb core. */ udev->quirks |= USB_QUIRK_RESET_RESUME; } else { error = xpad_init_input(xpad); if (error) goto err_deinit_output; } return 0; err_deinit_output: xpad_deinit_output(xpad); err_free_in_urb: usb_free_urb(xpad->irq_in); err_free_idata: usb_free_coherent(udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); err_free_mem: kfree(xpad); return error; } static void xpad_disconnect(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata(intf); if (xpad->xtype == XTYPE_XBOX360W) xpad360w_stop_input(xpad); xpad_deinit_input(xpad); /* * Now that both input device and LED device are gone we can * stop output URB. */ xpad_stop_output(xpad); xpad_deinit_output(xpad); usb_free_urb(xpad->irq_in); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); kfree(xpad); usb_set_intfdata(intf, NULL); } static int xpad_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_xpad *xpad = usb_get_intfdata(intf); struct input_dev *input = xpad->dev; if (xpad->xtype == XTYPE_XBOX360W) { /* * Wireless controllers always listen to input so * they are notified when controller shows up * or goes away. 
*/ xpad360w_stop_input(xpad); /* * The wireless adapter is going off now, so the * gamepads are going to become disconnected. * Unless explicitly disabled, power them down * so they don't just sit there flashing. */ if (auto_poweroff && xpad->pad_present) xpad360w_poweroff_controller(xpad); } else { mutex_lock(&input->mutex); if (input_device_enabled(input)) xpad_stop_input(xpad); mutex_unlock(&input->mutex); } xpad_stop_output(xpad); return 0; } static int xpad_resume(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata(intf); struct input_dev *input = xpad->dev; int retval = 0; if (xpad->xtype == XTYPE_XBOX360W) { retval = xpad360w_start_input(xpad); } else { mutex_lock(&input->mutex); if (input_device_enabled(input)) { retval = xpad_start_input(xpad); } else if (xpad->xtype == XTYPE_XBOXONE) { /* * Even if there are no users, we'll send Xbox One pads * the startup sequence so they don't sit there and * blink until somebody opens the input device again. */ retval = xpad_start_xbox_one(xpad); } mutex_unlock(&input->mutex); } return retval; } static struct usb_driver xpad_driver = { .name = "xpad", .probe = xpad_probe, .disconnect = xpad_disconnect, .suspend = xpad_suspend, .resume = xpad_resume, .id_table = xpad_table, }; module_usb_driver(xpad_driver); MODULE_AUTHOR("Marko Friedemann <mfr@bmx-chemnitz.de>"); MODULE_DESCRIPTION("Xbox pad driver"); MODULE_LICENSE("GPL");
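/*
 * Illustrative userspace sketch (not part of xpad.c): a minimal program that
 * exercises the FF_RUMBLE capability the driver registers through
 * input_ff_create_memless() and services in xpad_play_effect(). It uses only
 * the standard Linux evdev force-feedback interface (EVIOCSFF plus an EV_FF
 * write). The "/dev/input/event0" node is an assumption; substitute the event
 * device that corresponds to the pad. Error handling is kept minimal.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/input.h>

int main(void)
{
	struct ff_effect effect;
	struct input_event play;
	int fd = open("/dev/input/event0", O_RDWR);	/* assumed device node */

	if (fd < 0)
		return 1;

	/* Describe a rumble effect; the driver maps strong/weak to the motors */
	memset(&effect, 0, sizeof(effect));
	effect.type = FF_RUMBLE;
	effect.id = -1;					/* kernel assigns an id */
	effect.u.rumble.strong_magnitude = 0xc000;
	effect.u.rumble.weak_magnitude = 0x4000;
	effect.replay.length = 1000;			/* milliseconds */

	if (ioctl(fd, EVIOCSFF, &effect) < 0) {		/* upload the effect */
		close(fd);
		return 1;
	}

	/* Start playback by writing an EV_FF event for the uploaded effect */
	memset(&play, 0, sizeof(play));
	play.type = EV_FF;
	play.code = effect.id;
	play.value = 1;
	if (write(fd, &play, sizeof(play)) != sizeof(play)) {
		close(fd);
		return 1;
	}

	sleep(2);					/* let the effect run */
	close(fd);
	return 0;
}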
// SPDX-License-Identifier: GPL-2.0
/*
 * udc.c - Core UDC Framework
 *
 * Copyright (C) 2010 Texas Instruments
 * Author: Felipe Balbi <balbi@ti.com>
 */

#define pr_fmt(fmt)	"UDC core: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/dma-mapping.h>
#include <linux/sched/task_stack.h>
#include <linux/workqueue.h>

#include <linux/usb/ch9.h>
#include <linux/usb/gadget.h>
#include <linux/usb.h>

#include "trace.h"

static DEFINE_IDA(gadget_id_numbers);

static const struct bus_type gadget_bus_type;

/**
 * struct usb_udc - describes one usb device controller
 * @driver: the gadget driver pointer. For use by the class code
 * @dev: the child device to the actual controller
 * @gadget: the gadget. For use by the class code
 * @list: for use by the udc class driver
 * @vbus: for udcs who care about vbus status, this value is real vbus status;
 *	for udcs who do not care about vbus status, this value is always true
 * @started: the UDC's started state. True if the UDC had started.
 * @allow_connect: Indicates whether UDC is allowed to be pulled up.
 *	Set/cleared by gadget_(un)bind_driver() after gadget driver is bound or
 *	unbound.
 * @vbus_work: work routine to handle VBUS status change notifications.
 * @connect_lock: protects udc->started, gadget->connect,
 *	gadget->allow_connect and gadget->deactivate. The routines
 *	usb_gadget_connect_locked(), usb_gadget_disconnect_locked(),
 *	usb_udc_connect_control_locked(), usb_gadget_udc_start_locked() and
 *	usb_gadget_udc_stop_locked() are called with this lock held.
 *
 * This represents the internal data structure which is used by the UDC-class
 * to hold information about udc driver and gadget together.
*/ struct usb_udc { struct usb_gadget_driver *driver; struct usb_gadget *gadget; struct device dev; struct list_head list; bool vbus; bool started; bool allow_connect; struct work_struct vbus_work; struct mutex connect_lock; }; static const struct class udc_class; static LIST_HEAD(udc_list); /* Protects udc_list, udc->driver, driver->is_bound, and related calls */ static DEFINE_MUTEX(udc_lock); /* ------------------------------------------------------------------------- */ /** * usb_ep_set_maxpacket_limit - set maximum packet size limit for endpoint * @ep:the endpoint being configured * @maxpacket_limit:value of maximum packet size limit * * This function should be used only in UDC drivers to initialize endpoint * (usually in probe function). */ void usb_ep_set_maxpacket_limit(struct usb_ep *ep, unsigned maxpacket_limit) { ep->maxpacket_limit = maxpacket_limit; ep->maxpacket = maxpacket_limit; trace_usb_ep_set_maxpacket_limit(ep, 0); } EXPORT_SYMBOL_GPL(usb_ep_set_maxpacket_limit); /** * usb_ep_enable - configure endpoint, making it usable * @ep:the endpoint being configured. may not be the endpoint named "ep0". * drivers discover endpoints through the ep_list of a usb_gadget. * * When configurations are set, or when interface settings change, the driver * will enable or disable the relevant endpoints. while it is enabled, an * endpoint may be used for i/o until the driver receives a disconnect() from * the host or until the endpoint is disabled. * * the ep0 implementation (which calls this routine) must ensure that the * hardware capabilities of each endpoint match the descriptor provided * for it. for example, an endpoint named "ep2in-bulk" would be usable * for interrupt transfers as well as bulk, but it likely couldn't be used * for iso transfers or for endpoint 14. some endpoints are fully * configurable, with more generic names like "ep-a". (remember that for * USB, "in" means "towards the USB host".) * * This routine may be called in an atomic (interrupt) context. * * returns zero, or a negative error code. */ int usb_ep_enable(struct usb_ep *ep) { int ret = 0; if (ep->enabled) goto out; /* UDC drivers can't handle endpoints with maxpacket size 0 */ if (usb_endpoint_maxp(ep->desc) == 0) { /* * We should log an error message here, but we can't call * dev_err() because there's no way to find the gadget * given only ep. */ ret = -EINVAL; goto out; } ret = ep->ops->enable(ep, ep->desc); if (ret) goto out; ep->enabled = true; out: trace_usb_ep_enable(ep, ret); return ret; } EXPORT_SYMBOL_GPL(usb_ep_enable); /** * usb_ep_disable - endpoint is no longer usable * @ep:the endpoint being unconfigured. may not be the endpoint named "ep0". * * no other task may be using this endpoint when this is called. * any pending and uncompleted requests will complete with status * indicating disconnect (-ESHUTDOWN) before this call returns. * gadget drivers must call usb_ep_enable() again before queueing * requests to the endpoint. * * This routine may be called in an atomic (interrupt) context. * * returns zero, or a negative error code. 
*/ int usb_ep_disable(struct usb_ep *ep) { int ret = 0; if (!ep->enabled) goto out; ret = ep->ops->disable(ep); if (ret) goto out; ep->enabled = false; out: trace_usb_ep_disable(ep, ret); return ret; } EXPORT_SYMBOL_GPL(usb_ep_disable); /** * usb_ep_alloc_request - allocate a request object to use with this endpoint * @ep:the endpoint to be used with with the request * @gfp_flags:GFP_* flags to use * * Request objects must be allocated with this call, since they normally * need controller-specific setup and may even need endpoint-specific * resources such as allocation of DMA descriptors. * Requests may be submitted with usb_ep_queue(), and receive a single * completion callback. Free requests with usb_ep_free_request(), when * they are no longer needed. * * Returns the request, or null if one could not be allocated. */ struct usb_request *usb_ep_alloc_request(struct usb_ep *ep, gfp_t gfp_flags) { struct usb_request *req = NULL; req = ep->ops->alloc_request(ep, gfp_flags); trace_usb_ep_alloc_request(ep, req, req ? 0 : -ENOMEM); return req; } EXPORT_SYMBOL_GPL(usb_ep_alloc_request); /** * usb_ep_free_request - frees a request object * @ep:the endpoint associated with the request * @req:the request being freed * * Reverses the effect of usb_ep_alloc_request(). * Caller guarantees the request is not queued, and that it will * no longer be requeued (or otherwise used). */ void usb_ep_free_request(struct usb_ep *ep, struct usb_request *req) { trace_usb_ep_free_request(ep, req, 0); ep->ops->free_request(ep, req); } EXPORT_SYMBOL_GPL(usb_ep_free_request); /** * usb_ep_queue - queues (submits) an I/O request to an endpoint. * @ep:the endpoint associated with the request * @req:the request being submitted * @gfp_flags: GFP_* flags to use in case the lower level driver couldn't * pre-allocate all necessary memory with the request. * * This tells the device controller to perform the specified request through * that endpoint (reading or writing a buffer). When the request completes, * including being canceled by usb_ep_dequeue(), the request's completion * routine is called to return the request to the driver. Any endpoint * (except control endpoints like ep0) may have more than one transfer * request queued; they complete in FIFO order. Once a gadget driver * submits a request, that request may not be examined or modified until it * is given back to that driver through the completion callback. * * Each request is turned into one or more packets. The controller driver * never merges adjacent requests into the same packet. OUT transfers * will sometimes use data that's already buffered in the hardware. * Drivers can rely on the fact that the first byte of the request's buffer * always corresponds to the first byte of some USB packet, for both * IN and OUT transfers. * * Bulk endpoints can queue any amount of data; the transfer is packetized * automatically. The last packet will be short if the request doesn't fill it * out completely. Zero length packets (ZLPs) should be avoided in portable * protocols since not all usb hardware can successfully handle zero length * packets. (ZLPs may be explicitly written, and may be implicitly written if * the request 'zero' flag is set.) Bulk endpoints may also be used * for interrupt transfers; but the reverse is not true, and some endpoints * won't support every interrupt transfer. (Such as 768 byte packets.) 
* * Interrupt-only endpoints are less functional than bulk endpoints, for * example by not supporting queueing or not handling buffers that are * larger than the endpoint's maxpacket size. They may also treat data * toggle differently. * * Control endpoints ... after getting a setup() callback, the driver queues * one response (even if it would be zero length). That enables the * status ack, after transferring data as specified in the response. Setup * functions may return negative error codes to generate protocol stalls. * (Note that some USB device controllers disallow protocol stall responses * in some cases.) When control responses are deferred (the response is * written after the setup callback returns), then usb_ep_set_halt() may be * used on ep0 to trigger protocol stalls. Depending on the controller, * it may not be possible to trigger a status-stage protocol stall when the * data stage is over, that is, from within the response's completion * routine. * * For periodic endpoints, like interrupt or isochronous ones, the usb host * arranges to poll once per interval, and the gadget driver usually will * have queued some data to transfer at that time. * * Note that @req's ->complete() callback must never be called from * within usb_ep_queue() as that can create deadlock situations. * * This routine may be called in interrupt context. * * Returns zero, or a negative error code. Endpoints that are not enabled * report errors; errors will also be * reported when the usb peripheral is disconnected. * * If and only if @req is successfully queued (the return value is zero), * @req->complete() will be called exactly once, when the Gadget core and * UDC are finished with the request. When the completion function is called, * control of the request is returned to the device driver which submitted it. * The completion handler may then immediately free or reuse @req. */ int usb_ep_queue(struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags) { int ret = 0; if (!ep->enabled && ep->address) { pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n", ep->address, ep->name); ret = -ESHUTDOWN; goto out; } ret = ep->ops->queue(ep, req, gfp_flags); out: trace_usb_ep_queue(ep, req, ret); return ret; } EXPORT_SYMBOL_GPL(usb_ep_queue); /** * usb_ep_dequeue - dequeues (cancels, unlinks) an I/O request from an endpoint * @ep:the endpoint associated with the request * @req:the request being canceled * * If the request is still active on the endpoint, it is dequeued and * eventually its completion routine is called (with status -ECONNRESET); * else a negative error code is returned. This routine is asynchronous, * that is, it may return before the completion routine runs. * * Note that some hardware can't clear out write fifos (to unlink the request * at the head of the queue) except as part of disconnecting from usb. Such * restrictions prevent drivers from supporting configuration changes, * even to configuration zero (a "chapter 9" requirement). * * This routine may be called in interrupt context. */ int usb_ep_dequeue(struct usb_ep *ep, struct usb_request *req) { int ret; ret = ep->ops->dequeue(ep, req); trace_usb_ep_dequeue(ep, req, ret); return ret; } EXPORT_SYMBOL_GPL(usb_ep_dequeue); /** * usb_ep_set_halt - sets the endpoint halt feature. * @ep: the non-isochronous endpoint being stalled * * Use this to stall an endpoint, perhaps as an error report. 
* Except for control endpoints, * the endpoint stays halted (will not stream any data) until the host * clears this feature; drivers may need to empty the endpoint's request * queue first, to make sure no inappropriate transfers happen. * * Note that while an endpoint CLEAR_FEATURE will be invisible to the * gadget driver, a SET_INTERFACE will not be. To reset endpoints for the * current altsetting, see usb_ep_clear_halt(). When switching altsettings, * it's simplest to use usb_ep_enable() or usb_ep_disable() for the endpoints. * * This routine may be called in interrupt context. * * Returns zero, or a negative error code. On success, this call sets * underlying hardware state that blocks data transfers. * Attempts to halt IN endpoints will fail (returning -EAGAIN) if any * transfer requests are still queued, or if the controller hardware * (usually a FIFO) still holds bytes that the host hasn't collected. */ int usb_ep_set_halt(struct usb_ep *ep) { int ret; ret = ep->ops->set_halt(ep, 1); trace_usb_ep_set_halt(ep, ret); return ret; } EXPORT_SYMBOL_GPL(usb_ep_set_halt); /** * usb_ep_clear_halt - clears endpoint halt, and resets toggle * @ep:the bulk or interrupt endpoint being reset * * Use this when responding to the standard usb "set interface" request, * for endpoints that aren't reconfigured, after clearing any other state * in the endpoint's i/o queue. * * This routine may be called in interrupt context. * * Returns zero, or a negative error code. On success, this call clears * the underlying hardware state reflecting endpoint halt and data toggle. * Note that some hardware can't support this request (like pxa2xx_udc), * and accordingly can't correctly implement interface altsettings. */ int usb_ep_clear_halt(struct usb_ep *ep) { int ret; ret = ep->ops->set_halt(ep, 0); trace_usb_ep_clear_halt(ep, ret); return ret; } EXPORT_SYMBOL_GPL(usb_ep_clear_halt); /** * usb_ep_set_wedge - sets the halt feature and ignores clear requests * @ep: the endpoint being wedged * * Use this to stall an endpoint and ignore CLEAR_FEATURE(HALT_ENDPOINT) * requests. If the gadget driver clears the halt status, it will * automatically unwedge the endpoint. * * This routine may be called in interrupt context. * * Returns zero on success, else negative errno. */ int usb_ep_set_wedge(struct usb_ep *ep) { int ret; if (ep->ops->set_wedge) ret = ep->ops->set_wedge(ep); else ret = ep->ops->set_halt(ep, 1); trace_usb_ep_set_wedge(ep, ret); return ret; } EXPORT_SYMBOL_GPL(usb_ep_set_wedge); /** * usb_ep_fifo_status - returns number of bytes in fifo, or error * @ep: the endpoint whose fifo status is being checked. * * FIFO endpoints may have "unclaimed data" in them in certain cases, * such as after aborted transfers. Hosts may not have collected all * the IN data written by the gadget driver (and reported by a request * completion). The gadget driver may not have collected all the data * written OUT to it by the host. Drivers that need precise handling for * fault reporting or recovery may need to use this call. * * This routine may be called in interrupt context. * * This returns the number of such bytes in the fifo, or a negative * errno if the endpoint doesn't use a FIFO or doesn't support such * precise handling. 
*/ int usb_ep_fifo_status(struct usb_ep *ep) { int ret; if (ep->ops->fifo_status) ret = ep->ops->fifo_status(ep); else ret = -EOPNOTSUPP; trace_usb_ep_fifo_status(ep, ret); return ret; } EXPORT_SYMBOL_GPL(usb_ep_fifo_status); /** * usb_ep_fifo_flush - flushes contents of a fifo * @ep: the endpoint whose fifo is being flushed. * * This call may be used to flush the "unclaimed data" that may exist in * an endpoint fifo after abnormal transaction terminations. The call * must never be used except when endpoint is not being used for any * protocol translation. * * This routine may be called in interrupt context. */ void usb_ep_fifo_flush(struct usb_ep *ep) { if (ep->ops->fifo_flush) ep->ops->fifo_flush(ep); trace_usb_ep_fifo_flush(ep, 0); } EXPORT_SYMBOL_GPL(usb_ep_fifo_flush); /* ------------------------------------------------------------------------- */ /** * usb_gadget_frame_number - returns the current frame number * @gadget: controller that reports the frame number * * Returns the usb frame number, normally eleven bits from a SOF packet, * or negative errno if this device doesn't support this capability. */ int usb_gadget_frame_number(struct usb_gadget *gadget) { int ret; ret = gadget->ops->get_frame(gadget); trace_usb_gadget_frame_number(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_frame_number); /** * usb_gadget_wakeup - tries to wake up the host connected to this gadget * @gadget: controller used to wake up the host * * Returns zero on success, else negative error code if the hardware * doesn't support such attempts, or its support has not been enabled * by the usb host. Drivers must return device descriptors that report * their ability to support this, or hosts won't enable it. * * This may also try to use SRP to wake the host and start enumeration, * even if OTG isn't otherwise in use. OTG devices may also start * remote wakeup even when hosts don't explicitly enable it. */ int usb_gadget_wakeup(struct usb_gadget *gadget) { int ret = 0; if (!gadget->ops->wakeup) { ret = -EOPNOTSUPP; goto out; } ret = gadget->ops->wakeup(gadget); out: trace_usb_gadget_wakeup(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_wakeup); /** * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature. * @gadget:the device being configured for remote wakeup * @set:value to be configured. * * set to one to enable remote wakeup feature and zero to disable it. * * returns zero on success, else negative errno. */ int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) { int ret = 0; if (!gadget->ops->set_remote_wakeup) { ret = -EOPNOTSUPP; goto out; } ret = gadget->ops->set_remote_wakeup(gadget, set); out: trace_usb_gadget_set_remote_wakeup(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup); /** * usb_gadget_set_selfpowered - sets the device selfpowered feature. * @gadget:the device being declared as self-powered * * this affects the device status reported by the hardware driver * to reflect that it now has a local power supply. * * returns zero on success, else negative errno. */ int usb_gadget_set_selfpowered(struct usb_gadget *gadget) { int ret = 0; if (!gadget->ops->set_selfpowered) { ret = -EOPNOTSUPP; goto out; } ret = gadget->ops->set_selfpowered(gadget, 1); out: trace_usb_gadget_set_selfpowered(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_set_selfpowered); /** * usb_gadget_clear_selfpowered - clear the device selfpowered feature. 
* @gadget:the device being declared as bus-powered * * this affects the device status reported by the hardware driver. * some hardware may not support bus-powered operation, in which * case this feature's value can never change. * * returns zero on success, else negative errno. */ int usb_gadget_clear_selfpowered(struct usb_gadget *gadget) { int ret = 0; if (!gadget->ops->set_selfpowered) { ret = -EOPNOTSUPP; goto out; } ret = gadget->ops->set_selfpowered(gadget, 0); out: trace_usb_gadget_clear_selfpowered(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_clear_selfpowered); /** * usb_gadget_vbus_connect - Notify controller that VBUS is powered * @gadget:The device which now has VBUS power. * Context: can sleep * * This call is used by a driver for an external transceiver (or GPIO) * that detects a VBUS power session starting. Common responses include * resuming the controller, activating the D+ (or D-) pullup to let the * host detect that a USB device is attached, and starting to draw power * (8mA or possibly more, especially after SET_CONFIGURATION). * * Returns zero on success, else negative errno. */ int usb_gadget_vbus_connect(struct usb_gadget *gadget) { int ret = 0; if (!gadget->ops->vbus_session) { ret = -EOPNOTSUPP; goto out; } ret = gadget->ops->vbus_session(gadget, 1); out: trace_usb_gadget_vbus_connect(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_vbus_connect); /** * usb_gadget_vbus_draw - constrain controller's VBUS power usage * @gadget:The device whose VBUS usage is being described * @mA:How much current to draw, in milliAmperes. This should be twice * the value listed in the configuration descriptor bMaxPower field. * * This call is used by gadget drivers during SET_CONFIGURATION calls, * reporting how much power the device may consume. For example, this * could affect how quickly batteries are recharged. * * Returns zero on success, else negative errno. */ int usb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA) { int ret = 0; if (!gadget->ops->vbus_draw) { ret = -EOPNOTSUPP; goto out; } ret = gadget->ops->vbus_draw(gadget, mA); if (!ret) gadget->mA = mA; out: trace_usb_gadget_vbus_draw(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_vbus_draw); /** * usb_gadget_vbus_disconnect - notify controller about VBUS session end * @gadget:the device whose VBUS supply is being described * Context: can sleep * * This call is used by a driver for an external transceiver (or GPIO) * that detects a VBUS power session ending. Common responses include * reversing everything done in usb_gadget_vbus_connect(). * * Returns zero on success, else negative errno. */ int usb_gadget_vbus_disconnect(struct usb_gadget *gadget) { int ret = 0; if (!gadget->ops->vbus_session) { ret = -EOPNOTSUPP; goto out; } ret = gadget->ops->vbus_session(gadget, 0); out: trace_usb_gadget_vbus_disconnect(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_vbus_disconnect); static int usb_gadget_connect_locked(struct usb_gadget *gadget) __must_hold(&gadget->udc->connect_lock) { int ret = 0; if (!gadget->ops->pullup) { ret = -EOPNOTSUPP; goto out; } if (gadget->deactivated || !gadget->udc->allow_connect || !gadget->udc->started) { /* * If the gadget isn't usable (because it is deactivated, * unbound, or not yet started), we only save the new state. * The gadget will be connected automatically when it is * activated/bound/started. 
*/ gadget->connected = true; goto out; } ret = gadget->ops->pullup(gadget, 1); if (!ret) gadget->connected = 1; out: trace_usb_gadget_connect(gadget, ret); return ret; } /** * usb_gadget_connect - software-controlled connect to USB host * @gadget:the peripheral being connected * * Enables the D+ (or potentially D-) pullup. The host will start * enumerating this gadget when the pullup is active and a VBUS session * is active (the link is powered). * * Returns zero on success, else negative errno. */ int usb_gadget_connect(struct usb_gadget *gadget) { int ret; mutex_lock(&gadget->udc->connect_lock); ret = usb_gadget_connect_locked(gadget); mutex_unlock(&gadget->udc->connect_lock); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_connect); static int usb_gadget_disconnect_locked(struct usb_gadget *gadget) __must_hold(&gadget->udc->connect_lock) { int ret = 0; if (!gadget->ops->pullup) { ret = -EOPNOTSUPP; goto out; } if (!gadget->connected) goto out; if (gadget->deactivated || !gadget->udc->started) { /* * If gadget is deactivated we only save new state. * Gadget will stay disconnected after activation. */ gadget->connected = false; goto out; } ret = gadget->ops->pullup(gadget, 0); if (!ret) gadget->connected = 0; mutex_lock(&udc_lock); if (gadget->udc->driver) gadget->udc->driver->disconnect(gadget); mutex_unlock(&udc_lock); out: trace_usb_gadget_disconnect(gadget, ret); return ret; } /** * usb_gadget_disconnect - software-controlled disconnect from USB host * @gadget:the peripheral being disconnected * * Disables the D+ (or potentially D-) pullup, which the host may see * as a disconnect (when a VBUS session is active). Not all systems * support software pullup controls. * * Following a successful disconnect, invoke the ->disconnect() callback * for the current gadget driver so that UDC drivers don't need to. * * Returns zero on success, else negative errno. */ int usb_gadget_disconnect(struct usb_gadget *gadget) { int ret; mutex_lock(&gadget->udc->connect_lock); ret = usb_gadget_disconnect_locked(gadget); mutex_unlock(&gadget->udc->connect_lock); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_disconnect); /** * usb_gadget_deactivate - deactivate function which is not ready to work * @gadget: the peripheral being deactivated * * This routine may be used during the gadget driver bind() call to prevent * the peripheral from ever being visible to the USB host, unless later * usb_gadget_activate() is called. For example, user mode components may * need to be activated before the system can talk to hosts. * * This routine may sleep; it must not be called in interrupt context * (such as from within a gadget driver's disconnect() callback). * * Returns zero on success, else negative errno. */ int usb_gadget_deactivate(struct usb_gadget *gadget) { int ret = 0; mutex_lock(&gadget->udc->connect_lock); if (gadget->deactivated) goto unlock; if (gadget->connected) { ret = usb_gadget_disconnect_locked(gadget); if (ret) goto unlock; /* * If gadget was being connected before deactivation, we want * to reconnect it in usb_gadget_activate(). */ gadget->connected = true; } gadget->deactivated = true; unlock: mutex_unlock(&gadget->udc->connect_lock); trace_usb_gadget_deactivate(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_deactivate); /** * usb_gadget_activate - activate function which is not ready to work * @gadget: the peripheral being activated * * This routine activates gadget which was previously deactivated with * usb_gadget_deactivate() call. It calls usb_gadget_connect() if needed. 
* * This routine may sleep; it must not be called in interrupt context. * * Returns zero on success, else negative errno. */ int usb_gadget_activate(struct usb_gadget *gadget) { int ret = 0; mutex_lock(&gadget->udc->connect_lock); if (!gadget->deactivated) goto unlock; gadget->deactivated = false; /* * If gadget has been connected before deactivation, or became connected * while it was being deactivated, we call usb_gadget_connect(). */ if (gadget->connected) ret = usb_gadget_connect_locked(gadget); unlock: mutex_unlock(&gadget->udc->connect_lock); trace_usb_gadget_activate(gadget, ret); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_activate); /* ------------------------------------------------------------------------- */ #ifdef CONFIG_HAS_DMA int usb_gadget_map_request_by_dev(struct device *dev, struct usb_request *req, int is_in) { if (req->length == 0) return 0; if (req->sg_was_mapped) { req->num_mapped_sgs = req->num_sgs; return 0; } if (req->num_sgs) { int mapped; mapped = dma_map_sg(dev, req->sg, req->num_sgs, is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (mapped == 0) { dev_err(dev, "failed to map SGs\n"); return -EFAULT; } req->num_mapped_sgs = mapped; } else { if (is_vmalloc_addr(req->buf)) { dev_err(dev, "buffer is not dma capable\n"); return -EFAULT; } else if (object_is_on_stack(req->buf)) { dev_err(dev, "buffer is on stack\n"); return -EFAULT; } req->dma = dma_map_single(dev, req->buf, req->length, is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (dma_mapping_error(dev, req->dma)) { dev_err(dev, "failed to map buffer\n"); return -EFAULT; } req->dma_mapped = 1; } return 0; } EXPORT_SYMBOL_GPL(usb_gadget_map_request_by_dev); int usb_gadget_map_request(struct usb_gadget *gadget, struct usb_request *req, int is_in) { return usb_gadget_map_request_by_dev(gadget->dev.parent, req, is_in); } EXPORT_SYMBOL_GPL(usb_gadget_map_request); void usb_gadget_unmap_request_by_dev(struct device *dev, struct usb_request *req, int is_in) { if (req->length == 0 || req->sg_was_mapped) return; if (req->num_mapped_sgs) { dma_unmap_sg(dev, req->sg, req->num_sgs, is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE); req->num_mapped_sgs = 0; } else if (req->dma_mapped) { dma_unmap_single(dev, req->dma, req->length, is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE); req->dma_mapped = 0; } } EXPORT_SYMBOL_GPL(usb_gadget_unmap_request_by_dev); void usb_gadget_unmap_request(struct usb_gadget *gadget, struct usb_request *req, int is_in) { usb_gadget_unmap_request_by_dev(gadget->dev.parent, req, is_in); } EXPORT_SYMBOL_GPL(usb_gadget_unmap_request); #endif /* CONFIG_HAS_DMA */ /* ------------------------------------------------------------------------- */ /** * usb_gadget_giveback_request - give the request back to the gadget layer * @ep: the endpoint to be used with with the request * @req: the request being given back * * This is called by device controller drivers in order to return the * completed request back to the gadget layer. 
*/ void usb_gadget_giveback_request(struct usb_ep *ep, struct usb_request *req) { if (likely(req->status == 0)) usb_led_activity(USB_LED_EVENT_GADGET); trace_usb_gadget_giveback_request(ep, req, 0); req->complete(ep, req); } EXPORT_SYMBOL_GPL(usb_gadget_giveback_request); /* ------------------------------------------------------------------------- */ /** * gadget_find_ep_by_name - returns the ep whose name is the same as the string passed * in the second parameter, or NULL if no matching endpoint is found * @g: controller whose endpoints are being searched * @name: name of the endpoint being searched for */ struct usb_ep *gadget_find_ep_by_name(struct usb_gadget *g, const char *name) { struct usb_ep *ep; gadget_for_each_ep(ep, g) { if (!strcmp(ep->name, name)) return ep; } return NULL; } EXPORT_SYMBOL_GPL(gadget_find_ep_by_name); /* ------------------------------------------------------------------------- */ int usb_gadget_ep_match_desc(struct usb_gadget *gadget, struct usb_ep *ep, struct usb_endpoint_descriptor *desc, struct usb_ss_ep_comp_descriptor *ep_comp) { u8 type; u16 max; int num_req_streams = 0; /* endpoint already claimed? */ if (ep->claimed) return 0; type = usb_endpoint_type(desc); max = usb_endpoint_maxp(desc); if (usb_endpoint_dir_in(desc) && !ep->caps.dir_in) return 0; if (usb_endpoint_dir_out(desc) && !ep->caps.dir_out) return 0; if (max > ep->maxpacket_limit) return 0; /* "high bandwidth" works only at high speed */ if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp_mult(desc) > 1) return 0; switch (type) { case USB_ENDPOINT_XFER_CONTROL: /* only support ep0 for portable CONTROL traffic */ return 0; case USB_ENDPOINT_XFER_ISOC: if (!ep->caps.type_iso) return 0; /* ISO: limit 1023 bytes full speed, 1024 high/super speed */ if (!gadget_is_dualspeed(gadget) && max > 1023) return 0; break; case USB_ENDPOINT_XFER_BULK: if (!ep->caps.type_bulk) return 0; if (ep_comp && gadget_is_superspeed(gadget)) { /* Get the number of required streams from the * EP companion descriptor and see if the EP * matches it */ num_req_streams = ep_comp->bmAttributes & 0x1f; if (num_req_streams > ep->max_streams) return 0; } break; case USB_ENDPOINT_XFER_INT: /* Bulk endpoints handle interrupt transfers, * except the toggle-quirky iso-synch kind */ if (!ep->caps.type_int && !ep->caps.type_bulk) return 0; /* INT: limit 64 bytes full speed, 1024 high/super speed */ if (!gadget_is_dualspeed(gadget) && max > 64) return 0; break; } return 1; } EXPORT_SYMBOL_GPL(usb_gadget_ep_match_desc); /** * usb_gadget_check_config - checks if the UDC can support the bound * configuration * @gadget: controller to check the USB configuration * * Ensure that a UDC is able to support the resources requested by a * configuration, and that there are no resource limitations, such as * internal memory allocated to all requested endpoints. * * Returns zero on success, else a negative errno.
*/ int usb_gadget_check_config(struct usb_gadget *gadget) { if (gadget->ops->check_config) return gadget->ops->check_config(gadget); return 0; } EXPORT_SYMBOL_GPL(usb_gadget_check_config); /* ------------------------------------------------------------------------- */ static void usb_gadget_state_work(struct work_struct *work) { struct usb_gadget *gadget = work_to_gadget(work); struct usb_udc *udc = gadget->udc; if (udc) sysfs_notify(&udc->dev.kobj, NULL, "state"); } void usb_gadget_set_state(struct usb_gadget *gadget, enum usb_device_state state) { gadget->state = state; schedule_work(&gadget->work); } EXPORT_SYMBOL_GPL(usb_gadget_set_state); /* ------------------------------------------------------------------------- */ /* Acquire connect_lock before calling this function. */ static int usb_udc_connect_control_locked(struct usb_udc *udc) __must_hold(&udc->connect_lock) { if (udc->vbus) return usb_gadget_connect_locked(udc->gadget); else return usb_gadget_disconnect_locked(udc->gadget); } static void vbus_event_work(struct work_struct *work) { struct usb_udc *udc = container_of(work, struct usb_udc, vbus_work); mutex_lock(&udc->connect_lock); usb_udc_connect_control_locked(udc); mutex_unlock(&udc->connect_lock); } /** * usb_udc_vbus_handler - updates the udc core vbus status, and try to * connect or disconnect gadget * @gadget: The gadget which vbus change occurs * @status: The vbus status * * The udc driver calls it when it wants to connect or disconnect gadget * according to vbus status. * * This function can be invoked from interrupt context by irq handlers of * the gadget drivers, however, usb_udc_connect_control() has to run in * non-atomic context due to the following: * a. Some of the gadget driver implementations expect the ->pullup * callback to be invoked in non-atomic context. * b. usb_gadget_disconnect() acquires udc_lock which is a mutex. * Hence offload invocation of usb_udc_connect_control() to workqueue. */ void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status) { struct usb_udc *udc = gadget->udc; if (udc) { udc->vbus = status; schedule_work(&udc->vbus_work); } } EXPORT_SYMBOL_GPL(usb_udc_vbus_handler); /** * usb_gadget_udc_reset - notifies the udc core that bus reset occurs * @gadget: The gadget which bus reset occurs * @driver: The gadget driver we want to notify * * If the udc driver has bus reset handler, it needs to call this when the bus * reset occurs, it notifies the gadget driver that the bus reset occurs as * well as updates gadget state. */ void usb_gadget_udc_reset(struct usb_gadget *gadget, struct usb_gadget_driver *driver) { driver->reset(gadget); usb_gadget_set_state(gadget, USB_STATE_DEFAULT); } EXPORT_SYMBOL_GPL(usb_gadget_udc_reset); /** * usb_gadget_udc_start_locked - tells usb device controller to start up * @udc: The UDC to be started * * This call is issued by the UDC Class driver when it's about * to register a gadget driver to the device controller, before * calling gadget driver's bind() method. * * It allows the controller to be powered off until strictly * necessary to have it powered on. * * Returns zero on success, else negative errno. * * Caller should acquire connect_lock before invoking this function. 
*/ static inline int usb_gadget_udc_start_locked(struct usb_udc *udc) __must_hold(&udc->connect_lock) { int ret; if (udc->started) { dev_err(&udc->dev, "UDC had already started\n"); return -EBUSY; } ret = udc->gadget->ops->udc_start(udc->gadget, udc->driver); if (!ret) udc->started = true; return ret; } /** * usb_gadget_udc_stop_locked - tells usb device controller we don't need it anymore * @udc: The UDC to be stopped * * This call is issued by the UDC Class driver after calling * gadget driver's unbind() method. * * The details are implementation specific, but it can go as * far as powering off UDC completely and disable its data * line pullups. * * Caller should acquire connect lock before invoking this function. */ static inline void usb_gadget_udc_stop_locked(struct usb_udc *udc) __must_hold(&udc->connect_lock) { if (!udc->started) { dev_err(&udc->dev, "UDC had already stopped\n"); return; } udc->gadget->ops->udc_stop(udc->gadget); udc->started = false; } /** * usb_gadget_udc_set_speed - tells usb device controller speed supported by * current driver * @udc: The device we want to set maximum speed * @speed: The maximum speed to allowed to run * * This call is issued by the UDC Class driver before calling * usb_gadget_udc_start() in order to make sure that we don't try to * connect on speeds the gadget driver doesn't support. */ static inline void usb_gadget_udc_set_speed(struct usb_udc *udc, enum usb_device_speed speed) { struct usb_gadget *gadget = udc->gadget; enum usb_device_speed s; if (speed == USB_SPEED_UNKNOWN) s = gadget->max_speed; else s = min(speed, gadget->max_speed); if (s == USB_SPEED_SUPER_PLUS && gadget->ops->udc_set_ssp_rate) gadget->ops->udc_set_ssp_rate(gadget, gadget->max_ssp_rate); else if (gadget->ops->udc_set_speed) gadget->ops->udc_set_speed(gadget, s); } /** * usb_gadget_enable_async_callbacks - tell usb device controller to enable asynchronous callbacks * @udc: The UDC which should enable async callbacks * * This routine is used when binding gadget drivers. It undoes the effect * of usb_gadget_disable_async_callbacks(); the UDC driver should enable IRQs * (if necessary) and resume issuing callbacks. * * This routine will always be called in process context. */ static inline void usb_gadget_enable_async_callbacks(struct usb_udc *udc) { struct usb_gadget *gadget = udc->gadget; if (gadget->ops->udc_async_callbacks) gadget->ops->udc_async_callbacks(gadget, true); } /** * usb_gadget_disable_async_callbacks - tell usb device controller to disable asynchronous callbacks * @udc: The UDC which should disable async callbacks * * This routine is used when unbinding gadget drivers. It prevents a race: * The UDC driver doesn't know when the gadget driver's ->unbind callback * runs, so unless it is told to disable asynchronous callbacks, it might * issue a callback (such as ->disconnect) after the unbind has completed. * * After this function runs, the UDC driver must suppress all ->suspend, * ->resume, ->disconnect, ->reset, and ->setup callbacks to the gadget driver * until async callbacks are again enabled. A simple-minded but effective * way to accomplish this is to tell the UDC hardware not to generate any * more IRQs. * * Request completion callbacks must still be issued. However, it's okay * to defer them until the request is cancelled, since the pull-up will be * turned off during the time period when async callbacks are disabled. * * This routine will always be called in process context. 
*/ static inline void usb_gadget_disable_async_callbacks(struct usb_udc *udc) { struct usb_gadget *gadget = udc->gadget; if (gadget->ops->udc_async_callbacks) gadget->ops->udc_async_callbacks(gadget, false); } /** * usb_udc_release - release the usb_udc struct * @dev: the dev member within usb_udc * * This is called by driver's core in order to free memory once the last * reference is released. */ static void usb_udc_release(struct device *dev) { struct usb_udc *udc; udc = container_of(dev, struct usb_udc, dev); dev_dbg(dev, "releasing '%s'\n", dev_name(dev)); kfree(udc); } static const struct attribute_group *usb_udc_attr_groups[]; static void usb_udc_nop_release(struct device *dev) { dev_vdbg(dev, "%s\n", __func__); } /** * usb_initialize_gadget - initialize a gadget and its embedded struct device * @parent: the parent device to this udc. Usually the controller driver's * device. * @gadget: the gadget to be initialized. * @release: a gadget release function. */ void usb_initialize_gadget(struct device *parent, struct usb_gadget *gadget, void (*release)(struct device *dev)) { INIT_WORK(&gadget->work, usb_gadget_state_work); gadget->dev.parent = parent; if (release) gadget->dev.release = release; else gadget->dev.release = usb_udc_nop_release; device_initialize(&gadget->dev); gadget->dev.bus = &gadget_bus_type; } EXPORT_SYMBOL_GPL(usb_initialize_gadget); /** * usb_add_gadget - adds a new gadget to the udc class driver list * @gadget: the gadget to be added to the list. * * Returns zero on success, negative errno otherwise. * Does not do a final usb_put_gadget() if an error occurs. */ int usb_add_gadget(struct usb_gadget *gadget) { struct usb_udc *udc; int ret = -ENOMEM; udc = kzalloc(sizeof(*udc), GFP_KERNEL); if (!udc) goto error; device_initialize(&udc->dev); udc->dev.release = usb_udc_release; udc->dev.class = &udc_class; udc->dev.groups = usb_udc_attr_groups; udc->dev.parent = gadget->dev.parent; ret = dev_set_name(&udc->dev, "%s", kobject_name(&gadget->dev.parent->kobj)); if (ret) goto err_put_udc; udc->gadget = gadget; gadget->udc = udc; mutex_init(&udc->connect_lock); udc->started = false; mutex_lock(&udc_lock); list_add_tail(&udc->list, &udc_list); mutex_unlock(&udc_lock); INIT_WORK(&udc->vbus_work, vbus_event_work); ret = device_add(&udc->dev); if (ret) goto err_unlist_udc; usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED); udc->vbus = true; ret = ida_alloc(&gadget_id_numbers, GFP_KERNEL); if (ret < 0) goto err_del_udc; gadget->id_number = ret; dev_set_name(&gadget->dev, "gadget.%d", ret); ret = device_add(&gadget->dev); if (ret) goto err_free_id; ret = sysfs_create_link(&udc->dev.kobj, &gadget->dev.kobj, "gadget"); if (ret) goto err_del_gadget; return 0; err_del_gadget: device_del(&gadget->dev); err_free_id: ida_free(&gadget_id_numbers, gadget->id_number); err_del_udc: flush_work(&gadget->work); device_del(&udc->dev); err_unlist_udc: mutex_lock(&udc_lock); list_del(&udc->list); mutex_unlock(&udc_lock); err_put_udc: put_device(&udc->dev); error: return ret; } EXPORT_SYMBOL_GPL(usb_add_gadget); /** * usb_add_gadget_udc_release - adds a new gadget to the udc class driver list * @parent: the parent device to this udc. Usually the controller driver's * device. * @gadget: the gadget to be added to the list. * @release: a gadget release function. * * Returns zero on success, negative errno otherwise. * Calls the gadget release function in the latter case. 
*/ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, void (*release)(struct device *dev)) { int ret; usb_initialize_gadget(parent, gadget, release); ret = usb_add_gadget(gadget); if (ret) usb_put_gadget(gadget); return ret; } EXPORT_SYMBOL_GPL(usb_add_gadget_udc_release); /** * usb_get_gadget_udc_name - get the name of the first UDC controller * This function returns the name of the first UDC controller in the system. * Please note that this interface is useful only for legacy drivers which * assume that there is only one UDC controller in the system and they need to * get its name before initialization. There is no guarantee that the UDC * with the returned name will still be available when the gadget driver * registers itself. * * Returns a pointer to a string with the UDC controller name on success, NULL * otherwise. The caller should kfree() the returned string. */ char *usb_get_gadget_udc_name(void) { struct usb_udc *udc; char *name = NULL; /* For now we take the first available UDC */ mutex_lock(&udc_lock); list_for_each_entry(udc, &udc_list, list) { if (!udc->driver) { name = kstrdup(udc->gadget->name, GFP_KERNEL); break; } } mutex_unlock(&udc_lock); return name; } EXPORT_SYMBOL_GPL(usb_get_gadget_udc_name); /** * usb_add_gadget_udc - adds a new gadget to the udc class driver list * @parent: the parent device to this udc. Usually the controller * driver's device. * @gadget: the gadget to be added to the list * * Returns zero on success, negative errno otherwise. */ int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget) { return usb_add_gadget_udc_release(parent, gadget, NULL); } EXPORT_SYMBOL_GPL(usb_add_gadget_udc); /** * usb_del_gadget - deletes a gadget and unregisters its udc * @gadget: the gadget to be deleted. * * This will unbind @gadget, if it is bound. * It will not do a final usb_put_gadget(). */ void usb_del_gadget(struct usb_gadget *gadget) { struct usb_udc *udc = gadget->udc; if (!udc) return; dev_vdbg(gadget->dev.parent, "unregistering gadget\n"); mutex_lock(&udc_lock); list_del(&udc->list); mutex_unlock(&udc_lock); kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE); sysfs_remove_link(&udc->dev.kobj, "gadget"); flush_work(&gadget->work); device_del(&gadget->dev); ida_free(&gadget_id_numbers, gadget->id_number); cancel_work_sync(&udc->vbus_work); device_unregister(&udc->dev); } EXPORT_SYMBOL_GPL(usb_del_gadget); /** * usb_del_gadget_udc - unregisters a gadget * @gadget: the gadget to be unregistered. * * Calls usb_del_gadget() and does a final usb_put_gadget().
*/ void usb_del_gadget_udc(struct usb_gadget *gadget) { usb_del_gadget(gadget); usb_put_gadget(gadget); } EXPORT_SYMBOL_GPL(usb_del_gadget_udc); /* ------------------------------------------------------------------------- */ static int gadget_match_driver(struct device *dev, struct device_driver *drv) { struct usb_gadget *gadget = dev_to_usb_gadget(dev); struct usb_udc *udc = gadget->udc; struct usb_gadget_driver *driver = container_of(drv, struct usb_gadget_driver, driver); /* If the driver specifies a udc_name, it must match the UDC's name */ if (driver->udc_name && strcmp(driver->udc_name, dev_name(&udc->dev)) != 0) return 0; /* If the driver is already bound to a gadget, it doesn't match */ if (driver->is_bound) return 0; /* Otherwise any gadget driver matches any UDC */ return 1; } static int gadget_bind_driver(struct device *dev) { struct usb_gadget *gadget = dev_to_usb_gadget(dev); struct usb_udc *udc = gadget->udc; struct usb_gadget_driver *driver = container_of(dev->driver, struct usb_gadget_driver, driver); int ret = 0; mutex_lock(&udc_lock); if (driver->is_bound) { mutex_unlock(&udc_lock); return -ENXIO; /* Driver binds to only one gadget */ } driver->is_bound = true; udc->driver = driver; mutex_unlock(&udc_lock); dev_dbg(&udc->dev, "binding gadget driver [%s]\n", driver->function); usb_gadget_udc_set_speed(udc, driver->max_speed); ret = driver->bind(udc->gadget, driver); if (ret) goto err_bind; mutex_lock(&udc->connect_lock); ret = usb_gadget_udc_start_locked(udc); if (ret) { mutex_unlock(&udc->connect_lock); goto err_start; } usb_gadget_enable_async_callbacks(udc); udc->allow_connect = true; ret = usb_udc_connect_control_locked(udc); if (ret) goto err_connect_control; mutex_unlock(&udc->connect_lock); kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); return 0; err_connect_control: udc->allow_connect = false; usb_gadget_disable_async_callbacks(udc); if (gadget->irq) synchronize_irq(gadget->irq); usb_gadget_udc_stop_locked(udc); mutex_unlock(&udc->connect_lock); err_start: driver->unbind(udc->gadget); err_bind: if (ret != -EISNAM) dev_err(&udc->dev, "failed to start %s: %d\n", driver->function, ret); mutex_lock(&udc_lock); udc->driver = NULL; driver->is_bound = false; mutex_unlock(&udc_lock); return ret; } static void gadget_unbind_driver(struct device *dev) { struct usb_gadget *gadget = dev_to_usb_gadget(dev); struct usb_udc *udc = gadget->udc; struct usb_gadget_driver *driver = udc->driver; dev_dbg(&udc->dev, "unbinding gadget driver [%s]\n", driver->function); udc->allow_connect = false; cancel_work_sync(&udc->vbus_work); mutex_lock(&udc->connect_lock); usb_gadget_disconnect_locked(gadget); usb_gadget_disable_async_callbacks(udc); if (gadget->irq) synchronize_irq(gadget->irq); mutex_unlock(&udc->connect_lock); udc->driver->unbind(gadget); mutex_lock(&udc->connect_lock); usb_gadget_udc_stop_locked(udc); mutex_unlock(&udc->connect_lock); mutex_lock(&udc_lock); driver->is_bound = false; udc->driver = NULL; mutex_unlock(&udc_lock); kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); } /* ------------------------------------------------------------------------- */ int usb_gadget_register_driver_owner(struct usb_gadget_driver *driver, struct module *owner, const char *mod_name) { int ret; if (!driver || !driver->bind || !driver->setup) return -EINVAL; driver->driver.bus = &gadget_bus_type; driver->driver.owner = owner; driver->driver.mod_name = mod_name; ret = driver_register(&driver->driver); if (ret) { pr_warn("%s: driver registration failed: %d\n", driver->function, ret); return ret; } 
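	/*
	 * Descriptive note (added comment): driver_register() has already run
	 * the gadget bus match/probe path (gadget_match_driver() and
	 * gadget_bind_driver() above), so at this point driver->is_bound tells
	 * us whether some UDC picked this driver up.  If none did, either fail
	 * outright (match_existing_only) or stay registered and wait for a UDC
	 * to be added later.
	 */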
mutex_lock(&udc_lock); if (!driver->is_bound) { if (driver->match_existing_only) { pr_warn("%s: couldn't find an available UDC or it's busy\n", driver->function); ret = -EBUSY; } else { pr_info("%s: couldn't find an available UDC\n", driver->function); ret = 0; } } mutex_unlock(&udc_lock); if (ret) driver_unregister(&driver->driver); return ret; } EXPORT_SYMBOL_GPL(usb_gadget_register_driver_owner); int usb_gadget_unregister_driver(struct usb_gadget_driver *driver) { if (!driver || !driver->unbind) return -EINVAL; driver_unregister(&driver->driver); return 0; } EXPORT_SYMBOL_GPL(usb_gadget_unregister_driver); /* ------------------------------------------------------------------------- */ static ssize_t srp_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { struct usb_udc *udc = container_of(dev, struct usb_udc, dev); if (sysfs_streq(buf, "1")) usb_gadget_wakeup(udc->gadget); return n; } static DEVICE_ATTR_WO(srp); static ssize_t soft_connect_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { struct usb_udc *udc = container_of(dev, struct usb_udc, dev); ssize_t ret; device_lock(&udc->gadget->dev); if (!udc->driver) { dev_err(dev, "soft-connect without a gadget driver\n"); ret = -EOPNOTSUPP; goto out; } if (sysfs_streq(buf, "connect")) { mutex_lock(&udc->connect_lock); usb_gadget_udc_start_locked(udc); usb_gadget_connect_locked(udc->gadget); mutex_unlock(&udc->connect_lock); } else if (sysfs_streq(buf, "disconnect")) { mutex_lock(&udc->connect_lock); usb_gadget_disconnect_locked(udc->gadget); usb_gadget_udc_stop_locked(udc); mutex_unlock(&udc->connect_lock); } else { dev_err(dev, "unsupported command '%s'\n", buf); ret = -EINVAL; goto out; } ret = n; out: device_unlock(&udc->gadget->dev); return ret; } static DEVICE_ATTR_WO(soft_connect); static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_udc *udc = container_of(dev, struct usb_udc, dev); struct usb_gadget *gadget = udc->gadget; return sprintf(buf, "%s\n", usb_state_string(gadget->state)); } static DEVICE_ATTR_RO(state); static ssize_t function_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_udc *udc = container_of(dev, struct usb_udc, dev); struct usb_gadget_driver *drv; int rc = 0; mutex_lock(&udc_lock); drv = udc->driver; if (drv && drv->function) rc = scnprintf(buf, PAGE_SIZE, "%s\n", drv->function); mutex_unlock(&udc_lock); return rc; } static DEVICE_ATTR_RO(function); #define USB_UDC_SPEED_ATTR(name, param) \ ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_udc *udc = container_of(dev, struct usb_udc, dev); \ return scnprintf(buf, PAGE_SIZE, "%s\n", \ usb_speed_string(udc->gadget->param)); \ } \ static DEVICE_ATTR_RO(name) static USB_UDC_SPEED_ATTR(current_speed, speed); static USB_UDC_SPEED_ATTR(maximum_speed, max_speed); #define USB_UDC_ATTR(name) \ ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_udc *udc = container_of(dev, struct usb_udc, dev); \ struct usb_gadget *gadget = udc->gadget; \ \ return scnprintf(buf, PAGE_SIZE, "%d\n", gadget->name); \ } \ static DEVICE_ATTR_RO(name) static USB_UDC_ATTR(is_otg); static USB_UDC_ATTR(is_a_peripheral); static USB_UDC_ATTR(b_hnp_enable); static USB_UDC_ATTR(a_hnp_support); static USB_UDC_ATTR(a_alt_hnp_support); static USB_UDC_ATTR(is_selfpowered); static struct attribute *usb_udc_attrs[] = { &dev_attr_srp.attr, 
&dev_attr_soft_connect.attr, &dev_attr_state.attr, &dev_attr_function.attr, &dev_attr_current_speed.attr, &dev_attr_maximum_speed.attr, &dev_attr_is_otg.attr, &dev_attr_is_a_peripheral.attr, &dev_attr_b_hnp_enable.attr, &dev_attr_a_hnp_support.attr, &dev_attr_a_alt_hnp_support.attr, &dev_attr_is_selfpowered.attr, NULL, }; static const struct attribute_group usb_udc_attr_group = { .attrs = usb_udc_attrs, }; static const struct attribute_group *usb_udc_attr_groups[] = { &usb_udc_attr_group, NULL, }; static int usb_udc_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct usb_udc *udc = container_of(dev, struct usb_udc, dev); int ret; ret = add_uevent_var(env, "USB_UDC_NAME=%s", udc->gadget->name); if (ret) { dev_err(dev, "failed to add uevent USB_UDC_NAME\n"); return ret; } mutex_lock(&udc_lock); if (udc->driver) ret = add_uevent_var(env, "USB_UDC_DRIVER=%s", udc->driver->function); mutex_unlock(&udc_lock); if (ret) { dev_err(dev, "failed to add uevent USB_UDC_DRIVER\n"); return ret; } return 0; } static const struct class udc_class = { .name = "udc", .dev_uevent = usb_udc_uevent, }; static const struct bus_type gadget_bus_type = { .name = "gadget", .probe = gadget_bind_driver, .remove = gadget_unbind_driver, .match = gadget_match_driver, }; static int __init usb_udc_init(void) { int rc; rc = class_register(&udc_class); if (rc) return rc; rc = bus_register(&gadget_bus_type); if (rc) class_unregister(&udc_class); return rc; } subsys_initcall(usb_udc_init); static void __exit usb_udc_exit(void) { bus_unregister(&gadget_bus_type); class_unregister(&udc_class); } module_exit(usb_udc_exit); MODULE_DESCRIPTION("UDC Framework"); MODULE_AUTHOR("Felipe Balbi <balbi@ti.com>"); MODULE_LICENSE("GPL v2");
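/*
 * Illustrative sketch, not part of the original file: how a gadget-side
 * function driver typically uses the endpoint request API documented above
 * (usb_ep_alloc_request()/usb_ep_queue() and the single completion callback).
 * The endpoint is assumed to have been enabled already with usb_ep_enable()
 * and a matching descriptor; every example_* name and the use of kmemdup()
 * for the buffer are hypothetical choices, not part of the UDC core API.
 */
#include <linux/slab.h>
#include <linux/usb/gadget.h>

static void example_complete(struct usb_ep *ep, struct usb_request *req)
{
	/* Runs exactly once per successfully queued request. */
	if (req->status)
		pr_debug("transfer ended with status %d\n", req->status);
	else
		pr_debug("transferred %u of %u bytes\n", req->actual, req->length);

	/* Ownership is back with the driver: free (or requeue) the request. */
	kfree(req->buf);
	usb_ep_free_request(ep, req);
}

static int example_queue_in(struct usb_ep *ep, const void *data, size_t len)
{
	struct usb_request *req;
	int ret;

	req = usb_ep_alloc_request(ep, GFP_ATOMIC);
	if (!req)
		return -ENOMEM;

	req->buf = kmemdup(data, len, GFP_ATOMIC);
	if (!req->buf) {
		usb_ep_free_request(ep, req);
		return -ENOMEM;
	}
	req->length = len;
	req->zero = 1;		/* short-terminate if len is a maxpacket multiple */
	req->complete = example_complete;

	/* On success the request belongs to the UDC until it completes. */
	ret = usb_ep_queue(ep, req, GFP_ATOMIC);
	if (ret) {
		kfree(req->buf);
		usb_ep_free_request(ep, req);
	}
	return ret;
}

/*
 * Likewise illustrative: the minimal shape of a gadget driver accepted by
 * usb_gadget_register_driver() above.  bind() and setup() are the methods the
 * registration path requires, and unbind() is needed to unregister; the
 * example_* names are hypothetical.
 */
static int example_bind(struct usb_gadget *gadget,
			struct usb_gadget_driver *driver)
{
	/* Claim endpoints, allocate ep0 requests, etc. */
	return 0;
}

static void example_unbind(struct usb_gadget *gadget)
{
}

static int example_setup(struct usb_gadget *gadget,
			 const struct usb_ctrlrequest *ctrl)
{
	/* Returning a negative value asks the UDC to protocol-stall ep0. */
	return -EOPNOTSUPP;
}

static struct usb_gadget_driver example_gadget_driver = {
	.function	= "example",
	.max_speed	= USB_SPEED_HIGH,
	.bind		= example_bind,
	.unbind		= example_unbind,
	.setup		= example_setup,
	.driver		= {
		.name	= "example_gadget",
	},
};

/*
 * A module would then call usb_gadget_register_driver(&example_gadget_driver)
 * from its init path and usb_gadget_unregister_driver() on exit.
 */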
// SPDX-License-Identifier: GPL-2.0-only /* * Common framework for low-level network console, dump, and debugger code * * Sep 8 2003 Matt Mackall <mpm@selenic.com> * * based on
the netconsole code from: * * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com> * Copyright (C) 2002 Red Hat, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> #include <linux/if_arp.h> #include <linux/inetdevice.h> #include <linux/inet.h> #include <linux/interrupt.h> #include <linux/netpoll.h> #include <linux/sched.h> #include <linux/delay.h> #include <linux/rcupdate.h> #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/if_vlan.h> #include <net/tcp.h> #include <net/udp.h> #include <net/addrconf.h> #include <net/ndisc.h> #include <net/ip6_checksum.h> #include <asm/unaligned.h> #include <trace/events/napi.h> #include <linux/kconfig.h> /* * We maintain a small pool of fully-sized skbs, to make sure the * message gets out even in extreme OOM situations. */ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 static struct sk_buff_head skb_pool; DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 #define MAX_SKB_SIZE \ (sizeof(struct ethhdr) + \ sizeof(struct iphdr) + \ sizeof(struct udphdr) + \ MAX_UDP_CHUNK) static void zap_completion_queue(void); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); #define np_info(np, fmt, ...) \ pr_info("%s: " fmt, np->name, ##__VA_ARGS__) #define np_err(np, fmt, ...) \ pr_err("%s: " fmt, np->name, ##__VA_ARGS__) #define np_notice(np, fmt, ...) \ pr_notice("%s: " fmt, np->name, ##__VA_ARGS__) static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { netdev_tx_t status = NETDEV_TX_OK; netdev_features_t features; features = netif_skb_features(skb); if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { /* This is actually a packet drop, but we * don't want the code that calls this * function to try and operate on a NULL skb. 
*/ goto out; } } status = netdev_start_xmit(skb, dev, txq, false); out: return status; } static void queue_process(struct work_struct *work) { struct netpoll_info *npinfo = container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; unsigned long flags; while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; struct netdev_queue *txq; unsigned int q_index; if (!netif_device_present(dev) || !netif_running(dev)) { kfree_skb(skb); continue; } local_irq_save(flags); /* check if skb->queue_mapping is still valid */ q_index = skb_get_queue_mapping(skb); if (unlikely(q_index >= dev->real_num_tx_queues)) { q_index = q_index % dev->real_num_tx_queues; skb_set_queue_mapping(skb, q_index); } txq = netdev_get_tx_queue(dev, q_index); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) { skb_queue_head(&npinfo->txq, skb); HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); } } static int netif_local_xmit_active(struct net_device *dev) { int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) return 1; } return 0; } static void poll_one_napi(struct napi_struct *napi) { int work; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need * to abort this operation */ if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state)) return; /* We explicitly pass the polling call a budget of 0 to * indicate that we are clearing the Tx path only. */ work = napi->poll(napi, 0); WARN_ONCE(work, "%pS exceeded budget in poll\n", napi->poll); trace_napi_poll(napi, work, 0); clear_bit(NAPI_STATE_NPSVC, &napi->state); } static void poll_napi(struct net_device *dev) { struct napi_struct *napi; int cpu = smp_processor_id(); list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) { poll_one_napi(napi); smp_store_release(&napi->poll_owner, -1); } } } void netpoll_poll_dev(struct net_device *dev) { struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); const struct net_device_ops *ops; /* Don't do any rx activity if the dev_lock mutex is held; * the dev_open/close paths use this to block netpoll activity * while changing device state */ if (!ni || down_trylock(&ni->dev_lock)) return; /* Some drivers will take the same locks in poll and xmit, * we can't poll if local CPU is already in xmit.
*/ if (!netif_running(dev) || netif_local_xmit_active(dev)) { up(&ni->dev_lock); return; } ops = dev->netdev_ops; if (ops->ndo_poll_controller) ops->ndo_poll_controller(dev); poll_napi(dev); up(&ni->dev_lock); zap_completion_queue(); } EXPORT_SYMBOL(netpoll_poll_dev); void netpoll_poll_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; might_sleep(); idx = srcu_read_lock(&netpoll_srcu); ni = srcu_dereference(dev->npinfo, &netpoll_srcu); if (ni) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } EXPORT_SYMBOL(netpoll_poll_disable); void netpoll_poll_enable(struct net_device *dev) { struct netpoll_info *ni; rcu_read_lock(); ni = rcu_dereference(dev->npinfo); if (ni) up(&ni->dev_lock); rcu_read_unlock(); } EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { struct sk_buff *skb; unsigned long flags; spin_lock_irqsave(&skb_pool.lock, flags); while (skb_pool.qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; __skb_queue_tail(&skb_pool, skb); } spin_unlock_irqrestore(&skb_pool.lock, flags); } static void zap_completion_queue(void) { unsigned long flags; struct softnet_data *sd = &get_cpu_var(softnet_data); if (sd->completion_queue) { struct sk_buff *clist; local_irq_save(flags); clist = sd->completion_queue; sd->completion_queue = NULL; local_irq_restore(flags); while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; if (!skb_irq_freeable(skb)) { refcount_set(&skb->users, 1); dev_kfree_skb_any(skb); /* put this one back */ } else { __kfree_skb(skb); } } } put_cpu_var(softnet_data); } static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { int count = 0; struct sk_buff *skb; zap_completion_queue(); refill_skbs(); repeat: skb = alloc_skb(len, GFP_ATOMIC); if (!skb) skb = skb_dequeue(&skb_pool); if (!skb) { if (++count < 10) { netpoll_poll_dev(np->dev); goto repeat; } return NULL; } refcount_set(&skb->users, 1); skb_reserve(skb, reserve); return skb; } static int netpoll_owner_active(struct net_device *dev) { struct napi_struct *napi; list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { if (napi->poll_owner == smp_processor_id()) return 1; } return 0; } /* call with IRQ disabled */ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. 
*/ struct netpoll_info *npinfo; lockdep_assert_irqs_disabled(); dev = np->dev; npinfo = rcu_dereference_bh(dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); return NET_XMIT_DROP; } /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; txq = netdev_core_pick_tx(dev, skb, NULL); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (HARD_TX_TRYLOCK(dev, txq)) { if (!netif_xmit_stopped(txq)) status = netpoll_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); if (dev_xmit_complete(status)) break; } /* tickle device maybe there is some cleanup */ netpoll_poll_dev(np->dev); udelay(USEC_PER_POLL); } WARN_ONCE(!irqs_disabled(), "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n", dev->name, dev->netdev_ops->ndo_start_xmit); } if (!dev_xmit_complete(status)) { skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } return NETDEV_TX_OK; } netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { unsigned long flags; netdev_tx_t ret; if (unlikely(!np)) { dev_kfree_skb_irq(skb); ret = NET_XMIT_DROP; } else { local_irq_save(flags); ret = __netpoll_send_skb(np, skb); local_irq_restore(flags); } return ret; } EXPORT_SYMBOL(netpoll_send_skb); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { int total_len, ip_len, udp_len; struct sk_buff *skb; struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; static atomic_t ip_ident; struct ipv6hdr *ip6h; if (!IS_ENABLED(CONFIG_PREEMPT_RT)) WARN_ON_ONCE(!irqs_disabled()); udp_len = len + sizeof(*udph); if (np->ipv6) ip_len = udp_len + sizeof(*ip6h); else ip_len = udp_len + sizeof(*iph); total_len = ip_len + LL_RESERVED_SPACE(np->dev); skb = find_skb(np, total_len + np->dev->needed_tailroom, total_len - len); if (!skb) return; skb_copy_to_linear_data(skb, msg, len); skb_put(skb, len); skb_push(skb, sizeof(*udph)); skb_reset_transport_header(skb); udph = udp_hdr(skb); udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); if (np->ipv6) { udph->check = 0; udph->check = csum_ipv6_magic(&np->local_ip.in6, &np->remote_ip.in6, udp_len, IPPROTO_UDP, csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); ip6h = ipv6_hdr(skb); /* ip6h->version = 6; ip6h->priority = 0; */ *(unsigned char *)ip6h = 0x60; ip6h->flow_lbl[0] = 0; ip6h->flow_lbl[1] = 0; ip6h->flow_lbl[2] = 0; ip6h->payload_len = htons(sizeof(struct udphdr) + len); ip6h->nexthdr = IPPROTO_UDP; ip6h->hop_limit = 32; ip6h->saddr = np->local_ip.in6; ip6h->daddr = np->remote_ip.in6; eth = skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IPV6); } else { udph->check = 0; udph->check = csum_tcpudp_magic(np->local_ip.ip, np->remote_ip.ip, udp_len, IPPROTO_UDP, csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); iph = ip_hdr(skb); /* iph->version = 4; iph->ihl = 5; */ *(unsigned char *)iph = 0x45; iph->tos = 0; put_unaligned(htons(ip_len), &(iph->tot_len)); iph->id = htons(atomic_inc_return(&ip_ident)); iph->frag_off = 0; iph->ttl = 64; iph->protocol = IPPROTO_UDP; iph->check = 0; put_unaligned(np->local_ip.ip, &(iph->saddr)); put_unaligned(np->remote_ip.ip, &(iph->daddr)); 
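		/*
		 * Descriptive note (added comment): the IPv4 header is now
		 * complete except for its checksum; compute it below and then
		 * prepend the Ethernet header, mirroring what the IPv6 branch
		 * above did for its own headers.
		 */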
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); eth = skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IP); } ether_addr_copy(eth->h_source, np->dev->dev_addr); ether_addr_copy(eth->h_dest, np->remote_mac); skb->dev = np->dev; netpoll_send_skb(np, skb); } EXPORT_SYMBOL(netpoll_send_udp); void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); if (np->ipv6) np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); else np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface '%s'\n", np->dev_name); np_info(np, "remote port %d\n", np->remote_port); if (np->ipv6) np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); else np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } EXPORT_SYMBOL(netpoll_print_options); static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) { const char *end; if (!strchr(str, ':') && in4_pton(str, -1, (void *)addr, -1, &end) > 0) { if (!*end) return 0; } if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { #if IS_ENABLED(CONFIG_IPV6) if (!*end) return 1; #else return -1; #endif } return -1; } int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; int ipv6; bool ipversion_set = false; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; *delim = 0; if (kstrtou16(cur, 10, &np->local_port)) goto parse_failed; cur = delim; } cur++; if (*cur != '/') { ipversion_set = true; if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); if (ipv6 < 0) goto parse_failed; else np->ipv6 = (bool)ipv6; cur = delim; } cur++; if (*cur != ',') { /* parse out dev name */ if ((delim = strchr(cur, ',')) == NULL) goto parse_failed; *delim = 0; strscpy(np->dev_name, cur, sizeof(np->dev_name)); cur = delim; } cur++; if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; *delim = 0; if (*cur == ' ' || *cur == '\t') np_info(np, "warning: whitespace is not allowed\n"); if (kstrtou16(cur, 10, &np->remote_port)) goto parse_failed; cur = delim; } cur++; /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); if (ipv6 < 0) goto parse_failed; else if (ipversion_set && np->ipv6 != (bool)ipv6) goto parse_failed; else np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { /* MAC address */ if (!mac_pton(cur, np->remote_mac)) goto parse_failed; } netpoll_print_options(np); return 0; parse_failed: np_info(np, "couldn't parse config at '%s'!\n", cur); return -1; } EXPORT_SYMBOL(netpoll_parse_options); int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; const struct net_device_ops *ops; int err; np->dev = ndev; strscpy(np->dev_name, ndev->name, IFNAMSIZ); if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", np->dev_name); err = -ENOTSUPP; goto out; } if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; goto out; } sema_init(&npinfo->dev_lock, 1); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); refcount_set(&npinfo->refcnt, 1); ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { err = ops->ndo_netpoll_setup(ndev, npinfo); if (err) goto free_npinfo; } } else { npinfo = 
rtnl_dereference(ndev->npinfo); refcount_inc(&npinfo->refcnt); } npinfo->netpoll = np; /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); return 0; free_npinfo: kfree(npinfo); out: return err; } EXPORT_SYMBOL_GPL(__netpoll_setup); int netpoll_setup(struct netpoll *np) { struct net_device *ndev = NULL; struct in_device *in_dev; int err; rtnl_lock(); if (np->dev_name[0]) { struct net *net = current->nsproxy->net_ns; ndev = __dev_get_by_name(net, np->dev_name); } if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; goto unlock; } netdev_hold(ndev, &np->dev_tracker, GFP_KERNEL); if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; goto put; } if (!netif_running(ndev)) { unsigned long atmost; np_info(np, "device %s not up yet, forcing it\n", np->dev_name); err = dev_open(ndev, NULL); if (err) { np_err(np, "failed to open %s\n", ndev->name); goto put; } rtnl_unlock(); atmost = jiffies + carrier_timeout * HZ; while (!netif_carrier_ok(ndev)) { if (time_after(jiffies, atmost)) { np_notice(np, "timeout waiting for carrier\n"); break; } msleep(1); } rtnl_lock(); } if (!np->local_ip.ip) { if (!np->ipv6) { const struct in_ifaddr *ifa; in_dev = __in_dev_get_rtnl(ndev); if (!in_dev) goto put_noaddr; ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa) { put_noaddr: np_err(np, "no IP address for %s, aborting\n", np->dev_name); err = -EDESTADDRREQ; goto put; } np->local_ip.ip = ifa->ifa_local; np_info(np, "local IP %pI4\n", &np->local_ip.ip); } else { #if IS_ENABLED(CONFIG_IPV6) struct inet6_dev *idev; err = -EDESTADDRREQ; idev = __in6_dev_get(ndev); if (idev) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) != !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL)) continue; np->local_ip.in6 = ifp->addr; err = 0; break; } read_unlock_bh(&idev->lock); } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", np->dev_name); goto put; } else np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); #else np_err(np, "IPv6 is not supported %s, aborting\n", np->dev_name); err = -EINVAL; goto put; #endif } } /* fill up the skb queue */ refill_skbs(); err = __netpoll_setup(np, ndev); if (err) goto put; rtnl_unlock(); return 0; put: netdev_put(ndev, &np->dev_tracker); unlock: rtnl_unlock(); return err; } EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); return 0; } core_initcall(netpoll_init); static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) { struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ cancel_delayed_work(&npinfo->tx_work); /* clean after last, unfinished work */ __skb_queue_purge(&npinfo->txq); /* now cancel it again */ cancel_delayed_work(&npinfo->tx_work); kfree(npinfo); } void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; synchronize_srcu(&netpoll_srcu); if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; ops = np->dev->netdev_ops; if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); } else RCU_INIT_POINTER(np->dev->npinfo, 
			NULL);
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);

void __netpoll_free(struct netpoll *np)
{
	ASSERT_RTNL();

	/* Wait for transmitting packets to finish before freeing. */
	synchronize_rcu();
	__netpoll_cleanup(np);
	kfree(np);
}
EXPORT_SYMBOL_GPL(__netpoll_free);

void netpoll_cleanup(struct netpoll *np)
{
	rtnl_lock();
	if (!np->dev)
		goto out;

	__netpoll_cleanup(np);

	netdev_put(np->dev, &np->dev_tracker);
	np->dev = NULL;
out:
	rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);
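/*
 * Editor's illustration (not part of netpoll.c): a minimal consumer of the
 * exported API above, in the style of netconsole. The module name, the
 * "demo_*" identifiers, the interface "eth0", the addresses and the MAC are
 * invented placeholders; treat this as a hedged sketch of how the pieces
 * fit together, not as a canonical driver.
 */
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/irqflags.h>

static struct netpoll demo_np;

/*
 * Same syntax that netpoll_parse_options() implements above:
 *   [src-port]@[src-ip]/[dev],[dst-port]@<dst-ip>/[dst-mac]
 * The buffer must be writable because parsing NUL-terminates fields in place.
 */
static char demo_cfg[] = "6665@10.0.0.1/eth0,6666@10.0.0.2/aa:bb:cc:dd:ee:ff";

static int __init netpoll_demo_init(void)
{
	static const char msg[] = "netpoll demo up\n";
	unsigned long flags;
	int err;

	if (netpoll_parse_options(&demo_np, demo_cfg))
		return -EINVAL;

	/* Binds to the device, waits for carrier, fills in a local IP. */
	err = netpoll_setup(&demo_np);
	if (err)
		return err;

	/*
	 * netpoll_send_udp() expects to run with interrupts off (see the
	 * WARN_ON_ONCE() above); netconsole calls it under spin_lock_irqsave().
	 */
	local_irq_save(flags);
	netpoll_send_udp(&demo_np, msg, sizeof(msg) - 1);
	local_irq_restore(flags);

	return 0;
}

static void __exit netpoll_demo_exit(void)
{
	netpoll_cleanup(&demo_np);
}

module_init(netpoll_demo_init);
module_exit(netpoll_demo_exit);
MODULE_LICENSE("GPL");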
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 1999-2002 Vojtech Pavlik
 */
#ifndef _INPUT_H
#define _INPUT_H

#include <linux/time.h>
#include <linux/list.h>
#include <uapi/linux/input.h>

/* Implementation details, userspace should not care about these */
#define ABS_MT_FIRST		ABS_MT_TOUCH_MAJOR
#define ABS_MT_LAST		ABS_MT_TOOL_Y

/*
 * In-kernel definitions.
 */

#include <linux/device.h>
#include <linux/fs.h>
#include <linux/timer.h>
#include <linux/mod_devicetable.h>

struct input_dev_poller;

/**
 * struct input_value - input value representation
 * @type: type of value (EV_KEY, EV_ABS, etc)
 * @code: the value code
 * @value: the value
 */
struct input_value {
	__u16 type;
	__u16 code;
	__s32 value;
};

enum input_clock_type {
	INPUT_CLK_REAL = 0,
	INPUT_CLK_MONO,
	INPUT_CLK_BOOT,
	INPUT_CLK_MAX
};

/**
 * struct input_dev - represents an input device
 * @name: name of the device
 * @phys: physical path to the device in the system hierarchy
 * @uniq: unique identification code for the device (if device has it)
 * @id: id of the device (struct input_id)
 * @propbit: bitmap of device properties and quirks
 * @evbit: bitmap of types of events supported by the device (EV_KEY,
 *	EV_REL, etc.)
* @keybit: bitmap of keys/buttons this device has * @relbit: bitmap of relative axes for the device * @absbit: bitmap of absolute axes for the device * @mscbit: bitmap of miscellaneous events supported by the device * @ledbit: bitmap of leds present on the device * @sndbit: bitmap of sound effects supported by the device * @ffbit: bitmap of force feedback effects supported by the device * @swbit: bitmap of switches present on the device * @hint_events_per_packet: average number of events generated by the * device in a packet (between EV_SYN/SYN_REPORT events). Used by * event handlers to estimate size of the buffer needed to hold * events. * @keycodemax: size of keycode table * @keycodesize: size of elements in keycode table * @keycode: map of scancodes to keycodes for this device * @getkeycode: optional legacy method to retrieve current keymap. * @setkeycode: optional method to alter current keymap, used to implement * sparse keymaps. If not supplied default mechanism will be used. * The method is being called while holding event_lock and thus must * not sleep * @ff: force feedback structure associated with the device if device * supports force feedback effects * @poller: poller structure associated with the device if device is * set up to use polling mode * @repeat_key: stores key code of the last key pressed; used to implement * software autorepeat * @timer: timer for software autorepeat * @rep: current values for autorepeat parameters (delay, rate) * @mt: pointer to multitouch state * @absinfo: array of &struct input_absinfo elements holding information * about absolute axes (current value, min, max, flat, fuzz, * resolution) * @key: reflects current state of device's keys/buttons * @led: reflects current state of device's LEDs * @snd: reflects current state of sound effects * @sw: reflects current state of device's switches * @open: this method is called when the very first user calls * input_open_device(). The driver must prepare the device * to start generating events (start polling thread, * request an IRQ, submit URB, etc.). The meaning of open() is * to start providing events to the input core. * @close: this method is called when the very last user calls * input_close_device(). The meaning of close() is to stop * providing events to the input core. * @flush: purges the device. Most commonly used to get rid of force * feedback effects loaded into the device when disconnecting * from it * @event: event handler for events sent _to_ the device, like EV_LED * or EV_SND. The device is expected to carry out the requested * action (turn on a LED, play sound, etc.) The call is protected * by @event_lock and must not sleep * @grab: input handle that currently has the device grabbed (via * EVIOCGRAB ioctl). When a handle grabs a device it becomes sole * recipient for all input events coming from the device * @event_lock: this spinlock is taken when input core receives * and processes a new event for the device (in input_event()). * Code that accesses and/or modifies parameters of a device * (such as keymap or absmin, absmax, absfuzz, etc.) after device * has been registered with input core must take this lock. * @mutex: serializes calls to open(), close() and flush() methods * @users: stores number of users (input handlers) that opened this * device. 
It is used by input_open_device() and input_close_device() * to make sure that dev->open() is only called when the first * user opens device and dev->close() is called when the very * last user closes the device * @going_away: marks devices that are in a middle of unregistering and * causes input_open_device*() fail with -ENODEV. * @dev: driver model's view of this device * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held * @node: used to place the device onto input_dev_list * @num_vals: number of values queued in the current frame * @max_vals: maximum number of values queued in a frame * @vals: array of values queued in the current frame * @devres_managed: indicates that devices is managed with devres framework * and needs not be explicitly unregistered or freed. * @timestamp: storage for a timestamp set by input_set_timestamp called * by a driver * @inhibited: indicates that the input device is inhibited. If that is * the case then input core ignores any events generated by the device. * Device's close() is called when it is being inhibited and its open() * is called when it is being uninhibited. */ struct input_dev { const char *name; const char *phys; const char *uniq; struct input_id id; unsigned long propbit[BITS_TO_LONGS(INPUT_PROP_CNT)]; unsigned long evbit[BITS_TO_LONGS(EV_CNT)]; unsigned long keybit[BITS_TO_LONGS(KEY_CNT)]; unsigned long relbit[BITS_TO_LONGS(REL_CNT)]; unsigned long absbit[BITS_TO_LONGS(ABS_CNT)]; unsigned long mscbit[BITS_TO_LONGS(MSC_CNT)]; unsigned long ledbit[BITS_TO_LONGS(LED_CNT)]; unsigned long sndbit[BITS_TO_LONGS(SND_CNT)]; unsigned long ffbit[BITS_TO_LONGS(FF_CNT)]; unsigned long swbit[BITS_TO_LONGS(SW_CNT)]; unsigned int hint_events_per_packet; unsigned int keycodemax; unsigned int keycodesize; void *keycode; int (*setkeycode)(struct input_dev *dev, const struct input_keymap_entry *ke, unsigned int *old_keycode); int (*getkeycode)(struct input_dev *dev, struct input_keymap_entry *ke); struct ff_device *ff; struct input_dev_poller *poller; unsigned int repeat_key; struct timer_list timer; int rep[REP_CNT]; struct input_mt *mt; struct input_absinfo *absinfo; unsigned long key[BITS_TO_LONGS(KEY_CNT)]; unsigned long led[BITS_TO_LONGS(LED_CNT)]; unsigned long snd[BITS_TO_LONGS(SND_CNT)]; unsigned long sw[BITS_TO_LONGS(SW_CNT)]; int (*open)(struct input_dev *dev); void (*close)(struct input_dev *dev); int (*flush)(struct input_dev *dev, struct file *file); int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); struct input_handle __rcu *grab; spinlock_t event_lock; struct mutex mutex; unsigned int users; bool going_away; struct device dev; struct list_head h_list; struct list_head node; unsigned int num_vals; unsigned int max_vals; struct input_value *vals; bool devres_managed; ktime_t timestamp[INPUT_CLK_MAX]; bool inhibited; }; #define to_input_dev(d) container_of(d, struct input_dev, dev) /* * Verify that we are in sync with input_device_id mod_devicetable.h #defines */ #if EV_MAX != INPUT_DEVICE_ID_EV_MAX #error "EV_MAX and INPUT_DEVICE_ID_EV_MAX do not match" #endif #if KEY_MIN_INTERESTING != INPUT_DEVICE_ID_KEY_MIN_INTERESTING #error "KEY_MIN_INTERESTING and INPUT_DEVICE_ID_KEY_MIN_INTERESTING do not match" #endif #if KEY_MAX != INPUT_DEVICE_ID_KEY_MAX #error "KEY_MAX and INPUT_DEVICE_ID_KEY_MAX do not match" #endif #if REL_MAX != INPUT_DEVICE_ID_REL_MAX #error "REL_MAX and INPUT_DEVICE_ID_REL_MAX do not match" #endif #if ABS_MAX != INPUT_DEVICE_ID_ABS_MAX #error 
"ABS_MAX and INPUT_DEVICE_ID_ABS_MAX do not match" #endif #if MSC_MAX != INPUT_DEVICE_ID_MSC_MAX #error "MSC_MAX and INPUT_DEVICE_ID_MSC_MAX do not match" #endif #if LED_MAX != INPUT_DEVICE_ID_LED_MAX #error "LED_MAX and INPUT_DEVICE_ID_LED_MAX do not match" #endif #if SND_MAX != INPUT_DEVICE_ID_SND_MAX #error "SND_MAX and INPUT_DEVICE_ID_SND_MAX do not match" #endif #if FF_MAX != INPUT_DEVICE_ID_FF_MAX #error "FF_MAX and INPUT_DEVICE_ID_FF_MAX do not match" #endif #if SW_MAX != INPUT_DEVICE_ID_SW_MAX #error "SW_MAX and INPUT_DEVICE_ID_SW_MAX do not match" #endif #if INPUT_PROP_MAX != INPUT_DEVICE_ID_PROP_MAX #error "INPUT_PROP_MAX and INPUT_DEVICE_ID_PROP_MAX do not match" #endif #define INPUT_DEVICE_ID_MATCH_DEVICE \ (INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT) #define INPUT_DEVICE_ID_MATCH_DEVICE_AND_VERSION \ (INPUT_DEVICE_ID_MATCH_DEVICE | INPUT_DEVICE_ID_MATCH_VERSION) struct input_handle; /** * struct input_handler - implements one of interfaces for input devices * @private: driver-specific data * @event: event handler. This method is being called by input core with * interrupts disabled and dev->event_lock spinlock held and so * it may not sleep * @events: event sequence handler. This method is being called by * input core with interrupts disabled and dev->event_lock * spinlock held and so it may not sleep * @filter: similar to @event; separates normal event handlers from * "filters". * @match: called after comparing device's id with handler's id_table * to perform fine-grained matching between device and handler * @connect: called when attaching a handler to an input device * @disconnect: disconnects a handler from input device * @start: starts handler for given handle. This function is called by * input core right after connect() method and also when a process * that "grabbed" a device releases it * @legacy_minors: set to %true by drivers using legacy minor ranges * @minor: beginning of range of 32 legacy minors for devices this driver * can provide * @name: name of the handler, to be shown in /proc/bus/input/handlers * @id_table: pointer to a table of input_device_ids this driver can * handle * @h_list: list of input handles associated with the handler * @node: for placing the driver onto input_handler_list * * Input handlers attach to input devices and create input handles. There * are likely several handlers attached to any given input device at the * same time. All of them will get their copy of input event generated by * the device. * * The very same structure is used to implement input filters. Input core * allows filters to run first and will not pass event to regular handlers * if any of the filters indicate that the event should be filtered (by * returning %true from their filter() method). * * Note that input core serializes calls to connect() and disconnect() * methods. 
*/ struct input_handler { void *private; void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value); void (*events)(struct input_handle *handle, const struct input_value *vals, unsigned int count); bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value); bool (*match)(struct input_handler *handler, struct input_dev *dev); int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id); void (*disconnect)(struct input_handle *handle); void (*start)(struct input_handle *handle); bool legacy_minors; int minor; const char *name; const struct input_device_id *id_table; struct list_head h_list; struct list_head node; }; /** * struct input_handle - links input device with an input handler * @private: handler-specific data * @open: counter showing whether the handle is 'open', i.e. should deliver * events from its device * @name: name given to the handle by handler that created it * @dev: input device the handle is attached to * @handler: handler that works with the device through this handle * @d_node: used to put the handle on device's list of attached handles * @h_node: used to put the handle on handler's list of handles from which * it gets events */ struct input_handle { void *private; int open; const char *name; struct input_dev *dev; struct input_handler *handler; struct list_head d_node; struct list_head h_node; }; struct input_dev __must_check *input_allocate_device(void); struct input_dev __must_check *devm_input_allocate_device(struct device *); void input_free_device(struct input_dev *dev); static inline struct input_dev *input_get_device(struct input_dev *dev) { return dev ? to_input_dev(get_device(&dev->dev)) : NULL; } static inline void input_put_device(struct input_dev *dev) { if (dev) put_device(&dev->dev); } static inline void *input_get_drvdata(struct input_dev *dev) { return dev_get_drvdata(&dev->dev); } static inline void input_set_drvdata(struct input_dev *dev, void *data) { dev_set_drvdata(&dev->dev, data); } int __must_check input_register_device(struct input_dev *); void input_unregister_device(struct input_dev *); void input_reset_device(struct input_dev *); int input_setup_polling(struct input_dev *dev, void (*poll_fn)(struct input_dev *dev)); void input_set_poll_interval(struct input_dev *dev, unsigned int interval); void input_set_min_poll_interval(struct input_dev *dev, unsigned int interval); void input_set_max_poll_interval(struct input_dev *dev, unsigned int interval); int input_get_poll_interval(struct input_dev *dev); int __must_check input_register_handler(struct input_handler *); void input_unregister_handler(struct input_handler *); int __must_check input_get_new_minor(int legacy_base, unsigned int legacy_num, bool allow_dynamic); void input_free_minor(unsigned int minor); int input_handler_for_each_handle(struct input_handler *, void *data, int (*fn)(struct input_handle *, void *)); int input_register_handle(struct input_handle *); void input_unregister_handle(struct input_handle *); int input_grab_device(struct input_handle *); void input_release_device(struct input_handle *); int input_open_device(struct input_handle *); void input_close_device(struct input_handle *); int input_flush_device(struct input_handle *handle, struct file *file); void input_set_timestamp(struct input_dev *dev, ktime_t timestamp); ktime_t *input_get_timestamp(struct input_dev *dev); void input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value); void 
input_inject_event(struct input_handle *handle, unsigned int type, unsigned int code, int value); static inline void input_report_key(struct input_dev *dev, unsigned int code, int value) { input_event(dev, EV_KEY, code, !!value); } static inline void input_report_rel(struct input_dev *dev, unsigned int code, int value) { input_event(dev, EV_REL, code, value); } static inline void input_report_abs(struct input_dev *dev, unsigned int code, int value) { input_event(dev, EV_ABS, code, value); } static inline void input_report_ff_status(struct input_dev *dev, unsigned int code, int value) { input_event(dev, EV_FF_STATUS, code, value); } static inline void input_report_switch(struct input_dev *dev, unsigned int code, int value) { input_event(dev, EV_SW, code, !!value); } static inline void input_sync(struct input_dev *dev) { input_event(dev, EV_SYN, SYN_REPORT, 0); } static inline void input_mt_sync(struct input_dev *dev) { input_event(dev, EV_SYN, SYN_MT_REPORT, 0); } void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code); /** * input_set_events_per_packet - tell handlers about the driver event rate * @dev: the input device used by the driver * @n_events: the average number of events between calls to input_sync() * * If the event rate sent from a device is unusually large, use this * function to set the expected event rate. This will allow handlers * to set up an appropriate buffer size for the event stream, in order * to minimize information loss. */ static inline void input_set_events_per_packet(struct input_dev *dev, int n_events) { dev->hint_events_per_packet = n_events; } void input_alloc_absinfo(struct input_dev *dev); void input_set_abs_params(struct input_dev *dev, unsigned int axis, int min, int max, int fuzz, int flat); void input_copy_abs(struct input_dev *dst, unsigned int dst_axis, const struct input_dev *src, unsigned int src_axis); #define INPUT_GENERATE_ABS_ACCESSORS(_suffix, _item) \ static inline int input_abs_get_##_suffix(struct input_dev *dev, \ unsigned int axis) \ { \ return dev->absinfo ? 
dev->absinfo[axis]._item : 0; \ } \ \ static inline void input_abs_set_##_suffix(struct input_dev *dev, \ unsigned int axis, int val) \ { \ input_alloc_absinfo(dev); \ if (dev->absinfo) \ dev->absinfo[axis]._item = val; \ } INPUT_GENERATE_ABS_ACCESSORS(val, value) INPUT_GENERATE_ABS_ACCESSORS(min, minimum) INPUT_GENERATE_ABS_ACCESSORS(max, maximum) INPUT_GENERATE_ABS_ACCESSORS(fuzz, fuzz) INPUT_GENERATE_ABS_ACCESSORS(flat, flat) INPUT_GENERATE_ABS_ACCESSORS(res, resolution) int input_scancode_to_scalar(const struct input_keymap_entry *ke, unsigned int *scancode); int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, const struct input_keymap_entry *ke); bool input_match_device_id(const struct input_dev *dev, const struct input_device_id *id); void input_enable_softrepeat(struct input_dev *dev, int delay, int period); bool input_device_enabled(struct input_dev *dev); extern const struct class input_class; /** * struct ff_device - force-feedback part of an input device * @upload: Called to upload an new effect into device * @erase: Called to erase an effect from device * @playback: Called to request device to start playing specified effect * @set_gain: Called to set specified gain * @set_autocenter: Called to auto-center device * @destroy: called by input core when parent input device is being * destroyed * @private: driver-specific data, will be freed automatically * @ffbit: bitmap of force feedback capabilities truly supported by * device (not emulated like ones in input_dev->ffbit) * @mutex: mutex for serializing access to the device * @max_effects: maximum number of effects supported by device * @effects: pointer to an array of effects currently loaded into device * @effect_owners: array of effect owners; when file handle owning * an effect gets closed the effect is automatically erased * * Every force-feedback device must implement upload() and playback() * methods; erase() is optional. set_gain() and set_autocenter() need * only be implemented if driver sets up FF_GAIN and FF_AUTOCENTER * bits. * * Note that playback(), set_gain() and set_autocenter() are called with * dev->event_lock spinlock held and interrupts off and thus may not * sleep. */ struct ff_device { int (*upload)(struct input_dev *dev, struct ff_effect *effect, struct ff_effect *old); int (*erase)(struct input_dev *dev, int effect_id); int (*playback)(struct input_dev *dev, int effect_id, int value); void (*set_gain)(struct input_dev *dev, u16 gain); void (*set_autocenter)(struct input_dev *dev, u16 magnitude); void (*destroy)(struct ff_device *); void *private; unsigned long ffbit[BITS_TO_LONGS(FF_CNT)]; struct mutex mutex; int max_effects; struct ff_effect *effects; struct file *effect_owners[] __counted_by(max_effects); }; int input_ff_create(struct input_dev *dev, unsigned int max_effects); void input_ff_destroy(struct input_dev *dev); int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code, int value); int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file); int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file); int input_ff_flush(struct input_dev *dev, struct file *file); int input_ff_create_memless(struct input_dev *dev, void *data, int (*play_effect)(struct input_dev *, void *, struct ff_effect *)); #endif
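/*
 * Editor's illustration (not part of input.h): a tiny virtual input device
 * built on the API declared above. Every "vbtn" identifier and the device
 * name are invented for illustration; this is a hedged sketch of the
 * allocate / set capabilities / register / report flow, not a real driver.
 */
#include <linux/module.h>
#include <linux/input.h>

static struct input_dev *vbtn_dev;

static int __init vbtn_init(void)
{
	int err;

	vbtn_dev = input_allocate_device();
	if (!vbtn_dev)
		return -ENOMEM;

	vbtn_dev->name = "Virtual Button";
	vbtn_dev->id.bustype = BUS_VIRTUAL;

	/* Declare that the device emits EV_KEY events for KEY_ENTER. */
	input_set_capability(vbtn_dev, EV_KEY, KEY_ENTER);

	err = input_register_device(vbtn_dev);
	if (err) {
		input_free_device(vbtn_dev);
		return err;
	}

	/* Normally done from an IRQ handler or poll function. */
	input_report_key(vbtn_dev, KEY_ENTER, 1);	/* press */
	input_report_key(vbtn_dev, KEY_ENTER, 0);	/* release */
	input_sync(vbtn_dev);				/* end of event frame */

	return 0;
}

static void __exit vbtn_exit(void)
{
	/* Unregistering drops the last reference; no input_free_device() here. */
	input_unregister_device(vbtn_dev);
}

module_init(vbtn_init);
module_exit(vbtn_exit);
MODULE_LICENSE("GPL");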
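/*
 * Editor's illustration (not part of either header): the hid.h header that
 * follows declares struct hid_driver, HID_USB_DEVICE() and the
 * module_hid_driver() helper documented below. This is a hedged sketch of
 * the smallest driver skeleton built on them; the 0x1234/0x5678 IDs and all
 * "demo" identifiers are invented, and hid_parse()/hid_hw_start()/
 * hid_hw_stop() are the usual transport-neutral helpers from <linux/hid.h>.
 */
#include <linux/module.h>
#include <linux/hid.h>

static const struct hid_device_id demo_hid_ids[] = {
	{ HID_USB_DEVICE(0x1234, 0x5678) },	/* placeholder VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(hid, demo_hid_ids);

static int demo_hid_probe(struct hid_device *hdev,
			  const struct hid_device_id *id)
{
	int ret;

	ret = hid_parse(hdev);		/* parse the report descriptor */
	if (ret)
		return ret;

	/* Hook up hidinput/hidraw/hiddev as configured. */
	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}

static void demo_hid_remove(struct hid_device *hdev)
{
	hid_hw_stop(hdev);
}

static struct hid_driver demo_hid_driver = {
	.name		= "hid-demo",
	.id_table	= demo_hid_ids,
	.probe		= demo_hid_probe,
	.remove		= demo_hid_remove,
};
module_hid_driver(demo_hid_driver);

MODULE_LICENSE("GPL");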
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (c) 1999 Andreas Gal
 * Copyright (c) 2000-2001 Vojtech Pavlik
 * Copyright (c) 2006-2007 Jiri Kosina
 */
/*
 *
 * Should you need to contact me, the author, you can do so either by
 * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
 * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
 */
#ifndef __HID_H
#define __HID_H

#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/mod_devicetable.h> /* hid_device_id */
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/input.h>
#include <linux/semaphore.h>
#include <linux/mutex.h>
#include <linux/power_supply.h>
#include <uapi/linux/hid.h>
#include <linux/hid_bpf.h>

/*
 * We parse each description item into this structure. Short items data
 * values are expanded to 32-bit signed int, long items contain a pointer
 * into the data area.
*/ struct hid_item { unsigned format; __u8 size; __u8 type; __u8 tag; union { __u8 u8; __s8 s8; __u16 u16; __s16 s16; __u32 u32; __s32 s32; __u8 *longdata; } data; }; /* * HID report item format */ #define HID_ITEM_FORMAT_SHORT 0 #define HID_ITEM_FORMAT_LONG 1 /* * Special tag indicating long items */ #define HID_ITEM_TAG_LONG 15 /* * HID report descriptor item type (prefix bit 2,3) */ #define HID_ITEM_TYPE_MAIN 0 #define HID_ITEM_TYPE_GLOBAL 1 #define HID_ITEM_TYPE_LOCAL 2 #define HID_ITEM_TYPE_RESERVED 3 /* * HID report descriptor main item tags */ #define HID_MAIN_ITEM_TAG_INPUT 8 #define HID_MAIN_ITEM_TAG_OUTPUT 9 #define HID_MAIN_ITEM_TAG_FEATURE 11 #define HID_MAIN_ITEM_TAG_BEGIN_COLLECTION 10 #define HID_MAIN_ITEM_TAG_END_COLLECTION 12 /* * HID report descriptor main item contents */ #define HID_MAIN_ITEM_CONSTANT 0x001 #define HID_MAIN_ITEM_VARIABLE 0x002 #define HID_MAIN_ITEM_RELATIVE 0x004 #define HID_MAIN_ITEM_WRAP 0x008 #define HID_MAIN_ITEM_NONLINEAR 0x010 #define HID_MAIN_ITEM_NO_PREFERRED 0x020 #define HID_MAIN_ITEM_NULL_STATE 0x040 #define HID_MAIN_ITEM_VOLATILE 0x080 #define HID_MAIN_ITEM_BUFFERED_BYTE 0x100 /* * HID report descriptor collection item types */ #define HID_COLLECTION_PHYSICAL 0 #define HID_COLLECTION_APPLICATION 1 #define HID_COLLECTION_LOGICAL 2 #define HID_COLLECTION_NAMED_ARRAY 4 /* * HID report descriptor global item tags */ #define HID_GLOBAL_ITEM_TAG_USAGE_PAGE 0 #define HID_GLOBAL_ITEM_TAG_LOGICAL_MINIMUM 1 #define HID_GLOBAL_ITEM_TAG_LOGICAL_MAXIMUM 2 #define HID_GLOBAL_ITEM_TAG_PHYSICAL_MINIMUM 3 #define HID_GLOBAL_ITEM_TAG_PHYSICAL_MAXIMUM 4 #define HID_GLOBAL_ITEM_TAG_UNIT_EXPONENT 5 #define HID_GLOBAL_ITEM_TAG_UNIT 6 #define HID_GLOBAL_ITEM_TAG_REPORT_SIZE 7 #define HID_GLOBAL_ITEM_TAG_REPORT_ID 8 #define HID_GLOBAL_ITEM_TAG_REPORT_COUNT 9 #define HID_GLOBAL_ITEM_TAG_PUSH 10 #define HID_GLOBAL_ITEM_TAG_POP 11 /* * HID report descriptor local item tags */ #define HID_LOCAL_ITEM_TAG_USAGE 0 #define HID_LOCAL_ITEM_TAG_USAGE_MINIMUM 1 #define HID_LOCAL_ITEM_TAG_USAGE_MAXIMUM 2 #define HID_LOCAL_ITEM_TAG_DESIGNATOR_INDEX 3 #define HID_LOCAL_ITEM_TAG_DESIGNATOR_MINIMUM 4 #define HID_LOCAL_ITEM_TAG_DESIGNATOR_MAXIMUM 5 #define HID_LOCAL_ITEM_TAG_STRING_INDEX 7 #define HID_LOCAL_ITEM_TAG_STRING_MINIMUM 8 #define HID_LOCAL_ITEM_TAG_STRING_MAXIMUM 9 #define HID_LOCAL_ITEM_TAG_DELIMITER 10 /* * HID usage tables */ #define HID_USAGE_PAGE 0xffff0000 #define HID_UP_UNDEFINED 0x00000000 #define HID_UP_GENDESK 0x00010000 #define HID_UP_SIMULATION 0x00020000 #define HID_UP_GENDEVCTRLS 0x00060000 #define HID_UP_KEYBOARD 0x00070000 #define HID_UP_LED 0x00080000 #define HID_UP_BUTTON 0x00090000 #define HID_UP_ORDINAL 0x000a0000 #define HID_UP_TELEPHONY 0x000b0000 #define HID_UP_CONSUMER 0x000c0000 #define HID_UP_DIGITIZER 0x000d0000 #define HID_UP_PID 0x000f0000 #define HID_UP_BATTERY 0x00850000 #define HID_UP_CAMERA 0x00900000 #define HID_UP_HPVENDOR 0xff7f0000 #define HID_UP_HPVENDOR2 0xff010000 #define HID_UP_MSVENDOR 0xff000000 #define HID_UP_CUSTOM 0x00ff0000 #define HID_UP_LOGIVENDOR 0xffbc0000 #define HID_UP_LOGIVENDOR2 0xff090000 #define HID_UP_LOGIVENDOR3 0xff430000 #define HID_UP_LNVENDOR 0xffa00000 #define HID_UP_SENSOR 0x00200000 #define HID_UP_ASUSVENDOR 0xff310000 #define HID_UP_GOOGLEVENDOR 0xffd10000 #define HID_USAGE 0x0000ffff #define HID_GD_POINTER 0x00010001 #define HID_GD_MOUSE 0x00010002 #define HID_GD_JOYSTICK 0x00010004 #define HID_GD_GAMEPAD 0x00010005 #define HID_GD_KEYBOARD 0x00010006 #define HID_GD_KEYPAD 0x00010007 #define 
HID_GD_MULTIAXIS 0x00010008 /* * Microsoft Win8 Wireless Radio Controls extensions CA, see: * http://www.usb.org/developers/hidpage/HUTRR40RadioHIDUsagesFinal.pdf */ #define HID_GD_WIRELESS_RADIO_CTLS 0x0001000c /* * System Multi-Axis, see: * http://www.usb.org/developers/hidpage/HUTRR62_-_Generic_Desktop_CA_for_System_Multi-Axis_Controllers.txt */ #define HID_GD_SYSTEM_MULTIAXIS 0x0001000e #define HID_GD_X 0x00010030 #define HID_GD_Y 0x00010031 #define HID_GD_Z 0x00010032 #define HID_GD_RX 0x00010033 #define HID_GD_RY 0x00010034 #define HID_GD_RZ 0x00010035 #define HID_GD_SLIDER 0x00010036 #define HID_GD_DIAL 0x00010037 #define HID_GD_WHEEL 0x00010038 #define HID_GD_HATSWITCH 0x00010039 #define HID_GD_BUFFER 0x0001003a #define HID_GD_BYTECOUNT 0x0001003b #define HID_GD_MOTION 0x0001003c #define HID_GD_START 0x0001003d #define HID_GD_SELECT 0x0001003e #define HID_GD_VX 0x00010040 #define HID_GD_VY 0x00010041 #define HID_GD_VZ 0x00010042 #define HID_GD_VBRX 0x00010043 #define HID_GD_VBRY 0x00010044 #define HID_GD_VBRZ 0x00010045 #define HID_GD_VNO 0x00010046 #define HID_GD_FEATURE 0x00010047 #define HID_GD_RESOLUTION_MULTIPLIER 0x00010048 #define HID_GD_SYSTEM_CONTROL 0x00010080 #define HID_GD_UP 0x00010090 #define HID_GD_DOWN 0x00010091 #define HID_GD_RIGHT 0x00010092 #define HID_GD_LEFT 0x00010093 /* Microsoft Win8 Wireless Radio Controls CA usage codes */ #define HID_GD_RFKILL_BTN 0x000100c6 #define HID_GD_RFKILL_LED 0x000100c7 #define HID_GD_RFKILL_SWITCH 0x000100c8 #define HID_DC_BATTERYSTRENGTH 0x00060020 #define HID_CP_CONSUMER_CONTROL 0x000c0001 #define HID_CP_AC_PAN 0x000c0238 #define HID_DG_DIGITIZER 0x000d0001 #define HID_DG_PEN 0x000d0002 #define HID_DG_LIGHTPEN 0x000d0003 #define HID_DG_TOUCHSCREEN 0x000d0004 #define HID_DG_TOUCHPAD 0x000d0005 #define HID_DG_WHITEBOARD 0x000d0006 #define HID_DG_STYLUS 0x000d0020 #define HID_DG_PUCK 0x000d0021 #define HID_DG_FINGER 0x000d0022 #define HID_DG_TIPPRESSURE 0x000d0030 #define HID_DG_BARRELPRESSURE 0x000d0031 #define HID_DG_INRANGE 0x000d0032 #define HID_DG_TOUCH 0x000d0033 #define HID_DG_UNTOUCH 0x000d0034 #define HID_DG_TAP 0x000d0035 #define HID_DG_TRANSDUCER_INDEX 0x000d0038 #define HID_DG_TABLETFUNCTIONKEY 0x000d0039 #define HID_DG_PROGRAMCHANGEKEY 0x000d003a #define HID_DG_BATTERYSTRENGTH 0x000d003b #define HID_DG_INVERT 0x000d003c #define HID_DG_TILT_X 0x000d003d #define HID_DG_TILT_Y 0x000d003e #define HID_DG_TWIST 0x000d0041 #define HID_DG_TIPSWITCH 0x000d0042 #define HID_DG_TIPSWITCH2 0x000d0043 #define HID_DG_BARRELSWITCH 0x000d0044 #define HID_DG_ERASER 0x000d0045 #define HID_DG_TABLETPICK 0x000d0046 #define HID_DG_PEN_COLOR 0x000d005c #define HID_DG_PEN_LINE_WIDTH 0x000d005e #define HID_DG_PEN_LINE_STYLE 0x000d0070 #define HID_DG_PEN_LINE_STYLE_INK 0x000d0072 #define HID_DG_PEN_LINE_STYLE_PENCIL 0x000d0073 #define HID_DG_PEN_LINE_STYLE_HIGHLIGHTER 0x000d0074 #define HID_DG_PEN_LINE_STYLE_CHISEL_MARKER 0x000d0075 #define HID_DG_PEN_LINE_STYLE_BRUSH 0x000d0076 #define HID_DG_PEN_LINE_STYLE_NO_PREFERENCE 0x000d0077 #define HID_CP_CONSUMERCONTROL 0x000c0001 #define HID_CP_NUMERICKEYPAD 0x000c0002 #define HID_CP_PROGRAMMABLEBUTTONS 0x000c0003 #define HID_CP_MICROPHONE 0x000c0004 #define HID_CP_HEADPHONE 0x000c0005 #define HID_CP_GRAPHICEQUALIZER 0x000c0006 #define HID_CP_FUNCTIONBUTTONS 0x000c0036 #define HID_CP_SELECTION 0x000c0080 #define HID_CP_MEDIASELECTION 0x000c0087 #define HID_CP_SELECTDISC 0x000c00ba #define HID_CP_VOLUMEUP 0x000c00e9 #define HID_CP_VOLUMEDOWN 0x000c00ea #define HID_CP_PLAYBACKSPEED 0x000c00f1 #define 
HID_CP_PROXIMITY 0x000c0109 #define HID_CP_SPEAKERSYSTEM 0x000c0160 #define HID_CP_CHANNELLEFT 0x000c0161 #define HID_CP_CHANNELRIGHT 0x000c0162 #define HID_CP_CHANNELCENTER 0x000c0163 #define HID_CP_CHANNELFRONT 0x000c0164 #define HID_CP_CHANNELCENTERFRONT 0x000c0165 #define HID_CP_CHANNELSIDE 0x000c0166 #define HID_CP_CHANNELSURROUND 0x000c0167 #define HID_CP_CHANNELLOWFREQUENCYENHANCEMENT 0x000c0168 #define HID_CP_CHANNELTOP 0x000c0169 #define HID_CP_CHANNELUNKNOWN 0x000c016a #define HID_CP_APPLICATIONLAUNCHBUTTONS 0x000c0180 #define HID_CP_GENERICGUIAPPLICATIONCONTROLS 0x000c0200 #define HID_DG_DEVICECONFIG 0x000d000e #define HID_DG_DEVICESETTINGS 0x000d0023 #define HID_DG_AZIMUTH 0x000d003f #define HID_DG_CONFIDENCE 0x000d0047 #define HID_DG_WIDTH 0x000d0048 #define HID_DG_HEIGHT 0x000d0049 #define HID_DG_CONTACTID 0x000d0051 #define HID_DG_INPUTMODE 0x000d0052 #define HID_DG_DEVICEINDEX 0x000d0053 #define HID_DG_CONTACTCOUNT 0x000d0054 #define HID_DG_CONTACTMAX 0x000d0055 #define HID_DG_SCANTIME 0x000d0056 #define HID_DG_SURFACESWITCH 0x000d0057 #define HID_DG_BUTTONSWITCH 0x000d0058 #define HID_DG_BUTTONTYPE 0x000d0059 #define HID_DG_BARRELSWITCH2 0x000d005a #define HID_DG_TOOLSERIALNUMBER 0x000d005b #define HID_DG_LATENCYMODE 0x000d0060 #define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 #define HID_BAT_CHARGING 0x00850044 #define HID_VD_ASUS_CUSTOM_MEDIA_KEYS 0xff310076 /* * HID connect requests */ #define HID_CONNECT_HIDINPUT BIT(0) #define HID_CONNECT_HIDINPUT_FORCE BIT(1) #define HID_CONNECT_HIDRAW BIT(2) #define HID_CONNECT_HIDDEV BIT(3) #define HID_CONNECT_HIDDEV_FORCE BIT(4) #define HID_CONNECT_FF BIT(5) #define HID_CONNECT_DRIVER BIT(6) #define HID_CONNECT_DEFAULT (HID_CONNECT_HIDINPUT|HID_CONNECT_HIDRAW| \ HID_CONNECT_HIDDEV|HID_CONNECT_FF) /* * HID device quirks. 
*/ /* * Increase this if you need to configure more HID quirks at module load time */ #define MAX_USBHID_BOOT_QUIRKS 4 /** * DOC: HID quirks * | @HID_QUIRK_NOTOUCH: * | @HID_QUIRK_IGNORE: ignore this device * | @HID_QUIRK_NOGET: * | @HID_QUIRK_HIDDEV_FORCE: * | @HID_QUIRK_BADPAD: * | @HID_QUIRK_MULTI_INPUT: * | @HID_QUIRK_HIDINPUT_FORCE: * | @HID_QUIRK_ALWAYS_POLL: * | @HID_QUIRK_INPUT_PER_APP: * | @HID_QUIRK_X_INVERT: * | @HID_QUIRK_Y_INVERT: * | @HID_QUIRK_SKIP_OUTPUT_REPORTS: * | @HID_QUIRK_SKIP_OUTPUT_REPORT_ID: * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP: * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: * | @HID_QUIRK_FULLSPEED_INTERVAL: * | @HID_QUIRK_NO_INIT_REPORTS: * | @HID_QUIRK_NO_IGNORE: * | @HID_QUIRK_NO_INPUT_SYNC: */ /* BIT(0) reserved for backward compatibility, was HID_QUIRK_INVERT */ #define HID_QUIRK_NOTOUCH BIT(1) #define HID_QUIRK_IGNORE BIT(2) #define HID_QUIRK_NOGET BIT(3) #define HID_QUIRK_HIDDEV_FORCE BIT(4) #define HID_QUIRK_BADPAD BIT(5) #define HID_QUIRK_MULTI_INPUT BIT(6) #define HID_QUIRK_HIDINPUT_FORCE BIT(7) /* BIT(8) reserved for backward compatibility, was HID_QUIRK_NO_EMPTY_INPUT */ /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ #define HID_QUIRK_ALWAYS_POLL BIT(10) #define HID_QUIRK_INPUT_PER_APP BIT(11) #define HID_QUIRK_X_INVERT BIT(12) #define HID_QUIRK_Y_INVERT BIT(13) #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) #define HID_QUIRK_HAVE_SPECIAL_DRIVER BIT(19) #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) #define HID_QUIRK_NOINVERT BIT(21) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) #define HID_QUIRK_NO_INPUT_SYNC BIT(31) /* * HID device groups * * Note: HID_GROUP_ANY is declared in linux/mod_devicetable.h * and has a value of 0x0000 */ #define HID_GROUP_GENERIC 0x0001 #define HID_GROUP_MULTITOUCH 0x0002 #define HID_GROUP_SENSOR_HUB 0x0003 #define HID_GROUP_MULTITOUCH_WIN_8 0x0004 /* * Vendor specific HID device groups */ #define HID_GROUP_RMI 0x0100 #define HID_GROUP_WACOM 0x0101 #define HID_GROUP_LOGITECH_DJ_DEVICE 0x0102 #define HID_GROUP_STEAM 0x0103 #define HID_GROUP_LOGITECH_27MHZ_DEVICE 0x0104 #define HID_GROUP_VIVALDI 0x0105 /* * HID protocol status */ #define HID_REPORT_PROTOCOL 1 #define HID_BOOT_PROTOCOL 0 /* * This is the global environment of the parser. This information is * persistent for main-items. The global environment can be saved and * restored with PUSH/POP statements. */ struct hid_global { unsigned usage_page; __s32 logical_minimum; __s32 logical_maximum; __s32 physical_minimum; __s32 physical_maximum; __s32 unit_exponent; unsigned unit; unsigned report_id; unsigned report_size; unsigned report_count; }; /* * This is the local environment. It is persistent up the next main-item. */ #define HID_MAX_USAGES 12288 #define HID_DEFAULT_NUM_COLLECTIONS 16 struct hid_local { unsigned usage[HID_MAX_USAGES]; /* usage array */ u8 usage_size[HID_MAX_USAGES]; /* usage size array */ unsigned collection_index[HID_MAX_USAGES]; /* collection index array */ unsigned usage_index; unsigned usage_minimum; unsigned delimiter_depth; unsigned delimiter_branch; }; /* * This is the collection stack. We climb up the stack to determine * application and function of each field. 
*/ struct hid_collection { int parent_idx; /* device->collection */ unsigned type; unsigned usage; unsigned level; }; struct hid_usage { unsigned hid; /* hid usage code */ unsigned collection_index; /* index into collection array */ unsigned usage_index; /* index into usage array */ __s8 resolution_multiplier;/* Effective Resolution Multiplier (HUT v1.12, 4.3.1), default: 1 */ /* hidinput data */ __s8 wheel_factor; /* 120/resolution_multiplier */ __u16 code; /* input driver code */ __u8 type; /* input driver type */ __s8 hat_min; /* hat switch fun */ __s8 hat_max; /* ditto */ __s8 hat_dir; /* ditto */ __s16 wheel_accumulated; /* hi-res wheel */ }; struct hid_input; struct hid_field { unsigned physical; /* physical usage for this field */ unsigned logical; /* logical usage for this field */ unsigned application; /* application usage for this field */ struct hid_usage *usage; /* usage table for this function */ unsigned maxusage; /* maximum usage index */ unsigned flags; /* main-item flags (i.e. volatile,array,constant) */ unsigned report_offset; /* bit offset in the report */ unsigned report_size; /* size of this field in the report */ unsigned report_count; /* number of this field in the report */ unsigned report_type; /* (input,output,feature) */ __s32 *value; /* last known value(s) */ __s32 *new_value; /* newly read value(s) */ __s32 *usages_priorities; /* priority of each usage when reading the report * bits 8-16 are reserved for hid-input usage */ __s32 logical_minimum; __s32 logical_maximum; __s32 physical_minimum; __s32 physical_maximum; __s32 unit_exponent; unsigned unit; bool ignored; /* this field is ignored in this event */ struct hid_report *report; /* associated report */ unsigned index; /* index into report->field[] */ /* hidinput data */ struct hid_input *hidinput; /* associated input structure */ __u16 dpad; /* dpad input code */ unsigned int slot_idx; /* slot index in a report */ }; #define HID_MAX_FIELDS 256 struct hid_field_entry { struct list_head list; struct hid_field *field; unsigned int index; __s32 priority; }; struct hid_report { struct list_head list; struct list_head hidinput_list; struct list_head field_entry_list; /* ordered list of input fields */ unsigned int id; /* id of this report */ enum hid_report_type type; /* report type */ unsigned int application; /* application usage for this report */ struct hid_field *field[HID_MAX_FIELDS]; /* fields of the report */ struct hid_field_entry *field_entries; /* allocated memory of input field_entry */ unsigned maxfield; /* maximum valid field index */ unsigned size; /* size of the report (bits) */ struct hid_device *device; /* associated device */ /* tool related state */ bool tool_active; /* whether the current tool is active */ unsigned int tool; /* BTN_TOOL_* */ }; #define HID_MAX_IDS 256 struct hid_report_enum { unsigned numbered; struct list_head report_list; struct hid_report *report_id_hash[HID_MAX_IDS]; }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ #define HID_MAX_BUFFER_SIZE 16384 /* 16kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 struct hid_control_fifo { unsigned char dir; struct hid_report *report; char *raw_report; }; struct hid_output_fifo { struct hid_report *report; char *raw_report; }; #define HID_CLAIMED_INPUT BIT(0) #define HID_CLAIMED_HIDDEV BIT(1) #define HID_CLAIMED_HIDRAW BIT(2) #define HID_CLAIMED_DRIVER BIT(3) #define HID_STAT_ADDED BIT(0) #define HID_STAT_PARSED BIT(1) #define 
HID_STAT_DUP_DETECTED BIT(2) #define HID_STAT_REPROBED BIT(3) struct hid_input { struct list_head list; struct hid_report *report; struct input_dev *input; const char *name; struct list_head reports; /* the list of reports */ unsigned int application; /* application usage for this input */ bool registered; }; enum hid_type { HID_TYPE_OTHER = 0, HID_TYPE_USBMOUSE, HID_TYPE_USBNONE }; enum hid_battery_status { HID_BATTERY_UNKNOWN = 0, HID_BATTERY_QUERIED, /* Kernel explicitly queried battery strength */ HID_BATTERY_REPORTED, /* Device sent unsolicited battery strength report */ }; struct hid_driver; struct hid_ll_driver; struct hid_device { /* device report descriptor */ __u8 *dev_rdesc; unsigned dev_rsize; __u8 *rdesc; unsigned rsize; struct hid_collection *collection; /* List of HID collections */ unsigned collection_size; /* Number of allocated hid_collections */ unsigned maxcollection; /* Number of parsed collections */ unsigned maxapplication; /* Number of applications */ __u16 bus; /* BUS ID */ __u16 group; /* Report group */ __u32 vendor; /* Vendor ID */ __u32 product; /* Product ID */ __u32 version; /* HID version */ enum hid_type type; /* device type (mouse, kbd, ...) */ unsigned country; /* HID country */ struct hid_report_enum report_enum[HID_REPORT_TYPES]; struct work_struct led_work; /* delayed LED worker */ struct semaphore driver_input_lock; /* protects the current driver */ struct device dev; /* device */ struct hid_driver *driver; void *devres_group_id; /* ID of probe devres group */ const struct hid_ll_driver *ll_driver; struct mutex ll_open_lock; unsigned int ll_open_count; #ifdef CONFIG_HID_BATTERY_STRENGTH /* * Power supply information for HID devices which report * battery strength. power_supply was successfully registered if * battery is non-NULL. */ struct power_supply *battery; __s32 battery_capacity; __s32 battery_min; __s32 battery_max; __s32 battery_report_type; __s32 battery_report_id; __s32 battery_charge_status; enum hid_battery_status battery_status; bool battery_avoid_query; ktime_t battery_ratelimit_time; #endif unsigned long status; /* see STAT flags above */ unsigned claimed; /* Claimed by hidinput, hiddev? 
*/ unsigned quirks; /* Various quirks the device can pull on us */ unsigned initial_quirks; /* Initial set of quirks supplied when creating device */ bool io_started; /* If IO has started */ struct list_head inputs; /* The list of inputs */ void *hiddev; /* The hiddev structure */ void *hidraw; char name[128]; /* Device name */ char phys[64]; /* Device physical location */ char uniq[64]; /* Device unique identifier (serial #) */ void *driver_data; /* temporary hid_ff handling (until moved to the drivers) */ int (*ff_init)(struct hid_device *); /* hiddev event handler */ int (*hiddev_connect)(struct hid_device *, unsigned int); void (*hiddev_disconnect)(struct hid_device *); void (*hiddev_hid_event) (struct hid_device *, struct hid_field *field, struct hid_usage *, __s32); void (*hiddev_report_event) (struct hid_device *, struct hid_report *); /* debugging support via debugfs */ unsigned short debug; struct dentry *debug_dir; struct dentry *debug_rdesc; struct dentry *debug_events; struct list_head debug_list; spinlock_t debug_list_lock; wait_queue_head_t debug_wait; struct kref ref; unsigned int id; /* system unique id */ #ifdef CONFIG_HID_BPF struct hid_bpf bpf; /* hid-bpf data */ #endif /* CONFIG_HID_BPF */ }; void hiddev_free(struct kref *ref); #define to_hid_device(pdev) \ container_of(pdev, struct hid_device, dev) static inline void *hid_get_drvdata(struct hid_device *hdev) { return dev_get_drvdata(&hdev->dev); } static inline void hid_set_drvdata(struct hid_device *hdev, void *data) { dev_set_drvdata(&hdev->dev, data); } #define HID_GLOBAL_STACK_SIZE 4 #define HID_COLLECTION_STACK_SIZE 4 #define HID_SCAN_FLAG_MT_WIN_8 BIT(0) #define HID_SCAN_FLAG_VENDOR_SPECIFIC BIT(1) #define HID_SCAN_FLAG_GD_POINTER BIT(2) struct hid_parser { struct hid_global global; struct hid_global global_stack[HID_GLOBAL_STACK_SIZE]; unsigned int global_stack_ptr; struct hid_local local; unsigned int *collection_stack; unsigned int collection_stack_ptr; unsigned int collection_stack_size; struct hid_device *device; unsigned int scan_flags; }; struct hid_class_descriptor { __u8 bDescriptorType; __le16 wDescriptorLength; } __attribute__ ((packed)); struct hid_descriptor { __u8 bLength; __u8 bDescriptorType; __le16 bcdHID; __u8 bCountryCode; __u8 bNumDescriptors; struct hid_class_descriptor desc[1]; } __attribute__ ((packed)); #define HID_DEVICE(b, g, ven, prod) \ .bus = (b), .group = (g), .vendor = (ven), .product = (prod) #define HID_USB_DEVICE(ven, prod) \ .bus = BUS_USB, .vendor = (ven), .product = (prod) #define HID_BLUETOOTH_DEVICE(ven, prod) \ .bus = BUS_BLUETOOTH, .vendor = (ven), .product = (prod) #define HID_I2C_DEVICE(ven, prod) \ .bus = BUS_I2C, .vendor = (ven), .product = (prod) #define HID_REPORT_ID(rep) \ .report_type = (rep) #define HID_USAGE_ID(uhid, utype, ucode) \ .usage_hid = (uhid), .usage_type = (utype), .usage_code = (ucode) /* we don't want to catch types and codes equal to 0 */ #define HID_TERMINATOR (HID_ANY_ID - 1) struct hid_report_id { __u32 report_type; }; struct hid_usage_id { __u32 usage_hid; __u32 usage_type; __u32 usage_code; }; /** * struct hid_driver * @name: driver name (e.g. 
"Footech_bar-wheel") * @id_table: which devices is this driver for (must be non-NULL for probe * to be called) * @dyn_list: list of dynamically added device ids * @dyn_lock: lock protecting @dyn_list * @match: check if the given device is handled by this driver * @probe: new device inserted * @remove: device removed (NULL if not a hot-plug capable driver) * @report_table: on which reports to call raw_event (NULL means all) * @raw_event: if report in report_table, this hook is called (NULL means nop) * @usage_table: on which events to call event (NULL means all) * @event: if usage in usage_table, this hook is called (NULL means nop) * @report: this hook is called after parsing a report (NULL means nop) * @report_fixup: called before report descriptor parsing (NULL means nop) * @input_mapping: invoked on input registering before mapping an usage * @input_mapped: invoked on input registering after mapping an usage * @input_configured: invoked just before the device is registered * @feature_mapping: invoked on feature registering * @suspend: invoked on suspend (NULL means nop) * @resume: invoked on resume if device was not reset (NULL means nop) * @reset_resume: invoked on resume if device was reset (NULL means nop) * * probe should return -errno on error, or 0 on success. During probe, * input will not be passed to raw_event unless hid_device_io_start is * called. * * raw_event and event should return negative on error, any other value will * pass the event on to .event() typically return 0 for success. * * input_mapping shall return a negative value to completely ignore this usage * (e.g. doubled or invalid usage), zero to continue with parsing of this * usage by generic code (no special handling needed) or positive to skip * generic parsing (needed special handling which was done in the hook already) * input_mapped shall return negative to inform the layer that this usage * should not be considered for further processing or zero to notify that * no processing was performed and should be done in a generic manner * Both these functions may be NULL which means the same behavior as returning * zero from them. 
*/ struct hid_driver { char *name; const struct hid_device_id *id_table; struct list_head dyn_list; spinlock_t dyn_lock; bool (*match)(struct hid_device *dev, bool ignore_special_driver); int (*probe)(struct hid_device *dev, const struct hid_device_id *id); void (*remove)(struct hid_device *dev); const struct hid_report_id *report_table; int (*raw_event)(struct hid_device *hdev, struct hid_report *report, u8 *data, int size); const struct hid_usage_id *usage_table; int (*event)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value); void (*report)(struct hid_device *hdev, struct hid_report *report); __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, unsigned int *size); int (*input_mapping)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max); int (*input_mapped)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max); int (*input_configured)(struct hid_device *hdev, struct hid_input *hidinput); void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); int (*suspend)(struct hid_device *hdev, pm_message_t message); int (*resume)(struct hid_device *hdev); int (*reset_resume)(struct hid_device *hdev); /* private: */ struct device_driver driver; }; #define to_hid_driver(pdrv) \ container_of(pdrv, struct hid_driver, driver) /** * struct hid_ll_driver - low level driver callbacks * @start: called on probe to start the device * @stop: called on remove * @open: called by input layer on open * @close: called by input layer on close * @power: request underlying hardware to enter requested power mode * @parse: this method is called only once to parse the device data, * shouldn't allocate anything to not leak memory * @request: send report request to device (e.g. feature report) * @wait: wait for buffered io to complete (send/recv reports) * @raw_request: send raw report request to device (e.g. 
feature report) * @output_report: send output report to device * @idle: send idle request to device * @may_wakeup: return if device may act as a wakeup source during system-suspend * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE) */ struct hid_ll_driver { int (*start)(struct hid_device *hdev); void (*stop)(struct hid_device *hdev); int (*open)(struct hid_device *hdev); void (*close)(struct hid_device *hdev); int (*power)(struct hid_device *hdev, int level); int (*parse)(struct hid_device *hdev); void (*request)(struct hid_device *hdev, struct hid_report *report, int reqtype); int (*wait)(struct hid_device *hdev); int (*raw_request) (struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype); int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len); int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype); bool (*may_wakeup)(struct hid_device *hdev); unsigned int max_buffer_size; }; extern bool hid_is_usb(const struct hid_device *hdev); #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 /* Applications from HID Usage Tables 4/8/99 Version 1.1 */ /* We ignore a few input applications that are not widely used */ #define IS_INPUT_APPLICATION(a) \ (((a >= HID_UP_GENDESK) && (a <= HID_GD_MULTIAXIS)) \ || ((a >= HID_DG_DIGITIZER) && (a <= HID_DG_WHITEBOARD)) \ || (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL) \ || (a == HID_GD_WIRELESS_RADIO_CTLS)) /* HID core API */ extern bool hid_ignore(struct hid_device *); extern int hid_add_device(struct hid_device *); extern void hid_destroy_device(struct hid_device *); extern const struct bus_type hid_bus_type; extern int __must_check __hid_register_driver(struct hid_driver *, struct module *, const char *mod_name); /* use a define to avoid include chaining to get THIS_MODULE & friends */ #define hid_register_driver(driver) \ __hid_register_driver(driver, THIS_MODULE, KBUILD_MODNAME) extern void hid_unregister_driver(struct hid_driver *); /** * module_hid_driver() - Helper macro for registering a HID driver * @__hid_driver: hid_driver struct * * Helper macro for HID drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. 
Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_hid_driver(__hid_driver) \ module_driver(__hid_driver, hid_register_driver, \ hid_unregister_driver) extern void hidinput_hid_event(struct hid_device *, struct hid_field *, struct hid_usage *, __s32); extern void hidinput_report_event(struct hid_device *hid, struct hid_report *report); extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); int hid_set_field(struct hid_field *, unsigned, __s32); int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code); void hid_output_report(struct hid_report *report, __u8 *data); int __hid_request(struct hid_device *hid, struct hid_report *rep, enum hid_class_request reqtype); u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags); struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, enum hid_report_type type, unsigned int id, unsigned int application); int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); struct hid_report *hid_validate_values(struct hid_device *hid, enum hid_report_type type, unsigned int id, unsigned int field_index, unsigned int report_counts); void hid_setup_resolution_multiplier(struct hid_device *hid); int hid_open_report(struct hid_device *device); int hid_check_keys_pressed(struct hid_device *hid); int hid_connect(struct hid_device *hid, unsigned int connect_mask); void hid_disconnect(struct hid_device *hid); bool hid_match_one_id(const struct hid_device *hdev, const struct hid_device_id *id); const struct hid_device_id *hid_match_id(const struct hid_device *hdev, const struct hid_device_id *id); const struct hid_device_id *hid_match_device(struct hid_device *hdev, struct hid_driver *hdrv); bool hid_compare_device_paths(struct hid_device *hdev_a, struct hid_device *hdev_b, char separator); s32 hid_snto32(__u32 value, unsigned n); __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, unsigned offset, unsigned n); #ifdef CONFIG_PM int hid_driver_suspend(struct hid_device *hdev, pm_message_t state); int hid_driver_reset_resume(struct hid_device *hdev); int hid_driver_resume(struct hid_device *hdev); #else static inline int hid_driver_suspend(struct hid_device *hdev, pm_message_t state) { return 0; } static inline int hid_driver_reset_resume(struct hid_device *hdev) { return 0; } static inline int hid_driver_resume(struct hid_device *hdev) { return 0; } #endif /** * hid_device_io_start - enable HID input during probe, remove * * @hid: the device * * This should only be called during probe or remove and only be * called by the thread calling probe or remove. It will allow * incoming packets to be delivered to the driver. */ static inline void hid_device_io_start(struct hid_device *hid) { if (hid->io_started) { dev_warn(&hid->dev, "io already started\n"); return; } hid->io_started = true; up(&hid->driver_input_lock); } /** * hid_device_io_stop - disable HID input during probe, remove * * @hid: the device * * Should only be called after hid_device_io_start. It will prevent * incoming packets from going to the driver for the duration of * probe, remove. 
If called during probe, packets will still go to the * driver after probe is complete. This function should only be called * by the thread calling probe or remove. */ static inline void hid_device_io_stop(struct hid_device *hid) { if (!hid->io_started) { dev_warn(&hid->dev, "io already stopped\n"); return; } hid->io_started = false; down(&hid->driver_input_lock); } /** * hid_map_usage - map usage input bits * * @hidinput: hidinput which we are interested in * @usage: usage to fill in * @bit: pointer to input->{}bit (out parameter) * @max: maximal valid usage->code to consider later (out parameter) * @type: input event type (EV_KEY, EV_REL, ...) * @c: code which corresponds to this usage and type * * The value pointed to by @bit will be set to NULL if either @type is * an unhandled event type, or if @c is out of range for @type. This * can be used as an error condition. */ static inline void hid_map_usage(struct hid_input *hidinput, struct hid_usage *usage, unsigned long **bit, int *max, __u8 type, unsigned int c) { struct input_dev *input = hidinput->input; unsigned long *bmap = NULL; unsigned int limit = 0; switch (type) { case EV_ABS: bmap = input->absbit; limit = ABS_MAX; break; case EV_REL: bmap = input->relbit; limit = REL_MAX; break; case EV_KEY: bmap = input->keybit; limit = KEY_MAX; break; case EV_LED: bmap = input->ledbit; limit = LED_MAX; break; case EV_MSC: bmap = input->mscbit; limit = MSC_MAX; break; } if (unlikely(c > limit || !bmap)) { pr_warn_ratelimited("%s: Invalid code %d type %d\n", input->name, c, type); *bit = NULL; return; } usage->type = type; usage->code = c; *max = limit; *bit = bmap; } /** * hid_map_usage_clear - map usage input bits and clear the input bit * * @hidinput: hidinput which we are interested in * @usage: usage to fill in * @bit: pointer to input->{}bit (out parameter) * @max: maximal valid usage->code to consider later (out parameter) * @type: input event type (EV_KEY, EV_REL, ...) * @c: code which corresponds to this usage and type * * The same as hid_map_usage, except the @c bit is also cleared in supported * bits (@bit). */ static inline void hid_map_usage_clear(struct hid_input *hidinput, struct hid_usage *usage, unsigned long **bit, int *max, __u8 type, __u16 c) { hid_map_usage(hidinput, usage, bit, max, type, c); if (*bit) clear_bit(usage->code, *bit); } /** * hid_parse - parse HW reports * * @hdev: hid device * * Call this from probe after you set up the device (if needed). Your * report_fixup will be called (if non-NULL) after reading raw report from * device before passing it to hid layer for real parsing. */ static inline int __must_check hid_parse(struct hid_device *hdev) { return hid_open_report(hdev); } int __must_check hid_hw_start(struct hid_device *hdev, unsigned int connect_mask); void hid_hw_stop(struct hid_device *hdev); int __must_check hid_hw_open(struct hid_device *hdev); void hid_hw_close(struct hid_device *hdev); void hid_hw_request(struct hid_device *hdev, struct hid_report *report, enum hid_class_request reqtype); int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype); int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len); /** * hid_hw_power - requests underlying HW to go into given power mode * * @hdev: hid device * @level: requested power level (one of %PM_HINT_* defines) * * This function requests underlying hardware to enter requested power * mode. 
*/ static inline int hid_hw_power(struct hid_device *hdev, int level) { return hdev->ll_driver->power ? hdev->ll_driver->power(hdev, level) : 0; } /** * hid_hw_idle - send idle request to device * * @hdev: hid device * @report: report to control * @idle: idle state * @reqtype: hid request type */ static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle, enum hid_class_request reqtype) { if (hdev->ll_driver->idle) return hdev->ll_driver->idle(hdev, report, idle, reqtype); return 0; } /** * hid_hw_may_wakeup - return if the hid device may act as a wakeup source during system-suspend * * @hdev: hid device */ static inline bool hid_hw_may_wakeup(struct hid_device *hdev) { if (hdev->ll_driver->may_wakeup) return hdev->ll_driver->may_wakeup(hdev); if (hdev->dev.parent) return device_may_wakeup(hdev->dev.parent); return false; } /** * hid_hw_wait - wait for buffered io to complete * * @hdev: hid device */ static inline void hid_hw_wait(struct hid_device *hdev) { if (hdev->ll_driver->wait) hdev->ll_driver->wait(hdev); } /** * hid_report_len - calculate the report length * * @report: the report we want to know the length */ static inline u32 hid_report_len(struct hid_report *report) { return DIV_ROUND_UP(report->size, 8) + (report->id > 0); } int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt); /* HID quirks API */ unsigned long hid_lookup_quirk(const struct hid_device *hdev); int hid_quirks_init(char **quirks_param, __u16 bus, int count); void hid_quirks_exit(__u16 bus); #ifdef CONFIG_HID_PID int hid_pidff_init(struct hid_device *hid); #else #define hid_pidff_init NULL #endif #define dbg_hid(fmt, ...) pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__) #define hid_err(hid, fmt, ...) \ dev_err(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_notice(hid, fmt, ...) \ dev_notice(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_warn(hid, fmt, ...) \ dev_warn(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_info(hid, fmt, ...) \ dev_info(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_dbg(hid, fmt, ...) \ dev_dbg(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_err_once(hid, fmt, ...) \ dev_err_once(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_notice_once(hid, fmt, ...) \ dev_notice_once(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_warn_once(hid, fmt, ...) \ dev_warn_once(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_info_once(hid, fmt, ...) \ dev_info_once(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_dbg_once(hid, fmt, ...) \ dev_dbg_once(&(hid)->dev, fmt, ##__VA_ARGS__) #endif
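/*
 * Illustrative sketch (not part of hid.h): a minimal HID driver wired up
 * through the hooks documented above. The "foo" names and the vendor/product
 * IDs are hypothetical placeholders; probe parses the report descriptor and
 * starts the default handlers, and the absent .remove lets hid-core call
 * hid_hw_stop() itself on removal.
 */
#include <linux/hid.h>
#include <linux/module.h>

static int foo_probe(struct hid_device *hdev, const struct hid_device_id *id)
{
	int ret;

	ret = hid_parse(hdev);		/* report_fixup (if set) runs first */
	if (ret)
		return ret;

	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}

static int foo_raw_event(struct hid_device *hdev, struct hid_report *report,
			 u8 *data, int size)
{
	return 0;			/* let generic code process the report */
}

static const struct hid_device_id foo_devices[] = {
	{ HID_USB_DEVICE(0x1234, 0x5678) },	/* hypothetical IDs */
	{ }
};
MODULE_DEVICE_TABLE(hid, foo_devices);

static struct hid_driver foo_driver = {
	.name		= "foo-hid",
	.id_table	= foo_devices,
	.probe		= foo_probe,
	.raw_event	= foo_raw_event,
};
module_hid_driver(foo_driver);

MODULE_LICENSE("GPL");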
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2002,2003 by Andreas Gruenbacher <a.gruenbacher@computer.org> * * Fixes from William Schumacher incorporated on 15 March 2001. * (Reported by Charles Bertsch, <CBertsch@microtest.com>). */ /* * This file contains generic functions for manipulating * POSIX 1003.1e draft standard 17 ACLs. 
*/ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/atomic.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/namei.h> #include <linux/mnt_idmapping.h> #include <linux/iversion.h> #include <linux/security.h> #include <linux/fsnotify.h> #include <linux/filelock.h> #include "internal.h" static struct posix_acl **acl_by_type(struct inode *inode, int type) { switch (type) { case ACL_TYPE_ACCESS: return &inode->i_acl; case ACL_TYPE_DEFAULT: return &inode->i_default_acl; default: BUG(); } } struct posix_acl *get_cached_acl(struct inode *inode, int type) { struct posix_acl **p = acl_by_type(inode, type); struct posix_acl *acl; for (;;) { rcu_read_lock(); acl = rcu_dereference(*p); if (!acl || is_uncached_acl(acl) || refcount_inc_not_zero(&acl->a_refcount)) break; rcu_read_unlock(); cpu_relax(); } rcu_read_unlock(); return acl; } EXPORT_SYMBOL(get_cached_acl); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) { struct posix_acl *acl = rcu_dereference(*acl_by_type(inode, type)); if (acl == ACL_DONT_CACHE) { struct posix_acl *ret; ret = inode->i_op->get_inode_acl(inode, type, LOOKUP_RCU); if (!IS_ERR(ret)) acl = ret; } return acl; } EXPORT_SYMBOL(get_cached_acl_rcu); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) { struct posix_acl **p = acl_by_type(inode, type); struct posix_acl *old; old = xchg(p, posix_acl_dup(acl)); if (!is_uncached_acl(old)) posix_acl_release(old); } EXPORT_SYMBOL(set_cached_acl); static void __forget_cached_acl(struct posix_acl **p) { struct posix_acl *old; old = xchg(p, ACL_NOT_CACHED); if (!is_uncached_acl(old)) posix_acl_release(old); } void forget_cached_acl(struct inode *inode, int type) { __forget_cached_acl(acl_by_type(inode, type)); } EXPORT_SYMBOL(forget_cached_acl); void forget_all_cached_acls(struct inode *inode) { __forget_cached_acl(&inode->i_acl); __forget_cached_acl(&inode->i_default_acl); } EXPORT_SYMBOL(forget_all_cached_acls); static struct posix_acl *__get_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, int type) { struct posix_acl *sentinel; struct posix_acl **p; struct posix_acl *acl; /* * The sentinel is used to detect when another operation like * set_cached_acl() or forget_cached_acl() races with get_inode_acl(). * It is guaranteed that is_uncached_acl(sentinel) is true. */ acl = get_cached_acl(inode, type); if (!is_uncached_acl(acl)) return acl; if (!IS_POSIXACL(inode)) return NULL; sentinel = uncached_acl_sentinel(current); p = acl_by_type(inode, type); /* * If the ACL isn't being read yet, set our sentinel. Otherwise, the * current value of the ACL will not be ACL_NOT_CACHED and so our own * sentinel will not be set; another task will update the cache. We * could wait for that other task to complete its job, but it's easier * to just call ->get_inode_acl to fetch the ACL ourself. (This is * going to be an unlikely race.) */ cmpxchg(p, ACL_NOT_CACHED, sentinel); /* * Normally, the ACL returned by ->get{_inode}_acl will be cached. * A filesystem can prevent that by calling * forget_cached_acl(inode, type) in ->get{_inode}_acl. * * If the filesystem doesn't have a get{_inode}_ acl() function at all, * we'll just create the negative cache entry. 
*/ if (dentry && inode->i_op->get_acl) { acl = inode->i_op->get_acl(idmap, dentry, type); } else if (inode->i_op->get_inode_acl) { acl = inode->i_op->get_inode_acl(inode, type, false); } else { set_cached_acl(inode, type, NULL); return NULL; } if (IS_ERR(acl)) { /* * Remove our sentinel so that we don't block future attempts * to cache the ACL. */ cmpxchg(p, sentinel, ACL_NOT_CACHED); return acl; } /* * Cache the result, but only if our sentinel is still in place. */ posix_acl_dup(acl); if (unlikely(!try_cmpxchg(p, &sentinel, acl))) posix_acl_release(acl); return acl; } struct posix_acl *get_inode_acl(struct inode *inode, int type) { return __get_acl(&nop_mnt_idmap, NULL, inode, type); } EXPORT_SYMBOL(get_inode_acl); /* * Init a fresh posix_acl */ void posix_acl_init(struct posix_acl *acl, int count) { refcount_set(&acl->a_refcount, 1); acl->a_count = count; } EXPORT_SYMBOL(posix_acl_init); /* * Allocate a new ACL with the specified number of entries. */ struct posix_acl * posix_acl_alloc(int count, gfp_t flags) { const size_t size = sizeof(struct posix_acl) + count * sizeof(struct posix_acl_entry); struct posix_acl *acl = kmalloc(size, flags); if (acl) posix_acl_init(acl, count); return acl; } EXPORT_SYMBOL(posix_acl_alloc); /* * Clone an ACL. */ struct posix_acl * posix_acl_clone(const struct posix_acl *acl, gfp_t flags) { struct posix_acl *clone = NULL; if (acl) { int size = sizeof(struct posix_acl) + acl->a_count * sizeof(struct posix_acl_entry); clone = kmemdup(acl, size, flags); if (clone) refcount_set(&clone->a_refcount, 1); } return clone; } EXPORT_SYMBOL_GPL(posix_acl_clone); /* * Check if an acl is valid. Returns 0 if it is, or -E... otherwise. */ int posix_acl_valid(struct user_namespace *user_ns, const struct posix_acl *acl) { const struct posix_acl_entry *pa, *pe; int state = ACL_USER_OBJ; int needs_mask = 0; FOREACH_ACL_ENTRY(pa, acl, pe) { if (pa->e_perm & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) return -EINVAL; switch (pa->e_tag) { case ACL_USER_OBJ: if (state == ACL_USER_OBJ) { state = ACL_USER; break; } return -EINVAL; case ACL_USER: if (state != ACL_USER) return -EINVAL; if (!kuid_has_mapping(user_ns, pa->e_uid)) return -EINVAL; needs_mask = 1; break; case ACL_GROUP_OBJ: if (state == ACL_USER) { state = ACL_GROUP; break; } return -EINVAL; case ACL_GROUP: if (state != ACL_GROUP) return -EINVAL; if (!kgid_has_mapping(user_ns, pa->e_gid)) return -EINVAL; needs_mask = 1; break; case ACL_MASK: if (state != ACL_GROUP) return -EINVAL; state = ACL_OTHER; break; case ACL_OTHER: if (state == ACL_OTHER || (state == ACL_GROUP && !needs_mask)) { state = 0; break; } return -EINVAL; default: return -EINVAL; } } if (state == 0) return 0; return -EINVAL; } EXPORT_SYMBOL(posix_acl_valid); /* * Returns 0 if the acl can be exactly represented in the traditional * file mode permission bits, or else 1. Returns -E... on error. */ int posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) { const struct posix_acl_entry *pa, *pe; umode_t mode = 0; int not_equiv = 0; /* * A null ACL can always be presented as mode bits. 
*/ if (!acl) return 0; FOREACH_ACL_ENTRY(pa, acl, pe) { switch (pa->e_tag) { case ACL_USER_OBJ: mode |= (pa->e_perm & S_IRWXO) << 6; break; case ACL_GROUP_OBJ: mode |= (pa->e_perm & S_IRWXO) << 3; break; case ACL_OTHER: mode |= pa->e_perm & S_IRWXO; break; case ACL_MASK: mode = (mode & ~S_IRWXG) | ((pa->e_perm & S_IRWXO) << 3); not_equiv = 1; break; case ACL_USER: case ACL_GROUP: not_equiv = 1; break; default: return -EINVAL; } } if (mode_p) *mode_p = (*mode_p & ~S_IRWXUGO) | mode; return not_equiv; } EXPORT_SYMBOL(posix_acl_equiv_mode); /* * Create an ACL representing the file mode permission bits of an inode. */ struct posix_acl * posix_acl_from_mode(umode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) return ERR_PTR(-ENOMEM); acl->a_entries[0].e_tag = ACL_USER_OBJ; acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6; acl->a_entries[1].e_tag = ACL_GROUP_OBJ; acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3; acl->a_entries[2].e_tag = ACL_OTHER; acl->a_entries[2].e_perm = (mode & S_IRWXO); return acl; } EXPORT_SYMBOL(posix_acl_from_mode); /* * Return 0 if current is granted want access to the inode * by the acl. Returns -E... otherwise. */ int posix_acl_permission(struct mnt_idmap *idmap, struct inode *inode, const struct posix_acl *acl, int want) { const struct posix_acl_entry *pa, *pe, *mask_obj; struct user_namespace *fs_userns = i_user_ns(inode); int found = 0; vfsuid_t vfsuid; vfsgid_t vfsgid; want &= MAY_READ | MAY_WRITE | MAY_EXEC; FOREACH_ACL_ENTRY(pa, acl, pe) { switch(pa->e_tag) { case ACL_USER_OBJ: /* (May have been checked already) */ vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto check_perm; break; case ACL_USER: vfsuid = make_vfsuid(idmap, fs_userns, pa->e_uid); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto mask; break; case ACL_GROUP_OBJ: vfsgid = i_gid_into_vfsgid(idmap, inode); if (vfsgid_in_group_p(vfsgid)) { found = 1; if ((pa->e_perm & want) == want) goto mask; } break; case ACL_GROUP: vfsgid = make_vfsgid(idmap, fs_userns, pa->e_gid); if (vfsgid_in_group_p(vfsgid)) { found = 1; if ((pa->e_perm & want) == want) goto mask; } break; case ACL_MASK: break; case ACL_OTHER: if (found) return -EACCES; else goto check_perm; default: return -EIO; } } return -EIO; mask: for (mask_obj = pa+1; mask_obj != pe; mask_obj++) { if (mask_obj->e_tag == ACL_MASK) { if ((pa->e_perm & mask_obj->e_perm & want) == want) return 0; return -EACCES; } } check_perm: if ((pa->e_perm & want) == want) return 0; return -EACCES; } /* * Modify acl when creating a new inode. The caller must ensure the acl is * only referenced once. * * mode_p initially must contain the mode parameter to the open() / creat() * system calls. All permissions that are not granted by the acl are removed. * The permissions in the acl are changed to reflect the mode_p parameter. 
*/ static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) { struct posix_acl_entry *pa, *pe; struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; umode_t mode = *mode_p; int not_equiv = 0; /* assert(atomic_read(acl->a_refcount) == 1); */ FOREACH_ACL_ENTRY(pa, acl, pe) { switch(pa->e_tag) { case ACL_USER_OBJ: pa->e_perm &= (mode >> 6) | ~S_IRWXO; mode &= (pa->e_perm << 6) | ~S_IRWXU; break; case ACL_USER: case ACL_GROUP: not_equiv = 1; break; case ACL_GROUP_OBJ: group_obj = pa; break; case ACL_OTHER: pa->e_perm &= mode | ~S_IRWXO; mode &= pa->e_perm | ~S_IRWXO; break; case ACL_MASK: mask_obj = pa; not_equiv = 1; break; default: return -EIO; } } if (mask_obj) { mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO; mode &= (mask_obj->e_perm << 3) | ~S_IRWXG; } else { if (!group_obj) return -EIO; group_obj->e_perm &= (mode >> 3) | ~S_IRWXO; mode &= (group_obj->e_perm << 3) | ~S_IRWXG; } *mode_p = (*mode_p & ~S_IRWXUGO) | mode; return not_equiv; } /* * Modify the ACL for the chmod syscall. */ static int __posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode) { struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; struct posix_acl_entry *pa, *pe; /* assert(atomic_read(acl->a_refcount) == 1); */ FOREACH_ACL_ENTRY(pa, acl, pe) { switch(pa->e_tag) { case ACL_USER_OBJ: pa->e_perm = (mode & S_IRWXU) >> 6; break; case ACL_USER: case ACL_GROUP: break; case ACL_GROUP_OBJ: group_obj = pa; break; case ACL_MASK: mask_obj = pa; break; case ACL_OTHER: pa->e_perm = (mode & S_IRWXO); break; default: return -EIO; } } if (mask_obj) { mask_obj->e_perm = (mode & S_IRWXG) >> 3; } else { if (!group_obj) return -EIO; group_obj->e_perm = (mode & S_IRWXG) >> 3; } return 0; } int __posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; if (clone) { err = posix_acl_create_masq(clone, mode_p); if (err < 0) { posix_acl_release(clone); clone = NULL; } } posix_acl_release(*acl); *acl = clone; return err; } EXPORT_SYMBOL(__posix_acl_create); int __posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; if (clone) { err = __posix_acl_chmod_masq(clone, mode); if (err) { posix_acl_release(clone); clone = NULL; } } posix_acl_release(*acl); *acl = clone; return err; } EXPORT_SYMBOL(__posix_acl_chmod); /** * posix_acl_chmod - chmod a posix acl * * @idmap: idmap of the mount @inode was found from * @dentry: dentry to check permissions on * @mode: the new mode of @inode * * If the dentry has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. 
*/ int posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode) { struct inode *inode = d_inode(dentry); struct posix_acl *acl; int ret = 0; if (!IS_POSIXACL(inode)) return 0; if (!inode->i_op->set_acl) return -EOPNOTSUPP; acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR_OR_NULL(acl)) { if (acl == ERR_PTR(-EOPNOTSUPP)) return 0; return PTR_ERR(acl); } ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); if (ret) return ret; ret = inode->i_op->set_acl(idmap, dentry, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); return ret; } EXPORT_SYMBOL(posix_acl_chmod); int posix_acl_create(struct inode *dir, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl) { struct posix_acl *p; struct posix_acl *clone; int ret; *acl = NULL; *default_acl = NULL; if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) return 0; p = get_inode_acl(dir, ACL_TYPE_DEFAULT); if (!p || p == ERR_PTR(-EOPNOTSUPP)) { *mode &= ~current_umask(); return 0; } if (IS_ERR(p)) return PTR_ERR(p); ret = -ENOMEM; clone = posix_acl_clone(p, GFP_NOFS); if (!clone) goto err_release; ret = posix_acl_create_masq(clone, mode); if (ret < 0) goto err_release_clone; if (ret == 0) posix_acl_release(clone); else *acl = clone; if (!S_ISDIR(*mode)) posix_acl_release(p); else *default_acl = p; return 0; err_release_clone: posix_acl_release(clone); err_release: posix_acl_release(p); return ret; } EXPORT_SYMBOL_GPL(posix_acl_create); /** * posix_acl_update_mode - update mode in set_acl * @idmap: idmap of the mount @inode was found from * @inode: target inode * @mode_p: mode (pointer) for update * @acl: acl pointer * * Update the file mode when setting an ACL: compute the new file permission * bits based on the ACL. In addition, if the ACL is equivalent to the new * file mode, set *@acl to NULL to indicate that no ACL should be set. * * As with chmod, clear the setgid bit if the caller is not in the owning group * or capable of CAP_FSETID (see inode_change_ok). * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Called from set_acl inode operations. */ int posix_acl_update_mode(struct mnt_idmap *idmap, struct inode *inode, umode_t *mode_p, struct posix_acl **acl) { umode_t mode = inode->i_mode; int error; error = posix_acl_equiv_mode(*acl, &mode); if (error < 0) return error; if (error == 0) *acl = NULL; if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) && !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) mode &= ~S_ISGID; *mode_p = mode; return 0; } EXPORT_SYMBOL(posix_acl_update_mode); /* * Fix up the uids and gids in posix acl extended attributes in place. 
*/ static int posix_acl_fix_xattr_common(const void *value, size_t size) { const struct posix_acl_xattr_header *header = value; int count; if (!header) return -EINVAL; if (size < sizeof(struct posix_acl_xattr_header)) return -EINVAL; if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) return -EOPNOTSUPP; count = posix_acl_xattr_count(size); if (count < 0) return -EINVAL; if (count == 0) return 0; return count; } /** * posix_acl_from_xattr - convert POSIX ACLs from backing store to VFS format * @userns: the filesystem's idmapping * @value: the uapi representation of POSIX ACLs * @size: the size of @value * * Filesystems that store POSIX ACLs in the unaltered uapi format should use * posix_acl_from_xattr() when reading them from the backing store and * converting them into the struct posix_acl VFS format. The helper is * specifically intended to be called from the acl inode operation. * * The posix_acl_from_xattr() function will map the raw {g,u}id values stored * in ACL_{GROUP,USER} entries into idmapping in @userns. * * Note that posix_acl_from_xattr() does not take idmapped mounts into account. * If it did, calling it from the get acl inode operation would return POSIX * ACLs mapped according to an idmapped mount, which would mean that the value * couldn't be cached for the filesystem. Idmapped mounts are taken into * account on the fly during permission checking or right at the VFS - * userspace boundary before reporting them to the user. * * Return: Allocated struct posix_acl on success, NULL for a valid header but * without actual POSIX ACL entries, or ERR_PTR() encoded error code. */ struct posix_acl *posix_acl_from_xattr(struct user_namespace *userns, const void *value, size_t size) { const struct posix_acl_xattr_header *header = value; const struct posix_acl_xattr_entry *entry = (const void *)(header + 1), *end; int count; struct posix_acl *acl; struct posix_acl_entry *acl_e; count = posix_acl_fix_xattr_common(value, size); if (count < 0) return ERR_PTR(count); if (count == 0) return NULL; acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); acl_e = acl->a_entries; for (end = entry + count; entry != end; acl_e++, entry++) { acl_e->e_tag = le16_to_cpu(entry->e_tag); acl_e->e_perm = le16_to_cpu(entry->e_perm); switch(acl_e->e_tag) { case ACL_USER_OBJ: case ACL_GROUP_OBJ: case ACL_MASK: case ACL_OTHER: break; case ACL_USER: acl_e->e_uid = make_kuid(userns, le32_to_cpu(entry->e_id)); if (!uid_valid(acl_e->e_uid)) goto fail; break; case ACL_GROUP: acl_e->e_gid = make_kgid(userns, le32_to_cpu(entry->e_id)); if (!gid_valid(acl_e->e_gid)) goto fail; break; default: goto fail; } } return acl; fail: posix_acl_release(acl); return ERR_PTR(-EINVAL); } EXPORT_SYMBOL(posix_acl_from_xattr); /* * Convert from in-memory to extended attribute representation. 
*/ int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size) { struct posix_acl_xattr_header *ext_acl = buffer; struct posix_acl_xattr_entry *ext_entry; int real_size, n; real_size = posix_acl_xattr_size(acl->a_count); if (!buffer) return real_size; if (real_size > size) return -ERANGE; ext_entry = (void *)(ext_acl + 1); ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); for (n=0; n < acl->a_count; n++, ext_entry++) { const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext_entry->e_tag = cpu_to_le16(acl_e->e_tag); ext_entry->e_perm = cpu_to_le16(acl_e->e_perm); switch(acl_e->e_tag) { case ACL_USER: ext_entry->e_id = cpu_to_le32(from_kuid(user_ns, acl_e->e_uid)); break; case ACL_GROUP: ext_entry->e_id = cpu_to_le32(from_kgid(user_ns, acl_e->e_gid)); break; default: ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); break; } } return real_size; } EXPORT_SYMBOL (posix_acl_to_xattr); /** * vfs_posix_acl_to_xattr - convert from kernel to userspace representation * @idmap: idmap of the mount * @inode: inode the posix acls are set on * @acl: the posix acls as represented by the vfs * @buffer: the buffer into which to convert @acl * @size: size of @buffer * * This converts @acl from the VFS representation in the filesystem idmapping * to the uapi form reportable to userspace. And mount and caller idmappings * are handled appropriately. * * Return: On success, the size of the stored uapi posix acls, on error a * negative errno. */ static ssize_t vfs_posix_acl_to_xattr(struct mnt_idmap *idmap, struct inode *inode, const struct posix_acl *acl, void *buffer, size_t size) { struct posix_acl_xattr_header *ext_acl = buffer; struct posix_acl_xattr_entry *ext_entry; struct user_namespace *fs_userns, *caller_userns; ssize_t real_size, n; vfsuid_t vfsuid; vfsgid_t vfsgid; real_size = posix_acl_xattr_size(acl->a_count); if (!buffer) return real_size; if (real_size > size) return -ERANGE; ext_entry = (void *)(ext_acl + 1); ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); fs_userns = i_user_ns(inode); caller_userns = current_user_ns(); for (n=0; n < acl->a_count; n++, ext_entry++) { const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext_entry->e_tag = cpu_to_le16(acl_e->e_tag); ext_entry->e_perm = cpu_to_le16(acl_e->e_perm); switch(acl_e->e_tag) { case ACL_USER: vfsuid = make_vfsuid(idmap, fs_userns, acl_e->e_uid); ext_entry->e_id = cpu_to_le32(from_kuid( caller_userns, vfsuid_into_kuid(vfsuid))); break; case ACL_GROUP: vfsgid = make_vfsgid(idmap, fs_userns, acl_e->e_gid); ext_entry->e_id = cpu_to_le32(from_kgid( caller_userns, vfsgid_into_kgid(vfsgid))); break; default: ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID); break; } } return real_size; } int set_posix_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type, struct posix_acl *acl) { struct inode *inode = d_inode(dentry); if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; if (!inode->i_op->set_acl) return -EOPNOTSUPP; if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) return acl ? 
-EACCES : 0; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (acl) { int ret = posix_acl_valid(inode->i_sb->s_user_ns, acl); if (ret) return ret; } return inode->i_op->set_acl(idmap, dentry, acl, type); } EXPORT_SYMBOL(set_posix_acl); int posix_acl_listxattr(struct inode *inode, char **buffer, ssize_t *remaining_size) { int err; if (!IS_POSIXACL(inode)) return 0; if (inode->i_acl) { err = xattr_list_one(buffer, remaining_size, XATTR_NAME_POSIX_ACL_ACCESS); if (err) return err; } if (inode->i_default_acl) { err = xattr_list_one(buffer, remaining_size, XATTR_NAME_POSIX_ACL_DEFAULT); if (err) return err; } return 0; } static bool posix_acl_xattr_list(struct dentry *dentry) { return IS_POSIXACL(d_backing_inode(dentry)); } /* * nop_posix_acl_access - legacy xattr handler for access POSIX ACLs * * This is the legacy POSIX ACL access xattr handler. It is used by some * filesystems to implement their ->listxattr() inode operation. New code * should never use them. */ const struct xattr_handler nop_posix_acl_access = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .list = posix_acl_xattr_list, }; EXPORT_SYMBOL_GPL(nop_posix_acl_access); /* * nop_posix_acl_default - legacy xattr handler for default POSIX ACLs * * This is the legacy POSIX ACL default xattr handler. It is used by some * filesystems to implement their ->listxattr() inode operation. New code * should never use them. */ const struct xattr_handler nop_posix_acl_default = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .list = posix_acl_xattr_list, }; EXPORT_SYMBOL_GPL(nop_posix_acl_default); int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int error; struct inode *inode = d_inode(dentry); if (type == ACL_TYPE_ACCESS) { error = posix_acl_update_mode(idmap, inode, &inode->i_mode, &acl); if (error) return error; } inode_set_ctime_current(inode); if (IS_I_VERSION(inode)) inode_inc_iversion(inode); set_cached_acl(inode, type, acl); return 0; } int simple_acl_create(struct inode *dir, struct inode *inode) { struct posix_acl *default_acl, *acl; int error; error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (error) return error; set_cached_acl(inode, ACL_TYPE_DEFAULT, default_acl); set_cached_acl(inode, ACL_TYPE_ACCESS, acl); if (default_acl) posix_acl_release(default_acl); if (acl) posix_acl_release(acl); return 0; } static int vfs_set_acl_idmapped_mnt(struct mnt_idmap *idmap, struct user_namespace *fs_userns, struct posix_acl *acl) { for (int n = 0; n < acl->a_count; n++) { struct posix_acl_entry *acl_e = &acl->a_entries[n]; switch (acl_e->e_tag) { case ACL_USER: acl_e->e_uid = from_vfsuid(idmap, fs_userns, VFSUIDT_INIT(acl_e->e_uid)); break; case ACL_GROUP: acl_e->e_gid = from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(acl_e->e_gid)); break; } } return 0; } /** * vfs_set_acl - set posix acls * @idmap: idmap of the mount * @dentry: the dentry based on which to set the posix acls * @acl_name: the name of the posix acl * @kacl: the posix acls in the appropriate VFS format * * This function sets @kacl. The caller must all posix_acl_release() on @kacl * afterwards. * * Return: On success 0, on error negative errno. 
*/ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { int acl_type; int error; struct inode *inode = d_inode(dentry); struct inode *delegated_inode = NULL; acl_type = posix_acl_type(acl_name); if (acl_type < 0) return -EINVAL; if (kacl) { /* * If we're on an idmapped mount translate from mount specific * vfs{g,u}id_t into global filesystem k{g,u}id_t. * Afterwards we can cache the POSIX ACLs filesystem wide and - * if this is a filesystem with a backing store - ultimately * translate them to backing store values. */ error = vfs_set_acl_idmapped_mnt(idmap, i_user_ns(inode), kacl); if (error) return error; } retry_deleg: inode_lock(inode); /* * We only care about restrictions the inode struct itself places upon * us otherwise POSIX ACLs aren't subject to any VFS restrictions. */ error = may_write_xattr(idmap, inode); if (error) goto out_inode_unlock; error = security_inode_set_acl(idmap, dentry, acl_name, kacl); if (error) goto out_inode_unlock; error = try_break_deleg(inode, &delegated_inode); if (error) goto out_inode_unlock; if (likely(!is_bad_inode(inode))) error = set_posix_acl(idmap, dentry, acl_type, kacl); else error = -EIO; if (!error) { fsnotify_xattr(dentry); security_inode_post_set_acl(dentry, acl_name, kacl); } out_inode_unlock: inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } EXPORT_SYMBOL_GPL(vfs_set_acl); /** * vfs_get_acl - get posix acls * @idmap: idmap of the mount * @dentry: the dentry based on which to retrieve the posix acls * @acl_name: the name of the posix acl * * This function retrieves @kacl from the filesystem. The caller must all * posix_acl_release() on @kacl. * * Return: On success POSIX ACLs in VFS format, on error negative errno. */ struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { struct inode *inode = d_inode(dentry); struct posix_acl *acl; int acl_type, error; acl_type = posix_acl_type(acl_name); if (acl_type < 0) return ERR_PTR(-EINVAL); /* * The VFS has no restrictions on reading POSIX ACLs so calling * something like xattr_permission() isn't needed. Only LSMs get a say. */ error = security_inode_get_acl(idmap, dentry, acl_name); if (error) return ERR_PTR(error); if (!IS_POSIXACL(inode)) return ERR_PTR(-EOPNOTSUPP); if (S_ISLNK(inode->i_mode)) return ERR_PTR(-EOPNOTSUPP); acl = __get_acl(idmap, dentry, inode, acl_type); if (IS_ERR(acl)) return acl; if (!acl) return ERR_PTR(-ENODATA); return acl; } EXPORT_SYMBOL_GPL(vfs_get_acl); /** * vfs_remove_acl - remove posix acls * @idmap: idmap of the mount * @dentry: the dentry based on which to retrieve the posix acls * @acl_name: the name of the posix acl * * This function removes posix acls. * * Return: On success 0, on error negative errno. */ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { int acl_type; int error; struct inode *inode = d_inode(dentry); struct inode *delegated_inode = NULL; acl_type = posix_acl_type(acl_name); if (acl_type < 0) return -EINVAL; retry_deleg: inode_lock(inode); /* * We only care about restrictions the inode struct itself places upon * us otherwise POSIX ACLs aren't subject to any VFS restrictions. 
*/ error = may_write_xattr(idmap, inode); if (error) goto out_inode_unlock; error = security_inode_remove_acl(idmap, dentry, acl_name); if (error) goto out_inode_unlock; error = try_break_deleg(inode, &delegated_inode); if (error) goto out_inode_unlock; if (likely(!is_bad_inode(inode))) error = set_posix_acl(idmap, dentry, acl_type, NULL); else error = -EIO; if (!error) { fsnotify_xattr(dentry); security_inode_post_remove_acl(idmap, dentry, acl_name); } out_inode_unlock: inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } EXPORT_SYMBOL_GPL(vfs_remove_acl); int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, const void *kvalue, size_t size) { int error; struct posix_acl *acl = NULL; if (size) { /* * Note that posix_acl_from_xattr() uses GFP_NOFS when it * probably doesn't need to here. */ acl = posix_acl_from_xattr(current_user_ns(), kvalue, size); if (IS_ERR(acl)) return PTR_ERR(acl); } error = vfs_set_acl(idmap, dentry, acl_name, acl); posix_acl_release(acl); return error; } ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, void *kvalue, size_t size) { ssize_t error; struct posix_acl *acl; acl = vfs_get_acl(idmap, dentry, acl_name); if (IS_ERR(acl)) return PTR_ERR(acl); error = vfs_posix_acl_to_xattr(idmap, d_inode(dentry), acl, kvalue, size); posix_acl_release(acl); return error; }
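/*
 * Illustrative sketch (not part of posix_acl.c): how a simple in-memory
 * filesystem's create path typically consumes posix_acl_create(), mirroring
 * simple_acl_create() above. The foofs_* name is hypothetical and error
 * handling is abbreviated.
 */
#include <linux/fs.h>
#include <linux/posix_acl.h>

static int foofs_init_acls(struct inode *dir, struct inode *inode)
{
	struct posix_acl *default_acl, *acl;
	int error;

	/* Derive the new inode's ACLs (and adjust its mode) from the parent. */
	error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
	if (error)
		return error;

	/* An in-memory filesystem can simply cache the results on the inode. */
	if (default_acl) {
		set_cached_acl(inode, ACL_TYPE_DEFAULT, default_acl);
		posix_acl_release(default_acl);
	}
	if (acl) {
		set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
		posix_acl_release(acl);
	}
	return 0;
}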
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM pagemap #if !defined(_TRACE_PAGEMAP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_PAGEMAP_H #include <linux/tracepoint.h> #include <linux/mm.h> #define PAGEMAP_MAPPED 0x0001u #define PAGEMAP_ANONYMOUS 0x0002u #define PAGEMAP_FILE 0x0004u #define PAGEMAP_SWAPCACHE 0x0008u #define PAGEMAP_SWAPBACKED 0x0010u #define PAGEMAP_MAPPEDDISK 0x0020u #define PAGEMAP_BUFFERS 0x0040u #define trace_pagemap_flags(folio) ( \ (folio_test_anon(folio) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ (folio_mapped(folio) ? PAGEMAP_MAPPED : 0) | \ (folio_test_swapcache(folio) ? PAGEMAP_SWAPCACHE : 0) | \ (folio_test_swapbacked(folio) ? PAGEMAP_SWAPBACKED : 0) | \ (folio_test_mappedtodisk(folio) ? PAGEMAP_MAPPEDDISK : 0) | \ (folio_test_private(folio) ? PAGEMAP_BUFFERS : 0) \ ) TRACE_EVENT(mm_lru_insertion, TP_PROTO(struct folio *folio), TP_ARGS(folio), TP_STRUCT__entry( __field(struct folio *, folio ) __field(unsigned long, pfn ) __field(enum lru_list, lru ) __field(unsigned long, flags ) ), TP_fast_assign( __entry->folio = folio; __entry->pfn = folio_pfn(folio); __entry->lru = folio_lru_list(folio); __entry->flags = trace_pagemap_flags(folio); ), /* Flag format is based on page-types.c formatting for pagemap */ TP_printk("folio=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", __entry->folio, __entry->pfn, __entry->lru, __entry->flags & PAGEMAP_MAPPED ? "M" : " ", __entry->flags & PAGEMAP_ANONYMOUS ? "a" : "f", __entry->flags & PAGEMAP_SWAPCACHE ? "s" : " ", __entry->flags & PAGEMAP_SWAPBACKED ? "b" : " ", __entry->flags & PAGEMAP_MAPPEDDISK ? "d" : " ", __entry->flags & PAGEMAP_BUFFERS ? "B" : " ") ); TRACE_EVENT(mm_lru_activate, TP_PROTO(struct folio *folio), TP_ARGS(folio), TP_STRUCT__entry( __field(struct folio *, folio ) __field(unsigned long, pfn ) ), TP_fast_assign( __entry->folio = folio; __entry->pfn = folio_pfn(folio); ), TP_printk("folio=%p pfn=0x%lx", __entry->folio, __entry->pfn) ); #endif /* _TRACE_PAGEMAP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
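/*
 * Illustrative sketch (not part of the trace header): each TRACE_EVENT()
 * above generates a trace_mm_lru_insertion()/trace_mm_lru_activate() helper
 * that mm code invokes at the point of interest; the call compiles down to
 * a no-op unless the tracepoint is enabled. The function below is a
 * hypothetical call site (the header itself is instantiated once elsewhere
 * under CREATE_TRACE_POINTS).
 */
#include <linux/mm.h>
#include <trace/events/pagemap.h>

static void example_note_lru_insertion(struct folio *folio)
{
	trace_mm_lru_insertion(folio);
}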
// SPDX-License-Identifier: GPL-2.0-or-later /* * Advanced Linux Sound Architecture * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/device.h> #include <linux/module.h> #include <linux/debugfs.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/info.h> #include <sound/control.h> #include <sound/initval.h> #include <linux/kmod.h> #include <linux/mutex.h> static int major = CONFIG_SND_MAJOR; int snd_major; EXPORT_SYMBOL(snd_major); static int cards_limit = 1; MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("Advanced Linux Sound Architecture driver for soundcards."); MODULE_LICENSE("GPL"); module_param(major, int, 0444); MODULE_PARM_DESC(major, "Major # for sound driver."); module_param(cards_limit, int, 0444); MODULE_PARM_DESC(cards_limit, "Count of auto-loadable soundcards."); MODULE_ALIAS_CHARDEV_MAJOR(CONFIG_SND_MAJOR); /* this one holds the actual max. card number currently available. * as default, it's identical with cards_limit option. when more * modules are loaded manually, this limit number increases, too. */ int snd_ecards_limit; EXPORT_SYMBOL(snd_ecards_limit); #ifdef CONFIG_SND_DEBUG struct dentry *sound_debugfs_root; EXPORT_SYMBOL_GPL(sound_debugfs_root); #endif static struct snd_minor *snd_minors[SNDRV_OS_MINORS]; static DEFINE_MUTEX(sound_mutex); #ifdef CONFIG_MODULES /** * snd_request_card - try to load the card module * @card: the card number * * Tries to load the module "snd-card-X" for the given card number * via request_module. Returns immediately if already loaded. 
*/ void snd_request_card(int card) { if (snd_card_locked(card)) return; if (card < 0 || card >= cards_limit) return; request_module("snd-card-%i", card); } EXPORT_SYMBOL(snd_request_card); static void snd_request_other(int minor) { char *str; switch (minor) { case SNDRV_MINOR_SEQUENCER: str = "snd-seq"; break; case SNDRV_MINOR_TIMER: str = "snd-timer"; break; default: return; } request_module(str); } #endif /* modular kernel */ /** * snd_lookup_minor_data - get user data of a registered device * @minor: the minor number * @type: device type (SNDRV_DEVICE_TYPE_XXX) * * Checks that a minor device with the specified type is registered, and returns * its user data pointer. * * This function increments the reference counter of the card instance * if an associated instance with the given minor number and type is found. * The caller must call snd_card_unref() appropriately later. * * Return: The user data pointer if the specified device is found. %NULL * otherwise. */ void *snd_lookup_minor_data(unsigned int minor, int type) { struct snd_minor *mreg; void *private_data; if (minor >= ARRAY_SIZE(snd_minors)) return NULL; guard(mutex)(&sound_mutex); mreg = snd_minors[minor]; if (mreg && mreg->type == type) { private_data = mreg->private_data; if (private_data && mreg->card_ptr) get_device(&mreg->card_ptr->card_dev); } else private_data = NULL; return private_data; } EXPORT_SYMBOL(snd_lookup_minor_data); #ifdef CONFIG_MODULES static struct snd_minor *autoload_device(unsigned int minor) { int dev; mutex_unlock(&sound_mutex); /* release lock temporarily */ dev = SNDRV_MINOR_DEVICE(minor); if (dev == SNDRV_MINOR_CONTROL) { /* /dev/aloadC? */ int card = SNDRV_MINOR_CARD(minor); struct snd_card *ref = snd_card_ref(card); if (!ref) snd_request_card(card); else snd_card_unref(ref); } else if (dev == SNDRV_MINOR_GLOBAL) { /* /dev/aloadSEQ */ snd_request_other(minor); } mutex_lock(&sound_mutex); /* reacuire lock */ return snd_minors[minor]; } #else /* !CONFIG_MODULES */ #define autoload_device(minor) NULL #endif /* CONFIG_MODULES */ static int snd_open(struct inode *inode, struct file *file) { unsigned int minor = iminor(inode); struct snd_minor *mptr = NULL; const struct file_operations *new_fops; int err = 0; if (minor >= ARRAY_SIZE(snd_minors)) return -ENODEV; scoped_guard(mutex, &sound_mutex) { mptr = snd_minors[minor]; if (mptr == NULL) { mptr = autoload_device(minor); if (!mptr) return -ENODEV; } new_fops = fops_get(mptr->f_ops); } if (!new_fops) return -ENODEV; replace_fops(file, new_fops); if (file->f_op->open) err = file->f_op->open(inode, file); return err; } static const struct file_operations snd_fops = { .owner = THIS_MODULE, .open = snd_open, .llseek = noop_llseek, }; #ifdef CONFIG_SND_DYNAMIC_MINORS static int snd_find_free_minor(int type, struct snd_card *card, int dev) { int minor; /* static minors for module auto loading */ if (type == SNDRV_DEVICE_TYPE_SEQUENCER) return SNDRV_MINOR_SEQUENCER; if (type == SNDRV_DEVICE_TYPE_TIMER) return SNDRV_MINOR_TIMER; for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor) { /* skip static minors still used for module auto loading */ if (SNDRV_MINOR_DEVICE(minor) == SNDRV_MINOR_CONTROL) continue; if (minor == SNDRV_MINOR_SEQUENCER || minor == SNDRV_MINOR_TIMER) continue; if (!snd_minors[minor]) return minor; } return -EBUSY; } #else static int snd_find_free_minor(int type, struct snd_card *card, int dev) { int minor; switch (type) { case SNDRV_DEVICE_TYPE_SEQUENCER: case SNDRV_DEVICE_TYPE_TIMER: minor = type; break; case SNDRV_DEVICE_TYPE_CONTROL: if 
(snd_BUG_ON(!card)) return -EINVAL; minor = SNDRV_MINOR(card->number, type); break; case SNDRV_DEVICE_TYPE_HWDEP: case SNDRV_DEVICE_TYPE_RAWMIDI: case SNDRV_DEVICE_TYPE_PCM_PLAYBACK: case SNDRV_DEVICE_TYPE_PCM_CAPTURE: case SNDRV_DEVICE_TYPE_COMPRESS: if (snd_BUG_ON(!card)) return -EINVAL; minor = SNDRV_MINOR(card->number, type + dev); break; default: return -EINVAL; } if (snd_BUG_ON(minor < 0 || minor >= SNDRV_OS_MINORS)) return -EINVAL; if (snd_minors[minor]) return -EBUSY; return minor; } #endif /** * snd_register_device - Register the ALSA device file for the card * @type: the device type, SNDRV_DEVICE_TYPE_XXX * @card: the card instance * @dev: the device index * @f_ops: the file operations * @private_data: user pointer for f_ops->open() * @device: the device to register * * Registers an ALSA device file for the given card. * The operators have to be set in reg parameter. * * Return: Zero if successful, or a negative error code on failure. */ int snd_register_device(int type, struct snd_card *card, int dev, const struct file_operations *f_ops, void *private_data, struct device *device) { int minor; int err = 0; struct snd_minor *preg; if (snd_BUG_ON(!device)) return -EINVAL; preg = kmalloc(sizeof *preg, GFP_KERNEL); if (preg == NULL) return -ENOMEM; preg->type = type; preg->card = card ? card->number : -1; preg->device = dev; preg->f_ops = f_ops; preg->private_data = private_data; preg->card_ptr = card; guard(mutex)(&sound_mutex); minor = snd_find_free_minor(type, card, dev); if (minor < 0) { err = minor; goto error; } preg->dev = device; device->devt = MKDEV(major, minor); err = device_add(device); if (err < 0) goto error; snd_minors[minor] = preg; error: if (err < 0) kfree(preg); return err; } EXPORT_SYMBOL(snd_register_device); /** * snd_unregister_device - unregister the device on the given card * @dev: the device instance * * Unregisters the device file already registered via * snd_register_device(). * * Return: Zero if successful, or a negative error code on failure. 
*/ int snd_unregister_device(struct device *dev) { int minor; struct snd_minor *preg; guard(mutex)(&sound_mutex); for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor) { preg = snd_minors[minor]; if (preg && preg->dev == dev) { snd_minors[minor] = NULL; device_del(dev); kfree(preg); break; } } if (minor >= ARRAY_SIZE(snd_minors)) return -ENOENT; return 0; } EXPORT_SYMBOL(snd_unregister_device); #ifdef CONFIG_SND_PROC_FS /* * INFO PART */ static const char *snd_device_type_name(int type) { switch (type) { case SNDRV_DEVICE_TYPE_CONTROL: return "control"; case SNDRV_DEVICE_TYPE_HWDEP: return "hardware dependent"; case SNDRV_DEVICE_TYPE_RAWMIDI: return "raw midi"; case SNDRV_DEVICE_TYPE_PCM_PLAYBACK: return "digital audio playback"; case SNDRV_DEVICE_TYPE_PCM_CAPTURE: return "digital audio capture"; case SNDRV_DEVICE_TYPE_SEQUENCER: return "sequencer"; case SNDRV_DEVICE_TYPE_TIMER: return "timer"; case SNDRV_DEVICE_TYPE_COMPRESS: return "compress"; default: return "?"; } } static void snd_minor_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { int minor; struct snd_minor *mptr; guard(mutex)(&sound_mutex); for (minor = 0; minor < SNDRV_OS_MINORS; ++minor) { mptr = snd_minors[minor]; if (!mptr) continue; if (mptr->card >= 0) { if (mptr->device >= 0) snd_iprintf(buffer, "%3i: [%2i-%2i]: %s\n", minor, mptr->card, mptr->device, snd_device_type_name(mptr->type)); else snd_iprintf(buffer, "%3i: [%2i] : %s\n", minor, mptr->card, snd_device_type_name(mptr->type)); } else snd_iprintf(buffer, "%3i: : %s\n", minor, snd_device_type_name(mptr->type)); } } int __init snd_minor_info_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "devices", NULL); if (!entry) return -ENOMEM; entry->c.text.read = snd_minor_info_read; return snd_info_register(entry); /* freed in error path */ } #endif /* CONFIG_SND_PROC_FS */ /* * INIT PART */ static int __init alsa_sound_init(void) { snd_major = major; snd_ecards_limit = cards_limit; if (register_chrdev(major, "alsa", &snd_fops)) { pr_err("ALSA core: unable to register native major device number %d\n", major); return -EIO; } if (snd_info_init() < 0) { unregister_chrdev(major, "alsa"); return -ENOMEM; } #ifdef CONFIG_SND_DEBUG sound_debugfs_root = debugfs_create_dir("sound", NULL); #endif #ifndef MODULE pr_info("Advanced Linux Sound Architecture Driver Initialized.\n"); #endif return 0; } static void __exit alsa_sound_exit(void) { #ifdef CONFIG_SND_DEBUG debugfs_remove(sound_debugfs_root); #endif snd_info_done(); unregister_chrdev(major, "alsa"); } subsys_initcall(alsa_sound_init); module_exit(alsa_sound_exit);
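/*
 * Illustrative sketch (not part of the original sources): how a
 * hypothetical driver-side component might use the registration API
 * above.  The function and variable names (example_register_hwdep,
 * example_open, my_fops, my_data, my_dev) are made up for this sketch;
 * only snd_register_device(), snd_unregister_device(),
 * snd_lookup_minor_data() and snd_card_unref() come from the ALSA core
 * code above.
 */
#if 0	/* example only, never compiled */
static int example_register_hwdep(struct snd_card *card, int dev,
				  const struct file_operations *my_fops,
				  void *my_data, struct device *my_dev)
{
	/*
	 * Picks a free minor (static or dynamic, depending on
	 * CONFIG_SND_DYNAMIC_MINORS), stores my_data in snd_minors[]
	 * and adds the device node.
	 */
	return snd_register_device(SNDRV_DEVICE_TYPE_HWDEP, card, dev,
				   my_fops, my_data, my_dev);
}

static int example_open(struct inode *inode, struct file *file)
{
	/*
	 * Retrieves the my_data pointer passed to snd_register_device().
	 * snd_lookup_minor_data() takes a reference on the card device,
	 * so the caller must drop it later with snd_card_unref().
	 */
	void *my_data = snd_lookup_minor_data(iminor(inode),
					      SNDRV_DEVICE_TYPE_HWDEP);

	if (!my_data)
		return -ENODEV;
	/* ... */
	return 0;
}

static int example_unregister_hwdep(struct device *my_dev)
{
	/* Clears the snd_minors[] slot and deletes the device node. */
	return snd_unregister_device(my_dev);
}
#endif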
// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux INET6 implementation * Forwarding Information Database * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Changes: * Yuji SEKIYA @USAGI: Support default route on router node; * remove ip6_null_entry from the top of * routing table. * Ville Nuorvala: Fixed routing subtrees. 
*/ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/bpf.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/net.h> #include <linux/route.h> #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/addrconf.h> #include <net/lwtunnel.h> #include <net/fib_notifier.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> static struct kmem_cache *fib6_node_kmem __read_mostly; struct fib6_cleaner { struct fib6_walker w; struct net *net; int (*func)(struct fib6_info *, void *arg); int sernum; void *arg; bool skip_notify; }; #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S #else #define FWS_INIT FWS_L #endif static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_table *table, struct fib6_node *fn); static int fib6_walk(struct net *net, struct fib6_walker *w); static int fib6_walk_continue(struct fib6_walker *w); /* * A routing update causes an increase of the serial number on the * affected subtree. This allows for cached routes to be asynchronously * tested when modifications are made to the destination cache as a * result of redirects, path MTU changes, etc. */ static void fib6_gc_timer_cb(struct timer_list *t); #define FOR_WALKERS(net, w) \ list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh) static void fib6_walker_link(struct net *net, struct fib6_walker *w) { write_lock_bh(&net->ipv6.fib6_walker_lock); list_add(&w->lh, &net->ipv6.fib6_walkers); write_unlock_bh(&net->ipv6.fib6_walker_lock); } static void fib6_walker_unlink(struct net *net, struct fib6_walker *w) { write_lock_bh(&net->ipv6.fib6_walker_lock); list_del(&w->lh); write_unlock_bh(&net->ipv6.fib6_walker_lock); } static int fib6_new_sernum(struct net *net) { int new, old = atomic_read(&net->ipv6.fib6_sernum); do { new = old < INT_MAX ? old + 1 : 1; } while (!atomic_try_cmpxchg(&net->ipv6.fib6_sernum, &old, new)); return new; } enum { FIB6_NO_SERNUM_CHANGE = 0, }; void fib6_update_sernum(struct net *net, struct fib6_info *f6i) { struct fib6_node *fn; fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* * Auxiliary address test functions for the radix tree. * * These assume a 32bit processor (although it will work on * 64bit processors) */ /* * test bit */ #if defined(__LITTLE_ENDIAN) # define BITOP_BE32_SWIZZLE (0x1F & ~7) #else # define BITOP_BE32_SWIZZLE 0 #endif static __be32 addr_bit_set(const void *token, int fn_bit) { const __be32 *addr = token; /* * Here, * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) * is optimized version of * htonl(1 << ((~fn_bit)&0x1F)) * See include/asm-generic/bitops/le.h. 
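 *
 * For example, on little-endian (BITOP_BE32_SWIZZLE = 0x18) with
 * fn_bit = 0: (~0 ^ 0x18) & 0x1f = 7, so the mask is 1 << 7 = 0x80,
 * which selects the most significant bit of the first big-endian
 * 32-bit word of the address, i.e. the same bit that
 * htonl(1 << 31) & addr[0] would test, without doing the byte swap.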
*/ return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5]; } struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) { struct fib6_info *f6i; size_t sz = sizeof(*f6i); if (with_fib6_nh) sz += sizeof(struct fib6_nh); f6i = kzalloc(sz, gfp_flags); if (!f6i) return NULL; /* fib6_siblings is a union with nh_list, so this initializes both */ INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); INIT_HLIST_NODE(&f6i->gc_link); return f6i; } void fib6_info_destroy_rcu(struct rcu_head *head) { struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); WARN_ON(f6i->fib6_node); if (f6i->nh) nexthop_put(f6i->nh); else fib6_nh_release(f6i->fib6_nh); ip_fib_metrics_put(f6i->fib6_metrics); kfree(f6i); } EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu); static struct fib6_node *node_alloc(struct net *net) { struct fib6_node *fn; fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); if (fn) net->ipv6.rt6_stats->fib_nodes++; return fn; } static void node_free_immediate(struct net *net, struct fib6_node *fn) { kmem_cache_free(fib6_node_kmem, fn); net->ipv6.rt6_stats->fib_nodes--; } static void node_free_rcu(struct rcu_head *head) { struct fib6_node *fn = container_of(head, struct fib6_node, rcu); kmem_cache_free(fib6_node_kmem, fn); } static void node_free(struct net *net, struct fib6_node *fn) { call_rcu(&fn->rcu, node_free_rcu); net->ipv6.rt6_stats->fib_nodes--; } static void fib6_free_table(struct fib6_table *table) { inetpeer_invalidate_tree(&table->tb6_peers); kfree(table); } static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; /* * Initialize table lock at a single place to give lockdep a key, * tables aren't visible prior to being linked to the list. */ spin_lock_init(&tb->tb6_lock); h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); /* * No protection necessary, this is the only list mutatation * operation, tables never disappear once they exist. 
*/ hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]); } #ifdef CONFIG_IPV6_MULTIPLE_TABLES static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) { struct fib6_table *table; table = kzalloc(sizeof(*table), GFP_ATOMIC); if (table) { table->tb6_id = id; rcu_assign_pointer(table->tb6_root.leaf, net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); INIT_HLIST_HEAD(&table->tb6_gc_hlist); } return table; } struct fib6_table *fib6_new_table(struct net *net, u32 id) { struct fib6_table *tb; if (id == 0) id = RT6_TABLE_MAIN; tb = fib6_get_table(net, id); if (tb) return tb; tb = fib6_alloc_table(net, id); if (tb) fib6_link_table(net, tb); return tb; } EXPORT_SYMBOL_GPL(fib6_new_table); struct fib6_table *fib6_get_table(struct net *net, u32 id) { struct fib6_table *tb; struct hlist_head *head; unsigned int h; if (id == 0) id = RT6_TABLE_MAIN; h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (tb->tb6_id == id) { rcu_read_unlock(); return tb; } } rcu_read_unlock(); return NULL; } EXPORT_SYMBOL_GPL(fib6_get_table); static void __net_init fib6_tables_init(struct net *net) { fib6_link_table(net, net->ipv6.fib6_main_tbl); fib6_link_table(net, net->ipv6.fib6_local_tbl); } #else struct fib6_table *fib6_new_table(struct net *net, u32 id) { return fib6_get_table(net, id); } struct fib6_table *fib6_get_table(struct net *net, u32 id) { return net->ipv6.fib6_main_tbl; } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup) { struct rt6_info *rt; rt = pol_lookup_func(lookup, net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error == -EAGAIN) { ip6_rt_put_flags(rt, flags); rt = net->ipv6.ip6_null_entry; if (!(flags & RT6_LOOKUP_F_DST_NOREF)) dst_hold(&rt->dst); } return &rt->dst; } /* called with rcu lock held; no reference taken on fib6_info */ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags) { return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, res, flags); } static void __net_init fib6_tables_init(struct net *net) { fib6_link_table(net, net->ipv6.fib6_main_tbl); } #endif unsigned int fib6_tables_seq_read(struct net *net) { unsigned int h, fib_seq = 0; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv6.fib_table_hash[h]; struct fib6_table *tb; hlist_for_each_entry_rcu(tb, head, tb6_hlist) fib_seq += tb->fib_seq; } rcu_read_unlock(); return fib_seq; } static int call_fib6_entry_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib6_info *rt, struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { .info.extack = extack, .rt = rt, }; return call_fib6_notifier(nb, event_type, &info.info); } static int call_fib6_multipath_entry_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib6_info *rt, unsigned int nsiblings, struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { .info.extack = extack, .rt = rt, .nsiblings = nsiblings, }; return call_fib6_notifier(nb, event_type, &info.info); } int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { .info.extack = extack, .rt = rt, }; rt->fib6_table->fib_seq++; return 
call_fib6_notifiers(net, event_type, &info.info); } int call_fib6_multipath_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, unsigned int nsiblings, struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { .info.extack = extack, .rt = rt, .nsiblings = nsiblings, }; rt->fib6_table->fib_seq++; return call_fib6_notifiers(net, event_type, &info.info); } int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt) { struct fib6_entry_notifier_info info = { .rt = rt, .nsiblings = rt->fib6_nsiblings, }; rt->fib6_table->fib_seq++; return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info); } struct fib6_dump_arg { struct net *net; struct notifier_block *nb; struct netlink_ext_ack *extack; }; static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE; int err; if (!rt || rt == arg->net->ipv6.fib6_null_entry) return 0; if (rt->fib6_nsiblings) err = call_fib6_multipath_entry_notifier(arg->nb, fib_event, rt, rt->fib6_nsiblings, arg->extack); else err = call_fib6_entry_notifier(arg->nb, fib_event, rt, arg->extack); return err; } static int fib6_node_dump(struct fib6_walker *w) { int err; err = fib6_rt_dump(w->leaf, w->args); w->leaf = NULL; return err; } static int fib6_table_dump(struct net *net, struct fib6_table *tb, struct fib6_walker *w) { int err; w->root = &tb->tb6_root; spin_lock_bh(&tb->tb6_lock); err = fib6_walk(net, w); spin_unlock_bh(&tb->tb6_lock); return err; } /* Called with rcu_read_lock() */ int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { struct fib6_dump_arg arg; struct fib6_walker *w; unsigned int h; int err = 0; w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) return -ENOMEM; w->func = fib6_node_dump; arg.net = net; arg.nb = nb; arg.extack = extack; w->args = &arg; for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv6.fib_table_hash[h]; struct fib6_table *tb; hlist_for_each_entry_rcu(tb, head, tb6_hlist) { err = fib6_table_dump(net, tb, w); if (err) goto out; } } out: kfree(w); /* The tree traversal function should never return a positive value. */ return err > 0 ? -EINVAL : err; } static int fib6_dump_node(struct fib6_walker *w) { int res; struct fib6_info *rt; for_each_fib6_walker_rt(w) { res = rt6_dump_route(rt, w->args, w->skip_in_node); if (res >= 0) { /* Frame is full, suspend walking */ w->leaf = rt; /* We'll restart from this node, so if some routes were * already dumped, skip them next time. */ w->skip_in_node += res; return 1; } w->skip_in_node = 0; /* Multipath routes are dumped in one route with the * RTA_MULTIPATH attribute. Jump 'rt' to point to the * last sibling of this route (no need to dump the * sibling routes again) */ if (rt->fib6_nsiblings) rt = list_last_entry(&rt->fib6_siblings, struct fib6_info, fib6_siblings); } w->leaf = NULL; return 0; } static void fib6_dump_end(struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct fib6_walker *w = (void *)cb->args[2]; if (w) { if (cb->args[4]) { cb->args[4] = 0; fib6_walker_unlink(net, w); } cb->args[2] = 0; kfree(w); } cb->done = (void *)cb->args[3]; cb->args[1] = 3; } static int fib6_dump_done(struct netlink_callback *cb) { fib6_dump_end(cb); return cb->done ? 
cb->done(cb) : 0; } static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct fib6_walker *w; int res; w = (void *)cb->args[2]; w->root = &table->tb6_root; if (cb->args[4] == 0) { w->count = 0; w->skip = 0; w->skip_in_node = 0; spin_lock_bh(&table->tb6_lock); res = fib6_walk(net, w); spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { int sernum = READ_ONCE(w->root->fn_sernum); if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; w->skip_in_node = 0; } else w->skip = 0; spin_lock_bh(&table->tb6_lock); res = fib6_walk_continue(w); spin_unlock_bh(&table->tb6_lock); if (res <= 0) { fib6_walker_unlink(net, w); cb->args[4] = 0; } } return res; } static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true, .filter.dump_routes = true, .filter.rtnl_held = true, }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; struct fib6_walker *w; struct fib6_table *tb; struct hlist_head *head; int res = 0; if (cb->strict_check) { int err; err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb); if (err < 0) return err; } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(nlh); if (rtm->rtm_flags & RTM_F_PREFIX) arg.filter.flags = RTM_F_PREFIX; } w = (void *)cb->args[2]; if (!w) { /* New dump: * * 1. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); if (!w) return -ENOMEM; w->func = fib6_dump_node; cb->args[2] = (long)w; /* 2. hook callback destructor. */ cb->args[3] = (long)cb->done; cb->done = fib6_dump_done; } arg.skb = skb; arg.cb = cb; arg.net = net; w->args = &arg; if (arg.filter.table_id) { tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { if (rtnl_msg_family(cb->nlh) != PF_INET6) goto out; NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); return -ENOENT; } if (!cb->args[0]) { res = fib6_dump_table(tb, skb, cb); if (!res) cb->args[0] = 1; } goto out; } s_h = cb->args[0]; s_e = cb->args[1]; rcu_read_lock(); for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); if (res != 0) goto out_unlock; next: e++; } } out_unlock: rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; out: res = res < 0 ? res : skb->len; if (res <= 0) fib6_dump_end(cb); return res; } void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) { if (!f6i) return; if (f6i->fib6_metrics == &dst_default_metrics) { struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC); if (!p) return; refcount_set(&p->refcnt, 1); f6i->fib6_metrics = p; } f6i->fib6_metrics->metrics[metric - 1] = val; } /* * Routing Table * * return the appropriate node for a routing tree "add" operation * by either creating and inserting or by returning an existing * node. 
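 *
 * Three outcomes are possible while walking down from the root:
 * an exact prefix-length match returns the existing node; a node
 * whose key no longer matches the new prefix takes the insert_above
 * path, which splits the tree with an intermediate node; and walking
 * off the bottom of the tree appends a fresh leaf under the last
 * node visited.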
*/ static struct fib6_node *fib6_add_1(struct net *net, struct fib6_table *table, struct fib6_node *root, struct in6_addr *addr, int plen, int offset, int allow_create, int replace_required, struct netlink_ext_ack *extack) { struct fib6_node *fn, *in, *ln; struct fib6_node *pn = NULL; struct rt6key *key; int bit; __be32 dir = 0; /* insert node in tree */ fn = root; do { struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); key = (struct rt6key *)((u8 *)leaf + offset); /* * Prefix match */ if (plen < fn->fn_bit || !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { if (!allow_create) { if (replace_required) { NL_SET_ERR_MSG(extack, "Can not replace route - no match found"); pr_warn("Can't replace route, no match found\n"); return ERR_PTR(-ENOENT); } pr_warn("NLM_F_CREATE should be set when creating new route\n"); } goto insert_above; } /* * Exact match ? */ if (plen == fn->fn_bit) { /* clean up an intermediate node */ if (!(fn->fn_flags & RTN_RTINFO)) { RCU_INIT_POINTER(fn->leaf, NULL); fib6_info_release(leaf); /* remove null_entry in the root node */ } else if (fn->fn_flags & RTN_TL_ROOT && rcu_access_pointer(fn->leaf) == net->ipv6.fib6_null_entry) { RCU_INIT_POINTER(fn->leaf, NULL); } return fn; } /* * We have more bits to go */ /* Try to walk down on tree. */ dir = addr_bit_set(addr, fn->fn_bit); pn = fn; fn = dir ? rcu_dereference_protected(fn->right, lockdep_is_held(&table->tb6_lock)) : rcu_dereference_protected(fn->left, lockdep_is_held(&table->tb6_lock)); } while (fn); if (!allow_create) { /* We should not create new node because * NLM_F_REPLACE was specified without NLM_F_CREATE * I assume it is safe to require NLM_F_CREATE when * REPLACE flag is used! Later we may want to remove the * check for replace_required, because according * to netlink specification, NLM_F_CREATE * MUST be specified if new route is created. * That would keep IPv6 consistent with IPv4 */ if (replace_required) { NL_SET_ERR_MSG(extack, "Can not replace route - no match found"); pr_warn("Can't replace route, no match found\n"); return ERR_PTR(-ENOENT); } pr_warn("NLM_F_CREATE should be set when creating new route\n"); } /* * We walked to the bottom of tree. * Create new leaf node without children. */ ln = node_alloc(net); if (!ln) return ERR_PTR(-ENOMEM); ln->fn_bit = plen; RCU_INIT_POINTER(ln->parent, pn); if (dir) rcu_assign_pointer(pn->right, ln); else rcu_assign_pointer(pn->left, ln); return ln; insert_above: /* * split since we don't have a common prefix anymore or * we have a less significant route. * we've to insert an intermediate node on the list * this new node will point to the one we need to create * and the current */ pn = rcu_dereference_protected(fn->parent, lockdep_is_held(&table->tb6_lock)); /* find 1st bit in difference between the 2 addrs. See comment in __ipv6_addr_diff: bit may be an invalid value, but if it is >= plen, the value is ignored in any case. */ bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr)); /* * (intermediate)[in] * / \ * (new leaf node)[ln] (old node)[fn] */ if (plen > bit) { in = node_alloc(net); ln = node_alloc(net); if (!in || !ln) { if (in) node_free_immediate(net, in); if (ln) node_free_immediate(net, ln); return ERR_PTR(-ENOMEM); } /* * new intermediate node. 
* RTN_RTINFO will * be off since that an address that chooses one of * the branches would not match less specific routes * in the other branch */ in->fn_bit = bit; RCU_INIT_POINTER(in->parent, pn); in->leaf = fn->leaf; fib6_info_hold(rcu_dereference_protected(in->leaf, lockdep_is_held(&table->tb6_lock))); /* update parent pointer */ if (dir) rcu_assign_pointer(pn->right, in); else rcu_assign_pointer(pn->left, in); ln->fn_bit = plen; RCU_INIT_POINTER(ln->parent, in); rcu_assign_pointer(fn->parent, in); if (addr_bit_set(addr, bit)) { rcu_assign_pointer(in->right, ln); rcu_assign_pointer(in->left, fn); } else { rcu_assign_pointer(in->left, ln); rcu_assign_pointer(in->right, fn); } } else { /* plen <= bit */ /* * (new leaf node)[ln] * / \ * (old node)[fn] NULL */ ln = node_alloc(net); if (!ln) return ERR_PTR(-ENOMEM); ln->fn_bit = plen; RCU_INIT_POINTER(ln->parent, pn); if (addr_bit_set(&key->addr, plen)) RCU_INIT_POINTER(ln->right, fn); else RCU_INIT_POINTER(ln->left, fn); rcu_assign_pointer(fn->parent, ln); if (dir) rcu_assign_pointer(pn->right, ln); else rcu_assign_pointer(pn->left, ln); } return ln; } static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, const struct fib6_info *match, const struct fib6_table *table) { int cpu; if (!fib6_nh->rt6i_pcpu) return; /* release the reference to this fib entry from * all of its cached pcpu routes */ for_each_possible_cpu(cpu) { struct rt6_info **ppcpu_rt; struct rt6_info *pcpu_rt; ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); pcpu_rt = *ppcpu_rt; /* only dropping the 'from' reference if the cached route * is using 'match'. The cached pcpu_rt->from only changes * from a fib6_info to NULL (ip6_dst_destroy); it can never * change from one fib6_info reference to another */ if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) { struct fib6_info *from; from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); fib6_info_release(from); } } } struct fib6_nh_pcpu_arg { struct fib6_info *from; const struct fib6_table *table; }; static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg) { struct fib6_nh_pcpu_arg *arg = _arg; __fib6_drop_pcpu_from(nh, arg->from, arg->table); return 0; } static void fib6_drop_pcpu_from(struct fib6_info *f6i, const struct fib6_table *table) { /* Make sure rt6_make_pcpu_route() wont add other percpu routes * while we are cleaning them here. */ f6i->fib6_destroying = 1; mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */ if (f6i->nh) { struct fib6_nh_pcpu_arg arg = { .from = f6i, .table = table }; nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, &arg); } else { struct fib6_nh *fib6_nh; fib6_nh = f6i->fib6_nh; __fib6_drop_pcpu_from(fib6_nh, f6i, table); } } static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, struct net *net) { struct fib6_table *table = rt->fib6_table; /* Flush all cached dst in exception table */ rt6_flush_exceptions(rt); fib6_drop_pcpu_from(rt, table); if (rt->nh && !list_empty(&rt->nh_list)) list_del_init(&rt->nh_list); if (refcount_read(&rt->fib6_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, * which must be released in time. So, scan ascendant nodes * and replace dummy references to this route with references * to still alive ones. 
*/ while (fn) { struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); struct fib6_info *new_leaf; if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { new_leaf = fib6_find_prefix(net, table, fn); fib6_info_hold(new_leaf); rcu_assign_pointer(fn->leaf, new_leaf); fib6_info_release(rt); } fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&table->tb6_lock)); } } fib6_clean_expires(rt); fib6_remove_gc_list(rt); } /* * Insert routing information in a node. */ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&rt->fib6_table->tb6_lock)); struct fib6_info *iter = NULL; struct fib6_info __rcu **ins; struct fib6_info __rcu **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); bool notify_sibling_rt = false; u16 nlflags = NLM_F_EXCL; int err; if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND)) nlflags |= NLM_F_APPEND; ins = &fn->leaf; for (iter = leaf; iter; iter = rcu_dereference_protected(iter->fib6_next, lockdep_is_held(&rt->fib6_table->tb6_lock))) { /* * Search for duplicates */ if (iter->fib6_metric == rt->fib6_metric) { /* * Same priority level */ if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_EXCL)) return -EEXIST; nlflags &= ~NLM_F_EXCL; if (replace) { if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { found++; break; } fallback_ins = fallback_ins ?: ins; goto next_iter; } if (rt6_duplicate_nexthop(iter, rt)) { if (rt->fib6_nsiblings) rt->fib6_nsiblings = 0; if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); fib6_remove_gc_list(iter); } else { fib6_set_expires(iter, rt->expires); fib6_add_gc_list(iter); } if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu); return -EEXIST; } /* If we have the same destination and the same metric, * but not the same gateway, then the route we try to * add is sibling to this route, increment our counter * of siblings, and later we will add our route to the * list. * Only static routes (which don't have flag * RTF_EXPIRES) are used for ECMPv6. * * To avoid long list, we only had siblings if the * route have a gateway. */ if (rt_can_ecmp && rt6_qualify_for_ecmp(iter)) rt->fib6_nsiblings++; } if (iter->fib6_metric > rt->fib6_metric) break; next_iter: ins = &iter->fib6_next; } if (fallback_ins && !found) { /* No matching route with same ecmp-able-ness found, replace * first matching route */ ins = fallback_ins; iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->fib6_table->tb6_lock)); found++; } /* Reset round-robin state, if necessary */ if (ins == &fn->leaf) fn->rr_ptr = NULL; /* Link this route to others same route. */ if (rt->fib6_nsiblings) { unsigned int fib6_nsiblings; struct fib6_info *sibling, *temp_sibling; /* Find the first route that have the same metric */ sibling = leaf; notify_sibling_rt = true; while (sibling) { if (sibling->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(sibling)) { list_add_tail(&rt->fib6_siblings, &sibling->fib6_siblings); break; } sibling = rcu_dereference_protected(sibling->fib6_next, lockdep_is_held(&rt->fib6_table->tb6_lock)); notify_sibling_rt = false; } /* For each sibling in the list, increment the counter of * siblings. 
BUG() if counters does not match, list of siblings * is broken! */ fib6_nsiblings = 0; list_for_each_entry_safe(sibling, temp_sibling, &rt->fib6_siblings, fib6_siblings) { sibling->fib6_nsiblings++; BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings); fib6_nsiblings++; } BUG_ON(fib6_nsiblings != rt->fib6_nsiblings); rt6_multipath_rebalance(temp_sibling); } /* * insert node */ if (!replace) { if (!add) pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: nlflags |= NLM_F_CREATE; /* The route should only be notified if it is the first * route in the node or if it is added as a sibling * route to the first route in the node. */ if (!info->skip_notify_kernel && (notify_sibling_rt || ins == &fn->leaf)) { enum fib_event_type fib_event; if (notify_sibling_rt) fib_event = FIB_EVENT_ENTRY_APPEND; else fib_event = FIB_EVENT_ENTRY_REPLACE; err = call_fib6_entry_notifiers(info->nl_net, fib_event, rt, extack); if (err) { struct fib6_info *sibling, *next_sibling; /* If the route has siblings, then it first * needs to be unlinked from them. */ if (!rt->fib6_nsiblings) return err; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; list_del_init(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); return err; } } rcu_assign_pointer(rt->fib6_next, iter); fib6_info_hold(rt); rcu_assign_pointer(rt->fib6_node, fn); rcu_assign_pointer(*ins, rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } } else { int nsiblings; if (!found) { if (add) goto add; pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } if (!info->skip_notify_kernel && ins == &fn->leaf) { err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt, extack); if (err) return err; } fib6_info_hold(rt); rcu_assign_pointer(rt->fib6_node, fn); rt->fib6_next = iter->fib6_next; rcu_assign_pointer(*ins, rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } nsiblings = iter->fib6_nsiblings; iter->fib6_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; fib6_info_release(iter); if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ ins = &rt->fib6_next; iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->fib6_table->tb6_lock)); while (iter) { if (iter->fib6_metric > rt->fib6_metric) break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->fib6_next; iter->fib6_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; fib6_info_release(iter); nsiblings--; info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { ins = &iter->fib6_next; } iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->fib6_table->tb6_lock)); } WARN_ON(nsiblings != 0); } } return 0; } static void fib6_start_gc(struct net *net, struct fib6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && (rt->fib6_flags & RTF_EXPIRES)) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } void fib6_force_start_gc(struct net *net) { if (!timer_pending(&net->ipv6.ip6_fib_timer)) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + 
net->ipv6.sysctl.ip6_rt_gc_interval); } static void __fib6_update_sernum_upto_root(struct fib6_info *rt, int sernum) { struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, lockdep_is_held(&rt->fib6_table->tb6_lock)); /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } } void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) { __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); } /* allow ipv4 to update sernum via ipv6_stub */ void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i) { spin_lock_bh(&f6i->fib6_table->tb6_lock); fib6_update_sernum_upto_root(net, f6i); spin_unlock_bh(&f6i->fib6_table->tb6_lock); } /* * Add routing information to the routing tree. * <destination addr>/<source addr> * with source addr info in sub-trees * Need to own table->tb6_lock */ int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->fib6_table; struct fib6_node *fn; #ifdef CONFIG_IPV6_SUBTREES struct fib6_node *pn = NULL; #endif int err = -ENOMEM; int allow_create = 1; int replace_required = 0; if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) allow_create = 0; if (info->nlh->nlmsg_flags & NLM_F_REPLACE) replace_required = 1; } if (!allow_create && !replace_required) pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); fn = fib6_add_1(info->nl_net, table, root, &rt->fib6_dst.addr, rt->fib6_dst.plen, offsetof(struct fib6_info, fib6_dst), allow_create, replace_required, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); fn = NULL; goto out; } #ifdef CONFIG_IPV6_SUBTREES pn = fn; if (rt->fib6_src.plen) { struct fib6_node *sn; if (!rcu_access_pointer(fn->subtree)) { struct fib6_node *sfn; /* * Create subtree. * * fn[main tree] * | * sfn[subtree root] * \ * sn[new leaf node] */ /* Create subtree root node */ sfn = node_alloc(info->nl_net); if (!sfn) goto failure; fib6_info_hold(info->nl_net->ipv6.fib6_null_entry); rcu_assign_pointer(sfn->leaf, info->nl_net->ipv6.fib6_null_entry); sfn->fn_flags = RTN_ROOT; /* Now add the first leaf node to new subtree */ sn = fib6_add_1(info->nl_net, table, sfn, &rt->fib6_src.addr, rt->fib6_src.plen, offsetof(struct fib6_info, fib6_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { /* If it is failed, discard just allocated root, and then (in failure) stale node in main tree. 
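 * That is: free the subtree root sfn that was allocated just above;
 * the stale intermediate node possibly left in the main tree is
 * cleaned up by fib6_repair_tree() in the failure path at the end
 * of fib6_add().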
*/ node_free_immediate(info->nl_net, sfn); err = PTR_ERR(sn); goto failure; } /* Now link new subtree to main tree */ rcu_assign_pointer(sfn->parent, fn); rcu_assign_pointer(fn->subtree, sfn); } else { sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn), &rt->fib6_src.addr, rt->fib6_src.plen, offsetof(struct fib6_info, fib6_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { err = PTR_ERR(sn); goto failure; } } if (!rcu_access_pointer(fn->leaf)) { if (fn->fn_flags & RTN_TL_ROOT) { /* put back null_entry for root node */ rcu_assign_pointer(fn->leaf, info->nl_net->ipv6.fib6_null_entry); } else { fib6_info_hold(rt); rcu_assign_pointer(fn->leaf, rt); } } fn = sn; } #endif err = fib6_add_rt2node(fn, rt, info, extack); if (!err) { if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); if (rt->fib6_flags & RTF_EXPIRES) fib6_add_gc_list(rt); fib6_start_gc(info->nl_net, rt); } out: if (err) { #ifdef CONFIG_IPV6_SUBTREES /* * If fib6_add_1 has cleared the old leaf pointer in the * super-tree leaf node we have to find a new one for it. */ if (pn != fn) { struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); if (pn_leaf == rt) { pn_leaf = NULL; RCU_INIT_POINTER(pn->leaf, NULL); fib6_info_release(rt); } if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, pn); if (!pn_leaf) pn_leaf = info->nl_net->ipv6.fib6_null_entry; fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } } #endif goto failure; } else if (fib6_requires_src(rt)) { fib6_routes_require_src_inc(info->nl_net); } return err; failure: /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if: * 1. fn is an intermediate node and we failed to add the new * route to it in both subtree creation failure and fib6_add_rt2node() * failure case. * 2. fn is the root node in the table and we fail to add the first * default route to it. */ if (fn && (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) || (fn->fn_flags & RTN_TL_ROOT && !rcu_access_pointer(fn->leaf)))) fib6_repair_tree(info->nl_net, table, fn); return err; } /* * Routing tree lookup * */ struct lookup_args { int offset; /* key offset on fib6_info */ const struct in6_addr *addr; /* search key */ }; static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root, struct lookup_args *args) { struct fib6_node *fn; __be32 dir; if (unlikely(args->offset == 0)) return NULL; /* * Descend on a tree */ fn = root; for (;;) { struct fib6_node *next; dir = addr_bit_set(args->addr, fn->fn_bit); next = dir ? 
rcu_dereference(fn->right) : rcu_dereference(fn->left); if (next) { fn = next; continue; } break; } while (fn) { struct fib6_node *subtree = FIB6_SUBTREE(fn); if (subtree || fn->fn_flags & RTN_RTINFO) { struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; if (!leaf) goto backtrack; key = (struct rt6key *) ((u8 *)leaf + args->offset); if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES if (subtree) { struct fib6_node *sfn; sfn = fib6_node_lookup_1(subtree, args + 1); if (!sfn) goto backtrack; fn = sfn; } #endif if (fn->fn_flags & RTN_RTINFO) return fn; } } backtrack: if (fn->fn_flags & RTN_ROOT) break; fn = rcu_dereference(fn->parent); } return NULL; } /* called with rcu_read_lock() held */ struct fib6_node *fib6_node_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct fib6_node *fn; struct lookup_args args[] = { { .offset = offsetof(struct fib6_info, fib6_dst), .addr = daddr, }, #ifdef CONFIG_IPV6_SUBTREES { .offset = offsetof(struct fib6_info, fib6_src), .addr = saddr, }, #endif { .offset = 0, /* sentinel */ } }; fn = fib6_node_lookup_1(root, daddr ? args : args + 1); if (!fn || fn->fn_flags & RTN_TL_ROOT) fn = root; return fn; } /* * Get node with specified destination prefix (and source prefix, * if subtrees are used) * exact_match == true means we try to find fn with exact match of * the passed in prefix addr * exact_match == false means we try to find fn with longest prefix * match of the passed in prefix addr. This is useful for finding fn * for cached route as it will be stored in the exception table under * the node with longest prefix length. */ static struct fib6_node *fib6_locate_1(struct fib6_node *root, const struct in6_addr *addr, int plen, int offset, bool exact_match) { struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; /* This node is being deleted */ if (!leaf) { if (plen <= fn->fn_bit) goto out; else goto next; } key = (struct rt6key *)((u8 *)leaf + offset); /* * Prefix match */ if (plen < fn->fn_bit || !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) goto out; if (plen == fn->fn_bit) return fn; if (fn->fn_flags & RTN_RTINFO) prev = fn; next: /* * We have more bits to go */ if (addr_bit_set(addr, fn->fn_bit)) fn = rcu_dereference(fn->right); else fn = rcu_dereference(fn->left); } out: if (exact_match) return NULL; else return prev; } struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, const struct in6_addr *saddr, int src_len, bool exact_match) { struct fib6_node *fn; fn = fib6_locate_1(root, daddr, dst_len, offsetof(struct fib6_info, fib6_dst), exact_match); #ifdef CONFIG_IPV6_SUBTREES if (src_len) { WARN_ON(saddr == NULL); if (fn) { struct fib6_node *subtree = FIB6_SUBTREE(fn); if (subtree) { fn = fib6_locate_1(subtree, saddr, src_len, offsetof(struct fib6_info, fib6_src), exact_match); } } } #endif if (fn && fn->fn_flags & RTN_RTINFO) return fn; return NULL; } /* * Deletion * */ static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn) { struct fib6_node *child_left, *child_right; if (fn->fn_flags & RTN_ROOT) return net->ipv6.fib6_null_entry; while (fn) { child_left = rcu_dereference_protected(fn->left, lockdep_is_held(&table->tb6_lock)); child_right = rcu_dereference_protected(fn->right, lockdep_is_held(&table->tb6_lock)); if (child_left) return rcu_dereference_protected(child_left->leaf, 
lockdep_is_held(&table->tb6_lock)); if (child_right) return rcu_dereference_protected(child_right->leaf, lockdep_is_held(&table->tb6_lock)); fn = FIB6_SUBTREE(fn); } return NULL; } /* * Called to trim the tree of intermediate nodes when possible. "fn" * is the node we want to try and remove. * Need to own table->tb6_lock */ static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_table *table, struct fib6_node *fn) { int children; int nstate; struct fib6_node *child; struct fib6_walker *w; int iter = 0; /* Set fn->leaf to null_entry for root node. */ if (fn->fn_flags & RTN_TL_ROOT) { rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry); return fn; } for (;;) { struct fib6_node *fn_r = rcu_dereference_protected(fn->right, lockdep_is_held(&table->tb6_lock)); struct fib6_node *fn_l = rcu_dereference_protected(fn->left, lockdep_is_held(&table->tb6_lock)); struct fib6_node *pn = rcu_dereference_protected(fn->parent, lockdep_is_held(&table->tb6_lock)); struct fib6_node *pn_r = rcu_dereference_protected(pn->right, lockdep_is_held(&table->tb6_lock)); struct fib6_node *pn_l = rcu_dereference_protected(pn->left, lockdep_is_held(&table->tb6_lock)); struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); struct fib6_info *new_fn_leaf; pr_debug("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; WARN_ON(fn->fn_flags & RTN_RTINFO); WARN_ON(fn->fn_flags & RTN_TL_ROOT); WARN_ON(fn_leaf); children = 0; child = NULL; if (fn_r) { child = fn_r; children |= 1; } if (fn_l) { child = fn_l; children |= 2; } if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES /* Subtree root (i.e. fn) may have one child */ || (children && fn->fn_flags & RTN_ROOT) #endif ) { new_fn_leaf = fib6_find_prefix(net, table, fn); #if RT6_DEBUG >= 2 if (!new_fn_leaf) { WARN_ON(!new_fn_leaf); new_fn_leaf = net->ipv6.fib6_null_entry; } #endif fib6_info_hold(new_fn_leaf); rcu_assign_pointer(fn->leaf, new_fn_leaf); return pn; } #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { WARN_ON(!(fn->fn_flags & RTN_ROOT)); RCU_INIT_POINTER(pn->subtree, NULL); nstate = FWS_L; } else { WARN_ON(fn->fn_flags & RTN_ROOT); #endif if (pn_r == fn) rcu_assign_pointer(pn->right, child); else if (pn_l == fn) rcu_assign_pointer(pn->left, child); #if RT6_DEBUG >= 2 else WARN_ON(1); #endif if (child) rcu_assign_pointer(child->parent, pn); nstate = FWS_R; #ifdef CONFIG_IPV6_SUBTREES } #endif read_lock(&net->ipv6.fib6_walker_lock); FOR_WALKERS(net, w) { if (!child) { if (w->node == fn) { pr_debug("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); w->node = pn; w->state = nstate; } } else { if (w->node == fn) { w->node = child; if (children&2) { pr_debug("W %p adjusted by delnode 2, s=%d\n", w, w->state); w->state = w->state >= FWS_R ? FWS_U : FWS_INIT; } else { pr_debug("W %p adjusted by delnode 2, s=%d\n", w, w->state); w->state = w->state >= FWS_C ? 
FWS_U : FWS_INIT; } } } } read_unlock(&net->ipv6.fib6_walker_lock); node_free(net, fn); if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; RCU_INIT_POINTER(pn->leaf, NULL); fib6_info_release(pn_leaf); fn = pn; } } static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, struct fib6_info __rcu **rtp, struct nl_info *info) { struct fib6_info *leaf, *replace_rt = NULL; struct fib6_walker *w; struct fib6_info *rt = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); struct net *net = info->nl_net; bool notify_del = false; /* If the deleted route is the first in the node and it is not part of * a multipath route, then we need to replace it with the next route * in the node, if exists. */ leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); if (leaf == rt && !rt->fib6_nsiblings) { if (rcu_access_pointer(rt->fib6_next)) replace_rt = rcu_dereference_protected(rt->fib6_next, lockdep_is_held(&table->tb6_lock)); else notify_del = true; } /* Unlink it */ *rtp = rt->fib6_next; rt->fib6_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; /* Reset round-robin state, if necessary */ if (rcu_access_pointer(fn->rr_ptr) == rt) fn->rr_ptr = NULL; /* Remove this entry from other siblings */ if (rt->fib6_nsiblings) { struct fib6_info *sibling, *next_sibling; /* The route is deleted from a multipath route. If this * multipath route is the first route in the node, then we need * to emit a delete notification. Otherwise, we need to skip * the notification. */ if (rt->fib6_metric == leaf->fib6_metric && rt6_qualify_for_ecmp(leaf)) notify_del = true; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; list_del_init(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } /* Adjust walkers */ read_lock(&net->ipv6.fib6_walker_lock); FOR_WALKERS(net, w) { if (w->state == FWS_C && w->leaf == rt) { pr_debug("walker %p adjusted by delroute\n", w); w->leaf = rcu_dereference_protected(rt->fib6_next, lockdep_is_held(&table->tb6_lock)); if (!w->leaf) w->state = FWS_U; } } read_unlock(&net->ipv6.fib6_walker_lock); /* If it was last route, call fib6_repair_tree() to: * 1. For root node, put back null_entry as how the table was created. * 2. For other nodes, expunge its radix tree node. 
*/ if (!rcu_access_pointer(fn->leaf)) { if (!(fn->fn_flags & RTN_TL_ROOT)) { fn->fn_flags &= ~RTN_RTINFO; net->ipv6.rt6_stats->fib_route_nodes--; } fn = fib6_repair_tree(net, table, fn); } fib6_purge_rt(rt, fn, net); if (!info->skip_notify_kernel) { if (notify_del) call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); else if (replace_rt) call_fib6_entry_notifiers_replace(net, replace_rt); } if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); fib6_info_release(rt); } /* Need to own table->tb6_lock */ int fib6_del(struct fib6_info *rt, struct nl_info *info) { struct net *net = info->nl_net; struct fib6_info __rcu **rtp; struct fib6_info __rcu **rtp_next; struct fib6_table *table; struct fib6_node *fn; if (rt == net->ipv6.fib6_null_entry) return -ENOENT; table = rt->fib6_table; fn = rcu_dereference_protected(rt->fib6_node, lockdep_is_held(&table->tb6_lock)); if (!fn) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); /* * Walk the leaf entries looking for ourself */ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) { struct fib6_info *cur = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); if (rt == cur) { if (fib6_requires_src(cur)) fib6_routes_require_src_dec(info->nl_net); fib6_del_route(table, fn, rtp, info); return 0; } rtp_next = &cur->fib6_next; } return -ENOENT; } /* * Tree traversal function. * * Certainly, it is not interrupt safe. * However, it is internally reenterable wrt itself and fib6_add/fib6_del. * It means, that we can modify tree during walking * and use this function for garbage collection, clone pruning, * cleaning tree when a device goes down etc. etc. * * It guarantees that every node will be traversed, * and that it will be traversed only once. * * Callback function w->func may return: * 0 -> continue walking. * positive value -> walking is suspended (used by tree dumps, * and probably by gc, if it will be split to several slices) * negative value -> terminate walking. * * The function itself returns: * 0 -> walk is complete. * >0 -> walk is incomplete (i.e. suspended) * <0 -> walk is terminated by an error. * * This function is called with tb6_lock held. 
*/ static int fib6_walk_continue(struct fib6_walker *w) { struct fib6_node *fn, *pn, *left, *right; /* w->root should always be table->tb6_root */ WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT)); for (;;) { fn = w->node; if (!fn) return 0; switch (w->state) { #ifdef CONFIG_IPV6_SUBTREES case FWS_S: if (FIB6_SUBTREE(fn)) { w->node = FIB6_SUBTREE(fn); continue; } w->state = FWS_L; fallthrough; #endif case FWS_L: left = rcu_dereference_protected(fn->left, 1); if (left) { w->node = left; w->state = FWS_INIT; continue; } w->state = FWS_R; fallthrough; case FWS_R: right = rcu_dereference_protected(fn->right, 1); if (right) { w->node = right; w->state = FWS_INIT; continue; } w->state = FWS_C; w->leaf = rcu_dereference_protected(fn->leaf, 1); fallthrough; case FWS_C: if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; if (w->skip) { w->skip--; goto skip; } err = w->func(w); if (err) return err; w->count++; continue; } skip: w->state = FWS_U; fallthrough; case FWS_U: if (fn == w->root) return 0; pn = rcu_dereference_protected(fn->parent, 1); left = rcu_dereference_protected(pn->left, 1); right = rcu_dereference_protected(pn->right, 1); w->node = pn; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { WARN_ON(!(fn->fn_flags & RTN_ROOT)); w->state = FWS_L; continue; } #endif if (left == fn) { w->state = FWS_R; continue; } if (right == fn) { w->state = FWS_C; w->leaf = rcu_dereference_protected(w->node->leaf, 1); continue; } #if RT6_DEBUG >= 2 WARN_ON(1); #endif } } } static int fib6_walk(struct net *net, struct fib6_walker *w) { int res; w->state = FWS_INIT; w->node = w->root; fib6_walker_link(net, w); res = fib6_walk_continue(w); if (res <= 0) fib6_walker_unlink(net, w); return res; } static int fib6_clean_node(struct fib6_walker *w) { int res; struct fib6_info *rt; struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); struct nl_info info = { .nl_net = c->net, .skip_notify = c->skip_notify, }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && READ_ONCE(w->node->fn_sernum) != c->sernum) WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); w->leaf = NULL; return 0; } for_each_fib6_walker_rt(w) { res = c->func(rt, c->arg); if (res == -1) { w->leaf = rt; res = fib6_del(rt, &info); if (res) { #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", __func__, rt, rcu_access_pointer(rt->fib6_node), res); #endif continue; } return 0; } else if (res == -2) { if (WARN_ON(!rt->fib6_nsiblings)) continue; rt = list_last_entry(&rt->fib6_siblings, struct fib6_info, fib6_siblings); continue; } WARN_ON(res != 0); } w->leaf = rt; return 0; } /* * Convenient frontend to tree walker. * * func is called on each route. * It may return -2 -> skip multipath route. * -1 -> delete this route. 
* 0 -> continue walking */ static void fib6_clean_tree(struct net *net, struct fib6_node *root, int (*func)(struct fib6_info *, void *arg), int sernum, void *arg, bool skip_notify) { struct fib6_cleaner c; c.w.root = root; c.w.func = fib6_clean_node; c.w.count = 0; c.w.skip = 0; c.w.skip_in_node = 0; c.func = func; c.sernum = sernum; c.arg = arg; c.net = net; c.skip_notify = skip_notify; fib6_walk(net, &c.w); } static void __fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), int sernum, void *arg, bool skip_notify) { struct fib6_table *table; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { spin_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, sernum, arg, skip_notify); spin_unlock_bh(&table->tb6_lock); } } rcu_read_unlock(); } void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), void *arg) { __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false); } void fib6_clean_all_skip_notify(struct net *net, int (*func)(struct fib6_info *, void *), void *arg) { __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true); } static void fib6_flush_trees(struct net *net) { int new_sernum = fib6_new_sernum(net); __fib6_clean_all(net, NULL, new_sernum, NULL, false); } /* * Garbage collection */ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { unsigned long now = jiffies; /* * check addrconf expiration here. * Routes are expired even if they are in use. */ if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { if (time_after(now, rt->expires)) { pr_debug("expiring %p\n", rt); return -1; } gc_args->more++; } /* Also age clones in the exception table. * Note, that clones are aged out * only if they are not in use now. */ rt6_age_exceptions(rt, gc_args, now); return 0; } static void fib6_gc_table(struct net *net, struct fib6_table *tb6, struct fib6_gc_args *gc_args) { struct fib6_info *rt; struct hlist_node *n; struct nl_info info = { .nl_net = net, .skip_notify = false, }; hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link) if (fib6_age(rt, gc_args) == -1) fib6_del(rt, &info); } static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args) { struct fib6_table *table; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { spin_lock_bh(&table->tb6_lock); fib6_gc_table(net, table, gc_args); spin_unlock_bh(&table->tb6_lock); } } rcu_read_unlock(); } void fib6_run_gc(unsigned long expires, struct net *net, bool force) { struct fib6_gc_args gc_args; unsigned long now; if (force) { spin_lock_bh(&net->ipv6.fib6_gc_lock); } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) { mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); return; } gc_args.timeout = expires ? 
(int)expires : net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = 0; fib6_gc_all(net, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; if (gc_args.more) mod_timer(&net->ipv6.ip6_fib_timer, round_jiffies(now + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); spin_unlock_bh(&net->ipv6.fib6_gc_lock); } static void fib6_gc_timer_cb(struct timer_list *t) { struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer); fib6_run_gc(0, arg, true); } static int __net_init fib6_net_init(struct net *net) { size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; int err; err = fib6_notifier_init(net); if (err) return err; /* Default to 3-tuple */ net->ipv6.sysctl.multipath_hash_fields = FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK; spin_lock_init(&net->ipv6.fib6_gc_lock); rwlock_init(&net->ipv6.fib6_walker_lock); INIT_LIST_HEAD(&net->ipv6.fib6_walkers); timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0); net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); if (!net->ipv6.rt6_stats) goto out_notifier; /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL); if (!net->ipv6.fib_table_hash) goto out_rt6_stats; net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl), GFP_KERNEL); if (!net->ipv6.fib6_main_tbl) goto out_fib_table_hash; net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf, net->ipv6.fib6_null_entry); net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), GFP_KERNEL); if (!net->ipv6.fib6_local_tbl) goto out_fib6_main_tbl; net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf, net->ipv6.fib6_null_entry); net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist); #endif fib6_tables_init(net); return 0; #ifdef CONFIG_IPV6_MULTIPLE_TABLES out_fib6_main_tbl: kfree(net->ipv6.fib6_main_tbl); #endif out_fib_table_hash: kfree(net->ipv6.fib_table_hash); out_rt6_stats: kfree(net->ipv6.rt6_stats); out_notifier: fib6_notifier_exit(net); return -ENOMEM; } static void fib6_net_exit(struct net *net) { unsigned int i; del_timer_sync(&net->ipv6.ip6_fib_timer); for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { struct hlist_head *head = &net->ipv6.fib_table_hash[i]; struct hlist_node *tmp; struct fib6_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) { hlist_del(&tb->tb6_hlist); fib6_free_table(tb); } } kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); fib6_notifier_exit(net); } static struct pernet_operations fib6_net_ops = { .init = fib6_net_init, .exit = fib6_net_exit, }; int __init fib6_init(void) { int ret = -ENOMEM; fib6_node_kmem = KMEM_CACHE(fib6_node, SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT); if (!fib6_node_kmem) goto out; ret = register_pernet_subsys(&fib6_net_ops); if (ret) goto out_kmem_cache_create; ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, 0); if (ret) goto out_unregister_subsys; __fib6_flush_trees = fib6_flush_trees; out: return ret; out_unregister_subsys: 
unregister_pernet_subsys(&fib6_net_ops); out_kmem_cache_create: kmem_cache_destroy(fib6_node_kmem); goto out; } void fib6_gc_cleanup(void) { unregister_pernet_subsys(&fib6_net_ops); kmem_cache_destroy(fib6_node_kmem); } #ifdef CONFIG_PROC_FS static int ipv6_route_native_seq_show(struct seq_file *seq, void *v) { struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; struct fib6_nh *fib6_nh = rt->fib6_nh; unsigned int flags = rt->fib6_flags; const struct net_device *dev; if (rt->nh) fib6_nh = nexthop_fib6_nh(rt->nh); seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); #ifdef CONFIG_IPV6_SUBTREES seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen); #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif if (fib6_nh->fib_nh_gw_family) { flags |= RTF_GATEWAY; seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6); } else { seq_puts(seq, "00000000000000000000000000000000"); } dev = fib6_nh->fib_nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", rt->fib6_metric, refcount_read(&rt->fib6_ref), 0, flags, dev ? dev->name : ""); iter->w.leaf = NULL; return 0; } static int ipv6_route_yield(struct fib6_walker *w) { struct ipv6_route_iter *iter = w->args; if (!iter->skip) return 1; do { iter->w.leaf = rcu_dereference_protected( iter->w.leaf->fib6_next, lockdep_is_held(&iter->tbl->tb6_lock)); iter->skip--; if (!iter->skip && iter->w.leaf) return 1; } while (iter->w.leaf); return 0; } static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, struct net *net) { memset(&iter->w, 0, sizeof(iter->w)); iter->w.func = ipv6_route_yield; iter->w.root = &iter->tbl->tb6_root; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, struct net *net) { unsigned int h; struct hlist_node *node; if (tbl) { h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1; node = rcu_dereference(hlist_next_rcu(&tbl->tb6_hlist)); } else { h = 0; node = NULL; } while (!node && h < FIB6_TABLE_HASHSZ) { node = rcu_dereference( hlist_first_rcu(&net->ipv6.fib_table_hash[h++])); } return hlist_entry_safe(node, struct fib6_table, tb6_hlist); } static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { int sernum = READ_ONCE(iter->w.root->fn_sernum); if (iter->sernum != sernum) { iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); iter->w.skip = iter->w.count; } } static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int r; struct fib6_info *n; struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; ++(*pos); if (!v) goto iter_table; n = rcu_dereference(((struct fib6_info *)v)->fib6_next); if (n) return n; iter_table: ipv6_route_check_sernum(iter); spin_lock_bh(&iter->tbl->tb6_lock); r = fib6_walk_continue(&iter->w); spin_unlock_bh(&iter->tbl->tb6_lock); if (r > 0) { return iter->w.leaf; } else if (r < 0) { fib6_walker_unlink(net, &iter->w); return NULL; } fib6_walker_unlink(net, &iter->w); iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); if (!iter->tbl) return NULL; ipv6_route_seq_setup_walk(iter, net); goto iter_table; } static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; rcu_read_lock(); iter->tbl = ipv6_route_seq_next_table(NULL, net); 
iter->skip = *pos; if (iter->tbl) { loff_t p = 0; ipv6_route_seq_setup_walk(iter, net); return ipv6_route_seq_next(seq, NULL, &p); } else { return NULL; } } static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) { struct fib6_walker *w = &iter->w; return w->node && !(w->state == FWS_U && w->node == w->root); } static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; if (ipv6_route_iter_active(iter)) fib6_walker_unlink(net, &iter->w); rcu_read_unlock(); } #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) static int ipv6_route_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, void *v) { struct bpf_iter__ipv6_route ctx; ctx.meta = meta; ctx.rt = v; return bpf_iter_run_prog(prog, &ctx); } static int ipv6_route_seq_show(struct seq_file *seq, void *v) { struct ipv6_route_iter *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; int ret; meta.seq = seq; prog = bpf_iter_get_info(&meta, false); if (!prog) return ipv6_route_native_seq_show(seq, v); ret = ipv6_route_prog_seq_show(prog, &meta, v); iter->w.leaf = NULL; return ret; } static void ipv6_route_seq_stop(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; if (!v) { meta.seq = seq; prog = bpf_iter_get_info(&meta, true); if (prog) (void)ipv6_route_prog_seq_show(prog, &meta, v); } ipv6_route_native_seq_stop(seq, v); } #else static int ipv6_route_seq_show(struct seq_file *seq, void *v) { return ipv6_route_native_seq_show(seq, v); } static void ipv6_route_seq_stop(struct seq_file *seq, void *v) { ipv6_route_native_seq_stop(seq, v); } #endif const struct seq_operations ipv6_route_seq_ops = { .start = ipv6_route_seq_start, .next = ipv6_route_seq_next, .stop = ipv6_route_seq_stop, .show = ipv6_route_seq_show }; #endif /* CONFIG_PROC_FS */
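/*
 * Illustrative sketch (not part of the original file): a minimal
 * fib6_clean_all() callback following the contract documented above
 * (return 0 to keep walking, -1 to delete the current route, -2 to
 * skip the remaining siblings of a multipath route).  The callback
 * name and the "struct arg_dev" wrapper are hypothetical and ignore
 * nexthop-object routes (rt->nh) for brevity.
 *
 *	struct arg_dev {
 *		const struct net_device *dev;
 *	};
 *
 *	static int fib6_drop_dev_routes(struct fib6_info *rt, void *arg)
 *	{
 *		const struct arg_dev *a = arg;
 *
 *		if (rt->fib6_nh->fib_nh_dev == a->dev)
 *			return -1;	// fib6_clean_node() will fib6_del() it
 *		return 0;		// keep walking
 *	}
 *
 *	// Typical invocation, e.g. from a netdevice event handler:
 *	//	struct arg_dev a = { .dev = dev };
 *	//	fib6_clean_all(net, fib6_drop_dev_routes, &a);
 */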
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright (c) 2021, Google LLC.
 * Pasha Tatashin <pasha.tatashin@soleen.com>
 */
#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/page_table_check.h>

#undef pr_fmt
#define pr_fmt(fmt)	"page_table_check: " fmt

struct page_table_check {
	atomic_t anon_map_count;
	atomic_t file_map_count;
};

static bool __page_table_check_enabled __initdata =
				IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);

DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);
EXPORT_SYMBOL(page_table_check_disabled);

static int __init early_page_table_check_param(char *buf)
{
	return kstrtobool(buf, &__page_table_check_enabled);
}

early_param("page_table_check", early_page_table_check_param);

static bool __init need_page_table_check(void)
{
	return __page_table_check_enabled;
}

static void __init init_page_table_check(void)
{
	if (!__page_table_check_enabled)
		return;
	static_branch_disable(&page_table_check_disabled);
}

struct page_ext_operations page_table_check_ops = {
	.size = sizeof(struct page_table_check),
	.need = need_page_table_check,
	.init = init_page_table_check,
	.need_shared_flags = false,
};

static struct page_table_check *get_page_table_check(struct page_ext *page_ext)
{
	BUG_ON(!page_ext);
	return page_ext_data(page_ext, &page_table_check_ops);
}

/*
 * An entry is removed from the page table, decrement the counters for that page
 * verify that it is of correct type and counters do not become negative.
 */
static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt)
{
	struct page_ext *page_ext;
	struct page *page;
	unsigned long i;
	bool anon;

	if (!pfn_valid(pfn))
		return;

	page = pfn_to_page(pfn);
	page_ext = page_ext_get(page);

	BUG_ON(PageSlab(page));
	anon = PageAnon(page);

	for (i = 0; i < pgcnt; i++) {
		struct page_table_check *ptc = get_page_table_check(page_ext);

		if (anon) {
			BUG_ON(atomic_read(&ptc->file_map_count));
			BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0);
		} else {
			BUG_ON(atomic_read(&ptc->anon_map_count));
			BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0);
		}
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

/*
 * A new entry is added to the page table, increment the counters for that page
 * verify that it is of correct type and is not being mapped with a different
 * type to a different process.
*/ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt, bool rw) { struct page_ext *page_ext; struct page *page; unsigned long i; bool anon; if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); page_ext = page_ext_get(page); BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { struct page_table_check *ptc = get_page_table_check(page_ext); if (anon) { BUG_ON(atomic_read(&ptc->file_map_count)); BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw); } else { BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0); } page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } /* * page is on free list, or is being allocated, verify that counters are zeroes * crash if they are not. */ void __page_table_check_zero(struct page *page, unsigned int order) { struct page_ext *page_ext; unsigned long i; BUG_ON(PageSlab(page)); page_ext = page_ext_get(page); BUG_ON(!page_ext); for (i = 0; i < (1ul << order); i++) { struct page_table_check *ptc = get_page_table_check(page_ext); BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_read(&ptc->file_map_count)); page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { if (&init_mm == mm) return; if (pte_user_accessible_page(pte)) { page_table_check_clear(pte_pfn(pte), PAGE_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pte_clear); void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { if (&init_mm == mm) return; if (pmd_user_accessible_page(pmd)) { page_table_check_clear(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pmd_clear); void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { if (&init_mm == mm) return; if (pud_user_accessible_page(pud)) { page_table_check_clear(pud_pfn(pud), PUD_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pud_clear); void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr) { unsigned int i; if (&init_mm == mm) return; for (i = 0; i < nr; i++) __page_table_check_pte_clear(mm, ptep_get(ptep + i)); if (pte_user_accessible_page(pte)) page_table_check_set(pte_pfn(pte), nr, pte_write(pte)); } EXPORT_SYMBOL(__page_table_check_ptes_set); void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { if (&init_mm == mm) return; __page_table_check_pmd_clear(mm, *pmdp); if (pmd_user_accessible_page(pmd)) { page_table_check_set(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT, pmd_write(pmd)); } } EXPORT_SYMBOL(__page_table_check_pmd_set); void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { if (&init_mm == mm) return; __page_table_check_pud_clear(mm, *pudp); if (pud_user_accessible_page(pud)) { page_table_check_set(pud_pfn(pud), PUD_SIZE >> PAGE_SHIFT, pud_write(pud)); } } EXPORT_SYMBOL(__page_table_check_pud_set); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd) { if (&init_mm == mm) return; if (!pmd_bad(pmd) && !pmd_leaf(pmd)) { pte_t *ptep = pte_offset_map(&pmd, addr); unsigned long i; if (WARN_ON(!ptep)) return; for (i = 0; i < PTRS_PER_PTE; i++) { __page_table_check_pte_clear(mm, ptep_get(ptep)); addr += PAGE_SIZE; ptep++; } pte_unmap(ptep - PTRS_PER_PTE); } }
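/*
 * Usage sketch (assumption, not part of this file): the exported
 * __page_table_check_*() helpers above are intended to be reached
 * through thin wrappers gated on the page_table_check_disabled static
 * key, so the checks cost one patched branch when the feature is off.
 * A caller-side wrapper would look roughly like this (hypothetical
 * name, mirroring what <linux/page_table_check.h> provides):
 *
 *	static inline void page_table_check_pte_clear(struct mm_struct *mm,
 *						      pte_t pte)
 *	{
 *		if (static_branch_likely(&page_table_check_disabled))
 *			return;
 *		__page_table_check_pte_clear(mm, pte);
 *	}
 */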
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for formatting and printing strings
 *
 * Copyright 31 August 2008 James Bottomley
 * Copyright (C) 2013, Intel Corporation
 */
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/export.h>
#include <linux/ctype.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/limits.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/string_helpers.h>

#include <kunit/test.h>
#include <kunit/test-bug.h>

/**
 * string_get_size - get the size in the specified units
 * @size:	The size to be converted in blocks
 * @blk_size:	Size of the block (use 1 for size in bytes)
 * @units:	Units to use (powers of 1000 or 1024), whether to include space separator
 * @buf:	buffer to format to
 * @len:	length of buffer
 *
 * This function returns a string formatted to 3 significant figures
 * giving the size in the required units.  @buf should have room for
 * at least 9 bytes and will always be zero terminated.
 *
 * Return value: number of characters of output that would have been written
 * (which may be greater than len, if output was truncated).
 */
int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
		    char *buf, int len)
{
	enum string_size_units units_base = units & STRING_UNITS_MASK;
	static const char *const units_10[] = {
		"", "k", "M", "G", "T", "P", "E", "Z", "Y",
	};
	static const char *const units_2[] = {
		"", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi",
	};
	static const char *const *const units_str[] = {
		[STRING_UNITS_10] = units_10,
		[STRING_UNITS_2] = units_2,
	};
	static const unsigned int divisor[] = {
		[STRING_UNITS_10] = 1000,
		[STRING_UNITS_2] = 1024,
	};
	static const unsigned int rounding[] = { 500, 50, 5 };
	int i = 0, j;
	u32 remainder = 0, sf_cap;
	char tmp[8];
	const char *unit;

	tmp[0] = '\0';

	if (blk_size == 0)
		size = 0;
	if (size == 0)
		goto out;

	/* This is Napier's algorithm.  Reduce the original block size to
	 *
	 * coefficient * divisor[units_base]^i
	 *
	 * we do the reduction so both coefficients are just under 32 bits so
	 * that multiplying them together won't overflow 64 bits and we keep
	 * as much precision as possible in the numbers.
	 *
	 * Note: it's safe to throw away the remainders here because all the
	 * precision is in the coefficients.
	 */
	while (blk_size >> 32) {
		do_div(blk_size, divisor[units_base]);
		i++;
	}

	while (size >> 32) {
		do_div(size, divisor[units_base]);
		i++;
	}

	/* now perform the actual multiplication keeping i as the sum of the
	 * two logarithms */
	size *= blk_size;

	/* and logarithmically reduce it until it's just under the divisor */
	while (size >= divisor[units_base]) {
		remainder = do_div(size, divisor[units_base]);
		i++;
	}

	/* work out in j how many digits of precision we need from the
	 * remainder */
	sf_cap = size;
	for (j = 0; sf_cap*10 < 1000; j++)
		sf_cap *= 10;

	if (units_base == STRING_UNITS_2) {
		/* express the remainder as a decimal.
It's currently the * numerator of a fraction whose denominator is * divisor[units_base], which is 1 << 10 for STRING_UNITS_2 */ remainder *= 1000; remainder >>= 10; } /* add a 5 to the digit below what will be printed to ensure * an arithmetical round up and carry it through to size */ remainder += rounding[j]; if (remainder >= 1000) { remainder -= 1000; size += 1; } if (j) { snprintf(tmp, sizeof(tmp), ".%03u", remainder); tmp[j+1] = '\0'; } out: if (i >= ARRAY_SIZE(units_2)) unit = "UNK"; else unit = units_str[units_base][i]; return snprintf(buf, len, "%u%s%s%s%s", (u32)size, tmp, (units & STRING_UNITS_NO_SPACE) ? "" : " ", unit, (units & STRING_UNITS_NO_BYTES) ? "" : "B"); } EXPORT_SYMBOL(string_get_size); /** * parse_int_array_user - Split string into a sequence of integers * @from: The user space buffer to read from * @count: The maximum number of bytes to read * @array: Returned pointer to sequence of integers * * On success @array is allocated and initialized with a sequence of * integers extracted from the @from plus an additional element that * begins the sequence and specifies the integers count. * * Caller takes responsibility for freeing @array when it is no longer * needed. */ int parse_int_array_user(const char __user *from, size_t count, int **array) { int *ints, nints; char *buf; int ret = 0; buf = memdup_user_nul(from, count); if (IS_ERR(buf)) return PTR_ERR(buf); get_options(buf, 0, &nints); if (!nints) { ret = -ENOENT; goto free_buf; } ints = kcalloc(nints + 1, sizeof(*ints), GFP_KERNEL); if (!ints) { ret = -ENOMEM; goto free_buf; } get_options(buf, nints + 1, ints); *array = ints; free_buf: kfree(buf); return ret; } EXPORT_SYMBOL(parse_int_array_user); static bool unescape_space(char **src, char **dst) { char *p = *dst, *q = *src; switch (*q) { case 'n': *p = '\n'; break; case 'r': *p = '\r'; break; case 't': *p = '\t'; break; case 'v': *p = '\v'; break; case 'f': *p = '\f'; break; default: return false; } *dst += 1; *src += 1; return true; } static bool unescape_octal(char **src, char **dst) { char *p = *dst, *q = *src; u8 num; if (isodigit(*q) == 0) return false; num = (*q++) & 7; while (num < 32 && isodigit(*q) && (q - *src < 3)) { num <<= 3; num += (*q++) & 7; } *p = num; *dst += 1; *src = q; return true; } static bool unescape_hex(char **src, char **dst) { char *p = *dst, *q = *src; int digit; u8 num; if (*q++ != 'x') return false; num = digit = hex_to_bin(*q++); if (digit < 0) return false; digit = hex_to_bin(*q); if (digit >= 0) { q++; num = (num << 4) | digit; } *p = num; *dst += 1; *src = q; return true; } static bool unescape_special(char **src, char **dst) { char *p = *dst, *q = *src; switch (*q) { case '\"': *p = '\"'; break; case '\\': *p = '\\'; break; case 'a': *p = '\a'; break; case 'e': *p = '\e'; break; default: return false; } *dst += 1; *src += 1; return true; } /** * string_unescape - unquote characters in the given string * @src: source buffer (escaped) * @dst: destination buffer (unescaped) * @size: size of the destination buffer (0 to unlimit) * @flags: combination of the flags. * * Description: * The function unquotes characters in the given string. * * Because the size of the output will be the same as or less than the size of * the input, the transformation may be performed in place. * * Caller must provide valid source and destination pointers. Be aware that * destination buffer will always be NULL-terminated. Source string must be * NULL-terminated as well. 
The supported flags are:: * * UNESCAPE_SPACE: * '\f' - form feed * '\n' - new line * '\r' - carriage return * '\t' - horizontal tab * '\v' - vertical tab * UNESCAPE_OCTAL: * '\NNN' - byte with octal value NNN (1 to 3 digits) * UNESCAPE_HEX: * '\xHH' - byte with hexadecimal value HH (1 to 2 digits) * UNESCAPE_SPECIAL: * '\"' - double quote * '\\' - backslash * '\a' - alert (BEL) * '\e' - escape * UNESCAPE_ANY: * all previous together * * Return: * The amount of the characters processed to the destination buffer excluding * trailing '\0' is returned. */ int string_unescape(char *src, char *dst, size_t size, unsigned int flags) { char *out = dst; while (*src && --size) { if (src[0] == '\\' && src[1] != '\0' && size > 1) { src++; size--; if (flags & UNESCAPE_SPACE && unescape_space(&src, &out)) continue; if (flags & UNESCAPE_OCTAL && unescape_octal(&src, &out)) continue; if (flags & UNESCAPE_HEX && unescape_hex(&src, &out)) continue; if (flags & UNESCAPE_SPECIAL && unescape_special(&src, &out)) continue; *out++ = '\\'; } *out++ = *src++; } *out = '\0'; return out - dst; } EXPORT_SYMBOL(string_unescape); static bool escape_passthrough(unsigned char c, char **dst, char *end) { char *out = *dst; if (out < end) *out = c; *dst = out + 1; return true; } static bool escape_space(unsigned char c, char **dst, char *end) { char *out = *dst; unsigned char to; switch (c) { case '\n': to = 'n'; break; case '\r': to = 'r'; break; case '\t': to = 't'; break; case '\v': to = 'v'; break; case '\f': to = 'f'; break; default: return false; } if (out < end) *out = '\\'; ++out; if (out < end) *out = to; ++out; *dst = out; return true; } static bool escape_special(unsigned char c, char **dst, char *end) { char *out = *dst; unsigned char to; switch (c) { case '\\': to = '\\'; break; case '\a': to = 'a'; break; case '\e': to = 'e'; break; case '"': to = '"'; break; default: return false; } if (out < end) *out = '\\'; ++out; if (out < end) *out = to; ++out; *dst = out; return true; } static bool escape_null(unsigned char c, char **dst, char *end) { char *out = *dst; if (c) return false; if (out < end) *out = '\\'; ++out; if (out < end) *out = '0'; ++out; *dst = out; return true; } static bool escape_octal(unsigned char c, char **dst, char *end) { char *out = *dst; if (out < end) *out = '\\'; ++out; if (out < end) *out = ((c >> 6) & 0x07) + '0'; ++out; if (out < end) *out = ((c >> 3) & 0x07) + '0'; ++out; if (out < end) *out = ((c >> 0) & 0x07) + '0'; ++out; *dst = out; return true; } static bool escape_hex(unsigned char c, char **dst, char *end) { char *out = *dst; if (out < end) *out = '\\'; ++out; if (out < end) *out = 'x'; ++out; if (out < end) *out = hex_asc_hi(c); ++out; if (out < end) *out = hex_asc_lo(c); ++out; *dst = out; return true; } /** * string_escape_mem - quote characters in the given memory buffer * @src: source buffer (unescaped) * @isz: source buffer size * @dst: destination buffer (escaped) * @osz: destination buffer size * @flags: combination of the flags * @only: NULL-terminated string containing characters used to limit * the selected escape class. If characters are included in @only * that would not normally be escaped by the classes selected * in @flags, they will be copied to @dst unescaped. * * Description: * The process of escaping byte buffer includes several parts. They are applied * in the following sequence. * * 1. The character is not matched to the one from @only string and thus * must go as-is to the output. * 2. 
The character is matched to the printable and ASCII classes, if asked, * and in case of match it passes through to the output. * 3. The character is matched to the printable or ASCII class, if asked, * and in case of match it passes through to the output. * 4. The character is checked if it falls into the class given by @flags. * %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any * character. Note that they actually can't go together, otherwise * %ESCAPE_HEX will be ignored. * * Caller must provide valid source and destination pointers. Be aware that * destination buffer will not be NULL-terminated, thus caller have to append * it if needs. The supported flags are:: * * %ESCAPE_SPACE: (special white space, not space itself) * '\f' - form feed * '\n' - new line * '\r' - carriage return * '\t' - horizontal tab * '\v' - vertical tab * %ESCAPE_SPECIAL: * '\"' - double quote * '\\' - backslash * '\a' - alert (BEL) * '\e' - escape * %ESCAPE_NULL: * '\0' - null * %ESCAPE_OCTAL: * '\NNN' - byte with octal value NNN (3 digits) * %ESCAPE_ANY: * all previous together * %ESCAPE_NP: * escape only non-printable characters, checked by isprint() * %ESCAPE_ANY_NP: * all previous together * %ESCAPE_HEX: * '\xHH' - byte with hexadecimal value HH (2 digits) * %ESCAPE_NA: * escape only non-ascii characters, checked by isascii() * %ESCAPE_NAP: * escape only non-printable or non-ascii characters * %ESCAPE_APPEND: * append characters from @only to be escaped by the given classes * * %ESCAPE_APPEND would help to pass additional characters to the escaped, when * one of %ESCAPE_NP, %ESCAPE_NA, or %ESCAPE_NAP is provided. * * One notable caveat, the %ESCAPE_NAP, %ESCAPE_NP and %ESCAPE_NA have the * higher priority than the rest of the flags (%ESCAPE_NAP is the highest). * It doesn't make much sense to use either of them without %ESCAPE_OCTAL * or %ESCAPE_HEX, because they cover most of the other character classes. * %ESCAPE_NAP can utilize %ESCAPE_SPACE or %ESCAPE_SPECIAL in addition to * the above. * * Return: * The total size of the escaped output that would be generated for * the given input and flags. To check whether the output was * truncated, compare the return value to osz. There is room left in * dst for a '\0' terminator if and only if ret < osz. */ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *only) { char *p = dst; char *end = p + osz; bool is_dict = only && *only; bool is_append = flags & ESCAPE_APPEND; while (isz--) { unsigned char c = *src++; bool in_dict = is_dict && strchr(only, c); /* * Apply rules in the following sequence: * - the @only string is supplied and does not contain a * character under question * - the character is printable and ASCII, when @flags has * %ESCAPE_NAP bit set * - the character is printable, when @flags has * %ESCAPE_NP bit set * - the character is ASCII, when @flags has * %ESCAPE_NA bit set * - the character doesn't fall into a class of symbols * defined by given @flags * In these cases we just pass through a character to the * output buffer. * * When %ESCAPE_APPEND is passed, the characters from @only * have been excluded from the %ESCAPE_NAP, %ESCAPE_NP, and * %ESCAPE_NA cases. 
*/ if (!(is_append || in_dict) && is_dict && escape_passthrough(c, &p, end)) continue; if (!(is_append && in_dict) && isascii(c) && isprint(c) && flags & ESCAPE_NAP && escape_passthrough(c, &p, end)) continue; if (!(is_append && in_dict) && isprint(c) && flags & ESCAPE_NP && escape_passthrough(c, &p, end)) continue; if (!(is_append && in_dict) && isascii(c) && flags & ESCAPE_NA && escape_passthrough(c, &p, end)) continue; if (flags & ESCAPE_SPACE && escape_space(c, &p, end)) continue; if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end)) continue; if (flags & ESCAPE_NULL && escape_null(c, &p, end)) continue; /* ESCAPE_OCTAL and ESCAPE_HEX always go last */ if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end)) continue; if (flags & ESCAPE_HEX && escape_hex(c, &p, end)) continue; escape_passthrough(c, &p, end); } return p - dst; } EXPORT_SYMBOL(string_escape_mem); /* * Return an allocated string that has been escaped of special characters * and double quotes, making it safe to log in quotes. */ char *kstrdup_quotable(const char *src, gfp_t gfp) { size_t slen, dlen; char *dst; const int flags = ESCAPE_HEX; const char esc[] = "\f\n\r\t\v\a\e\\\""; if (!src) return NULL; slen = strlen(src); dlen = string_escape_mem(src, slen, NULL, 0, flags, esc); dst = kmalloc(dlen + 1, gfp); if (!dst) return NULL; WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen); dst[dlen] = '\0'; return dst; } EXPORT_SYMBOL_GPL(kstrdup_quotable); /* * Returns allocated NULL-terminated string containing process * command line, with inter-argument NULLs replaced with spaces, * and other special characters escaped. */ char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp) { char *buffer, *quoted; int i, res; buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buffer) return NULL; res = get_cmdline(task, buffer, PAGE_SIZE - 1); buffer[res] = '\0'; /* Collapse trailing NULLs, leave res pointing to last non-NULL. */ while (--res >= 0 && buffer[res] == '\0') ; /* Replace inter-argument NULLs. */ for (i = 0; i <= res; i++) if (buffer[i] == '\0') buffer[i] = ' '; /* Make sure result is printable. */ quoted = kstrdup_quotable(buffer, gfp); kfree(buffer); return quoted; } EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline); /* * Returns allocated NULL-terminated string containing pathname, * with special characters escaped, able to be safely logged. If * there is an error, the leading character will be "<". */ char *kstrdup_quotable_file(struct file *file, gfp_t gfp) { char *temp, *pathname; if (!file) return kstrdup("<unknown>", gfp); /* We add 11 spaces for ' (deleted)' to be appended */ temp = kmalloc(PATH_MAX + 11, GFP_KERNEL); if (!temp) return kstrdup("<no_memory>", gfp); pathname = file_path(file, temp, PATH_MAX + 11); if (IS_ERR(pathname)) pathname = kstrdup("<too_long>", gfp); else pathname = kstrdup_quotable(pathname, gfp); kfree(temp); return pathname; } EXPORT_SYMBOL_GPL(kstrdup_quotable_file); /* * Returns duplicate string in which the @old characters are replaced by @new. */ char *kstrdup_and_replace(const char *src, char old, char new, gfp_t gfp) { char *dst; dst = kstrdup(src, gfp); if (!dst) return NULL; return strreplace(dst, old, new); } EXPORT_SYMBOL_GPL(kstrdup_and_replace); /** * kasprintf_strarray - allocate and fill array of sequential strings * @gfp: flags for the slab allocator * @prefix: prefix to be used * @n: amount of lines to be allocated and filled * * Allocates and fills @n strings using pattern "%s-%zu", where prefix * is provided by caller. 
The caller is responsible to free them with * kfree_strarray() after use. * * Returns array of strings or NULL when memory can't be allocated. */ char **kasprintf_strarray(gfp_t gfp, const char *prefix, size_t n) { char **names; size_t i; names = kcalloc(n + 1, sizeof(char *), gfp); if (!names) return NULL; for (i = 0; i < n; i++) { names[i] = kasprintf(gfp, "%s-%zu", prefix, i); if (!names[i]) { kfree_strarray(names, i); return NULL; } } return names; } EXPORT_SYMBOL_GPL(kasprintf_strarray); /** * kfree_strarray - free a number of dynamically allocated strings contained * in an array and the array itself * * @array: Dynamically allocated array of strings to free. * @n: Number of strings (starting from the beginning of the array) to free. * * Passing a non-NULL @array and @n == 0 as well as NULL @array are valid * use-cases. If @array is NULL, the function does nothing. */ void kfree_strarray(char **array, size_t n) { unsigned int i; if (!array) return; for (i = 0; i < n; i++) kfree(array[i]); kfree(array); } EXPORT_SYMBOL_GPL(kfree_strarray); struct strarray { char **array; size_t n; }; static void devm_kfree_strarray(struct device *dev, void *res) { struct strarray *array = res; kfree_strarray(array->array, array->n); } char **devm_kasprintf_strarray(struct device *dev, const char *prefix, size_t n) { struct strarray *ptr; ptr = devres_alloc(devm_kfree_strarray, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); ptr->array = kasprintf_strarray(GFP_KERNEL, prefix, n); if (!ptr->array) { devres_free(ptr); return ERR_PTR(-ENOMEM); } ptr->n = n; devres_add(dev, ptr); return ptr->array; } EXPORT_SYMBOL_GPL(devm_kasprintf_strarray); /** * skip_spaces - Removes leading whitespace from @str. * @str: The string to be stripped. * * Returns a pointer to the first non-whitespace character in @str. */ char *skip_spaces(const char *str) { while (isspace(*str)) ++str; return (char *)str; } EXPORT_SYMBOL(skip_spaces); /** * strim - Removes leading and trailing whitespace from @s. * @s: The string to be stripped. * * Note that the first trailing whitespace is replaced with a %NUL-terminator * in the given string @s. Returns a pointer to the first non-whitespace * character in @s. */ char *strim(char *s) { size_t size; char *end; size = strlen(s); if (!size) return s; end = s + size - 1; while (end >= s && isspace(*end)) end--; *(end + 1) = '\0'; return skip_spaces(s); } EXPORT_SYMBOL(strim); /** * sysfs_streq - return true if strings are equal, modulo trailing newline * @s1: one string * @s2: another string * * This routine returns true iff two strings are equal, treating both * NUL and newline-then-NUL as equivalent string terminations. It's * geared for use with sysfs input strings, which generally terminate * with newlines but are compared against values without newlines. */ bool sysfs_streq(const char *s1, const char *s2) { while (*s1 && *s1 == *s2) { s1++; s2++; } if (*s1 == *s2) return true; if (!*s1 && *s2 == '\n' && !s2[1]) return true; if (*s1 == '\n' && !s1[1] && !*s2) return true; return false; } EXPORT_SYMBOL(sysfs_streq); /** * match_string - matches given string in an array * @array: array of strings * @n: number of strings in the array or -1 for NULL terminated arrays * @string: string to match with * * This routine will look for a string in an array of strings up to the * n-th element in the array or until the first NULL element. * * Historically the value of -1 for @n, was used to search in arrays that * are NULL terminated. 
However, the function does not make a distinction * when finishing the search: either @n elements have been compared OR * the first NULL element was found. * * Return: * index of a @string in the @array if matches, or %-EINVAL otherwise. */ int match_string(const char * const *array, size_t n, const char *string) { int index; const char *item; for (index = 0; index < n; index++) { item = array[index]; if (!item) break; if (!strcmp(item, string)) return index; } return -EINVAL; } EXPORT_SYMBOL(match_string); /** * __sysfs_match_string - matches given string in an array * @array: array of strings * @n: number of strings in the array or -1 for NULL terminated arrays * @str: string to match with * * Returns index of @str in the @array or -EINVAL, just like match_string(). * Uses sysfs_streq instead of strcmp for matching. * * This routine will look for a string in an array of strings up to the * n-th element in the array or until the first NULL element. * * Historically the value of -1 for @n, was used to search in arrays that * are NULL terminated. However, the function does not make a distinction * when finishing the search: either @n elements have been compared OR * the first NULL element was found. */ int __sysfs_match_string(const char * const *array, size_t n, const char *str) { const char *item; int index; for (index = 0; index < n; index++) { item = array[index]; if (!item) break; if (sysfs_streq(item, str)) return index; } return -EINVAL; } EXPORT_SYMBOL(__sysfs_match_string); /** * strreplace - Replace all occurrences of character in string. * @str: The string to operate on. * @old: The character being replaced. * @new: The character @old is replaced with. * * Replaces the each @old character with a @new one in the given string @str. * * Return: pointer to the string @str itself. */ char *strreplace(char *str, char old, char new) { char *s = str; for (; *s; ++s) if (*s == old) *s = new; return str; } EXPORT_SYMBOL(strreplace); /** * memcpy_and_pad - Copy one buffer to another with padding * @dest: Where to copy to * @dest_len: The destination buffer size * @src: Where to copy from * @count: The number of bytes to copy * @pad: Character to use for padding if space is left in destination. */ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, int pad) { if (dest_len > count) { memcpy(dest, src, count); memset(dest + count, pad, dest_len - count); } else { memcpy(dest, src, dest_len); } } EXPORT_SYMBOL(memcpy_and_pad); #ifdef CONFIG_FORTIFY_SOURCE /* These are placeholders for fortify compile-time warnings. 
*/ void __read_overflow2_field(size_t avail, size_t wanted) { } EXPORT_SYMBOL(__read_overflow2_field); void __write_overflow_field(size_t avail, size_t wanted) { } EXPORT_SYMBOL(__write_overflow_field); static const char * const fortify_func_name[] = { #define MAKE_FORTIFY_FUNC_NAME(func) [MAKE_FORTIFY_FUNC(func)] = #func EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC_NAME) #undef MAKE_FORTIFY_FUNC_NAME }; void __fortify_report(const u8 reason, const size_t avail, const size_t size) { const u8 func = FORTIFY_REASON_FUNC(reason); const bool write = FORTIFY_REASON_DIR(reason); const char *name; name = fortify_func_name[umin(func, FORTIFY_FUNC_UNKNOWN)]; WARN(1, "%s: detected buffer overflow: %zu byte %s of buffer size %zu\n", name, size, str_read_write(!write), avail); } EXPORT_SYMBOL(__fortify_report); void __fortify_panic(const u8 reason, const size_t avail, const size_t size) { __fortify_report(reason, avail, size); BUG(); } EXPORT_SYMBOL(__fortify_panic); #endif /* CONFIG_FORTIFY_SOURCE */
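/*
 * Usage sketch (illustrative, not part of the original file), using the
 * helpers defined above:
 *
 *	char buf[16];
 *
 *	// 8192 blocks of 512 bytes = 4 MiB; with STRING_UNITS_2 this
 *	// should format to "4.00 MiB" (3 significant figures, space
 *	// separator and "B" suffix by default).
 *	string_get_size(8192, 512, STRING_UNITS_2, buf, sizeof(buf));
 *
 *	// Escape an untrusted string before logging it in quotes:
 *	//	char *q = kstrdup_quotable(user_str, GFP_KERNEL);
 *	//	if (q)
 *	//		pr_info("name: \"%s\"\n", q);
 *	//	kfree(q);
 */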
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AppArmor security module
 *
 * This file contains AppArmor network mediation definitions.
 *
 * Copyright (C) 1998-2008 Novell/SUSE
 * Copyright 2009-2017 Canonical Ltd.
 */

#ifndef __AA_NET_H
#define __AA_NET_H

#include <net/sock.h>
#include <linux/path.h>

#include "apparmorfs.h"
#include "label.h"
#include "perms.h"
#include "policy.h"

#define AA_MAY_SEND		AA_MAY_WRITE
#define AA_MAY_RECEIVE		AA_MAY_READ
#define AA_MAY_SHUTDOWN		AA_MAY_DELETE
#define AA_MAY_CONNECT		AA_MAY_OPEN
#define AA_MAY_ACCEPT		0x00100000
#define AA_MAY_BIND		0x00200000
#define AA_MAY_LISTEN		0x00400000
#define AA_MAY_SETOPT		0x01000000
#define AA_MAY_GETOPT		0x02000000

#define NET_PERMS_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \
			AA_MAY_SHUTDOWN | AA_MAY_BIND | AA_MAY_LISTEN | \
			AA_MAY_CONNECT | AA_MAY_ACCEPT | AA_MAY_SETATTR | \
			AA_MAY_GETATTR | AA_MAY_SETOPT | AA_MAY_GETOPT)

#define NET_FS_PERMS (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \
		      AA_MAY_SHUTDOWN | AA_MAY_CONNECT | AA_MAY_RENAME |\
		      AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_CHMOD | \
		      AA_MAY_CHOWN | AA_MAY_CHGRP | AA_MAY_LOCK | \
		      AA_MAY_MPROT)

#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT | \
		       AA_MAY_ACCEPT)

struct aa_sk_ctx {
	struct aa_label *label;
	struct aa_label *peer;
};

#define SK_CTX(X) ((X)->sk_security)

static inline struct aa_sk_ctx *aa_sock(const struct sock *sk)
{
	return sk->sk_security;
}

#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P)				  \
	struct lsm_network_audit NAME ## _net = { .sk = (SK),		  \
						  .family = (F)};	  \
	DEFINE_AUDIT_DATA(NAME,						  \
			  ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \
						     LSM_AUDIT_DATA_NONE, \
			  AA_CLASS_NET,					  \
			  OP);						  \
	NAME.common.u.net = &(NAME ## _net);				  \
	NAME.net.type = (T);						  \
	NAME.net.protocol = (P)

#define DEFINE_AUDIT_SK(NAME, OP, SK)					\
	DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type,	\
			 (SK)->sk_protocol)

#define af_select(FAMILY, FN, DEF_FN)		\
({						\
	int __e;				\
	switch ((FAMILY)) {			\
	default:				\
		__e = DEF_FN;			\
	}					\
	__e;					\
})

struct aa_secmark {
	u8 audit;
	u8 deny;
	u32 secid;
	char *label;
};

extern struct aa_sfs_entry aa_sfs_entry_network[];

void audit_net_cb(struct audit_buffer *ab, void *va);
int aa_profile_af_perm(struct aa_profile *profile,
		       struct apparmor_audit_data *ad,
		       u32 request, u16 family, int type);
int aa_af_perm(const struct cred *subj_cred, struct aa_label *label,
	       const char *op, u32 request, u16 family, int type, int protocol);

static inline int aa_profile_af_sk_perm(struct aa_profile *profile,
					struct apparmor_audit_data *ad,
					u32 request,
					struct sock *sk)
{
	return aa_profile_af_perm(profile, ad, request, sk->sk_family,
				  sk->sk_type);
}

int aa_sk_perm(const char *op, u32 request, struct sock *sk);

int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label,
		      const char *op, u32 request, struct socket *sock);

int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
			   u32 secid, const struct sock *sk);

#endif /* __AA_NET_H */
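/*
 * Illustrative sketch (hypothetical caller, not part of this header):
 * a per-profile socket permission check would typically pair
 * DEFINE_AUDIT_SK() with aa_profile_af_sk_perm() defined above.  The
 * function name below is made up for the example.
 *
 *	static int example_profile_sk_perm(struct aa_profile *profile,
 *					   const char *op, u32 request,
 *					   struct sock *sk)
 *	{
 *		DEFINE_AUDIT_SK(ad, op, sk);
 *
 *		return aa_profile_af_sk_perm(profile, &ad, request, sk);
 *	}
 */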
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  HID driver for some microsoft "special" devices
 *
 *  Copyright (c) 1999 Andreas Gal
 *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
 *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
 *  Copyright (c) 2006-2007 Jiri Kosina
 *  Copyright (c) 2008 Jiri Slaby
 */

/*
 */

#include <linux/device.h>
#include <linux/input.h>
#include <linux/hid.h>
#include <linux/module.h>

#include "hid-ids.h"

#define MS_HIDINPUT		BIT(0)
#define MS_ERGONOMY		BIT(1)
#define MS_PRESENTER		BIT(2)
#define MS_RDESC		BIT(3)
#define MS_NOGET		BIT(4)
#define MS_DUPLICATE_USAGES	BIT(5)
#define MS_SURFACE_DIAL		BIT(6)
#define MS_QUIRK_FF		BIT(7)

struct ms_data {
	unsigned long quirks;
	struct hid_device *hdev;
	struct work_struct ff_worker;
	__u8 strong;
	__u8 weak;
	void *output_report_dmabuf;
};

#define XB1S_FF_REPORT		3
#define ENABLE_WEAK		BIT(0)
#define ENABLE_STRONG		BIT(1)

enum {
	MAGNITUDE_STRONG = 2,
	MAGNITUDE_WEAK,
	MAGNITUDE_NUM
};

struct xb1s_ff_report {
	__u8	report_id;
	__u8	enable;
	__u8	magnitude[MAGNITUDE_NUM];
	__u8	duration_10ms;
	__u8	start_delay_10ms;
	__u8	loop_count;
} __packed;

static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc,
			     unsigned int *rsize)
{
	struct ms_data *ms = hid_get_drvdata(hdev);
	unsigned long quirks = ms->quirks;

	/*
	 * Microsoft Wireless Desktop Receiver (Model 1028) has
	 * 'Usage Min/Max' where it ought to have 'Physical Min/Max'
	 */
	if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 &&
	    rdesc[559] == 0x29) {
		hid_info(hdev, "fixing up Microsoft Wireless Receiver Model 1028 report descriptor\n");
		rdesc[557] = 0x35;
		rdesc[559] = 0x45;
	}
	return rdesc;
}

#define ms_map_key_clear(c)	hid_map_usage_clear(hi, usage, bit, max, \
					EV_KEY, (c))
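/*
 * Note on the Model 1028 fixup in ms_report_fixup() above, added for
 * clarity: in the HID report-descriptor short-item encoding, 0x19 and
 * 0x29 are the "Usage Minimum"/"Usage Maximum" local items, while
 * 0x35 and 0x45 are the "Physical Minimum"/"Physical Maximum" global
 * items, so the fixup rewrites the two mis-declared items in place:
 *
 *	rdesc[557]: 0x19 (Usage Minimum) -> 0x35 (Physical Minimum)
 *	rdesc[559]: 0x29 (Usage Maximum) -> 0x45 (Physical Maximum)
 */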
static int ms_ergonomy_kb_quirk(struct hid_input *hi, struct hid_usage *usage, unsigned long **bit, int *max) { struct input_dev *input = hi->input; if ((usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER) { switch (usage->hid & HID_USAGE) { /* * Microsoft uses these 2 reserved usage ids for 2 keys on * the MS office kb labelled "Office Home" and "Task Pane". */ case 0x29d: ms_map_key_clear(KEY_PROG1); return 1; case 0x29e: ms_map_key_clear(KEY_PROG2); return 1; } return 0; } if ((usage->hid & HID_USAGE_PAGE) != HID_UP_MSVENDOR) return 0; switch (usage->hid & HID_USAGE) { case 0xfd06: ms_map_key_clear(KEY_CHAT); break; case 0xfd07: ms_map_key_clear(KEY_PHONE); break; case 0xff00: /* Special keypad keys */ ms_map_key_clear(KEY_KPEQUAL); set_bit(KEY_KPLEFTPAREN, input->keybit); set_bit(KEY_KPRIGHTPAREN, input->keybit); break; case 0xff01: /* Scroll wheel */ hid_map_usage_clear(hi, usage, bit, max, EV_REL, REL_WHEEL); break; case 0xff02: /* * This byte contains a copy of the modifier keys byte of a * standard hid keyboard report, as send by interface 0 * (this usage is found on interface 1). * * This byte only gets send when another key in the same report * changes state, and as such is useless, ignore it. */ return -1; case 0xff05: set_bit(EV_REP, input->evbit); ms_map_key_clear(KEY_F13); set_bit(KEY_F14, input->keybit); set_bit(KEY_F15, input->keybit); set_bit(KEY_F16, input->keybit); set_bit(KEY_F17, input->keybit); set_bit(KEY_F18, input->keybit); break; default: return 0; } return 1; } static int ms_presenter_8k_quirk(struct hid_input *hi, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_MSVENDOR) return 0; set_bit(EV_REP, hi->input->evbit); switch (usage->hid & HID_USAGE) { case 0xfd08: ms_map_key_clear(KEY_FORWARD); break; case 0xfd09: ms_map_key_clear(KEY_BACK); break; case 0xfd0b: ms_map_key_clear(KEY_PLAYPAUSE); break; case 0xfd0e: ms_map_key_clear(KEY_CLOSE); break; case 0xfd0f: ms_map_key_clear(KEY_PLAY); break; default: return 0; } return 1; } static int ms_surface_dial_quirk(struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { switch (usage->hid & HID_USAGE_PAGE) { case 0xff070000: case HID_UP_DIGITIZER: /* ignore those axis */ return -1; case HID_UP_GENDESK: switch (usage->hid) { case HID_GD_X: case HID_GD_Y: case HID_GD_RFKILL_BTN: /* ignore those axis */ return -1; } } return 0; } static int ms_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; if (quirks & MS_ERGONOMY) { int ret = ms_ergonomy_kb_quirk(hi, usage, bit, max); if (ret) return ret; } if ((quirks & MS_PRESENTER) && ms_presenter_8k_quirk(hi, usage, bit, max)) return 1; if (quirks & MS_SURFACE_DIAL) { int ret = ms_surface_dial_quirk(hi, field, usage, bit, max); if (ret) return ret; } return 0; } static int ms_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; if (quirks & MS_DUPLICATE_USAGES) clear_bit(usage->code, *bit); return 0; } static int ms_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; struct input_dev *input; if (!(hdev->claimed & 
HID_CLAIMED_INPUT) || !field->hidinput || !usage->type) return 0; input = field->hidinput->input; /* Handling MS keyboards special buttons */ if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff00)) { /* Special keypad keys */ input_report_key(input, KEY_KPEQUAL, value & 0x01); input_report_key(input, KEY_KPLEFTPAREN, value & 0x02); input_report_key(input, KEY_KPRIGHTPAREN, value & 0x04); return 1; } if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff01)) { /* Scroll wheel */ int step = ((value & 0x60) >> 5) + 1; switch (value & 0x1f) { case 0x01: input_report_rel(input, REL_WHEEL, step); break; case 0x1f: input_report_rel(input, REL_WHEEL, -step); break; } return 1; } if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff05)) { static unsigned int last_key = 0; unsigned int key = 0; switch (value) { case 0x01: key = KEY_F14; break; case 0x02: key = KEY_F15; break; case 0x04: key = KEY_F16; break; case 0x08: key = KEY_F17; break; case 0x10: key = KEY_F18; break; } if (key) { input_event(input, usage->type, key, 1); last_key = key; } else input_event(input, usage->type, last_key, 0); return 1; } return 0; } static void ms_ff_worker(struct work_struct *work) { struct ms_data *ms = container_of(work, struct ms_data, ff_worker); struct hid_device *hdev = ms->hdev; struct xb1s_ff_report *r = ms->output_report_dmabuf; int ret; memset(r, 0, sizeof(*r)); r->report_id = XB1S_FF_REPORT; r->enable = ENABLE_WEAK | ENABLE_STRONG; /* * Specifying maximum duration and maximum loop count should * cover maximum duration of a single effect, which is 65536 * ms */ r->duration_10ms = U8_MAX; r->loop_count = U8_MAX; r->magnitude[MAGNITUDE_STRONG] = ms->strong; /* left actuator */ r->magnitude[MAGNITUDE_WEAK] = ms->weak; /* right actuator */ ret = hid_hw_output_report(hdev, (__u8 *)r, sizeof(*r)); if (ret < 0) hid_warn(hdev, "failed to send FF report\n"); } static int ms_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct ms_data *ms = hid_get_drvdata(hid); if (effect->type != FF_RUMBLE) return 0; /* * Magnitude is 0..100 so scale the 16-bit input here */ ms->strong = ((u32) effect->u.rumble.strong_magnitude * 100) / U16_MAX; ms->weak = ((u32) effect->u.rumble.weak_magnitude * 100) / U16_MAX; schedule_work(&ms->ff_worker); return 0; } static int ms_init_ff(struct hid_device *hdev) { struct hid_input *hidinput; struct input_dev *input_dev; struct ms_data *ms = hid_get_drvdata(hdev); if (list_empty(&hdev->inputs)) { hid_err(hdev, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hdev->inputs.next, struct hid_input, list); input_dev = hidinput->input; if (!(ms->quirks & MS_QUIRK_FF)) return 0; ms->hdev = hdev; INIT_WORK(&ms->ff_worker, ms_ff_worker); ms->output_report_dmabuf = devm_kzalloc(&hdev->dev, sizeof(struct xb1s_ff_report), GFP_KERNEL); if (ms->output_report_dmabuf == NULL) return -ENOMEM; input_set_capability(input_dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(input_dev, NULL, ms_play_effect); } static void ms_remove_ff(struct hid_device *hdev) { struct ms_data *ms = hid_get_drvdata(hdev); if (!(ms->quirks & MS_QUIRK_FF)) return; cancel_work_sync(&ms->ff_worker); } static int ms_probe(struct hid_device *hdev, const struct hid_device_id *id) { unsigned long quirks = id->driver_data; struct ms_data *ms; int ret; ms = devm_kzalloc(&hdev->dev, sizeof(*ms), GFP_KERNEL); if (ms == NULL) return -ENOMEM; ms->quirks = quirks; hid_set_drvdata(hdev, ms); if (quirks & MS_NOGET) 
hdev->quirks |= HID_QUIRK_NOGET; if (quirks & MS_SURFACE_DIAL) hdev->quirks |= HID_QUIRK_INPUT_PER_APP; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT | ((quirks & MS_HIDINPUT) ? HID_CONNECT_HIDINPUT_FORCE : 0)); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } ret = ms_init_ff(hdev); if (ret) hid_err(hdev, "could not initialize ff, continuing anyway"); return 0; err_free: return ret; } static void ms_remove(struct hid_device *hdev) { hid_hw_stop(hdev); ms_remove_ff(hdev); } static const struct hid_device_id ms_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV), .driver_data = MS_HIDINPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K_JP), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE7K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_LK6K), .driver_data = MS_ERGONOMY | MS_RDESC }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_USB), .driver_data = MS_PRESENTER }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0), .driver_data = MS_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500), .driver_data = MS_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), .driver_data = MS_HIDINPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_KEYBOARD), .driver_data = MS_ERGONOMY}, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT), .driver_data = MS_PRESENTER }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, 0x091B), .driver_data = MS_SURFACE_DIAL }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1708), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1708_BLE), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1914), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1797), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1797_BLE), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS), .driver_data = MS_QUIRK_FF }, { } }; MODULE_DEVICE_TABLE(hid, ms_devices); static struct hid_driver ms_driver = { .name = "microsoft", .id_table = ms_devices, .report_fixup = ms_report_fixup, .input_mapping = ms_input_mapping, .input_mapped = ms_input_mapped, .event = ms_event, .probe = ms_probe, .remove = ms_remove, }; module_hid_driver(ms_driver); MODULE_LICENSE("GPL");
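To make the rumble scaling in ms_play_effect() above concrete, the standalone sketch below (ordinary userspace C, not part of the driver) reproduces the same 16-bit-to-percent conversion; the helper name is made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as ms_play_effect(): ((u32)magnitude * 100) / U16_MAX. */
static uint8_t ff_scale_magnitude(uint16_t raw)
{
	return (uint32_t)raw * 100 / UINT16_MAX;
}

int main(void)
{
	printf("0x0000 -> %u\n", ff_scale_magnitude(0x0000)); /* 0   */
	printf("0x8000 -> %u\n", ff_scale_magnitude(0x8000)); /* 50  */
	printf("0xffff -> %u\n", ff_scale_magnitude(0xffff)); /* 100 */
	return 0;
}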
/*
 * Copyright (c) 2016 Intel Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting documentation, and
 * that the name of the copyright holders not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. The copyright holders make no representations
 * about the suitability of this software for any purpose. It is provided "as
 * is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 */

#include <drm/drm_auth.h>
#include <drm/drm_connector.h>
#include <drm/drm_drv.h>
#include <drm/drm_edid.h>
#include <drm/drm_encoder.h>
#include <drm/drm_file.h>
#include <drm/drm_managed.h>
#include <drm/drm_panel.h>
#include <drm/drm_print.h>
#include <drm/drm_privacy_screen_consumer.h>
#include <drm/drm_sysfs.h>
#include <drm/drm_utils.h>

#include <linux/property.h>
#include <linux/uaccess.h>

#include <video/cmdline.h>

#include "drm_crtc_internal.h"
#include "drm_internal.h"

/**
 * DOC: overview
 *
 * In DRM connectors are the general abstraction for display sinks, and include
 * also fixed panels or anything else that can display pixels in some form. As
 * opposed to all other KMS objects representing hardware (like CRTC, encoder or
 * plane abstractions) connectors can be hotplugged and unplugged at runtime.
 * Hence they are reference-counted using drm_connector_get() and
 * drm_connector_put().
 *
 * KMS drivers must create, initialize, register and attach a &struct
 * drm_connector for each such sink. The instance is created as other KMS
 * objects and initialized by setting the following fields. The connector is
 * initialized with a call to drm_connector_init() with a pointer to the
 * &struct drm_connector_funcs and a connector type, and then exposed to
 * userspace with a call to drm_connector_register().
 *
 * Connectors must be attached to an encoder to be used. For devices that map
 * connectors to encoders 1:1, the connector should be attached at
 * initialization time with a call to drm_connector_attach_encoder(). The
 * driver must also set the &drm_connector.encoder field to point to the
 * attached encoder.
* * For connectors which are not fixed (like built-in panels) the driver needs to * support hotplug notifications. The simplest way to do that is by using the * probe helpers, see drm_kms_helper_poll_init() for connectors which don't have * hardware support for hotplug interrupts. Connectors with hardware hotplug * support can instead use e.g. drm_helper_hpd_irq_event(). */ /* * Global connector list for drm_connector_find_by_fwnode(). * Note drm_connector_[un]register() first take connector->lock and then * take the connector_list_lock. */ static DEFINE_MUTEX(connector_list_lock); static LIST_HEAD(connector_list); struct drm_conn_prop_enum_list { int type; const char *name; struct ida ida; }; /* * Connector and encoder types. */ static struct drm_conn_prop_enum_list drm_connector_enum_list[] = { { DRM_MODE_CONNECTOR_Unknown, "Unknown" }, { DRM_MODE_CONNECTOR_VGA, "VGA" }, { DRM_MODE_CONNECTOR_DVII, "DVI-I" }, { DRM_MODE_CONNECTOR_DVID, "DVI-D" }, { DRM_MODE_CONNECTOR_DVIA, "DVI-A" }, { DRM_MODE_CONNECTOR_Composite, "Composite" }, { DRM_MODE_CONNECTOR_SVIDEO, "SVIDEO" }, { DRM_MODE_CONNECTOR_LVDS, "LVDS" }, { DRM_MODE_CONNECTOR_Component, "Component" }, { DRM_MODE_CONNECTOR_9PinDIN, "DIN" }, { DRM_MODE_CONNECTOR_DisplayPort, "DP" }, { DRM_MODE_CONNECTOR_HDMIA, "HDMI-A" }, { DRM_MODE_CONNECTOR_HDMIB, "HDMI-B" }, { DRM_MODE_CONNECTOR_TV, "TV" }, { DRM_MODE_CONNECTOR_eDP, "eDP" }, { DRM_MODE_CONNECTOR_VIRTUAL, "Virtual" }, { DRM_MODE_CONNECTOR_DSI, "DSI" }, { DRM_MODE_CONNECTOR_DPI, "DPI" }, { DRM_MODE_CONNECTOR_WRITEBACK, "Writeback" }, { DRM_MODE_CONNECTOR_SPI, "SPI" }, { DRM_MODE_CONNECTOR_USB, "USB" }, }; void drm_connector_ida_init(void) { int i; for (i = 0; i < ARRAY_SIZE(drm_connector_enum_list); i++) ida_init(&drm_connector_enum_list[i].ida); } void drm_connector_ida_destroy(void) { int i; for (i = 0; i < ARRAY_SIZE(drm_connector_enum_list); i++) ida_destroy(&drm_connector_enum_list[i].ida); } /** * drm_get_connector_type_name - return a string for connector type * @type: The connector type (DRM_MODE_CONNECTOR_*) * * Returns: the name of the connector type, or NULL if the type is not valid. */ const char *drm_get_connector_type_name(unsigned int type) { if (type < ARRAY_SIZE(drm_connector_enum_list)) return drm_connector_enum_list[type].name; return NULL; } EXPORT_SYMBOL(drm_get_connector_type_name); /** * drm_connector_get_cmdline_mode - reads the user's cmdline mode * @connector: connector to query * * The kernel supports per-connector configuration of its consoles through * use of the video= parameter. This function parses that option and * extracts the user's specified mode (or enable/disable status) for a * particular connector. This is typically only used during the early fbdev * setup. 
*/ static void drm_connector_get_cmdline_mode(struct drm_connector *connector) { struct drm_cmdline_mode *mode = &connector->cmdline_mode; const char *option; option = video_get_options(connector->name); if (!option) return; if (!drm_mode_parse_command_line_for_connector(option, connector, mode)) return; if (mode->force) { DRM_INFO("forcing %s connector %s\n", connector->name, drm_get_connector_force_name(mode->force)); connector->force = mode->force; } if (mode->panel_orientation != DRM_MODE_PANEL_ORIENTATION_UNKNOWN) { DRM_INFO("cmdline forces connector %s panel_orientation to %d\n", connector->name, mode->panel_orientation); drm_connector_set_panel_orientation(connector, mode->panel_orientation); } DRM_DEBUG_KMS("cmdline mode for connector %s %s %dx%d@%dHz%s%s%s\n", connector->name, mode->name, mode->xres, mode->yres, mode->refresh_specified ? mode->refresh : 60, mode->rb ? " reduced blanking" : "", mode->margins ? " with margins" : "", mode->interlace ? " interlaced" : ""); } static void drm_connector_free(struct kref *kref) { struct drm_connector *connector = container_of(kref, struct drm_connector, base.refcount); struct drm_device *dev = connector->dev; drm_mode_object_unregister(dev, &connector->base); connector->funcs->destroy(connector); } void drm_connector_free_work_fn(struct work_struct *work) { struct drm_connector *connector, *n; struct drm_device *dev = container_of(work, struct drm_device, mode_config.connector_free_work); struct drm_mode_config *config = &dev->mode_config; unsigned long flags; struct llist_node *freed; spin_lock_irqsave(&config->connector_list_lock, flags); freed = llist_del_all(&config->connector_free_list); spin_unlock_irqrestore(&config->connector_list_lock, flags); llist_for_each_entry_safe(connector, n, freed, free_node) { drm_mode_object_unregister(dev, &connector->base); connector->funcs->destroy(connector); } } static int __drm_connector_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc) { struct drm_mode_config *config = &dev->mode_config; int ret; struct ida *connector_ida = &drm_connector_enum_list[connector_type].ida; WARN_ON(drm_drv_uses_atomic_modeset(dev) && (!funcs->atomic_destroy_state || !funcs->atomic_duplicate_state)); ret = __drm_mode_object_add(dev, &connector->base, DRM_MODE_OBJECT_CONNECTOR, false, drm_connector_free); if (ret) return ret; connector->base.properties = &connector->properties; connector->dev = dev; connector->funcs = funcs; /* connector index is used with 32bit bitmasks */ ret = ida_alloc_max(&config->connector_ida, 31, GFP_KERNEL); if (ret < 0) { DRM_DEBUG_KMS("Failed to allocate %s connector index: %d\n", drm_connector_enum_list[connector_type].name, ret); goto out_put; } connector->index = ret; ret = 0; connector->connector_type = connector_type; connector->connector_type_id = ida_alloc_min(connector_ida, 1, GFP_KERNEL); if (connector->connector_type_id < 0) { ret = connector->connector_type_id; goto out_put_id; } connector->name = kasprintf(GFP_KERNEL, "%s-%d", drm_connector_enum_list[connector_type].name, connector->connector_type_id); if (!connector->name) { ret = -ENOMEM; goto out_put_type_id; } /* provide ddc symlink in sysfs */ connector->ddc = ddc; INIT_LIST_HEAD(&connector->global_connector_list_entry); INIT_LIST_HEAD(&connector->probed_modes); INIT_LIST_HEAD(&connector->modes); mutex_init(&connector->mutex); mutex_init(&connector->edid_override_mutex); connector->edid_blob_ptr = NULL; connector->epoch_counter 
= 0; connector->tile_blob_ptr = NULL; connector->status = connector_status_unknown; connector->display_info.panel_orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN; drm_connector_get_cmdline_mode(connector); /* We should add connectors at the end to avoid upsetting the connector * index too much. */ spin_lock_irq(&config->connector_list_lock); list_add_tail(&connector->head, &config->connector_list); config->num_connector++; spin_unlock_irq(&config->connector_list_lock); if (connector_type != DRM_MODE_CONNECTOR_VIRTUAL && connector_type != DRM_MODE_CONNECTOR_WRITEBACK) drm_connector_attach_edid_property(connector); drm_object_attach_property(&connector->base, config->dpms_property, 0); drm_object_attach_property(&connector->base, config->link_status_property, 0); drm_object_attach_property(&connector->base, config->non_desktop_property, 0); drm_object_attach_property(&connector->base, config->tile_property, 0); if (drm_core_check_feature(dev, DRIVER_ATOMIC)) { drm_object_attach_property(&connector->base, config->prop_crtc_id, 0); } connector->debugfs_entry = NULL; out_put_type_id: if (ret) ida_free(connector_ida, connector->connector_type_id); out_put_id: if (ret) ida_free(&config->connector_ida, connector->index); out_put: if (ret) drm_mode_object_unregister(dev, &connector->base); return ret; } /** * drm_connector_init - Init a preallocated connector * @dev: DRM device * @connector: the connector to init * @funcs: callbacks for this connector * @connector_type: user visible type of the connector * * Initialises a preallocated connector. Connectors should be * subclassed as part of driver connector objects. * * At driver unload time the driver's &drm_connector_funcs.destroy hook * should call drm_connector_cleanup() and free the connector structure. * The connector structure should not be allocated with devm_kzalloc(). * * Note: consider using drmm_connector_init() instead of * drm_connector_init() to let the DRM managed resource infrastructure * take care of cleanup and deallocation. * * Returns: * Zero on success, error code on failure. */ int drm_connector_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type) { if (drm_WARN_ON(dev, !(funcs && funcs->destroy))) return -EINVAL; return __drm_connector_init(dev, connector, funcs, connector_type, NULL); } EXPORT_SYMBOL(drm_connector_init); /** * drm_connector_init_with_ddc - Init a preallocated connector * @dev: DRM device * @connector: the connector to init * @funcs: callbacks for this connector * @connector_type: user visible type of the connector * @ddc: pointer to the associated ddc adapter * * Initialises a preallocated connector. Connectors should be * subclassed as part of driver connector objects. * * At driver unload time the driver's &drm_connector_funcs.destroy hook * should call drm_connector_cleanup() and free the connector structure. * The connector structure should not be allocated with devm_kzalloc(). * * Ensures that the ddc field of the connector is correctly set. * * Note: consider using drmm_connector_init() instead of * drm_connector_init_with_ddc() to let the DRM managed resource * infrastructure take care of cleanup and deallocation. * * Returns: * Zero on success, error code on failure. 
*/ int drm_connector_init_with_ddc(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc) { if (drm_WARN_ON(dev, !(funcs && funcs->destroy))) return -EINVAL; return __drm_connector_init(dev, connector, funcs, connector_type, ddc); } EXPORT_SYMBOL(drm_connector_init_with_ddc); static void drm_connector_cleanup_action(struct drm_device *dev, void *ptr) { struct drm_connector *connector = ptr; drm_connector_cleanup(connector); } /** * drmm_connector_init - Init a preallocated connector * @dev: DRM device * @connector: the connector to init * @funcs: callbacks for this connector * @connector_type: user visible type of the connector * @ddc: optional pointer to the associated ddc adapter * * Initialises a preallocated connector. Connectors should be * subclassed as part of driver connector objects. * * Cleanup is automatically handled with a call to * drm_connector_cleanup() in a DRM-managed action. * * The connector structure should be allocated with drmm_kzalloc(). * * Returns: * Zero on success, error code on failure. */ int drmm_connector_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc) { int ret; if (drm_WARN_ON(dev, funcs && funcs->destroy)) return -EINVAL; ret = __drm_connector_init(dev, connector, funcs, connector_type, ddc); if (ret) return ret; ret = drmm_add_action_or_reset(dev, drm_connector_cleanup_action, connector); if (ret) return ret; return 0; } EXPORT_SYMBOL(drmm_connector_init); /** * drm_connector_attach_edid_property - attach edid property. * @connector: the connector * * Some connector types like DRM_MODE_CONNECTOR_VIRTUAL do not get a * edid property attached by default. This function can be used to * explicitly enable the edid property in these cases. */ void drm_connector_attach_edid_property(struct drm_connector *connector) { struct drm_mode_config *config = &connector->dev->mode_config; drm_object_attach_property(&connector->base, config->edid_property, 0); } EXPORT_SYMBOL(drm_connector_attach_edid_property); /** * drm_connector_attach_encoder - attach a connector to an encoder * @connector: connector to attach * @encoder: encoder to attach @connector to * * This function links up a connector to an encoder. Note that the routing * restrictions between encoders and crtcs are exposed to userspace through the * possible_clones and possible_crtcs bitmasks. * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_attach_encoder(struct drm_connector *connector, struct drm_encoder *encoder) { /* * In the past, drivers have attempted to model the static association * of connector to encoder in simple connector/encoder devices using a * direct assignment of connector->encoder = encoder. This connection * is a logical one and the responsibility of the core, so drivers are * expected not to mess with this. * * Note that the error return should've been enough here, but a large * majority of drivers ignores the return value, so add in a big WARN * to get people's attention. 
*/ if (WARN_ON(connector->encoder)) return -EINVAL; connector->possible_encoders |= drm_encoder_mask(encoder); return 0; } EXPORT_SYMBOL(drm_connector_attach_encoder); /** * drm_connector_has_possible_encoder - check if the connector and encoder are * associated with each other * @connector: the connector * @encoder: the encoder * * Returns: * True if @encoder is one of the possible encoders for @connector. */ bool drm_connector_has_possible_encoder(struct drm_connector *connector, struct drm_encoder *encoder) { return connector->possible_encoders & drm_encoder_mask(encoder); } EXPORT_SYMBOL(drm_connector_has_possible_encoder); static void drm_mode_remove(struct drm_connector *connector, struct drm_display_mode *mode) { list_del(&mode->head); drm_mode_destroy(connector->dev, mode); } /** * drm_connector_cleanup - cleans up an initialised connector * @connector: connector to cleanup * * Cleans up the connector but doesn't free the object. */ void drm_connector_cleanup(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_display_mode *mode, *t; /* The connector should have been removed from userspace long before * it is finally destroyed. */ if (WARN_ON(connector->registration_state == DRM_CONNECTOR_REGISTERED)) drm_connector_unregister(connector); if (connector->privacy_screen) { drm_privacy_screen_put(connector->privacy_screen); connector->privacy_screen = NULL; } if (connector->tile_group) { drm_mode_put_tile_group(dev, connector->tile_group); connector->tile_group = NULL; } list_for_each_entry_safe(mode, t, &connector->probed_modes, head) drm_mode_remove(connector, mode); list_for_each_entry_safe(mode, t, &connector->modes, head) drm_mode_remove(connector, mode); ida_free(&drm_connector_enum_list[connector->connector_type].ida, connector->connector_type_id); ida_free(&dev->mode_config.connector_ida, connector->index); kfree(connector->display_info.bus_formats); kfree(connector->display_info.vics); drm_mode_object_unregister(dev, &connector->base); kfree(connector->name); connector->name = NULL; fwnode_handle_put(connector->fwnode); connector->fwnode = NULL; spin_lock_irq(&dev->mode_config.connector_list_lock); list_del(&connector->head); dev->mode_config.num_connector--; spin_unlock_irq(&dev->mode_config.connector_list_lock); WARN_ON(connector->state && !connector->funcs->atomic_destroy_state); if (connector->state && connector->funcs->atomic_destroy_state) connector->funcs->atomic_destroy_state(connector, connector->state); mutex_destroy(&connector->mutex); memset(connector, 0, sizeof(*connector)); if (dev->registered) drm_sysfs_hotplug_event(dev); } EXPORT_SYMBOL(drm_connector_cleanup); /** * drm_connector_register - register a connector * @connector: the connector to register * * Register userspace interfaces for a connector. Only call this for connectors * which can be hotplugged after drm_dev_register() has been called already, * e.g. DP MST connectors. All other connectors will be registered automatically * when calling drm_dev_register(). * * When the connector is no longer available, callers must call * drm_connector_unregister(). * * Returns: * Zero on success, error code on failure. 
*/ int drm_connector_register(struct drm_connector *connector) { int ret = 0; if (!connector->dev->registered) return 0; mutex_lock(&connector->mutex); if (connector->registration_state != DRM_CONNECTOR_INITIALIZING) goto unlock; ret = drm_sysfs_connector_add(connector); if (ret) goto unlock; drm_debugfs_connector_add(connector); if (connector->funcs->late_register) { ret = connector->funcs->late_register(connector); if (ret) goto err_debugfs; } ret = drm_sysfs_connector_add_late(connector); if (ret) goto err_late_register; drm_mode_object_register(connector->dev, &connector->base); connector->registration_state = DRM_CONNECTOR_REGISTERED; /* Let userspace know we have a new connector */ drm_sysfs_connector_hotplug_event(connector); if (connector->privacy_screen) drm_privacy_screen_register_notifier(connector->privacy_screen, &connector->privacy_screen_notifier); mutex_lock(&connector_list_lock); list_add_tail(&connector->global_connector_list_entry, &connector_list); mutex_unlock(&connector_list_lock); goto unlock; err_late_register: if (connector->funcs->early_unregister) connector->funcs->early_unregister(connector); err_debugfs: drm_debugfs_connector_remove(connector); drm_sysfs_connector_remove(connector); unlock: mutex_unlock(&connector->mutex); return ret; } EXPORT_SYMBOL(drm_connector_register); /** * drm_connector_unregister - unregister a connector * @connector: the connector to unregister * * Unregister userspace interfaces for a connector. Only call this for * connectors which have been registered explicitly by calling * drm_connector_register(). */ void drm_connector_unregister(struct drm_connector *connector) { mutex_lock(&connector->mutex); if (connector->registration_state != DRM_CONNECTOR_REGISTERED) { mutex_unlock(&connector->mutex); return; } mutex_lock(&connector_list_lock); list_del_init(&connector->global_connector_list_entry); mutex_unlock(&connector_list_lock); if (connector->privacy_screen) drm_privacy_screen_unregister_notifier( connector->privacy_screen, &connector->privacy_screen_notifier); drm_sysfs_connector_remove_early(connector); if (connector->funcs->early_unregister) connector->funcs->early_unregister(connector); drm_debugfs_connector_remove(connector); drm_sysfs_connector_remove(connector); connector->registration_state = DRM_CONNECTOR_UNREGISTERED; mutex_unlock(&connector->mutex); } EXPORT_SYMBOL(drm_connector_unregister); void drm_connector_unregister_all(struct drm_device *dev) { struct drm_connector *connector; struct drm_connector_list_iter conn_iter; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) drm_connector_unregister(connector); drm_connector_list_iter_end(&conn_iter); } int drm_connector_register_all(struct drm_device *dev) { struct drm_connector *connector; struct drm_connector_list_iter conn_iter; int ret = 0; drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { ret = drm_connector_register(connector); if (ret) break; } drm_connector_list_iter_end(&conn_iter); if (ret) drm_connector_unregister_all(dev); return ret; } /** * drm_get_connector_status_name - return a string for connector status * @status: connector status to compute name of * * In contrast to the other drm_get_*_name functions this one here returns a * const pointer and hence is threadsafe. 
* * Returns: connector status string */ const char *drm_get_connector_status_name(enum drm_connector_status status) { if (status == connector_status_connected) return "connected"; else if (status == connector_status_disconnected) return "disconnected"; else return "unknown"; } EXPORT_SYMBOL(drm_get_connector_status_name); /** * drm_get_connector_force_name - return a string for connector force * @force: connector force to get name of * * Returns: const pointer to name. */ const char *drm_get_connector_force_name(enum drm_connector_force force) { switch (force) { case DRM_FORCE_UNSPECIFIED: return "unspecified"; case DRM_FORCE_OFF: return "off"; case DRM_FORCE_ON: return "on"; case DRM_FORCE_ON_DIGITAL: return "digital"; default: return "unknown"; } } #ifdef CONFIG_LOCKDEP static struct lockdep_map connector_list_iter_dep_map = { .name = "drm_connector_list_iter" }; #endif /** * drm_connector_list_iter_begin - initialize a connector_list iterator * @dev: DRM device * @iter: connector_list iterator * * Sets @iter up to walk the &drm_mode_config.connector_list of @dev. @iter * must always be cleaned up again by calling drm_connector_list_iter_end(). * Iteration itself happens using drm_connector_list_iter_next() or * drm_for_each_connector_iter(). */ void drm_connector_list_iter_begin(struct drm_device *dev, struct drm_connector_list_iter *iter) { iter->dev = dev; iter->conn = NULL; lock_acquire_shared_recursive(&connector_list_iter_dep_map, 0, 1, NULL, _RET_IP_); } EXPORT_SYMBOL(drm_connector_list_iter_begin); /* * Extra-safe connector put function that works in any context. Should only be * used from the connector_iter functions, where we never really expect to * actually release the connector when dropping our final reference. */ static void __drm_connector_put_safe(struct drm_connector *conn) { struct drm_mode_config *config = &conn->dev->mode_config; lockdep_assert_held(&config->connector_list_lock); if (!refcount_dec_and_test(&conn->base.refcount.refcount)) return; llist_add(&conn->free_node, &config->connector_free_list); schedule_work(&config->connector_free_work); } /** * drm_connector_list_iter_next - return next connector * @iter: connector_list iterator * * Returns: the next connector for @iter, or NULL when the list walk has * completed. */ struct drm_connector * drm_connector_list_iter_next(struct drm_connector_list_iter *iter) { struct drm_connector *old_conn = iter->conn; struct drm_mode_config *config = &iter->dev->mode_config; struct list_head *lhead; unsigned long flags; spin_lock_irqsave(&config->connector_list_lock, flags); lhead = old_conn ? &old_conn->head : &config->connector_list; do { if (lhead->next == &config->connector_list) { iter->conn = NULL; break; } lhead = lhead->next; iter->conn = list_entry(lhead, struct drm_connector, head); /* loop until it's not a zombie connector */ } while (!kref_get_unless_zero(&iter->conn->base.refcount)); if (old_conn) __drm_connector_put_safe(old_conn); spin_unlock_irqrestore(&config->connector_list_lock, flags); return iter->conn; } EXPORT_SYMBOL(drm_connector_list_iter_next); /** * drm_connector_list_iter_end - tear down a connector_list iterator * @iter: connector_list iterator * * Tears down @iter and releases any resources (like &drm_connector references) * acquired while walking the list. This must always be called, both when the * iteration completes fully or when it was aborted without walking the entire * list. 
*/ void drm_connector_list_iter_end(struct drm_connector_list_iter *iter) { struct drm_mode_config *config = &iter->dev->mode_config; unsigned long flags; iter->dev = NULL; if (iter->conn) { spin_lock_irqsave(&config->connector_list_lock, flags); __drm_connector_put_safe(iter->conn); spin_unlock_irqrestore(&config->connector_list_lock, flags); } lock_release(&connector_list_iter_dep_map, _RET_IP_); } EXPORT_SYMBOL(drm_connector_list_iter_end); static const struct drm_prop_enum_list drm_subpixel_enum_list[] = { { SubPixelUnknown, "Unknown" }, { SubPixelHorizontalRGB, "Horizontal RGB" }, { SubPixelHorizontalBGR, "Horizontal BGR" }, { SubPixelVerticalRGB, "Vertical RGB" }, { SubPixelVerticalBGR, "Vertical BGR" }, { SubPixelNone, "None" }, }; /** * drm_get_subpixel_order_name - return a string for a given subpixel enum * @order: enum of subpixel_order * * Note you could abuse this and return something out of bounds, but that * would be a caller error. No unscrubbed user data should make it here. * * Returns: string describing an enumerated subpixel property */ const char *drm_get_subpixel_order_name(enum subpixel_order order) { return drm_subpixel_enum_list[order].name; } EXPORT_SYMBOL(drm_get_subpixel_order_name); static const struct drm_prop_enum_list drm_dpms_enum_list[] = { { DRM_MODE_DPMS_ON, "On" }, { DRM_MODE_DPMS_STANDBY, "Standby" }, { DRM_MODE_DPMS_SUSPEND, "Suspend" }, { DRM_MODE_DPMS_OFF, "Off" } }; DRM_ENUM_NAME_FN(drm_get_dpms_name, drm_dpms_enum_list) static const struct drm_prop_enum_list drm_link_status_enum_list[] = { { DRM_MODE_LINK_STATUS_GOOD, "Good" }, { DRM_MODE_LINK_STATUS_BAD, "Bad" }, }; /** * drm_display_info_set_bus_formats - set the supported bus formats * @info: display info to store bus formats in * @formats: array containing the supported bus formats * @num_formats: the number of entries in the fmts array * * Store the supported bus formats in display info structure. * See MEDIA_BUS_FMT_* definitions in include/uapi/linux/media-bus-format.h for * a full list of available formats. * * Returns: * 0 on success or a negative error code on failure. */ int drm_display_info_set_bus_formats(struct drm_display_info *info, const u32 *formats, unsigned int num_formats) { u32 *fmts = NULL; if (!formats && num_formats) return -EINVAL; if (formats && num_formats) { fmts = kmemdup(formats, sizeof(*formats) * num_formats, GFP_KERNEL); if (!fmts) return -ENOMEM; } kfree(info->bus_formats); info->bus_formats = fmts; info->num_bus_formats = num_formats; return 0; } EXPORT_SYMBOL(drm_display_info_set_bus_formats); /* Optional connector properties. 
*/ static const struct drm_prop_enum_list drm_scaling_mode_enum_list[] = { { DRM_MODE_SCALE_NONE, "None" }, { DRM_MODE_SCALE_FULLSCREEN, "Full" }, { DRM_MODE_SCALE_CENTER, "Center" }, { DRM_MODE_SCALE_ASPECT, "Full aspect" }, }; static const struct drm_prop_enum_list drm_aspect_ratio_enum_list[] = { { DRM_MODE_PICTURE_ASPECT_NONE, "Automatic" }, { DRM_MODE_PICTURE_ASPECT_4_3, "4:3" }, { DRM_MODE_PICTURE_ASPECT_16_9, "16:9" }, }; static const struct drm_prop_enum_list drm_content_type_enum_list[] = { { DRM_MODE_CONTENT_TYPE_NO_DATA, "No Data" }, { DRM_MODE_CONTENT_TYPE_GRAPHICS, "Graphics" }, { DRM_MODE_CONTENT_TYPE_PHOTO, "Photo" }, { DRM_MODE_CONTENT_TYPE_CINEMA, "Cinema" }, { DRM_MODE_CONTENT_TYPE_GAME, "Game" }, }; static const struct drm_prop_enum_list drm_panel_orientation_enum_list[] = { { DRM_MODE_PANEL_ORIENTATION_NORMAL, "Normal" }, { DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP, "Upside Down" }, { DRM_MODE_PANEL_ORIENTATION_LEFT_UP, "Left Side Up" }, { DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, "Right Side Up" }, }; static const struct drm_prop_enum_list drm_dvi_i_select_enum_list[] = { { DRM_MODE_SUBCONNECTOR_Automatic, "Automatic" }, /* DVI-I and TV-out */ { DRM_MODE_SUBCONNECTOR_DVID, "DVI-D" }, /* DVI-I */ { DRM_MODE_SUBCONNECTOR_DVIA, "DVI-A" }, /* DVI-I */ }; DRM_ENUM_NAME_FN(drm_get_dvi_i_select_name, drm_dvi_i_select_enum_list) static const struct drm_prop_enum_list drm_dvi_i_subconnector_enum_list[] = { { DRM_MODE_SUBCONNECTOR_Unknown, "Unknown" }, /* DVI-I, TV-out and DP */ { DRM_MODE_SUBCONNECTOR_DVID, "DVI-D" }, /* DVI-I */ { DRM_MODE_SUBCONNECTOR_DVIA, "DVI-A" }, /* DVI-I */ }; DRM_ENUM_NAME_FN(drm_get_dvi_i_subconnector_name, drm_dvi_i_subconnector_enum_list) static const struct drm_prop_enum_list drm_tv_mode_enum_list[] = { { DRM_MODE_TV_MODE_NTSC, "NTSC" }, { DRM_MODE_TV_MODE_NTSC_443, "NTSC-443" }, { DRM_MODE_TV_MODE_NTSC_J, "NTSC-J" }, { DRM_MODE_TV_MODE_PAL, "PAL" }, { DRM_MODE_TV_MODE_PAL_M, "PAL-M" }, { DRM_MODE_TV_MODE_PAL_N, "PAL-N" }, { DRM_MODE_TV_MODE_SECAM, "SECAM" }, }; DRM_ENUM_NAME_FN(drm_get_tv_mode_name, drm_tv_mode_enum_list) /** * drm_get_tv_mode_from_name - Translates a TV mode name into its enum value * @name: TV Mode name we want to convert * @len: Length of @name * * Translates @name into an enum drm_connector_tv_mode. * * Returns: the enum value on success, a negative errno otherwise. 
*/ int drm_get_tv_mode_from_name(const char *name, size_t len) { unsigned int i; for (i = 0; i < ARRAY_SIZE(drm_tv_mode_enum_list); i++) { const struct drm_prop_enum_list *item = &drm_tv_mode_enum_list[i]; if (strlen(item->name) == len && !strncmp(item->name, name, len)) return item->type; } return -EINVAL; } EXPORT_SYMBOL(drm_get_tv_mode_from_name); static const struct drm_prop_enum_list drm_tv_select_enum_list[] = { { DRM_MODE_SUBCONNECTOR_Automatic, "Automatic" }, /* DVI-I and TV-out */ { DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SVIDEO, "SVIDEO" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SCART, "SCART" }, /* TV-out */ }; DRM_ENUM_NAME_FN(drm_get_tv_select_name, drm_tv_select_enum_list) static const struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = { { DRM_MODE_SUBCONNECTOR_Unknown, "Unknown" }, /* DVI-I, TV-out and DP */ { DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SVIDEO, "SVIDEO" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SCART, "SCART" }, /* TV-out */ }; DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, drm_tv_subconnector_enum_list) static const struct drm_prop_enum_list drm_dp_subconnector_enum_list[] = { { DRM_MODE_SUBCONNECTOR_Unknown, "Unknown" }, /* DVI-I, TV-out and DP */ { DRM_MODE_SUBCONNECTOR_VGA, "VGA" }, /* DP */ { DRM_MODE_SUBCONNECTOR_DVID, "DVI-D" }, /* DP */ { DRM_MODE_SUBCONNECTOR_HDMIA, "HDMI" }, /* DP */ { DRM_MODE_SUBCONNECTOR_DisplayPort, "DP" }, /* DP */ { DRM_MODE_SUBCONNECTOR_Wireless, "Wireless" }, /* DP */ { DRM_MODE_SUBCONNECTOR_Native, "Native" }, /* DP */ }; DRM_ENUM_NAME_FN(drm_get_dp_subconnector_name, drm_dp_subconnector_enum_list) static const char * const colorspace_names[] = { /* For Default case, driver will set the colorspace */ [DRM_MODE_COLORIMETRY_DEFAULT] = "Default", /* Standard Definition Colorimetry based on CEA 861 */ [DRM_MODE_COLORIMETRY_SMPTE_170M_YCC] = "SMPTE_170M_YCC", [DRM_MODE_COLORIMETRY_BT709_YCC] = "BT709_YCC", /* Standard Definition Colorimetry based on IEC 61966-2-4 */ [DRM_MODE_COLORIMETRY_XVYCC_601] = "XVYCC_601", /* High Definition Colorimetry based on IEC 61966-2-4 */ [DRM_MODE_COLORIMETRY_XVYCC_709] = "XVYCC_709", /* Colorimetry based on IEC 61966-2-1/Amendment 1 */ [DRM_MODE_COLORIMETRY_SYCC_601] = "SYCC_601", /* Colorimetry based on IEC 61966-2-5 [33] */ [DRM_MODE_COLORIMETRY_OPYCC_601] = "opYCC_601", /* Colorimetry based on IEC 61966-2-5 */ [DRM_MODE_COLORIMETRY_OPRGB] = "opRGB", /* Colorimetry based on ITU-R BT.2020 */ [DRM_MODE_COLORIMETRY_BT2020_CYCC] = "BT2020_CYCC", /* Colorimetry based on ITU-R BT.2020 */ [DRM_MODE_COLORIMETRY_BT2020_RGB] = "BT2020_RGB", /* Colorimetry based on ITU-R BT.2020 */ [DRM_MODE_COLORIMETRY_BT2020_YCC] = "BT2020_YCC", /* Added as part of Additional Colorimetry Extension in 861.G */ [DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65] = "DCI-P3_RGB_D65", [DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER] = "DCI-P3_RGB_Theater", [DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED] = "RGB_WIDE_FIXED", /* Colorimetry based on scRGB (IEC 61966-2-2) */ [DRM_MODE_COLORIMETRY_RGB_WIDE_FLOAT] = "RGB_WIDE_FLOAT", [DRM_MODE_COLORIMETRY_BT601_YCC] = "BT601_YCC", }; /** * drm_get_colorspace_name - return a string for color encoding * @colorspace: color space to compute name of * * In contrast to the other drm_get_*_name functions this one here returns a * const pointer and hence is threadsafe. 
*/ const char *drm_get_colorspace_name(enum drm_colorspace colorspace) { if (colorspace < ARRAY_SIZE(colorspace_names) && colorspace_names[colorspace]) return colorspace_names[colorspace]; else return "(null)"; } static const u32 hdmi_colorspaces = BIT(DRM_MODE_COLORIMETRY_SMPTE_170M_YCC) | BIT(DRM_MODE_COLORIMETRY_BT709_YCC) | BIT(DRM_MODE_COLORIMETRY_XVYCC_601) | BIT(DRM_MODE_COLORIMETRY_XVYCC_709) | BIT(DRM_MODE_COLORIMETRY_SYCC_601) | BIT(DRM_MODE_COLORIMETRY_OPYCC_601) | BIT(DRM_MODE_COLORIMETRY_OPRGB) | BIT(DRM_MODE_COLORIMETRY_BT2020_CYCC) | BIT(DRM_MODE_COLORIMETRY_BT2020_RGB) | BIT(DRM_MODE_COLORIMETRY_BT2020_YCC) | BIT(DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65) | BIT(DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER); /* * As per DP 1.4a spec, 2.2.5.7.5 VSC SDP Payload for Pixel Encoding/Colorimetry * Format Table 2-120 */ static const u32 dp_colorspaces = BIT(DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED) | BIT(DRM_MODE_COLORIMETRY_RGB_WIDE_FLOAT) | BIT(DRM_MODE_COLORIMETRY_OPRGB) | BIT(DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65) | BIT(DRM_MODE_COLORIMETRY_BT2020_RGB) | BIT(DRM_MODE_COLORIMETRY_BT601_YCC) | BIT(DRM_MODE_COLORIMETRY_BT709_YCC) | BIT(DRM_MODE_COLORIMETRY_XVYCC_601) | BIT(DRM_MODE_COLORIMETRY_XVYCC_709) | BIT(DRM_MODE_COLORIMETRY_SYCC_601) | BIT(DRM_MODE_COLORIMETRY_OPYCC_601) | BIT(DRM_MODE_COLORIMETRY_BT2020_CYCC) | BIT(DRM_MODE_COLORIMETRY_BT2020_YCC); /** * DOC: standard connector properties * * DRM connectors have a few standardized properties: * * EDID: * Blob property which contains the current EDID read from the sink. This * is useful to parse sink identification information like vendor, model * and serial. Drivers should update this property by calling * drm_connector_update_edid_property(), usually after having parsed * the EDID using drm_add_edid_modes(). Userspace cannot change this * property. * * User-space should not parse the EDID to obtain information exposed via * other KMS properties (because the kernel might apply limits, quirks or * fixups to the EDID). For instance, user-space should not try to parse * mode lists from the EDID. * DPMS: * Legacy property for setting the power state of the connector. For atomic * drivers this is only provided for backwards compatibility with existing * drivers, it remaps to controlling the "ACTIVE" property on the CRTC the * connector is linked to. Drivers should never set this property directly, * it is handled by the DRM core by calling the &drm_connector_funcs.dpms * callback. For atomic drivers the remapping to the "ACTIVE" property is * implemented in the DRM core. * * Note that this property cannot be set through the MODE_ATOMIC ioctl, * userspace must use "ACTIVE" on the CRTC instead. * * WARNING: * * For userspace also running on legacy drivers the "DPMS" semantics are a * lot more complicated. First, userspace cannot rely on the "DPMS" value * returned by the GETCONNECTOR actually reflecting reality, because many * drivers fail to update it. For atomic drivers this is taken care of in * drm_atomic_helper_update_legacy_modeset_state(). * * The second issue is that the DPMS state is only well-defined when the * connector is connected to a CRTC. In atomic the DRM core enforces that * "ACTIVE" is off in such a case, no such checks exists for "DPMS". * * Finally, when enabling an output using the legacy SETCONFIG ioctl then * "DPMS" is forced to ON. But see above, that might not be reflected in * the software value on legacy drivers. 
* * Summarizing: Only set "DPMS" when the connector is known to be enabled, * assume that a successful SETCONFIG call also sets "DPMS" to on, and * never read back the value of "DPMS" because it can be incorrect. * PATH: * Connector path property to identify how this sink is physically * connected. Used by DP MST. This should be set by calling * drm_connector_set_path_property(), in the case of DP MST with the * path property the MST manager created. Userspace cannot change this * property. * * In the case of DP MST, the property has the format * ``mst:<parent>-<ports>`` where ``<parent>`` is the KMS object ID of the * parent connector and ``<ports>`` is a hyphen-separated list of DP MST * port numbers. Note, KMS object IDs are not guaranteed to be stable * across reboots. * TILE: * Connector tile group property to indicate how a set of DRM connector * compose together into one logical screen. This is used by both high-res * external screens (often only using a single cable, but exposing multiple * DP MST sinks), or high-res integrated panels (like dual-link DSI) which * are not gen-locked. Note that for tiled panels which are genlocked, like * dual-link LVDS or dual-link DSI, the driver should try to not expose the * tiling and virtualise both &drm_crtc and &drm_plane if needed. Drivers * should update this value using drm_connector_set_tile_property(). * Userspace cannot change this property. * link-status: * Connector link-status property to indicate the status of link. The * default value of link-status is "GOOD". If something fails during or * after modeset, the kernel driver may set this to "BAD" and issue a * hotplug uevent. Drivers should update this value using * drm_connector_set_link_status_property(). * * When user-space receives the hotplug uevent and detects a "BAD" * link-status, the sink doesn't receive pixels anymore (e.g. the screen * becomes completely black). The list of available modes may have * changed. User-space is expected to pick a new mode if the current one * has disappeared and perform a new modeset with link-status set to * "GOOD" to re-enable the connector. * * If multiple connectors share the same CRTC and one of them gets a "BAD" * link-status, the other are unaffected (ie. the sinks still continue to * receive pixels). * * When user-space performs an atomic commit on a connector with a "BAD" * link-status without resetting the property to "GOOD", the sink may * still not receive pixels. When user-space performs an atomic commit * which resets the link-status property to "GOOD" without the * ALLOW_MODESET flag set, it might fail because a modeset is required. * * User-space can only change link-status to "GOOD", changing it to "BAD" * is a no-op. * * For backwards compatibility with non-atomic userspace the kernel * tries to automatically set the link-status back to "GOOD" in the * SETCRTC IOCTL. This might fail if the mode is no longer valid, similar * to how it might fail if a different screen has been connected in the * interim. * non_desktop: * Indicates the output should be ignored for purposes of displaying a * standard desktop environment or console. This is most likely because * the output device is not rectilinear. * Content Protection: * This property is used by userspace to request the kernel protect future * content communicated over the link. When requested, kernel will apply * the appropriate means of protection (most often HDCP), and use the * property to tell userspace the protection is active. 
* * Drivers can set this up by calling * drm_connector_attach_content_protection_property() on initialization. * * The value of this property can be one of the following: * * DRM_MODE_CONTENT_PROTECTION_UNDESIRED = 0 * The link is not protected, content is transmitted in the clear. * DRM_MODE_CONTENT_PROTECTION_DESIRED = 1 * Userspace has requested content protection, but the link is not * currently protected. When in this state, kernel should enable * Content Protection as soon as possible. * DRM_MODE_CONTENT_PROTECTION_ENABLED = 2 * Userspace has requested content protection, and the link is * protected. Only the driver can set the property to this value. * If userspace attempts to set to ENABLED, kernel will return * -EINVAL. * * A few guidelines: * * - DESIRED state should be preserved until userspace de-asserts it by * setting the property to UNDESIRED. This means ENABLED should only * transition to UNDESIRED when the user explicitly requests it. * - If the state is DESIRED, kernel should attempt to re-authenticate the * link whenever possible. This includes across disable/enable, dpms, * hotplug, downstream device changes, link status failures, etc.. * - Kernel sends uevent with the connector id and property id through * @drm_hdcp_update_content_protection, upon below kernel triggered * scenarios: * * - DESIRED -> ENABLED (authentication success) * - ENABLED -> DESIRED (termination of authentication) * - Please note no uevents for userspace triggered property state changes, * which can't fail such as * * - DESIRED/ENABLED -> UNDESIRED * - UNDESIRED -> DESIRED * - Userspace is responsible for polling the property or listen to uevents * to determine when the value transitions from ENABLED to DESIRED. * This signifies the link is no longer protected and userspace should * take appropriate action (whatever that might be). * * HDCP Content Type: * This Enum property is used by the userspace to declare the content type * of the display stream, to kernel. Here display stream stands for any * display content that userspace intended to display through HDCP * encryption. * * Content Type of a stream is decided by the owner of the stream, as * "HDCP Type0" or "HDCP Type1". * * The value of the property can be one of the below: * - "HDCP Type0": DRM_MODE_HDCP_CONTENT_TYPE0 = 0 * - "HDCP Type1": DRM_MODE_HDCP_CONTENT_TYPE1 = 1 * * When kernel starts the HDCP authentication (see "Content Protection" * for details), it uses the content type in "HDCP Content Type" * for performing the HDCP authentication with the display sink. * * Please note in HDCP spec versions, a link can be authenticated with * HDCP 2.2 for Content Type 0/Content Type 1. Where as a link can be * authenticated with HDCP1.4 only for Content Type 0(though it is implicit * in nature. As there is no reference for Content Type in HDCP1.4). * * HDCP2.2 authentication protocol itself takes the "Content Type" as a * parameter, which is a input for the DP HDCP2.2 encryption algo. * * In case of Type 0 content protection request, kernel driver can choose * either of HDCP spec versions 1.4 and 2.2. When HDCP2.2 is used for * "HDCP Type 0", a HDCP 2.2 capable repeater in the downstream can send * that content to a HDCP 1.4 authenticated HDCP sink (Type0 link). * But if the content is classified as "HDCP Type 1", above mentioned * HDCP 2.2 repeater wont send the content to the HDCP sink as it can't * authenticate the HDCP1.4 capable sink for "HDCP Type 1". 
* * Please note userspace can be ignorant of the HDCP versions used by the * kernel driver to achieve the "HDCP Content Type". * * At current scenario, classifying a content as Type 1 ensures that the * content will be displayed only through the HDCP2.2 encrypted link. * * Note that the HDCP Content Type property is introduced at HDCP 2.2, and * defaults to type 0. It is only exposed by drivers supporting HDCP 2.2 * (hence supporting Type 0 and Type 1). Based on how next versions of * HDCP specs are defined content Type could be used for higher versions * too. * * If content type is changed when "Content Protection" is not UNDESIRED, * then kernel will disable the HDCP and re-enable with new type in the * same atomic commit. And when "Content Protection" is ENABLED, it means * that link is HDCP authenticated and encrypted, for the transmission of * the Type of stream mentioned at "HDCP Content Type". * * HDR_OUTPUT_METADATA: * Connector property to enable userspace to send HDR Metadata to * driver. This metadata is based on the composition and blending * policies decided by user, taking into account the hardware and * sink capabilities. The driver gets this metadata and creates a * Dynamic Range and Mastering Infoframe (DRM) in case of HDMI, * SDP packet (Non-audio INFOFRAME SDP v1.3) for DP. This is then * sent to sink. This notifies the sink of the upcoming frame's Color * Encoding and Luminance parameters. * * Userspace first need to detect the HDR capabilities of sink by * reading and parsing the EDID. Details of HDR metadata for HDMI * are added in CTA 861.G spec. For DP , its defined in VESA DP * Standard v1.4. It needs to then get the metadata information * of the video/game/app content which are encoded in HDR (basically * using HDR transfer functions). With this information it needs to * decide on a blending policy and compose the relevant * layers/overlays into a common format. Once this blending is done, * userspace will be aware of the metadata of the composed frame to * be send to sink. It then uses this property to communicate this * metadata to driver which then make a Infoframe packet and sends * to sink based on the type of encoder connected. * * Userspace will be responsible to do Tone mapping operation in case: * - Some layers are HDR and others are SDR * - HDR layers luminance is not same as sink * * It will even need to do colorspace conversion and get all layers * to one common colorspace for blending. It can use either GL, Media * or display engine to get this done based on the capabilities of the * associated hardware. * * Driver expects metadata to be put in &struct hdr_output_metadata * structure from userspace. This is received as blob and stored in * &drm_connector_state.hdr_output_metadata. It parses EDID and saves the * sink metadata in &struct hdr_sink_metadata, as * &drm_connector.hdr_sink_metadata. Driver uses * drm_hdmi_infoframe_set_hdr_metadata() helper to set the HDR metadata, * hdmi_drm_infoframe_pack() to pack the infoframe as per spec, in case of * HDMI encoder. * * max bpc: * This range property is used by userspace to limit the bit depth. When * used the driver would limit the bpc in accordance with the valid range * supported by the hardware and sink. Drivers to use the function * drm_connector_attach_max_bpc_property() to create and attach the * property to the connector during initialization. * * Connectors also have one standardized atomic property: * * CRTC_ID: * Mode object ID of the &drm_crtc this connector should be connected to. 
* * Connectors for LCD panels may also have one standardized property: * * panel orientation: * On some devices the LCD panel is mounted in the casing in such a way * that the up/top side of the panel does not match with the top side of * the device. Userspace can use this property to check for this. * Note that input coordinates from touchscreens (input devices with * INPUT_PROP_DIRECT) will still map 1:1 to the actual LCD panel * coordinates, so if userspace rotates the picture to adjust for * the orientation it must also apply the same transformation to the * touchscreen input coordinates. This property is initialized by calling * drm_connector_set_panel_orientation() or * drm_connector_set_panel_orientation_with_quirk() * * scaling mode: * This property defines how a non-native mode is upscaled to the native * mode of an LCD panel: * * None: * No upscaling happens, scaling is left to the panel. Not all * drivers expose this mode. * Full: * The output is upscaled to the full resolution of the panel, * ignoring the aspect ratio. * Center: * No upscaling happens, the output is centered within the native * resolution the panel. * Full aspect: * The output is upscaled to maximize either the width or height * while retaining the aspect ratio. * * This property should be set up by calling * drm_connector_attach_scaling_mode_property(). Note that drivers * can also expose this property to external outputs, in which case they * must support "None", which should be the default (since external screens * have a built-in scaler). * * subconnector: * This property is used by DVI-I, TVout and DisplayPort to indicate different * connector subtypes. Enum values more or less match with those from main * connector types. * For DVI-I and TVout there is also a matching property "select subconnector" * allowing to switch between signal types. * DP subconnector corresponds to a downstream port. * * privacy-screen sw-state, privacy-screen hw-state: * These 2 optional properties can be used to query the state of the * electronic privacy screen that is available on some displays; and in * some cases also control the state. If a driver implements these * properties then both properties must be present. * * "privacy-screen hw-state" is read-only and reflects the actual state * of the privacy-screen, possible values: "Enabled", "Disabled, * "Enabled-locked", "Disabled-locked". The locked states indicate * that the state cannot be changed through the DRM API. E.g. there * might be devices where the firmware-setup options, or a hardware * slider-switch, offer always on / off modes. * * "privacy-screen sw-state" can be set to change the privacy-screen state * when not locked. In this case the driver must update the hw-state * property to reflect the new state on completion of the commit of the * sw-state property. Setting the sw-state property when the hw-state is * locked must be interpreted by the driver as a request to change the * state to the set state when the hw-state becomes unlocked. E.g. if * "privacy-screen hw-state" is "Enabled-locked" and the sw-state * gets set to "Disabled" followed by the user unlocking the state by * changing the slider-switch position, then the driver must set the * state to "Disabled" upon receiving the unlock event. * * In some cases the privacy-screen's actual state might change outside of * control of the DRM code. E.g. 
there might be a firmware handled hotkey * which toggles the actual state, or the actual state might be changed * through another userspace API such as writing /proc/acpi/ibm/lcdshadow. * In this case the driver must update both the hw-state and the sw-state * to reflect the new value, overwriting any pending state requests in the * sw-state. Any pending sw-state requests are thus discarded. * * Note that the ability for the state to change outside of control of * the DRM master process means that userspace must not cache the value * of the sw-state. Caching the sw-state value and including it in later * atomic commits may lead to overriding a state change done through e.g. * a firmware handled hotkey. Therefor userspace must not include the * privacy-screen sw-state in an atomic commit unless it wants to change * its value. * * left margin, right margin, top margin, bottom margin: * Add margins to the connector's viewport. This is typically used to * mitigate overscan on TVs. * * The value is the size in pixels of the black border which will be * added. The attached CRTC's content will be scaled to fill the whole * area inside the margin. * * The margins configuration might be sent to the sink, e.g. via HDMI AVI * InfoFrames. * * Drivers can set up these properties by calling * drm_mode_create_tv_margin_properties(). */ int drm_connector_create_standard_properties(struct drm_device *dev) { struct drm_property *prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB | DRM_MODE_PROP_IMMUTABLE, "EDID", 0); if (!prop) return -ENOMEM; dev->mode_config.edid_property = prop; prop = drm_property_create_enum(dev, 0, "DPMS", drm_dpms_enum_list, ARRAY_SIZE(drm_dpms_enum_list)); if (!prop) return -ENOMEM; dev->mode_config.dpms_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB | DRM_MODE_PROP_IMMUTABLE, "PATH", 0); if (!prop) return -ENOMEM; dev->mode_config.path_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB | DRM_MODE_PROP_IMMUTABLE, "TILE", 0); if (!prop) return -ENOMEM; dev->mode_config.tile_property = prop; prop = drm_property_create_enum(dev, 0, "link-status", drm_link_status_enum_list, ARRAY_SIZE(drm_link_status_enum_list)); if (!prop) return -ENOMEM; dev->mode_config.link_status_property = prop; prop = drm_property_create_bool(dev, DRM_MODE_PROP_IMMUTABLE, "non-desktop"); if (!prop) return -ENOMEM; dev->mode_config.non_desktop_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "HDR_OUTPUT_METADATA", 0); if (!prop) return -ENOMEM; dev->mode_config.hdr_output_metadata_property = prop; return 0; } /** * drm_mode_create_dvi_i_properties - create DVI-I specific connector properties * @dev: DRM device * * Called by a driver the first time a DVI-I connector is made. 
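 *
 * A typical (sketched) call sequence at DVI-I connector init time; the
 * attached default values are only an example:
 *
 *     drm_mode_create_dvi_i_properties(dev);
 *     drm_object_attach_property(&connector->base,
 *                                dev->mode_config.dvi_i_select_subconnector_property,
 *                                DRM_MODE_SUBCONNECTOR_Automatic);
 *     drm_object_attach_property(&connector->base,
 *                                dev->mode_config.dvi_i_subconnector_property,
 *                                DRM_MODE_SUBCONNECTOR_Unknown);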
* * Returns: %0 */ int drm_mode_create_dvi_i_properties(struct drm_device *dev) { struct drm_property *dvi_i_selector; struct drm_property *dvi_i_subconnector; if (dev->mode_config.dvi_i_select_subconnector_property) return 0; dvi_i_selector = drm_property_create_enum(dev, 0, "select subconnector", drm_dvi_i_select_enum_list, ARRAY_SIZE(drm_dvi_i_select_enum_list)); dev->mode_config.dvi_i_select_subconnector_property = dvi_i_selector; dvi_i_subconnector = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE, "subconnector", drm_dvi_i_subconnector_enum_list, ARRAY_SIZE(drm_dvi_i_subconnector_enum_list)); dev->mode_config.dvi_i_subconnector_property = dvi_i_subconnector; return 0; } EXPORT_SYMBOL(drm_mode_create_dvi_i_properties); /** * drm_connector_attach_dp_subconnector_property - create subconnector property for DP * @connector: drm_connector to attach property * * Called by a driver when DP connector is created. */ void drm_connector_attach_dp_subconnector_property(struct drm_connector *connector) { struct drm_mode_config *mode_config = &connector->dev->mode_config; if (!mode_config->dp_subconnector_property) mode_config->dp_subconnector_property = drm_property_create_enum(connector->dev, DRM_MODE_PROP_IMMUTABLE, "subconnector", drm_dp_subconnector_enum_list, ARRAY_SIZE(drm_dp_subconnector_enum_list)); drm_object_attach_property(&connector->base, mode_config->dp_subconnector_property, DRM_MODE_SUBCONNECTOR_Unknown); } EXPORT_SYMBOL(drm_connector_attach_dp_subconnector_property); /** * DOC: HDMI connector properties * * content type (HDMI specific): * Indicates content type setting to be used in HDMI infoframes to indicate * content type for the external device, so that it adjusts its display * settings accordingly. * * The value of this property can be one of the following: * * No Data: * Content type is unknown * Graphics: * Content type is graphics * Photo: * Content type is photo * Cinema: * Content type is cinema * Game: * Content type is game * * The meaning of each content type is defined in CTA-861-G table 15. * * Drivers can set up this property by calling * drm_connector_attach_content_type_property(). Decoding to * infoframe values is done through drm_hdmi_avi_infoframe_content_type(). */ /* * TODO: Document the properties: * - brightness * - contrast * - flicker reduction * - hue * - mode * - overscan * - saturation * - select subconnector */ /** * DOC: Analog TV Connector Properties * * TV Mode: * Indicates the TV Mode used on an analog TV connector. The value * of this property can be one of the following: * * NTSC: * TV Mode is CCIR System M (aka 525-lines) together with * the NTSC Color Encoding. * * NTSC-443: * * TV Mode is CCIR System M (aka 525-lines) together with * the NTSC Color Encoding, but with a color subcarrier * frequency of 4.43MHz * * NTSC-J: * * TV Mode is CCIR System M (aka 525-lines) together with * the NTSC Color Encoding, but with a black level equal to * the blanking level. * * PAL: * * TV Mode is CCIR System B (aka 625-lines) together with * the PAL Color Encoding. * * PAL-M: * * TV Mode is CCIR System M (aka 525-lines) together with * the PAL Color Encoding. * * PAL-N: * * TV Mode is CCIR System N together with the PAL Color * Encoding, a color subcarrier frequency of 3.58MHz, the * SECAM color space, and narrower channels than other PAL * variants. * * SECAM: * * TV Mode is CCIR System B (aka 625-lines) together with * the SECAM Color Encoding. * * Drivers can set up this property by calling * drm_mode_create_tv_properties(). 
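 *
 * As a sketch, a driver whose encoder only supports NTSC and PAL could
 * create and attach the property like this (the chosen default is just an
 * example):
 *
 *     drm_mode_create_tv_properties(dev, BIT(DRM_MODE_TV_MODE_NTSC) |
 *                                        BIT(DRM_MODE_TV_MODE_PAL));
 *     drm_object_attach_property(&connector->base,
 *                                dev->mode_config.tv_mode_property,
 *                                DRM_MODE_TV_MODE_NTSC);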
*/ /** * drm_connector_attach_content_type_property - attach content-type property * @connector: connector to attach content type property on. * * Called by a driver the first time a HDMI connector is made. * * Returns: %0 */ int drm_connector_attach_content_type_property(struct drm_connector *connector) { if (!drm_mode_create_content_type_property(connector->dev)) drm_object_attach_property(&connector->base, connector->dev->mode_config.content_type_property, DRM_MODE_CONTENT_TYPE_NO_DATA); return 0; } EXPORT_SYMBOL(drm_connector_attach_content_type_property); /** * drm_connector_attach_tv_margin_properties - attach TV connector margin * properties * @connector: DRM connector * * Called by a driver when it needs to attach TV margin props to a connector. * Typically used on SDTV and HDMI connectors. */ void drm_connector_attach_tv_margin_properties(struct drm_connector *connector) { struct drm_device *dev = connector->dev; drm_object_attach_property(&connector->base, dev->mode_config.tv_left_margin_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tv_right_margin_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tv_top_margin_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tv_bottom_margin_property, 0); } EXPORT_SYMBOL(drm_connector_attach_tv_margin_properties); /** * drm_mode_create_tv_margin_properties - create TV connector margin properties * @dev: DRM device * * Called by a driver's HDMI connector initialization routine, this function * creates the TV margin properties for a given device. No need to call this * function for an SDTV connector, it's already called from * drm_mode_create_tv_properties_legacy(). * * Returns: * 0 on success or a negative error code on failure. */ int drm_mode_create_tv_margin_properties(struct drm_device *dev) { if (dev->mode_config.tv_left_margin_property) return 0; dev->mode_config.tv_left_margin_property = drm_property_create_range(dev, 0, "left margin", 0, 100); if (!dev->mode_config.tv_left_margin_property) return -ENOMEM; dev->mode_config.tv_right_margin_property = drm_property_create_range(dev, 0, "right margin", 0, 100); if (!dev->mode_config.tv_right_margin_property) return -ENOMEM; dev->mode_config.tv_top_margin_property = drm_property_create_range(dev, 0, "top margin", 0, 100); if (!dev->mode_config.tv_top_margin_property) return -ENOMEM; dev->mode_config.tv_bottom_margin_property = drm_property_create_range(dev, 0, "bottom margin", 0, 100); if (!dev->mode_config.tv_bottom_margin_property) return -ENOMEM; return 0; } EXPORT_SYMBOL(drm_mode_create_tv_margin_properties); /** * drm_mode_create_tv_properties_legacy - create TV specific connector properties * @dev: DRM device * @num_modes: number of different TV formats (modes) supported * @modes: array of pointers to strings containing name of each format * * Called by a driver's TV initialization routine, this function creates * the TV specific connector properties for a given device. Caller is * responsible for allocating a list of format names and passing them to * this routine. * * NOTE: This functions registers the deprecated "mode" connector * property to select the analog TV mode (ie, NTSC, PAL, etc.). New * drivers must use drm_mode_create_tv_properties() instead. * * Returns: * 0 on success or a negative error code on failure. 
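 *
 * Sketch of a legacy driver's usage; the "tv_format_names" array is a
 * hypothetical driver-owned table:
 *
 *     static const char * const tv_format_names[] = { "NTSC", "PAL" };
 *
 *     drm_mode_create_tv_properties_legacy(dev, ARRAY_SIZE(tv_format_names),
 *                                          tv_format_names);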
*/ int drm_mode_create_tv_properties_legacy(struct drm_device *dev, unsigned int num_modes, const char * const modes[]) { struct drm_property *tv_selector; struct drm_property *tv_subconnector; unsigned int i; if (dev->mode_config.tv_select_subconnector_property) return 0; /* * Basic connector properties */ tv_selector = drm_property_create_enum(dev, 0, "select subconnector", drm_tv_select_enum_list, ARRAY_SIZE(drm_tv_select_enum_list)); if (!tv_selector) goto nomem; dev->mode_config.tv_select_subconnector_property = tv_selector; tv_subconnector = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE, "subconnector", drm_tv_subconnector_enum_list, ARRAY_SIZE(drm_tv_subconnector_enum_list)); if (!tv_subconnector) goto nomem; dev->mode_config.tv_subconnector_property = tv_subconnector; /* * Other, TV specific properties: margins & TV modes. */ if (drm_mode_create_tv_margin_properties(dev)) goto nomem; if (num_modes) { dev->mode_config.legacy_tv_mode_property = drm_property_create(dev, DRM_MODE_PROP_ENUM, "mode", num_modes); if (!dev->mode_config.legacy_tv_mode_property) goto nomem; for (i = 0; i < num_modes; i++) drm_property_add_enum(dev->mode_config.legacy_tv_mode_property, i, modes[i]); } dev->mode_config.tv_brightness_property = drm_property_create_range(dev, 0, "brightness", 0, 100); if (!dev->mode_config.tv_brightness_property) goto nomem; dev->mode_config.tv_contrast_property = drm_property_create_range(dev, 0, "contrast", 0, 100); if (!dev->mode_config.tv_contrast_property) goto nomem; dev->mode_config.tv_flicker_reduction_property = drm_property_create_range(dev, 0, "flicker reduction", 0, 100); if (!dev->mode_config.tv_flicker_reduction_property) goto nomem; dev->mode_config.tv_overscan_property = drm_property_create_range(dev, 0, "overscan", 0, 100); if (!dev->mode_config.tv_overscan_property) goto nomem; dev->mode_config.tv_saturation_property = drm_property_create_range(dev, 0, "saturation", 0, 100); if (!dev->mode_config.tv_saturation_property) goto nomem; dev->mode_config.tv_hue_property = drm_property_create_range(dev, 0, "hue", 0, 100); if (!dev->mode_config.tv_hue_property) goto nomem; return 0; nomem: return -ENOMEM; } EXPORT_SYMBOL(drm_mode_create_tv_properties_legacy); /** * drm_mode_create_tv_properties - create TV specific connector properties * @dev: DRM device * @supported_tv_modes: Bitmask of TV modes supported (See DRM_MODE_TV_MODE_*) * * Called by a driver's TV initialization routine, this function creates * the TV specific connector properties for a given device. * * Returns: * 0 on success or a negative error code on failure. */ int drm_mode_create_tv_properties(struct drm_device *dev, unsigned int supported_tv_modes) { struct drm_prop_enum_list tv_mode_list[DRM_MODE_TV_MODE_MAX]; struct drm_property *tv_mode; unsigned int i, len = 0; if (dev->mode_config.tv_mode_property) return 0; for (i = 0; i < DRM_MODE_TV_MODE_MAX; i++) { if (!(supported_tv_modes & BIT(i))) continue; tv_mode_list[len].type = i; tv_mode_list[len].name = drm_get_tv_mode_name(i); len++; } tv_mode = drm_property_create_enum(dev, 0, "TV mode", tv_mode_list, len); if (!tv_mode) return -ENOMEM; dev->mode_config.tv_mode_property = tv_mode; return drm_mode_create_tv_properties_legacy(dev, 0, NULL); } EXPORT_SYMBOL(drm_mode_create_tv_properties); /** * drm_mode_create_scaling_mode_property - create scaling mode property * @dev: DRM device * * Called by a driver the first time it's needed, must be attached to desired * connectors. 
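 *
 * For a non-atomic driver the usual (sketched) pattern is, with an
 * example default of "Full aspect":
 *
 *     drm_mode_create_scaling_mode_property(dev);
 *     drm_object_attach_property(&connector->base,
 *                                dev->mode_config.scaling_mode_property,
 *                                DRM_MODE_SCALE_ASPECT);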
* * Atomic drivers should use drm_connector_attach_scaling_mode_property() * instead to correctly assign &drm_connector_state.scaling_mode * in the atomic state. * * Returns: %0 */ int drm_mode_create_scaling_mode_property(struct drm_device *dev) { struct drm_property *scaling_mode; if (dev->mode_config.scaling_mode_property) return 0; scaling_mode = drm_property_create_enum(dev, 0, "scaling mode", drm_scaling_mode_enum_list, ARRAY_SIZE(drm_scaling_mode_enum_list)); dev->mode_config.scaling_mode_property = scaling_mode; return 0; } EXPORT_SYMBOL(drm_mode_create_scaling_mode_property); /** * DOC: Variable refresh properties * * Variable refresh rate capable displays can dynamically adjust their * refresh rate by extending the duration of their vertical front porch * until page flip or timeout occurs. This can reduce or remove stuttering * and latency in scenarios where the page flip does not align with the * vblank interval. * * An example scenario would be an application flipping at a constant rate * of 48Hz on a 60Hz display. The page flip will frequently miss the vblank * interval and the same contents will be displayed twice. This can be * observed as stuttering for content with motion. * * If variable refresh rate was active on a display that supported a * variable refresh range from 35Hz to 60Hz no stuttering would be observable * for the example scenario. The minimum supported variable refresh rate of * 35Hz is below the page flip frequency and the vertical front porch can * be extended until the page flip occurs. The vblank interval will be * directly aligned to the page flip rate. * * Not all userspace content is suitable for use with variable refresh rate. * Large and frequent changes in vertical front porch duration may worsen * perceived stuttering for input sensitive applications. * * Panel brightness will also vary with vertical front porch duration. Some * panels may have noticeable differences in brightness between the minimum * vertical front porch duration and the maximum vertical front porch duration. * Large and frequent changes in vertical front porch duration may produce * observable flickering for such panels. * * Userspace control for variable refresh rate is supported via properties * on the &drm_connector and &drm_crtc objects. * * "vrr_capable": * Optional &drm_connector boolean property that drivers should attach * with drm_connector_attach_vrr_capable_property() on connectors that * could support variable refresh rates. Drivers should update the * property value by calling drm_connector_set_vrr_capable_property(). * * Absence of the property should indicate absence of support. * * "VRR_ENABLED": * Default &drm_crtc boolean property that notifies the driver that the * content on the CRTC is suitable for variable refresh rate presentation. * The driver will take this property as a hint to enable variable * refresh rate support if the receiver supports it, ie. if the * "vrr_capable" property is true on the &drm_connector object. The * vertical front porch duration will be extended until page-flip or * timeout when enabled. * * The minimum vertical front porch duration is defined as the vertical * front porch duration for the current mode. * * The maximum vertical front porch duration is greater than or equal to * the minimum vertical front porch duration. The duration is derived * from the minimum supported variable refresh rate for the connector. * * The driver may place further restrictions within these minimum * and maximum bounds. 
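 *
 * Sketch of how an atomic driver might wire this up ("vrr_capable" below
 * stands for whatever capability the driver derived from the sink's
 * EDID/DisplayID range descriptor):
 *
 *     drm_connector_attach_vrr_capable_property(connector);
 *
 * and, once the sink has been probed:
 *
 *     drm_connector_set_vrr_capable_property(connector, vrr_capable);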
*/ /** * drm_connector_attach_vrr_capable_property - creates the * vrr_capable property * @connector: connector to create the vrr_capable property on. * * This is used by atomic drivers to add support for querying * variable refresh rate capability for a connector. * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_attach_vrr_capable_property( struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_property *prop; if (!connector->vrr_capable_property) { prop = drm_property_create_bool(dev, DRM_MODE_PROP_IMMUTABLE, "vrr_capable"); if (!prop) return -ENOMEM; connector->vrr_capable_property = prop; drm_object_attach_property(&connector->base, prop, 0); } return 0; } EXPORT_SYMBOL(drm_connector_attach_vrr_capable_property); /** * drm_connector_attach_scaling_mode_property - attach atomic scaling mode property * @connector: connector to attach scaling mode property on. * @scaling_mode_mask: or'ed mask of BIT(%DRM_MODE_SCALE_\*). * * This is used to add support for scaling mode to atomic drivers. * The scaling mode will be set to &drm_connector_state.scaling_mode * and can be used from &drm_connector_helper_funcs->atomic_check for validation. * * This is the atomic version of drm_mode_create_scaling_mode_property(). * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, u32 scaling_mode_mask) { struct drm_device *dev = connector->dev; struct drm_property *scaling_mode_property; int i; const unsigned valid_scaling_mode_mask = (1U << ARRAY_SIZE(drm_scaling_mode_enum_list)) - 1; if (WARN_ON(hweight32(scaling_mode_mask) < 2 || scaling_mode_mask & ~valid_scaling_mode_mask)) return -EINVAL; scaling_mode_property = drm_property_create(dev, DRM_MODE_PROP_ENUM, "scaling mode", hweight32(scaling_mode_mask)); if (!scaling_mode_property) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(drm_scaling_mode_enum_list); i++) { int ret; if (!(BIT(i) & scaling_mode_mask)) continue; ret = drm_property_add_enum(scaling_mode_property, drm_scaling_mode_enum_list[i].type, drm_scaling_mode_enum_list[i].name); if (ret) { drm_property_destroy(dev, scaling_mode_property); return ret; } } drm_object_attach_property(&connector->base, scaling_mode_property, 0); connector->scaling_mode_property = scaling_mode_property; return 0; } EXPORT_SYMBOL(drm_connector_attach_scaling_mode_property); /** * drm_mode_create_aspect_ratio_property - create aspect ratio property * @dev: DRM device * * Called by a driver the first time it's needed, must be attached to desired * connectors. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_create_aspect_ratio_property(struct drm_device *dev) { if (dev->mode_config.aspect_ratio_property) return 0; dev->mode_config.aspect_ratio_property = drm_property_create_enum(dev, 0, "aspect ratio", drm_aspect_ratio_enum_list, ARRAY_SIZE(drm_aspect_ratio_enum_list)); if (dev->mode_config.aspect_ratio_property == NULL) return -ENOMEM; return 0; } EXPORT_SYMBOL(drm_mode_create_aspect_ratio_property); /** * DOC: standard connector properties * * Colorspace: * This property helps select a suitable colorspace based on the sink * capability. Modern sink devices support wider gamut like BT2020. * This helps switch to BT2020 mode if the BT2020 encoded video stream * is being played by the user, same for any other colorspace. Thereby * giving a good visual experience to users. 
* * The expectation from userspace is that it should parse the EDID * and get supported colorspaces. Use this property and switch to the * one supported. Sink supported colorspaces should be retrieved by * userspace from EDID and driver will not explicitly expose them. * * Basically the expectation from userspace is: * - Set up CRTC DEGAMMA/CTM/GAMMA to convert to some sink * colorspace * - Set this new property to let the sink know what it * converted the CRTC output to. * - This property is just to inform sink what colorspace * source is trying to drive. * * Because between HDMI and DP have different colorspaces, * drm_mode_create_hdmi_colorspace_property() is used for HDMI connector and * drm_mode_create_dp_colorspace_property() is used for DP connector. */ static int drm_mode_create_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces) { struct drm_device *dev = connector->dev; u32 colorspaces = supported_colorspaces | BIT(DRM_MODE_COLORIMETRY_DEFAULT); struct drm_prop_enum_list enum_list[DRM_MODE_COLORIMETRY_COUNT]; int i, len; if (connector->colorspace_property) return 0; if (!supported_colorspaces) { drm_err(dev, "No supported colorspaces provded on [CONNECTOR:%d:%s]\n", connector->base.id, connector->name); return -EINVAL; } if ((supported_colorspaces & -BIT(DRM_MODE_COLORIMETRY_COUNT)) != 0) { drm_err(dev, "Unknown colorspace provded on [CONNECTOR:%d:%s]\n", connector->base.id, connector->name); return -EINVAL; } len = 0; for (i = 0; i < DRM_MODE_COLORIMETRY_COUNT; i++) { if ((colorspaces & BIT(i)) == 0) continue; enum_list[len].type = i; enum_list[len].name = colorspace_names[i]; len++; } connector->colorspace_property = drm_property_create_enum(dev, DRM_MODE_PROP_ENUM, "Colorspace", enum_list, len); if (!connector->colorspace_property) return -ENOMEM; return 0; } /** * drm_mode_create_hdmi_colorspace_property - create hdmi colorspace property * @connector: connector to create the Colorspace property on. * @supported_colorspaces: bitmap of supported color spaces * * Called by a driver the first time it's needed, must be attached to desired * HDMI connectors. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_create_hdmi_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces) { u32 colorspaces; if (supported_colorspaces) colorspaces = supported_colorspaces & hdmi_colorspaces; else colorspaces = hdmi_colorspaces; return drm_mode_create_colorspace_property(connector, colorspaces); } EXPORT_SYMBOL(drm_mode_create_hdmi_colorspace_property); /** * drm_mode_create_dp_colorspace_property - create dp colorspace property * @connector: connector to create the Colorspace property on. * @supported_colorspaces: bitmap of supported color spaces * * Called by a driver the first time it's needed, must be attached to desired * DP connectors. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_create_dp_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces) { u32 colorspaces; if (supported_colorspaces) colorspaces = supported_colorspaces & dp_colorspaces; else colorspaces = dp_colorspaces; return drm_mode_create_colorspace_property(connector, colorspaces); } EXPORT_SYMBOL(drm_mode_create_dp_colorspace_property); /** * drm_mode_create_content_type_property - create content type property * @dev: DRM device * * Called by a driver the first time it's needed, must be attached to desired * connectors. * * Returns: * Zero on success, negative errno on failure. 
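 *
 * Most drivers do not call this directly but use
 * drm_connector_attach_content_type_property(), which creates the property
 * on demand and attaches it with a "No Data" default (sketch):
 *
 *     drm_connector_attach_content_type_property(connector);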
*/ int drm_mode_create_content_type_property(struct drm_device *dev) { if (dev->mode_config.content_type_property) return 0; dev->mode_config.content_type_property = drm_property_create_enum(dev, 0, "content type", drm_content_type_enum_list, ARRAY_SIZE(drm_content_type_enum_list)); if (dev->mode_config.content_type_property == NULL) return -ENOMEM; return 0; } EXPORT_SYMBOL(drm_mode_create_content_type_property); /** * drm_mode_create_suggested_offset_properties - create suggests offset properties * @dev: DRM device * * Create the suggested x/y offset property for connectors. * * Returns: * 0 on success or a negative error code on failure. */ int drm_mode_create_suggested_offset_properties(struct drm_device *dev) { if (dev->mode_config.suggested_x_property && dev->mode_config.suggested_y_property) return 0; dev->mode_config.suggested_x_property = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE, "suggested X", 0, 0xffffffff); dev->mode_config.suggested_y_property = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE, "suggested Y", 0, 0xffffffff); if (dev->mode_config.suggested_x_property == NULL || dev->mode_config.suggested_y_property == NULL) return -ENOMEM; return 0; } EXPORT_SYMBOL(drm_mode_create_suggested_offset_properties); /** * drm_connector_set_path_property - set tile property on connector * @connector: connector to set property on. * @path: path to use for property; must not be NULL. * * This creates a property to expose to userspace to specify a * connector path. This is mainly used for DisplayPort MST where * connectors have a topology and we want to allow userspace to give * them more meaningful names. * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_set_path_property(struct drm_connector *connector, const char *path) { struct drm_device *dev = connector->dev; int ret; ret = drm_property_replace_global_blob(dev, &connector->path_blob_ptr, strlen(path) + 1, path, &connector->base, dev->mode_config.path_property); return ret; } EXPORT_SYMBOL(drm_connector_set_path_property); /** * drm_connector_set_tile_property - set tile property on connector * @connector: connector to set property on. * * This looks up the tile information for a connector, and creates a * property for userspace to parse if it exists. The property is of * the form of 8 integers using ':' as a separator. * This is used for dual port tiled displays with DisplayPort SST * or DisplayPort MST connectors. * * Returns: * Zero on success, errno on failure. 
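 *
 * This is typically called from the driver's probe/detect path once the
 * EDID (and thus any DisplayID tile block) has been parsed; a sketch, with
 * "edid" and "ret" as illustrative names only:
 *
 *     drm_connector_update_edid_property(connector, edid);
 *     ret = drm_connector_set_tile_property(connector);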
*/ int drm_connector_set_tile_property(struct drm_connector *connector) { struct drm_device *dev = connector->dev; char tile[256]; int ret; if (!connector->has_tile) { ret = drm_property_replace_global_blob(dev, &connector->tile_blob_ptr, 0, NULL, &connector->base, dev->mode_config.tile_property); return ret; } snprintf(tile, 256, "%d:%d:%d:%d:%d:%d:%d:%d", connector->tile_group->id, connector->tile_is_single_monitor, connector->num_h_tile, connector->num_v_tile, connector->tile_h_loc, connector->tile_v_loc, connector->tile_h_size, connector->tile_v_size); ret = drm_property_replace_global_blob(dev, &connector->tile_blob_ptr, strlen(tile) + 1, tile, &connector->base, dev->mode_config.tile_property); return ret; } EXPORT_SYMBOL(drm_connector_set_tile_property); /** * drm_connector_set_link_status_property - Set link status property of a connector * @connector: drm connector * @link_status: new value of link status property (0: Good, 1: Bad) * * In usual working scenario, this link status property will always be set to * "GOOD". If something fails during or after a mode set, the kernel driver * may set this link status property to "BAD". The caller then needs to send a * hotplug uevent for userspace to re-check the valid modes through * GET_CONNECTOR_IOCTL and retry modeset. * * Note: Drivers cannot rely on userspace to support this property and * issue a modeset. As such, they may choose to handle issues (like * re-training a link) without userspace's intervention. * * The reason for adding this property is to handle link training failures, but * it is not limited to DP or link training. For example, if we implement * asynchronous setcrtc, this property can be used to report any failures in that. */ void drm_connector_set_link_status_property(struct drm_connector *connector, uint64_t link_status) { struct drm_device *dev = connector->dev; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); connector->state->link_status = link_status; drm_modeset_unlock(&dev->mode_config.connection_mutex); } EXPORT_SYMBOL(drm_connector_set_link_status_property); /** * drm_connector_attach_max_bpc_property - attach "max bpc" property * @connector: connector to attach max bpc property on. * @min: The minimum bit depth supported by the connector. * @max: The maximum bit depth supported by the connector. * * This is used to add support for limiting the bit depth on a connector. * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_attach_max_bpc_property(struct drm_connector *connector, int min, int max) { struct drm_device *dev = connector->dev; struct drm_property *prop; prop = connector->max_bpc_property; if (!prop) { prop = drm_property_create_range(dev, 0, "max bpc", min, max); if (!prop) return -ENOMEM; connector->max_bpc_property = prop; } drm_object_attach_property(&connector->base, prop, max); connector->state->max_requested_bpc = max; connector->state->max_bpc = max; return 0; } EXPORT_SYMBOL(drm_connector_attach_max_bpc_property); /** * drm_connector_attach_hdr_output_metadata_property - attach "HDR_OUTPUT_METADA" property * @connector: connector to attach the property on. * * This is used to allow the userspace to send HDR Metadata to the * driver. * * Returns: * Zero on success, negative errno on failure. 
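 *
 * Sketch for an HDR-capable connector; the old/new state names below are
 * illustrative only:
 *
 *     drm_connector_attach_hdr_output_metadata_property(connector);
 *
 * and later, in the driver's atomic_check, a full modeset can be requested
 * when the metadata changed:
 *
 *     if (!drm_connector_atomic_hdr_metadata_equal(old_conn_state, new_conn_state))
 *             crtc_state->mode_changed = true;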
*/ int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_property *prop = dev->mode_config.hdr_output_metadata_property; drm_object_attach_property(&connector->base, prop, 0); return 0; } EXPORT_SYMBOL(drm_connector_attach_hdr_output_metadata_property); /** * drm_connector_attach_colorspace_property - attach "Colorspace" property * @connector: connector to attach the property on. * * This is used to allow the userspace to signal the output colorspace * to the driver. * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_attach_colorspace_property(struct drm_connector *connector) { struct drm_property *prop = connector->colorspace_property; drm_object_attach_property(&connector->base, prop, DRM_MODE_COLORIMETRY_DEFAULT); return 0; } EXPORT_SYMBOL(drm_connector_attach_colorspace_property); /** * drm_connector_atomic_hdr_metadata_equal - checks if the hdr metadata changed * @old_state: old connector state to compare * @new_state: new connector state to compare * * This is used by HDR-enabled drivers to test whether the HDR metadata * have changed between two different connector state (and thus probably * requires a full blown mode change). * * Returns: * True if the metadata are equal, False otherwise */ bool drm_connector_atomic_hdr_metadata_equal(struct drm_connector_state *old_state, struct drm_connector_state *new_state) { struct drm_property_blob *old_blob = old_state->hdr_output_metadata; struct drm_property_blob *new_blob = new_state->hdr_output_metadata; if (!old_blob || !new_blob) return old_blob == new_blob; if (old_blob->length != new_blob->length) return false; return !memcmp(old_blob->data, new_blob->data, old_blob->length); } EXPORT_SYMBOL(drm_connector_atomic_hdr_metadata_equal); /** * drm_connector_set_vrr_capable_property - sets the variable refresh rate * capable property for a connector * @connector: drm connector * @capable: True if the connector is variable refresh rate capable * * Should be used by atomic drivers to update the indicated support for * variable refresh rate over a connector. */ void drm_connector_set_vrr_capable_property( struct drm_connector *connector, bool capable) { if (!connector->vrr_capable_property) return; drm_object_property_set_value(&connector->base, connector->vrr_capable_property, capable); } EXPORT_SYMBOL(drm_connector_set_vrr_capable_property); /** * drm_connector_set_panel_orientation - sets the connector's panel_orientation * @connector: connector for which to set the panel-orientation property. * @panel_orientation: drm_panel_orientation value to set * * This function sets the connector's panel_orientation and attaches * a "panel orientation" property to the connector. * * Calling this function on a connector where the panel_orientation has * already been set is a no-op (e.g. the orientation has been overridden with * a kernel commandline option). * * It is allowed to call this function with a panel_orientation of * DRM_MODE_PANEL_ORIENTATION_UNKNOWN, in which case it is a no-op. * * The function shouldn't be called in panel after drm is registered (i.e. * drm_dev_register() is called in drm). * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_set_panel_orientation( struct drm_connector *connector, enum drm_panel_orientation panel_orientation) { struct drm_device *dev = connector->dev; struct drm_display_info *info = &connector->display_info; struct drm_property *prop; /* Already set? 
*/ if (info->panel_orientation != DRM_MODE_PANEL_ORIENTATION_UNKNOWN) return 0; /* Don't attach the property if the orientation is unknown */ if (panel_orientation == DRM_MODE_PANEL_ORIENTATION_UNKNOWN) return 0; info->panel_orientation = panel_orientation; prop = dev->mode_config.panel_orientation_property; if (!prop) { prop = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE, "panel orientation", drm_panel_orientation_enum_list, ARRAY_SIZE(drm_panel_orientation_enum_list)); if (!prop) return -ENOMEM; dev->mode_config.panel_orientation_property = prop; } drm_object_attach_property(&connector->base, prop, info->panel_orientation); return 0; } EXPORT_SYMBOL(drm_connector_set_panel_orientation); /** * drm_connector_set_panel_orientation_with_quirk - set the * connector's panel_orientation after checking for quirks * @connector: connector for which to init the panel-orientation property. * @panel_orientation: drm_panel_orientation value to set * @width: width in pixels of the panel, used for panel quirk detection * @height: height in pixels of the panel, used for panel quirk detection * * Like drm_connector_set_panel_orientation(), but with a check for platform * specific (e.g. DMI based) quirks overriding the passed in panel_orientation. * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_set_panel_orientation_with_quirk( struct drm_connector *connector, enum drm_panel_orientation panel_orientation, int width, int height) { int orientation_quirk; orientation_quirk = drm_get_panel_orientation_quirk(width, height); if (orientation_quirk != DRM_MODE_PANEL_ORIENTATION_UNKNOWN) panel_orientation = orientation_quirk; return drm_connector_set_panel_orientation(connector, panel_orientation); } EXPORT_SYMBOL(drm_connector_set_panel_orientation_with_quirk); /** * drm_connector_set_orientation_from_panel - * set the connector's panel_orientation from panel's callback. * @connector: connector for which to init the panel-orientation property. * @panel: panel that can provide orientation information. * * Drm drivers should call this function before drm_dev_register(). * Orientation is obtained from panel's .get_orientation() callback. * * Returns: * Zero on success, negative errno on failure. */ int drm_connector_set_orientation_from_panel( struct drm_connector *connector, struct drm_panel *panel) { enum drm_panel_orientation orientation; if (panel && panel->funcs && panel->funcs->get_orientation) orientation = panel->funcs->get_orientation(panel); else orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN; return drm_connector_set_panel_orientation(connector, orientation); } EXPORT_SYMBOL(drm_connector_set_orientation_from_panel); static const struct drm_prop_enum_list privacy_screen_enum[] = { { PRIVACY_SCREEN_DISABLED, "Disabled" }, { PRIVACY_SCREEN_ENABLED, "Enabled" }, { PRIVACY_SCREEN_DISABLED_LOCKED, "Disabled-locked" }, { PRIVACY_SCREEN_ENABLED_LOCKED, "Enabled-locked" }, }; /** * drm_connector_create_privacy_screen_properties - create the drm connecter's * privacy-screen properties. * @connector: connector for which to create the privacy-screen properties * * This function creates the "privacy-screen sw-state" and "privacy-screen * hw-state" properties for the connector. They are not attached. 
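 *
 * A driver creating and attaching the properties itself would typically do
 * (sketch):
 *
 *     drm_connector_create_privacy_screen_properties(connector);
 *     drm_connector_attach_privacy_screen_properties(connector);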
*/ void drm_connector_create_privacy_screen_properties(struct drm_connector *connector) { if (connector->privacy_screen_sw_state_property) return; /* Note sw-state only supports the first 2 values of the enum */ connector->privacy_screen_sw_state_property = drm_property_create_enum(connector->dev, DRM_MODE_PROP_ENUM, "privacy-screen sw-state", privacy_screen_enum, 2); connector->privacy_screen_hw_state_property = drm_property_create_enum(connector->dev, DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_ENUM, "privacy-screen hw-state", privacy_screen_enum, ARRAY_SIZE(privacy_screen_enum)); } EXPORT_SYMBOL(drm_connector_create_privacy_screen_properties); /** * drm_connector_attach_privacy_screen_properties - attach the drm connecter's * privacy-screen properties. * @connector: connector on which to attach the privacy-screen properties * * This function attaches the "privacy-screen sw-state" and "privacy-screen * hw-state" properties to the connector. The initial state of both is set * to "Disabled". */ void drm_connector_attach_privacy_screen_properties(struct drm_connector *connector) { if (!connector->privacy_screen_sw_state_property) return; drm_object_attach_property(&connector->base, connector->privacy_screen_sw_state_property, PRIVACY_SCREEN_DISABLED); drm_object_attach_property(&connector->base, connector->privacy_screen_hw_state_property, PRIVACY_SCREEN_DISABLED); } EXPORT_SYMBOL(drm_connector_attach_privacy_screen_properties); static void drm_connector_update_privacy_screen_properties( struct drm_connector *connector, bool set_sw_state) { enum drm_privacy_screen_status sw_state, hw_state; drm_privacy_screen_get_state(connector->privacy_screen, &sw_state, &hw_state); if (set_sw_state) connector->state->privacy_screen_sw_state = sw_state; drm_object_property_set_value(&connector->base, connector->privacy_screen_hw_state_property, hw_state); } static int drm_connector_privacy_screen_notifier( struct notifier_block *nb, unsigned long action, void *data) { struct drm_connector *connector = container_of(nb, struct drm_connector, privacy_screen_notifier); struct drm_device *dev = connector->dev; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); drm_connector_update_privacy_screen_properties(connector, true); drm_modeset_unlock(&dev->mode_config.connection_mutex); drm_sysfs_connector_property_event(connector, connector->privacy_screen_sw_state_property); drm_sysfs_connector_property_event(connector, connector->privacy_screen_hw_state_property); return NOTIFY_DONE; } /** * drm_connector_attach_privacy_screen_provider - attach a privacy-screen to * the connector * @connector: connector to attach the privacy-screen to * @priv: drm_privacy_screen to attach * * Create and attach the standard privacy-screen properties and register * a generic notifier for generating sysfs-connector-status-events * on external changes to the privacy-screen status. * This function takes ownership of the passed in drm_privacy_screen and will * call drm_privacy_screen_put() on it when the connector is destroyed. 
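 *
 * Sketch of a consumer driver hooking up a privacy-screen it looked up via
 * drm_privacy_screen_get(); error handling such as -EPROBE_DEFER is left
 * out here:
 *
 *     priv = drm_privacy_screen_get(dev->dev, NULL);
 *     if (!IS_ERR(priv))
 *             drm_connector_attach_privacy_screen_provider(connector, priv);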
*/ void drm_connector_attach_privacy_screen_provider( struct drm_connector *connector, struct drm_privacy_screen *priv) { connector->privacy_screen = priv; connector->privacy_screen_notifier.notifier_call = drm_connector_privacy_screen_notifier; drm_connector_create_privacy_screen_properties(connector); drm_connector_update_privacy_screen_properties(connector, true); drm_connector_attach_privacy_screen_properties(connector); } EXPORT_SYMBOL(drm_connector_attach_privacy_screen_provider); /** * drm_connector_update_privacy_screen - update connector's privacy-screen sw-state * @connector_state: connector-state to update the privacy-screen for * * This function calls drm_privacy_screen_set_sw_state() on the connector's * privacy-screen. * * If the connector has no privacy-screen, then this is a no-op. */ void drm_connector_update_privacy_screen(const struct drm_connector_state *connector_state) { struct drm_connector *connector = connector_state->connector; int ret; if (!connector->privacy_screen) return; ret = drm_privacy_screen_set_sw_state(connector->privacy_screen, connector_state->privacy_screen_sw_state); if (ret) { drm_err(connector->dev, "Error updating privacy-screen sw_state\n"); return; } /* The hw_state property value may have changed, update it. */ drm_connector_update_privacy_screen_properties(connector, false); } EXPORT_SYMBOL(drm_connector_update_privacy_screen); int drm_connector_set_obj_prop(struct drm_mode_object *obj, struct drm_property *property, uint64_t value) { int ret = -EINVAL; struct drm_connector *connector = obj_to_connector(obj); /* Do DPMS ourselves */ if (property == connector->dev->mode_config.dpms_property) { ret = (*connector->funcs->dpms)(connector, (int)value); } else if (connector->funcs->set_property) ret = connector->funcs->set_property(connector, property, value); if (!ret) drm_object_property_set_value(&connector->base, property, value); return ret; } int drm_connector_property_set_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_connector_set_property *conn_set_prop = data; struct drm_mode_obj_set_property obj_set_prop = { .value = conn_set_prop->value, .prop_id = conn_set_prop->prop_id, .obj_id = conn_set_prop->connector_id, .obj_type = DRM_MODE_OBJECT_CONNECTOR }; /* It does all the locking and checking we need */ return drm_mode_obj_set_property_ioctl(dev, &obj_set_prop, file_priv); } static struct drm_encoder *drm_connector_get_encoder(struct drm_connector *connector) { /* For atomic drivers only state objects are synchronously updated and * protected by modeset locks, so check those first. */ if (connector->state) return connector->state->best_encoder; return connector->encoder; } static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode, const struct list_head *modes, const struct drm_file *file_priv) { /* * If user-space hasn't configured the driver to expose the stereo 3D * modes, don't expose them. */ if (!file_priv->stereo_allowed && drm_mode_is_stereo(mode)) return false; /* * If user-space hasn't configured the driver to expose the modes * with aspect-ratio, don't expose them. However if such a mode * is unique, let it be exposed, but reset the aspect-ratio flags * while preparing the list of user-modes. 
*/ if (!file_priv->aspect_ratio_allowed) { const struct drm_display_mode *mode_itr; list_for_each_entry(mode_itr, modes, head) { if (mode_itr->expose_to_userspace && drm_mode_match(mode_itr, mode, DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_CLOCK | DRM_MODE_MATCH_FLAGS | DRM_MODE_MATCH_3D_FLAGS)) return false; } } return true; } int drm_mode_getconnector(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_connector *out_resp = data; struct drm_connector *connector; struct drm_encoder *encoder; struct drm_display_mode *mode; int mode_count = 0; int encoders_count = 0; int ret = 0; int copied = 0; struct drm_mode_modeinfo u_mode; struct drm_mode_modeinfo __user *mode_ptr; uint32_t __user *encoder_ptr; bool is_current_master; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; memset(&u_mode, 0, sizeof(struct drm_mode_modeinfo)); connector = drm_connector_lookup(dev, file_priv, out_resp->connector_id); if (!connector) return -ENOENT; encoders_count = hweight32(connector->possible_encoders); if ((out_resp->count_encoders >= encoders_count) && encoders_count) { copied = 0; encoder_ptr = (uint32_t __user *)(unsigned long)(out_resp->encoders_ptr); drm_connector_for_each_possible_encoder(connector, encoder) { if (put_user(encoder->base.id, encoder_ptr + copied)) { ret = -EFAULT; goto out; } copied++; } } out_resp->count_encoders = encoders_count; out_resp->connector_id = connector->base.id; out_resp->connector_type = connector->connector_type; out_resp->connector_type_id = connector->connector_type_id; is_current_master = drm_is_current_master(file_priv); mutex_lock(&dev->mode_config.mutex); if (out_resp->count_modes == 0) { if (is_current_master) connector->funcs->fill_modes(connector, dev->mode_config.max_width, dev->mode_config.max_height); else drm_dbg_kms(dev, "User-space requested a forced probe on [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe", connector->base.id, connector->name); } out_resp->mm_width = connector->display_info.width_mm; out_resp->mm_height = connector->display_info.height_mm; out_resp->subpixel = connector->display_info.subpixel_order; out_resp->connection = connector->status; /* delayed so we get modes regardless of pre-fill_modes state */ list_for_each_entry(mode, &connector->modes, head) { WARN_ON(mode->expose_to_userspace); if (drm_mode_expose_to_userspace(mode, &connector->modes, file_priv)) { mode->expose_to_userspace = true; mode_count++; } } /* * This ioctl is called twice, once to determine how much space is * needed, and the 2nd time to fill it. */ if ((out_resp->count_modes >= mode_count) && mode_count) { copied = 0; mode_ptr = (struct drm_mode_modeinfo __user *)(unsigned long)out_resp->modes_ptr; list_for_each_entry(mode, &connector->modes, head) { if (!mode->expose_to_userspace) continue; /* Clear the tag for the next time around */ mode->expose_to_userspace = false; drm_mode_convert_to_umode(&u_mode, mode); /* * Reset aspect ratio flags of user-mode, if modes with * aspect-ratio are not supported. */ if (!file_priv->aspect_ratio_allowed) u_mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; if (copy_to_user(mode_ptr + copied, &u_mode, sizeof(u_mode))) { ret = -EFAULT; /* * Clear the tag for the rest of * the modes for the next time around. 
*/ list_for_each_entry_continue(mode, &connector->modes, head) mode->expose_to_userspace = false; mutex_unlock(&dev->mode_config.mutex); goto out; } copied++; } } else { /* Clear the tag for the next time around */ list_for_each_entry(mode, &connector->modes, head) mode->expose_to_userspace = false; } out_resp->count_modes = mode_count; mutex_unlock(&dev->mode_config.mutex); drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); encoder = drm_connector_get_encoder(connector); if (encoder) out_resp->encoder_id = encoder->base.id; else out_resp->encoder_id = 0; /* Only grab properties after probing, to make sure EDID and other * properties reflect the latest status. */ ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic, (uint32_t __user *)(unsigned long)(out_resp->props_ptr), (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr), &out_resp->count_props); drm_modeset_unlock(&dev->mode_config.connection_mutex); out: drm_connector_put(connector); return ret; } /** * drm_connector_find_by_fwnode - Find a connector based on the associated fwnode * @fwnode: fwnode for which to find the matching drm_connector * * This functions looks up a drm_connector based on its associated fwnode. When * a connector is found a reference to the connector is returned. The caller must * call drm_connector_put() to release this reference when it is done with the * connector. * * Returns: A reference to the found connector or an ERR_PTR(). */ struct drm_connector *drm_connector_find_by_fwnode(struct fwnode_handle *fwnode) { struct drm_connector *connector, *found = ERR_PTR(-ENODEV); if (!fwnode) return ERR_PTR(-ENODEV); mutex_lock(&connector_list_lock); list_for_each_entry(connector, &connector_list, global_connector_list_entry) { if (connector->fwnode == fwnode || (connector->fwnode && connector->fwnode->secondary == fwnode)) { drm_connector_get(connector); found = connector; break; } } mutex_unlock(&connector_list_lock); return found; } /** * drm_connector_oob_hotplug_event - Report out-of-band hotplug event to connector * @connector_fwnode: fwnode_handle to report the event on * @status: hot plug detect logical state * * On some hardware a hotplug event notification may come from outside the display * driver / device. An example of this is some USB Type-C setups where the hardware * muxes the DisplayPort data and aux-lines but does not pass the altmode HPD * status bit to the GPU's DP HPD pin. * * This function can be used to report these out-of-band events after obtaining * a drm_connector reference through calling drm_connector_find_by_fwnode(). */ void drm_connector_oob_hotplug_event(struct fwnode_handle *connector_fwnode, enum drm_connector_status status) { struct drm_connector *connector; connector = drm_connector_find_by_fwnode(connector_fwnode); if (IS_ERR(connector)) return; if (connector->funcs->oob_hotplug_event) connector->funcs->oob_hotplug_event(connector, status); drm_connector_put(connector); } EXPORT_SYMBOL(drm_connector_oob_hotplug_event); /** * DOC: Tile group * * Tile groups are used to represent tiled monitors with a unique integer * identifier. Tiled monitors using DisplayID v1.3 have a unique 8-byte handle, * we store this in a tile group, so we have a common identifier for all tiles * in a monitor group. The property is called "TILE". Drivers can manage tile * groups using drm_mode_create_tile_group(), drm_mode_put_tile_group() and * drm_mode_get_tile_group(). 
But this is only needed for internal panels where * the tile group information is exposed through a non-standard way. */ static void drm_tile_group_free(struct kref *kref) { struct drm_tile_group *tg = container_of(kref, struct drm_tile_group, refcount); struct drm_device *dev = tg->dev; mutex_lock(&dev->mode_config.idr_mutex); idr_remove(&dev->mode_config.tile_idr, tg->id); mutex_unlock(&dev->mode_config.idr_mutex); kfree(tg); } /** * drm_mode_put_tile_group - drop a reference to a tile group. * @dev: DRM device * @tg: tile group to drop reference to. * * drop reference to tile group and free if 0. */ void drm_mode_put_tile_group(struct drm_device *dev, struct drm_tile_group *tg) { kref_put(&tg->refcount, drm_tile_group_free); } EXPORT_SYMBOL(drm_mode_put_tile_group); /** * drm_mode_get_tile_group - get a reference to an existing tile group * @dev: DRM device * @topology: 8-bytes unique per monitor. * * Use the unique bytes to get a reference to an existing tile group. * * RETURNS: * tile group or NULL if not found. */ struct drm_tile_group *drm_mode_get_tile_group(struct drm_device *dev, const char topology[8]) { struct drm_tile_group *tg; int id; mutex_lock(&dev->mode_config.idr_mutex); idr_for_each_entry(&dev->mode_config.tile_idr, tg, id) { if (!memcmp(tg->group_data, topology, 8)) { if (!kref_get_unless_zero(&tg->refcount)) tg = NULL; mutex_unlock(&dev->mode_config.idr_mutex); return tg; } } mutex_unlock(&dev->mode_config.idr_mutex); return NULL; } EXPORT_SYMBOL(drm_mode_get_tile_group); /** * drm_mode_create_tile_group - create a tile group from a displayid description * @dev: DRM device * @topology: 8-bytes unique per monitor. * * Create a tile group for the unique monitor, and get a unique * identifier for the tile group. * * RETURNS: * new tile group or NULL. */ struct drm_tile_group *drm_mode_create_tile_group(struct drm_device *dev, const char topology[8]) { struct drm_tile_group *tg; int ret; tg = kzalloc(sizeof(*tg), GFP_KERNEL); if (!tg) return NULL; kref_init(&tg->refcount); memcpy(tg->group_data, topology, 8); tg->dev = dev; mutex_lock(&dev->mode_config.idr_mutex); ret = idr_alloc(&dev->mode_config.tile_idr, tg, 1, 0, GFP_KERNEL); if (ret >= 0) { tg->id = ret; } else { kfree(tg); tg = NULL; } mutex_unlock(&dev->mode_config.idr_mutex); return tg; } EXPORT_SYMBOL(drm_mode_create_tile_group);
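/*
 * Illustrative sketch (not part of the file above): a driver that has
 * parsed the 8-byte DisplayID topology of a tiled monitor would typically
 * look up the shared tile group first and only allocate a new one when
 * none exists yet. The helper name below is made up for the example.
 */
static struct drm_tile_group *
example_get_or_create_tile_group(struct drm_device *dev, const char topology[8])
{
	struct drm_tile_group *tg;

	/* Reuse the group of an already-registered tile of the same monitor. */
	tg = drm_mode_get_tile_group(dev, topology);
	if (tg)
		return tg;

	/* First tile seen for this monitor: create the group (NULL on ENOMEM). */
	return drm_mode_create_tile_group(dev, topology);
}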
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H #include <linux/bug.h> #include <linux/list.h> #include <linux/signal_types.h> #include <linux/string.h> struct task_struct; /* for sysctl */ extern int print_fatal_signals; static inline void copy_siginfo(kernel_siginfo_t *to, const kernel_siginfo_t *from) { memcpy(to, from, sizeof(*to)); } static inline void clear_siginfo(kernel_siginfo_t *info) { memset(info, 0, sizeof(*info)); } #define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct kernel_siginfo)) static inline void copy_siginfo_to_external(siginfo_t *to, const kernel_siginfo_t *from) { memcpy(to, from, sizeof(*from)); memset(((char *)to) + sizeof(struct kernel_siginfo), 0, SI_EXPANSION_SIZE); } int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from); int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from); enum siginfo_layout { SIL_KILL, SIL_TIMER, SIL_POLL, SIL_FAULT, SIL_FAULT_TRAPNO, SIL_FAULT_MCEERR, SIL_FAULT_BNDERR, SIL_FAULT_PKUERR, SIL_FAULT_PERF_EVENT, SIL_CHLD, SIL_RT, SIL_SYS, }; enum siginfo_layout siginfo_layout(unsigned sig, int si_code); /* * Define some primitives to manipulate sigset_t. */ #ifndef __HAVE_ARCH_SIG_BITOPS #include <linux/bitops.h> /* We don't use <linux/bitops.h> for these because there is no need to be atomic.
*/ static inline void sigaddset(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) set->sig[0] |= 1UL << sig; else set->sig[sig / _NSIG_BPW] |= 1UL << (sig % _NSIG_BPW); } static inline void sigdelset(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) set->sig[0] &= ~(1UL << sig); else set->sig[sig / _NSIG_BPW] &= ~(1UL << (sig % _NSIG_BPW)); } static inline int sigismember(sigset_t *set, int _sig) { unsigned long sig = _sig - 1; if (_NSIG_WORDS == 1) return 1 & (set->sig[0] >> sig); else return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); } #endif /* __HAVE_ARCH_SIG_BITOPS */ static inline int sigisemptyset(sigset_t *set) { switch (_NSIG_WORDS) { case 4: return (set->sig[3] | set->sig[2] | set->sig[1] | set->sig[0]) == 0; case 2: return (set->sig[1] | set->sig[0]) == 0; case 1: return set->sig[0] == 0; default: BUILD_BUG(); return 0; } } static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2) { switch (_NSIG_WORDS) { case 4: return (set1->sig[3] == set2->sig[3]) && (set1->sig[2] == set2->sig[2]) && (set1->sig[1] == set2->sig[1]) && (set1->sig[0] == set2->sig[0]); case 2: return (set1->sig[1] == set2->sig[1]) && (set1->sig[0] == set2->sig[0]); case 1: return set1->sig[0] == set2->sig[0]; } return 0; } #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS #define _SIG_SET_BINOP(name, op) \ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ { \ unsigned long a0, a1, a2, a3, b0, b1, b2, b3; \ \ switch (_NSIG_WORDS) { \ case 4: \ a3 = a->sig[3]; a2 = a->sig[2]; \ b3 = b->sig[3]; b2 = b->sig[2]; \ r->sig[3] = op(a3, b3); \ r->sig[2] = op(a2, b2); \ fallthrough; \ case 2: \ a1 = a->sig[1]; b1 = b->sig[1]; \ r->sig[1] = op(a1, b1); \ fallthrough; \ case 1: \ a0 = a->sig[0]; b0 = b->sig[0]; \ r->sig[0] = op(a0, b0); \ break; \ default: \ BUILD_BUG(); \ } \ } #define _sig_or(x,y) ((x) | (y)) _SIG_SET_BINOP(sigorsets, _sig_or) #define _sig_and(x,y) ((x) & (y)) _SIG_SET_BINOP(sigandsets, _sig_and) #define _sig_andn(x,y) ((x) & ~(y)) _SIG_SET_BINOP(sigandnsets, _sig_andn) #undef _SIG_SET_BINOP #undef _sig_or #undef _sig_and #undef _sig_andn #define _SIG_SET_OP(name, op) \ static inline void name(sigset_t *set) \ { \ switch (_NSIG_WORDS) { \ case 4: set->sig[3] = op(set->sig[3]); \ set->sig[2] = op(set->sig[2]); \ fallthrough; \ case 2: set->sig[1] = op(set->sig[1]); \ fallthrough; \ case 1: set->sig[0] = op(set->sig[0]); \ break; \ default: \ BUILD_BUG(); \ } \ } #define _sig_not(x) (~(x)) _SIG_SET_OP(signotset, _sig_not) #undef _SIG_SET_OP #undef _sig_not static inline void sigemptyset(sigset_t *set) { switch (_NSIG_WORDS) { default: memset(set, 0, sizeof(sigset_t)); break; case 2: set->sig[1] = 0; fallthrough; case 1: set->sig[0] = 0; break; } } static inline void sigfillset(sigset_t *set) { switch (_NSIG_WORDS) { default: memset(set, -1, sizeof(sigset_t)); break; case 2: set->sig[1] = -1; fallthrough; case 1: set->sig[0] = -1; break; } } /* Some extensions for manipulating the low 32 signals in particular. 
*/ static inline void sigaddsetmask(sigset_t *set, unsigned long mask) { set->sig[0] |= mask; } static inline void sigdelsetmask(sigset_t *set, unsigned long mask) { set->sig[0] &= ~mask; } static inline int sigtestsetmask(sigset_t *set, unsigned long mask) { return (set->sig[0] & mask) != 0; } static inline void siginitset(sigset_t *set, unsigned long mask) { set->sig[0] = mask; switch (_NSIG_WORDS) { default: memset(&set->sig[1], 0, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = 0; break; case 1: ; } } static inline void siginitsetinv(sigset_t *set, unsigned long mask) { set->sig[0] = ~mask; switch (_NSIG_WORDS) { default: memset(&set->sig[1], -1, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = -1; break; case 1: ; } } #endif /* __HAVE_ARCH_SIG_SETOPS */ static inline void init_sigpending(struct sigpending *sig) { sigemptyset(&sig->signal); INIT_LIST_HEAD(&sig->list); } extern void flush_sigqueue(struct sigpending *queue); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) { return sig <= _NSIG ? 1 : 0; } struct timespec; struct pt_regs; enum pid_type; extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int send_signal_locked(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; extern bool get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); #define SIG_KTHREAD ((__force __sighandler_t)2) #define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3) static inline void allow_signal(int sig) { /* * Kernel threads handle their own signals. Let the signal code * know it'll be handled, so that they don't get converted to * SIGKILL or just silently dropped. */ kernel_sigaction(sig, SIG_KTHREAD); } static inline void allow_kernel_signal(int sig) { /* * Kernel threads handle their own signals. Let the signal code * know signals sent by the kernel will be handled, so that they * don't get silently dropped. */ kernel_sigaction(sig, SIG_KTHREAD_KERNEL); } static inline void disallow_signal(int sig) { kernel_sigaction(sig, SIG_IGN); } extern struct kmem_cache *sighand_cachep; extern bool unhandled_signal(struct task_struct *tsk, int sig); /* * In POSIX a signal is sent either to a specific thread (Linux task) * or to the process as a whole (Linux thread group). How the signal * is sent determines whether it's to one thread or the whole group, * which determines which signal mask(s) are involved in blocking it * from being delivered until later. When the signal is delivered, * either it's caught or ignored by a user handler or it has a default * effect that applies to the whole thread group (POSIX process). * * The possible effects an unblocked signal set to SIG_DFL can have are: * ignore - Nothing Happens * terminate - kill the process, i.e. all threads in the group, * similar to exit_group. The group leader (only) reports * WIFSIGNALED status to its parent. 
* coredump - write a core dump file describing all threads using * the same mm and then kill all those threads * stop - stop all the threads in the group, i.e. TASK_STOPPED state * * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored. * Other signals when not blocked and set to SIG_DFL behaves as follows. * The job control signals also have other special effects. * * +--------------------+------------------+ * | POSIX signal | default action | * +--------------------+------------------+ * | SIGHUP | terminate | * | SIGINT | terminate | * | SIGQUIT | coredump | * | SIGILL | coredump | * | SIGTRAP | coredump | * | SIGABRT/SIGIOT | coredump | * | SIGBUS | coredump | * | SIGFPE | coredump | * | SIGKILL | terminate(+) | * | SIGUSR1 | terminate | * | SIGSEGV | coredump | * | SIGUSR2 | terminate | * | SIGPIPE | terminate | * | SIGALRM | terminate | * | SIGTERM | terminate | * | SIGCHLD | ignore | * | SIGCONT | ignore(*) | * | SIGSTOP | stop(*)(+) | * | SIGTSTP | stop(*) | * | SIGTTIN | stop(*) | * | SIGTTOU | stop(*) | * | SIGURG | ignore | * | SIGXCPU | coredump | * | SIGXFSZ | coredump | * | SIGVTALRM | terminate | * | SIGPROF | terminate | * | SIGPOLL/SIGIO | terminate | * | SIGSYS/SIGUNUSED | coredump | * | SIGSTKFLT | terminate | * | SIGWINCH | ignore | * | SIGPWR | terminate | * | SIGRTMIN-SIGRTMAX | terminate | * +--------------------+------------------+ * | non-POSIX signal | default action | * +--------------------+------------------+ * | SIGEMT | coredump | * +--------------------+------------------+ * * (+) For SIGKILL and SIGSTOP the action is "always", not just "default". * (*) Special job control effects: * When SIGCONT is sent, it resumes the process (all threads in the group) * from TASK_STOPPED state and also clears any pending/queued stop signals * (any of those marked with "stop(*)"). This happens regardless of blocking, * catching, or ignoring SIGCONT. When any stop signal is sent, it clears * any pending/queued SIGCONT signals; this happens regardless of blocking, * catching, or ignored the stop signal, though (except for SIGSTOP) the * default action of stopping the process may happen later or never. 
*/ #ifdef SIGEMT #define SIGEMT_MASK rt_sigmask(SIGEMT) #else #define SIGEMT_MASK 0 #endif #if SIGRTMIN > BITS_PER_LONG #define rt_sigmask(sig) (1ULL << ((sig)-1)) #else #define rt_sigmask(sig) sigmask(sig) #endif #define siginmask(sig, mask) \ ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) #define SIG_KERNEL_STOP_MASK (\ rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \ rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) ) #define SIG_KERNEL_COREDUMP_MASK (\ rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \ rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \ rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \ rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \ rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \ SIGEMT_MASK ) #define SIG_KERNEL_IGNORE_MASK (\ rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) #define SIG_SPECIFIC_SICODES_MASK (\ rt_sigmask(SIGILL) | rt_sigmask(SIGFPE) | \ rt_sigmask(SIGSEGV) | rt_sigmask(SIGBUS) | \ rt_sigmask(SIGTRAP) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGPOLL) | rt_sigmask(SIGSYS) | \ SIGEMT_MASK ) #define sig_kernel_only(sig) siginmask(sig, SIG_KERNEL_ONLY_MASK) #define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK) #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) #define sig_specific_sicodes(sig) siginmask(sig, SIG_SPECIFIC_SICODES_MASK) #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) void signals_init(void); int restore_altstack(const stack_t __user *); int __save_altstack(stack_t __user *, unsigned long); #define unsafe_save_altstack(uss, sp, label) do { \ stack_t __user *__uss = uss; \ struct task_struct *t = current; \ unsafe_put_user((void __user *)t->sas_ss_sp, &__uss->ss_sp, label); \ unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ } while (0); #ifdef CONFIG_DYNAMIC_SIGFRAME bool sigaltstack_size_valid(size_t ss_size); #else static inline bool sigaltstack_size_valid(size_t size) { return true; } #endif /* !CONFIG_DYNAMIC_SIGFRAME */ #ifdef CONFIG_PROC_FS struct seq_file; extern void render_sigset_t(struct seq_file *, const char *, sigset_t *); #endif #ifndef arch_untagged_si_addr /* * Given a fault address and a signal and si_code which correspond to the * _sigfault union member, returns the address that must appear in si_addr if * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags. */ static inline void __user *arch_untagged_si_addr(void __user *addr, unsigned long sig, unsigned long si_code) { return addr; } #endif #endif /* _LINUX_SIGNAL_H */
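/*
 * Illustrative sketch (not part of the header above): building a sigset_t
 * with the primitives declared above and querying a pending queue with
 * next_signal(). The function name is a placeholder for the example.
 */
static inline bool example_term_or_int_pending(struct sigpending *pending)
{
	sigset_t mask;

	sigemptyset(&mask);
	sigaddset(&mask, SIGTERM);
	sigaddset(&mask, SIGINT);

	/* next_signal() returns the lowest pending signal in @mask, or 0. */
	return next_signal(pending, &mask) != 0;
}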
// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/dir.c - sysfs core and dir operation implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * * Please see Documentation/filesystems/sysfs.rst for more information. */ #define pr_fmt(fmt) "sysfs: " fmt #include <linux/fs.h> #include <linux/kobject.h> #include <linux/slab.h> #include "sysfs.h" DEFINE_SPINLOCK(sysfs_symlink_target_lock); void sysfs_warn_dup(struct kernfs_node *parent, const char *name) { char *buf; buf = kzalloc(PATH_MAX, GFP_KERNEL); if (buf) kernfs_path(parent, buf, PATH_MAX); pr_warn("cannot create duplicate filename '%s/%s'\n", buf, name); dump_stack(); kfree(buf); } /** * sysfs_create_dir_ns - create a directory for an object with a namespace tag * @kobj: object we're creating directory for * @ns: the namespace tag to use */ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { struct kernfs_node *parent, *kn; kuid_t uid; kgid_t gid; if (WARN_ON(!kobj)) return -EINVAL; if (kobj->parent) parent = kobj->parent->sd; else parent = sysfs_root_kn; if (!parent) return -ENOENT; kobject_get_ownership(kobj, &uid, &gid); kn = kernfs_create_dir_ns(parent, kobject_name(kobj), 0755, uid, gid, kobj, ns); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, kobject_name(kobj)); return PTR_ERR(kn); } kobj->sd = kn; return 0; } /** * sysfs_remove_dir - remove an object's directory. * @kobj: object. * * The only thing special about this is that we remove any files in * the directory before we remove the directory, and we've inlined * what used to be sysfs_rmdir() below, instead of calling separately. */ void sysfs_remove_dir(struct kobject *kobj) { struct kernfs_node *kn = kobj->sd; /* * In general, kobject owner is responsible for ensuring removal * doesn't race with other operations and sysfs doesn't provide any * protection; however, when @kobj is used as a symlink target, the * symlinking entity usually doesn't own @kobj and thus has no * control over removal. @kobj->sd may be removed anytime * and symlink code may end up dereferencing an already freed node. * * sysfs_symlink_target_lock synchronizes @kobj->sd * disassociation against symlink operations so that symlink code * can safely dereference @kobj->sd. */ spin_lock(&sysfs_symlink_target_lock); kobj->sd = NULL; spin_unlock(&sysfs_symlink_target_lock); if (kn) { WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR); kernfs_remove(kn); } } int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns) { struct kernfs_node *parent; int ret; parent = kernfs_get_parent(kobj->sd); ret = kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); kernfs_put(parent); return ret; } int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns) { struct kernfs_node *kn = kobj->sd; struct kernfs_node *new_parent; new_parent = new_parent_kobj && new_parent_kobj->sd ?
new_parent_kobj->sd : sysfs_root_kn; return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); } /** * sysfs_create_mount_point - create an always empty directory * @parent_kobj: kobject that will contain this always empty directory * @name: The name of the always empty directory to add */ int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) { struct kernfs_node *kn, *parent = parent_kobj->sd; kn = kernfs_create_empty_dir(parent, name); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, name); return PTR_ERR(kn); } return 0; } EXPORT_SYMBOL_GPL(sysfs_create_mount_point); /** * sysfs_remove_mount_point - remove an always empty directory. * @parent_kobj: kobject that will contain this always empty directory * @name: The name of the always empty directory to remove * */ void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) { struct kernfs_node *parent = parent_kobj->sd; kernfs_remove_by_name_ns(parent, name, NULL); } EXPORT_SYMBOL_GPL(sysfs_remove_mount_point);
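/*
 * Illustrative sketch (not part of the file above): how a subsystem creates
 * and removes a permanently empty directory intended as a mount point,
 * here under /sys/kernel via the exported kernel_kobj. The "example" name
 * is a placeholder.
 */
static int __init example_mount_point_init(void)
{
	/* Fails with -EEXIST (after a duplicate warning) if the name is taken. */
	return sysfs_create_mount_point(kernel_kobj, "example");
}

static void __exit example_mount_point_exit(void)
{
	sysfs_remove_mount_point(kernel_kobj, "example");
}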
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2001-2005 Edouard TISSERANT <edouard.tisserant@wanadoo.fr> * Copyright (c) 2004-2005 Stephane VOLTZ <svoltz@numericable.fr> * * USB Acecad "Acecad Flair" tablet support * * Changelog: * v3.2 - Added sysfs support */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb/input.h> MODULE_AUTHOR("Edouard TISSERANT <edouard.tisserant@wanadoo.fr>"); MODULE_DESCRIPTION("USB Acecad Flair tablet driver"); MODULE_LICENSE("GPL"); #define USB_VENDOR_ID_ACECAD 0x0460 #define USB_DEVICE_ID_FLAIR 0x0004 #define USB_DEVICE_ID_302 0x0008 struct usb_acecad { char name[128]; char phys[64]; struct usb_interface *intf; struct input_dev *input; struct urb *irq; unsigned char *data; dma_addr_t data_dma; }; static void usb_acecad_irq(struct urb *urb) { struct usb_acecad *acecad = urb->context; unsigned char *data = acecad->data; struct input_dev *dev = acecad->input; struct usb_interface *intf = acecad->intf; struct usb_device *udev = interface_to_usbdev(intf); int prox, status; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&intf->dev, "%s - urb shutting down with status: %d\n", __func__, urb->status); return; default: dev_dbg(&intf->dev, "%s - nonzero urb status received: %d\n", __func__, urb->status); goto resubmit; } prox = (data[0] & 0x04) >> 2; input_report_key(dev, BTN_TOOL_PEN, prox); if (prox) { int x = data[1] | (data[2] << 8); int y = data[3] | (data[4] << 8); /* Pressure should compute the same way for flair and 302 */ int pressure = data[5] | (data[6] << 8); int touch = data[0] & 0x01; int stylus = (data[0] & 0x10) >> 4; int stylus2 = (data[0] & 0x20) >> 5; input_report_abs(dev, ABS_X, x); input_report_abs(dev, ABS_Y, y); input_report_abs(dev, ABS_PRESSURE, pressure); input_report_key(dev, BTN_TOUCH, touch); input_report_key(dev, BTN_STYLUS, stylus); input_report_key(dev, BTN_STYLUS2, stylus2); } /* event termination */ input_sync(dev); resubmit: status = usb_submit_urb(urb, GFP_ATOMIC); if (status) dev_err(&intf->dev, "can't resubmit intr, %s-%s/input0, status %d\n", udev->bus->bus_name, udev->devpath, status); } static int usb_acecad_open(struct input_dev *dev) { struct usb_acecad *acecad = input_get_drvdata(dev); acecad->irq->dev = interface_to_usbdev(acecad->intf); if (usb_submit_urb(acecad->irq, GFP_KERNEL)) return -EIO; return 0; } static void usb_acecad_close(struct input_dev *dev) { struct usb_acecad *acecad = input_get_drvdata(dev); usb_kill_urb(acecad->irq); } static int
usb_acecad_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); struct usb_host_interface *interface = intf->cur_altsetting; struct usb_endpoint_descriptor *endpoint; struct usb_acecad *acecad; struct input_dev *input_dev; int pipe, maxp; int err; if (interface->desc.bNumEndpoints != 1) return -ENODEV; endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -ENODEV; pipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress); maxp = usb_maxpacket(dev, pipe); acecad = kzalloc(sizeof(struct usb_acecad), GFP_KERNEL); input_dev = input_allocate_device(); if (!acecad || !input_dev) { err = -ENOMEM; goto fail1; } acecad->data = usb_alloc_coherent(dev, 8, GFP_KERNEL, &acecad->data_dma); if (!acecad->data) { err= -ENOMEM; goto fail1; } acecad->irq = usb_alloc_urb(0, GFP_KERNEL); if (!acecad->irq) { err = -ENOMEM; goto fail2; } acecad->intf = intf; acecad->input = input_dev; if (dev->manufacturer) strscpy(acecad->name, dev->manufacturer, sizeof(acecad->name)); if (dev->product) { if (dev->manufacturer) strlcat(acecad->name, " ", sizeof(acecad->name)); strlcat(acecad->name, dev->product, sizeof(acecad->name)); } usb_make_path(dev, acecad->phys, sizeof(acecad->phys)); strlcat(acecad->phys, "/input0", sizeof(acecad->phys)); input_dev->name = acecad->name; input_dev->phys = acecad->phys; usb_to_input_id(dev, &input_dev->id); input_dev->dev.parent = &intf->dev; input_set_drvdata(input_dev, acecad); input_dev->open = usb_acecad_open; input_dev->close = usb_acecad_close; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); input_dev->keybit[BIT_WORD(BTN_DIGI)] = BIT_MASK(BTN_TOOL_PEN) | BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS) | BIT_MASK(BTN_STYLUS2); switch (id->driver_info) { case 0: input_set_abs_params(input_dev, ABS_X, 0, 5000, 4, 0); input_set_abs_params(input_dev, ABS_Y, 0, 3750, 4, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 512, 0, 0); if (!strlen(acecad->name)) snprintf(acecad->name, sizeof(acecad->name), "USB Acecad Flair Tablet %04x:%04x", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); break; case 1: input_set_abs_params(input_dev, ABS_X, 0, 53000, 4, 0); input_set_abs_params(input_dev, ABS_Y, 0, 2250, 4, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 1024, 0, 0); if (!strlen(acecad->name)) snprintf(acecad->name, sizeof(acecad->name), "USB Acecad 302 Tablet %04x:%04x", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); break; } usb_fill_int_urb(acecad->irq, dev, pipe, acecad->data, maxp > 8 ? 
8 : maxp, usb_acecad_irq, acecad, endpoint->bInterval); acecad->irq->transfer_dma = acecad->data_dma; acecad->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; err = input_register_device(acecad->input); if (err) goto fail3; usb_set_intfdata(intf, acecad); return 0; fail3: usb_free_urb(acecad->irq); fail2: usb_free_coherent(dev, 8, acecad->data, acecad->data_dma); fail1: input_free_device(input_dev); kfree(acecad); return err; } static void usb_acecad_disconnect(struct usb_interface *intf) { struct usb_acecad *acecad = usb_get_intfdata(intf); struct usb_device *udev = interface_to_usbdev(intf); usb_set_intfdata(intf, NULL); input_unregister_device(acecad->input); usb_free_urb(acecad->irq); usb_free_coherent(udev, 8, acecad->data, acecad->data_dma); kfree(acecad); } static const struct usb_device_id usb_acecad_id_table[] = { { USB_DEVICE(USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_FLAIR), .driver_info = 0 }, { USB_DEVICE(USB_VENDOR_ID_ACECAD, USB_DEVICE_ID_302), .driver_info = 1 }, { } }; MODULE_DEVICE_TABLE(usb, usb_acecad_id_table); static struct usb_driver usb_acecad_driver = { .name = "usb_acecad", .probe = usb_acecad_probe, .disconnect = usb_acecad_disconnect, .id_table = usb_acecad_id_table, }; module_usb_driver(usb_acecad_driver);
/* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/writeback.h */ #ifndef WRITEBACK_H #define WRITEBACK_H #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/fs.h> #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> #include <linux/pagevec.h> struct bio; DECLARE_PER_CPU(int, dirty_throttle_leaks); /* * The global dirty threshold is normally equal to the global dirty limit, * except when the system suddenly allocates a lot of anonymous memory and * knocks down the global dirty threshold quickly, in which case the global * dirty limit will follow down slowly to prevent livelocking all dirtier tasks. */ #define DIRTY_SCOPE 8 struct backing_dev_info; /* * fs/fs-writeback.c */ enum writeback_sync_modes { WB_SYNC_NONE, /* Don't wait on anything */ WB_SYNC_ALL, /* Wait on every mapping */ }; /* * A control structure which tells the writeback code what to do. These are * always on the stack, and hence need no locking. They are always initialised * in a manner such that unspecified fields are set to zero. */ struct writeback_control { /* public fields that can be set and/or consumed by the caller: */ long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ /* * For a_ops->writepages(): if start or end are non-zero then this is * a hint that the filesystem need only write out the pages inside that * byterange. The byte at `end' is included in the writeout request.
*/ loff_t range_start; loff_t range_end; enum writeback_sync_modes sync_mode; unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_background:1; /* A background writeback */ unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */ /* * When writeback IOs are bounced through async layers, only the * initial synchronous phase should be accounted towards inode * cgroup ownership arbitration to avoid confusion. Later stages * can set the following flag to disable the accounting. */ unsigned no_cgroup_owner:1; /* To enable batching of swap writes to non-block-device backends, * "plug" can be set point to a 'struct swap_iocb *'. When all swap * writes have been submitted, if with swap_iocb is not NULL, * swap_write_unplug() should be called. */ struct swap_iocb **swap_plug; /* internal fields used by the ->writepages implementation: */ struct folio_batch fbatch; pgoff_t index; int saved_err; #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb; /* wb this writeback is issued under */ struct inode *inode; /* inode being written out */ /* foreign inode detection, see wbc_detach_inode() */ int wb_id; /* current wb id */ int wb_lcand_id; /* last foreign candidate wb id */ int wb_tcand_id; /* this foreign candidate wb id */ size_t wb_bytes; /* bytes written by current wb */ size_t wb_lcand_bytes; /* bytes written by last candidate */ size_t wb_tcand_bytes; /* bytes written by this candidate */ #endif }; static inline blk_opf_t wbc_to_write_flags(struct writeback_control *wbc) { blk_opf_t flags = 0; if (wbc->sync_mode == WB_SYNC_ALL) flags |= REQ_SYNC; else if (wbc->for_kupdate || wbc->for_background) flags |= REQ_BACKGROUND; return flags; } #ifdef CONFIG_CGROUP_WRITEBACK #define wbc_blkcg_css(wbc) \ ((wbc)->wb ? (wbc)->wb->blkcg_css : blkcg_root_css) #else #define wbc_blkcg_css(wbc) (blkcg_root_css) #endif /* CONFIG_CGROUP_WRITEBACK */ /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to * and are measured against each other in. There always is one global * domain, global_wb_domain, that every wb in the system is a member of. * This allows measuring the relative bandwidth of each wb to distribute * dirtyable memory accordingly. */ struct wb_domain { spinlock_t lock; /* * Scale the writeback cache size proportional to the relative * writeout speed. * * We do this by keeping a floating proportion between BDIs, based * on page writeback completions [end_page_writeback()]. Those * devices that write out pages fastest will get the larger share, * while the slower will get a smaller share. * * We use page writeout completions because we are interested in * getting rid of dirty pages. Having them written out is the * primary goal. * * We introduce a concept of time, a period over which we measure * these events, because demand can/will vary over time. The length * of this period itself is measured in page writeback completions. */ struct fprop_global completions; struct timer_list period_timer; /* timer for aging of completions */ unsigned long period_time; /* * The dirtyable memory and dirty threshold could be suddenly * knocked down by a large amount (eg. on the startup of KVM in a * swapless system). This may throw the system into deep dirty * exceeded state and throttle heavy/light dirtiers alike. 
To * retain good responsiveness, maintain global_dirty_limit for * tracking slowly down to the knocked down dirty threshold. * * Both fields are protected by ->lock. */ unsigned long dirty_limit_tstamp; unsigned long dirty_limit; }; /** * wb_domain_size_changed - memory available to a wb_domain has changed * @dom: wb_domain of interest * * This function should be called when the amount of memory available to * @dom has changed. It resets @dom's dirty limit parameters to prevent * the past values which don't match the current configuration from skewing * dirty throttling. Without this, when memory size of a wb_domain is * greatly reduced, the dirty throttling logic may allow too many pages to * be dirtied leading to consecutive unnecessary OOMs and may get stuck in * that situation. */ static inline void wb_domain_size_changed(struct wb_domain *dom) { spin_lock(&dom->lock); dom->dirty_limit_tstamp = jiffies; dom->dirty_limit = 0; spin_unlock(&dom->lock); } /* * fs/fs-writeback.c */ struct bdi_writeback; void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason); void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(enum wb_reason reason); void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); } #ifdef CONFIG_CGROUP_WRITEBACK #include <linux/cgroup.h> #include <linux/bio.h> void __inode_attach_wb(struct inode *inode, struct folio *folio); void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock); void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done); void cgroup_writeback_umount(void); bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** * inode_attach_wb - associate an inode with its wb * @inode: inode of interest * @folio: folio being dirtied (may be NULL) * * If @inode doesn't have its wb, associate it with the wb matching the * memcg of @folio or, if @folio is NULL, %current. May be called w/ or w/o * @inode->i_lock. */ static inline void inode_attach_wb(struct inode *inode, struct folio *folio) { if (!inode->i_wb) __inode_attach_wb(inode, folio); } /** * inode_detach_wb - disassociate an inode from its wb * @inode: inode of interest * * @inode is being freed. Detach from its wb. */ static inline void inode_detach_wb(struct inode *inode) { if (inode->i_wb) { WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); wb_put(inode->i_wb); inode->i_wb = NULL; } } /** * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite * @wbc: writeback_control of interest * @inode: target inode * * This function is to be used by __filemap_fdatawrite_range(), which is an * alternative entry point into writeback code, and first ensures @inode is * associated with a bdi_writeback and attaches it to @wbc. 
*/ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { spin_lock(&inode->i_lock); inode_attach_wb(inode, NULL); wbc_attach_and_unlock_inode(wbc, inode); } /** * wbc_init_bio - writeback specific initializtion of bio * @wbc: writeback_control for the writeback in progress * @bio: bio to be initialized * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup * writeback context. Must be called after the bio has been associated with * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { /* * pageout() path doesn't attach @wbc to the inode being written * out. This is intentional as we don't want the function to block * behind a slow cgroup. Ultimately, we want pageout() to kick off * regular writeback instead of writing things out itself. */ if (wbc->wb) bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct folio *folio) { } static inline void inode_detach_wb(struct inode *inode) { } static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock) { spin_unlock(&inode->i_lock); } static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { } static inline void wbc_detach_inode(struct writeback_control *wbc) { } static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { } static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes) { } static inline void cgroup_writeback_umount(void) { } #endif /* CONFIG_CGROUP_WRITEBACK */ /* * mm/page-writeback.c */ void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_timer_fn(struct timer_list *t); bool node_dirty_ok(struct pglist_data *pgdat); int wb_domain_init(struct wb_domain *dom, gfp_t gfp); #ifdef CONFIG_CGROUP_WRITEBACK void wb_domain_exit(struct wb_domain *dom); #endif extern struct wb_domain global_wb_domain; /* These are exported to sysctl. */ extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; extern unsigned int dirtytime_expire_interval; extern int laptop_mode; int dirtytime_interval_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); void wb_update_bandwidth(struct bdi_writeback *wb); /* Invoke balance dirty pages in async mode. 
*/ #define BDP_ASYNC 0x0001 void balance_dirty_pages_ratelimited(struct address_space *mapping); int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, unsigned int flags); bool wb_over_bg_thresh(struct bdi_writeback *wb); struct folio *writeback_iter(struct address_space *mapping, struct writeback_control *wbc, struct folio *folio, int *error); typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, void *data); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); bool redirty_page_for_writepage(struct writeback_control *, struct page *); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); #endif /* WRITEBACK_H */
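/*
 * Illustrative sketch (not part of the header above): the loop shape a
 * filesystem's ->writepages() is expected to build around writeback_iter().
 * example_write_one_folio() stands in for the filesystem's own writeout.
 */
static int example_write_one_folio(struct folio *folio,
				   struct writeback_control *wbc);

static int example_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	struct folio *folio = NULL;
	int error = 0;

	/*
	 * writeback_iter() walks the dirty folios selected by @wbc, consumes
	 * the error of the previous iteration and returns NULL when done.
	 */
	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
		error = example_write_one_folio(folio, wbc);

	return error;
}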
/* SPDX-License-Identifier: GPL-2.0 */ /* * USB PHY defines * * These APIs may be used between USB controllers. USB device drivers * (for either host or peripheral roles) don't use these calls; they * continue to use just usb_device and usb_gadget. */ #ifndef __LINUX_USB_PHY_H #define __LINUX_USB_PHY_H #include <linux/extcon.h> #include <linux/notifier.h> #include <linux/usb.h> #include <uapi/linux/usb/charger.h> enum usb_phy_interface { USBPHY_INTERFACE_MODE_UNKNOWN, USBPHY_INTERFACE_MODE_UTMI, USBPHY_INTERFACE_MODE_UTMIW, USBPHY_INTERFACE_MODE_ULPI, USBPHY_INTERFACE_MODE_SERIAL, USBPHY_INTERFACE_MODE_HSIC, }; enum usb_phy_events { USB_EVENT_NONE, /* no events or cable disconnected */ USB_EVENT_VBUS, /* vbus valid event */ USB_EVENT_ID, /* id was grounded */ USB_EVENT_CHARGER, /* usb dedicated charger */ USB_EVENT_ENUMERATED, /* gadget driver enumerated */ }; /* associate a type with PHY */ enum usb_phy_type { USB_PHY_TYPE_UNDEFINED, USB_PHY_TYPE_USB2, USB_PHY_TYPE_USB3, }; /* OTG defines lots of enumeration states before device reset */ enum usb_otg_state { OTG_STATE_UNDEFINED = 0, /* single-role peripheral, and dual-role default-b */ OTG_STATE_B_IDLE, OTG_STATE_B_SRP_INIT, OTG_STATE_B_PERIPHERAL, /* extra dual-role default-b states */ OTG_STATE_B_WAIT_ACON, OTG_STATE_B_HOST, /* dual-role default-a */ OTG_STATE_A_IDLE, OTG_STATE_A_WAIT_VRISE, OTG_STATE_A_WAIT_BCON, OTG_STATE_A_HOST, OTG_STATE_A_SUSPEND, OTG_STATE_A_PERIPHERAL, OTG_STATE_A_WAIT_VFALL, OTG_STATE_A_VBUS_ERR, }; struct usb_phy; struct usb_otg; /* for phys connected thru an ULPI interface, the user must * provide access ops */ struct usb_phy_io_ops { int (*read)(struct usb_phy *x, u32 reg); int (*write)(struct usb_phy *x, u32 val, u32 reg); }; struct usb_charger_current { unsigned int sdp_min; unsigned int sdp_max; unsigned int dcp_min; unsigned int dcp_max; unsigned int cdp_min; unsigned int cdp_max; unsigned int aca_min; unsigned int aca_max; }; struct usb_phy { struct device *dev; const char *label; unsigned int flags; enum usb_phy_type type; enum usb_phy_events last_event; struct usb_otg *otg; struct device *io_dev; struct usb_phy_io_ops *io_ops; void __iomem *io_priv; /* to support extcon device */ struct
extcon_dev *edev; struct extcon_dev *id_edev; struct notifier_block vbus_nb; struct notifier_block id_nb; struct notifier_block type_nb; /* Support USB charger */ enum usb_charger_type chg_type; enum usb_charger_state chg_state; struct usb_charger_current chg_cur; struct work_struct chg_work; /* for notification of usb_phy_events */ struct atomic_notifier_head notifier; /* to pass extra port status to the root hub */ u16 port_status; u16 port_change; /* to support controllers that have multiple phys */ struct list_head head; /* initialize/shutdown the phy */ int (*init)(struct usb_phy *x); void (*shutdown)(struct usb_phy *x); /* enable/disable VBUS */ int (*set_vbus)(struct usb_phy *x, int on); /* effective for B devices, ignored for A-peripheral */ int (*set_power)(struct usb_phy *x, unsigned mA); /* Set phy into suspend mode */ int (*set_suspend)(struct usb_phy *x, int suspend); /* * Set wakeup enable for PHY, in that case, the PHY can be * woken up from suspend status due to external events, * like vbus change, dp/dm change and id. */ int (*set_wakeup)(struct usb_phy *x, bool enabled); /* notify phy connect status change */ int (*notify_connect)(struct usb_phy *x, enum usb_device_speed speed); int (*notify_disconnect)(struct usb_phy *x, enum usb_device_speed speed); /* * Charger detection method can be implemented if you need to * manually detect the charger type. */ enum usb_charger_type (*charger_detect)(struct usb_phy *x); }; /* for board-specific init logic */ extern int usb_add_phy(struct usb_phy *, enum usb_phy_type type); extern int usb_add_phy_dev(struct usb_phy *); extern void usb_remove_phy(struct usb_phy *); /* helpers for direct access thru low-level io interface */ static inline int usb_phy_io_read(struct usb_phy *x, u32 reg) { if (x && x->io_ops && x->io_ops->read) return x->io_ops->read(x, reg); return -EINVAL; } static inline int usb_phy_io_write(struct usb_phy *x, u32 val, u32 reg) { if (x && x->io_ops && x->io_ops->write) return x->io_ops->write(x, val, reg); return -EINVAL; } static inline int usb_phy_init(struct usb_phy *x) { if (x && x->init) return x->init(x); return 0; } static inline void usb_phy_shutdown(struct usb_phy *x) { if (x && x->shutdown) x->shutdown(x); } static inline int usb_phy_vbus_on(struct usb_phy *x) { if (!x || !x->set_vbus) return 0; return x->set_vbus(x, true); } static inline int usb_phy_vbus_off(struct usb_phy *x) { if (!x || !x->set_vbus) return 0; return x->set_vbus(x, false); } /* for usb host and peripheral controller drivers */ #if IS_ENABLED(CONFIG_USB_PHY) extern struct usb_phy *usb_get_phy(enum usb_phy_type type); extern struct usb_phy *devm_usb_get_phy(struct device *dev, enum usb_phy_type type); extern struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, const char *phandle, u8 index); extern struct usb_phy *devm_usb_get_phy_by_node(struct device *dev, struct device_node *node, struct notifier_block *nb); extern void usb_put_phy(struct usb_phy *); extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x); extern void usb_phy_set_event(struct usb_phy *x, unsigned long event); extern void usb_phy_set_charger_current(struct usb_phy *usb_phy, unsigned int mA); extern void usb_phy_get_charger_current(struct usb_phy *usb_phy, unsigned int *min, unsigned int *max); extern void usb_phy_set_charger_state(struct usb_phy *usb_phy, enum usb_charger_state state); #else static inline struct usb_phy *usb_get_phy(enum usb_phy_type type) { return ERR_PTR(-ENXIO); } static inline struct usb_phy *devm_usb_get_phy(struct device 
*dev, enum usb_phy_type type) { return ERR_PTR(-ENXIO); } static inline struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, const char *phandle, u8 index) { return ERR_PTR(-ENXIO); } static inline struct usb_phy *devm_usb_get_phy_by_node(struct device *dev, struct device_node *node, struct notifier_block *nb) { return ERR_PTR(-ENXIO); } static inline void usb_put_phy(struct usb_phy *x) { } static inline void devm_usb_put_phy(struct device *dev, struct usb_phy *x) { } static inline void usb_phy_set_event(struct usb_phy *x, unsigned long event) { } static inline void usb_phy_set_charger_current(struct usb_phy *usb_phy, unsigned int mA) { } static inline void usb_phy_get_charger_current(struct usb_phy *usb_phy, unsigned int *min, unsigned int *max) { } static inline void usb_phy_set_charger_state(struct usb_phy *usb_phy, enum usb_charger_state state) { } #endif static inline int usb_phy_set_power(struct usb_phy *x, unsigned mA) { if (!x) return 0; usb_phy_set_charger_current(x, mA); if (x->set_power) return x->set_power(x, mA); return 0; } /* Context: can sleep */ static inline int usb_phy_set_suspend(struct usb_phy *x, int suspend) { if (x && x->set_suspend != NULL) return x->set_suspend(x, suspend); else return 0; } static inline int usb_phy_set_wakeup(struct usb_phy *x, bool enabled) { if (x && x->set_wakeup) return x->set_wakeup(x, enabled); else return 0; } static inline int usb_phy_notify_connect(struct usb_phy *x, enum usb_device_speed speed) { if (x && x->notify_connect) return x->notify_connect(x, speed); else return 0; } static inline int usb_phy_notify_disconnect(struct usb_phy *x, enum usb_device_speed speed) { if (x && x->notify_disconnect) return x->notify_disconnect(x, speed); else return 0; } /* notifiers */ static inline int usb_register_notifier(struct usb_phy *x, struct notifier_block *nb) { return atomic_notifier_chain_register(&x->notifier, nb); } static inline void usb_unregister_notifier(struct usb_phy *x, struct notifier_block *nb) { atomic_notifier_chain_unregister(&x->notifier, nb); } static inline const char *usb_phy_type_string(enum usb_phy_type type) { switch (type) { case USB_PHY_TYPE_USB2: return "USB2 PHY"; case USB_PHY_TYPE_USB3: return "USB3 PHY"; default: return "UNKNOWN PHY TYPE"; } } #endif /* __LINUX_USB_PHY_H */
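/*
 * Illustrative sketch (not part of the header above): a controller driver
 * acquiring a USB2 PHY with the devm helper and bringing it up. Error
 * handling is trimmed and the function name is a placeholder.
 */
static int example_phy_bringup(struct device *dev, struct usb_phy **out)
{
	struct usb_phy *phy;

	phy = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2);
	if (IS_ERR(phy))
		return PTR_ERR(phy);

	*out = phy;
	/* usb_phy_init() is a no-op when the PHY provides no ->init callback. */
	return usb_phy_init(phy);
}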
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MNT_IDMAPPING_H #define _LINUX_MNT_IDMAPPING_H #include <linux/types.h> #include <linux/uidgid.h> struct mnt_idmap; struct user_namespace; extern struct mnt_idmap nop_mnt_idmap; extern struct user_namespace init_user_ns; typedef struct { uid_t val; } vfsuid_t; typedef struct { gid_t val; } vfsgid_t; static_assert(sizeof(vfsuid_t) == sizeof(kuid_t)); static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); #ifdef CONFIG_MULTIUSER static inline uid_t __vfsuid_val(vfsuid_t uid) { return uid.val; } static inline gid_t __vfsgid_val(vfsgid_t gid) { return gid.val; } #else static inline uid_t __vfsuid_val(vfsuid_t uid) { return 0; } static inline gid_t __vfsgid_val(vfsgid_t gid) { return 0; } #endif static inline bool vfsuid_valid(vfsuid_t uid) { return __vfsuid_val(uid) != (uid_t)-1; } static inline bool vfsgid_valid(vfsgid_t gid) { return __vfsgid_val(gid) != (gid_t)-1; } static inline bool vfsuid_eq(vfsuid_t left, vfsuid_t right) { return vfsuid_valid(left) && __vfsuid_val(left) == __vfsuid_val(right); } static inline bool vfsgid_eq(vfsgid_t left, vfsgid_t right) { return vfsgid_valid(left) && __vfsgid_val(left) == __vfsgid_val(right); } /** * vfsuid_eq_kuid - check whether kuid and vfsuid have the same value * @vfsuid: the vfsuid to compare * @kuid: the kuid to compare * * Check whether @vfsuid and @kuid have the same values. * * Return: true if @vfsuid and @kuid have the same value, false if not. * Comparison between two invalid uids returns false. */ static inline bool vfsuid_eq_kuid(vfsuid_t vfsuid, kuid_t kuid) { return vfsuid_valid(vfsuid) && __vfsuid_val(vfsuid) == __kuid_val(kuid); } /** * vfsgid_eq_kgid - check whether kgid and vfsgid have the same value * @vfsgid: the vfsgid to compare * @kgid: the kgid to compare * * Check whether @vfsgid and @kgid have the same values. * * Return: true if @vfsgid and @kgid have the same value, false if not. * Comparison between two invalid gids returns false. */ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) { return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid); } /* * vfs{g,u}ids are created from k{g,u}ids. * We don't allow them to be created from regular {u,g}id.
*/ #define VFSUIDT_INIT(val) (vfsuid_t){ __kuid_val(val) } #define VFSGIDT_INIT(val) (vfsgid_t){ __kgid_val(val) } #define INVALID_VFSUID VFSUIDT_INIT(INVALID_UID) #define INVALID_VFSGID VFSGIDT_INIT(INVALID_GID) /* * Allow a vfs{g,u}id to be used as a k{g,u}id where we want to compare * whether the mapped value is identical to value of a k{g,u}id. */ #define AS_KUIDT(val) (kuid_t){ __vfsuid_val(val) } #define AS_KGIDT(val) (kgid_t){ __vfsgid_val(val) } int vfsgid_in_group_p(vfsgid_t vfsgid); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); vfsgid_t make_vfsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kgid_t kgid); kuid_t from_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsuid_t vfsuid); kgid_t from_vfsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsgid_t vfsgid); /** * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsuid: vfsuid to be mapped * * Check whether @vfsuid has a mapping in the filesystem idmapping. Use this * function to check whether the filesystem idmapping has a mapping for * @vfsuid. * * Return: true if @vfsuid has a mapping in the filesystem, false if not. */ static inline bool vfsuid_has_fsmapping(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsuid_t vfsuid) { return uid_valid(from_vfsuid(idmap, fs_userns, vfsuid)); } static inline bool vfsuid_has_mapping(struct user_namespace *userns, vfsuid_t vfsuid) { return from_kuid(userns, AS_KUIDT(vfsuid)) != (uid_t)-1; } /** * vfsuid_into_kuid - convert vfsuid into kuid * @vfsuid: the vfsuid to convert * * This can be used when a vfsuid is committed as a kuid. * * Return: a kuid with the value of @vfsuid */ static inline kuid_t vfsuid_into_kuid(vfsuid_t vfsuid) { return AS_KUIDT(vfsuid); } /** * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * @vfsgid: vfsgid to be mapped * * Check whether @vfsgid has a mapping in the filesystem idmapping. Use this * function to check whether the filesystem idmapping has a mapping for * @vfsgid. * * Return: true if @vfsgid has a mapping in the filesystem, false if not. */ static inline bool vfsgid_has_fsmapping(struct mnt_idmap *idmap, struct user_namespace *fs_userns, vfsgid_t vfsgid) { return gid_valid(from_vfsgid(idmap, fs_userns, vfsgid)); } static inline bool vfsgid_has_mapping(struct user_namespace *userns, vfsgid_t vfsgid) { return from_kgid(userns, AS_KGIDT(vfsgid)) != (gid_t)-1; } /** * vfsgid_into_kgid - convert vfsgid into kgid * @vfsgid: the vfsgid to convert * * This can be used when a vfsgid is committed as a kgid. * * Return: a kgid with the value of @vfsgid */ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) { return AS_KGIDT(vfsgid); } /** * mapped_fsuid - return caller's fsuid mapped according to an idmapping * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsuid. A common example is initializing the i_uid field of * a newly allocated inode triggered by a creation event such as mkdir or * O_CREAT. Other examples include the allocation of quotas for a specific * user. * * Return: the caller's current fsuid mapped up according to @idmap. 
*/ static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns) { return from_vfsuid(idmap, fs_userns, VFSUIDT_INIT(current_fsuid())); } /** * mapped_fsgid - return caller's fsgid mapped according to an idmapping * @idmap: the mount's idmapping * @fs_userns: the filesystem's idmapping * * Use this helper to initialize a new vfs or filesystem object based on * the caller's fsgid. A common example is initializing the i_gid field of * a newly allocated inode triggered by a creation event such as mkdir or * O_CREAT. Other examples include the allocation of quotas for a specific * user. * * Return: the caller's current fsgid mapped up according to @idmap. */ static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap, struct user_namespace *fs_userns) { return from_vfsgid(idmap, fs_userns, VFSGIDT_INIT(current_fsgid())); } #endif /* _LINUX_MNT_IDMAPPING_H */
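/*
 * Hedged illustration (not kernel code): a minimal sketch of how a VFS-side
 * helper might use the idmapping API above. An inode's raw kuid is first
 * mapped into a vfsuid through the mount's idmapping and the filesystem's
 * user namespace, and only then compared against the caller's fsuid. The
 * helper name example_inode_owner and its parameters are assumptions.
 */
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>

static bool example_inode_owner(struct mnt_idmap *idmap,
				struct user_namespace *fs_userns,
				kuid_t i_uid)
{
	vfsuid_t vfsuid = make_vfsuid(idmap, fs_userns, i_uid);

	/* No mapping in the caller-visible idmapping: not the owner. */
	if (!vfsuid_valid(vfsuid))
		return false;

	return vfsuid_eq_kuid(vfsuid, current_fsuid());
}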
// SPDX-License-Identifier: GPL-2.0-only /* * mm/readahead.c - address_space-level file readahead. * * Copyright (C) 2002, Linus Torvalds * * 09Apr2002 Andrew Morton * Initial version.
*/ /** * DOC: Readahead Overview * * Readahead is used to read content into the page cache before it is * explicitly requested by the application. Readahead only ever * attempts to read folios that are not yet in the page cache. If a * folio is present but not up-to-date, readahead will not try to read * it. In that case a simple ->read_folio() will be requested. * * Readahead is triggered when an application read request (whether a * system call or a page fault) finds that the requested folio is not in * the page cache, or that it is in the page cache and has the * readahead flag set. This flag indicates that the folio was read * as part of a previous readahead request and now that it has been * accessed, it is time for the next readahead. * * Each readahead request is partly synchronous read, and partly async * readahead. This is reflected in the struct file_ra_state which * contains ->size being the total number of pages, and ->async_size * which is the number of pages in the async section. The readahead * flag will be set on the first folio in this async section to trigger * a subsequent readahead. Once a series of sequential reads has been * established, there should be no need for a synchronous component and * all readahead request will be fully asynchronous. * * When either of the triggers causes a readahead, three numbers need * to be determined: the start of the region to read, the size of the * region, and the size of the async tail. * * The start of the region is simply the first page address at or after * the accessed address, which is not currently populated in the page * cache. This is found with a simple search in the page cache. * * The size of the async tail is determined by subtracting the size that * was explicitly requested from the determined request size, unless * this would be less than zero - then zero is used. NOTE THIS * CALCULATION IS WRONG WHEN THE START OF THE REGION IS NOT THE ACCESSED * PAGE. ALSO THIS CALCULATION IS NOT USED CONSISTENTLY. * * The size of the region is normally determined from the size of the * previous readahead which loaded the preceding pages. This may be * discovered from the struct file_ra_state for simple sequential reads, * or from examining the state of the page cache when multiple * sequential reads are interleaved. Specifically: where the readahead * was triggered by the readahead flag, the size of the previous * readahead is assumed to be the number of pages from the triggering * page to the start of the new readahead. In these cases, the size of * the previous readahead is scaled, often doubled, for the new * readahead, though see get_next_ra_size() for details. * * If the size of the previous read cannot be determined, the number of * preceding pages in the page cache is used to estimate the size of * a previous read. This estimate could easily be misled by random * reads being coincidentally adjacent, so it is ignored unless it is * larger than the current request, and it is not scaled up, unless it * is at the start of file. * * In general readahead is accelerated at the start of the file, as * reads from there are often sequential. There are other minor * adjustments to the readahead size in various special cases and these * are best discovered by reading the code. * * The above calculation, based on the previous readahead size, * determines the size of the readahead, to which any requested read * size may be added. 
* * Readahead requests are sent to the filesystem using the ->readahead() * address space operation, for which mpage_readahead() is a canonical * implementation. ->readahead() should normally initiate reads on all * folios, but may fail to read any or all folios without causing an I/O * error. The page cache reading code will issue a ->read_folio() request * for any folio which ->readahead() did not read, and only an error * from this will be final. * * ->readahead() will generally call readahead_folio() repeatedly to get * each folio from those prepared for readahead. It may fail to read a * folio by: * * * not calling readahead_folio() sufficiently many times, effectively * ignoring some folios, as might be appropriate if the path to * storage is congested. * * * failing to actually submit a read request for a given folio, * possibly due to insufficient resources, or * * * getting an error during subsequent processing of a request. * * In the last two cases, the folio should be unlocked by the filesystem * to indicate that the read attempt has failed. In the first case the * folio will be unlocked by the VFS. * * Those folios not in the final ``async_size`` of the request should be * considered to be important and ->readahead() should not fail them due * to congestion or temporary resource unavailability, but should wait * for necessary resources (e.g. memory or indexing information) to * become available. Folios in the final ``async_size`` may be * considered less urgent and failure to read them is more acceptable. * In this case it is best to use filemap_remove_folio() to remove the * folios from the page cache as is automatically done for folios that * were not fetched with readahead_folio(). This will allow a * subsequent synchronous readahead request to try them again. If they * are left in the page cache, then they will be read individually using * ->read_folio() which may be less efficient. */ #include <linux/blkdev.h> #include <linux/kernel.h> #include <linux/dax.h> #include <linux/gfp.h> #include <linux/export.h> #include <linux/backing-dev.h> #include <linux/task_io_accounting_ops.h> #include <linux/pagemap.h> #include <linux/psi.h> #include <linux/syscalls.h> #include <linux/file.h> #include <linux/mm_inline.h> #include <linux/blk-cgroup.h> #include <linux/fadvise.h> #include <linux/sched/mm.h> #include "internal.h" /* * Initialise a struct file's readahead state. Assumes that the caller has * memset *ra to zero. */ void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) { ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages; ra->prev_pos = -1; } EXPORT_SYMBOL_GPL(file_ra_state_init); static void read_pages(struct readahead_control *rac) { const struct address_space_operations *aops = rac->mapping->a_ops; struct folio *folio; struct blk_plug plug; if (!readahead_count(rac)) return; if (unlikely(rac->_workingset)) psi_memstall_enter(&rac->_pflags); blk_start_plug(&plug); if (aops->readahead) { aops->readahead(rac); /* * Clean up the remaining folios. The sizes in ->ra * may be used to size the next readahead, so make sure * they accurately reflect what happened. 
*/ while ((folio = readahead_folio(rac)) != NULL) { unsigned long nr = folio_nr_pages(folio); folio_get(folio); rac->ra->size -= nr; if (rac->ra->async_size >= nr) { rac->ra->async_size -= nr; filemap_remove_folio(folio); } folio_unlock(folio); folio_put(folio); } } else { while ((folio = readahead_folio(rac)) != NULL) aops->read_folio(rac->file, folio); } blk_finish_plug(&plug); if (unlikely(rac->_workingset)) psi_memstall_leave(&rac->_pflags); rac->_workingset = false; BUG_ON(readahead_count(rac)); } /** * page_cache_ra_unbounded - Start unchecked readahead. * @ractl: Readahead control. * @nr_to_read: The number of pages to read. * @lookahead_size: Where to start the next readahead. * * This function is for filesystems to call when they want to start * readahead beyond a file's stated i_size. This is almost certainly * not the function you want to call. Use page_cache_async_readahead() * or page_cache_sync_readahead() instead. * * Context: File is referenced by caller. Mutexes may be held by caller. * May sleep, but will not reenter filesystem to reclaim memory. */ void page_cache_ra_unbounded(struct readahead_control *ractl, unsigned long nr_to_read, unsigned long lookahead_size) { struct address_space *mapping = ractl->mapping; unsigned long index = readahead_index(ractl); gfp_t gfp_mask = readahead_gfp_mask(mapping); unsigned long i; /* * Partway through the readahead operation, we will have added * locked pages to the page cache, but will not yet have submitted * them for I/O. Adding another page may need to allocate memory, * which can trigger memory reclaim. Telling the VM we're in * the middle of a filesystem operation will cause it to not * touch file-backed pages, preventing a deadlock. Most (all?) * filesystems already specify __GFP_NOFS in their mapping's * gfp_mask, but let's be explicit here. */ unsigned int nofs = memalloc_nofs_save(); filemap_invalidate_lock_shared(mapping); /* * Preallocate as many pages as we will need. */ for (i = 0; i < nr_to_read; i++) { struct folio *folio = xa_load(&mapping->i_pages, index + i); if (folio && !xa_is_value(folio)) { /* * Page already present? Kick off the current batch * of contiguous pages before continuing with the * next batch. This page may be the one we would * have intended to mark as Readahead, but we don't * have a stable reference to this page, and it's * not worth getting one just for that. */ read_pages(ractl); ractl->_index++; i = ractl->_index + ractl->_nr_pages - index - 1; continue; } folio = filemap_alloc_folio(gfp_mask, 0); if (!folio) break; if (filemap_add_folio(mapping, folio, index + i, gfp_mask) < 0) { folio_put(folio); read_pages(ractl); ractl->_index++; i = ractl->_index + ractl->_nr_pages - index - 1; continue; } if (i == nr_to_read - lookahead_size) folio_set_readahead(folio); ractl->_workingset |= folio_test_workingset(folio); ractl->_nr_pages++; } /* * Now start the IO. We ignore I/O errors - if the folio is not * uptodate then the caller will launch read_folio again, and * will then handle the error. */ read_pages(ractl); filemap_invalidate_unlock_shared(mapping); memalloc_nofs_restore(nofs); } EXPORT_SYMBOL_GPL(page_cache_ra_unbounded); /* * do_page_cache_ra() actually reads a chunk of disk. It allocates * the pages first, then submits them for I/O. This avoids the very bad * behaviour which would occur if page allocations are causing VM writeback. * We really don't want to intermingle reads and writes like that. 
*/ static void do_page_cache_ra(struct readahead_control *ractl, unsigned long nr_to_read, unsigned long lookahead_size) { struct inode *inode = ractl->mapping->host; unsigned long index = readahead_index(ractl); loff_t isize = i_size_read(inode); pgoff_t end_index; /* The last page we want to read */ if (isize == 0) return; end_index = (isize - 1) >> PAGE_SHIFT; if (index > end_index) return; /* Don't read past the page containing the last byte of the file */ if (nr_to_read > end_index - index) nr_to_read = end_index - index + 1; page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size); } /* * Chunk the readahead into 2 megabyte units, so that we don't pin too much * memory at once. */ void force_page_cache_ra(struct readahead_control *ractl, unsigned long nr_to_read) { struct address_space *mapping = ractl->mapping; struct file_ra_state *ra = ractl->ra; struct backing_dev_info *bdi = inode_to_bdi(mapping->host); unsigned long max_pages, index; if (unlikely(!mapping->a_ops->read_folio && !mapping->a_ops->readahead)) return; /* * If the request exceeds the readahead window, allow the read to * be up to the optimal hardware IO size */ index = readahead_index(ractl); max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages); nr_to_read = min_t(unsigned long, nr_to_read, max_pages); while (nr_to_read) { unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE; if (this_chunk > nr_to_read) this_chunk = nr_to_read; ractl->_index = index; do_page_cache_ra(ractl, this_chunk, 0); index += this_chunk; nr_to_read -= this_chunk; } } /* * Set the initial window size, round to next power of 2 and square * for small size, x 4 for medium, and x 2 for large * for 128k (32 page) max ra * 1-2 page = 16k, 3-4 page 32k, 5-8 page = 64k, > 8 page = 128k initial */ static unsigned long get_init_ra_size(unsigned long size, unsigned long max) { unsigned long newsize = roundup_pow_of_two(size); if (newsize <= max / 32) newsize = newsize * 4; else if (newsize <= max / 4) newsize = newsize * 2; else newsize = max; return newsize; } /* * Get the previous window size, ramp it up, and * return it as the new window size. */ static unsigned long get_next_ra_size(struct file_ra_state *ra, unsigned long max) { unsigned long cur = ra->size; if (cur < max / 16) return 4 * cur; if (cur <= max / 2) return 2 * cur; return max; } /* * On-demand readahead design. * * The fields in struct file_ra_state represent the most-recently-executed * readahead attempt: * * |<----- async_size ---------| * |------------------- size -------------------->| * |==================#===========================| * ^start ^page marked with PG_readahead * * To overlap application thinking time and disk I/O time, we do * `readahead pipelining': Do not wait until the application consumed all * readahead pages and stalled on the missing page at readahead_index; * Instead, submit an asynchronous readahead I/O as soon as there are * only async_size pages left in the readahead window. Normally async_size * will be equal to size, for maximum pipelining. * * In interleaved sequential reads, concurrent streams on the same fd can * be invalidating each other's readahead state. So we flag the new readahead * page at (start+size-async_size) with PG_readahead, and use it as readahead * indicator. The flag won't be set on already cached pages, to avoid the * readahead-for-nothing fuss, saving pointless page cache lookups. * * prev_pos tracks the last visited byte in the _previous_ read request. 
* It should be maintained by the caller, and will be used for detecting * small random reads. Note that the readahead algorithm checks loosely * for sequential patterns. Hence interleaved reads might be served as * sequential ones. * * There is a special-case: if the first page which the application tries to * read happens to be the first page of the file, it is assumed that a linear * read is about to happen and the window is immediately set to the initial size * based on I/O request size and the max_readahead. * * The code ramps up the readahead size aggressively at first, but slow down as * it approaches max_readhead. */ /* * Count contiguously cached pages from @index-1 to @index-@max, * this count is a conservative estimation of * - length of the sequential read sequence, or * - thrashing threshold in memory tight systems */ static pgoff_t count_history_pages(struct address_space *mapping, pgoff_t index, unsigned long max) { pgoff_t head; rcu_read_lock(); head = page_cache_prev_miss(mapping, index - 1, max); rcu_read_unlock(); return index - 1 - head; } /* * page cache context based readahead */ static int try_context_readahead(struct address_space *mapping, struct file_ra_state *ra, pgoff_t index, unsigned long req_size, unsigned long max) { pgoff_t size; size = count_history_pages(mapping, index, max); /* * not enough history pages: * it could be a random read */ if (size <= req_size) return 0; /* * starts from beginning of file: * it is a strong indication of long-run stream (or whole-file-read) */ if (size >= index) size *= 2; ra->start = index; ra->size = min(size + req_size, max); ra->async_size = 1; return 1; } static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, pgoff_t mark, unsigned int order, gfp_t gfp) { int err; struct folio *folio = filemap_alloc_folio(gfp, order); if (!folio) return -ENOMEM; mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); if (err) { folio_put(folio); return err; } ractl->_nr_pages += 1UL << order; ractl->_workingset |= folio_test_workingset(folio); return 0; } void page_cache_ra_order(struct readahead_control *ractl, struct file_ra_state *ra, unsigned int new_order) { struct address_space *mapping = ractl->mapping; pgoff_t index = readahead_index(ractl); pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT; pgoff_t mark = index + ra->size - ra->async_size; int err = 0; gfp_t gfp = readahead_gfp_mask(mapping); if (!mapping_large_folio_support(mapping) || ra->size < 4) goto fallback; limit = min(limit, index + ra->size - 1); if (new_order < MAX_PAGECACHE_ORDER) { new_order += 2; new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); new_order = min_t(unsigned int, new_order, ilog2(ra->size)); } filemap_invalidate_lock_shared(mapping); while (index <= limit) { unsigned int order = new_order; /* Align with smaller pages if needed */ if (index & ((1UL << order) - 1)) order = __ffs(index); /* Don't allocate pages past EOF */ while (index + (1UL << order) - 1 > limit) order--; err = ra_alloc_folio(ractl, index, mark, order, gfp); if (err) break; index += 1UL << order; } if (index > limit) { ra->size += index - limit - 1; ra->async_size += index - limit - 1; } read_pages(ractl); filemap_invalidate_unlock_shared(mapping); /* * If there were already pages in the page cache, then we may have * left some gaps. Let the regular readahead code take care of this * situation. 
*/ if (!err) return; fallback: do_page_cache_ra(ractl, ra->size, ra->async_size); } /* * A minimal readahead algorithm for trivial sequential/random reads. */ static void ondemand_readahead(struct readahead_control *ractl, struct folio *folio, unsigned long req_size) { struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host); struct file_ra_state *ra = ractl->ra; unsigned long max_pages = ra->ra_pages; unsigned long add_pages; pgoff_t index = readahead_index(ractl); pgoff_t expected, prev_index; unsigned int order = folio ? folio_order(folio) : 0; /* * If the request exceeds the readahead window, allow the read to * be up to the optimal hardware IO size */ if (req_size > max_pages && bdi->io_pages > max_pages) max_pages = min(req_size, bdi->io_pages); /* * start of file */ if (!index) goto initial_readahead; /* * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; ra->size = get_next_ra_size(ra, max_pages); ra->async_size = ra->size; goto readit; } /* * Hit a marked folio without valid readahead state. * E.g. interleaved reads. * Query the pagecache for async_size, which normally equals to * readahead size. Ramp it up and use it as the new readahead size. */ if (folio) { pgoff_t start; rcu_read_lock(); start = page_cache_next_miss(ractl->mapping, index + 1, max_pages); rcu_read_unlock(); if (!start || start - index > max_pages) return; ra->start = start; ra->size = start - index; /* old async_size */ ra->size += req_size; ra->size = get_next_ra_size(ra, max_pages); ra->async_size = ra->size; goto readit; } /* * oversize read */ if (req_size > max_pages) goto initial_readahead; /* * sequential cache miss * trivial case: (index - prev_index) == 1 * unaligned reads: (index - prev_index) == 0 */ prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT; if (index - prev_index <= 1UL) goto initial_readahead; /* * Query the page cache and look for the traces(cached history pages) * that a sequential stream would leave behind. */ if (try_context_readahead(ractl->mapping, ra, index, req_size, max_pages)) goto readit; /* * standalone, small random read * Read as is, and do not pollute the readahead state. */ do_page_cache_ra(ractl, req_size, 0); return; initial_readahead: ra->start = index; ra->size = get_init_ra_size(req_size, max_pages); ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; readit: /* * Will this read hit the readahead marker made by itself? * If so, trigger the readahead marker hit now, and merge * the resulted next readahead window into the current one. * Take care of maximum IO pages as above. */ if (index == ra->start && ra->size == ra->async_size) { add_pages = get_next_ra_size(ra, max_pages); if (ra->size + add_pages <= max_pages) { ra->async_size = add_pages; ra->size += add_pages; } else { ra->size = max_pages; ra->async_size = max_pages >> 1; } } ractl->_index = ra->start; page_cache_ra_order(ractl, ra, order); } void page_cache_sync_ra(struct readahead_control *ractl, unsigned long req_count) { bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM); /* * Even if readahead is disabled, issue this request as readahead * as we'll need it to satisfy the requested range. The forced * readahead will do the right thing and limit the read to just the * requested range, which we'll set to 1 page for this case. 
*/ if (!ractl->ra->ra_pages || blk_cgroup_congested()) { if (!ractl->file) return; req_count = 1; do_forced_ra = true; } /* be dumb */ if (do_forced_ra) { force_page_cache_ra(ractl, req_count); return; } ondemand_readahead(ractl, NULL, req_count); } EXPORT_SYMBOL_GPL(page_cache_sync_ra); void page_cache_async_ra(struct readahead_control *ractl, struct folio *folio, unsigned long req_count) { /* no readahead */ if (!ractl->ra->ra_pages) return; /* * Same bit is used for PG_readahead and PG_reclaim. */ if (folio_test_writeback(folio)) return; folio_clear_readahead(folio); if (blk_cgroup_congested()) return; ondemand_readahead(ractl, folio, req_count); } EXPORT_SYMBOL_GPL(page_cache_async_ra); ssize_t ksys_readahead(int fd, loff_t offset, size_t count) { ssize_t ret; struct fd f; ret = -EBADF; f = fdget(fd); if (!f.file || !(f.file->f_mode & FMODE_READ)) goto out; /* * The readahead() syscall is intended to run only on files * that can execute readahead. If readahead is not possible * on this file, then we must return -EINVAL. */ ret = -EINVAL; if (!f.file->f_mapping || !f.file->f_mapping->a_ops || (!S_ISREG(file_inode(f.file)->i_mode) && !S_ISBLK(file_inode(f.file)->i_mode))) goto out; ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED); out: fdput(f); return ret; } SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) { return ksys_readahead(fd, offset, count); } #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_READAHEAD) COMPAT_SYSCALL_DEFINE4(readahead, int, fd, compat_arg_u64_dual(offset), size_t, count) { return ksys_readahead(fd, compat_arg_u64_glue(offset), count); } #endif /** * readahead_expand - Expand a readahead request * @ractl: The request to be expanded * @new_start: The revised start * @new_len: The revised size of the request * * Attempt to expand a readahead request outwards from the current size to the * specified size by inserting locked pages before and after the current window * to increase the size to the new window. This may involve the insertion of * THPs, in which case the window may get expanded even beyond what was * requested. * * The algorithm will stop if it encounters a conflicting page already in the * pagecache and leave a smaller expansion than requested. * * The caller must check for this by examining the revised @ractl object for a * different expansion than was requested. 
*/ void readahead_expand(struct readahead_control *ractl, loff_t new_start, size_t new_len) { struct address_space *mapping = ractl->mapping; struct file_ra_state *ra = ractl->ra; pgoff_t new_index, new_nr_pages; gfp_t gfp_mask = readahead_gfp_mask(mapping); new_index = new_start / PAGE_SIZE; /* Expand the leading edge downwards */ while (ractl->_index > new_index) { unsigned long index = ractl->_index - 1; struct folio *folio = xa_load(&mapping->i_pages, index); if (folio && !xa_is_value(folio)) return; /* Folio apparently present */ folio = filemap_alloc_folio(gfp_mask, 0); if (!folio) return; if (filemap_add_folio(mapping, folio, index, gfp_mask) < 0) { folio_put(folio); return; } if (unlikely(folio_test_workingset(folio)) && !ractl->_workingset) { ractl->_workingset = true; psi_memstall_enter(&ractl->_pflags); } ractl->_nr_pages++; ractl->_index = folio->index; } new_len += new_start - readahead_pos(ractl); new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE); /* Expand the trailing edge upwards */ while (ractl->_nr_pages < new_nr_pages) { unsigned long index = ractl->_index + ractl->_nr_pages; struct folio *folio = xa_load(&mapping->i_pages, index); if (folio && !xa_is_value(folio)) return; /* Folio apparently present */ folio = filemap_alloc_folio(gfp_mask, 0); if (!folio) return; if (filemap_add_folio(mapping, folio, index, gfp_mask) < 0) { folio_put(folio); return; } if (unlikely(folio_test_workingset(folio)) && !ractl->_workingset) { ractl->_workingset = true; psi_memstall_enter(&ractl->_pflags); } ractl->_nr_pages++; if (ra) { ra->size++; ra->async_size++; } } } EXPORT_SYMBOL(readahead_expand);
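/*
 * Userspace sketch of the readahead(2) entry point implemented above by
 * ksys_readahead(): ask the kernel to pre-populate the page cache for the
 * first 2 MiB of a file before reading it. Purely illustrative; error
 * handling is minimal and the file path is an assumption.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/var/log/syslog", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Ends up in ksys_readahead() -> vfs_fadvise(POSIX_FADV_WILLNEED). */
	if (readahead(fd, 0, 2 * 1024 * 1024) < 0)
		perror("readahead");

	close(fd);
	return 0;
}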
// SPDX-License-Identifier: GPL-2.0 /* * All the USB notify logic * * (C) Copyright 2005 Greg Kroah-Hartman <gregkh@suse.de> * * notifier functions originally based on those in kernel/sys.c * but fixed up to not be so broken. * * Released under the GPLv2 only. */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/notifier.h> #include <linux/usb.h> #include <linux/mutex.h> #include "usb.h" static BLOCKING_NOTIFIER_HEAD(usb_notifier_list); /** * usb_register_notify - register a notifier callback whenever a usb change happens * @nb: pointer to the notifier block for the callback events. * * These changes are either USB devices or busses being added or removed. */ void usb_register_notify(struct notifier_block *nb) { blocking_notifier_chain_register(&usb_notifier_list, nb); } EXPORT_SYMBOL_GPL(usb_register_notify); /** * usb_unregister_notify - unregister a notifier callback * @nb: pointer to the notifier block for the callback events. * * usb_register_notify() must have been previously called for this function * to work properly. */ void usb_unregister_notify(struct notifier_block *nb) { blocking_notifier_chain_unregister(&usb_notifier_list, nb); } EXPORT_SYMBOL_GPL(usb_unregister_notify); void usb_notify_add_device(struct usb_device *udev) { blocking_notifier_call_chain(&usb_notifier_list, USB_DEVICE_ADD, udev); } void usb_notify_remove_device(struct usb_device *udev) { blocking_notifier_call_chain(&usb_notifier_list, USB_DEVICE_REMOVE, udev); } void usb_notify_add_bus(struct usb_bus *ubus) { blocking_notifier_call_chain(&usb_notifier_list, USB_BUS_ADD, ubus); } void usb_notify_remove_bus(struct usb_bus *ubus) { blocking_notifier_call_chain(&usb_notifier_list, USB_BUS_REMOVE, ubus); }
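/*
 * Hypothetical consumer of the notifier chain above (not part of
 * drivers/usb/core): any kernel module can watch device hotplug by
 * registering a notifier_block with usb_register_notify(). The module and
 * callback names are illustrative.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/usb.h>

static int example_usb_notify(struct notifier_block *nb, unsigned long action,
			      void *data)
{
	switch (action) {
	case USB_DEVICE_ADD:
		pr_info("example: USB device added\n");
		break;
	case USB_DEVICE_REMOVE:
		pr_info("example: USB device removed\n");
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_usb_notify,
};

static int __init example_init(void)
{
	usb_register_notify(&example_nb);
	return 0;
}

static void __exit example_exit(void)
{
	usb_unregister_notify(&example_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");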
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SPINLOCK_H #define __LINUX_SPINLOCK_H #define __LINUX_INSIDE_SPINLOCK_H /* * include/linux/spinlock.h - generic spinlock/rwlock declarations * * here's the role of the various spinlock/rwlock related include files: * * on SMP builds: * * asm/spinlock_types.h: contains the arch_spinlock_t/arch_rwlock_t and the * initializers * * linux/spinlock_types_raw: * The raw types and initializers * linux/spinlock_types.h: * defines the generic type and initializers * * asm/spinlock.h: contains the arch_spin_*()/etc. lowlevel * implementations, mostly inline assembly code * * (also included on UP-debug builds:) * * linux/spinlock_api_smp.h: * contains the prototypes for the _spin_*() APIs. * * linux/spinlock.h: builds the final spin_*() APIs. * * on UP builds: * * linux/spinlock_type_up.h: * contains the generic, simplified UP spinlock type. * (which is an empty structure on non-debug builds) * * linux/spinlock_types_raw: * The raw RT types and initializers * linux/spinlock_types.h: * defines the generic type and initializers * * linux/spinlock_up.h: * contains the arch_spin_*()/etc. version of UP * builds.
(which are NOPs on non-debug, non-preempt * builds) * * (included on UP-non-debug builds:) * * linux/spinlock_api_up.h: * builds the _spin_*() APIs. * * linux/spinlock.h: builds the final spin_*() APIs. */ #include <linux/typecheck.h> #include <linux/preempt.h> #include <linux/linkage.h> #include <linux/compiler.h> #include <linux/irqflags.h> #include <linux/thread_info.h> #include <linux/stringify.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> #include <linux/cleanup.h> #include <asm/barrier.h> #include <asm/mmiowb.h> /* * Must define these before including other files, inline functions need them */ #define LOCK_SECTION_NAME ".text..lock."KBUILD_BASENAME #define LOCK_SECTION_START(extra) \ ".subsection 1\n\t" \ extra \ ".ifndef " LOCK_SECTION_NAME "\n\t" \ LOCK_SECTION_NAME ":\n\t" \ ".endif\n" #define LOCK_SECTION_END \ ".previous\n\t" #define __lockfunc __section(".spinlock.text") /* * Pull the arch_spinlock_t and arch_rwlock_t definitions: */ #include <linux/spinlock_types.h> /* * Pull the arch_spin*() functions/declarations (UP-nondebug doesn't need them): */ #ifdef CONFIG_SMP # include <asm/spinlock.h> #else # include <linux/spinlock_up.h> #endif #ifdef CONFIG_DEBUG_SPINLOCK extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, struct lock_class_key *key, short inner); # define raw_spin_lock_init(lock) \ do { \ static struct lock_class_key __key; \ \ __raw_spin_lock_init((lock), #lock, &__key, LD_WAIT_SPIN); \ } while (0) #else # define raw_spin_lock_init(lock) \ do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0) #endif #define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) #ifdef arch_spin_is_contended #define raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock) #else #define raw_spin_is_contended(lock) (((void)(lock), 0)) #endif /*arch_spin_is_contended*/ /* * smp_mb__after_spinlock() provides the equivalent of a full memory barrier * between program-order earlier lock acquisitions and program-order later * memory accesses. * * This guarantees that the following two properties hold: * * 1) Given the snippet: * * { X = 0; Y = 0; } * * CPU0 CPU1 * * WRITE_ONCE(X, 1); WRITE_ONCE(Y, 1); * spin_lock(S); smp_mb(); * smp_mb__after_spinlock(); r1 = READ_ONCE(X); * r0 = READ_ONCE(Y); * spin_unlock(S); * * it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0) * and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments * preceding the call to smp_mb__after_spinlock() in __schedule() and in * try_to_wake_up(). * * 2) Given the snippet: * * { X = 0; Y = 0; } * * CPU0 CPU1 CPU2 * * spin_lock(S); spin_lock(S); r1 = READ_ONCE(Y); * WRITE_ONCE(X, 1); smp_mb__after_spinlock(); smp_rmb(); * spin_unlock(S); r0 = READ_ONCE(X); r2 = READ_ONCE(X); * WRITE_ONCE(Y, 1); * spin_unlock(S); * * it is forbidden that CPU0's critical section executes before CPU1's * critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1) * and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments * preceding the calls to smp_rmb() in try_to_wake_up() for similar * snippets but "projected" onto two CPUs. * * Property (2) upgrades the lock to an RCsc lock. * * Since most load-store architectures implement ACQUIRE with an smp_mb() after * the LL/SC loop, they need no further barriers. Similarly all our TSO * architectures imply an smp_mb() for each atomic instruction and equally don't * need more. * * Architectures that can implement ACQUIRE better need to take care. 
*/ #ifndef smp_mb__after_spinlock #define smp_mb__after_spinlock() kcsan_mb() #endif #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); extern int do_raw_spin_trylock(raw_spinlock_t *lock); extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock); #else static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) { __acquire(lock); arch_spin_lock(&lock->raw_lock); mmiowb_spin_lock(); } static inline int do_raw_spin_trylock(raw_spinlock_t *lock) { int ret = arch_spin_trylock(&(lock)->raw_lock); if (ret) mmiowb_spin_lock(); return ret; } static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) { mmiowb_spin_unlock(); arch_spin_unlock(&lock->raw_lock); __release(lock); } #endif /* * Define the various spin_lock methods. Note we define these * regardless of whether CONFIG_SMP or CONFIG_PREEMPTION are set. The * various methods are defined as nops in the case they are not * required. */ #define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) #define raw_spin_lock(lock) _raw_spin_lock(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock_nested(lock, subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) \ do { \ typecheck(struct lockdep_map *, &(nest_lock)->dep_map);\ _raw_spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) #else /* * Always evaluate the 'subclass' argument to avoid that the compiler * warns about set-but-not-used variables when building with * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1. */ # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) #endif #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) #define raw_spin_lock_irqsave(lock, flags) \ do { \ typecheck(unsigned long, flags); \ flags = _raw_spin_lock_irqsave(lock); \ } while (0) #ifdef CONFIG_DEBUG_LOCK_ALLOC #define raw_spin_lock_irqsave_nested(lock, flags, subclass) \ do { \ typecheck(unsigned long, flags); \ flags = _raw_spin_lock_irqsave_nested(lock, subclass); \ } while (0) #else #define raw_spin_lock_irqsave_nested(lock, flags, subclass) \ do { \ typecheck(unsigned long, flags); \ flags = _raw_spin_lock_irqsave(lock); \ } while (0) #endif #else #define raw_spin_lock_irqsave(lock, flags) \ do { \ typecheck(unsigned long, flags); \ _raw_spin_lock_irqsave(lock, flags); \ } while (0) #define raw_spin_lock_irqsave_nested(lock, flags, subclass) \ raw_spin_lock_irqsave(lock, flags) #endif #define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock) #define raw_spin_unlock(lock) _raw_spin_unlock(lock) #define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) #define raw_spin_unlock_irqrestore(lock, flags) \ do { \ typecheck(unsigned long, flags); \ _raw_spin_unlock_irqrestore(lock, flags); \ } while (0) #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock) #define raw_spin_trylock_bh(lock) \ __cond_lock(lock, _raw_spin_trylock_bh(lock)) #define raw_spin_trylock_irq(lock) \ ({ \ local_irq_disable(); \ raw_spin_trylock(lock) ? \ 1 : ({ local_irq_enable(); 0; }); \ }) #define raw_spin_trylock_irqsave(lock, flags) \ ({ \ local_irq_save(flags); \ raw_spin_trylock(lock) ? 
\ 1 : ({ local_irq_restore(flags); 0; }); \ }) #ifndef CONFIG_PREEMPT_RT /* Include rwlock functions for !RT */ #include <linux/rwlock.h> #endif /* * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: */ #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) # include <linux/spinlock_api_smp.h> #else # include <linux/spinlock_api_up.h> #endif /* Non PREEMPT_RT kernel, map to raw spinlocks: */ #ifndef CONFIG_PREEMPT_RT /* * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock) { return &lock->rlock; } #ifdef CONFIG_DEBUG_SPINLOCK # define spin_lock_init(lock) \ do { \ static struct lock_class_key __key; \ \ __raw_spin_lock_init(spinlock_check(lock), \ #lock, &__key, LD_WAIT_CONFIG); \ } while (0) #else # define spin_lock_init(_lock) \ do { \ spinlock_check(_lock); \ *(_lock) = __SPIN_LOCK_UNLOCKED(_lock); \ } while (0) #endif static __always_inline void spin_lock(spinlock_t *lock) { raw_spin_lock(&lock->rlock); } static __always_inline void spin_lock_bh(spinlock_t *lock) { raw_spin_lock_bh(&lock->rlock); } static __always_inline int spin_trylock(spinlock_t *lock) { return raw_spin_trylock(&lock->rlock); } #define spin_lock_nested(lock, subclass) \ do { \ raw_spin_lock_nested(spinlock_check(lock), subclass); \ } while (0) #define spin_lock_nest_lock(lock, nest_lock) \ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ } while (0) static __always_inline void spin_lock_irq(spinlock_t *lock) { raw_spin_lock_irq(&lock->rlock); } #define spin_lock_irqsave(lock, flags) \ do { \ raw_spin_lock_irqsave(spinlock_check(lock), flags); \ } while (0) #define spin_lock_irqsave_nested(lock, flags, subclass) \ do { \ raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ } while (0) static __always_inline void spin_unlock(spinlock_t *lock) { raw_spin_unlock(&lock->rlock); } static __always_inline void spin_unlock_bh(spinlock_t *lock) { raw_spin_unlock_bh(&lock->rlock); } static __always_inline void spin_unlock_irq(spinlock_t *lock) { raw_spin_unlock_irq(&lock->rlock); } static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { raw_spin_unlock_irqrestore(&lock->rlock, flags); } static __always_inline int spin_trylock_bh(spinlock_t *lock) { return raw_spin_trylock_bh(&lock->rlock); } static __always_inline int spin_trylock_irq(spinlock_t *lock) { return raw_spin_trylock_irq(&lock->rlock); } #define spin_trylock_irqsave(lock, flags) \ ({ \ raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ }) /** * spin_is_locked() - Check whether a spinlock is locked. * @lock: Pointer to the spinlock. * * This function is NOT required to provide any memory ordering * guarantees; it could be used for debugging purposes or, when * additional synchronization is needed, accompanied with other * constructs (memory barriers) enforcing the synchronization. * * Returns: 1 if @lock is locked, 0 otherwise. * * Note that the function only tells you that the spinlock is * seen to be locked, not that it is locked on your CPU. * * Further, on CONFIG_SMP=n builds with CONFIG_DEBUG_SPINLOCK=n, * the return value is always 0 (see include/linux/spinlock_up.h). * Therefore you should not rely heavily on the return value. 
*/ static __always_inline int spin_is_locked(spinlock_t *lock) { return raw_spin_is_locked(&lock->rlock); } static __always_inline int spin_is_contended(spinlock_t *lock) { return raw_spin_is_contended(&lock->rlock); } #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) #else /* !CONFIG_PREEMPT_RT */ # include <linux/spinlock_rt.h> #endif /* CONFIG_PREEMPT_RT */ /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPTION, * but a general need for low latency) */ static inline int spin_needbreak(spinlock_t *lock) { #ifdef CONFIG_PREEMPTION return spin_is_contended(lock); #else return 0; #endif } /* * Check if a rwlock is contended. * Returns non-zero if there is another task waiting on the rwlock. * Returns zero if the lock is not contended or the system / underlying * rwlock implementation does not support contention detection. * Technically does not depend on CONFIG_PREEMPTION, but a general need * for low latency. */ static inline int rwlock_needbreak(rwlock_t *lock) { #ifdef CONFIG_PREEMPTION return rwlock_is_contended(lock); #else return 0; #endif } /* * Pull the atomic_t declaration: * (asm-mips/atomic.h needs above definitions) */ #include <linux/atomic.h> /** * atomic_dec_and_lock - lock on reaching reference count zero * @atomic: the atomic counter * @lock: the spinlock in question * * Decrements @atomic by 1. If the result is 0, returns true and locks * @lock. Returns false for all other cases. */ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, unsigned long *flags); #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) extern int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock); #define atomic_dec_and_raw_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_raw_lock(atomic, lock)) extern int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, unsigned long *flags); #define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) \ __cond_lock(lock, _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags))) int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, size_t max_size, unsigned int cpu_mult, gfp_t gfp, const char *name, struct lock_class_key *key); #define alloc_bucket_spinlocks(locks, lock_mask, max_size, cpu_mult, gfp) \ ({ \ static struct lock_class_key key; \ int ret; \ \ ret = __alloc_bucket_spinlocks(locks, lock_mask, max_size, \ cpu_mult, gfp, #locks, &key); \ ret; \ }) void free_bucket_spinlocks(spinlock_t *locks); DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, raw_spin_lock(_T->lock), raw_spin_unlock(_T->lock)) DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock)) DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), raw_spin_unlock(_T->lock)) DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, raw_spin_lock_irq(_T->lock), raw_spin_unlock_irq(_T->lock)) DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try, 
raw_spin_trylock_irqsave(_T->lock, _T->flags)) DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, spin_lock(_T->lock), spin_unlock(_T->lock)) DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock)) DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, spin_lock_irq(_T->lock), spin_unlock_irq(_T->lock)) DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, spin_trylock_irq(_T->lock)) DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, spin_trylock_irqsave(_T->lock, _T->flags)) DEFINE_LOCK_GUARD_1(read_lock, rwlock_t, read_lock(_T->lock), read_unlock(_T->lock)) DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t, read_lock_irq(_T->lock), read_unlock_irq(_T->lock)) DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t, read_lock_irqsave(_T->lock, _T->flags), read_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) DEFINE_LOCK_GUARD_1(write_lock, rwlock_t, write_lock(_T->lock), write_unlock(_T->lock)) DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t, write_lock_irq(_T->lock), write_unlock_irq(_T->lock)) DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t, write_lock_irqsave(_T->lock, _T->flags), write_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */
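/*
 * Usage sketch for the spinlock API above, including the scoped lock guards
 * defined at the end of the header via linux/cleanup.h. The data structure
 * and function names (example_counter and friends) are invented for this
 * example.
 */
#include <linux/spinlock.h>

struct example_counter {
	spinlock_t lock;
	unsigned long value;
};

static void example_counter_init(struct example_counter *c)
{
	spin_lock_init(&c->lock);
	c->value = 0;
}

/* Classic irqsave critical section. */
static void example_counter_add(struct example_counter *c, unsigned long n)
{
	unsigned long flags;

	spin_lock_irqsave(&c->lock, flags);
	c->value += n;
	spin_unlock_irqrestore(&c->lock, flags);
}

/* Same protection with a scoped guard: the unlock runs on scope exit. */
static unsigned long example_counter_read(struct example_counter *c)
{
	guard(spinlock_irqsave)(&c->lock);
	return c->value;
}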
// SPDX-License-Identifier: GPL-2.0
/*
 * main.c - Multi purpose firmware loading support
 *
 * Copyright (c) 2003 Manuel Estrada Sainz
 *
 * Please see Documentation/driver-api/firmware/ for more information.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/device.h>
#include <linux/kernel_read_file.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/highmem.h>
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/fs.h>
#include <linux/async.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/reboot.h>
#include <linux/security.h>
#include <linux/zstd.h>
#include <linux/xz.h>

#include <generated/utsrelease.h>

#include "../base.h"
#include "firmware.h"
#include "fallback.h"

MODULE_AUTHOR("Manuel Estrada Sainz");
MODULE_DESCRIPTION("Multi purpose firmware loading support");
MODULE_LICENSE("GPL");

struct firmware_cache {
	/* firmware_buf instance will be added into the below list */
	spinlock_t lock;
	struct list_head head;
	int state;

#ifdef CONFIG_FW_CACHE
	/*
	 * Names of firmware images which have been cached successfully
	 * will be added into the below list so that device uncache
	 * helper can trace which firmware images have been cached
	 * before.
	 */
	spinlock_t name_lock;
	struct list_head fw_names;

	struct delayed_work work;

	struct notifier_block pm_notify;
#endif
};

struct fw_cache_entry {
	struct list_head list;
	const char *name;
};

struct fw_name_devm {
	unsigned long magic;
	const char *name;
};

static inline struct fw_priv *to_fw_priv(struct kref *ref)
{
	return container_of(ref, struct fw_priv, ref);
}

#define	FW_LOADER_NO_CACHE	0
#define	FW_LOADER_START_CACHE	1

/* fw_lock could be moved to 'struct fw_sysfs' but since it is just
 * guarding for corner cases a global lock should be OK */
DEFINE_MUTEX(fw_lock);

struct firmware_cache fw_cache;
bool fw_load_abort_all;

void fw_state_init(struct fw_priv *fw_priv)
{
	struct fw_state *fw_st = &fw_priv->fw_st;

	init_completion(&fw_st->completion);
	fw_st->status = FW_STATUS_UNKNOWN;
}

static inline int fw_state_wait(struct fw_priv *fw_priv)
{
	return __fw_state_wait_common(fw_priv, MAX_SCHEDULE_TIMEOUT);
}

static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv);

static struct fw_priv *__allocate_fw_priv(const char *fw_name,
					  struct firmware_cache *fwc,
					  void *dbuf, size_t size,
					  size_t offset, u32 opt_flags)
{
	struct fw_priv *fw_priv;

	/* For a partial read, the buffer must be preallocated. */
	if ((opt_flags & FW_OPT_PARTIAL) && !dbuf)
		return NULL;

	/* Only partial reads are allowed to use an offset.
*/ if (offset != 0 && !(opt_flags & FW_OPT_PARTIAL)) return NULL; fw_priv = kzalloc(sizeof(*fw_priv), GFP_ATOMIC); if (!fw_priv) return NULL; fw_priv->fw_name = kstrdup_const(fw_name, GFP_ATOMIC); if (!fw_priv->fw_name) { kfree(fw_priv); return NULL; } kref_init(&fw_priv->ref); fw_priv->fwc = fwc; fw_priv->data = dbuf; fw_priv->allocated_size = size; fw_priv->offset = offset; fw_priv->opt_flags = opt_flags; fw_state_init(fw_priv); #ifdef CONFIG_FW_LOADER_USER_HELPER INIT_LIST_HEAD(&fw_priv->pending_list); #endif pr_debug("%s: fw-%s fw_priv=%p\n", __func__, fw_name, fw_priv); return fw_priv; } static struct fw_priv *__lookup_fw_priv(const char *fw_name) { struct fw_priv *tmp; struct firmware_cache *fwc = &fw_cache; list_for_each_entry(tmp, &fwc->head, list) if (!strcmp(tmp->fw_name, fw_name)) return tmp; return NULL; } /* Returns 1 for batching firmware requests with the same name */ int alloc_lookup_fw_priv(const char *fw_name, struct firmware_cache *fwc, struct fw_priv **fw_priv, void *dbuf, size_t size, size_t offset, u32 opt_flags) { struct fw_priv *tmp; spin_lock(&fwc->lock); /* * Do not merge requests that are marked to be non-cached or * are performing partial reads. */ if (!(opt_flags & (FW_OPT_NOCACHE | FW_OPT_PARTIAL))) { tmp = __lookup_fw_priv(fw_name); if (tmp) { kref_get(&tmp->ref); spin_unlock(&fwc->lock); *fw_priv = tmp; pr_debug("batched request - sharing the same struct fw_priv and lookup for multiple requests\n"); return 1; } } tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size, offset, opt_flags); if (tmp) { INIT_LIST_HEAD(&tmp->list); if (!(opt_flags & FW_OPT_NOCACHE)) list_add(&tmp->list, &fwc->head); } spin_unlock(&fwc->lock); *fw_priv = tmp; return tmp ? 0 : -ENOMEM; } static void __free_fw_priv(struct kref *ref) __releases(&fwc->lock) { struct fw_priv *fw_priv = to_fw_priv(ref); struct firmware_cache *fwc = fw_priv->fwc; pr_debug("%s: fw-%s fw_priv=%p data=%p size=%u\n", __func__, fw_priv->fw_name, fw_priv, fw_priv->data, (unsigned int)fw_priv->size); list_del(&fw_priv->list); spin_unlock(&fwc->lock); if (fw_is_paged_buf(fw_priv)) fw_free_paged_buf(fw_priv); else if (!fw_priv->allocated_size) vfree(fw_priv->data); kfree_const(fw_priv->fw_name); kfree(fw_priv); } void free_fw_priv(struct fw_priv *fw_priv) { struct firmware_cache *fwc = fw_priv->fwc; spin_lock(&fwc->lock); if (!kref_put(&fw_priv->ref, __free_fw_priv)) spin_unlock(&fwc->lock); } #ifdef CONFIG_FW_LOADER_PAGED_BUF bool fw_is_paged_buf(struct fw_priv *fw_priv) { return fw_priv->is_paged_buf; } void fw_free_paged_buf(struct fw_priv *fw_priv) { int i; if (!fw_priv->pages) return; vunmap(fw_priv->data); for (i = 0; i < fw_priv->nr_pages; i++) __free_page(fw_priv->pages[i]); kvfree(fw_priv->pages); fw_priv->pages = NULL; fw_priv->page_array_size = 0; fw_priv->nr_pages = 0; fw_priv->data = NULL; fw_priv->size = 0; } int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { /* If the array of pages is too small, grow it */ if (fw_priv->page_array_size < pages_needed) { int new_array_size = max(pages_needed, fw_priv->page_array_size * 2); struct page **new_pages; new_pages = kvmalloc_array(new_array_size, sizeof(void *), GFP_KERNEL); if (!new_pages) return -ENOMEM; memcpy(new_pages, fw_priv->pages, fw_priv->page_array_size * sizeof(void *)); memset(&new_pages[fw_priv->page_array_size], 0, sizeof(void *) * (new_array_size - fw_priv->page_array_size)); kvfree(fw_priv->pages); fw_priv->pages = new_pages; fw_priv->page_array_size = new_array_size; } while (fw_priv->nr_pages < pages_needed) { 
fw_priv->pages[fw_priv->nr_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); if (!fw_priv->pages[fw_priv->nr_pages]) return -ENOMEM; fw_priv->nr_pages++; } return 0; } int fw_map_paged_buf(struct fw_priv *fw_priv) { /* one pages buffer should be mapped/unmapped only once */ if (!fw_priv->pages) return 0; vunmap(fw_priv->data); fw_priv->data = vmap(fw_priv->pages, fw_priv->nr_pages, 0, PAGE_KERNEL_RO); if (!fw_priv->data) return -ENOMEM; return 0; } #endif /* * ZSTD-compressed firmware support */ #ifdef CONFIG_FW_LOADER_COMPRESS_ZSTD static int fw_decompress_zstd(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer) { size_t len, out_size, workspace_size; void *workspace, *out_buf; zstd_dctx *ctx; int err; if (fw_priv->allocated_size) { out_size = fw_priv->allocated_size; out_buf = fw_priv->data; } else { zstd_frame_header params; if (zstd_get_frame_header(&params, in_buffer, in_size) || params.frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) { dev_dbg(dev, "%s: invalid zstd header\n", __func__); return -EINVAL; } out_size = params.frameContentSize; out_buf = vzalloc(out_size); if (!out_buf) return -ENOMEM; } workspace_size = zstd_dctx_workspace_bound(); workspace = kvzalloc(workspace_size, GFP_KERNEL); if (!workspace) { err = -ENOMEM; goto error; } ctx = zstd_init_dctx(workspace, workspace_size); if (!ctx) { dev_dbg(dev, "%s: failed to initialize context\n", __func__); err = -EINVAL; goto error; } len = zstd_decompress_dctx(ctx, out_buf, out_size, in_buffer, in_size); if (zstd_is_error(len)) { dev_dbg(dev, "%s: failed to decompress: %d\n", __func__, zstd_get_error_code(len)); err = -EINVAL; goto error; } if (!fw_priv->allocated_size) fw_priv->data = out_buf; fw_priv->size = len; err = 0; error: kvfree(workspace); if (err && !fw_priv->allocated_size) vfree(out_buf); return err; } #endif /* CONFIG_FW_LOADER_COMPRESS_ZSTD */ /* * XZ-compressed firmware support */ #ifdef CONFIG_FW_LOADER_COMPRESS_XZ /* show an error and return the standard error code */ static int fw_decompress_xz_error(struct device *dev, enum xz_ret xz_ret) { if (xz_ret != XZ_STREAM_END) { dev_warn(dev, "xz decompression failed (xz_ret=%d)\n", xz_ret); return xz_ret == XZ_MEM_ERROR ? 
-ENOMEM : -EINVAL; } return 0; } /* single-shot decompression onto the pre-allocated buffer */ static int fw_decompress_xz_single(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer) { struct xz_dec *xz_dec; struct xz_buf xz_buf; enum xz_ret xz_ret; xz_dec = xz_dec_init(XZ_SINGLE, (u32)-1); if (!xz_dec) return -ENOMEM; xz_buf.in_size = in_size; xz_buf.in = in_buffer; xz_buf.in_pos = 0; xz_buf.out_size = fw_priv->allocated_size; xz_buf.out = fw_priv->data; xz_buf.out_pos = 0; xz_ret = xz_dec_run(xz_dec, &xz_buf); xz_dec_end(xz_dec); fw_priv->size = xz_buf.out_pos; return fw_decompress_xz_error(dev, xz_ret); } /* decompression on paged buffer and map it */ static int fw_decompress_xz_pages(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer) { struct xz_dec *xz_dec; struct xz_buf xz_buf; enum xz_ret xz_ret; struct page *page; int err = 0; xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1); if (!xz_dec) return -ENOMEM; xz_buf.in_size = in_size; xz_buf.in = in_buffer; xz_buf.in_pos = 0; fw_priv->is_paged_buf = true; fw_priv->size = 0; do { if (fw_grow_paged_buf(fw_priv, fw_priv->nr_pages + 1)) { err = -ENOMEM; goto out; } /* decompress onto the new allocated page */ page = fw_priv->pages[fw_priv->nr_pages - 1]; xz_buf.out = kmap_local_page(page); xz_buf.out_pos = 0; xz_buf.out_size = PAGE_SIZE; xz_ret = xz_dec_run(xz_dec, &xz_buf); kunmap_local(xz_buf.out); fw_priv->size += xz_buf.out_pos; /* partial decompression means either end or error */ if (xz_buf.out_pos != PAGE_SIZE) break; } while (xz_ret == XZ_OK); err = fw_decompress_xz_error(dev, xz_ret); if (!err) err = fw_map_paged_buf(fw_priv); out: xz_dec_end(xz_dec); return err; } static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer) { /* if the buffer is pre-allocated, we can perform in single-shot mode */ if (fw_priv->data) return fw_decompress_xz_single(dev, fw_priv, in_size, in_buffer); else return fw_decompress_xz_pages(dev, fw_priv, in_size, in_buffer); } #endif /* CONFIG_FW_LOADER_COMPRESS_XZ */ /* direct firmware loading support */ static char fw_path_para[256]; static const char * const fw_path[] = { fw_path_para, "/lib/firmware/updates/" UTS_RELEASE, "/lib/firmware/updates", "/lib/firmware/" UTS_RELEASE, "/lib/firmware" }; /* * Typical usage is that passing 'firmware_class.path=$CUSTOMIZED_PATH' * from kernel command line because firmware_class is generally built in * kernel instead of module. 
*/ module_param_string(path, fw_path_para, sizeof(fw_path_para), 0644); MODULE_PARM_DESC(path, "customized firmware image search path with a higher priority than default path"); static int fw_get_filesystem_firmware(struct device *device, struct fw_priv *fw_priv, const char *suffix, int (*decompress)(struct device *dev, struct fw_priv *fw_priv, size_t in_size, const void *in_buffer)) { size_t size; int i, len, maxlen = 0; int rc = -ENOENT; char *path, *nt = NULL; size_t msize = INT_MAX; void *buffer = NULL; /* Already populated data member means we're loading into a buffer */ if (!decompress && fw_priv->data) { buffer = fw_priv->data; msize = fw_priv->allocated_size; } path = __getname(); if (!path) return -ENOMEM; wait_for_initramfs(); for (i = 0; i < ARRAY_SIZE(fw_path); i++) { size_t file_size = 0; size_t *file_size_ptr = NULL; /* skip the unset customized path */ if (!fw_path[i][0]) continue; /* strip off \n from customized path */ maxlen = strlen(fw_path[i]); if (i == 0) { nt = strchr(fw_path[i], '\n'); if (nt) maxlen = nt - fw_path[i]; } len = snprintf(path, PATH_MAX, "%.*s/%s%s", maxlen, fw_path[i], fw_priv->fw_name, suffix); if (len >= PATH_MAX) { rc = -ENAMETOOLONG; break; } fw_priv->size = 0; /* * The total file size is only examined when doing a partial * read; the "full read" case needs to fail if the whole * firmware was not completely loaded. */ if ((fw_priv->opt_flags & FW_OPT_PARTIAL) && buffer) file_size_ptr = &file_size; /* load firmware files from the mount namespace of init */ rc = kernel_read_file_from_path_initns(path, fw_priv->offset, &buffer, msize, file_size_ptr, READING_FIRMWARE); if (rc < 0) { if (!(fw_priv->opt_flags & FW_OPT_NO_WARN)) { if (rc != -ENOENT) dev_warn(device, "loading %s failed with error %d\n", path, rc); else dev_dbg(device, "loading %s failed for no such file or directory.\n", path); } continue; } size = rc; rc = 0; dev_dbg(device, "Loading firmware from %s\n", path); if (decompress) { dev_dbg(device, "f/w decompressing %s\n", fw_priv->fw_name); rc = decompress(device, fw_priv, size, buffer); /* discard the superfluous original content */ vfree(buffer); buffer = NULL; if (rc) { fw_free_paged_buf(fw_priv); continue; } } else { dev_dbg(device, "direct-loading %s\n", fw_priv->fw_name); if (!fw_priv->data) fw_priv->data = buffer; fw_priv->size = size; } fw_state_done(fw_priv); break; } __putname(path); return rc; } /* firmware holds the ownership of pages */ static void firmware_free_data(const struct firmware *fw) { /* Loaded directly? 
*/ if (!fw->priv) { vfree(fw->data); return; } free_fw_priv(fw->priv); } /* store the pages buffer info firmware from buf */ static void fw_set_page_data(struct fw_priv *fw_priv, struct firmware *fw) { fw->priv = fw_priv; fw->size = fw_priv->size; fw->data = fw_priv->data; pr_debug("%s: fw-%s fw_priv=%p data=%p size=%u\n", __func__, fw_priv->fw_name, fw_priv, fw_priv->data, (unsigned int)fw_priv->size); } #ifdef CONFIG_FW_CACHE static void fw_name_devm_release(struct device *dev, void *res) { struct fw_name_devm *fwn = res; if (fwn->magic == (unsigned long)&fw_cache) pr_debug("%s: fw_name-%s devm-%p released\n", __func__, fwn->name, res); kfree_const(fwn->name); } static int fw_devm_match(struct device *dev, void *res, void *match_data) { struct fw_name_devm *fwn = res; return (fwn->magic == (unsigned long)&fw_cache) && !strcmp(fwn->name, match_data); } static struct fw_name_devm *fw_find_devm_name(struct device *dev, const char *name) { struct fw_name_devm *fwn; fwn = devres_find(dev, fw_name_devm_release, fw_devm_match, (void *)name); return fwn; } static bool fw_cache_is_setup(struct device *dev, const char *name) { struct fw_name_devm *fwn; fwn = fw_find_devm_name(dev, name); if (fwn) return true; return false; } /* add firmware name into devres list */ static int fw_add_devm_name(struct device *dev, const char *name) { struct fw_name_devm *fwn; if (fw_cache_is_setup(dev, name)) return 0; fwn = devres_alloc(fw_name_devm_release, sizeof(struct fw_name_devm), GFP_KERNEL); if (!fwn) return -ENOMEM; fwn->name = kstrdup_const(name, GFP_KERNEL); if (!fwn->name) { devres_free(fwn); return -ENOMEM; } fwn->magic = (unsigned long)&fw_cache; devres_add(dev, fwn); return 0; } #else static bool fw_cache_is_setup(struct device *dev, const char *name) { return false; } static int fw_add_devm_name(struct device *dev, const char *name) { return 0; } #endif int assign_fw(struct firmware *fw, struct device *device) { struct fw_priv *fw_priv = fw->priv; int ret; mutex_lock(&fw_lock); if (!fw_priv->size || fw_state_is_aborted(fw_priv)) { mutex_unlock(&fw_lock); return -ENOENT; } /* * add firmware name into devres list so that we can auto cache * and uncache firmware for device. * * device may has been deleted already, but the problem * should be fixed in devres or driver core. */ /* don't cache firmware handled without uevent */ if (device && (fw_priv->opt_flags & FW_OPT_UEVENT) && !(fw_priv->opt_flags & FW_OPT_NOCACHE)) { ret = fw_add_devm_name(device, fw_priv->fw_name); if (ret) { mutex_unlock(&fw_lock); return ret; } } /* * After caching firmware image is started, let it piggyback * on request firmware. 
*/ if (!(fw_priv->opt_flags & FW_OPT_NOCACHE) && fw_priv->fwc->state == FW_LOADER_START_CACHE) fw_cache_piggyback_on_request(fw_priv); /* pass the pages buffer to driver at the last minute */ fw_set_page_data(fw_priv, fw); mutex_unlock(&fw_lock); return 0; } /* prepare firmware and firmware_buf structs; * return 0 if a firmware is already assigned, 1 if need to load one, * or a negative error code */ static int _request_firmware_prepare(struct firmware **firmware_p, const char *name, struct device *device, void *dbuf, size_t size, size_t offset, u32 opt_flags) { struct firmware *firmware; struct fw_priv *fw_priv; int ret; *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL); if (!firmware) { dev_err(device, "%s: kmalloc(struct firmware) failed\n", __func__); return -ENOMEM; } if (firmware_request_builtin_buf(firmware, name, dbuf, size)) { dev_dbg(device, "using built-in %s\n", name); return 0; /* assigned */ } ret = alloc_lookup_fw_priv(name, &fw_cache, &fw_priv, dbuf, size, offset, opt_flags); /* * bind with 'priv' now to avoid warning in failure path * of requesting firmware. */ firmware->priv = fw_priv; if (ret > 0) { ret = fw_state_wait(fw_priv); if (!ret) { fw_set_page_data(fw_priv, firmware); return 0; /* assigned */ } } if (ret < 0) return ret; return 1; /* need to load */ } /* * Batched requests need only one wake, we need to do this step last due to the * fallback mechanism. The buf is protected with kref_get(), and it won't be * released until the last user calls release_firmware(). * * Failed batched requests are possible as well, in such cases we just share * the struct fw_priv and won't release it until all requests are woken * and have gone through this same path. */ static void fw_abort_batch_reqs(struct firmware *fw) { struct fw_priv *fw_priv; /* Loaded directly? 
*/ if (!fw || !fw->priv) return; fw_priv = fw->priv; mutex_lock(&fw_lock); if (!fw_state_is_aborted(fw_priv)) fw_state_aborted(fw_priv); mutex_unlock(&fw_lock); } #if defined(CONFIG_FW_LOADER_DEBUG) #include <crypto/hash.h> #include <crypto/sha2.h> static void fw_log_firmware_info(const struct firmware *fw, const char *name, struct device *device) { struct shash_desc *shash; struct crypto_shash *alg; u8 *sha256buf; char *outbuf; alg = crypto_alloc_shash("sha256", 0, 0); if (IS_ERR(alg)) return; sha256buf = kmalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); outbuf = kmalloc(SHA256_BLOCK_SIZE + 1, GFP_KERNEL); shash = kmalloc(sizeof(*shash) + crypto_shash_descsize(alg), GFP_KERNEL); if (!sha256buf || !outbuf || !shash) goto out_free; shash->tfm = alg; if (crypto_shash_digest(shash, fw->data, fw->size, sha256buf) < 0) goto out_shash; for (int i = 0; i < SHA256_DIGEST_SIZE; i++) sprintf(&outbuf[i * 2], "%02x", sha256buf[i]); outbuf[SHA256_BLOCK_SIZE] = 0; dev_dbg(device, "Loaded FW: %s, sha256: %s\n", name, outbuf); out_shash: crypto_free_shash(alg); out_free: kfree(shash); kfree(outbuf); kfree(sha256buf); } #else static void fw_log_firmware_info(const struct firmware *fw, const char *name, struct device *device) {} #endif /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, struct device *device, void *buf, size_t size, size_t offset, u32 opt_flags) { struct firmware *fw = NULL; struct cred *kern_cred = NULL; const struct cred *old_cred; bool nondirect = false; int ret; if (!firmware_p) return -EINVAL; if (!name || name[0] == '\0') { ret = -EINVAL; goto out; } ret = _request_firmware_prepare(&fw, name, device, buf, size, offset, opt_flags); if (ret <= 0) /* error or already assigned */ goto out; /* * We are about to try to access the firmware file. Because we may have been * called by a driver when serving an unrelated request from userland, we use * the kernel credentials to read the file. */ kern_cred = prepare_kernel_cred(&init_task); if (!kern_cred) { ret = -ENOMEM; goto out; } old_cred = override_creds(kern_cred); ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); /* Only full reads can support decompression, platform, and sysfs. */ if (!(opt_flags & FW_OPT_PARTIAL)) nondirect = true; #ifdef CONFIG_FW_LOADER_COMPRESS_ZSTD if (ret == -ENOENT && nondirect) ret = fw_get_filesystem_firmware(device, fw->priv, ".zst", fw_decompress_zstd); #endif #ifdef CONFIG_FW_LOADER_COMPRESS_XZ if (ret == -ENOENT && nondirect) ret = fw_get_filesystem_firmware(device, fw->priv, ".xz", fw_decompress_xz); #endif if (ret == -ENOENT && nondirect) ret = firmware_fallback_platform(fw->priv); if (ret) { if (!(opt_flags & FW_OPT_NO_WARN)) dev_warn(device, "Direct firmware load for %s failed with error %d\n", name, ret); if (nondirect) ret = firmware_fallback_sysfs(fw, name, device, opt_flags, ret); } else ret = assign_fw(fw, device); revert_creds(old_cred); put_cred(kern_cred); out: if (ret < 0) { fw_abort_batch_reqs(fw); release_firmware(fw); fw = NULL; } else { fw_log_firmware_info(fw, name, device); } *firmware_p = fw; return ret; } /** * request_firmware() - send firmware request and wait for it * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded * * @firmware_p will be used to return a firmware image by the name * of @name for device @device. * * Should be called from user context where sleeping is allowed. 
* * @name will be used as $FIRMWARE in the uevent environment and * should be distinctive enough not to be confused with any other * firmware image for this or any other device. * * Caller must hold the reference count of @device. * * The function can be called safely inside device's suspend and * resume callback. **/ int request_firmware(const struct firmware **firmware_p, const char *name, struct device *device) { int ret; /* Need to pin this module until return */ __module_get(THIS_MODULE); ret = _request_firmware(firmware_p, name, device, NULL, 0, 0, FW_OPT_UEVENT); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL(request_firmware); /** * firmware_request_nowarn() - request for an optional fw module * @firmware: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded * * This function is similar in behaviour to request_firmware(), except it * doesn't produce warning messages when the file is not found. The sysfs * fallback mechanism is enabled if direct filesystem lookup fails. However, * failures to find the firmware file with it are still suppressed. It is * therefore up to the driver to check for the return value of this call and to * decide when to inform the users of errors. **/ int firmware_request_nowarn(const struct firmware **firmware, const char *name, struct device *device) { int ret; /* Need to pin this module until return */ __module_get(THIS_MODULE); ret = _request_firmware(firmware, name, device, NULL, 0, 0, FW_OPT_UEVENT | FW_OPT_NO_WARN); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(firmware_request_nowarn); /** * request_firmware_direct() - load firmware directly without usermode helper * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded * * This function works pretty much like request_firmware(), but this doesn't * fall back to usermode helper even if the firmware couldn't be loaded * directly from fs. Hence it's useful for loading optional firmwares, which * aren't always present, without extra long timeouts of udev. **/ int request_firmware_direct(const struct firmware **firmware_p, const char *name, struct device *device) { int ret; __module_get(THIS_MODULE); ret = _request_firmware(firmware_p, name, device, NULL, 0, 0, FW_OPT_UEVENT | FW_OPT_NO_WARN | FW_OPT_NOFALLBACK_SYSFS); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(request_firmware_direct); /** * firmware_request_platform() - request firmware with platform-fw fallback * @firmware: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded * * This function is similar in behaviour to request_firmware, except that if * direct filesystem lookup fails, it will fallback to looking for a copy of the * requested firmware embedded in the platform's main (e.g. UEFI) firmware. 
**/ int firmware_request_platform(const struct firmware **firmware, const char *name, struct device *device) { int ret; /* Need to pin this module until return */ __module_get(THIS_MODULE); ret = _request_firmware(firmware, name, device, NULL, 0, 0, FW_OPT_UEVENT | FW_OPT_FALLBACK_PLATFORM); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(firmware_request_platform); /** * firmware_request_cache() - cache firmware for suspend so resume can use it * @name: name of firmware file * @device: device for which firmware should be cached for * * There are some devices with an optimization that enables the device to not * require loading firmware on system reboot. This optimization may still * require the firmware present on resume from suspend. This routine can be * used to ensure the firmware is present on resume from suspend in these * situations. This helper is not compatible with drivers which use * request_firmware_into_buf() or request_firmware_nowait() with no uevent set. **/ int firmware_request_cache(struct device *device, const char *name) { int ret; mutex_lock(&fw_lock); ret = fw_add_devm_name(device, name); mutex_unlock(&fw_lock); return ret; } EXPORT_SYMBOL_GPL(firmware_request_cache); /** * request_firmware_into_buf() - load firmware into a previously allocated buffer * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded and DMA region allocated * @buf: address of buffer to load firmware into * @size: size of buffer * * This function works pretty much like request_firmware(), but it doesn't * allocate a buffer to hold the firmware data. Instead, the firmware * is loaded directly into the buffer pointed to by @buf and the @firmware_p * data member is pointed at @buf. * * This function doesn't cache firmware either. */ int request_firmware_into_buf(const struct firmware **firmware_p, const char *name, struct device *device, void *buf, size_t size) { int ret; if (fw_cache_is_setup(device, name)) return -EOPNOTSUPP; __module_get(THIS_MODULE); ret = _request_firmware(firmware_p, name, device, buf, size, 0, FW_OPT_UEVENT | FW_OPT_NOCACHE); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL(request_firmware_into_buf); /** * request_partial_firmware_into_buf() - load partial firmware into a previously allocated buffer * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded and DMA region allocated * @buf: address of buffer to load firmware into * @size: size of buffer * @offset: offset into file to read * * This function works pretty much like request_firmware_into_buf except * it allows a partial read of the file. 
*/ int request_partial_firmware_into_buf(const struct firmware **firmware_p, const char *name, struct device *device, void *buf, size_t size, size_t offset) { int ret; if (fw_cache_is_setup(device, name)) return -EOPNOTSUPP; __module_get(THIS_MODULE); ret = _request_firmware(firmware_p, name, device, buf, size, offset, FW_OPT_UEVENT | FW_OPT_NOCACHE | FW_OPT_PARTIAL); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL(request_partial_firmware_into_buf); /** * release_firmware() - release the resource associated with a firmware image * @fw: firmware resource to release **/ void release_firmware(const struct firmware *fw) { if (fw) { if (!firmware_is_builtin(fw)) firmware_free_data(fw); kfree(fw); } } EXPORT_SYMBOL(release_firmware); /* Async support */ struct firmware_work { struct work_struct work; struct module *module; const char *name; struct device *device; void *context; void (*cont)(const struct firmware *fw, void *context); u32 opt_flags; }; static void request_firmware_work_func(struct work_struct *work) { struct firmware_work *fw_work; const struct firmware *fw; fw_work = container_of(work, struct firmware_work, work); _request_firmware(&fw, fw_work->name, fw_work->device, NULL, 0, 0, fw_work->opt_flags); fw_work->cont(fw, fw_work->context); put_device(fw_work->device); /* taken in request_firmware_nowait() */ module_put(fw_work->module); kfree_const(fw_work->name); kfree(fw_work); } /** * request_firmware_nowait() - asynchronous version of request_firmware * @module: module requesting the firmware * @uevent: sends uevent to copy the firmware image if this flag * is non-zero else the firmware copy must be done manually. * @name: name of firmware file * @device: device for which firmware is being loaded * @gfp: allocation flags * @context: will be passed over to @cont, and * @fw may be %NULL if firmware request fails. * @cont: function will be called asynchronously when the firmware * request is over. * * Caller must hold the reference count of @device. * * Asynchronous variant of request_firmware() for user contexts: * - sleep for as small periods as possible since it may * increase kernel boot time of built-in device drivers * requesting firmware in their ->probe() methods, if * @gfp is GFP_KERNEL. * * - can't sleep at all if @gfp is GFP_ATOMIC. **/ int request_firmware_nowait( struct module *module, bool uevent, const char *name, struct device *device, gfp_t gfp, void *context, void (*cont)(const struct firmware *fw, void *context)) { struct firmware_work *fw_work; fw_work = kzalloc(sizeof(struct firmware_work), gfp); if (!fw_work) return -ENOMEM; fw_work->module = module; fw_work->name = kstrdup_const(name, gfp); if (!fw_work->name) { kfree(fw_work); return -ENOMEM; } fw_work->device = device; fw_work->context = context; fw_work->cont = cont; fw_work->opt_flags = FW_OPT_NOWAIT | (uevent ? 
FW_OPT_UEVENT : FW_OPT_USERHELPER); if (!uevent && fw_cache_is_setup(device, name)) { kfree_const(fw_work->name); kfree(fw_work); return -EOPNOTSUPP; } if (!try_module_get(module)) { kfree_const(fw_work->name); kfree(fw_work); return -EFAULT; } get_device(fw_work->device); INIT_WORK(&fw_work->work, request_firmware_work_func); schedule_work(&fw_work->work); return 0; } EXPORT_SYMBOL(request_firmware_nowait); #ifdef CONFIG_FW_CACHE static ASYNC_DOMAIN_EXCLUSIVE(fw_cache_domain); /** * cache_firmware() - cache one firmware image in kernel memory space * @fw_name: the firmware image name * * Cache firmware in kernel memory so that drivers can use it when * system isn't ready for them to request firmware image from userspace. * Once it returns successfully, driver can use request_firmware or its * nowait version to get the cached firmware without any interacting * with userspace * * Return 0 if the firmware image has been cached successfully * Return !0 otherwise * */ static int cache_firmware(const char *fw_name) { int ret; const struct firmware *fw; pr_debug("%s: %s\n", __func__, fw_name); ret = request_firmware(&fw, fw_name, NULL); if (!ret) kfree(fw); pr_debug("%s: %s ret=%d\n", __func__, fw_name, ret); return ret; } static struct fw_priv *lookup_fw_priv(const char *fw_name) { struct fw_priv *tmp; struct firmware_cache *fwc = &fw_cache; spin_lock(&fwc->lock); tmp = __lookup_fw_priv(fw_name); spin_unlock(&fwc->lock); return tmp; } /** * uncache_firmware() - remove one cached firmware image * @fw_name: the firmware image name * * Uncache one firmware image which has been cached successfully * before. * * Return 0 if the firmware cache has been removed successfully * Return !0 otherwise * */ static int uncache_firmware(const char *fw_name) { struct fw_priv *fw_priv; struct firmware fw; pr_debug("%s: %s\n", __func__, fw_name); if (firmware_request_builtin(&fw, fw_name)) return 0; fw_priv = lookup_fw_priv(fw_name); if (fw_priv) { free_fw_priv(fw_priv); return 0; } return -EINVAL; } static struct fw_cache_entry *alloc_fw_cache_entry(const char *name) { struct fw_cache_entry *fce; fce = kzalloc(sizeof(*fce), GFP_ATOMIC); if (!fce) goto exit; fce->name = kstrdup_const(name, GFP_ATOMIC); if (!fce->name) { kfree(fce); fce = NULL; goto exit; } exit: return fce; } static int __fw_entry_found(const char *name) { struct firmware_cache *fwc = &fw_cache; struct fw_cache_entry *fce; list_for_each_entry(fce, &fwc->fw_names, list) { if (!strcmp(fce->name, name)) return 1; } return 0; } static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv) { const char *name = fw_priv->fw_name; struct firmware_cache *fwc = fw_priv->fwc; struct fw_cache_entry *fce; spin_lock(&fwc->name_lock); if (__fw_entry_found(name)) goto found; fce = alloc_fw_cache_entry(name); if (fce) { list_add(&fce->list, &fwc->fw_names); kref_get(&fw_priv->ref); pr_debug("%s: fw: %s\n", __func__, name); } found: spin_unlock(&fwc->name_lock); } static void free_fw_cache_entry(struct fw_cache_entry *fce) { kfree_const(fce->name); kfree(fce); } static void __async_dev_cache_fw_image(void *fw_entry, async_cookie_t cookie) { struct fw_cache_entry *fce = fw_entry; struct firmware_cache *fwc = &fw_cache; int ret; ret = cache_firmware(fce->name); if (ret) { spin_lock(&fwc->name_lock); list_del(&fce->list); spin_unlock(&fwc->name_lock); free_fw_cache_entry(fce); } } /* called with dev->devres_lock held */ static void dev_create_fw_entry(struct device *dev, void *res, void *data) { struct fw_name_devm *fwn = res; const char *fw_name = fwn->name; 
struct list_head *head = data; struct fw_cache_entry *fce; fce = alloc_fw_cache_entry(fw_name); if (fce) list_add(&fce->list, head); } static int devm_name_match(struct device *dev, void *res, void *match_data) { struct fw_name_devm *fwn = res; return (fwn->magic == (unsigned long)match_data); } static void dev_cache_fw_image(struct device *dev, void *data) { LIST_HEAD(todo); struct fw_cache_entry *fce; struct fw_cache_entry *fce_next; struct firmware_cache *fwc = &fw_cache; devres_for_each_res(dev, fw_name_devm_release, devm_name_match, &fw_cache, dev_create_fw_entry, &todo); list_for_each_entry_safe(fce, fce_next, &todo, list) { list_del(&fce->list); spin_lock(&fwc->name_lock); /* only one cache entry for one firmware */ if (!__fw_entry_found(fce->name)) { list_add(&fce->list, &fwc->fw_names); } else { free_fw_cache_entry(fce); fce = NULL; } spin_unlock(&fwc->name_lock); if (fce) async_schedule_domain(__async_dev_cache_fw_image, (void *)fce, &fw_cache_domain); } } static void __device_uncache_fw_images(void) { struct firmware_cache *fwc = &fw_cache; struct fw_cache_entry *fce; spin_lock(&fwc->name_lock); while (!list_empty(&fwc->fw_names)) { fce = list_entry(fwc->fw_names.next, struct fw_cache_entry, list); list_del(&fce->list); spin_unlock(&fwc->name_lock); uncache_firmware(fce->name); free_fw_cache_entry(fce); spin_lock(&fwc->name_lock); } spin_unlock(&fwc->name_lock); } /** * device_cache_fw_images() - cache devices' firmware * * If one device called request_firmware or its nowait version * successfully before, the firmware names are recored into the * device's devres link list, so device_cache_fw_images can call * cache_firmware() to cache these firmwares for the device, * then the device driver can load its firmwares easily at * time when system is not ready to complete loading firmware. */ static void device_cache_fw_images(void) { struct firmware_cache *fwc = &fw_cache; DEFINE_WAIT(wait); pr_debug("%s\n", __func__); /* cancel uncache work */ cancel_delayed_work_sync(&fwc->work); fw_fallback_set_cache_timeout(); mutex_lock(&fw_lock); fwc->state = FW_LOADER_START_CACHE; dpm_for_each_dev(NULL, dev_cache_fw_image); mutex_unlock(&fw_lock); /* wait for completion of caching firmware for all devices */ async_synchronize_full_domain(&fw_cache_domain); fw_fallback_set_default_timeout(); } /** * device_uncache_fw_images() - uncache devices' firmware * * uncache all firmwares which have been cached successfully * by device_uncache_fw_images earlier */ static void device_uncache_fw_images(void) { pr_debug("%s\n", __func__); __device_uncache_fw_images(); } static void device_uncache_fw_images_work(struct work_struct *work) { device_uncache_fw_images(); } /** * device_uncache_fw_images_delay() - uncache devices firmwares * @delay: number of milliseconds to delay uncache device firmwares * * uncache all devices's firmwares which has been cached successfully * by device_cache_fw_images after @delay milliseconds. */ static void device_uncache_fw_images_delay(unsigned long delay) { queue_delayed_work(system_power_efficient_wq, &fw_cache.work, msecs_to_jiffies(delay)); } static int fw_pm_notify(struct notifier_block *notify_block, unsigned long mode, void *unused) { switch (mode) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: case PM_RESTORE_PREPARE: /* * Here, kill pending fallback requests will only kill * non-uevent firmware request to avoid stalling suspend. 
*/ kill_pending_fw_fallback_reqs(false); device_cache_fw_images(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: case PM_POST_RESTORE: /* * In case that system sleep failed and syscore_suspend is * not called. */ mutex_lock(&fw_lock); fw_cache.state = FW_LOADER_NO_CACHE; mutex_unlock(&fw_lock); device_uncache_fw_images_delay(10 * MSEC_PER_SEC); break; } return 0; } /* stop caching firmware once syscore_suspend is reached */ static int fw_suspend(void) { fw_cache.state = FW_LOADER_NO_CACHE; return 0; } static struct syscore_ops fw_syscore_ops = { .suspend = fw_suspend, }; static int __init register_fw_pm_ops(void) { int ret; spin_lock_init(&fw_cache.name_lock); INIT_LIST_HEAD(&fw_cache.fw_names); INIT_DELAYED_WORK(&fw_cache.work, device_uncache_fw_images_work); fw_cache.pm_notify.notifier_call = fw_pm_notify; ret = register_pm_notifier(&fw_cache.pm_notify); if (ret) return ret; register_syscore_ops(&fw_syscore_ops); return ret; } static inline void unregister_fw_pm_ops(void) { unregister_syscore_ops(&fw_syscore_ops); unregister_pm_notifier(&fw_cache.pm_notify); } #else static void fw_cache_piggyback_on_request(struct fw_priv *fw_priv) { } static inline int register_fw_pm_ops(void) { return 0; } static inline void unregister_fw_pm_ops(void) { } #endif static void __init fw_cache_init(void) { spin_lock_init(&fw_cache.lock); INIT_LIST_HEAD(&fw_cache.head); fw_cache.state = FW_LOADER_NO_CACHE; } static int fw_shutdown_notify(struct notifier_block *unused1, unsigned long unused2, void *unused3) { /* * Kill all pending fallback requests to avoid both stalling shutdown, * and avoid a deadlock with the usermode_lock. */ kill_pending_fw_fallback_reqs(true); return NOTIFY_DONE; } static struct notifier_block fw_shutdown_nb = { .notifier_call = fw_shutdown_notify, }; static int __init firmware_class_init(void) { int ret; /* No need to unfold these on exit */ fw_cache_init(); ret = register_fw_pm_ops(); if (ret) return ret; ret = register_reboot_notifier(&fw_shutdown_nb); if (ret) goto out; return register_sysfs_loader(); out: unregister_fw_pm_ops(); return ret; } static void __exit firmware_class_exit(void) { unregister_fw_pm_ops(); unregister_reboot_notifier(&fw_shutdown_nb); unregister_sysfs_loader(); } fs_initcall(firmware_class_init); module_exit(firmware_class_exit);
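/*
 * Usage sketch: how a driver typically consumes the interfaces implemented
 * above. The device pointer, the image name "example/fw.bin" and the
 * example_* function names are hypothetical, made up for illustration;
 * request_firmware(), request_firmware_nowait() and release_firmware() are
 * the real interfaces defined in this file.
 */
#include <linux/firmware.h>
#include <linux/device.h>
#include <linux/module.h>

/* Synchronous load: sleeps until the image is found or the lookup fails. */
static int example_load_fw(struct device *dev)
{
	const struct firmware *fw;
	int ret;

	ret = request_firmware(&fw, "example/fw.bin", dev);
	if (ret)
		return ret;	/* e.g. -ENOENT if no such file in the search path */

	/* fw->data / fw->size stay valid until release_firmware() */
	dev_info(dev, "loaded %zu bytes of firmware\n", fw->size);

	release_firmware(fw);
	return 0;
}

/* Asynchronous variant: the callback runs from a workqueue; @fw may be NULL. */
static void example_fw_cont(const struct firmware *fw, void *context)
{
	struct device *dev = context;

	if (!fw) {
		dev_warn(dev, "firmware not available\n");
		return;
	}
	/* ...program the device from fw->data here... */
	release_firmware(fw);
}

static int example_load_fw_nowait(struct device *dev)
{
	return request_firmware_nowait(THIS_MODULE, true /* uevent */,
				       "example/fw.bin", dev, GFP_KERNEL,
				       dev, example_fw_cont);
}
/*
 * Note that release_firmware() only drops a reference on the shared fw_priv,
 * so batched requests for the same image (see alloc_lookup_fw_priv() above)
 * free the underlying buffer only after the last user is done.
 */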
/* SPDX-License-Identifier: GPL-2.0 */

/*
 * This header provides generic wrappers for memory access instrumentation that
 * the compiler cannot emit for: KASAN, KCSAN, KMSAN.
 */
#ifndef _LINUX_INSTRUMENTED_H
#define _LINUX_INSTRUMENTED_H

#include <linux/compiler.h>
#include <linux/kasan-checks.h>
#include <linux/kcsan-checks.h>
#include <linux/kmsan-checks.h>
#include <linux/types.h>

/**
 * instrument_read - instrument regular read access
 * @v: address of access
 * @size: size of access
 *
 * Instrument a regular read access. The instrumentation should be inserted
 * before the actual read happens.
 */
static __always_inline void instrument_read(const volatile void *v, size_t size)
{
	kasan_check_read(v, size);
	kcsan_check_read(v, size);
}

/**
 * instrument_write - instrument regular write access
 * @v: address of access
 * @size: size of access
 *
 * Instrument a regular write access. The instrumentation should be inserted
 * before the actual write happens.
 */
static __always_inline void instrument_write(const volatile void *v, size_t size)
{
	kasan_check_write(v, size);
	kcsan_check_write(v, size);
}

/**
 * instrument_read_write - instrument regular read-write access
 * @v: address of access
 * @size: size of access
 *
 * Instrument a regular read-write access. The instrumentation should be
 * inserted before the actual write happens.
 */
static __always_inline void instrument_read_write(const volatile void *v, size_t size)
{
	kasan_check_write(v, size);
	kcsan_check_read_write(v, size);
}

/**
 * instrument_atomic_read - instrument atomic read access
 * @v: address of access
 * @size: size of access
 *
 * Instrument an atomic read access. The instrumentation should be inserted
 * before the actual read happens.
 */
static __always_inline void instrument_atomic_read(const volatile void *v, size_t size)
{
	kasan_check_read(v, size);
	kcsan_check_atomic_read(v, size);
}

/**
 * instrument_atomic_write - instrument atomic write access
 * @v: address of access
 * @size: size of access
 *
 * Instrument an atomic write access. The instrumentation should be inserted
 * before the actual write happens.
 */
static __always_inline void instrument_atomic_write(const volatile void *v, size_t size)
{
	kasan_check_write(v, size);
	kcsan_check_atomic_write(v, size);
}

/**
 * instrument_atomic_read_write - instrument atomic read-write access
 * @v: address of access
 * @size: size of access
 *
 * Instrument an atomic read-write access. The instrumentation should be
 * inserted before the actual write happens.
 */
static __always_inline void instrument_atomic_read_write(const volatile void *v, size_t size)
{
	kasan_check_write(v, size);
	kcsan_check_atomic_read_write(v, size);
}

/**
 * instrument_copy_to_user - instrument reads of copy_to_user
 * @to: destination address
 * @from: source address
 * @n: number of bytes to copy
 *
 * Instrument reads from kernel memory, that are due to copy_to_user (and
 * variants).
The instrumentation must be inserted before the accesses. */ static __always_inline void instrument_copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); kcsan_check_read(from, n); kmsan_copy_to_user(to, from, n, 0); } /** * instrument_copy_from_user_before - add instrumentation before copy_from_user * @to: destination address * @from: source address * @n: number of bytes to copy * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted before the accesses. */ static __always_inline void instrument_copy_from_user_before(const void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); kcsan_check_write(to, n); } /** * instrument_copy_from_user_after - add instrumentation after copy_from_user * @to: destination address * @from: source address * @n: number of bytes to copy * @left: number of bytes not copied (as returned by copy_from_user) * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted after the accesses. */ static __always_inline void instrument_copy_from_user_after(const void *to, const void __user *from, unsigned long n, unsigned long left) { kmsan_unpoison_memory(to, n - left); } /** * instrument_get_user() - add instrumentation to get_user()-like macros * @to: destination variable, may not be address-taken * * get_user() and friends are fragile, so it may depend on the implementation * whether the instrumentation happens before or after the data is copied from * the userspace. */ #define instrument_get_user(to) \ ({ \ u64 __tmp = (u64)(to); \ kmsan_unpoison_memory(&__tmp, sizeof(__tmp)); \ to = __tmp; \ }) /** * instrument_put_user() - add instrumentation to put_user()-like macros * @from: source address * @ptr: userspace pointer to copy to * @size: number of bytes to copy * * put_user() and friends are fragile, so it may depend on the implementation * whether the instrumentation happens before or after the data is copied from * the userspace. */ #define instrument_put_user(from, ptr, size) \ ({ \ kmsan_copy_to_user(ptr, &from, sizeof(from), 0); \ }) #endif /* _LINUX_INSTRUMENTED_H */
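/*
 * Usage sketch: how the wrappers above are typically placed around the real
 * memory access, loosely modeled on the kernel's instrumented atomics and
 * usercopy helpers. my_atomic_inc() and my_copy_from_user() are made-up
 * names for illustration; arch_atomic_inc() and raw_copy_from_user() are the
 * underlying primitives being bracketed.
 */
#include <linux/instrumented.h>
#include <linux/atomic.h>
#include <linux/uaccess.h>

static __always_inline void my_atomic_inc(atomic_t *v)
{
	/* read-modify-write of sizeof(int) bytes at v, instrumented first */
	instrument_atomic_read_write(v, sizeof(*v));
	arch_atomic_inc(v);
}

static __always_inline unsigned long
my_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	unsigned long left;

	/* write-side checks go before the copy... */
	instrument_copy_from_user_before(to, from, n);
	left = raw_copy_from_user(to, from, n);	/* returns bytes NOT copied */
	/* ...while the KMSAN unpoisoning of the copied range goes after it */
	instrument_copy_from_user_after(to, from, n, left);

	return left;
}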
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PGTABLE_DEFS_H
#define _ASM_X86_PGTABLE_DEFS_H

#include <linux/const.h>
#include <linux/mem_encrypt.h>

#include <asm/page_types.h>

#define _PAGE_BIT_PRESENT	0	/* is present */
#define _PAGE_BIT_RW		1	/* writeable */
#define _PAGE_BIT_USER		2	/* userspace addressable */
#define _PAGE_BIT_PWT		3	/* page write through */
#define _PAGE_BIT_PCD		4	/* page cache disabled */
#define _PAGE_BIT_ACCESSED	5	/* was accessed (raised by CPU) */
#define _PAGE_BIT_DIRTY		6	/* was written to (raised by CPU) */
#define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT		7	/* on 4KB pages */
#define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
#define _PAGE_BIT_SOFTW1	9	/* available for programmer */
#define _PAGE_BIT_SOFTW2	10	/* " */
#define _PAGE_BIT_SOFTW3	11	/* " */
#define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
#define _PAGE_BIT_SOFTW4	57	/* available for programmer */
#define _PAGE_BIT_SOFTW5	58	/* available for programmer */
#define _PAGE_BIT_PKEY_BIT0	59	/* Protection Keys, bit 1/4 */
#define _PAGE_BIT_PKEY_BIT1	60	/* Protection Keys, bit 2/4 */
#define _PAGE_BIT_PKEY_BIT2	61	/* Protection Keys, bit 3/4 */
#define _PAGE_BIT_PKEY_BIT3	62	/* Protection Keys, bit 4/4 */
#define _PAGE_BIT_NX		63	/* No execute: only valid after cpuid check
*/ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 #ifdef CONFIG_X86_64 #define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */ #else /* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */ #define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */ #endif /* If _PAGE_BIT_PRESENT is clear, we use these: */ /* - if the user mapped it with PROT_NONE; pte_present gives true */ #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) #define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) #define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) #define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) #define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) #define _PAGE_SOFTW3 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW3) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS #define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0) #define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) #define _PAGE_PKEY_BIT2 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT2) #define _PAGE_PKEY_BIT3 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT3) #else #define _PAGE_PKEY_BIT0 (_AT(pteval_t, 0)) #define _PAGE_PKEY_BIT1 (_AT(pteval_t, 0)) #define _PAGE_PKEY_BIT2 (_AT(pteval_t, 0)) #define _PAGE_PKEY_BIT3 (_AT(pteval_t, 0)) #endif #define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ _PAGE_PKEY_BIT1 | \ _PAGE_PKEY_BIT2 | \ _PAGE_PKEY_BIT3) #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_KNL_ERRATUM_MASK (_PAGE_DIRTY | _PAGE_ACCESSED) #else #define _PAGE_KNL_ERRATUM_MASK 0 #endif #ifdef CONFIG_MEM_SOFT_DIRTY #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) #else #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) #endif /* * Tracking soft dirty bit when a page goes to a swap is tricky. * We need a bit which can be stored in pte _and_ not conflict * with swap entry format. On x86 bits 1-4 are *not* involved * into swap entry computation, but bit 7 is used for thp migration, * so we borrow bit 1 for soft dirty tracking. * * Please note that this bit must be treated as swap dirty page * mark if and only if the PTE/PMD has present bit clear! 
*/ #ifdef CONFIG_MEM_SOFT_DIRTY #define _PAGE_SWP_SOFT_DIRTY _PAGE_RW #else #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP #define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP) #define _PAGE_SWP_UFFD_WP _PAGE_USER #else #define _PAGE_UFFD_WP (_AT(pteval_t, 0)) #define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0)) #endif #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) #define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4) #else #define _PAGE_NX (_AT(pteval_t, 0)) #define _PAGE_DEVMAP (_AT(pteval_t, 0)) #define _PAGE_SOFTW4 (_AT(pteval_t, 0)) #endif /* * The hardware requires shadow stack to be Write=0,Dirty=1. However, * there are valid cases where the kernel might create read-only PTEs that * are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In * this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit, * to avoid creating a wrong "shadow stack" PTEs. Such PTEs have * (Write=0,SavedDirty=1,Dirty=0) set. */ #define _PAGE_SAVED_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SAVED_DIRTY) #define _PAGE_DIRTY_BITS (_PAGE_DIRTY | _PAGE_SAVED_DIRTY) #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) /* * Set of bits not changed in pte_modify. The pte's * protection key is treated like _PAGE_RW, for * instance, and is *not* included in this mask since * pte_modify() does modify it. */ #define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | \ _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \ _PAGE_DEVMAP | _PAGE_CC | _PAGE_UFFD_WP) #define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) #define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) /* * The cache modes defined here are used to translate between pure SW usage * and the HW defined cache mode bits and/or PAT entries. * * The resulting bits for PWT, PCD and PAT should be chosen in a way * to have the WB mode at index 0 (all bits clear). This is the default * right now and likely would break too much if changed. 
*/ #ifndef __ASSEMBLY__ enum page_cache_mode { _PAGE_CACHE_MODE_WB = 0, _PAGE_CACHE_MODE_WC = 1, _PAGE_CACHE_MODE_UC_MINUS = 2, _PAGE_CACHE_MODE_UC = 3, _PAGE_CACHE_MODE_WT = 4, _PAGE_CACHE_MODE_WP = 5, _PAGE_CACHE_MODE_NUM = 8 }; #endif #define _PAGE_CC (_AT(pteval_t, cc_mask)) #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) #define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) #define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE) #define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) #define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) #define __PP _PAGE_PRESENT #define __RW _PAGE_RW #define _USR _PAGE_USER #define ___A _PAGE_ACCESSED #define ___D _PAGE_DIRTY #define ___G _PAGE_GLOBAL #define __NX _PAGE_NX #define _ENC _PAGE_ENC #define __WP _PAGE_CACHE_WP #define __NC _PAGE_NOCACHE #define _PSE _PAGE_PSE #define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) #define __pg(x) __pgprot(x) #define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) #define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) #define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) #define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) #define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) #define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) /* * Page tables needs to have Write=1 in order for any lower PTEs to be * writable. This includes shadow stack memory (Write=0, Dirty=1) */ #define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) #define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX| 0| 0|___G) #define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0| 0| 0|___G) #define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX| 0| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) #define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) #define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) #define __PAGE_KERNEL_IO __PAGE_KERNEL #define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE #ifndef __ASSEMBLY__ #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) #define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) #define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) #define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) #define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) #define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) #define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) #define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) #define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) #define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC) #define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) #define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) #define PAGE_KERNEL_VVAR 
__pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) #define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) #define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) #endif /* __ASSEMBLY__ */ /* * early identity mapping pte attrib macros. */ #ifdef CONFIG_X86_64 #define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC #else #define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ #define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ #endif #ifdef CONFIG_X86_32 # include <asm/pgtable_32_types.h> #else # include <asm/pgtable_64_types.h> #endif #ifndef __ASSEMBLY__ #include <linux/types.h> /* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */ #define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) /* * Extracts the flags from a (pte|pmd|pud|pgd)val_t * This includes the protection key value. */ #define PTE_FLAGS_MASK (~PTE_PFN_MASK) typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; typedef struct { pgdval_t pgd; } pgd_t; static inline pgprot_t pgprot_nx(pgprot_t prot) { return __pgprot(pgprot_val(prot) | _PAGE_NX); } #define pgprot_nx pgprot_nx #ifdef CONFIG_X86_PAE /* * PHYSICAL_PAGE_MASK might be non-constant when SME is compiled in, so we can't * use it here. */ #define PGD_PAE_PAGE_MASK ((signed long)PAGE_MASK) #define PGD_PAE_PHYS_MASK (((1ULL << __PHYSICAL_MASK_SHIFT)-1) & PGD_PAE_PAGE_MASK) /* * PAE allows Base Address, P, PWT, PCD and AVL bits to be set in PGD entries. * All other bits are Reserved MBZ */ #define PGD_ALLOWED_BITS (PGD_PAE_PHYS_MASK | _PAGE_PRESENT | \ _PAGE_PWT | _PAGE_PCD | \ _PAGE_SOFTW1 | _PAGE_SOFTW2 | _PAGE_SOFTW3) #else /* No need to mask any bits for !PAE */ #define PGD_ALLOWED_BITS (~0ULL) #endif static inline pgd_t native_make_pgd(pgdval_t val) { return (pgd_t) { val & PGD_ALLOWED_BITS }; } static inline pgdval_t native_pgd_val(pgd_t pgd) { return pgd.pgd & PGD_ALLOWED_BITS; } static inline pgdval_t pgd_flags(pgd_t pgd) { return native_pgd_val(pgd) & PTE_FLAGS_MASK; } #if CONFIG_PGTABLE_LEVELS > 4 typedef struct { p4dval_t p4d; } p4d_t; static inline p4d_t native_make_p4d(pudval_t val) { return (p4d_t) { val }; } static inline p4dval_t native_p4d_val(p4d_t p4d) { return p4d.p4d; } #else #include <asm-generic/pgtable-nop4d.h> static inline p4d_t native_make_p4d(pudval_t val) { return (p4d_t) { .pgd = native_make_pgd((pgdval_t)val) }; } static inline p4dval_t native_p4d_val(p4d_t p4d) { return native_pgd_val(p4d.pgd); } #endif #if CONFIG_PGTABLE_LEVELS > 3 typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) { return (pud_t) { val }; } static inline pudval_t native_pud_val(pud_t pud) { return pud.pud; } #else #include <asm-generic/pgtable-nopud.h> static inline pud_t native_make_pud(pudval_t val) { return (pud_t) { .p4d.pgd = native_make_pgd(val) }; } static inline pudval_t native_pud_val(pud_t pud) { return native_pgd_val(pud.p4d.pgd); } #endif #if CONFIG_PGTABLE_LEVELS > 2 static inline pmd_t native_make_pmd(pmdval_t val) { return (pmd_t) { .pmd = val }; } static inline pmdval_t native_pmd_val(pmd_t pmd) { return pmd.pmd; } #else #include <asm-generic/pgtable-nopmd.h> static inline pmd_t native_make_pmd(pmdval_t val) { return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) }; } static inline pmdval_t native_pmd_val(pmd_t pmd) { return native_pgd_val(pmd.pud.p4d.pgd); } #endif static inline p4dval_t p4d_pfn_mask(p4d_t p4d) { /* No 512 GiB huge pages yet */ return PTE_PFN_MASK; } static inline p4dval_t p4d_flags_mask(p4d_t p4d) { return 
~p4d_pfn_mask(p4d); } static inline p4dval_t p4d_flags(p4d_t p4d) { return native_p4d_val(p4d) & p4d_flags_mask(p4d); } static inline pudval_t pud_pfn_mask(pud_t pud) { if (native_pud_val(pud) & _PAGE_PSE) return PHYSICAL_PUD_PAGE_MASK; else return PTE_PFN_MASK; } static inline pudval_t pud_flags_mask(pud_t pud) { return ~pud_pfn_mask(pud); } static inline pudval_t pud_flags(pud_t pud) { return native_pud_val(pud) & pud_flags_mask(pud); } static inline pmdval_t pmd_pfn_mask(pmd_t pmd) { if (native_pmd_val(pmd) & _PAGE_PSE) return PHYSICAL_PMD_PAGE_MASK; else return PTE_PFN_MASK; } static inline pmdval_t pmd_flags_mask(pmd_t pmd) { return ~pmd_pfn_mask(pmd); } static inline pmdval_t pmd_flags(pmd_t pmd) { return native_pmd_val(pmd) & pmd_flags_mask(pmd); } static inline pte_t native_make_pte(pteval_t val) { return (pte_t) { .pte = val }; } static inline pteval_t native_pte_val(pte_t pte) { return pte.pte; } static inline pteval_t pte_flags(pte_t pte) { return native_pte_val(pte) & PTE_FLAGS_MASK; } #define __pte2cm_idx(cb) \ ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \ (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \ (((cb) >> _PAGE_BIT_PWT) & 1)) #define __cm_idx2pte(i) \ ((((i) & 4) << (_PAGE_BIT_PAT - 2)) | \ (((i) & 2) << (_PAGE_BIT_PCD - 1)) | \ (((i) & 1) << _PAGE_BIT_PWT)) unsigned long cachemode2protval(enum page_cache_mode pcm); static inline pgprotval_t protval_4k_2_large(pgprotval_t val) { return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { return __pgprot(protval_4k_2_large(pgprot_val(pgprot))); } static inline pgprotval_t protval_large_2_4k(pgprotval_t val) { return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT_LARGE) >> (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) { return __pgprot(protval_large_2_4k(pgprot_val(pgprot))); } typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern pteval_t __default_kernel_pte_mask; extern void set_nx(void); extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); #define pgprot_writethrough pgprot_writethrough extern pgprot_t pgprot_writethrough(pgprot_t prot); /* Indicate that x86 has its own track and untrack pfn vma functions */ #define __HAVE_PFNMAP_TRACKING #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); #ifdef CONFIG_X86_32 extern void native_pagetable_init(void); #else #define native_pagetable_init paging_init #endif enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, PG_LEVEL_NUM }; #ifdef CONFIG_PROC_FS extern void update_page_count(int level, unsigned long pages); #else static inline void update_page_count(int level, unsigned long pages) { } #endif /* * Helper function that returns the kernel pagetable entry controlling * the virtual address 'address'. NULL means no pagetable entry present. * NOTE: the return type is pte_t but if the pmd is PSE then we return it * as a pte too. 
*/ extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, unsigned long numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */
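/*
 * Illustrative sketch (not part of the original header): a small user-space
 * demonstration of how a raw x86-64 PTE value decomposes into a page frame
 * number and flag bits, mirroring the PTE_PFN_MASK / PTE_FLAGS_MASK split and
 * the PWT/PCD/PAT cache-bit packing used by __pte2cm_idx() above. The EX_*
 * constants are local stand-ins assuming 4 KiB pages and a 52-bit physical
 * address space; they are not the kernel macros themselves.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT	12
#define EX_PHYS_MASK	((1ULL << 52) - 1)
#define EX_PFN_MASK	(EX_PHYS_MASK & ~((1ULL << EX_PAGE_SHIFT) - 1))
#define EX_FLAGS_MASK	(~EX_PFN_MASK)

#define EX_BIT_PWT	3
#define EX_BIT_PCD	4
#define EX_BIT_PAT	7

/* Pack the PAT/PCD/PWT bits into a 3-bit index, as __pte2cm_idx() does. */
static unsigned int ex_cache_idx(uint64_t pte)
{
	return ((pte >> (EX_BIT_PAT - 2)) & 4) |
	       ((pte >> (EX_BIT_PCD - 1)) & 2) |
	       ((pte >> EX_BIT_PWT) & 1);
}

int main(void)
{
	/* NX set, PFN 0x12345, Present+RW+User+Accessed+Dirty flag bits */
	uint64_t pte = 0x8000000012345067ULL;

	printf("pfn      = 0x%llx\n",
	       (unsigned long long)((pte & EX_PFN_MASK) >> EX_PAGE_SHIFT));
	printf("flags    = 0x%llx\n",
	       (unsigned long long)(pte & EX_FLAGS_MASK));
	printf("cacheidx = %u\n", ex_cache_idx(pte)); /* 0 == write-back */
	return 0;
}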
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/char/serial_core.h * * Copyright (C) 2000 Deep Blue Solutions Ltd. */ #ifndef LINUX_SERIAL_CORE_H #define LINUX_SERIAL_CORE_H #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/console.h> #include <linux/interrupt.h> #include <linux/circ_buf.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/tty.h> #include <linux/mutex.h> #include <linux/sysrq.h> #include <uapi/linux/serial_core.h> #ifdef CONFIG_SERIAL_CORE_CONSOLE #define uart_console(port) \ ((port)->cons && (port)->cons->index == (port)->line) #else #define uart_console(port) ({ (void)port; 0; }) #endif struct uart_port; struct serial_struct; struct serial_port_device; struct device; struct gpio_desc; /** * struct uart_ops -- interface between serial_core and the driver * * This structure describes all the operations that can be done on the * physical hardware. * * @tx_empty: ``unsigned int ()(struct uart_port *port)`` * * This function tests whether the transmitter fifo and shifter for the * @port is empty. If it is empty, this function should return * %TIOCSER_TEMT, otherwise return 0. If the port does not support this * operation, then it should return %TIOCSER_TEMT. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep * * @set_mctrl: ``void ()(struct uart_port *port, unsigned int mctrl)`` * * This function sets the modem control lines for @port to the state * described by @mctrl. The relevant bits of @mctrl are: * * - %TIOCM_RTS RTS signal. * - %TIOCM_DTR DTR signal. * - %TIOCM_OUT1 OUT1 signal. * - %TIOCM_OUT2 OUT2 signal. * - %TIOCM_LOOP Set the port into loopback mode. * * If the appropriate bit is set, the signal should be driven * active. If the bit is clear, the signal should be driven * inactive. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @get_mctrl: ``unsigned int ()(struct uart_port *port)`` * * Returns the current state of modem control inputs of @port. The state * of the outputs should not be returned, since the core keeps track of * their state. The state information should include: * * - %TIOCM_CAR state of DCD signal * - %TIOCM_CTS state of CTS signal * - %TIOCM_DSR state of DSR signal * - %TIOCM_RI state of RI signal * * The bit is set if the signal is currently driven active.
If * the port does not support CTS, DCD or DSR, the driver should * indicate that the signal is permanently active. If RI is * not available, the signal should not be indicated as active. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @stop_tx: ``void ()(struct uart_port *port)`` * * Stop transmitting characters. This might be due to the CTS line * becoming inactive or the tty layer indicating we want to stop * transmission due to an %XOFF character. * * The driver should stop transmitting characters as soon as possible. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @start_tx: ``void ()(struct uart_port *port)`` * * Start transmitting characters. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @throttle: ``void ()(struct uart_port *port)`` * * Notify the serial driver that input buffers for the line discipline are * close to full, and it should somehow signal that no more characters * should be sent to the serial port. * This will be called only if hardware assisted flow control is enabled. * * Locking: serialized with @unthrottle() and termios modification by the * tty layer. * * @unthrottle: ``void ()(struct uart_port *port)`` * * Notify the serial driver that characters can now be sent to the serial * port without fear of overrunning the input buffers of the line * disciplines. * * This will be called only if hardware assisted flow control is enabled. * * Locking: serialized with @throttle() and termios modification by the * tty layer. * * @send_xchar: ``void ()(struct uart_port *port, char ch)`` * * Transmit a high priority character, even if the port is stopped. This * is used to implement XON/XOFF flow control and tcflow(). If the serial * driver does not implement this function, the tty core will append the * character to the circular buffer and then call start_tx() / stop_tx() * to flush the data out. * * Do not transmit if @ch == '\0' (%__DISABLED_CHAR). * * Locking: none. * Interrupts: caller dependent. * * @start_rx: ``void ()(struct uart_port *port)`` * * Start receiving characters. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @stop_rx: ``void ()(struct uart_port *port)`` * * Stop receiving characters; the @port is in the process of being closed. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @enable_ms: ``void ()(struct uart_port *port)`` * * Enable the modem status interrupts. * * This method may be called multiple times. Modem status interrupts * should be disabled when the @shutdown() method is called. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @break_ctl: ``void ()(struct uart_port *port, int ctl)`` * * Control the transmission of a break signal. If @ctl is nonzero, the * break signal should be transmitted. The signal should be terminated * when another call is made with a zero @ctl. * * Locking: caller holds tty_port->mutex * * @startup: ``int ()(struct uart_port *port)`` * * Grab any interrupt resources and initialise any low level driver state. * Enable the port for reception. It should not activate RTS nor DTR; * this will be done via a separate call to @set_mctrl(). * * This method will only be called when the port is initially opened. * * Locking: port_sem taken. * Interrupts: globally disabled. 
* * @shutdown: ``void ()(struct uart_port *port)`` * * Disable the @port, disable any break condition that may be in effect, * and free any interrupt resources. It should not disable RTS nor DTR; * this will have already been done via a separate call to @set_mctrl(). * * Drivers must not access @port->state once this call has completed. * * This method will only be called when there are no more users of this * @port. * * Locking: port_sem taken. * Interrupts: caller dependent. * * @flush_buffer: ``void ()(struct uart_port *port)`` * * Flush any write buffers, reset any DMA state and stop any ongoing DMA * transfers. * * This will be called whenever the @port->state->xmit circular buffer is * cleared. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @set_termios: ``void ()(struct uart_port *port, struct ktermios *new, * struct ktermios *old)`` * * Change the @port parameters, including word length, parity, stop bits. * Update @port->read_status_mask and @port->ignore_status_mask to * indicate the types of events we are interested in receiving. Relevant * ktermios::c_cflag bits are: * * - %CSIZE - word size * - %CSTOPB - 2 stop bits * - %PARENB - parity enable * - %PARODD - odd parity (when %PARENB is in force) * - %ADDRB - address bit (changed through uart_port::rs485_config()). * - %CREAD - enable reception of characters (if not set, still receive * characters from the port, but throw them away). * - %CRTSCTS - if set, enable CTS status change reporting. * - %CLOCAL - if not set, enable modem status change reporting. * * Relevant ktermios::c_iflag bits are: * * - %INPCK - enable frame and parity error events to be passed to the TTY * layer. * - %BRKINT / %PARMRK - both of these enable break events to be passed to * the TTY layer. * - %IGNPAR - ignore parity and framing errors. * - %IGNBRK - ignore break errors. If %IGNPAR is also set, ignore overrun * errors as well. * * The interaction of the ktermios::c_iflag bits is as follows (parity * error given as an example): * * ============ ======= ======= ========================================= * Parity error INPCK IGNPAR * ============ ======= ======= ========================================= * n/a 0 n/a character received, marked as %TTY_NORMAL * None 1 n/a character received, marked as %TTY_NORMAL * Yes 1 0 character received, marked as %TTY_PARITY * Yes 1 1 character discarded * ============ ======= ======= ========================================= * * Other flags may be used (eg, xon/xoff characters) if your hardware * supports hardware "soft" flow control. * * Locking: caller holds tty_port->mutex * Interrupts: caller dependent. * This call must not sleep * * @set_ldisc: ``void ()(struct uart_port *port, struct ktermios *termios)`` * * Notifier for discipline change. See * Documentation/driver-api/tty/tty_ldisc.rst. * * Locking: caller holds tty_port->mutex * * @pm: ``void ()(struct uart_port *port, unsigned int state, * unsigned int oldstate)`` * * Perform any power management related activities on the specified @port. * @state indicates the new state (defined by enum uart_pm_state), * @oldstate indicates the previous state. * * This function should not be used to grab any resources. * * This will be called when the @port is initially opened and finally * closed, except when the @port is also the system console. This will * occur even if %CONFIG_PM is not set. * * Locking: none. * Interrupts: caller dependent. 
* * @type: ``const char *()(struct uart_port *port)`` * * Return a pointer to a string constant describing the specified @port, * or return %NULL, in which case the string 'unknown' is substituted. * * Locking: none. * Interrupts: caller dependent. * * @release_port: ``void ()(struct uart_port *port)`` * * Release any memory and IO region resources currently in use by the * @port. * * Locking: none. * Interrupts: caller dependent. * * @request_port: ``int ()(struct uart_port *port)`` * * Request any memory and IO region resources required by the port. If any * fail, no resources should be registered when this function returns, and * it should return -%EBUSY on failure. * * Locking: none. * Interrupts: caller dependent. * * @config_port: ``void ()(struct uart_port *port, int type)`` * * Perform any autoconfiguration steps required for the @port. @type * contains a bit mask of the required configuration. %UART_CONFIG_TYPE * indicates that the port requires detection and identification. * @port->type should be set to the type found, or %PORT_UNKNOWN if no * port was detected. * * %UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal, * which should be probed using standard kernel autoprobing techniques. * This is not necessary on platforms where ports have interrupts * internally hard wired (eg, system on a chip implementations). * * Locking: none. * Interrupts: caller dependent. * * @verify_port: ``int ()(struct uart_port *port, * struct serial_struct *serinfo)`` * * Verify the new serial port information contained within @serinfo is * suitable for this port type. * * Locking: none. * Interrupts: caller dependent. * * @ioctl: ``int ()(struct uart_port *port, unsigned int cmd, * unsigned long arg)`` * * Perform any port specific IOCTLs. IOCTL commands must be defined using * the standard numbering system found in <asm/ioctl.h>. * * Locking: none. * Interrupts: caller dependent. * * @poll_init: ``int ()(struct uart_port *port)`` * * Called by kgdb to perform the minimal hardware initialization needed to * support @poll_put_char() and @poll_get_char(). Unlike @startup(), this * should not request interrupts. * * Locking: %tty_mutex and tty_port->mutex taken. * Interrupts: n/a. * * @poll_put_char: ``void ()(struct uart_port *port, unsigned char ch)`` * * Called by kgdb to write a single character @ch directly to the serial * @port. It can and should block until there is space in the TX FIFO. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep * * @poll_get_char: ``int ()(struct uart_port *port)`` * * Called by kgdb to read a single character directly from the serial * port. If data is available, it should be returned; otherwise the * function should return %NO_POLL_CHAR immediately. * * Locking: none. * Interrupts: caller dependent. 
* This call must not sleep */ struct uart_ops { unsigned int (*tx_empty)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int mctrl); unsigned int (*get_mctrl)(struct uart_port *); void (*stop_tx)(struct uart_port *); void (*start_tx)(struct uart_port *); void (*throttle)(struct uart_port *); void (*unthrottle)(struct uart_port *); void (*send_xchar)(struct uart_port *, char ch); void (*stop_rx)(struct uart_port *); void (*start_rx)(struct uart_port *); void (*enable_ms)(struct uart_port *); void (*break_ctl)(struct uart_port *, int ctl); int (*startup)(struct uart_port *); void (*shutdown)(struct uart_port *); void (*flush_buffer)(struct uart_port *); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); const char *(*type)(struct uart_port *); void (*release_port)(struct uart_port *); int (*request_port)(struct uart_port *); void (*config_port)(struct uart_port *, int); int (*verify_port)(struct uart_port *, struct serial_struct *); int (*ioctl)(struct uart_port *, unsigned int, unsigned long); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct uart_port *); void (*poll_put_char)(struct uart_port *, unsigned char); int (*poll_get_char)(struct uart_port *); #endif }; #define NO_POLL_CHAR 0x00ff0000 #define UART_CONFIG_TYPE (1 << 0) #define UART_CONFIG_IRQ (1 << 1) struct uart_icount { __u32 cts; __u32 dsr; __u32 rng; __u32 dcd; __u32 rx; __u32 tx; __u32 frame; __u32 overrun; __u32 parity; __u32 brk; __u32 buf_overrun; }; typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ unsigned int (*serial_in)(struct uart_port *, int); void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int); unsigned int (*get_divisor)(struct uart_port *, unsigned int baud, unsigned int *frac); void (*set_divisor)(struct uart_port *, unsigned int baud, unsigned int quot, unsigned int quot_frac); int (*startup)(struct uart_port *port); void (*shutdown)(struct uart_port *port); void (*throttle)(struct uart_port *port); void (*unthrottle)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); void (*handle_break)(struct uart_port *); int (*rs485_config)(struct uart_port *, struct ktermios *termios, struct serial_rs485 *rs485); int (*iso7816_config)(struct uart_port *, struct serial_iso7816 *iso7816); unsigned int ctrl_id; /* optional serial core controller id */ unsigned int port_id; /* optional serial core port id */ unsigned int irq; /* irq number */ unsigned long irqflags; /* irq flags */ unsigned int uartclk; /* base uart clock */ unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char iotype; /* io access style */ #define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ #define UPIO_MEM32 
(SERIAL_IO_MEM32) /* 32b little endian */ #define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ #define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ unsigned char quirks; /* internal quirks */ /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ struct uart_icount icount; /* statistics */ struct console *cons; /* struct console, if any */ /* flags must be updated while holding port mutex */ upf_t flags; /* * These flags must be equivalent to the flags defined in * include/uapi/linux/tty_flags.h which are the userspace definitions * assigned from the serial_struct flags in uart_set_info() * [for bit definitions in the UPF_CHANGE_MASK] * * Bits [0..ASYNCB_LAST_USER] are userspace defined/visible/changeable * The remaining bits are serial-core specific and not modifiable by * userspace. */ #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) #define UPF_SPD_CUST ((__force upf_t) ASYNC_SPD_CUST /* 0x0030 */ ) #define UPF_SPD_WARP ((__force upf_t) ASYNC_SPD_WARP /* 0x1010 */ ) #define UPF_SPD_MASK ((__force upf_t) ASYNC_SPD_MASK /* 0x1030 */ ) #define UPF_SKIP_TEST ((__force upf_t) ASYNC_SKIP_TEST /* 6 */ ) #define UPF_AUTO_IRQ ((__force upf_t) ASYNC_AUTO_IRQ /* 7 */ ) #define UPF_HARDPPS_CD ((__force upf_t) ASYNC_HARDPPS_CD /* 11 */ ) #define UPF_SPD_SHI ((__force upf_t) ASYNC_SPD_SHI /* 12 */ ) #define UPF_LOW_LATENCY ((__force upf_t) ASYNC_LOW_LATENCY /* 13 */ ) #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) #define UPF_NO_THRE_TEST ((__force upf_t) BIT_ULL(19)) /* Port has hardware-assisted h/w flow control */ #define UPF_AUTO_CTS ((__force upf_t) BIT_ULL(20)) #define UPF_AUTO_RTS ((__force upf_t) BIT_ULL(21)) #define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS)) /* Port has hardware-assisted s/w flow control */ #define UPF_SOFT_FLOW ((__force upf_t) BIT_ULL(22)) #define UPF_CONS_FLOW ((__force upf_t) BIT_ULL(23)) #define UPF_SHARE_IRQ ((__force upf_t) BIT_ULL(24)) #define UPF_EXAR_EFR ((__force upf_t) BIT_ULL(25)) #define UPF_BUG_THRE ((__force upf_t) BIT_ULL(26)) /* The exact UART type is known and should not be probed. */ #define UPF_FIXED_TYPE ((__force upf_t) BIT_ULL(27)) #define UPF_BOOT_AUTOCONF ((__force upf_t) BIT_ULL(28)) #define UPF_FIXED_PORT ((__force upf_t) BIT_ULL(29)) #define UPF_DEAD ((__force upf_t) BIT_ULL(30)) #define UPF_IOREMAP ((__force upf_t) BIT_ULL(31)) #define UPF_FULL_PROBE ((__force upf_t) BIT_ULL(32)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) #define UPF_USR_MASK ((__force upf_t) (UPF_SPD_MASK|UPF_LOW_LATENCY)) #if __UPF_CHANGE_MASK > ASYNC_FLAGS #error Change mask not equivalent to userspace-visible bit defines #endif /* * Must hold termios_rwsem, port mutex and port lock to change; * can hold any one lock to read. 
*/ upstat_t status; #define UPSTAT_CTS_ENABLE ((__force upstat_t) (1 << 0)) #define UPSTAT_DCD_ENABLE ((__force upstat_t) (1 << 1)) #define UPSTAT_AUTORTS ((__force upstat_t) (1 << 2)) #define UPSTAT_AUTOCTS ((__force upstat_t) (1 << 3)) #define UPSTAT_AUTOXOFF ((__force upstat_t) (1 << 4)) #define UPSTAT_SYNC_FIFO ((__force upstat_t) (1 << 5)) bool hw_stopped; /* sw-assisted CTS flow state */ unsigned int mctrl; /* current modem ctrl settings */ unsigned int frame_time; /* frame timing in ns */ unsigned int type; /* port type */ const struct uart_ops *ops; unsigned int custom_divisor; unsigned int line; /* port index */ unsigned int minor; resource_size_t mapbase; /* for ioremap */ resource_size_t mapsize; struct device *dev; /* serial port physical parent device */ struct serial_port_device *port_dev; /* serial core port device */ unsigned long sysrq; /* sysrq timeout */ u8 sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; unsigned char console_reinit; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; struct serial_rs485 rs485_supported; /* Supported mask for serial_rs485 */ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ struct gpio_desc *rs485_rx_during_tx_gpio; /* Output GPIO that sets the state of RS485 RX during TX */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure */ static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); } /** * uart_port_lock_irq - Lock the UART port and disable interrupts * @up: Pointer to UART port structure */ static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); } /** * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts * @up: Pointer to UART port structure * @flags: Pointer to interrupt flags storage */ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); } /** * uart_port_trylock - Try to lock the UART port * @up: Pointer to UART port structure * * Returns: True if lock was acquired, false otherwise */ static inline bool uart_port_trylock(struct uart_port *up) { return spin_trylock(&up->lock); } /** * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts * @up: Pointer to UART port structure * @flags: Pointer to interrupt flags storage * * Returns: True if lock was acquired, false otherwise */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { return spin_trylock_irqsave(&up->lock, *flags); } /** * uart_port_unlock - Unlock the UART port * @up: Pointer to UART port structure */ static inline void uart_port_unlock(struct uart_port *up) { spin_unlock(&up->lock); } /** * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts * @up: Pointer to UART port structure */ static inline void uart_port_unlock_irq(struct uart_port *up) { spin_unlock_irq(&up->lock); } /** * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts * @up: Pointer to UART port structure * @flags: The saved interrupt flags for restore */ static inline void uart_port_unlock_irqrestore(struct 
uart_port *up, unsigned long flags) { spin_unlock_irqrestore(&up->lock, flags); } static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); } static inline void serial_port_out(struct uart_port *up, int offset, int value) { up->serial_out(up, offset, value); } /** * enum uart_pm_state - power states for UARTs * @UART_PM_STATE_ON: UART is powered, up and operational * @UART_PM_STATE_OFF: UART is powered off * @UART_PM_STATE_UNDEFINED: sentinel */ enum uart_pm_state { UART_PM_STATE_ON = 0, UART_PM_STATE_OFF = 3, /* number taken from ACPI */ UART_PM_STATE_UNDEFINED, }; /* * This is the state information which is persistent across opens. */ struct uart_state { struct tty_port port; enum uart_pm_state pm_state; struct circ_buf xmit; atomic_t refcount; wait_queue_head_t remove_wait; struct uart_port *uart_port; }; #define UART_XMIT_SIZE PAGE_SIZE /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 /** * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars * @up: uart_port structure describing the port * @chars: number of characters sent * * This function advances the tail of circular xmit buffer by the number of * @chars transmitted and handles accounting of transmitted bytes (into * @up's icount.tx). */ static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) { struct circ_buf *xmit = &up->state->xmit; xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1); up->icount.tx += chars; } struct module; struct tty_driver; struct uart_driver { struct module *owner; const char *driver_name; const char *dev_name; int major; int minor; int nr; struct console *cons; /* * these are private; the low level driver should not * touch these; they should be initialised to NULL */ struct uart_state *state; struct tty_driver *tty_driver; }; void uart_write_wakeup(struct uart_port *port); /** * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() * * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer */ enum UART_TX_FLAGS { UART_TX_NOSTOP = BIT(0), }; #define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct circ_buf *xmit = &__port->state->xmit; \ unsigned int pending; \ \ for (; (for_test) && (tx_ready); (for_post), __port->icount.tx++) { \ if (__port->x_char) { \ (ch) = __port->x_char; \ (put_char); \ __port->x_char = 0; \ continue; \ } \ \ if (uart_circ_empty(xmit) || uart_tx_stopped(__port)) \ break; \ \ (ch) = xmit->buf[xmit->tail]; \ (put_char); \ xmit->tail = (xmit->tail + 1) % UART_XMIT_SIZE; \ } \ \ (tx_done); \ \ pending = uart_circ_chars_pending(xmit); \ if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \ __port->ops->tx_empty(__port)) \ __port->ops->stop_tx(__port); \ } \ \ pending; \ }) /** * uart_port_tx_limited -- transmit helper for uart_port with count limiting * @port: uart port * @ch: variable to store a character to be written to the HW * @count: a limit of characters to send * @tx_ready: can HW accept more data function * @put_char: function to write a character * @tx_done: function to call after the loop is done * * This helper transmits characters from the xmit buffer to the hardware using * @put_char(). It does so until @count characters are sent and while @tx_ready * evaluates to true. * * Returns: the number of characters in the xmit buffer when done. 
* * The expression in macro parameters shall be designed as follows: * * **tx_ready:** should evaluate to true if the HW can accept more data to * be sent. This parameter can be %true, which means the HW is always ready. * * **put_char:** shall write @ch to the device of @port. * * **tx_done:** when the write loop is done, this can perform arbitrary * action before potential invocation of ops->stop_tx() happens. If the * driver does not need to do anything, use e.g. ({}). * * For all of them, @port->lock is held, interrupts are locally disabled and * the expressions must not sleep. */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) /** * uart_port_tx -- transmit helper for uart_port * @port: uart port * @ch: variable to store a character to be written to the HW * @tx_ready: can HW accept more data function * @put_char: function to write a character * * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) /** * uart_port_tx_flags -- transmit helper for uart_port with flags * @port: uart port * @ch: variable to store a character to be written to the HW * @flags: %UART_TX_NOSTOP or similar * @tx_ready: can HW accept more data function * @put_char: function to write a character * * See uart_port_tx_limited() for more details. */ #define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud); unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, const struct ktermios *old, unsigned int min, unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); /* * Calculates FIFO drain time. */ static inline unsigned long uart_fifo_timeout(struct uart_port *port) { u64 fifo_timeout = (u64)READ_ONCE(port->frame_time) * port->fifosize; /* Add .02 seconds of slop */ fifo_timeout += 20 * NSEC_PER_MSEC; return max(nsecs_to_jiffies(fifo_timeout), 1UL); } /* Base timer interval for polling */ static inline unsigned long uart_poll_timeout(struct uart_port *port) { unsigned long timeout = uart_fifo_timeout(port); return timeout > 6 ? (timeout / 2 - 2) : 1; } /* * Console helpers. 
*/ struct earlycon_device { struct console *con; struct uart_port port; char options[32]; /* e.g., 115200n8 */ unsigned int baud; }; struct earlycon_id { char name[15]; char name_term; /* In case compiler didn't '\0' term name */ char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); }; extern const struct earlycon_id __earlycon_table[]; extern const struct earlycon_id __earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used #else #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif #define OF_EARLYCON_DECLARE(_name, compat, fn) \ static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \ EARLYCON_USED_OR_UNUSED __section("__earlycon_table") \ __aligned(__alignof__(struct earlycon_id)) \ = { .name = __stringify(_name), \ .compatible = compat, \ .setup = fn } #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) int of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options); #ifdef CONFIG_SERIAL_EARLYCON extern bool earlycon_acpi_spcr_enable __initdata; int setup_earlycon(char *buf); #else static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif /* Variant of uart_console_registered() when the console_list_lock is held. */ static inline bool uart_console_registered_locked(struct uart_port *port) { return uart_console(port) && console_is_registered_locked(port->cons); } static inline bool uart_console_registered(struct uart_port *port) { return uart_console(port) && console_is_registered(port->cons); } struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, int parity, int bits, int flow); struct tty_driver *uart_console_device(struct console *co, int *index); void uart_console_write(struct uart_port *port, const char *s, unsigned int count, void (*putchar)(struct uart_port *, unsigned char)); /* * Port/driver registration/removal */ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); int uart_read_port_properties(struct uart_port *port); int uart_read_and_validate_port_properties(struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); /* * Power Management */ int uart_suspend_port(struct uart_driver *reg, struct uart_port *port); int uart_resume_port(struct uart_driver *reg, struct uart_port *port); #define uart_circ_empty(circ) ((circ)->head == (circ)->tail) #define uart_circ_clear(circ) ((circ)->head = (circ)->tail = 0) #define uart_circ_chars_pending(circ) \ (CIRC_CNT((circ)->head, (circ)->tail, UART_XMIT_SIZE)) #define uart_circ_chars_free(circ) \ (CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE)) static inline int uart_tx_stopped(struct uart_port *port) { struct tty_struct *tty = port->state->port.tty; if ((tty && tty->flow.stopped) || port->hw_stopped) return 1; return 0; } static inline bool uart_cts_enabled(struct uart_port *uport) { return !!(uport->status & UPSTAT_CTS_ENABLE); } static inline bool 
uart_softcts_mode(struct uart_port *uport) { upstat_t mask = UPSTAT_CTS_ENABLE | UPSTAT_AUTOCTS; return ((uport->status & mask) == UPSTAT_CTS_ENABLE); } /* * The following are helper functions for the low level drivers. */ void uart_handle_dcd_change(struct uart_port *uport, bool active); void uart_handle_cts_change(struct uart_port *uport, bool active); void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, u8 ch, u8 flag); void uart_xchar_out(struct uart_port *uport, int offset); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) bool uart_try_toggle_sysrq(struct uart_port *port, u8 ch); static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { handle_sysrq(ch); port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { port->sysrq_ch = ch; port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { u8 sysrq_ch; if (!port->has_sysrq) { uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { u8 sysrq_ch; if (!port->has_sysrq) { uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); } #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* * We do the SysRQ and SAK checking like this... */ static inline int uart_handle_break(struct uart_port *port) { struct uart_state *state = port->state; if (port->handle_break) port->handle_break(port); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL if (port->has_sysrq && uart_console(port)) { if (!port->sysrq) { port->sysrq = jiffies + SYSRQ_TIMEOUT; return 1; } port->sysrq = 0; } #endif if (port->flags & UPF_SAK) do_SAK(state->port.tty); return 0; } /* * UART_ENABLE_MS - determine if port should enable modem status irqs */ #define UART_ENABLE_MS(port,cflag) ((port)->flags & UPF_HARDPPS_CD || \ (cflag) & CRTSCTS || \ !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */
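/*
 * Illustrative sketch (not part of the original header): how a driver's
 * interrupt handler might drain the transmit buffer with the uart_port_tx()
 * helper documented above. EX_UART_STAT, EX_UART_DATA and EX_TX_READY are
 * hypothetical register offsets/bits for an imaginary memory-mapped UART;
 * readl()/writel() come from <linux/io.h>.
 */
#include <linux/io.h>
#include <linux/serial_core.h>

#define EX_UART_STAT	0x00	/* hypothetical status register */
#define EX_UART_DATA	0x04	/* hypothetical TX/RX data register */
#define EX_TX_READY	BIT(0)	/* hypothetical "TX FIFO has room" bit */

static irqreturn_t ex_uart_irq(int irq, void *dev_id)
{
	struct uart_port *port = dev_id;
	u8 ch;

	uart_port_lock(port);
	/*
	 * Sends the pending x_char and then bytes from the xmit circular
	 * buffer while the (hypothetical) TX-ready bit stays set; wakes up
	 * writers and calls ->stop_tx() once the buffer drains.
	 */
	uart_port_tx(port, ch,
		     readl(port->membase + EX_UART_STAT) & EX_TX_READY,
		     writel(ch, port->membase + EX_UART_DATA));
	uart_port_unlock(port);

	return IRQ_HANDLED;
}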
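/*
 * Illustrative sketch (not part of the original header): a matching receive
 * path built from the helpers above, reusing the hypothetical EX_UART_STAT
 * and EX_UART_DATA registers from the previous sketch. EX_RX_AVAIL and
 * EX_RX_OVERRUN are likewise made-up status bits; tty_flip_buffer_push()
 * comes from <linux/tty_flip.h>. Call with the port lock held.
 */
#include <linux/tty_flip.h>

#define EX_RX_AVAIL	BIT(1)	/* hypothetical "RX data available" bit */
#define EX_RX_OVERRUN	BIT(2)	/* hypothetical overrun status bit */

static void ex_uart_rx_chars(struct uart_port *port)
{
	unsigned int stat;
	u8 ch;

	while ((stat = readl(port->membase + EX_UART_STAT)) & EX_RX_AVAIL) {
		ch = readl(port->membase + EX_UART_DATA);
		port->icount.rx++;

		/* Give the console a chance to treat the byte as SysRq. */
		if (uart_handle_sysrq_char(port, ch))
			continue;

		/* Queue the byte, letting the core account for overruns. */
		uart_insert_char(port, stat, EX_RX_OVERRUN, ch, TTY_NORMAL);
	}

	tty_flip_buffer_push(&port->state->port);
}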
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CGROUP_H #define _LINUX_CGROUP_H /* * cgroup interface * * Copyright (C) 2003 BULL SA * Copyright (C) 2004-2006 Silicon Graphics, Inc.
* */ #include <linux/sched.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/rculist.h> #include <linux/cgroupstats.h> #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/kernfs.h> #include <linux/jump_label.h> #include <linux/types.h> #include <linux/ns_common.h> #include <linux/nsproxy.h> #include <linux/user_namespace.h> #include <linux/refcount.h> #include <linux/kernel_stat.h> #include <linux/cgroup-defs.h> struct kernel_clone_args; #ifdef CONFIG_CGROUPS /* * All weight knobs on the default hierarchy should use the following min, * default and max values. The default value is the logarithmic center of * MIN and MAX and allows 100x to be expressed in both directions. */ #define CGROUP_WEIGHT_MIN 1 #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 enum { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ }; /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { struct cgroup_subsys *ss; unsigned int flags; struct list_head *cset_pos; struct list_head *cset_head; struct list_head *tcset_pos; struct list_head *tcset_head; struct list_head *task_pos; struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; struct list_head iters_node; /* css_set->task_iters */ }; extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; extern spinlock_t css_set_lock; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> #undef SUBSYS #define SUBSYS(_x) \ extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \ extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key; #include <linux/cgroup_subsys.h> #undef SUBSYS /** * cgroup_subsys_enabled - fast test on whether a subsys is enabled * @ss: subsystem in question */ #define cgroup_subsys_enabled(ss) \ static_branch_likely(&ss ## _enabled_key) /** * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy * @ss: subsystem in question */ #define cgroup_subsys_on_dfl(ss) \ static_branch_likely(&ss ## _on_dfl_key) bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); struct cgroup *cgroup_get_from_path(const char *path); struct cgroup *cgroup_get_from_fd(int fd); struct cgroup *cgroup_v1v2_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); void cgroup_file_show(struct cgroup_file *cfile, bool show); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct 
task_struct *p); extern int cgroup_can_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void cgroup_cancel_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void cgroup_post_fork(struct task_struct *p, struct kernel_clone_args *kargs); void cgroup_exit(struct task_struct *p); void cgroup_release(struct task_struct *p); void cgroup_free(struct task_struct *p); int cgroup_init_early(void); int cgroup_init(void); int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v); /* * Iteration helpers and macros. */ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *parent); struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos); struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *css); struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp); struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp); void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, struct css_task_iter *it); struct task_struct *css_task_iter_next(struct css_task_iter *it); void css_task_iter_end(struct css_task_iter *it); /** * css_for_each_child - iterate through children of a css * @pos: the css * to use as the loop cursor * @parent: css whose children to walk * * Walk @parent's children. Must be called under rcu_read_lock(). * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until * the start of the next iteration by, for example, bumping the css refcnt. */ #define css_for_each_child(pos, parent) \ for ((pos) = css_next_child(NULL, (parent)); (pos); \ (pos) = css_next_child((pos), (parent))) /** * css_for_each_descendant_pre - pre-order walk of a css's descendants * @pos: the css * to use as the loop cursor * @root: css whose descendants to walk * * Walk @root's descendants. @root is included in the iteration and the * first node to be visited. Must be called under rcu_read_lock(). * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * For example, the following guarantees that a descendant can't escape * state updates of its ancestors. * * my_online(@css) * { * Lock @css's parent and @css; * Inherit state from the parent; * Unlock both. 
* } * * my_update_state(@css) * { * css_for_each_descendant_pre(@pos, @css) { * Lock @pos; * if (@pos == @css) * Update @css's state; * else * Verify @pos is alive and inherit state from its parent; * Unlock @pos; * } * } * * As long as the inheriting step, including checking the parent state, is * enclosed inside @pos locking, double-locking the parent isn't necessary * while inheriting. The state update to the parent is guaranteed to be * visible by walking order and, as long as inheriting operations to the * same @pos are atomic to each other, multiple updates racing each other * still result in the correct state. It's guaranteed that at least one * inheritance happens for any css after the latest update to its parent. * * If checking parent's state requires locking the parent, each inheriting * iteration should lock and unlock both @pos->parent and @pos. * * Alternatively, a subsystem may choose to use a single global lock to * synchronize ->css_online() and ->css_offline() against tree-walking * operations. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until * the start of the next iteration by, for example, bumping the css refcnt. */ #define css_for_each_descendant_pre(pos, css) \ for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ (pos) = css_next_descendant_pre((pos), (css))) /** * css_for_each_descendant_post - post-order walk of a css's descendants * @pos: the css * to use as the loop cursor * @css: css whose descendants to walk * * Similar to css_for_each_descendant_pre() but performs post-order * traversal instead. @root is included in the iteration and the last * node to be visited. * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. * * Note that the walk visibility guarantee example described in pre-order * walk doesn't apply the same to post-order walks. */ #define css_for_each_descendant_post(pos, css) \ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ (pos) = css_next_descendant_post((pos), (css))) /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor * @dst_css: the destination css * @tset: taskset to iterate * * @tset may contain multiple tasks and they may belong to multiple * processes. * * On the v2 hierarchy, there may be tasks from multiple processes and they * may not share the source or destination csses. * * On traditional hierarchies, when there are multiple tasks in @tset, if a * task of a process is in @tset, all tasks of the process are in @tset. * Also, all are guaranteed to share the same source and destination csses. * * Iteration is not in any specific order. */ #define cgroup_taskset_for_each(task, dst_css, tset) \ for ((task) = cgroup_taskset_first((tset), &(dst_css)); \ (task); \ (task) = cgroup_taskset_next((tset), &(dst_css))) /** * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset * @leader: the loop cursor * @dst_css: the destination css * @tset: taskset to iterate * * Iterate threadgroup leaders of @tset. For single-task migrations, @tset * may not contain any.
*/ #define cgroup_taskset_for_each_leader(leader, dst_css, tset) \ for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \ (leader); \ (leader) = cgroup_taskset_next((tset), &(dst_css))) \ if ((leader) != (leader)->group_leader) \ ; \ else /* * Inline functions. */ #ifdef CONFIG_DEBUG_CGROUP_REF void css_get(struct cgroup_subsys_state *css); void css_get_many(struct cgroup_subsys_state *css, unsigned int n); bool css_tryget(struct cgroup_subsys_state *css); bool css_tryget_online(struct cgroup_subsys_state *css); void css_put(struct cgroup_subsys_state *css); void css_put_many(struct cgroup_subsys_state *css, unsigned int n); #else #define CGROUP_REF_FN_ATTRS static inline #define CGROUP_REF_EXPORT(fn) #include <linux/cgroup_refcnt.h> #endif static inline u64 cgroup_id(const struct cgroup *cgrp) { return cgrp->kn->id; } /** * css_is_dying - test whether the specified css is dying * @css: target css * * Test whether @css is in the process of offlining or already offline. In * most cases, ->css_online() and ->css_offline() callbacks should be * enough; however, the actual offline operations are RCU delayed and this * test returns %true also when @css is scheduled to be offlined. * * This is useful, for example, when the use case requires synchronous * behavior with respect to cgroup removal. cgroup removal schedules css * offlining but the css can seem alive while the operation is being * delayed. If the delay affects user visible semantics, this test can be * used to resolve the situation. */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); } static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); } static inline bool cgroup_tryget(struct cgroup *cgrp) { return css_tryget(&cgrp->self); } static inline void cgroup_put(struct cgroup *cgrp) { css_put(&cgrp->self); } extern struct mutex cgroup_mutex; static inline void cgroup_lock(void) { mutex_lock(&cgroup_mutex); } static inline void cgroup_unlock(void) { mutex_unlock(&cgroup_mutex); } /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for * @__c: extra condition expression to be passed to rcu_dereference_check() * * A task's css_set is RCU protected, initialized and exited while holding * task_lock(), and can only be modified while holding both cgroup_mutex * and task_lock() while the task is alive. This macro verifies that the * caller is inside proper critical section and returns @task's css_set. * * The caller can also specify additional allowed conditions via @__c, such * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ rcu_read_lock_sched_held() || \ lockdep_is_held(&cgroup_mutex) || \ lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) #else #define task_css_set_check(task, __c) \ rcu_dereference((task)->cgroups) #endif /** * task_css_check - obtain css for (task, subsys) w/ extra access conds * @task: the target task * @subsys_id: the target subsystem ID * @__c: extra condition expression to be passed to rcu_dereference_check() * * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The * synchronization rules are the same as task_css_set_check(). 
*/ #define task_css_check(task, subsys_id, __c) \ task_css_set_check((task), (__c))->subsys[(subsys_id)] /** * task_css_set - obtain a task's css_set * @task: the task to obtain css_set for * * See task_css_set_check(). */ static inline struct css_set *task_css_set(struct task_struct *task) { return task_css_set_check(task, false); } /** * task_css - obtain css for (task, subsys) * @task: the target task * @subsys_id: the target subsystem ID * * See task_css_check(). */ static inline struct cgroup_subsys_state *task_css(struct task_struct *task, int subsys_id) { return task_css_check(task, subsys_id, false); } /** * task_get_css - find and get the css for (task, subsys) * @task: the target task * @subsys_id: the target subsystem ID * * Find the css for the (@task, @subsys_id) combination, increment a * reference on and return it. This function is guaranteed to return a * valid css. The returned css may already have been offlined. */ static inline struct cgroup_subsys_state * task_get_css(struct task_struct *task, int subsys_id) { struct cgroup_subsys_state *css; rcu_read_lock(); while (true) { css = task_css(task, subsys_id); /* * Can't use css_tryget_online() here. A task which has * PF_EXITING set may stay associated with an offline css. * If such task calls this function, css_tryget_online() * will keep failing. */ if (likely(css_tryget(css))) break; cpu_relax(); } rcu_read_unlock(); return css; } /** * task_css_is_root - test whether a task belongs to the root css * @task: the target task * @subsys_id: the target subsystem ID * * Test whether @task belongs to the root css on the specified subsystem. * May be invoked in any context. */ static inline bool task_css_is_root(struct task_struct *task, int subsys_id) { return task_css_check(task, subsys_id, true) == init_css_set.subsys[subsys_id]; } static inline struct cgroup *task_cgroup(struct task_struct *task, int subsys_id) { return task_css(task, subsys_id)->cgroup; } static inline struct cgroup *task_dfl_cgroup(struct task_struct *task) { return task_css_set(task)->dfl_cgrp; } static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) { struct cgroup_subsys_state *parent_css = cgrp->self.parent; if (parent_css) return container_of(parent_css, struct cgroup, self); return NULL; } /** * cgroup_is_descendant - test ancestry * @cgrp: the cgroup to be tested * @ancestor: possible ancestor of @cgrp * * Test whether @cgrp is a descendant of @ancestor. It also returns %true * if @cgrp == @ancestor. This function is safe to call as long as @cgrp * and @ancestor are accessible. */ static inline bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor) { if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) return false; return cgrp->ancestors[ancestor->level] == ancestor; } /** * cgroup_ancestor - find ancestor of cgroup * @cgrp: cgroup to find ancestor of * @ancestor_level: level of ancestor to find starting from root * * Find ancestor of cgroup at specified level starting from root if it exists * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at * @ancestor_level. * * This function is safe to call as long as @cgrp is accessible. 
*/ static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp, int ancestor_level) { if (ancestor_level < 0 || ancestor_level > cgrp->level) return NULL; return cgrp->ancestors[ancestor_level]; } /** * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry * @task: the task to be tested * @ancestor: possible ancestor of @task's cgroup * * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. * It follows all the same rules as cgroup_is_descendant, and only applies * to the default hierarchy. */ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { struct css_set *cset = task_css_set(task); return cgroup_is_descendant(cset->dfl_cgrp, ancestor); } /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children + cgrp->nr_populated_threaded_children; } /* returns ino associated with a cgroup */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { return kernfs_ino(cgrp->kn); } /* cft/css accessors for cftype->write() operation */ static inline struct cftype *of_cft(struct kernfs_open_file *of) { return of->kn->priv; } struct cgroup_subsys_state *of_css(struct kernfs_open_file *of); /* cft/css accessors for cftype->seq_*() operations */ static inline struct cftype *seq_cft(struct seq_file *seq) { return of_cft(seq->private); } static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) { return of_css(seq->private); } /* * Name / path handling functions. All are thin wrappers around the kernfs * counterparts and can be called under any context. */ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) { return kernfs_name(cgrp->kn, buf, buflen); } static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen) { return kernfs_path(cgrp->kn, buf, buflen); } static inline void pr_cont_cgroup_name(struct cgroup *cgrp) { pr_cont_kernfs_name(cgrp->kn); } static inline void pr_cont_cgroup_path(struct cgroup *cgrp) { pr_cont_kernfs_path(cgrp->kn); } bool cgroup_psi_enabled(void); static inline void cgroup_init_kthreadd(void) { /* * kthreadd is inherited by all kthreads, keep it in the root so * that the new kthreads are guaranteed to stay in the root until * initialization is finished. */ current->no_cgroup_migration = 1; } static inline void cgroup_kthread_ready(void) { /* * This kthread finished initialization. The creator should have * set PF_NO_SETAFFINITY if this kthread should stay in the root. 
*/ current->no_cgroup_migration = 0; } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); struct cgroup *cgroup_get_from_id(u64 id); #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; struct cgroup; static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} static inline void cgroup_lock(void) {} static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} static inline int cgroup_can_fork(struct task_struct *p, struct kernel_clone_args *kargs) { return 0; } static inline void cgroup_cancel_fork(struct task_struct *p, struct kernel_clone_args *kargs) {} static inline void cgroup_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) {} static inline void cgroup_exit(struct task_struct *p) {} static inline void cgroup_release(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } static inline void cgroup_init_kthreadd(void) {} static inline void cgroup_kthread_ready(void) {} static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) { return NULL; } static inline bool cgroup_psi_enabled(void) { return false; } static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) { return true; } static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) {} #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS /* * cgroup scalable recursive statistics. */ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); void cgroup_rstat_flush(struct cgroup *cgrp); void cgroup_rstat_flush_hold(struct cgroup *cgrp); void cgroup_rstat_flush_release(void); /* * Basic resource stats. */ #ifdef CONFIG_CGROUP_CPUACCT void cpuacct_charge(struct task_struct *tsk, u64 cputime); void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); #else static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} static inline void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) {} #endif void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec); void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec); static inline void cgroup_account_cputime(struct task_struct *task, u64 delta_exec) { struct cgroup *cgrp; cpuacct_charge(task, delta_exec); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime(cgrp, delta_exec); } static inline void cgroup_account_cputime_field(struct task_struct *task, enum cpu_usage_stat index, u64 delta_exec) { struct cgroup *cgrp; cpuacct_account_field(task, index, delta_exec); cgrp = task_dfl_cgroup(task); if (cgroup_parent(cgrp)) __cgroup_account_cputime_field(cgrp, index, delta_exec); } #else /* CONFIG_CGROUPS */ static inline void cgroup_account_cputime(struct task_struct *task, u64 delta_exec) {} static inline void cgroup_account_cputime_field(struct task_struct *task, enum cpu_usage_stat index, u64 delta_exec) {} #endif /* CONFIG_CGROUPS */ /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. 
*/ #ifdef CONFIG_SOCK_CGROUP_DATA void cgroup_sk_alloc(struct sock_cgroup_data *skcd); void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) { return skcd->cgroup; } #else /* CONFIG_CGROUP_DATA */ static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ struct cgroup_namespace { struct ns_common ns; struct user_namespace *user_ns; struct ucounts *ucounts; struct css_set *root_cset; }; extern struct cgroup_namespace init_cgroup_ns; #ifdef CONFIG_CGROUPS void free_cgroup_ns(struct cgroup_namespace *ns); struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns); int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); #else /* !CONFIG_CGROUPS */ static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } static inline struct cgroup_namespace * copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns) { return old_ns; } #endif /* !CONFIG_CGROUPS */ static inline void get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) refcount_inc(&ns->ns.count); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { if (ns && refcount_dec_and_test(&ns->ns.count)) free_cgroup_ns(ns); } #ifdef CONFIG_CGROUPS void cgroup_enter_frozen(void); void cgroup_leave_frozen(bool always_leave); void cgroup_update_frozen(struct cgroup *cgrp); void cgroup_freeze(struct cgroup *cgrp, bool freeze); void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src, struct cgroup *dst); static inline bool cgroup_task_frozen(struct task_struct *task) { return task->frozen; } #else /* !CONFIG_CGROUPS */ static inline void cgroup_enter_frozen(void) { } static inline void cgroup_leave_frozen(bool always_leave) { } static inline bool cgroup_task_frozen(struct task_struct *task) { return false; } #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUP_BPF static inline void cgroup_bpf_get(struct cgroup *cgrp) { percpu_ref_get(&cgrp->bpf.refcnt); } static inline void cgroup_bpf_put(struct cgroup *cgrp) { percpu_ref_put(&cgrp->bpf.refcnt); } #else /* CONFIG_CGROUP_BPF */ static inline void cgroup_bpf_get(struct cgroup *cgrp) {} static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); #endif /* _LINUX_CGROUP_H */
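/*
 * Editor's illustrative sketch (not part of cgroup.h): one way a controller
 * could walk every thread-group leader attached to a css using the
 * css_task_iter API declared above. The function name example_walk_leaders()
 * and the pr_info() output are hypothetical; css_task_iter_start(),
 * css_task_iter_next(), css_task_iter_end() and CSS_TASK_ITER_PROCS are the
 * declarations from this header.
 */
static void __maybe_unused example_walk_leaders(struct cgroup_subsys_state *css)
{
	struct css_task_iter it;
	struct task_struct *task;

	/* CSS_TASK_ITER_PROCS restricts the walk to thread-group leaders. */
	css_task_iter_start(css, CSS_TASK_ITER_PROCS, &it);
	while ((task = css_task_iter_next(&it)))
		pr_info("leader: pid=%d comm=%s\n", task->pid, task->comm);
	css_task_iter_end(&it);
}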
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the IP module. * * Version: @(#)ip.h 1.0.2 05/07/93 * * Authors: Ross Biro * Fred N.
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * * Changes: * Mike McLagan : Routing by source */ #ifndef _IP_H #define _IP_H #include <linux/types.h> #include <linux/ip.h> #include <linux/in.h> #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/sockptr.h> #include <linux/static_key.h> #include <net/inet_sock.h> #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> #include <net/flow_dissector.h> #include <net/netns/hash.h> #include <net/lwtunnel.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ extern unsigned int sysctl_fib_sync_mem; extern unsigned int sysctl_fib_sync_mem_min; extern unsigned int sysctl_fib_sync_mem_max; struct sock; struct inet_skb_parm { int iif; struct ip_options opt; /* Compiled IP options */ u16 flags; #define IPSKB_FORWARDED BIT(0) #define IPSKB_XFRM_TUNNEL_SIZE BIT(1) #define IPSKB_XFRM_TRANSFORMED BIT(2) #define IPSKB_FRAG_COMPLETE BIT(3) #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_L3SLAVE BIT(7) #define IPSKB_NOPOLICY BIT(8) #define IPSKB_MULTIPATH BIT(9) u16 frag_max_size; }; static inline bool ipv4_l3mdev_skb(u16 flags) { return !!(flags & IPSKB_L3SLAVE); } static inline unsigned int ip_hdrlen(const struct sk_buff *skb) { return ip_hdr(skb)->ihl * 4; } struct ipcm_cookie { struct sockcm_cookie sockc; __be32 addr; int oif; struct ip_options_rcu *opt; __u8 protocol; __u8 ttl; __s16 tos; char priority; __u16 gso_size; }; static inline void ipcm_init(struct ipcm_cookie *ipcm) { *ipcm = (struct ipcm_cookie) { .tos = -1 }; } static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, const struct inet_sock *inet) { ipcm_init(ipcm); ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags); ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; ipcm->protocol = inet->inet_num; } #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) /* return enslaved device index if relevant */ static inline int inet_sdif(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) return IPCB(skb)->iif; #endif return 0; } /* Special input handler for packets caught by router alert option. They are selected only by protocol field, and then processed likely local ones; but only if someone wants them! Otherwise, router not running rsvpd will kill RSVP. It is user level problem, what it will make with them. I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), but receiver should be enough clever f.e. to forward mtrace requests, sent to multicast group to reach destination designated router. */ struct ip_ra_chain { struct ip_ra_chain __rcu *next; struct sock *sk; union { void (*destructor)(struct sock *); struct sock *saved_sk; }; struct rcu_head rcu; }; /* IP flags. 
*/ #define IP_CE 0x8000 /* Flag: "Congestion" */ #define IP_DF 0x4000 /* Flag: "Don't Fragment" */ #define IP_MF 0x2000 /* Flag: "More Fragments" */ #define IP_OFFSET 0x1FFF /* "Fragment Offset" part */ #define IP_FRAG_TIME (30 * HZ) /* fragment lifetime */ struct msghdr; struct net_device; struct packet_type; struct rtable; struct sockaddr; int igmp_mc_init(void); /* * Functions provided by ip.c */ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt, u8 tos); int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto); int ip_mr_input(struct sk_buff *skb); int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); struct ip_fraglist_iter { struct sk_buff *frag; struct iphdr *iph; int offset; unsigned int hlen; }; void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, unsigned int hlen, struct ip_fraglist_iter *iter); void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter); static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter) { struct sk_buff *skb = iter->frag; iter->frag = skb->next; skb_mark_not_on_list(skb); return skb; } struct ip_frag_state { bool DF; unsigned int hlen; unsigned int ll_rs; unsigned int mtu; unsigned int left; int offset; int ptr; __be16 not_last_frag; }; void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, unsigned int mtu, bool DF, struct ip_frag_state *state); struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos); void ip_init(void); int ip_append_data(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int len, int protolen, struct ipcm_cookie *ipc, struct rtable **rt, unsigned int flags); int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); int ip_send_skb(struct net *net, struct sk_buff *skb); int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4); void ip_flush_pending_frames(struct sock *sk); struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, struct inet_cork *cork, unsigned int flags); int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) { return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base); } /* Get the route scope that should be used when sending a packet. 
*/ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, const struct ipcm_cookie *ipc, const struct msghdr *msg) { if (sock_flag(&inet->sk, SOCK_LOCALROUTE) || msg->msg_flags & MSG_DONTROUTE || (ipc->opt && ipc->opt->opt.is_strictroute)) return RT_SCOPE_LINK; return RT_SCOPE_UNIVERSE; } static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos)); } /* datagram.c */ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); struct ip_reply_arg { struct kvec iov[1]; int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; u8 tos; kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) { return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len, u64 transmit_time, u32 txhash); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define __IP_ADD_STATS(net, field, val) __SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) #define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define __IP_UPD_PO_STATS(net, field, val) __SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val) #define NET_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.net_statistics, field) #define __NET_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.net_statistics, field) #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) #define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd) static inline u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt) { return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt); } unsigned long snmp_fold_field(void __percpu *mib, int offt); #if BITS_PER_LONG==32 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, size_t syncp_offset); u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off); #else static inline u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct, size_t syncp_offset) { return snmp_get_cpu_field(mib, cpu, offct); } static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off) { return snmp_fold_field(mib, offt); } #endif #define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ { \ int i, c; \ for_each_possible_cpu(c) { \ for (i = 0; stats_list[i].name; i++) \ buff64[i] += snmp_get_cpu_field64( \ mib_statistic, \ c, stats_list[i].entry, \ offset); \ } \ } #define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ { \ int i, c; \ for_each_possible_cpu(c) { \ for (i = 0; stats_list[i].name; i++) \ buff[i] += snmp_get_cpu_field( \ mib_statistic, \ c, stats_list[i].entry); \ } \ } static inline void inet_get_local_port_range(const struct net *net, int *low, int *high) { u32 range = READ_ONCE(net->ipv4.ip_local_ports.range); *low = range & 0xffff; *high = range >> 16; } bool 
inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); #ifdef CONFIG_SYSCTL static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) { if (!net->ipv4.sysctl_local_reserved_ports) return false; return test_bit(port, net->ipv4.sysctl_local_reserved_ports); } static inline bool sysctl_dev_name_is_allowed(const char *name) { return strcmp(name, "default") != 0 && strcmp(name, "all") != 0; } static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); } #else static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) { return false; } static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { return port < PROT_SOCK; } #endif __be32 inet_current_timestamp(void); /* From inetpeer.c */ extern int inet_peer_threshold; extern int inet_peer_minttl; extern int inet_peer_maxttl; void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; } #ifdef CONFIG_INET #include <net/dst.h> /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ static inline int ip_decrease_ttl(struct iphdr *iph) { u32 check = (__force u32)iph->check; check += (__force u32)htons(0x0100); iph->check = (__force __sum16)(check + (check>=0xFFFF)); return --iph->ttl; } static inline int ip_mtu_locked(const struct dst_entry *dst) { const struct rtable *rt = (const struct rtable *)dst; return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); } static inline int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) { u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); return pmtudisc == IP_PMTUDISC_DO || (pmtudisc == IP_PMTUDISC_WANT && !ip_mtu_locked(dst)); } static inline bool ip_sk_accept_pmtu(const struct sock *sk) { u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); return pmtudisc != IP_PMTUDISC_INTERFACE && pmtudisc != IP_PMTUDISC_OMIT; } static inline bool ip_sk_use_pmtu(const struct sock *sk) { return READ_ONCE(inet_sk(sk)->pmtudisc) < IP_PMTUDISC_PROBE; } static inline bool ip_sk_ignore_df(const struct sock *sk) { u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); return pmtudisc < IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_OMIT; } static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = container_of(dst, struct rtable, dst); struct net *net = dev_net(dst->dev); unsigned int mtu; if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { mtu = rt->rt_pmtu; if (mtu && time_before(jiffies, rt->dst.expires)) goto out; } /* 'forwarding = true' case should always honour route mtu */ mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; mtu = READ_ONCE(dst->dev->mtu); if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) mtu = 576; } out: mtu = min_t(unsigned int, mtu, IP_MAX_MTU); return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, const struct sk_buff *skb) { unsigned int mtu; if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); } mtu = 
min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack); static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) { if (fib_metrics != &dst_default_metrics && refcount_dec_and_test(&fib_metrics->refcnt)) kfree(fib_metrics); } /* ipv4 and ipv6 both use refcounted metrics if it is not the default */ static inline void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics) { dst_init_metrics(dst, fib_metrics->metrics, true); if (fib_metrics != &dst_default_metrics) { dst->_metrics |= DST_METRICS_REFCOUNTED; refcount_inc(&fib_metrics->refcnt); } } static inline void ip_dst_metrics_put(struct dst_entry *dst) { struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) kfree(p); } void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); /* We had many attacks based on IPID, use the private * generator as much as we can. */ if (sk && inet_sk(sk)->inet_daddr) { int val; /* avoid atomic operations for TCP, * as we hold socket lock at this point. */ if (sk_is_tcp(sk)) { sock_owned_by_me(sk); val = atomic_read(&inet_sk(sk)->inet_id); atomic_set(&inet_sk(sk)->inet_id, val + segs); } else { val = atomic_add_return(segs, &inet_sk(sk)->inet_id); } iph->id = htons(val); return; } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { iph->id = 0; } else { /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } static inline void ip_select_ident(struct net *net, struct sk_buff *skb, struct sock *sk) { ip_select_ident_segs(net, skb, sk, 1); } static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) { return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len, proto, 0); } /* copy IPv4 saddr & daddr to flow_keys, possibly using 64bit load/store * Equivalent to : flow->v4addrs.src = iph->saddr; * flow->v4addrs.dst = iph->daddr; */ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, const struct iphdr *iph) { BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != offsetof(typeof(flow->addrs), v4addrs.src) + sizeof(flow->addrs.v4addrs.src)); memcpy(&flow->addrs.v4addrs, &iph->addrs, sizeof(flow->addrs.v4addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } /* * Map a multicast IP onto multicast MAC for type ethernet. */ static inline void ip_eth_mc_map(__be32 naddr, char *buf) { __u32 addr=ntohl(naddr); buf[0]=0x01; buf[1]=0x00; buf[2]=0x5e; buf[5]=addr&0xFF; addr>>=8; buf[4]=addr&0xFF; addr>>=8; buf[3]=addr&0x7F; } /* * Map a multicast IP onto multicast MAC for type IP-over-InfiniBand. * Leave P_Key as 0 to be filled in by driver. 
*/ static inline void ip_ib_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf) { __u32 addr; unsigned char scope = broadcast[5] & 0xF; buf[0] = 0; /* Reserved */ buf[1] = 0xff; /* Multicast QPN */ buf[2] = 0xff; buf[3] = 0xff; addr = ntohl(naddr); buf[4] = 0xff; buf[5] = 0x10 | scope; /* scope from broadcast address */ buf[6] = 0x40; /* IPv4 signature */ buf[7] = 0x1b; buf[8] = broadcast[8]; /* P_Key */ buf[9] = broadcast[9]; buf[10] = 0; buf[11] = 0; buf[12] = 0; buf[13] = 0; buf[14] = 0; buf[15] = 0; buf[19] = addr & 0xff; addr >>= 8; buf[18] = addr & 0xff; addr >>= 8; buf[17] = addr & 0xff; addr >>= 8; buf[16] = addr & 0x0f; } static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf) { if ((broadcast[0] | broadcast[1] | broadcast[2] | broadcast[3]) != 0) memcpy(buf, broadcast, 4); else memcpy(buf, &naddr, sizeof(naddr)); } #if IS_ENABLED(CONFIG_IPV6) #include <linux/ipv6.h> #endif static __inline__ void inet_reset_saddr(struct sock *sk) { inet_sk(sk)->inet_rcv_saddr = inet_sk(sk)->inet_saddr = 0; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); memset(&np->saddr, 0, sizeof(np->saddr)); memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr)); } #endif } #endif static inline unsigned int ipv4_addr_hash(__be32 ip) { return (__force unsigned int) ip; } static inline u32 ipv4_portaddr_hash(const struct net *net, __be32 saddr, unsigned int port) { return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } bool ip_call_ra_chain(struct sk_buff *skb); /* * Functions provided by ip_fragment.c */ enum ip_defrag_users { IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHRT_MAX, IP_DEFRAG_CONNTRACK_OUT, __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHRT_MAX, IP_DEFRAG_CONNTRACK_BRIDGE_IN, __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD, IP_DEFRAG_AF_PACKET, IP_DEFRAG_MACVLAN, }; /* Return true if the value of 'user' is between 'lower_bond' * and 'upper_bond' inclusively. 
*/ static inline bool ip_defrag_user_in_between(u32 user, enum ip_defrag_users lower_bond, enum ip_defrag_users upper_bond) { return user >= lower_bond && user <= upper_bond; } int ip_defrag(struct net *net, struct sk_buff *skb, u32 user); #ifdef CONFIG_INET struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user); #else static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) { return skb; } #endif /* * Functions provided by ip_forward.c */ int ip_forward(struct sk_buff *skb); /* * Functions provided by ip_options.c */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt); int __ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb, const struct ip_options *sopt); static inline int ip_options_echo(struct net *net, struct ip_options *dopt, struct sk_buff *skb) { return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt); } void ip_options_fragment(struct sk_buff *skb); int __ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb, __be32 *info); int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); int ip_options_get(struct net *net, struct ip_options_rcu **optp, sockptr_t data, int optlen); void ip_options_undo(struct ip_options *opt); void ip_forward_options(struct sk_buff *skb); int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); /* * Functions provided by ip_sockglue.c */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); DECLARE_STATIC_KEY_FALSE(ip4_min_ttl); int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int do_ip_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } bool icmp_global_allow(void); extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; #ifdef CONFIG_PROC_FS int ip_misc_proc_init(void); #endif int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack); static inline bool inetdev_valid_mtu(unsigned int mtu) { return likely(mtu >= IPV4_MIN_MTU); } void ip_sock_set_freebind(struct sock *sk); int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */
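/*
 * Editor's illustrative sketch (not part of ip.h): a minimal forwarding-style
 * hop-limit check built on ip_decrease_ttl() declared above, which decrements
 * the TTL while patching the header checksum incrementally instead of
 * recomputing it. The function name example_ttl_ok_to_forward() is a
 * hypothetical placeholder; only ip_hdr() and ip_decrease_ttl() come from the
 * kernel headers.
 */
static bool __maybe_unused example_ttl_ok_to_forward(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);

	/* A router must not forward a packet whose TTL would drop to zero. */
	if (iph->ttl <= 1)
		return false;	/* caller would answer with ICMP Time Exceeded */

	ip_decrease_ttl(iph);
	return true;
}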
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Landlock LSM - Credential hooks
 *
 * Copyright © 2019-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2019-2020 ANSSI
 */

#ifndef _SECURITY_LANDLOCK_CRED_H
#define _SECURITY_LANDLOCK_CRED_H

#include <linux/cred.h>
#include <linux/init.h>
#include <linux/rcupdate.h>

#include "ruleset.h"
#include "setup.h"

struct landlock_cred_security {
	struct landlock_ruleset *domain;
};

static inline struct landlock_cred_security *
landlock_cred(const struct cred *cred)
{
	return cred->security + landlock_blob_sizes.lbs_cred;
}

static inline const struct landlock_ruleset *landlock_get_current_domain(void)
{
	return landlock_cred(current_cred())->domain;
}

/*
 * The call needs to come from an RCU read-side critical section.
 */
static inline const struct landlock_ruleset *
landlock_get_task_domain(const struct task_struct *const task)
{
	return landlock_cred(__task_cred(task))->domain;
}

static inline bool landlocked(const struct task_struct *const task)
{
	bool has_dom;

	if (task == current)
		return !!landlock_get_current_domain();

	rcu_read_lock();
	has_dom = !!landlock_get_task_domain(task);
	rcu_read_unlock();

	return has_dom;
}

__init void landlock_add_cred_hooks(void);

#endif /* _SECURITY_LANDLOCK_CRED_H */
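/*
 * Editor's illustrative sketch (not part of cred.h): how a Landlock access
 * check might bail out early for unconfined tasks using the accessors above.
 * The function name example_check_current() and the unconditional -EACCES are
 * hypothetical; landlock_get_current_domain() is the helper declared in this
 * header, and a real hook would evaluate the domain's rules instead.
 */
static int __maybe_unused example_check_current(void)
{
	const struct landlock_ruleset *const dom = landlock_get_current_domain();

	if (!dom)
		return 0;	/* current task is not sandboxed */

	/* ... a real hook would look up @dom's rules for the requested access ... */
	return -EACCES;
}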
// SPDX-License-Identifier: GPL-2.0-only /* * hosts.c Copyright (C) 1992 Drew Eckhardt * Copyright (C) 1993, 1994, 1995 Eric Youngdale * Copyright (C) 2002-2003 Christoph Hellwig * * mid to lowlevel SCSI driver interface * Initial versions: Drew Eckhardt * Subsequent revisions: Eric Youngdale * * <drew@colorado.edu> * * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli * Added QLOGIC QLA1280 SCSI controller kernel host support.
* August 4, 1999 Fred Lewis, Intel DuPont * * Updated to reflect the new initialization scheme for the higher * level of scsi drivers (sd/sr/st) * September 17, 2000 Torben Mathiasen <tmm@image.dk> * * Restructured scsi_host lists and associated functions. * September 04, 2002 Mike Anderson (andmike@us.ibm.com) */ #include <linux/module.h> #include <linux/blkdev.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/kthread.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/completion.h> #include <linux/transport_class.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/idr.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_cmnd.h> #include "scsi_priv.h" #include "scsi_logging.h" static int shost_eh_deadline = -1; module_param_named(eh_deadline, shost_eh_deadline, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(eh_deadline, "SCSI EH timeout in seconds (should be between 0 and 2^31-1)"); static DEFINE_IDA(host_index_ida); static void scsi_host_cls_release(struct device *dev) { put_device(&class_to_shost(dev)->shost_gendev); } static struct class shost_class = { .name = "scsi_host", .dev_release = scsi_host_cls_release, .dev_groups = scsi_shost_groups, }; /** * scsi_host_set_state - Take the given host through the host state model. * @shost: scsi host to change the state of. * @state: state to change to. * * Returns zero if successful or an error if the requested * transition is illegal. **/ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state) { enum scsi_host_state oldstate = shost->shost_state; if (state == oldstate) return 0; switch (state) { case SHOST_CREATED: /* There are no legal states that come back to * created.
This is the manually initialised start * state */ goto illegal; case SHOST_RUNNING: switch (oldstate) { case SHOST_CREATED: case SHOST_RECOVERY: break; default: goto illegal; } break; case SHOST_RECOVERY: switch (oldstate) { case SHOST_RUNNING: break; default: goto illegal; } break; case SHOST_CANCEL: switch (oldstate) { case SHOST_CREATED: case SHOST_RUNNING: case SHOST_CANCEL_RECOVERY: break; default: goto illegal; } break; case SHOST_DEL: switch (oldstate) { case SHOST_CANCEL: case SHOST_DEL_RECOVERY: break; default: goto illegal; } break; case SHOST_CANCEL_RECOVERY: switch (oldstate) { case SHOST_CANCEL: case SHOST_RECOVERY: break; default: goto illegal; } break; case SHOST_DEL_RECOVERY: switch (oldstate) { case SHOST_CANCEL_RECOVERY: break; default: goto illegal; } break; } shost->shost_state = state; return 0; illegal: SCSI_LOG_ERROR_RECOVERY(1, shost_printk(KERN_ERR, shost, "Illegal host state transition" "%s->%s\n", scsi_host_state_name(oldstate), scsi_host_state_name(state))); return -EINVAL; } /** * scsi_remove_host - remove a scsi host * @shost: a pointer to a scsi host to remove **/ void scsi_remove_host(struct Scsi_Host *shost) { unsigned long flags; mutex_lock(&shost->scan_mutex); spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_CANCEL)) if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) { spin_unlock_irqrestore(shost->host_lock, flags); mutex_unlock(&shost->scan_mutex); return; } spin_unlock_irqrestore(shost->host_lock, flags); scsi_autopm_get_host(shost); flush_workqueue(shost->tmf_work_q); scsi_forget_host(shost); mutex_unlock(&shost->scan_mutex); scsi_proc_host_rm(shost); scsi_proc_hostdir_rm(shost->hostt); /* * New SCSI devices cannot be attached anymore because of the SCSI host * state so drop the tag set refcnt. Wait until the tag set refcnt drops * to zero because .exit_cmd_priv implementations may need the host * pointer. */ kref_put(&shost->tagset_refcnt, scsi_mq_free_tags); wait_for_completion(&shost->tagset_freed); spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_DEL)) BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY)); spin_unlock_irqrestore(shost->host_lock, flags); transport_unregister_device(&shost->shost_gendev); device_unregister(&shost->shost_dev); device_del(&shost->shost_gendev); } EXPORT_SYMBOL(scsi_remove_host); /** * scsi_add_host_with_dma - add a scsi host with dma device * @shost: scsi host pointer to add * @dev: a struct device of type scsi class * @dma_dev: dma device for the host * * Note: You rarely need to worry about this unless you're in a * virtualised host environments, so use the simpler scsi_add_host() * function instead. * * Return value: * 0 on success / != 0 for error **/ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, struct device *dma_dev) { const struct scsi_host_template *sht = shost->hostt; int error = -EINVAL; shost_printk(KERN_INFO, shost, "%s\n", sht->info ? sht->info(shost) : sht->name); if (!shost->can_queue) { shost_printk(KERN_ERR, shost, "can_queue = 0 no longer supported\n"); goto fail; } /* Use min_t(int, ...) in case shost->can_queue exceeds SHRT_MAX */ shost->cmd_per_lun = min_t(int, shost->cmd_per_lun, shost->can_queue); error = scsi_init_sense_cache(shost); if (error) goto fail; if (!shost->shost_gendev.parent) shost->shost_gendev.parent = dev ? 
dev : &platform_bus; if (!dma_dev) dma_dev = shost->shost_gendev.parent; shost->dma_dev = dma_dev; if (dma_dev->dma_mask) { shost->max_sectors = min_t(unsigned int, shost->max_sectors, dma_max_mapping_size(dma_dev) >> SECTOR_SHIFT); } error = scsi_mq_setup_tags(shost); if (error) goto fail; kref_init(&shost->tagset_refcnt); init_completion(&shost->tagset_freed); /* * Increase usage count temporarily here so that calling * scsi_autopm_put_host() will trigger runtime idle if there is * nothing else preventing suspending the device. */ pm_runtime_get_noresume(&shost->shost_gendev); pm_runtime_set_active(&shost->shost_gendev); pm_runtime_enable(&shost->shost_gendev); device_enable_async_suspend(&shost->shost_gendev); error = device_add(&shost->shost_gendev); if (error) goto out_disable_runtime_pm; scsi_host_set_state(shost, SHOST_RUNNING); get_device(shost->shost_gendev.parent); device_enable_async_suspend(&shost->shost_dev); get_device(&shost->shost_gendev); error = device_add(&shost->shost_dev); if (error) goto out_del_gendev; if (shost->transportt->host_size) { shost->shost_data = kzalloc(shost->transportt->host_size, GFP_KERNEL); if (shost->shost_data == NULL) { error = -ENOMEM; goto out_del_dev; } } if (shost->transportt->create_work_queue) { snprintf(shost->work_q_name, sizeof(shost->work_q_name), "scsi_wq_%d", shost->host_no); shost->work_q = alloc_workqueue("%s", WQ_SYSFS | __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_UNBOUND, 1, shost->work_q_name); if (!shost->work_q) { error = -EINVAL; goto out_del_dev; } } error = scsi_sysfs_add_host(shost); if (error) goto out_del_dev; scsi_proc_host_add(shost); scsi_autopm_put_host(shost); return error; /* * Any host allocation in this function will be freed in * scsi_host_dev_release(). */ out_del_dev: device_del(&shost->shost_dev); out_del_gendev: /* * Host state is SHOST_RUNNING so we have to explicitly release * ->shost_dev. */ put_device(&shost->shost_dev); device_del(&shost->shost_gendev); out_disable_runtime_pm: device_disable_async_suspend(&shost->shost_gendev); pm_runtime_disable(&shost->shost_gendev); pm_runtime_set_suspended(&shost->shost_gendev); pm_runtime_put_noidle(&shost->shost_gendev); kref_put(&shost->tagset_refcnt, scsi_mq_free_tags); fail: return error; } EXPORT_SYMBOL(scsi_add_host_with_dma); static void scsi_host_dev_release(struct device *dev) { struct Scsi_Host *shost = dev_to_shost(dev); struct device *parent = dev->parent; /* Wait for functions invoked through call_rcu(&scmd->rcu, ...) */ rcu_barrier(); if (shost->tmf_work_q) destroy_workqueue(shost->tmf_work_q); if (shost->ehandler) kthread_stop(shost->ehandler); if (shost->work_q) destroy_workqueue(shost->work_q); if (shost->shost_state == SHOST_CREATED) { /* * Free the shost_dev device name and remove the proc host dir * here if scsi_host_{alloc,put}() have been called but neither * scsi_host_add() nor scsi_remove_host() has been called. * This avoids that the memory allocated for the shost_dev * name as well as the proc dir structure are leaked. */ scsi_proc_hostdir_rm(shost->hostt); kfree(dev_name(&shost->shost_dev)); } kfree(shost->shost_data); ida_free(&host_index_ida, shost->host_no); if (shost->shost_state != SHOST_CREATED) put_device(parent); kfree(shost); } static const struct device_type scsi_host_type = { .name = "scsi_host", .release = scsi_host_dev_release, }; /** * scsi_host_alloc - register a scsi host adapter instance. 
* @sht: pointer to scsi host template * @privsize: extra bytes to allocate for driver * * Note: * Allocate a new Scsi_Host and perform basic initialization. * The host is not published to the scsi midlayer until scsi_add_host * is called. * * Return value: * Pointer to a new Scsi_Host **/ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int privsize) { struct Scsi_Host *shost; int index; shost = kzalloc(sizeof(struct Scsi_Host) + privsize, GFP_KERNEL); if (!shost) return NULL; shost->host_lock = &shost->default_lock; spin_lock_init(shost->host_lock); shost->shost_state = SHOST_CREATED; INIT_LIST_HEAD(&shost->__devices); INIT_LIST_HEAD(&shost->__targets); INIT_LIST_HEAD(&shost->eh_abort_list); INIT_LIST_HEAD(&shost->eh_cmd_q); INIT_LIST_HEAD(&shost->starved_list); init_waitqueue_head(&shost->host_wait); mutex_init(&shost->scan_mutex); index = ida_alloc(&host_index_ida, GFP_KERNEL); if (index < 0) { kfree(shost); return NULL; } shost->host_no = index; shost->dma_channel = 0xff; /* These three are default values which can be overridden */ shost->max_channel = 0; shost->max_id = 8; shost->max_lun = 8; /* Give each shost a default transportt */ shost->transportt = &blank_transport_template; /* * All drivers right now should be able to handle 12 byte * commands. Every so often there are requests for 16 byte * commands, but individual low-level drivers need to certify that * they actually do something sensible with such commands. */ shost->max_cmd_len = 12; shost->hostt = sht; shost->this_id = sht->this_id; shost->can_queue = sht->can_queue; shost->sg_tablesize = sht->sg_tablesize; shost->sg_prot_tablesize = sht->sg_prot_tablesize; shost->cmd_per_lun = sht->cmd_per_lun; shost->no_write_same = sht->no_write_same; shost->host_tagset = sht->host_tagset; shost->queuecommand_may_block = sht->queuecommand_may_block; if (shost_eh_deadline == -1 || !sht->eh_host_reset_handler) shost->eh_deadline = -1; else if ((ulong) shost_eh_deadline * HZ > INT_MAX) { shost_printk(KERN_WARNING, shost, "eh_deadline %u too large, setting to %u\n", shost_eh_deadline, INT_MAX / HZ); shost->eh_deadline = INT_MAX; } else shost->eh_deadline = shost_eh_deadline * HZ; if (sht->supported_mode == MODE_UNKNOWN) /* means we didn't set it ... default to INITIATOR */ shost->active_mode = MODE_INITIATOR; else shost->active_mode = sht->supported_mode; if (sht->max_host_blocked) shost->max_host_blocked = sht->max_host_blocked; else shost->max_host_blocked = SCSI_DEFAULT_HOST_BLOCKED; /* * If the driver imposes no hard sector transfer limit, start at * machine infinity initially. 
*/ if (sht->max_sectors) shost->max_sectors = sht->max_sectors; else shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS; if (sht->max_segment_size) shost->max_segment_size = sht->max_segment_size; else shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; /* * assume a 4GB boundary, if not set */ if (sht->dma_boundary) shost->dma_boundary = sht->dma_boundary; else shost->dma_boundary = 0xffffffff; if (sht->virt_boundary_mask) shost->virt_boundary_mask = sht->virt_boundary_mask; device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; shost->shost_gendev.type = &scsi_host_type; scsi_enable_async_suspend(&shost->shost_gendev); device_initialize(&shost->shost_dev); shost->shost_dev.parent = &shost->shost_gendev; shost->shost_dev.class = &shost_class; dev_set_name(&shost->shost_dev, "host%d", shost->host_no); shost->shost_dev.groups = sht->shost_groups; shost->ehandler = kthread_run(scsi_error_handler, shost, "scsi_eh_%d", shost->host_no); if (IS_ERR(shost->ehandler)) { shost_printk(KERN_WARNING, shost, "error handler thread failed to spawn, error = %ld\n", PTR_ERR(shost->ehandler)); shost->ehandler = NULL; goto fail; } shost->tmf_work_q = alloc_workqueue("scsi_tmf_%d", WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 1, shost->host_no); if (!shost->tmf_work_q) { shost_printk(KERN_WARNING, shost, "failed to create tmf workq\n"); goto fail; } if (scsi_proc_hostdir_add(shost->hostt) < 0) goto fail; return shost; fail: /* * Host state is still SHOST_CREATED and that is enough to release * ->shost_gendev. scsi_host_dev_release() will free * dev_name(&shost->shost_dev). */ put_device(&shost->shost_gendev); return NULL; } EXPORT_SYMBOL(scsi_host_alloc); static int __scsi_host_match(struct device *dev, const void *data) { struct Scsi_Host *p; const unsigned int *hostnum = data; p = class_to_shost(dev); return p->host_no == *hostnum; } /** * scsi_host_lookup - get a reference to a Scsi_Host by host no * @hostnum: host number to locate * * Return value: * A pointer to located Scsi_Host or NULL. * * The caller must do a scsi_host_put() to drop the reference * that scsi_host_get() took. The put_device() below dropped * the reference from class_find_device(). **/ struct Scsi_Host *scsi_host_lookup(unsigned int hostnum) { struct device *cdev; struct Scsi_Host *shost = NULL; cdev = class_find_device(&shost_class, NULL, &hostnum, __scsi_host_match); if (cdev) { shost = scsi_host_get(class_to_shost(cdev)); put_device(cdev); } return shost; } EXPORT_SYMBOL(scsi_host_lookup); /** * scsi_host_get - inc a Scsi_Host ref count * @shost: Pointer to Scsi_Host to inc. **/ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost) { if ((shost->shost_state == SHOST_DEL) || !get_device(&shost->shost_gendev)) return NULL; return shost; } EXPORT_SYMBOL(scsi_host_get); static bool scsi_host_check_in_flight(struct request *rq, void *data) { int *count = data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); if (test_bit(SCMD_STATE_INFLIGHT, &cmd->state)) (*count)++; return true; } /** * scsi_host_busy - Return the host busy counter * @shost: Pointer to Scsi_Host to inc. **/ int scsi_host_busy(struct Scsi_Host *shost) { int cnt = 0; blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight, &cnt); return cnt; } EXPORT_SYMBOL(scsi_host_busy); /** * scsi_host_put - dec a Scsi_Host ref count * @shost: Pointer to Scsi_Host to dec. 
**/ void scsi_host_put(struct Scsi_Host *shost) { put_device(&shost->shost_gendev); } EXPORT_SYMBOL(scsi_host_put); int scsi_init_hosts(void) { return class_register(&shost_class); } void scsi_exit_hosts(void) { class_unregister(&shost_class); ida_destroy(&host_index_ida); } int scsi_is_host_device(const struct device *dev) { return dev->type == &scsi_host_type; } EXPORT_SYMBOL(scsi_is_host_device); /** * scsi_queue_work - Queue work to the Scsi_Host workqueue. * @shost: Pointer to Scsi_Host. * @work: Work to queue for execution. * * Return value: * 1 - work queued for execution * 0 - work is already queued * -EINVAL - work queue doesn't exist **/ int scsi_queue_work(struct Scsi_Host *shost, struct work_struct *work) { if (unlikely(!shost->work_q)) { shost_printk(KERN_ERR, shost, "ERROR: Scsi host '%s' attempted to queue scsi-work, " "when no workqueue created.\n", shost->hostt->name); dump_stack(); return -EINVAL; } return queue_work(shost->work_q, work); } EXPORT_SYMBOL_GPL(scsi_queue_work); /** * scsi_flush_work - Flush a Scsi_Host's workqueue. * @shost: Pointer to Scsi_Host. **/ void scsi_flush_work(struct Scsi_Host *shost) { if (!shost->work_q) { shost_printk(KERN_ERR, shost, "ERROR: Scsi host '%s' attempted to flush scsi-work, " "when no workqueue created.\n", shost->hostt->name); dump_stack(); return; } flush_workqueue(shost->work_q); } EXPORT_SYMBOL_GPL(scsi_flush_work); static bool complete_all_cmds_iter(struct request *rq, void *data) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); enum scsi_host_status status = *(enum scsi_host_status *)data; scsi_dma_unmap(scmd); scmd->result = 0; set_host_byte(scmd, status); scsi_done(scmd); return true; } /** * scsi_host_complete_all_commands - Terminate all running commands * @shost: Scsi Host on which commands should be terminated * @status: Status to be set for the terminated commands * * There is no protection against modification of the number * of outstanding commands. It is the responsibility of the * caller to ensure that concurrent I/O submission and/or * completion is stopped when calling this function. */ void scsi_host_complete_all_commands(struct Scsi_Host *shost, enum scsi_host_status status) { blk_mq_tagset_busy_iter(&shost->tag_set, complete_all_cmds_iter, &status); } EXPORT_SYMBOL_GPL(scsi_host_complete_all_commands); struct scsi_host_busy_iter_data { bool (*fn)(struct scsi_cmnd *, void *); void *priv; }; static bool __scsi_host_busy_iter_fn(struct request *req, void *priv) { struct scsi_host_busy_iter_data *iter_data = priv; struct scsi_cmnd *sc = blk_mq_rq_to_pdu(req); return iter_data->fn(sc, iter_data->priv); } /** * scsi_host_busy_iter - Iterate over all busy commands * @shost: Pointer to Scsi_Host. * @fn: Function to call on each busy command * @priv: Data pointer passed to @fn * * If locking against concurrent command completions is required * it has to be provided by the caller **/ void scsi_host_busy_iter(struct Scsi_Host *shost, bool (*fn)(struct scsi_cmnd *, void *), void *priv) { struct scsi_host_busy_iter_data iter_data = { .fn = fn, .priv = priv, }; blk_mq_tagset_busy_iter(&shost->tag_set, __scsi_host_busy_iter_fn, &iter_data); } EXPORT_SYMBOL_GPL(scsi_host_busy_iter);
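/*
 * Illustrative sketch (not part of hosts.c): the typical Scsi_Host lifecycle
 * a low-level driver drives through the API documented above, i.e.
 * scsi_host_alloc() -> scsi_add_host() -> scsi_scan_host() and, on teardown,
 * scsi_remove_host() followed by scsi_host_put(). The "example_lld" names
 * are hypothetical and the template is reduced to the essentials.
 */
#if 0	/* documentation-only example */
static int example_lld_queuecommand(struct Scsi_Host *shost,
				    struct scsi_cmnd *scmd)
{
	/* A real driver would start hardware I/O here; the stub fails fast. */
	scmd->result = 0;
	set_host_byte(scmd, DID_NO_CONNECT);
	scsi_done(scmd);
	return 0;
}

static const struct scsi_host_template example_lld_template = {
	.module		= THIS_MODULE,
	.name		= "example_lld",
	.queuecommand	= example_lld_queuecommand,
	.this_id	= -1,
	.can_queue	= 32,
	.sg_tablesize	= SG_ALL,
	.cmd_per_lun	= 2,
};

static int example_lld_probe(struct device *parent)
{
	struct Scsi_Host *shost;
	int error;

	/* Allocate the host; driver-private space is reachable via shost_priv(). */
	shost = scsi_host_alloc(&example_lld_template, 0);
	if (!shost)
		return -ENOMEM;

	shost->max_id = 16;
	shost->max_lun = 8;

	/* Publish the host to the midlayer (wraps scsi_add_host_with_dma()). */
	error = scsi_add_host(shost, parent);
	if (error) {
		scsi_host_put(shost);	/* drop the only reference */
		return error;
	}
	scsi_scan_host(shost);
	return 0;
}

static void example_lld_remove(struct Scsi_Host *shost)
{
	scsi_remove_host(shost);	/* unbind targets/devices from the host */
	scsi_host_put(shost);		/* release the final reference */
}
#endif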
// SPDX-License-Identifier: GPL-2.0-only /*************************************************************************** * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * * * * Based on Logitech G13 driver (v0.4) * * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * * * ***************************************************************************/ #include <linux/hid.h> #include <linux/vmalloc.h> #include <linux/fb.h> #include <linux/module.h> #include "hid-picolcd.h" /* Framebuffer * * The PicoLCD uses a Topway LCD module of 256x64 pixels. * This display area is tiled over 4 controllers with 8 tiles * each. Each tile has 8x64 pixels, each data byte representing * a 1-bit wide vertical line of the tile. * * The display can be updated at a tile granularity. * * Chip 1 Chip 2 Chip 3 Chip 4 * +----------------+----------------+----------------+----------------+ * | Tile 1 | Tile 1 | Tile 1 | Tile 1 | * +----------------+----------------+----------------+----------------+ * | Tile 2 | Tile 2 | Tile 2 | Tile 2 | * +----------------+----------------+----------------+----------------+ * ...
* +----------------+----------------+----------------+----------------+ * | Tile 8 | Tile 8 | Tile 8 | Tile 8 | * +----------------+----------------+----------------+----------------+ */ #define PICOLCDFB_NAME "picolcdfb" #define PICOLCDFB_WIDTH (256) #define PICOLCDFB_HEIGHT (64) #define PICOLCDFB_SIZE (PICOLCDFB_WIDTH * PICOLCDFB_HEIGHT / 8) #define PICOLCDFB_UPDATE_RATE_LIMIT 10 #define PICOLCDFB_UPDATE_RATE_DEFAULT 2 /* Framebuffer visual structures */ static const struct fb_fix_screeninfo picolcdfb_fix = { .id = PICOLCDFB_NAME, .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_MONO01, .xpanstep = 0, .ypanstep = 0, .ywrapstep = 0, .line_length = PICOLCDFB_WIDTH / 8, .accel = FB_ACCEL_NONE, }; static const struct fb_var_screeninfo picolcdfb_var = { .xres = PICOLCDFB_WIDTH, .yres = PICOLCDFB_HEIGHT, .xres_virtual = PICOLCDFB_WIDTH, .yres_virtual = PICOLCDFB_HEIGHT, .width = 103, .height = 26, .bits_per_pixel = 1, .grayscale = 1, .red = { .offset = 0, .length = 1, .msb_right = 0, }, .green = { .offset = 0, .length = 1, .msb_right = 0, }, .blue = { .offset = 0, .length = 1, .msb_right = 0, }, .transp = { .offset = 0, .length = 0, .msb_right = 0, }, }; /* Send a given tile to PicoLCD */ static int picolcd_fb_send_tile(struct picolcd_data *data, u8 *vbitmap, int chip, int tile) { struct hid_report *report1, *report2; unsigned long flags; u8 *tdata; int i; report1 = picolcd_out_report(REPORT_LCD_CMD_DATA, data->hdev); if (!report1 || report1->maxfield != 1) return -ENODEV; report2 = picolcd_out_report(REPORT_LCD_DATA, data->hdev); if (!report2 || report2->maxfield != 1) return -ENODEV; spin_lock_irqsave(&data->lock, flags); if ((data->status & PICOLCD_FAILED)) { spin_unlock_irqrestore(&data->lock, flags); return -ENODEV; } hid_set_field(report1->field[0], 0, chip << 2); hid_set_field(report1->field[0], 1, 0x02); hid_set_field(report1->field[0], 2, 0x00); hid_set_field(report1->field[0], 3, 0x00); hid_set_field(report1->field[0], 4, 0xb8 | tile); hid_set_field(report1->field[0], 5, 0x00); hid_set_field(report1->field[0], 6, 0x00); hid_set_field(report1->field[0], 7, 0x40); hid_set_field(report1->field[0], 8, 0x00); hid_set_field(report1->field[0], 9, 0x00); hid_set_field(report1->field[0], 10, 32); hid_set_field(report2->field[0], 0, (chip << 2) | 0x01); hid_set_field(report2->field[0], 1, 0x00); hid_set_field(report2->field[0], 2, 0x00); hid_set_field(report2->field[0], 3, 32); tdata = vbitmap + (tile * 4 + chip) * 64; for (i = 0; i < 64; i++) if (i < 32) hid_set_field(report1->field[0], 11 + i, tdata[i]); else hid_set_field(report2->field[0], 4 + i - 32, tdata[i]); hid_hw_request(data->hdev, report1, HID_REQ_SET_REPORT); hid_hw_request(data->hdev, report2, HID_REQ_SET_REPORT); spin_unlock_irqrestore(&data->lock, flags); return 0; } /* Translate a single tile*/ static int picolcd_fb_update_tile(u8 *vbitmap, const u8 *bitmap, int bpp, int chip, int tile) { int i, b, changed = 0; u8 tdata[64]; u8 *vdata = vbitmap + (tile * 4 + chip) * 64; if (bpp == 1) { for (b = 7; b >= 0; b--) { const u8 *bdata = bitmap + tile * 256 + chip * 8 + b * 32; for (i = 0; i < 64; i++) { tdata[i] <<= 1; tdata[i] |= (bdata[i/8] >> (i % 8)) & 0x01; } } } else if (bpp == 8) { for (b = 7; b >= 0; b--) { const u8 *bdata = bitmap + (tile * 256 + chip * 8 + b * 32) * 8; for (i = 0; i < 64; i++) { tdata[i] <<= 1; tdata[i] |= (bdata[i] & 0x80) ? 0x01 : 0x00; } } } else { /* Oops, we should never get here! 
*/ WARN_ON(1); return 0; } for (i = 0; i < 64; i++) if (tdata[i] != vdata[i]) { changed = 1; vdata[i] = tdata[i]; } return changed; } void picolcd_fb_refresh(struct picolcd_data *data) { if (data->fb_info) schedule_delayed_work(&data->fb_info->deferred_work, 0); } /* Reconfigure LCD display */ int picolcd_fb_reset(struct picolcd_data *data, int clear) { struct hid_report *report = picolcd_out_report(REPORT_LCD_CMD, data->hdev); struct picolcd_fb_data *fbdata = data->fb_info->par; int i, j; unsigned long flags; static const u8 mapcmd[8] = { 0x00, 0x02, 0x00, 0x64, 0x3f, 0x00, 0x64, 0xc0 }; if (!report || report->maxfield != 1) return -ENODEV; spin_lock_irqsave(&data->lock, flags); for (i = 0; i < 4; i++) { for (j = 0; j < report->field[0]->maxusage; j++) if (j == 0) hid_set_field(report->field[0], j, i << 2); else if (j < sizeof(mapcmd)) hid_set_field(report->field[0], j, mapcmd[j]); else hid_set_field(report->field[0], j, 0); hid_hw_request(data->hdev, report, HID_REQ_SET_REPORT); } spin_unlock_irqrestore(&data->lock, flags); if (clear) { memset(fbdata->vbitmap, 0, PICOLCDFB_SIZE); memset(fbdata->bitmap, 0, PICOLCDFB_SIZE*fbdata->bpp); } fbdata->force = 1; /* schedule first output of framebuffer */ if (fbdata->ready) schedule_delayed_work(&data->fb_info->deferred_work, 0); else fbdata->ready = 1; return 0; } /* Update fb_vbitmap from the screen_buffer and send changed tiles to device */ static void picolcd_fb_update(struct fb_info *info) { int chip, tile, n; unsigned long flags; struct picolcd_fb_data *fbdata = info->par; struct picolcd_data *data; mutex_lock(&info->lock); spin_lock_irqsave(&fbdata->lock, flags); if (!fbdata->ready && fbdata->picolcd) picolcd_fb_reset(fbdata->picolcd, 0); spin_unlock_irqrestore(&fbdata->lock, flags); /* * Translate the framebuffer into the format needed by the PicoLCD. * See display layout above. * Do this one tile after the other and push those tiles that changed. * * Wait for our IO to complete as otherwise we might flood the queue! */ n = 0; for (chip = 0; chip < 4; chip++) for (tile = 0; tile < 8; tile++) { if (!fbdata->force && !picolcd_fb_update_tile( fbdata->vbitmap, fbdata->bitmap, fbdata->bpp, chip, tile)) continue; n += 2; if (n >= HID_OUTPUT_FIFO_SIZE / 2) { spin_lock_irqsave(&fbdata->lock, flags); data = fbdata->picolcd; spin_unlock_irqrestore(&fbdata->lock, flags); mutex_unlock(&info->lock); if (!data) return; hid_hw_wait(data->hdev); mutex_lock(&info->lock); n = 0; } spin_lock_irqsave(&fbdata->lock, flags); data = fbdata->picolcd; spin_unlock_irqrestore(&fbdata->lock, flags); if (!data || picolcd_fb_send_tile(data, fbdata->vbitmap, chip, tile)) goto out; } fbdata->force = false; if (n) { spin_lock_irqsave(&fbdata->lock, flags); data = fbdata->picolcd; spin_unlock_irqrestore(&fbdata->lock, flags); mutex_unlock(&info->lock); if (data) hid_hw_wait(data->hdev); return; } out: mutex_unlock(&info->lock); } static int picolcd_fb_blank(int blank, struct fb_info *info) { /* We let fb notification do this for us via lcd/backlight device */ return 0; } static void picolcd_fb_destroy(struct fb_info *info) { struct picolcd_fb_data *fbdata = info->par; /* make sure no work is deferred */ fb_deferred_io_cleanup(info); /* No third party should ever unregister our framebuffer!
*/ WARN_ON(fbdata->picolcd != NULL); vfree((u8 *)info->fix.smem_start); framebuffer_release(info); } static int picolcd_fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { __u32 bpp = var->bits_per_pixel; __u32 activate = var->activate; /* only allow 1/8 bit depth (8-bit is grayscale) */ *var = picolcdfb_var; var->activate = activate; if (bpp >= 8) { var->bits_per_pixel = 8; var->red.length = 8; var->green.length = 8; var->blue.length = 8; } else { var->bits_per_pixel = 1; var->red.length = 1; var->green.length = 1; var->blue.length = 1; } return 0; } static int picolcd_set_par(struct fb_info *info) { struct picolcd_fb_data *fbdata = info->par; u8 *tmp_fb, *o_fb; if (info->var.bits_per_pixel == fbdata->bpp) return 0; /* switch between 1/8 bit depths */ if (info->var.bits_per_pixel != 1 && info->var.bits_per_pixel != 8) return -EINVAL; o_fb = fbdata->bitmap; tmp_fb = kmalloc_array(PICOLCDFB_SIZE, info->var.bits_per_pixel, GFP_KERNEL); if (!tmp_fb) return -ENOMEM; /* translate FB content to new bits-per-pixel */ if (info->var.bits_per_pixel == 1) { int i, b; for (i = 0; i < PICOLCDFB_SIZE; i++) { u8 p = 0; for (b = 0; b < 8; b++) { p <<= 1; p |= o_fb[i*8+b] ? 0x01 : 0x00; } tmp_fb[i] = p; } memcpy(o_fb, tmp_fb, PICOLCDFB_SIZE); info->fix.visual = FB_VISUAL_MONO01; info->fix.line_length = PICOLCDFB_WIDTH / 8; } else { int i; memcpy(tmp_fb, o_fb, PICOLCDFB_SIZE); for (i = 0; i < PICOLCDFB_SIZE * 8; i++) o_fb[i] = tmp_fb[i/8] & (0x01 << (7 - i % 8)) ? 0xff : 0x00; info->fix.visual = FB_VISUAL_DIRECTCOLOR; info->fix.line_length = PICOLCDFB_WIDTH; } kfree(tmp_fb); fbdata->bpp = info->var.bits_per_pixel; return 0; } static void picolcdfb_ops_damage_range(struct fb_info *info, off_t off, size_t len) { if (!info->par) return; schedule_delayed_work(&info->deferred_work, 0); } static void picolcdfb_ops_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, u32 height) { if (!info->par) return; schedule_delayed_work(&info->deferred_work, 0); } FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(picolcdfb_ops, picolcdfb_ops_damage_range, picolcdfb_ops_damage_area) static const struct fb_ops picolcdfb_ops = { .owner = THIS_MODULE, FB_DEFAULT_DEFERRED_OPS(picolcdfb_ops), .fb_destroy = picolcd_fb_destroy, .fb_blank = picolcd_fb_blank, .fb_check_var = picolcd_fb_check_var, .fb_set_par = picolcd_set_par, }; /* Callback from deferred IO workqueue */ static void picolcd_fb_deferred_io(struct fb_info *info, struct list_head *pagereflist) { picolcd_fb_update(info); } static const struct fb_deferred_io picolcd_fb_defio = { .delay = HZ / PICOLCDFB_UPDATE_RATE_DEFAULT, .deferred_io = picolcd_fb_deferred_io, }; /* * The "fb_update_rate" sysfs attribute */ static ssize_t picolcd_fb_update_rate_show(struct device *dev, struct device_attribute *attr, char *buf) { struct picolcd_data *data = dev_get_drvdata(dev); struct picolcd_fb_data *fbdata = data->fb_info->par; unsigned i, fb_update_rate = fbdata->update_rate; size_t ret = 0; for (i = 1; i <= PICOLCDFB_UPDATE_RATE_LIMIT; i++) if (ret >= PAGE_SIZE) break; else if (i == fb_update_rate) ret += scnprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i); else ret += scnprintf(buf+ret, PAGE_SIZE-ret, "%u ", i); if (ret > 0) buf[min(ret, (size_t)PAGE_SIZE)-1] = '\n'; return ret; } static ssize_t picolcd_fb_update_rate_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct picolcd_data *data = dev_get_drvdata(dev); struct picolcd_fb_data *fbdata = data->fb_info->par; int i; unsigned u; if (count < 1 || count > 10) return -EINVAL; i = 
sscanf(buf, "%u", &u); if (i != 1) return -EINVAL; if (u > PICOLCDFB_UPDATE_RATE_LIMIT) return -ERANGE; else if (u == 0) u = PICOLCDFB_UPDATE_RATE_DEFAULT; fbdata->update_rate = u; data->fb_info->fbdefio->delay = HZ / fbdata->update_rate; return count; } static DEVICE_ATTR(fb_update_rate, 0664, picolcd_fb_update_rate_show, picolcd_fb_update_rate_store); /* initialize Framebuffer device */ int picolcd_init_framebuffer(struct picolcd_data *data) { struct device *dev = &data->hdev->dev; struct fb_info *info = NULL; struct picolcd_fb_data *fbdata = NULL; int i, error = -ENOMEM; u32 *palette; /* The extra memory is: * - 256*u32 for pseudo_palette * - struct fb_deferred_io */ info = framebuffer_alloc(256 * sizeof(u32) + sizeof(struct fb_deferred_io) + sizeof(struct picolcd_fb_data) + PICOLCDFB_SIZE, dev); if (!info) goto err_nomem; info->fbdefio = info->par; *info->fbdefio = picolcd_fb_defio; info->par += sizeof(struct fb_deferred_io); palette = info->par; info->par += 256 * sizeof(u32); for (i = 0; i < 256; i++) palette[i] = i > 0 && i < 16 ? 0xff : 0; info->pseudo_palette = palette; info->fbops = &picolcdfb_ops; info->var = picolcdfb_var; info->fix = picolcdfb_fix; info->fix.smem_len = PICOLCDFB_SIZE*8; fbdata = info->par; spin_lock_init(&fbdata->lock); fbdata->picolcd = data; fbdata->update_rate = PICOLCDFB_UPDATE_RATE_DEFAULT; fbdata->bpp = picolcdfb_var.bits_per_pixel; fbdata->force = 1; fbdata->vbitmap = info->par + sizeof(struct picolcd_fb_data); fbdata->bitmap = vmalloc(PICOLCDFB_SIZE*8); if (fbdata->bitmap == NULL) { dev_err(dev, "can't get a free page for framebuffer\n"); goto err_nomem; } info->flags |= FBINFO_VIRTFB; info->screen_buffer = fbdata->bitmap; info->fix.smem_start = (unsigned long)fbdata->bitmap; memset(fbdata->vbitmap, 0xff, PICOLCDFB_SIZE); data->fb_info = info; error = picolcd_fb_reset(data, 1); if (error) { dev_err(dev, "failed to configure display\n"); goto err_cleanup; } error = device_create_file(dev, &dev_attr_fb_update_rate); if (error) { dev_err(dev, "failed to create sysfs attributes\n"); goto err_cleanup; } fb_deferred_io_init(info); error = register_framebuffer(info); if (error) { dev_err(dev, "failed to register framebuffer\n"); goto err_sysfs; } return 0; err_sysfs: device_remove_file(dev, &dev_attr_fb_update_rate); fb_deferred_io_cleanup(info); err_cleanup: data->fb_info = NULL; err_nomem: if (fbdata) vfree(fbdata->bitmap); framebuffer_release(info); return error; } void picolcd_exit_framebuffer(struct picolcd_data *data) { struct fb_info *info = data->fb_info; struct picolcd_fb_data *fbdata; unsigned long flags; if (!info) return; device_remove_file(&data->hdev->dev, &dev_attr_fb_update_rate); fbdata = info->par; /* disconnect framebuffer from HID dev */ spin_lock_irqsave(&fbdata->lock, flags); fbdata->picolcd = NULL; spin_unlock_irqrestore(&fbdata->lock, flags); /* make sure there is no running update - thus that fbdata->picolcd * once obtained under lock is guaranteed not to get free() under * the feet of the deferred work */ flush_delayed_work(&info->deferred_work); data->fb_info = NULL; unregister_framebuffer(info); }
// SPDX-License-Identifier: GPL-2.0-only /* * drivers/acpi/device_sysfs.c - ACPI device sysfs attributes and modalias. * * Copyright (C) 2015, Intel Corp. * Author: Mika Westerberg <mika.westerberg@linux.intel.com> * Author: Rafael J.
Wysocki <rafael.j.wysocki@intel.com> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <linux/acpi.h> #include <linux/device.h> #include <linux/export.h> #include <linux/nls.h> #include "internal.h" static ssize_t acpi_object_path(acpi_handle handle, char *buf) { struct acpi_buffer path = {ACPI_ALLOCATE_BUFFER, NULL}; int result; result = acpi_get_name(handle, ACPI_FULL_PATHNAME, &path); if (result) return result; result = sprintf(buf, "%s\n", (char *)path.pointer); kfree(path.pointer); return result; } struct acpi_data_node_attr { struct attribute attr; ssize_t (*show)(struct acpi_data_node *, char *); ssize_t (*store)(struct acpi_data_node *, const char *, size_t count); }; #define DATA_NODE_ATTR(_name) \ static struct acpi_data_node_attr data_node_##_name = \ __ATTR(_name, 0444, data_node_show_##_name, NULL) static ssize_t data_node_show_path(struct acpi_data_node *dn, char *buf) { return dn->handle ? acpi_object_path(dn->handle, buf) : 0; } DATA_NODE_ATTR(path); static struct attribute *acpi_data_node_default_attrs[] = { &data_node_path.attr, NULL }; ATTRIBUTE_GROUPS(acpi_data_node_default); #define to_data_node(k) container_of(k, struct acpi_data_node, kobj) #define to_attr(a) container_of(a, struct acpi_data_node_attr, attr) static ssize_t acpi_data_node_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct acpi_data_node *dn = to_data_node(kobj); struct acpi_data_node_attr *dn_attr = to_attr(attr); return dn_attr->show ? dn_attr->show(dn, buf) : -ENXIO; } static const struct sysfs_ops acpi_data_node_sysfs_ops = { .show = acpi_data_node_attr_show, }; static void acpi_data_node_release(struct kobject *kobj) { struct acpi_data_node *dn = to_data_node(kobj); complete(&dn->kobj_done); } static const struct kobj_type acpi_data_node_ktype = { .sysfs_ops = &acpi_data_node_sysfs_ops, .default_groups = acpi_data_node_default_groups, .release = acpi_data_node_release, }; static void acpi_expose_nondev_subnodes(struct kobject *kobj, struct acpi_device_data *data) { struct list_head *list = &data->subnodes; struct acpi_data_node *dn; if (list_empty(list)) return; list_for_each_entry(dn, list, sibling) { int ret; init_completion(&dn->kobj_done); ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype, kobj, "%s", dn->name); if (!ret) acpi_expose_nondev_subnodes(&dn->kobj, &dn->data); else if (dn->handle) acpi_handle_err(dn->handle, "Failed to expose (%d)\n", ret); } } static void acpi_hide_nondev_subnodes(struct acpi_device_data *data) { struct list_head *list = &data->subnodes; struct acpi_data_node *dn; if (list_empty(list)) return; list_for_each_entry_reverse(dn, list, sibling) { acpi_hide_nondev_subnodes(&dn->data); kobject_put(&dn->kobj); } } /** * create_pnp_modalias - Create hid/cid(s) string for modalias and uevent * @acpi_dev: ACPI device object. * @modalias: Buffer to print into. * @size: Size of the buffer. * * Creates hid/cid(s) string needed for modalias and uevent * e.g. on a device with hid:IBM0001 and cid:ACPI0001 you get: * char *modalias: "acpi:IBM0001:ACPI0001" * Return: 0: no _HID and no _CID * -EINVAL: output error * -ENOMEM: output is truncated */ static int create_pnp_modalias(const struct acpi_device *acpi_dev, char *modalias, int size) { int len; int count; struct acpi_hardware_id *id; /* Avoid unnecessarily loading modules for non present devices. 
*/ if (!acpi_device_is_present(acpi_dev)) return 0; /* * Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should * be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the * device's list. */ count = 0; list_for_each_entry(id, &acpi_dev->pnp.ids, list) if (strcmp(id->id, ACPI_DT_NAMESPACE_HID)) count++; if (!count) return 0; len = snprintf(modalias, size, "acpi:"); if (len >= size) return -ENOMEM; size -= len; list_for_each_entry(id, &acpi_dev->pnp.ids, list) { if (!strcmp(id->id, ACPI_DT_NAMESPACE_HID)) continue; count = snprintf(&modalias[len], size, "%s:", id->id); if (count >= size) return -ENOMEM; len += count; size -= count; } return len; } /** * create_of_modalias - Creates DT compatible string for modalias and uevent * @acpi_dev: ACPI device object. * @modalias: Buffer to print into. * @size: Size of the buffer. * * Expose DT compatible modalias as of:NnameTCcompatible. This function should * only be called for devices having ACPI_DT_NAMESPACE_HID in their list of * ACPI/PNP IDs. */ static int create_of_modalias(const struct acpi_device *acpi_dev, char *modalias, int size) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; const union acpi_object *of_compatible, *obj; acpi_status status; int len, count; int i, nval; char *c; status = acpi_get_name(acpi_dev->handle, ACPI_SINGLE_NAME, &buf); if (ACPI_FAILURE(status)) return -ENODEV; /* DT strings are all in lower case */ for (c = buf.pointer; *c != '\0'; c++) *c = tolower(*c); len = snprintf(modalias, size, "of:N%sT", (char *)buf.pointer); ACPI_FREE(buf.pointer); if (len >= size) return -ENOMEM; size -= len; of_compatible = acpi_dev->data.of_compatible; if (of_compatible->type == ACPI_TYPE_PACKAGE) { nval = of_compatible->package.count; obj = of_compatible->package.elements; } else { /* Must be ACPI_TYPE_STRING. */ nval = 1; obj = of_compatible; } for (i = 0; i < nval; i++, obj++) { count = snprintf(&modalias[len], size, "C%s", obj->string.pointer); if (count >= size) return -ENOMEM; len += count; size -= count; } return len; } int __acpi_device_uevent_modalias(const struct acpi_device *adev, struct kobj_uevent_env *env) { int len; if (!adev) return -ENODEV; if (list_empty(&adev->pnp.ids)) return 0; if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; if (adev->data.of_compatible) len = create_of_modalias(adev, &env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen); else len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], sizeof(env->buf) - env->buflen); if (len < 0) return len; env->buflen += len; return 0; } /** * acpi_device_uevent_modalias - uevent modalias for ACPI-enumerated devices. * @dev: Struct device to get ACPI device node. * @env: Environment variables of the kobject uevent. * * Create the uevent modalias field for ACPI-enumerated devices. * * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". 
*/ int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return __acpi_device_uevent_modalias(acpi_companion_match(dev), env); } EXPORT_SYMBOL_GPL(acpi_device_uevent_modalias); static int __acpi_device_modalias(const struct acpi_device *adev, char *buf, int size) { int len, count; if (!adev) return -ENODEV; if (list_empty(&adev->pnp.ids)) return 0; len = create_pnp_modalias(adev, buf, size - 1); if (len < 0) { return len; } else if (len > 0) { buf[len++] = '\n'; size -= len; } if (!adev->data.of_compatible) return len; count = create_of_modalias(adev, buf + len, size - 1); if (count < 0) { return count; } else if (count > 0) { len += count; buf[len++] = '\n'; } return len; } /** * acpi_device_modalias - modalias sysfs attribute for ACPI-enumerated devices. * @dev: Struct device to get ACPI device node. * @buf: The buffer to save pnp_modalias and of_modalias. * @size: Size of buffer. * * Create the modalias sysfs attribute for ACPI-enumerated devices. * * Because other buses do not support ACPI HIDs & CIDs, e.g. for a device with * hid:IBM0001 and cid:ACPI0001 you get: "acpi:IBM0001:ACPI0001". */ int acpi_device_modalias(struct device *dev, char *buf, int size) { return __acpi_device_modalias(acpi_companion_match(dev), buf, size); } EXPORT_SYMBOL_GPL(acpi_device_modalias); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { return __acpi_device_modalias(to_acpi_device(dev), buf, 1024); } static DEVICE_ATTR_RO(modalias); static ssize_t real_power_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *adev = to_acpi_device(dev); int state; int ret; ret = acpi_device_get_power(adev, &state); if (ret) return ret; return sprintf(buf, "%s\n", acpi_power_state_string(state)); } static DEVICE_ATTR_RO(real_power_state); static ssize_t power_state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *adev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_power_state_string(adev->power.state)); } static DEVICE_ATTR_RO(power_state); static ssize_t eject_store(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct acpi_device *acpi_device = to_acpi_device(d); acpi_object_type not_used; acpi_status status; if (!count || buf[0] != '1') return -EINVAL; if ((!acpi_device->handler || !acpi_device->handler->hotplug.enabled) && !d->driver) return -ENODEV; status = acpi_get_type(acpi_device->handle, &not_used); if (ACPI_FAILURE(status) || !acpi_device->flags.ejectable) return -ENODEV; acpi_dev_get(acpi_device); status = acpi_hotplug_schedule(acpi_device, ACPI_OST_EC_OSPM_EJECT); if (ACPI_SUCCESS(status)) return count; acpi_dev_put(acpi_device); acpi_evaluate_ost(acpi_device->handle, ACPI_OST_EC_OSPM_EJECT, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); return status == AE_NO_MEMORY ? 
-ENOMEM : -EAGAIN; } static DEVICE_ATTR_WO(eject); static ssize_t hid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev)); } static DEVICE_ATTR_RO(hid); static ssize_t uid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return sprintf(buf, "%s\n", acpi_device_uid(acpi_dev)); } static DEVICE_ATTR_RO(uid); static ssize_t adr_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); if (acpi_dev->pnp.bus_address > U32_MAX) return sprintf(buf, "0x%016llx\n", acpi_dev->pnp.bus_address); else return sprintf(buf, "0x%08llx\n", acpi_dev->pnp.bus_address); } static DEVICE_ATTR_RO(adr); static ssize_t path_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); return acpi_object_path(acpi_dev->handle, buf); } static DEVICE_ATTR_RO(path); /* sysfs file that shows description text from the ACPI _STR method */ static ssize_t description_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); int result; if (acpi_dev->pnp.str_obj == NULL) return 0; /* * The _STR object contains a Unicode identifier for a device. * We need to convert to utf-8 so it can be displayed. */ result = utf16s_to_utf8s( (wchar_t *)acpi_dev->pnp.str_obj->buffer.pointer, acpi_dev->pnp.str_obj->buffer.length, UTF16_LITTLE_ENDIAN, buf, PAGE_SIZE - 1); buf[result++] = '\n'; return result; } static DEVICE_ATTR_RO(description); static ssize_t sun_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long sun; status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", sun); } static DEVICE_ATTR_RO(sun); static ssize_t hrv_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long hrv; status = acpi_evaluate_integer(acpi_dev->handle, "_HRV", NULL, &hrv); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", hrv); } static DEVICE_ATTR_RO(hrv); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct acpi_device *acpi_dev = to_acpi_device(dev); acpi_status status; unsigned long long sta; status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta); if (ACPI_FAILURE(status)) return -EIO; return sprintf(buf, "%llu\n", sta); } static DEVICE_ATTR_RO(status); /** * acpi_device_setup_files - Create sysfs attributes of an ACPI device. * @dev: ACPI device object. 
*/ int acpi_device_setup_files(struct acpi_device *dev) { struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; acpi_status status; int result = 0; /* * Devices gotten from FADT don't have a "path" attribute */ if (dev->handle) { result = device_create_file(&dev->dev, &dev_attr_path); if (result) goto end; } if (!list_empty(&dev->pnp.ids)) { result = device_create_file(&dev->dev, &dev_attr_hid); if (result) goto end; result = device_create_file(&dev->dev, &dev_attr_modalias); if (result) goto end; } /* * If device has _STR, 'description' file is created */ if (acpi_has_method(dev->handle, "_STR")) { status = acpi_evaluate_object(dev->handle, "_STR", NULL, &buffer); if (ACPI_FAILURE(status)) buffer.pointer = NULL; dev->pnp.str_obj = buffer.pointer; result = device_create_file(&dev->dev, &dev_attr_description); if (result) goto end; } if (dev->pnp.type.bus_address) result = device_create_file(&dev->dev, &dev_attr_adr); if (acpi_device_uid(dev)) result = device_create_file(&dev->dev, &dev_attr_uid); if (acpi_has_method(dev->handle, "_SUN")) { result = device_create_file(&dev->dev, &dev_attr_sun); if (result) goto end; } if (acpi_has_method(dev->handle, "_HRV")) { result = device_create_file(&dev->dev, &dev_attr_hrv); if (result) goto end; } if (acpi_has_method(dev->handle, "_STA")) { result = device_create_file(&dev->dev, &dev_attr_status); if (result) goto end; } /* * If device has _EJ0, 'eject' file is created that is used to trigger * hot-removal function from userland. */ if (acpi_has_method(dev->handle, "_EJ0")) { result = device_create_file(&dev->dev, &dev_attr_eject); if (result) return result; } if (dev->flags.power_manageable) { result = device_create_file(&dev->dev, &dev_attr_power_state); if (result) return result; if (dev->power.flags.power_resources) result = device_create_file(&dev->dev, &dev_attr_real_power_state); } acpi_expose_nondev_subnodes(&dev->dev.kobj, &dev->data); end: return result; } /** * acpi_device_remove_files - Remove sysfs attributes of an ACPI device. * @dev: ACPI device object. */ void acpi_device_remove_files(struct acpi_device *dev) { acpi_hide_nondev_subnodes(&dev->data); if (dev->flags.power_manageable) { device_remove_file(&dev->dev, &dev_attr_power_state); if (dev->power.flags.power_resources) device_remove_file(&dev->dev, &dev_attr_real_power_state); } /* * If device has _STR, remove 'description' file */ if (acpi_has_method(dev->handle, "_STR")) { kfree(dev->pnp.str_obj); device_remove_file(&dev->dev, &dev_attr_description); } /* * If device has _EJ0, remove 'eject' file. */ if (acpi_has_method(dev->handle, "_EJ0")) device_remove_file(&dev->dev, &dev_attr_eject); if (acpi_has_method(dev->handle, "_SUN")) device_remove_file(&dev->dev, &dev_attr_sun); if (acpi_has_method(dev->handle, "_HRV")) device_remove_file(&dev->dev, &dev_attr_hrv); if (acpi_device_uid(dev)) device_remove_file(&dev->dev, &dev_attr_uid); if (dev->pnp.type.bus_address) device_remove_file(&dev->dev, &dev_attr_adr); device_remove_file(&dev->dev, &dev_attr_modalias); device_remove_file(&dev->dev, &dev_attr_hid); if (acpi_has_method(dev->handle, "_STA")) device_remove_file(&dev->dev, &dev_attr_status); if (dev->handle) device_remove_file(&dev->dev, &dev_attr_path); }
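/*
 * Illustrative sketch (not part of device_sysfs.c): the two modalias shapes
 * built by create_pnp_modalias() and create_of_modalias() above, mocked up
 * for a hypothetical device. A device with _HID "IBM0001" and _CID "ACPI0001"
 * gets the "acpi:" form (each ID followed by a colon); a device enumerated
 * with ACPI_DT_NAMESPACE_HID and a _DSD "compatible" property gets the "of:"
 * form instead, with the object name lowercased. Names below are made up.
 */
#if 0	/* documentation-only example */
static void example_show_modalias_shapes(void)
{
	char buf[64];

	/* create_pnp_modalias(): "acpi:" followed by each ACPI/PNP ID */
	scnprintf(buf, sizeof(buf), "acpi:%s:%s:", "IBM0001", "ACPI0001");
	pr_info("pnp modalias: %s\n", buf);	/* acpi:IBM0001:ACPI0001: */

	/* create_of_modalias(): "of:N<name>T" plus one "C<compatible>" each */
	scnprintf(buf, sizeof(buf), "of:N%sTC%s", "dev0", "vendor,some-sensor");
	pr_info("of modalias: %s\n", buf);	/* of:Ndev0TCvendor,some-sensor */
}
#endif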
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt) "iommu: " fmt

#include <linux/amba/bus.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitops.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
#include <linux/cc_platform.h>
#include <linux/cdx/cdx_bus.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>

#include "dma-iommu.h"
#include "iommu-priv.h"

static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
static DEFINE_IDA(iommu_global_pasid_ida);

static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly =
        IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;

struct iommu_group {
        struct kobject kobj;
        struct kobject *devices_kobj;
        struct list_head devices;
        struct xarray pasid_array;
        struct mutex mutex;
        void *iommu_data;
        void (*iommu_data_release)(void *iommu_data);
        char *name;
        int id;
        struct iommu_domain *default_domain;
        struct iommu_domain *blocking_domain;
        struct iommu_domain *domain;
        struct list_head entry;
        unsigned int owner_cnt;
        void *owner;
};

struct group_device {
        struct list_head list;
        struct device *dev;
        char *name;
};

/* Iterate over each struct group_device in a struct iommu_group */
#define for_each_group_device(group, pos) \
        list_for_each_entry(pos, &(group)->devices, list)

struct iommu_group_attribute {
        struct attribute attr;
        ssize_t (*show)(struct iommu_group *group, char *buf);
        ssize_t (*store)(struct iommu_group *group,
                         const char *buf, size_t count);
};

static const char * const iommu_group_resv_type_string[] = {
        [IOMMU_RESV_DIRECT]             = "direct",
        [IOMMU_RESV_DIRECT_RELAXABLE]   = "direct-relaxable",
        [IOMMU_RESV_RESERVED]           = "reserved",
        [IOMMU_RESV_MSI]                = "msi",
        [IOMMU_RESV_SW_MSI]             = "msi",
};

#define IOMMU_CMD_LINE_DMA_API          BIT(0)
#define IOMMU_CMD_LINE_STRICT           BIT(1)

static int iommu_bus_notifier(struct notifier_block *nb,
                              unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static struct iommu_domain *
__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type);
static int __iommu_attach_device(struct iommu_domain *domain,
                                 struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
                                struct iommu_group *group);

enum {
        IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
};

static int __iommu_device_set_domain(struct iommu_group *group,
                                     struct device *dev,
                                     struct iommu_domain *new_domain,
                                     unsigned int flags);
static int __iommu_group_set_domain_internal(struct iommu_group *group,
                                             struct iommu_domain *new_domain,
                                             unsigned int flags);

static int __iommu_group_set_domain(struct iommu_group *group,
                                    struct iommu_domain *new_domain)
{
        return __iommu_group_set_domain_internal(group, new_domain, 0);
}

static void __iommu_group_set_domain_nofail(struct iommu_group *group,
                                            struct iommu_domain *new_domain)
{
        WARN_ON(__iommu_group_set_domain_internal(
                group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
}

static int iommu_setup_default_domain(struct iommu_group *group,
                                      int target_type);
static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
                                               struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count); static struct group_device *iommu_group_alloc_device(struct iommu_group *group, struct device *dev); static void __iommu_group_free_device(struct iommu_group *group, struct group_device *grp_dev); #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ __ATTR(_name, _mode, _show, _store) #define to_iommu_group_attr(_attr) \ container_of(_attr, struct iommu_group_attribute, attr) #define to_iommu_group(_kobj) \ container_of(_kobj, struct iommu_group, kobj) static LIST_HEAD(iommu_device_list); static DEFINE_SPINLOCK(iommu_device_lock); static const struct bus_type * const iommu_buses[] = { &platform_bus_type, #ifdef CONFIG_PCI &pci_bus_type, #endif #ifdef CONFIG_ARM_AMBA &amba_bustype, #endif #ifdef CONFIG_FSL_MC_BUS &fsl_mc_bus_type, #endif #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS &host1x_context_device_bus_type, #endif #ifdef CONFIG_CDX_BUS &cdx_bus_type, #endif }; /* * Use a function instead of an array here because the domain-type is a * bit-field, so an array would waste memory. */ static const char *iommu_domain_type_str(unsigned int t) { switch (t) { case IOMMU_DOMAIN_BLOCKED: return "Blocked"; case IOMMU_DOMAIN_IDENTITY: return "Passthrough"; case IOMMU_DOMAIN_UNMANAGED: return "Unmanaged"; case IOMMU_DOMAIN_DMA: case IOMMU_DOMAIN_DMA_FQ: return "Translated"; case IOMMU_DOMAIN_PLATFORM: return "Platform"; default: return "Unknown"; } } static int __init iommu_subsys_init(void) { struct notifier_block *nb; if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) iommu_set_default_passthrough(false); else iommu_set_default_translated(false); if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); iommu_set_default_translated(false); } } if (!iommu_default_passthrough() && !iommu_dma_strict) iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; pr_info("Default domain type: %s%s\n", iommu_domain_type_str(iommu_def_domain_type), (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? " (set via kernel command line)" : ""); if (!iommu_default_passthrough()) pr_info("DMA domain TLB invalidation policy: %s mode%s\n", iommu_dma_strict ? "strict" : "lazy", (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? " (set via kernel command line)" : ""); nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); if (!nb) return -ENOMEM; for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { nb[i].notifier_call = iommu_bus_notifier; bus_register_notifier(iommu_buses[i], &nb[i]); } return 0; } subsys_initcall(iommu_subsys_init); static int remove_iommu_group(struct device *dev, void *data) { if (dev->iommu && dev->iommu->iommu_dev == data) iommu_release_device(dev); return 0; } /** * iommu_device_register() - Register an IOMMU hardware instance * @iommu: IOMMU handle for the instance * @ops: IOMMU ops to associate with the instance * @hwdev: (optional) actual instance device, used for fwnode lookup * * Return: 0 on success, or an error. 
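 *
 * A minimal registration sketch, roughly as it would appear in a driver's
 * probe path; the "foo" names and foo_iommu_ops below are illustrative only
 * and not taken from any real driver:
 *
 *      static int foo_iommu_probe(struct platform_device *pdev)
 *      {
 *              struct foo_iommu *foo;
 *
 *              foo = devm_kzalloc(&pdev->dev, sizeof(*foo), GFP_KERNEL);
 *              if (!foo)
 *                      return -ENOMEM;
 *              return iommu_device_register(&foo->iommu, &foo_iommu_ops,
 *                                           &pdev->dev);
 *      }
 *
 * Real drivers typically also call iommu_device_sysfs_add() before
 * registering and iommu_device_unregister() on their remove path.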
*/ int iommu_device_register(struct iommu_device *iommu, const struct iommu_ops *ops, struct device *hwdev) { int err = 0; /* We need to be able to take module references appropriately */ if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) return -EINVAL; iommu->ops = ops; if (hwdev) iommu->fwnode = dev_fwnode(hwdev); spin_lock(&iommu_device_lock); list_add_tail(&iommu->list, &iommu_device_list); spin_unlock(&iommu_device_lock); for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) err = bus_iommu_probe(iommu_buses[i]); if (err) iommu_device_unregister(iommu); return err; } EXPORT_SYMBOL_GPL(iommu_device_register); void iommu_device_unregister(struct iommu_device *iommu) { for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); spin_lock(&iommu_device_lock); list_del(&iommu->list); spin_unlock(&iommu_device_lock); /* Pairs with the alloc in generic_single_device_group() */ iommu_group_put(iommu->singleton_group); iommu->singleton_group = NULL; } EXPORT_SYMBOL_GPL(iommu_device_unregister); #if IS_ENABLED(CONFIG_IOMMUFD_TEST) void iommu_device_unregister_bus(struct iommu_device *iommu, const struct bus_type *bus, struct notifier_block *nb) { bus_unregister_notifier(bus, nb); iommu_device_unregister(iommu); } EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); /* * Register an iommu driver against a single bus. This is only used by iommufd * selftest to create a mock iommu driver. The caller must provide * some memory to hold a notifier_block. */ int iommu_device_register_bus(struct iommu_device *iommu, const struct iommu_ops *ops, const struct bus_type *bus, struct notifier_block *nb) { int err; iommu->ops = ops; nb->notifier_call = iommu_bus_notifier; err = bus_register_notifier(bus, nb); if (err) return err; spin_lock(&iommu_device_lock); list_add_tail(&iommu->list, &iommu_device_list); spin_unlock(&iommu_device_lock); err = bus_iommu_probe(bus); if (err) { iommu_device_unregister_bus(iommu, bus, nb); return err; } return 0; } EXPORT_SYMBOL_GPL(iommu_device_register_bus); #endif static struct dev_iommu *dev_iommu_get(struct device *dev) { struct dev_iommu *param = dev->iommu; lockdep_assert_held(&iommu_probe_device_lock); if (param) return param; param = kzalloc(sizeof(*param), GFP_KERNEL); if (!param) return NULL; mutex_init(&param->lock); dev->iommu = param; return param; } static void dev_iommu_free(struct device *dev) { struct dev_iommu *param = dev->iommu; dev->iommu = NULL; if (param->fwspec) { fwnode_handle_put(param->fwspec->iommu_fwnode); kfree(param->fwspec); } kfree(param); } /* * Internal equivalent of device_iommu_mapped() for when we care that a device * actually has API ops, and don't want false positives from VFIO-only groups. 
*/ static bool dev_has_iommu(struct device *dev) { return dev->iommu && dev->iommu->iommu_dev; } static u32 dev_iommu_get_max_pasids(struct device *dev) { u32 max_pasids = 0, bits = 0; int ret; if (dev_is_pci(dev)) { ret = pci_max_pasids(to_pci_dev(dev)); if (ret > 0) max_pasids = ret; } else { ret = device_property_read_u32(dev, "pasid-num-bits", &bits); if (!ret) max_pasids = 1UL << bits; } return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); } void dev_iommu_priv_set(struct device *dev, void *priv) { /* FSL_PAMU does something weird */ if (!IS_ENABLED(CONFIG_FSL_PAMU)) lockdep_assert_held(&iommu_probe_device_lock); dev->iommu->priv = priv; } EXPORT_SYMBOL_GPL(dev_iommu_priv_set); /* * Init the dev->iommu and dev->iommu_group in the struct device and get the * driver probed */ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) { struct iommu_device *iommu_dev; struct iommu_group *group; int ret; if (!dev_iommu_get(dev)) return -ENOMEM; if (!try_module_get(ops->owner)) { ret = -EINVAL; goto err_free; } iommu_dev = ops->probe_device(dev); if (IS_ERR(iommu_dev)) { ret = PTR_ERR(iommu_dev); goto err_module_put; } dev->iommu->iommu_dev = iommu_dev; ret = iommu_device_link(iommu_dev, dev); if (ret) goto err_release; group = ops->device_group(dev); if (WARN_ON_ONCE(group == NULL)) group = ERR_PTR(-EINVAL); if (IS_ERR(group)) { ret = PTR_ERR(group); goto err_unlink; } dev->iommu_group = group; dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); if (ops->is_attach_deferred) dev->iommu->attach_deferred = ops->is_attach_deferred(dev); return 0; err_unlink: iommu_device_unlink(iommu_dev, dev); err_release: if (ops->release_device) ops->release_device(dev); err_module_put: module_put(ops->owner); err_free: dev->iommu->iommu_dev = NULL; dev_iommu_free(dev); return ret; } static void iommu_deinit_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; const struct iommu_ops *ops = dev_iommu_ops(dev); lockdep_assert_held(&group->mutex); iommu_device_unlink(dev->iommu->iommu_dev, dev); /* * release_device() must stop using any attached domain on the device. * If there are still other devices in the group, they are not affected * by this callback. * * If the iommu driver provides release_domain, the core code ensures * that domain is attached prior to calling release_device. Drivers can * use this to enforce a translation on the idle iommu. Typically, the * global static blocked_domain is a good choice. * * Otherwise, the iommu driver must set the device to either an identity * or a blocking translation in release_device() and stop using any * domain pointer, as it is going to be freed. * * Regardless, if a delayed attach never occurred, then the release * should still avoid touching any hardware configuration either. */ if (!dev->iommu->attach_deferred && ops->release_domain) ops->release_domain->ops->attach_dev(ops->release_domain, dev); if (ops->release_device) ops->release_device(dev); /* * If this is the last driver to use the group then we must free the * domains before we do the module_put(). 
*/ if (list_empty(&group->devices)) { if (group->default_domain) { iommu_domain_free(group->default_domain); group->default_domain = NULL; } if (group->blocking_domain) { iommu_domain_free(group->blocking_domain); group->blocking_domain = NULL; } group->domain = NULL; } /* Caller must put iommu_group */ dev->iommu_group = NULL; module_put(ops->owner); dev_iommu_free(dev); } DEFINE_MUTEX(iommu_probe_device_lock); static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops; struct iommu_fwspec *fwspec; struct iommu_group *group; struct group_device *gdev; int ret; /* * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU * instances with non-NULL fwnodes, and client devices should have been * identified with a fwspec by this point. Otherwise, we can currently * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can * be present, and that any of their registered instances has suitable * ops for probing, and thus cheekily co-opt the same mechanism. */ fwspec = dev_iommu_fwspec_get(dev); if (fwspec && fwspec->ops) ops = fwspec->ops; else ops = iommu_ops_from_fwnode(NULL); if (!ops) return -ENODEV; /* * Serialise to avoid races between IOMMU drivers registering in * parallel and/or the "replay" calls from ACPI/OF code via client * driver probe. Once the latter have been cleaned up we should * probably be able to use device_lock() here to minimise the scope, * but for now enforcing a simple global ordering is fine. */ lockdep_assert_held(&iommu_probe_device_lock); /* Device is probed already if in a group */ if (dev->iommu_group) return 0; ret = iommu_init_device(dev, ops); if (ret) return ret; group = dev->iommu_group; gdev = iommu_group_alloc_device(group, dev); mutex_lock(&group->mutex); if (IS_ERR(gdev)) { ret = PTR_ERR(gdev); goto err_put_group; } /* * The gdev must be in the list before calling * iommu_setup_default_domain() */ list_add_tail(&gdev->list, &group->devices); WARN_ON(group->default_domain && !group->domain); if (group->default_domain) iommu_create_device_direct_mappings(group->default_domain, dev); if (group->domain) { ret = __iommu_device_set_domain(group, dev, group->domain, 0); if (ret) goto err_remove_gdev; } else if (!group->default_domain && !group_list) { ret = iommu_setup_default_domain(group, 0); if (ret) goto err_remove_gdev; } else if (!group->default_domain) { /* * With a group_list argument we defer the default_domain setup * to the caller by providing a de-duplicated list of groups * that need further setup. 
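 * The only path that supplies a group_list today is bus_iommu_probe()
 * (via probe_iommu_group()); it later walks the deferred groups and runs
 * iommu_setup_default_domain() on each of them.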
*/ if (list_empty(&group->entry)) list_add_tail(&group->entry, group_list); } mutex_unlock(&group->mutex); if (dev_is_pci(dev)) iommu_dma_set_pci_32bit_workaround(dev); return 0; err_remove_gdev: list_del(&gdev->list); __iommu_group_free_device(group, gdev); err_put_group: iommu_deinit_device(dev); mutex_unlock(&group->mutex); iommu_group_put(group); return ret; } int iommu_probe_device(struct device *dev) { const struct iommu_ops *ops; int ret; mutex_lock(&iommu_probe_device_lock); ret = __iommu_probe_device(dev, NULL); mutex_unlock(&iommu_probe_device_lock); if (ret) return ret; ops = dev_iommu_ops(dev); if (ops->probe_finalize) ops->probe_finalize(dev); return 0; } static void __iommu_group_free_device(struct iommu_group *group, struct group_device *grp_dev) { struct device *dev = grp_dev->dev; sysfs_remove_link(group->devices_kobj, grp_dev->name); sysfs_remove_link(&dev->kobj, "iommu_group"); trace_remove_device_from_group(group->id, dev); /* * If the group has become empty then ownership must have been * released, and the current domain must be set back to NULL or * the default domain. */ if (list_empty(&group->devices)) WARN_ON(group->owner_cnt || group->domain != group->default_domain); kfree(grp_dev->name); kfree(grp_dev); } /* Remove the iommu_group from the struct device. */ static void __iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; struct group_device *device; mutex_lock(&group->mutex); for_each_group_device(group, device) { if (device->dev != dev) continue; list_del(&device->list); __iommu_group_free_device(group, device); if (dev_has_iommu(dev)) iommu_deinit_device(dev); else dev->iommu_group = NULL; break; } mutex_unlock(&group->mutex); /* * Pairs with the get in iommu_init_device() or * iommu_group_add_device() */ iommu_group_put(group); } static void iommu_release_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; if (group) __iommu_group_remove_device(dev); /* Free any fwspec if no iommu_driver was ever attached */ if (dev->iommu) dev_iommu_free(dev); } static int __init iommu_set_def_domain_type(char *str) { bool pt; int ret; ret = kstrtobool(str, &pt); if (ret) return ret; if (pt) iommu_set_default_passthrough(true); else iommu_set_default_translated(true); return 0; } early_param("iommu.passthrough", iommu_set_def_domain_type); static int __init iommu_dma_setup(char *str) { int ret = kstrtobool(str, &iommu_dma_strict); if (!ret) iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; return ret; } early_param("iommu.strict", iommu_dma_setup); void iommu_set_dma_strict(void) { iommu_dma_strict = true; if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) iommu_def_domain_type = IOMMU_DOMAIN_DMA; } static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); struct iommu_group *group = to_iommu_group(kobj); ssize_t ret = -EIO; if (attr->show) ret = attr->show(group, buf); return ret; } static ssize_t iommu_group_attr_store(struct kobject *kobj, struct attribute *__attr, const char *buf, size_t count) { struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); struct iommu_group *group = to_iommu_group(kobj); ssize_t ret = -EIO; if (attr->store) ret = attr->store(group, buf, count); return ret; } static const struct sysfs_ops iommu_group_sysfs_ops = { .show = iommu_group_attr_show, .store = iommu_group_attr_store, }; static int iommu_group_create_file(struct iommu_group *group, struct iommu_group_attribute 
*attr) { return sysfs_create_file(&group->kobj, &attr->attr); } static void iommu_group_remove_file(struct iommu_group *group, struct iommu_group_attribute *attr) { sysfs_remove_file(&group->kobj, &attr->attr); } static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) { return sysfs_emit(buf, "%s\n", group->name); } /** * iommu_insert_resv_region - Insert a new region in the * list of reserved regions. * @new: new region to insert * @regions: list of regions * * Elements are sorted by start address and overlapping segments * of the same type are merged. */ static int iommu_insert_resv_region(struct iommu_resv_region *new, struct list_head *regions) { struct iommu_resv_region *iter, *tmp, *nr, *top; LIST_HEAD(stack); nr = iommu_alloc_resv_region(new->start, new->length, new->prot, new->type, GFP_KERNEL); if (!nr) return -ENOMEM; /* First add the new element based on start address sorting */ list_for_each_entry(iter, regions, list) { if (nr->start < iter->start || (nr->start == iter->start && nr->type <= iter->type)) break; } list_add_tail(&nr->list, &iter->list); /* Merge overlapping segments of type nr->type in @regions, if any */ list_for_each_entry_safe(iter, tmp, regions, list) { phys_addr_t top_end, iter_end = iter->start + iter->length - 1; /* no merge needed on elements of different types than @new */ if (iter->type != new->type) { list_move_tail(&iter->list, &stack); continue; } /* look for the last stack element of same type as @iter */ list_for_each_entry_reverse(top, &stack, list) if (top->type == iter->type) goto check_overlap; list_move_tail(&iter->list, &stack); continue; check_overlap: top_end = top->start + top->length - 1; if (iter->start > top_end + 1) { list_move_tail(&iter->list, &stack); } else { top->length = max(top_end, iter_end) - top->start + 1; list_del(&iter->list); kfree(iter); } } list_splice(&stack, regions); return 0; } static int iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, struct list_head *group_resv_regions) { struct iommu_resv_region *entry; int ret = 0; list_for_each_entry(entry, dev_resv_regions, list) { ret = iommu_insert_resv_region(entry, group_resv_regions); if (ret) break; } return ret; } int iommu_get_group_resv_regions(struct iommu_group *group, struct list_head *head) { struct group_device *device; int ret = 0; mutex_lock(&group->mutex); for_each_group_device(group, device) { struct list_head dev_resv_regions; /* * Non-API groups still expose reserved_regions in sysfs, * so filter out calls that get here that way. 
*/ if (!dev_has_iommu(device->dev)) break; INIT_LIST_HEAD(&dev_resv_regions); iommu_get_resv_regions(device->dev, &dev_resv_regions); ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); iommu_put_resv_regions(device->dev, &dev_resv_regions); if (ret) break; } mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, char *buf) { struct iommu_resv_region *region, *next; struct list_head group_resv_regions; int offset = 0; INIT_LIST_HEAD(&group_resv_regions); iommu_get_group_resv_regions(group, &group_resv_regions); list_for_each_entry_safe(region, next, &group_resv_regions, list) { offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", (long long)region->start, (long long)(region->start + region->length - 1), iommu_group_resv_type_string[region->type]); kfree(region); } return offset; } static ssize_t iommu_group_show_type(struct iommu_group *group, char *buf) { char *type = "unknown"; mutex_lock(&group->mutex); if (group->default_domain) { switch (group->default_domain->type) { case IOMMU_DOMAIN_BLOCKED: type = "blocked"; break; case IOMMU_DOMAIN_IDENTITY: type = "identity"; break; case IOMMU_DOMAIN_UNMANAGED: type = "unmanaged"; break; case IOMMU_DOMAIN_DMA: type = "DMA"; break; case IOMMU_DOMAIN_DMA_FQ: type = "DMA-FQ"; break; } } mutex_unlock(&group->mutex); return sysfs_emit(buf, "%s\n", type); } static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); static IOMMU_GROUP_ATTR(reserved_regions, 0444, iommu_group_show_resv_regions, NULL); static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, iommu_group_store_type); static void iommu_group_release(struct kobject *kobj) { struct iommu_group *group = to_iommu_group(kobj); pr_debug("Releasing group %d\n", group->id); if (group->iommu_data_release) group->iommu_data_release(group->iommu_data); ida_free(&iommu_group_ida, group->id); /* Domains are free'd by iommu_deinit_device() */ WARN_ON(group->default_domain); WARN_ON(group->blocking_domain); kfree(group->name); kfree(group); } static const struct kobj_type iommu_group_ktype = { .sysfs_ops = &iommu_group_sysfs_ops, .release = iommu_group_release, }; /** * iommu_group_alloc - Allocate a new group * * This function is called by an iommu driver to allocate a new iommu * group. The iommu group represents the minimum granularity of the iommu. * Upon successful return, the caller holds a reference to the supplied * group in order to hold the group until devices are added. Use * iommu_group_put() to release this extra reference count, allowing the * group to be automatically reclaimed once it has no devices or external * references. 
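 *
 * A minimal sketch of that pattern (illustrative only, error handling
 * trimmed):
 *
 *      struct iommu_group *grp = iommu_group_alloc();
 *
 *      if (IS_ERR(grp))
 *              return PTR_ERR(grp);
 *      iommu_group_set_name(grp, "my-group");
 *      iommu_group_add_device(grp, dev);
 *      iommu_group_put(grp);   // drop the extra reference taken above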
*/ struct iommu_group *iommu_group_alloc(void) { struct iommu_group *group; int ret; group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); group->kobj.kset = iommu_group_kset; mutex_init(&group->mutex); INIT_LIST_HEAD(&group->devices); INIT_LIST_HEAD(&group->entry); xa_init(&group->pasid_array); ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); if (ret < 0) { kfree(group); return ERR_PTR(ret); } group->id = ret; ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, NULL, "%d", group->id); if (ret) { kobject_put(&group->kobj); return ERR_PTR(ret); } group->devices_kobj = kobject_create_and_add("devices", &group->kobj); if (!group->devices_kobj) { kobject_put(&group->kobj); /* triggers .release & free */ return ERR_PTR(-ENOMEM); } /* * The devices_kobj holds a reference on the group kobject, so * as long as that exists so will the group. We can therefore * use the devices_kobj for reference counting. */ kobject_put(&group->kobj); ret = iommu_group_create_file(group, &iommu_group_attr_reserved_regions); if (ret) { kobject_put(group->devices_kobj); return ERR_PTR(ret); } ret = iommu_group_create_file(group, &iommu_group_attr_type); if (ret) { kobject_put(group->devices_kobj); return ERR_PTR(ret); } pr_debug("Allocated group %d\n", group->id); return group; } EXPORT_SYMBOL_GPL(iommu_group_alloc); /** * iommu_group_get_iommudata - retrieve iommu_data registered for a group * @group: the group * * iommu drivers can store data in the group for use when doing iommu * operations. This function provides a way to retrieve it. Caller * should hold a group reference. */ void *iommu_group_get_iommudata(struct iommu_group *group) { return group->iommu_data; } EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); /** * iommu_group_set_iommudata - set iommu_data for a group * @group: the group * @iommu_data: new data * @release: release function for iommu_data * * iommu drivers can store data in the group for use when doing iommu * operations. This function provides a way to set the data after * the group has been allocated. Caller should hold a group reference. */ void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, void (*release)(void *iommu_data)) { group->iommu_data = iommu_data; group->iommu_data_release = release; } EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); /** * iommu_group_set_name - set name for a group * @group: the group * @name: name * * Allow iommu driver to set a name for a group. When set it will * appear in a name attribute file under the group in sysfs. */ int iommu_group_set_name(struct iommu_group *group, const char *name) { int ret; if (group->name) { iommu_group_remove_file(group, &iommu_group_attr_name); kfree(group->name); group->name = NULL; if (!name) return 0; } group->name = kstrdup(name, GFP_KERNEL); if (!group->name) return -ENOMEM; ret = iommu_group_create_file(group, &iommu_group_attr_name); if (ret) { kfree(group->name); group->name = NULL; return ret; } return 0; } EXPORT_SYMBOL_GPL(iommu_group_set_name); static int iommu_create_device_direct_mappings(struct iommu_domain *domain, struct device *dev) { struct iommu_resv_region *entry; struct list_head mappings; unsigned long pg_size; int ret = 0; pg_size = domain->pgsize_bitmap ? 
1UL << __ffs(domain->pgsize_bitmap) : 0; INIT_LIST_HEAD(&mappings); if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) return -EINVAL; iommu_get_resv_regions(dev, &mappings); /* We need to consider overlapping regions for different devices */ list_for_each_entry(entry, &mappings, list) { dma_addr_t start, end, addr; size_t map_size = 0; if (entry->type == IOMMU_RESV_DIRECT) dev->iommu->require_direct = 1; if ((entry->type != IOMMU_RESV_DIRECT && entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || !iommu_is_dma_domain(domain)) continue; start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); for (addr = start; addr <= end; addr += pg_size) { phys_addr_t phys_addr; if (addr == end) goto map_end; phys_addr = iommu_iova_to_phys(domain, addr); if (!phys_addr) { map_size += pg_size; continue; } map_end: if (map_size) { ret = iommu_map(domain, addr - map_size, addr - map_size, map_size, entry->prot, GFP_KERNEL); if (ret) goto out; map_size = 0; } } } if (!list_empty(&mappings) && iommu_is_dma_domain(domain)) iommu_flush_iotlb_all(domain); out: iommu_put_resv_regions(dev, &mappings); return ret; } /* This is undone by __iommu_group_free_device() */ static struct group_device *iommu_group_alloc_device(struct iommu_group *group, struct device *dev) { int ret, i = 0; struct group_device *device; device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) return ERR_PTR(-ENOMEM); device->dev = dev; ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); if (ret) goto err_free_device; device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); rename: if (!device->name) { ret = -ENOMEM; goto err_remove_link; } ret = sysfs_create_link_nowarn(group->devices_kobj, &dev->kobj, device->name); if (ret) { if (ret == -EEXIST && i >= 0) { /* * Account for the slim chance of collision * and append an instance to the name. */ kfree(device->name); device->name = kasprintf(GFP_KERNEL, "%s.%d", kobject_name(&dev->kobj), i++); goto rename; } goto err_free_name; } trace_add_device_to_group(group->id, dev); dev_info(dev, "Adding to iommu group %d\n", group->id); return device; err_free_name: kfree(device->name); err_remove_link: sysfs_remove_link(&dev->kobj, "iommu_group"); err_free_device: kfree(device); dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); return ERR_PTR(ret); } /** * iommu_group_add_device - add a device to an iommu group * @group: the group into which to add the device (reference should be held) * @dev: the device * * This function is called by an iommu driver to add a device into a * group. Adding a device increments the group reference count. */ int iommu_group_add_device(struct iommu_group *group, struct device *dev) { struct group_device *gdev; gdev = iommu_group_alloc_device(group, dev); if (IS_ERR(gdev)) return PTR_ERR(gdev); iommu_group_ref_get(group); dev->iommu_group = group; mutex_lock(&group->mutex); list_add_tail(&gdev->list, &group->devices); mutex_unlock(&group->mutex); return 0; } EXPORT_SYMBOL_GPL(iommu_group_add_device); /** * iommu_group_remove_device - remove a device from it's current group * @dev: device to be removed * * This function is called by an iommu driver to remove the device from * it's current group. This decrements the iommu group reference count. 
*/ void iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; if (!group) return; dev_info(dev, "Removing from iommu group %d\n", group->id); __iommu_group_remove_device(dev); } EXPORT_SYMBOL_GPL(iommu_group_remove_device); #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) /** * iommu_group_mutex_assert - Check device group mutex lock * @dev: the device that has group param set * * This function is called by an iommu driver to check whether it holds * group mutex lock for the given device or not. * * Note that this function must be called after device group param is set. */ void iommu_group_mutex_assert(struct device *dev) { struct iommu_group *group = dev->iommu_group; lockdep_assert_held(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_group_mutex_assert); #endif static struct device *iommu_group_first_dev(struct iommu_group *group) { lockdep_assert_held(&group->mutex); return list_first_entry(&group->devices, struct group_device, list)->dev; } /** * iommu_group_for_each_dev - iterate over each device in the group * @group: the group * @data: caller opaque data to be passed to callback function * @fn: caller supplied callback function * * This function is called by group users to iterate over group devices. * Callers should hold a reference count to the group during callback. * The group->mutex is held across callbacks, which will block calls to * iommu_group_add/remove_device. */ int iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { struct group_device *device; int ret = 0; mutex_lock(&group->mutex); for_each_group_device(group, device) { ret = fn(device->dev, data); if (ret) break; } mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); /** * iommu_group_get - Return the group for a device and increment reference * @dev: get the group that this device belongs to * * This function is called by iommu drivers and users to get the group * for the specified device. If found, the group is returned and the group * reference in incremented, else NULL. */ struct iommu_group *iommu_group_get(struct device *dev) { struct iommu_group *group = dev->iommu_group; if (group) kobject_get(group->devices_kobj); return group; } EXPORT_SYMBOL_GPL(iommu_group_get); /** * iommu_group_ref_get - Increment reference on a group * @group: the group to use, must not be NULL * * This function is called by iommu drivers to take additional references on an * existing group. Returns the given group for convenience. */ struct iommu_group *iommu_group_ref_get(struct iommu_group *group) { kobject_get(group->devices_kobj); return group; } EXPORT_SYMBOL_GPL(iommu_group_ref_get); /** * iommu_group_put - Decrement group reference * @group: the group to use * * This function is called by iommu drivers and users to release the * iommu group. Once the reference count is zero, the group is released. */ void iommu_group_put(struct iommu_group *group) { if (group) kobject_put(group->devices_kobj); } EXPORT_SYMBOL_GPL(iommu_group_put); /** * iommu_group_id - Return ID for a group * @group: the group to ID * * Return the unique ID for the group matching the sysfs group number. 
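 * This is also the number that names the group's directory under
 * /sys/kernel/iommu_groups/ and that the legacy VFIO group interface
 * exposes to userspace as /dev/vfio/<id>.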
*/ int iommu_group_id(struct iommu_group *group) { return group->id; } EXPORT_SYMBOL_GPL(iommu_group_id); static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, unsigned long *devfns); /* * To consider a PCI device isolated, we require ACS to support Source * Validation, Request Redirection, Completer Redirection, and Upstream * Forwarding. This effectively means that devices cannot spoof their * requester ID, requests and completions cannot be redirected, and all * transactions are forwarded upstream, even as it passes through a * bridge where the target device is downstream. */ #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) /* * For multifunction devices which are not isolated from each other, find * all the other non-isolated functions and look for existing groups. For * each function, we also need to look for aliases to or from other devices * that may already have a group. */ static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, unsigned long *devfns) { struct pci_dev *tmp = NULL; struct iommu_group *group; if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) return NULL; for_each_pci_dev(tmp) { if (tmp == pdev || tmp->bus != pdev->bus || PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || pci_acs_enabled(tmp, REQ_ACS_FLAGS)) continue; group = get_pci_alias_group(tmp, devfns); if (group) { pci_dev_put(tmp); return group; } } return NULL; } /* * Look for aliases to or from the given device for existing groups. DMA * aliases are only supported on the same bus, therefore the search * space is quite small (especially since we're really only looking at pcie * device, and therefore only expect multiple slots on the root complex or * downstream switch ports). It's conceivable though that a pair of * multifunction devices could have aliases between them that would cause a * loop. To prevent this, we use a bitmap to track where we've been. */ static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, unsigned long *devfns) { struct pci_dev *tmp = NULL; struct iommu_group *group; if (test_and_set_bit(pdev->devfn & 0xff, devfns)) return NULL; group = iommu_group_get(&pdev->dev); if (group) return group; for_each_pci_dev(tmp) { if (tmp == pdev || tmp->bus != pdev->bus) continue; /* We alias them or they alias us */ if (pci_devs_are_dma_aliases(pdev, tmp)) { group = get_pci_alias_group(tmp, devfns); if (group) { pci_dev_put(tmp); return group; } group = get_pci_function_alias_group(tmp, devfns); if (group) { pci_dev_put(tmp); return group; } } } return NULL; } struct group_for_pci_data { struct pci_dev *pdev; struct iommu_group *group; }; /* * DMA alias iterator callback, return the last seen device. Stop and return * the IOMMU group if we find one along the way. */ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) { struct group_for_pci_data *data = opaque; data->pdev = pdev; data->group = iommu_group_get(&pdev->dev); return data->group != NULL; } /* * Generic device_group call-back function. It just allocates one * iommu-group per device. */ struct iommu_group *generic_device_group(struct device *dev) { return iommu_group_alloc(); } EXPORT_SYMBOL_GPL(generic_device_group); /* * Generic device_group call-back function. It just allocates one * iommu-group per iommu driver instance shared by every device * probed by that iommu driver. 
*/ struct iommu_group *generic_single_device_group(struct device *dev) { struct iommu_device *iommu = dev->iommu->iommu_dev; if (!iommu->singleton_group) { struct iommu_group *group; group = iommu_group_alloc(); if (IS_ERR(group)) return group; iommu->singleton_group = group; } return iommu_group_ref_get(iommu->singleton_group); } EXPORT_SYMBOL_GPL(generic_single_device_group); /* * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. */ struct iommu_group *pci_device_group(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct group_for_pci_data data; struct pci_bus *bus; struct iommu_group *group = NULL; u64 devfns[4] = { 0 }; if (WARN_ON(!dev_is_pci(dev))) return ERR_PTR(-EINVAL); /* * Find the upstream DMA alias for the device. A device must not * be aliased due to topology in order to have its own IOMMU group. * If we find an alias along the way that already belongs to a * group, use it. */ if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) return data.group; pdev = data.pdev; /* * Continue upstream from the point of minimum IOMMU granularity * due to aliases to the point where devices are protected from * peer-to-peer DMA by PCI ACS. Again, if we find an existing * group, use it. */ for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { if (!bus->self) continue; if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) break; pdev = bus->self; group = iommu_group_get(&pdev->dev); if (group) return group; } /* * Look for existing groups on device aliases. If we alias another * device or another device aliases us, use the same group. */ group = get_pci_alias_group(pdev, (unsigned long *)devfns); if (group) return group; /* * Look for existing groups on non-isolated functions on the same * slot and aliases of those funcions, if any. No need to clear * the search bitmap, the tested devfns are still valid. */ group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); if (group) return group; /* No shared group found, allocate new */ return iommu_group_alloc(); } EXPORT_SYMBOL_GPL(pci_device_group); /* Get the IOMMU group for device on fsl-mc bus */ struct iommu_group *fsl_mc_device_group(struct device *dev) { struct device *cont_dev = fsl_mc_cont_dev(dev); struct iommu_group *group; group = iommu_group_get(cont_dev); if (!group) group = iommu_group_alloc(); return group; } EXPORT_SYMBOL_GPL(fsl_mc_device_group); static struct iommu_domain * __iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { if (group->default_domain && group->default_domain->type == req_type) return group->default_domain; return __iommu_group_domain_alloc(group, req_type); } /* * req_type of 0 means "auto" which means to select a domain based on * iommu_def_domain_type or what the driver actually supports. */ static struct iommu_domain * iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group)); struct iommu_domain *dom; lockdep_assert_held(&group->mutex); /* * Allow legacy drivers to specify the domain that will be the default * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM * domain. Do not use in new drivers. 
*/ if (ops->default_domain) { if (req_type != ops->default_domain->type) return ERR_PTR(-EINVAL); return ops->default_domain; } if (req_type) return __iommu_group_alloc_default_domain(group, req_type); /* The driver gave no guidance on what type to use, try the default */ dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); if (!IS_ERR(dom)) return dom; /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) return ERR_PTR(-EINVAL); dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); if (IS_ERR(dom)) return dom; pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", iommu_def_domain_type, group->name); return dom; } struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) { return group->default_domain; } static int probe_iommu_group(struct device *dev, void *data) { struct list_head *group_list = data; int ret; mutex_lock(&iommu_probe_device_lock); ret = __iommu_probe_device(dev, group_list); mutex_unlock(&iommu_probe_device_lock); if (ret == -ENODEV) ret = 0; return ret; } static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; if (action == BUS_NOTIFY_ADD_DEVICE) { int ret; ret = iommu_probe_device(dev); return (ret) ? NOTIFY_DONE : NOTIFY_OK; } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { iommu_release_device(dev); return NOTIFY_OK; } return 0; } /* * Combine the driver's chosen def_domain_type across all the devices in a * group. Drivers must give a consistent result. */ static int iommu_get_def_domain_type(struct iommu_group *group, struct device *dev, int cur_type) { const struct iommu_ops *ops = dev_iommu_ops(dev); int type; if (ops->default_domain) { /* * Drivers that declare a global static default_domain will * always choose that. */ type = ops->default_domain->type; } else { if (ops->def_domain_type) type = ops->def_domain_type(dev); else return cur_type; } if (!type || cur_type == type) return cur_type; if (!cur_type) return type; dev_err_ratelimited( dev, "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n", iommu_domain_type_str(cur_type), iommu_domain_type_str(type), group->id); /* * Try to recover, drivers are allowed to force IDENITY or DMA, IDENTITY * takes precedence. */ if (type == IOMMU_DOMAIN_IDENTITY) return type; return cur_type; } /* * A target_type of 0 will select the best domain type. 0 can be returned in * this case meaning the global default should be used. */ static int iommu_get_default_domain_type(struct iommu_group *group, int target_type) { struct device *untrusted = NULL; struct group_device *gdev; int driver_type = 0; lockdep_assert_held(&group->mutex); /* * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an * identity_domain and it will automatically become their default * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain. * Override the selection to IDENTITY. */ if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) && IS_ENABLED(CONFIG_IOMMU_DMA))); driver_type = IOMMU_DOMAIN_IDENTITY; } for_each_group_device(group, gdev) { driver_type = iommu_get_def_domain_type(group, gdev->dev, driver_type); if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) { /* * No ARM32 using systems will set untrusted, it cannot * work. 
*/ if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))) return -1; untrusted = gdev->dev; } } /* * If the common dma ops are not selected in kconfig then we cannot use * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been * selected. */ if (!IS_ENABLED(CONFIG_IOMMU_DMA)) { if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA)) return -1; if (!driver_type) driver_type = IOMMU_DOMAIN_IDENTITY; } if (untrusted) { if (driver_type && driver_type != IOMMU_DOMAIN_DMA) { dev_err_ratelimited( untrusted, "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n", group->id, iommu_domain_type_str(driver_type)); return -1; } driver_type = IOMMU_DOMAIN_DMA; } if (target_type) { if (driver_type && target_type != driver_type) return -1; return target_type; } return driver_type; } static void iommu_group_do_probe_finalize(struct device *dev) { const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->probe_finalize) ops->probe_finalize(dev); } int bus_iommu_probe(const struct bus_type *bus) { struct iommu_group *group, *next; LIST_HEAD(group_list); int ret; ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); if (ret) return ret; list_for_each_entry_safe(group, next, &group_list, entry) { struct group_device *gdev; mutex_lock(&group->mutex); /* Remove item from the list */ list_del_init(&group->entry); /* * We go to the trouble of deferred default domain creation so * that the cross-group default domain type and the setup of the * IOMMU_RESV_DIRECT will work correctly in non-hotpug scenarios. */ ret = iommu_setup_default_domain(group, 0); if (ret) { mutex_unlock(&group->mutex); return ret; } mutex_unlock(&group->mutex); /* * FIXME: Mis-locked because the ops->probe_finalize() call-back * of some IOMMU drivers calls arm_iommu_attach_device() which * in-turn might call back into IOMMU core code, where it tries * to take group->mutex, resulting in a deadlock. */ for_each_group_device(group, gdev) iommu_group_do_probe_finalize(gdev->dev); } return 0; } /** * iommu_present() - make platform-specific assumptions about an IOMMU * @bus: bus to check * * Do not use this function. You want device_iommu_mapped() instead. * * Return: true if some IOMMU is present and aware of devices on the given bus; * in general it may not be the only IOMMU, and it may not have anything to do * with whatever device you are ultimately interested in. */ bool iommu_present(const struct bus_type *bus) { bool ret = false; for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { if (iommu_buses[i] == bus) { spin_lock(&iommu_device_lock); ret = !list_empty(&iommu_device_list); spin_unlock(&iommu_device_lock); } } return ret; } EXPORT_SYMBOL_GPL(iommu_present); /** * device_iommu_capable() - check for a general IOMMU capability * @dev: device to which the capability would be relevant, if available * @cap: IOMMU capability * * Return: true if an IOMMU is present and supports the given capability * for the given device, otherwise false. */ bool device_iommu_capable(struct device *dev, enum iommu_cap cap) { const struct iommu_ops *ops; if (!dev_has_iommu(dev)) return false; ops = dev_iommu_ops(dev); if (!ops->capable) return false; return ops->capable(dev, cap); } EXPORT_SYMBOL_GPL(device_iommu_capable); /** * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() * for a group * @group: Group to query * * IOMMU groups should not have differing values of * msi_device_has_isolated_msi() for devices in a group. 
However nothing * directly prevents this, so ensure mistakes don't result in isolation failures * by checking that all the devices are the same. */ bool iommu_group_has_isolated_msi(struct iommu_group *group) { struct group_device *group_dev; bool ret = true; mutex_lock(&group->mutex); for_each_group_device(group, group_dev) ret &= msi_device_has_isolated_msi(group_dev->dev); mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); /** * iommu_set_fault_handler() - set a fault handler for an iommu domain * @domain: iommu domain * @handler: fault handler * @token: user data, will be passed back to the fault handler * * This function should be used by IOMMU users which want to be notified * whenever an IOMMU fault happens. * * The fault handler itself should return 0 on success, and an appropriate * error code otherwise. */ void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token) { BUG_ON(!domain); domain->handler = handler; domain->handler_token = token; } EXPORT_SYMBOL_GPL(iommu_set_fault_handler); static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, struct device *dev, unsigned int type) { struct iommu_domain *domain; unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain) return ops->identity_domain; else if (alloc_type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain) return ops->blocked_domain; else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging) domain = ops->domain_alloc_paging(dev); else if (ops->domain_alloc) domain = ops->domain_alloc(alloc_type); else return ERR_PTR(-EOPNOTSUPP); /* * Many domain_alloc ops now return ERR_PTR, make things easier for the * driver by accepting ERR_PTR from all domain_alloc ops instead of * having two rules. */ if (IS_ERR(domain)) return domain; if (!domain) return ERR_PTR(-ENOMEM); domain->type = type; domain->owner = ops; /* * If not already set, assume all sizes by default; the driver * may override this later */ if (!domain->pgsize_bitmap) domain->pgsize_bitmap = ops->pgsize_bitmap; if (!domain->ops) domain->ops = ops->default_domain_ops; if (iommu_is_dma_domain(domain)) { int rc; rc = iommu_get_dma_cookie(domain); if (rc) { iommu_domain_free(domain); return ERR_PTR(rc); } } return domain; } static struct iommu_domain * __iommu_group_domain_alloc(struct iommu_group *group, unsigned int type) { struct device *dev = iommu_group_first_dev(group); return __iommu_domain_alloc(dev_iommu_ops(dev), dev, type); } static int __iommu_domain_alloc_dev(struct device *dev, void *data) { const struct iommu_ops **ops = data; if (!dev_has_iommu(dev)) return 0; if (WARN_ONCE(*ops && *ops != dev_iommu_ops(dev), "Multiple IOMMU drivers present for bus %s, which the public IOMMU API can't fully support yet. 
You will still need to disable one or more for this to work, sorry!\n", dev_bus_name(dev))) return -EBUSY; *ops = dev_iommu_ops(dev); return 0; } struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { const struct iommu_ops *ops = NULL; int err = bus_for_each_dev(bus, NULL, &ops, __iommu_domain_alloc_dev); struct iommu_domain *domain; if (err || !ops) return NULL; domain = __iommu_domain_alloc(ops, NULL, IOMMU_DOMAIN_UNMANAGED); if (IS_ERR(domain)) return NULL; return domain; } EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { if (domain->type == IOMMU_DOMAIN_SVA) mmdrop(domain->mm); iommu_put_dma_cookie(domain); if (domain->ops->free) domain->ops->free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); /* * Put the group's domain back to the appropriate core-owned domain - either the * standard kernel-mode DMA configuration or an all-DMA-blocked domain. */ static void __iommu_group_set_core_domain(struct iommu_group *group) { struct iommu_domain *new_domain; if (group->owner) new_domain = group->blocking_domain; else new_domain = group->default_domain; __iommu_group_set_domain_nofail(group, new_domain); } static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev) { int ret; if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; ret = domain->ops->attach_dev(domain, dev); if (ret) return ret; dev->iommu->attach_deferred = 0; trace_attach_device_to_domain(dev); return 0; } /** * iommu_attach_device - Attach an IOMMU domain to a device * @domain: IOMMU domain to attach * @dev: Device that will be attached * * Returns 0 on success and error code on failure * * Note that EINVAL can be treated as a soft failure, indicating * that certain configuration of the domain is incompatible with * the device. In this case attaching a different domain to the * device may succeed. */ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; int ret; if (!group) return -ENODEV; /* * Lock the group to make sure the device-count doesn't * change while we are attaching */ mutex_lock(&group->mutex); ret = -EINVAL; if (list_count_nodes(&group->devices) != 1) goto out_unlock; ret = __iommu_attach_group(domain, group); out_unlock: mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_attach_device); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) { if (dev->iommu && dev->iommu->attach_deferred) return __iommu_attach_device(domain, dev); return 0; } void iommu_detach_device(struct iommu_domain *domain, struct device *dev) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; if (!group) return; mutex_lock(&group->mutex); if (WARN_ON(domain != group->domain) || WARN_ON(list_count_nodes(&group->devices) != 1)) goto out_unlock; __iommu_group_set_core_domain(group); out_unlock: mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_detach_device); struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; if (!group) return NULL; return group->domain; } EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); /* * For IOMMU_DOMAIN_DMA implementations which already provide their own * guarantees that the group and its default domain are valid and correct. 
*/ struct iommu_domain *iommu_get_dma_domain(struct device *dev) { return dev->iommu_group->default_domain; } static int __iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { struct device *dev; if (group->domain && group->domain != group->default_domain && group->domain != group->blocking_domain) return -EBUSY; dev = iommu_group_first_dev(group); if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner) return -EINVAL; return __iommu_group_set_domain(group, domain); } /** * iommu_attach_group - Attach an IOMMU domain to an IOMMU group * @domain: IOMMU domain to attach * @group: IOMMU group that will be attached * * Returns 0 on success and error code on failure * * Note that EINVAL can be treated as a soft failure, indicating * that certain configuration of the domain is incompatible with * the group. In this case attaching a different domain to the * group may succeed. */ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { int ret; mutex_lock(&group->mutex); ret = __iommu_attach_group(domain, group); mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_attach_group); /** * iommu_group_replace_domain - replace the domain that a group is attached to * @new_domain: new IOMMU domain to replace with * @group: IOMMU group that will be attached to the new domain * * This API allows the group to switch domains without being forced to go to * the blocking domain in-between. * * If the currently attached domain is a core domain (e.g. a default_domain), * it will act just like iommu_attach_group(). */ int iommu_group_replace_domain(struct iommu_group *group, struct iommu_domain *new_domain) { int ret; if (!new_domain) return -EINVAL; mutex_lock(&group->mutex); ret = __iommu_group_set_domain(group, new_domain); mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, IOMMUFD_INTERNAL); static int __iommu_device_set_domain(struct iommu_group *group, struct device *dev, struct iommu_domain *new_domain, unsigned int flags) { int ret; /* * If the device requires IOMMU_RESV_DIRECT then we cannot allow * the blocking domain to be attached as it does not contain the * required 1:1 mapping. This test effectively excludes the device * being used with iommu_group_claim_dma_owner() which will block * vfio and iommufd as well. */ if (dev->iommu->require_direct && (new_domain->type == IOMMU_DOMAIN_BLOCKED || new_domain == group->blocking_domain)) { dev_warn(dev, "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); return -EINVAL; } if (dev->iommu->attach_deferred) { if (new_domain == group->default_domain) return 0; dev->iommu->attach_deferred = 0; } ret = __iommu_attach_device(new_domain, dev); if (ret) { /* * If we have a blocking domain then try to attach that in hopes * of avoiding a UAF. Modern drivers should implement blocking * domains as global statics that cannot fail. */ if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && group->blocking_domain && group->blocking_domain != new_domain) __iommu_attach_device(group->blocking_domain, dev); return ret; } return 0; } /* * If 0 is returned the group's domain is new_domain. If an error is returned * then the group's domain will be set back to the existing domain unless * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's * domain is left inconsistent. It is a driver bug to fail attach with a * previously good domain.
We try to avoid a kernel UAF because of this. * * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU * API works on domains and devices. Bridge that gap by iterating over the * devices in a group. Ideally we'd have a single device which represents the * requestor ID of the group, but we also allow IOMMU drivers to create policy * defined minimum sets, where the physical hardware may be able to distinguish * members, but we wish to group them at a higher level (ex. untrusted * multi-function PCI devices). Thus we attach each device. */ static int __iommu_group_set_domain_internal(struct iommu_group *group, struct iommu_domain *new_domain, unsigned int flags) { struct group_device *last_gdev; struct group_device *gdev; int result; int ret; lockdep_assert_held(&group->mutex); if (group->domain == new_domain) return 0; if (WARN_ON(!new_domain)) return -EINVAL; /* * Changing the domain is done by calling attach_dev() on the new * domain. This switch does not have to be atomic and DMA can be * discarded during the transition. DMA must only be able to access * either new_domain or group->domain, never something else. */ result = 0; for_each_group_device(group, gdev) { ret = __iommu_device_set_domain(group, gdev->dev, new_domain, flags); if (ret) { result = ret; /* * Keep trying the other devices in the group. If a * driver fails attach to an otherwise good domain, and * does not support blocking domains, it should at least * drop its reference on the current domain so we don't * UAF. */ if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) continue; goto err_revert; } } group->domain = new_domain; return result; err_revert: /* * This is called in error unwind paths. A well-behaved driver should * always allow us to attach to a domain that was already attached. */ last_gdev = gdev; for_each_group_device(group, gdev) { /* * A NULL domain can happen only for first probe, in which case * we leave group->domain as NULL and let release clean * everything up.
*/ if (group->domain) WARN_ON(__iommu_device_set_domain( group, gdev->dev, group->domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); if (gdev == last_gdev) break; } return ret; } void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) { mutex_lock(&group->mutex); __iommu_group_set_core_domain(group); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_detach_group); phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { if (domain->type == IOMMU_DOMAIN_IDENTITY) return iova; if (domain->type == IOMMU_DOMAIN_BLOCKED) return 0; return domain->ops->iova_to_phys(domain, iova); } EXPORT_SYMBOL_GPL(iommu_iova_to_phys); static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, size_t *count) { unsigned int pgsize_idx, pgsize_idx_next; unsigned long pgsizes; size_t offset, pgsize, pgsize_next; unsigned long addr_merge = paddr | iova; /* Page sizes supported by the hardware and small enough for @size */ pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); /* Constrain the page sizes further based on the maximum alignment */ if (likely(addr_merge)) pgsizes &= GENMASK(__ffs(addr_merge), 0); /* Make sure we have at least one suitable page size */ BUG_ON(!pgsizes); /* Pick the biggest page size remaining */ pgsize_idx = __fls(pgsizes); pgsize = BIT(pgsize_idx); if (!count) return pgsize; /* Find the next biggest support page size, if it exists */ pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); if (!pgsizes) goto out_set_count; pgsize_idx_next = __ffs(pgsizes); pgsize_next = BIT(pgsize_idx_next); /* * There's no point trying a bigger page size unless the virtual * and physical addresses are similarly offset within the larger page. */ if ((iova ^ paddr) & (pgsize_next - 1)) goto out_set_count; /* Calculate the offset to the next page size alignment boundary */ offset = pgsize_next - (addr_merge & (pgsize_next - 1)); /* * If size is big enough to accommodate the larger page, reduce * the number of smaller pages. */ if (offset + pgsize_next <= size) size = offset; out_set_count: *count = size >> pgsize_idx; return pgsize; } static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; phys_addr_t orig_paddr = paddr; int ret = 0; if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return -EINVAL; if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) return -ENODEV; /* find out the minimum page size supported */ min_pagesz = 1 << __ffs(domain->pgsize_bitmap); /* * both the virtual address and the physical one, as well as * the size of the mapping, must be aligned (at least) to the * size of the smallest page supported by the hardware */ if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", iova, &paddr, size, min_pagesz); return -EINVAL; } pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { size_t pgsize, count, mapped = 0; pgsize = iommu_pgsize(domain, iova, paddr, size, &count); pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", iova, &paddr, pgsize, count); ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, gfp, &mapped); /* * Some pages may have been mapped, even if an error occurred, * so we should account for those so they can be unmapped. 
*/ size -= mapped; if (ret) break; iova += mapped; paddr += mapped; } /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); else trace_map(orig_iova, orig_paddr, orig_size); return ret; } int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; int ret; might_sleep_if(gfpflags_allow_blocking(gfp)); /* Discourage passing strange GFP flags */ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM))) return -EINVAL; ret = __iommu_map(domain, iova, paddr, size, prot, gfp); if (ret == 0 && ops->iotlb_sync_map) { ret = ops->iotlb_sync_map(domain, iova, size); if (ret) goto out_err; } return ret; out_err: /* undo mappings already done */ iommu_unmap(domain, iova, size); return ret; } EXPORT_SYMBOL_GPL(iommu_map); static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) { const struct iommu_domain_ops *ops = domain->ops; size_t unmapped_page, unmapped = 0; unsigned long orig_iova = iova; unsigned int min_pagesz; if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return 0; if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) return 0; /* find out the minimum page size supported */ min_pagesz = 1 << __ffs(domain->pgsize_bitmap); /* * The virtual address, as well as the size of the mapping, must be * aligned (at least) to the size of the smallest page supported * by the hardware */ if (!IS_ALIGNED(iova | size, min_pagesz)) { pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", iova, size, min_pagesz); return 0; } pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); /* * Keep iterating until we either unmap 'size' bytes (or more) * or we hit an area that isn't mapped. 
*/ while (unmapped < size) { size_t pgsize, count; pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); if (!unmapped_page) break; pr_debug("unmapped: iova 0x%lx size 0x%zx\n", iova, unmapped_page); iova += unmapped_page; unmapped += unmapped_page; } trace_unmap(orig_iova, size, unmapped); return unmapped; } size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct iommu_iotlb_gather iotlb_gather; size_t ret; iommu_iotlb_gather_init(&iotlb_gather); ret = __iommu_unmap(domain, iova, size, &iotlb_gather); iommu_iotlb_sync(domain, &iotlb_gather); return ret; } EXPORT_SYMBOL_GPL(iommu_unmap); size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) { return __iommu_unmap(domain, iova, size, iotlb_gather); } EXPORT_SYMBOL_GPL(iommu_unmap_fast); ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; int ret; might_sleep_if(gfpflags_allow_blocking(gfp)); /* Discourage passing strange GFP flags */ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM))) return -EINVAL; while (i <= nents) { phys_addr_t s_phys = sg_phys(sg); if (len && s_phys != start + len) { ret = __iommu_map(domain, iova + mapped, start, len, prot, gfp); if (ret) goto out_err; mapped += len; len = 0; } if (sg_dma_is_bus_address(sg)) goto next; if (len) { len += sg->length; } else { len = sg->length; start = s_phys; } next: if (++i < nents) sg = sg_next(sg); } if (ops->iotlb_sync_map) { ret = ops->iotlb_sync_map(domain, iova, mapped); if (ret) goto out_err; } return mapped; out_err: /* undo mappings already done */ iommu_unmap(domain, iova, mapped); return ret; } EXPORT_SYMBOL_GPL(iommu_map_sg); /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework * @domain: the iommu domain where the fault has happened * @dev: the device where the fault has happened * @iova: the faulting address * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) * * This function should be called by the low-level IOMMU implementations * whenever IOMMU faults happen, to allow high-level users, that are * interested in such events, to know about them. * * This event may be useful for several possible use cases: * - mere logging of the event * - dynamic TLB/PTE loading * - if restarting of the faulting device is required * * Returns 0 on success and an appropriate error code otherwise (if dynamic * PTE/TLB loading will one day be supported, implementations will be able * to tell whether it succeeded or not according to this return value). * * Specifically, -ENOSYS is returned if a fault handler isn't installed * (though fault handlers can also return -ENOSYS, in case they want to * elicit the default behavior of the IOMMU drivers). */ int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags) { int ret = -ENOSYS; /* * if upper layers showed interest and installed a fault handler, * invoke it. 
*/ if (domain->handler) ret = domain->handler(domain, dev, iova, flags, domain->handler_token); trace_io_page_fault(dev, iova, flags); return ret; } EXPORT_SYMBOL_GPL(report_iommu_fault); static int __init iommu_init(void) { iommu_group_kset = kset_create_and_add("iommu_groups", NULL, kernel_kobj); BUG_ON(!iommu_group_kset); iommu_debugfs_setup(); return 0; } core_initcall(iommu_init); int iommu_enable_nesting(struct iommu_domain *domain) { if (domain->type != IOMMU_DOMAIN_UNMANAGED) return -EINVAL; if (!domain->ops->enable_nesting) return -EINVAL; return domain->ops->enable_nesting(domain); } EXPORT_SYMBOL_GPL(iommu_enable_nesting); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirk) { if (domain->type != IOMMU_DOMAIN_UNMANAGED) return -EINVAL; if (!domain->ops->set_pgtable_quirks) return -EINVAL; return domain->ops->set_pgtable_quirks(domain, quirk); } EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); /** * iommu_get_resv_regions - get reserved regions * @dev: device for which to get reserved regions * @list: reserved region list for device * * This returns a list of reserved IOVA regions specific to this device. * A domain user should not map IOVA in these ranges. */ void iommu_get_resv_regions(struct device *dev, struct list_head *list) { const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->get_resv_regions) ops->get_resv_regions(dev, list); } EXPORT_SYMBOL_GPL(iommu_get_resv_regions); /** * iommu_put_resv_regions - release reserved regions * @dev: device for which to free reserved regions * @list: reserved region list for device * * This releases a reserved region list acquired by iommu_get_resv_regions(). */ void iommu_put_resv_regions(struct device *dev, struct list_head *list) { struct iommu_resv_region *entry, *next; list_for_each_entry_safe(entry, next, list, list) { if (entry->free) entry->free(dev, entry); else kfree(entry); } } EXPORT_SYMBOL(iommu_put_resv_regions); struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, enum iommu_resv_type type, gfp_t gfp) { struct iommu_resv_region *region; region = kzalloc(sizeof(*region), gfp); if (!region) return NULL; INIT_LIST_HEAD(&region->list); region->start = start; region->length = length; region->prot = prot; region->type = type; return region; } EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); void iommu_set_default_passthrough(bool cmd_line) { if (cmd_line) iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; } void iommu_set_default_translated(bool cmd_line) { if (cmd_line) iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; iommu_def_domain_type = IOMMU_DOMAIN_DMA; } bool iommu_default_passthrough(void) { return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; } EXPORT_SYMBOL_GPL(iommu_default_passthrough); const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) { const struct iommu_ops *ops = NULL; struct iommu_device *iommu; spin_lock(&iommu_device_lock); list_for_each_entry(iommu, &iommu_device_list, list) if (iommu->fwnode == fwnode) { ops = iommu->ops; break; } spin_unlock(&iommu_device_lock); return ops; } int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (fwspec) return ops == fwspec->ops ? 
0 : -EINVAL; if (!dev_iommu_get(dev)) return -ENOMEM; /* Preallocate for the overwhelmingly common case of 1 ID */ fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); if (!fwspec) return -ENOMEM; of_node_get(to_of_node(iommu_fwnode)); fwspec->iommu_fwnode = iommu_fwnode; fwspec->ops = ops; dev_iommu_fwspec_set(dev, fwspec); return 0; } EXPORT_SYMBOL_GPL(iommu_fwspec_init); void iommu_fwspec_free(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (fwspec) { fwnode_handle_put(fwspec->iommu_fwnode); kfree(fwspec); dev_iommu_fwspec_set(dev, NULL); } } EXPORT_SYMBOL_GPL(iommu_fwspec_free); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); int i, new_num; if (!fwspec) return -EINVAL; new_num = fwspec->num_ids + num_ids; if (new_num > 1) { fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), GFP_KERNEL); if (!fwspec) return -ENOMEM; dev_iommu_fwspec_set(dev, fwspec); } for (i = 0; i < num_ids; i++) fwspec->ids[fwspec->num_ids + i] = ids[i]; fwspec->num_ids = new_num; return 0; } EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); /* * Per device IOMMU features. */ int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { if (dev_has_iommu(dev)) { const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->dev_enable_feat) return ops->dev_enable_feat(dev, feat); } return -ENODEV; } EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); /* * The device drivers should do the necessary cleanups before calling this. */ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) { if (dev_has_iommu(dev)) { const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->dev_disable_feat) return ops->dev_disable_feat(dev, feat); } return -EBUSY; } EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); /** * iommu_setup_default_domain - Set the default_domain for the group * @group: Group to change * @target_type: Domain type to set as the default_domain * * Allocate a default domain and set it as the current domain on the group. If * the group already has a default domain it will be changed to the target_type. * When target_type is 0 the default domain is selected based on driver and * system preferences. */ static int iommu_setup_default_domain(struct iommu_group *group, int target_type) { struct iommu_domain *old_dom = group->default_domain; struct group_device *gdev; struct iommu_domain *dom; bool direct_failed; int req_type; int ret; lockdep_assert_held(&group->mutex); req_type = iommu_get_default_domain_type(group, target_type); if (req_type < 0) return -EINVAL; dom = iommu_group_alloc_default_domain(group, req_type); if (IS_ERR(dom)) return PTR_ERR(dom); if (group->default_domain == dom) return 0; /* * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be * mapped before their device is attached, in order to guarantee * continuity with any FW activity */ direct_failed = false; for_each_group_device(group, gdev) { if (iommu_create_device_direct_mappings(dom, gdev->dev)) { direct_failed = true; dev_warn_once( gdev->dev->iommu->iommu_dev->dev, "IOMMU driver was not able to establish FW requested direct mapping."); } } /* We must set default_domain early for __iommu_device_set_domain */ group->default_domain = dom; if (!group->domain) { /* * Drivers are not allowed to fail the first domain attach. * The only way to recover from this is to fail attaching the * iommu driver and call ops->release_device. 
Put the domain * in group->default_domain so it is freed after. */ ret = __iommu_group_set_domain_internal( group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); if (WARN_ON(ret)) goto out_free_old; } else { ret = __iommu_group_set_domain(group, dom); if (ret) goto err_restore_def_domain; } /* * Drivers are supposed to allow mappings to be installed in a domain * before device attachment, but some don't. Hack around this defect by * trying again after attaching. If this happens it means the device * will not continuously have the IOMMU_RESV_DIRECT map. */ if (direct_failed) { for_each_group_device(group, gdev) { ret = iommu_create_device_direct_mappings(dom, gdev->dev); if (ret) goto err_restore_domain; } } out_free_old: if (old_dom) iommu_domain_free(old_dom); return ret; err_restore_domain: if (old_dom) __iommu_group_set_domain_internal( group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); err_restore_def_domain: if (old_dom) { iommu_domain_free(dom); group->default_domain = old_dom; } return ret; } /* * Changing the default domain through sysfs requires the users to unbind the * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ * transition. Return failure if this isn't met. * * We need to consider the race between this and the device release path. * group->mutex is used here to guarantee that the device release path * will not be entered at the same time. */ static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count) { struct group_device *gdev; int ret, req_type; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; if (WARN_ON(!group) || !group->default_domain) return -EINVAL; if (sysfs_streq(buf, "identity")) req_type = IOMMU_DOMAIN_IDENTITY; else if (sysfs_streq(buf, "DMA")) req_type = IOMMU_DOMAIN_DMA; else if (sysfs_streq(buf, "DMA-FQ")) req_type = IOMMU_DOMAIN_DMA_FQ; else if (sysfs_streq(buf, "auto")) req_type = 0; else return -EINVAL; mutex_lock(&group->mutex); /* We can bring up a flush queue without tearing down the domain. */ if (req_type == IOMMU_DOMAIN_DMA_FQ && group->default_domain->type == IOMMU_DOMAIN_DMA) { ret = iommu_dma_init_fq(group->default_domain); if (ret) goto out_unlock; group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; ret = count; goto out_unlock; } /* Otherwise, ensure that device exists and no driver is bound. */ if (list_empty(&group->devices) || group->owner_cnt) { ret = -EPERM; goto out_unlock; } ret = iommu_setup_default_domain(group, req_type); if (ret) goto out_unlock; /* * Release the mutex here because ops->probe_finalize() call-back of * some vendor IOMMU drivers calls arm_iommu_attach_device() which * in-turn might call back into IOMMU core code, where it tries to take * group->mutex, resulting in a deadlock. */ mutex_unlock(&group->mutex); /* Make sure dma_ops is appropriatley set */ for_each_group_device(group, gdev) iommu_group_do_probe_finalize(gdev->dev); return count; out_unlock: mutex_unlock(&group->mutex); return ret ?: count; } /** * iommu_device_use_default_domain() - Device driver wants to handle device * DMA through the kernel DMA API. * @dev: The device. * * The device driver about to bind @dev wants to do DMA through the kernel * DMA API. Return 0 if it is allowed, otherwise an error. 
*/ int iommu_device_use_default_domain(struct device *dev) { /* Caller is the driver core during the pre-probe path */ struct iommu_group *group = dev->iommu_group; int ret = 0; if (!group) return 0; mutex_lock(&group->mutex); if (group->owner_cnt) { if (group->domain != group->default_domain || group->owner || !xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; } } group->owner_cnt++; unlock_out: mutex_unlock(&group->mutex); return ret; } /** * iommu_device_unuse_default_domain() - Device driver stops handling device * DMA through the kernel DMA API. * @dev: The device. * * The device driver doesn't want to do DMA through kernel DMA API anymore. * It must be called after iommu_device_use_default_domain(). */ void iommu_device_unuse_default_domain(struct device *dev) { /* Caller is the driver core during the post-probe path */ struct iommu_group *group = dev->iommu_group; if (!group) return; mutex_lock(&group->mutex); if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) group->owner_cnt--; mutex_unlock(&group->mutex); } static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) { struct iommu_domain *domain; if (group->blocking_domain) return 0; domain = __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED); if (IS_ERR(domain)) { /* * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED * create an empty domain instead. */ domain = __iommu_group_domain_alloc(group, IOMMU_DOMAIN_UNMANAGED); if (IS_ERR(domain)) return PTR_ERR(domain); } group->blocking_domain = domain; return 0; } static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) { int ret; if ((group->domain && group->domain != group->default_domain) || !xa_empty(&group->pasid_array)) return -EBUSY; ret = __iommu_group_alloc_blocking_domain(group); if (ret) return ret; ret = __iommu_group_set_domain(group, group->blocking_domain); if (ret) return ret; group->owner = owner; group->owner_cnt++; return 0; } /** * iommu_group_claim_dma_owner() - Set DMA ownership of a group * @group: The group. * @owner: Caller specified pointer. Used for exclusive ownership. * * This is to support backward compatibility for vfio which manages the dma * ownership in iommu_group level. New invocations on this interface should be * prohibited. Only a single owner may exist for a group. */ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) { int ret = 0; if (WARN_ON(!owner)) return -EINVAL; mutex_lock(&group->mutex); if (group->owner_cnt) { ret = -EPERM; goto unlock_out; } ret = __iommu_take_dma_ownership(group, owner); unlock_out: mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); /** * iommu_device_claim_dma_owner() - Set DMA ownership of a device * @dev: The device. * @owner: Caller specified pointer. Used for exclusive ownership. * * Claim the DMA ownership of a device. Multiple devices in the same group may * concurrently claim ownership if they present the same owner value. 
Returns 0 * on success and error code on failure */ int iommu_device_claim_dma_owner(struct device *dev, void *owner) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; int ret = 0; if (WARN_ON(!owner)) return -EINVAL; if (!group) return -ENODEV; mutex_lock(&group->mutex); if (group->owner_cnt) { if (group->owner != owner) { ret = -EPERM; goto unlock_out; } group->owner_cnt++; goto unlock_out; } ret = __iommu_take_dma_ownership(group, owner); unlock_out: mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); static void __iommu_release_dma_ownership(struct iommu_group *group) { if (WARN_ON(!group->owner_cnt || !group->owner || !xa_empty(&group->pasid_array))) return; group->owner_cnt = 0; group->owner = NULL; __iommu_group_set_domain_nofail(group, group->default_domain); } /** * iommu_group_release_dma_owner() - Release DMA ownership of a group * @group: The group * * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). */ void iommu_group_release_dma_owner(struct iommu_group *group) { mutex_lock(&group->mutex); __iommu_release_dma_ownership(group); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); /** * iommu_device_release_dma_owner() - Release DMA ownership of a device * @dev: The device. * * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). */ void iommu_device_release_dma_owner(struct device *dev) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; mutex_lock(&group->mutex); if (group->owner_cnt > 1) group->owner_cnt--; else __iommu_release_dma_ownership(group); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); /** * iommu_group_dma_owner_claimed() - Query group dma ownership status * @group: The group. * * This provides status query on a given group. It is racy and only for * non-binding status reporting. */ bool iommu_group_dma_owner_claimed(struct iommu_group *group) { unsigned int user; mutex_lock(&group->mutex); user = group->owner_cnt; mutex_unlock(&group->mutex); return user; } EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); static int __iommu_set_group_pasid(struct iommu_domain *domain, struct iommu_group *group, ioasid_t pasid) { struct group_device *device; int ret = 0; for_each_group_device(group, device) { ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); if (ret) break; } return ret; } static void __iommu_remove_group_pasid(struct iommu_group *group, ioasid_t pasid) { struct group_device *device; const struct iommu_ops *ops; for_each_group_device(group, device) { ops = dev_iommu_ops(device->dev); ops->remove_dev_pasid(device->dev, pasid); } } /* * iommu_attach_device_pasid() - Attach a domain to pasid of device * @domain: the iommu domain. * @dev: the attached device. * @pasid: the pasid of the device. * * Return: 0 on success, or an error. 
*/ int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; struct group_device *device; void *curr; int ret; if (!domain->ops->set_dev_pasid) return -EOPNOTSUPP; if (!group) return -ENODEV; if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner || pasid == IOMMU_NO_PASID) return -EINVAL; mutex_lock(&group->mutex); for_each_group_device(group, device) { if (pasid >= device->dev->iommu->max_pasids) { ret = -EINVAL; goto out_unlock; } } curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); if (curr) { ret = xa_err(curr) ? : -EBUSY; goto out_unlock; } ret = __iommu_set_group_pasid(domain, group, pasid); if (ret) { __iommu_remove_group_pasid(group, pasid); xa_erase(&group->pasid_array, pasid); } out_unlock: mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); /* * iommu_detach_device_pasid() - Detach the domain from pasid of device * @domain: the iommu domain. * @dev: the attached device. * @pasid: the pasid of the device. * * The @domain must have been attached to @pasid of the @dev with * iommu_attach_device_pasid(). */ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; mutex_lock(&group->mutex); __iommu_remove_group_pasid(group, pasid); WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); /* * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev * @dev: the queried device * @pasid: the pasid of the device * @type: matched domain type, 0 for any match * * This is a variant of iommu_get_domain_for_dev(). It returns the existing * domain attached to pasid of a device. Callers must hold a lock around this * function, and both iommu_attach/detach_dev_pasid() whenever a domain of * type is being manipulated. This API does not internally resolve races with * attach/detach. * * Return: attached domain on success, NULL otherwise. */ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, unsigned int type) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; struct iommu_domain *domain; if (!group) return NULL; xa_lock(&group->pasid_array); domain = xa_load(&group->pasid_array, pasid); if (type && domain && domain->type != type) domain = ERR_PTR(-EBUSY); xa_unlock(&group->pasid_array); return domain; } EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); ioasid_t iommu_alloc_global_pasid(struct device *dev) { int ret; /* max_pasids == 0 means that the device does not support PASID */ if (!dev->iommu->max_pasids) return IOMMU_PASID_INVALID; /* * max_pasids is set up by vendor driver based on number of PASID bits * supported but the IDA allocation is inclusive. */ ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, dev->iommu->max_pasids - 1, GFP_KERNEL); return ret < 0 ? IOMMU_PASID_INVALID : ret; } EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); void iommu_free_global_pasid(ioasid_t pasid) { if (WARN_ON(pasid == IOMMU_PASID_INVALID)) return; ida_free(&iommu_global_pasid_ida, pasid); } EXPORT_SYMBOL_GPL(iommu_free_global_pasid);
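/*
 * Illustrative sketch (not part of iommu.c): a minimal example of how a
 * kernel driver might consume the external API implemented above - allocate
 * an unmanaged domain, attach it, map one page, then tear everything down.
 * The names demo_map_one_page and demo_dev, the caller-chosen IOVA, and the
 * assumption that demo_dev sits alone in its IOMMU group (required by
 * iommu_attach_device()) are hypothetical; only the iommu_* calls themselves
 * come from this file.
 */
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/mm.h>

static int __maybe_unused demo_map_one_page(struct device *demo_dev,
					    phys_addr_t paddr,
					    unsigned long iova)
{
	struct iommu_domain *domain;
	int ret;

	/* Allocate an IOMMU_DOMAIN_UNMANAGED domain for the device's bus. */
	domain = iommu_domain_alloc(demo_dev->bus);
	if (!domain)
		return -ENODEV;

	/* Only permitted when demo_dev is the sole device in its group. */
	ret = iommu_attach_device(domain, demo_dev);
	if (ret)
		goto out_free;

	/* Map one page read/write; GFP_KERNEL means this may sleep. */
	ret = iommu_map(domain, iova, paddr, PAGE_SIZE,
			IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
	if (ret)
		goto out_detach;

	/* ... the device could now DMA to @iova ... */

	iommu_unmap(domain, iova, PAGE_SIZE);
out_detach:
	iommu_detach_device(domain, demo_dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}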
/* CPU control. * (C) 2001, 2002, 2003, 2004 Rusty Russell * * This code is licenced under the GPL. */ #include <linux/sched/mm.h> #include <linux/proc_fs.h> #include <linux/smp.h> #include <linux/init.h> #include <linux/notifier.h> #include <linux/sched/signal.h> #include <linux/sched/hotplug.h> #include <linux/sched/isolation.h> #include <linux/sched/task.h> #include <linux/sched/smt.h> #include <linux/unistd.h> #include <linux/cpu.h> #include <linux/oom.h> #include <linux/rcupdate.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/bug.h> #include <linux/kthread.h> #include <linux/stop_machine.h> #include <linux/mutex.h> #include <linux/gfp.h> #include <linux/suspend.h> #include <linux/lockdep.h> #include <linux/tick.h> #include <linux/irq.h> #include <linux/nmi.h> #include <linux/smpboot.h> #include <linux/relay.h> #include <linux/slab.h> #include <linux/scs.h> #include <linux/percpu-rwsem.h> #include <linux/cpuset.h> #include <linux/random.h> #include <linux/cc_platform.h> #include <trace/events/power.h> #define CREATE_TRACE_POINTS #include <trace/events/cpuhp.h> #include "smpboot.h" /** * struct cpuhp_cpu_state - Per cpu hotplug state storage * @state: The current cpu state * @target: The target state * @fail: Current CPU hotplug callback state * @thread: Pointer to the hotplug thread * @should_run: Thread should execute * @rollback: Perform a rollback * @single: Single callback invocation * @bringup: Single callback bringup or teardown selector * @node: Remote CPU node; for multi-instance, do a * single entry callback for install/remove * @last: For multi-instance rollback, remember how far we got * @cb_state: The state for a single callback (install/uninstall) * @result: Result of the operation * @ap_sync_state: State for AP synchronization * @done_up: Signal completion to the issuer of the task for cpu-up * @done_down: Signal completion to the issuer of the task for cpu-down */ struct cpuhp_cpu_state { enum cpuhp_state state; enum cpuhp_state target; enum cpuhp_state fail; #ifdef CONFIG_SMP struct task_struct *thread; bool should_run; bool rollback; bool single; bool bringup; struct hlist_node *node; struct hlist_node *last; enum cpuhp_state cb_state; int result; atomic_t ap_sync_state; struct completion done_up; struct completion done_down; #endif }; static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = { .fail = CPUHP_INVALID, }; #ifdef CONFIG_SMP cpumask_t cpus_booted_once_mask; #endif #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) static struct lockdep_map cpuhp_state_up_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map); static
struct lockdep_map cpuhp_state_down_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); static inline void cpuhp_lock_acquire(bool bringup) { lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } static inline void cpuhp_lock_release(bool bringup) { lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } #else static inline void cpuhp_lock_acquire(bool bringup) { } static inline void cpuhp_lock_release(bool bringup) { } #endif /** * struct cpuhp_step - Hotplug state machine step * @name: Name of the step * @startup: Startup function of the step * @teardown: Teardown function of the step * @cant_stop: Bringup/teardown can't be stopped at this step * @multi_instance: State has multiple instances which get added afterwards */ struct cpuhp_step { const char *name; union { int (*single)(unsigned int cpu); int (*multi)(unsigned int cpu, struct hlist_node *node); } startup; union { int (*single)(unsigned int cpu); int (*multi)(unsigned int cpu, struct hlist_node *node); } teardown; /* private: */ struct hlist_head list; /* public: */ bool cant_stop; bool multi_instance; }; static DEFINE_MUTEX(cpuhp_state_mutex); static struct cpuhp_step cpuhp_hp_states[]; static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) { return cpuhp_hp_states + state; } static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step) { return bringup ? !step->startup.single : !step->teardown.single; } /** * cpuhp_invoke_callback - Invoke the callbacks for a given state * @cpu: The cpu for which the callback should be invoked * @state: The state to do callbacks for * @bringup: True if the bringup callback should be invoked * @node: For multi-instance, do a single entry callback for install/remove * @lastp: For multi-instance rollback, remember how far we got * * Called from cpu hotplug and from the state register machinery. * * Return: %0 on success or a negative errno code */ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node, struct hlist_node **lastp) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct cpuhp_step *step = cpuhp_get_step(state); int (*cbm)(unsigned int cpu, struct hlist_node *node); int (*cb)(unsigned int cpu); int ret, cnt; if (st->fail == state) { st->fail = CPUHP_INVALID; return -EAGAIN; } if (cpuhp_step_empty(bringup, step)) { WARN_ON_ONCE(1); return 0; } if (!step->multi_instance) { WARN_ON_ONCE(lastp && *lastp); cb = bringup ? step->startup.single : step->teardown.single; trace_cpuhp_enter(cpu, st->target, state, cb); ret = cb(cpu); trace_cpuhp_exit(cpu, st->state, state, ret); return ret; } cbm = bringup ? step->startup.multi : step->teardown.multi; /* Single invocation for instance add/remove */ if (node) { WARN_ON_ONCE(lastp && *lastp); trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); return ret; } /* State transition. Invoke on all instances */ cnt = 0; hlist_for_each(node, &step->list) { if (lastp && node == *lastp) break; trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); if (ret) { if (!lastp) goto err; *lastp = node; return ret; } cnt++; } if (lastp) *lastp = NULL; return 0; err: /* Rollback the instances if one failed */ cbm = !bringup ? 
step->startup.multi : step->teardown.multi; if (!cbm) return ret; hlist_for_each(node, &step->list) { if (!cnt--) break; trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); /* * Rollback must not fail, */ WARN_ON_ONCE(ret); } return ret; } #ifdef CONFIG_SMP static bool cpuhp_is_ap_state(enum cpuhp_state state) { /* * The extra check for CPUHP_TEARDOWN_CPU is only for documentation * purposes as that state is handled explicitly in cpu_down. */ return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU; } static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup) { struct completion *done = bringup ? &st->done_up : &st->done_down; wait_for_completion(done); } static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup) { struct completion *done = bringup ? &st->done_up : &st->done_down; complete(done); } /* * The former STARTING/DYING states, ran with IRQs disabled and must not fail. */ static bool cpuhp_is_atomic_state(enum cpuhp_state state) { return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; } /* Synchronization state management */ enum cpuhp_sync_state { SYNC_STATE_DEAD, SYNC_STATE_KICKED, SYNC_STATE_SHOULD_DIE, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE, SYNC_STATE_ONLINE, }; #ifdef CONFIG_HOTPLUG_CORE_SYNC /** * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown * @state: The synchronization state to set * * No synchronization point. Just update of the synchronization state, but implies * a full barrier so that the AP changes are visible before the control CPU proceeds. */ static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); (void)atomic_xchg(st, state); } void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); } static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state, enum cpuhp_sync_state next_state) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); ktime_t now, end, start = ktime_get(); int sync; end = start + 10ULL * NSEC_PER_SEC; sync = atomic_read(st); while (1) { if (sync == state) { if (!atomic_try_cmpxchg(st, &sync, next_state)) continue; return true; } now = ktime_get(); if (now > end) { /* Timeout. Leave the state unchanged */ return false; } else if (now - start < NSEC_PER_MSEC) { /* Poll for one millisecond */ arch_cpuhp_sync_state_poll(); } else { usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE); } sync = atomic_read(st); } return true; } #else /* CONFIG_HOTPLUG_CORE_SYNC */ static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { } #endif /* !CONFIG_HOTPLUG_CORE_SYNC */ #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD /** * cpuhp_ap_report_dead - Update synchronization state to DEAD * * No synchronization point. Just update of the synchronization state. */ void cpuhp_ap_report_dead(void) { cpuhp_ap_update_sync_state(SYNC_STATE_DEAD); } void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { } /* * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down * because the AP cannot issue complete() at this stage. */ static void cpuhp_bp_sync_dead(unsigned int cpu) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); int sync = atomic_read(st); do { /* CPU can have reported dead already. Don't overwrite that! 
*/ if (sync == SYNC_STATE_DEAD) break; } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE)); if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) { /* CPU reached dead state. Invoke the cleanup function */ arch_cpuhp_cleanup_dead_cpu(cpu); return; } /* No further action possible. Emit message and give up. */ pr_err("CPU%u failed to report dead state\n", cpu); } #else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */ static inline void cpuhp_bp_sync_dead(unsigned int cpu) { } #endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */ #ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL /** * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive * * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits * for the BP to release it. */ void cpuhp_ap_sync_alive(void) { atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE); /* Wait for the control CPU to release it. */ while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE) cpu_relax(); } static bool cpuhp_can_boot_ap(unsigned int cpu) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); int sync = atomic_read(st); again: switch (sync) { case SYNC_STATE_DEAD: /* CPU is properly dead */ break; case SYNC_STATE_KICKED: /* CPU did not come up in previous attempt */ break; case SYNC_STATE_ALIVE: /* CPU is stuck cpuhp_ap_sync_alive(). */ break; default: /* CPU failed to report online or dead and is in limbo state. */ return false; } /* Prepare for booting */ if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED)) goto again; return true; } void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { } /* * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up * because the AP cannot issue complete() so early in the bringup. */ static int cpuhp_bp_sync_alive(unsigned int cpu) { int ret = 0; if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL)) return 0; if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) { pr_err("CPU%u failed to report alive state\n", cpu); ret = -EIO; } /* Let the architecture cleanup the kick alive mechanics. */ arch_cpuhp_cleanup_kick_cpu(cpu); return ret; } #else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */ static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; } static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; } #endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */ /* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); bool cpuhp_tasks_frozen; EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen); /* * The following two APIs (cpu_maps_update_begin/done) must be used when * attempting to serialize the updates to cpu_online_mask & cpu_present_mask. */ void cpu_maps_update_begin(void) { mutex_lock(&cpu_add_remove_lock); } void cpu_maps_update_done(void) { mutex_unlock(&cpu_add_remove_lock); } /* * If set, cpu_up and cpu_down will return -EBUSY and do nothing. 
* Should always be manipulated under cpu_add_remove_lock */ static int cpu_hotplug_disabled; #ifdef CONFIG_HOTPLUG_CPU DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); void cpus_read_lock(void) { percpu_down_read(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_lock); int cpus_read_trylock(void) { return percpu_down_read_trylock(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_trylock); void cpus_read_unlock(void) { percpu_up_read(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_unlock); void cpus_write_lock(void) { percpu_down_write(&cpu_hotplug_lock); } void cpus_write_unlock(void) { percpu_up_write(&cpu_hotplug_lock); } void lockdep_assert_cpus_held(void) { /* * We can't have hotplug operations before userspace starts running, * and some init codepaths will knowingly not take the hotplug lock. * This is all valid, so mute lockdep until it makes sense to report * unheld locks. */ if (system_state < SYSTEM_RUNNING) return; percpu_rwsem_assert_held(&cpu_hotplug_lock); } #ifdef CONFIG_LOCKDEP int lockdep_is_cpus_held(void) { return percpu_rwsem_is_held(&cpu_hotplug_lock); } #endif static void lockdep_acquire_cpus_lock(void) { rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_); } static void lockdep_release_cpus_lock(void) { rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_); } /* * Wait for currently running CPU hotplug operations to complete (if any) and * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the * hotplug path before performing hotplug operations. So acquiring that lock * guarantees mutual exclusion from any currently running hotplug operations. */ void cpu_hotplug_disable(void) { cpu_maps_update_begin(); cpu_hotplug_disabled++; cpu_maps_update_done(); } EXPORT_SYMBOL_GPL(cpu_hotplug_disable); static void __cpu_hotplug_enable(void) { if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n")) return; cpu_hotplug_disabled--; } void cpu_hotplug_enable(void) { cpu_maps_update_begin(); __cpu_hotplug_enable(); cpu_maps_update_done(); } EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #else static void lockdep_acquire_cpus_lock(void) { } static void lockdep_release_cpus_lock(void) { } #endif /* CONFIG_HOTPLUG_CPU */ /* * Architectures that need SMT-specific errata handling during SMT hotplug * should override this. */ void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; static unsigned int cpu_smt_max_threads __ro_after_init; unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX; void __init cpu_smt_disable(bool force) { if (!cpu_smt_possible()) return; if (force) { pr_info("SMT: Force disabled\n"); cpu_smt_control = CPU_SMT_FORCE_DISABLED; } else { pr_info("SMT: disabled\n"); cpu_smt_control = CPU_SMT_DISABLED; } cpu_smt_num_threads = 1; } /* * The decision whether SMT is supported can only be done after the full * CPU identification. Called from architecture code. */ void __init cpu_smt_set_num_threads(unsigned int num_threads, unsigned int max_threads) { WARN_ON(!num_threads || (num_threads > max_threads)); if (max_threads == 1) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; cpu_smt_max_threads = max_threads; /* * If SMT has been disabled via the kernel command line or SMT is * not supported, set cpu_smt_num_threads to 1 for consistency. * If enabled, take the architecture requested number of threads * to bring up into account. 
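	 *
	 * Worked example (illustrative numbers): if "nosmt" was given on the
	 * command line, cpu_smt_control is already CPU_SMT_DISABLED and
	 * cpu_smt_num_threads is forced to 1 below. If SMT stays enabled and
	 * the architecture requests num_threads = 2 while cpu_smt_num_threads
	 * is still at its UINT_MAX default, it is clamped down to 2.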
	 */
	if (cpu_smt_control != CPU_SMT_ENABLED)
		cpu_smt_num_threads = 1;
	else if (num_threads < cpu_smt_num_threads)
		cpu_smt_num_threads = num_threads;
}

static int __init smt_cmdline_disable(char *str)
{
	cpu_smt_disable(str && !strcmp(str, "force"));
	return 0;
}
early_param("nosmt", smt_cmdline_disable);

/*
 * For architectures supporting partial SMT states, check if the thread is
 * allowed. Otherwise this has already been checked through
 * cpu_smt_max_threads when setting the SMT level.
 */
static inline bool cpu_smt_thread_allowed(unsigned int cpu)
{
#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
	return topology_smt_thread_allowed(cpu);
#else
	return true;
#endif
}

static inline bool cpu_bootable(unsigned int cpu)
{
	if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
		return true;

	/* All CPUs are bootable if controls are not configured */
	if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED)
		return true;

	/* All CPUs are bootable if CPU is not SMT capable */
	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
		return true;

	if (topology_is_primary_thread(cpu))
		return true;

	/*
	 * On x86 it's required to boot all logical CPUs at least once so
	 * that the init code can get a chance to set CR4.MCE on each
	 * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
	 * core will shutdown the machine.
	 */
	return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
}

/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
bool cpu_smt_possible(void)
{
	return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
		cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
}
EXPORT_SYMBOL_GPL(cpu_smt_possible);

#else
static inline bool cpu_bootable(unsigned int cpu) { return true; }
#endif

static inline enum cpuhp_state
cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	bool bringup = st->state < target;

	st->rollback = false;
	st->last = NULL;

	st->target = target;
	st->single = false;
	st->bringup = bringup;
	if (cpu_dying(cpu) != !bringup)
		set_cpu_dying(cpu, !bringup);

	return prev_state;
}

static inline void
cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
		  enum cpuhp_state prev_state)
{
	bool bringup = !st->bringup;

	st->target = prev_state;

	/*
	 * Already rolling back. No need to invert the bringup value or to
	 * change the current state.
	 */
	if (st->rollback)
		return;

	st->rollback = true;

	/*
	 * If we have st->last we need to undo partial multi_instance of this
	 * state first. Otherwise start undo at the previous state.
	 */
	if (!st->last) {
		if (st->bringup)
			st->state--;
		else
			st->state++;
	}

	st->bringup = bringup;
	if (cpu_dying(cpu) != !bringup)
		set_cpu_dying(cpu, !bringup);
}

/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
{
	if (!st->single && st->state == st->target)
		return;

	st->result = 0;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true.
Paired with the mb() above in cpuhp_thread_fun() */ smp_mb(); st->should_run = true; wake_up_process(st->thread); wait_for_ap_thread(st, st->bringup); } static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state; int ret; prev_state = cpuhp_set_state(cpu, st, target); __cpuhp_kick_ap(st); if ((ret = st->result)) { cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } return ret; } static int bringup_wait_for_ap_online(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ wait_for_ap_thread(st, true); if (WARN_ON_ONCE((!cpu_online(cpu)))) return -ECANCELED; /* Unpark the hotplug thread of the target cpu */ kthread_unpark(st->thread); /* * SMT soft disabling on X86 requires to bring the CPU out of the * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The * CPU marked itself as booted_once in notify_cpu_starting() so the * cpu_bootable() check will now return false if this is not the * primary sibling. */ if (!cpu_bootable(cpu)) return -ECANCELED; return 0; } #ifdef CONFIG_HOTPLUG_SPLIT_STARTUP static int cpuhp_kick_ap_alive(unsigned int cpu) { if (!cpuhp_can_boot_ap(cpu)) return -EAGAIN; return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu)); } static int cpuhp_bringup_ap(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int ret; /* * Some architectures have to walk the irq descriptors to * setup the vector space for the cpu which comes online. * Prevent irq alloc/free across the bringup. */ irq_lock_sparse(); ret = cpuhp_bp_sync_alive(cpu); if (ret) goto out_unlock; ret = bringup_wait_for_ap_online(cpu); if (ret) goto out_unlock; irq_unlock_sparse(); if (st->target <= CPUHP_AP_ONLINE_IDLE) return 0; return cpuhp_kick_ap(cpu, st, st->target); out_unlock: irq_unlock_sparse(); return ret; } #else static int bringup_cpu(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct task_struct *idle = idle_thread_get(cpu); int ret; if (!cpuhp_can_boot_ap(cpu)) return -EAGAIN; /* * Some architectures have to walk the irq descriptors to * setup the vector space for the cpu which comes online. * * Prevent irq alloc/free across the bringup by acquiring the * sparse irq lock. Hold it until the upcoming CPU completes the * startup in cpuhp_online_idle() which allows to avoid * intermediate synchronization points in the architecture code. */ irq_lock_sparse(); ret = __cpu_up(cpu, idle); if (ret) goto out_unlock; ret = cpuhp_bp_sync_alive(cpu); if (ret) goto out_unlock; ret = bringup_wait_for_ap_online(cpu); if (ret) goto out_unlock; irq_unlock_sparse(); if (st->target <= CPUHP_AP_ONLINE_IDLE) return 0; return cpuhp_kick_ap(cpu, st, st->target); out_unlock: irq_unlock_sparse(); return ret; } #endif static int finish_cpu(unsigned int cpu) { struct task_struct *idle = idle_thread_get(cpu); struct mm_struct *mm = idle->active_mm; /* * idle_task_exit() will have switched to &init_mm, now * clean up any remaining active_mm state. */ if (mm != &init_mm) idle->active_mm = &init_mm; mmdrop_lazy_tlb(mm); return 0; } /* * Hotplug state machine related functions */ /* * Get the next state to run. Empty ones will be skipped. Returns true if a * state must be run. * * st->state will be modified ahead of time, to match state_to_run, as if it * has already ran. 
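 *
 * Worked example (sketch based on the loop below): during bringup with
 * st->state == CPUHP_OFFLINE and target == CPUHP_ONLINE, each call advances
 * st->state one step at a time, skipping states that have no startup
 * callback registered, stores the first non-empty state in *state_to_run
 * and returns true. Once st->state reaches the target, it returns false.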
*/ static bool cpuhp_next_state(bool bringup, enum cpuhp_state *state_to_run, struct cpuhp_cpu_state *st, enum cpuhp_state target) { do { if (bringup) { if (st->state >= target) return false; *state_to_run = ++st->state; } else { if (st->state <= target) return false; *state_to_run = st->state--; } if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run))) break; } while (true); return true; } static int __cpuhp_invoke_callback_range(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target, bool nofail) { enum cpuhp_state state; int ret = 0; while (cpuhp_next_state(bringup, &state, st, target)) { int err; err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL); if (!err) continue; if (nofail) { pr_warn("CPU %u %s state %s (%d) failed (%d)\n", cpu, bringup ? "UP" : "DOWN", cpuhp_get_step(st->state)->name, st->state, err); ret = -1; } else { ret = err; break; } } return ret; } static inline int cpuhp_invoke_callback_range(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false); } static inline void cpuhp_invoke_callback_range_nofail(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { __cpuhp_invoke_callback_range(bringup, cpu, st, target, true); } static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) { if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) return true; /* * When CPU hotplug is disabled, then taking the CPU down is not * possible because takedown_cpu() and the architecture and * subsystem specific mechanisms are not available. So the CPU * which would be completely unplugged again needs to stay around * in the current state. */ return st->state <= CPUHP_BRINGUP_CPU; } static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; int ret = 0; ret = cpuhp_invoke_callback_range(true, cpu, st, target); if (ret) { pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n", ret, cpu, cpuhp_get_step(st->state)->name, st->state); cpuhp_reset_state(cpu, st, prev_state); if (can_rollback_cpu(st)) WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, prev_state)); } return ret; } /* * The cpu hotplug threads manage the bringup and teardown of the cpus */ static int cpuhp_should_run(unsigned int cpu) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); return st->should_run; } /* * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke * callbacks when a state gets [un]installed at runtime. * * Each invocation of this function by the smpboot thread does a single AP * state callback. * * It has 3 modes of operation: * - single: runs st->cb_state * - up: runs ++st->state, while st->state < st->target * - down: runs st->state--, while st->state > st->target * * When complete or on error, should_run is cleared and the completion is fired. */ static void cpuhp_thread_fun(unsigned int cpu) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); bool bringup = st->bringup; enum cpuhp_state state; if (WARN_ON_ONCE(!st->should_run)) return; /* * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures * that if we see ->should_run we also see the rest of the state. */ smp_mb(); /* * The BP holds the hotplug lock, but we're now running on the AP, * ensure that anybody asserting the lock is held, will actually find * it so. 
	 */
	lockdep_acquire_cpus_lock();
	cpuhp_lock_acquire(bringup);

	if (st->single) {
		state = st->cb_state;
		st->should_run = false;
	} else {
		st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
		if (!st->should_run)
			goto end;
	}

	WARN_ON_ONCE(!cpuhp_is_ap_state(state));

	if (cpuhp_is_atomic_state(state)) {
		local_irq_disable();
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
		local_irq_enable();

		/*
		 * STARTING/DYING must not fail!
		 */
		WARN_ON_ONCE(st->result);
	} else {
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
	}

	if (st->result) {
		/*
		 * If we fail on a rollback, we're up a creek without a
		 * paddle, no way forward, no way back. We lose, thanks for
		 * playing.
		 */
		WARN_ON_ONCE(st->rollback);
		st->should_run = false;
	}

end:
	cpuhp_lock_release(bringup);
	lockdep_release_cpus_lock();

	if (!st->should_run)
		complete_ap_thread(st, bringup);
}

/* Invoke a single callback on a remote cpu */
static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
				    struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int ret;

	if (!cpu_online(cpu))
		return 0;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);

	st->rollback = false;
	st->last = NULL;

	st->node = node;
	st->bringup = bringup;
	st->cb_state = state;
	st->single = true;

	__cpuhp_kick_ap(st);

	/*
	 * If we failed and did a partial, do a rollback.
	 */
	if ((ret = st->result) && st->last) {
		st->rollback = true;
		st->bringup = !bringup;

		__cpuhp_kick_ap(st);
	}

	/*
	 * Clean up the leftovers so the next hotplug operation won't use
	 * stale data.
	 */
	st->node = st->last = NULL;
	return ret;
}

static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state prev_state = st->state;
	int ret;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
	ret = cpuhp_kick_ap(cpu, st, st->target);
	trace_cpuhp_exit(cpu, st->state, prev_state, ret);

	return ret;
}

static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

static __init void cpuhp_init_state(void)
{
	struct cpuhp_cpu_state *st;
	int cpu;

	for_each_possible_cpu(cpu) {
		st = per_cpu_ptr(&cpuhp_state, cpu);
		init_completion(&st->done_up);
		init_completion(&st->done_down);
	}
}

void __init cpuhp_threads_init(void)
{
	cpuhp_init_state();
	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

/*
 *
 * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
 * protected region.
 *
 * The operation is still serialized against concurrent CPU hotplug via
 * cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
 * serialized against other hotplug related activity like adding or
 * removing of state callbacks and state instances, which invoke either the
 * startup or the teardown callback of the affected state.
 *
 * This is required for subsystems which are unfixable vs. CPU hotplug and
 * evade lock inversion problems by scheduling work which has to be
 * completed _before_ cpu_up()/_cpu_down() returns.
 *
 * Don't even think about adding anything to this for any new code or even
 * drivers. Its only purpose is to keep existing lock order trainwrecks
 * working.
 *
 * For cpu_down() there might be valid reasons to finish cleanups which are
 * not required to be done under cpu_hotplug_lock, but that's a different
 * story and would not be invoked via this.
 */
static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
{
	/*
	 * cpusets delegate hotplug operations to a worker to "solve" the
	 * lock order problems. Wait for the worker, but only if tasks are
	 * _not_ frozen (suspend, hibernate) as that would wait forever.
	 *
	 * The wait is required because otherwise the hotplug operation
	 * returns with inconsistent state, which could even be observed in
	 * user space when a new CPU is brought up. The CPU plug uevent
	 * would be delivered and user space reacting on it would fail to
	 * move tasks to the newly plugged CPU up to the point where the
	 * work has finished because up to that point the newly plugged CPU
	 * is not assignable in cpusets/cgroups. On unplug that's not
	 * necessarily a visible issue, but it is still inconsistent state,
	 * which is the real problem which needs to be "fixed". This can't
	 * prevent the transient state between scheduling the work and
	 * returning from waiting for it.
	 */
	if (!tasks_frozen)
		cpuset_wait_for_hotplug();
}

#ifdef CONFIG_HOTPLUG_CPU
#ifndef arch_clear_mm_cpumask_cpu
#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
#endif

/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask. While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so it's not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		arch_clear_mm_cpumask_cpu(cpu, t->mm);
		task_unlock(t);
	}
	rcu_read_unlock();
}

/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
	int err, cpu = smp_processor_id();

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/*
	 * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
	 * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
	 */
	WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));

	/*
	 * Invoke the former CPU_DYING callbacks. DYING must not fail!
*/ cpuhp_invoke_callback_range_nofail(false, cpu, st, target); /* Park the stopper thread */ stop_machine_park(cpu); return 0; } static int takedown_cpu(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int err; /* Park the smpboot threads */ kthread_park(st->thread); /* * Prevent irq alloc/free while the dying cpu reorganizes the * interrupt affinities. */ irq_lock_sparse(); /* * So now all preempt/rcu users must observe !cpu_active(). */ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); if (err) { /* CPU refused to die */ irq_unlock_sparse(); /* Unpark the hotplug thread so we can rollback there */ kthread_unpark(st->thread); return err; } BUG_ON(cpu_online(cpu)); /* * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed * all runnable tasks from the CPU, there's only the idle task left now * that the migration thread is done doing the stop_machine thing. * * Wait for the stop thread to go away. */ wait_for_ap_thread(st, false); BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); /* Interrupts are moved away from the dying cpu, reenable alloc/free */ irq_unlock_sparse(); hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); cpuhp_bp_sync_dead(cpu); tick_cleanup_dead_cpu(cpu); /* * Callbacks must be re-integrated right away to the RCU state machine. * Otherwise an RCU callback could block a further teardown function * waiting for its completion. */ rcutree_migrate_callbacks(cpu); return 0; } static void cpuhp_complete_idle_dead(void *arg) { struct cpuhp_cpu_state *st = arg; complete_ap_thread(st, false); } void cpuhp_report_idle_dead(void) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); BUG_ON(st->state != CPUHP_AP_OFFLINE); tick_assert_timekeeping_handover(); rcutree_report_cpu_dead(); st->state = CPUHP_AP_IDLE_DEAD; /* * We cannot call complete after rcutree_report_cpu_dead() so we delegate it * to an online cpu. */ smp_call_function_single(cpumask_first(cpu_online_mask), cpuhp_complete_idle_dead, st, 0); } static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; int ret = 0; ret = cpuhp_invoke_callback_range(false, cpu, st, target); if (ret) { pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n", ret, cpu, cpuhp_get_step(st->state)->name, st->state); cpuhp_reset_state(cpu, st, prev_state); if (st->state < prev_state) WARN_ON(cpuhp_invoke_callback_range(true, cpu, st, prev_state)); } return ret; } /* Requires cpu_add_remove_lock to be held */ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int prev_state, ret = 0; if (num_online_cpus() == 1) return -EBUSY; if (!cpu_present(cpu)) return -EINVAL; cpus_write_lock(); cpuhp_tasks_frozen = tasks_frozen; prev_state = cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread. */ if (st->state > CPUHP_TEARDOWN_CPU) { st->target = max((int)target, CPUHP_TEARDOWN_CPU); ret = cpuhp_kick_ap_work(cpu); /* * The AP side has done the error rollback already. Just * return the error code.. */ if (ret) goto out; /* * We might have stopped still in the range of the AP hotplug * thread. Nothing to do anymore. */ if (st->state > CPUHP_TEARDOWN_CPU) goto out; st->target = target; } /* * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need * to do the further cleanups. 
*/ ret = cpuhp_down_callbacks(cpu, st, target); if (ret && st->state < prev_state) { if (st->state == CPUHP_TEARDOWN_CPU) { cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } else { WARN(1, "DEAD callback error for CPU%d", cpu); } } out: cpus_write_unlock(); /* * Do post unplug cleanup. This is still protected against * concurrent CPU hotplug via cpu_add_remove_lock. */ lockup_detector_cleanup(); arch_smt_update(); cpu_up_down_serialize_trainwrecks(tasks_frozen); return ret; } struct cpu_down_work { unsigned int cpu; enum cpuhp_state target; }; static long __cpu_down_maps_locked(void *arg) { struct cpu_down_work *work = arg; return _cpu_down(work->cpu, 0, work->target); } static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target) { struct cpu_down_work work = { .cpu = cpu, .target = target, }; /* * If the platform does not support hotplug, report it explicitly to * differentiate it from a transient offlining failure. */ if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED)) return -EOPNOTSUPP; if (cpu_hotplug_disabled) return -EBUSY; /* * Ensure that the control task does not run on the to be offlined * CPU to prevent a deadlock against cfs_b->period_timer. * Also keep at least one housekeeping cpu onlined to avoid generating * an empty sched_domain span. */ for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) { if (cpu != work.cpu) return work_on_cpu(cpu, __cpu_down_maps_locked, &work); } return -EBUSY; } static int cpu_down(unsigned int cpu, enum cpuhp_state target) { int err; cpu_maps_update_begin(); err = cpu_down_maps_locked(cpu, target); cpu_maps_update_done(); return err; } /** * cpu_device_down - Bring down a cpu device * @dev: Pointer to the cpu device to offline * * This function is meant to be used by device core cpu subsystem only. * * Other subsystems should use remove_cpu() instead. * * Return: %0 on success or a negative errno code */ int cpu_device_down(struct device *dev) { return cpu_down(dev->id, CPUHP_OFFLINE); } int remove_cpu(unsigned int cpu) { int ret; lock_device_hotplug(); ret = device_offline(get_cpu_device(cpu)); unlock_device_hotplug(); return ret; } EXPORT_SYMBOL_GPL(remove_cpu); void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { unsigned int cpu; int error; cpu_maps_update_begin(); /* * Make certain the cpu I'm about to reboot on is online. * * This is inline to what migrate_to_reboot_cpu() already do. */ if (!cpu_online(primary_cpu)) primary_cpu = cpumask_first(cpu_online_mask); for_each_online_cpu(cpu) { if (cpu == primary_cpu) continue; error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); if (error) { pr_err("Failed to offline CPU%d - error=%d", cpu, error); break; } } /* * Ensure all but the reboot CPU are offline. */ BUG_ON(num_online_cpus() > 1); /* * Make sure the CPUs won't be enabled by someone else after this * point. Kexec will reboot to a new kernel shortly resetting * everything along the way. */ cpu_hotplug_disabled++; cpu_maps_update_done(); } #else #define takedown_cpu NULL #endif /*CONFIG_HOTPLUG_CPU*/ /** * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU * @cpu: cpu that just started * * It must be called by the arch code on the new cpu, before the new cpu * enables interrupts and before the "boot" cpu returns from __cpu_up(). */ void notify_cpu_starting(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); rcutree_report_cpu_starting(cpu); /* Enables RCU usage on this CPU. 
*/ cpumask_set_cpu(cpu, &cpus_booted_once_mask); /* * STARTING must not fail! */ cpuhp_invoke_callback_range_nofail(true, cpu, st, target); } /* * Called from the idle task. Wake up the controlling task which brings the * hotplug thread of the upcoming CPU up and then delegates the rest of the * online bringup to the hotplug thread. */ void cpuhp_online_idle(enum cpuhp_state state) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); /* Happens for the boot cpu */ if (state != CPUHP_AP_ONLINE_IDLE) return; cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE); /* * Unpark the stopper thread before we start the idle loop (and start * scheduling); this ensures the stopper task is always available. */ stop_machine_unpark(smp_processor_id()); st->state = CPUHP_AP_ONLINE_IDLE; complete_ap_thread(st, true); } /* Requires cpu_add_remove_lock to be held */ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct task_struct *idle; int ret = 0; cpus_write_lock(); if (!cpu_present(cpu)) { ret = -EINVAL; goto out; } /* * The caller of cpu_up() might have raced with another * caller. Nothing to do. */ if (st->state >= target) goto out; if (st->state == CPUHP_OFFLINE) { /* Let it fail before we try to bring the cpu up */ idle = idle_thread_get(cpu); if (IS_ERR(idle)) { ret = PTR_ERR(idle); goto out; } /* * Reset stale stack state from the last time this CPU was online. */ scs_task_reset(idle); kasan_unpoison_task_stack(idle); } cpuhp_tasks_frozen = tasks_frozen; cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread once more. */ if (st->state > CPUHP_BRINGUP_CPU) { ret = cpuhp_kick_ap_work(cpu); /* * The AP side has done the error rollback already. Just * return the error code.. */ if (ret) goto out; } /* * Try to reach the target state. We max out on the BP at * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is * responsible for bringing it up to the target state. */ target = min((int)target, CPUHP_BRINGUP_CPU); ret = cpuhp_up_callbacks(cpu, st, target); out: cpus_write_unlock(); arch_smt_update(); cpu_up_down_serialize_trainwrecks(tasks_frozen); return ret; } static int cpu_up(unsigned int cpu, enum cpuhp_state target) { int err = 0; if (!cpu_possible(cpu)) { pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", cpu); return -EINVAL; } err = try_online_node(cpu_to_node(cpu)); if (err) return err; cpu_maps_update_begin(); if (cpu_hotplug_disabled) { err = -EBUSY; goto out; } if (!cpu_bootable(cpu)) { err = -EPERM; goto out; } err = _cpu_up(cpu, 0, target); out: cpu_maps_update_done(); return err; } /** * cpu_device_up - Bring up a cpu device * @dev: Pointer to the cpu device to online * * This function is meant to be used by device core cpu subsystem only. * * Other subsystems should use add_cpu() instead. * * Return: %0 on success or a negative errno code */ int cpu_device_up(struct device *dev) { return cpu_up(dev->id, CPUHP_ONLINE); } int add_cpu(unsigned int cpu) { int ret; lock_device_hotplug(); ret = device_online(get_cpu_device(cpu)); unlock_device_hotplug(); return ret; } EXPORT_SYMBOL_GPL(add_cpu); /** * bringup_hibernate_cpu - Bring up the CPU that we hibernated on * @sleep_cpu: The cpu we hibernated on and should be brought up. 
* * On some architectures like arm64, we can hibernate on any CPU, but on * wake up the CPU we hibernated on might be offline as a side effect of * using maxcpus= for example. * * Return: %0 on success or a negative errno code */ int bringup_hibernate_cpu(unsigned int sleep_cpu) { int ret; if (!cpu_online(sleep_cpu)) { pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n"); ret = cpu_up(sleep_cpu, CPUHP_ONLINE); if (ret) { pr_err("Failed to bring hibernate-CPU up!\n"); return ret; } } return 0; } static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus, enum cpuhp_state target) { unsigned int cpu; for_each_cpu(cpu, mask) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); if (cpu_up(cpu, target) && can_rollback_cpu(st)) { /* * If this failed then cpu_up() might have only * rolled back to CPUHP_BP_KICK_AP for the final * online. Clean it up. NOOP if already rolled back. */ WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE)); } if (!--ncpus) break; } } #ifdef CONFIG_HOTPLUG_PARALLEL static bool __cpuhp_parallel_bringup __ro_after_init = true; static int __init parallel_bringup_parse_param(char *arg) { return kstrtobool(arg, &__cpuhp_parallel_bringup); } early_param("cpuhp.parallel", parallel_bringup_parse_param); static inline bool cpuhp_smt_aware(void) { return cpu_smt_max_threads > 1; } static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) { return cpu_primary_thread_mask; } /* * On architectures which have enabled parallel bringup this invokes all BP * prepare states for each of the to be onlined APs first. The last state * sends the startup IPI to the APs. The APs proceed through the low level * bringup code in parallel and then wait for the control CPU to release * them one by one for the final onlining procedure. * * This avoids waiting for each AP to respond to the startup IPI in * CPUHP_BRINGUP_CPU. */ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus) { const struct cpumask *mask = cpu_present_mask; if (__cpuhp_parallel_bringup) __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup(); if (!__cpuhp_parallel_bringup) return false; if (cpuhp_smt_aware()) { const struct cpumask *pmask = cpuhp_get_primary_thread_mask(); static struct cpumask tmp_mask __initdata; /* * X86 requires to prevent that SMT siblings stopped while * the primary thread does a microcode update for various * reasons. Bring the primary threads up first. 
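		 *
		 * Illustrative example: on a system with 8 cores and 2 SMT
		 * threads per core, the 8 primary threads are first kicked to
		 * CPUHP_BP_KICK_AP and brought fully online below; only then
		 * are the remaining 8 sibling threads started.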
*/ cpumask_and(&tmp_mask, mask, pmask); cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP); cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE); /* Account for the online CPUs */ ncpus -= num_online_cpus(); if (!ncpus) return true; /* Create the mask for secondary CPUs */ cpumask_andnot(&tmp_mask, mask, pmask); mask = &tmp_mask; } /* Bring the not-yet started CPUs up */ cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP); cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE); return true; } #else static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; } #endif /* CONFIG_HOTPLUG_PARALLEL */ void __init bringup_nonboot_cpus(unsigned int max_cpus) { /* Try parallel bringup optimization if enabled */ if (cpuhp_bringup_cpus_parallel(max_cpus)) return; /* Full per CPU serialized bringup */ cpuhp_bringup_mask(cpu_present_mask, max_cpus, CPUHP_ONLINE); } #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; int freeze_secondary_cpus(int primary) { int cpu, error = 0; cpu_maps_update_begin(); if (primary == -1) { primary = cpumask_first(cpu_online_mask); if (!housekeeping_cpu(primary, HK_TYPE_TIMER)) primary = housekeeping_any_cpu(HK_TYPE_TIMER); } else { if (!cpu_online(primary)) primary = cpumask_first(cpu_online_mask); } /* * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); pr_info("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { if (cpu == primary) continue; if (pm_wakeup_pending()) { pr_info("Wakeup pending. Abort CPU freeze\n"); error = -EBUSY; break; } trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1, CPUHP_OFFLINE); trace_suspend_resume(TPS("CPU_OFF"), cpu, false); if (!error) cpumask_set_cpu(cpu, frozen_cpus); else { pr_err("Error taking CPU%d down: %d\n", cpu, error); break; } } if (!error) BUG_ON(num_online_cpus() > 1); else pr_err("Non-boot CPUs are not disabled\n"); /* * Make sure the CPUs won't be enabled by someone else. We need to do * this even in case of failure as all freeze_secondary_cpus() users are * supposed to do thaw_secondary_cpus() on the failure path. */ cpu_hotplug_disabled++; cpu_maps_update_done(); return error; } void __weak arch_thaw_secondary_cpus_begin(void) { } void __weak arch_thaw_secondary_cpus_end(void) { } void thaw_secondary_cpus(void) { int cpu, error; /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); __cpu_hotplug_enable(); if (cpumask_empty(frozen_cpus)) goto out; pr_info("Enabling non-boot CPUs ...\n"); arch_thaw_secondary_cpus_begin(); for_each_cpu(cpu, frozen_cpus) { trace_suspend_resume(TPS("CPU_ON"), cpu, true); error = _cpu_up(cpu, 1, CPUHP_ONLINE); trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { pr_info("CPU%d is up\n", cpu); continue; } pr_warn("Error taking CPU%d up: %d\n", cpu, error); } arch_thaw_secondary_cpus_end(); cpumask_clear(frozen_cpus); out: cpu_maps_update_done(); } static int __init alloc_frozen_cpus(void) { if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO)) return -ENOMEM; return 0; } core_initcall(alloc_frozen_cpus); /* * When callbacks for CPU hotplug notifications are being executed, we must * ensure that the state of the system with respect to the tasks being frozen * or not, as reported by the notification, remains unchanged *throughout the * duration* of the execution of the callbacks. * Hence we need to prevent the freezer from racing with regular CPU hotplug. 
* * This synchronization is implemented by mutually excluding regular CPU * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/ * Hibernate notifications. */ static int cpu_hotplug_pm_callback(struct notifier_block *nb, unsigned long action, void *ptr) { switch (action) { case PM_SUSPEND_PREPARE: case PM_HIBERNATION_PREPARE: cpu_hotplug_disable(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: cpu_hotplug_enable(); break; default: return NOTIFY_DONE; } return NOTIFY_OK; } static int __init cpu_hotplug_pm_sync_init(void) { /* * cpu_hotplug_pm_callback has higher priority than x86 * bsp_pm_callback which depends on cpu_hotplug_pm_callback * to disable cpu hotplug to avoid cpu hotplug race. */ pm_notifier(cpu_hotplug_pm_callback, 0); return 0; } core_initcall(cpu_hotplug_pm_sync_init); #endif /* CONFIG_PM_SLEEP_SMP */ int __boot_cpu_id; #endif /* CONFIG_SMP */ /* Boot processor state steps */ static struct cpuhp_step cpuhp_hp_states[] = { [CPUHP_OFFLINE] = { .name = "offline", .startup.single = NULL, .teardown.single = NULL, }, #ifdef CONFIG_SMP [CPUHP_CREATE_THREADS]= { .name = "threads:prepare", .startup.single = smpboot_create_threads, .teardown.single = NULL, .cant_stop = true, }, [CPUHP_PERF_PREPARE] = { .name = "perf:prepare", .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, [CPUHP_RANDOM_PREPARE] = { .name = "random:prepare", .startup.single = random_prepare_cpu, .teardown.single = NULL, }, [CPUHP_WORKQUEUE_PREP] = { .name = "workqueue:prepare", .startup.single = workqueue_prepare_cpu, .teardown.single = NULL, }, [CPUHP_HRTIMERS_PREPARE] = { .name = "hrtimers:prepare", .startup.single = hrtimers_prepare_cpu, .teardown.single = NULL, }, [CPUHP_SMPCFD_PREPARE] = { .name = "smpcfd:prepare", .startup.single = smpcfd_prepare_cpu, .teardown.single = smpcfd_dead_cpu, }, [CPUHP_RELAY_PREPARE] = { .name = "relay:prepare", .startup.single = relay_prepare_cpu, .teardown.single = NULL, }, [CPUHP_RCUTREE_PREP] = { .name = "RCU/tree:prepare", .startup.single = rcutree_prepare_cpu, .teardown.single = rcutree_dead_cpu, }, /* * On the tear-down path, timers_dead_cpu() must be invoked * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. */ [CPUHP_TIMERS_PREPARE] = { .name = "timers:prepare", .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, #ifdef CONFIG_HOTPLUG_SPLIT_STARTUP /* * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until * the next step will release it. */ [CPUHP_BP_KICK_AP] = { .name = "cpu:kick_ap", .startup.single = cpuhp_kick_ap_alive, }, /* * Waits for the AP to reach cpuhp_ap_sync_alive() and then * releases it for the complete bringup. */ [CPUHP_BRINGUP_CPU] = { .name = "cpu:bringup", .startup.single = cpuhp_bringup_ap, .teardown.single = finish_cpu, .cant_stop = true, }, #else /* * All-in-one CPU bringup state which includes the kick alive. */ [CPUHP_BRINGUP_CPU] = { .name = "cpu:bringup", .startup.single = bringup_cpu, .teardown.single = finish_cpu, .cant_stop = true, }, #endif /* Final state before CPU kills itself */ [CPUHP_AP_IDLE_DEAD] = { .name = "idle:dead", }, /* * Last state before CPU enters the idle loop to die. Transient state * for synchronization. */ [CPUHP_AP_OFFLINE] = { .name = "ap:offline", .cant_stop = true, }, /* First state is scheduler control. 
Interrupts are disabled */ [CPUHP_AP_SCHED_STARTING] = { .name = "sched:starting", .startup.single = sched_cpu_starting, .teardown.single = sched_cpu_dying, }, [CPUHP_AP_RCUTREE_DYING] = { .name = "RCU/tree:dying", .startup.single = NULL, .teardown.single = rcutree_dying_cpu, }, [CPUHP_AP_SMPCFD_DYING] = { .name = "smpcfd:dying", .startup.single = NULL, .teardown.single = smpcfd_dying_cpu, }, [CPUHP_AP_HRTIMERS_DYING] = { .name = "hrtimers:dying", .startup.single = NULL, .teardown.single = hrtimers_cpu_dying, }, [CPUHP_AP_TICK_DYING] = { .name = "tick:dying", .startup.single = NULL, .teardown.single = tick_cpu_dying, }, /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { .name = "ap:online", }, /* * Handled on control processor until the plugged processor manages * this itself. */ [CPUHP_TEARDOWN_CPU] = { .name = "cpu:teardown", .startup.single = NULL, .teardown.single = takedown_cpu, .cant_stop = true, }, [CPUHP_AP_SCHED_WAIT_EMPTY] = { .name = "sched:waitempty", .startup.single = NULL, .teardown.single = sched_cpu_wait_empty, }, /* Handle smpboot threads park/unpark */ [CPUHP_AP_SMPBOOT_THREADS] = { .name = "smpboot/threads:online", .startup.single = smpboot_unpark_threads, .teardown.single = smpboot_park_threads, }, [CPUHP_AP_IRQ_AFFINITY_ONLINE] = { .name = "irq/affinity:online", .startup.single = irq_affinity_online_cpu, .teardown.single = NULL, }, [CPUHP_AP_PERF_ONLINE] = { .name = "perf:online", .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, [CPUHP_AP_WATCHDOG_ONLINE] = { .name = "lockup_detector:online", .startup.single = lockup_detector_online_cpu, .teardown.single = lockup_detector_offline_cpu, }, [CPUHP_AP_WORKQUEUE_ONLINE] = { .name = "workqueue:online", .startup.single = workqueue_online_cpu, .teardown.single = workqueue_offline_cpu, }, [CPUHP_AP_RANDOM_ONLINE] = { .name = "random:online", .startup.single = random_online_cpu, .teardown.single = NULL, }, [CPUHP_AP_RCUTREE_ONLINE] = { .name = "RCU/tree:online", .startup.single = rcutree_online_cpu, .teardown.single = rcutree_offline_cpu, }, #endif /* * The dynamically registered state space is here */ #ifdef CONFIG_SMP /* Last state is scheduler control setting the cpu active */ [CPUHP_AP_ACTIVE] = { .name = "sched:active", .startup.single = sched_cpu_activate, .teardown.single = sched_cpu_deactivate, }, #endif /* CPU is fully up and running. */ [CPUHP_ONLINE] = { .name = "online", .startup.single = NULL, .teardown.single = NULL, }, }; /* Sanity check for callbacks */ static int cpuhp_cb_check(enum cpuhp_state state) { if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE) return -EINVAL; return 0; } /* * Returns a free for dynamic slot assignment of the Online state. The states * are protected by the cpuhp_slot_states mutex and an empty slot is identified * by having no name assigned. 
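 *
 * Typical way a caller ends up here (illustrative sketch, the callback and
 * variable names are made up):
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys/foo:online",
 *				foo_online_cpu, foo_offline_cpu);
 *	if (ret < 0)
 *		return ret;
 *	foo_hp_state = ret;
 *
 * The positive return value is the reserved slot number, which is later
 * handed to cpuhp_remove_state() to release it again.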
*/ static int cpuhp_reserve_state(enum cpuhp_state state) { enum cpuhp_state i, end; struct cpuhp_step *step; switch (state) { case CPUHP_AP_ONLINE_DYN: step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN; end = CPUHP_AP_ONLINE_DYN_END; break; case CPUHP_BP_PREPARE_DYN: step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN; end = CPUHP_BP_PREPARE_DYN_END; break; default: return -EINVAL; } for (i = state; i <= end; i++, step++) { if (!step->name) return i; } WARN(1, "No more dynamic states available for CPU hotplug\n"); return -ENOSPC; } static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { /* (Un)Install the callbacks for further cpu hotplug operations */ struct cpuhp_step *sp; int ret = 0; /* * If name is NULL, then the state gets removed. * * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on * the first allocation from these dynamic ranges, so the removal * would trigger a new allocation and clear the wrong (already * empty) state, leaving the callbacks of the to be cleared state * dangling, which causes wreckage on the next hotplug operation. */ if (name && (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN)) { ret = cpuhp_reserve_state(state); if (ret < 0) return ret; state = ret; } sp = cpuhp_get_step(state); if (name && sp->name) return -EBUSY; sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); return ret; } static void *cpuhp_get_teardown_cb(enum cpuhp_state state) { return cpuhp_get_step(state)->teardown.single; } /* * Call the startup/teardown function for a step either on the AP or * on the current CPU. */ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node) { struct cpuhp_step *sp = cpuhp_get_step(state); int ret; /* * If there's nothing to do, we done. * Relies on the union for multi_instance. */ if (cpuhp_step_empty(bringup, sp)) return 0; /* * The non AP bound callbacks can fail on bringup. On teardown * e.g. module removal we crash for now. */ #ifdef CONFIG_SMP if (cpuhp_is_ap_state(state)) ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node); else ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); #else ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); #endif BUG_ON(ret && !bringup); return ret; } /* * Called from __cpuhp_setup_state on a recoverable failure. * * Note: The teardown callbacks for rollback are not allowed to fail! */ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, struct hlist_node *node) { int cpu; /* Roll back the already executed steps on the other cpus */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpu >= failedcpu) break; /* Did we invoke the startup call on that cpu ? */ if (cpustate >= state) cpuhp_issue_call(cpu, state, false, node); } } int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, struct hlist_node *node, bool invoke) { struct cpuhp_step *sp; int cpu; int ret; lockdep_assert_cpus_held(); sp = cpuhp_get_step(state); if (sp->multi_instance == false) return -EINVAL; mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; /* * Try to call the startup callback for each present cpu * depending on the hotplug state of the cpu. 
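	 *
	 * Usage sketch with hypothetical names: a multi-instance state is
	 * registered once via cpuhp_setup_state_multi(), and each device
	 * instance then reaches this path through
	 * cpuhp_state_add_instance(state, &dev->node), which runs the
	 * startup.multi callback on every present CPU already at or above
	 * the state.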
*/ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate < state) continue; ret = cpuhp_issue_call(cpu, state, true, node); if (ret) { if (sp->teardown.multi) cpuhp_rollback_install(cpu, state, node); goto unlock; } } add_node: ret = 0; hlist_add_head(node, &sp->list); unlock: mutex_unlock(&cpuhp_state_mutex); return ret; } int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { int ret; cpus_read_lock(); ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke); cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); /** * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state * @state: The state to setup * @name: Name of the step * @invoke: If true, the startup function is invoked for cpus where * cpu state >= @state * @startup: startup callback function * @teardown: teardown callback function * @multi_instance: State is set up for multiple instances which get * added afterwards. * * The caller needs to hold cpus read locked while calling this function. * Return: * On success: * Positive state number if @state is CPUHP_AP_ONLINE_DYN; * 0 for all other states * On failure: proper (negative) error code */ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { int cpu, ret = 0; bool dynstate; lockdep_assert_cpus_held(); if (cpuhp_cb_check(state) || !name) return -EINVAL; mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); dynstate = state == CPUHP_AP_ONLINE_DYN; if (ret > 0 && dynstate) { state = ret; ret = 0; } if (ret || !invoke || !startup) goto out; /* * Try to call the startup callback for each present cpu * depending on the hotplug state of the cpu. */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate < state) continue; ret = cpuhp_issue_call(cpu, state, true, NULL); if (ret) { if (teardown) cpuhp_rollback_install(cpu, state, NULL); cpuhp_store_callbacks(state, NULL, NULL, NULL, false); goto out; } } out: mutex_unlock(&cpuhp_state_mutex); /* * If the requested state is CPUHP_AP_ONLINE_DYN, return the * dynamically allocated state in case of success. */ if (!ret && dynstate) return state; return ret; } EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked); int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { int ret; cpus_read_lock(); ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup, teardown, multi_instance); cpus_read_unlock(); return ret; } EXPORT_SYMBOL(__cpuhp_setup_state); int __cpuhp_state_remove_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { struct cpuhp_step *sp = cpuhp_get_step(state); int cpu; BUG_ON(cpuhp_cb_check(state)); if (!sp->multi_instance) return -EINVAL; cpus_read_lock(); mutex_lock(&cpuhp_state_mutex); if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* * Call the teardown callback for each present cpu depending * on the hotplug state of the cpu. This function is not * allowed to fail currently! 
*/ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate >= state) cpuhp_issue_call(cpu, state, false, node); } remove: hlist_del(node); mutex_unlock(&cpuhp_state_mutex); cpus_read_unlock(); return 0; } EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance); /** * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state * @state: The state to remove * @invoke: If true, the teardown function is invoked for cpus where * cpu state >= @state * * The caller needs to hold cpus read locked while calling this function. * The teardown callback is currently not allowed to fail. Think * about module removal! */ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke) { struct cpuhp_step *sp = cpuhp_get_step(state); int cpu; BUG_ON(cpuhp_cb_check(state)); lockdep_assert_cpus_held(); mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", state); goto remove; } if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* * Call the teardown callback for each present cpu depending * on the hotplug state of the cpu. This function is not * allowed to fail currently! */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate >= state) cpuhp_issue_call(cpu, state, false, NULL); } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); mutex_unlock(&cpuhp_state_mutex); } EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked); void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) { cpus_read_lock(); __cpuhp_remove_state_cpuslocked(state, invoke); cpus_read_unlock(); } EXPORT_SYMBOL(__cpuhp_remove_state); #ifdef CONFIG_HOTPLUG_SMT static void cpuhp_offline_cpu_device(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); dev->offline = true; /* Tell user space about the state change */ kobject_uevent(&dev->kobj, KOBJ_OFFLINE); } static void cpuhp_online_cpu_device(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); dev->offline = false; /* Tell user space about the state change */ kobject_uevent(&dev->kobj, KOBJ_ONLINE); } int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; cpu_maps_update_begin(); for_each_online_cpu(cpu) { if (topology_is_primary_thread(cpu)) continue; /* * Disable can be called with CPU_SMT_ENABLED when changing * from a higher to lower number of SMT threads per core. */ if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) continue; ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); if (ret) break; /* * As this needs to hold the cpu maps lock it's impossible * to call device_offline() because that ends up calling * cpu_down() which takes cpu maps lock. cpu maps lock * needs to be held as this might race against in kernel * abusers of the hotplug machinery (thermal management). * * So nothing would update device:offline state. That would * leave the sysfs entry stale and prevent onlining after * smt control has been changed to 'off' again. This is * called under the sysfs hotplug lock, so it is properly * serialized against the regular offline usage. 
*/ cpuhp_offline_cpu_device(cpu); } if (!ret) cpu_smt_control = ctrlval; cpu_maps_update_done(); return ret; } int cpuhp_smt_enable(void) { int cpu, ret = 0; cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; if (!cpu_smt_thread_allowed(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) break; /* See comment in cpuhp_smt_disable() */ cpuhp_online_cpu_device(cpu); } cpu_maps_update_done(); return ret; } #endif #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU) static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->state); } static DEVICE_ATTR_RO(state); static ssize_t target_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); struct cpuhp_step *sp; int target, ret; ret = kstrtoint(buf, 10, &target); if (ret) return ret; #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE) return -EINVAL; #else if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE) return -EINVAL; #endif ret = lock_device_hotplug_sysfs(); if (ret) return ret; mutex_lock(&cpuhp_state_mutex); sp = cpuhp_get_step(target); ret = !sp->name || sp->cant_stop ? -EINVAL : 0; mutex_unlock(&cpuhp_state_mutex); if (ret) goto out; if (st->state < target) ret = cpu_up(dev->id, target); else if (st->state > target) ret = cpu_down(dev->id, target); else if (WARN_ON(st->target != target)) st->target = target; out: unlock_device_hotplug(); return ret ? ret : count; } static ssize_t target_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->target); } static DEVICE_ATTR_RW(target); static ssize_t fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); struct cpuhp_step *sp; int fail, ret; ret = kstrtoint(buf, 10, &fail); if (ret) return ret; if (fail == CPUHP_INVALID) { st->fail = fail; return count; } if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) return -EINVAL; /* * Cannot fail STARTING/DYING callbacks. */ if (cpuhp_is_atomic_state(fail)) return -EINVAL; /* * DEAD callbacks cannot fail... * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter * triggering STARTING callbacks, a failure in this state would * hinder rollback. */ if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU) return -EINVAL; /* * Cannot fail anything that doesn't have callbacks. 
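	 *
	 * Usage sketch: writing a state number to
	 * /sys/devices/system/cpu/cpuN/hotplug/fail arms this fault
	 * injection; the next online/offline of that CPU then fails with
	 * -EAGAIN when it reaches the chosen state, and writing -1
	 * (CPUHP_INVALID) disarms it again.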
*/ mutex_lock(&cpuhp_state_mutex); sp = cpuhp_get_step(fail); if (!sp->startup.single && !sp->teardown.single) ret = -EINVAL; mutex_unlock(&cpuhp_state_mutex); if (ret) return ret; st->fail = fail; return count; } static ssize_t fail_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->fail); } static DEVICE_ATTR_RW(fail); static struct attribute *cpuhp_cpu_attrs[] = { &dev_attr_state.attr, &dev_attr_target.attr, &dev_attr_fail.attr, NULL }; static const struct attribute_group cpuhp_cpu_attr_group = { .attrs = cpuhp_cpu_attrs, .name = "hotplug", NULL }; static ssize_t states_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t cur, res = 0; int i; mutex_lock(&cpuhp_state_mutex); for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) { struct cpuhp_step *sp = cpuhp_get_step(i); if (sp->name) { cur = sprintf(buf, "%3d: %s\n", i, sp->name); buf += cur; res += cur; } } mutex_unlock(&cpuhp_state_mutex); return res; } static DEVICE_ATTR_RO(states); static struct attribute *cpuhp_cpu_root_attrs[] = { &dev_attr_states.attr, NULL }; static const struct attribute_group cpuhp_cpu_root_attr_group = { .attrs = cpuhp_cpu_root_attrs, .name = "hotplug", NULL }; #ifdef CONFIG_HOTPLUG_SMT static bool cpu_smt_num_threads_valid(unsigned int threads) { if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC)) return threads >= 1 && threads <= cpu_smt_max_threads; return threads == 1 || threads == cpu_smt_max_threads; } static ssize_t __store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ctrlval, ret, num_threads, orig_threads; bool force_off; if (cpu_smt_control == CPU_SMT_FORCE_DISABLED) return -EPERM; if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) return -ENODEV; if (sysfs_streq(buf, "on")) { ctrlval = CPU_SMT_ENABLED; num_threads = cpu_smt_max_threads; } else if (sysfs_streq(buf, "off")) { ctrlval = CPU_SMT_DISABLED; num_threads = 1; } else if (sysfs_streq(buf, "forceoff")) { ctrlval = CPU_SMT_FORCE_DISABLED; num_threads = 1; } else if (kstrtoint(buf, 10, &num_threads) == 0) { if (num_threads == 1) ctrlval = CPU_SMT_DISABLED; else if (cpu_smt_num_threads_valid(num_threads)) ctrlval = CPU_SMT_ENABLED; else return -EINVAL; } else { return -EINVAL; } ret = lock_device_hotplug_sysfs(); if (ret) return ret; orig_threads = cpu_smt_num_threads; cpu_smt_num_threads = num_threads; force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED; if (num_threads > orig_threads) ret = cpuhp_smt_enable(); else if (num_threads < orig_threads || force_off) ret = cpuhp_smt_disable(ctrlval); unlock_device_hotplug(); return ret ? ret : count; } #else /* !CONFIG_HOTPLUG_SMT */ static ssize_t __store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return -ENODEV; } #endif /* CONFIG_HOTPLUG_SMT */ static const char *smt_states[] = { [CPU_SMT_ENABLED] = "on", [CPU_SMT_DISABLED] = "off", [CPU_SMT_FORCE_DISABLED] = "forceoff", [CPU_SMT_NOT_SUPPORTED] = "notsupported", [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented", }; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *state = smt_states[cpu_smt_control]; #ifdef CONFIG_HOTPLUG_SMT /* * If SMT is enabled but not all threads are enabled then show the * number of threads. If all threads are enabled show "on". Otherwise * show the state name. 
*/ if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_num_threads != cpu_smt_max_threads) return sysfs_emit(buf, "%d\n", cpu_smt_num_threads); #endif return sysfs_emit(buf, "%s\n", state); } static ssize_t control_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return __store_smt_control(dev, attr, buf, count); } static DEVICE_ATTR_RW(control); static ssize_t active_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", sched_smt_active()); } static DEVICE_ATTR_RO(active); static struct attribute *cpuhp_smt_attrs[] = { &dev_attr_control.attr, &dev_attr_active.attr, NULL }; static const struct attribute_group cpuhp_smt_attr_group = { .attrs = cpuhp_smt_attrs, .name = "smt", NULL }; static int __init cpu_smt_sysfs_init(void) { struct device *dev_root; int ret = -ENODEV; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group); put_device(dev_root); } return ret; } static int __init cpuhp_sysfs_init(void) { struct device *dev_root; int cpu, ret; ret = cpu_smt_sysfs_init(); if (ret) return ret; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group); put_device(dev_root); if (ret) return ret; } for_each_possible_cpu(cpu) { struct device *dev = get_cpu_device(cpu); if (!dev) continue; ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group); if (ret) return ret; } return 0; } device_initcall(cpuhp_sysfs_init); #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */ /* * cpu_bit_bitmap[] is a special, "compressed" data structure that * represents all NR_CPUS bits binary values of 1<<nr. * * It is used by cpumask_of() to get a constant address to a CPU * mask value that has a single bit set only. 
*/ /* cpu_bit_bitmap[0] is empty - so we can back into it */ #define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x)) #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { MASK_DECLARE_8(0), MASK_DECLARE_8(8), MASK_DECLARE_8(16), MASK_DECLARE_8(24), #if BITS_PER_LONG > 32 MASK_DECLARE_8(32), MASK_DECLARE_8(40), MASK_DECLARE_8(48), MASK_DECLARE_8(56), #endif }; EXPORT_SYMBOL_GPL(cpu_bit_bitmap); const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; EXPORT_SYMBOL(cpu_all_bits); #ifdef CONFIG_INIT_ALL_POSSIBLE struct cpumask __cpu_possible_mask __ro_after_init = {CPU_BITS_ALL}; #else struct cpumask __cpu_possible_mask __ro_after_init; #endif EXPORT_SYMBOL(__cpu_possible_mask); struct cpumask __cpu_online_mask __read_mostly; EXPORT_SYMBOL(__cpu_online_mask); struct cpumask __cpu_present_mask __read_mostly; EXPORT_SYMBOL(__cpu_present_mask); struct cpumask __cpu_active_mask __read_mostly; EXPORT_SYMBOL(__cpu_active_mask); struct cpumask __cpu_dying_mask __read_mostly; EXPORT_SYMBOL(__cpu_dying_mask); atomic_t __num_online_cpus __read_mostly; EXPORT_SYMBOL(__num_online_cpus); void init_cpu_present(const struct cpumask *src) { cpumask_copy(&__cpu_present_mask, src); } void init_cpu_possible(const struct cpumask *src) { cpumask_copy(&__cpu_possible_mask, src); } void init_cpu_online(const struct cpumask *src) { cpumask_copy(&__cpu_online_mask, src); } void set_cpu_online(unsigned int cpu, bool online) { /* * atomic_inc/dec() is required to handle the horrid abuse of this * function by the reboot and kexec code which invoke it from * IPI/NMI broadcasts when shutting down CPUs. Invocation from * regular CPU hotplug is properly serialized. * * Note, that the fact that __num_online_cpus is of type atomic_t * does not protect readers which are not serialized against * concurrent hotplug operations. */ if (online) { if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask)) atomic_inc(&__num_online_cpus); } else { if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask)) atomic_dec(&__num_online_cpus); } } /* * Activate the first processor. */ void __init boot_cpu_init(void) { int cpu = smp_processor_id(); /* Mark the boot cpu "present", "online" etc for SMP and UP case */ set_cpu_online(cpu, true); set_cpu_active(cpu, true); set_cpu_present(cpu, true); set_cpu_possible(cpu, true); #ifdef CONFIG_SMP __boot_cpu_id = cpu; #endif } /* * Must be called _AFTER_ setting up the per_cpu areas */ void __init boot_cpu_hotplug_init(void) { #ifdef CONFIG_SMP cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask); atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE); #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); } #ifdef CONFIG_CPU_MITIGATIONS /* * These are used for a global "mitigations=" cmdline option for toggling * optional CPU mitigations. 
*/ enum cpu_mitigations { CPU_MITIGATIONS_OFF, CPU_MITIGATIONS_AUTO, CPU_MITIGATIONS_AUTO_NOSMT, }; static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { if (!strcmp(arg, "off")) cpu_mitigations = CPU_MITIGATIONS_OFF; else if (!strcmp(arg, "auto")) cpu_mitigations = CPU_MITIGATIONS_AUTO; else if (!strcmp(arg, "auto,nosmt")) cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; else pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", arg); return 0; } /* mitigations=off */ bool cpu_mitigations_off(void) { return cpu_mitigations == CPU_MITIGATIONS_OFF; } EXPORT_SYMBOL_GPL(cpu_mitigations_off); /* mitigations=auto,nosmt */ bool cpu_mitigations_auto_nosmt(void) { return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; } EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); #else static int __init mitigations_parse_cmdline(char *arg) { pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n"); return 0; } #endif early_param("mitigations", mitigations_parse_cmdline);
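/*
 * Editor's illustrative sketch, not part of the original file: a minimal
 * consumer of the hotplug state machine implemented above.  A driver
 * typically registers a dynamic AP state with cpuhp_setup_state(); the
 * startup callback then runs on every CPU that is or becomes online, and
 * the teardown callback runs on offline, following the same issue/rollback
 * paths as the __cpuhp_*_state helpers above.  The "example" names are
 * hypothetical.
 */
#include <linux/cpuhotplug.h>

static int example_cpu_online(unsigned int cpu)
{
	/* Per-CPU setup for @cpu goes here. */
	return 0;
}

static int example_cpu_offline(unsigned int cpu)
{
	/* Per-CPU teardown for @cpu goes here. */
	return 0;
}

static int example_register_hotplug_state(void)
{
	int state;

	/* CPUHP_AP_ONLINE_DYN allocates a state; its number is returned. */
	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example/driver:online",
				  example_cpu_online, example_cpu_offline);
	if (state < 0)
		return state;

	/* Later: cpuhp_remove_state(state) runs the teardown on online CPUs. */
	return 0;
}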
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 // SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for the MasterKit MA901 USB FM radio. This device plugs * into the USB port and an analog audio input or headphones, so this thing * only deals with initialization, frequency setting, volume. * * Copyright (c) 2012 Alexey Klimov <klimov.linux@gmail.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/videodev2.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> #include <linux/usb.h> #include <linux/mutex.h> #define DRIVER_AUTHOR "Alexey Klimov <klimov.linux@gmail.com>" #define DRIVER_DESC "Masterkit MA901 USB FM radio driver" #define DRIVER_VERSION "0.0.1" MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); #define USB_MA901_VENDOR 0x16c0 #define USB_MA901_PRODUCT 0x05df /* dev_warn macro with driver name */ #define MA901_DRIVER_NAME "radio-ma901" #define ma901radio_dev_warn(dev, fmt, arg...) \ dev_warn(dev, MA901_DRIVER_NAME " - " fmt, ##arg) #define ma901radio_dev_err(dev, fmt, arg...) 
\ dev_err(dev, MA901_DRIVER_NAME " - " fmt, ##arg) /* Probably USB_TIMEOUT should be modified in module parameter */ #define BUFFER_LENGTH 8 #define USB_TIMEOUT 500 #define FREQ_MIN 87.5 #define FREQ_MAX 108.0 #define FREQ_MUL 16000 #define MA901_VOLUME_MAX 16 #define MA901_VOLUME_MIN 0 /* Commands that device should understand * List isn't full and will be updated with implementation of new functions */ #define MA901_RADIO_SET_FREQ 0x03 #define MA901_RADIO_SET_VOLUME 0x04 #define MA901_RADIO_SET_MONO_STEREO 0x05 /* Comfortable defines for ma901radio_set_stereo */ #define MA901_WANT_STEREO 0x50 #define MA901_WANT_MONO 0xd0 /* module parameter */ static int radio_nr = -1; module_param(radio_nr, int, 0); MODULE_PARM_DESC(radio_nr, "Radio file number"); /* Data for one (physical) device */ struct ma901radio_device { /* reference to USB and video device */ struct usb_device *usbdev; struct usb_interface *intf; struct video_device vdev; struct v4l2_device v4l2_dev; struct v4l2_ctrl_handler hdl; u8 *buffer; struct mutex lock; /* buffer locking */ int curfreq; u16 volume; int stereo; bool muted; }; static inline struct ma901radio_device *to_ma901radio_dev(struct v4l2_device *v4l2_dev) { return container_of(v4l2_dev, struct ma901radio_device, v4l2_dev); } /* set a frequency, freq is defined by v4l's TUNER_LOW, i.e. 1/16th kHz */ static int ma901radio_set_freq(struct ma901radio_device *radio, int freq) { unsigned int freq_send = 0x300 + (freq >> 5) / 25; int retval; radio->buffer[0] = 0x0a; radio->buffer[1] = MA901_RADIO_SET_FREQ; radio->buffer[2] = ((freq_send >> 8) & 0xff) + 0x80; radio->buffer[3] = freq_send & 0xff; radio->buffer[4] = 0x00; radio->buffer[5] = 0x00; radio->buffer[6] = 0x00; radio->buffer[7] = 0x00; retval = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), 9, 0x21, 0x0300, 0, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (retval < 0) return retval; radio->curfreq = freq; return 0; } static int ma901radio_set_volume(struct ma901radio_device *radio, u16 vol_to_set) { int retval; radio->buffer[0] = 0x0a; radio->buffer[1] = MA901_RADIO_SET_VOLUME; radio->buffer[2] = 0xc2; radio->buffer[3] = vol_to_set + 0x20; radio->buffer[4] = 0x00; radio->buffer[5] = 0x00; radio->buffer[6] = 0x00; radio->buffer[7] = 0x00; retval = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), 9, 0x21, 0x0300, 0, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (retval < 0) return retval; radio->volume = vol_to_set; return retval; } static int ma901_set_stereo(struct ma901radio_device *radio, u8 stereo) { int retval; radio->buffer[0] = 0x0a; radio->buffer[1] = MA901_RADIO_SET_MONO_STEREO; radio->buffer[2] = stereo; radio->buffer[3] = 0x00; radio->buffer[4] = 0x00; radio->buffer[5] = 0x00; radio->buffer[6] = 0x00; radio->buffer[7] = 0x00; retval = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), 9, 0x21, 0x0300, 0, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (retval < 0) return retval; if (stereo == MA901_WANT_STEREO) radio->stereo = V4L2_TUNER_MODE_STEREO; else radio->stereo = V4L2_TUNER_MODE_MONO; return retval; } /* Handle unplugging the device. * We call video_unregister_device in any case. * The last function called in this procedure is * usb_ma901radio_device_release. 
*/ static void usb_ma901radio_disconnect(struct usb_interface *intf) { struct ma901radio_device *radio = to_ma901radio_dev(usb_get_intfdata(intf)); mutex_lock(&radio->lock); video_unregister_device(&radio->vdev); usb_set_intfdata(intf, NULL); v4l2_device_disconnect(&radio->v4l2_dev); mutex_unlock(&radio->lock); v4l2_device_put(&radio->v4l2_dev); } /* vidioc_querycap - query device capabilities */ static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *v) { struct ma901radio_device *radio = video_drvdata(file); strscpy(v->driver, "radio-ma901", sizeof(v->driver)); strscpy(v->card, "Masterkit MA901 USB FM Radio", sizeof(v->card)); usb_make_path(radio->usbdev, v->bus_info, sizeof(v->bus_info)); return 0; } /* vidioc_g_tuner - get tuner attributes */ static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct ma901radio_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; v->signal = 0; /* TODO: the same words like in _probe() goes here. * When receiving of stats will be implemented then we can call * ma901radio_get_stat(). * retval = ma901radio_get_stat(radio, &is_stereo, &v->signal); */ strscpy(v->name, "FM", sizeof(v->name)); v->type = V4L2_TUNER_RADIO; v->rangelow = FREQ_MIN * FREQ_MUL; v->rangehigh = FREQ_MAX * FREQ_MUL; v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO; /* v->rxsubchans = is_stereo ? V4L2_TUNER_SUB_STEREO : V4L2_TUNER_SUB_MONO; */ v->audmode = radio->stereo ? V4L2_TUNER_MODE_STEREO : V4L2_TUNER_MODE_MONO; return 0; } /* vidioc_s_tuner - set tuner attributes */ static int vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { struct ma901radio_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; /* mono/stereo selector */ switch (v->audmode) { case V4L2_TUNER_MODE_MONO: return ma901_set_stereo(radio, MA901_WANT_MONO); default: return ma901_set_stereo(radio, MA901_WANT_STEREO); } } /* vidioc_s_frequency - set tuner radio frequency */ static int vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct ma901radio_device *radio = video_drvdata(file); if (f->tuner != 0) return -EINVAL; return ma901radio_set_freq(radio, clamp_t(unsigned, f->frequency, FREQ_MIN * FREQ_MUL, FREQ_MAX * FREQ_MUL)); } /* vidioc_g_frequency - get tuner radio frequency */ static int vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct ma901radio_device *radio = video_drvdata(file); if (f->tuner != 0) return -EINVAL; f->frequency = radio->curfreq; return 0; } static int usb_ma901radio_s_ctrl(struct v4l2_ctrl *ctrl) { struct ma901radio_device *radio = container_of(ctrl->handler, struct ma901radio_device, hdl); switch (ctrl->id) { case V4L2_CID_AUDIO_VOLUME: /* set volume */ return ma901radio_set_volume(radio, (u16)ctrl->val); } return -EINVAL; } /* TODO: Should we really need to implement suspend and resume functions? * Radio has it's own memory and will continue playing if power is present * on usb port and on resume it will start to play again based on freq, volume * values in device memory. 
*/ static int usb_ma901radio_suspend(struct usb_interface *intf, pm_message_t message) { return 0; } static int usb_ma901radio_resume(struct usb_interface *intf) { return 0; } static const struct v4l2_ctrl_ops usb_ma901radio_ctrl_ops = { .s_ctrl = usb_ma901radio_s_ctrl, }; /* File system interface */ static const struct v4l2_file_operations usb_ma901radio_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = v4l2_fh_release, .poll = v4l2_ctrl_poll, .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops usb_ma901radio_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_g_tuner = vidioc_g_tuner, .vidioc_s_tuner = vidioc_s_tuner, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, .vidioc_log_status = v4l2_ctrl_log_status, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; static void usb_ma901radio_release(struct v4l2_device *v4l2_dev) { struct ma901radio_device *radio = to_ma901radio_dev(v4l2_dev); v4l2_ctrl_handler_free(&radio->hdl); v4l2_device_unregister(&radio->v4l2_dev); kfree(radio->buffer); kfree(radio); } /* check if the device is present and register with v4l and usb if it is */ static int usb_ma901radio_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); struct ma901radio_device *radio; int retval = 0; /* Masterkit MA901 usb radio has the same USB ID as many others * Atmel V-USB devices. Let's make additional checks to be sure * that this is our device. */ if (dev->product && dev->manufacturer && (strncmp(dev->product, "MA901", 5) != 0 || strncmp(dev->manufacturer, "www.masterkit.ru", 16) != 0)) return -ENODEV; radio = kzalloc(sizeof(struct ma901radio_device), GFP_KERNEL); if (!radio) { dev_err(&intf->dev, "kzalloc for ma901radio_device failed\n"); retval = -ENOMEM; goto err; } radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); if (!radio->buffer) { dev_err(&intf->dev, "kmalloc for radio->buffer failed\n"); retval = -ENOMEM; goto err_nobuf; } retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); if (retval < 0) { dev_err(&intf->dev, "couldn't register v4l2_device\n"); goto err_v4l2; } v4l2_ctrl_handler_init(&radio->hdl, 1); /* TODO:It looks like this radio doesn't have mute/unmute control * and windows program just emulate it using volume control. * Let's plan to do the same in this driver. * * v4l2_ctrl_new_std(&radio->hdl, &usb_ma901radio_ctrl_ops, * V4L2_CID_AUDIO_MUTE, 0, 1, 1, 1); */ v4l2_ctrl_new_std(&radio->hdl, &usb_ma901radio_ctrl_ops, V4L2_CID_AUDIO_VOLUME, MA901_VOLUME_MIN, MA901_VOLUME_MAX, 1, MA901_VOLUME_MAX); if (radio->hdl.error) { retval = radio->hdl.error; dev_err(&intf->dev, "couldn't register control\n"); goto err_ctrl; } mutex_init(&radio->lock); radio->v4l2_dev.ctrl_handler = &radio->hdl; radio->v4l2_dev.release = usb_ma901radio_release; strscpy(radio->vdev.name, radio->v4l2_dev.name, sizeof(radio->vdev.name)); radio->vdev.v4l2_dev = &radio->v4l2_dev; radio->vdev.fops = &usb_ma901radio_fops; radio->vdev.ioctl_ops = &usb_ma901radio_ioctl_ops; radio->vdev.release = video_device_release_empty; radio->vdev.lock = &radio->lock; radio->vdev.device_caps = V4L2_CAP_RADIO | V4L2_CAP_TUNER; radio->usbdev = interface_to_usbdev(intf); radio->intf = intf; usb_set_intfdata(intf, &radio->v4l2_dev); radio->curfreq = 95.21 * FREQ_MUL; video_set_drvdata(&radio->vdev, radio); /* TODO: we can get some statistics (freq, volume) from device * but it's not implemented yet. 
After insertion in usb-port radio * setups frequency and starts playing without any initialization. * So we don't call usb_ma901radio_init/get_stat() here. * retval = usb_ma901radio_init(radio); */ retval = video_register_device(&radio->vdev, VFL_TYPE_RADIO, radio_nr); if (retval < 0) { dev_err(&intf->dev, "could not register video device\n"); goto err_vdev; } return 0; err_vdev: v4l2_ctrl_handler_free(&radio->hdl); err_ctrl: v4l2_device_unregister(&radio->v4l2_dev); err_v4l2: kfree(radio->buffer); err_nobuf: kfree(radio); err: return retval; } /* USB Device ID List */ static const struct usb_device_id usb_ma901radio_device_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(USB_MA901_VENDOR, USB_MA901_PRODUCT, USB_CLASS_HID, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_ma901radio_device_table); /* USB subsystem interface */ static struct usb_driver usb_ma901radio_driver = { .name = MA901_DRIVER_NAME, .probe = usb_ma901radio_probe, .disconnect = usb_ma901radio_disconnect, .suspend = usb_ma901radio_suspend, .resume = usb_ma901radio_resume, .reset_resume = usb_ma901radio_resume, .id_table = usb_ma901radio_device_table, }; module_usb_driver(usb_ma901radio_driver);
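/*
 * Editor's note, illustrative and not part of the original driver: because
 * the tuner advertises V4L2_TUNER_CAP_LOW, all frequencies passed through
 * vidioc_s_frequency()/vidioc_g_frequency() are in units of 1/16 kHz
 * (62.5 Hz).  That is why FREQ_MUL is 16000: it converts MHz to this unit,
 * e.g. 95.21 MHz becomes 95.21 * 16000 == 1523360.  The helper below is a
 * hypothetical convenience for callers working in kHz.
 */
static inline unsigned int ma901_khz_to_v4l2_units(unsigned int khz)
{
	return khz * 16;	/* 1 kHz == 16 units of 1/16 kHz */
}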
2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 // SPDX-License-Identifier: GPL-2.0-or-later /* * Intel SMP support routines. * * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com> * (c) 2002,2003 Andi Kleen, SuSE Labs. * * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> */ #include <linux/init.h> #include <linux/mm.h> #include <linux/delay.h> #include <linux/spinlock.h> #include <linux/export.h> #include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> #include <linux/cache.h> #include <linux/interrupt.h> #include <linux/cpu.h> #include <linux/gfp.h> #include <linux/kexec.h> #include <asm/mtrr.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/proto.h> #include <asm/apic.h> #include <asm/cpu.h> #include <asm/idtentry.h> #include <asm/nmi.h> #include <asm/mce.h> #include <asm/trace/irq_vectors.h> #include <asm/kexec.h> #include <asm/reboot.h> /* * Some notes on x86 processor bugs affecting SMP operation: * * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. * The Linux implications for SMP are handled as follows: * * Pentium III / [Xeon] * None of the E1AP-E3AP errata are visible to the user. * * E1AP. see PII A1AP * E2AP. see PII A2AP * E3AP. see PII A3AP * * Pentium II / [Xeon] * None of the A1AP-A3AP errata are visible to the user. * * A1AP. see PPro 1AP * A2AP. see PPro 2AP * A3AP. see PPro 7AP * * Pentium Pro * None of 1AP-9AP errata are visible to the normal user, * except occasional delivery of 'spurious interrupt' as trap #15. * This is very rare and a non-problem. * * 1AP. Linux maps APIC as non-cacheable * 2AP. worked around in hardware * 3AP. fixed in C0 and above steppings microcode update. * Linux does not use excessive STARTUP_IPIs. * 4AP. worked around in hardware * 5AP. symmetric IO mode (normal Linux operation) not affected. * 'noapic' mode has vector 0xf filled out properly. * 6AP. 'noapic' mode might be affected - fixed in later steppings * 7AP. We do not assume writes to the LVT deasserting IRQs * 8AP. We do not enable low power mode (deep sleep) during MP bootup * 9AP. We do not use mixed mode * * Pentium * There is a marginal case where REP MOVS on 100MHz SMP * machines with B stepping processors can fail. XXX should provide * an L1cache=Writethrough or L1cache=off option. * * B stepping CPUs may hang. There are hardware work arounds * for this. We warn about it in case your board doesn't have the work * arounds. 
Basically that's so I can tell anyone with a B stepping * CPU and SMP problems "tough". * * Specific items [From Pentium Processor Specification Update] * * 1AP. Linux doesn't use remote read * 2AP. Linux doesn't trust APIC errors * 3AP. We work around this * 4AP. Linux never generated 3 interrupts of the same priority * to cause a lost local interrupt. * 5AP. Remote read is never used * 6AP. not affected - worked around in hardware * 7AP. not affected - worked around in hardware * 8AP. worked around in hardware - we get explicit CS errors if not * 9AP. only 'noapic' mode affected. Might generate spurious * interrupts, we log only the first one and count the * rest silently. * 10AP. not affected - worked around in hardware * 11AP. Linux reads the APIC between writes to avoid this, as per * the documentation. Make sure you preserve this as it affects * the C stepping chips too. * 12AP. not affected - worked around in hardware * 13AP. not affected - worked around in hardware * 14AP. we always deassert INIT during bootup * 15AP. not affected - worked around in hardware * 16AP. not affected - worked around in hardware * 17AP. not affected - worked around in hardware * 18AP. not affected - worked around in hardware * 19AP. not affected - worked around in BIOS * * If this sounds worrying believe me these bugs are either ___RARE___, * or are signal timing bugs worked around in hardware and there's * about nothing of note with C stepping upwards. */ static atomic_t stopping_cpu = ATOMIC_INIT(-1); static bool smp_no_nmi_ipi = false; static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) { /* We are registered on stopping cpu too, avoid spurious NMI */ if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) return NMI_HANDLED; cpu_emergency_disable_virtualization(); stop_this_cpu(NULL); return NMI_HANDLED; } /* * this function calls the 'stop' function on all other CPUs in the system. */ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { apic_eoi(); cpu_emergency_disable_virtualization(); stop_this_cpu(NULL); } static int register_stop_handler(void) { return register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, NMI_FLAG_FIRST, "smp_stop"); } static void native_stop_other_cpus(int wait) { unsigned int old_cpu, this_cpu; unsigned long flags, timeout; if (reboot_force) return; /* Only proceed if this is the first CPU to reach this code */ old_cpu = -1; this_cpu = smp_processor_id(); if (!atomic_try_cmpxchg(&stopping_cpu, &old_cpu, this_cpu)) return; /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */ if (kexec_in_progress) smp_kick_mwait_play_dead(); /* * 1) Send an IPI on the reboot vector to all other CPUs. * * The other CPUs should react on it after leaving critical * sections and re-enabling interrupts. They might still hold * locks, but there is nothing which can be done about that. * * 2) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * * 3) If #2 timed out send an NMI to the CPUs which did not * yet report * * 4) Wait for all other CPUs to report that they reached the * HLT loop in stop_this_cpu() * * #3 can obviously race against a CPU reaching the HLT loop late. * That CPU will have reported already and the "have all CPUs * reached HLT" condition will be true despite the fact that the * other CPU is still handling the NMI. Again, there is no * protection against that as "disabled" APICs still respond to * NMIs. 
*/ cpumask_copy(&cpus_stop_mask, cpu_online_mask); cpumask_clear_cpu(this_cpu, &cpus_stop_mask); if (!cpumask_empty(&cpus_stop_mask)) { apic_send_IPI_allbutself(REBOOT_VECTOR); /* * Don't wait longer than a second for IPI completion. The * wait request is not checked here because that would * prevent an NMI shutdown attempt in case that not all * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; while (!cpumask_empty(&cpus_stop_mask) && timeout--) udelay(1); } /* if the REBOOT_VECTOR didn't work, try with the NMI */ if (!cpumask_empty(&cpus_stop_mask)) { /* * If NMI IPI is enabled, try to register the stop handler * and send the IPI. In any case try to wait for the other * CPUs to stop. */ if (!smp_no_nmi_ipi && !register_stop_handler()) { unsigned int cpu; pr_emerg("Shutting down cpus with NMI\n"); for_each_cpu(cpu, &cpus_stop_mask) __apic_send_IPI(cpu, NMI_VECTOR); } /* * Don't wait longer than 10 ms if the caller didn't * request it. If wait is true, the machine hangs here if * one or more CPUs do not reach shutdown state. */ timeout = USEC_PER_MSEC * 10; while (!cpumask_empty(&cpus_stop_mask) && (wait || timeout--)) udelay(1); } local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); local_irq_restore(flags); /* * Ensure that the cpus_stop_mask cache lines are invalidated on * the other CPUs. See comment vs. SME in stop_this_cpu(). */ cpumask_clear(&cpus_stop_mask); } /* * Reschedule call back. KVM uses this interrupt to force a cpu out of * guest mode. */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_reschedule_ipi) { apic_eoi(); trace_reschedule_entry(RESCHEDULE_VECTOR); inc_irq_stat(irq_resched_count); scheduler_ipi(); trace_reschedule_exit(RESCHEDULE_VECTOR); } DEFINE_IDTENTRY_SYSVEC(sysvec_call_function) { apic_eoi(); trace_call_function_entry(CALL_FUNCTION_VECTOR); inc_irq_stat(irq_call_count); generic_smp_call_function_interrupt(); trace_call_function_exit(CALL_FUNCTION_VECTOR); } DEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single) { apic_eoi(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); inc_irq_stat(irq_call_count); generic_smp_call_function_single_interrupt(); trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); } static int __init nonmi_ipi_setup(char *str) { smp_no_nmi_ipi = true; return 1; } __setup("nonmi_ipi", nonmi_ipi_setup); struct smp_ops smp_ops = { .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, .smp_prepare_cpus = native_smp_prepare_cpus, .smp_cpus_done = native_smp_cpus_done, .stop_other_cpus = native_stop_other_cpus, #if defined(CONFIG_CRASH_DUMP) .crash_stop_other_cpus = kdump_nmi_shootdown_cpus, #endif .smp_send_reschedule = native_smp_send_reschedule, .kick_ap_alive = native_kick_ap, .cpu_disable = native_cpu_disable, .play_dead = native_play_dead, .send_call_func_ipi = native_send_call_func_ipi, .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops);
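/*
 * Editor's sketch, not part of the original file: the "first caller wins"
 * idiom that native_stop_other_cpus() uses above, isolated for clarity.
 * Only the CPU that successfully moves the owner variable from -1 to its
 * own id proceeds to send the stop IPIs; any concurrent caller sees the
 * cmpxchg fail and returns early.  The "example" names are hypothetical.
 */
#include <linux/atomic.h>

static atomic_t example_stop_owner = ATOMIC_INIT(-1);

static bool example_claim_shutdown(int this_cpu)
{
	int old = -1;

	/* atomic_try_cmpxchg() returns false (and updates @old) if another CPU won. */
	return atomic_try_cmpxchg(&example_stop_owner, &old, this_cpu);
}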
913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 // SPDX-License-Identifier: GPL-2.0 /* * Software nodes for the firmware node framework. * * Copyright (C) 2018, Intel Corporation * Author: Heikki Krogerus <heikki.krogerus@linux.intel.com> */ #include <linux/container_of.h> #include <linux/device.h> #include <linux/err.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/kobject.h> #include <linux/kstrtox.h> #include <linux/list.h> #include <linux/property.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/sysfs.h> #include <linux/types.h> #include "base.h" struct swnode { struct kobject kobj; struct fwnode_handle fwnode; const struct software_node *node; int id; /* hierarchy */ struct ida child_ids; struct list_head entry; struct list_head children; struct swnode *parent; unsigned int allocated:1; unsigned int managed:1; }; static DEFINE_IDA(swnode_root_ids); static struct kset *swnode_kset; #define kobj_to_swnode(_kobj_) container_of(_kobj_, struct swnode, kobj) static const struct fwnode_operations software_node_ops; bool is_software_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &software_node_ops; } EXPORT_SYMBOL_GPL(is_software_node); #define to_swnode(__fwnode) \ ({ \ typeof(__fwnode) __to_swnode_fwnode = __fwnode; \ \ is_software_node(__to_swnode_fwnode) ? \ container_of(__to_swnode_fwnode, \ struct swnode, fwnode) : NULL; \ }) static inline struct swnode *dev_to_swnode(struct device *dev) { struct fwnode_handle *fwnode = dev_fwnode(dev); if (!fwnode) return NULL; if (!is_software_node(fwnode)) fwnode = fwnode->secondary; return to_swnode(fwnode); } static struct swnode * software_node_to_swnode(const struct software_node *node) { struct swnode *swnode = NULL; struct kobject *k; if (!node) return NULL; spin_lock(&swnode_kset->list_lock); list_for_each_entry(k, &swnode_kset->list, entry) { swnode = kobj_to_swnode(k); if (swnode->node == node) break; swnode = NULL; } spin_unlock(&swnode_kset->list_lock); return swnode; } const struct software_node *to_software_node(const struct fwnode_handle *fwnode) { const struct swnode *swnode = to_swnode(fwnode); return swnode ? swnode->node : NULL; } EXPORT_SYMBOL_GPL(to_software_node); struct fwnode_handle *software_node_fwnode(const struct software_node *node) { struct swnode *swnode = software_node_to_swnode(node); return swnode ? 
&swnode->fwnode : NULL; } EXPORT_SYMBOL_GPL(software_node_fwnode); /* -------------------------------------------------------------------------- */ /* property_entry processing */ static const struct property_entry * property_entry_get(const struct property_entry *prop, const char *name) { if (!prop) return NULL; for (; prop->name; prop++) if (!strcmp(name, prop->name)) return prop; return NULL; } static const void *property_get_pointer(const struct property_entry *prop) { if (!prop->length) return NULL; return prop->is_inline ? &prop->value : prop->pointer; } static const void *property_entry_find(const struct property_entry *props, const char *propname, size_t length) { const struct property_entry *prop; const void *pointer; prop = property_entry_get(props, propname); if (!prop) return ERR_PTR(-EINVAL); pointer = property_get_pointer(prop); if (!pointer) return ERR_PTR(-ENODATA); if (length > prop->length) return ERR_PTR(-EOVERFLOW); return pointer; } static int property_entry_count_elems_of_size(const struct property_entry *props, const char *propname, size_t length) { const struct property_entry *prop; prop = property_entry_get(props, propname); if (!prop) return -EINVAL; return prop->length / length; } static int property_entry_read_int_array(const struct property_entry *props, const char *name, unsigned int elem_size, void *val, size_t nval) { const void *pointer; size_t length; if (!val) return property_entry_count_elems_of_size(props, name, elem_size); if (!is_power_of_2(elem_size) || elem_size > sizeof(u64)) return -ENXIO; length = nval * elem_size; pointer = property_entry_find(props, name, length); if (IS_ERR(pointer)) return PTR_ERR(pointer); memcpy(val, pointer, length); return 0; } static int property_entry_read_string_array(const struct property_entry *props, const char *propname, const char **strings, size_t nval) { const void *pointer; size_t length; int array_len; /* Find out the array length. */ array_len = property_entry_count_elems_of_size(props, propname, sizeof(const char *)); if (array_len < 0) return array_len; /* Return how many there are if strings is NULL. */ if (!strings) return array_len; array_len = min_t(size_t, nval, array_len); length = array_len * sizeof(*strings); pointer = property_entry_find(props, propname, length); if (IS_ERR(pointer)) return PTR_ERR(pointer); memcpy(strings, pointer, length); return array_len; } static void property_entry_free_data(const struct property_entry *p) { const char * const *src_str; size_t i, nval; if (p->type == DEV_PROP_STRING) { src_str = property_get_pointer(p); nval = p->length / sizeof(*src_str); for (i = 0; i < nval; i++) kfree(src_str[i]); } if (!p->is_inline) kfree(p->pointer); kfree(p->name); } static bool property_copy_string_array(const char **dst_ptr, const char * const *src_ptr, size_t nval) { int i; for (i = 0; i < nval; i++) { dst_ptr[i] = kstrdup(src_ptr[i], GFP_KERNEL); if (!dst_ptr[i] && src_ptr[i]) { while (--i >= 0) kfree(dst_ptr[i]); return false; } } return true; } static int property_entry_copy_data(struct property_entry *dst, const struct property_entry *src) { const void *pointer = property_get_pointer(src); void *dst_ptr; size_t nval; /* * Properties with no data should not be marked as stored * out of line. */ if (!src->is_inline && !src->length) return -ENODATA; /* * Reference properties are never stored inline as * they are too big. 
*/ if (src->type == DEV_PROP_REF && src->is_inline) return -EINVAL; if (src->length <= sizeof(dst->value)) { dst_ptr = &dst->value; dst->is_inline = true; } else { dst_ptr = kmalloc(src->length, GFP_KERNEL); if (!dst_ptr) return -ENOMEM; dst->pointer = dst_ptr; } if (src->type == DEV_PROP_STRING) { nval = src->length / sizeof(const char *); if (!property_copy_string_array(dst_ptr, pointer, nval)) { if (!dst->is_inline) kfree(dst->pointer); return -ENOMEM; } } else { memcpy(dst_ptr, pointer, src->length); } dst->length = src->length; dst->type = src->type; dst->name = kstrdup(src->name, GFP_KERNEL); if (!dst->name) { property_entry_free_data(dst); return -ENOMEM; } return 0; } /** * property_entries_dup - duplicate array of properties * @properties: array of properties to copy * * This function creates a deep copy of the given NULL-terminated array * of property entries. */ struct property_entry * property_entries_dup(const struct property_entry *properties) { struct property_entry *p; int i, n = 0; int ret; if (!properties) return NULL; while (properties[n].name) n++; p = kcalloc(n + 1, sizeof(*p), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); for (i = 0; i < n; i++) { ret = property_entry_copy_data(&p[i], &properties[i]); if (ret) { while (--i >= 0) property_entry_free_data(&p[i]); kfree(p); return ERR_PTR(ret); } } return p; } EXPORT_SYMBOL_GPL(property_entries_dup); /** * property_entries_free - free previously allocated array of properties * @properties: array of properties to destroy * * This function frees given NULL-terminated array of property entries, * along with their data. */ void property_entries_free(const struct property_entry *properties) { const struct property_entry *p; if (!properties) return; for (p = properties; p->name; p++) property_entry_free_data(p); kfree(properties); } EXPORT_SYMBOL_GPL(property_entries_free); /* -------------------------------------------------------------------------- */ /* fwnode operations */ static struct fwnode_handle *software_node_get(struct fwnode_handle *fwnode) { struct swnode *swnode = to_swnode(fwnode); kobject_get(&swnode->kobj); return &swnode->fwnode; } static void software_node_put(struct fwnode_handle *fwnode) { struct swnode *swnode = to_swnode(fwnode); kobject_put(&swnode->kobj); } static bool software_node_property_present(const struct fwnode_handle *fwnode, const char *propname) { struct swnode *swnode = to_swnode(fwnode); return !!property_entry_get(swnode->node->properties, propname); } static int software_node_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { struct swnode *swnode = to_swnode(fwnode); return property_entry_read_int_array(swnode->node->properties, propname, elem_size, val, nval); } static int software_node_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { struct swnode *swnode = to_swnode(fwnode); return property_entry_read_string_array(swnode->node->properties, propname, val, nval); } static const char * software_node_get_name(const struct fwnode_handle *fwnode) { const struct swnode *swnode = to_swnode(fwnode); return kobject_name(&swnode->kobj); } static const char * software_node_get_name_prefix(const struct fwnode_handle *fwnode) { struct fwnode_handle *parent; const char *prefix; parent = fwnode_get_parent(fwnode); if (!parent) return ""; /* Figure out the prefix from the parents. 
*/ while (is_software_node(parent)) parent = fwnode_get_next_parent(parent); prefix = fwnode_get_name_prefix(parent); fwnode_handle_put(parent); /* Guess something if prefix was NULL. */ return prefix ?: "/"; } static struct fwnode_handle * software_node_get_parent(const struct fwnode_handle *fwnode) { struct swnode *swnode = to_swnode(fwnode); if (!swnode || !swnode->parent) return NULL; return fwnode_handle_get(&swnode->parent->fwnode); } static struct fwnode_handle * software_node_get_next_child(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { struct swnode *p = to_swnode(fwnode); struct swnode *c = to_swnode(child); if (!p || list_empty(&p->children) || (c && list_is_last(&c->entry, &p->children))) { fwnode_handle_put(child); return NULL; } if (c) c = list_next_entry(c, entry); else c = list_first_entry(&p->children, struct swnode, entry); fwnode_handle_put(child); return fwnode_handle_get(&c->fwnode); } static struct fwnode_handle * software_node_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { struct swnode *swnode = to_swnode(fwnode); struct swnode *child; if (!swnode || list_empty(&swnode->children)) return NULL; list_for_each_entry(child, &swnode->children, entry) { if (!strcmp(childname, kobject_name(&child->kobj))) { kobject_get(&child->kobj); return &child->fwnode; } } return NULL; } static int software_node_get_reference_args(const struct fwnode_handle *fwnode, const char *propname, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { struct swnode *swnode = to_swnode(fwnode); const struct software_node_ref_args *ref_array; const struct software_node_ref_args *ref; const struct property_entry *prop; struct fwnode_handle *refnode; u32 nargs_prop_val; int error; int i; prop = property_entry_get(swnode->node->properties, propname); if (!prop) return -ENOENT; if (prop->type != DEV_PROP_REF) return -EINVAL; /* * We expect that references are never stored inline, even * single ones, as they are too big. */ if (prop->is_inline) return -EINVAL; if (index * sizeof(*ref) >= prop->length) return -ENOENT; ref_array = prop->pointer; ref = &ref_array[index]; refnode = software_node_fwnode(ref->node); if (!refnode) return -ENOENT; if (nargs_prop) { error = property_entry_read_int_array(ref->node->properties, nargs_prop, sizeof(u32), &nargs_prop_val, 1); if (error) return error; nargs = nargs_prop_val; } if (nargs > NR_FWNODE_REFERENCE_ARGS) return -EINVAL; if (!args) return 0; args->fwnode = software_node_get(refnode); args->nargs = nargs; for (i = 0; i < nargs; i++) args->args[i] = ref->args[i]; return 0; } static struct fwnode_handle * swnode_graph_find_next_port(const struct fwnode_handle *parent, struct fwnode_handle *port) { struct fwnode_handle *old = port; while ((port = software_node_get_next_child(parent, old))) { /* * fwnode ports have naming style "port@", so we search for any * children that follow that convention. 
*/ if (!strncmp(to_swnode(port)->node->name, "port@", strlen("port@"))) return port; old = port; } return NULL; } static struct fwnode_handle * software_node_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *endpoint) { struct swnode *swnode = to_swnode(fwnode); struct fwnode_handle *parent; struct fwnode_handle *port; if (!swnode) return NULL; if (endpoint) { port = software_node_get_parent(endpoint); parent = software_node_get_parent(port); } else { parent = software_node_get_named_child_node(fwnode, "ports"); if (!parent) parent = software_node_get(&swnode->fwnode); port = swnode_graph_find_next_port(parent, NULL); } for (; port; port = swnode_graph_find_next_port(parent, port)) { endpoint = software_node_get_next_child(port, endpoint); if (endpoint) { fwnode_handle_put(port); break; } } fwnode_handle_put(parent); return endpoint; } static struct fwnode_handle * software_node_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) { struct swnode *swnode = to_swnode(fwnode); const struct software_node_ref_args *ref; const struct property_entry *prop; if (!swnode) return NULL; prop = property_entry_get(swnode->node->properties, "remote-endpoint"); if (!prop || prop->type != DEV_PROP_REF || prop->is_inline) return NULL; ref = prop->pointer; return software_node_get(software_node_fwnode(ref[0].node)); } static struct fwnode_handle * software_node_graph_get_port_parent(struct fwnode_handle *fwnode) { struct swnode *swnode = to_swnode(fwnode); swnode = swnode->parent; if (swnode && !strcmp(swnode->node->name, "ports")) swnode = swnode->parent; return swnode ? software_node_get(&swnode->fwnode) : NULL; } static int software_node_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint) { struct swnode *swnode = to_swnode(fwnode); const char *parent_name = swnode->parent->node->name; int ret; if (strlen("port@") >= strlen(parent_name) || strncmp(parent_name, "port@", strlen("port@"))) return -EINVAL; /* Ports have naming style "port@n", we need to select the n */ ret = kstrtou32(parent_name + strlen("port@"), 10, &endpoint->port); if (ret) return ret; endpoint->id = swnode->id; endpoint->local_fwnode = fwnode; return 0; } static const struct fwnode_operations software_node_ops = { .get = software_node_get, .put = software_node_put, .property_present = software_node_property_present, .property_read_int_array = software_node_read_int_array, .property_read_string_array = software_node_read_string_array, .get_name = software_node_get_name, .get_name_prefix = software_node_get_name_prefix, .get_parent = software_node_get_parent, .get_next_child_node = software_node_get_next_child, .get_named_child_node = software_node_get_named_child_node, .get_reference_args = software_node_get_reference_args, .graph_get_next_endpoint = software_node_graph_get_next_endpoint, .graph_get_remote_endpoint = software_node_graph_get_remote_endpoint, .graph_get_port_parent = software_node_graph_get_port_parent, .graph_parse_endpoint = software_node_graph_parse_endpoint, }; /* -------------------------------------------------------------------------- */ /** * software_node_find_by_name - Find software node by name * @parent: Parent of the software node * @name: Name of the software node * * The function will find a node that is child of @parent and that is named * @name. If no node is found, the function returns NULL. * * NOTE: you will need to drop the reference with fwnode_handle_put() after use. 
*/ const struct software_node * software_node_find_by_name(const struct software_node *parent, const char *name) { struct swnode *swnode = NULL; struct kobject *k; if (!name) return NULL; spin_lock(&swnode_kset->list_lock); list_for_each_entry(k, &swnode_kset->list, entry) { swnode = kobj_to_swnode(k); if (parent == swnode->node->parent && swnode->node->name && !strcmp(name, swnode->node->name)) { kobject_get(&swnode->kobj); break; } swnode = NULL; } spin_unlock(&swnode_kset->list_lock); return swnode ? swnode->node : NULL; } EXPORT_SYMBOL_GPL(software_node_find_by_name); static struct software_node *software_node_alloc(const struct property_entry *properties) { struct property_entry *props; struct software_node *node; props = property_entries_dup(properties); if (IS_ERR(props)) return ERR_CAST(props); node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) { property_entries_free(props); return ERR_PTR(-ENOMEM); } node->properties = props; return node; } static void software_node_free(const struct software_node *node) { property_entries_free(node->properties); kfree(node); } static void software_node_release(struct kobject *kobj) { struct swnode *swnode = kobj_to_swnode(kobj); if (swnode->parent) { ida_free(&swnode->parent->child_ids, swnode->id); list_del(&swnode->entry); } else { ida_free(&swnode_root_ids, swnode->id); } if (swnode->allocated) software_node_free(swnode->node); ida_destroy(&swnode->child_ids); kfree(swnode); } static const struct kobj_type software_node_type = { .release = software_node_release, .sysfs_ops = &kobj_sysfs_ops, }; static struct fwnode_handle * swnode_register(const struct software_node *node, struct swnode *parent, unsigned int allocated) { struct swnode *swnode; int ret; swnode = kzalloc(sizeof(*swnode), GFP_KERNEL); if (!swnode) return ERR_PTR(-ENOMEM); ret = ida_alloc(parent ? &parent->child_ids : &swnode_root_ids, GFP_KERNEL); if (ret < 0) { kfree(swnode); return ERR_PTR(ret); } swnode->id = ret; swnode->node = node; swnode->parent = parent; swnode->kobj.kset = swnode_kset; fwnode_init(&swnode->fwnode, &software_node_ops); ida_init(&swnode->child_ids); INIT_LIST_HEAD(&swnode->entry); INIT_LIST_HEAD(&swnode->children); if (node->name) ret = kobject_init_and_add(&swnode->kobj, &software_node_type, parent ? &parent->kobj : NULL, "%s", node->name); else ret = kobject_init_and_add(&swnode->kobj, &software_node_type, parent ? &parent->kobj : NULL, "node%d", swnode->id); if (ret) { kobject_put(&swnode->kobj); return ERR_PTR(ret); } /* * Assign the flag only in the successful case, so * the above kobject_put() won't mess up with properties. */ swnode->allocated = allocated; if (parent) list_add_tail(&swnode->entry, &parent->children); kobject_uevent(&swnode->kobj, KOBJ_ADD); return &swnode->fwnode; } /** * software_node_register_node_group - Register a group of software nodes * @node_group: NULL terminated array of software node pointers to be registered * * Register multiple software nodes at once. If any node in the array * has its .parent pointer set (which can only be to another software_node), * then its parent **must** have been registered before it is; either outside * of this function or by ordering the array such that parent comes before * child. 
*/ int software_node_register_node_group(const struct software_node **node_group) { unsigned int i; int ret; if (!node_group) return 0; for (i = 0; node_group[i]; i++) { ret = software_node_register(node_group[i]); if (ret) { software_node_unregister_node_group(node_group); return ret; } } return 0; } EXPORT_SYMBOL_GPL(software_node_register_node_group); /** * software_node_unregister_node_group - Unregister a group of software nodes * @node_group: NULL terminated array of software node pointers to be unregistered * * Unregister multiple software nodes at once. If parent pointers are set up * in any of the software nodes then the array **must** be ordered such that * parents come before their children. * * NOTE: If you are uncertain whether the array is ordered such that * parents will be unregistered before their children, it is wiser to * remove the nodes individually, in the correct order (child before * parent). */ void software_node_unregister_node_group( const struct software_node **node_group) { unsigned int i = 0; if (!node_group) return; while (node_group[i]) i++; while (i--) software_node_unregister(node_group[i]); } EXPORT_SYMBOL_GPL(software_node_unregister_node_group); /** * software_node_register - Register static software node * @node: The software node to be registered */ int software_node_register(const struct software_node *node) { struct swnode *parent = software_node_to_swnode(node->parent); if (software_node_to_swnode(node)) return -EEXIST; if (node->parent && !parent) return -EINVAL; return PTR_ERR_OR_ZERO(swnode_register(node, parent, 0)); } EXPORT_SYMBOL_GPL(software_node_register); /** * software_node_unregister - Unregister static software node * @node: The software node to be unregistered */ void software_node_unregister(const struct software_node *node) { struct swnode *swnode; swnode = software_node_to_swnode(node); if (swnode) fwnode_remove_software_node(&swnode->fwnode); } EXPORT_SYMBOL_GPL(software_node_unregister); struct fwnode_handle * fwnode_create_software_node(const struct property_entry *properties, const struct fwnode_handle *parent) { struct fwnode_handle *fwnode; struct software_node *node; struct swnode *p; if (IS_ERR(parent)) return ERR_CAST(parent); p = to_swnode(parent); if (parent && !p) return ERR_PTR(-EINVAL); node = software_node_alloc(properties); if (IS_ERR(node)) return ERR_CAST(node); node->parent = p ? p->node : NULL; fwnode = swnode_register(node, p, 1); if (IS_ERR(fwnode)) software_node_free(node); return fwnode; } EXPORT_SYMBOL_GPL(fwnode_create_software_node); void fwnode_remove_software_node(struct fwnode_handle *fwnode) { struct swnode *swnode = to_swnode(fwnode); if (!swnode) return; kobject_put(&swnode->kobj); } EXPORT_SYMBOL_GPL(fwnode_remove_software_node); /** * device_add_software_node - Assign software node to a device * @dev: The device the software node is meant for. * @node: The software node. * * This function will make @node the secondary firmware node pointer of @dev. If * @dev has no primary node, then @node will become the primary node. The * function will register @node automatically if it wasn't already registered. */ int device_add_software_node(struct device *dev, const struct software_node *node) { struct swnode *swnode; int ret; /* Only one software node per device. 
*/ if (dev_to_swnode(dev)) return -EBUSY; swnode = software_node_to_swnode(node); if (swnode) { kobject_get(&swnode->kobj); } else { ret = software_node_register(node); if (ret) return ret; swnode = software_node_to_swnode(node); } set_secondary_fwnode(dev, &swnode->fwnode); /* * If the device has been fully registered by the time this function is * called, software_node_notify() must be called separately so that the * symlinks get created and the reference count of the node is kept in * balance. */ if (device_is_registered(dev)) software_node_notify(dev); return 0; } EXPORT_SYMBOL_GPL(device_add_software_node); /** * device_remove_software_node - Remove device's software node * @dev: The device with the software node. * * This function will unregister the software node of @dev. */ void device_remove_software_node(struct device *dev) { struct swnode *swnode; swnode = dev_to_swnode(dev); if (!swnode) return; if (device_is_registered(dev)) software_node_notify_remove(dev); set_secondary_fwnode(dev, NULL); kobject_put(&swnode->kobj); } EXPORT_SYMBOL_GPL(device_remove_software_node); /** * device_create_managed_software_node - Create a software node for a device * @dev: The device the software node is assigned to. * @properties: Device properties for the software node. * @parent: Parent of the software node. * * Creates a software node as a managed resource for @dev, which means the * lifetime of the newly created software node is tied to the lifetime of @dev. * Software nodes created with this function should not be reused or shared * because of that. The function takes a deep copy of @properties for the * software node. * * Since the new software node is assigned directly to @dev, and since it should * not be shared, it is not returned to the caller. The function returns 0 on * success, and errno in case of an error. */ int device_create_managed_software_node(struct device *dev, const struct property_entry *properties, const struct software_node *parent) { struct fwnode_handle *p = software_node_fwnode(parent); struct fwnode_handle *fwnode; if (parent && !p) return -EINVAL; fwnode = fwnode_create_software_node(properties, p); if (IS_ERR(fwnode)) return PTR_ERR(fwnode); to_swnode(fwnode)->managed = true; set_secondary_fwnode(dev, fwnode); if (device_is_registered(dev)) software_node_notify(dev); return 0; } EXPORT_SYMBOL_GPL(device_create_managed_software_node); void software_node_notify(struct device *dev) { struct swnode *swnode; int ret; swnode = dev_to_swnode(dev); if (!swnode) return; ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); if (ret) return; ret = sysfs_create_link(&swnode->kobj, &dev->kobj, dev_name(dev)); if (ret) { sysfs_remove_link(&dev->kobj, "software_node"); return; } kobject_get(&swnode->kobj); } void software_node_notify_remove(struct device *dev) { struct swnode *swnode; swnode = dev_to_swnode(dev); if (!swnode) return; sysfs_remove_link(&swnode->kobj, dev_name(dev)); sysfs_remove_link(&dev->kobj, "software_node"); kobject_put(&swnode->kobj); if (swnode->managed) { set_secondary_fwnode(dev, NULL); kobject_put(&swnode->kobj); } } static int __init software_node_init(void) { swnode_kset = kset_create_and_add("software_nodes", NULL, kernel_kobj); if (!swnode_kset) return -ENOMEM; return 0; } postcore_initcall(software_node_init); static void __exit software_node_exit(void) { ida_destroy(&swnode_root_ids); kset_unregister(swnode_kset); } __exitcall(software_node_exit);
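/*
 * Illustrative sketch only (not part of swnode.c): one way a driver might
 * consume the API above, attaching extra properties to a device through a
 * managed software node. The property name "example-rate", the my_props[]
 * array and my_probe() are hypothetical; PROPERTY_ENTRY_U32() comes from
 * <linux/property.h> and device_create_managed_software_node() from the
 * code above.
 */
#include <linux/property.h>

static const struct property_entry my_props[] = {
	PROPERTY_ENTRY_U32("example-rate", 100),
	{ }
};

static int my_probe(struct device *dev)
{
	/*
	 * The node is registered, marked managed and set as the device's
	 * secondary fwnode; its lifetime is tied to that of @dev.
	 */
	return device_create_managed_software_node(dev, my_props, NULL);
}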
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/attr.c * * Copyright (C) 1991, 1992 Linus Torvalds * changes by Thomas Schoebel-Theuer */ #include <linux/export.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/sched/signal.h> #include <linux/capability.h> #include <linux/fsnotify.h> #include <linux/fcntl.h> #include <linux/filelock.h> #include <linux/security.h> #include "internal.h" /** * setattr_should_drop_sgid - determine whether the setgid bit needs to be * removed * @idmap: idmap of the mount @inode was found from * @inode: inode to check * * This function determines whether the setgid bit needs to be removed. * We retain backwards compatibility and require setgid bit to be removed * unconditionally if S_IXGRP is set. Otherwise we have the exact same * requirements as setattr_prepare() and setattr_copy(). * * Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise. 
*/ int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode) { umode_t mode = inode->i_mode; if (!(mode & S_ISGID)) return 0; if (mode & S_IXGRP) return ATTR_KILL_SGID; if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) return ATTR_KILL_SGID; return 0; } EXPORT_SYMBOL(setattr_should_drop_sgid); /** * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to * be dropped * @idmap: idmap of the mount @inode was found from * @inode: inode to check * * This function determines whether the set{g,u}id bits need to be removed. * If the setuid bit needs to be removed ATTR_KILL_SUID is returned. If the * setgid bit needs to be removed ATTR_KILL_SGID is returned. If both * set{g,u}id bits need to be removed the corresponding mask of both flags is * returned. * * Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits * to remove, 0 otherwise. */ int setattr_should_drop_suidgid(struct mnt_idmap *idmap, struct inode *inode) { umode_t mode = inode->i_mode; int kill = 0; /* suid always must be killed */ if (unlikely(mode & S_ISUID)) kill = ATTR_KILL_SUID; kill |= setattr_should_drop_sgid(idmap, inode); if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) return kill; return 0; } EXPORT_SYMBOL(setattr_should_drop_suidgid); /** * chown_ok - verify permissions to chown inode * @idmap: idmap of the mount @inode was found from * @inode: inode to check permissions on * @ia_vfsuid: uid to chown @inode to * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ static bool chown_ok(struct mnt_idmap *idmap, const struct inode *inode, vfsuid_t ia_vfsuid) { vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid()) && vfsuid_eq(ia_vfsuid, vfsuid)) return true; if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN)) return true; if (!vfsuid_valid(vfsuid) && ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) return true; return false; } /** * chgrp_ok - verify permissions to chgrp inode * @idmap: idmap of the mount @inode was found from * @inode: inode to check permissions on * @ia_vfsgid: gid to chown @inode to * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. 
*/ static bool chgrp_ok(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t ia_vfsgid) { vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) { if (vfsgid_eq(ia_vfsgid, vfsgid)) return true; if (vfsgid_in_group_p(ia_vfsgid)) return true; } if (capable_wrt_inode_uidgid(idmap, inode, CAP_CHOWN)) return true; if (!vfsgid_valid(vfsgid) && ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN)) return true; return false; } /** * setattr_prepare - check if attribute changes to a dentry are allowed * @idmap: idmap of the mount the inode was found from * @dentry: dentry to check * @attr: attributes to change * * Check if we are allowed to change the attributes contained in @attr * in the given dentry. This includes the normal unix access permission * checks, as well as checks for rlimits and others. The function also clears * SGID bit from mode if user is not allowed to set it. Also file capabilities * and IMA extended attributes are cleared if ATTR_KILL_PRIV is set. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Should be called as the first thing in ->setattr implementations, * possibly after taking additional locks. */ int setattr_prepare(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); unsigned int ia_valid = attr->ia_valid; /* * First check size constraints. These can't be overriden using * ATTR_FORCE. */ if (ia_valid & ATTR_SIZE) { int error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; } /* If force is set do it anyway. */ if (ia_valid & ATTR_FORCE) goto kill_priv; /* Make sure a caller can chown. */ if ((ia_valid & ATTR_UID) && !chown_ok(idmap, inode, attr->ia_vfsuid)) return -EPERM; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && !chgrp_ok(idmap, inode, attr->ia_vfsgid)) return -EPERM; /* Make sure a caller can chmod. */ if (ia_valid & ATTR_MODE) { vfsgid_t vfsgid; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (ia_valid & ATTR_GID) vfsgid = attr->ia_vfsgid; else vfsgid = i_gid_into_vfsgid(idmap, inode); /* Also check the setgid bit! */ if (!in_group_or_capable(idmap, inode, vfsgid)) attr->ia_mode &= ~S_ISGID; } /* Check for setting the inode time. */ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { if (!inode_owner_or_capable(idmap, inode)) return -EPERM; } kill_priv: /* User has permission for the change */ if (ia_valid & ATTR_KILL_PRIV) { int error; error = security_inode_killpriv(idmap, dentry); if (error) return error; } return 0; } EXPORT_SYMBOL(setattr_prepare); /** * inode_newsize_ok - may this inode be truncated to a given size * @inode: the inode to be truncated * @offset: the new size to assign to the inode * * inode_newsize_ok must be called with i_mutex held. * * inode_newsize_ok will check filesystem limits and ulimits to check that the * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ * when necessary. Caller must not proceed with inode size change if failure is * returned. @inode must be a file (not directory), with appropriate * permissions to allow truncate (inode_newsize_ok does NOT check these * conditions). 
* * Return: 0 on success, -ve errno on failure */ int inode_newsize_ok(const struct inode *inode, loff_t offset) { if (offset < 0) return -EINVAL; if (inode->i_size < offset) { unsigned long limit; limit = rlimit(RLIMIT_FSIZE); if (limit != RLIM_INFINITY && offset > limit) goto out_sig; if (offset > inode->i_sb->s_maxbytes) goto out_big; } else { /* * truncation of in-use swapfiles is disallowed - it would * cause subsequent swapout to scribble on the now-freed * blocks. */ if (IS_SWAPFILE(inode)) return -ETXTBSY; } return 0; out_sig: send_sig(SIGXFSZ, current, 0); out_big: return -EFBIG; } EXPORT_SYMBOL(inode_newsize_ok); /** * setattr_copy - copy simple metadata updates into the generic inode * @idmap: idmap of the mount the inode was found from * @inode: the inode to be updated * @attr: the new attributes * * setattr_copy must be called with i_mutex held. * * setattr_copy updates the inode's metadata with that specified * in attr on idmapped mounts. Necessary permission checks to determine * whether or not the S_ISGID property needs to be removed are performed with * the correct idmapped mount permission helpers. * Noticeably missing is inode size update, which is more complex * as it requires pagecache updates. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * The inode is not marked as dirty after this operation. The rationale is * that for "simple" filesystems, the struct inode is the inode storage. * The caller is free to mark the inode dirty afterwards if needed. */ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode, const struct iattr *attr) { unsigned int ia_valid = attr->ia_valid; i_uid_update(idmap, attr, inode); i_gid_update(idmap, attr, inode); if (ia_valid & ATTR_ATIME) inode_set_atime_to_ts(inode, attr->ia_atime); if (ia_valid & ATTR_MTIME) inode_set_mtime_to_ts(inode, attr->ia_mtime); if (ia_valid & ATTR_CTIME) inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; inode->i_mode = mode; } } EXPORT_SYMBOL(setattr_copy); int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid) { int error; if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; } /* * If utimes(2) and friends are called with times == NULL (or both * times are UTIME_NOW), then we need to check for write permission */ if (ia_valid & ATTR_TOUCH) { if (IS_IMMUTABLE(inode)) return -EPERM; if (!inode_owner_or_capable(idmap, inode)) { error = inode_permission(idmap, inode, MAY_WRITE); if (error) return error; } } return 0; } EXPORT_SYMBOL(may_setattr); /** * notify_change - modify attributes of a filesystem object * @idmap: idmap of the mount the inode was found from * @dentry: object affected * @attr: new attributes * @delegated_inode: returns inode, if the inode is delegated * * The caller must hold the i_mutex on the affected object. * * If notify_change discovers a delegation in need of breaking, * it will return -EWOULDBLOCK and return a reference to the inode in * delegated_inode. The caller should then break the delegation and * retry. 
Because breaking a delegation may take a long time, the * caller should drop the i_mutex before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. Also, passing NULL is fine for callers holding * the file open for write, as there can be no conflicting delegation in * that case. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; int error; struct timespec64 now; unsigned int ia_valid = attr->ia_valid; WARN_ON_ONCE(!inode_is_locked(inode)); error = may_setattr(idmap, inode, ia_valid); if (error) return error; if ((ia_valid & ATTR_MODE)) { /* * Don't allow changing the mode of symlinks: * * (1) The vfs doesn't take the mode of symlinks into account * during permission checking. * (2) This has never worked correctly. Most major filesystems * did return EOPNOTSUPP due to interactions with POSIX ACLs * but did still updated the mode of the symlink. * This inconsistency led system call wrapper providers such * as libc to block changing the mode of symlinks with * EOPNOTSUPP already. * (3) To even do this in the first place one would have to use * specific file descriptors and quite some effort. */ if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; /* Flag setting protected by i_mutex */ if (is_sxid(attr->ia_mode)) inode->i_flags &= ~S_NOSEC; } now = current_time(inode); attr->ia_ctime = now; if (!(ia_valid & ATTR_ATIME_SET)) attr->ia_atime = now; else attr->ia_atime = timestamp_truncate(attr->ia_atime, inode); if (!(ia_valid & ATTR_MTIME_SET)) attr->ia_mtime = now; else attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode); if (ia_valid & ATTR_KILL_PRIV) { error = security_inode_need_killpriv(dentry); if (error < 0) return error; if (error == 0) ia_valid = attr->ia_valid &= ~ATTR_KILL_PRIV; } /* * We now pass ATTR_KILL_S*ID to the lower level setattr function so * that the function has the ability to reinterpret a mode change * that's due to these bits. This adds an implicit restriction that * no function will ever call notify_change with both ATTR_MODE and * ATTR_KILL_S*ID set. */ if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) && (ia_valid & ATTR_MODE)) BUG(); if (ia_valid & ATTR_KILL_SUID) { if (mode & S_ISUID) { ia_valid = attr->ia_valid |= ATTR_MODE; attr->ia_mode = (inode->i_mode & ~S_ISUID); } } if (ia_valid & ATTR_KILL_SGID) { if (mode & S_ISGID) { if (!(ia_valid & ATTR_MODE)) { ia_valid = attr->ia_valid |= ATTR_MODE; attr->ia_mode = inode->i_mode; } attr->ia_mode &= ~S_ISGID; } } if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) return 0; /* * Verify that uid/gid changes are valid in the target * namespace of the superblock. */ if (ia_valid & ATTR_UID && !vfsuid_has_fsmapping(idmap, inode->i_sb->s_user_ns, attr->ia_vfsuid)) return -EOVERFLOW; if (ia_valid & ATTR_GID && !vfsgid_has_fsmapping(idmap, inode->i_sb->s_user_ns, attr->ia_vfsgid)) return -EOVERFLOW; /* Don't allow modifications of files with invalid uids or * gids unless those uids & gids are being made valid. 
*/ if (!(ia_valid & ATTR_UID) && !vfsuid_valid(i_uid_into_vfsuid(idmap, inode))) return -EOVERFLOW; if (!(ia_valid & ATTR_GID) && !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; error = security_inode_setattr(idmap, dentry, attr); if (error) return error; error = try_break_deleg(inode, delegated_inode); if (error) return error; if (inode->i_op->setattr) error = inode->i_op->setattr(idmap, dentry, attr); else error = simple_setattr(idmap, dentry, attr); if (!error) { fsnotify_change(dentry, ia_valid); security_inode_post_setattr(idmap, dentry, ia_valid); } return error; } EXPORT_SYMBOL(notify_change);
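/*
 * Illustrative sketch only (not part of attr.c): the usual shape of a
 * filesystem ->setattr implementation built on the helpers above, roughly
 * what simple_setattr() does. "myfs_setattr" is a hypothetical name; the
 * helpers (setattr_prepare(), truncate_setsize(), setattr_copy(),
 * mark_inode_dirty()) are real, but a given filesystem may need additional
 * work around the size change.
 */
static int myfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
			struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int error;

	/* Permission and limit checks, including inode_newsize_ok(). */
	error = setattr_prepare(idmap, dentry, attr);
	if (error)
		return error;

	if (attr->ia_valid & ATTR_SIZE)
		truncate_setsize(inode, attr->ia_size);

	/* Copy uid/gid/times/mode into the inode and mark it dirty. */
	setattr_copy(idmap, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}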
// SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/common.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/security.h> #include <linux/string_helpers.h> #include "common.h" /* String table for operation mode. 
*/ const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE] = { [TOMOYO_CONFIG_DISABLED] = "disabled", [TOMOYO_CONFIG_LEARNING] = "learning", [TOMOYO_CONFIG_PERMISSIVE] = "permissive", [TOMOYO_CONFIG_ENFORCING] = "enforcing" }; /* String table for /sys/kernel/security/tomoyo/profile */ const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX] = { /* CONFIG::file group */ [TOMOYO_MAC_FILE_EXECUTE] = "execute", [TOMOYO_MAC_FILE_OPEN] = "open", [TOMOYO_MAC_FILE_CREATE] = "create", [TOMOYO_MAC_FILE_UNLINK] = "unlink", [TOMOYO_MAC_FILE_GETATTR] = "getattr", [TOMOYO_MAC_FILE_MKDIR] = "mkdir", [TOMOYO_MAC_FILE_RMDIR] = "rmdir", [TOMOYO_MAC_FILE_MKFIFO] = "mkfifo", [TOMOYO_MAC_FILE_MKSOCK] = "mksock", [TOMOYO_MAC_FILE_TRUNCATE] = "truncate", [TOMOYO_MAC_FILE_SYMLINK] = "symlink", [TOMOYO_MAC_FILE_MKBLOCK] = "mkblock", [TOMOYO_MAC_FILE_MKCHAR] = "mkchar", [TOMOYO_MAC_FILE_LINK] = "link", [TOMOYO_MAC_FILE_RENAME] = "rename", [TOMOYO_MAC_FILE_CHMOD] = "chmod", [TOMOYO_MAC_FILE_CHOWN] = "chown", [TOMOYO_MAC_FILE_CHGRP] = "chgrp", [TOMOYO_MAC_FILE_IOCTL] = "ioctl", [TOMOYO_MAC_FILE_CHROOT] = "chroot", [TOMOYO_MAC_FILE_MOUNT] = "mount", [TOMOYO_MAC_FILE_UMOUNT] = "unmount", [TOMOYO_MAC_FILE_PIVOT_ROOT] = "pivot_root", /* CONFIG::network group */ [TOMOYO_MAC_NETWORK_INET_STREAM_BIND] = "inet_stream_bind", [TOMOYO_MAC_NETWORK_INET_STREAM_LISTEN] = "inet_stream_listen", [TOMOYO_MAC_NETWORK_INET_STREAM_CONNECT] = "inet_stream_connect", [TOMOYO_MAC_NETWORK_INET_DGRAM_BIND] = "inet_dgram_bind", [TOMOYO_MAC_NETWORK_INET_DGRAM_SEND] = "inet_dgram_send", [TOMOYO_MAC_NETWORK_INET_RAW_BIND] = "inet_raw_bind", [TOMOYO_MAC_NETWORK_INET_RAW_SEND] = "inet_raw_send", [TOMOYO_MAC_NETWORK_UNIX_STREAM_BIND] = "unix_stream_bind", [TOMOYO_MAC_NETWORK_UNIX_STREAM_LISTEN] = "unix_stream_listen", [TOMOYO_MAC_NETWORK_UNIX_STREAM_CONNECT] = "unix_stream_connect", [TOMOYO_MAC_NETWORK_UNIX_DGRAM_BIND] = "unix_dgram_bind", [TOMOYO_MAC_NETWORK_UNIX_DGRAM_SEND] = "unix_dgram_send", [TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_BIND] = "unix_seqpacket_bind", [TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_LISTEN] = "unix_seqpacket_listen", [TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_CONNECT] = "unix_seqpacket_connect", /* CONFIG::misc group */ [TOMOYO_MAC_ENVIRON] = "env", /* CONFIG group */ [TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_FILE] = "file", [TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_NETWORK] = "network", [TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_MISC] = "misc", }; /* String table for conditions. 
*/ const char * const tomoyo_condition_keyword[TOMOYO_MAX_CONDITION_KEYWORD] = { [TOMOYO_TASK_UID] = "task.uid", [TOMOYO_TASK_EUID] = "task.euid", [TOMOYO_TASK_SUID] = "task.suid", [TOMOYO_TASK_FSUID] = "task.fsuid", [TOMOYO_TASK_GID] = "task.gid", [TOMOYO_TASK_EGID] = "task.egid", [TOMOYO_TASK_SGID] = "task.sgid", [TOMOYO_TASK_FSGID] = "task.fsgid", [TOMOYO_TASK_PID] = "task.pid", [TOMOYO_TASK_PPID] = "task.ppid", [TOMOYO_EXEC_ARGC] = "exec.argc", [TOMOYO_EXEC_ENVC] = "exec.envc", [TOMOYO_TYPE_IS_SOCKET] = "socket", [TOMOYO_TYPE_IS_SYMLINK] = "symlink", [TOMOYO_TYPE_IS_FILE] = "file", [TOMOYO_TYPE_IS_BLOCK_DEV] = "block", [TOMOYO_TYPE_IS_DIRECTORY] = "directory", [TOMOYO_TYPE_IS_CHAR_DEV] = "char", [TOMOYO_TYPE_IS_FIFO] = "fifo", [TOMOYO_MODE_SETUID] = "setuid", [TOMOYO_MODE_SETGID] = "setgid", [TOMOYO_MODE_STICKY] = "sticky", [TOMOYO_MODE_OWNER_READ] = "owner_read", [TOMOYO_MODE_OWNER_WRITE] = "owner_write", [TOMOYO_MODE_OWNER_EXECUTE] = "owner_execute", [TOMOYO_MODE_GROUP_READ] = "group_read", [TOMOYO_MODE_GROUP_WRITE] = "group_write", [TOMOYO_MODE_GROUP_EXECUTE] = "group_execute", [TOMOYO_MODE_OTHERS_READ] = "others_read", [TOMOYO_MODE_OTHERS_WRITE] = "others_write", [TOMOYO_MODE_OTHERS_EXECUTE] = "others_execute", [TOMOYO_EXEC_REALPATH] = "exec.realpath", [TOMOYO_SYMLINK_TARGET] = "symlink.target", [TOMOYO_PATH1_UID] = "path1.uid", [TOMOYO_PATH1_GID] = "path1.gid", [TOMOYO_PATH1_INO] = "path1.ino", [TOMOYO_PATH1_MAJOR] = "path1.major", [TOMOYO_PATH1_MINOR] = "path1.minor", [TOMOYO_PATH1_PERM] = "path1.perm", [TOMOYO_PATH1_TYPE] = "path1.type", [TOMOYO_PATH1_DEV_MAJOR] = "path1.dev_major", [TOMOYO_PATH1_DEV_MINOR] = "path1.dev_minor", [TOMOYO_PATH2_UID] = "path2.uid", [TOMOYO_PATH2_GID] = "path2.gid", [TOMOYO_PATH2_INO] = "path2.ino", [TOMOYO_PATH2_MAJOR] = "path2.major", [TOMOYO_PATH2_MINOR] = "path2.minor", [TOMOYO_PATH2_PERM] = "path2.perm", [TOMOYO_PATH2_TYPE] = "path2.type", [TOMOYO_PATH2_DEV_MAJOR] = "path2.dev_major", [TOMOYO_PATH2_DEV_MINOR] = "path2.dev_minor", [TOMOYO_PATH1_PARENT_UID] = "path1.parent.uid", [TOMOYO_PATH1_PARENT_GID] = "path1.parent.gid", [TOMOYO_PATH1_PARENT_INO] = "path1.parent.ino", [TOMOYO_PATH1_PARENT_PERM] = "path1.parent.perm", [TOMOYO_PATH2_PARENT_UID] = "path2.parent.uid", [TOMOYO_PATH2_PARENT_GID] = "path2.parent.gid", [TOMOYO_PATH2_PARENT_INO] = "path2.parent.ino", [TOMOYO_PATH2_PARENT_PERM] = "path2.parent.perm", }; /* String table for PREFERENCE keyword. */ static const char * const tomoyo_pref_keywords[TOMOYO_MAX_PREF] = { [TOMOYO_PREF_MAX_AUDIT_LOG] = "max_audit_log", [TOMOYO_PREF_MAX_LEARNING_ENTRY] = "max_learning_entry", }; /* String table for path operation. */ const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = { [TOMOYO_TYPE_EXECUTE] = "execute", [TOMOYO_TYPE_READ] = "read", [TOMOYO_TYPE_WRITE] = "write", [TOMOYO_TYPE_APPEND] = "append", [TOMOYO_TYPE_UNLINK] = "unlink", [TOMOYO_TYPE_GETATTR] = "getattr", [TOMOYO_TYPE_RMDIR] = "rmdir", [TOMOYO_TYPE_TRUNCATE] = "truncate", [TOMOYO_TYPE_SYMLINK] = "symlink", [TOMOYO_TYPE_CHROOT] = "chroot", [TOMOYO_TYPE_UMOUNT] = "unmount", }; /* String table for socket's operation. */ const char * const tomoyo_socket_keyword[TOMOYO_MAX_NETWORK_OPERATION] = { [TOMOYO_NETWORK_BIND] = "bind", [TOMOYO_NETWORK_LISTEN] = "listen", [TOMOYO_NETWORK_CONNECT] = "connect", [TOMOYO_NETWORK_SEND] = "send", }; /* String table for categories. 
*/ static const char * const tomoyo_category_keywords [TOMOYO_MAX_MAC_CATEGORY_INDEX] = { [TOMOYO_MAC_CATEGORY_FILE] = "file", [TOMOYO_MAC_CATEGORY_NETWORK] = "network", [TOMOYO_MAC_CATEGORY_MISC] = "misc", }; /* Permit policy management by non-root user? */ static bool tomoyo_manage_by_non_root; /* Utility functions. */ /** * tomoyo_addprintf - strncat()-like-snprintf(). * * @buffer: Buffer to write to. Must be '\0'-terminated. * @len: Size of @buffer. * @fmt: The printf()'s format string, followed by parameters. * * Returns nothing. */ __printf(3, 4) static void tomoyo_addprintf(char *buffer, int len, const char *fmt, ...) { va_list args; const int pos = strlen(buffer); va_start(args, fmt); vsnprintf(buffer + pos, len - pos - 1, fmt, args); va_end(args); } /** * tomoyo_flush - Flush queued string to userspace's buffer. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns true if all data was flushed, false otherwise. */ static bool tomoyo_flush(struct tomoyo_io_buffer *head) { while (head->r.w_pos) { const char *w = head->r.w[0]; size_t len = strlen(w); if (len) { if (len > head->read_user_buf_avail) len = head->read_user_buf_avail; if (!len) return false; if (copy_to_user(head->read_user_buf, w, len)) return false; head->read_user_buf_avail -= len; head->read_user_buf += len; w += len; } head->r.w[0] = w; if (*w) return false; /* Add '\0' for audit logs and query. */ if (head->poll) { if (!head->read_user_buf_avail || copy_to_user(head->read_user_buf, "", 1)) return false; head->read_user_buf_avail--; head->read_user_buf++; } head->r.w_pos--; for (len = 0; len < head->r.w_pos; len++) head->r.w[len] = head->r.w[len + 1]; } head->r.avail = 0; return true; } /** * tomoyo_set_string - Queue string to "struct tomoyo_io_buffer" structure. * * @head: Pointer to "struct tomoyo_io_buffer". * @string: String to print. * * Note that @string has to be kept valid until @head is kfree()d. * This means that char[] allocated on stack memory cannot be passed to * this function. Use tomoyo_io_printf() for char[] allocated on stack memory. */ static void tomoyo_set_string(struct tomoyo_io_buffer *head, const char *string) { if (head->r.w_pos < TOMOYO_MAX_IO_READ_QUEUE) { head->r.w[head->r.w_pos++] = string; tomoyo_flush(head); } else WARN_ON(1); } static void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...) __printf(2, 3); /** * tomoyo_io_printf - printf() to "struct tomoyo_io_buffer" structure. * * @head: Pointer to "struct tomoyo_io_buffer". * @fmt: The printf()'s format string, followed by parameters. */ static void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...) { va_list args; size_t len; size_t pos = head->r.avail; int size = head->readbuf_size - pos; if (size <= 0) return; va_start(args, fmt); len = vsnprintf(head->read_buf + pos, size, fmt, args) + 1; va_end(args); if (pos + len >= head->readbuf_size) { WARN_ON(1); return; } head->r.avail += len; tomoyo_set_string(head, head->read_buf + pos); } /** * tomoyo_set_space - Put a space to "struct tomoyo_io_buffer" structure. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns nothing. */ static void tomoyo_set_space(struct tomoyo_io_buffer *head) { tomoyo_set_string(head, " "); } /** * tomoyo_set_lf - Put a line feed to "struct tomoyo_io_buffer" structure. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns nothing. 
*/ static bool tomoyo_set_lf(struct tomoyo_io_buffer *head) { tomoyo_set_string(head, "\n"); return !head->r.w_pos; } /** * tomoyo_set_slash - Put a shash to "struct tomoyo_io_buffer" structure. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns nothing. */ static void tomoyo_set_slash(struct tomoyo_io_buffer *head) { tomoyo_set_string(head, "/"); } /* List of namespaces. */ LIST_HEAD(tomoyo_namespace_list); /* True if namespace other than tomoyo_kernel_namespace is defined. */ static bool tomoyo_namespace_enabled; /** * tomoyo_init_policy_namespace - Initialize namespace. * * @ns: Pointer to "struct tomoyo_policy_namespace". * * Returns nothing. */ void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns) { unsigned int idx; for (idx = 0; idx < TOMOYO_MAX_ACL_GROUPS; idx++) INIT_LIST_HEAD(&ns->acl_group[idx]); for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++) INIT_LIST_HEAD(&ns->group_list[idx]); for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++) INIT_LIST_HEAD(&ns->policy_list[idx]); ns->profile_version = 20150505; tomoyo_namespace_enabled = !list_empty(&tomoyo_namespace_list); list_add_tail_rcu(&ns->namespace_list, &tomoyo_namespace_list); } /** * tomoyo_print_namespace - Print namespace header. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns nothing. */ static void tomoyo_print_namespace(struct tomoyo_io_buffer *head) { if (!tomoyo_namespace_enabled) return; tomoyo_set_string(head, container_of(head->r.ns, struct tomoyo_policy_namespace, namespace_list)->name); tomoyo_set_space(head); } /** * tomoyo_print_name_union - Print a tomoyo_name_union. * * @head: Pointer to "struct tomoyo_io_buffer". * @ptr: Pointer to "struct tomoyo_name_union". */ static void tomoyo_print_name_union(struct tomoyo_io_buffer *head, const struct tomoyo_name_union *ptr) { tomoyo_set_space(head); if (ptr->group) { tomoyo_set_string(head, "@"); tomoyo_set_string(head, ptr->group->group_name->name); } else { tomoyo_set_string(head, ptr->filename->name); } } /** * tomoyo_print_name_union_quoted - Print a tomoyo_name_union with a quote. * * @head: Pointer to "struct tomoyo_io_buffer". * @ptr: Pointer to "struct tomoyo_name_union". * * Returns nothing. */ static void tomoyo_print_name_union_quoted(struct tomoyo_io_buffer *head, const struct tomoyo_name_union *ptr) { if (ptr->group) { tomoyo_set_string(head, "@"); tomoyo_set_string(head, ptr->group->group_name->name); } else { tomoyo_set_string(head, "\""); tomoyo_set_string(head, ptr->filename->name); tomoyo_set_string(head, "\""); } } /** * tomoyo_print_number_union_nospace - Print a tomoyo_number_union without a space. * * @head: Pointer to "struct tomoyo_io_buffer". * @ptr: Pointer to "struct tomoyo_number_union". * * Returns nothing. 
*/ static void tomoyo_print_number_union_nospace (struct tomoyo_io_buffer *head, const struct tomoyo_number_union *ptr) { if (ptr->group) { tomoyo_set_string(head, "@"); tomoyo_set_string(head, ptr->group->group_name->name); } else { int i; unsigned long min = ptr->values[0]; const unsigned long max = ptr->values[1]; u8 min_type = ptr->value_type[0]; const u8 max_type = ptr->value_type[1]; char buffer[128]; buffer[0] = '\0'; for (i = 0; i < 2; i++) { switch (min_type) { case TOMOYO_VALUE_TYPE_HEXADECIMAL: tomoyo_addprintf(buffer, sizeof(buffer), "0x%lX", min); break; case TOMOYO_VALUE_TYPE_OCTAL: tomoyo_addprintf(buffer, sizeof(buffer), "0%lo", min); break; default: tomoyo_addprintf(buffer, sizeof(buffer), "%lu", min); break; } if (min == max && min_type == max_type) break; tomoyo_addprintf(buffer, sizeof(buffer), "-"); min_type = max_type; min = max; } tomoyo_io_printf(head, "%s", buffer); } } /** * tomoyo_print_number_union - Print a tomoyo_number_union. * * @head: Pointer to "struct tomoyo_io_buffer". * @ptr: Pointer to "struct tomoyo_number_union". * * Returns nothing. */ static void tomoyo_print_number_union(struct tomoyo_io_buffer *head, const struct tomoyo_number_union *ptr) { tomoyo_set_space(head); tomoyo_print_number_union_nospace(head, ptr); } /** * tomoyo_assign_profile - Create a new profile. * * @ns: Pointer to "struct tomoyo_policy_namespace". * @profile: Profile number to create. * * Returns pointer to "struct tomoyo_profile" on success, NULL otherwise. */ static struct tomoyo_profile *tomoyo_assign_profile (struct tomoyo_policy_namespace *ns, const unsigned int profile) { struct tomoyo_profile *ptr; struct tomoyo_profile *entry; if (profile >= TOMOYO_MAX_PROFILES) return NULL; ptr = ns->profile_ptr[profile]; if (ptr) return ptr; entry = kzalloc(sizeof(*entry), GFP_NOFS | __GFP_NOWARN); if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; ptr = ns->profile_ptr[profile]; if (!ptr && tomoyo_memory_ok(entry)) { ptr = entry; ptr->default_config = TOMOYO_CONFIG_DISABLED | TOMOYO_CONFIG_WANT_GRANT_LOG | TOMOYO_CONFIG_WANT_REJECT_LOG; memset(ptr->config, TOMOYO_CONFIG_USE_DEFAULT, sizeof(ptr->config)); ptr->pref[TOMOYO_PREF_MAX_AUDIT_LOG] = CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG; ptr->pref[TOMOYO_PREF_MAX_LEARNING_ENTRY] = CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY; mb(); /* Avoid out-of-order execution. */ ns->profile_ptr[profile] = ptr; entry = NULL; } mutex_unlock(&tomoyo_policy_lock); out: kfree(entry); return ptr; } /** * tomoyo_profile - Find a profile. * * @ns: Pointer to "struct tomoyo_policy_namespace". * @profile: Profile number to find. * * Returns pointer to "struct tomoyo_profile". */ struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns, const u8 profile) { static struct tomoyo_profile tomoyo_null_profile; struct tomoyo_profile *ptr = ns->profile_ptr[profile]; if (!ptr) ptr = &tomoyo_null_profile; return ptr; } /** * tomoyo_find_yesno - Find values for specified keyword. * * @string: String to check. * @find: Name of keyword. * * Returns 1 if "@find=yes" was found, 0 if "@find=no" was found, -1 otherwise. */ static s8 tomoyo_find_yesno(const char *string, const char *find) { const char *cp = strstr(string, find); if (cp) { cp += strlen(find); if (!strncmp(cp, "=yes", 4)) return 1; else if (!strncmp(cp, "=no", 3)) return 0; } return -1; } /** * tomoyo_set_uint - Set value for specified preference. * * @i: Pointer to "unsigned int". * @string: String to check. * @find: Name of keyword. * * Returns nothing. 
*/ static void tomoyo_set_uint(unsigned int *i, const char *string, const char *find) { const char *cp = strstr(string, find); if (cp) sscanf(cp + strlen(find), "=%u", i); } /** * tomoyo_set_mode - Set mode for specified profile. * * @name: Name of functionality. * @value: Mode for @name. * @profile: Pointer to "struct tomoyo_profile". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_set_mode(char *name, const char *value, struct tomoyo_profile *profile) { u8 i; u8 config; if (!strcmp(name, "CONFIG")) { i = TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX; config = profile->default_config; } else if (tomoyo_str_starts(&name, "CONFIG::")) { config = 0; for (i = 0; i < TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX; i++) { int len = 0; if (i < TOMOYO_MAX_MAC_INDEX) { const u8 c = tomoyo_index2category[i]; const char *category = tomoyo_category_keywords[c]; len = strlen(category); if (strncmp(name, category, len) || name[len++] != ':' || name[len++] != ':') continue; } if (strcmp(name + len, tomoyo_mac_keywords[i])) continue; config = profile->config[i]; break; } if (i == TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX) return -EINVAL; } else { return -EINVAL; } if (strstr(value, "use_default")) { config = TOMOYO_CONFIG_USE_DEFAULT; } else { u8 mode; for (mode = 0; mode < 4; mode++) if (strstr(value, tomoyo_mode[mode])) /* * Update lower 3 bits in order to distinguish * 'config' from 'TOMOYO_CONFIG_USE_DEFAULT'. */ config = (config & ~7) | mode; if (config != TOMOYO_CONFIG_USE_DEFAULT) { switch (tomoyo_find_yesno(value, "grant_log")) { case 1: config |= TOMOYO_CONFIG_WANT_GRANT_LOG; break; case 0: config &= ~TOMOYO_CONFIG_WANT_GRANT_LOG; break; } switch (tomoyo_find_yesno(value, "reject_log")) { case 1: config |= TOMOYO_CONFIG_WANT_REJECT_LOG; break; case 0: config &= ~TOMOYO_CONFIG_WANT_REJECT_LOG; break; } } } if (i < TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX) profile->config[i] = config; else if (config != TOMOYO_CONFIG_USE_DEFAULT) profile->default_config = config; return 0; } /** * tomoyo_write_profile - Write profile table. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_write_profile(struct tomoyo_io_buffer *head) { char *data = head->write_buf; unsigned int i; char *cp; struct tomoyo_profile *profile; if (sscanf(data, "PROFILE_VERSION=%u", &head->w.ns->profile_version) == 1) return 0; i = simple_strtoul(data, &cp, 10); if (*cp != '-') return -EINVAL; data = cp + 1; profile = tomoyo_assign_profile(head->w.ns, i); if (!profile) return -EINVAL; cp = strchr(data, '='); if (!cp) return -EINVAL; *cp++ = '\0'; if (!strcmp(data, "COMMENT")) { static DEFINE_SPINLOCK(lock); const struct tomoyo_path_info *new_comment = tomoyo_get_name(cp); const struct tomoyo_path_info *old_comment; if (!new_comment) return -ENOMEM; spin_lock(&lock); old_comment = profile->comment; profile->comment = new_comment; spin_unlock(&lock); tomoyo_put_name(old_comment); return 0; } if (!strcmp(data, "PREFERENCE")) { for (i = 0; i < TOMOYO_MAX_PREF; i++) tomoyo_set_uint(&profile->pref[i], cp, tomoyo_pref_keywords[i]); return 0; } return tomoyo_set_mode(data, cp, profile); } /** * tomoyo_print_config - Print mode for specified functionality. * * @head: Pointer to "struct tomoyo_io_buffer". * @config: Mode for that functionality. * * Returns nothing. * * Caller prints functionality's name. 
*/ static void tomoyo_print_config(struct tomoyo_io_buffer *head, const u8 config) { tomoyo_io_printf(head, "={ mode=%s grant_log=%s reject_log=%s }\n", tomoyo_mode[config & 3], str_yes_no(config & TOMOYO_CONFIG_WANT_GRANT_LOG), str_yes_no(config & TOMOYO_CONFIG_WANT_REJECT_LOG)); } /** * tomoyo_read_profile - Read profile table. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns nothing. */ static void tomoyo_read_profile(struct tomoyo_io_buffer *head) { u8 index; struct tomoyo_policy_namespace *ns = container_of(head->r.ns, typeof(*ns), namespace_list); const struct tomoyo_profile *profile; if (head->r.eof) return; next: index = head->r.index; profile = ns->profile_ptr[index]; switch (head->r.step) { case 0: tomoyo_print_namespace(head); tomoyo_io_printf(head, "PROFILE_VERSION=%u\n", ns->profile_version); head->r.step++; break; case 1: for ( ; head->r.index < TOMOYO_MAX_PROFILES; head->r.index++) if (ns->profile_ptr[head->r.index]) break; if (head->r.index == TOMOYO_MAX_PROFILES) { head->r.eof = true; return; } head->r.step++; break; case 2: { u8 i; const struct tomoyo_path_info *comment = profile->comment; tomoyo_print_namespace(head); tomoyo_io_printf(head, "%u-COMMENT=", index); tomoyo_set_string(head, comment ? comment->name : ""); tomoyo_set_lf(head); tomoyo_print_namespace(head); tomoyo_io_printf(head, "%u-PREFERENCE={ ", index); for (i = 0; i < TOMOYO_MAX_PREF; i++) tomoyo_io_printf(head, "%s=%u ", tomoyo_pref_keywords[i], profile->pref[i]); tomoyo_set_string(head, "}\n"); head->r.step++; } break; case 3: { tomoyo_print_namespace(head); tomoyo_io_printf(head, "%u-%s", index, "CONFIG"); tomoyo_print_config(head, profile->default_config); head->r.bit = 0; head->r.step++; } break; case 4: for ( ; head->r.bit < TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX; head->r.bit++) { const u8 i = head->r.bit; const u8 config = profile->config[i]; if (config == TOMOYO_CONFIG_USE_DEFAULT) continue; tomoyo_print_namespace(head); if (i < TOMOYO_MAX_MAC_INDEX) tomoyo_io_printf(head, "%u-CONFIG::%s::%s", index, tomoyo_category_keywords [tomoyo_index2category[i]], tomoyo_mac_keywords[i]); else tomoyo_io_printf(head, "%u-CONFIG::%s", index, tomoyo_mac_keywords[i]); tomoyo_print_config(head, config); head->r.bit++; break; } if (head->r.bit == TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX) { head->r.index++; head->r.step = 1; } break; } if (tomoyo_flush(head)) goto next; } /** * tomoyo_same_manager - Check for duplicated "struct tomoyo_manager" entry. * * @a: Pointer to "struct tomoyo_acl_head". * @b: Pointer to "struct tomoyo_acl_head". * * Returns true if @a == @b, false otherwise. */ static bool tomoyo_same_manager(const struct tomoyo_acl_head *a, const struct tomoyo_acl_head *b) { return container_of(a, struct tomoyo_manager, head)->manager == container_of(b, struct tomoyo_manager, head)->manager; } /** * tomoyo_update_manager_entry - Add a manager entry. * * @manager: The path to manager or the domainnamme. * @is_delete: True if it is a delete request. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_update_manager_entry(const char *manager, const bool is_delete) { struct tomoyo_manager e = { }; struct tomoyo_acl_param param = { /* .ns = &tomoyo_kernel_namespace, */ .is_delete = is_delete, .list = &tomoyo_kernel_namespace.policy_list[TOMOYO_ID_MANAGER], }; int error = is_delete ? 
-ENOENT : -ENOMEM; if (!tomoyo_correct_domain(manager) && !tomoyo_correct_word(manager)) return -EINVAL; e.manager = tomoyo_get_name(manager); if (e.manager) { error = tomoyo_update_policy(&e.head, sizeof(e), &param, tomoyo_same_manager); tomoyo_put_name(e.manager); } return error; } /** * tomoyo_write_manager - Write manager policy. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_write_manager(struct tomoyo_io_buffer *head) { char *data = head->write_buf; if (!strcmp(data, "manage_by_non_root")) { tomoyo_manage_by_non_root = !head->w.is_delete; return 0; } return tomoyo_update_manager_entry(data, head->w.is_delete); } /** * tomoyo_read_manager - Read manager policy. * * @head: Pointer to "struct tomoyo_io_buffer". * * Caller holds tomoyo_read_lock(). */ static void tomoyo_read_manager(struct tomoyo_io_buffer *head) { if (head->r.eof) return; list_for_each_cookie(head->r.acl, &tomoyo_kernel_namespace.policy_list[TOMOYO_ID_MANAGER]) { struct tomoyo_manager *ptr = list_entry(head->r.acl, typeof(*ptr), head.list); if (ptr->head.is_deleted) continue; if (!tomoyo_flush(head)) return; tomoyo_set_string(head, ptr->manager->name); tomoyo_set_lf(head); } head->r.eof = true; } /** * tomoyo_manager - Check whether the current process is a policy manager. * * Returns true if the current process is permitted to modify policy * via /sys/kernel/security/tomoyo/ interface. * * Caller holds tomoyo_read_lock(). */ static bool tomoyo_manager(void) { struct tomoyo_manager *ptr; const char *exe; const struct task_struct *task = current; const struct tomoyo_path_info *domainname = tomoyo_domain()->domainname; bool found = IS_ENABLED(CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING); if (!tomoyo_policy_loaded) return true; if (!tomoyo_manage_by_non_root && (!uid_eq(task->cred->uid, GLOBAL_ROOT_UID) || !uid_eq(task->cred->euid, GLOBAL_ROOT_UID))) return false; exe = tomoyo_get_exe(); if (!exe) return false; list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.policy_list[TOMOYO_ID_MANAGER], head.list, srcu_read_lock_held(&tomoyo_ss)) { if (!ptr->head.is_deleted && (!tomoyo_pathcmp(domainname, ptr->manager) || !strcmp(exe, ptr->manager->name))) { found = true; break; } } if (!found) { /* Reduce error messages. */ static pid_t last_pid; const pid_t pid = current->pid; if (last_pid != pid) { pr_warn("%s ( %s ) is not permitted to update policies.\n", domainname->name, exe); last_pid = pid; } } kfree(exe); return found; } static struct tomoyo_domain_info *tomoyo_find_domain_by_qid (unsigned int serial); /** * tomoyo_select_domain - Parse select command. * * @head: Pointer to "struct tomoyo_io_buffer". * @data: String to parse. * * Returns true on success, false otherwise. * * Caller holds tomoyo_read_lock(). 
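 *
 * @data is expected to be "select pid=<n>", "select global-pid=<n>",
 * "select domain=<domainname>" or "select Q=<query serial>"; any other
 * form makes this function return false so that the caller treats the
 * line as an ordinary policy statement.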
*/ static bool tomoyo_select_domain(struct tomoyo_io_buffer *head, const char *data) { unsigned int pid; struct tomoyo_domain_info *domain = NULL; bool global_pid = false; if (strncmp(data, "select ", 7)) return false; data += 7; if (sscanf(data, "pid=%u", &pid) == 1 || (global_pid = true, sscanf(data, "global-pid=%u", &pid) == 1)) { struct task_struct *p; rcu_read_lock(); if (global_pid) p = find_task_by_pid_ns(pid, &init_pid_ns); else p = find_task_by_vpid(pid); if (p) domain = tomoyo_task(p)->domain_info; rcu_read_unlock(); } else if (!strncmp(data, "domain=", 7)) { if (tomoyo_domain_def(data + 7)) domain = tomoyo_find_domain(data + 7); } else if (sscanf(data, "Q=%u", &pid) == 1) { domain = tomoyo_find_domain_by_qid(pid); } else return false; head->w.domain = domain; /* Accessing read_buf is safe because head->io_sem is held. */ if (!head->read_buf) return true; /* Do nothing if open(O_WRONLY). */ memset(&head->r, 0, sizeof(head->r)); head->r.print_this_domain_only = true; if (domain) head->r.domain = &domain->list; else head->r.eof = true; tomoyo_io_printf(head, "# select %s\n", data); if (domain && domain->is_deleted) tomoyo_io_printf(head, "# This is a deleted domain.\n"); return true; } /** * tomoyo_same_task_acl - Check for duplicated "struct tomoyo_task_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b, false otherwise. */ static bool tomoyo_same_task_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_task_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_task_acl *p2 = container_of(b, typeof(*p2), head); return p1->domainname == p2->domainname; } /** * tomoyo_write_task - Update task related list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_write_task(struct tomoyo_acl_param *param) { int error = -EINVAL; if (tomoyo_str_starts(&param->data, "manual_domain_transition ")) { struct tomoyo_task_acl e = { .head.type = TOMOYO_TYPE_MANUAL_TASK_ACL, .domainname = tomoyo_get_domainname(param), }; if (e.domainname) error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_task_acl, NULL); tomoyo_put_name(e.domainname); } return error; } /** * tomoyo_delete_domain - Delete a domain. * * @domainname: The name of domain. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_delete_domain(char *domainname) { struct tomoyo_domain_info *domain; struct tomoyo_path_info name; name.name = domainname; tomoyo_fill_path_info(&name); if (mutex_lock_interruptible(&tomoyo_policy_lock)) return -EINTR; /* Is there an active domain? */ list_for_each_entry_rcu(domain, &tomoyo_domain_list, list, srcu_read_lock_held(&tomoyo_ss)) { /* Never delete tomoyo_kernel_domain */ if (domain == &tomoyo_kernel_domain) continue; if (domain->is_deleted || tomoyo_pathcmp(domain->domainname, &name)) continue; domain->is_deleted = true; break; } mutex_unlock(&tomoyo_policy_lock); return 0; } /** * tomoyo_write_domain2 - Write domain policy. * * @ns: Pointer to "struct tomoyo_policy_namespace". * @list: Pointer to "struct list_head". * @data: Policy to be interpreted. * @is_delete: True if it is a delete request. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). 
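 *
 * @data must start with one of the keywords listed in tomoyo_callback[]
 * below ("file ", "network inet ", "network unix ", "misc " or "task ");
 * anything else is rejected with -EINVAL.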
*/ static int tomoyo_write_domain2(struct tomoyo_policy_namespace *ns, struct list_head *list, char *data, const bool is_delete) { struct tomoyo_acl_param param = { .ns = ns, .list = list, .data = data, .is_delete = is_delete, }; static const struct { const char *keyword; int (*write)(struct tomoyo_acl_param *param); } tomoyo_callback[5] = { { "file ", tomoyo_write_file }, { "network inet ", tomoyo_write_inet_network }, { "network unix ", tomoyo_write_unix_network }, { "misc ", tomoyo_write_misc }, { "task ", tomoyo_write_task }, }; u8 i; for (i = 0; i < ARRAY_SIZE(tomoyo_callback); i++) { if (!tomoyo_str_starts(&param.data, tomoyo_callback[i].keyword)) continue; return tomoyo_callback[i].write(&param); } return -EINVAL; } /* String table for domain flags. */ const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS] = { [TOMOYO_DIF_QUOTA_WARNED] = "quota_exceeded\n", [TOMOYO_DIF_TRANSITION_FAILED] = "transition_failed\n", }; /** * tomoyo_write_domain - Write domain policy. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_write_domain(struct tomoyo_io_buffer *head) { char *data = head->write_buf; struct tomoyo_policy_namespace *ns; struct tomoyo_domain_info *domain = head->w.domain; const bool is_delete = head->w.is_delete; bool is_select = !is_delete && tomoyo_str_starts(&data, "select "); unsigned int idx; if (*data == '<') { int ret = 0; domain = NULL; if (is_delete) ret = tomoyo_delete_domain(data); else if (is_select) domain = tomoyo_find_domain(data); else domain = tomoyo_assign_domain(data, false); head->w.domain = domain; return ret; } if (!domain) return -EINVAL; ns = domain->ns; if (sscanf(data, "use_profile %u", &idx) == 1 && idx < TOMOYO_MAX_PROFILES) { if (!tomoyo_policy_loaded || ns->profile_ptr[idx]) if (!is_delete) domain->profile = (u8) idx; return 0; } if (sscanf(data, "use_group %u\n", &idx) == 1 && idx < TOMOYO_MAX_ACL_GROUPS) { if (!is_delete) set_bit(idx, domain->group); else clear_bit(idx, domain->group); return 0; } for (idx = 0; idx < TOMOYO_MAX_DOMAIN_INFO_FLAGS; idx++) { const char *cp = tomoyo_dif[idx]; if (strncmp(data, cp, strlen(cp) - 1)) continue; domain->flags[idx] = !is_delete; return 0; } return tomoyo_write_domain2(ns, &domain->acl_info_list, data, is_delete); } /** * tomoyo_print_condition - Print condition part. * * @head: Pointer to "struct tomoyo_io_buffer". * @cond: Pointer to "struct tomoyo_condition". * * Returns true on success, false otherwise. 
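 *
 * The variable-length data printed here sits directly behind the
 * "struct tomoyo_condition" header: condc condition elements, then the
 * number unions, name unions, argv and envp blocks, which is why the
 * local pointers below are derived by stepping past (cond + 1).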
*/ static bool tomoyo_print_condition(struct tomoyo_io_buffer *head, const struct tomoyo_condition *cond) { switch (head->r.cond_step) { case 0: head->r.cond_index = 0; head->r.cond_step++; if (cond->transit) { tomoyo_set_space(head); tomoyo_set_string(head, cond->transit->name); } fallthrough; case 1: { const u16 condc = cond->condc; const struct tomoyo_condition_element *condp = (typeof(condp)) (cond + 1); const struct tomoyo_number_union *numbers_p = (typeof(numbers_p)) (condp + condc); const struct tomoyo_name_union *names_p = (typeof(names_p)) (numbers_p + cond->numbers_count); const struct tomoyo_argv *argv = (typeof(argv)) (names_p + cond->names_count); const struct tomoyo_envp *envp = (typeof(envp)) (argv + cond->argc); u16 skip; for (skip = 0; skip < head->r.cond_index; skip++) { const u8 left = condp->left; const u8 right = condp->right; condp++; switch (left) { case TOMOYO_ARGV_ENTRY: argv++; continue; case TOMOYO_ENVP_ENTRY: envp++; continue; case TOMOYO_NUMBER_UNION: numbers_p++; break; } switch (right) { case TOMOYO_NAME_UNION: names_p++; break; case TOMOYO_NUMBER_UNION: numbers_p++; break; } } while (head->r.cond_index < condc) { const u8 match = condp->equals; const u8 left = condp->left; const u8 right = condp->right; if (!tomoyo_flush(head)) return false; condp++; head->r.cond_index++; tomoyo_set_space(head); switch (left) { case TOMOYO_ARGV_ENTRY: tomoyo_io_printf(head, "exec.argv[%lu]%s=\"", argv->index, argv->is_not ? "!" : ""); tomoyo_set_string(head, argv->value->name); tomoyo_set_string(head, "\""); argv++; continue; case TOMOYO_ENVP_ENTRY: tomoyo_set_string(head, "exec.envp[\""); tomoyo_set_string(head, envp->name->name); tomoyo_io_printf(head, "\"]%s=", envp->is_not ? "!" : ""); if (envp->value) { tomoyo_set_string(head, "\""); tomoyo_set_string(head, envp->value->name); tomoyo_set_string(head, "\""); } else { tomoyo_set_string(head, "NULL"); } envp++; continue; case TOMOYO_NUMBER_UNION: tomoyo_print_number_union_nospace (head, numbers_p++); break; default: tomoyo_set_string(head, tomoyo_condition_keyword[left]); break; } tomoyo_set_string(head, match ? "=" : "!="); switch (right) { case TOMOYO_NAME_UNION: tomoyo_print_name_union_quoted (head, names_p++); break; case TOMOYO_NUMBER_UNION: tomoyo_print_number_union_nospace (head, numbers_p++); break; default: tomoyo_set_string(head, tomoyo_condition_keyword[right]); break; } } } head->r.cond_step++; fallthrough; case 2: if (!tomoyo_flush(head)) break; head->r.cond_step++; fallthrough; case 3: if (cond->grant_log != TOMOYO_GRANTLOG_AUTO) tomoyo_io_printf(head, " grant_log=%s", str_yes_no(cond->grant_log == TOMOYO_GRANTLOG_YES)); tomoyo_set_lf(head); return true; } return false; } /** * tomoyo_set_group - Print "acl_group " header keyword and category name. * * @head: Pointer to "struct tomoyo_io_buffer". * @category: Category name. * * Returns nothing. */ static void tomoyo_set_group(struct tomoyo_io_buffer *head, const char *category) { if (head->type == TOMOYO_EXCEPTIONPOLICY) { tomoyo_print_namespace(head); tomoyo_io_printf(head, "acl_group %u ", head->r.acl_group_index); } tomoyo_set_string(head, category); } /** * tomoyo_print_entry - Print an ACL entry. * * @head: Pointer to "struct tomoyo_io_buffer". * @acl: Pointer to an ACL entry. * * Returns true on success, false otherwise. 
*/ static bool tomoyo_print_entry(struct tomoyo_io_buffer *head, struct tomoyo_acl_info *acl) { const u8 acl_type = acl->type; bool first = true; u8 bit; if (head->r.print_cond_part) goto print_cond_part; if (acl->is_deleted) return true; if (!tomoyo_flush(head)) return false; else if (acl_type == TOMOYO_TYPE_PATH_ACL) { struct tomoyo_path_acl *ptr = container_of(acl, typeof(*ptr), head); const u16 perm = ptr->perm; for (bit = 0; bit < TOMOYO_MAX_PATH_OPERATION; bit++) { if (!(perm & (1 << bit))) continue; if (head->r.print_transition_related_only && bit != TOMOYO_TYPE_EXECUTE) continue; if (first) { tomoyo_set_group(head, "file "); first = false; } else { tomoyo_set_slash(head); } tomoyo_set_string(head, tomoyo_path_keyword[bit]); } if (first) return true; tomoyo_print_name_union(head, &ptr->name); } else if (acl_type == TOMOYO_TYPE_MANUAL_TASK_ACL) { struct tomoyo_task_acl *ptr = container_of(acl, typeof(*ptr), head); tomoyo_set_group(head, "task "); tomoyo_set_string(head, "manual_domain_transition "); tomoyo_set_string(head, ptr->domainname->name); } else if (head->r.print_transition_related_only) { return true; } else if (acl_type == TOMOYO_TYPE_PATH2_ACL) { struct tomoyo_path2_acl *ptr = container_of(acl, typeof(*ptr), head); const u8 perm = ptr->perm; for (bit = 0; bit < TOMOYO_MAX_PATH2_OPERATION; bit++) { if (!(perm & (1 << bit))) continue; if (first) { tomoyo_set_group(head, "file "); first = false; } else { tomoyo_set_slash(head); } tomoyo_set_string(head, tomoyo_mac_keywords [tomoyo_pp2mac[bit]]); } if (first) return true; tomoyo_print_name_union(head, &ptr->name1); tomoyo_print_name_union(head, &ptr->name2); } else if (acl_type == TOMOYO_TYPE_PATH_NUMBER_ACL) { struct tomoyo_path_number_acl *ptr = container_of(acl, typeof(*ptr), head); const u8 perm = ptr->perm; for (bit = 0; bit < TOMOYO_MAX_PATH_NUMBER_OPERATION; bit++) { if (!(perm & (1 << bit))) continue; if (first) { tomoyo_set_group(head, "file "); first = false; } else { tomoyo_set_slash(head); } tomoyo_set_string(head, tomoyo_mac_keywords [tomoyo_pn2mac[bit]]); } if (first) return true; tomoyo_print_name_union(head, &ptr->name); tomoyo_print_number_union(head, &ptr->number); } else if (acl_type == TOMOYO_TYPE_MKDEV_ACL) { struct tomoyo_mkdev_acl *ptr = container_of(acl, typeof(*ptr), head); const u8 perm = ptr->perm; for (bit = 0; bit < TOMOYO_MAX_MKDEV_OPERATION; bit++) { if (!(perm & (1 << bit))) continue; if (first) { tomoyo_set_group(head, "file "); first = false; } else { tomoyo_set_slash(head); } tomoyo_set_string(head, tomoyo_mac_keywords [tomoyo_pnnn2mac[bit]]); } if (first) return true; tomoyo_print_name_union(head, &ptr->name); tomoyo_print_number_union(head, &ptr->mode); tomoyo_print_number_union(head, &ptr->major); tomoyo_print_number_union(head, &ptr->minor); } else if (acl_type == TOMOYO_TYPE_INET_ACL) { struct tomoyo_inet_acl *ptr = container_of(acl, typeof(*ptr), head); const u8 perm = ptr->perm; for (bit = 0; bit < TOMOYO_MAX_NETWORK_OPERATION; bit++) { if (!(perm & (1 << bit))) continue; if (first) { tomoyo_set_group(head, "network inet "); tomoyo_set_string(head, tomoyo_proto_keyword [ptr->protocol]); tomoyo_set_space(head); first = false; } else { tomoyo_set_slash(head); } tomoyo_set_string(head, tomoyo_socket_keyword[bit]); } if (first) return true; tomoyo_set_space(head); if (ptr->address.group) { tomoyo_set_string(head, "@"); tomoyo_set_string(head, ptr->address.group->group_name ->name); } else { char buf[128]; tomoyo_print_ip(buf, sizeof(buf), &ptr->address); tomoyo_io_printf(head, "%s", buf); 
} tomoyo_print_number_union(head, &ptr->port); } else if (acl_type == TOMOYO_TYPE_UNIX_ACL) { struct tomoyo_unix_acl *ptr = container_of(acl, typeof(*ptr), head); const u8 perm = ptr->perm; for (bit = 0; bit < TOMOYO_MAX_NETWORK_OPERATION; bit++) { if (!(perm & (1 << bit))) continue; if (first) { tomoyo_set_group(head, "network unix "); tomoyo_set_string(head, tomoyo_proto_keyword [ptr->protocol]); tomoyo_set_space(head); first = false; } else { tomoyo_set_slash(head); } tomoyo_set_string(head, tomoyo_socket_keyword[bit]); } if (first) return true; tomoyo_print_name_union(head, &ptr->name); } else if (acl_type == TOMOYO_TYPE_MOUNT_ACL) { struct tomoyo_mount_acl *ptr = container_of(acl, typeof(*ptr), head); tomoyo_set_group(head, "file mount"); tomoyo_print_name_union(head, &ptr->dev_name); tomoyo_print_name_union(head, &ptr->dir_name); tomoyo_print_name_union(head, &ptr->fs_type); tomoyo_print_number_union(head, &ptr->flags); } else if (acl_type == TOMOYO_TYPE_ENV_ACL) { struct tomoyo_env_acl *ptr = container_of(acl, typeof(*ptr), head); tomoyo_set_group(head, "misc env "); tomoyo_set_string(head, ptr->env->name); } if (acl->cond) { head->r.print_cond_part = true; head->r.cond_step = 0; if (!tomoyo_flush(head)) return false; print_cond_part: if (!tomoyo_print_condition(head, acl->cond)) return false; head->r.print_cond_part = false; } else { tomoyo_set_lf(head); } return true; } /** * tomoyo_read_domain2 - Read domain policy. * * @head: Pointer to "struct tomoyo_io_buffer". * @list: Pointer to "struct list_head". * * Caller holds tomoyo_read_lock(). * * Returns true on success, false otherwise. */ static bool tomoyo_read_domain2(struct tomoyo_io_buffer *head, struct list_head *list) { list_for_each_cookie(head->r.acl, list) { struct tomoyo_acl_info *ptr = list_entry(head->r.acl, typeof(*ptr), list); if (!tomoyo_print_entry(head, ptr)) return false; } head->r.acl = NULL; return true; } /** * tomoyo_read_domain - Read domain policy. * * @head: Pointer to "struct tomoyo_io_buffer". * * Caller holds tomoyo_read_lock(). */ static void tomoyo_read_domain(struct tomoyo_io_buffer *head) { if (head->r.eof) return; list_for_each_cookie(head->r.domain, &tomoyo_domain_list) { struct tomoyo_domain_info *domain = list_entry(head->r.domain, typeof(*domain), list); u8 i; switch (head->r.step) { case 0: if (domain->is_deleted && !head->r.print_this_domain_only) continue; /* Print domainname and flags. */ tomoyo_set_string(head, domain->domainname->name); tomoyo_set_lf(head); tomoyo_io_printf(head, "use_profile %u\n", domain->profile); for (i = 0; i < TOMOYO_MAX_DOMAIN_INFO_FLAGS; i++) if (domain->flags[i]) tomoyo_set_string(head, tomoyo_dif[i]); head->r.index = 0; head->r.step++; fallthrough; case 1: while (head->r.index < TOMOYO_MAX_ACL_GROUPS) { i = head->r.index++; if (!test_bit(i, domain->group)) continue; tomoyo_io_printf(head, "use_group %u\n", i); if (!tomoyo_flush(head)) return; } head->r.index = 0; head->r.step++; tomoyo_set_lf(head); fallthrough; case 2: if (!tomoyo_read_domain2(head, &domain->acl_info_list)) return; head->r.step++; if (!tomoyo_set_lf(head)) return; fallthrough; case 3: head->r.step = 0; if (head->r.print_this_domain_only) goto done; } } done: head->r.eof = true; } /** * tomoyo_write_pid: Specify PID to obtain domainname. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns 0. */ static int tomoyo_write_pid(struct tomoyo_io_buffer *head) { head->r.eof = false; return 0; } /** * tomoyo_read_pid - Get domainname of the specified PID. 
* * @head: Pointer to "struct tomoyo_io_buffer". * * Returns the domainname which the specified PID is in on success, * empty string otherwise. * The PID is specified by tomoyo_write_pid() so that the user can obtain * using read()/write() interface rather than sysctl() interface. */ static void tomoyo_read_pid(struct tomoyo_io_buffer *head) { char *buf = head->write_buf; bool global_pid = false; unsigned int pid; struct task_struct *p; struct tomoyo_domain_info *domain = NULL; /* Accessing write_buf is safe because head->io_sem is held. */ if (!buf) { head->r.eof = true; return; /* Do nothing if open(O_RDONLY). */ } if (head->r.w_pos || head->r.eof) return; head->r.eof = true; if (tomoyo_str_starts(&buf, "global-pid ")) global_pid = true; if (kstrtouint(buf, 10, &pid)) return; rcu_read_lock(); if (global_pid) p = find_task_by_pid_ns(pid, &init_pid_ns); else p = find_task_by_vpid(pid); if (p) domain = tomoyo_task(p)->domain_info; rcu_read_unlock(); if (!domain) return; tomoyo_io_printf(head, "%u %u ", pid, domain->profile); tomoyo_set_string(head, domain->domainname->name); } /* String table for domain transition control keywords. */ static const char *tomoyo_transition_type[TOMOYO_MAX_TRANSITION_TYPE] = { [TOMOYO_TRANSITION_CONTROL_NO_RESET] = "no_reset_domain ", [TOMOYO_TRANSITION_CONTROL_RESET] = "reset_domain ", [TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE] = "no_initialize_domain ", [TOMOYO_TRANSITION_CONTROL_INITIALIZE] = "initialize_domain ", [TOMOYO_TRANSITION_CONTROL_NO_KEEP] = "no_keep_domain ", [TOMOYO_TRANSITION_CONTROL_KEEP] = "keep_domain ", }; /* String table for grouping keywords. */ static const char *tomoyo_group_name[TOMOYO_MAX_GROUP] = { [TOMOYO_PATH_GROUP] = "path_group ", [TOMOYO_NUMBER_GROUP] = "number_group ", [TOMOYO_ADDRESS_GROUP] = "address_group ", }; /** * tomoyo_write_exception - Write exception policy. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_write_exception(struct tomoyo_io_buffer *head) { const bool is_delete = head->w.is_delete; struct tomoyo_acl_param param = { .ns = head->w.ns, .is_delete = is_delete, .data = head->write_buf, }; u8 i; if (tomoyo_str_starts(&param.data, "aggregator ")) return tomoyo_write_aggregator(&param); for (i = 0; i < TOMOYO_MAX_TRANSITION_TYPE; i++) if (tomoyo_str_starts(&param.data, tomoyo_transition_type[i])) return tomoyo_write_transition_control(&param, i); for (i = 0; i < TOMOYO_MAX_GROUP; i++) if (tomoyo_str_starts(&param.data, tomoyo_group_name[i])) return tomoyo_write_group(&param, i); if (tomoyo_str_starts(&param.data, "acl_group ")) { unsigned int group; char *data; group = simple_strtoul(param.data, &data, 10); if (group < TOMOYO_MAX_ACL_GROUPS && *data++ == ' ') return tomoyo_write_domain2 (head->w.ns, &head->w.ns->acl_group[group], data, is_delete); } return -EINVAL; } /** * tomoyo_read_group - Read "struct tomoyo_path_group"/"struct tomoyo_number_group"/"struct tomoyo_address_group" list. * * @head: Pointer to "struct tomoyo_io_buffer". * @idx: Index number. * * Returns true on success, false otherwise. * * Caller holds tomoyo_read_lock(). 
*/ static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx) { struct tomoyo_policy_namespace *ns = container_of(head->r.ns, typeof(*ns), namespace_list); struct list_head *list = &ns->group_list[idx]; list_for_each_cookie(head->r.group, list) { struct tomoyo_group *group = list_entry(head->r.group, typeof(*group), head.list); list_for_each_cookie(head->r.acl, &group->member_list) { struct tomoyo_acl_head *ptr = list_entry(head->r.acl, typeof(*ptr), list); if (ptr->is_deleted) continue; if (!tomoyo_flush(head)) return false; tomoyo_print_namespace(head); tomoyo_set_string(head, tomoyo_group_name[idx]); tomoyo_set_string(head, group->group_name->name); if (idx == TOMOYO_PATH_GROUP) { tomoyo_set_space(head); tomoyo_set_string(head, container_of (ptr, struct tomoyo_path_group, head)->member_name->name); } else if (idx == TOMOYO_NUMBER_GROUP) { tomoyo_print_number_union(head, &container_of (ptr, struct tomoyo_number_group, head)->number); } else if (idx == TOMOYO_ADDRESS_GROUP) { char buffer[128]; struct tomoyo_address_group *member = container_of(ptr, typeof(*member), head); tomoyo_print_ip(buffer, sizeof(buffer), &member->address); tomoyo_io_printf(head, " %s", buffer); } tomoyo_set_lf(head); } head->r.acl = NULL; } head->r.group = NULL; return true; } /** * tomoyo_read_policy - Read "struct tomoyo_..._entry" list. * * @head: Pointer to "struct tomoyo_io_buffer". * @idx: Index number. * * Returns true on success, false otherwise. * * Caller holds tomoyo_read_lock(). */ static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx) { struct tomoyo_policy_namespace *ns = container_of(head->r.ns, typeof(*ns), namespace_list); struct list_head *list = &ns->policy_list[idx]; list_for_each_cookie(head->r.acl, list) { struct tomoyo_acl_head *acl = container_of(head->r.acl, typeof(*acl), list); if (acl->is_deleted) continue; if (!tomoyo_flush(head)) return false; switch (idx) { case TOMOYO_ID_TRANSITION_CONTROL: { struct tomoyo_transition_control *ptr = container_of(acl, typeof(*ptr), head); tomoyo_print_namespace(head); tomoyo_set_string(head, tomoyo_transition_type [ptr->type]); tomoyo_set_string(head, ptr->program ? ptr->program->name : "any"); tomoyo_set_string(head, " from "); tomoyo_set_string(head, ptr->domainname ? ptr->domainname->name : "any"); } break; case TOMOYO_ID_AGGREGATOR: { struct tomoyo_aggregator *ptr = container_of(acl, typeof(*ptr), head); tomoyo_print_namespace(head); tomoyo_set_string(head, "aggregator "); tomoyo_set_string(head, ptr->original_name->name); tomoyo_set_space(head); tomoyo_set_string(head, ptr->aggregated_name->name); } break; default: continue; } tomoyo_set_lf(head); } head->r.acl = NULL; return true; } /** * tomoyo_read_exception - Read exception policy. * * @head: Pointer to "struct tomoyo_io_buffer". * * Caller holds tomoyo_read_lock(). 
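 *
 * head->r.step advances through three ranges: the TOMOYO_MAX_POLICY
 * per-namespace policy lists, then the TOMOYO_MAX_GROUP group lists, then
 * the TOMOYO_MAX_ACL_GROUPS "acl_group" lists, so an interrupted read
 * resumes exactly where the previous tomoyo_flush() stopped.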
*/ static void tomoyo_read_exception(struct tomoyo_io_buffer *head) { struct tomoyo_policy_namespace *ns = container_of(head->r.ns, typeof(*ns), namespace_list); if (head->r.eof) return; while (head->r.step < TOMOYO_MAX_POLICY && tomoyo_read_policy(head, head->r.step)) head->r.step++; if (head->r.step < TOMOYO_MAX_POLICY) return; while (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP && tomoyo_read_group(head, head->r.step - TOMOYO_MAX_POLICY)) head->r.step++; if (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP) return; while (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP + TOMOYO_MAX_ACL_GROUPS) { head->r.acl_group_index = head->r.step - TOMOYO_MAX_POLICY - TOMOYO_MAX_GROUP; if (!tomoyo_read_domain2(head, &ns->acl_group [head->r.acl_group_index])) return; head->r.step++; } head->r.eof = true; } /* Wait queue for kernel -> userspace notification. */ static DECLARE_WAIT_QUEUE_HEAD(tomoyo_query_wait); /* Wait queue for userspace -> kernel notification. */ static DECLARE_WAIT_QUEUE_HEAD(tomoyo_answer_wait); /* Structure for query. */ struct tomoyo_query { struct list_head list; struct tomoyo_domain_info *domain; char *query; size_t query_len; unsigned int serial; u8 timer; u8 answer; u8 retry; }; /* The list for "struct tomoyo_query". */ static LIST_HEAD(tomoyo_query_list); /* Lock for manipulating tomoyo_query_list. */ static DEFINE_SPINLOCK(tomoyo_query_list_lock); /* * Number of "struct file" referring /sys/kernel/security/tomoyo/query * interface. */ static atomic_t tomoyo_query_observers = ATOMIC_INIT(0); /** * tomoyo_truncate - Truncate a line. * * @str: String to truncate. * * Returns length of truncated @str. */ static int tomoyo_truncate(char *str) { char *start = str; while (*(unsigned char *) str > (unsigned char) ' ') str++; *str = '\0'; return strlen(start) + 1; } /** * tomoyo_add_entry - Add an ACL to current thread's domain. Used by learning mode. * * @domain: Pointer to "struct tomoyo_domain_info". * @header: Lines containing ACL. * * Returns nothing. */ static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header) { char *buffer; char *realpath = NULL; char *argv0 = NULL; char *symlink = NULL; char *cp = strchr(header, '\n'); int len; if (!cp) return; cp = strchr(cp + 1, '\n'); if (!cp) return; *cp++ = '\0'; len = strlen(cp) + 1; /* strstr() will return NULL if ordering is wrong. */ if (*cp == 'f') { argv0 = strstr(header, " argv[]={ \""); if (argv0) { argv0 += 10; len += tomoyo_truncate(argv0) + 14; } realpath = strstr(header, " exec={ realpath=\""); if (realpath) { realpath += 8; len += tomoyo_truncate(realpath) + 6; } symlink = strstr(header, " symlink.target=\""); if (symlink) len += tomoyo_truncate(symlink + 1) + 1; } buffer = kmalloc(len, GFP_NOFS); if (!buffer) return; snprintf(buffer, len - 1, "%s", cp); if (realpath) tomoyo_addprintf(buffer, len, " exec.%s", realpath); if (argv0) tomoyo_addprintf(buffer, len, " exec.argv[0]=%s", argv0); if (symlink) tomoyo_addprintf(buffer, len, "%s", symlink); tomoyo_normalize_line(buffer); if (!tomoyo_write_domain2(domain->ns, &domain->acl_info_list, buffer, false)) tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES); kfree(buffer); } /** * tomoyo_supervisor - Ask for the supervisor's decision. * * @r: Pointer to "struct tomoyo_request_info". * @fmt: The printf()'s format string, followed by parameters. 
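 *
 * Each unanswered request is queued on tomoyo_query_list and exported via
 * /sys/kernel/security/tomoyo/query as "Q<serial>-<retry>\n<request>";
 * the administrator replies by writing "A<serial>=<answer>" back, where
 * 1 grants the request, 3 asks the caller to retry and any other value
 * (or a 10 second timeout) rejects it.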
* * Returns 0 if the supervisor decided to permit the access request which * violated the policy in enforcing mode, TOMOYO_RETRY_REQUEST if the * supervisor decided to retry the access request which violated the policy in * enforcing mode, 0 if it is not in enforcing mode, -EPERM otherwise. */ int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...) { va_list args; int error; int len; static unsigned int tomoyo_serial; struct tomoyo_query entry = { }; bool quota_exceeded = false; va_start(args, fmt); len = vsnprintf(NULL, 0, fmt, args) + 1; va_end(args); /* Write /sys/kernel/security/tomoyo/audit. */ va_start(args, fmt); tomoyo_write_log2(r, len, fmt, args); va_end(args); /* Nothing more to do if granted. */ if (r->granted) return 0; if (r->mode) tomoyo_update_stat(r->mode); switch (r->mode) { case TOMOYO_CONFIG_ENFORCING: error = -EPERM; if (atomic_read(&tomoyo_query_observers)) break; goto out; case TOMOYO_CONFIG_LEARNING: error = 0; /* Check max_learning_entry parameter. */ if (tomoyo_domain_quota_is_ok(r)) break; fallthrough; default: return 0; } /* Get message. */ va_start(args, fmt); entry.query = tomoyo_init_log(r, len, fmt, args); va_end(args); if (!entry.query) goto out; entry.query_len = strlen(entry.query) + 1; if (!error) { tomoyo_add_entry(r->domain, entry.query); goto out; } len = kmalloc_size_roundup(entry.query_len); entry.domain = r->domain; spin_lock(&tomoyo_query_list_lock); if (tomoyo_memory_quota[TOMOYO_MEMORY_QUERY] && tomoyo_memory_used[TOMOYO_MEMORY_QUERY] + len >= tomoyo_memory_quota[TOMOYO_MEMORY_QUERY]) { quota_exceeded = true; } else { entry.serial = tomoyo_serial++; entry.retry = r->retry; tomoyo_memory_used[TOMOYO_MEMORY_QUERY] += len; list_add_tail(&entry.list, &tomoyo_query_list); } spin_unlock(&tomoyo_query_list_lock); if (quota_exceeded) goto out; /* Give 10 seconds for supervisor's opinion. */ while (entry.timer < 10) { wake_up_all(&tomoyo_query_wait); if (wait_event_interruptible_timeout (tomoyo_answer_wait, entry.answer || !atomic_read(&tomoyo_query_observers), HZ)) break; entry.timer++; } spin_lock(&tomoyo_query_list_lock); list_del(&entry.list); tomoyo_memory_used[TOMOYO_MEMORY_QUERY] -= len; spin_unlock(&tomoyo_query_list_lock); switch (entry.answer) { case 3: /* Asked to retry by administrator. */ error = TOMOYO_RETRY_REQUEST; r->retry++; break; case 1: /* Granted by administrator. */ error = 0; break; default: /* Timed out or rejected by administrator. */ break; } out: kfree(entry.query); return error; } /** * tomoyo_find_domain_by_qid - Get domain by query id. * * @serial: Query ID assigned by tomoyo_supervisor(). * * Returns pointer to "struct tomoyo_domain_info" if found, NULL otherwise. */ static struct tomoyo_domain_info *tomoyo_find_domain_by_qid (unsigned int serial) { struct tomoyo_query *ptr; struct tomoyo_domain_info *domain = NULL; spin_lock(&tomoyo_query_list_lock); list_for_each_entry(ptr, &tomoyo_query_list, list) { if (ptr->serial != serial) continue; domain = ptr->domain; break; } spin_unlock(&tomoyo_query_list_lock); return domain; } /** * tomoyo_poll_query - poll() for /sys/kernel/security/tomoyo/query. * * @file: Pointer to "struct file". * @wait: Pointer to "poll_table". * * Returns EPOLLIN | EPOLLRDNORM when ready to read, 0 otherwise. * * Waits for access requests which violated policy in enforcing mode. 
*/ static __poll_t tomoyo_poll_query(struct file *file, poll_table *wait) { if (!list_empty(&tomoyo_query_list)) return EPOLLIN | EPOLLRDNORM; poll_wait(file, &tomoyo_query_wait, wait); if (!list_empty(&tomoyo_query_list)) return EPOLLIN | EPOLLRDNORM; return 0; } /** * tomoyo_read_query - Read access requests which violated policy in enforcing mode. * * @head: Pointer to "struct tomoyo_io_buffer". */ static void tomoyo_read_query(struct tomoyo_io_buffer *head) { struct list_head *tmp; unsigned int pos = 0; size_t len = 0; char *buf; if (head->r.w_pos) return; kfree(head->read_buf); head->read_buf = NULL; spin_lock(&tomoyo_query_list_lock); list_for_each(tmp, &tomoyo_query_list) { struct tomoyo_query *ptr = list_entry(tmp, typeof(*ptr), list); if (pos++ != head->r.query_index) continue; len = ptr->query_len; break; } spin_unlock(&tomoyo_query_list_lock); if (!len) { head->r.query_index = 0; return; } buf = kzalloc(len + 32, GFP_NOFS); if (!buf) return; pos = 0; spin_lock(&tomoyo_query_list_lock); list_for_each(tmp, &tomoyo_query_list) { struct tomoyo_query *ptr = list_entry(tmp, typeof(*ptr), list); if (pos++ != head->r.query_index) continue; /* * Some query can be skipped because tomoyo_query_list * can change, but I don't care. */ if (len == ptr->query_len) snprintf(buf, len + 31, "Q%u-%hu\n%s", ptr->serial, ptr->retry, ptr->query); break; } spin_unlock(&tomoyo_query_list_lock); if (buf[0]) { head->read_buf = buf; head->r.w[head->r.w_pos++] = buf; head->r.query_index++; } else { kfree(buf); } } /** * tomoyo_write_answer - Write the supervisor's decision. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns 0 on success, -EINVAL otherwise. */ static int tomoyo_write_answer(struct tomoyo_io_buffer *head) { char *data = head->write_buf; struct list_head *tmp; unsigned int serial; unsigned int answer; spin_lock(&tomoyo_query_list_lock); list_for_each(tmp, &tomoyo_query_list) { struct tomoyo_query *ptr = list_entry(tmp, typeof(*ptr), list); ptr->timer = 0; } spin_unlock(&tomoyo_query_list_lock); if (sscanf(data, "A%u=%u", &serial, &answer) != 2) return -EINVAL; spin_lock(&tomoyo_query_list_lock); list_for_each(tmp, &tomoyo_query_list) { struct tomoyo_query *ptr = list_entry(tmp, typeof(*ptr), list); if (ptr->serial != serial) continue; ptr->answer = answer; /* Remove from tomoyo_query_list. */ if (ptr->answer) list_del_init(&ptr->list); break; } spin_unlock(&tomoyo_query_list_lock); return 0; } /** * tomoyo_read_version: Get version. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns version information. */ static void tomoyo_read_version(struct tomoyo_io_buffer *head) { if (!head->r.eof) { tomoyo_io_printf(head, "2.6.0"); head->r.eof = true; } } /* String table for /sys/kernel/security/tomoyo/stat interface. */ static const char * const tomoyo_policy_headers[TOMOYO_MAX_POLICY_STAT] = { [TOMOYO_STAT_POLICY_UPDATES] = "update:", [TOMOYO_STAT_POLICY_LEARNING] = "violation in learning mode:", [TOMOYO_STAT_POLICY_PERMISSIVE] = "violation in permissive mode:", [TOMOYO_STAT_POLICY_ENFORCING] = "violation in enforcing mode:", }; /* String table for /sys/kernel/security/tomoyo/stat interface. */ static const char * const tomoyo_memory_headers[TOMOYO_MAX_MEMORY_STAT] = { [TOMOYO_MEMORY_POLICY] = "policy:", [TOMOYO_MEMORY_AUDIT] = "audit log:", [TOMOYO_MEMORY_QUERY] = "query message:", }; /* Counter for number of updates. */ static atomic_t tomoyo_stat_updated[TOMOYO_MAX_POLICY_STAT]; /* Timestamp counter for last updated. 
*/ static time64_t tomoyo_stat_modified[TOMOYO_MAX_POLICY_STAT]; /** * tomoyo_update_stat - Update statistic counters. * * @index: Index for policy type. * * Returns nothing. */ void tomoyo_update_stat(const u8 index) { atomic_inc(&tomoyo_stat_updated[index]); tomoyo_stat_modified[index] = ktime_get_real_seconds(); } /** * tomoyo_read_stat - Read statistic data. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns nothing. */ static void tomoyo_read_stat(struct tomoyo_io_buffer *head) { u8 i; unsigned int total = 0; if (head->r.eof) return; for (i = 0; i < TOMOYO_MAX_POLICY_STAT; i++) { tomoyo_io_printf(head, "Policy %-30s %10u", tomoyo_policy_headers[i], atomic_read(&tomoyo_stat_updated[i])); if (tomoyo_stat_modified[i]) { struct tomoyo_time stamp; tomoyo_convert_time(tomoyo_stat_modified[i], &stamp); tomoyo_io_printf(head, " (Last: %04u/%02u/%02u %02u:%02u:%02u)", stamp.year, stamp.month, stamp.day, stamp.hour, stamp.min, stamp.sec); } tomoyo_set_lf(head); } for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++) { unsigned int used = tomoyo_memory_used[i]; total += used; tomoyo_io_printf(head, "Memory used by %-22s %10u", tomoyo_memory_headers[i], used); used = tomoyo_memory_quota[i]; if (used) tomoyo_io_printf(head, " (Quota: %10u)", used); tomoyo_set_lf(head); } tomoyo_io_printf(head, "Total memory used: %10u\n", total); head->r.eof = true; } /** * tomoyo_write_stat - Set memory quota. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns 0. */ static int tomoyo_write_stat(struct tomoyo_io_buffer *head) { char *data = head->write_buf; u8 i; if (tomoyo_str_starts(&data, "Memory used by ")) for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++) if (tomoyo_str_starts(&data, tomoyo_memory_headers[i])) sscanf(data, "%u", &tomoyo_memory_quota[i]); return 0; } /** * tomoyo_open_control - open() for /sys/kernel/security/tomoyo/ interface. * * @type: Type of interface. * @file: Pointer to "struct file". * * Returns 0 on success, negative value otherwise. 
*/ int tomoyo_open_control(const u8 type, struct file *file) { struct tomoyo_io_buffer *head = kzalloc(sizeof(*head), GFP_NOFS); if (!head) return -ENOMEM; mutex_init(&head->io_sem); head->type = type; switch (type) { case TOMOYO_DOMAINPOLICY: /* /sys/kernel/security/tomoyo/domain_policy */ head->write = tomoyo_write_domain; head->read = tomoyo_read_domain; break; case TOMOYO_EXCEPTIONPOLICY: /* /sys/kernel/security/tomoyo/exception_policy */ head->write = tomoyo_write_exception; head->read = tomoyo_read_exception; break; case TOMOYO_AUDIT: /* /sys/kernel/security/tomoyo/audit */ head->poll = tomoyo_poll_log; head->read = tomoyo_read_log; break; case TOMOYO_PROCESS_STATUS: /* /sys/kernel/security/tomoyo/.process_status */ head->write = tomoyo_write_pid; head->read = tomoyo_read_pid; break; case TOMOYO_VERSION: /* /sys/kernel/security/tomoyo/version */ head->read = tomoyo_read_version; head->readbuf_size = 128; break; case TOMOYO_STAT: /* /sys/kernel/security/tomoyo/stat */ head->write = tomoyo_write_stat; head->read = tomoyo_read_stat; head->readbuf_size = 1024; break; case TOMOYO_PROFILE: /* /sys/kernel/security/tomoyo/profile */ head->write = tomoyo_write_profile; head->read = tomoyo_read_profile; break; case TOMOYO_QUERY: /* /sys/kernel/security/tomoyo/query */ head->poll = tomoyo_poll_query; head->write = tomoyo_write_answer; head->read = tomoyo_read_query; break; case TOMOYO_MANAGER: /* /sys/kernel/security/tomoyo/manager */ head->write = tomoyo_write_manager; head->read = tomoyo_read_manager; break; } if (!(file->f_mode & FMODE_READ)) { /* * No need to allocate read_buf since it is not opened * for reading. */ head->read = NULL; head->poll = NULL; } else if (!head->poll) { /* Don't allocate read_buf for poll() access. */ if (!head->readbuf_size) head->readbuf_size = 4096 * 2; head->read_buf = kzalloc(head->readbuf_size, GFP_NOFS); if (!head->read_buf) { kfree(head); return -ENOMEM; } } if (!(file->f_mode & FMODE_WRITE)) { /* * No need to allocate write_buf since it is not opened * for writing. */ head->write = NULL; } else if (head->write) { head->writebuf_size = 4096 * 2; head->write_buf = kzalloc(head->writebuf_size, GFP_NOFS); if (!head->write_buf) { kfree(head->read_buf); kfree(head); return -ENOMEM; } } /* * If the file is /sys/kernel/security/tomoyo/query , increment the * observer counter. * The obserber counter is used by tomoyo_supervisor() to see if * there is some process monitoring /sys/kernel/security/tomoyo/query. */ if (type == TOMOYO_QUERY) atomic_inc(&tomoyo_query_observers); file->private_data = head; tomoyo_notify_gc(head, true); return 0; } /** * tomoyo_poll_control - poll() for /sys/kernel/security/tomoyo/ interface. * * @file: Pointer to "struct file". * @wait: Pointer to "poll_table". Maybe NULL. * * Returns EPOLLIN | EPOLLRDNORM | EPOLLOUT | EPOLLWRNORM if ready to read/write, * EPOLLOUT | EPOLLWRNORM otherwise. */ __poll_t tomoyo_poll_control(struct file *file, poll_table *wait) { struct tomoyo_io_buffer *head = file->private_data; if (head->poll) return head->poll(file, wait) | EPOLLOUT | EPOLLWRNORM; return EPOLLIN | EPOLLRDNORM | EPOLLOUT | EPOLLWRNORM; } /** * tomoyo_set_namespace_cursor - Set namespace to read. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns nothing. 
*/ static inline void tomoyo_set_namespace_cursor(struct tomoyo_io_buffer *head) { struct list_head *ns; if (head->type != TOMOYO_EXCEPTIONPOLICY && head->type != TOMOYO_PROFILE) return; /* * If this is the first read, or reading previous namespace finished * and has more namespaces to read, update the namespace cursor. */ ns = head->r.ns; if (!ns || (head->r.eof && ns->next != &tomoyo_namespace_list)) { /* Clearing is OK because tomoyo_flush() returned true. */ memset(&head->r, 0, sizeof(head->r)); head->r.ns = ns ? ns->next : tomoyo_namespace_list.next; } } /** * tomoyo_has_more_namespace - Check for unread namespaces. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns true if we have more entries to print, false otherwise. */ static inline bool tomoyo_has_more_namespace(struct tomoyo_io_buffer *head) { return (head->type == TOMOYO_EXCEPTIONPOLICY || head->type == TOMOYO_PROFILE) && head->r.eof && head->r.ns->next != &tomoyo_namespace_list; } /** * tomoyo_read_control - read() for /sys/kernel/security/tomoyo/ interface. * * @head: Pointer to "struct tomoyo_io_buffer". * @buffer: Pointer to buffer to write to. * @buffer_len: Size of @buffer. * * Returns bytes read on success, negative value otherwise. */ ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer, const int buffer_len) { int len; int idx; if (!head->read) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; head->read_user_buf = buffer; head->read_user_buf_avail = buffer_len; idx = tomoyo_read_lock(); if (tomoyo_flush(head)) /* Call the policy handler. */ do { tomoyo_set_namespace_cursor(head); head->read(head); } while (tomoyo_flush(head) && tomoyo_has_more_namespace(head)); tomoyo_read_unlock(idx); len = head->read_user_buf - buffer; mutex_unlock(&head->io_sem); return len; } /** * tomoyo_parse_policy - Parse a policy line. * * @head: Pointer to "struct tomoyo_io_buffer". * @line: Line to parse. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_parse_policy(struct tomoyo_io_buffer *head, char *line) { /* Delete request? */ head->w.is_delete = !strncmp(line, "delete ", 7); if (head->w.is_delete) memmove(line, line + 7, strlen(line + 7) + 1); /* Selecting namespace to update. */ if (head->type == TOMOYO_EXCEPTIONPOLICY || head->type == TOMOYO_PROFILE) { if (*line == '<') { char *cp = strchr(line, ' '); if (cp) { *cp++ = '\0'; head->w.ns = tomoyo_assign_namespace(line); memmove(line, cp, strlen(cp) + 1); } else head->w.ns = NULL; } else head->w.ns = &tomoyo_kernel_namespace; /* Don't allow updating if namespace is invalid. */ if (!head->w.ns) return -ENOENT; } /* Do the update. */ return head->write(head); } /** * tomoyo_write_control - write() for /sys/kernel/security/tomoyo/ interface. * * @head: Pointer to "struct tomoyo_io_buffer". * @buffer: Pointer to buffer to read from. * @buffer_len: Size of @buffer. * * Returns @buffer_len on success, negative value otherwise. */ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, const char __user *buffer, const int buffer_len) { int error = buffer_len; size_t avail_len = buffer_len; char *cp0; int idx; if (!head->write) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; cp0 = head->write_buf; head->read_user_buf_avail = 0; idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. 
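 * Each '\n'-terminated line is normalized first; an optional "delete "
 * prefix sets head->w.is_delete, a leading "<namespace>" token selects the
 * namespace for exception/profile updates, and a "reset" line switches back
 * to the kernel namespace and clears the read cursor.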
*/ while (avail_len > 0) { char c; if (head->w.avail >= head->writebuf_size - 1) { const int len = head->writebuf_size * 2; char *cp = kzalloc(len, GFP_NOFS); if (!cp) { error = -ENOMEM; break; } memmove(cp, cp0, head->w.avail); kfree(cp0); head->write_buf = cp; cp0 = cp; head->writebuf_size = len; } if (get_user(c, buffer)) { error = -EFAULT; break; } buffer++; avail_len--; cp0[head->w.avail++] = c; if (c != '\n') continue; cp0[head->w.avail - 1] = '\0'; head->w.avail = 0; tomoyo_normalize_line(cp0); if (!strcmp(cp0, "reset")) { head->w.ns = &tomoyo_kernel_namespace; head->w.domain = NULL; memset(&head->r, 0, sizeof(head->r)); continue; } /* Don't allow updating policies by non manager programs. */ switch (head->type) { case TOMOYO_PROCESS_STATUS: /* This does not write anything. */ break; case TOMOYO_DOMAINPOLICY: if (tomoyo_select_domain(head, cp0)) continue; fallthrough; case TOMOYO_EXCEPTIONPOLICY: if (!strcmp(cp0, "select transition_only")) { head->r.print_transition_related_only = true; continue; } fallthrough; default: if (!tomoyo_manager()) { error = -EPERM; goto out; } } switch (tomoyo_parse_policy(head, cp0)) { case -EPERM: error = -EPERM; goto out; case 0: switch (head->type) { case TOMOYO_DOMAINPOLICY: case TOMOYO_EXCEPTIONPOLICY: case TOMOYO_STAT: case TOMOYO_PROFILE: case TOMOYO_MANAGER: tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES); break; default: break; } break; } } out: tomoyo_read_unlock(idx); mutex_unlock(&head->io_sem); return error; } /** * tomoyo_close_control - close() for /sys/kernel/security/tomoyo/ interface. * * @head: Pointer to "struct tomoyo_io_buffer". */ void tomoyo_close_control(struct tomoyo_io_buffer *head) { /* * If the file is /sys/kernel/security/tomoyo/query , decrement the * observer counter. */ if (head->type == TOMOYO_QUERY && atomic_dec_and_test(&tomoyo_query_observers)) wake_up_all(&tomoyo_answer_wait); tomoyo_notify_gc(head, false); } /** * tomoyo_check_profile - Check all profiles currently assigned to domains are defined. */ void tomoyo_check_profile(void) { struct tomoyo_domain_info *domain; const int idx = tomoyo_read_lock(); tomoyo_policy_loaded = true; pr_info("TOMOYO: 2.6.0\n"); list_for_each_entry_rcu(domain, &tomoyo_domain_list, list, srcu_read_lock_held(&tomoyo_ss)) { const u8 profile = domain->profile; struct tomoyo_policy_namespace *ns = domain->ns; if (ns->profile_version == 20110903) { pr_info_once("Converting profile version from %u to %u.\n", 20110903, 20150505); ns->profile_version = 20150505; } if (ns->profile_version != 20150505) pr_err("Profile version %u is not supported.\n", ns->profile_version); else if (!ns->profile_ptr[profile]) pr_err("Profile %u (used by '%s') is not defined.\n", profile, domain->domainname->name); else continue; pr_err("Userland tools for TOMOYO 2.6 must be installed and policy must be initialized.\n"); pr_err("Please see https://tomoyo.osdn.jp/2.6/ for more information.\n"); panic("STOP!"); } tomoyo_read_unlock(idx); pr_info("Mandatory Access Control activated.\n"); } /** * tomoyo_load_builtin_policy - Load built-in policy. * * Returns nothing. 
*/ void __init tomoyo_load_builtin_policy(void) { #ifdef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING static char tomoyo_builtin_profile[] __initdata = "PROFILE_VERSION=20150505\n" "0-CONFIG={ mode=learning grant_log=no reject_log=yes }\n"; static char tomoyo_builtin_exception_policy[] __initdata = "aggregator proc:/self/exe /proc/self/exe\n"; static char tomoyo_builtin_domain_policy[] __initdata = ""; static char tomoyo_builtin_manager[] __initdata = ""; static char tomoyo_builtin_stat[] __initdata = ""; #else /* * This include file is manually created and contains built-in policy * named "tomoyo_builtin_profile", "tomoyo_builtin_exception_policy", * "tomoyo_builtin_domain_policy", "tomoyo_builtin_manager", * "tomoyo_builtin_stat" in the form of "static char [] __initdata". */ #include "builtin-policy.h" #endif u8 i; const int idx = tomoyo_read_lock(); for (i = 0; i < 5; i++) { struct tomoyo_io_buffer head = { }; char *start = ""; switch (i) { case 0: start = tomoyo_builtin_profile; head.type = TOMOYO_PROFILE; head.write = tomoyo_write_profile; break; case 1: start = tomoyo_builtin_exception_policy; head.type = TOMOYO_EXCEPTIONPOLICY; head.write = tomoyo_write_exception; break; case 2: start = tomoyo_builtin_domain_policy; head.type = TOMOYO_DOMAINPOLICY; head.write = tomoyo_write_domain; break; case 3: start = tomoyo_builtin_manager; head.type = TOMOYO_MANAGER; head.write = tomoyo_write_manager; break; case 4: start = tomoyo_builtin_stat; head.type = TOMOYO_STAT; head.write = tomoyo_write_stat; break; } while (1) { char *end = strchr(start, '\n'); if (!end) break; *end = '\0'; tomoyo_normalize_line(start); head.write_buf = start; tomoyo_parse_policy(&head, start); start = end + 1; } } tomoyo_read_unlock(idx); #ifdef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER tomoyo_check_profile(); #endif }
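/*
 * Illustrative sketch (not part of the kernel sources above): a minimal
 * userspace "supervisor" loop for /sys/kernel/security/tomoyo/query, based
 * on the formats implemented by tomoyo_read_query() and tomoyo_write_answer()
 * ("Q<serial>-<retry>\n<request>" in, "A<serial>=<answer>" out). The
 * grant-everything policy and reduced error handling are for demonstration
 * only; a real manager would inspect the request text before answering.
 */
#if 0	/* userspace example, kept out of the kernel build */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/security/tomoyo/query";
	char buf[8192];
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN };
		unsigned int serial;
		unsigned int retry;
		char reply[64];
		ssize_t len;

		/* tomoyo_poll_query() reports POLLIN while queries are pending. */
		if (poll(&pfd, 1, -1) < 0)
			break;
		len = read(fd, buf, sizeof(buf) - 1);
		if (len <= 0)
			continue;
		buf[len] = '\0';
		/* Header format written by tomoyo_read_query(). */
		if (sscanf(buf, "Q%u-%u", &serial, &retry) != 2)
			continue;
		printf("request #%u (retry %u):\n%s\n", serial, retry,
		       strchr(buf, '\n') ? strchr(buf, '\n') + 1 : "");
		/* 1 = grant, 3 = ask the caller to retry, anything else = reject. */
		snprintf(reply, sizeof(reply), "A%u=%u\n", serial, 1);
		write(fd, reply, strlen(reply));
	}
	close(fd);
	return 0;
}
#endif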
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM vmalloc

#if !defined(_TRACE_VMALLOC_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_VMALLOC_H

#include <linux/tracepoint.h>

/**
 * alloc_vmap_area - called when a new vmap allocation occurs
 * @addr: an allocated address
 * @size: a requested size
 * @align: a requested alignment
 * @vstart: a requested start range
 * @vend: a requested end range
 * @failed: an allocation failed or not
 *
 * This event is used for a debug purpose, it can give an extra
 * information for a developer about how often it occurs and which
 * parameters are passed for further validation.
 */
TRACE_EVENT(alloc_vmap_area,

	TP_PROTO(unsigned long addr, unsigned long size, unsigned long align,
		unsigned long vstart, unsigned long vend, int failed),

	TP_ARGS(addr, size, align, vstart, vend, failed),

	TP_STRUCT__entry(
		__field(unsigned long, addr)
		__field(unsigned long, size)
		__field(unsigned long, align)
		__field(unsigned long, vstart)
		__field(unsigned long, vend)
		__field(int, failed)
	),

	TP_fast_assign(
		__entry->addr = addr;
		__entry->size = size;
		__entry->align = align;
		__entry->vstart = vstart;
		__entry->vend = vend;
		__entry->failed = failed;
	),

	TP_printk("va_start: %lu size=%lu align=%lu vstart=0x%lx vend=0x%lx failed=%d",
		__entry->addr, __entry->size, __entry->align,
		__entry->vstart, __entry->vend, __entry->failed)
);

/**
 * purge_vmap_area_lazy - called when vmap areas were lazily freed
 * @start: purging start address
 * @end: purging end address
 * @npurged: number of purged vmap areas
 *
 * This event is used for a debug purpose. It gives some
 * indication about start:end range and how many objects
 * are released.
 */
TRACE_EVENT(purge_vmap_area_lazy,

	TP_PROTO(unsigned long start, unsigned long end,
		unsigned int npurged),

	TP_ARGS(start, end, npurged),

	TP_STRUCT__entry(
		__field(unsigned long, start)
		__field(unsigned long, end)
		__field(unsigned int, npurged)
	),

	TP_fast_assign(
		__entry->start = start;
		__entry->end = end;
		__entry->npurged = npurged;
	),

	TP_printk("start=0x%lx end=0x%lx num_purged=%u",
		__entry->start, __entry->end, __entry->npurged)
);

/**
 * free_vmap_area_noflush - called when a vmap area is freed
 * @va_start: a start address of VA
 * @nr_lazy: number of current lazy pages
 * @nr_lazy_max: number of maximum lazy pages
 *
 * This event is used for a debug purpose. It gives some
 * indication about a VA that is released, number of current
 * outstanding areas and a maximum allowed threshold before
 * dropping all of them.
 */
TRACE_EVENT(free_vmap_area_noflush,

	TP_PROTO(unsigned long va_start, unsigned long nr_lazy,
		unsigned long nr_lazy_max),

	TP_ARGS(va_start, nr_lazy, nr_lazy_max),

	TP_STRUCT__entry(
		__field(unsigned long, va_start)
		__field(unsigned long, nr_lazy)
		__field(unsigned long, nr_lazy_max)
	),

	TP_fast_assign(
		__entry->va_start = va_start;
		__entry->nr_lazy = nr_lazy;
		__entry->nr_lazy_max = nr_lazy_max;
	),

	TP_printk("va_start=0x%lx nr_lazy=%lu nr_lazy_max=%lu",
		__entry->va_start, __entry->nr_lazy,
		__entry->nr_lazy_max)
);

#endif /* _TRACE_VMALLOC_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
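/*
 * Illustrative sketch (not part of the header above): consuming the vmalloc
 * trace events from userspace through tracefs. The "/sys/kernel/tracing"
 * mount point and the "events/vmalloc/enable" layout are the usual ftrace
 * conventions and are assumed here rather than defined by this header.
 */
#if 0	/* userspace example, kept out of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, val, strlen(val));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	char line[512];
	FILE *pipe;

	/* Enable alloc_vmap_area, purge_vmap_area_lazy and free_vmap_area_noflush. */
	if (write_str("/sys/kernel/tracing/events/vmalloc/enable", "1"))
		return 1;
	pipe = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!pipe)
		return 1;
	/*
	 * Each record ends with the TP_printk() format defined above, e.g.
	 * "va_start: ... size=... align=... vstart=0x... vend=0x... failed=0".
	 */
	while (fgets(line, sizeof(line), pipe))
		fputs(line, stdout);
	fclose(pipe);
	return 0;
}
#endif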
/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion
 *
 * Copyright (C) IBM Corporation, 2006
 * Copyright (C) Fujitsu, 2012
 *
 * Author: Paul McKenney <paulmck@linux.ibm.com>
 *	   Lai Jiangshan <laijs@cn.fujitsu.com>
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
 *
 */

#ifndef _LINUX_SRCU_H
#define _LINUX_SRCU_H

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/rcu_segcblist.h>

struct srcu_struct;

#ifdef CONFIG_DEBUG_LOCK_ALLOC

int __init_srcu_struct(struct srcu_struct *ssp, const char *name,
		       struct lock_class_key *key);

#define init_srcu_struct(ssp) \
({ \
	static struct lock_class_key __srcu_key; \
	\
	__init_srcu_struct((ssp), #ssp, &__srcu_key); \
})

#define __SRCU_DEP_MAP_INIT(srcu_name)	.dep_map = { .name = #srcu_name },
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

int init_srcu_struct(struct srcu_struct *ssp);

#define __SRCU_DEP_MAP_INIT(srcu_name)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

#ifdef CONFIG_TINY_SRCU
#include <linux/srcutiny.h>
#elif defined(CONFIG_TREE_SRCU)
#include <linux/srcutree.h>
#else
#error "Unknown SRCU implementation specified to kernel configuration"
#endif

void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
		void (*func)(struct rcu_head *head));
void cleanup_srcu_struct(struct srcu_struct *ssp);
int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
void synchronize_srcu(struct srcu_struct *ssp);
unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);

#ifdef CONFIG_NEED_SRCU_NMI_SAFE
int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
#else
static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
{
	return __srcu_read_lock(ssp);
}

static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
{
	__srcu_read_unlock(ssp, idx);
}
#endif /*
CONFIG_NEED_SRCU_NMI_SAFE */ void srcu_init(void); #ifdef CONFIG_DEBUG_LOCK_ALLOC /** * srcu_read_lock_held - might we be in SRCU read-side critical section? * @ssp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an SRCU read-side critical section unless it can * prove otherwise. * * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. * * Note that SRCU is based on its own statemachine and it doesn't * relies on normal RCU, it can be called from the CPU which * is in the idle loop from an RCU point of view or offline. */ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { if (!debug_lockdep_rcu_enabled()) return 1; return lock_is_held(&ssp->dep_map); } /* * Annotations provide deadlock detection for SRCU. * * Similar to other lockdep annotations, except there is an additional * srcu_lock_sync(), which is basically an empty *write*-side critical section, * see lock_sync() for more information. */ /* Annotates a srcu_read_lock() */ static inline void srcu_lock_acquire(struct lockdep_map *map) { lock_map_acquire_read(map); } /* Annotates a srcu_read_lock() */ static inline void srcu_lock_release(struct lockdep_map *map) { lock_map_release(map); } /* Annotates a synchronize_srcu() */ static inline void srcu_lock_sync(struct lockdep_map *map) { lock_map_sync(map); } #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { return 1; } #define srcu_lock_acquire(m) do { } while (0) #define srcu_lock_release(m) do { } while (0) #define srcu_lock_sync(m) do { } while (0) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #define SRCU_NMI_UNKNOWN 0x0 #define SRCU_NMI_UNSAFE 0x1 #define SRCU_NMI_SAFE 0x2 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe); #else static inline void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe) { } #endif /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * @c: condition to check for update-side use * * If PROVE_RCU is enabled, invoking this outside of an RCU read-side * critical section will result in an RCU-lockdep splat, unless @c evaluates * to 1. The @c argument will normally be a logical expression containing * lockdep_is_held() calls. */ #define srcu_dereference_check(p, ssp, c) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU * is enabled, invoking this outside of an RCU read-side critical * section will result in an RCU-lockdep splat. 
*/ #define srcu_dereference(p, ssp) srcu_dereference_check((p), (ssp), 0) /** * srcu_dereference_notrace - no tracing and no lockdep calls from here * @p: the pointer to fetch and protect for later dereferencing * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. */ #define srcu_dereference_notrace(p, ssp) srcu_dereference_check((p), (ssp), 1) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side * critical sections may be nested. However, it is illegal to * call anything that waits on an SRCU grace period for the same * srcu_struct, whether directly or indirectly. Please note that * one way to indirectly wait on an SRCU grace period is to acquire * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). * * Note that srcu_read_lock() and the matching srcu_read_unlock() must * occur in the same context, for example, it is illegal to invoke * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() * was invoked in process context. */ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); srcu_lock_acquire(&ssp->dep_map); return retval; } /** * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section, but in an NMI-safe manner. * See srcu_read_lock() for more information. */ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp) { int retval; srcu_check_nmi_safety(ssp, true); retval = __srcu_read_lock_nmisafe(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; } /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); return retval; } /** * srcu_down_read - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. * * Enter a semaphore-like SRCU read-side critical section. Note that * SRCU read-side critical sections may be nested. However, it is * illegal to call anything that waits on an SRCU grace period for the * same srcu_struct, whether directly or indirectly. Please note that * one way to indirectly wait on an SRCU grace period is to acquire * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). But if you want lockdep to help you * keep this stuff straight, you should instead use srcu_read_lock(). * * The semaphore-like nature of srcu_down_read() means that the matching * srcu_up_read() can be invoked from some other context, for example, * from some other task or from an irq handler. However, neither * srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler. * * Calls to srcu_down_read() may be nested, similar to the manner in * which calls to down_read() may be nested. */ static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(in_nmi()); srcu_check_nmi_safety(ssp, false); return __srcu_read_lock(ssp); } /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. 
* @ssp: srcu_struct in which to unregister the old reader. * @idx: return value from corresponding srcu_read_lock(). * * Exit an SRCU read-side critical section. */ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); srcu_check_nmi_safety(ssp, false); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock(ssp, idx); } /** * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. * @idx: return value from corresponding srcu_read_lock(). * * Exit an SRCU read-side critical section, but in an NMI-safe manner. */ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); srcu_check_nmi_safety(ssp, true); rcu_lock_release(&ssp->dep_map); __srcu_read_unlock_nmisafe(ssp, idx); } /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { srcu_check_nmi_safety(ssp, false); __srcu_read_unlock(ssp, idx); } /** * srcu_up_read - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. * @idx: return value from corresponding srcu_read_lock(). * * Exit an SRCU read-side critical section, but not necessarily from * the same context as the maching srcu_down_read(). */ static inline void srcu_up_read(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); WARN_ON_ONCE(in_nmi()); srcu_check_nmi_safety(ssp, false); __srcu_read_unlock(ssp, idx); } /** * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock * * Converts the preceding srcu_read_unlock into a two-way memory barrier. * * Call this after srcu_read_unlock, to guarantee that all memory operations * that occur after smp_mb__after_srcu_read_unlock will appear to happen after * the preceding srcu_read_unlock. */ static inline void smp_mb__after_srcu_read_unlock(void) { /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ } DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, _T->idx = srcu_read_lock(_T->lock), srcu_read_unlock(_T->lock, _T->idx), int idx) #endif
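/*
 * Editor's usage sketch (illustrative only, not part of srcu.h): a minimal
 * reader/updater pair built from the primitives declared above. The
 * my_config/curr_config/read_threshold()/update_config() names are
 * hypothetical; the SRCU and RCU calls themselves are the documented API.
 */
#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_config {
	int threshold;
};

static struct my_config __rcu *curr_config;
DEFINE_STATIC_SRCU(my_srcu);

/* Reader: may block inside the SRCU read-side critical section. */
static int read_threshold(void)
{
	struct my_config *c;
	int idx, val = -1;

	idx = srcu_read_lock(&my_srcu);
	c = srcu_dereference(curr_config, &my_srcu);
	if (c)
		val = c->threshold;
	srcu_read_unlock(&my_srcu, idx);
	return val;
}

/* Updater: publish the new version, wait out old readers, free the old one. */
static void update_config(struct my_config *newc)
{
	struct my_config *oldc;

	oldc = rcu_dereference_protected(curr_config, 1);
	rcu_assign_pointer(curr_config, newc);
	synchronize_srcu(&my_srcu);	/* waits only for pre-existing readers */
	kfree(oldc);
}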
// SPDX-License-Identifier: GPL-2.0-only /* * HID driver for ELO usb touchscreen 4000/4500 * * Copyright (c) 2013 Jiri Slaby * * Data parsing taken from elousb driver by Vojtech Pavlik. */ #include <linux/hid.h> #include <linux/input.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/workqueue.h> #include "hid-ids.h" #define ELO_PERIODIC_READ_INTERVAL HZ #define ELO_SMARTSET_CMD_TIMEOUT 2000 /* msec */ /* Elo SmartSet commands */ #define ELO_FLUSH_SMARTSET_RESPONSES 0x02 /* Flush all pending smartset responses */ #define ELO_SEND_SMARTSET_COMMAND 0x05 /* Send a smartset command */ #define ELO_GET_SMARTSET_RESPONSE 0x06 /* Get a smartset response */ #define ELO_DIAG 0x64 /* Diagnostics command */ #define ELO_SMARTSET_PACKET_SIZE 8 struct elo_priv { struct usb_device *usbdev; struct delayed_work work; unsigned char buffer[ELO_SMARTSET_PACKET_SIZE]; }; static struct workqueue_struct *wq; static bool use_fw_quirk = true; module_param(use_fw_quirk, bool, S_IRUGO); MODULE_PARM_DESC(use_fw_quirk, "Do periodic pokes for broken M firmwares (default = true)"); static int elo_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct input_dev *input = hidinput->input; /* * ELO devices have one Button usage in GenDesk field, which makes * hid-input map it to BTN_LEFT; that confuses userspace, which then * considers the device to be a mouse/touchpad instead of touchscreen. 
*/ clear_bit(BTN_LEFT, input->keybit); set_bit(BTN_TOUCH, input->keybit); set_bit(ABS_PRESSURE, input->absbit); input_set_abs_params(input, ABS_PRESSURE, 0, 256, 0, 0); return 0; } static void elo_process_data(struct input_dev *input, const u8 *data, int size) { int press; input_report_abs(input, ABS_X, (data[3] << 8) | data[2]); input_report_abs(input, ABS_Y, (data[5] << 8) | data[4]); press = 0; if (data[1] & 0x80) press = (data[7] << 8) | data[6]; input_report_abs(input, ABS_PRESSURE, press); if (data[1] & 0x03) { input_report_key(input, BTN_TOUCH, 1); input_sync(input); } if (data[1] & 0x04) input_report_key(input, BTN_TOUCH, 0); input_sync(input); } static int elo_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct hid_input *hidinput; if (!(hdev->claimed & HID_CLAIMED_INPUT) || list_empty(&hdev->inputs)) return 0; hidinput = list_first_entry(&hdev->inputs, struct hid_input, list); switch (report->id) { case 0: if (data[0] == 'T') { /* Mandatory ELO packet marker */ elo_process_data(hidinput->input, data, size); return 1; } break; default: /* unknown report */ /* Unknown report type; pass upstream */ hid_info(hdev, "unknown report type %d\n", report->id); break; } return 0; } static int elo_smartset_send_get(struct usb_device *dev, u8 command, void *data) { unsigned int pipe; u8 dir; if (command == ELO_SEND_SMARTSET_COMMAND) { pipe = usb_sndctrlpipe(dev, 0); dir = USB_DIR_OUT; } else if (command == ELO_GET_SMARTSET_RESPONSE) { pipe = usb_rcvctrlpipe(dev, 0); dir = USB_DIR_IN; } else return -EINVAL; return usb_control_msg(dev, pipe, command, dir | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, ELO_SMARTSET_PACKET_SIZE, ELO_SMARTSET_CMD_TIMEOUT); } static int elo_flush_smartset_responses(struct usb_device *dev) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), ELO_FLUSH_SMARTSET_RESPONSES, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } static void elo_work(struct work_struct *work) { struct elo_priv *priv = container_of(work, struct elo_priv, work.work); struct usb_device *dev = priv->usbdev; unsigned char *buffer = priv->buffer; int ret; ret = elo_flush_smartset_responses(dev); if (ret < 0) { dev_err(&dev->dev, "initial FLUSH_SMARTSET_RESPONSES failed, error %d\n", ret); goto fail; } /* send Diagnostics command */ *buffer = ELO_DIAG; ret = elo_smartset_send_get(dev, ELO_SEND_SMARTSET_COMMAND, buffer); if (ret < 0) { dev_err(&dev->dev, "send Diagnostics Command failed, error %d\n", ret); goto fail; } /* get the result */ ret = elo_smartset_send_get(dev, ELO_GET_SMARTSET_RESPONSE, buffer); if (ret < 0) { dev_err(&dev->dev, "get Diagnostics Command response failed, error %d\n", ret); goto fail; } /* read the ack */ if (*buffer != 'A') { ret = elo_smartset_send_get(dev, ELO_GET_SMARTSET_RESPONSE, buffer); if (ret < 0) { dev_err(&dev->dev, "get acknowledge response failed, error %d\n", ret); goto fail; } } fail: ret = elo_flush_smartset_responses(dev); if (ret < 0) dev_err(&dev->dev, "final FLUSH_SMARTSET_RESPONSES failed, error %d\n", ret); queue_delayed_work(wq, &priv->work, ELO_PERIODIC_READ_INTERVAL); } /* * Not all Elo devices need the periodic HID descriptor reads. * Only firmware version M needs this. 
*/ static bool elo_broken_firmware(struct usb_device *dev) { struct usb_device *hub = dev->parent; struct usb_device *child = NULL; u16 fw_lvl = le16_to_cpu(dev->descriptor.bcdDevice); u16 child_vid, child_pid; int i; if (!use_fw_quirk) return false; if (fw_lvl != 0x10d) return false; /* iterate sibling devices of the touch controller */ usb_hub_for_each_child(hub, i, child) { child_vid = le16_to_cpu(child->descriptor.idVendor); child_pid = le16_to_cpu(child->descriptor.idProduct); /* * If one of the devices below is present attached as a sibling of * the touch controller then this is a newer IBM 4820 monitor that * does not need the IBM-requested workaround if fw level is * 0x010d - aka 'M'. * No other HW can have this combination. */ if (child_vid==0x04b3) { switch (child_pid) { case 0x4676: /* 4820 21x Video */ case 0x4677: /* 4820 51x Video */ case 0x4678: /* 4820 2Lx Video */ case 0x4679: /* 4820 5Lx Video */ return false; } } } return true; } static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct elo_priv *priv; int ret; if (!hid_is_usb(hdev)) return -EINVAL; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; INIT_DELAYED_WORK(&priv->work, elo_work); priv->usbdev = interface_to_usbdev(to_usb_interface(hdev->dev.parent)); hid_set_drvdata(hdev, priv); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } if (elo_broken_firmware(priv->usbdev)) { hid_info(hdev, "broken firmware found, installing workaround\n"); queue_delayed_work(wq, &priv->work, ELO_PERIODIC_READ_INTERVAL); } return 0; err_free: kfree(priv); return ret; } static void elo_remove(struct hid_device *hdev) { struct elo_priv *priv = hid_get_drvdata(hdev); hid_hw_stop(hdev); cancel_delayed_work_sync(&priv->work); kfree(priv); } static const struct hid_device_id elo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009), }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030), }, { } }; MODULE_DEVICE_TABLE(hid, elo_devices); static struct hid_driver elo_driver = { .name = "elo", .id_table = elo_devices, .probe = elo_probe, .remove = elo_remove, .raw_event = elo_raw_event, .input_configured = elo_input_configured, }; static int __init elo_driver_init(void) { int ret; wq = create_singlethread_workqueue("elousb"); if (!wq) return -ENOMEM; ret = hid_register_driver(&elo_driver); if (ret) destroy_workqueue(wq); return ret; } module_init(elo_driver_init); static void __exit elo_driver_exit(void) { hid_unregister_driver(&elo_driver); destroy_workqueue(wq); } module_exit(elo_driver_exit); MODULE_AUTHOR("Jiri Slaby <jslaby@suse.cz>"); MODULE_LICENSE("GPL");
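/*
 * Editor's note (illustrative only, not used by the driver above): the byte
 * layout that elo_process_data() decodes from a report-0 packet, written out
 * as a struct for clarity. The struct and field names are hypothetical;
 * the offsets and bit meanings follow the data[] accesses in
 * elo_process_data() above.
 */
struct elo_report_sketch {
	u8 marker;	/* data[0]: must be 'T' */
	u8 status;	/* data[1]: bits 0-1 touch, bit 2 untouch, bit 7 pressure valid */
	__le16 x;	/* data[2], data[3]: X coordinate, low byte first */
	__le16 y;	/* data[4], data[5]: Y coordinate, low byte first */
	__le16 pressure;/* data[6], data[7]: valid only when bit 7 of status is set */
} __packed;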
// SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/swap.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ /* * This file contains the default values for the operation of the * Linux VM subsystem. Fine-tuning documentation can be found in * Documentation/admin-guide/sysctl/vm.rst. * Started 18.12.91 * Swap aging added 23.2.95, Stephen Tweedie. * Buffermem limits added 12.3.98, Rik van Riel. */ #include <linux/mm.h> #include <linux/sched.h> #include <linux/kernel_stat.h> #include <linux/swap.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/init.h> #include <linux/export.h> #include <linux/mm_inline.h> #include <linux/percpu_counter.h> #include <linux/memremap.h> #include <linux/percpu.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/backing-dev.h> #include <linux/memcontrol.h> #include <linux/gfp.h> #include <linux/uio.h> #include <linux/hugetlb.h> #include <linux/page_idle.h> #include <linux/local_lock.h> #include <linux/buffer_head.h> #include "internal.h" #define CREATE_TRACE_POINTS #include <trace/events/pagemap.h> /* How many pages do we try to swap or page in/out together? As a power of 2 */ int page_cluster; const int page_cluster_max = 31; /* Protecting only lru_rotate.fbatch which requires disabling interrupts */ struct lru_rotate { local_lock_t lock; struct folio_batch fbatch; }; static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = { .lock = INIT_LOCAL_LOCK(lock), }; /* * The following folio batches are grouped together because they are protected * by disabling preemption (and interrupts remain enabled). 
*/ struct cpu_fbatches { local_lock_t lock; struct folio_batch lru_add; struct folio_batch lru_deactivate_file; struct folio_batch lru_deactivate; struct folio_batch lru_lazyfree; #ifdef CONFIG_SMP struct folio_batch activate; #endif }; static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches) = { .lock = INIT_LOCAL_LOCK(lock), }; static void __page_cache_release(struct folio *folio, struct lruvec **lruvecp, unsigned long *flagsp) { if (folio_test_lru(folio)) { folio_lruvec_relock_irqsave(folio, lruvecp, flagsp); lruvec_del_folio(*lruvecp, folio); __folio_clear_lru_flags(folio); } /* * In rare cases, when truncation or holepunching raced with * munlock after VM_LOCKED was cleared, Mlocked may still be * found set here. This does not indicate a problem, unless * "unevictable_pgs_cleared" appears worryingly large. */ if (unlikely(folio_test_mlocked(folio))) { long nr_pages = folio_nr_pages(folio); __folio_clear_mlocked(folio); zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages); count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages); } } /* * This path almost never happens for VM activity - pages are normally freed * in batches. But it gets used by networking - and for compound pages. */ static void page_cache_release(struct folio *folio) { struct lruvec *lruvec = NULL; unsigned long flags; __page_cache_release(folio, &lruvec, &flags); if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); } static void __folio_put_small(struct folio *folio) { page_cache_release(folio); mem_cgroup_uncharge(folio); free_unref_page(&folio->page, 0); } static void __folio_put_large(struct folio *folio) { /* * __page_cache_release() is supposed to be called for thp, not for * hugetlb. This is because hugetlb page does never have PageLRU set * (it's never listed to any LRU lists) and no memcg routines should * be called for hugetlb (it has a separate hugetlb_cgroup.) */ if (!folio_test_hugetlb(folio)) page_cache_release(folio); destroy_large_folio(folio); } void __folio_put(struct folio *folio) { if (unlikely(folio_is_zone_device(folio))) free_zone_device_page(&folio->page); else if (unlikely(folio_test_large(folio))) __folio_put_large(folio); else __folio_put_small(folio); } EXPORT_SYMBOL(__folio_put); /** * put_pages_list() - release a list of pages * @pages: list of pages threaded on page->lru * * Release a list of pages which are strung together on page.lru. */ void put_pages_list(struct list_head *pages) { struct folio_batch fbatch; struct folio *folio, *next; folio_batch_init(&fbatch); list_for_each_entry_safe(folio, next, pages, lru) { if (!folio_put_testzero(folio)) continue; if (folio_test_large(folio)) { __folio_put_large(folio); continue; } /* LRU flag must be clear because it's passed using the lru */ if (folio_batch_add(&fbatch, folio) > 0) continue; free_unref_folios(&fbatch); } if (fbatch.nr) free_unref_folios(&fbatch); INIT_LIST_HEAD(pages); } EXPORT_SYMBOL(put_pages_list); typedef void (*move_fn_t)(struct lruvec *lruvec, struct folio *folio); static void lru_add_fn(struct lruvec *lruvec, struct folio *folio) { int was_unevictable = folio_test_clear_unevictable(folio); long nr_pages = folio_nr_pages(folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* * Is an smp_mb__after_atomic() still required here, before * folio_evictable() tests the mlocked flag, to rule out the possibility * of stranding an evictable folio on an unevictable LRU? I think * not, because __munlock_folio() only clears the mlocked flag * while the LRU lock is held. 
* * (That is not true of __page_cache_release(), and not necessarily * true of folios_put(): but those only clear the mlocked flag after * folio_put_testzero() has excluded any other users of the folio.) */ if (folio_evictable(folio)) { if (was_unevictable) __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { folio_clear_active(folio); folio_set_unevictable(folio); /* * folio->mlock_count = !!folio_test_mlocked(folio)? * But that leaves __mlock_folio() in doubt whether another * actor has already counted the mlock or not. Err on the * safe side, underestimate, let page reclaim fix it, rather * than leaving a page on the unevictable LRU indefinitely. */ folio->mlock_count = 0; if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); } lruvec_add_folio(lruvec, folio); trace_mm_lru_insertion(folio); } static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn) { int i; struct lruvec *lruvec = NULL; unsigned long flags = 0; for (i = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; /* block memcg migration while the folio moves between lru */ if (move_fn != lru_add_fn && !folio_test_clear_lru(folio)) continue; folio_lruvec_relock_irqsave(folio, &lruvec, &flags); move_fn(lruvec, folio); folio_set_lru(folio); } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); folios_put(fbatch); } static void folio_batch_add_and_move(struct folio_batch *fbatch, struct folio *folio, move_fn_t move_fn) { if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) && !lru_cache_disabled()) return; folio_batch_move_lru(fbatch, move_fn); } static void lru_move_tail_fn(struct lruvec *lruvec, struct folio *folio) { if (!folio_test_unevictable(folio)) { lruvec_del_folio(lruvec, folio); folio_clear_active(folio); lruvec_add_folio_tail(lruvec, folio); __count_vm_events(PGROTATED, folio_nr_pages(folio)); } } /* * Writeback is about to end against a folio which has been marked for * immediate reclaim. If it still appears to be reclaimable, move it * to the tail of the inactive list. * * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races. */ void folio_rotate_reclaimable(struct folio *folio) { if (!folio_test_locked(folio) && !folio_test_dirty(folio) && !folio_test_unevictable(folio) && folio_test_lru(folio)) { struct folio_batch *fbatch; unsigned long flags; folio_get(folio); local_lock_irqsave(&lru_rotate.lock, flags); fbatch = this_cpu_ptr(&lru_rotate.fbatch); folio_batch_add_and_move(fbatch, folio, lru_move_tail_fn); local_unlock_irqrestore(&lru_rotate.lock, flags); } } void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_io, unsigned int nr_rotated) { unsigned long cost; /* * Reflect the relative cost of incurring IO and spending CPU * time on rotations. This doesn't attempt to make a precise * comparison, it just says: if reloads are about comparable * between the LRU lists, or rotations are overwhelmingly * different between them, adjust scan balance for CPU work. */ cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated; do { unsigned long lrusize; /* * Hold lruvec->lru_lock is safe here, since * 1) The pinned lruvec in reclaim, or * 2) From a pre-LRU page during refault (which also holds the * rcu lock, so would be safe even if the page was on the LRU * and could move simultaneously to a new lruvec). 
*/ spin_lock_irq(&lruvec->lru_lock); /* Record cost event */ if (file) lruvec->file_cost += cost; else lruvec->anon_cost += cost; /* * Decay previous events * * Because workloads change over time (and to avoid * overflow) we keep these statistics as a floating * average, which ends up weighing recent refaults * more than old ones. */ lrusize = lruvec_page_state(lruvec, NR_INACTIVE_ANON) + lruvec_page_state(lruvec, NR_ACTIVE_ANON) + lruvec_page_state(lruvec, NR_INACTIVE_FILE) + lruvec_page_state(lruvec, NR_ACTIVE_FILE); if (lruvec->file_cost + lruvec->anon_cost > lrusize / 4) { lruvec->file_cost /= 2; lruvec->anon_cost /= 2; } spin_unlock_irq(&lruvec->lru_lock); } while ((lruvec = parent_lruvec(lruvec))); } void lru_note_cost_refault(struct folio *folio) { lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio), folio_nr_pages(folio), 0); } static void folio_activate_fn(struct lruvec *lruvec, struct folio *folio) { if (!folio_test_active(folio) && !folio_test_unevictable(folio)) { long nr_pages = folio_nr_pages(folio); lruvec_del_folio(lruvec, folio); folio_set_active(folio); lruvec_add_folio(lruvec, folio); trace_mm_lru_activate(folio); __count_vm_events(PGACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, nr_pages); } } #ifdef CONFIG_SMP static void folio_activate_drain(int cpu) { struct folio_batch *fbatch = &per_cpu(cpu_fbatches.activate, cpu); if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, folio_activate_fn); } void folio_activate(struct folio *folio) { if (folio_test_lru(folio) && !folio_test_active(folio) && !folio_test_unevictable(folio)) { struct folio_batch *fbatch; folio_get(folio); local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.activate); folio_batch_add_and_move(fbatch, folio, folio_activate_fn); local_unlock(&cpu_fbatches.lock); } } #else static inline void folio_activate_drain(int cpu) { } void folio_activate(struct folio *folio) { struct lruvec *lruvec; if (folio_test_clear_lru(folio)) { lruvec = folio_lruvec_lock_irq(folio); folio_activate_fn(lruvec, folio); unlock_page_lruvec_irq(lruvec); folio_set_lru(folio); } } #endif static void __lru_cache_activate_folio(struct folio *folio) { struct folio_batch *fbatch; int i; local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_add); /* * Search backwards on the optimistic assumption that the folio being * activated has just been added to this batch. Note that only * the local batch is examined as a !LRU folio could be in the * process of being released, reclaimed, migrated or on a remote * batch that is currently being drained. Furthermore, marking * a remote batch's folio active potentially hits a race where * a folio is marked active just after it is added to the inactive * list causing accounting errors and BUG_ON checks to trigger. 
*/ for (i = folio_batch_count(fbatch) - 1; i >= 0; i--) { struct folio *batch_folio = fbatch->folios[i]; if (batch_folio == folio) { folio_set_active(folio); break; } } local_unlock(&cpu_fbatches.lock); } #ifdef CONFIG_LRU_GEN static void folio_inc_refs(struct folio *folio) { unsigned long new_flags, old_flags = READ_ONCE(folio->flags); if (folio_test_unevictable(folio)) return; if (!folio_test_referenced(folio)) { folio_set_referenced(folio); return; } if (!folio_test_workingset(folio)) { folio_set_workingset(folio); return; } /* see the comment on MAX_NR_TIERS */ do { new_flags = old_flags & LRU_REFS_MASK; if (new_flags == LRU_REFS_MASK) break; new_flags += BIT(LRU_REFS_PGOFF); new_flags |= old_flags & ~LRU_REFS_MASK; } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags)); } #else static void folio_inc_refs(struct folio *folio) { } #endif /* CONFIG_LRU_GEN */ /* * Mark a page as having seen activity. * * inactive,unreferenced -> inactive,referenced * inactive,referenced -> active,unreferenced * active,unreferenced -> active,referenced * * When a newly allocated page is not yet visible, so safe for non-atomic ops, * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). */ void folio_mark_accessed(struct folio *folio) { if (lru_gen_enabled()) { folio_inc_refs(folio); return; } if (!folio_test_referenced(folio)) { folio_set_referenced(folio); } else if (folio_test_unevictable(folio)) { /* * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, * this list is never rotated or maintained, so marking an * unevictable page accessed has no effect. */ } else if (!folio_test_active(folio)) { /* * If the folio is on the LRU, queue it for activation via * cpu_fbatches.activate. Otherwise, assume the folio is in a * folio_batch, mark it active and it'll be moved to the active * LRU on the next drain. */ if (folio_test_lru(folio)) folio_activate(folio); else __lru_cache_activate_folio(folio); folio_clear_referenced(folio); workingset_activation(folio); } if (folio_test_idle(folio)) folio_clear_idle(folio); } EXPORT_SYMBOL(folio_mark_accessed); /** * folio_add_lru - Add a folio to an LRU list. * @folio: The folio to be added to the LRU. * * Queue the folio for addition to the LRU. The decision on whether * to add the page to the [in]active [file|anon] list is deferred until the * folio_batch is drained. This gives a chance for the caller of folio_add_lru() * have the folio added to the active list using folio_mark_accessed(). */ void folio_add_lru(struct folio *folio) { struct folio_batch *fbatch; VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* see the comment in lru_gen_add_folio() */ if (lru_gen_enabled() && !folio_test_unevictable(folio) && lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) folio_set_active(folio); folio_get(folio); local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_add); folio_batch_add_and_move(fbatch, folio, lru_add_fn); local_unlock(&cpu_fbatches.lock); } EXPORT_SYMBOL(folio_add_lru); /** * folio_add_lru_vma() - Add a folio to the appropate LRU list for this VMA. * @folio: The folio to be added to the LRU. * @vma: VMA in which the folio is mapped. * * If the VMA is mlocked, @folio is added to the unevictable list. * Otherwise, it is treated the same way as folio_add_lru(). 
*/ void folio_add_lru_vma(struct folio *folio, struct vm_area_struct *vma) { VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED)) mlock_new_folio(folio); else folio_add_lru(folio); } /* * If the folio cannot be invalidated, it is moved to the * inactive list to speed up its reclaim. It is moved to the * head of the list, rather than the tail, to give the flusher * threads some time to write it out, as this is much more * effective than the single-page writeout from reclaim. * * If the folio isn't mapped and dirty/writeback, the folio * could be reclaimed asap using the reclaim flag. * * 1. active, mapped folio -> none * 2. active, dirty/writeback folio -> inactive, head, reclaim * 3. inactive, mapped folio -> none * 4. inactive, dirty/writeback folio -> inactive, head, reclaim * 5. inactive, clean -> inactive, tail * 6. Others -> none * * In 4, it moves to the head of the inactive list so the folio is * written out by flusher threads as this is much more efficient * than the single-page writeout from reclaim. */ static void lru_deactivate_file_fn(struct lruvec *lruvec, struct folio *folio) { bool active = folio_test_active(folio); long nr_pages = folio_nr_pages(folio); if (folio_test_unevictable(folio)) return; /* Some processes are using the folio */ if (folio_mapped(folio)) return; lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_clear_referenced(folio); if (folio_test_writeback(folio) || folio_test_dirty(folio)) { /* * Setting the reclaim flag could race with * folio_end_writeback() and confuse readahead. But the * race window is _really_ small and it's not a critical * problem. */ lruvec_add_folio(lruvec, folio); folio_set_reclaim(folio); } else { /* * The folio's writeback ended while it was in the batch. * We move that folio to the tail of the inactive list. */ lruvec_add_folio_tail(lruvec, folio); __count_vm_events(PGROTATED, nr_pages); } if (active) { __count_vm_events(PGDEACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); } } static void lru_deactivate_fn(struct lruvec *lruvec, struct folio *folio) { if (!folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) { long nr_pages = folio_nr_pages(folio); lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_clear_referenced(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(PGDEACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); } } static void lru_lazyfree_fn(struct lruvec *lruvec, struct folio *folio) { if (folio_test_anon(folio) && folio_test_swapbacked(folio) && !folio_test_swapcache(folio) && !folio_test_unevictable(folio)) { long nr_pages = folio_nr_pages(folio); lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_clear_referenced(folio); /* * Lazyfree folios are clean anonymous folios. They have * the swapbacked flag cleared, to distinguish them from normal * anonymous folios */ folio_clear_swapbacked(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(PGLAZYFREE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages); } } /* * Drain pages out of the cpu's folio_batch. * Either "cpu" is the current CPU, and preemption has already been * disabled; or "cpu" is being hot-unplugged, and is already dead. 
*/ void lru_add_drain_cpu(int cpu) { struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu); struct folio_batch *fbatch = &fbatches->lru_add; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_add_fn); fbatch = &per_cpu(lru_rotate.fbatch, cpu); /* Disabling interrupts below acts as a compiler barrier. */ if (data_race(folio_batch_count(fbatch))) { unsigned long flags; /* No harm done if a racing interrupt already did this */ local_lock_irqsave(&lru_rotate.lock, flags); folio_batch_move_lru(fbatch, lru_move_tail_fn); local_unlock_irqrestore(&lru_rotate.lock, flags); } fbatch = &fbatches->lru_deactivate_file; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_deactivate_file_fn); fbatch = &fbatches->lru_deactivate; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_deactivate_fn); fbatch = &fbatches->lru_lazyfree; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_lazyfree_fn); folio_activate_drain(cpu); } /** * deactivate_file_folio() - Deactivate a file folio. * @folio: Folio to deactivate. * * This function hints to the VM that @folio is a good reclaim candidate, * for example if its invalidation fails due to the folio being dirty * or under writeback. * * Context: Caller holds a reference on the folio. */ void deactivate_file_folio(struct folio *folio) { struct folio_batch *fbatch; /* Deactivating an unevictable folio will not accelerate reclaim */ if (folio_test_unevictable(folio)) return; folio_get(folio); local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file); folio_batch_add_and_move(fbatch, folio, lru_deactivate_file_fn); local_unlock(&cpu_fbatches.lock); } /* * folio_deactivate - deactivate a folio * @folio: folio to deactivate * * folio_deactivate() moves @folio to the inactive list if @folio was on the * active list and was not unevictable. This is done to accelerate the * reclaim of @folio. */ void folio_deactivate(struct folio *folio) { if (folio_test_lru(folio) && !folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) { struct folio_batch *fbatch; folio_get(folio); local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate); folio_batch_add_and_move(fbatch, folio, lru_deactivate_fn); local_unlock(&cpu_fbatches.lock); } } /** * folio_mark_lazyfree - make an anon folio lazyfree * @folio: folio to deactivate * * folio_mark_lazyfree() moves @folio to the inactive file list. * This is done to accelerate the reclaim of @folio. */ void folio_mark_lazyfree(struct folio *folio) { if (folio_test_lru(folio) && folio_test_anon(folio) && folio_test_swapbacked(folio) && !folio_test_swapcache(folio) && !folio_test_unevictable(folio)) { struct folio_batch *fbatch; folio_get(folio); local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree); folio_batch_add_and_move(fbatch, folio, lru_lazyfree_fn); local_unlock(&cpu_fbatches.lock); } } void lru_add_drain(void) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); local_unlock(&cpu_fbatches.lock); mlock_drain_local(); } /* * It's called from per-cpu workqueue context in SMP case so * lru_add_drain_cpu and invalidate_bh_lrus_cpu should run on * the same cpu. It shouldn't be a problem in !SMP case since * the core is only one and the locks will disable preemption. 
*/ static void lru_add_and_bh_lrus_drain(void) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); local_unlock(&cpu_fbatches.lock); invalidate_bh_lrus_cpu(); mlock_drain_local(); } void lru_add_drain_cpu_zone(struct zone *zone) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); drain_local_pages(zone); local_unlock(&cpu_fbatches.lock); mlock_drain_local(); } #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); static void lru_add_drain_per_cpu(struct work_struct *dummy) { lru_add_and_bh_lrus_drain(); } static bool cpu_needs_drain(unsigned int cpu) { struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu); /* Check these in order of likelihood that they're not zero */ return folio_batch_count(&fbatches->lru_add) || data_race(folio_batch_count(&per_cpu(lru_rotate.fbatch, cpu))) || folio_batch_count(&fbatches->lru_deactivate_file) || folio_batch_count(&fbatches->lru_deactivate) || folio_batch_count(&fbatches->lru_lazyfree) || folio_batch_count(&fbatches->activate) || need_mlock_drain(cpu) || has_bh_in_lru(cpu, NULL); } /* * Doesn't need any cpu hotplug locking because we do rely on per-cpu * kworkers being shut down before our page_alloc_cpu_dead callback is * executed on the offlined cpu. * Calling this function with cpu hotplug locks held can actually lead * to obscure indirect dependencies via WQ context. */ static inline void __lru_add_drain_all(bool force_all_cpus) { /* * lru_drain_gen - Global pages generation number * * (A) Definition: global lru_drain_gen = x implies that all generations * 0 < n <= x are already *scheduled* for draining. * * This is an optimization for the highly-contended use case where a * user space workload keeps constantly generating a flow of pages for * each CPU. */ static unsigned int lru_drain_gen; static struct cpumask has_work; static DEFINE_MUTEX(lock); unsigned cpu, this_gen; /* * Make sure nobody triggers this path before mm_percpu_wq is fully * initialized. */ if (WARN_ON(!mm_percpu_wq)) return; /* * Guarantee folio_batch counter stores visible by this CPU * are visible to other CPUs before loading the current drain * generation. */ smp_mb(); /* * (B) Locally cache global LRU draining generation number * * The read barrier ensures that the counter is loaded before the mutex * is taken. It pairs with smp_mb() inside the mutex critical section * at (D). */ this_gen = smp_load_acquire(&lru_drain_gen); mutex_lock(&lock); /* * (C) Exit the draining operation if a newer generation, from another * lru_add_drain_all(), was already scheduled for draining. Check (A). */ if (unlikely(this_gen != lru_drain_gen && !force_all_cpus)) goto done; /* * (D) Increment global generation number * * Pairs with smp_load_acquire() at (B), outside of the critical * section. Use a full memory barrier to guarantee that the * new global drain generation number is stored before loading * folio_batch counters. * * This pairing must be done here, before the for_each_online_cpu loop * below which drains the page vectors. * * Let x, y, and z represent some system CPU numbers, where x < y < z. * Assume CPU #z is in the middle of the for_each_online_cpu loop * below and has already reached CPU #y's per-cpu data. CPU #x comes * along, adds some pages to its per-cpu vectors, then calls * lru_add_drain_all(). * * If the paired barrier is done at any later step, e.g. after the * loop, CPU #x will just exit at (C) and miss flushing out all of its * added pages. 
*/ WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1); smp_mb(); cpumask_clear(&has_work); for_each_online_cpu(cpu) { struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); if (cpu_needs_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); queue_work_on(cpu, mm_percpu_wq, work); __cpumask_set_cpu(cpu, &has_work); } } for_each_cpu(cpu, &has_work) flush_work(&per_cpu(lru_add_drain_work, cpu)); done: mutex_unlock(&lock); } void lru_add_drain_all(void) { __lru_add_drain_all(false); } #else void lru_add_drain_all(void) { lru_add_drain(); } #endif /* CONFIG_SMP */ atomic_t lru_disable_count = ATOMIC_INIT(0); /* * lru_cache_disable() needs to be called before we start compiling * a list of pages to be migrated using isolate_lru_page(). * It drains pages on LRU cache and then disable on all cpus until * lru_cache_enable is called. * * Must be paired with a call to lru_cache_enable(). */ void lru_cache_disable(void) { atomic_inc(&lru_disable_count); /* * Readers of lru_disable_count are protected by either disabling * preemption or rcu_read_lock: * * preempt_disable, local_irq_disable [bh_lru_lock()] * rcu_read_lock [rt_spin_lock CONFIG_PREEMPT_RT] * preempt_disable [local_lock !CONFIG_PREEMPT_RT] * * Since v5.1 kernel, synchronize_rcu() is guaranteed to wait on * preempt_disable() regions of code. So any CPU which sees * lru_disable_count = 0 will have exited the critical * section when synchronize_rcu() returns. */ synchronize_rcu_expedited(); #ifdef CONFIG_SMP __lru_add_drain_all(true); #else lru_add_and_bh_lrus_drain(); #endif } /** * folios_put_refs - Reduce the reference count on a batch of folios. * @folios: The folios. * @refs: The number of refs to subtract from each folio. * * Like folio_put(), but for a batch of folios. This is more efficient * than writing the loop yourself as it will optimise the locks which need * to be taken if the folios are freed. The folios batch is returned * empty and ready to be reused for another batch; there is no need * to reinitialise it. If @refs is NULL, we subtract one from each * folio refcount. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ void folios_put_refs(struct folio_batch *folios, unsigned int *refs) { int i, j; struct lruvec *lruvec = NULL; unsigned long flags = 0; for (i = 0, j = 0; i < folios->nr; i++) { struct folio *folio = folios->folios[i]; unsigned int nr_refs = refs ? 
refs[i] : 1; if (is_huge_zero_page(&folio->page)) continue; if (folio_is_zone_device(folio)) { if (lruvec) { unlock_page_lruvec_irqrestore(lruvec, flags); lruvec = NULL; } if (put_devmap_managed_page_refs(&folio->page, nr_refs)) continue; if (folio_ref_sub_and_test(folio, nr_refs)) free_zone_device_page(&folio->page); continue; } if (!folio_ref_sub_and_test(folio, nr_refs)) continue; /* hugetlb has its own memcg */ if (folio_test_hugetlb(folio)) { if (lruvec) { unlock_page_lruvec_irqrestore(lruvec, flags); lruvec = NULL; } free_huge_folio(folio); continue; } if (folio_test_large(folio) && folio_test_large_rmappable(folio)) folio_undo_large_rmappable(folio); __page_cache_release(folio, &lruvec, &flags); if (j != i) folios->folios[j] = folio; j++; } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); if (!j) { folio_batch_reinit(folios); return; } folios->nr = j; mem_cgroup_uncharge_folios(folios); free_unref_folios(folios); } EXPORT_SYMBOL(folios_put_refs); /** * release_pages - batched put_page() * @arg: array of pages to release * @nr: number of pages * * Decrement the reference count on all the pages in @arg. If it * fell to zero, remove the page from the LRU and free it. * * Note that the argument can be an array of pages, encoded pages, * or folio pointers. We ignore any encoded bits, and turn any of * them into just a folio that gets free'd. */ void release_pages(release_pages_arg arg, int nr) { struct folio_batch fbatch; int refs[PAGEVEC_SIZE]; struct encoded_page **encoded = arg.encoded_pages; int i; folio_batch_init(&fbatch); for (i = 0; i < nr; i++) { /* Turn any of the argument types into a folio */ struct folio *folio = page_folio(encoded_page_ptr(encoded[i])); /* Is our next entry actually "nr_pages" -> "nr_refs" ? */ refs[fbatch.nr] = 1; if (unlikely(encoded_page_flags(encoded[i]) & ENCODED_PAGE_BIT_NR_PAGES_NEXT)) refs[fbatch.nr] = encoded_nr_pages(encoded[++i]); if (folio_batch_add(&fbatch, folio) > 0) continue; folios_put_refs(&fbatch, refs); } if (fbatch.nr) folios_put_refs(&fbatch, refs); } EXPORT_SYMBOL(release_pages); /* * The folios which we're about to release may be in the deferred lru-addition * queues. That would prevent them from really being freed right now. That's * OK from a correctness point of view but is inefficient - those folios may be * cache-warm and we want to give them back to the page allocator ASAP. * * So __folio_batch_release() will drain those queues here. * folio_batch_move_lru() calls folios_put() directly to avoid * mutual recursion. */ void __folio_batch_release(struct folio_batch *fbatch) { if (!fbatch->percpu_pvec_drained) { lru_add_drain(); fbatch->percpu_pvec_drained = true; } folios_put(fbatch); } EXPORT_SYMBOL(__folio_batch_release); /** * folio_batch_remove_exceptionals() - Prune non-folios from a batch. * @fbatch: The batch to prune * * find_get_entries() fills a batch with both folios and shadow/swap/DAX * entries. This function prunes all the non-folio entries from @fbatch * without leaving holes, so that it can be passed on to folio-only batch * operations. 
*/ void folio_batch_remove_exceptionals(struct folio_batch *fbatch) { unsigned int i, j; for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; if (!xa_is_value(folio)) fbatch->folios[j++] = folio; } fbatch->nr = j; } /* * Perform any setup for the swap system */ void __init swap_setup(void) { unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); /* Use a smaller cluster for small-memory machines */ if (megs < 16) page_cluster = 2; else page_cluster = 3; /* * Right now other parts of the system means that we * _really_ don't want to cluster much more */ }
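/*
 * Editor's caller sketch (illustrative only, not part of this file): the
 * accumulate-and-flush folio_batch pattern used by put_pages_list() and
 * release_pages() above, here combined with folios_put_refs() to drop a
 * caller-specified number of references per folio. drop_folio_refs() and
 * its arguments are hypothetical; the batch helpers are the documented API.
 */
static void drop_folio_refs(struct folio **folios, unsigned int *nr_refs,
			    unsigned int nr)
{
	struct folio_batch fbatch;
	unsigned int refs[PAGEVEC_SIZE];
	unsigned int i;

	folio_batch_init(&fbatch);
	for (i = 0; i < nr; i++) {
		/* Record this folio's ref count at the slot it will occupy. */
		refs[folio_batch_count(&fbatch)] = nr_refs[i];
		if (folio_batch_add(&fbatch, folios[i]) > 0)
			continue;
		/* Batch is full; folios_put_refs() returns it empty for reuse. */
		folios_put_refs(&fbatch, refs);
	}
	if (folio_batch_count(&fbatch))
		folios_put_refs(&fbatch, refs);
}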
/* SPDX-License-Identifier: GPL-2.0 */ /* * NUMA memory policies for Linux. * Copyright 2003,2004 Andi Kleen SuSE Labs */ #ifndef _LINUX_MEMPOLICY_H #define _LINUX_MEMPOLICY_H 1 #include <linux/sched.h> #include <linux/mmzone.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/nodemask.h> #include <linux/pagemap.h> #include <uapi/linux/mempolicy.h> struct mm_struct; #define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */ #ifdef CONFIG_NUMA /* * Describe a memory policy. * * A mempolicy can be either associated with a process or with a VMA. * For VMA related allocations the VMA policy is preferred, otherwise * the process policy is used. Interrupts ignore the memory policy * of the current process. * * Locking policy for interleave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on * mmap_lock. * * Freeing policy: * Mempolicy objects are reference counted. A mempolicy will be freed when * mpol_put() decrements the reference count to zero. * * Duplicating policy objects: * mpol_dup() allocates a new mempolicy and copies the specified mempolicy * to the new storage. The reference count of the new object is initialized * to 1, representing the caller of mpol_dup(). */ struct mempolicy { atomic_t refcnt; unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ nodemask_t nodes; /* interleave/bind/prefer */ int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */ union { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ } w; }; /* * Support for managing mempolicy data objects (clone, copy, destroy) * The default fast path of a NULL MPOL_DEFAULT policy is always inlined. */ extern void __mpol_put(struct mempolicy *pol); static inline void mpol_put(struct mempolicy *pol) { if (pol) __mpol_put(pol); } /* * Does mempolicy pol need explicit unref after use? * Currently only needed for shared policies. 
*/ static inline int mpol_needs_cond_ref(struct mempolicy *pol) { return (pol && (pol->flags & MPOL_F_SHARED)); } static inline void mpol_cond_put(struct mempolicy *pol) { if (mpol_needs_cond_ref(pol)) __mpol_put(pol); } extern struct mempolicy *__mpol_dup(struct mempolicy *pol); static inline struct mempolicy *mpol_dup(struct mempolicy *pol) { if (pol) pol = __mpol_dup(pol); return pol; } static inline void mpol_get(struct mempolicy *pol) { if (pol) atomic_inc(&pol->refcnt); } extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b); static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { if (a == b) return true; return __mpol_equal(a, b); } /* * Tree of shared policies for a shared memory region. */ struct shared_policy { struct rb_root root; rwlock_t lock; }; struct sp_node { struct rb_node nd; pgoff_t start, end; struct mempolicy *policy; }; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *mpol); void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx); struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); extern void numa_policy_init(void); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask); extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; static inline void check_highest_zone(enum zone_type k) { if (k > policy_zone && k != ZONE_MOVABLE) policy_zone = k; } int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags); #ifdef CONFIG_TMPFS extern int mpol_parse_str(char *str, struct mempolicy **mpol); #endif extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return (pol->mode == MPOL_PREFERRED_MANY); } extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); #else struct mempolicy {}; static inline struct mempolicy *get_task_policy(struct task_struct *p) { return NULL; } static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } static inline void mpol_put(struct mempolicy *pol) { } static inline void mpol_cond_put(struct mempolicy *pol) { } static inline void mpol_get(struct mempolicy *pol) { } struct shared_policy {}; static inline void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { } static inline void mpol_free_shared_policy(struct shared_policy *sp) { } static 
inline struct mempolicy * mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { return NULL; } static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx) { *ilx = 0; return NULL; } static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { return 0; } static inline void numa_policy_init(void) { } static inline void numa_default_policy(void) { } static inline void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { } static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { } static inline int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { *mpol = NULL; *nodemask = NULL; return 0; } static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; } static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { return 0; } static inline void check_highest_zone(int k) { } #ifdef CONFIG_TMPFS static inline int mpol_parse_str(char *str, struct mempolicy **mpol) { return 1; /* error */ } #endif static inline int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma, unsigned long address) { return -1; /* no node preference */ } static inline void mpol_put_task_policy(struct task_struct *task) { } static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return false; } #endif /* CONFIG_NUMA */ #endif
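A minimal illustrative sketch of the lookup/put pattern the helpers above are designed for; the caller name is hypothetical and the actual allocation step is elided, so treat this as a usage shape rather than code taken from the header:

#include <linux/mempolicy.h>

/* Hypothetical caller: shows when the conditional reference is taken and dropped. */
static int example_policy_user(struct vm_area_struct *vma, unsigned long addr)
{
        struct mempolicy *pol;
        pgoff_t ilx;

        /*
         * get_vma_policy() bumps the refcount only when the policy is shared
         * (MPOL_F_SHARED), which is exactly what mpol_needs_cond_ref() tests.
         */
        pol = get_vma_policy(vma, addr, 0, &ilx);

        /* ... consult pol and ilx for the allocation decision (elided) ... */

        /* Drops the reference only if one was actually taken above. */
        mpol_cond_put(pol);
        return 0;
}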
// SPDX-License-Identifier: GPL-2.0 /* Device wakeirq helper functions */ #include <linux/device.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/slab.h> #include <linux/pm_runtime.h> #include <linux/pm_wakeirq.h> #include "power.h" /** * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ * @dev: Device entry * @wirq: Wake irq specific data * * Internal function to attach a dedicated wake-up interrupt as a wake IRQ. */ static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq) { unsigned long flags; if (!dev || !wirq) return -EINVAL; spin_lock_irqsave(&dev->power.lock, flags); if (dev_WARN_ONCE(dev, dev->power.wakeirq, "wake irq already initialized\n")) { spin_unlock_irqrestore(&dev->power.lock, flags); return -EEXIST; } dev->power.wakeirq = wirq; device_wakeup_attach_irq(dev, wirq); spin_unlock_irqrestore(&dev->power.lock, flags); return 0; } /** * dev_pm_set_wake_irq - Attach device IO interrupt as wake IRQ * @dev: Device entry * @irq: Device IO interrupt * * Attach a device IO interrupt as a wake IRQ. The wake IRQ gets * automatically configured for wake-up from suspend based * on the device specific sysfs wakeup entry. Typically called * during driver probe after calling device_init_wakeup(). */ int dev_pm_set_wake_irq(struct device *dev, int irq) { struct wake_irq *wirq; int err; if (irq < 0) return -EINVAL; wirq = kzalloc(sizeof(*wirq), GFP_KERNEL); if (!wirq) return -ENOMEM; wirq->dev = dev; wirq->irq = irq; err = dev_pm_attach_wake_irq(dev, wirq); if (err) kfree(wirq); return err; } EXPORT_SYMBOL_GPL(dev_pm_set_wake_irq); /** * dev_pm_clear_wake_irq - Detach a device IO interrupt wake IRQ * @dev: Device entry * * Detach a device wake IRQ and free resources. * * Note that it's OK for drivers to call this without calling * dev_pm_set_wake_irq() as all the driver instances may not have * a wake IRQ configured. This avoids adding wake IRQ specific * checks into the drivers.
*/ void dev_pm_clear_wake_irq(struct device *dev) { struct wake_irq *wirq = dev->power.wakeirq; unsigned long flags; if (!wirq) return; spin_lock_irqsave(&dev->power.lock, flags); device_wakeup_detach_irq(dev); dev->power.wakeirq = NULL; spin_unlock_irqrestore(&dev->power.lock, flags); if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED) { free_irq(wirq->irq, wirq); wirq->status &= ~WAKE_IRQ_DEDICATED_MASK; } kfree(wirq->name); kfree(wirq); } EXPORT_SYMBOL_GPL(dev_pm_clear_wake_irq); /** * handle_threaded_wake_irq - Handler for dedicated wake-up interrupts * @irq: Device specific dedicated wake-up interrupt * @_wirq: Wake IRQ data * * Some devices have a separate wake-up interrupt in addition to the * device IO interrupt. The wake-up interrupt signals that a device * should be woken up from it's idle state. This handler uses device * specific pm_runtime functions to wake the device, and then it's * up to the device to do whatever it needs to. Note that as the * device may need to restore context and start up regulators, we * use a threaded IRQ. * * Also note that we are not resending the lost device interrupts. * We assume that the wake-up interrupt just needs to wake-up the * device, and then device's pm_runtime_resume() can deal with the * situation. */ static irqreturn_t handle_threaded_wake_irq(int irq, void *_wirq) { struct wake_irq *wirq = _wirq; int res; /* Maybe abort suspend? */ if (irqd_is_wakeup_set(irq_get_irq_data(irq))) { pm_wakeup_event(wirq->dev, 0); return IRQ_HANDLED; } /* We don't want RPM_ASYNC or RPM_NOWAIT here */ res = pm_runtime_resume(wirq->dev); if (res < 0) dev_warn(wirq->dev, "wake IRQ with no resume: %i\n", res); return IRQ_HANDLED; } static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned int flag) { struct wake_irq *wirq; int err; if (irq < 0) return -EINVAL; wirq = kzalloc(sizeof(*wirq), GFP_KERNEL); if (!wirq) return -ENOMEM; wirq->name = kasprintf(GFP_KERNEL, "%s:wakeup", dev_name(dev)); if (!wirq->name) { err = -ENOMEM; goto err_free; } wirq->dev = dev; wirq->irq = irq; /* Prevent deferred spurious wakeirqs with disable_irq_nosync() */ irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY); /* * Consumer device may need to power up and restore state * so we use a threaded irq. */ err = request_threaded_irq(irq, NULL, handle_threaded_wake_irq, IRQF_ONESHOT | IRQF_NO_AUTOEN, wirq->name, wirq); if (err) goto err_free_name; err = dev_pm_attach_wake_irq(dev, wirq); if (err) goto err_free_irq; wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED | flag; return err; err_free_irq: free_irq(irq, wirq); err_free_name: kfree(wirq->name); err_free: kfree(wirq); return err; } /** * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt * @dev: Device entry * @irq: Device wake-up interrupt * * Unless your hardware has separate wake-up interrupts in addition * to the device IO interrupts, you don't need this. * * Sets up a threaded interrupt handler for a device that has * a dedicated wake-up interrupt in addition to the device IO * interrupt. */ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) { return __dev_pm_set_dedicated_wake_irq(dev, irq, 0); } EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq); /** * dev_pm_set_dedicated_wake_irq_reverse - Request a dedicated wake-up interrupt * with reverse enable ordering * @dev: Device entry * @irq: Device wake-up interrupt * * Unless your hardware has separate wake-up interrupts in addition * to the device IO interrupts, you don't need this. 
* * Sets up a threaded interrupt handler for a device that has a dedicated * wake-up interrupt in addition to the device IO interrupt. It sets * the status of WAKE_IRQ_DEDICATED_REVERSE to tell rpm_suspend() * to enable dedicated wake-up interrupt after running the runtime suspend * callback for @dev. */ int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq) { return __dev_pm_set_dedicated_wake_irq(dev, irq, WAKE_IRQ_DEDICATED_REVERSE); } EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse); /** * dev_pm_enable_wake_irq_check - Checks and enables wake-up interrupt * @dev: Device * @can_change_status: Can change wake-up interrupt status * * Enables wakeirq conditionally. We need to enable wake-up interrupt * lazily on the first rpm_suspend(). This is needed as the consumer device * starts in RPM_SUSPENDED state, and the first pm_runtime_get() would * otherwise try to disable already disabled wakeirq. The wake-up interrupt * starts disabled with IRQ_NOAUTOEN set. * * Should be only called from rpm_suspend() and rpm_resume() path. * Caller must hold &dev->power.lock to change wirq->status */ void dev_pm_enable_wake_irq_check(struct device *dev, bool can_change_status) { struct wake_irq *wirq = dev->power.wakeirq; if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK)) return; if (likely(wirq->status & WAKE_IRQ_DEDICATED_MANAGED)) { goto enable; } else if (can_change_status) { wirq->status |= WAKE_IRQ_DEDICATED_MANAGED; goto enable; } return; enable: if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) { enable_irq(wirq->irq); wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; } } /** * dev_pm_disable_wake_irq_check - Checks and disables wake-up interrupt * @dev: Device * @cond_disable: if set, also check WAKE_IRQ_DEDICATED_REVERSE * * Disables wake-up interrupt conditionally based on status. * Should be only called from rpm_suspend() and rpm_resume() path. */ void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable) { struct wake_irq *wirq = dev->power.wakeirq; if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK)) return; if (cond_disable && (wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) return; if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) { wirq->status &= ~WAKE_IRQ_DEDICATED_ENABLED; disable_irq_nosync(wirq->irq); } } /** * dev_pm_enable_wake_irq_complete - enable wake IRQ not enabled before * @dev: Device using the wake IRQ * * Enable wake IRQ conditionally based on status, mainly used if want to * enable wake IRQ after running ->runtime_suspend() which depends on * WAKE_IRQ_DEDICATED_REVERSE. * * Should be only called from rpm_suspend() path. */ void dev_pm_enable_wake_irq_complete(struct device *dev) { struct wake_irq *wirq = dev->power.wakeirq; if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK)) return; if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED && wirq->status & WAKE_IRQ_DEDICATED_REVERSE) { enable_irq(wirq->irq); wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; } } /** * dev_pm_arm_wake_irq - Arm device wake-up * @wirq: Device wake-up interrupt * * Sets up the wake-up event conditionally based on the * device_may_wake(). 
*/ void dev_pm_arm_wake_irq(struct wake_irq *wirq) { if (!wirq) return; if (device_may_wakeup(wirq->dev)) { if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED && !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED)) enable_irq(wirq->irq); enable_irq_wake(wirq->irq); } } /** * dev_pm_disarm_wake_irq - Disarm device wake-up * @wirq: Device wake-up interrupt * * Clears up the wake-up event conditionally based on the * device_may_wake(). */ void dev_pm_disarm_wake_irq(struct wake_irq *wirq) { if (!wirq) return; if (device_may_wakeup(wirq->dev)) { disable_irq_wake(wirq->irq); if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED && !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED)) disable_irq_nosync(wirq->irq); } }
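A hedged sketch of how a consumer driver typically wires up these helpers; the driver name, device, and interrupt index are assumptions for illustration, and error handling is trimmed:

#include <linux/platform_device.h>
#include <linux/pm_wakeup.h>
#include <linux/pm_wakeirq.h>

/* Hypothetical platform driver with a dedicated wake-up interrupt. */
static int example_probe(struct platform_device *pdev)
{
        int wakeirq, error;

        /* Assume the node's second interrupt is the dedicated wake-up line. */
        wakeirq = platform_get_irq(pdev, 1);
        if (wakeirq < 0)
                return wakeirq;

        device_init_wakeup(&pdev->dev, true);
        error = dev_pm_set_dedicated_wake_irq(&pdev->dev, wakeirq);
        if (error)
                dev_warn(&pdev->dev, "could not set wake irq: %d\n", error);

        return 0;
}

static void example_remove(struct platform_device *pdev)
{
        dev_pm_clear_wake_irq(&pdev->dev);
        device_init_wakeup(&pdev->dev, false);
}

The probe path follows the ordering the kerneldoc above asks for: device_init_wakeup() first, then the wake IRQ helper, with dev_pm_clear_wake_irq() safe to call unconditionally on the way out.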
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2013 Andrew Duggan <aduggan@synaptics.com> * Copyright (c) 2013 Synaptics Incorporated * Copyright (c) 2014 Benjamin Tissoires <benjamin.tissoires@gmail.com> * Copyright (c) 2014 Red Hat, Inc */ #include <linux/kernel.h> #include <linux/hid.h> #include <linux/input.h> #include <linux/input/mt.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/module.h> #include <linux/pm.h> #include <linux/slab.h> #include <linux/wait.h> #include
<linux/sched.h> #include <linux/rmi.h> #include "hid-ids.h" #define RMI_MOUSE_REPORT_ID 0x01 /* Mouse emulation Report */ #define RMI_WRITE_REPORT_ID 0x09 /* Output Report */ #define RMI_READ_ADDR_REPORT_ID 0x0a /* Output Report */ #define RMI_READ_DATA_REPORT_ID 0x0b /* Input Report */ #define RMI_ATTN_REPORT_ID 0x0c /* Input Report */ #define RMI_SET_RMI_MODE_REPORT_ID 0x0f /* Feature Report */ /* flags */ #define RMI_READ_REQUEST_PENDING 0 #define RMI_READ_DATA_PENDING 1 #define RMI_STARTED 2 /* device flags */ #define RMI_DEVICE BIT(0) #define RMI_DEVICE_HAS_PHYS_BUTTONS BIT(1) #define RMI_DEVICE_OUTPUT_SET_REPORT BIT(2) /* * retrieve the ctrl registers * the ctrl register has a size of 20 but a fw bug split it into 16 + 4, * and there is no way to know if the first 20 bytes are here or not. * We use only the first 12 bytes, so get only them. */ #define RMI_F11_CTRL_REG_COUNT 12 enum rmi_mode_type { RMI_MODE_OFF = 0, RMI_MODE_ATTN_REPORTS = 1, RMI_MODE_NO_PACKED_ATTN_REPORTS = 2, }; /** * struct rmi_data - stores information for hid communication * * @page_mutex: Locks current page to avoid changing pages in unexpected ways. * @page: Keeps track of the current virtual page * @xport: transport device to be registered with the RMI4 core. * * @wait: Used for waiting for read data * * @writeReport: output buffer when writing RMI registers * @readReport: input buffer when reading RMI registers * * @input_report_size: size of an input report (advertised by HID) * @output_report_size: size of an output report (advertised by HID) * * @flags: flags for the current device (started, reading, etc...) * * @reset_work: worker which will be called in case of a mouse report * @hdev: pointer to the struct hid_device * * @device_flags: flags which describe the device * * @domain: the IRQ domain allocated for this RMI4 device * @rmi_irq: the irq that will be used to generate events to rmi-core */ struct rmi_data { struct mutex page_mutex; int page; struct rmi_transport_dev xport; wait_queue_head_t wait; u8 *writeReport; u8 *readReport; u32 input_report_size; u32 output_report_size; unsigned long flags; struct work_struct reset_work; struct hid_device *hdev; unsigned long device_flags; struct irq_domain *domain; int rmi_irq; }; #define RMI_PAGE(addr) (((addr) >> 8) & 0xff) static int rmi_write_report(struct hid_device *hdev, u8 *report, int len); /** * rmi_set_page - Set RMI page * @hdev: The pointer to the hid_device struct * @page: The new page address. * * RMI devices have 16-bit addressing, but some of the physical * implementations (like SMBus) only have 8-bit addressing. So RMI implements * a page address at 0xff of every page so we can reliable page addresses * every 256 registers. * * The page_mutex lock must be held when this function is entered. * * Returns zero on success, non-zero on failure. 
*/ static int rmi_set_page(struct hid_device *hdev, u8 page) { struct rmi_data *data = hid_get_drvdata(hdev); int retval; data->writeReport[0] = RMI_WRITE_REPORT_ID; data->writeReport[1] = 1; data->writeReport[2] = 0xFF; data->writeReport[4] = page; retval = rmi_write_report(hdev, data->writeReport, data->output_report_size); if (retval != data->output_report_size) { dev_err(&hdev->dev, "%s: set page failed: %d.", __func__, retval); return retval; } data->page = page; return 0; } static int rmi_set_mode(struct hid_device *hdev, u8 mode) { int ret; const u8 txbuf[2] = {RMI_SET_RMI_MODE_REPORT_ID, mode}; u8 *buf; buf = kmemdup(txbuf, sizeof(txbuf), GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(hdev, RMI_SET_RMI_MODE_REPORT_ID, buf, sizeof(txbuf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); kfree(buf); if (ret < 0) { dev_err(&hdev->dev, "unable to set rmi mode to %d (%d)\n", mode, ret); return ret; } return 0; } static int rmi_write_report(struct hid_device *hdev, u8 *report, int len) { struct rmi_data *data = hid_get_drvdata(hdev); int ret; if (data->device_flags & RMI_DEVICE_OUTPUT_SET_REPORT) { /* * Talk to device by using SET_REPORT requests instead. */ ret = hid_hw_raw_request(hdev, report[0], report, len, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); } else { ret = hid_hw_output_report(hdev, (void *)report, len); } if (ret < 0) { dev_err(&hdev->dev, "failed to write hid report (%d)\n", ret); return ret; } return ret; } static int rmi_hid_read_block(struct rmi_transport_dev *xport, u16 addr, void *buf, size_t len) { struct rmi_data *data = container_of(xport, struct rmi_data, xport); struct hid_device *hdev = data->hdev; int ret; int bytes_read; int bytes_needed; int retries; int read_input_count; mutex_lock(&data->page_mutex); if (RMI_PAGE(addr) != data->page) { ret = rmi_set_page(hdev, RMI_PAGE(addr)); if (ret < 0) goto exit; } for (retries = 5; retries > 0; retries--) { data->writeReport[0] = RMI_READ_ADDR_REPORT_ID; data->writeReport[1] = 0; /* old 1 byte read count */ data->writeReport[2] = addr & 0xFF; data->writeReport[3] = (addr >> 8) & 0xFF; data->writeReport[4] = len & 0xFF; data->writeReport[5] = (len >> 8) & 0xFF; set_bit(RMI_READ_REQUEST_PENDING, &data->flags); ret = rmi_write_report(hdev, data->writeReport, data->output_report_size); if (ret != data->output_report_size) { dev_err(&hdev->dev, "failed to write request output report (%d)\n", ret); goto exit; } bytes_read = 0; bytes_needed = len; while (bytes_read < len) { if (!wait_event_timeout(data->wait, test_bit(RMI_READ_DATA_PENDING, &data->flags), msecs_to_jiffies(1000))) { hid_warn(hdev, "%s: timeout elapsed\n", __func__); ret = -EAGAIN; break; } read_input_count = data->readReport[1]; memcpy(buf + bytes_read, &data->readReport[2], min(read_input_count, bytes_needed)); bytes_read += read_input_count; bytes_needed -= read_input_count; clear_bit(RMI_READ_DATA_PENDING, &data->flags); } if (ret >= 0) { ret = 0; break; } } exit: clear_bit(RMI_READ_REQUEST_PENDING, &data->flags); mutex_unlock(&data->page_mutex); return ret; } static int rmi_hid_write_block(struct rmi_transport_dev *xport, u16 addr, const void *buf, size_t len) { struct rmi_data *data = container_of(xport, struct rmi_data, xport); struct hid_device *hdev = data->hdev; int ret; mutex_lock(&data->page_mutex); if (RMI_PAGE(addr) != data->page) { ret = rmi_set_page(hdev, RMI_PAGE(addr)); if (ret < 0) goto exit; } data->writeReport[0] = RMI_WRITE_REPORT_ID; data->writeReport[1] = len; data->writeReport[2] = addr & 0xFF; data->writeReport[3] = (addr >> 8) & 
0xFF; memcpy(&data->writeReport[4], buf, len); ret = rmi_write_report(hdev, data->writeReport, data->output_report_size); if (ret < 0) { dev_err(&hdev->dev, "failed to write request output report (%d)\n", ret); goto exit; } ret = 0; exit: mutex_unlock(&data->page_mutex); return ret; } static int rmi_reset_attn_mode(struct hid_device *hdev) { struct rmi_data *data = hid_get_drvdata(hdev); struct rmi_device *rmi_dev = data->xport.rmi_dev; int ret; ret = rmi_set_mode(hdev, RMI_MODE_ATTN_REPORTS); if (ret) return ret; if (test_bit(RMI_STARTED, &data->flags)) ret = rmi_dev->driver->reset_handler(rmi_dev); return ret; } static void rmi_reset_work(struct work_struct *work) { struct rmi_data *hdata = container_of(work, struct rmi_data, reset_work); /* switch the device to RMI if we receive a generic mouse report */ rmi_reset_attn_mode(hdata->hdev); } static int rmi_input_event(struct hid_device *hdev, u8 *data, int size) { struct rmi_data *hdata = hid_get_drvdata(hdev); struct rmi_device *rmi_dev = hdata->xport.rmi_dev; unsigned long flags; if (!(test_bit(RMI_STARTED, &hdata->flags))) return 0; pm_wakeup_event(hdev->dev.parent, 0); local_irq_save(flags); rmi_set_attn_data(rmi_dev, data[1], &data[2], size - 2); generic_handle_irq(hdata->rmi_irq); local_irq_restore(flags); return 1; } static int rmi_read_data_event(struct hid_device *hdev, u8 *data, int size) { struct rmi_data *hdata = hid_get_drvdata(hdev); if (!test_bit(RMI_READ_REQUEST_PENDING, &hdata->flags)) { hid_dbg(hdev, "no read request pending\n"); return 0; } memcpy(hdata->readReport, data, min((u32)size, hdata->input_report_size)); set_bit(RMI_READ_DATA_PENDING, &hdata->flags); wake_up(&hdata->wait); return 1; } static int rmi_check_sanity(struct hid_device *hdev, u8 *data, int size) { int valid_size = size; /* * On the Dell XPS 13 9333, the bus sometimes get confused and fills * the report with a sentinel value "ff". Synaptics told us that such * behavior does not comes from the touchpad itself, so we filter out * such reports here. 
*/ while ((data[valid_size - 1] == 0xff) && valid_size > 0) valid_size--; return valid_size; } static int rmi_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct rmi_data *hdata = hid_get_drvdata(hdev); if (!(hdata->device_flags & RMI_DEVICE)) return 0; size = rmi_check_sanity(hdev, data, size); if (size < 2) return 0; switch (data[0]) { case RMI_READ_DATA_REPORT_ID: return rmi_read_data_event(hdev, data, size); case RMI_ATTN_REPORT_ID: return rmi_input_event(hdev, data, size); default: return 1; } return 0; } static int rmi_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct rmi_data *data = hid_get_drvdata(hdev); if ((data->device_flags & RMI_DEVICE) && (field->application == HID_GD_POINTER || field->application == HID_GD_MOUSE)) { if (data->device_flags & RMI_DEVICE_HAS_PHYS_BUTTONS) { if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) return 0; if ((usage->hid == HID_GD_X || usage->hid == HID_GD_Y) && !value) return 1; } schedule_work(&data->reset_work); return 1; } return 0; } static void rmi_report(struct hid_device *hid, struct hid_report *report) { struct hid_field *field = report->field[0]; if (!(hid->claimed & HID_CLAIMED_INPUT)) return; switch (report->id) { case RMI_READ_DATA_REPORT_ID: case RMI_ATTN_REPORT_ID: return; } if (field && field->hidinput && field->hidinput->input) input_sync(field->hidinput->input); } static int rmi_suspend(struct hid_device *hdev, pm_message_t message) { struct rmi_data *data = hid_get_drvdata(hdev); struct rmi_device *rmi_dev = data->xport.rmi_dev; int ret; if (!(data->device_flags & RMI_DEVICE)) return 0; ret = rmi_driver_suspend(rmi_dev, false); if (ret) { hid_warn(hdev, "Failed to suspend device: %d\n", ret); return ret; } return 0; } static int rmi_post_resume(struct hid_device *hdev) { struct rmi_data *data = hid_get_drvdata(hdev); struct rmi_device *rmi_dev = data->xport.rmi_dev; int ret; if (!(data->device_flags & RMI_DEVICE)) return 0; /* Make sure the HID device is ready to receive events */ ret = hid_hw_open(hdev); if (ret) return ret; ret = rmi_reset_attn_mode(hdev); if (ret) goto out; ret = rmi_driver_resume(rmi_dev, false); if (ret) { hid_warn(hdev, "Failed to resume device: %d\n", ret); goto out; } out: hid_hw_close(hdev); return ret; } static int rmi_hid_reset(struct rmi_transport_dev *xport, u16 reset_addr) { struct rmi_data *data = container_of(xport, struct rmi_data, xport); struct hid_device *hdev = data->hdev; return rmi_reset_attn_mode(hdev); } static int rmi_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct rmi_data *data = hid_get_drvdata(hdev); struct input_dev *input = hi->input; int ret = 0; if (!(data->device_flags & RMI_DEVICE)) return 0; data->xport.input = input; hid_dbg(hdev, "Opening low level driver\n"); ret = hid_hw_open(hdev); if (ret) return ret; /* Allow incoming hid reports */ hid_device_io_start(hdev); ret = rmi_set_mode(hdev, RMI_MODE_ATTN_REPORTS); if (ret < 0) { dev_err(&hdev->dev, "failed to set rmi mode\n"); goto exit; } ret = rmi_set_page(hdev, 0); if (ret < 0) { dev_err(&hdev->dev, "failed to set page select to 0.\n"); goto exit; } ret = rmi_register_transport_device(&data->xport); if (ret < 0) { dev_err(&hdev->dev, "failed to register transport driver\n"); goto exit; } set_bit(RMI_STARTED, &data->flags); exit: hid_device_io_stop(hdev); hid_hw_close(hdev); return ret; } static int rmi_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage 
*usage, unsigned long **bit, int *max) { struct rmi_data *data = hid_get_drvdata(hdev); /* * we want to make HID ignore the advertised HID collection * for RMI deivces */ if (data->device_flags & RMI_DEVICE) { if ((data->device_flags & RMI_DEVICE_HAS_PHYS_BUTTONS) && ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON)) return 0; return -1; } return 0; } static int rmi_check_valid_report_id(struct hid_device *hdev, unsigned type, unsigned id, struct hid_report **report) { int i; *report = hdev->report_enum[type].report_id_hash[id]; if (*report) { for (i = 0; i < (*report)->maxfield; i++) { unsigned app = (*report)->field[i]->application; if ((app & HID_USAGE_PAGE) >= HID_UP_MSVENDOR) return 1; } } return 0; } static struct rmi_device_platform_data rmi_hid_pdata = { .sensor_pdata = { .sensor_type = rmi_sensor_touchpad, .axis_align.flip_y = true, .dribble = RMI_REG_STATE_ON, .palm_detect = RMI_REG_STATE_OFF, }, }; static const struct rmi_transport_ops hid_rmi_ops = { .write_block = rmi_hid_write_block, .read_block = rmi_hid_read_block, .reset = rmi_hid_reset, }; static void rmi_irq_teardown(void *data) { struct rmi_data *hdata = data; struct irq_domain *domain = hdata->domain; if (!domain) return; irq_dispose_mapping(irq_find_mapping(domain, 0)); irq_domain_remove(domain); hdata->domain = NULL; hdata->rmi_irq = 0; } static int rmi_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw_irq_num) { irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); return 0; } static const struct irq_domain_ops rmi_irq_ops = { .map = rmi_irq_map, }; static int rmi_setup_irq_domain(struct hid_device *hdev) { struct rmi_data *hdata = hid_get_drvdata(hdev); int ret; hdata->domain = irq_domain_create_linear(hdev->dev.fwnode, 1, &rmi_irq_ops, hdata); if (!hdata->domain) return -ENOMEM; ret = devm_add_action_or_reset(&hdev->dev, &rmi_irq_teardown, hdata); if (ret) return ret; hdata->rmi_irq = irq_create_mapping(hdata->domain, 0); if (hdata->rmi_irq <= 0) { hid_err(hdev, "Can't allocate an IRQ\n"); return hdata->rmi_irq < 0 ? hdata->rmi_irq : -ENXIO; } return 0; } static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct rmi_data *data = NULL; int ret; size_t alloc_size; struct hid_report *input_report; struct hid_report *output_report; struct hid_report *feature_report; data = devm_kzalloc(&hdev->dev, sizeof(struct rmi_data), GFP_KERNEL); if (!data) return -ENOMEM; INIT_WORK(&data->reset_work, rmi_reset_work); data->hdev = hdev; hid_set_drvdata(hdev, data); hdev->quirks |= HID_QUIRK_NO_INIT_REPORTS; hdev->quirks |= HID_QUIRK_NO_INPUT_SYNC; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } if (id->driver_data) data->device_flags = id->driver_data; /* * Check for the RMI specific report ids. 
If they are misisng * simply return and let the events be processed by hid-input */ if (!rmi_check_valid_report_id(hdev, HID_FEATURE_REPORT, RMI_SET_RMI_MODE_REPORT_ID, &feature_report)) { hid_dbg(hdev, "device does not have set mode feature report\n"); goto start; } if (!rmi_check_valid_report_id(hdev, HID_INPUT_REPORT, RMI_ATTN_REPORT_ID, &input_report)) { hid_dbg(hdev, "device does not have attention input report\n"); goto start; } data->input_report_size = hid_report_len(input_report); if (!rmi_check_valid_report_id(hdev, HID_OUTPUT_REPORT, RMI_WRITE_REPORT_ID, &output_report)) { hid_dbg(hdev, "device does not have rmi write output report\n"); goto start; } data->output_report_size = hid_report_len(output_report); data->device_flags |= RMI_DEVICE; alloc_size = data->output_report_size + data->input_report_size; data->writeReport = devm_kzalloc(&hdev->dev, alloc_size, GFP_KERNEL); if (!data->writeReport) { hid_err(hdev, "failed to allocate buffer for HID reports\n"); return -ENOMEM; } data->readReport = data->writeReport + data->output_report_size; init_waitqueue_head(&data->wait); mutex_init(&data->page_mutex); ret = rmi_setup_irq_domain(hdev); if (ret) { hid_err(hdev, "failed to allocate IRQ domain\n"); return ret; } if (data->device_flags & RMI_DEVICE_HAS_PHYS_BUTTONS) rmi_hid_pdata.gpio_data.disable = true; data->xport.dev = hdev->dev.parent; data->xport.pdata = rmi_hid_pdata; data->xport.pdata.irq = data->rmi_irq; data->xport.proto_name = "hid"; data->xport.ops = &hid_rmi_ops; start: ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } return 0; } static void rmi_remove(struct hid_device *hdev) { struct rmi_data *hdata = hid_get_drvdata(hdev); if ((hdata->device_flags & RMI_DEVICE) && test_bit(RMI_STARTED, &hdata->flags)) { clear_bit(RMI_STARTED, &hdata->flags); cancel_work_sync(&hdata->reset_work); rmi_unregister_transport_device(&hdata->xport); } hid_hw_stop(hdev); } static const struct hid_device_id rmi_id[] = { { HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14), .driver_data = RMI_DEVICE_HAS_PHYS_BUTTONS }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_REZEL) }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5), .driver_data = RMI_DEVICE_OUTPUT_SET_REPORT }, { HID_DEVICE(HID_BUS_ANY, HID_GROUP_RMI, HID_ANY_ID, HID_ANY_ID) }, { } }; MODULE_DEVICE_TABLE(hid, rmi_id); static struct hid_driver rmi_driver = { .name = "hid-rmi", .id_table = rmi_id, .probe = rmi_probe, .remove = rmi_remove, .event = rmi_event, .raw_event = rmi_raw_event, .report = rmi_report, .input_mapping = rmi_input_mapping, .input_configured = rmi_input_configured, .suspend = pm_ptr(rmi_suspend), .resume = pm_ptr(rmi_post_resume), .reset_resume = pm_ptr(rmi_post_resume), }; module_hid_driver(rmi_driver); MODULE_AUTHOR("Andrew Duggan <aduggan@synaptics.com>"); MODULE_DESCRIPTION("RMI HID driver"); MODULE_LICENSE("GPL");
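The read path above (rmi_hid_read_block() together with rmi_read_data_event()) relies on a flag-bit-plus-waitqueue handshake. A generic, pared-down sketch of that pattern, with hypothetical names, looks like this:

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/wait.h>

#define EXAMPLE_DATA_PENDING    0       /* bit in example_xfer.flags */

/* Hypothetical pared-down state, mirroring rmi_data's wait/flags pair. */
struct example_xfer {
        wait_queue_head_t wait;
        unsigned long flags;
};

/* Report/IRQ side: publish the data, then wake the sleeping reader. */
static void example_report_arrived(struct example_xfer *x)
{
        set_bit(EXAMPLE_DATA_PENDING, &x->flags);
        wake_up(&x->wait);
}

/* Reader side: after sending the request (elided), wait up to 1s as hid-rmi does. */
static int example_wait_for_report(struct example_xfer *x)
{
        if (!wait_event_timeout(x->wait,
                                test_bit(EXAMPLE_DATA_PENDING, &x->flags),
                                msecs_to_jiffies(1000)))
                return -EAGAIN;

        clear_bit(EXAMPLE_DATA_PENDING, &x->flags);
        return 0;
}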
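Before the demux device code that follows, a hedged userspace sketch of how the dmxdev interface it implements is normally driven; the adapter path and PID are illustrative assumptions, while the ioctl and structure come from the DVB demux UAPI (linux/dvb/dmx.h):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dvb/dmx.h>

int main(void)
{
        struct dmx_pes_filter_params params;
        unsigned char pkt[188 * 10];
        ssize_t n;
        int fd;

        fd = open("/dev/dvb/adapter0/demux0", O_RDWR);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        memset(&params, 0, sizeof(params));
        params.pid = 0x100;                     /* illustrative PID */
        params.input = DMX_IN_FRONTEND;
        params.output = DMX_OUT_TSDEMUX_TAP;    /* TS packets come back via read() */
        params.pes_type = DMX_PES_OTHER;
        params.flags = DMX_IMMEDIATE_START;     /* start filtering right away */

        if (ioctl(fd, DMX_SET_PES_FILTER, &params) < 0) {
                perror("DMX_SET_PES_FILTER");
                close(fd);
                return 1;
        }

        n = read(fd, pkt, sizeof(pkt));         /* serviced by dvb_demux_read() in the code below */
        printf("read %zd bytes\n", n);
        close(fd);
        return 0;
}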
// SPDX-License-Identifier: LGPL-2.1-or-later /* * dmxdev.c - DVB demultiplexer device * * Copyright (C) 2000 Ralph Metzler & Marcus Metzler * for convergence integrated media GmbH */ #define pr_fmt(fmt) "dmxdev: " fmt #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/ioctl.h> #include <linux/wait.h> #include <linux/uaccess.h> #include <media/dmxdev.h> #include <media/dvb_vb2.h> static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off)."); #define dprintk(fmt, arg...)
do { \ if (debug) \ printk(KERN_DEBUG pr_fmt("%s: " fmt), \ __func__, ##arg); \ } while (0) static int dvb_dmxdev_buffer_write(struct dvb_ringbuffer *buf, const u8 *src, size_t len) { ssize_t free; if (!len) return 0; if (!buf->data) return 0; free = dvb_ringbuffer_free(buf); if (len > free) { dprintk("buffer overflow\n"); return -EOVERFLOW; } return dvb_ringbuffer_write(buf, src, len); } static ssize_t dvb_dmxdev_buffer_read(struct dvb_ringbuffer *src, int non_blocking, char __user *buf, size_t count, loff_t *ppos) { size_t todo; ssize_t avail; ssize_t ret = 0; if (!src->data) return 0; if (src->error) { ret = src->error; dvb_ringbuffer_flush(src); return ret; } for (todo = count; todo > 0; todo -= ret) { if (non_blocking && dvb_ringbuffer_empty(src)) { ret = -EWOULDBLOCK; break; } ret = wait_event_interruptible(src->queue, !dvb_ringbuffer_empty(src) || (src->error != 0)); if (ret < 0) break; if (src->error) { ret = src->error; dvb_ringbuffer_flush(src); break; } avail = dvb_ringbuffer_avail(src); if (avail > todo) avail = todo; ret = dvb_ringbuffer_read_user(src, buf, avail); if (ret < 0) break; buf += ret; } return (count - todo) ? (count - todo) : ret; } static struct dmx_frontend *get_fe(struct dmx_demux *demux, int type) { struct list_head *head, *pos; head = demux->get_frontends(demux); if (!head) return NULL; list_for_each(pos, head) if (DMX_FE_ENTRY(pos)->source == type) return DMX_FE_ENTRY(pos); return NULL; } static int dvb_dvr_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; struct dmx_frontend *front; bool need_ringbuffer = false; dprintk("%s\n", __func__); if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; if (dmxdev->exit) { mutex_unlock(&dmxdev->mutex); return -ENODEV; } dmxdev->may_do_mmap = 0; /* * The logic here is a little tricky due to the ifdef. * * The ringbuffer is used for both read and mmap. * * It is not needed, however, on two situations: * - Write devices (access with O_WRONLY); * - For duplex device nodes, opened with O_RDWR. 
*/ if ((file->f_flags & O_ACCMODE) == O_RDONLY) need_ringbuffer = true; else if ((file->f_flags & O_ACCMODE) == O_RDWR) { if (!(dmxdev->capabilities & DMXDEV_CAP_DUPLEX)) { #ifdef CONFIG_DVB_MMAP dmxdev->may_do_mmap = 1; need_ringbuffer = true; #else mutex_unlock(&dmxdev->mutex); return -EOPNOTSUPP; #endif } } if (need_ringbuffer) { void *mem; if (!dvbdev->readers) { mutex_unlock(&dmxdev->mutex); return -EBUSY; } mem = vmalloc(DVR_BUFFER_SIZE); if (!mem) { mutex_unlock(&dmxdev->mutex); return -ENOMEM; } dvb_ringbuffer_init(&dmxdev->dvr_buffer, mem, DVR_BUFFER_SIZE); if (dmxdev->may_do_mmap) dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr", file->f_flags & O_NONBLOCK); dvbdev->readers--; } if ((file->f_flags & O_ACCMODE) == O_WRONLY) { dmxdev->dvr_orig_fe = dmxdev->demux->frontend; if (!dmxdev->demux->write) { mutex_unlock(&dmxdev->mutex); return -EOPNOTSUPP; } front = get_fe(dmxdev->demux, DMX_MEMORY_FE); if (!front) { mutex_unlock(&dmxdev->mutex); return -EINVAL; } dmxdev->demux->disconnect_frontend(dmxdev->demux); dmxdev->demux->connect_frontend(dmxdev->demux, front); } dvbdev->users++; mutex_unlock(&dmxdev->mutex); return 0; } static int dvb_dvr_release(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; mutex_lock(&dmxdev->mutex); if ((file->f_flags & O_ACCMODE) == O_WRONLY) { dmxdev->demux->disconnect_frontend(dmxdev->demux); dmxdev->demux->connect_frontend(dmxdev->demux, dmxdev->dvr_orig_fe); } if (((file->f_flags & O_ACCMODE) == O_RDONLY) || dmxdev->may_do_mmap) { if (dmxdev->may_do_mmap) { if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx)) dvb_vb2_stream_off(&dmxdev->dvr_vb2_ctx); dvb_vb2_release(&dmxdev->dvr_vb2_ctx); } dvbdev->readers++; if (dmxdev->dvr_buffer.data) { void *mem = dmxdev->dvr_buffer.data; /*memory barrier*/ mb(); spin_lock_irq(&dmxdev->lock); dmxdev->dvr_buffer.data = NULL; spin_unlock_irq(&dmxdev->lock); vfree(mem); } } /* TODO */ dvbdev->users--; if (dvbdev->users == 1 && dmxdev->exit == 1) { mutex_unlock(&dmxdev->mutex); wake_up(&dvbdev->wait_queue); } else mutex_unlock(&dmxdev->mutex); return 0; } static ssize_t dvb_dvr_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; int ret; if (!dmxdev->demux->write) return -EOPNOTSUPP; if ((file->f_flags & O_ACCMODE) != O_WRONLY) return -EINVAL; if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; if (dmxdev->exit) { mutex_unlock(&dmxdev->mutex); return -ENODEV; } ret = dmxdev->demux->write(dmxdev->demux, buf, count); mutex_unlock(&dmxdev->mutex); return ret; } static ssize_t dvb_dvr_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; if (dmxdev->exit) return -ENODEV; return dvb_dmxdev_buffer_read(&dmxdev->dvr_buffer, file->f_flags & O_NONBLOCK, buf, count, ppos); } static int dvb_dvr_set_buffer_size(struct dmxdev *dmxdev, unsigned long size) { struct dvb_ringbuffer *buf = &dmxdev->dvr_buffer; void *newmem; void *oldmem; dprintk("%s\n", __func__); if (buf->size == size) return 0; if (!size) return -EINVAL; newmem = vmalloc(size); if (!newmem) return -ENOMEM; oldmem = buf->data; spin_lock_irq(&dmxdev->lock); buf->data = newmem; buf->size = size; /* reset and not flush in case the buffer shrinks */ dvb_ringbuffer_reset(buf); spin_unlock_irq(&dmxdev->lock); vfree(oldmem); return 0; } static inline void 
dvb_dmxdev_filter_state_set(struct dmxdev_filter *dmxdevfilter, int state) { spin_lock_irq(&dmxdevfilter->dev->lock); dmxdevfilter->state = state; spin_unlock_irq(&dmxdevfilter->dev->lock); } static int dvb_dmxdev_set_buffer_size(struct dmxdev_filter *dmxdevfilter, unsigned long size) { struct dvb_ringbuffer *buf = &dmxdevfilter->buffer; void *newmem; void *oldmem; if (buf->size == size) return 0; if (!size) return -EINVAL; if (dmxdevfilter->state >= DMXDEV_STATE_GO) return -EBUSY; newmem = vmalloc(size); if (!newmem) return -ENOMEM; oldmem = buf->data; spin_lock_irq(&dmxdevfilter->dev->lock); buf->data = newmem; buf->size = size; /* reset and not flush in case the buffer shrinks */ dvb_ringbuffer_reset(buf); spin_unlock_irq(&dmxdevfilter->dev->lock); vfree(oldmem); return 0; } static void dvb_dmxdev_filter_timeout(struct timer_list *t) { struct dmxdev_filter *dmxdevfilter = from_timer(dmxdevfilter, t, timer); dmxdevfilter->buffer.error = -ETIMEDOUT; spin_lock_irq(&dmxdevfilter->dev->lock); dmxdevfilter->state = DMXDEV_STATE_TIMEDOUT; spin_unlock_irq(&dmxdevfilter->dev->lock); wake_up(&dmxdevfilter->buffer.queue); } static void dvb_dmxdev_filter_timer(struct dmxdev_filter *dmxdevfilter) { struct dmx_sct_filter_params *para = &dmxdevfilter->params.sec; del_timer(&dmxdevfilter->timer); if (para->timeout) { dmxdevfilter->timer.expires = jiffies + 1 + (HZ / 2 + HZ * para->timeout) / 1000; add_timer(&dmxdevfilter->timer); } } static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, const u8 *buffer2, size_t buffer2_len, struct dmx_section_filter *filter, u32 *buffer_flags) { struct dmxdev_filter *dmxdevfilter = filter->priv; int ret; if (!dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx) && dmxdevfilter->buffer.error) { wake_up(&dmxdevfilter->buffer.queue); return 0; } spin_lock(&dmxdevfilter->dev->lock); if (dmxdevfilter->state != DMXDEV_STATE_GO) { spin_unlock(&dmxdevfilter->dev->lock); return 0; } del_timer(&dmxdevfilter->timer); dprintk("section callback %*ph\n", 6, buffer1); if (dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx)) { ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx, buffer1, buffer1_len, buffer_flags); if (ret == buffer1_len) ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx, buffer2, buffer2_len, buffer_flags); } else { ret = dvb_dmxdev_buffer_write(&dmxdevfilter->buffer, buffer1, buffer1_len); if (ret == buffer1_len) { ret = dvb_dmxdev_buffer_write(&dmxdevfilter->buffer, buffer2, buffer2_len); } } if (ret < 0) dmxdevfilter->buffer.error = ret; if (dmxdevfilter->params.sec.flags & DMX_ONESHOT) dmxdevfilter->state = DMXDEV_STATE_DONE; spin_unlock(&dmxdevfilter->dev->lock); wake_up(&dmxdevfilter->buffer.queue); return 0; } static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, const u8 *buffer2, size_t buffer2_len, struct dmx_ts_feed *feed, u32 *buffer_flags) { struct dmxdev_filter *dmxdevfilter = feed->priv; struct dvb_ringbuffer *buffer; #ifdef CONFIG_DVB_MMAP struct dvb_vb2_ctx *ctx; #endif int ret; spin_lock(&dmxdevfilter->dev->lock); if (dmxdevfilter->params.pes.output == DMX_OUT_DECODER) { spin_unlock(&dmxdevfilter->dev->lock); return 0; } if (dmxdevfilter->params.pes.output == DMX_OUT_TAP || dmxdevfilter->params.pes.output == DMX_OUT_TSDEMUX_TAP) { buffer = &dmxdevfilter->buffer; #ifdef CONFIG_DVB_MMAP ctx = &dmxdevfilter->vb2_ctx; #endif } else { buffer = &dmxdevfilter->dev->dvr_buffer; #ifdef CONFIG_DVB_MMAP ctx = &dmxdevfilter->dev->dvr_vb2_ctx; #endif } if (dvb_vb2_is_streaming(ctx)) { ret = dvb_vb2_fill_buffer(ctx, buffer1, buffer1_len, 
buffer_flags); if (ret == buffer1_len) ret = dvb_vb2_fill_buffer(ctx, buffer2, buffer2_len, buffer_flags); } else { if (buffer->error) { spin_unlock(&dmxdevfilter->dev->lock); wake_up(&buffer->queue); return 0; } ret = dvb_dmxdev_buffer_write(buffer, buffer1, buffer1_len); if (ret == buffer1_len) ret = dvb_dmxdev_buffer_write(buffer, buffer2, buffer2_len); } if (ret < 0) buffer->error = ret; spin_unlock(&dmxdevfilter->dev->lock); wake_up(&buffer->queue); return 0; } /* stop feed but only mark the specified filter as stopped (state set) */ static int dvb_dmxdev_feed_stop(struct dmxdev_filter *dmxdevfilter) { struct dmxdev_feed *feed; dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_SET); switch (dmxdevfilter->type) { case DMXDEV_TYPE_SEC: del_timer(&dmxdevfilter->timer); dmxdevfilter->feed.sec->stop_filtering(dmxdevfilter->feed.sec); break; case DMXDEV_TYPE_PES: list_for_each_entry(feed, &dmxdevfilter->feed.ts, next) feed->ts->stop_filtering(feed->ts); break; default: return -EINVAL; } return 0; } /* start feed associated with the specified filter */ static int dvb_dmxdev_feed_start(struct dmxdev_filter *filter) { struct dmxdev_feed *feed; int ret; dvb_dmxdev_filter_state_set(filter, DMXDEV_STATE_GO); switch (filter->type) { case DMXDEV_TYPE_SEC: return filter->feed.sec->start_filtering(filter->feed.sec); case DMXDEV_TYPE_PES: list_for_each_entry(feed, &filter->feed.ts, next) { ret = feed->ts->start_filtering(feed->ts); if (ret < 0) { dvb_dmxdev_feed_stop(filter); return ret; } } break; default: return -EINVAL; } return 0; } /* restart section feed if it has filters left associated with it, otherwise release the feed */ static int dvb_dmxdev_feed_restart(struct dmxdev_filter *filter) { int i; struct dmxdev *dmxdev = filter->dev; u16 pid = filter->params.sec.pid; for (i = 0; i < dmxdev->filternum; i++) if (dmxdev->filter[i].state >= DMXDEV_STATE_GO && dmxdev->filter[i].type == DMXDEV_TYPE_SEC && dmxdev->filter[i].params.sec.pid == pid) { dvb_dmxdev_feed_start(&dmxdev->filter[i]); return 0; } filter->dev->demux->release_section_feed(dmxdev->demux, filter->feed.sec); return 0; } static int dvb_dmxdev_filter_stop(struct dmxdev_filter *dmxdevfilter) { struct dmxdev_feed *feed; struct dmx_demux *demux; if (dmxdevfilter->state < DMXDEV_STATE_GO) return 0; switch (dmxdevfilter->type) { case DMXDEV_TYPE_SEC: if (!dmxdevfilter->feed.sec) break; dvb_dmxdev_feed_stop(dmxdevfilter); if (dmxdevfilter->filter.sec) dmxdevfilter->feed.sec-> release_filter(dmxdevfilter->feed.sec, dmxdevfilter->filter.sec); dvb_dmxdev_feed_restart(dmxdevfilter); dmxdevfilter->feed.sec = NULL; break; case DMXDEV_TYPE_PES: dvb_dmxdev_feed_stop(dmxdevfilter); demux = dmxdevfilter->dev->demux; list_for_each_entry(feed, &dmxdevfilter->feed.ts, next) { demux->release_ts_feed(demux, feed->ts); feed->ts = NULL; } break; default: if (dmxdevfilter->state == DMXDEV_STATE_ALLOCATED) return 0; return -EINVAL; } dvb_ringbuffer_flush(&dmxdevfilter->buffer); return 0; } static void dvb_dmxdev_delete_pids(struct dmxdev_filter *dmxdevfilter) { struct dmxdev_feed *feed, *tmp; /* delete all PIDs */ list_for_each_entry_safe(feed, tmp, &dmxdevfilter->feed.ts, next) { list_del(&feed->next); kfree(feed); } BUG_ON(!list_empty(&dmxdevfilter->feed.ts)); } static inline int dvb_dmxdev_filter_reset(struct dmxdev_filter *dmxdevfilter) { if (dmxdevfilter->state < DMXDEV_STATE_SET) return 0; if (dmxdevfilter->type == DMXDEV_TYPE_PES) dvb_dmxdev_delete_pids(dmxdevfilter); dmxdevfilter->type = DMXDEV_TYPE_NONE; dvb_dmxdev_filter_state_set(dmxdevfilter, 
DMXDEV_STATE_ALLOCATED); return 0; } static int dvb_dmxdev_start_feed(struct dmxdev *dmxdev, struct dmxdev_filter *filter, struct dmxdev_feed *feed) { ktime_t timeout = ktime_set(0, 0); struct dmx_pes_filter_params *para = &filter->params.pes; enum dmx_output otype; int ret; int ts_type; enum dmx_ts_pes ts_pes; struct dmx_ts_feed *tsfeed; feed->ts = NULL; otype = para->output; ts_pes = para->pes_type; if (ts_pes < DMX_PES_OTHER) ts_type = TS_DECODER; else ts_type = 0; if (otype == DMX_OUT_TS_TAP) ts_type |= TS_PACKET; else if (otype == DMX_OUT_TSDEMUX_TAP) ts_type |= TS_PACKET | TS_DEMUX; else if (otype == DMX_OUT_TAP) ts_type |= TS_PACKET | TS_DEMUX | TS_PAYLOAD_ONLY; ret = dmxdev->demux->allocate_ts_feed(dmxdev->demux, &feed->ts, dvb_dmxdev_ts_callback); if (ret < 0) return ret; tsfeed = feed->ts; tsfeed->priv = filter; ret = tsfeed->set(tsfeed, feed->pid, ts_type, ts_pes, timeout); if (ret < 0) { dmxdev->demux->release_ts_feed(dmxdev->demux, tsfeed); return ret; } ret = tsfeed->start_filtering(tsfeed); if (ret < 0) { dmxdev->demux->release_ts_feed(dmxdev->demux, tsfeed); return ret; } return 0; } static int dvb_dmxdev_filter_start(struct dmxdev_filter *filter) { struct dmxdev *dmxdev = filter->dev; struct dmxdev_feed *feed; void *mem; int ret, i; if (filter->state < DMXDEV_STATE_SET) return -EINVAL; if (filter->state >= DMXDEV_STATE_GO) dvb_dmxdev_filter_stop(filter); if (!filter->buffer.data) { mem = vmalloc(filter->buffer.size); if (!mem) return -ENOMEM; spin_lock_irq(&filter->dev->lock); filter->buffer.data = mem; spin_unlock_irq(&filter->dev->lock); } dvb_ringbuffer_flush(&filter->buffer); switch (filter->type) { case DMXDEV_TYPE_SEC: { struct dmx_sct_filter_params *para = &filter->params.sec; struct dmx_section_filter **secfilter = &filter->filter.sec; struct dmx_section_feed **secfeed = &filter->feed.sec; *secfilter = NULL; *secfeed = NULL; /* find active filter/feed with same PID */ for (i = 0; i < dmxdev->filternum; i++) { if (dmxdev->filter[i].state >= DMXDEV_STATE_GO && dmxdev->filter[i].type == DMXDEV_TYPE_SEC && dmxdev->filter[i].params.sec.pid == para->pid) { *secfeed = dmxdev->filter[i].feed.sec; break; } } /* if no feed found, try to allocate new one */ if (!*secfeed) { ret = dmxdev->demux->allocate_section_feed(dmxdev->demux, secfeed, dvb_dmxdev_section_callback); if (!*secfeed) { pr_err("DVB (%s): could not alloc feed\n", __func__); return ret; } ret = (*secfeed)->set(*secfeed, para->pid, (para->flags & DMX_CHECK_CRC) ? 
1 : 0); if (ret < 0) { pr_err("DVB (%s): could not set feed\n", __func__); dvb_dmxdev_feed_restart(filter); return ret; } } else { dvb_dmxdev_feed_stop(filter); } ret = (*secfeed)->allocate_filter(*secfeed, secfilter); if (ret < 0) { dvb_dmxdev_feed_restart(filter); filter->feed.sec->start_filtering(*secfeed); dprintk("could not get filter\n"); return ret; } (*secfilter)->priv = filter; memcpy(&((*secfilter)->filter_value[3]), &(para->filter.filter[1]), DMX_FILTER_SIZE - 1); memcpy(&(*secfilter)->filter_mask[3], &para->filter.mask[1], DMX_FILTER_SIZE - 1); memcpy(&(*secfilter)->filter_mode[3], &para->filter.mode[1], DMX_FILTER_SIZE - 1); (*secfilter)->filter_value[0] = para->filter.filter[0]; (*secfilter)->filter_mask[0] = para->filter.mask[0]; (*secfilter)->filter_mode[0] = para->filter.mode[0]; (*secfilter)->filter_mask[1] = 0; (*secfilter)->filter_mask[2] = 0; filter->todo = 0; ret = filter->feed.sec->start_filtering(filter->feed.sec); if (ret < 0) return ret; dvb_dmxdev_filter_timer(filter); break; } case DMXDEV_TYPE_PES: list_for_each_entry(feed, &filter->feed.ts, next) { ret = dvb_dmxdev_start_feed(dmxdev, filter, feed); if (ret < 0) { dvb_dmxdev_filter_stop(filter); return ret; } } break; default: return -EINVAL; } dvb_dmxdev_filter_state_set(filter, DMXDEV_STATE_GO); return 0; } static int dvb_demux_open(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; int i; struct dmxdev_filter *dmxdevfilter; if (!dmxdev->filter) return -EINVAL; if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; if (dmxdev->exit) { mutex_unlock(&dmxdev->mutex); return -ENODEV; } for (i = 0; i < dmxdev->filternum; i++) if (dmxdev->filter[i].state == DMXDEV_STATE_FREE) break; if (i == dmxdev->filternum) { mutex_unlock(&dmxdev->mutex); return -EMFILE; } dmxdevfilter = &dmxdev->filter[i]; mutex_init(&dmxdevfilter->mutex); file->private_data = dmxdevfilter; #ifdef CONFIG_DVB_MMAP dmxdev->may_do_mmap = 1; #else dmxdev->may_do_mmap = 0; #endif dvb_ringbuffer_init(&dmxdevfilter->buffer, NULL, 8192); dvb_vb2_init(&dmxdevfilter->vb2_ctx, "demux_filter", file->f_flags & O_NONBLOCK); dmxdevfilter->type = DMXDEV_TYPE_NONE; dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_ALLOCATED); timer_setup(&dmxdevfilter->timer, dvb_dmxdev_filter_timeout, 0); dvbdev->users++; mutex_unlock(&dmxdev->mutex); return 0; } static int dvb_dmxdev_filter_free(struct dmxdev *dmxdev, struct dmxdev_filter *dmxdevfilter) { mutex_lock(&dmxdev->mutex); mutex_lock(&dmxdevfilter->mutex); if (dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx)) dvb_vb2_stream_off(&dmxdevfilter->vb2_ctx); dvb_vb2_release(&dmxdevfilter->vb2_ctx); dvb_dmxdev_filter_stop(dmxdevfilter); dvb_dmxdev_filter_reset(dmxdevfilter); if (dmxdevfilter->buffer.data) { void *mem = dmxdevfilter->buffer.data; spin_lock_irq(&dmxdev->lock); dmxdevfilter->buffer.data = NULL; spin_unlock_irq(&dmxdev->lock); vfree(mem); } dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_FREE); wake_up(&dmxdevfilter->buffer.queue); mutex_unlock(&dmxdevfilter->mutex); mutex_unlock(&dmxdev->mutex); return 0; } static inline void invert_mode(struct dmx_filter *filter) { int i; for (i = 0; i < DMX_FILTER_SIZE; i++) filter->mode[i] ^= 0xff; } static int dvb_dmxdev_add_pid(struct dmxdev *dmxdev, struct dmxdev_filter *filter, u16 pid) { struct dmxdev_feed *feed; if ((filter->type != DMXDEV_TYPE_PES) || (filter->state < DMXDEV_STATE_SET)) return -EINVAL; /* only TS packet filters may have multiple PIDs */ if 
((filter->params.pes.output != DMX_OUT_TSDEMUX_TAP) && (!list_empty(&filter->feed.ts))) return -EINVAL; feed = kzalloc(sizeof(struct dmxdev_feed), GFP_KERNEL); if (feed == NULL) return -ENOMEM; feed->pid = pid; list_add(&feed->next, &filter->feed.ts); if (filter->state >= DMXDEV_STATE_GO) return dvb_dmxdev_start_feed(dmxdev, filter, feed); return 0; } static int dvb_dmxdev_remove_pid(struct dmxdev *dmxdev, struct dmxdev_filter *filter, u16 pid) { struct dmxdev_feed *feed, *tmp; if ((filter->type != DMXDEV_TYPE_PES) || (filter->state < DMXDEV_STATE_SET)) return -EINVAL; list_for_each_entry_safe(feed, tmp, &filter->feed.ts, next) { if ((feed->pid == pid) && (feed->ts != NULL)) { feed->ts->stop_filtering(feed->ts); filter->dev->demux->release_ts_feed(filter->dev->demux, feed->ts); list_del(&feed->next); kfree(feed); } } return 0; } static int dvb_dmxdev_filter_set(struct dmxdev *dmxdev, struct dmxdev_filter *dmxdevfilter, struct dmx_sct_filter_params *params) { dprintk("%s: PID=0x%04x, flags=%02x, timeout=%d\n", __func__, params->pid, params->flags, params->timeout); dvb_dmxdev_filter_stop(dmxdevfilter); dmxdevfilter->type = DMXDEV_TYPE_SEC; memcpy(&dmxdevfilter->params.sec, params, sizeof(struct dmx_sct_filter_params)); invert_mode(&dmxdevfilter->params.sec.filter); dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_SET); if (params->flags & DMX_IMMEDIATE_START) return dvb_dmxdev_filter_start(dmxdevfilter); return 0; } static int dvb_dmxdev_pes_filter_set(struct dmxdev *dmxdev, struct dmxdev_filter *dmxdevfilter, struct dmx_pes_filter_params *params) { int ret; dvb_dmxdev_filter_stop(dmxdevfilter); dvb_dmxdev_filter_reset(dmxdevfilter); if ((unsigned int)params->pes_type > DMX_PES_OTHER) return -EINVAL; dmxdevfilter->type = DMXDEV_TYPE_PES; memcpy(&dmxdevfilter->params, params, sizeof(struct dmx_pes_filter_params)); INIT_LIST_HEAD(&dmxdevfilter->feed.ts); dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_SET); ret = dvb_dmxdev_add_pid(dmxdev, dmxdevfilter, dmxdevfilter->params.pes.pid); if (ret < 0) return ret; if (params->flags & DMX_IMMEDIATE_START) return dvb_dmxdev_filter_start(dmxdevfilter); return 0; } static ssize_t dvb_dmxdev_read_sec(struct dmxdev_filter *dfil, struct file *file, char __user *buf, size_t count, loff_t *ppos) { int result, hcount; int done = 0; if (dfil->todo <= 0) { hcount = 3 + dfil->todo; if (hcount > count) hcount = count; result = dvb_dmxdev_buffer_read(&dfil->buffer, file->f_flags & O_NONBLOCK, buf, hcount, ppos); if (result < 0) { dfil->todo = 0; return result; } if (copy_from_user(dfil->secheader - dfil->todo, buf, result)) return -EFAULT; buf += result; done = result; count -= result; dfil->todo -= result; if (dfil->todo > -3) return done; dfil->todo = ((dfil->secheader[1] << 8) | dfil->secheader[2]) & 0xfff; if (!count) return done; } if (count > dfil->todo) count = dfil->todo; result = dvb_dmxdev_buffer_read(&dfil->buffer, file->f_flags & O_NONBLOCK, buf, count, ppos); if (result < 0) return result; dfil->todo -= result; return (result + done); } static ssize_t dvb_demux_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct dmxdev_filter *dmxdevfilter = file->private_data; int ret; if (mutex_lock_interruptible(&dmxdevfilter->mutex)) return -ERESTARTSYS; if (dmxdevfilter->type == DMXDEV_TYPE_SEC) ret = dvb_dmxdev_read_sec(dmxdevfilter, file, buf, count, ppos); else ret = dvb_dmxdev_buffer_read(&dmxdevfilter->buffer, file->f_flags & O_NONBLOCK, buf, count, ppos); mutex_unlock(&dmxdevfilter->mutex); return ret; } static int 
dvb_demux_do_ioctl(struct file *file, unsigned int cmd, void *parg) { struct dmxdev_filter *dmxdevfilter = file->private_data; struct dmxdev *dmxdev = dmxdevfilter->dev; unsigned long arg = (unsigned long)parg; int ret = 0; if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; switch (cmd) { case DMX_START: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } if (dmxdevfilter->state < DMXDEV_STATE_SET) ret = -EINVAL; else ret = dvb_dmxdev_filter_start(dmxdevfilter); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_STOP: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_dmxdev_filter_stop(dmxdevfilter); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_SET_FILTER: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_dmxdev_filter_set(dmxdev, dmxdevfilter, parg); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_SET_PES_FILTER: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_dmxdev_pes_filter_set(dmxdev, dmxdevfilter, parg); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_SET_BUFFER_SIZE: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_dmxdev_set_buffer_size(dmxdevfilter, arg); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_GET_PES_PIDS: if (!dmxdev->demux->get_pes_pids) { ret = -EINVAL; break; } dmxdev->demux->get_pes_pids(dmxdev->demux, parg); break; case DMX_GET_STC: if (!dmxdev->demux->get_stc) { ret = -EINVAL; break; } ret = dmxdev->demux->get_stc(dmxdev->demux, ((struct dmx_stc *)parg)->num, &((struct dmx_stc *)parg)->stc, &((struct dmx_stc *)parg)->base); break; case DMX_ADD_PID: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { ret = -ERESTARTSYS; break; } ret = dvb_dmxdev_add_pid(dmxdev, dmxdevfilter, *(u16 *)parg); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_REMOVE_PID: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { ret = -ERESTARTSYS; break; } ret = dvb_dmxdev_remove_pid(dmxdev, dmxdevfilter, *(u16 *)parg); mutex_unlock(&dmxdevfilter->mutex); break; #ifdef CONFIG_DVB_MMAP case DMX_REQBUFS: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_vb2_reqbufs(&dmxdevfilter->vb2_ctx, parg); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_QUERYBUF: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_vb2_querybuf(&dmxdevfilter->vb2_ctx, parg); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_EXPBUF: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_vb2_expbuf(&dmxdevfilter->vb2_ctx, parg); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_QBUF: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_vb2_qbuf(&dmxdevfilter->vb2_ctx, parg); if (ret == 0 && !dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx)) ret = dvb_vb2_stream_on(&dmxdevfilter->vb2_ctx); mutex_unlock(&dmxdevfilter->mutex); break; case DMX_DQBUF: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_vb2_dqbuf(&dmxdevfilter->vb2_ctx, parg); mutex_unlock(&dmxdevfilter->mutex); break; #endif default: ret = -ENOTTY; break; } 
mutex_unlock(&dmxdev->mutex); return ret; } static long dvb_demux_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return dvb_usercopy(file, cmd, arg, dvb_demux_do_ioctl); } static __poll_t dvb_demux_poll(struct file *file, poll_table *wait) { struct dmxdev_filter *dmxdevfilter = file->private_data; __poll_t mask = 0; poll_wait(file, &dmxdevfilter->buffer.queue, wait); if ((!dmxdevfilter) || dmxdevfilter->dev->exit) return EPOLLERR; if (dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx)) return dvb_vb2_poll(&dmxdevfilter->vb2_ctx, file, wait); if (dmxdevfilter->state != DMXDEV_STATE_GO && dmxdevfilter->state != DMXDEV_STATE_DONE && dmxdevfilter->state != DMXDEV_STATE_TIMEDOUT) return 0; if (dmxdevfilter->buffer.error) mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI | EPOLLERR); if (!dvb_ringbuffer_empty(&dmxdevfilter->buffer)) mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI); return mask; } #ifdef CONFIG_DVB_MMAP static int dvb_demux_mmap(struct file *file, struct vm_area_struct *vma) { struct dmxdev_filter *dmxdevfilter = file->private_data; struct dmxdev *dmxdev = dmxdevfilter->dev; int ret; if (!dmxdev->may_do_mmap) return -ENOTTY; if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); return -ERESTARTSYS; } ret = dvb_vb2_mmap(&dmxdevfilter->vb2_ctx, vma); mutex_unlock(&dmxdevfilter->mutex); mutex_unlock(&dmxdev->mutex); return ret; } #endif static int dvb_demux_release(struct inode *inode, struct file *file) { struct dmxdev_filter *dmxdevfilter = file->private_data; struct dmxdev *dmxdev = dmxdevfilter->dev; int ret; ret = dvb_dmxdev_filter_free(dmxdev, dmxdevfilter); mutex_lock(&dmxdev->mutex); dmxdev->dvbdev->users--; if (dmxdev->dvbdev->users == 1 && dmxdev->exit == 1) { mutex_unlock(&dmxdev->mutex); wake_up(&dmxdev->dvbdev->wait_queue); } else mutex_unlock(&dmxdev->mutex); return ret; } static const struct file_operations dvb_demux_fops = { .owner = THIS_MODULE, .read = dvb_demux_read, .unlocked_ioctl = dvb_demux_ioctl, .compat_ioctl = dvb_demux_ioctl, .open = dvb_demux_open, .release = dvb_demux_release, .poll = dvb_demux_poll, .llseek = default_llseek, #ifdef CONFIG_DVB_MMAP .mmap = dvb_demux_mmap, #endif }; static const struct dvb_device dvbdev_demux = { .priv = NULL, .users = 1, .writers = 1, #if defined(CONFIG_MEDIA_CONTROLLER_DVB) .name = "dvb-demux", #endif .fops = &dvb_demux_fops }; static int dvb_dvr_do_ioctl(struct file *file, unsigned int cmd, void *parg) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; unsigned long arg = (unsigned long)parg; int ret; if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; switch (cmd) { case DMX_SET_BUFFER_SIZE: ret = dvb_dvr_set_buffer_size(dmxdev, arg); break; #ifdef CONFIG_DVB_MMAP case DMX_REQBUFS: ret = dvb_vb2_reqbufs(&dmxdev->dvr_vb2_ctx, parg); break; case DMX_QUERYBUF: ret = dvb_vb2_querybuf(&dmxdev->dvr_vb2_ctx, parg); break; case DMX_EXPBUF: ret = dvb_vb2_expbuf(&dmxdev->dvr_vb2_ctx, parg); break; case DMX_QBUF: ret = dvb_vb2_qbuf(&dmxdev->dvr_vb2_ctx, parg); if (ret == 0 && !dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx)) ret = dvb_vb2_stream_on(&dmxdev->dvr_vb2_ctx); break; case DMX_DQBUF: ret = dvb_vb2_dqbuf(&dmxdev->dvr_vb2_ctx, parg); break; #endif default: ret = -ENOTTY; break; } mutex_unlock(&dmxdev->mutex); return ret; } static long dvb_dvr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return dvb_usercopy(file, cmd, arg, dvb_dvr_do_ioctl); } static __poll_t 
dvb_dvr_poll(struct file *file, poll_table *wait) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; __poll_t mask = 0; dprintk("%s\n", __func__); poll_wait(file, &dmxdev->dvr_buffer.queue, wait); if (dmxdev->exit) return EPOLLERR; if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx)) return dvb_vb2_poll(&dmxdev->dvr_vb2_ctx, file, wait); if (((file->f_flags & O_ACCMODE) == O_RDONLY) || dmxdev->may_do_mmap) { if (dmxdev->dvr_buffer.error) mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI | EPOLLERR); if (!dvb_ringbuffer_empty(&dmxdev->dvr_buffer)) mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI); } else mask |= (EPOLLOUT | EPOLLWRNORM | EPOLLPRI); return mask; } #ifdef CONFIG_DVB_MMAP static int dvb_dvr_mmap(struct file *file, struct vm_area_struct *vma) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; int ret; if (!dmxdev->may_do_mmap) return -ENOTTY; if (dmxdev->exit) return -ENODEV; if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; ret = dvb_vb2_mmap(&dmxdev->dvr_vb2_ctx, vma); mutex_unlock(&dmxdev->mutex); return ret; } #endif static const struct file_operations dvb_dvr_fops = { .owner = THIS_MODULE, .read = dvb_dvr_read, .write = dvb_dvr_write, .unlocked_ioctl = dvb_dvr_ioctl, .open = dvb_dvr_open, .release = dvb_dvr_release, .poll = dvb_dvr_poll, .llseek = default_llseek, #ifdef CONFIG_DVB_MMAP .mmap = dvb_dvr_mmap, #endif }; static const struct dvb_device dvbdev_dvr = { .priv = NULL, .readers = 1, .users = 1, #if defined(CONFIG_MEDIA_CONTROLLER_DVB) .name = "dvb-dvr", #endif .fops = &dvb_dvr_fops }; int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter) { int i, ret; if (dmxdev->demux->open(dmxdev->demux) < 0) return -EUSERS; dmxdev->filter = vmalloc(array_size(sizeof(struct dmxdev_filter), dmxdev->filternum)); if (!dmxdev->filter) return -ENOMEM; mutex_init(&dmxdev->mutex); spin_lock_init(&dmxdev->lock); for (i = 0; i < dmxdev->filternum; i++) { dmxdev->filter[i].dev = dmxdev; dmxdev->filter[i].buffer.data = NULL; dvb_dmxdev_filter_state_set(&dmxdev->filter[i], DMXDEV_STATE_FREE); } ret = dvb_register_device(dvb_adapter, &dmxdev->dvbdev, &dvbdev_demux, dmxdev, DVB_DEVICE_DEMUX, dmxdev->filternum); if (ret < 0) goto err_register_dvbdev; ret = dvb_register_device(dvb_adapter, &dmxdev->dvr_dvbdev, &dvbdev_dvr, dmxdev, DVB_DEVICE_DVR, dmxdev->filternum); if (ret < 0) goto err_register_dvr_dvbdev; dvb_ringbuffer_init(&dmxdev->dvr_buffer, NULL, 8192); return 0; err_register_dvr_dvbdev: dvb_unregister_device(dmxdev->dvbdev); err_register_dvbdev: vfree(dmxdev->filter); dmxdev->filter = NULL; return ret; } EXPORT_SYMBOL(dvb_dmxdev_init); void dvb_dmxdev_release(struct dmxdev *dmxdev) { mutex_lock(&dmxdev->mutex); dmxdev->exit = 1; mutex_unlock(&dmxdev->mutex); if (dmxdev->dvbdev->users > 1) { wait_event(dmxdev->dvbdev->wait_queue, dmxdev->dvbdev->users == 1); } if (dmxdev->dvr_dvbdev->users > 1) { wait_event(dmxdev->dvr_dvbdev->wait_queue, dmxdev->dvr_dvbdev->users == 1); } dvb_unregister_device(dmxdev->dvbdev); dvb_unregister_device(dmxdev->dvr_dvbdev); vfree(dmxdev->filter); dmxdev->filter = NULL; dmxdev->demux->close(dmxdev->demux); } EXPORT_SYMBOL(dvb_dmxdev_release);
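/*
 * Editor's note: a minimal, hypothetical user-space sketch of the section-filter
 * path implemented above (DMX_SET_FILTER -> dvb_dmxdev_filter_set() ->
 * dvb_dmxdev_filter_start(), then read(2) -> dvb_dmxdev_read_sec()).  The device
 * node name, the PID/table_id choice and the helper name are assumptions for
 * illustration only; error handling is reduced to the bare minimum.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dvb/dmx.h>

int read_pat_section(void)
{
	struct dmx_sct_filter_params p;
	unsigned char section[4096];
	int fd, n;

	fd = open("/dev/dvb/adapter0/demux0", O_RDWR);	/* assumed device node */
	if (fd < 0)
		return -1;

	memset(&p, 0, sizeof(p));
	p.pid = 0x0000;			/* PAT */
	p.filter.filter[0] = 0x00;	/* match table_id == 0x00 */
	p.filter.mask[0] = 0xff;
	p.timeout = 5000;		/* ms; 0 would mean "never time out" */
	p.flags = DMX_IMMEDIATE_START | DMX_CHECK_CRC | DMX_ONESHOT;

	/* Handled by dvb_demux_do_ioctl(), case DMX_SET_FILTER. */
	if (ioctl(fd, DMX_SET_FILTER, &p) < 0) {
		close(fd);
		return -1;
	}

	/* Served by dvb_demux_read() -> dvb_dmxdev_read_sec(). */
	n = read(fd, section, sizeof(section));
	if (n > 0)
		printf("got %d byte PAT section\n", n);

	close(fd);
	return n;
}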
// SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for ACRUX game controllers * * From what I have gathered, these devices are mass produced in China * by several vendors. They often share the same design as the original * Xbox 360 controller. * * 1a34:0802 "ACRUX USB GAMEPAD 8116" * - tested with an EXEQ EQ-PCU-02090 game controller. * * Copyright (c) 2010 Sergei Kolzun <x0r@dv-life.ru> */ /* */ #include <linux/input.h> #include <linux/slab.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" #ifdef CONFIG_HID_ACRUX_FF struct axff_device { struct hid_report *report; }; static int axff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct axff_device *axff = data; struct hid_report *report = axff->report; int field_count = 0; int left, right; int i, j; left = effect->u.rumble.strong_magnitude; right = effect->u.rumble.weak_magnitude; dbg_hid("called with 0x%04x 0x%04x", left, right); left = left * 0xff / 0xffff; right = right * 0xff / 0xffff; for (i = 0; i < report->maxfield; i++) { for (j = 0; j < report->field[i]->report_count; j++) { report->field[i]->value[j] = field_count % 2 ? 
right : left; field_count++; } } dbg_hid("running with 0x%02x 0x%02x", left, right); hid_hw_request(hid, axff->report, HID_REQ_SET_REPORT); return 0; } static int axff_init(struct hid_device *hid) { struct axff_device *axff; struct hid_report *report; struct hid_input *hidinput; struct list_head *report_list =&hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev; int field_count = 0; int i, j; int error; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_first_entry(&hid->inputs, struct hid_input, list); dev = hidinput->input; if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; } report = list_first_entry(report_list, struct hid_report, list); for (i = 0; i < report->maxfield; i++) { for (j = 0; j < report->field[i]->report_count; j++) { report->field[i]->value[j] = 0x00; field_count++; } } if (field_count < 4 && hid->product != 0xf705) { hid_err(hid, "not enough fields in the report: %d\n", field_count); return -ENODEV; } axff = kzalloc(sizeof(struct axff_device), GFP_KERNEL); if (!axff) return -ENOMEM; set_bit(FF_RUMBLE, dev->ffbit); error = input_ff_create_memless(dev, axff, axff_play); if (error) goto err_free_mem; axff->report = report; hid_hw_request(hid, axff->report, HID_REQ_SET_REPORT); hid_info(hid, "Force Feedback for ACRUX game controllers by Sergei Kolzun <x0r@dv-life.ru>\n"); return 0; err_free_mem: kfree(axff); return error; } #else static inline int axff_init(struct hid_device *hid) { return 0; } #endif static int ax_probe(struct hid_device *hdev, const struct hid_device_id *id) { int error; dev_dbg(&hdev->dev, "ACRUX HID hardware probe...\n"); error = hid_parse(hdev); if (error) { hid_err(hdev, "parse failed\n"); return error; } error = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (error) { hid_err(hdev, "hw start failed\n"); return error; } error = axff_init(hdev); if (error) { /* * Do not fail device initialization completely as device * may still be partially operable, just warn. */ hid_warn(hdev, "Failed to enable force feedback support, error: %d\n", error); } /* * We need to start polling device right away, otherwise * it will go into a coma. */ error = hid_hw_open(hdev); if (error) { dev_err(&hdev->dev, "hw open failed\n"); hid_hw_stop(hdev); return error; } return 0; } static void ax_remove(struct hid_device *hdev) { hid_hw_close(hdev); hid_hw_stop(hdev); } static const struct hid_device_id ax_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0x0802), }, { HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0xf705), }, { } }; MODULE_DEVICE_TABLE(hid, ax_devices); static struct hid_driver ax_driver = { .name = "acrux", .id_table = ax_devices, .probe = ax_probe, .remove = ax_remove, }; module_hid_driver(ax_driver); MODULE_AUTHOR("Sergei Kolzun"); MODULE_DESCRIPTION("Force feedback support for ACRUX game controllers"); MODULE_LICENSE("GPL");
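/*
 * Editor's note: the memless force-feedback device registered above is driven
 * through the ordinary evdev FF interface.  A minimal user-space sketch follows,
 * assuming the gamepad is /dev/input/event0 (hypothetical node) and using a
 * made-up helper name; the two magnitudes end up in effect->u.rumble and are
 * scaled down to 0..0xff inside axff_play().
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/input.h>

int rumble_for_a_second(void)
{
	struct ff_effect effect;
	struct input_event play;
	int fd;

	fd = open("/dev/input/event0", O_RDWR);	/* assumed event node */
	if (fd < 0)
		return -1;

	memset(&effect, 0, sizeof(effect));
	effect.type = FF_RUMBLE;
	effect.id = -1;					/* let the kernel assign an id */
	effect.u.rumble.strong_magnitude = 0xc000;	/* becomes "left" in axff_play() */
	effect.u.rumble.weak_magnitude = 0x4000;	/* becomes "right" in axff_play() */
	effect.replay.length = 1000;			/* ms */

	if (ioctl(fd, EVIOCSFF, &effect) < 0) {		/* upload the effect */
		close(fd);
		return -1;
	}

	memset(&play, 0, sizeof(play));
	play.type = EV_FF;
	play.code = effect.id;
	play.value = 1;					/* start playback */
	if (write(fd, &play, sizeof(play)) != sizeof(play)) {
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}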
// SPDX-License-Identifier: GPL-2.0+ /* * usblp.c * * Copyright (c) 1999 Michael Gee <michael@linuxspecific.com> * Copyright (c) 1999 Pavel Machek <pavel@ucw.cz> * Copyright (c) 2000 Randy Dunlap <rdunlap@xenotime.net> * Copyright (c) 2000 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2001 Pete Zaitcev <zaitcev@redhat.com> * Copyright (c) 2001 David Paschal <paschal@rcsis.com> * Copyright (c) 2006 Oliver Neukum <oliver@neukum.name> * * USB Printer Device Class driver for USB printers and printer cables * * Sponsored by SuSE * * ChangeLog: * v0.1 - thorough cleaning, URBification, almost a rewrite * v0.2 - some more cleanups * v0.3 - cleaner again, waitqueue fixes * v0.4 - fixes in unidirectional mode * v0.5 - add DEVICE_ID string 
support * v0.6 - never time out * v0.7 - fixed bulk-IN read and poll (David Paschal) * v0.8 - add devfs support * v0.9 - fix unplug-while-open paths * v0.10- remove sleep_on, fix error on oom (oliver@neukum.org) * v0.11 - add proto_bias option (Pete Zaitcev) * v0.12 - add hpoj.sourceforge.net ioctls (David Paschal) * v0.13 - alloc space for statusbuf (<status> not on stack); * use usb_alloc_coherent() for read buf & write buf; * none - Maintained in Linux kernel after v0.13 */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/signal.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/lp.h> #include <linux/mutex.h> #undef DEBUG #include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/ratelimit.h> /* * Version Information */ #define DRIVER_AUTHOR "Michael Gee, Pavel Machek, Vojtech Pavlik, Randy Dunlap, Pete Zaitcev, David Paschal" #define DRIVER_DESC "USB Printer Device Class driver" #define USBLP_BUF_SIZE 8192 #define USBLP_BUF_SIZE_IN 1024 #define USBLP_DEVICE_ID_SIZE 1024 /* ioctls: */ #define IOCNR_GET_DEVICE_ID 1 #define IOCNR_GET_PROTOCOLS 2 #define IOCNR_SET_PROTOCOL 3 #define IOCNR_HP_SET_CHANNEL 4 #define IOCNR_GET_BUS_ADDRESS 5 #define IOCNR_GET_VID_PID 6 #define IOCNR_SOFT_RESET 7 /* Get device_id string: */ #define LPIOC_GET_DEVICE_ID(len) _IOC(_IOC_READ, 'P', IOCNR_GET_DEVICE_ID, len) /* The following ioctls were added for http://hpoj.sourceforge.net: * Get two-int array: * [0]=current protocol * (1=USB_CLASS_PRINTER/1/1, 2=USB_CLASS_PRINTER/1/2, * 3=USB_CLASS_PRINTER/1/3), * [1]=supported protocol mask (mask&(1<<n)!=0 means * USB_CLASS_PRINTER/1/n supported): */ #define LPIOC_GET_PROTOCOLS(len) _IOC(_IOC_READ, 'P', IOCNR_GET_PROTOCOLS, len) /* * Set protocol * (arg: 1=USB_CLASS_PRINTER/1/1, 2=USB_CLASS_PRINTER/1/2, * 3=USB_CLASS_PRINTER/1/3): */ #define LPIOC_SET_PROTOCOL _IOC(_IOC_WRITE, 'P', IOCNR_SET_PROTOCOL, 0) /* Set channel number (HP Vendor-specific command): */ #define LPIOC_HP_SET_CHANNEL _IOC(_IOC_WRITE, 'P', IOCNR_HP_SET_CHANNEL, 0) /* Get two-int array: [0]=bus number, [1]=device address: */ #define LPIOC_GET_BUS_ADDRESS(len) _IOC(_IOC_READ, 'P', IOCNR_GET_BUS_ADDRESS, len) /* Get two-int array: [0]=vendor ID, [1]=product ID: */ #define LPIOC_GET_VID_PID(len) _IOC(_IOC_READ, 'P', IOCNR_GET_VID_PID, len) /* Perform class specific soft reset */ #define LPIOC_SOFT_RESET _IOC(_IOC_NONE, 'P', IOCNR_SOFT_RESET, 0); /* * A DEVICE_ID string may include the printer's serial number. * It should end with a semi-colon (';'). * An example from an HP 970C DeskJet printer is (this is one long string, * with the serial number changed): MFG:HEWLETT-PACKARD;MDL:DESKJET 970C;CMD:MLC,PCL,PML;CLASS:PRINTER;DESCRIPTION:Hewlett-Packard DeskJet 970C;SERN:US970CSEPROF;VSTATUS:$HB0$NC0,ff,DN,IDLE,CUT,K1,C0,DP,NR,KP000,CP027;VP:0800,FL,B0;VJ: ; */ /* * USB Printer Requests */ #define USBLP_REQ_GET_ID 0x00 #define USBLP_REQ_GET_STATUS 0x01 #define USBLP_REQ_RESET 0x02 #define USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST 0x00 /* HP Vendor-specific */ #define USBLP_MINORS 16 #define USBLP_MINOR_BASE 0 #define USBLP_CTL_TIMEOUT 5000 /* 5 seconds */ #define USBLP_FIRST_PROTOCOL 1 #define USBLP_LAST_PROTOCOL 3 #define USBLP_MAX_PROTOCOLS (USBLP_LAST_PROTOCOL+1) /* * some arbitrary status buffer size; * need a status buffer that is allocated via kmalloc(), not on stack */ #define STATUS_BUF_SIZE 8 /* * Locks down the locking order: * ->wmut locks wstatus. 
* ->mut locks the whole usblp, except [rw]complete, and thus, by indirection, * [rw]status. We only touch status when we know the side idle. * ->lock locks what interrupt accesses. */ struct usblp { struct usb_device *dev; /* USB device */ struct mutex wmut; struct mutex mut; spinlock_t lock; /* locks rcomplete, wcomplete */ char *readbuf; /* read transfer_buffer */ char *statusbuf; /* status transfer_buffer */ struct usb_anchor urbs; wait_queue_head_t rwait, wwait; int readcount; /* Counter for reads */ int ifnum; /* Interface number */ struct usb_interface *intf; /* The interface */ /* * Alternate-setting numbers and endpoints for each protocol * (USB_CLASS_PRINTER/1/{index=1,2,3}) that the device supports: */ struct { int alt_setting; struct usb_endpoint_descriptor *epwrite; struct usb_endpoint_descriptor *epread; } protocol[USBLP_MAX_PROTOCOLS]; int current_protocol; int minor; /* minor number of device */ int wcomplete, rcomplete; int wstatus; /* bytes written or error */ int rstatus; /* bytes ready or error */ unsigned int quirks; /* quirks flags */ unsigned int flags; /* mode flags */ unsigned char used; /* True if open */ unsigned char present; /* True if not disconnected */ unsigned char bidir; /* interface is bidirectional */ unsigned char no_paper; /* Paper Out happened */ unsigned char *device_id_string; /* IEEE 1284 DEVICE ID string (ptr) */ /* first 2 bytes are (big-endian) length */ }; #ifdef DEBUG static void usblp_dump(struct usblp *usblp) { struct device *dev = &usblp->intf->dev; int p; dev_dbg(dev, "usblp=0x%p\n", usblp); dev_dbg(dev, "dev=0x%p\n", usblp->dev); dev_dbg(dev, "present=%d\n", usblp->present); dev_dbg(dev, "readbuf=0x%p\n", usblp->readbuf); dev_dbg(dev, "readcount=%d\n", usblp->readcount); dev_dbg(dev, "ifnum=%d\n", usblp->ifnum); for (p = USBLP_FIRST_PROTOCOL; p <= USBLP_LAST_PROTOCOL; p++) { dev_dbg(dev, "protocol[%d].alt_setting=%d\n", p, usblp->protocol[p].alt_setting); dev_dbg(dev, "protocol[%d].epwrite=%p\n", p, usblp->protocol[p].epwrite); dev_dbg(dev, "protocol[%d].epread=%p\n", p, usblp->protocol[p].epread); } dev_dbg(dev, "current_protocol=%d\n", usblp->current_protocol); dev_dbg(dev, "minor=%d\n", usblp->minor); dev_dbg(dev, "wstatus=%d\n", usblp->wstatus); dev_dbg(dev, "rstatus=%d\n", usblp->rstatus); dev_dbg(dev, "quirks=%d\n", usblp->quirks); dev_dbg(dev, "used=%d\n", usblp->used); dev_dbg(dev, "bidir=%d\n", usblp->bidir); dev_dbg(dev, "device_id_string=\"%s\"\n", usblp->device_id_string ? usblp->device_id_string + 2 : (unsigned char *)"(null)"); } #endif /* Quirks: various printer quirks are handled by this table & its flags. 
*/ struct quirk_printer_struct { __u16 vendorId; __u16 productId; unsigned int quirks; }; #define USBLP_QUIRK_BIDIR 0x1 /* reports bidir but requires unidirectional mode (no INs/reads) */ #define USBLP_QUIRK_USB_INIT 0x2 /* needs vendor USB init string */ #define USBLP_QUIRK_BAD_CLASS 0x4 /* descriptor uses vendor-specific Class or SubClass */ static const struct quirk_printer_struct quirk_printers[] = { { 0x03f0, 0x0004, USBLP_QUIRK_BIDIR }, /* HP DeskJet 895C */ { 0x03f0, 0x0104, USBLP_QUIRK_BIDIR }, /* HP DeskJet 880C */ { 0x03f0, 0x0204, USBLP_QUIRK_BIDIR }, /* HP DeskJet 815C */ { 0x03f0, 0x0304, USBLP_QUIRK_BIDIR }, /* HP DeskJet 810C/812C */ { 0x03f0, 0x0404, USBLP_QUIRK_BIDIR }, /* HP DeskJet 830C */ { 0x03f0, 0x0504, USBLP_QUIRK_BIDIR }, /* HP DeskJet 885C */ { 0x03f0, 0x0604, USBLP_QUIRK_BIDIR }, /* HP DeskJet 840C */ { 0x03f0, 0x0804, USBLP_QUIRK_BIDIR }, /* HP DeskJet 816C */ { 0x03f0, 0x1104, USBLP_QUIRK_BIDIR }, /* HP Deskjet 959C */ { 0x0409, 0xefbe, USBLP_QUIRK_BIDIR }, /* NEC Picty900 (HP OEM) */ { 0x0409, 0xbef4, USBLP_QUIRK_BIDIR }, /* NEC Picty760 (HP OEM) */ { 0x0409, 0xf0be, USBLP_QUIRK_BIDIR }, /* NEC Picty920 (HP OEM) */ { 0x0409, 0xf1be, USBLP_QUIRK_BIDIR }, /* NEC Picty800 (HP OEM) */ { 0x0482, 0x0010, USBLP_QUIRK_BIDIR }, /* Kyocera Mita FS 820, by zut <kernel@zut.de> */ { 0x04f9, 0x000d, USBLP_QUIRK_BIDIR }, /* Brother Industries, Ltd HL-1440 Laser Printer */ { 0x04b8, 0x0202, USBLP_QUIRK_BAD_CLASS }, /* Seiko Epson Receipt Printer M129C */ { 0, 0 } }; static int usblp_wwait(struct usblp *usblp, int nonblock); static int usblp_wtest(struct usblp *usblp, int nonblock); static int usblp_rwait_and_lock(struct usblp *usblp, int nonblock); static int usblp_rtest(struct usblp *usblp, int nonblock); static int usblp_submit_read(struct usblp *usblp); static int usblp_select_alts(struct usblp *usblp); static int usblp_set_protocol(struct usblp *usblp, int protocol); static int usblp_cache_device_id_string(struct usblp *usblp); /* forward reference to make our lives easier */ static struct usb_driver usblp_driver; static DEFINE_MUTEX(usblp_mutex); /* locks the existence of usblp's */ /* * Functions for usblp control messages. */ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, int recip, int value, void *buf, int len) { int retval; int index = usblp->ifnum; /* High byte has the interface index. Low byte has the alternate setting. */ if ((request == USBLP_REQ_GET_ID) && (type == USB_TYPE_CLASS)) index = (usblp->ifnum<<8)|usblp->protocol[usblp->current_protocol].alt_setting; retval = usb_control_msg(usblp->dev, dir ? usb_rcvctrlpipe(usblp->dev, 0) : usb_sndctrlpipe(usblp->dev, 0), request, type | dir | recip, value, index, buf, len, USBLP_CTL_TIMEOUT); dev_dbg(&usblp->intf->dev, "usblp_control_msg: rq: 0x%02x dir: %d recip: %d value: %d idx: %d len: %#x result: %d\n", request, !!dir, recip, value, index, len, retval); return retval < 0 ? 
retval : 0; } #define usblp_read_status(usblp, status)\ usblp_ctrl_msg(usblp, USBLP_REQ_GET_STATUS, USB_TYPE_CLASS, USB_DIR_IN, USB_RECIP_INTERFACE, 0, status, 1) #define usblp_get_id(usblp, config, id, maxlen)\ usblp_ctrl_msg(usblp, USBLP_REQ_GET_ID, USB_TYPE_CLASS, USB_DIR_IN, USB_RECIP_INTERFACE, config, id, maxlen) #define usblp_reset(usblp)\ usblp_ctrl_msg(usblp, USBLP_REQ_RESET, USB_TYPE_CLASS, USB_DIR_OUT, USB_RECIP_OTHER, 0, NULL, 0) static int usblp_hp_channel_change_request(struct usblp *usblp, int channel, u8 *new_channel) { u8 *buf; int ret; buf = kzalloc(1, GFP_KERNEL); if (!buf) return -ENOMEM; ret = usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, channel, buf, 1); if (ret == 0) *new_channel = buf[0]; kfree(buf); return ret; } /* * See the description for usblp_select_alts() below for the usage * explanation. Look into your /sys/kernel/debug/usb/devices and dmesg in * case of any trouble. */ static int proto_bias = -1; /* * URB callback. */ static void usblp_bulk_read(struct urb *urb) { struct usblp *usblp = urb->context; int status = urb->status; unsigned long flags; if (usblp->present && usblp->used) { if (status) printk(KERN_WARNING "usblp%d: " "nonzero read bulk status received: %d\n", usblp->minor, status); } spin_lock_irqsave(&usblp->lock, flags); if (status < 0) usblp->rstatus = status; else usblp->rstatus = urb->actual_length; usblp->rcomplete = 1; wake_up(&usblp->rwait); spin_unlock_irqrestore(&usblp->lock, flags); usb_free_urb(urb); } static void usblp_bulk_write(struct urb *urb) { struct usblp *usblp = urb->context; int status = urb->status; unsigned long flags; if (usblp->present && usblp->used) { if (status) printk(KERN_WARNING "usblp%d: " "nonzero write bulk status received: %d\n", usblp->minor, status); } spin_lock_irqsave(&usblp->lock, flags); if (status < 0) usblp->wstatus = status; else usblp->wstatus = urb->actual_length; usblp->no_paper = 0; usblp->wcomplete = 1; wake_up(&usblp->wwait); spin_unlock_irqrestore(&usblp->lock, flags); usb_free_urb(urb); } /* * Get and print printer errors. */ static const char *usblp_messages[] = { "ok", "out of paper", "off-line", "on fire" }; static int usblp_check_status(struct usblp *usblp, int err) { unsigned char status, newerr = 0; int error; mutex_lock(&usblp->mut); if ((error = usblp_read_status(usblp, usblp->statusbuf)) < 0) { mutex_unlock(&usblp->mut); printk_ratelimited(KERN_ERR "usblp%d: error %d reading printer status\n", usblp->minor, error); return 0; } status = *usblp->statusbuf; mutex_unlock(&usblp->mut); if (~status & LP_PERRORP) newerr = 3; if (status & LP_POUTPA) newerr = 1; if (~status & LP_PSELECD) newerr = 2; if (newerr != err) { printk(KERN_INFO "usblp%d: %s\n", usblp->minor, usblp_messages[newerr]); } return newerr; } static int handle_bidir(struct usblp *usblp) { if (usblp->bidir && usblp->used) { if (usblp_submit_read(usblp) < 0) return -EIO; } return 0; } /* * File op functions. 
*/ static int usblp_open(struct inode *inode, struct file *file) { int minor = iminor(inode); struct usblp *usblp; struct usb_interface *intf; int retval; if (minor < 0) return -ENODEV; mutex_lock(&usblp_mutex); retval = -ENODEV; intf = usb_find_interface(&usblp_driver, minor); if (!intf) goto out; usblp = usb_get_intfdata(intf); if (!usblp || !usblp->dev || !usblp->present) goto out; retval = -EBUSY; if (usblp->used) goto out; /* * We do not implement LP_ABORTOPEN/LPABORTOPEN for two reasons: * - We do not want persistent state which close(2) does not clear * - It is not used anyway, according to CUPS people */ retval = usb_autopm_get_interface(intf); if (retval < 0) goto out; usblp->used = 1; file->private_data = usblp; usblp->wcomplete = 1; /* we begin writeable */ usblp->wstatus = 0; usblp->rcomplete = 0; if (handle_bidir(usblp) < 0) { usb_autopm_put_interface(intf); usblp->used = 0; file->private_data = NULL; retval = -EIO; } out: mutex_unlock(&usblp_mutex); return retval; } static void usblp_cleanup(struct usblp *usblp) { printk(KERN_INFO "usblp%d: removed\n", usblp->minor); kfree(usblp->readbuf); kfree(usblp->device_id_string); kfree(usblp->statusbuf); usb_put_intf(usblp->intf); kfree(usblp); } static void usblp_unlink_urbs(struct usblp *usblp) { usb_kill_anchored_urbs(&usblp->urbs); } static int usblp_release(struct inode *inode, struct file *file) { struct usblp *usblp = file->private_data; usblp->flags &= ~LP_ABORT; mutex_lock(&usblp_mutex); usblp->used = 0; if (usblp->present) usblp_unlink_urbs(usblp); usb_autopm_put_interface(usblp->intf); if (!usblp->present) /* finish cleanup from disconnect */ usblp_cleanup(usblp); /* any URBs must be dead */ mutex_unlock(&usblp_mutex); return 0; } /* No kernel lock - fine */ static __poll_t usblp_poll(struct file *file, struct poll_table_struct *wait) { struct usblp *usblp = file->private_data; __poll_t ret = 0; unsigned long flags; /* Should we check file->f_mode & FMODE_WRITE before poll_wait()? 
*/ poll_wait(file, &usblp->rwait, wait); poll_wait(file, &usblp->wwait, wait); mutex_lock(&usblp->mut); if (!usblp->present) ret |= EPOLLHUP; mutex_unlock(&usblp->mut); spin_lock_irqsave(&usblp->lock, flags); if (usblp->bidir && usblp->rcomplete) ret |= EPOLLIN | EPOLLRDNORM; if (usblp->no_paper || usblp->wcomplete) ret |= EPOLLOUT | EPOLLWRNORM; spin_unlock_irqrestore(&usblp->lock, flags); return ret; } static long usblp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct usblp *usblp = file->private_data; int length, err, i; unsigned char newChannel; int status; int twoints[2]; int retval = 0; mutex_lock(&usblp->mut); if (!usblp->present) { retval = -ENODEV; goto done; } dev_dbg(&usblp->intf->dev, "usblp_ioctl: cmd=0x%x (%c nr=%d len=%d dir=%d)\n", cmd, _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd), _IOC_DIR(cmd)); if (_IOC_TYPE(cmd) == 'P') /* new-style ioctl number */ switch (_IOC_NR(cmd)) { case IOCNR_GET_DEVICE_ID: /* get the DEVICE_ID string */ if (_IOC_DIR(cmd) != _IOC_READ) { retval = -EINVAL; goto done; } length = usblp_cache_device_id_string(usblp); if (length < 0) { retval = length; goto done; } if (length > _IOC_SIZE(cmd)) length = _IOC_SIZE(cmd); /* truncate */ if (copy_to_user((void __user *) arg, usblp->device_id_string, (unsigned long) length)) { retval = -EFAULT; goto done; } break; case IOCNR_GET_PROTOCOLS: if (_IOC_DIR(cmd) != _IOC_READ || _IOC_SIZE(cmd) < sizeof(twoints)) { retval = -EINVAL; goto done; } twoints[0] = usblp->current_protocol; twoints[1] = 0; for (i = USBLP_FIRST_PROTOCOL; i <= USBLP_LAST_PROTOCOL; i++) { if (usblp->protocol[i].alt_setting >= 0) twoints[1] |= (1<<i); } if (copy_to_user((void __user *)arg, (unsigned char *)twoints, sizeof(twoints))) { retval = -EFAULT; goto done; } break; case IOCNR_SET_PROTOCOL: if (_IOC_DIR(cmd) != _IOC_WRITE) { retval = -EINVAL; goto done; } #ifdef DEBUG if (arg == -10) { usblp_dump(usblp); break; } #endif usblp_unlink_urbs(usblp); retval = usblp_set_protocol(usblp, arg); if (retval < 0) { usblp_set_protocol(usblp, usblp->current_protocol); } break; case IOCNR_HP_SET_CHANNEL: if (_IOC_DIR(cmd) != _IOC_WRITE || le16_to_cpu(usblp->dev->descriptor.idVendor) != 0x03F0 || usblp->quirks & USBLP_QUIRK_BIDIR) { retval = -EINVAL; goto done; } err = usblp_hp_channel_change_request(usblp, arg, &newChannel); if (err < 0) { dev_err(&usblp->dev->dev, "usblp%d: error = %d setting " "HP channel\n", usblp->minor, err); retval = -EIO; goto done; } dev_dbg(&usblp->intf->dev, "usblp%d requested/got HP channel %ld/%d\n", usblp->minor, arg, newChannel); break; case IOCNR_GET_BUS_ADDRESS: if (_IOC_DIR(cmd) != _IOC_READ || _IOC_SIZE(cmd) < sizeof(twoints)) { retval = -EINVAL; goto done; } twoints[0] = usblp->dev->bus->busnum; twoints[1] = usblp->dev->devnum; if (copy_to_user((void __user *)arg, (unsigned char *)twoints, sizeof(twoints))) { retval = -EFAULT; goto done; } dev_dbg(&usblp->intf->dev, "usblp%d is bus=%d, device=%d\n", usblp->minor, twoints[0], twoints[1]); break; case IOCNR_GET_VID_PID: if (_IOC_DIR(cmd) != _IOC_READ || _IOC_SIZE(cmd) < sizeof(twoints)) { retval = -EINVAL; goto done; } twoints[0] = le16_to_cpu(usblp->dev->descriptor.idVendor); twoints[1] = le16_to_cpu(usblp->dev->descriptor.idProduct); if (copy_to_user((void __user *)arg, (unsigned char *)twoints, sizeof(twoints))) { retval = -EFAULT; goto done; } dev_dbg(&usblp->intf->dev, "usblp%d is VID=0x%4.4X, PID=0x%4.4X\n", usblp->minor, twoints[0], twoints[1]); break; case IOCNR_SOFT_RESET: if (_IOC_DIR(cmd) != _IOC_NONE) { retval = -EINVAL; goto done; } 
retval = usblp_reset(usblp); break; default: retval = -ENOTTY; } else /* old-style ioctl value */ switch (cmd) { case LPGETSTATUS: retval = usblp_read_status(usblp, usblp->statusbuf); if (retval) { printk_ratelimited(KERN_ERR "usblp%d:" "failed reading printer status (%d)\n", usblp->minor, retval); retval = -EIO; goto done; } status = *usblp->statusbuf; if (copy_to_user((void __user *)arg, &status, sizeof(int))) retval = -EFAULT; break; case LPABORT: if (arg) usblp->flags |= LP_ABORT; else usblp->flags &= ~LP_ABORT; break; default: retval = -ENOTTY; } done: mutex_unlock(&usblp->mut); return retval; } static struct urb *usblp_new_writeurb(struct usblp *usblp, int transfer_length) { struct urb *urb; char *writebuf; writebuf = kmalloc(transfer_length, GFP_KERNEL); if (writebuf == NULL) return NULL; urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) { kfree(writebuf); return NULL; } usb_fill_bulk_urb(urb, usblp->dev, usb_sndbulkpipe(usblp->dev, usblp->protocol[usblp->current_protocol].epwrite->bEndpointAddress), writebuf, transfer_length, usblp_bulk_write, usblp); urb->transfer_flags |= URB_FREE_BUFFER; return urb; } static ssize_t usblp_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct usblp *usblp = file->private_data; struct urb *writeurb; int rv; int transfer_length; ssize_t writecount = 0; if (mutex_lock_interruptible(&usblp->wmut)) { rv = -EINTR; goto raise_biglock; } if ((rv = usblp_wwait(usblp, !!(file->f_flags & O_NONBLOCK))) < 0) goto raise_wait; while (writecount < count) { /* * Step 1: Submit next block. */ if ((transfer_length = count - writecount) > USBLP_BUF_SIZE) transfer_length = USBLP_BUF_SIZE; rv = -ENOMEM; writeurb = usblp_new_writeurb(usblp, transfer_length); if (writeurb == NULL) goto raise_urb; usb_anchor_urb(writeurb, &usblp->urbs); if (copy_from_user(writeurb->transfer_buffer, buffer + writecount, transfer_length)) { rv = -EFAULT; goto raise_badaddr; } spin_lock_irq(&usblp->lock); usblp->wcomplete = 0; spin_unlock_irq(&usblp->lock); if ((rv = usb_submit_urb(writeurb, GFP_KERNEL)) < 0) { usblp->wstatus = 0; spin_lock_irq(&usblp->lock); usblp->no_paper = 0; usblp->wcomplete = 1; wake_up(&usblp->wwait); spin_unlock_irq(&usblp->lock); if (rv != -ENOMEM) rv = -EIO; goto raise_submit; } /* * Step 2: Wait for transfer to end, collect results. */ rv = usblp_wwait(usblp, !!(file->f_flags&O_NONBLOCK)); if (rv < 0) { if (rv == -EAGAIN) { /* Presume that it's going to complete well. */ writecount += transfer_length; } if (rv == -ENOSPC) { spin_lock_irq(&usblp->lock); usblp->no_paper = 1; /* Mark for poll(2) */ spin_unlock_irq(&usblp->lock); writecount += transfer_length; } /* Leave URB dangling, to be cleaned on close. */ goto collect_error; } if (usblp->wstatus < 0) { rv = -EIO; goto collect_error; } /* * This is critical: it must be our URB, not other writer's. * The wmut exists mainly to cover us here. */ writecount += usblp->wstatus; } mutex_unlock(&usblp->wmut); return writecount; raise_submit: raise_badaddr: usb_unanchor_urb(writeurb); usb_free_urb(writeurb); raise_urb: raise_wait: collect_error: /* Out of raise sequence */ mutex_unlock(&usblp->wmut); raise_biglock: return writecount ? writecount : rv; } /* * Notice that we fail to restart in a few cases: on EFAULT, on restart * error, etc. This is the historical behaviour. In all such cases we return * EIO, and applications loop in order to get the new read going. 
*/ static ssize_t usblp_read(struct file *file, char __user *buffer, size_t len, loff_t *ppos) { struct usblp *usblp = file->private_data; ssize_t count; ssize_t avail; int rv; if (!usblp->bidir) return -EINVAL; rv = usblp_rwait_and_lock(usblp, !!(file->f_flags & O_NONBLOCK)); if (rv < 0) return rv; if (!usblp->present) { count = -ENODEV; goto done; } if ((avail = usblp->rstatus) < 0) { printk(KERN_ERR "usblp%d: error %d reading from printer\n", usblp->minor, (int)avail); usblp_submit_read(usblp); count = -EIO; goto done; } count = len < avail - usblp->readcount ? len : avail - usblp->readcount; if (count != 0 && copy_to_user(buffer, usblp->readbuf + usblp->readcount, count)) { count = -EFAULT; goto done; } if ((usblp->readcount += count) == avail) { if (usblp_submit_read(usblp) < 0) { /* We don't want to leak USB return codes into errno. */ if (count == 0) count = -EIO; goto done; } } done: mutex_unlock(&usblp->mut); return count; } /* * Wait for the write path to come idle. * This is called under the ->wmut, so the idle path stays idle. * * Our write path has a peculiar property: it does not buffer like a tty, * but waits for the write to succeed. This allows our ->release to bug out * without waiting for writes to drain. But it obviously does not work * when O_NONBLOCK is set. So, applications setting O_NONBLOCK must use * select(2) or poll(2) to wait for the buffer to drain before closing. * Alternatively, set blocking mode with fcntl and issue a zero-size write. */ static int usblp_wwait(struct usblp *usblp, int nonblock) { DECLARE_WAITQUEUE(waita, current); int rc; int err = 0; add_wait_queue(&usblp->wwait, &waita); for (;;) { if (mutex_lock_interruptible(&usblp->mut)) { rc = -EINTR; break; } set_current_state(TASK_INTERRUPTIBLE); rc = usblp_wtest(usblp, nonblock); mutex_unlock(&usblp->mut); if (rc <= 0) break; if (schedule_timeout(msecs_to_jiffies(1500)) == 0) { if (usblp->flags & LP_ABORT) { err = usblp_check_status(usblp, err); if (err == 1) { /* Paper out */ rc = -ENOSPC; break; } } else { /* Prod the printer, Gentoo#251237. */ mutex_lock(&usblp->mut); usblp_read_status(usblp, usblp->statusbuf); mutex_unlock(&usblp->mut); } } } set_current_state(TASK_RUNNING); remove_wait_queue(&usblp->wwait, &waita); return rc; } static int usblp_wtest(struct usblp *usblp, int nonblock) { unsigned long flags; if (!usblp->present) return -ENODEV; if (signal_pending(current)) return -EINTR; spin_lock_irqsave(&usblp->lock, flags); if (usblp->wcomplete) { spin_unlock_irqrestore(&usblp->lock, flags); return 0; } spin_unlock_irqrestore(&usblp->lock, flags); if (nonblock) return -EAGAIN; return 1; } /* * Wait for read bytes to become available. This probably should have been * called usblp_r_lock_and_wait(), because we lock first. But it's a traditional * name for functions which lock and return. * * We do not use wait_event_interruptible because it makes locking iffy. 
*/ static int usblp_rwait_and_lock(struct usblp *usblp, int nonblock) { DECLARE_WAITQUEUE(waita, current); int rc; add_wait_queue(&usblp->rwait, &waita); for (;;) { if (mutex_lock_interruptible(&usblp->mut)) { rc = -EINTR; break; } set_current_state(TASK_INTERRUPTIBLE); if ((rc = usblp_rtest(usblp, nonblock)) < 0) { mutex_unlock(&usblp->mut); break; } if (rc == 0) /* Keep it locked */ break; mutex_unlock(&usblp->mut); schedule(); } set_current_state(TASK_RUNNING); remove_wait_queue(&usblp->rwait, &waita); return rc; } static int usblp_rtest(struct usblp *usblp, int nonblock) { unsigned long flags; if (!usblp->present) return -ENODEV; if (signal_pending(current)) return -EINTR; spin_lock_irqsave(&usblp->lock, flags); if (usblp->rcomplete) { spin_unlock_irqrestore(&usblp->lock, flags); return 0; } spin_unlock_irqrestore(&usblp->lock, flags); if (nonblock) return -EAGAIN; return 1; } /* * Please check ->bidir and other such things outside for now. */ static int usblp_submit_read(struct usblp *usblp) { struct urb *urb; unsigned long flags; int rc; rc = -ENOMEM; urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) goto raise_urb; usb_fill_bulk_urb(urb, usblp->dev, usb_rcvbulkpipe(usblp->dev, usblp->protocol[usblp->current_protocol].epread->bEndpointAddress), usblp->readbuf, USBLP_BUF_SIZE_IN, usblp_bulk_read, usblp); usb_anchor_urb(urb, &usblp->urbs); spin_lock_irqsave(&usblp->lock, flags); usblp->readcount = 0; /* XXX Why here? */ usblp->rcomplete = 0; spin_unlock_irqrestore(&usblp->lock, flags); if ((rc = usb_submit_urb(urb, GFP_KERNEL)) < 0) { dev_dbg(&usblp->intf->dev, "error submitting urb (%d)\n", rc); spin_lock_irqsave(&usblp->lock, flags); usblp->rstatus = rc; usblp->rcomplete = 1; spin_unlock_irqrestore(&usblp->lock, flags); goto raise_submit; } return 0; raise_submit: usb_unanchor_urb(urb); usb_free_urb(urb); raise_urb: return rc; } /* * Checks for printers that have quirks, such as requiring unidirectional * communication but reporting bidirectional; currently some HP printers * have this flaw (HP 810, 880, 895, etc.), or needing an init string * sent at each open (like some Epsons). * Returns 1 if found, 0 if not found. * * HP recommended that we use the bidirectional interface but * don't attempt any bulk IN transfers from the IN endpoint. * Here's some more detail on the problem: * The problem is not that it isn't bidirectional though. The problem * is that if you request a device ID, or status information, while * the buffers are full, the return data will end up in the print data * buffer. For example if you make sure you never request the device ID * while you are sending print data, and you don't try to query the * printer status every couple of milliseconds, you will probably be OK. 
*/ static unsigned int usblp_quirks(__u16 vendor, __u16 product) { int i; for (i = 0; quirk_printers[i].vendorId; i++) { if (vendor == quirk_printers[i].vendorId && product == quirk_printers[i].productId) return quirk_printers[i].quirks; } return 0; } static const struct file_operations usblp_fops = { .owner = THIS_MODULE, .read = usblp_read, .write = usblp_write, .poll = usblp_poll, .unlocked_ioctl = usblp_ioctl, .compat_ioctl = usblp_ioctl, .open = usblp_open, .release = usblp_release, .llseek = noop_llseek, }; static char *usblp_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } static struct usb_class_driver usblp_class = { .name = "lp%d", .devnode = usblp_devnode, .fops = &usblp_fops, .minor_base = USBLP_MINOR_BASE, }; static ssize_t ieee1284_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usblp *usblp = usb_get_intfdata(intf); if (usblp->device_id_string[0] == 0 && usblp->device_id_string[1] == 0) return 0; return sprintf(buf, "%s", usblp->device_id_string+2); } static DEVICE_ATTR_RO(ieee1284_id); static struct attribute *usblp_attrs[] = { &dev_attr_ieee1284_id.attr, NULL, }; ATTRIBUTE_GROUPS(usblp); static int usblp_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); struct usblp *usblp; int protocol; int retval; /* Malloc and start initializing usblp structure so we can use it * directly. */ usblp = kzalloc(sizeof(struct usblp), GFP_KERNEL); if (!usblp) { retval = -ENOMEM; goto abort_ret; } usblp->dev = dev; mutex_init(&usblp->wmut); mutex_init(&usblp->mut); spin_lock_init(&usblp->lock); init_waitqueue_head(&usblp->rwait); init_waitqueue_head(&usblp->wwait); init_usb_anchor(&usblp->urbs); usblp->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; usblp->intf = usb_get_intf(intf); /* Malloc device ID string buffer to the largest expected length, * since we can re-query it on an ioctl and a dynamic string * could change in length. */ if (!(usblp->device_id_string = kmalloc(USBLP_DEVICE_ID_SIZE, GFP_KERNEL))) { retval = -ENOMEM; goto abort; } /* * Allocate read buffer. We somewhat wastefully * malloc both regardless of bidirectionality, because the * alternate setting can be changed later via an ioctl. */ if (!(usblp->readbuf = kmalloc(USBLP_BUF_SIZE_IN, GFP_KERNEL))) { retval = -ENOMEM; goto abort; } /* Allocate buffer for printer status */ usblp->statusbuf = kmalloc(STATUS_BUF_SIZE, GFP_KERNEL); if (!usblp->statusbuf) { retval = -ENOMEM; goto abort; } /* Lookup quirks for this printer. */ usblp->quirks = usblp_quirks( le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); /* Analyze and pick initial alternate settings and endpoints. */ protocol = usblp_select_alts(usblp); if (protocol < 0) { dev_dbg(&intf->dev, "incompatible printer-class device 0x%4.4X/0x%4.4X\n", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); retval = -ENODEV; goto abort; } /* Setup the selected alternate setting and endpoints. */ if (usblp_set_protocol(usblp, protocol) < 0) { retval = -ENODEV; /* ->probe isn't ->ioctl */ goto abort; } /* Retrieve and store the device ID string. 
*/ usblp_cache_device_id_string(usblp); #ifdef DEBUG usblp_check_status(usblp, 0); #endif usb_set_intfdata(intf, usblp); usblp->present = 1; retval = usb_register_dev(intf, &usblp_class); if (retval) { dev_err(&intf->dev, "usblp: Not able to get a minor (base %u, slice default): %d\n", USBLP_MINOR_BASE, retval); goto abort_intfdata; } usblp->minor = intf->minor; dev_info(&intf->dev, "usblp%d: USB %sdirectional printer dev %d if %d alt %d proto %d vid 0x%4.4X pid 0x%4.4X\n", usblp->minor, usblp->bidir ? "Bi" : "Uni", dev->devnum, usblp->ifnum, usblp->protocol[usblp->current_protocol].alt_setting, usblp->current_protocol, le16_to_cpu(usblp->dev->descriptor.idVendor), le16_to_cpu(usblp->dev->descriptor.idProduct)); return 0; abort_intfdata: usb_set_intfdata(intf, NULL); abort: kfree(usblp->readbuf); kfree(usblp->statusbuf); kfree(usblp->device_id_string); usb_put_intf(usblp->intf); kfree(usblp); abort_ret: return retval; } /* * We are a "new" style driver with usb_device_id table, * but our requirements are too intricate for simple match to handle. * * The "proto_bias" option may be used to specify the preferred protocol * for all USB printers (1=USB_CLASS_PRINTER/1/1, 2=USB_CLASS_PRINTER/1/2, * 3=USB_CLASS_PRINTER/1/3). If the device supports the preferred protocol, * then we bind to it. * * The best interface for us is USB_CLASS_PRINTER/1/2, because it * is compatible with a stream of characters. If we find it, we bind to it. * * Note that the people from hpoj.sourceforge.net need to be able to * bind to USB_CLASS_PRINTER/1/3 (MLC/1284.4), so we provide them ioctls * for this purpose. * * Failing USB_CLASS_PRINTER/1/2, we look for USB_CLASS_PRINTER/1/3, * even though it's probably not stream-compatible, because this matches * the behaviour of the old code. * * If nothing else, we bind to USB_CLASS_PRINTER/1/1 * - the unidirectional interface. */ static int usblp_select_alts(struct usblp *usblp) { struct usb_interface *if_alt; struct usb_host_interface *ifd; struct usb_endpoint_descriptor *epwrite, *epread; int p, i; int res; if_alt = usblp->intf; for (p = 0; p < USBLP_MAX_PROTOCOLS; p++) usblp->protocol[p].alt_setting = -1; /* Find out what we have. */ for (i = 0; i < if_alt->num_altsetting; i++) { ifd = &if_alt->altsetting[i]; if (ifd->desc.bInterfaceClass != USB_CLASS_PRINTER || ifd->desc.bInterfaceSubClass != 1) if (!(usblp->quirks & USBLP_QUIRK_BAD_CLASS)) continue; if (ifd->desc.bInterfaceProtocol < USBLP_FIRST_PROTOCOL || ifd->desc.bInterfaceProtocol > USBLP_LAST_PROTOCOL) continue; /* Look for the expected bulk endpoints. */ if (ifd->desc.bInterfaceProtocol > 1) { res = usb_find_common_endpoints(ifd, &epread, &epwrite, NULL, NULL); } else { epread = NULL; res = usb_find_bulk_out_endpoint(ifd, &epwrite); } /* Ignore buggy hardware without the right endpoints. */ if (res) continue; /* Turn off reads for buggy bidirectional printers. */ if (usblp->quirks & USBLP_QUIRK_BIDIR) { printk(KERN_INFO "usblp%d: Disabling reads from " "problematic bidirectional printer\n", usblp->minor); epread = NULL; } usblp->protocol[ifd->desc.bInterfaceProtocol].alt_setting = ifd->desc.bAlternateSetting; usblp->protocol[ifd->desc.bInterfaceProtocol].epwrite = epwrite; usblp->protocol[ifd->desc.bInterfaceProtocol].epread = epread; } /* If our requested protocol is supported, then use it. */ if (proto_bias >= USBLP_FIRST_PROTOCOL && proto_bias <= USBLP_LAST_PROTOCOL && usblp->protocol[proto_bias].alt_setting != -1) return proto_bias; /* Ordering is important here. 
*/ if (usblp->protocol[2].alt_setting != -1) return 2; if (usblp->protocol[1].alt_setting != -1) return 1; if (usblp->protocol[3].alt_setting != -1) return 3; /* If nothing is available, then don't bind to this device. */ return -1; } static int usblp_set_protocol(struct usblp *usblp, int protocol) { int r, alts; if (protocol < USBLP_FIRST_PROTOCOL || protocol > USBLP_LAST_PROTOCOL) return -EINVAL; /* Don't unnecessarily set the interface if there's a single alt. */ if (usblp->intf->num_altsetting > 1) { alts = usblp->protocol[protocol].alt_setting; if (alts < 0) return -EINVAL; r = usb_set_interface(usblp->dev, usblp->ifnum, alts); if (r < 0) { printk(KERN_ERR "usblp: can't set desired altsetting %d on interface %d\n", alts, usblp->ifnum); return r; } } usblp->bidir = (usblp->protocol[protocol].epread != NULL); usblp->current_protocol = protocol; dev_dbg(&usblp->intf->dev, "usblp%d set protocol %d\n", usblp->minor, protocol); return 0; } /* Retrieves and caches device ID string. * Returns length, including length bytes but not null terminator. * On error, returns a negative errno value. */ static int usblp_cache_device_id_string(struct usblp *usblp) { int err, length; err = usblp_get_id(usblp, 0, usblp->device_id_string, USBLP_DEVICE_ID_SIZE - 1); if (err < 0) { dev_dbg(&usblp->intf->dev, "usblp%d: error = %d reading IEEE-1284 Device ID string\n", usblp->minor, err); usblp->device_id_string[0] = usblp->device_id_string[1] = '\0'; return -EIO; } /* First two bytes are length in big-endian. * They count themselves, and we copy them into * the user's buffer. */ length = be16_to_cpu(*((__be16 *)usblp->device_id_string)); if (length < 2) length = 2; else if (length >= USBLP_DEVICE_ID_SIZE) length = USBLP_DEVICE_ID_SIZE - 1; usblp->device_id_string[length] = '\0'; dev_dbg(&usblp->intf->dev, "usblp%d Device ID string [len=%d]=\"%s\"\n", usblp->minor, length, &usblp->device_id_string[2]); return length; } static void usblp_disconnect(struct usb_interface *intf) { struct usblp *usblp = usb_get_intfdata(intf); usb_deregister_dev(intf, &usblp_class); if (!usblp || !usblp->dev) { dev_err(&intf->dev, "bogus disconnect\n"); BUG(); } mutex_lock(&usblp_mutex); mutex_lock(&usblp->mut); usblp->present = 0; wake_up(&usblp->wwait); wake_up(&usblp->rwait); usb_set_intfdata(intf, NULL); usblp_unlink_urbs(usblp); mutex_unlock(&usblp->mut); usb_poison_anchored_urbs(&usblp->urbs); if (!usblp->used) usblp_cleanup(usblp); mutex_unlock(&usblp_mutex); } static int usblp_suspend(struct usb_interface *intf, pm_message_t message) { struct usblp *usblp = usb_get_intfdata(intf); usblp_unlink_urbs(usblp); #if 0 /* XXX Do we want this? What if someone is reading, should we fail? 
*/ /* not strictly necessary, but just in case */ wake_up(&usblp->wwait); wake_up(&usblp->rwait); #endif return 0; } static int usblp_resume(struct usb_interface *intf) { struct usblp *usblp = usb_get_intfdata(intf); int r; r = handle_bidir(usblp); return r; } static const struct usb_device_id usblp_ids[] = { { USB_DEVICE_INFO(USB_CLASS_PRINTER, 1, 1) }, { USB_DEVICE_INFO(USB_CLASS_PRINTER, 1, 2) }, { USB_DEVICE_INFO(USB_CLASS_PRINTER, 1, 3) }, { USB_INTERFACE_INFO(USB_CLASS_PRINTER, 1, 1) }, { USB_INTERFACE_INFO(USB_CLASS_PRINTER, 1, 2) }, { USB_INTERFACE_INFO(USB_CLASS_PRINTER, 1, 3) }, { USB_DEVICE(0x04b8, 0x0202) }, /* Seiko Epson Receipt Printer M129C */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usblp_ids); static struct usb_driver usblp_driver = { .name = "usblp", .probe = usblp_probe, .disconnect = usblp_disconnect, .suspend = usblp_suspend, .resume = usblp_resume, .id_table = usblp_ids, .dev_groups = usblp_groups, .supports_autosuspend = 1, }; module_usb_driver(usblp_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); module_param(proto_bias, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(proto_bias, "Favourite protocol number"); MODULE_LICENSE("GPL");
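/*
 * Editor's note: a minimal user-space sketch (not part of the driver above)
 * showing how a print job is typically pushed through the character device
 * that usblp_probe() registers via usb_register_dev()/usblp_devnode()
 * ("usb/lp%d"). The "/dev/usb/lp0" path assumes devtmpfs/udev defaults and
 * is only an illustration; usblp_write() turns the write() below into
 * bulk-OUT transfers to the printer.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char job[] = "Hello from usblp\f";	/* '\f' typically ejects the page */
	int fd = open("/dev/usb/lp0", O_WRONLY);	/* node name is an assumption */

	if (fd < 0) {
		perror("open /dev/usb/lp0");
		return 1;
	}
	if (write(fd, job, strlen(job)) < 0)
		perror("write");
	close(fd);
	return 0;
}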
// SPDX-License-Identifier: GPL-2.0-only /* * x86 APERF/MPERF KHz calculation for * /sys/.../cpufreq/scaling_cur_freq * * Copyright (C) 2017 Intel Corp. * Author: Len Brown <len.brown@intel.com> */ #include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/ktime.h> #include <linux/math64.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/sched/isolation.h> #include <linux/sched/topology.h> #include <linux/smp.h> #include <linux/syscore_ops.h> #include <asm/cpu.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "cpu.h" struct aperfmperf { seqcount_t seq; unsigned long last_update; u64 acnt; u64 mcnt; u64 aperf; u64 mperf; }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = { .seq = SEQCNT_ZERO(cpu_samples.seq) }; static void init_counter_refs(void) { u64 aperf, mperf; rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); this_cpu_write(cpu_samples.aperf, aperf); this_cpu_write(cpu_samples.mperf, mperf); } #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) /* * APERF/MPERF frequency ratio computation. * * The scheduler wants to do frequency invariant accounting and needs a <1 * ratio to account for the 'current' frequency, corresponding to * freq_curr / freq_max. * * Since the frequency freq_curr on x86 is controlled by micro-controller and * our P-state setting is little more than a request/hint, we need to observe * the effective frequency 'BusyMHz', i.e. the average frequency over a time * interval after discarding idle time. This is given by: * * BusyMHz = delta_APERF / delta_MPERF * freq_base * * where freq_base is the max non-turbo P-state. 
* * The freq_max term has to be set to a somewhat arbitrary value, because we * can't know which turbo states will be available at a given point in time: * it all depends on the thermal headroom of the entire package. We set it to * the turbo level with 4 cores active. * * Benchmarks show that's a good compromise between the 1C turbo ratio * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, * which would ignore the entire turbo range (a conspicuous part, making * freq_curr/freq_max always maxed out). * * An exception to the heuristic above is the Atom uarch, where we choose the * highest turbo level for freq_max since Atom's are generally oriented towards * power efficiency. * * Setting freq_max to anything less than the 1C turbo ratio makes the ratio * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. */ DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; void arch_set_max_freq_ratio(bool turbo_disabled) { arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE : arch_turbo_freq_ratio; } EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); static bool __init turbo_disabled(void) { u64 misc_en; int err; err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); if (err) return false; return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) { int err; err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); if (err) return false; err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); if (err) return false; *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ return true; } #define X86_MATCH(model) \ X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = { X86_MATCH(XEON_PHI_KNL), X86_MATCH(XEON_PHI_KNM), {} }; static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = { X86_MATCH(SKYLAKE_X), {} }; static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = { X86_MATCH(ATOM_GOLDMONT), X86_MATCH(ATOM_GOLDMONT_D), X86_MATCH(ATOM_GOLDMONT_PLUS), {} }; static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int num_delta_fratio) { int fratio, delta_fratio, found; int err, i; u64 msr; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); if (err) return false; fratio = (msr >> 8) & 0xFF; i = 16; found = 0; do { if (found >= num_delta_fratio) { *turbo_freq = fratio; return true; } delta_fratio = (msr >> (i + 5)) & 0x7; if (delta_fratio) { found += 1; fratio -= delta_fratio; } i += 8; } while (i < 64); return true; } static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) { u64 ratios, counts; u32 group_size; int err, i; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); if (err) return false; err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); if (err) return false; for (i = 0; i < 64; i += 8) { group_size = (counts >> i) & 0xFF; if (group_size >= size) { *turbo_freq = (ratios >> i) & 0xFF; return true; } } return false; } static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 
*turbo_freq) { u64 msr; int err; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ /* The CPU may have less than 4 cores */ if (!*turbo_freq) *turbo_freq = msr & 0xFF; /* 1C turbo */ return true; } static bool __init intel_set_max_freq_ratio(void) { u64 base_freq, turbo_freq; u64 turbo_ratio; if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out; if (x86_match_cpu(has_glm_turbo_ratio_limits) && skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) goto out; if (x86_match_cpu(has_knl_turbo_ratio_limits) && knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) goto out; if (x86_match_cpu(has_skx_turbo_ratio_limits) && skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) goto out; if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out; return false; out: /* * Some hypervisors advertise X86_FEATURE_APERFMPERF * but then fill all MSR's with zeroes. * Some CPUs have turbo boost but don't declare any turbo ratio * in MSR_TURBO_RATIO_LIMIT. */ if (!base_freq || !turbo_freq) { pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); return false; } turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); if (!turbo_ratio) { pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); return false; } arch_turbo_freq_ratio = turbo_ratio; arch_set_max_freq_ratio(turbo_disabled()); return true; } #ifdef CONFIG_PM_SLEEP static struct syscore_ops freq_invariance_syscore_ops = { .resume = init_counter_refs, }; static void register_freq_invariance_syscore_ops(void) { register_syscore_ops(&freq_invariance_syscore_ops); } #else static inline void register_freq_invariance_syscore_ops(void) {} #endif static void freq_invariance_enable(void) { if (static_branch_unlikely(&arch_scale_freq_key)) { WARN_ON_ONCE(1); return; } static_branch_enable(&arch_scale_freq_key); register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { arch_turbo_freq_ratio = ratio; arch_set_max_freq_ratio(turbo_disabled); freq_invariance_enable(); } static void __init bp_init_freq_invariance(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; if (intel_set_max_freq_ratio()) freq_invariance_enable(); } static void disable_freq_invariance_workfn(struct work_struct *work) { int cpu; static_branch_disable(&arch_scale_freq_key); /* * Set arch_freq_scale to a default value on all cpus * This negates the effect of scaling */ for_each_possible_cpu(cpu) per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE; } static DECLARE_WORK(disable_freq_invariance_work, disable_freq_invariance_workfn); DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; static void scale_freq_tick(u64 acnt, u64 mcnt) { u64 freq_scale; if (!arch_scale_freq_invariant()) return; if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) goto error; freq_scale = div64_u64(acnt, mcnt); if (!freq_scale) goto error; if (freq_scale > SCHED_CAPACITY_SCALE) freq_scale = SCHED_CAPACITY_SCALE; this_cpu_write(arch_freq_scale, freq_scale); return; error: pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); 
schedule_work(&disable_freq_invariance_work); } #else static inline void bp_init_freq_invariance(void) { } static inline void scale_freq_tick(u64 acnt, u64 mcnt) { } #endif /* CONFIG_X86_64 && CONFIG_SMP */ void arch_scale_freq_tick(void) { struct aperfmperf *s = this_cpu_ptr(&cpu_samples); u64 acnt, mcnt, aperf, mperf; if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return; rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); acnt = aperf - s->aperf; mcnt = mperf - s->mperf; s->aperf = aperf; s->mperf = mperf; raw_write_seqcount_begin(&s->seq); s->last_update = jiffies; s->acnt = acnt; s->mcnt = mcnt; raw_write_seqcount_end(&s->seq); scale_freq_tick(acnt, mcnt); } /* * Discard samples older than the define maximum sample age of 20ms. There * is no point in sending IPIs in such a case. If the scheduler tick was * not running then the CPU is either idle or isolated. */ #define MAX_SAMPLE_AGE ((unsigned long)HZ / 50) unsigned int arch_freq_get_on_cpu(int cpu) { struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu); unsigned int seq, freq; unsigned long last; u64 acnt, mcnt; if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) goto fallback; do { seq = raw_read_seqcount_begin(&s->seq); last = s->last_update; acnt = s->acnt; mcnt = s->mcnt; } while (read_seqcount_retry(&s->seq, seq)); /* * Bail on invalid count and when the last update was too long ago, * which covers idle and NOHZ full CPUs. */ if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE) goto fallback; return div64_u64((cpu_khz * acnt), mcnt); fallback: freq = cpufreq_quick_get(cpu); return freq ? freq : cpu_khz; } static int __init bp_init_aperfmperf(void) { if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return 0; init_counter_refs(); bp_init_freq_invariance(); return 0; } early_initcall(bp_init_aperfmperf); void ap_init_aperfmperf(void) { if (cpu_feature_enabled(X86_FEATURE_APERFMPERF)) init_counter_refs(); }
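/*
 * Editor's note: a self-contained user-space sketch (assumed sample values,
 * not kernel code) reproducing the arithmetic described in the comments
 * above: the effective "BusyMHz" frequency derived from an APERF/MPERF delta
 * pair, and the capacity-scaled ratio computed the way scale_freq_tick()
 * does, clipped to SCHED_CAPACITY_SCALE.
 */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1ULL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	/* Hypothetical deltas sampled one scheduler tick apart. */
	uint64_t acnt = 3000000;	/* delta_APERF */
	uint64_t mcnt = 2000000;	/* delta_MPERF */
	uint64_t base_khz = 2100000;	/* freq_base: max non-turbo P-state */
	uint64_t max_freq_ratio = 1462;	/* e.g. 3.0 GHz 4C turbo / 2.1 GHz base * 1024 */

	/* BusyMHz = delta_APERF / delta_MPERF * freq_base (expressed in kHz here) */
	uint64_t busy_khz = base_khz * acnt / mcnt;

	/* freq_scale = (acnt << 2*SHIFT) / (mcnt * max_freq_ratio), clipped to 1024 */
	uint64_t scale = (acnt << (2 * SCHED_CAPACITY_SHIFT)) / (mcnt * max_freq_ratio);
	if (scale > SCHED_CAPACITY_SCALE)
		scale = SCHED_CAPACITY_SCALE;

	printf("effective frequency: %llu kHz\n", (unsigned long long)busy_khz);
	printf("arch_freq_scale:     %llu / %llu\n",
	       (unsigned long long)scale, (unsigned long long)SCHED_CAPACITY_SCALE);
	return 0;
}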
// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/devres.c - device resource management * * Copyright (c) 2006 SUSE Linux Products GmbH * Copyright (c) 2006 Tejun Heo <teheo@suse.de> */ #include <linux/device.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/percpu.h> #include <asm/sections.h> #include "base.h" #include "trace.h" struct devres_node { struct list_head entry; dr_release_t release; const char *name; size_t size; }; struct devres { struct devres_node node; /* * Some archs want to perform DMA into kmalloc caches * and need a guaranteed alignment larger than * the alignment of a 64-bit integer. * Thus we use ARCH_DMA_MINALIGN for data[] which will force the same * alignment for struct devres when allocated by kmalloc(). */ u8 __aligned(ARCH_DMA_MINALIGN) data[]; }; struct devres_group { struct devres_node node[2]; void *id; int color; /* -- 8 pointers */ }; static void set_node_dbginfo(struct devres_node *node, const char *name, size_t size) { node->name = name; node->size = size; } #ifdef CONFIG_DEBUG_DEVRES static int log_devres = 0; module_param_named(log, log_devres, int, S_IRUGO | S_IWUSR); static void devres_dbg(struct device *dev, struct devres_node *node, const char *op) { if (unlikely(log_devres)) dev_err(dev, "DEVRES %3s %p %s (%zu bytes)\n", op, node, node->name, node->size); } #else /* CONFIG_DEBUG_DEVRES */ #define devres_dbg(dev, node, op) do {} while (0) #endif /* CONFIG_DEBUG_DEVRES */ static void devres_log(struct device *dev, struct devres_node *node, const char *op) { trace_devres_log(dev, op, node, node->name, node->size); devres_dbg(dev, node, op); } /* * Release functions for devres group. These callbacks are used only * for identification. 
*/ static void group_open_release(struct device *dev, void *res) { /* noop */ } static void group_close_release(struct device *dev, void *res) { /* noop */ } static struct devres_group * node_to_group(struct devres_node *node) { if (node->release == &group_open_release) return container_of(node, struct devres_group, node[0]); if (node->release == &group_close_release) return container_of(node, struct devres_group, node[1]); return NULL; } static bool check_dr_size(size_t size, size_t *tot_size) { /* We must catch any near-SIZE_MAX cases that could overflow. */ if (unlikely(check_add_overflow(sizeof(struct devres), size, tot_size))) return false; /* Actually allocate the full kmalloc bucket size. */ *tot_size = kmalloc_size_roundup(*tot_size); return true; } static __always_inline struct devres * alloc_dr(dr_release_t release, size_t size, gfp_t gfp, int nid) { size_t tot_size; struct devres *dr; if (!check_dr_size(size, &tot_size)) return NULL; dr = kmalloc_node_track_caller(tot_size, gfp, nid); if (unlikely(!dr)) return NULL; /* No need to clear memory twice */ if (!(gfp & __GFP_ZERO)) memset(dr, 0, offsetof(struct devres, data)); INIT_LIST_HEAD(&dr->node.entry); dr->node.release = release; return dr; } static void add_dr(struct device *dev, struct devres_node *node) { devres_log(dev, node, "ADD"); BUG_ON(!list_empty(&node->entry)); list_add_tail(&node->entry, &dev->devres_head); } static void replace_dr(struct device *dev, struct devres_node *old, struct devres_node *new) { devres_log(dev, old, "REPLACE"); BUG_ON(!list_empty(&new->entry)); list_replace(&old->entry, &new->entry); } /** * __devres_alloc_node - Allocate device resource data * @release: Release function devres will be associated with * @size: Allocation size * @gfp: Allocation flags * @nid: NUMA node * @name: Name of the resource * * Allocate devres of @size bytes. The allocated area is zeroed, then * associated with @release. The returned pointer can be passed to * other devres_*() functions. * * RETURNS: * Pointer to allocated devres on success, NULL on failure. */ void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid, const char *name) { struct devres *dr; dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid); if (unlikely(!dr)) return NULL; set_node_dbginfo(&dr->node, name, size); return dr->data; } EXPORT_SYMBOL_GPL(__devres_alloc_node); /** * devres_for_each_res - Resource iterator * @dev: Device to iterate resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * @fn: Function to be called for each matched resource. * @data: Data for @fn, the 3rd parameter of @fn * * Call @fn for each devres of @dev which is associated with @release * and for which @match returns 1. 
* * RETURNS: * void */ void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), void *data) { struct devres_node *node; struct devres_node *tmp; unsigned long flags; if (!fn) return; spin_lock_irqsave(&dev->devres_lock, flags); list_for_each_entry_safe_reverse(node, tmp, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); if (node->release != release) continue; if (match && !match(dev, dr->data, match_data)) continue; fn(dev, dr->data, data); } spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_for_each_res); /** * devres_free - Free device resource data * @res: Pointer to devres data to free * * Free devres created with devres_alloc(). */ void devres_free(void *res) { if (res) { struct devres *dr = container_of(res, struct devres, data); BUG_ON(!list_empty(&dr->node.entry)); kfree(dr); } } EXPORT_SYMBOL_GPL(devres_free); /** * devres_add - Register device resource * @dev: Device to add resource to * @res: Resource to register * * Register devres @res to @dev. @res should have been allocated * using devres_alloc(). On driver detach, the associated release * function will be invoked and devres will be freed automatically. */ void devres_add(struct device *dev, void *res) { struct devres *dr = container_of(res, struct devres, data); unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); add_dr(dev, &dr->node); spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_add); static struct devres *find_dr(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres_node *node; list_for_each_entry_reverse(node, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); if (node->release != release) continue; if (match && !match(dev, dr->data, match_data)) continue; return dr; } return NULL; } /** * devres_find - Find device resource * @dev: Device to lookup resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev which is associated with @release * and for which @match returns 1. If @match is NULL, it's considered * to match all. * * RETURNS: * Pointer to found devres, NULL if not found. */ void * devres_find(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, release, match, match_data); spin_unlock_irqrestore(&dev->devres_lock, flags); if (dr) return dr->data; return NULL; } EXPORT_SYMBOL_GPL(devres_find); /** * devres_get - Find devres, if non-existent, add one atomically * @dev: Device to lookup or add devres for * @new_res: Pointer to new initialized devres to add if not found * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev which has the same release function * as @new_res and for which @match return 1. If found, @new_res is * freed; otherwise, @new_res is added atomically. * * RETURNS: * Pointer to found or added devres. 
*/ void * devres_get(struct device *dev, void *new_res, dr_match_t match, void *match_data) { struct devres *new_dr = container_of(new_res, struct devres, data); struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, new_dr->node.release, match, match_data); if (!dr) { add_dr(dev, &new_dr->node); dr = new_dr; new_res = NULL; } spin_unlock_irqrestore(&dev->devres_lock, flags); devres_free(new_res); return dr->data; } EXPORT_SYMBOL_GPL(devres_get); /** * devres_remove - Find a device resource and remove it * @dev: Device to find resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev associated with @release and for * which @match returns 1. If @match is NULL, it's considered to * match all. If found, the resource is removed atomically and * returned. * * RETURNS: * Pointer to removed devres on success, NULL if not found. */ void * devres_remove(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, release, match, match_data); if (dr) { list_del_init(&dr->node.entry); devres_log(dev, &dr->node, "REM"); } spin_unlock_irqrestore(&dev->devres_lock, flags); if (dr) return dr->data; return NULL; } EXPORT_SYMBOL_GPL(devres_remove); /** * devres_destroy - Find a device resource and destroy it * @dev: Device to find resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev associated with @release and for * which @match returns 1. If @match is NULL, it's considered to * match all. If found, the resource is removed atomically and freed. * * Note that the release function for the resource will not be called, * only the devres-allocated data will be freed. The caller becomes * responsible for freeing any other data. * * RETURNS: * 0 if devres is found and freed, -ENOENT if not found. */ int devres_destroy(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { void *res; res = devres_remove(dev, release, match, match_data); if (unlikely(!res)) return -ENOENT; devres_free(res); return 0; } EXPORT_SYMBOL_GPL(devres_destroy); /** * devres_release - Find a device resource and destroy it, calling release * @dev: Device to find resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev associated with @release and for * which @match returns 1. If @match is NULL, it's considered to * match all. If found, the resource is removed atomically, the * release function called and the resource freed. * * RETURNS: * 0 if devres is found and freed, -ENOENT if not found. */ int devres_release(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { void *res; res = devres_remove(dev, release, match, match_data); if (unlikely(!res)) return -ENOENT; (*release)(dev, res); devres_free(res); return 0; } EXPORT_SYMBOL_GPL(devres_release); static int remove_nodes(struct device *dev, struct list_head *first, struct list_head *end, struct list_head *todo) { struct devres_node *node, *n; int cnt = 0, nr_groups = 0; /* First pass - move normal devres entries to @todo and clear * devres_group colors. 
*/ node = list_entry(first, struct devres_node, entry); list_for_each_entry_safe_from(node, n, end, entry) { struct devres_group *grp; grp = node_to_group(node); if (grp) { /* clear color of group markers in the first pass */ grp->color = 0; nr_groups++; } else { /* regular devres entry */ if (&node->entry == first) first = first->next; list_move_tail(&node->entry, todo); cnt++; } } if (!nr_groups) return cnt; /* Second pass - Scan groups and color them. A group gets * color value of two iff the group is wholly contained in * [current node, end). That is, for a closed group, both opening * and closing markers should be in the range, while just the * opening marker is enough for an open group. */ node = list_entry(first, struct devres_node, entry); list_for_each_entry_safe_from(node, n, end, entry) { struct devres_group *grp; grp = node_to_group(node); BUG_ON(!grp || list_empty(&grp->node[0].entry)); grp->color++; if (list_empty(&grp->node[1].entry)) grp->color++; BUG_ON(grp->color <= 0 || grp->color > 2); if (grp->color == 2) { /* No need to update current node or end. The removed * nodes are always before both. */ list_move_tail(&grp->node[0].entry, todo); list_del_init(&grp->node[1].entry); } } return cnt; } static void release_nodes(struct device *dev, struct list_head *todo) { struct devres *dr, *tmp; /* Release. Note that both devres and devres_group are * handled as devres in the following loop. This is safe. */ list_for_each_entry_safe_reverse(dr, tmp, todo, node.entry) { devres_log(dev, &dr->node, "REL"); dr->node.release(dev, dr->data); kfree(dr); } } /** * devres_release_all - Release all managed resources * @dev: Device to release resources for * * Release all resources associated with @dev. This function is * called on driver detach. */ int devres_release_all(struct device *dev) { unsigned long flags; LIST_HEAD(todo); int cnt; /* Looks like an uninitialized device structure */ if (WARN_ON(dev->devres_head.next == NULL)) return -ENODEV; /* Nothing to release if list is empty */ if (list_empty(&dev->devres_head)) return 0; spin_lock_irqsave(&dev->devres_lock, flags); cnt = remove_nodes(dev, dev->devres_head.next, &dev->devres_head, &todo); spin_unlock_irqrestore(&dev->devres_lock, flags); release_nodes(dev, &todo); return cnt; } /** * devres_open_group - Open a new devres group * @dev: Device to open devres group for * @id: Separator ID * @gfp: Allocation flags * * Open a new devres group for @dev with @id. For @id, using a * pointer to an object which won't be used for another group is * recommended. If @id is NULL, address-wise unique ID is created. * * RETURNS: * ID of the new group, NULL on failure. */ void * devres_open_group(struct device *dev, void *id, gfp_t gfp) { struct devres_group *grp; unsigned long flags; grp = kmalloc(sizeof(*grp), gfp); if (unlikely(!grp)) return NULL; grp->node[0].release = &group_open_release; grp->node[1].release = &group_close_release; INIT_LIST_HEAD(&grp->node[0].entry); INIT_LIST_HEAD(&grp->node[1].entry); set_node_dbginfo(&grp->node[0], "grp<", 0); set_node_dbginfo(&grp->node[1], "grp>", 0); grp->id = grp; if (id) grp->id = id; spin_lock_irqsave(&dev->devres_lock, flags); add_dr(dev, &grp->node[0]); spin_unlock_irqrestore(&dev->devres_lock, flags); return grp->id; } EXPORT_SYMBOL_GPL(devres_open_group); /* Find devres group with ID @id. If @id is NULL, look for the latest. 
*/ static struct devres_group * find_group(struct device *dev, void *id) { struct devres_node *node; list_for_each_entry_reverse(node, &dev->devres_head, entry) { struct devres_group *grp; if (node->release != &group_open_release) continue; grp = container_of(node, struct devres_group, node[0]); if (id) { if (grp->id == id) return grp; } else if (list_empty(&grp->node[1].entry)) return grp; } return NULL; } /** * devres_close_group - Close a devres group * @dev: Device to close devres group for * @id: ID of target group, can be NULL * * Close the group identified by @id. If @id is NULL, the latest open * group is selected. */ void devres_close_group(struct device *dev, void *id) { struct devres_group *grp; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); grp = find_group(dev, id); if (grp) add_dr(dev, &grp->node[1]); else WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_close_group); /** * devres_remove_group - Remove a devres group * @dev: Device to remove group for * @id: ID of target group, can be NULL * * Remove the group identified by @id. If @id is NULL, the latest * open group is selected. Note that removing a group doesn't affect * any other resources. */ void devres_remove_group(struct device *dev, void *id) { struct devres_group *grp; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); grp = find_group(dev, id); if (grp) { list_del_init(&grp->node[0].entry); list_del_init(&grp->node[1].entry); devres_log(dev, &grp->node[0], "REM"); } else WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); kfree(grp); } EXPORT_SYMBOL_GPL(devres_remove_group); /** * devres_release_group - Release resources in a devres group * @dev: Device to release group for * @id: ID of target group, can be NULL * * Release all resources in the group identified by @id. If @id is * NULL, the latest open group is selected. The selected group and * groups properly nested inside the selected group are removed. * * RETURNS: * The number of released non-group resources. */ int devres_release_group(struct device *dev, void *id) { struct devres_group *grp; unsigned long flags; LIST_HEAD(todo); int cnt = 0; spin_lock_irqsave(&dev->devres_lock, flags); grp = find_group(dev, id); if (grp) { struct list_head *first = &grp->node[0].entry; struct list_head *end = &dev->devres_head; if (!list_empty(&grp->node[1].entry)) end = grp->node[1].entry.next; cnt = remove_nodes(dev, first, end, &todo); spin_unlock_irqrestore(&dev->devres_lock, flags); release_nodes(dev, &todo); } else { WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); } return cnt; } EXPORT_SYMBOL_GPL(devres_release_group); /* * Custom devres actions allow inserting a simple function call * into the teardown sequence. 
*/ struct action_devres { void *data; void (*action)(void *); }; static int devm_action_match(struct device *dev, void *res, void *p) { struct action_devres *devres = res; struct action_devres *target = p; return devres->action == target->action && devres->data == target->data; } static void devm_action_release(struct device *dev, void *res) { struct action_devres *devres = res; devres->action(devres->data); } /** * __devm_add_action() - add a custom action to list of managed resources * @dev: Device that owns the action * @action: Function that should be called * @data: Pointer to data passed to @action implementation * @name: Name of the resource (for debugging purposes) * * This adds a custom action to the list of managed resources so that * it gets executed as part of standard resource unwinding. */ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name) { struct action_devres *devres; devres = __devres_alloc_node(devm_action_release, sizeof(struct action_devres), GFP_KERNEL, NUMA_NO_NODE, name); if (!devres) return -ENOMEM; devres->data = data; devres->action = action; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(__devm_add_action); /** * devm_remove_action() - removes previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Removes instance of @action previously added by devm_add_action(). * Both action and data should match one of the existing entries. */ void devm_remove_action(struct device *dev, void (*action)(void *), void *data) { struct action_devres devres = { .data = data, .action = action, }; WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match, &devres)); } EXPORT_SYMBOL_GPL(devm_remove_action); /** * devm_release_action() - release previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Releases and removes instance of @action previously added by * devm_add_action(). Both action and data should match one of the * existing entries. */ void devm_release_action(struct device *dev, void (*action)(void *), void *data) { struct action_devres devres = { .data = data, .action = action, }; WARN_ON(devres_release(dev, devm_action_release, devm_action_match, &devres)); } EXPORT_SYMBOL_GPL(devm_release_action); /* * Managed kmalloc/kfree */ static void devm_kmalloc_release(struct device *dev, void *res) { /* noop */ } static int devm_kmalloc_match(struct device *dev, void *res, void *data) { return res == data; } /** * devm_kmalloc - Resource-managed kmalloc * @dev: Device to allocate memory for * @size: Allocation size * @gfp: Allocation gfp flags * * Managed kmalloc. Memory allocated with this function is * automatically freed on driver detach. Like all other devres * resources, guaranteed alignment is unsigned long long. * * RETURNS: * Pointer to allocated memory on success, NULL on failure. 
*/ void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) { struct devres *dr; if (unlikely(!size)) return ZERO_SIZE_PTR; /* use raw alloc_dr for kmalloc caller tracing */ dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev)); if (unlikely(!dr)) return NULL; /* * This is named devm_kzalloc_release for historical reasons * The initial implementation did not support kmalloc, only kzalloc */ set_node_dbginfo(&dr->node, "devm_kzalloc_release", size); devres_add(dev, dr->data); return dr->data; } EXPORT_SYMBOL_GPL(devm_kmalloc); /** * devm_krealloc - Resource-managed krealloc() * @dev: Device to re-allocate memory for * @ptr: Pointer to the memory chunk to re-allocate * @new_size: New allocation size * @gfp: Allocation gfp flags * * Managed krealloc(). Resizes the memory chunk allocated with devm_kmalloc(). * Behaves similarly to regular krealloc(): if @ptr is NULL or ZERO_SIZE_PTR, * it's the equivalent of devm_kmalloc(). If new_size is zero, it frees the * previously allocated memory and returns ZERO_SIZE_PTR. This function doesn't * change the order in which the release callback for the re-alloc'ed devres * will be called (except when falling back to devm_kmalloc() or when freeing * resources when new_size is zero). The contents of the memory are preserved * up to the lesser of new and old sizes. */ void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp) { size_t total_new_size, total_old_size; struct devres *old_dr, *new_dr; unsigned long flags; if (unlikely(!new_size)) { devm_kfree(dev, ptr); return ZERO_SIZE_PTR; } if (unlikely(ZERO_OR_NULL_PTR(ptr))) return devm_kmalloc(dev, new_size, gfp); if (WARN_ON(is_kernel_rodata((unsigned long)ptr))) /* * We cannot reliably realloc a const string returned by * devm_kstrdup_const(). */ return NULL; if (!check_dr_size(new_size, &total_new_size)) return NULL; total_old_size = ksize(container_of(ptr, struct devres, data)); if (total_old_size == 0) { WARN(1, "Pointer doesn't point to dynamically allocated memory."); return NULL; } /* * If new size is smaller or equal to the actual number of bytes * allocated previously - just return the same pointer. */ if (total_new_size <= total_old_size) return ptr; /* * Otherwise: allocate new, larger chunk. We need to allocate before * taking the lock as most probably the caller uses GFP_KERNEL. */ new_dr = alloc_dr(devm_kmalloc_release, total_new_size, gfp, dev_to_node(dev)); if (!new_dr) return NULL; /* * The spinlock protects the linked list against concurrent * modifications but not the resource itself. */ spin_lock_irqsave(&dev->devres_lock, flags); old_dr = find_dr(dev, devm_kmalloc_release, devm_kmalloc_match, ptr); if (!old_dr) { spin_unlock_irqrestore(&dev->devres_lock, flags); kfree(new_dr); WARN(1, "Memory chunk not managed or managed by a different device."); return NULL; } replace_dr(dev, &old_dr->node, &new_dr->node); spin_unlock_irqrestore(&dev->devres_lock, flags); /* * We can copy the memory contents after releasing the lock as we're * no longer modifying the list links. */ memcpy(new_dr->data, old_dr->data, total_old_size - offsetof(struct devres, data)); /* * Same for releasing the old devres - it's now been removed from the * list. This is also the reason why we must not use devm_kfree() - the * links are no longer valid. */ kfree(old_dr); return new_dr->data; } EXPORT_SYMBOL_GPL(devm_krealloc); /** * devm_kstrdup - Allocate resource managed space and * copy an existing string into that. 
* @dev: Device to allocate memory for * @s: the string to duplicate * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) { size_t size; char *buf; if (!s) return NULL; size = strlen(s) + 1; buf = devm_kmalloc(dev, size, gfp); if (buf) memcpy(buf, s, size); return buf; } EXPORT_SYMBOL_GPL(devm_kstrdup); /** * devm_kstrdup_const - resource managed conditional string duplication * @dev: device for which to duplicate the string * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Strings allocated by devm_kstrdup_const will be automatically freed when * the associated device is detached. * * RETURNS: * Source string if it is in .rodata section otherwise it falls back to * devm_kstrdup. */ const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp) { if (is_kernel_rodata((unsigned long)s)) return s; return devm_kstrdup(dev, s, gfp); } EXPORT_SYMBOL_GPL(devm_kstrdup_const); /** * devm_kvasprintf - Allocate resource managed space and format a string * into that. * @dev: Device to allocate memory for * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * @fmt: The printf()-style format string * @ap: Arguments for the format string * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap) { unsigned int len; char *p; va_list aq; va_copy(aq, ap); len = vsnprintf(NULL, 0, fmt, aq); va_end(aq); p = devm_kmalloc(dev, len+1, gfp); if (!p) return NULL; vsnprintf(p, len+1, fmt, ap); return p; } EXPORT_SYMBOL(devm_kvasprintf); /** * devm_kasprintf - Allocate resource managed space and format a string * into that. * @dev: Device to allocate memory for * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * @fmt: The printf()-style format string * @...: Arguments for the format string * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...) { va_list ap; char *p; va_start(ap, fmt); p = devm_kvasprintf(dev, gfp, fmt, ap); va_end(ap); return p; } EXPORT_SYMBOL_GPL(devm_kasprintf); /** * devm_kfree - Resource-managed kfree * @dev: Device this memory belongs to * @p: Memory to free * * Free memory allocated with devm_kmalloc(). */ void devm_kfree(struct device *dev, const void *p) { int rc; /* * Special cases: pointer to a string in .rodata returned by * devm_kstrdup_const() or NULL/ZERO ptr. 
*/ if (unlikely(is_kernel_rodata((unsigned long)p) || ZERO_OR_NULL_PTR(p))) return; rc = devres_destroy(dev, devm_kmalloc_release, devm_kmalloc_match, (void *)p); WARN_ON(rc); } EXPORT_SYMBOL_GPL(devm_kfree); /** * devm_kmemdup - Resource-managed kmemdup * @dev: Device this memory belongs to * @src: Memory region to duplicate * @len: Memory region length * @gfp: GFP mask to use * * Duplicate region of a memory using resource managed kmalloc */ void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) { void *p; p = devm_kmalloc(dev, len, gfp); if (p) memcpy(p, src, len); return p; } EXPORT_SYMBOL_GPL(devm_kmemdup); struct pages_devres { unsigned long addr; unsigned int order; }; static int devm_pages_match(struct device *dev, void *res, void *p) { struct pages_devres *devres = res; struct pages_devres *target = p; return devres->addr == target->addr; } static void devm_pages_release(struct device *dev, void *res) { struct pages_devres *devres = res; free_pages(devres->addr, devres->order); } /** * devm_get_free_pages - Resource-managed __get_free_pages * @dev: Device to allocate memory for * @gfp_mask: Allocation gfp flags * @order: Allocation size is (1 << order) pages * * Managed get_free_pages. Memory allocated with this function is * automatically freed on driver detach. * * RETURNS: * Address of allocated memory on success, 0 on failure. */ unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order) { struct pages_devres *devres; unsigned long addr; addr = __get_free_pages(gfp_mask, order); if (unlikely(!addr)) return 0; devres = devres_alloc(devm_pages_release, sizeof(struct pages_devres), GFP_KERNEL); if (unlikely(!devres)) { free_pages(addr, order); return 0; } devres->addr = addr; devres->order = order; devres_add(dev, devres); return addr; } EXPORT_SYMBOL_GPL(devm_get_free_pages); /** * devm_free_pages - Resource-managed free_pages * @dev: Device this memory belongs to * @addr: Memory to free * * Free memory allocated with devm_get_free_pages(). Unlike free_pages, * there is no need to supply the @order. */ void devm_free_pages(struct device *dev, unsigned long addr) { struct pages_devres devres = { .addr = addr }; WARN_ON(devres_release(dev, devm_pages_release, devm_pages_match, &devres)); } EXPORT_SYMBOL_GPL(devm_free_pages); static void devm_percpu_release(struct device *dev, void *pdata) { void __percpu *p; p = *(void __percpu **)pdata; free_percpu(p); } static int devm_percpu_match(struct device *dev, void *data, void *p) { struct devres *devr = container_of(data, struct devres, data); return *(void **)devr->data == p; } /** * __devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for * @size: Size of per-cpu memory to allocate * @align: Alignment of per-cpu memory to allocate * * Managed alloc_percpu. Per-cpu memory allocated with this function is * automatically freed on driver detach. * * RETURNS: * Pointer to allocated memory on success, NULL on failure. 
*/ void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, size_t align) { void *p; void __percpu *pcpu; pcpu = __alloc_percpu(size, align); if (!pcpu) return NULL; p = devres_alloc(devm_percpu_release, sizeof(void *), GFP_KERNEL); if (!p) { free_percpu(pcpu); return NULL; } *(void __percpu **)p = pcpu; devres_add(dev, p); return pcpu; } EXPORT_SYMBOL_GPL(__devm_alloc_percpu); /** * devm_free_percpu - Resource-managed free_percpu * @dev: Device this memory belongs to * @pdata: Per-cpu memory to free * * Free memory allocated with devm_alloc_percpu(). */ void devm_free_percpu(struct device *dev, void __percpu *pdata) { WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match, (__force void *)pdata)); } EXPORT_SYMBOL_GPL(devm_free_percpu);
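/*
 * Editor's note: a hedged sketch of how a hypothetical driver consumes the
 * devres API above from its probe path. devm_kzalloc(), devm_kasprintf() and
 * devm_add_action() are the conventional wrappers around the primitives
 * defined above (alloc_dr(), devres_add(), __devm_add_action()); everything
 * registered here is unwound automatically by devres_release_all() on
 * detach, in reverse order of registration.
 */
#include <linux/device.h>
#include <linux/printk.h>
#include <linux/slab.h>

struct foo_state {			/* hypothetical per-device state */
	char *label;
};

static void foo_disable_hw(void *data)
{
	struct foo_state *st = data;

	/* custom teardown; runs before the devm_* allocations below are freed */
	pr_debug("foo: disabling %s\n", st->label);
}

static int foo_probe_resources(struct device *dev)
{
	struct foo_state *st;
	int ret;

	st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL);	/* freed on detach */
	if (!st)
		return -ENOMEM;

	st->label = devm_kasprintf(dev, GFP_KERNEL, "foo-%s", dev_name(dev));
	if (!st->label)
		return -ENOMEM;

	/* registered last, so released first when the device is unbound */
	ret = devm_add_action(dev, foo_disable_hw, st);
	if (ret)
		return ret;

	dev_set_drvdata(dev, st);
	return 0;
}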
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* I2C message transfer tracepoints * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #undef TRACE_SYSTEM #define TRACE_SYSTEM i2c #if !defined(_TRACE_I2C_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_I2C_H #include <linux/i2c.h> #include <linux/tracepoint.h> /* * drivers/i2c/i2c-core-base.c */ extern int i2c_transfer_trace_reg(void); extern void i2c_transfer_trace_unreg(void); /* * __i2c_transfer() write request */ TRACE_EVENT_FN(i2c_write, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) __dynamic_array(__u8, buf, msg->len) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; memcpy(__get_dynamic_array(buf), msg->buf, msg->len); ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u [%*phD]", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len, __entry->len, __get_dynamic_array(buf) ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() read request */ TRACE_EVENT_FN(i2c_read, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() read reply */ TRACE_EVENT_FN(i2c_reply, TP_PROTO(const struct i2c_adapter *adap, const struct i2c_msg *msg, int num), TP_ARGS(adap, msg, num), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, msg_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u16, len ) __dynamic_array(__u8, buf, msg->len) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->msg_nr = num; __entry->addr = msg->addr; __entry->flags = msg->flags; __entry->len = msg->len; memcpy(__get_dynamic_array(buf), msg->buf, msg->len); ), TP_printk("i2c-%d #%u a=%03x f=%04x l=%u [%*phD]", __entry->adapter_nr, __entry->msg_nr, __entry->addr, __entry->flags, __entry->len, __entry->len, __get_dynamic_array(buf) ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); /* * __i2c_transfer() result */ TRACE_EVENT_FN(i2c_result, TP_PROTO(const struct i2c_adapter *adap, int num, int ret), TP_ARGS(adap, num, ret), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, nr_msgs ) __field(__s16, ret ) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->nr_msgs = num; __entry->ret = ret; ), TP_printk("i2c-%d n=%u ret=%d", __entry->adapter_nr, 
__entry->nr_msgs, __entry->ret ), i2c_transfer_trace_reg, i2c_transfer_trace_unreg); #endif /* _TRACE_I2C_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
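/*
 * Editor's note: a hedged sketch (not from the I2C core) of how the events
 * defined above are emitted. TRACE_EVENT_FN() generates trace_i2c_write(),
 * trace_i2c_read(), trace_i2c_reply() and trace_i2c_result() with the
 * TP_PROTO() signatures shown; the real call sites live in
 * drivers/i2c/i2c-core-base.c around __i2c_transfer(). At runtime the events
 * can typically be enabled through tracefs (events/i2c/enable).
 */
#include <linux/i2c.h>
#include <trace/events/i2c.h>	/* assumed install path of this header */

static int example_traced_transfer(struct i2c_adapter *adap,
				   struct i2c_msg *msgs, int num)
{
	int i, ret;

	/* request events, one per message */
	for (i = 0; i < num; i++) {
		if (msgs[i].flags & I2C_M_RD)
			trace_i2c_read(adap, &msgs[i], i);
		else
			trace_i2c_write(adap, &msgs[i], i);
	}

	ret = num;	/* stand-in for the adapter's actual transfer result */

	/* reply events carry the data read back for each read message */
	for (i = 0; i < num; i++)
		if (msgs[i].flags & I2C_M_RD)
			trace_i2c_reply(adap, &msgs[i], i);

	trace_i2c_result(adap, num, ret);
	return ret;
}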
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2013 Politecnico di Torino, Italy * TORSEC group -- https://security.polito.it * * Author: Roberto Sassu <roberto.sassu@polito.it> * * File: ima_template.c * Helpers to manage template descriptors. 
*/ #include <linux/rculist.h> #include "ima.h" #include "ima_template_lib.h" enum header_fields { HDR_PCR, HDR_DIGEST, HDR_TEMPLATE_NAME, HDR_TEMPLATE_DATA, HDR__LAST }; static struct ima_template_desc builtin_templates[] = { {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT}, {.name = "ima-ng", .fmt = "d-ng|n-ng"}, {.name = "ima-sig", .fmt = "d-ng|n-ng|sig"}, {.name = "ima-ngv2", .fmt = "d-ngv2|n-ng"}, {.name = "ima-sigv2", .fmt = "d-ngv2|n-ng|sig"}, {.name = "ima-buf", .fmt = "d-ng|n-ng|buf"}, {.name = "ima-modsig", .fmt = "d-ng|n-ng|sig|d-modsig|modsig"}, {.name = "evm-sig", .fmt = "d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode"}, {.name = "", .fmt = ""}, /* placeholder for a custom format */ }; static LIST_HEAD(defined_templates); static DEFINE_SPINLOCK(template_list); static int template_setup_done; static const struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, .field_show = ima_show_template_digest}, {.field_id = "n", .field_init = ima_eventname_init, .field_show = ima_show_template_string}, {.field_id = "d-ng", .field_init = ima_eventdigest_ng_init, .field_show = ima_show_template_digest_ng}, {.field_id = "d-ngv2", .field_init = ima_eventdigest_ngv2_init, .field_show = ima_show_template_digest_ngv2}, {.field_id = "n-ng", .field_init = ima_eventname_ng_init, .field_show = ima_show_template_string}, {.field_id = "sig", .field_init = ima_eventsig_init, .field_show = ima_show_template_sig}, {.field_id = "buf", .field_init = ima_eventbuf_init, .field_show = ima_show_template_buf}, {.field_id = "d-modsig", .field_init = ima_eventdigest_modsig_init, .field_show = ima_show_template_digest_ng}, {.field_id = "modsig", .field_init = ima_eventmodsig_init, .field_show = ima_show_template_sig}, {.field_id = "evmsig", .field_init = ima_eventevmsig_init, .field_show = ima_show_template_sig}, {.field_id = "iuid", .field_init = ima_eventinodeuid_init, .field_show = ima_show_template_uint}, {.field_id = "igid", .field_init = ima_eventinodegid_init, .field_show = ima_show_template_uint}, {.field_id = "imode", .field_init = ima_eventinodemode_init, .field_show = ima_show_template_uint}, {.field_id = "xattrnames", .field_init = ima_eventinodexattrnames_init, .field_show = ima_show_template_string}, {.field_id = "xattrlengths", .field_init = ima_eventinodexattrlengths_init, .field_show = ima_show_template_sig}, {.field_id = "xattrvalues", .field_init = ima_eventinodexattrvalues_init, .field_show = ima_show_template_sig}, }; /* * Used when restoring measurements carried over from a kexec. 'd' and 'n' don't * need to be accounted for since they shouldn't be defined in the same template * description as 'd-ng' and 'n-ng' respectively. */ #define MAX_TEMPLATE_NAME_LEN \ sizeof("d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode") static struct ima_template_desc *ima_template; static struct ima_template_desc *ima_buf_template; /** * ima_template_has_modsig - Check whether template has modsig-related fields. * @ima_template: IMA template to check. * * Tells whether the given template has fields referencing a file's appended * signature. 
*/ bool ima_template_has_modsig(const struct ima_template_desc *ima_template) { int i; for (i = 0; i < ima_template->num_fields; i++) if (!strcmp(ima_template->fields[i]->field_id, "modsig") || !strcmp(ima_template->fields[i]->field_id, "d-modsig")) return true; return false; } static int __init ima_template_setup(char *str) { struct ima_template_desc *template_desc; int template_len = strlen(str); if (template_setup_done) return 1; if (!ima_template) ima_init_template_list(); /* * Verify that a template with the supplied name exists. * If not, use CONFIG_IMA_DEFAULT_TEMPLATE. */ template_desc = lookup_template_desc(str); if (!template_desc) { pr_err("template %s not found, using %s\n", str, CONFIG_IMA_DEFAULT_TEMPLATE); return 1; } /* * Verify whether the current hash algorithm is supported * by the 'ima' template. */ if (template_len == 3 && strcmp(str, IMA_TEMPLATE_IMA_NAME) == 0 && ima_hash_algo != HASH_ALGO_SHA1 && ima_hash_algo != HASH_ALGO_MD5) { pr_err("template does not support hash alg\n"); return 1; } ima_template = template_desc; template_setup_done = 1; return 1; } __setup("ima_template=", ima_template_setup); static int __init ima_template_fmt_setup(char *str) { int num_templates = ARRAY_SIZE(builtin_templates); if (template_setup_done) return 1; if (template_desc_init_fields(str, NULL, NULL) < 0) { pr_err("format string '%s' not valid, using template %s\n", str, CONFIG_IMA_DEFAULT_TEMPLATE); return 1; } builtin_templates[num_templates - 1].fmt = str; ima_template = builtin_templates + num_templates - 1; template_setup_done = 1; return 1; } __setup("ima_template_fmt=", ima_template_fmt_setup); struct ima_template_desc *lookup_template_desc(const char *name) { struct ima_template_desc *template_desc; int found = 0; rcu_read_lock(); list_for_each_entry_rcu(template_desc, &defined_templates, list) { if ((strcmp(template_desc->name, name) == 0) || (strcmp(template_desc->fmt, name) == 0)) { found = 1; break; } } rcu_read_unlock(); return found ? template_desc : NULL; } static const struct ima_template_field * lookup_template_field(const char *field_id) { int i; for (i = 0; i < ARRAY_SIZE(supported_fields); i++) if (strncmp(supported_fields[i].field_id, field_id, IMA_TEMPLATE_FIELD_ID_MAX_LEN) == 0) return &supported_fields[i]; return NULL; } static int template_fmt_size(const char *template_fmt) { char c; int template_fmt_len = strlen(template_fmt); int i = 0, j = 0; while (i < template_fmt_len) { c = template_fmt[i]; if (c == '|') j++; i++; } return j + 1; } int template_desc_init_fields(const char *template_fmt, const struct ima_template_field ***fields, int *num_fields) { const char *template_fmt_ptr; const struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX]; int template_num_fields; int i, len; if (num_fields && *num_fields > 0) /* already initialized? 
*/ return 0; template_num_fields = template_fmt_size(template_fmt); if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX) { pr_err("format string '%s' contains too many fields\n", template_fmt); return -EINVAL; } for (i = 0, template_fmt_ptr = template_fmt; i < template_num_fields; i++, template_fmt_ptr += len + 1) { char tmp_field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN + 1]; len = strchrnul(template_fmt_ptr, '|') - template_fmt_ptr; if (len == 0 || len > IMA_TEMPLATE_FIELD_ID_MAX_LEN) { pr_err("Invalid field with length %d\n", len); return -EINVAL; } memcpy(tmp_field_id, template_fmt_ptr, len); tmp_field_id[len] = '\0'; found_fields[i] = lookup_template_field(tmp_field_id); if (!found_fields[i]) { pr_err("field '%s' not found\n", tmp_field_id); return -ENOENT; } } if (fields && num_fields) { *fields = kmalloc_array(i, sizeof(**fields), GFP_KERNEL); if (*fields == NULL) return -ENOMEM; memcpy(*fields, found_fields, i * sizeof(**fields)); *num_fields = i; } return 0; } void ima_init_template_list(void) { int i; if (!list_empty(&defined_templates)) return; spin_lock(&template_list); for (i = 0; i < ARRAY_SIZE(builtin_templates); i++) { list_add_tail_rcu(&builtin_templates[i].list, &defined_templates); } spin_unlock(&template_list); } struct ima_template_desc *ima_template_desc_current(void) { if (!ima_template) { ima_init_template_list(); ima_template = lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE); } return ima_template; } struct ima_template_desc *ima_template_desc_buf(void) { if (!ima_buf_template) { ima_init_template_list(); ima_buf_template = lookup_template_desc("ima-buf"); } return ima_buf_template; } int __init ima_init_template(void) { struct ima_template_desc *template = ima_template_desc_current(); int result; result = template_desc_init_fields(template->fmt, &(template->fields), &(template->num_fields)); if (result < 0) { pr_err("template %s init failed, result: %d\n", (strlen(template->name) ? template->name : template->fmt), result); return result; } template = ima_template_desc_buf(); if (!template) { pr_err("Failed to get ima-buf template\n"); return -EINVAL; } result = template_desc_init_fields(template->fmt, &(template->fields), &(template->num_fields)); if (result < 0) pr_err("template %s init failed, result: %d\n", (strlen(template->name) ? 
template->name : template->fmt), result); return result; } static struct ima_template_desc *restore_template_fmt(char *template_name) { struct ima_template_desc *template_desc = NULL; int ret; ret = template_desc_init_fields(template_name, NULL, NULL); if (ret < 0) { pr_err("attempting to initialize the template \"%s\" failed\n", template_name); goto out; } template_desc = kzalloc(sizeof(*template_desc), GFP_KERNEL); if (!template_desc) goto out; template_desc->name = ""; template_desc->fmt = kstrdup(template_name, GFP_KERNEL); if (!template_desc->fmt) { kfree(template_desc); template_desc = NULL; goto out; } spin_lock(&template_list); list_add_tail_rcu(&template_desc->list, &defined_templates); spin_unlock(&template_list); out: return template_desc; } static int ima_restore_template_data(struct ima_template_desc *template_desc, void *template_data, int template_data_size, struct ima_template_entry **entry) { struct tpm_digest *digests; int ret = 0; int i; *entry = kzalloc(struct_size(*entry, template_data, template_desc->num_fields), GFP_NOFS); if (!*entry) return -ENOMEM; digests = kcalloc(NR_BANKS(ima_tpm_chip) + ima_extra_slots, sizeof(*digests), GFP_NOFS); if (!digests) { kfree(*entry); return -ENOMEM; } (*entry)->digests = digests; ret = ima_parse_buf(template_data, template_data + template_data_size, NULL, template_desc->num_fields, (*entry)->template_data, NULL, NULL, ENFORCE_FIELDS | ENFORCE_BUFEND, "template data"); if (ret < 0) { kfree((*entry)->digests); kfree(*entry); return ret; } (*entry)->template_desc = template_desc; for (i = 0; i < template_desc->num_fields; i++) { struct ima_field_data *field_data = &(*entry)->template_data[i]; u8 *data = field_data->data; (*entry)->template_data[i].data = kzalloc(field_data->len + 1, GFP_KERNEL); if (!(*entry)->template_data[i].data) { ret = -ENOMEM; break; } memcpy((*entry)->template_data[i].data, data, field_data->len); (*entry)->template_data_len += sizeof(field_data->len); (*entry)->template_data_len += field_data->len; } if (ret < 0) { ima_free_template_entry(*entry); *entry = NULL; } return ret; } /* Restore the serialized binary measurement list without extending PCRs. 
*/ int ima_restore_measurement_list(loff_t size, void *buf) { char template_name[MAX_TEMPLATE_NAME_LEN]; unsigned char zero[TPM_DIGEST_SIZE] = { 0 }; struct ima_kexec_hdr *khdr = buf; struct ima_field_data hdr[HDR__LAST] = { [HDR_PCR] = {.len = sizeof(u32)}, [HDR_DIGEST] = {.len = TPM_DIGEST_SIZE}, }; void *bufp = buf + sizeof(*khdr); void *bufendp; struct ima_template_entry *entry; struct ima_template_desc *template_desc; DECLARE_BITMAP(hdr_mask, HDR__LAST); unsigned long count = 0; int ret = 0; if (!buf || size < sizeof(*khdr)) return 0; if (ima_canonical_fmt) { khdr->version = le16_to_cpu((__force __le16)khdr->version); khdr->count = le64_to_cpu((__force __le64)khdr->count); khdr->buffer_size = le64_to_cpu((__force __le64)khdr->buffer_size); } if (khdr->version != 1) { pr_err("attempting to restore a incompatible measurement list"); return -EINVAL; } if (khdr->count > ULONG_MAX - 1) { pr_err("attempting to restore too many measurements"); return -EINVAL; } bitmap_zero(hdr_mask, HDR__LAST); bitmap_set(hdr_mask, HDR_PCR, 1); bitmap_set(hdr_mask, HDR_DIGEST, 1); /* * ima kexec buffer prefix: version, buffer size, count * v1 format: pcr, digest, template-name-len, template-name, * template-data-size, template-data */ bufendp = buf + khdr->buffer_size; while ((bufp < bufendp) && (count++ < khdr->count)) { int enforce_mask = ENFORCE_FIELDS; enforce_mask |= (count == khdr->count) ? ENFORCE_BUFEND : 0; ret = ima_parse_buf(bufp, bufendp, &bufp, HDR__LAST, hdr, NULL, hdr_mask, enforce_mask, "entry header"); if (ret < 0) break; if (hdr[HDR_TEMPLATE_NAME].len >= MAX_TEMPLATE_NAME_LEN) { pr_err("attempting to restore a template name that is too long\n"); ret = -EINVAL; break; } /* template name is not null terminated */ memcpy(template_name, hdr[HDR_TEMPLATE_NAME].data, hdr[HDR_TEMPLATE_NAME].len); template_name[hdr[HDR_TEMPLATE_NAME].len] = 0; if (strcmp(template_name, "ima") == 0) { pr_err("attempting to restore an unsupported template \"%s\" failed\n", template_name); ret = -EINVAL; break; } template_desc = lookup_template_desc(template_name); if (!template_desc) { template_desc = restore_template_fmt(template_name); if (!template_desc) break; } /* * Only the running system's template format is initialized * on boot. As needed, initialize the other template formats. */ ret = template_desc_init_fields(template_desc->fmt, &(template_desc->fields), &(template_desc->num_fields)); if (ret < 0) { pr_err("attempting to restore the template fmt \"%s\" failed\n", template_desc->fmt); ret = -EINVAL; break; } ret = ima_restore_template_data(template_desc, hdr[HDR_TEMPLATE_DATA].data, hdr[HDR_TEMPLATE_DATA].len, &entry); if (ret < 0) break; if (memcmp(hdr[HDR_DIGEST].data, zero, sizeof(zero))) { ret = ima_calc_field_array_hash( &entry->template_data[0], entry); if (ret < 0) { pr_err("cannot calculate template digest\n"); ret = -EINVAL; break; } } entry->pcr = !ima_canonical_fmt ? *(u32 *)(hdr[HDR_PCR].data) : le32_to_cpu(*(__le32 *)(hdr[HDR_PCR].data)); ret = ima_restore_measurement_entry(entry); if (ret < 0) break; } return ret; }
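
/*
 * Hedged usage sketch (not part of ima_template.c above): how IMA code could
 * resolve one of the builtin templates by name and make sure its field array
 * has been parsed before use.  "ima-sig" is one of the builtin_templates
 * defined above; example_use_template() itself is a made-up placeholder.
 */
static int example_use_template(void)
{
	struct ima_template_desc *desc;
	int ret;

	desc = lookup_template_desc("ima-sig");
	if (!desc)
		return -ENOENT;

	/* Split "d-ng|n-ng|sig" on '|' and look up each field descriptor;
	 * returns immediately if the descriptor was already initialized. */
	ret = template_desc_init_fields(desc->fmt, &desc->fields,
					&desc->num_fields);
	if (ret < 0)
		return ret;

	pr_info("template %s has %d fields\n", desc->name, desc->num_fields);
	return 0;
}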
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (c) 2001-2002 by David Brownell
 */

#ifndef __USB_CORE_HCD_H
#define __USB_CORE_HCD_H

#ifdef __KERNEL__

#include <linux/rwsem.h>
#include <linux/interrupt.h>
#include <linux/idr.h>

#define MAX_TOPO_LEVEL 6

/* This file contains declarations of usbcore internals that are mostly
 * used or exposed by Host Controller Drivers.
*/ /* * USB Packet IDs (PIDs) */ #define USB_PID_EXT 0xf0 /* USB 2.0 LPM ECN */ #define USB_PID_OUT 0xe1 #define USB_PID_ACK 0xd2 #define USB_PID_DATA0 0xc3 #define USB_PID_PING 0xb4 /* USB 2.0 */ #define USB_PID_SOF 0xa5 #define USB_PID_NYET 0x96 /* USB 2.0 */ #define USB_PID_DATA2 0x87 /* USB 2.0 */ #define USB_PID_SPLIT 0x78 /* USB 2.0 */ #define USB_PID_IN 0x69 #define USB_PID_NAK 0x5a #define USB_PID_DATA1 0x4b #define USB_PID_PREAMBLE 0x3c /* Token mode */ #define USB_PID_ERR 0x3c /* USB 2.0: handshake mode */ #define USB_PID_SETUP 0x2d #define USB_PID_STALL 0x1e #define USB_PID_MDATA 0x0f /* USB 2.0 */ /*-------------------------------------------------------------------------*/ /* * USB Host Controller Driver (usb_hcd) framework * * Since "struct usb_bus" is so thin, you can't share much code in it. * This framework is a layer over that, and should be more shareable. */ /*-------------------------------------------------------------------------*/ struct giveback_urb_bh { bool running; bool high_prio; spinlock_t lock; struct list_head head; struct work_struct bh; struct usb_host_endpoint *completing_ep; }; enum usb_dev_authorize_policy { USB_DEVICE_AUTHORIZE_NONE = 0, USB_DEVICE_AUTHORIZE_ALL = 1, USB_DEVICE_AUTHORIZE_INTERNAL = 2, }; struct usb_hcd { /* * housekeeping */ struct usb_bus self; /* hcd is-a bus */ struct kref kref; /* reference counter */ const char *product_desc; /* product/vendor string */ int speed; /* Speed for this roothub. * May be different from * hcd->driver->flags & HCD_MASK */ char irq_descr[24]; /* driver + bus # */ struct timer_list rh_timer; /* drives root-hub polling */ struct urb *status_urb; /* the current status urb */ #ifdef CONFIG_PM struct work_struct wakeup_work; /* for remote wakeup */ #endif struct work_struct died_work; /* for when the device dies */ /* * hardware info/state */ const struct hc_driver *driver; /* hw-specific hooks */ /* * OTG and some Host controllers need software interaction with phys; * other external phys should be software-transparent */ struct usb_phy *usb_phy; struct usb_phy_roothub *phy_roothub; /* Flags that need to be manipulated atomically because they can * change while the host controller is running. Always use * set_bit() or clear_bit() to change their values. */ unsigned long flags; #define HCD_FLAG_HW_ACCESSIBLE 0 /* at full power */ #define HCD_FLAG_POLL_RH 2 /* poll for rh status? */ #define HCD_FLAG_POLL_PENDING 3 /* status has changed? */ #define HCD_FLAG_WAKEUP_PENDING 4 /* root hub is resuming? */ #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ #define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). 
*/ #define HCD_HW_ACCESSIBLE(hcd) ((hcd)->flags & (1U << HCD_FLAG_HW_ACCESSIBLE)) #define HCD_POLL_RH(hcd) ((hcd)->flags & (1U << HCD_FLAG_POLL_RH)) #define HCD_POLL_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_POLL_PENDING)) #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) #define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default * or they require explicit user space authorization; this bit is * settable through /sys/class/usb_host/X/interface_authorized_default */ #define HCD_INTF_AUTHORIZED(hcd) \ ((hcd)->flags & (1U << HCD_FLAG_INTF_AUTHORIZED)) /* * Specifies if devices are authorized by default * or they require explicit user space authorization; this bit is * settable through /sys/class/usb_host/X/authorized_default */ enum usb_dev_authorize_policy dev_policy; /* Flags that get set only during HCD registration or removal. */ unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ unsigned msix_enabled:1; /* driver has MSI-X enabled? */ unsigned msi_enabled:1; /* driver has MSI enabled? */ /* * do not manage the PHY state in the HCD core, instead let the driver * handle this (for example if the PHY can only be turned on after a * specific event) */ unsigned skip_phy_initialization:1; /* The next flag is a stopgap, to be removed when all the HCDs * support the new root-hub polling mechanism. */ unsigned uses_new_polling:1; unsigned has_tt:1; /* Integrated TT in root hub */ unsigned amd_resume_bug:1; /* AMD remote wakeup quirk */ unsigned can_do_streams:1; /* HC supports streams */ unsigned tpl_support:1; /* OTG & EH TPL support */ unsigned cant_recv_wakeups:1; /* wakeup requests from downstream aren't received */ unsigned int irq; /* irq allocated */ void __iomem *regs; /* device memory/io */ resource_size_t rsrc_start; /* memory/io resource start */ resource_size_t rsrc_len; /* memory/io resource length */ unsigned power_budget; /* in mA, 0 = no limit */ struct giveback_urb_bh high_prio_bh; struct giveback_urb_bh low_prio_bh; /* bandwidth_mutex should be taken before adding or removing * any new bus bandwidth constraints: * 1. Before adding a configuration for a new device. * 2. Before removing the configuration to put the device into * the addressed state. * 3. Before selecting a different configuration. * 4. Before selecting an alternate interface setting. * * bandwidth_mutex should be dropped after a successful control message * to the device, or resetting the bandwidth after a failed attempt. 
*/ struct mutex *address0_mutex; struct mutex *bandwidth_mutex; struct usb_hcd *shared_hcd; struct usb_hcd *primary_hcd; #define HCD_BUFFER_POOLS 4 struct dma_pool *pool[HCD_BUFFER_POOLS]; int state; # define __ACTIVE 0x01 # define __SUSPEND 0x04 # define __TRANSIENT 0x80 # define HC_STATE_HALT 0 # define HC_STATE_RUNNING (__ACTIVE) # define HC_STATE_QUIESCING (__SUSPEND|__TRANSIENT|__ACTIVE) # define HC_STATE_RESUMING (__SUSPEND|__TRANSIENT) # define HC_STATE_SUSPENDED (__SUSPEND) #define HC_IS_RUNNING(state) ((state) & __ACTIVE) #define HC_IS_SUSPENDED(state) ((state) & __SUSPEND) /* memory pool for HCs having local memory, or %NULL */ struct gen_pool *localmem_pool; /* more shared queuing code would be good; it should support * smarter scheduling, handle transaction translators, etc; * input size of periodic table to an interrupt scheduler. * (ohci 32, uhci 1024, ehci 256/512/1024). */ /* The HC driver's private data is stored at the end of * this structure. */ unsigned long hcd_priv[] __attribute__ ((aligned(sizeof(s64)))); }; /* 2.4 does this a bit differently ... */ static inline struct usb_bus *hcd_to_bus(struct usb_hcd *hcd) { return &hcd->self; } static inline struct usb_hcd *bus_to_hcd(struct usb_bus *bus) { return container_of(bus, struct usb_hcd, self); } /*-------------------------------------------------------------------------*/ struct hc_driver { const char *description; /* "ehci-hcd" etc */ const char *product_desc; /* product/vendor string */ size_t hcd_priv_size; /* size of private data */ /* irq handler */ irqreturn_t (*irq) (struct usb_hcd *hcd); int flags; #define HCD_MEMORY 0x0001 /* HC regs use memory (else I/O) */ #define HCD_DMA 0x0002 /* HC uses DMA */ #define HCD_SHARED 0x0004 /* Two (or more) usb_hcds share HW */ #define HCD_USB11 0x0010 /* USB 1.1 */ #define HCD_USB2 0x0020 /* USB 2.0 */ #define HCD_USB3 0x0040 /* USB 3.0 */ #define HCD_USB31 0x0050 /* USB 3.1 */ #define HCD_USB32 0x0060 /* USB 3.2 */ #define HCD_MASK 0x0070 #define HCD_BH 0x0100 /* URB complete in BH context */ /* called to init HCD and root hub */ int (*reset) (struct usb_hcd *hcd); int (*start) (struct usb_hcd *hcd); /* NOTE: these suspend/resume calls relate to the HC as * a whole, not just the root hub; they're for PCI bus glue. */ /* called after suspending the hub, before entering D3 etc */ int (*pci_suspend)(struct usb_hcd *hcd, bool do_wakeup); /* called after entering D0 (etc), before resuming the hub */ int (*pci_resume)(struct usb_hcd *hcd, pm_message_t state); /* called just before hibernate final D3 state, allows host to poweroff parts */ int (*pci_poweroff_late)(struct usb_hcd *hcd, bool do_wakeup); /* cleanly make HCD stop writing memory and doing I/O */ void (*stop) (struct usb_hcd *hcd); /* shutdown HCD */ void (*shutdown) (struct usb_hcd *hcd); /* return current frame number */ int (*get_frame_number) (struct usb_hcd *hcd); /* manage i/o requests, device state */ int (*urb_enqueue)(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags); int (*urb_dequeue)(struct usb_hcd *hcd, struct urb *urb, int status); /* * (optional) these hooks allow an HCD to override the default DMA * mapping and unmapping routines. In general, they shouldn't be * necessary unless the host controller has special DMA requirements, * such as alignment constraints. 
If these are not specified, the * general usb_hcd_(un)?map_urb_for_dma functions will be used instead * (and it may be a good idea to call these functions in your HCD * implementation) */ int (*map_urb_for_dma)(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags); void (*unmap_urb_for_dma)(struct usb_hcd *hcd, struct urb *urb); /* hw synch, freeing endpoint resources that urb_dequeue can't */ void (*endpoint_disable)(struct usb_hcd *hcd, struct usb_host_endpoint *ep); /* (optional) reset any endpoint state such as sequence number and current window */ void (*endpoint_reset)(struct usb_hcd *hcd, struct usb_host_endpoint *ep); /* root hub support */ int (*hub_status_data) (struct usb_hcd *hcd, char *buf); int (*hub_control) (struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength); int (*bus_suspend)(struct usb_hcd *); int (*bus_resume)(struct usb_hcd *); int (*start_port_reset)(struct usb_hcd *, unsigned port_num); unsigned long (*get_resuming_ports)(struct usb_hcd *); /* force handover of high-speed port to full-speed companion */ void (*relinquish_port)(struct usb_hcd *, int); /* has a port been handed over to a companion? */ int (*port_handed_over)(struct usb_hcd *, int); /* CLEAR_TT_BUFFER completion callback */ void (*clear_tt_buffer_complete)(struct usb_hcd *, struct usb_host_endpoint *); /* xHCI specific functions */ /* Called by usb_alloc_dev to alloc HC device structures */ int (*alloc_dev)(struct usb_hcd *, struct usb_device *); /* Called by usb_disconnect to free HC device structures */ void (*free_dev)(struct usb_hcd *, struct usb_device *); /* Change a group of bulk endpoints to support multiple stream IDs */ int (*alloc_streams)(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, unsigned int num_streams, gfp_t mem_flags); /* Reverts a group of bulk endpoints back to not using stream IDs. * Can fail if we run out of memory. */ int (*free_streams)(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, gfp_t mem_flags); /* Bandwidth computation functions */ /* Note that add_endpoint() can only be called once per endpoint before * check_bandwidth() or reset_bandwidth() must be called. * drop_endpoint() can only be called once per endpoint also. * A call to xhci_drop_endpoint() followed by a call to * xhci_add_endpoint() will add the endpoint to the schedule with * possibly new parameters denoted by a different endpoint descriptor * in usb_host_endpoint. A call to xhci_add_endpoint() followed by a * call to xhci_drop_endpoint() is not allowed. */ /* Allocate endpoint resources and add them to a new schedule */ int (*add_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *); /* Drop an endpoint from a new schedule */ int (*drop_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *); /* Check that a new hardware configuration, set using * endpoint_enable and endpoint_disable, does not exceed bus * bandwidth. This must be called before any set configuration * or set interface requests are sent to the device. */ int (*check_bandwidth)(struct usb_hcd *, struct usb_device *); /* Reset the device schedule to the last known good schedule, * which was set from a previous successful call to * check_bandwidth(). This reverts any add_endpoint() and * drop_endpoint() calls since that last successful call. * Used for when a check_bandwidth() call fails due to resource * or bandwidth constraints. 
*/ void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); /* Set the hardware-chosen device address */ int (*address_device)(struct usb_hcd *, struct usb_device *udev, unsigned int timeout_ms); /* prepares the hardware to send commands to the device */ int (*enable_device)(struct usb_hcd *, struct usb_device *udev); /* Notifies the HCD after a hub descriptor is fetched. * Will block. */ int (*update_hub_device)(struct usb_hcd *, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags); int (*reset_device)(struct usb_hcd *, struct usb_device *); /* Notifies the HCD after a device is connected and its * address is set */ int (*update_device)(struct usb_hcd *, struct usb_device *); int (*set_usb2_hw_lpm)(struct usb_hcd *, struct usb_device *, int); /* USB 3.0 Link Power Management */ /* Returns the USB3 hub-encoded value for the U1/U2 timeout. */ int (*enable_usb3_lpm_timeout)(struct usb_hcd *, struct usb_device *, enum usb3_link_state state); /* The xHCI host controller can still fail the command to * disable the LPM timeouts, so this can return an error code. */ int (*disable_usb3_lpm_timeout)(struct usb_hcd *, struct usb_device *, enum usb3_link_state state); int (*find_raw_port_number)(struct usb_hcd *, int); /* Call for power on/off the port if necessary */ int (*port_power)(struct usb_hcd *hcd, int portnum, bool enable); /* Call for SINGLE_STEP_SET_FEATURE Test for USB2 EH certification */ #define EHSET_TEST_SINGLE_STEP_SET_FEATURE 0x06 int (*submit_single_step_set_feature)(struct usb_hcd *, struct urb *, int); }; static inline int hcd_giveback_urb_in_bh(struct usb_hcd *hcd) { return hcd->driver->flags & HCD_BH; } static inline bool hcd_periodic_completion_in_progress(struct usb_hcd *hcd, struct usb_host_endpoint *ep) { return hcd->high_prio_bh.completing_ep == ep; } static inline bool hcd_uses_dma(struct usb_hcd *hcd) { return IS_ENABLED(CONFIG_HAS_DMA) && (hcd->driver->flags & HCD_DMA); } extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb); extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb, int status); extern void usb_hcd_unlink_urb_from_ep(struct usb_hcd *hcd, struct urb *urb); extern int usb_hcd_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_hcd_unlink_urb(struct urb *urb, int status); extern void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status); extern int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags); extern void usb_hcd_unmap_urb_setup_for_dma(struct usb_hcd *, struct urb *); extern void usb_hcd_unmap_urb_for_dma(struct usb_hcd *, struct urb *); extern void usb_hcd_flush_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_disable_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_reset_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_synchronize_unlinks(struct usb_device *udev); extern int usb_hcd_alloc_bandwidth(struct usb_device *udev, struct usb_host_config *new_config, struct usb_host_interface *old_alt, struct usb_host_interface *new_alt); extern int usb_hcd_get_frame_number(struct usb_device *udev); struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, struct device *sysdev, struct device *dev, const char *bus_name, struct usb_hcd *primary_hcd); extern struct usb_hcd *usb_create_hcd(const struct hc_driver *driver, struct device *dev, const char *bus_name); extern struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, struct 
device *dev, const char *bus_name, struct usb_hcd *shared_hcd); extern struct usb_hcd *usb_get_hcd(struct usb_hcd *hcd); extern void usb_put_hcd(struct usb_hcd *hcd); extern int usb_hcd_is_primary_hcd(struct usb_hcd *hcd); extern int usb_add_hcd(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags); extern void usb_remove_hcd(struct usb_hcd *hcd); extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1); int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, dma_addr_t dma, size_t size); struct platform_device; extern void usb_hcd_platform_shutdown(struct platform_device *dev); #ifdef CONFIG_USB_HCD_TEST_MODE extern int ehset_single_step_set_feature(struct usb_hcd *hcd, int port); #else static inline int ehset_single_step_set_feature(struct usb_hcd *hcd, int port) { return 0; } #endif /* CONFIG_USB_HCD_TEST_MODE */ #ifdef CONFIG_USB_PCI struct pci_dev; struct pci_device_id; extern int usb_hcd_pci_probe(struct pci_dev *dev, const struct hc_driver *driver); extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); #ifdef CONFIG_USB_PCI_AMD extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, const struct hc_driver *driver) { if (!usb_hcd_amd_remote_wakeup_quirk(dev)) return false; if (driver->flags & (HCD_USB11 | HCD_USB3)) return true; return false; } #else /* CONFIG_USB_PCI_AMD */ static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, const struct hc_driver *driver) { return false; } #endif extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif /* CONFIG_USB_PCI */ /* pci-ish (pdev null is ok) buffer alloc/mapping support */ void usb_init_pool_max(void); int hcd_buffer_create(struct usb_hcd *hcd); void hcd_buffer_destroy(struct usb_hcd *hcd); void *hcd_buffer_alloc(struct usb_bus *bus, size_t size, gfp_t mem_flags, dma_addr_t *dma); void hcd_buffer_free(struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma); void *hcd_buffer_alloc_pages(struct usb_hcd *hcd, size_t size, gfp_t mem_flags, dma_addr_t *dma); void hcd_buffer_free_pages(struct usb_hcd *hcd, size_t size, void *addr, dma_addr_t dma); /* generic bus glue, needed for host controllers that don't use PCI */ extern irqreturn_t usb_hcd_irq(int irq, void *__hcd); extern void usb_hc_died(struct usb_hcd *hcd); extern void usb_hcd_poll_rh_status(struct usb_hcd *hcd); extern void usb_wakeup_notification(struct usb_device *hdev, unsigned int portnum); extern void usb_hcd_start_port_resume(struct usb_bus *bus, int portnum); extern void usb_hcd_end_port_resume(struct usb_bus *bus, int portnum); /* The D0/D1 toggle bits ... 
USE WITH CAUTION (they're almost hcd-internal) */ #define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1) #define usb_dotoggle(dev, ep, out) ((dev)->toggle[out] ^= (1 << (ep))) #define usb_settoggle(dev, ep, out, bit) \ ((dev)->toggle[out] = ((dev)->toggle[out] & ~(1 << (ep))) | \ ((bit) << (ep))) /* -------------------------------------------------------------------------- */ /* Enumeration is only for the hub driver, or HCD virtual root hubs */ extern struct usb_device *usb_alloc_dev(struct usb_device *parent, struct usb_bus *, unsigned port); extern int usb_new_device(struct usb_device *dev); extern void usb_disconnect(struct usb_device **); extern int usb_get_configuration(struct usb_device *dev); extern void usb_destroy_configuration(struct usb_device *dev); /*-------------------------------------------------------------------------*/ /* * HCD Root Hub support */ #include <linux/usb/ch11.h> /* * As of USB 2.0, full/low speed devices are segregated into trees. * One type grows from USB 1.1 host controllers (OHCI, UHCI etc). * The other type grows from high speed hubs when they connect to * full/low speed devices using "Transaction Translators" (TTs). * * TTs should only be known to the hub driver, and high speed bus * drivers (only EHCI for now). They affect periodic scheduling and * sometimes control/bulk error recovery. */ struct usb_device; struct usb_tt { struct usb_device *hub; /* upstream highspeed hub */ int multi; /* true means one TT per port */ unsigned think_time; /* think time in ns */ void *hcpriv; /* HCD private data */ /* for control/bulk error recovery (CLEAR_TT_BUFFER) */ spinlock_t lock; struct list_head clear_list; /* of usb_tt_clear */ struct work_struct clear_work; }; struct usb_tt_clear { struct list_head clear_list; unsigned tt; u16 devinfo; struct usb_hcd *hcd; struct usb_host_endpoint *ep; }; extern int usb_hub_clear_tt_buffer(struct urb *urb); extern void usb_ep0_reinit(struct usb_device *); /* (shifted) direction/type/recipient from the USB 2.0 spec, table 9.2 */ #define DeviceRequest \ ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8) #define DeviceOutRequest \ ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8) #define InterfaceRequest \ ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) #define EndpointRequest \ ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) #define EndpointOutRequest \ ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) /* class requests from the USB 2.0 hub spec, table 11-15 */ #define HUB_CLASS_REQ(dir, type, request) ((((dir) | (type)) << 8) | (request)) /* GetBusState and SetHubDescriptor are optional, omitted */ #define ClearHubFeature HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, USB_REQ_CLEAR_FEATURE) #define ClearPortFeature HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, USB_REQ_CLEAR_FEATURE) #define GetHubDescriptor HUB_CLASS_REQ(USB_DIR_IN, USB_RT_HUB, USB_REQ_GET_DESCRIPTOR) #define GetHubStatus HUB_CLASS_REQ(USB_DIR_IN, USB_RT_HUB, USB_REQ_GET_STATUS) #define GetPortStatus HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, USB_REQ_GET_STATUS) #define SetHubFeature HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, USB_REQ_SET_FEATURE) #define SetPortFeature HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, USB_REQ_SET_FEATURE) #define ClearTTBuffer HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_CLEAR_TT_BUFFER) #define ResetTT HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_RESET_TT) #define GetTTState HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, HUB_GET_TT_STATE) #define StopTT HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_STOP_TT) 
/*-------------------------------------------------------------------------*/ /* class requests from USB 3.1 hub spec, table 10-7 */ #define SetHubDepth HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, HUB_SET_DEPTH) #define GetPortErrorCount HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, HUB_GET_PORT_ERR_COUNT) /* * Generic bandwidth allocation constants/support */ #define FRAME_TIME_USECS 1000L #define BitTime(bytecount) (7 * 8 * bytecount / 6) /* with integer truncation */ /* Trying not to use worst-case bit-stuffing * of (7/6 * 8 * bytecount) = 9.33 * bytecount */ /* bytecount = data payload byte count */ #define NS_TO_US(ns) DIV_ROUND_UP(ns, 1000L) /* convert nanoseconds to microseconds, rounding up */ /* * Full/low speed bandwidth allocation constants/support. */ #define BW_HOST_DELAY 1000L /* nanoseconds */ #define BW_HUB_LS_SETUP 333L /* nanoseconds */ /* 4 full-speed bit times (est.) */ #define FRAME_TIME_BITS 12000L /* frame = 1 millisecond */ #define FRAME_TIME_MAX_BITS_ALLOC (90L * FRAME_TIME_BITS / 100L) #define FRAME_TIME_MAX_USECS_ALLOC (90L * FRAME_TIME_USECS / 100L) /* * Ceiling [nano/micro]seconds (typical) for that many bytes at high speed * ISO is a bit less, no ACK ... from USB 2.0 spec, 5.11.3 (and needed * to preallocate bandwidth) */ #define USB2_HOST_DELAY 5 /* nsec, guess */ #define HS_NSECS(bytes) (((55 * 8 * 2083) \ + (2083UL * (3 + BitTime(bytes))))/1000 \ + USB2_HOST_DELAY) #define HS_NSECS_ISO(bytes) (((38 * 8 * 2083) \ + (2083UL * (3 + BitTime(bytes))))/1000 \ + USB2_HOST_DELAY) #define HS_USECS(bytes) NS_TO_US(HS_NSECS(bytes)) #define HS_USECS_ISO(bytes) NS_TO_US(HS_NSECS_ISO(bytes)) extern long usb_calc_bus_time(int speed, int is_input, int isoc, int bytecount); /*-------------------------------------------------------------------------*/ extern void usb_set_device_state(struct usb_device *udev, enum usb_device_state new_state); /*-------------------------------------------------------------------------*/ /* exported only within usbcore */ extern struct idr usb_bus_idr; extern struct mutex usb_bus_idr_lock; extern wait_queue_head_t usb_kill_urb_queue; #define usb_endpoint_out(ep_dir) (!((ep_dir) & USB_DIR_IN)) #ifdef CONFIG_PM extern unsigned usb_wakeup_enabled_descendants(struct usb_device *udev); extern void usb_root_hub_lost_power(struct usb_device *rhdev); extern int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg); extern int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg); extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd); #else static inline unsigned usb_wakeup_enabled_descendants(struct usb_device *udev) { return 0; } static inline void usb_hcd_resume_root_hub(struct usb_hcd *hcd) { return; } #endif /* CONFIG_PM */ /*-------------------------------------------------------------------------*/ #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE) struct usb_mon_operations { void (*urb_submit)(struct usb_bus *bus, struct urb *urb); void (*urb_submit_error)(struct usb_bus *bus, struct urb *urb, int err); void (*urb_complete)(struct usb_bus *bus, struct urb *urb, int status); /* void (*urb_unlink)(struct usb_bus *bus, struct urb *urb); */ }; extern const struct usb_mon_operations *mon_ops; static inline void usbmon_urb_submit(struct usb_bus *bus, struct urb *urb) { if (bus->monitored) (*mon_ops->urb_submit)(bus, urb); } static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb, int error) { if (bus->monitored) (*mon_ops->urb_submit_error)(bus, urb, error); } static inline void usbmon_urb_complete(struct usb_bus 
*bus, struct urb *urb, int status) { if (bus->monitored) (*mon_ops->urb_complete)(bus, urb, status); } int usb_mon_register(const struct usb_mon_operations *ops); void usb_mon_deregister(void); #else static inline void usbmon_urb_submit(struct usb_bus *bus, struct urb *urb) {} static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb, int error) {} static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb, int status) {} #endif /* CONFIG_USB_MON || CONFIG_USB_MON_MODULE */ /*-------------------------------------------------------------------------*/ /* random stuff */ /* This rwsem is for use only by the hub driver and ehci-hcd. * Nobody else should touch it. */ extern struct rw_semaphore ehci_cf_port_reset_rwsem; /* Keep track of which host controller drivers are loaded */ #define USB_UHCI_LOADED 0 #define USB_OHCI_LOADED 1 #define USB_EHCI_LOADED 2 extern unsigned long usb_hcds_loaded; #endif /* __KERNEL__ */ #endif /* __USB_CORE_HCD_H */
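
/*
 * Hedged usage sketch (not part of the header above): the typical
 * create/add/remove lifecycle for the HCD API declared here, as a platform
 * glue driver might use it.  The hc_driver contents, register mapping and
 * IRQ number are made-up placeholders, not a working controller driver.
 */
#include <linux/platform_device.h>

static const struct hc_driver example_hc_driver = {
	.description	= "example-hcd",
	.product_desc	= "Example Host Controller",
	.hcd_priv_size	= 0,
	.flags		= HCD_MEMORY | HCD_DMA | HCD_USB2,
	/* .reset, .start, .stop, .urb_enqueue, ... omitted in this sketch */
};

static int example_hcd_probe(struct platform_device *pdev, void __iomem *regs,
			     int irq)
{
	struct usb_hcd *hcd;
	int ret;

	hcd = usb_create_hcd(&example_hc_driver, &pdev->dev,
			     dev_name(&pdev->dev));
	if (!hcd)
		return -ENOMEM;

	hcd->regs = regs;		/* already ioremap()ed by the caller */
	ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
	if (ret) {
		usb_put_hcd(hcd);	/* drop the kref from usb_create_hcd() */
		return ret;
	}
	return 0;
}

static void example_hcd_remove(struct usb_hcd *hcd)
{
	usb_remove_hcd(hcd);	/* stops the root hub and the controller */
	usb_put_hcd(hcd);	/* releases the hcd once the last ref is gone */
}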
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * pm_wakeup.h - Power management wakeup interface
 *
 * Copyright (C) 2008 Alan Stern
 * Copyright (C) 2010 Rafael J. Wysocki, Novell Inc.
 */

#ifndef _LINUX_PM_WAKEUP_H
#define _LINUX_PM_WAKEUP_H

#ifndef _DEVICE_H_
# error "please don't include this file directly"
#endif

#include <linux/types.h>

struct wake_irq;

/**
 * struct wakeup_source - Representation of wakeup sources
 *
 * @name: Name of the wakeup source
 * @id: Wakeup source id
 * @entry: Wakeup source list entry
 * @lock: Wakeup source lock
 * @wakeirq: Optional device specific wakeirq
 * @timer: Wakeup timer list
 * @timer_expires: Wakeup timer expiration
 * @total_time: Total time this wakeup source has been active.
 * @max_time: Maximum time this wakeup source has been continuously active.
 * @last_time: Monotonic clock when the wakeup source was last touched.
 * @start_prevent_time: Point in time when the source started preventing autosleep.
 * @prevent_sleep_time: Total time this source has been preventing autosleep.
 * @event_count: Number of signaled wakeup events.
 * @active_count: Number of times the wakeup source was activated.
 * @relax_count: Number of times the wakeup source was deactivated.
 * @expire_count: Number of times the wakeup source's timeout has expired.
 * @wakeup_count: Number of times the wakeup source might abort suspend.
 * @dev: Struct device for sysfs statistics about the wakeup source.
 * @active: Status of the wakeup source.
 * @autosleep_enabled: Autosleep is active, so update @prevent_sleep_time.
 */
struct wakeup_source {
	const char		*name;
	int			id;
	struct list_head	entry;
	spinlock_t		lock;
	struct wake_irq		*wakeirq;
	struct timer_list	timer;
	unsigned long		timer_expires;
	ktime_t			total_time;
	ktime_t			max_time;
	ktime_t			last_time;
	ktime_t			start_prevent_time;
	ktime_t			prevent_sleep_time;
	unsigned long		event_count;
	unsigned long		active_count;
	unsigned long		relax_count;
	unsigned long		expire_count;
	unsigned long		wakeup_count;
	struct device		*dev;
	bool			active:1;
	bool			autosleep_enabled:1;
};

#define for_each_wakeup_source(ws) \
	for ((ws) = wakeup_sources_walk_start();	\
	     (ws);					\
	     (ws) = wakeup_sources_walk_next((ws)))

#ifdef CONFIG_PM_SLEEP

/*
 * Changes to device_may_wakeup take effect on the next pm state change.
*/ static inline bool device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; } static inline bool device_may_wakeup(struct device *dev) { return dev->power.can_wakeup && !!dev->power.wakeup; } static inline bool device_wakeup_path(struct device *dev) { return dev->power.wakeup_path; } static inline void device_set_wakeup_path(struct device *dev) { dev->power.wakeup_path = true; } /* drivers/base/power/wakeup.c */ extern struct wakeup_source *wakeup_source_create(const char *name); extern void wakeup_source_destroy(struct wakeup_source *ws); extern void wakeup_source_add(struct wakeup_source *ws); extern void wakeup_source_remove(struct wakeup_source *ws); extern struct wakeup_source *wakeup_source_register(struct device *dev, const char *name); extern void wakeup_source_unregister(struct wakeup_source *ws); extern int wakeup_sources_read_lock(void); extern void wakeup_sources_read_unlock(int idx); extern struct wakeup_source *wakeup_sources_walk_start(void); extern struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); extern int device_wakeup_disable(struct device *dev); extern void device_set_wakeup_capable(struct device *dev, bool capable); extern int device_set_wakeup_enable(struct device *dev, bool enable); extern void __pm_stay_awake(struct wakeup_source *ws); extern void pm_stay_awake(struct device *dev); extern void __pm_relax(struct wakeup_source *ws); extern void pm_relax(struct device *dev); extern void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard); extern void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard); #else /* !CONFIG_PM_SLEEP */ static inline void device_set_wakeup_capable(struct device *dev, bool capable) { dev->power.can_wakeup = capable; } static inline bool device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; } static inline struct wakeup_source *wakeup_source_create(const char *name) { return NULL; } static inline void wakeup_source_destroy(struct wakeup_source *ws) {} static inline void wakeup_source_add(struct wakeup_source *ws) {} static inline void wakeup_source_remove(struct wakeup_source *ws) {} static inline struct wakeup_source *wakeup_source_register(struct device *dev, const char *name) { return NULL; } static inline void wakeup_source_unregister(struct wakeup_source *ws) {} static inline int device_wakeup_enable(struct device *dev) { dev->power.should_wakeup = true; return 0; } static inline int device_wakeup_disable(struct device *dev) { dev->power.should_wakeup = false; return 0; } static inline int device_set_wakeup_enable(struct device *dev, bool enable) { dev->power.should_wakeup = enable; return 0; } static inline bool device_may_wakeup(struct device *dev) { return dev->power.can_wakeup && dev->power.should_wakeup; } static inline bool device_wakeup_path(struct device *dev) { return false; } static inline void device_set_wakeup_path(struct device *dev) {} static inline void __pm_stay_awake(struct wakeup_source *ws) {} static inline void pm_stay_awake(struct device *dev) {} static inline void __pm_relax(struct wakeup_source *ws) {} static inline void pm_relax(struct device *dev) {} static inline void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard) {} static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard) {} #endif /* !CONFIG_PM_SLEEP */ static inline bool device_awake_path(struct device *dev) { return device_wakeup_path(dev); } static 
inline void device_set_awake_path(struct device *dev) { device_set_wakeup_path(dev); } static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { return pm_wakeup_ws_event(ws, msec, false); } static inline void pm_wakeup_event(struct device *dev, unsigned int msec) { return pm_wakeup_dev_event(dev, msec, false); } static inline void pm_wakeup_hard_event(struct device *dev) { return pm_wakeup_dev_event(dev, 0, true); } /** * device_init_wakeup - Device wakeup initialization. * @dev: Device to handle. * @enable: Whether or not to enable @dev as a wakeup device. * * By default, most devices should leave wakeup disabled. The exceptions are * devices that everyone expects to be wakeup sources: keyboards, power buttons, * possibly network interfaces, etc. Also, devices that don't generate their * own wakeup requests but merely forward requests from one bus to another * (like PCI bridges) should have wakeup enabled by default. */ static inline int device_init_wakeup(struct device *dev, bool enable) { if (enable) { device_set_wakeup_capable(dev, true); return device_wakeup_enable(dev); } else { device_wakeup_disable(dev); device_set_wakeup_capable(dev, false); return 0; } } #endif /* _LINUX_PM_WAKEUP_H */
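
/*
 * Hedged usage sketch (not part of the header above): how a driver typically
 * combines device_init_wakeup() with pm_wakeup_event().  The 200 ms grace
 * period and the example_* names are made-up placeholders; the header itself
 * is pulled in through <linux/device.h>, not included directly.
 */
#include <linux/device.h>
#include <linux/interrupt.h>

static irqreturn_t example_wake_handler(int irq, void *data)
{
	struct device *dev = data;

	/* Report a wakeup event and hold off autosleep for ~200 ms so that
	 * user space has a chance to process it. */
	pm_wakeup_event(dev, 200);
	return IRQ_HANDLED;
}

static int example_wakeup_probe(struct device *dev)
{
	/* Mark the device wakeup-capable and enable wakeup by default. */
	return device_init_wakeup(dev, true);
}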
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_CTYPE_H
#define _LINUX_CTYPE_H

#include <linux/compiler.h>

/*
 * NOTE! This ctype does not handle EOF like the standard C
 * library is required to.
 */
#define _U	0x01	/* upper */
#define _L	0x02	/* lower */
#define _D	0x04	/* digit */
#define _C	0x08	/* cntrl */
#define _P	0x10	/* punct */
#define _S	0x20	/* white space (space/lf/tab) */
#define _X	0x40	/* hex digit */
#define _SP	0x80	/* hard space (0x20) */

extern const unsigned char _ctype[];

#define __ismask(x) (_ctype[(int)(unsigned char)(x)])

#define isalnum(c)	((__ismask(c)&(_U|_L|_D)) != 0)
#define isalpha(c)	((__ismask(c)&(_U|_L)) != 0)
#define iscntrl(c)	((__ismask(c)&(_C)) != 0)
#define isgraph(c)	((__ismask(c)&(_P|_U|_L|_D)) != 0)
#define islower(c)	((__ismask(c)&(_L)) != 0)
#define isprint(c)	((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
#define ispunct(c)	((__ismask(c)&(_P)) != 0)
/* Note: isspace() must return false for %NUL-terminator */
#define isspace(c)	((__ismask(c)&(_S)) != 0)
#define isupper(c)	((__ismask(c)&(_U)) != 0)
#define isxdigit(c)	((__ismask(c)&(_D|_X)) != 0)

#define isascii(c) (((unsigned char)(c))<=0x7f)
#define toascii(c) (((unsigned char)(c))&0x7f)

#if __has_builtin(__builtin_isdigit)
#define isdigit(c) __builtin_isdigit(c)
#else
static inline int isdigit(int c)
{
	return '0' <= c && c <= '9';
}
#endif

static inline unsigned char __tolower(unsigned char c)
{
	if (isupper(c))
		c -= 'A'-'a';
	return c;
}

static inline unsigned char __toupper(unsigned char c)
{
	if (islower(c))
		c -= 'a'-'A';
	return c;
}

#define tolower(c) __tolower(c)
#define toupper(c) __toupper(c)

/*
 * Fast implementation of tolower() for internal usage. Do not use in your
 * code.
 */
static inline char _tolower(const char c)
{
	return c | 0x20;
}

/* Fast check for octal digit */
static inline int isodigit(const char c)
{
	return c >= '0' && c <= '7';
}

#endif
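
/*
 * Hedged usage sketch (not part of the header above): the classification
 * macros index a 256-entry lookup table, so they are cheap enough for
 * ad-hoc parsing.  example_parse_unsigned() is a made-up helper.
 */
#include <linux/ctype.h>

static unsigned long example_parse_unsigned(const char *s)
{
	unsigned long val = 0;

	while (isspace(*s))	/* skip leading whitespace */
		s++;
	while (isdigit(*s))	/* accumulate decimal digits */
		val = val * 10 + (*s++ - '0');
	return val;
}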
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * net/dst.h	Protocol independent destination cache definitions.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#ifndef _NET_DST_H
#define _NET_DST_H

#include <net/dst_ops.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/refcount.h>
#include <linux/rcuref.h>
#include <net/neighbour.h>
#include <asm/processor.h>
#include <linux/indirect_call_wrapper.h>

struct sk_buff;

struct dst_entry {
	struct net_device	*dev;
	struct dst_ops		*ops;
	unsigned long		_metrics;
	unsigned long		expires;
#ifdef CONFIG_XFRM
	struct xfrm_state	*xfrm;
#else
	void			*__pad1;
#endif
	int			(*input)(struct sk_buff *);
	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);

	unsigned short		flags;
#define DST_NOXFRM		0x0002
#define DST_NOPOLICY		0x0004
#define DST_NOCOUNT		0x0008
#define DST_FAKE_RTABLE		0x0010
#define DST_XFRM_TUNNEL		0x0020
#define DST_XFRM_QUEUE		0x0040
#define DST_METADATA		0x0080

	/* A non-zero value of dst->obsolete forces by-hand validation
	 * of the route entry.  Positive values are set by the generic
	 * dst layer to indicate that the entry has been forcefully
	 * destroyed.
	 *
	 * Negative values are used by the implementation layer code to
	 * force invocation of the dst_ops->check() method.
*/ short obsolete; #define DST_OBSOLETE_NONE 0 #define DST_OBSOLETE_DEAD 2 #define DST_OBSOLETE_FORCE_CHK -1 #define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ /* * __rcuref wants to be on a different cache line from * input/output/ops or performance tanks badly */ #ifdef CONFIG_64BIT rcuref_t __rcuref; /* 64-bit offset 64 */ #endif int __use; unsigned long lastuse; struct rcu_head rcu_head; short error; short __pad; __u32 tclassid; #ifndef CONFIG_64BIT struct lwtunnel_state *lwtstate; rcuref_t __rcuref; /* 32-bit offset 64 */ #endif netdevice_tracker dev_tracker; /* * Used by rtable and rt6_info. Moves lwtstate into the next cache * line on 64bit so that lwtstate does not cause false sharing with * __rcuref under contention of __rcuref. This also puts the * frequently accessed members of rtable and rt6_info out of the * __rcuref cache line. */ struct list_head rt_uncached; struct uncached_list *rt_uncached_list; #ifdef CONFIG_64BIT struct lwtunnel_state *lwtstate; #endif }; struct dst_metrics { u32 metrics[RTAX_MAX]; refcount_t refcnt; } __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */ extern const struct dst_metrics dst_default_metrics; u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); #define DST_METRICS_READ_ONLY 0x1UL #define DST_METRICS_REFCOUNTED 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) #define DST_METRICS_PTR(X) __DST_METRICS_PTR((X)->_metrics) static inline bool dst_metrics_read_only(const struct dst_entry *dst) { return dst->_metrics & DST_METRICS_READ_ONLY; } void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); static inline void dst_destroy_metrics_generic(struct dst_entry *dst) { unsigned long val = dst->_metrics; if (!(val & DST_METRICS_READ_ONLY)) __dst_destroy_metrics_generic(dst, val); } static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst) { unsigned long p = dst->_metrics; BUG_ON(!p); if (p & DST_METRICS_READ_ONLY) return dst->ops->cow_metrics(dst, p); return __DST_METRICS_PTR(p); } /* This may only be invoked before the entry has reached global * visibility. */ static inline void dst_init_metrics(struct dst_entry *dst, const u32 *src_metrics, bool read_only) { dst->_metrics = ((unsigned long) src_metrics) | (read_only ? DST_METRICS_READ_ONLY : 0); } static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) { u32 *dst_metrics = dst_metrics_write_ptr(dest); if (dst_metrics) { u32 *src_metrics = DST_METRICS_PTR(src); memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32)); } } static inline u32 *dst_metrics_ptr(struct dst_entry *dst) { return DST_METRICS_PTR(dst); } static inline u32 dst_metric_raw(const struct dst_entry *dst, const int metric) { u32 *p = DST_METRICS_PTR(dst); return p[metric-1]; } static inline u32 dst_metric(const struct dst_entry *dst, const int metric) { WARN_ON_ONCE(metric == RTAX_HOPLIMIT || metric == RTAX_ADVMSS || metric == RTAX_MTU); return dst_metric_raw(dst, metric); } static inline u32 dst_metric_advmss(const struct dst_entry *dst) { u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS); if (!advmss) advmss = dst->ops->default_advmss(dst); return advmss; } static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) { u32 *p = dst_metrics_write_ptr(dst); if (p) p[metric-1] = val; } /* Kernel-internal feature bits that are unallocated in user space. 
*/ #define DST_FEATURE_ECN_CA (1U << 31) #define DST_FEATURE_MASK (DST_FEATURE_ECN_CA) #define DST_FEATURE_ECN_MASK (DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN) static inline u32 dst_feature(const struct dst_entry *dst, u32 feature) { return dst_metric(dst, RTAX_FEATURES) & feature; } INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *)); INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *)); static inline u32 dst_mtu(const struct dst_entry *dst) { return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst); } /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) { return msecs_to_jiffies(dst_metric(dst, metric)); } static inline int dst_metric_locked(const struct dst_entry *dst, int metric) { return dst_metric(dst, RTAX_LOCK) & (1 << metric); } static inline void dst_hold(struct dst_entry *dst) { /* * If your kernel compilation stops here, please check * the placement of __rcuref in struct dst_entry */ BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63); WARN_ON(!rcuref_get(&dst->__rcuref)); } static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) { if (unlikely(time != dst->lastuse)) { dst->__use++; dst->lastuse = time; } } static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) dst_hold(dst); return dst; } void dst_release(struct dst_entry *dst); void dst_release_immediate(struct dst_entry *dst); static inline void refdst_drop(unsigned long refdst) { if (!(refdst & SKB_DST_NOREF)) dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK)); } /** * skb_dst_drop - drops skb dst * @skb: buffer * * Drops dst reference count if a reference was taken. */ static inline void skb_dst_drop(struct sk_buff *skb) { if (skb->_skb_refdst) { refdst_drop(skb->_skb_refdst); skb->_skb_refdst = 0UL; } } static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst) { nskb->slow_gro |= !!refdst; nskb->_skb_refdst = refdst; if (!(nskb->_skb_refdst & SKB_DST_NOREF)) dst_clone(skb_dst(nskb)); } static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb) { __skb_dst_copy(nskb, oskb->_skb_refdst); } /** * dst_hold_safe - Take a reference on a dst if possible * @dst: pointer to dst entry * * This helper returns false if it could not safely * take a reference on a dst. */ static inline bool dst_hold_safe(struct dst_entry *dst) { return rcuref_get(&dst->__rcuref); } /** * skb_dst_force - makes sure skb dst is refcounted * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. * Returns true if dst is refcounted. */ static inline bool skb_dst_force(struct sk_buff *skb) { if (skb_dst_is_noref(skb)) { struct dst_entry *dst = skb_dst(skb); WARN_ON(!rcu_read_lock_held()); if (!dst_hold_safe(dst)) dst = NULL; skb->_skb_refdst = (unsigned long)dst; skb->slow_gro |= !!dst; } return skb->_skb_refdst != 0UL; } /** * __skb_tunnel_rx - prepare skb for rx reinsert * @skb: buffer * @dev: tunnel device * @net: netns for packet i/o * * After decapsulation, packet is going to re-enter (netif_rx()) our stack, * so make some cleanups. (no accounting done) */ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, struct net *net) { skb->dev = dev; /* * Clear hash so that we can recalulate the hash for the * encapsulated packet, unless we have already determine the hash * over the L4 4-tuple. 
*/ skb_clear_hash_if_not_l4(skb); skb_set_queue_mapping(skb, 0); skb_scrub_packet(skb, !net_eq(net, dev_net(dev))); } /** * skb_tunnel_rx - prepare skb for rx reinsert * @skb: buffer * @dev: tunnel device * @net: netns for packet i/o * * After decapsulation, packet is going to re-enter (netif_rx()) our stack, * so make some cleanups, and perform accounting. * Note: this accounting is not SMP safe. */ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, struct net *net) { DEV_STATS_INC(dev, rx_packets); DEV_STATS_ADD(dev, rx_bytes, skb->len); __skb_tunnel_rx(skb, dev, net); } static inline u32 dst_tclassid(const struct sk_buff *skb) { #ifdef CONFIG_IP_ROUTE_CLASSID const struct dst_entry *dst; dst = skb_dst(skb); if (dst) return dst->tclassid; #endif return 0; } int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); static inline int dst_discard(struct sk_buff *skb) { return dst_discard_out(&init_net, skb->sk, skb); } void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); void dst_dev_put(struct dst_entry *dst); static inline void dst_confirm(struct dst_entry *dst) { } static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr); return IS_ERR(n) ? NULL : n; } static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { struct neighbour *n; if (WARN_ON_ONCE(!dst->ops->neigh_lookup)) return NULL; n = dst->ops->neigh_lookup(dst, skb, NULL); return IS_ERR(n) ? NULL : n; } static inline void dst_confirm_neigh(const struct dst_entry *dst, const void *daddr) { if (dst->ops->confirm_neigh) dst->ops->confirm_neigh(dst, daddr); } static inline void dst_link_failure(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops && dst->ops->link_failure) dst->ops->link_failure(skb); } static inline void dst_set_expires(struct dst_entry *dst, int timeout) { unsigned long expires = jiffies + timeout; if (expires == 0) expires = 1; if (dst->expires == 0 || time_before(expires, dst->expires)) dst->expires = expires; } INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, struct sk_buff *)); /* Output packet to network from transport. */ static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return INDIRECT_CALL_INET(skb_dst(skb)->output, ip6_output, ip_output, net, sk, skb); } INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *)); /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) { return INDIRECT_CALL_INET(skb_dst(skb)->input, ip6_input, ip_local_deliver, skb); } INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, u32)); INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, u32)); static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) { if (dst->obsolete) dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, dst, cookie); return dst; } /* Flags for xfrm_lookup flags argument. 
*/ enum { XFRM_LOOKUP_ICMP = 1 << 0, XFRM_LOOKUP_QUEUE = 1 << 1, XFRM_LOOKUP_KEEP_DST_REF = 1 << 2, }; struct flowi; #ifndef CONFIG_XFRM static inline struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags) { return dst_orig; } static inline struct dst_entry * xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags, u32 if_id) { return dst_orig; } static inline struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags) { return dst_orig; } static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) { return NULL; } #else struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags); struct dst_entry *xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags, u32 if_id); struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags); /* skb attached with this dst needs transformation if dst->xfrm is valid */ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) { return dst->xfrm; } #endif static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) { struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops->update_pmtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, true); } /* update dst pmtu but not do neighbor confirm */ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) { struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops->update_pmtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, false); } struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh); void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old); struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); unsigned int dst_blackhole_mtu(const struct dst_entry *dst); #endif /* _NET_DST_H */
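/*
 * Illustrative sketch, not part of net/dst.h above: a typical pattern for
 * code on the transmit path that consults the cached route attached to an
 * skb. example_check_route() is hypothetical; it only shows how skb_dst(),
 * dst_check() and dst_mtu() fit together.
 */
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/dst.h>

static int example_check_route(struct sk_buff *skb, u32 cookie)
{
	struct dst_entry *dst = skb_dst(skb);

	if (!dst)
		return -EHOSTUNREACH;

	/* Revalidate a possibly obsolete entry via dst_ops->check(). */
	dst = dst_check(dst, cookie);
	if (!dst)
		return -EHOSTUNREACH;

	/* In this sketch, refuse packets larger than the route MTU. */
	if (skb->len > dst_mtu(dst))
		return -EMSGSIZE;

	return 0;
}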
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_GENERIC_PGALLOC_H
#define __ASM_GENERIC_PGALLOC_H

#ifdef CONFIG_MMU

#define GFP_PGTABLE_KERNEL	(GFP_KERNEL | __GFP_ZERO)
#define GFP_PGTABLE_USER	(GFP_PGTABLE_KERNEL | __GFP_ACCOUNT)

/**
 * __pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table
 * @mm: the mm_struct of the current context
 *
 * This function is intended for architectures that need
 * anything beyond simple page allocation.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
{
	struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL &
			~__GFP_HIGHMEM, 0);

	if (!ptdesc)
		return NULL;
	return ptdesc_address(ptdesc);
}

#ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
/**
 * pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table
 * @mm: the mm_struct of the current context
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
	return __pte_alloc_one_kernel(mm);
}
#endif

/**
 * pte_free_kernel - free PTE-level kernel page table memory
 * @mm: the mm_struct of the current context
 * @pte: pointer to the memory containing the page table
 */
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
	pagetable_free(virt_to_ptdesc(pte));
}

/**
 * __pte_alloc_one - allocate memory for a PTE-level user page table
 * @mm: the mm_struct of the current context
 * @gfp: GFP flags to use for the allocation
 *
 * Allocate memory for a page table and ptdesc and runs pagetable_pte_ctor().
 *
 * This function is intended for architectures that need
 * anything beyond simple page allocation or must have custom GFP flags.
 *
 * Return: `struct page` referencing the ptdesc or %NULL on error
 */
static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
{
	struct ptdesc *ptdesc;

	ptdesc = pagetable_alloc(gfp, 0);
	if (!ptdesc)
		return NULL;
	if (!pagetable_pte_ctor(ptdesc)) {
		pagetable_free(ptdesc);
		return NULL;
	}

	return ptdesc_page(ptdesc);
}

#ifndef __HAVE_ARCH_PTE_ALLOC_ONE
/**
 * pte_alloc_one - allocate a page for PTE-level user page table
 * @mm: the mm_struct of the current context
 *
 * Allocate memory for a page table and ptdesc and runs pagetable_pte_ctor().
 *
 * Return: `struct page` referencing the ptdesc or %NULL on error
 */
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
	return __pte_alloc_one(mm, GFP_PGTABLE_USER);
}
#endif

/*
 * Should really implement gc for free page table pages. This could be
 * done with a reference count in struct page.
 */

/**
 * pte_free - free PTE-level user page table memory
 * @mm: the mm_struct of the current context
 * @pte_page: the `struct page` referencing the ptdesc
 */
static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
{
	struct ptdesc *ptdesc = page_ptdesc(pte_page);

	pagetable_pte_dtor(ptdesc);
	pagetable_free(ptdesc);
}

#if CONFIG_PGTABLE_LEVELS > 2

#ifndef __HAVE_ARCH_PMD_ALLOC_ONE
/**
 * pmd_alloc_one - allocate memory for a PMD-level page table
 * @mm: the mm_struct of the current context
 *
 * Allocate memory for a page table and ptdesc and runs pagetable_pmd_ctor().
 *
 * Allocations use %GFP_PGTABLE_USER in user context and
 * %GFP_PGTABLE_KERNEL in kernel context.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	struct ptdesc *ptdesc;
	gfp_t gfp = GFP_PGTABLE_USER;

	if (mm == &init_mm)
		gfp = GFP_PGTABLE_KERNEL;
	ptdesc = pagetable_alloc(gfp, 0);
	if (!ptdesc)
		return NULL;
	if (!pagetable_pmd_ctor(ptdesc)) {
		pagetable_free(ptdesc);
		return NULL;
	}
	return ptdesc_address(ptdesc);
}
#endif

#ifndef __HAVE_ARCH_PMD_FREE
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);

	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
	pagetable_pmd_dtor(ptdesc);
	pagetable_free(ptdesc);
}
#endif

#endif /* CONFIG_PGTABLE_LEVELS > 2 */

#if CONFIG_PGTABLE_LEVELS > 3

static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	gfp_t gfp = GFP_PGTABLE_USER;
	struct ptdesc *ptdesc;

	if (mm == &init_mm)
		gfp = GFP_PGTABLE_KERNEL;
	gfp &= ~__GFP_HIGHMEM;

	ptdesc = pagetable_alloc(gfp, 0);
	if (!ptdesc)
		return NULL;

	pagetable_pud_ctor(ptdesc);
	return ptdesc_address(ptdesc);
}

#ifndef __HAVE_ARCH_PUD_ALLOC_ONE
/**
 * pud_alloc_one - allocate memory for a PUD-level page table
 * @mm: the mm_struct of the current context
 *
 * Allocate memory for a page table using %GFP_PGTABLE_USER for user context
 * and %GFP_PGTABLE_KERNEL for kernel context.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	return __pud_alloc_one(mm, addr);
}
#endif

static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
{
	struct ptdesc *ptdesc = virt_to_ptdesc(pud);

	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
	pagetable_pud_dtor(ptdesc);
	pagetable_free(ptdesc);
}

#ifndef __HAVE_ARCH_PUD_FREE
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
	__pud_free(mm, pud);
}
#endif

#endif /* CONFIG_PGTABLE_LEVELS > 3 */

#ifndef __HAVE_ARCH_PGD_FREE
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pagetable_free(virt_to_ptdesc(pgd));
}
#endif

#endif /* CONFIG_MMU */

#endif /* __ASM_GENERIC_PGALLOC_H */
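/*
 * Illustrative sketch, not part of <asm-generic/pgalloc.h> above: the usual
 * allocate/use/free pairing for a kernel PTE page. example_pte_cycle() is
 * hypothetical; real page-table population goes through the generic mm
 * helpers and the appropriate page-table locks rather than open-coding this.
 */
#include <linux/errno.h>
#include <linux/mm.h>
#include <asm/pgalloc.h>

static int example_pte_cycle(struct mm_struct *mm)
{
	pte_t *pte = pte_alloc_one_kernel(mm);

	if (!pte)
		return -ENOMEM;

	/* ... populate entries here, then release the page table ... */

	pte_free_kernel(mm, pte);
	return 0;
}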
// SPDX-License-Identifier: GPL-2.0-only
/* The industrial I/O core
 *
 * Copyright (c) 2008 Jonathan Cameron
 *
 * Handling of buffer allocation / resizing.
 *
 * Things to look at here.
 * - Better memory allocation techniques?
 * - Alternative access techniques?
*/ #include <linux/anon_inodes.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/device.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/cdev.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/sched/signal.h> #include <linux/iio/iio.h> #include <linux/iio/iio-opaque.h> #include "iio_core.h" #include "iio_core_trigger.h" #include <linux/iio/sysfs.h> #include <linux/iio/buffer.h> #include <linux/iio/buffer_impl.h> static const char * const iio_endian_prefix[] = { [IIO_BE] = "be", [IIO_LE] = "le", }; static bool iio_buffer_is_active(struct iio_buffer *buf) { return !list_empty(&buf->buffer_list); } static size_t iio_buffer_data_available(struct iio_buffer *buf) { return buf->access->data_available(buf); } static int iio_buffer_flush_hwfifo(struct iio_dev *indio_dev, struct iio_buffer *buf, size_t required) { if (!indio_dev->info->hwfifo_flush_to_buffer) return -ENODEV; return indio_dev->info->hwfifo_flush_to_buffer(indio_dev, required); } static bool iio_buffer_ready(struct iio_dev *indio_dev, struct iio_buffer *buf, size_t to_wait, int to_flush) { size_t avail; int flushed = 0; /* wakeup if the device was unregistered */ if (!indio_dev->info) return true; /* drain the buffer if it was disabled */ if (!iio_buffer_is_active(buf)) { to_wait = min_t(size_t, to_wait, 1); to_flush = 0; } avail = iio_buffer_data_available(buf); if (avail >= to_wait) { /* force a flush for non-blocking reads */ if (!to_wait && avail < to_flush) iio_buffer_flush_hwfifo(indio_dev, buf, to_flush - avail); return true; } if (to_flush) flushed = iio_buffer_flush_hwfifo(indio_dev, buf, to_wait - avail); if (flushed <= 0) return false; if (avail + flushed >= to_wait) return true; return false; } /** * iio_buffer_read() - chrdev read for buffer access * @filp: File structure pointer for the char device * @buf: Destination buffer for iio buffer read * @n: First n bytes to read * @f_ps: Long offset provided by the user as a seek position * * This function relies on all buffer implementations having an * iio_buffer as their first element. * * Return: negative values corresponding to error codes or ret != 0 * for ending the reading activity **/ static ssize_t iio_buffer_read(struct file *filp, char __user *buf, size_t n, loff_t *f_ps) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; struct iio_dev *indio_dev = ib->indio_dev; DEFINE_WAIT_FUNC(wait, woken_wake_function); size_t datum_size; size_t to_wait; int ret = 0; if (!indio_dev->info) return -ENODEV; if (!rb || !rb->access->read) return -EINVAL; if (rb->direction != IIO_BUFFER_DIRECTION_IN) return -EPERM; datum_size = rb->bytes_per_datum; /* * If datum_size is 0 there will never be anything to read from the * buffer, so signal end of file now. 
*/ if (!datum_size) return 0; if (filp->f_flags & O_NONBLOCK) to_wait = 0; else to_wait = min_t(size_t, n / datum_size, rb->watermark); add_wait_queue(&rb->pollq, &wait); do { if (!indio_dev->info) { ret = -ENODEV; break; } if (!iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); continue; } ret = rb->access->read(rb, n, buf); if (ret == 0 && (filp->f_flags & O_NONBLOCK)) ret = -EAGAIN; } while (ret == 0); remove_wait_queue(&rb->pollq, &wait); return ret; } static size_t iio_buffer_space_available(struct iio_buffer *buf) { if (buf->access->space_available) return buf->access->space_available(buf); return SIZE_MAX; } static ssize_t iio_buffer_write(struct file *filp, const char __user *buf, size_t n, loff_t *f_ps) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; struct iio_dev *indio_dev = ib->indio_dev; DEFINE_WAIT_FUNC(wait, woken_wake_function); int ret = 0; size_t written; if (!indio_dev->info) return -ENODEV; if (!rb || !rb->access->write) return -EINVAL; if (rb->direction != IIO_BUFFER_DIRECTION_OUT) return -EPERM; written = 0; add_wait_queue(&rb->pollq, &wait); do { if (!indio_dev->info) return -ENODEV; if (!iio_buffer_space_available(rb)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (filp->f_flags & O_NONBLOCK) { if (!written) ret = -EAGAIN; break; } wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); continue; } ret = rb->access->write(rb, n - written, buf + written); if (ret < 0) break; written += ret; } while (written != n); remove_wait_queue(&rb->pollq, &wait); return ret < 0 ? ret : written; } /** * iio_buffer_poll() - poll the buffer to find out if it has data * @filp: File structure pointer for device access * @wait: Poll table structure pointer for which the driver adds * a wait queue * * Return: (EPOLLIN | EPOLLRDNORM) if data is available for reading * or 0 for other cases */ static __poll_t iio_buffer_poll(struct file *filp, struct poll_table_struct *wait) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; struct iio_dev *indio_dev = ib->indio_dev; if (!indio_dev->info || !rb) return 0; poll_wait(filp, &rb->pollq, wait); switch (rb->direction) { case IIO_BUFFER_DIRECTION_IN: if (iio_buffer_ready(indio_dev, rb, rb->watermark, 0)) return EPOLLIN | EPOLLRDNORM; break; case IIO_BUFFER_DIRECTION_OUT: if (iio_buffer_space_available(rb)) return EPOLLOUT | EPOLLWRNORM; break; } return 0; } ssize_t iio_buffer_read_wrapper(struct file *filp, char __user *buf, size_t n, loff_t *f_ps) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; /* check if buffer was opened through new API */ if (test_bit(IIO_BUSY_BIT_POS, &rb->flags)) return -EBUSY; return iio_buffer_read(filp, buf, n, f_ps); } ssize_t iio_buffer_write_wrapper(struct file *filp, const char __user *buf, size_t n, loff_t *f_ps) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; /* check if buffer was opened through new API */ if (test_bit(IIO_BUSY_BIT_POS, &rb->flags)) return -EBUSY; return iio_buffer_write(filp, buf, n, f_ps); } __poll_t iio_buffer_poll_wrapper(struct file *filp, struct poll_table_struct *wait) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_buffer *rb = ib->buffer; /* check if buffer was opened through new API */ if (test_bit(IIO_BUSY_BIT_POS, &rb->flags)) return 0; return 
iio_buffer_poll(filp, wait); } /** * iio_buffer_wakeup_poll - Wakes up the buffer waitqueue * @indio_dev: The IIO device * * Wakes up the event waitqueue used for poll(). Should usually * be called when the device is unregistered. */ void iio_buffer_wakeup_poll(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; unsigned int i; for (i = 0; i < iio_dev_opaque->attached_buffers_cnt; i++) { buffer = iio_dev_opaque->attached_buffers[i]; wake_up(&buffer->pollq); } } int iio_pop_from_buffer(struct iio_buffer *buffer, void *data) { if (!buffer || !buffer->access || !buffer->access->remove_from) return -EINVAL; return buffer->access->remove_from(buffer, data); } EXPORT_SYMBOL_GPL(iio_pop_from_buffer); void iio_buffer_init(struct iio_buffer *buffer) { INIT_LIST_HEAD(&buffer->demux_list); INIT_LIST_HEAD(&buffer->buffer_list); init_waitqueue_head(&buffer->pollq); kref_init(&buffer->ref); if (!buffer->watermark) buffer->watermark = 1; } EXPORT_SYMBOL(iio_buffer_init); void iio_device_detach_buffers(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; unsigned int i; for (i = 0; i < iio_dev_opaque->attached_buffers_cnt; i++) { buffer = iio_dev_opaque->attached_buffers[i]; iio_buffer_put(buffer); } kfree(iio_dev_opaque->attached_buffers); } static ssize_t iio_show_scan_index(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", to_iio_dev_attr(attr)->c->scan_index); } static ssize_t iio_show_fixed_type(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); u8 type = this_attr->c->scan_type.endianness; if (type == IIO_CPU) { #ifdef __LITTLE_ENDIAN type = IIO_LE; #else type = IIO_BE; #endif } if (this_attr->c->scan_type.repeat > 1) return sysfs_emit(buf, "%s:%c%d/%dX%d>>%u\n", iio_endian_prefix[type], this_attr->c->scan_type.sign, this_attr->c->scan_type.realbits, this_attr->c->scan_type.storagebits, this_attr->c->scan_type.repeat, this_attr->c->scan_type.shift); else return sysfs_emit(buf, "%s:%c%d/%d>>%u\n", iio_endian_prefix[type], this_attr->c->scan_type.sign, this_attr->c->scan_type.realbits, this_attr->c->scan_type.storagebits, this_attr->c->scan_type.shift); } static ssize_t iio_scan_el_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; /* Ensure ret is 0 or 1. */ ret = !!test_bit(to_iio_dev_attr(attr)->address, buffer->scan_mask); return sysfs_emit(buf, "%d\n", ret); } /* Note NULL used as error indicator as it doesn't make sense. */ static const unsigned long *iio_scan_mask_match(const unsigned long *av_masks, unsigned int masklength, const unsigned long *mask, bool strict) { if (bitmap_empty(mask, masklength)) return NULL; /* * The condition here do not handle multi-long masks correctly. * It only checks the first long to be zero, and will use such mask * as a terminator even if there was bits set after the first long. * * Correct check would require using: * while (!bitmap_empty(av_masks, masklength)) * instead. This is potentially hazardous because the * avaliable_scan_masks is a zero terminated array of longs - and * using the proper bitmap_empty() check for multi-long wide masks * would require the array to be terminated with multiple zero longs - * which is not such an usual pattern. 
* * As writing of this no multi-long wide masks were found in-tree, so * the simple while (*av_masks) check is working. */ while (*av_masks) { if (strict) { if (bitmap_equal(mask, av_masks, masklength)) return av_masks; } else { if (bitmap_subset(mask, av_masks, masklength)) return av_masks; } av_masks += BITS_TO_LONGS(masklength); } return NULL; } static bool iio_validate_scan_mask(struct iio_dev *indio_dev, const unsigned long *mask) { if (!indio_dev->setup_ops->validate_scan_mask) return true; return indio_dev->setup_ops->validate_scan_mask(indio_dev, mask); } /** * iio_scan_mask_set() - set particular bit in the scan mask * @indio_dev: the iio device * @buffer: the buffer whose scan mask we are interested in * @bit: the bit to be set. * * Note that at this point we have no way of knowing what other * buffers might request, hence this code only verifies that the * individual buffers request is plausible. */ static int iio_scan_mask_set(struct iio_dev *indio_dev, struct iio_buffer *buffer, int bit) { const unsigned long *mask; unsigned long *trialmask; if (!indio_dev->masklength) { WARN(1, "Trying to set scanmask prior to registering buffer\n"); return -EINVAL; } trialmask = bitmap_alloc(indio_dev->masklength, GFP_KERNEL); if (!trialmask) return -ENOMEM; bitmap_copy(trialmask, buffer->scan_mask, indio_dev->masklength); set_bit(bit, trialmask); if (!iio_validate_scan_mask(indio_dev, trialmask)) goto err_invalid_mask; if (indio_dev->available_scan_masks) { mask = iio_scan_mask_match(indio_dev->available_scan_masks, indio_dev->masklength, trialmask, false); if (!mask) goto err_invalid_mask; } bitmap_copy(buffer->scan_mask, trialmask, indio_dev->masklength); bitmap_free(trialmask); return 0; err_invalid_mask: bitmap_free(trialmask); return -EINVAL; } static int iio_scan_mask_clear(struct iio_buffer *buffer, int bit) { clear_bit(bit, buffer->scan_mask); return 0; } static int iio_scan_mask_query(struct iio_dev *indio_dev, struct iio_buffer *buffer, int bit) { if (bit > indio_dev->masklength) return -EINVAL; if (!buffer->scan_mask) return 0; /* Ensure return value is 0 or 1. */ return !!test_bit(bit, buffer->scan_mask); }; static ssize_t iio_scan_el_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; bool state; struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); struct iio_buffer *buffer = this_attr->buffer; ret = kstrtobool(buf, &state); if (ret < 0) return ret; mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) { ret = -EBUSY; goto error_ret; } ret = iio_scan_mask_query(indio_dev, buffer, this_attr->address); if (ret < 0) goto error_ret; if (!state && ret) { ret = iio_scan_mask_clear(buffer, this_attr->address); if (ret) goto error_ret; } else if (state && !ret) { ret = iio_scan_mask_set(indio_dev, buffer, this_attr->address); if (ret) goto error_ret; } error_ret: mutex_unlock(&iio_dev_opaque->mlock); return ret < 0 ? 
ret : len; } static ssize_t iio_scan_el_ts_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%d\n", buffer->scan_timestamp); } static ssize_t iio_scan_el_ts_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; bool state; ret = kstrtobool(buf, &state); if (ret < 0) return ret; mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) { ret = -EBUSY; goto error_ret; } buffer->scan_timestamp = state; error_ret: mutex_unlock(&iio_dev_opaque->mlock); return ret ? ret : len; } static int iio_buffer_add_channel_sysfs(struct iio_dev *indio_dev, struct iio_buffer *buffer, const struct iio_chan_spec *chan) { int ret, attrcount = 0; ret = __iio_add_chan_devattr("index", chan, &iio_show_scan_index, NULL, 0, IIO_SEPARATE, &indio_dev->dev, buffer, &buffer->buffer_attr_list); if (ret) return ret; attrcount++; ret = __iio_add_chan_devattr("type", chan, &iio_show_fixed_type, NULL, 0, IIO_SEPARATE, &indio_dev->dev, buffer, &buffer->buffer_attr_list); if (ret) return ret; attrcount++; if (chan->type != IIO_TIMESTAMP) ret = __iio_add_chan_devattr("en", chan, &iio_scan_el_show, &iio_scan_el_store, chan->scan_index, IIO_SEPARATE, &indio_dev->dev, buffer, &buffer->buffer_attr_list); else ret = __iio_add_chan_devattr("en", chan, &iio_scan_el_ts_show, &iio_scan_el_ts_store, chan->scan_index, IIO_SEPARATE, &indio_dev->dev, buffer, &buffer->buffer_attr_list); if (ret) return ret; attrcount++; ret = attrcount; return ret; } static ssize_t length_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%d\n", buffer->length); } static ssize_t length_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; unsigned int val; int ret; ret = kstrtouint(buf, 10, &val); if (ret) return ret; if (val == buffer->length) return len; mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_is_active(buffer)) { ret = -EBUSY; } else { buffer->access->set_length(buffer, val); ret = 0; } if (ret) goto out; if (buffer->length && buffer->length < buffer->watermark) buffer->watermark = buffer->length; out: mutex_unlock(&iio_dev_opaque->mlock); return ret ? 
ret : len; } static ssize_t enable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%d\n", iio_buffer_is_active(buffer)); } static unsigned int iio_storage_bytes_for_si(struct iio_dev *indio_dev, unsigned int scan_index) { const struct iio_chan_spec *ch; unsigned int bytes; ch = iio_find_channel_from_si(indio_dev, scan_index); bytes = ch->scan_type.storagebits / 8; if (ch->scan_type.repeat > 1) bytes *= ch->scan_type.repeat; return bytes; } static unsigned int iio_storage_bytes_for_timestamp(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); return iio_storage_bytes_for_si(indio_dev, iio_dev_opaque->scan_index_timestamp); } static int iio_compute_scan_bytes(struct iio_dev *indio_dev, const unsigned long *mask, bool timestamp) { unsigned int bytes = 0; int length, i, largest = 0; /* How much space will the demuxed element take? */ for_each_set_bit(i, mask, indio_dev->masklength) { length = iio_storage_bytes_for_si(indio_dev, i); bytes = ALIGN(bytes, length); bytes += length; largest = max(largest, length); } if (timestamp) { length = iio_storage_bytes_for_timestamp(indio_dev); bytes = ALIGN(bytes, length); bytes += length; largest = max(largest, length); } bytes = ALIGN(bytes, largest); return bytes; } static void iio_buffer_activate(struct iio_dev *indio_dev, struct iio_buffer *buffer) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); iio_buffer_get(buffer); list_add(&buffer->buffer_list, &iio_dev_opaque->buffer_list); } static void iio_buffer_deactivate(struct iio_buffer *buffer) { list_del_init(&buffer->buffer_list); wake_up_interruptible(&buffer->pollq); iio_buffer_put(buffer); } static void iio_buffer_deactivate_all(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer, *_buffer; list_for_each_entry_safe(buffer, _buffer, &iio_dev_opaque->buffer_list, buffer_list) iio_buffer_deactivate(buffer); } static int iio_buffer_enable(struct iio_buffer *buffer, struct iio_dev *indio_dev) { if (!buffer->access->enable) return 0; return buffer->access->enable(buffer, indio_dev); } static int iio_buffer_disable(struct iio_buffer *buffer, struct iio_dev *indio_dev) { if (!buffer->access->disable) return 0; return buffer->access->disable(buffer, indio_dev); } static void iio_buffer_update_bytes_per_datum(struct iio_dev *indio_dev, struct iio_buffer *buffer) { unsigned int bytes; if (!buffer->access->set_bytes_per_datum) return; bytes = iio_compute_scan_bytes(indio_dev, buffer->scan_mask, buffer->scan_timestamp); buffer->access->set_bytes_per_datum(buffer, bytes); } static int iio_buffer_request_update(struct iio_dev *indio_dev, struct iio_buffer *buffer) { int ret; iio_buffer_update_bytes_per_datum(indio_dev, buffer); if (buffer->access->request_update) { ret = buffer->access->request_update(buffer); if (ret) { dev_dbg(&indio_dev->dev, "Buffer not started: buffer parameter update failed (%d)\n", ret); return ret; } } return 0; } static void iio_free_scan_mask(struct iio_dev *indio_dev, const unsigned long *mask) { /* If the mask is dynamically allocated free it, otherwise do nothing */ if (!indio_dev->available_scan_masks) bitmap_free(mask); } struct iio_device_config { unsigned int mode; unsigned int watermark; const unsigned long *scan_mask; unsigned int scan_bytes; bool scan_timestamp; }; static int iio_verify_update(struct iio_dev *indio_dev, struct 
iio_buffer *insert_buffer, struct iio_buffer *remove_buffer, struct iio_device_config *config) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); unsigned long *compound_mask; const unsigned long *scan_mask; bool strict_scanmask = false; struct iio_buffer *buffer; bool scan_timestamp; unsigned int modes; if (insert_buffer && bitmap_empty(insert_buffer->scan_mask, indio_dev->masklength)) { dev_dbg(&indio_dev->dev, "At least one scan element must be enabled first\n"); return -EINVAL; } memset(config, 0, sizeof(*config)); config->watermark = ~0; /* * If there is just one buffer and we are removing it there is nothing * to verify. */ if (remove_buffer && !insert_buffer && list_is_singular(&iio_dev_opaque->buffer_list)) return 0; modes = indio_dev->modes; list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { if (buffer == remove_buffer) continue; modes &= buffer->access->modes; config->watermark = min(config->watermark, buffer->watermark); } if (insert_buffer) { modes &= insert_buffer->access->modes; config->watermark = min(config->watermark, insert_buffer->watermark); } /* Definitely possible for devices to support both of these. */ if ((modes & INDIO_BUFFER_TRIGGERED) && indio_dev->trig) { config->mode = INDIO_BUFFER_TRIGGERED; } else if (modes & INDIO_BUFFER_HARDWARE) { /* * Keep things simple for now and only allow a single buffer to * be connected in hardware mode. */ if (insert_buffer && !list_empty(&iio_dev_opaque->buffer_list)) return -EINVAL; config->mode = INDIO_BUFFER_HARDWARE; strict_scanmask = true; } else if (modes & INDIO_BUFFER_SOFTWARE) { config->mode = INDIO_BUFFER_SOFTWARE; } else { /* Can only occur on first buffer */ if (indio_dev->modes & INDIO_BUFFER_TRIGGERED) dev_dbg(&indio_dev->dev, "Buffer not started: no trigger\n"); return -EINVAL; } /* What scan mask do we actually have? 
*/ compound_mask = bitmap_zalloc(indio_dev->masklength, GFP_KERNEL); if (!compound_mask) return -ENOMEM; scan_timestamp = false; list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { if (buffer == remove_buffer) continue; bitmap_or(compound_mask, compound_mask, buffer->scan_mask, indio_dev->masklength); scan_timestamp |= buffer->scan_timestamp; } if (insert_buffer) { bitmap_or(compound_mask, compound_mask, insert_buffer->scan_mask, indio_dev->masklength); scan_timestamp |= insert_buffer->scan_timestamp; } if (indio_dev->available_scan_masks) { scan_mask = iio_scan_mask_match(indio_dev->available_scan_masks, indio_dev->masklength, compound_mask, strict_scanmask); bitmap_free(compound_mask); if (!scan_mask) return -EINVAL; } else { scan_mask = compound_mask; } config->scan_bytes = iio_compute_scan_bytes(indio_dev, scan_mask, scan_timestamp); config->scan_mask = scan_mask; config->scan_timestamp = scan_timestamp; return 0; } /** * struct iio_demux_table - table describing demux memcpy ops * @from: index to copy from * @to: index to copy to * @length: how many bytes to copy * @l: list head used for management */ struct iio_demux_table { unsigned int from; unsigned int to; unsigned int length; struct list_head l; }; static void iio_buffer_demux_free(struct iio_buffer *buffer) { struct iio_demux_table *p, *q; list_for_each_entry_safe(p, q, &buffer->demux_list, l) { list_del(&p->l); kfree(p); } } static int iio_buffer_add_demux(struct iio_buffer *buffer, struct iio_demux_table **p, unsigned int in_loc, unsigned int out_loc, unsigned int length) { if (*p && (*p)->from + (*p)->length == in_loc && (*p)->to + (*p)->length == out_loc) { (*p)->length += length; } else { *p = kmalloc(sizeof(**p), GFP_KERNEL); if (!(*p)) return -ENOMEM; (*p)->from = in_loc; (*p)->to = out_loc; (*p)->length = length; list_add_tail(&(*p)->l, &buffer->demux_list); } return 0; } static int iio_buffer_update_demux(struct iio_dev *indio_dev, struct iio_buffer *buffer) { int ret, in_ind = -1, out_ind, length; unsigned int in_loc = 0, out_loc = 0; struct iio_demux_table *p = NULL; /* Clear out any old demux */ iio_buffer_demux_free(buffer); kfree(buffer->demux_bounce); buffer->demux_bounce = NULL; /* First work out which scan mode we will actually have */ if (bitmap_equal(indio_dev->active_scan_mask, buffer->scan_mask, indio_dev->masklength)) return 0; /* Now we have the two masks, work from least sig and build up sizes */ for_each_set_bit(out_ind, buffer->scan_mask, indio_dev->masklength) { in_ind = find_next_bit(indio_dev->active_scan_mask, indio_dev->masklength, in_ind + 1); while (in_ind != out_ind) { length = iio_storage_bytes_for_si(indio_dev, in_ind); /* Make sure we are aligned */ in_loc = roundup(in_loc, length) + length; in_ind = find_next_bit(indio_dev->active_scan_mask, indio_dev->masklength, in_ind + 1); } length = iio_storage_bytes_for_si(indio_dev, in_ind); out_loc = roundup(out_loc, length); in_loc = roundup(in_loc, length); ret = iio_buffer_add_demux(buffer, &p, in_loc, out_loc, length); if (ret) goto error_clear_mux_table; out_loc += length; in_loc += length; } /* Relies on scan_timestamp being last */ if (buffer->scan_timestamp) { length = iio_storage_bytes_for_timestamp(indio_dev); out_loc = roundup(out_loc, length); in_loc = roundup(in_loc, length); ret = iio_buffer_add_demux(buffer, &p, in_loc, out_loc, length); if (ret) goto error_clear_mux_table; out_loc += length; } buffer->demux_bounce = kzalloc(out_loc, GFP_KERNEL); if (!buffer->demux_bounce) { ret = -ENOMEM; goto 
error_clear_mux_table; } return 0; error_clear_mux_table: iio_buffer_demux_free(buffer); return ret; } static int iio_update_demux(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; int ret; list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { ret = iio_buffer_update_demux(indio_dev, buffer); if (ret < 0) goto error_clear_mux_table; } return 0; error_clear_mux_table: list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) iio_buffer_demux_free(buffer); return ret; } static int iio_enable_buffers(struct iio_dev *indio_dev, struct iio_device_config *config) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer, *tmp = NULL; int ret; indio_dev->active_scan_mask = config->scan_mask; indio_dev->scan_timestamp = config->scan_timestamp; indio_dev->scan_bytes = config->scan_bytes; iio_dev_opaque->currentmode = config->mode; iio_update_demux(indio_dev); /* Wind up again */ if (indio_dev->setup_ops->preenable) { ret = indio_dev->setup_ops->preenable(indio_dev); if (ret) { dev_dbg(&indio_dev->dev, "Buffer not started: buffer preenable failed (%d)\n", ret); goto err_undo_config; } } if (indio_dev->info->update_scan_mode) { ret = indio_dev->info ->update_scan_mode(indio_dev, indio_dev->active_scan_mask); if (ret < 0) { dev_dbg(&indio_dev->dev, "Buffer not started: update scan mode failed (%d)\n", ret); goto err_run_postdisable; } } if (indio_dev->info->hwfifo_set_watermark) indio_dev->info->hwfifo_set_watermark(indio_dev, config->watermark); list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { ret = iio_buffer_enable(buffer, indio_dev); if (ret) { tmp = buffer; goto err_disable_buffers; } } if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { ret = iio_trigger_attach_poll_func(indio_dev->trig, indio_dev->pollfunc); if (ret) goto err_disable_buffers; } if (indio_dev->setup_ops->postenable) { ret = indio_dev->setup_ops->postenable(indio_dev); if (ret) { dev_dbg(&indio_dev->dev, "Buffer not started: postenable failed (%d)\n", ret); goto err_detach_pollfunc; } } return 0; err_detach_pollfunc: if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { iio_trigger_detach_poll_func(indio_dev->trig, indio_dev->pollfunc); } err_disable_buffers: buffer = list_prepare_entry(tmp, &iio_dev_opaque->buffer_list, buffer_list); list_for_each_entry_continue_reverse(buffer, &iio_dev_opaque->buffer_list, buffer_list) iio_buffer_disable(buffer, indio_dev); err_run_postdisable: if (indio_dev->setup_ops->postdisable) indio_dev->setup_ops->postdisable(indio_dev); err_undo_config: iio_dev_opaque->currentmode = INDIO_DIRECT_MODE; indio_dev->active_scan_mask = NULL; return ret; } static int iio_disable_buffers(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; int ret = 0; int ret2; /* Wind down existing buffers - iff there are any */ if (list_empty(&iio_dev_opaque->buffer_list)) return 0; /* * If things go wrong at some step in disable we still need to continue * to perform the other steps, otherwise we leave the device in a * inconsistent state. We return the error code for the first error we * encountered. 
*/ if (indio_dev->setup_ops->predisable) { ret2 = indio_dev->setup_ops->predisable(indio_dev); if (ret2 && !ret) ret = ret2; } if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) { iio_trigger_detach_poll_func(indio_dev->trig, indio_dev->pollfunc); } list_for_each_entry(buffer, &iio_dev_opaque->buffer_list, buffer_list) { ret2 = iio_buffer_disable(buffer, indio_dev); if (ret2 && !ret) ret = ret2; } if (indio_dev->setup_ops->postdisable) { ret2 = indio_dev->setup_ops->postdisable(indio_dev); if (ret2 && !ret) ret = ret2; } iio_free_scan_mask(indio_dev, indio_dev->active_scan_mask); indio_dev->active_scan_mask = NULL; iio_dev_opaque->currentmode = INDIO_DIRECT_MODE; return ret; } static int __iio_update_buffers(struct iio_dev *indio_dev, struct iio_buffer *insert_buffer, struct iio_buffer *remove_buffer) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_device_config new_config; int ret; ret = iio_verify_update(indio_dev, insert_buffer, remove_buffer, &new_config); if (ret) return ret; if (insert_buffer) { ret = iio_buffer_request_update(indio_dev, insert_buffer); if (ret) goto err_free_config; } ret = iio_disable_buffers(indio_dev); if (ret) goto err_deactivate_all; if (remove_buffer) iio_buffer_deactivate(remove_buffer); if (insert_buffer) iio_buffer_activate(indio_dev, insert_buffer); /* If no buffers in list, we are done */ if (list_empty(&iio_dev_opaque->buffer_list)) return 0; ret = iio_enable_buffers(indio_dev, &new_config); if (ret) goto err_deactivate_all; return 0; err_deactivate_all: /* * We've already verified that the config is valid earlier. If things go * wrong in either enable or disable the most likely reason is an IO * error from the device. In this case there is no good recovery * strategy. Just make sure to disable everything and leave the device * in a sane state. With a bit of luck the device might come back to * life again later and userspace can try again. 
*/ iio_buffer_deactivate_all(indio_dev); err_free_config: iio_free_scan_mask(indio_dev, new_config.scan_mask); return ret; } int iio_update_buffers(struct iio_dev *indio_dev, struct iio_buffer *insert_buffer, struct iio_buffer *remove_buffer) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int ret; if (insert_buffer == remove_buffer) return 0; if (insert_buffer && insert_buffer->direction == IIO_BUFFER_DIRECTION_OUT) return -EINVAL; mutex_lock(&iio_dev_opaque->info_exist_lock); mutex_lock(&iio_dev_opaque->mlock); if (insert_buffer && iio_buffer_is_active(insert_buffer)) insert_buffer = NULL; if (remove_buffer && !iio_buffer_is_active(remove_buffer)) remove_buffer = NULL; if (!insert_buffer && !remove_buffer) { ret = 0; goto out_unlock; } if (!indio_dev->info) { ret = -ENODEV; goto out_unlock; } ret = __iio_update_buffers(indio_dev, insert_buffer, remove_buffer); out_unlock: mutex_unlock(&iio_dev_opaque->mlock); mutex_unlock(&iio_dev_opaque->info_exist_lock); return ret; } EXPORT_SYMBOL_GPL(iio_update_buffers); void iio_disable_all_buffers(struct iio_dev *indio_dev) { iio_disable_buffers(indio_dev); iio_buffer_deactivate_all(indio_dev); } static ssize_t enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; bool requested_state; struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; bool inlist; ret = kstrtobool(buf, &requested_state); if (ret < 0) return ret; mutex_lock(&iio_dev_opaque->mlock); /* Find out if it is in the list */ inlist = iio_buffer_is_active(buffer); /* Already in desired state */ if (inlist == requested_state) goto done; if (requested_state) ret = __iio_update_buffers(indio_dev, buffer, NULL); else ret = __iio_update_buffers(indio_dev, NULL, buffer); done: mutex_unlock(&iio_dev_opaque->mlock); return (ret < 0) ? ret : len; } static ssize_t watermark_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%u\n", buffer->watermark); } static ssize_t watermark_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; unsigned int val; int ret; ret = kstrtouint(buf, 10, &val); if (ret) return ret; if (!val) return -EINVAL; mutex_lock(&iio_dev_opaque->mlock); if (val > buffer->length) { ret = -EINVAL; goto out; } if (iio_buffer_is_active(buffer)) { ret = -EBUSY; goto out; } buffer->watermark = val; out: mutex_unlock(&iio_dev_opaque->mlock); return ret ? 
ret : len; } static ssize_t data_available_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; return sysfs_emit(buf, "%zu\n", iio_buffer_data_available(buffer)); } static ssize_t direction_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer; switch (buffer->direction) { case IIO_BUFFER_DIRECTION_IN: return sysfs_emit(buf, "in\n"); case IIO_BUFFER_DIRECTION_OUT: return sysfs_emit(buf, "out\n"); default: return -EINVAL; } } static DEVICE_ATTR_RW(length); static struct device_attribute dev_attr_length_ro = __ATTR_RO(length); static DEVICE_ATTR_RW(enable); static DEVICE_ATTR_RW(watermark); static struct device_attribute dev_attr_watermark_ro = __ATTR_RO(watermark); static DEVICE_ATTR_RO(data_available); static DEVICE_ATTR_RO(direction); /* * When adding new attributes here, put the at the end, at least until * the code that handles the length/length_ro & watermark/watermark_ro * assignments gets cleaned up. Otherwise these can create some weird * duplicate attributes errors under some setups. */ static struct attribute *iio_buffer_attrs[] = { &dev_attr_length.attr, &dev_attr_enable.attr, &dev_attr_watermark.attr, &dev_attr_data_available.attr, &dev_attr_direction.attr, }; #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) static struct attribute *iio_buffer_wrap_attr(struct iio_buffer *buffer, struct attribute *attr) { struct device_attribute *dattr = to_dev_attr(attr); struct iio_dev_attr *iio_attr; iio_attr = kzalloc(sizeof(*iio_attr), GFP_KERNEL); if (!iio_attr) return NULL; iio_attr->buffer = buffer; memcpy(&iio_attr->dev_attr, dattr, sizeof(iio_attr->dev_attr)); iio_attr->dev_attr.attr.name = kstrdup_const(attr->name, GFP_KERNEL); if (!iio_attr->dev_attr.attr.name) { kfree(iio_attr); return NULL; } sysfs_attr_init(&iio_attr->dev_attr.attr); list_add(&iio_attr->l, &buffer->buffer_attr_list); return &iio_attr->dev_attr.attr; } static int iio_buffer_register_legacy_sysfs_groups(struct iio_dev *indio_dev, struct attribute **buffer_attrs, int buffer_attrcount, int scan_el_attrcount) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct attribute_group *group; struct attribute **attrs; int ret; attrs = kcalloc(buffer_attrcount + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) return -ENOMEM; memcpy(attrs, buffer_attrs, buffer_attrcount * sizeof(*attrs)); group = &iio_dev_opaque->legacy_buffer_group; group->attrs = attrs; group->name = "buffer"; ret = iio_device_register_sysfs_group(indio_dev, group); if (ret) goto error_free_buffer_attrs; attrs = kcalloc(scan_el_attrcount + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) { ret = -ENOMEM; goto error_free_buffer_attrs; } memcpy(attrs, &buffer_attrs[buffer_attrcount], scan_el_attrcount * sizeof(*attrs)); group = &iio_dev_opaque->legacy_scan_el_group; group->attrs = attrs; group->name = "scan_elements"; ret = iio_device_register_sysfs_group(indio_dev, group); if (ret) goto error_free_scan_el_attrs; return 0; error_free_scan_el_attrs: kfree(iio_dev_opaque->legacy_scan_el_group.attrs); error_free_buffer_attrs: kfree(iio_dev_opaque->legacy_buffer_group.attrs); return ret; } static void iio_buffer_unregister_legacy_sysfs_groups(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); kfree(iio_dev_opaque->legacy_buffer_group.attrs); kfree(iio_dev_opaque->legacy_scan_el_group.attrs); } static int 
iio_buffer_chrdev_release(struct inode *inode, struct file *filep) { struct iio_dev_buffer_pair *ib = filep->private_data; struct iio_dev *indio_dev = ib->indio_dev; struct iio_buffer *buffer = ib->buffer; wake_up(&buffer->pollq); kfree(ib); clear_bit(IIO_BUSY_BIT_POS, &buffer->flags); iio_device_put(indio_dev); return 0; } static const struct file_operations iio_buffer_chrdev_fileops = { .owner = THIS_MODULE, .llseek = noop_llseek, .read = iio_buffer_read, .write = iio_buffer_write, .poll = iio_buffer_poll, .release = iio_buffer_chrdev_release, }; static long iio_device_buffer_getfd(struct iio_dev *indio_dev, unsigned long arg) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int __user *ival = (int __user *)arg; struct iio_dev_buffer_pair *ib; struct iio_buffer *buffer; int fd, idx, ret; if (copy_from_user(&idx, ival, sizeof(idx))) return -EFAULT; if (idx >= iio_dev_opaque->attached_buffers_cnt) return -ENODEV; iio_device_get(indio_dev); buffer = iio_dev_opaque->attached_buffers[idx]; if (test_and_set_bit(IIO_BUSY_BIT_POS, &buffer->flags)) { ret = -EBUSY; goto error_iio_dev_put; } ib = kzalloc(sizeof(*ib), GFP_KERNEL); if (!ib) { ret = -ENOMEM; goto error_clear_busy_bit; } ib->indio_dev = indio_dev; ib->buffer = buffer; fd = anon_inode_getfd("iio:buffer", &iio_buffer_chrdev_fileops, ib, O_RDWR | O_CLOEXEC); if (fd < 0) { ret = fd; goto error_free_ib; } if (copy_to_user(ival, &fd, sizeof(fd))) { /* * "Leak" the fd, as there's not much we can do about this * anyway. 'fd' might have been closed already, as * anon_inode_getfd() called fd_install() on it, which made * it reachable by userland. * * Instead of allowing a malicious user to play tricks with * us, rely on the process exit path to do any necessary * cleanup, as in releasing the file, if still needed. 
*/ return -EFAULT; } return 0; error_free_ib: kfree(ib); error_clear_busy_bit: clear_bit(IIO_BUSY_BIT_POS, &buffer->flags); error_iio_dev_put: iio_device_put(indio_dev); return ret; } static long iio_device_buffer_ioctl(struct iio_dev *indio_dev, struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IIO_BUFFER_GET_FD_IOCTL: return iio_device_buffer_getfd(indio_dev, arg); default: return IIO_IOCTL_UNHANDLED; } } static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer, struct iio_dev *indio_dev, int index) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_dev_attr *p; const struct iio_dev_attr *id_attr; struct attribute **attr; int ret, i, attrn, scan_el_attrcount, buffer_attrcount; const struct iio_chan_spec *channels; buffer_attrcount = 0; if (buffer->attrs) { while (buffer->attrs[buffer_attrcount]) buffer_attrcount++; } buffer_attrcount += ARRAY_SIZE(iio_buffer_attrs); scan_el_attrcount = 0; INIT_LIST_HEAD(&buffer->buffer_attr_list); channels = indio_dev->channels; if (channels) { /* new magic */ for (i = 0; i < indio_dev->num_channels; i++) { if (channels[i].scan_index < 0) continue; /* Verify that sample bits fit into storage */ if (channels[i].scan_type.storagebits < channels[i].scan_type.realbits + channels[i].scan_type.shift) { dev_err(&indio_dev->dev, "Channel %d storagebits (%d) < shifted realbits (%d + %d)\n", i, channels[i].scan_type.storagebits, channels[i].scan_type.realbits, channels[i].scan_type.shift); ret = -EINVAL; goto error_cleanup_dynamic; } ret = iio_buffer_add_channel_sysfs(indio_dev, buffer, &channels[i]); if (ret < 0) goto error_cleanup_dynamic; scan_el_attrcount += ret; if (channels[i].type == IIO_TIMESTAMP) iio_dev_opaque->scan_index_timestamp = channels[i].scan_index; } if (indio_dev->masklength && !buffer->scan_mask) { buffer->scan_mask = bitmap_zalloc(indio_dev->masklength, GFP_KERNEL); if (!buffer->scan_mask) { ret = -ENOMEM; goto error_cleanup_dynamic; } } } attrn = buffer_attrcount + scan_el_attrcount; attr = kcalloc(attrn + 1, sizeof(*attr), GFP_KERNEL); if (!attr) { ret = -ENOMEM; goto error_free_scan_mask; } memcpy(attr, iio_buffer_attrs, sizeof(iio_buffer_attrs)); if (!buffer->access->set_length) attr[0] = &dev_attr_length_ro.attr; if (buffer->access->flags & INDIO_BUFFER_FLAG_FIXED_WATERMARK) attr[2] = &dev_attr_watermark_ro.attr; if (buffer->attrs) for (i = 0, id_attr = buffer->attrs[i]; (id_attr = buffer->attrs[i]); i++) attr[ARRAY_SIZE(iio_buffer_attrs) + i] = (struct attribute *)&id_attr->dev_attr.attr; buffer->buffer_group.attrs = attr; for (i = 0; i < buffer_attrcount; i++) { struct attribute *wrapped; wrapped = iio_buffer_wrap_attr(buffer, attr[i]); if (!wrapped) { ret = -ENOMEM; goto error_free_buffer_attrs; } attr[i] = wrapped; } attrn = 0; list_for_each_entry(p, &buffer->buffer_attr_list, l) attr[attrn++] = &p->dev_attr.attr; buffer->buffer_group.name = kasprintf(GFP_KERNEL, "buffer%d", index); if (!buffer->buffer_group.name) { ret = -ENOMEM; goto error_free_buffer_attrs; } ret = iio_device_register_sysfs_group(indio_dev, &buffer->buffer_group); if (ret) goto error_free_buffer_attr_group_name; /* we only need to register the legacy groups for the first buffer */ if (index > 0) return 0; ret = iio_buffer_register_legacy_sysfs_groups(indio_dev, attr, buffer_attrcount, scan_el_attrcount); if (ret) goto error_free_buffer_attr_group_name; return 0; error_free_buffer_attr_group_name: kfree(buffer->buffer_group.name); error_free_buffer_attrs: kfree(buffer->buffer_group.attrs); 
error_free_scan_mask: bitmap_free(buffer->scan_mask); error_cleanup_dynamic: iio_free_chan_devattr_list(&buffer->buffer_attr_list); return ret; } static void __iio_buffer_free_sysfs_and_mask(struct iio_buffer *buffer, struct iio_dev *indio_dev, int index) { if (index == 0) iio_buffer_unregister_legacy_sysfs_groups(indio_dev); bitmap_free(buffer->scan_mask); kfree(buffer->buffer_group.name); kfree(buffer->buffer_group.attrs); iio_free_chan_devattr_list(&buffer->buffer_attr_list); } int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); const struct iio_chan_spec *channels; struct iio_buffer *buffer; int ret, i, idx; size_t sz; channels = indio_dev->channels; if (channels) { int ml = indio_dev->masklength; for (i = 0; i < indio_dev->num_channels; i++) ml = max(ml, channels[i].scan_index + 1); indio_dev->masklength = ml; } if (!iio_dev_opaque->attached_buffers_cnt) return 0; for (idx = 0; idx < iio_dev_opaque->attached_buffers_cnt; idx++) { buffer = iio_dev_opaque->attached_buffers[idx]; ret = __iio_buffer_alloc_sysfs_and_mask(buffer, indio_dev, idx); if (ret) goto error_unwind_sysfs_and_mask; } sz = sizeof(*iio_dev_opaque->buffer_ioctl_handler); iio_dev_opaque->buffer_ioctl_handler = kzalloc(sz, GFP_KERNEL); if (!iio_dev_opaque->buffer_ioctl_handler) { ret = -ENOMEM; goto error_unwind_sysfs_and_mask; } iio_dev_opaque->buffer_ioctl_handler->ioctl = iio_device_buffer_ioctl; iio_device_ioctl_handler_register(indio_dev, iio_dev_opaque->buffer_ioctl_handler); return 0; error_unwind_sysfs_and_mask: while (idx--) { buffer = iio_dev_opaque->attached_buffers[idx]; __iio_buffer_free_sysfs_and_mask(buffer, indio_dev, idx); } return ret; } void iio_buffers_free_sysfs_and_mask(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_buffer *buffer; int i; if (!iio_dev_opaque->attached_buffers_cnt) return; iio_device_ioctl_handler_unregister(iio_dev_opaque->buffer_ioctl_handler); kfree(iio_dev_opaque->buffer_ioctl_handler); for (i = iio_dev_opaque->attached_buffers_cnt - 1; i >= 0; i--) { buffer = iio_dev_opaque->attached_buffers[i]; __iio_buffer_free_sysfs_and_mask(buffer, indio_dev, i); } } /** * iio_validate_scan_mask_onehot() - Validates that exactly one channel is selected * @indio_dev: the iio device * @mask: scan mask to be checked * * Return true if exactly one bit is set in the scan mask, false otherwise. It * can be used for devices where only one channel can be active for sampling at * a time. */ bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev, const unsigned long *mask) { return bitmap_weight(mask, indio_dev->masklength) == 1; } EXPORT_SYMBOL_GPL(iio_validate_scan_mask_onehot); static const void *iio_demux(struct iio_buffer *buffer, const void *datain) { struct iio_demux_table *t; if (list_empty(&buffer->demux_list)) return datain; list_for_each_entry(t, &buffer->demux_list, l) memcpy(buffer->demux_bounce + t->to, datain + t->from, t->length); return buffer->demux_bounce; } static int iio_push_to_buffer(struct iio_buffer *buffer, const void *data) { const void *dataout = iio_demux(buffer, data); int ret; ret = buffer->access->store_to(buffer, dataout); if (ret) return ret; /* * We can't just test for watermark to decide if we wake the poll queue * because read may request less samples than the watermark. */ wake_up_interruptible_poll(&buffer->pollq, EPOLLIN | EPOLLRDNORM); return 0; } /** * iio_push_to_buffers() - push to a registered buffer. 
* @indio_dev: iio_dev structure for device. * @data: Full scan. */ int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int ret; struct iio_buffer *buf; list_for_each_entry(buf, &iio_dev_opaque->buffer_list, buffer_list) { ret = iio_push_to_buffer(buf, data); if (ret < 0) return ret; } return 0; } EXPORT_SYMBOL_GPL(iio_push_to_buffers); /** * iio_push_to_buffers_with_ts_unaligned() - push to registered buffer, * no alignment or space requirements. * @indio_dev: iio_dev structure for device. * @data: channel data excluding the timestamp. * @data_sz: size of data. * @timestamp: timestamp for the sample data. * * This special variant of iio_push_to_buffers_with_timestamp() does * not require space for the timestamp, or 8 byte alignment of data. * It does however require an allocation on first call and additional * copies on all calls, so should be avoided if possible. */ int iio_push_to_buffers_with_ts_unaligned(struct iio_dev *indio_dev, const void *data, size_t data_sz, int64_t timestamp) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); /* * Conservative estimate - we can always safely copy the minimum * of either the data provided or the length of the destination buffer. * This relaxed limit allows the calling drivers to be lax about * tracking the size of the data they are pushing, at the cost of * unnecessary copying of padding. */ data_sz = min_t(size_t, indio_dev->scan_bytes, data_sz); if (iio_dev_opaque->bounce_buffer_size != indio_dev->scan_bytes) { void *bb; bb = devm_krealloc(&indio_dev->dev, iio_dev_opaque->bounce_buffer, indio_dev->scan_bytes, GFP_KERNEL); if (!bb) return -ENOMEM; iio_dev_opaque->bounce_buffer = bb; iio_dev_opaque->bounce_buffer_size = indio_dev->scan_bytes; } memcpy(iio_dev_opaque->bounce_buffer, data, data_sz); return iio_push_to_buffers_with_timestamp(indio_dev, iio_dev_opaque->bounce_buffer, timestamp); } EXPORT_SYMBOL_GPL(iio_push_to_buffers_with_ts_unaligned); /** * iio_buffer_release() - Free a buffer's resources * @ref: Pointer to the kref embedded in the iio_buffer struct * * This function is called when the last reference to the buffer has been * dropped. It will typically free all resources allocated by the buffer. Do not * call this function manually, always use iio_buffer_put() when done using a * buffer. */ static void iio_buffer_release(struct kref *ref) { struct iio_buffer *buffer = container_of(ref, struct iio_buffer, ref); buffer->access->release(buffer); } /** * iio_buffer_get() - Grab a reference to the buffer * @buffer: The buffer to grab a reference for, may be NULL * * Returns the pointer to the buffer that was passed into the function. */ struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer) { if (buffer) kref_get(&buffer->ref); return buffer; } EXPORT_SYMBOL_GPL(iio_buffer_get); /** * iio_buffer_put() - Release the reference to the buffer * @buffer: The buffer to release the reference for, may be NULL */ void iio_buffer_put(struct iio_buffer *buffer) { if (buffer) kref_put(&buffer->ref, iio_buffer_release); } EXPORT_SYMBOL_GPL(iio_buffer_put); /** * iio_device_attach_buffer - Attach a buffer to a IIO device * @indio_dev: The device the buffer should be attached to * @buffer: The buffer to attach to the device * * Return 0 if successful, negative if error. * * This function attaches a buffer to a IIO device. The buffer stays attached to * the device until the device is freed. 
 * For legacy reasons, the first attached buffer will also be assigned to
 * 'indio_dev->buffer'.
 * The array allocated here will be freed via iio_device_detach_buffers(),
 * which is called from iio_device_free().
 */
int iio_device_attach_buffer(struct iio_dev *indio_dev,
			     struct iio_buffer *buffer)
{
	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
	struct iio_buffer **new, **old = iio_dev_opaque->attached_buffers;
	unsigned int cnt = iio_dev_opaque->attached_buffers_cnt;

	cnt++;

	new = krealloc(old, sizeof(*new) * cnt, GFP_KERNEL);
	if (!new)
		return -ENOMEM;
	iio_dev_opaque->attached_buffers = new;

	buffer = iio_buffer_get(buffer);

	/* first buffer is legacy; attach it to the IIO device directly */
	if (!indio_dev->buffer)
		indio_dev->buffer = buffer;

	iio_dev_opaque->attached_buffers[cnt - 1] = buffer;
	iio_dev_opaque->attached_buffers_cnt = cnt;

	return 0;
}
EXPORT_SYMBOL_GPL(iio_device_attach_buffer);
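/*
 * Illustrative sketch (not part of the file above), assuming the usual
 * <linux/iio/iio.h>, <linux/iio/buffer.h> and <linux/iio/trigger_consumer.h>
 * includes: a minimal consumer of the push path implemented here. A trigger
 * handler fills one scan and hands it to every attached buffer via
 * iio_push_to_buffers_with_timestamp(), which ends up in iio_push_to_buffers()
 * above. The struct my_dev_data layout and the zero sample values are made up
 * for the example.
 */
struct my_dev_data {
	/* timestamp must be 8-byte aligned at the end of the scan */
	struct {
		s16 channels[2];
		s64 timestamp __aligned(8);
	} scan;
};

static irqreturn_t my_trigger_handler(int irq, void *p)
{
	struct iio_poll_func *pf = p;
	struct iio_dev *indio_dev = pf->indio_dev;
	struct my_dev_data *data = iio_priv(indio_dev);

	/* placeholder samples; a real driver would read the hardware here */
	data->scan.channels[0] = 0;
	data->scan.channels[1] = 0;

	/* demuxed and copied into every enabled buffer on this device */
	iio_push_to_buffers_with_timestamp(indio_dev, &data->scan,
					   iio_get_time_ns(indio_dev));

	iio_trigger_notify_done(indio_dev->trig);
	return IRQ_HANDLED;
}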
// SPDX-License-Identifier: GPL-2.0
/*
 *  inode.c - part of debugfs, a tiny little debug file system
 *
 *  Copyright (C) 2004,2019 Greg Kroah-Hartman <greg@kroah.com>
 *  Copyright (C) 2004 IBM Inc.
 *  Copyright (C) 2019 Linux Foundation <gregkh@linuxfoundation.org>
 *
 *  debugfs is for people to use instead of /proc or /sys.
 *  See ./Documentation/core-api/kernel-api.rst for more details.
 */

#define pr_fmt(fmt)	"debugfs: " fmt

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/namei.h>
#include <linux/debugfs.h>
#include <linux/fsnotify.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/security.h>

#include "internal.h"

#define DEBUGFS_DEFAULT_MODE	0700

static struct vfsmount *debugfs_mount;
static int debugfs_mount_count;
static bool debugfs_registered;
static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS;

/*
 * Don't allow access attributes to be changed whilst the kernel is locked down
 * so that we can use the file mode as part of a heuristic to determine whether
 * to lock down individual files.
 */
static int debugfs_setattr(struct mnt_idmap *idmap,
			   struct dentry *dentry, struct iattr *ia)
{
	int ret;

	if (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) {
		ret = security_locked_down(LOCKDOWN_DEBUGFS);
		if (ret)
			return ret;
	}
	return simple_setattr(&nop_mnt_idmap, dentry, ia);
}

static const struct inode_operations debugfs_file_inode_operations = {
	.setattr	= debugfs_setattr,
};
static const struct inode_operations debugfs_dir_inode_operations = {
	.lookup		= simple_lookup,
	.setattr	= debugfs_setattr,
};
static const struct inode_operations debugfs_symlink_inode_operations = {
	.get_link	= simple_get_link,
	.setattr	= debugfs_setattr,
};

static struct inode *debugfs_get_inode(struct super_block *sb)
{
	struct inode *inode = new_inode(sb);
	if (inode) {
		inode->i_ino = get_next_ino();
		simple_inode_init_ts(inode);
	}
	return inode;
}

struct debugfs_mount_opts {
	kuid_t uid;
	kgid_t gid;
	umode_t mode;
	/* Opt_* bitfield.
*/ unsigned int opts; }; enum { Opt_uid, Opt_gid, Opt_mode, Opt_err }; static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, {Opt_err, NULL} }; struct debugfs_fs_info { struct debugfs_mount_opts mount_opts; }; static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts) { substring_t args[MAX_OPT_ARGS]; int option; int token; kuid_t uid; kgid_t gid; char *p; opts->opts = 0; opts->mode = DEBUGFS_DEFAULT_MODE; while ((p = strsep(&data, ",")) != NULL) { if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_uid: if (match_int(&args[0], &option)) return -EINVAL; uid = make_kuid(current_user_ns(), option); if (!uid_valid(uid)) return -EINVAL; opts->uid = uid; break; case Opt_gid: if (match_int(&args[0], &option)) return -EINVAL; gid = make_kgid(current_user_ns(), option); if (!gid_valid(gid)) return -EINVAL; opts->gid = gid; break; case Opt_mode: if (match_octal(&args[0], &option)) return -EINVAL; opts->mode = option & S_IALLUGO; break; /* * We might like to report bad mount options here; * but traditionally debugfs has ignored all mount options */ } opts->opts |= BIT(token); } return 0; } static void _debugfs_apply_options(struct super_block *sb, bool remount) { struct debugfs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); struct debugfs_mount_opts *opts = &fsi->mount_opts; /* * On remount, only reset mode/uid/gid if they were provided as mount * options. */ if (!remount || opts->opts & BIT(Opt_mode)) { inode->i_mode &= ~S_IALLUGO; inode->i_mode |= opts->mode; } if (!remount || opts->opts & BIT(Opt_uid)) inode->i_uid = opts->uid; if (!remount || opts->opts & BIT(Opt_gid)) inode->i_gid = opts->gid; } static void debugfs_apply_options(struct super_block *sb) { _debugfs_apply_options(sb, false); } static void debugfs_apply_options_remount(struct super_block *sb) { _debugfs_apply_options(sb, true); } static int debugfs_remount(struct super_block *sb, int *flags, char *data) { int err; struct debugfs_fs_info *fsi = sb->s_fs_info; sync_filesystem(sb); err = debugfs_parse_options(data, &fsi->mount_opts); if (err) goto fail; debugfs_apply_options_remount(sb); fail: return err; } static int debugfs_show_options(struct seq_file *m, struct dentry *root) { struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; struct debugfs_mount_opts *opts = &fsi->mount_opts; if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, opts->uid)); if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); if (opts->mode != DEBUGFS_DEFAULT_MODE) seq_printf(m, ",mode=%o", opts->mode); return 0; } static void debugfs_free_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); free_inode_nonrcu(inode); } static const struct super_operations debugfs_super_operations = { .statfs = simple_statfs, .remount_fs = debugfs_remount, .show_options = debugfs_show_options, .free_inode = debugfs_free_inode, }; static void debugfs_release_dentry(struct dentry *dentry) { struct debugfs_fsdata *fsd = dentry->d_fsdata; if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; /* check it wasn't a dir (no fsdata) or automount (no real_fops) */ if (fsd && fsd->real_fops) { WARN_ON(!list_empty(&fsd->cancellations)); mutex_destroy(&fsd->cancellations_mtx); } kfree(fsd); } static struct vfsmount *debugfs_automount(struct path *path) { struct debugfs_fsdata *fsd = path->dentry->d_fsdata; return 
fsd->automount(path->dentry, d_inode(path->dentry)->i_private); } static const struct dentry_operations debugfs_dops = { .d_delete = always_delete_dentry, .d_release = debugfs_release_dentry, .d_automount = debugfs_automount, }; static int debug_fill_super(struct super_block *sb, void *data, int silent) { static const struct tree_descr debug_files[] = {{""}}; struct debugfs_fs_info *fsi; int err; fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL); sb->s_fs_info = fsi; if (!fsi) { err = -ENOMEM; goto fail; } err = debugfs_parse_options(data, &fsi->mount_opts); if (err) goto fail; err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); if (err) goto fail; sb->s_op = &debugfs_super_operations; sb->s_d_op = &debugfs_dops; debugfs_apply_options(sb); return 0; fail: kfree(fsi); sb->s_fs_info = NULL; return err; } static struct dentry *debug_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return ERR_PTR(-EPERM); return mount_single(fs_type, flags, data, debug_fill_super); } static struct file_system_type debug_fs_type = { .owner = THIS_MODULE, .name = "debugfs", .mount = debug_mount, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("debugfs"); /** * debugfs_lookup() - look up an existing debugfs file * @name: a pointer to a string containing the name of the file to look up. * @parent: a pointer to the parent dentry of the file. * * This function will return a pointer to a dentry if it succeeds. If the file * doesn't exist or an error occurs, %NULL will be returned. The returned * dentry must be passed to dput() when it is no longer needed. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) { struct dentry *dentry; if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent)) return NULL; if (!parent) parent = debugfs_mount->mnt_root; dentry = lookup_positive_unlocked(name, parent, strlen(name)); if (IS_ERR(dentry)) return NULL; return dentry; } EXPORT_SYMBOL_GPL(debugfs_lookup); static struct dentry *start_creating(const char *name, struct dentry *parent) { struct dentry *dentry; int error; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return ERR_PTR(-EPERM); if (!debugfs_initialized()) return ERR_PTR(-ENOENT); pr_debug("creating file '%s'\n", name); if (IS_ERR(parent)) return parent; error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); if (error) { pr_err("Unable to pin filesystem for file '%s'\n", name); return ERR_PTR(error); } /* If the parent is not specified, we create it in the root. * We need the root dentry to do this, which is in the super * block. A pointer to that is in the struct vfsmount that we * have around. 
*/ if (!parent) parent = debugfs_mount->mnt_root; inode_lock(d_inode(parent)); if (unlikely(IS_DEADDIR(d_inode(parent)))) dentry = ERR_PTR(-ENOENT); else dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry) && d_really_is_positive(dentry)) { if (d_is_dir(dentry)) pr_err("Directory '%s' with parent '%s' already present!\n", name, parent->d_name.name); else pr_err("File '%s' in directory '%s' already present!\n", name, parent->d_name.name); dput(dentry); dentry = ERR_PTR(-EEXIST); } if (IS_ERR(dentry)) { inode_unlock(d_inode(parent)); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } return dentry; } static struct dentry *failed_creating(struct dentry *dentry) { inode_unlock(d_inode(dentry->d_parent)); dput(dentry); simple_release_fs(&debugfs_mount, &debugfs_mount_count); return ERR_PTR(-ENOMEM); } static struct dentry *end_creating(struct dentry *dentry) { inode_unlock(d_inode(dentry->d_parent)); return dentry; } static struct dentry *__debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *proxy_fops, const struct file_operations *real_fops) { struct dentry *dentry; struct inode *inode; if (!(mode & S_IFMT)) mode |= S_IFREG; BUG_ON(!S_ISREG(mode)); dentry = start_creating(name, parent); if (IS_ERR(dentry)) return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create file '%s'\n", name); return failed_creating(dentry); } inode->i_mode = mode; inode->i_private = data; inode->i_op = &debugfs_file_inode_operations; inode->i_fop = proxy_fops; dentry->d_fsdata = (void *)((unsigned long)real_fops | DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); d_instantiate(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } /** * debugfs_create_file - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * * This is the basic "create a file" function for debugfs. It allows for a * wide range of flexibility in creating a file, or a directory (if you want * to create a directory, the debugfs_create_dir() function is * recommended to be used instead.) * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. * * NOTE: it's expected that most callers should _ignore_ the errors returned * by this function. Other debugfs functions handle the fact that the "dentry" * passed to them could be an error and they don't crash in that case. * Drivers should generally work fine even if debugfs fails to init anyway. 
*/ struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { return __debugfs_create_file(name, mode, parent, data, fops ? &debugfs_full_proxy_file_operations : &debugfs_noop_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file); /** * debugfs_create_file_unsafe - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * * debugfs_create_file_unsafe() is completely analogous to * debugfs_create_file(), the only difference being that the fops * handed it will not get protected against file removals by the * debugfs core. * * It is your responsibility to protect your struct file_operation * methods against file removals by means of debugfs_file_get() * and debugfs_file_put(). ->open() is still protected by * debugfs though. * * Any struct file_operations defined by means of * DEFINE_DEBUGFS_ATTRIBUTE() is protected against file removals and * thus, may be used here. */ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { return __debugfs_create_file(name, mode, parent, data, fops ? &debugfs_open_proxy_file_operations : &debugfs_noop_file_operations, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe); /** * debugfs_create_file_size - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * @file_size: initial file size * * This is the basic "create a file" function for debugfs. It allows for a * wide range of flexibility in creating a file, or a directory (if you want * to create a directory, the debugfs_create_dir() function is * recommended to be used instead.) */ void debugfs_create_file_size(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops, loff_t file_size) { struct dentry *de = debugfs_create_file(name, mode, parent, data, fops); if (!IS_ERR(de)) d_inode(de)->i_size = file_size; } EXPORT_SYMBOL_GPL(debugfs_create_file_size); /** * debugfs_create_dir - create a directory in the debugfs filesystem * @name: a pointer to a string containing the name of the directory to * create. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * directory will be created in the root of the debugfs filesystem. * * This function creates a directory in debugfs with the given name. 
* * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be * returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. * * NOTE: it's expected that most callers should _ignore_ the errors returned * by this function. Other debugfs functions handle the fact that the "dentry" * passed to them could be an error and they don't crash in that case. * Drivers should generally work fine even if debugfs fails to init anyway. */ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) { struct dentry *dentry = start_creating(name, parent); struct inode *inode; if (IS_ERR(dentry)) return dentry; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create directory '%s'\n", name); return failed_creating(dentry); } inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = &debugfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_dir); /** * debugfs_create_automount - create automount point in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is NULL, then the * file will be created in the root of the debugfs filesystem. * @f: function to be called when pathname resolution steps on that one. * @data: opaque argument to pass to f(). * * @f should return what ->d_automount() would. */ struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, debugfs_automount_t f, void *data) { struct dentry *dentry = start_creating(name, parent); struct debugfs_fsdata *fsd; struct inode *inode; if (IS_ERR(dentry)) return dentry; fsd = kzalloc(sizeof(*fsd), GFP_KERNEL); if (!fsd) { failed_creating(dentry); return ERR_PTR(-ENOMEM); } fsd->automount = f; if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { failed_creating(dentry); kfree(fsd); return ERR_PTR(-EPERM); } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", name); kfree(fsd); return failed_creating(dentry); } make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; dentry->d_fsdata = fsd; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } EXPORT_SYMBOL(debugfs_create_automount); /** * debugfs_create_symlink- create a symbolic link in the debugfs filesystem * @name: a pointer to a string containing the name of the symbolic link to * create. * @parent: a pointer to the parent dentry for this symbolic link. This * should be a directory dentry if set. If this parameter is NULL, * then the symbolic link will be created in the root of the debugfs * filesystem. 
* @target: a pointer to a string containing the path to the target of the * symbolic link. * * This function creates a symbolic link with the given name in debugfs that * links to the given target path. * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the symbolic * link is to be removed (no automatic cleanup happens if your module is * unloaded, you are responsible here.) If an error occurs, ERR_PTR(-ERROR) * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *target) { struct dentry *dentry; struct inode *inode; char *link = kstrdup(target, GFP_KERNEL); if (!link) return ERR_PTR(-ENOMEM); dentry = start_creating(name, parent); if (IS_ERR(dentry)) { kfree(link); return dentry; } inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create symlink '%s'\n", name); kfree(link); return failed_creating(dentry); } inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_op = &debugfs_symlink_inode_operations; inode->i_link = link; d_instantiate(dentry, inode); return end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_symlink); static void __debugfs_file_removed(struct dentry *dentry) { struct debugfs_fsdata *fsd; /* * Paired with the closing smp_mb() implied by a successful * cmpxchg() in debugfs_file_get(): either * debugfs_file_get() must see a dead dentry or we must see a * debugfs_fsdata instance at ->d_fsdata here (or both). */ smp_mb(); fsd = READ_ONCE(dentry->d_fsdata); if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) return; /* if this was the last reference, we're done */ if (refcount_dec_and_test(&fsd->active_users)) return; /* * If there's still a reference, the code that obtained it can * be in different states: * - The common case of not using cancellations, or already * after debugfs_leave_cancellation(), where we just need * to wait for debugfs_file_put() which signals the completion; * - inside a cancellation section, i.e. between * debugfs_enter_cancellation() and debugfs_leave_cancellation(), * in which case we need to trigger the ->cancel() function, * and then wait for debugfs_file_put() just like in the * previous case; * - before debugfs_enter_cancellation() (but obviously after * debugfs_file_get()), in which case we may not see the * cancellation in the list on the first round of the loop, * but debugfs_enter_cancellation() signals the completion * after adding it, so this code gets woken up to call the * ->cancel() function. */ while (refcount_read(&fsd->active_users)) { struct debugfs_cancellation *c; /* * Lock the cancellations. Note that the cancellations * structs are meant to be on the stack, so we need to * ensure we either use them here or don't touch them, * and debugfs_leave_cancellation() will wait for this * to be finished processing before exiting one. It may * of course win and remove the cancellation, but then * chances are we never even got into this bit, we only * do if the refcount isn't zero already. 
*/ mutex_lock(&fsd->cancellations_mtx); while ((c = list_first_entry_or_null(&fsd->cancellations, typeof(*c), list))) { list_del_init(&c->list); c->cancel(dentry, c->cancel_data); } mutex_unlock(&fsd->cancellations_mtx); wait_for_completion(&fsd->active_users_drained); } } static void remove_one(struct dentry *victim) { if (d_is_reg(victim)) __debugfs_file_removed(victim); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } /** * debugfs_remove - recursively removes a directory * @dentry: a pointer to a the dentry of the directory to be removed. If this * parameter is NULL or an error value, nothing will be done. * * This function recursively removes a directory tree in debugfs that * was previously created with a call to another debugfs function * (like debugfs_create_file() or variants thereof.) * * This function is required to be called in order for the file to be * removed, no automatic cleanup of files will happen when a module is * removed, you are responsible here. */ void debugfs_remove(struct dentry *dentry) { if (IS_ERR_OR_NULL(dentry)) return; simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); simple_recursive_removal(dentry, remove_one); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove); /** * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it * @name: a pointer to a string containing the name of the item to look up. * @parent: a pointer to the parent dentry of the item. * * This is the equlivant of doing something like * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting * handled for the directory being looked up. */ void debugfs_lookup_and_remove(const char *name, struct dentry *parent) { struct dentry *dentry; dentry = debugfs_lookup(name, parent); if (!dentry) return; debugfs_remove(dentry); dput(dentry); } EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove); /** * debugfs_rename - rename a file/directory in the debugfs filesystem * @old_dir: a pointer to the parent dentry for the renamed object. This * should be a directory dentry. * @old_dentry: dentry of an object to be renamed. * @new_dir: a pointer to the parent dentry where the object should be * moved. This should be a directory dentry. * @new_name: a pointer to a string containing the target name. * * This function renames a file/directory in debugfs. The target must not * exist for rename to succeed. * * This function will return a pointer to old_dentry (which is updated to * reflect renaming) if it succeeds. If an error occurs, ERR_PTR(-ERROR) * will be returned. * * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. */ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name) { int error; struct dentry *dentry = NULL, *trap; struct name_snapshot old_name; if (IS_ERR(old_dir)) return old_dir; if (IS_ERR(new_dir)) return new_dir; if (IS_ERR_OR_NULL(old_dentry)) return old_dentry; trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? */ if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) goto exit; /* Source does not exist, cyclic rename, or mountpoint? */ if (d_really_is_negative(old_dentry) || old_dentry == trap || d_mountpoint(old_dentry)) goto exit; dentry = lookup_one_len(new_name, new_dir, strlen(new_name)); /* Lookup failed, cyclic rename or target exists? 
 */
	if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry))
		goto exit;

	take_dentry_name_snapshot(&old_name, old_dentry);

	error = simple_rename(&nop_mnt_idmap, d_inode(old_dir), old_dentry,
			      d_inode(new_dir), dentry, 0);
	if (error) {
		release_dentry_name_snapshot(&old_name);
		goto exit;
	}
	d_move(old_dentry, dentry);
	fsnotify_move(d_inode(old_dir), d_inode(new_dir), &old_name.name,
		      d_is_dir(old_dentry), NULL, old_dentry);
	release_dentry_name_snapshot(&old_name);
	unlock_rename(new_dir, old_dir);
	dput(dentry);
	return old_dentry;
exit:
	if (dentry && !IS_ERR(dentry))
		dput(dentry);
	unlock_rename(new_dir, old_dir);
	if (IS_ERR(dentry))
		return dentry;
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(debugfs_rename);

/**
 * debugfs_initialized - Tells whether debugfs has been registered
 */
bool debugfs_initialized(void)
{
	return debugfs_registered;
}
EXPORT_SYMBOL_GPL(debugfs_initialized);

static int __init debugfs_kernel(char *str)
{
	if (str) {
		if (!strcmp(str, "on"))
			debugfs_allow = DEBUGFS_ALLOW_API | DEBUGFS_ALLOW_MOUNT;
		else if (!strcmp(str, "no-mount"))
			debugfs_allow = DEBUGFS_ALLOW_API;
		else if (!strcmp(str, "off"))
			debugfs_allow = 0;
	}

	return 0;
}
early_param("debugfs", debugfs_kernel);

static int __init debugfs_init(void)
{
	int retval;

	if (!(debugfs_allow & DEBUGFS_ALLOW_MOUNT))
		return -EPERM;

	retval = sysfs_create_mount_point(kernel_kobj, "debug");
	if (retval)
		return retval;

	retval = register_filesystem(&debug_fs_type);
	if (retval)
		sysfs_remove_mount_point(kernel_kobj, "debug");
	else
		debugfs_registered = true;

	return retval;
}
core_initcall(debugfs_init);
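/*
 * Illustrative sketch (not part of inode.c above), assuming <linux/debugfs.h>:
 * how a typical driver consumes the API implemented here. Per the NOTE in the
 * debugfs_create_file() documentation, error-valued dentries can be passed
 * straight back into other debugfs calls, so most callers only keep the
 * top-level directory around for the eventual debugfs_remove(). The my_drv_*
 * names and the counter variable are made up for the example.
 */
static struct dentry *my_drv_dir;
static u32 my_drv_counter;

static int __init my_drv_debugfs_init(void)
{
	/* created in the root of debugfs because parent is NULL */
	my_drv_dir = debugfs_create_dir("my_drv", NULL);

	/* safe even if my_drv_dir is an ERR_PTR; it then becomes a no-op */
	debugfs_create_u32("counter", 0444, my_drv_dir, &my_drv_counter);

	return 0;
}

static void __exit my_drv_debugfs_exit(void)
{
	/* recursively removes the directory and everything created under it */
	debugfs_remove(my_drv_dir);
}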
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AppArmor security module
 *
 * This file contains AppArmor label definitions
 *
 * Copyright 2017 Canonical Ltd.
 */

#ifndef __AA_LABEL_H
#define __AA_LABEL_H

#include <linux/atomic.h>
#include <linux/audit.h>
#include <linux/rbtree.h>
#include <linux/rcupdate.h>

#include "apparmor.h"
#include "lib.h"

struct aa_ns;

#define LOCAL_VEC_ENTRIES 8
#define DEFINE_VEC(T, V)						\
	struct aa_ ## T *(_ ## V ## _localtmp)[LOCAL_VEC_ENTRIES];	\
	struct aa_ ## T **(V)

#define vec_setup(T, V, N, GFP)						\
({									\
	if ((N) <= LOCAL_VEC_ENTRIES) {					\
		typeof(N) i;						\
		(V) = (_ ## V ## _localtmp);				\
		for (i = 0; i < (N); i++)				\
			(V)[i] = NULL;					\
	} else								\
		(V) = kzalloc(sizeof(struct aa_ ## T *) * (N), (GFP));	\
	(V) ? 0 : -ENOMEM;						\
})

#define vec_cleanup(T, V, N)						\
do {									\
	int i;								\
	for (i = 0; i < (N); i++) {					\
		if (!IS_ERR_OR_NULL((V)[i]))				\
			aa_put_ ## T((V)[i]);				\
	}								\
	if ((V) != _ ## V ## _localtmp)					\
		kfree(V);						\
} while (0)

#define vec_last(VEC, SIZE) ((VEC)[(SIZE) - 1])
#define vec_ns(VEC, SIZE) (vec_last((VEC), (SIZE))->ns)
#define vec_labelset(VEC, SIZE) (&vec_ns((VEC), (SIZE))->labels)
#define cleanup_domain_vec(V, L) cleanup_label_vec((V), (L)->size)

struct aa_profile;
#define VEC_FLAG_TERMINATE 1
int aa_vec_unique(struct aa_profile **vec, int n, int flags);
struct aa_label *aa_vec_find_or_create_label(struct aa_profile **vec, int len,
					     gfp_t gfp);
#define aa_sort_and_merge_vec(N, V) \
	aa_sort_and_merge_profiles((N), (struct aa_profile **)(V))

/* struct aa_labelset - set of labels for a namespace
 *
 * Labels are reference counted; aa_labelset does not contribute to label
 * reference counts. Once a label's last refcount is put it is removed from
 * the set.
*/ struct aa_labelset { rwlock_t lock; struct rb_root root; }; #define __labelset_for_each(LS, N) \ for ((N) = rb_first(&(LS)->root); (N); (N) = rb_next(N)) enum label_flags { FLAG_HAT = 1, /* profile is a hat */ FLAG_UNCONFINED = 2, /* label unconfined only if all */ FLAG_NULL = 4, /* profile is null learning profile */ FLAG_IX_ON_NAME_ERROR = 8, /* fallback to ix on name lookup fail */ FLAG_IMMUTIBLE = 0x10, /* don't allow changes/replacement */ FLAG_USER_DEFINED = 0x20, /* user based profile - lower privs */ FLAG_NO_LIST_REF = 0x40, /* list doesn't keep profile ref */ FLAG_NS_COUNT = 0x80, /* carries NS ref count */ FLAG_IN_TREE = 0x100, /* label is in tree */ FLAG_PROFILE = 0x200, /* label is a profile */ FLAG_EXPLICIT = 0x400, /* explicit static label */ FLAG_STALE = 0x800, /* replaced/removed */ FLAG_RENAMED = 0x1000, /* label has renaming in it */ FLAG_REVOKED = 0x2000, /* label has revocation in it */ FLAG_DEBUG1 = 0x4000, FLAG_DEBUG2 = 0x8000, /* These flags must correspond with PATH_flags */ /* TODO: add new path flags */ }; struct aa_label; struct aa_proxy { struct kref count; struct aa_label __rcu *label; }; struct label_it { int i, j; }; /* struct aa_label - lazy labeling struct * @count: ref count of active users * @node: rbtree position * @rcu: rcu callback struct * @proxy: is set to the label that replaced this label * @hname: text representation of the label (MAYBE_NULL) * @flags: stale and other flags - values may change under label set lock * @secid: secid that references this label * @size: number of entries in @ent[] * @ent: set of profiles for label, actual size determined by @size */ struct aa_label { struct kref count; struct rb_node node; struct rcu_head rcu; struct aa_proxy *proxy; __counted char *hname; long flags; u32 secid; int size; struct aa_profile *vec[]; }; #define last_error(E, FN) \ do { \ int __subE = (FN); \ if (__subE) \ (E) = __subE; \ } while (0) #define label_isprofile(X) ((X)->flags & FLAG_PROFILE) #define label_unconfined(X) ((X)->flags & FLAG_UNCONFINED) #define unconfined(X) label_unconfined(X) #define label_is_stale(X) ((X)->flags & FLAG_STALE) #define __label_make_stale(X) ((X)->flags |= FLAG_STALE) #define labels_ns(X) (vec_ns(&((X)->vec[0]), (X)->size)) #define labels_set(X) (&labels_ns(X)->labels) #define labels_view(X) labels_ns(X) #define labels_profile(X) ((X)->vec[(X)->size - 1]) int aa_label_next_confined(struct aa_label *l, int i); /* for each profile in a label */ #define label_for_each(I, L, P) \ for ((I).i = 0; ((P) = (L)->vec[(I).i]); ++((I).i)) /* assumes break/goto ended label_for_each */ #define label_for_each_cont(I, L, P) \ for (++((I).i); ((P) = (L)->vec[(I).i]); ++((I).i)) #define next_comb(I, L1, L2) \ do { \ (I).j++; \ if ((I).j >= (L2)->size) { \ (I).i++; \ (I).j = 0; \ } \ } while (0) /* for each combination of P1 in L1, and P2 in L2 */ #define label_for_each_comb(I, L1, L2, P1, P2) \ for ((I).i = (I).j = 0; \ ((P1) = (L1)->vec[(I).i]) && ((P2) = (L2)->vec[(I).j]); \ (I) = next_comb(I, L1, L2)) #define fn_for_each_comb(L1, L2, P1, P2, FN) \ ({ \ struct label_it i; \ int __E = 0; \ label_for_each_comb(i, (L1), (L2), (P1), (P2)) { \ last_error(__E, (FN)); \ } \ __E; \ }) /* for each profile that is enforcing confinement in a label */ #define label_for_each_confined(I, L, P) \ for ((I).i = aa_label_next_confined((L), 0); \ ((P) = (L)->vec[(I).i]); \ (I).i = aa_label_next_confined((L), (I).i + 1)) #define label_for_each_in_merge(I, A, B, P) \ for ((I).i = (I).j = 0; \ ((P) = aa_label_next_in_merge(&(I), (A), (B))); \ ) 
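/*
 * Illustrative sketch (not part of the header): the iterator macros above are
 * typically used through the fn_for_each*() wrappers defined nearby, which
 * fold a per-profile expression's error codes together with last_error().
 * A hypothetical check over every profile in a label could look like:
 *
 *	struct aa_profile *profile;
 *	struct label_it it;
 *	int error = 0;
 *
 *	label_for_each(it, label, profile)
 *		last_error(error, my_profile_check(profile));
 *
 * which is essentially what fn_for_each(label, profile,
 * my_profile_check(profile)) expands to; my_profile_check() is made up for
 * the example.
 */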
#define label_for_each_not_in_set(I, SET, SUB, P) \ for ((I).i = (I).j = 0; \ ((P) = __aa_label_next_not_in_set(&(I), (SET), (SUB))); \ ) #define next_in_ns(i, NS, L) \ ({ \ typeof(i) ___i = (i); \ while ((L)->vec[___i] && (L)->vec[___i]->ns != (NS)) \ (___i)++; \ (___i); \ }) #define label_for_each_in_ns(I, NS, L, P) \ for ((I).i = next_in_ns(0, (NS), (L)); \ ((P) = (L)->vec[(I).i]); \ (I).i = next_in_ns((I).i + 1, (NS), (L))) #define fn_for_each_in_ns(L, P, FN) \ ({ \ struct label_it __i; \ struct aa_ns *__ns = labels_ns(L); \ int __E = 0; \ label_for_each_in_ns(__i, __ns, (L), (P)) { \ last_error(__E, (FN)); \ } \ __E; \ }) #define fn_for_each_XXX(L, P, FN, ...) \ ({ \ struct label_it i; \ int __E = 0; \ label_for_each ## __VA_ARGS__(i, (L), (P)) { \ last_error(__E, (FN)); \ } \ __E; \ }) #define fn_for_each(L, P, FN) fn_for_each_XXX(L, P, FN) #define fn_for_each_confined(L, P, FN) fn_for_each_XXX(L, P, FN, _confined) #define fn_for_each2_XXX(L1, L2, P, FN, ...) \ ({ \ struct label_it i; \ int __E = 0; \ label_for_each ## __VA_ARGS__(i, (L1), (L2), (P)) { \ last_error(__E, (FN)); \ } \ __E; \ }) #define fn_for_each_in_merge(L1, L2, P, FN) \ fn_for_each2_XXX((L1), (L2), P, FN, _in_merge) #define fn_for_each_not_in_set(L1, L2, P, FN) \ fn_for_each2_XXX((L1), (L2), P, FN, _not_in_set) #define LABEL_MEDIATES(L, C) \ ({ \ struct aa_profile *profile; \ struct label_it i; \ int ret = 0; \ label_for_each(i, (L), profile) { \ if (RULE_MEDIATES(&profile->rules, (C))) { \ ret = 1; \ break; \ } \ } \ ret; \ }) void aa_labelset_destroy(struct aa_labelset *ls); void aa_labelset_init(struct aa_labelset *ls); void __aa_labelset_update_subtree(struct aa_ns *ns); void aa_label_destroy(struct aa_label *label); void aa_label_free(struct aa_label *label); void aa_label_kref(struct kref *kref); bool aa_label_init(struct aa_label *label, int size, gfp_t gfp); struct aa_label *aa_label_alloc(int size, struct aa_proxy *proxy, gfp_t gfp); bool aa_label_is_subset(struct aa_label *set, struct aa_label *sub); bool aa_label_is_unconfined_subset(struct aa_label *set, struct aa_label *sub); struct aa_profile *__aa_label_next_not_in_set(struct label_it *I, struct aa_label *set, struct aa_label *sub); bool aa_label_remove(struct aa_label *label); struct aa_label *aa_label_insert(struct aa_labelset *ls, struct aa_label *l); bool aa_label_replace(struct aa_label *old, struct aa_label *new); bool aa_label_make_newest(struct aa_labelset *ls, struct aa_label *old, struct aa_label *new); struct aa_label *aa_label_find(struct aa_label *l); struct aa_profile *aa_label_next_in_merge(struct label_it *I, struct aa_label *a, struct aa_label *b); struct aa_label *aa_label_find_merge(struct aa_label *a, struct aa_label *b); struct aa_label *aa_label_merge(struct aa_label *a, struct aa_label *b, gfp_t gfp); bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp); #define FLAGS_NONE 0 #define FLAG_SHOW_MODE 1 #define FLAG_VIEW_SUBNS 2 #define FLAG_HIDDEN_UNCONFINED 4 #define FLAG_ABS_ROOT 8 int aa_label_snxprint(char *str, size_t size, struct aa_ns *view, struct aa_label *label, int flags); int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); int aa_label_acntsxprint(char __counted **strp, struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns, struct aa_label *label, int flags, gfp_t 
gfp); void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags, gfp_t gfp); void aa_label_audit(struct audit_buffer *ab, struct aa_label *label, gfp_t gfp); void aa_label_seq_print(struct seq_file *f, struct aa_label *label, gfp_t gfp); void aa_label_printk(struct aa_label *label, gfp_t gfp); struct aa_label *aa_label_strn_parse(struct aa_label *base, const char *str, size_t n, gfp_t gfp, bool create, bool force_stack); struct aa_label *aa_label_parse(struct aa_label *base, const char *str, gfp_t gfp, bool create, bool force_stack); static inline const char *aa_label_strn_split(const char *str, int n) { const char *pos; aa_state_t state; state = aa_dfa_matchn_until(stacksplitdfa, DFA_START, str, n, &pos); if (!ACCEPT_TABLE(stacksplitdfa)[state]) return NULL; return pos - 3; } static inline const char *aa_label_str_split(const char *str) { const char *pos; aa_state_t state; state = aa_dfa_match_until(stacksplitdfa, DFA_START, str, &pos); if (!ACCEPT_TABLE(stacksplitdfa)[state]) return NULL; return pos - 3; } struct aa_perms; struct aa_ruleset; int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules, struct aa_label *label, aa_state_t state, bool subns, u32 request, struct aa_perms *perms); /** * __aa_get_label - get a reference count to uncounted label reference * @l: reference to get a count on * * Returns: pointer to reference OR NULL if race is lost and reference is * being repeated. * Requires: lock held, and the return code MUST be checked */ static inline struct aa_label *__aa_get_label(struct aa_label *l) { if (l && kref_get_unless_zero(&l->count)) return l; return NULL; } static inline struct aa_label *aa_get_label(struct aa_label *l) { if (l) kref_get(&(l->count)); return l; } /** * aa_get_label_rcu - increment refcount on a label that can be replaced * @l: pointer to label that can be replaced (NOT NULL) * * Returns: pointer to a refcounted label. * else NULL if no label */ static inline struct aa_label *aa_get_label_rcu(struct aa_label __rcu **l) { struct aa_label *c; rcu_read_lock(); do { c = rcu_dereference(*l); } while (c && !kref_get_unless_zero(&c->count)); rcu_read_unlock(); return c; } /** * aa_get_newest_label - find the newest version of @l * @l: the label to check for newer versions of * * Returns: refcounted newest version of @l taking into account * replacement, renames and removals * return @l. */ static inline struct aa_label *aa_get_newest_label(struct aa_label *l) { if (!l) return NULL; if (label_is_stale(l)) { struct aa_label *tmp; AA_BUG(!l->proxy); AA_BUG(!l->proxy->label); /* BUG: only way this can happen is @l ref count and its * replacement count have gone to 0 and are on their way * to destruction. ie. we have a refcounting error */ tmp = aa_get_label_rcu(&l->proxy->label); AA_BUG(!tmp); return tmp; } return aa_get_label(l); } static inline void aa_put_label(struct aa_label *l) { if (l) kref_put(&l->count, aa_label_kref); } struct aa_proxy *aa_alloc_proxy(struct aa_label *l, gfp_t gfp); void aa_proxy_kref(struct kref *kref); static inline struct aa_proxy *aa_get_proxy(struct aa_proxy *proxy) { if (proxy) kref_get(&(proxy->count)); return proxy; } static inline void aa_put_proxy(struct aa_proxy *proxy) { if (proxy) kref_put(&proxy->count, aa_proxy_kref); } void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new); #endif /* __AA_LABEL_H */
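/*
 * Minimal usage sketch: one plausible way a caller might combine the
 * reference-counting helpers and iteration macros declared above.  The
 * function name aa_example_walk_label() and the pr_info() message are
 * hypothetical illustrations only; aa_get_newest_label(), aa_put_label(),
 * label_for_each() and struct label_it are the interfaces declared in this
 * header.  Guarded with #if 0 so it serves purely as documentation.
 */
#if 0
static void aa_example_walk_label(struct aa_label *label)
{
	struct aa_label *l;
	struct aa_profile *profile;
	struct label_it it;

	/* take a counted reference to the newest (non-stale) version */
	l = aa_get_newest_label(label);
	if (!l)
		return;

	/* visit each profile slot in the label's vector, in order */
	label_for_each(it, l, profile)
		pr_info("label vec[%d] holds a profile\n", it.i);

	/* balance the reference taken by aa_get_newest_label() */
	aa_put_label(l);
}
#endif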
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/namei.c * * Copyright (C) 1991, 1992 Linus Torvalds */ /* * Some corrections by tytso. */ /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname * lookup logic. */ /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. */ #include <linux/init.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/wordpart.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/namei.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/fsnotify.h> #include <linux/personality.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/mount.h> #include <linux/audit.h> #include <linux/capability.h> #include <linux/file.h> #include <linux/fcntl.h> #include <linux/device_cgroup.h> #include <linux/fs_struct.h> #include <linux/posix_acl.h> #include <linux/hash.h> #include <linux/bitops.h> #include <linux/init_task.h> #include <linux/uaccess.h> #include "internal.h" #include "mount.h" /* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr. The reason is that omirr needs * to know the _real_ pathname, not the user-supplied one, in case * of symlinks (and also when transname replacements occur). * * The new code replaces the old recursive symlink resolution with * an iterative one (in case of non-nested symlink chains). It does * this with calls to <fs>_follow_link(). * As a side effect, dir_namei(), _namei() and follow_link() are now * replaced with a single function lookup_dentry() that can handle all * the special cases of the former code. * * With the new dcache, the pathname is stored at each inode, at least as * long as the refcount of the inode is positive. As a side effect, the * size of the dcache depends on the inode cache and thus is dynamic. * * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink * resolution to correspond with current state of the code. * * Note that the symlink resolution is not *completely* iterative. * There is still a significant amount of tail- and mid- recursion in * the algorithm.
Also, note that <fs>_readlink() is not used in * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() * may return different results than <fs>_follow_link(). Many virtual * filesystems (including /proc) exhibit this behavior. */ /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation: * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL * and the name already exists in form of a symlink, try to create the new * name indicated by the symlink. The old code always complained that the * name already exists, due to not following the symlink even if its target * is nonexistent. The new semantics affects also mknod() and link() when * the name is a symlink pointing to a non-existent name. * * I don't know which semantics is the right one, since I have no access * to standards. But I found by trial that HP-UX 9.0 has the full "new" * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the * "old" one. Personally, I think the new semantics is much more logical. * Note that "ln old new" where "new" is a symlink pointing to a non-existing * file does succeed in both HP-UX and SunOs, but not in Solaris * and in the old Linux semantics. */ /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink * semantics. See the comments in "open_namei" and "do_link" below. * * [10-Sep-98 Alan Modra] Another symlink change. */ /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: * inside the path - always follow. * in the last component in creation/removal/renaming - never follow. * if LOOKUP_FOLLOW passed - follow. * if the pathname has trailing slashes - follow. * otherwise - don't follow. * (applied in that order). * * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT * restored for 2.4. This is the last surviving part of old 4.2BSD bug. * During the 2.4 we need to fix the userland stuff depending on it - * hopefully we will be able to get rid of that wart in 2.5. So far only * XEmacs seems to be relying on it... */ /* * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives * any extra contention... */ /* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. * * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * getname_flags(const char __user *filename, int flags, int *empty) { struct filename *result; char *kname; int len; result = audit_reusename(filename); if (result) return result; result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); /* * First, try to embed the struct filename inside the names_cache * allocation */ kname = (char *)result->iname; result->name = kname; len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); if (unlikely(len < 0)) { __putname(result); return ERR_PTR(len); } /* * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a * separate struct filename so we can dedicate the entire * names_cache allocation for the pathname, and re-do the copy from * userland. 
*/ if (unlikely(len == EMBEDDED_NAME_MAX)) { const size_t size = offsetof(struct filename, iname[1]); kname = (char *)result; /* * size is chosen that way we to guarantee that * result->iname[0] is within the same object and that * kname can't be equal to result->iname, no matter what. */ result = kzalloc(size, GFP_KERNEL); if (unlikely(!result)) { __putname(kname); return ERR_PTR(-ENOMEM); } result->name = kname; len = strncpy_from_user(kname, filename, PATH_MAX); if (unlikely(len < 0)) { __putname(kname); kfree(result); return ERR_PTR(len); } if (unlikely(len == PATH_MAX)) { __putname(kname); kfree(result); return ERR_PTR(-ENAMETOOLONG); } } atomic_set(&result->refcnt, 1); /* The empty path is special. */ if (unlikely(!len)) { if (empty) *empty = 1; if (!(flags & LOOKUP_EMPTY)) { putname(result); return ERR_PTR(-ENOENT); } } result->uptr = filename; result->aname = NULL; audit_getname(result); return result; } struct filename * getname_uflags(const char __user *filename, int uflags) { int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; return getname_flags(filename, flags, NULL); } struct filename * getname(const char __user * filename) { return getname_flags(filename, 0, NULL); } struct filename * getname_kernel(const char * filename) { struct filename *result; int len = strlen(filename) + 1; result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; } else if (len <= PATH_MAX) { const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); } tmp->name = (char *)result; result = tmp; } else { __putname(result); return ERR_PTR(-ENAMETOOLONG); } memcpy((char *)result->name, filename, len); result->uptr = NULL; result->aname = NULL; atomic_set(&result->refcnt, 1); audit_getname(result); return result; } EXPORT_SYMBOL(getname_kernel); void putname(struct filename *name) { if (IS_ERR(name)) return; if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) return; if (!atomic_dec_and_test(&name->refcnt)) return; if (name->name != name->iname) { __putname(name->name); kfree(name); } else __putname(name); } EXPORT_SYMBOL(putname); /** * check_acl - perform ACL permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * This function performs the ACL permission checking. Since this function * retrieve POSIX acls it needs to know whether it is called from a blocking or * non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ static int check_acl(struct mnt_idmap *idmap, struct inode *inode, int mask) { #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *acl; if (mask & MAY_NOT_BLOCK) { acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); if (!acl) return -EAGAIN; /* no ->get_inode_acl() calls in RCU mode... 
*/ if (is_uncached_acl(acl)) return -ECHILD; return posix_acl_permission(idmap, inode, acl, mask); } acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { int error = posix_acl_permission(idmap, inode, acl, mask); posix_acl_release(acl); return error; } #endif return -EAGAIN; } /** * acl_permission_check - perform basic UNIX permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * This function performs the basic UNIX permission checking. Since this * function may retrieve POSIX acls it needs to know whether it is called from a * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ static int acl_permission_check(struct mnt_idmap *idmap, struct inode *inode, int mask) { unsigned int mode = inode->i_mode; vfsuid_t vfsuid; /* Are we the owner? If so, ACL's don't matter */ vfsuid = i_uid_into_vfsuid(idmap, inode); if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) { mask &= 7; mode >>= 6; return (mask & ~mode) ? -EACCES : 0; } /* Do we have ACL's? */ if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { int error = check_acl(idmap, inode, mask); if (error != -EAGAIN) return error; } /* Only RWX matters for group/other mode bits */ mask &= 7; /* * Are the group permissions different from * the other permissions in the bits we care * about? Need to check group ownership if so. */ if (mask & (mode ^ (mode >> 3))) { vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); if (vfsgid_in_group_p(vfsgid)) mode >>= 3; } /* Bits in 'mode' clear that we require? */ return (mask & ~mode) ? -EACCES : 0; } /** * generic_permission - check for access rights on a Posix-like filesystem * @idmap: idmap of the mount the inode was found from * @inode: inode to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, * %MAY_NOT_BLOCK ...) * * Used to check for read/write/execute permissions on a file. * We use "fsuid" for this, letting us set arbitrary permissions * for filesystem access without changing the "normal" uids which * are used for other things. * * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk * request cannot be satisfied (eg. requires blocking or too much complexity). * It would then be called again in ref-walk mode. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int generic_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int ret; /* * Do the basic permission checks. 
*/ ret = acl_permission_check(idmap, inode, mask); if (ret != -EACCES) return ret; if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ if (!(mask & MAY_WRITE)) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_READ_SEARCH)) return 0; if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_OVERRIDE)) return 0; return -EACCES; } /* * Searching includes executable on directories, else just read. */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_READ_SEARCH)) return 0; /* * Read/write DACs are always overridable. * Executable DACs are overridable when there is * at least one exec bit set. */ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_OVERRIDE)) return 0; return -EACCES; } EXPORT_SYMBOL(generic_permission); /** * do_inode_permission - UNIX permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * We _really_ want to just do "generic_permission()" without * even looking at the inode->i_op values. So we keep a cache * flag in inode->i_opflags, that says "this has not special * permission function, use the fast case". */ static inline int do_inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { if (likely(inode->i_op->permission)) return inode->i_op->permission(idmap, inode, mask); /* This gets set once for the inode lifetime */ spin_lock(&inode->i_lock); inode->i_opflags |= IOP_FASTPERM; spin_unlock(&inode->i_lock); } return generic_permission(idmap, inode, mask); } /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. */ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) { if (unlikely(mask & MAY_WRITE)) { umode_t mode = inode->i_mode; /* Nobody gets write access to a read-only fs. */ if (sb_rdonly(sb) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; } return 0; } /** * inode_permission - Check for access rights to a given inode * @idmap: idmap of the mount the inode was found from * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Check for read/write/execute permissions on an inode. We use fs[ug]id for * this, letting us set arbitrary permissions for filesystem access without * changing the "normal" UIDs which are used for other things. * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. */ int inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int retval; retval = sb_permission(inode->i_sb, inode, mask); if (retval) return retval; if (unlikely(mask & MAY_WRITE)) { /* * Nobody gets write access to an immutable file. */ if (IS_IMMUTABLE(inode)) return -EPERM; /* * Updating mtime will likely cause i_uid and i_gid to be * written back improperly if their true value is unknown * to the vfs. 
*/ if (HAS_UNMAPPED_ID(idmap, inode)) return -EACCES; } retval = do_inode_permission(idmap, inode, mask); if (retval) return retval; retval = devcgroup_inode_permission(inode, mask); if (retval) return retval; return security_inode_permission(inode, mask); } EXPORT_SYMBOL(inode_permission); /** * path_get - get a reference to a path * @path: path to get the reference to * * Given a path increment the reference count to the dentry and the vfsmount. */ void path_get(const struct path *path) { mntget(path->mnt); dget(path->dentry); } EXPORT_SYMBOL(path_get); /** * path_put - put a reference to a path * @path: path to put the reference to * * Given a path decrement the reference count to the dentry and the vfsmount. */ void path_put(const struct path *path) { dput(path->dentry); mntput(path->mnt); } EXPORT_SYMBOL(path_put); #define EMBEDDED_LEVELS 2 struct nameidata { struct path path; struct qstr last; struct path root; struct inode *inode; /* path.dentry.d_inode */ unsigned int flags, state; unsigned seq, next_seq, m_seq, r_seq; int last_type; unsigned depth; int total_link_count; struct saved { struct path link; struct delayed_call done; const char *name; unsigned seq; } *stack, internal[EMBEDDED_LEVELS]; struct filename *name; struct nameidata *saved; unsigned root_seq; int dfd; vfsuid_t dir_vfsuid; umode_t dir_mode; } __randomize_layout; #define ND_ROOT_PRESET 1 #define ND_ROOT_GRABBED 2 #define ND_JUMPED 4 static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name) { struct nameidata *old = current->nameidata; p->stack = p->internal; p->depth = 0; p->dfd = dfd; p->name = name; p->path.mnt = NULL; p->path.dentry = NULL; p->total_link_count = old ? old->total_link_count : 0; p->saved = old; current->nameidata = p; } static inline void set_nameidata(struct nameidata *p, int dfd, struct filename *name, const struct path *root) { __set_nameidata(p, dfd, name); p->state = 0; if (unlikely(root)) { p->state = ND_ROOT_PRESET; p->root = *root; } } static void restore_nameidata(void) { struct nameidata *now = current->nameidata, *old = now->saved; current->nameidata = old; if (old) old->total_link_count = now->total_link_count; if (now->stack != now->internal) kfree(now->stack); } static bool nd_alloc_stack(struct nameidata *nd) { struct saved *p; p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved), nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL); if (unlikely(!p)) return false; memcpy(p, nd->internal, sizeof(nd->internal)); nd->stack = p; return true; } /** * path_connected - Verify that a dentry is below mnt.mnt_root * @mnt: The mountpoint to check. * @dentry: The dentry to check. * * Rename can sometimes move a file or directory outside of a bind * mount, path_connected allows those cases to be detected. 
*/ static bool path_connected(struct vfsmount *mnt, struct dentry *dentry) { struct super_block *sb = mnt->mnt_sb; /* Bind mounts can have disconnected paths */ if (mnt->mnt_root == sb->s_root) return true; return is_subdir(dentry, mnt->mnt_root); } static void drop_links(struct nameidata *nd) { int i = nd->depth; while (i--) { struct saved *last = nd->stack + i; do_delayed_call(&last->done); clear_delayed_call(&last->done); } } static void leave_rcu(struct nameidata *nd) { nd->flags &= ~LOOKUP_RCU; nd->seq = nd->next_seq = 0; rcu_read_unlock(); } static void terminate_walk(struct nameidata *nd) { drop_links(nd); if (!(nd->flags & LOOKUP_RCU)) { int i; path_put(&nd->path); for (i = 0; i < nd->depth; i++) path_put(&nd->stack[i].link); if (nd->state & ND_ROOT_GRABBED) { path_put(&nd->root); nd->state &= ~ND_ROOT_GRABBED; } } else { leave_rcu(nd); } nd->depth = 0; nd->path.mnt = NULL; nd->path.dentry = NULL; } /* path_put is needed afterwards regardless of success or failure */ static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq) { int res = __legitimize_mnt(path->mnt, mseq); if (unlikely(res)) { if (res > 0) path->mnt = NULL; path->dentry = NULL; return false; } if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) { path->dentry = NULL; return false; } return !read_seqcount_retry(&path->dentry->d_seq, seq); } static inline bool legitimize_path(struct nameidata *nd, struct path *path, unsigned seq) { return __legitimize_path(path, seq, nd->m_seq); } static bool legitimize_links(struct nameidata *nd) { int i; if (unlikely(nd->flags & LOOKUP_CACHED)) { drop_links(nd); nd->depth = 0; return false; } for (i = 0; i < nd->depth; i++) { struct saved *last = nd->stack + i; if (unlikely(!legitimize_path(nd, &last->link, last->seq))) { drop_links(nd); nd->depth = i + 1; return false; } } return true; } static bool legitimize_root(struct nameidata *nd) { /* Nothing to do if nd->root is zero or is managed by the VFS user. */ if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET)) return true; nd->state |= ND_ROOT_GRABBED; return legitimize_path(nd, &nd->root, nd->root_seq); } /* * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab * normal reference counts on dentries and vfsmounts to transition to ref-walk * mode. Refcounts are grabbed at the last known good point before rcu-walk * got stuck, so ref-walk may continue from there. If this is not successful * (eg. a seqcount has changed), then failure is returned and it's up to caller * to restart the path walk from the beginning in ref-walk mode. */ /** * try_to_unlazy - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * Returns: true on success, false on failure * * try_to_unlazy attempts to legitimize the current nd->path and nd->root * for ref-walk mode. * Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy() failure and * terminate_walk(). 
*/ static bool try_to_unlazy(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; BUG_ON(!(nd->flags & LOOKUP_RCU)); if (unlikely(!legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) goto out; if (unlikely(!legitimize_root(nd))) goto out; leave_rcu(nd); BUG_ON(nd->inode != parent->d_inode); return true; out1: nd->path.mnt = NULL; nd->path.dentry = NULL; out: leave_rcu(nd); return false; } /** * try_to_unlazy_next - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * @dentry: next dentry to step into * Returns: true on success, false on failure * * Similar to try_to_unlazy(), but here we have the next dentry already * picked by rcu-walk and want to legitimize that in addition to the current * nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy_next() failure and * terminate_walk(). */ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry) { int res; BUG_ON(!(nd->flags & LOOKUP_RCU)); if (unlikely(!legitimize_links(nd))) goto out2; res = __legitimize_mnt(nd->path.mnt, nd->m_seq); if (unlikely(res)) { if (res > 0) goto out2; goto out1; } if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1; /* * We need to move both the parent and the dentry from the RCU domain * to be properly refcounted. And the sequence number in the dentry * validates *both* dentry counters, since we checked the sequence * number of the parent after we got the child sequence number. So we * know the parent must still be valid if the child sequence number is */ if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) goto out; if (read_seqcount_retry(&dentry->d_seq, nd->next_seq)) goto out_dput; /* * Sequence counts matched. Now make sure that the root is * still valid and get it if required. */ if (unlikely(!legitimize_root(nd))) goto out_dput; leave_rcu(nd); return true; out2: nd->path.mnt = NULL; out1: nd->path.dentry = NULL; out: leave_rcu(nd); return false; out_dput: leave_rcu(nd); dput(dentry); return false; } static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) return dentry->d_op->d_revalidate(dentry, flags); else return 1; } /** * complete_walk - successful completion of path walk * @nd: pointer nameidata * * If we had been in RCU mode, drop out of it and legitimize nd->path. * Revalidate the final result, unless we'd already done that during * the path walk or the filesystem doesn't ask for it. Return 0 on * success, -error on failure. In case of failure caller does not * need to drop nd->path. */ static int complete_walk(struct nameidata *nd) { struct dentry *dentry = nd->path.dentry; int status; if (nd->flags & LOOKUP_RCU) { /* * We don't want to zero nd->root for scoped-lookups or * externally-managed nd->root. */ if (!(nd->state & ND_ROOT_PRESET)) if (!(nd->flags & LOOKUP_IS_SCOPED)) nd->root.mnt = NULL; nd->flags &= ~LOOKUP_CACHED; if (!try_to_unlazy(nd)) return -ECHILD; } if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) { /* * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't * ever step outside the root during lookup" and should already * be guaranteed by the rest of namei, we want to avoid a namei * BUG resulting in userspace being given a path that was not * scoped within the root at some point during the lookup. 
* * So, do a final sanity-check to make sure that in the * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED) * we won't silently return an fd completely outside of the * requested root to userspace. * * Userspace could move the path outside the root after this * check, but as discussed elsewhere this is not a concern (the * resolved file was inside the root at some point). */ if (!path_is_under(&nd->path, &nd->root)) return -EXDEV; } if (likely(!(nd->state & ND_JUMPED))) return 0; if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) return 0; status = dentry->d_op->d_weak_revalidate(dentry, nd->flags); if (status > 0) return 0; if (!status) status = -ESTALE; return status; } static int set_root(struct nameidata *nd) { struct fs_struct *fs = current->fs; /* * Jumping to the real root in a scoped-lookup is a BUG in namei, but we * still have to ensure it doesn't happen because it will cause a breakout * from the dirfd. */ if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED)) return -ENOTRECOVERABLE; if (nd->flags & LOOKUP_RCU) { unsigned seq; do { seq = read_seqcount_begin(&fs->seq); nd->root = fs->root; nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } else { get_fs_root(fs, &nd->root); nd->state |= ND_ROOT_GRABBED; } return 0; } static int nd_jump_root(struct nameidata *nd) { if (unlikely(nd->flags & LOOKUP_BENEATH)) return -EXDEV; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) { /* Absolute path arguments to path_init() are allowed. */ if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt) return -EXDEV; } if (!nd->root.mnt) { int error = set_root(nd); if (error) return error; } if (nd->flags & LOOKUP_RCU) { struct dentry *d; nd->path = nd->root; d = nd->path.dentry; nd->inode = d->d_inode; nd->seq = nd->root_seq; if (read_seqcount_retry(&d->d_seq, nd->seq)) return -ECHILD; } else { path_put(&nd->path); nd->path = nd->root; path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } nd->state |= ND_JUMPED; return 0; } /* * Helper to directly jump to a known parsed path from ->get_link, * caller must have taken a reference to path beforehand. */ int nd_jump_link(const struct path *path) { int error = -ELOOP; struct nameidata *nd = current->nameidata; if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS)) goto err; error = -EXDEV; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) { if (nd->path.mnt != path->mnt) goto err; } /* Not currently safe for scoped-lookups. 
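 * A magic-link jump replaces nd->path wholesale, so the "never escape
 * nd->root" guarantee of LOOKUP_IS_SCOPED presumably could not be kept
 * here; the check below rejects such jumps with -EXDEV instead.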
*/ if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) goto err; path_put(&nd->path); nd->path = *path; nd->inode = nd->path.dentry->d_inode; nd->state |= ND_JUMPED; return 0; err: path_put(path); return error; } static inline void put_link(struct nameidata *nd) { struct saved *last = nd->stack + --nd->depth; do_delayed_call(&last->done); if (!(nd->flags & LOOKUP_RCU)) path_put(&last->link); } static int sysctl_protected_symlinks __read_mostly; static int sysctl_protected_hardlinks __read_mostly; static int sysctl_protected_fifos __read_mostly; static int sysctl_protected_regular __read_mostly; #ifdef CONFIG_SYSCTL static struct ctl_table namei_sysctls[] = { { .procname = "protected_symlinks", .data = &sysctl_protected_symlinks, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "protected_hardlinks", .data = &sysctl_protected_hardlinks, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "protected_fifos", .data = &sysctl_protected_fifos, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, { .procname = "protected_regular", .data = &sysctl_protected_regular, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, }; static int __init init_fs_namei_sysctls(void) { register_sysctl_init("fs", namei_sysctls); return 0; } fs_initcall(init_fs_namei_sysctls); #endif /* CONFIG_SYSCTL */ /** * may_follow_link - Check symlink following for unsafe situations * @nd: nameidata pathwalk data * @inode: Used for idmapping. * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is * in a sticky world-writable directory. This is to protect privileged * processes from failing races against path names that may change out * from under them by way of other users creating malicious symlinks. * It will permit symlinks to be followed only when outside a sticky * world-writable directory, or when the uid of the symlink and follower * match, or when the directory owner matches the symlink's owner. * * Returns 0 if following the symlink is allowed, -ve on error. */ static inline int may_follow_link(struct nameidata *nd, const struct inode *inode) { struct mnt_idmap *idmap; vfsuid_t vfsuid; if (!sysctl_protected_symlinks) return 0; idmap = mnt_idmap(nd->path.mnt); vfsuid = i_uid_into_vfsuid(idmap, inode); /* Allowed if owner and follower match. */ if (vfsuid_eq_kuid(vfsuid, current_fsuid())) return 0; /* Allowed if parent directory not sticky and world-writable. */ if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) return 0; /* Allowed if parent directory and link owner match. */ if (vfsuid_valid(nd->dir_vfsuid) && vfsuid_eq(nd->dir_vfsuid, vfsuid)) return 0; if (nd->flags & LOOKUP_RCU) return -ECHILD; audit_inode(nd->name, nd->stack[0].link.dentry, 0); audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link"); return -EACCES; } /** * safe_hardlink_source - Check for safe hardlink conditions * @idmap: idmap of the mount the inode was found from * @inode: the source inode to hardlink from * * Return false if at least one of the following conditions: * - inode is not a regular file * - inode is setuid * - inode is setgid and group-exec * - access failure for read and write * * Otherwise returns true. 
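 *
 * Together with may_linkat() below this means that, with
 * sysctl_protected_hardlinks enabled, a user who neither owns a setuid
 * binary nor has CAP_FOWNER over it cannot create new hardlinks to it,
 * even if directory permissions would otherwise allow that.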
*/ static bool safe_hardlink_source(struct mnt_idmap *idmap, struct inode *inode) { umode_t mode = inode->i_mode; /* Special files should not get pinned to the filesystem. */ if (!S_ISREG(mode)) return false; /* Setuid files should not get pinned to the filesystem. */ if (mode & S_ISUID) return false; /* Executable setgid files should not get pinned to the filesystem. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) return false; /* Hardlinking to unreadable or unwritable sources is dangerous. */ if (inode_permission(idmap, inode, MAY_READ | MAY_WRITE)) return false; return true; } /** * may_linkat - Check permissions for creating a hardlink * @idmap: idmap of the mount the inode was found from * @link: the source to hardlink from * * Block hardlink when all of: * - sysctl_protected_hardlinks enabled * - fsuid does not match inode * - hardlink source is unsafe (see safe_hardlink_source() above) * - not CAP_FOWNER in a namespace with the inode owner uid mapped * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if successful, -ve on error. */ int may_linkat(struct mnt_idmap *idmap, const struct path *link) { struct inode *inode = link->dentry->d_inode; /* Inode writeback is not safe when the uid or gid are invalid. */ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; if (!sysctl_protected_hardlinks) return 0; /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. */ if (safe_hardlink_source(idmap, inode) || inode_owner_or_capable(idmap, inode)) return 0; audit_log_path_denied(AUDIT_ANOM_LINK, "linkat"); return -EPERM; } /** * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory * should be allowed, or not, on files that already * exist. * @idmap: idmap of the mount the inode was found from * @nd: nameidata pathwalk data * @inode: the inode of the file to open * * Block an O_CREAT open of a FIFO (or a regular file) when: * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled * - the file already exists * - we are in a sticky directory * - we don't own the file * - the owner of the directory doesn't own the file * - the directory is world writable * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2 * the directory doesn't have to be world writable: being group writable will * be enough. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if the open is allowed, -ve on error. 
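 *
 * For example, with sysctl_protected_fifos set to 1, an O_CREAT open of
 * an already-existing FIFO in a world-writable sticky directory such as
 * /tmp is denied with -EACCES unless the FIFO is owned by the opener or
 * by the directory owner.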
*/ static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd, struct inode *const inode) { umode_t dir_mode = nd->dir_mode; vfsuid_t dir_vfsuid = nd->dir_vfsuid; if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || likely(!(dir_mode & S_ISVTX)) || vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) || vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) return 0; if (likely(dir_mode & 0002) || (dir_mode & 0020 && ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { const char *operation = S_ISFIFO(inode->i_mode) ? "sticky_create_fifo" : "sticky_create_regular"; audit_log_path_denied(AUDIT_ANOM_CREAT, operation); return -EACCES; } return 0; } /* * follow_up - Find the mountpoint of path's vfsmount * * Given a path, find the mountpoint of its source file system. * Replace @path with the path of the mountpoint in the parent mount. * Up is towards /. * * Return 1 if we went up a level and 0 if we were already at the * root. */ int follow_up(struct path *path) { struct mount *mnt = real_mount(path->mnt); struct mount *parent; struct dentry *mountpoint; read_seqlock_excl(&mount_lock); parent = mnt->mnt_parent; if (parent == mnt) { read_sequnlock_excl(&mount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); read_sequnlock_excl(&mount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); path->mnt = &parent->mnt; return 1; } EXPORT_SYMBOL(follow_up); static bool choose_mountpoint_rcu(struct mount *m, const struct path *root, struct path *path, unsigned *seqp) { while (mnt_has_parent(m)) { struct dentry *mountpoint = m->mnt_mountpoint; m = m->mnt_parent; if (unlikely(root->dentry == mountpoint && root->mnt == &m->mnt)) break; if (mountpoint != m->mnt.mnt_root) { path->mnt = &m->mnt; path->dentry = mountpoint; *seqp = read_seqcount_begin(&mountpoint->d_seq); return true; } } return false; } static bool choose_mountpoint(struct mount *m, const struct path *root, struct path *path) { bool found; rcu_read_lock(); while (1) { unsigned seq, mseq = read_seqbegin(&mount_lock); found = choose_mountpoint_rcu(m, root, path, &seq); if (unlikely(!found)) { if (!read_seqretry(&mount_lock, mseq)) break; } else { if (likely(__legitimize_path(path, seq, mseq))) break; rcu_read_unlock(); path_put(path); rcu_read_lock(); } } rcu_read_unlock(); return found; } /* * Perform an automount * - return -EISDIR to tell follow_managed() to stop and return the path we * were called with. */ static int follow_automount(struct path *path, int *count, unsigned lookup_flags) { struct dentry *dentry = path->dentry; /* We don't want to mount if someone's just doing a stat - * unless they're stat'ing a directory and appended a '/' to * the name. * * We do, however, want to mount if someone wants to open or * create a file of any type under the mountpoint, wants to * traverse through the mountpoint or wants to open the * mounted directory. Also, autofs may mark negative dentries * as being automount points. These will need the attentions * of the daemon to instantiate them before they can be used. */ if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && dentry->d_inode) return -EISDIR; if (count && (*count)++ >= MAXSYMLINKS) return -ELOOP; return finish_automount(dentry->d_op->d_automount(path), path); } /* * mount traversal - out-of-line part. 
One note on ->d_flags accesses - * dentries are pinned but not locked here, so negative dentry can go * positive right under us. Use of smp_load_acquire() provides a barrier * sufficient for ->d_inode and ->d_flags consistency. */ static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped, int *count, unsigned lookup_flags) { struct vfsmount *mnt = path->mnt; bool need_mntput = false; int ret = 0; while (flags & DCACHE_MANAGED_DENTRY) { /* Allow the filesystem to manage the transit without i_mutex * being held. */ if (flags & DCACHE_MANAGE_TRANSIT) { ret = path->dentry->d_op->d_manage(path, false); flags = smp_load_acquire(&path->dentry->d_flags); if (ret < 0) break; } if (flags & DCACHE_MOUNTED) { // something's mounted on it.. struct vfsmount *mounted = lookup_mnt(path); if (mounted) { // ... in our namespace dput(path->dentry); if (need_mntput) mntput(path->mnt); path->mnt = mounted; path->dentry = dget(mounted->mnt_root); // here we know it's positive flags = path->dentry->d_flags; need_mntput = true; continue; } } if (!(flags & DCACHE_NEED_AUTOMOUNT)) break; // uncovered automount point ret = follow_automount(path, count, lookup_flags); flags = smp_load_acquire(&path->dentry->d_flags); if (ret < 0) break; } if (ret == -EISDIR) ret = 0; // possible if you race with several mount --move if (need_mntput && path->mnt == mnt) mntput(path->mnt); if (!ret && unlikely(d_flags_negative(flags))) ret = -ENOENT; *jumped = need_mntput; return ret; } static inline int traverse_mounts(struct path *path, bool *jumped, int *count, unsigned lookup_flags) { unsigned flags = smp_load_acquire(&path->dentry->d_flags); /* fastpath */ if (likely(!(flags & DCACHE_MANAGED_DENTRY))) { *jumped = false; if (unlikely(d_flags_negative(flags))) return -ENOENT; return 0; } return __traverse_mounts(path, flags, jumped, count, lookup_flags); } int follow_down_one(struct path *path) { struct vfsmount *mounted; mounted = lookup_mnt(path); if (mounted) { dput(path->dentry); mntput(path->mnt); path->mnt = mounted; path->dentry = dget(mounted->mnt_root); return 1; } return 0; } EXPORT_SYMBOL(follow_down_one); /* * Follow down to the covering mount currently visible to userspace. At each * point, the filesystem owning that dentry may be queried as to whether the * caller is permitted to proceed or not. */ int follow_down(struct path *path, unsigned int flags) { struct vfsmount *mnt = path->mnt; bool jumped; int ret = traverse_mounts(path, &jumped, NULL, flags); if (path->mnt != mnt) mntput(mnt); return ret; } EXPORT_SYMBOL(follow_down); /* * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if * we meet a managed dentry that would need blocking. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path) { struct dentry *dentry = path->dentry; unsigned int flags = dentry->d_flags; if (likely(!(flags & DCACHE_MANAGED_DENTRY))) return true; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return false; for (;;) { /* * Don't forget we might have a non-mountpoint managed dentry * that wants to block transit. */ if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) { int res = dentry->d_op->d_manage(path, true); if (res) return res == -EISDIR; flags = dentry->d_flags; } if (flags & DCACHE_MOUNTED) { struct mount *mounted = __lookup_mnt(path->mnt, dentry); if (mounted) { path->mnt = &mounted->mnt; dentry = path->dentry = mounted->mnt.mnt_root; nd->state |= ND_JUMPED; nd->next_seq = read_seqcount_begin(&dentry->d_seq); flags = dentry->d_flags; // makes sure that non-RCU pathwalk could reach // this state. 
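// (if mount_lock has changed since path_init() sampled nd->m_seq, give
// up: returning false sends handle_mounts() through try_to_unlazy_next()
// and the ref-walk mount traversal instead.)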
if (read_seqretry(&mount_lock, nd->m_seq)) return false; continue; } if (read_seqretry(&mount_lock, nd->m_seq)) return false; } return !(flags & DCACHE_NEED_AUTOMOUNT); } } static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, struct path *path) { bool jumped; int ret; path->mnt = nd->path.mnt; path->dentry = dentry; if (nd->flags & LOOKUP_RCU) { unsigned int seq = nd->next_seq; if (likely(__follow_mount_rcu(nd, path))) return 0; // *path and nd->next_seq might've been clobbered path->mnt = nd->path.mnt; path->dentry = dentry; nd->next_seq = seq; if (!try_to_unlazy_next(nd, dentry)) return -ECHILD; } ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags); if (jumped) { if (unlikely(nd->flags & LOOKUP_NO_XDEV)) ret = -EXDEV; else nd->state |= ND_JUMPED; } if (unlikely(ret)) { dput(path->dentry); if (path->mnt != nd->path.mnt) mntput(path->mnt); } return ret; } /* * This looks up the name in dcache and possibly revalidates the found dentry. * NULL is returned if the dentry does not exist in the cache. */ static struct dentry *lookup_dcache(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct dentry *dentry = d_lookup(dir, name); if (dentry) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) d_invalidate(dentry); dput(dentry); return ERR_PTR(error); } } return dentry; } /* * Parent directory has inode locked exclusive. This is one * and only case when ->lookup() gets called on non in-lookup * dentries - as the matter of fact, this only gets called * when directory is guaranteed to have no in-lookup children * at all. */ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, unsigned int flags) { struct dentry *dentry = lookup_dcache(name, base, flags); struct dentry *old; struct inode *dir = base->d_inode; if (dentry) return dentry; /* Don't create child dentry for a dead directory. */ if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); dentry = d_alloc(base, name); if (unlikely(!dentry)) return ERR_PTR(-ENOMEM); old = dir->i_op->lookup(dir, dentry, flags); if (unlikely(old)) { dput(dentry); dentry = old; } return dentry; } EXPORT_SYMBOL(lookup_one_qstr_excl); static struct dentry *lookup_fast(struct nameidata *nd) { struct dentry *dentry, *parent = nd->path.dentry; int status = 1; /* * Rename seqlock is not required here because in the off chance * of a false negative due to a concurrent rename, the caller is * going to fall back to non-racy lookup. */ if (nd->flags & LOOKUP_RCU) { dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq); if (unlikely(!dentry)) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); return NULL; } /* * This sequence count validates that the parent had no * changes while we did the lookup of the dentry above. 
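 * (nd->seq is the parent's d_seq sampled when we stepped onto it, while
 * the child's d_seq has already been stored in nd->next_seq by
 * __d_lookup_rcu() above.)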
*/ if (read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); status = d_revalidate(dentry, nd->flags); if (likely(status > 0)) return dentry; if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ status = d_revalidate(dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return NULL; status = d_revalidate(dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) d_invalidate(dentry); dput(dentry); return ERR_PTR(status); } return dentry; } /* Fast lookup failed, do it the slow way */ static struct dentry *__lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct dentry *dentry, *old; struct inode *inode = dir->d_inode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) return ERR_PTR(-ENOENT); again: dentry = d_alloc_parallel(dir, name, &wq); if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) { d_invalidate(dentry); dput(dentry); goto again; } dput(dentry); dentry = ERR_PTR(error); } } else { old = inode->i_op->lookup(inode, dentry, flags); d_lookup_done(dentry); if (unlikely(old)) { dput(dentry); dentry = old; } } return dentry; } static struct dentry *lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct inode *inode = dir->d_inode; struct dentry *res; inode_lock_shared(inode); res = __lookup_slow(name, dir, flags); inode_unlock_shared(inode); return res; } static inline int may_lookup(struct mnt_idmap *idmap, struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (!err) // success, keep going return 0; if (!try_to_unlazy(nd)) return -ECHILD; // redo it all non-lazy if (err != -ECHILD) // hard error return err; } return inode_permission(idmap, nd->inode, MAY_EXEC); } static int reserve_stack(struct nameidata *nd, struct path *link) { if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) return -ELOOP; if (likely(nd->depth != EMBEDDED_LEVELS)) return 0; if (likely(nd->stack != nd->internal)) return 0; if (likely(nd_alloc_stack(nd))) return 0; if (nd->flags & LOOKUP_RCU) { // we need to grab link before we do unlazy. 
And we can't skip // unlazy even if we fail to grab the link - cleanup needs it bool grabbed_link = legitimize_path(nd, link, nd->next_seq); if (!try_to_unlazy(nd) || !grabbed_link) return -ECHILD; if (nd_alloc_stack(nd)) return 0; } return -ENOMEM; } enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4}; static const char *pick_link(struct nameidata *nd, struct path *link, struct inode *inode, int flags) { struct saved *last; const char *res; int error = reserve_stack(nd, link); if (unlikely(error)) { if (!(nd->flags & LOOKUP_RCU)) path_put(link); return ERR_PTR(error); } last = nd->stack + nd->depth++; last->link = *link; clear_delayed_call(&last->done); last->seq = nd->next_seq; if (flags & WALK_TRAILING) { error = may_follow_link(nd, inode); if (unlikely(error)) return ERR_PTR(error); } if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) || unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW)) return ERR_PTR(-ELOOP); if (!(nd->flags & LOOKUP_RCU)) { touch_atime(&last->link); cond_resched(); } else if (atime_needs_update(&last->link, inode)) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); touch_atime(&last->link); } error = security_inode_follow_link(link->dentry, inode, nd->flags & LOOKUP_RCU); if (unlikely(error)) return ERR_PTR(error); res = READ_ONCE(inode->i_link); if (!res) { const char * (*get)(struct dentry *, struct inode *, struct delayed_call *); get = inode->i_op->get_link; if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd)) res = get(link->dentry, inode, &last->done); } else { res = get(link->dentry, inode, &last->done); } if (!res) goto all_done; if (IS_ERR(res)) return res; } if (*res == '/') { error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); while (unlikely(*++res == '/')) ; } if (*res) return res; all_done: // pure jump put_link(nd); return NULL; } /* * Do we need to follow links? We _really_ want to be able * to do this check without having to look at inode->i_op, * so we keep a cache of "no, this doesn't need follow_link" * for the common case. * * NOTE: dentry must be what nd->next_seq had been sampled from. 
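 *
 * @flags is a combination of WALK_TRAILING, WALK_MORE and WALK_NOFOLLOW;
 * a symlink is treated as the final object rather than followed if
 * WALK_NOFOLLOW is set, or if this is the trailing component and
 * LOOKUP_FOLLOW is not set.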
*/ static const char *step_into(struct nameidata *nd, int flags, struct dentry *dentry) { struct path path; struct inode *inode; int err = handle_mounts(nd, dentry, &path); if (err < 0) return ERR_PTR(err); inode = path.dentry->d_inode; if (likely(!d_is_symlink(path.dentry)) || ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) || (flags & WALK_NOFOLLOW)) { /* not a symlink or should not follow */ if (nd->flags & LOOKUP_RCU) { if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); if (unlikely(!inode)) return ERR_PTR(-ENOENT); } else { dput(nd->path.dentry); if (nd->path.mnt != path.mnt) mntput(nd->path.mnt); } nd->path = path; nd->inode = inode; nd->seq = nd->next_seq; return NULL; } if (nd->flags & LOOKUP_RCU) { /* make sure that d_is_symlink above matches inode */ if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); } else { if (path.mnt == nd->path.mnt) mntget(path.mnt); } return pick_link(nd, &path, inode, flags); } static struct dentry *follow_dotdot_rcu(struct nameidata *nd) { struct dentry *parent, *old; if (path_equal(&nd->path, &nd->root)) goto in_root; if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) { struct path path; unsigned seq; if (!choose_mountpoint_rcu(real_mount(nd->path.mnt), &nd->root, &path, &seq)) goto in_root; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return ERR_PTR(-ECHILD); nd->path = path; nd->inode = path.dentry->d_inode; nd->seq = seq; // makes sure that non-RCU pathwalk could reach this state if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); /* we know that mountpoint was pinned */ } old = nd->path.dentry; parent = old->d_parent; nd->next_seq = read_seqcount_begin(&parent->d_seq); // makes sure that non-RCU pathwalk could reach this state if (read_seqcount_retry(&old->d_seq, nd->seq)) return ERR_PTR(-ECHILD); if (unlikely(!path_connected(nd->path.mnt, parent))) return ERR_PTR(-ECHILD); return parent; in_root: if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-ECHILD); nd->next_seq = nd->seq; return nd->path.dentry; } static struct dentry *follow_dotdot(struct nameidata *nd) { struct dentry *parent; if (path_equal(&nd->path, &nd->root)) goto in_root; if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) { struct path path; if (!choose_mountpoint(real_mount(nd->path.mnt), &nd->root, &path)) goto in_root; path_put(&nd->path); nd->path = path; nd->inode = path.dentry->d_inode; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return ERR_PTR(-EXDEV); } /* rare case of legitimate dget_parent()... */ parent = dget_parent(nd->path.dentry); if (unlikely(!path_connected(nd->path.mnt, parent))) { dput(parent); return ERR_PTR(-ENOENT); } return parent; in_root: if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-EXDEV); return dget(nd->path.dentry); } static const char *handle_dots(struct nameidata *nd, int type) { if (type == LAST_DOTDOT) { const char *error = NULL; struct dentry *parent; if (!nd->root.mnt) { error = ERR_PTR(set_root(nd)); if (error) return error; } if (nd->flags & LOOKUP_RCU) parent = follow_dotdot_rcu(nd); else parent = follow_dotdot(nd); if (IS_ERR(parent)) return ERR_CAST(parent); error = step_into(nd, WALK_NOFOLLOW, parent); if (unlikely(error)) return error; if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) { /* * If there was a racing rename or mount along our * path, then we can't be sure that ".." hasn't jumped * above nd->root (and so userspace should retry or use * some fallback). 
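 * The checks below pair with the m_seq/r_seq samples taken in
 * path_init(): if either mount_lock or rename_lock has changed during
 * the walk, return -EAGAIN and let the caller retry.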
*/ smp_rmb(); if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)) return ERR_PTR(-EAGAIN); if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)) return ERR_PTR(-EAGAIN); } } return NULL; } static const char *walk_component(struct nameidata *nd, int flags) { struct dentry *dentry; /* * "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and * parent relationships. */ if (unlikely(nd->last_type != LAST_NORM)) { if (!(flags & WALK_MORE) && nd->depth) put_link(nd); return handle_dots(nd, nd->last_type); } dentry = lookup_fast(nd); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (unlikely(!dentry)) { dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags); if (IS_ERR(dentry)) return ERR_CAST(dentry); } if (!(flags & WALK_MORE) && nd->depth) put_link(nd); return step_into(nd, flags, dentry); } /* * We can do the critical dentry name comparison and hashing * operations one word at a time, but we are limited to: * * - Architectures with fast unaligned word accesses. We could * do a "get_unaligned()" if this helps and is sufficiently * fast. * * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we * do not trap on the (extremely unlikely) case of a page * crossing operation. * * - Furthermore, we need an efficient 64-bit compile for the * 64-bit case in order to generate the "number of bytes in * the final mask". Again, that could be replaced with a * efficient population count instruction or similar. */ #ifdef CONFIG_DCACHE_WORD_ACCESS #include <asm/word-at-a-time.h> #ifdef HASH_MIX /* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */ #elif defined(CONFIG_64BIT) /* * Register pressure in the mixing function is an issue, particularly * on 32-bit x86, but almost any function requires one state value and * one temporary. Instead, use a function designed for two state values * and no temporaries. * * This function cannot create a collision in only two iterations, so * we have two iterations to achieve avalanche. In those two iterations, * we have six layers of mixing, which is enough to spread one bit's * influence out to 2^6 = 64 state bits. * * Rotate constants are scored by considering either 64 one-bit input * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the * probability of that delta causing a change to each of the 128 output * bits, using a sample of random initial states. * * The Shannon entropy of the computed probabilities is then summed * to produce a score. Ideally, any input change has a 50% chance of * toggling any given output bit. * * Mixing scores (in bits) for (12,45): * Input delta: 1-bit 2-bit * 1 round: 713.3 42542.6 * 2 rounds: 2753.7 140389.8 * 3 rounds: 5954.1 233458.2 * 4 rounds: 7862.6 256672.2 * Perfect: 8192 258048 * (64*128) (64*63/2 * 128) */ #define HASH_MIX(x, y, a) \ ( x ^= (a), \ y ^= x, x = rol64(x,12),\ x += y, y = rol64(y,45),\ y *= 9 ) /* * Fold two longs into one 32-bit hash value. This must be fast, but * latency isn't quite as critical, as there is a fair bit of additional * work done before the hash value is used. 
*/ static inline unsigned int fold_hash(unsigned long x, unsigned long y) { y ^= x * GOLDEN_RATIO_64; y *= GOLDEN_RATIO_64; return y >> 32; } #else /* 32-bit case */ /* * Mixing scores (in bits) for (7,20): * Input delta: 1-bit 2-bit * 1 round: 330.3 9201.6 * 2 rounds: 1246.4 25475.4 * 3 rounds: 1907.1 31295.1 * 4 rounds: 2042.3 31718.6 * Perfect: 2048 31744 * (32*64) (32*31/2 * 64) */ #define HASH_MIX(x, y, a) \ ( x ^= (a), \ y ^= x, x = rol32(x, 7),\ x += y, y = rol32(y,20),\ y *= 9 ) static inline unsigned int fold_hash(unsigned long x, unsigned long y) { /* Use arch-optimized multiply if one exists */ return __hash_32(y ^ __hash_32(x)); } #endif /* * Return the hash of a string of known length. This is carefully * designed to match hash_name(), which is the more critical function. * In particular, we must end by hashing a final word containing 0..7 * payload bytes, to match the way that hash_name() iterates until it * finds the delimiter after the name. */ unsigned int full_name_hash(const void *salt, const char *name, unsigned int len) { unsigned long a, x = 0, y = (unsigned long)salt; for (;;) { if (!len) goto done; a = load_unaligned_zeropad(name); if (len < sizeof(unsigned long)) break; HASH_MIX(x, y, a); name += sizeof(unsigned long); len -= sizeof(unsigned long); } x ^= a & bytemask_from_count(len); done: return fold_hash(x, y); } EXPORT_SYMBOL(full_name_hash); /* Return the "hash_len" (hash and length) of a null-terminated string */ u64 hashlen_string(const void *salt, const char *name) { unsigned long a = 0, x = 0, y = (unsigned long)salt; unsigned long adata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; len = 0; goto inside; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); inside: a = load_unaligned_zeropad(name+len); } while (!has_zero(a, &adata, &constants)); adata = prep_zero_mask(a, adata, &constants); mask = create_zero_mask(adata); x ^= a & zero_bytemask(mask); return hashlen_create(fold_hash(x, y), len + find_zero(mask)); } EXPORT_SYMBOL(hashlen_string); /* * Calculate the length and hash of the path component, and * return the "hash_len" as the result.
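 *
 * The returned u64 packs the component length in the upper 32 bits and
 * the hash in the lower 32 bits (see hashlen_create()/hashlen_len());
 * the scan stops at the first '/' or at the terminating NUL.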
*/ static inline u64 hash_name(const void *salt, const char *name) { unsigned long a = 0, b, x = 0, y = (unsigned long)salt; unsigned long adata, bdata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; len = 0; goto inside; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); inside: a = load_unaligned_zeropad(name+len); b = a ^ REPEAT_BYTE('/'); } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); mask = create_zero_mask(adata | bdata); x ^= a & zero_bytemask(mask); return hashlen_create(fold_hash(x, y), len + find_zero(mask)); } #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ /* Return the hash of a string of known length */ unsigned int full_name_hash(const void *salt, const char *name, unsigned int len) { unsigned long hash = init_name_hash(salt); while (len--) hash = partial_name_hash((unsigned char)*name++, hash); return end_name_hash(hash); } EXPORT_SYMBOL(full_name_hash); /* Return the "hash_len" (hash and length) of a null-terminated string */ u64 hashlen_string(const void *salt, const char *name) { unsigned long hash = init_name_hash(salt); unsigned long len = 0, c; c = (unsigned char)*name; while (c) { len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } return hashlen_create(end_name_hash(hash), len); } EXPORT_SYMBOL(hashlen_string); /* * We know there's a real path component here of at least * one character. */ static inline u64 hash_name(const void *salt, const char *name) { unsigned long hash = init_name_hash(salt); unsigned long len = 0, c; c = (unsigned char)*name; do { len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); return hashlen_create(end_name_hash(hash), len); } #endif /* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. * * Returns 0 and nd will have valid dentry and mnt on success. * Returns error and drops reference to input namei data on failure. */ static int link_path_walk(const char *name, struct nameidata *nd) { int depth = 0; // depth <= nd->depth int err; nd->last_type = LAST_ROOT; nd->flags |= LOOKUP_PARENT; if (IS_ERR(name)) return PTR_ERR(name); while (*name=='/') name++; if (!*name) { nd->dir_mode = 0; // short-circuit the 'hardening' idiocy return 0; } /* At this point we know we have a real path component. */ for(;;) { struct mnt_idmap *idmap; const char *link; u64 hash_len; int type; idmap = mnt_idmap(nd->path.mnt); err = may_lookup(idmap, nd); if (err) return err; hash_len = hash_name(nd->path.dentry, name); type = LAST_NORM; if (name[0] == '.') switch (hashlen_len(hash_len)) { case 2: if (name[1] == '.') { type = LAST_DOTDOT; nd->state |= ND_JUMPED; } break; case 1: type = LAST_DOT; } if (likely(type == LAST_NORM)) { struct dentry *parent = nd->path.dentry; nd->state &= ~ND_JUMPED; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { struct qstr this = { { .hash_len = hash_len }, .name = name }; err = parent->d_op->d_hash(parent, &this); if (err < 0) return err; hash_len = this.hash_len; name = this.name; } } nd->last.hash_len = hash_len; nd->last.name = name; nd->last_type = type; name += hashlen_len(hash_len); if (!*name) goto OK; /* * If it wasn't NUL, we know it was '/'. Skip that * slash, and continue until no more slashes. 
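 * A trailing '/' therefore leaves *name == '\0' here, so the component
 * is treated as the last one; lookup_last() later notices the character
 * following it (nd->last.name[nd->last.len]) and adds
 * LOOKUP_FOLLOW | LOOKUP_DIRECTORY.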
*/ do { name++; } while (unlikely(*name == '/')); if (unlikely(!*name)) { OK: /* pathname or trailing symlink, done */ if (!depth) { nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode); nd->dir_mode = nd->inode->i_mode; nd->flags &= ~LOOKUP_PARENT; return 0; } /* last component of nested symlink */ name = nd->stack[--depth].name; link = walk_component(nd, 0); } else { /* not the last component */ link = walk_component(nd, WALK_MORE); } if (unlikely(link)) { if (IS_ERR(link)) return PTR_ERR(link); /* a symlink to follow */ nd->stack[depth++].name = name; name = link; continue; } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return -ECHILD; } return -ENOTDIR; } } } /* must be paired with terminate_walk() */ static const char *path_init(struct nameidata *nd, unsigned flags) { int error; const char *s = nd->name->name; /* LOOKUP_CACHED requires RCU, ask caller to retry */ if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED) return ERR_PTR(-EAGAIN); if (!*s) flags &= ~LOOKUP_RCU; if (flags & LOOKUP_RCU) rcu_read_lock(); else nd->seq = nd->next_seq = 0; nd->flags = flags; nd->state |= ND_JUMPED; nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount); nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount); smp_rmb(); if (nd->state & ND_ROOT_PRESET) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; if (*s && unlikely(!d_can_lookup(root))) return ERR_PTR(-ENOTDIR); nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); nd->root_seq = nd->seq; } else { path_get(&nd->path); } return s; } nd->root.mnt = NULL; /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */ if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) { error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); return s; } /* Relative pathname -- get the starting-point it is relative to. */ if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; unsigned seq; do { seq = read_seqcount_begin(&fs->seq); nd->path = fs->pwd; nd->inode = nd->path.dentry->d_inode; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } else { get_fs_pwd(current->fs, &nd->path); nd->inode = nd->path.dentry->d_inode; } } else { /* Caller must check execute permissions on the starting path component */ struct fd f = fdget_raw(nd->dfd); struct dentry *dentry; if (!f.file) return ERR_PTR(-EBADF); dentry = f.file->f_path.dentry; if (*s && unlikely(!d_can_lookup(dentry))) { fdput(f); return ERR_PTR(-ENOTDIR); } nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { nd->inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } else { path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } fdput(f); } /* For scoped-lookups we need to set the root to the dirfd as well. 
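 * This is what anchors LOOKUP_BENEATH / LOOKUP_IN_ROOT resolution:
 * with nd->root pinned to the starting point, ".." and absolute paths
 * are re-anchored at that root (or rejected) instead of escaping it.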
*/ if (flags & LOOKUP_IS_SCOPED) { nd->root = nd->path; if (flags & LOOKUP_RCU) { nd->root_seq = nd->seq; } else { path_get(&nd->root); nd->state |= ND_ROOT_GRABBED; } } return s; } static inline const char *lookup_last(struct nameidata *nd) { if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; return walk_component(nd, WALK_TRAILING); } static int handle_lookup_down(struct nameidata *nd) { if (!(nd->flags & LOOKUP_RCU)) dget(nd->path.dentry); nd->next_seq = nd->seq; return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry)); } /* Returns 0 and nd will be valid on success; Returns error, otherwise. */ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path) { const char *s = path_init(nd, flags); int err; if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) { err = handle_lookup_down(nd); if (unlikely(err < 0)) s = ERR_PTR(err); } while (!(err = link_path_walk(s, nd)) && (s = lookup_last(nd)) != NULL) ; if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) { err = handle_lookup_down(nd); nd->state &= ~ND_JUMPED; // no d_weak_revalidate(), please... } if (!err) err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) if (!d_can_lookup(nd->path.dentry)) err = -ENOTDIR; if (!err) { *path = nd->path; nd->path.mnt = NULL; nd->path.dentry = NULL; } terminate_walk(nd); return err; } int filename_lookup(int dfd, struct filename *name, unsigned flags, struct path *path, struct path *root) { int retval; struct nameidata nd; if (IS_ERR(name)) return PTR_ERR(name); set_nameidata(&nd, dfd, name, root); retval = path_lookupat(&nd, flags | LOOKUP_RCU, path); if (unlikely(retval == -ECHILD)) retval = path_lookupat(&nd, flags, path); if (unlikely(retval == -ESTALE)) retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path); if (likely(!retval)) audit_inode(name, path->dentry, flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0); restore_nameidata(); return retval; } /* Returns 0 and nd will be valid on success; Returns error, otherwise. 
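 * Unlike path_lookupat(), this stops at the parent directory;
 * link_path_walk() leaves the final component in nd->last and
 * nd->last_type for the caller to act on.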
*/ static int path_parentat(struct nameidata *nd, unsigned flags, struct path *parent) { const char *s = path_init(nd, flags); int err = link_path_walk(s, nd); if (!err) err = complete_walk(nd); if (!err) { *parent = nd->path; nd->path.mnt = NULL; nd->path.dentry = NULL; } terminate_walk(nd); return err; } /* Note: this does not consume "name" */ static int __filename_parentat(int dfd, struct filename *name, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root) { int retval; struct nameidata nd; if (IS_ERR(name)) return PTR_ERR(name); set_nameidata(&nd, dfd, name, root); retval = path_parentat(&nd, flags | LOOKUP_RCU, parent); if (unlikely(retval == -ECHILD)) retval = path_parentat(&nd, flags, parent); if (unlikely(retval == -ESTALE)) retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent); if (likely(!retval)) { *last = nd.last; *type = nd.last_type; audit_inode(name, parent->dentry, AUDIT_INODE_PARENT); } restore_nameidata(); return retval; } static int filename_parentat(int dfd, struct filename *name, unsigned int flags, struct path *parent, struct qstr *last, int *type) { return __filename_parentat(dfd, name, flags, parent, last, type, NULL); } /* does lookup, returns the object with parent locked */ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path) { struct dentry *d; struct qstr last; int type, error; error = filename_parentat(dfd, name, 0, path, &last, &type); if (error) return ERR_PTR(error); if (unlikely(type != LAST_NORM)) { path_put(path); return ERR_PTR(-EINVAL); } inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); d = lookup_one_qstr_excl(&last, path->dentry, 0); if (IS_ERR(d)) { inode_unlock(path->dentry->d_inode); path_put(path); } return d; } struct dentry *kern_path_locked(const char *name, struct path *path) { struct filename *filename = getname_kernel(name); struct dentry *res = __kern_path_locked(AT_FDCWD, filename, path); putname(filename); return res; } struct dentry *user_path_locked_at(int dfd, const char __user *name, struct path *path) { struct filename *filename = getname(name); struct dentry *res = __kern_path_locked(dfd, filename, path); putname(filename); return res; } EXPORT_SYMBOL(user_path_locked_at); int kern_path(const char *name, unsigned int flags, struct path *path) { struct filename *filename = getname_kernel(name); int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL); putname(filename); return ret; } EXPORT_SYMBOL(kern_path); /** * vfs_path_parent_lookup - lookup a parent path relative to a dentry-vfsmount pair * @filename: filename structure * @flags: lookup flags * @parent: pointer to struct path to fill * @last: last component * @type: type of the last component * @root: pointer to struct path of the base directory */ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root) { return __filename_parentat(AT_FDCWD, filename, flags, parent, last, type, root); } EXPORT_SYMBOL(vfs_path_parent_lookup); /** * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair * @dentry: pointer to dentry of the base directory * @mnt: pointer to vfs mount of the base directory * @name: pointer to file name * @flags: lookup flags * @path: pointer to struct path to fill */ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct path *path) { struct filename *filename; struct path root = {.mnt = mnt, .dentry 
= dentry}; int ret; filename = getname_kernel(name); /* the first argument of filename_lookup() is ignored with root */ ret = filename_lookup(AT_FDCWD, filename, flags, path, &root); putname(filename); return ret; } EXPORT_SYMBOL(vfs_path_lookup); static int lookup_one_common(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len, struct qstr *this) { this->name = name; this->len = len; this->hash = full_name_hash(base, name, len); if (!len) return -EACCES; if (is_dot_dotdot(name, len)) return -EACCES; while (len--) { unsigned int c = *(const unsigned char *)name++; if (c == '/' || c == '\0') return -EACCES; } /* * See if the low-level filesystem might want * to use its own hash.. */ if (base->d_flags & DCACHE_OP_HASH) { int err = base->d_op->d_hash(base, this); if (err < 0) return err; } return inode_permission(idmap, base->d_inode, MAY_EXEC); } /** * try_lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Look up a dentry by name in the dcache, returning NULL if it does not * currently exist. The function does not try to create a dentry. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The caller must hold base->i_mutex. */ struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len) { struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); if (err) return ERR_PTR(err); return lookup_dcache(&this, base, 0); } EXPORT_SYMBOL(try_lookup_one_len); /** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The caller must hold base->i_mutex. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { struct dentry *dentry; struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); if (err) return ERR_PTR(err); dentry = lookup_dcache(&this, base, 0); return dentry ? dentry : __lookup_slow(&this, base, 0); } EXPORT_SYMBOL(lookup_one_len); /** * lookup_one - filesystem helper to lookup single pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The caller must hold base->i_mutex. */ struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct dentry *dentry; struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); err = lookup_one_common(idmap, name, base, len, &this); if (err) return ERR_PTR(err); dentry = lookup_dcache(&this, base, 0); return dentry ? 
dentry : __lookup_slow(&this, base, 0); } EXPORT_SYMBOL(lookup_one); /** * lookup_one_unlocked - filesystem helper to lookup single pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * Unlike lookup_one_len, it should be called without the parent * i_mutex held, and will take the i_mutex itself if necessary. */ struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct qstr this; int err; struct dentry *ret; err = lookup_one_common(idmap, name, base, len, &this); if (err) return ERR_PTR(err); ret = lookup_dcache(&this, base, 0); if (!ret) ret = lookup_slow(&this, base, 0); return ret; } EXPORT_SYMBOL(lookup_one_unlocked); /** * lookup_one_positive_unlocked - filesystem helper to lookup single * pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns * known positive or ERR_PTR(). This is what most of the users want. * * Note that pinned negative with unlocked parent _can_ become positive at any * time, so callers of lookup_one_unlocked() need to be very careful; pinned * positives have ->d_inode stable, so this one avoids such problems. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The helper should be called without i_mutex held. */ struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct dentry *ret = lookup_one_unlocked(idmap, name, base, len); if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { dput(ret); ret = ERR_PTR(-ENOENT); } return ret; } EXPORT_SYMBOL(lookup_one_positive_unlocked); /** * lookup_one_len_unlocked - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * Unlike lookup_one_len, it should be called without the parent * i_mutex held, and will take the i_mutex itself if necessary. */ struct dentry *lookup_one_len_unlocked(const char *name, struct dentry *base, int len) { return lookup_one_unlocked(&nop_mnt_idmap, name, base, len); } EXPORT_SYMBOL(lookup_one_len_unlocked); /* * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT) * on negatives. Returns known positive or ERR_PTR(); that's what * most of the users want. Note that pinned negative with unlocked parent * _can_ become positive at any time, so callers of lookup_one_len_unlocked() * need to be very careful; pinned positives have ->d_inode stable, so * this one avoids such problems. */ struct dentry *lookup_positive_unlocked(const char *name, struct dentry *base, int len) { return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len); } EXPORT_SYMBOL(lookup_positive_unlocked); #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { /* Find something mounted on "pts" in the same directory as * the input path.
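 * (i.e. look up the sibling named "pts" of the final component and step
 * onto whatever is mounted on it, typically a devpts instance for the
 * Unix98 /dev/ptmx case, hence CONFIG_UNIX98_PTYS.)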
*/ struct dentry *parent = dget_parent(path->dentry); struct dentry *child; struct qstr this = QSTR_INIT("pts", 3); if (unlikely(!path_connected(path->mnt, parent))) { dput(parent); return -ENOENT; } dput(path->dentry); path->dentry = parent; child = d_hash_and_lookup(parent, &this); if (IS_ERR_OR_NULL(child)) return -ENOENT; path->dentry = child; dput(parent); follow_down(path, 0); return 0; } #endif int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { struct filename *filename = getname_flags(name, flags, empty); int ret = filename_lookup(dfd, filename, flags, path, NULL); putname(filename); return ret; } EXPORT_SYMBOL(user_path_at_empty); int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { kuid_t fsuid = current_fsuid(); if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), fsuid)) return 0; if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, dir), fsuid)) return 0; return !capable_wrt_inode_uidgid(idmap, inode, CAP_FOWNER); } EXPORT_SYMBOL(__check_sticky); /* * Check whether we can remove a link victim from directory dir, check * whether the type of victim is right. * 1. We can't do it if dir is read-only (done in permission()) * 2. We should have write and exec permissions on dir * 3. We can't remove anything from append-only dir * 4. We can't do anything with immutable dir (done in permission()) * 5. If the sticky bit on dir is set we should either * a. be owner of dir, or * b. be owner of victim, or * c. have CAP_FOWNER capability * 6. If the victim is append-only or immutable we can't do anything with * links pointing to it. * 7. If the victim has an unknown uid or gid we can't change the inode. * 8. If we were asked to remove a directory and victim isn't one - ENOTDIR. * 9. If we were asked to remove a non-directory and victim isn't one - EISDIR. * 10. We can't remove a root or mountpoint. * 11. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ static int may_delete(struct mnt_idmap *idmap, struct inode *dir, struct dentry *victim, bool isdir) { struct inode *inode = d_backing_inode(victim); int error; if (d_is_negative(victim)) return -ENOENT; BUG_ON(!inode); BUG_ON(victim->d_parent->d_inode != dir); /* Inode writeback is not safe when the uid or gid are invalid. */ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(idmap, dir, inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; if (isdir) { if (!d_is_dir(victim)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; } else if (d_is_dir(victim)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; if (victim->d_flags & DCACHE_NFSFS_RENAMED) return -EBUSY; return 0; } /* Check whether we can create an object with dentry child in directory * dir. * 1. We can't do it if child already exists (open has special treatment for * this case, but since we are inlined it's OK) * 2. We can't do it if dir is read-only (done in permission()) * 3. We can't do it if the fs can't represent the fsuid or fsgid. * 4. We should have write and exec permissions on dir * 5.
We can't do it if dir is immutable (done in permission()) */ static inline int may_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *child) { audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; if (!fsuidgid_has_mapping(dir->i_sb, idmap)) return -EOVERFLOW; return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); } // p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) { struct dentry *p = p1, *q = p2, *r; while ((r = p->d_parent) != p2 && r != p) p = r; if (r == p2) { // p is a child of p2 and an ancestor of p1 or p1 itself inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2); return p; } // p is the root of connected component that contains p1 // p2 does not occur on the path from p to p1 while ((r = q->d_parent) != p1 && r != p && r != q) q = r; if (r == p1) { // q is a child of p1 and an ancestor of p2 or p2 itself inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); return q; } else if (likely(r == p)) { // both p2 and p1 are descendents of p inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); return NULL; } else { // no common ancestor at the time we'd been called mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); return ERR_PTR(-EXDEV); } } /* * p1 and p2 should be directories on the same fs. */ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) { if (p1 == p2) { inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); return NULL; } mutex_lock(&p1->d_sb->s_vfs_rename_mutex); return lock_two_directories(p1, p2); } EXPORT_SYMBOL(lock_rename); /* * c1 and p2 should be on the same fs. */ struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2) { if (READ_ONCE(c1->d_parent) == p2) { /* * hopefully won't need to touch ->s_vfs_rename_mutex at all. */ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); /* * now that p2 is locked, nobody can move in or out of it, * so the test below is safe. */ if (likely(c1->d_parent == p2)) return NULL; /* * c1 got moved out of p2 while we'd been taking locks; * unlock and fall back to slow case. */ inode_unlock(p2->d_inode); } mutex_lock(&c1->d_sb->s_vfs_rename_mutex); /* * nobody can move out of any directories on this fs. */ if (likely(c1->d_parent != p2)) return lock_two_directories(c1->d_parent, p2); /* * c1 got moved into p2 while we were taking locks; * we need p2 locked and ->s_vfs_rename_mutex unlocked, * for consistency with lock_rename(). */ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); mutex_unlock(&c1->d_sb->s_vfs_rename_mutex); return NULL; } EXPORT_SYMBOL(lock_rename_child); void unlock_rename(struct dentry *p1, struct dentry *p2) { inode_unlock(p1->d_inode); if (p1 != p2) { inode_unlock(p2->d_inode); mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); } } EXPORT_SYMBOL(unlock_rename); /** * vfs_prepare_mode - prepare the mode to be used for a new inode * @idmap: idmap of the mount the inode was found from * @dir: parent directory of the new inode * @mode: mode of the new inode * @mask_perms: allowed permission by the vfs * @type: type of file to be created * * This helper consolidates and enforces vfs restrictions on the @mode of a new * object to be created. * * Umask stripping depends on whether the filesystem supports POSIX ACLs (see * the kernel documentation for mode_strip_umask()). 
Moving umask stripping * after setgid stripping allows the same ordering for both non-POSIX ACL and * POSIX ACL supporting filesystems. * * Note that it's currently valid for @type to be 0 if a directory is created. * Filesystems raise that flag individually and we need to check whether each * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a * non-zero type. * * Returns: mode to be passed to the filesystem */ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode, umode_t mask_perms, umode_t type) { mode = mode_strip_sgid(idmap, dir, mode); mode = mode_strip_umask(dir, mode); /* * Apply the vfs mandated allowed permission mask and set the type of * file to be created before we call into the filesystem. */ mode &= (mask_perms & ~S_IFMT); mode |= (type & S_IFMT); return mode; } /** * vfs_create - create new file * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * @mode: mode of the new file * @want_excl: whether the file must not yet exist * * Create a new file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool want_excl) { int error; error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->create) return -EACCES; /* shouldn't it be ENOSYS? */ mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG); error = security_inode_create(dir, dentry, mode); if (error) return error; error = dir->i_op->create(idmap, dir, dentry, mode, want_excl); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_create); int vfs_mkobj(struct dentry *dentry, umode_t mode, int (*f)(struct dentry *, umode_t, void *), void *arg) { struct inode *dir = dentry->d_parent->d_inode; int error = may_create(&nop_mnt_idmap, dir, dentry); if (error) return error; mode &= S_IALLUGO; mode |= S_IFREG; error = security_inode_create(dir, dentry, mode); if (error) return error; error = f(dentry, mode, arg); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_mkobj); bool may_open_dev(const struct path *path) { return !(path->mnt->mnt_flags & MNT_NODEV) && !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); } static int may_open(struct mnt_idmap *idmap, const struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; int error; if (!inode) return -ENOENT; switch (inode->i_mode & S_IFMT) { case S_IFLNK: return -ELOOP; case S_IFDIR: if (acc_mode & MAY_WRITE) return -EISDIR; if (acc_mode & MAY_EXEC) return -EACCES; break; case S_IFBLK: case S_IFCHR: if (!may_open_dev(path)) return -EACCES; fallthrough; case S_IFIFO: case S_IFSOCK: if (acc_mode & MAY_EXEC) return -EACCES; flag &= ~O_TRUNC; break; case S_IFREG: if ((acc_mode & MAY_EXEC) && path_noexec(path)) return -EACCES; break; } error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); if (error) return error; /* * An append-only file must be opened in append mode for writing. 
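 *
 * A minimal illustration of the checks below, assuming "foo" carries the
 * append-only attribute (e.g. set via chattr +a); all other permission
 * checks are ignored here:
 *
 *	open("foo", O_RDONLY)				passes this check
 *	open("foo", O_WRONLY)				rejected, -EPERM (no O_APPEND)
 *	open("foo", O_WRONLY | O_APPEND)		passes this check
 *	open("foo", O_WRONLY | O_APPEND | O_TRUNC)	rejected, -EPERM (truncate refused)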
*/ if (IS_APPEND(inode)) { if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) return -EPERM; if (flag & O_TRUNC) return -EPERM; } /* O_NOATIME can only be set by the owner or superuser */ if (flag & O_NOATIME && !inode_owner_or_capable(idmap, inode)) return -EPERM; return 0; } static int handle_truncate(struct mnt_idmap *idmap, struct file *filp) { const struct path *path = &filp->f_path; struct inode *inode = path->dentry->d_inode; int error = get_write_access(inode); if (error) return error; error = security_file_truncate(filp); if (!error) { error = do_truncate(idmap, path->dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, filp); } put_write_access(inode); return error; } static inline int open_to_namei_flags(int flag) { if ((flag & O_ACCMODE) == 3) flag--; return flag; } static int may_o_create(struct mnt_idmap *idmap, const struct path *dir, struct dentry *dentry, umode_t mode) { int error = security_path_mknod(dir, dentry, mode, 0); if (error) return error; if (!fsuidgid_has_mapping(dir->dentry->d_sb, idmap)) return -EOVERFLOW; error = inode_permission(idmap, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); if (error) return error; return security_inode_create(dir->dentry->d_inode, dentry, mode); } /* * Attempt to atomically look up, create and open a file from a negative * dentry. * * Returns 0 if successful. The file will have been created and attached to * @file by the filesystem calling finish_open(). * * If the file was looked up only or didn't need creating, FMODE_OPENED won't * be set. The caller will need to perform the open themselves. @path will * have been updated to point to the new dentry. This may be negative. * * Returns an error code otherwise. */ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry, struct file *file, int open_flag, umode_t mode) { struct dentry *const DENTRY_NOT_SET = (void *) -1UL; struct inode *dir = nd->path.dentry->d_inode; int error; if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; file->f_path.dentry = DENTRY_NOT_SET; file->f_path.mnt = nd->path.mnt; error = dir->i_op->atomic_open(dir, dentry, file, open_to_namei_flags(open_flag), mode); d_lookup_done(dentry); if (!error) { if (file->f_mode & FMODE_OPENED) { if (unlikely(dentry != file->f_path.dentry)) { dput(dentry); dentry = dget(file->f_path.dentry); } } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { error = -EIO; } else { if (file->f_path.dentry) { dput(dentry); dentry = file->f_path.dentry; } if (unlikely(d_is_negative(dentry))) error = -ENOENT; } } if (error) { dput(dentry); dentry = ERR_PTR(error); } return dentry; } /* * Look up and maybe create and open the last component. * * Must be called with parent locked (exclusive in O_CREAT case). * * Returns 0 on success, that is, if * the file was successfully atomically created (if necessary) and opened, or * the file was not completely opened at this time, though lookups and * creations were performed. * These case are distinguished by presence of FMODE_OPENED on file->f_mode. * In the latter case dentry returned in @path might be negative if O_CREAT * hadn't been specified. * * An error code is returned on failure. 
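 *
 * Illustrative sketch (not a literal caller; see open_last_lookups() below
 * for the real consumer) of how the outcomes are told apart:
 *
 *	dentry = lookup_open(nd, file, op, got_write);
 *	if (IS_ERR(dentry))
 *		...hard error...
 *	else if (file->f_mode & FMODE_OPENED)
 *		...->atomic_open() already opened (and possibly created) it...
 *	else
 *		...lookup (and possibly creation) done, open still pending;
 *		   the dentry may be negative if O_CREAT wasn't requested...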
*/ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, const struct open_flags *op, bool got_write) { struct mnt_idmap *idmap; struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; int open_flag = op->open_flag; struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); if (unlikely(IS_DEADDIR(dir_inode))) return ERR_PTR(-ENOENT); file->f_mode &= ~FMODE_CREATED; dentry = d_lookup(dir, &nd->last); for (;;) { if (!dentry) { dentry = d_alloc_parallel(dir, &nd->last, &wq); if (IS_ERR(dentry)) return dentry; } if (d_in_lookup(dentry)) break; error = d_revalidate(dentry, nd->flags); if (likely(error > 0)) break; if (error) goto out_dput; d_invalidate(dentry); dput(dentry); dentry = NULL; } if (dentry->d_inode) { /* Cached positive dentry: will open in f_op->open */ return dentry; } /* * Checking write permission is tricky, bacuse we don't know if we are * going to actually need it: O_CREAT opens should work as long as the * file exists. But checking existence breaks atomicity. The trick is * to check access and if not granted clear O_CREAT from the flags. * * Another problem is returing the "right" error value (e.g. for an * O_EXCL open we want to return EEXIST not EROFS). */ if (unlikely(!got_write)) open_flag &= ~O_TRUNC; idmap = mnt_idmap(nd->path.mnt); if (open_flag & O_CREAT) { if (open_flag & O_EXCL) open_flag &= ~O_TRUNC; mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode); if (likely(got_write)) create_error = may_o_create(idmap, &nd->path, dentry, mode); else create_error = -EROFS; } if (create_error) open_flag &= ~O_CREAT; if (dir_inode->i_op->atomic_open) { dentry = atomic_open(nd, dentry, file, open_flag, mode); if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT)) dentry = ERR_PTR(create_error); return dentry; } if (d_in_lookup(dentry)) { struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry, nd->flags); d_lookup_done(dentry); if (unlikely(res)) { if (IS_ERR(res)) { error = PTR_ERR(res); goto out_dput; } dput(dentry); dentry = res; } } /* Negative dentry, just create the file */ if (!dentry->d_inode && (open_flag & O_CREAT)) { file->f_mode |= FMODE_CREATED; audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); if (!dir_inode->i_op->create) { error = -EACCES; goto out_dput; } error = dir_inode->i_op->create(idmap, dir_inode, dentry, mode, open_flag & O_EXCL); if (error) goto out_dput; } if (unlikely(create_error) && !dentry->d_inode) { error = create_error; goto out_dput; } return dentry; out_dput: dput(dentry); return ERR_PTR(error); } static const char *open_last_lookups(struct nameidata *nd, struct file *file, const struct open_flags *op) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool got_write = false; struct dentry *dentry; const char *res; nd->flags |= op->intent; if (nd->last_type != LAST_NORM) { if (nd->depth) put_link(nd); return handle_dots(nd, nd->last_type); } if (!(open_flag & O_CREAT)) { if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; /* we _can_ be in RCU mode here */ dentry = lookup_fast(nd); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (likely(dentry)) goto finish_lookup; if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) return ERR_PTR(-ECHILD); } else { /* create side of things */ if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); } audit_inode(nd->name, dir, AUDIT_INODE_PARENT); /* trailing slashes? 
*/ if (unlikely(nd->last.name[nd->last.len])) return ERR_PTR(-EISDIR); } if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { got_write = !mnt_want_write(nd->path.mnt); /* * do _not_ fail yet - we might not need that or fail with * a different error; let lookup_open() decide; we'll be * dropping this one anyway. */ } if (open_flag & O_CREAT) inode_lock(dir->d_inode); else inode_lock_shared(dir->d_inode); dentry = lookup_open(nd, file, op, got_write); if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED)) fsnotify_create(dir->d_inode, dentry); if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else inode_unlock_shared(dir->d_inode); if (got_write) mnt_drop_write(nd->path.mnt); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) { dput(nd->path.dentry); nd->path.dentry = dentry; return NULL; } finish_lookup: if (nd->depth) put_link(nd); res = step_into(nd, WALK_TRAILING, dentry); if (unlikely(res)) nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); return res; } /* * Handle the last step of open() */ static int do_open(struct nameidata *nd, struct file *file, const struct open_flags *op) { struct mnt_idmap *idmap; int open_flag = op->open_flag; bool do_truncate; int acc_mode; int error; if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) { error = complete_walk(nd); if (error) return error; } if (!(file->f_mode & FMODE_CREATED)) audit_inode(nd->name, nd->path.dentry, 0); idmap = mnt_idmap(nd->path.mnt); if (open_flag & O_CREAT) { if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED)) return -EEXIST; if (d_is_dir(nd->path.dentry)) return -EISDIR; error = may_create_in_sticky(idmap, nd, d_backing_inode(nd->path.dentry)); if (unlikely(error)) return error; } if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) return -ENOTDIR; do_truncate = false; acc_mode = op->acc_mode; if (file->f_mode & FMODE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; acc_mode = 0; } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) { error = mnt_want_write(nd->path.mnt); if (error) return error; do_truncate = true; } error = may_open(idmap, &nd->path, acc_mode, open_flag); if (!error && !(file->f_mode & FMODE_OPENED)) error = vfs_open(&nd->path, file); if (!error) error = security_file_post_open(file, op->acc_mode); if (!error && do_truncate) error = handle_truncate(idmap, file); if (unlikely(error > 0)) { WARN_ON(1); error = -EINVAL; } if (do_truncate) mnt_drop_write(nd->path.mnt); return error; } /** * vfs_tmpfile - create tmpfile * @idmap: idmap of the mount the inode was found from * @parentpath: pointer to the path of the base directory * @file: file descriptor of the new tmpfile * @mode: mode of the new tmpfile * * Create a temporary file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. 
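 *
 * Rough sketch of in-kernel use via the kernel_tmpfile_open() wrapper further
 * down; the parent path, mode and flags shown are assumptions of the example
 * and error handling is elided:
 *
 *	file = kernel_tmpfile_open(mnt_idmap(parentpath->mnt), parentpath,
 *				   S_IFREG | 0600, O_RDWR, current_cred());
 *	if (IS_ERR(file))
 *		return PTR_ERR(file);
 *	...
 *	fput(file);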
*/ static int vfs_tmpfile(struct mnt_idmap *idmap, const struct path *parentpath, struct file *file, umode_t mode) { struct dentry *child; struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (!dir->i_op->tmpfile) return -EOPNOTSUPP; child = d_alloc(parentpath->dentry, &slash_name); if (unlikely(!child)) return -ENOMEM; file->f_path.mnt = parentpath->mnt; file->f_path.dentry = child; mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = dir->i_op->tmpfile(idmap, dir, file, mode); dput(child); if (error) return error; /* Don't check for other permissions, the inode was just created */ error = may_open(idmap, &file->f_path, 0, file->f_flags); if (error) return error; inode = file_inode(file); if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } security_inode_post_create_tmpfile(idmap, inode); return 0; } /** * kernel_tmpfile_open - open a tmpfile for kernel internal use * @idmap: idmap of the mount the inode was found from * @parentpath: path of the base directory * @mode: mode of the new tmpfile * @open_flag: flags * @cred: credentials for open * * Create and open a temporary file. The file is not accounted in nr_files, * hence this is only for kernel internal use, and must not be installed into * file tables or such. */ struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred) { struct file *file; int error; file = alloc_empty_file_noaccount(open_flag, cred); if (IS_ERR(file)) return file; error = vfs_tmpfile(idmap, parentpath, file, mode); if (error) { fput(file); file = ERR_PTR(error); } return file; } EXPORT_SYMBOL(kernel_tmpfile_open); static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file) { struct path path; int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) return error; error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; error = vfs_tmpfile(mnt_idmap(path.mnt), &path, file, op->mode); if (error) goto out2; audit_inode(nd->name, file->f_path.dentry, 0); out2: mnt_drop_write(path.mnt); out: path_put(&path); return error; } static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file) { struct path path; int error = path_lookupat(nd, flags, &path); if (!error) { audit_inode(nd->name, path.dentry, 0); error = vfs_open(&path, file); path_put(&path); } return error; } static struct file *path_openat(struct nameidata *nd, const struct open_flags *op, unsigned flags) { struct file *file; int error; file = alloc_empty_file(op->open_flag, current_cred()); if (IS_ERR(file)) return file; if (unlikely(file->f_flags & __O_TMPFILE)) { error = do_tmpfile(nd, flags, op, file); } else if (unlikely(file->f_flags & O_PATH)) { error = do_o_path(nd, flags, file); } else { const char *s = path_init(nd, flags); while (!(error = link_path_walk(s, nd)) && (s = open_last_lookups(nd, file, op)) != NULL) ; if (!error) error = do_open(nd, file, op); terminate_walk(nd); } if (likely(!error)) { if (likely(file->f_mode & FMODE_OPENED)) return file; WARN_ON(1); error = -EINVAL; } fput(file); if (error == -EOPENSTALE) { if (flags & LOOKUP_RCU) error = -ECHILD; else error = -ESTALE; } return ERR_PTR(error); } struct file *do_filp_open(int dfd, struct filename 
*pathname, const struct open_flags *op) { struct nameidata nd; int flags = op->lookup_flags; struct file *filp; set_nameidata(&nd, dfd, pathname, NULL); filp = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(filp == ERR_PTR(-ECHILD))) filp = path_openat(&nd, op, flags); if (unlikely(filp == ERR_PTR(-ESTALE))) filp = path_openat(&nd, op, flags | LOOKUP_REVAL); restore_nameidata(); return filp; } struct file *do_file_open_root(const struct path *root, const char *name, const struct open_flags *op) { struct nameidata nd; struct file *file; struct filename *filename; int flags = op->lookup_flags; if (d_is_symlink(root->dentry) && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); filename = getname_kernel(name); if (IS_ERR(filename)) return ERR_CAST(filename); set_nameidata(&nd, -1, filename, root); file = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(file == ERR_PTR(-ECHILD))) file = path_openat(&nd, op, flags); if (unlikely(file == ERR_PTR(-ESTALE))) file = path_openat(&nd, op, flags | LOOKUP_REVAL); restore_nameidata(); putname(filename); return file; } static struct dentry *filename_create(int dfd, struct filename *name, struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); struct qstr last; bool want_dir = lookup_flags & LOOKUP_DIRECTORY; unsigned int reval_flag = lookup_flags & LOOKUP_REVAL; unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL; int type; int err2; int error; error = filename_parentat(dfd, name, reval_flag, path, &last, &type); if (error) return ERR_PTR(error); /* * Yucky last component or no last component at all? * (foo/., foo/.., /////) */ if (unlikely(type != LAST_NORM)) goto out; /* don't fail immediately if it's r/o, at least try to report other errors */ err2 = mnt_want_write(path->mnt); /* * Do the final lookup. Suppress 'create' if there is a trailing * '/', and a directory wasn't requested. */ if (last.name[last.len] && !want_dir) create_flags = 0; inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path->dentry, reval_flag | create_flags); if (IS_ERR(dentry)) goto unlock; error = -EEXIST; if (d_is_positive(dentry)) goto fail; /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - * all is fine. Let's be bastards - you had / on the end, you've * been asking for (non-existent) directory. -ENOENT for you. 
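 *
 * For example (illustrative, matching the callers further down):
 *
 *	mknod("foo/bar/", mode, dev)	-> create_flags cleared here, -ENOENT
 *	mkdir("foo/bar/", mode)		-> LOOKUP_DIRECTORY set, creation proceeds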
*/ if (unlikely(!create_flags)) { error = -ENOENT; goto fail; } if (unlikely(err2)) { error = err2; goto fail; } return dentry; fail: dput(dentry); dentry = ERR_PTR(error); unlock: inode_unlock(path->dentry->d_inode); if (!err2) mnt_drop_write(path->mnt); out: path_put(path); return dentry; } struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, unsigned int lookup_flags) { struct filename *filename = getname_kernel(pathname); struct dentry *res = filename_create(dfd, filename, path, lookup_flags); putname(filename); return res; } EXPORT_SYMBOL(kern_path_create); void done_path_create(struct path *path, struct dentry *dentry) { dput(dentry); inode_unlock(path->dentry->d_inode); mnt_drop_write(path->mnt); path_put(path); } EXPORT_SYMBOL(done_path_create); inline struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, unsigned int lookup_flags) { struct filename *filename = getname(pathname); struct dentry *res = filename_create(dfd, filename, path, lookup_flags); putname(filename); return res; } EXPORT_SYMBOL(user_path_create); /** * vfs_mknod - create device node or file * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * @mode: mode of the new device node or file * @dev: device number of device to create * * Create a device node or file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; int error = may_create(idmap, dir, dentry); if (error) return error; if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout && !capable(CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) return -EPERM; mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = devcgroup_inode_mknod(mode, dev); if (error) return error; error = security_inode_mknod(dir, dentry, mode, dev); if (error) return error; error = dir->i_op->mknod(idmap, dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_mknod); static int may_mknod(umode_t mode) { switch (mode & S_IFMT) { case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: case 0: /* zero mode translates to S_IFREG */ return 0; case S_IFDIR: return -EPERM; default: return -EINVAL; } } static int do_mknodat(int dfd, struct filename *name, umode_t mode, unsigned int dev) { struct mnt_idmap *idmap; struct dentry *dentry; struct path path; int error; unsigned int lookup_flags = 0; error = may_mknod(mode); if (error) goto out1; retry: dentry = filename_create(dfd, name, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out1; error = security_path_mknod(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode), dev); if (error) goto out2; idmap = mnt_idmap(path.mnt); switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(idmap, path.dentry->d_inode, dentry, mode, true); if (!error) security_path_post_mknod(idmap, dentry); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, new_decode_dev(dev)); break; case S_IFIFO: case S_IFSOCK: error = vfs_mknod(idmap, 
path.dentry->d_inode, dentry, mode, 0); break; } out2: done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out1: putname(name); return error; } SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned int, dev) { return do_mknodat(dfd, getname(filename), mode, dev); } SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev) { return do_mknodat(AT_FDCWD, getname(filename), mode, dev); } /** * vfs_mkdir - create directory * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * @mode: mode of the new directory * * Create a directory. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int error; unsigned max_links = dir->i_sb->s_max_links; error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->mkdir) return -EPERM; mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); error = security_inode_mkdir(dir, dentry, mode); if (error) return error; if (max_links && dir->i_nlink >= max_links) return -EMLINK; error = dir->i_op->mkdir(idmap, dir, dentry, mode); if (!error) fsnotify_mkdir(dir, dentry); return error; } EXPORT_SYMBOL(vfs_mkdir); int do_mkdirat(int dfd, struct filename *name, umode_t mode) { struct dentry *dentry; struct path path; int error; unsigned int lookup_flags = LOOKUP_DIRECTORY; retry: dentry = filename_create(dfd, name, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putname; error = security_path_mkdir(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, mode); } done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out_putname: putname(name); return error; } SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) { return do_mkdirat(dfd, getname(pathname), mode); } SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) { return do_mkdirat(AT_FDCWD, getname(pathname), mode); } /** * vfs_rmdir - remove directory * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * * Remove a directory. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. 
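 *
 * Typical caller pattern, condensed from do_rmdir() below (security hook,
 * write-access bookkeeping and error handling elided):
 *
 *	inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
 *	dentry = lookup_one_qstr_excl(&last, parent, lookup_flags);
 *	if (!IS_ERR(dentry)) {
 *		error = vfs_rmdir(idmap, parent->d_inode, dentry);
 *		dput(dentry);
 *	}
 *	inode_unlock(parent->d_inode);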
*/ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { int error = may_delete(idmap, dir, dentry, 1); if (error) return error; if (!dir->i_op->rmdir) return -EPERM; dget(dentry); inode_lock(dentry->d_inode); error = -EBUSY; if (is_local_mountpoint(dentry) || (dentry->d_inode->i_flags & S_KERNEL_FILE)) goto out; error = security_inode_rmdir(dir, dentry); if (error) goto out; error = dir->i_op->rmdir(dir, dentry); if (error) goto out; shrink_dcache_parent(dentry); dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); out: inode_unlock(dentry->d_inode); dput(dentry); if (!error) d_delete_notify(dir, dentry); return error; } EXPORT_SYMBOL(vfs_rmdir); int do_rmdir(int dfd, struct filename *name) { int error; struct dentry *dentry; struct path path; struct qstr last; int type; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) goto exit1; switch (type) { case LAST_DOTDOT: error = -ENOTEMPTY; goto exit2; case LAST_DOT: error = -EINVAL; goto exit2; case LAST_ROOT: error = -EBUSY; goto exit2; } error = mnt_want_write(path.mnt); if (error) goto exit2; inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; if (!dentry->d_inode) { error = -ENOENT; goto exit4; } error = security_path_rmdir(&path, dentry); if (error) goto exit4; error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); exit4: dput(dentry); exit3: inode_unlock(path.dentry->d_inode); mnt_drop_write(path.mnt); exit2: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } exit1: putname(name); return error; } SYSCALL_DEFINE1(rmdir, const char __user *, pathname) { return do_rmdir(AT_FDCWD, getname(pathname)); } /** * vfs_unlink - unlink a filesystem object * @idmap: idmap of the mount the inode was found from * @dir: parent directory * @dentry: victim * @delegated_inode: returns victim inode, if the inode is delegated. * * The caller must hold dir->i_mutex. * * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and * return a reference to the inode in delegated_inode. The caller * should then break the delegation on that inode and retry. Because * breaking a delegation may take a long time, the caller should drop * dir->i_mutex before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. 
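 *
 * The delegation dance described above looks roughly like this in a caller
 * (condensed from do_unlinkat() below):
 *
 *	retry_deleg:
 *		inode_lock_nested(dir->d_inode, I_MUTEX_PARENT);
 *		...lookup, permission and security checks...
 *		error = vfs_unlink(idmap, dir->d_inode, dentry, &delegated_inode);
 *		inode_unlock(dir->d_inode);
 *		if (delegated_inode) {
 *			error = break_deleg_wait(&delegated_inode);
 *			if (!error)
 *				goto retry_deleg;
 *		}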
*/ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { struct inode *target = dentry->d_inode; int error = may_delete(idmap, dir, dentry, 0); if (error) return error; if (!dir->i_op->unlink) return -EPERM; inode_lock(target); if (IS_SWAPFILE(target)) error = -EPERM; else if (is_local_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); if (!error) { error = try_break_deleg(target, delegated_inode); if (error) goto out; error = dir->i_op->unlink(dir, dentry); if (!error) { dont_mount(dentry); detach_mounts(dentry); } } } out: inode_unlock(target); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { fsnotify_unlink(dir, dentry); } else if (!error) { fsnotify_link_count(target); d_delete_notify(dir, dentry); } return error; } EXPORT_SYMBOL(vfs_unlink); /* * Make sure that the actual truncation of the file will occur outside its * directory's i_mutex. Truncate can take a long time if there is a lot of * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. */ int do_unlinkat(int dfd, struct filename *name) { int error; struct dentry *dentry; struct path path; struct qstr last; int type; struct inode *inode = NULL; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) goto exit1; error = -EISDIR; if (type != LAST_NORM) goto exit2; error = mnt_want_write(path.mnt); if (error) goto exit2; retry_deleg: inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ if (last.name[last.len] || d_is_negative(dentry)) goto slashes; inode = dentry->d_inode; ihold(inode); error = security_path_unlink(&path, dentry); if (error) goto exit3; error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, &delegated_inode); exit3: dput(dentry); } inode_unlock(path.dentry->d_inode); if (inode) iput(inode); /* truncate the inode here */ inode = NULL; if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(path.mnt); exit2: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; inode = NULL; goto retry; } exit1: putname(name); return error; slashes: if (d_is_negative(dentry)) error = -ENOENT; else if (d_is_dir(dentry)) error = -EISDIR; else error = -ENOTDIR; goto exit3; } SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) { if ((flag & ~AT_REMOVEDIR) != 0) return -EINVAL; if (flag & AT_REMOVEDIR) return do_rmdir(dfd, getname(pathname)); return do_unlinkat(dfd, getname(pathname)); } SYSCALL_DEFINE1(unlink, const char __user *, pathname) { return do_unlinkat(AT_FDCWD, getname(pathname)); } /** * vfs_symlink - create symlink * @idmap: idmap of the mount the inode was found from * @dir: inode of @dentry * @dentry: pointer to dentry of the base directory * @oldname: name of the file to link to * * Create a symlink. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. 
* On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *oldname) { int error; error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->symlink) return -EPERM; error = security_inode_symlink(dir, dentry, oldname); if (error) return error; error = dir->i_op->symlink(idmap, dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_symlink); int do_symlinkat(struct filename *from, int newdfd, struct filename *to) { int error; struct dentry *dentry; struct path path; unsigned int lookup_flags = 0; if (IS_ERR(from)) { error = PTR_ERR(from); goto out_putnames; } retry: dentry = filename_create(newdfd, to, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putnames; error = security_path_symlink(&path, dentry, from->name); if (!error) error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out_putnames: putname(to); putname(from); return error; } SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname) { return do_symlinkat(getname(oldname), newdfd, getname(newname)); } SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname) { return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname)); } /** * vfs_link - create a new link * @old_dentry: object to be linked * @idmap: idmap of the mount * @dir: new parent * @new_dentry: where to create the new link * @delegated_inode: returns inode needing a delegation break * * The caller must hold dir->i_mutex * * If vfs_link discovers a delegation on the to-be-linked file in need * of breaking, it will return -EWOULDBLOCK and return a reference to the * inode in delegated_inode. The caller should then break the delegation * and retry. Because breaking a delegation may take a long time, the * caller should drop the i_mutex before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; int error; if (!inode) return -ENOENT; error = may_create(idmap, dir, new_dentry); if (error) return error; if (dir->i_sb != inode->i_sb) return -EXDEV; /* * A link to an append-only or immutable file cannot be created. */ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; /* * Updating the link count will likely cause i_uid and i_gid to * be writen back improperly if their true value is unknown to * the vfs. 
*/ if (HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; if (!dir->i_op->link) return -EPERM; if (S_ISDIR(inode->i_mode)) return -EPERM; error = security_inode_link(old_dentry, dir, new_dentry); if (error) return error; inode_lock(inode); /* Make sure we don't allow creating hardlink to an unlinked file */ if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) error = -ENOENT; else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; else { error = try_break_deleg(inode, delegated_inode); if (!error) error = dir->i_op->link(old_dentry, dir, new_dentry); } if (!error && (inode->i_state & I_LINKABLE)) { spin_lock(&inode->i_lock); inode->i_state &= ~I_LINKABLE; spin_unlock(&inode->i_lock); } inode_unlock(inode); if (!error) fsnotify_link(dir, inode, new_dentry); return error; } EXPORT_SYMBOL(vfs_link); /* * Hardlinks are often used in delicate situations. We avoid * security-related surprises by not following symlinks on the * newname. --KAB * * We don't follow them on the oldname either to be compatible * with linux 2.0, and to avoid hard-linking to directories * and other special files. --ADM */ int do_linkat(int olddfd, struct filename *old, int newdfd, struct filename *new, int flags) { struct mnt_idmap *idmap; struct dentry *new_dentry; struct path old_path, new_path; struct inode *delegated_inode = NULL; int how = 0; int error; if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) { error = -EINVAL; goto out_putnames; } /* * To use null names we require CAP_DAC_READ_SEARCH. * This ensures that not everyone will be able to create * a hard link using the passed file descriptor. */ if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) { error = -ENOENT; goto out_putnames; } if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; retry: error = filename_lookup(olddfd, old, how, &old_path, NULL); if (error) goto out_putnames; new_dentry = filename_create(newdfd, new, &new_path, (how & LOOKUP_REVAL)); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out_putpath; error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; idmap = mnt_idmap(new_path.mnt); error = may_linkat(idmap, &old_path); if (unlikely(error)) goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) { path_put(&old_path); goto retry; } } if (retry_estale(error, how)) { path_put(&old_path); how |= LOOKUP_REVAL; goto retry; } out_putpath: path_put(&old_path); out_putnames: putname(old); putname(new); return error; } SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags) { return do_linkat(olddfd, getname_uflags(oldname, flags), newdfd, getname(newname), flags); } SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname) { return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0); } /** * vfs_rename - rename a filesystem object * @rd: pointer to &struct renamedata info * * The caller must hold multiple mutexes--see lock_rename(). * * If vfs_rename discovers a delegation in need of breaking at either * the source or destination, it will return -EWOULDBLOCK and return a * reference to the inode in delegated_inode. The caller should then * break the delegation and retry. 
Because breaking a delegation may * take a long time, the caller should drop all locks before doing * so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * The worst of all namespace operations - renaming directory. "Perverted" * doesn't even start to describe it. Somebody in UCB had a heck of a trip... * Problems: * * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. * That's where 4.4BSD screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. * c) we may have to lock up to _four_ objects - parents and victim (if it exists), * and source (if it's a non-directory or a subdirectory that moves to * different parent). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change * only under ->s_vfs_rename_mutex _and_ that parent of the object we * move will be locked. Thus we can rank directories by the tree * (ancestors first) and rank all non-directories after them. * That works since everybody except rename does "lock parent, lookup, * lock child" and rename is under ->s_vfs_rename_mutex. * HOWEVER, it relies on the assumption that any object with ->lookup() * has no more than 1 dentry. If "hybrid" objects will ever appear, * we'd better make sure that there's no link(2) for them. * d) conversion from fhandle to dentry may come in the wrong moment - when * we are removing the target. Solution: we will have to grab ->i_mutex * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ int vfs_rename(struct renamedata *rd) { int error; struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir; struct dentry *old_dentry = rd->old_dentry; struct dentry *new_dentry = rd->new_dentry; struct inode **delegated_inode = rd->delegated_inode; unsigned int flags = rd->flags; bool is_dir = d_is_dir(old_dentry); struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; struct name_snapshot old_name; bool lock_old_subdir, lock_new_subdir; if (source == target) return 0; error = may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir); if (error) return error; if (!target) { error = may_create(rd->new_mnt_idmap, new_dir, new_dentry); } else { new_is_dir = d_is_dir(new_dentry); if (!(flags & RENAME_EXCHANGE)) error = may_delete(rd->new_mnt_idmap, new_dir, new_dentry, is_dir); else error = may_delete(rd->new_mnt_idmap, new_dir, new_dentry, new_is_dir); } if (error) return error; if (!old_dir->i_op->rename) return -EPERM; /* * If we are going to change the parent - check write permissions, * we'll need to flip '..'. */ if (new_dir != old_dir) { if (is_dir) { error = inode_permission(rd->old_mnt_idmap, source, MAY_WRITE); if (error) return error; } if ((flags & RENAME_EXCHANGE) && new_is_dir) { error = inode_permission(rd->new_mnt_idmap, target, MAY_WRITE); if (error) return error; } } error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, flags); if (error) return error; take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); /* * Lock children. 
* The source subdirectory needs to be locked on cross-directory * rename or cross-directory exchange since its parent changes. * The target subdirectory needs to be locked on cross-directory * exchange due to parent change and on any rename due to becoming * a victim. * Non-directories need locking in all cases (for NFS reasons); * they get locked after any subdirectories (in inode address order). * * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE. * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex. */ lock_old_subdir = new_dir != old_dir; lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE); if (is_dir) { if (lock_old_subdir) inode_lock_nested(source, I_MUTEX_CHILD); if (target && (!new_is_dir || lock_new_subdir)) inode_lock(target); } else if (new_is_dir) { if (lock_new_subdir) inode_lock_nested(target, I_MUTEX_CHILD); inode_lock(source); } else { lock_two_nondirectories(source, target); } error = -EPERM; if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target))) goto out; error = -EBUSY; if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) goto out; if (max_links && new_dir != old_dir) { error = -EMLINK; if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) goto out; if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && old_dir->i_nlink >= max_links) goto out; } if (!is_dir) { error = try_break_deleg(source, delegated_inode); if (error) goto out; } if (target && !new_is_dir) { error = try_break_deleg(target, delegated_inode); if (error) goto out; } error = old_dir->i_op->rename(rd->new_mnt_idmap, old_dir, old_dentry, new_dir, new_dentry, flags); if (error) goto out; if (!(flags & RENAME_EXCHANGE) && target) { if (is_dir) { shrink_dcache_parent(new_dentry); target->i_flags |= S_DEAD; } dont_mount(new_dentry); detach_mounts(new_dentry); } if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { if (!(flags & RENAME_EXCHANGE)) d_move(old_dentry, new_dentry); else d_exchange(old_dentry, new_dentry); } out: if (!is_dir || lock_old_subdir) inode_unlock(source); if (target && (!new_is_dir || lock_new_subdir)) inode_unlock(target); dput(new_dentry); if (!error) { fsnotify_move(old_dir, new_dir, &old_name.name, is_dir, !(flags & RENAME_EXCHANGE) ? 
target : NULL, old_dentry); if (flags & RENAME_EXCHANGE) { fsnotify_move(new_dir, old_dir, &old_dentry->d_name, new_is_dir, NULL, new_dentry); } } release_dentry_name_snapshot(&old_name); return error; } EXPORT_SYMBOL(vfs_rename); int do_renameat2(int olddfd, struct filename *from, int newdfd, struct filename *to, unsigned int flags) { struct renamedata rd; struct dentry *old_dentry, *new_dentry; struct dentry *trap; struct path old_path, new_path; struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; bool should_retry = false; int error = -EINVAL; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) goto put_names; if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && (flags & RENAME_EXCHANGE)) goto put_names; if (flags & RENAME_EXCHANGE) target_flags = 0; retry: error = filename_parentat(olddfd, from, lookup_flags, &old_path, &old_last, &old_type); if (error) goto put_names; error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last, &new_type); if (error) goto exit1; error = -EXDEV; if (old_path.mnt != new_path.mnt) goto exit2; error = -EBUSY; if (old_type != LAST_NORM) goto exit2; if (flags & RENAME_NOREPLACE) error = -EEXIST; if (new_type != LAST_NORM) goto exit2; error = mnt_want_write(old_path.mnt); if (error) goto exit2; retry_deleg: trap = lock_rename(new_path.dentry, old_path.dentry); if (IS_ERR(trap)) { error = PTR_ERR(trap); goto exit_lock_rename; } old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry, lookup_flags); error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; /* source must exist */ error = -ENOENT; if (d_is_negative(old_dentry)) goto exit4; new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, lookup_flags | target_flags); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto exit4; error = -EEXIST; if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) goto exit5; if (flags & RENAME_EXCHANGE) { error = -ENOENT; if (d_is_negative(new_dentry)) goto exit5; if (!d_is_dir(new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) goto exit5; } } /* unless the source is a directory trailing slashes give -ENOTDIR */ if (!d_is_dir(old_dentry)) { error = -ENOTDIR; if (old_last.name[old_last.len]) goto exit5; if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len]) goto exit5; } /* source should not be ancestor of target */ error = -EINVAL; if (old_dentry == trap) goto exit5; /* target should not be an ancestor of source */ if (!(flags & RENAME_EXCHANGE)) error = -ENOTEMPTY; if (new_dentry == trap) goto exit5; error = security_path_rename(&old_path, old_dentry, &new_path, new_dentry, flags); if (error) goto exit5; rd.old_dir = old_path.dentry->d_inode; rd.old_dentry = old_dentry; rd.old_mnt_idmap = mnt_idmap(old_path.mnt); rd.new_dir = new_path.dentry->d_inode; rd.new_dentry = new_dentry; rd.new_mnt_idmap = mnt_idmap(new_path.mnt); rd.delegated_inode = &delegated_inode; rd.flags = flags; error = vfs_rename(&rd); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_path.dentry, old_path.dentry); exit_lock_rename: if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(old_path.mnt); exit2: if (retry_estale(error, lookup_flags)) should_retry = true; path_put(&new_path); exit1: path_put(&old_path); if (should_retry) { should_retry = false; lookup_flags |= LOOKUP_REVAL; goto retry; } put_names: putname(from); 
putname(to); return error; } SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags) { return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), flags); } SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname) { return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), 0); } SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0); } int readlink_copy(char __user *buffer, int buflen, const char *link) { int len = PTR_ERR(link); if (IS_ERR(link)) goto out; len = strlen(link); if (len > (unsigned) buflen) len = buflen; if (copy_to_user(buffer, link, len)) len = -EFAULT; out: return len; } /** * vfs_readlink - copy symlink body into userspace buffer * @dentry: dentry on which to get symbolic link * @buffer: user memory pointer * @buflen: size of buffer * * Does not touch atime. That's up to the caller if necessary * * Does not call security hook. */ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = d_inode(dentry); DEFINE_DELAYED_CALL(done); const char *link; int res; if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { if (unlikely(inode->i_op->readlink)) return inode->i_op->readlink(dentry, buffer, buflen); if (!d_is_symlink(dentry)) return -EINVAL; spin_lock(&inode->i_lock); inode->i_opflags |= IOP_DEFAULT_READLINK; spin_unlock(&inode->i_lock); } link = READ_ONCE(inode->i_link); if (!link) { link = inode->i_op->get_link(dentry, inode, &done); if (IS_ERR(link)) return PTR_ERR(link); } res = readlink_copy(buffer, buflen, link); do_delayed_call(&done); return res; } EXPORT_SYMBOL(vfs_readlink); /** * vfs_get_link - get symlink body * @dentry: dentry on which to get symbolic link * @done: caller needs to free returned data with this * * Calls security hook and i_op->get_link() on the supplied inode. * * It does not touch atime. That's up to the caller if necessary. 
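 *
 * Rough usage sketch (the caller owns the delayed call and must release it):
 *
 *	DEFINE_DELAYED_CALL(done);
 *	const char *link = vfs_get_link(dentry, &done);
 *	if (IS_ERR(link))
 *		return PTR_ERR(link);
 *	...use link...
 *	do_delayed_call(&done);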
* * Does not work on "special" symlinks like /proc/$$/fd/N */ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) { const char *res = ERR_PTR(-EINVAL); struct inode *inode = d_inode(dentry); if (d_is_symlink(dentry)) { res = ERR_PTR(security_inode_readlink(dentry)); if (!res) res = inode->i_op->get_link(dentry, inode, done); } return res; } EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ const char *page_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { char *kaddr; struct page *page; struct address_space *mapping = inode->i_mapping; if (!dentry) { page = find_get_page(mapping, 0); if (!page) return ERR_PTR(-ECHILD); if (!PageUptodate(page)) { put_page(page); return ERR_PTR(-ECHILD); } } else { page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) return (char*)page; } set_delayed_call(callback, page_put_link, page); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); kaddr = page_address(page); nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } EXPORT_SYMBOL(page_get_link); void page_put_link(void *arg) { put_page(arg); } EXPORT_SYMBOL(page_put_link); int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { DEFINE_DELAYED_CALL(done); int res = readlink_copy(buffer, buflen, page_get_link(dentry, d_inode(dentry), &done)); do_delayed_call(&done); return res; } EXPORT_SYMBOL(page_readlink); int page_symlink(struct inode *inode, const char *symname, int len) { struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS); struct page *page; void *fsdata = NULL; int err; unsigned int flags; retry: if (nofs) flags = memalloc_nofs_save(); err = aops->write_begin(NULL, mapping, 0, len-1, &page, &fsdata); if (nofs) memalloc_nofs_restore(flags); if (err) goto fail; memcpy(page_address(page), symname, len-1); err = aops->write_end(NULL, mapping, 0, len-1, len-1, page, fsdata); if (err < 0) goto fail; if (err < len-1) goto retry; mark_inode_dirty(inode); return 0; fail: return err; } EXPORT_SYMBOL(page_symlink); const struct inode_operations page_symlink_inode_operations = { .get_link = page_get_link, }; EXPORT_SYMBOL(page_symlink_inode_operations);
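
/*
 * Example (a sketch, not part of the VFS): a simple pagecache-backed
 * filesystem might wire up the helpers above in its ->symlink() method
 * roughly as follows. foofs_symlink() and foofs_new_inode() are hypothetical
 * names; setting up ->i_mapping->a_ops and tearing down the inode on failure
 * are filesystem-specific and elided.
 *
 *	static int foofs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 *				 struct dentry *dentry, const char *symname)
 *	{
 *		struct inode *inode = foofs_new_inode(dir, S_IFLNK | 0777);
 *		int err;
 *
 *		if (IS_ERR(inode))
 *			return PTR_ERR(inode);
 *		inode->i_op = &page_symlink_inode_operations;
 *		inode_nohighmem(inode);	// page_get_link() needs lowmem pages
 *		err = page_symlink(inode, symname, strlen(symname) + 1);
 *		if (err)
 *			return err;
 *		d_instantiate(dentry, inode);
 *		return 0;
 *	}
 */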
2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 
3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 
3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 
4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 
5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 
5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 
6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 
7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 
8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 
8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 
// SPDX-License-Identifier: ISC
/*
 * Copyright (c) 2005-2011 Atheros Communications Inc.
 * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
 * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
 * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/skbuff.h>
#include <linux/ctype.h>

#include "core.h"
#include "htc.h"
#include "debug.h"
#include "wmi.h"
#include "wmi-tlv.h"
#include "mac.h"
#include "testmode.h"
#include "wmi-ops.h"
#include "p2p.h"
#include "hw.h"
#include "hif.h"
#include "txrx.h"

#define ATH10K_WMI_BARRIER_ECHO_ID 0xBA991E9
#define ATH10K_WMI_BARRIER_TIMEOUT_HZ (3 * HZ)
#define ATH10K_WMI_DFS_CONF_TIMEOUT_HZ (HZ / 6)
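
/*
 * The wmi_cmd_map tables below translate the driver's firmware-agnostic
 * command indices into the numeric WMI command IDs understood by a specific
 * firmware branch (main, 10.x, 10.2.4, 10.4). An entry set to
 * WMI_CMD_UNSUPPORTED marks a command the given firmware revision does not
 * implement, so callers must treat it as unavailable instead of sending it.
 * The matching map is installed per device, normally at WMI attach time,
 * based on the WMI op version reported for the running firmware.
 */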

/* MAIN WMI cmd track */
static struct wmi_cmd_map wmi_cmd_map = {
	.init_cmdid = WMI_INIT_CMDID,
	.start_scan_cmdid = WMI_START_SCAN_CMDID,
	.stop_scan_cmdid = WMI_STOP_SCAN_CMDID,
	.scan_chan_list_cmdid = WMI_SCAN_CHAN_LIST_CMDID,
	.scan_sch_prio_tbl_cmdid = WMI_SCAN_SCH_PRIO_TBL_CMDID,
	.scan_prob_req_oui_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_regdomain_cmdid = WMI_PDEV_SET_REGDOMAIN_CMDID,
	.pdev_set_channel_cmdid = WMI_PDEV_SET_CHANNEL_CMDID,
	.pdev_set_param_cmdid = WMI_PDEV_SET_PARAM_CMDID,
	.pdev_pktlog_enable_cmdid = WMI_PDEV_PKTLOG_ENABLE_CMDID,
	.pdev_pktlog_disable_cmdid = WMI_PDEV_PKTLOG_DISABLE_CMDID,
	.pdev_set_wmm_params_cmdid = WMI_PDEV_SET_WMM_PARAMS_CMDID,
	.pdev_set_ht_cap_ie_cmdid = WMI_PDEV_SET_HT_CAP_IE_CMDID,
	.pdev_set_vht_cap_ie_cmdid = WMI_PDEV_SET_VHT_CAP_IE_CMDID,
	.pdev_set_dscp_tid_map_cmdid = WMI_PDEV_SET_DSCP_TID_MAP_CMDID,
	.pdev_set_quiet_mode_cmdid = WMI_PDEV_SET_QUIET_MODE_CMDID,
	.pdev_green_ap_ps_enable_cmdid = WMI_PDEV_GREEN_AP_PS_ENABLE_CMDID,
	.pdev_get_tpc_config_cmdid = WMI_PDEV_GET_TPC_CONFIG_CMDID,
	.pdev_set_base_macaddr_cmdid = WMI_PDEV_SET_BASE_MACADDR_CMDID,
	.vdev_create_cmdid = WMI_VDEV_CREATE_CMDID,
	.vdev_delete_cmdid = WMI_VDEV_DELETE_CMDID,
	.vdev_start_request_cmdid = WMI_VDEV_START_REQUEST_CMDID,
	.vdev_restart_request_cmdid = WMI_VDEV_RESTART_REQUEST_CMDID,
	.vdev_up_cmdid = WMI_VDEV_UP_CMDID,
	.vdev_stop_cmdid = WMI_VDEV_STOP_CMDID,
	.vdev_down_cmdid = WMI_VDEV_DOWN_CMDID,
	.vdev_set_param_cmdid = WMI_VDEV_SET_PARAM_CMDID,
	.vdev_install_key_cmdid = WMI_VDEV_INSTALL_KEY_CMDID,
	.peer_create_cmdid = WMI_PEER_CREATE_CMDID,
	.peer_delete_cmdid = WMI_PEER_DELETE_CMDID,
	.peer_flush_tids_cmdid = WMI_PEER_FLUSH_TIDS_CMDID,
	.peer_set_param_cmdid = WMI_PEER_SET_PARAM_CMDID,
	.peer_assoc_cmdid = WMI_PEER_ASSOC_CMDID,
	.peer_add_wds_entry_cmdid = WMI_PEER_ADD_WDS_ENTRY_CMDID,
	.peer_remove_wds_entry_cmdid = WMI_PEER_REMOVE_WDS_ENTRY_CMDID,
	.peer_mcast_group_cmdid = WMI_PEER_MCAST_GROUP_CMDID,
	.bcn_tx_cmdid = WMI_BCN_TX_CMDID,
	.pdev_send_bcn_cmdid = WMI_PDEV_SEND_BCN_CMDID,
	.bcn_tmpl_cmdid = WMI_BCN_TMPL_CMDID,
	.bcn_filter_rx_cmdid = WMI_BCN_FILTER_RX_CMDID,
	.prb_req_filter_rx_cmdid = WMI_PRB_REQ_FILTER_RX_CMDID,
	.mgmt_tx_cmdid = WMI_MGMT_TX_CMDID,
	.prb_tmpl_cmdid = WMI_PRB_TMPL_CMDID,
	.addba_clear_resp_cmdid = WMI_ADDBA_CLEAR_RESP_CMDID,
	.addba_send_cmdid = WMI_ADDBA_SEND_CMDID,
	.addba_status_cmdid = WMI_ADDBA_STATUS_CMDID,
	.delba_send_cmdid = WMI_DELBA_SEND_CMDID,
	.addba_set_resp_cmdid = WMI_ADDBA_SET_RESP_CMDID,
	.send_singleamsdu_cmdid = WMI_SEND_SINGLEAMSDU_CMDID,
	.sta_powersave_mode_cmdid = WMI_STA_POWERSAVE_MODE_CMDID,
	.sta_powersave_param_cmdid = WMI_STA_POWERSAVE_PARAM_CMDID,
	.sta_mimo_ps_mode_cmdid = WMI_STA_MIMO_PS_MODE_CMDID,
	.pdev_dfs_enable_cmdid = WMI_PDEV_DFS_ENABLE_CMDID,
	.pdev_dfs_disable_cmdid = WMI_PDEV_DFS_DISABLE_CMDID,
	.roam_scan_mode = WMI_ROAM_SCAN_MODE,
	.roam_scan_rssi_threshold = WMI_ROAM_SCAN_RSSI_THRESHOLD,
	.roam_scan_period = WMI_ROAM_SCAN_PERIOD,
	.roam_scan_rssi_change_threshold = WMI_ROAM_SCAN_RSSI_CHANGE_THRESHOLD,
	.roam_ap_profile = WMI_ROAM_AP_PROFILE,
	.ofl_scan_add_ap_profile = WMI_ROAM_AP_PROFILE,
	.ofl_scan_remove_ap_profile = WMI_OFL_SCAN_REMOVE_AP_PROFILE,
	.ofl_scan_period = WMI_OFL_SCAN_PERIOD,
	.p2p_dev_set_device_info = WMI_P2P_DEV_SET_DEVICE_INFO,
	.p2p_dev_set_discoverability = WMI_P2P_DEV_SET_DISCOVERABILITY,
	.p2p_go_set_beacon_ie = WMI_P2P_GO_SET_BEACON_IE,
	.p2p_go_set_probe_resp_ie = WMI_P2P_GO_SET_PROBE_RESP_IE,
	.p2p_set_vendor_ie_data_cmdid = WMI_P2P_SET_VENDOR_IE_DATA_CMDID,
	.ap_ps_peer_param_cmdid = WMI_AP_PS_PEER_PARAM_CMDID,
	.ap_ps_peer_uapsd_coex_cmdid = WMI_AP_PS_PEER_UAPSD_COEX_CMDID,
	.peer_rate_retry_sched_cmdid = WMI_PEER_RATE_RETRY_SCHED_CMDID,
	.wlan_profile_trigger_cmdid = WMI_WLAN_PROFILE_TRIGGER_CMDID,
	.wlan_profile_set_hist_intvl_cmdid = WMI_WLAN_PROFILE_SET_HIST_INTVL_CMDID,
	.wlan_profile_get_profile_data_cmdid = WMI_WLAN_PROFILE_GET_PROFILE_DATA_CMDID,
	.wlan_profile_enable_profile_id_cmdid = WMI_WLAN_PROFILE_ENABLE_PROFILE_ID_CMDID,
	.wlan_profile_list_profile_id_cmdid = WMI_WLAN_PROFILE_LIST_PROFILE_ID_CMDID,
	.pdev_suspend_cmdid = WMI_PDEV_SUSPEND_CMDID,
	.pdev_resume_cmdid = WMI_PDEV_RESUME_CMDID,
	.add_bcn_filter_cmdid = WMI_ADD_BCN_FILTER_CMDID,
	.rmv_bcn_filter_cmdid = WMI_RMV_BCN_FILTER_CMDID,
	.wow_add_wake_pattern_cmdid = WMI_WOW_ADD_WAKE_PATTERN_CMDID,
	.wow_del_wake_pattern_cmdid = WMI_WOW_DEL_WAKE_PATTERN_CMDID,
	.wow_enable_disable_wake_event_cmdid = WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID,
	.wow_enable_cmdid = WMI_WOW_ENABLE_CMDID,
	.wow_hostwakeup_from_sleep_cmdid = WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID,
	.rtt_measreq_cmdid = WMI_RTT_MEASREQ_CMDID,
	.rtt_tsf_cmdid = WMI_RTT_TSF_CMDID,
	.vdev_spectral_scan_configure_cmdid = WMI_VDEV_SPECTRAL_SCAN_CONFIGURE_CMDID,
	.vdev_spectral_scan_enable_cmdid = WMI_VDEV_SPECTRAL_SCAN_ENABLE_CMDID,
	.request_stats_cmdid = WMI_REQUEST_STATS_CMDID,
	.set_arp_ns_offload_cmdid = WMI_SET_ARP_NS_OFFLOAD_CMDID,
	.network_list_offload_config_cmdid = WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID,
	.gtk_offload_cmdid = WMI_GTK_OFFLOAD_CMDID,
	.csa_offload_enable_cmdid = WMI_CSA_OFFLOAD_ENABLE_CMDID,
	.csa_offload_chanswitch_cmdid = WMI_CSA_OFFLOAD_CHANSWITCH_CMDID,
	.chatter_set_mode_cmdid = WMI_CHATTER_SET_MODE_CMDID,
	.peer_tid_addba_cmdid = WMI_PEER_TID_ADDBA_CMDID,
	.peer_tid_delba_cmdid = WMI_PEER_TID_DELBA_CMDID,
	.sta_dtim_ps_method_cmdid = WMI_STA_DTIM_PS_METHOD_CMDID,
	.sta_uapsd_auto_trig_cmdid = WMI_STA_UAPSD_AUTO_TRIG_CMDID,
	.sta_keepalive_cmd = WMI_STA_KEEPALIVE_CMD,
	.echo_cmdid = WMI_ECHO_CMDID,
	.pdev_utf_cmdid = WMI_PDEV_UTF_CMDID,
	.dbglog_cfg_cmdid = WMI_DBGLOG_CFG_CMDID,
	.pdev_qvit_cmdid = WMI_PDEV_QVIT_CMDID,
	.pdev_ftm_intg_cmdid = WMI_PDEV_FTM_INTG_CMDID,
	.vdev_set_keepalive_cmdid = WMI_VDEV_SET_KEEPALIVE_CMDID,
	.vdev_get_keepalive_cmdid = WMI_VDEV_GET_KEEPALIVE_CMDID,
	.force_fw_hang_cmdid = WMI_FORCE_FW_HANG_CMDID,
	.gpio_config_cmdid = WMI_GPIO_CONFIG_CMDID,
	.gpio_output_cmdid = WMI_GPIO_OUTPUT_CMDID,
	.pdev_get_temperature_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_enable_adaptive_cca_cmdid = WMI_CMD_UNSUPPORTED,
	.scan_update_request_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_standby_response_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_resume_response_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_add_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_evict_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_restore_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_print_all_peers_info_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_update_wds_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_add_proxy_sta_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.rtt_keepalive_cmdid = WMI_CMD_UNSUPPORTED,
	.oem_req_cmdid = WMI_CMD_UNSUPPORTED,
	.nan_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_ratemask_cmdid = WMI_CMD_UNSUPPORTED,
	.qboost_cfg_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_smart_ant_enable_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_smart_ant_set_rx_antenna_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_tx_antenna_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_train_info_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_node_config_ops_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_antenna_switch_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_ctl_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_mimogain_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_ratepwr_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_ratepwr_chainmsk_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_fips_cmdid = WMI_CMD_UNSUPPORTED,
	.tt_set_conf_cmdid = WMI_CMD_UNSUPPORTED,
	.fwtest_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_atf_request_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_atf_request_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ani_cck_config_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ani_ofdm_config_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_reserve_ast_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_nfcal_power_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_tpc_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ast_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_set_dscp_tid_map_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_get_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_filter_neighbor_rx_packets_cmdid = WMI_CMD_UNSUPPORTED,
	.mu_cal_start_cmdid = WMI_CMD_UNSUPPORTED,
	.set_cca_params_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_bss_chan_info_request_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_tpc_table_cmdid = WMI_CMD_UNSUPPORTED,
	.radar_found_cmdid = WMI_CMD_UNSUPPORTED,
};
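
/*
 * Illustrative sketch, not a helper that exists in this file: commands are
 * meant to be built against the per-device map rather than a hard-coded ID,
 * so one call site works across firmware branches. The function name below
 * is hypothetical; the driver itself routes such sends through its wmi-ops
 * layer and ath10k_wmi_cmd_send(), which rejects WMI_CMD_UNSUPPORTED IDs.
 *
 *	static int example_send_vdev_up(struct ath10k *ar, struct sk_buff *skb)
 *	{
 *		u32 cmd_id = ar->wmi.cmd->vdev_up_cmdid;
 *
 *		if (cmd_id == WMI_CMD_UNSUPPORTED)
 *			return -ENOTSUPP;
 *
 *		return ath10k_wmi_cmd_send(ar, skb, cmd_id);
 *	}
 */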

/* 10.X WMI cmd track */
static struct wmi_cmd_map wmi_10x_cmd_map = {
	.init_cmdid = WMI_10X_INIT_CMDID,
	.start_scan_cmdid = WMI_10X_START_SCAN_CMDID,
	.stop_scan_cmdid = WMI_10X_STOP_SCAN_CMDID,
	.scan_chan_list_cmdid = WMI_10X_SCAN_CHAN_LIST_CMDID,
	.scan_sch_prio_tbl_cmdid = WMI_CMD_UNSUPPORTED,
	.scan_prob_req_oui_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_regdomain_cmdid = WMI_10X_PDEV_SET_REGDOMAIN_CMDID,
	.pdev_set_channel_cmdid = WMI_10X_PDEV_SET_CHANNEL_CMDID,
	.pdev_set_param_cmdid = WMI_10X_PDEV_SET_PARAM_CMDID,
	.pdev_pktlog_enable_cmdid = WMI_10X_PDEV_PKTLOG_ENABLE_CMDID,
	.pdev_pktlog_disable_cmdid = WMI_10X_PDEV_PKTLOG_DISABLE_CMDID,
	.pdev_set_wmm_params_cmdid = WMI_10X_PDEV_SET_WMM_PARAMS_CMDID,
	.pdev_set_ht_cap_ie_cmdid = WMI_10X_PDEV_SET_HT_CAP_IE_CMDID,
	.pdev_set_vht_cap_ie_cmdid = WMI_10X_PDEV_SET_VHT_CAP_IE_CMDID,
	.pdev_set_dscp_tid_map_cmdid = WMI_10X_PDEV_SET_DSCP_TID_MAP_CMDID,
	.pdev_set_quiet_mode_cmdid = WMI_10X_PDEV_SET_QUIET_MODE_CMDID,
	.pdev_green_ap_ps_enable_cmdid = WMI_10X_PDEV_GREEN_AP_PS_ENABLE_CMDID,
	.pdev_get_tpc_config_cmdid = WMI_10X_PDEV_GET_TPC_CONFIG_CMDID,
	.pdev_set_base_macaddr_cmdid = WMI_10X_PDEV_SET_BASE_MACADDR_CMDID,
	.vdev_create_cmdid = WMI_10X_VDEV_CREATE_CMDID,
	.vdev_delete_cmdid = WMI_10X_VDEV_DELETE_CMDID,
	.vdev_start_request_cmdid = WMI_10X_VDEV_START_REQUEST_CMDID,
	.vdev_restart_request_cmdid = WMI_10X_VDEV_RESTART_REQUEST_CMDID,
	.vdev_up_cmdid = WMI_10X_VDEV_UP_CMDID,
	.vdev_stop_cmdid = WMI_10X_VDEV_STOP_CMDID,
	.vdev_down_cmdid = WMI_10X_VDEV_DOWN_CMDID,
	.vdev_set_param_cmdid = WMI_10X_VDEV_SET_PARAM_CMDID,
	.vdev_install_key_cmdid = WMI_10X_VDEV_INSTALL_KEY_CMDID,
	.peer_create_cmdid = WMI_10X_PEER_CREATE_CMDID,
	.peer_delete_cmdid = WMI_10X_PEER_DELETE_CMDID,
	.peer_flush_tids_cmdid = WMI_10X_PEER_FLUSH_TIDS_CMDID,
	.peer_set_param_cmdid = WMI_10X_PEER_SET_PARAM_CMDID,
	.peer_assoc_cmdid = WMI_10X_PEER_ASSOC_CMDID,
	.peer_add_wds_entry_cmdid = WMI_10X_PEER_ADD_WDS_ENTRY_CMDID,
	.peer_remove_wds_entry_cmdid = WMI_10X_PEER_REMOVE_WDS_ENTRY_CMDID,
	.peer_mcast_group_cmdid = WMI_10X_PEER_MCAST_GROUP_CMDID,
	.bcn_tx_cmdid = WMI_10X_BCN_TX_CMDID,
	.pdev_send_bcn_cmdid = WMI_10X_PDEV_SEND_BCN_CMDID,
	.bcn_tmpl_cmdid = WMI_CMD_UNSUPPORTED,
	.bcn_filter_rx_cmdid = WMI_10X_BCN_FILTER_RX_CMDID,
	.prb_req_filter_rx_cmdid = WMI_10X_PRB_REQ_FILTER_RX_CMDID,
	.mgmt_tx_cmdid = WMI_10X_MGMT_TX_CMDID,
	.prb_tmpl_cmdid = WMI_CMD_UNSUPPORTED,
	.addba_clear_resp_cmdid = WMI_10X_ADDBA_CLEAR_RESP_CMDID,
	.addba_send_cmdid = WMI_10X_ADDBA_SEND_CMDID,
	.addba_status_cmdid = WMI_10X_ADDBA_STATUS_CMDID,
	.delba_send_cmdid = WMI_10X_DELBA_SEND_CMDID,
	.addba_set_resp_cmdid = WMI_10X_ADDBA_SET_RESP_CMDID,
	.send_singleamsdu_cmdid = WMI_10X_SEND_SINGLEAMSDU_CMDID,
	.sta_powersave_mode_cmdid = WMI_10X_STA_POWERSAVE_MODE_CMDID,
	.sta_powersave_param_cmdid = WMI_10X_STA_POWERSAVE_PARAM_CMDID,
	.sta_mimo_ps_mode_cmdid = WMI_10X_STA_MIMO_PS_MODE_CMDID,
	.pdev_dfs_enable_cmdid = WMI_10X_PDEV_DFS_ENABLE_CMDID,
	.pdev_dfs_disable_cmdid = WMI_10X_PDEV_DFS_DISABLE_CMDID,
	.roam_scan_mode = WMI_10X_ROAM_SCAN_MODE,
	.roam_scan_rssi_threshold = WMI_10X_ROAM_SCAN_RSSI_THRESHOLD,
	.roam_scan_period = WMI_10X_ROAM_SCAN_PERIOD,
	.roam_scan_rssi_change_threshold = WMI_10X_ROAM_SCAN_RSSI_CHANGE_THRESHOLD,
	.roam_ap_profile = WMI_10X_ROAM_AP_PROFILE,
	.ofl_scan_add_ap_profile = WMI_10X_OFL_SCAN_ADD_AP_PROFILE,
	.ofl_scan_remove_ap_profile = WMI_10X_OFL_SCAN_REMOVE_AP_PROFILE,
	.ofl_scan_period = WMI_10X_OFL_SCAN_PERIOD,
	.p2p_dev_set_device_info = WMI_10X_P2P_DEV_SET_DEVICE_INFO,
	.p2p_dev_set_discoverability = WMI_10X_P2P_DEV_SET_DISCOVERABILITY,
	.p2p_go_set_beacon_ie = WMI_10X_P2P_GO_SET_BEACON_IE,
	.p2p_go_set_probe_resp_ie = WMI_10X_P2P_GO_SET_PROBE_RESP_IE,
	.p2p_set_vendor_ie_data_cmdid = WMI_CMD_UNSUPPORTED,
	.ap_ps_peer_param_cmdid = WMI_10X_AP_PS_PEER_PARAM_CMDID,
	.ap_ps_peer_uapsd_coex_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_rate_retry_sched_cmdid = WMI_10X_PEER_RATE_RETRY_SCHED_CMDID,
	.wlan_profile_trigger_cmdid = WMI_10X_WLAN_PROFILE_TRIGGER_CMDID,
	.wlan_profile_set_hist_intvl_cmdid = WMI_10X_WLAN_PROFILE_SET_HIST_INTVL_CMDID,
	.wlan_profile_get_profile_data_cmdid = WMI_10X_WLAN_PROFILE_GET_PROFILE_DATA_CMDID,
	.wlan_profile_enable_profile_id_cmdid = WMI_10X_WLAN_PROFILE_ENABLE_PROFILE_ID_CMDID,
	.wlan_profile_list_profile_id_cmdid = WMI_10X_WLAN_PROFILE_LIST_PROFILE_ID_CMDID,
	.pdev_suspend_cmdid = WMI_10X_PDEV_SUSPEND_CMDID,
	.pdev_resume_cmdid = WMI_10X_PDEV_RESUME_CMDID,
	.add_bcn_filter_cmdid = WMI_10X_ADD_BCN_FILTER_CMDID,
	.rmv_bcn_filter_cmdid = WMI_10X_RMV_BCN_FILTER_CMDID,
	.wow_add_wake_pattern_cmdid = WMI_10X_WOW_ADD_WAKE_PATTERN_CMDID,
	.wow_del_wake_pattern_cmdid = WMI_10X_WOW_DEL_WAKE_PATTERN_CMDID,
	.wow_enable_disable_wake_event_cmdid = WMI_10X_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID,
	.wow_enable_cmdid = WMI_10X_WOW_ENABLE_CMDID,
	.wow_hostwakeup_from_sleep_cmdid = WMI_10X_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID,
	.rtt_measreq_cmdid = WMI_10X_RTT_MEASREQ_CMDID,
	.rtt_tsf_cmdid = WMI_10X_RTT_TSF_CMDID,
	.vdev_spectral_scan_configure_cmdid = WMI_10X_VDEV_SPECTRAL_SCAN_CONFIGURE_CMDID,
	.vdev_spectral_scan_enable_cmdid = WMI_10X_VDEV_SPECTRAL_SCAN_ENABLE_CMDID,
	.request_stats_cmdid = WMI_10X_REQUEST_STATS_CMDID,
	.set_arp_ns_offload_cmdid = WMI_CMD_UNSUPPORTED,
	.network_list_offload_config_cmdid = WMI_CMD_UNSUPPORTED,
	.gtk_offload_cmdid = WMI_CMD_UNSUPPORTED,
	.csa_offload_enable_cmdid = WMI_CMD_UNSUPPORTED,
	.csa_offload_chanswitch_cmdid = WMI_CMD_UNSUPPORTED,
	.chatter_set_mode_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_tid_addba_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_tid_delba_cmdid = WMI_CMD_UNSUPPORTED,
	.sta_dtim_ps_method_cmdid = WMI_CMD_UNSUPPORTED,
	.sta_uapsd_auto_trig_cmdid = WMI_CMD_UNSUPPORTED,
	.sta_keepalive_cmd = WMI_CMD_UNSUPPORTED,
	.echo_cmdid = WMI_10X_ECHO_CMDID,
	.pdev_utf_cmdid = WMI_10X_PDEV_UTF_CMDID,
	.dbglog_cfg_cmdid = WMI_10X_DBGLOG_CFG_CMDID,
	.pdev_qvit_cmdid = WMI_10X_PDEV_QVIT_CMDID,
	.pdev_ftm_intg_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_set_keepalive_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_get_keepalive_cmdid = WMI_CMD_UNSUPPORTED,
	.force_fw_hang_cmdid = WMI_CMD_UNSUPPORTED,
	.gpio_config_cmdid = WMI_10X_GPIO_CONFIG_CMDID,
	.gpio_output_cmdid = WMI_10X_GPIO_OUTPUT_CMDID,
	.pdev_get_temperature_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_enable_adaptive_cca_cmdid = WMI_CMD_UNSUPPORTED,
	.scan_update_request_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_standby_response_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_resume_response_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_add_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_evict_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_restore_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_print_all_peers_info_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_update_wds_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_add_proxy_sta_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.rtt_keepalive_cmdid = WMI_CMD_UNSUPPORTED,
	.oem_req_cmdid = WMI_CMD_UNSUPPORTED,
	.nan_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_ratemask_cmdid = WMI_CMD_UNSUPPORTED,
	.qboost_cfg_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_smart_ant_enable_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_smart_ant_set_rx_antenna_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_tx_antenna_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_train_info_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_node_config_ops_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_antenna_switch_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_ctl_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_mimogain_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_ratepwr_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_ratepwr_chainmsk_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_fips_cmdid = WMI_CMD_UNSUPPORTED,
	.tt_set_conf_cmdid = WMI_CMD_UNSUPPORTED,
	.fwtest_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_atf_request_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_atf_request_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ani_cck_config_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ani_ofdm_config_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_reserve_ast_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_nfcal_power_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_tpc_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ast_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_set_dscp_tid_map_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_get_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_filter_neighbor_rx_packets_cmdid = WMI_CMD_UNSUPPORTED,
	.mu_cal_start_cmdid = WMI_CMD_UNSUPPORTED,
	.set_cca_params_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_bss_chan_info_request_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_tpc_table_cmdid = WMI_CMD_UNSUPPORTED,
	.radar_found_cmdid = WMI_CMD_UNSUPPORTED,
};

/* 10.2.4 WMI cmd track */
static struct wmi_cmd_map wmi_10_2_4_cmd_map = {
	.init_cmdid = WMI_10_2_INIT_CMDID,
	.start_scan_cmdid = WMI_10_2_START_SCAN_CMDID,
	.stop_scan_cmdid = WMI_10_2_STOP_SCAN_CMDID,
	.scan_chan_list_cmdid = WMI_10_2_SCAN_CHAN_LIST_CMDID,
	.scan_sch_prio_tbl_cmdid = WMI_CMD_UNSUPPORTED,
	.scan_prob_req_oui_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_regdomain_cmdid = WMI_10_2_PDEV_SET_REGDOMAIN_CMDID,
	.pdev_set_channel_cmdid = WMI_10_2_PDEV_SET_CHANNEL_CMDID,
	.pdev_set_param_cmdid = WMI_10_2_PDEV_SET_PARAM_CMDID,
	.pdev_pktlog_enable_cmdid = WMI_10_2_PDEV_PKTLOG_ENABLE_CMDID,
	.pdev_pktlog_disable_cmdid = WMI_10_2_PDEV_PKTLOG_DISABLE_CMDID,
	.pdev_set_wmm_params_cmdid = WMI_10_2_PDEV_SET_WMM_PARAMS_CMDID,
	.pdev_set_ht_cap_ie_cmdid = WMI_10_2_PDEV_SET_HT_CAP_IE_CMDID,
	.pdev_set_vht_cap_ie_cmdid = WMI_10_2_PDEV_SET_VHT_CAP_IE_CMDID,
	.pdev_set_quiet_mode_cmdid = WMI_10_2_PDEV_SET_QUIET_MODE_CMDID,
	.pdev_green_ap_ps_enable_cmdid = WMI_10_2_PDEV_GREEN_AP_PS_ENABLE_CMDID,
	.pdev_get_tpc_config_cmdid = WMI_10_2_PDEV_GET_TPC_CONFIG_CMDID,
	.pdev_set_base_macaddr_cmdid = WMI_10_2_PDEV_SET_BASE_MACADDR_CMDID,
	.vdev_create_cmdid = WMI_10_2_VDEV_CREATE_CMDID,
	.vdev_delete_cmdid = WMI_10_2_VDEV_DELETE_CMDID,
	.vdev_start_request_cmdid = WMI_10_2_VDEV_START_REQUEST_CMDID,
	.vdev_restart_request_cmdid = WMI_10_2_VDEV_RESTART_REQUEST_CMDID,
	.vdev_up_cmdid = WMI_10_2_VDEV_UP_CMDID,
	.vdev_stop_cmdid = WMI_10_2_VDEV_STOP_CMDID,
	.vdev_down_cmdid = WMI_10_2_VDEV_DOWN_CMDID,
	.vdev_set_param_cmdid = WMI_10_2_VDEV_SET_PARAM_CMDID,
	.vdev_install_key_cmdid = WMI_10_2_VDEV_INSTALL_KEY_CMDID,
	.peer_create_cmdid = WMI_10_2_PEER_CREATE_CMDID,
	.peer_delete_cmdid = WMI_10_2_PEER_DELETE_CMDID,
	.peer_flush_tids_cmdid = WMI_10_2_PEER_FLUSH_TIDS_CMDID,
	.peer_set_param_cmdid = WMI_10_2_PEER_SET_PARAM_CMDID,
	.peer_assoc_cmdid = WMI_10_2_PEER_ASSOC_CMDID,
	.peer_add_wds_entry_cmdid = WMI_10_2_PEER_ADD_WDS_ENTRY_CMDID,
	.peer_remove_wds_entry_cmdid = WMI_10_2_PEER_REMOVE_WDS_ENTRY_CMDID,
	.peer_mcast_group_cmdid = WMI_10_2_PEER_MCAST_GROUP_CMDID,
	.bcn_tx_cmdid = WMI_10_2_BCN_TX_CMDID,
	.pdev_send_bcn_cmdid = WMI_10_2_PDEV_SEND_BCN_CMDID,
	.bcn_tmpl_cmdid = WMI_CMD_UNSUPPORTED,
	.bcn_filter_rx_cmdid = WMI_10_2_BCN_FILTER_RX_CMDID,
	.prb_req_filter_rx_cmdid = WMI_10_2_PRB_REQ_FILTER_RX_CMDID,
	.mgmt_tx_cmdid = WMI_10_2_MGMT_TX_CMDID,
	.prb_tmpl_cmdid = WMI_CMD_UNSUPPORTED,
	.addba_clear_resp_cmdid = WMI_10_2_ADDBA_CLEAR_RESP_CMDID,
	.addba_send_cmdid = WMI_10_2_ADDBA_SEND_CMDID,
	.addba_status_cmdid = WMI_10_2_ADDBA_STATUS_CMDID,
	.delba_send_cmdid = WMI_10_2_DELBA_SEND_CMDID,
	.addba_set_resp_cmdid = WMI_10_2_ADDBA_SET_RESP_CMDID,
	.send_singleamsdu_cmdid = WMI_10_2_SEND_SINGLEAMSDU_CMDID,
	.sta_powersave_mode_cmdid = WMI_10_2_STA_POWERSAVE_MODE_CMDID,
	.sta_powersave_param_cmdid = WMI_10_2_STA_POWERSAVE_PARAM_CMDID,
	.sta_mimo_ps_mode_cmdid = WMI_10_2_STA_MIMO_PS_MODE_CMDID,
	.pdev_dfs_enable_cmdid = WMI_10_2_PDEV_DFS_ENABLE_CMDID,
	.pdev_dfs_disable_cmdid = WMI_10_2_PDEV_DFS_DISABLE_CMDID,
	.roam_scan_mode = WMI_10_2_ROAM_SCAN_MODE,
	.roam_scan_rssi_threshold = WMI_10_2_ROAM_SCAN_RSSI_THRESHOLD,
	.roam_scan_period = WMI_10_2_ROAM_SCAN_PERIOD,
	.roam_scan_rssi_change_threshold = WMI_10_2_ROAM_SCAN_RSSI_CHANGE_THRESHOLD,
	.roam_ap_profile = WMI_10_2_ROAM_AP_PROFILE,
	.ofl_scan_add_ap_profile = WMI_10_2_OFL_SCAN_ADD_AP_PROFILE,
	.ofl_scan_remove_ap_profile = WMI_10_2_OFL_SCAN_REMOVE_AP_PROFILE,
	.ofl_scan_period = WMI_10_2_OFL_SCAN_PERIOD,
	.p2p_dev_set_device_info = WMI_10_2_P2P_DEV_SET_DEVICE_INFO,
	.p2p_dev_set_discoverability = WMI_10_2_P2P_DEV_SET_DISCOVERABILITY,
	.p2p_go_set_beacon_ie = WMI_10_2_P2P_GO_SET_BEACON_IE,
	.p2p_go_set_probe_resp_ie = WMI_10_2_P2P_GO_SET_PROBE_RESP_IE,
	.p2p_set_vendor_ie_data_cmdid = WMI_CMD_UNSUPPORTED,
	.ap_ps_peer_param_cmdid = WMI_10_2_AP_PS_PEER_PARAM_CMDID,
	.ap_ps_peer_uapsd_coex_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_rate_retry_sched_cmdid = WMI_10_2_PEER_RATE_RETRY_SCHED_CMDID,
	.wlan_profile_trigger_cmdid = WMI_10_2_WLAN_PROFILE_TRIGGER_CMDID,
	.wlan_profile_set_hist_intvl_cmdid = WMI_10_2_WLAN_PROFILE_SET_HIST_INTVL_CMDID,
	.wlan_profile_get_profile_data_cmdid = WMI_10_2_WLAN_PROFILE_GET_PROFILE_DATA_CMDID,
	.wlan_profile_enable_profile_id_cmdid = WMI_10_2_WLAN_PROFILE_ENABLE_PROFILE_ID_CMDID,
	.wlan_profile_list_profile_id_cmdid = WMI_10_2_WLAN_PROFILE_LIST_PROFILE_ID_CMDID,
	.pdev_suspend_cmdid = WMI_10_2_PDEV_SUSPEND_CMDID,
	.pdev_resume_cmdid = WMI_10_2_PDEV_RESUME_CMDID,
	.add_bcn_filter_cmdid = WMI_10_2_ADD_BCN_FILTER_CMDID,
	.rmv_bcn_filter_cmdid = WMI_10_2_RMV_BCN_FILTER_CMDID,
	.wow_add_wake_pattern_cmdid = WMI_10_2_WOW_ADD_WAKE_PATTERN_CMDID,
	.wow_del_wake_pattern_cmdid = WMI_10_2_WOW_DEL_WAKE_PATTERN_CMDID,
	.wow_enable_disable_wake_event_cmdid = WMI_10_2_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID,
	.wow_enable_cmdid = WMI_10_2_WOW_ENABLE_CMDID,
	.wow_hostwakeup_from_sleep_cmdid = WMI_10_2_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID,
	.rtt_measreq_cmdid = WMI_10_2_RTT_MEASREQ_CMDID,
	.rtt_tsf_cmdid = WMI_10_2_RTT_TSF_CMDID,
	.vdev_spectral_scan_configure_cmdid = WMI_10_2_VDEV_SPECTRAL_SCAN_CONFIGURE_CMDID,
	.vdev_spectral_scan_enable_cmdid = WMI_10_2_VDEV_SPECTRAL_SCAN_ENABLE_CMDID,
	.request_stats_cmdid = WMI_10_2_REQUEST_STATS_CMDID,
	.set_arp_ns_offload_cmdid = WMI_CMD_UNSUPPORTED,
	.network_list_offload_config_cmdid = WMI_CMD_UNSUPPORTED,
	.gtk_offload_cmdid = WMI_CMD_UNSUPPORTED,
	.csa_offload_enable_cmdid = WMI_CMD_UNSUPPORTED,
	.csa_offload_chanswitch_cmdid = WMI_CMD_UNSUPPORTED,
	.chatter_set_mode_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_tid_addba_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_tid_delba_cmdid = WMI_CMD_UNSUPPORTED,
	.sta_dtim_ps_method_cmdid = WMI_CMD_UNSUPPORTED,
	.sta_uapsd_auto_trig_cmdid = WMI_CMD_UNSUPPORTED,
	.sta_keepalive_cmd = WMI_CMD_UNSUPPORTED,
	.echo_cmdid = WMI_10_2_ECHO_CMDID,
	.pdev_utf_cmdid = WMI_10_2_PDEV_UTF_CMDID,
	.dbglog_cfg_cmdid = WMI_10_2_DBGLOG_CFG_CMDID,
	.pdev_qvit_cmdid = WMI_10_2_PDEV_QVIT_CMDID,
	.pdev_ftm_intg_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_set_keepalive_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_get_keepalive_cmdid = WMI_CMD_UNSUPPORTED,
	.force_fw_hang_cmdid = WMI_CMD_UNSUPPORTED,
	.gpio_config_cmdid = WMI_10_2_GPIO_CONFIG_CMDID,
	.gpio_output_cmdid = WMI_10_2_GPIO_OUTPUT_CMDID,
	.pdev_get_temperature_cmdid = WMI_10_2_PDEV_GET_TEMPERATURE_CMDID,
	.pdev_enable_adaptive_cca_cmdid = WMI_10_2_SET_CCA_PARAMS,
	.scan_update_request_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_standby_response_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_resume_response_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_add_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_evict_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_restore_peer_cmdid = WMI_CMD_UNSUPPORTED,
	.wlan_peer_caching_print_all_peers_info_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_update_wds_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_add_proxy_sta_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.rtt_keepalive_cmdid = WMI_CMD_UNSUPPORTED,
	.oem_req_cmdid = WMI_CMD_UNSUPPORTED,
	.nan_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_ratemask_cmdid = WMI_CMD_UNSUPPORTED,
	.qboost_cfg_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_smart_ant_enable_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_smart_ant_set_rx_antenna_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_tx_antenna_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_train_info_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_smart_ant_set_node_config_ops_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_antenna_switch_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_ctl_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_mimogain_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_ratepwr_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_ratepwr_chainmsk_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_fips_cmdid = WMI_CMD_UNSUPPORTED,
	.tt_set_conf_cmdid = WMI_CMD_UNSUPPORTED,
	.fwtest_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_atf_request_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_atf_request_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ani_cck_config_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ani_ofdm_config_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_reserve_ast_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_nfcal_power_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_tpc_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ast_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_set_dscp_tid_map_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_get_info_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_filter_neighbor_rx_packets_cmdid = WMI_CMD_UNSUPPORTED,
	.mu_cal_start_cmdid = WMI_CMD_UNSUPPORTED,
	.set_cca_params_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_bss_chan_info_request_cmdid = WMI_10_2_PDEV_BSS_CHAN_INFO_REQUEST_CMDID,
	.pdev_get_tpc_table_cmdid = WMI_CMD_UNSUPPORTED,
	.radar_found_cmdid = WMI_CMD_UNSUPPORTED,
	.set_bb_timing_cmdid = WMI_10_2_PDEV_SET_BB_TIMING_CONFIG_CMDID,
};

/* 10.4 WMI cmd track */
static struct wmi_cmd_map wmi_10_4_cmd_map = {
	.init_cmdid = WMI_10_4_INIT_CMDID,
	.start_scan_cmdid = WMI_10_4_START_SCAN_CMDID,
	.stop_scan_cmdid = WMI_10_4_STOP_SCAN_CMDID,
	.scan_chan_list_cmdid = WMI_10_4_SCAN_CHAN_LIST_CMDID,
	.scan_sch_prio_tbl_cmdid = WMI_10_4_SCAN_SCH_PRIO_TBL_CMDID,
	.scan_prob_req_oui_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_regdomain_cmdid = WMI_10_4_PDEV_SET_REGDOMAIN_CMDID,
	.pdev_set_channel_cmdid = WMI_10_4_PDEV_SET_CHANNEL_CMDID,
	.pdev_set_param_cmdid = WMI_10_4_PDEV_SET_PARAM_CMDID,
	.pdev_pktlog_enable_cmdid = WMI_10_4_PDEV_PKTLOG_ENABLE_CMDID,
	.pdev_pktlog_disable_cmdid = WMI_10_4_PDEV_PKTLOG_DISABLE_CMDID,
	.pdev_set_wmm_params_cmdid = WMI_10_4_PDEV_SET_WMM_PARAMS_CMDID,
	.pdev_set_ht_cap_ie_cmdid = WMI_10_4_PDEV_SET_HT_CAP_IE_CMDID,
	.pdev_set_vht_cap_ie_cmdid = WMI_10_4_PDEV_SET_VHT_CAP_IE_CMDID,
	.pdev_set_dscp_tid_map_cmdid = WMI_10_4_PDEV_SET_DSCP_TID_MAP_CMDID,
	.pdev_set_quiet_mode_cmdid = WMI_10_4_PDEV_SET_QUIET_MODE_CMDID,
	.pdev_green_ap_ps_enable_cmdid = WMI_10_4_PDEV_GREEN_AP_PS_ENABLE_CMDID,
	.pdev_get_tpc_config_cmdid = WMI_10_4_PDEV_GET_TPC_CONFIG_CMDID,
	.pdev_set_base_macaddr_cmdid = WMI_10_4_PDEV_SET_BASE_MACADDR_CMDID,
	.vdev_create_cmdid = WMI_10_4_VDEV_CREATE_CMDID,
	.vdev_delete_cmdid = WMI_10_4_VDEV_DELETE_CMDID,
	.vdev_start_request_cmdid = WMI_10_4_VDEV_START_REQUEST_CMDID,
	.vdev_restart_request_cmdid = WMI_10_4_VDEV_RESTART_REQUEST_CMDID,
	.vdev_up_cmdid = WMI_10_4_VDEV_UP_CMDID,
	.vdev_stop_cmdid = WMI_10_4_VDEV_STOP_CMDID,
	.vdev_down_cmdid = WMI_10_4_VDEV_DOWN_CMDID,
	.vdev_set_param_cmdid = WMI_10_4_VDEV_SET_PARAM_CMDID,
	.vdev_install_key_cmdid = WMI_10_4_VDEV_INSTALL_KEY_CMDID,
	.peer_create_cmdid = WMI_10_4_PEER_CREATE_CMDID,
	.peer_delete_cmdid = WMI_10_4_PEER_DELETE_CMDID,
	.peer_flush_tids_cmdid = WMI_10_4_PEER_FLUSH_TIDS_CMDID,
	.peer_set_param_cmdid =
WMI_10_4_PEER_SET_PARAM_CMDID, .peer_assoc_cmdid = WMI_10_4_PEER_ASSOC_CMDID, .peer_add_wds_entry_cmdid = WMI_10_4_PEER_ADD_WDS_ENTRY_CMDID, .peer_remove_wds_entry_cmdid = WMI_10_4_PEER_REMOVE_WDS_ENTRY_CMDID, .peer_mcast_group_cmdid = WMI_10_4_PEER_MCAST_GROUP_CMDID, .bcn_tx_cmdid = WMI_10_4_BCN_TX_CMDID, .pdev_send_bcn_cmdid = WMI_10_4_PDEV_SEND_BCN_CMDID, .bcn_tmpl_cmdid = WMI_10_4_BCN_PRB_TMPL_CMDID, .bcn_filter_rx_cmdid = WMI_10_4_BCN_FILTER_RX_CMDID, .prb_req_filter_rx_cmdid = WMI_10_4_PRB_REQ_FILTER_RX_CMDID, .mgmt_tx_cmdid = WMI_10_4_MGMT_TX_CMDID, .prb_tmpl_cmdid = WMI_10_4_PRB_TMPL_CMDID, .addba_clear_resp_cmdid = WMI_10_4_ADDBA_CLEAR_RESP_CMDID, .addba_send_cmdid = WMI_10_4_ADDBA_SEND_CMDID, .addba_status_cmdid = WMI_10_4_ADDBA_STATUS_CMDID, .delba_send_cmdid = WMI_10_4_DELBA_SEND_CMDID, .addba_set_resp_cmdid = WMI_10_4_ADDBA_SET_RESP_CMDID, .send_singleamsdu_cmdid = WMI_10_4_SEND_SINGLEAMSDU_CMDID, .sta_powersave_mode_cmdid = WMI_10_4_STA_POWERSAVE_MODE_CMDID, .sta_powersave_param_cmdid = WMI_10_4_STA_POWERSAVE_PARAM_CMDID, .sta_mimo_ps_mode_cmdid = WMI_10_4_STA_MIMO_PS_MODE_CMDID, .pdev_dfs_enable_cmdid = WMI_10_4_PDEV_DFS_ENABLE_CMDID, .pdev_dfs_disable_cmdid = WMI_10_4_PDEV_DFS_DISABLE_CMDID, .roam_scan_mode = WMI_10_4_ROAM_SCAN_MODE, .roam_scan_rssi_threshold = WMI_10_4_ROAM_SCAN_RSSI_THRESHOLD, .roam_scan_period = WMI_10_4_ROAM_SCAN_PERIOD, .roam_scan_rssi_change_threshold = WMI_10_4_ROAM_SCAN_RSSI_CHANGE_THRESHOLD, .roam_ap_profile = WMI_10_4_ROAM_AP_PROFILE, .ofl_scan_add_ap_profile = WMI_10_4_OFL_SCAN_ADD_AP_PROFILE, .ofl_scan_remove_ap_profile = WMI_10_4_OFL_SCAN_REMOVE_AP_PROFILE, .ofl_scan_period = WMI_10_4_OFL_SCAN_PERIOD, .p2p_dev_set_device_info = WMI_10_4_P2P_DEV_SET_DEVICE_INFO, .p2p_dev_set_discoverability = WMI_10_4_P2P_DEV_SET_DISCOVERABILITY, .p2p_go_set_beacon_ie = WMI_10_4_P2P_GO_SET_BEACON_IE, .p2p_go_set_probe_resp_ie = WMI_10_4_P2P_GO_SET_PROBE_RESP_IE, .p2p_set_vendor_ie_data_cmdid = WMI_10_4_P2P_SET_VENDOR_IE_DATA_CMDID, .ap_ps_peer_param_cmdid = WMI_10_4_AP_PS_PEER_PARAM_CMDID, .ap_ps_peer_uapsd_coex_cmdid = WMI_10_4_AP_PS_PEER_UAPSD_COEX_CMDID, .peer_rate_retry_sched_cmdid = WMI_10_4_PEER_RATE_RETRY_SCHED_CMDID, .wlan_profile_trigger_cmdid = WMI_10_4_WLAN_PROFILE_TRIGGER_CMDID, .wlan_profile_set_hist_intvl_cmdid = WMI_10_4_WLAN_PROFILE_SET_HIST_INTVL_CMDID, .wlan_profile_get_profile_data_cmdid = WMI_10_4_WLAN_PROFILE_GET_PROFILE_DATA_CMDID, .wlan_profile_enable_profile_id_cmdid = WMI_10_4_WLAN_PROFILE_ENABLE_PROFILE_ID_CMDID, .wlan_profile_list_profile_id_cmdid = WMI_10_4_WLAN_PROFILE_LIST_PROFILE_ID_CMDID, .pdev_suspend_cmdid = WMI_10_4_PDEV_SUSPEND_CMDID, .pdev_resume_cmdid = WMI_10_4_PDEV_RESUME_CMDID, .add_bcn_filter_cmdid = WMI_10_4_ADD_BCN_FILTER_CMDID, .rmv_bcn_filter_cmdid = WMI_10_4_RMV_BCN_FILTER_CMDID, .wow_add_wake_pattern_cmdid = WMI_10_4_WOW_ADD_WAKE_PATTERN_CMDID, .wow_del_wake_pattern_cmdid = WMI_10_4_WOW_DEL_WAKE_PATTERN_CMDID, .wow_enable_disable_wake_event_cmdid = WMI_10_4_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID, .wow_enable_cmdid = WMI_10_4_WOW_ENABLE_CMDID, .wow_hostwakeup_from_sleep_cmdid = WMI_10_4_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID, .rtt_measreq_cmdid = WMI_10_4_RTT_MEASREQ_CMDID, .rtt_tsf_cmdid = WMI_10_4_RTT_TSF_CMDID, .vdev_spectral_scan_configure_cmdid = WMI_10_4_VDEV_SPECTRAL_SCAN_CONFIGURE_CMDID, .vdev_spectral_scan_enable_cmdid = WMI_10_4_VDEV_SPECTRAL_SCAN_ENABLE_CMDID, .request_stats_cmdid = WMI_10_4_REQUEST_STATS_CMDID, .set_arp_ns_offload_cmdid = WMI_CMD_UNSUPPORTED, .network_list_offload_config_cmdid = 
WMI_CMD_UNSUPPORTED, .gtk_offload_cmdid = WMI_10_4_GTK_OFFLOAD_CMDID, .csa_offload_enable_cmdid = WMI_10_4_CSA_OFFLOAD_ENABLE_CMDID, .csa_offload_chanswitch_cmdid = WMI_10_4_CSA_OFFLOAD_CHANSWITCH_CMDID, .chatter_set_mode_cmdid = WMI_CMD_UNSUPPORTED, .peer_tid_addba_cmdid = WMI_CMD_UNSUPPORTED, .peer_tid_delba_cmdid = WMI_CMD_UNSUPPORTED, .sta_dtim_ps_method_cmdid = WMI_CMD_UNSUPPORTED, .sta_uapsd_auto_trig_cmdid = WMI_CMD_UNSUPPORTED, .sta_keepalive_cmd = WMI_CMD_UNSUPPORTED, .echo_cmdid = WMI_10_4_ECHO_CMDID, .pdev_utf_cmdid = WMI_10_4_PDEV_UTF_CMDID, .dbglog_cfg_cmdid = WMI_10_4_DBGLOG_CFG_CMDID, .pdev_qvit_cmdid = WMI_10_4_PDEV_QVIT_CMDID, .pdev_ftm_intg_cmdid = WMI_CMD_UNSUPPORTED, .vdev_set_keepalive_cmdid = WMI_10_4_VDEV_SET_KEEPALIVE_CMDID, .vdev_get_keepalive_cmdid = WMI_10_4_VDEV_GET_KEEPALIVE_CMDID, .force_fw_hang_cmdid = WMI_10_4_FORCE_FW_HANG_CMDID, .gpio_config_cmdid = WMI_10_4_GPIO_CONFIG_CMDID, .gpio_output_cmdid = WMI_10_4_GPIO_OUTPUT_CMDID, .pdev_get_temperature_cmdid = WMI_10_4_PDEV_GET_TEMPERATURE_CMDID, .vdev_set_wmm_params_cmdid = WMI_CMD_UNSUPPORTED, .adaptive_qcs_cmdid = WMI_CMD_UNSUPPORTED, .scan_update_request_cmdid = WMI_10_4_SCAN_UPDATE_REQUEST_CMDID, .vdev_standby_response_cmdid = WMI_10_4_VDEV_STANDBY_RESPONSE_CMDID, .vdev_resume_response_cmdid = WMI_10_4_VDEV_RESUME_RESPONSE_CMDID, .wlan_peer_caching_add_peer_cmdid = WMI_10_4_WLAN_PEER_CACHING_ADD_PEER_CMDID, .wlan_peer_caching_evict_peer_cmdid = WMI_10_4_WLAN_PEER_CACHING_EVICT_PEER_CMDID, .wlan_peer_caching_restore_peer_cmdid = WMI_10_4_WLAN_PEER_CACHING_RESTORE_PEER_CMDID, .wlan_peer_caching_print_all_peers_info_cmdid = WMI_10_4_WLAN_PEER_CACHING_PRINT_ALL_PEERS_INFO_CMDID, .peer_update_wds_entry_cmdid = WMI_10_4_PEER_UPDATE_WDS_ENTRY_CMDID, .peer_add_proxy_sta_entry_cmdid = WMI_10_4_PEER_ADD_PROXY_STA_ENTRY_CMDID, .rtt_keepalive_cmdid = WMI_10_4_RTT_KEEPALIVE_CMDID, .oem_req_cmdid = WMI_10_4_OEM_REQ_CMDID, .nan_cmdid = WMI_10_4_NAN_CMDID, .vdev_ratemask_cmdid = WMI_10_4_VDEV_RATEMASK_CMDID, .qboost_cfg_cmdid = WMI_10_4_QBOOST_CFG_CMDID, .pdev_smart_ant_enable_cmdid = WMI_10_4_PDEV_SMART_ANT_ENABLE_CMDID, .pdev_smart_ant_set_rx_antenna_cmdid = WMI_10_4_PDEV_SMART_ANT_SET_RX_ANTENNA_CMDID, .peer_smart_ant_set_tx_antenna_cmdid = WMI_10_4_PEER_SMART_ANT_SET_TX_ANTENNA_CMDID, .peer_smart_ant_set_train_info_cmdid = WMI_10_4_PEER_SMART_ANT_SET_TRAIN_INFO_CMDID, .peer_smart_ant_set_node_config_ops_cmdid = WMI_10_4_PEER_SMART_ANT_SET_NODE_CONFIG_OPS_CMDID, .pdev_set_antenna_switch_table_cmdid = WMI_10_4_PDEV_SET_ANTENNA_SWITCH_TABLE_CMDID, .pdev_set_ctl_table_cmdid = WMI_10_4_PDEV_SET_CTL_TABLE_CMDID, .pdev_set_mimogain_table_cmdid = WMI_10_4_PDEV_SET_MIMOGAIN_TABLE_CMDID, .pdev_ratepwr_table_cmdid = WMI_10_4_PDEV_RATEPWR_TABLE_CMDID, .pdev_ratepwr_chainmsk_table_cmdid = WMI_10_4_PDEV_RATEPWR_CHAINMSK_TABLE_CMDID, .pdev_fips_cmdid = WMI_10_4_PDEV_FIPS_CMDID, .tt_set_conf_cmdid = WMI_10_4_TT_SET_CONF_CMDID, .fwtest_cmdid = WMI_10_4_FWTEST_CMDID, .vdev_atf_request_cmdid = WMI_10_4_VDEV_ATF_REQUEST_CMDID, .peer_atf_request_cmdid = WMI_10_4_PEER_ATF_REQUEST_CMDID, .pdev_get_ani_cck_config_cmdid = WMI_10_4_PDEV_GET_ANI_CCK_CONFIG_CMDID, .pdev_get_ani_ofdm_config_cmdid = WMI_10_4_PDEV_GET_ANI_OFDM_CONFIG_CMDID, .pdev_reserve_ast_entry_cmdid = WMI_10_4_PDEV_RESERVE_AST_ENTRY_CMDID, .pdev_get_nfcal_power_cmdid = WMI_10_4_PDEV_GET_NFCAL_POWER_CMDID, .pdev_get_tpc_cmdid = WMI_10_4_PDEV_GET_TPC_CMDID, .pdev_get_ast_info_cmdid = WMI_10_4_PDEV_GET_AST_INFO_CMDID, .vdev_set_dscp_tid_map_cmdid = 
WMI_10_4_VDEV_SET_DSCP_TID_MAP_CMDID, .pdev_get_info_cmdid = WMI_10_4_PDEV_GET_INFO_CMDID, .vdev_get_info_cmdid = WMI_10_4_VDEV_GET_INFO_CMDID, .vdev_filter_neighbor_rx_packets_cmdid = WMI_10_4_VDEV_FILTER_NEIGHBOR_RX_PACKETS_CMDID, .mu_cal_start_cmdid = WMI_10_4_MU_CAL_START_CMDID, .set_cca_params_cmdid = WMI_10_4_SET_CCA_PARAMS_CMDID, .pdev_bss_chan_info_request_cmdid = WMI_10_4_PDEV_BSS_CHAN_INFO_REQUEST_CMDID, .ext_resource_cfg_cmdid = WMI_10_4_EXT_RESOURCE_CFG_CMDID, .vdev_set_ie_cmdid = WMI_10_4_VDEV_SET_IE_CMDID, .set_lteu_config_cmdid = WMI_10_4_SET_LTEU_CONFIG_CMDID, .atf_ssid_grouping_request_cmdid = WMI_10_4_ATF_SSID_GROUPING_REQUEST_CMDID, .peer_atf_ext_request_cmdid = WMI_10_4_PEER_ATF_EXT_REQUEST_CMDID, .set_periodic_channel_stats_cfg_cmdid = WMI_10_4_SET_PERIODIC_CHANNEL_STATS_CONFIG, .peer_bwf_request_cmdid = WMI_10_4_PEER_BWF_REQUEST_CMDID, .btcoex_cfg_cmdid = WMI_10_4_BTCOEX_CFG_CMDID, .peer_tx_mu_txmit_count_cmdid = WMI_10_4_PEER_TX_MU_TXMIT_COUNT_CMDID, .peer_tx_mu_txmit_rstcnt_cmdid = WMI_10_4_PEER_TX_MU_TXMIT_RSTCNT_CMDID, .peer_gid_userpos_list_cmdid = WMI_10_4_PEER_GID_USERPOS_LIST_CMDID, .pdev_check_cal_version_cmdid = WMI_10_4_PDEV_CHECK_CAL_VERSION_CMDID, .coex_version_cfg_cmid = WMI_10_4_COEX_VERSION_CFG_CMID, .pdev_get_rx_filter_cmdid = WMI_10_4_PDEV_GET_RX_FILTER_CMDID, .pdev_extended_nss_cfg_cmdid = WMI_10_4_PDEV_EXTENDED_NSS_CFG_CMDID, .vdev_set_scan_nac_rssi_cmdid = WMI_10_4_VDEV_SET_SCAN_NAC_RSSI_CMDID, .prog_gpio_band_select_cmdid = WMI_10_4_PROG_GPIO_BAND_SELECT_CMDID, .config_smart_logging_cmdid = WMI_10_4_CONFIG_SMART_LOGGING_CMDID, .debug_fatal_condition_cmdid = WMI_10_4_DEBUG_FATAL_CONDITION_CMDID, .get_tsf_timer_cmdid = WMI_10_4_GET_TSF_TIMER_CMDID, .pdev_get_tpc_table_cmdid = WMI_10_4_PDEV_GET_TPC_TABLE_CMDID, .vdev_sifs_trigger_time_cmdid = WMI_10_4_VDEV_SIFS_TRIGGER_TIME_CMDID, .pdev_wds_entry_list_cmdid = WMI_10_4_PDEV_WDS_ENTRY_LIST_CMDID, .tdls_set_state_cmdid = WMI_10_4_TDLS_SET_STATE_CMDID, .tdls_peer_update_cmdid = WMI_10_4_TDLS_PEER_UPDATE_CMDID, .tdls_set_offchan_mode_cmdid = WMI_10_4_TDLS_SET_OFFCHAN_MODE_CMDID, .radar_found_cmdid = WMI_10_4_RADAR_FOUND_CMDID, .per_peer_per_tid_config_cmdid = WMI_10_4_PER_PEER_PER_TID_CONFIG_CMDID, }; static struct wmi_peer_param_map wmi_peer_param_map = { .smps_state = WMI_PEER_SMPS_STATE, .ampdu = WMI_PEER_AMPDU, .authorize = WMI_PEER_AUTHORIZE, .chan_width = WMI_PEER_CHAN_WIDTH, .nss = WMI_PEER_NSS, .use_4addr = WMI_PEER_USE_4ADDR, .use_fixed_power = WMI_PEER_USE_FIXED_PWR, .debug = WMI_PEER_DEBUG, .phymode = WMI_PEER_PHYMODE, .dummy_var = WMI_PEER_DUMMY_VAR, }; /* MAIN WMI VDEV param map */ static struct wmi_vdev_param_map wmi_vdev_param_map = { .rts_threshold = WMI_VDEV_PARAM_RTS_THRESHOLD, .fragmentation_threshold = WMI_VDEV_PARAM_FRAGMENTATION_THRESHOLD, .beacon_interval = WMI_VDEV_PARAM_BEACON_INTERVAL, .listen_interval = WMI_VDEV_PARAM_LISTEN_INTERVAL, .multicast_rate = WMI_VDEV_PARAM_MULTICAST_RATE, .mgmt_tx_rate = WMI_VDEV_PARAM_MGMT_TX_RATE, .slot_time = WMI_VDEV_PARAM_SLOT_TIME, .preamble = WMI_VDEV_PARAM_PREAMBLE, .swba_time = WMI_VDEV_PARAM_SWBA_TIME, .wmi_vdev_stats_update_period = WMI_VDEV_STATS_UPDATE_PERIOD, .wmi_vdev_pwrsave_ageout_time = WMI_VDEV_PWRSAVE_AGEOUT_TIME, .wmi_vdev_host_swba_interval = WMI_VDEV_HOST_SWBA_INTERVAL, .dtim_period = WMI_VDEV_PARAM_DTIM_PERIOD, .wmi_vdev_oc_scheduler_air_time_limit = WMI_VDEV_OC_SCHEDULER_AIR_TIME_LIMIT, .wds = WMI_VDEV_PARAM_WDS, .atim_window = WMI_VDEV_PARAM_ATIM_WINDOW, .bmiss_count_max = WMI_VDEV_PARAM_BMISS_COUNT_MAX, .bmiss_first_bcnt = 
WMI_VDEV_PARAM_BMISS_FIRST_BCNT, .bmiss_final_bcnt = WMI_VDEV_PARAM_BMISS_FINAL_BCNT, .feature_wmm = WMI_VDEV_PARAM_FEATURE_WMM, .chwidth = WMI_VDEV_PARAM_CHWIDTH, .chextoffset = WMI_VDEV_PARAM_CHEXTOFFSET, .disable_htprotection = WMI_VDEV_PARAM_DISABLE_HTPROTECTION, .sta_quickkickout = WMI_VDEV_PARAM_STA_QUICKKICKOUT, .mgmt_rate = WMI_VDEV_PARAM_MGMT_RATE, .protection_mode = WMI_VDEV_PARAM_PROTECTION_MODE, .fixed_rate = WMI_VDEV_PARAM_FIXED_RATE, .sgi = WMI_VDEV_PARAM_SGI, .ldpc = WMI_VDEV_PARAM_LDPC, .tx_stbc = WMI_VDEV_PARAM_TX_STBC, .rx_stbc = WMI_VDEV_PARAM_RX_STBC, .intra_bss_fwd = WMI_VDEV_PARAM_INTRA_BSS_FWD, .def_keyid = WMI_VDEV_PARAM_DEF_KEYID, .nss = WMI_VDEV_PARAM_NSS, .bcast_data_rate = WMI_VDEV_PARAM_BCAST_DATA_RATE, .mcast_data_rate = WMI_VDEV_PARAM_MCAST_DATA_RATE, .mcast_indicate = WMI_VDEV_PARAM_MCAST_INDICATE, .dhcp_indicate = WMI_VDEV_PARAM_DHCP_INDICATE, .unknown_dest_indicate = WMI_VDEV_PARAM_UNKNOWN_DEST_INDICATE, .ap_keepalive_min_idle_inactive_time_secs = WMI_VDEV_PARAM_AP_KEEPALIVE_MIN_IDLE_INACTIVE_TIME_SECS, .ap_keepalive_max_idle_inactive_time_secs = WMI_VDEV_PARAM_AP_KEEPALIVE_MAX_IDLE_INACTIVE_TIME_SECS, .ap_keepalive_max_unresponsive_time_secs = WMI_VDEV_PARAM_AP_KEEPALIVE_MAX_UNRESPONSIVE_TIME_SECS, .ap_enable_nawds = WMI_VDEV_PARAM_AP_ENABLE_NAWDS, .mcast2ucast_set = WMI_VDEV_PARAM_UNSUPPORTED, .enable_rtscts = WMI_VDEV_PARAM_ENABLE_RTSCTS, .txbf = WMI_VDEV_PARAM_TXBF, .packet_powersave = WMI_VDEV_PARAM_PACKET_POWERSAVE, .drop_unencry = WMI_VDEV_PARAM_DROP_UNENCRY, .tx_encap_type = WMI_VDEV_PARAM_TX_ENCAP_TYPE, .ap_detect_out_of_sync_sleeping_sta_time_secs = WMI_VDEV_PARAM_UNSUPPORTED, .rc_num_retries = WMI_VDEV_PARAM_UNSUPPORTED, .cabq_maxdur = WMI_VDEV_PARAM_UNSUPPORTED, .mfptest_set = WMI_VDEV_PARAM_UNSUPPORTED, .rts_fixed_rate = WMI_VDEV_PARAM_UNSUPPORTED, .vht_sgimask = WMI_VDEV_PARAM_UNSUPPORTED, .vht80_ratemask = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_adjust_enable = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_tgt_bmiss_num = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_bmiss_sample_cycle = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_slop_step = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_init_slop = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_adjust_pause = WMI_VDEV_PARAM_UNSUPPORTED, .proxy_sta = WMI_VDEV_PARAM_UNSUPPORTED, .meru_vc = WMI_VDEV_PARAM_UNSUPPORTED, .rx_decap_type = WMI_VDEV_PARAM_UNSUPPORTED, .bw_nss_ratemask = WMI_VDEV_PARAM_UNSUPPORTED, .disable_4addr_src_lrn = WMI_VDEV_PARAM_UNSUPPORTED, .rtt_responder_role = WMI_VDEV_PARAM_UNSUPPORTED, }; /* 10.X WMI VDEV param map */ static struct wmi_vdev_param_map wmi_10x_vdev_param_map = { .rts_threshold = WMI_10X_VDEV_PARAM_RTS_THRESHOLD, .fragmentation_threshold = WMI_10X_VDEV_PARAM_FRAGMENTATION_THRESHOLD, .beacon_interval = WMI_10X_VDEV_PARAM_BEACON_INTERVAL, .listen_interval = WMI_10X_VDEV_PARAM_LISTEN_INTERVAL, .multicast_rate = WMI_10X_VDEV_PARAM_MULTICAST_RATE, .mgmt_tx_rate = WMI_10X_VDEV_PARAM_MGMT_TX_RATE, .slot_time = WMI_10X_VDEV_PARAM_SLOT_TIME, .preamble = WMI_10X_VDEV_PARAM_PREAMBLE, .swba_time = WMI_10X_VDEV_PARAM_SWBA_TIME, .wmi_vdev_stats_update_period = WMI_10X_VDEV_STATS_UPDATE_PERIOD, .wmi_vdev_pwrsave_ageout_time = WMI_10X_VDEV_PWRSAVE_AGEOUT_TIME, .wmi_vdev_host_swba_interval = WMI_10X_VDEV_HOST_SWBA_INTERVAL, .dtim_period = WMI_10X_VDEV_PARAM_DTIM_PERIOD, .wmi_vdev_oc_scheduler_air_time_limit = WMI_10X_VDEV_OC_SCHEDULER_AIR_TIME_LIMIT, .wds = WMI_10X_VDEV_PARAM_WDS, .atim_window = WMI_10X_VDEV_PARAM_ATIM_WINDOW, .bmiss_count_max = WMI_10X_VDEV_PARAM_BMISS_COUNT_MAX, .bmiss_first_bcnt = 
WMI_VDEV_PARAM_UNSUPPORTED, .bmiss_final_bcnt = WMI_VDEV_PARAM_UNSUPPORTED, .feature_wmm = WMI_10X_VDEV_PARAM_FEATURE_WMM, .chwidth = WMI_10X_VDEV_PARAM_CHWIDTH, .chextoffset = WMI_10X_VDEV_PARAM_CHEXTOFFSET, .disable_htprotection = WMI_10X_VDEV_PARAM_DISABLE_HTPROTECTION, .sta_quickkickout = WMI_10X_VDEV_PARAM_STA_QUICKKICKOUT, .mgmt_rate = WMI_10X_VDEV_PARAM_MGMT_RATE, .protection_mode = WMI_10X_VDEV_PARAM_PROTECTION_MODE, .fixed_rate = WMI_10X_VDEV_PARAM_FIXED_RATE, .sgi = WMI_10X_VDEV_PARAM_SGI, .ldpc = WMI_10X_VDEV_PARAM_LDPC, .tx_stbc = WMI_10X_VDEV_PARAM_TX_STBC, .rx_stbc = WMI_10X_VDEV_PARAM_RX_STBC, .intra_bss_fwd = WMI_10X_VDEV_PARAM_INTRA_BSS_FWD, .def_keyid = WMI_10X_VDEV_PARAM_DEF_KEYID, .nss = WMI_10X_VDEV_PARAM_NSS, .bcast_data_rate = WMI_10X_VDEV_PARAM_BCAST_DATA_RATE, .mcast_data_rate = WMI_10X_VDEV_PARAM_MCAST_DATA_RATE, .mcast_indicate = WMI_10X_VDEV_PARAM_MCAST_INDICATE, .dhcp_indicate = WMI_10X_VDEV_PARAM_DHCP_INDICATE, .unknown_dest_indicate = WMI_10X_VDEV_PARAM_UNKNOWN_DEST_INDICATE, .ap_keepalive_min_idle_inactive_time_secs = WMI_10X_VDEV_PARAM_AP_KEEPALIVE_MIN_IDLE_INACTIVE_TIME_SECS, .ap_keepalive_max_idle_inactive_time_secs = WMI_10X_VDEV_PARAM_AP_KEEPALIVE_MAX_IDLE_INACTIVE_TIME_SECS, .ap_keepalive_max_unresponsive_time_secs = WMI_10X_VDEV_PARAM_AP_KEEPALIVE_MAX_UNRESPONSIVE_TIME_SECS, .ap_enable_nawds = WMI_10X_VDEV_PARAM_AP_ENABLE_NAWDS, .mcast2ucast_set = WMI_10X_VDEV_PARAM_MCAST2UCAST_SET, .enable_rtscts = WMI_10X_VDEV_PARAM_ENABLE_RTSCTS, .txbf = WMI_VDEV_PARAM_UNSUPPORTED, .packet_powersave = WMI_VDEV_PARAM_UNSUPPORTED, .drop_unencry = WMI_VDEV_PARAM_UNSUPPORTED, .tx_encap_type = WMI_VDEV_PARAM_UNSUPPORTED, .ap_detect_out_of_sync_sleeping_sta_time_secs = WMI_10X_VDEV_PARAM_AP_DETECT_OUT_OF_SYNC_SLEEPING_STA_TIME_SECS, .rc_num_retries = WMI_VDEV_PARAM_UNSUPPORTED, .cabq_maxdur = WMI_VDEV_PARAM_UNSUPPORTED, .mfptest_set = WMI_VDEV_PARAM_UNSUPPORTED, .rts_fixed_rate = WMI_VDEV_PARAM_UNSUPPORTED, .vht_sgimask = WMI_VDEV_PARAM_UNSUPPORTED, .vht80_ratemask = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_adjust_enable = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_tgt_bmiss_num = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_bmiss_sample_cycle = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_slop_step = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_init_slop = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_adjust_pause = WMI_VDEV_PARAM_UNSUPPORTED, .proxy_sta = WMI_VDEV_PARAM_UNSUPPORTED, .meru_vc = WMI_VDEV_PARAM_UNSUPPORTED, .rx_decap_type = WMI_VDEV_PARAM_UNSUPPORTED, .bw_nss_ratemask = WMI_VDEV_PARAM_UNSUPPORTED, .disable_4addr_src_lrn = WMI_VDEV_PARAM_UNSUPPORTED, .rtt_responder_role = WMI_VDEV_PARAM_UNSUPPORTED, }; static struct wmi_vdev_param_map wmi_10_2_4_vdev_param_map = { .rts_threshold = WMI_10X_VDEV_PARAM_RTS_THRESHOLD, .fragmentation_threshold = WMI_10X_VDEV_PARAM_FRAGMENTATION_THRESHOLD, .beacon_interval = WMI_10X_VDEV_PARAM_BEACON_INTERVAL, .listen_interval = WMI_10X_VDEV_PARAM_LISTEN_INTERVAL, .multicast_rate = WMI_10X_VDEV_PARAM_MULTICAST_RATE, .mgmt_tx_rate = WMI_10X_VDEV_PARAM_MGMT_TX_RATE, .slot_time = WMI_10X_VDEV_PARAM_SLOT_TIME, .preamble = WMI_10X_VDEV_PARAM_PREAMBLE, .swba_time = WMI_10X_VDEV_PARAM_SWBA_TIME, .wmi_vdev_stats_update_period = WMI_10X_VDEV_STATS_UPDATE_PERIOD, .wmi_vdev_pwrsave_ageout_time = WMI_10X_VDEV_PWRSAVE_AGEOUT_TIME, .wmi_vdev_host_swba_interval = WMI_10X_VDEV_HOST_SWBA_INTERVAL, .dtim_period = WMI_10X_VDEV_PARAM_DTIM_PERIOD, .wmi_vdev_oc_scheduler_air_time_limit = WMI_10X_VDEV_OC_SCHEDULER_AIR_TIME_LIMIT, .wds = WMI_10X_VDEV_PARAM_WDS, .atim_window = 
WMI_10X_VDEV_PARAM_ATIM_WINDOW, .bmiss_count_max = WMI_10X_VDEV_PARAM_BMISS_COUNT_MAX, .bmiss_first_bcnt = WMI_VDEV_PARAM_UNSUPPORTED, .bmiss_final_bcnt = WMI_VDEV_PARAM_UNSUPPORTED, .feature_wmm = WMI_10X_VDEV_PARAM_FEATURE_WMM, .chwidth = WMI_10X_VDEV_PARAM_CHWIDTH, .chextoffset = WMI_10X_VDEV_PARAM_CHEXTOFFSET, .disable_htprotection = WMI_10X_VDEV_PARAM_DISABLE_HTPROTECTION, .sta_quickkickout = WMI_10X_VDEV_PARAM_STA_QUICKKICKOUT, .mgmt_rate = WMI_10X_VDEV_PARAM_MGMT_RATE, .protection_mode = WMI_10X_VDEV_PARAM_PROTECTION_MODE, .fixed_rate = WMI_10X_VDEV_PARAM_FIXED_RATE, .sgi = WMI_10X_VDEV_PARAM_SGI, .ldpc = WMI_10X_VDEV_PARAM_LDPC, .tx_stbc = WMI_10X_VDEV_PARAM_TX_STBC, .rx_stbc = WMI_10X_VDEV_PARAM_RX_STBC, .intra_bss_fwd = WMI_10X_VDEV_PARAM_INTRA_BSS_FWD, .def_keyid = WMI_10X_VDEV_PARAM_DEF_KEYID, .nss = WMI_10X_VDEV_PARAM_NSS, .bcast_data_rate = WMI_10X_VDEV_PARAM_BCAST_DATA_RATE, .mcast_data_rate = WMI_10X_VDEV_PARAM_MCAST_DATA_RATE, .mcast_indicate = WMI_10X_VDEV_PARAM_MCAST_INDICATE, .dhcp_indicate = WMI_10X_VDEV_PARAM_DHCP_INDICATE, .unknown_dest_indicate = WMI_10X_VDEV_PARAM_UNKNOWN_DEST_INDICATE, .ap_keepalive_min_idle_inactive_time_secs = WMI_10X_VDEV_PARAM_AP_KEEPALIVE_MIN_IDLE_INACTIVE_TIME_SECS, .ap_keepalive_max_idle_inactive_time_secs = WMI_10X_VDEV_PARAM_AP_KEEPALIVE_MAX_IDLE_INACTIVE_TIME_SECS, .ap_keepalive_max_unresponsive_time_secs = WMI_10X_VDEV_PARAM_AP_KEEPALIVE_MAX_UNRESPONSIVE_TIME_SECS, .ap_enable_nawds = WMI_10X_VDEV_PARAM_AP_ENABLE_NAWDS, .mcast2ucast_set = WMI_10X_VDEV_PARAM_MCAST2UCAST_SET, .enable_rtscts = WMI_10X_VDEV_PARAM_ENABLE_RTSCTS, .txbf = WMI_VDEV_PARAM_UNSUPPORTED, .packet_powersave = WMI_VDEV_PARAM_UNSUPPORTED, .drop_unencry = WMI_VDEV_PARAM_UNSUPPORTED, .tx_encap_type = WMI_VDEV_PARAM_UNSUPPORTED, .ap_detect_out_of_sync_sleeping_sta_time_secs = WMI_10X_VDEV_PARAM_AP_DETECT_OUT_OF_SYNC_SLEEPING_STA_TIME_SECS, .rc_num_retries = WMI_VDEV_PARAM_UNSUPPORTED, .cabq_maxdur = WMI_VDEV_PARAM_UNSUPPORTED, .mfptest_set = WMI_VDEV_PARAM_UNSUPPORTED, .rts_fixed_rate = WMI_VDEV_PARAM_UNSUPPORTED, .vht_sgimask = WMI_VDEV_PARAM_UNSUPPORTED, .vht80_ratemask = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_adjust_enable = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_tgt_bmiss_num = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_bmiss_sample_cycle = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_slop_step = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_init_slop = WMI_VDEV_PARAM_UNSUPPORTED, .early_rx_adjust_pause = WMI_VDEV_PARAM_UNSUPPORTED, .proxy_sta = WMI_VDEV_PARAM_UNSUPPORTED, .meru_vc = WMI_VDEV_PARAM_UNSUPPORTED, .rx_decap_type = WMI_VDEV_PARAM_UNSUPPORTED, .bw_nss_ratemask = WMI_VDEV_PARAM_UNSUPPORTED, .disable_4addr_src_lrn = WMI_VDEV_PARAM_UNSUPPORTED, .rtt_responder_role = WMI_VDEV_PARAM_UNSUPPORTED, }; static struct wmi_vdev_param_map wmi_10_4_vdev_param_map = { .rts_threshold = WMI_10_4_VDEV_PARAM_RTS_THRESHOLD, .fragmentation_threshold = WMI_10_4_VDEV_PARAM_FRAGMENTATION_THRESHOLD, .beacon_interval = WMI_10_4_VDEV_PARAM_BEACON_INTERVAL, .listen_interval = WMI_10_4_VDEV_PARAM_LISTEN_INTERVAL, .multicast_rate = WMI_10_4_VDEV_PARAM_MULTICAST_RATE, .mgmt_tx_rate = WMI_10_4_VDEV_PARAM_MGMT_TX_RATE, .slot_time = WMI_10_4_VDEV_PARAM_SLOT_TIME, .preamble = WMI_10_4_VDEV_PARAM_PREAMBLE, .swba_time = WMI_10_4_VDEV_PARAM_SWBA_TIME, .wmi_vdev_stats_update_period = WMI_10_4_VDEV_STATS_UPDATE_PERIOD, .wmi_vdev_pwrsave_ageout_time = WMI_10_4_VDEV_PWRSAVE_AGEOUT_TIME, .wmi_vdev_host_swba_interval = WMI_10_4_VDEV_HOST_SWBA_INTERVAL, .dtim_period = WMI_10_4_VDEV_PARAM_DTIM_PERIOD, 
.wmi_vdev_oc_scheduler_air_time_limit = WMI_10_4_VDEV_OC_SCHEDULER_AIR_TIME_LIMIT, .wds = WMI_10_4_VDEV_PARAM_WDS, .atim_window = WMI_10_4_VDEV_PARAM_ATIM_WINDOW, .bmiss_count_max = WMI_10_4_VDEV_PARAM_BMISS_COUNT_MAX, .bmiss_first_bcnt = WMI_10_4_VDEV_PARAM_BMISS_FIRST_BCNT, .bmiss_final_bcnt = WMI_10_4_VDEV_PARAM_BMISS_FINAL_BCNT, .feature_wmm = WMI_10_4_VDEV_PARAM_FEATURE_WMM, .chwidth = WMI_10_4_VDEV_PARAM_CHWIDTH, .chextoffset = WMI_10_4_VDEV_PARAM_CHEXTOFFSET, .disable_htprotection = WMI_10_4_VDEV_PARAM_DISABLE_HTPROTECTION, .sta_quickkickout = WMI_10_4_VDEV_PARAM_STA_QUICKKICKOUT, .mgmt_rate = WMI_10_4_VDEV_PARAM_MGMT_RATE, .protection_mode = WMI_10_4_VDEV_PARAM_PROTECTION_MODE, .fixed_rate = WMI_10_4_VDEV_PARAM_FIXED_RATE, .sgi = WMI_10_4_VDEV_PARAM_SGI, .ldpc = WMI_10_4_VDEV_PARAM_LDPC, .tx_stbc = WMI_10_4_VDEV_PARAM_TX_STBC, .rx_stbc = WMI_10_4_VDEV_PARAM_RX_STBC, .intra_bss_fwd = WMI_10_4_VDEV_PARAM_INTRA_BSS_FWD, .def_keyid = WMI_10_4_VDEV_PARAM_DEF_KEYID, .nss = WMI_10_4_VDEV_PARAM_NSS, .bcast_data_rate = WMI_10_4_VDEV_PARAM_BCAST_DATA_RATE, .mcast_data_rate = WMI_10_4_VDEV_PARAM_MCAST_DATA_RATE, .mcast_indicate = WMI_10_4_VDEV_PARAM_MCAST_INDICATE, .dhcp_indicate = WMI_10_4_VDEV_PARAM_DHCP_INDICATE, .unknown_dest_indicate = WMI_10_4_VDEV_PARAM_UNKNOWN_DEST_INDICATE, .ap_keepalive_min_idle_inactive_time_secs = WMI_10_4_VDEV_PARAM_AP_KEEPALIVE_MIN_IDLE_INACTIVE_TIME_SECS, .ap_keepalive_max_idle_inactive_time_secs = WMI_10_4_VDEV_PARAM_AP_KEEPALIVE_MAX_IDLE_INACTIVE_TIME_SECS, .ap_keepalive_max_unresponsive_time_secs = WMI_10_4_VDEV_PARAM_AP_KEEPALIVE_MAX_UNRESPONSIVE_TIME_SECS, .ap_enable_nawds = WMI_10_4_VDEV_PARAM_AP_ENABLE_NAWDS, .mcast2ucast_set = WMI_10_4_VDEV_PARAM_MCAST2UCAST_SET, .enable_rtscts = WMI_10_4_VDEV_PARAM_ENABLE_RTSCTS, .txbf = WMI_10_4_VDEV_PARAM_TXBF, .packet_powersave = WMI_10_4_VDEV_PARAM_PACKET_POWERSAVE, .drop_unencry = WMI_10_4_VDEV_PARAM_DROP_UNENCRY, .tx_encap_type = WMI_10_4_VDEV_PARAM_TX_ENCAP_TYPE, .ap_detect_out_of_sync_sleeping_sta_time_secs = WMI_10_4_VDEV_PARAM_AP_DETECT_OUT_OF_SYNC_SLEEPING_STA_TIME_SECS, .rc_num_retries = WMI_10_4_VDEV_PARAM_RC_NUM_RETRIES, .cabq_maxdur = WMI_10_4_VDEV_PARAM_CABQ_MAXDUR, .mfptest_set = WMI_10_4_VDEV_PARAM_MFPTEST_SET, .rts_fixed_rate = WMI_10_4_VDEV_PARAM_RTS_FIXED_RATE, .vht_sgimask = WMI_10_4_VDEV_PARAM_VHT_SGIMASK, .vht80_ratemask = WMI_10_4_VDEV_PARAM_VHT80_RATEMASK, .early_rx_adjust_enable = WMI_10_4_VDEV_PARAM_EARLY_RX_ADJUST_ENABLE, .early_rx_tgt_bmiss_num = WMI_10_4_VDEV_PARAM_EARLY_RX_TGT_BMISS_NUM, .early_rx_bmiss_sample_cycle = WMI_10_4_VDEV_PARAM_EARLY_RX_BMISS_SAMPLE_CYCLE, .early_rx_slop_step = WMI_10_4_VDEV_PARAM_EARLY_RX_SLOP_STEP, .early_rx_init_slop = WMI_10_4_VDEV_PARAM_EARLY_RX_INIT_SLOP, .early_rx_adjust_pause = WMI_10_4_VDEV_PARAM_EARLY_RX_ADJUST_PAUSE, .proxy_sta = WMI_10_4_VDEV_PARAM_PROXY_STA, .meru_vc = WMI_10_4_VDEV_PARAM_MERU_VC, .rx_decap_type = WMI_10_4_VDEV_PARAM_RX_DECAP_TYPE, .bw_nss_ratemask = WMI_10_4_VDEV_PARAM_BW_NSS_RATEMASK, .inc_tsf = WMI_10_4_VDEV_PARAM_TSF_INCREMENT, .dec_tsf = WMI_10_4_VDEV_PARAM_TSF_DECREMENT, .disable_4addr_src_lrn = WMI_10_4_VDEV_PARAM_DISABLE_4_ADDR_SRC_LRN, .rtt_responder_role = WMI_10_4_VDEV_PARAM_ENABLE_DISABLE_RTT_RESPONDER_ROLE, }; static struct wmi_pdev_param_map wmi_pdev_param_map = { .tx_chain_mask = WMI_PDEV_PARAM_TX_CHAIN_MASK, .rx_chain_mask = WMI_PDEV_PARAM_RX_CHAIN_MASK, .txpower_limit2g = WMI_PDEV_PARAM_TXPOWER_LIMIT2G, .txpower_limit5g = WMI_PDEV_PARAM_TXPOWER_LIMIT5G, .txpower_scale = WMI_PDEV_PARAM_TXPOWER_SCALE, 
.beacon_gen_mode = WMI_PDEV_PARAM_BEACON_GEN_MODE, .beacon_tx_mode = WMI_PDEV_PARAM_BEACON_TX_MODE, .resmgr_offchan_mode = WMI_PDEV_PARAM_RESMGR_OFFCHAN_MODE, .protection_mode = WMI_PDEV_PARAM_PROTECTION_MODE, .dynamic_bw = WMI_PDEV_PARAM_DYNAMIC_BW, .non_agg_sw_retry_th = WMI_PDEV_PARAM_NON_AGG_SW_RETRY_TH, .agg_sw_retry_th = WMI_PDEV_PARAM_AGG_SW_RETRY_TH, .sta_kickout_th = WMI_PDEV_PARAM_STA_KICKOUT_TH, .ac_aggrsize_scaling = WMI_PDEV_PARAM_AC_AGGRSIZE_SCALING, .ltr_enable = WMI_PDEV_PARAM_LTR_ENABLE, .ltr_ac_latency_be = WMI_PDEV_PARAM_LTR_AC_LATENCY_BE, .ltr_ac_latency_bk = WMI_PDEV_PARAM_LTR_AC_LATENCY_BK, .ltr_ac_latency_vi = WMI_PDEV_PARAM_LTR_AC_LATENCY_VI, .ltr_ac_latency_vo = WMI_PDEV_PARAM_LTR_AC_LATENCY_VO, .ltr_ac_latency_timeout = WMI_PDEV_PARAM_LTR_AC_LATENCY_TIMEOUT, .ltr_sleep_override = WMI_PDEV_PARAM_LTR_SLEEP_OVERRIDE, .ltr_rx_override = WMI_PDEV_PARAM_LTR_RX_OVERRIDE, .ltr_tx_activity_timeout = WMI_PDEV_PARAM_LTR_TX_ACTIVITY_TIMEOUT, .l1ss_enable = WMI_PDEV_PARAM_L1SS_ENABLE, .dsleep_enable = WMI_PDEV_PARAM_DSLEEP_ENABLE, .pcielp_txbuf_flush = WMI_PDEV_PARAM_PCIELP_TXBUF_FLUSH, .pcielp_txbuf_watermark = WMI_PDEV_PARAM_PCIELP_TXBUF_TMO_EN, .pcielp_txbuf_tmo_en = WMI_PDEV_PARAM_PCIELP_TXBUF_TMO_EN, .pcielp_txbuf_tmo_value = WMI_PDEV_PARAM_PCIELP_TXBUF_TMO_VALUE, .pdev_stats_update_period = WMI_PDEV_PARAM_PDEV_STATS_UPDATE_PERIOD, .vdev_stats_update_period = WMI_PDEV_PARAM_VDEV_STATS_UPDATE_PERIOD, .peer_stats_update_period = WMI_PDEV_PARAM_PEER_STATS_UPDATE_PERIOD, .bcnflt_stats_update_period = WMI_PDEV_PARAM_BCNFLT_STATS_UPDATE_PERIOD, .pmf_qos = WMI_PDEV_PARAM_PMF_QOS, .arp_ac_override = WMI_PDEV_PARAM_ARP_AC_OVERRIDE, .dcs = WMI_PDEV_PARAM_DCS, .ani_enable = WMI_PDEV_PARAM_ANI_ENABLE, .ani_poll_period = WMI_PDEV_PARAM_ANI_POLL_PERIOD, .ani_listen_period = WMI_PDEV_PARAM_ANI_LISTEN_PERIOD, .ani_ofdm_level = WMI_PDEV_PARAM_ANI_OFDM_LEVEL, .ani_cck_level = WMI_PDEV_PARAM_ANI_CCK_LEVEL, .dyntxchain = WMI_PDEV_PARAM_DYNTXCHAIN, .proxy_sta = WMI_PDEV_PARAM_PROXY_STA, .idle_ps_config = WMI_PDEV_PARAM_IDLE_PS_CONFIG, .power_gating_sleep = WMI_PDEV_PARAM_POWER_GATING_SLEEP, .fast_channel_reset = WMI_PDEV_PARAM_UNSUPPORTED, .burst_dur = WMI_PDEV_PARAM_UNSUPPORTED, .burst_enable = WMI_PDEV_PARAM_UNSUPPORTED, .cal_period = WMI_PDEV_PARAM_UNSUPPORTED, .aggr_burst = WMI_PDEV_PARAM_UNSUPPORTED, .rx_decap_mode = WMI_PDEV_PARAM_UNSUPPORTED, .smart_antenna_default_antenna = WMI_PDEV_PARAM_UNSUPPORTED, .igmpmld_override = WMI_PDEV_PARAM_UNSUPPORTED, .igmpmld_tid = WMI_PDEV_PARAM_UNSUPPORTED, .antenna_gain = WMI_PDEV_PARAM_UNSUPPORTED, .rx_filter = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast_to_ucast_tid = WMI_PDEV_PARAM_UNSUPPORTED, .proxy_sta_mode = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast2ucast_mode = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast2ucast_buffer = WMI_PDEV_PARAM_UNSUPPORTED, .remove_mcast2ucast_buffer = WMI_PDEV_PARAM_UNSUPPORTED, .peer_sta_ps_statechg_enable = WMI_PDEV_PARAM_UNSUPPORTED, .igmpmld_ac_override = WMI_PDEV_PARAM_UNSUPPORTED, .block_interbss = WMI_PDEV_PARAM_UNSUPPORTED, .set_disable_reset_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_msdu_ttl_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_ppdu_duration_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .txbf_sound_period_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_promisc_mode_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_burst_mode_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .en_stats = WMI_PDEV_PARAM_UNSUPPORTED, .mu_group_policy = WMI_PDEV_PARAM_UNSUPPORTED, .noise_detection = WMI_PDEV_PARAM_UNSUPPORTED, .noise_threshold = WMI_PDEV_PARAM_UNSUPPORTED, 
.dpd_enable = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast_bcast_echo = WMI_PDEV_PARAM_UNSUPPORTED, .atf_strict_sch = WMI_PDEV_PARAM_UNSUPPORTED, .atf_sched_duration = WMI_PDEV_PARAM_UNSUPPORTED, .ant_plzn = WMI_PDEV_PARAM_UNSUPPORTED, .mgmt_retry_limit = WMI_PDEV_PARAM_UNSUPPORTED, .sensitivity_level = WMI_PDEV_PARAM_UNSUPPORTED, .signed_txpower_2g = WMI_PDEV_PARAM_UNSUPPORTED, .signed_txpower_5g = WMI_PDEV_PARAM_UNSUPPORTED, .enable_per_tid_amsdu = WMI_PDEV_PARAM_UNSUPPORTED, .enable_per_tid_ampdu = WMI_PDEV_PARAM_UNSUPPORTED, .cca_threshold = WMI_PDEV_PARAM_UNSUPPORTED, .rts_fixed_rate = WMI_PDEV_PARAM_UNSUPPORTED, .pdev_reset = WMI_PDEV_PARAM_UNSUPPORTED, .wapi_mbssid_offset = WMI_PDEV_PARAM_UNSUPPORTED, .arp_srcaddr = WMI_PDEV_PARAM_UNSUPPORTED, .arp_dstaddr = WMI_PDEV_PARAM_UNSUPPORTED, .enable_btcoex = WMI_PDEV_PARAM_UNSUPPORTED, }; static struct wmi_pdev_param_map wmi_10x_pdev_param_map = { .tx_chain_mask = WMI_10X_PDEV_PARAM_TX_CHAIN_MASK, .rx_chain_mask = WMI_10X_PDEV_PARAM_RX_CHAIN_MASK, .txpower_limit2g = WMI_10X_PDEV_PARAM_TXPOWER_LIMIT2G, .txpower_limit5g = WMI_10X_PDEV_PARAM_TXPOWER_LIMIT5G, .txpower_scale = WMI_10X_PDEV_PARAM_TXPOWER_SCALE, .beacon_gen_mode = WMI_10X_PDEV_PARAM_BEACON_GEN_MODE, .beacon_tx_mode = WMI_10X_PDEV_PARAM_BEACON_TX_MODE, .resmgr_offchan_mode = WMI_10X_PDEV_PARAM_RESMGR_OFFCHAN_MODE, .protection_mode = WMI_10X_PDEV_PARAM_PROTECTION_MODE, .dynamic_bw = WMI_10X_PDEV_PARAM_DYNAMIC_BW, .non_agg_sw_retry_th = WMI_10X_PDEV_PARAM_NON_AGG_SW_RETRY_TH, .agg_sw_retry_th = WMI_10X_PDEV_PARAM_AGG_SW_RETRY_TH, .sta_kickout_th = WMI_10X_PDEV_PARAM_STA_KICKOUT_TH, .ac_aggrsize_scaling = WMI_10X_PDEV_PARAM_AC_AGGRSIZE_SCALING, .ltr_enable = WMI_10X_PDEV_PARAM_LTR_ENABLE, .ltr_ac_latency_be = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_BE, .ltr_ac_latency_bk = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_BK, .ltr_ac_latency_vi = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_VI, .ltr_ac_latency_vo = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_VO, .ltr_ac_latency_timeout = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_TIMEOUT, .ltr_sleep_override = WMI_10X_PDEV_PARAM_LTR_SLEEP_OVERRIDE, .ltr_rx_override = WMI_10X_PDEV_PARAM_LTR_RX_OVERRIDE, .ltr_tx_activity_timeout = WMI_10X_PDEV_PARAM_LTR_TX_ACTIVITY_TIMEOUT, .l1ss_enable = WMI_10X_PDEV_PARAM_L1SS_ENABLE, .dsleep_enable = WMI_10X_PDEV_PARAM_DSLEEP_ENABLE, .pcielp_txbuf_flush = WMI_PDEV_PARAM_UNSUPPORTED, .pcielp_txbuf_watermark = WMI_PDEV_PARAM_UNSUPPORTED, .pcielp_txbuf_tmo_en = WMI_PDEV_PARAM_UNSUPPORTED, .pcielp_txbuf_tmo_value = WMI_PDEV_PARAM_UNSUPPORTED, .pdev_stats_update_period = WMI_10X_PDEV_PARAM_PDEV_STATS_UPDATE_PERIOD, .vdev_stats_update_period = WMI_10X_PDEV_PARAM_VDEV_STATS_UPDATE_PERIOD, .peer_stats_update_period = WMI_10X_PDEV_PARAM_PEER_STATS_UPDATE_PERIOD, .bcnflt_stats_update_period = WMI_10X_PDEV_PARAM_BCNFLT_STATS_UPDATE_PERIOD, .pmf_qos = WMI_10X_PDEV_PARAM_PMF_QOS, .arp_ac_override = WMI_10X_PDEV_PARAM_ARPDHCP_AC_OVERRIDE, .dcs = WMI_10X_PDEV_PARAM_DCS, .ani_enable = WMI_10X_PDEV_PARAM_ANI_ENABLE, .ani_poll_period = WMI_10X_PDEV_PARAM_ANI_POLL_PERIOD, .ani_listen_period = WMI_10X_PDEV_PARAM_ANI_LISTEN_PERIOD, .ani_ofdm_level = WMI_10X_PDEV_PARAM_ANI_OFDM_LEVEL, .ani_cck_level = WMI_10X_PDEV_PARAM_ANI_CCK_LEVEL, .dyntxchain = WMI_10X_PDEV_PARAM_DYNTXCHAIN, .proxy_sta = WMI_PDEV_PARAM_UNSUPPORTED, .idle_ps_config = WMI_PDEV_PARAM_UNSUPPORTED, .power_gating_sleep = WMI_PDEV_PARAM_UNSUPPORTED, .fast_channel_reset = WMI_10X_PDEV_PARAM_FAST_CHANNEL_RESET, .burst_dur = WMI_10X_PDEV_PARAM_BURST_DUR, .burst_enable = WMI_10X_PDEV_PARAM_BURST_ENABLE, .cal_period = 
WMI_10X_PDEV_PARAM_CAL_PERIOD, .aggr_burst = WMI_PDEV_PARAM_UNSUPPORTED, .rx_decap_mode = WMI_PDEV_PARAM_UNSUPPORTED, .smart_antenna_default_antenna = WMI_PDEV_PARAM_UNSUPPORTED, .igmpmld_override = WMI_PDEV_PARAM_UNSUPPORTED, .igmpmld_tid = WMI_PDEV_PARAM_UNSUPPORTED, .antenna_gain = WMI_PDEV_PARAM_UNSUPPORTED, .rx_filter = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast_to_ucast_tid = WMI_PDEV_PARAM_UNSUPPORTED, .proxy_sta_mode = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast2ucast_mode = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast2ucast_buffer = WMI_PDEV_PARAM_UNSUPPORTED, .remove_mcast2ucast_buffer = WMI_PDEV_PARAM_UNSUPPORTED, .peer_sta_ps_statechg_enable = WMI_PDEV_PARAM_UNSUPPORTED, .igmpmld_ac_override = WMI_PDEV_PARAM_UNSUPPORTED, .block_interbss = WMI_PDEV_PARAM_UNSUPPORTED, .set_disable_reset_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_msdu_ttl_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_ppdu_duration_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .txbf_sound_period_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_promisc_mode_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_burst_mode_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .en_stats = WMI_PDEV_PARAM_UNSUPPORTED, .mu_group_policy = WMI_PDEV_PARAM_UNSUPPORTED, .noise_detection = WMI_PDEV_PARAM_UNSUPPORTED, .noise_threshold = WMI_PDEV_PARAM_UNSUPPORTED, .dpd_enable = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast_bcast_echo = WMI_PDEV_PARAM_UNSUPPORTED, .atf_strict_sch = WMI_PDEV_PARAM_UNSUPPORTED, .atf_sched_duration = WMI_PDEV_PARAM_UNSUPPORTED, .ant_plzn = WMI_PDEV_PARAM_UNSUPPORTED, .mgmt_retry_limit = WMI_PDEV_PARAM_UNSUPPORTED, .sensitivity_level = WMI_PDEV_PARAM_UNSUPPORTED, .signed_txpower_2g = WMI_PDEV_PARAM_UNSUPPORTED, .signed_txpower_5g = WMI_PDEV_PARAM_UNSUPPORTED, .enable_per_tid_amsdu = WMI_PDEV_PARAM_UNSUPPORTED, .enable_per_tid_ampdu = WMI_PDEV_PARAM_UNSUPPORTED, .cca_threshold = WMI_PDEV_PARAM_UNSUPPORTED, .rts_fixed_rate = WMI_PDEV_PARAM_UNSUPPORTED, .pdev_reset = WMI_PDEV_PARAM_UNSUPPORTED, .wapi_mbssid_offset = WMI_PDEV_PARAM_UNSUPPORTED, .arp_srcaddr = WMI_PDEV_PARAM_UNSUPPORTED, .arp_dstaddr = WMI_PDEV_PARAM_UNSUPPORTED, .enable_btcoex = WMI_PDEV_PARAM_UNSUPPORTED, }; static struct wmi_pdev_param_map wmi_10_2_4_pdev_param_map = { .tx_chain_mask = WMI_10X_PDEV_PARAM_TX_CHAIN_MASK, .rx_chain_mask = WMI_10X_PDEV_PARAM_RX_CHAIN_MASK, .txpower_limit2g = WMI_10X_PDEV_PARAM_TXPOWER_LIMIT2G, .txpower_limit5g = WMI_10X_PDEV_PARAM_TXPOWER_LIMIT5G, .txpower_scale = WMI_10X_PDEV_PARAM_TXPOWER_SCALE, .beacon_gen_mode = WMI_10X_PDEV_PARAM_BEACON_GEN_MODE, .beacon_tx_mode = WMI_10X_PDEV_PARAM_BEACON_TX_MODE, .resmgr_offchan_mode = WMI_10X_PDEV_PARAM_RESMGR_OFFCHAN_MODE, .protection_mode = WMI_10X_PDEV_PARAM_PROTECTION_MODE, .dynamic_bw = WMI_10X_PDEV_PARAM_DYNAMIC_BW, .non_agg_sw_retry_th = WMI_10X_PDEV_PARAM_NON_AGG_SW_RETRY_TH, .agg_sw_retry_th = WMI_10X_PDEV_PARAM_AGG_SW_RETRY_TH, .sta_kickout_th = WMI_10X_PDEV_PARAM_STA_KICKOUT_TH, .ac_aggrsize_scaling = WMI_10X_PDEV_PARAM_AC_AGGRSIZE_SCALING, .ltr_enable = WMI_10X_PDEV_PARAM_LTR_ENABLE, .ltr_ac_latency_be = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_BE, .ltr_ac_latency_bk = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_BK, .ltr_ac_latency_vi = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_VI, .ltr_ac_latency_vo = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_VO, .ltr_ac_latency_timeout = WMI_10X_PDEV_PARAM_LTR_AC_LATENCY_TIMEOUT, .ltr_sleep_override = WMI_10X_PDEV_PARAM_LTR_SLEEP_OVERRIDE, .ltr_rx_override = WMI_10X_PDEV_PARAM_LTR_RX_OVERRIDE, .ltr_tx_activity_timeout = WMI_10X_PDEV_PARAM_LTR_TX_ACTIVITY_TIMEOUT, .l1ss_enable = WMI_10X_PDEV_PARAM_L1SS_ENABLE, .dsleep_enable = 
WMI_10X_PDEV_PARAM_DSLEEP_ENABLE, .pcielp_txbuf_flush = WMI_PDEV_PARAM_UNSUPPORTED, .pcielp_txbuf_watermark = WMI_PDEV_PARAM_UNSUPPORTED, .pcielp_txbuf_tmo_en = WMI_PDEV_PARAM_UNSUPPORTED, .pcielp_txbuf_tmo_value = WMI_PDEV_PARAM_UNSUPPORTED, .pdev_stats_update_period = WMI_10X_PDEV_PARAM_PDEV_STATS_UPDATE_PERIOD, .vdev_stats_update_period = WMI_10X_PDEV_PARAM_VDEV_STATS_UPDATE_PERIOD, .peer_stats_update_period = WMI_10X_PDEV_PARAM_PEER_STATS_UPDATE_PERIOD, .bcnflt_stats_update_period = WMI_10X_PDEV_PARAM_BCNFLT_STATS_UPDATE_PERIOD, .pmf_qos = WMI_10X_PDEV_PARAM_PMF_QOS, .arp_ac_override = WMI_10X_PDEV_PARAM_ARPDHCP_AC_OVERRIDE, .dcs = WMI_10X_PDEV_PARAM_DCS, .ani_enable = WMI_10X_PDEV_PARAM_ANI_ENABLE, .ani_poll_period = WMI_10X_PDEV_PARAM_ANI_POLL_PERIOD, .ani_listen_period = WMI_10X_PDEV_PARAM_ANI_LISTEN_PERIOD, .ani_ofdm_level = WMI_10X_PDEV_PARAM_ANI_OFDM_LEVEL, .ani_cck_level = WMI_10X_PDEV_PARAM_ANI_CCK_LEVEL, .dyntxchain = WMI_10X_PDEV_PARAM_DYNTXCHAIN, .proxy_sta = WMI_PDEV_PARAM_UNSUPPORTED, .idle_ps_config = WMI_PDEV_PARAM_UNSUPPORTED, .power_gating_sleep = WMI_PDEV_PARAM_UNSUPPORTED, .fast_channel_reset = WMI_10X_PDEV_PARAM_FAST_CHANNEL_RESET, .burst_dur = WMI_10X_PDEV_PARAM_BURST_DUR, .burst_enable = WMI_10X_PDEV_PARAM_BURST_ENABLE, .cal_period = WMI_10X_PDEV_PARAM_CAL_PERIOD, .aggr_burst = WMI_PDEV_PARAM_UNSUPPORTED, .rx_decap_mode = WMI_PDEV_PARAM_UNSUPPORTED, .smart_antenna_default_antenna = WMI_PDEV_PARAM_UNSUPPORTED, .igmpmld_override = WMI_PDEV_PARAM_UNSUPPORTED, .igmpmld_tid = WMI_PDEV_PARAM_UNSUPPORTED, .antenna_gain = WMI_PDEV_PARAM_UNSUPPORTED, .rx_filter = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast_to_ucast_tid = WMI_PDEV_PARAM_UNSUPPORTED, .proxy_sta_mode = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast2ucast_mode = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast2ucast_buffer = WMI_PDEV_PARAM_UNSUPPORTED, .remove_mcast2ucast_buffer = WMI_PDEV_PARAM_UNSUPPORTED, .peer_sta_ps_statechg_enable = WMI_10X_PDEV_PARAM_PEER_STA_PS_STATECHG_ENABLE, .igmpmld_ac_override = WMI_PDEV_PARAM_UNSUPPORTED, .block_interbss = WMI_PDEV_PARAM_UNSUPPORTED, .set_disable_reset_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_msdu_ttl_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_ppdu_duration_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .txbf_sound_period_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_promisc_mode_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .set_burst_mode_cmdid = WMI_PDEV_PARAM_UNSUPPORTED, .en_stats = WMI_PDEV_PARAM_UNSUPPORTED, .mu_group_policy = WMI_PDEV_PARAM_UNSUPPORTED, .noise_detection = WMI_PDEV_PARAM_UNSUPPORTED, .noise_threshold = WMI_PDEV_PARAM_UNSUPPORTED, .dpd_enable = WMI_PDEV_PARAM_UNSUPPORTED, .set_mcast_bcast_echo = WMI_PDEV_PARAM_UNSUPPORTED, .atf_strict_sch = WMI_PDEV_PARAM_UNSUPPORTED, .atf_sched_duration = WMI_PDEV_PARAM_UNSUPPORTED, .ant_plzn = WMI_PDEV_PARAM_UNSUPPORTED, .mgmt_retry_limit = WMI_PDEV_PARAM_UNSUPPORTED, .sensitivity_level = WMI_PDEV_PARAM_UNSUPPORTED, .signed_txpower_2g = WMI_PDEV_PARAM_UNSUPPORTED, .signed_txpower_5g = WMI_PDEV_PARAM_UNSUPPORTED, .enable_per_tid_amsdu = WMI_PDEV_PARAM_UNSUPPORTED, .enable_per_tid_ampdu = WMI_PDEV_PARAM_UNSUPPORTED, .cca_threshold = WMI_PDEV_PARAM_UNSUPPORTED, .rts_fixed_rate = WMI_PDEV_PARAM_UNSUPPORTED, .pdev_reset = WMI_10X_PDEV_PARAM_PDEV_RESET, .wapi_mbssid_offset = WMI_PDEV_PARAM_UNSUPPORTED, .arp_srcaddr = WMI_PDEV_PARAM_UNSUPPORTED, .arp_dstaddr = WMI_PDEV_PARAM_UNSUPPORTED, .enable_btcoex = WMI_PDEV_PARAM_UNSUPPORTED, }; /* firmware 10.2 specific mappings */ static struct wmi_cmd_map wmi_10_2_cmd_map = { .init_cmdid = WMI_10_2_INIT_CMDID, 
.start_scan_cmdid = WMI_10_2_START_SCAN_CMDID, .stop_scan_cmdid = WMI_10_2_STOP_SCAN_CMDID, .scan_chan_list_cmdid = WMI_10_2_SCAN_CHAN_LIST_CMDID, .scan_sch_prio_tbl_cmdid = WMI_CMD_UNSUPPORTED, .scan_prob_req_oui_cmdid = WMI_CMD_UNSUPPORTED, .pdev_set_regdomain_cmdid = WMI_10_2_PDEV_SET_REGDOMAIN_CMDID, .pdev_set_channel_cmdid = WMI_10_2_PDEV_SET_CHANNEL_CMDID, .pdev_set_param_cmdid = WMI_10_2_PDEV_SET_PARAM_CMDID, .pdev_pktlog_enable_cmdid = WMI_10_2_PDEV_PKTLOG_ENABLE_CMDID, .pdev_pktlog_disable_cmdid = WMI_10_2_PDEV_PKTLOG_DISABLE_CMDID, .pdev_set_wmm_params_cmdid = WMI_10_2_PDEV_SET_WMM_PARAMS_CMDID, .pdev_set_ht_cap_ie_cmdid = WMI_10_2_PDEV_SET_HT_CAP_IE_CMDID, .pdev_set_vht_cap_ie_cmdid = WMI_10_2_PDEV_SET_VHT_CAP_IE_CMDID, .pdev_set_quiet_mode_cmdid = WMI_10_2_PDEV_SET_QUIET_MODE_CMDID, .pdev_green_ap_ps_enable_cmdid = WMI_10_2_PDEV_GREEN_AP_PS_ENABLE_CMDID, .pdev_get_tpc_config_cmdid = WMI_10_2_PDEV_GET_TPC_CONFIG_CMDID, .pdev_set_base_macaddr_cmdid = WMI_10_2_PDEV_SET_BASE_MACADDR_CMDID, .vdev_create_cmdid = WMI_10_2_VDEV_CREATE_CMDID, .vdev_delete_cmdid = WMI_10_2_VDEV_DELETE_CMDID, .vdev_start_request_cmdid = WMI_10_2_VDEV_START_REQUEST_CMDID, .vdev_restart_request_cmdid = WMI_10_2_VDEV_RESTART_REQUEST_CMDID, .vdev_up_cmdid = WMI_10_2_VDEV_UP_CMDID, .vdev_stop_cmdid = WMI_10_2_VDEV_STOP_CMDID, .vdev_down_cmdid = WMI_10_2_VDEV_DOWN_CMDID, .vdev_set_param_cmdid = WMI_10_2_VDEV_SET_PARAM_CMDID, .vdev_install_key_cmdid = WMI_10_2_VDEV_INSTALL_KEY_CMDID, .peer_create_cmdid = WMI_10_2_PEER_CREATE_CMDID, .peer_delete_cmdid = WMI_10_2_PEER_DELETE_CMDID, .peer_flush_tids_cmdid = WMI_10_2_PEER_FLUSH_TIDS_CMDID, .peer_set_param_cmdid = WMI_10_2_PEER_SET_PARAM_CMDID, .peer_assoc_cmdid = WMI_10_2_PEER_ASSOC_CMDID, .peer_add_wds_entry_cmdid = WMI_10_2_PEER_ADD_WDS_ENTRY_CMDID, .peer_remove_wds_entry_cmdid = WMI_10_2_PEER_REMOVE_WDS_ENTRY_CMDID, .peer_mcast_group_cmdid = WMI_10_2_PEER_MCAST_GROUP_CMDID, .bcn_tx_cmdid = WMI_10_2_BCN_TX_CMDID, .pdev_send_bcn_cmdid = WMI_10_2_PDEV_SEND_BCN_CMDID, .bcn_tmpl_cmdid = WMI_CMD_UNSUPPORTED, .bcn_filter_rx_cmdid = WMI_10_2_BCN_FILTER_RX_CMDID, .prb_req_filter_rx_cmdid = WMI_10_2_PRB_REQ_FILTER_RX_CMDID, .mgmt_tx_cmdid = WMI_10_2_MGMT_TX_CMDID, .prb_tmpl_cmdid = WMI_CMD_UNSUPPORTED, .addba_clear_resp_cmdid = WMI_10_2_ADDBA_CLEAR_RESP_CMDID, .addba_send_cmdid = WMI_10_2_ADDBA_SEND_CMDID, .addba_status_cmdid = WMI_10_2_ADDBA_STATUS_CMDID, .delba_send_cmdid = WMI_10_2_DELBA_SEND_CMDID, .addba_set_resp_cmdid = WMI_10_2_ADDBA_SET_RESP_CMDID, .send_singleamsdu_cmdid = WMI_10_2_SEND_SINGLEAMSDU_CMDID, .sta_powersave_mode_cmdid = WMI_10_2_STA_POWERSAVE_MODE_CMDID, .sta_powersave_param_cmdid = WMI_10_2_STA_POWERSAVE_PARAM_CMDID, .sta_mimo_ps_mode_cmdid = WMI_10_2_STA_MIMO_PS_MODE_CMDID, .pdev_dfs_enable_cmdid = WMI_10_2_PDEV_DFS_ENABLE_CMDID, .pdev_dfs_disable_cmdid = WMI_10_2_PDEV_DFS_DISABLE_CMDID, .roam_scan_mode = WMI_10_2_ROAM_SCAN_MODE, .roam_scan_rssi_threshold = WMI_10_2_ROAM_SCAN_RSSI_THRESHOLD, .roam_scan_period = WMI_10_2_ROAM_SCAN_PERIOD, .roam_scan_rssi_change_threshold = WMI_10_2_ROAM_SCAN_RSSI_CHANGE_THRESHOLD, .roam_ap_profile = WMI_10_2_ROAM_AP_PROFILE, .ofl_scan_add_ap_profile = WMI_10_2_OFL_SCAN_ADD_AP_PROFILE, .ofl_scan_remove_ap_profile = WMI_10_2_OFL_SCAN_REMOVE_AP_PROFILE, .ofl_scan_period = WMI_10_2_OFL_SCAN_PERIOD, .p2p_dev_set_device_info = WMI_10_2_P2P_DEV_SET_DEVICE_INFO, .p2p_dev_set_discoverability = WMI_10_2_P2P_DEV_SET_DISCOVERABILITY, .p2p_go_set_beacon_ie = WMI_10_2_P2P_GO_SET_BEACON_IE, .p2p_go_set_probe_resp_ie = 
WMI_10_2_P2P_GO_SET_PROBE_RESP_IE, .p2p_set_vendor_ie_data_cmdid = WMI_CMD_UNSUPPORTED, .ap_ps_peer_param_cmdid = WMI_10_2_AP_PS_PEER_PARAM_CMDID, .ap_ps_peer_uapsd_coex_cmdid = WMI_CMD_UNSUPPORTED, .peer_rate_retry_sched_cmdid = WMI_10_2_PEER_RATE_RETRY_SCHED_CMDID, .wlan_profile_trigger_cmdid = WMI_10_2_WLAN_PROFILE_TRIGGER_CMDID, .wlan_profile_set_hist_intvl_cmdid = WMI_10_2_WLAN_PROFILE_SET_HIST_INTVL_CMDID, .wlan_profile_get_profile_data_cmdid = WMI_10_2_WLAN_PROFILE_GET_PROFILE_DATA_CMDID, .wlan_profile_enable_profile_id_cmdid = WMI_10_2_WLAN_PROFILE_ENABLE_PROFILE_ID_CMDID, .wlan_profile_list_profile_id_cmdid = WMI_10_2_WLAN_PROFILE_LIST_PROFILE_ID_CMDID, .pdev_suspend_cmdid = WMI_10_2_PDEV_SUSPEND_CMDID, .pdev_resume_cmdid = WMI_10_2_PDEV_RESUME_CMDID, .add_bcn_filter_cmdid = WMI_10_2_ADD_BCN_FILTER_CMDID, .rmv_bcn_filter_cmdid = WMI_10_2_RMV_BCN_FILTER_CMDID, .wow_add_wake_pattern_cmdid = WMI_10_2_WOW_ADD_WAKE_PATTERN_CMDID, .wow_del_wake_pattern_cmdid = WMI_10_2_WOW_DEL_WAKE_PATTERN_CMDID, .wow_enable_disable_wake_event_cmdid = WMI_10_2_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID, .wow_enable_cmdid = WMI_10_2_WOW_ENABLE_CMDID, .wow_hostwakeup_from_sleep_cmdid = WMI_10_2_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID, .rtt_measreq_cmdid = WMI_10_2_RTT_MEASREQ_CMDID, .rtt_tsf_cmdid = WMI_10_2_RTT_TSF_CMDID, .vdev_spectral_scan_configure_cmdid = WMI_10_2_VDEV_SPECTRAL_SCAN_CONFIGURE_CMDID, .vdev_spectral_scan_enable_cmdid = WMI_10_2_VDEV_SPECTRAL_SCAN_ENABLE_CMDID, .request_stats_cmdid = WMI_10_2_REQUEST_STATS_CMDID, .set_arp_ns_offload_cmdid = WMI_CMD_UNSUPPORTED, .network_list_offload_config_cmdid = WMI_CMD_UNSUPPORTED, .gtk_offload_cmdid = WMI_CMD_UNSUPPORTED, .csa_offload_enable_cmdid = WMI_CMD_UNSUPPORTED, .csa_offload_chanswitch_cmdid = WMI_CMD_UNSUPPORTED, .chatter_set_mode_cmdid = WMI_CMD_UNSUPPORTED, .peer_tid_addba_cmdid = WMI_CMD_UNSUPPORTED, .peer_tid_delba_cmdid = WMI_CMD_UNSUPPORTED, .sta_dtim_ps_method_cmdid = WMI_CMD_UNSUPPORTED, .sta_uapsd_auto_trig_cmdid = WMI_CMD_UNSUPPORTED, .sta_keepalive_cmd = WMI_CMD_UNSUPPORTED, .echo_cmdid = WMI_10_2_ECHO_CMDID, .pdev_utf_cmdid = WMI_10_2_PDEV_UTF_CMDID, .dbglog_cfg_cmdid = WMI_10_2_DBGLOG_CFG_CMDID, .pdev_qvit_cmdid = WMI_10_2_PDEV_QVIT_CMDID, .pdev_ftm_intg_cmdid = WMI_CMD_UNSUPPORTED, .vdev_set_keepalive_cmdid = WMI_CMD_UNSUPPORTED, .vdev_get_keepalive_cmdid = WMI_CMD_UNSUPPORTED, .force_fw_hang_cmdid = WMI_CMD_UNSUPPORTED, .gpio_config_cmdid = WMI_10_2_GPIO_CONFIG_CMDID, .gpio_output_cmdid = WMI_10_2_GPIO_OUTPUT_CMDID, .pdev_get_temperature_cmdid = WMI_CMD_UNSUPPORTED, .pdev_enable_adaptive_cca_cmdid = WMI_CMD_UNSUPPORTED, .scan_update_request_cmdid = WMI_CMD_UNSUPPORTED, .vdev_standby_response_cmdid = WMI_CMD_UNSUPPORTED, .vdev_resume_response_cmdid = WMI_CMD_UNSUPPORTED, .wlan_peer_caching_add_peer_cmdid = WMI_CMD_UNSUPPORTED, .wlan_peer_caching_evict_peer_cmdid = WMI_CMD_UNSUPPORTED, .wlan_peer_caching_restore_peer_cmdid = WMI_CMD_UNSUPPORTED, .wlan_peer_caching_print_all_peers_info_cmdid = WMI_CMD_UNSUPPORTED, .peer_update_wds_entry_cmdid = WMI_CMD_UNSUPPORTED, .peer_add_proxy_sta_entry_cmdid = WMI_CMD_UNSUPPORTED, .rtt_keepalive_cmdid = WMI_CMD_UNSUPPORTED, .oem_req_cmdid = WMI_CMD_UNSUPPORTED, .nan_cmdid = WMI_CMD_UNSUPPORTED, .vdev_ratemask_cmdid = WMI_CMD_UNSUPPORTED, .qboost_cfg_cmdid = WMI_CMD_UNSUPPORTED, .pdev_smart_ant_enable_cmdid = WMI_CMD_UNSUPPORTED, .pdev_smart_ant_set_rx_antenna_cmdid = WMI_CMD_UNSUPPORTED, .peer_smart_ant_set_tx_antenna_cmdid = WMI_CMD_UNSUPPORTED, .peer_smart_ant_set_train_info_cmdid = WMI_CMD_UNSUPPORTED, 
	.peer_smart_ant_set_node_config_ops_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_antenna_switch_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_ctl_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_set_mimogain_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_ratepwr_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_ratepwr_chainmsk_table_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_fips_cmdid = WMI_CMD_UNSUPPORTED,
	.tt_set_conf_cmdid = WMI_CMD_UNSUPPORTED,
	.fwtest_cmdid = WMI_CMD_UNSUPPORTED,
	.vdev_atf_request_cmdid = WMI_CMD_UNSUPPORTED,
	.peer_atf_request_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ani_cck_config_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_ani_ofdm_config_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_reserve_ast_entry_cmdid = WMI_CMD_UNSUPPORTED,
	.pdev_get_tpc_table_cmdid = WMI_CMD_UNSUPPORTED,
	.radar_found_cmdid = WMI_CMD_UNSUPPORTED,
};
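/*
 * 10.4 WMI pdev param map. Parameters that the 10.X/10.2.x maps above mark
 * as WMI_PDEV_PARAM_UNSUPPORTED (smart antenna, ATF, noise detection, etc.)
 * have real firmware identifiers in this branch.
 */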
static struct wmi_pdev_param_map wmi_10_4_pdev_param_map = {
	.tx_chain_mask = WMI_10_4_PDEV_PARAM_TX_CHAIN_MASK,
	.rx_chain_mask = WMI_10_4_PDEV_PARAM_RX_CHAIN_MASK,
	.txpower_limit2g = WMI_10_4_PDEV_PARAM_TXPOWER_LIMIT2G,
	.txpower_limit5g = WMI_10_4_PDEV_PARAM_TXPOWER_LIMIT5G,
	.txpower_scale = WMI_10_4_PDEV_PARAM_TXPOWER_SCALE,
	.beacon_gen_mode = WMI_10_4_PDEV_PARAM_BEACON_GEN_MODE,
	.beacon_tx_mode = WMI_10_4_PDEV_PARAM_BEACON_TX_MODE,
	.resmgr_offchan_mode = WMI_10_4_PDEV_PARAM_RESMGR_OFFCHAN_MODE,
	.protection_mode = WMI_10_4_PDEV_PARAM_PROTECTION_MODE,
	.dynamic_bw = WMI_10_4_PDEV_PARAM_DYNAMIC_BW,
	.non_agg_sw_retry_th = WMI_10_4_PDEV_PARAM_NON_AGG_SW_RETRY_TH,
	.agg_sw_retry_th = WMI_10_4_PDEV_PARAM_AGG_SW_RETRY_TH,
	.sta_kickout_th = WMI_10_4_PDEV_PARAM_STA_KICKOUT_TH,
	.ac_aggrsize_scaling = WMI_10_4_PDEV_PARAM_AC_AGGRSIZE_SCALING,
	.ltr_enable = WMI_10_4_PDEV_PARAM_LTR_ENABLE,
	.ltr_ac_latency_be = WMI_10_4_PDEV_PARAM_LTR_AC_LATENCY_BE,
	.ltr_ac_latency_bk = WMI_10_4_PDEV_PARAM_LTR_AC_LATENCY_BK,
	.ltr_ac_latency_vi = WMI_10_4_PDEV_PARAM_LTR_AC_LATENCY_VI,
	.ltr_ac_latency_vo = WMI_10_4_PDEV_PARAM_LTR_AC_LATENCY_VO,
	.ltr_ac_latency_timeout = WMI_10_4_PDEV_PARAM_LTR_AC_LATENCY_TIMEOUT,
	.ltr_sleep_override = WMI_10_4_PDEV_PARAM_LTR_SLEEP_OVERRIDE,
	.ltr_rx_override = WMI_10_4_PDEV_PARAM_LTR_RX_OVERRIDE,
	.ltr_tx_activity_timeout = WMI_10_4_PDEV_PARAM_LTR_TX_ACTIVITY_TIMEOUT,
	.l1ss_enable = WMI_10_4_PDEV_PARAM_L1SS_ENABLE,
	.dsleep_enable = WMI_10_4_PDEV_PARAM_DSLEEP_ENABLE,
	.pcielp_txbuf_flush = WMI_10_4_PDEV_PARAM_PCIELP_TXBUF_FLUSH,
	.pcielp_txbuf_watermark = WMI_10_4_PDEV_PARAM_PCIELP_TXBUF_WATERMARK,
	.pcielp_txbuf_tmo_en = WMI_10_4_PDEV_PARAM_PCIELP_TXBUF_TMO_EN,
	.pcielp_txbuf_tmo_value = WMI_10_4_PDEV_PARAM_PCIELP_TXBUF_TMO_VALUE,
	.pdev_stats_update_period = WMI_10_4_PDEV_PARAM_PDEV_STATS_UPDATE_PERIOD,
	.vdev_stats_update_period = WMI_10_4_PDEV_PARAM_VDEV_STATS_UPDATE_PERIOD,
	.peer_stats_update_period = WMI_10_4_PDEV_PARAM_PEER_STATS_UPDATE_PERIOD,
	.bcnflt_stats_update_period = WMI_10_4_PDEV_PARAM_BCNFLT_STATS_UPDATE_PERIOD,
	.pmf_qos = WMI_10_4_PDEV_PARAM_PMF_QOS,
	.arp_ac_override = WMI_10_4_PDEV_PARAM_ARP_AC_OVERRIDE,
	.dcs = WMI_10_4_PDEV_PARAM_DCS,
	.ani_enable = WMI_10_4_PDEV_PARAM_ANI_ENABLE,
	.ani_poll_period = WMI_10_4_PDEV_PARAM_ANI_POLL_PERIOD,
	.ani_listen_period = WMI_10_4_PDEV_PARAM_ANI_LISTEN_PERIOD,
	.ani_ofdm_level = WMI_10_4_PDEV_PARAM_ANI_OFDM_LEVEL,
	.ani_cck_level = WMI_10_4_PDEV_PARAM_ANI_CCK_LEVEL,
	.dyntxchain = WMI_10_4_PDEV_PARAM_DYNTXCHAIN,
	.proxy_sta = WMI_10_4_PDEV_PARAM_PROXY_STA,
	.idle_ps_config = WMI_10_4_PDEV_PARAM_IDLE_PS_CONFIG,
	.power_gating_sleep = WMI_10_4_PDEV_PARAM_POWER_GATING_SLEEP,
	.fast_channel_reset = WMI_10_4_PDEV_PARAM_FAST_CHANNEL_RESET,
	.burst_dur = WMI_10_4_PDEV_PARAM_BURST_DUR,
	.burst_enable = WMI_10_4_PDEV_PARAM_BURST_ENABLE,
	.cal_period = WMI_10_4_PDEV_PARAM_CAL_PERIOD,
	.aggr_burst = WMI_10_4_PDEV_PARAM_AGGR_BURST,
	.rx_decap_mode = WMI_10_4_PDEV_PARAM_RX_DECAP_MODE,
	.smart_antenna_default_antenna = WMI_10_4_PDEV_PARAM_SMART_ANTENNA_DEFAULT_ANTENNA,
	.igmpmld_override = WMI_10_4_PDEV_PARAM_IGMPMLD_OVERRIDE,
	.igmpmld_tid = WMI_10_4_PDEV_PARAM_IGMPMLD_TID,
	.antenna_gain = WMI_10_4_PDEV_PARAM_ANTENNA_GAIN,
	.rx_filter = WMI_10_4_PDEV_PARAM_RX_FILTER,
	.set_mcast_to_ucast_tid = WMI_10_4_PDEV_SET_MCAST_TO_UCAST_TID,
	.proxy_sta_mode = WMI_10_4_PDEV_PARAM_PROXY_STA_MODE,
	.set_mcast2ucast_mode = WMI_10_4_PDEV_PARAM_SET_MCAST2UCAST_MODE,
	.set_mcast2ucast_buffer = WMI_10_4_PDEV_PARAM_SET_MCAST2UCAST_BUFFER,
	.remove_mcast2ucast_buffer = WMI_10_4_PDEV_PARAM_REMOVE_MCAST2UCAST_BUFFER,
	.peer_sta_ps_statechg_enable = WMI_10_4_PDEV_PEER_STA_PS_STATECHG_ENABLE,
	.igmpmld_ac_override = WMI_10_4_PDEV_PARAM_IGMPMLD_AC_OVERRIDE,
	.block_interbss = WMI_10_4_PDEV_PARAM_BLOCK_INTERBSS,
	.set_disable_reset_cmdid = WMI_10_4_PDEV_PARAM_SET_DISABLE_RESET_CMDID,
	.set_msdu_ttl_cmdid = WMI_10_4_PDEV_PARAM_SET_MSDU_TTL_CMDID,
	.set_ppdu_duration_cmdid = WMI_10_4_PDEV_PARAM_SET_PPDU_DURATION_CMDID,
	.txbf_sound_period_cmdid = WMI_10_4_PDEV_PARAM_TXBF_SOUND_PERIOD_CMDID,
	.set_promisc_mode_cmdid = WMI_10_4_PDEV_PARAM_SET_PROMISC_MODE_CMDID,
	.set_burst_mode_cmdid = WMI_10_4_PDEV_PARAM_SET_BURST_MODE_CMDID,
	.en_stats = WMI_10_4_PDEV_PARAM_EN_STATS,
	.mu_group_policy = WMI_10_4_PDEV_PARAM_MU_GROUP_POLICY,
	.noise_detection = WMI_10_4_PDEV_PARAM_NOISE_DETECTION,
	.noise_threshold = WMI_10_4_PDEV_PARAM_NOISE_THRESHOLD,
	.dpd_enable = WMI_10_4_PDEV_PARAM_DPD_ENABLE,
	.set_mcast_bcast_echo = WMI_10_4_PDEV_PARAM_SET_MCAST_BCAST_ECHO,
	.atf_strict_sch = WMI_10_4_PDEV_PARAM_ATF_STRICT_SCH,
	.atf_sched_duration = WMI_10_4_PDEV_PARAM_ATF_SCHED_DURATION,
	.ant_plzn = WMI_10_4_PDEV_PARAM_ANT_PLZN,
	.mgmt_retry_limit = WMI_10_4_PDEV_PARAM_MGMT_RETRY_LIMIT,
	.sensitivity_level = WMI_10_4_PDEV_PARAM_SENSITIVITY_LEVEL,
	.signed_txpower_2g = WMI_10_4_PDEV_PARAM_SIGNED_TXPOWER_2G,
	.signed_txpower_5g = WMI_10_4_PDEV_PARAM_SIGNED_TXPOWER_5G,
	.enable_per_tid_amsdu = WMI_10_4_PDEV_PARAM_ENABLE_PER_TID_AMSDU,
	.enable_per_tid_ampdu = WMI_10_4_PDEV_PARAM_ENABLE_PER_TID_AMPDU,
	.cca_threshold = WMI_10_4_PDEV_PARAM_CCA_THRESHOLD,
	.rts_fixed_rate = WMI_10_4_PDEV_PARAM_RTS_FIXED_RATE,
	.pdev_reset = WMI_10_4_PDEV_PARAM_PDEV_RESET,
	.wapi_mbssid_offset = WMI_10_4_PDEV_PARAM_WAPI_MBSSID_OFFSET,
	.arp_srcaddr = WMI_10_4_PDEV_PARAM_ARP_SRCADDR,
	.arp_dstaddr = WMI_10_4_PDEV_PARAM_ARP_DSTADDR,
	.enable_btcoex = WMI_10_4_PDEV_PARAM_ENABLE_BTCOEX,
};

static const u8 wmi_key_cipher_suites[] = {
	[WMI_CIPHER_NONE] = WMI_CIPHER_NONE,
	[WMI_CIPHER_WEP] = WMI_CIPHER_WEP,
	[WMI_CIPHER_TKIP] = WMI_CIPHER_TKIP,
	[WMI_CIPHER_AES_OCB] = WMI_CIPHER_AES_OCB,
	[WMI_CIPHER_AES_CCM] = WMI_CIPHER_AES_CCM,
	[WMI_CIPHER_WAPI] = WMI_CIPHER_WAPI,
	[WMI_CIPHER_CKIP] = WMI_CIPHER_CKIP,
	[WMI_CIPHER_AES_CMAC] = WMI_CIPHER_AES_CMAC,
	[WMI_CIPHER_AES_GCM] = WMI_CIPHER_AES_GCM,
};

static const u8 wmi_tlv_key_cipher_suites[] = {
	[WMI_CIPHER_NONE] = WMI_TLV_CIPHER_NONE,
	[WMI_CIPHER_WEP] = WMI_TLV_CIPHER_WEP,
	[WMI_CIPHER_TKIP] = WMI_TLV_CIPHER_TKIP,
	[WMI_CIPHER_AES_OCB] = WMI_TLV_CIPHER_AES_OCB,
	[WMI_CIPHER_AES_CCM] = WMI_TLV_CIPHER_AES_CCM,
	[WMI_CIPHER_WAPI] = WMI_TLV_CIPHER_WAPI,
	[WMI_CIPHER_CKIP] = WMI_TLV_CIPHER_CKIP,
	[WMI_CIPHER_AES_CMAC] = WMI_TLV_CIPHER_AES_CMAC,
	[WMI_CIPHER_AES_GCM] = WMI_TLV_CIPHER_AES_GCM,
};
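/*
 * Peer flag bit layouts differ between firmware branches; the maps below
 * translate the abstracted peer flags into the main, 10.X and 10.2
 * firmware-specific bit definitions.
 */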
wmi_peer_flags_map = { .auth = WMI_PEER_AUTH, .qos = WMI_PEER_QOS, .need_ptk_4_way = WMI_PEER_NEED_PTK_4_WAY, .need_gtk_2_way = WMI_PEER_NEED_GTK_2_WAY, .apsd = WMI_PEER_APSD, .ht = WMI_PEER_HT, .bw40 = WMI_PEER_40MHZ, .stbc = WMI_PEER_STBC, .ldbc = WMI_PEER_LDPC, .dyn_mimops = WMI_PEER_DYN_MIMOPS, .static_mimops = WMI_PEER_STATIC_MIMOPS, .spatial_mux = WMI_PEER_SPATIAL_MUX, .vht = WMI_PEER_VHT, .bw80 = WMI_PEER_80MHZ, .vht_2g = WMI_PEER_VHT_2G, .pmf = WMI_PEER_PMF, .bw160 = WMI_PEER_160MHZ, }; static const struct wmi_peer_flags_map wmi_10x_peer_flags_map = { .auth = WMI_10X_PEER_AUTH, .qos = WMI_10X_PEER_QOS, .need_ptk_4_way = WMI_10X_PEER_NEED_PTK_4_WAY, .need_gtk_2_way = WMI_10X_PEER_NEED_GTK_2_WAY, .apsd = WMI_10X_PEER_APSD, .ht = WMI_10X_PEER_HT, .bw40 = WMI_10X_PEER_40MHZ, .stbc = WMI_10X_PEER_STBC, .ldbc = WMI_10X_PEER_LDPC, .dyn_mimops = WMI_10X_PEER_DYN_MIMOPS, .static_mimops = WMI_10X_PEER_STATIC_MIMOPS, .spatial_mux = WMI_10X_PEER_SPATIAL_MUX, .vht = WMI_10X_PEER_VHT, .bw80 = WMI_10X_PEER_80MHZ, .bw160 = WMI_10X_PEER_160MHZ, }; static const struct wmi_peer_flags_map wmi_10_2_peer_flags_map = { .auth = WMI_10_2_PEER_AUTH, .qos = WMI_10_2_PEER_QOS, .need_ptk_4_way = WMI_10_2_PEER_NEED_PTK_4_WAY, .need_gtk_2_way = WMI_10_2_PEER_NEED_GTK_2_WAY, .apsd = WMI_10_2_PEER_APSD, .ht = WMI_10_2_PEER_HT, .bw40 = WMI_10_2_PEER_40MHZ, .stbc = WMI_10_2_PEER_STBC, .ldbc = WMI_10_2_PEER_LDPC, .dyn_mimops = WMI_10_2_PEER_DYN_MIMOPS, .static_mimops = WMI_10_2_PEER_STATIC_MIMOPS, .spatial_mux = WMI_10_2_PEER_SPATIAL_MUX, .vht = WMI_10_2_PEER_VHT, .bw80 = WMI_10_2_PEER_80MHZ, .vht_2g = WMI_10_2_PEER_VHT_2G, .pmf = WMI_10_2_PEER_PMF, .bw160 = WMI_10_2_PEER_160MHZ, }; void ath10k_wmi_put_wmi_channel(struct ath10k *ar, struct wmi_channel *ch, const struct wmi_channel_arg *arg) { u32 flags = 0; struct ieee80211_channel *chan = NULL; memset(ch, 0, sizeof(*ch)); if (arg->passive) flags |= WMI_CHAN_FLAG_PASSIVE; if (arg->allow_ibss) flags |= WMI_CHAN_FLAG_ADHOC_ALLOWED; if (arg->allow_ht) flags |= WMI_CHAN_FLAG_ALLOW_HT; if (arg->allow_vht) flags |= WMI_CHAN_FLAG_ALLOW_VHT; if (arg->ht40plus) flags |= WMI_CHAN_FLAG_HT40_PLUS; if (arg->chan_radar) flags |= WMI_CHAN_FLAG_DFS; ch->band_center_freq2 = 0; ch->mhz = __cpu_to_le32(arg->freq); ch->band_center_freq1 = __cpu_to_le32(arg->band_center_freq1); if (arg->mode == MODE_11AC_VHT80_80) { ch->band_center_freq2 = __cpu_to_le32(arg->band_center_freq2); chan = ieee80211_get_channel(ar->hw->wiphy, arg->band_center_freq2 - 10); } if (arg->mode == MODE_11AC_VHT160) { u32 band_center_freq1; u32 band_center_freq2; if (arg->freq > arg->band_center_freq1) { band_center_freq1 = arg->band_center_freq1 + 40; band_center_freq2 = arg->band_center_freq1 - 40; } else { band_center_freq1 = arg->band_center_freq1 - 40; band_center_freq2 = arg->band_center_freq1 + 40; } ch->band_center_freq1 = __cpu_to_le32(band_center_freq1); /* Minus 10 to get a defined 5G channel frequency*/ chan = ieee80211_get_channel(ar->hw->wiphy, band_center_freq2 - 10); /* The center frequency of the entire VHT160 */ ch->band_center_freq2 = __cpu_to_le32(arg->band_center_freq1); } if (chan && chan->flags & IEEE80211_CHAN_RADAR) flags |= WMI_CHAN_FLAG_DFS_CFREQ2; ch->min_power = arg->min_power; ch->max_power = arg->max_power; ch->reg_power = arg->max_reg_power; ch->antenna_max = arg->max_antenna_gain; ch->max_tx_power = arg->max_power; /* mode & flags share storage */ ch->mode = arg->mode; ch->flags |= __cpu_to_le32(flags); } int ath10k_wmi_wait_for_service_ready(struct ath10k *ar) { unsigned long 
time_left; time_left = wait_for_completion_timeout(&ar->wmi.service_ready, WMI_SERVICE_READY_TIMEOUT_HZ); if (!time_left) return -ETIMEDOUT; return 0; } int ath10k_wmi_wait_for_unified_ready(struct ath10k *ar) { unsigned long time_left; time_left = wait_for_completion_timeout(&ar->wmi.unified_ready, WMI_UNIFIED_READY_TIMEOUT_HZ); if (!time_left) return -ETIMEDOUT; return 0; } struct sk_buff *ath10k_wmi_alloc_skb(struct ath10k *ar, u32 len) { struct sk_buff *skb; u32 round_len = roundup(len, 4); skb = ath10k_htc_alloc_skb(ar, WMI_SKB_HEADROOM + round_len); if (!skb) return NULL; skb_reserve(skb, WMI_SKB_HEADROOM); if (!IS_ALIGNED((unsigned long)skb->data, 4)) ath10k_warn(ar, "Unaligned WMI skb\n"); skb_put(skb, round_len); memset(skb->data, 0, round_len); return skb; } static void ath10k_wmi_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb) { dev_kfree_skb(skb); } int ath10k_wmi_cmd_send_nowait(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) { struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(skb); struct wmi_cmd_hdr *cmd_hdr; int ret; u32 cmd = 0; if (skb_push(skb, sizeof(struct wmi_cmd_hdr)) == NULL) return -ENOMEM; cmd |= SM(cmd_id, WMI_CMD_HDR_CMD_ID); cmd_hdr = (struct wmi_cmd_hdr *)skb->data; cmd_hdr->cmd_id = __cpu_to_le32(cmd); memset(skb_cb, 0, sizeof(*skb_cb)); trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len); ret = ath10k_htc_send(&ar->htc, ar->wmi.eid, skb); if (ret) goto err_pull; return 0; err_pull: skb_pull(skb, sizeof(struct wmi_cmd_hdr)); return ret; } static void ath10k_wmi_tx_beacon_nowait(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ath10k_skb_cb *cb; struct sk_buff *bcn; bool dtim_zero; bool deliver_cab; int ret; spin_lock_bh(&ar->data_lock); bcn = arvif->beacon; if (!bcn) goto unlock; cb = ATH10K_SKB_CB(bcn); switch (arvif->beacon_state) { case ATH10K_BEACON_SENDING: case ATH10K_BEACON_SENT: break; case ATH10K_BEACON_SCHEDULED: arvif->beacon_state = ATH10K_BEACON_SENDING; spin_unlock_bh(&ar->data_lock); dtim_zero = !!(cb->flags & ATH10K_SKB_F_DTIM_ZERO); deliver_cab = !!(cb->flags & ATH10K_SKB_F_DELIVER_CAB); ret = ath10k_wmi_beacon_send_ref_nowait(arvif->ar, arvif->vdev_id, bcn->data, bcn->len, cb->paddr, dtim_zero, deliver_cab); spin_lock_bh(&ar->data_lock); if (ret == 0) arvif->beacon_state = ATH10K_BEACON_SENT; else arvif->beacon_state = ATH10K_BEACON_SCHEDULED; } unlock: spin_unlock_bh(&ar->data_lock); } static void ath10k_wmi_tx_beacons_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_vif *arvif = (void *)vif->drv_priv; ath10k_wmi_tx_beacon_nowait(arvif); } static void ath10k_wmi_tx_beacons_nowait(struct ath10k *ar) { ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_wmi_tx_beacons_iter, NULL); } static void ath10k_wmi_op_ep_tx_credits(struct ath10k *ar) { /* try to send pending beacons first. they take priority */ ath10k_wmi_tx_beacons_nowait(ar); wake_up(&ar->wmi.tx_credits_wq); } int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) { int ret = -EOPNOTSUPP; might_sleep(); if (cmd_id == WMI_CMD_UNSUPPORTED) { ath10k_warn(ar, "wmi command %d is not supported by firmware\n", cmd_id); return ret; } wait_event_timeout(ar->wmi.tx_credits_wq, ({ /* try to send pending beacons first. 
they take priority */ ath10k_wmi_tx_beacons_nowait(ar); ret = ath10k_wmi_cmd_send_nowait(ar, skb, cmd_id); if (ret && test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) ret = -ESHUTDOWN; (ret != -EAGAIN); }), 3 * HZ); if (ret) dev_kfree_skb_any(skb); if (ret == -EAGAIN) { ath10k_warn(ar, "wmi command %d timeout, restarting hardware\n", cmd_id); ath10k_core_start_recovery(ar); } return ret; } static struct sk_buff * ath10k_wmi_op_gen_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu) { struct ath10k_skb_cb *cb = ATH10K_SKB_CB(msdu); struct ath10k_vif *arvif; struct wmi_mgmt_tx_cmd *cmd; struct ieee80211_hdr *hdr; struct sk_buff *skb; int len; u32 vdev_id; u32 buf_len = msdu->len; u16 fc; const u8 *peer_addr; hdr = (struct ieee80211_hdr *)msdu->data; fc = le16_to_cpu(hdr->frame_control); if (cb->vif) { arvif = (void *)cb->vif->drv_priv; vdev_id = arvif->vdev_id; } else { vdev_id = 0; } if (WARN_ON_ONCE(!ieee80211_is_mgmt(hdr->frame_control))) return ERR_PTR(-EINVAL); len = sizeof(cmd->hdr) + msdu->len; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { peer_addr = hdr->addr1; if (is_multicast_ether_addr(peer_addr)) { len += sizeof(struct ieee80211_mmie_16); buf_len += sizeof(struct ieee80211_mmie_16); } else { if (cb->ucast_cipher == WLAN_CIPHER_SUITE_GCMP || cb->ucast_cipher == WLAN_CIPHER_SUITE_GCMP_256) { len += IEEE80211_GCMP_MIC_LEN; buf_len += IEEE80211_GCMP_MIC_LEN; } else { len += IEEE80211_CCMP_MIC_LEN; buf_len += IEEE80211_CCMP_MIC_LEN; } } } len = round_up(len, 4); skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_mgmt_tx_cmd *)skb->data; cmd->hdr.vdev_id = __cpu_to_le32(vdev_id); cmd->hdr.tx_rate = 0; cmd->hdr.tx_power = 0; cmd->hdr.buf_len = __cpu_to_le32(buf_len); ether_addr_copy(cmd->hdr.peer_macaddr.addr, ieee80211_get_DA(hdr)); memcpy(cmd->buf, msdu->data, msdu->len); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %pK len %d ftype %02x stype %02x\n", msdu, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); trace_ath10k_tx_hdr(ar, skb->data, skb->len); trace_ath10k_tx_payload(ar, skb->data, skb->len); return skb; } static void ath10k_wmi_event_scan_started(struct ath10k *ar) { lockdep_assert_held(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: ath10k_warn(ar, "received scan started event in an invalid scan state: %s (%d)\n", ath10k_scan_state_str(ar->scan.state), ar->scan.state); break; case ATH10K_SCAN_STARTING: ar->scan.state = ATH10K_SCAN_RUNNING; if (ar->scan.is_roc) ieee80211_ready_on_channel(ar->hw); complete(&ar->scan.started); break; } } static void ath10k_wmi_event_scan_start_failed(struct ath10k *ar) { lockdep_assert_held(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: ath10k_warn(ar, "received scan start failed event in an invalid scan state: %s (%d)\n", ath10k_scan_state_str(ar->scan.state), ar->scan.state); break; case ATH10K_SCAN_STARTING: complete(&ar->scan.started); __ath10k_scan_finish(ar); break; } } static void ath10k_wmi_event_scan_completed(struct ath10k *ar) { lockdep_assert_held(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: case ATH10K_SCAN_STARTING: /* One suspected reason scan can be completed while starting is * if firmware fails to deliver all scan events to the host, * e.g. when transport pipe is full. 
This has been observed * with spectral scan phyerr events starving wmi transport * pipe. In such case the "scan completed" event should be (and * is) ignored by the host as it may be just firmware's scan * state machine recovering. */ ath10k_warn(ar, "received scan completed event in an invalid scan state: %s (%d)\n", ath10k_scan_state_str(ar->scan.state), ar->scan.state); break; case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: __ath10k_scan_finish(ar); break; } } static void ath10k_wmi_event_scan_bss_chan(struct ath10k *ar) { lockdep_assert_held(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: case ATH10K_SCAN_STARTING: ath10k_warn(ar, "received scan bss chan event in an invalid scan state: %s (%d)\n", ath10k_scan_state_str(ar->scan.state), ar->scan.state); break; case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: ar->scan_channel = NULL; break; } } static void ath10k_wmi_event_scan_foreign_chan(struct ath10k *ar, u32 freq) { lockdep_assert_held(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: case ATH10K_SCAN_STARTING: ath10k_warn(ar, "received scan foreign chan event in an invalid scan state: %s (%d)\n", ath10k_scan_state_str(ar->scan.state), ar->scan.state); break; case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: ar->scan_channel = ieee80211_get_channel(ar->hw->wiphy, freq); if (ar->scan.is_roc && ar->scan.roc_freq == freq) complete(&ar->scan.on_channel); break; } } static const char * ath10k_wmi_event_scan_type_str(enum wmi_scan_event_type type, enum wmi_scan_completion_reason reason) { switch (type) { case WMI_SCAN_EVENT_STARTED: return "started"; case WMI_SCAN_EVENT_COMPLETED: switch (reason) { case WMI_SCAN_REASON_COMPLETED: return "completed"; case WMI_SCAN_REASON_CANCELLED: return "completed [cancelled]"; case WMI_SCAN_REASON_PREEMPTED: return "completed [preempted]"; case WMI_SCAN_REASON_TIMEDOUT: return "completed [timedout]"; case WMI_SCAN_REASON_INTERNAL_FAILURE: return "completed [internal err]"; case WMI_SCAN_REASON_MAX: break; } return "completed [unknown]"; case WMI_SCAN_EVENT_BSS_CHANNEL: return "bss channel"; case WMI_SCAN_EVENT_FOREIGN_CHANNEL: return "foreign channel"; case WMI_SCAN_EVENT_DEQUEUED: return "dequeued"; case WMI_SCAN_EVENT_PREEMPTED: return "preempted"; case WMI_SCAN_EVENT_START_FAILED: return "start failed"; case WMI_SCAN_EVENT_RESTARTED: return "restarted"; case WMI_SCAN_EVENT_FOREIGN_CHANNEL_EXIT: return "foreign channel exit"; default: return "unknown"; } } static int ath10k_wmi_op_pull_scan_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_scan_ev_arg *arg) { struct wmi_scan_event *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->event_type = ev->event_type; arg->reason = ev->reason; arg->channel_freq = ev->channel_freq; arg->scan_req_id = ev->scan_req_id; arg->scan_id = ev->scan_id; arg->vdev_id = ev->vdev_id; return 0; } int ath10k_wmi_event_scan(struct ath10k *ar, struct sk_buff *skb) { struct wmi_scan_ev_arg arg = {}; enum wmi_scan_event_type event_type; enum wmi_scan_completion_reason reason; u32 freq; u32 req_id; u32 scan_id; u32 vdev_id; int ret; ret = ath10k_wmi_pull_scan(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse scan event: %d\n", ret); return ret; } event_type = __le32_to_cpu(arg.event_type); reason = __le32_to_cpu(arg.reason); freq = __le32_to_cpu(arg.channel_freq); req_id = __le32_to_cpu(arg.scan_req_id); scan_id = __le32_to_cpu(arg.scan_id); vdev_id = __le32_to_cpu(arg.vdev_id); spin_lock_bh(&ar->data_lock); 
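/* The per-state handlers above (scan_started, scan_completed, etc.) all
 * call lockdep_assert_held(&ar->data_lock), so the lock taken here must
 * stay held across the whole event dispatch below.
 */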
ath10k_dbg(ar, ATH10K_DBG_WMI, "scan event %s type %d reason %d freq %d req_id %d scan_id %d vdev_id %d state %s (%d)\n", ath10k_wmi_event_scan_type_str(event_type, reason), event_type, reason, freq, req_id, scan_id, vdev_id, ath10k_scan_state_str(ar->scan.state), ar->scan.state); switch (event_type) { case WMI_SCAN_EVENT_STARTED: ath10k_wmi_event_scan_started(ar); break; case WMI_SCAN_EVENT_COMPLETED: ath10k_wmi_event_scan_completed(ar); break; case WMI_SCAN_EVENT_BSS_CHANNEL: ath10k_wmi_event_scan_bss_chan(ar); break; case WMI_SCAN_EVENT_FOREIGN_CHANNEL: ath10k_wmi_event_scan_foreign_chan(ar, freq); break; case WMI_SCAN_EVENT_START_FAILED: ath10k_warn(ar, "received scan start failure event\n"); ath10k_wmi_event_scan_start_failed(ar); break; case WMI_SCAN_EVENT_DEQUEUED: case WMI_SCAN_EVENT_PREEMPTED: case WMI_SCAN_EVENT_RESTARTED: case WMI_SCAN_EVENT_FOREIGN_CHANNEL_EXIT: default: break; } spin_unlock_bh(&ar->data_lock); return 0; } /* If keys are configured, HW decrypts all frames * with protected bit set. Mark such frames as decrypted. */ static void ath10k_wmi_handle_wep_reauth(struct ath10k *ar, struct sk_buff *skb, struct ieee80211_rx_status *status) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; unsigned int hdrlen; bool peer_key; u8 *addr, keyidx; if (!ieee80211_is_auth(hdr->frame_control) || !ieee80211_has_protected(hdr->frame_control)) return; hdrlen = ieee80211_hdrlen(hdr->frame_control); if (skb->len < (hdrlen + IEEE80211_WEP_IV_LEN)) return; keyidx = skb->data[hdrlen + (IEEE80211_WEP_IV_LEN - 1)] >> WEP_KEYID_SHIFT; addr = ieee80211_get_SA(hdr); spin_lock_bh(&ar->data_lock); peer_key = ath10k_mac_is_peer_wep_key_set(ar, addr, keyidx); spin_unlock_bh(&ar->data_lock); if (peer_key) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac wep key present for peer %pM\n", addr); status->flag |= RX_FLAG_DECRYPTED; } } static int ath10k_wmi_op_pull_mgmt_rx_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_mgmt_rx_ev_arg *arg) { struct wmi_mgmt_rx_event_v1 *ev_v1; struct wmi_mgmt_rx_event_v2 *ev_v2; struct wmi_mgmt_rx_hdr_v1 *ev_hdr; struct wmi_mgmt_rx_ext_info *ext_info; size_t pull_len; u32 msdu_len; u32 len; if (test_bit(ATH10K_FW_FEATURE_EXT_WMI_MGMT_RX, ar->running_fw->fw_file.fw_features)) { ev_v2 = (struct wmi_mgmt_rx_event_v2 *)skb->data; ev_hdr = &ev_v2->hdr.v1; pull_len = sizeof(*ev_v2); } else { ev_v1 = (struct wmi_mgmt_rx_event_v1 *)skb->data; ev_hdr = &ev_v1->hdr; pull_len = sizeof(*ev_v1); } if (skb->len < pull_len) return -EPROTO; skb_pull(skb, pull_len); arg->channel = ev_hdr->channel; arg->buf_len = ev_hdr->buf_len; arg->status = ev_hdr->status; arg->snr = ev_hdr->snr; arg->phy_mode = ev_hdr->phy_mode; arg->rate = ev_hdr->rate; msdu_len = __le32_to_cpu(arg->buf_len); if (skb->len < msdu_len) return -EPROTO; if (le32_to_cpu(arg->status) & WMI_RX_STATUS_EXT_INFO) { len = ALIGN(le32_to_cpu(arg->buf_len), 4); ext_info = (struct wmi_mgmt_rx_ext_info *)(skb->data + len); memcpy(&arg->ext_info, ext_info, sizeof(struct wmi_mgmt_rx_ext_info)); } /* the WMI buffer might've ended up being padded to 4 bytes due to HTC * trailer with credit update. Trim the excess garbage. 
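 * The buf_len reported in the event header (msdu_len above) is the
 * authoritative frame length, so the skb is trimmed back to exactly
 * that many bytes.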
*/ skb_trim(skb, msdu_len); return 0; } static int ath10k_wmi_10_4_op_pull_mgmt_rx_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_mgmt_rx_ev_arg *arg) { struct wmi_10_4_mgmt_rx_event *ev; struct wmi_10_4_mgmt_rx_hdr *ev_hdr; size_t pull_len; u32 msdu_len; struct wmi_mgmt_rx_ext_info *ext_info; u32 len; ev = (struct wmi_10_4_mgmt_rx_event *)skb->data; ev_hdr = &ev->hdr; pull_len = sizeof(*ev); if (skb->len < pull_len) return -EPROTO; skb_pull(skb, pull_len); arg->channel = ev_hdr->channel; arg->buf_len = ev_hdr->buf_len; arg->status = ev_hdr->status; arg->snr = ev_hdr->snr; arg->phy_mode = ev_hdr->phy_mode; arg->rate = ev_hdr->rate; msdu_len = __le32_to_cpu(arg->buf_len); if (skb->len < msdu_len) return -EPROTO; if (le32_to_cpu(arg->status) & WMI_RX_STATUS_EXT_INFO) { len = ALIGN(le32_to_cpu(arg->buf_len), 4); ext_info = (struct wmi_mgmt_rx_ext_info *)(skb->data + len); memcpy(&arg->ext_info, ext_info, sizeof(struct wmi_mgmt_rx_ext_info)); } /* Make sure bytes added for padding are removed. */ skb_trim(skb, msdu_len); return 0; } static bool ath10k_wmi_rx_is_decrypted(struct ath10k *ar, struct ieee80211_hdr *hdr) { if (!ieee80211_has_protected(hdr->frame_control)) return false; /* FW delivers WEP Shared Auth frame with Protected Bit set and * encrypted payload. However in case of PMF it delivers decrypted * frames with Protected Bit set. */ if (ieee80211_is_auth(hdr->frame_control)) return false; /* qca99x0 based FW delivers broadcast or multicast management frames * (ex: group privacy action frames in mesh) as encrypted payload. */ if (is_multicast_ether_addr(ieee80211_get_DA(hdr)) && ar->hw_params.sw_decrypt_mcast_mgmt) return false; return true; } static int wmi_process_mgmt_tx_comp(struct ath10k *ar, struct mgmt_tx_compl_params *param) { struct ath10k_mgmt_tx_pkt_addr *pkt_addr; struct ath10k_wmi *wmi = &ar->wmi; struct ieee80211_tx_info *info; struct sk_buff *msdu; int ret; spin_lock_bh(&ar->data_lock); pkt_addr = idr_find(&wmi->mgmt_pending_tx, param->desc_id); if (!pkt_addr) { ath10k_warn(ar, "received mgmt tx completion for invalid msdu_id: %d\n", param->desc_id); ret = -ENOENT; goto out; } msdu = pkt_addr->vaddr; dma_unmap_single(ar->dev, pkt_addr->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); if (param->status) { info->flags &= ~IEEE80211_TX_STAT_ACK; } else { info->flags |= IEEE80211_TX_STAT_ACK; info->status.ack_signal = ATH10K_DEFAULT_NOISE_FLOOR + param->ack_rssi; info->status.flags |= IEEE80211_TX_STATUS_ACK_SIGNAL_VALID; } ieee80211_tx_status_irqsafe(ar->hw, msdu); ret = 0; out: idr_remove(&wmi->mgmt_pending_tx, param->desc_id); spin_unlock_bh(&ar->data_lock); return ret; } int ath10k_wmi_event_mgmt_tx_compl(struct ath10k *ar, struct sk_buff *skb) { struct wmi_tlv_mgmt_tx_compl_ev_arg arg; struct mgmt_tx_compl_params param; int ret; ret = ath10k_wmi_pull_mgmt_tx_compl(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse mgmt comp event: %d\n", ret); return ret; } memset(&param, 0, sizeof(struct mgmt_tx_compl_params)); param.desc_id = __le32_to_cpu(arg.desc_id); param.status = __le32_to_cpu(arg.status); if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map)) param.ack_rssi = __le32_to_cpu(arg.ack_rssi); wmi_process_mgmt_tx_comp(ar, &param); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv evnt mgmt tx completion\n"); return 0; } int ath10k_wmi_event_mgmt_tx_bundle_compl(struct ath10k *ar, struct sk_buff *skb) { struct wmi_tlv_mgmt_tx_bundle_compl_ev_arg arg; struct mgmt_tx_compl_params param; u32 num_reports; int i, ret; ret = 
ath10k_wmi_pull_mgmt_tx_bundle_compl(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse bundle mgmt compl event: %d\n", ret); return ret; } num_reports = __le32_to_cpu(arg.num_reports); for (i = 0; i < num_reports; i++) { memset(&param, 0, sizeof(struct mgmt_tx_compl_params)); param.desc_id = __le32_to_cpu(arg.desc_ids[i]); param.status = __le32_to_cpu(arg.desc_ids[i]); if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map)) param.ack_rssi = __le32_to_cpu(arg.ack_rssi[i]); wmi_process_mgmt_tx_comp(ar, &param); } ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv event bundle mgmt tx completion\n"); return 0; } int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_mgmt_rx_ev_arg arg = {}; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr; struct ieee80211_supported_band *sband; u32 rx_status; u32 channel; u32 phy_mode; u32 snr, rssi; u32 rate; u16 fc; int ret, i; ret = ath10k_wmi_pull_mgmt_rx(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse mgmt rx event: %d\n", ret); dev_kfree_skb(skb); return ret; } channel = __le32_to_cpu(arg.channel); rx_status = __le32_to_cpu(arg.status); snr = __le32_to_cpu(arg.snr); phy_mode = __le32_to_cpu(arg.phy_mode); rate = __le32_to_cpu(arg.rate); memset(status, 0, sizeof(*status)); ath10k_dbg(ar, ATH10K_DBG_MGMT, "event mgmt rx status %08x\n", rx_status); if ((test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags)) || (rx_status & (WMI_RX_STATUS_ERR_DECRYPT | WMI_RX_STATUS_ERR_KEY_CACHE_MISS | WMI_RX_STATUS_ERR_CRC))) { dev_kfree_skb(skb); return 0; } if (rx_status & WMI_RX_STATUS_ERR_MIC) status->flag |= RX_FLAG_MMIC_ERROR; if (rx_status & WMI_RX_STATUS_EXT_INFO) { status->mactime = __le64_to_cpu(arg.ext_info.rx_mac_timestamp); status->flag |= RX_FLAG_MACTIME_END; } /* Hardware can Rx CCK rates on 5GHz. In that case phy_mode is set to * MODE_11B. This means phy_mode is not a reliable source for the band * of mgmt rx. */ if (channel >= 1 && channel <= 14) { status->band = NL80211_BAND_2GHZ; } else if (channel >= 36 && channel <= ATH10K_MAX_5G_CHAN) { status->band = NL80211_BAND_5GHZ; } else { /* Shouldn't happen unless list of advertised channels to * mac80211 has been changed. */ WARN_ON_ONCE(1); dev_kfree_skb(skb); return 0; } if (phy_mode == MODE_11B && status->band == NL80211_BAND_5GHZ) ath10k_dbg(ar, ATH10K_DBG_MGMT, "wmi mgmt rx 11b (CCK) on 5GHz\n"); sband = &ar->mac.sbands[status->band]; status->freq = ieee80211_channel_to_frequency(channel, status->band); status->signal = snr + ATH10K_DEFAULT_NOISE_FLOOR; BUILD_BUG_ON(ARRAY_SIZE(status->chain_signal) != ARRAY_SIZE(arg.rssi)); for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { status->chains &= ~BIT(i); rssi = __le32_to_cpu(arg.rssi[i]); ath10k_dbg(ar, ATH10K_DBG_MGMT, "mgmt rssi[%d]:%d\n", i, arg.rssi[i]); if (rssi != ATH10K_INVALID_RSSI && rssi != 0) { status->chain_signal[i] = ATH10K_DEFAULT_NOISE_FLOOR + rssi; status->chains |= BIT(i); } } status->rate_idx = ath10k_mac_bitrate_to_idx(sband, rate / 100); hdr = (struct ieee80211_hdr *)skb->data; fc = le16_to_cpu(hdr->frame_control); /* Firmware is guaranteed to report all essential management frames via * WMI while it can deliver some extra via HTT. Since there can be * duplicates split the reporting wrt monitor/sniffing. 
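 * Setting RX_FLAG_SKIP_MONITOR below hides this WMI copy from monitor
 * interfaces so that a frame which is also delivered via HTT is not
 * reported to sniffers twice.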
*/ status->flag |= RX_FLAG_SKIP_MONITOR; ath10k_wmi_handle_wep_reauth(ar, skb, status); if (ath10k_wmi_rx_is_decrypted(ar, hdr)) { status->flag |= RX_FLAG_DECRYPTED; if (!ieee80211_is_action(hdr->frame_control) && !ieee80211_is_deauth(hdr->frame_control) && !ieee80211_is_disassoc(hdr->frame_control)) { status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED; hdr->frame_control = __cpu_to_le16(fc & ~IEEE80211_FCTL_PROTECTED); } } if (ieee80211_is_beacon(hdr->frame_control)) ath10k_mac_handle_beacon(ar, skb); if (ieee80211_is_beacon(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control)) status->boottime_ns = ktime_get_boottime_ns(); ath10k_dbg(ar, ATH10K_DBG_MGMT, "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); ath10k_dbg(ar, ATH10K_DBG_MGMT, "event mgmt rx freq %d band %d snr %d, rate_idx %d\n", status->freq, status->band, status->signal, status->rate_idx); ieee80211_rx_ni(ar->hw, skb); return 0; } static int freq_to_idx(struct ath10k *ar, int freq) { struct ieee80211_supported_band *sband; int band, ch, idx = 0; for (band = NL80211_BAND_2GHZ; band < NUM_NL80211_BANDS; band++) { sband = ar->hw->wiphy->bands[band]; if (!sband) continue; for (ch = 0; ch < sband->n_channels; ch++, idx++) if (sband->channels[ch].center_freq == freq) goto exit; } exit: return idx; } static int ath10k_wmi_op_pull_ch_info_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_ch_info_ev_arg *arg) { struct wmi_chan_info_event *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->err_code = ev->err_code; arg->freq = ev->freq; arg->cmd_flags = ev->cmd_flags; arg->noise_floor = ev->noise_floor; arg->rx_clear_count = ev->rx_clear_count; arg->cycle_count = ev->cycle_count; return 0; } static int ath10k_wmi_10_4_op_pull_ch_info_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_ch_info_ev_arg *arg) { struct wmi_10_4_chan_info_event *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->err_code = ev->err_code; arg->freq = ev->freq; arg->cmd_flags = ev->cmd_flags; arg->noise_floor = ev->noise_floor; arg->rx_clear_count = ev->rx_clear_count; arg->cycle_count = ev->cycle_count; arg->chan_tx_pwr_range = ev->chan_tx_pwr_range; arg->chan_tx_pwr_tp = ev->chan_tx_pwr_tp; arg->rx_frame_count = ev->rx_frame_count; return 0; } /* * Handle the channel info event for firmware which only sends one * chan_info event per scanned channel. */ static void ath10k_wmi_event_chan_info_unpaired(struct ath10k *ar, struct chan_info_params *params) { struct survey_info *survey; int idx; if (params->cmd_flags & WMI_CHAN_INFO_FLAG_COMPLETE) { ath10k_dbg(ar, ATH10K_DBG_WMI, "chan info report completed\n"); return; } idx = freq_to_idx(ar, params->freq); if (idx >= ARRAY_SIZE(ar->survey)) { ath10k_warn(ar, "chan info: invalid frequency %d (idx %d out of bounds)\n", params->freq, idx); return; } survey = &ar->survey[idx]; if (!params->mac_clk_mhz) return; memset(survey, 0, sizeof(*survey)); survey->noise = params->noise_floor; survey->time = (params->cycle_count / params->mac_clk_mhz) / 1000; survey->time_busy = (params->rx_clear_count / params->mac_clk_mhz) / 1000; survey->filled |= SURVEY_INFO_NOISE_DBM | SURVEY_INFO_TIME | SURVEY_INFO_TIME_BUSY; } /* * Handle the channel info event for firmware which sends chan_info * event in pairs(start and stop events) for every scanned channel. 
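 * The opening event of a pair primes the cached rx_clear_count and
 * cycle_count; the closing (COMPLETE) event derives the survey time and
 * busy time from the delta against those cached values.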
*/ static void ath10k_wmi_event_chan_info_paired(struct ath10k *ar, struct chan_info_params *params) { struct survey_info *survey; int idx; idx = freq_to_idx(ar, params->freq); if (idx >= ARRAY_SIZE(ar->survey)) { ath10k_warn(ar, "chan info: invalid frequency %d (idx %d out of bounds)\n", params->freq, idx); return; } if (params->cmd_flags & WMI_CHAN_INFO_FLAG_COMPLETE) { if (ar->ch_info_can_report_survey) { survey = &ar->survey[idx]; survey->noise = params->noise_floor; survey->filled = SURVEY_INFO_NOISE_DBM; ath10k_hw_fill_survey_time(ar, survey, params->cycle_count, params->rx_clear_count, ar->survey_last_cycle_count, ar->survey_last_rx_clear_count); } ar->ch_info_can_report_survey = false; } else { ar->ch_info_can_report_survey = true; } if (!(params->cmd_flags & WMI_CHAN_INFO_FLAG_PRE_COMPLETE)) { ar->survey_last_rx_clear_count = params->rx_clear_count; ar->survey_last_cycle_count = params->cycle_count; } } void ath10k_wmi_event_chan_info(struct ath10k *ar, struct sk_buff *skb) { struct chan_info_params ch_info_param; struct wmi_ch_info_ev_arg arg = {}; int ret; ret = ath10k_wmi_pull_ch_info(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse chan info event: %d\n", ret); return; } ch_info_param.err_code = __le32_to_cpu(arg.err_code); ch_info_param.freq = __le32_to_cpu(arg.freq); ch_info_param.cmd_flags = __le32_to_cpu(arg.cmd_flags); ch_info_param.noise_floor = __le32_to_cpu(arg.noise_floor); ch_info_param.rx_clear_count = __le32_to_cpu(arg.rx_clear_count); ch_info_param.cycle_count = __le32_to_cpu(arg.cycle_count); ch_info_param.mac_clk_mhz = __le32_to_cpu(arg.mac_clk_mhz); ath10k_dbg(ar, ATH10K_DBG_WMI, "chan info err_code %d freq %d cmd_flags %d noise_floor %d rx_clear_count %d cycle_count %d\n", ch_info_param.err_code, ch_info_param.freq, ch_info_param.cmd_flags, ch_info_param.noise_floor, ch_info_param.rx_clear_count, ch_info_param.cycle_count); spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: case ATH10K_SCAN_STARTING: ath10k_dbg(ar, ATH10K_DBG_WMI, "received chan info event without a scan request, ignoring\n"); goto exit; case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: break; } if (test_bit(ATH10K_FW_FEATURE_SINGLE_CHAN_INFO_PER_CHANNEL, ar->running_fw->fw_file.fw_features)) ath10k_wmi_event_chan_info_unpaired(ar, &ch_info_param); else ath10k_wmi_event_chan_info_paired(ar, &ch_info_param); exit: spin_unlock_bh(&ar->data_lock); } void ath10k_wmi_event_echo(struct ath10k *ar, struct sk_buff *skb) { struct wmi_echo_ev_arg arg = {}; int ret; ret = ath10k_wmi_pull_echo_ev(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse echo: %d\n", ret); return; } ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event echo value 0x%08x\n", le32_to_cpu(arg.value)); if (le32_to_cpu(arg.value) == ATH10K_WMI_BARRIER_ECHO_ID) complete(&ar->wmi.barrier); } int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event debug mesg len %d\n", skb->len); trace_ath10k_wmi_dbglog(ar, skb->data, skb->len); return 0; } void ath10k_wmi_pull_pdev_stats_base(const struct wmi_pdev_stats_base *src, struct ath10k_fw_stats_pdev *dst) { dst->ch_noise_floor = __le32_to_cpu(src->chan_nf); dst->tx_frame_count = __le32_to_cpu(src->tx_frame_count); dst->rx_frame_count = __le32_to_cpu(src->rx_frame_count); dst->rx_clear_count = __le32_to_cpu(src->rx_clear_count); dst->cycle_count = __le32_to_cpu(src->cycle_count); dst->phy_err_count = __le32_to_cpu(src->phy_err_count); dst->chan_tx_power = __le32_to_cpu(src->chan_tx_pwr); } void 
ath10k_wmi_pull_pdev_stats_tx(const struct wmi_pdev_stats_tx *src, struct ath10k_fw_stats_pdev *dst) { dst->comp_queued = __le32_to_cpu(src->comp_queued); dst->comp_delivered = __le32_to_cpu(src->comp_delivered); dst->msdu_enqued = __le32_to_cpu(src->msdu_enqued); dst->mpdu_enqued = __le32_to_cpu(src->mpdu_enqued); dst->wmm_drop = __le32_to_cpu(src->wmm_drop); dst->local_enqued = __le32_to_cpu(src->local_enqued); dst->local_freed = __le32_to_cpu(src->local_freed); dst->hw_queued = __le32_to_cpu(src->hw_queued); dst->hw_reaped = __le32_to_cpu(src->hw_reaped); dst->underrun = __le32_to_cpu(src->underrun); dst->tx_abort = __le32_to_cpu(src->tx_abort); dst->mpdus_requeued = __le32_to_cpu(src->mpdus_requeued); dst->tx_ko = __le32_to_cpu(src->tx_ko); dst->data_rc = __le32_to_cpu(src->data_rc); dst->self_triggers = __le32_to_cpu(src->self_triggers); dst->sw_retry_failure = __le32_to_cpu(src->sw_retry_failure); dst->illgl_rate_phy_err = __le32_to_cpu(src->illgl_rate_phy_err); dst->pdev_cont_xretry = __le32_to_cpu(src->pdev_cont_xretry); dst->pdev_tx_timeout = __le32_to_cpu(src->pdev_tx_timeout); dst->pdev_resets = __le32_to_cpu(src->pdev_resets); dst->phy_underrun = __le32_to_cpu(src->phy_underrun); dst->txop_ovf = __le32_to_cpu(src->txop_ovf); } static void ath10k_wmi_10_4_pull_pdev_stats_tx(const struct wmi_10_4_pdev_stats_tx *src, struct ath10k_fw_stats_pdev *dst) { dst->comp_queued = __le32_to_cpu(src->comp_queued); dst->comp_delivered = __le32_to_cpu(src->comp_delivered); dst->msdu_enqued = __le32_to_cpu(src->msdu_enqued); dst->mpdu_enqued = __le32_to_cpu(src->mpdu_enqued); dst->wmm_drop = __le32_to_cpu(src->wmm_drop); dst->local_enqued = __le32_to_cpu(src->local_enqued); dst->local_freed = __le32_to_cpu(src->local_freed); dst->hw_queued = __le32_to_cpu(src->hw_queued); dst->hw_reaped = __le32_to_cpu(src->hw_reaped); dst->underrun = __le32_to_cpu(src->underrun); dst->tx_abort = __le32_to_cpu(src->tx_abort); dst->mpdus_requeued = __le32_to_cpu(src->mpdus_requeued); dst->tx_ko = __le32_to_cpu(src->tx_ko); dst->data_rc = __le32_to_cpu(src->data_rc); dst->self_triggers = __le32_to_cpu(src->self_triggers); dst->sw_retry_failure = __le32_to_cpu(src->sw_retry_failure); dst->illgl_rate_phy_err = __le32_to_cpu(src->illgl_rate_phy_err); dst->pdev_cont_xretry = __le32_to_cpu(src->pdev_cont_xretry); dst->pdev_tx_timeout = __le32_to_cpu(src->pdev_tx_timeout); dst->pdev_resets = __le32_to_cpu(src->pdev_resets); dst->phy_underrun = __le32_to_cpu(src->phy_underrun); dst->txop_ovf = __le32_to_cpu(src->txop_ovf); dst->hw_paused = __le32_to_cpu(src->hw_paused); dst->seq_posted = __le32_to_cpu(src->seq_posted); dst->seq_failed_queueing = __le32_to_cpu(src->seq_failed_queueing); dst->seq_completed = __le32_to_cpu(src->seq_completed); dst->seq_restarted = __le32_to_cpu(src->seq_restarted); dst->mu_seq_posted = __le32_to_cpu(src->mu_seq_posted); dst->mpdus_sw_flush = __le32_to_cpu(src->mpdus_sw_flush); dst->mpdus_hw_filter = __le32_to_cpu(src->mpdus_hw_filter); dst->mpdus_truncated = __le32_to_cpu(src->mpdus_truncated); dst->mpdus_ack_failed = __le32_to_cpu(src->mpdus_ack_failed); dst->mpdus_hw_filter = __le32_to_cpu(src->mpdus_hw_filter); dst->mpdus_expired = __le32_to_cpu(src->mpdus_expired); } void ath10k_wmi_pull_pdev_stats_rx(const struct wmi_pdev_stats_rx *src, struct ath10k_fw_stats_pdev *dst) { dst->mid_ppdu_route_change = __le32_to_cpu(src->mid_ppdu_route_change); dst->status_rcvd = __le32_to_cpu(src->status_rcvd); dst->r0_frags = __le32_to_cpu(src->r0_frags); dst->r1_frags = __le32_to_cpu(src->r1_frags); 
dst->r2_frags = __le32_to_cpu(src->r2_frags); dst->r3_frags = __le32_to_cpu(src->r3_frags); dst->htt_msdus = __le32_to_cpu(src->htt_msdus); dst->htt_mpdus = __le32_to_cpu(src->htt_mpdus); dst->loc_msdus = __le32_to_cpu(src->loc_msdus); dst->loc_mpdus = __le32_to_cpu(src->loc_mpdus); dst->oversize_amsdu = __le32_to_cpu(src->oversize_amsdu); dst->phy_errs = __le32_to_cpu(src->phy_errs); dst->phy_err_drop = __le32_to_cpu(src->phy_err_drop); dst->mpdu_errs = __le32_to_cpu(src->mpdu_errs); } void ath10k_wmi_pull_pdev_stats_extra(const struct wmi_pdev_stats_extra *src, struct ath10k_fw_stats_pdev *dst) { dst->ack_rx_bad = __le32_to_cpu(src->ack_rx_bad); dst->rts_bad = __le32_to_cpu(src->rts_bad); dst->rts_good = __le32_to_cpu(src->rts_good); dst->fcs_bad = __le32_to_cpu(src->fcs_bad); dst->no_beacons = __le32_to_cpu(src->no_beacons); dst->mib_int_count = __le32_to_cpu(src->mib_int_count); } void ath10k_wmi_pull_peer_stats(const struct wmi_peer_stats *src, struct ath10k_fw_stats_peer *dst) { ether_addr_copy(dst->peer_macaddr, src->peer_macaddr.addr); dst->peer_rssi = __le32_to_cpu(src->peer_rssi); dst->peer_tx_rate = __le32_to_cpu(src->peer_tx_rate); } static void ath10k_wmi_10_4_pull_peer_stats(const struct wmi_10_4_peer_stats *src, struct ath10k_fw_stats_peer *dst) { ether_addr_copy(dst->peer_macaddr, src->peer_macaddr.addr); dst->peer_rssi = __le32_to_cpu(src->peer_rssi); dst->peer_tx_rate = __le32_to_cpu(src->peer_tx_rate); dst->peer_rx_rate = __le32_to_cpu(src->peer_rx_rate); } static void ath10k_wmi_10_4_pull_vdev_stats(const struct wmi_vdev_stats_extd *src, struct ath10k_fw_stats_vdev_extd *dst) { dst->vdev_id = __le32_to_cpu(src->vdev_id); dst->ppdu_aggr_cnt = __le32_to_cpu(src->ppdu_aggr_cnt); dst->ppdu_noack = __le32_to_cpu(src->ppdu_noack); dst->mpdu_queued = __le32_to_cpu(src->mpdu_queued); dst->ppdu_nonaggr_cnt = __le32_to_cpu(src->ppdu_nonaggr_cnt); dst->mpdu_sw_requeued = __le32_to_cpu(src->mpdu_sw_requeued); dst->mpdu_suc_retry = __le32_to_cpu(src->mpdu_suc_retry); dst->mpdu_suc_multitry = __le32_to_cpu(src->mpdu_suc_multitry); dst->mpdu_fail_retry = __le32_to_cpu(src->mpdu_fail_retry); dst->tx_ftm_suc = __le32_to_cpu(src->tx_ftm_suc); dst->tx_ftm_suc_retry = __le32_to_cpu(src->tx_ftm_suc_retry); dst->tx_ftm_fail = __le32_to_cpu(src->tx_ftm_fail); dst->rx_ftmr_cnt = __le32_to_cpu(src->rx_ftmr_cnt); dst->rx_ftmr_dup_cnt = __le32_to_cpu(src->rx_ftmr_dup_cnt); dst->rx_iftmr_cnt = __le32_to_cpu(src->rx_iftmr_cnt); dst->rx_iftmr_dup_cnt = __le32_to_cpu(src->rx_iftmr_dup_cnt); } static int ath10k_wmi_main_op_pull_fw_stats(struct ath10k *ar, struct sk_buff *skb, struct ath10k_fw_stats *stats) { const struct wmi_stats_event *ev = (void *)skb->data; u32 num_pdev_stats, num_peer_stats; int i; if (!skb_pull(skb, sizeof(*ev))) return -EPROTO; num_pdev_stats = __le32_to_cpu(ev->num_pdev_stats); num_peer_stats = __le32_to_cpu(ev->num_peer_stats); for (i = 0; i < num_pdev_stats; i++) { const struct wmi_pdev_stats *src; struct ath10k_fw_stats_pdev *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_pdev_stats_base(&src->base, dst); ath10k_wmi_pull_pdev_stats_tx(&src->tx, dst); ath10k_wmi_pull_pdev_stats_rx(&src->rx, dst); list_add_tail(&dst->list, &stats->pdevs); } /* fw doesn't implement vdev stats */ for (i = 0; i < num_peer_stats; i++) { const struct wmi_peer_stats *src; struct ath10k_fw_stats_peer *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = 
kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_peer_stats(src, dst); list_add_tail(&dst->list, &stats->peers); } return 0; } static int ath10k_wmi_10x_op_pull_fw_stats(struct ath10k *ar, struct sk_buff *skb, struct ath10k_fw_stats *stats) { const struct wmi_stats_event *ev = (void *)skb->data; u32 num_pdev_stats, num_peer_stats; int i; if (!skb_pull(skb, sizeof(*ev))) return -EPROTO; num_pdev_stats = __le32_to_cpu(ev->num_pdev_stats); num_peer_stats = __le32_to_cpu(ev->num_peer_stats); for (i = 0; i < num_pdev_stats; i++) { const struct wmi_10x_pdev_stats *src; struct ath10k_fw_stats_pdev *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_pdev_stats_base(&src->base, dst); ath10k_wmi_pull_pdev_stats_tx(&src->tx, dst); ath10k_wmi_pull_pdev_stats_rx(&src->rx, dst); ath10k_wmi_pull_pdev_stats_extra(&src->extra, dst); list_add_tail(&dst->list, &stats->pdevs); } /* fw doesn't implement vdev stats */ for (i = 0; i < num_peer_stats; i++) { const struct wmi_10x_peer_stats *src; struct ath10k_fw_stats_peer *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_peer_stats(&src->old, dst); dst->peer_rx_rate = __le32_to_cpu(src->peer_rx_rate); list_add_tail(&dst->list, &stats->peers); } return 0; } static int ath10k_wmi_10_2_op_pull_fw_stats(struct ath10k *ar, struct sk_buff *skb, struct ath10k_fw_stats *stats) { const struct wmi_10_2_stats_event *ev = (void *)skb->data; u32 num_pdev_stats; u32 num_pdev_ext_stats; u32 num_peer_stats; int i; if (!skb_pull(skb, sizeof(*ev))) return -EPROTO; num_pdev_stats = __le32_to_cpu(ev->num_pdev_stats); num_pdev_ext_stats = __le32_to_cpu(ev->num_pdev_ext_stats); num_peer_stats = __le32_to_cpu(ev->num_peer_stats); for (i = 0; i < num_pdev_stats; i++) { const struct wmi_10_2_pdev_stats *src; struct ath10k_fw_stats_pdev *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_pdev_stats_base(&src->base, dst); ath10k_wmi_pull_pdev_stats_tx(&src->tx, dst); ath10k_wmi_pull_pdev_stats_rx(&src->rx, dst); ath10k_wmi_pull_pdev_stats_extra(&src->extra, dst); /* FIXME: expose 10.2 specific values */ list_add_tail(&dst->list, &stats->pdevs); } for (i = 0; i < num_pdev_ext_stats; i++) { const struct wmi_10_2_pdev_ext_stats *src; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; /* FIXME: expose values to userspace * * Note: Even though this loop seems to do nothing it is * required to parse following sub-structures properly. 
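 * (Each skb_pull() above still advances skb->data past one ext stats
 * record, so the peer stats that follow are read from the right offset.)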
*/ } /* fw doesn't implement vdev stats */ for (i = 0; i < num_peer_stats; i++) { const struct wmi_10_2_peer_stats *src; struct ath10k_fw_stats_peer *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_peer_stats(&src->old, dst); dst->peer_rx_rate = __le32_to_cpu(src->peer_rx_rate); /* FIXME: expose 10.2 specific values */ list_add_tail(&dst->list, &stats->peers); } return 0; } static int ath10k_wmi_10_2_4_op_pull_fw_stats(struct ath10k *ar, struct sk_buff *skb, struct ath10k_fw_stats *stats) { const struct wmi_10_2_stats_event *ev = (void *)skb->data; u32 num_pdev_stats; u32 num_pdev_ext_stats; u32 num_peer_stats; int i; if (!skb_pull(skb, sizeof(*ev))) return -EPROTO; num_pdev_stats = __le32_to_cpu(ev->num_pdev_stats); num_pdev_ext_stats = __le32_to_cpu(ev->num_pdev_ext_stats); num_peer_stats = __le32_to_cpu(ev->num_peer_stats); for (i = 0; i < num_pdev_stats; i++) { const struct wmi_10_2_pdev_stats *src; struct ath10k_fw_stats_pdev *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_pdev_stats_base(&src->base, dst); ath10k_wmi_pull_pdev_stats_tx(&src->tx, dst); ath10k_wmi_pull_pdev_stats_rx(&src->rx, dst); ath10k_wmi_pull_pdev_stats_extra(&src->extra, dst); /* FIXME: expose 10.2 specific values */ list_add_tail(&dst->list, &stats->pdevs); } for (i = 0; i < num_pdev_ext_stats; i++) { const struct wmi_10_2_pdev_ext_stats *src; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; /* FIXME: expose values to userspace * * Note: Even though this loop seems to do nothing it is * required to parse following sub-structures properly. 
*/ } /* fw doesn't implement vdev stats */ for (i = 0; i < num_peer_stats; i++) { const struct wmi_10_2_4_ext_peer_stats *src; struct ath10k_fw_stats_peer *dst; int stats_len; if (test_bit(WMI_SERVICE_PEER_STATS, ar->wmi.svc_map)) stats_len = sizeof(struct wmi_10_2_4_ext_peer_stats); else stats_len = sizeof(struct wmi_10_2_4_peer_stats); src = (void *)skb->data; if (!skb_pull(skb, stats_len)) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_peer_stats(&src->common.old, dst); dst->peer_rx_rate = __le32_to_cpu(src->common.peer_rx_rate); if (ath10k_peer_stats_enabled(ar)) dst->rx_duration = __le32_to_cpu(src->rx_duration); /* FIXME: expose 10.2 specific values */ list_add_tail(&dst->list, &stats->peers); } return 0; } static int ath10k_wmi_10_4_op_pull_fw_stats(struct ath10k *ar, struct sk_buff *skb, struct ath10k_fw_stats *stats) { const struct wmi_10_2_stats_event *ev = (void *)skb->data; u32 num_pdev_stats; u32 num_pdev_ext_stats; u32 num_vdev_stats; u32 num_peer_stats; u32 num_bcnflt_stats; u32 stats_id; int i; if (!skb_pull(skb, sizeof(*ev))) return -EPROTO; num_pdev_stats = __le32_to_cpu(ev->num_pdev_stats); num_pdev_ext_stats = __le32_to_cpu(ev->num_pdev_ext_stats); num_vdev_stats = __le32_to_cpu(ev->num_vdev_stats); num_peer_stats = __le32_to_cpu(ev->num_peer_stats); num_bcnflt_stats = __le32_to_cpu(ev->num_bcnflt_stats); stats_id = __le32_to_cpu(ev->stats_id); for (i = 0; i < num_pdev_stats; i++) { const struct wmi_10_4_pdev_stats *src; struct ath10k_fw_stats_pdev *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_pull_pdev_stats_base(&src->base, dst); ath10k_wmi_10_4_pull_pdev_stats_tx(&src->tx, dst); ath10k_wmi_pull_pdev_stats_rx(&src->rx, dst); dst->rx_ovfl_errs = __le32_to_cpu(src->rx_ovfl_errs); ath10k_wmi_pull_pdev_stats_extra(&src->extra, dst); list_add_tail(&dst->list, &stats->pdevs); } for (i = 0; i < num_pdev_ext_stats; i++) { const struct wmi_10_2_pdev_ext_stats *src; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; /* FIXME: expose values to userspace * * Note: Even though this loop seems to do nothing it is * required to parse following sub-structures properly. */ } for (i = 0; i < num_vdev_stats; i++) { const struct wmi_vdev_stats *src; /* Ignore vdev stats here as it has only vdev id. Actual vdev * stats will be retrieved from vdev extended stats. */ src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; } for (i = 0; i < num_peer_stats; i++) { const struct wmi_10_4_peer_stats *src; struct ath10k_fw_stats_peer *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_10_4_pull_peer_stats(src, dst); list_add_tail(&dst->list, &stats->peers); } for (i = 0; i < num_bcnflt_stats; i++) { const struct wmi_10_4_bss_bcn_filter_stats *src; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; /* FIXME: expose values to userspace * * Note: Even though this loop seems to do nothing it is * required to parse following sub-structures properly. 
*/ } if (stats_id & WMI_10_4_STAT_PEER_EXTD) { stats->extended = true; for (i = 0; i < num_peer_stats; i++) { const struct wmi_10_4_peer_extd_stats *src; struct ath10k_fw_extd_stats_peer *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ether_addr_copy(dst->peer_macaddr, src->peer_macaddr.addr); dst->rx_duration = __le32_to_cpu(src->rx_duration); list_add_tail(&dst->list, &stats->peers_extd); } } if (stats_id & WMI_10_4_STAT_VDEV_EXTD) { for (i = 0; i < num_vdev_stats; i++) { const struct wmi_vdev_stats_extd *src; struct ath10k_fw_stats_vdev_extd *dst; src = (void *)skb->data; if (!skb_pull(skb, sizeof(*src))) return -EPROTO; dst = kzalloc(sizeof(*dst), GFP_ATOMIC); if (!dst) continue; ath10k_wmi_10_4_pull_vdev_stats(src, dst); list_add_tail(&dst->list, &stats->vdevs); } } return 0; } void ath10k_wmi_event_update_stats(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_UPDATE_STATS_EVENTID\n"); ath10k_debug_fw_stats_process(ar, skb); } static int ath10k_wmi_op_pull_vdev_start_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_vdev_start_ev_arg *arg) { struct wmi_vdev_start_response_event *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->vdev_id = ev->vdev_id; arg->req_id = ev->req_id; arg->resp_type = ev->resp_type; arg->status = ev->status; return 0; } void ath10k_wmi_event_vdev_start_resp(struct ath10k *ar, struct sk_buff *skb) { struct wmi_vdev_start_ev_arg arg = {}; int ret; u32 status; ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_VDEV_START_RESP_EVENTID\n"); ar->last_wmi_vdev_start_status = 0; ret = ath10k_wmi_pull_vdev_start(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse vdev start event: %d\n", ret); ar->last_wmi_vdev_start_status = ret; goto out; } status = __le32_to_cpu(arg.status); if (WARN_ON_ONCE(status)) { ath10k_warn(ar, "vdev-start-response reports status error: %d (%s)\n", status, (status == WMI_VDEV_START_CHAN_INVALID) ? "chan-invalid" : "unknown"); /* Setup is done one way or another though, so we should still * do the completion, so don't return here. */ ar->last_wmi_vdev_start_status = -EINVAL; } out: complete(&ar->vdev_setup_done); } void ath10k_wmi_event_vdev_stopped(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_VDEV_STOPPED_EVENTID\n"); complete(&ar->vdev_setup_done); } static int ath10k_wmi_op_pull_peer_kick_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_peer_kick_ev_arg *arg) { struct wmi_peer_sta_kickout_event *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->mac_addr = ev->peer_macaddr.addr; return 0; } void ath10k_wmi_event_peer_sta_kickout(struct ath10k *ar, struct sk_buff *skb) { struct wmi_peer_kick_ev_arg arg = {}; struct ieee80211_sta *sta; int ret; ret = ath10k_wmi_pull_peer_kick(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse peer kickout event: %d\n", ret); return; } ath10k_dbg(ar, ATH10K_DBG_STA, "wmi event peer sta kickout %pM\n", arg.mac_addr); rcu_read_lock(); sta = ieee80211_find_sta_by_ifaddr(ar->hw, arg.mac_addr, NULL); if (!sta) { ath10k_warn(ar, "Spurious quick kickout for STA %pM\n", arg.mac_addr); goto exit; } ieee80211_report_low_ack(sta, 10); exit: rcu_read_unlock(); } /* * FIXME * * We don't report to mac80211 sleep state of connected * stations. Due to this mac80211 can't fill in TIM IE * correctly. 
* * I know of no way of getting nullfunc frames that contain * sleep transition from connected stations - these do not * seem to be sent from the target to the host. There also * doesn't seem to be a dedicated event for that. So the * only way left to do this would be to read tim_bitmap * during SWBA. * * We could probably try using tim_bitmap from SWBA to tell * mac80211 which stations are asleep and which are not. The * problem here is calling mac80211 functions so many times * could take too long and make us miss the time to submit * the beacon to the target. * * So as a workaround we try to extend the TIM IE if there * is unicast buffered for stations with aid > 7 and fill it * in ourselves. */ static void ath10k_wmi_update_tim(struct ath10k *ar, struct ath10k_vif *arvif, struct sk_buff *bcn, const struct wmi_tim_info_arg *tim_info) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)bcn->data; struct ieee80211_tim_ie *tim; u8 *ies, *ie; u8 ie_len, pvm_len; __le32 t; u32 v, tim_len; /* When FW reports 0 in tim_len, ensure at least first byte * in tim_bitmap is considered for pvm calculation. */ tim_len = tim_info->tim_len ? __le32_to_cpu(tim_info->tim_len) : 1; /* if next SWBA has no tim_changed the tim_bitmap is garbage. * we must copy the bitmap upon change and reuse it later */ if (__le32_to_cpu(tim_info->tim_changed)) { int i; if (sizeof(arvif->u.ap.tim_bitmap) < tim_len) { ath10k_warn(ar, "SWBA TIM field is too big (%u), truncated it to %zu", tim_len, sizeof(arvif->u.ap.tim_bitmap)); tim_len = sizeof(arvif->u.ap.tim_bitmap); } for (i = 0; i < tim_len; i++) { t = tim_info->tim_bitmap[i / 4]; v = __le32_to_cpu(t); arvif->u.ap.tim_bitmap[i] = (v >> ((i % 4) * 8)) & 0xFF; } /* FW reports either length 0 or length based on max supported * station. 
so we calculate this on our own */ arvif->u.ap.tim_len = 0; for (i = 0; i < tim_len; i++) if (arvif->u.ap.tim_bitmap[i]) arvif->u.ap.tim_len = i; arvif->u.ap.tim_len++; } ies = bcn->data; ies += ieee80211_hdrlen(hdr->frame_control); ies += 12; /* fixed parameters */ ie = (u8 *)cfg80211_find_ie(WLAN_EID_TIM, ies, (u8 *)skb_tail_pointer(bcn) - ies); if (!ie) { if (arvif->vdev_type != WMI_VDEV_TYPE_IBSS) ath10k_warn(ar, "no tim ie found;\n"); return; } tim = (void *)ie + 2; ie_len = ie[1]; pvm_len = ie_len - 3; /* exclude dtim count, dtim period, bmap ctl */ if (pvm_len < arvif->u.ap.tim_len) { int expand_size = tim_len - pvm_len; int move_size = skb_tail_pointer(bcn) - (ie + 2 + ie_len); void *next_ie = ie + 2 + ie_len; if (skb_put(bcn, expand_size)) { memmove(next_ie + expand_size, next_ie, move_size); ie[1] += expand_size; ie_len += expand_size; pvm_len += expand_size; } else { ath10k_warn(ar, "tim expansion failed\n"); } } if (pvm_len > tim_len) { ath10k_warn(ar, "tim pvm length is too great (%d)\n", pvm_len); return; } tim->bitmap_ctrl = !!__le32_to_cpu(tim_info->tim_mcast); memcpy(tim->virtual_map, arvif->u.ap.tim_bitmap, pvm_len); if (tim->dtim_count == 0) { ATH10K_SKB_CB(bcn)->flags |= ATH10K_SKB_F_DTIM_ZERO; if (__le32_to_cpu(tim_info->tim_mcast) == 1) ATH10K_SKB_CB(bcn)->flags |= ATH10K_SKB_F_DELIVER_CAB; } ath10k_dbg(ar, ATH10K_DBG_MGMT, "dtim %d/%d mcast %d pvmlen %d\n", tim->dtim_count, tim->dtim_period, tim->bitmap_ctrl, pvm_len); } static void ath10k_wmi_update_noa(struct ath10k *ar, struct ath10k_vif *arvif, struct sk_buff *bcn, const struct wmi_p2p_noa_info *noa) { if (!arvif->vif->p2p) return; ath10k_dbg(ar, ATH10K_DBG_MGMT, "noa changed: %d\n", noa->changed); if (noa->changed & WMI_P2P_NOA_CHANGED_BIT) ath10k_p2p_noa_update(arvif, noa); if (arvif->u.ap.noa_data) if (!pskb_expand_head(bcn, 0, arvif->u.ap.noa_len, GFP_ATOMIC)) skb_put_data(bcn, arvif->u.ap.noa_data, arvif->u.ap.noa_len); } static int ath10k_wmi_op_pull_swba_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_swba_ev_arg *arg) { struct wmi_host_swba_event *ev = (void *)skb->data; u32 map; size_t i; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->vdev_map = ev->vdev_map; for (i = 0, map = __le32_to_cpu(ev->vdev_map); map; map >>= 1) { if (!(map & BIT(0))) continue; /* If this happens there were some changes in firmware and * ath10k should update the max size of tim_info array. 
*/ if (WARN_ON_ONCE(i == ARRAY_SIZE(arg->tim_info))) break; if (__le32_to_cpu(ev->bcn_info[i].tim_info.tim_len) > sizeof(ev->bcn_info[i].tim_info.tim_bitmap)) { ath10k_warn(ar, "refusing to parse invalid swba structure\n"); return -EPROTO; } arg->tim_info[i].tim_len = ev->bcn_info[i].tim_info.tim_len; arg->tim_info[i].tim_mcast = ev->bcn_info[i].tim_info.tim_mcast; arg->tim_info[i].tim_bitmap = ev->bcn_info[i].tim_info.tim_bitmap; arg->tim_info[i].tim_changed = ev->bcn_info[i].tim_info.tim_changed; arg->tim_info[i].tim_num_ps_pending = ev->bcn_info[i].tim_info.tim_num_ps_pending; arg->noa_info[i] = &ev->bcn_info[i].p2p_noa_info; i++; } return 0; } static int ath10k_wmi_10_2_4_op_pull_swba_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_swba_ev_arg *arg) { struct wmi_10_2_4_host_swba_event *ev = (void *)skb->data; u32 map; size_t i; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->vdev_map = ev->vdev_map; for (i = 0, map = __le32_to_cpu(ev->vdev_map); map; map >>= 1) { if (!(map & BIT(0))) continue; /* If this happens there were some changes in firmware and * ath10k should update the max size of tim_info array. */ if (WARN_ON_ONCE(i == ARRAY_SIZE(arg->tim_info))) break; if (__le32_to_cpu(ev->bcn_info[i].tim_info.tim_len) > sizeof(ev->bcn_info[i].tim_info.tim_bitmap)) { ath10k_warn(ar, "refusing to parse invalid swba structure\n"); return -EPROTO; } arg->tim_info[i].tim_len = ev->bcn_info[i].tim_info.tim_len; arg->tim_info[i].tim_mcast = ev->bcn_info[i].tim_info.tim_mcast; arg->tim_info[i].tim_bitmap = ev->bcn_info[i].tim_info.tim_bitmap; arg->tim_info[i].tim_changed = ev->bcn_info[i].tim_info.tim_changed; arg->tim_info[i].tim_num_ps_pending = ev->bcn_info[i].tim_info.tim_num_ps_pending; i++; } return 0; } static int ath10k_wmi_10_4_op_pull_swba_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_swba_ev_arg *arg) { struct wmi_10_4_host_swba_event *ev = (void *)skb->data; u32 map, tim_len; size_t i; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->vdev_map = ev->vdev_map; for (i = 0, map = __le32_to_cpu(ev->vdev_map); map; map >>= 1) { if (!(map & BIT(0))) continue; /* If this happens there were some changes in firmware and * ath10k should update the max size of tim_info array. */ if (WARN_ON_ONCE(i == ARRAY_SIZE(arg->tim_info))) break; if (__le32_to_cpu(ev->bcn_info[i].tim_info.tim_len) > sizeof(ev->bcn_info[i].tim_info.tim_bitmap)) { ath10k_warn(ar, "refusing to parse invalid swba structure\n"); return -EPROTO; } tim_len = __le32_to_cpu(ev->bcn_info[i].tim_info.tim_len); if (tim_len) { /* Exclude 4 byte guard length */ tim_len -= 4; arg->tim_info[i].tim_len = __cpu_to_le32(tim_len); } else { arg->tim_info[i].tim_len = 0; } arg->tim_info[i].tim_mcast = ev->bcn_info[i].tim_info.tim_mcast; arg->tim_info[i].tim_bitmap = ev->bcn_info[i].tim_info.tim_bitmap; arg->tim_info[i].tim_changed = ev->bcn_info[i].tim_info.tim_changed; arg->tim_info[i].tim_num_ps_pending = ev->bcn_info[i].tim_info.tim_num_ps_pending; /* 10.4 firmware doesn't have p2p support. notice of absence * info can be ignored for now. 
*/ i++; } return 0; } static enum wmi_txbf_conf ath10k_wmi_10_4_txbf_conf_scheme(struct ath10k *ar) { return WMI_TXBF_CONF_BEFORE_ASSOC; } void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) { struct wmi_swba_ev_arg arg = {}; u32 map; int i = -1; const struct wmi_tim_info_arg *tim_info; const struct wmi_p2p_noa_info *noa_info; struct ath10k_vif *arvif; struct sk_buff *bcn; dma_addr_t paddr; int ret, vdev_id = 0; ret = ath10k_wmi_pull_swba(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse swba event: %d\n", ret); return; } map = __le32_to_cpu(arg.vdev_map); ath10k_dbg(ar, ATH10K_DBG_MGMT, "mgmt swba vdev_map 0x%x\n", map); for (; map; map >>= 1, vdev_id++) { if (!(map & 0x1)) continue; i++; if (i >= WMI_MAX_AP_VDEV) { ath10k_warn(ar, "swba has corrupted vdev map\n"); break; } tim_info = &arg.tim_info[i]; noa_info = arg.noa_info[i]; ath10k_dbg(ar, ATH10K_DBG_MGMT, "mgmt event bcn_info %d tim_len %d mcast %d changed %d num_ps_pending %d bitmap 0x%08x%08x%08x%08x\n", i, __le32_to_cpu(tim_info->tim_len), __le32_to_cpu(tim_info->tim_mcast), __le32_to_cpu(tim_info->tim_changed), __le32_to_cpu(tim_info->tim_num_ps_pending), __le32_to_cpu(tim_info->tim_bitmap[3]), __le32_to_cpu(tim_info->tim_bitmap[2]), __le32_to_cpu(tim_info->tim_bitmap[1]), __le32_to_cpu(tim_info->tim_bitmap[0])); /* TODO: Only first 4 word from tim_bitmap is dumped. * Extend debug code to dump full tim_bitmap. */ arvif = ath10k_get_arvif(ar, vdev_id); if (arvif == NULL) { ath10k_warn(ar, "no vif for vdev_id %d found\n", vdev_id); continue; } /* mac80211 would have already asked us to stop beaconing and * bring the vdev down, so continue in that case */ if (!arvif->is_up) continue; /* There are no completions for beacons so wait for next SWBA * before telling mac80211 to decrement CSA counter * * Once CSA counter is completed stop sending beacons until * actual channel switch is done */ if (arvif->vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(arvif->vif, 0)) { ieee80211_csa_finish(arvif->vif, 0); continue; } bcn = ieee80211_beacon_get(ar->hw, arvif->vif, 0); if (!bcn) { ath10k_warn(ar, "could not get mac80211 beacon\n"); continue; } ath10k_tx_h_seq_no(arvif->vif, bcn); ath10k_wmi_update_tim(ar, arvif, bcn, tim_info); ath10k_wmi_update_noa(ar, arvif, bcn, noa_info); spin_lock_bh(&ar->data_lock); if (arvif->beacon) { switch (arvif->beacon_state) { case ATH10K_BEACON_SENT: break; case ATH10K_BEACON_SCHEDULED: ath10k_warn(ar, "SWBA overrun on vdev %d, skipped old beacon\n", arvif->vdev_id); break; case ATH10K_BEACON_SENDING: ath10k_warn(ar, "SWBA overrun on vdev %d, skipped new beacon\n", arvif->vdev_id); dev_kfree_skb(bcn); goto skip; } ath10k_mac_vif_beacon_free(arvif); } if (!arvif->beacon_buf) { paddr = dma_map_single(arvif->ar->dev, bcn->data, bcn->len, DMA_TO_DEVICE); ret = dma_mapping_error(arvif->ar->dev, paddr); if (ret) { ath10k_warn(ar, "failed to map beacon: %d\n", ret); dev_kfree_skb_any(bcn); goto skip; } ATH10K_SKB_CB(bcn)->paddr = paddr; } else { if (bcn->len > IEEE80211_MAX_FRAME_LEN) { ath10k_warn(ar, "trimming beacon %d -> %d bytes!\n", bcn->len, IEEE80211_MAX_FRAME_LEN); skb_trim(bcn, IEEE80211_MAX_FRAME_LEN); } memcpy(arvif->beacon_buf, bcn->data, bcn->len); ATH10K_SKB_CB(bcn)->paddr = arvif->beacon_paddr; } arvif->beacon = bcn; arvif->beacon_state = ATH10K_BEACON_SCHEDULED; trace_ath10k_tx_hdr(ar, bcn->data, bcn->len); trace_ath10k_tx_payload(ar, bcn->data, bcn->len); skip: spin_unlock_bh(&ar->data_lock); } ath10k_wmi_tx_beacons_nowait(ar); } void 
ath10k_wmi_event_tbttoffset_update(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_TBTTOFFSET_UPDATE_EVENTID\n"); } static void ath10k_radar_detected(struct ath10k *ar) { ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs radar detected\n"); ATH10K_DFS_STAT_INC(ar, radar_detected); /* Control radar events reporting in debugfs file * dfs_block_radar_events */ if (ar->dfs_block_radar_events) ath10k_info(ar, "DFS Radar detected, but ignored as requested\n"); else ieee80211_radar_detected(ar->hw); } static void ath10k_radar_confirmation_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, radar_confirmation_work); struct ath10k_radar_found_info radar_info; int ret, time_left; reinit_completion(&ar->wmi.radar_confirm); spin_lock_bh(&ar->data_lock); memcpy(&radar_info, &ar->last_radar_info, sizeof(radar_info)); spin_unlock_bh(&ar->data_lock); ret = ath10k_wmi_report_radar_found(ar, &radar_info); if (ret) { ath10k_warn(ar, "failed to send radar found %d\n", ret); goto wait_complete; } time_left = wait_for_completion_timeout(&ar->wmi.radar_confirm, ATH10K_WMI_DFS_CONF_TIMEOUT_HZ); if (time_left) { /* DFS Confirmation status event received and * necessary action completed. */ goto wait_complete; } else { /* DFS Confirmation event not received from FW.Considering this * as real radar. */ ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs confirmation not received from fw, considering as radar\n"); goto radar_detected; } radar_detected: ath10k_radar_detected(ar); /* Reset state to allow sending confirmation on consecutive radar * detections, unless radar confirmation is disabled/stopped. */ wait_complete: spin_lock_bh(&ar->data_lock); if (ar->radar_conf_state != ATH10K_RADAR_CONFIRMATION_STOPPED) ar->radar_conf_state = ATH10K_RADAR_CONFIRMATION_IDLE; spin_unlock_bh(&ar->data_lock); } static void ath10k_dfs_radar_report(struct ath10k *ar, struct wmi_phyerr_ev_arg *phyerr, const struct phyerr_radar_report *rr, u64 tsf) { u32 reg0, reg1, tsf32l; struct ieee80211_channel *ch; struct pulse_event pe; struct radar_detector_specs rs; u64 tsf64; u8 rssi, width; struct ath10k_radar_found_info *radar_info; reg0 = __le32_to_cpu(rr->reg0); reg1 = __le32_to_cpu(rr->reg1); ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "wmi phyerr radar report chirp %d max_width %d agc_total_gain %d pulse_delta_diff %d\n", MS(reg0, RADAR_REPORT_REG0_PULSE_IS_CHIRP), MS(reg0, RADAR_REPORT_REG0_PULSE_IS_MAX_WIDTH), MS(reg0, RADAR_REPORT_REG0_AGC_TOTAL_GAIN), MS(reg0, RADAR_REPORT_REG0_PULSE_DELTA_DIFF)); ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "wmi phyerr radar report pulse_delta_pean %d pulse_sidx %d fft_valid %d agc_mb_gain %d subchan_mask %d\n", MS(reg0, RADAR_REPORT_REG0_PULSE_DELTA_PEAK), MS(reg0, RADAR_REPORT_REG0_PULSE_SIDX), MS(reg1, RADAR_REPORT_REG1_PULSE_SRCH_FFT_VALID), MS(reg1, RADAR_REPORT_REG1_PULSE_AGC_MB_GAIN), MS(reg1, RADAR_REPORT_REG1_PULSE_SUBCHAN_MASK)); ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "wmi phyerr radar report pulse_tsf_offset 0x%X pulse_dur: %d\n", MS(reg1, RADAR_REPORT_REG1_PULSE_TSF_OFFSET), MS(reg1, RADAR_REPORT_REG1_PULSE_DUR)); if (!ar->dfs_detector) return; spin_lock_bh(&ar->data_lock); ch = ar->rx_channel; /* fetch target operating channel during channel change */ if (!ch) ch = ar->tgt_oper_chan; spin_unlock_bh(&ar->data_lock); if (!ch) { ath10k_warn(ar, "failed to derive channel for radar pulse, treating as radar\n"); goto radar_detected; } /* report event to DFS pattern detector */ tsf32l = phyerr->tsf_timestamp; tsf64 = tsf & (~0xFFFFFFFFULL); tsf64 |= 
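/* splice the 32-bit per-pulse timestamp from the phyerr header into
 * the low half of the 64-bit event TSF computed above
 */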
tsf32l; width = MS(reg1, RADAR_REPORT_REG1_PULSE_DUR); rssi = phyerr->rssi_combined; /* hardware store this as 8 bit signed value, * set to zero if negative number */ if (rssi & 0x80) rssi = 0; pe.ts = tsf64; pe.freq = ch->center_freq; pe.width = width; pe.rssi = rssi; pe.chirp = (MS(reg0, RADAR_REPORT_REG0_PULSE_IS_CHIRP) != 0); ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs add pulse freq: %d, width: %d, rssi %d, tsf: %llX\n", pe.freq, pe.width, pe.rssi, pe.ts); ATH10K_DFS_STAT_INC(ar, pulses_detected); if (!ar->dfs_detector->add_pulse(ar->dfs_detector, &pe, &rs)) { ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs no pulse pattern detected, yet\n"); return; } if ((test_bit(WMI_SERVICE_HOST_DFS_CHECK_SUPPORT, ar->wmi.svc_map)) && ar->dfs_detector->region == NL80211_DFS_FCC) { /* Consecutive radar indications need not be * sent to the firmware until we get confirmation * for the previous detected radar. */ spin_lock_bh(&ar->data_lock); if (ar->radar_conf_state != ATH10K_RADAR_CONFIRMATION_IDLE) { spin_unlock_bh(&ar->data_lock); return; } ar->radar_conf_state = ATH10K_RADAR_CONFIRMATION_INPROGRESS; radar_info = &ar->last_radar_info; radar_info->pri_min = rs.pri_min; radar_info->pri_max = rs.pri_max; radar_info->width_min = rs.width_min; radar_info->width_max = rs.width_max; /*TODO Find sidx_min and sidx_max */ radar_info->sidx_min = MS(reg0, RADAR_REPORT_REG0_PULSE_SIDX); radar_info->sidx_max = MS(reg0, RADAR_REPORT_REG0_PULSE_SIDX); ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "sending wmi radar found cmd pri_min %d pri_max %d width_min %d width_max %d sidx_min %d sidx_max %d\n", radar_info->pri_min, radar_info->pri_max, radar_info->width_min, radar_info->width_max, radar_info->sidx_min, radar_info->sidx_max); ieee80211_queue_work(ar->hw, &ar->radar_confirmation_work); spin_unlock_bh(&ar->data_lock); return; } radar_detected: ath10k_radar_detected(ar); } static int ath10k_dfs_fft_report(struct ath10k *ar, struct wmi_phyerr_ev_arg *phyerr, const struct phyerr_fft_report *fftr, u64 tsf) { u32 reg0, reg1; u8 rssi, peak_mag; reg0 = __le32_to_cpu(fftr->reg0); reg1 = __le32_to_cpu(fftr->reg1); rssi = phyerr->rssi_combined; ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "wmi phyerr fft report total_gain_db %d base_pwr_db %d fft_chn_idx %d peak_sidx %d\n", MS(reg0, SEARCH_FFT_REPORT_REG0_TOTAL_GAIN_DB), MS(reg0, SEARCH_FFT_REPORT_REG0_BASE_PWR_DB), MS(reg0, SEARCH_FFT_REPORT_REG0_FFT_CHN_IDX), MS(reg0, SEARCH_FFT_REPORT_REG0_PEAK_SIDX)); ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "wmi phyerr fft report rel_pwr_db %d avgpwr_db %d peak_mag %d num_store_bin %d\n", MS(reg1, SEARCH_FFT_REPORT_REG1_RELPWR_DB), MS(reg1, SEARCH_FFT_REPORT_REG1_AVGPWR_DB), MS(reg1, SEARCH_FFT_REPORT_REG1_PEAK_MAG), MS(reg1, SEARCH_FFT_REPORT_REG1_NUM_STR_BINS_IB)); peak_mag = MS(reg1, SEARCH_FFT_REPORT_REG1_PEAK_MAG); /* false event detection */ if (rssi == DFS_RSSI_POSSIBLY_FALSE && peak_mag < 2 * DFS_PEAK_MAG_THOLD_POSSIBLY_FALSE) { ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs false pulse detected\n"); ATH10K_DFS_STAT_INC(ar, pulses_discarded); return -EINVAL; } return 0; } void ath10k_wmi_event_dfs(struct ath10k *ar, struct wmi_phyerr_ev_arg *phyerr, u64 tsf) { int buf_len, tlv_len, res, i = 0; const struct phyerr_tlv *tlv; const struct phyerr_radar_report *rr; const struct phyerr_fft_report *fftr; const u8 *tlv_buf; buf_len = phyerr->buf_len; ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "wmi event dfs err_code %d rssi %d tsfl 0x%X tsf64 0x%llX len %d\n", phyerr->phy_err_code, phyerr->rssi_combined, phyerr->tsf_timestamp, tsf, buf_len); /* Skip event if DFS 
disabled */ if (!IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) return; ATH10K_DFS_STAT_INC(ar, pulses_total); while (i < buf_len) { if (i + sizeof(*tlv) > buf_len) { ath10k_warn(ar, "too short buf for tlv header (%d)\n", i); return; } tlv = (struct phyerr_tlv *)&phyerr->buf[i]; tlv_len = __le16_to_cpu(tlv->len); tlv_buf = &phyerr->buf[i + sizeof(*tlv)]; ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "wmi event dfs tlv_len %d tlv_tag 0x%02X tlv_sig 0x%02X\n", tlv_len, tlv->tag, tlv->sig); switch (tlv->tag) { case PHYERR_TLV_TAG_RADAR_PULSE_SUMMARY: if (i + sizeof(*tlv) + sizeof(*rr) > buf_len) { ath10k_warn(ar, "too short radar pulse summary (%d)\n", i); return; } rr = (struct phyerr_radar_report *)tlv_buf; ath10k_dfs_radar_report(ar, phyerr, rr, tsf); break; case PHYERR_TLV_TAG_SEARCH_FFT_REPORT: if (i + sizeof(*tlv) + sizeof(*fftr) > buf_len) { ath10k_warn(ar, "too short fft report (%d)\n", i); return; } fftr = (struct phyerr_fft_report *)tlv_buf; res = ath10k_dfs_fft_report(ar, phyerr, fftr, tsf); if (res) return; break; } i += sizeof(*tlv) + tlv_len; } } void ath10k_wmi_event_spectral_scan(struct ath10k *ar, struct wmi_phyerr_ev_arg *phyerr, u64 tsf) { int buf_len, tlv_len, res, i = 0; struct phyerr_tlv *tlv; const void *tlv_buf; const struct phyerr_fft_report *fftr; size_t fftr_len; buf_len = phyerr->buf_len; while (i < buf_len) { if (i + sizeof(*tlv) > buf_len) { ath10k_warn(ar, "failed to parse phyerr tlv header at byte %d\n", i); return; } tlv = (struct phyerr_tlv *)&phyerr->buf[i]; tlv_len = __le16_to_cpu(tlv->len); tlv_buf = &phyerr->buf[i + sizeof(*tlv)]; if (i + sizeof(*tlv) + tlv_len > buf_len) { ath10k_warn(ar, "failed to parse phyerr tlv payload at byte %d\n", i); return; } switch (tlv->tag) { case PHYERR_TLV_TAG_SEARCH_FFT_REPORT: if (sizeof(*fftr) > tlv_len) { ath10k_warn(ar, "failed to parse fft report at byte %d\n", i); return; } fftr_len = tlv_len - sizeof(*fftr); fftr = tlv_buf; res = ath10k_spectral_process_fft(ar, phyerr, fftr, fftr_len, tsf); if (res < 0) { ath10k_dbg(ar, ATH10K_DBG_WMI, "failed to process fft report: %d\n", res); return; } break; } i += sizeof(*tlv) + tlv_len; } } static int ath10k_wmi_op_pull_phyerr_ev_hdr(struct ath10k *ar, struct sk_buff *skb, struct wmi_phyerr_hdr_arg *arg) { struct wmi_phyerr_event *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; arg->num_phyerrs = __le32_to_cpu(ev->num_phyerrs); arg->tsf_l32 = __le32_to_cpu(ev->tsf_l32); arg->tsf_u32 = __le32_to_cpu(ev->tsf_u32); arg->buf_len = skb->len - sizeof(*ev); arg->phyerrs = ev->phyerrs; return 0; } static int ath10k_wmi_10_4_op_pull_phyerr_ev_hdr(struct ath10k *ar, struct sk_buff *skb, struct wmi_phyerr_hdr_arg *arg) { struct wmi_10_4_phyerr_event *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; /* 10.4 firmware always reports only one phyerr */ arg->num_phyerrs = 1; arg->tsf_l32 = __le32_to_cpu(ev->tsf_l32); arg->tsf_u32 = __le32_to_cpu(ev->tsf_u32); arg->buf_len = skb->len; arg->phyerrs = skb->data; return 0; } int ath10k_wmi_op_pull_phyerr_ev(struct ath10k *ar, const void *phyerr_buf, int left_len, struct wmi_phyerr_ev_arg *arg) { const struct wmi_phyerr *phyerr = phyerr_buf; int i; if (left_len < sizeof(*phyerr)) { ath10k_warn(ar, "wrong phyerr event head len %d (need: >=%zd)\n", left_len, sizeof(*phyerr)); return -EINVAL; } arg->tsf_timestamp = __le32_to_cpu(phyerr->tsf_timestamp); arg->freq1 = __le16_to_cpu(phyerr->freq1); arg->freq2 = __le16_to_cpu(phyerr->freq2); arg->rssi_combined = phyerr->rssi_combined; arg->chan_width_mhz = phyerr->chan_width_mhz; 
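/* the rest of the header is copied out below: payload length and
 * pointer, header size for the caller's offset accounting, per-chain
 * noise floors, and the generic PHY error code mapped onto the
 * driver's internal enum
 */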
arg->buf_len = __le32_to_cpu(phyerr->buf_len); arg->buf = phyerr->buf; arg->hdr_len = sizeof(*phyerr); for (i = 0; i < 4; i++) arg->nf_chains[i] = __le16_to_cpu(phyerr->nf_chains[i]); switch (phyerr->phy_err_code) { case PHY_ERROR_GEN_SPECTRAL_SCAN: arg->phy_err_code = PHY_ERROR_SPECTRAL_SCAN; break; case PHY_ERROR_GEN_FALSE_RADAR_EXT: arg->phy_err_code = PHY_ERROR_FALSE_RADAR_EXT; break; case PHY_ERROR_GEN_RADAR: arg->phy_err_code = PHY_ERROR_RADAR; break; default: arg->phy_err_code = PHY_ERROR_UNKNOWN; break; } return 0; } static int ath10k_wmi_10_4_op_pull_phyerr_ev(struct ath10k *ar, const void *phyerr_buf, int left_len, struct wmi_phyerr_ev_arg *arg) { const struct wmi_10_4_phyerr_event *phyerr = phyerr_buf; u32 phy_err_mask; int i; if (left_len < sizeof(*phyerr)) { ath10k_warn(ar, "wrong phyerr event head len %d (need: >=%zd)\n", left_len, sizeof(*phyerr)); return -EINVAL; } arg->tsf_timestamp = __le32_to_cpu(phyerr->tsf_timestamp); arg->freq1 = __le16_to_cpu(phyerr->freq1); arg->freq2 = __le16_to_cpu(phyerr->freq2); arg->rssi_combined = phyerr->rssi_combined; arg->chan_width_mhz = phyerr->chan_width_mhz; arg->buf_len = __le32_to_cpu(phyerr->buf_len); arg->buf = phyerr->buf; arg->hdr_len = sizeof(*phyerr); for (i = 0; i < 4; i++) arg->nf_chains[i] = __le16_to_cpu(phyerr->nf_chains[i]); phy_err_mask = __le32_to_cpu(phyerr->phy_err_mask[0]); if (phy_err_mask & PHY_ERROR_10_4_SPECTRAL_SCAN_MASK) arg->phy_err_code = PHY_ERROR_SPECTRAL_SCAN; else if (phy_err_mask & PHY_ERROR_10_4_RADAR_MASK) arg->phy_err_code = PHY_ERROR_RADAR; else arg->phy_err_code = PHY_ERROR_UNKNOWN; return 0; } void ath10k_wmi_event_phyerr(struct ath10k *ar, struct sk_buff *skb) { struct wmi_phyerr_hdr_arg hdr_arg = {}; struct wmi_phyerr_ev_arg phyerr_arg = {}; const void *phyerr; u32 count, i, buf_len, phy_err_code; u64 tsf; int left_len, ret; ATH10K_DFS_STAT_INC(ar, phy_errors); ret = ath10k_wmi_pull_phyerr_hdr(ar, skb, &hdr_arg); if (ret) { ath10k_warn(ar, "failed to parse phyerr event hdr: %d\n", ret); return; } /* Check number of included events */ count = hdr_arg.num_phyerrs; left_len = hdr_arg.buf_len; tsf = hdr_arg.tsf_u32; tsf <<= 32; tsf |= hdr_arg.tsf_l32; ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event phyerr count %d tsf64 0x%llX\n", count, tsf); phyerr = hdr_arg.phyerrs; for (i = 0; i < count; i++) { ret = ath10k_wmi_pull_phyerr(ar, phyerr, left_len, &phyerr_arg); if (ret) { ath10k_warn(ar, "failed to parse phyerr event (%d)\n", i); return; } left_len -= phyerr_arg.hdr_len; buf_len = phyerr_arg.buf_len; phy_err_code = phyerr_arg.phy_err_code; if (left_len < buf_len) { ath10k_warn(ar, "single event (%d) wrong buf len\n", i); return; } left_len -= buf_len; switch (phy_err_code) { case PHY_ERROR_RADAR: ath10k_wmi_event_dfs(ar, &phyerr_arg, tsf); break; case PHY_ERROR_SPECTRAL_SCAN: ath10k_wmi_event_spectral_scan(ar, &phyerr_arg, tsf); break; case PHY_ERROR_FALSE_RADAR_EXT: ath10k_wmi_event_dfs(ar, &phyerr_arg, tsf); ath10k_wmi_event_spectral_scan(ar, &phyerr_arg, tsf); break; default: break; } phyerr = phyerr + phyerr_arg.hdr_len + buf_len; } } static int ath10k_wmi_10_4_op_pull_dfs_status_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_dfs_status_ev_arg *arg) { struct wmi_dfs_status_ev_arg *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; arg->status = ev->status; return 0; } static void ath10k_wmi_event_dfs_status_check(struct ath10k *ar, struct sk_buff *skb) { struct wmi_dfs_status_ev_arg status_arg = {}; int ret; ret = ath10k_wmi_pull_dfs_status(ar, skb, &status_arg); if (ret) { 
ath10k_warn(ar, "failed to parse dfs status event: %d\n", ret); return; } ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs status event received from fw: %d\n", status_arg.status); /* Even in case of radar detection failure we follow the same * behaviour as if radar is detected i.e to switch to a different * channel. */ if (status_arg.status == WMI_HW_RADAR_DETECTED || status_arg.status == WMI_RADAR_DETECTION_FAIL) ath10k_radar_detected(ar); complete(&ar->wmi.radar_confirm); } void ath10k_wmi_event_roam(struct ath10k *ar, struct sk_buff *skb) { struct wmi_roam_ev_arg arg = {}; int ret; u32 vdev_id; u32 reason; s32 rssi; ret = ath10k_wmi_pull_roam_ev(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse roam event: %d\n", ret); return; } vdev_id = __le32_to_cpu(arg.vdev_id); reason = __le32_to_cpu(arg.reason); rssi = __le32_to_cpu(arg.rssi); rssi += WMI_SPECTRAL_NOISE_FLOOR_REF_DEFAULT; ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi roam event vdev %u reason 0x%08x rssi %d\n", vdev_id, reason, rssi); if (reason >= WMI_ROAM_REASON_MAX) ath10k_warn(ar, "ignoring unknown roam event reason %d on vdev %i\n", reason, vdev_id); switch (reason) { case WMI_ROAM_REASON_BEACON_MISS: ath10k_mac_handle_beacon_miss(ar, vdev_id); break; case WMI_ROAM_REASON_BETTER_AP: case WMI_ROAM_REASON_LOW_RSSI: case WMI_ROAM_REASON_SUITABLE_AP_FOUND: case WMI_ROAM_REASON_HO_FAILED: ath10k_warn(ar, "ignoring not implemented roam event reason %d on vdev %i\n", reason, vdev_id); break; } } void ath10k_wmi_event_profile_match(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_PROFILE_MATCH\n"); } void ath10k_wmi_event_debug_print(struct ath10k *ar, struct sk_buff *skb) { char buf[101], c; int i; for (i = 0; i < sizeof(buf) - 1; i++) { if (i >= skb->len) break; c = skb->data[i]; if (c == '\0') break; if (isascii(c) && isprint(c)) buf[i] = c; else buf[i] = '.'; } if (i == sizeof(buf) - 1) ath10k_warn(ar, "wmi debug print truncated: %d\n", skb->len); /* for some reason the debug prints end with \n, remove that */ if (skb->data[i - 1] == '\n') i--; /* the last byte is always reserved for the null character */ buf[i] = '\0'; ath10k_dbg(ar, ATH10K_DBG_WMI_PRINT, "wmi print '%s'\n", buf); } void ath10k_wmi_event_pdev_qvit(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_PDEV_QVIT_EVENTID\n"); } void ath10k_wmi_event_wlan_profile_data(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_WLAN_PROFILE_DATA_EVENTID\n"); } void ath10k_wmi_event_rtt_measurement_report(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_RTT_MEASUREMENT_REPORT_EVENTID\n"); } void ath10k_wmi_event_tsf_measurement_report(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_TSF_MEASUREMENT_REPORT_EVENTID\n"); } void ath10k_wmi_event_rtt_error_report(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_RTT_ERROR_REPORT_EVENTID\n"); } void ath10k_wmi_event_wow_wakeup_host(struct ath10k *ar, struct sk_buff *skb) { struct wmi_wow_ev_arg ev = {}; int ret; complete(&ar->wow.wakeup_completed); ret = ath10k_wmi_pull_wow_event(ar, skb, &ev); if (ret) { ath10k_warn(ar, "failed to parse wow wakeup event: %d\n", ret); return; } ath10k_dbg(ar, ATH10K_DBG_WMI, "wow wakeup host reason %s\n", wow_reason(ev.wake_reason)); } void ath10k_wmi_event_dcs_interference(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_DCS_INTERFERENCE_EVENTID\n"); } static u8 ath10k_tpc_config_get_rate(struct ath10k *ar, 
struct wmi_pdev_tpc_config_event *ev, u32 rate_idx, u32 num_chains, u32 rate_code, u8 type) { u8 tpc, num_streams, preamble, ch, stm_idx; num_streams = ATH10K_HW_NSS(rate_code); preamble = ATH10K_HW_PREAMBLE(rate_code); ch = num_chains - 1; tpc = min_t(u8, ev->rates_array[rate_idx], ev->max_reg_allow_pow[ch]); if (__le32_to_cpu(ev->num_tx_chain) <= 1) goto out; if (preamble == WMI_RATE_PREAMBLE_CCK) goto out; stm_idx = num_streams - 1; if (num_chains <= num_streams) goto out; switch (type) { case WMI_TPC_TABLE_TYPE_STBC: tpc = min_t(u8, tpc, ev->max_reg_allow_pow_agstbc[ch - 1][stm_idx]); break; case WMI_TPC_TABLE_TYPE_TXBF: tpc = min_t(u8, tpc, ev->max_reg_allow_pow_agtxbf[ch - 1][stm_idx]); break; case WMI_TPC_TABLE_TYPE_CDD: tpc = min_t(u8, tpc, ev->max_reg_allow_pow_agcdd[ch - 1][stm_idx]); break; default: ath10k_warn(ar, "unknown wmi tpc table type: %d\n", type); tpc = 0; break; } out: return tpc; } static void ath10k_tpc_config_disp_tables(struct ath10k *ar, struct wmi_pdev_tpc_config_event *ev, struct ath10k_tpc_stats *tpc_stats, u8 *rate_code, u16 *pream_table, u8 type) { u32 i, j, pream_idx, flags; u8 tpc[WMI_TPC_TX_N_CHAIN]; char tpc_value[WMI_TPC_TX_N_CHAIN * WMI_TPC_BUF_SIZE]; char buff[WMI_TPC_BUF_SIZE]; flags = __le32_to_cpu(ev->flags); switch (type) { case WMI_TPC_TABLE_TYPE_CDD: if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_CDD)) { ath10k_dbg(ar, ATH10K_DBG_WMI, "CDD not supported\n"); tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG; return; } break; case WMI_TPC_TABLE_TYPE_STBC: if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_STBC)) { ath10k_dbg(ar, ATH10K_DBG_WMI, "STBC not supported\n"); tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG; return; } break; case WMI_TPC_TABLE_TYPE_TXBF: if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_TXBF)) { ath10k_dbg(ar, ATH10K_DBG_WMI, "TXBF not supported\n"); tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG; return; } break; default: ath10k_dbg(ar, ATH10K_DBG_WMI, "invalid table type in wmi tpc event: %d\n", type); return; } pream_idx = 0; for (i = 0; i < tpc_stats->rate_max; i++) { memset(tpc_value, 0, sizeof(tpc_value)); memset(buff, 0, sizeof(buff)); if (i == pream_table[pream_idx]) pream_idx++; for (j = 0; j < tpc_stats->num_tx_chain; j++) { tpc[j] = ath10k_tpc_config_get_rate(ar, ev, i, j + 1, rate_code[i], type); snprintf(buff, sizeof(buff), "%8d ", tpc[j]); strlcat(tpc_value, buff, sizeof(tpc_value)); } tpc_stats->tpc_table[type].pream_idx[i] = pream_idx; tpc_stats->tpc_table[type].rate_code[i] = rate_code[i]; memcpy(tpc_stats->tpc_table[type].tpc_value[i], tpc_value, sizeof(tpc_value)); } } void ath10k_wmi_tpc_config_get_rate_code(u8 *rate_code, u16 *pream_table, u32 num_tx_chain) { u32 i, j, pream_idx; u8 rate_idx; /* Create the rate code table based on the chains supported */ rate_idx = 0; pream_idx = 0; /* Fill CCK rate code */ for (i = 0; i < 4; i++) { rate_code[rate_idx] = ATH10K_HW_RATECODE(i, 0, WMI_RATE_PREAMBLE_CCK); rate_idx++; } pream_table[pream_idx] = rate_idx; pream_idx++; /* Fill OFDM rate code */ for (i = 0; i < 8; i++) { rate_code[rate_idx] = ATH10K_HW_RATECODE(i, 0, WMI_RATE_PREAMBLE_OFDM); rate_idx++; } pream_table[pream_idx] = rate_idx; pream_idx++; /* Fill HT20 rate code */ for (i = 0; i < num_tx_chain; i++) { for (j = 0; j < 8; j++) { rate_code[rate_idx] = ATH10K_HW_RATECODE(j, i, WMI_RATE_PREAMBLE_HT); rate_idx++; } } pream_table[pream_idx] = rate_idx; pream_idx++; /* Fill HT40 rate code */ for (i = 0; i < num_tx_chain; i++) { for (j = 0; j < 8; j++) { rate_code[rate_idx] = ATH10K_HW_RATECODE(j, i, 
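/* HT40 reuses the HT preamble code; only the pream_table boundary
 * recorded afterwards distinguishes it from HT20
 */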
WMI_RATE_PREAMBLE_HT); rate_idx++; } } pream_table[pream_idx] = rate_idx; pream_idx++; /* Fill VHT20 rate code */ for (i = 0; i < num_tx_chain; i++) { for (j = 0; j < 10; j++) { rate_code[rate_idx] = ATH10K_HW_RATECODE(j, i, WMI_RATE_PREAMBLE_VHT); rate_idx++; } } pream_table[pream_idx] = rate_idx; pream_idx++; /* Fill VHT40 rate code */ for (i = 0; i < num_tx_chain; i++) { for (j = 0; j < 10; j++) { rate_code[rate_idx] = ATH10K_HW_RATECODE(j, i, WMI_RATE_PREAMBLE_VHT); rate_idx++; } } pream_table[pream_idx] = rate_idx; pream_idx++; /* Fill VHT80 rate code */ for (i = 0; i < num_tx_chain; i++) { for (j = 0; j < 10; j++) { rate_code[rate_idx] = ATH10K_HW_RATECODE(j, i, WMI_RATE_PREAMBLE_VHT); rate_idx++; } } pream_table[pream_idx] = rate_idx; pream_idx++; rate_code[rate_idx++] = ATH10K_HW_RATECODE(0, 0, WMI_RATE_PREAMBLE_CCK); rate_code[rate_idx++] = ATH10K_HW_RATECODE(0, 0, WMI_RATE_PREAMBLE_OFDM); rate_code[rate_idx++] = ATH10K_HW_RATECODE(0, 0, WMI_RATE_PREAMBLE_CCK); rate_code[rate_idx++] = ATH10K_HW_RATECODE(0, 0, WMI_RATE_PREAMBLE_OFDM); rate_code[rate_idx++] = ATH10K_HW_RATECODE(0, 0, WMI_RATE_PREAMBLE_OFDM); pream_table[pream_idx] = ATH10K_TPC_PREAM_TABLE_END; } void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb) { u32 num_tx_chain, rate_max; u8 rate_code[WMI_TPC_RATE_MAX]; u16 pream_table[WMI_TPC_PREAM_TABLE_MAX]; struct wmi_pdev_tpc_config_event *ev; struct ath10k_tpc_stats *tpc_stats; ev = (struct wmi_pdev_tpc_config_event *)skb->data; num_tx_chain = __le32_to_cpu(ev->num_tx_chain); if (num_tx_chain > WMI_TPC_TX_N_CHAIN) { ath10k_warn(ar, "number of tx chain is %d greater than TPC configured tx chain %d\n", num_tx_chain, WMI_TPC_TX_N_CHAIN); return; } rate_max = __le32_to_cpu(ev->rate_max); if (rate_max > WMI_TPC_RATE_MAX) { ath10k_warn(ar, "number of rate is %d greater than TPC configured rate %d\n", rate_max, WMI_TPC_RATE_MAX); rate_max = WMI_TPC_RATE_MAX; } tpc_stats = kzalloc(sizeof(*tpc_stats), GFP_ATOMIC); if (!tpc_stats) return; ath10k_wmi_tpc_config_get_rate_code(rate_code, pream_table, num_tx_chain); tpc_stats->chan_freq = __le32_to_cpu(ev->chan_freq); tpc_stats->phy_mode = __le32_to_cpu(ev->phy_mode); tpc_stats->ctl = __le32_to_cpu(ev->ctl); tpc_stats->reg_domain = __le32_to_cpu(ev->reg_domain); tpc_stats->twice_antenna_gain = a_sle32_to_cpu(ev->twice_antenna_gain); tpc_stats->twice_antenna_reduction = __le32_to_cpu(ev->twice_antenna_reduction); tpc_stats->power_limit = __le32_to_cpu(ev->power_limit); tpc_stats->twice_max_rd_power = __le32_to_cpu(ev->twice_max_rd_power); tpc_stats->num_tx_chain = num_tx_chain; tpc_stats->rate_max = rate_max; ath10k_tpc_config_disp_tables(ar, ev, tpc_stats, rate_code, pream_table, WMI_TPC_TABLE_TYPE_CDD); ath10k_tpc_config_disp_tables(ar, ev, tpc_stats, rate_code, pream_table, WMI_TPC_TABLE_TYPE_STBC); ath10k_tpc_config_disp_tables(ar, ev, tpc_stats, rate_code, pream_table, WMI_TPC_TABLE_TYPE_TXBF); ath10k_debug_tpc_stats_process(ar, tpc_stats); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event tpc config channel %d mode %d ctl %d regd %d gain %d %d limit %d max_power %d tx_chanins %d rates %d\n", __le32_to_cpu(ev->chan_freq), __le32_to_cpu(ev->phy_mode), __le32_to_cpu(ev->ctl), __le32_to_cpu(ev->reg_domain), a_sle32_to_cpu(ev->twice_antenna_gain), __le32_to_cpu(ev->twice_antenna_reduction), __le32_to_cpu(ev->power_limit), __le32_to_cpu(ev->twice_max_rd_power) / 2, __le32_to_cpu(ev->num_tx_chain), __le32_to_cpu(ev->rate_max)); } static u8 ath10k_wmi_tpc_final_get_rate(struct ath10k *ar, struct 
wmi_pdev_tpc_final_table_event *ev, u32 rate_idx, u32 num_chains, u32 rate_code, u8 type, u32 pream_idx) { u8 tpc, num_streams, preamble, ch, stm_idx; s8 pow_agcdd, pow_agstbc, pow_agtxbf; int pream; num_streams = ATH10K_HW_NSS(rate_code); preamble = ATH10K_HW_PREAMBLE(rate_code); ch = num_chains - 1; stm_idx = num_streams - 1; pream = -1; if (__le32_to_cpu(ev->chan_freq) <= 2483) { switch (pream_idx) { case WMI_TPC_PREAM_2GHZ_CCK: pream = 0; break; case WMI_TPC_PREAM_2GHZ_OFDM: pream = 1; break; case WMI_TPC_PREAM_2GHZ_HT20: case WMI_TPC_PREAM_2GHZ_VHT20: pream = 2; break; case WMI_TPC_PREAM_2GHZ_HT40: case WMI_TPC_PREAM_2GHZ_VHT40: pream = 3; break; case WMI_TPC_PREAM_2GHZ_VHT80: pream = 4; break; default: pream = -1; break; } } if (__le32_to_cpu(ev->chan_freq) >= 5180) { switch (pream_idx) { case WMI_TPC_PREAM_5GHZ_OFDM: pream = 0; break; case WMI_TPC_PREAM_5GHZ_HT20: case WMI_TPC_PREAM_5GHZ_VHT20: pream = 1; break; case WMI_TPC_PREAM_5GHZ_HT40: case WMI_TPC_PREAM_5GHZ_VHT40: pream = 2; break; case WMI_TPC_PREAM_5GHZ_VHT80: pream = 3; break; case WMI_TPC_PREAM_5GHZ_HTCUP: pream = 4; break; default: pream = -1; break; } } if (pream == -1) { ath10k_warn(ar, "unknown wmi tpc final index and frequency: %u, %u\n", pream_idx, __le32_to_cpu(ev->chan_freq)); tpc = 0; goto out; } if (pream == 4) tpc = min_t(u8, ev->rates_array[rate_idx], ev->max_reg_allow_pow[ch]); else tpc = min_t(u8, min_t(u8, ev->rates_array[rate_idx], ev->max_reg_allow_pow[ch]), ev->ctl_power_table[0][pream][stm_idx]); if (__le32_to_cpu(ev->num_tx_chain) <= 1) goto out; if (preamble == WMI_RATE_PREAMBLE_CCK) goto out; if (num_chains <= num_streams) goto out; switch (type) { case WMI_TPC_TABLE_TYPE_STBC: pow_agstbc = ev->max_reg_allow_pow_agstbc[ch - 1][stm_idx]; if (pream == 4) tpc = min_t(u8, tpc, pow_agstbc); else tpc = min_t(u8, min_t(u8, tpc, pow_agstbc), ev->ctl_power_table[0][pream][stm_idx]); break; case WMI_TPC_TABLE_TYPE_TXBF: pow_agtxbf = ev->max_reg_allow_pow_agtxbf[ch - 1][stm_idx]; if (pream == 4) tpc = min_t(u8, tpc, pow_agtxbf); else tpc = min_t(u8, min_t(u8, tpc, pow_agtxbf), ev->ctl_power_table[1][pream][stm_idx]); break; case WMI_TPC_TABLE_TYPE_CDD: pow_agcdd = ev->max_reg_allow_pow_agcdd[ch - 1][stm_idx]; if (pream == 4) tpc = min_t(u8, tpc, pow_agcdd); else tpc = min_t(u8, min_t(u8, tpc, pow_agcdd), ev->ctl_power_table[0][pream][stm_idx]); break; default: ath10k_warn(ar, "unknown wmi tpc final table type: %d\n", type); tpc = 0; break; } out: return tpc; } static void ath10k_wmi_tpc_stats_final_disp_tables(struct ath10k *ar, struct wmi_pdev_tpc_final_table_event *ev, struct ath10k_tpc_stats_final *tpc_stats, u8 *rate_code, u16 *pream_table, u8 type) { u32 i, j, pream_idx, flags; u8 tpc[WMI_TPC_TX_N_CHAIN]; char tpc_value[WMI_TPC_TX_N_CHAIN * WMI_TPC_BUF_SIZE]; char buff[WMI_TPC_BUF_SIZE]; flags = __le32_to_cpu(ev->flags); switch (type) { case WMI_TPC_TABLE_TYPE_CDD: if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_CDD)) { ath10k_dbg(ar, ATH10K_DBG_WMI, "CDD not supported\n"); tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG; return; } break; case WMI_TPC_TABLE_TYPE_STBC: if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_STBC)) { ath10k_dbg(ar, ATH10K_DBG_WMI, "STBC not supported\n"); tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG; return; } break; case WMI_TPC_TABLE_TYPE_TXBF: if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_TXBF)) { ath10k_dbg(ar, ATH10K_DBG_WMI, "TXBF not supported\n"); tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG; return; } break; default: ath10k_dbg(ar, ATH10K_DBG_WMI, "invalid table 
type in wmi tpc event: %d\n", type); return; } pream_idx = 0; for (i = 0; i < tpc_stats->rate_max; i++) { memset(tpc_value, 0, sizeof(tpc_value)); memset(buff, 0, sizeof(buff)); if (i == pream_table[pream_idx]) pream_idx++; for (j = 0; j < tpc_stats->num_tx_chain; j++) { tpc[j] = ath10k_wmi_tpc_final_get_rate(ar, ev, i, j + 1, rate_code[i], type, pream_idx); snprintf(buff, sizeof(buff), "%8d ", tpc[j]); strlcat(tpc_value, buff, sizeof(tpc_value)); } tpc_stats->tpc_table_final[type].pream_idx[i] = pream_idx; tpc_stats->tpc_table_final[type].rate_code[i] = rate_code[i]; memcpy(tpc_stats->tpc_table_final[type].tpc_value[i], tpc_value, sizeof(tpc_value)); } } void ath10k_wmi_event_tpc_final_table(struct ath10k *ar, struct sk_buff *skb) { u32 num_tx_chain, rate_max; u8 rate_code[WMI_TPC_FINAL_RATE_MAX]; u16 pream_table[WMI_TPC_PREAM_TABLE_MAX]; struct wmi_pdev_tpc_final_table_event *ev; struct ath10k_tpc_stats_final *tpc_stats; ev = (struct wmi_pdev_tpc_final_table_event *)skb->data; num_tx_chain = __le32_to_cpu(ev->num_tx_chain); if (num_tx_chain > WMI_TPC_TX_N_CHAIN) { ath10k_warn(ar, "number of tx chain is %d greater than TPC final configured tx chain %d\n", num_tx_chain, WMI_TPC_TX_N_CHAIN); return; } rate_max = __le32_to_cpu(ev->rate_max); if (rate_max > WMI_TPC_FINAL_RATE_MAX) { ath10k_warn(ar, "number of rate is %d greater than TPC final configured rate %d\n", rate_max, WMI_TPC_FINAL_RATE_MAX); rate_max = WMI_TPC_FINAL_RATE_MAX; } tpc_stats = kzalloc(sizeof(*tpc_stats), GFP_ATOMIC); if (!tpc_stats) return; ath10k_wmi_tpc_config_get_rate_code(rate_code, pream_table, num_tx_chain); tpc_stats->chan_freq = __le32_to_cpu(ev->chan_freq); tpc_stats->phy_mode = __le32_to_cpu(ev->phy_mode); tpc_stats->ctl = __le32_to_cpu(ev->ctl); tpc_stats->reg_domain = __le32_to_cpu(ev->reg_domain); tpc_stats->twice_antenna_gain = a_sle32_to_cpu(ev->twice_antenna_gain); tpc_stats->twice_antenna_reduction = __le32_to_cpu(ev->twice_antenna_reduction); tpc_stats->power_limit = __le32_to_cpu(ev->power_limit); tpc_stats->twice_max_rd_power = __le32_to_cpu(ev->twice_max_rd_power); tpc_stats->num_tx_chain = num_tx_chain; tpc_stats->rate_max = rate_max; ath10k_wmi_tpc_stats_final_disp_tables(ar, ev, tpc_stats, rate_code, pream_table, WMI_TPC_TABLE_TYPE_CDD); ath10k_wmi_tpc_stats_final_disp_tables(ar, ev, tpc_stats, rate_code, pream_table, WMI_TPC_TABLE_TYPE_STBC); ath10k_wmi_tpc_stats_final_disp_tables(ar, ev, tpc_stats, rate_code, pream_table, WMI_TPC_TABLE_TYPE_TXBF); ath10k_debug_tpc_stats_final_process(ar, tpc_stats); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event tpc final table channel %d mode %d ctl %d regd %d gain %d %d limit %d max_power %d tx_chanins %d rates %d\n", __le32_to_cpu(ev->chan_freq), __le32_to_cpu(ev->phy_mode), __le32_to_cpu(ev->ctl), __le32_to_cpu(ev->reg_domain), a_sle32_to_cpu(ev->twice_antenna_gain), __le32_to_cpu(ev->twice_antenna_reduction), __le32_to_cpu(ev->power_limit), __le32_to_cpu(ev->twice_max_rd_power) / 2, __le32_to_cpu(ev->num_tx_chain), __le32_to_cpu(ev->rate_max)); } static void ath10k_wmi_handle_tdls_peer_event(struct ath10k *ar, struct sk_buff *skb) { struct wmi_tdls_peer_event *ev; struct ath10k_peer *peer; struct ath10k_vif *arvif; int vdev_id; int peer_status; int peer_reason; u8 reason; if (skb->len < sizeof(*ev)) { ath10k_err(ar, "received tdls peer event with invalid size (%d bytes)\n", skb->len); return; } ev = (struct wmi_tdls_peer_event *)skb->data; vdev_id = __le32_to_cpu(ev->vdev_id); peer_status = __le32_to_cpu(ev->peer_status); peer_reason = 
__le32_to_cpu(ev->peer_reason); spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, vdev_id, ev->peer_macaddr.addr); spin_unlock_bh(&ar->data_lock); if (!peer) { ath10k_warn(ar, "failed to find peer entry for %pM\n", ev->peer_macaddr.addr); return; } switch (peer_status) { case WMI_TDLS_SHOULD_TEARDOWN: switch (peer_reason) { case WMI_TDLS_TEARDOWN_REASON_PTR_TIMEOUT: case WMI_TDLS_TEARDOWN_REASON_NO_RESPONSE: case WMI_TDLS_TEARDOWN_REASON_RSSI: reason = WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE; break; default: reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED; break; } arvif = ath10k_get_arvif(ar, vdev_id); if (!arvif) { ath10k_warn(ar, "received tdls peer event for invalid vdev id %u\n", vdev_id); return; } ieee80211_tdls_oper_request(arvif->vif, ev->peer_macaddr.addr, NL80211_TDLS_TEARDOWN, reason, GFP_ATOMIC); ath10k_dbg(ar, ATH10K_DBG_WMI, "received tdls teardown event for peer %pM reason %u\n", ev->peer_macaddr.addr, peer_reason); break; default: ath10k_dbg(ar, ATH10K_DBG_WMI, "received unknown tdls peer event %u\n", peer_status); break; } } static void ath10k_wmi_event_peer_sta_ps_state_chg(struct ath10k *ar, struct sk_buff *skb) { struct wmi_peer_sta_ps_state_chg_event *ev; struct ieee80211_sta *sta; struct ath10k_sta *arsta; u8 peer_addr[ETH_ALEN]; lockdep_assert_held(&ar->data_lock); ev = (struct wmi_peer_sta_ps_state_chg_event *)skb->data; ether_addr_copy(peer_addr, ev->peer_macaddr.addr); rcu_read_lock(); sta = ieee80211_find_sta_by_ifaddr(ar->hw, peer_addr, NULL); if (!sta) { ath10k_warn(ar, "failed to find station entry %pM\n", peer_addr); goto exit; } arsta = (struct ath10k_sta *)sta->drv_priv; arsta->peer_ps_state = __le32_to_cpu(ev->peer_ps_state); exit: rcu_read_unlock(); } void ath10k_wmi_event_pdev_ftm_intg(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_PDEV_FTM_INTG_EVENTID\n"); } void ath10k_wmi_event_gtk_offload_status(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_GTK_OFFLOAD_STATUS_EVENTID\n"); } void ath10k_wmi_event_gtk_rekey_fail(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_GTK_REKEY_FAIL_EVENTID\n"); } void ath10k_wmi_event_delba_complete(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_TX_DELBA_COMPLETE_EVENTID\n"); } void ath10k_wmi_event_addba_complete(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_TX_ADDBA_COMPLETE_EVENTID\n"); } void ath10k_wmi_event_vdev_install_key_complete(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_VDEV_INSTALL_KEY_COMPLETE_EVENTID\n"); } void ath10k_wmi_event_inst_rssi_stats(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_INST_RSSI_STATS_EVENTID\n"); } void ath10k_wmi_event_vdev_standby_req(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_VDEV_STANDBY_REQ_EVENTID\n"); } void ath10k_wmi_event_vdev_resume_req(struct ath10k *ar, struct sk_buff *skb) { ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_VDEV_RESUME_REQ_EVENTID\n"); } static int ath10k_wmi_alloc_chunk(struct ath10k *ar, u32 req_id, u32 num_units, u32 unit_len) { dma_addr_t paddr; u32 pool_size; int idx = ar->wmi.num_mem_chunks; void *vaddr; pool_size = num_units * round_up(unit_len, 4); vaddr = dma_alloc_coherent(ar->dev, pool_size, &paddr, GFP_KERNEL); if (!vaddr) return -ENOMEM; ar->wmi.mem_chunks[idx].vaddr = vaddr; ar->wmi.mem_chunks[idx].paddr = paddr; ar->wmi.mem_chunks[idx].len = pool_size; ar->wmi.mem_chunks[idx].req_id = req_id; 
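/* the chunk descriptor is fully initialised before the count is
 * bumped; returning the number of units covered lets
 * ath10k_wmi_alloc_host_mem() subtract it from the outstanding total
 */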
ar->wmi.num_mem_chunks++; return num_units; } static int ath10k_wmi_alloc_host_mem(struct ath10k *ar, u32 req_id, u32 num_units, u32 unit_len) { int ret; while (num_units) { ret = ath10k_wmi_alloc_chunk(ar, req_id, num_units, unit_len); if (ret < 0) return ret; num_units -= ret; } return 0; } static bool ath10k_wmi_is_host_mem_allocated(struct ath10k *ar, const struct wlan_host_mem_req **mem_reqs, u32 num_mem_reqs) { u32 req_id, num_units, unit_size, num_unit_info; u32 pool_size; int i, j; bool found; if (ar->wmi.num_mem_chunks != num_mem_reqs) return false; for (i = 0; i < num_mem_reqs; ++i) { req_id = __le32_to_cpu(mem_reqs[i]->req_id); num_units = __le32_to_cpu(mem_reqs[i]->num_units); unit_size = __le32_to_cpu(mem_reqs[i]->unit_size); num_unit_info = __le32_to_cpu(mem_reqs[i]->num_unit_info); if (num_unit_info & NUM_UNITS_IS_NUM_ACTIVE_PEERS) { if (ar->num_active_peers) num_units = ar->num_active_peers + 1; else num_units = ar->max_num_peers + 1; } else if (num_unit_info & NUM_UNITS_IS_NUM_PEERS) { num_units = ar->max_num_peers + 1; } else if (num_unit_info & NUM_UNITS_IS_NUM_VDEVS) { num_units = ar->max_num_vdevs + 1; } found = false; for (j = 0; j < ar->wmi.num_mem_chunks; j++) { if (ar->wmi.mem_chunks[j].req_id == req_id) { pool_size = num_units * round_up(unit_size, 4); if (ar->wmi.mem_chunks[j].len == pool_size) { found = true; break; } } } if (!found) return false; } return true; } static int ath10k_wmi_main_op_pull_svc_rdy_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_svc_rdy_ev_arg *arg) { struct wmi_service_ready_event *ev; size_t i, n; if (skb->len < sizeof(*ev)) return -EPROTO; ev = (void *)skb->data; skb_pull(skb, sizeof(*ev)); arg->min_tx_power = ev->hw_min_tx_power; arg->max_tx_power = ev->hw_max_tx_power; arg->ht_cap = ev->ht_cap_info; arg->vht_cap = ev->vht_cap_info; arg->vht_supp_mcs = ev->vht_supp_mcs; arg->sw_ver0 = ev->sw_version; arg->sw_ver1 = ev->sw_version_1; arg->phy_capab = ev->phy_capability; arg->num_rf_chains = ev->num_rf_chains; arg->eeprom_rd = ev->hal_reg_capabilities.eeprom_rd; arg->low_2ghz_chan = ev->hal_reg_capabilities.low_2ghz_chan; arg->high_2ghz_chan = ev->hal_reg_capabilities.high_2ghz_chan; arg->low_5ghz_chan = ev->hal_reg_capabilities.low_5ghz_chan; arg->high_5ghz_chan = ev->hal_reg_capabilities.high_5ghz_chan; arg->num_mem_reqs = ev->num_mem_reqs; arg->service_map = ev->wmi_service_bitmap; arg->service_map_len = sizeof(ev->wmi_service_bitmap); n = min_t(size_t, __le32_to_cpu(arg->num_mem_reqs), ARRAY_SIZE(arg->mem_reqs)); for (i = 0; i < n; i++) arg->mem_reqs[i] = &ev->mem_reqs[i]; if (skb->len < __le32_to_cpu(arg->num_mem_reqs) * sizeof(arg->mem_reqs[0])) return -EPROTO; return 0; } static int ath10k_wmi_10x_op_pull_svc_rdy_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_svc_rdy_ev_arg *arg) { struct wmi_10x_service_ready_event *ev; int i, n; if (skb->len < sizeof(*ev)) return -EPROTO; ev = (void *)skb->data; skb_pull(skb, sizeof(*ev)); arg->min_tx_power = ev->hw_min_tx_power; arg->max_tx_power = ev->hw_max_tx_power; arg->ht_cap = ev->ht_cap_info; arg->vht_cap = ev->vht_cap_info; arg->vht_supp_mcs = ev->vht_supp_mcs; arg->sw_ver0 = ev->sw_version; arg->phy_capab = ev->phy_capability; arg->num_rf_chains = ev->num_rf_chains; arg->eeprom_rd = ev->hal_reg_capabilities.eeprom_rd; arg->low_2ghz_chan = ev->hal_reg_capabilities.low_2ghz_chan; arg->high_2ghz_chan = ev->hal_reg_capabilities.high_2ghz_chan; arg->low_5ghz_chan = ev->hal_reg_capabilities.low_5ghz_chan; arg->high_5ghz_chan = ev->hal_reg_capabilities.high_5ghz_chan; 
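/* the remaining fields mirror the main WMI variant above: the
 * advertised memory request count is clamped to the local array and
 * the skb length is validated before pointers into the event are
 * handed out
 */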
arg->num_mem_reqs = ev->num_mem_reqs; arg->service_map = ev->wmi_service_bitmap; arg->service_map_len = sizeof(ev->wmi_service_bitmap); /* Deliberately skipping ev->sys_cap_info as WMI and WMI-TLV have * different values. We would need a translation to handle that, * but as we don't currently need anything from sys_cap_info from * WMI interface (only from WMI-TLV) safest it to skip it. */ n = min_t(size_t, __le32_to_cpu(arg->num_mem_reqs), ARRAY_SIZE(arg->mem_reqs)); for (i = 0; i < n; i++) arg->mem_reqs[i] = &ev->mem_reqs[i]; if (skb->len < __le32_to_cpu(arg->num_mem_reqs) * sizeof(arg->mem_reqs[0])) return -EPROTO; return 0; } static void ath10k_wmi_event_service_ready_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, svc_rdy_work); struct sk_buff *skb = ar->svc_rdy_skb; struct wmi_svc_rdy_ev_arg arg = {}; u32 num_units, req_id, unit_size, num_mem_reqs, num_unit_info, i; int ret; bool allocated; if (!skb) { ath10k_warn(ar, "invalid service ready event skb\n"); return; } ret = ath10k_wmi_pull_svc_rdy(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse service ready: %d\n", ret); return; } ath10k_wmi_map_svc(ar, arg.service_map, ar->wmi.svc_map, arg.service_map_len); ar->hw_min_tx_power = __le32_to_cpu(arg.min_tx_power); ar->hw_max_tx_power = __le32_to_cpu(arg.max_tx_power); ar->ht_cap_info = __le32_to_cpu(arg.ht_cap); ar->vht_cap_info = __le32_to_cpu(arg.vht_cap); ar->vht_supp_mcs = __le32_to_cpu(arg.vht_supp_mcs); ar->fw_version_major = (__le32_to_cpu(arg.sw_ver0) & 0xff000000) >> 24; ar->fw_version_minor = (__le32_to_cpu(arg.sw_ver0) & 0x00ffffff); ar->fw_version_release = (__le32_to_cpu(arg.sw_ver1) & 0xffff0000) >> 16; ar->fw_version_build = (__le32_to_cpu(arg.sw_ver1) & 0x0000ffff); ar->phy_capability = __le32_to_cpu(arg.phy_capab); ar->num_rf_chains = __le32_to_cpu(arg.num_rf_chains); ar->hw_eeprom_rd = __le32_to_cpu(arg.eeprom_rd); ar->low_2ghz_chan = __le32_to_cpu(arg.low_2ghz_chan); ar->high_2ghz_chan = __le32_to_cpu(arg.high_2ghz_chan); ar->low_5ghz_chan = __le32_to_cpu(arg.low_5ghz_chan); ar->high_5ghz_chan = __le32_to_cpu(arg.high_5ghz_chan); ar->sys_cap_info = __le32_to_cpu(arg.sys_cap_info); ath10k_dbg_dump(ar, ATH10K_DBG_WMI, NULL, "wmi svc: ", arg.service_map, arg.service_map_len); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi sys_cap_info 0x%x\n", ar->sys_cap_info); if (ar->num_rf_chains > ar->max_spatial_stream) { ath10k_warn(ar, "hardware advertises support for more spatial streams than it should (%d > %d)\n", ar->num_rf_chains, ar->max_spatial_stream); ar->num_rf_chains = ar->max_spatial_stream; } if (!ar->cfg_tx_chainmask) { ar->cfg_tx_chainmask = (1 << ar->num_rf_chains) - 1; ar->cfg_rx_chainmask = (1 << ar->num_rf_chains) - 1; } if (strlen(ar->hw->wiphy->fw_version) == 0) { snprintf(ar->hw->wiphy->fw_version, sizeof(ar->hw->wiphy->fw_version), "%u.%u.%u.%u", ar->fw_version_major, ar->fw_version_minor, ar->fw_version_release, ar->fw_version_build); } num_mem_reqs = __le32_to_cpu(arg.num_mem_reqs); if (num_mem_reqs > WMI_MAX_MEM_REQS) { ath10k_warn(ar, "requested memory chunks number (%d) exceeds the limit\n", num_mem_reqs); return; } if (test_bit(WMI_SERVICE_PEER_CACHING, ar->wmi.svc_map)) { if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) ar->num_active_peers = TARGET_10_4_QCACHE_ACTIVE_PEERS_PFC + ar->max_num_vdevs; else ar->num_active_peers = TARGET_10_4_QCACHE_ACTIVE_PEERS + ar->max_num_vdevs; ar->max_num_peers = TARGET_10_4_NUM_QCACHE_PEERS_MAX + ar->max_num_vdevs; ar->num_tids = 
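/* two TIDs are budgeted per active peer */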
ar->num_active_peers * 2; ar->max_num_stations = TARGET_10_4_NUM_QCACHE_PEERS_MAX; } /* TODO: Adjust max peer count for cases like WMI_SERVICE_RATECTRL_CACHE * and WMI_SERVICE_IRAM_TIDS, etc. */ allocated = ath10k_wmi_is_host_mem_allocated(ar, arg.mem_reqs, num_mem_reqs); if (allocated) goto skip_mem_alloc; /* Either this event is received during boot time or there is a change * in memory requirement from firmware when compared to last request. * Free any old memory and do a fresh allocation based on the current * memory requirement. */ ath10k_wmi_free_host_mem(ar); for (i = 0; i < num_mem_reqs; ++i) { req_id = __le32_to_cpu(arg.mem_reqs[i]->req_id); num_units = __le32_to_cpu(arg.mem_reqs[i]->num_units); unit_size = __le32_to_cpu(arg.mem_reqs[i]->unit_size); num_unit_info = __le32_to_cpu(arg.mem_reqs[i]->num_unit_info); if (num_unit_info & NUM_UNITS_IS_NUM_ACTIVE_PEERS) { if (ar->num_active_peers) num_units = ar->num_active_peers + 1; else num_units = ar->max_num_peers + 1; } else if (num_unit_info & NUM_UNITS_IS_NUM_PEERS) { /* number of units to allocate is number of * peers, 1 extra for self peer on target * this needs to be tied, host and target * can get out of sync */ num_units = ar->max_num_peers + 1; } else if (num_unit_info & NUM_UNITS_IS_NUM_VDEVS) { num_units = ar->max_num_vdevs + 1; } ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mem_req_id %d num_units %d num_unit_info %d unit size %d actual units %d\n", req_id, __le32_to_cpu(arg.mem_reqs[i]->num_units), num_unit_info, unit_size, num_units); ret = ath10k_wmi_alloc_host_mem(ar, req_id, num_units, unit_size); if (ret) return; } skip_mem_alloc: ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event service ready min_tx_power 0x%08x max_tx_power 0x%08x ht_cap 0x%08x vht_cap 0x%08x vht_supp_mcs 0x%08x sw_ver0 0x%08x sw_ver1 0x%08x fw_build 0x%08x phy_capab 0x%08x num_rf_chains 0x%08x eeprom_rd 0x%08x low_2ghz_chan %d high_2ghz_chan %d low_5ghz_chan %d high_5ghz_chan %d num_mem_reqs 0x%08x\n", __le32_to_cpu(arg.min_tx_power), __le32_to_cpu(arg.max_tx_power), __le32_to_cpu(arg.ht_cap), __le32_to_cpu(arg.vht_cap), __le32_to_cpu(arg.vht_supp_mcs), __le32_to_cpu(arg.sw_ver0), __le32_to_cpu(arg.sw_ver1), __le32_to_cpu(arg.fw_build), __le32_to_cpu(arg.phy_capab), __le32_to_cpu(arg.num_rf_chains), __le32_to_cpu(arg.eeprom_rd), __le32_to_cpu(arg.low_2ghz_chan), __le32_to_cpu(arg.high_2ghz_chan), __le32_to_cpu(arg.low_5ghz_chan), __le32_to_cpu(arg.high_5ghz_chan), __le32_to_cpu(arg.num_mem_reqs)); dev_kfree_skb(skb); ar->svc_rdy_skb = NULL; complete(&ar->wmi.service_ready); } void ath10k_wmi_event_service_ready(struct ath10k *ar, struct sk_buff *skb) { ar->svc_rdy_skb = skb; queue_work(ar->workqueue_aux, &ar->svc_rdy_work); } static int ath10k_wmi_op_pull_rdy_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_rdy_ev_arg *arg) { struct wmi_ready_event *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->sw_version = ev->sw_version; arg->abi_version = ev->abi_version; arg->status = ev->status; arg->mac_addr = ev->mac_addr.addr; return 0; } static int ath10k_wmi_op_pull_roam_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_roam_ev_arg *arg) { struct wmi_roam_ev *ev = (void *)skb->data; if (skb->len < sizeof(*ev)) return -EPROTO; skb_pull(skb, sizeof(*ev)); arg->vdev_id = ev->vdev_id; arg->reason = ev->reason; return 0; } static int ath10k_wmi_op_pull_echo_ev(struct ath10k *ar, struct sk_buff *skb, struct wmi_echo_ev_arg *arg) { struct wmi_echo_event *ev = (void *)skb->data; arg->value = ev->value; return 0; } 
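/*
 * Illustrative sketch only, not part of the driver: the pull helpers in this
 * file (ready, roam, echo and friends) all follow the same shape -- reject
 * events shorter than the fixed-size header with -EPROTO, then copy fields
 * out with explicit endian conversion so the rx path never trusts
 * firmware-supplied lengths. The names wmi_example_event and
 * ath10k_wmi_example_pull below are hypothetical and exist only to show
 * that pattern.
 */
struct wmi_example_event {
	__le32 vdev_id;
	__le32 status;
} __packed;

static int ath10k_wmi_example_pull(struct ath10k *ar, struct sk_buff *skb,
				   u32 *vdev_id, u32 *status)
{
	const struct wmi_example_event *ev = (void *)skb->data;

	/* refuse to parse anything shorter than the fixed header */
	if (skb->len < sizeof(*ev))
		return -EPROTO;

	*vdev_id = __le32_to_cpu(ev->vdev_id);
	*status = __le32_to_cpu(ev->status);

	return 0;
}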
int ath10k_wmi_event_ready(struct ath10k *ar, struct sk_buff *skb) { struct wmi_rdy_ev_arg arg = {}; int ret; ret = ath10k_wmi_pull_rdy(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse ready event: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event ready sw_version 0x%08x abi_version %u mac_addr %pM status %d\n", __le32_to_cpu(arg.sw_version), __le32_to_cpu(arg.abi_version), arg.mac_addr, __le32_to_cpu(arg.status)); if (is_zero_ether_addr(ar->mac_addr)) ether_addr_copy(ar->mac_addr, arg.mac_addr); complete(&ar->wmi.unified_ready); return 0; } void ath10k_wmi_event_service_available(struct ath10k *ar, struct sk_buff *skb) { int ret; struct wmi_svc_avail_ev_arg arg = {}; ret = ath10k_wmi_pull_svc_avail(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse service available event: %d\n", ret); } /* * Initialization of "arg.service_map_ext_valid" to ZERO is necessary * for the below logic to work. */ if (arg.service_map_ext_valid) ath10k_wmi_map_svc_ext(ar, arg.service_map_ext, ar->wmi.svc_map, __le32_to_cpu(arg.service_map_ext_len)); } static int ath10k_wmi_event_temperature(struct ath10k *ar, struct sk_buff *skb) { const struct wmi_pdev_temperature_event *ev; ev = (struct wmi_pdev_temperature_event *)skb->data; if (WARN_ON(skb->len < sizeof(*ev))) return -EPROTO; ath10k_thermal_event_temperature(ar, __le32_to_cpu(ev->temperature)); return 0; } static int ath10k_wmi_event_pdev_bss_chan_info(struct ath10k *ar, struct sk_buff *skb) { struct wmi_pdev_bss_chan_info_event *ev; struct survey_info *survey; u64 busy, total, tx, rx, rx_bss; u32 freq, noise_floor; u32 cc_freq_hz = ar->hw_params.channel_counters_freq_hz; int idx; ev = (struct wmi_pdev_bss_chan_info_event *)skb->data; if (WARN_ON(skb->len < sizeof(*ev))) return -EPROTO; freq = __le32_to_cpu(ev->freq); noise_floor = __le32_to_cpu(ev->noise_floor); busy = __le64_to_cpu(ev->cycle_busy); total = __le64_to_cpu(ev->cycle_total); tx = __le64_to_cpu(ev->cycle_tx); rx = __le64_to_cpu(ev->cycle_rx); rx_bss = __le64_to_cpu(ev->cycle_rx_bss); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event pdev bss chan info:\n freq: %d noise: %d cycle: busy %llu total %llu tx %llu rx %llu rx_bss %llu\n", freq, noise_floor, busy, total, tx, rx, rx_bss); spin_lock_bh(&ar->data_lock); idx = freq_to_idx(ar, freq); if (idx >= ARRAY_SIZE(ar->survey)) { ath10k_warn(ar, "bss chan info: invalid frequency %d (idx %d out of bounds)\n", freq, idx); goto exit; } survey = &ar->survey[idx]; survey->noise = noise_floor; survey->time = div_u64(total, cc_freq_hz); survey->time_busy = div_u64(busy, cc_freq_hz); survey->time_rx = div_u64(rx_bss, cc_freq_hz); survey->time_tx = div_u64(tx, cc_freq_hz); survey->filled |= (SURVEY_INFO_NOISE_DBM | SURVEY_INFO_TIME | SURVEY_INFO_TIME_BUSY | SURVEY_INFO_TIME_RX | SURVEY_INFO_TIME_TX); exit: spin_unlock_bh(&ar->data_lock); complete(&ar->bss_survey_done); return 0; } static inline void ath10k_wmi_queue_set_coverage_class_work(struct ath10k *ar) { if (ar->hw_params.hw_ops->set_coverage_class) { spin_lock_bh(&ar->data_lock); /* This call only ensures that the modified coverage class * persists in case the firmware sets the registers back to * their default value. So calling it is only necessary if the * coverage class has a non-zero value. 
*/ if (ar->fw_coverage.coverage_class) queue_work(ar->workqueue, &ar->set_coverage_class_work); spin_unlock_bh(&ar->data_lock); } } static void ath10k_wmi_op_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; enum wmi_event_id id; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); if (skb_pull(skb, sizeof(struct wmi_cmd_hdr)) == NULL) goto out; trace_ath10k_wmi_event(ar, id, skb->data, skb->len); switch (id) { case WMI_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); /* mgmt_rx() owns the skb now! */ return; case WMI_SCAN_EVENTID: ath10k_wmi_event_scan(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_CHAN_INFO_EVENTID: ath10k_wmi_event_chan_info(ar, skb); break; case WMI_ECHO_EVENTID: ath10k_wmi_event_echo(ar, skb); break; case WMI_DEBUG_MESG_EVENTID: ath10k_wmi_event_debug_mesg(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_UPDATE_STATS_EVENTID: ath10k_wmi_event_update_stats(ar, skb); break; case WMI_VDEV_START_RESP_EVENTID: ath10k_wmi_event_vdev_start_resp(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_VDEV_STOPPED_EVENTID: ath10k_wmi_event_vdev_stopped(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_PEER_STA_KICKOUT_EVENTID: ath10k_wmi_event_peer_sta_kickout(ar, skb); break; case WMI_HOST_SWBA_EVENTID: ath10k_wmi_event_host_swba(ar, skb); break; case WMI_TBTTOFFSET_UPDATE_EVENTID: ath10k_wmi_event_tbttoffset_update(ar, skb); break; case WMI_PHYERR_EVENTID: ath10k_wmi_event_phyerr(ar, skb); break; case WMI_ROAM_EVENTID: ath10k_wmi_event_roam(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_PROFILE_MATCH: ath10k_wmi_event_profile_match(ar, skb); break; case WMI_DEBUG_PRINT_EVENTID: ath10k_wmi_event_debug_print(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_PDEV_QVIT_EVENTID: ath10k_wmi_event_pdev_qvit(ar, skb); break; case WMI_WLAN_PROFILE_DATA_EVENTID: ath10k_wmi_event_wlan_profile_data(ar, skb); break; case WMI_RTT_MEASUREMENT_REPORT_EVENTID: ath10k_wmi_event_rtt_measurement_report(ar, skb); break; case WMI_TSF_MEASUREMENT_REPORT_EVENTID: ath10k_wmi_event_tsf_measurement_report(ar, skb); break; case WMI_RTT_ERROR_REPORT_EVENTID: ath10k_wmi_event_rtt_error_report(ar, skb); break; case WMI_WOW_WAKEUP_HOST_EVENTID: ath10k_wmi_event_wow_wakeup_host(ar, skb); break; case WMI_DCS_INTERFERENCE_EVENTID: ath10k_wmi_event_dcs_interference(ar, skb); break; case WMI_PDEV_TPC_CONFIG_EVENTID: ath10k_wmi_event_pdev_tpc_config(ar, skb); break; case WMI_PDEV_FTM_INTG_EVENTID: ath10k_wmi_event_pdev_ftm_intg(ar, skb); break; case WMI_GTK_OFFLOAD_STATUS_EVENTID: ath10k_wmi_event_gtk_offload_status(ar, skb); break; case WMI_GTK_REKEY_FAIL_EVENTID: ath10k_wmi_event_gtk_rekey_fail(ar, skb); break; case WMI_TX_DELBA_COMPLETE_EVENTID: ath10k_wmi_event_delba_complete(ar, skb); break; case WMI_TX_ADDBA_COMPLETE_EVENTID: ath10k_wmi_event_addba_complete(ar, skb); break; case WMI_VDEV_INSTALL_KEY_COMPLETE_EVENTID: ath10k_wmi_event_vdev_install_key_complete(ar, skb); break; case WMI_SERVICE_READY_EVENTID: ath10k_wmi_event_service_ready(ar, skb); return; case WMI_READY_EVENTID: ath10k_wmi_event_ready(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_SERVICE_AVAILABLE_EVENTID: ath10k_wmi_event_service_available(ar, skb); break; default: ath10k_warn(ar, "Unknown eventid: %d\n", id); break; } out: dev_kfree_skb(skb); } static void ath10k_wmi_10_1_op_rx(struct ath10k *ar, struct 
sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; enum wmi_10x_event_id id; bool consumed; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); if (skb_pull(skb, sizeof(struct wmi_cmd_hdr)) == NULL) goto out; trace_ath10k_wmi_event(ar, id, skb->data, skb->len); consumed = ath10k_tm_event_wmi(ar, id, skb); /* Ready event must be handled normally also in UTF mode so that we * know the UTF firmware has booted, others we are just bypass WMI * events to testmode. */ if (consumed && id != WMI_10X_READY_EVENTID) { ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi testmode consumed 0x%x\n", id); goto out; } switch (id) { case WMI_10X_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); /* mgmt_rx() owns the skb now! */ return; case WMI_10X_SCAN_EVENTID: ath10k_wmi_event_scan(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10X_CHAN_INFO_EVENTID: ath10k_wmi_event_chan_info(ar, skb); break; case WMI_10X_ECHO_EVENTID: ath10k_wmi_event_echo(ar, skb); break; case WMI_10X_DEBUG_MESG_EVENTID: ath10k_wmi_event_debug_mesg(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10X_UPDATE_STATS_EVENTID: ath10k_wmi_event_update_stats(ar, skb); break; case WMI_10X_VDEV_START_RESP_EVENTID: ath10k_wmi_event_vdev_start_resp(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10X_VDEV_STOPPED_EVENTID: ath10k_wmi_event_vdev_stopped(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10X_PEER_STA_KICKOUT_EVENTID: ath10k_wmi_event_peer_sta_kickout(ar, skb); break; case WMI_10X_HOST_SWBA_EVENTID: ath10k_wmi_event_host_swba(ar, skb); break; case WMI_10X_TBTTOFFSET_UPDATE_EVENTID: ath10k_wmi_event_tbttoffset_update(ar, skb); break; case WMI_10X_PHYERR_EVENTID: ath10k_wmi_event_phyerr(ar, skb); break; case WMI_10X_ROAM_EVENTID: ath10k_wmi_event_roam(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10X_PROFILE_MATCH: ath10k_wmi_event_profile_match(ar, skb); break; case WMI_10X_DEBUG_PRINT_EVENTID: ath10k_wmi_event_debug_print(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10X_PDEV_QVIT_EVENTID: ath10k_wmi_event_pdev_qvit(ar, skb); break; case WMI_10X_WLAN_PROFILE_DATA_EVENTID: ath10k_wmi_event_wlan_profile_data(ar, skb); break; case WMI_10X_RTT_MEASUREMENT_REPORT_EVENTID: ath10k_wmi_event_rtt_measurement_report(ar, skb); break; case WMI_10X_TSF_MEASUREMENT_REPORT_EVENTID: ath10k_wmi_event_tsf_measurement_report(ar, skb); break; case WMI_10X_RTT_ERROR_REPORT_EVENTID: ath10k_wmi_event_rtt_error_report(ar, skb); break; case WMI_10X_WOW_WAKEUP_HOST_EVENTID: ath10k_wmi_event_wow_wakeup_host(ar, skb); break; case WMI_10X_DCS_INTERFERENCE_EVENTID: ath10k_wmi_event_dcs_interference(ar, skb); break; case WMI_10X_PDEV_TPC_CONFIG_EVENTID: ath10k_wmi_event_pdev_tpc_config(ar, skb); break; case WMI_10X_INST_RSSI_STATS_EVENTID: ath10k_wmi_event_inst_rssi_stats(ar, skb); break; case WMI_10X_VDEV_STANDBY_REQ_EVENTID: ath10k_wmi_event_vdev_standby_req(ar, skb); break; case WMI_10X_VDEV_RESUME_REQ_EVENTID: ath10k_wmi_event_vdev_resume_req(ar, skb); break; case WMI_10X_SERVICE_READY_EVENTID: ath10k_wmi_event_service_ready(ar, skb); return; case WMI_10X_READY_EVENTID: ath10k_wmi_event_ready(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10X_PDEV_UTF_EVENTID: /* ignore utf events */ break; default: ath10k_warn(ar, "Unknown eventid: %d\n", id); break; } out: dev_kfree_skb(skb); } static void ath10k_wmi_10_2_op_rx(struct ath10k *ar, struct sk_buff *skb) { struct 
wmi_cmd_hdr *cmd_hdr; enum wmi_10_2_event_id id; bool consumed; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); if (skb_pull(skb, sizeof(struct wmi_cmd_hdr)) == NULL) goto out; trace_ath10k_wmi_event(ar, id, skb->data, skb->len); consumed = ath10k_tm_event_wmi(ar, id, skb); /* Ready event must be handled normally also in UTF mode so that we * know the UTF firmware has booted, others we are just bypass WMI * events to testmode. */ if (consumed && id != WMI_10_2_READY_EVENTID) { ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi testmode consumed 0x%x\n", id); goto out; } switch (id) { case WMI_10_2_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); /* mgmt_rx() owns the skb now! */ return; case WMI_10_2_SCAN_EVENTID: ath10k_wmi_event_scan(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_CHAN_INFO_EVENTID: ath10k_wmi_event_chan_info(ar, skb); break; case WMI_10_2_ECHO_EVENTID: ath10k_wmi_event_echo(ar, skb); break; case WMI_10_2_DEBUG_MESG_EVENTID: ath10k_wmi_event_debug_mesg(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_UPDATE_STATS_EVENTID: ath10k_wmi_event_update_stats(ar, skb); break; case WMI_10_2_VDEV_START_RESP_EVENTID: ath10k_wmi_event_vdev_start_resp(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_VDEV_STOPPED_EVENTID: ath10k_wmi_event_vdev_stopped(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_PEER_STA_KICKOUT_EVENTID: ath10k_wmi_event_peer_sta_kickout(ar, skb); break; case WMI_10_2_HOST_SWBA_EVENTID: ath10k_wmi_event_host_swba(ar, skb); break; case WMI_10_2_TBTTOFFSET_UPDATE_EVENTID: ath10k_wmi_event_tbttoffset_update(ar, skb); break; case WMI_10_2_PHYERR_EVENTID: ath10k_wmi_event_phyerr(ar, skb); break; case WMI_10_2_ROAM_EVENTID: ath10k_wmi_event_roam(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_PROFILE_MATCH: ath10k_wmi_event_profile_match(ar, skb); break; case WMI_10_2_DEBUG_PRINT_EVENTID: ath10k_wmi_event_debug_print(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_PDEV_QVIT_EVENTID: ath10k_wmi_event_pdev_qvit(ar, skb); break; case WMI_10_2_WLAN_PROFILE_DATA_EVENTID: ath10k_wmi_event_wlan_profile_data(ar, skb); break; case WMI_10_2_RTT_MEASUREMENT_REPORT_EVENTID: ath10k_wmi_event_rtt_measurement_report(ar, skb); break; case WMI_10_2_TSF_MEASUREMENT_REPORT_EVENTID: ath10k_wmi_event_tsf_measurement_report(ar, skb); break; case WMI_10_2_RTT_ERROR_REPORT_EVENTID: ath10k_wmi_event_rtt_error_report(ar, skb); break; case WMI_10_2_WOW_WAKEUP_HOST_EVENTID: ath10k_wmi_event_wow_wakeup_host(ar, skb); break; case WMI_10_2_DCS_INTERFERENCE_EVENTID: ath10k_wmi_event_dcs_interference(ar, skb); break; case WMI_10_2_PDEV_TPC_CONFIG_EVENTID: ath10k_wmi_event_pdev_tpc_config(ar, skb); break; case WMI_10_2_INST_RSSI_STATS_EVENTID: ath10k_wmi_event_inst_rssi_stats(ar, skb); break; case WMI_10_2_VDEV_STANDBY_REQ_EVENTID: ath10k_wmi_event_vdev_standby_req(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_VDEV_RESUME_REQ_EVENTID: ath10k_wmi_event_vdev_resume_req(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_SERVICE_READY_EVENTID: ath10k_wmi_event_service_ready(ar, skb); return; case WMI_10_2_READY_EVENTID: ath10k_wmi_event_ready(ar, skb); ath10k_wmi_queue_set_coverage_class_work(ar); break; case WMI_10_2_PDEV_TEMPERATURE_EVENTID: ath10k_wmi_event_temperature(ar, skb); break; case WMI_10_2_PDEV_BSS_CHAN_INFO_EVENTID: 
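/* channel survey counters; handled by the same helper as the 10.4
 * rx path
 */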
                ath10k_wmi_event_pdev_bss_chan_info(ar, skb);
                break;
        case WMI_10_2_RTT_KEEPALIVE_EVENTID:
        case WMI_10_2_GPIO_INPUT_EVENTID:
        case WMI_10_2_PEER_RATECODE_LIST_EVENTID:
        case WMI_10_2_GENERIC_BUFFER_EVENTID:
        case WMI_10_2_MCAST_BUF_RELEASE_EVENTID:
        case WMI_10_2_MCAST_LIST_AGEOUT_EVENTID:
        case WMI_10_2_WDS_PEER_EVENTID:
                ath10k_dbg(ar, ATH10K_DBG_WMI,
                           "received event id %d not implemented\n", id);
                break;
        case WMI_10_2_PEER_STA_PS_STATECHG_EVENTID:
                ath10k_wmi_event_peer_sta_ps_state_chg(ar, skb);
                break;
        default:
                ath10k_warn(ar, "Unknown eventid: %d\n", id);
                break;
        }

out:
        dev_kfree_skb(skb);
}

static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb)
{
        struct wmi_cmd_hdr *cmd_hdr;
        enum wmi_10_4_event_id id;
        bool consumed;

        cmd_hdr = (struct wmi_cmd_hdr *)skb->data;
        id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID);

        if (!skb_pull(skb, sizeof(struct wmi_cmd_hdr)))
                goto out;

        trace_ath10k_wmi_event(ar, id, skb->data, skb->len);

        consumed = ath10k_tm_event_wmi(ar, id, skb);

        /* The ready event must be handled normally even in UTF mode so that
         * we know the UTF firmware has booted; all other events are simply
         * passed through to testmode.
         */
        if (consumed && id != WMI_10_4_READY_EVENTID) {
                ath10k_dbg(ar, ATH10K_DBG_WMI,
                           "wmi testmode consumed 0x%x\n", id);
                goto out;
        }

        switch (id) {
        case WMI_10_4_MGMT_RX_EVENTID:
                ath10k_wmi_event_mgmt_rx(ar, skb);
                /* mgmt_rx() owns the skb now! */
                return;
        case WMI_10_4_ECHO_EVENTID:
                ath10k_wmi_event_echo(ar, skb);
                break;
        case WMI_10_4_DEBUG_MESG_EVENTID:
                ath10k_wmi_event_debug_mesg(ar, skb);
                ath10k_wmi_queue_set_coverage_class_work(ar);
                break;
        case WMI_10_4_SERVICE_READY_EVENTID:
                ath10k_wmi_event_service_ready(ar, skb);
                return;
        case WMI_10_4_SCAN_EVENTID:
                ath10k_wmi_event_scan(ar, skb);
                ath10k_wmi_queue_set_coverage_class_work(ar);
                break;
        case WMI_10_4_CHAN_INFO_EVENTID:
                ath10k_wmi_event_chan_info(ar, skb);
                break;
        case WMI_10_4_PHYERR_EVENTID:
                ath10k_wmi_event_phyerr(ar, skb);
                break;
        case WMI_10_4_READY_EVENTID:
                ath10k_wmi_event_ready(ar, skb);
                ath10k_wmi_queue_set_coverage_class_work(ar);
                break;
        case WMI_10_4_PEER_STA_KICKOUT_EVENTID:
                ath10k_wmi_event_peer_sta_kickout(ar, skb);
                break;
        case WMI_10_4_ROAM_EVENTID:
                ath10k_wmi_event_roam(ar, skb);
                ath10k_wmi_queue_set_coverage_class_work(ar);
                break;
        case WMI_10_4_HOST_SWBA_EVENTID:
                ath10k_wmi_event_host_swba(ar, skb);
                break;
        case WMI_10_4_TBTTOFFSET_UPDATE_EVENTID:
                ath10k_wmi_event_tbttoffset_update(ar, skb);
                break;
        case WMI_10_4_DEBUG_PRINT_EVENTID:
                ath10k_wmi_event_debug_print(ar, skb);
                ath10k_wmi_queue_set_coverage_class_work(ar);
                break;
        case WMI_10_4_VDEV_START_RESP_EVENTID:
                ath10k_wmi_event_vdev_start_resp(ar, skb);
                ath10k_wmi_queue_set_coverage_class_work(ar);
                break;
        case WMI_10_4_VDEV_STOPPED_EVENTID:
                ath10k_wmi_event_vdev_stopped(ar, skb);
                ath10k_wmi_queue_set_coverage_class_work(ar);
                break;
        case WMI_10_4_WOW_WAKEUP_HOST_EVENTID:
        case WMI_10_4_PEER_RATECODE_LIST_EVENTID:
        case WMI_10_4_WDS_PEER_EVENTID:
        case WMI_10_4_DEBUG_FATAL_CONDITION_EVENTID:
                ath10k_dbg(ar, ATH10K_DBG_WMI,
                           "received event id %d not implemented\n", id);
                break;
        case WMI_10_4_UPDATE_STATS_EVENTID:
                ath10k_wmi_event_update_stats(ar, skb);
                break;
        case WMI_10_4_PDEV_TEMPERATURE_EVENTID:
                ath10k_wmi_event_temperature(ar, skb);
                break;
        case WMI_10_4_PDEV_BSS_CHAN_INFO_EVENTID:
                ath10k_wmi_event_pdev_bss_chan_info(ar, skb);
                break;
        case WMI_10_4_PDEV_TPC_CONFIG_EVENTID:
                ath10k_wmi_event_pdev_tpc_config(ar, skb);
                break;
        case WMI_10_4_TDLS_PEER_EVENTID:
                ath10k_wmi_handle_tdls_peer_event(ar, skb);
                break;
        case WMI_10_4_PDEV_TPC_TABLE_EVENTID:
                ath10k_wmi_event_tpc_final_table(ar, skb);
                break;
        case WMI_10_4_DFS_STATUS_CHECK_EVENTID:
                ath10k_wmi_event_dfs_status_check(ar, skb);
                break;
        case WMI_10_4_PEER_STA_PS_STATECHG_EVENTID:
                ath10k_wmi_event_peer_sta_ps_state_chg(ar, skb);
                break;
        default:
                ath10k_warn(ar, "Unknown eventid: %d\n", id);
                break;
        }

out:
        dev_kfree_skb(skb);
}

static void ath10k_wmi_process_rx(struct ath10k *ar, struct sk_buff *skb)
{
        int ret;

        ret = ath10k_wmi_rx(ar, skb);
        if (ret)
                ath10k_warn(ar, "failed to process wmi rx: %d\n", ret);
}

int ath10k_wmi_connect(struct ath10k *ar)
{
        int status;
        struct ath10k_htc_svc_conn_req conn_req;
        struct ath10k_htc_svc_conn_resp conn_resp;

        memset(&ar->wmi.svc_map, 0, sizeof(ar->wmi.svc_map));

        memset(&conn_req, 0, sizeof(conn_req));
        memset(&conn_resp, 0, sizeof(conn_resp));

        /* these fields are the same for all service endpoints */
        conn_req.ep_ops.ep_tx_complete = ath10k_wmi_htc_tx_complete;
        conn_req.ep_ops.ep_rx_complete = ath10k_wmi_process_rx;
        conn_req.ep_ops.ep_tx_credits = ath10k_wmi_op_ep_tx_credits;

        /* connect to control service */
        conn_req.service_id = ATH10K_HTC_SVC_ID_WMI_CONTROL;

        status = ath10k_htc_connect_service(&ar->htc, &conn_req, &conn_resp);
        if (status) {
                ath10k_warn(ar, "failed to connect to WMI CONTROL service status: %d\n",
                            status);
                return status;
        }

        ar->wmi.eid = conn_resp.eid;
        return 0;
}

static struct sk_buff *
ath10k_wmi_op_gen_pdev_set_base_macaddr(struct ath10k *ar,
                                        const u8 macaddr[ETH_ALEN])
{
        struct wmi_pdev_set_base_macaddr_cmd *cmd;
        struct sk_buff *skb;

        skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
        if (!skb)
                return ERR_PTR(-ENOMEM);

        cmd = (struct wmi_pdev_set_base_macaddr_cmd *)skb->data;
        ether_addr_copy(cmd->mac_addr.addr, macaddr);

        ath10k_dbg(ar, ATH10K_DBG_WMI,
                   "wmi pdev basemac %pM\n", macaddr);
        return skb;
}

static struct sk_buff *
ath10k_wmi_op_gen_pdev_set_rd(struct ath10k *ar, u16 rd, u16 rd2g, u16 rd5g,
                              u16 ctl2g, u16 ctl5g,
                              enum wmi_dfs_region dfs_reg)
{
        struct wmi_pdev_set_regdomain_cmd *cmd;
        struct sk_buff *skb;

        skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
        if (!skb)
                return ERR_PTR(-ENOMEM);

        cmd = (struct wmi_pdev_set_regdomain_cmd *)skb->data;
        cmd->reg_domain = __cpu_to_le32(rd);
        cmd->reg_domain_2G = __cpu_to_le32(rd2g);
        cmd->reg_domain_5G = __cpu_to_le32(rd5g);
        cmd->conformance_test_limit_2G = __cpu_to_le32(ctl2g);
        cmd->conformance_test_limit_5G = __cpu_to_le32(ctl5g);

        ath10k_dbg(ar, ATH10K_DBG_WMI,
                   "wmi pdev regdomain rd %x rd2g %x rd5g %x ctl2g %x ctl5g %x\n",
                   rd, rd2g, rd5g, ctl2g, ctl5g);
        return skb;
}

static struct sk_buff *
ath10k_wmi_10x_op_gen_pdev_set_rd(struct ath10k *ar, u16 rd, u16 rd2g,
                                  u16 rd5g, u16 ctl2g, u16 ctl5g,
                                  enum wmi_dfs_region dfs_reg)
{
        struct wmi_pdev_set_regdomain_cmd_10x *cmd;
        struct sk_buff *skb;

        skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
        if (!skb)
                return ERR_PTR(-ENOMEM);

        cmd = (struct wmi_pdev_set_regdomain_cmd_10x *)skb->data;
        cmd->reg_domain = __cpu_to_le32(rd);
        cmd->reg_domain_2G = __cpu_to_le32(rd2g);
        cmd->reg_domain_5G = __cpu_to_le32(rd5g);
        cmd->conformance_test_limit_2G = __cpu_to_le32(ctl2g);
        cmd->conformance_test_limit_5G = __cpu_to_le32(ctl5g);
        cmd->dfs_domain = __cpu_to_le32(dfs_reg);

        ath10k_dbg(ar, ATH10K_DBG_WMI,
                   "wmi pdev regdomain rd %x rd2g %x rd5g %x ctl2g %x ctl5g %x dfs_region %x\n",
                   rd, rd2g, rd5g, ctl2g, ctl5g, dfs_reg);
        return skb;
}

static struct sk_buff *
ath10k_wmi_op_gen_pdev_suspend(struct ath10k *ar, u32 suspend_opt)
{
        struct wmi_pdev_suspend_cmd *cmd;
        struct sk_buff *skb;

        skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
        if (!skb)
                return ERR_PTR(-ENOMEM);

        cmd = (struct
wmi_pdev_suspend_cmd *)skb->data; cmd->suspend_opt = __cpu_to_le32(suspend_opt); return skb; } static struct sk_buff * ath10k_wmi_op_gen_pdev_resume(struct ath10k *ar) { struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, 0); if (!skb) return ERR_PTR(-ENOMEM); return skb; } static struct sk_buff * ath10k_wmi_op_gen_pdev_set_param(struct ath10k *ar, u32 id, u32 value) { struct wmi_pdev_set_param_cmd *cmd; struct sk_buff *skb; if (id == WMI_PDEV_PARAM_UNSUPPORTED) { ath10k_warn(ar, "pdev param %d not supported by firmware\n", id); return ERR_PTR(-EOPNOTSUPP); } skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_pdev_set_param_cmd *)skb->data; cmd->param_id = __cpu_to_le32(id); cmd->param_value = __cpu_to_le32(value); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev set param %d value %d\n", id, value); return skb; } void ath10k_wmi_put_host_mem_chunks(struct ath10k *ar, struct wmi_host_mem_chunks *chunks) { struct host_memory_chunk *chunk; int i; chunks->count = __cpu_to_le32(ar->wmi.num_mem_chunks); for (i = 0; i < ar->wmi.num_mem_chunks; i++) { chunk = &chunks->items[i]; chunk->ptr = __cpu_to_le32(ar->wmi.mem_chunks[i].paddr); chunk->size = __cpu_to_le32(ar->wmi.mem_chunks[i].len); chunk->req_id = __cpu_to_le32(ar->wmi.mem_chunks[i].req_id); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi chunk %d len %d requested, addr 0x%llx\n", i, ar->wmi.mem_chunks[i].len, (unsigned long long)ar->wmi.mem_chunks[i].paddr); } } static struct sk_buff *ath10k_wmi_op_gen_init(struct ath10k *ar) { struct wmi_init_cmd *cmd; struct sk_buff *buf; struct wmi_resource_config config = {}; u32 val; config.num_vdevs = __cpu_to_le32(TARGET_NUM_VDEVS); config.num_peers = __cpu_to_le32(TARGET_NUM_PEERS); config.num_offload_peers = __cpu_to_le32(TARGET_NUM_OFFLOAD_PEERS); config.num_offload_reorder_bufs = __cpu_to_le32(TARGET_NUM_OFFLOAD_REORDER_BUFS); config.num_peer_keys = __cpu_to_le32(TARGET_NUM_PEER_KEYS); config.num_tids = __cpu_to_le32(TARGET_NUM_TIDS); config.ast_skid_limit = __cpu_to_le32(TARGET_AST_SKID_LIMIT); config.tx_chain_mask = __cpu_to_le32(TARGET_TX_CHAIN_MASK); config.rx_chain_mask = __cpu_to_le32(TARGET_RX_CHAIN_MASK); config.rx_timeout_pri_vo = __cpu_to_le32(TARGET_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_vi = __cpu_to_le32(TARGET_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_be = __cpu_to_le32(TARGET_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_bk = __cpu_to_le32(TARGET_RX_TIMEOUT_HI_PRI); config.rx_decap_mode = __cpu_to_le32(ar->wmi.rx_decap_mode); config.scan_max_pending_reqs = __cpu_to_le32(TARGET_SCAN_MAX_PENDING_REQS); config.bmiss_offload_max_vdev = __cpu_to_le32(TARGET_BMISS_OFFLOAD_MAX_VDEV); config.roam_offload_max_vdev = __cpu_to_le32(TARGET_ROAM_OFFLOAD_MAX_VDEV); config.roam_offload_max_ap_profiles = __cpu_to_le32(TARGET_ROAM_OFFLOAD_MAX_AP_PROFILES); config.num_mcast_groups = __cpu_to_le32(TARGET_NUM_MCAST_GROUPS); config.num_mcast_table_elems = __cpu_to_le32(TARGET_NUM_MCAST_TABLE_ELEMS); config.mcast2ucast_mode = __cpu_to_le32(TARGET_MCAST2UCAST_MODE); config.tx_dbg_log_size = __cpu_to_le32(TARGET_TX_DBG_LOG_SIZE); config.num_wds_entries = __cpu_to_le32(TARGET_NUM_WDS_ENTRIES); config.dma_burst_size = __cpu_to_le32(TARGET_DMA_BURST_SIZE); config.mac_aggr_delim = __cpu_to_le32(TARGET_MAC_AGGR_DELIM); val = TARGET_RX_SKIP_DEFRAG_TIMEOUT_DUP_DETECTION_CHECK; config.rx_skip_defrag_timeout_dup_detection_check = __cpu_to_le32(val); config.vow_config = __cpu_to_le32(TARGET_VOW_CONFIG); config.gtk_offload_max_vdev = __cpu_to_le32(TARGET_GTK_OFFLOAD_MAX_VDEV); 
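        /* The remaining MSDU-descriptor and fragment limits are filled in
         * below. The command buffer is then sized with
         * struct_size(cmd, mem_chunks.items, ar->wmi.num_mem_chunks), i.e.
         * sizeof(*cmd) plus room for the variable-length host memory chunk
         * array that ath10k_wmi_put_host_mem_chunks() appends.
         */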
config.num_msdu_desc = __cpu_to_le32(TARGET_NUM_MSDU_DESC); config.max_frag_entries = __cpu_to_le32(TARGET_MAX_FRAG_ENTRIES); buf = ath10k_wmi_alloc_skb(ar, struct_size(cmd, mem_chunks.items, ar->wmi.num_mem_chunks)); if (!buf) return ERR_PTR(-ENOMEM); cmd = (struct wmi_init_cmd *)buf->data; memcpy(&cmd->resource_config, &config, sizeof(config)); ath10k_wmi_put_host_mem_chunks(ar, &cmd->mem_chunks); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi init\n"); return buf; } static struct sk_buff *ath10k_wmi_10_1_op_gen_init(struct ath10k *ar) { struct wmi_init_cmd_10x *cmd; struct sk_buff *buf; struct wmi_resource_config_10x config = {}; u32 val; config.num_vdevs = __cpu_to_le32(TARGET_10X_NUM_VDEVS); config.num_peers = __cpu_to_le32(TARGET_10X_NUM_PEERS); config.num_peer_keys = __cpu_to_le32(TARGET_10X_NUM_PEER_KEYS); config.num_tids = __cpu_to_le32(TARGET_10X_NUM_TIDS); config.ast_skid_limit = __cpu_to_le32(TARGET_10X_AST_SKID_LIMIT); config.tx_chain_mask = __cpu_to_le32(TARGET_10X_TX_CHAIN_MASK); config.rx_chain_mask = __cpu_to_le32(TARGET_10X_RX_CHAIN_MASK); config.rx_timeout_pri_vo = __cpu_to_le32(TARGET_10X_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_vi = __cpu_to_le32(TARGET_10X_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_be = __cpu_to_le32(TARGET_10X_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_bk = __cpu_to_le32(TARGET_10X_RX_TIMEOUT_HI_PRI); config.rx_decap_mode = __cpu_to_le32(ar->wmi.rx_decap_mode); config.scan_max_pending_reqs = __cpu_to_le32(TARGET_10X_SCAN_MAX_PENDING_REQS); config.bmiss_offload_max_vdev = __cpu_to_le32(TARGET_10X_BMISS_OFFLOAD_MAX_VDEV); config.roam_offload_max_vdev = __cpu_to_le32(TARGET_10X_ROAM_OFFLOAD_MAX_VDEV); config.roam_offload_max_ap_profiles = __cpu_to_le32(TARGET_10X_ROAM_OFFLOAD_MAX_AP_PROFILES); config.num_mcast_groups = __cpu_to_le32(TARGET_10X_NUM_MCAST_GROUPS); config.num_mcast_table_elems = __cpu_to_le32(TARGET_10X_NUM_MCAST_TABLE_ELEMS); config.mcast2ucast_mode = __cpu_to_le32(TARGET_10X_MCAST2UCAST_MODE); config.tx_dbg_log_size = __cpu_to_le32(TARGET_10X_TX_DBG_LOG_SIZE); config.num_wds_entries = __cpu_to_le32(TARGET_10X_NUM_WDS_ENTRIES); config.dma_burst_size = __cpu_to_le32(TARGET_10X_DMA_BURST_SIZE); config.mac_aggr_delim = __cpu_to_le32(TARGET_10X_MAC_AGGR_DELIM); val = TARGET_10X_RX_SKIP_DEFRAG_TIMEOUT_DUP_DETECTION_CHECK; config.rx_skip_defrag_timeout_dup_detection_check = __cpu_to_le32(val); config.vow_config = __cpu_to_le32(TARGET_10X_VOW_CONFIG); config.num_msdu_desc = __cpu_to_le32(TARGET_10X_NUM_MSDU_DESC); config.max_frag_entries = __cpu_to_le32(TARGET_10X_MAX_FRAG_ENTRIES); buf = ath10k_wmi_alloc_skb(ar, struct_size(cmd, mem_chunks.items, ar->wmi.num_mem_chunks)); if (!buf) return ERR_PTR(-ENOMEM); cmd = (struct wmi_init_cmd_10x *)buf->data; memcpy(&cmd->resource_config, &config, sizeof(config)); ath10k_wmi_put_host_mem_chunks(ar, &cmd->mem_chunks); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi init 10x\n"); return buf; } static struct sk_buff *ath10k_wmi_10_2_op_gen_init(struct ath10k *ar) { struct wmi_init_cmd_10_2 *cmd; struct sk_buff *buf; struct wmi_resource_config_10x config = {}; u32 val, features; config.num_vdevs = __cpu_to_le32(TARGET_10X_NUM_VDEVS); config.num_peer_keys = __cpu_to_le32(TARGET_10X_NUM_PEER_KEYS); if (ath10k_peer_stats_enabled(ar)) { config.num_peers = __cpu_to_le32(TARGET_10X_TX_STATS_NUM_PEERS); config.num_tids = __cpu_to_le32(TARGET_10X_TX_STATS_NUM_TIDS); } else { config.num_peers = __cpu_to_le32(TARGET_10X_NUM_PEERS); config.num_tids = __cpu_to_le32(TARGET_10X_NUM_TIDS); } config.ast_skid_limit = 
__cpu_to_le32(TARGET_10X_AST_SKID_LIMIT); config.tx_chain_mask = __cpu_to_le32(TARGET_10X_TX_CHAIN_MASK); config.rx_chain_mask = __cpu_to_le32(TARGET_10X_RX_CHAIN_MASK); config.rx_timeout_pri_vo = __cpu_to_le32(TARGET_10X_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_vi = __cpu_to_le32(TARGET_10X_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_be = __cpu_to_le32(TARGET_10X_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri_bk = __cpu_to_le32(TARGET_10X_RX_TIMEOUT_HI_PRI); config.rx_decap_mode = __cpu_to_le32(ar->wmi.rx_decap_mode); config.scan_max_pending_reqs = __cpu_to_le32(TARGET_10X_SCAN_MAX_PENDING_REQS); config.bmiss_offload_max_vdev = __cpu_to_le32(TARGET_10X_BMISS_OFFLOAD_MAX_VDEV); config.roam_offload_max_vdev = __cpu_to_le32(TARGET_10X_ROAM_OFFLOAD_MAX_VDEV); config.roam_offload_max_ap_profiles = __cpu_to_le32(TARGET_10X_ROAM_OFFLOAD_MAX_AP_PROFILES); config.num_mcast_groups = __cpu_to_le32(TARGET_10X_NUM_MCAST_GROUPS); config.num_mcast_table_elems = __cpu_to_le32(TARGET_10X_NUM_MCAST_TABLE_ELEMS); config.mcast2ucast_mode = __cpu_to_le32(TARGET_10X_MCAST2UCAST_MODE); config.tx_dbg_log_size = __cpu_to_le32(TARGET_10X_TX_DBG_LOG_SIZE); config.num_wds_entries = __cpu_to_le32(TARGET_10X_NUM_WDS_ENTRIES); config.dma_burst_size = __cpu_to_le32(TARGET_10_2_DMA_BURST_SIZE); config.mac_aggr_delim = __cpu_to_le32(TARGET_10X_MAC_AGGR_DELIM); val = TARGET_10X_RX_SKIP_DEFRAG_TIMEOUT_DUP_DETECTION_CHECK; config.rx_skip_defrag_timeout_dup_detection_check = __cpu_to_le32(val); config.vow_config = __cpu_to_le32(TARGET_10X_VOW_CONFIG); config.num_msdu_desc = __cpu_to_le32(TARGET_10X_NUM_MSDU_DESC); config.max_frag_entries = __cpu_to_le32(TARGET_10X_MAX_FRAG_ENTRIES); buf = ath10k_wmi_alloc_skb(ar, struct_size(cmd, mem_chunks.items, ar->wmi.num_mem_chunks)); if (!buf) return ERR_PTR(-ENOMEM); cmd = (struct wmi_init_cmd_10_2 *)buf->data; features = WMI_10_2_RX_BATCH_MODE; if (test_bit(ATH10K_FLAG_BTCOEX, &ar->dev_flags) && test_bit(WMI_SERVICE_COEX_GPIO, ar->wmi.svc_map)) features |= WMI_10_2_COEX_GPIO; if (ath10k_peer_stats_enabled(ar)) features |= WMI_10_2_PEER_STATS; if (test_bit(WMI_SERVICE_BSS_CHANNEL_INFO_64, ar->wmi.svc_map)) features |= WMI_10_2_BSS_CHAN_INFO; cmd->resource_config.feature_mask = __cpu_to_le32(features); memcpy(&cmd->resource_config.common, &config, sizeof(config)); ath10k_wmi_put_host_mem_chunks(ar, &cmd->mem_chunks); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi init 10.2\n"); return buf; } static struct sk_buff *ath10k_wmi_10_4_op_gen_init(struct ath10k *ar) { struct wmi_init_cmd_10_4 *cmd; struct sk_buff *buf; struct wmi_resource_config_10_4 config = {}; config.num_vdevs = __cpu_to_le32(ar->max_num_vdevs); config.num_peers = __cpu_to_le32(ar->max_num_peers); config.num_active_peers = __cpu_to_le32(ar->num_active_peers); config.num_tids = __cpu_to_le32(ar->num_tids); config.num_offload_peers = __cpu_to_le32(TARGET_10_4_NUM_OFFLOAD_PEERS); config.num_offload_reorder_buffs = __cpu_to_le32(TARGET_10_4_NUM_OFFLOAD_REORDER_BUFFS); config.num_peer_keys = __cpu_to_le32(TARGET_10_4_NUM_PEER_KEYS); config.ast_skid_limit = __cpu_to_le32(TARGET_10_4_AST_SKID_LIMIT); config.tx_chain_mask = __cpu_to_le32(ar->hw_params.tx_chain_mask); config.rx_chain_mask = __cpu_to_le32(ar->hw_params.rx_chain_mask); config.rx_timeout_pri[0] = __cpu_to_le32(TARGET_10_4_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri[1] = __cpu_to_le32(TARGET_10_4_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri[2] = __cpu_to_le32(TARGET_10_4_RX_TIMEOUT_LO_PRI); config.rx_timeout_pri[3] = __cpu_to_le32(TARGET_10_4_RX_TIMEOUT_HI_PRI); config.rx_decap_mode = 
__cpu_to_le32(ar->wmi.rx_decap_mode); config.scan_max_pending_req = __cpu_to_le32(TARGET_10_4_SCAN_MAX_REQS); config.bmiss_offload_max_vdev = __cpu_to_le32(TARGET_10_4_BMISS_OFFLOAD_MAX_VDEV); config.roam_offload_max_vdev = __cpu_to_le32(TARGET_10_4_ROAM_OFFLOAD_MAX_VDEV); config.roam_offload_max_ap_profiles = __cpu_to_le32(TARGET_10_4_ROAM_OFFLOAD_MAX_PROFILES); config.num_mcast_groups = __cpu_to_le32(TARGET_10_4_NUM_MCAST_GROUPS); config.num_mcast_table_elems = __cpu_to_le32(TARGET_10_4_NUM_MCAST_TABLE_ELEMS); config.mcast2ucast_mode = __cpu_to_le32(TARGET_10_4_MCAST2UCAST_MODE); config.tx_dbg_log_size = __cpu_to_le32(TARGET_10_4_TX_DBG_LOG_SIZE); config.num_wds_entries = __cpu_to_le32(TARGET_10_4_NUM_WDS_ENTRIES); config.dma_burst_size = __cpu_to_le32(TARGET_10_4_DMA_BURST_SIZE); config.mac_aggr_delim = __cpu_to_le32(TARGET_10_4_MAC_AGGR_DELIM); config.rx_skip_defrag_timeout_dup_detection_check = __cpu_to_le32(TARGET_10_4_RX_SKIP_DEFRAG_TIMEOUT_DUP_DETECTION_CHECK); config.vow_config = __cpu_to_le32(TARGET_10_4_VOW_CONFIG); config.gtk_offload_max_vdev = __cpu_to_le32(TARGET_10_4_GTK_OFFLOAD_MAX_VDEV); config.num_msdu_desc = __cpu_to_le32(ar->htt.max_num_pending_tx); config.max_frag_entries = __cpu_to_le32(TARGET_10_4_11AC_TX_MAX_FRAGS); config.max_peer_ext_stats = __cpu_to_le32(TARGET_10_4_MAX_PEER_EXT_STATS); config.smart_ant_cap = __cpu_to_le32(TARGET_10_4_SMART_ANT_CAP); config.bk_minfree = __cpu_to_le32(TARGET_10_4_BK_MIN_FREE); config.be_minfree = __cpu_to_le32(TARGET_10_4_BE_MIN_FREE); config.vi_minfree = __cpu_to_le32(TARGET_10_4_VI_MIN_FREE); config.vo_minfree = __cpu_to_le32(TARGET_10_4_VO_MIN_FREE); config.rx_batchmode = __cpu_to_le32(TARGET_10_4_RX_BATCH_MODE); config.tt_support = __cpu_to_le32(TARGET_10_4_THERMAL_THROTTLING_CONFIG); config.atf_config = __cpu_to_le32(TARGET_10_4_ATF_CONFIG); config.iphdr_pad_config = __cpu_to_le32(TARGET_10_4_IPHDR_PAD_CONFIG); config.qwrap_config = __cpu_to_le32(TARGET_10_4_QWRAP_CONFIG); buf = ath10k_wmi_alloc_skb(ar, struct_size(cmd, mem_chunks.items, ar->wmi.num_mem_chunks)); if (!buf) return ERR_PTR(-ENOMEM); cmd = (struct wmi_init_cmd_10_4 *)buf->data; memcpy(&cmd->resource_config, &config, sizeof(config)); ath10k_wmi_put_host_mem_chunks(ar, &cmd->mem_chunks); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi init 10.4\n"); return buf; } int ath10k_wmi_start_scan_verify(const struct wmi_start_scan_arg *arg) { if (arg->ie_len > WLAN_SCAN_PARAMS_MAX_IE_LEN) return -EINVAL; if (arg->n_channels > ARRAY_SIZE(arg->channels)) return -EINVAL; if (arg->n_ssids > WLAN_SCAN_PARAMS_MAX_SSID) return -EINVAL; if (arg->n_bssids > WLAN_SCAN_PARAMS_MAX_BSSID) return -EINVAL; return 0; } static size_t ath10k_wmi_start_scan_tlvs_len(const struct wmi_start_scan_arg *arg) { int len = 0; if (arg->ie_len) { len += sizeof(struct wmi_ie_data); len += roundup(arg->ie_len, 4); } if (arg->n_channels) { len += sizeof(struct wmi_chan_list); len += sizeof(__le32) * arg->n_channels; } if (arg->n_ssids) { len += sizeof(struct wmi_ssid_list); len += sizeof(struct wmi_ssid) * arg->n_ssids; } if (arg->n_bssids) { len += sizeof(struct wmi_bssid_list); len += sizeof(struct wmi_mac_addr) * arg->n_bssids; } return len; } void ath10k_wmi_put_start_scan_common(struct wmi_start_scan_common *cmn, const struct wmi_start_scan_arg *arg) { u32 scan_id; u32 scan_req_id; scan_id = WMI_HOST_SCAN_REQ_ID_PREFIX; scan_id |= arg->scan_id; scan_req_id = WMI_HOST_SCAN_REQUESTOR_ID_PREFIX; scan_req_id |= arg->scan_req_id; cmn->scan_id = __cpu_to_le32(scan_id); cmn->scan_req_id = __cpu_to_le32(scan_req_id); 
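        /* The host prefixes OR'ed into scan_id and scan_req_id above tag the
         * request as host-initiated; everything below is copied 1:1 from the
         * caller-supplied scan argument.
         */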
cmn->vdev_id = __cpu_to_le32(arg->vdev_id); cmn->scan_priority = __cpu_to_le32(arg->scan_priority); cmn->notify_scan_events = __cpu_to_le32(arg->notify_scan_events); cmn->dwell_time_active = __cpu_to_le32(arg->dwell_time_active); cmn->dwell_time_passive = __cpu_to_le32(arg->dwell_time_passive); cmn->min_rest_time = __cpu_to_le32(arg->min_rest_time); cmn->max_rest_time = __cpu_to_le32(arg->max_rest_time); cmn->repeat_probe_time = __cpu_to_le32(arg->repeat_probe_time); cmn->probe_spacing_time = __cpu_to_le32(arg->probe_spacing_time); cmn->idle_time = __cpu_to_le32(arg->idle_time); cmn->max_scan_time = __cpu_to_le32(arg->max_scan_time); cmn->probe_delay = __cpu_to_le32(arg->probe_delay); cmn->scan_ctrl_flags = __cpu_to_le32(arg->scan_ctrl_flags); } static void ath10k_wmi_put_start_scan_tlvs(u8 *tlvs, const struct wmi_start_scan_arg *arg) { struct wmi_ie_data *ie; struct wmi_chan_list *channels; struct wmi_ssid_list *ssids; struct wmi_bssid_list *bssids; void *ptr = tlvs; int i; if (arg->n_channels) { channels = ptr; channels->tag = __cpu_to_le32(WMI_CHAN_LIST_TAG); channels->num_chan = __cpu_to_le32(arg->n_channels); for (i = 0; i < arg->n_channels; i++) channels->channel_list[i].freq = __cpu_to_le16(arg->channels[i]); ptr += sizeof(*channels); ptr += sizeof(__le32) * arg->n_channels; } if (arg->n_ssids) { ssids = ptr; ssids->tag = __cpu_to_le32(WMI_SSID_LIST_TAG); ssids->num_ssids = __cpu_to_le32(arg->n_ssids); for (i = 0; i < arg->n_ssids; i++) { ssids->ssids[i].ssid_len = __cpu_to_le32(arg->ssids[i].len); memcpy(&ssids->ssids[i].ssid, arg->ssids[i].ssid, arg->ssids[i].len); } ptr += sizeof(*ssids); ptr += sizeof(struct wmi_ssid) * arg->n_ssids; } if (arg->n_bssids) { bssids = ptr; bssids->tag = __cpu_to_le32(WMI_BSSID_LIST_TAG); bssids->num_bssid = __cpu_to_le32(arg->n_bssids); for (i = 0; i < arg->n_bssids; i++) ether_addr_copy(bssids->bssid_list[i].addr, arg->bssids[i].bssid); ptr += sizeof(*bssids); ptr += sizeof(struct wmi_mac_addr) * arg->n_bssids; } if (arg->ie_len) { ie = ptr; ie->tag = __cpu_to_le32(WMI_IE_TAG); ie->ie_len = __cpu_to_le32(arg->ie_len); memcpy(ie->ie_data, arg->ie, arg->ie_len); ptr += sizeof(*ie); ptr += roundup(arg->ie_len, 4); } } static struct sk_buff * ath10k_wmi_op_gen_start_scan(struct ath10k *ar, const struct wmi_start_scan_arg *arg) { struct wmi_start_scan_cmd *cmd; struct sk_buff *skb; size_t len; int ret; ret = ath10k_wmi_start_scan_verify(arg); if (ret) return ERR_PTR(ret); len = sizeof(*cmd) + ath10k_wmi_start_scan_tlvs_len(arg); skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_start_scan_cmd *)skb->data; ath10k_wmi_put_start_scan_common(&cmd->common, arg); ath10k_wmi_put_start_scan_tlvs(cmd->tlvs, arg); cmd->burst_duration_ms = __cpu_to_le32(0); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi start scan\n"); return skb; } static struct sk_buff * ath10k_wmi_10x_op_gen_start_scan(struct ath10k *ar, const struct wmi_start_scan_arg *arg) { struct wmi_10x_start_scan_cmd *cmd; struct sk_buff *skb; size_t len; int ret; ret = ath10k_wmi_start_scan_verify(arg); if (ret) return ERR_PTR(ret); len = sizeof(*cmd) + ath10k_wmi_start_scan_tlvs_len(arg); skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_10x_start_scan_cmd *)skb->data; ath10k_wmi_put_start_scan_common(&cmd->common, arg); ath10k_wmi_put_start_scan_tlvs(cmd->tlvs, arg); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi 10x start scan\n"); return skb; } void ath10k_wmi_start_scan_init(struct ath10k *ar, struct wmi_start_scan_arg *arg) { /* setup 
commonly used values */ arg->scan_req_id = 1; arg->scan_priority = WMI_SCAN_PRIORITY_LOW; arg->dwell_time_active = 50; arg->dwell_time_passive = 150; arg->min_rest_time = 50; arg->max_rest_time = 500; arg->repeat_probe_time = 0; arg->probe_spacing_time = 0; arg->idle_time = 0; arg->max_scan_time = 20000; arg->probe_delay = 5; arg->notify_scan_events = WMI_SCAN_EVENT_STARTED | WMI_SCAN_EVENT_COMPLETED | WMI_SCAN_EVENT_BSS_CHANNEL | WMI_SCAN_EVENT_FOREIGN_CHANNEL | WMI_SCAN_EVENT_FOREIGN_CHANNEL_EXIT | WMI_SCAN_EVENT_DEQUEUED; arg->scan_ctrl_flags |= WMI_SCAN_CHAN_STAT_EVENT; arg->n_bssids = 1; arg->bssids[0].bssid = "\xFF\xFF\xFF\xFF\xFF\xFF"; } static struct sk_buff * ath10k_wmi_op_gen_stop_scan(struct ath10k *ar, const struct wmi_stop_scan_arg *arg) { struct wmi_stop_scan_cmd *cmd; struct sk_buff *skb; u32 scan_id; u32 req_id; if (arg->req_id > 0xFFF) return ERR_PTR(-EINVAL); if (arg->req_type == WMI_SCAN_STOP_ONE && arg->u.scan_id > 0xFFF) return ERR_PTR(-EINVAL); skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); scan_id = arg->u.scan_id; scan_id |= WMI_HOST_SCAN_REQ_ID_PREFIX; req_id = arg->req_id; req_id |= WMI_HOST_SCAN_REQUESTOR_ID_PREFIX; cmd = (struct wmi_stop_scan_cmd *)skb->data; cmd->req_type = __cpu_to_le32(arg->req_type); cmd->vdev_id = __cpu_to_le32(arg->u.vdev_id); cmd->scan_id = __cpu_to_le32(scan_id); cmd->scan_req_id = __cpu_to_le32(req_id); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi stop scan reqid %d req_type %d vdev/scan_id %d\n", arg->req_id, arg->req_type, arg->u.scan_id); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_create(struct ath10k *ar, u32 vdev_id, enum wmi_vdev_type type, enum wmi_vdev_subtype subtype, const u8 macaddr[ETH_ALEN]) { struct wmi_vdev_create_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_create_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->vdev_type = __cpu_to_le32(type); cmd->vdev_subtype = __cpu_to_le32(subtype); ether_addr_copy(cmd->vdev_macaddr.addr, macaddr); ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI vdev create: id %d type %d subtype %d macaddr %pM\n", vdev_id, type, subtype, macaddr); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_delete(struct ath10k *ar, u32 vdev_id) { struct wmi_vdev_delete_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_delete_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI vdev delete id %d\n", vdev_id); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_start(struct ath10k *ar, const struct wmi_vdev_start_request_arg *arg, bool restart) { struct wmi_vdev_start_request_cmd *cmd; struct sk_buff *skb; const char *cmdname; u32 flags = 0; if (WARN_ON(arg->hidden_ssid && !arg->ssid)) return ERR_PTR(-EINVAL); if (WARN_ON(arg->ssid_len > sizeof(cmd->ssid.ssid))) return ERR_PTR(-EINVAL); if (restart) cmdname = "restart"; else cmdname = "start"; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); if (arg->hidden_ssid) flags |= WMI_VDEV_START_HIDDEN_SSID; if (arg->pmf_enabled) flags |= WMI_VDEV_START_PMF_ENABLED; cmd = (struct wmi_vdev_start_request_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(arg->vdev_id); cmd->disable_hw_ack = __cpu_to_le32(arg->disable_hw_ack); cmd->beacon_interval = __cpu_to_le32(arg->bcn_intval); cmd->dtim_period = __cpu_to_le32(arg->dtim_period); cmd->flags = __cpu_to_le32(flags); 
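        /* Beacon rate/power and the optional SSID follow; a hidden-SSID vdev
         * was already required to carry an SSID by the WARN_ON check above.
         */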
cmd->bcn_tx_rate = __cpu_to_le32(arg->bcn_tx_rate); cmd->bcn_tx_power = __cpu_to_le32(arg->bcn_tx_power); if (arg->ssid) { cmd->ssid.ssid_len = __cpu_to_le32(arg->ssid_len); memcpy(cmd->ssid.ssid, arg->ssid, arg->ssid_len); } ath10k_wmi_put_wmi_channel(ar, &cmd->chan, &arg->channel); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi vdev %s id 0x%x flags: 0x%0X, freq %d, mode %d, ch_flags: 0x%0X, max_power: %d\n", cmdname, arg->vdev_id, flags, arg->channel.freq, arg->channel.mode, cmd->chan.flags, arg->channel.max_power); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_stop(struct ath10k *ar, u32 vdev_id) { struct wmi_vdev_stop_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_stop_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi vdev stop id 0x%x\n", vdev_id); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_up(struct ath10k *ar, u32 vdev_id, u32 aid, const u8 *bssid) { struct wmi_vdev_up_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_up_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->vdev_assoc_id = __cpu_to_le32(aid); ether_addr_copy(cmd->vdev_bssid.addr, bssid); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt vdev up id 0x%x assoc id %d bssid %pM\n", vdev_id, aid, bssid); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_down(struct ath10k *ar, u32 vdev_id) { struct wmi_vdev_down_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_down_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt vdev down id 0x%x\n", vdev_id); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_set_param(struct ath10k *ar, u32 vdev_id, u32 param_id, u32 param_value) { struct wmi_vdev_set_param_cmd *cmd; struct sk_buff *skb; if (param_id == WMI_VDEV_PARAM_UNSUPPORTED) { ath10k_dbg(ar, ATH10K_DBG_WMI, "vdev param %d not supported by firmware\n", param_id); return ERR_PTR(-EOPNOTSUPP); } skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_set_param_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->param_id = __cpu_to_le32(param_id); cmd->param_value = __cpu_to_le32(param_value); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi vdev id 0x%x set param %d value %d\n", vdev_id, param_id, param_value); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_install_key(struct ath10k *ar, const struct wmi_vdev_install_key_arg *arg) { struct wmi_vdev_install_key_cmd *cmd; struct sk_buff *skb; if (arg->key_cipher == WMI_CIPHER_NONE && arg->key_data != NULL) return ERR_PTR(-EINVAL); if (arg->key_cipher != WMI_CIPHER_NONE && arg->key_data == NULL) return ERR_PTR(-EINVAL); skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd) + arg->key_len); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_install_key_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(arg->vdev_id); cmd->key_idx = __cpu_to_le32(arg->key_idx); cmd->key_flags = __cpu_to_le32(arg->key_flags); cmd->key_cipher = __cpu_to_le32(arg->key_cipher); cmd->key_len = __cpu_to_le32(arg->key_len); cmd->key_txmic_len = __cpu_to_le32(arg->key_txmic_len); cmd->key_rxmic_len = __cpu_to_le32(arg->key_rxmic_len); if (arg->macaddr) ether_addr_copy(cmd->peer_macaddr.addr, arg->macaddr); if (arg->key_data) memcpy(cmd->key_data, arg->key_data, arg->key_len); 
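        /* Only key metadata (index, cipher, length) is logged below; the key
         * material copied above is never printed.
         */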
ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi vdev install key idx %d cipher %d len %d\n", arg->key_idx, arg->key_cipher, arg->key_len); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_spectral_conf(struct ath10k *ar, const struct wmi_vdev_spectral_conf_arg *arg) { struct wmi_vdev_spectral_conf_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_spectral_conf_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(arg->vdev_id); cmd->scan_count = __cpu_to_le32(arg->scan_count); cmd->scan_period = __cpu_to_le32(arg->scan_period); cmd->scan_priority = __cpu_to_le32(arg->scan_priority); cmd->scan_fft_size = __cpu_to_le32(arg->scan_fft_size); cmd->scan_gc_ena = __cpu_to_le32(arg->scan_gc_ena); cmd->scan_restart_ena = __cpu_to_le32(arg->scan_restart_ena); cmd->scan_noise_floor_ref = __cpu_to_le32(arg->scan_noise_floor_ref); cmd->scan_init_delay = __cpu_to_le32(arg->scan_init_delay); cmd->scan_nb_tone_thr = __cpu_to_le32(arg->scan_nb_tone_thr); cmd->scan_str_bin_thr = __cpu_to_le32(arg->scan_str_bin_thr); cmd->scan_wb_rpt_mode = __cpu_to_le32(arg->scan_wb_rpt_mode); cmd->scan_rssi_rpt_mode = __cpu_to_le32(arg->scan_rssi_rpt_mode); cmd->scan_rssi_thr = __cpu_to_le32(arg->scan_rssi_thr); cmd->scan_pwr_format = __cpu_to_le32(arg->scan_pwr_format); cmd->scan_rpt_mode = __cpu_to_le32(arg->scan_rpt_mode); cmd->scan_bin_scale = __cpu_to_le32(arg->scan_bin_scale); cmd->scan_dbm_adj = __cpu_to_le32(arg->scan_dbm_adj); cmd->scan_chn_mask = __cpu_to_le32(arg->scan_chn_mask); return skb; } static struct sk_buff * ath10k_wmi_op_gen_vdev_spectral_enable(struct ath10k *ar, u32 vdev_id, u32 trigger, u32 enable) { struct wmi_vdev_spectral_enable_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_vdev_spectral_enable_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->trigger_cmd = __cpu_to_le32(trigger); cmd->enable_cmd = __cpu_to_le32(enable); return skb; } static struct sk_buff * ath10k_wmi_op_gen_peer_create(struct ath10k *ar, u32 vdev_id, const u8 peer_addr[ETH_ALEN], enum wmi_peer_type peer_type) { struct wmi_peer_create_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_peer_create_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ether_addr_copy(cmd->peer_macaddr.addr, peer_addr); cmd->peer_type = __cpu_to_le32(peer_type); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi peer create vdev_id %d peer_addr %pM\n", vdev_id, peer_addr); return skb; } static struct sk_buff * ath10k_wmi_op_gen_peer_delete(struct ath10k *ar, u32 vdev_id, const u8 peer_addr[ETH_ALEN]) { struct wmi_peer_delete_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_peer_delete_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ether_addr_copy(cmd->peer_macaddr.addr, peer_addr); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi peer delete vdev_id %d peer_addr %pM\n", vdev_id, peer_addr); return skb; } static struct sk_buff * ath10k_wmi_op_gen_peer_flush(struct ath10k *ar, u32 vdev_id, const u8 peer_addr[ETH_ALEN], u32 tid_bitmap) { struct wmi_peer_flush_tids_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_peer_flush_tids_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->peer_tid_bitmap = __cpu_to_le32(tid_bitmap); 
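        /* peer_tid_bitmap selects which of the peer's TIDs the firmware
         * should flush; the peer MAC address is filled in below.
         */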
ether_addr_copy(cmd->peer_macaddr.addr, peer_addr); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi peer flush vdev_id %d peer_addr %pM tids %08x\n", vdev_id, peer_addr, tid_bitmap); return skb; } static struct sk_buff * ath10k_wmi_op_gen_peer_set_param(struct ath10k *ar, u32 vdev_id, const u8 *peer_addr, enum wmi_peer_param param_id, u32 param_value) { struct wmi_peer_set_param_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_peer_set_param_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->param_id = __cpu_to_le32(param_id); cmd->param_value = __cpu_to_le32(param_value); ether_addr_copy(cmd->peer_macaddr.addr, peer_addr); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi vdev %d peer 0x%pM set param %d value %d\n", vdev_id, peer_addr, param_id, param_value); return skb; } static struct sk_buff * ath10k_wmi_op_gen_set_psmode(struct ath10k *ar, u32 vdev_id, enum wmi_sta_ps_mode psmode) { struct wmi_sta_powersave_mode_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_sta_powersave_mode_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->sta_ps_mode = __cpu_to_le32(psmode); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi set powersave id 0x%x mode %d\n", vdev_id, psmode); return skb; } static struct sk_buff * ath10k_wmi_op_gen_set_sta_ps(struct ath10k *ar, u32 vdev_id, enum wmi_sta_powersave_param param_id, u32 value) { struct wmi_sta_powersave_param_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_sta_powersave_param_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->param_id = __cpu_to_le32(param_id); cmd->param_value = __cpu_to_le32(value); ath10k_dbg(ar, ATH10K_DBG_STA, "wmi sta ps param vdev_id 0x%x param %d value %d\n", vdev_id, param_id, value); return skb; } static struct sk_buff * ath10k_wmi_op_gen_set_ap_ps(struct ath10k *ar, u32 vdev_id, const u8 *mac, enum wmi_ap_ps_peer_param param_id, u32 value) { struct wmi_ap_ps_peer_cmd *cmd; struct sk_buff *skb; if (!mac) return ERR_PTR(-EINVAL); skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_ap_ps_peer_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->param_id = __cpu_to_le32(param_id); cmd->param_value = __cpu_to_le32(value); ether_addr_copy(cmd->peer_macaddr.addr, mac); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi ap ps param vdev_id 0x%X param %d value %d mac_addr %pM\n", vdev_id, param_id, value, mac); return skb; } static struct sk_buff * ath10k_wmi_op_gen_scan_chan_list(struct ath10k *ar, const struct wmi_scan_chan_list_arg *arg) { struct wmi_scan_chan_list_cmd *cmd; struct sk_buff *skb; struct wmi_channel_arg *ch; struct wmi_channel *ci; int i; skb = ath10k_wmi_alloc_skb(ar, struct_size(cmd, chan_info, arg->n_channels)); if (!skb) return ERR_PTR(-EINVAL); cmd = (struct wmi_scan_chan_list_cmd *)skb->data; cmd->num_scan_chans = __cpu_to_le32(arg->n_channels); for (i = 0; i < arg->n_channels; i++) { ch = &arg->channels[i]; ci = &cmd->chan_info[i]; ath10k_wmi_put_wmi_channel(ar, ci, ch); } return skb; } static void ath10k_wmi_peer_assoc_fill(struct ath10k *ar, void *buf, const struct wmi_peer_assoc_complete_arg *arg) { struct wmi_common_peer_assoc_complete_cmd *cmd = buf; cmd->vdev_id = __cpu_to_le32(arg->vdev_id); cmd->peer_new_assoc = __cpu_to_le32(arg->peer_reassoc ? 
0 : 1); cmd->peer_associd = __cpu_to_le32(arg->peer_aid); cmd->peer_flags = __cpu_to_le32(arg->peer_flags); cmd->peer_caps = __cpu_to_le32(arg->peer_caps); cmd->peer_listen_intval = __cpu_to_le32(arg->peer_listen_intval); cmd->peer_ht_caps = __cpu_to_le32(arg->peer_ht_caps); cmd->peer_max_mpdu = __cpu_to_le32(arg->peer_max_mpdu); cmd->peer_mpdu_density = __cpu_to_le32(arg->peer_mpdu_density); cmd->peer_rate_caps = __cpu_to_le32(arg->peer_rate_caps); cmd->peer_nss = __cpu_to_le32(arg->peer_num_spatial_streams); cmd->peer_vht_caps = __cpu_to_le32(arg->peer_vht_caps); cmd->peer_phymode = __cpu_to_le32(arg->peer_phymode); ether_addr_copy(cmd->peer_macaddr.addr, arg->addr); cmd->peer_legacy_rates.num_rates = __cpu_to_le32(arg->peer_legacy_rates.num_rates); memcpy(cmd->peer_legacy_rates.rates, arg->peer_legacy_rates.rates, arg->peer_legacy_rates.num_rates); cmd->peer_ht_rates.num_rates = __cpu_to_le32(arg->peer_ht_rates.num_rates); memcpy(cmd->peer_ht_rates.rates, arg->peer_ht_rates.rates, arg->peer_ht_rates.num_rates); cmd->peer_vht_rates.rx_max_rate = __cpu_to_le32(arg->peer_vht_rates.rx_max_rate); cmd->peer_vht_rates.rx_mcs_set = __cpu_to_le32(arg->peer_vht_rates.rx_mcs_set); cmd->peer_vht_rates.tx_max_rate = __cpu_to_le32(arg->peer_vht_rates.tx_max_rate); cmd->peer_vht_rates.tx_mcs_set = __cpu_to_le32(arg->peer_vht_rates.tx_mcs_set); } static void ath10k_wmi_peer_assoc_fill_main(struct ath10k *ar, void *buf, const struct wmi_peer_assoc_complete_arg *arg) { struct wmi_main_peer_assoc_complete_cmd *cmd = buf; ath10k_wmi_peer_assoc_fill(ar, buf, arg); memset(cmd->peer_ht_info, 0, sizeof(cmd->peer_ht_info)); } static void ath10k_wmi_peer_assoc_fill_10_1(struct ath10k *ar, void *buf, const struct wmi_peer_assoc_complete_arg *arg) { ath10k_wmi_peer_assoc_fill(ar, buf, arg); } static void ath10k_wmi_peer_assoc_fill_10_2(struct ath10k *ar, void *buf, const struct wmi_peer_assoc_complete_arg *arg) { struct wmi_10_2_peer_assoc_complete_cmd *cmd = buf; int max_mcs, max_nss; u32 info0; /* TODO: Is using max values okay with firmware? */ max_mcs = 0xf; max_nss = 0xf; info0 = SM(max_mcs, WMI_PEER_ASSOC_INFO0_MAX_MCS_IDX) | SM(max_nss, WMI_PEER_ASSOC_INFO0_MAX_NSS); ath10k_wmi_peer_assoc_fill(ar, buf, arg); cmd->info0 = __cpu_to_le32(info0); } static void ath10k_wmi_peer_assoc_fill_10_4(struct ath10k *ar, void *buf, const struct wmi_peer_assoc_complete_arg *arg) { struct wmi_10_4_peer_assoc_complete_cmd *cmd = buf; ath10k_wmi_peer_assoc_fill_10_2(ar, buf, arg); cmd->peer_bw_rxnss_override = __cpu_to_le32(arg->peer_bw_rxnss_override); } static int ath10k_wmi_peer_assoc_check_arg(const struct wmi_peer_assoc_complete_arg *arg) { if (arg->peer_mpdu_density > 16) return -EINVAL; if (arg->peer_legacy_rates.num_rates > MAX_SUPPORTED_RATES) return -EINVAL; if (arg->peer_ht_rates.num_rates > MAX_SUPPORTED_RATES) return -EINVAL; return 0; } static struct sk_buff * ath10k_wmi_op_gen_peer_assoc(struct ath10k *ar, const struct wmi_peer_assoc_complete_arg *arg) { size_t len = sizeof(struct wmi_main_peer_assoc_complete_cmd); struct sk_buff *skb; int ret; ret = ath10k_wmi_peer_assoc_check_arg(arg); if (ret) return ERR_PTR(ret); skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) return ERR_PTR(-ENOMEM); ath10k_wmi_peer_assoc_fill_main(ar, skb->data, arg); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi peer assoc vdev %d addr %pM (%s)\n", arg->vdev_id, arg->addr, arg->peer_reassoc ? 
"reassociate" : "new"); return skb; } static struct sk_buff * ath10k_wmi_10_1_op_gen_peer_assoc(struct ath10k *ar, const struct wmi_peer_assoc_complete_arg *arg) { size_t len = sizeof(struct wmi_10_1_peer_assoc_complete_cmd); struct sk_buff *skb; int ret; ret = ath10k_wmi_peer_assoc_check_arg(arg); if (ret) return ERR_PTR(ret); skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) return ERR_PTR(-ENOMEM); ath10k_wmi_peer_assoc_fill_10_1(ar, skb->data, arg); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi peer assoc vdev %d addr %pM (%s)\n", arg->vdev_id, arg->addr, arg->peer_reassoc ? "reassociate" : "new"); return skb; } static struct sk_buff * ath10k_wmi_10_2_op_gen_peer_assoc(struct ath10k *ar, const struct wmi_peer_assoc_complete_arg *arg) { size_t len = sizeof(struct wmi_10_2_peer_assoc_complete_cmd); struct sk_buff *skb; int ret; ret = ath10k_wmi_peer_assoc_check_arg(arg); if (ret) return ERR_PTR(ret); skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) return ERR_PTR(-ENOMEM); ath10k_wmi_peer_assoc_fill_10_2(ar, skb->data, arg); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi peer assoc vdev %d addr %pM (%s)\n", arg->vdev_id, arg->addr, arg->peer_reassoc ? "reassociate" : "new"); return skb; } static struct sk_buff * ath10k_wmi_10_4_op_gen_peer_assoc(struct ath10k *ar, const struct wmi_peer_assoc_complete_arg *arg) { size_t len = sizeof(struct wmi_10_4_peer_assoc_complete_cmd); struct sk_buff *skb; int ret; ret = ath10k_wmi_peer_assoc_check_arg(arg); if (ret) return ERR_PTR(ret); skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) return ERR_PTR(-ENOMEM); ath10k_wmi_peer_assoc_fill_10_4(ar, skb->data, arg); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi peer assoc vdev %d addr %pM (%s)\n", arg->vdev_id, arg->addr, arg->peer_reassoc ? "reassociate" : "new"); return skb; } static struct sk_buff * ath10k_wmi_10_2_op_gen_pdev_get_temperature(struct ath10k *ar) { struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, 0); if (!skb) return ERR_PTR(-ENOMEM); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev get temperature\n"); return skb; } static struct sk_buff * ath10k_wmi_10_2_op_gen_pdev_bss_chan_info(struct ath10k *ar, enum wmi_bss_survey_req_type type) { struct wmi_pdev_chan_info_req_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_pdev_chan_info_req_cmd *)skb->data; cmd->type = __cpu_to_le32(type); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev bss info request type %d\n", type); return skb; } /* This function assumes the beacon is already DMA mapped */ static struct sk_buff * ath10k_wmi_op_gen_beacon_dma(struct ath10k *ar, u32 vdev_id, const void *bcn, size_t bcn_len, u32 bcn_paddr, bool dtim_zero, bool deliver_cab) { struct wmi_bcn_tx_ref_cmd *cmd; struct sk_buff *skb; struct ieee80211_hdr *hdr; u16 fc; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); hdr = (struct ieee80211_hdr *)bcn; fc = le16_to_cpu(hdr->frame_control); cmd = (struct wmi_bcn_tx_ref_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->data_len = __cpu_to_le32(bcn_len); cmd->data_ptr = __cpu_to_le32(bcn_paddr); cmd->msdu_id = 0; cmd->frame_control = __cpu_to_le32(fc); cmd->flags = 0; cmd->antenna_mask = __cpu_to_le32(WMI_BCN_TX_REF_DEF_ANTENNA); if (dtim_zero) cmd->flags |= __cpu_to_le32(WMI_BCN_TX_REF_FLAG_DTIM_ZERO); if (deliver_cab) cmd->flags |= __cpu_to_le32(WMI_BCN_TX_REF_FLAG_DELIVER_CAB); return skb; } void ath10k_wmi_set_wmm_param(struct wmi_wmm_params *params, const struct wmi_wmm_params_arg *arg) { params->cwmin = __cpu_to_le32(arg->cwmin); 
params->cwmax = __cpu_to_le32(arg->cwmax); params->aifs = __cpu_to_le32(arg->aifs); params->txop = __cpu_to_le32(arg->txop); params->acm = __cpu_to_le32(arg->acm); params->no_ack = __cpu_to_le32(arg->no_ack); } static struct sk_buff * ath10k_wmi_op_gen_pdev_set_wmm(struct ath10k *ar, const struct wmi_wmm_params_all_arg *arg) { struct wmi_pdev_set_wmm_params *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_pdev_set_wmm_params *)skb->data; ath10k_wmi_set_wmm_param(&cmd->ac_be, &arg->ac_be); ath10k_wmi_set_wmm_param(&cmd->ac_bk, &arg->ac_bk); ath10k_wmi_set_wmm_param(&cmd->ac_vi, &arg->ac_vi); ath10k_wmi_set_wmm_param(&cmd->ac_vo, &arg->ac_vo); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev set wmm params\n"); return skb; } static struct sk_buff * ath10k_wmi_op_gen_request_stats(struct ath10k *ar, u32 stats_mask) { struct wmi_request_stats_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_request_stats_cmd *)skb->data; cmd->stats_id = __cpu_to_le32(stats_mask); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi request stats 0x%08x\n", stats_mask); return skb; } static struct sk_buff * ath10k_wmi_op_gen_force_fw_hang(struct ath10k *ar, enum wmi_force_fw_hang_type type, u32 delay_ms) { struct wmi_force_fw_hang_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_force_fw_hang_cmd *)skb->data; cmd->type = __cpu_to_le32(type); cmd->delay_ms = __cpu_to_le32(delay_ms); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi force fw hang %d delay %d\n", type, delay_ms); return skb; } static struct sk_buff * ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct wmi_dbglog_cfg_cmd *cmd; struct sk_buff *skb; u32 cfg; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_dbglog_cfg_cmd *)skb->data; if (module_enable) { cfg = SM(log_level, ATH10K_DBGLOG_CFG_LOG_LVL); } else { /* set back defaults, all modules with WARN level */ cfg = SM(ATH10K_DBGLOG_LEVEL_WARN, ATH10K_DBGLOG_CFG_LOG_LVL); module_enable = ~0; } cmd->module_enable = __cpu_to_le32(module_enable); cmd->module_valid = __cpu_to_le32(~0); cmd->config_enable = __cpu_to_le32(cfg); cmd->config_valid = __cpu_to_le32(ATH10K_DBGLOG_CFG_LOG_LVL_MASK); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi dbglog cfg modules %08x %08x config %08x %08x\n", __le32_to_cpu(cmd->module_enable), __le32_to_cpu(cmd->module_valid), __le32_to_cpu(cmd->config_enable), __le32_to_cpu(cmd->config_valid)); return skb; } static struct sk_buff * ath10k_wmi_10_4_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct wmi_10_4_dbglog_cfg_cmd *cmd; struct sk_buff *skb; u32 cfg; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_10_4_dbglog_cfg_cmd *)skb->data; if (module_enable) { cfg = SM(log_level, ATH10K_DBGLOG_CFG_LOG_LVL); } else { /* set back defaults, all modules with WARN level */ cfg = SM(ATH10K_DBGLOG_LEVEL_WARN, ATH10K_DBGLOG_CFG_LOG_LVL); module_enable = ~0; } cmd->module_enable = __cpu_to_le64(module_enable); cmd->module_valid = __cpu_to_le64(~0); cmd->config_enable = __cpu_to_le32(cfg); cmd->config_valid = __cpu_to_le32(ATH10K_DBGLOG_CFG_LOG_LVL_MASK); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi dbglog cfg modules 0x%016llx 0x%016llx config %08x %08x\n", __le64_to_cpu(cmd->module_enable), __le64_to_cpu(cmd->module_valid), 
__le32_to_cpu(cmd->config_enable), __le32_to_cpu(cmd->config_valid)); return skb; } static struct sk_buff * ath10k_wmi_op_gen_pktlog_enable(struct ath10k *ar, u32 ev_bitmap) { struct wmi_pdev_pktlog_enable_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); ev_bitmap &= ATH10K_PKTLOG_ANY; cmd = (struct wmi_pdev_pktlog_enable_cmd *)skb->data; cmd->ev_bitmap = __cpu_to_le32(ev_bitmap); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi enable pktlog filter 0x%08x\n", ev_bitmap); return skb; } static struct sk_buff * ath10k_wmi_op_gen_pktlog_disable(struct ath10k *ar) { struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, 0); if (!skb) return ERR_PTR(-ENOMEM); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi disable pktlog\n"); return skb; } static struct sk_buff * ath10k_wmi_op_gen_pdev_set_quiet_mode(struct ath10k *ar, u32 period, u32 duration, u32 next_offset, u32 enabled) { struct wmi_pdev_set_quiet_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_pdev_set_quiet_cmd *)skb->data; cmd->period = __cpu_to_le32(period); cmd->duration = __cpu_to_le32(duration); cmd->next_start = __cpu_to_le32(next_offset); cmd->enabled = __cpu_to_le32(enabled); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi quiet param: period %u duration %u enabled %d\n", period, duration, enabled); return skb; } static struct sk_buff * ath10k_wmi_op_gen_addba_clear_resp(struct ath10k *ar, u32 vdev_id, const u8 *mac) { struct wmi_addba_clear_resp_cmd *cmd; struct sk_buff *skb; if (!mac) return ERR_PTR(-EINVAL); skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_addba_clear_resp_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ether_addr_copy(cmd->peer_macaddr.addr, mac); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi addba clear resp vdev_id 0x%X mac_addr %pM\n", vdev_id, mac); return skb; } static struct sk_buff * ath10k_wmi_op_gen_addba_send(struct ath10k *ar, u32 vdev_id, const u8 *mac, u32 tid, u32 buf_size) { struct wmi_addba_send_cmd *cmd; struct sk_buff *skb; if (!mac) return ERR_PTR(-EINVAL); skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_addba_send_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ether_addr_copy(cmd->peer_macaddr.addr, mac); cmd->tid = __cpu_to_le32(tid); cmd->buffersize = __cpu_to_le32(buf_size); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi addba send vdev_id 0x%X mac_addr %pM tid %u bufsize %u\n", vdev_id, mac, tid, buf_size); return skb; } static struct sk_buff * ath10k_wmi_op_gen_addba_set_resp(struct ath10k *ar, u32 vdev_id, const u8 *mac, u32 tid, u32 status) { struct wmi_addba_setresponse_cmd *cmd; struct sk_buff *skb; if (!mac) return ERR_PTR(-EINVAL); skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_addba_setresponse_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ether_addr_copy(cmd->peer_macaddr.addr, mac); cmd->tid = __cpu_to_le32(tid); cmd->statuscode = __cpu_to_le32(status); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi addba set resp vdev_id 0x%X mac_addr %pM tid %u status %u\n", vdev_id, mac, tid, status); return skb; } static struct sk_buff * ath10k_wmi_op_gen_delba_send(struct ath10k *ar, u32 vdev_id, const u8 *mac, u32 tid, u32 initiator, u32 reason) { struct wmi_delba_send_cmd *cmd; struct sk_buff *skb; if (!mac) return ERR_PTR(-EINVAL); skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct 
wmi_delba_send_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); ether_addr_copy(cmd->peer_macaddr.addr, mac); cmd->tid = __cpu_to_le32(tid); cmd->initiator = __cpu_to_le32(initiator); cmd->reasoncode = __cpu_to_le32(reason); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi delba send vdev_id 0x%X mac_addr %pM tid %u initiator %u reason %u\n", vdev_id, mac, tid, initiator, reason); return skb; } static struct sk_buff * ath10k_wmi_10_2_4_op_gen_pdev_get_tpc_config(struct ath10k *ar, u32 param) { struct wmi_pdev_get_tpc_config_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_pdev_get_tpc_config_cmd *)skb->data; cmd->param = __cpu_to_le32(param); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev get tpc config param %d\n", param); return skb; } static void ath10k_wmi_fw_pdev_base_stats_fill(const struct ath10k_fw_stats_pdev *pdev, char *buf, u32 *length) { u32 len = *length; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; len += scnprintf(buf + len, buf_len - len, "\n"); len += scnprintf(buf + len, buf_len - len, "%30s\n", "ath10k PDEV stats"); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Channel noise floor", pdev->ch_noise_floor); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "Channel TX power", pdev->chan_tx_power); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "TX frame count", pdev->tx_frame_count); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "RX frame count", pdev->rx_frame_count); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "RX clear count", pdev->rx_clear_count); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "Cycle count", pdev->cycle_count); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "PHY error count", pdev->phy_err_count); *length = len; } static void ath10k_wmi_fw_pdev_extra_stats_fill(const struct ath10k_fw_stats_pdev *pdev, char *buf, u32 *length) { u32 len = *length; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "RTS bad count", pdev->rts_bad); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "RTS good count", pdev->rts_good); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "FCS bad count", pdev->fcs_bad); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "No beacon count", pdev->no_beacons); len += scnprintf(buf + len, buf_len - len, "%30s %10u\n", "MIB int count", pdev->mib_int_count); len += scnprintf(buf + len, buf_len - len, "\n"); *length = len; } static void ath10k_wmi_fw_pdev_tx_stats_fill(const struct ath10k_fw_stats_pdev *pdev, char *buf, u32 *length) { u32 len = *length; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; len += scnprintf(buf + len, buf_len - len, "\n%30s\n", "ath10k PDEV TX stats"); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "HTT cookies queued", pdev->comp_queued); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "HTT cookies disp.", pdev->comp_delivered); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MSDU queued", pdev->msdu_enqued); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDU queued", pdev->mpdu_enqued); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MSDUs dropped", pdev->wmm_drop); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Local enqued", pdev->local_enqued); len += scnprintf(buf + len, buf_len - 
len, "%30s %10d\n", "Local freed", pdev->local_freed); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "HW queued", pdev->hw_queued); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "PPDUs reaped", pdev->hw_reaped); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Num underruns", pdev->underrun); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "PPDUs cleaned", pdev->tx_abort); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDUs requeued", pdev->mpdus_requeued); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Excessive retries", pdev->tx_ko); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "HW rate", pdev->data_rc); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Sched self triggers", pdev->self_triggers); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Dropped due to SW retries", pdev->sw_retry_failure); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Illegal rate phy errors", pdev->illgl_rate_phy_err); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Pdev continuous xretry", pdev->pdev_cont_xretry); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "TX timeout", pdev->pdev_tx_timeout); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "PDEV resets", pdev->pdev_resets); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "PHY underrun", pdev->phy_underrun); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDU is more than txop limit", pdev->txop_ovf); *length = len; } static void ath10k_wmi_fw_pdev_rx_stats_fill(const struct ath10k_fw_stats_pdev *pdev, char *buf, u32 *length) { u32 len = *length; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; len += scnprintf(buf + len, buf_len - len, "\n%30s\n", "ath10k PDEV RX stats"); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Mid PPDU route change", pdev->mid_ppdu_route_change); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Tot. 
number of statuses", pdev->status_rcvd); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Extra frags on rings 0", pdev->r0_frags); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Extra frags on rings 1", pdev->r1_frags); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Extra frags on rings 2", pdev->r2_frags); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Extra frags on rings 3", pdev->r3_frags); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MSDUs delivered to HTT", pdev->htt_msdus); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDUs delivered to HTT", pdev->htt_mpdus); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MSDUs delivered to stack", pdev->loc_msdus); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDUs delivered to stack", pdev->loc_mpdus); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Oversized AMSDUs", pdev->oversize_amsdu); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "PHY errors", pdev->phy_errs); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "PHY errors drops", pdev->phy_err_drop); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDU errors (FCS, MIC, ENC)", pdev->mpdu_errs); *length = len; } static void ath10k_wmi_fw_vdev_stats_fill(const struct ath10k_fw_stats_vdev *vdev, char *buf, u32 *length) { u32 len = *length; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; int i; len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "vdev id", vdev->vdev_id); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "beacon snr", vdev->beacon_snr); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "data snr", vdev->data_snr); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "num rx frames", vdev->num_rx_frames); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "num rts fail", vdev->num_rts_fail); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "num rts success", vdev->num_rts_success); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "num rx err", vdev->num_rx_err); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "num rx discard", vdev->num_rx_discard); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "num tx not acked", vdev->num_tx_not_acked); for (i = 0 ; i < ARRAY_SIZE(vdev->num_tx_frames); i++) len += scnprintf(buf + len, buf_len - len, "%25s [%02d] %u\n", "num tx frames", i, vdev->num_tx_frames[i]); for (i = 0 ; i < ARRAY_SIZE(vdev->num_tx_frames_retries); i++) len += scnprintf(buf + len, buf_len - len, "%25s [%02d] %u\n", "num tx frames retries", i, vdev->num_tx_frames_retries[i]); for (i = 0 ; i < ARRAY_SIZE(vdev->num_tx_frames_failures); i++) len += scnprintf(buf + len, buf_len - len, "%25s [%02d] %u\n", "num tx frames failures", i, vdev->num_tx_frames_failures[i]); for (i = 0 ; i < ARRAY_SIZE(vdev->tx_rate_history); i++) len += scnprintf(buf + len, buf_len - len, "%25s [%02d] 0x%08x\n", "tx rate history", i, vdev->tx_rate_history[i]); for (i = 0 ; i < ARRAY_SIZE(vdev->beacon_rssi_history); i++) len += scnprintf(buf + len, buf_len - len, "%25s [%02d] %u\n", "beacon rssi history", i, vdev->beacon_rssi_history[i]); len += scnprintf(buf + len, buf_len - len, "\n"); *length = len; } static void ath10k_wmi_fw_peer_stats_fill(const struct ath10k_fw_stats_peer *peer, char *buf, u32 *length, bool extended_peer) { u32 len = *length; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; len += scnprintf(buf + len, buf_len - len, "%30s %pM\n", "Peer MAC address", peer->peer_macaddr); len += scnprintf(buf + len, 
buf_len - len, "%30s %u\n", "Peer RSSI", peer->peer_rssi); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "Peer TX rate", peer->peer_tx_rate); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "Peer RX rate", peer->peer_rx_rate); if (!extended_peer) len += scnprintf(buf + len, buf_len - len, "%30s %llu\n", "Peer RX duration", peer->rx_duration); len += scnprintf(buf + len, buf_len - len, "\n"); *length = len; } static void ath10k_wmi_fw_extd_peer_stats_fill(const struct ath10k_fw_extd_stats_peer *peer, char *buf, u32 *length) { u32 len = *length; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; len += scnprintf(buf + len, buf_len - len, "%30s %pM\n", "Peer MAC address", peer->peer_macaddr); len += scnprintf(buf + len, buf_len - len, "%30s %llu\n", "Peer RX duration", peer->rx_duration); } void ath10k_wmi_main_op_fw_stats_fill(struct ath10k *ar, struct ath10k_fw_stats *fw_stats, char *buf) { u32 len = 0; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; const struct ath10k_fw_stats_pdev *pdev; const struct ath10k_fw_stats_vdev *vdev; const struct ath10k_fw_stats_peer *peer; size_t num_peers; size_t num_vdevs; spin_lock_bh(&ar->data_lock); pdev = list_first_entry_or_null(&fw_stats->pdevs, struct ath10k_fw_stats_pdev, list); if (!pdev) { ath10k_warn(ar, "failed to get pdev stats\n"); goto unlock; } num_peers = list_count_nodes(&fw_stats->peers); num_vdevs = list_count_nodes(&fw_stats->vdevs); ath10k_wmi_fw_pdev_base_stats_fill(pdev, buf, &len); ath10k_wmi_fw_pdev_tx_stats_fill(pdev, buf, &len); ath10k_wmi_fw_pdev_rx_stats_fill(pdev, buf, &len); len += scnprintf(buf + len, buf_len - len, "\n"); len += scnprintf(buf + len, buf_len - len, "%30s (%zu)\n", "ath10k VDEV stats", num_vdevs); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); list_for_each_entry(vdev, &fw_stats->vdevs, list) { ath10k_wmi_fw_vdev_stats_fill(vdev, buf, &len); } len += scnprintf(buf + len, buf_len - len, "\n"); len += scnprintf(buf + len, buf_len - len, "%30s (%zu)\n", "ath10k PEER stats", num_peers); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); list_for_each_entry(peer, &fw_stats->peers, list) { ath10k_wmi_fw_peer_stats_fill(peer, buf, &len, fw_stats->extended); } unlock: spin_unlock_bh(&ar->data_lock); if (len >= buf_len) buf[len - 1] = 0; else buf[len] = 0; } void ath10k_wmi_10x_op_fw_stats_fill(struct ath10k *ar, struct ath10k_fw_stats *fw_stats, char *buf) { unsigned int len = 0; unsigned int buf_len = ATH10K_FW_STATS_BUF_SIZE; const struct ath10k_fw_stats_pdev *pdev; const struct ath10k_fw_stats_vdev *vdev; const struct ath10k_fw_stats_peer *peer; size_t num_peers; size_t num_vdevs; spin_lock_bh(&ar->data_lock); pdev = list_first_entry_or_null(&fw_stats->pdevs, struct ath10k_fw_stats_pdev, list); if (!pdev) { ath10k_warn(ar, "failed to get pdev stats\n"); goto unlock; } num_peers = list_count_nodes(&fw_stats->peers); num_vdevs = list_count_nodes(&fw_stats->vdevs); ath10k_wmi_fw_pdev_base_stats_fill(pdev, buf, &len); ath10k_wmi_fw_pdev_extra_stats_fill(pdev, buf, &len); ath10k_wmi_fw_pdev_tx_stats_fill(pdev, buf, &len); ath10k_wmi_fw_pdev_rx_stats_fill(pdev, buf, &len); len += scnprintf(buf + len, buf_len - len, "\n"); len += scnprintf(buf + len, buf_len - len, "%30s (%zu)\n", "ath10k VDEV stats", num_vdevs); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); list_for_each_entry(vdev, &fw_stats->vdevs, list) { ath10k_wmi_fw_vdev_stats_fill(vdev, buf, &len); } len += scnprintf(buf + len, buf_len - len, "\n"); len += scnprintf(buf + len, 
buf_len - len, "%30s (%zu)\n", "ath10k PEER stats", num_peers); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); list_for_each_entry(peer, &fw_stats->peers, list) { ath10k_wmi_fw_peer_stats_fill(peer, buf, &len, fw_stats->extended); } unlock: spin_unlock_bh(&ar->data_lock); if (len >= buf_len) buf[len - 1] = 0; else buf[len] = 0; } static struct sk_buff * ath10k_wmi_op_gen_pdev_enable_adaptive_cca(struct ath10k *ar, u8 enable, u32 detect_level, u32 detect_margin) { struct wmi_pdev_set_adaptive_cca_params *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_pdev_set_adaptive_cca_params *)skb->data; cmd->enable = __cpu_to_le32(enable); cmd->cca_detect_level = __cpu_to_le32(detect_level); cmd->cca_detect_margin = __cpu_to_le32(detect_margin); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev set adaptive cca params enable:%d detection level:%d detection margin:%d\n", enable, detect_level, detect_margin); return skb; } static void ath10k_wmi_fw_vdev_stats_extd_fill(const struct ath10k_fw_stats_vdev_extd *vdev, char *buf, u32 *length) { u32 len = *length; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; u32 val; len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "vdev id", vdev->vdev_id); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "ppdu aggr count", vdev->ppdu_aggr_cnt); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "ppdu noack", vdev->ppdu_noack); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "mpdu queued", vdev->mpdu_queued); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "ppdu nonaggr count", vdev->ppdu_nonaggr_cnt); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "mpdu sw requeued", vdev->mpdu_sw_requeued); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "mpdu success retry", vdev->mpdu_suc_retry); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "mpdu success multitry", vdev->mpdu_suc_multitry); len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "mpdu fail retry", vdev->mpdu_fail_retry); val = vdev->tx_ftm_suc; if (val & WMI_VDEV_STATS_FTM_COUNT_VALID) len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "tx ftm success", MS(val, WMI_VDEV_STATS_FTM_COUNT)); val = vdev->tx_ftm_suc_retry; if (val & WMI_VDEV_STATS_FTM_COUNT_VALID) len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "tx ftm success retry", MS(val, WMI_VDEV_STATS_FTM_COUNT)); val = vdev->tx_ftm_fail; if (val & WMI_VDEV_STATS_FTM_COUNT_VALID) len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "tx ftm fail", MS(val, WMI_VDEV_STATS_FTM_COUNT)); val = vdev->rx_ftmr_cnt; if (val & WMI_VDEV_STATS_FTM_COUNT_VALID) len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "rx ftm request count", MS(val, WMI_VDEV_STATS_FTM_COUNT)); val = vdev->rx_ftmr_dup_cnt; if (val & WMI_VDEV_STATS_FTM_COUNT_VALID) len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "rx ftm request dup count", MS(val, WMI_VDEV_STATS_FTM_COUNT)); val = vdev->rx_iftmr_cnt; if (val & WMI_VDEV_STATS_FTM_COUNT_VALID) len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "rx initial ftm req count", MS(val, WMI_VDEV_STATS_FTM_COUNT)); val = vdev->rx_iftmr_dup_cnt; if (val & WMI_VDEV_STATS_FTM_COUNT_VALID) len += scnprintf(buf + len, buf_len - len, "%30s %u\n", "rx initial ftm req dup cnt", MS(val, WMI_VDEV_STATS_FTM_COUNT)); len += scnprintf(buf + len, buf_len - len, "\n"); *length = len; } void ath10k_wmi_10_4_op_fw_stats_fill(struct ath10k *ar, struct ath10k_fw_stats *fw_stats, char *buf) { u32 len = 
0; u32 buf_len = ATH10K_FW_STATS_BUF_SIZE; const struct ath10k_fw_stats_pdev *pdev; const struct ath10k_fw_stats_vdev_extd *vdev; const struct ath10k_fw_stats_peer *peer; const struct ath10k_fw_extd_stats_peer *extd_peer; size_t num_peers; size_t num_vdevs; spin_lock_bh(&ar->data_lock); pdev = list_first_entry_or_null(&fw_stats->pdevs, struct ath10k_fw_stats_pdev, list); if (!pdev) { ath10k_warn(ar, "failed to get pdev stats\n"); goto unlock; } num_peers = list_count_nodes(&fw_stats->peers); num_vdevs = list_count_nodes(&fw_stats->vdevs); ath10k_wmi_fw_pdev_base_stats_fill(pdev, buf, &len); ath10k_wmi_fw_pdev_extra_stats_fill(pdev, buf, &len); ath10k_wmi_fw_pdev_tx_stats_fill(pdev, buf, &len); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "HW paused", pdev->hw_paused); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Seqs posted", pdev->seq_posted); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Seqs failed queueing", pdev->seq_failed_queueing); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Seqs completed", pdev->seq_completed); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Seqs restarted", pdev->seq_restarted); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MU Seqs posted", pdev->mu_seq_posted); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDUs SW flushed", pdev->mpdus_sw_flush); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDUs HW filtered", pdev->mpdus_hw_filter); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDUs truncated", pdev->mpdus_truncated); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDUs receive no ACK", pdev->mpdus_ack_failed); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "MPDUs expired", pdev->mpdus_expired); ath10k_wmi_fw_pdev_rx_stats_fill(pdev, buf, &len); len += scnprintf(buf + len, buf_len - len, "%30s %10d\n", "Num Rx Overflow errors", pdev->rx_ovfl_errs); len += scnprintf(buf + len, buf_len - len, "\n"); len += scnprintf(buf + len, buf_len - len, "%30s (%zu)\n", "ath10k VDEV stats", num_vdevs); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); list_for_each_entry(vdev, &fw_stats->vdevs, list) { ath10k_wmi_fw_vdev_stats_extd_fill(vdev, buf, &len); } len += scnprintf(buf + len, buf_len - len, "\n"); len += scnprintf(buf + len, buf_len - len, "%30s (%zu)\n", "ath10k PEER stats", num_peers); len += scnprintf(buf + len, buf_len - len, "%30s\n\n", "================="); list_for_each_entry(peer, &fw_stats->peers, list) { ath10k_wmi_fw_peer_stats_fill(peer, buf, &len, fw_stats->extended); } if (fw_stats->extended) { list_for_each_entry(extd_peer, &fw_stats->peers_extd, list) { ath10k_wmi_fw_extd_peer_stats_fill(extd_peer, buf, &len); } } unlock: spin_unlock_bh(&ar->data_lock); if (len >= buf_len) buf[len - 1] = 0; else buf[len] = 0; } int ath10k_wmi_op_get_vdev_subtype(struct ath10k *ar, enum wmi_vdev_subtype subtype) { switch (subtype) { case WMI_VDEV_SUBTYPE_NONE: return WMI_VDEV_SUBTYPE_LEGACY_NONE; case WMI_VDEV_SUBTYPE_P2P_DEVICE: return WMI_VDEV_SUBTYPE_LEGACY_P2P_DEV; case WMI_VDEV_SUBTYPE_P2P_CLIENT: return WMI_VDEV_SUBTYPE_LEGACY_P2P_CLI; case WMI_VDEV_SUBTYPE_P2P_GO: return WMI_VDEV_SUBTYPE_LEGACY_P2P_GO; case WMI_VDEV_SUBTYPE_PROXY_STA: return WMI_VDEV_SUBTYPE_LEGACY_PROXY_STA; case WMI_VDEV_SUBTYPE_MESH_11S: case WMI_VDEV_SUBTYPE_MESH_NON_11S: return -EOPNOTSUPP; } return -EOPNOTSUPP; } static int ath10k_wmi_10_2_4_op_get_vdev_subtype(struct ath10k *ar, enum wmi_vdev_subtype subtype) { switch 
(subtype) { case WMI_VDEV_SUBTYPE_NONE: return WMI_VDEV_SUBTYPE_10_2_4_NONE; case WMI_VDEV_SUBTYPE_P2P_DEVICE: return WMI_VDEV_SUBTYPE_10_2_4_P2P_DEV; case WMI_VDEV_SUBTYPE_P2P_CLIENT: return WMI_VDEV_SUBTYPE_10_2_4_P2P_CLI; case WMI_VDEV_SUBTYPE_P2P_GO: return WMI_VDEV_SUBTYPE_10_2_4_P2P_GO; case WMI_VDEV_SUBTYPE_PROXY_STA: return WMI_VDEV_SUBTYPE_10_2_4_PROXY_STA; case WMI_VDEV_SUBTYPE_MESH_11S: return WMI_VDEV_SUBTYPE_10_2_4_MESH_11S; case WMI_VDEV_SUBTYPE_MESH_NON_11S: return -EOPNOTSUPP; } return -EOPNOTSUPP; } static int ath10k_wmi_10_4_op_get_vdev_subtype(struct ath10k *ar, enum wmi_vdev_subtype subtype) { switch (subtype) { case WMI_VDEV_SUBTYPE_NONE: return WMI_VDEV_SUBTYPE_10_4_NONE; case WMI_VDEV_SUBTYPE_P2P_DEVICE: return WMI_VDEV_SUBTYPE_10_4_P2P_DEV; case WMI_VDEV_SUBTYPE_P2P_CLIENT: return WMI_VDEV_SUBTYPE_10_4_P2P_CLI; case WMI_VDEV_SUBTYPE_P2P_GO: return WMI_VDEV_SUBTYPE_10_4_P2P_GO; case WMI_VDEV_SUBTYPE_PROXY_STA: return WMI_VDEV_SUBTYPE_10_4_PROXY_STA; case WMI_VDEV_SUBTYPE_MESH_11S: return WMI_VDEV_SUBTYPE_10_4_MESH_11S; case WMI_VDEV_SUBTYPE_MESH_NON_11S: return WMI_VDEV_SUBTYPE_10_4_MESH_NON_11S; } return -EOPNOTSUPP; } static struct sk_buff * ath10k_wmi_10_4_ext_resource_config(struct ath10k *ar, enum wmi_host_platform_type type, u32 fw_feature_bitmap) { struct wmi_ext_resource_config_10_4_cmd *cmd; struct sk_buff *skb; u32 num_tdls_sleep_sta = 0; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); if (test_bit(WMI_SERVICE_TDLS_UAPSD_SLEEP_STA, ar->wmi.svc_map)) num_tdls_sleep_sta = TARGET_10_4_NUM_TDLS_SLEEP_STA; cmd = (struct wmi_ext_resource_config_10_4_cmd *)skb->data; cmd->host_platform_config = __cpu_to_le32(type); cmd->fw_feature_bitmap = __cpu_to_le32(fw_feature_bitmap); cmd->wlan_gpio_priority = __cpu_to_le32(ar->coex_gpio_pin); cmd->coex_version = __cpu_to_le32(WMI_NO_COEX_VERSION_SUPPORT); cmd->coex_gpio_pin1 = __cpu_to_le32(-1); cmd->coex_gpio_pin2 = __cpu_to_le32(-1); cmd->coex_gpio_pin3 = __cpu_to_le32(-1); cmd->num_tdls_vdevs = __cpu_to_le32(TARGET_10_4_NUM_TDLS_VDEVS); cmd->num_tdls_conn_table_entries = __cpu_to_le32(20); cmd->max_tdls_concurrent_sleep_sta = __cpu_to_le32(num_tdls_sleep_sta); cmd->max_tdls_concurrent_buffer_sta = __cpu_to_le32(TARGET_10_4_NUM_TDLS_BUFFER_STA); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi ext resource config host type %d firmware feature bitmap %08x\n", type, fw_feature_bitmap); return skb; } static struct sk_buff * ath10k_wmi_10_4_gen_update_fw_tdls_state(struct ath10k *ar, u32 vdev_id, enum wmi_tdls_state state) { struct wmi_10_4_tdls_set_state_cmd *cmd; struct sk_buff *skb; u32 options = 0; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); if (test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map) && state == WMI_TDLS_ENABLE_ACTIVE) state = WMI_TDLS_ENABLE_PASSIVE; if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map)) options |= WMI_TDLS_BUFFER_STA_EN; cmd = (struct wmi_10_4_tdls_set_state_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(vdev_id); cmd->state = __cpu_to_le32(state); cmd->notification_interval_ms = __cpu_to_le32(5000); cmd->tx_discovery_threshold = __cpu_to_le32(100); cmd->tx_teardown_threshold = __cpu_to_le32(5); cmd->rssi_teardown_threshold = __cpu_to_le32(-75); cmd->rssi_delta = __cpu_to_le32(-20); cmd->tdls_options = __cpu_to_le32(options); cmd->tdls_peer_traffic_ind_window = __cpu_to_le32(2); cmd->tdls_peer_traffic_response_timeout_ms = __cpu_to_le32(5000); cmd->tdls_puapsd_mask = __cpu_to_le32(0xf); 
cmd->tdls_puapsd_inactivity_time_ms = __cpu_to_le32(0); cmd->tdls_puapsd_rx_frame_threshold = __cpu_to_le32(10); cmd->teardown_notification_ms = __cpu_to_le32(10); cmd->tdls_peer_kickout_threshold = __cpu_to_le32(96); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi update fw tdls state %d for vdev %i\n", state, vdev_id); return skb; } static u32 ath10k_wmi_prepare_peer_qos(u8 uapsd_queues, u8 sp) { u32 peer_qos = 0; if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) peer_qos |= WMI_TDLS_PEER_QOS_AC_VO; if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) peer_qos |= WMI_TDLS_PEER_QOS_AC_VI; if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) peer_qos |= WMI_TDLS_PEER_QOS_AC_BK; if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) peer_qos |= WMI_TDLS_PEER_QOS_AC_BE; peer_qos |= SM(sp, WMI_TDLS_PEER_SP); return peer_qos; } static struct sk_buff * ath10k_wmi_10_4_op_gen_pdev_get_tpc_table_cmdid(struct ath10k *ar, u32 param) { struct wmi_pdev_get_tpc_table_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_pdev_get_tpc_table_cmd *)skb->data; cmd->param = __cpu_to_le32(param); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev get tpc table param:%d\n", param); return skb; } static struct sk_buff * ath10k_wmi_10_4_gen_tdls_peer_update(struct ath10k *ar, const struct wmi_tdls_peer_update_cmd_arg *arg, const struct wmi_tdls_peer_capab_arg *cap, const struct wmi_channel_arg *chan_arg) { struct wmi_10_4_tdls_peer_update_cmd *cmd; struct wmi_tdls_peer_capabilities *peer_cap; struct wmi_channel *chan; struct sk_buff *skb; u32 peer_qos; int len, chan_len; int i; /* tdls peer update cmd has place holder for one channel*/ chan_len = cap->peer_chan_len ? (cap->peer_chan_len - 1) : 0; len = sizeof(*cmd) + chan_len * sizeof(*chan); skb = ath10k_wmi_alloc_skb(ar, len); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_10_4_tdls_peer_update_cmd *)skb->data; cmd->vdev_id = __cpu_to_le32(arg->vdev_id); ether_addr_copy(cmd->peer_macaddr.addr, arg->addr); cmd->peer_state = __cpu_to_le32(arg->peer_state); peer_qos = ath10k_wmi_prepare_peer_qos(cap->peer_uapsd_queues, cap->peer_max_sp); peer_cap = &cmd->peer_capab; peer_cap->peer_qos = __cpu_to_le32(peer_qos); peer_cap->buff_sta_support = __cpu_to_le32(cap->buff_sta_support); peer_cap->off_chan_support = __cpu_to_le32(cap->off_chan_support); peer_cap->peer_curr_operclass = __cpu_to_le32(cap->peer_curr_operclass); peer_cap->self_curr_operclass = __cpu_to_le32(cap->self_curr_operclass); peer_cap->peer_chan_len = __cpu_to_le32(cap->peer_chan_len); peer_cap->peer_operclass_len = __cpu_to_le32(cap->peer_operclass_len); for (i = 0; i < WMI_TDLS_MAX_SUPP_OPER_CLASSES; i++) peer_cap->peer_operclass[i] = cap->peer_operclass[i]; peer_cap->is_peer_responder = __cpu_to_le32(cap->is_peer_responder); peer_cap->pref_offchan_num = __cpu_to_le32(cap->pref_offchan_num); peer_cap->pref_offchan_bw = __cpu_to_le32(cap->pref_offchan_bw); for (i = 0; i < cap->peer_chan_len; i++) { chan = (struct wmi_channel *)&peer_cap->peer_chan_list[i]; ath10k_wmi_put_wmi_channel(ar, chan, &chan_arg[i]); } ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tdls peer update vdev %i state %d n_chans %u\n", arg->vdev_id, arg->peer_state, cap->peer_chan_len); return skb; } static struct sk_buff * ath10k_wmi_10_4_gen_radar_found(struct ath10k *ar, const struct ath10k_radar_found_info *arg) { struct wmi_radar_found_info *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct 
wmi_radar_found_info *)skb->data; cmd->pri_min = __cpu_to_le32(arg->pri_min); cmd->pri_max = __cpu_to_le32(arg->pri_max); cmd->width_min = __cpu_to_le32(arg->width_min); cmd->width_max = __cpu_to_le32(arg->width_max); cmd->sidx_min = __cpu_to_le32(arg->sidx_min); cmd->sidx_max = __cpu_to_le32(arg->sidx_max); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi radar found pri_min %d pri_max %d width_min %d width_max %d sidx_min %d sidx_max %d\n", arg->pri_min, arg->pri_max, arg->width_min, arg->width_max, arg->sidx_min, arg->sidx_max); return skb; } static struct sk_buff * ath10k_wmi_10_4_gen_per_peer_per_tid_cfg(struct ath10k *ar, const struct wmi_per_peer_per_tid_cfg_arg *arg) { struct wmi_peer_per_tid_cfg_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); memset(skb->data, 0, sizeof(*cmd)); cmd = (struct wmi_peer_per_tid_cfg_cmd *)skb->data; cmd->vdev_id = cpu_to_le32(arg->vdev_id); ether_addr_copy(cmd->peer_macaddr.addr, arg->peer_macaddr.addr); cmd->tid = cpu_to_le32(arg->tid); cmd->ack_policy = cpu_to_le32(arg->ack_policy); cmd->aggr_control = cpu_to_le32(arg->aggr_control); cmd->rate_control = cpu_to_le32(arg->rate_ctrl); cmd->retry_count = cpu_to_le32(arg->retry_count); cmd->rcode_flags = cpu_to_le32(arg->rcode_flags); cmd->ext_tid_cfg_bitmap = cpu_to_le32(arg->ext_tid_cfg_bitmap); cmd->rtscts_ctrl = cpu_to_le32(arg->rtscts_ctrl); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi noack tid %d vdev id %d ack_policy %d aggr %u rate_ctrl %u rcflag %u retry_count %d rtscts %d ext_tid_cfg_bitmap %d mac_addr %pM\n", arg->tid, arg->vdev_id, arg->ack_policy, arg->aggr_control, arg->rate_ctrl, arg->rcode_flags, arg->retry_count, arg->rtscts_ctrl, arg->ext_tid_cfg_bitmap, arg->peer_macaddr.addr); return skb; } static struct sk_buff * ath10k_wmi_op_gen_echo(struct ath10k *ar, u32 value) { struct wmi_echo_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_echo_cmd *)skb->data; cmd->value = cpu_to_le32(value); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi echo value 0x%08x\n", value); return skb; } int ath10k_wmi_barrier(struct ath10k *ar) { int ret; int time_left; spin_lock_bh(&ar->data_lock); reinit_completion(&ar->wmi.barrier); spin_unlock_bh(&ar->data_lock); ret = ath10k_wmi_echo(ar, ATH10K_WMI_BARRIER_ECHO_ID); if (ret) { ath10k_warn(ar, "failed to submit wmi echo: %d\n", ret); return ret; } time_left = wait_for_completion_timeout(&ar->wmi.barrier, ATH10K_WMI_BARRIER_TIMEOUT_HZ); if (!time_left) return -ETIMEDOUT; return 0; } static struct sk_buff * ath10k_wmi_10_2_4_op_gen_bb_timing(struct ath10k *ar, const struct wmi_bb_timing_cfg_arg *arg) { struct wmi_pdev_bb_timing_cfg_cmd *cmd; struct sk_buff *skb; skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); if (!skb) return ERR_PTR(-ENOMEM); cmd = (struct wmi_pdev_bb_timing_cfg_cmd *)skb->data; cmd->bb_tx_timing = __cpu_to_le32(arg->bb_tx_timing); cmd->bb_xpa_timing = __cpu_to_le32(arg->bb_xpa_timing); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev bb_tx_timing 0x%x bb_xpa_timing 0x%x\n", arg->bb_tx_timing, arg->bb_xpa_timing); return skb; } static const struct wmi_ops wmi_ops = { .rx = ath10k_wmi_op_rx, .map_svc = wmi_main_svc_map, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, .pull_ch_info = ath10k_wmi_op_pull_ch_info_ev, .pull_vdev_start = ath10k_wmi_op_pull_vdev_start_ev, .pull_peer_kick = ath10k_wmi_op_pull_peer_kick_ev, .pull_swba = ath10k_wmi_op_pull_swba_ev, .pull_phyerr_hdr = 
ath10k_wmi_op_pull_phyerr_ev_hdr, .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_svc_rdy = ath10k_wmi_main_op_pull_svc_rdy_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_fw_stats = ath10k_wmi_main_op_pull_fw_stats, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, .gen_pdev_set_rd = ath10k_wmi_op_gen_pdev_set_rd, .gen_pdev_set_param = ath10k_wmi_op_gen_pdev_set_param, .gen_init = ath10k_wmi_op_gen_init, .gen_start_scan = ath10k_wmi_op_gen_start_scan, .gen_stop_scan = ath10k_wmi_op_gen_stop_scan, .gen_vdev_create = ath10k_wmi_op_gen_vdev_create, .gen_vdev_delete = ath10k_wmi_op_gen_vdev_delete, .gen_vdev_start = ath10k_wmi_op_gen_vdev_start, .gen_vdev_stop = ath10k_wmi_op_gen_vdev_stop, .gen_vdev_up = ath10k_wmi_op_gen_vdev_up, .gen_vdev_down = ath10k_wmi_op_gen_vdev_down, .gen_vdev_set_param = ath10k_wmi_op_gen_vdev_set_param, .gen_vdev_install_key = ath10k_wmi_op_gen_vdev_install_key, .gen_vdev_spectral_conf = ath10k_wmi_op_gen_vdev_spectral_conf, .gen_vdev_spectral_enable = ath10k_wmi_op_gen_vdev_spectral_enable, /* .gen_vdev_wmm_conf not implemented */ .gen_peer_create = ath10k_wmi_op_gen_peer_create, .gen_peer_delete = ath10k_wmi_op_gen_peer_delete, .gen_peer_flush = ath10k_wmi_op_gen_peer_flush, .gen_peer_set_param = ath10k_wmi_op_gen_peer_set_param, .gen_peer_assoc = ath10k_wmi_op_gen_peer_assoc, .gen_set_psmode = ath10k_wmi_op_gen_set_psmode, .gen_set_sta_ps = ath10k_wmi_op_gen_set_sta_ps, .gen_set_ap_ps = ath10k_wmi_op_gen_set_ap_ps, .gen_scan_chan_list = ath10k_wmi_op_gen_scan_chan_list, .gen_beacon_dma = ath10k_wmi_op_gen_beacon_dma, .gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm, .gen_request_stats = ath10k_wmi_op_gen_request_stats, .gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang, .gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx, .gen_dbglog_cfg = ath10k_wmi_op_gen_dbglog_cfg, .gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable, .gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable, .gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode, /* .gen_pdev_get_temperature not implemented */ .gen_addba_clear_resp = ath10k_wmi_op_gen_addba_clear_resp, .gen_addba_send = ath10k_wmi_op_gen_addba_send, .gen_addba_set_resp = ath10k_wmi_op_gen_addba_set_resp, .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, .gen_echo = ath10k_wmi_op_gen_echo, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ /* .gen_adaptive_qcs not implemented */ /* .gen_pdev_enable_adaptive_cca not implemented */ }; static const struct wmi_ops wmi_10_1_ops = { .rx = ath10k_wmi_10_1_op_rx, .map_svc = wmi_10x_svc_map, .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .pull_fw_stats = ath10k_wmi_10x_op_pull_fw_stats, .gen_init = ath10k_wmi_10_1_op_gen_init, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, .gen_peer_assoc = ath10k_wmi_10_1_op_gen_peer_assoc, /* .gen_pdev_get_temperature not implemented */ /* shared with main branch */ .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, .pull_ch_info = ath10k_wmi_op_pull_ch_info_ev, .pull_vdev_start = ath10k_wmi_op_pull_vdev_start_ev, .pull_peer_kick = ath10k_wmi_op_pull_peer_kick_ev, .pull_swba = ath10k_wmi_op_pull_swba_ev, .pull_phyerr_hdr = 
ath10k_wmi_op_pull_phyerr_ev_hdr, .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, .gen_pdev_set_param = ath10k_wmi_op_gen_pdev_set_param, .gen_stop_scan = ath10k_wmi_op_gen_stop_scan, .gen_vdev_create = ath10k_wmi_op_gen_vdev_create, .gen_vdev_delete = ath10k_wmi_op_gen_vdev_delete, .gen_vdev_start = ath10k_wmi_op_gen_vdev_start, .gen_vdev_stop = ath10k_wmi_op_gen_vdev_stop, .gen_vdev_up = ath10k_wmi_op_gen_vdev_up, .gen_vdev_down = ath10k_wmi_op_gen_vdev_down, .gen_vdev_set_param = ath10k_wmi_op_gen_vdev_set_param, .gen_vdev_install_key = ath10k_wmi_op_gen_vdev_install_key, .gen_vdev_spectral_conf = ath10k_wmi_op_gen_vdev_spectral_conf, .gen_vdev_spectral_enable = ath10k_wmi_op_gen_vdev_spectral_enable, /* .gen_vdev_wmm_conf not implemented */ .gen_peer_create = ath10k_wmi_op_gen_peer_create, .gen_peer_delete = ath10k_wmi_op_gen_peer_delete, .gen_peer_flush = ath10k_wmi_op_gen_peer_flush, .gen_peer_set_param = ath10k_wmi_op_gen_peer_set_param, .gen_set_psmode = ath10k_wmi_op_gen_set_psmode, .gen_set_sta_ps = ath10k_wmi_op_gen_set_sta_ps, .gen_set_ap_ps = ath10k_wmi_op_gen_set_ap_ps, .gen_scan_chan_list = ath10k_wmi_op_gen_scan_chan_list, .gen_beacon_dma = ath10k_wmi_op_gen_beacon_dma, .gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm, .gen_request_stats = ath10k_wmi_op_gen_request_stats, .gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang, .gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx, .gen_dbglog_cfg = ath10k_wmi_op_gen_dbglog_cfg, .gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable, .gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable, .gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode, .gen_addba_clear_resp = ath10k_wmi_op_gen_addba_clear_resp, .gen_addba_send = ath10k_wmi_op_gen_addba_send, .gen_addba_set_resp = ath10k_wmi_op_gen_addba_set_resp, .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_10x_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, .gen_echo = ath10k_wmi_op_gen_echo, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ /* .gen_adaptive_qcs not implemented */ /* .gen_pdev_enable_adaptive_cca not implemented */ }; static const struct wmi_ops wmi_10_2_ops = { .rx = ath10k_wmi_10_2_op_rx, .pull_fw_stats = ath10k_wmi_10_2_op_pull_fw_stats, .gen_init = ath10k_wmi_10_2_op_gen_init, .gen_peer_assoc = ath10k_wmi_10_2_op_gen_peer_assoc, /* .gen_pdev_get_temperature not implemented */ /* shared with 10.1 */ .map_svc = wmi_10x_svc_map, .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, .gen_echo = ath10k_wmi_op_gen_echo, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, .pull_ch_info = ath10k_wmi_op_pull_ch_info_ev, .pull_vdev_start = ath10k_wmi_op_pull_vdev_start_ev, .pull_peer_kick = ath10k_wmi_op_pull_peer_kick_ev, .pull_swba = ath10k_wmi_op_pull_swba_ev, .pull_phyerr_hdr = ath10k_wmi_op_pull_phyerr_ev_hdr, .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, .gen_pdev_set_param = 
ath10k_wmi_op_gen_pdev_set_param, .gen_stop_scan = ath10k_wmi_op_gen_stop_scan, .gen_vdev_create = ath10k_wmi_op_gen_vdev_create, .gen_vdev_delete = ath10k_wmi_op_gen_vdev_delete, .gen_vdev_start = ath10k_wmi_op_gen_vdev_start, .gen_vdev_stop = ath10k_wmi_op_gen_vdev_stop, .gen_vdev_up = ath10k_wmi_op_gen_vdev_up, .gen_vdev_down = ath10k_wmi_op_gen_vdev_down, .gen_vdev_set_param = ath10k_wmi_op_gen_vdev_set_param, .gen_vdev_install_key = ath10k_wmi_op_gen_vdev_install_key, .gen_vdev_spectral_conf = ath10k_wmi_op_gen_vdev_spectral_conf, .gen_vdev_spectral_enable = ath10k_wmi_op_gen_vdev_spectral_enable, /* .gen_vdev_wmm_conf not implemented */ .gen_peer_create = ath10k_wmi_op_gen_peer_create, .gen_peer_delete = ath10k_wmi_op_gen_peer_delete, .gen_peer_flush = ath10k_wmi_op_gen_peer_flush, .gen_pdev_set_base_macaddr = ath10k_wmi_op_gen_pdev_set_base_macaddr, .gen_peer_set_param = ath10k_wmi_op_gen_peer_set_param, .gen_set_psmode = ath10k_wmi_op_gen_set_psmode, .gen_set_sta_ps = ath10k_wmi_op_gen_set_sta_ps, .gen_set_ap_ps = ath10k_wmi_op_gen_set_ap_ps, .gen_scan_chan_list = ath10k_wmi_op_gen_scan_chan_list, .gen_beacon_dma = ath10k_wmi_op_gen_beacon_dma, .gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm, .gen_request_stats = ath10k_wmi_op_gen_request_stats, .gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang, .gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx, .gen_dbglog_cfg = ath10k_wmi_op_gen_dbglog_cfg, .gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable, .gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable, .gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode, .gen_addba_clear_resp = ath10k_wmi_op_gen_addba_clear_resp, .gen_addba_send = ath10k_wmi_op_gen_addba_send, .gen_addba_set_resp = ath10k_wmi_op_gen_addba_set_resp, .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_10x_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, /* .gen_pdev_enable_adaptive_cca not implemented */ }; static const struct wmi_ops wmi_10_2_4_ops = { .rx = ath10k_wmi_10_2_op_rx, .pull_fw_stats = ath10k_wmi_10_2_4_op_pull_fw_stats, .gen_init = ath10k_wmi_10_2_op_gen_init, .gen_peer_assoc = ath10k_wmi_10_2_op_gen_peer_assoc, .gen_pdev_get_temperature = ath10k_wmi_10_2_op_gen_pdev_get_temperature, .gen_pdev_bss_chan_info_req = ath10k_wmi_10_2_op_gen_pdev_bss_chan_info, /* shared with 10.1 */ .map_svc = wmi_10x_svc_map, .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, .gen_echo = ath10k_wmi_op_gen_echo, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, .pull_ch_info = ath10k_wmi_op_pull_ch_info_ev, .pull_vdev_start = ath10k_wmi_op_pull_vdev_start_ev, .pull_peer_kick = ath10k_wmi_op_pull_peer_kick_ev, .pull_swba = ath10k_wmi_10_2_4_op_pull_swba_ev, .pull_phyerr_hdr = ath10k_wmi_op_pull_phyerr_ev_hdr, .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, .gen_pdev_set_param = ath10k_wmi_op_gen_pdev_set_param, .gen_stop_scan = ath10k_wmi_op_gen_stop_scan, .gen_vdev_create = ath10k_wmi_op_gen_vdev_create, .gen_vdev_delete = ath10k_wmi_op_gen_vdev_delete, .gen_vdev_start = ath10k_wmi_op_gen_vdev_start, .gen_vdev_stop = ath10k_wmi_op_gen_vdev_stop, .gen_vdev_up = ath10k_wmi_op_gen_vdev_up, .gen_vdev_down = 
ath10k_wmi_op_gen_vdev_down, .gen_vdev_set_param = ath10k_wmi_op_gen_vdev_set_param, .gen_vdev_install_key = ath10k_wmi_op_gen_vdev_install_key, .gen_vdev_spectral_conf = ath10k_wmi_op_gen_vdev_spectral_conf, .gen_vdev_spectral_enable = ath10k_wmi_op_gen_vdev_spectral_enable, .gen_peer_create = ath10k_wmi_op_gen_peer_create, .gen_peer_delete = ath10k_wmi_op_gen_peer_delete, .gen_peer_flush = ath10k_wmi_op_gen_peer_flush, .gen_peer_set_param = ath10k_wmi_op_gen_peer_set_param, .gen_set_psmode = ath10k_wmi_op_gen_set_psmode, .gen_set_sta_ps = ath10k_wmi_op_gen_set_sta_ps, .gen_set_ap_ps = ath10k_wmi_op_gen_set_ap_ps, .gen_scan_chan_list = ath10k_wmi_op_gen_scan_chan_list, .gen_beacon_dma = ath10k_wmi_op_gen_beacon_dma, .gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm, .gen_request_stats = ath10k_wmi_op_gen_request_stats, .gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang, .gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx, .gen_dbglog_cfg = ath10k_wmi_op_gen_dbglog_cfg, .gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable, .gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable, .gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode, .gen_addba_clear_resp = ath10k_wmi_op_gen_addba_clear_resp, .gen_addba_send = ath10k_wmi_op_gen_addba_send, .gen_addba_set_resp = ath10k_wmi_op_gen_addba_set_resp, .gen_delba_send = ath10k_wmi_op_gen_delba_send, .gen_pdev_get_tpc_config = ath10k_wmi_10_2_4_op_gen_pdev_get_tpc_config, .fw_stats_fill = ath10k_wmi_10x_op_fw_stats_fill, .gen_pdev_enable_adaptive_cca = ath10k_wmi_op_gen_pdev_enable_adaptive_cca, .get_vdev_subtype = ath10k_wmi_10_2_4_op_get_vdev_subtype, .gen_bb_timing = ath10k_wmi_10_2_4_op_gen_bb_timing, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ /* .gen_adaptive_qcs not implemented */ }; static const struct wmi_ops wmi_10_4_ops = { .rx = ath10k_wmi_10_4_op_rx, .map_svc = wmi_10_4_svc_map, .pull_fw_stats = ath10k_wmi_10_4_op_pull_fw_stats, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_10_4_op_pull_mgmt_rx_ev, .pull_ch_info = ath10k_wmi_10_4_op_pull_ch_info_ev, .pull_vdev_start = ath10k_wmi_op_pull_vdev_start_ev, .pull_peer_kick = ath10k_wmi_op_pull_peer_kick_ev, .pull_swba = ath10k_wmi_10_4_op_pull_swba_ev, .pull_phyerr_hdr = ath10k_wmi_10_4_op_pull_phyerr_ev_hdr, .pull_phyerr = ath10k_wmi_10_4_op_pull_phyerr_ev, .pull_svc_rdy = ath10k_wmi_main_op_pull_svc_rdy_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, .pull_dfs_status_ev = ath10k_wmi_10_4_op_pull_dfs_status_ev, .get_txbf_conf_scheme = ath10k_wmi_10_4_txbf_conf_scheme, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, .gen_pdev_set_base_macaddr = ath10k_wmi_op_gen_pdev_set_base_macaddr, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_pdev_set_param = ath10k_wmi_op_gen_pdev_set_param, .gen_init = ath10k_wmi_10_4_op_gen_init, .gen_start_scan = ath10k_wmi_op_gen_start_scan, .gen_stop_scan = ath10k_wmi_op_gen_stop_scan, .gen_vdev_create = ath10k_wmi_op_gen_vdev_create, .gen_vdev_delete = ath10k_wmi_op_gen_vdev_delete, .gen_vdev_start = ath10k_wmi_op_gen_vdev_start, .gen_vdev_stop = ath10k_wmi_op_gen_vdev_stop, .gen_vdev_up = ath10k_wmi_op_gen_vdev_up, .gen_vdev_down = ath10k_wmi_op_gen_vdev_down, .gen_vdev_set_param = ath10k_wmi_op_gen_vdev_set_param, .gen_vdev_install_key = ath10k_wmi_op_gen_vdev_install_key, .gen_vdev_spectral_conf = ath10k_wmi_op_gen_vdev_spectral_conf, .gen_vdev_spectral_enable = 
ath10k_wmi_op_gen_vdev_spectral_enable, .gen_peer_create = ath10k_wmi_op_gen_peer_create, .gen_peer_delete = ath10k_wmi_op_gen_peer_delete, .gen_peer_flush = ath10k_wmi_op_gen_peer_flush, .gen_peer_set_param = ath10k_wmi_op_gen_peer_set_param, .gen_peer_assoc = ath10k_wmi_10_4_op_gen_peer_assoc, .gen_set_psmode = ath10k_wmi_op_gen_set_psmode, .gen_set_sta_ps = ath10k_wmi_op_gen_set_sta_ps, .gen_set_ap_ps = ath10k_wmi_op_gen_set_ap_ps, .gen_scan_chan_list = ath10k_wmi_op_gen_scan_chan_list, .gen_beacon_dma = ath10k_wmi_op_gen_beacon_dma, .gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm, .gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang, .gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx, .gen_dbglog_cfg = ath10k_wmi_10_4_op_gen_dbglog_cfg, .gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable, .gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable, .gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode, .gen_addba_clear_resp = ath10k_wmi_op_gen_addba_clear_resp, .gen_addba_send = ath10k_wmi_op_gen_addba_send, .gen_addba_set_resp = ath10k_wmi_op_gen_addba_set_resp, .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_10_4_op_fw_stats_fill, .ext_resource_config = ath10k_wmi_10_4_ext_resource_config, .gen_update_fw_tdls_state = ath10k_wmi_10_4_gen_update_fw_tdls_state, .gen_tdls_peer_update = ath10k_wmi_10_4_gen_tdls_peer_update, .gen_pdev_get_tpc_table_cmdid = ath10k_wmi_10_4_op_gen_pdev_get_tpc_table_cmdid, .gen_radar_found = ath10k_wmi_10_4_gen_radar_found, .gen_per_peer_per_tid_cfg = ath10k_wmi_10_4_gen_per_peer_per_tid_cfg, /* shared with 10.2 */ .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_request_stats = ath10k_wmi_op_gen_request_stats, .gen_pdev_get_temperature = ath10k_wmi_10_2_op_gen_pdev_get_temperature, .get_vdev_subtype = ath10k_wmi_10_4_op_get_vdev_subtype, .gen_pdev_bss_chan_info_req = ath10k_wmi_10_2_op_gen_pdev_bss_chan_info, .gen_echo = ath10k_wmi_op_gen_echo, .gen_pdev_get_tpc_config = ath10k_wmi_10_2_4_op_gen_pdev_get_tpc_config, }; int ath10k_wmi_attach(struct ath10k *ar) { switch (ar->running_fw->fw_file.wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_10_4: ar->wmi.ops = &wmi_10_4_ops; ar->wmi.cmd = &wmi_10_4_cmd_map; ar->wmi.vdev_param = &wmi_10_4_vdev_param_map; ar->wmi.pdev_param = &wmi_10_4_pdev_param_map; ar->wmi.peer_param = &wmi_peer_param_map; ar->wmi.peer_flags = &wmi_10_2_peer_flags_map; ar->wmi_key_cipher = wmi_key_cipher_suites; break; case ATH10K_FW_WMI_OP_VERSION_10_2_4: ar->wmi.cmd = &wmi_10_2_4_cmd_map; ar->wmi.ops = &wmi_10_2_4_ops; ar->wmi.vdev_param = &wmi_10_2_4_vdev_param_map; ar->wmi.pdev_param = &wmi_10_2_4_pdev_param_map; ar->wmi.peer_param = &wmi_peer_param_map; ar->wmi.peer_flags = &wmi_10_2_peer_flags_map; ar->wmi_key_cipher = wmi_key_cipher_suites; break; case ATH10K_FW_WMI_OP_VERSION_10_2: ar->wmi.cmd = &wmi_10_2_cmd_map; ar->wmi.ops = &wmi_10_2_ops; ar->wmi.vdev_param = &wmi_10x_vdev_param_map; ar->wmi.pdev_param = &wmi_10x_pdev_param_map; ar->wmi.peer_param = &wmi_peer_param_map; ar->wmi.peer_flags = &wmi_10_2_peer_flags_map; ar->wmi_key_cipher = wmi_key_cipher_suites; break; case ATH10K_FW_WMI_OP_VERSION_10_1: ar->wmi.cmd = &wmi_10x_cmd_map; ar->wmi.ops = &wmi_10_1_ops; ar->wmi.vdev_param = &wmi_10x_vdev_param_map; ar->wmi.pdev_param = &wmi_10x_pdev_param_map; ar->wmi.peer_param = &wmi_peer_param_map; ar->wmi.peer_flags = &wmi_10x_peer_flags_map; ar->wmi_key_cipher = wmi_key_cipher_suites; break; case ATH10K_FW_WMI_OP_VERSION_MAIN: ar->wmi.cmd = &wmi_cmd_map; ar->wmi.ops = &wmi_ops; ar->wmi.vdev_param = 
&wmi_vdev_param_map;
		ar->wmi.pdev_param = &wmi_pdev_param_map;
		ar->wmi.peer_param = &wmi_peer_param_map;
		ar->wmi.peer_flags = &wmi_peer_flags_map;
		ar->wmi_key_cipher = wmi_key_cipher_suites;
		break;
	case ATH10K_FW_WMI_OP_VERSION_TLV:
		ath10k_wmi_tlv_attach(ar);
		ar->wmi_key_cipher = wmi_tlv_key_cipher_suites;
		break;
	case ATH10K_FW_WMI_OP_VERSION_UNSET:
	case ATH10K_FW_WMI_OP_VERSION_MAX:
		ath10k_err(ar, "unsupported WMI op version: %d\n",
			   ar->running_fw->fw_file.wmi_op_version);
		return -EINVAL;
	}

	init_completion(&ar->wmi.service_ready);
	init_completion(&ar->wmi.unified_ready);
	init_completion(&ar->wmi.barrier);
	init_completion(&ar->wmi.radar_confirm);

	INIT_WORK(&ar->svc_rdy_work, ath10k_wmi_event_service_ready_work);
	INIT_WORK(&ar->radar_confirmation_work,
		  ath10k_radar_confirmation_work);

	if (test_bit(ATH10K_FW_FEATURE_MGMT_TX_BY_REF,
		     ar->running_fw->fw_file.fw_features)) {
		idr_init(&ar->wmi.mgmt_pending_tx);
	}

	return 0;
}

void ath10k_wmi_free_host_mem(struct ath10k *ar)
{
	int i;

	/* free the host memory chunks requested by firmware */
	for (i = 0; i < ar->wmi.num_mem_chunks; i++) {
		dma_free_coherent(ar->dev,
				  ar->wmi.mem_chunks[i].len,
				  ar->wmi.mem_chunks[i].vaddr,
				  ar->wmi.mem_chunks[i].paddr);
	}

	ar->wmi.num_mem_chunks = 0;
}

static int ath10k_wmi_mgmt_tx_clean_up_pending(int msdu_id, void *ptr,
					       void *ctx)
{
	struct ath10k_mgmt_tx_pkt_addr *pkt_addr = ptr;
	struct ath10k *ar = ctx;
	struct sk_buff *msdu;

	ath10k_dbg(ar, ATH10K_DBG_WMI,
		   "force cleanup mgmt msdu_id %u\n", msdu_id);

	msdu = pkt_addr->vaddr;
	dma_unmap_single(ar->dev, pkt_addr->paddr,
			 msdu->len, DMA_TO_DEVICE);
	ieee80211_free_txskb(ar->hw, msdu);

	return 0;
}

void ath10k_wmi_detach(struct ath10k *ar)
{
	if (test_bit(ATH10K_FW_FEATURE_MGMT_TX_BY_REF,
		     ar->running_fw->fw_file.fw_features)) {
		spin_lock_bh(&ar->data_lock);
		idr_for_each(&ar->wmi.mgmt_pending_tx,
			     ath10k_wmi_mgmt_tx_clean_up_pending, ar);
		idr_destroy(&ar->wmi.mgmt_pending_tx);
		spin_unlock_bh(&ar->data_lock);
	}

	cancel_work_sync(&ar->svc_rdy_work);
	dev_kfree_skb(ar->svc_rdy_skb);
}
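/*
 * Illustrative sketch (not taken verbatim from this file): the per-version
 * ops tables installed by ath10k_wmi_attach() above are consumed through
 * thin dispatch wrappers (in the driver these live in wmi-ops.h). The
 * wrapper below shows the general pattern for the echo command used by
 * ath10k_wmi_barrier(); the exact helper and command-map field names
 * (ath10k_wmi_cmd_send(), cmd->echo_cmdid) are assumed here for
 * illustration.
 */
static inline int ath10k_wmi_echo_example(struct ath10k *ar, u32 value)
{
	struct ath10k_wmi *wmi = &ar->wmi;
	struct sk_buff *skb;

	/* Not every firmware branch implements every op. */
	if (!wmi->ops->gen_echo)
		return -EOPNOTSUPP;

	/* Build the command buffer via the version-specific generator... */
	skb = wmi->ops->gen_echo(ar, value);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* ...and submit it with the command id from the per-version map. */
	return ath10k_wmi_cmd_send(ar, skb, wmi->cmd->echo_cmdid);
}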
/*
 * Linux Security Module interfaces
 *
 * Copyright (C) 2001 WireX Communications, Inc <chris@wirex.com>
 * Copyright (C) 2001 Greg Kroah-Hartman <greg@kroah.com>
 * Copyright (C) 2001 Networks Associates Technology, Inc <ssmalley@nai.com>
 * Copyright (C) 2001 James Morris <jmorris@intercode.com.au>
 * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group)
 * Copyright (C) 2015 Intel Corporation.
 * Copyright (C) 2015 Casey Schaufler <casey@schaufler-ca.com>
 * Copyright (C) 2016 Mellanox Technologies
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Due to this file being licensed under the GPL there is controversy over
 * whether this permits you to write a module that #includes this file
 * without placing your module under the GPL. Please consult a lawyer for
 * advice before doing this.
 *
 */
#ifndef __LINUX_LSM_HOOKS_H
#define __LINUX_LSM_HOOKS_H

#include <uapi/linux/lsm.h>
#include <linux/security.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <linux/xattr.h>

union security_list_options {
	#define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__);
	#include "lsm_hook_defs.h"
	#undef LSM_HOOK
};

struct security_hook_heads {
	#define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME;
	#include "lsm_hook_defs.h"
	#undef LSM_HOOK
} __randomize_layout;

/**
 * struct lsm_id - Identify a Linux Security Module.
 * @name: name of the LSM, must be approved by the LSM maintainers
 * @id: LSM ID number from uapi/linux/lsm.h
 *
 * Contains the information that identifies the LSM.
 */
struct lsm_id {
	const char	*name;
	u64		id;
};

/*
 * Security module hook list structure.
 * For use with generic list macros for common operations.
 */
struct security_hook_list {
	struct hlist_node		list;
	struct hlist_head		*head;
	union security_list_options	hook;
	const struct lsm_id		*lsmid;
} __randomize_layout;

/*
 * Security blob size or offset data.
 */
struct lsm_blob_sizes {
	int	lbs_cred;
	int	lbs_file;
	int	lbs_inode;
	int	lbs_superblock;
	int	lbs_ipc;
	int	lbs_msg_msg;
	int	lbs_task;
	int	lbs_xattr_count; /* number of xattr slots in new_xattrs array */
};

/**
 * lsm_get_xattr_slot - Return the next available slot and increment the index
 * @xattrs: array storing LSM-provided xattrs
 * @xattr_count: number of already stored xattrs (updated)
 *
 * Retrieve the first available slot in the @xattrs array to fill with an
 * xattr, and increment @xattr_count.
 *
 * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise.
 */
static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs,
					       int *xattr_count)
{
	if (unlikely(!xattrs))
		return NULL;
	return &xattrs[(*xattr_count)++];
}

/*
 * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void
 * LSM hooks (in include/linux/lsm_hook_defs.h).
 */
#define LSM_RET_VOID ((void) 0)

/*
 * Initializing a security_hook_list structure takes
 * up a lot of space in a source file. This macro takes
 * care of the common case and reduces the amount of
 * text involved.
 */
#define LSM_HOOK_INIT(HEAD, HOOK) \
	{ .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } }

extern struct security_hook_heads security_hook_heads;
extern char *lsm_names;

extern void security_add_hooks(struct security_hook_list *hooks, int count,
			       const struct lsm_id *lsmid);

#define LSM_FLAG_LEGACY_MAJOR	BIT(0)
#define LSM_FLAG_EXCLUSIVE	BIT(1)

enum lsm_order {
	LSM_ORDER_FIRST = -1,	/* This is only for capabilities. */
	LSM_ORDER_MUTABLE = 0,
	LSM_ORDER_LAST = 1,	/* This is only for integrity. */
};

struct lsm_info {
	const char *name;		/* Required. */
	enum lsm_order order;		/* Optional: default is LSM_ORDER_MUTABLE */
	unsigned long flags;		/* Optional: flags describing LSM */
	int *enabled;			/* Optional: controlled by CONFIG_LSM */
	int (*init)(void);		/* Required. */
	struct lsm_blob_sizes *blobs;	/* Optional: for blob sharing. */
};

extern struct lsm_info __start_lsm_info[], __end_lsm_info[];
extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[];

#define DEFINE_LSM(lsm)						\
	static struct lsm_info __lsm_##lsm			\
		__used __section(".lsm_info.init")		\
		__aligned(sizeof(unsigned long))

#define DEFINE_EARLY_LSM(lsm)					\
	static struct lsm_info __early_lsm_##lsm		\
		__used __section(".early_lsm_info.init")	\
		__aligned(sizeof(unsigned long))

extern int lsm_inode_alloc(struct inode *inode);

#endif /* ! __LINUX_LSM_HOOKS_H */
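/*
 * Illustrative sketch (not part of the header above): how a minimal LSM
 * would typically register itself with LSM_HOOK_INIT(), security_add_hooks()
 * and DEFINE_LSM(). The "example" name, the single file_permission hook and
 * the use of LSM_ID_UNDEF are assumptions for illustration only; a real LSM
 * uses its assigned LSM_ID_* value from uapi/linux/lsm.h and real policy
 * logic in its hooks.
 */
#include <linux/lsm_hooks.h>

static int example_file_permission(struct file *file, int mask)
{
	/* Allow everything; a real LSM applies its policy here. */
	return 0;
}

static const struct lsm_id example_lsmid = {
	.name	= "example",
	.id	= LSM_ID_UNDEF,	/* assumed placeholder */
};

static struct security_hook_list example_hooks[] __ro_after_init = {
	LSM_HOOK_INIT(file_permission, example_file_permission),
};

static int __init example_lsm_init(void)
{
	security_add_hooks(example_hooks, ARRAY_SIZE(example_hooks),
			   &example_lsmid);
	return 0;
}

DEFINE_LSM(example) = {
	.name = "example",
	.init = example_lsm_init,
};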
// SPDX-License-Identifier: GPL-2.0
#include <linux/fault-inject.h>
#include <linux/mm.h>

static struct {
	struct fault_attr attr;

	bool ignore_gfp_highmem;
	bool ignore_gfp_reclaim;
	u32 min_order;
} fail_page_alloc = {
	.attr = FAULT_ATTR_INITIALIZER,
	.ignore_gfp_reclaim = true,
	.ignore_gfp_highmem = true,
	.min_order = 1,
};

static int __init setup_fail_page_alloc(char *str)
{
	return setup_fault_attr(&fail_page_alloc.attr, str);
}
__setup("fail_page_alloc=", setup_fail_page_alloc);

bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
	int flags = 0;

	if (order < fail_page_alloc.min_order)
		return false;
	if (gfp_mask & __GFP_NOFAIL)
		return false;
	if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
		return false;
	if (fail_page_alloc.ignore_gfp_reclaim &&
	    (gfp_mask & __GFP_DIRECT_RECLAIM))
		return false;

	/* See comment in __should_failslab() */
	if (gfp_mask & __GFP_NOWARN)
		flags |= FAULT_NOWARN;

	return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
}

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int __init fail_page_alloc_debugfs(void)
{
	umode_t mode = S_IFREG | 0600;
	struct dentry *dir;

	dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
					&fail_page_alloc.attr);

	debugfs_create_bool("ignore-gfp-wait", mode, dir,
			    &fail_page_alloc.ignore_gfp_reclaim);
	debugfs_create_bool("ignore-gfp-highmem", mode, dir,
			    &fail_page_alloc.ignore_gfp_highmem);
	debugfs_create_u32("min-order", mode, dir,
			   &fail_page_alloc.min_order);

	return 0;
}

late_initcall(fail_page_alloc_debugfs);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
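/*
 * Illustrative usage sketch (not part of the file above): the fault_attr
 * set up here is normally tuned either through the boot parameter parsed by
 * setup_fail_page_alloc(), in the common fault-injection format
 *
 *     fail_page_alloc=<interval>,<probability>,<space>,<times>
 *     fail_page_alloc=1,10,0,-1    (assumed sample values: consider every
 *                                   allocation, 10% failure rate, no space
 *                                   decay, unlimited failures)
 *
 * or, with CONFIG_FAULT_INJECTION_DEBUG_FS, at runtime through the debugfs
 * entries created by fail_page_alloc_debugfs() and the shared fault_attr
 * knobs, e.g.
 *
 *     echo 10 > /sys/kernel/debug/fail_page_alloc/probability
 *     echo 0  > /sys/kernel/debug/fail_page_alloc/min-order
 *
 * The exact knob names beyond those created above follow the generic
 * fault_attr debugfs layout (interval, probability, times, ...).
 */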
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Event char devices, giving access to raw input device events.
* * Copyright (c) 1999-2002 Vojtech Pavlik */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define EVDEV_MINOR_BASE 64 #define EVDEV_MINORS 32 #define EVDEV_MIN_BUFFER_SIZE 64U #define EVDEV_BUF_PACKETS 8 #include <linux/poll.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/init.h> #include <linux/input/mt.h> #include <linux/major.h> #include <linux/device.h> #include <linux/cdev.h> #include "input-compat.h" struct evdev { int open; struct input_handle handle; struct evdev_client __rcu *grab; struct list_head client_list; spinlock_t client_lock; /* protects client_list */ struct mutex mutex; struct device dev; struct cdev cdev; bool exist; }; struct evdev_client { unsigned int head; unsigned int tail; unsigned int packet_head; /* [future] position of the first element of next packet */ spinlock_t buffer_lock; /* protects access to buffer, head and tail */ wait_queue_head_t wait; struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; enum input_clock_type clk_type; bool revoked; unsigned long *evmasks[EV_CNT]; unsigned int bufsize; struct input_event buffer[] __counted_by(bufsize); }; static size_t evdev_get_mask_cnt(unsigned int type) { static const size_t counts[EV_CNT] = { /* EV_SYN==0 is EV_CNT, _not_ SYN_CNT, see EVIOCGBIT */ [EV_SYN] = EV_CNT, [EV_KEY] = KEY_CNT, [EV_REL] = REL_CNT, [EV_ABS] = ABS_CNT, [EV_MSC] = MSC_CNT, [EV_SW] = SW_CNT, [EV_LED] = LED_CNT, [EV_SND] = SND_CNT, [EV_FF] = FF_CNT, }; return (type < EV_CNT) ? counts[type] : 0; } /* requires the buffer lock to be held */ static bool __evdev_is_filtered(struct evdev_client *client, unsigned int type, unsigned int code) { unsigned long *mask; size_t cnt; /* EV_SYN and unknown codes are never filtered */ if (type == EV_SYN || type >= EV_CNT) return false; /* first test whether the type is filtered */ mask = client->evmasks[0]; if (mask && !test_bit(type, mask)) return true; /* unknown values are never filtered */ cnt = evdev_get_mask_cnt(type); if (!cnt || code >= cnt) return false; mask = client->evmasks[type]; return mask && !test_bit(code, mask); } /* flush queued events of type @type, caller must hold client->buffer_lock */ static void __evdev_flush_queue(struct evdev_client *client, unsigned int type) { unsigned int i, head, num; unsigned int mask = client->bufsize - 1; bool is_report; struct input_event *ev; BUG_ON(type == EV_SYN); head = client->tail; client->packet_head = client->tail; /* init to 1 so a leading SYN_REPORT will not be dropped */ num = 1; for (i = client->tail; i != client->head; i = (i + 1) & mask) { ev = &client->buffer[i]; is_report = ev->type == EV_SYN && ev->code == SYN_REPORT; if (ev->type == type) { /* drop matched entry */ continue; } else if (is_report && !num) { /* drop empty SYN_REPORT groups */ continue; } else if (head != i) { /* move entry to fill the gap */ client->buffer[head] = *ev; } num++; head = (head + 1) & mask; if (is_report) { num = 0; client->packet_head = head; } } client->head = head; } static void __evdev_queue_syn_dropped(struct evdev_client *client) { ktime_t *ev_time = input_get_timestamp(client->evdev->handle.dev); struct timespec64 ts = ktime_to_timespec64(ev_time[client->clk_type]); struct input_event ev; ev.input_event_sec = ts.tv_sec; ev.input_event_usec = ts.tv_nsec / NSEC_PER_USEC; ev.type = EV_SYN; ev.code = SYN_DROPPED; ev.value = 0; client->buffer[client->head++] = ev; client->head &= client->bufsize - 1; if (unlikely(client->head == client->tail)) { 
/* drop queue but keep our SYN_DROPPED event */ client->tail = (client->head - 1) & (client->bufsize - 1); client->packet_head = client->tail; } } static void evdev_queue_syn_dropped(struct evdev_client *client) { unsigned long flags; spin_lock_irqsave(&client->buffer_lock, flags); __evdev_queue_syn_dropped(client); spin_unlock_irqrestore(&client->buffer_lock, flags); } static int evdev_set_clk_type(struct evdev_client *client, unsigned int clkid) { unsigned long flags; enum input_clock_type clk_type; switch (clkid) { case CLOCK_REALTIME: clk_type = INPUT_CLK_REAL; break; case CLOCK_MONOTONIC: clk_type = INPUT_CLK_MONO; break; case CLOCK_BOOTTIME: clk_type = INPUT_CLK_BOOT; break; default: return -EINVAL; } if (client->clk_type != clk_type) { client->clk_type = clk_type; /* * Flush pending events and queue SYN_DROPPED event, * but only if the queue is not empty. */ spin_lock_irqsave(&client->buffer_lock, flags); if (client->head != client->tail) { client->packet_head = client->head = client->tail; __evdev_queue_syn_dropped(client); } spin_unlock_irqrestore(&client->buffer_lock, flags); } return 0; } static void __pass_event(struct evdev_client *client, const struct input_event *event) { client->buffer[client->head++] = *event; client->head &= client->bufsize - 1; if (unlikely(client->head == client->tail)) { /* * This effectively "drops" all unconsumed events, leaving * EV_SYN/SYN_DROPPED plus the newest event in the queue. */ client->tail = (client->head - 2) & (client->bufsize - 1); client->buffer[client->tail] = (struct input_event) { .input_event_sec = event->input_event_sec, .input_event_usec = event->input_event_usec, .type = EV_SYN, .code = SYN_DROPPED, .value = 0, }; client->packet_head = client->tail; } if (event->type == EV_SYN && event->code == SYN_REPORT) { client->packet_head = client->head; kill_fasync(&client->fasync, SIGIO, POLL_IN); } } static void evdev_pass_values(struct evdev_client *client, const struct input_value *vals, unsigned int count, ktime_t *ev_time) { const struct input_value *v; struct input_event event; struct timespec64 ts; bool wakeup = false; if (client->revoked) return; ts = ktime_to_timespec64(ev_time[client->clk_type]); event.input_event_sec = ts.tv_sec; event.input_event_usec = ts.tv_nsec / NSEC_PER_USEC; /* Interrupts are disabled, just acquire the lock. */ spin_lock(&client->buffer_lock); for (v = vals; v != vals + count; v++) { if (__evdev_is_filtered(client, v->type, v->code)) continue; if (v->type == EV_SYN && v->code == SYN_REPORT) { /* drop empty SYN_REPORT */ if (client->packet_head == client->head) continue; wakeup = true; } event.type = v->type; event.code = v->code; event.value = v->value; __pass_event(client, &event); } spin_unlock(&client->buffer_lock); if (wakeup) wake_up_interruptible_poll(&client->wait, EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM); } /* * Pass incoming events to all connected clients. */ static void evdev_events(struct input_handle *handle, const struct input_value *vals, unsigned int count) { struct evdev *evdev = handle->private; struct evdev_client *client; ktime_t *ev_time = input_get_timestamp(handle->dev); rcu_read_lock(); client = rcu_dereference(evdev->grab); if (client) evdev_pass_values(client, vals, count, ev_time); else list_for_each_entry_rcu(client, &evdev->client_list, node) evdev_pass_values(client, vals, count, ev_time); rcu_read_unlock(); } /* * Pass incoming event to all connected clients. 
*/ static void evdev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { struct input_value vals[] = { { type, code, value } }; evdev_events(handle, vals, 1); } static int evdev_fasync(int fd, struct file *file, int on) { struct evdev_client *client = file->private_data; return fasync_helper(fd, file, on, &client->fasync); } static void evdev_free(struct device *dev) { struct evdev *evdev = container_of(dev, struct evdev, dev); input_put_device(evdev->handle.dev); kfree(evdev); } /* * Grabs an event device (along with underlying input device). * This function is called with evdev->mutex taken. */ static int evdev_grab(struct evdev *evdev, struct evdev_client *client) { int error; if (evdev->grab) return -EBUSY; error = input_grab_device(&evdev->handle); if (error) return error; rcu_assign_pointer(evdev->grab, client); return 0; } static int evdev_ungrab(struct evdev *evdev, struct evdev_client *client) { struct evdev_client *grab = rcu_dereference_protected(evdev->grab, lockdep_is_held(&evdev->mutex)); if (grab != client) return -EINVAL; rcu_assign_pointer(evdev->grab, NULL); synchronize_rcu(); input_release_device(&evdev->handle); return 0; } static void evdev_attach_client(struct evdev *evdev, struct evdev_client *client) { spin_lock(&evdev->client_lock); list_add_tail_rcu(&client->node, &evdev->client_list); spin_unlock(&evdev->client_lock); } static void evdev_detach_client(struct evdev *evdev, struct evdev_client *client) { spin_lock(&evdev->client_lock); list_del_rcu(&client->node); spin_unlock(&evdev->client_lock); synchronize_rcu(); } static int evdev_open_device(struct evdev *evdev) { int retval; retval = mutex_lock_interruptible(&evdev->mutex); if (retval) return retval; if (!evdev->exist) retval = -ENODEV; else if (!evdev->open++) { retval = input_open_device(&evdev->handle); if (retval) evdev->open--; } mutex_unlock(&evdev->mutex); return retval; } static void evdev_close_device(struct evdev *evdev) { mutex_lock(&evdev->mutex); if (evdev->exist && !--evdev->open) input_close_device(&evdev->handle); mutex_unlock(&evdev->mutex); } /* * Wake up users waiting for IO so they can disconnect from * dead device. 
*/ static void evdev_hangup(struct evdev *evdev) { struct evdev_client *client; spin_lock(&evdev->client_lock); list_for_each_entry(client, &evdev->client_list, node) { kill_fasync(&client->fasync, SIGIO, POLL_HUP); wake_up_interruptible_poll(&client->wait, EPOLLHUP | EPOLLERR); } spin_unlock(&evdev->client_lock); } static int evdev_release(struct inode *inode, struct file *file) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; unsigned int i; mutex_lock(&evdev->mutex); if (evdev->exist && !client->revoked) input_flush_device(&evdev->handle, file); evdev_ungrab(evdev, client); mutex_unlock(&evdev->mutex); evdev_detach_client(evdev, client); for (i = 0; i < EV_CNT; ++i) bitmap_free(client->evmasks[i]); kvfree(client); evdev_close_device(evdev); return 0; } static unsigned int evdev_compute_buffer_size(struct input_dev *dev) { unsigned int n_events = max(dev->hint_events_per_packet * EVDEV_BUF_PACKETS, EVDEV_MIN_BUFFER_SIZE); return roundup_pow_of_two(n_events); } static int evdev_open(struct inode *inode, struct file *file) { struct evdev *evdev = container_of(inode->i_cdev, struct evdev, cdev); unsigned int bufsize = evdev_compute_buffer_size(evdev->handle.dev); struct evdev_client *client; int error; client = kvzalloc(struct_size(client, buffer, bufsize), GFP_KERNEL); if (!client) return -ENOMEM; init_waitqueue_head(&client->wait); client->bufsize = bufsize; spin_lock_init(&client->buffer_lock); client->evdev = evdev; evdev_attach_client(evdev, client); error = evdev_open_device(evdev); if (error) goto err_free_client; file->private_data = client; stream_open(inode, file); return 0; err_free_client: evdev_detach_client(evdev, client); kvfree(client); return error; } static ssize_t evdev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; struct input_event event; int retval = 0; if (count != 0 && count < input_event_size()) return -EINVAL; retval = mutex_lock_interruptible(&evdev->mutex); if (retval) return retval; if (!evdev->exist || client->revoked) { retval = -ENODEV; goto out; } while (retval + input_event_size() <= count) { if (input_event_from_user(buffer + retval, &event)) { retval = -EFAULT; goto out; } retval += input_event_size(); input_inject_event(&evdev->handle, event.type, event.code, event.value); cond_resched(); } out: mutex_unlock(&evdev->mutex); return retval; } static int evdev_fetch_next_event(struct evdev_client *client, struct input_event *event) { int have_event; spin_lock_irq(&client->buffer_lock); have_event = client->packet_head != client->tail; if (have_event) { *event = client->buffer[client->tail++]; client->tail &= client->bufsize - 1; } spin_unlock_irq(&client->buffer_lock); return have_event; } static ssize_t evdev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; struct input_event event; size_t read = 0; int error; if (count != 0 && count < input_event_size()) return -EINVAL; for (;;) { if (!evdev->exist || client->revoked) return -ENODEV; if (client->packet_head == client->tail && (file->f_flags & O_NONBLOCK)) return -EAGAIN; /* * count == 0 is special - no IO is done but we check * for error conditions (see above). 
*/ if (count == 0) break; while (read + input_event_size() <= count && evdev_fetch_next_event(client, &event)) { if (input_event_to_user(buffer + read, &event)) return -EFAULT; read += input_event_size(); } if (read) break; if (!(file->f_flags & O_NONBLOCK)) { error = wait_event_interruptible(client->wait, client->packet_head != client->tail || !evdev->exist || client->revoked); if (error) return error; } } return read; } /* No kernel lock - fine */ static __poll_t evdev_poll(struct file *file, poll_table *wait) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; __poll_t mask; poll_wait(file, &client->wait, wait); if (evdev->exist && !client->revoked) mask = EPOLLOUT | EPOLLWRNORM; else mask = EPOLLHUP | EPOLLERR; if (client->packet_head != client->tail) mask |= EPOLLIN | EPOLLRDNORM; return mask; } #ifdef CONFIG_COMPAT #define BITS_PER_LONG_COMPAT (sizeof(compat_long_t) * 8) #define BITS_TO_LONGS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1) #ifdef __BIG_ENDIAN static int bits_to_user(unsigned long *bits, unsigned int maxbit, unsigned int maxlen, void __user *p, int compat) { int len, i; if (compat) { len = BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t); if (len > maxlen) len = maxlen; for (i = 0; i < len / sizeof(compat_long_t); i++) if (copy_to_user((compat_long_t __user *) p + i, (compat_long_t *) bits + i + 1 - ((i % 2) << 1), sizeof(compat_long_t))) return -EFAULT; } else { len = BITS_TO_LONGS(maxbit) * sizeof(long); if (len > maxlen) len = maxlen; if (copy_to_user(p, bits, len)) return -EFAULT; } return len; } static int bits_from_user(unsigned long *bits, unsigned int maxbit, unsigned int maxlen, const void __user *p, int compat) { int len, i; if (compat) { if (maxlen % sizeof(compat_long_t)) return -EINVAL; len = BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t); if (len > maxlen) len = maxlen; for (i = 0; i < len / sizeof(compat_long_t); i++) if (copy_from_user((compat_long_t *) bits + i + 1 - ((i % 2) << 1), (compat_long_t __user *) p + i, sizeof(compat_long_t))) return -EFAULT; if (i % 2) *((compat_long_t *) bits + i - 1) = 0; } else { if (maxlen % sizeof(long)) return -EINVAL; len = BITS_TO_LONGS(maxbit) * sizeof(long); if (len > maxlen) len = maxlen; if (copy_from_user(bits, p, len)) return -EFAULT; } return len; } #else static int bits_to_user(unsigned long *bits, unsigned int maxbit, unsigned int maxlen, void __user *p, int compat) { int len = compat ? BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t) : BITS_TO_LONGS(maxbit) * sizeof(long); if (len > maxlen) len = maxlen; return copy_to_user(p, bits, len) ? -EFAULT : len; } static int bits_from_user(unsigned long *bits, unsigned int maxbit, unsigned int maxlen, const void __user *p, int compat) { size_t chunk_size = compat ? sizeof(compat_long_t) : sizeof(long); int len; if (maxlen % chunk_size) return -EINVAL; len = compat ? BITS_TO_LONGS_COMPAT(maxbit) : BITS_TO_LONGS(maxbit); len *= chunk_size; if (len > maxlen) len = maxlen; return copy_from_user(bits, p, len) ? -EFAULT : len; } #endif /* __BIG_ENDIAN */ #else static int bits_to_user(unsigned long *bits, unsigned int maxbit, unsigned int maxlen, void __user *p, int compat) { int len = BITS_TO_LONGS(maxbit) * sizeof(long); if (len > maxlen) len = maxlen; return copy_to_user(p, bits, len) ? 
-EFAULT : len; } static int bits_from_user(unsigned long *bits, unsigned int maxbit, unsigned int maxlen, const void __user *p, int compat) { int len; if (maxlen % sizeof(long)) return -EINVAL; len = BITS_TO_LONGS(maxbit) * sizeof(long); if (len > maxlen) len = maxlen; return copy_from_user(bits, p, len) ? -EFAULT : len; } #endif /* CONFIG_COMPAT */ static int str_to_user(const char *str, unsigned int maxlen, void __user *p) { int len; if (!str) return -ENOENT; len = strlen(str) + 1; if (len > maxlen) len = maxlen; return copy_to_user(p, str, len) ? -EFAULT : len; } static int handle_eviocgbit(struct input_dev *dev, unsigned int type, unsigned int size, void __user *p, int compat_mode) { unsigned long *bits; int len; switch (type) { case 0: bits = dev->evbit; len = EV_MAX; break; case EV_KEY: bits = dev->keybit; len = KEY_MAX; break; case EV_REL: bits = dev->relbit; len = REL_MAX; break; case EV_ABS: bits = dev->absbit; len = ABS_MAX; break; case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break; case EV_LED: bits = dev->ledbit; len = LED_MAX; break; case EV_SND: bits = dev->sndbit; len = SND_MAX; break; case EV_FF: bits = dev->ffbit; len = FF_MAX; break; case EV_SW: bits = dev->swbit; len = SW_MAX; break; default: return -EINVAL; } return bits_to_user(bits, len, size, p, compat_mode); } static int evdev_handle_get_keycode(struct input_dev *dev, void __user *p) { struct input_keymap_entry ke = { .len = sizeof(unsigned int), .flags = 0, }; int __user *ip = (int __user *)p; int error; /* legacy case */ if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) return -EFAULT; error = input_get_keycode(dev, &ke); if (error) return error; if (put_user(ke.keycode, ip + 1)) return -EFAULT; return 0; } static int evdev_handle_get_keycode_v2(struct input_dev *dev, void __user *p) { struct input_keymap_entry ke; int error; if (copy_from_user(&ke, p, sizeof(ke))) return -EFAULT; error = input_get_keycode(dev, &ke); if (error) return error; if (copy_to_user(p, &ke, sizeof(ke))) return -EFAULT; return 0; } static int evdev_handle_set_keycode(struct input_dev *dev, void __user *p) { struct input_keymap_entry ke = { .len = sizeof(unsigned int), .flags = 0, }; int __user *ip = (int __user *)p; if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) return -EFAULT; if (get_user(ke.keycode, ip + 1)) return -EFAULT; return input_set_keycode(dev, &ke); } static int evdev_handle_set_keycode_v2(struct input_dev *dev, void __user *p) { struct input_keymap_entry ke; if (copy_from_user(&ke, p, sizeof(ke))) return -EFAULT; if (ke.len > sizeof(ke.scancode)) return -EINVAL; return input_set_keycode(dev, &ke); } /* * If we transfer state to the user, we should flush all pending events * of the same type from the client's queue. Otherwise, they might end up * with duplicate events, which can screw up client's state tracking. * If bits_to_user fails after flushing the queue, we queue a SYN_DROPPED * event so user-space will notice missing events. * * LOCKING: * We need to take event_lock before buffer_lock to avoid dead-locks. But we * need the even_lock only to guarantee consistent state. We can safely release * it while flushing the queue. This allows input-core to handle filters while * we flush the queue. 
*/ static int evdev_handle_get_val(struct evdev_client *client, struct input_dev *dev, unsigned int type, unsigned long *bits, unsigned int maxbit, unsigned int maxlen, void __user *p, int compat) { int ret; unsigned long *mem; mem = bitmap_alloc(maxbit, GFP_KERNEL); if (!mem) return -ENOMEM; spin_lock_irq(&dev->event_lock); spin_lock(&client->buffer_lock); bitmap_copy(mem, bits, maxbit); spin_unlock(&dev->event_lock); __evdev_flush_queue(client, type); spin_unlock_irq(&client->buffer_lock); ret = bits_to_user(mem, maxbit, maxlen, p, compat); if (ret < 0) evdev_queue_syn_dropped(client); bitmap_free(mem); return ret; } static int evdev_handle_mt_request(struct input_dev *dev, unsigned int size, int __user *ip) { const struct input_mt *mt = dev->mt; unsigned int code; int max_slots; int i; if (get_user(code, &ip[0])) return -EFAULT; if (!mt || !input_is_mt_value(code)) return -EINVAL; max_slots = (size - sizeof(__u32)) / sizeof(__s32); for (i = 0; i < mt->num_slots && i < max_slots; i++) { int value = input_mt_get_value(&mt->slots[i], code); if (put_user(value, &ip[1 + i])) return -EFAULT; } return 0; } static int evdev_revoke(struct evdev *evdev, struct evdev_client *client, struct file *file) { client->revoked = true; evdev_ungrab(evdev, client); input_flush_device(&evdev->handle, file); wake_up_interruptible_poll(&client->wait, EPOLLHUP | EPOLLERR); return 0; } /* must be called with evdev-mutex held */ static int evdev_set_mask(struct evdev_client *client, unsigned int type, const void __user *codes, u32 codes_size, int compat) { unsigned long flags, *mask, *oldmask; size_t cnt; int error; /* we allow unknown types and 'codes_size > size' for forward-compat */ cnt = evdev_get_mask_cnt(type); if (!cnt) return 0; mask = bitmap_zalloc(cnt, GFP_KERNEL); if (!mask) return -ENOMEM; error = bits_from_user(mask, cnt - 1, codes_size, codes, compat); if (error < 0) { bitmap_free(mask); return error; } spin_lock_irqsave(&client->buffer_lock, flags); oldmask = client->evmasks[type]; client->evmasks[type] = mask; spin_unlock_irqrestore(&client->buffer_lock, flags); bitmap_free(oldmask); return 0; } /* must be called with evdev-mutex held */ static int evdev_get_mask(struct evdev_client *client, unsigned int type, void __user *codes, u32 codes_size, int compat) { unsigned long *mask; size_t cnt, size, xfer_size; int i; int error; /* we allow unknown types and 'codes_size > size' for forward-compat */ cnt = evdev_get_mask_cnt(type); size = sizeof(unsigned long) * BITS_TO_LONGS(cnt); xfer_size = min_t(size_t, codes_size, size); if (cnt > 0) { mask = client->evmasks[type]; if (mask) { error = bits_to_user(mask, cnt - 1, xfer_size, codes, compat); if (error < 0) return error; } else { /* fake mask with all bits set */ for (i = 0; i < xfer_size; i++) if (put_user(0xffU, (u8 __user *)codes + i)) return -EFAULT; } } if (xfer_size < codes_size) if (clear_user(codes + xfer_size, codes_size - xfer_size)) return -EFAULT; return 0; } static long evdev_do_ioctl(struct file *file, unsigned int cmd, void __user *p, int compat_mode) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; struct input_dev *dev = evdev->handle.dev; struct input_absinfo abs; struct input_mask mask; struct ff_effect effect; int __user *ip = (int __user *)p; unsigned int i, t, u, v; unsigned int size; int error; /* First we check for fixed-length commands */ switch (cmd) { case EVIOCGVERSION: return put_user(EV_VERSION, ip); case EVIOCGID: if (copy_to_user(p, &dev->id, sizeof(struct input_id))) return 
-EFAULT; return 0; case EVIOCGREP: if (!test_bit(EV_REP, dev->evbit)) return -ENOSYS; if (put_user(dev->rep[REP_DELAY], ip)) return -EFAULT; if (put_user(dev->rep[REP_PERIOD], ip + 1)) return -EFAULT; return 0; case EVIOCSREP: if (!test_bit(EV_REP, dev->evbit)) return -ENOSYS; if (get_user(u, ip)) return -EFAULT; if (get_user(v, ip + 1)) return -EFAULT; input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u); input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v); return 0; case EVIOCRMFF: return input_ff_erase(dev, (int)(unsigned long) p, file); case EVIOCGEFFECTS: i = test_bit(EV_FF, dev->evbit) ? dev->ff->max_effects : 0; if (put_user(i, ip)) return -EFAULT; return 0; case EVIOCGRAB: if (p) return evdev_grab(evdev, client); else return evdev_ungrab(evdev, client); case EVIOCREVOKE: if (p) return -EINVAL; else return evdev_revoke(evdev, client, file); case EVIOCGMASK: { void __user *codes_ptr; if (copy_from_user(&mask, p, sizeof(mask))) return -EFAULT; codes_ptr = (void __user *)(unsigned long)mask.codes_ptr; return evdev_get_mask(client, mask.type, codes_ptr, mask.codes_size, compat_mode); } case EVIOCSMASK: { const void __user *codes_ptr; if (copy_from_user(&mask, p, sizeof(mask))) return -EFAULT; codes_ptr = (const void __user *)(unsigned long)mask.codes_ptr; return evdev_set_mask(client, mask.type, codes_ptr, mask.codes_size, compat_mode); } case EVIOCSCLOCKID: if (copy_from_user(&i, p, sizeof(unsigned int))) return -EFAULT; return evdev_set_clk_type(client, i); case EVIOCGKEYCODE: return evdev_handle_get_keycode(dev, p); case EVIOCSKEYCODE: return evdev_handle_set_keycode(dev, p); case EVIOCGKEYCODE_V2: return evdev_handle_get_keycode_v2(dev, p); case EVIOCSKEYCODE_V2: return evdev_handle_set_keycode_v2(dev, p); } size = _IOC_SIZE(cmd); /* Now check variable-length commands */ #define EVIOC_MASK_SIZE(nr) ((nr) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) switch (EVIOC_MASK_SIZE(cmd)) { case EVIOCGPROP(0): return bits_to_user(dev->propbit, INPUT_PROP_MAX, size, p, compat_mode); case EVIOCGMTSLOTS(0): return evdev_handle_mt_request(dev, size, ip); case EVIOCGKEY(0): return evdev_handle_get_val(client, dev, EV_KEY, dev->key, KEY_MAX, size, p, compat_mode); case EVIOCGLED(0): return evdev_handle_get_val(client, dev, EV_LED, dev->led, LED_MAX, size, p, compat_mode); case EVIOCGSND(0): return evdev_handle_get_val(client, dev, EV_SND, dev->snd, SND_MAX, size, p, compat_mode); case EVIOCGSW(0): return evdev_handle_get_val(client, dev, EV_SW, dev->sw, SW_MAX, size, p, compat_mode); case EVIOCGNAME(0): return str_to_user(dev->name, size, p); case EVIOCGPHYS(0): return str_to_user(dev->phys, size, p); case EVIOCGUNIQ(0): return str_to_user(dev->uniq, size, p); case EVIOC_MASK_SIZE(EVIOCSFF): if (input_ff_effect_from_user(p, size, &effect)) return -EFAULT; error = input_ff_upload(dev, &effect, file); if (error) return error; if (put_user(effect.id, &(((struct ff_effect __user *)p)->id))) return -EFAULT; return 0; } /* Multi-number variable-length handlers */ if (_IOC_TYPE(cmd) != 'E') return -EINVAL; if (_IOC_DIR(cmd) == _IOC_READ) { if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0, 0))) return handle_eviocgbit(dev, _IOC_NR(cmd) & EV_MAX, size, p, compat_mode); if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) { if (!dev->absinfo) return -EINVAL; t = _IOC_NR(cmd) & ABS_MAX; abs = dev->absinfo[t]; if (copy_to_user(p, &abs, min_t(size_t, size, sizeof(struct input_absinfo)))) return -EFAULT; return 0; } } if (_IOC_DIR(cmd) == _IOC_WRITE) { if ((_IOC_NR(cmd) & ~ABS_MAX) == 
_IOC_NR(EVIOCSABS(0))) { if (!dev->absinfo) return -EINVAL; t = _IOC_NR(cmd) & ABS_MAX; if (copy_from_user(&abs, p, min_t(size_t, size, sizeof(struct input_absinfo)))) return -EFAULT; if (size < sizeof(struct input_absinfo)) abs.resolution = 0; /* We can't change number of reserved MT slots */ if (t == ABS_MT_SLOT) return -EINVAL; /* * Take event lock to ensure that we are not * changing device parameters in the middle * of event. */ spin_lock_irq(&dev->event_lock); dev->absinfo[t] = abs; spin_unlock_irq(&dev->event_lock); return 0; } } return -EINVAL; } static long evdev_ioctl_handler(struct file *file, unsigned int cmd, void __user *p, int compat_mode) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; int retval; retval = mutex_lock_interruptible(&evdev->mutex); if (retval) return retval; if (!evdev->exist || client->revoked) { retval = -ENODEV; goto out; } retval = evdev_do_ioctl(file, cmd, p, compat_mode); out: mutex_unlock(&evdev->mutex); return retval; } static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return evdev_ioctl_handler(file, cmd, (void __user *)arg, 0); } #ifdef CONFIG_COMPAT static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { return evdev_ioctl_handler(file, cmd, compat_ptr(arg), 1); } #endif static const struct file_operations evdev_fops = { .owner = THIS_MODULE, .read = evdev_read, .write = evdev_write, .poll = evdev_poll, .open = evdev_open, .release = evdev_release, .unlocked_ioctl = evdev_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = evdev_ioctl_compat, #endif .fasync = evdev_fasync, .llseek = no_llseek, }; /* * Mark device non-existent. This disables writes, ioctls and * prevents new users from opening the device. Already posted * blocking reads will stay, however new ones will fail. */ static void evdev_mark_dead(struct evdev *evdev) { mutex_lock(&evdev->mutex); evdev->exist = false; mutex_unlock(&evdev->mutex); } static void evdev_cleanup(struct evdev *evdev) { struct input_handle *handle = &evdev->handle; evdev_mark_dead(evdev); evdev_hangup(evdev); /* evdev is marked dead so no one else accesses evdev->open */ if (evdev->open) { input_flush_device(handle, NULL); input_close_device(handle); } } /* * Create new evdev device. Note that input core serializes calls * to connect and disconnect. 
*/ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct evdev *evdev; int minor; int dev_no; int error; minor = input_get_new_minor(EVDEV_MINOR_BASE, EVDEV_MINORS, true); if (minor < 0) { error = minor; pr_err("failed to reserve new minor: %d\n", error); return error; } evdev = kzalloc(sizeof(struct evdev), GFP_KERNEL); if (!evdev) { error = -ENOMEM; goto err_free_minor; } INIT_LIST_HEAD(&evdev->client_list); spin_lock_init(&evdev->client_lock); mutex_init(&evdev->mutex); evdev->exist = true; dev_no = minor; /* Normalize device number if it falls into legacy range */ if (dev_no < EVDEV_MINOR_BASE + EVDEV_MINORS) dev_no -= EVDEV_MINOR_BASE; dev_set_name(&evdev->dev, "event%d", dev_no); evdev->handle.dev = input_get_device(dev); evdev->handle.name = dev_name(&evdev->dev); evdev->handle.handler = handler; evdev->handle.private = evdev; evdev->dev.devt = MKDEV(INPUT_MAJOR, minor); evdev->dev.class = &input_class; evdev->dev.parent = &dev->dev; evdev->dev.release = evdev_free; device_initialize(&evdev->dev); error = input_register_handle(&evdev->handle); if (error) goto err_free_evdev; cdev_init(&evdev->cdev, &evdev_fops); error = cdev_device_add(&evdev->cdev, &evdev->dev); if (error) goto err_cleanup_evdev; return 0; err_cleanup_evdev: evdev_cleanup(evdev); input_unregister_handle(&evdev->handle); err_free_evdev: put_device(&evdev->dev); err_free_minor: input_free_minor(minor); return error; } static void evdev_disconnect(struct input_handle *handle) { struct evdev *evdev = handle->private; cdev_device_del(&evdev->cdev, &evdev->dev); evdev_cleanup(evdev); input_free_minor(MINOR(evdev->dev.devt)); input_unregister_handle(handle); put_device(&evdev->dev); } static const struct input_device_id evdev_ids[] = { { .driver_info = 1 }, /* Matches all devices */ { }, /* Terminating zero entry */ }; MODULE_DEVICE_TABLE(input, evdev_ids); static struct input_handler evdev_handler = { .event = evdev_event, .events = evdev_events, .connect = evdev_connect, .disconnect = evdev_disconnect, .legacy_minors = true, .minor = EVDEV_MINOR_BASE, .name = "evdev", .id_table = evdev_ids, }; static int __init evdev_init(void) { return input_register_handler(&evdev_handler); } static void __exit evdev_exit(void) { input_unregister_handler(&evdev_handler); } module_init(evdev_init); module_exit(evdev_exit); MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION("Input driver event char devices"); MODULE_LICENSE("GPL");
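From user space, the character device implemented above is consumed by read(2)ing fixed-size struct input_event records and by the EVIOC* ioctls handled in evdev_do_ioctl(). The following is a minimal illustrative sketch, not part of the driver; the device path /dev/input/event0 is an assumption and error handling is abbreviated.

/* Illustrative user-space reader for an evdev node (sketch only). */
#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/input.h>

int main(void)
{
	/* hypothetical device node; pick the eventN that matches your hardware */
	const char *path = "/dev/input/event0";
	struct input_event ev;
	char name[64] = "unknown";
	int clkid = CLOCK_MONOTONIC;
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	ioctl(fd, EVIOCGNAME(sizeof(name)), name);	/* served by str_to_user() via EVIOCGNAME(0) */
	ioctl(fd, EVIOCSCLOCKID, &clkid);		/* handled by evdev_set_clk_type() */
	printf("reading from %s (%s)\n", path, name);

	/* evdev_read() returns whole input_event records; a SYN_REPORT closes each packet */
	while (read(fd, &ev, sizeof(ev)) == sizeof(ev))
		printf("type %u code %u value %d\n", ev.type, ev.code, ev.value);

	close(fd);
	return 0;
}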
// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * This file implements the various access functions for the * PROC file system. This is very similar to the IPv4 version, * except it reports the sockets in the INET6 address family. * * Authors: David S. Miller (davem@caip.rutgers.edu) * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> */ #include <linux/socket.h> #include <linux/net.h> #include <linux/ipv6.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stddef.h> #include <linux/export.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/sock.h> #include <net/tcp.h> #include <net/udp.h> #include <net/transp_v6.h> #include <net/ipv6.h> #define MAX4(a, b, c, d) \ max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) #define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ IPSTATS_MIB_MAX, ICMP_MIB_MAX) static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; seq_printf(seq, "TCP6: inuse %d\n", sock_prot_inuse_get(net, &tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(net, &udpv6_prot)); seq_printf(seq, "UDPLITE6: inuse %d\n", sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); seq_printf(seq, "FRAG6: inuse %u memory %lu\n", atomic_read(&net->ipv6.fqdir->rhashtable.nelems), frag_mem_limit(net->ipv6.fqdir)); return 0; } static const struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS), SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS), SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES), SNMP_MIB_ITEM("Ip6InAddrErrors", IPSTATS_MIB_INADDRERRORS), SNMP_MIB_ITEM("Ip6InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS), SNMP_MIB_ITEM("Ip6InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS), SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS), SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS), SNMP_MIB_ITEM("Ip6OutDiscards",
IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), SNMP_MIB_ITEM("Ip6ReasmReqds", IPSTATS_MIB_REASMREQDS), SNMP_MIB_ITEM("Ip6ReasmOKs", IPSTATS_MIB_REASMOKS), SNMP_MIB_ITEM("Ip6ReasmFails", IPSTATS_MIB_REASMFAILS), SNMP_MIB_ITEM("Ip6FragOKs", IPSTATS_MIB_FRAGOKS), SNMP_MIB_ITEM("Ip6FragFails", IPSTATS_MIB_FRAGFAILS), SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES), SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS), SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS), SNMP_MIB_ITEM("Ip6InOctets", IPSTATS_MIB_INOCTETS), SNMP_MIB_ITEM("Ip6OutOctets", IPSTATS_MIB_OUTOCTETS), SNMP_MIB_ITEM("Ip6InMcastOctets", IPSTATS_MIB_INMCASTOCTETS), SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */ SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS), SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS), SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS), SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS), SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS), SNMP_MIB_SENTINEL }; static const struct snmp_mib snmp6_icmp6_list[] = { /* icmpv6 mib according to RFC 2466 */ SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS), SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS), SNMP_MIB_ITEM("Icmp6OutRateLimitHost", ICMP6_MIB_RATELIMITHOST), SNMP_MIB_SENTINEL }; /* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */ static const char *const icmp6type2name[256] = { [ICMPV6_DEST_UNREACH] = "DestUnreachs", [ICMPV6_PKT_TOOBIG] = "PktTooBigs", [ICMPV6_TIME_EXCEED] = "TimeExcds", [ICMPV6_PARAMPROB] = "ParmProblems", [ICMPV6_ECHO_REQUEST] = "Echos", [ICMPV6_ECHO_REPLY] = "EchoReplies", [ICMPV6_MGM_QUERY] = "GroupMembQueries", [ICMPV6_MGM_REPORT] = "GroupMembResponses", [ICMPV6_MGM_REDUCTION] = "GroupMembReductions", [ICMPV6_MLD2_REPORT] = "MLDv2Reports", [NDISC_ROUTER_ADVERTISEMENT] = "RouterAdvertisements", [NDISC_ROUTER_SOLICITATION] = "RouterSolicits", [NDISC_NEIGHBOUR_ADVERTISEMENT] = "NeighborAdvertisements", [NDISC_NEIGHBOUR_SOLICITATION] = "NeighborSolicits", [NDISC_REDIRECT] = "Redirects", }; static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI), SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS), SNMP_MIB_SENTINEL }; static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS), 
SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS), SNMP_MIB_SENTINEL }; static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) { char name[32]; int i; /* print by name -- deprecated items */ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { int icmptype; const char *p; icmptype = i & 0xff; p = icmp6type2name[icmptype]; if (!p) /* don't print un-named types here */ continue; snprintf(name, sizeof(name), "Icmp6%s%s", i & 0x100 ? "Out" : "In", p); seq_printf(seq, "%-32s\t%lu\n", name, atomic_long_read(smib + i)); } /* print by number (nonzero only) - ICMPMsgStat format */ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { unsigned long val; val = atomic_long_read(smib + i); if (!val) continue; snprintf(name, sizeof(name), "Icmp6%sType%u", i & 0x100 ? "Out" : "In", i & 0xff); seq_printf(seq, "%-32s\t%lu\n", name, val); } } /* can be called either with percpu mib (pcpumib != NULL), * or shared one (smib != NULL) */ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { unsigned long buff[SNMP_MIB_MAX]; int i; if (pcpumib) { memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); snmp_get_cpu_field_batch(buff, itemlist, pcpumib); for (i = 0; itemlist[i].name; i++) seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, buff[i]); } else { for (i = 0; itemlist[i].name; i++) seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { u64 buff64[SNMP_MIB_MAX]; int i; memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX); snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; snmp6_seq_show_item64(seq, net->mib.ipv6_statistics, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, net->mib.icmpv6_statistics, NULL, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs); snmp6_seq_show_item(seq, net->mib.udp_stats_in6, NULL, snmp6_udp6_list); snmp6_seq_show_item(seq, net->mib.udplite_stats_in6, NULL, snmp6_udplite6_list); return 0; } static int snmp6_dev_seq_show(struct seq_file *seq, void *v) { struct inet6_dev *idev = (struct inet6_dev *)seq->private; seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); snmp6_seq_show_item64(seq, idev->stats.ipv6, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs); return 0; } int snmp6_register_dev(struct inet6_dev *idev) { struct proc_dir_entry *p; struct net *net; if (!idev || !idev->dev) return -EINVAL; net = dev_net(idev->dev); if (!net->mib.proc_net_devsnmp6) return -ENOENT; p = proc_create_single_data(idev->dev->name, 0444, net->mib.proc_net_devsnmp6, snmp6_dev_seq_show, idev); if (!p) return -ENOMEM; idev->stats.proc_dir_entry = p; return 0; } int snmp6_unregister_dev(struct inet6_dev *idev) { struct net *net = dev_net(idev->dev); if (!net->mib.proc_net_devsnmp6) return -ENOENT; if (!idev->stats.proc_dir_entry) return -EINVAL; proc_remove(idev->stats.proc_dir_entry); idev->stats.proc_dir_entry = NULL; return 0; } static int __net_init ipv6_proc_init_net(struct net *net) { if 
(!proc_create_net_single("sockstat6", 0444, net->proc_net, sockstat6_seq_show, NULL)) return -ENOMEM; if (!proc_create_net_single("snmp6", 0444, net->proc_net, snmp6_seq_show, NULL)) goto proc_snmp6_fail; net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); if (!net->mib.proc_net_devsnmp6) goto proc_dev_snmp6_fail; return 0; proc_dev_snmp6_fail: remove_proc_entry("snmp6", net->proc_net); proc_snmp6_fail: remove_proc_entry("sockstat6", net->proc_net); return -ENOMEM; } static void __net_exit ipv6_proc_exit_net(struct net *net) { remove_proc_entry("sockstat6", net->proc_net); remove_proc_entry("dev_snmp6", net->proc_net); remove_proc_entry("snmp6", net->proc_net); } static struct pernet_operations ipv6_proc_ops = { .init = ipv6_proc_init_net, .exit = ipv6_proc_exit_net, }; int __init ipv6_misc_proc_init(void) { return register_pernet_subsys(&ipv6_proc_ops); } void ipv6_misc_proc_exit(void) { unregister_pernet_subsys(&ipv6_proc_ops); }
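snmp6_seq_show_item() prints each counter as a name padded to 32 columns, a tab, and a decimal value. Below is a minimal sketch of a user-space consumer of /proc/net/snmp6 built on that assumption; the counter name chosen is only an example.

/* Illustrative sketch: read one counter from /proc/net/snmp6. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *fp = fopen("/proc/net/snmp6", "r");
	char name[64];
	unsigned long value;

	if (!fp) {
		perror("fopen");
		return 1;
	}

	/* each line is "<name><whitespace><value>", as emitted by snmp6_seq_show_item() */
	while (fscanf(fp, "%63s %lu", name, &value) == 2) {
		if (!strcmp(name, "Ip6InReceives"))
			printf("Ip6InReceives = %lu\n", value);
	}

	fclose(fp);
	return 0;
}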
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __VDSO_HELPERS_H #define __VDSO_HELPERS_H #ifndef __ASSEMBLY__ #include <vdso/datapage.h> static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) { u32 seq; while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) cpu_relax(); smp_rmb(); return seq; } static __always_inline u32 vdso_read_retry(const struct vdso_data *vd, u32 start) { u32 seq; smp_rmb(); seq = READ_ONCE(vd->seq); return seq != start; } static __always_inline void vdso_write_begin(struct vdso_data *vd) { /* * WRITE_ONCE() is required otherwise the compiler can validly tear * updates to vd[x].seq and it is possible that the value seen by the * reader is inconsistent. */ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); smp_wmb(); } static __always_inline void vdso_write_end(struct vdso_data *vd) { smp_wmb(); /* * WRITE_ONCE() is required otherwise the compiler can validly tear * updates to vd[x].seq and it is possible that the value seen by the * reader is inconsistent. */ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); } #endif /* !__ASSEMBLY__ */ #endif /* __VDSO_HELPERS_H */
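vdso_read_begin() and vdso_read_retry() are the reader half of a seqcount protocol: wait until the sequence count is even, read the protected data, then retry if the count changed underneath. A hedged sketch of that retry loop follows; the cycle_last field is assumed from vdso/datapage.h purely for illustration and is not defined in this header.

/*
 * Illustrative reader sketch only: loops until a consistent snapshot of the
 * vDSO data page is observed. The field sampled inside the loop (cycle_last)
 * is an assumption made for demonstration.
 */
static __always_inline u64 example_read_cycle_last(const struct vdso_data *vd)
{
	u32 seq;
	u64 cycles;

	do {
		seq = vdso_read_begin(vd);		/* waits out a concurrent writer */
		cycles = vd->cycle_last;		/* sample the protected data */
	} while (unlikely(vdso_read_retry(vd, seq)));	/* retry if a writer raced us */

	return cycles;
}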
/* SPDX-License-Identifier: GPL-2.0 */ /* linux/include/linux/clockchips.h * * This file contains the structure definitions for clockchips. * * If you are not a clockchip, or the time of day code, you should * not be including this file! */ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H #ifdef CONFIG_GENERIC_CLOCKEVENTS # include <linux/clocksource.h> # include <linux/cpumask.h> # include <linux/ktime.h> # include <linux/notifier.h> struct clock_event_device; struct module; /* * Possible states of a clock event device. * * DETACHED: Device is not used by clockevents core. Initial state or can be * reached from SHUTDOWN. * SHUTDOWN: Device is powered-off. Can be reached from PERIODIC or ONESHOT. * PERIODIC: Device is programmed to generate events periodically. Can be * reached from DETACHED or SHUTDOWN. * ONESHOT: Device is programmed to generate event only once. Can be reached * from DETACHED or SHUTDOWN. * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily * stopped. */ enum clock_event_state { CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, CLOCK_EVT_STATE_ONESHOT_STOPPED, }; /* * Clock event features */ # define CLOCK_EVT_FEAT_PERIODIC 0x000001 # define CLOCK_EVT_FEAT_ONESHOT 0x000002 # define CLOCK_EVT_FEAT_KTIME 0x000004 /* * x86(64) specific (mis)features: * * - Clockevent source stops in C3 State and needs broadcast support. * - Local APIC timer is used as a dummy device.
*/ # define CLOCK_EVT_FEAT_C3STOP 0x000008 # define CLOCK_EVT_FEAT_DUMMY 0x000010 /* * Core shall set the interrupt affinity dynamically in broadcast mode */ # define CLOCK_EVT_FEAT_DYNIRQ 0x000020 # define CLOCK_EVT_FEAT_PERCPU 0x000040 /* * Clockevent device is based on a hrtimer for broadcast */ # define CLOCK_EVT_FEAT_HRTIMER 0x000080 /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low * level handler of the event source * @set_next_event: set next event function using a clocksource delta * @set_next_ktime: set next event function using a direct ktime value * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_state_periodic: switch state to periodic * @set_state_oneshot: switch state to oneshot * @set_state_oneshot_stopped: switch state to oneshot_stopped * @set_state_shutdown: switch state to shutdown * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) * @bound_on: Bound on CPU * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code * @owner: module reference */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); int (*set_next_event)(unsigned long evt, struct clock_event_device *); int (*set_next_ktime)(ktime_t expires, struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; u32 mult; u32 shift; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); void (*broadcast)(const struct cpumask *mask); void (*suspend)(struct clock_event_device *); void (*resume)(struct clock_event_device *); unsigned long min_delta_ticks; unsigned long max_delta_ticks; const char *name; int rating; int irq; int bound_on; const struct cpumask *cpumask; struct list_head list; struct module *owner; } ____cacheline_aligned; /* Helpers to verify state of a clockevent device */ static inline bool clockevent_state_detached(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; } static inline bool clockevent_state_shutdown(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; } static inline bool clockevent_state_periodic(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC; } static inline bool clockevent_state_oneshot(struct clock_event_device *dev) { return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT; } static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) { return dev->state_use_accessors == 
CLOCK_EVT_STATE_ONESHOT_STOPPED; } /* * Calculate a multiplication factor for scaled math, which is used to convert * nanoseconds based values to clock ticks: * * clock_ticks = (nanoseconds * factor) >> shift. * * div_sc is the rearranged equation to calculate a factor from a given clock * ticks / nanoseconds ratio: * * factor = (clock_ticks << shift) / nanoseconds */ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, int shift) { u64 tmp = ((u64)ticks) << shift; do_div(tmp, nsec); return (unsigned long) tmp; } /* Clock event layer functions */ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec) { return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec); } extern void clockevents_suspend(void); extern void clockevents_resume(void); # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST # ifdef CONFIG_ARCH_HAS_TICK_BROADCAST extern void tick_broadcast(const struct cpumask *mask); # else # define tick_broadcast NULL # endif extern int tick_receive_broadcast(void); # endif # if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); # else static __always_inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } # endif #else /* !CONFIG_GENERIC_CLOCKEVENTS: */ static inline void clockevents_suspend(void) { } static inline void clockevents_resume(void) { } static __always_inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #endif /* _LINUX_CLOCKCHIPS_H */
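To make the structure and helpers above concrete, here is a hedged sketch of how a one-shot per-CPU timer driver might describe itself to the clockevents core and register through clockevents_config_and_register(); the my_timer_* names, the delta bounds and all hardware programming are hypothetical placeholders, not taken from this header.

/* Illustrative clockevents user (hypothetical my_timer_* hardware). */
#include <linux/clockchips.h>
#include <linux/interrupt.h>

static int my_timer_set_next_event(unsigned long delta, struct clock_event_device *ced)
{
	/* program the (hypothetical) hardware comparator 'delta' ticks ahead */
	return 0;
}

static int my_timer_shutdown(struct clock_event_device *ced)
{
	/* mask the (hypothetical) timer interrupt */
	return 0;
}

static irqreturn_t my_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *ced = dev_id;

	/* ack the (hypothetical) hardware, then let the core run the tick */
	ced->event_handler(ced);
	return IRQ_HANDLED;
}

static struct clock_event_device my_timer_clockevent = {
	.name			= "my-timer",
	.features		= CLOCK_EVT_FEAT_ONESHOT,
	.rating			= 300,
	.set_next_event		= my_timer_set_next_event,
	.set_state_shutdown	= my_timer_shutdown,
	.set_state_oneshot	= my_timer_shutdown,	/* nothing to do until set_next_event() */
};

static void my_timer_register(unsigned int cpu, u32 timer_freq_hz)
{
	my_timer_clockevent.cpumask = cpumask_of(cpu);
	/* illustrative bounds: 1 tick minimum, ~2^31 ticks maximum; the core derives mult/shift and the ns limits */
	clockevents_config_and_register(&my_timer_clockevent, timer_freq_hz, 1, 0x7fffffff);
}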
/* SPDX-License-Identifier: GPL-2.0 */ /* * security/tomoyo/common.h * * Header file for TOMOYO. * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #ifndef _SECURITY_TOMOYO_COMMON_H #define _SECURITY_TOMOYO_COMMON_H #define pr_fmt(fmt) fmt #include <linux/ctype.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/file.h> #include <linux/kmod.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/list.h> #include <linux/cred.h> #include <linux/poll.h> #include <linux/binfmts.h> #include <linux/highmem.h> #include <linux/net.h> #include <linux/inet.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/un.h> #include <linux/lsm_hooks.h> #include <net/sock.h> #include <net/af_unix.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/udp.h> /********** Constants definitions. **********/ /* * TOMOYO uses this hash only when appending a string into the string * table. Frequency of appending strings is very low. So we don't need * large (e.g. 64k) hash size. 256 will be sufficient. */ #define TOMOYO_HASH_BITS 8 #define TOMOYO_MAX_HASH (1u<<TOMOYO_HASH_BITS) /* * TOMOYO checks only SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, SOCK_SEQPACKET. * Therefore, we don't need SOCK_MAX. */ #define TOMOYO_SOCK_MAX 6 #define TOMOYO_EXEC_TMPSIZE 4096 /* Garbage collector is trying to kfree() this element. */ #define TOMOYO_GC_IN_PROGRESS -1 /* Profile number is an integer between 0 and 255. */ #define TOMOYO_MAX_PROFILES 256 /* Group number is an integer between 0 and 255. */ #define TOMOYO_MAX_ACL_GROUPS 256 /* Index numbers for "struct tomoyo_condition".
*/ enum tomoyo_conditions_index { TOMOYO_TASK_UID, /* current_uid() */ TOMOYO_TASK_EUID, /* current_euid() */ TOMOYO_TASK_SUID, /* current_suid() */ TOMOYO_TASK_FSUID, /* current_fsuid() */ TOMOYO_TASK_GID, /* current_gid() */ TOMOYO_TASK_EGID, /* current_egid() */ TOMOYO_TASK_SGID, /* current_sgid() */ TOMOYO_TASK_FSGID, /* current_fsgid() */ TOMOYO_TASK_PID, /* sys_getpid() */ TOMOYO_TASK_PPID, /* sys_getppid() */ TOMOYO_EXEC_ARGC, /* "struct linux_binprm *"->argc */ TOMOYO_EXEC_ENVC, /* "struct linux_binprm *"->envc */ TOMOYO_TYPE_IS_SOCKET, /* S_IFSOCK */ TOMOYO_TYPE_IS_SYMLINK, /* S_IFLNK */ TOMOYO_TYPE_IS_FILE, /* S_IFREG */ TOMOYO_TYPE_IS_BLOCK_DEV, /* S_IFBLK */ TOMOYO_TYPE_IS_DIRECTORY, /* S_IFDIR */ TOMOYO_TYPE_IS_CHAR_DEV, /* S_IFCHR */ TOMOYO_TYPE_IS_FIFO, /* S_IFIFO */ TOMOYO_MODE_SETUID, /* S_ISUID */ TOMOYO_MODE_SETGID, /* S_ISGID */ TOMOYO_MODE_STICKY, /* S_ISVTX */ TOMOYO_MODE_OWNER_READ, /* S_IRUSR */ TOMOYO_MODE_OWNER_WRITE, /* S_IWUSR */ TOMOYO_MODE_OWNER_EXECUTE, /* S_IXUSR */ TOMOYO_MODE_GROUP_READ, /* S_IRGRP */ TOMOYO_MODE_GROUP_WRITE, /* S_IWGRP */ TOMOYO_MODE_GROUP_EXECUTE, /* S_IXGRP */ TOMOYO_MODE_OTHERS_READ, /* S_IROTH */ TOMOYO_MODE_OTHERS_WRITE, /* S_IWOTH */ TOMOYO_MODE_OTHERS_EXECUTE, /* S_IXOTH */ TOMOYO_EXEC_REALPATH, TOMOYO_SYMLINK_TARGET, TOMOYO_PATH1_UID, TOMOYO_PATH1_GID, TOMOYO_PATH1_INO, TOMOYO_PATH1_MAJOR, TOMOYO_PATH1_MINOR, TOMOYO_PATH1_PERM, TOMOYO_PATH1_TYPE, TOMOYO_PATH1_DEV_MAJOR, TOMOYO_PATH1_DEV_MINOR, TOMOYO_PATH2_UID, TOMOYO_PATH2_GID, TOMOYO_PATH2_INO, TOMOYO_PATH2_MAJOR, TOMOYO_PATH2_MINOR, TOMOYO_PATH2_PERM, TOMOYO_PATH2_TYPE, TOMOYO_PATH2_DEV_MAJOR, TOMOYO_PATH2_DEV_MINOR, TOMOYO_PATH1_PARENT_UID, TOMOYO_PATH1_PARENT_GID, TOMOYO_PATH1_PARENT_INO, TOMOYO_PATH1_PARENT_PERM, TOMOYO_PATH2_PARENT_UID, TOMOYO_PATH2_PARENT_GID, TOMOYO_PATH2_PARENT_INO, TOMOYO_PATH2_PARENT_PERM, TOMOYO_MAX_CONDITION_KEYWORD, TOMOYO_NUMBER_UNION, TOMOYO_NAME_UNION, TOMOYO_ARGV_ENTRY, TOMOYO_ENVP_ENTRY, }; /* Index numbers for stat(). */ enum tomoyo_path_stat_index { /* Do not change this order. */ TOMOYO_PATH1, TOMOYO_PATH1_PARENT, TOMOYO_PATH2, TOMOYO_PATH2_PARENT, TOMOYO_MAX_PATH_STAT }; /* Index numbers for operation mode. */ enum tomoyo_mode_index { TOMOYO_CONFIG_DISABLED, TOMOYO_CONFIG_LEARNING, TOMOYO_CONFIG_PERMISSIVE, TOMOYO_CONFIG_ENFORCING, TOMOYO_CONFIG_MAX_MODE, TOMOYO_CONFIG_WANT_REJECT_LOG = 64, TOMOYO_CONFIG_WANT_GRANT_LOG = 128, TOMOYO_CONFIG_USE_DEFAULT = 255, }; /* Index numbers for entry type. */ enum tomoyo_policy_id { TOMOYO_ID_GROUP, TOMOYO_ID_ADDRESS_GROUP, TOMOYO_ID_PATH_GROUP, TOMOYO_ID_NUMBER_GROUP, TOMOYO_ID_TRANSITION_CONTROL, TOMOYO_ID_AGGREGATOR, TOMOYO_ID_MANAGER, TOMOYO_ID_CONDITION, TOMOYO_ID_NAME, TOMOYO_ID_ACL, TOMOYO_ID_DOMAIN, TOMOYO_MAX_POLICY }; /* Index numbers for domain's attributes. */ enum tomoyo_domain_info_flags_index { /* Quota warnning flag. */ TOMOYO_DIF_QUOTA_WARNED, /* * This domain was unable to create a new domain at * tomoyo_find_next_domain() because the name of the domain to be * created was too long or it could not allocate memory. * More than one process continued execve() without domain transition. */ TOMOYO_DIF_TRANSITION_FAILED, TOMOYO_MAX_DOMAIN_INFO_FLAGS }; /* Index numbers for audit type. */ enum tomoyo_grant_log { /* Follow profile's configuration. */ TOMOYO_GRANTLOG_AUTO, /* Do not generate grant log. */ TOMOYO_GRANTLOG_NO, /* Generate grant_log. */ TOMOYO_GRANTLOG_YES, }; /* Index numbers for group entries. 
*/ enum tomoyo_group_id { TOMOYO_PATH_GROUP, TOMOYO_NUMBER_GROUP, TOMOYO_ADDRESS_GROUP, TOMOYO_MAX_GROUP }; /* Index numbers for type of numeric values. */ enum tomoyo_value_type { TOMOYO_VALUE_TYPE_INVALID, TOMOYO_VALUE_TYPE_DECIMAL, TOMOYO_VALUE_TYPE_OCTAL, TOMOYO_VALUE_TYPE_HEXADECIMAL, }; /* Index numbers for domain transition control keywords. */ enum tomoyo_transition_type { /* Do not change this order, */ TOMOYO_TRANSITION_CONTROL_NO_RESET, TOMOYO_TRANSITION_CONTROL_RESET, TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE, TOMOYO_TRANSITION_CONTROL_INITIALIZE, TOMOYO_TRANSITION_CONTROL_NO_KEEP, TOMOYO_TRANSITION_CONTROL_KEEP, TOMOYO_MAX_TRANSITION_TYPE }; /* Index numbers for Access Controls. */ enum tomoyo_acl_entry_type_index { TOMOYO_TYPE_PATH_ACL, TOMOYO_TYPE_PATH2_ACL, TOMOYO_TYPE_PATH_NUMBER_ACL, TOMOYO_TYPE_MKDEV_ACL, TOMOYO_TYPE_MOUNT_ACL, TOMOYO_TYPE_INET_ACL, TOMOYO_TYPE_UNIX_ACL, TOMOYO_TYPE_ENV_ACL, TOMOYO_TYPE_MANUAL_TASK_ACL, }; /* Index numbers for access controls with one pathname. */ enum tomoyo_path_acl_index { TOMOYO_TYPE_EXECUTE, TOMOYO_TYPE_READ, TOMOYO_TYPE_WRITE, TOMOYO_TYPE_APPEND, TOMOYO_TYPE_UNLINK, TOMOYO_TYPE_GETATTR, TOMOYO_TYPE_RMDIR, TOMOYO_TYPE_TRUNCATE, TOMOYO_TYPE_SYMLINK, TOMOYO_TYPE_CHROOT, TOMOYO_TYPE_UMOUNT, TOMOYO_MAX_PATH_OPERATION }; /* Index numbers for /sys/kernel/security/tomoyo/stat interface. */ enum tomoyo_memory_stat_type { TOMOYO_MEMORY_POLICY, TOMOYO_MEMORY_AUDIT, TOMOYO_MEMORY_QUERY, TOMOYO_MAX_MEMORY_STAT }; enum tomoyo_mkdev_acl_index { TOMOYO_TYPE_MKBLOCK, TOMOYO_TYPE_MKCHAR, TOMOYO_MAX_MKDEV_OPERATION }; /* Index numbers for socket operations. */ enum tomoyo_network_acl_index { TOMOYO_NETWORK_BIND, /* bind() operation. */ TOMOYO_NETWORK_LISTEN, /* listen() operation. */ TOMOYO_NETWORK_CONNECT, /* connect() operation. */ TOMOYO_NETWORK_SEND, /* send() operation. */ TOMOYO_MAX_NETWORK_OPERATION }; /* Index numbers for access controls with two pathnames. */ enum tomoyo_path2_acl_index { TOMOYO_TYPE_LINK, TOMOYO_TYPE_RENAME, TOMOYO_TYPE_PIVOT_ROOT, TOMOYO_MAX_PATH2_OPERATION }; /* Index numbers for access controls with one pathname and one number. */ enum tomoyo_path_number_acl_index { TOMOYO_TYPE_CREATE, TOMOYO_TYPE_MKDIR, TOMOYO_TYPE_MKFIFO, TOMOYO_TYPE_MKSOCK, TOMOYO_TYPE_IOCTL, TOMOYO_TYPE_CHMOD, TOMOYO_TYPE_CHOWN, TOMOYO_TYPE_CHGRP, TOMOYO_MAX_PATH_NUMBER_OPERATION }; /* Index numbers for /sys/kernel/security/tomoyo/ interfaces. */ enum tomoyo_securityfs_interface_index { TOMOYO_DOMAINPOLICY, TOMOYO_EXCEPTIONPOLICY, TOMOYO_PROCESS_STATUS, TOMOYO_STAT, TOMOYO_AUDIT, TOMOYO_VERSION, TOMOYO_PROFILE, TOMOYO_QUERY, TOMOYO_MANAGER }; /* Index numbers for special mount operations. */ enum tomoyo_special_mount { TOMOYO_MOUNT_BIND, /* mount --bind /source /dest */ TOMOYO_MOUNT_MOVE, /* mount --move /old /new */ TOMOYO_MOUNT_REMOUNT, /* mount -o remount /dir */ TOMOYO_MOUNT_MAKE_UNBINDABLE, /* mount --make-unbindable /dir */ TOMOYO_MOUNT_MAKE_PRIVATE, /* mount --make-private /dir */ TOMOYO_MOUNT_MAKE_SLAVE, /* mount --make-slave /dir */ TOMOYO_MOUNT_MAKE_SHARED, /* mount --make-shared /dir */ TOMOYO_MAX_SPECIAL_MOUNT }; /* Index numbers for functionality. 
*/ enum tomoyo_mac_index { TOMOYO_MAC_FILE_EXECUTE, TOMOYO_MAC_FILE_OPEN, TOMOYO_MAC_FILE_CREATE, TOMOYO_MAC_FILE_UNLINK, TOMOYO_MAC_FILE_GETATTR, TOMOYO_MAC_FILE_MKDIR, TOMOYO_MAC_FILE_RMDIR, TOMOYO_MAC_FILE_MKFIFO, TOMOYO_MAC_FILE_MKSOCK, TOMOYO_MAC_FILE_TRUNCATE, TOMOYO_MAC_FILE_SYMLINK, TOMOYO_MAC_FILE_MKBLOCK, TOMOYO_MAC_FILE_MKCHAR, TOMOYO_MAC_FILE_LINK, TOMOYO_MAC_FILE_RENAME, TOMOYO_MAC_FILE_CHMOD, TOMOYO_MAC_FILE_CHOWN, TOMOYO_MAC_FILE_CHGRP, TOMOYO_MAC_FILE_IOCTL, TOMOYO_MAC_FILE_CHROOT, TOMOYO_MAC_FILE_MOUNT, TOMOYO_MAC_FILE_UMOUNT, TOMOYO_MAC_FILE_PIVOT_ROOT, TOMOYO_MAC_NETWORK_INET_STREAM_BIND, TOMOYO_MAC_NETWORK_INET_STREAM_LISTEN, TOMOYO_MAC_NETWORK_INET_STREAM_CONNECT, TOMOYO_MAC_NETWORK_INET_DGRAM_BIND, TOMOYO_MAC_NETWORK_INET_DGRAM_SEND, TOMOYO_MAC_NETWORK_INET_RAW_BIND, TOMOYO_MAC_NETWORK_INET_RAW_SEND, TOMOYO_MAC_NETWORK_UNIX_STREAM_BIND, TOMOYO_MAC_NETWORK_UNIX_STREAM_LISTEN, TOMOYO_MAC_NETWORK_UNIX_STREAM_CONNECT, TOMOYO_MAC_NETWORK_UNIX_DGRAM_BIND, TOMOYO_MAC_NETWORK_UNIX_DGRAM_SEND, TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_BIND, TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_LISTEN, TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_CONNECT, TOMOYO_MAC_ENVIRON, TOMOYO_MAX_MAC_INDEX }; /* Index numbers for category of functionality. */ enum tomoyo_mac_category_index { TOMOYO_MAC_CATEGORY_FILE, TOMOYO_MAC_CATEGORY_NETWORK, TOMOYO_MAC_CATEGORY_MISC, TOMOYO_MAX_MAC_CATEGORY_INDEX }; /* * Retry this request. Returned by tomoyo_supervisor() if policy violation has * occurred in enforcing mode and the userspace daemon decided to retry. * * We must choose a positive value in order to distinguish "granted" (which is * 0) and "rejected" (which is a negative value) and "retry". */ #define TOMOYO_RETRY_REQUEST 1 /* Index numbers for /sys/kernel/security/tomoyo/stat interface. */ enum tomoyo_policy_stat_type { /* Do not change this order. */ TOMOYO_STAT_POLICY_UPDATES, TOMOYO_STAT_POLICY_LEARNING, /* == TOMOYO_CONFIG_LEARNING */ TOMOYO_STAT_POLICY_PERMISSIVE, /* == TOMOYO_CONFIG_PERMISSIVE */ TOMOYO_STAT_POLICY_ENFORCING, /* == TOMOYO_CONFIG_ENFORCING */ TOMOYO_MAX_POLICY_STAT }; /* Index numbers for profile's PREFERENCE values. */ enum tomoyo_pref_index { TOMOYO_PREF_MAX_AUDIT_LOG, TOMOYO_PREF_MAX_LEARNING_ENTRY, TOMOYO_MAX_PREF }; /********** Structure definitions. **********/ /* Common header for holding ACL entries. */ struct tomoyo_acl_head { struct list_head list; s8 is_deleted; /* true or false or TOMOYO_GC_IN_PROGRESS */ } __packed; /* Common header for shared entries. */ struct tomoyo_shared_acl_head { struct list_head list; atomic_t users; } __packed; struct tomoyo_policy_namespace; /* Structure for request info. */ struct tomoyo_request_info { /* * For holding parameters specific to operations which deal files. * NULL if not dealing files. */ struct tomoyo_obj_info *obj; /* * For holding parameters specific to execve() request. * NULL if not dealing execve(). */ struct tomoyo_execve *ee; struct tomoyo_domain_info *domain; /* For holding parameters. */ union { struct { const struct tomoyo_path_info *filename; /* For using wildcards at tomoyo_find_next_domain(). */ const struct tomoyo_path_info *matched_path; /* One of values in "enum tomoyo_path_acl_index". */ u8 operation; } path; struct { const struct tomoyo_path_info *filename1; const struct tomoyo_path_info *filename2; /* One of values in "enum tomoyo_path2_acl_index". */ u8 operation; } path2; struct { const struct tomoyo_path_info *filename; unsigned int mode; unsigned int major; unsigned int minor; /* One of values in "enum tomoyo_mkdev_acl_index". 
*/ u8 operation; } mkdev; struct { const struct tomoyo_path_info *filename; unsigned long number; /* * One of values in * "enum tomoyo_path_number_acl_index". */ u8 operation; } path_number; struct { const struct tomoyo_path_info *name; } environ; struct { const __be32 *address; u16 port; /* One of values smaller than TOMOYO_SOCK_MAX. */ u8 protocol; /* One of values in "enum tomoyo_network_acl_index". */ u8 operation; bool is_ipv6; } inet_network; struct { const struct tomoyo_path_info *address; /* One of values smaller than TOMOYO_SOCK_MAX. */ u8 protocol; /* One of values in "enum tomoyo_network_acl_index". */ u8 operation; } unix_network; struct { const struct tomoyo_path_info *type; const struct tomoyo_path_info *dir; const struct tomoyo_path_info *dev; unsigned long flags; int need_dev; } mount; struct { const struct tomoyo_path_info *domainname; } task; } param; struct tomoyo_acl_info *matched_acl; u8 param_type; bool granted; u8 retry; u8 profile; u8 mode; /* One of tomoyo_mode_index . */ u8 type; }; /* Structure for holding a token. */ struct tomoyo_path_info { const char *name; u32 hash; /* = full_name_hash(name, strlen(name)) */ u16 const_len; /* = tomoyo_const_part_length(name) */ bool is_dir; /* = tomoyo_strendswith(name, "/") */ bool is_patterned; /* = tomoyo_path_contains_pattern(name) */ }; /* Structure for holding string data. */ struct tomoyo_name { struct tomoyo_shared_acl_head head; struct tomoyo_path_info entry; }; /* Structure for holding a word. */ struct tomoyo_name_union { /* Either @filename or @group is NULL. */ const struct tomoyo_path_info *filename; struct tomoyo_group *group; }; /* Structure for holding a number. */ struct tomoyo_number_union { unsigned long values[2]; struct tomoyo_group *group; /* Maybe NULL. */ /* One of values in "enum tomoyo_value_type". */ u8 value_type[2]; }; /* Structure for holding an IP address. */ struct tomoyo_ipaddr_union { struct in6_addr ip[2]; /* Big endian. */ struct tomoyo_group *group; /* Pointer to address group. */ bool is_ipv6; /* Valid only if @group == NULL. */ }; /* Structure for "path_group"/"number_group"/"address_group" directive. */ struct tomoyo_group { struct tomoyo_shared_acl_head head; const struct tomoyo_path_info *group_name; struct list_head member_list; }; /* Structure for "path_group" directive. */ struct tomoyo_path_group { struct tomoyo_acl_head head; const struct tomoyo_path_info *member_name; }; /* Structure for "number_group" directive. */ struct tomoyo_number_group { struct tomoyo_acl_head head; struct tomoyo_number_union number; }; /* Structure for "address_group" directive. */ struct tomoyo_address_group { struct tomoyo_acl_head head; /* Structure for holding an IP address. */ struct tomoyo_ipaddr_union address; }; /* Subset of "struct stat". Used by conditional ACL and audit logs. */ struct tomoyo_mini_stat { kuid_t uid; kgid_t gid; ino_t ino; umode_t mode; dev_t dev; dev_t rdev; }; /* Structure for dumping argv[] and envp[] of "struct linux_binprm". */ struct tomoyo_page_dump { struct page *page; /* Previously dumped page. */ char *data; /* Contents of "page". Size is PAGE_SIZE. */ }; /* Structure for attribute checks in addition to pathname checks. */ struct tomoyo_obj_info { /* * True if tomoyo_get_attributes() was already called, false otherwise. */ bool validate_done; /* True if @stat[] is valid. */ bool stat_valid[TOMOYO_MAX_PATH_STAT]; /* First pathname. Initialized with { NULL, NULL } if no path. */ struct path path1; /* Second pathname. Initialized with { NULL, NULL } if no path. 
*/ struct path path2; /* * Information on @path1, @path1's parent directory, @path2, @path2's * parent directory. */ struct tomoyo_mini_stat stat[TOMOYO_MAX_PATH_STAT]; /* * Content of symbolic link to be created. NULL for operations other * than symlink(). */ struct tomoyo_path_info *symlink_target; }; /* Structure for argv[]. */ struct tomoyo_argv { unsigned long index; const struct tomoyo_path_info *value; bool is_not; }; /* Structure for envp[]. */ struct tomoyo_envp { const struct tomoyo_path_info *name; const struct tomoyo_path_info *value; bool is_not; }; /* Structure for execve() operation. */ struct tomoyo_execve { struct tomoyo_request_info r; struct tomoyo_obj_info obj; struct linux_binprm *bprm; const struct tomoyo_path_info *transition; /* For dumping argv[] and envp[]. */ struct tomoyo_page_dump dump; /* For temporary use. */ char *tmp; /* Size is TOMOYO_EXEC_TMPSIZE bytes */ }; /* Structure for entries which follows "struct tomoyo_condition". */ struct tomoyo_condition_element { /* * Left hand operand. A "struct tomoyo_argv" for TOMOYO_ARGV_ENTRY, a * "struct tomoyo_envp" for TOMOYO_ENVP_ENTRY is attached to the tail * of the array of this struct. */ u8 left; /* * Right hand operand. A "struct tomoyo_number_union" for * TOMOYO_NUMBER_UNION, a "struct tomoyo_name_union" for * TOMOYO_NAME_UNION is attached to the tail of the array of this * struct. */ u8 right; /* Equation operator. True if equals or overlaps, false otherwise. */ bool equals; }; /* Structure for optional arguments. */ struct tomoyo_condition { struct tomoyo_shared_acl_head head; u32 size; /* Memory size allocated for this entry. */ u16 condc; /* Number of conditions in this struct. */ u16 numbers_count; /* Number of "struct tomoyo_number_union values". */ u16 names_count; /* Number of "struct tomoyo_name_union names". */ u16 argc; /* Number of "struct tomoyo_argv". */ u16 envc; /* Number of "struct tomoyo_envp". */ u8 grant_log; /* One of values in "enum tomoyo_grant_log". */ const struct tomoyo_path_info *transit; /* Maybe NULL. */ /* * struct tomoyo_condition_element condition[condc]; * struct tomoyo_number_union values[numbers_count]; * struct tomoyo_name_union names[names_count]; * struct tomoyo_argv argv[argc]; * struct tomoyo_envp envp[envc]; */ }; /* Common header for individual entries. */ struct tomoyo_acl_info { struct list_head list; struct tomoyo_condition *cond; /* Maybe NULL. */ s8 is_deleted; /* true or false or TOMOYO_GC_IN_PROGRESS */ u8 type; /* One of values in "enum tomoyo_acl_entry_type_index". */ } __packed; /* Structure for domain information. */ struct tomoyo_domain_info { struct list_head list; struct list_head acl_info_list; /* Name of this domain. Never NULL. */ const struct tomoyo_path_info *domainname; /* Namespace for this domain. Never NULL. */ struct tomoyo_policy_namespace *ns; /* Group numbers to use. */ unsigned long group[TOMOYO_MAX_ACL_GROUPS / BITS_PER_LONG]; u8 profile; /* Profile number to use. */ bool is_deleted; /* Delete flag. */ bool flags[TOMOYO_MAX_DOMAIN_INFO_FLAGS]; atomic_t users; /* Number of referring tasks. */ }; /* * Structure for "task manual_domain_transition" directive. */ struct tomoyo_task_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MANUAL_TASK_ACL */ /* Pointer to domainname. */ const struct tomoyo_path_info *domainname; }; /* * Structure for "file execute", "file read", "file write", "file append", * "file unlink", "file getattr", "file rmdir", "file truncate", * "file symlink", "file chroot" and "file unmount" directive. 
*/ struct tomoyo_path_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_ACL */ u16 perm; /* Bitmask of values in "enum tomoyo_path_acl_index". */ struct tomoyo_name_union name; }; /* * Structure for "file create", "file mkdir", "file mkfifo", "file mksock", * "file ioctl", "file chmod", "file chown" and "file chgrp" directive. */ struct tomoyo_path_number_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_NUMBER_ACL */ /* Bitmask of values in "enum tomoyo_path_number_acl_index". */ u8 perm; struct tomoyo_name_union name; struct tomoyo_number_union number; }; /* Structure for "file mkblock" and "file mkchar" directive. */ struct tomoyo_mkdev_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MKDEV_ACL */ u8 perm; /* Bitmask of values in "enum tomoyo_mkdev_acl_index". */ struct tomoyo_name_union name; struct tomoyo_number_union mode; struct tomoyo_number_union major; struct tomoyo_number_union minor; }; /* * Structure for "file rename", "file link" and "file pivot_root" directive. */ struct tomoyo_path2_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH2_ACL */ u8 perm; /* Bitmask of values in "enum tomoyo_path2_acl_index". */ struct tomoyo_name_union name1; struct tomoyo_name_union name2; }; /* Structure for "file mount" directive. */ struct tomoyo_mount_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MOUNT_ACL */ struct tomoyo_name_union dev_name; struct tomoyo_name_union dir_name; struct tomoyo_name_union fs_type; struct tomoyo_number_union flags; }; /* Structure for "misc env" directive in domain policy. */ struct tomoyo_env_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_ENV_ACL */ const struct tomoyo_path_info *env; /* environment variable */ }; /* Structure for "network inet" directive. */ struct tomoyo_inet_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_INET_ACL */ u8 protocol; u8 perm; /* Bitmask of values in "enum tomoyo_network_acl_index" */ struct tomoyo_ipaddr_union address; struct tomoyo_number_union port; }; /* Structure for "network unix" directive. */ struct tomoyo_unix_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_UNIX_ACL */ u8 protocol; u8 perm; /* Bitmask of values in "enum tomoyo_network_acl_index" */ struct tomoyo_name_union name; }; /* Structure for holding a line from /sys/kernel/security/tomoyo/ interface. */ struct tomoyo_acl_param { char *data; struct list_head *list; struct tomoyo_policy_namespace *ns; bool is_delete; }; #define TOMOYO_MAX_IO_READ_QUEUE 64 /* * Structure for reading/writing policy via /sys/kernel/security/tomoyo * interfaces. */ struct tomoyo_io_buffer { void (*read)(struct tomoyo_io_buffer *head); int (*write)(struct tomoyo_io_buffer *head); __poll_t (*poll)(struct file *file, poll_table *wait); /* Exclusive lock for this structure. */ struct mutex io_sem; char __user *read_user_buf; size_t read_user_buf_avail; struct { struct list_head *ns; struct list_head *domain; struct list_head *group; struct list_head *acl; size_t avail; unsigned int step; unsigned int query_index; u16 index; u16 cond_index; u8 acl_group_index; u8 cond_step; u8 bit; u8 w_pos; bool eof; bool print_this_domain_only; bool print_transition_related_only; bool print_cond_part; const char *w[TOMOYO_MAX_IO_READ_QUEUE]; } r; struct { struct tomoyo_policy_namespace *ns; /* The position currently writing to. */ struct tomoyo_domain_info *domain; /* Bytes available for writing. */ size_t avail; bool is_delete; } w; /* Buffer for reading. */ char *read_buf; /* Size of read buffer. 
*/ size_t readbuf_size; /* Buffer for writing. */ char *write_buf; /* Size of write buffer. */ size_t writebuf_size; /* Type of this interface. */ enum tomoyo_securityfs_interface_index type; /* Users counter protected by tomoyo_io_buffer_list_lock. */ u8 users; /* List for telling GC not to kfree() elements. */ struct list_head list; }; /* * Structure for "initialize_domain"/"no_initialize_domain"/"keep_domain"/ * "no_keep_domain" keyword. */ struct tomoyo_transition_control { struct tomoyo_acl_head head; u8 type; /* One of values in "enum tomoyo_transition_type". */ /* True if the domainname is tomoyo_get_last_name(). */ bool is_last_name; const struct tomoyo_path_info *domainname; /* Maybe NULL */ const struct tomoyo_path_info *program; /* Maybe NULL */ }; /* Structure for "aggregator" keyword. */ struct tomoyo_aggregator { struct tomoyo_acl_head head; const struct tomoyo_path_info *original_name; const struct tomoyo_path_info *aggregated_name; }; /* Structure for policy manager. */ struct tomoyo_manager { struct tomoyo_acl_head head; /* A path to program or a domainname. */ const struct tomoyo_path_info *manager; }; struct tomoyo_preference { unsigned int learning_max_entry; bool enforcing_verbose; bool learning_verbose; bool permissive_verbose; }; /* Structure for /sys/kernel/security/tomnoyo/profile interface. */ struct tomoyo_profile { const struct tomoyo_path_info *comment; struct tomoyo_preference *learning; struct tomoyo_preference *permissive; struct tomoyo_preference *enforcing; struct tomoyo_preference preference; u8 default_config; u8 config[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX]; unsigned int pref[TOMOYO_MAX_PREF]; }; /* Structure for representing YYYY/MM/DD hh/mm/ss. */ struct tomoyo_time { u16 year; u8 month; u8 day; u8 hour; u8 min; u8 sec; }; /* Structure for policy namespace. */ struct tomoyo_policy_namespace { /* Profile table. Memory is allocated as needed. */ struct tomoyo_profile *profile_ptr[TOMOYO_MAX_PROFILES]; /* List of "struct tomoyo_group". */ struct list_head group_list[TOMOYO_MAX_GROUP]; /* List of policy. */ struct list_head policy_list[TOMOYO_MAX_POLICY]; /* The global ACL referred by "use_group" keyword. */ struct list_head acl_group[TOMOYO_MAX_ACL_GROUPS]; /* List for connecting to tomoyo_namespace_list list. */ struct list_head namespace_list; /* Profile version. Currently only 20150505 is defined. */ unsigned int profile_version; /* Name of this namespace (e.g. "<kernel>", "</usr/sbin/httpd>" ). */ const char *name; }; /* Structure for "struct task_struct"->security. */ struct tomoyo_task { struct tomoyo_domain_info *domain_info; struct tomoyo_domain_info *old_domain_info; }; /********** Function prototypes. 
**********/ bool tomoyo_address_matches_group(const bool is_ipv6, const __be32 *address, const struct tomoyo_group *group); bool tomoyo_compare_number_union(const unsigned long value, const struct tomoyo_number_union *ptr); bool tomoyo_condition(struct tomoyo_request_info *r, const struct tomoyo_condition *cond); bool tomoyo_correct_domain(const unsigned char *domainname); bool tomoyo_correct_path(const char *filename); bool tomoyo_correct_word(const char *string); bool tomoyo_domain_def(const unsigned char *buffer); bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r); bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, struct tomoyo_page_dump *dump); bool tomoyo_memory_ok(void *ptr); bool tomoyo_number_matches_group(const unsigned long min, const unsigned long max, const struct tomoyo_group *group); bool tomoyo_parse_ipaddr_union(struct tomoyo_acl_param *param, struct tomoyo_ipaddr_union *ptr); bool tomoyo_parse_name_union(struct tomoyo_acl_param *param, struct tomoyo_name_union *ptr); bool tomoyo_parse_number_union(struct tomoyo_acl_param *param, struct tomoyo_number_union *ptr); bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, const struct tomoyo_path_info *pattern); bool tomoyo_permstr(const char *string, const char *keyword); bool tomoyo_str_starts(char **src, const char *find); char *tomoyo_encode(const char *str); char *tomoyo_encode2(const char *str, int str_len); char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt, va_list args) __printf(3, 0); char *tomoyo_read_token(struct tomoyo_acl_param *param); char *tomoyo_realpath_from_path(const struct path *path); char *tomoyo_realpath_nofollow(const char *pathname); const char *tomoyo_get_exe(void); const struct tomoyo_path_info *tomoyo_compare_name_union (const struct tomoyo_path_info *name, const struct tomoyo_name_union *ptr); const struct tomoyo_path_info *tomoyo_get_domainname (struct tomoyo_acl_param *param); const struct tomoyo_path_info *tomoyo_get_name(const char *name); const struct tomoyo_path_info *tomoyo_path_matches_group (const struct tomoyo_path_info *pathname, const struct tomoyo_group *group); int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, const struct path *path, const int flag); void tomoyo_close_control(struct tomoyo_io_buffer *head); int tomoyo_env_perm(struct tomoyo_request_info *r, const char *env); int tomoyo_execute_permission(struct tomoyo_request_info *r, const struct tomoyo_path_info *filename); int tomoyo_find_next_domain(struct linux_binprm *bprm); int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile, const u8 index); int tomoyo_init_request_info(struct tomoyo_request_info *r, struct tomoyo_domain_info *domain, const u8 index); int tomoyo_mkdev_perm(const u8 operation, const struct path *path, const unsigned int mode, unsigned int dev); int tomoyo_mount_permission(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data_page); int tomoyo_open_control(const u8 type, struct file *file); int tomoyo_path2_perm(const u8 operation, const struct path *path1, const struct path *path2); int tomoyo_path_number_perm(const u8 operation, const struct path *path, unsigned long number); int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target); __poll_t tomoyo_poll_control(struct file *file, poll_table *wait); __poll_t tomoyo_poll_log(struct file *file, poll_table *wait); int tomoyo_socket_bind_permission(struct socket *sock, struct 
sockaddr *addr, int addr_len); int tomoyo_socket_connect_permission(struct socket *sock, struct sockaddr *addr, int addr_len); int tomoyo_socket_listen_permission(struct socket *sock); int tomoyo_socket_sendmsg_permission(struct socket *sock, struct msghdr *msg, int size); int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...) __printf(2, 3); int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size, struct tomoyo_acl_param *param, bool (*check_duplicate) (const struct tomoyo_acl_info *, const struct tomoyo_acl_info *), bool (*merge_duplicate) (struct tomoyo_acl_info *, struct tomoyo_acl_info *, const bool)); int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size, struct tomoyo_acl_param *param, bool (*check_duplicate) (const struct tomoyo_acl_head *, const struct tomoyo_acl_head *)); int tomoyo_write_aggregator(struct tomoyo_acl_param *param); int tomoyo_write_file(struct tomoyo_acl_param *param); int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type); int tomoyo_write_misc(struct tomoyo_acl_param *param); int tomoyo_write_inet_network(struct tomoyo_acl_param *param); int tomoyo_write_transition_control(struct tomoyo_acl_param *param, const u8 type); int tomoyo_write_unix_network(struct tomoyo_acl_param *param); ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer, const int buffer_len); ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, const char __user *buffer, const int buffer_len); struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param); struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname, const bool transit); struct tomoyo_domain_info *tomoyo_domain(void); struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname); struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param, const u8 idx); struct tomoyo_policy_namespace *tomoyo_assign_namespace (const char *domainname); struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns, const u8 profile); u8 tomoyo_parse_ulong(unsigned long *result, char **str); void *tomoyo_commit_ok(void *data, const unsigned int size); void __init tomoyo_load_builtin_policy(void); void __init tomoyo_mm_init(void); void tomoyo_check_acl(struct tomoyo_request_info *r, bool (*check_entry)(struct tomoyo_request_info *, const struct tomoyo_acl_info *)); void tomoyo_check_profile(void); void tomoyo_convert_time(time64_t time, struct tomoyo_time *stamp); void tomoyo_del_condition(struct list_head *element); void tomoyo_fill_path_info(struct tomoyo_path_info *ptr); void tomoyo_get_attributes(struct tomoyo_obj_info *obj); void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns); void tomoyo_load_policy(const char *filename); void tomoyo_normalize_line(unsigned char *buffer); void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register); void tomoyo_print_ip(char *buf, const unsigned int size, const struct tomoyo_ipaddr_union *ptr); void tomoyo_print_ulong(char *buffer, const int buffer_len, const unsigned long value, const u8 type); void tomoyo_put_name_union(struct tomoyo_name_union *ptr); void tomoyo_put_number_union(struct tomoyo_number_union *ptr); void tomoyo_read_log(struct tomoyo_io_buffer *head); void tomoyo_update_stat(const u8 index); void tomoyo_warn_oom(const char *function); void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...) 
__printf(2, 3); void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt, va_list args) __printf(3, 0); /********** External variable definitions. **********/ extern bool tomoyo_policy_loaded; extern int tomoyo_enabled; extern const char * const tomoyo_condition_keyword [TOMOYO_MAX_CONDITION_KEYWORD]; extern const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS]; extern const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX]; extern const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE]; extern const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION]; extern const char * const tomoyo_proto_keyword[TOMOYO_SOCK_MAX]; extern const char * const tomoyo_socket_keyword[TOMOYO_MAX_NETWORK_OPERATION]; extern const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX]; extern const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION]; extern const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION]; extern const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION]; extern struct list_head tomoyo_condition_list; extern struct list_head tomoyo_domain_list; extern struct list_head tomoyo_name_list[TOMOYO_MAX_HASH]; extern struct list_head tomoyo_namespace_list; extern struct mutex tomoyo_policy_lock; extern struct srcu_struct tomoyo_ss; extern struct tomoyo_domain_info tomoyo_kernel_domain; extern struct tomoyo_policy_namespace tomoyo_kernel_namespace; extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT]; extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT]; extern struct lsm_blob_sizes tomoyo_blob_sizes; /********** Inlined functions. **********/ /** * tomoyo_read_lock - Take lock for protecting policy. * * Returns index number for tomoyo_read_unlock(). */ static inline int tomoyo_read_lock(void) { return srcu_read_lock(&tomoyo_ss); } /** * tomoyo_read_unlock - Release lock for protecting policy. * * @idx: Index number returned by tomoyo_read_lock(). * * Returns nothing. */ static inline void tomoyo_read_unlock(int idx) { srcu_read_unlock(&tomoyo_ss, idx); } /** * tomoyo_sys_getppid - Copy of getppid(). * * Returns parent process's PID. * * Alpha does not have getppid() defined. To be able to build this module on * Alpha, I have to copy getppid() from kernel/timer.c. */ static inline pid_t tomoyo_sys_getppid(void) { pid_t pid; rcu_read_lock(); pid = task_tgid_vnr(rcu_dereference(current->real_parent)); rcu_read_unlock(); return pid; } /** * tomoyo_sys_getpid - Copy of getpid(). * * Returns current thread's PID. * * Alpha does not have getpid() defined. To be able to build this module on * Alpha, I have to copy getpid() from kernel/timer.c. */ static inline pid_t tomoyo_sys_getpid(void) { return task_tgid_vnr(current); } /** * tomoyo_pathcmp - strcmp() for "struct tomoyo_path_info" structure. * * @a: Pointer to "struct tomoyo_path_info". * @b: Pointer to "struct tomoyo_path_info". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_pathcmp(const struct tomoyo_path_info *a, const struct tomoyo_path_info *b) { return a->hash != b->hash || strcmp(a->name, b->name); } /** * tomoyo_put_name - Drop reference on "struct tomoyo_name". * * @name: Pointer to "struct tomoyo_path_info". Maybe NULL. * * Returns nothing. */ static inline void tomoyo_put_name(const struct tomoyo_path_info *name) { if (name) { struct tomoyo_name *ptr = container_of(name, typeof(*ptr), entry); atomic_dec(&ptr->head.users); } } /** * tomoyo_put_condition - Drop reference on "struct tomoyo_condition". 
* * @cond: Pointer to "struct tomoyo_condition". Maybe NULL. * * Returns nothing. */ static inline void tomoyo_put_condition(struct tomoyo_condition *cond) { if (cond) atomic_dec(&cond->head.users); } /** * tomoyo_put_group - Drop reference on "struct tomoyo_group". * * @group: Pointer to "struct tomoyo_group". Maybe NULL. * * Returns nothing. */ static inline void tomoyo_put_group(struct tomoyo_group *group) { if (group) atomic_dec(&group->head.users); } /** * tomoyo_task - Get "struct tomoyo_task" for specified thread. * * @task - Pointer to "struct task_struct". * * Returns pointer to "struct tomoyo_task" for specified thread. */ static inline struct tomoyo_task *tomoyo_task(struct task_struct *task) { return task->security + tomoyo_blob_sizes.lbs_task; } /** * tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry. * * @a: Pointer to "struct tomoyo_name_union". * @b: Pointer to "struct tomoyo_name_union". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_name_union (const struct tomoyo_name_union *a, const struct tomoyo_name_union *b) { return a->filename == b->filename && a->group == b->group; } /** * tomoyo_same_number_union - Check for duplicated "struct tomoyo_number_union" entry. * * @a: Pointer to "struct tomoyo_number_union". * @b: Pointer to "struct tomoyo_number_union". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_number_union (const struct tomoyo_number_union *a, const struct tomoyo_number_union *b) { return a->values[0] == b->values[0] && a->values[1] == b->values[1] && a->group == b->group && a->value_type[0] == b->value_type[0] && a->value_type[1] == b->value_type[1]; } /** * tomoyo_same_ipaddr_union - Check for duplicated "struct tomoyo_ipaddr_union" entry. * * @a: Pointer to "struct tomoyo_ipaddr_union". * @b: Pointer to "struct tomoyo_ipaddr_union". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_ipaddr_union (const struct tomoyo_ipaddr_union *a, const struct tomoyo_ipaddr_union *b) { return !memcmp(a->ip, b->ip, sizeof(a->ip)) && a->group == b->group && a->is_ipv6 == b->is_ipv6; } /** * tomoyo_current_namespace - Get "struct tomoyo_policy_namespace" for current thread. * * Returns pointer to "struct tomoyo_policy_namespace" for current thread. */ static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void) { return tomoyo_domain()->ns; } /** * list_for_each_cookie - iterate over a list with cookie. * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. */ #define list_for_each_cookie(pos, head) \ if (!pos) \ pos = srcu_dereference((head)->next, &tomoyo_ss); \ for ( ; pos != (head); pos = srcu_dereference(pos->next, &tomoyo_ss)) #endif /* !defined(_SECURITY_TOMOYO_COMMON_H) */
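/*
 * Illustrative sketch (not part of the original header): how the SRCU
 * helpers tomoyo_read_lock()/tomoyo_read_unlock() and the
 * list_for_each_cookie() macro above are typically combined when a
 * /sys/kernel/security/tomoyo/ read handler walks a policy list and
 * resumes from a saved cookie on a later read() call. The handler name
 * below is hypothetical.
 *
 *	static void tomoyo_example_read(struct tomoyo_io_buffer *head)
 *	{
 *		int idx = tomoyo_read_lock();
 *
 *		// head->r.domain acts as the cookie: NULL on the first
 *		// call, otherwise the position reached last time.
 *		list_for_each_cookie(head->r.domain, &tomoyo_domain_list) {
 *			struct tomoyo_domain_info *domain =
 *				list_entry(head->r.domain, typeof(*domain),
 *					   list);
 *			// ... format one entry into head->read_buf ...
 *		}
 *		tomoyo_read_unlock(idx);
 *	}
 */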
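/*
 * Illustrative sketch (not part of the generated header that follows):
 * atomic_long_t is a kernel-word-sized counter, mapped to atomic64_t on
 * 64-bit kernels and to atomic_t on 32-bit kernels. The raw_*() forms
 * defined below are the noinstr-safe variants; the instrumented
 * atomic_long_*() wrappers are preferred elsewhere, per the kerneldoc.
 * The counter and helpers here are hypothetical.
 *
 *	static atomic_long_t example_nr_users = ATOMIC_LONG_INIT(0);
 *
 *	static void example_get(void)
 *	{
 *		raw_atomic_long_inc(&example_nr_users);
 *	}
 *
 *	static long example_read_count(void)
 *	{
 *		return raw_atomic_long_read(&example_nr_users);
 *	}
 */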
// SPDX-License-Identifier: GPL-2.0

// Generated by scripts/atomic/gen-atomic-long.sh
// DO NOT MODIFY THIS FILE DIRECTLY

#ifndef _LINUX_ATOMIC_LONG_H
#define _LINUX_ATOMIC_LONG_H

#include <linux/compiler.h>
#include <asm/types.h>

#ifdef CONFIG_64BIT
typedef atomic64_t atomic_long_t;
#define ATOMIC_LONG_INIT(i)		ATOMIC64_INIT(i)
#define atomic_long_cond_read_acquire	atomic64_cond_read_acquire
#define atomic_long_cond_read_relaxed	atomic64_cond_read_relaxed
#else
typedef atomic_t atomic_long_t;
#define ATOMIC_LONG_INIT(i)		ATOMIC_INIT(i)
#define atomic_long_cond_read_acquire	atomic_cond_read_acquire
#define atomic_long_cond_read_relaxed	atomic_cond_read_relaxed
#endif

/**
 * raw_atomic_long_read() - atomic load with relaxed ordering
 * @v: pointer to atomic_long_t
 *
 * Atomically loads the value of @v with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_long_read() elsewhere.
 *
 * Return: The value loaded from @v.
 */
static __always_inline long
raw_atomic_long_read(const atomic_long_t *v)
{
#ifdef CONFIG_64BIT
	return raw_atomic64_read(v);
#else
	return raw_atomic_read(v);
#endif
}

/**
 * raw_atomic_long_read_acquire() - atomic load with acquire ordering
 * @v: pointer to atomic_long_t
 *
 * Atomically loads the value of @v with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_long_read_acquire() elsewhere.
 *
 * Return: The value loaded from @v.
 */
static __always_inline long
raw_atomic_long_read_acquire(const atomic_long_t *v)
{
#ifdef CONFIG_64BIT
	return raw_atomic64_read_acquire(v);
#else
	return raw_atomic_read_acquire(v);
#endif
}

/**
 * raw_atomic_long_set() - atomic set with relaxed ordering
 * @v: pointer to atomic_long_t
 * @i: long value to assign
 *
 * Atomically sets @v to @i with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_long_set() elsewhere.
 *
 * Return: Nothing.
 */
static __always_inline void
raw_atomic_long_set(atomic_long_t *v, long i)
{
#ifdef CONFIG_64BIT
	raw_atomic64_set(v, i);
#else
	raw_atomic_set(v, i);
#endif
}

/**
 * raw_atomic_long_set_release() - atomic set with release ordering
 * @v: pointer to atomic_long_t
 * @i: long value to assign
 *
 * Atomically sets @v to @i with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_long_set_release() elsewhere.
 *
 * Return: Nothing.
 */
static __always_inline void
raw_atomic_long_set_release(atomic_long_t *v, long i)
{
#ifdef CONFIG_64BIT
	raw_atomic64_set_release(v, i);
#else
	raw_atomic_set_release(v, i);
#endif
}

/**
 * raw_atomic_long_add() - atomic add with relaxed ordering
 * @i: long value to add
 * @v: pointer to atomic_long_t
 *
 * Atomically updates @v to (@v + @i) with relaxed ordering.
* * Safe to use in noinstr code; prefer atomic_long_add() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_long_add(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT raw_atomic64_add(i, v); #else raw_atomic_add(i, v); #endif } /** * raw_atomic_long_add_return() - atomic add with full ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_add_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_add_return(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_add_return(i, v); #else return raw_atomic_add_return(i, v); #endif } /** * raw_atomic_long_add_return_acquire() - atomic add with acquire ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_add_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_add_return_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_add_return_acquire(i, v); #else return raw_atomic_add_return_acquire(i, v); #endif } /** * raw_atomic_long_add_return_release() - atomic add with release ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_add_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_add_return_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_add_return_release(i, v); #else return raw_atomic_add_return_release(i, v); #endif } /** * raw_atomic_long_add_return_relaxed() - atomic add with relaxed ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_add_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_add_return_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_add_return_relaxed(i, v); #else return raw_atomic_add_return_relaxed(i, v); #endif } /** * raw_atomic_long_fetch_add() - atomic add with full ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_add() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_add(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_add(i, v); #else return raw_atomic_fetch_add(i, v); #endif } /** * raw_atomic_long_fetch_add_acquire() - atomic add with acquire ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_add_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_add_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_add_acquire(i, v); #else return raw_atomic_fetch_add_acquire(i, v); #endif } /** * raw_atomic_long_fetch_add_release() - atomic add with release ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with release ordering. 
* * Safe to use in noinstr code; prefer atomic_long_fetch_add_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_add_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_add_release(i, v); #else return raw_atomic_fetch_add_release(i, v); #endif } /** * raw_atomic_long_fetch_add_relaxed() - atomic add with relaxed ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_add_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_add_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_add_relaxed(i, v); #else return raw_atomic_fetch_add_relaxed(i, v); #endif } /** * raw_atomic_long_sub() - atomic subtract with relaxed ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_sub() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_long_sub(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT raw_atomic64_sub(i, v); #else raw_atomic_sub(i, v); #endif } /** * raw_atomic_long_sub_return() - atomic subtract with full ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_sub_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_sub_return(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_sub_return(i, v); #else return raw_atomic_sub_return(i, v); #endif } /** * raw_atomic_long_sub_return_acquire() - atomic subtract with acquire ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_sub_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_sub_return_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_sub_return_acquire(i, v); #else return raw_atomic_sub_return_acquire(i, v); #endif } /** * raw_atomic_long_sub_return_release() - atomic subtract with release ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_sub_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_sub_return_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_sub_return_release(i, v); #else return raw_atomic_sub_return_release(i, v); #endif } /** * raw_atomic_long_sub_return_relaxed() - atomic subtract with relaxed ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_sub_return_relaxed() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline long raw_atomic_long_sub_return_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_sub_return_relaxed(i, v); #else return raw_atomic_sub_return_relaxed(i, v); #endif } /** * raw_atomic_long_fetch_sub() - atomic subtract with full ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_sub() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_sub(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_sub(i, v); #else return raw_atomic_fetch_sub(i, v); #endif } /** * raw_atomic_long_fetch_sub_acquire() - atomic subtract with acquire ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_sub_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_sub_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_sub_acquire(i, v); #else return raw_atomic_fetch_sub_acquire(i, v); #endif } /** * raw_atomic_long_fetch_sub_release() - atomic subtract with release ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_sub_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_sub_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_sub_release(i, v); #else return raw_atomic_fetch_sub_release(i, v); #endif } /** * raw_atomic_long_fetch_sub_relaxed() - atomic subtract with relaxed ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_sub_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_sub_relaxed(i, v); #else return raw_atomic_fetch_sub_relaxed(i, v); #endif } /** * raw_atomic_long_inc() - atomic increment with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_inc() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_long_inc(atomic_long_t *v) { #ifdef CONFIG_64BIT raw_atomic64_inc(v); #else raw_atomic_inc(v); #endif } /** * raw_atomic_long_inc_return() - atomic increment with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_inc_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_inc_return(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_inc_return(v); #else return raw_atomic_inc_return(v); #endif } /** * raw_atomic_long_inc_return_acquire() - atomic increment with acquire ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_inc_return_acquire() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline long raw_atomic_long_inc_return_acquire(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_inc_return_acquire(v); #else return raw_atomic_inc_return_acquire(v); #endif } /** * raw_atomic_long_inc_return_release() - atomic increment with release ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_inc_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_inc_return_release(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_inc_return_release(v); #else return raw_atomic_inc_return_release(v); #endif } /** * raw_atomic_long_inc_return_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_inc_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_inc_return_relaxed(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_inc_return_relaxed(v); #else return raw_atomic_inc_return_relaxed(v); #endif } /** * raw_atomic_long_fetch_inc() - atomic increment with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_inc() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_inc(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_inc(v); #else return raw_atomic_fetch_inc(v); #endif } /** * raw_atomic_long_fetch_inc_acquire() - atomic increment with acquire ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_inc_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_inc_acquire(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_inc_acquire(v); #else return raw_atomic_fetch_inc_acquire(v); #endif } /** * raw_atomic_long_fetch_inc_release() - atomic increment with release ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_inc_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_inc_release(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_inc_release(v); #else return raw_atomic_fetch_inc_release(v); #endif } /** * raw_atomic_long_fetch_inc_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_inc_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_inc_relaxed(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_inc_relaxed(v); #else return raw_atomic_fetch_inc_relaxed(v); #endif } /** * raw_atomic_long_dec() - atomic decrement with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_dec() elsewhere. * * Return: Nothing. 
*/ static __always_inline void raw_atomic_long_dec(atomic_long_t *v) { #ifdef CONFIG_64BIT raw_atomic64_dec(v); #else raw_atomic_dec(v); #endif } /** * raw_atomic_long_dec_return() - atomic decrement with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_dec_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_dec_return(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_dec_return(v); #else return raw_atomic_dec_return(v); #endif } /** * raw_atomic_long_dec_return_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_dec_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_dec_return_acquire(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_dec_return_acquire(v); #else return raw_atomic_dec_return_acquire(v); #endif } /** * raw_atomic_long_dec_return_release() - atomic decrement with release ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_dec_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_dec_return_release(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_dec_return_release(v); #else return raw_atomic_dec_return_release(v); #endif } /** * raw_atomic_long_dec_return_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_dec_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline long raw_atomic_long_dec_return_relaxed(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_dec_return_relaxed(v); #else return raw_atomic_dec_return_relaxed(v); #endif } /** * raw_atomic_long_fetch_dec() - atomic decrement with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_dec() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_dec(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_dec(v); #else return raw_atomic_fetch_dec(v); #endif } /** * raw_atomic_long_fetch_dec_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_dec_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_dec_acquire(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_dec_acquire(v); #else return raw_atomic_fetch_dec_acquire(v); #endif } /** * raw_atomic_long_fetch_dec_release() - atomic decrement with release ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_dec_release() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline long raw_atomic_long_fetch_dec_release(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_dec_release(v); #else return raw_atomic_fetch_dec_release(v); #endif } /** * raw_atomic_long_fetch_dec_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_dec_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_dec_relaxed(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_dec_relaxed(v); #else return raw_atomic_fetch_dec_relaxed(v); #endif } /** * raw_atomic_long_and() - atomic bitwise AND with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_and() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_long_and(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT raw_atomic64_and(i, v); #else raw_atomic_and(i, v); #endif } /** * raw_atomic_long_fetch_and() - atomic bitwise AND with full ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_and() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_and(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_and(i, v); #else return raw_atomic_fetch_and(i, v); #endif } /** * raw_atomic_long_fetch_and_acquire() - atomic bitwise AND with acquire ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_and_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_and_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_and_acquire(i, v); #else return raw_atomic_fetch_and_acquire(i, v); #endif } /** * raw_atomic_long_fetch_and_release() - atomic bitwise AND with release ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_and_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_and_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_and_release(i, v); #else return raw_atomic_fetch_and_release(i, v); #endif } /** * raw_atomic_long_fetch_and_relaxed() - atomic bitwise AND with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_and_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_and_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_and_relaxed(i, v); #else return raw_atomic_fetch_and_relaxed(i, v); #endif } /** * raw_atomic_long_andnot() - atomic bitwise AND NOT with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_andnot() elsewhere. * * Return: Nothing. 
*/ static __always_inline void raw_atomic_long_andnot(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT raw_atomic64_andnot(i, v); #else raw_atomic_andnot(i, v); #endif } /** * raw_atomic_long_fetch_andnot() - atomic bitwise AND NOT with full ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_andnot() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_andnot(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_andnot(i, v); #else return raw_atomic_fetch_andnot(i, v); #endif } /** * raw_atomic_long_fetch_andnot_acquire() - atomic bitwise AND NOT with acquire ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_andnot_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_andnot_acquire(i, v); #else return raw_atomic_fetch_andnot_acquire(i, v); #endif } /** * raw_atomic_long_fetch_andnot_release() - atomic bitwise AND NOT with release ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_andnot_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_andnot_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_andnot_release(i, v); #else return raw_atomic_fetch_andnot_release(i, v); #endif } /** * raw_atomic_long_fetch_andnot_relaxed() - atomic bitwise AND NOT with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_andnot_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_andnot_relaxed(i, v); #else return raw_atomic_fetch_andnot_relaxed(i, v); #endif } /** * raw_atomic_long_or() - atomic bitwise OR with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_or() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_long_or(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT raw_atomic64_or(i, v); #else raw_atomic_or(i, v); #endif } /** * raw_atomic_long_fetch_or() - atomic bitwise OR with full ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_or() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_or(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_or(i, v); #else return raw_atomic_fetch_or(i, v); #endif } /** * raw_atomic_long_fetch_or_acquire() - atomic bitwise OR with acquire ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_or_acquire() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline long raw_atomic_long_fetch_or_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_or_acquire(i, v); #else return raw_atomic_fetch_or_acquire(i, v); #endif } /** * raw_atomic_long_fetch_or_release() - atomic bitwise OR with release ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_or_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_or_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_or_release(i, v); #else return raw_atomic_fetch_or_release(i, v); #endif } /** * raw_atomic_long_fetch_or_relaxed() - atomic bitwise OR with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_or_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_or_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_or_relaxed(i, v); #else return raw_atomic_fetch_or_relaxed(i, v); #endif } /** * raw_atomic_long_xor() - atomic bitwise XOR with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_xor() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_long_xor(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT raw_atomic64_xor(i, v); #else raw_atomic_xor(i, v); #endif } /** * raw_atomic_long_fetch_xor() - atomic bitwise XOR with full ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_xor() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_xor(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_xor(i, v); #else return raw_atomic_fetch_xor(i, v); #endif } /** * raw_atomic_long_fetch_xor_acquire() - atomic bitwise XOR with acquire ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_xor_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_xor_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_xor_acquire(i, v); #else return raw_atomic_fetch_xor_acquire(i, v); #endif } /** * raw_atomic_long_fetch_xor_release() - atomic bitwise XOR with release ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_xor_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_xor_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_xor_release(i, v); #else return raw_atomic_fetch_xor_release(i, v); #endif } /** * raw_atomic_long_fetch_xor_relaxed() - atomic bitwise XOR with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_fetch_xor_relaxed() elsewhere. 
* * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_xor_relaxed(i, v); #else return raw_atomic_fetch_xor_relaxed(i, v); #endif } /** * raw_atomic_long_xchg() - atomic exchange with full ordering * @v: pointer to atomic_long_t * @new: long value to assign * * Atomically updates @v to @new with full ordering. * * Safe to use in noinstr code; prefer atomic_long_xchg() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_xchg(atomic_long_t *v, long new) { #ifdef CONFIG_64BIT return raw_atomic64_xchg(v, new); #else return raw_atomic_xchg(v, new); #endif } /** * raw_atomic_long_xchg_acquire() - atomic exchange with acquire ordering * @v: pointer to atomic_long_t * @new: long value to assign * * Atomically updates @v to @new with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_xchg_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_xchg_acquire(atomic_long_t *v, long new) { #ifdef CONFIG_64BIT return raw_atomic64_xchg_acquire(v, new); #else return raw_atomic_xchg_acquire(v, new); #endif } /** * raw_atomic_long_xchg_release() - atomic exchange with release ordering * @v: pointer to atomic_long_t * @new: long value to assign * * Atomically updates @v to @new with release ordering. * * Safe to use in noinstr code; prefer atomic_long_xchg_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_xchg_release(atomic_long_t *v, long new) { #ifdef CONFIG_64BIT return raw_atomic64_xchg_release(v, new); #else return raw_atomic_xchg_release(v, new); #endif } /** * raw_atomic_long_xchg_relaxed() - atomic exchange with relaxed ordering * @v: pointer to atomic_long_t * @new: long value to assign * * Atomically updates @v to @new with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_xchg_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_xchg_relaxed(atomic_long_t *v, long new) { #ifdef CONFIG_64BIT return raw_atomic64_xchg_relaxed(v, new); #else return raw_atomic_xchg_relaxed(v, new); #endif } /** * raw_atomic_long_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic_long_t * @old: long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_cmpxchg(atomic_long_t *v, long old, long new) { #ifdef CONFIG_64BIT return raw_atomic64_cmpxchg(v, old, new); #else return raw_atomic_cmpxchg(v, old, new); #endif } /** * raw_atomic_long_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic_long_t * @old: long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_acquire() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline long raw_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) { #ifdef CONFIG_64BIT return raw_atomic64_cmpxchg_acquire(v, old, new); #else return raw_atomic_cmpxchg_acquire(v, old, new); #endif } /** * raw_atomic_long_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic_long_t * @old: long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_release() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) { #ifdef CONFIG_64BIT return raw_atomic64_cmpxchg_release(v, old, new); #else return raw_atomic_cmpxchg_release(v, old, new); #endif } /** * raw_atomic_long_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic_long_t * @old: long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_cmpxchg_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) { #ifdef CONFIG_64BIT return raw_atomic64_cmpxchg_relaxed(v, old, new); #else return raw_atomic_cmpxchg_relaxed(v, old, new); #endif } /** * raw_atomic_long_try_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic_long_t * @old: pointer to long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg() elsewhere. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) { #ifdef CONFIG_64BIT return raw_atomic64_try_cmpxchg(v, (s64 *)old, new); #else return raw_atomic_try_cmpxchg(v, (int *)old, new); #endif } /** * raw_atomic_long_try_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic_long_t * @old: pointer to long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_acquire() elsewhere. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) { #ifdef CONFIG_64BIT return raw_atomic64_try_cmpxchg_acquire(v, (s64 *)old, new); #else return raw_atomic_try_cmpxchg_acquire(v, (int *)old, new); #endif } /** * raw_atomic_long_try_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic_long_t * @old: pointer to long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided.
* * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_release() elsewhere. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) { #ifdef CONFIG_64BIT return raw_atomic64_try_cmpxchg_release(v, (s64 *)old, new); #else return raw_atomic_try_cmpxchg_release(v, (int *)old, new); #endif } /** * raw_atomic_long_try_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic_long_t * @old: pointer to long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_relaxed() elsewhere. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool raw_atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) { #ifdef CONFIG_64BIT return raw_atomic64_try_cmpxchg_relaxed(v, (s64 *)old, new); #else return raw_atomic_try_cmpxchg_relaxed(v, (int *)old, new); #endif } /** * raw_atomic_long_sub_and_test() - atomic subtract and test if zero with full ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_sub_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic_long_sub_and_test(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_sub_and_test(i, v); #else return raw_atomic_sub_and_test(i, v); #endif } /** * raw_atomic_long_dec_and_test() - atomic decrement and test if zero with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_dec_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic_long_dec_and_test(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_dec_and_test(v); #else return raw_atomic_dec_and_test(v); #endif } /** * raw_atomic_long_inc_and_test() - atomic increment and test if zero with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_inc_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic_long_inc_and_test(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_inc_and_test(v); #else return raw_atomic_inc_and_test(v); #endif } /** * raw_atomic_long_add_negative() - atomic add and test if negative with full ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_long_add_negative() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise.
*/ static __always_inline bool raw_atomic_long_add_negative(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_add_negative(i, v); #else return raw_atomic_add_negative(i, v); #endif } /** * raw_atomic_long_add_negative_acquire() - atomic add and test if negative with acquire ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic_long_add_negative_acquire() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic_long_add_negative_acquire(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_add_negative_acquire(i, v); #else return raw_atomic_add_negative_acquire(i, v); #endif } /** * raw_atomic_long_add_negative_release() - atomic add and test if negative with release ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic_long_add_negative_release() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic_long_add_negative_release(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_add_negative_release(i, v); #else return raw_atomic_add_negative_release(i, v); #endif } /** * raw_atomic_long_add_negative_relaxed() - atomic add and test if negative with relaxed ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_long_add_negative_relaxed() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_add_negative_relaxed(i, v); #else return raw_atomic_add_negative_relaxed(i, v); #endif } /** * raw_atomic_long_fetch_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic_long_t * @a: long value to add * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_fetch_add_unless() elsewhere. * * Return: The original value of @v. */ static __always_inline long raw_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) { #ifdef CONFIG_64BIT return raw_atomic64_fetch_add_unless(v, a, u); #else return raw_atomic_fetch_add_unless(v, a, u); #endif } /** * raw_atomic_long_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic_long_t * @a: long value to add * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_add_unless() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_long_add_unless(atomic_long_t *v, long a, long u) { #ifdef CONFIG_64BIT return raw_atomic64_add_unless(v, a, u); #else return raw_atomic_add_unless(v, a, u); #endif } /** * raw_atomic_long_inc_not_zero() - atomic increment unless zero with full ordering * @v: pointer to atomic_long_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. 
* Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_inc_not_zero() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_long_inc_not_zero(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_inc_not_zero(v); #else return raw_atomic_inc_not_zero(v); #endif } /** * raw_atomic_long_inc_unless_negative() - atomic increment unless negative with full ordering * @v: pointer to atomic_long_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_inc_unless_negative() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_long_inc_unless_negative(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_inc_unless_negative(v); #else return raw_atomic_inc_unless_negative(v); #endif } /** * raw_atomic_long_dec_unless_positive() - atomic decrement unless positive with full ordering * @v: pointer to atomic_long_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_dec_unless_positive() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_long_dec_unless_positive(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_dec_unless_positive(v); #else return raw_atomic_dec_unless_positive(v); #endif } /** * raw_atomic_long_dec_if_positive() - atomic decrement if positive with full ordering * @v: pointer to atomic_long_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_long_dec_if_positive() elsewhere. * * Return: The old value of (@v - 1), regardless of whether @v was updated. */ static __always_inline long raw_atomic_long_dec_if_positive(atomic_long_t *v) { #ifdef CONFIG_64BIT return raw_atomic64_dec_if_positive(v); #else return raw_atomic_dec_if_positive(v); #endif } #endif /* _LINUX_ATOMIC_LONG_H */ // 1c4a26fc77f345342953770ebe3c4d08e7ce2f9a
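The raw_ forms above mirror the instrumented atomic_long_*() API one-to-one, so their calling conventions carry over directly. As a purely illustrative sketch (not part of this header; the demo_* name is hypothetical), a noinstr-safe "increment unless zero" can be built from raw_atomic_long_read() and raw_atomic_long_try_cmpxchg() -- essentially what raw_atomic_long_inc_not_zero() already provides, shown here only to make the try_cmpxchg() calling convention concrete:

#include <linux/atomic.h>

/* Illustrative only: equivalent in spirit to raw_atomic_long_inc_not_zero(). */
static inline bool demo_long_inc_unless_zero(atomic_long_t *refs)
{
	long old = raw_atomic_long_read(refs);

	/*
	 * On failure, raw_atomic_long_try_cmpxchg() rewrites 'old' with the
	 * value it found, so the loop re-tests the zero case without an
	 * extra explicit read.
	 */
	do {
		if (!old)
			return false;
	} while (!raw_atomic_long_try_cmpxchg(refs, &old, old + 1));

	return true;
}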
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/errno.h> #include <linux/unistd.h> #include <linux/uaccess.h> #include <linux/syscalls.h> #include <asm/ucontext.h> #include <asm/fpu/signal.h> #include <asm/sighandling.h> #include <asm/syscall.h> #include <asm/sigframe.h> #include <asm/signal.h> /* * If regs->ss will cause an IRET fault, change it. Otherwise leave it * alone. Using this generally makes no sense unless * user_64bit_mode(regs) would return true. */ static void force_valid_ss(struct pt_regs *regs) { u32 ar; asm volatile ("lar %[old_ss], %[ar]\n\t" "jz 1f\n\t" /* If invalid: */ "xorl %[ar], %[ar]\n\t" /* set ar = 0 */ "1:" : [ar] "=r" (ar) : [old_ss] "rm" ((u16)regs->ss)); /* * For a valid 64-bit user context, we need DPL 3, type * read-write data or read-write exp-down data, and S and P * set. We can't use VERW because VERW doesn't check the * P bit.
*/ ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK; if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) && ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN)) regs->ss = __USER_DS; } static bool restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, unsigned long uc_flags) { struct sigcontext sc; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; if (copy_from_user(&sc, usc, offsetof(struct sigcontext, reserved1))) return false; regs->bx = sc.bx; regs->cx = sc.cx; regs->dx = sc.dx; regs->si = sc.si; regs->di = sc.di; regs->bp = sc.bp; regs->ax = sc.ax; regs->sp = sc.sp; regs->ip = sc.ip; regs->r8 = sc.r8; regs->r9 = sc.r9; regs->r10 = sc.r10; regs->r11 = sc.r11; regs->r12 = sc.r12; regs->r13 = sc.r13; regs->r14 = sc.r14; regs->r15 = sc.r15; /* Get CS/SS and force CPL3 */ regs->cs = sc.cs | 0x03; regs->ss = sc.ss | 0x03; regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS); /* disable syscall checks */ regs->orig_ax = -1; /* * Fix up SS if needed for the benefit of old DOSEMU and * CRIU. */ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs))) force_valid_ss(regs); return fpu__restore_sig((void __user *)sc.fpstate, 0); } static __always_inline int __unsafe_setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask) { unsafe_put_user(regs->di, &sc->di, Efault); unsafe_put_user(regs->si, &sc->si, Efault); unsafe_put_user(regs->bp, &sc->bp, Efault); unsafe_put_user(regs->sp, &sc->sp, Efault); unsafe_put_user(regs->bx, &sc->bx, Efault); unsafe_put_user(regs->dx, &sc->dx, Efault); unsafe_put_user(regs->cx, &sc->cx, Efault); unsafe_put_user(regs->ax, &sc->ax, Efault); unsafe_put_user(regs->r8, &sc->r8, Efault); unsafe_put_user(regs->r9, &sc->r9, Efault); unsafe_put_user(regs->r10, &sc->r10, Efault); unsafe_put_user(regs->r11, &sc->r11, Efault); unsafe_put_user(regs->r12, &sc->r12, Efault); unsafe_put_user(regs->r13, &sc->r13, Efault); unsafe_put_user(regs->r14, &sc->r14, Efault); unsafe_put_user(regs->r15, &sc->r15, Efault); unsafe_put_user(current->thread.trap_nr, &sc->trapno, Efault); unsafe_put_user(current->thread.error_code, &sc->err, Efault); unsafe_put_user(regs->ip, &sc->ip, Efault); unsafe_put_user(regs->flags, &sc->flags, Efault); unsafe_put_user(regs->cs, &sc->cs, Efault); unsafe_put_user(0, &sc->gs, Efault); unsafe_put_user(0, &sc->fs, Efault); unsafe_put_user(regs->ss, &sc->ss, Efault); unsafe_put_user(fpstate, (unsigned long __user *)&sc->fpstate, Efault); /* non-iBCS2 extensions.. */ unsafe_put_user(mask, &sc->oldmask, Efault); unsafe_put_user(current->thread.cr2, &sc->cr2, Efault); return 0; Efault: return -EFAULT; } #define unsafe_put_sigcontext(sc, fp, regs, set, label) \ do { \ if (__unsafe_setup_sigcontext(sc, fp, regs, set->sig[0])) \ goto label; \ } while(0); #define unsafe_put_sigmask(set, frame, label) \ unsafe_put_user(*(__u64 *)(set), \ (__u64 __user *)&(frame)->uc.uc_sigmask, \ label) static unsigned long frame_uc_flags(struct pt_regs *regs) { unsigned long flags; if (boot_cpu_has(X86_FEATURE_XSAVE)) flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; else flags = UC_SIGCONTEXT_SS; if (likely(user_64bit_mode(regs))) flags |= UC_STRICT_RESTORE_SS; return flags; } int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) { sigset_t *set = sigmask_to_save(); struct rt_sigframe __user *frame; void __user *fp = NULL; unsigned long uc_flags; /* x86-64 should always use SA_RESTORER. 
*/ if (!(ksig->ka.sa.sa_flags & SA_RESTORER)) return -EFAULT; frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp); uc_flags = frame_uc_flags(regs); if (!user_access_begin(frame, sizeof(*frame))) return -EFAULT; /* Create the ucontext. */ unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault); unsafe_put_user(0, &frame->uc.uc_link, Efault); unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault); /* Set up to return from userspace. If provided, use a stub already in userspace. */ unsafe_put_user(ksig->ka.sa.sa_restorer, &frame->pretcode, Efault); unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault); unsafe_put_sigmask(set, frame, Efault); user_access_end(); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { if (copy_siginfo_to_user(&frame->info, &ksig->info)) return -EFAULT; } if (setup_signal_shadow_stack(ksig)) return -EFAULT; /* Set up registers for signal handler */ regs->di = ksig->sig; /* In case the signal handler was declared without prototypes */ regs->ax = 0; /* This also works for non SA_SIGINFO handlers because they expect the next argument after the signal number on the stack. */ regs->si = (unsigned long)&frame->info; regs->dx = (unsigned long)&frame->uc; regs->ip = (unsigned long) ksig->ka.sa.sa_handler; regs->sp = (unsigned long)frame; /* * Set up the CS and SS registers to run signal handlers in * 64-bit mode, even if the handler happens to be interrupting * 32-bit or 16-bit code. * * SS is subtle. In 64-bit mode, we don't need any particular * SS descriptor, but we do need SS to be valid. It's possible * that the old SS is entirely bogus -- this can happen if the * signal we're trying to deliver is #GP or #SS caused by a bad * SS value. We also have a compatibility issue here: DOSEMU * relies on the contents of the SS register indicating the * SS value at the time of the signal, even though that code in * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU * avoids relying on sigreturn to restore SS; instead it uses * a trampoline.) So we do our best: if the old SS was valid, * we keep it. Otherwise we replace it. */ regs->cs = __USER_CS; if (unlikely(regs->ss != __USER_DS)) force_valid_ss(regs); return 0; Efault: user_access_end(); return -EFAULT; } /* * Do a signal return; undo the signal stack. 
*/ SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; sigset_t set; unsigned long uc_flags; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(*(__u64 *)&set, (__u64 __user *)&frame->uc.uc_sigmask)) goto badframe; if (__get_user(uc_flags, &frame->uc.uc_flags)) goto badframe; set_current_blocked(&set); if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (restore_signal_shadow_stack()) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) goto badframe; return regs->ax; badframe: signal_fault(regs, frame, "rt_sigreturn"); return 0; } #ifdef CONFIG_X86_X32_ABI static int x32_copy_siginfo_to_user(struct compat_siginfo __user *to, const struct kernel_siginfo *from) { struct compat_siginfo new; copy_siginfo_to_external32(&new, from); if (from->si_signo == SIGCHLD) { new._sifields._sigchld_x32._utime = from->si_utime; new._sifields._sigchld_x32._stime = from->si_stime; } if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) return -EFAULT; return 0; } int copy_siginfo_to_user32(struct compat_siginfo __user *to, const struct kernel_siginfo *from) { if (in_x32_syscall()) return x32_copy_siginfo_to_user(to, from); return __copy_siginfo_to_user32(to, from); } int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) { compat_sigset_t *set = (compat_sigset_t *) sigmask_to_save(); struct rt_sigframe_x32 __user *frame; unsigned long uc_flags; void __user *restorer; void __user *fp = NULL; if (!(ksig->ka.sa.sa_flags & SA_RESTORER)) return -EFAULT; frame = get_sigframe(ksig, regs, sizeof(*frame), &fp); uc_flags = frame_uc_flags(regs); if (!user_access_begin(frame, sizeof(*frame))) return -EFAULT; /* Create the ucontext. */ unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault); unsafe_put_user(0, &frame->uc.uc_link, Efault); unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->sp, Efault); unsafe_put_user(0, &frame->uc.uc__pad0, Efault); restorer = ksig->ka.sa.sa_restorer; unsafe_put_user(restorer, (unsigned long __user *)&frame->pretcode, Efault); unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault); unsafe_put_sigmask(set, frame, Efault); user_access_end(); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { if (x32_copy_siginfo_to_user(&frame->info, &ksig->info)) return -EFAULT; } /* Set up registers for signal handler */ regs->sp = (unsigned long) frame; regs->ip = (unsigned long) ksig->ka.sa.sa_handler; /* We use the x32 calling convention here... 
*/ regs->di = ksig->sig; regs->si = (unsigned long) &frame->info; regs->dx = (unsigned long) &frame->uc; loadsegment(ds, __USER_DS); loadsegment(es, __USER_DS); regs->cs = __USER_CS; regs->ss = __USER_DS; return 0; Efault: user_access_end(); return -EFAULT; } COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_x32 __user *frame; sigset_t set; unsigned long uc_flags; frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], (__u64 __user *)&frame->uc.uc_sigmask)) goto badframe; if (__get_user(uc_flags, &frame->uc.uc_flags)) goto badframe; set_current_blocked(&set); if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; return regs->ax; badframe: signal_fault(regs, frame, "x32 rt_sigreturn"); return 0; } #endif /* CONFIG_X86_X32_ABI */ #ifdef CONFIG_COMPAT void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) { if (!act) return; if (in_ia32_syscall()) act->sa.sa_flags |= SA_IA32_ABI; if (in_x32_syscall()) act->sa.sa_flags |= SA_X32_ABI; } #endif /* CONFIG_COMPAT */ /* * If adding a new si_code, there is probably new data in * the siginfo. Make sure folks bumping the si_code * limits also have to look at this code. Make sure any * new fields are handled in copy_siginfo_to_user32()! */ static_assert(NSIGILL == 11); static_assert(NSIGFPE == 15); static_assert(NSIGSEGV == 10); static_assert(NSIGBUS == 5); static_assert(NSIGTRAP == 6); static_assert(NSIGCHLD == 6); static_assert(NSIGSYS == 2); /* This is part of the ABI and can never change in size: */ static_assert(sizeof(siginfo_t) == 128); /* This is a part of the ABI and can never change in alignment */ static_assert(__alignof__(siginfo_t) == 8); /* * The offsets of all the (unioned) si_fields are fixed * in the ABI, of course. Make sure none of them ever * move and are always at the beginning: */ static_assert(offsetof(siginfo_t, si_signo) == 0); static_assert(offsetof(siginfo_t, si_errno) == 4); static_assert(offsetof(siginfo_t, si_code) == 8); /* * Ensure that the size of each si_field never changes. * If it does, it is a sign that the * copy_siginfo_to_user32() code below needs to be updated * along with the size in the CHECK_SI_SIZE(). * * We repeat this check for both the generic and compat * siginfos. * * Note: it is OK for these to grow as long as the whole * structure stays within the padding size (checked * above).
*/ #define CHECK_SI_OFFSET(name) \ static_assert(offsetof(siginfo_t, _sifields) == \ offsetof(siginfo_t, _sifields.name)) #define CHECK_SI_SIZE(name, size) \ static_assert(sizeof_field(siginfo_t, _sifields.name) == size) CHECK_SI_OFFSET(_kill); CHECK_SI_SIZE (_kill, 2*sizeof(int)); static_assert(offsetof(siginfo_t, si_pid) == 0x10); static_assert(offsetof(siginfo_t, si_uid) == 0x14); CHECK_SI_OFFSET(_timer); CHECK_SI_SIZE (_timer, 6*sizeof(int)); static_assert(offsetof(siginfo_t, si_tid) == 0x10); static_assert(offsetof(siginfo_t, si_overrun) == 0x14); static_assert(offsetof(siginfo_t, si_value) == 0x18); CHECK_SI_OFFSET(_rt); CHECK_SI_SIZE (_rt, 4*sizeof(int)); static_assert(offsetof(siginfo_t, si_pid) == 0x10); static_assert(offsetof(siginfo_t, si_uid) == 0x14); static_assert(offsetof(siginfo_t, si_value) == 0x18); CHECK_SI_OFFSET(_sigchld); CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); static_assert(offsetof(siginfo_t, si_pid) == 0x10); static_assert(offsetof(siginfo_t, si_uid) == 0x14); static_assert(offsetof(siginfo_t, si_status) == 0x18); static_assert(offsetof(siginfo_t, si_utime) == 0x20); static_assert(offsetof(siginfo_t, si_stime) == 0x28); #ifdef CONFIG_X86_X32_ABI /* no _sigchld_x32 in the generic siginfo_t */ static_assert(sizeof_field(compat_siginfo_t, _sifields._sigchld_x32) == 7*sizeof(int)); static_assert(offsetof(compat_siginfo_t, _sifields) == offsetof(compat_siginfo_t, _sifields._sigchld_x32)); static_assert(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime) == 0x18); static_assert(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime) == 0x20); #endif CHECK_SI_OFFSET(_sigfault); CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); static_assert(offsetof(siginfo_t, si_addr) == 0x10); static_assert(offsetof(siginfo_t, si_trapno) == 0x18); static_assert(offsetof(siginfo_t, si_addr_lsb) == 0x18); static_assert(offsetof(siginfo_t, si_lower) == 0x20); static_assert(offsetof(siginfo_t, si_upper) == 0x28); static_assert(offsetof(siginfo_t, si_pkey) == 0x20); static_assert(offsetof(siginfo_t, si_perf_data) == 0x18); static_assert(offsetof(siginfo_t, si_perf_type) == 0x20); static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24); CHECK_SI_OFFSET(_sigpoll); CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); static_assert(offsetof(siginfo_t, si_band) == 0x10); static_assert(offsetof(siginfo_t, si_fd) == 0x18); CHECK_SI_OFFSET(_sigsys); CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); static_assert(offsetof(siginfo_t, si_call_addr) == 0x10); static_assert(offsetof(siginfo_t, si_syscall) == 0x18); static_assert(offsetof(siginfo_t, si_arch) == 0x1C); /* any new si_fields should be added here */
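As a point of reference for the frame layout set up above, the register assignments in x64_setup_rt_frame() (signal number in %rdi, &frame->info in %rsi, &frame->uc in %rdx) are exactly what a userspace SA_SIGINFO handler receives as its three arguments, and the mandatory SA_RESTORER stub is normally supplied by libc. A minimal, hypothetical userspace counterpart (not part of this kernel file; demo_handler is an illustrative name) might look like:

#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <ucontext.h>
#include <unistd.h>

/* Arguments map to regs->di, regs->si and regs->dx as set up by the kernel. */
static void demo_handler(int sig, siginfo_t *info, void *uctx)
{
	ucontext_t *uc = uctx;
	static const char msg[] = "caught SIGUSR1\n";

	/* write() is async-signal-safe; printf() is not. */
	write(STDERR_FILENO, msg, sizeof(msg) - 1);
	(void)sig; (void)info; (void)uc;
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = demo_handler;
	sa.sa_flags = SA_SIGINFO;	/* deliver siginfo_t and ucontext_t */
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGUSR1, &sa, NULL))
		return EXIT_FAILURE;

	/* Delivery exercises x64_setup_rt_frame(); returning runs rt_sigreturn(). */
	raise(SIGUSR1);
	return EXIT_SUCCESS;
}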
// SPDX-License-Identifier: GPL-2.0-or-later /* * V4L2 controls framework Request API implementation. * * Copyright (C) 2018-2021 Hans Verkuil <hverkuil-cisco@xs4all.nl> */ #define pr_fmt(fmt) "v4l2-ctrls: " fmt #include <linux/export.h> #include <linux/slab.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-dev.h> #include <media/v4l2-ioctl.h> #include "v4l2-ctrls-priv.h" /* Initialize the request-related fields in a control handler */ void v4l2_ctrl_handler_init_request(struct v4l2_ctrl_handler *hdl) { INIT_LIST_HEAD(&hdl->requests); INIT_LIST_HEAD(&hdl->requests_queued); hdl->request_is_queued = false; media_request_object_init(&hdl->req_obj); } /* Free the request-related fields in a control handler */ void v4l2_ctrl_handler_free_request(struct v4l2_ctrl_handler *hdl) { struct v4l2_ctrl_handler *req, *next_req; /* * Do nothing if this isn't the main handler or the main * handler is not used in any request. * * The main handler can be identified by having a NULL ops pointer in * the request object. */ if (hdl->req_obj.ops || list_empty(&hdl->requests)) return; /* * If the main handler is freed and it is used by handler objects in * outstanding requests, then unbind and put those objects before * freeing the main handler.
 */
	list_for_each_entry_safe(req, next_req, &hdl->requests, requests) {
		media_request_object_unbind(&req->req_obj);
		media_request_object_put(&req->req_obj);
	}
}

static int v4l2_ctrl_request_clone(struct v4l2_ctrl_handler *hdl,
				   const struct v4l2_ctrl_handler *from)
{
	struct v4l2_ctrl_ref *ref;
	int err = 0;

	if (WARN_ON(!hdl || hdl == from))
		return -EINVAL;
	if (hdl->error)
		return hdl->error;
	WARN_ON(hdl->lock != &hdl->_lock);
	mutex_lock(from->lock);
	list_for_each_entry(ref, &from->ctrl_refs, node) {
		struct v4l2_ctrl *ctrl = ref->ctrl;
		struct v4l2_ctrl_ref *new_ref;

		/* Skip refs inherited from other devices */
		if (ref->from_other_dev)
			continue;
		err = handler_new_ref(hdl, ctrl, &new_ref, false, true);
		if (err)
			break;
	}
	mutex_unlock(from->lock);
	return err;
}

static void v4l2_ctrl_request_queue(struct media_request_object *obj)
{
	struct v4l2_ctrl_handler *hdl =
		container_of(obj, struct v4l2_ctrl_handler, req_obj);
	struct v4l2_ctrl_handler *main_hdl = obj->priv;

	mutex_lock(main_hdl->lock);
	list_add_tail(&hdl->requests_queued, &main_hdl->requests_queued);
	hdl->request_is_queued = true;
	mutex_unlock(main_hdl->lock);
}

static void v4l2_ctrl_request_unbind(struct media_request_object *obj)
{
	struct v4l2_ctrl_handler *hdl =
		container_of(obj, struct v4l2_ctrl_handler, req_obj);
	struct v4l2_ctrl_handler *main_hdl = obj->priv;

	mutex_lock(main_hdl->lock);
	list_del_init(&hdl->requests);
	if (hdl->request_is_queued) {
		list_del_init(&hdl->requests_queued);
		hdl->request_is_queued = false;
	}
	mutex_unlock(main_hdl->lock);
}

static void v4l2_ctrl_request_release(struct media_request_object *obj)
{
	struct v4l2_ctrl_handler *hdl =
		container_of(obj, struct v4l2_ctrl_handler, req_obj);

	v4l2_ctrl_handler_free(hdl);
	kfree(hdl);
}

static const struct media_request_object_ops req_ops = {
	.queue = v4l2_ctrl_request_queue,
	.unbind = v4l2_ctrl_request_unbind,
	.release = v4l2_ctrl_request_release,
};

struct v4l2_ctrl_handler *v4l2_ctrl_request_hdl_find(struct media_request *req,
					struct v4l2_ctrl_handler *parent)
{
	struct media_request_object *obj;

	if (WARN_ON(req->state != MEDIA_REQUEST_STATE_VALIDATING &&
		    req->state != MEDIA_REQUEST_STATE_QUEUED))
		return NULL;

	obj = media_request_object_find(req, &req_ops, parent);
	if (obj)
		return container_of(obj, struct v4l2_ctrl_handler, req_obj);
	return NULL;
}
EXPORT_SYMBOL_GPL(v4l2_ctrl_request_hdl_find);

struct v4l2_ctrl *
v4l2_ctrl_request_hdl_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id)
{
	struct v4l2_ctrl_ref *ref = find_ref_lock(hdl, id);

	return (ref && ref->p_req_valid) ? ref->ctrl : NULL;
}
EXPORT_SYMBOL_GPL(v4l2_ctrl_request_hdl_ctrl_find);

static int v4l2_ctrl_request_bind(struct media_request *req,
				  struct v4l2_ctrl_handler *hdl,
				  struct v4l2_ctrl_handler *from)
{
	int ret;

	ret = v4l2_ctrl_request_clone(hdl, from);
	if (!ret) {
		ret = media_request_object_bind(req, &req_ops,
						from, false, &hdl->req_obj);
		if (!ret) {
			mutex_lock(from->lock);
			list_add_tail(&hdl->requests, &from->requests);
			mutex_unlock(from->lock);
		}
	}
	return ret;
}

static struct media_request_object *
v4l2_ctrls_find_req_obj(struct v4l2_ctrl_handler *hdl,
			struct media_request *req, bool set)
{
	struct media_request_object *obj;
	struct v4l2_ctrl_handler *new_hdl;
	int ret;

	if (IS_ERR(req))
		return ERR_CAST(req);

	if (set && WARN_ON(req->state != MEDIA_REQUEST_STATE_UPDATING))
		return ERR_PTR(-EBUSY);

	obj = media_request_object_find(req, &req_ops, hdl);
	if (obj)
		return obj;

	/*
	 * If there are no controls in this completed request,
	 * then that can only happen if:
	 *
	 * 1) no controls were present in the queued request, and
	 * 2) v4l2_ctrl_request_complete() could not allocate a
	 *    control handler object to store the completed state in.
	 *
	 * So return ENOMEM to indicate that there was an out-of-memory
	 * error.
	 */
	if (!set)
		return ERR_PTR(-ENOMEM);

	new_hdl = kzalloc(sizeof(*new_hdl), GFP_KERNEL);
	if (!new_hdl)
		return ERR_PTR(-ENOMEM);

	obj = &new_hdl->req_obj;
	ret = v4l2_ctrl_handler_init(new_hdl, (hdl->nr_of_buckets - 1) * 8);
	if (!ret)
		ret = v4l2_ctrl_request_bind(req, new_hdl, hdl);
	if (ret) {
		v4l2_ctrl_handler_free(new_hdl);
		kfree(new_hdl);
		return ERR_PTR(ret);
	}

	media_request_object_get(obj);
	return obj;
}

int v4l2_g_ext_ctrls_request(struct v4l2_ctrl_handler *hdl,
			     struct video_device *vdev,
			     struct media_device *mdev,
			     struct v4l2_ext_controls *cs)
{
	struct media_request_object *obj = NULL;
	struct media_request *req = NULL;
	int ret;

	if (!mdev || cs->request_fd < 0)
		return -EINVAL;

	req = media_request_get_by_fd(mdev, cs->request_fd);
	if (IS_ERR(req))
		return PTR_ERR(req);

	if (req->state != MEDIA_REQUEST_STATE_COMPLETE) {
		media_request_put(req);
		return -EACCES;
	}

	ret = media_request_lock_for_access(req);
	if (ret) {
		media_request_put(req);
		return ret;
	}

	obj = v4l2_ctrls_find_req_obj(hdl, req, false);
	if (IS_ERR(obj)) {
		media_request_unlock_for_access(req);
		media_request_put(req);
		return PTR_ERR(obj);
	}

	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
	ret = v4l2_g_ext_ctrls_common(hdl, cs, vdev);

	media_request_unlock_for_access(req);
	media_request_object_put(obj);
	media_request_put(req);
	return ret;
}

int try_set_ext_ctrls_request(struct v4l2_fh *fh,
			      struct v4l2_ctrl_handler *hdl,
			      struct video_device *vdev,
			      struct media_device *mdev,
			      struct v4l2_ext_controls *cs, bool set)
{
	struct media_request_object *obj = NULL;
	struct media_request *req = NULL;
	int ret;

	if (!mdev) {
		dprintk(vdev, "%s: missing media device\n",
			video_device_node_name(vdev));
		return -EINVAL;
	}

	if (cs->request_fd < 0) {
		dprintk(vdev, "%s: invalid request fd %d\n",
			video_device_node_name(vdev), cs->request_fd);
		return -EINVAL;
	}

	req = media_request_get_by_fd(mdev, cs->request_fd);
	if (IS_ERR(req)) {
		dprintk(vdev, "%s: cannot find request fd %d\n",
			video_device_node_name(vdev), cs->request_fd);
		return PTR_ERR(req);
	}

	ret = media_request_lock_for_update(req);
	if (ret) {
		dprintk(vdev, "%s: cannot lock request fd %d\n",
			video_device_node_name(vdev), cs->request_fd);
		media_request_put(req);
		return ret;
	}

	obj = v4l2_ctrls_find_req_obj(hdl, req, set);
	if (IS_ERR(obj)) {
		dprintk(vdev,
			"%s: cannot find request object for request fd %d\n",
			video_device_node_name(vdev), cs->request_fd);
		media_request_unlock_for_update(req);
		media_request_put(req);
		return PTR_ERR(obj);
	}

	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
	ret = try_set_ext_ctrls_common(fh, hdl, cs, vdev, set);
	if (ret)
		dprintk(vdev,
			"%s: try_set_ext_ctrls_common failed (%d)\n",
			video_device_node_name(vdev), ret);

	media_request_unlock_for_update(req);
	media_request_object_put(obj);
	media_request_put(req);
	return ret;
}

void v4l2_ctrl_request_complete(struct media_request *req,
				struct v4l2_ctrl_handler *main_hdl)
{
	struct media_request_object *obj;
	struct v4l2_ctrl_handler *hdl;
	struct v4l2_ctrl_ref *ref;

	if (!req || !main_hdl)
		return;

	/*
	 * Note that it is valid if nothing was found. It means
	 * that this request doesn't have any controls and so just
	 * wants to leave the controls unchanged.
	 */
	obj = media_request_object_find(req, &req_ops, main_hdl);
	if (!obj) {
		int ret;

		/* Create a new request so the driver can return controls */
		hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
		if (!hdl)
			return;

		ret = v4l2_ctrl_handler_init(hdl, (main_hdl->nr_of_buckets - 1) * 8);
		if (!ret)
			ret = v4l2_ctrl_request_bind(req, hdl, main_hdl);
		if (ret) {
			v4l2_ctrl_handler_free(hdl);
			kfree(hdl);
			return;
		}
		hdl->request_is_queued = true;
		obj = media_request_object_find(req, &req_ops, main_hdl);
	}
	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);

	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
		struct v4l2_ctrl *ctrl = ref->ctrl;
		struct v4l2_ctrl *master = ctrl->cluster[0];
		unsigned int i;

		if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) {
			v4l2_ctrl_lock(master);
			/* g_volatile_ctrl will update the current control values */
			for (i = 0; i < master->ncontrols; i++)
				cur_to_new(master->cluster[i]);
			call_op(master, g_volatile_ctrl);
			new_to_req(ref);
			v4l2_ctrl_unlock(master);
			continue;
		}
		if (ref->p_req_valid)
			continue;

		/* Copy the current control value into the request */
		v4l2_ctrl_lock(ctrl);
		cur_to_req(ref);
		v4l2_ctrl_unlock(ctrl);
	}

	mutex_lock(main_hdl->lock);
	WARN_ON(!hdl->request_is_queued);
	list_del_init(&hdl->requests_queued);
	hdl->request_is_queued = false;
	mutex_unlock(main_hdl->lock);
	media_request_object_complete(obj);
	media_request_object_put(obj);
}
EXPORT_SYMBOL(v4l2_ctrl_request_complete);

int v4l2_ctrl_request_setup(struct media_request *req,
			    struct v4l2_ctrl_handler *main_hdl)
{
	struct media_request_object *obj;
	struct v4l2_ctrl_handler *hdl;
	struct v4l2_ctrl_ref *ref;
	int ret = 0;

	if (!req || !main_hdl)
		return 0;

	if (WARN_ON(req->state != MEDIA_REQUEST_STATE_QUEUED))
		return -EBUSY;

	/*
	 * Note that it is valid if nothing was found. It means
	 * that this request doesn't have any controls and so just
	 * wants to leave the controls unchanged.
	 */
	obj = media_request_object_find(req, &req_ops, main_hdl);
	if (!obj)
		return 0;
	if (obj->completed) {
		media_request_object_put(obj);
		return -EBUSY;
	}
	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);

	list_for_each_entry(ref, &hdl->ctrl_refs, node)
		ref->req_done = false;

	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
		struct v4l2_ctrl *ctrl = ref->ctrl;
		struct v4l2_ctrl *master = ctrl->cluster[0];
		bool have_new_data = false;
		int i;

		/*
		 * Skip if this control was already handled by a cluster.
		 * Skip button controls and read-only controls.
		 */
		if (ref->req_done || (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY))
			continue;

		v4l2_ctrl_lock(master);
		for (i = 0; i < master->ncontrols; i++) {
			if (master->cluster[i]) {
				struct v4l2_ctrl_ref *r =
					find_ref(hdl, master->cluster[i]->id);

				if (r->p_req_valid) {
					have_new_data = true;
					break;
				}
			}
		}
		if (!have_new_data) {
			v4l2_ctrl_unlock(master);
			continue;
		}
		for (i = 0; i < master->ncontrols; i++) {
			if (master->cluster[i]) {
				struct v4l2_ctrl_ref *r =
					find_ref(hdl, master->cluster[i]->id);

				ret = req_to_new(r);
				if (ret) {
					v4l2_ctrl_unlock(master);
					goto error;
				}
				master->cluster[i]->is_new = 1;
				r->req_done = true;
			}
		}
		/*
		 * For volatile autoclusters that are currently in auto mode
		 * we need to discover if it will be set to manual mode.
		 * If so, then we have to copy the current volatile values
		 * first since those will become the new manual values (which
		 * may be overwritten by explicit new values from this set
		 * of controls).
		 */
		if (master->is_auto && master->has_volatiles &&
		    !is_cur_manual(master)) {
			s32 new_auto_val = *master->p_new.p_s32;

			/*
			 * If the new value == the manual value, then copy
			 * the current volatile values.
			 */
			if (new_auto_val == master->manual_mode_value)
				update_from_auto_cluster(master);
		}

		ret = try_or_set_cluster(NULL, master, true, 0);
		v4l2_ctrl_unlock(master);

		if (ret)
			break;
	}

error:
	media_request_object_put(obj);
	return ret;
}
EXPORT_SYMBOL(v4l2_ctrl_request_setup);
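
The request helpers above are the end of the V4L2 control-request code shown here. As orientation only, the following is a minimal sketch of how a stateless codec driver would typically drive them; the context structure and function name (my_ctx, my_driver_run_job) are placeholders and are not taken from this file — only v4l2_ctrl_request_setup() and v4l2_ctrl_request_complete() are real calls.

/*
 * Hedged usage sketch, not part of the file above. It assumes the driver
 * already holds a valid struct media_request * for the job it is running.
 */
struct my_ctx {				/* hypothetical driver context */
	struct v4l2_ctrl_handler hdl;	/* the driver's main control handler */
};

static void my_driver_run_job(struct my_ctx *ctx, struct media_request *req)
{
	/*
	 * Copy the control values stored in the request into the handler
	 * (they become the "new" values), so the hardware can be programmed
	 * from the usual control state.
	 */
	if (v4l2_ctrl_request_setup(req, &ctx->hdl))
		return;		/* a control in the request could not be applied */

	/* ... program the hardware and run the job ... */

	/*
	 * Store the resulting control state back into the request and mark
	 * its control object as completed.
	 */
	v4l2_ctrl_request_complete(req, &ctx->hdl);
}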
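
What follows is a different file: cfg80211's rdev-ops tracepoint header (TRACE_SYSTEM cfg80211). The TRACE_EVENT()/DECLARE_EVENT_CLASS() blocks below only define the events; they are normally fired from thin inline wrappers around the driver callbacks. As a rough sketch of that calling pattern only (the wrapper shown here follows the usual rdev-ops.h convention and is an assumption, not part of this header):

/*
 * Hedged sketch for orientation: roughly how cfg80211 emits the
 * rdev_scan / rdev_return_int events defined below around a driver op.
 * rdev->wiphy and rdev->ops are assumed from struct
 * cfg80211_registered_device; the in-tree wrapper may differ in detail.
 */
static inline int rdev_scan(struct cfg80211_registered_device *rdev,
			    struct cfg80211_scan_request *request)
{
	int ret;

	trace_rdev_scan(&rdev->wiphy, request);		/* TRACE_EVENT(rdev_scan, ...) */
	ret = rdev->ops->scan(&rdev->wiphy, request);
	trace_rdev_return_int(&rdev->wiphy, ret);	/* TRACE_EVENT(rdev_return_int, ...) */
	return ret;
}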
/* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH * Copyright (C) 2018, 2020-2024 Intel Corporation */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg80211 #if !defined(__RDEV_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __RDEV_OPS_TRACE #include <linux/tracepoint.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> #include <net/cfg80211.h> #include "core.h" #define MAC_ENTRY(entry_mac) __array(u8, entry_mac, ETH_ALEN) #define MAC_ASSIGN(entry_mac, given_mac) do { \ if (given_mac) \ memcpy(__entry->entry_mac, given_mac, ETH_ALEN); \ else \ eth_zero_addr(__entry->entry_mac); \ } while (0) #define MAXNAME 32 #define WIPHY_ENTRY __array(char, wiphy_name, 32) #define WIPHY_ASSIGN strscpy(__entry->wiphy_name, wiphy_name(wiphy), MAXNAME) #define WIPHY_PR_FMT "%s" #define WIPHY_PR_ARG __entry->wiphy_name #define WDEV_ENTRY __field(u32, id) #define WDEV_ASSIGN (__entry->id) = (!IS_ERR_OR_NULL(wdev) \ ? 
wdev->identifier : 0) #define WDEV_PR_FMT "wdev(%u)" #define WDEV_PR_ARG (__entry->id) #define NETDEV_ENTRY __array(char, name, IFNAMSIZ) \ __field(int, ifindex) #define NETDEV_ASSIGN \ do { \ memcpy(__entry->name, netdev->name, IFNAMSIZ); \ (__entry->ifindex) = (netdev->ifindex); \ } while (0) #define NETDEV_PR_FMT "netdev:%s(%d)" #define NETDEV_PR_ARG __entry->name, __entry->ifindex #define MESH_CFG_ENTRY __field(u16, dot11MeshRetryTimeout) \ __field(u16, dot11MeshConfirmTimeout) \ __field(u16, dot11MeshHoldingTimeout) \ __field(u16, dot11MeshMaxPeerLinks) \ __field(u8, dot11MeshMaxRetries) \ __field(u8, dot11MeshTTL) \ __field(u8, element_ttl) \ __field(bool, auto_open_plinks) \ __field(u32, dot11MeshNbrOffsetMaxNeighbor) \ __field(u8, dot11MeshHWMPmaxPREQretries) \ __field(u32, path_refresh_time) \ __field(u32, dot11MeshHWMPactivePathTimeout) \ __field(u16, min_discovery_timeout) \ __field(u16, dot11MeshHWMPpreqMinInterval) \ __field(u16, dot11MeshHWMPperrMinInterval) \ __field(u16, dot11MeshHWMPnetDiameterTraversalTime) \ __field(u8, dot11MeshHWMPRootMode) \ __field(u16, dot11MeshHWMPRannInterval) \ __field(bool, dot11MeshGateAnnouncementProtocol) \ __field(bool, dot11MeshForwarding) \ __field(s32, rssi_threshold) \ __field(u16, ht_opmode) \ __field(u32, dot11MeshHWMPactivePathToRootTimeout) \ __field(u16, dot11MeshHWMProotInterval) \ __field(u16, dot11MeshHWMPconfirmationInterval) \ __field(bool, dot11MeshNolearn) #define MESH_CFG_ASSIGN \ do { \ __entry->dot11MeshRetryTimeout = conf->dot11MeshRetryTimeout; \ __entry->dot11MeshConfirmTimeout = \ conf->dot11MeshConfirmTimeout; \ __entry->dot11MeshHoldingTimeout = \ conf->dot11MeshHoldingTimeout; \ __entry->dot11MeshMaxPeerLinks = conf->dot11MeshMaxPeerLinks; \ __entry->dot11MeshMaxRetries = conf->dot11MeshMaxRetries; \ __entry->dot11MeshTTL = conf->dot11MeshTTL; \ __entry->element_ttl = conf->element_ttl; \ __entry->auto_open_plinks = conf->auto_open_plinks; \ __entry->dot11MeshNbrOffsetMaxNeighbor = \ conf->dot11MeshNbrOffsetMaxNeighbor; \ __entry->dot11MeshHWMPmaxPREQretries = \ conf->dot11MeshHWMPmaxPREQretries; \ __entry->path_refresh_time = conf->path_refresh_time; \ __entry->dot11MeshHWMPactivePathTimeout = \ conf->dot11MeshHWMPactivePathTimeout; \ __entry->min_discovery_timeout = conf->min_discovery_timeout; \ __entry->dot11MeshHWMPpreqMinInterval = \ conf->dot11MeshHWMPpreqMinInterval; \ __entry->dot11MeshHWMPperrMinInterval = \ conf->dot11MeshHWMPperrMinInterval; \ __entry->dot11MeshHWMPnetDiameterTraversalTime = \ conf->dot11MeshHWMPnetDiameterTraversalTime; \ __entry->dot11MeshHWMPRootMode = conf->dot11MeshHWMPRootMode; \ __entry->dot11MeshHWMPRannInterval = \ conf->dot11MeshHWMPRannInterval; \ __entry->dot11MeshGateAnnouncementProtocol = \ conf->dot11MeshGateAnnouncementProtocol; \ __entry->dot11MeshForwarding = conf->dot11MeshForwarding; \ __entry->rssi_threshold = conf->rssi_threshold; \ __entry->ht_opmode = conf->ht_opmode; \ __entry->dot11MeshHWMPactivePathToRootTimeout = \ conf->dot11MeshHWMPactivePathToRootTimeout; \ __entry->dot11MeshHWMProotInterval = \ conf->dot11MeshHWMProotInterval; \ __entry->dot11MeshHWMPconfirmationInterval = \ conf->dot11MeshHWMPconfirmationInterval; \ __entry->dot11MeshNolearn = conf->dot11MeshNolearn; \ } while (0) #define CHAN_ENTRY __field(enum nl80211_band, band) \ __field(u32, center_freq) \ __field(u16, freq_offset) #define CHAN_ASSIGN(chan) \ do { \ if (chan) { \ __entry->band = chan->band; \ __entry->center_freq = chan->center_freq; \ __entry->freq_offset = chan->freq_offset; \ } 
else { \ __entry->band = 0; \ __entry->center_freq = 0; \ __entry->freq_offset = 0; \ } \ } while (0) #define CHAN_PR_FMT "band: %d, freq: %u.%03u" #define CHAN_PR_ARG __entry->band, __entry->center_freq, __entry->freq_offset #define CHAN_DEF_ENTRY __field(enum nl80211_band, band) \ __field(u32, control_freq) \ __field(u32, freq_offset) \ __field(u32, width) \ __field(u32, center_freq1) \ __field(u32, freq1_offset) \ __field(u32, center_freq2) \ __field(u16, punctured) #define CHAN_DEF_ASSIGN(chandef) \ do { \ if ((chandef) && (chandef)->chan) { \ __entry->band = (chandef)->chan->band; \ __entry->control_freq = \ (chandef)->chan->center_freq; \ __entry->freq_offset = \ (chandef)->chan->freq_offset; \ __entry->width = (chandef)->width; \ __entry->center_freq1 = (chandef)->center_freq1;\ __entry->freq1_offset = (chandef)->freq1_offset;\ __entry->center_freq2 = (chandef)->center_freq2;\ __entry->punctured = (chandef)->punctured; \ } else { \ __entry->band = 0; \ __entry->control_freq = 0; \ __entry->freq_offset = 0; \ __entry->width = 0; \ __entry->center_freq1 = 0; \ __entry->freq1_offset = 0; \ __entry->center_freq2 = 0; \ __entry->punctured = 0; \ } \ } while (0) #define CHAN_DEF_PR_FMT \ "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u, punct: 0x%x" #define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \ __entry->freq_offset, __entry->width, \ __entry->center_freq1, __entry->freq1_offset, \ __entry->center_freq2, __entry->punctured #define FILS_AAD_ASSIGN(fa) \ do { \ if (fa) { \ ether_addr_copy(__entry->macaddr, fa->macaddr); \ __entry->kek_len = fa->kek_len; \ } else { \ eth_zero_addr(__entry->macaddr); \ __entry->kek_len = 0; \ } \ } while (0) #define FILS_AAD_PR_FMT \ "macaddr: %pM, kek_len: %d" #define SINFO_ENTRY __field(int, generation) \ __field(u32, connected_time) \ __field(u32, inactive_time) \ __field(u32, rx_bytes) \ __field(u32, tx_bytes) \ __field(u32, rx_packets) \ __field(u32, tx_packets) \ __field(u32, tx_retries) \ __field(u32, tx_failed) \ __field(u32, rx_dropped_misc) \ __field(u32, beacon_loss_count) \ __field(u16, llid) \ __field(u16, plid) \ __field(u8, plink_state) #define SINFO_ASSIGN \ do { \ __entry->generation = sinfo->generation; \ __entry->connected_time = sinfo->connected_time; \ __entry->inactive_time = sinfo->inactive_time; \ __entry->rx_bytes = sinfo->rx_bytes; \ __entry->tx_bytes = sinfo->tx_bytes; \ __entry->rx_packets = sinfo->rx_packets; \ __entry->tx_packets = sinfo->tx_packets; \ __entry->tx_retries = sinfo->tx_retries; \ __entry->tx_failed = sinfo->tx_failed; \ __entry->rx_dropped_misc = sinfo->rx_dropped_misc; \ __entry->beacon_loss_count = sinfo->beacon_loss_count; \ __entry->llid = sinfo->llid; \ __entry->plid = sinfo->plid; \ __entry->plink_state = sinfo->plink_state; \ } while (0) #define BOOL_TO_STR(bo) (bo) ? 
"true" : "false" #define QOS_MAP_ENTRY __field(u8, num_des) \ __array(u8, dscp_exception, \ 2 * IEEE80211_QOS_MAP_MAX_EX) \ __array(u8, up, IEEE80211_QOS_MAP_LEN_MIN) #define QOS_MAP_ASSIGN(qos_map) \ do { \ if ((qos_map)) { \ __entry->num_des = (qos_map)->num_des; \ memcpy(__entry->dscp_exception, \ &(qos_map)->dscp_exception, \ 2 * IEEE80211_QOS_MAP_MAX_EX); \ memcpy(__entry->up, &(qos_map)->up, \ IEEE80211_QOS_MAP_LEN_MIN); \ } else { \ __entry->num_des = 0; \ memset(__entry->dscp_exception, 0, \ 2 * IEEE80211_QOS_MAP_MAX_EX); \ memset(__entry->up, 0, \ IEEE80211_QOS_MAP_LEN_MIN); \ } \ } while (0) /************************************************************* * rdev->ops traces * *************************************************************/ TRACE_EVENT(rdev_suspend, TP_PROTO(struct wiphy *wiphy, struct cfg80211_wowlan *wow), TP_ARGS(wiphy, wow), TP_STRUCT__entry( WIPHY_ENTRY __field(bool, any) __field(bool, disconnect) __field(bool, magic_pkt) __field(bool, gtk_rekey_failure) __field(bool, eap_identity_req) __field(bool, four_way_handshake) __field(bool, rfkill_release) __field(bool, valid_wow) ), TP_fast_assign( WIPHY_ASSIGN; if (wow) { __entry->any = wow->any; __entry->disconnect = wow->disconnect; __entry->magic_pkt = wow->magic_pkt; __entry->gtk_rekey_failure = wow->gtk_rekey_failure; __entry->eap_identity_req = wow->eap_identity_req; __entry->four_way_handshake = wow->four_way_handshake; __entry->rfkill_release = wow->rfkill_release; __entry->valid_wow = true; } else { __entry->valid_wow = false; } ), TP_printk(WIPHY_PR_FMT ", wow%s - any: %d, disconnect: %d, " "magic pkt: %d, gtk rekey failure: %d, eap identify req: %d, " "four way handshake: %d, rfkill release: %d.", WIPHY_PR_ARG, __entry->valid_wow ? "" : "(Not configured!)", __entry->any, __entry->disconnect, __entry->magic_pkt, __entry->gtk_rekey_failure, __entry->eap_identity_req, __entry->four_way_handshake, __entry->rfkill_release) ); TRACE_EVENT(rdev_return_int, TP_PROTO(struct wiphy *wiphy, int ret), TP_ARGS(wiphy, ret), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; ), TP_printk(WIPHY_PR_FMT ", returned: %d", WIPHY_PR_ARG, __entry->ret) ); TRACE_EVENT(rdev_scan, TP_PROTO(struct wiphy *wiphy, struct cfg80211_scan_request *request), TP_ARGS(wiphy, request), TP_STRUCT__entry( WIPHY_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; ), TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) ); DECLARE_EVENT_CLASS(wiphy_only_evt, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), TP_STRUCT__entry( WIPHY_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; ), TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) ); DEFINE_EVENT(wiphy_only_evt, rdev_resume, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) ); DEFINE_EVENT(wiphy_only_evt, rdev_return_void, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) ); DEFINE_EVENT(wiphy_only_evt, rdev_get_antenna, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) ); DEFINE_EVENT(wiphy_only_evt, rdev_rfkill_poll, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) ); DECLARE_EVENT_CLASS(wiphy_enabled_evt, TP_PROTO(struct wiphy *wiphy, bool enabled), TP_ARGS(wiphy, enabled), TP_STRUCT__entry( WIPHY_ENTRY __field(bool, enabled) ), TP_fast_assign( WIPHY_ASSIGN; __entry->enabled = enabled; ), TP_printk(WIPHY_PR_FMT ", %senabled ", WIPHY_PR_ARG, __entry->enabled ? 
"" : "not ") ); DEFINE_EVENT(wiphy_enabled_evt, rdev_set_wakeup, TP_PROTO(struct wiphy *wiphy, bool enabled), TP_ARGS(wiphy, enabled) ); TRACE_EVENT(rdev_add_virtual_intf, TP_PROTO(struct wiphy *wiphy, char *name, enum nl80211_iftype type), TP_ARGS(wiphy, name, type), TP_STRUCT__entry( WIPHY_ENTRY __string(vir_intf_name, name ? name : "<noname>") __field(enum nl80211_iftype, type) ), TP_fast_assign( WIPHY_ASSIGN; __assign_str(vir_intf_name, name ? name : "<noname>"); __entry->type = type; ), TP_printk(WIPHY_PR_FMT ", virtual intf name: %s, type: %d", WIPHY_PR_ARG, __get_str(vir_intf_name), __entry->type) ); DECLARE_EVENT_CLASS(wiphy_wdev_evt, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); DECLARE_EVENT_CLASS(wiphy_wdev_cookie_evt, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie: %lld", WIPHY_PR_ARG, WDEV_PR_ARG, (unsigned long long)__entry->cookie) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_return_wdev, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_del_virtual_intf, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_change_virtual_intf, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, enum nl80211_iftype type), TP_ARGS(wiphy, netdev, type), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(enum nl80211_iftype, type) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->type = type; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", type: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->type) ); DECLARE_EVENT_CLASS(key_handle, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(mac_addr) __field(int, link_id) __field(u8, key_index) __field(bool, pairwise) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key_index: %u, pairwise: %s, mac addr: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index, BOOL_TO_STR(__entry->pairwise), __entry->mac_addr) ); DEFINE_EVENT(key_handle, rdev_get_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) ); DEFINE_EVENT(key_handle, rdev_del_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) ); TRACE_EVENT(rdev_add_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, u8 mode), TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr, mode), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(mac_addr) __field(int, link_id) __field(u8, key_index) __field(bool, pairwise) __field(u8, mode) ), 
TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; __entry->mode = mode; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key_index: %u, mode: %u, pairwise: %s, " "mac addr: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index, __entry->mode, BOOL_TO_STR(__entry->pairwise), __entry->mac_addr) ); TRACE_EVENT(rdev_set_default_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast), TP_ARGS(wiphy, netdev, link_id, key_index, unicast, multicast), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) __field(u8, key_index) __field(bool, unicast) __field(bool, multicast) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; __entry->unicast = unicast; __entry->multicast = multicast; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key index: %u, unicast: %s, multicast: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index, BOOL_TO_STR(__entry->unicast), BOOL_TO_STR(__entry->multicast)) ); TRACE_EVENT(rdev_set_default_mgmt_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index), TP_ARGS(wiphy, netdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index) ); TRACE_EVENT(rdev_set_default_beacon_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index), TP_ARGS(wiphy, netdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->key_index) ); TRACE_EVENT(rdev_start_ap, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ap_settings *settings), TP_ARGS(wiphy, netdev, settings), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY __field(int, beacon_interval) __field(int, dtim_period) __array(char, ssid, IEEE80211_MAX_SSID_LEN + 1) __field(enum nl80211_hidden_ssid, hidden_ssid) __field(u32, wpa_ver) __field(bool, privacy) __field(enum nl80211_auth_type, auth_type) __field(int, inactivity_timeout) __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(&settings->chandef); __entry->beacon_interval = settings->beacon_interval; __entry->dtim_period = settings->dtim_period; __entry->hidden_ssid = settings->hidden_ssid; __entry->wpa_ver = settings->crypto.wpa_versions; __entry->privacy = settings->privacy; __entry->auth_type = settings->auth_type; __entry->inactivity_timeout = settings->inactivity_timeout; memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, settings->ssid, settings->ssid_len); __entry->link_id = settings->beacon.link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", AP settings - ssid: %s, " CHAN_DEF_PR_FMT ", beacon interval: %d, dtim period: %d, " "hidden ssid: %d, wpa versions: %u, privacy: %s, " "auth type: %d, inactivity timeout: 
%d, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->ssid, CHAN_DEF_PR_ARG, __entry->beacon_interval, __entry->dtim_period, __entry->hidden_ssid, __entry->wpa_ver, BOOL_TO_STR(__entry->privacy), __entry->auth_type, __entry->inactivity_timeout, __entry->link_id) ); TRACE_EVENT(rdev_change_beacon, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ap_update *info), TP_ARGS(wiphy, netdev, info), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) __dynamic_array(u8, head, info->beacon.head_len) __dynamic_array(u8, tail, info->beacon.tail_len) __dynamic_array(u8, beacon_ies, info->beacon.beacon_ies_len) __dynamic_array(u8, proberesp_ies, info->beacon.proberesp_ies_len) __dynamic_array(u8, assocresp_ies, info->beacon.assocresp_ies_len) __dynamic_array(u8, probe_resp, info->beacon.probe_resp_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = info->beacon.link_id; if (info->beacon.head) memcpy(__get_dynamic_array(head), info->beacon.head, info->beacon.head_len); if (info->beacon.tail) memcpy(__get_dynamic_array(tail), info->beacon.tail, info->beacon.tail_len); if (info->beacon.beacon_ies) memcpy(__get_dynamic_array(beacon_ies), info->beacon.beacon_ies, info->beacon.beacon_ies_len); if (info->beacon.proberesp_ies) memcpy(__get_dynamic_array(proberesp_ies), info->beacon.proberesp_ies, info->beacon.proberesp_ies_len); if (info->beacon.assocresp_ies) memcpy(__get_dynamic_array(assocresp_ies), info->beacon.assocresp_ies, info->beacon.assocresp_ies_len); if (info->beacon.probe_resp) memcpy(__get_dynamic_array(probe_resp), info->beacon.probe_resp, info->beacon.probe_resp_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) ); TRACE_EVENT(rdev_stop_ap, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, unsigned int link_id), TP_ARGS(wiphy, netdev, link_id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) ); DECLARE_EVENT_CLASS(wiphy_netdev_evt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_set_rekey_data, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_get_mesh_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_leave_mesh, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_leave_ibss, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_leave_ocb, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DEFINE_EVENT(wiphy_netdev_evt, rdev_end_cac, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) ); DECLARE_EVENT_CLASS(station_add_change, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), TP_ARGS(wiphy, netdev, mac, 
params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) __field(u32, sta_flags_mask) __field(u32, sta_flags_set) __field(u32, sta_modify_mask) __field(int, listen_interval) __field(u16, capability) __field(u16, aid) __field(u8, plink_action) __field(u8, plink_state) __field(u8, uapsd_queues) __field(u8, max_sp) __field(u8, opmode_notif) __field(bool, opmode_notif_used) __array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap)) __array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap)) __array(char, vlan, IFNAMSIZ) __dynamic_array(u8, supported_rates, params->link_sta_params.supported_rates_len) __dynamic_array(u8, ext_capab, params->ext_capab_len) __dynamic_array(u8, supported_channels, params->supported_channels_len) __dynamic_array(u8, supported_oper_classes, params->supported_oper_classes_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); __entry->sta_flags_mask = params->sta_flags_mask; __entry->sta_flags_set = params->sta_flags_set; __entry->sta_modify_mask = params->sta_modify_mask; __entry->listen_interval = params->listen_interval; __entry->aid = params->aid; __entry->plink_action = params->plink_action; __entry->plink_state = params->plink_state; __entry->uapsd_queues = params->uapsd_queues; memset(__entry->ht_capa, 0, sizeof(struct ieee80211_ht_cap)); if (params->link_sta_params.ht_capa) memcpy(__entry->ht_capa, params->link_sta_params.ht_capa, sizeof(struct ieee80211_ht_cap)); memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap)); if (params->link_sta_params.vht_capa) memcpy(__entry->vht_capa, params->link_sta_params.vht_capa, sizeof(struct ieee80211_vht_cap)); memset(__entry->vlan, 0, sizeof(__entry->vlan)); if (params->vlan) memcpy(__entry->vlan, params->vlan->name, IFNAMSIZ); if (params->link_sta_params.supported_rates && params->link_sta_params.supported_rates_len) memcpy(__get_dynamic_array(supported_rates), params->link_sta_params.supported_rates, params->link_sta_params.supported_rates_len); if (params->ext_capab && params->ext_capab_len) memcpy(__get_dynamic_array(ext_capab), params->ext_capab, params->ext_capab_len); if (params->supported_channels && params->supported_channels_len) memcpy(__get_dynamic_array(supported_channels), params->supported_channels, params->supported_channels_len); if (params->supported_oper_classes && params->supported_oper_classes_len) memcpy(__get_dynamic_array(supported_oper_classes), params->supported_oper_classes, params->supported_oper_classes_len); __entry->max_sp = params->max_sp; __entry->capability = params->capability; __entry->opmode_notif = params->link_sta_params.opmode_notif; __entry->opmode_notif_used = params->link_sta_params.opmode_notif_used; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" ", station flags mask: 0x%x, station flags set: 0x%x, " "station modify mask: 0x%x, listen interval: %d, aid: %u, " "plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, __entry->sta_flags_mask, __entry->sta_flags_set, __entry->sta_modify_mask, __entry->listen_interval, __entry->aid, __entry->plink_action, __entry->plink_state, __entry->uapsd_queues, __entry->vlan) ); DEFINE_EVENT(station_add_change, rdev_add_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), TP_ARGS(wiphy, netdev, mac, params) ); DEFINE_EVENT(station_add_change, rdev_change_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), 
TP_ARGS(wiphy, netdev, mac, params) ); DECLARE_EVENT_CLASS(wiphy_netdev_mac_evt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), TP_ARGS(wiphy, netdev, mac), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac) ); DECLARE_EVENT_CLASS(station_del, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct station_del_parameters *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) __field(u8, subtype) __field(u16, reason_code) __field(int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, params->mac); __entry->subtype = params->subtype; __entry->reason_code = params->reason_code; __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" ", subtype: %u, reason_code: %u, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, __entry->subtype, __entry->reason_code, __entry->link_id) ); DEFINE_EVENT(station_del, rdev_del_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct station_del_parameters *params), TP_ARGS(wiphy, netdev, params) ); DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_get_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), TP_ARGS(wiphy, netdev, mac) ); DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_del_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), TP_ARGS(wiphy, netdev, mac) ); TRACE_EVENT(rdev_dump_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *mac), TP_ARGS(wiphy, netdev, _idx, mac), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(sta_mac) __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); __entry->idx = _idx; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM, idx: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, __entry->idx) ); TRACE_EVENT(rdev_return_int_station_info, TP_PROTO(struct wiphy *wiphy, int ret, struct station_info *sinfo), TP_ARGS(wiphy, ret, sinfo), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) SINFO_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; SINFO_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", returned %d" , WIPHY_PR_ARG, __entry->ret) ); DECLARE_EVENT_CLASS(mpath_evt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, dst, next_hop), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dst) MAC_ENTRY(next_hop) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dst, dst); MAC_ASSIGN(next_hop, next_hop); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", destination: %pM, next hop: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dst, __entry->next_hop) ); DEFINE_EVENT(mpath_evt, rdev_add_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, dst, next_hop) ); DEFINE_EVENT(mpath_evt, rdev_change_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, dst, next_hop) ); DEFINE_EVENT(mpath_evt, rdev_get_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, dst, next_hop) ); TRACE_EVENT(rdev_dump_mpath, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *next_hop), TP_ARGS(wiphy, netdev, _idx, dst, 
next_hop), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dst) MAC_ENTRY(next_hop) __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dst, dst); MAC_ASSIGN(next_hop, next_hop); __entry->idx = _idx; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: %pM, next hop: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx, __entry->dst, __entry->next_hop) ); TRACE_EVENT(rdev_get_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *dst, u8 *mpp), TP_ARGS(wiphy, netdev, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dst) MAC_ENTRY(mpp) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dst, dst); MAC_ASSIGN(mpp, mpp); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", destination: %pM" ", mpp: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dst, __entry->mpp) ); TRACE_EVENT(rdev_dump_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *mpp), TP_ARGS(wiphy, netdev, _idx, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dst) MAC_ENTRY(mpp) __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dst, dst); MAC_ASSIGN(mpp, mpp); __entry->idx = _idx; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: %pM, mpp: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx, __entry->dst, __entry->mpp) ); TRACE_EVENT(rdev_return_int_mpath_info, TP_PROTO(struct wiphy *wiphy, int ret, struct mpath_info *pinfo), TP_ARGS(wiphy, ret, pinfo), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) __field(int, generation) __field(u32, filled) __field(u32, frame_qlen) __field(u32, sn) __field(u32, metric) __field(u32, exptime) __field(u32, discovery_timeout) __field(u8, discovery_retries) __field(u8, flags) ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; __entry->generation = pinfo->generation; __entry->filled = pinfo->filled; __entry->frame_qlen = pinfo->frame_qlen; __entry->sn = pinfo->sn; __entry->metric = pinfo->metric; __entry->exptime = pinfo->exptime; __entry->discovery_timeout = pinfo->discovery_timeout; __entry->discovery_retries = pinfo->discovery_retries; __entry->flags = pinfo->flags; ), TP_printk(WIPHY_PR_FMT ", returned %d. 
mpath info - generation: %d, " "filled: %u, frame qlen: %u, sn: %u, metric: %u, exptime: %u," " discovery timeout: %u, discovery retries: %u, flags: 0x%x", WIPHY_PR_ARG, __entry->ret, __entry->generation, __entry->filled, __entry->frame_qlen, __entry->sn, __entry->metric, __entry->exptime, __entry->discovery_timeout, __entry->discovery_retries, __entry->flags) ); TRACE_EVENT(rdev_return_int_mesh_config, TP_PROTO(struct wiphy *wiphy, int ret, struct mesh_config *conf), TP_ARGS(wiphy, ret, conf), TP_STRUCT__entry( WIPHY_ENTRY MESH_CFG_ENTRY __field(int, ret) ), TP_fast_assign( WIPHY_ASSIGN; MESH_CFG_ASSIGN; __entry->ret = ret; ), TP_printk(WIPHY_PR_FMT ", returned: %d", WIPHY_PR_ARG, __entry->ret) ); TRACE_EVENT(rdev_update_mesh_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 mask, const struct mesh_config *conf), TP_ARGS(wiphy, netdev, mask, conf), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MESH_CFG_ENTRY __field(u32, mask) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MESH_CFG_ASSIGN; __entry->mask = mask; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mask: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mask) ); TRACE_EVENT(rdev_join_mesh, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const struct mesh_config *conf, const struct mesh_setup *setup), TP_ARGS(wiphy, netdev, conf, setup), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MESH_CFG_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MESH_CFG_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); TRACE_EVENT(rdev_change_bss, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct bss_parameters *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, use_cts_prot) __field(int, use_short_preamble) __field(int, use_short_slot_time) __field(int, ap_isolate) __field(int, ht_opmode) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->use_cts_prot = params->use_cts_prot; __entry->use_short_preamble = params->use_short_preamble; __entry->use_short_slot_time = params->use_short_slot_time; __entry->ap_isolate = params->ap_isolate; __entry->ht_opmode = params->ht_opmode; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", use cts prot: %d, " "use short preamble: %d, use short slot time: %d, " "ap isolate: %d, ht opmode: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->use_cts_prot, __entry->use_short_preamble, __entry->use_short_slot_time, __entry->ap_isolate, __entry->ht_opmode) ); TRACE_EVENT(rdev_inform_bss, TP_PROTO(struct wiphy *wiphy, struct cfg80211_bss *bss), TP_ARGS(wiphy, bss), TP_STRUCT__entry( WIPHY_ENTRY MAC_ENTRY(bssid) CHAN_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; MAC_ASSIGN(bssid, bss->bssid); CHAN_ASSIGN(bss->channel); ), TP_printk(WIPHY_PR_FMT ", %pM, " CHAN_PR_FMT, WIPHY_PR_ARG, __entry->bssid, CHAN_PR_ARG) ); TRACE_EVENT(rdev_set_txq_params, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct ieee80211_txq_params *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(enum nl80211_ac, ac) __field(u16, txop) __field(u16, cwmin) __field(u16, cwmax) __field(u8, aifs) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->ac = params->ac; __entry->txop = params->txop; __entry->cwmin = params->cwmin; __entry->cwmax = params->cwmax; __entry->aifs = params->aifs; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", ac: %d, txop: %u, cwmin: %u, cwmax: %u, aifs: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->ac, __entry->txop, __entry->cwmin, __entry->cwmax, __entry->aifs) ); 
TRACE_EVENT(rdev_libertas_set_mesh_channel, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct ieee80211_channel *chan), TP_ARGS(wiphy, netdev, chan), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_ASSIGN(chan); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_PR_ARG) ); TRACE_EVENT(rdev_set_monitor_channel, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, chandef), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); ), TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); TRACE_EVENT(rdev_auth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_auth_request *req), TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __field(enum nl80211_auth_type, auth_type) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else eth_zero_addr(__entry->bssid); __entry->auth_type = req->auth_type; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", auth type: %d, bssid: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->auth_type, __entry->bssid) ); TRACE_EVENT(rdev_assoc, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_assoc_request *req), TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) MAC_ENTRY(prev_bssid) __field(bool, use_mfp) __field(u32, flags) __dynamic_array(u8, elements, req->ie_len) __array(u8, ht_capa, sizeof(struct ieee80211_ht_cap)) __array(u8, ht_capa_mask, sizeof(struct ieee80211_ht_cap)) __array(u8, vht_capa, sizeof(struct ieee80211_vht_cap)) __array(u8, vht_capa_mask, sizeof(struct ieee80211_vht_cap)) __dynamic_array(u8, fils_kek, req->fils_kek_len) __dynamic_array(u8, fils_nonces, req->fils_nonces ? 
2 * FILS_NONCE_LEN : 0) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; if (req->bss) MAC_ASSIGN(bssid, req->bss->bssid); else eth_zero_addr(__entry->bssid); MAC_ASSIGN(prev_bssid, req->prev_bssid); __entry->use_mfp = req->use_mfp; __entry->flags = req->flags; if (req->ie) memcpy(__get_dynamic_array(elements), req->ie, req->ie_len); memcpy(__entry->ht_capa, &req->ht_capa, sizeof(req->ht_capa)); memcpy(__entry->ht_capa_mask, &req->ht_capa_mask, sizeof(req->ht_capa_mask)); memcpy(__entry->vht_capa, &req->vht_capa, sizeof(req->vht_capa)); memcpy(__entry->vht_capa_mask, &req->vht_capa_mask, sizeof(req->vht_capa_mask)); if (req->fils_kek) memcpy(__get_dynamic_array(fils_kek), req->fils_kek, req->fils_kek_len); if (req->fils_nonces) memcpy(__get_dynamic_array(fils_nonces), req->fils_nonces, 2 * FILS_NONCE_LEN); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", previous bssid: %pM, use mfp: %s, flags: 0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->prev_bssid, BOOL_TO_STR(__entry->use_mfp), __entry->flags) ); TRACE_EVENT(rdev_deauth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_deauth_request *req), TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __field(u16, reason_code) __field(bool, local_state_change) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, req->bssid); __entry->reason_code = req->reason_code; __entry->local_state_change = req->local_state_change; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, reason: %u, local_state_change:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->reason_code, __entry->local_state_change) ); TRACE_EVENT(rdev_disassoc, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_disassoc_request *req), TP_ARGS(wiphy, netdev, req), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __field(u16, reason_code) __field(bool, local_state_change) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, req->ap_addr); __entry->reason_code = req->reason_code; __entry->local_state_change = req->local_state_change; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", reason: %u, local state change: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->reason_code, BOOL_TO_STR(__entry->local_state_change)) ); TRACE_EVENT(rdev_mgmt_tx_cancel_wait, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie: %llu ", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) ); TRACE_EVENT(rdev_set_power_mgmt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, bool enabled, int timeout), TP_ARGS(wiphy, netdev, enabled, timeout), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(bool, enabled) __field(int, timeout) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->enabled = enabled; __entry->timeout = timeout; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %senabled, timeout: %d ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->enabled ? 
"" : "not ", __entry->timeout) ); TRACE_EVENT(rdev_connect, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_connect_params *sme), TP_ARGS(wiphy, netdev, sme), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __array(char, ssid, IEEE80211_MAX_SSID_LEN + 1) __field(enum nl80211_auth_type, auth_type) __field(bool, privacy) __field(u32, wpa_versions) __field(u32, flags) MAC_ENTRY(prev_bssid) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, sme->bssid); memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, sme->ssid, sme->ssid_len); __entry->auth_type = sme->auth_type; __entry->privacy = sme->privacy; __entry->wpa_versions = sme->crypto.wpa_versions; __entry->flags = sme->flags; MAC_ASSIGN(prev_bssid, sme->prev_bssid); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", ssid: %s, auth type: %d, privacy: %s, wpa versions: %u, " "flags: 0x%x, previous bssid: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid, __entry->auth_type, BOOL_TO_STR(__entry->privacy), __entry->wpa_versions, __entry->flags, __entry->prev_bssid) ); TRACE_EVENT(rdev_update_connect_params, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_connect_params *sme, u32 changed), TP_ARGS(wiphy, netdev, sme, changed), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u32, changed) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->changed = changed; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", parameters changed: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->changed) ); TRACE_EVENT(rdev_set_cqm_rssi_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, s32 rssi_thold, u32 rssi_hyst), TP_ARGS(wiphy, netdev, rssi_thold, rssi_hyst), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(s32, rssi_thold) __field(u32, rssi_hyst) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->rssi_thold = rssi_thold; __entry->rssi_hyst = rssi_hyst; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", rssi_thold: %d, rssi_hyst: %u ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->rssi_thold, __entry->rssi_hyst) ); TRACE_EVENT(rdev_set_cqm_rssi_range_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, s32 low, s32 high), TP_ARGS(wiphy, netdev, low, high), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(s32, rssi_low) __field(s32, rssi_high) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->rssi_low = low; __entry->rssi_high = high; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", range: %d - %d ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->rssi_low, __entry->rssi_high) ); TRACE_EVENT(rdev_set_cqm_txe_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 rate, u32 pkts, u32 intvl), TP_ARGS(wiphy, netdev, rate, pkts, intvl), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u32, rate) __field(u32, pkts) __field(u32, intvl) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->rate = rate; __entry->pkts = pkts; __entry->intvl = intvl; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", rate: %u, packets: %u, interval: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->rate, __entry->pkts, __entry->intvl) ); TRACE_EVENT(rdev_disconnect, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u16 reason_code), TP_ARGS(wiphy, netdev, reason_code), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u16, reason_code) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->reason_code = reason_code; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", reason code: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, 
__entry->reason_code) ); TRACE_EVENT(rdev_join_ibss, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ibss_params *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __array(char, ssid, IEEE80211_MAX_SSID_LEN + 1) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, params->bssid); memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, params->ssid, params->ssid_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, ssid: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid) ); TRACE_EVENT(rdev_join_ocb, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const struct ocb_setup *setup), TP_ARGS(wiphy, netdev, setup), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); TRACE_EVENT(rdev_set_wiphy_params, TP_PROTO(struct wiphy *wiphy, u32 changed), TP_ARGS(wiphy, changed), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, changed) ), TP_fast_assign( WIPHY_ASSIGN; __entry->changed = changed; ), TP_printk(WIPHY_PR_FMT ", changed: %u", WIPHY_PR_ARG, __entry->changed) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_get_tx_power, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_set_tx_power, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm), TP_ARGS(wiphy, wdev, type, mbm), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(enum nl80211_tx_power_setting, type) __field(int, mbm) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->type = type; __entry->mbm = mbm; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type: %u, mbm: %d", WIPHY_PR_ARG, WDEV_PR_ARG,__entry->type, __entry->mbm) ); TRACE_EVENT(rdev_return_int_int, TP_PROTO(struct wiphy *wiphy, int func_ret, int func_fill), TP_ARGS(wiphy, func_ret, func_fill), TP_STRUCT__entry( WIPHY_ENTRY __field(int, func_ret) __field(int, func_fill) ), TP_fast_assign( WIPHY_ASSIGN; __entry->func_ret = func_ret; __entry->func_fill = func_fill; ), TP_printk(WIPHY_PR_FMT ", function returns: %d, function filled: %d", WIPHY_PR_ARG, __entry->func_ret, __entry->func_fill) ); #ifdef CONFIG_NL80211_TESTMODE TRACE_EVENT(rdev_testmode_cmd, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); TRACE_EVENT(rdev_testmode_dump, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), TP_STRUCT__entry( WIPHY_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; ), TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) ); #endif /* CONFIG_NL80211_TESTMODE */ TRACE_EVENT(rdev_set_bitrate_mask, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask), TP_ARGS(wiphy, netdev, link_id, peer, mask), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(unsigned int, link_id) MAC_ENTRY(peer) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->link_id = link_id; MAC_ASSIGN(peer, peer); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, peer: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, __entry->peer) ); TRACE_EVENT(rdev_update_mgmt_frame_registrations, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct mgmt_frame_regs *upd), TP_ARGS(wiphy, wdev, upd), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u16, 
global_stypes) __field(u16, interface_stypes) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->global_stypes = upd->global_stypes; __entry->interface_stypes = upd->interface_stypes; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", global: 0x%.2x, intf: 0x%.2x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->global_stypes, __entry->interface_stypes) ); TRACE_EVENT(rdev_return_int_tx_rx, TP_PROTO(struct wiphy *wiphy, int ret, u32 tx, u32 rx), TP_ARGS(wiphy, ret, tx, rx), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) __field(u32, tx) __field(u32, rx) ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; __entry->tx = tx; __entry->rx = rx; ), TP_printk(WIPHY_PR_FMT ", returned %d, tx: %u, rx: %u", WIPHY_PR_ARG, __entry->ret, __entry->tx, __entry->rx) ); TRACE_EVENT(rdev_return_void_tx_rx, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 tx_max, u32 rx, u32 rx_max), TP_ARGS(wiphy, tx, tx_max, rx, rx_max), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, tx) __field(u32, tx_max) __field(u32, rx) __field(u32, rx_max) ), TP_fast_assign( WIPHY_ASSIGN; __entry->tx = tx; __entry->tx_max = tx_max; __entry->rx = rx; __entry->rx_max = rx_max; ), TP_printk(WIPHY_PR_FMT ", tx: %u, tx_max: %u, rx: %u, rx_max: %u ", WIPHY_PR_ARG, __entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max) ); DECLARE_EVENT_CLASS(tx_rx_evt, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), TP_ARGS(wiphy, tx, rx), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, tx) __field(u32, rx) ), TP_fast_assign( WIPHY_ASSIGN; __entry->tx = tx; __entry->rx = rx; ), TP_printk(WIPHY_PR_FMT ", tx: %u, rx: %u ", WIPHY_PR_ARG, __entry->tx, __entry->rx) ); DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), TP_ARGS(wiphy, tx, rx) ); DECLARE_EVENT_CLASS(wiphy_netdev_id_evt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), TP_ARGS(wiphy, netdev, id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u64, id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->id = id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", id: %llu", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->id) ); DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_start, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), TP_ARGS(wiphy, netdev, id) ); DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_stop, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), TP_ARGS(wiphy, netdev, id) ); TRACE_EVENT(rdev_tdls_mgmt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *buf, size_t len), TP_ARGS(wiphy, netdev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, buf, len), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(int, link_id) __field(u8, action_code) __field(u8, dialog_token) __field(u16, status_code) __field(u32, peer_capability) __field(bool, initiator) __dynamic_array(u8, buf, len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->link_id = link_id; __entry->action_code = action_code; __entry->dialog_token = dialog_token; __entry->status_code = status_code; __entry->peer_capability = peer_capability; __entry->initiator = initiator; memcpy(__get_dynamic_array(buf), buf, len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM" ", link_id: %d, action_code: %u " "dialog_token: %u, status_code: %u, peer_capability: %u " "initiator: %s buf: %#.2x ", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->link_id, 
__entry->action_code, __entry->dialog_token, __entry->status_code, __entry->peer_capability, BOOL_TO_STR(__entry->initiator), ((u8 *)__get_dynamic_array(buf))[0]) ); TRACE_EVENT(rdev_dump_survey, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx), TP_ARGS(wiphy, netdev, _idx), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->idx = _idx; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx) ); TRACE_EVENT(rdev_return_int_survey_info, TP_PROTO(struct wiphy *wiphy, int ret, struct survey_info *info), TP_ARGS(wiphy, ret, info), TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY __field(int, ret) __field(u64, time) __field(u64, time_busy) __field(u64, time_ext_busy) __field(u64, time_rx) __field(u64, time_tx) __field(u64, time_scan) __field(u32, filled) __field(s8, noise) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(info->channel); __entry->ret = ret; __entry->time = info->time; __entry->time_busy = info->time_busy; __entry->time_ext_busy = info->time_ext_busy; __entry->time_rx = info->time_rx; __entry->time_tx = info->time_tx; __entry->time_scan = info->time_scan; __entry->filled = info->filled; __entry->noise = info->noise; ), TP_printk(WIPHY_PR_FMT ", returned: %d, " CHAN_PR_FMT ", channel time: %llu, channel time busy: %llu, " "channel time extension busy: %llu, channel time rx: %llu, " "channel time tx: %llu, scan time: %llu, filled: %u, noise: %d", WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG, __entry->time, __entry->time_busy, __entry->time_ext_busy, __entry->time_rx, __entry->time_tx, __entry->time_scan, __entry->filled, __entry->noise) ); TRACE_EVENT(rdev_tdls_oper, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *peer, enum nl80211_tdls_operation oper), TP_ARGS(wiphy, netdev, peer, oper), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(enum nl80211_tdls_operation, oper) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->oper = oper; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, oper: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->oper) ); DECLARE_EVENT_CLASS(rdev_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa), TP_ARGS(wiphy, netdev, pmksa), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, pmksa->bssid); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid) ); TRACE_EVENT(rdev_probe_client, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *peer), TP_ARGS(wiphy, netdev, peer), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer) ); DEFINE_EVENT(rdev_pmksa, rdev_set_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa), TP_ARGS(wiphy, netdev, pmksa) ); DEFINE_EVENT(rdev_pmksa, rdev_del_pmksa, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa), TP_ARGS(wiphy, netdev, pmksa) ); TRACE_EVENT(rdev_remain_on_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, unsigned int duration), TP_ARGS(wiphy, wdev, chan, duration), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY CHAN_ENTRY __field(unsigned int, duration) ), 
TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; CHAN_ASSIGN(chan); __entry->duration = duration; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", " CHAN_PR_FMT ", duration: %u", WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, __entry->duration) ); TRACE_EVENT(rdev_return_int_cookie, TP_PROTO(struct wiphy *wiphy, int ret, u64 cookie), TP_ARGS(wiphy, ret, cookie), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; __entry->ret = ret; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", returned %d, cookie: %llu", WIPHY_PR_ARG, __entry->ret, __entry->cookie) ); TRACE_EVENT(rdev_cancel_remain_on_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie: %llu", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) ); TRACE_EVENT(rdev_mgmt_tx, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params), TP_ARGS(wiphy, wdev, params), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY CHAN_ENTRY __field(bool, offchan) __field(unsigned int, wait) __field(bool, no_cck) __field(bool, dont_wait_for_ack) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; CHAN_ASSIGN(params->chan); __entry->offchan = params->offchan; __entry->wait = params->wait; __entry->no_cck = params->no_cck; __entry->dont_wait_for_ack = params->dont_wait_for_ack; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", " CHAN_PR_FMT ", offchan: %s," " wait: %u, no cck: %s, dont wait for ack: %s", WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, BOOL_TO_STR(__entry->offchan), __entry->wait, BOOL_TO_STR(__entry->no_cck), BOOL_TO_STR(__entry->dont_wait_for_ack)) ); TRACE_EVENT(rdev_tx_control_port, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *buf, size_t len, const u8 *dest, __be16 proto, bool unencrypted, int link_id), TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted, link_id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dest) __field(__be16, proto) __field(bool, unencrypted) __field(int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dest, dest); __entry->proto = proto; __entry->unencrypted = unencrypted; __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM," " proto: 0x%x, unencrypted: %s, link: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dest, be16_to_cpu(__entry->proto), BOOL_TO_STR(__entry->unencrypted), __entry->link_id) ); TRACE_EVENT(rdev_set_noack_map, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u16 noack_map), TP_ARGS(wiphy, netdev, noack_map), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u16, noack_map) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->noack_map = noack_map; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", noack_map: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map) ); DECLARE_EVENT_CLASS(wiphy_wdev_link_evt, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) ); DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id) ); 
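/*
 * Editorial note (not part of the original file): DECLARE_EVENT_CLASS(), as
 * with wiphy_wdev_link_evt above, defines the record layout and print format
 * once, and DEFINE_EVENT() stamps out individual tracepoints that share it.
 * A hypothetical additional user would look like:
 *
 *	DEFINE_EVENT(wiphy_wdev_link_evt, rdev_example_link_op,
 *		TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
 *			 unsigned int link_id),
 *		TP_ARGS(wiphy, wdev, link_id)
 *	);
 *
 * (rdev_example_link_op is made up for illustration and does not exist.)
 */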
TRACE_EVENT(rdev_return_chandef, TP_PROTO(struct wiphy *wiphy, int ret, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, ret, chandef), TP_STRUCT__entry( WIPHY_ENTRY __field(int, ret) CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; if (ret == 0) CHAN_DEF_ASSIGN(chandef); else CHAN_DEF_ASSIGN((struct cfg80211_chan_def *)NULL); __entry->ret = ret; ), TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", ret: %d", WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->ret) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_start_p2p_device, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_start_nan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf), TP_ARGS(wiphy, wdev, conf), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) __field(u8, bands) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->master_pref = conf->master_pref; __entry->bands = conf->bands; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", master preference: %u, bands: 0x%0x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, __entry->bands) ); TRACE_EVENT(rdev_nan_change_conf, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes), TP_ARGS(wiphy, wdev, conf, changes), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) __field(u8, bands) __field(u32, changes) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->master_pref = conf->master_pref; __entry->bands = conf->bands; __entry->changes = changes; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", master preference: %u, bands: 0x%0x, changes: %x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, __entry->bands, __entry->changes) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_add_nan_func, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, const struct cfg80211_nan_func *func), TP_ARGS(wiphy, wdev, func), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u8, func_type) __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->func_type = func->type; __entry->cookie = func->cookie ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type, __entry->cookie) ); TRACE_EVENT(rdev_del_nan_func, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) ); TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u32, acl_policy) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->acl_policy = params->acl_policy; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) ); TRACE_EVENT(rdev_update_ft_ies, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_update_ft_ies_params *ftie), TP_ARGS(wiphy, netdev, ftie), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u16, md) __dynamic_array(u8, ie, ftie->ie_len) ), TP_fast_assign( WIPHY_ASSIGN; 
NETDEV_ASSIGN; __entry->md = ftie->md; memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md) ); TRACE_EVENT(rdev_crit_proto_start, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_crit_proto_id protocol, u16 duration), TP_ARGS(wiphy, wdev, protocol, duration), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u16, proto) __field(u16, duration) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->proto = protocol; __entry->duration = duration; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration) ); TRACE_EVENT(rdev_crit_proto_stop, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); TRACE_EVENT(rdev_channel_switch, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_csa_settings *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY __field(bool, radar_required) __field(bool, block_tx) __field(u8, count) __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon) __dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp) __field(u8, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(&params->chandef); __entry->radar_required = params->radar_required; __entry->block_tx = params->block_tx; __entry->count = params->count; memcpy(__get_dynamic_array(bcn_ofs), params->counter_offsets_beacon, params->n_counter_offsets_beacon * sizeof(u16)); /* probe response offsets are optional */ if (params->n_counter_offsets_presp) memcpy(__get_dynamic_array(pres_ofs), params->counter_offsets_presp, params->n_counter_offsets_presp * sizeof(u16)); __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", block_tx: %d, count: %u, radar_required: %d, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->block_tx, __entry->count, __entry->radar_required, __entry->link_id) ); TRACE_EVENT(rdev_set_qos_map, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_qos_map *qos_map), TP_ARGS(wiphy, netdev, qos_map), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY QOS_MAP_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; QOS_MAP_ASSIGN(qos_map); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", num_des: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des) ); TRACE_EVENT(rdev_set_ap_chanwidth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, unsigned int link_id, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, netdev, link_id, chandef), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->link_id = link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(rdev_add_tx_ts, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time), TP_ARGS(wiphy, netdev, tsid, peer, user_prio, admitted_time), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(u8, tsid) __field(u8, user_prio) __field(u16, admitted_time) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; 
MAC_ASSIGN(peer, peer); __entry->tsid = tsid; __entry->user_prio = user_prio; __entry->admitted_time = admitted_time; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, TSID %d, UP %d, time %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->tsid, __entry->user_prio, __entry->admitted_time) ); TRACE_EVENT(rdev_del_tx_ts, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 tsid, const u8 *peer), TP_ARGS(wiphy, netdev, tsid, peer), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(u8, tsid) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->tsid = tsid; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, TSID %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->tsid) ); TRACE_EVENT(rdev_tdls_channel_switch, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *addr, u8 oper_class, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, netdev, addr, oper_class, chandef), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(addr) __field(u8, oper_class) CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); CHAN_DEF_ASSIGN(chandef); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM" " oper class %d, " CHAN_DEF_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->addr, __entry->oper_class, CHAN_DEF_PR_ARG) ); TRACE_EVENT(rdev_tdls_cancel_channel_switch, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *addr), TP_ARGS(wiphy, netdev, addr), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(addr) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->addr) ); TRACE_EVENT(rdev_set_pmk, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmk_conf *pmk_conf), TP_ARGS(wiphy, netdev, pmk_conf), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(aa) __field(u8, pmk_len) __field(u8, pmk_r0_name_len) __dynamic_array(u8, pmk, pmk_conf->pmk_len) __dynamic_array(u8, pmk_r0_name, WLAN_PMK_NAME_LEN) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(aa, pmk_conf->aa); __entry->pmk_len = pmk_conf->pmk_len; __entry->pmk_r0_name_len = pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0; memcpy(__get_dynamic_array(pmk), pmk_conf->pmk, pmk_conf->pmk_len); memcpy(__get_dynamic_array(pmk_r0_name), pmk_conf->pmk_r0_name, pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM" "pmk_len=%u, pmk: %s pmk_r0_name: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->aa, __entry->pmk_len, __print_array(__get_dynamic_array(pmk), __get_dynamic_array_len(pmk), 1), __entry->pmk_r0_name_len ? 
__print_array(__get_dynamic_array(pmk_r0_name), __get_dynamic_array_len(pmk_r0_name), 1) : "") ); TRACE_EVENT(rdev_del_pmk, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *aa), TP_ARGS(wiphy, netdev, aa), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(aa) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(aa, aa); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->aa) ); TRACE_EVENT(rdev_external_auth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_external_auth_params *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry(WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(bssid) __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1) __field(u16, status) MAC_ENTRY(mld_addr) ), TP_fast_assign(WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, params->bssid); memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, params->ssid.ssid, params->ssid.ssid_len); __entry->status = params->status; MAC_ASSIGN(mld_addr, params->mld_addr); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", ssid: %s, status: %u, mld_addr: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid, __entry->status, __entry->mld_addr) ); TRACE_EVENT(rdev_start_radar_detection, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_chan_def *chandef, u32 cac_time_ms), TP_ARGS(wiphy, netdev, chandef, cac_time_ms), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY __field(u32, cac_time_ms) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->cac_time_ms = cac_time_ms; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", cac_time_ms=%u", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->cac_time_ms) ); TRACE_EVENT(rdev_set_mcast_rate, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int *mcast_rate), TP_ARGS(wiphy, netdev, mcast_rate), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __array(int, mcast_rate, NUM_NL80211_BANDS) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memcpy(__entry->mcast_rate, mcast_rate, sizeof(int) * NUM_NL80211_BANDS); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 6GHz=0x%x, 60GHz=0x%x]", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mcast_rate[NL80211_BAND_2GHZ], __entry->mcast_rate[NL80211_BAND_5GHZ], __entry->mcast_rate[NL80211_BAND_6GHZ], __entry->mcast_rate[NL80211_BAND_60GHZ]) ); TRACE_EVENT(rdev_set_coalesce, TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce), TP_ARGS(wiphy, coalesce), TP_STRUCT__entry( WIPHY_ENTRY __field(int, n_rules) ), TP_fast_assign( WIPHY_ASSIGN; __entry->n_rules = coalesce ? 
coalesce->n_rules : 0; ), TP_printk(WIPHY_PR_FMT ", n_rules=%d", WIPHY_PR_ARG, __entry->n_rules) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_set_multicast_to_unicast, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const bool enabled), TP_ARGS(wiphy, netdev, enabled), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(bool, enabled) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->enabled = enabled; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s", WIPHY_PR_ARG, NETDEV_PR_ARG, BOOL_TO_STR(__entry->enabled)) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_get_txq_stats, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); TRACE_EVENT(rdev_get_ftm_responder_stats, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ftm_responder_stats *ftm_stats), TP_ARGS(wiphy, netdev, ftm_stats), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u64, timestamp) __field(u32, success_num) __field(u32, partial_num) __field(u32, failed_num) __field(u32, asap_num) __field(u32, non_asap_num) __field(u64, duration) __field(u32, unknown_triggers) __field(u32, reschedule) __field(u32, out_of_window) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->success_num = ftm_stats->success_num; __entry->partial_num = ftm_stats->partial_num; __entry->failed_num = ftm_stats->failed_num; __entry->asap_num = ftm_stats->asap_num; __entry->non_asap_num = ftm_stats->non_asap_num; __entry->duration = ftm_stats->total_duration_ms; __entry->unknown_triggers = ftm_stats->unknown_triggers_num; __entry->reschedule = ftm_stats->reschedule_requests_num; __entry->out_of_window = ftm_stats->out_of_window_triggers_num; ), TP_printk(WIPHY_PR_FMT "Ftm responder stats: success %u, partial %u, " "failed %u, asap %u, non asap %u, total duration %llu, unknown " "triggers %u, rescheduled %u, out of window %u", WIPHY_PR_ARG, __entry->success_num, __entry->partial_num, __entry->failed_num, __entry->asap_num, __entry->non_asap_num, __entry->duration, __entry->unknown_triggers, __entry->reschedule, __entry->out_of_window) ); DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_start_pmsr, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie) ); DEFINE_EVENT(wiphy_wdev_cookie_evt, rdev_abort_pmsr, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie) ); TRACE_EVENT(rdev_set_fils_aad, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_fils_aad *fils_aad), TP_ARGS(wiphy, netdev, fils_aad), TP_STRUCT__entry(WIPHY_ENTRY NETDEV_ENTRY __array(u8, macaddr, ETH_ALEN) __field(u8, kek_len) ), TP_fast_assign(WIPHY_ASSIGN; NETDEV_ASSIGN; FILS_AAD_ASSIGN(fils_aad); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " FILS_AAD_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, __entry->kek_len) ); TRACE_EVENT(rdev_update_owe_info, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_update_owe_info *owe_info), TP_ARGS(wiphy, netdev, owe_info), TP_STRUCT__entry(WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(u16, status) __dynamic_array(u8, ie, owe_info->ie_len)), TP_fast_assign(WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, owe_info->peer); __entry->status = owe_info->status; memcpy(__get_dynamic_array(ie), owe_info->ie, owe_info->ie_len);), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM" " status %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->status) 
); TRACE_EVENT(rdev_probe_mesh_link, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *dest, const u8 *buf, size_t len), TP_ARGS(wiphy, netdev, dest, buf, len), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dest) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dest, dest); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dest) ); TRACE_EVENT(rdev_set_tid_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_tid_config *tid_conf), TP_ARGS(wiphy, netdev, tid_conf), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, tid_conf->peer); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer) ); TRACE_EVENT(rdev_reset_tid_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *peer, u8 tids), TP_ARGS(wiphy, netdev, peer, tids), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(u8, tids) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->tids = tids; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM, tids: 0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->tids) ); TRACE_EVENT(rdev_set_sar_specs, TP_PROTO(struct wiphy *wiphy, struct cfg80211_sar_specs *sar), TP_ARGS(wiphy, sar), TP_STRUCT__entry( WIPHY_ENTRY __field(u16, type) __field(u16, num) ), TP_fast_assign( WIPHY_ASSIGN; __entry->type = sar->type; __entry->num = sar->num_sub_specs; ), TP_printk(WIPHY_PR_FMT ", Set type:%d, num_specs:%d", WIPHY_PR_ARG, __entry->type, __entry->num) ); TRACE_EVENT(rdev_color_change, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_color_change_settings *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(u8, count) __field(u16, bcn_ofs) __field(u16, pres_ofs) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; __entry->count = params->count; __entry->bcn_ofs = params->counter_offset_beacon; __entry->pres_ofs = params->counter_offset_presp; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", count: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->count) ); TRACE_EVENT(rdev_set_radar_background, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), TP_ARGS(wiphy, chandef), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef) ), TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); DEFINE_EVENT(wiphy_wdev_link_evt, rdev_add_intf_link, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id) ); DEFINE_EVENT(wiphy_wdev_link_evt, rdev_del_intf_link, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), TP_ARGS(wiphy, wdev, link_id) ); /************************************************************* * cfg80211 exported functions traces * *************************************************************/ TRACE_EVENT(cfg80211_return_bool, TP_PROTO(bool ret), TP_ARGS(ret), TP_STRUCT__entry( __field(bool, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("returned %s", BOOL_TO_STR(__entry->ret)) ); DECLARE_EVENT_CLASS(cfg80211_netdev_mac_evt, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(macaddr) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(macaddr, macaddr); ), TP_printk(NETDEV_PR_FMT ", mac: %pM", NETDEV_PR_ARG, __entry->macaddr) 
); DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_notify_new_peer_candidate, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr) ); DECLARE_EVENT_CLASS(netdev_evt_only, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev), TP_STRUCT__entry( NETDEV_ENTRY ), TP_fast_assign( NETDEV_ASSIGN; ), TP_printk(NETDEV_PR_FMT , NETDEV_PR_ARG) ); DEFINE_EVENT(netdev_evt_only, cfg80211_send_rx_auth, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev) ); TRACE_EVENT(cfg80211_send_rx_assoc, TP_PROTO(struct net_device *netdev, const struct cfg80211_rx_assoc_resp_data *data), TP_ARGS(netdev, data), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(ap_addr) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(ap_addr, data->ap_mld_addr ?: data->links[0].bss->bssid); ), TP_printk(NETDEV_PR_FMT ", %pM", NETDEV_PR_ARG, __entry->ap_addr) ); DECLARE_EVENT_CLASS(netdev_frame_event, TP_PROTO(struct net_device *netdev, const u8 *buf, int len), TP_ARGS(netdev, buf, len), TP_STRUCT__entry( NETDEV_ENTRY __dynamic_array(u8, frame, len) ), TP_fast_assign( NETDEV_ASSIGN; memcpy(__get_dynamic_array(frame), buf, len); ), TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", NETDEV_PR_ARG, le16_to_cpup((__le16 *)__get_dynamic_array(frame))) ); DEFINE_EVENT(netdev_frame_event, cfg80211_rx_unprot_mlme_mgmt, TP_PROTO(struct net_device *netdev, const u8 *buf, int len), TP_ARGS(netdev, buf, len) ); DEFINE_EVENT(netdev_frame_event, cfg80211_rx_mlme_mgmt, TP_PROTO(struct net_device *netdev, const u8 *buf, int len), TP_ARGS(netdev, buf, len) ); TRACE_EVENT(cfg80211_tx_mlme_mgmt, TP_PROTO(struct net_device *netdev, const u8 *buf, int len, bool reconnect), TP_ARGS(netdev, buf, len, reconnect), TP_STRUCT__entry( NETDEV_ENTRY __dynamic_array(u8, frame, len) __field(int, reconnect) ), TP_fast_assign( NETDEV_ASSIGN; memcpy(__get_dynamic_array(frame), buf, len); __entry->reconnect = reconnect; ), TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x reconnect:%d", NETDEV_PR_ARG, le16_to_cpup((__le16 *)__get_dynamic_array(frame)), __entry->reconnect) ); DECLARE_EVENT_CLASS(netdev_mac_evt, TP_PROTO(struct net_device *netdev, const u8 *mac), TP_ARGS(netdev, mac), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(mac) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(mac, mac) ), TP_printk(NETDEV_PR_FMT ", mac: %pM", NETDEV_PR_ARG, __entry->mac) ); DEFINE_EVENT(netdev_mac_evt, cfg80211_send_auth_timeout, TP_PROTO(struct net_device *netdev, const u8 *mac), TP_ARGS(netdev, mac) ); TRACE_EVENT(cfg80211_send_assoc_failure, TP_PROTO(struct net_device *netdev, struct cfg80211_assoc_failure *data), TP_ARGS(netdev, data), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(ap_addr) __field(bool, timeout) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(ap_addr, data->ap_mld_addr ?: data->bss[0]->bssid); __entry->timeout = data->timeout; ), TP_printk(NETDEV_PR_FMT ", mac: %pM, timeout: %d", NETDEV_PR_ARG, __entry->ap_addr, __entry->timeout) ); TRACE_EVENT(cfg80211_michael_mic_failure, TP_PROTO(struct net_device *netdev, const u8 *addr, enum nl80211_key_type key_type, int key_id, const u8 *tsc), TP_ARGS(netdev, addr, key_type, key_id, tsc), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(addr) __field(enum nl80211_key_type, key_type) __field(int, key_id) __array(u8, tsc, 6) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); __entry->key_type = key_type; __entry->key_id = key_id; if (tsc) memcpy(__entry->tsc, tsc, 6); ), TP_printk(NETDEV_PR_FMT ", %pM, key type: %d, key id: %d, tsc: %pm", NETDEV_PR_ARG, __entry->addr, __entry->key_type, __entry->key_id, __entry->tsc) ); 
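/*
 * Editorial note (not part of the original file): assuming tracefs is
 * mounted at /sys/kernel/tracing, these cfg80211_* notification events can
 * be enabled individually or as a group at runtime, e.g.:
 *
 *	echo 1 > /sys/kernel/tracing/events/cfg80211/cfg80211_michael_mic_failure/enable
 *	cat /sys/kernel/tracing/trace_pipe
 */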
TRACE_EVENT(cfg80211_ready_on_channel,
	TP_PROTO(struct wireless_dev *wdev, u64 cookie,
		 struct ieee80211_channel *chan,
		 unsigned int duration),
	TP_ARGS(wdev, cookie, chan, duration),
	TP_STRUCT__entry(
		WDEV_ENTRY
		__field(u64, cookie)
		CHAN_ENTRY
		__field(unsigned int, duration)
	),
	TP_fast_assign(
		WDEV_ASSIGN;
		__entry->cookie = cookie;
		CHAN_ASSIGN(chan);
		__entry->duration = duration;
	),
	TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT ", duration: %u",
		  WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG,
		  __entry->duration)
);

TRACE_EVENT(cfg80211_ready_on_channel_expired,
	TP_PROTO(struct wireless_dev *wdev, u64 cookie,
		 struct ieee80211_channel *chan),
	TP_ARGS(wdev, cookie, chan),
	TP_STRUCT__entry(
		WDEV_ENTRY
		__field(u64, cookie)
		CHAN_ENTRY
	),
	TP_fast_assign(
		WDEV_ASSIGN;
		__entry->cookie = cookie;
		CHAN_ASSIGN(chan);
	),
	TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT,
		  WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG)
);

TRACE_EVENT(cfg80211_tx_mgmt_expired,
	TP_PROTO(struct wireless_dev *wdev, u64 cookie,
		 struct ieee80211_channel *chan),
	TP_ARGS(wdev, cookie, chan),
	TP_STRUCT__entry(
		WDEV_ENTRY
		__field(u64, cookie)
		CHAN_ENTRY
	),
	TP_fast_assign(
		WDEV_ASSIGN;
		__entry->cookie = cookie;
		CHAN_ASSIGN(chan);
	),
	TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT,
		  WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG)
);

TRACE_EVENT(cfg80211_new_sta,
	TP_PROTO(struct net_device *netdev, const u8 *mac_addr,
		 struct station_info *sinfo),
	TP_ARGS(netdev, mac_addr, sinfo),
	TP_STRUCT__entry(
		NETDEV_ENTRY
		MAC_ENTRY(mac_addr)
		SINFO_ENTRY
	),
	TP_fast_assign(
		NETDEV_ASSIGN;
		MAC_ASSIGN(mac_addr, mac_addr);
		SINFO_ASSIGN;
	),
	TP_printk(NETDEV_PR_FMT ", %pM", NETDEV_PR_ARG, __entry->mac_addr)
);

DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta,
	TP_PROTO(struct net_device *netdev, const u8 *macaddr),
	TP_ARGS(netdev, macaddr)
);

TRACE_EVENT(cfg80211_rx_mgmt,
	TP_PROTO(struct wireless_dev *wdev, struct cfg80211_rx_info *info),
	TP_ARGS(wdev, info),
	TP_STRUCT__entry(
		WDEV_ENTRY
		__field(int, freq)
		__field(int, sig_dbm)
	),
	TP_fast_assign(
		WDEV_ASSIGN;
		__entry->freq = info->freq;
		__entry->sig_dbm = info->sig_dbm;
	),
	TP_printk(WDEV_PR_FMT ", freq: "KHZ_F", sig dbm: %d",
		  WDEV_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm)
);

TRACE_EVENT(cfg80211_mgmt_tx_status,
	TP_PROTO(struct wireless_dev *wdev, u64 cookie, bool ack),
	TP_ARGS(wdev, cookie, ack),
	TP_STRUCT__entry(
		WDEV_ENTRY
		__field(u64, cookie)
		__field(bool, ack)
	),
	TP_fast_assign(
		WDEV_ASSIGN;
		__entry->cookie = cookie;
		__entry->ack = ack;
	),
	TP_printk(WDEV_PR_FMT", cookie: %llu, ack: %s",
		  WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack))
);

TRACE_EVENT(cfg80211_control_port_tx_status,
	TP_PROTO(struct wireless_dev *wdev, u64 cookie, bool ack),
	TP_ARGS(wdev, cookie, ack),
	TP_STRUCT__entry(
		WDEV_ENTRY
		__field(u64, cookie)
		__field(bool, ack)
	),
	TP_fast_assign(
		WDEV_ASSIGN;
		__entry->cookie = cookie;
		__entry->ack = ack;
	),
	TP_printk(WDEV_PR_FMT", cookie: %llu, ack: %s",
		  WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack))
);

TRACE_EVENT(cfg80211_rx_control_port,
	TP_PROTO(struct net_device *netdev, struct sk_buff *skb,
		 bool unencrypted, int link_id),
	TP_ARGS(netdev, skb, unencrypted, link_id),
	TP_STRUCT__entry(
		NETDEV_ENTRY
		__field(int, len)
		MAC_ENTRY(from)
		__field(u16, proto)
		__field(bool, unencrypted)
		__field(int, link_id)
	),
	TP_fast_assign(
		NETDEV_ASSIGN;
		__entry->len = skb->len;
		MAC_ASSIGN(from, eth_hdr(skb)->h_source);
		__entry->proto = be16_to_cpu(skb->protocol);
		__entry->unencrypted = unencrypted;
		__entry->link_id = link_id;
	),
	TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s, link: %d",
		  NETDEV_PR_ARG, __entry->len, __entry->from,
		  __entry->proto, BOOL_TO_STR(__entry->unencrypted),
		  __entry->link_id)
);

TRACE_EVENT(cfg80211_cqm_rssi_notify,
	TP_PROTO(struct net_device *netdev,
		 enum nl80211_cqm_rssi_threshold_event rssi_event,
		 s32 rssi_level),
	TP_ARGS(netdev, rssi_event, rssi_level),
	TP_STRUCT__entry(
		NETDEV_ENTRY
		__field(enum nl80211_cqm_rssi_threshold_event, rssi_event)
		__field(s32, rssi_level)
	),
	TP_fast_assign(
		NETDEV_ASSIGN;
		__entry->rssi_event = rssi_event;
		__entry->rssi_level = rssi_level;
	),
	TP_printk(NETDEV_PR_FMT ", rssi event: %d, level: %d",
		  NETDEV_PR_ARG, __entry->rssi_event, __entry->rssi_level)
);

TRACE_EVENT(cfg80211_reg_can_beacon,
	TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef,
		 enum nl80211_iftype iftype, bool check_no_ir),
	TP_ARGS(wiphy, chandef, iftype, check_no_ir),
	TP_STRUCT__entry(
		WIPHY_ENTRY
		CHAN_DEF_ENTRY
		__field(enum nl80211_iftype, iftype)
		__field(bool, check_no_ir)
	),
	TP_fast_assign(
		WIPHY_ASSIGN;
		CHAN_DEF_ASSIGN(chandef);
		__entry->iftype = iftype;
		__entry->check_no_ir = check_no_ir;
	),
	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s",
		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype,
		  BOOL_TO_STR(__entry->check_no_ir))
);

TRACE_EVENT(cfg80211_chandef_dfs_required,
	TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef),
	TP_ARGS(wiphy, chandef),
	TP_STRUCT__entry(
		WIPHY_ENTRY
		CHAN_DEF_ENTRY
	),
	TP_fast_assign(
		WIPHY_ASSIGN;
		CHAN_DEF_ASSIGN(chandef);
	),
	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT,
		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG)
);

TRACE_EVENT(cfg80211_ch_switch_notify,
	TP_PROTO(struct net_device *netdev,
		 struct cfg80211_chan_def *chandef,
		 unsigned int link_id),
	TP_ARGS(netdev, chandef, link_id),
	TP_STRUCT__entry(
		NETDEV_ENTRY
		CHAN_DEF_ENTRY
		__field(unsigned int, link_id)
	),
	TP_fast_assign(
		NETDEV_ASSIGN;
		CHAN_DEF_ASSIGN(chandef);
		__entry->link_id = link_id;
	),
	TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
		  NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
);

TRACE_EVENT(cfg80211_ch_switch_started_notify,
	TP_PROTO(struct net_device *netdev,
		 struct cfg80211_chan_def *chandef,
		 unsigned int link_id),
	TP_ARGS(netdev, chandef, link_id),
	TP_STRUCT__entry(
		NETDEV_ENTRY
		CHAN_DEF_ENTRY
		__field(unsigned int, link_id)
	),
	TP_fast_assign(
		NETDEV_ASSIGN;
		CHAN_DEF_ASSIGN(chandef);
		__entry->link_id = link_id;
	),
	TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
		  NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
);

TRACE_EVENT(cfg80211_radar_event,
	TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef,
		 bool offchan),
	TP_ARGS(wiphy, chandef, offchan),
	TP_STRUCT__entry(
		WIPHY_ENTRY
		CHAN_DEF_ENTRY
		__field(bool, offchan)
	),
	TP_fast_assign(
		WIPHY_ASSIGN;
		CHAN_DEF_ASSIGN(chandef);
		__entry->offchan = offchan;
	),
	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", offchan %d",
		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->offchan)
);

TRACE_EVENT(cfg80211_cac_event,
	TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt),
	TP_ARGS(netdev, evt),
	TP_STRUCT__entry(
		NETDEV_ENTRY
		__field(enum nl80211_radar_event, evt)
	),
	TP_fast_assign(
		NETDEV_ASSIGN;
		__entry->evt = evt;
	),
	TP_printk(NETDEV_PR_FMT ", event: %d", NETDEV_PR_ARG, __entry->evt)
);

DECLARE_EVENT_CLASS(cfg80211_rx_evt,
	TP_PROTO(struct net_device *netdev, const u8 *addr),
	TP_ARGS(netdev, addr),
	TP_STRUCT__entry(
		NETDEV_ENTRY
		MAC_ENTRY(addr)
	),
	TP_fast_assign(
		NETDEV_ASSIGN;
		MAC_ASSIGN(addr, addr);
	),
	TP_printk(NETDEV_PR_FMT ", %pM", NETDEV_PR_ARG, __entry->addr)
);

DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_spurious_frame,
TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr) ); DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_unexpected_4addr_frame, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr) ); TRACE_EVENT(cfg80211_ibss_joined, TP_PROTO(struct net_device *netdev, const u8 *bssid, struct ieee80211_channel *channel), TP_ARGS(netdev, bssid, channel), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(bssid) CHAN_ENTRY ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(bssid, bssid); CHAN_ASSIGN(channel); ), TP_printk(NETDEV_PR_FMT ", bssid: %pM, " CHAN_PR_FMT, NETDEV_PR_ARG, __entry->bssid, CHAN_PR_ARG) ); TRACE_EVENT(cfg80211_probe_status, TP_PROTO(struct net_device *netdev, const u8 *addr, u64 cookie, bool acked), TP_ARGS(netdev, addr, cookie, acked), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(addr) __field(u64, cookie) __field(bool, acked) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(addr, addr); __entry->cookie = cookie; __entry->acked = acked; ), TP_printk(NETDEV_PR_FMT " addr:%pM, cookie: %llu, acked: %s", NETDEV_PR_ARG, __entry->addr, __entry->cookie, BOOL_TO_STR(__entry->acked)) ); TRACE_EVENT(cfg80211_cqm_pktloss_notify, TP_PROTO(struct net_device *netdev, const u8 *peer, u32 num_packets), TP_ARGS(netdev, peer, num_packets), TP_STRUCT__entry( NETDEV_ENTRY MAC_ENTRY(peer) __field(u32, num_packets) ), TP_fast_assign( NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->num_packets = num_packets; ), TP_printk(NETDEV_PR_FMT ", peer: %pM, num of lost packets: %u", NETDEV_PR_ARG, __entry->peer, __entry->num_packets) ); DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_gtk_rekey_notify, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr) ); TRACE_EVENT(cfg80211_pmksa_candidate_notify, TP_PROTO(struct net_device *netdev, int index, const u8 *bssid, bool preauth), TP_ARGS(netdev, index, bssid, preauth), TP_STRUCT__entry( NETDEV_ENTRY __field(int, index) MAC_ENTRY(bssid) __field(bool, preauth) ), TP_fast_assign( NETDEV_ASSIGN; __entry->index = index; MAC_ASSIGN(bssid, bssid); __entry->preauth = preauth; ), TP_printk(NETDEV_PR_FMT ", index:%d, bssid: %pM, pre auth: %s", NETDEV_PR_ARG, __entry->index, __entry->bssid, BOOL_TO_STR(__entry->preauth)) ); TRACE_EVENT(cfg80211_report_obss_beacon, TP_PROTO(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm), TP_ARGS(wiphy, frame, len, freq, sig_dbm), TP_STRUCT__entry( WIPHY_ENTRY __field(int, freq) __field(int, sig_dbm) ), TP_fast_assign( WIPHY_ASSIGN; __entry->freq = freq; __entry->sig_dbm = sig_dbm; ), TP_printk(WIPHY_PR_FMT ", freq: "KHZ_F", sig_dbm: %d", WIPHY_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm) ); TRACE_EVENT(cfg80211_tdls_oper_request, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code), TP_ARGS(wiphy, netdev, peer, oper, reason_code), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __field(enum nl80211_tdls_operation, oper) __field(u16, reason_code) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); __entry->oper = oper; __entry->reason_code = reason_code; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM, oper: %d, reason_code %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->oper, __entry->reason_code) ); TRACE_EVENT(cfg80211_scan_done, TP_PROTO(struct cfg80211_scan_request *request, struct cfg80211_scan_info *info), TP_ARGS(request, info), TP_STRUCT__entry( __field(u32, n_channels) __dynamic_array(u8, ie, request ? 
request->ie_len : 0) __array(u32, rates, NUM_NL80211_BANDS) __field(u32, wdev_id) MAC_ENTRY(wiphy_mac) __field(bool, no_cck) __field(bool, aborted) __field(u64, scan_start_tsf) MAC_ENTRY(tsf_bssid) ), TP_fast_assign( if (request) { memcpy(__get_dynamic_array(ie), request->ie, request->ie_len); memcpy(__entry->rates, request->rates, NUM_NL80211_BANDS); __entry->wdev_id = request->wdev ? request->wdev->identifier : 0; if (request->wiphy) MAC_ASSIGN(wiphy_mac, request->wiphy->perm_addr); __entry->no_cck = request->no_cck; } if (info) { __entry->aborted = info->aborted; __entry->scan_start_tsf = info->scan_start_tsf; MAC_ASSIGN(tsf_bssid, info->tsf_bssid); } ), TP_printk("aborted: %s, scan start (TSF): %llu, tsf_bssid: %pM", BOOL_TO_STR(__entry->aborted), (unsigned long long)__entry->scan_start_tsf, __entry->tsf_bssid) ); DECLARE_EVENT_CLASS(wiphy_id_evt, TP_PROTO(struct wiphy *wiphy, u64 id), TP_ARGS(wiphy, id), TP_STRUCT__entry( WIPHY_ENTRY __field(u64, id) ), TP_fast_assign( WIPHY_ASSIGN; __entry->id = id; ), TP_printk(WIPHY_PR_FMT ", id: %llu", WIPHY_PR_ARG, __entry->id) ); DEFINE_EVENT(wiphy_id_evt, cfg80211_sched_scan_stopped, TP_PROTO(struct wiphy *wiphy, u64 id), TP_ARGS(wiphy, id) ); DEFINE_EVENT(wiphy_id_evt, cfg80211_sched_scan_results, TP_PROTO(struct wiphy *wiphy, u64 id), TP_ARGS(wiphy, id) ); TRACE_EVENT(cfg80211_get_bss, TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, const u8 *ssid, size_t ssid_len, enum ieee80211_bss_type bss_type, enum ieee80211_privacy privacy), TP_ARGS(wiphy, channel, bssid, ssid, ssid_len, bss_type, privacy), TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY MAC_ENTRY(bssid) __dynamic_array(u8, ssid, ssid_len) __field(enum ieee80211_bss_type, bss_type) __field(enum ieee80211_privacy, privacy) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(channel); MAC_ASSIGN(bssid, bssid); memcpy(__get_dynamic_array(ssid), ssid, ssid_len); __entry->bss_type = bss_type; __entry->privacy = privacy; ), TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT ", %pM" ", buf: %#.2x, bss_type: %d, privacy: %d", WIPHY_PR_ARG, CHAN_PR_ARG, __entry->bssid, ((u8 *)__get_dynamic_array(ssid))[0], __entry->bss_type, __entry->privacy) ); TRACE_EVENT(cfg80211_inform_bss_frame, TP_PROTO(struct wiphy *wiphy, struct cfg80211_inform_bss *data, struct ieee80211_mgmt *mgmt, size_t len), TP_ARGS(wiphy, data, mgmt, len), TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY __dynamic_array(u8, mgmt, len) __field(s32, signal) __field(u64, ts_boottime) __field(u64, parent_tsf) MAC_ENTRY(parent_bssid) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(data->chan); if (mgmt) memcpy(__get_dynamic_array(mgmt), mgmt, len); __entry->signal = data->signal; __entry->ts_boottime = data->boottime_ns; __entry->parent_tsf = data->parent_tsf; MAC_ASSIGN(parent_bssid, data->parent_bssid); ), TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM", WIPHY_PR_ARG, CHAN_PR_ARG, __entry->signal, (unsigned long long)__entry->ts_boottime, (unsigned long long)__entry->parent_tsf, __entry->parent_bssid) ); DECLARE_EVENT_CLASS(cfg80211_bss_evt, TP_PROTO(struct cfg80211_bss *pub), TP_ARGS(pub), TP_STRUCT__entry( MAC_ENTRY(bssid) CHAN_ENTRY ), TP_fast_assign( MAC_ASSIGN(bssid, pub->bssid); CHAN_ASSIGN(pub->channel); ), TP_printk("%pM, " CHAN_PR_FMT, __entry->bssid, CHAN_PR_ARG) ); DEFINE_EVENT(cfg80211_bss_evt, cfg80211_return_bss, TP_PROTO(struct cfg80211_bss *pub), TP_ARGS(pub) ); TRACE_EVENT(cfg80211_return_uint, TP_PROTO(unsigned int ret), TP_ARGS(ret), TP_STRUCT__entry( __field(unsigned 
int, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret: %d", __entry->ret) ); TRACE_EVENT(cfg80211_return_u32, TP_PROTO(u32 ret), TP_ARGS(ret), TP_STRUCT__entry( __field(u32, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret: %u", __entry->ret) ); TRACE_EVENT(cfg80211_report_wowlan_wakeup, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup), TP_ARGS(wiphy, wdev, wakeup), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(bool, non_wireless) __field(bool, disconnect) __field(bool, magic_pkt) __field(bool, gtk_rekey_failure) __field(bool, eap_identity_req) __field(bool, four_way_handshake) __field(bool, rfkill_release) __field(s32, pattern_idx) __field(u32, packet_len) __dynamic_array(u8, packet, wakeup ? wakeup->packet_present_len : 0) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->non_wireless = !wakeup; __entry->disconnect = wakeup ? wakeup->disconnect : false; __entry->magic_pkt = wakeup ? wakeup->magic_pkt : false; __entry->gtk_rekey_failure = wakeup ? wakeup->gtk_rekey_failure : false; __entry->eap_identity_req = wakeup ? wakeup->eap_identity_req : false; __entry->four_way_handshake = wakeup ? wakeup->four_way_handshake : false; __entry->rfkill_release = wakeup ? wakeup->rfkill_release : false; __entry->pattern_idx = wakeup ? wakeup->pattern_idx : false; __entry->packet_len = wakeup ? wakeup->packet_len : false; if (wakeup && wakeup->packet && wakeup->packet_present_len) memcpy(__get_dynamic_array(packet), wakeup->packet, wakeup->packet_present_len); ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); TRACE_EVENT(cfg80211_ft_event, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ft_event_params *ft_event), TP_ARGS(wiphy, netdev, ft_event), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __dynamic_array(u8, ies, ft_event->ies_len) MAC_ENTRY(target_ap) __dynamic_array(u8, ric_ies, ft_event->ric_ies_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; if (ft_event->ies) memcpy(__get_dynamic_array(ies), ft_event->ies, ft_event->ies_len); MAC_ASSIGN(target_ap, ft_event->target_ap); if (ft_event->ric_ies) memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies, ft_event->ric_ies_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->target_ap) ); TRACE_EVENT(cfg80211_stop_iface, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); TRACE_EVENT(cfg80211_pmsr_report, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie, const u8 *addr), TP_ARGS(wiphy, wdev, cookie, addr), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) MAC_ENTRY(addr) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; MAC_ASSIGN(addr, addr); ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie:%lld, %pM", WIPHY_PR_ARG, WDEV_PR_ARG, (unsigned long long)__entry->cookie, __entry->addr) ); TRACE_EVENT(cfg80211_pmsr_complete, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie), TP_ARGS(wiphy, wdev, cookie), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY __field(u64, cookie) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->cookie = cookie; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie:%lld", WIPHY_PR_ARG, WDEV_PR_ARG, (unsigned long long)__entry->cookie) ); TRACE_EVENT(cfg80211_update_owe_info_event, 
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_update_owe_info *owe_info), TP_ARGS(wiphy, netdev, owe_info), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) __dynamic_array(u8, ie, owe_info->ie_len) __field(int, assoc_link_id) MAC_ENTRY(peer_mld_addr) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, owe_info->peer); memcpy(__get_dynamic_array(ie), owe_info->ie, owe_info->ie_len); __entry->assoc_link_id = owe_info->assoc_link_id; MAC_ASSIGN(peer_mld_addr, owe_info->peer_mld_addr); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM," " assoc_link_id: %d, peer_mld_addr: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->assoc_link_id, __entry->peer_mld_addr) ); TRACE_EVENT(cfg80211_bss_color_notify, TP_PROTO(struct net_device *netdev, enum nl80211_commands cmd, u8 count, u64 color_bitmap), TP_ARGS(netdev, cmd, count, color_bitmap), TP_STRUCT__entry( NETDEV_ENTRY __field(u32, cmd) __field(u8, count) __field(u64, color_bitmap) ), TP_fast_assign( NETDEV_ASSIGN; __entry->cmd = cmd; __entry->count = count; __entry->color_bitmap = color_bitmap; ), TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx", NETDEV_PR_ARG, __entry->cmd, __entry->count, __entry->color_bitmap) ); TRACE_EVENT(cfg80211_assoc_comeback, TP_PROTO(struct wireless_dev *wdev, const u8 *ap_addr, u32 timeout), TP_ARGS(wdev, ap_addr, timeout), TP_STRUCT__entry( WDEV_ENTRY MAC_ENTRY(ap_addr) __field(u32, timeout) ), TP_fast_assign( WDEV_ASSIGN; MAC_ASSIGN(ap_addr, ap_addr); __entry->timeout = timeout; ), TP_printk(WDEV_PR_FMT ", %pM, timeout: %u TUs", WDEV_PR_ARG, __entry->ap_addr, __entry->timeout) ); DECLARE_EVENT_CLASS(link_station_add_mod, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct link_station_parameters *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __array(u8, mld_mac, 6) __array(u8, link_mac, 6) __field(u32, link_id) __dynamic_array(u8, supported_rates, params->supported_rates_len) __array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap)) __array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap)) __field(u8, opmode_notif) __field(bool, opmode_notif_used) __dynamic_array(u8, he_capa, params->he_capa_len) __array(u8, he_6ghz_capa, (int)sizeof(struct ieee80211_he_6ghz_capa)) __dynamic_array(u8, eht_capa, params->eht_capa_len) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memset(__entry->mld_mac, 0, 6); memset(__entry->link_mac, 0, 6); if (params->mld_mac) memcpy(__entry->mld_mac, params->mld_mac, 6); if (params->link_mac) memcpy(__entry->link_mac, params->link_mac, 6); __entry->link_id = params->link_id; if (params->supported_rates && params->supported_rates_len) memcpy(__get_dynamic_array(supported_rates), params->supported_rates, params->supported_rates_len); memset(__entry->ht_capa, 0, sizeof(struct ieee80211_ht_cap)); if (params->ht_capa) memcpy(__entry->ht_capa, params->ht_capa, sizeof(struct ieee80211_ht_cap)); memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap)); if (params->vht_capa) memcpy(__entry->vht_capa, params->vht_capa, sizeof(struct ieee80211_vht_cap)); __entry->opmode_notif = params->opmode_notif; __entry->opmode_notif_used = params->opmode_notif_used; if (params->he_capa && params->he_capa_len) memcpy(__get_dynamic_array(he_capa), params->he_capa, params->he_capa_len); memset(__entry->he_6ghz_capa, 0, sizeof(struct ieee80211_he_6ghz_capa)); if (params->he_6ghz_capa) memcpy(__entry->he_6ghz_capa, params->he_6ghz_capa, sizeof(struct ieee80211_he_6ghz_capa)); if 
(params->eht_capa && params->eht_capa_len) memcpy(__get_dynamic_array(eht_capa), params->eht_capa, params->eht_capa_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" ", link mac: %pM, link id: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac, __entry->link_mac, __entry->link_id) ); DEFINE_EVENT(link_station_add_mod, rdev_add_link_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct link_station_parameters *params), TP_ARGS(wiphy, netdev, params) ); DEFINE_EVENT(link_station_add_mod, rdev_mod_link_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct link_station_parameters *params), TP_ARGS(wiphy, netdev, params) ); TRACE_EVENT(rdev_del_link_station, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct link_station_del_parameters *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __array(u8, mld_mac, 6) __field(u32, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memset(__entry->mld_mac, 0, 6); if (params->mld_mac) memcpy(__entry->mld_mac, params->mld_mac, 6); __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" ", link id: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac, __entry->link_id) ); TRACE_EVENT(rdev_set_hw_timestamp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_set_hw_timestamp *hwts), TP_ARGS(wiphy, netdev, hwts), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(macaddr) __field(bool, enable) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(macaddr, hwts->macaddr); __entry->enable = hwts->enable; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, __entry->enable) ); TRACE_EVENT(cfg80211_links_removed, TP_PROTO(struct net_device *netdev, u16 link_mask), TP_ARGS(netdev, link_mask), TP_STRUCT__entry( NETDEV_ENTRY __field(u16, link_mask) ), TP_fast_assign( NETDEV_ASSIGN; __entry->link_mask = link_mask; ), TP_printk(NETDEV_PR_FMT ", link_mask:%u", NETDEV_PR_ARG, __entry->link_mask) ); TRACE_EVENT(rdev_set_ttlm, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ttlm_params *params), TP_ARGS(wiphy, netdev, params), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __array(u8, dlink, sizeof(u16) * 8) __array(u8, ulink, sizeof(u16) * 8) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; memcpy(__entry->dlink, params->dlink, sizeof(params->dlink)); memcpy(__entry->ulink, params->ulink, sizeof(params->ulink)); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
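/*
 * Illustrative sketch (not part of the original header): each TRACE_EVENT()
 * above expands into a trace_<name>() inline taking the TP_PROTO arguments,
 * which cfg80211 calls at the corresponding event site. The wrapper below is
 * hypothetical and only shows the calling convention; it is guarded out so
 * the header itself is unchanged.
 */
#if 0
static void example_notify_links_removed(struct net_device *dev, u16 link_mask)
{
	/* records the netdev and the bitmask of removed MLO links */
	trace_cfg80211_links_removed(dev, link_mask);
}
#endif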
// SPDX-License-Identifier: GPL-2.0-or-later /* * A generic kernel FIFO implementation * * Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net> */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/log2.h> #include <linux/uaccess.h> #include <linux/kfifo.h> /* * internal helper to calculate the unused elements in a fifo */ static inline unsigned int kfifo_unused(struct __kfifo *fifo) { return (fifo->mask + 1) - (fifo->in - fifo->out); } int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, size_t esize, gfp_t gfp_mask) { /* * round up to the next power of 2, since our 'let the indices * wrap' technique works only in this case.
*/ size = roundup_pow_of_two(size); fifo->in = 0; fifo->out = 0; fifo->esize = esize; if (size < 2) { fifo->data = NULL; fifo->mask = 0; return -EINVAL; } fifo->data = kmalloc_array(esize, size, gfp_mask); if (!fifo->data) { fifo->mask = 0; return -ENOMEM; } fifo->mask = size - 1; return 0; } EXPORT_SYMBOL(__kfifo_alloc); void __kfifo_free(struct __kfifo *fifo) { kfree(fifo->data); fifo->in = 0; fifo->out = 0; fifo->esize = 0; fifo->data = NULL; fifo->mask = 0; } EXPORT_SYMBOL(__kfifo_free); int __kfifo_init(struct __kfifo *fifo, void *buffer, unsigned int size, size_t esize) { size /= esize; if (!is_power_of_2(size)) size = rounddown_pow_of_two(size); fifo->in = 0; fifo->out = 0; fifo->esize = esize; fifo->data = buffer; if (size < 2) { fifo->mask = 0; return -EINVAL; } fifo->mask = size - 1; return 0; } EXPORT_SYMBOL(__kfifo_init); static void kfifo_copy_in(struct __kfifo *fifo, const void *src, unsigned int len, unsigned int off) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); memcpy(fifo->data + off, src, l); memcpy(fifo->data, src + l, len - l); /* * make sure that the data in the fifo is up to date before * incrementing the fifo->in index counter */ smp_wmb(); } unsigned int __kfifo_in(struct __kfifo *fifo, const void *buf, unsigned int len) { unsigned int l; l = kfifo_unused(fifo); if (len > l) len = l; kfifo_copy_in(fifo, buf, len, fifo->in); fifo->in += len; return len; } EXPORT_SYMBOL(__kfifo_in); static void kfifo_copy_out(struct __kfifo *fifo, void *dst, unsigned int len, unsigned int off) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); memcpy(dst, fifo->data + off, l); memcpy(dst + l, fifo->data, len - l); /* * make sure that the data is copied before * incrementing the fifo->out index counter */ smp_wmb(); } unsigned int __kfifo_out_peek(struct __kfifo *fifo, void *buf, unsigned int len) { unsigned int l; l = fifo->in - fifo->out; if (len > l) len = l; kfifo_copy_out(fifo, buf, len, fifo->out); return len; } EXPORT_SYMBOL(__kfifo_out_peek); unsigned int __kfifo_out(struct __kfifo *fifo, void *buf, unsigned int len) { len = __kfifo_out_peek(fifo, buf, len); fifo->out += len; return len; } EXPORT_SYMBOL(__kfifo_out); static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, const void __user *from, unsigned int len, unsigned int off, unsigned int *copied) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; unsigned long ret; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); ret = copy_from_user(fifo->data + off, from, l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret + len - l, esize); else { ret = copy_from_user(fifo->data, from + l, len - l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret, esize); } /* * make sure that the data in the fifo is up to date before * incrementing the fifo->in index counter */ smp_wmb(); *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } int __kfifo_from_user(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int esize = fifo->esize; int err; if (esize != 1) len /= esize; l = kfifo_unused(fifo); if (len > l) len = l; ret = 
kfifo_copy_from_user(fifo, from, len, fifo->in, copied); if (unlikely(ret)) { len -= ret; err = -EFAULT; } else err = 0; fifo->in += len; return err; } EXPORT_SYMBOL(__kfifo_from_user); static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, unsigned int len, unsigned int off, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); ret = copy_to_user(to, fifo->data + off, l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret + len - l, esize); else { ret = copy_to_user(to + l, fifo->data, len - l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret, esize); } /* * make sure that the data is copied before * incrementing the fifo->out index counter */ smp_wmb(); *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } int __kfifo_to_user(struct __kfifo *fifo, void __user *to, unsigned long len, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int esize = fifo->esize; int err; if (esize != 1) len /= esize; l = fifo->in - fifo->out; if (len > l) len = l; ret = kfifo_copy_to_user(fifo, to, len, fifo->out, copied); if (unlikely(ret)) { len -= ret; err = -EFAULT; } else err = 0; fifo->out += len; return err; } EXPORT_SYMBOL(__kfifo_to_user); static int setup_sgl_buf(struct scatterlist *sgl, void *buf, int nents, unsigned int len) { int n; unsigned int l; unsigned int off; struct page *page; if (!nents) return 0; if (!len) return 0; n = 0; page = virt_to_page(buf); off = offset_in_page(buf); l = 0; while (len >= l + PAGE_SIZE - off) { struct page *npage; l += PAGE_SIZE; buf += PAGE_SIZE; npage = virt_to_page(buf); if (page_to_phys(page) != page_to_phys(npage) - l) { sg_set_page(sgl, page, l - off, off); sgl = sg_next(sgl); if (++n == nents || sgl == NULL) return n; page = npage; len -= l - off; l = off = 0; } } sg_set_page(sgl, page, len, off); return n + 1; } static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, unsigned int off) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; unsigned int n; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); n = setup_sgl_buf(sgl, fifo->data + off, nents, l); n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l); return n; } unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len) { unsigned int l; l = kfifo_unused(fifo); if (len > l) len = l; return setup_sgl(fifo, sgl, nents, len, fifo->in); } EXPORT_SYMBOL(__kfifo_dma_in_prepare); unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len) { unsigned int l; l = fifo->in - fifo->out; if (len > l) len = l; return setup_sgl(fifo, sgl, nents, len, fifo->out); } EXPORT_SYMBOL(__kfifo_dma_out_prepare); unsigned int __kfifo_max_r(unsigned int len, size_t recsize) { unsigned int max = (1 << (recsize << 3)) - 1; if (len > max) return max; return len; } EXPORT_SYMBOL(__kfifo_max_r); #define __KFIFO_PEEK(data, out, mask) \ ((data)[(out) & (mask)]) /* * __kfifo_peek_n internal helper function for determinate the length of * the next record in the fifo */ static unsigned int __kfifo_peek_n(struct __kfifo *fifo, size_t recsize) { unsigned int l; unsigned int mask = fifo->mask; unsigned char *data = fifo->data; l = 
__KFIFO_PEEK(data, fifo->out, mask); if (--recsize) l |= __KFIFO_PEEK(data, fifo->out + 1, mask) << 8; return l; } #define __KFIFO_POKE(data, in, mask, val) \ ( \ (data)[(in) & (mask)] = (unsigned char)(val) \ ) /* * __kfifo_poke_n internal helper function for storing the length of * the record into the fifo */ static void __kfifo_poke_n(struct __kfifo *fifo, unsigned int n, size_t recsize) { unsigned int mask = fifo->mask; unsigned char *data = fifo->data; __KFIFO_POKE(data, fifo->in, mask, n); if (recsize > 1) __KFIFO_POKE(data, fifo->in + 1, mask, n >> 8); } unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize) { return __kfifo_peek_n(fifo, recsize); } EXPORT_SYMBOL(__kfifo_len_r); unsigned int __kfifo_in_r(struct __kfifo *fifo, const void *buf, unsigned int len, size_t recsize) { if (len + recsize > kfifo_unused(fifo)) return 0; __kfifo_poke_n(fifo, len, recsize); kfifo_copy_in(fifo, buf, len, fifo->in + recsize); fifo->in += len + recsize; return len; } EXPORT_SYMBOL(__kfifo_in_r); static unsigned int kfifo_out_copy_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize, unsigned int *n) { *n = __kfifo_peek_n(fifo, recsize); if (len > *n) len = *n; kfifo_copy_out(fifo, buf, len, fifo->out + recsize); return len; } unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize) { unsigned int n; if (fifo->in == fifo->out) return 0; return kfifo_out_copy_r(fifo, buf, len, recsize, &n); } EXPORT_SYMBOL(__kfifo_out_peek_r); unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize) { unsigned int n; if (fifo->in == fifo->out) return 0; len = kfifo_out_copy_r(fifo, buf, len, recsize, &n); fifo->out += n + recsize; return len; } EXPORT_SYMBOL(__kfifo_out_r); void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) { unsigned int n; n = __kfifo_peek_n(fifo, recsize); fifo->out += n + recsize; } EXPORT_SYMBOL(__kfifo_skip_r); int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied, size_t recsize) { unsigned long ret; len = __kfifo_max_r(len, recsize); if (len + recsize > kfifo_unused(fifo)) { *copied = 0; return 0; } __kfifo_poke_n(fifo, len, recsize); ret = kfifo_copy_from_user(fifo, from, len, fifo->in + recsize, copied); if (unlikely(ret)) { *copied = 0; return -EFAULT; } fifo->in += len + recsize; return 0; } EXPORT_SYMBOL(__kfifo_from_user_r); int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to, unsigned long len, unsigned int *copied, size_t recsize) { unsigned long ret; unsigned int n; if (fifo->in == fifo->out) { *copied = 0; return 0; } n = __kfifo_peek_n(fifo, recsize); if (len > n) len = n; ret = kfifo_copy_to_user(fifo, to, len, fifo->out + recsize, copied); if (unlikely(ret)) { *copied = 0; return -EFAULT; } fifo->out += n + recsize; return 0; } EXPORT_SYMBOL(__kfifo_to_user_r); unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) { BUG_ON(!nents); len = __kfifo_max_r(len, recsize); if (len + recsize > kfifo_unused(fifo)) return 0; return setup_sgl(fifo, sgl, nents, len, fifo->in + recsize); } EXPORT_SYMBOL(__kfifo_dma_in_prepare_r); void __kfifo_dma_in_finish_r(struct __kfifo *fifo, unsigned int len, size_t recsize) { len = __kfifo_max_r(len, recsize); __kfifo_poke_n(fifo, len, recsize); fifo->in += len + recsize; } EXPORT_SYMBOL(__kfifo_dma_in_finish_r); unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, struct scatterlist *sgl, int 
nents, unsigned int len, size_t recsize) { BUG_ON(!nents); len = __kfifo_max_r(len, recsize); if (len + recsize > fifo->in - fifo->out) return 0; return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize); } EXPORT_SYMBOL(__kfifo_dma_out_prepare_r); void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize) { unsigned int len; len = __kfifo_peek_n(fifo, recsize); fifo->out += len + recsize; } EXPORT_SYMBOL(__kfifo_dma_out_finish_r);
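/*
 * Illustrative usage sketch (not part of kfifo.c): callers normally go
 * through the kfifo_*() macros in <linux/kfifo.h>, which expand to the
 * __kfifo_*() helpers above. The fifo name and function below are made up
 * for the example, and the block is guarded out so nothing here is built.
 */
#if 0
#include <linux/kfifo.h>

/* element count must be a power of two for the indices-wrap technique */
static DEFINE_KFIFO(example_fifo, unsigned char, 128);

static void kfifo_usage_example(void)
{
	unsigned char in[4] = { 1, 2, 3, 4 };
	unsigned char out[4];
	unsigned int n;

	/* kfifo_in() stores at most kfifo_unused() elements, returns the count */
	n = kfifo_in(&example_fifo, in, sizeof(in));

	/* kfifo_out() peeks via __kfifo_out_peek() and then advances fifo->out */
	n = kfifo_out(&example_fifo, out, sizeof(out));
}
#endif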
/* SPDX-License-Identifier: GPL-2.0 */ /* * usb hub driver header file * * Copyright (C) 1999 Linus Torvalds * Copyright (C) 1999 Johannes Erdfelt * Copyright (C) 1999 Gregory P. Smith * Copyright (C) 2001 Brad Hards (bhards@bigpond.net.au) * Copyright (C) 2012 Intel Corp (tianyu.lan@intel.com) * * move struct usb_hub to this file. */ #include <linux/usb.h> #include <linux/usb/ch11.h> #include <linux/usb/hcd.h> #include <linux/usb/typec.h> #include "usb.h" struct usb_hub { struct device *intfdev; /* the "interface" device */ struct usb_device *hdev; struct kref kref; struct urb *urb; /* for interrupt polling pipe */ /* buffer for urb ... with extra space in case of babble */ u8 (*buffer)[8]; union { struct usb_hub_status hub; struct usb_port_status port; } *status; /* buffer for status reports */ struct mutex status_mutex; /* for the status buffer */ int error; /* last reported error */ int nerrors; /* track consecutive errors */ unsigned long event_bits[1]; /* status change bitmask */ unsigned long change_bits[1]; /* ports with logical connect status change */ unsigned long removed_bits[1]; /* ports with a "removed" device present */ unsigned long wakeup_bits[1]; /* ports that have signaled remote wakeup */ unsigned long power_bits[1]; /* ports that are powered */ unsigned long child_usage_bits[1]; /* ports powered on for children */ unsigned long warm_reset_bits[1]; /* ports requesting warm reset recovery */ #if USB_MAXCHILDREN > 31 /* 8*sizeof(unsigned long) - 1 */ #error event_bits[] is too short!
#endif struct usb_hub_descriptor *descriptor; /* class descriptor */ struct usb_tt tt; /* Transaction Translator */ unsigned mA_per_port; /* current for each child */ #ifdef CONFIG_PM unsigned wakeup_enabled_descendants; #endif unsigned limited_power:1; unsigned quiescing:1; unsigned disconnected:1; unsigned in_reset:1; unsigned quirk_disable_autosuspend:1; unsigned quirk_check_port_auto_suspend:1; unsigned has_indicators:1; u8 indicator[USB_MAXCHILDREN]; struct delayed_work leds; struct delayed_work init_work; struct work_struct events; spinlock_t irq_urb_lock; struct timer_list irq_urb_retry; struct usb_port **ports; struct list_head onboard_devs; }; /** * struct usb_port - kernel's representation of a usb port * @child: usb device attached to the port * @dev: generic device interface * @port_owner: port's owner * @peer: related usb2 and usb3 ports (share the same connector) * @connector: USB Type-C connector * @req: default pm qos request for hubs without port power control * @connect_type: port's connect type * @state: device state of the usb device attached to the port * @state_kn: kernfs_node of the sysfs attribute that accesses @state * @location: opaque representation of platform connector location * @status_lock: synchronize port_event() vs usb_port_{suspend|resume} * @over_current_count: number of over-current conditions seen on this port * @portnum: one-based port index * @quirks: port-specific quirks (USB_PORT_QUIRK_*) * @is_superspeed: cache of the port's super-speed status * @usb3_lpm_u1_permit: whether USB3 U1 LPM is permitted. * @usb3_lpm_u2_permit: whether USB3 U2 LPM is permitted. * @early_stop: whether port initialization will be stopped earlier. * @ignore_event: whether events of the port are ignored. */ struct usb_port { struct usb_device *child; struct device dev; struct usb_dev_state *port_owner; struct usb_port *peer; struct typec_connector *connector; struct dev_pm_qos_request *req; enum usb_port_connect_type connect_type; enum usb_device_state state; struct kernfs_node *state_kn; usb_port_location_t location; struct mutex status_lock; u32 over_current_count; u8 portnum; u32 quirks; unsigned int early_stop:1; unsigned int ignore_event:1; unsigned int is_superspeed:1; unsigned int usb3_lpm_u1_permit:1; unsigned int usb3_lpm_u2_permit:1; }; #define to_usb_port(_dev) \ container_of(_dev, struct usb_port, dev) extern int usb_hub_create_port_device(struct usb_hub *hub, int port1); extern void usb_hub_remove_port_device(struct usb_hub *hub, int port1); extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set); extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev); extern void hub_get(struct usb_hub *hub); extern void hub_put(struct usb_hub *hub); extern int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected); extern int usb_clear_port_feature(struct usb_device *hdev, int port1, int feature); extern int usb_hub_port_status(struct usb_hub *hub, int port1, u16 *status, u16 *change); extern int usb_port_is_power_on(struct usb_hub *hub, unsigned int portstatus); static inline bool hub_is_port_power_switchable(struct usb_hub *hub) { __le16 hcs; if (!hub) return false; hcs = hub->descriptor->wHubCharacteristics; return (le16_to_cpu(hcs) & HUB_CHAR_LPSM) < HUB_CHAR_NO_LPSM; } static inline int hub_is_superspeed(struct usb_device *hdev) { return hdev->descriptor.bDeviceProtocol == USB_HUB_PR_SS; } static inline int hub_is_superspeedplus(struct usb_device *hdev) { return (hdev->descriptor.bDeviceProtocol == USB_HUB_PR_SS && le16_to_cpu(hdev->descriptor.bcdUSB) >= 0x0310 && hdev->bos && hdev->bos->ssp_cap); } static inline unsigned
hub_power_on_good_delay(struct usb_hub *hub) { unsigned delay = hub->descriptor->bPwrOn2PwrGood * 2; if (!hub->hdev->parent) /* root hub */ return delay; else /* Wait at least 100 msec for power to become stable */ return max(delay, 100U); } static inline int hub_port_debounce_be_connected(struct usb_hub *hub, int port1) { return hub_port_debounce(hub, port1, true); } static inline int hub_port_debounce_be_stable(struct usb_hub *hub, int port1) { return hub_port_debounce(hub, port1, false); }
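/*
 * Illustrative sketch (not part of hub.h): how the inline helpers above are
 * meant to be combined by hub code, e.g. waiting out the descriptor's
 * power-good delay after switching a port on and then debouncing the
 * connection. The function is hypothetical, assumes <linux/delay.h> for
 * msleep(), and is guarded out so the header is unchanged.
 */
#if 0
static int example_power_on_and_debounce(struct usb_hub *hub, int port1)
{
	/* bPwrOn2PwrGood is in 2 ms units; non-root hubs wait at least 100 ms */
	msleep(hub_power_on_good_delay(hub));

	/* poll until the connect status has been stable long enough */
	return hub_port_debounce_be_connected(hub, port1);
}
#endif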
// SPDX-License-Identifier: GPL-2.0 // rc-main.c - Remote Controller core module // // Copyright (C) 2009-2010 by Mauro Carvalho Chehab #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <media/rc-core.h> #include <linux/bsearch.h> #include <linux/spinlock.h> #include <linux/delay.h> #include <linux/input.h> #include <linux/leds.h> #include <linux/slab.h> #include <linux/idr.h> #include <linux/device.h> #include <linux/module.h> #include "rc-core-priv.h" /* Sizes are in bytes, 256 bytes allows for 32 entries on x64 */ #define IR_TAB_MIN_SIZE 256 #define IR_TAB_MAX_SIZE 8192 static const struct { const char *name; unsigned int repeat_period; unsigned int scancode_bits; } protocols[] = { [RC_PROTO_UNKNOWN] = { .name = "unknown", .repeat_period = 125 }, [RC_PROTO_OTHER] = { .name = "other", .repeat_period = 125 }, [RC_PROTO_RC5] = { .name = "rc-5", .scancode_bits = 0x1f7f, .repeat_period = 114 }, [RC_PROTO_RC5X_20] = { .name = "rc-5x-20", .scancode_bits = 0x1f7f3f, .repeat_period = 114 }, [RC_PROTO_RC5_SZ] = { .name = "rc-5-sz", .scancode_bits = 0x2fff, .repeat_period = 114 }, [RC_PROTO_JVC] = { .name = "jvc", .scancode_bits = 0xffff, .repeat_period = 125 },
[RC_PROTO_SONY12] = { .name = "sony-12", .scancode_bits = 0x1f007f, .repeat_period = 100 }, [RC_PROTO_SONY15] = { .name = "sony-15", .scancode_bits = 0xff007f, .repeat_period = 100 }, [RC_PROTO_SONY20] = { .name = "sony-20", .scancode_bits = 0x1fff7f, .repeat_period = 100 }, [RC_PROTO_NEC] = { .name = "nec", .scancode_bits = 0xffff, .repeat_period = 110 }, [RC_PROTO_NECX] = { .name = "nec-x", .scancode_bits = 0xffffff, .repeat_period = 110 }, [RC_PROTO_NEC32] = { .name = "nec-32", .scancode_bits = 0xffffffff, .repeat_period = 110 }, [RC_PROTO_SANYO] = { .name = "sanyo", .scancode_bits = 0x1fffff, .repeat_period = 125 }, [RC_PROTO_MCIR2_KBD] = { .name = "mcir2-kbd", .scancode_bits = 0xffffff, .repeat_period = 100 }, [RC_PROTO_MCIR2_MSE] = { .name = "mcir2-mse", .scancode_bits = 0x1fffff, .repeat_period = 100 }, [RC_PROTO_RC6_0] = { .name = "rc-6-0", .scancode_bits = 0xffff, .repeat_period = 114 }, [RC_PROTO_RC6_6A_20] = { .name = "rc-6-6a-20", .scancode_bits = 0xfffff, .repeat_period = 114 }, [RC_PROTO_RC6_6A_24] = { .name = "rc-6-6a-24", .scancode_bits = 0xffffff, .repeat_period = 114 }, [RC_PROTO_RC6_6A_32] = { .name = "rc-6-6a-32", .scancode_bits = 0xffffffff, .repeat_period = 114 }, [RC_PROTO_RC6_MCE] = { .name = "rc-6-mce", .scancode_bits = 0xffff7fff, .repeat_period = 114 }, [RC_PROTO_SHARP] = { .name = "sharp", .scancode_bits = 0x1fff, .repeat_period = 125 }, [RC_PROTO_XMP] = { .name = "xmp", .repeat_period = 125 }, [RC_PROTO_CEC] = { .name = "cec", .repeat_period = 0 }, [RC_PROTO_IMON] = { .name = "imon", .scancode_bits = 0x7fffffff, .repeat_period = 114 }, [RC_PROTO_RCMM12] = { .name = "rc-mm-12", .scancode_bits = 0x00000fff, .repeat_period = 114 }, [RC_PROTO_RCMM24] = { .name = "rc-mm-24", .scancode_bits = 0x00ffffff, .repeat_period = 114 }, [RC_PROTO_RCMM32] = { .name = "rc-mm-32", .scancode_bits = 0xffffffff, .repeat_period = 114 }, [RC_PROTO_XBOX_DVD] = { .name = "xbox-dvd", .repeat_period = 64 }, }; /* Used to keep track of known keymaps */ static LIST_HEAD(rc_map_list); static DEFINE_SPINLOCK(rc_map_lock); static struct led_trigger *led_feedback; /* Used to keep track of rc devices */ static DEFINE_IDA(rc_ida); static struct rc_map_list *seek_rc_map(const char *name) { struct rc_map_list *map = NULL; spin_lock(&rc_map_lock); list_for_each_entry(map, &rc_map_list, list) { if (!strcmp(name, map->map.name)) { spin_unlock(&rc_map_lock); return map; } } spin_unlock(&rc_map_lock); return NULL; } struct rc_map *rc_map_get(const char *name) { struct rc_map_list *map; map = seek_rc_map(name); #ifdef CONFIG_MODULES if (!map) { int rc = request_module("%s", name); if (rc < 0) { pr_err("Couldn't load IR keymap %s\n", name); return NULL; } msleep(20); /* Give some time for IR to register */ map = seek_rc_map(name); } #endif if (!map) { pr_err("IR keymap %s not found\n", name); return NULL; } printk(KERN_INFO "Registered IR keymap %s\n", map->map.name); return &map->map; } EXPORT_SYMBOL_GPL(rc_map_get); int rc_map_register(struct rc_map_list *map) { spin_lock(&rc_map_lock); list_add_tail(&map->list, &rc_map_list); spin_unlock(&rc_map_lock); return 0; } EXPORT_SYMBOL_GPL(rc_map_register); void rc_map_unregister(struct rc_map_list *map) { spin_lock(&rc_map_lock); list_del(&map->list); spin_unlock(&rc_map_lock); } EXPORT_SYMBOL_GPL(rc_map_unregister); static struct rc_map_table empty[] = { { 0x2a, KEY_COFFEE }, }; static struct rc_map_list empty_map = { .map = { .scan = empty, .size = ARRAY_SIZE(empty), .rc_proto = RC_PROTO_UNKNOWN, /* Legacy IR type */ .name = RC_MAP_EMPTY, } }; /** * 
scancode_to_u64() - converts scancode in &struct input_keymap_entry * @ke: keymap entry containing scancode to be converted. * @scancode: pointer to the location where converted scancode should * be stored. * * This function is a version of input_scancode_to_scalar specialized for * rc-core. */ static int scancode_to_u64(const struct input_keymap_entry *ke, u64 *scancode) { switch (ke->len) { case 1: *scancode = *((u8 *)ke->scancode); break; case 2: *scancode = *((u16 *)ke->scancode); break; case 4: *scancode = *((u32 *)ke->scancode); break; case 8: *scancode = *((u64 *)ke->scancode); break; default: return -EINVAL; } return 0; } /** * ir_create_table() - initializes a scancode table * @dev: the rc_dev device * @rc_map: the rc_map to initialize * @name: name to assign to the table * @rc_proto: ir type to assign to the new table * @size: initial size of the table * * This routine will initialize the rc_map and will allocate * memory to hold at least the specified number of elements. * * return: zero on success or a negative error code */ static int ir_create_table(struct rc_dev *dev, struct rc_map *rc_map, const char *name, u64 rc_proto, size_t size) { rc_map->name = kstrdup(name, GFP_KERNEL); if (!rc_map->name) return -ENOMEM; rc_map->rc_proto = rc_proto; rc_map->alloc = roundup_pow_of_two(size * sizeof(struct rc_map_table)); rc_map->size = rc_map->alloc / sizeof(struct rc_map_table); rc_map->scan = kmalloc(rc_map->alloc, GFP_KERNEL); if (!rc_map->scan) { kfree(rc_map->name); rc_map->name = NULL; return -ENOMEM; } dev_dbg(&dev->dev, "Allocated space for %u keycode entries (%u bytes)\n", rc_map->size, rc_map->alloc); return 0; } /** * ir_free_table() - frees memory allocated by a scancode table * @rc_map: the table whose mappings need to be freed * * This routine will free memory allocated for key mappings used by the given * scancode table. */ static void ir_free_table(struct rc_map *rc_map) { rc_map->size = 0; kfree(rc_map->name); rc_map->name = NULL; kfree(rc_map->scan); rc_map->scan = NULL; } /** * ir_resize_table() - resizes a scancode table if necessary * @dev: the rc_dev device * @rc_map: the rc_map to resize * @gfp_flags: gfp flags to use when allocating memory * * This routine will shrink the rc_map if it has lots of * unused entries and grow it if it is full.
* * return: zero on success or a negative error code */ static int ir_resize_table(struct rc_dev *dev, struct rc_map *rc_map, gfp_t gfp_flags) { unsigned int oldalloc = rc_map->alloc; unsigned int newalloc = oldalloc; struct rc_map_table *oldscan = rc_map->scan; struct rc_map_table *newscan; if (rc_map->size == rc_map->len) { /* All entries in use -> grow keytable */ if (rc_map->alloc >= IR_TAB_MAX_SIZE) return -ENOMEM; newalloc *= 2; dev_dbg(&dev->dev, "Growing table to %u bytes\n", newalloc); } if ((rc_map->len * 3 < rc_map->size) && (oldalloc > IR_TAB_MIN_SIZE)) { /* Less than 1/3 of entries in use -> shrink keytable */ newalloc /= 2; dev_dbg(&dev->dev, "Shrinking table to %u bytes\n", newalloc); } if (newalloc == oldalloc) return 0; newscan = kmalloc(newalloc, gfp_flags); if (!newscan) return -ENOMEM; memcpy(newscan, rc_map->scan, rc_map->len * sizeof(struct rc_map_table)); rc_map->scan = newscan; rc_map->alloc = newalloc; rc_map->size = rc_map->alloc / sizeof(struct rc_map_table); kfree(oldscan); return 0; } /** * ir_update_mapping() - set a keycode in the scancode->keycode table * @dev: the struct rc_dev device descriptor * @rc_map: scancode table to be adjusted * @index: index of the mapping that needs to be updated * @new_keycode: the desired keycode * * This routine is used to update scancode->keycode mapping at given * position. * * return: previous keycode assigned to the mapping * */ static unsigned int ir_update_mapping(struct rc_dev *dev, struct rc_map *rc_map, unsigned int index, unsigned int new_keycode) { int old_keycode = rc_map->scan[index].keycode; int i; /* Did the user wish to remove the mapping? */ if (new_keycode == KEY_RESERVED || new_keycode == KEY_UNKNOWN) { dev_dbg(&dev->dev, "#%d: Deleting scan 0x%04llx\n", index, rc_map->scan[index].scancode); rc_map->len--; memmove(&rc_map->scan[index], &rc_map->scan[index+ 1], (rc_map->len - index) * sizeof(struct rc_map_table)); } else { dev_dbg(&dev->dev, "#%d: %s scan 0x%04llx with key 0x%04x\n", index, old_keycode == KEY_RESERVED ? "New" : "Replacing", rc_map->scan[index].scancode, new_keycode); rc_map->scan[index].keycode = new_keycode; __set_bit(new_keycode, dev->input_dev->keybit); } if (old_keycode != KEY_RESERVED) { /* A previous mapping was updated... */ __clear_bit(old_keycode, dev->input_dev->keybit); /* ... but another scancode might use the same keycode */ for (i = 0; i < rc_map->len; i++) { if (rc_map->scan[i].keycode == old_keycode) { __set_bit(old_keycode, dev->input_dev->keybit); break; } } /* Possibly shrink the keytable, failure is not a problem */ ir_resize_table(dev, rc_map, GFP_ATOMIC); } return old_keycode; } /** * ir_establish_scancode() - set a keycode in the scancode->keycode table * @dev: the struct rc_dev device descriptor * @rc_map: scancode table to be searched * @scancode: the desired scancode * @resize: controls whether we allowed to resize the table to * accommodate not yet present scancodes * * This routine is used to locate given scancode in rc_map. * If scancode is not yet present the routine will allocate a new slot * for it. * * return: index of the mapping containing scancode in question * or -1U in case of failure. */ static unsigned int ir_establish_scancode(struct rc_dev *dev, struct rc_map *rc_map, u64 scancode, bool resize) { unsigned int i; /* * Unfortunately, some hardware-based IR decoders don't provide * all bits for the complete IR code. In general, they provide only * the command part of the IR code. 
Yet, as it is possible to replace * the provided IR with another one, it is needed to allow loading * IR tables from other remotes. So, we support specifying a mask to * indicate the valid bits of the scancodes. */ if (dev->scancode_mask) scancode &= dev->scancode_mask; /* First check if we already have a mapping for this ir command */ for (i = 0; i < rc_map->len; i++) { if (rc_map->scan[i].scancode == scancode) return i; /* Keytable is sorted from lowest to highest scancode */ if (rc_map->scan[i].scancode >= scancode) break; } /* No previous mapping found, we might need to grow the table */ if (rc_map->size == rc_map->len) { if (!resize || ir_resize_table(dev, rc_map, GFP_ATOMIC)) return -1U; } /* i is the proper index to insert our new keycode */ if (i < rc_map->len) memmove(&rc_map->scan[i + 1], &rc_map->scan[i], (rc_map->len - i) * sizeof(struct rc_map_table)); rc_map->scan[i].scancode = scancode; rc_map->scan[i].keycode = KEY_RESERVED; rc_map->len++; return i; } /** * ir_setkeycode() - set a keycode in the scancode->keycode table * @idev: the struct input_dev device descriptor * @ke: Input keymap entry * @old_keycode: result * * This routine is used to handle evdev EVIOCSKEY ioctl. * * return: -EINVAL if the keycode could not be inserted, otherwise zero. */ static int ir_setkeycode(struct input_dev *idev, const struct input_keymap_entry *ke, unsigned int *old_keycode) { struct rc_dev *rdev = input_get_drvdata(idev); struct rc_map *rc_map = &rdev->rc_map; unsigned int index; u64 scancode; int retval = 0; unsigned long flags; spin_lock_irqsave(&rc_map->lock, flags); if (ke->flags & INPUT_KEYMAP_BY_INDEX) { index = ke->index; if (index >= rc_map->len) { retval = -EINVAL; goto out; } } else { retval = scancode_to_u64(ke, &scancode); if (retval) goto out; index = ir_establish_scancode(rdev, rc_map, scancode, true); if (index >= rc_map->len) { retval = -ENOMEM; goto out; } } *old_keycode = ir_update_mapping(rdev, rc_map, index, ke->keycode); out: spin_unlock_irqrestore(&rc_map->lock, flags); return retval; } /** * ir_setkeytable() - sets several entries in the scancode->keycode table * @dev: the struct rc_dev device descriptor * @from: the struct rc_map to copy entries from * * This routine is used to handle table initialization. * * return: -ENOMEM if all keycodes could not be inserted, otherwise zero. */ static int ir_setkeytable(struct rc_dev *dev, const struct rc_map *from) { struct rc_map *rc_map = &dev->rc_map; unsigned int i, index; int rc; rc = ir_create_table(dev, rc_map, from->name, from->rc_proto, from->size); if (rc) return rc; for (i = 0; i < from->size; i++) { index = ir_establish_scancode(dev, rc_map, from->scan[i].scancode, false); if (index >= rc_map->len) { rc = -ENOMEM; break; } ir_update_mapping(dev, rc_map, index, from->scan[i].keycode); } if (rc) ir_free_table(rc_map); return rc; } static int rc_map_cmp(const void *key, const void *elt) { const u64 *scancode = key; const struct rc_map_table *e = elt; if (*scancode < e->scancode) return -1; else if (*scancode > e->scancode) return 1; return 0; } /** * ir_lookup_by_scancode() - locate mapping by scancode * @rc_map: the struct rc_map to search * @scancode: scancode to look for in the table * * This routine performs binary search in RC keykeymap table for * given scancode. 
* * return: index in the table, -1U if not found */ static unsigned int ir_lookup_by_scancode(const struct rc_map *rc_map, u64 scancode) { struct rc_map_table *res; res = bsearch(&scancode, rc_map->scan, rc_map->len, sizeof(struct rc_map_table), rc_map_cmp); if (!res) return -1U; else return res - rc_map->scan; } /** * ir_getkeycode() - get a keycode from the scancode->keycode table * @idev: the struct input_dev device descriptor * @ke: Input keymap entry * * This routine is used to handle evdev EVIOCGKEY ioctl. * * return: always returns zero. */ static int ir_getkeycode(struct input_dev *idev, struct input_keymap_entry *ke) { struct rc_dev *rdev = input_get_drvdata(idev); struct rc_map *rc_map = &rdev->rc_map; struct rc_map_table *entry; unsigned long flags; unsigned int index; u64 scancode; int retval; spin_lock_irqsave(&rc_map->lock, flags); if (ke->flags & INPUT_KEYMAP_BY_INDEX) { index = ke->index; } else { retval = scancode_to_u64(ke, &scancode); if (retval) goto out; index = ir_lookup_by_scancode(rc_map, scancode); } if (index < rc_map->len) { entry = &rc_map->scan[index]; ke->index = index; ke->keycode = entry->keycode; ke->len = sizeof(entry->scancode); memcpy(ke->scancode, &entry->scancode, sizeof(entry->scancode)); } else if (!(ke->flags & INPUT_KEYMAP_BY_INDEX)) { /* * We do not really know the valid range of scancodes * so let's respond with KEY_RESERVED to anything we * do not have mapping for [yet]. */ ke->index = index; ke->keycode = KEY_RESERVED; } else { retval = -EINVAL; goto out; } retval = 0; out: spin_unlock_irqrestore(&rc_map->lock, flags); return retval; } /** * rc_g_keycode_from_table() - gets the keycode that corresponds to a scancode * @dev: the struct rc_dev descriptor of the device * @scancode: the scancode to look for * * This routine is used by drivers which need to convert a scancode to a * keycode. Normally it should not be used since drivers should have no * interest in keycodes. * * return: the corresponding keycode, or KEY_RESERVED */ u32 rc_g_keycode_from_table(struct rc_dev *dev, u64 scancode) { struct rc_map *rc_map = &dev->rc_map; unsigned int keycode; unsigned int index; unsigned long flags; spin_lock_irqsave(&rc_map->lock, flags); index = ir_lookup_by_scancode(rc_map, scancode); keycode = index < rc_map->len ? rc_map->scan[index].keycode : KEY_RESERVED; spin_unlock_irqrestore(&rc_map->lock, flags); if (keycode != KEY_RESERVED) dev_dbg(&dev->dev, "%s: scancode 0x%04llx keycode 0x%02x\n", dev->device_name, scancode, keycode); return keycode; } EXPORT_SYMBOL_GPL(rc_g_keycode_from_table); /** * ir_do_keyup() - internal function to signal the release of a keypress * @dev: the struct rc_dev descriptor of the device * @sync: whether or not to call input_sync * * This function is used internally to release a keypress, it must be * called with keylock held. */ static void ir_do_keyup(struct rc_dev *dev, bool sync) { if (!dev->keypressed) return; dev_dbg(&dev->dev, "keyup key 0x%04x\n", dev->last_keycode); del_timer(&dev->timer_repeat); input_report_key(dev->input_dev, dev->last_keycode, 0); led_trigger_event(led_feedback, LED_OFF); if (sync) input_sync(dev->input_dev); dev->keypressed = false; } /** * rc_keyup() - signals the release of a keypress * @dev: the struct rc_dev descriptor of the device * * This routine is used to signal that a key has been released on the * remote control. 
*/ void rc_keyup(struct rc_dev *dev) { unsigned long flags; spin_lock_irqsave(&dev->keylock, flags); ir_do_keyup(dev, true); spin_unlock_irqrestore(&dev->keylock, flags); } EXPORT_SYMBOL_GPL(rc_keyup); /** * ir_timer_keyup() - generates a keyup event after a timeout * * @t: a pointer to the struct timer_list * * This routine will generate a keyup event some time after a keydown event * is generated when no further activity has been detected. */ static void ir_timer_keyup(struct timer_list *t) { struct rc_dev *dev = from_timer(dev, t, timer_keyup); unsigned long flags; /* * ir->keyup_jiffies is used to prevent a race condition if a * hardware interrupt occurs at this point and the keyup timer * event is moved further into the future as a result. * * The timer will then be reactivated and this function called * again in the future. We need to exit gracefully in that case * to allow the input subsystem to do its auto-repeat magic or * a keyup event might follow immediately after the keydown. */ spin_lock_irqsave(&dev->keylock, flags); if (time_is_before_eq_jiffies(dev->keyup_jiffies)) ir_do_keyup(dev, true); spin_unlock_irqrestore(&dev->keylock, flags); } /** * ir_timer_repeat() - generates a repeat event after a timeout * * @t: a pointer to the struct timer_list * * This routine will generate a soft repeat event every REP_PERIOD * milliseconds. */ static void ir_timer_repeat(struct timer_list *t) { struct rc_dev *dev = from_timer(dev, t, timer_repeat); struct input_dev *input = dev->input_dev; unsigned long flags; spin_lock_irqsave(&dev->keylock, flags); if (dev->keypressed) { input_event(input, EV_KEY, dev->last_keycode, 2); input_sync(input); if (input->rep[REP_PERIOD]) mod_timer(&dev->timer_repeat, jiffies + msecs_to_jiffies(input->rep[REP_PERIOD])); } spin_unlock_irqrestore(&dev->keylock, flags); } static unsigned int repeat_period(int protocol) { if (protocol >= ARRAY_SIZE(protocols)) return 100; return protocols[protocol].repeat_period; } /** * rc_repeat() - signals that a key is still pressed * @dev: the struct rc_dev descriptor of the device * * This routine is used by IR decoders when a repeat message which does * not include the necessary bits to reproduce the scancode has been * received. */ void rc_repeat(struct rc_dev *dev) { unsigned long flags; unsigned int timeout = usecs_to_jiffies(dev->timeout) + msecs_to_jiffies(repeat_period(dev->last_protocol)); struct lirc_scancode sc = { .scancode = dev->last_scancode, .rc_proto = dev->last_protocol, .keycode = dev->keypressed ? dev->last_keycode : KEY_RESERVED, .flags = LIRC_SCANCODE_FLAG_REPEAT | (dev->last_toggle ? LIRC_SCANCODE_FLAG_TOGGLE : 0) }; if (dev->allowed_protocols != RC_PROTO_BIT_CEC) lirc_scancode_event(dev, &sc); spin_lock_irqsave(&dev->keylock, flags); if (dev->last_scancode <= U32_MAX) { input_event(dev->input_dev, EV_MSC, MSC_SCAN, dev->last_scancode); input_sync(dev->input_dev); } if (dev->keypressed) { dev->keyup_jiffies = jiffies + timeout; mod_timer(&dev->timer_keyup, dev->keyup_jiffies); } spin_unlock_irqrestore(&dev->keylock, flags); } EXPORT_SYMBOL_GPL(rc_repeat); /** * ir_do_keydown() - internal function to process a keypress * @dev: the struct rc_dev descriptor of the device * @protocol: the protocol of the keypress * @scancode: the scancode of the keypress * @keycode: the keycode of the keypress * @toggle: the toggle value of the keypress * * This function is used internally to register a keypress, it must be * called with keylock held. 
*/ static void ir_do_keydown(struct rc_dev *dev, enum rc_proto protocol, u64 scancode, u32 keycode, u8 toggle) { bool new_event = (!dev->keypressed || dev->last_protocol != protocol || dev->last_scancode != scancode || dev->last_toggle != toggle); struct lirc_scancode sc = { .scancode = scancode, .rc_proto = protocol, .flags = (toggle ? LIRC_SCANCODE_FLAG_TOGGLE : 0) | (!new_event ? LIRC_SCANCODE_FLAG_REPEAT : 0), .keycode = keycode }; if (dev->allowed_protocols != RC_PROTO_BIT_CEC) lirc_scancode_event(dev, &sc); if (new_event && dev->keypressed) ir_do_keyup(dev, false); if (scancode <= U32_MAX) input_event(dev->input_dev, EV_MSC, MSC_SCAN, scancode); dev->last_protocol = protocol; dev->last_scancode = scancode; dev->last_toggle = toggle; dev->last_keycode = keycode; if (new_event && keycode != KEY_RESERVED) { /* Register a keypress */ dev->keypressed = true; dev_dbg(&dev->dev, "%s: key down event, key 0x%04x, protocol 0x%04x, scancode 0x%08llx\n", dev->device_name, keycode, protocol, scancode); input_report_key(dev->input_dev, keycode, 1); led_trigger_event(led_feedback, LED_FULL); } /* * For CEC, start sending repeat messages as soon as the first * repeated message is sent, as long as REP_DELAY = 0 and REP_PERIOD * is non-zero. Otherwise, the input layer will generate repeat * messages. */ if (!new_event && keycode != KEY_RESERVED && dev->allowed_protocols == RC_PROTO_BIT_CEC && !timer_pending(&dev->timer_repeat) && dev->input_dev->rep[REP_PERIOD] && !dev->input_dev->rep[REP_DELAY]) { input_event(dev->input_dev, EV_KEY, keycode, 2); mod_timer(&dev->timer_repeat, jiffies + msecs_to_jiffies(dev->input_dev->rep[REP_PERIOD])); } input_sync(dev->input_dev); } /** * rc_keydown() - generates input event for a key press * @dev: the struct rc_dev descriptor of the device * @protocol: the protocol for the keypress * @scancode: the scancode for the keypress * @toggle: the toggle value (protocol dependent, if the protocol doesn't * support toggle values, this should be set to zero) * * This routine is used to signal that a key has been pressed on the * remote control. */ void rc_keydown(struct rc_dev *dev, enum rc_proto protocol, u64 scancode, u8 toggle) { unsigned long flags; u32 keycode = rc_g_keycode_from_table(dev, scancode); spin_lock_irqsave(&dev->keylock, flags); ir_do_keydown(dev, protocol, scancode, keycode, toggle); if (dev->keypressed) { dev->keyup_jiffies = jiffies + usecs_to_jiffies(dev->timeout) + msecs_to_jiffies(repeat_period(protocol)); mod_timer(&dev->timer_keyup, dev->keyup_jiffies); } spin_unlock_irqrestore(&dev->keylock, flags); } EXPORT_SYMBOL_GPL(rc_keydown); /** * rc_keydown_notimeout() - generates input event for a key press without * an automatic keyup event at a later time * @dev: the struct rc_dev descriptor of the device * @protocol: the protocol for the keypress * @scancode: the scancode for the keypress * @toggle: the toggle value (protocol dependent, if the protocol doesn't * support toggle values, this should be set to zero) * * This routine is used to signal that a key has been pressed on the * remote control. The driver must manually call rc_keyup() at a later stage. 
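 *
 * Example (illustrative sketch): the press side of a driver whose hardware
 * signals both press and release, so rc-core's automatic keyup timeout is
 * not wanted; the matching release is reported with rc_keyup().  The foo_*
 * names are hypothetical.
 *
 *	static void foo_press_irq(struct foo_priv *priv, u32 scancode)
 *	{
 *		rc_keydown_notimeout(priv->rc, RC_PROTO_OTHER, scancode, 0);
 *	}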
*/ void rc_keydown_notimeout(struct rc_dev *dev, enum rc_proto protocol, u64 scancode, u8 toggle) { unsigned long flags; u32 keycode = rc_g_keycode_from_table(dev, scancode); spin_lock_irqsave(&dev->keylock, flags); ir_do_keydown(dev, protocol, scancode, keycode, toggle); spin_unlock_irqrestore(&dev->keylock, flags); } EXPORT_SYMBOL_GPL(rc_keydown_notimeout); /** * rc_validate_scancode() - checks that a scancode is valid for a protocol. * For nec, it should do the opposite of ir_nec_bytes_to_scancode() * @proto: protocol * @scancode: scancode */ bool rc_validate_scancode(enum rc_proto proto, u32 scancode) { switch (proto) { /* * NECX has a 16-bit address; if the lower 8 bits match the upper * 8 bits inverted, then the address would match regular nec. */ case RC_PROTO_NECX: if ((((scancode >> 16) ^ ~(scancode >> 8)) & 0xff) == 0) return false; break; /* * NEC32 has a 16 bit address and 16 bit command. If the lower 8 bits * of the command match the upper 8 bits inverted, then it would * be either NEC or NECX. */ case RC_PROTO_NEC32: if ((((scancode >> 8) ^ ~scancode) & 0xff) == 0) return false; break; /* * If the customer code (top 32-bit) is 0x800f, it is MCE else it * is regular mode-6a 32 bit */ case RC_PROTO_RC6_MCE: if ((scancode & 0xffff0000) != 0x800f0000) return false; break; case RC_PROTO_RC6_6A_32: if ((scancode & 0xffff0000) == 0x800f0000) return false; break; default: break; } return true; } /** * rc_validate_filter() - checks that the scancode and mask are valid and * provides sensible defaults * @dev: the struct rc_dev descriptor of the device * @filter: the scancode and mask * * return: 0 or -EINVAL if the filter is not valid */ static int rc_validate_filter(struct rc_dev *dev, struct rc_scancode_filter *filter) { u32 mask, s = filter->data; enum rc_proto protocol = dev->wakeup_protocol; if (protocol >= ARRAY_SIZE(protocols)) return -EINVAL; mask = protocols[protocol].scancode_bits; if (!rc_validate_scancode(protocol, s)) return -EINVAL; filter->data &= mask; filter->mask &= mask; /* * If we have to raw encode the IR for wakeup, we cannot have a mask */ if (dev->encode_wakeup && filter->mask != 0 && filter->mask != mask) return -EINVAL; return 0; } int rc_open(struct rc_dev *rdev) { int rval = 0; if (!rdev) return -EINVAL; mutex_lock(&rdev->lock); if (!rdev->registered) { rval = -ENODEV; } else { if (!rdev->users++ && rdev->open) rval = rdev->open(rdev); if (rval) rdev->users--; } mutex_unlock(&rdev->lock); return rval; } static int ir_open(struct input_dev *idev) { struct rc_dev *rdev = input_get_drvdata(idev); return rc_open(rdev); } void rc_close(struct rc_dev *rdev) { if (rdev) { mutex_lock(&rdev->lock); if (!--rdev->users && rdev->close && rdev->registered) rdev->close(rdev); mutex_unlock(&rdev->lock); } } static void ir_close(struct input_dev *idev) { struct rc_dev *rdev = input_get_drvdata(idev); rc_close(rdev); } /* class for /sys/class/rc */ static char *rc_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "rc/%s", dev_name(dev)); } static struct class rc_class = { .name = "rc", .devnode = rc_devnode, }; /* * These are the protocol textual descriptions that are * used by the sysfs protocols file. Note that the order * of the entries is relevant. 
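 *
 * For example (illustrative; the exact list depends on the driver and on
 * which decoder modules are loaded), reading /sys/class/rc/rc0/protocols on
 * a raw IR receiver with only NEC enabled might print something like:
 *
 *	rc-5 [nec] rc-6 jvc sony [lirc]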
*/ static const struct { u64 type; const char *name; const char *module_name; } proto_names[] = { { RC_PROTO_BIT_NONE, "none", NULL }, { RC_PROTO_BIT_OTHER, "other", NULL }, { RC_PROTO_BIT_UNKNOWN, "unknown", NULL }, { RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC5X_20, "rc-5", "ir-rc5-decoder" }, { RC_PROTO_BIT_NEC | RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32, "nec", "ir-nec-decoder" }, { RC_PROTO_BIT_RC6_0 | RC_PROTO_BIT_RC6_6A_20 | RC_PROTO_BIT_RC6_6A_24 | RC_PROTO_BIT_RC6_6A_32 | RC_PROTO_BIT_RC6_MCE, "rc-6", "ir-rc6-decoder" }, { RC_PROTO_BIT_JVC, "jvc", "ir-jvc-decoder" }, { RC_PROTO_BIT_SONY12 | RC_PROTO_BIT_SONY15 | RC_PROTO_BIT_SONY20, "sony", "ir-sony-decoder" }, { RC_PROTO_BIT_RC5_SZ, "rc-5-sz", "ir-rc5-decoder" }, { RC_PROTO_BIT_SANYO, "sanyo", "ir-sanyo-decoder" }, { RC_PROTO_BIT_SHARP, "sharp", "ir-sharp-decoder" }, { RC_PROTO_BIT_MCIR2_KBD | RC_PROTO_BIT_MCIR2_MSE, "mce_kbd", "ir-mce_kbd-decoder" }, { RC_PROTO_BIT_XMP, "xmp", "ir-xmp-decoder" }, { RC_PROTO_BIT_CEC, "cec", NULL }, { RC_PROTO_BIT_IMON, "imon", "ir-imon-decoder" }, { RC_PROTO_BIT_RCMM12 | RC_PROTO_BIT_RCMM24 | RC_PROTO_BIT_RCMM32, "rc-mm", "ir-rcmm-decoder" }, { RC_PROTO_BIT_XBOX_DVD, "xbox-dvd", NULL }, }; /** * struct rc_filter_attribute - Device attribute relating to a filter type. * @attr: Device attribute. * @type: Filter type. * @mask: false for filter value, true for filter mask. */ struct rc_filter_attribute { struct device_attribute attr; enum rc_filter_type type; bool mask; }; #define to_rc_filter_attr(a) container_of(a, struct rc_filter_attribute, attr) #define RC_FILTER_ATTR(_name, _mode, _show, _store, _type, _mask) \ struct rc_filter_attribute dev_attr_##_name = { \ .attr = __ATTR(_name, _mode, _show, _store), \ .type = (_type), \ .mask = (_mask), \ } /** * show_protocols() - shows the current IR protocol(s) * @device: the device descriptor * @mattr: the device attribute struct * @buf: a pointer to the output buffer * * This routine is a callback routine for input read the IR protocol type(s). * it is triggered by reading /sys/class/rc/rc?/protocols. * It returns the protocol names of supported protocols. * Enabled protocols are printed in brackets. * * dev->lock is taken to guard against races between * store_protocols and show_protocols. */ static ssize_t show_protocols(struct device *device, struct device_attribute *mattr, char *buf) { struct rc_dev *dev = to_rc_dev(device); u64 allowed, enabled; char *tmp = buf; int i; mutex_lock(&dev->lock); enabled = dev->enabled_protocols; allowed = dev->allowed_protocols; if (dev->raw && !allowed) allowed = ir_raw_get_allowed_protocols(); mutex_unlock(&dev->lock); dev_dbg(&dev->dev, "%s: allowed - 0x%llx, enabled - 0x%llx\n", __func__, (long long)allowed, (long long)enabled); for (i = 0; i < ARRAY_SIZE(proto_names); i++) { if (allowed & enabled & proto_names[i].type) tmp += sprintf(tmp, "[%s] ", proto_names[i].name); else if (allowed & proto_names[i].type) tmp += sprintf(tmp, "%s ", proto_names[i].name); if (allowed & proto_names[i].type) allowed &= ~proto_names[i].type; } #ifdef CONFIG_LIRC if (dev->driver_type == RC_DRIVER_IR_RAW) tmp += sprintf(tmp, "[lirc] "); #endif if (tmp != buf) tmp--; *tmp = '\n'; return tmp + 1 - buf; } /** * parse_protocol_change() - parses a protocol change request * @dev: rc_dev device * @protocols: pointer to the bitmask of current protocols * @buf: pointer to the buffer with a list of changes * * Writing "+proto" will add a protocol to the protocol mask. * Writing "-proto" will remove a protocol from protocol mask. 
* Writing "proto" will enable only "proto". * Writing "none" will disable all protocols. * Returns the number of changes performed or a negative error code. */ static int parse_protocol_change(struct rc_dev *dev, u64 *protocols, const char *buf) { const char *tmp; unsigned count = 0; bool enable, disable; u64 mask; int i; while ((tmp = strsep((char **)&buf, " \n")) != NULL) { if (!*tmp) break; if (*tmp == '+') { enable = true; disable = false; tmp++; } else if (*tmp == '-') { enable = false; disable = true; tmp++; } else { enable = false; disable = false; } for (i = 0; i < ARRAY_SIZE(proto_names); i++) { if (!strcasecmp(tmp, proto_names[i].name)) { mask = proto_names[i].type; break; } } if (i == ARRAY_SIZE(proto_names)) { if (!strcasecmp(tmp, "lirc")) mask = 0; else { dev_dbg(&dev->dev, "Unknown protocol: '%s'\n", tmp); return -EINVAL; } } count++; if (enable) *protocols |= mask; else if (disable) *protocols &= ~mask; else *protocols = mask; } if (!count) { dev_dbg(&dev->dev, "Protocol not specified\n"); return -EINVAL; } return count; } void ir_raw_load_modules(u64 *protocols) { u64 available; int i, ret; for (i = 0; i < ARRAY_SIZE(proto_names); i++) { if (proto_names[i].type == RC_PROTO_BIT_NONE || proto_names[i].type & (RC_PROTO_BIT_OTHER | RC_PROTO_BIT_UNKNOWN)) continue; available = ir_raw_get_allowed_protocols(); if (!(*protocols & proto_names[i].type & ~available)) continue; if (!proto_names[i].module_name) { pr_err("Can't enable IR protocol %s\n", proto_names[i].name); *protocols &= ~proto_names[i].type; continue; } ret = request_module("%s", proto_names[i].module_name); if (ret < 0) { pr_err("Couldn't load IR protocol module %s\n", proto_names[i].module_name); *protocols &= ~proto_names[i].type; continue; } msleep(20); available = ir_raw_get_allowed_protocols(); if (!(*protocols & proto_names[i].type & ~available)) continue; pr_err("Loaded IR protocol module %s, but protocol %s still not available\n", proto_names[i].module_name, proto_names[i].name); *protocols &= ~proto_names[i].type; } } /** * store_protocols() - changes the current/wakeup IR protocol(s) * @device: the device descriptor * @mattr: the device attribute struct * @buf: a pointer to the input buffer * @len: length of the input buffer * * This routine is for changing the IR protocol type. * It is triggered by writing to /sys/class/rc/rc?/[wakeup_]protocols. * See parse_protocol_change() for the valid commands. * Returns @len on success or a negative error code. * * dev->lock is taken to guard against races between * store_protocols and show_protocols. 
*/ static ssize_t store_protocols(struct device *device, struct device_attribute *mattr, const char *buf, size_t len) { struct rc_dev *dev = to_rc_dev(device); u64 *current_protocols; struct rc_scancode_filter *filter; u64 old_protocols, new_protocols; ssize_t rc; dev_dbg(&dev->dev, "Normal protocol change requested\n"); current_protocols = &dev->enabled_protocols; filter = &dev->scancode_filter; if (!dev->change_protocol) { dev_dbg(&dev->dev, "Protocol switching not supported\n"); return -EINVAL; } mutex_lock(&dev->lock); if (!dev->registered) { mutex_unlock(&dev->lock); return -ENODEV; } old_protocols = *current_protocols; new_protocols = old_protocols; rc = parse_protocol_change(dev, &new_protocols, buf); if (rc < 0) goto out; if (dev->driver_type == RC_DRIVER_IR_RAW) ir_raw_load_modules(&new_protocols); rc = dev->change_protocol(dev, &new_protocols); if (rc < 0) { dev_dbg(&dev->dev, "Error setting protocols to 0x%llx\n", (long long)new_protocols); goto out; } if (new_protocols != old_protocols) { *current_protocols = new_protocols; dev_dbg(&dev->dev, "Protocols changed to 0x%llx\n", (long long)new_protocols); } /* * If a protocol change was attempted the filter may need updating, even * if the actual protocol mask hasn't changed (since the driver may have * cleared the filter). * Try setting the same filter with the new protocol (if any). * Fall back to clearing the filter. */ if (dev->s_filter && filter->mask) { if (new_protocols) rc = dev->s_filter(dev, filter); else rc = -1; if (rc < 0) { filter->data = 0; filter->mask = 0; dev->s_filter(dev, filter); } } rc = len; out: mutex_unlock(&dev->lock); return rc; } /** * show_filter() - shows the current scancode filter value or mask * @device: the device descriptor * @attr: the device attribute struct * @buf: a pointer to the output buffer * * This routine is a callback routine to read a scancode filter value or mask. * It is triggered by reading /sys/class/rc/rc?/[wakeup_]filter[_mask]. * It prints the current scancode filter value or mask of the appropriate filter * type in hexadecimal into @buf and returns the size of the buffer. * * Bits of the filter value corresponding to set bits in the filter mask are * compared against input scancodes and non-matching scancodes are discarded. * * dev->lock is taken to guard against races between * store_filter and show_filter. */ static ssize_t show_filter(struct device *device, struct device_attribute *attr, char *buf) { struct rc_dev *dev = to_rc_dev(device); struct rc_filter_attribute *fattr = to_rc_filter_attr(attr); struct rc_scancode_filter *filter; u32 val; mutex_lock(&dev->lock); if (fattr->type == RC_FILTER_NORMAL) filter = &dev->scancode_filter; else filter = &dev->scancode_wakeup_filter; if (fattr->mask) val = filter->mask; else val = filter->data; mutex_unlock(&dev->lock); return sprintf(buf, "%#x\n", val); } /** * store_filter() - changes the scancode filter value * @device: the device descriptor * @attr: the device attribute struct * @buf: a pointer to the input buffer * @len: length of the input buffer * * This routine is for changing a scancode filter value or mask. * It is triggered by writing to /sys/class/rc/rc?/[wakeup_]filter[_mask]. * Returns -EINVAL if an invalid filter value for the current protocol was * specified or if scancode filtering is not supported by the driver, otherwise * returns @len. * * Bits of the filter value corresponding to set bits in the filter mask are * compared against input scancodes and non-matching scancodes are discarded. 
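 *
 * Conceptually (illustrative; the comparison itself is implemented by the
 * driver or hardware behind s_filter, not by rc-core):
 *
 *	accept = (scancode & filter->mask) == (filter->data & filter->mask);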
* * dev->lock is taken to guard against races between * store_filter and show_filter. */ static ssize_t store_filter(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { struct rc_dev *dev = to_rc_dev(device); struct rc_filter_attribute *fattr = to_rc_filter_attr(attr); struct rc_scancode_filter new_filter, *filter; int ret; unsigned long val; int (*set_filter)(struct rc_dev *dev, struct rc_scancode_filter *filter); ret = kstrtoul(buf, 0, &val); if (ret < 0) return ret; if (fattr->type == RC_FILTER_NORMAL) { set_filter = dev->s_filter; filter = &dev->scancode_filter; } else { set_filter = dev->s_wakeup_filter; filter = &dev->scancode_wakeup_filter; } if (!set_filter) return -EINVAL; mutex_lock(&dev->lock); if (!dev->registered) { mutex_unlock(&dev->lock); return -ENODEV; } new_filter = *filter; if (fattr->mask) new_filter.mask = val; else new_filter.data = val; if (fattr->type == RC_FILTER_WAKEUP) { /* * Refuse to set a filter unless a protocol is enabled * and the filter is valid for that protocol */ if (dev->wakeup_protocol != RC_PROTO_UNKNOWN) ret = rc_validate_filter(dev, &new_filter); else ret = -EINVAL; if (ret != 0) goto unlock; } if (fattr->type == RC_FILTER_NORMAL && !dev->enabled_protocols && val) { /* refuse to set a filter unless a protocol is enabled */ ret = -EINVAL; goto unlock; } ret = set_filter(dev, &new_filter); if (ret < 0) goto unlock; *filter = new_filter; unlock: mutex_unlock(&dev->lock); return (ret < 0) ? ret : len; } /** * show_wakeup_protocols() - shows the wakeup IR protocol * @device: the device descriptor * @mattr: the device attribute struct * @buf: a pointer to the output buffer * * This routine is a callback routine for input read the IR protocol type(s). * it is triggered by reading /sys/class/rc/rc?/wakeup_protocols. * It returns the protocol names of supported protocols. * The enabled protocols are printed in brackets. * * dev->lock is taken to guard against races between * store_wakeup_protocols and show_wakeup_protocols. */ static ssize_t show_wakeup_protocols(struct device *device, struct device_attribute *mattr, char *buf) { struct rc_dev *dev = to_rc_dev(device); u64 allowed; enum rc_proto enabled; char *tmp = buf; int i; mutex_lock(&dev->lock); allowed = dev->allowed_wakeup_protocols; enabled = dev->wakeup_protocol; mutex_unlock(&dev->lock); dev_dbg(&dev->dev, "%s: allowed - 0x%llx, enabled - %d\n", __func__, (long long)allowed, enabled); for (i = 0; i < ARRAY_SIZE(protocols); i++) { if (allowed & (1ULL << i)) { if (i == enabled) tmp += sprintf(tmp, "[%s] ", protocols[i].name); else tmp += sprintf(tmp, "%s ", protocols[i].name); } } if (tmp != buf) tmp--; *tmp = '\n'; return tmp + 1 - buf; } /** * store_wakeup_protocols() - changes the wakeup IR protocol(s) * @device: the device descriptor * @mattr: the device attribute struct * @buf: a pointer to the input buffer * @len: length of the input buffer * * This routine is for changing the IR protocol type. * It is triggered by writing to /sys/class/rc/rc?/wakeup_protocols. * Returns @len on success or a negative error code. * * dev->lock is taken to guard against races between * store_wakeup_protocols and show_wakeup_protocols. 
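 *
 * Example (illustrative sketch): selecting RC-5 as the wakeup protocol from
 * userspace before arming a wakeup filter; the rc0 path is hypothetical and
 * error handling is omitted:
 *
 *	int fd = open("/sys/class/rc/rc0/wakeup_protocols", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "rc-5", 4);
 *		close(fd);
 *	}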
*/ static ssize_t store_wakeup_protocols(struct device *device, struct device_attribute *mattr, const char *buf, size_t len) { struct rc_dev *dev = to_rc_dev(device); enum rc_proto protocol = RC_PROTO_UNKNOWN; ssize_t rc; u64 allowed; int i; mutex_lock(&dev->lock); if (!dev->registered) { mutex_unlock(&dev->lock); return -ENODEV; } allowed = dev->allowed_wakeup_protocols; if (!sysfs_streq(buf, "none")) { for (i = 0; i < ARRAY_SIZE(protocols); i++) { if ((allowed & (1ULL << i)) && sysfs_streq(buf, protocols[i].name)) { protocol = i; break; } } if (i == ARRAY_SIZE(protocols)) { rc = -EINVAL; goto out; } if (dev->encode_wakeup) { u64 mask = 1ULL << protocol; ir_raw_load_modules(&mask); if (!mask) { rc = -EINVAL; goto out; } } } if (dev->wakeup_protocol != protocol) { dev->wakeup_protocol = protocol; dev_dbg(&dev->dev, "Wakeup protocol changed to %d\n", protocol); if (protocol == RC_PROTO_RC6_MCE) dev->scancode_wakeup_filter.data = 0x800f0000; else dev->scancode_wakeup_filter.data = 0; dev->scancode_wakeup_filter.mask = 0; rc = dev->s_wakeup_filter(dev, &dev->scancode_wakeup_filter); if (rc == 0) rc = len; } else { rc = len; } out: mutex_unlock(&dev->lock); return rc; } static void rc_dev_release(struct device *device) { struct rc_dev *dev = to_rc_dev(device); kfree(dev); } static int rc_dev_uevent(const struct device *device, struct kobj_uevent_env *env) { struct rc_dev *dev = to_rc_dev(device); int ret = 0; mutex_lock(&dev->lock); if (!dev->registered) ret = -ENODEV; if (ret == 0 && dev->rc_map.name) ret = add_uevent_var(env, "NAME=%s", dev->rc_map.name); if (ret == 0 && dev->driver_name) ret = add_uevent_var(env, "DRV_NAME=%s", dev->driver_name); if (ret == 0 && dev->device_name) ret = add_uevent_var(env, "DEV_NAME=%s", dev->device_name); mutex_unlock(&dev->lock); return ret; } /* * Static device attribute struct with the sysfs attributes for IR's */ static struct device_attribute dev_attr_ro_protocols = __ATTR(protocols, 0444, show_protocols, NULL); static struct device_attribute dev_attr_rw_protocols = __ATTR(protocols, 0644, show_protocols, store_protocols); static DEVICE_ATTR(wakeup_protocols, 0644, show_wakeup_protocols, store_wakeup_protocols); static RC_FILTER_ATTR(filter, S_IRUGO|S_IWUSR, show_filter, store_filter, RC_FILTER_NORMAL, false); static RC_FILTER_ATTR(filter_mask, S_IRUGO|S_IWUSR, show_filter, store_filter, RC_FILTER_NORMAL, true); static RC_FILTER_ATTR(wakeup_filter, S_IRUGO|S_IWUSR, show_filter, store_filter, RC_FILTER_WAKEUP, false); static RC_FILTER_ATTR(wakeup_filter_mask, S_IRUGO|S_IWUSR, show_filter, store_filter, RC_FILTER_WAKEUP, true); static struct attribute *rc_dev_rw_protocol_attrs[] = { &dev_attr_rw_protocols.attr, NULL, }; static const struct attribute_group rc_dev_rw_protocol_attr_grp = { .attrs = rc_dev_rw_protocol_attrs, }; static struct attribute *rc_dev_ro_protocol_attrs[] = { &dev_attr_ro_protocols.attr, NULL, }; static const struct attribute_group rc_dev_ro_protocol_attr_grp = { .attrs = rc_dev_ro_protocol_attrs, }; static struct attribute *rc_dev_filter_attrs[] = { &dev_attr_filter.attr.attr, &dev_attr_filter_mask.attr.attr, NULL, }; static const struct attribute_group rc_dev_filter_attr_grp = { .attrs = rc_dev_filter_attrs, }; static struct attribute *rc_dev_wakeup_filter_attrs[] = { &dev_attr_wakeup_filter.attr.attr, &dev_attr_wakeup_filter_mask.attr.attr, &dev_attr_wakeup_protocols.attr, NULL, }; static const struct attribute_group rc_dev_wakeup_filter_attr_grp = { .attrs = rc_dev_wakeup_filter_attrs, }; static const struct device_type 
rc_dev_type = { .release = rc_dev_release, .uevent = rc_dev_uevent, }; struct rc_dev *rc_allocate_device(enum rc_driver_type type) { struct rc_dev *dev; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; if (type != RC_DRIVER_IR_RAW_TX) { dev->input_dev = input_allocate_device(); if (!dev->input_dev) { kfree(dev); return NULL; } dev->input_dev->getkeycode = ir_getkeycode; dev->input_dev->setkeycode = ir_setkeycode; input_set_drvdata(dev->input_dev, dev); dev->timeout = IR_DEFAULT_TIMEOUT; timer_setup(&dev->timer_keyup, ir_timer_keyup, 0); timer_setup(&dev->timer_repeat, ir_timer_repeat, 0); spin_lock_init(&dev->rc_map.lock); spin_lock_init(&dev->keylock); } mutex_init(&dev->lock); dev->dev.type = &rc_dev_type; dev->dev.class = &rc_class; device_initialize(&dev->dev); dev->driver_type = type; __module_get(THIS_MODULE); return dev; } EXPORT_SYMBOL_GPL(rc_allocate_device); void rc_free_device(struct rc_dev *dev) { if (!dev) return; input_free_device(dev->input_dev); put_device(&dev->dev); /* kfree(dev) will be called by the callback function rc_dev_release() */ module_put(THIS_MODULE); } EXPORT_SYMBOL_GPL(rc_free_device); static void devm_rc_alloc_release(struct device *dev, void *res) { rc_free_device(*(struct rc_dev **)res); } struct rc_dev *devm_rc_allocate_device(struct device *dev, enum rc_driver_type type) { struct rc_dev **dr, *rc; dr = devres_alloc(devm_rc_alloc_release, sizeof(*dr), GFP_KERNEL); if (!dr) return NULL; rc = rc_allocate_device(type); if (!rc) { devres_free(dr); return NULL; } rc->dev.parent = dev; rc->managed_alloc = true; *dr = rc; devres_add(dev, dr); return rc; } EXPORT_SYMBOL_GPL(devm_rc_allocate_device); static int rc_prepare_rx_device(struct rc_dev *dev) { int rc; struct rc_map *rc_map; u64 rc_proto; if (!dev->map_name) return -EINVAL; rc_map = rc_map_get(dev->map_name); if (!rc_map) rc_map = rc_map_get(RC_MAP_EMPTY); if (!rc_map || !rc_map->scan || rc_map->size == 0) return -EINVAL; rc = ir_setkeytable(dev, rc_map); if (rc) return rc; rc_proto = BIT_ULL(rc_map->rc_proto); if (dev->driver_type == RC_DRIVER_SCANCODE && !dev->change_protocol) dev->enabled_protocols = dev->allowed_protocols; if (dev->driver_type == RC_DRIVER_IR_RAW) ir_raw_load_modules(&rc_proto); if (dev->change_protocol) { rc = dev->change_protocol(dev, &rc_proto); if (rc < 0) goto out_table; dev->enabled_protocols = rc_proto; } /* Keyboard events */ set_bit(EV_KEY, dev->input_dev->evbit); set_bit(EV_REP, dev->input_dev->evbit); set_bit(EV_MSC, dev->input_dev->evbit); set_bit(MSC_SCAN, dev->input_dev->mscbit); /* Pointer/mouse events */ set_bit(INPUT_PROP_POINTING_STICK, dev->input_dev->propbit); set_bit(EV_REL, dev->input_dev->evbit); set_bit(REL_X, dev->input_dev->relbit); set_bit(REL_Y, dev->input_dev->relbit); if (dev->open) dev->input_dev->open = ir_open; if (dev->close) dev->input_dev->close = ir_close; dev->input_dev->dev.parent = &dev->dev; memcpy(&dev->input_dev->id, &dev->input_id, sizeof(dev->input_id)); dev->input_dev->phys = dev->input_phys; dev->input_dev->name = dev->device_name; return 0; out_table: ir_free_table(&dev->rc_map); return rc; } static int rc_setup_rx_device(struct rc_dev *dev) { int rc; /* rc_open will be called here */ rc = input_register_device(dev->input_dev); if (rc) return rc; /* * Default delay of 250ms is too short for some protocols, especially * since the timeout is currently set to 250ms. Increase it to 500ms, * to avoid wrong repetition of the keycodes. Note that this must be * set after the call to input_register_device(). 
*/ if (dev->allowed_protocols == RC_PROTO_BIT_CEC) dev->input_dev->rep[REP_DELAY] = 0; else dev->input_dev->rep[REP_DELAY] = 500; /* * As a repeat event on protocols like RC-5 and NEC take as long as * 110/114ms, using 33ms as a repeat period is not the right thing * to do. */ dev->input_dev->rep[REP_PERIOD] = 125; return 0; } static void rc_free_rx_device(struct rc_dev *dev) { if (!dev) return; if (dev->input_dev) { input_unregister_device(dev->input_dev); dev->input_dev = NULL; } ir_free_table(&dev->rc_map); } int rc_register_device(struct rc_dev *dev) { const char *path; int attr = 0; int minor; int rc; if (!dev) return -EINVAL; minor = ida_alloc_max(&rc_ida, RC_DEV_MAX - 1, GFP_KERNEL); if (minor < 0) return minor; dev->minor = minor; dev_set_name(&dev->dev, "rc%u", dev->minor); dev_set_drvdata(&dev->dev, dev); dev->dev.groups = dev->sysfs_groups; if (dev->driver_type == RC_DRIVER_SCANCODE && !dev->change_protocol) dev->sysfs_groups[attr++] = &rc_dev_ro_protocol_attr_grp; else if (dev->driver_type != RC_DRIVER_IR_RAW_TX) dev->sysfs_groups[attr++] = &rc_dev_rw_protocol_attr_grp; if (dev->s_filter) dev->sysfs_groups[attr++] = &rc_dev_filter_attr_grp; if (dev->s_wakeup_filter) dev->sysfs_groups[attr++] = &rc_dev_wakeup_filter_attr_grp; dev->sysfs_groups[attr++] = NULL; if (dev->driver_type == RC_DRIVER_IR_RAW) { rc = ir_raw_event_prepare(dev); if (rc < 0) goto out_minor; } if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { rc = rc_prepare_rx_device(dev); if (rc) goto out_raw; } dev->registered = true; rc = device_add(&dev->dev); if (rc) goto out_rx_free; path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); dev_info(&dev->dev, "%s as %s\n", dev->device_name ?: "Unspecified device", path ?: "N/A"); kfree(path); /* * once the input device is registered in rc_setup_rx_device, * userspace can open the input device and rc_open() will be called * as a result. This results in driver code being allowed to submit * keycodes with rc_keydown, so lirc must be registered first. */ if (dev->allowed_protocols != RC_PROTO_BIT_CEC) { rc = lirc_register(dev); if (rc < 0) goto out_dev; } if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { rc = rc_setup_rx_device(dev); if (rc) goto out_lirc; } if (dev->driver_type == RC_DRIVER_IR_RAW) { rc = ir_raw_event_register(dev); if (rc < 0) goto out_rx; } dev_dbg(&dev->dev, "Registered rc%u (driver: %s)\n", dev->minor, dev->driver_name ? 
dev->driver_name : "unknown"); return 0; out_rx: rc_free_rx_device(dev); out_lirc: if (dev->allowed_protocols != RC_PROTO_BIT_CEC) lirc_unregister(dev); out_dev: device_del(&dev->dev); out_rx_free: ir_free_table(&dev->rc_map); out_raw: ir_raw_event_free(dev); out_minor: ida_free(&rc_ida, minor); return rc; } EXPORT_SYMBOL_GPL(rc_register_device); static void devm_rc_release(struct device *dev, void *res) { rc_unregister_device(*(struct rc_dev **)res); } int devm_rc_register_device(struct device *parent, struct rc_dev *dev) { struct rc_dev **dr; int ret; dr = devres_alloc(devm_rc_release, sizeof(*dr), GFP_KERNEL); if (!dr) return -ENOMEM; ret = rc_register_device(dev); if (ret) { devres_free(dr); return ret; } *dr = dev; devres_add(parent, dr); return 0; } EXPORT_SYMBOL_GPL(devm_rc_register_device); void rc_unregister_device(struct rc_dev *dev) { if (!dev) return; if (dev->driver_type == RC_DRIVER_IR_RAW) ir_raw_event_unregister(dev); del_timer_sync(&dev->timer_keyup); del_timer_sync(&dev->timer_repeat); mutex_lock(&dev->lock); if (dev->users && dev->close) dev->close(dev); dev->registered = false; mutex_unlock(&dev->lock); rc_free_rx_device(dev); /* * lirc device should be freed with dev->registered = false, so * that userspace polling will get notified. */ if (dev->allowed_protocols != RC_PROTO_BIT_CEC) lirc_unregister(dev); device_del(&dev->dev); ida_free(&rc_ida, dev->minor); if (!dev->managed_alloc) rc_free_device(dev); } EXPORT_SYMBOL_GPL(rc_unregister_device); /* * Init/exit code for the module. Basically, creates/removes /sys/class/rc */ static int __init rc_core_init(void) { int rc = class_register(&rc_class); if (rc) { pr_err("rc_core: unable to register rc class\n"); return rc; } rc = lirc_dev_init(); if (rc) { pr_err("rc_core: unable to init lirc\n"); class_unregister(&rc_class); return rc; } led_trigger_register_simple("rc-feedback", &led_feedback); rc_map_register(&empty_map); #ifdef CONFIG_MEDIA_CEC_RC rc_map_register(&cec_map); #endif return 0; } static void __exit rc_core_exit(void) { lirc_dev_exit(); class_unregister(&rc_class); led_trigger_unregister_simple(led_feedback); #ifdef CONFIG_MEDIA_CEC_RC rc_map_unregister(&cec_map); #endif rc_map_unregister(&empty_map); } subsys_initcall(rc_core_init); module_exit(rc_core_exit); MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_LICENSE("GPL v2");
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_CPUMASK_H #define __LINUX_CPUMASK_H /* * Cpumasks provide a bitmap suitable for representing the * set of CPUs in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ #include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/atomic.h> #include <linux/bug.h> #include <linux/gfp_types.h> #include <linux/numa.h> /* Don't assign or return these: may not be this big! */ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; /** * cpumask_bits - get the bits in a cpumask * @maskp: the struct cpumask * * * You should only assume nr_cpu_ids bits of this mask are valid. This is * a macro so it's const-correct. */ #define cpumask_bits(maskp) ((maskp)->bits) /** * cpumask_pr_args - printf args to output a cpumask * @maskp: cpumask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a cpumask. */ #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) #if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) #define nr_cpu_ids ((unsigned int)NR_CPUS) #else extern unsigned int nr_cpu_ids; #endif static inline void set_nr_cpu_ids(unsigned int nr) { #if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) WARN_ON(nr != nr_cpu_ids); #else nr_cpu_ids = nr; #endif } /* * We have several different "preferred sizes" for the cpumask * operations, depending on operation. * * For example, the bitmap scanning and operating operations have * optimized routines that work for the single-word case, but only when * the size is constant.
So if NR_CPUS fits in one single word, we are * better off using that small constant, in order to trigger the * optimized bit finding. That is 'small_cpumask_size'. * * The clearing and copying operations will similarly perform better * with a constant size, but we limit that size arbitrarily to four * words. We call this 'large_cpumask_size'. * * Finally, some operations just want the exact limit, either because * they set bits or just don't have any faster fixed-sized versions. We * call this just 'nr_cpumask_bits'. * * Note that these optional constants are always guaranteed to be at * least as big as 'nr_cpu_ids' itself is, and all our cpumask * allocations are at least that size (see cpumask_size()). The * optimization comes from being able to potentially use a compile-time * constant instead of a run-time generated exact number of CPUs. */ #if NR_CPUS <= BITS_PER_LONG #define small_cpumask_bits ((unsigned int)NR_CPUS) #define large_cpumask_bits ((unsigned int)NR_CPUS) #elif NR_CPUS <= 4*BITS_PER_LONG #define small_cpumask_bits nr_cpu_ids #define large_cpumask_bits ((unsigned int)NR_CPUS) #else #define small_cpumask_bits nr_cpu_ids #define large_cpumask_bits nr_cpu_ids #endif #define nr_cpumask_bits nr_cpu_ids /* * The following particular system cpumasks and operations manage * possible, present, active and online cpus. * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * * The cpu_possible_mask is fixed at boot time, as the set of CPU IDs * that it is possible might ever be plugged in at anytime during the * life of that system boot. The cpu_present_mask is dynamic(*), * representing which CPUs are currently plugged in. And * cpu_online_mask is the dynamic subset of cpu_present_mask, * indicating those CPUs available for scheduling. * * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise * cpu_present_mask is just a copy of cpu_possible_mask. * * (*) Well, cpu_present_mask is dynamic in the hotplug case. If not * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: * 1) UP ARCHes (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() * and cpu_*() macros in the UP case. This ugliness is a UP * optimization - don't waste any instructions or memory references * asking if you're online or how many CPUs there are if there is * only one CPU. 
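 *
 * Example (illustrative): typical read-only use of these masks with the
 * accessors and iterators defined later in this header:
 *
 *	unsigned int cpu;
 *
 *	if (cpumask_test_cpu(3, cpu_online_mask))
 *		pr_info("CPU3 is online\n");
 *
 *	for_each_cpu(cpu, cpu_possible_mask)
 *		pr_info("CPU%u is possible\n", cpu);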
*/ extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; extern struct cpumask __cpu_dying_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) #define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask) extern atomic_t __num_online_cpus; extern cpumask_t cpus_booted_once_mask; static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) { #ifdef CONFIG_DEBUG_PER_CPU_MAPS WARN_ON_ONCE(cpu >= bits); #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ } /* verify cpu argument to cpumask_* operators */ static __always_inline unsigned int cpumask_check(unsigned int cpu) { cpu_max_bits_warn(cpu, small_cpumask_bits); return cpu; } /** * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer * * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { return find_first_bit(cpumask_bits(srcp), small_cpumask_bits); } /** * cpumask_first_zero - get the first unset cpu in a cpumask * @srcp: the cpumask pointer * * Return: >= nr_cpu_ids if all cpus are set. */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits); } /** * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 * @srcp1: the first input * @srcp2: the second input * * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * * Return: >= nr_cpumask_bits if no CPUs set. */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { return find_last_bit(cpumask_bits(srcp), small_cpumask_bits); } /** * cpumask_next - get the next cpu in a cpumask * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * * Return: >= nr_cpu_ids if no further cpus set. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); return find_next_bit(cpumask_bits(srcp), small_cpumask_bits, n + 1); } /** * cpumask_next_zero - get the next unset cpu in a cpumask * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * * Return: >= nr_cpu_ids if no further cpus unset. */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. 
*/ if (n != -1) cpumask_check(n); return find_next_zero_bit(cpumask_bits(srcp), small_cpumask_bits, n+1); } #if NR_CPUS == 1 /* Uniprocessor: there is only one valid CPU */ static inline unsigned int cpumask_local_spread(unsigned int i, int node) { return 0; } static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) { return cpumask_first_and(src1p, src2p); } static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } #else unsigned int cpumask_local_spread(unsigned int i, int node); unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p); unsigned int cpumask_any_distribute(const struct cpumask *srcp); #endif /* NR_CPUS */ /** * cpumask_next_and - get the next cpu in *src1p & *src2p * @n: the cpu prior to the place to search (i.e. return will be > @n) * @src1p: the first cpumask pointer * @src2p: the second cpumask pointer * * Return: >= nr_cpu_ids if no further cpus set in both. */ static inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, const struct cpumask *src2p) { /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits, n + 1); } /** * for_each_cpu - iterate over every cpu in a mask * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask pointer * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu(cpu, mask) \ for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) #if NR_CPUS == 1 static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { cpumask_check(start); if (n != -1) cpumask_check(n); /* * Return the first available CPU when wrapping, or when starting before cpu0, * since there is only one valid option. */ if (wrap && n >= 0) return nr_cpumask_bits; return cpumask_first(mask); } #else unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); #endif /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location * @cpu: the (optionally unsigned) integer iterator * @mask: the cpumask pointer * @start: the start location * * The implementation does not assume any bit in @mask is set (including @start). * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_wrap(cpu, mask, start) \ for_each_set_bit_wrap(cpu, cpumask_bits(mask), small_cpumask_bits, start) /** * for_each_cpu_and - iterate over every cpu in both masks * @cpu: the (optionally unsigned) integer iterator * @mask1: the first cpumask pointer * @mask2: the second cpumask pointer * * This saves a temporary CPU mask in many places. It is equivalent to: * struct cpumask tmp; * cpumask_and(&tmp, &mask1, &mask2); * for_each_cpu(cpu, &tmp) * ... * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_and(cpu, mask1, mask2) \ for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding * those present in another. * @cpu: the (optionally unsigned) integer iterator * @mask1: the first cpumask pointer * @mask2: the second cpumask pointer * * This saves a temporary CPU mask in many places. It is equivalent to: * struct cpumask tmp; * cpumask_andnot(&tmp, &mask1, &mask2); * for_each_cpu(cpu, &tmp) * ... * * After the loop, cpu is >= nr_cpu_ids. 
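 *
 * Example (illustrative): walking the CPUs that are possible but currently
 * offline:
 *
 *	unsigned int cpu;
 *
 *	for_each_cpu_andnot(cpu, cpu_possible_mask, cpu_online_mask)
 *		pr_debug("cpu%u is offline\n", cpu);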
*/ #define for_each_cpu_andnot(cpu, mask1, mask2) \ for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * for_each_cpu_or - iterate over every cpu present in either mask * @cpu: the (optionally unsigned) integer iterator * @mask1: the first cpumask pointer * @mask2: the second cpumask pointer * * This saves a temporary CPU mask in many places. It is equivalent to: * struct cpumask tmp; * cpumask_or(&tmp, &mask1, &mask2); * for_each_cpu(cpu, &tmp) * ... * * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_or(cpu, mask1, mask2) \ for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * cpumask_any_but - return a "random" in a cpumask, but not this one. * @mask: the cpumask to search * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { unsigned int i; cpumask_check(cpu); for_each_cpu(i, mask) if (i != cpu) break; return i; } /** * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer * @cpu: the Nth cpu to find, starting from 0 * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu)); } /** * cpumask_nth_and - get the Nth cpu in 2 cpumasks * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer * @cpu: the Nth cpu to find, starting from 0 * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits, cpumask_check(cpu)); } /** * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer * @cpu: the Nth cpu to find, starting from 0 * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits, cpumask_check(cpu)); } /** * cpumask_nth_and_andnot - get the Nth cpu set in 1st and 2nd cpumask, and clear in 3rd. * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer * @srcp3: the cpumask pointer * @cpu: the Nth cpu to find, starting from 0 * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static __always_inline unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2, const struct cpumask *srcp3) { return find_nth_and_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), cpumask_bits(srcp3), small_cpumask_bits, cpumask_check(cpu)); } #define CPU_BITS_NONE \ { \ [0 ... 
BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } #define CPU_BITS_CPU0 \ { \ [0] = 1UL \ } /** * cpumask_set_cpu - set a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } /** * cpumask_clear_cpu - clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ static __always_inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) { clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) { __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } /** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * Return: true if @cpu is set in @cpumask, else returns false */ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } /** * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * test_and_set_bit wrapper for cpumasks. * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } /** * cpumask_test_and_clear_cpu - atomically test and clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * * test_and_clear_bit wrapper for cpumasks. 
* * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ static inline void cpumask_setall(struct cpumask *dstp) { if (small_const_nbits(small_cpumask_bits)) { cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits); return; } bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ static inline void cpumask_clear(struct cpumask *dstp) { bitmap_zero(cpumask_bits(dstp), large_cpumask_bits); } /** * cpumask_and - *dstp = *src1p & *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input * * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); } /** * cpumask_or - *dstp = *src1p | *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input */ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); } /** * cpumask_xor - *dstp = *src1p ^ *src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input */ static inline void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); } /** * cpumask_andnot - *dstp = *src1p & ~*src2p * @dstp: the cpumask result * @src1p: the first input * @src2p: the second input * * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); } /** * cpumask_equal - *src1p == *src2p * @src1p: the first input * @src2p: the second input * * Return: true if the cpumasks are equal, false if not */ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); } /** * cpumask_or_equal - *src1p | *src2p == *src3p * @src1p: the first input * @src2p: the second input * @src3p: the third input * * Return: true if first cpumask ORed with second cpumask == third cpumask, * otherwise false */ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), cpumask_bits(src3p), small_cpumask_bits); } /** * cpumask_intersects - (*src1p & *src2p) != 0 * @src1p: the first input * @src2p: the second input * * Return: true if first cpumask ANDed with second cpumask is non-empty, * otherwise false */ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); } /** * cpumask_subset - (*src1p & ~*src2p) == 0 * @src1p: the first input * @src2p: the second input * * Return: true 
if *@src1p is a subset of *@src2p, else returns false */ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); } /** * cpumask_empty - *srcp == 0 * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. * * Return: true if srcp is empty (has no bits set), else false */ static inline bool cpumask_empty(const struct cpumask *srcp) { return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits); } /** * cpumask_full - *srcp == 0xFFFFFFFF... * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. * * Return: true if srcp is full (has all bits set), else false */ static inline bool cpumask_full(const struct cpumask *srcp) { return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); } /** * cpumask_weight - Count of bits in *srcp * @srcp: the cpumask to count bits (< nr_cpu_ids) in. * * Return: count of bits set in *srcp */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits); } /** * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2) * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. * * Return: count of bits set in both *srcp1 and *srcp2 */ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** * cpumask_weight_andnot - Count of bits in (*srcp1 & ~*srcp2) * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. * * Return: count of bits set in both *srcp1 and *srcp2 */ static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, const struct cpumask *srcp2) { return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift * @n: the number of bits to shift by */ static inline void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, small_cpumask_bits); } /** * cpumask_shift_left - *dstp = *srcp << n * @dstp: the cpumask result * @srcp: the input to shift * @n: the number of bits to shift by */ static inline void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); } /** * cpumask_copy - *dstp = *srcp * @dstp: the result * @srcp: the input cpumask */ static inline void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits); } /** * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any(srcp) cpumask_first(srcp) /** * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 * @mask1: the first input cpumask * @mask2: the second input cpumask * * Return: >= nr_cpu_ids if no cpus set. 
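 *
 * Example (illustrative): picking one CPU that is both online and present in
 * a caller-supplied mask; @allowed is a hypothetical variable:
 *
 *	unsigned int cpu = cpumask_any_and(cpu_online_mask, allowed);
 *
 *	if (cpu >= nr_cpu_ids)
 *		return -ENODEV;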
*/ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) /** * cpumask_of - the cpumask containing just a given cpu * @cpu: the cpu (< nr_cpu_ids) */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) /** * cpumask_parse_user - extract a cpumask from a user string * @buf: the buffer to extract from * @len: the length of the buffer * @dstp: the cpumask to set. * * Return: -errno, or 0 for success. */ static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_parselist_user - extract a cpumask from a user string * @buf: the buffer to extract from * @len: the length of the buffer * @dstp: the cpumask to set. * * Return: -errno, or 0 for success. */ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_parse - extract a cpumask from a string * @buf: the buffer to extract from * @dstp: the cpumask to set. * * Return: -errno, or 0 for success. */ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpulist_parse - extract a cpumask from a string of ranges * @buf: the buffer to extract from * @dstp: the cpumask to set. * * Return: -errno, or 0 for success. */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } /** * cpumask_size - calculate size to allocate for a 'struct cpumask' in bytes * * Return: size to allocate for a &struct cpumask in bytes */ static inline unsigned int cpumask_size(void) { return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long); } /* * cpumask_var_t: struct cpumask for stack usage. * * Oh, the wicked games we play! In order to make kernel coding a * little more difficult, we typedef cpumask_var_t to an array or a * pointer: doing &mask on an array is a noop, so it still works. * * i.e. * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; * * ... use 'tmpmask' like a normal struct cpumask * ... * * free_cpumask_var(tmpmask); * * * However, there is one notable exception. alloc_cpumask_var() allocates * only nr_cpumask_bits bits (on the other hand, a real cpumask_t always has * NR_CPUS bits). Therefore you must not dereference a cpumask_var_t directly. * * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; * * var = *tmpmask; * * This code performs an NR_CPUS-length memcpy and can lead to memory corruption; * cpumask_copy() provides safe copy functionality. * * Note that there is another evil here: if you define a cpumask_var_t * as a percpu variable, then the way to obtain the address of the cpumask * structure differs between the two implementations, and so does the * this_cpu_* operation that needs to be used. Please use this_cpu_cpumask_var_ptr() * in those cases. The direct use * of this_cpu_ptr() or this_cpu_read() will lead to failures when the * other type of cpumask_var_t implementation is configured. * * Please also note that __cpumask_var_read_mostly can be used to declare * a cpumask_var_t variable itself (not its content) as read mostly. 
*/ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; #define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) #define __cpumask_var_read_mostly __read_mostly bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); } /** * alloc_cpumask_var - allocate a struct cpumask * @mask: pointer to cpumask_var_t where the cpumask is returned * @flags: GFP_ flags * * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is * a nop returning a constant 1 (in <linux/cpumask.h>). * * See alloc_cpumask_var_node. * * Return: %true if allocation succeeded, %false if not */ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); } static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var(mask, flags | __GFP_ZERO); } void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); static inline bool cpumask_available(cpumask_var_t mask) { return mask != NULL; } #else typedef struct cpumask cpumask_var_t[1]; #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; } static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return true; } static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { cpumask_clear(*mask); return true; } static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { cpumask_clear(*mask); return true; } static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } static inline void free_cpumask_var(cpumask_var_t mask) { } static inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } static inline bool cpumask_available(cpumask_var_t mask) { return true; } #endif /* CONFIG_CPUMASK_OFFSTACK */ DEFINE_FREE(free_cpumask_var, struct cpumask *, if (_T) free_cpumask_var(_T)); /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. */ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define cpu_all_mask to_cpumask(cpu_all_bits) /* First bits of cpu_bit_bitmap are in fact unset. 
*/ #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) #if NR_CPUS == 1 /* Uniprocessor: the possible/online/present masks are always "1" */ #define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) #define for_each_online_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) #define for_each_present_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) #else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) #endif /* Wrappers for arch boot code to manipulate normally-constant masks */ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); static inline void reset_cpu_possible_mask(void) { bitmap_zero(cpumask_bits(&__cpu_possible_mask), NR_CPUS); } static inline void set_cpu_possible(unsigned int cpu, bool possible) { if (possible) cpumask_set_cpu(cpu, &__cpu_possible_mask); else cpumask_clear_cpu(cpu, &__cpu_possible_mask); } static inline void set_cpu_present(unsigned int cpu, bool present) { if (present) cpumask_set_cpu(cpu, &__cpu_present_mask); else cpumask_clear_cpu(cpu, &__cpu_present_mask); } void set_cpu_online(unsigned int cpu, bool online); static inline void set_cpu_active(unsigned int cpu, bool active) { if (active) cpumask_set_cpu(cpu, &__cpu_active_mask); else cpumask_clear_cpu(cpu, &__cpu_active_mask); } static inline void set_cpu_dying(unsigned int cpu, bool dying) { if (dying) cpumask_set_cpu(cpu, &__cpu_dying_mask); else cpumask_clear_cpu(cpu, &__cpu_dying_mask); } /** * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * * There are a few places where cpumask_var_t isn't appropriate and * static cpumasks must be used (eg. very early boot), yet we don't * expose the definition of 'struct cpumask'. * * This does the conversion, and can be used as a constant initializer. */ #define to_cpumask(bitmap) \ ((struct cpumask *)(1 ? (bitmap) \ : (void *)sizeof(__check_is_bitmap(bitmap)))) static inline int __check_is_bitmap(const unsigned long *bitmap) { return 1; } /* * Special-case data structure for "single bit set only" constant CPU masks. * * We pre-generate all the 64 (or 32) possible bit positions, with enough * padding to the left and the right, and return the constant pointer * appropriately offset. */ extern const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; static inline const struct cpumask *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; return to_cpumask(p); } #if NR_CPUS > 1 /** * num_online_cpus() - Read the number of online CPUs * * Despite the fact that __num_online_cpus is of type atomic_t, this * interface gives only a momentary snapshot and is not protected against * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. 
* * Return: momentary snapshot of the number of online CPUs */ static __always_inline unsigned int num_online_cpus(void) { return raw_atomic_read(&__num_online_cpus); } #define num_possible_cpus() cpumask_weight(cpu_possible_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) static inline bool cpu_online(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_online_mask); } static inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); } static inline bool cpu_present(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_present_mask); } static inline bool cpu_active(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_active_mask); } static inline bool cpu_dying(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_dying_mask); } #else #define num_online_cpus() 1U #define num_possible_cpus() 1U #define num_present_cpus() 1U #define num_active_cpus() 1U static inline bool cpu_online(unsigned int cpu) { return cpu == 0; } static inline bool cpu_possible(unsigned int cpu) { return cpu == 0; } static inline bool cpu_present(unsigned int cpu) { return cpu == 0; } static inline bool cpu_active(unsigned int cpu) { return cpu == 0; } static inline bool cpu_dying(unsigned int cpu) { return false; } #endif /* NR_CPUS > 1 */ #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) #if NR_CPUS <= BITS_PER_LONG #define CPU_BITS_ALL \ { \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } #else /* NR_CPUS > BITS_PER_LONG */ #define CPU_BITS_ALL \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } #endif /* NR_CPUS > BITS_PER_LONG */ /** * cpumap_print_to_pagebuf - copies the cpumask into the buffer either * as comma-separated list of cpus or hex values of cpumask * @list: indicates whether the cpumap must be list * @mask: the cpumask to copy * @buf: the buffer to copy into * * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. */ static inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), nr_cpu_ids); } /** * cpumap_print_bitmask_to_buf - copies the cpumask into the buffer as * hex values of cpumask * * @buf: the buffer to copy into * @mask: the cpumask to copy * @off: in the string from which we are copying, we copy to @buf * @count: the maximum number of bytes to print * * The function prints the cpumask into the buffer as hex values of * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ static inline ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, loff_t off, size_t count) { return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; } /** * cpumap_print_list_to_buf - copies the cpumask into the buffer as * comma-separated list of cpus * @buf: the buffer to copy into * @mask: the cpumask to copy * @off: in the string from which we are copying, we copy to @buf * @count: the maximum number of bytes to print * * Everything is same with the above cpumap_print_bitmask_to_buf() * except the print format. * * Return: the length of how many bytes have been copied, excluding * terminating '\0'. 
*/ static inline ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, loff_t off, size_t count) { return bitmap_print_list_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; } #if NR_CPUS <= BITS_PER_LONG #define CPU_MASK_ALL \ (cpumask_t) { { \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } } #else #define CPU_MASK_ALL \ (cpumask_t) { { \ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } } #endif /* NR_CPUS > BITS_PER_LONG */ #define CPU_MASK_NONE \ (cpumask_t) { { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } } #define CPU_MASK_CPU0 \ (cpumask_t) { { \ [0] = 1UL \ } } /* * Provide a valid theoretical max size for cpumap and cpulist sysfs files * to avoid breaking userspace which may allocate a buffer based on the size * reported by e.g. fstat. * * for cpumap NR_CPUS * 9/32 - 1 should be an exact length. * * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up * to 2 orders of magnitude larger than 8192. And then we divide by 2 to * cover a worst-case of every other cpu being on one of two nodes for a * very large NR_CPUS. * * Use PAGE_SIZE as a minimum for smaller configurations while avoiding * unsigned comparison to -1. */ #define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \ ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) #define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE) #endif /* __LINUX_CPUMASK_H */
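/*
 * Illustrative sketch, not part of the header above: one way the
 * cpumask_var_t allocation helpers and the set operations documented
 * earlier can be combined. The function name and the GFP_KERNEL
 * allocation context are assumptions made for this example only.
 */
#include <linux/cpumask.h>
#include <linux/gfp.h>

static int example_count_online_subset(const struct cpumask *allowed)
{
	cpumask_var_t tmp;
	unsigned int nr;

	/* Off-stack masks must be allocated; with CONFIG_CPUMASK_OFFSTACK=n this is a no-op. */
	if (!zalloc_cpumask_var(&tmp, GFP_KERNEL))
		return -ENOMEM;

	/* tmp = allowed & cpu_online_mask; the result may legitimately be empty. */
	cpumask_and(tmp, allowed, cpu_online_mask);
	nr = cpumask_weight(tmp);

	/* Always pair [z]alloc_cpumask_var() with free_cpumask_var(). */
	free_cpumask_var(tmp);
	return nr;
}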
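/*
 * Second illustrative sketch, again not part of the header: round-tripping a
 * CPU list string such as "0-3,8" through the parsing and printing helpers
 * documented above. The function name is an assumption for the example;
 * @page is expected to be a PAGE_SIZE buffer as cpumap_print_to_pagebuf()
 * requires.
 */
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static ssize_t example_parse_and_print(const char *list, char *page)
{
	cpumask_var_t mask;
	ssize_t ret;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	/* cpulist_parse() accepts range syntax and returns -errno on failure. */
	ret = cpulist_parse(list, mask);
	if (!ret)
		/* On success, print the mask back as a comma-separated list. */
		ret = cpumap_print_to_pagebuf(true, page, mask);

	free_cpumask_var(mask);
	return ret;
}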
// SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> * Abramo Bagnara <abramo@alsa-project.org> */ #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/time.h> #include <linux/math64.h> #include <linux/export.h> #include <sound/core.h> #include <sound/control.h> #include <sound/tlv.h> #include <sound/info.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include <sound/timer.h> #include "pcm_local.h" #ifdef CONFIG_SND_PCM_XRUN_DEBUG #define CREATE_TRACE_POINTS #include "pcm_trace.h" #else #define trace_hwptr(substream, pos, in_interrupt) #define trace_xrun(substream) #define trace_hw_ptr_error(substream, reason) #define trace_applptr(substream, prev, curr) #endif static int fill_silence_frames(struct snd_pcm_substream *substream, snd_pcm_uframes_t off, snd_pcm_uframes_t frames); static inline void update_silence_vars(struct snd_pcm_runtime *runtime, snd_pcm_uframes_t ptr, snd_pcm_uframes_t new_ptr) { snd_pcm_sframes_t delta; delta = new_ptr - ptr; if (delta == 0) return; if (delta < 0) delta += runtime->boundary; if ((snd_pcm_uframes_t)delta < runtime->silence_filled) runtime->silence_filled -= delta; else runtime->silence_filled = 0; runtime->silence_start = new_ptr; } /* * fill ring buffer with silence * runtime->silence_start: starting pointer to silence area * runtime->silence_filled: size filled with silence * runtime->silence_threshold: threshold from application * runtime->silence_size: maximal size from application * * when runtime->silence_size >= runtime->boundary - fill processed area with silence immediately */ void snd_pcm_playback_silence(struct snd_pcm_substream *substream, snd_pcm_uframes_t new_hw_ptr) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t frames, ofs, transfer; int err; if (runtime->silence_size < runtime->boundary) { snd_pcm_sframes_t noise_dist; snd_pcm_uframes_t appl_ptr = READ_ONCE(runtime->control->appl_ptr); update_silence_vars(runtime, runtime->silence_start, appl_ptr); /* initialization outside pointer updates */ if (new_hw_ptr == ULONG_MAX) new_hw_ptr = runtime->status->hw_ptr; /* get hw_avail with the boundary crossing */ noise_dist = appl_ptr - new_hw_ptr; if (noise_dist < 0) noise_dist += runtime->boundary; /* total noise 
distance */ noise_dist += runtime->silence_filled; if (noise_dist >= (snd_pcm_sframes_t) runtime->silence_threshold) return; frames = runtime->silence_threshold - noise_dist; if (frames > runtime->silence_size) frames = runtime->silence_size; } else { /* * This filling mode aims at free-running mode (used for example by dmix), * which doesn't update the application pointer. */ snd_pcm_uframes_t hw_ptr = runtime->status->hw_ptr; if (new_hw_ptr == ULONG_MAX) { /* * Initialization, fill the whole unused buffer with silence. * * Usually, this is entered while stopped, before data is queued, * so both pointers are expected to be zero. */ snd_pcm_sframes_t avail = runtime->control->appl_ptr - hw_ptr; if (avail < 0) avail += runtime->boundary; /* * In free-running mode, appl_ptr will be zero even while running, * so we end up with a huge number. There is no useful way to * handle this, so we just clear the whole buffer. */ runtime->silence_filled = avail > runtime->buffer_size ? 0 : avail; runtime->silence_start = hw_ptr; } else { /* Silence the just played area immediately */ update_silence_vars(runtime, hw_ptr, new_hw_ptr); } /* * In this mode, silence_filled actually includes the valid * sample data from the user. */ frames = runtime->buffer_size - runtime->silence_filled; } if (snd_BUG_ON(frames > runtime->buffer_size)) return; if (frames == 0) return; ofs = (runtime->silence_start + runtime->silence_filled) % runtime->buffer_size; do { transfer = ofs + frames > runtime->buffer_size ? runtime->buffer_size - ofs : frames; err = fill_silence_frames(substream, ofs, transfer); snd_BUG_ON(err < 0); runtime->silence_filled += transfer; frames -= transfer; ofs = 0; } while (frames > 0); snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); } #ifdef CONFIG_SND_DEBUG void snd_pcm_debug_name(struct snd_pcm_substream *substream, char *name, size_t len) { snprintf(name, len, "pcmC%dD%d%c:%d", substream->pcm->card->number, substream->pcm->device, substream->stream ? 'c' : 'p', substream->number); } EXPORT_SYMBOL(snd_pcm_debug_name); #endif #define XRUN_DEBUG_BASIC (1<<0) #define XRUN_DEBUG_STACK (1<<1) /* dump also stack */ #define XRUN_DEBUG_JIFFIESCHECK (1<<2) /* do jiffies check */ #ifdef CONFIG_SND_PCM_XRUN_DEBUG #define xrun_debug(substream, mask) \ ((substream)->pstr->xrun_debug & (mask)) #else #define xrun_debug(substream, mask) 0 #endif #define dump_stack_on_xrun(substream) do { \ if (xrun_debug(substream, XRUN_DEBUG_STACK)) \ dump_stack(); \ } while (0) /* call with stream lock held */ void __snd_pcm_xrun(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; trace_xrun(substream); if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE) { struct timespec64 tstamp; snd_pcm_gettime(runtime, &tstamp); runtime->status->tstamp.tv_sec = tstamp.tv_sec; runtime->status->tstamp.tv_nsec = tstamp.tv_nsec; } snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); if (xrun_debug(substream, XRUN_DEBUG_BASIC)) { char name[16]; snd_pcm_debug_name(substream, name, sizeof(name)); pcm_warn(substream->pcm, "XRUN: %s\n", name); dump_stack_on_xrun(substream); } } #ifdef CONFIG_SND_PCM_XRUN_DEBUG #define hw_ptr_error(substream, in_interrupt, reason, fmt, args...) \ do { \ trace_hw_ptr_error(substream, reason); \ if (xrun_debug(substream, XRUN_DEBUG_BASIC)) { \ pr_err_ratelimited("ALSA: PCM: [%c] " reason ": " fmt, \ (in_interrupt) ? 'Q' : 'P', ##args); \ dump_stack_on_xrun(substream); \ } \ } while (0) #else /* ! CONFIG_SND_PCM_XRUN_DEBUG */ #define hw_ptr_error(substream, fmt, args...) 
do { } while (0) #endif int snd_pcm_update_state(struct snd_pcm_substream *substream, struct snd_pcm_runtime *runtime) { snd_pcm_uframes_t avail; avail = snd_pcm_avail(substream); if (avail > runtime->avail_max) runtime->avail_max = avail; if (runtime->state == SNDRV_PCM_STATE_DRAINING) { if (avail >= runtime->buffer_size) { snd_pcm_drain_done(substream); return -EPIPE; } } else { if (avail >= runtime->stop_threshold) { __snd_pcm_xrun(substream); return -EPIPE; } } if (runtime->twake) { if (avail >= runtime->twake) wake_up(&runtime->tsleep); } else if (avail >= runtime->control->avail_min) wake_up(&runtime->sleep); return 0; } static void update_audio_tstamp(struct snd_pcm_substream *substream, struct timespec64 *curr_tstamp, struct timespec64 *audio_tstamp) { struct snd_pcm_runtime *runtime = substream->runtime; u64 audio_frames, audio_nsecs; struct timespec64 driver_tstamp; if (runtime->tstamp_mode != SNDRV_PCM_TSTAMP_ENABLE) return; if (!(substream->ops->get_time_info) || (runtime->audio_tstamp_report.actual_type == SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT)) { /* * provide audio timestamp derived from pointer position * add delay only if requested */ audio_frames = runtime->hw_ptr_wrap + runtime->status->hw_ptr; if (runtime->audio_tstamp_config.report_delay) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) audio_frames -= runtime->delay; else audio_frames += runtime->delay; } audio_nsecs = div_u64(audio_frames * 1000000000LL, runtime->rate); *audio_tstamp = ns_to_timespec64(audio_nsecs); } if (runtime->status->audio_tstamp.tv_sec != audio_tstamp->tv_sec || runtime->status->audio_tstamp.tv_nsec != audio_tstamp->tv_nsec) { runtime->status->audio_tstamp.tv_sec = audio_tstamp->tv_sec; runtime->status->audio_tstamp.tv_nsec = audio_tstamp->tv_nsec; runtime->status->tstamp.tv_sec = curr_tstamp->tv_sec; runtime->status->tstamp.tv_nsec = curr_tstamp->tv_nsec; } /* * re-take a driver timestamp to let apps detect if the reference tstamp * read by low-level hardware was provided with a delay */ snd_pcm_gettime(substream->runtime, &driver_tstamp); runtime->driver_tstamp = driver_tstamp; } static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream, unsigned int in_interrupt) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t pos; snd_pcm_uframes_t old_hw_ptr, new_hw_ptr, hw_base; snd_pcm_sframes_t hdelta, delta; unsigned long jdelta; unsigned long curr_jiffies; struct timespec64 curr_tstamp; struct timespec64 audio_tstamp; int crossed_boundary = 0; old_hw_ptr = runtime->status->hw_ptr; /* * group pointer, time and jiffies reads to allow for more * accurate correlations/corrections. 
* The values are stored at the end of this routine after * corrections for hw_ptr position */ pos = substream->ops->pointer(substream); curr_jiffies = jiffies; if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE) { if ((substream->ops->get_time_info) && (runtime->audio_tstamp_config.type_requested != SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT)) { substream->ops->get_time_info(substream, &curr_tstamp, &audio_tstamp, &runtime->audio_tstamp_config, &runtime->audio_tstamp_report); /* re-test in case tstamp type is not supported in hardware and was demoted to DEFAULT */ if (runtime->audio_tstamp_report.actual_type == SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT) snd_pcm_gettime(runtime, &curr_tstamp); } else snd_pcm_gettime(runtime, &curr_tstamp); } if (pos == SNDRV_PCM_POS_XRUN) { __snd_pcm_xrun(substream); return -EPIPE; } if (pos >= runtime->buffer_size) { if (printk_ratelimit()) { char name[16]; snd_pcm_debug_name(substream, name, sizeof(name)); pcm_err(substream->pcm, "invalid position: %s, pos = %ld, buffer size = %ld, period size = %ld\n", name, pos, runtime->buffer_size, runtime->period_size); } pos = 0; } pos -= pos % runtime->min_align; trace_hwptr(substream, pos, in_interrupt); hw_base = runtime->hw_ptr_base; new_hw_ptr = hw_base + pos; if (in_interrupt) { /* we know that one period was processed */ /* delta = "expected next hw_ptr" for in_interrupt != 0 */ delta = runtime->hw_ptr_interrupt + runtime->period_size; if (delta > new_hw_ptr) { /* check for double acknowledged interrupts */ hdelta = curr_jiffies - runtime->hw_ptr_jiffies; if (hdelta > runtime->hw_ptr_buffer_jiffies/2 + 1) { hw_base += runtime->buffer_size; if (hw_base >= runtime->boundary) { hw_base = 0; crossed_boundary++; } new_hw_ptr = hw_base + pos; goto __delta; } } } /* new_hw_ptr might be lower than old_hw_ptr in case when */ /* pointer crosses the end of the ring buffer */ if (new_hw_ptr < old_hw_ptr) { hw_base += runtime->buffer_size; if (hw_base >= runtime->boundary) { hw_base = 0; crossed_boundary++; } new_hw_ptr = hw_base + pos; } __delta: delta = new_hw_ptr - old_hw_ptr; if (delta < 0) delta += runtime->boundary; if (runtime->no_period_wakeup) { snd_pcm_sframes_t xrun_threshold; /* * Without regular period interrupts, we have to check * the elapsed time to detect xruns. */ jdelta = curr_jiffies - runtime->hw_ptr_jiffies; if (jdelta < runtime->hw_ptr_buffer_jiffies / 2) goto no_delta_check; hdelta = jdelta - delta * HZ / runtime->rate; xrun_threshold = runtime->hw_ptr_buffer_jiffies / 2 + 1; while (hdelta > xrun_threshold) { delta += runtime->buffer_size; hw_base += runtime->buffer_size; if (hw_base >= runtime->boundary) { hw_base = 0; crossed_boundary++; } new_hw_ptr = hw_base + pos; hdelta -= runtime->hw_ptr_buffer_jiffies; } goto no_delta_check; } /* something must be really wrong */ if (delta >= runtime->buffer_size + runtime->period_size) { hw_ptr_error(substream, in_interrupt, "Unexpected hw_ptr", "(stream=%i, pos=%ld, new_hw_ptr=%ld, old_hw_ptr=%ld)\n", substream->stream, (long)pos, (long)new_hw_ptr, (long)old_hw_ptr); return 0; } /* Do jiffies check only in xrun_debug mode */ if (!xrun_debug(substream, XRUN_DEBUG_JIFFIESCHECK)) goto no_jiffies_check; /* Skip the jiffies check for hardwares with BATCH flag. * Such hardware usually just increases the position at each IRQ, * thus it can't give any strange position. 
*/ if (runtime->hw.info & SNDRV_PCM_INFO_BATCH) goto no_jiffies_check; hdelta = delta; if (hdelta < runtime->delay) goto no_jiffies_check; hdelta -= runtime->delay; jdelta = curr_jiffies - runtime->hw_ptr_jiffies; if (((hdelta * HZ) / runtime->rate) > jdelta + HZ/100) { delta = jdelta / (((runtime->period_size * HZ) / runtime->rate) + HZ/100); /* move new_hw_ptr according jiffies not pos variable */ new_hw_ptr = old_hw_ptr; hw_base = delta; /* use loop to avoid checks for delta overflows */ /* the delta value is small or zero in most cases */ while (delta > 0) { new_hw_ptr += runtime->period_size; if (new_hw_ptr >= runtime->boundary) { new_hw_ptr -= runtime->boundary; crossed_boundary--; } delta--; } /* align hw_base to buffer_size */ hw_ptr_error(substream, in_interrupt, "hw_ptr skipping", "(pos=%ld, delta=%ld, period=%ld, jdelta=%lu/%lu/%lu, hw_ptr=%ld/%ld)\n", (long)pos, (long)hdelta, (long)runtime->period_size, jdelta, ((hdelta * HZ) / runtime->rate), hw_base, (unsigned long)old_hw_ptr, (unsigned long)new_hw_ptr); /* reset values to proper state */ delta = 0; hw_base = new_hw_ptr - (new_hw_ptr % runtime->buffer_size); } no_jiffies_check: if (delta > runtime->period_size + runtime->period_size / 2) { hw_ptr_error(substream, in_interrupt, "Lost interrupts?", "(stream=%i, delta=%ld, new_hw_ptr=%ld, old_hw_ptr=%ld)\n", substream->stream, (long)delta, (long)new_hw_ptr, (long)old_hw_ptr); } no_delta_check: if (runtime->status->hw_ptr == new_hw_ptr) { runtime->hw_ptr_jiffies = curr_jiffies; update_audio_tstamp(substream, &curr_tstamp, &audio_tstamp); return 0; } if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, new_hw_ptr); if (in_interrupt) { delta = new_hw_ptr - runtime->hw_ptr_interrupt; if (delta < 0) delta += runtime->boundary; delta -= (snd_pcm_uframes_t)delta % runtime->period_size; runtime->hw_ptr_interrupt += delta; if (runtime->hw_ptr_interrupt >= runtime->boundary) runtime->hw_ptr_interrupt -= runtime->boundary; } runtime->hw_ptr_base = hw_base; runtime->status->hw_ptr = new_hw_ptr; runtime->hw_ptr_jiffies = curr_jiffies; if (crossed_boundary) { snd_BUG_ON(crossed_boundary != 1); runtime->hw_ptr_wrap += runtime->boundary; } update_audio_tstamp(substream, &curr_tstamp, &audio_tstamp); return snd_pcm_update_state(substream, runtime); } /* CAUTION: call it with irq disabled */ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream) { return snd_pcm_update_hw_ptr0(substream, 0); } /** * snd_pcm_set_ops - set the PCM operators * @pcm: the pcm instance * @direction: stream direction, SNDRV_PCM_STREAM_XXX * @ops: the operator table * * Sets the given PCM operators to the pcm instance. */ void snd_pcm_set_ops(struct snd_pcm *pcm, int direction, const struct snd_pcm_ops *ops) { struct snd_pcm_str *stream = &pcm->streams[direction]; struct snd_pcm_substream *substream; for (substream = stream->substream; substream != NULL; substream = substream->next) substream->ops = ops; } EXPORT_SYMBOL(snd_pcm_set_ops); /** * snd_pcm_set_sync - set the PCM sync id * @substream: the pcm substream * * Sets the PCM sync identifier for the card. 
*/ void snd_pcm_set_sync(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; runtime->sync.id32[0] = substream->pcm->card->number; runtime->sync.id32[1] = -1; runtime->sync.id32[2] = -1; runtime->sync.id32[3] = -1; } EXPORT_SYMBOL(snd_pcm_set_sync); /* * Standard ioctl routine */ static inline unsigned int div32(unsigned int a, unsigned int b, unsigned int *r) { if (b == 0) { *r = 0; return UINT_MAX; } *r = a % b; return a / b; } static inline unsigned int div_down(unsigned int a, unsigned int b) { if (b == 0) return UINT_MAX; return a / b; } static inline unsigned int div_up(unsigned int a, unsigned int b) { unsigned int r; unsigned int q; if (b == 0) return UINT_MAX; q = div32(a, b, &r); if (r) ++q; return q; } static inline unsigned int mul(unsigned int a, unsigned int b) { if (a == 0) return 0; if (div_down(UINT_MAX, a) < b) return UINT_MAX; return a * b; } static inline unsigned int muldiv32(unsigned int a, unsigned int b, unsigned int c, unsigned int *r) { u_int64_t n = (u_int64_t) a * b; if (c == 0) { *r = 0; return UINT_MAX; } n = div_u64_rem(n, c, r); if (n >= UINT_MAX) { *r = 0; return UINT_MAX; } return n; } /** * snd_interval_refine - refine the interval value of configurator * @i: the interval value to refine * @v: the interval value to refer to * * Refines the interval value with the reference value. * The interval is changed to the range satisfying both intervals. * The interval status (min, max, integer, etc.) are evaluated. * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. */ int snd_interval_refine(struct snd_interval *i, const struct snd_interval *v) { int changed = 0; if (snd_BUG_ON(snd_interval_empty(i))) return -EINVAL; if (i->min < v->min) { i->min = v->min; i->openmin = v->openmin; changed = 1; } else if (i->min == v->min && !i->openmin && v->openmin) { i->openmin = 1; changed = 1; } if (i->max > v->max) { i->max = v->max; i->openmax = v->openmax; changed = 1; } else if (i->max == v->max && !i->openmax && v->openmax) { i->openmax = 1; changed = 1; } if (!i->integer && v->integer) { i->integer = 1; changed = 1; } if (i->integer) { if (i->openmin) { i->min++; i->openmin = 0; } if (i->openmax) { i->max--; i->openmax = 0; } } else if (!i->openmin && !i->openmax && i->min == i->max) i->integer = 1; if (snd_interval_checkempty(i)) { snd_interval_none(i); return -EINVAL; } return changed; } EXPORT_SYMBOL(snd_interval_refine); static int snd_interval_refine_first(struct snd_interval *i) { const unsigned int last_max = i->max; if (snd_BUG_ON(snd_interval_empty(i))) return -EINVAL; if (snd_interval_single(i)) return 0; i->max = i->min; if (i->openmin) i->max++; /* only exclude max value if also excluded before refine */ i->openmax = (i->openmax && i->max >= last_max); return 1; } static int snd_interval_refine_last(struct snd_interval *i) { const unsigned int last_min = i->min; if (snd_BUG_ON(snd_interval_empty(i))) return -EINVAL; if (snd_interval_single(i)) return 0; i->min = i->max; if (i->openmax) i->min--; /* only exclude min value if also excluded before refine */ i->openmin = (i->openmin && i->min <= last_min); return 1; } void snd_interval_mul(const struct snd_interval *a, const struct snd_interval *b, struct snd_interval *c) { if (a->empty || b->empty) { snd_interval_none(c); return; } c->empty = 0; c->min = mul(a->min, b->min); c->openmin = (a->openmin || b->openmin); c->max = mul(a->max, b->max); c->openmax = (a->openmax || b->openmax); c->integer = (a->integer && 
b->integer); } /** * snd_interval_div - refine the interval value with division * @a: dividend * @b: divisor * @c: quotient * * c = a / b * * Returns non-zero if the value is changed, zero if not changed. */ void snd_interval_div(const struct snd_interval *a, const struct snd_interval *b, struct snd_interval *c) { unsigned int r; if (a->empty || b->empty) { snd_interval_none(c); return; } c->empty = 0; c->min = div32(a->min, b->max, &r); c->openmin = (r || a->openmin || b->openmax); if (b->min > 0) { c->max = div32(a->max, b->min, &r); if (r) { c->max++; c->openmax = 1; } else c->openmax = (a->openmax || b->openmin); } else { c->max = UINT_MAX; c->openmax = 0; } c->integer = 0; } /** * snd_interval_muldivk - refine the interval value * @a: dividend 1 * @b: dividend 2 * @k: divisor (as integer) * @c: result * * c = a * b / k * * Returns non-zero if the value is changed, zero if not changed. */ void snd_interval_muldivk(const struct snd_interval *a, const struct snd_interval *b, unsigned int k, struct snd_interval *c) { unsigned int r; if (a->empty || b->empty) { snd_interval_none(c); return; } c->empty = 0; c->min = muldiv32(a->min, b->min, k, &r); c->openmin = (r || a->openmin || b->openmin); c->max = muldiv32(a->max, b->max, k, &r); if (r) { c->max++; c->openmax = 1; } else c->openmax = (a->openmax || b->openmax); c->integer = 0; } /** * snd_interval_mulkdiv - refine the interval value * @a: dividend 1 * @k: dividend 2 (as integer) * @b: divisor * @c: result * * c = a * k / b * * Returns non-zero if the value is changed, zero if not changed. */ void snd_interval_mulkdiv(const struct snd_interval *a, unsigned int k, const struct snd_interval *b, struct snd_interval *c) { unsigned int r; if (a->empty || b->empty) { snd_interval_none(c); return; } c->empty = 0; c->min = muldiv32(a->min, k, b->max, &r); c->openmin = (r || a->openmin || b->openmax); if (b->min > 0) { c->max = muldiv32(a->max, k, b->min, &r); if (r) { c->max++; c->openmax = 1; } else c->openmax = (a->openmax || b->openmin); } else { c->max = UINT_MAX; c->openmax = 0; } c->integer = 0; } /* ---- */ /** * snd_interval_ratnum - refine the interval value * @i: interval to refine * @rats_count: number of ratnum_t * @rats: ratnum_t array * @nump: pointer to store the resultant numerator * @denp: pointer to store the resultant denominator * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. 
*/ int snd_interval_ratnum(struct snd_interval *i, unsigned int rats_count, const struct snd_ratnum *rats, unsigned int *nump, unsigned int *denp) { unsigned int best_num, best_den; int best_diff; unsigned int k; struct snd_interval t; int err; unsigned int result_num, result_den; int result_diff; best_num = best_den = best_diff = 0; for (k = 0; k < rats_count; ++k) { unsigned int num = rats[k].num; unsigned int den; unsigned int q = i->min; int diff; if (q == 0) q = 1; den = div_up(num, q); if (den < rats[k].den_min) continue; if (den > rats[k].den_max) den = rats[k].den_max; else { unsigned int r; r = (den - rats[k].den_min) % rats[k].den_step; if (r != 0) den -= r; } diff = num - q * den; if (diff < 0) diff = -diff; if (best_num == 0 || diff * best_den < best_diff * den) { best_diff = diff; best_den = den; best_num = num; } } if (best_den == 0) { i->empty = 1; return -EINVAL; } t.min = div_down(best_num, best_den); t.openmin = !!(best_num % best_den); result_num = best_num; result_diff = best_diff; result_den = best_den; best_num = best_den = best_diff = 0; for (k = 0; k < rats_count; ++k) { unsigned int num = rats[k].num; unsigned int den; unsigned int q = i->max; int diff; if (q == 0) { i->empty = 1; return -EINVAL; } den = div_down(num, q); if (den > rats[k].den_max) continue; if (den < rats[k].den_min) den = rats[k].den_min; else { unsigned int r; r = (den - rats[k].den_min) % rats[k].den_step; if (r != 0) den += rats[k].den_step - r; } diff = q * den - num; if (diff < 0) diff = -diff; if (best_num == 0 || diff * best_den < best_diff * den) { best_diff = diff; best_den = den; best_num = num; } } if (best_den == 0) { i->empty = 1; return -EINVAL; } t.max = div_up(best_num, best_den); t.openmax = !!(best_num % best_den); t.integer = 0; err = snd_interval_refine(i, &t); if (err < 0) return err; if (snd_interval_single(i)) { if (best_diff * result_den < result_diff * best_den) { result_num = best_num; result_den = best_den; } if (nump) *nump = result_num; if (denp) *denp = result_den; } return err; } EXPORT_SYMBOL(snd_interval_ratnum); /** * snd_interval_ratden - refine the interval value * @i: interval to refine * @rats_count: number of struct ratden * @rats: struct ratden array * @nump: pointer to store the resultant numerator * @denp: pointer to store the resultant denominator * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. 
*/ static int snd_interval_ratden(struct snd_interval *i, unsigned int rats_count, const struct snd_ratden *rats, unsigned int *nump, unsigned int *denp) { unsigned int best_num, best_diff, best_den; unsigned int k; struct snd_interval t; int err; best_num = best_den = best_diff = 0; for (k = 0; k < rats_count; ++k) { unsigned int num; unsigned int den = rats[k].den; unsigned int q = i->min; int diff; num = mul(q, den); if (num > rats[k].num_max) continue; if (num < rats[k].num_min) num = rats[k].num_max; else { unsigned int r; r = (num - rats[k].num_min) % rats[k].num_step; if (r != 0) num += rats[k].num_step - r; } diff = num - q * den; if (best_num == 0 || diff * best_den < best_diff * den) { best_diff = diff; best_den = den; best_num = num; } } if (best_den == 0) { i->empty = 1; return -EINVAL; } t.min = div_down(best_num, best_den); t.openmin = !!(best_num % best_den); best_num = best_den = best_diff = 0; for (k = 0; k < rats_count; ++k) { unsigned int num; unsigned int den = rats[k].den; unsigned int q = i->max; int diff; num = mul(q, den); if (num < rats[k].num_min) continue; if (num > rats[k].num_max) num = rats[k].num_max; else { unsigned int r; r = (num - rats[k].num_min) % rats[k].num_step; if (r != 0) num -= r; } diff = q * den - num; if (best_num == 0 || diff * best_den < best_diff * den) { best_diff = diff; best_den = den; best_num = num; } } if (best_den == 0) { i->empty = 1; return -EINVAL; } t.max = div_up(best_num, best_den); t.openmax = !!(best_num % best_den); t.integer = 0; err = snd_interval_refine(i, &t); if (err < 0) return err; if (snd_interval_single(i)) { if (nump) *nump = best_num; if (denp) *denp = best_den; } return err; } /** * snd_interval_list - refine the interval value from the list * @i: the interval value to refine * @count: the number of elements in the list * @list: the value list * @mask: the bit-mask to evaluate * * Refines the interval value from the list. * When mask is non-zero, only the elements corresponding to bit 1 are * evaluated. * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. */ int snd_interval_list(struct snd_interval *i, unsigned int count, const unsigned int *list, unsigned int mask) { unsigned int k; struct snd_interval list_range; if (!count) { i->empty = 1; return -EINVAL; } snd_interval_any(&list_range); list_range.min = UINT_MAX; list_range.max = 0; for (k = 0; k < count; k++) { if (mask && !(mask & (1 << k))) continue; if (!snd_interval_test(i, list[k])) continue; list_range.min = min(list_range.min, list[k]); list_range.max = max(list_range.max, list[k]); } return snd_interval_refine(i, &list_range); } EXPORT_SYMBOL(snd_interval_list); /** * snd_interval_ranges - refine the interval value from the list of ranges * @i: the interval value to refine * @count: the number of elements in the list of ranges * @ranges: the ranges list * @mask: the bit-mask to evaluate * * Refines the interval value from the list of ranges. * When mask is non-zero, only the elements corresponding to bit 1 are * evaluated. * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. 
*/ int snd_interval_ranges(struct snd_interval *i, unsigned int count, const struct snd_interval *ranges, unsigned int mask) { unsigned int k; struct snd_interval range_union; struct snd_interval range; if (!count) { snd_interval_none(i); return -EINVAL; } snd_interval_any(&range_union); range_union.min = UINT_MAX; range_union.max = 0; for (k = 0; k < count; k++) { if (mask && !(mask & (1 << k))) continue; snd_interval_copy(&range, &ranges[k]); if (snd_interval_refine(&range, i) < 0) continue; if (snd_interval_empty(&range)) continue; if (range.min < range_union.min) { range_union.min = range.min; range_union.openmin = 1; } if (range.min == range_union.min && !range.openmin) range_union.openmin = 0; if (range.max > range_union.max) { range_union.max = range.max; range_union.openmax = 1; } if (range.max == range_union.max && !range.openmax) range_union.openmax = 0; } return snd_interval_refine(i, &range_union); } EXPORT_SYMBOL(snd_interval_ranges); static int snd_interval_step(struct snd_interval *i, unsigned int step) { unsigned int n; int changed = 0; n = i->min % step; if (n != 0 || i->openmin) { i->min += step - n; i->openmin = 0; changed = 1; } n = i->max % step; if (n != 0 || i->openmax) { i->max -= n; i->openmax = 0; changed = 1; } if (snd_interval_checkempty(i)) { i->empty = 1; return -EINVAL; } return changed; } /* Info constraints helpers */ /** * snd_pcm_hw_rule_add - add the hw-constraint rule * @runtime: the pcm runtime instance * @cond: condition bits * @var: the variable to evaluate * @func: the evaluation function * @private: the private data pointer passed to function * @dep: the dependent variables * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond, int var, snd_pcm_hw_rule_func_t func, void *private, int dep, ...) { struct snd_pcm_hw_constraints *constrs = &runtime->hw_constraints; struct snd_pcm_hw_rule *c; unsigned int k; va_list args; va_start(args, dep); if (constrs->rules_num >= constrs->rules_all) { struct snd_pcm_hw_rule *new; unsigned int new_rules = constrs->rules_all + 16; new = krealloc_array(constrs->rules, new_rules, sizeof(*c), GFP_KERNEL); if (!new) { va_end(args); return -ENOMEM; } constrs->rules = new; constrs->rules_all = new_rules; } c = &constrs->rules[constrs->rules_num]; c->cond = cond; c->func = func; c->var = var; c->private = private; k = 0; while (1) { if (snd_BUG_ON(k >= ARRAY_SIZE(c->deps))) { va_end(args); return -EINVAL; } c->deps[k++] = dep; if (dep < 0) break; dep = va_arg(args, int); } constrs->rules_num++; va_end(args); return 0; } EXPORT_SYMBOL(snd_pcm_hw_rule_add); /** * snd_pcm_hw_constraint_mask - apply the given bitmap mask constraint * @runtime: PCM runtime instance * @var: hw_params variable to apply the mask * @mask: the bitmap mask * * Apply the constraint of the given bitmap mask to a 32-bit mask parameter. * * Return: Zero if successful, or a negative error code on failure. 
*/ int snd_pcm_hw_constraint_mask(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, u_int32_t mask) { struct snd_pcm_hw_constraints *constrs = &runtime->hw_constraints; struct snd_mask *maskp = constrs_mask(constrs, var); *maskp->bits &= mask; memset(maskp->bits + 1, 0, (SNDRV_MASK_MAX-32) / 8); /* clear rest */ if (*maskp->bits == 0) return -EINVAL; return 0; } /** * snd_pcm_hw_constraint_mask64 - apply the given bitmap mask constraint * @runtime: PCM runtime instance * @var: hw_params variable to apply the mask * @mask: the 64bit bitmap mask * * Apply the constraint of the given bitmap mask to a 64-bit mask parameter. * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_hw_constraint_mask64(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, u_int64_t mask) { struct snd_pcm_hw_constraints *constrs = &runtime->hw_constraints; struct snd_mask *maskp = constrs_mask(constrs, var); maskp->bits[0] &= (u_int32_t)mask; maskp->bits[1] &= (u_int32_t)(mask >> 32); memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */ if (! maskp->bits[0] && ! maskp->bits[1]) return -EINVAL; return 0; } EXPORT_SYMBOL(snd_pcm_hw_constraint_mask64); /** * snd_pcm_hw_constraint_integer - apply an integer constraint to an interval * @runtime: PCM runtime instance * @var: hw_params variable to apply the integer constraint * * Apply the constraint of integer to an interval parameter. * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. */ int snd_pcm_hw_constraint_integer(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var) { struct snd_pcm_hw_constraints *constrs = &runtime->hw_constraints; return snd_interval_setinteger(constrs_interval(constrs, var)); } EXPORT_SYMBOL(snd_pcm_hw_constraint_integer); /** * snd_pcm_hw_constraint_minmax - apply a min/max range constraint to an interval * @runtime: PCM runtime instance * @var: hw_params variable to apply the range * @min: the minimal value * @max: the maximal value * * Apply the min/max range constraint to an interval parameter. * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. */ int snd_pcm_hw_constraint_minmax(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, unsigned int min, unsigned int max) { struct snd_pcm_hw_constraints *constrs = &runtime->hw_constraints; struct snd_interval t; t.min = min; t.max = max; t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(constrs_interval(constrs, var), &t); } EXPORT_SYMBOL(snd_pcm_hw_constraint_minmax); static int snd_pcm_hw_rule_list(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_pcm_hw_constraint_list *list = rule->private; return snd_interval_list(hw_param_interval(params, rule->var), list->count, list->list, list->mask); } /** * snd_pcm_hw_constraint_list - apply a list of constraints to a parameter * @runtime: PCM runtime instance * @cond: condition bits * @var: hw_params variable to apply the list constraint * @l: list * * Apply the list of constraints to an interval parameter. * * Return: Zero if successful, or a negative error code on failure. 
 */
int snd_pcm_hw_constraint_list(struct snd_pcm_runtime *runtime,
			       unsigned int cond, snd_pcm_hw_param_t var,
			       const struct snd_pcm_hw_constraint_list *l)
{
	return snd_pcm_hw_rule_add(runtime, cond, var,
				   snd_pcm_hw_rule_list, (void *)l,
				   var, -1);
}
EXPORT_SYMBOL(snd_pcm_hw_constraint_list);

static int snd_pcm_hw_rule_ranges(struct snd_pcm_hw_params *params,
				  struct snd_pcm_hw_rule *rule)
{
	struct snd_pcm_hw_constraint_ranges *r = rule->private;
	return snd_interval_ranges(hw_param_interval(params, rule->var),
				   r->count, r->ranges, r->mask);
}

/**
 * snd_pcm_hw_constraint_ranges - apply list of range constraints to a parameter
 * @runtime: PCM runtime instance
 * @cond: condition bits
 * @var: hw_params variable to apply the list of range constraints
 * @r: ranges
 *
 * Apply the list of range constraints to an interval parameter.
 *
 * Return: Zero if successful, or a negative error code on failure.
 */
int snd_pcm_hw_constraint_ranges(struct snd_pcm_runtime *runtime,
				 unsigned int cond, snd_pcm_hw_param_t var,
				 const struct snd_pcm_hw_constraint_ranges *r)
{
	return snd_pcm_hw_rule_add(runtime, cond, var,
				   snd_pcm_hw_rule_ranges, (void *)r,
				   var, -1);
}
EXPORT_SYMBOL(snd_pcm_hw_constraint_ranges);

static int snd_pcm_hw_rule_ratnums(struct snd_pcm_hw_params *params,
				   struct snd_pcm_hw_rule *rule)
{
	const struct snd_pcm_hw_constraint_ratnums *r = rule->private;
	unsigned int num = 0, den = 0;
	int err;

	err = snd_interval_ratnum(hw_param_interval(params, rule->var),
				  r->nrats, r->rats, &num, &den);
	if (err >= 0 && den && rule->var == SNDRV_PCM_HW_PARAM_RATE) {
		params->rate_num = num;
		params->rate_den = den;
	}
	return err;
}

/**
 * snd_pcm_hw_constraint_ratnums - apply ratnums constraint to a parameter
 * @runtime: PCM runtime instance
 * @cond: condition bits
 * @var: hw_params variable to apply the ratnums constraint
 * @r: struct snd_ratnums constraints
 *
 * Return: Zero if successful, or a negative error code on failure.
 */
int snd_pcm_hw_constraint_ratnums(struct snd_pcm_runtime *runtime,
				  unsigned int cond, snd_pcm_hw_param_t var,
				  const struct snd_pcm_hw_constraint_ratnums *r)
{
	return snd_pcm_hw_rule_add(runtime, cond, var,
				   snd_pcm_hw_rule_ratnums, (void *)r,
				   var, -1);
}
EXPORT_SYMBOL(snd_pcm_hw_constraint_ratnums);
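/*
 * Example of snd_pcm_hw_constraint_ratnums() usage (an illustrative sketch
 * with made-up clock numbers and hypothetical foo_* names): hardware that
 * derives its sample rate by dividing a fixed master clock can describe the
 * divider range with a struct snd_ratnum and let the core restrict the rate
 * interval accordingly, e.g. from the .open callback:
 *
 *	static const struct snd_ratnum foo_clock = {
 *		.num = 24576000,
 *		.den_min = 256,
 *		.den_max = 3072,
 *		.den_step = 1,
 *	};
 *	static const struct snd_pcm_hw_constraint_ratnums foo_clock_rates = {
 *		.nrats = 1,
 *		.rats = &foo_clock,
 *	};
 *
 *	err = snd_pcm_hw_constraint_ratnums(substream->runtime, 0,
 *					    SNDRV_PCM_HW_PARAM_RATE,
 *					    &foo_clock_rates);
 */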
static int snd_pcm_hw_rule_ratdens(struct snd_pcm_hw_params *params,
				   struct snd_pcm_hw_rule *rule)
{
	const struct snd_pcm_hw_constraint_ratdens *r = rule->private;
	unsigned int num = 0, den = 0;
	int err = snd_interval_ratden(hw_param_interval(params, rule->var),
				      r->nrats, r->rats, &num, &den);

	if (err >= 0 && den && rule->var == SNDRV_PCM_HW_PARAM_RATE) {
		params->rate_num = num;
		params->rate_den = den;
	}
	return err;
}

/**
 * snd_pcm_hw_constraint_ratdens - apply ratdens constraint to a parameter
 * @runtime: PCM runtime instance
 * @cond: condition bits
 * @var: hw_params variable to apply the ratdens constraint
 * @r: struct snd_ratdens constraints
 *
 * Return: Zero if successful, or a negative error code on failure.
 */
int snd_pcm_hw_constraint_ratdens(struct snd_pcm_runtime *runtime,
				  unsigned int cond, snd_pcm_hw_param_t var,
				  const struct snd_pcm_hw_constraint_ratdens *r)
{
	return snd_pcm_hw_rule_add(runtime, cond, var,
				   snd_pcm_hw_rule_ratdens, (void *)r,
				   var, -1);
}
EXPORT_SYMBOL(snd_pcm_hw_constraint_ratdens);

static int snd_pcm_hw_rule_msbits(struct snd_pcm_hw_params *params,
				  struct snd_pcm_hw_rule *rule)
{
	unsigned int l = (unsigned long) rule->private;
	int width = l & 0xffff;
	unsigned int msbits = l >> 16;
	const struct snd_interval *i =
		hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_SAMPLE_BITS);

	if (!snd_interval_single(i))
		return 0;

	if ((snd_interval_value(i) == width) ||
	    (width == 0 && snd_interval_value(i) > msbits))
		params->msbits = min_not_zero(params->msbits, msbits);

	return 0;
}

/**
 * snd_pcm_hw_constraint_msbits - add a hw constraint msbits rule
 * @runtime: PCM runtime instance
 * @cond: condition bits
 * @width: sample bits width
 * @msbits: msbits width
 *
 * This constraint will set the number of most significant bits (msbits) if a
 * sample format with the specified width has been selected. If width is set
 * to 0 the msbits will be set for any sample format with a width larger than
 * the specified msbits.
 *
 * Return: Zero if successful, or a negative error code on failure.
 */
int snd_pcm_hw_constraint_msbits(struct snd_pcm_runtime *runtime,
				 unsigned int cond,
				 unsigned int width,
				 unsigned int msbits)
{
	unsigned long l = (msbits << 16) | width;
	return snd_pcm_hw_rule_add(runtime, cond, -1,
				   snd_pcm_hw_rule_msbits,
				   (void*) l,
				   SNDRV_PCM_HW_PARAM_SAMPLE_BITS, -1);
}
EXPORT_SYMBOL(snd_pcm_hw_constraint_msbits);

static int snd_pcm_hw_rule_step(struct snd_pcm_hw_params *params,
				struct snd_pcm_hw_rule *rule)
{
	unsigned long step = (unsigned long) rule->private;
	return snd_interval_step(hw_param_interval(params, rule->var), step);
}

/**
 * snd_pcm_hw_constraint_step - add a hw constraint step rule
 * @runtime: PCM runtime instance
 * @cond: condition bits
 * @var: hw_params variable to apply the step constraint
 * @step: step size
 *
 * Return: Zero if successful, or a negative error code on failure.
 */
int snd_pcm_hw_constraint_step(struct snd_pcm_runtime *runtime,
			       unsigned int cond,
			       snd_pcm_hw_param_t var,
			       unsigned long step)
{
	return snd_pcm_hw_rule_add(runtime, cond, var,
				   snd_pcm_hw_rule_step, (void *) step,
				   var, -1);
}
EXPORT_SYMBOL(snd_pcm_hw_constraint_step);

static int snd_pcm_hw_rule_pow2(struct snd_pcm_hw_params *params,
				struct snd_pcm_hw_rule *rule)
{
	static const unsigned int pow2_sizes[] = {
		1<<0, 1<<1, 1<<2, 1<<3, 1<<4, 1<<5, 1<<6, 1<<7,
		1<<8, 1<<9, 1<<10, 1<<11, 1<<12, 1<<13, 1<<14, 1<<15,
		1<<16, 1<<17, 1<<18, 1<<19, 1<<20, 1<<21, 1<<22, 1<<23,
		1<<24, 1<<25, 1<<26, 1<<27, 1<<28, 1<<29, 1<<30
	};
	return snd_interval_list(hw_param_interval(params, rule->var),
				 ARRAY_SIZE(pow2_sizes), pow2_sizes, 0);
}

/**
 * snd_pcm_hw_constraint_pow2 - add a hw constraint power-of-2 rule
 * @runtime: PCM runtime instance
 * @cond: condition bits
 * @var: hw_params variable to apply the power-of-2 constraint
 *
 * Return: Zero if successful, or a negative error code on failure.
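 *
 * Example (an illustrative sketch): hardware that only accepts power-of-two
 * period sizes could add, from its .open callback:
 *
 *	err = snd_pcm_hw_constraint_pow2(substream->runtime, 0,
 *					 SNDRV_PCM_HW_PARAM_PERIOD_BYTES);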
*/ int snd_pcm_hw_constraint_pow2(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var) { return snd_pcm_hw_rule_add(runtime, cond, var, snd_pcm_hw_rule_pow2, NULL, var, -1); } EXPORT_SYMBOL(snd_pcm_hw_constraint_pow2); static int snd_pcm_hw_rule_noresample_func(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { unsigned int base_rate = (unsigned int)(uintptr_t)rule->private; struct snd_interval *rate; rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); return snd_interval_list(rate, 1, &base_rate, 0); } /** * snd_pcm_hw_rule_noresample - add a rule to allow disabling hw resampling * @runtime: PCM runtime instance * @base_rate: the rate at which the hardware does not resample * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_hw_rule_noresample(struct snd_pcm_runtime *runtime, unsigned int base_rate) { return snd_pcm_hw_rule_add(runtime, SNDRV_PCM_HW_PARAMS_NORESAMPLE, SNDRV_PCM_HW_PARAM_RATE, snd_pcm_hw_rule_noresample_func, (void *)(uintptr_t)base_rate, SNDRV_PCM_HW_PARAM_RATE, -1); } EXPORT_SYMBOL(snd_pcm_hw_rule_noresample); static void _snd_pcm_hw_param_any(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { if (hw_is_mask(var)) { snd_mask_any(hw_param_mask(params, var)); params->cmask |= 1 << var; params->rmask |= 1 << var; return; } if (hw_is_interval(var)) { snd_interval_any(hw_param_interval(params, var)); params->cmask |= 1 << var; params->rmask |= 1 << var; return; } snd_BUG(); } void _snd_pcm_hw_params_any(struct snd_pcm_hw_params *params) { unsigned int k; memset(params, 0, sizeof(*params)); for (k = SNDRV_PCM_HW_PARAM_FIRST_MASK; k <= SNDRV_PCM_HW_PARAM_LAST_MASK; k++) _snd_pcm_hw_param_any(params, k); for (k = SNDRV_PCM_HW_PARAM_FIRST_INTERVAL; k <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; k++) _snd_pcm_hw_param_any(params, k); params->info = ~0U; } EXPORT_SYMBOL(_snd_pcm_hw_params_any); /** * snd_pcm_hw_param_value - return @params field @var value * @params: the hw_params instance * @var: parameter to retrieve * @dir: pointer to the direction (-1,0,1) or %NULL * * Return: The value for field @var if it's fixed in configuration space * defined by @params. -%EINVAL otherwise. 
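 *
 * Example (an illustrative sketch): once the configuration space has been
 * narrowed down to a single rate, e.g. in a .hw_params callback, the value
 * can be read back directly (similar in spirit to params_rate()):
 *
 *	int rate = snd_pcm_hw_param_value(params, SNDRV_PCM_HW_PARAM_RATE, NULL);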
*/ int snd_pcm_hw_param_value(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { if (hw_is_mask(var)) { const struct snd_mask *mask = hw_param_mask_c(params, var); if (!snd_mask_single(mask)) return -EINVAL; if (dir) *dir = 0; return snd_mask_value(mask); } if (hw_is_interval(var)) { const struct snd_interval *i = hw_param_interval_c(params, var); if (!snd_interval_single(i)) return -EINVAL; if (dir) *dir = i->openmin; return snd_interval_value(i); } return -EINVAL; } EXPORT_SYMBOL(snd_pcm_hw_param_value); void _snd_pcm_hw_param_setempty(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { if (hw_is_mask(var)) { snd_mask_none(hw_param_mask(params, var)); params->cmask |= 1 << var; params->rmask |= 1 << var; } else if (hw_is_interval(var)) { snd_interval_none(hw_param_interval(params, var)); params->cmask |= 1 << var; params->rmask |= 1 << var; } else { snd_BUG(); } } EXPORT_SYMBOL(_snd_pcm_hw_param_setempty); static int _snd_pcm_hw_param_first(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { int changed; if (hw_is_mask(var)) changed = snd_mask_refine_first(hw_param_mask(params, var)); else if (hw_is_interval(var)) changed = snd_interval_refine_first(hw_param_interval(params, var)); else return -EINVAL; if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /** * snd_pcm_hw_param_first - refine config space and return minimum value * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @dir: pointer to the direction (-1,0,1) or %NULL * * Inside configuration space defined by @params remove from @var all * values > minimum. Reduce configuration space accordingly. * * Return: The minimum, or a negative error code on failure. */ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { int changed = _snd_pcm_hw_param_first(params, var); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); } EXPORT_SYMBOL(snd_pcm_hw_param_first); static int _snd_pcm_hw_param_last(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { int changed; if (hw_is_mask(var)) changed = snd_mask_refine_last(hw_param_mask(params, var)); else if (hw_is_interval(var)) changed = snd_interval_refine_last(hw_param_interval(params, var)); else return -EINVAL; if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } return changed; } /** * snd_pcm_hw_param_last - refine config space and return maximum value * @pcm: PCM instance * @params: the hw_params instance * @var: parameter to retrieve * @dir: pointer to the direction (-1,0,1) or %NULL * * Inside configuration space defined by @params remove from @var all * values < maximum. Reduce configuration space accordingly. * * Return: The maximum, or a negative error code on failure. */ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { int changed = _snd_pcm_hw_param_last(params, var); if (changed < 0) return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); } EXPORT_SYMBOL(snd_pcm_hw_param_last); /** * snd_pcm_hw_params_bits - Get the number of bits per the sample. 
* @p: hardware parameters * * Return: The number of bits per sample based on the format, * subformat and msbits the specified hw params has. */ int snd_pcm_hw_params_bits(const struct snd_pcm_hw_params *p) { snd_pcm_subformat_t subformat = params_subformat(p); snd_pcm_format_t format = params_format(p); switch (format) { case SNDRV_PCM_FORMAT_S32_LE: case SNDRV_PCM_FORMAT_U32_LE: case SNDRV_PCM_FORMAT_S32_BE: case SNDRV_PCM_FORMAT_U32_BE: switch (subformat) { case SNDRV_PCM_SUBFORMAT_MSBITS_20: return 20; case SNDRV_PCM_SUBFORMAT_MSBITS_24: return 24; case SNDRV_PCM_SUBFORMAT_MSBITS_MAX: case SNDRV_PCM_SUBFORMAT_STD: default: break; } fallthrough; default: return snd_pcm_format_width(format); } } EXPORT_SYMBOL(snd_pcm_hw_params_bits); static int snd_pcm_lib_ioctl_reset(struct snd_pcm_substream *substream, void *arg) { struct snd_pcm_runtime *runtime = substream->runtime; guard(pcm_stream_lock_irqsave)(substream); if (snd_pcm_running(substream) && snd_pcm_update_hw_ptr(substream) >= 0) runtime->status->hw_ptr %= runtime->buffer_size; else { runtime->status->hw_ptr = 0; runtime->hw_ptr_wrap = 0; } return 0; } static int snd_pcm_lib_ioctl_channel_info(struct snd_pcm_substream *substream, void *arg) { struct snd_pcm_channel_info *info = arg; struct snd_pcm_runtime *runtime = substream->runtime; int width; if (!(runtime->info & SNDRV_PCM_INFO_MMAP)) { info->offset = -1; return 0; } width = snd_pcm_format_physical_width(runtime->format); if (width < 0) return width; info->offset = 0; switch (runtime->access) { case SNDRV_PCM_ACCESS_MMAP_INTERLEAVED: case SNDRV_PCM_ACCESS_RW_INTERLEAVED: info->first = info->channel * width; info->step = runtime->channels * width; break; case SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED: case SNDRV_PCM_ACCESS_RW_NONINTERLEAVED: { size_t size = runtime->dma_bytes / runtime->channels; info->first = info->channel * size * 8; info->step = width; break; } default: snd_BUG(); break; } return 0; } static int snd_pcm_lib_ioctl_fifo_size(struct snd_pcm_substream *substream, void *arg) { struct snd_pcm_hw_params *params = arg; snd_pcm_format_t format; int channels; ssize_t frame_size; params->fifo_size = substream->runtime->hw.fifo_size; if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_FIFO_IN_FRAMES)) { format = params_format(params); channels = params_channels(params); frame_size = snd_pcm_format_size(format, channels); if (frame_size > 0) params->fifo_size /= frame_size; } return 0; } /** * snd_pcm_lib_ioctl - a generic PCM ioctl callback * @substream: the pcm substream instance * @cmd: ioctl command * @arg: ioctl argument * * Processes the generic ioctl commands for PCM. * Can be passed as the ioctl callback for PCM ops. * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg) { switch (cmd) { case SNDRV_PCM_IOCTL1_RESET: return snd_pcm_lib_ioctl_reset(substream, arg); case SNDRV_PCM_IOCTL1_CHANNEL_INFO: return snd_pcm_lib_ioctl_channel_info(substream, arg); case SNDRV_PCM_IOCTL1_FIFO_SIZE: return snd_pcm_lib_ioctl_fifo_size(substream, arg); } return -ENXIO; } EXPORT_SYMBOL(snd_pcm_lib_ioctl); /** * snd_pcm_period_elapsed_under_stream_lock() - update the status of runtime for the next period * under acquired lock of PCM substream. * @substream: the instance of pcm substream. * * This function is called when the batch of audio data frames as the same size as the period of * buffer is already processed in audio data transmission. 
 *
 * The call updates the runtime status with the latest position of the audio
 * data transmission, checks for buffer overrun and underrun, wakes up user
 * processes waiting for available audio data frames, samples the audio
 * timestamp, and stops or drains the PCM substream according to the
 * configured thresholds.
 *
 * The function is intended for the case in which the PCM driver operates on
 * audio data frames under an already acquired lock of the PCM substream, e.g.
 * in a callback of any operation of &snd_pcm_ops in process context. In
 * interrupt context, it's preferable to use ``snd_pcm_period_elapsed()``
 * instead, since that variant acquires the lock of the PCM substream by
 * itself.
 *
 * Note that some callbacks in &snd_pcm_ops can be invoked as a result of this
 * call:
 *
 * - .pointer - to retrieve current position of audio data transmission by frame count or XRUN state.
 * - .trigger - with SNDRV_PCM_TRIGGER_STOP at XRUN or DRAINING state.
 * - .get_time_info - to retrieve audio time stamp if needed.
 *
 * Even if more than one period has elapsed since the last call, this needs to
 * be called only once.
 */
void snd_pcm_period_elapsed_under_stream_lock(struct snd_pcm_substream *substream)
{
	struct snd_pcm_runtime *runtime;

	if (PCM_RUNTIME_CHECK(substream))
		return;
	runtime = substream->runtime;

	if (!snd_pcm_running(substream) ||
	    snd_pcm_update_hw_ptr0(substream, 1) < 0)
		goto _end;

#ifdef CONFIG_SND_PCM_TIMER
	if (substream->timer_running)
		snd_timer_interrupt(substream->timer, 1);
#endif
 _end:
	snd_kill_fasync(runtime->fasync, SIGIO, POLL_IN);
}
EXPORT_SYMBOL(snd_pcm_period_elapsed_under_stream_lock);

/**
 * snd_pcm_period_elapsed() - update the status of runtime for the next period
 *			      by acquiring lock of PCM substream.
 * @substream: the instance of PCM substream.
 *
 * This function is mostly similar to ``snd_pcm_period_elapsed_under_stream_lock()``,
 * except that it acquires the lock of the PCM substream by itself.
 *
 * It's typically called from an IRQ handler, to notify that a batch of audio
 * data frames of the same size as the buffer period has been processed in the
 * audio data transmission.
 */
void snd_pcm_period_elapsed(struct snd_pcm_substream *substream)
{
	if (snd_BUG_ON(!substream))
		return;

	guard(pcm_stream_lock_irqsave)(substream);
	snd_pcm_period_elapsed_under_stream_lock(substream);
}
EXPORT_SYMBOL(snd_pcm_period_elapsed);

/*
 * Wait until avail_min data becomes available
 * Returns a negative error code if any error occurs during operation.
 * The available space is stored in availp. When err = 0 and avail = 0
 * on the capture stream, it indicates the stream is in DRAINING state.
*/ static int wait_for_avail(struct snd_pcm_substream *substream, snd_pcm_uframes_t *availp) { struct snd_pcm_runtime *runtime = substream->runtime; int is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; wait_queue_entry_t wait; int err = 0; snd_pcm_uframes_t avail = 0; long wait_time, tout; init_waitqueue_entry(&wait, current); set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&runtime->tsleep, &wait); if (runtime->no_period_wakeup) wait_time = MAX_SCHEDULE_TIMEOUT; else { /* use wait time from substream if available */ if (substream->wait_time) { wait_time = substream->wait_time; } else { wait_time = 100; if (runtime->rate) { long t = runtime->buffer_size * 1100 / runtime->rate; wait_time = max(t, wait_time); } } wait_time = msecs_to_jiffies(wait_time); } for (;;) { if (signal_pending(current)) { err = -ERESTARTSYS; break; } /* * We need to check if space became available already * (and thus the wakeup happened already) first to close * the race of space already having become available. * This check must happen after been added to the waitqueue * and having current state be INTERRUPTIBLE. */ avail = snd_pcm_avail(substream); if (avail >= runtime->twake) break; snd_pcm_stream_unlock_irq(substream); tout = schedule_timeout(wait_time); snd_pcm_stream_lock_irq(substream); set_current_state(TASK_INTERRUPTIBLE); switch (runtime->state) { case SNDRV_PCM_STATE_SUSPENDED: err = -ESTRPIPE; goto _endloop; case SNDRV_PCM_STATE_XRUN: err = -EPIPE; goto _endloop; case SNDRV_PCM_STATE_DRAINING: if (is_playback) err = -EPIPE; else avail = 0; /* indicate draining */ goto _endloop; case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_DISCONNECTED: err = -EBADFD; goto _endloop; case SNDRV_PCM_STATE_PAUSED: continue; } if (!tout) { pcm_dbg(substream->pcm, "%s timeout (DMA or IRQ trouble?)\n", is_playback ? 
"playback write" : "capture read"); err = -EIO; break; } } _endloop: set_current_state(TASK_RUNNING); remove_wait_queue(&runtime->tsleep, &wait); *availp = avail; return err; } typedef int (*pcm_transfer_f)(struct snd_pcm_substream *substream, int channel, unsigned long hwoff, struct iov_iter *iter, unsigned long bytes); typedef int (*pcm_copy_f)(struct snd_pcm_substream *, snd_pcm_uframes_t, void *, snd_pcm_uframes_t, snd_pcm_uframes_t, pcm_transfer_f, bool); /* calculate the target DMA-buffer position to be written/read */ static void *get_dma_ptr(struct snd_pcm_runtime *runtime, int channel, unsigned long hwoff) { return runtime->dma_area + hwoff + channel * (runtime->dma_bytes / runtime->channels); } /* default copy ops for write; used for both interleaved and non- modes */ static int default_write_copy(struct snd_pcm_substream *substream, int channel, unsigned long hwoff, struct iov_iter *iter, unsigned long bytes) { if (copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff), bytes, iter) != bytes) return -EFAULT; return 0; } /* fill silence instead of copy data; called as a transfer helper * from __snd_pcm_lib_write() or directly from noninterleaved_copy() when * a NULL buffer is passed */ static int fill_silence(struct snd_pcm_substream *substream, int channel, unsigned long hwoff, struct iov_iter *iter, unsigned long bytes) { struct snd_pcm_runtime *runtime = substream->runtime; if (substream->stream != SNDRV_PCM_STREAM_PLAYBACK) return 0; if (substream->ops->fill_silence) return substream->ops->fill_silence(substream, channel, hwoff, bytes); snd_pcm_format_set_silence(runtime->format, get_dma_ptr(runtime, channel, hwoff), bytes_to_samples(runtime, bytes)); return 0; } /* default copy ops for read; used for both interleaved and non- modes */ static int default_read_copy(struct snd_pcm_substream *substream, int channel, unsigned long hwoff, struct iov_iter *iter, unsigned long bytes) { if (copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff), bytes, iter) != bytes) return -EFAULT; return 0; } /* call transfer with the filled iov_iter */ static int do_transfer(struct snd_pcm_substream *substream, int c, unsigned long hwoff, void *data, unsigned long bytes, pcm_transfer_f transfer, bool in_kernel) { struct iov_iter iter; int err, type; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) type = ITER_SOURCE; else type = ITER_DEST; if (in_kernel) { struct kvec kvec = { data, bytes }; iov_iter_kvec(&iter, type, &kvec, 1, bytes); return transfer(substream, c, hwoff, &iter, bytes); } err = import_ubuf(type, (__force void __user *)data, bytes, &iter); if (err) return err; return transfer(substream, c, hwoff, &iter, bytes); } /* call transfer function with the converted pointers and sizes; * for interleaved mode, it's one shot for all samples */ static int interleaved_copy(struct snd_pcm_substream *substream, snd_pcm_uframes_t hwoff, void *data, snd_pcm_uframes_t off, snd_pcm_uframes_t frames, pcm_transfer_f transfer, bool in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; /* convert to bytes */ hwoff = frames_to_bytes(runtime, hwoff); off = frames_to_bytes(runtime, off); frames = frames_to_bytes(runtime, frames); return do_transfer(substream, 0, hwoff, data + off, frames, transfer, in_kernel); } /* call transfer function with the converted pointers and sizes for each * non-interleaved channel; when buffer is NULL, silencing instead of copying */ static int noninterleaved_copy(struct snd_pcm_substream *substream, snd_pcm_uframes_t hwoff, void *data, 
snd_pcm_uframes_t off, snd_pcm_uframes_t frames, pcm_transfer_f transfer, bool in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; int channels = runtime->channels; void **bufs = data; int c, err; /* convert to bytes; note that it's not frames_to_bytes() here. * in non-interleaved mode, we copy for each channel, thus * each copy is n_samples bytes x channels = whole frames. */ off = samples_to_bytes(runtime, off); frames = samples_to_bytes(runtime, frames); hwoff = samples_to_bytes(runtime, hwoff); for (c = 0; c < channels; ++c, ++bufs) { if (!data || !*bufs) err = fill_silence(substream, c, hwoff, NULL, frames); else err = do_transfer(substream, c, hwoff, *bufs + off, frames, transfer, in_kernel); if (err < 0) return err; } return 0; } /* fill silence on the given buffer position; * called from snd_pcm_playback_silence() */ static int fill_silence_frames(struct snd_pcm_substream *substream, snd_pcm_uframes_t off, snd_pcm_uframes_t frames) { if (substream->runtime->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED || substream->runtime->access == SNDRV_PCM_ACCESS_MMAP_INTERLEAVED) return interleaved_copy(substream, off, NULL, 0, frames, fill_silence, true); else return noninterleaved_copy(substream, off, NULL, 0, frames, fill_silence, true); } /* sanity-check for read/write methods */ static int pcm_sanity_check(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (snd_BUG_ON(!substream->ops->copy && !runtime->dma_area)) return -EINVAL; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; return 0; } static int pcm_accessible_state(struct snd_pcm_runtime *runtime) { switch (runtime->state) { case SNDRV_PCM_STATE_PREPARED: case SNDRV_PCM_STATE_RUNNING: case SNDRV_PCM_STATE_PAUSED: return 0; case SNDRV_PCM_STATE_XRUN: return -EPIPE; case SNDRV_PCM_STATE_SUSPENDED: return -ESTRPIPE; default: return -EBADFD; } } /* update to the given appl_ptr and call ack callback if needed; * when an error is returned, take back to the original value */ int pcm_lib_apply_appl_ptr(struct snd_pcm_substream *substream, snd_pcm_uframes_t appl_ptr) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t old_appl_ptr = runtime->control->appl_ptr; snd_pcm_sframes_t diff; int ret; if (old_appl_ptr == appl_ptr) return 0; if (appl_ptr >= runtime->boundary) return -EINVAL; /* * check if a rewind is requested by the application */ if (substream->runtime->info & SNDRV_PCM_INFO_NO_REWINDS) { diff = appl_ptr - old_appl_ptr; if (diff >= 0) { if (diff > runtime->buffer_size) return -EINVAL; } else { if (runtime->boundary + diff > runtime->buffer_size) return -EINVAL; } } runtime->control->appl_ptr = appl_ptr; if (substream->ops->ack) { ret = substream->ops->ack(substream); if (ret < 0) { runtime->control->appl_ptr = old_appl_ptr; if (ret == -EPIPE) __snd_pcm_xrun(substream); return ret; } } trace_applptr(substream, old_appl_ptr, appl_ptr); return 0; } /* the common loop for read/write data */ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, void *data, bool interleaved, snd_pcm_uframes_t size, bool in_kernel) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t xfer = 0; snd_pcm_uframes_t offset = 0; snd_pcm_uframes_t avail; pcm_copy_f writer; pcm_transfer_f transfer; bool nonblock; bool is_playback; int err; err = pcm_sanity_check(substream); if (err < 0) return err; is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; if (interleaved) { 
if (runtime->access != SNDRV_PCM_ACCESS_RW_INTERLEAVED && runtime->channels > 1) return -EINVAL; writer = interleaved_copy; } else { if (runtime->access != SNDRV_PCM_ACCESS_RW_NONINTERLEAVED) return -EINVAL; writer = noninterleaved_copy; } if (!data) { if (is_playback) transfer = fill_silence; else return -EINVAL; } else { if (substream->ops->copy) transfer = substream->ops->copy; else transfer = is_playback ? default_write_copy : default_read_copy; } if (size == 0) return 0; nonblock = !!(substream->f_flags & O_NONBLOCK); snd_pcm_stream_lock_irq(substream); err = pcm_accessible_state(runtime); if (err < 0) goto _end_unlock; runtime->twake = runtime->control->avail_min ? : 1; if (runtime->state == SNDRV_PCM_STATE_RUNNING) snd_pcm_update_hw_ptr(substream); /* * If size < start_threshold, wait indefinitely. Another * thread may start capture */ if (!is_playback && runtime->state == SNDRV_PCM_STATE_PREPARED && size >= runtime->start_threshold) { err = snd_pcm_start(substream); if (err < 0) goto _end_unlock; } avail = snd_pcm_avail(substream); while (size > 0) { snd_pcm_uframes_t frames, appl_ptr, appl_ofs; snd_pcm_uframes_t cont; if (!avail) { if (!is_playback && runtime->state == SNDRV_PCM_STATE_DRAINING) { snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); goto _end_unlock; } if (nonblock) { err = -EAGAIN; goto _end_unlock; } runtime->twake = min_t(snd_pcm_uframes_t, size, runtime->control->avail_min ? : 1); err = wait_for_avail(substream, &avail); if (err < 0) goto _end_unlock; if (!avail) continue; /* draining */ } frames = size > avail ? avail : size; appl_ptr = READ_ONCE(runtime->control->appl_ptr); appl_ofs = appl_ptr % runtime->buffer_size; cont = runtime->buffer_size - appl_ofs; if (frames > cont) frames = cont; if (snd_BUG_ON(!frames)) { err = -EINVAL; goto _end_unlock; } if (!atomic_inc_unless_negative(&runtime->buffer_accessing)) { err = -EBUSY; goto _end_unlock; } snd_pcm_stream_unlock_irq(substream); if (!is_playback) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_CPU); err = writer(substream, appl_ofs, data, offset, frames, transfer, in_kernel); if (is_playback) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); snd_pcm_stream_lock_irq(substream); atomic_dec(&runtime->buffer_accessing); if (err < 0) goto _end_unlock; err = pcm_accessible_state(runtime); if (err < 0) goto _end_unlock; appl_ptr += frames; if (appl_ptr >= runtime->boundary) appl_ptr -= runtime->boundary; err = pcm_lib_apply_appl_ptr(substream, appl_ptr); if (err < 0) goto _end_unlock; offset += frames; size -= frames; xfer += frames; avail -= frames; if (is_playback && runtime->state == SNDRV_PCM_STATE_PREPARED && snd_pcm_playback_hw_avail(runtime) >= (snd_pcm_sframes_t)runtime->start_threshold) { err = snd_pcm_start(substream); if (err < 0) goto _end_unlock; } } _end_unlock: runtime->twake = 0; if (xfer > 0 && err >= 0) snd_pcm_update_state(substream, runtime); snd_pcm_stream_unlock_irq(substream); return xfer > 0 ? 
(snd_pcm_sframes_t)xfer : err; } EXPORT_SYMBOL(__snd_pcm_lib_xfer); /* * standard channel mapping helpers */ /* default channel maps for multi-channel playbacks, up to 8 channels */ const struct snd_pcm_chmap_elem snd_pcm_std_chmaps[] = { { .channels = 1, .map = { SNDRV_CHMAP_MONO } }, { .channels = 2, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR } }, { .channels = 4, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR, SNDRV_CHMAP_RL, SNDRV_CHMAP_RR } }, { .channels = 6, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR, SNDRV_CHMAP_RL, SNDRV_CHMAP_RR, SNDRV_CHMAP_FC, SNDRV_CHMAP_LFE } }, { .channels = 8, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR, SNDRV_CHMAP_RL, SNDRV_CHMAP_RR, SNDRV_CHMAP_FC, SNDRV_CHMAP_LFE, SNDRV_CHMAP_SL, SNDRV_CHMAP_SR } }, { } }; EXPORT_SYMBOL_GPL(snd_pcm_std_chmaps); /* alternative channel maps with CLFE <-> surround swapped for 6/8 channels */ const struct snd_pcm_chmap_elem snd_pcm_alt_chmaps[] = { { .channels = 1, .map = { SNDRV_CHMAP_MONO } }, { .channels = 2, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR } }, { .channels = 4, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR, SNDRV_CHMAP_RL, SNDRV_CHMAP_RR } }, { .channels = 6, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR, SNDRV_CHMAP_FC, SNDRV_CHMAP_LFE, SNDRV_CHMAP_RL, SNDRV_CHMAP_RR } }, { .channels = 8, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR, SNDRV_CHMAP_FC, SNDRV_CHMAP_LFE, SNDRV_CHMAP_RL, SNDRV_CHMAP_RR, SNDRV_CHMAP_SL, SNDRV_CHMAP_SR } }, { } }; EXPORT_SYMBOL_GPL(snd_pcm_alt_chmaps); static bool valid_chmap_channels(const struct snd_pcm_chmap *info, int ch) { if (ch > info->max_channels) return false; return !info->channel_mask || (info->channel_mask & (1U << ch)); } static int pcm_chmap_ctl_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = info->max_channels; uinfo->value.integer.min = 0; uinfo->value.integer.max = SNDRV_CHMAP_LAST; return 0; } /* get callback for channel map ctl element * stores the channel position firstly matching with the current channels */ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); unsigned int idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id); struct snd_pcm_substream *substream; const struct snd_pcm_chmap_elem *map; if (!info->chmap) return -EINVAL; substream = snd_pcm_chmap_substream(info, idx); if (!substream) return -ENODEV; memset(ucontrol->value.integer.value, 0, sizeof(long) * info->max_channels); if (!substream->runtime) return 0; /* no channels set */ for (map = info->chmap; map->channels; map++) { int i; if (map->channels == substream->runtime->channels && valid_chmap_channels(info, map->channels)) { for (i = 0; i < map->channels; i++) ucontrol->value.integer.value[i] = map->map[i]; return 0; } } return -EINVAL; } /* tlv callback for channel map ctl element * expands the pre-defined channel maps in a form of TLV */ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *tlv) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); const struct snd_pcm_chmap_elem *map; unsigned int __user *dst; int c, count = 0; if (!info->chmap) return -EINVAL; if (size < 8) return -ENOMEM; if (put_user(SNDRV_CTL_TLVT_CONTAINER, tlv)) return -EFAULT; size -= 8; dst = tlv + 2; for (map = info->chmap; map->channels; map++) { int chs_bytes = map->channels * 4; if (!valid_chmap_channels(info, map->channels)) continue; if (size < 
8) return -ENOMEM; if (put_user(SNDRV_CTL_TLVT_CHMAP_FIXED, dst) || put_user(chs_bytes, dst + 1)) return -EFAULT; dst += 2; size -= 8; count += 8; if (size < chs_bytes) return -ENOMEM; size -= chs_bytes; count += chs_bytes; for (c = 0; c < map->channels; c++) { if (put_user(map->map[c], dst)) return -EFAULT; dst++; } } if (put_user(count, tlv + 1)) return -EFAULT; return 0; } static void pcm_chmap_ctl_private_free(struct snd_kcontrol *kcontrol) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); info->pcm->streams[info->stream].chmap_kctl = NULL; kfree(info); } /** * snd_pcm_add_chmap_ctls - create channel-mapping control elements * @pcm: the assigned PCM instance * @stream: stream direction * @chmap: channel map elements (for query) * @max_channels: the max number of channels for the stream * @private_value: the value passed to each kcontrol's private_value field * @info_ret: store struct snd_pcm_chmap instance if non-NULL * * Create channel-mapping control elements assigned to the given PCM stream(s). * Return: Zero if successful, or a negative error value. */ int snd_pcm_add_chmap_ctls(struct snd_pcm *pcm, int stream, const struct snd_pcm_chmap_elem *chmap, int max_channels, unsigned long private_value, struct snd_pcm_chmap **info_ret) { struct snd_pcm_chmap *info; struct snd_kcontrol_new knew = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_TLV_READ | SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK, .info = pcm_chmap_ctl_info, .get = pcm_chmap_ctl_get, .tlv.c = pcm_chmap_ctl_tlv, }; int err; if (WARN_ON(pcm->streams[stream].chmap_kctl)) return -EBUSY; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->pcm = pcm; info->stream = stream; info->chmap = chmap; info->max_channels = max_channels; if (stream == SNDRV_PCM_STREAM_PLAYBACK) knew.name = "Playback Channel Map"; else knew.name = "Capture Channel Map"; knew.device = pcm->device; knew.count = pcm->streams[stream].substream_count; knew.private_value = private_value; info->kctl = snd_ctl_new1(&knew, info); if (!info->kctl) { kfree(info); return -ENOMEM; } info->kctl->private_free = pcm_chmap_ctl_private_free; err = snd_ctl_add(pcm->card, info->kctl); if (err < 0) return err; pcm->streams[stream].chmap_kctl = info->kctl; if (info_ret) *info_ret = info; return 0; } EXPORT_SYMBOL_GPL(snd_pcm_add_chmap_ctls);
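/*
 * Example of snd_pcm_add_chmap_ctls() usage (an illustrative sketch with
 * hypothetical variable names): a stereo-only playback device can expose the
 * standard channel maps right after snd_pcm_new() in its probe code:
 *
 *	struct snd_pcm_chmap *chmap;
 *
 *	err = snd_pcm_add_chmap_ctls(pcm, SNDRV_PCM_STREAM_PLAYBACK,
 *				     snd_pcm_std_chmaps, 2, 0, &chmap);
 *	if (err < 0)
 *		return err;
 */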
5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 
5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 
6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 
7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 
8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 
8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 
9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 
10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433 10434 10435 10436 10437 10438 10439 10440 10441 10442 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454 10455 10456 10457 10458 10459 10460 10461 10462 10463 10464 10465 10466 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490 10491 10492 10493 10494 10495 10496 10497 10498 10499 10500 10501 10502 10503 10504 10505 10506 10507 10508 10509 10510 10511 10512 10513 10514 10515 10516 10517 10518 10519 10520 10521 10522 10523 10524 10525 10526 10527 10528 10529 10530 10531 10532 10533 10534 10535 10536 10537 10538 10539 10540 10541 10542 10543 10544 10545 10546 10547 10548 10549 10550 10551 10552 10553 10554 10555 10556 10557 10558 10559 10560 10561 10562 10563 10564 10565 10566 10567 10568 10569 10570 10571 10572 10573 10574 10575 10576 10577 10578 10579 10580 10581 10582 10583 10584 10585 10586 10587 10588 10589 10590 10591 10592 10593 10594 10595 10596 10597 10598 10599 10600 10601 10602 10603 10604 10605 10606 10607 10608 10609 10610 10611 10612 10613 10614 10615 10616 10617 10618 10619 10620 10621 10622 10623 10624 10625 10626 10627 10628 10629 10630 10631 10632 10633 10634 10635 10636 10637 10638 10639 10640 10641 10642 10643 10644 10645 10646 10647 10648 10649 10650 10651 10652 10653 10654 10655 10656 10657 10658 10659 10660 10661 10662 10663 10664 10665 10666 10667 10668 10669 10670 10671 10672 10673 10674 10675 10676 10677 10678 10679 10680 10681 10682 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 10707 10708 10709 10710 10711 10712 10713 10714 10715 10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730 10731 10732 10733 
10734 10735 10736 10737 10738 10739 10740 10741 10742 10743 10744 10745 10746 10747 10748 10749 10750 10751 10752 10753 10754 10755 10756 10757 10758 10759 10760 10761 10762 10763 10764 10765 10766 10767 10768 10769 10770 10771 10772 10773 10774 10775 10776 10777 10778 10779 10780 10781 10782 10783 10784 10785 10786 10787 10788 10789 10790 10791 10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802 10803 10804 10805 10806 10807 10808 10809 10810 10811 10812 10813 10814 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826 10827 10828 10829 10830 10831 10832 10833 10834 10835 10836 10837 10838 10839 10840 10841 10842 10843 10844 10845 10846 10847 10848 10849 10850 10851 10852 10853 10854 10855 10856 10857 10858 10859 10860 10861 10862 10863 10864 10865 10866 10867 10868 10869 10870 10871 10872 10873 10874 10875 10876 10877 10878 10879 10880 10881 10882 10883 10884 10885 10886 10887 10888 10889 10890 10891 10892 10893 10894 10895 10896 10897 10898 10899 10900 10901 10902 10903 10904 10905 10906 10907 10908 10909 10910 10911 10912 10913 10914 10915 10916 10917 10918 10919 10920 10921 10922 10923 10924 10925 10926 10927 10928 10929 10930 10931 10932 10933 10934 10935 10936 10937 10938 10939 10940 10941 10942 10943 10944 10945 10946 10947 10948 10949 10950 10951 10952 10953 10954 10955 10956 10957 10958 10959 10960 10961 10962 10963 10964 10965 10966 10967 10968 10969 10970 10971 10972 10973 10974 10975 10976 10977 10978 10979 10980 10981 10982 10983 10984 10985 10986 10987 10988 10989 10990 10991 10992 10993 10994 10995 10996 10997 10998 10999 11000 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 11013 11014 11015 11016 11017 11018 11019 11020 11021 11022 11023 11024 11025 11026 11027 11028 11029 11030 11031 11032 11033 11034 11035 11036 11037 11038 11039 11040 11041 11042 11043 11044 11045 11046 11047 11048 11049 11050 11051 11052 11053 11054 11055 11056 11057 11058 11059 11060 11061 11062 11063 11064 11065 11066 11067 11068 11069 11070 11071 11072 11073 11074 11075 11076 11077 11078 11079 11080 11081 11082 11083 11084 11085 11086 11087 11088 11089 11090 11091 11092 11093 11094 11095 11096 11097 11098 11099 11100 11101 11102 11103 11104 11105 11106 11107 11108 11109 11110 11111 11112 11113 11114 11115 11116 11117 11118 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 11134 11135 11136 11137 11138 11139 11140 11141 11142 11143 11144 11145 11146 11147 11148 11149 11150 11151 11152 11153 11154 11155 11156 11157 11158 11159 11160 11161 11162 11163 11164 11165 11166 11167 11168 11169 11170 11171 11172 11173 11174 11175 11176 11177 11178 11179 11180 11181 11182 11183 11184 11185 11186 11187 11188 11189 11190 11191 11192 11193 11194 11195 11196 11197 11198 11199 11200 11201 11202 11203 11204 11205 11206 11207 11208 11209 11210 11211 11212 11213 11214 11215 11216 11217 11218 11219 11220 11221 11222 11223 11224 11225 11226 11227 11228 11229 11230 11231 11232 11233 11234 11235 11236 11237 11238 11239 11240 11241 11242 11243 11244 11245 11246 11247 11248 11249 11250 11251 11252 11253 11254 11255 11256 11257 11258 11259 11260 11261 11262 11263 11264 11265 11266 11267 11268 11269 11270 11271 11272 11273 11274 11275 11276 11277 11278 11279 11280 11281 11282 11283 11284 11285 11286 11287 11288 11289 11290 11291 11292 11293 11294 11295 11296 11297 11298 11299 11300 11301 11302 11303 11304 11305 11306 11307 11308 11309 11310 11311 11312 11313 11314 11315 11316 11317 11318 11319 11320 11321 11322 11323 11324 11325 
11326 11327 11328 11329 11330 11331 11332 11333 11334 11335 11336 11337 11338 11339 11340 11341 11342 11343 11344 11345 11346 11347 11348 11349 11350 11351 11352 11353 11354 11355 11356 11357 11358 11359 11360 11361 11362 11363 11364 11365 11366 11367 11368 11369 11370 11371 11372 11373 11374 11375 11376 11377 11378 11379 11380 11381 11382 11383 11384 11385 11386 11387 11388 11389 11390 11391 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402 11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414 11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426 11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438 11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450 11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462 11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474 11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486 11487 11488 11489 11490 11491 11492 11493 11494 11495 11496 11497 11498 11499 11500 11501 11502 11503 11504 11505 11506 11507 11508 11509 11510 11511 11512 11513 11514 11515 11516 11517 11518 11519 11520 11521 11522 11523 11524 11525 11526 11527 11528 11529 11530 11531 11532 11533 11534 11535 11536 11537 11538 11539 11540 11541 11542 11543 11544 11545 11546 11547 11548 11549 11550 11551 11552 11553 11554 11555 11556 11557 11558 11559 11560 11561 11562 11563 11564 11565 11566 11567 11568 11569 11570 11571 11572 11573 11574 11575 11576 11577 11578 11579 11580 11581 11582 11583 11584 11585 11586 11587 11588 11589 11590 11591 11592 11593 11594 11595 11596 11597 11598 11599 11600 11601 11602 11603 11604 11605 11606 11607 11608 11609 11610 11611 11612 11613 11614 11615 11616 11617 11618 11619 11620 11621 11622 11623 11624 11625 11626 11627 11628 11629 11630 11631 11632 11633 11634 11635 11636 11637 11638 11639 11640 11641 11642 11643 11644 11645 11646 11647 11648 11649 11650 11651 11652 11653 11654 11655 11656 11657 11658 11659 11660 11661 11662 11663 11664 11665 11666 11667 11668 11669 11670 11671 11672 11673 11674 11675 11676 11677 11678 11679 11680 11681 11682 11683 11684 11685 11686 11687 11688 11689 11690 11691 11692 11693 11694 11695 11696 11697 11698 11699 11700 11701 11702 11703 11704 11705 11706 11707 11708 11709 11710 11711 11712 11713 11714 11715 11716 11717 11718 11719 11720 11721 11722 11723 11724 11725 11726 11727 11728 11729 11730 11731 11732 11733 11734 11735 11736 11737 11738 11739 11740 11741 11742 11743 11744 11745 11746 11747 11748 11749 11750 11751 11752 11753 11754 11755 11756 11757 11758 11759 11760 11761 11762 11763 11764 11765 11766 11767 11768 11769 11770 11771 11772 11773 11774 11775 11776 11777 11778 11779 11780 11781 11782 11783 11784 11785 11786 11787 11788 11789 11790 11791 11792 11793 11794 11795 11796 11797 11798 11799 11800 11801 11802 11803 11804 11805 11806 11807 11808 11809 11810 11811 11812 11813 11814 11815 11816 11817 11818 11819 11820 11821 11822 11823 11824 11825 11826 11827 11828 11829 11830 11831 11832 11833 11834 11835 11836 11837 11838 11839 11840 11841 11842 11843 11844 11845 11846 11847 11848 11849 11850 11851 11852 11853 11854 11855 11856 11857 11858 11859 11860 11861 11862 11863 11864 11865 11866 11867 11868 11869 11870 11871 11872 11873 11874 11875 11876 11877 11878 11879 11880 11881 11882 11883 11884 11885 11886 11887 11888 11889 11890 11891 11892 11893 11894 11895 11896 11897 11898 11899 11900 11901 11902 11903 11904 11905 11906 11907 11908 11909 11910 11911 11912 11913 11914 11915 11916 11917 
11918 11919 11920 11921 11922 11923 11924 11925 11926 11927 11928 11929 11930 11931 11932 11933 11934 11935 11936 11937 11938 11939 11940 11941 11942 11943 11944 11945 11946 11947 11948 11949 11950 11951 11952 11953 11954 11955 11956 11957 11958 11959 11960 11961 11962 11963 11964 11965 11966 11967 11968 11969 11970 11971 11972 11973 11974 11975 11976 11977 11978 11979 11980 11981 11982 11983 11984 11985 11986 11987 11988 11989 11990 11991 11992 11993 11994 11995 11996 11997 11998 11999 12000 12001 12002 12003 12004 12005 12006 12007 12008 12009 12010 12011 12012 12013 12014 12015 12016 12017 12018 12019 12020 12021 12022 12023 12024 12025 12026 12027 12028 12029 12030 12031 12032 12033 12034 12035 12036 12037 12038 12039 12040 12041 12042 12043 12044 12045 12046 12047 12048 12049 12050 12051 12052 12053 12054 12055 12056 12057 12058 12059 12060 12061 12062 12063 12064 12065 12066 12067 12068 12069 12070 12071 12072 12073 12074 12075 12076 12077 12078 12079 12080 12081 12082 12083 12084 12085 12086 12087 12088 12089 12090 12091 12092 12093 12094 12095 12096 12097 12098 12099 12100 12101 12102 12103 12104 12105 12106 12107 12108 12109 12110 12111 12112 12113 12114 12115 12116 12117 12118 12119 12120 12121 12122 12123 12124 12125 12126 12127 12128 12129 12130 12131 12132 12133 12134 12135 12136 12137 12138 12139 12140 12141 12142 12143 12144 12145 12146 12147 12148 12149 12150 12151 12152 12153 12154 12155 12156 12157 12158 12159 12160 12161 12162 12163 12164 12165 12166 12167 12168 12169 12170 12171 12172 12173 12174 12175 12176 12177 12178 12179 12180 12181 12182 12183 12184 12185 12186 12187 12188 12189 12190 12191 12192 12193 12194 12195 12196 12197 12198 12199 12200 12201 12202 12203 12204 12205 12206 12207 12208 12209 12210 12211 12212 12213 12214 12215 12216 12217 12218 12219 12220 12221 12222 12223 12224 12225 12226 12227 12228 12229 12230 12231 12232 12233 12234 12235 12236 12237 12238 12239 12240 12241 12242 12243 12244 12245 12246 12247 12248 12249 12250 12251 12252 12253 12254 12255 12256 12257 12258 12259 12260 12261 12262 12263 12264 12265 12266 12267 12268 12269 12270 12271 12272 12273 12274 12275 12276 12277 12278 12279 12280 12281 12282 12283 12284 12285 12286 12287 12288 12289 12290 12291 12292 12293 12294 12295 12296 12297 12298 12299 12300 12301 12302 12303 12304 12305 12306 12307 12308 12309 12310 12311 12312 12313 12314 12315 12316 12317 12318 12319 12320 12321 12322 12323 12324 12325 12326 12327 12328 12329 12330 12331 12332 12333 12334 12335 12336 12337 12338 12339 12340 12341 12342 12343 12344 12345 12346 12347 12348 12349 12350 12351 12352 12353 12354 12355 12356 12357 12358 12359 12360 12361 12362 12363 12364 12365 12366 12367 12368 12369 12370 12371 12372 12373 12374 12375 12376 12377 12378 12379 12380 12381 12382 12383 12384 12385 12386 12387 12388 12389 12390 12391 12392 12393 12394 12395 12396 12397 12398 12399 12400 12401 12402 12403 12404 12405 12406 12407 12408 12409 12410 12411 12412 12413 12414 12415 12416 12417 12418 12419 12420 12421 12422 12423 12424 12425 12426 12427 12428 12429 12430 12431 12432 12433 12434 12435 12436 12437 12438 12439 12440 12441 12442 12443 12444 12445 12446 12447 12448 12449 12450 12451 12452 12453 12454 12455 12456 12457 12458 12459 12460 12461 12462 12463 12464 12465 12466 12467 12468 12469 12470 12471 12472 12473 12474 12475 12476 12477 12478 12479 12480 12481 12482 12483 12484 12485 12486 12487 12488 12489 12490 12491 12492 12493 12494 12495 12496 12497 12498 12499 12500 12501 12502 12503 12504 12505 12506 12507 12508 12509 
12510 12511 12512 12513 12514 12515 12516 12517 12518 12519 12520 12521 12522 12523 12524 12525 12526 12527 12528 12529 12530 12531 12532 12533 12534 12535 12536 12537 12538 12539 12540 12541 12542 12543 12544 12545 12546 12547 12548 12549 12550 12551 12552 12553 12554 12555 12556 12557 12558 12559 12560 12561 12562 12563 12564 12565 12566 12567 12568 12569 12570 12571 12572 12573 12574 12575 12576 12577 12578 12579 12580 12581 12582 12583 12584 12585 12586 12587 12588 12589 12590 12591 12592 12593 12594 12595 12596 12597 12598 12599 12600 12601 12602 12603 12604 12605 12606 12607 12608 12609 12610 12611 12612 12613 12614 12615 12616 12617 12618 12619 12620 12621 12622 12623 12624 12625 12626 12627 12628 12629 12630 12631 12632 12633 12634 12635 12636 12637 12638 12639 12640 12641 12642 12643 12644 12645 12646 12647 12648 12649 12650 12651 12652 12653 12654 12655 12656 12657 12658 12659 12660 12661 12662 12663 12664 12665 12666 12667 12668 12669 12670 12671 12672 12673 12674 12675 12676 12677 12678 12679 12680 12681 12682 12683 12684 12685 12686 12687 12688 12689 12690 12691 12692 12693 12694 12695 12696 12697 12698 12699 12700 12701 12702 12703 12704 12705 12706 12707 12708 12709 12710 12711 12712 12713 12714 12715 12716 12717 12718 12719 12720 12721 12722 12723 12724 12725 12726 12727 12728 12729 12730 12731 12732 12733 12734 12735 12736 12737 12738 12739 12740 12741 12742 12743 12744 12745 12746 12747 12748 12749 12750 12751 12752 12753 12754 12755 12756 12757 12758 12759 12760 12761 12762 12763 12764 12765 12766 12767 12768 12769 12770 12771 12772 12773 12774 12775 12776 12777 12778 12779 12780 12781 12782 12783 12784 12785 12786 12787 12788 12789 12790 12791 12792 12793 12794 12795 12796 12797 12798 12799 12800 12801 12802 12803 12804 12805 12806 12807 12808 12809 12810 12811 12812 12813 12814 12815 12816 12817 12818 12819 12820 12821 12822 12823 12824 12825 12826 12827 12828 12829 12830 12831 12832 12833 12834 12835 12836 12837 12838 12839 12840 12841 12842 12843 12844 12845 12846 12847 12848 12849 12850 12851 12852 12853 12854 12855 12856 12857 12858 12859 12860 12861 12862 12863 12864 12865 12866 12867 12868 12869 12870 12871 12872 12873 12874 12875 12876 12877 12878 12879 12880 12881 12882 12883 12884 12885 12886 12887 12888 12889 12890 12891 12892 12893 12894 12895 12896 12897 12898 12899 12900 12901 12902 12903 12904 12905 12906 12907 12908 12909 12910 12911 12912 12913 12914 12915 12916 12917 12918 12919 12920 12921 12922 12923 12924 12925 12926 12927 12928 12929 12930 12931 12932 12933 12934 12935 12936 12937 12938 12939 12940 12941 12942 12943 12944 12945 12946 12947 12948 12949 12950 12951 12952 12953 12954 12955 12956 12957 12958 12959 12960 12961 12962 12963 12964 12965 12966 12967 12968 12969 12970 12971 12972 12973 12974 12975 12976 12977 12978 12979 12980 12981 12982 12983 12984 12985 12986 12987 12988 12989 12990 12991 12992 12993 12994 12995 12996 12997 12998 12999 13000 13001 13002 13003 13004 13005 13006 13007 13008 13009 13010 13011 13012 13013 13014 13015 13016 13017 13018 13019 13020 13021 13022 13023 13024 13025 13026 13027 13028 13029 13030 13031 13032 13033 13034 13035 13036 13037 13038 13039 13040 13041 13042 13043 13044 13045 13046 13047 13048 13049 13050 13051 13052 13053 13054 13055 13056 13057 13058 13059 13060 13061 13062 13063 13064 13065 13066 13067 13068 13069 13070 13071 13072 13073 13074 13075 13076 13077 13078 13079 13080 13081 13082 13083 13084 13085 13086 13087 13088 13089 13090 13091 13092 13093 13094 13095 13096 13097 13098 13099 13100 13101 
13102 13103 13104 13105 13106 13107 13108 13109 13110 13111 13112 13113 13114 13115 13116 13117 13118 13119 13120 13121 13122 13123 13124 13125 13126 13127 13128 13129 13130 13131 13132 13133 13134 13135 13136 13137 13138 13139 13140 13141 13142 13143 13144 13145 13146 13147 13148 13149 13150 13151 13152 13153 13154 13155 13156 13157 13158 13159 13160 13161 13162 13163 13164 13165 13166 13167 13168 13169 13170 13171 13172 13173 13174 13175 13176 13177 13178 13179 13180 13181 13182 13183 13184 13185 13186 13187 13188 13189 13190 13191 13192 13193 13194 13195 13196 13197 13198 13199 13200 13201 13202 13203 13204 13205 13206 13207 13208 13209 13210 13211 13212 13213 13214 13215 13216 13217 13218 13219 13220 13221 13222 13223 13224 13225 13226 13227 13228 13229 13230 13231 13232 13233 13234 13235 13236 13237 13238 13239 13240 13241 13242 13243 13244 13245 13246 13247 13248 13249 13250 13251 13252 13253 13254 13255 13256 13257 13258 13259 13260 13261 13262 13263 13264 13265 13266 13267 13268 13269 13270 13271 13272 13273 13274 13275 13276 13277 13278 13279 13280 13281 13282 13283 13284 13285 13286 13287 13288 13289 13290 13291 13292 13293 13294 13295 13296 13297 13298 13299 13300 13301 13302 13303 13304 13305 13306 13307 13308 13309 13310 13311 13312 13313 13314 13315 13316 13317 13318 13319 13320 13321 13322 13323 13324 13325 13326 13327 13328 13329 13330 13331 13332 13333 13334 13335 13336 13337 13338 13339 13340 13341 13342 13343 13344 13345 13346 13347 13348 13349 13350 13351 13352 13353 13354 13355 13356 13357 13358 13359 13360 13361 13362 13363 13364 13365 13366 13367 13368 13369 13370 13371 13372 13373 13374 13375 13376 13377 13378 13379 13380 13381 13382 13383 13384 13385 13386 13387 13388 13389 13390 13391 13392 13393 13394 13395 13396 13397 13398 13399 13400 13401 13402 13403 13404 13405 13406 13407 13408 13409 13410 13411 13412 13413 13414 13415 13416 13417 13418 13419 13420 13421 13422 13423 13424 13425 13426 13427 13428 13429 13430 13431 13432 13433 13434 13435 13436 13437 13438 13439 13440 13441 13442 13443 13444 13445 13446 13447 13448 13449 13450 13451 13452 13453 13454 13455 13456 13457 13458 13459 13460 13461 13462 13463 13464 13465 13466 13467 13468 13469 13470 13471 13472 13473 13474 13475 13476 13477 13478 13479 13480 13481 13482 13483 13484 13485 13486 13487 13488 13489 13490 13491 13492 13493 13494 13495 13496 13497 13498 13499 13500 13501 13502 13503 13504 13505 13506 13507 13508 13509 13510 13511 13512 13513 13514 13515 13516 13517 13518 13519 13520 13521 13522 13523 13524 13525 13526 13527 13528 13529 13530 13531 13532 13533 13534 13535 13536 13537 13538 13539 13540 13541 13542 13543 13544 13545 13546 13547 13548 13549 13550 13551 13552 13553 13554 13555 13556 13557 13558 13559 13560 13561 13562 13563 13564 13565 13566 13567 13568 13569 13570 13571 13572 13573 13574 13575 13576 13577 13578 13579 13580 13581 13582 13583 13584 13585 13586 13587 13588 13589 13590 13591 13592 13593 13594 13595 13596 13597 13598 13599 13600 13601 13602 13603 13604 13605 13606 13607 13608 13609 13610 13611 13612 13613 13614 13615 13616 13617 13618 13619 13620 13621 13622 13623 13624 13625 13626 13627 13628 13629 13630 13631 13632 13633 13634 13635 13636 13637 13638 13639 13640 13641 13642 13643 13644 13645 13646 13647 13648 13649 13650 13651 13652 13653 13654 13655 13656 13657 13658 13659 13660 13661 13662 13663 13664 13665 13666 13667 13668 13669 13670 13671 13672 13673 13674 13675 13676 13677 13678 13679 13680 13681 13682 13683 13684 13685 13686 13687 13688 13689 13690 13691 13692 13693 
// SPDX-License-Identifier: GPL-2.0
/*
 * Performance events core code:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
 *  Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/tick.h>
#include <linux/sysfs.h>
#include <linux/dcache.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/reboot.h>
#include <linux/vmstat.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
#include <linux/trace_events.h>
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/compat.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/namei.h>
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include <linux/min_heap.h>
#include <linux/highmem.h>
#include <linux/pgtable.h>
#include <linux/buildid.h>
#include <linux/task_work.h>

#include "internal.h"

#include <asm/irq_regs.h>

typedef int (*remote_function_f)(void *);

struct remote_function_call {
	struct task_struct	*p;
	remote_function_f	func;
	void			*info;
	int			ret;
};

static void remote_function(void *data)
{
	struct remote_function_call *tfc = data;
	struct task_struct *p = tfc->p;

	if (p) {
		/* -EAGAIN */
		if (task_cpu(p) != smp_processor_id())
			return;

		/*
		 * Now that we're on right CPU with IRQs disabled, we can test
		 * if we hit the right task without races.
		 */
		tfc->ret = -ESRCH; /* No such (running) process */
		if (p != current)
			return;
	}

	tfc->ret = tfc->func(tfc->info);
}

/**
 * task_function_call - call a function on the cpu on which a task runs
 * @p:		the task to evaluate
 * @func:	the function to be called
 * @info:	the function call argument
 *
 * Calls the function @func when the task is currently running. This might
 * be on the current CPU, which just calls the function directly. This will
 * retry due to any failures in smp_call_function_single(), such as if the
 * task_cpu() goes offline concurrently.
 *
 * returns @func return value or -ESRCH or -ENXIO when the process isn't running
 */
static int task_function_call(struct task_struct *p, remote_function_f func, void *info)
{
	struct remote_function_call data = {
		.p	= p,
		.func	= func,
		.info	= info,
		.ret	= -EAGAIN,
	};
	int ret;

	for (;;) {
		ret = smp_call_function_single(task_cpu(p), remote_function,
					       &data, 1);
		if (!ret)
			ret = data.ret;

		if (ret != -EAGAIN)
			break;

		cond_resched();
	}

	return ret;
}

/**
 * cpu_function_call - call a function on the cpu
 * @cpu:	target cpu to queue this function
 * @func:	the function to be called
 * @info:	the function call argument
 *
 * Calls the function @func on the remote cpu.
 *
 * returns: @func return value or -ENXIO when the cpu is offline
 */
static int cpu_function_call(int cpu, remote_function_f func, void *info)
{
	struct remote_function_call data = {
		.p	= NULL,
		.func	= func,
		.info	= info,
		.ret	= -ENXIO, /* No such CPU */
	};

	smp_call_function_single(cpu, remote_function, &data, 1);

	return data.ret;
}
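/*
 * Illustrative usage sketch (editorial note, not part of the original file).
 * The callback name my_remote_cb and its argument are hypothetical; the
 * pattern is simply "run @func with IRQs disabled on the CPU that matters":
 *
 *	static int my_remote_cb(void *info)
 *	{
 *		// runs on task_cpu(p) (or on @cpu), hardirqs disabled
 *		return 0;
 *	}
 *
 *	err = task_function_call(p, my_remote_cb, NULL);  // chase a running task
 *	err = cpu_function_call(cpu, my_remote_cb, NULL); // target a known CPU
 *
 * task_function_call() keeps retrying while it races with task migration
 * (-EAGAIN), whereas cpu_function_call() simply reports -ENXIO when the CPU
 * is offline.
 */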
static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
			  struct perf_event_context *ctx)
{
	raw_spin_lock(&cpuctx->ctx.lock);
	if (ctx)
		raw_spin_lock(&ctx->lock);
}

static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
			    struct perf_event_context *ctx)
{
	if (ctx)
		raw_spin_unlock(&ctx->lock);
	raw_spin_unlock(&cpuctx->ctx.lock);
}

#define TASK_TOMBSTONE ((void *)-1L)

static bool is_kernel_event(struct perf_event *event)
{
	return READ_ONCE(event->owner) == TASK_TOMBSTONE;
}

static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);

struct perf_event_context *perf_cpu_task_ctx(void)
{
	lockdep_assert_irqs_disabled();

	return this_cpu_ptr(&perf_cpu_context)->task_ctx;
}

/*
 * On task ctx scheduling...
 *
 * When !ctx->nr_events a task context will not be scheduled. This means
 * we can disable the scheduler hooks (for performance) without leaving
 * pending task ctx state.
 *
 * This however results in two special cases:
 *
 *  - removing the last event from a task ctx; this is relatively straight
 *    forward and is done in __perf_remove_from_context.
 *
 *  - adding the first event to a task ctx; this is tricky because we cannot
 *    rely on ctx->is_active and therefore cannot use event_function_call().
 *    See perf_install_in_context().
 *
 * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
 */

typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
			struct perf_event_context *, void *);

struct event_function_struct {
	struct perf_event *event;
	event_f func;
	void *data;
};

static int event_function(void *info)
{
	struct event_function_struct *efs = info;
	struct perf_event *event = efs->event;
	struct perf_event_context *ctx = event->ctx;
	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
	struct perf_event_context *task_ctx = cpuctx->task_ctx;
	int ret = 0;

	lockdep_assert_irqs_disabled();

	perf_ctx_lock(cpuctx, task_ctx);
	/*
	 * Since we do the IPI call without holding ctx->lock things can have
	 * changed, double check we hit the task we set out to hit.
	 */
	if (ctx->task) {
		if (ctx->task != current) {
			ret = -ESRCH;
			goto unlock;
		}

		/*
		 * We only use event_function_call() on established contexts,
		 * and event_function() is only ever called when active (or
		 * rather, we'll have bailed in task_function_call() or the
		 * above ctx->task != current test), therefore we must have
		 * ctx->is_active here.
		 */
		WARN_ON_ONCE(!ctx->is_active);
		/*
		 * And since we have ctx->is_active, cpuctx->task_ctx must
		 * match.
		 */
		WARN_ON_ONCE(task_ctx != ctx);
	} else {
		WARN_ON_ONCE(&cpuctx->ctx != ctx);
	}

	efs->func(event, cpuctx, ctx, efs->data);
unlock:
	perf_ctx_unlock(cpuctx, task_ctx);

	return ret;
}

static void event_function_call(struct perf_event *event, event_f func, void *data)
{
	struct perf_event_context *ctx = event->ctx;
	struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
	struct event_function_struct efs = {
		.event = event,
		.func = func,
		.data = data,
	};

	if (!event->parent) {
		/*
		 * If this is a !child event, we must hold ctx::mutex to
		 * stabilize the event->ctx relation. See
		 * perf_event_ctx_lock().
		 */
		lockdep_assert_held(&ctx->mutex);
	}

	if (!task) {
		cpu_function_call(event->cpu, event_function, &efs);
		return;
	}

	if (task == TASK_TOMBSTONE)
		return;

again:
	if (!task_function_call(task, event_function, &efs))
		return;

	raw_spin_lock_irq(&ctx->lock);
	/*
	 * Reload the task pointer, it might have been changed by
	 * a concurrent perf_event_context_sched_out().
	 */
	task = ctx->task;
	if (task == TASK_TOMBSTONE) {
		raw_spin_unlock_irq(&ctx->lock);
		return;
	}
	if (ctx->is_active) {
		raw_spin_unlock_irq(&ctx->lock);
		goto again;
	}
	func(event, NULL, ctx, data);
	raw_spin_unlock_irq(&ctx->lock);
}
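/*
 * Summary sketch of the protocol event_function_call() implements above
 * (editorial note, not in the original source):
 *
 *	if the event is a CPU event:
 *		IPI the target CPU and run func under both ctx locks;
 *	else (task event):
 *		try to IPI the task's CPU via task_function_call();
 *		if that missed (wrong task / context not active), take
 *		ctx->lock and, provided the context is still inactive,
 *		run func locally with cpuctx == NULL;
 *		if the context became active again meanwhile, retry the IPI.
 *
 * The TASK_TOMBSTONE checks turn the whole thing into a no-op once the
 * context's task is being torn down.
 */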
/*
 * Similar to event_function_call() + event_function(), but hard assumes IRQs
 * are already disabled and we're on the right CPU.
 */
static void event_function_local(struct perf_event *event, event_f func, void *data)
{
	struct perf_event_context *ctx = event->ctx;
	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
	struct task_struct *task = READ_ONCE(ctx->task);
	struct perf_event_context *task_ctx = NULL;

	lockdep_assert_irqs_disabled();

	if (task) {
		if (task == TASK_TOMBSTONE)
			return;

		task_ctx = ctx;
	}

	perf_ctx_lock(cpuctx, task_ctx);

	task = ctx->task;
	if (task == TASK_TOMBSTONE)
		goto unlock;

	if (task) {
		/*
		 * We must be either inactive or active and the right task,
		 * otherwise we're screwed, since we cannot IPI to somewhere
		 * else.
		 */
		if (ctx->is_active) {
			if (WARN_ON_ONCE(task != current))
				goto unlock;

			if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
				goto unlock;
		}
	} else {
		WARN_ON_ONCE(&cpuctx->ctx != ctx);
	}

	func(event, cpuctx, ctx, data);
unlock:
	perf_ctx_unlock(cpuctx, task_ctx);
}

#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
		       PERF_FLAG_FD_OUTPUT  |\
		       PERF_FLAG_PID_CGROUP |\
		       PERF_FLAG_FD_CLOEXEC)

/*
 * branch priv levels that need permission checks
 */
#define PERF_SAMPLE_BRANCH_PERM_PLM \
	(PERF_SAMPLE_BRANCH_KERNEL |\
	 PERF_SAMPLE_BRANCH_HV)

enum event_type_t {
	EVENT_FLEXIBLE = 0x1,
	EVENT_PINNED = 0x2,
	EVENT_TIME = 0x4,
	/* see ctx_resched() for details */
	EVENT_CPU = 0x8,
	EVENT_CGROUP = 0x10,
	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};

/*
 * perf_sched_events : >0 events exist
 */

static void perf_sched_delayed(struct work_struct *work);
DEFINE_STATIC_KEY_FALSE(perf_sched_events);
static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count;

static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);

static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly;
static atomic_t nr_bpf_events __read_mostly;
static atomic_t nr_cgroup_events __read_mostly;
static atomic_t nr_text_poke_events __read_mostly;
static atomic_t nr_build_id_events __read_mostly;

static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
static cpumask_var_t perf_online_mask;
static struct kmem_cache *perf_event_cache;

/*
 * perf event paranoia level:
 *  -1 - not paranoid at all
 *   0 - disallow raw tracepoint access for unpriv
 *   1 - disallow cpu events for unpriv
 *   2 - disallow kernel profiling for unpriv
 */
int sysctl_perf_event_paranoid __read_mostly = 2;

/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */

/*
 * max perf event sample rate
 */
#define DEFAULT_MAX_SAMPLE_RATE		100000
#define DEFAULT_SAMPLE_PERIOD_NS	(NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
#define DEFAULT_CPU_TIME_MAX_PERCENT	25

int sysctl_perf_event_sample_rate __read_mostly	= DEFAULT_MAX_SAMPLE_RATE;

static int max_samples_per_tick __read_mostly	= DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
static int perf_sample_period_ns __read_mostly	= DEFAULT_SAMPLE_PERIOD_NS;

static int perf_sample_allowed_ns __read_mostly =
	DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;

static void update_perf_cpu_limits(void)
{
	u64 tmp = perf_sample_period_ns;

	tmp *= sysctl_perf_cpu_time_max_percent;
	tmp = div_u64(tmp, 100);
	if (!tmp)
		tmp = 1;

	WRITE_ONCE(perf_sample_allowed_ns, tmp);
}
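/*
 * Worked example with the defaults above (editorial note, not in the
 * original source): DEFAULT_MAX_SAMPLE_RATE = 100000 samples/sec gives a
 * sample period of NSEC_PER_SEC / 100000 = 10000 ns, and with the CPU
 * budget at DEFAULT_CPU_TIME_MAX_PERCENT = 25 the limit becomes
 *
 *	perf_sample_allowed_ns = 10000 * 25 / 100 = 2500 ns
 *
 * i.e. a sample is expected to cost at most ~2.5us on average before the
 * throttling machinery below starts lowering the sample rate.
 */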
*/ if (ctx->is_active) { if (WARN_ON_ONCE(task != current)) goto unlock; if (WARN_ON_ONCE(cpuctx->task_ctx != ctx)) goto unlock; } } else { WARN_ON_ONCE(&cpuctx->ctx != ctx); } func(event, cpuctx, ctx, data); unlock: perf_ctx_unlock(cpuctx, task_ctx); } #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\ PERF_FLAG_FD_OUTPUT |\ PERF_FLAG_PID_CGROUP |\ PERF_FLAG_FD_CLOEXEC) /* * branch priv levels that need permission checks */ #define PERF_SAMPLE_BRANCH_PERM_PLM \ (PERF_SAMPLE_BRANCH_KERNEL |\ PERF_SAMPLE_BRANCH_HV) enum event_type_t { EVENT_FLEXIBLE = 0x1, EVENT_PINNED = 0x2, EVENT_TIME = 0x4, /* see ctx_resched() for details */ EVENT_CPU = 0x8, EVENT_CGROUP = 0x10, EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, }; /* * perf_sched_events : >0 events exist */ static void perf_sched_delayed(struct work_struct *work); DEFINE_STATIC_KEY_FALSE(perf_sched_events); static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed); static DEFINE_MUTEX(perf_sched_mutex); static atomic_t perf_sched_count; static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; static atomic_t nr_ksymbol_events __read_mostly; static atomic_t nr_bpf_events __read_mostly; static atomic_t nr_cgroup_events __read_mostly; static atomic_t nr_text_poke_events __read_mostly; static atomic_t nr_build_id_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); static struct srcu_struct pmus_srcu; static cpumask_var_t perf_online_mask; static struct kmem_cache *perf_event_cache; /* * perf event paranoia level: * -1 - not paranoid at all * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv */ int sysctl_perf_event_paranoid __read_mostly = 2; /* Minimum for 512 kiB + 1 user control page */ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ /* * max perf event sample rate */ #define DEFAULT_MAX_SAMPLE_RATE 100000 #define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE) #define DEFAULT_CPU_TIME_MAX_PERCENT 25 int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE; static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ); static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS; static int perf_sample_allowed_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100; static void update_perf_cpu_limits(void) { u64 tmp = perf_sample_period_ns; tmp *= sysctl_perf_cpu_time_max_percent; tmp = div_u64(tmp, 100); if (!tmp) tmp = 1; WRITE_ONCE(perf_sample_allowed_ns, tmp); } static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc); int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; int perf_cpu = sysctl_perf_cpu_time_max_percent; /* * If throttling is disabled don't allow the write: */ if (write && (perf_cpu == 100 || perf_cpu == 0)) return -EINVAL; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; update_perf_cpu_limits(); return 0; } int 
sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT; int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; if (sysctl_perf_cpu_time_max_percent == 100 || sysctl_perf_cpu_time_max_percent == 0) { printk(KERN_WARNING "perf: Dynamic interrupt throttling disabled, can hang your system!\n"); WRITE_ONCE(perf_sample_allowed_ns, 0); } else { update_perf_cpu_limits(); } return 0; } /* * perf samples are done in some very critical code paths (NMIs). * If they take too much CPU time, the system can lock up and not * get any real work done. This will drop the sample rate when * we detect that events are taking too long. */ #define NR_ACCUMULATED_SAMPLES 128 static DEFINE_PER_CPU(u64, running_sample_length); static u64 __report_avg; static u64 __report_allowed; static void perf_duration_warn(struct irq_work *w) { printk_ratelimited(KERN_INFO "perf: interrupt took too long (%lld > %lld), lowering " "kernel.perf_event_max_sample_rate to %d\n", __report_avg, __report_allowed, sysctl_perf_event_sample_rate); } static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn); void perf_sample_event_took(u64 sample_len_ns) { u64 max_len = READ_ONCE(perf_sample_allowed_ns); u64 running_len; u64 avg_len; u32 max; if (max_len == 0) return; /* Decay the counter by 1 average sample. */ running_len = __this_cpu_read(running_sample_length); running_len -= running_len/NR_ACCUMULATED_SAMPLES; running_len += sample_len_ns; __this_cpu_write(running_sample_length, running_len); /* * Note: this will be biased artifically low until we have * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us * from having to maintain a count. */ avg_len = running_len/NR_ACCUMULATED_SAMPLES; if (avg_len <= max_len) return; __report_avg = avg_len; __report_allowed = max_len; /* * Compute a throttle threshold 25% below the current duration. */ avg_len += avg_len / 4; max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent; if (avg_len < max) max /= (u32)avg_len; else max = 1; WRITE_ONCE(perf_sample_allowed_ns, avg_len); WRITE_ONCE(max_samples_per_tick, max); sysctl_perf_event_sample_rate = max * HZ; perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; if (!irq_work_queue(&perf_duration_work)) { early_printk("perf: interrupt took too long (%lld > %lld), lowering " "kernel.perf_event_max_sample_rate to %d\n", __report_avg, __report_allowed, sysctl_perf_event_sample_rate); } } static atomic64_t perf_event_id; static void update_context_time(struct perf_event_context *ctx); static u64 perf_event_time(struct perf_event *event); void __weak perf_event_print_debug(void) { } static inline u64 perf_clock(void) { return local_clock(); } static inline u64 perf_event_clock(struct perf_event *event) { return event->clock(); } /* * State based event timekeeping... * * The basic idea is to use event->state to determine which (if any) time * fields to increment with the current delta. This means we only need to * update timestamps when we change state or when they are explicitly requested * (read). * * Event groups make things a little more complicated, but not terribly so. The * rules for a group are that if the group leader is OFF the entire group is * OFF, irrespecive of what the group member states are. This results in * __perf_effective_state(). 
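 *
 * Illustrative example of the rule above: with
 *
 *   leader->state  == PERF_EVENT_STATE_OFF
 *   sibling->state == PERF_EVENT_STATE_INACTIVE
 *
 * __perf_effective_state(sibling) returns OFF, so __perf_update_times()
 * advances neither the enabled nor the running time of the sibling until
 * the leader is switched back on.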
* * A futher ramification is that when a group leader flips between OFF and * !OFF, we need to update all group member times. * * * NOTE: perf_event_time() is based on the (cgroup) context time, and thus we * need to make sure the relevant context time is updated before we try and * update our timestamps. */ static __always_inline enum perf_event_state __perf_effective_state(struct perf_event *event) { struct perf_event *leader = event->group_leader; if (leader->state <= PERF_EVENT_STATE_OFF) return leader->state; return event->state; } static __always_inline void __perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running) { enum perf_event_state state = __perf_effective_state(event); u64 delta = now - event->tstamp; *enabled = event->total_time_enabled; if (state >= PERF_EVENT_STATE_INACTIVE) *enabled += delta; *running = event->total_time_running; if (state >= PERF_EVENT_STATE_ACTIVE) *running += delta; } static void perf_event_update_time(struct perf_event *event) { u64 now = perf_event_time(event); __perf_update_times(event, now, &event->total_time_enabled, &event->total_time_running); event->tstamp = now; } static void perf_event_update_sibling_time(struct perf_event *leader) { struct perf_event *sibling; for_each_sibling_event(sibling, leader) perf_event_update_time(sibling); } static void perf_event_set_state(struct perf_event *event, enum perf_event_state state) { if (event->state == state) return; perf_event_update_time(event); /* * If a group leader gets enabled/disabled all its siblings * are affected too. */ if ((event->state < 0) ^ (state < 0)) perf_event_update_sibling_time(event); WRITE_ONCE(event->state, state); } /* * UP store-release, load-acquire */ #define __store_release(ptr, val) \ do { \ barrier(); \ WRITE_ONCE(*(ptr), (val)); \ } while (0) #define __load_acquire(ptr) \ ({ \ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \ barrier(); \ ___p; \ }) static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup) { struct perf_event_pmu_context *pmu_ctx; list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { if (cgroup && !pmu_ctx->nr_cgroups) continue; perf_pmu_disable(pmu_ctx->pmu); } } static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup) { struct perf_event_pmu_context *pmu_ctx; list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { if (cgroup && !pmu_ctx->nr_cgroups) continue; perf_pmu_enable(pmu_ctx->pmu); } } static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type); static void ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type); #ifdef CONFIG_CGROUP_PERF static inline bool perf_cgroup_match(struct perf_event *event) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); /* @event doesn't care about cgroup */ if (!event->cgrp) return true; /* wants specific cgroup scope but @cpuctx isn't associated with any */ if (!cpuctx->cgrp) return false; /* * Cgroup scoping is recursive. An event enabled for a cgroup is * also enabled for all its descendant cgroups. If @cpuctx's * cgroup is a descendant of @event's (the test covers identity * case), it's a match. 
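 *
 * (Illustrative paths: an event bound to cgroup /A also counts for a
 *  task running in /A/B or /A/B/C, while an event bound to /A/B does
 *  not match a task that is only in /A.)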
*/ return cgroup_is_descendant(cpuctx->cgrp->css.cgroup, event->cgrp->css.cgroup); } static inline void perf_detach_cgroup(struct perf_event *event) { css_put(&event->cgrp->css); event->cgrp = NULL; } static inline int is_cgroup_event(struct perf_event *event) { return event->cgrp != NULL; } static inline u64 perf_cgroup_event_time(struct perf_event *event) { struct perf_cgroup_info *t; t = per_cpu_ptr(event->cgrp->info, event->cpu); return t->time; } static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { struct perf_cgroup_info *t; t = per_cpu_ptr(event->cgrp->info, event->cpu); if (!__load_acquire(&t->active)) return t->time; now += READ_ONCE(t->timeoffset); return now; } static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv) { if (adv) info->time += now - info->timestamp; info->timestamp = now; /* * see update_context_time() */ WRITE_ONCE(info->timeoffset, info->time - info->timestamp); } static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final) { struct perf_cgroup *cgrp = cpuctx->cgrp; struct cgroup_subsys_state *css; struct perf_cgroup_info *info; if (cgrp) { u64 now = perf_clock(); for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); info = this_cpu_ptr(cgrp->info); __update_cgrp_time(info, now, true); if (final) __store_release(&info->active, 0); } } } static inline void update_cgrp_time_from_event(struct perf_event *event) { struct perf_cgroup_info *info; /* * ensure we access cgroup data only when needed and * when we know the cgroup is pinned (css_get) */ if (!is_cgroup_event(event)) return; info = this_cpu_ptr(event->cgrp->info); /* * Do not update time when cgroup is not active */ if (info->active) __update_cgrp_time(info, perf_clock(), true); } static inline void perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) { struct perf_event_context *ctx = &cpuctx->ctx; struct perf_cgroup *cgrp = cpuctx->cgrp; struct perf_cgroup_info *info; struct cgroup_subsys_state *css; /* * ctx->lock held by caller * ensure we do not access cgroup data * unless we have the cgroup pinned (css_get) */ if (!cgrp) return; WARN_ON_ONCE(!ctx->nr_cgroups); for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); info = this_cpu_ptr(cgrp->info); __update_cgrp_time(info, ctx->timestamp, false); __store_release(&info->active, 1); } } /* * reschedule events based on the cgroup constraint of task. */ static void perf_cgroup_switch(struct task_struct *task) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_cgroup *cgrp; /* * cpuctx->cgrp is set when the first cgroup event enabled, * and is cleared when the last cgroup event disabled. 
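 *
 * (The body below deliberately orders things as: schedule out the old
 *  cgroup's events while cpuctx->cgrp still points at the old cgroup so
 *  its time is folded in, then switch cpuctx->cgrp, then schedule the
 *  new cgroup's events back in.)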
*/ if (READ_ONCE(cpuctx->cgrp) == NULL) return; WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); cgrp = perf_cgroup_from_task(task, NULL); if (READ_ONCE(cpuctx->cgrp) == cgrp) return; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_ctx_disable(&cpuctx->ctx, true); ctx_sched_out(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP); /* * must not be done before ctxswout due * to update_cgrp_time_from_cpuctx() in * ctx_sched_out() */ cpuctx->cgrp = cgrp; /* * set cgrp before ctxsw in to allow * perf_cgroup_set_timestamp() in ctx_sched_in() * to not have to pass task around */ ctx_sched_in(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP); perf_ctx_enable(&cpuctx->ctx, true); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } static int perf_cgroup_ensure_storage(struct perf_event *event, struct cgroup_subsys_state *css) { struct perf_cpu_context *cpuctx; struct perf_event **storage; int cpu, heap_size, ret = 0; /* * Allow storage to have sufficent space for an iterator for each * possibly nested cgroup plus an iterator for events with no cgroup. */ for (heap_size = 1; css; css = css->parent) heap_size++; for_each_possible_cpu(cpu) { cpuctx = per_cpu_ptr(&perf_cpu_context, cpu); if (heap_size <= cpuctx->heap_size) continue; storage = kmalloc_node(heap_size * sizeof(struct perf_event *), GFP_KERNEL, cpu_to_node(cpu)); if (!storage) { ret = -ENOMEM; break; } raw_spin_lock_irq(&cpuctx->ctx.lock); if (cpuctx->heap_size < heap_size) { swap(cpuctx->heap, storage); if (storage == cpuctx->heap_default) storage = NULL; cpuctx->heap_size = heap_size; } raw_spin_unlock_irq(&cpuctx->ctx.lock); kfree(storage); } return ret; } static inline int perf_cgroup_connect(int fd, struct perf_event *event, struct perf_event_attr *attr, struct perf_event *group_leader) { struct perf_cgroup *cgrp; struct cgroup_subsys_state *css; struct fd f = fdget(fd); int ret = 0; if (!f.file) return -EBADF; css = css_tryget_online_from_dir(f.file->f_path.dentry, &perf_event_cgrp_subsys); if (IS_ERR(css)) { ret = PTR_ERR(css); goto out; } ret = perf_cgroup_ensure_storage(event, css); if (ret) goto out; cgrp = container_of(css, struct perf_cgroup, css); event->cgrp = cgrp; /* * all events in a group must monitor * the same cgroup because a task belongs * to only one perf cgroup at a time */ if (group_leader && group_leader->cgrp != cgrp) { perf_detach_cgroup(event); ret = -EINVAL; } out: fdput(f); return ret; } static inline void perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx; if (!is_cgroup_event(event)) return; event->pmu_ctx->nr_cgroups++; /* * Because cgroup events are always per-cpu events, * @ctx == &cpuctx->ctx. */ cpuctx = container_of(ctx, struct perf_cpu_context, ctx); if (ctx->nr_cgroups++) return; cpuctx->cgrp = perf_cgroup_from_task(current, ctx); } static inline void perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx; if (!is_cgroup_event(event)) return; event->pmu_ctx->nr_cgroups--; /* * Because cgroup events are always per-cpu events, * @ctx == &cpuctx->ctx. 
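 *
 * (ctx->nr_cgroups is a plain count: perf_cgroup_event_enable() set
 *  cpuctx->cgrp on the 0 -> 1 transition, and it is only cleared again
 *  below on the 1 -> 0 transition.)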
*/ cpuctx = container_of(ctx, struct perf_cpu_context, ctx); if (--ctx->nr_cgroups) return; cpuctx->cgrp = NULL; } #else /* !CONFIG_CGROUP_PERF */ static inline bool perf_cgroup_match(struct perf_event *event) { return true; } static inline void perf_detach_cgroup(struct perf_event *event) {} static inline int is_cgroup_event(struct perf_event *event) { return 0; } static inline void update_cgrp_time_from_event(struct perf_event *event) { } static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final) { } static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, struct perf_event_attr *attr, struct perf_event *group_leader) { return -EINVAL; } static inline void perf_cgroup_set_timestamp(struct perf_cpu_context *cpuctx) { } static inline u64 perf_cgroup_event_time(struct perf_event *event) { return 0; } static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { return 0; } static inline void perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) { } static inline void perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx) { } static void perf_cgroup_switch(struct task_struct *task) { } #endif /* * set default to be dependent on timer tick just * like original code */ #define PERF_CPU_HRTIMER (1000 / HZ) /* * function must be called with interrupts disabled */ static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr) { struct perf_cpu_pmu_context *cpc; bool rotations; lockdep_assert_irqs_disabled(); cpc = container_of(hr, struct perf_cpu_pmu_context, hrtimer); rotations = perf_rotate_context(cpc); raw_spin_lock(&cpc->hrtimer_lock); if (rotations) hrtimer_forward_now(hr, cpc->hrtimer_interval); else cpc->hrtimer_active = 0; raw_spin_unlock(&cpc->hrtimer_lock); return rotations ? 
HRTIMER_RESTART : HRTIMER_NORESTART; } static void __perf_mux_hrtimer_init(struct perf_cpu_pmu_context *cpc, int cpu) { struct hrtimer *timer = &cpc->hrtimer; struct pmu *pmu = cpc->epc.pmu; u64 interval; /* * check default is sane, if not set then force to * default interval (1/tick) */ interval = pmu->hrtimer_interval_ms; if (interval < 1) interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER; cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); raw_spin_lock_init(&cpc->hrtimer_lock); hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); timer->function = perf_mux_hrtimer_handler; } static int perf_mux_hrtimer_restart(struct perf_cpu_pmu_context *cpc) { struct hrtimer *timer = &cpc->hrtimer; unsigned long flags; raw_spin_lock_irqsave(&cpc->hrtimer_lock, flags); if (!cpc->hrtimer_active) { cpc->hrtimer_active = 1; hrtimer_forward_now(timer, cpc->hrtimer_interval); hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); } raw_spin_unlock_irqrestore(&cpc->hrtimer_lock, flags); return 0; } static int perf_mux_hrtimer_restart_ipi(void *arg) { return perf_mux_hrtimer_restart(arg); } void perf_pmu_disable(struct pmu *pmu) { int *count = this_cpu_ptr(pmu->pmu_disable_count); if (!(*count)++) pmu->pmu_disable(pmu); } void perf_pmu_enable(struct pmu *pmu) { int *count = this_cpu_ptr(pmu->pmu_disable_count); if (!--(*count)) pmu->pmu_enable(pmu); } static void perf_assert_pmu_disabled(struct pmu *pmu) { WARN_ON_ONCE(*this_cpu_ptr(pmu->pmu_disable_count) == 0); } static void get_ctx(struct perf_event_context *ctx) { refcount_inc(&ctx->refcount); } static void *alloc_task_ctx_data(struct pmu *pmu) { if (pmu->task_ctx_cache) return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL); return NULL; } static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data) { if (pmu->task_ctx_cache && task_ctx_data) kmem_cache_free(pmu->task_ctx_cache, task_ctx_data); } static void free_ctx(struct rcu_head *head) { struct perf_event_context *ctx; ctx = container_of(head, struct perf_event_context, rcu_head); kfree(ctx); } static void put_ctx(struct perf_event_context *ctx) { if (refcount_dec_and_test(&ctx->refcount)) { if (ctx->parent_ctx) put_ctx(ctx->parent_ctx); if (ctx->task && ctx->task != TASK_TOMBSTONE) put_task_struct(ctx->task); call_rcu(&ctx->rcu_head, free_ctx); } } /* * Because of perf_event::ctx migration in sys_perf_event_open::move_group and * perf_pmu_migrate_context() we need some magic. * * Those places that change perf_event::ctx will hold both * perf_event_ctx::mutex of the 'old' and 'new' ctx value. * * Lock ordering is by mutex address. There are two other sites where * perf_event_context::mutex nests and those are: * * - perf_event_exit_task_context() [ child , 0 ] * perf_event_exit_event() * put_event() [ parent, 1 ] * * - perf_event_init_context() [ parent, 0 ] * inherit_task_group() * inherit_group() * inherit_event() * perf_event_alloc() * perf_init_event() * perf_try_init_event() [ child , 1 ] * * While it appears there is an obvious deadlock here -- the parent and child * nesting levels are inverted between the two. This is in fact safe because * life-time rules separate them. That is an exiting task cannot fork, and a * spawning task cannot (yet) exit. * * But remember that these are parent<->child context relations, and * migration does not affect children, therefore these two orderings should not * interact. 
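 *
 * (Illustrative: when both mutexes must be held at once, as in the
 *  move_group case above, taking them in ascending address order is
 *  what keeps two such callers from deadlocking against each other.)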
* * The change in perf_event::ctx does not affect children (as claimed above) * because the sys_perf_event_open() case will install a new event and break * the ctx parent<->child relation, and perf_pmu_migrate_context() is only * concerned with cpuctx and that doesn't have children. * * The places that change perf_event::ctx will issue: * * perf_remove_from_context(); * synchronize_rcu(); * perf_install_in_context(); * * to affect the change. The remove_from_context() + synchronize_rcu() should * quiesce the event, after which we can install it in the new location. This * means that only external vectors (perf_fops, prctl) can perturb the event * while in transit. Therefore all such accessors should also acquire * perf_event_context::mutex to serialize against this. * * However; because event->ctx can change while we're waiting to acquire * ctx->mutex we must be careful and use the below perf_event_ctx_lock() * function. * * Lock order: * exec_update_lock * task_struct::perf_event_mutex * perf_event_context::mutex * perf_event::child_mutex; * perf_event_context::lock * perf_event::mmap_mutex * mmap_lock * perf_addr_filters_head::lock * * cpu_hotplug_lock * pmus_lock * cpuctx->mutex / perf_event_context::mutex */ static struct perf_event_context * perf_event_ctx_lock_nested(struct perf_event *event, int nesting) { struct perf_event_context *ctx; again: rcu_read_lock(); ctx = READ_ONCE(event->ctx); if (!refcount_inc_not_zero(&ctx->refcount)) { rcu_read_unlock(); goto again; } rcu_read_unlock(); mutex_lock_nested(&ctx->mutex, nesting); if (event->ctx != ctx) { mutex_unlock(&ctx->mutex); put_ctx(ctx); goto again; } return ctx; } static inline struct perf_event_context * perf_event_ctx_lock(struct perf_event *event) { return perf_event_ctx_lock_nested(event, 0); } static void perf_event_ctx_unlock(struct perf_event *event, struct perf_event_context *ctx) { mutex_unlock(&ctx->mutex); put_ctx(ctx); } /* * This must be done under the ctx->lock, such as to serialize against * context_equiv(), therefore we cannot call put_ctx() since that might end up * calling scheduler related locks and ctx->lock nests inside those. */ static __must_check struct perf_event_context * unclone_ctx(struct perf_event_context *ctx) { struct perf_event_context *parent_ctx = ctx->parent_ctx; lockdep_assert_held(&ctx->lock); if (parent_ctx) ctx->parent_ctx = NULL; ctx->generation++; return parent_ctx; } static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p, enum pid_type type) { u32 nr; /* * only top level events have the pid namespace they were created in */ if (event->parent) event = event->parent; nr = __task_pid_nr_ns(p, type, event->ns); /* avoid -1 if it is idle thread or runs in another ns */ if (!nr && !pid_alive(p)) nr = -1; return nr; } static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) { return perf_event_pid_type(event, p, PIDTYPE_TGID); } static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) { return perf_event_pid_type(event, p, PIDTYPE_PID); } /* * If we inherit events we want to return the parent event id * to userspace. */ static u64 primary_event_id(struct perf_event *event) { u64 id = event->id; if (event->parent) id = event->parent->id; return id; } /* * Get the perf_event_context for a task and lock it. * * This has to cope with the fact that until it is locked, * the context could get moved to another task. 
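 *
 * (The lock-and-recheck loop below is the usual way to pin an
 *  RCU-published object: task->perf_event_ctxp is re-read under
 *  ctx->lock and the lookup is retried if the context was swapped out
 *  from under us in the meantime.)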
*/ static struct perf_event_context * perf_lock_task_context(struct task_struct *task, unsigned long *flags) { struct perf_event_context *ctx; retry: /* * One of the few rules of preemptible RCU is that one cannot do * rcu_read_unlock() while holding a scheduler (or nested) lock when * part of the read side critical section was irqs-enabled -- see * rcu_read_unlock_special(). * * Since ctx->lock nests under rq->lock we must ensure the entire read * side critical section has interrupts disabled. */ local_irq_save(*flags); rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp); if (ctx) { /* * If this context is a clone of another, it might * get swapped for another underneath us by * perf_event_task_sched_out, though the * rcu_read_lock() protects us from any context * getting freed. Lock the context and check if it * got swapped before we could get the lock, and retry * if so. If we locked the right context, then it * can't get swapped on us any more. */ raw_spin_lock(&ctx->lock); if (ctx != rcu_dereference(task->perf_event_ctxp)) { raw_spin_unlock(&ctx->lock); rcu_read_unlock(); local_irq_restore(*flags); goto retry; } if (ctx->task == TASK_TOMBSTONE || !refcount_inc_not_zero(&ctx->refcount)) { raw_spin_unlock(&ctx->lock); ctx = NULL; } else { WARN_ON_ONCE(ctx->task != task); } } rcu_read_unlock(); if (!ctx) local_irq_restore(*flags); return ctx; } /* * Get the context for a task and increment its pin_count so it * can't get swapped to another task. This also increments its * reference count so that the context can't get freed. */ static struct perf_event_context * perf_pin_task_context(struct task_struct *task) { struct perf_event_context *ctx; unsigned long flags; ctx = perf_lock_task_context(task, &flags); if (ctx) { ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); } return ctx; } static void perf_unpin_context(struct perf_event_context *ctx) { unsigned long flags; raw_spin_lock_irqsave(&ctx->lock, flags); --ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); } /* * Update the record of the current time in a context. */ static void __update_context_time(struct perf_event_context *ctx, bool adv) { u64 now = perf_clock(); lockdep_assert_held(&ctx->lock); if (adv) ctx->time += now - ctx->timestamp; ctx->timestamp = now; /* * The above: time' = time + (now - timestamp), can be re-arranged * into: time` = now + (time - timestamp), which gives a single value * offset to compute future time without locks on. * * See perf_event_time_now(), which can be used from NMI context where * it's (obviously) not possible to acquire ctx->lock in order to read * both the above values in a consistent manner. 
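 *
 * Worked example (illustrative numbers): with time = 100 and
 * timestamp = 70 the stored timeoffset is 30; a lockless reader such as
 * perf_event_time_now() then computes now + 30, which equals
 * time + (now - timestamp) as derived above.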
*/ WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp); } static void update_context_time(struct perf_event_context *ctx) { __update_context_time(ctx, true); } static u64 perf_event_time(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; if (unlikely(!ctx)) return 0; if (is_cgroup_event(event)) return perf_cgroup_event_time(event); return ctx->time; } static u64 perf_event_time_now(struct perf_event *event, u64 now) { struct perf_event_context *ctx = event->ctx; if (unlikely(!ctx)) return 0; if (is_cgroup_event(event)) return perf_cgroup_event_time_now(event, now); if (!(__load_acquire(&ctx->is_active) & EVENT_TIME)) return ctx->time; now += READ_ONCE(ctx->timeoffset); return now; } static enum event_type_t get_event_type(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; enum event_type_t event_type; lockdep_assert_held(&ctx->lock); /* * It's 'group type', really, because if our group leader is * pinned, so are we. */ if (event->group_leader != event) event = event->group_leader; event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE; if (!ctx->task) event_type |= EVENT_CPU; return event_type; } /* * Helper function to initialize event group nodes. */ static void init_event_group(struct perf_event *event) { RB_CLEAR_NODE(&event->group_node); event->group_index = 0; } /* * Extract pinned or flexible groups from the context * based on event attrs bits. */ static struct perf_event_groups * get_event_groups(struct perf_event *event, struct perf_event_context *ctx) { if (event->attr.pinned) return &ctx->pinned_groups; else return &ctx->flexible_groups; } /* * Helper function to initializes perf_event_group trees. */ static void perf_event_groups_init(struct perf_event_groups *groups) { groups->tree = RB_ROOT; groups->index = 0; } static inline struct cgroup *event_cgroup(const struct perf_event *event) { struct cgroup *cgroup = NULL; #ifdef CONFIG_CGROUP_PERF if (event->cgrp) cgroup = event->cgrp->css.cgroup; #endif return cgroup; } /* * Compare function for event groups; * * Implements complex key that first sorts by CPU and then by virtual index * which provides ordering when rotating groups for the same CPU. */ static __always_inline int perf_event_groups_cmp(const int left_cpu, const struct pmu *left_pmu, const struct cgroup *left_cgroup, const u64 left_group_index, const struct perf_event *right) { if (left_cpu < right->cpu) return -1; if (left_cpu > right->cpu) return 1; if (left_pmu) { if (left_pmu < right->pmu_ctx->pmu) return -1; if (left_pmu > right->pmu_ctx->pmu) return 1; } #ifdef CONFIG_CGROUP_PERF { const struct cgroup *right_cgroup = event_cgroup(right); if (left_cgroup != right_cgroup) { if (!left_cgroup) { /* * Left has no cgroup but right does, no * cgroups come first. */ return -1; } if (!right_cgroup) { /* * Right has no cgroup but left does, no * cgroups come first. */ return 1; } /* Two dissimilar cgroups, order by id. 
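 * (This is the last tie-break before group_index; together these
 *  comparisons implement the {cpu, pmu, cgroup, group_index} key
 *  described at perf_event_groups_insert() below.)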
*/ if (cgroup_id(left_cgroup) < cgroup_id(right_cgroup)) return -1; return 1; } } #endif if (left_group_index < right->group_index) return -1; if (left_group_index > right->group_index) return 1; return 0; } #define __node_2_pe(node) \ rb_entry((node), struct perf_event, group_node) static inline bool __group_less(struct rb_node *a, const struct rb_node *b) { struct perf_event *e = __node_2_pe(a); return perf_event_groups_cmp(e->cpu, e->pmu_ctx->pmu, event_cgroup(e), e->group_index, __node_2_pe(b)) < 0; } struct __group_key { int cpu; struct pmu *pmu; struct cgroup *cgroup; }; static inline int __group_cmp(const void *key, const struct rb_node *node) { const struct __group_key *a = key; const struct perf_event *b = __node_2_pe(node); /* partial/subtree match: @cpu, @pmu, @cgroup; ignore: @group_index */ return perf_event_groups_cmp(a->cpu, a->pmu, a->cgroup, b->group_index, b); } static inline int __group_cmp_ignore_cgroup(const void *key, const struct rb_node *node) { const struct __group_key *a = key; const struct perf_event *b = __node_2_pe(node); /* partial/subtree match: @cpu, @pmu, ignore: @cgroup, @group_index */ return perf_event_groups_cmp(a->cpu, a->pmu, event_cgroup(b), b->group_index, b); } /* * Insert @event into @groups' tree; using * {@event->cpu, @event->pmu_ctx->pmu, event_cgroup(@event), ++@groups->index} * as key. This places it last inside the {cpu,pmu,cgroup} subtree. */ static void perf_event_groups_insert(struct perf_event_groups *groups, struct perf_event *event) { event->group_index = ++groups->index; rb_add(&event->group_node, &groups->tree, __group_less); } /* * Helper function to insert event into the pinned or flexible groups. */ static void add_event_to_groups(struct perf_event *event, struct perf_event_context *ctx) { struct perf_event_groups *groups; groups = get_event_groups(event, ctx); perf_event_groups_insert(groups, event); } /* * Delete a group from a tree. */ static void perf_event_groups_delete(struct perf_event_groups *groups, struct perf_event *event) { WARN_ON_ONCE(RB_EMPTY_NODE(&event->group_node) || RB_EMPTY_ROOT(&groups->tree)); rb_erase(&event->group_node, &groups->tree); init_event_group(event); } /* * Helper function to delete event from its groups. */ static void del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx) { struct perf_event_groups *groups; groups = get_event_groups(event, ctx); perf_event_groups_delete(groups, event); } /* * Get the leftmost event in the {cpu,pmu,cgroup} subtree. */ static struct perf_event * perf_event_groups_first(struct perf_event_groups *groups, int cpu, struct pmu *pmu, struct cgroup *cgrp) { struct __group_key key = { .cpu = cpu, .pmu = pmu, .cgroup = cgrp, }; struct rb_node *node; node = rb_find_first(&key, &groups->tree, __group_cmp); if (node) return __node_2_pe(node); return NULL; } static struct perf_event * perf_event_groups_next(struct perf_event *event, struct pmu *pmu) { struct __group_key key = { .cpu = event->cpu, .pmu = pmu, .cgroup = event_cgroup(event), }; struct rb_node *next; next = rb_next_match(&key, &event->group_node, __group_cmp); if (next) return __node_2_pe(next); return NULL; } #define perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) \ for (event = perf_event_groups_first(groups, cpu, pmu, NULL); \ event; event = perf_event_groups_next(event, pmu)) /* * Iterate through the whole groups tree. 
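 *
 * (Contrast with perf_event_groups_for_cpu_pmu() above, which uses
 *  perf_event_groups_first()/perf_event_groups_next() to stay within a
 *  single subtree of the sort key; this macro simply walks every event
 *  in the tree in key order.)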
*/ #define perf_event_groups_for_each(event, groups) \ for (event = rb_entry_safe(rb_first(&((groups)->tree)), \ typeof(*event), group_node); event; \ event = rb_entry_safe(rb_next(&event->group_node), \ typeof(*event), group_node)) /* * Add an event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. */ static void list_add_event(struct perf_event *event, struct perf_event_context *ctx) { lockdep_assert_held(&ctx->lock); WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); event->attach_state |= PERF_ATTACH_CONTEXT; event->tstamp = perf_event_time(event); /* * If we're a stand alone event or group leader, we go to the context * list, group events are kept attached to the group so that * perf_group_detach can, at all times, locate all siblings. */ if (event->group_leader == event) { event->group_caps = event->event_caps; add_event_to_groups(event, ctx); } list_add_rcu(&event->event_entry, &ctx->event_list); ctx->nr_events++; if (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) ctx->nr_user++; if (event->attr.inherit_stat) ctx->nr_stat++; if (event->state > PERF_EVENT_STATE_OFF) perf_cgroup_event_enable(event, ctx); ctx->generation++; event->pmu_ctx->nr_events++; } /* * Initialize event state based on the perf_event_attr::disabled. */ static inline void perf_event__state_init(struct perf_event *event) { event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF : PERF_EVENT_STATE_INACTIVE; } static int __perf_event_read_size(u64 read_format, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; int nr = 1; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) size += sizeof(u64); if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) size += sizeof(u64); if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); if (read_format & PERF_FORMAT_LOST) entry += sizeof(u64); if (read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); } /* * Since perf_event_validate_size() limits this to 16k and inhibits * adding more siblings, this will never overflow. */ return size + nr * entry; } static void __perf_event_header_size(struct perf_event *event, u64 sample_type) { struct perf_sample_data *data; u16 size = 0; if (sample_type & PERF_SAMPLE_IP) size += sizeof(data->ip); if (sample_type & PERF_SAMPLE_ADDR) size += sizeof(data->addr); if (sample_type & PERF_SAMPLE_PERIOD) size += sizeof(data->period); if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) size += sizeof(data->weight.full); if (sample_type & PERF_SAMPLE_READ) size += event->read_size; if (sample_type & PERF_SAMPLE_DATA_SRC) size += sizeof(data->data_src.val); if (sample_type & PERF_SAMPLE_TRANSACTION) size += sizeof(data->txn); if (sample_type & PERF_SAMPLE_PHYS_ADDR) size += sizeof(data->phys_addr); if (sample_type & PERF_SAMPLE_CGROUP) size += sizeof(data->cgroup); if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) size += sizeof(data->data_page_size); if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) size += sizeof(data->code_page_size); event->header_size = size; } /* * Called at perf_event creation and when events are attached/detached from a * group. 
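 *
 * Worked example (illustrative): with read_format =
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_ID | PERF_FORMAT_GROUP
 * and two siblings, __perf_event_read_size() computes entry = 16,
 * size = 16 and nr = 3, so the resulting read_size is 16 + 3 * 16 = 64
 * bytes.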
*/ static void perf_event__header_size(struct perf_event *event) { event->read_size = __perf_event_read_size(event->attr.read_format, event->group_leader->nr_siblings); __perf_event_header_size(event, event->attr.sample_type); } static void perf_event__id_header_size(struct perf_event *event) { struct perf_sample_data *data; u64 sample_type = event->attr.sample_type; u16 size = 0; if (sample_type & PERF_SAMPLE_TID) size += sizeof(data->tid_entry); if (sample_type & PERF_SAMPLE_TIME) size += sizeof(data->time); if (sample_type & PERF_SAMPLE_IDENTIFIER) size += sizeof(data->id); if (sample_type & PERF_SAMPLE_ID) size += sizeof(data->id); if (sample_type & PERF_SAMPLE_STREAM_ID) size += sizeof(data->stream_id); if (sample_type & PERF_SAMPLE_CPU) size += sizeof(data->cpu_entry); event->id_header_size = size; } /* * Check that adding an event to the group does not result in anybody * overflowing the 64k event limit imposed by the output buffer. * * Specifically, check that the read_size for the event does not exceed 16k, * read_size being the one term that grows with groups size. Since read_size * depends on per-event read_format, also (re)check the existing events. * * This leaves 48k for the constant size fields and things like callchains, * branch stacks and register sets. */ static bool perf_event_validate_size(struct perf_event *event) { struct perf_event *sibling, *group_leader = event->group_leader; if (__perf_event_read_size(event->attr.read_format, group_leader->nr_siblings + 1) > 16*1024) return false; if (__perf_event_read_size(group_leader->attr.read_format, group_leader->nr_siblings + 1) > 16*1024) return false; /* * When creating a new group leader, group_leader->ctx is initialized * after the size has been validated, but we cannot safely use * for_each_sibling_event() until group_leader->ctx is set. A new group * leader cannot have any siblings yet, so we can safely skip checking * the non-existent siblings. */ if (event == group_leader) return true; for_each_sibling_event(sibling, group_leader) { if (__perf_event_read_size(sibling->attr.read_format, group_leader->nr_siblings + 1) > 16*1024) return false; } return true; } static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; lockdep_assert_held(&event->ctx->lock); /* * We can have double attach due to group movement (move_group) in * perf_event_open(). */ if (event->attach_state & PERF_ATTACH_GROUP) return; event->attach_state |= PERF_ATTACH_GROUP; if (group_leader == event) return; WARN_ON_ONCE(group_leader->ctx != event->ctx); group_leader->group_caps &= event->event_caps; list_add_tail(&event->sibling_list, &group_leader->sibling_list); group_leader->nr_siblings++; group_leader->group_generation++; perf_event__header_size(group_leader); for_each_sibling_event(pos, group_leader) perf_event__header_size(pos); } /* * Remove an event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. */ static void list_del_event(struct perf_event *event, struct perf_event_context *ctx) { WARN_ON_ONCE(event->ctx != ctx); lockdep_assert_held(&ctx->lock); /* * We can have double detach due to exit/hot-unplug + close. 
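 *
 * (Typical sequence: the event is first torn down because its task
 *  exited or its CPU went away, and the eventual close() of the event
 *  fd lands here a second time; the PERF_ATTACH_CONTEXT test below
 *  turns that second call into a no-op.)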
*/ if (!(event->attach_state & PERF_ATTACH_CONTEXT)) return; event->attach_state &= ~PERF_ATTACH_CONTEXT; ctx->nr_events--; if (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) ctx->nr_user--; if (event->attr.inherit_stat) ctx->nr_stat--; list_del_rcu(&event->event_entry); if (event->group_leader == event) del_event_from_groups(event, ctx); /* * If event was in error state, then keep it * that way, otherwise bogus counts will be * returned on read(). The only way to get out * of error state is by explicit re-enabling * of the event */ if (event->state > PERF_EVENT_STATE_OFF) { perf_cgroup_event_disable(event, ctx); perf_event_set_state(event, PERF_EVENT_STATE_OFF); } ctx->generation++; event->pmu_ctx->nr_events--; } static int perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event) { if (!has_aux(aux_event)) return 0; if (!event->pmu->aux_output_match) return 0; return event->pmu->aux_output_match(aux_event); } static void put_event(struct perf_event *event); static void event_sched_out(struct perf_event *event, struct perf_event_context *ctx); static void perf_put_aux_event(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct perf_event *iter; /* * If event uses aux_event tear down the link */ if (event->aux_event) { iter = event->aux_event; event->aux_event = NULL; put_event(iter); return; } /* * If the event is an aux_event, tear down all links to * it from other events. */ for_each_sibling_event(iter, event->group_leader) { if (iter->aux_event != event) continue; iter->aux_event = NULL; put_event(event); /* * If it's ACTIVE, schedule it out and put it into ERROR * state so that we don't try to schedule it again. Note * that perf_event_enable() will clear the ERROR status. */ event_sched_out(iter, ctx); perf_event_set_state(event, PERF_EVENT_STATE_ERROR); } } static bool perf_need_aux_event(struct perf_event *event) { return !!event->attr.aux_output || !!event->attr.aux_sample_size; } static int perf_get_aux_event(struct perf_event *event, struct perf_event *group_leader) { /* * Our group leader must be an aux event if we want to be * an aux_output. This way, the aux event will precede its * aux_output events in the group, and therefore will always * schedule first. */ if (!group_leader) return 0; /* * aux_output and aux_sample_size are mutually exclusive. */ if (event->attr.aux_output && event->attr.aux_sample_size) return 0; if (event->attr.aux_output && !perf_aux_output_match(event, group_leader)) return 0; if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux) return 0; if (!atomic_long_inc_not_zero(&group_leader->refcount)) return 0; /* * Link aux_outputs to their aux event; this is undone in * perf_group_detach() by perf_put_aux_event(). When the * group in torn down, the aux_output events loose their * link to the aux_event and can't schedule any more. */ event->aux_event = group_leader; return 1; } static inline struct list_head *get_event_list(struct perf_event *event) { return event->attr.pinned ? &event->pmu_ctx->pinned_active : &event->pmu_ctx->flexible_active; } /* * Events that have PERF_EV_CAP_SIBLING require being part of a group and * cannot exist on their own, schedule them out and move them into the ERROR * state. Also see _perf_event_enable(), it will not be able to recover * this ERROR state. 
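 *
 * (Illustrative, not something this file defines: the x86 topdown
 *  metric events are a typical PERF_EV_CAP_SIBLING user; they only
 *  produce data by riding along with their group leader, so once the
 *  leader is gone ERROR is the only state left that makes sense.)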
*/ static inline void perf_remove_sibling_event(struct perf_event *event) { event_sched_out(event, event->ctx); perf_event_set_state(event, PERF_EVENT_STATE_ERROR); } static void perf_group_detach(struct perf_event *event) { struct perf_event *leader = event->group_leader; struct perf_event *sibling, *tmp; struct perf_event_context *ctx = event->ctx; lockdep_assert_held(&ctx->lock); /* * We can have double detach due to exit/hot-unplug + close. */ if (!(event->attach_state & PERF_ATTACH_GROUP)) return; event->attach_state &= ~PERF_ATTACH_GROUP; perf_put_aux_event(event); /* * If this is a sibling, remove it from its group. */ if (leader != event) { list_del_init(&event->sibling_list); event->group_leader->nr_siblings--; event->group_leader->group_generation++; goto out; } /* * If this was a group event with sibling events then * upgrade the siblings to singleton events by adding them * to whatever list we are on. */ list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) { if (sibling->event_caps & PERF_EV_CAP_SIBLING) perf_remove_sibling_event(sibling); sibling->group_leader = sibling; list_del_init(&sibling->sibling_list); /* Inherit group flags from the previous leader */ sibling->group_caps = event->group_caps; if (sibling->attach_state & PERF_ATTACH_CONTEXT) { add_event_to_groups(sibling, event->ctx); if (sibling->state == PERF_EVENT_STATE_ACTIVE) list_add_tail(&sibling->active_list, get_event_list(sibling)); } WARN_ON_ONCE(sibling->ctx != event->ctx); } out: for_each_sibling_event(tmp, leader) perf_event__header_size(tmp); perf_event__header_size(leader); } static void sync_child_event(struct perf_event *child_event); static void perf_child_detach(struct perf_event *event) { struct perf_event *parent_event = event->parent; if (!(event->attach_state & PERF_ATTACH_CHILD)) return; event->attach_state &= ~PERF_ATTACH_CHILD; if (WARN_ON_ONCE(!parent_event)) return; lockdep_assert_held(&parent_event->child_mutex); sync_child_event(event); list_del_init(&event->child_list); } static bool is_orphaned_event(struct perf_event *event) { return event->state == PERF_EVENT_STATE_DEAD; } static inline int event_filter_match(struct perf_event *event) { return (event->cpu == -1 || event->cpu == smp_processor_id()) && perf_cgroup_match(event); } static void event_sched_out(struct perf_event *event, struct perf_event_context *ctx) { struct perf_event_pmu_context *epc = event->pmu_ctx; struct perf_cpu_pmu_context *cpc = this_cpu_ptr(epc->pmu->cpu_pmu_context); enum perf_event_state state = PERF_EVENT_STATE_INACTIVE; // XXX cpc serialization, probably per-cpu IRQ disabled WARN_ON_ONCE(event->ctx != ctx); lockdep_assert_held(&ctx->lock); if (event->state != PERF_EVENT_STATE_ACTIVE) return; /* * Asymmetry; we only schedule events _IN_ through ctx_sched_in(), but * we can schedule events _OUT_ individually through things like * __perf_remove_from_context(). 
*/ list_del_init(&event->active_list); perf_pmu_disable(event->pmu); event->pmu->del(event, 0); event->oncpu = -1; if (event->pending_disable) { event->pending_disable = 0; perf_cgroup_event_disable(event, ctx); state = PERF_EVENT_STATE_OFF; } if (event->pending_sigtrap) { bool dec = true; event->pending_sigtrap = 0; if (state != PERF_EVENT_STATE_OFF && !event->pending_work) { event->pending_work = 1; dec = false; WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); task_work_add(current, &event->pending_task, TWA_RESUME); } if (dec) local_dec(&event->ctx->nr_pending); } perf_event_set_state(event, state); if (!is_software_event(event)) cpc->active_oncpu--; if (event->attr.freq && event->attr.sample_freq) ctx->nr_freq--; if (event->attr.exclusive || !cpc->active_oncpu) cpc->exclusive = 0; perf_pmu_enable(event->pmu); } static void group_sched_out(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event; if (group_event->state != PERF_EVENT_STATE_ACTIVE) return; perf_assert_pmu_disabled(group_event->pmu_ctx->pmu); event_sched_out(group_event, ctx); /* * Schedule out siblings (if any): */ for_each_sibling_event(event, group_event) event_sched_out(event, ctx); } #define DETACH_GROUP 0x01UL #define DETACH_CHILD 0x02UL #define DETACH_DEAD 0x04UL /* * Cross CPU call to remove a performance event * * We disable the event on the hardware level first. After that we * remove it from the context list. */ static void __perf_remove_from_context(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, void *info) { struct perf_event_pmu_context *pmu_ctx = event->pmu_ctx; unsigned long flags = (unsigned long)info; if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); update_cgrp_time_from_cpuctx(cpuctx, false); } /* * Ensure event_sched_out() switches to OFF, at the very least * this avoids raising perf_pending_task() at this time. */ if (flags & DETACH_DEAD) event->pending_disable = 1; event_sched_out(event, ctx); if (flags & DETACH_GROUP) perf_group_detach(event); if (flags & DETACH_CHILD) perf_child_detach(event); list_del_event(event, ctx); if (flags & DETACH_DEAD) event->state = PERF_EVENT_STATE_DEAD; if (!pmu_ctx->nr_events) { pmu_ctx->rotate_necessary = 0; if (ctx->task && ctx->is_active) { struct perf_cpu_pmu_context *cpc; cpc = this_cpu_ptr(pmu_ctx->pmu->cpu_pmu_context); WARN_ON_ONCE(cpc->task_epc && cpc->task_epc != pmu_ctx); cpc->task_epc = NULL; } } if (!ctx->nr_events && ctx->is_active) { if (ctx == &cpuctx->ctx) update_cgrp_time_from_cpuctx(cpuctx, true); ctx->is_active = 0; if (ctx->task) { WARN_ON_ONCE(cpuctx->task_ctx != ctx); cpuctx->task_ctx = NULL; } } } /* * Remove the event from a task's (or a CPU's) list of events. * * If event->ctx is a cloned context, callers must make sure that * every task struct that event->ctx->task could possibly point to * remains valid. This is OK when called from perf_release since * that only calls us on the top-level context, which can't be a clone. * When called from perf_event_exit_task, it's OK because the * context has been detached from its task. */ static void perf_remove_from_context(struct perf_event *event, unsigned long flags) { struct perf_event_context *ctx = event->ctx; lockdep_assert_held(&ctx->mutex); /* * Because of perf_event_exit_task(), perf_remove_from_context() ought * to work in the face of TASK_TOMBSTONE, unlike every other * event_function_call() user. 
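 *
 * (Hence the open-coded fast path below: if the context is not active,
 *  the removal is done locally under ctx->lock; only an active context
 *  falls back to event_function_call() and its IPI.)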
*/ raw_spin_lock_irq(&ctx->lock); if (!ctx->is_active) { __perf_remove_from_context(event, this_cpu_ptr(&perf_cpu_context), ctx, (void *)flags); raw_spin_unlock_irq(&ctx->lock); return; } raw_spin_unlock_irq(&ctx->lock); event_function_call(event, __perf_remove_from_context, (void *)flags); } /* * Cross CPU call to disable a performance event */ static void __perf_event_disable(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, void *info) { if (event->state < PERF_EVENT_STATE_INACTIVE) return; if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); update_cgrp_time_from_event(event); } perf_pmu_disable(event->pmu_ctx->pmu); if (event == event->group_leader) group_sched_out(event, ctx); else event_sched_out(event, ctx); perf_event_set_state(event, PERF_EVENT_STATE_OFF); perf_cgroup_event_disable(event, ctx); perf_pmu_enable(event->pmu_ctx->pmu); } /* * Disable an event. * * If event->ctx is a cloned context, callers must make sure that * every task struct that event->ctx->task could possibly point to * remains valid. This condition is satisfied when called through * perf_event_for_each_child or perf_event_for_each because they * hold the top-level event's child_mutex, so any descendant that * goes to exit will block in perf_event_exit_event(). * * When called from perf_pending_irq it's OK because event->ctx * is the current context on this CPU and preemption is disabled, * hence we can't get into perf_event_task_sched_out for this context. */ static void _perf_event_disable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; raw_spin_lock_irq(&ctx->lock); if (event->state <= PERF_EVENT_STATE_OFF) { raw_spin_unlock_irq(&ctx->lock); return; } raw_spin_unlock_irq(&ctx->lock); event_function_call(event, __perf_event_disable, NULL); } void perf_event_disable_local(struct perf_event *event) { event_function_local(event, __perf_event_disable, NULL); } /* * Strictly speaking kernel users cannot create groups and therefore this * interface does not need the perf_event_ctx_lock() magic. */ void perf_event_disable(struct perf_event *event) { struct perf_event_context *ctx; ctx = perf_event_ctx_lock(event); _perf_event_disable(event); perf_event_ctx_unlock(event, ctx); } EXPORT_SYMBOL_GPL(perf_event_disable); void perf_event_disable_inatomic(struct perf_event *event) { event->pending_disable = 1; irq_work_queue(&event->pending_irq); } #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); static void perf_log_itrace_start(struct perf_event *event); static int event_sched_in(struct perf_event *event, struct perf_event_context *ctx) { struct perf_event_pmu_context *epc = event->pmu_ctx; struct perf_cpu_pmu_context *cpc = this_cpu_ptr(epc->pmu->cpu_pmu_context); int ret = 0; WARN_ON_ONCE(event->ctx != ctx); lockdep_assert_held(&ctx->lock); if (event->state <= PERF_EVENT_STATE_OFF) return 0; WRITE_ONCE(event->oncpu, smp_processor_id()); /* * Order event::oncpu write to happen before the ACTIVE state is * visible. This allows perf_event_{stop,read}() to observe the correct * ->oncpu if it sees ACTIVE. */ smp_wmb(); perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE); /* * Unthrottle events, since we scheduled we might have missed several * ticks already, also for a heavily scheduling task there is little * guarantee it'll get a tick in a timely manner. 
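 *
 * (hw.interrupts == MAX_INTERRUPTS is the "throttled" marker set by the
 *  overflow path; clearing it below and logging an unthrottle record
 *  lets the event count again as soon as it is rescheduled.)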
*/ if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) { perf_log_throttle(event, 1); event->hw.interrupts = 0; } perf_pmu_disable(event->pmu); perf_log_itrace_start(event); if (event->pmu->add(event, PERF_EF_START)) { perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE); event->oncpu = -1; ret = -EAGAIN; goto out; } if (!is_software_event(event)) cpc->active_oncpu++; if (event->attr.freq && event->attr.sample_freq) ctx->nr_freq++; if (event->attr.exclusive) cpc->exclusive = 1; out: perf_pmu_enable(event->pmu); return ret; } static int group_sched_in(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event, *partial_group = NULL; struct pmu *pmu = group_event->pmu_ctx->pmu; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; pmu->start_txn(pmu, PERF_PMU_TXN_ADD); if (event_sched_in(group_event, ctx)) goto error; /* * Schedule in siblings as one group (if any): */ for_each_sibling_event(event, group_event) { if (event_sched_in(event, ctx)) { partial_group = event; goto group_error; } } if (!pmu->commit_txn(pmu)) return 0; group_error: /* * Groups can be scheduled in as one unit only, so undo any * partial group before returning: * The events up to the failed event are scheduled out normally. */ for_each_sibling_event(event, group_event) { if (event == partial_group) break; event_sched_out(event, ctx); } event_sched_out(group_event, ctx); error: pmu->cancel_txn(pmu); return -EAGAIN; } /* * Work out whether we can put this event group on the CPU now. */ static int group_can_go_on(struct perf_event *event, int can_add_hw) { struct perf_event_pmu_context *epc = event->pmu_ctx; struct perf_cpu_pmu_context *cpc = this_cpu_ptr(epc->pmu->cpu_pmu_context); /* * Groups consisting entirely of software events can always go on. */ if (event->group_caps & PERF_EV_CAP_SOFTWARE) return 1; /* * If an exclusive group is already on, no other hardware * events can go on. */ if (cpc->exclusive) return 0; /* * If this group is exclusive and there are already * events on the CPU, it can't go on. */ if (event->attr.exclusive && !list_empty(get_event_list(event))) return 0; /* * Otherwise, try to add it if all previous groups were able * to go on. */ return can_add_hw; } static void add_event_to_ctx(struct perf_event *event, struct perf_event_context *ctx) { list_add_event(event, ctx); perf_group_attach(event); } static void task_ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); if (!cpuctx->task_ctx) return; if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) return; ctx_sched_out(ctx, event_type); } static void perf_event_sched_in(struct perf_cpu_context *cpuctx, struct perf_event_context *ctx) { ctx_sched_in(&cpuctx->ctx, EVENT_PINNED); if (ctx) ctx_sched_in(ctx, EVENT_PINNED); ctx_sched_in(&cpuctx->ctx, EVENT_FLEXIBLE); if (ctx) ctx_sched_in(ctx, EVENT_FLEXIBLE); } /* * We want to maintain the following priority of scheduling: * - CPU pinned (EVENT_CPU | EVENT_PINNED) * - task pinned (EVENT_PINNED) * - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE) * - task flexible (EVENT_FLEXIBLE). * * In order to avoid unscheduling and scheduling back in everything every * time an event is added, only do it for the groups of equal priority and * below. * * This can be called after a batch operation on task events, in which case * event_type is a bit mask of the types of events involved. For CPU events, * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE. 
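 *
 * Worked example (illustrative): installing a new task pinned event
 * gives event_type == EVENT_PINNED, so the task context is scheduled
 * out in full, only the EVENT_FLEXIBLE half of the CPU context follows,
 * and CPU pinned groups stay untouched; perf_event_sched_in() then
 * re-adds everything in the priority order listed above.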
*/ /* * XXX: ctx_resched() reschedule entire perf_event_context while adding new * event to the context or enabling existing event in the context. We can * probably optimize it by rescheduling only affected pmu_ctx. */ static void ctx_resched(struct perf_cpu_context *cpuctx, struct perf_event_context *task_ctx, enum event_type_t event_type) { bool cpu_event = !!(event_type & EVENT_CPU); /* * If pinned groups are involved, flexible groups also need to be * scheduled out. */ if (event_type & EVENT_PINNED) event_type |= EVENT_FLEXIBLE; event_type &= EVENT_ALL; perf_ctx_disable(&cpuctx->ctx, false); if (task_ctx) { perf_ctx_disable(task_ctx, false); task_ctx_sched_out(task_ctx, event_type); } /* * Decide which cpu ctx groups to schedule out based on the types * of events that caused rescheduling: * - EVENT_CPU: schedule out corresponding groups; * - EVENT_PINNED task events: schedule out EVENT_FLEXIBLE groups; * - otherwise, do nothing more. */ if (cpu_event) ctx_sched_out(&cpuctx->ctx, event_type); else if (event_type & EVENT_PINNED) ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE); perf_event_sched_in(cpuctx, task_ctx); perf_ctx_enable(&cpuctx->ctx, false); if (task_ctx) perf_ctx_enable(task_ctx, false); } void perf_pmu_resched(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_event_context *task_ctx = cpuctx->task_ctx; perf_ctx_lock(cpuctx, task_ctx); ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU); perf_ctx_unlock(cpuctx, task_ctx); } /* * Cross CPU call to install and enable a performance event * * Very similar to remote_function() + event_function() but cannot assume that * things like ctx->is_active and cpuctx->task_ctx are set. */ static int __perf_install_in_context(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_event_context *task_ctx = cpuctx->task_ctx; bool reprogram = true; int ret = 0; raw_spin_lock(&cpuctx->ctx.lock); if (ctx->task) { raw_spin_lock(&ctx->lock); task_ctx = ctx; reprogram = (ctx->task == current); /* * If the task is running, it must be running on this CPU, * otherwise we cannot reprogram things. * * If its not running, we don't care, ctx->lock will * serialize against it becoming runnable. */ if (task_curr(ctx->task) && !reprogram) { ret = -ESRCH; goto unlock; } WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx); } else if (task_ctx) { raw_spin_lock(&task_ctx->lock); } #ifdef CONFIG_CGROUP_PERF if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) { /* * If the current cgroup doesn't match the event's * cgroup, we should not try to schedule it. */ struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx); reprogram = cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup); } #endif if (reprogram) { ctx_sched_out(ctx, EVENT_TIME); add_event_to_ctx(event, ctx); ctx_resched(cpuctx, task_ctx, get_event_type(event)); } else { add_event_to_ctx(event, ctx); } unlock: perf_ctx_unlock(cpuctx, task_ctx); return ret; } static bool exclusive_event_installable(struct perf_event *event, struct perf_event_context *ctx); /* * Attach a performance event to a context. * * Very similar to event_function_call, see comment there. 
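 *
 * (As with event_function_call(), the task case below may have to IPI
 *  the target, re-check ctx->task under ctx->lock and retry; see the
 *  long comment inside about racing with a concurrent context switch.)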
*/ static void perf_install_in_context(struct perf_event_context *ctx, struct perf_event *event, int cpu) { struct task_struct *task = READ_ONCE(ctx->task); lockdep_assert_held(&ctx->mutex); WARN_ON_ONCE(!exclusive_event_installable(event, ctx)); if (event->cpu != -1) WARN_ON_ONCE(event->cpu != cpu); /* * Ensures that if we can observe event->ctx, both the event and ctx * will be 'complete'. See perf_iterate_sb_cpu(). */ smp_store_release(&event->ctx, ctx); /* * perf_event_attr::disabled events will not run and can be initialized * without IPI. Except when this is the first event for the context, in * that case we need the magic of the IPI to set ctx->is_active. * * The IOC_ENABLE that is sure to follow the creation of a disabled * event will issue the IPI and reprogram the hardware. */ if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events && !is_cgroup_event(event)) { raw_spin_lock_irq(&ctx->lock); if (ctx->task == TASK_TOMBSTONE) { raw_spin_unlock_irq(&ctx->lock); return; } add_event_to_ctx(event, ctx); raw_spin_unlock_irq(&ctx->lock); return; } if (!task) { cpu_function_call(cpu, __perf_install_in_context, event); return; } /* * Should not happen, we validate the ctx is still alive before calling. */ if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) return; /* * Installing events is tricky because we cannot rely on ctx->is_active * to be set in case this is the nr_events 0 -> 1 transition. * * Instead we use task_curr(), which tells us if the task is running. * However, since we use task_curr() outside of rq::lock, we can race * against the actual state. This means the result can be wrong. * * If we get a false positive, we retry, this is harmless. * * If we get a false negative, things are complicated. If we are after * perf_event_context_sched_in() ctx::lock will serialize us, and the * value must be correct. If we're before, it doesn't matter since * perf_event_context_sched_in() will program the counter. * * However, this hinges on the remote context switch having observed * our task->perf_event_ctxp[] store, such that it will in fact take * ctx::lock in perf_event_context_sched_in(). * * We do this by task_function_call(), if the IPI fails to hit the task * we know any future context switch of task must see the * perf_event_ctpx[] store. */ /* * This smp_mb() orders the task->perf_event_ctxp[] store with the * task_cpu() load, such that if the IPI then does not find the task * running, a future context switch of that task must observe the * store. */ smp_mb(); again: if (!task_function_call(task, __perf_install_in_context, event)) return; raw_spin_lock_irq(&ctx->lock); task = ctx->task; if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) { /* * Cannot happen because we already checked above (which also * cannot happen), and we hold ctx->mutex, which serializes us * against perf_event_exit_task_context(). */ raw_spin_unlock_irq(&ctx->lock); return; } /* * If the task is not running, ctx->lock will avoid it becoming so, * thus we can safely install the event. 
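 *
 * Conversely, if task_curr() is still true here, the task was scheduled
 * back in after the IPI missed it; drop ctx->lock and retry the cross
 * call instead of installing behind its back.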
*/ if (task_curr(task)) { raw_spin_unlock_irq(&ctx->lock); goto again; } add_event_to_ctx(event, ctx); raw_spin_unlock_irq(&ctx->lock); } /* * Cross CPU call to enable a performance event */ static void __perf_event_enable(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, void *info) { struct perf_event *leader = event->group_leader; struct perf_event_context *task_ctx; if (event->state >= PERF_EVENT_STATE_INACTIVE || event->state <= PERF_EVENT_STATE_ERROR) return; if (ctx->is_active) ctx_sched_out(ctx, EVENT_TIME); perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE); perf_cgroup_event_enable(event, ctx); if (!ctx->is_active) return; if (!event_filter_match(event)) { ctx_sched_in(ctx, EVENT_TIME); return; } /* * If the event is in a group and isn't the group leader, * then don't put it on unless the group is on. */ if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) { ctx_sched_in(ctx, EVENT_TIME); return; } task_ctx = cpuctx->task_ctx; if (ctx->task) WARN_ON_ONCE(task_ctx != ctx); ctx_resched(cpuctx, task_ctx, get_event_type(event)); } /* * Enable an event. * * If event->ctx is a cloned context, callers must make sure that * every task struct that event->ctx->task could possibly point to * remains valid. This condition is satisfied when called through * perf_event_for_each_child or perf_event_for_each as described * for perf_event_disable. */ static void _perf_event_enable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; raw_spin_lock_irq(&ctx->lock); if (event->state >= PERF_EVENT_STATE_INACTIVE || event->state < PERF_EVENT_STATE_ERROR) { out: raw_spin_unlock_irq(&ctx->lock); return; } /* * If the event is in error state, clear that first. * * That way, if we see the event in error state below, we know that it * has gone back into error state, as distinct from the task having * been scheduled away before the cross-call arrived. */ if (event->state == PERF_EVENT_STATE_ERROR) { /* * Detached SIBLING events cannot leave ERROR state. */ if (event->event_caps & PERF_EV_CAP_SIBLING && event->group_leader == event) goto out; event->state = PERF_EVENT_STATE_OFF; } raw_spin_unlock_irq(&ctx->lock); event_function_call(event, __perf_event_enable, NULL); } /* * See perf_event_disable(); */ void perf_event_enable(struct perf_event *event) { struct perf_event_context *ctx; ctx = perf_event_ctx_lock(event); _perf_event_enable(event); perf_event_ctx_unlock(event, ctx); } EXPORT_SYMBOL_GPL(perf_event_enable); struct stop_event_data { struct perf_event *event; unsigned int restart; }; static int __perf_event_stop(void *info) { struct stop_event_data *sd = info; struct perf_event *event = sd->event; /* if it's already INACTIVE, do nothing */ if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE) return 0; /* matches smp_wmb() in event_sched_in() */ smp_rmb(); /* * There is a window with interrupts enabled before we get here, * so we need to check again lest we try to stop another CPU's event. */ if (READ_ONCE(event->oncpu) != smp_processor_id()) return -EAGAIN; event->pmu->stop(event, PERF_EF_UPDATE); /* * May race with the actual stop (through perf_pmu_output_stop()), * but it is only used for events with AUX ring buffer, and such * events will refuse to restart because of rb::aux_mmap_count==0, * see comments in perf_aux_output_begin(). * * Since this is happening on an event-local CPU, no trace is lost * while restarting. 
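 *
 * The caller, perf_event_stop() below, retries on -EAGAIN, so an event
 * that migrates to another CPU between the ->oncpu check and the cross
 * call simply has the IPI resent to its new CPU.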
*/ if (sd->restart) event->pmu->start(event, 0); return 0; } static int perf_event_stop(struct perf_event *event, int restart) { struct stop_event_data sd = { .event = event, .restart = restart, }; int ret = 0; do { if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE) return 0; /* matches smp_wmb() in event_sched_in() */ smp_rmb(); /* * We only want to restart ACTIVE events, so if the event goes * inactive here (event->oncpu==-1), there's nothing more to do; * fall through with ret==-ENXIO. */ ret = cpu_function_call(READ_ONCE(event->oncpu), __perf_event_stop, &sd); } while (ret == -EAGAIN); return ret; } /* * In order to contain the amount of racy and tricky in the address filter * configuration management, it is a two part process: * * (p1) when userspace mappings change as a result of (1) or (2) or (3) below, * we update the addresses of corresponding vmas in * event::addr_filter_ranges array and bump the event::addr_filters_gen; * (p2) when an event is scheduled in (pmu::add), it calls * perf_event_addr_filters_sync() which calls pmu::addr_filters_sync() * if the generation has changed since the previous call. * * If (p1) happens while the event is active, we restart it to force (p2). * * (1) perf_addr_filters_apply(): adjusting filters' offsets based on * pre-existing mappings, called once when new filters arrive via SET_FILTER * ioctl; * (2) perf_addr_filters_adjust(): adjusting filters' offsets based on newly * registered mapping, called for every new mmap(), with mm::mmap_lock down * for reading; * (3) perf_event_addr_filters_exec(): clearing filters' offsets in the process * of exec. */ void perf_event_addr_filters_sync(struct perf_event *event) { struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); if (!has_addr_filter(event)) return; raw_spin_lock(&ifh->lock); if (event->addr_filters_gen != event->hw.addr_filters_gen) { event->pmu->addr_filters_sync(event); event->hw.addr_filters_gen = event->addr_filters_gen; } raw_spin_unlock(&ifh->lock); } EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync); static int _perf_event_refresh(struct perf_event *event, int refresh) { /* * not supported on inherited events */ if (event->attr.inherit || !is_sampling_event(event)) return -EINVAL; atomic_add(refresh, &event->event_limit); _perf_event_enable(event); return 0; } /* * See perf_event_disable() */ int perf_event_refresh(struct perf_event *event, int refresh) { struct perf_event_context *ctx; int ret; ctx = perf_event_ctx_lock(event); ret = _perf_event_refresh(event, refresh); perf_event_ctx_unlock(event, ctx); return ret; } EXPORT_SYMBOL_GPL(perf_event_refresh); static int perf_event_modify_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { int err; _perf_event_disable(bp); err = modify_user_hw_breakpoint_check(bp, attr, true); if (!bp->attr.disabled) _perf_event_enable(bp); return err; } /* * Copy event-type-independent attributes that may be modified. */ static void perf_event_modify_copy_attr(struct perf_event_attr *to, const struct perf_event_attr *from) { to->sig_data = from->sig_data; } static int perf_event_modify_attr(struct perf_event *event, struct perf_event_attr *attr) { int (*func)(struct perf_event *, struct perf_event_attr *); struct perf_event *child; int err; if (event->attr.type != attr->type) return -EINVAL; switch (event->attr.type) { case PERF_TYPE_BREAKPOINT: func = perf_event_modify_breakpoint; break; default: /* Place holder for future additions. 
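 * Only PERF_TYPE_BREAKPOINT can currently be modified; every other
 * event type falls through to -EOPNOTSUPP below.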
*/ return -EOPNOTSUPP; } WARN_ON_ONCE(event->ctx->parent_ctx); mutex_lock(&event->child_mutex); /* * Event-type-independent attributes must be copied before event-type * modification, which will validate that final attributes match the * source attributes after all relevant attributes have been copied. */ perf_event_modify_copy_attr(&event->attr, attr); err = func(event, attr); if (err) goto out; list_for_each_entry(child, &event->child_list, child_list) { perf_event_modify_copy_attr(&child->attr, attr); err = func(child, attr); if (err) goto out; } out: mutex_unlock(&event->child_mutex); return err; } static void __pmu_ctx_sched_out(struct perf_event_pmu_context *pmu_ctx, enum event_type_t event_type) { struct perf_event_context *ctx = pmu_ctx->ctx; struct perf_event *event, *tmp; struct pmu *pmu = pmu_ctx->pmu; if (ctx->task && !ctx->is_active) { struct perf_cpu_pmu_context *cpc; cpc = this_cpu_ptr(pmu->cpu_pmu_context); WARN_ON_ONCE(cpc->task_epc && cpc->task_epc != pmu_ctx); cpc->task_epc = NULL; } if (!event_type) return; perf_pmu_disable(pmu); if (event_type & EVENT_PINNED) { list_for_each_entry_safe(event, tmp, &pmu_ctx->pinned_active, active_list) group_sched_out(event, ctx); } if (event_type & EVENT_FLEXIBLE) { list_for_each_entry_safe(event, tmp, &pmu_ctx->flexible_active, active_list) group_sched_out(event, ctx); /* * Since we cleared EVENT_FLEXIBLE, also clear * rotate_necessary; it will be reset by * ctx_flexible_sched_in() when needed. */ pmu_ctx->rotate_necessary = 0; } perf_pmu_enable(pmu); } static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_event_pmu_context *pmu_ctx; int is_active = ctx->is_active; bool cgroup = event_type & EVENT_CGROUP; event_type &= ~EVENT_CGROUP; lockdep_assert_held(&ctx->lock); if (likely(!ctx->nr_events)) { /* * See __perf_remove_from_context(). */ WARN_ON_ONCE(ctx->is_active); if (ctx->task) WARN_ON_ONCE(cpuctx->task_ctx); return; } /* * Always update time if it was set; not only when it changes. * Otherwise we can 'forget' to update time for any but the last * context we sched out. For example: * * ctx_sched_out(.event_type = EVENT_FLEXIBLE) * ctx_sched_out(.event_type = EVENT_PINNED) * * would only update time for the pinned events. */ if (is_active & EVENT_TIME) { /* update (and stop) ctx time */ update_context_time(ctx); update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx); /* * CPU-release for the below ->is_active store, * see __load_acquire() in perf_event_time_now() */ barrier(); } ctx->is_active &= ~event_type; if (!(ctx->is_active & EVENT_ALL)) ctx->is_active = 0; if (ctx->task) { WARN_ON_ONCE(cpuctx->task_ctx != ctx); if (!ctx->is_active) cpuctx->task_ctx = NULL; } is_active ^= ctx->is_active; /* changed bits */ list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { if (cgroup && !pmu_ctx->nr_cgroups) continue; __pmu_ctx_sched_out(pmu_ctx, is_active); } } /* * Test whether two contexts are equivalent, i.e. whether they have both been * cloned from the same version of the same context. * * Equivalence is measured using a generation number in the context that is * incremented on each modification to it; see unclone_ctx(), list_add_event() * and list_del_event().
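 *
 * For example: a context cloned while its parent was at generation G
 * records parent_gen == G. If an event is later added to the parent,
 * the parent's generation moves past G and context_equiv() no longer
 * treats the two as clones, which disables the lazy context-switch swap
 * in perf_event_context_sched_out().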
*/ static int context_equiv(struct perf_event_context *ctx1, struct perf_event_context *ctx2) { lockdep_assert_held(&ctx1->lock); lockdep_assert_held(&ctx2->lock); /* Pinning disables the swap optimization */ if (ctx1->pin_count || ctx2->pin_count) return 0; /* If ctx1 is the parent of ctx2 */ if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen) return 1; /* If ctx2 is the parent of ctx1 */ if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation) return 1; /* * If ctx1 and ctx2 have the same parent; we flatten the parent * hierarchy, see perf_event_init_context(). */ if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx && ctx1->parent_gen == ctx2->parent_gen) return 1; /* Unmatched */ return 0; } static void __perf_event_sync_stat(struct perf_event *event, struct perf_event *next_event) { u64 value; if (!event->attr.inherit_stat) return; /* * Update the event value, we cannot use perf_event_read() * because we're in the middle of a context switch and have IRQs * disabled, which upsets smp_call_function_single(), however * we know the event must be on the current CPU, therefore we * don't need to use it. */ if (event->state == PERF_EVENT_STATE_ACTIVE) event->pmu->read(event); perf_event_update_time(event); /* * In order to keep per-task stats reliable we need to flip the event * values when we flip the contexts. */ value = local64_read(&next_event->count); value = local64_xchg(&event->count, value); local64_set(&next_event->count, value); swap(event->total_time_enabled, next_event->total_time_enabled); swap(event->total_time_running, next_event->total_time_running); /* * Since we swizzled the values, update the user visible data too. */ perf_event_update_userpage(event); perf_event_update_userpage(next_event); } static void perf_event_sync_stat(struct perf_event_context *ctx, struct perf_event_context *next_ctx) { struct perf_event *event, *next_event; if (!ctx->nr_stat) return; update_context_time(ctx); event = list_first_entry(&ctx->event_list, struct perf_event, event_entry); next_event = list_first_entry(&next_ctx->event_list, struct perf_event, event_entry); while (&event->event_entry != &ctx->event_list && &next_event->event_entry != &next_ctx->event_list) { __perf_event_sync_stat(event, next_event); event = list_next_entry(event, event_entry); next_event = list_next_entry(next_event, event_entry); } } #define double_list_for_each_entry(pos1, pos2, head1, head2, member) \ for (pos1 = list_first_entry(head1, typeof(*pos1), member), \ pos2 = list_first_entry(head2, typeof(*pos2), member); \ !list_entry_is_head(pos1, head1, member) && \ !list_entry_is_head(pos2, head2, member); \ pos1 = list_next_entry(pos1, member), \ pos2 = list_next_entry(pos2, member)) static void perf_event_swap_task_ctx_data(struct perf_event_context *prev_ctx, struct perf_event_context *next_ctx) { struct perf_event_pmu_context *prev_epc, *next_epc; if (!prev_ctx->nr_task_data) return; double_list_for_each_entry(prev_epc, next_epc, &prev_ctx->pmu_ctx_list, &next_ctx->pmu_ctx_list, pmu_ctx_entry) { if (WARN_ON_ONCE(prev_epc->pmu != next_epc->pmu)) continue; /* * PMU specific parts of task perf context can require * additional synchronization. 
As an example of such * synchronization see implementation details of Intel * LBR call stack data profiling; */ if (prev_epc->pmu->swap_task_ctx) prev_epc->pmu->swap_task_ctx(prev_epc, next_epc); else swap(prev_epc->task_ctx_data, next_epc->task_ctx_data); } } static void perf_ctx_sched_task_cb(struct perf_event_context *ctx, bool sched_in) { struct perf_event_pmu_context *pmu_ctx; struct perf_cpu_pmu_context *cpc; list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { cpc = this_cpu_ptr(pmu_ctx->pmu->cpu_pmu_context); if (cpc->sched_cb_usage && pmu_ctx->pmu->sched_task) pmu_ctx->pmu->sched_task(pmu_ctx, sched_in); } } static void perf_event_context_sched_out(struct task_struct *task, struct task_struct *next) { struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_event_context *next_ctx; struct perf_event_context *parent, *next_parent; int do_switch = 1; if (likely(!ctx)) return; rcu_read_lock(); next_ctx = rcu_dereference(next->perf_event_ctxp); if (!next_ctx) goto unlock; parent = rcu_dereference(ctx->parent_ctx); next_parent = rcu_dereference(next_ctx->parent_ctx); /* If neither context have a parent context; they cannot be clones. */ if (!parent && !next_parent) goto unlock; if (next_parent == ctx || next_ctx == parent || next_parent == parent) { /* * Looks like the two contexts are clones, so we might be * able to optimize the context switch. We lock both * contexts and check that they are clones under the * lock (including re-checking that neither has been * uncloned in the meantime). It doesn't matter which * order we take the locks because no other cpu could * be trying to lock both of these tasks. */ raw_spin_lock(&ctx->lock); raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); if (context_equiv(ctx, next_ctx)) { perf_ctx_disable(ctx, false); /* PMIs are disabled; ctx->nr_pending is stable. */ if (local_read(&ctx->nr_pending) || local_read(&next_ctx->nr_pending)) { /* * Must not swap out ctx when there's pending * events that rely on the ctx->task relation. */ raw_spin_unlock(&next_ctx->lock); rcu_read_unlock(); goto inside_switch; } WRITE_ONCE(ctx->task, next); WRITE_ONCE(next_ctx->task, task); perf_ctx_sched_task_cb(ctx, false); perf_event_swap_task_ctx_data(ctx, next_ctx); perf_ctx_enable(ctx, false); /* * RCU_INIT_POINTER here is safe because we've not * modified the ctx and the above modification of * ctx->task and ctx->task_ctx_data are immaterial * since those values are always verified under * ctx->lock which we're now holding. 
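 *
 * The net effect of the swap is that do_switch stays 0: the full
 * task_ctx_sched_out()/sched-in of every event is skipped and the two
 * clone-equivalent contexts merely trade owning tasks.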
*/ RCU_INIT_POINTER(task->perf_event_ctxp, next_ctx); RCU_INIT_POINTER(next->perf_event_ctxp, ctx); do_switch = 0; perf_event_sync_stat(ctx, next_ctx); } raw_spin_unlock(&next_ctx->lock); raw_spin_unlock(&ctx->lock); } unlock: rcu_read_unlock(); if (do_switch) { raw_spin_lock(&ctx->lock); perf_ctx_disable(ctx, false); inside_switch: perf_ctx_sched_task_cb(ctx, false); task_ctx_sched_out(ctx, EVENT_ALL); perf_ctx_enable(ctx, false); raw_spin_unlock(&ctx->lock); } } static DEFINE_PER_CPU(struct list_head, sched_cb_list); static DEFINE_PER_CPU(int, perf_sched_cb_usages); void perf_sched_cb_dec(struct pmu *pmu) { struct perf_cpu_pmu_context *cpc = this_cpu_ptr(pmu->cpu_pmu_context); this_cpu_dec(perf_sched_cb_usages); barrier(); if (!--cpc->sched_cb_usage) list_del(&cpc->sched_cb_entry); } void perf_sched_cb_inc(struct pmu *pmu) { struct perf_cpu_pmu_context *cpc = this_cpu_ptr(pmu->cpu_pmu_context); if (!cpc->sched_cb_usage++) list_add(&cpc->sched_cb_entry, this_cpu_ptr(&sched_cb_list)); barrier(); this_cpu_inc(perf_sched_cb_usages); } /* * This function provides the context switch callback to the lower code * layer. It is invoked ONLY when the context switch callback is enabled. * * This callback is relevant even to per-cpu events; for example multi event * PEBS requires this to provide PID/TID information. This requires we flush * all queued PEBS records before we context switch to a new task. */ static void __perf_pmu_sched_task(struct perf_cpu_pmu_context *cpc, bool sched_in) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct pmu *pmu; pmu = cpc->epc.pmu; /* software PMUs will not have sched_task */ if (WARN_ON_ONCE(!pmu->sched_task)) return; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(pmu); pmu->sched_task(cpc->task_epc, sched_in); perf_pmu_enable(pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } static void perf_pmu_sched_task(struct task_struct *prev, struct task_struct *next, bool sched_in) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_cpu_pmu_context *cpc; /* cpuctx->task_ctx will be handled in perf_event_context_sched_in/out */ if (prev == next || cpuctx->task_ctx) return; list_for_each_entry(cpc, this_cpu_ptr(&sched_cb_list), sched_cb_entry) __perf_pmu_sched_task(cpc, sched_in); } static void perf_event_switch(struct task_struct *task, struct task_struct *next_prev, bool sched_in); /* * Called from scheduler to remove the events of the current task, * with interrupts disabled. * * We stop each event and update the event value in event->count. * * This does not protect us against NMI, but disable() * sets the disabled bit in the control field of event _before_ * accessing the event control register. If a NMI hits, then it will * not restart the event. */ void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { if (__this_cpu_read(perf_sched_cb_usages)) perf_pmu_sched_task(task, next, false); if (atomic_read(&nr_switch_events)) perf_event_switch(task, next, false); perf_event_context_sched_out(task, next); /* * if cgroup events exist on this CPU, then we need * to check if we have to switch out PMU state. 
* cgroup event are system-wide mode only */ perf_cgroup_switch(next); } static bool perf_less_group_idx(const void *l, const void *r) { const struct perf_event *le = *(const struct perf_event **)l; const struct perf_event *re = *(const struct perf_event **)r; return le->group_index < re->group_index; } static void swap_ptr(void *l, void *r) { void **lp = l, **rp = r; swap(*lp, *rp); } static const struct min_heap_callbacks perf_min_heap = { .elem_size = sizeof(struct perf_event *), .less = perf_less_group_idx, .swp = swap_ptr, }; static void __heap_add(struct min_heap *heap, struct perf_event *event) { struct perf_event **itrs = heap->data; if (event) { itrs[heap->nr] = event; heap->nr++; } } static void __link_epc(struct perf_event_pmu_context *pmu_ctx) { struct perf_cpu_pmu_context *cpc; if (!pmu_ctx->ctx->task) return; cpc = this_cpu_ptr(pmu_ctx->pmu->cpu_pmu_context); WARN_ON_ONCE(cpc->task_epc && cpc->task_epc != pmu_ctx); cpc->task_epc = pmu_ctx; } static noinline int visit_groups_merge(struct perf_event_context *ctx, struct perf_event_groups *groups, int cpu, struct pmu *pmu, int (*func)(struct perf_event *, void *), void *data) { #ifdef CONFIG_CGROUP_PERF struct cgroup_subsys_state *css = NULL; #endif struct perf_cpu_context *cpuctx = NULL; /* Space for per CPU and/or any CPU event iterators. */ struct perf_event *itrs[2]; struct min_heap event_heap; struct perf_event **evt; int ret; if (pmu->filter && pmu->filter(pmu, cpu)) return 0; if (!ctx->task) { cpuctx = this_cpu_ptr(&perf_cpu_context); event_heap = (struct min_heap){ .data = cpuctx->heap, .nr = 0, .size = cpuctx->heap_size, }; lockdep_assert_held(&cpuctx->ctx.lock); #ifdef CONFIG_CGROUP_PERF if (cpuctx->cgrp) css = &cpuctx->cgrp->css; #endif } else { event_heap = (struct min_heap){ .data = itrs, .nr = 0, .size = ARRAY_SIZE(itrs), }; /* Events not within a CPU context may be on any CPU. */ __heap_add(&event_heap, perf_event_groups_first(groups, -1, pmu, NULL)); } evt = event_heap.data; __heap_add(&event_heap, perf_event_groups_first(groups, cpu, pmu, NULL)); #ifdef CONFIG_CGROUP_PERF for (; css; css = css->parent) __heap_add(&event_heap, perf_event_groups_first(groups, cpu, pmu, css->cgroup)); #endif if (event_heap.nr) { __link_epc((*evt)->pmu_ctx); perf_assert_pmu_disabled((*evt)->pmu_ctx->pmu); } min_heapify_all(&event_heap, &perf_min_heap); while (event_heap.nr) { ret = func(*evt, data); if (ret) return ret; *evt = perf_event_groups_next(*evt, pmu); if (*evt) min_heapify(&event_heap, 0, &perf_min_heap); else min_heap_pop(&event_heap, &perf_min_heap); } return 0; } /* * Because the userpage is strictly per-event (there is no concept of context, * so there cannot be a context indirection), every userpage must be updated * when context time starts :-( * * IOW, we must not miss EVENT_TIME edges. 
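 *
 * event_update_userpage() below is hence called from the sched-in path
 * for groups that could not be scheduled; it is a no-op unless the event
 * actually has its userpage mapped (event->mmap_count != 0).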
*/ static inline bool event_update_userpage(struct perf_event *event) { if (likely(!atomic_read(&event->mmap_count))) return false; perf_event_update_time(event); perf_event_update_userpage(event); return true; } static inline void group_update_userpage(struct perf_event *group_event) { struct perf_event *event; if (!event_update_userpage(group_event)) return; for_each_sibling_event(event, group_event) event_update_userpage(event); } static int merge_sched_in(struct perf_event *event, void *data) { struct perf_event_context *ctx = event->ctx; int *can_add_hw = data; if (event->state <= PERF_EVENT_STATE_OFF) return 0; if (!event_filter_match(event)) return 0; if (group_can_go_on(event, *can_add_hw)) { if (!group_sched_in(event, ctx)) list_add_tail(&event->active_list, get_event_list(event)); } if (event->state == PERF_EVENT_STATE_INACTIVE) { *can_add_hw = 0; if (event->attr.pinned) { perf_cgroup_event_disable(event, ctx); perf_event_set_state(event, PERF_EVENT_STATE_ERROR); } else { struct perf_cpu_pmu_context *cpc; event->pmu_ctx->rotate_necessary = 1; cpc = this_cpu_ptr(event->pmu_ctx->pmu->cpu_pmu_context); perf_mux_hrtimer_restart(cpc); group_update_userpage(event); } } return 0; } static void pmu_groups_sched_in(struct perf_event_context *ctx, struct perf_event_groups *groups, struct pmu *pmu) { int can_add_hw = 1; visit_groups_merge(ctx, groups, smp_processor_id(), pmu, merge_sched_in, &can_add_hw); } static void ctx_groups_sched_in(struct perf_event_context *ctx, struct perf_event_groups *groups, bool cgroup) { struct perf_event_pmu_context *pmu_ctx; list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { if (cgroup && !pmu_ctx->nr_cgroups) continue; pmu_groups_sched_in(ctx, groups, pmu_ctx->pmu); } } static void __pmu_ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu) { pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu); } static void ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); int is_active = ctx->is_active; bool cgroup = event_type & EVENT_CGROUP; event_type &= ~EVENT_CGROUP; lockdep_assert_held(&ctx->lock); if (likely(!ctx->nr_events)) return; if (!(is_active & EVENT_TIME)) { /* start ctx time */ __update_context_time(ctx, false); perf_cgroup_set_timestamp(cpuctx); /* * CPU-release for the below ->is_active store, * see __load_acquire() in perf_event_time_now() */ barrier(); } ctx->is_active |= (event_type | EVENT_TIME); if (ctx->task) { if (!is_active) cpuctx->task_ctx = ctx; else WARN_ON_ONCE(cpuctx->task_ctx != ctx); } is_active ^= ctx->is_active; /* changed bits */ /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. 
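 *
 * A pinned group that still cannot go on is put into ERROR state by
 * merge_sched_in() above (after perf_cgroup_event_disable()); a flexible
 * group that does not fit merely sets rotate_necessary and kicks the mux
 * hrtimer so it is retried on the next rotation.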
*/ if (is_active & EVENT_PINNED) ctx_groups_sched_in(ctx, &ctx->pinned_groups, cgroup); /* Then walk through the lower prio flexible groups */ if (is_active & EVENT_FLEXIBLE) ctx_groups_sched_in(ctx, &ctx->flexible_groups, cgroup); } static void perf_event_context_sched_in(struct task_struct *task) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_event_context *ctx; rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp); if (!ctx) goto rcu_unlock; if (cpuctx->task_ctx == ctx) { perf_ctx_lock(cpuctx, ctx); perf_ctx_disable(ctx, false); perf_ctx_sched_task_cb(ctx, true); perf_ctx_enable(ctx, false); perf_ctx_unlock(cpuctx, ctx); goto rcu_unlock; } perf_ctx_lock(cpuctx, ctx); /* * We must check ctx->nr_events while holding ctx->lock, such * that we serialize against perf_install_in_context(). */ if (!ctx->nr_events) goto unlock; perf_ctx_disable(ctx, false); /* * We want to keep the following priority order: * cpu pinned (that don't need to move), task pinned, * cpu flexible, task flexible. * * However, if task's ctx is not carrying any pinned * events, no need to flip the cpuctx's events around. */ if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) { perf_ctx_disable(&cpuctx->ctx, false); ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE); } perf_event_sched_in(cpuctx, ctx); perf_ctx_sched_task_cb(cpuctx->task_ctx, true); if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) perf_ctx_enable(&cpuctx->ctx, false); perf_ctx_enable(ctx, false); unlock: perf_ctx_unlock(cpuctx, ctx); rcu_unlock: rcu_read_unlock(); } /* * Called from scheduler to add the events of the current task * with interrupts disabled. * * We restore the event value and then enable it. * * This does not protect us against NMI, but enable() * sets the enabled bit in the control field of event _before_ * accessing the event control register. If a NMI hits, then it will * keep the event running. */ void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { perf_event_context_sched_in(task); if (atomic_read(&nr_switch_events)) perf_event_switch(task, prev, true); if (__this_cpu_read(perf_sched_cb_usages)) perf_pmu_sched_task(prev, task, true); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) { u64 frequency = event->attr.sample_freq; u64 sec = NSEC_PER_SEC; u64 divisor, dividend; int count_fls, nsec_fls, frequency_fls, sec_fls; count_fls = fls64(count); nsec_fls = fls64(nsec); frequency_fls = fls64(frequency); sec_fls = 30; /* * We got @count in @nsec, with a target of sample_freq HZ * the target period becomes: * * @count * 10^9 * period = ------------------- * @nsec * sample_freq * */ /* * Reduce accuracy by one bit such that @a and @b converge * to a similar magnitude. */ #define REDUCE_FLS(a, b) \ do { \ if (a##_fls > b##_fls) { \ a >>= 1; \ a##_fls--; \ } else { \ b >>= 1; \ b##_fls--; \ } \ } while (0) /* * Reduce accuracy until either term fits in a u64, then proceed with * the other, so that finally we can do a u64/u64 division. 
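 *
 * Worked example with illustrative numbers: for sample_freq = 1000 and
 * count = 2,000,000 events observed over nsec = 10^9 ns, the target
 * period is 2e6 * 1e9 / (1e9 * 1000) = 2000 events per sample.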
*/ while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) { REDUCE_FLS(nsec, frequency); REDUCE_FLS(sec, count); } if (count_fls + sec_fls > 64) { divisor = nsec * frequency; while (count_fls + sec_fls > 64) { REDUCE_FLS(count, sec); divisor >>= 1; } dividend = count * sec; } else { dividend = count * sec; while (nsec_fls + frequency_fls > 64) { REDUCE_FLS(nsec, frequency); dividend >>= 1; } divisor = nsec * frequency; } if (!divisor) return dividend; return div64_u64(dividend, divisor); } static DEFINE_PER_CPU(int, perf_throttled_count); static DEFINE_PER_CPU(u64, perf_throttled_seq); static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable) { struct hw_perf_event *hwc = &event->hw; s64 period, sample_period; s64 delta; period = perf_calculate_period(event, nsec, count); delta = (s64)(period - hwc->sample_period); delta = (delta + 7) / 8; /* low pass filter */ sample_period = hwc->sample_period + delta; if (!sample_period) sample_period = 1; hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { if (disable) event->pmu->stop(event, PERF_EF_UPDATE); local64_set(&hwc->period_left, 0); if (disable) event->pmu->start(event, PERF_EF_RELOAD); } } /* * combine freq adjustment with unthrottling to avoid two passes over the * events. At the same time, make sure, having freq events does not change * the rate of unthrottling as that would introduce bias. */ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle) { struct perf_event *event; struct hw_perf_event *hwc; u64 now, period = TICK_NSEC; s64 delta; /* * only need to iterate over all events iff: * - context have events in frequency mode (needs freq adjust) * - there are events to unthrottle on this cpu */ if (!(ctx->nr_freq || unthrottle)) return; raw_spin_lock(&ctx->lock); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; // XXX use visit thingy to avoid the -1,cpu match if (!event_filter_match(event)) continue; perf_pmu_disable(event->pmu); hwc = &event->hw; if (hwc->interrupts == MAX_INTERRUPTS) { hwc->interrupts = 0; perf_log_throttle(event, 1); event->pmu->start(event, 0); } if (!event->attr.freq || !event->attr.sample_freq) goto next; /* * stop the event and update event->count */ event->pmu->stop(event, PERF_EF_UPDATE); now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; hwc->freq_count_stamp = now; /* * restart the event * reload only if value has changed * we have stopped the event so tell that * to perf_adjust_period() to avoid stopping it * twice. */ if (delta > 0) perf_adjust_period(event, period, delta, false); event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); next: perf_pmu_enable(event->pmu); } raw_spin_unlock(&ctx->lock); } /* * Move @event to the tail of the @ctx's elegible events. */ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event) { /* * Rotate the first entry last of non-pinned groups. Rotation might be * disabled by the inheritance code. 
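 *
 * The delete + insert pair below hands @event a fresh (larger)
 * group_index, which is what moves it behind the other groups with the
 * same cpu/pmu/cgroup key in the flexible_groups tree.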
*/ if (ctx->rotate_disable) return; perf_event_groups_delete(&ctx->flexible_groups, event); perf_event_groups_insert(&ctx->flexible_groups, event); } /* pick an event from the flexible_groups to rotate */ static inline struct perf_event * ctx_event_to_rotate(struct perf_event_pmu_context *pmu_ctx) { struct perf_event *event; struct rb_node *node; struct rb_root *tree; struct __group_key key = { .pmu = pmu_ctx->pmu, }; /* pick the first active flexible event */ event = list_first_entry_or_null(&pmu_ctx->flexible_active, struct perf_event, active_list); if (event) goto out; /* if no active flexible event, pick the first event */ tree = &pmu_ctx->ctx->flexible_groups.tree; if (!pmu_ctx->ctx->task) { key.cpu = smp_processor_id(); node = rb_find_first(&key, tree, __group_cmp_ignore_cgroup); if (node) event = __node_2_pe(node); goto out; } key.cpu = -1; node = rb_find_first(&key, tree, __group_cmp_ignore_cgroup); if (node) { event = __node_2_pe(node); goto out; } key.cpu = smp_processor_id(); node = rb_find_first(&key, tree, __group_cmp_ignore_cgroup); if (node) event = __node_2_pe(node); out: /* * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in() * finds there are unschedulable events, it will set it again. */ pmu_ctx->rotate_necessary = 0; return event; } static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_event_pmu_context *cpu_epc, *task_epc = NULL; struct perf_event *cpu_event = NULL, *task_event = NULL; int cpu_rotate, task_rotate; struct pmu *pmu; /* * Since we run this from IRQ context, nobody can install new * events, thus the event count values are stable. */ cpu_epc = &cpc->epc; pmu = cpu_epc->pmu; task_epc = cpc->task_epc; cpu_rotate = cpu_epc->rotate_necessary; task_rotate = task_epc ? task_epc->rotate_necessary : 0; if (!(cpu_rotate || task_rotate)) return false; perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(pmu); if (task_rotate) task_event = ctx_event_to_rotate(task_epc); if (cpu_rotate) cpu_event = ctx_event_to_rotate(cpu_epc); /* * As per the order given at ctx_resched() first 'pop' task flexible * and then, if needed CPU flexible. 
*/ if (task_event || (task_epc && cpu_event)) { update_context_time(task_epc->ctx); __pmu_ctx_sched_out(task_epc, EVENT_FLEXIBLE); } if (cpu_event) { update_context_time(&cpuctx->ctx); __pmu_ctx_sched_out(cpu_epc, EVENT_FLEXIBLE); rotate_ctx(&cpuctx->ctx, cpu_event); __pmu_ctx_sched_in(&cpuctx->ctx, pmu); } if (task_event) rotate_ctx(task_epc->ctx, task_event); if (task_event || (task_epc && cpu_event)) __pmu_ctx_sched_in(task_epc->ctx, pmu); perf_pmu_enable(pmu); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); return true; } void perf_event_task_tick(void) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_event_context *ctx; int throttled; lockdep_assert_irqs_disabled(); __this_cpu_inc(perf_throttled_seq); throttled = __this_cpu_xchg(perf_throttled_count, 0); tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); perf_adjust_freq_unthr_context(&cpuctx->ctx, !!throttled); rcu_read_lock(); ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_adjust_freq_unthr_context(ctx, !!throttled); rcu_read_unlock(); } static int event_enable_on_exec(struct perf_event *event, struct perf_event_context *ctx) { if (!event->attr.enable_on_exec) return 0; event->attr.enable_on_exec = 0; if (event->state >= PERF_EVENT_STATE_INACTIVE) return 0; perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE); return 1; } /* * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. */ static void perf_event_enable_on_exec(struct perf_event_context *ctx) { struct perf_event_context *clone_ctx = NULL; enum event_type_t event_type = 0; struct perf_cpu_context *cpuctx; struct perf_event *event; unsigned long flags; int enabled = 0; local_irq_save(flags); if (WARN_ON_ONCE(current->perf_event_ctxp != ctx)) goto out; if (!ctx->nr_events) goto out; cpuctx = this_cpu_ptr(&perf_cpu_context); perf_ctx_lock(cpuctx, ctx); ctx_sched_out(ctx, EVENT_TIME); list_for_each_entry(event, &ctx->event_list, event_entry) { enabled |= event_enable_on_exec(event, ctx); event_type |= get_event_type(event); } /* * Unclone and reschedule this context if we enabled any event. */ if (enabled) { clone_ctx = unclone_ctx(ctx); ctx_resched(cpuctx, ctx, event_type); } else { ctx_sched_in(ctx, EVENT_TIME); } perf_ctx_unlock(cpuctx, ctx); out: local_irq_restore(flags); if (clone_ctx) put_ctx(clone_ctx); } static void perf_remove_from_owner(struct perf_event *event); static void perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx); /* * Removes all events from the current task that have been marked * remove-on-exec, and feeds their values back to parent events. 
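 *
 * "Feeding back" happens in perf_event_exit_event(): for an inherited
 * event the child's count is folded into its parent before the child is
 * torn down, so readers of the parent do not lose what was counted in
 * the exec'ing task.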
*/ static void perf_event_remove_on_exec(struct perf_event_context *ctx) { struct perf_event_context *clone_ctx = NULL; struct perf_event *event, *next; unsigned long flags; bool modified = false; mutex_lock(&ctx->mutex); if (WARN_ON_ONCE(ctx->task != current)) goto unlock; list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) { if (!event->attr.remove_on_exec) continue; if (!is_kernel_event(event)) perf_remove_from_owner(event); modified = true; perf_event_exit_event(event, ctx); } raw_spin_lock_irqsave(&ctx->lock, flags); if (modified) clone_ctx = unclone_ctx(ctx); raw_spin_unlock_irqrestore(&ctx->lock, flags); unlock: mutex_unlock(&ctx->mutex); if (clone_ctx) put_ctx(clone_ctx); } struct perf_read_data { struct perf_event *event; bool group; int ret; }; static int __perf_event_read_cpu(struct perf_event *event, int event_cpu) { u16 local_pkg, event_pkg; if ((unsigned)event_cpu >= nr_cpu_ids) return event_cpu; if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) { int local_cpu = smp_processor_id(); event_pkg = topology_physical_package_id(event_cpu); local_pkg = topology_physical_package_id(local_cpu); if (event_pkg == local_pkg) return local_cpu; } return event_cpu; } /* * Cross CPU call to read the hardware event */ static void __perf_event_read(void *info) { struct perf_read_data *data = info; struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct pmu *pmu = event->pmu; /* * If this is a task context, we need to check whether it is * the current task context of this cpu. If not it has been * scheduled out before the smp call arrived. In that case * event->count would have been updated to a recent sample * when the event was scheduled out. */ if (ctx->task && cpuctx->task_ctx != ctx) return; raw_spin_lock(&ctx->lock); if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); update_cgrp_time_from_event(event); } perf_event_update_time(event); if (data->group) perf_event_update_sibling_time(event); if (event->state != PERF_EVENT_STATE_ACTIVE) goto unlock; if (!data->group) { pmu->read(event); data->ret = 0; goto unlock; } pmu->start_txn(pmu, PERF_PMU_TXN_READ); pmu->read(event); for_each_sibling_event(sub, event) { if (sub->state == PERF_EVENT_STATE_ACTIVE) { /* * Use sibling's PMU rather than @event's since * sibling could be on different (eg: software) PMU. */ sub->pmu->read(sub); } } data->ret = pmu->commit_txn(pmu); unlock: raw_spin_unlock(&ctx->lock); } static inline u64 perf_event_count(struct perf_event *event) { return local64_read(&event->count) + atomic64_read(&event->child_count); } static void calc_timer_values(struct perf_event *event, u64 *now, u64 *enabled, u64 *running) { u64 ctx_time; *now = perf_clock(); ctx_time = perf_event_time_now(event, *now); __perf_update_times(event, ctx_time, enabled, running); } /* * NMI-safe method to read a local event, that is an event that * is: * - either for the current task, or for this CPU * - does not have inherit set, for inherited task events * will not be local and we cannot read them atomically * - must not have a pmu::count method */ int perf_event_read_local(struct perf_event *event, u64 *value, u64 *enabled, u64 *running) { unsigned long flags; int event_oncpu; int event_cpu; int ret = 0; /* * Disabling interrupts avoids all counter scheduling (context * switches, timer based rotation and IPIs). 
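 *
 * Illustrative usage sketch (consume_value() is a stand-in for whatever
 * the caller does with the numbers; enabled/running may also be NULL):
 *
 *	u64 value, enabled, running;
 *
 *	if (!perf_event_read_local(event, &value, &enabled, &running))
 *		consume_value(value, enabled, running);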
*/ local_irq_save(flags); /* * It must not be an event with inherit set, we cannot read * all child counters from atomic context. */ if (event->attr.inherit) { ret = -EOPNOTSUPP; goto out; } /* If this is a per-task event, it must be for current */ if ((event->attach_state & PERF_ATTACH_TASK) && event->hw.target != current) { ret = -EINVAL; goto out; } /* * Get the event CPU numbers, and adjust them to local if the event is * a per-package event that can be read locally */ event_oncpu = __perf_event_read_cpu(event, event->oncpu); event_cpu = __perf_event_read_cpu(event, event->cpu); /* If this is a per-CPU event, it must be for this CPU */ if (!(event->attach_state & PERF_ATTACH_TASK) && event_cpu != smp_processor_id()) { ret = -EINVAL; goto out; } /* If this is a pinned event it must be running on this CPU */ if (event->attr.pinned && event_oncpu != smp_processor_id()) { ret = -EBUSY; goto out; } /* * If the event is currently on this CPU, its either a per-task event, * or local to this CPU. Furthermore it means its ACTIVE (otherwise * oncpu == -1). */ if (event_oncpu == smp_processor_id()) event->pmu->read(event); *value = local64_read(&event->count); if (enabled || running) { u64 __enabled, __running, __now; calc_timer_values(event, &__now, &__enabled, &__running); if (enabled) *enabled = __enabled; if (running) *running = __running; } out: local_irq_restore(flags); return ret; } static int perf_event_read(struct perf_event *event, bool group) { enum perf_event_state state = READ_ONCE(event->state); int event_cpu, ret = 0; /* * If event is enabled and currently active on a CPU, update the * value in the event structure: */ again: if (state == PERF_EVENT_STATE_ACTIVE) { struct perf_read_data data; /* * Orders the ->state and ->oncpu loads such that if we see * ACTIVE we must also see the right ->oncpu. * * Matches the smp_wmb() from event_sched_in(). */ smp_rmb(); event_cpu = READ_ONCE(event->oncpu); if ((unsigned)event_cpu >= nr_cpu_ids) return 0; data = (struct perf_read_data){ .event = event, .group = group, .ret = 0, }; preempt_disable(); event_cpu = __perf_event_read_cpu(event, event_cpu); /* * Purposely ignore the smp_call_function_single() return * value. * * If event_cpu isn't a valid CPU it means the event got * scheduled out and that will have updated the event count. * * Therefore, either way, we'll have an up-to-date event count * after this. 
*/ (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1); preempt_enable(); ret = data.ret; } else if (state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; raw_spin_lock_irqsave(&ctx->lock, flags); state = event->state; if (state != PERF_EVENT_STATE_INACTIVE) { raw_spin_unlock_irqrestore(&ctx->lock, flags); goto again; } /* * May read while context is not active (e.g., thread is * blocked), in that case we cannot update context time */ if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); update_cgrp_time_from_event(event); } perf_event_update_time(event); if (group) perf_event_update_sibling_time(event); raw_spin_unlock_irqrestore(&ctx->lock, flags); } return ret; } /* * Initialize the perf_event context in a task_struct: */ static void __perf_event_init_context(struct perf_event_context *ctx) { raw_spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->pmu_ctx_list); perf_event_groups_init(&ctx->pinned_groups); perf_event_groups_init(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); refcount_set(&ctx->refcount, 1); } static void __perf_init_event_pmu_context(struct perf_event_pmu_context *epc, struct pmu *pmu) { epc->pmu = pmu; INIT_LIST_HEAD(&epc->pmu_ctx_entry); INIT_LIST_HEAD(&epc->pinned_active); INIT_LIST_HEAD(&epc->flexible_active); atomic_set(&epc->refcount, 1); } static struct perf_event_context * alloc_perf_context(struct task_struct *task) { struct perf_event_context *ctx; ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); if (!ctx) return NULL; __perf_event_init_context(ctx); if (task) ctx->task = get_task_struct(task); return ctx; } static struct task_struct * find_lively_task_by_vpid(pid_t vpid) { struct task_struct *task; rcu_read_lock(); if (!vpid) task = current; else task = find_task_by_vpid(vpid); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) return ERR_PTR(-ESRCH); return task; } /* * Returns a matching context with refcount and pincount. */ static struct perf_event_context * find_get_context(struct task_struct *task, struct perf_event *event) { struct perf_event_context *ctx, *clone_ctx = NULL; struct perf_cpu_context *cpuctx; unsigned long flags; int err; if (!task) { /* Must be root to operate on a CPU event: */ err = perf_allow_cpu(&event->attr); if (err) return ERR_PTR(err); cpuctx = per_cpu_ptr(&perf_cpu_context, event->cpu); ctx = &cpuctx->ctx; get_ctx(ctx); raw_spin_lock_irqsave(&ctx->lock, flags); ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); return ctx; } err = -EINVAL; retry: ctx = perf_lock_task_context(task, &flags); if (ctx) { clone_ctx = unclone_ctx(ctx); ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); if (clone_ctx) put_ctx(clone_ctx); } else { ctx = alloc_perf_context(task); err = -ENOMEM; if (!ctx) goto errout; err = 0; mutex_lock(&task->perf_event_mutex); /* * If it has already passed perf_event_exit_task(). * we must see PF_EXITING, it takes this mutex too. 
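 *
 * If instead another thread has already installed a context
 * (task->perf_event_ctxp is set), we fail with -EAGAIN below, drop the
 * freshly allocated ctx and retry the lookup from the top.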
*/ if (task->flags & PF_EXITING) err = -ESRCH; else if (task->perf_event_ctxp) err = -EAGAIN; else { get_ctx(ctx); ++ctx->pin_count; rcu_assign_pointer(task->perf_event_ctxp, ctx); } mutex_unlock(&task->perf_event_mutex); if (unlikely(err)) { put_ctx(ctx); if (err == -EAGAIN) goto retry; goto errout; } } return ctx; errout: return ERR_PTR(err); } static struct perf_event_pmu_context * find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, struct perf_event *event) { struct perf_event_pmu_context *new = NULL, *epc; void *task_ctx_data = NULL; if (!ctx->task) { /* * perf_pmu_migrate_context() / __perf_pmu_install_event() * relies on the fact that find_get_pmu_context() cannot fail * for CPU contexts. */ struct perf_cpu_pmu_context *cpc; cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu); epc = &cpc->epc; raw_spin_lock_irq(&ctx->lock); if (!epc->ctx) { atomic_set(&epc->refcount, 1); epc->embedded = 1; list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); epc->ctx = ctx; } else { WARN_ON_ONCE(epc->ctx != ctx); atomic_inc(&epc->refcount); } raw_spin_unlock_irq(&ctx->lock); return epc; } new = kzalloc(sizeof(*epc), GFP_KERNEL); if (!new) return ERR_PTR(-ENOMEM); if (event->attach_state & PERF_ATTACH_TASK_DATA) { task_ctx_data = alloc_task_ctx_data(pmu); if (!task_ctx_data) { kfree(new); return ERR_PTR(-ENOMEM); } } __perf_init_event_pmu_context(new, pmu); /* * XXX * * lockdep_assert_held(&ctx->mutex); * * can't because perf_event_init_task() doesn't actually hold the * child_ctx->mutex. */ raw_spin_lock_irq(&ctx->lock); list_for_each_entry(epc, &ctx->pmu_ctx_list, pmu_ctx_entry) { if (epc->pmu == pmu) { WARN_ON_ONCE(epc->ctx != ctx); atomic_inc(&epc->refcount); goto found_epc; } } epc = new; new = NULL; list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); epc->ctx = ctx; found_epc: if (task_ctx_data && !epc->task_ctx_data) { epc->task_ctx_data = task_ctx_data; task_ctx_data = NULL; ctx->nr_task_data++; } raw_spin_unlock_irq(&ctx->lock); free_task_ctx_data(pmu, task_ctx_data); kfree(new); return epc; } static void get_pmu_ctx(struct perf_event_pmu_context *epc) { WARN_ON_ONCE(!atomic_inc_not_zero(&epc->refcount)); } static void free_epc_rcu(struct rcu_head *head) { struct perf_event_pmu_context *epc = container_of(head, typeof(*epc), rcu_head); kfree(epc->task_ctx_data); kfree(epc); } static void put_pmu_ctx(struct perf_event_pmu_context *epc) { struct perf_event_context *ctx = epc->ctx; unsigned long flags; /* * XXX * * lockdep_assert_held(&ctx->mutex); * * can't because of the call-site in _free_event()/put_event() * which isn't always called under ctx->mutex. 
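 *
 * Instead, the refcount drop and the ctx->lock acquisition are fused via
 * atomic_dec_and_raw_lock_irqsave(), so only the final put proceeds to
 * unlink the epc from its context, and it does so under ctx->lock.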
*/ if (!atomic_dec_and_raw_lock_irqsave(&epc->refcount, &ctx->lock, flags)) return; WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry)); list_del_init(&epc->pmu_ctx_entry); epc->ctx = NULL; WARN_ON_ONCE(!list_empty(&epc->pinned_active)); WARN_ON_ONCE(!list_empty(&epc->flexible_active)); raw_spin_unlock_irqrestore(&ctx->lock, flags); if (epc->embedded) return; call_rcu(&epc->rcu_head, free_epc_rcu); } static void perf_event_free_filter(struct perf_event *event); static void free_event_rcu(struct rcu_head *head) { struct perf_event *event = container_of(head, typeof(*event), rcu_head); if (event->ns) put_pid_ns(event->ns); perf_event_free_filter(event); kmem_cache_free(perf_event_cache, event); } static void ring_buffer_attach(struct perf_event *event, struct perf_buffer *rb); static void detach_sb_event(struct perf_event *event) { struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu); raw_spin_lock(&pel->lock); list_del_rcu(&event->sb_list); raw_spin_unlock(&pel->lock); } static bool is_sb_event(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; if (event->parent) return false; if (event->attach_state & PERF_ATTACH_TASK) return false; if (attr->mmap || attr->mmap_data || attr->mmap2 || attr->comm || attr->comm_exec || attr->task || attr->ksymbol || attr->context_switch || attr->text_poke || attr->bpf_event) return true; return false; } static void unaccount_pmu_sb_event(struct perf_event *event) { if (is_sb_event(event)) detach_sb_event(event); } #ifdef CONFIG_NO_HZ_FULL static DEFINE_SPINLOCK(nr_freq_lock); #endif static void unaccount_freq_event_nohz(void) { #ifdef CONFIG_NO_HZ_FULL spin_lock(&nr_freq_lock); if (atomic_dec_and_test(&nr_freq_events)) tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS); spin_unlock(&nr_freq_lock); #endif } static void unaccount_freq_event(void) { if (tick_nohz_full_enabled()) unaccount_freq_event_nohz(); else atomic_dec(&nr_freq_events); } static void unaccount_event(struct perf_event *event) { bool dec = false; if (event->parent) return; if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) dec = true; if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); if (event->attr.build_id) atomic_dec(&nr_build_id_events); if (event->attr.comm) atomic_dec(&nr_comm_events); if (event->attr.namespaces) atomic_dec(&nr_namespaces_events); if (event->attr.cgroup) atomic_dec(&nr_cgroup_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) unaccount_freq_event(); if (event->attr.context_switch) { dec = true; atomic_dec(&nr_switch_events); } if (is_cgroup_event(event)) dec = true; if (has_branch_stack(event)) dec = true; if (event->attr.ksymbol) atomic_dec(&nr_ksymbol_events); if (event->attr.bpf_event) atomic_dec(&nr_bpf_events); if (event->attr.text_poke) atomic_dec(&nr_text_poke_events); if (dec) { if (!atomic_add_unless(&perf_sched_count, -1, 1)) schedule_delayed_work(&perf_sched_work, HZ); } unaccount_pmu_sb_event(event); } static void perf_sched_delayed(struct work_struct *work) { mutex_lock(&perf_sched_mutex); if (atomic_dec_and_test(&perf_sched_count)) static_branch_disable(&perf_sched_events); mutex_unlock(&perf_sched_mutex); } /* * The following implement mutual exclusion of events on "exclusive" pmus * (PERF_PMU_CAP_EXCLUSIVE). 
Such pmus can only have one event scheduled * at a time, so we disallow creating events that might conflict, namely: * * 1) cpu-wide events in the presence of per-task events, * 2) per-task events in the presence of cpu-wide events, * 3) two matching events on the same perf_event_context. * * The former two cases are handled in the allocation path (perf_event_alloc(), * _free_event()), the latter -- before the first perf_install_in_context(). */ static int exclusive_event_init(struct perf_event *event) { struct pmu *pmu = event->pmu; if (!is_exclusive_pmu(pmu)) return 0; /* * Prevent co-existence of per-task and cpu-wide events on the * same exclusive pmu. * * Negative pmu::exclusive_cnt means there are cpu-wide * events on this "exclusive" pmu, positive means there are * per-task events. * * Since this is called in perf_event_alloc() path, event::ctx * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK * to mean "per-task event", because unlike other attach states it * never gets cleared. */ if (event->attach_state & PERF_ATTACH_TASK) { if (!atomic_inc_unless_negative(&pmu->exclusive_cnt)) return -EBUSY; } else { if (!atomic_dec_unless_positive(&pmu->exclusive_cnt)) return -EBUSY; } return 0; } static void exclusive_event_destroy(struct perf_event *event) { struct pmu *pmu = event->pmu; if (!is_exclusive_pmu(pmu)) return; /* see comment in exclusive_event_init() */ if (event->attach_state & PERF_ATTACH_TASK) atomic_dec(&pmu->exclusive_cnt); else atomic_inc(&pmu->exclusive_cnt); } static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2) { if ((e1->pmu == e2->pmu) && (e1->cpu == e2->cpu || e1->cpu == -1 || e2->cpu == -1)) return true; return false; } static bool exclusive_event_installable(struct perf_event *event, struct perf_event_context *ctx) { struct perf_event *iter_event; struct pmu *pmu = event->pmu; lockdep_assert_held(&ctx->mutex); if (!is_exclusive_pmu(pmu)) return true; list_for_each_entry(iter_event, &ctx->event_list, event_entry) { if (exclusive_event_match(iter_event, event)) return false; } return true; } static void perf_addr_filters_splice(struct perf_event *event, struct list_head *head); static void _free_event(struct perf_event *event) { irq_work_sync(&event->pending_irq); unaccount_event(event); security_perf_event_free(event); if (event->rb) { /* * Can happen when we close an event with re-directed output. * * Since we have a 0 refcount, perf_mmap_close() will skip * over us; possibly making our ring_buffer_put() the last. */ mutex_lock(&event->mmap_mutex); ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); } if (is_cgroup_event(event)) perf_detach_cgroup(event); if (!event->parent) { if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) put_callchain_buffers(); } perf_event_free_bpf_prog(event); perf_addr_filters_splice(event, NULL); kfree(event->addr_filter_ranges); if (event->destroy) event->destroy(event); /* * Must be after ->destroy(), due to uprobe_perf_close() using * hw.target. */ if (event->hw.target) put_task_struct(event->hw.target); if (event->pmu_ctx) put_pmu_ctx(event->pmu_ctx); /* * perf_event_free_task() relies on put_ctx() being 'last', in particular * all task references must be cleaned up. */ if (event->ctx) put_ctx(event->ctx); exclusive_event_destroy(event); module_put(event->pmu->module); call_rcu(&event->rcu_head, free_event_rcu); } /* * Used to free events which have a known refcount of 1, such as in error paths * where the event isn't exposed yet and inherited events. 
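 *
 * If the refcount is anything other than 1 something is badly broken;
 * rather than risk a use-after-free, free_event() warns and deliberately
 * leaks the event.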
*/ static void free_event(struct perf_event *event) { if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1, "unexpected event refcount: %ld; ptr=%p\n", atomic_long_read(&event->refcount), event)) { /* leak to avoid use-after-free */ return; } _free_event(event); } /* * Remove user event from the owner task. */ static void perf_remove_from_owner(struct perf_event *event) { struct task_struct *owner; rcu_read_lock(); /* * Matches the smp_store_release() in perf_event_exit_task(). If we * observe !owner it means the list deletion is complete and we can * indeed free this event, otherwise we need to serialize on * owner->perf_event_mutex. */ owner = READ_ONCE(event->owner); if (owner) { /* * Since delayed_put_task_struct() also drops the last * task reference we can safely take a new reference * while holding the rcu_read_lock(). */ get_task_struct(owner); } rcu_read_unlock(); if (owner) { /* * If we're here through perf_event_exit_task() we're already * holding ctx->mutex which would be an inversion wrt. the * normal lock order. * * However we can safely take this lock because its the child * ctx->mutex. */ mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING); /* * We have to re-check the event->owner field, if it is cleared * we raced with perf_event_exit_task(), acquiring the mutex * ensured they're done, and we can proceed with freeing the * event. */ if (event->owner) { list_del_init(&event->owner_entry); smp_store_release(&event->owner, NULL); } mutex_unlock(&owner->perf_event_mutex); put_task_struct(owner); } } static void put_event(struct perf_event *event) { if (!atomic_long_dec_and_test(&event->refcount)) return; _free_event(event); } /* * Kill an event dead; while event:refcount will preserve the event * object, it will not preserve its functionality. Once the last 'user' * gives up the object, we'll destroy the thing. */ int perf_event_release_kernel(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct perf_event *child, *tmp; LIST_HEAD(free_list); /* * If we got here through err_alloc: free_event(event); we will not * have attached to a context yet. */ if (!ctx) { WARN_ON_ONCE(event->attach_state & (PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP)); goto no_ctx; } if (!is_kernel_event(event)) perf_remove_from_owner(event); ctx = perf_event_ctx_lock(event); WARN_ON_ONCE(ctx->parent_ctx); /* * Mark this event as STATE_DEAD, there is no external reference to it * anymore. * * Anybody acquiring event->child_mutex after the below loop _must_ * also see this, most importantly inherit_event() which will avoid * placing more children on the list. * * Thus this guarantees that we will in fact observe and kill _ALL_ * child events. */ perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); perf_event_ctx_unlock(event, ctx); again: mutex_lock(&event->child_mutex); list_for_each_entry(child, &event->child_list, child_list) { /* * Cannot change, child events are not migrated, see the * comment with perf_event_ctx_lock_nested(). */ ctx = READ_ONCE(child->ctx); /* * Since child_mutex nests inside ctx::mutex, we must jump * through hoops. We start by grabbing a reference on the ctx. * * Since the event cannot get freed while we hold the * child_mutex, the context must also exist and have a !0 * reference count. */ get_ctx(ctx); /* * Now that we have a ctx ref, we can drop child_mutex, and * acquire ctx::mutex without fear of it going away. Then we * can re-acquire child_mutex. 
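 *
 * Put together, the per-iteration lock order in this loop works out to,
 * roughly (a summary of the surrounding code, not additional locking):
 *
 *	mutex_lock(&event->child_mutex);	// find the first child
 *	get_ctx(child->ctx);
 *	mutex_unlock(&event->child_mutex);
 *	mutex_lock(&ctx->mutex);		// ctx pinned by the reference
 *	mutex_lock(&event->child_mutex);	// revalidate the child
 *	...					// detach, move to free_list
 *	mutex_unlock(&event->child_mutex);
 *	mutex_unlock(&ctx->mutex);
 *	put_ctx(ctx);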
*/ mutex_unlock(&event->child_mutex); mutex_lock(&ctx->mutex); mutex_lock(&event->child_mutex); /* * Now that we hold ctx::mutex and child_mutex, revalidate our * state, if child is still the first entry, it didn't get freed * and we can continue doing so. */ tmp = list_first_entry_or_null(&event->child_list, struct perf_event, child_list); if (tmp == child) { perf_remove_from_context(child, DETACH_GROUP); list_move(&child->child_list, &free_list); /* * This matches the refcount bump in inherit_event(); * this can't be the last reference. */ put_event(event); } mutex_unlock(&event->child_mutex); mutex_unlock(&ctx->mutex); put_ctx(ctx); goto again; } mutex_unlock(&event->child_mutex); list_for_each_entry_safe(child, tmp, &free_list, child_list) { void *var = &child->ctx->refcount; list_del(&child->child_list); free_event(child); /* * Wake any perf_event_free_task() waiting for this event to be * freed. */ smp_mb(); /* pairs with wait_var_event() */ wake_up_var(var); } no_ctx: put_event(event); /* Must be the 'last' reference */ return 0; } EXPORT_SYMBOL_GPL(perf_event_release_kernel); /* * Called when the last reference to the file is gone. */ static int perf_release(struct inode *inode, struct file *file) { perf_event_release_kernel(file->private_data); return 0; } static u64 __perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; u64 total = 0; *enabled = 0; *running = 0; mutex_lock(&event->child_mutex); (void)perf_event_read(event, false); total += perf_event_count(event); *enabled += event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); *running += event->total_time_running + atomic64_read(&event->child_total_time_running); list_for_each_entry(child, &event->child_list, child_list) { (void)perf_event_read(child, false); total += perf_event_count(child); *enabled += child->total_time_enabled; *running += child->total_time_running; } mutex_unlock(&event->child_mutex); return total; } u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event_context *ctx; u64 count; ctx = perf_event_ctx_lock(event); count = __perf_event_read_value(event, enabled, running); perf_event_ctx_unlock(event, ctx); return count; } EXPORT_SYMBOL_GPL(perf_event_read_value); static int __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) { struct perf_event_context *ctx = leader->ctx; struct perf_event *sub, *parent; unsigned long flags; int n = 1; /* skip @nr */ int ret; ret = perf_event_read(leader, true); if (ret) return ret; raw_spin_lock_irqsave(&ctx->lock, flags); /* * Verify the grouping between the parent and child (inherited) * events is still in tact. * * Specifically: * - leader->ctx->lock pins leader->sibling_list * - parent->child_mutex pins parent->child_list * - parent->ctx->mutex pins parent->sibling_list * * Because parent->ctx != leader->ctx (and child_list nests inside * ctx->mutex), group destruction is not atomic between children, also * see perf_event_release_kernel(). Additionally, parent can grow the * group. * * Therefore it is possible to have parent and child groups in a * different configuration and summing over such a beast makes no sense * what so ever. * * Reject this. 
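 *
 * For reference, the values[] buffer being summed into here follows the
 * PERF_FORMAT_GROUP read layout (see the read_format description in the
 * uapi perf_event.h), roughly:
 *
 *	struct read_format {
 *		u64	nr;
 *		u64	time_enabled;	// if PERF_FORMAT_TOTAL_TIME_ENABLED
 *		u64	time_running;	// if PERF_FORMAT_TOTAL_TIME_RUNNING
 *		struct {
 *			u64	value;
 *			u64	id;	// if PERF_FORMAT_ID
 *			u64	lost;	// if PERF_FORMAT_LOST
 *		} cntr[nr];
 *	};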
*/ parent = leader->parent; if (parent && (parent->group_generation != leader->group_generation || parent->nr_siblings != leader->nr_siblings)) { ret = -ECHILD; goto unlock; } /* * Since we co-schedule groups, {enabled,running} times of siblings * will be identical to those of the leader, so we only publish one * set. */ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { values[n++] += leader->total_time_enabled + atomic64_read(&leader->child_total_time_enabled); } if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { values[n++] += leader->total_time_running + atomic64_read(&leader->child_total_time_running); } /* * Write {count,id} tuples for every sibling. */ values[n++] += perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); if (read_format & PERF_FORMAT_LOST) values[n++] = atomic64_read(&leader->lost_samples); for_each_sibling_event(sub, leader) { values[n++] += perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); if (read_format & PERF_FORMAT_LOST) values[n++] = atomic64_read(&sub->lost_samples); } unlock: raw_spin_unlock_irqrestore(&ctx->lock, flags); return ret; } static int perf_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *child; struct perf_event_context *ctx = leader->ctx; int ret; u64 *values; lockdep_assert_held(&ctx->mutex); values = kzalloc(event->read_size, GFP_KERNEL); if (!values) return -ENOMEM; values[0] = 1 + leader->nr_siblings; mutex_lock(&leader->child_mutex); ret = __perf_read_group_add(leader, read_format, values); if (ret) goto unlock; list_for_each_entry(child, &leader->child_list, child_list) { ret = __perf_read_group_add(child, read_format, values); if (ret) goto unlock; } mutex_unlock(&leader->child_mutex); ret = event->read_size; if (copy_to_user(buf, values, event->read_size)) ret = -EFAULT; goto out; unlock: mutex_unlock(&leader->child_mutex); out: kfree(values); return ret; } static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; u64 values[5]; int n = 0; values[n++] = __perf_event_read_value(event, &enabled, &running); if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) values[n++] = enabled; if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); if (read_format & PERF_FORMAT_LOST) values[n++] = atomic64_read(&event->lost_samples); if (copy_to_user(buf, values, n * sizeof(u64))) return -EFAULT; return n * sizeof(u64); } static bool is_event_hup(struct perf_event *event) { bool no_children; if (event->state > PERF_EVENT_STATE_EXIT) return false; mutex_lock(&event->child_mutex); no_children = list_empty(&event->child_list); mutex_unlock(&event->child_mutex); return no_children; } /* * Read the performance event - simple non blocking version for now */ static ssize_t __perf_read(struct perf_event *event, char __user *buf, size_t count) { u64 read_format = event->attr.read_format; int ret; /* * Return end-of-file for a read on an event that is in * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). 
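 *
 * Seen from userspace this looks like (illustrative sketch; perf_fd is
 * assumed to be a perf_event_open() file descriptor):
 *
 *	u64 buf[8];				// must be >= event->read_size
 *	ssize_t n = read(perf_fd, buf, sizeof(buf));
 *	if (n == 0)
 *		;	// event is in error state
 *	else if (n < 0 && errno == ENOSPC)
 *		;	// buffer smaller than the read_format layout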
*/ if (event->state == PERF_EVENT_STATE_ERROR) return 0; if (count < event->read_size) return -ENOSPC; WARN_ON_ONCE(event->ctx->parent_ctx); if (read_format & PERF_FORMAT_GROUP) ret = perf_read_group(event, read_format, buf); else ret = perf_read_one(event, read_format, buf); return ret; } static ssize_t perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct perf_event *event = file->private_data; struct perf_event_context *ctx; int ret; ret = security_perf_event_read(event); if (ret) return ret; ctx = perf_event_ctx_lock(event); ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); return ret; } static __poll_t perf_poll(struct file *file, poll_table *wait) { struct perf_event *event = file->private_data; struct perf_buffer *rb; __poll_t events = EPOLLHUP; poll_wait(file, &event->waitq, wait); if (is_event_hup(event)) return events; /* * Pin the event->rb by taking event->mmap_mutex; otherwise * perf_event_set_output() can swizzle our rb and make us miss wakeups. */ mutex_lock(&event->mmap_mutex); rb = event->rb; if (rb) events = atomic_xchg(&rb->poll, 0); mutex_unlock(&event->mmap_mutex); return events; } static void _perf_event_reset(struct perf_event *event) { (void)perf_event_read(event, false); local64_set(&event->count, 0); perf_event_update_userpage(event); } /* Assume it's not an event with inherit set. */ u64 perf_event_pause(struct perf_event *event, bool reset) { struct perf_event_context *ctx; u64 count; ctx = perf_event_ctx_lock(event); WARN_ON_ONCE(event->attr.inherit); _perf_event_disable(event); count = local64_read(&event->count); if (reset) local64_set(&event->count, 0); perf_event_ctx_unlock(event, ctx); return count; } EXPORT_SYMBOL_GPL(perf_event_pause); /* * Holding the top-level event's child_mutex means that any * descendant process that has inherited this event will block * in perf_event_exit_event() if it goes to exit, thus satisfying the * task existence requirements of perf_event_enable/disable. */ static void perf_event_for_each_child(struct perf_event *event, void (*func)(struct perf_event *)) { struct perf_event *child; WARN_ON_ONCE(event->ctx->parent_ctx); mutex_lock(&event->child_mutex); func(event); list_for_each_entry(child, &event->child_list, child_list) func(child); mutex_unlock(&event->child_mutex); } static void perf_event_for_each(struct perf_event *event, void (*func)(struct perf_event *)) { struct perf_event_context *ctx = event->ctx; struct perf_event *sibling; lockdep_assert_held(&ctx->mutex); event = event->group_leader; perf_event_for_each_child(event, func); for_each_sibling_event(sibling, event) perf_event_for_each_child(sibling, func); } static void __perf_event_period(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, void *info) { u64 value = *((u64 *)info); bool active; if (event->attr.freq) { event->attr.sample_freq = value; } else { event->attr.sample_period = value; event->hw.sample_period = value; } active = (event->state == PERF_EVENT_STATE_ACTIVE); if (active) { perf_pmu_disable(event->pmu); /* * We could be throttled; unthrottle now to avoid the tick * trying to unthrottle while we already re-started the event. 
*/ if (event->hw.interrupts == MAX_INTERRUPTS) { event->hw.interrupts = 0; perf_log_throttle(event, 1); } event->pmu->stop(event, PERF_EF_UPDATE); } local64_set(&event->hw.period_left, 0); if (active) { event->pmu->start(event, PERF_EF_RELOAD); perf_pmu_enable(event->pmu); } } static int perf_event_check_period(struct perf_event *event, u64 value) { return event->pmu->check_period(event, value); } static int _perf_event_period(struct perf_event *event, u64 value) { if (!is_sampling_event(event)) return -EINVAL; if (!value) return -EINVAL; if (event->attr.freq && value > sysctl_perf_event_sample_rate) return -EINVAL; if (perf_event_check_period(event, value)) return -EINVAL; if (!event->attr.freq && (value & (1ULL << 63))) return -EINVAL; event_function_call(event, __perf_event_period, &value); return 0; } int perf_event_period(struct perf_event *event, u64 value) { struct perf_event_context *ctx; int ret; ctx = perf_event_ctx_lock(event); ret = _perf_event_period(event, value); perf_event_ctx_unlock(event, ctx); return ret; } EXPORT_SYMBOL_GPL(perf_event_period); static const struct file_operations perf_fops; static inline int perf_fget_light(int fd, struct fd *p) { struct fd f = fdget(fd); if (!f.file) return -EBADF; if (f.file->f_op != &perf_fops) { fdput(f); return -EBADF; } *p = f; return 0; } static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event); static int perf_event_set_filter(struct perf_event *event, void __user *arg); static int perf_copy_attr(struct perf_event_attr __user *uattr, struct perf_event_attr *attr); static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { void (*func)(struct perf_event *); u32 flags = arg; switch (cmd) { case PERF_EVENT_IOC_ENABLE: func = _perf_event_enable; break; case PERF_EVENT_IOC_DISABLE: func = _perf_event_disable; break; case PERF_EVENT_IOC_RESET: func = _perf_event_reset; break; case PERF_EVENT_IOC_REFRESH: return _perf_event_refresh(event, arg); case PERF_EVENT_IOC_PERIOD: { u64 value; if (copy_from_user(&value, (u64 __user *)arg, sizeof(value))) return -EFAULT; return _perf_event_period(event, value); } case PERF_EVENT_IOC_ID: { u64 id = primary_event_id(event); if (copy_to_user((void __user *)arg, &id, sizeof(id))) return -EFAULT; return 0; } case PERF_EVENT_IOC_SET_OUTPUT: { int ret; if (arg != -1) { struct perf_event *output_event; struct fd output; ret = perf_fget_light(arg, &output); if (ret) return ret; output_event = output.file->private_data; ret = perf_event_set_output(event, output_event); fdput(output); } else { ret = perf_event_set_output(event, NULL); } return ret; } case PERF_EVENT_IOC_SET_FILTER: return perf_event_set_filter(event, (void __user *)arg); case PERF_EVENT_IOC_SET_BPF: { struct bpf_prog *prog; int err; prog = bpf_prog_get(arg); if (IS_ERR(prog)) return PTR_ERR(prog); err = perf_event_set_bpf_prog(event, prog, 0); if (err) { bpf_prog_put(prog); return err; } return 0; } case PERF_EVENT_IOC_PAUSE_OUTPUT: { struct perf_buffer *rb; rcu_read_lock(); rb = rcu_dereference(event->rb); if (!rb || !rb->nr_pages) { rcu_read_unlock(); return -EINVAL; } rb_toggle_paused(rb, !!arg); rcu_read_unlock(); return 0; } case PERF_EVENT_IOC_QUERY_BPF: return perf_event_query_prog_array(event, (void __user *)arg); case PERF_EVENT_IOC_MODIFY_ATTRIBUTES: { struct perf_event_attr new_attr; int err = perf_copy_attr((struct perf_event_attr __user *)arg, &new_attr); if (err) return err; return perf_event_modify_attr(event, &new_attr); } default: return -ENOTTY; } if (flags & 
PERF_IOC_FLAG_GROUP)
		perf_event_for_each(event, func);
	else
		perf_event_for_each_child(event, func);

	return 0;
}

static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct perf_event *event = file->private_data;
	struct perf_event_context *ctx;
	long ret;

	/* Treat ioctl like writes as it is likely a mutating operation. */
	ret = security_perf_event_write(event);
	if (ret)
		return ret;

	ctx = perf_event_ctx_lock(event);
	ret = _perf_ioctl(event, cmd, arg);
	perf_event_ctx_unlock(event, ctx);

	return ret;
}

#ifdef CONFIG_COMPAT
static long perf_compat_ioctl(struct file *file, unsigned int cmd,
				unsigned long arg)
{
	switch (_IOC_NR(cmd)) {
	case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
	case _IOC_NR(PERF_EVENT_IOC_ID):
	case _IOC_NR(PERF_EVENT_IOC_QUERY_BPF):
	case _IOC_NR(PERF_EVENT_IOC_MODIFY_ATTRIBUTES):
		/* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case) */
		if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
			cmd &= ~IOCSIZE_MASK;
			cmd |= sizeof(void *) << IOCSIZE_SHIFT;
		}
		break;
	}
	return perf_ioctl(file, cmd, arg);
}
#else
# define perf_compat_ioctl NULL
#endif

int perf_event_task_enable(void)
{
	struct perf_event_context *ctx;
	struct perf_event *event;

	mutex_lock(&current->perf_event_mutex);
	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
		ctx = perf_event_ctx_lock(event);
		perf_event_for_each_child(event, _perf_event_enable);
		perf_event_ctx_unlock(event, ctx);
	}
	mutex_unlock(&current->perf_event_mutex);

	return 0;
}

int perf_event_task_disable(void)
{
	struct perf_event_context *ctx;
	struct perf_event *event;

	mutex_lock(&current->perf_event_mutex);
	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
		ctx = perf_event_ctx_lock(event);
		perf_event_for_each_child(event, _perf_event_disable);
		perf_event_ctx_unlock(event, ctx);
	}
	mutex_unlock(&current->perf_event_mutex);

	return 0;
}

static int perf_event_index(struct perf_event *event)
{
	if (event->hw.state & PERF_HES_STOPPED)
		return 0;

	if (event->state != PERF_EVENT_STATE_ACTIVE)
		return 0;

	return event->pmu->event_idx(event);
}

static void perf_event_init_userpage(struct perf_event *event)
{
	struct perf_event_mmap_page *userpg;
	struct perf_buffer *rb;

	rcu_read_lock();
	rb = rcu_dereference(event->rb);
	if (!rb)
		goto unlock;

	userpg = rb->user_page;

	/* Allow new userspace to detect that bit 0 is deprecated */
	userpg->cap_bit0_is_deprecated = 1;
	userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
	userpg->data_offset = PAGE_SIZE;
	userpg->data_size = perf_data_size(rb);

unlock:
	rcu_read_unlock();
}

void __weak arch_perf_update_userpage(
	struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now)
{
}

/*
 * Callers need to ensure there can be no nesting of this function, otherwise
 * the seqlock logic goes bad. We can not serialize this because the arch
 * code calls this from NMI context.
 */
void perf_event_update_userpage(struct perf_event *event)
{
	struct perf_event_mmap_page *userpg;
	struct perf_buffer *rb;
	u64 enabled, running, now;

	rcu_read_lock();
	rb = rcu_dereference(event->rb);
	if (!rb)
		goto unlock;

	/*
	 * compute total_time_enabled, total_time_running
	 * based on snapshot values taken when the event
	 * was last scheduled in.
	 *
	 * we cannot simply call update_context_time()
	 * because of locking issues, as we can be called in
	 * NMI context
	 */
	calc_timer_values(event, &now, &enabled, &running);

	userpg = rb->user_page;
	/*
	 * Disable preemption to guarantee consistent time stamps are stored to
	 * the user page.
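	 *
	 * Userspace pairs with this by wrapping its reads of the mmap()ed
	 * first page in a seqcount-style retry loop, e.g. (sketch; 'pc' is
	 * assumed to point at the mapped struct perf_event_mmap_page):
	 *
	 *	do {
	 *		seq = pc->lock;
	 *		barrier();
	 *		idx     = pc->index;
	 *		offset  = pc->offset;
	 *		enabled = pc->time_enabled;
	 *		running = pc->time_running;
	 *		barrier();
	 *	} while (pc->lock != seq);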
*/ preempt_disable(); ++userpg->lock; barrier(); userpg->index = perf_event_index(event); userpg->offset = perf_event_count(event); if (userpg->index) userpg->offset -= local64_read(&event->hw.prev_count); userpg->time_enabled = enabled + atomic64_read(&event->child_total_time_enabled); userpg->time_running = running + atomic64_read(&event->child_total_time_running); arch_perf_update_userpage(event, userpg, now); barrier(); ++userpg->lock; preempt_enable(); unlock: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(perf_event_update_userpage); static vm_fault_t perf_mmap_fault(struct vm_fault *vmf) { struct perf_event *event = vmf->vma->vm_file->private_data; struct perf_buffer *rb; vm_fault_t ret = VM_FAULT_SIGBUS; if (vmf->flags & FAULT_FLAG_MKWRITE) { if (vmf->pgoff == 0) ret = 0; return ret; } rcu_read_lock(); rb = rcu_dereference(event->rb); if (!rb) goto unlock; if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) goto unlock; vmf->page = perf_mmap_to_page(rb, vmf->pgoff); if (!vmf->page) goto unlock; get_page(vmf->page); vmf->page->mapping = vmf->vma->vm_file->f_mapping; vmf->page->index = vmf->pgoff; ret = 0; unlock: rcu_read_unlock(); return ret; } static void ring_buffer_attach(struct perf_event *event, struct perf_buffer *rb) { struct perf_buffer *old_rb = NULL; unsigned long flags; WARN_ON_ONCE(event->parent); if (event->rb) { /* * Should be impossible, we set this when removing * event->rb_entry and wait/clear when adding event->rb_entry. */ WARN_ON_ONCE(event->rcu_pending); old_rb = event->rb; spin_lock_irqsave(&old_rb->event_lock, flags); list_del_rcu(&event->rb_entry); spin_unlock_irqrestore(&old_rb->event_lock, flags); event->rcu_batches = get_state_synchronize_rcu(); event->rcu_pending = 1; } if (rb) { if (event->rcu_pending) { cond_synchronize_rcu(event->rcu_batches); event->rcu_pending = 0; } spin_lock_irqsave(&rb->event_lock, flags); list_add_rcu(&event->rb_entry, &rb->event_list); spin_unlock_irqrestore(&rb->event_lock, flags); } /* * Avoid racing with perf_mmap_close(AUX): stop the event * before swizzling the event::rb pointer; if it's getting * unmapped, its aux_mmap_count will be 0 and it won't * restart. See the comment in __perf_pmu_output_stop(). * * Data will inevitably be lost when set_output is done in * mid-air, but then again, whoever does it like this is * not in for the data anyway. */ if (has_aux(event)) perf_event_stop(event, 0); rcu_assign_pointer(event->rb, rb); if (old_rb) { ring_buffer_put(old_rb); /* * Since we detached before setting the new rb, so that we * could attach the new rb, we could have missed a wakeup. * Provide it now. 
*/ wake_up_all(&event->waitq); } } static void ring_buffer_wakeup(struct perf_event *event) { struct perf_buffer *rb; if (event->parent) event = event->parent; rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { list_for_each_entry_rcu(event, &rb->event_list, rb_entry) wake_up_all(&event->waitq); } rcu_read_unlock(); } struct perf_buffer *ring_buffer_get(struct perf_event *event) { struct perf_buffer *rb; if (event->parent) event = event->parent; rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { if (!refcount_inc_not_zero(&rb->refcount)) rb = NULL; } rcu_read_unlock(); return rb; } void ring_buffer_put(struct perf_buffer *rb) { if (!refcount_dec_and_test(&rb->refcount)) return; WARN_ON_ONCE(!list_empty(&rb->event_list)); call_rcu(&rb->rcu_head, rb_free_rcu); } static void perf_mmap_open(struct vm_area_struct *vma) { struct perf_event *event = vma->vm_file->private_data; atomic_inc(&event->mmap_count); atomic_inc(&event->rb->mmap_count); if (vma->vm_pgoff) atomic_inc(&event->rb->aux_mmap_count); if (event->pmu->event_mapped) event->pmu->event_mapped(event, vma->vm_mm); } static void perf_pmu_output_stop(struct perf_event *event); /* * A buffer can be mmap()ed multiple times; either directly through the same * event, or through other events by use of perf_event_set_output(). * * In order to undo the VM accounting done by perf_mmap() we need to destroy * the buffer here, where we still have a VM context. This means we need * to detach all events redirecting to us. */ static void perf_mmap_close(struct vm_area_struct *vma) { struct perf_event *event = vma->vm_file->private_data; struct perf_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; int mmap_locked = rb->mmap_locked; unsigned long size = perf_data_size(rb); bool detach_rest = false; if (event->pmu->event_unmapped) event->pmu->event_unmapped(event, vma->vm_mm); /* * rb->aux_mmap_count will always drop before rb->mmap_count and * event->mmap_count, so it is ok to use event->mmap_mutex to * serialize with perf_mmap here. */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU * data. Note that after rb::aux_mmap_count dropped to zero, * they won't start any more (see perf_aux_output_begin()). */ perf_pmu_output_stop(event); /* now it's safe to free the pages */ atomic_long_sub(rb->aux_nr_pages - rb->aux_mmap_locked, &mmap_user->locked_vm); atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm); /* this has to be the last one */ rb_free_aux(rb); WARN_ON_ONCE(refcount_read(&rb->aux_refcount)); mutex_unlock(&event->mmap_mutex); } if (atomic_dec_and_test(&rb->mmap_count)) detach_rest = true; if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) goto out_put; ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); /* If there's still other mmap()s of this buffer, we're done. */ if (!detach_rest) goto out_put; /* * No other mmap()s, detach from all other events that might redirect * into the now unreachable buffer. Somewhat complicated by the * fact that rb::event_lock otherwise nests inside mmap_mutex. */ again: rcu_read_lock(); list_for_each_entry_rcu(event, &rb->event_list, rb_entry) { if (!atomic_long_inc_not_zero(&event->refcount)) { /* * This event is en-route to free_event() which will * detach it and remove it from the list. 
*/ continue; } rcu_read_unlock(); mutex_lock(&event->mmap_mutex); /* * Check we didn't race with perf_event_set_output() which can * swizzle the rb from under us while we were waiting to * acquire mmap_mutex. * * If we find a different rb; ignore this event, a next * iteration will no longer find it on the list. We have to * still restart the iteration to make sure we're not now * iterating the wrong list. */ if (event->rb == rb) ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); put_event(event); /* * Restart the iteration; either we're on the wrong list or * destroyed its integrity by doing a deletion. */ goto again; } rcu_read_unlock(); /* * It could be there's still a few 0-ref events on the list; they'll * get cleaned up by free_event() -- they'll also still have their * ref on the rb and will free it whenever they are done with it. * * Aside from that, this buffer is 'fully' detached and unmapped, * undo the VM accounting. */ atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked, &mmap_user->locked_vm); atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm); free_uid(mmap_user); out_put: ring_buffer_put(rb); /* could be last */ } static const struct vm_operations_struct perf_mmap_vmops = { .open = perf_mmap_open, .close = perf_mmap_close, /* non mergeable */ .fault = perf_mmap_fault, .page_mkwrite = perf_mmap_fault, }; static int perf_mmap(struct file *file, struct vm_area_struct *vma) { struct perf_event *event = file->private_data; unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); struct perf_buffer *rb = NULL; unsigned long locked, lock_limit; unsigned long vma_size; unsigned long nr_pages; long user_extra = 0, extra = 0; int ret = 0, flags = 0; /* * Don't allow mmap() of inherited per-task counters. This would * create a performance issue due to all children writing to the * same rb. */ if (event->cpu == -1 && event->attr.inherit) return -EINVAL; if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; ret = security_perf_event_read(event); if (ret) return ret; vma_size = vma->vm_end - vma->vm_start; if (vma->vm_pgoff == 0) { nr_pages = (vma_size / PAGE_SIZE) - 1; } else { /* * AUX area mapping: if rb->aux_nr_pages != 0, it's already * mapped, all subsequent mappings should have the same size * and offset. Must be above the normal perf buffer. */ u64 aux_offset, aux_size; if (!event->rb) return -EINVAL; nr_pages = vma_size / PAGE_SIZE; mutex_lock(&event->mmap_mutex); ret = -EINVAL; rb = event->rb; if (!rb) goto aux_unlock; aux_offset = READ_ONCE(rb->user_page->aux_offset); aux_size = READ_ONCE(rb->user_page->aux_size); if (aux_offset < perf_data_size(rb) + PAGE_SIZE) goto aux_unlock; if (aux_offset != vma->vm_pgoff << PAGE_SHIFT) goto aux_unlock; /* already mapped with a different offset */ if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff) goto aux_unlock; if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE) goto aux_unlock; /* already mapped with a different size */ if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages) goto aux_unlock; if (!is_power_of_2(nr_pages)) goto aux_unlock; if (!atomic_inc_not_zero(&rb->mmap_count)) goto aux_unlock; if (rb_has_aux(rb)) { atomic_inc(&rb->aux_mmap_count); ret = 0; goto unlock; } atomic_set(&rb->aux_mmap_count, 1); user_extra = nr_pages; goto accounting; } /* * If we have rb pages ensure they're a power-of-two number, so we * can do bitmasks instead of modulo. 
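 *
 * I.e. a typical userspace mapping is the user page plus 2^order data
 * pages, all in one VMA at offset 0 (sketch; 'perf_fd' and 'order' are
 * placeholders):
 *
 *	size_t len = (1 + (1UL << order)) * page_size;
 *	void *base = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, perf_fd, 0);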
*/ if (nr_pages != 0 && !is_power_of_2(nr_pages)) return -EINVAL; if (vma_size != PAGE_SIZE * (1 + nr_pages)) return -EINVAL; WARN_ON_ONCE(event->ctx->parent_ctx); again: mutex_lock(&event->mmap_mutex); if (event->rb) { if (data_page_nr(event->rb) != nr_pages) { ret = -EINVAL; goto unlock; } if (!atomic_inc_not_zero(&event->rb->mmap_count)) { /* * Raced against perf_mmap_close(); remove the * event and try again. */ ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); goto again; } goto unlock; } user_extra = nr_pages + 1; accounting: user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); /* * Increase the limit linearly with more CPUs: */ user_lock_limit *= num_online_cpus(); user_locked = atomic_long_read(&user->locked_vm); /* * sysctl_perf_event_mlock may have changed, so that * user->locked_vm > user_lock_limit */ if (user_locked > user_lock_limit) user_locked = user_lock_limit; user_locked += user_extra; if (user_locked > user_lock_limit) { /* * charge locked_vm until it hits user_lock_limit; * charge the rest from pinned_vm */ extra = user_locked - user_lock_limit; user_extra -= extra; } lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra; if ((locked > lock_limit) && perf_is_paranoid() && !capable(CAP_IPC_LOCK)) { ret = -EPERM; goto unlock; } WARN_ON(!rb && event->rb); if (vma->vm_flags & VM_WRITE) flags |= RING_BUFFER_WRITABLE; if (!rb) { rb = rb_alloc(nr_pages, event->attr.watermark ? event->attr.wakeup_watermark : 0, event->cpu, flags); if (!rb) { ret = -ENOMEM; goto unlock; } atomic_set(&rb->mmap_count, 1); rb->mmap_user = get_current_user(); rb->mmap_locked = extra; ring_buffer_attach(event, rb); perf_event_update_time(event); perf_event_init_userpage(event); perf_event_update_userpage(event); } else { ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, event->attr.aux_watermark, flags); if (!ret) rb->aux_mmap_locked = extra; } unlock: if (!ret) { atomic_long_add(user_extra, &user->locked_vm); atomic64_add(extra, &vma->vm_mm->pinned_vm); atomic_inc(&event->mmap_count); } else if (rb) { atomic_dec(&rb->mmap_count); } aux_unlock: mutex_unlock(&event->mmap_mutex); /* * Since pinned accounting is per vm we cannot allow fork() to copy our * vma. */ vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &perf_mmap_vmops; if (event->pmu->event_mapped) event->pmu->event_mapped(event, vma->vm_mm); return ret; } static int perf_fasync(int fd, struct file *filp, int on) { struct inode *inode = file_inode(filp); struct perf_event *event = filp->private_data; int retval; inode_lock(inode); retval = fasync_helper(fd, filp, on, &event->fasync); inode_unlock(inode); if (retval < 0) return retval; return 0; } static const struct file_operations perf_fops = { .llseek = no_llseek, .release = perf_release, .read = perf_read, .poll = perf_poll, .unlocked_ioctl = perf_ioctl, .compat_ioctl = perf_compat_ioctl, .mmap = perf_mmap, .fasync = perf_fasync, }; /* * Perf event wakeup * * If there's data, ensure we set the poll() state and publish everything * to user-space before waking everybody up. 
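 *
 * A blocked reader typically waits like this (userspace sketch; perf_fd
 * is assumed to be a perf_event_open() file descriptor):
 *
 *	struct pollfd pfd = { .fd = perf_fd, .events = POLLIN };
 *	poll(&pfd, 1, -1);
 *	if (pfd.revents & POLLHUP)
 *		;	// event has exited, no further data will arrive
 *	if (pfd.revents & POLLIN)
 *		;	// new data is available in the ring buffer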
*/ static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) { /* only the parent has fasync state */ if (event->parent) event = event->parent; return &event->fasync; } void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); if (event->pending_kill) { kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill); event->pending_kill = 0; } } static void perf_sigtrap(struct perf_event *event) { /* * We'd expect this to only occur if the irq_work is delayed and either * ctx->task or current has changed in the meantime. This can be the * case on architectures that do not implement arch_irq_work_raise(). */ if (WARN_ON_ONCE(event->ctx->task != current)) return; /* * Both perf_pending_task() and perf_pending_irq() can race with the * task exiting. */ if (current->flags & PF_EXITING) return; send_sig_perf((void __user *)event->pending_addr, event->orig_type, event->attr.sig_data); } /* * Deliver the pending work in-event-context or follow the context. */ static void __perf_pending_irq(struct perf_event *event) { int cpu = READ_ONCE(event->oncpu); /* * If the event isn't running; we done. event_sched_out() will have * taken care of things. */ if (cpu < 0) return; /* * Yay, we hit home and are in the context of the event. */ if (cpu == smp_processor_id()) { if (event->pending_sigtrap) { event->pending_sigtrap = 0; perf_sigtrap(event); local_dec(&event->ctx->nr_pending); } if (event->pending_disable) { event->pending_disable = 0; perf_event_disable_local(event); } return; } /* * CPU-A CPU-B * * perf_event_disable_inatomic() * @pending_disable = CPU-A; * irq_work_queue(); * * sched-out * @pending_disable = -1; * * sched-in * perf_event_disable_inatomic() * @pending_disable = CPU-B; * irq_work_queue(); // FAILS * * irq_work_run() * perf_pending_irq() * * But the event runs on CPU-B and wants disabling there. */ irq_work_queue_on(&event->pending_irq, cpu); } static void perf_pending_irq(struct irq_work *entry) { struct perf_event *event = container_of(entry, struct perf_event, pending_irq); int rctx; /* * If we 'fail' here, that's OK, it means recursion is already disabled * and we won't recurse 'further'. */ rctx = perf_swevent_get_recursion_context(); /* * The wakeup isn't bound to the context of the event -- it can happen * irrespective of where the event is. */ if (event->pending_wakeup) { event->pending_wakeup = 0; perf_event_wakeup(event); } __perf_pending_irq(event); if (rctx >= 0) perf_swevent_put_recursion_context(rctx); } static void perf_pending_task(struct callback_head *head) { struct perf_event *event = container_of(head, struct perf_event, pending_task); int rctx; /* * If we 'fail' here, that's OK, it means recursion is already disabled * and we won't recurse 'further'. 
*/ preempt_disable_notrace(); rctx = perf_swevent_get_recursion_context(); if (event->pending_work) { event->pending_work = 0; perf_sigtrap(event); local_dec(&event->ctx->nr_pending); } if (rctx >= 0) perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); put_event(event); } #ifdef CONFIG_GUEST_PERF_EVENTS struct perf_guest_info_callbacks __rcu *perf_guest_cbs; DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state); DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip); DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs))) return; rcu_assign_pointer(perf_guest_cbs, cbs); static_call_update(__perf_guest_state, cbs->state); static_call_update(__perf_guest_get_ip, cbs->get_ip); /* Implementing ->handle_intel_pt_intr is optional. */ if (cbs->handle_intel_pt_intr) static_call_update(__perf_guest_handle_intel_pt_intr, cbs->handle_intel_pt_intr); } EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs)) return; rcu_assign_pointer(perf_guest_cbs, NULL); static_call_update(__perf_guest_state, (void *)&__static_call_return0); static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0); static_call_update(__perf_guest_handle_intel_pt_intr, (void *)&__static_call_return0); synchronize_rcu(); } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); #endif static void perf_output_sample_regs(struct perf_output_handle *handle, struct pt_regs *regs, u64 mask) { int bit; DECLARE_BITMAP(_mask, 64); bitmap_from_u64(_mask, mask); for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) { u64 val; val = perf_reg_value(regs, bit); perf_output_put(handle, val); } } static void perf_sample_regs_user(struct perf_regs *regs_user, struct pt_regs *regs) { if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; } else if (!(current->flags & PF_KTHREAD)) { perf_get_regs_user(regs_user, regs); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->regs = NULL; } } static void perf_sample_regs_intr(struct perf_regs *regs_intr, struct pt_regs *regs) { regs_intr->regs = regs; regs_intr->abi = perf_reg_abi(current); } /* * Get remaining task size from user stack pointer. * * It'd be better to take stack vma map and limit this more * precisely, but there's no way to get it safely under interrupt, * so using TASK_SIZE as limit. */ static u64 perf_ustack_task_size(struct pt_regs *regs) { unsigned long addr = perf_user_stack_pointer(regs); if (!addr || addr >= TASK_SIZE) return 0; return TASK_SIZE - addr; } static u16 perf_sample_ustack_size(u16 stack_size, u16 header_size, struct pt_regs *regs) { u64 task_size; /* No regs, no stack pointer, no dump. */ if (!regs) return 0; /* * Check if we fit in with the requested stack size into the: * - TASK_SIZE * If we don't, we limit the size to the TASK_SIZE. * * - remaining sample size * If we don't, we customize the stack size to * fit in to the remaining sample size. */ task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs)); stack_size = min(stack_size, (u16) task_size); /* Current header size plus static size and dynamic size. */ header_size += 2 * sizeof(u64); /* Do we fit in with the current stack dump size? 
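 *
 * The u16 cast below detects overflow; with illustrative numbers, a
 * header_size of 128 and a stack_size of 65472 sum to 65600, which
 * truncates to 64 as a u16 and is therefore smaller than header_size,
 * so the dump is clamped to USHRT_MAX - header_size - sizeof(u64) and
 * re-aligned to u64 so the whole record still fits the 16-bit
 * header::size.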
*/ if ((u16) (header_size + stack_size) < header_size) { /* * If we overflow the maximum size for the sample, * we customize the stack dump size to fit in. */ stack_size = USHRT_MAX - header_size - sizeof(u64); stack_size = round_up(stack_size, sizeof(u64)); } return stack_size; } static void perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, struct pt_regs *regs) { /* Case of a kernel thread, nothing to dump */ if (!regs) { u64 size = 0; perf_output_put(handle, size); } else { unsigned long sp; unsigned int rem; u64 dyn_size; /* * We dump: * static size * - the size requested by user or the best one we can fit * in to the sample max size * data * - user stack dump data * dynamic size * - the actual dumped size */ /* Static size. */ perf_output_put(handle, dump_size); /* Data. */ sp = perf_user_stack_pointer(regs); rem = __output_copy_user(handle, (void *) sp, dump_size); dyn_size = dump_size - rem; perf_output_skip(handle, rem); /* Dynamic size. */ perf_output_put(handle, dyn_size); } } static unsigned long perf_prepare_sample_aux(struct perf_event *event, struct perf_sample_data *data, size_t size) { struct perf_event *sampler = event->aux_event; struct perf_buffer *rb; data->aux_size = 0; if (!sampler) goto out; if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE)) goto out; if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id())) goto out; rb = ring_buffer_get(sampler); if (!rb) goto out; /* * If this is an NMI hit inside sampling code, don't take * the sample. See also perf_aux_sample_output(). */ if (READ_ONCE(rb->aux_in_sampling)) { data->aux_size = 0; } else { size = min_t(size_t, size, perf_aux_size(rb)); data->aux_size = ALIGN(size, sizeof(u64)); } ring_buffer_put(rb); out: return data->aux_size; } static long perf_pmu_snapshot_aux(struct perf_buffer *rb, struct perf_event *event, struct perf_output_handle *handle, unsigned long size) { unsigned long flags; long ret; /* * Normal ->start()/->stop() callbacks run in IRQ mode in scheduler * paths. If we start calling them in NMI context, they may race with * the IRQ ones, that is, for example, re-starting an event that's just * been stopped, which is why we're using a separate callback that * doesn't change the event state. * * IRQs need to be disabled to prevent IPIs from racing with us. */ local_irq_save(flags); /* * Guard against NMI hits inside the critical section; * see also perf_prepare_sample_aux(). */ WRITE_ONCE(rb->aux_in_sampling, 1); barrier(); ret = event->pmu->snapshot_aux(event, handle, size); barrier(); WRITE_ONCE(rb->aux_in_sampling, 0); local_irq_restore(flags); return ret; } static void perf_aux_sample_output(struct perf_event *event, struct perf_output_handle *handle, struct perf_sample_data *data) { struct perf_event *sampler = event->aux_event; struct perf_buffer *rb; unsigned long pad; long size; if (WARN_ON_ONCE(!sampler || !data->aux_size)) return; rb = ring_buffer_get(sampler); if (!rb) return; size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size); /* * An error here means that perf_output_copy() failed (returned a * non-zero surplus that it didn't copy), which in its current * enlightened implementation is not possible. If that changes, we'd * like to know. */ if (WARN_ON_ONCE(size < 0)) goto out_put; /* * The pad comes from ALIGN()ing data->aux_size up to u64 in * perf_prepare_sample_aux(), so should not be more than that. 
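 *
 * E.g. if 61 bytes of AUX data were available, data->aux_size was rounded
 * up to 64 in perf_prepare_sample_aux(); the PMU then copies 61 bytes and
 * the remaining pad of 3 is filled with zeroes below. A pad of 8 or more
 * would mean the snapshot came up short for some other reason.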
*/ pad = data->aux_size - size; if (WARN_ON_ONCE(pad >= sizeof(u64))) pad = 8; if (pad) { u64 zero = 0; perf_output_copy(handle, &zero, pad); } out_put: ring_buffer_put(rb); } /* * A set of common sample data types saved even for non-sample records * when event->attr.sample_id_all is set. */ #define PERF_SAMPLE_ID_ALL (PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER) static void __perf_event_header__init_id(struct perf_sample_data *data, struct perf_event *event, u64 sample_type) { data->type = event->attr.sample_type; data->sample_flags |= data->type & PERF_SAMPLE_ID_ALL; if (sample_type & PERF_SAMPLE_TID) { /* namespace issues */ data->tid_entry.pid = perf_event_pid(event, current); data->tid_entry.tid = perf_event_tid(event, current); } if (sample_type & PERF_SAMPLE_TIME) data->time = perf_event_clock(event); if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) data->id = primary_event_id(event); if (sample_type & PERF_SAMPLE_STREAM_ID) data->stream_id = event->id; if (sample_type & PERF_SAMPLE_CPU) { data->cpu_entry.cpu = raw_smp_processor_id(); data->cpu_entry.reserved = 0; } } void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) { if (event->attr.sample_id_all) { header->size += event->id_header_size; __perf_event_header__init_id(data, event, event->attr.sample_type); } } static void __perf_event__output_id_sample(struct perf_output_handle *handle, struct perf_sample_data *data) { u64 sample_type = data->type; if (sample_type & PERF_SAMPLE_TID) perf_output_put(handle, data->tid_entry); if (sample_type & PERF_SAMPLE_TIME) perf_output_put(handle, data->time); if (sample_type & PERF_SAMPLE_ID) perf_output_put(handle, data->id); if (sample_type & PERF_SAMPLE_STREAM_ID) perf_output_put(handle, data->stream_id); if (sample_type & PERF_SAMPLE_CPU) perf_output_put(handle, data->cpu_entry); if (sample_type & PERF_SAMPLE_IDENTIFIER) perf_output_put(handle, data->id); } void perf_event__output_id_sample(struct perf_event *event, struct perf_output_handle *handle, struct perf_sample_data *sample) { if (event->attr.sample_id_all) __perf_event__output_id_sample(handle, sample); } static void perf_output_read_one(struct perf_output_handle *handle, struct perf_event *event, u64 enabled, u64 running) { u64 read_format = event->attr.read_format; u64 values[5]; int n = 0; values[n++] = perf_event_count(event); if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { values[n++] = enabled + atomic64_read(&event->child_total_time_enabled); } if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { values[n++] = running + atomic64_read(&event->child_total_time_running); } if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); if (read_format & PERF_FORMAT_LOST) values[n++] = atomic64_read(&event->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } static void perf_output_read_group(struct perf_output_handle *handle, struct perf_event *event, u64 enabled, u64 running) { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; unsigned long flags; u64 values[6]; int n = 0; /* * Disabling interrupts avoids all counter scheduling * (context switches, timer based rotation and IPIs). 
*/ local_irq_save(flags); values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) values[n++] = enabled; if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) values[n++] = running; if ((leader != event) && (leader->state == PERF_EVENT_STATE_ACTIVE)) leader->pmu->read(leader); values[n++] = perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); if (read_format & PERF_FORMAT_LOST) values[n++] = atomic64_read(&leader->lost_samples); __output_copy(handle, values, n * sizeof(u64)); for_each_sibling_event(sub, leader) { n = 0; if ((sub != event) && (sub->state == PERF_EVENT_STATE_ACTIVE)) sub->pmu->read(sub); values[n++] = perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); if (read_format & PERF_FORMAT_LOST) values[n++] = atomic64_read(&sub->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } local_irq_restore(flags); } #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ PERF_FORMAT_TOTAL_TIME_RUNNING) /* * XXX PERF_SAMPLE_READ vs inherited events seems difficult. * * The problem is that its both hard and excessively expensive to iterate the * child list, not to mention that its impossible to IPI the children running * on another CPU, from interrupt/NMI context. */ static void perf_output_read(struct perf_output_handle *handle, struct perf_event *event) { u64 enabled = 0, running = 0, now; u64 read_format = event->attr.read_format; /* * compute total_time_enabled, total_time_running * based on snapshot values taken when the event * was last scheduled in. * * we cannot simply called update_context_time() * because of locking issue as we are called in * NMI context */ if (read_format & PERF_FORMAT_TOTAL_TIMES) calc_timer_values(event, &now, &enabled, &running); if (event->attr.read_format & PERF_FORMAT_GROUP) perf_output_read_group(handle, event, enabled, running); else perf_output_read_one(handle, event, enabled, running); } void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) { u64 sample_type = data->type; perf_output_put(handle, *header); if (sample_type & PERF_SAMPLE_IDENTIFIER) perf_output_put(handle, data->id); if (sample_type & PERF_SAMPLE_IP) perf_output_put(handle, data->ip); if (sample_type & PERF_SAMPLE_TID) perf_output_put(handle, data->tid_entry); if (sample_type & PERF_SAMPLE_TIME) perf_output_put(handle, data->time); if (sample_type & PERF_SAMPLE_ADDR) perf_output_put(handle, data->addr); if (sample_type & PERF_SAMPLE_ID) perf_output_put(handle, data->id); if (sample_type & PERF_SAMPLE_STREAM_ID) perf_output_put(handle, data->stream_id); if (sample_type & PERF_SAMPLE_CPU) perf_output_put(handle, data->cpu_entry); if (sample_type & PERF_SAMPLE_PERIOD) perf_output_put(handle, data->period); if (sample_type & PERF_SAMPLE_READ) perf_output_read(handle, event); if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; size += data->callchain->nr; size *= sizeof(u64); __output_copy(handle, data->callchain, size); } if (sample_type & PERF_SAMPLE_RAW) { struct perf_raw_record *raw = data->raw; if (raw) { struct perf_raw_frag *frag = &raw->frag; perf_output_put(handle, raw->size); do { if (frag->copy) { __output_custom(handle, frag->copy, frag->data, frag->size); } else { __output_copy(handle, frag->data, frag->size); } if (perf_raw_frag_last(frag)) break; frag = frag->next; } while (1); if (frag->pad) __output_skip(handle, NULL, frag->pad); } else { 
struct { u32 size; u32 data; } raw = { .size = sizeof(u32), .data = 0, }; perf_output_put(handle, raw); } } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { if (data->br_stack) { size_t size; size = data->br_stack->nr * sizeof(struct perf_branch_entry); perf_output_put(handle, data->br_stack->nr); if (branch_sample_hw_index(event)) perf_output_put(handle, data->br_stack->hw_idx); perf_output_copy(handle, data->br_stack->entries, size); /* * Add the extension space which is appended * right after the struct perf_branch_stack. */ if (data->br_stack_cntr) { size = data->br_stack->nr * sizeof(u64); perf_output_copy(handle, data->br_stack_cntr, size); } } else { /* * we always store at least the value of nr */ u64 nr = 0; perf_output_put(handle, nr); } } if (sample_type & PERF_SAMPLE_REGS_USER) { u64 abi = data->regs_user.abi; /* * If there are no regs to dump, notice it through * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE). */ perf_output_put(handle, abi); if (abi) { u64 mask = event->attr.sample_regs_user; perf_output_sample_regs(handle, data->regs_user.regs, mask); } } if (sample_type & PERF_SAMPLE_STACK_USER) { perf_output_sample_ustack(handle, data->stack_user_size, data->regs_user.regs); } if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) perf_output_put(handle, data->weight.full); if (sample_type & PERF_SAMPLE_DATA_SRC) perf_output_put(handle, data->data_src.val); if (sample_type & PERF_SAMPLE_TRANSACTION) perf_output_put(handle, data->txn); if (sample_type & PERF_SAMPLE_REGS_INTR) { u64 abi = data->regs_intr.abi; /* * If there are no regs to dump, notice it through * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE). */ perf_output_put(handle, abi); if (abi) { u64 mask = event->attr.sample_regs_intr; perf_output_sample_regs(handle, data->regs_intr.regs, mask); } } if (sample_type & PERF_SAMPLE_PHYS_ADDR) perf_output_put(handle, data->phys_addr); if (sample_type & PERF_SAMPLE_CGROUP) perf_output_put(handle, data->cgroup); if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) perf_output_put(handle, data->data_page_size); if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) perf_output_put(handle, data->code_page_size); if (sample_type & PERF_SAMPLE_AUX) { perf_output_put(handle, data->aux_size); if (data->aux_size) perf_aux_sample_output(event, handle, data); } if (!event->attr.watermark) { int wakeup_events = event->attr.wakeup_events; if (wakeup_events) { struct perf_buffer *rb = handle->rb; int events = local_inc_return(&rb->events); if (events >= wakeup_events) { local_sub(wakeup_events, &rb->events); local_inc(&rb->wakeup); } } } } static u64 perf_virt_to_phys(u64 virt) { u64 phys_addr = 0; if (!virt) return 0; if (virt >= TASK_SIZE) { /* If it's vmalloc()d memory, leave phys_addr as 0 */ if (virt_addr_valid((void *)(uintptr_t)virt) && !(virt >= VMALLOC_START && virt < VMALLOC_END)) phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt); } else { /* * Walking the pages tables for user address. * Interrupts are disabled, so it prevents any tear down * of the page tables. * Try IRQ-safe get_user_page_fast_only first. * If failed, leave phys_addr as 0. */ if (current->mm != NULL) { struct page *p; pagefault_disable(); if (get_user_page_fast_only(virt, 0, &p)) { phys_addr = page_to_phys(p) + virt % PAGE_SIZE; put_page(p); } pagefault_enable(); } } return phys_addr; } /* * Return the pagetable size of a given virtual address. 
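 *
 * The lockless walk below descends pgd -> p4d -> pud -> pmd -> pte and
 * reports the leaf size of whatever level terminates the walk; on x86-64,
 * for example, that is typically 4KiB for a pte leaf, 2MiB for a pmd leaf
 * and 1GiB for a pud leaf. A return of 0 means the address is not mapped
 * (or CONFIG_HAVE_FAST_GUP is not available).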
*/ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr) { u64 size = 0; #ifdef CONFIG_HAVE_FAST_GUP pgd_t *pgdp, pgd; p4d_t *p4dp, p4d; pud_t *pudp, pud; pmd_t *pmdp, pmd; pte_t *ptep, pte; pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); if (pgd_none(pgd)) return 0; if (pgd_leaf(pgd)) return pgd_leaf_size(pgd); p4dp = p4d_offset_lockless(pgdp, pgd, addr); p4d = READ_ONCE(*p4dp); if (!p4d_present(p4d)) return 0; if (p4d_leaf(p4d)) return p4d_leaf_size(p4d); pudp = pud_offset_lockless(p4dp, p4d, addr); pud = READ_ONCE(*pudp); if (!pud_present(pud)) return 0; if (pud_leaf(pud)) return pud_leaf_size(pud); pmdp = pmd_offset_lockless(pudp, pud, addr); again: pmd = pmdp_get_lockless(pmdp); if (!pmd_present(pmd)) return 0; if (pmd_leaf(pmd)) return pmd_leaf_size(pmd); ptep = pte_offset_map(&pmd, addr); if (!ptep) goto again; pte = ptep_get_lockless(ptep); if (pte_present(pte)) size = pte_leaf_size(pte); pte_unmap(ptep); #endif /* CONFIG_HAVE_FAST_GUP */ return size; } static u64 perf_get_page_size(unsigned long addr) { struct mm_struct *mm; unsigned long flags; u64 size; if (!addr) return 0; /* * Software page-table walkers must disable IRQs, * which prevents any tear down of the page tables. */ local_irq_save(flags); mm = current->mm; if (!mm) { /* * For kernel threads and the like, use init_mm so that * we can find kernel memory. */ mm = &init_mm; } size = perf_get_pgtable_size(mm, addr); local_irq_restore(flags); return size; } static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { bool kernel = !event->attr.exclude_callchain_kernel; bool user = !event->attr.exclude_callchain_user; /* Disallow cross-task user callchains. */ bool crosstask = event->ctx->task && event->ctx->task != current; const u32 max_stack = event->attr.sample_max_stack; struct perf_callchain_entry *callchain; if (!kernel && !user) return &__empty_callchain; callchain = get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true); return callchain ?: &__empty_callchain; } static __always_inline u64 __cond_set(u64 flags, u64 s, u64 d) { return d * !!(flags & s); } void perf_prepare_sample(struct perf_sample_data *data, struct perf_event *event, struct pt_regs *regs) { u64 sample_type = event->attr.sample_type; u64 filtered_sample_type; /* * Add the sample flags that are dependent to others. And clear the * sample flags that have already been done by the PMU driver. 
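 *
 * For example, PERF_SAMPLE_PHYS_ADDR needs data->addr, so __cond_set()
 * adds PERF_SAMPLE_ADDR to the working set even if the user did not ask
 * for it (the address is collected but not emitted); conversely, a bit
 * such as PERF_SAMPLE_BRANCH_STACK that the PMU driver already recorded
 * in data->sample_flags is masked out so it is not overwritten here.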
*/ filtered_sample_type = sample_type; filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_CODE_PAGE_SIZE, PERF_SAMPLE_IP); filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_DATA_PAGE_SIZE | PERF_SAMPLE_PHYS_ADDR, PERF_SAMPLE_ADDR); filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_STACK_USER, PERF_SAMPLE_REGS_USER); filtered_sample_type &= ~data->sample_flags; if (filtered_sample_type == 0) { /* Make sure it has the correct data->type for output */ data->type = event->attr.sample_type; return; } __perf_event_header__init_id(data, event, filtered_sample_type); if (filtered_sample_type & PERF_SAMPLE_IP) { data->ip = perf_instruction_pointer(regs); data->sample_flags |= PERF_SAMPLE_IP; } if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN) perf_sample_save_callchain(data, event, regs); if (filtered_sample_type & PERF_SAMPLE_RAW) { data->raw = NULL; data->dyn_size += sizeof(u64); data->sample_flags |= PERF_SAMPLE_RAW; } if (filtered_sample_type & PERF_SAMPLE_BRANCH_STACK) { data->br_stack = NULL; data->dyn_size += sizeof(u64); data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } if (filtered_sample_type & PERF_SAMPLE_REGS_USER) perf_sample_regs_user(&data->regs_user, regs); /* * It cannot use the filtered_sample_type here as REGS_USER can be set * by STACK_USER (using __cond_set() above) and we don't want to update * the dyn_size if it's not requested by users. */ if ((sample_type & ~data->sample_flags) & PERF_SAMPLE_REGS_USER) { /* regs dump ABI info */ int size = sizeof(u64); if (data->regs_user.regs) { u64 mask = event->attr.sample_regs_user; size += hweight64(mask) * sizeof(u64); } data->dyn_size += size; data->sample_flags |= PERF_SAMPLE_REGS_USER; } if (filtered_sample_type & PERF_SAMPLE_STACK_USER) { /* * Either we need PERF_SAMPLE_STACK_USER bit to be always * processed as the last one or have additional check added * in case new sample type is added, because we could eat * up the rest of the sample size. */ u16 stack_size = event->attr.sample_stack_user; u16 header_size = perf_sample_data_size(data, event); u16 size = sizeof(u64); stack_size = perf_sample_ustack_size(stack_size, header_size, data->regs_user.regs); /* * If there is something to dump, add space for the dump * itself and for the field that tells the dynamic size, * which is how many have been actually dumped. 
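 *
 * The resulting on-record layout, as written by
 * perf_output_sample_ustack(), is roughly:
 *
 *	u64	size;		// static size reserved for the dump
 *	char	data[size];	// copied user stack
 *	u64	dyn_size;	// bytes actually copied
 *
 * (a kernel thread, having no user regs, emits just a single zero u64).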
*/ if (stack_size) size += sizeof(u64) + stack_size; data->stack_user_size = stack_size; data->dyn_size += size; data->sample_flags |= PERF_SAMPLE_STACK_USER; } if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE) { data->weight.full = 0; data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; } if (filtered_sample_type & PERF_SAMPLE_DATA_SRC) { data->data_src.val = PERF_MEM_NA; data->sample_flags |= PERF_SAMPLE_DATA_SRC; } if (filtered_sample_type & PERF_SAMPLE_TRANSACTION) { data->txn = 0; data->sample_flags |= PERF_SAMPLE_TRANSACTION; } if (filtered_sample_type & PERF_SAMPLE_ADDR) { data->addr = 0; data->sample_flags |= PERF_SAMPLE_ADDR; } if (filtered_sample_type & PERF_SAMPLE_REGS_INTR) { /* regs dump ABI info */ int size = sizeof(u64); perf_sample_regs_intr(&data->regs_intr, regs); if (data->regs_intr.regs) { u64 mask = event->attr.sample_regs_intr; size += hweight64(mask) * sizeof(u64); } data->dyn_size += size; data->sample_flags |= PERF_SAMPLE_REGS_INTR; } if (filtered_sample_type & PERF_SAMPLE_PHYS_ADDR) { data->phys_addr = perf_virt_to_phys(data->addr); data->sample_flags |= PERF_SAMPLE_PHYS_ADDR; } #ifdef CONFIG_CGROUP_PERF if (filtered_sample_type & PERF_SAMPLE_CGROUP) { struct cgroup *cgrp; /* protected by RCU */ cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup; data->cgroup = cgroup_id(cgrp); data->sample_flags |= PERF_SAMPLE_CGROUP; } #endif /* * PERF_DATA_PAGE_SIZE requires PERF_SAMPLE_ADDR. If the user doesn't * require PERF_SAMPLE_ADDR, kernel implicitly retrieve the data->addr, * but the value will not dump to the userspace. */ if (filtered_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) { data->data_page_size = perf_get_page_size(data->addr); data->sample_flags |= PERF_SAMPLE_DATA_PAGE_SIZE; } if (filtered_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) { data->code_page_size = perf_get_page_size(data->ip); data->sample_flags |= PERF_SAMPLE_CODE_PAGE_SIZE; } if (filtered_sample_type & PERF_SAMPLE_AUX) { u64 size; u16 header_size = perf_sample_data_size(data, event); header_size += sizeof(u64); /* size */ /* * Given the 16bit nature of header::size, an AUX sample can * easily overflow it, what with all the preceding sample bits. * Make sure this doesn't happen by using up to U16_MAX bytes * per sample in total (rounded down to 8 byte boundary). */ size = min_t(size_t, U16_MAX - header_size, event->attr.aux_sample_size); size = rounddown(size, 8); size = perf_prepare_sample_aux(event, data, size); WARN_ON_ONCE(size + header_size > U16_MAX); data->dyn_size += size + sizeof(u64); /* size above */ data->sample_flags |= PERF_SAMPLE_AUX; } } void perf_prepare_header(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, struct pt_regs *regs) { header->type = PERF_RECORD_SAMPLE; header->size = perf_sample_data_size(data, event); header->misc = perf_misc_flags(regs); /* * If you're adding more sample types here, you likely need to do * something about the overflowing header::size, like repurpose the * lowest 3 bits of size, which should be always zero at the moment. * This raises a more important question, do we really need 512k sized * samples and why, so good argumentation is in order for whatever you * do here next. 
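 * header::size is a u16 counted in bytes and every record is 8-byte
 * aligned; the WARN_ON_ONCE() below checks the latter.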
*/ WARN_ON_ONCE(header->size & 7); } static __always_inline int __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, int (*output_begin)(struct perf_output_handle *, struct perf_sample_data *, struct perf_event *, unsigned int)) { struct perf_output_handle handle; struct perf_event_header header; int err; /* protect the callchain buffers */ rcu_read_lock(); perf_prepare_sample(data, event, regs); perf_prepare_header(&header, data, event, regs); err = output_begin(&handle, data, event, header.size); if (err) goto exit; perf_output_sample(&handle, &header, data, event); perf_output_end(&handle); exit: rcu_read_unlock(); return err; } void perf_event_output_forward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { __perf_event_output(event, data, regs, perf_output_begin_forward); } void perf_event_output_backward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { __perf_event_output(event, data, regs, perf_output_begin_backward); } int perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { return __perf_event_output(event, data, regs, perf_output_begin); } /* * read event_id */ struct perf_read_event { struct perf_event_header header; u32 pid; u32 tid; }; static void perf_event_read_event(struct perf_event *event, struct task_struct *task) { struct perf_output_handle handle; struct perf_sample_data sample; struct perf_read_event read_event = { .header = { .type = PERF_RECORD_READ, .misc = 0, .size = sizeof(read_event) + event->read_size, }, .pid = perf_event_pid(event, task), .tid = perf_event_tid(event, task), }; int ret; perf_event_header__init_id(&read_event.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, read_event.header.size); if (ret) return; perf_output_put(&handle, read_event); perf_output_read(&handle, event); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } typedef void (perf_iterate_f)(struct perf_event *event, void *data); static void perf_iterate_ctx(struct perf_event_context *ctx, perf_iterate_f output, void *data, bool all) { struct perf_event *event; list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (!all) { if (event->state < PERF_EVENT_STATE_INACTIVE) continue; if (!event_filter_match(event)) continue; } output(event, data); } } static void perf_iterate_sb_cpu(perf_iterate_f output, void *data) { struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events); struct perf_event *event; list_for_each_entry_rcu(event, &pel->list, sb_list) { /* * Skip events that are not fully formed yet; ensure that * if we observe event->ctx, both event and ctx will be * complete enough. See perf_install_in_context(). */ if (!smp_load_acquire(&event->ctx)) continue; if (event->state < PERF_EVENT_STATE_INACTIVE) continue; if (!event_filter_match(event)) continue; output(event, data); } } /* * Iterate all events that need to receive side-band events. * * For new callers; ensure that account_pmu_sb_event() includes * your event, otherwise it might not get delivered. */ static void perf_iterate_sb(perf_iterate_f output, void *data, struct perf_event_context *task_ctx) { struct perf_event_context *ctx; rcu_read_lock(); preempt_disable(); /* * If we have task_ctx != NULL we only notify the task context itself. * The task_ctx is set only for EXIT events before releasing task * context. 
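 * Otherwise the record is delivered to events on the per-CPU side-band
 * list and to those in the current task's context.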
*/ if (task_ctx) { perf_iterate_ctx(task_ctx, output, data, false); goto done; } perf_iterate_sb_cpu(output, data); ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_iterate_ctx(ctx, output, data, false); done: preempt_enable(); rcu_read_unlock(); } /* * Clear all file-based filters at exec, they'll have to be * re-instated when/if these objects are mmapped again. */ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) { struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); struct perf_addr_filter *filter; unsigned int restart = 0, count = 0; unsigned long flags; if (!has_addr_filter(event)) return; raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { if (filter->path.dentry) { event->addr_filter_ranges[count].start = 0; event->addr_filter_ranges[count].size = 0; restart++; } count++; } if (restart) event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) perf_event_stop(event, 1); } void perf_event_exec(void) { struct perf_event_context *ctx; ctx = perf_pin_task_context(current); if (!ctx) return; perf_event_enable_on_exec(ctx); perf_event_remove_on_exec(ctx); perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); perf_unpin_context(ctx); put_ctx(ctx); } struct remote_output { struct perf_buffer *rb; int err; }; static void __perf_event_output_stop(struct perf_event *event, void *data) { struct perf_event *parent = event->parent; struct remote_output *ro = data; struct perf_buffer *rb = ro->rb; struct stop_event_data sd = { .event = event, }; if (!has_aux(event)) return; if (!parent) parent = event; /* * In case of inheritance, it will be the parent that links to the * ring-buffer, but it will be the child that's actually using it. * * We are using event::rb to determine if the event should be stopped, * however this may race with ring_buffer_attach() (through set_output), * which will make us skip the event that actually needs to be stopped. * So ring_buffer_attach() has to stop an aux event before re-assigning * its rb pointer. */ if (rcu_dereference(parent->rb) == rb) ro->err = __perf_event_stop(&sd); } static int __perf_pmu_output_stop(void *info) { struct perf_event *event = info; struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct remote_output ro = { .rb = event->rb, }; rcu_read_lock(); perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false); if (cpuctx->task_ctx) perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop, &ro, false); rcu_read_unlock(); return ro.err; } static void perf_pmu_output_stop(struct perf_event *event) { struct perf_event *iter; int err, cpu; restart: rcu_read_lock(); list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) { /* * For per-CPU events, we need to make sure that neither they * nor their children are running; for cpu==-1 events it's * sufficient to stop the event itself if it's active, since * it can't have children. 
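 * __perf_event_stop() can come back with -EAGAIN when the event has
 * moved to another CPU in the meantime; in that case the whole walk is
 * restarted.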
*/ cpu = iter->cpu; if (cpu == -1) cpu = READ_ONCE(iter->oncpu); if (cpu == -1) continue; err = cpu_function_call(cpu, __perf_pmu_output_stop, event); if (err == -EAGAIN) { rcu_read_unlock(); goto restart; } } rcu_read_unlock(); } /* * task tracking -- fork/exit * * enabled by: attr.comm | attr.mmap | attr.mmap2 | attr.mmap_data | attr.task */ struct perf_task_event { struct task_struct *task; struct perf_event_context *task_ctx; struct { struct perf_event_header header; u32 pid; u32 ppid; u32 tid; u32 ptid; u64 time; } event_id; }; static int perf_event_task_match(struct perf_event *event) { return event->attr.comm || event->attr.mmap || event->attr.mmap2 || event->attr.mmap_data || event->attr.task; } static void perf_event_task_output(struct perf_event *event, void *data) { struct perf_task_event *task_event = data; struct perf_output_handle handle; struct perf_sample_data sample; struct task_struct *task = task_event->task; int ret, size = task_event->event_id.header.size; if (!perf_event_task_match(event)) return; perf_event_header__init_id(&task_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, task_event->event_id.header.size); if (ret) goto out; task_event->event_id.pid = perf_event_pid(event, task); task_event->event_id.tid = perf_event_tid(event, task); if (task_event->event_id.header.type == PERF_RECORD_EXIT) { task_event->event_id.ppid = perf_event_pid(event, task->real_parent); task_event->event_id.ptid = perf_event_pid(event, task->real_parent); } else { /* PERF_RECORD_FORK */ task_event->event_id.ppid = perf_event_pid(event, current); task_event->event_id.ptid = perf_event_tid(event, current); } task_event->event_id.time = perf_event_clock(event); perf_output_put(&handle, task_event->event_id); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); out: task_event->event_id.header.size = size; } static void perf_event_task(struct task_struct *task, struct perf_event_context *task_ctx, int new) { struct perf_task_event task_event; if (!atomic_read(&nr_comm_events) && !atomic_read(&nr_mmap_events) && !atomic_read(&nr_task_events)) return; task_event = (struct perf_task_event){ .task = task, .task_ctx = task_ctx, .event_id = { .header = { .type = new ? 
PERF_RECORD_FORK : PERF_RECORD_EXIT, .misc = 0, .size = sizeof(task_event.event_id), }, /* .pid */ /* .ppid */ /* .tid */ /* .ptid */ /* .time */ }, }; perf_iterate_sb(perf_event_task_output, &task_event, task_ctx); } void perf_event_fork(struct task_struct *task) { perf_event_task(task, NULL, 1); perf_event_namespaces(task); } /* * comm tracking */ struct perf_comm_event { struct task_struct *task; char *comm; int comm_size; struct { struct perf_event_header header; u32 pid; u32 tid; } event_id; }; static int perf_event_comm_match(struct perf_event *event) { return event->attr.comm; } static void perf_event_comm_output(struct perf_event *event, void *data) { struct perf_comm_event *comm_event = data; struct perf_output_handle handle; struct perf_sample_data sample; int size = comm_event->event_id.header.size; int ret; if (!perf_event_comm_match(event)) return; perf_event_header__init_id(&comm_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, comm_event->event_id.header.size); if (ret) goto out; comm_event->event_id.pid = perf_event_pid(event, comm_event->task); comm_event->event_id.tid = perf_event_tid(event, comm_event->task); perf_output_put(&handle, comm_event->event_id); __output_copy(&handle, comm_event->comm, comm_event->comm_size); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); out: comm_event->event_id.header.size = size; } static void perf_event_comm_event(struct perf_comm_event *comm_event) { char comm[TASK_COMM_LEN]; unsigned int size; memset(comm, 0, sizeof(comm)); strscpy(comm, comm_event->task->comm, sizeof(comm)); size = ALIGN(strlen(comm)+1, sizeof(u64)); comm_event->comm = comm; comm_event->comm_size = size; comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; perf_iterate_sb(perf_event_comm_output, comm_event, NULL); } void perf_event_comm(struct task_struct *task, bool exec) { struct perf_comm_event comm_event; if (!atomic_read(&nr_comm_events)) return; comm_event = (struct perf_comm_event){ .task = task, /* .comm */ /* .comm_size */ .event_id = { .header = { .type = PERF_RECORD_COMM, .misc = exec ? 
PERF_RECORD_MISC_COMM_EXEC : 0, /* .size */ }, /* .pid */ /* .tid */ }, }; perf_event_comm_event(&comm_event); } /* * namespaces tracking */ struct perf_namespaces_event { struct task_struct *task; struct { struct perf_event_header header; u32 pid; u32 tid; u64 nr_namespaces; struct perf_ns_link_info link_info[NR_NAMESPACES]; } event_id; }; static int perf_event_namespaces_match(struct perf_event *event) { return event->attr.namespaces; } static void perf_event_namespaces_output(struct perf_event *event, void *data) { struct perf_namespaces_event *namespaces_event = data; struct perf_output_handle handle; struct perf_sample_data sample; u16 header_size = namespaces_event->event_id.header.size; int ret; if (!perf_event_namespaces_match(event)) return; perf_event_header__init_id(&namespaces_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, namespaces_event->event_id.header.size); if (ret) goto out; namespaces_event->event_id.pid = perf_event_pid(event, namespaces_event->task); namespaces_event->event_id.tid = perf_event_tid(event, namespaces_event->task); perf_output_put(&handle, namespaces_event->event_id); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); out: namespaces_event->event_id.header.size = header_size; } static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, struct task_struct *task, const struct proc_ns_operations *ns_ops) { struct path ns_path; struct inode *ns_inode; int error; error = ns_get_path(&ns_path, task, ns_ops); if (!error) { ns_inode = ns_path.dentry->d_inode; ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); ns_link_info->ino = ns_inode->i_ino; path_put(&ns_path); } } void perf_event_namespaces(struct task_struct *task) { struct perf_namespaces_event namespaces_event; struct perf_ns_link_info *ns_link_info; if (!atomic_read(&nr_namespaces_events)) return; namespaces_event = (struct perf_namespaces_event){ .task = task, .event_id = { .header = { .type = PERF_RECORD_NAMESPACES, .misc = 0, .size = sizeof(namespaces_event.event_id), }, /* .pid */ /* .tid */ .nr_namespaces = NR_NAMESPACES, /* .link_info[NR_NAMESPACES] */ }, }; ns_link_info = namespaces_event.event_id.link_info; perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX], task, &mntns_operations); #ifdef CONFIG_USER_NS perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX], task, &userns_operations); #endif #ifdef CONFIG_NET_NS perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX], task, &netns_operations); #endif #ifdef CONFIG_UTS_NS perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX], task, &utsns_operations); #endif #ifdef CONFIG_IPC_NS perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX], task, &ipcns_operations); #endif #ifdef CONFIG_PID_NS perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX], task, &pidns_operations); #endif #ifdef CONFIG_CGROUPS perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX], task, &cgroupns_operations); #endif perf_iterate_sb(perf_event_namespaces_output, &namespaces_event, NULL); } /* * cgroup tracking */ #ifdef CONFIG_CGROUP_PERF struct perf_cgroup_event { char *path; int path_size; struct { struct perf_event_header header; u64 id; char path[]; } event_id; }; static int perf_event_cgroup_match(struct perf_event *event) { return event->attr.cgroup; } static void perf_event_cgroup_output(struct perf_event *event, void *data) { struct perf_cgroup_event *cgroup_event = data; struct perf_output_handle handle; struct perf_sample_data sample; u16 header_size = cgroup_event->event_id.header.size; 
int ret; if (!perf_event_cgroup_match(event)) return; perf_event_header__init_id(&cgroup_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, cgroup_event->event_id.header.size); if (ret) goto out; perf_output_put(&handle, cgroup_event->event_id); __output_copy(&handle, cgroup_event->path, cgroup_event->path_size); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); out: cgroup_event->event_id.header.size = header_size; } static void perf_event_cgroup(struct cgroup *cgrp) { struct perf_cgroup_event cgroup_event; char path_enomem[16] = "//enomem"; char *pathname; size_t size; if (!atomic_read(&nr_cgroup_events)) return; cgroup_event = (struct perf_cgroup_event){ .event_id = { .header = { .type = PERF_RECORD_CGROUP, .misc = 0, .size = sizeof(cgroup_event.event_id), }, .id = cgroup_id(cgrp), }, }; pathname = kmalloc(PATH_MAX, GFP_KERNEL); if (pathname == NULL) { cgroup_event.path = path_enomem; } else { /* just to be sure to have enough space for alignment */ cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64)); cgroup_event.path = pathname; } /* * Since our buffer works in 8 byte units we need to align our string * size to a multiple of 8. However, we must guarantee the tail end is * zero'd out to avoid leaking random bits to userspace. */ size = strlen(cgroup_event.path) + 1; while (!IS_ALIGNED(size, sizeof(u64))) cgroup_event.path[size++] = '\0'; cgroup_event.event_id.header.size += size; cgroup_event.path_size = size; perf_iterate_sb(perf_event_cgroup_output, &cgroup_event, NULL); kfree(pathname); } #endif /* * mmap tracking */ struct perf_mmap_event { struct vm_area_struct *vma; const char *file_name; int file_size; int maj, min; u64 ino; u64 ino_generation; u32 prot, flags; u8 build_id[BUILD_ID_SIZE_MAX]; u32 build_id_size; struct { struct perf_event_header header; u32 pid; u32 tid; u64 start; u64 len; u64 pgoff; } event_id; }; static int perf_event_mmap_match(struct perf_event *event, void *data) { struct perf_mmap_event *mmap_event = data; struct vm_area_struct *vma = mmap_event->vma; int executable = vma->vm_flags & VM_EXEC; return (!executable && event->attr.mmap_data) || (executable && (event->attr.mmap || event->attr.mmap2)); } static void perf_event_mmap_output(struct perf_event *event, void *data) { struct perf_mmap_event *mmap_event = data; struct perf_output_handle handle; struct perf_sample_data sample; int size = mmap_event->event_id.header.size; u32 type = mmap_event->event_id.header.type; bool use_build_id; int ret; if (!perf_event_mmap_match(event, data)) return; if (event->attr.mmap2) { mmap_event->event_id.header.type = PERF_RECORD_MMAP2; mmap_event->event_id.header.size += sizeof(mmap_event->maj); mmap_event->event_id.header.size += sizeof(mmap_event->min); mmap_event->event_id.header.size += sizeof(mmap_event->ino); mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); mmap_event->event_id.header.size += sizeof(mmap_event->prot); mmap_event->event_id.header.size += sizeof(mmap_event->flags); } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, mmap_event->event_id.header.size); if (ret) goto out; mmap_event->event_id.pid = perf_event_pid(event, current); mmap_event->event_id.tid = perf_event_tid(event, current); use_build_id = event->attr.build_id && mmap_event->build_id_size; if (event->attr.mmap2 && use_build_id) mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID; perf_output_put(&handle, 
mmap_event->event_id); if (event->attr.mmap2) { if (use_build_id) { u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 }; __output_copy(&handle, size, 4); __output_copy(&handle, mmap_event->build_id, BUILD_ID_SIZE_MAX); } else { perf_output_put(&handle, mmap_event->maj); perf_output_put(&handle, mmap_event->min); perf_output_put(&handle, mmap_event->ino); perf_output_put(&handle, mmap_event->ino_generation); } perf_output_put(&handle, mmap_event->prot); perf_output_put(&handle, mmap_event->flags); } __output_copy(&handle, mmap_event->file_name, mmap_event->file_size); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); out: mmap_event->event_id.header.size = size; mmap_event->event_id.header.type = type; } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) { struct vm_area_struct *vma = mmap_event->vma; struct file *file = vma->vm_file; int maj = 0, min = 0; u64 ino = 0, gen = 0; u32 prot = 0, flags = 0; unsigned int size; char tmp[16]; char *buf = NULL; char *name = NULL; if (vma->vm_flags & VM_READ) prot |= PROT_READ; if (vma->vm_flags & VM_WRITE) prot |= PROT_WRITE; if (vma->vm_flags & VM_EXEC) prot |= PROT_EXEC; if (vma->vm_flags & VM_MAYSHARE) flags = MAP_SHARED; else flags = MAP_PRIVATE; if (vma->vm_flags & VM_LOCKED) flags |= MAP_LOCKED; if (is_vm_hugetlb_page(vma)) flags |= MAP_HUGETLB; if (file) { struct inode *inode; dev_t dev; buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) { name = "//enomem"; goto cpy_name; } /* * d_path() works from the end of the rb backwards, so we * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ name = file_path(file, buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = "//toolong"; goto cpy_name; } inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; gen = inode->i_generation; maj = MAJOR(dev); min = MINOR(dev); goto got_name; } else { if (vma->vm_ops && vma->vm_ops->name) name = (char *) vma->vm_ops->name(vma); if (!name) name = (char *)arch_vma_name(vma); if (!name) { if (vma_is_initial_heap(vma)) name = "[heap]"; else if (vma_is_initial_stack(vma)) name = "[stack]"; else name = "//anon"; } } cpy_name: strscpy(tmp, name, sizeof(tmp)); name = tmp; got_name: /* * Since our buffer works in 8 byte units we need to align our string * size to a multiple of 8. However, we must guarantee the tail end is * zero'd out to avoid leaking random bits to userspace. */ size = strlen(name)+1; while (!IS_ALIGNED(size, sizeof(u64))) name[size++] = '\0'; mmap_event->file_name = name; mmap_event->file_size = size; mmap_event->maj = maj; mmap_event->min = min; mmap_event->ino = ino; mmap_event->ino_generation = gen; mmap_event->prot = prot; mmap_event->flags = flags; if (!(vma->vm_flags & VM_EXEC)) mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA; mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; if (atomic_read(&nr_build_id_events)) build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size); perf_iterate_sb(perf_event_mmap_output, mmap_event, NULL); kfree(buf); } /* * Check whether inode and address range match filter criteria. 
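 * A file-based filter matches when it refers to the same inode as the
 * mapped file and its [offset, offset + size) range overlaps the mapped
 * range.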
*/ static bool perf_addr_filter_match(struct perf_addr_filter *filter, struct file *file, unsigned long offset, unsigned long size) { /* d_inode(NULL) won't be equal to any mapped user-space file */ if (!filter->path.dentry) return false; if (d_inode(filter->path.dentry) != file_inode(file)) return false; if (filter->offset > offset + size) return false; if (filter->offset + filter->size < offset) return false; return true; } static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter, struct vm_area_struct *vma, struct perf_addr_filter_range *fr) { unsigned long vma_size = vma->vm_end - vma->vm_start; unsigned long off = vma->vm_pgoff << PAGE_SHIFT; struct file *file = vma->vm_file; if (!perf_addr_filter_match(filter, file, off, vma_size)) return false; if (filter->offset < off) { fr->start = vma->vm_start; fr->size = min(vma_size, filter->size - (off - filter->offset)); } else { fr->start = vma->vm_start + filter->offset - off; fr->size = min(vma->vm_end - fr->start, filter->size); } return true; } static void __perf_addr_filters_adjust(struct perf_event *event, void *data) { struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); struct vm_area_struct *vma = data; struct perf_addr_filter *filter; unsigned int restart = 0, count = 0; unsigned long flags; if (!has_addr_filter(event)) return; if (!vma->vm_file) return; raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { if (perf_addr_filter_vma_adjust(filter, vma, &event->addr_filter_ranges[count])) restart++; count++; } if (restart) event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) perf_event_stop(event, 1); } /* * Adjust all task's events' filters to the new vma */ static void perf_addr_filters_adjust(struct vm_area_struct *vma) { struct perf_event_context *ctx; /* * Data tracing isn't supported yet and as such there is no need * to keep track of anything that isn't related to executable code: */ if (!(vma->vm_flags & VM_EXEC)) return; rcu_read_lock(); ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true); rcu_read_unlock(); } void perf_event_mmap(struct vm_area_struct *vma) { struct perf_mmap_event mmap_event; if (!atomic_read(&nr_mmap_events)) return; mmap_event = (struct perf_mmap_event){ .vma = vma, /* .file_name */ /* .file_size */ .event_id = { .header = { .type = PERF_RECORD_MMAP, .misc = PERF_RECORD_MISC_USER, /* .size */ }, /* .pid */ /* .tid */ .start = vma->vm_start, .len = vma->vm_end - vma->vm_start, .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT, }, /* .maj (attr_mmap2 only) */ /* .min (attr_mmap2 only) */ /* .ino (attr_mmap2 only) */ /* .ino_generation (attr_mmap2 only) */ /* .prot (attr_mmap2 only) */ /* .flags (attr_mmap2 only) */ }; perf_addr_filters_adjust(vma); perf_event_mmap_event(&mmap_event); } void perf_event_aux_event(struct perf_event *event, unsigned long head, unsigned long size, u64 flags) { struct perf_output_handle handle; struct perf_sample_data sample; struct perf_aux_event { struct perf_event_header header; u64 offset; u64 size; u64 flags; } rec = { .header = { .type = PERF_RECORD_AUX, .misc = 0, .size = sizeof(rec), }, .offset = head, .size = size, .flags = flags, }; int ret; perf_event_header__init_id(&rec.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; perf_output_put(&handle, rec); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } /* * Lost/dropped 
samples logging */ void perf_log_lost_samples(struct perf_event *event, u64 lost) { struct perf_output_handle handle; struct perf_sample_data sample; int ret; struct { struct perf_event_header header; u64 lost; } lost_samples_event = { .header = { .type = PERF_RECORD_LOST_SAMPLES, .misc = 0, .size = sizeof(lost_samples_event), }, .lost = lost, }; perf_event_header__init_id(&lost_samples_event.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, lost_samples_event.header.size); if (ret) return; perf_output_put(&handle, lost_samples_event); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } /* * context_switch tracking */ struct perf_switch_event { struct task_struct *task; struct task_struct *next_prev; struct { struct perf_event_header header; u32 next_prev_pid; u32 next_prev_tid; } event_id; }; static int perf_event_switch_match(struct perf_event *event) { return event->attr.context_switch; } static void perf_event_switch_output(struct perf_event *event, void *data) { struct perf_switch_event *se = data; struct perf_output_handle handle; struct perf_sample_data sample; int ret; if (!perf_event_switch_match(event)) return; /* Only CPU-wide events are allowed to see next/prev pid/tid */ if (event->ctx->task) { se->event_id.header.type = PERF_RECORD_SWITCH; se->event_id.header.size = sizeof(se->event_id.header); } else { se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE; se->event_id.header.size = sizeof(se->event_id); se->event_id.next_prev_pid = perf_event_pid(event, se->next_prev); se->event_id.next_prev_tid = perf_event_tid(event, se->next_prev); } perf_event_header__init_id(&se->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size); if (ret) return; if (event->ctx->task) perf_output_put(&handle, se->event_id.header); else perf_output_put(&handle, se->event_id); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } static void perf_event_switch(struct task_struct *task, struct task_struct *next_prev, bool sched_in) { struct perf_switch_event switch_event; /* N.B. caller checks nr_switch_events != 0 */ switch_event = (struct perf_switch_event){ .task = task, .next_prev = next_prev, .event_id = { .header = { /* .type */ .misc = sched_in ? 
0 : PERF_RECORD_MISC_SWITCH_OUT, /* .size */ }, /* .next_prev_pid */ /* .next_prev_tid */ }, }; if (!sched_in && task->on_rq) { switch_event.event_id.header.misc |= PERF_RECORD_MISC_SWITCH_OUT_PREEMPT; } perf_iterate_sb(perf_event_switch_output, &switch_event, NULL); } /* * IRQ throttle logging */ static void perf_log_throttle(struct perf_event *event, int enable) { struct perf_output_handle handle; struct perf_sample_data sample; int ret; struct { struct perf_event_header header; u64 time; u64 id; u64 stream_id; } throttle_event = { .header = { .type = PERF_RECORD_THROTTLE, .misc = 0, .size = sizeof(throttle_event), }, .time = perf_event_clock(event), .id = primary_event_id(event), .stream_id = event->id, }; if (enable) throttle_event.header.type = PERF_RECORD_UNTHROTTLE; perf_event_header__init_id(&throttle_event.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, throttle_event.header.size); if (ret) return; perf_output_put(&handle, throttle_event); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } /* * ksymbol register/unregister tracking */ struct perf_ksymbol_event { const char *name; int name_len; struct { struct perf_event_header header; u64 addr; u32 len; u16 ksym_type; u16 flags; } event_id; }; static int perf_event_ksymbol_match(struct perf_event *event) { return event->attr.ksymbol; } static void perf_event_ksymbol_output(struct perf_event *event, void *data) { struct perf_ksymbol_event *ksymbol_event = data; struct perf_output_handle handle; struct perf_sample_data sample; int ret; if (!perf_event_ksymbol_match(event)) return; perf_event_header__init_id(&ksymbol_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, ksymbol_event->event_id.header.size); if (ret) return; perf_output_put(&handle, ksymbol_event->event_id); __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, const char *sym) { struct perf_ksymbol_event ksymbol_event; char name[KSYM_NAME_LEN]; u16 flags = 0; int name_len; if (!atomic_read(&nr_ksymbol_events)) return; if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX || ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) goto err; strscpy(name, sym, KSYM_NAME_LEN); name_len = strlen(name) + 1; while (!IS_ALIGNED(name_len, sizeof(u64))) name[name_len++] = '\0'; BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64)); if (unregister) flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER; ksymbol_event = (struct perf_ksymbol_event){ .name = name, .name_len = name_len, .event_id = { .header = { .type = PERF_RECORD_KSYMBOL, .size = sizeof(ksymbol_event.event_id) + name_len, }, .addr = addr, .len = len, .ksym_type = ksym_type, .flags = flags, }, }; perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL); return; err: WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); } /* * bpf program load/unload tracking */ struct perf_bpf_event { struct bpf_prog *prog; struct { struct perf_event_header header; u16 type; u16 flags; u32 id; u8 tag[BPF_TAG_SIZE]; } event_id; }; static int perf_event_bpf_match(struct perf_event *event) { return event->attr.bpf_event; } static void perf_event_bpf_output(struct perf_event *event, void *data) { struct perf_bpf_event *bpf_event = data; struct perf_output_handle handle; struct perf_sample_data sample; int ret; if (!perf_event_bpf_match(event)) return; 
perf_event_header__init_id(&bpf_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, bpf_event->event_id.header.size); if (ret) return; perf_output_put(&handle, bpf_event->event_id); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, enum perf_bpf_event_type type) { bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD; int i; if (prog->aux->func_cnt == 0) { perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)prog->bpf_func, prog->jited_len, unregister, prog->aux->ksym.name); } else { for (i = 0; i < prog->aux->func_cnt; i++) { struct bpf_prog *subprog = prog->aux->func[i]; perf_event_ksymbol( PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, subprog->jited_len, unregister, subprog->aux->ksym.name); } } } void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags) { struct perf_bpf_event bpf_event; switch (type) { case PERF_BPF_EVENT_PROG_LOAD: case PERF_BPF_EVENT_PROG_UNLOAD: if (atomic_read(&nr_ksymbol_events)) perf_event_bpf_emit_ksymbols(prog, type); break; default: return; } if (!atomic_read(&nr_bpf_events)) return; bpf_event = (struct perf_bpf_event){ .prog = prog, .event_id = { .header = { .type = PERF_RECORD_BPF_EVENT, .size = sizeof(bpf_event.event_id), }, .type = type, .flags = flags, .id = prog->aux->id, }, }; BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64)); memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE); perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL); } struct perf_text_poke_event { const void *old_bytes; const void *new_bytes; size_t pad; u16 old_len; u16 new_len; struct { struct perf_event_header header; u64 addr; } event_id; }; static int perf_event_text_poke_match(struct perf_event *event) { return event->attr.text_poke; } static void perf_event_text_poke_output(struct perf_event *event, void *data) { struct perf_text_poke_event *text_poke_event = data; struct perf_output_handle handle; struct perf_sample_data sample; u64 padding = 0; int ret; if (!perf_event_text_poke_match(event)) return; perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, text_poke_event->event_id.header.size); if (ret) return; perf_output_put(&handle, text_poke_event->event_id); perf_output_put(&handle, text_poke_event->old_len); perf_output_put(&handle, text_poke_event->new_len); __output_copy(&handle, text_poke_event->old_bytes, text_poke_event->old_len); __output_copy(&handle, text_poke_event->new_bytes, text_poke_event->new_len); if (text_poke_event->pad) __output_copy(&handle, &padding, text_poke_event->pad); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } void perf_event_text_poke(const void *addr, const void *old_bytes, size_t old_len, const void *new_bytes, size_t new_len) { struct perf_text_poke_event text_poke_event; size_t tot, pad; if (!atomic_read(&nr_text_poke_events)) return; tot = sizeof(text_poke_event.old_len) + old_len; tot += sizeof(text_poke_event.new_len) + new_len; pad = ALIGN(tot, sizeof(u64)) - tot; text_poke_event = (struct perf_text_poke_event){ .old_bytes = old_bytes, .new_bytes = new_bytes, .pad = pad, .old_len = old_len, .new_len = new_len, .event_id = { .header = { .type = PERF_RECORD_TEXT_POKE, .misc = PERF_RECORD_MISC_KERNEL, .size = sizeof(text_poke_event.event_id) + tot + pad, }, .addr = (unsigned long)addr, }, }; 
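/*
 * As written out by perf_event_text_poke_output() above, the record
 * body is: addr, old_len, new_len, the old bytes, the new bytes, and
 * zero padding up to the next 8-byte boundary (tot + pad from above).
 */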
perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL); } void perf_event_itrace_started(struct perf_event *event) { event->attach_state |= PERF_ATTACH_ITRACE; } static void perf_log_itrace_start(struct perf_event *event) { struct perf_output_handle handle; struct perf_sample_data sample; struct perf_aux_event { struct perf_event_header header; u32 pid; u32 tid; } rec; int ret; if (event->parent) event = event->parent; if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) || event->attach_state & PERF_ATTACH_ITRACE) return; rec.header.type = PERF_RECORD_ITRACE_START; rec.header.misc = 0; rec.header.size = sizeof(rec); rec.pid = perf_event_pid(event, current); rec.tid = perf_event_tid(event, current); perf_event_header__init_id(&rec.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; perf_output_put(&handle, rec); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } void perf_report_aux_output_id(struct perf_event *event, u64 hw_id) { struct perf_output_handle handle; struct perf_sample_data sample; struct perf_aux_event { struct perf_event_header header; u64 hw_id; } rec; int ret; if (event->parent) event = event->parent; rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID; rec.header.misc = 0; rec.header.size = sizeof(rec); rec.hw_id = hw_id; perf_event_header__init_id(&rec.header, &sample, event); ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; perf_output_put(&handle, rec); perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); } EXPORT_SYMBOL_GPL(perf_report_aux_output_id); static int __perf_event_account_interrupt(struct perf_event *event, int throttle) { struct hw_perf_event *hwc = &event->hw; int ret = 0; u64 seq; seq = __this_cpu_read(perf_throttled_seq); if (seq != hwc->interrupts_seq) { hwc->interrupts_seq = seq; hwc->interrupts = 1; } else { hwc->interrupts++; if (unlikely(throttle && hwc->interrupts > max_samples_per_tick)) { __this_cpu_inc(perf_throttled_count); tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); hwc->interrupts = MAX_INTERRUPTS; perf_log_throttle(event, 0); ret = 1; } } if (event->attr.freq) { u64 now = perf_clock(); s64 delta = now - hwc->freq_time_stamp; hwc->freq_time_stamp = now; if (delta > 0 && delta < 2*TICK_NSEC) perf_adjust_period(event, delta, hwc->last_period, true); } return ret; } int perf_event_account_interrupt(struct perf_event *event) { return __perf_event_account_interrupt(event, 1); } static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) { /* * Due to interrupt latency (AKA "skid"), we may enter the * kernel before taking an overflow, even if the PMU is only * counting user events. */ if (event->attr.exclude_kernel && !user_mode(regs)) return false; return true; } /* * Generic event overflow handling, sampling. */ static int __perf_event_overflow(struct perf_event *event, int throttle, struct perf_sample_data *data, struct pt_regs *regs) { int events = atomic_read(&event->event_limit); int ret = 0; /* * Non-sampling counters might still use the PMI to fold short * hardware counters, ignore those. 
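 * For sampling events we account the interrupt (possibly throttling),
 * honour the event_limit, queue a SIGTRAP if attr::sigtrap is set and
 * finally invoke the overflow handler to emit the sample.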
*/ if (unlikely(!is_sampling_event(event))) return 0; ret = __perf_event_account_interrupt(event, throttle); /* * XXX event_limit might not quite work as expected on inherited * events */ event->pending_kill = POLL_IN; if (events && atomic_dec_and_test(&event->event_limit)) { ret = 1; event->pending_kill = POLL_HUP; perf_event_disable_inatomic(event); } if (event->attr.sigtrap) { /* * The desired behaviour of sigtrap vs invalid samples is a bit * tricky; on the one hand, one should not loose the SIGTRAP if * it is the first event, on the other hand, we should also not * trigger the WARN or override the data address. */ bool valid_sample = sample_is_allowed(event, regs); unsigned int pending_id = 1; if (regs) pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1; if (!event->pending_sigtrap) { event->pending_sigtrap = pending_id; local_inc(&event->ctx->nr_pending); } else if (event->attr.exclude_kernel && valid_sample) { /* * Should not be able to return to user space without * consuming pending_sigtrap; with exceptions: * * 1. Where !exclude_kernel, events can overflow again * in the kernel without returning to user space. * * 2. Events that can overflow again before the IRQ- * work without user space progress (e.g. hrtimer). * To approximate progress (with false negatives), * check 32-bit hash of the current IP. */ WARN_ON_ONCE(event->pending_sigtrap != pending_id); } event->pending_addr = 0; if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) event->pending_addr = data->addr; irq_work_queue(&event->pending_irq); } READ_ONCE(event->overflow_handler)(event, data, regs); if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; irq_work_queue(&event->pending_irq); } return ret; } int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { return __perf_event_overflow(event, 1, data, regs); } /* * Generic software event infrastructure */ struct swevent_htable { struct swevent_hlist *swevent_hlist; struct mutex hlist_mutex; int hlist_refcount; /* Recursion avoidance in each contexts */ int recursion[PERF_NR_CONTEXTS]; }; static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); /* * We directly increment event->count and keep a second value in * event->hw.period_left to count intervals. This period event * is kept in the range [-sample_period, 0] so that we can use the * sign as trigger. */ u64 perf_swevent_set_period(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; u64 period = hwc->last_period; u64 nr, offset; s64 old, val; hwc->last_period = hwc->sample_period; old = local64_read(&hwc->period_left); do { val = old; if (val < 0) return 0; nr = div64_u64(period + val, period); offset = nr * period; val -= offset; } while (!local64_try_cmpxchg(&hwc->period_left, &old, val)); return nr; } static void perf_swevent_overflow(struct perf_event *event, u64 overflow, struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; int throttle = 0; if (!overflow) overflow = perf_swevent_set_period(event); if (hwc->interrupts == MAX_INTERRUPTS) return; for (; overflow; overflow--) { if (__perf_event_overflow(event, throttle, data, regs)) { /* * We inhibit the overflow from happening when * hwc->interrupts == MAX_INTERRUPTS. 
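 * (MAX_INTERRUPTS is the throttled state set by
 * __perf_event_account_interrupt().)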
*/ break; } throttle = 1; } } static void perf_swevent_event(struct perf_event *event, u64 nr, struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; local64_add(nr, &event->count); if (!regs) return; if (!is_sampling_event(event)) return; if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) { data->period = nr; return perf_swevent_overflow(event, 1, data, regs); } else data->period = event->hw.last_period; if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) return perf_swevent_overflow(event, 1, data, regs); if (local64_add_negative(nr, &hwc->period_left)) return; perf_swevent_overflow(event, 0, data, regs); } static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { if (event->hw.state & PERF_HES_STOPPED) return 1; if (regs) { if (event->attr.exclude_user && user_mode(regs)) return 1; if (event->attr.exclude_kernel && !user_mode(regs)) return 1; } return 0; } static int perf_swevent_match(struct perf_event *event, enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { if (event->attr.type != type) return 0; if (event->attr.config != event_id) return 0; if (perf_exclude_event(event, regs)) return 0; return 1; } static inline u64 swevent_hash(u64 type, u32 event_id) { u64 val = event_id | (type << 32); return hash_64(val, SWEVENT_HLIST_BITS); } static inline struct hlist_head * __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) { u64 hash = swevent_hash(type, event_id); return &hlist->heads[hash]; } /* For the read side: events when they trigger */ static inline struct hlist_head * find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id) { struct swevent_hlist *hlist; hlist = rcu_dereference(swhash->swevent_hlist); if (!hlist) return NULL; return __find_swevent_head(hlist, type, event_id); } /* For the event head insertion and removal in the hlist */ static inline struct hlist_head * find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) { struct swevent_hlist *hlist; u32 event_id = event->attr.config; u64 type = event->attr.type; /* * Event scheduling is always serialized against hlist allocation * and release. Which makes the protected version suitable here. * The context lock guarantees that. 
*/ hlist = rcu_dereference_protected(swhash->swevent_hlist, lockdep_is_held(&event->ctx->lock)); if (!hlist) return NULL; return __find_swevent_head(hlist, type, event_id); } static void do_perf_sw_event(enum perf_type_id type, u32 event_id, u64 nr, struct perf_sample_data *data, struct pt_regs *regs) { struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct perf_event *event; struct hlist_head *head; rcu_read_lock(); head = find_swevent_head_rcu(swhash, type, event_id); if (!head) goto end; hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) perf_swevent_event(event, nr, data, regs); } end: rcu_read_unlock(); } DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]); int perf_swevent_get_recursion_context(void) { struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); return get_recursion_context(swhash->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void perf_swevent_put_recursion_context(int rctx) { struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); put_recursion_context(swhash->recursion, rctx); } void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { struct perf_sample_data data; if (WARN_ON_ONCE(!regs)) return; perf_sample_data_init(&data, addr, 0); do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); } void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { int rctx; preempt_disable_notrace(); rctx = perf_swevent_get_recursion_context(); if (unlikely(rctx < 0)) goto fail; ___perf_sw_event(event_id, nr, regs, addr); perf_swevent_put_recursion_context(rctx); fail: preempt_enable_notrace(); } static void perf_swevent_read(struct perf_event *event) { } static int perf_swevent_add(struct perf_event *event, int flags) { struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct hw_perf_event *hwc = &event->hw; struct hlist_head *head; if (is_sampling_event(event)) { hwc->last_period = hwc->sample_period; perf_swevent_set_period(event); } hwc->state = !(flags & PERF_EF_START); head = find_swevent_head(swhash, event); if (WARN_ON_ONCE(!head)) return -EINVAL; hlist_add_head_rcu(&event->hlist_entry, head); perf_event_update_userpage(event); return 0; } static void perf_swevent_del(struct perf_event *event, int flags) { hlist_del_rcu(&event->hlist_entry); } static void perf_swevent_start(struct perf_event *event, int flags) { event->hw.state = 0; } static void perf_swevent_stop(struct perf_event *event, int flags) { event->hw.state = PERF_HES_STOPPED; } /* Deref the hlist from the update side */ static inline struct swevent_hlist * swevent_hlist_deref(struct swevent_htable *swhash) { return rcu_dereference_protected(swhash->swevent_hlist, lockdep_is_held(&swhash->hlist_mutex)); } static void swevent_hlist_release(struct swevent_htable *swhash) { struct swevent_hlist *hlist = swevent_hlist_deref(swhash); if (!hlist) return; RCU_INIT_POINTER(swhash->swevent_hlist, NULL); kfree_rcu(hlist, rcu_head); } static void swevent_hlist_put_cpu(int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); if (!--swhash->hlist_refcount) swevent_hlist_release(swhash); mutex_unlock(&swhash->hlist_mutex); } static void swevent_hlist_put(void) { int cpu; for_each_possible_cpu(cpu) swevent_hlist_put_cpu(cpu); } static int swevent_hlist_get_cpu(int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); int err = 0; mutex_lock(&swhash->hlist_mutex); if (!swevent_hlist_deref(swhash) && 
cpumask_test_cpu(cpu, perf_online_mask)) { struct swevent_hlist *hlist; hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); if (!hlist) { err = -ENOMEM; goto exit; } rcu_assign_pointer(swhash->swevent_hlist, hlist); } swhash->hlist_refcount++; exit: mutex_unlock(&swhash->hlist_mutex); return err; } static int swevent_hlist_get(void) { int err, cpu, failed_cpu; mutex_lock(&pmus_lock); for_each_possible_cpu(cpu) { err = swevent_hlist_get_cpu(cpu); if (err) { failed_cpu = cpu; goto fail; } } mutex_unlock(&pmus_lock); return 0; fail: for_each_possible_cpu(cpu) { if (cpu == failed_cpu) break; swevent_hlist_put_cpu(cpu); } mutex_unlock(&pmus_lock); return err; } struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) { u64 event_id = event->attr.config; WARN_ON(event->parent); static_key_slow_dec(&perf_swevent_enabled[event_id]); swevent_hlist_put(); } static struct pmu perf_cpu_clock; /* fwd declaration */ static struct pmu perf_task_clock; static int perf_swevent_init(struct perf_event *event) { u64 event_id = event->attr.config; if (event->attr.type != PERF_TYPE_SOFTWARE) return -ENOENT; /* * no branch sampling for software events */ if (has_branch_stack(event)) return -EOPNOTSUPP; switch (event_id) { case PERF_COUNT_SW_CPU_CLOCK: event->attr.type = perf_cpu_clock.type; return -ENOENT; case PERF_COUNT_SW_TASK_CLOCK: event->attr.type = perf_task_clock.type; return -ENOENT; default: break; } if (event_id >= PERF_COUNT_SW_MAX) return -ENOENT; if (!event->parent) { int err; err = swevent_hlist_get(); if (err) return err; static_key_slow_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; } return 0; } static struct pmu perf_swevent = { .task_ctx_nr = perf_sw_context, .capabilities = PERF_PMU_CAP_NO_NMI, .event_init = perf_swevent_init, .add = perf_swevent_add, .del = perf_swevent_del, .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, }; #ifdef CONFIG_EVENT_TRACING static void tp_perf_event_destroy(struct perf_event *event) { perf_trace_destroy(event); } static int perf_tp_event_init(struct perf_event *event) { int err; if (event->attr.type != PERF_TYPE_TRACEPOINT) return -ENOENT; /* * no branch sampling for tracepoint events */ if (has_branch_stack(event)) return -EOPNOTSUPP; err = perf_trace_init(event); if (err) return err; event->destroy = tp_perf_event_destroy; return 0; } static struct pmu perf_tracepoint = { .task_ctx_nr = perf_sw_context, .event_init = perf_tp_event_init, .add = perf_trace_add, .del = perf_trace_del, .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, }; static int perf_tp_filter_match(struct perf_event *event, struct perf_sample_data *data) { void *record = data->raw->frag.data; /* only top level events have filters set */ if (event->parent) event = event->parent; if (likely(!event->filter) || filter_match_preds(event->filter, record)) return 1; return 0; } static int perf_tp_event_match(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { if (event->hw.state & PERF_HES_STOPPED) return 0; /* * If exclude_kernel, only trace user-space tracepoints (uprobes) */ if (event->attr.exclude_kernel && !user_mode(regs)) return 0; if (!perf_tp_filter_match(event, data)) return 0; return 1; } void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct trace_event_call *call, u64 count, struct pt_regs *regs, struct hlist_head *head, struct task_struct *task) { if (bpf_prog_array_valid(call)) { 
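/*
 * The tracepoint's raw record doubles as the BPF program's context:
 * its first pointer-sized slot is filled with the current regs before
 * the program runs. If the program rejects the event, or no event is
 * left on @head, bail out without emitting a sample.
 */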
*(struct pt_regs **)raw_data = regs; if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; } } perf_tp_event(call->event.type, count, raw_data, size, regs, head, rctx, task); } EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit); static void __perf_tp_event_target_task(u64 count, void *record, struct pt_regs *regs, struct perf_sample_data *data, struct perf_event *event) { struct trace_entry *entry = record; if (event->attr.config != entry->type) return; /* Cannot deliver synchronous signal to other task. */ if (event->attr.sigtrap) return; if (perf_tp_event_match(event, data, regs)) perf_swevent_event(event, count, data, regs); } static void perf_tp_event_target_task(u64 count, void *record, struct pt_regs *regs, struct perf_sample_data *data, struct perf_event_context *ctx) { unsigned int cpu = smp_processor_id(); struct pmu *pmu = &perf_tracepoint; struct perf_event *event, *sibling; perf_event_groups_for_cpu_pmu(event, &ctx->pinned_groups, cpu, pmu) { __perf_tp_event_target_task(count, record, regs, data, event); for_each_sibling_event(sibling, event) __perf_tp_event_target_task(count, record, regs, data, sibling); } perf_event_groups_for_cpu_pmu(event, &ctx->flexible_groups, cpu, pmu) { __perf_tp_event_target_task(count, record, regs, data, event); for_each_sibling_event(sibling, event) __perf_tp_event_target_task(count, record, regs, data, sibling); } } void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task) { struct perf_sample_data data; struct perf_event *event; struct perf_raw_record raw = { .frag = { .size = entry_size, .data = record, }, }; perf_sample_data_init(&data, 0, 0); perf_sample_save_raw_data(&data, &raw); perf_trace_buf_update(record, event_type); hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) { perf_swevent_event(event, count, &data, regs); /* * Here use the same on-stack perf_sample_data, * some members in data are event-specific and * need to be re-computed for different sweveents. * Re-initialize data->sample_flags safely to avoid * the problem that next event skips preparing data * because data->sample_flags is set. */ perf_sample_data_init(&data, 0, 0); perf_sample_save_raw_data(&data, &raw); } } /* * If we got specified a target task, also iterate its context and * deliver this event there too. */ if (task && task != current) { struct perf_event_context *ctx; rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp); if (!ctx) goto unlock; raw_spin_lock(&ctx->lock); perf_tp_event_target_task(count, record, regs, &data, ctx); raw_spin_unlock(&ctx->lock); unlock: rcu_read_unlock(); } perf_swevent_put_recursion_context(rctx); } EXPORT_SYMBOL_GPL(perf_tp_event); #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) /* * Flags in config, used by dynamic PMU kprobe and uprobe * The flags should match following PMU_FORMAT_ATTR(). * * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe * if not set, create kprobe/uprobe * * The following values specify a reference counter (or semaphore in the * terminology of tools like dtrace, systemtap, etc.) Userspace Statically * Defined Tracepoints (USDT). Currently, we use 40 bit for the offset. 
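 * (The enum below reserves the top PERF_UPROBE_REF_CTR_OFFSET_BITS bits
 * of attr::config for the reference counter offset; userspace sees this
 * as the "ref_ctr_offset" format attribute.)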
* * PERF_UPROBE_REF_CTR_OFFSET_BITS # of bits in config as th offset * PERF_UPROBE_REF_CTR_OFFSET_SHIFT # of bits to shift left */ enum perf_probe_config { PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */ PERF_UPROBE_REF_CTR_OFFSET_BITS = 32, PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS, }; PMU_FORMAT_ATTR(retprobe, "config:0"); #endif #ifdef CONFIG_KPROBE_EVENTS static struct attribute *kprobe_attrs[] = { &format_attr_retprobe.attr, NULL, }; static struct attribute_group kprobe_format_group = { .name = "format", .attrs = kprobe_attrs, }; static const struct attribute_group *kprobe_attr_groups[] = { &kprobe_format_group, NULL, }; static int perf_kprobe_event_init(struct perf_event *event); static struct pmu perf_kprobe = { .task_ctx_nr = perf_sw_context, .event_init = perf_kprobe_event_init, .add = perf_trace_add, .del = perf_trace_del, .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, .attr_groups = kprobe_attr_groups, }; static int perf_kprobe_event_init(struct perf_event *event) { int err; bool is_retprobe; if (event->attr.type != perf_kprobe.type) return -ENOENT; if (!perfmon_capable()) return -EACCES; /* * no branch sampling for probe events */ if (has_branch_stack(event)) return -EOPNOTSUPP; is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; err = perf_kprobe_init(event, is_retprobe); if (err) return err; event->destroy = perf_kprobe_destroy; return 0; } #endif /* CONFIG_KPROBE_EVENTS */ #ifdef CONFIG_UPROBE_EVENTS PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63"); static struct attribute *uprobe_attrs[] = { &format_attr_retprobe.attr, &format_attr_ref_ctr_offset.attr, NULL, }; static struct attribute_group uprobe_format_group = { .name = "format", .attrs = uprobe_attrs, }; static const struct attribute_group *uprobe_attr_groups[] = { &uprobe_format_group, NULL, }; static int perf_uprobe_event_init(struct perf_event *event); static struct pmu perf_uprobe = { .task_ctx_nr = perf_sw_context, .event_init = perf_uprobe_event_init, .add = perf_trace_add, .del = perf_trace_del, .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, .attr_groups = uprobe_attr_groups, }; static int perf_uprobe_event_init(struct perf_event *event) { int err; unsigned long ref_ctr_offset; bool is_retprobe; if (event->attr.type != perf_uprobe.type) return -ENOENT; if (!perfmon_capable()) return -EACCES; /* * no branch sampling for probe events */ if (has_branch_stack(event)) return -EOPNOTSUPP; is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT; err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe); if (err) return err; event->destroy = perf_uprobe_destroy; return 0; } #endif /* CONFIG_UPROBE_EVENTS */ static inline void perf_tp_register(void) { perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT); #ifdef CONFIG_KPROBE_EVENTS perf_pmu_register(&perf_kprobe, "kprobe", -1); #endif #ifdef CONFIG_UPROBE_EVENTS perf_pmu_register(&perf_uprobe, "uprobe", -1); #endif } static void perf_event_free_filter(struct perf_event *event) { ftrace_profile_free_filter(event); } #ifdef CONFIG_BPF_SYSCALL static void bpf_overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { struct bpf_perf_event_data_kern ctx = { .data = data, .event = event, }; struct bpf_prog *prog; int ret = 0; ctx.regs = perf_arch_bpf_user_pt_regs(regs); if 
(unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) goto out; rcu_read_lock(); prog = READ_ONCE(event->prog); if (prog) { perf_prepare_sample(data, event, regs); ret = bpf_prog_run(prog, &ctx); } rcu_read_unlock(); out: __this_cpu_dec(bpf_prog_active); if (!ret) return; event->orig_overflow_handler(event, data, regs); } static int perf_event_set_bpf_handler(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie) { if (event->overflow_handler_context) /* hw breakpoint or kernel counter */ return -EINVAL; if (event->prog) return -EEXIST; if (prog->type != BPF_PROG_TYPE_PERF_EVENT) return -EINVAL; if (event->attr.precise_ip && prog->call_get_stack && (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) || event->attr.exclude_callchain_kernel || event->attr.exclude_callchain_user)) { /* * On perf_event with precise_ip, calling bpf_get_stack() * may trigger unwinder warnings and occasional crashes. * bpf_get_[stack|stackid] works around this issue by using * callchain attached to perf_sample_data. If the * perf_event does not have a full (kernel and user) callchain * attached to perf_sample_data, do not allow attaching a BPF * program that calls bpf_get_[stack|stackid]. */ return -EPROTO; } event->prog = prog; event->bpf_cookie = bpf_cookie; event->orig_overflow_handler = READ_ONCE(event->overflow_handler); WRITE_ONCE(event->overflow_handler, bpf_overflow_handler); return 0; } static void perf_event_free_bpf_handler(struct perf_event *event) { struct bpf_prog *prog = event->prog; if (!prog) return; WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler); event->prog = NULL; bpf_prog_put(prog); } #else static int perf_event_set_bpf_handler(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie) { return -EOPNOTSUPP; } static void perf_event_free_bpf_handler(struct perf_event *event) { } #endif /* * returns true if the event is a tracepoint, or a kprobe/uprobe created * with perf_event_open() */ static inline bool perf_event_is_tracing(struct perf_event *event) { if (event->pmu == &perf_tracepoint) return true; #ifdef CONFIG_KPROBE_EVENTS if (event->pmu == &perf_kprobe) return true; #endif #ifdef CONFIG_UPROBE_EVENTS if (event->pmu == &perf_uprobe) return true; #endif return false; } int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie) { bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp; if (!perf_event_is_tracing(event)) return perf_event_set_bpf_handler(event, prog, bpf_cookie); is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_KPROBE; is_uprobe = event->tp_event->flags & TRACE_EVENT_FL_UPROBE; is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; is_syscall_tp = is_syscall_trace_event(event->tp_event); if (!is_kprobe && !is_uprobe && !is_tracepoint && !is_syscall_tp) /* bpf programs can only be attached to u/kprobe or tracepoint */ return -EINVAL; if (((is_kprobe || is_uprobe) && prog->type != BPF_PROG_TYPE_KPROBE) || (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) || (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) return -EINVAL; if (prog->type == BPF_PROG_TYPE_KPROBE && prog->sleepable && !is_uprobe) /* only uprobe programs are allowed to be sleepable */ return -EINVAL; /* Kprobe override only works for kprobes, not uprobes. 
*/ if (prog->kprobe_override && !is_kprobe) return -EINVAL; if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); if (prog->aux->max_ctx_offset > off) return -EACCES; } return perf_event_attach_bpf_prog(event, prog, bpf_cookie); } void perf_event_free_bpf_prog(struct perf_event *event) { if (!perf_event_is_tracing(event)) { perf_event_free_bpf_handler(event); return; } perf_event_detach_bpf_prog(event); } #else static inline void perf_tp_register(void) { } static void perf_event_free_filter(struct perf_event *event) { } int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie) { return -ENOENT; } void perf_event_free_bpf_prog(struct perf_event *event) { } #endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT void perf_bp_event(struct perf_event *bp, void *data) { struct perf_sample_data sample; struct pt_regs *regs = data; perf_sample_data_init(&sample, bp->attr.bp_addr, 0); if (!bp->hw.state && !perf_exclude_event(bp, regs)) perf_swevent_event(bp, 1, &sample, regs); } #endif /* * Allocate a new address filter */ static struct perf_addr_filter * perf_addr_filter_new(struct perf_event *event, struct list_head *filters) { int node = cpu_to_node(event->cpu == -1 ? 0 : event->cpu); struct perf_addr_filter *filter; filter = kzalloc_node(sizeof(*filter), GFP_KERNEL, node); if (!filter) return NULL; INIT_LIST_HEAD(&filter->entry); list_add_tail(&filter->entry, filters); return filter; } static void free_filters_list(struct list_head *filters) { struct perf_addr_filter *filter, *iter; list_for_each_entry_safe(filter, iter, filters, entry) { path_put(&filter->path); list_del(&filter->entry); kfree(filter); } } /* * Free existing address filters and optionally install new ones */ static void perf_addr_filters_splice(struct perf_event *event, struct list_head *head) { unsigned long flags; LIST_HEAD(list); if (!has_addr_filter(event)) return; /* don't bother with children, they don't have their own filters */ if (event->parent) return; raw_spin_lock_irqsave(&event->addr_filters.lock, flags); list_splice_init(&event->addr_filters.list, &list); if (head) list_splice(head, &event->addr_filters.list); raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags); free_filters_list(&list); } /* * Scan through mm's vmas and see if one of them matches the * @filter; if so, adjust filter's address range. * Called with mm::mmap_lock down for reading. */ static void perf_addr_filter_apply(struct perf_addr_filter *filter, struct mm_struct *mm, struct perf_addr_filter_range *fr) { struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); for_each_vma(vmi, vma) { if (!vma->vm_file) continue; if (perf_addr_filter_vma_adjust(filter, vma, fr)) return; } } /* * Update event's address range filters based on the * task's existing mappings, if any. 
*/ static void perf_event_addr_filters_apply(struct perf_event *event) { struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); struct task_struct *task = READ_ONCE(event->ctx->task); struct perf_addr_filter *filter; struct mm_struct *mm = NULL; unsigned int count = 0; unsigned long flags; /* * We may observe TASK_TOMBSTONE, which means that the event tear-down * will stop on the parent's child_mutex that our caller is also holding */ if (task == TASK_TOMBSTONE) return; if (ifh->nr_file_filters) { mm = get_task_mm(task); if (!mm) goto restart; mmap_read_lock(mm); } raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { if (filter->path.dentry) { /* * Adjust base offset if the filter is associated to a * binary that needs to be mapped: */ event->addr_filter_ranges[count].start = 0; event->addr_filter_ranges[count].size = 0; perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); } else { event->addr_filter_ranges[count].start = filter->offset; event->addr_filter_ranges[count].size = filter->size; } count++; } event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); if (ifh->nr_file_filters) { mmap_read_unlock(mm); mmput(mm); } restart: perf_event_stop(event, 1); } /* * Address range filtering: limiting the data to certain * instruction address ranges. Filters are ioctl()ed to us from * userspace as ascii strings. * * Filter string format: * * ACTION RANGE_SPEC * where ACTION is one of the * * "filter": limit the trace to this region * * "start": start tracing from this address * * "stop": stop tracing at this address/region; * RANGE_SPEC is * * for kernel addresses: <start address>[/<size>] * * for object files: <start address>[/<size>]@</path/to/object/file> * * if <size> is not specified or is zero, the range is treated as a single * address; not valid for ACTION=="filter". 
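 *
 * (Illustrative examples, not part of the original comment; whether a
 * given filter is accepted depends on the PMU, e.g. Intel PT:
 *
 *	filter 0x1000/0x2000@/usr/lib64/libc.so.6
 *	start 0xffffffff81000000/0x4000
 *	stop 0xffffffff81004000
 *
 * i.e. limit tracing to a 0x2000-byte region at offset 0x1000 into libc,
 * start tracing at a kernel address range, and stop at a single kernel
 * address.)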
*/ enum { IF_ACT_NONE = -1, IF_ACT_FILTER, IF_ACT_START, IF_ACT_STOP, IF_SRC_FILE, IF_SRC_KERNEL, IF_SRC_FILEADDR, IF_SRC_KERNELADDR, }; enum { IF_STATE_ACTION = 0, IF_STATE_SOURCE, IF_STATE_END, }; static const match_table_t if_tokens = { { IF_ACT_FILTER, "filter" }, { IF_ACT_START, "start" }, { IF_ACT_STOP, "stop" }, { IF_SRC_FILE, "%u/%u@%s" }, { IF_SRC_KERNEL, "%u/%u" }, { IF_SRC_FILEADDR, "%u@%s" }, { IF_SRC_KERNELADDR, "%u" }, { IF_ACT_NONE, NULL }, }; /* * Address filter string parser */ static int perf_event_parse_addr_filter(struct perf_event *event, char *fstr, struct list_head *filters) { struct perf_addr_filter *filter = NULL; char *start, *orig, *filename = NULL; substring_t args[MAX_OPT_ARGS]; int state = IF_STATE_ACTION, token; unsigned int kernel = 0; int ret = -EINVAL; orig = fstr = kstrdup(fstr, GFP_KERNEL); if (!fstr) return -ENOMEM; while ((start = strsep(&fstr, " ,\n")) != NULL) { static const enum perf_addr_filter_action_t actions[] = { [IF_ACT_FILTER] = PERF_ADDR_FILTER_ACTION_FILTER, [IF_ACT_START] = PERF_ADDR_FILTER_ACTION_START, [IF_ACT_STOP] = PERF_ADDR_FILTER_ACTION_STOP, }; ret = -EINVAL; if (!*start) continue; /* filter definition begins */ if (state == IF_STATE_ACTION) { filter = perf_addr_filter_new(event, filters); if (!filter) goto fail; } token = match_token(start, if_tokens, args); switch (token) { case IF_ACT_FILTER: case IF_ACT_START: case IF_ACT_STOP: if (state != IF_STATE_ACTION) goto fail; filter->action = actions[token]; state = IF_STATE_SOURCE; break; case IF_SRC_KERNELADDR: case IF_SRC_KERNEL: kernel = 1; fallthrough; case IF_SRC_FILEADDR: case IF_SRC_FILE: if (state != IF_STATE_SOURCE) goto fail; *args[0].to = 0; ret = kstrtoul(args[0].from, 0, &filter->offset); if (ret) goto fail; if (token == IF_SRC_KERNEL || token == IF_SRC_FILE) { *args[1].to = 0; ret = kstrtoul(args[1].from, 0, &filter->size); if (ret) goto fail; } if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) { int fpos = token == IF_SRC_FILE ? 2 : 1; kfree(filename); filename = match_strdup(&args[fpos]); if (!filename) { ret = -ENOMEM; goto fail; } } state = IF_STATE_END; break; default: goto fail; } /* * Filter definition is fully parsed, validate and install it. * Make sure that it doesn't contradict itself or the event's * attribute. */ if (state == IF_STATE_END) { ret = -EINVAL; /* * ACTION "filter" must have a non-zero length region * specified. */ if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER && !filter->size) goto fail; if (!kernel) { if (!filename) goto fail; /* * For now, we only support file-based filters * in per-task events; doing so for CPU-wide * events requires additional context switching * trickery, since same object code will be * mapped at different virtual addresses in * different processes. 
*/ ret = -EOPNOTSUPP; if (!event->ctx->task) goto fail; /* look up the path and grab its inode */ ret = kern_path(filename, LOOKUP_FOLLOW, &filter->path); if (ret) goto fail; ret = -EINVAL; if (!filter->path.dentry || !S_ISREG(d_inode(filter->path.dentry) ->i_mode)) goto fail; event->addr_filters.nr_file_filters++; } /* ready to consume more filters */ kfree(filename); filename = NULL; state = IF_STATE_ACTION; filter = NULL; kernel = 0; } } if (state != IF_STATE_ACTION) goto fail; kfree(filename); kfree(orig); return 0; fail: kfree(filename); free_filters_list(filters); kfree(orig); return ret; } static int perf_event_set_addr_filter(struct perf_event *event, char *filter_str) { LIST_HEAD(filters); int ret; /* * Since this is called in perf_ioctl() path, we're already holding * ctx::mutex. */ lockdep_assert_held(&event->ctx->mutex); if (WARN_ON_ONCE(event->parent)) return -EINVAL; ret = perf_event_parse_addr_filter(event, filter_str, &filters); if (ret) goto fail_clear_files; ret = event->pmu->addr_filters_validate(&filters); if (ret) goto fail_free_filters; /* remove existing filters, if any */ perf_addr_filters_splice(event, &filters); /* install new filters */ perf_event_for_each_child(event, perf_event_addr_filters_apply); return ret; fail_free_filters: free_filters_list(&filters); fail_clear_files: event->addr_filters.nr_file_filters = 0; return ret; } static int perf_event_set_filter(struct perf_event *event, void __user *arg) { int ret = -EINVAL; char *filter_str; filter_str = strndup_user(arg, PAGE_SIZE); if (IS_ERR(filter_str)) return PTR_ERR(filter_str); #ifdef CONFIG_EVENT_TRACING if (perf_event_is_tracing(event)) { struct perf_event_context *ctx = event->ctx; /* * Beware, here be dragons!! * * the tracepoint muck will deadlock against ctx->mutex, but * the tracepoint stuff does not actually need it. So * temporarily drop ctx->mutex. As per perf_event_ctx_lock() we * already have a reference on ctx. * * This can result in event getting moved to a different ctx, * but that does not affect the tracepoint state. 
*/ mutex_unlock(&ctx->mutex); ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); mutex_lock(&ctx->mutex); } else #endif if (has_addr_filter(event)) ret = perf_event_set_addr_filter(event, filter_str); kfree(filter_str); return ret; } /* * hrtimer based swevent callback */ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) { enum hrtimer_restart ret = HRTIMER_RESTART; struct perf_sample_data data; struct pt_regs *regs; struct perf_event *event; u64 period; event = container_of(hrtimer, struct perf_event, hw.hrtimer); if (event->state != PERF_EVENT_STATE_ACTIVE) return HRTIMER_NORESTART; event->pmu->read(event); perf_sample_data_init(&data, 0, event->hw.last_period); regs = get_irq_regs(); if (regs && !perf_exclude_event(event, regs)) { if (!(event->attr.exclude_idle && is_idle_task(current))) if (__perf_event_overflow(event, 1, &data, regs)) ret = HRTIMER_NORESTART; } period = max_t(u64, 10000, event->hw.sample_period); hrtimer_forward_now(hrtimer, ns_to_ktime(period)); return ret; } static void perf_swevent_start_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; s64 period; if (!is_sampling_event(event)) return; period = local64_read(&hwc->period_left); if (period) { if (period < 0) period = 10000; local64_set(&hwc->period_left, 0); } else { period = max_t(u64, 10000, hwc->sample_period); } hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), HRTIMER_MODE_REL_PINNED_HARD); } static void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; if (is_sampling_event(event)) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); hrtimer_cancel(&hwc->hrtimer); } } static void perf_swevent_init_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; if (!is_sampling_event(event)) return; hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); hwc->hrtimer.function = perf_swevent_hrtimer; /* * Since hrtimers have a fixed rate, we can do a static freq->period * mapping and avoid the whole period adjust feedback stuff. 
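 *
 * (Worked example, not part of the original comment: a requested
 * sample_freq of 1000 Hz becomes a fixed period of
 * NSEC_PER_SEC / 1000 = 1,000,000 ns, i.e. the hrtimer fires every
 * millisecond regardless of load.)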
*/ if (event->attr.freq) { long freq = event->attr.sample_freq; event->attr.sample_period = NSEC_PER_SEC / freq; hwc->sample_period = event->attr.sample_period; local64_set(&hwc->period_left, hwc->sample_period); hwc->last_period = hwc->sample_period; event->attr.freq = 0; } } /* * Software event: cpu wall time clock */ static void cpu_clock_event_update(struct perf_event *event) { s64 prev; u64 now; now = local_clock(); prev = local64_xchg(&event->hw.prev_count, now); local64_add(now - prev, &event->count); } static void cpu_clock_event_start(struct perf_event *event, int flags) { local64_set(&event->hw.prev_count, local_clock()); perf_swevent_start_hrtimer(event); } static void cpu_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); cpu_clock_event_update(event); } static int cpu_clock_event_add(struct perf_event *event, int flags) { if (flags & PERF_EF_START) cpu_clock_event_start(event, flags); perf_event_update_userpage(event); return 0; } static void cpu_clock_event_del(struct perf_event *event, int flags) { cpu_clock_event_stop(event, flags); } static void cpu_clock_event_read(struct perf_event *event) { cpu_clock_event_update(event); } static int cpu_clock_event_init(struct perf_event *event) { if (event->attr.type != perf_cpu_clock.type) return -ENOENT; if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) return -ENOENT; /* * no branch sampling for software events */ if (has_branch_stack(event)) return -EOPNOTSUPP; perf_swevent_init_hrtimer(event); return 0; } static struct pmu perf_cpu_clock = { .task_ctx_nr = perf_sw_context, .capabilities = PERF_PMU_CAP_NO_NMI, .dev = PMU_NULL_DEV, .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, .del = cpu_clock_event_del, .start = cpu_clock_event_start, .stop = cpu_clock_event_stop, .read = cpu_clock_event_read, }; /* * Software event: task time clock */ static void task_clock_event_update(struct perf_event *event, u64 now) { u64 prev; s64 delta; prev = local64_xchg(&event->hw.prev_count, now); delta = now - prev; local64_add(delta, &event->count); } static void task_clock_event_start(struct perf_event *event, int flags) { local64_set(&event->hw.prev_count, event->ctx->time); perf_swevent_start_hrtimer(event); } static void task_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); task_clock_event_update(event, event->ctx->time); } static int task_clock_event_add(struct perf_event *event, int flags) { if (flags & PERF_EF_START) task_clock_event_start(event, flags); perf_event_update_userpage(event); return 0; } static void task_clock_event_del(struct perf_event *event, int flags) { task_clock_event_stop(event, PERF_EF_UPDATE); } static void task_clock_event_read(struct perf_event *event) { u64 now = perf_clock(); u64 delta = now - event->ctx->timestamp; u64 time = event->ctx->time + delta; task_clock_event_update(event, time); } static int task_clock_event_init(struct perf_event *event) { if (event->attr.type != perf_task_clock.type) return -ENOENT; if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) return -ENOENT; /* * no branch sampling for software events */ if (has_branch_stack(event)) return -EOPNOTSUPP; perf_swevent_init_hrtimer(event); return 0; } static struct pmu perf_task_clock = { .task_ctx_nr = perf_sw_context, .capabilities = PERF_PMU_CAP_NO_NMI, .dev = PMU_NULL_DEV, .event_init = task_clock_event_init, .add = task_clock_event_add, .del = task_clock_event_del, .start = task_clock_event_start, .stop = task_clock_event_stop, .read = 
task_clock_event_read, }; static void perf_pmu_nop_void(struct pmu *pmu) { } static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags) { } static int perf_pmu_nop_int(struct pmu *pmu) { return 0; } static int perf_event_nop_int(struct perf_event *event, u64 value) { return 0; } static DEFINE_PER_CPU(unsigned int, nop_txn_flags); static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) { __this_cpu_write(nop_txn_flags, flags); if (flags & ~PERF_PMU_TXN_ADD) return; perf_pmu_disable(pmu); } static int perf_pmu_commit_txn(struct pmu *pmu) { unsigned int flags = __this_cpu_read(nop_txn_flags); __this_cpu_write(nop_txn_flags, 0); if (flags & ~PERF_PMU_TXN_ADD) return 0; perf_pmu_enable(pmu); return 0; } static void perf_pmu_cancel_txn(struct pmu *pmu) { unsigned int flags = __this_cpu_read(nop_txn_flags); __this_cpu_write(nop_txn_flags, 0); if (flags & ~PERF_PMU_TXN_ADD) return; perf_pmu_enable(pmu); } static int perf_event_idx_default(struct perf_event *event) { return 0; } static void free_pmu_context(struct pmu *pmu) { free_percpu(pmu->cpu_pmu_context); } /* * Let userspace know that this PMU supports address range filtering: */ static ssize_t nr_addr_filters_show(struct device *dev, struct device_attribute *attr, char *page) { struct pmu *pmu = dev_get_drvdata(dev); return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters); } DEVICE_ATTR_RO(nr_addr_filters); static struct idr pmu_idr; static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *page) { struct pmu *pmu = dev_get_drvdata(dev); return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->type); } static DEVICE_ATTR_RO(type); static ssize_t perf_event_mux_interval_ms_show(struct device *dev, struct device_attribute *attr, char *page) { struct pmu *pmu = dev_get_drvdata(dev); return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->hrtimer_interval_ms); } static DEFINE_MUTEX(mux_interval_mutex); static ssize_t perf_event_mux_interval_ms_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct pmu *pmu = dev_get_drvdata(dev); int timer, cpu, ret; ret = kstrtoint(buf, 0, &timer); if (ret) return ret; if (timer < 1) return -EINVAL; /* same value, nothing to do */ if (timer == pmu->hrtimer_interval_ms) return count; mutex_lock(&mux_interval_mutex); pmu->hrtimer_interval_ms = timer; /* update all cpuctx for this PMU */ cpus_read_lock(); for_each_online_cpu(cpu) { struct perf_cpu_pmu_context *cpc; cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu); cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpc); } cpus_read_unlock(); mutex_unlock(&mux_interval_mutex); return count; } static DEVICE_ATTR_RW(perf_event_mux_interval_ms); static struct attribute *pmu_dev_attrs[] = { &dev_attr_type.attr, &dev_attr_perf_event_mux_interval_ms.attr, &dev_attr_nr_addr_filters.attr, NULL, }; static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct pmu *pmu = dev_get_drvdata(dev); if (n == 2 && !pmu->nr_addr_filters) return 0; return a->mode; } static struct attribute_group pmu_dev_attr_group = { .is_visible = pmu_dev_is_visible, .attrs = pmu_dev_attrs, }; static const struct attribute_group *pmu_dev_groups[] = { &pmu_dev_attr_group, NULL, }; static int pmu_bus_running; static struct bus_type pmu_bus = { .name = "event_source", .dev_groups = pmu_dev_groups, }; static void pmu_dev_release(struct device *dev) { kfree(dev); } static int 
pmu_dev_alloc(struct pmu *pmu) { int ret = -ENOMEM; pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL); if (!pmu->dev) goto out; pmu->dev->groups = pmu->attr_groups; device_initialize(pmu->dev); dev_set_drvdata(pmu->dev, pmu); pmu->dev->bus = &pmu_bus; pmu->dev->parent = pmu->parent; pmu->dev->release = pmu_dev_release; ret = dev_set_name(pmu->dev, "%s", pmu->name); if (ret) goto free_dev; ret = device_add(pmu->dev); if (ret) goto free_dev; if (pmu->attr_update) { ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update); if (ret) goto del_dev; } out: return ret; del_dev: device_del(pmu->dev); free_dev: put_device(pmu->dev); goto out; } static struct lock_class_key cpuctx_mutex; static struct lock_class_key cpuctx_lock; int perf_pmu_register(struct pmu *pmu, const char *name, int type) { int cpu, ret, max = PERF_TYPE_MAX; mutex_lock(&pmus_lock); ret = -ENOMEM; pmu->pmu_disable_count = alloc_percpu(int); if (!pmu->pmu_disable_count) goto unlock; pmu->type = -1; if (WARN_ONCE(!name, "Can not register anonymous pmu.\n")) { ret = -EINVAL; goto free_pdc; } pmu->name = name; if (type >= 0) max = type; ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); if (ret < 0) goto free_pdc; WARN_ON(type >= 0 && ret != type); type = ret; pmu->type = type; if (pmu_bus_running && !pmu->dev) { ret = pmu_dev_alloc(pmu); if (ret) goto free_idr; } ret = -ENOMEM; pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context); if (!pmu->cpu_pmu_context) goto free_dev; for_each_possible_cpu(cpu) { struct perf_cpu_pmu_context *cpc; cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu); __perf_init_event_pmu_context(&cpc->epc, pmu); __perf_mux_hrtimer_init(cpc, cpu); } if (!pmu->start_txn) { if (pmu->pmu_enable) { /* * If we have pmu_enable/pmu_disable calls, install * transaction stubs that use that to try and batch * hardware accesses. */ pmu->start_txn = perf_pmu_start_txn; pmu->commit_txn = perf_pmu_commit_txn; pmu->cancel_txn = perf_pmu_cancel_txn; } else { pmu->start_txn = perf_pmu_nop_txn; pmu->commit_txn = perf_pmu_nop_int; pmu->cancel_txn = perf_pmu_nop_void; } } if (!pmu->pmu_enable) { pmu->pmu_enable = perf_pmu_nop_void; pmu->pmu_disable = perf_pmu_nop_void; } if (!pmu->check_period) pmu->check_period = perf_event_nop_int; if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; list_add_rcu(&pmu->entry, &pmus); atomic_set(&pmu->exclusive_cnt, 0); ret = 0; unlock: mutex_unlock(&pmus_lock); return ret; free_dev: if (pmu->dev && pmu->dev != PMU_NULL_DEV) { device_del(pmu->dev); put_device(pmu->dev); } free_idr: idr_remove(&pmu_idr, pmu->type); free_pdc: free_percpu(pmu->pmu_disable_count); goto unlock; } EXPORT_SYMBOL_GPL(perf_pmu_register); void perf_pmu_unregister(struct pmu *pmu) { mutex_lock(&pmus_lock); list_del_rcu(&pmu->entry); /* * We dereference the pmu list under both SRCU and regular RCU, so * synchronize against both of those. 
*/ synchronize_srcu(&pmus_srcu); synchronize_rcu(); free_percpu(pmu->pmu_disable_count); idr_remove(&pmu_idr, pmu->type); if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); device_del(pmu->dev); put_device(pmu->dev); } free_pmu_context(pmu); mutex_unlock(&pmus_lock); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); static inline bool has_extended_regs(struct perf_event *event) { return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) || (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK); } static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) { struct perf_event_context *ctx = NULL; int ret; if (!try_module_get(pmu->module)) return -ENODEV; /* * A number of pmu->event_init() methods iterate the sibling_list to, * for example, validate if the group fits on the PMU. Therefore, * if this is a sibling event, acquire the ctx->mutex to protect * the sibling_list. */ if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) { /* * This ctx->mutex can nest when we're called through * inheritance. See the perf_event_ctx_lock_nested() comment. */ ctx = perf_event_ctx_lock_nested(event->group_leader, SINGLE_DEPTH_NESTING); BUG_ON(!ctx); } event->pmu = pmu; ret = pmu->event_init(event); if (ctx) perf_event_ctx_unlock(event->group_leader, ctx); if (!ret) { if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) && has_extended_regs(event)) ret = -EOPNOTSUPP; if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && event_has_any_exclude_flag(event)) ret = -EINVAL; if (ret && event->destroy) event->destroy(event); } if (ret) module_put(pmu->module); return ret; } static struct pmu *perf_init_event(struct perf_event *event) { bool extended_type = false; int idx, type, ret; struct pmu *pmu; idx = srcu_read_lock(&pmus_srcu); /* * Save original type before calling pmu->event_init() since certain * pmus overwrites event->attr.type to forward event to another pmu. */ event->orig_type = event->attr.type; /* Try parent's PMU first: */ if (event->parent && event->parent->pmu) { pmu = event->parent->pmu; ret = perf_try_init_event(pmu, event); if (!ret) goto unlock; } /* * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE * are often aliases for PERF_TYPE_RAW. 
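 *
 * (Illustrative, not part of the original comment: with the extended
 * type encoding, attr.type == PERF_TYPE_HARDWARE together with
 * attr.config == ((__u64)pmu_idx << PERF_PMU_TYPE_SHIFT) |
 * PERF_COUNT_HW_INSTRUCTIONS requests the generic "instructions" event
 * from the PMU registered under idr id pmu_idx (a hypothetical
 * placeholder), while a zero upper half falls back to the PERF_TYPE_RAW
 * handling below.)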
*/ type = event->attr.type; if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) { type = event->attr.config >> PERF_PMU_TYPE_SHIFT; if (!type) { type = PERF_TYPE_RAW; } else { extended_type = true; event->attr.config &= PERF_HW_EVENT_MASK; } } again: rcu_read_lock(); pmu = idr_find(&pmu_idr, type); rcu_read_unlock(); if (pmu) { if (event->attr.type != type && type != PERF_TYPE_RAW && !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)) goto fail; ret = perf_try_init_event(pmu, event); if (ret == -ENOENT && event->attr.type != type && !extended_type) { type = event->attr.type; goto again; } if (ret) pmu = ERR_PTR(ret); goto unlock; } list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) { ret = perf_try_init_event(pmu, event); if (!ret) goto unlock; if (ret != -ENOENT) { pmu = ERR_PTR(ret); goto unlock; } } fail: pmu = ERR_PTR(-ENOENT); unlock: srcu_read_unlock(&pmus_srcu, idx); return pmu; } static void attach_sb_event(struct perf_event *event) { struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu); raw_spin_lock(&pel->lock); list_add_rcu(&event->sb_list, &pel->list); raw_spin_unlock(&pel->lock); } /* * We keep a list of all !task (and therefore per-cpu) events * that need to receive side-band records. * * This avoids having to scan all the various PMU per-cpu contexts * looking for them. */ static void account_pmu_sb_event(struct perf_event *event) { if (is_sb_event(event)) attach_sb_event(event); } /* Freq events need the tick to stay alive (see perf_event_task_tick). */ static void account_freq_event_nohz(void) { #ifdef CONFIG_NO_HZ_FULL /* Lock so we don't race with concurrent unaccount */ spin_lock(&nr_freq_lock); if (atomic_inc_return(&nr_freq_events) == 1) tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS); spin_unlock(&nr_freq_lock); #endif } static void account_freq_event(void) { if (tick_nohz_full_enabled()) account_freq_event_nohz(); else atomic_inc(&nr_freq_events); } static void account_event(struct perf_event *event) { bool inc = false; if (event->parent) return; if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) inc = true; if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); if (event->attr.build_id) atomic_inc(&nr_build_id_events); if (event->attr.comm) atomic_inc(&nr_comm_events); if (event->attr.namespaces) atomic_inc(&nr_namespaces_events); if (event->attr.cgroup) atomic_inc(&nr_cgroup_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) account_freq_event(); if (event->attr.context_switch) { atomic_inc(&nr_switch_events); inc = true; } if (has_branch_stack(event)) inc = true; if (is_cgroup_event(event)) inc = true; if (event->attr.ksymbol) atomic_inc(&nr_ksymbol_events); if (event->attr.bpf_event) atomic_inc(&nr_bpf_events); if (event->attr.text_poke) atomic_inc(&nr_text_poke_events); if (inc) { /* * We need the mutex here because static_branch_enable() * must complete *before* the perf_sched_count increment * becomes visible. */ if (atomic_inc_not_zero(&perf_sched_count)) goto enabled; mutex_lock(&perf_sched_mutex); if (!atomic_read(&perf_sched_count)) { static_branch_enable(&perf_sched_events); /* * Guarantee that all CPUs observe the key change and * call the perf scheduling hooks before proceeding to * install events that need them. */ synchronize_rcu(); } /* * Now that we have waited for the sync_sched(), allow further * increments to by-pass the mutex. 
*/ atomic_inc(&perf_sched_count); mutex_unlock(&perf_sched_mutex); } enabled: account_pmu_sb_event(event); } /* * Allocate and initialize an event structure */ static struct perf_event * perf_event_alloc(struct perf_event_attr *attr, int cpu, struct task_struct *task, struct perf_event *group_leader, struct perf_event *parent_event, perf_overflow_handler_t overflow_handler, void *context, int cgroup_fd) { struct pmu *pmu; struct perf_event *event; struct hw_perf_event *hwc; long err = -EINVAL; int node; if ((unsigned)cpu >= nr_cpu_ids) { if (!task || cpu != -1) return ERR_PTR(-EINVAL); } if (attr->sigtrap && !task) { /* Requires a task: avoid signalling random tasks. */ return ERR_PTR(-EINVAL); } node = (cpu >= 0) ? cpu_to_node(cpu) : -1; event = kmem_cache_alloc_node(perf_event_cache, GFP_KERNEL | __GFP_ZERO, node); if (!event) return ERR_PTR(-ENOMEM); /* * Single events are their own group leaders, with an * empty sibling list: */ if (!group_leader) group_leader = event; mutex_init(&event->child_mutex); INIT_LIST_HEAD(&event->child_list); INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); INIT_LIST_HEAD(&event->active_list); init_event_group(event); INIT_LIST_HEAD(&event->rb_entry); INIT_LIST_HEAD(&event->active_entry); INIT_LIST_HEAD(&event->addr_filters.list); INIT_HLIST_NODE(&event->hlist_entry); init_waitqueue_head(&event->waitq); init_irq_work(&event->pending_irq, perf_pending_irq); init_task_work(&event->pending_task, perf_pending_task); mutex_init(&event->mmap_mutex); raw_spin_lock_init(&event->addr_filters.lock); atomic_long_set(&event->refcount, 1); event->cpu = cpu; event->attr = *attr; event->group_leader = group_leader; event->pmu = NULL; event->oncpu = -1; event->parent = parent_event; event->ns = get_pid_ns(task_active_pid_ns(current)); event->id = atomic64_inc_return(&perf_event_id); event->state = PERF_EVENT_STATE_INACTIVE; if (parent_event) event->event_caps = parent_event->event_caps; if (task) { event->attach_state = PERF_ATTACH_TASK; /* * XXX pmu::event_init needs to know what task to account to * and we cannot use the ctx information because we need the * pmu before we get a ctx. */ event->hw.target = get_task_struct(task); } event->clock = &local_clock; if (parent_event) event->clock = parent_event->clock; if (!overflow_handler && parent_event) { overflow_handler = parent_event->overflow_handler; context = parent_event->overflow_handler_context; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING) if (overflow_handler == bpf_overflow_handler) { struct bpf_prog *prog = parent_event->prog; bpf_prog_inc(prog); event->prog = prog; event->orig_overflow_handler = parent_event->orig_overflow_handler; } #endif } if (overflow_handler) { event->overflow_handler = overflow_handler; event->overflow_handler_context = context; } else if (is_write_backward(event)){ event->overflow_handler = perf_event_output_backward; event->overflow_handler_context = NULL; } else { event->overflow_handler = perf_event_output_forward; event->overflow_handler_context = NULL; } perf_event__state_init(event); pmu = NULL; hwc = &event->hw; hwc->sample_period = attr->sample_period; if (attr->freq && attr->sample_freq) hwc->sample_period = 1; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); /* * We currently do not support PERF_SAMPLE_READ on inherited events. * See perf_output_read(). 
*/ if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)) goto err_ns; if (!has_branch_stack(event)) event->attr.branch_sample_type = 0; pmu = perf_init_event(event); if (IS_ERR(pmu)) { err = PTR_ERR(pmu); goto err_ns; } /* * Disallow uncore-task events. Similarly, disallow uncore-cgroup * events (they don't make sense as the cgroup will be different * on other CPUs in the uncore mask). */ if (pmu->task_ctx_nr == perf_invalid_context && (task || cgroup_fd != -1)) { err = -EINVAL; goto err_pmu; } if (event->attr.aux_output && !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) { err = -EOPNOTSUPP; goto err_pmu; } if (cgroup_fd != -1) { err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); if (err) goto err_pmu; } err = exclusive_event_init(event); if (err) goto err_pmu; if (has_addr_filter(event)) { event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters, sizeof(struct perf_addr_filter_range), GFP_KERNEL); if (!event->addr_filter_ranges) { err = -ENOMEM; goto err_per_task; } /* * Clone the parent's vma offsets: they are valid until exec() * even if the mm is not shared with the parent. */ if (event->parent) { struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); raw_spin_lock_irq(&ifh->lock); memcpy(event->addr_filter_ranges, event->parent->addr_filter_ranges, pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range)); raw_spin_unlock_irq(&ifh->lock); } /* force hw sync on the address filters */ event->addr_filters_gen = 1; } if (!event->parent) { if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { err = get_callchain_buffers(attr->sample_max_stack); if (err) goto err_addr_filters; } } err = security_perf_event_alloc(event); if (err) goto err_callchain_buffer; /* symmetric to unaccount_event() in _free_event() */ account_event(event); return event; err_callchain_buffer: if (!event->parent) { if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) put_callchain_buffers(); } err_addr_filters: kfree(event->addr_filter_ranges); err_per_task: exclusive_event_destroy(event); err_pmu: if (is_cgroup_event(event)) perf_detach_cgroup(event); if (event->destroy) event->destroy(event); module_put(pmu->module); err_ns: if (event->hw.target) put_task_struct(event->hw.target); call_rcu(&event->rcu_head, free_event_rcu); return ERR_PTR(err); } static int perf_copy_attr(struct perf_event_attr __user *uattr, struct perf_event_attr *attr) { u32 size; int ret; /* Zero the full structure, so that a short copy will be nice. 
*/ memset(attr, 0, sizeof(*attr)); ret = get_user(size, &uattr->size); if (ret) return ret; /* ABI compatibility quirk: */ if (!size) size = PERF_ATTR_SIZE_VER0; if (size < PERF_ATTR_SIZE_VER0 || size > PAGE_SIZE) goto err_size; ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); if (ret) { if (ret == -E2BIG) goto err_size; return ret; } attr->size = size; if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) return -EINVAL; if (attr->read_format & ~(PERF_FORMAT_MAX-1)) return -EINVAL; if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 mask = attr->branch_sample_type; /* only using defined bits */ if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1)) return -EINVAL; /* at least one branch bit must be set */ if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL)) return -EINVAL; /* propagate priv level, when not set for branch */ if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) { /* exclude_kernel checked on syscall entry */ if (!attr->exclude_kernel) mask |= PERF_SAMPLE_BRANCH_KERNEL; if (!attr->exclude_user) mask |= PERF_SAMPLE_BRANCH_USER; if (!attr->exclude_hv) mask |= PERF_SAMPLE_BRANCH_HV; /* * adjust user setting (for HW filter setup) */ attr->branch_sample_type = mask; } /* privileged levels capture (kernel, hv): check permissions */ if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) { ret = perf_allow_kernel(attr); if (ret) return ret; } } if (attr->sample_type & PERF_SAMPLE_REGS_USER) { ret = perf_reg_validate(attr->sample_regs_user); if (ret) return ret; } if (attr->sample_type & PERF_SAMPLE_STACK_USER) { if (!arch_perf_have_user_stack_dump()) return -ENOSYS; /* * We have __u32 type for the size, but so far * we can only use __u16 as maximum due to the * __u16 sample size limit. */ if (attr->sample_stack_user >= USHRT_MAX) return -EINVAL; else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) return -EINVAL; } if (!attr->sample_max_stack) attr->sample_max_stack = sysctl_perf_event_max_stack; if (attr->sample_type & PERF_SAMPLE_REGS_INTR) ret = perf_reg_validate(attr->sample_regs_intr); #ifndef CONFIG_CGROUP_PERF if (attr->sample_type & PERF_SAMPLE_CGROUP) return -EINVAL; #endif if ((attr->sample_type & PERF_SAMPLE_WEIGHT) && (attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) return -EINVAL; if (!attr->inherit && attr->inherit_thread) return -EINVAL; if (attr->remove_on_exec && attr->enable_on_exec) return -EINVAL; if (attr->sigtrap && !attr->remove_on_exec) return -EINVAL; out: return ret; err_size: put_user(sizeof(*attr), &uattr->size); ret = -E2BIG; goto out; } static void mutex_lock_double(struct mutex *a, struct mutex *b) { if (b < a) swap(a, b); mutex_lock(a); mutex_lock_nested(b, SINGLE_DEPTH_NESTING); } static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event) { struct perf_buffer *rb = NULL; int ret = -EINVAL; if (!output_event) { mutex_lock(&event->mmap_mutex); goto set; } /* don't allow circular references */ if (event == output_event) goto out; /* * Don't allow cross-cpu buffers */ if (output_event->cpu != event->cpu) goto out; /* * If its not a per-cpu rb, it must be the same task. */ if (output_event->cpu == -1 && output_event->hw.target != event->hw.target) goto out; /* * Mixing clocks in the same buffer is trouble you don't need. */ if (output_event->clock != event->clock) goto out; /* * Either writing ring buffer from beginning or from end. * Mixing is not allowed. 
*/ if (is_write_backward(output_event) != is_write_backward(event)) goto out; /* * If both events generate aux data, they must be on the same PMU */ if (has_aux(event) && has_aux(output_event) && event->pmu != output_event->pmu) goto out; /* * Hold both mmap_mutex to serialize against perf_mmap_close(). Since * output_event is already on rb->event_list, and the list iteration * restarts after every removal, it is guaranteed this new event is * observed *OR* if output_event is already removed, it's guaranteed we * observe !rb->mmap_count. */ mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); set: /* Can't redirect output if we've got an active mmap() */ if (atomic_read(&event->mmap_count)) goto unlock; if (output_event) { /* get the rb we want to redirect to */ rb = ring_buffer_get(output_event); if (!rb) goto unlock; /* did we race against perf_mmap_close() */ if (!atomic_read(&rb->mmap_count)) { ring_buffer_put(rb); goto unlock; } } ring_buffer_attach(event, rb); ret = 0; unlock: mutex_unlock(&event->mmap_mutex); if (output_event) mutex_unlock(&output_event->mmap_mutex); out: return ret; } static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) { bool nmi_safe = false; switch (clk_id) { case CLOCK_MONOTONIC: event->clock = &ktime_get_mono_fast_ns; nmi_safe = true; break; case CLOCK_MONOTONIC_RAW: event->clock = &ktime_get_raw_fast_ns; nmi_safe = true; break; case CLOCK_REALTIME: event->clock = &ktime_get_real_ns; break; case CLOCK_BOOTTIME: event->clock = &ktime_get_boottime_ns; break; case CLOCK_TAI: event->clock = &ktime_get_clocktai_ns; break; default: return -EINVAL; } if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI)) return -EINVAL; return 0; } static bool perf_check_permission(struct perf_event_attr *attr, struct task_struct *task) { unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS; bool is_capable = perfmon_capable(); if (attr->sigtrap) { /* * perf_event_attr::sigtrap sends signals to the other task. * Require the current task to also have CAP_KILL. */ rcu_read_lock(); is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); rcu_read_unlock(); /* * If the required capabilities aren't available, checks for * ptrace permissions: upgrade to ATTACH, since sending signals * can effectively change the target task. */ ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS; } /* * Preserve ptrace permission check for backwards compatibility. The * ptrace check also includes checks that the current task and other * task have matching uids, and is therefore not done here explicitly. */ return is_capable || ptrace_may_access(task, ptrace_mode); } /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * * @attr_uptr: event_id type attributes for monitoring/sampling * @pid: target pid * @cpu: target cpu * @group_fd: group leader event fd * @flags: perf event open flags */ SYSCALL_DEFINE5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { struct perf_event *group_leader = NULL, *output_event = NULL; struct perf_event_pmu_context *pmu_ctx; struct perf_event *event, *sibling; struct perf_event_attr attr; struct perf_event_context *ctx; struct file *event_file = NULL; struct fd group = {NULL, 0}; struct task_struct *task = NULL; struct pmu *pmu; int event_fd; int move_group = 0; int err; int f_flags = O_RDWR; int cgroup_fd = -1; /* for future expandability... 
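 *
 * (Illustrative only, not part of the original comment: a minimal
 * userspace caller of this syscall might do
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_HARDWARE,
 *		.size	= sizeof(attr),
 *		.config	= PERF_COUNT_HW_CPU_CYCLES,
 *	};
 *	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1,
 *			 PERF_FLAG_FD_CLOEXEC);
 *
 * i.e. count CPU cycles for the calling task on any CPU, with no group
 * leader and no output redirection.)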
*/ if (flags & ~PERF_FLAG_ALL) return -EINVAL; err = perf_copy_attr(attr_uptr, &attr); if (err) return err; /* Do we allow access to perf_event_open(2) ? */ err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); if (err) return err; if (!attr.exclude_kernel) { err = perf_allow_kernel(&attr); if (err) return err; } if (attr.namespaces) { if (!perfmon_capable()) return -EACCES; } if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; } else { if (attr.sample_period & (1ULL << 63)) return -EINVAL; } /* Only privileged users can get physical addresses */ if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) { err = perf_allow_kernel(&attr); if (err) return err; } /* REGS_INTR can leak data, lockdown must prevent this */ if (attr.sample_type & PERF_SAMPLE_REGS_INTR) { err = security_locked_down(LOCKDOWN_PERF); if (err) return err; } /* * In cgroup mode, the pid argument is used to pass the fd * opened to the cgroup directory in cgroupfs. The cpu argument * designates the cpu on which to monitor threads from that * cgroup. */ if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) return -EINVAL; if (flags & PERF_FLAG_FD_CLOEXEC) f_flags |= O_CLOEXEC; event_fd = get_unused_fd_flags(f_flags); if (event_fd < 0) return event_fd; if (group_fd != -1) { err = perf_fget_light(group_fd, &group); if (err) goto err_fd; group_leader = group.file->private_data; if (flags & PERF_FLAG_FD_OUTPUT) output_event = group_leader; if (flags & PERF_FLAG_FD_NO_GROUP) group_leader = NULL; } if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) { task = find_lively_task_by_vpid(pid); if (IS_ERR(task)) { err = PTR_ERR(task); goto err_group_fd; } } if (task && group_leader && group_leader->attr.inherit != attr.inherit) { err = -EINVAL; goto err_task; } if (flags & PERF_FLAG_PID_CGROUP) cgroup_fd = pid; event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_task; } if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -EOPNOTSUPP; goto err_alloc; } } /* * Special case software events and allow them to be part of * any hardware group. */ pmu = event->pmu; if (attr.use_clockid) { err = perf_event_set_clock(event, attr.clockid); if (err) goto err_alloc; } if (pmu->task_ctx_nr == perf_sw_context) event->event_caps |= PERF_EV_CAP_SOFTWARE; if (task) { err = down_read_interruptible(&task->signal->exec_update_lock); if (err) goto err_alloc; /* * We must hold exec_update_lock across this and any potential * perf_install_in_context() call for this new event to * serialize against exec() altering our credentials (and the * perf_event_exit_task() that could imply). */ err = -EACCES; if (!perf_check_permission(&attr, task)) goto err_cred; } /* * Get the target context (task or percpu): */ ctx = find_get_context(task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_cred; } mutex_lock(&ctx->mutex); if (ctx->task == TASK_TOMBSTONE) { err = -ESRCH; goto err_locked; } if (!task) { /* * Check if the @cpu we're creating an event for is online. * * We use the perf_cpu_context::ctx::mutex to serialize against * the hotplug notifiers. See perf_event_{init,exit}_cpu(). 
*/ struct perf_cpu_context *cpuctx = per_cpu_ptr(&perf_cpu_context, event->cpu); if (!cpuctx->online) { err = -ENODEV; goto err_locked; } } if (group_leader) { err = -EINVAL; /* * Do not allow a recursive hierarchy (this new sibling * becoming part of another group-sibling): */ if (group_leader->group_leader != group_leader) goto err_locked; /* All events in a group should have the same clock */ if (group_leader->clock != event->clock) goto err_locked; /* * Make sure we're both events for the same CPU; * grouping events for different CPUs is broken; since * you can never concurrently schedule them anyhow. */ if (group_leader->cpu != event->cpu) goto err_locked; /* * Make sure we're both on the same context; either task or cpu. */ if (group_leader->ctx != ctx) goto err_locked; /* * Only a group leader can be exclusive or pinned */ if (attr.exclusive || attr.pinned) goto err_locked; if (is_software_event(event) && !in_software_context(group_leader)) { /* * If the event is a sw event, but the group_leader * is on hw context. * * Allow the addition of software events to hw * groups, this is safe because software events * never fail to schedule. * * Note the comment that goes with struct * perf_event_pmu_context. */ pmu = group_leader->pmu_ctx->pmu; } else if (!is_software_event(event)) { if (is_software_event(group_leader) && (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) { /* * In case the group is a pure software group, and we * try to add a hardware event, move the whole group to * the hardware context. */ move_group = 1; } /* Don't allow group of multiple hw events from different pmus */ if (!in_software_context(group_leader) && group_leader->pmu_ctx->pmu != pmu) goto err_locked; } } /* * Now that we're certain of the pmu; find the pmu_ctx. */ pmu_ctx = find_get_pmu_context(pmu, ctx, event); if (IS_ERR(pmu_ctx)) { err = PTR_ERR(pmu_ctx); goto err_locked; } event->pmu_ctx = pmu_ctx; if (output_event) { err = perf_event_set_output(event, output_event); if (err) goto err_context; } if (!perf_event_validate_size(event)) { err = -E2BIG; goto err_context; } if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader)) { err = -EINVAL; goto err_context; } /* * Must be under the same ctx::mutex as perf_install_in_context(), * because we need to serialize with concurrent event creation. */ if (!exclusive_event_installable(event, ctx)) { err = -EBUSY; goto err_context; } WARN_ON_ONCE(ctx->parent_ctx); event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, f_flags); if (IS_ERR(event_file)) { err = PTR_ERR(event_file); event_file = NULL; goto err_context; } /* * This is the point on no return; we cannot fail hereafter. This is * where we start modifying current state. */ if (move_group) { perf_remove_from_context(group_leader, 0); put_pmu_ctx(group_leader->pmu_ctx); for_each_sibling_event(sibling, group_leader) { perf_remove_from_context(sibling, 0); put_pmu_ctx(sibling->pmu_ctx); } /* * Install the group siblings before the group leader. * * Because a group leader will try and install the entire group * (through the sibling list, which is still in-tact), we can * end up with siblings installed in the wrong context. * * By installing siblings first we NO-OP because they're not * reachable through the group lists. */ for_each_sibling_event(sibling, group_leader) { sibling->pmu_ctx = pmu_ctx; get_pmu_ctx(pmu_ctx); perf_event__state_init(sibling); perf_install_in_context(ctx, sibling, sibling->cpu); } /* * Removing from the context ends up with disabled * event. 
What we want here is event in the initial * startup state, ready to be add into new context. */ group_leader->pmu_ctx = pmu_ctx; get_pmu_ctx(pmu_ctx); perf_event__state_init(group_leader); perf_install_in_context(ctx, group_leader, group_leader->cpu); } /* * Precalculate sample_data sizes; do while holding ctx::mutex such * that we're serialized against further additions and before * perf_install_in_context() which is the point the event is active and * can use these values. */ perf_event__header_size(event); perf_event__id_header_size(event); event->owner = current; perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); if (task) { up_read(&task->signal->exec_update_lock); put_task_struct(task); } mutex_lock(&current->perf_event_mutex); list_add_tail(&event->owner_entry, &current->perf_event_list); mutex_unlock(&current->perf_event_mutex); /* * Drop the reference on the group_event after placing the * new event on the sibling_list. This ensures destruction * of the group leader will find the pointer to itself in * perf_group_detach(). */ fdput(group); fd_install(event_fd, event_file); return event_fd; err_context: put_pmu_ctx(event->pmu_ctx); event->pmu_ctx = NULL; /* _free_event() */ err_locked: mutex_unlock(&ctx->mutex); perf_unpin_context(ctx); put_ctx(ctx); err_cred: if (task) up_read(&task->signal->exec_update_lock); err_alloc: free_event(event); err_task: if (task) put_task_struct(task); err_group_fd: fdput(group); err_fd: put_unused_fd(event_fd); return err; } /** * perf_event_create_kernel_counter * * @attr: attributes of the counter to create * @cpu: cpu in which the counter is bound * @task: task to profile (NULL for percpu) * @overflow_handler: callback to trigger when we hit the event * @context: context data could be used in overflow_handler callback */ struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, struct task_struct *task, perf_overflow_handler_t overflow_handler, void *context) { struct perf_event_pmu_context *pmu_ctx; struct perf_event_context *ctx; struct perf_event *event; struct pmu *pmu; int err; /* * Grouping is not supported for kernel events, neither is 'AUX', * make sure the caller's intentions are adjusted. */ if (attr->aux_output) return ERR_PTR(-EINVAL); event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler, context, -1); if (IS_ERR(event)) { err = PTR_ERR(event); goto err; } /* Mark owner so we could distinguish it from user events. */ event->owner = TASK_TOMBSTONE; pmu = event->pmu; if (pmu->task_ctx_nr == perf_sw_context) event->event_caps |= PERF_EV_CAP_SOFTWARE; /* * Get the target context (task or percpu): */ ctx = find_get_context(task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_alloc; } WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); if (ctx->task == TASK_TOMBSTONE) { err = -ESRCH; goto err_unlock; } pmu_ctx = find_get_pmu_context(pmu, ctx, event); if (IS_ERR(pmu_ctx)) { err = PTR_ERR(pmu_ctx); goto err_unlock; } event->pmu_ctx = pmu_ctx; if (!task) { /* * Check if the @cpu we're creating an event for is online. * * We use the perf_cpu_context::ctx::mutex to serialize against * the hotplug notifiers. See perf_event_{init,exit}_cpu(). 
*/ struct perf_cpu_context *cpuctx = container_of(ctx, struct perf_cpu_context, ctx); if (!cpuctx->online) { err = -ENODEV; goto err_pmu_ctx; } } if (!exclusive_event_installable(event, ctx)) { err = -EBUSY; goto err_pmu_ctx; } perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); return event; err_pmu_ctx: put_pmu_ctx(pmu_ctx); event->pmu_ctx = NULL; /* _free_event() */ err_unlock: mutex_unlock(&ctx->mutex); perf_unpin_context(ctx); put_ctx(ctx); err_alloc: free_event(event); err: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); static void __perf_pmu_remove(struct perf_event_context *ctx, int cpu, struct pmu *pmu, struct perf_event_groups *groups, struct list_head *events) { struct perf_event *event, *sibling; perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) { perf_remove_from_context(event, 0); put_pmu_ctx(event->pmu_ctx); list_add(&event->migrate_entry, events); for_each_sibling_event(sibling, event) { perf_remove_from_context(sibling, 0); put_pmu_ctx(sibling->pmu_ctx); list_add(&sibling->migrate_entry, events); } } } static void __perf_pmu_install_event(struct pmu *pmu, struct perf_event_context *ctx, int cpu, struct perf_event *event) { struct perf_event_pmu_context *epc; struct perf_event_context *old_ctx = event->ctx; get_ctx(ctx); /* normally find_get_context() */ event->cpu = cpu; epc = find_get_pmu_context(pmu, ctx, event); event->pmu_ctx = epc; if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; perf_install_in_context(ctx, event, cpu); /* * Now that event->ctx is updated and visible, put the old ctx. */ put_ctx(old_ctx); } static void __perf_pmu_install(struct perf_event_context *ctx, int cpu, struct pmu *pmu, struct list_head *events) { struct perf_event *event, *tmp; /* * Re-instate events in 2 passes. * * Skip over group leaders and only install siblings on this first * pass, siblings will not get enabled without a leader, however a * leader will enable its siblings, even if those are still on the old * context. */ list_for_each_entry_safe(event, tmp, events, migrate_entry) { if (event->group_leader == event) continue; list_del(&event->migrate_entry); __perf_pmu_install_event(pmu, ctx, cpu, event); } /* * Once all the siblings are setup properly, install the group leaders * to make it go. */ list_for_each_entry_safe(event, tmp, events, migrate_entry) { list_del(&event->migrate_entry); __perf_pmu_install_event(pmu, ctx, cpu, event); } } void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) { struct perf_event_context *src_ctx, *dst_ctx; LIST_HEAD(events); /* * Since per-cpu context is persistent, no need to grab an extra * reference. */ src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx; dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx; /* * See perf_event_ctx_lock() for comments on the details * of swizzling perf_event::ctx. */ mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex); __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->pinned_groups, &events); __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->flexible_groups, &events); if (!list_empty(&events)) { /* * Wait for the events to quiesce before re-instating them. 
*/ synchronize_rcu(); __perf_pmu_install(dst_ctx, dst_cpu, pmu, &events); } mutex_unlock(&dst_ctx->mutex); mutex_unlock(&src_ctx->mutex); } EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); static void sync_child_event(struct perf_event *child_event) { struct perf_event *parent_event = child_event->parent; u64 child_val; if (child_event->attr.inherit_stat) { struct task_struct *task = child_event->ctx->task; if (task && task != TASK_TOMBSTONE) perf_event_read_event(child_event, task); } child_val = perf_event_count(child_event); /* * Add back the child's count to the parent's count: */ atomic64_add(child_val, &parent_event->child_count); atomic64_add(child_event->total_time_enabled, &parent_event->child_total_time_enabled); atomic64_add(child_event->total_time_running, &parent_event->child_total_time_running); } static void perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) { struct perf_event *parent_event = event->parent; unsigned long detach_flags = 0; if (parent_event) { /* * Do not destroy the 'original' grouping; because of the * context switch optimization the original events could've * ended up in a random child task. * * If we were to destroy the original group, all group related * operations would cease to function properly after this * random child dies. * * Do destroy all inherited groups, we don't care about those * and being thorough is better. */ detach_flags = DETACH_GROUP | DETACH_CHILD; mutex_lock(&parent_event->child_mutex); } perf_remove_from_context(event, detach_flags); raw_spin_lock_irq(&ctx->lock); if (event->state > PERF_EVENT_STATE_EXIT) perf_event_set_state(event, PERF_EVENT_STATE_EXIT); raw_spin_unlock_irq(&ctx->lock); /* * Child events can be freed. */ if (parent_event) { mutex_unlock(&parent_event->child_mutex); /* * Kick perf_poll() for is_event_hup(); */ perf_event_wakeup(parent_event); free_event(event); put_event(parent_event); return; } /* * Parent events are governed by their filedesc, retain them. */ perf_event_wakeup(event); } static void perf_event_exit_task_context(struct task_struct *child) { struct perf_event_context *child_ctx, *clone_ctx = NULL; struct perf_event *child_event, *next; WARN_ON_ONCE(child != current); child_ctx = perf_pin_task_context(child); if (!child_ctx) return; /* * In order to reduce the amount of trickery in ctx tear-down, we hold * ctx::mutex over the entire thing. This serializes against almost * everything that wants to access the ctx. * * The exception is sys_perf_event_open() / * perf_event_create_kernel_counter() which does find_get_context() * without ctx::mutex (it cannot because of the move_group double mutex * lock thing). See the comments in perf_install_in_context(). */ mutex_lock(&child_ctx->mutex); /* * In a single ctx::lock section, de-schedule the events and detach the * context from the task such that we cannot ever get it scheduled back * in. */ raw_spin_lock_irq(&child_ctx->lock); task_ctx_sched_out(child_ctx, EVENT_ALL); /* * Now that the context is inactive, destroy the task <-> ctx relation * and mark the context dead. */ RCU_INIT_POINTER(child->perf_event_ctxp, NULL); put_ctx(child_ctx); /* cannot be last */ WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE); put_task_struct(current); /* cannot be last */ clone_ctx = unclone_ctx(child_ctx); raw_spin_unlock_irq(&child_ctx->lock); if (clone_ctx) put_ctx(clone_ctx); /* * Report the task dead after unscheduling the events so that we * won't get any samples after PERF_RECORD_EXIT. We can however still * get a few PERF_RECORD_READ events. 
*/ perf_event_task(child, child_ctx, 0); list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) perf_event_exit_event(child_event, child_ctx); mutex_unlock(&child_ctx->mutex); put_ctx(child_ctx); } /* * When a child task exits, feed back event values to parent events. * * Can be called with exec_update_lock held when called from * setup_new_exec(). */ void perf_event_exit_task(struct task_struct *child) { struct perf_event *event, *tmp; mutex_lock(&child->perf_event_mutex); list_for_each_entry_safe(event, tmp, &child->perf_event_list, owner_entry) { list_del_init(&event->owner_entry); /* * Ensure the list deletion is visible before we clear * the owner, closes a race against perf_release() where * we need to serialize on the owner->perf_event_mutex. */ smp_store_release(&event->owner, NULL); } mutex_unlock(&child->perf_event_mutex); perf_event_exit_task_context(child); /* * The perf_event_exit_task_context calls perf_event_task * with child's task_ctx, which generates EXIT events for * child contexts and sets child->perf_event_ctxp[] to NULL. * At this point we need to send EXIT events to cpu contexts. */ perf_event_task(child, NULL, 0); } static void perf_free_event(struct perf_event *event, struct perf_event_context *ctx) { struct perf_event *parent = event->parent; if (WARN_ON_ONCE(!parent)) return; mutex_lock(&parent->child_mutex); list_del_init(&event->child_list); mutex_unlock(&parent->child_mutex); put_event(parent); raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); free_event(event); } /* * Free a context as created by inheritance by perf_event_init_task() below, * used by fork() in case of fail. * * Even though the task has never lived, the context and events have been * exposed through the child_list, so we must take care tearing it all down. */ void perf_event_free_task(struct task_struct *task) { struct perf_event_context *ctx; struct perf_event *event, *tmp; ctx = rcu_access_pointer(task->perf_event_ctxp); if (!ctx) return; mutex_lock(&ctx->mutex); raw_spin_lock_irq(&ctx->lock); /* * Destroy the task <-> ctx relation and mark the context dead. * * This is important because even though the task hasn't been * exposed yet the context has been (through child_list). */ RCU_INIT_POINTER(task->perf_event_ctxp, NULL); WRITE_ONCE(ctx->task, TASK_TOMBSTONE); put_task_struct(task); /* cannot be last */ raw_spin_unlock_irq(&ctx->lock); list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) perf_free_event(event, ctx); mutex_unlock(&ctx->mutex); /* * perf_event_release_kernel() could've stolen some of our * child events and still have them on its free_list. In that * case we must wait for these events to have been freed (in * particular all their references to this task must've been * dropped). * * Without this copy_process() will unconditionally free this * task (irrespective of its reference count) and * _free_event()'s put_task_struct(event->hw.target) will be a * use-after-free. * * Wait for all events to drop their context reference. 
*/ wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1); put_ctx(ctx); /* must be last */ } void perf_event_delayed_put(struct task_struct *task) { WARN_ON_ONCE(task->perf_event_ctxp); } struct file *perf_event_get(unsigned int fd) { struct file *file = fget(fd); if (!file) return ERR_PTR(-EBADF); if (file->f_op != &perf_fops) { fput(file); return ERR_PTR(-EBADF); } return file; } const struct perf_event *perf_get_event(struct file *file) { if (file->f_op != &perf_fops) return ERR_PTR(-EINVAL); return file->private_data; } const struct perf_event_attr *perf_event_attrs(struct perf_event *event) { if (!event) return ERR_PTR(-EINVAL); return &event->attr; } /* * Inherit an event from parent task to child task. * * Returns: * - valid pointer on success * - NULL for orphaned events * - IS_ERR() on error */ static struct perf_event * inherit_event(struct perf_event *parent_event, struct task_struct *parent, struct perf_event_context *parent_ctx, struct task_struct *child, struct perf_event *group_leader, struct perf_event_context *child_ctx) { enum perf_event_state parent_state = parent_event->state; struct perf_event_pmu_context *pmu_ctx; struct perf_event *child_event; unsigned long flags; /* * Instead of creating recursive hierarchies of events, * we link inherited events back to the original parent, * which has a filp for sure, which we use as the reference * count: */ if (parent_event->parent) parent_event = parent_event->parent; child_event = perf_event_alloc(&parent_event->attr, parent_event->cpu, child, group_leader, parent_event, NULL, NULL, -1); if (IS_ERR(child_event)) return child_event; pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); if (IS_ERR(pmu_ctx)) { free_event(child_event); return ERR_CAST(pmu_ctx); } child_event->pmu_ctx = pmu_ctx; /* * is_orphaned_event() and list_add_tail(&parent_event->child_list) * must be under the same lock in order to serialize against * perf_event_release_kernel(), such that either we must observe * is_orphaned_event() or they will observe us on the child_list. */ mutex_lock(&parent_event->child_mutex); if (is_orphaned_event(parent_event) || !atomic_long_inc_not_zero(&parent_event->refcount)) { mutex_unlock(&parent_event->child_mutex); /* task_ctx_data is freed with child_ctx */ free_event(child_event); return NULL; } get_ctx(child_ctx); /* * Make the child state follow the state of the parent event, * not its attr.disabled bit. We hold the parent's mutex, * so we won't race with perf_event_{en, dis}able_family. 
*/ if (parent_state >= PERF_EVENT_STATE_INACTIVE) child_event->state = PERF_EVENT_STATE_INACTIVE; else child_event->state = PERF_EVENT_STATE_OFF; if (parent_event->attr.freq) { u64 sample_period = parent_event->hw.sample_period; struct hw_perf_event *hwc = &child_event->hw; hwc->sample_period = sample_period; hwc->last_period = sample_period; local64_set(&hwc->period_left, sample_period); } child_event->ctx = child_ctx; child_event->overflow_handler = parent_event->overflow_handler; child_event->overflow_handler_context = parent_event->overflow_handler_context; /* * Precalculate sample_data sizes */ perf_event__header_size(child_event); perf_event__id_header_size(child_event); /* * Link it up in the child's context: */ raw_spin_lock_irqsave(&child_ctx->lock, flags); add_event_to_ctx(child_event, child_ctx); child_event->attach_state |= PERF_ATTACH_CHILD; raw_spin_unlock_irqrestore(&child_ctx->lock, flags); /* * Link this into the parent event's child list */ list_add_tail(&child_event->child_list, &parent_event->child_list); mutex_unlock(&parent_event->child_mutex); return child_event; } /* * Inherits an event group. * * This will quietly suppress orphaned events; !inherit_event() is not an error. * This matches with perf_event_release_kernel() removing all child events. * * Returns: * - 0 on success * - <0 on error */ static int inherit_group(struct perf_event *parent_event, struct task_struct *parent, struct perf_event_context *parent_ctx, struct task_struct *child, struct perf_event_context *child_ctx) { struct perf_event *leader; struct perf_event *sub; struct perf_event *child_ctr; leader = inherit_event(parent_event, parent, parent_ctx, child, NULL, child_ctx); if (IS_ERR(leader)) return PTR_ERR(leader); /* * @leader can be NULL here because of is_orphaned_event(). In this * case inherit_event() will create individual events, similar to what * perf_group_detach() would do anyway. */ for_each_sibling_event(sub, parent_event) { child_ctr = inherit_event(sub, parent, parent_ctx, child, leader, child_ctx); if (IS_ERR(child_ctr)) return PTR_ERR(child_ctr); if (sub->aux_event == parent_event && child_ctr && !perf_get_aux_event(child_ctr, leader)) return -EINVAL; } if (leader) leader->group_generation = parent_event->group_generation; return 0; } /* * Creates the child task context and tries to inherit the event-group. * * Clears @inherited_all on !attr.inherited or error. Note that we'll leave * inherited_all set when we 'fail' to inherit an orphaned event; this is * consistent with perf_event_release_kernel() removing all child events. * * Returns: * - 0 on success * - <0 on error */ static int inherit_task_group(struct perf_event *event, struct task_struct *parent, struct perf_event_context *parent_ctx, struct task_struct *child, u64 clone_flags, int *inherited_all) { struct perf_event_context *child_ctx; int ret; if (!event->attr.inherit || (event->attr.inherit_thread && !(clone_flags & CLONE_THREAD)) || /* Do not inherit if sigtrap and signal handlers were cleared. */ (event->attr.sigtrap && (clone_flags & CLONE_CLEAR_SIGHAND))) { *inherited_all = 0; return 0; } child_ctx = child->perf_event_ctxp; if (!child_ctx) { /* * This is executed from the parent task context, so * inherit events that have been marked for cloning. * First allocate and initialize a context for the * child. 
*/ child_ctx = alloc_perf_context(child); if (!child_ctx) return -ENOMEM; child->perf_event_ctxp = child_ctx; } ret = inherit_group(event, parent, parent_ctx, child, child_ctx); if (ret) *inherited_all = 0; return ret; } /* * Initialize the perf_event context in task_struct */ static int perf_event_init_context(struct task_struct *child, u64 clone_flags) { struct perf_event_context *child_ctx, *parent_ctx; struct perf_event_context *cloned_ctx; struct perf_event *event; struct task_struct *parent = current; int inherited_all = 1; unsigned long flags; int ret = 0; if (likely(!parent->perf_event_ctxp)) return 0; /* * If the parent's context is a clone, pin it so it won't get * swapped under us. */ parent_ctx = perf_pin_task_context(parent); if (!parent_ctx) return 0; /* * No need to check if parent_ctx != NULL here; since we saw * it non-NULL earlier, the only reason for it to become NULL * is if we exit, and since we're currently in the middle of * a fork we can't be exiting at the same time. */ /* * Lock the parent list. No need to lock the child - not PID * hashed yet and not running, so nobody can access it. */ mutex_lock(&parent_ctx->mutex); /* * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: */ perf_event_groups_for_each(event, &parent_ctx->pinned_groups) { ret = inherit_task_group(event, parent, parent_ctx, child, clone_flags, &inherited_all); if (ret) goto out_unlock; } /* * We can't hold ctx->lock when iterating the ->flexible_group list due * to allocations, but we need to prevent rotation because * rotate_ctx() will change the list from interrupt context. */ raw_spin_lock_irqsave(&parent_ctx->lock, flags); parent_ctx->rotate_disable = 1; raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); perf_event_groups_for_each(event, &parent_ctx->flexible_groups) { ret = inherit_task_group(event, parent, parent_ctx, child, clone_flags, &inherited_all); if (ret) goto out_unlock; } raw_spin_lock_irqsave(&parent_ctx->lock, flags); parent_ctx->rotate_disable = 0; child_ctx = child->perf_event_ctxp; if (child_ctx && inherited_all) { /* * Mark the child context as a clone of the parent * context, or of whatever the parent is a clone of. * * Note that if the parent is a clone, the holding of * parent_ctx->lock avoids it from being uncloned. 
*/ cloned_ctx = parent_ctx->parent_ctx; if (cloned_ctx) { child_ctx->parent_ctx = cloned_ctx; child_ctx->parent_gen = parent_ctx->parent_gen; } else { child_ctx->parent_ctx = parent_ctx; child_ctx->parent_gen = parent_ctx->generation; } get_ctx(child_ctx->parent_ctx); } raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); out_unlock: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); put_ctx(parent_ctx); return ret; } /* * Initialize the perf_event context in task_struct */ int perf_event_init_task(struct task_struct *child, u64 clone_flags) { int ret; child->perf_event_ctxp = NULL; mutex_init(&child->perf_event_mutex); INIT_LIST_HEAD(&child->perf_event_list); ret = perf_event_init_context(child, clone_flags); if (ret) { perf_event_free_task(child); return ret; } return 0; } static void __init perf_event_init_all_cpus(void) { struct swevent_htable *swhash; struct perf_cpu_context *cpuctx; int cpu; zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL); for_each_possible_cpu(cpu) { swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu)); raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu)); INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu)); cpuctx = per_cpu_ptr(&perf_cpu_context, cpu); __perf_event_init_context(&cpuctx->ctx); lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask); cpuctx->heap_size = ARRAY_SIZE(cpuctx->heap_default); cpuctx->heap = cpuctx->heap_default; } } static void perf_swevent_init_cpu(unsigned int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); if (swhash->hlist_refcount > 0 && !swevent_hlist_deref(swhash)) { struct swevent_hlist *hlist; hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); WARN_ON(!hlist); rcu_assign_pointer(swhash->swevent_hlist, hlist); } mutex_unlock(&swhash->hlist_mutex); } #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE static void __perf_event_exit_context(void *__info) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_event_context *ctx = __info; struct perf_event *event; raw_spin_lock(&ctx->lock); ctx_sched_out(ctx, EVENT_TIME); list_for_each_entry(event, &ctx->event_list, event_entry) __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP); raw_spin_unlock(&ctx->lock); } static void perf_event_exit_cpu_context(int cpu) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; // XXX simplify cpuctx->online mutex_lock(&pmus_lock); cpuctx = per_cpu_ptr(&perf_cpu_context, cpu); ctx = &cpuctx->ctx; mutex_lock(&ctx->mutex); smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); cpuctx->online = 0; mutex_unlock(&ctx->mutex); cpumask_clear_cpu(cpu, perf_online_mask); mutex_unlock(&pmus_lock); } #else static void perf_event_exit_cpu_context(int cpu) { } #endif int perf_event_init_cpu(unsigned int cpu) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; perf_swevent_init_cpu(cpu); mutex_lock(&pmus_lock); cpumask_set_cpu(cpu, perf_online_mask); cpuctx = per_cpu_ptr(&perf_cpu_context, cpu); ctx = &cpuctx->ctx; mutex_lock(&ctx->mutex); cpuctx->online = 1; mutex_unlock(&ctx->mutex); mutex_unlock(&pmus_lock); return 0; } int perf_event_exit_cpu(unsigned int cpu) { perf_event_exit_cpu_context(cpu); return 0; } static int perf_reboot(struct notifier_block *notifier, unsigned long val, void *v) { int cpu; 
for_each_online_cpu(cpu) perf_event_exit_cpu(cpu); return NOTIFY_OK; } /* * Run the perf reboot notifier at the very last possible moment so that * the generic watchdog code runs as long as possible. */ static struct notifier_block perf_reboot_notifier = { .notifier_call = perf_reboot, .priority = INT_MIN, }; void __init perf_event_init(void) { int ret; idr_init(&pmu_idr); perf_event_init_all_cpus(); init_srcu_struct(&pmus_srcu); perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE); perf_pmu_register(&perf_cpu_clock, "cpu_clock", -1); perf_pmu_register(&perf_task_clock, "task_clock", -1); perf_tp_register(); perf_event_init_cpu(smp_processor_id()); register_reboot_notifier(&perf_reboot_notifier); ret = init_hw_breakpoint(); WARN(ret, "hw_breakpoint initialization failed with: %d", ret); perf_event_cache = KMEM_CACHE(perf_event, SLAB_PANIC); /* * Build time assertion that we keep the data_head at the intended * location. IOW, validation we got the __reserved[] size right. */ BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head)) != 1024); } ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); if (pmu_attr->event_str) return sprintf(page, "%s\n", pmu_attr->event_str); return 0; } EXPORT_SYMBOL_GPL(perf_event_sysfs_show); static int __init perf_event_sysfs_init(void) { struct pmu *pmu; int ret; mutex_lock(&pmus_lock); ret = bus_register(&pmu_bus); if (ret) goto unlock; list_for_each_entry(pmu, &pmus, entry) { if (pmu->dev) continue; ret = pmu_dev_alloc(pmu); WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret); } pmu_bus_running = 1; ret = 0; unlock: mutex_unlock(&pmus_lock); return ret; } device_initcall(perf_event_sysfs_init); #ifdef CONFIG_CGROUP_PERF static struct cgroup_subsys_state * perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) { struct perf_cgroup *jc; jc = kzalloc(sizeof(*jc), GFP_KERNEL); if (!jc) return ERR_PTR(-ENOMEM); jc->info = alloc_percpu(struct perf_cgroup_info); if (!jc->info) { kfree(jc); return ERR_PTR(-ENOMEM); } return &jc->css; } static void perf_cgroup_css_free(struct cgroup_subsys_state *css) { struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css); free_percpu(jc->info); kfree(jc); } static int perf_cgroup_css_online(struct cgroup_subsys_state *css) { perf_event_cgroup(css->cgroup); return 0; } static int __perf_cgroup_move(void *info) { struct task_struct *task = info; preempt_disable(); perf_cgroup_switch(task); preempt_enable(); return 0; } static void perf_cgroup_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *css; cgroup_taskset_for_each(task, css, tset) task_function_call(task, __perf_cgroup_move, task); } struct cgroup_subsys perf_event_cgrp_subsys = { .css_alloc = perf_cgroup_css_alloc, .css_free = perf_cgroup_css_free, .css_online = perf_cgroup_css_online, .attach = perf_cgroup_attach, /* * Implicitly enable on dfl hierarchy so that perf events can * always be filtered by cgroup2 path as long as perf_event * controller is not mounted on a legacy hierarchy. */ .implicit_on_dfl = true, .threaded = true, }; #endif /* CONFIG_CGROUP_PERF */ DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
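The error-unwind ladder near the top of this section is the tail of perf_event_create_kernel_counter(), which core.c exports for in-kernel users such as the hardlockup watchdog. A minimal, hedged usage sketch follows; the attr values and the names example_overflow() / example_cycle_counter() are illustrative assumptions, not code from this file:

/* Illustrative sketch only, not part of core.c. */
#include <linux/errno.h>
#include <linux/perf_event.h>

/* Overflow callback; runs in NMI/IRQ context when the sample period elapses. */
static void example_overflow(struct perf_event *event,
			     struct perf_sample_data *data,
			     struct pt_regs *regs)
{
	/* e.g. kick a watchdog or count the interrupt. */
}

/* Create a per-CPU cycle counter bound to @cpu (task == NULL). */
static struct perf_event *example_cycle_counter(int cpu)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.size		= sizeof(attr),
		.pinned		= 1,
		.sample_period	= 1000000,
	};

	/*
	 * As the unwind path above shows, an offline CPU yields
	 * ERR_PTR(-ENODEV) and an exclusive-event conflict yields
	 * ERR_PTR(-EBUSY), so callers must check the result with IS_ERR().
	 */
	return perf_event_create_kernel_counter(&attr, cpu, NULL,
						example_overflow, NULL);
}

The returned event is later torn down with perf_event_release_kernel() once the caller is done with it.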
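perf_pmu_migrate_context(), also exported above, is what uncore-style PMU drivers call from their CPU-hotplug teardown path to hand the per-CPU events of a dying CPU to a surviving one. A hedged sketch of that pattern (example_pmu and example_cpu_offline() are assumed names; real drivers additionally track which CPU currently owns the PMU):

/* Illustrative sketch only, not part of core.c. */
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/perf_event.h>

/* Stands in for a driver's uncore PMU instance. */
static struct pmu example_pmu;

/* CPUHP teardown callback: migrate events to another online CPU. */
static int example_cpu_offline(unsigned int cpu)
{
	unsigned int target;

	target = cpumask_any_but(cpu_online_mask, cpu);
	if (target >= nr_cpu_ids)
		return 0;	/* last CPU going away; nothing to migrate */

	perf_pmu_migrate_context(&example_pmu, cpu, target);
	return 0;
}

__perf_pmu_remove() and __perf_pmu_install() above do the heavy lifting: events are detached from the source context, quiesced with synchronize_rcu(), and re-installed sibling-first so that group leaders only start once their siblings are already in place.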
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* internal.h: mm/ internal definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef __MM_INTERNAL_H #define __MM_INTERNAL_H #include <linux/fs.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/tracepoint-defs.h> struct folio_batch; /* * The set of flags that only affect watermark checking and reclaim * behaviour. This is used by the MM to obey the caller constraints * about IO, FS and watermark checking while ignoring placement * hints such as HIGHMEM usage. */ #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ __GFP_NOLOCKDEP) /* The GFP flags allowed during early boot */ #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) /* Control allocation cpuset and node placement constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) /* Do not use these with a slab allocator */ #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) /* * Different from WARN_ON_ONCE(), no warning will be issued * when we specify __GFP_NOWARN.
*/ #define WARN_ON_ONCE_GFP(cond, gfp) ({ \ static bool __section(".data.once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \ __warned = true; \ WARN_ON(1); \ } \ unlikely(__ret_warn_once); \ }) void page_writeback_init(void); /* * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages, * its nr_pages_mapped would be 0x400000: choose the ENTIRELY_MAPPED bit * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently * leaves nr_pages_mapped at 0, but avoid surprise if it participates later. */ #define ENTIRELY_MAPPED 0x800000 #define FOLIO_PAGES_MAPPED (ENTIRELY_MAPPED - 1) /* * Flags passed to __show_mem() and show_free_areas() to suppress output in * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ /* * How many individual pages have an elevated _mapcount. Excludes * the folio's entire_mapcount. */ static inline int folio_nr_pages_mapped(struct folio *folio) { return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED; } static inline void *folio_raw_mapping(struct folio *folio) { unsigned long mapping = (unsigned long)folio->mapping; return (void *)(mapping & ~PAGE_MAPPING_FLAGS); } #ifdef CONFIG_MMU /* Flags for folio_pte_batch(). */ typedef int __bitwise fpb_t; /* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */ #define FPB_IGNORE_DIRTY ((__force fpb_t)BIT(0)) /* Compare PTEs after pte_clear_soft_dirty(), ignoring the soft-dirty bit. */ #define FPB_IGNORE_SOFT_DIRTY ((__force fpb_t)BIT(1)) static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags) { if (flags & FPB_IGNORE_DIRTY) pte = pte_mkclean(pte); if (likely(flags & FPB_IGNORE_SOFT_DIRTY)) pte = pte_clear_soft_dirty(pte); return pte_wrprotect(pte_mkold(pte)); } /** * folio_pte_batch - detect a PTE batch for a large folio * @folio: The large folio to detect a PTE batch for. * @addr: The user virtual address the first page is mapped at. * @start_ptep: Page table pointer for the first entry. * @pte: Page table entry for the first page. * @max_nr: The maximum number of table entries to consider. * @flags: Flags to modify the PTE batch semantics. * @any_writable: Optional pointer to indicate whether any entry except the * first one is writable. * * Detect a PTE batch: consecutive (present) PTEs that map consecutive * pages of the same large folio. * * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN, * the accessed bit, writable bit, dirty bit (with FPB_IGNORE_DIRTY) and * soft-dirty bit (with FPB_IGNORE_SOFT_DIRTY). * * start_ptep must map any page of the folio. max_nr must be at least one and * must be limited by the caller so scanning cannot exceed a single page table. * * Return: the number of table entries in the batch. 
*/ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags, bool *any_writable) { unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio); const pte_t *end_ptep = start_ptep + max_nr; pte_t expected_pte, *ptep; bool writable; int nr; if (any_writable) *any_writable = false; VM_WARN_ON_FOLIO(!pte_present(pte), folio); VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio); VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio); nr = pte_batch_hint(start_ptep, pte); expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags); ptep = start_ptep + nr; while (ptep < end_ptep) { pte = ptep_get(ptep); if (any_writable) writable = !!pte_write(pte); pte = __pte_batch_clear_ignored(pte, flags); if (!pte_same(pte, expected_pte)) break; /* * Stop immediately once we reached the end of the folio. In * corner cases the next PFN might fall into a different * folio. */ if (pte_pfn(pte) >= folio_end_pfn) break; if (any_writable) *any_writable |= writable; nr = pte_batch_hint(ptep, pte); expected_pte = pte_advance_pfn(expected_pte, nr); ptep += nr; } return min(ptep - start_ptep, max_nr); } #endif /* CONFIG_MMU */ void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio, int nr_throttled); static inline void acct_reclaim_writeback(struct folio *folio) { pg_data_t *pgdat = folio_pgdat(folio); int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled); if (nr_throttled) __acct_reclaim_writeback(pgdat, folio, nr_throttled); } static inline void wake_throttle_isolated(pg_data_t *pgdat) { wait_queue_head_t *wqh; wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED]; if (waitqueue_active(wqh)) wake_up(wqh); } vm_fault_t vmf_anon_prepare(struct vm_fault *vmf); vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); bool __folio_end_writeback(struct folio *folio); void deactivate_file_folio(struct folio *folio); void folio_activate(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling, bool mm_wr_locked); void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); struct zap_details; void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details); void page_cache_ra_order(struct readahead_control *, struct file_ra_state *, unsigned int order); void force_page_cache_ra(struct readahead_control *, unsigned long nr); static inline void force_page_cache_readahead(struct address_space *mapping, struct file *file, pgoff_t index, unsigned long nr_to_read) { DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index); force_page_cache_ra(&ractl, nr_to_read); } unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); unsigned find_get_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); void filemap_free_folio(struct address_space *mapping, struct folio *folio); int truncate_inode_folio(struct address_space *mapping, struct folio *folio); bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end); long mapping_evict_folio(struct address_space *mapping, struct folio *folio); unsigned long mapping_try_invalidate(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long 
*nr_failed); /** * folio_evictable - Test whether a folio is evictable. * @folio: The folio to test. * * Test whether @folio is evictable -- i.e., should be placed on * active/inactive lists vs unevictable list. * * Reasons folio might not be evictable: * 1. folio's mapping marked unevictable * 2. One of the pages in the folio is part of an mlocked VMA */ static inline bool folio_evictable(struct folio *folio) { bool ret; /* Prevent address_space of inode and swap cache from being freed */ rcu_read_lock(); ret = !mapping_unevictable(folio_mapping(folio)) && !folio_test_mlocked(folio); rcu_read_unlock(); return ret; } /* * Turn a non-refcounted page (->_refcount == 0) into refcounted with * a count of one. */ static inline void set_page_refcounted(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(page_ref_count(page), page); set_page_count(page, 1); } /* * Return true if a folio needs ->release_folio() calling upon it. */ static inline bool folio_needs_release(struct folio *folio) { struct address_space *mapping = folio_mapping(folio); return folio_has_private(folio) || (mapping && mapping_release_always(mapping)); } extern unsigned long highest_memmap_pfn; /* * Maximum number of reclaim retries without progress before the OOM * killer is consider the only way forward. */ #define MAX_RECLAIM_RETRIES 16 /* * in mm/vmscan.c: */ bool isolate_lru_page(struct page *page); bool folio_isolate_lru(struct folio *folio); void putback_lru_page(struct page *page); void folio_putback_lru(struct folio *folio); extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason); /* * in mm/rmap.c: */ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address); /* * in mm/page_alloc.c */ #define K(x) ((x) << (PAGE_SHIFT-10)) extern char * const zone_names[MAX_NR_ZONES]; /* perform sanity checks on struct pages being allocated or freed */ DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); extern int min_free_kbytes; void setup_per_zone_wmarks(void); void calculate_min_free_kbytes(void); int __meminit init_per_zone_wmark_min(void); void page_alloc_sysctl_init(void); /* * Structure for holding the mostly immutable allocation parameters passed * between functions involved in allocations, including the alloc_pages* * family of functions. * * nodemask, migratetype and highest_zoneidx are initialized only once in * __alloc_pages() and then never change. * * zonelist, preferred_zone and highest_zoneidx are set first in * __alloc_pages() for the fast path, and might be later changed * in __alloc_pages_slowpath(). All other functions pass the whole structure * by a const pointer. */ struct alloc_context { struct zonelist *zonelist; nodemask_t *nodemask; struct zoneref *preferred_zoneref; int migratetype; /* * highest_zoneidx represents highest usable zone index of * the allocation request. Due to the nature of the zone, * memory on lower zone than the highest_zoneidx will be * protected by lowmem_reserve[highest_zoneidx]. * * highest_zoneidx is also used by reclaim/compaction to limit * the target zone since higher zone than this index cannot be * usable for this allocation request. */ enum zone_type highest_zoneidx; bool spread_dirty_pages; }; /* * This function returns the order of a free page in the buddy system. In * general, page_zone(page)->lock must be held by the caller to prevent the * page from being allocated in parallel and returning garbage as the order. 
* If a caller does not hold page_zone(page)->lock, it must guarantee that the * page cannot be allocated or merged in parallel. Alternatively, it must * handle invalid values gracefully, and use buddy_order_unsafe() below. */ static inline unsigned int buddy_order(struct page *page) { /* PageBuddy() must be checked by the caller */ return page_private(page); } /* * Like buddy_order(), but for callers who cannot afford to hold the zone lock. * PageBuddy() should be checked first by the caller to minimize race window, * and invalid values must be handled gracefully. * * READ_ONCE is used so that if the caller assigns the result into a local * variable and e.g. tests it for valid range before using, the compiler cannot * decide to remove the variable and inline the page_private(page) multiple * times, potentially observing different values in the tests and the actual * use of the result. */ #define buddy_order_unsafe(page) READ_ONCE(page_private(page)) /* * This function checks whether a page is free && is the buddy * we can coalesce a page and its buddy if * (a) the buddy is not in a hole (check before calling!) && * (b) the buddy is in the buddy system && * (c) a page and its buddy have the same order && * (d) a page and its buddy are in the same zone. * * For recording whether a page is in the buddy system, we set PageBuddy. * Setting, clearing, and testing PageBuddy is serialized by zone->lock. * * For recording page's order, we use page_private(page). */ static inline bool page_is_buddy(struct page *page, struct page *buddy, unsigned int order) { if (!page_is_guard(buddy) && !PageBuddy(buddy)) return false; if (buddy_order(buddy) != order) return false; /* * zone check is done late to avoid uselessly calculating * zone/node ids for pages that could never merge. */ if (page_zone_id(page) != page_zone_id(buddy)) return false; VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); return true; } /* * Locate the struct page for both the matching buddy in our * pair (buddy1) and the combined O(n+1) page they form (page). * * 1) Any buddy B1 will have an order O twin B2 which satisfies * the following equation: * B2 = B1 ^ (1 << O) * For example, if the starting buddy (buddy2) is #8 its order * 1 buddy is #10: * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10 * * 2) Any buddy B will have an order O+1 parent P which * satisfies the following equation: * P = B & ~(1 << O) * * Assumption: *_mem_map is contiguous at least up to MAX_PAGE_ORDER */ static inline unsigned long __find_buddy_pfn(unsigned long page_pfn, unsigned int order) { return page_pfn ^ (1 << order); } /* * Find the buddy of @page and validate it. * @page: The input page * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the * function is used in the performance-critical __free_one_page(). * @order: The order of the page * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to * page_to_pfn(). * * The found buddy can be a non PageBuddy, out of @page's zone, or its order is * not the same as @page. The validation is necessary before use it. * * Return: the found buddy page or NULL if not found. 
*/ static inline struct page *find_buddy_page_pfn(struct page *page, unsigned long pfn, unsigned int order, unsigned long *buddy_pfn) { unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order); struct page *buddy; buddy = page + (__buddy_pfn - pfn); if (buddy_pfn) *buddy_pfn = __buddy_pfn; if (page_is_buddy(page, buddy, order)) return buddy; return NULL; } extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone); static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn, unsigned long end_pfn, struct zone *zone) { if (zone->contiguous) return pfn_to_page(start_pfn); return __pageblock_pfn_to_page(start_pfn, end_pfn, zone); } void set_zone_contiguous(struct zone *zone); static inline void clear_zone_contiguous(struct zone *zone) { zone->contiguous = false; } extern int __isolate_free_page(struct page *page, unsigned int order); extern void __putback_isolated_page(struct page *page, unsigned int order, int mt); extern void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order); extern void __free_pages_core(struct page *page, unsigned int order); /* * This will have no effect, other than possibly generating a warning, if the * caller passes in a non-large folio. */ static inline void folio_set_order(struct folio *folio, unsigned int order) { if (WARN_ON_ONCE(!order || !folio_test_large(folio))) return; folio->_flags_1 = (folio->_flags_1 & ~0xffUL) | order; #ifdef CONFIG_64BIT folio->_folio_nr_pages = 1U << order; #endif } void folio_undo_large_rmappable(struct folio *folio); static inline struct folio *page_rmappable_folio(struct page *page) { struct folio *folio = (struct folio *)page; folio_prep_large_rmappable(folio); return folio; } static inline void prep_compound_head(struct page *page, unsigned int order) { struct folio *folio = (struct folio *)page; folio_set_order(folio, order); atomic_set(&folio->_entire_mapcount, -1); atomic_set(&folio->_nr_pages_mapped, 0); atomic_set(&folio->_pincount, 0); } static inline void prep_compound_tail(struct page *head, int tail_idx) { struct page *p = head + tail_idx; p->mapping = TAIL_MAPPING; set_compound_head(p, head); set_page_private(p, 0); } extern void prep_compound_page(struct page *page, unsigned int order); extern void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern bool free_pages_prepare(struct page *page, unsigned int order); extern int user_min_free_kbytes; void free_unref_page(struct page *page, unsigned int order); void free_unref_folios(struct folio_batch *fbatch); extern void zone_pcp_reset(struct zone *zone); extern void zone_pcp_disable(struct zone *zone); extern void zone_pcp_enable(struct zone *zone); extern void zone_pcp_init(struct zone *zone); extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, int nid, bool exact_nid); void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int); int split_free_page(struct page *free_page, unsigned int order, unsigned long split_pfn_offset); #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* * in mm/compaction.c */ /* * compact_control is used to track pages being migrated and the free pages * they are being migrated to during memory compaction. The free_pfn starts * at the end of a zone and migrate_pfn begins at the start. 
Movable pages * are moved to the end of a zone during a compaction run and the run * completes when free_pfn <= migrate_pfn */ struct compact_control { struct list_head freepages[NR_PAGE_ORDERS]; /* List of free pages to migrate to */ struct list_head migratepages; /* List of pages being migrated */ unsigned int nr_freepages; /* Number of isolated free pages */ unsigned int nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ /* * Acts as an in/out parameter to page isolation for migration. * isolate_migratepages uses it as a search base. * isolate_migratepages_block will update the value to the next pfn * after the last isolated one. */ unsigned long migrate_pfn; unsigned long fast_start_pfn; /* a pfn to start linear scan from */ struct zone *zone; unsigned long total_migrate_scanned; unsigned long total_free_scanned; unsigned short fast_search_fail;/* failures to use free list searches */ short search_order; /* order to start a fast search at */ const gfp_t gfp_mask; /* gfp mask of a direct compactor */ int order; /* order a direct compactor needs */ int migratetype; /* migratetype of direct compactor */ const unsigned int alloc_flags; /* alloc flags of a direct compactor */ const int highest_zoneidx; /* zone index of a direct compactor */ enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool no_set_skip_hint; /* Don't mark blocks for skipping */ bool ignore_block_suitable; /* Scan blocks considered unsuitable */ bool direct_compaction; /* False from kcompactd or /proc/... */ bool proactive_compaction; /* kcompactd proactive compaction */ bool whole_zone; /* Whole zone should/has been scanned */ bool contended; /* Signal lock contention */ bool finish_pageblock; /* Scan the remainder of a pageblock. Used * when there are potentially transient * isolation or migration failures to * ensure forward progress. */ bool alloc_contig; /* alloc_contig_range allocation */ }; /* * Used in direct compaction when a page should be taken from the freelists * immediately when one is created during the free path. */ struct capture_control { struct compact_control *cc; struct page *page; }; unsigned long isolate_freepages_range(struct compact_control *cc, unsigned long start_pfn, unsigned long end_pfn); int isolate_migratepages_range(struct compact_control *cc, unsigned long low_pfn, unsigned long end_pfn); int __alloc_contig_migrate_range(struct compact_control *cc, unsigned long start, unsigned long end, int migratetype); /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ void init_cma_reserved_pageblock(struct page *page); #endif /* CONFIG_COMPACTION || CONFIG_CMA */ int find_suitable_fallback(struct free_area *area, unsigned int order, int migratetype, bool only_stealable, bool *can_steal); static inline bool free_area_empty(struct free_area *area, int migratetype) { return list_empty(&area->free_list[migratetype]); } /* * These three helpers classifies VMAs for virtual memory accounting. */ /* * Executable code area - executable, not writable, not stack */ static inline bool is_exec_mapping(vm_flags_t flags) { return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC; } /* * Stack area (including shadow stacks) * * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous: * do_mmap() forbids all other combinations. 
*/ static inline bool is_stack_mapping(vm_flags_t flags) { return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK); } /* * Data area - private, writable, not stack */ static inline bool is_data_mapping(vm_flags_t flags) { return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE; } /* mm/util.c */ struct anon_vma *folio_anon_vma(struct folio *folio); #ifdef CONFIG_MMU void unmap_mapping_folio(struct folio *folio); extern long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked); extern long faultin_page_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool write, int *locked); extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags, unsigned long bytes); /* * NOTE: This function can't tell whether the folio is "fully mapped" in the * range. * "fully mapped" means all the pages of folio is associated with the page * table of range while this function just check whether the folio range is * within the range [start, end). Function caller needs to do page table * check if it cares about the page table association. * * Typical usage (like mlock or madvise) is: * Caller knows at least 1 page of folio is associated with page table of VMA * and the range [start, end) is intersect with the VMA range. Caller wants * to know whether the folio is fully associated with the range. It calls * this function to check whether the folio is in the range first. Then checks * the page table to know whether the folio is fully mapped to the range. */ static inline bool folio_within_range(struct folio *folio, struct vm_area_struct *vma, unsigned long start, unsigned long end) { pgoff_t pgoff, addr; unsigned long vma_pglen = vma_pages(vma); VM_WARN_ON_FOLIO(folio_test_ksm(folio), folio); if (start > end) return false; if (start < vma->vm_start) start = vma->vm_start; if (end > vma->vm_end) end = vma->vm_end; pgoff = folio_pgoff(folio); /* if folio start address is not in vma range */ if (!in_range(pgoff, vma->vm_pgoff, vma_pglen)) return false; addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); return !(addr < start || end - addr < folio_size(folio)); } static inline bool folio_within_vma(struct folio *folio, struct vm_area_struct *vma) { return folio_within_range(folio, vma, vma->vm_start, vma->vm_end); } /* * mlock_vma_folio() and munlock_vma_folio(): * should be called with vma's mmap_lock held for read or write, * under page table lock for the pte/pmd being added or removed. * * mlock is usually called at the end of folio_add_*_rmap_*(), munlock at * the end of folio_remove_rmap_*(); but new anon folios are managed by * folio_add_lru_vma() calling mlock_new_folio(). */ void mlock_folio(struct folio *folio); static inline void mlock_vma_folio(struct folio *folio, struct vm_area_struct *vma) { /* * The VM_SPECIAL check here serves two purposes. * 1) VM_IO check prevents migration from double-counting during mlock. * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED * is never left set on a VM_SPECIAL vma, there is an interval while * file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may * still be set while VM_SPECIAL bits are added: so ignore it then. */ if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED)) mlock_folio(folio); } void munlock_folio(struct folio *folio); static inline void munlock_vma_folio(struct folio *folio, struct vm_area_struct *vma) { /* * munlock if the function is called. 
Ideally, we should only * do munlock if any page of folio is unmapped from VMA and * cause folio not fully mapped to VMA. * * But it's not easy to confirm that's the situation. So we * always munlock the folio and page reclaim will correct it * if it's wrong. */ if (unlikely(vma->vm_flags & VM_LOCKED)) munlock_folio(folio); } void mlock_new_folio(struct folio *folio); bool need_mlock_drain(int cpu); void mlock_drain_local(void); void mlock_drain_remote(int cpu); extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); /* * Return the start of user virtual address at the specific offset within * a vma. */ static inline unsigned long vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages, struct vm_area_struct *vma) { unsigned long address; if (pgoff >= vma->vm_pgoff) { address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); /* Check for address beyond vma (or wrapped through 0?) */ if (address < vma->vm_start || address >= vma->vm_end) address = -EFAULT; } else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) { /* Test above avoids possibility of wrap to 0 on 32-bit */ address = vma->vm_start; } else { address = -EFAULT; } return address; } /* * Return the start of user virtual address of a page within a vma. * Returns -EFAULT if all of the page is outside the range of vma. * If page is a compound head, the entire compound page is considered. */ static inline unsigned long vma_address(struct page *page, struct vm_area_struct *vma) { VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ return vma_pgoff_address(page_to_pgoff(page), compound_nr(page), vma); } /* * Then at what user virtual address will none of the range be found in vma? * Assumes that vma_address() already returned a good starting address. */ static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw) { struct vm_area_struct *vma = pvmw->vma; pgoff_t pgoff; unsigned long address; /* Common case, plus ->pgoff is invalid for KSM */ if (pvmw->nr_pages == 1) return pvmw->address + PAGE_SIZE; pgoff = pvmw->pgoff + pvmw->nr_pages; address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); /* Check for address beyond vma (or wrapped through 0?) */ if (address < vma->vm_start || address > vma->vm_end) address = vma->vm_end; return address; } static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, struct file *fpin) { int flags = vmf->flags; if (fpin) return fpin; /* * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or * anything, so we only pin the file and drop the mmap_lock if only * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt. 
*/ if (fault_flag_allow_retry_first(flags) && !(flags & FAULT_FLAG_RETRY_NOWAIT)) { fpin = get_file(vmf->vma->vm_file); release_fault_lock(vmf); } return fpin; } #else /* !CONFIG_MMU */ static inline void unmap_mapping_folio(struct folio *folio) { } static inline void mlock_new_folio(struct folio *folio) { } static inline bool need_mlock_drain(int cpu) { return false; } static inline void mlock_drain_local(void) { } static inline void mlock_drain_remote(int cpu) { } static inline void vunmap_range_noflush(unsigned long start, unsigned long end) { } #endif /* !CONFIG_MMU */ /* Memory initialisation debug and verification */ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT DECLARE_STATIC_KEY_TRUE(deferred_pages); bool __init deferred_grow_zone(struct zone *zone, unsigned int order); #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ enum mminit_level { MMINIT_WARNING, MMINIT_VERIFY, MMINIT_TRACE }; #ifdef CONFIG_DEBUG_MEMORY_INIT extern int mminit_loglevel; #define mminit_dprintk(level, prefix, fmt, arg...) \ do { \ if (level < mminit_loglevel) { \ if (level <= MMINIT_WARNING) \ pr_warn("mminit::" prefix " " fmt, ##arg); \ else \ printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \ } \ } while (0) extern void mminit_verify_pageflags_layout(void); extern void mminit_verify_zonelist(void); #else static inline void mminit_dprintk(enum mminit_level level, const char *prefix, const char *fmt, ...) { } static inline void mminit_verify_pageflags_layout(void) { } static inline void mminit_verify_zonelist(void) { } #endif /* CONFIG_DEBUG_MEMORY_INIT */ #define NODE_RECLAIM_NOSCAN -2 #define NODE_RECLAIM_FULL -1 #define NODE_RECLAIM_SOME 0 #define NODE_RECLAIM_SUCCESS 1 #ifdef CONFIG_NUMA extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); extern int find_next_best_node(int node, nodemask_t *used_node_mask); #else static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, unsigned int order) { return NODE_RECLAIM_NOSCAN; } static inline int find_next_best_node(int node, nodemask_t *used_node_mask) { return NUMA_NO_NODE; } #endif /* * mm/memory-failure.c */ extern int hwpoison_filter(struct page *p); extern u32 hwpoison_filter_dev_major; extern u32 hwpoison_filter_dev_minor; extern u64 hwpoison_filter_flags_mask; extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable; extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern void set_pageblock_order(void); unsigned long reclaim_pages(struct list_head *folio_list, bool ignore_references); unsigned int reclaim_clean_pages_from_list(struct zone *zone, struct list_head *folio_list); /* The ALLOC_WMARK bits are used as an index to zone->watermark */ #define ALLOC_WMARK_MIN WMARK_MIN #define ALLOC_WMARK_LOW WMARK_LOW #define ALLOC_WMARK_HIGH WMARK_HIGH #define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */ /* Mask to get the watermark bits */ #define ALLOC_WMARK_MASK (ALLOC_NO_WATERMARKS-1) /* * Only MMU archs have async oom victim reclaim - aka oom_reaper so we * cannot assume a reduced access to memory reserves is sufficient for * !MMU */ #ifdef CONFIG_MMU #define ALLOC_OOM 0x08 #else #define ALLOC_OOM ALLOC_NO_WATERMARKS #endif #define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access * to 25% of the min watermark or * 62.5% if __GFP_HIGH is set. */ #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% * of the min watermark. 
*/ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ #ifdef CONFIG_ZONE_DMA32 #define ALLOC_NOFRAGMENT 0x100 /* avoid mixing pageblock types */ #else #define ALLOC_NOFRAGMENT 0x0 #endif #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ /* Flags that allow allocations below the min watermark. */ #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) enum ttu_flags; struct tlbflush_unmap_batch; /* * only for MM internal work items which do not depend on * any allocations or locks which might depend on allocations */ extern struct workqueue_struct *mm_percpu_wq; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); void flush_tlb_batched_pending(struct mm_struct *mm); #else static inline void try_to_unmap_flush(void) { } static inline void try_to_unmap_flush_dirty(void) { } static inline void flush_tlb_batched_pending(struct mm_struct *mm) { } #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ extern const struct trace_print_flags pageflag_names[]; extern const struct trace_print_flags pagetype_names[]; extern const struct trace_print_flags vmaflag_names[]; extern const struct trace_print_flags gfpflag_names[]; static inline bool is_migrate_highatomic(enum migratetype migratetype) { return migratetype == MIGRATE_HIGHATOMIC; } static inline bool is_migrate_highatomic_page(struct page *page) { return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC; } void setup_zone_pageset(struct zone *zone); struct migration_target_control { int nid; /* preferred node id */ nodemask_t *nmask; gfp_t gfp_mask; }; /* * mm/filemap.c */ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, struct folio *folio, loff_t fpos, size_t size); /* * mm/vmalloc.c */ #ifdef CONFIG_MMU void __init vmalloc_init(void); int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); #else static inline void vmalloc_init(void) { } static inline int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { return -EINVAL; } #endif int __must_check __vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); void vunmap_range_noflush(unsigned long start, unsigned long end); void __vunmap_range_noflush(unsigned long start, unsigned long end); int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags); void free_zone_device_page(struct page *page); int migrate_device_coherent_page(struct page *page); /* * mm/gup.c */ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); int __must_check try_grab_page(struct page *page, unsigned int flags); /* * mm/huge_memory.c */ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags); /* * mm/mmap.c */ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long delta); enum { /* mark page accessed */ FOLL_TOUCH = 1 << 16, /* a retry, previous pass started an IO */ FOLL_TRIED = 1 << 17, /* we are working on non-current tsk/mm */ FOLL_REMOTE = 1 << 18, /* pages must be released via unpin_user_page */ FOLL_PIN = 1 
<< 19, /* gup_fast: prevent fall-back to slow gup */ FOLL_FAST_ONLY = 1 << 20, /* allow unlocking the mmap lock */ FOLL_UNLOCKABLE = 1 << 21, /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */ FOLL_MADV_POPULATE = 1 << 22, }; #define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \ FOLL_MADV_POPULATE) /* * Indicates for which pages that are write-protected in the page table, * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the * GUP pin will remain consistent with the pages mapped into the page tables * of the MM. * * Temporary unmapping of PageAnonExclusive() pages or clearing of * PageAnonExclusive() has to protect against concurrent GUP: * * Ordinary GUP: Using the PT lock * * GUP-fast and fork(): mm->write_protect_seq * * GUP-fast and KSM or temporary unmapping (swap, migration): see * folio_try_share_anon_rmap_*() * * Must be called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a * PTE-mapped THP. * * If the vma is NULL, we're coming from the GUP-fast path and might have * to fallback to the slow path just to lookup the vma. */ static inline bool gup_must_unshare(struct vm_area_struct *vma, unsigned int flags, struct page *page) { /* * FOLL_WRITE is implicitly handled correctly as the page table entry * has to be writable -- and if it references (part of) an anonymous * folio, that part is required to be marked exclusive. */ if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) return false; /* * Note: PageAnon(page) is stable until the page is actually getting * freed. */ if (!PageAnon(page)) { /* * We only care about R/O long-term pining: R/O short-term * pinning does not have the semantics to observe successive * changes through the process page tables. */ if (!(flags & FOLL_LONGTERM)) return false; /* We really need the vma ... */ if (!vma) return true; /* * ... because we only care about writable private ("COW") * mappings where we have to break COW early. */ return is_cow_mapping(vma->vm_flags); } /* Paired with a memory barrier in folio_try_share_anon_rmap_*(). */ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) smp_rmb(); /* * During GUP-fast we might not get called on the head page for a * hugetlb page that is mapped using cont-PTE, because GUP-fast does * not work with the abstracted hugetlb PTEs that always point at the * head page. For hugetlb, PageAnonExclusive only applies on the head * page (as it cannot be partially COW-shared), so lookup the head page. */ if (unlikely(!PageHead(page) && PageHuge(page))) page = compound_head(page); /* * Note that PageKsm() pages cannot be exclusive, and consequently, * cannot get pinned. */ return !PageAnonExclusive(page); } extern bool mirrored_kernelcore; extern bool memblock_has_mirror(void); static __always_inline void vma_set_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff) { vma->vm_start = start; vma->vm_end = end; vma->vm_pgoff = pgoff; } static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) { /* * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty * enablements, because when without soft-dirty being compiled in, * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) * will be constantly true. */ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) return false; /* * Soft-dirty is kind of special: its tracking is enabled when the * vma flags not set. 
*/ return !(vma->vm_flags & VM_SOFTDIRTY); } static inline void vma_iter_config(struct vma_iterator *vmi, unsigned long index, unsigned long last) { __mas_set_range(&vmi->mas, index, last - 1); } /* * VMA Iterator functions shared between nommu and mmap */ static inline int vma_iter_prealloc(struct vma_iterator *vmi, struct vm_area_struct *vma) { return mas_preallocate(&vmi->mas, vma, GFP_KERNEL); } static inline void vma_iter_clear(struct vma_iterator *vmi) { mas_store_prealloc(&vmi->mas, NULL); } static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi) { return mas_walk(&vmi->mas); } /* Store a VMA with preallocated memory */ static inline void vma_iter_store(struct vma_iterator *vmi, struct vm_area_struct *vma) { #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start && vmi->mas.index > vma->vm_start)) { pr_warn("%lx > %lx\n store vma %lx-%lx\n into slot %lx-%lx\n", vmi->mas.index, vma->vm_start, vma->vm_start, vma->vm_end, vmi->mas.index, vmi->mas.last); } if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start && vmi->mas.last < vma->vm_start)) { pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n", vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end, vmi->mas.index, vmi->mas.last); } #endif if (vmi->mas.status != ma_start && ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) vma_iter_invalidate(vmi); __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); mas_store_prealloc(&vmi->mas, vma); } static inline int vma_iter_store_gfp(struct vma_iterator *vmi, struct vm_area_struct *vma, gfp_t gfp) { if (vmi->mas.status != ma_start && ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) vma_iter_invalidate(vmi); __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); mas_store_gfp(&vmi->mas, vma, gfp); if (unlikely(mas_is_err(&vmi->mas))) return -ENOMEM; return 0; } /* * VMA lock generalization */ struct vma_prepare { struct vm_area_struct *vma; struct vm_area_struct *adj_next; struct file *file; struct address_space *mapping; struct anon_vma *anon_vma; struct vm_area_struct *insert; struct vm_area_struct *remove; struct vm_area_struct *remove2; }; void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long zone, int nid); /* shrinker related functions */ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); #ifdef CONFIG_SHRINKER_DEBUG static inline __printf(2, 0) int shrinker_debugfs_name_alloc( struct shrinker *shrinker, const char *fmt, va_list ap) { shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); return shrinker->name ? 
0 : -ENOMEM; } static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) { kfree_const(shrinker->name); shrinker->name = NULL; } extern int shrinker_debugfs_add(struct shrinker *shrinker); extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id); extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id); #else /* CONFIG_SHRINKER_DEBUG */ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, const char *fmt, va_list ap) { return 0; } static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) { } static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id) { *debugfs_id = -1; return NULL; } static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id) { } #endif /* CONFIG_SHRINKER_DEBUG */ /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); extern struct list_lru shadow_nodes; #endif /* __MM_INTERNAL_H */
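The internal FOLL_* values defined above occupy bits 16-22 and are OR-ed together into INTERNAL_GUP_FLAGS. A small, stand-alone user-space sketch (illustrative only, not kernel code; it merely mirrors the bit values shown in this header) of how such a mask can be used to screen a caller-supplied flags word:

#include <stdio.h>

/* Same bit positions as the enum in mm/internal.h above. */
#define FOLL_TOUCH		(1u << 16)
#define FOLL_TRIED		(1u << 17)
#define FOLL_REMOTE		(1u << 18)
#define FOLL_PIN		(1u << 19)
#define FOLL_FAST_ONLY		(1u << 20)
#define FOLL_UNLOCKABLE		(1u << 21)
#define FOLL_MADV_POPULATE	(1u << 22)

#define INTERNAL_GUP_FLAGS	(FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | \
				 FOLL_PIN | FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
				 FOLL_MADV_POPULATE)

int main(void)
{
	unsigned int gup_flags = FOLL_PIN | FOLL_FAST_ONLY;	/* example word */

	/* The combined mask covers every internal-only bit (0x7f0000). */
	printf("internal mask: %#x\n", INTERNAL_GUP_FLAGS);

	/* A flags word carrying any of those bits can be detected in one test. */
	if (gup_flags & INTERNAL_GUP_FLAGS)
		printf("flags %#x contain internal-only bits\n", gup_flags);
	return 0;
}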
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Integer base 2 logarithm calculation * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_LOG2_H #define _LINUX_LOG2_H #include <linux/types.h> #include <linux/bitops.h> /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented * more efficiently than using fls() and fls64() * - the arch is not required to handle n==0 if implementing the fallback */ #ifndef CONFIG_ARCH_HAS_ILOG2_U32 static __always_inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; } #endif #ifndef CONFIG_ARCH_HAS_ILOG2_U64 static __always_inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; } #endif /** * is_power_of_2() - check if a value is a power of two * @n: the value to check * * Determine whether some value is a power of two, where zero is * *not* considered a power of two. * Return: true if @n is a power of 2, otherwise false. */ static inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); } /** * __roundup_pow_of_two() - round up to nearest power of two * @n: value to round up */ static inline __attribute__((const)) unsigned long __roundup_pow_of_two(unsigned long n) { return 1UL << fls_long(n - 1); } /** * __rounddown_pow_of_two() - round down to nearest power of two * @n: value to round down */ static inline __attribute__((const)) unsigned long __rounddown_pow_of_two(unsigned long n) { return 1UL << (fls_long(n) - 1); } /** * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value * @n: parameter * * Use this where sparse expects a true constant expression, e.g. for array * indices. */ #define const_ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ (n) & (1ULL << 60) ? 60 : \ (n) & (1ULL << 59) ? 59 : \ (n) & (1ULL << 58) ? 58 : \ (n) & (1ULL << 57) ? 57 : \ (n) & (1ULL << 56) ? 56 : \ (n) & (1ULL << 55) ? 55 : \ (n) & (1ULL << 54) ? 54 : \ (n) & (1ULL << 53) ? 53 : \ (n) & (1ULL << 52) ? 52 : \ (n) & (1ULL << 51) ? 51 : \ (n) & (1ULL << 50) ? 50 : \ (n) & (1ULL << 49) ? 49 : \ (n) & (1ULL << 48) ? 48 : \ (n) & (1ULL << 47) ? 47 : \ (n) & (1ULL << 46) ? 46 : \ (n) & (1ULL << 45) ? 45 : \ (n) & (1ULL << 44) ? 44 : \ (n) & (1ULL << 43) ? 43 : \ (n) & (1ULL << 42) ? 42 : \ (n) & (1ULL << 41) ? 41 : \ (n) & (1ULL << 40) ? 40 : \ (n) & (1ULL << 39) ? 39 : \ (n) & (1ULL << 38) ? 38 : \ (n) & (1ULL << 37) ?
37 : \ (n) & (1ULL << 36) ? 36 : \ (n) & (1ULL << 35) ? 35 : \ (n) & (1ULL << 34) ? 34 : \ (n) & (1ULL << 33) ? 33 : \ (n) & (1ULL << 32) ? 32 : \ (n) & (1ULL << 31) ? 31 : \ (n) & (1ULL << 30) ? 30 : \ (n) & (1ULL << 29) ? 29 : \ (n) & (1ULL << 28) ? 28 : \ (n) & (1ULL << 27) ? 27 : \ (n) & (1ULL << 26) ? 26 : \ (n) & (1ULL << 25) ? 25 : \ (n) & (1ULL << 24) ? 24 : \ (n) & (1ULL << 23) ? 23 : \ (n) & (1ULL << 22) ? 22 : \ (n) & (1ULL << 21) ? 21 : \ (n) & (1ULL << 20) ? 20 : \ (n) & (1ULL << 19) ? 19 : \ (n) & (1ULL << 18) ? 18 : \ (n) & (1ULL << 17) ? 17 : \ (n) & (1ULL << 16) ? 16 : \ (n) & (1ULL << 15) ? 15 : \ (n) & (1ULL << 14) ? 14 : \ (n) & (1ULL << 13) ? 13 : \ (n) & (1ULL << 12) ? 12 : \ (n) & (1ULL << 11) ? 11 : \ (n) & (1ULL << 10) ? 10 : \ (n) & (1ULL << 9) ? 9 : \ (n) & (1ULL << 8) ? 8 : \ (n) & (1ULL << 7) ? 7 : \ (n) & (1ULL << 6) ? 6 : \ (n) & (1ULL << 5) ? 5 : \ (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ 1) : \ -1) /** * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value * @n: parameter * * constant-capable log of base 2 calculation * - this can be used to initialise global variables from constant data, hence * the massive ternary operator construction * * selects the appropriately-sized optimised version depending on sizeof(n) */ #define ilog2(n) \ ( \ __builtin_constant_p(n) ? \ ((n) < 2 ? 0 : \ 63 - __builtin_clzll(n)) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ ) /** * roundup_pow_of_two - round the given value up to nearest power of two * @n: parameter * * round the given value up to the nearest power of two * - the result is undefined when n == 0 * - this can be used to initialise global variables from constant data */ #define roundup_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ ((n) == 1) ? 1 : \ (1UL << (ilog2((n) - 1) + 1)) \ ) : \ __roundup_pow_of_two(n) \ ) /** * rounddown_pow_of_two - round the given value down to nearest power of two * @n: parameter * * round the given value down to the nearest power of two * - the result is undefined when n == 0 * - this can be used to initialise global variables from constant data */ #define rounddown_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ (1UL << ilog2(n))) : \ __rounddown_pow_of_two(n) \ ) static inline __attribute_const__ int __order_base_2(unsigned long n) { return n > 1 ? ilog2(n - 1) + 1 : 0; } /** * order_base_2 - calculate the (rounded up) base 2 order of the argument * @n: parameter * * The first few values calculated by this routine: * ob2(0) = 0 * ob2(1) = 0 * ob2(2) = 1 * ob2(3) = 2 * ob2(4) = 2 * ob2(5) = 3 * ... and so on. */ #define order_base_2(n) \ ( \ __builtin_constant_p(n) ? ( \ ((n) == 0 || (n) == 1) ? 0 : \ ilog2((n) - 1) + 1) : \ __order_base_2(n) \ ) static inline __attribute__((const)) int __bits_per(unsigned long n) { if (n < 2) return 1; if (is_power_of_2(n)) return order_base_2(n) + 1; return order_base_2(n); } /** * bits_per - calculate the number of bits required for the argument * @n: parameter * * This is constant-capable and can be used for compile time * initializations, e.g bitfields. * * The first few values calculated by this routine: * bf(0) = 1 * bf(1) = 1 * bf(2) = 2 * bf(3) = 2 * bf(4) = 3 * ... and so on. */ #define bits_per(n) \ ( \ __builtin_constant_p(n) ? ( \ ((n) == 0 || (n) == 1) \ ? 1 : ilog2(n) + 1 \ ) : \ __bits_per(n) \ ) #endif /* _LINUX_LOG2_H */
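The kernel-doc tables above for order_base_2() and bits_per() are easy to check with a stand-alone user-space program. The sketch below is illustrative only: it substitutes a plain shift loop for fls()/__builtin_clzll(), but computes the same values as the documented examples (ob2(3) = 2, bf(4) = 3, and so on).

#include <stdio.h>

static int demo_ilog2(unsigned long n)		/* floor(log2(n)), n > 0 */
{
	int r = -1;

	while (n) {
		n >>= 1;
		r++;
	}
	return r;
}

static int demo_order_base_2(unsigned long n)	/* matches __order_base_2() */
{
	return n > 1 ? demo_ilog2(n - 1) + 1 : 0;
}

static int demo_bits_per(unsigned long n)	/* matches the bits_per() table */
{
	if (n < 2)
		return 1;
	return demo_ilog2(n) + 1;
}

int main(void)
{
	unsigned long v;

	/* Reproduces the "ob2(...)" and "bf(...)" examples from the kernel-doc. */
	for (v = 0; v <= 5; v++)
		printf("ob2(%lu) = %d, bf(%lu) = %d\n",
		       v, demo_order_base_2(v), v, demo_bits_per(v));
	return 0;
}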
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2017 Red Hat, Inc */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/libps2.h> #include <linux/i2c.h> #include <linux/serio.h> #include <linux/slab.h> #include <linux/workqueue.h> #include "psmouse.h" struct psmouse_smbus_dev { struct i2c_board_info board; struct psmouse *psmouse; struct i2c_client *client; struct list_head node; bool dead; bool need_deactivate; }; static LIST_HEAD(psmouse_smbus_list); static DEFINE_MUTEX(psmouse_smbus_mutex); static struct workqueue_struct *psmouse_smbus_wq; static void psmouse_smbus_check_adapter(struct i2c_adapter *adapter) { struct psmouse_smbus_dev *smbdev; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_HOST_NOTIFY)) return; mutex_lock(&psmouse_smbus_mutex); list_for_each_entry(smbdev, &psmouse_smbus_list, node) { if (smbdev->dead) continue; if (smbdev->client) continue; /* * Here would be a good place to check if device is actually * present, but it seems that SMBus will not respond unless we * fully reset PS/2 connection. So cross our fingers, and try * to switch over, hopefully our system will not have too many * "host notify" I2C adapters.
*/ psmouse_dbg(smbdev->psmouse, "SMBus candidate adapter appeared, triggering rescan\n"); serio_rescan(smbdev->psmouse->ps2dev.serio); } mutex_unlock(&psmouse_smbus_mutex); } static void psmouse_smbus_detach_i2c_client(struct i2c_client *client) { struct psmouse_smbus_dev *smbdev, *tmp; mutex_lock(&psmouse_smbus_mutex); list_for_each_entry_safe(smbdev, tmp, &psmouse_smbus_list, node) { if (smbdev->client != client) continue; kfree(client->dev.platform_data); client->dev.platform_data = NULL; if (!smbdev->dead) { psmouse_dbg(smbdev->psmouse, "Marking SMBus companion %s as gone\n", dev_name(&smbdev->client->dev)); smbdev->dead = true; device_link_remove(&smbdev->client->dev, &smbdev->psmouse->ps2dev.serio->dev); serio_rescan(smbdev->psmouse->ps2dev.serio); } else { list_del(&smbdev->node); kfree(smbdev); } } mutex_unlock(&psmouse_smbus_mutex); } static int psmouse_smbus_notifier_call(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; switch (action) { case BUS_NOTIFY_ADD_DEVICE: if (dev->type == &i2c_adapter_type) psmouse_smbus_check_adapter(to_i2c_adapter(dev)); break; case BUS_NOTIFY_REMOVED_DEVICE: if (dev->type == &i2c_client_type) psmouse_smbus_detach_i2c_client(to_i2c_client(dev)); break; } return 0; } static struct notifier_block psmouse_smbus_notifier = { .notifier_call = psmouse_smbus_notifier_call, }; static psmouse_ret_t psmouse_smbus_process_byte(struct psmouse *psmouse) { return PSMOUSE_FULL_PACKET; } static int psmouse_smbus_reconnect(struct psmouse *psmouse) { struct psmouse_smbus_dev *smbdev = psmouse->private; if (smbdev->need_deactivate) psmouse_deactivate(psmouse); return 0; } struct psmouse_smbus_removal_work { struct work_struct work; struct i2c_client *client; }; static void psmouse_smbus_remove_i2c_device(struct work_struct *work) { struct psmouse_smbus_removal_work *rwork = container_of(work, struct psmouse_smbus_removal_work, work); dev_dbg(&rwork->client->dev, "destroying SMBus companion device\n"); i2c_unregister_device(rwork->client); kfree(rwork); } /* * This schedules removal of SMBus companion device. We have to do * it in a separate tread to avoid deadlocking on psmouse_mutex in * case the device has a trackstick (which is also driven by psmouse). * * Note that this may be racing with i2c adapter removal, but we * can't do anything about that: i2c automatically destroys clients * attached to an adapter that is being removed. This has to be * fixed in i2c core. 
*/ static void psmouse_smbus_schedule_remove(struct i2c_client *client) { struct psmouse_smbus_removal_work *rwork; rwork = kzalloc(sizeof(*rwork), GFP_KERNEL); if (rwork) { INIT_WORK(&rwork->work, psmouse_smbus_remove_i2c_device); rwork->client = client; queue_work(psmouse_smbus_wq, &rwork->work); } } static void psmouse_smbus_disconnect(struct psmouse *psmouse) { struct psmouse_smbus_dev *smbdev = psmouse->private; mutex_lock(&psmouse_smbus_mutex); if (smbdev->dead) { list_del(&smbdev->node); kfree(smbdev); } else { smbdev->dead = true; device_link_remove(&smbdev->client->dev, &psmouse->ps2dev.serio->dev); psmouse_dbg(smbdev->psmouse, "posting removal request for SMBus companion %s\n", dev_name(&smbdev->client->dev)); psmouse_smbus_schedule_remove(smbdev->client); } mutex_unlock(&psmouse_smbus_mutex); psmouse->private = NULL; } static int psmouse_smbus_create_companion(struct device *dev, void *data) { struct psmouse_smbus_dev *smbdev = data; unsigned short addr_list[] = { smbdev->board.addr, I2C_CLIENT_END }; struct i2c_adapter *adapter; struct i2c_client *client; adapter = i2c_verify_adapter(dev); if (!adapter) return 0; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_HOST_NOTIFY)) return 0; client = i2c_new_scanned_device(adapter, &smbdev->board, addr_list, NULL); if (IS_ERR(client)) return 0; /* We have our(?) device, stop iterating i2c bus. */ smbdev->client = client; return 1; } void psmouse_smbus_cleanup(struct psmouse *psmouse) { struct psmouse_smbus_dev *smbdev, *tmp; mutex_lock(&psmouse_smbus_mutex); list_for_each_entry_safe(smbdev, tmp, &psmouse_smbus_list, node) { if (psmouse == smbdev->psmouse) { list_del(&smbdev->node); kfree(smbdev); } } mutex_unlock(&psmouse_smbus_mutex); } int psmouse_smbus_init(struct psmouse *psmouse, const struct i2c_board_info *board, const void *pdata, size_t pdata_size, bool need_deactivate, bool leave_breadcrumbs) { struct psmouse_smbus_dev *smbdev; int error; smbdev = kzalloc(sizeof(*smbdev), GFP_KERNEL); if (!smbdev) return -ENOMEM; smbdev->psmouse = psmouse; smbdev->board = *board; smbdev->need_deactivate = need_deactivate; if (pdata) { smbdev->board.platform_data = kmemdup(pdata, pdata_size, GFP_KERNEL); if (!smbdev->board.platform_data) { kfree(smbdev); return -ENOMEM; } } if (need_deactivate) psmouse_deactivate(psmouse); psmouse->private = smbdev; psmouse->protocol_handler = psmouse_smbus_process_byte; psmouse->reconnect = psmouse_smbus_reconnect; psmouse->fast_reconnect = psmouse_smbus_reconnect; psmouse->disconnect = psmouse_smbus_disconnect; psmouse->resync_time = 0; mutex_lock(&psmouse_smbus_mutex); list_add_tail(&smbdev->node, &psmouse_smbus_list); mutex_unlock(&psmouse_smbus_mutex); /* Bind to already existing adapters right away */ error = i2c_for_each_dev(smbdev, psmouse_smbus_create_companion); if (smbdev->client) { /* We have our companion device */ if (!device_link_add(&smbdev->client->dev, &psmouse->ps2dev.serio->dev, DL_FLAG_STATELESS)) psmouse_warn(psmouse, "failed to set up link with iSMBus companion %s\n", dev_name(&smbdev->client->dev)); return 0; } /* * If we did not create i2c device we will not need platform * data even if we are leaving breadcrumbs. */ kfree(smbdev->board.platform_data); smbdev->board.platform_data = NULL; if (error < 0 || !leave_breadcrumbs) { mutex_lock(&psmouse_smbus_mutex); list_del(&smbdev->node); mutex_unlock(&psmouse_smbus_mutex); kfree(smbdev); } return error < 0 ? 
error : -EAGAIN; } int __init psmouse_smbus_module_init(void) { int error; psmouse_smbus_wq = alloc_workqueue("psmouse-smbus", 0, 0); if (!psmouse_smbus_wq) return -ENOMEM; error = bus_register_notifier(&i2c_bus_type, &psmouse_smbus_notifier); if (error) { pr_err("failed to register i2c bus notifier: %d\n", error); destroy_workqueue(psmouse_smbus_wq); return error; } return 0; } void psmouse_smbus_module_exit(void) { bus_unregister_notifier(&i2c_bus_type, &psmouse_smbus_notifier); destroy_workqueue(psmouse_smbus_wq); }
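The comment before psmouse_smbus_schedule_remove() explains why the companion device is destroyed from a workqueue rather than inline: the calling context holds psmouse_mutex, which the teardown path may need again. As a rough user-space analogue (illustrative only; it uses a detached pthread instead of the kernel workqueue API, and all names are made up), the same defer-the-destruction pattern looks like this:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Hypothetical object whose teardown must not run in the caller's context. */
struct companion {
	char name[16];
};

struct removal_work {
	struct companion *victim;
};

/* Worker: performs the teardown outside the caller's locking context. */
static void *removal_worker(void *arg)
{
	struct removal_work *w = arg;

	printf("destroying companion %s\n", w->victim->name);
	free(w->victim);
	free(w);
	return NULL;
}

/* Called while "locks" are held: post the destruction instead of doing it. */
static void schedule_removal(struct companion *c)
{
	struct removal_work *w = malloc(sizeof(*w));
	pthread_t tid;

	if (!w)
		return;
	w->victim = c;
	if (pthread_create(&tid, NULL, removal_worker, w) == 0)
		pthread_detach(tid);
	else
		free(w);
}

int main(void)
{
	struct companion *c = malloc(sizeof(*c));

	if (!c)
		return 1;
	snprintf(c->name, sizeof(c->name), "demo");
	schedule_removal(c);
	sleep(1);	/* give the worker time to run before exiting */
	return 0;
}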
6 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMAN_H #define _LINUX_MMAN_H #include <linux/mm.h> #include <linux/percpu_counter.h> #include <linux/atomic.h> #include <uapi/linux/mman.h> /* * Arrange for legacy / undefined architecture specific flags to be * ignored by mmap handling code. */ #ifndef MAP_32BIT #define MAP_32BIT 0 #endif #ifndef MAP_ABOVE4G #define MAP_ABOVE4G 0 #endif #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB 0 #endif #ifndef MAP_HUGE_1GB #define MAP_HUGE_1GB 0 #endif #ifndef MAP_UNINITIALIZED #define MAP_UNINITIALIZED 0 #endif #ifndef MAP_SYNC #define MAP_SYNC 0 #endif /* * The historical set of flags that all mmap implementations implicitly * support when a ->mmap_validate() op is not provided in file_operations. * * MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the * kernel. */ #define LEGACY_MAP_MASK (MAP_SHARED \ | MAP_PRIVATE \ | MAP_FIXED \ | MAP_ANONYMOUS \ | MAP_DENYWRITE \ | MAP_EXECUTABLE \ | MAP_UNINITIALIZED \ | MAP_GROWSDOWN \ | MAP_LOCKED \ | MAP_NORESERVE \ | MAP_POPULATE \ | MAP_NONBLOCK \ | MAP_STACK \ | MAP_HUGETLB \ | MAP_32BIT \ | MAP_ABOVE4G \ | MAP_HUGE_2MB \ | MAP_HUGE_1GB) extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; extern struct percpu_counter vm_committed_as; #ifdef CONFIG_SMP extern s32 vm_committed_as_batch; extern void mm_compute_batch(int overcommit_policy); #else #define vm_committed_as_batch 0 static inline void mm_compute_batch(int overcommit_policy) { } #endif unsigned long vm_memory_committed(void); static inline void vm_acct_memory(long pages) { percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch); } static inline void vm_unacct_memory(long pages) { vm_acct_memory(-pages); } /* * Allow architectures to handle additional protection and flag bits. The * overriding macros must be defined in the arch-specific asm/mman.h file. */ #ifndef arch_calc_vm_prot_bits #define arch_calc_vm_prot_bits(prot, pkey) 0 #endif #ifndef arch_calc_vm_flag_bits #define arch_calc_vm_flag_bits(flags) 0 #endif #ifndef arch_validate_prot /* * This is called from mprotect(). PROT_GROWSDOWN and PROT_GROWSUP have * already been masked out. * * Returns true if the prot flags are valid */ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) { return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0; } #define arch_validate_prot arch_validate_prot #endif #ifndef arch_validate_flags /* * This is called from mmap() and mprotect() with the updated vma->vm_flags. * * Returns true if the VM_* flags are valid. */ static inline bool arch_validate_flags(unsigned long flags) { return true; } #define arch_validate_flags arch_validate_flags #endif /* * Optimisation macro. It is equivalent to: * (x & bit1) ? 
bit2 : 0 * but this version is faster. * ("bit1" and "bit2" must be single bits) */ #define _calc_vm_trans(x, bit1, bit2) \ ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ : ((x) & (bit1)) / ((bit1) / (bit2)))) /* * Combine the mmap "prot" argument into "vm_flags" used internally. */ static inline unsigned long calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { return _calc_vm_trans(prot, PROT_READ, VM_READ ) | _calc_vm_trans(prot, PROT_WRITE, VM_WRITE) | _calc_vm_trans(prot, PROT_EXEC, VM_EXEC) | arch_calc_vm_prot_bits(prot, pkey); } /* * Combine the mmap "flags" argument into "vm_flags" used internally. */ static inline unsigned long calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } unsigned long vm_commit_limit(void); #ifndef arch_memory_deny_write_exec_supported static inline bool arch_memory_deny_write_exec_supported(void) { return true; } #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported #endif /* * Denies creating a writable executable mapping or gaining executable permissions. * * This denies the following: * * a) mmap(PROT_WRITE | PROT_EXEC) * * b) mmap(PROT_WRITE) * mprotect(PROT_EXEC) * * c) mmap(PROT_WRITE) * mprotect(PROT_READ) * mprotect(PROT_EXEC) * * But allows the following: * * d) mmap(PROT_READ | PROT_EXEC) * mmap(PROT_READ | PROT_EXEC | PROT_BTI) */ static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags) { if (!test_bit(MMF_HAS_MDWE, &current->mm->flags)) return false; if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE)) return true; if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC)) return true; return false; } #endif /* _LINUX_MMAN_H */
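_calc_vm_trans() depends on bit1 and bit2 being single bits so that the translation collapses to one multiply or divide with no branch. The following stand-alone sketch (illustrative only; DEMO_PROT_READ and DEMO_VM_READ are made-up values, not the real PROT_*/VM_* constants) shows that scaling at work:

#include <stdio.h>

#define DEMO_PROT_READ	0x1UL	/* hypothetical bit in the "prot" word */
#define DEMO_VM_READ	0x8UL	/* hypothetical bit in the "vm_flags" word */

/* Same optimisation macro as above: scale one single-bit flag into another. */
#define _calc_vm_trans(x, bit1, bit2) \
	((!(bit1) || !(bit2)) ? 0 : \
	 ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
			   : ((x) & (bit1)) / ((bit1) / (bit2))))

int main(void)
{
	/* 0x1 set in "prot" becomes 0x8 set in "vm_flags" (prints 0x8). */
	printf("vm_flags = %#lx\n",
	       _calc_vm_trans(DEMO_PROT_READ, DEMO_PROT_READ, DEMO_VM_READ));

	/* Source bit clear: the translated bit is 0 as well. */
	printf("vm_flags = %#lx\n",
	       _calc_vm_trans(0UL, DEMO_PROT_READ, DEMO_VM_READ));
	return 0;
}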
// SPDX-License-Identifier: GPL-2.0-only /* * kernel/power/main.c - PM subsystem core functionality. * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab */ #include <linux/acpi.h> #include <linux/export.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/pm-trace.h> #include <linux/workqueue.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/pm_runtime.h> #include "power.h" #ifdef CONFIG_PM_SLEEP /* * The following functions are used by the suspend/hibernate code to temporarily * change gfp_allowed_mask in order to avoid using I/O during memory allocations * while devices are suspended. To avoid races with the suspend/hibernate code, * they should always be called with system_transition_mutex held * (gfp_allowed_mask also should only be modified with system_transition_mutex * held, unless the suspend/hibernate code is guaranteed not to run in parallel * with that modification). */ static gfp_t saved_gfp_mask; void pm_restore_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); if (saved_gfp_mask) { gfp_allowed_mask = saved_gfp_mask; saved_gfp_mask = 0; } } void pm_restrict_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); WARN_ON(saved_gfp_mask); saved_gfp_mask = gfp_allowed_mask; gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS); } unsigned int lock_system_sleep(void) { unsigned int flags = current->flags; current->flags |= PF_NOFREEZE; mutex_lock(&system_transition_mutex); return flags; } EXPORT_SYMBOL_GPL(lock_system_sleep); void unlock_system_sleep(unsigned int flags) { if (!(flags & PF_NOFREEZE)) current->flags &= ~PF_NOFREEZE; mutex_unlock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(unlock_system_sleep); void ksys_sync_helper(void) { ktime_t start; long elapsed_msecs; start = ktime_get(); ksys_sync(); elapsed_msecs = ktime_to_ms(ktime_sub(ktime_get(), start)); pr_info("Filesystems sync: %ld.%03ld seconds\n", elapsed_msecs / MSEC_PER_SEC, elapsed_msecs % MSEC_PER_SEC); } EXPORT_SYMBOL_GPL(ksys_sync_helper); /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); int register_pm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&pm_chain_head, nb); } EXPORT_SYMBOL_GPL(register_pm_notifier); int unregister_pm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&pm_chain_head, nb); } EXPORT_SYMBOL_GPL(unregister_pm_notifier); int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down) { int ret; ret = blocking_notifier_call_chain_robust(&pm_chain_head, val_up, val_down, NULL); return notifier_to_errno(ret); } int pm_notifier_call_chain(unsigned long val) { return blocking_notifier_call_chain(&pm_chain_head, val, NULL); } /* If set, devices may be suspended and resumed asynchronously.
*/ int pm_async_enabled = 1; static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_async_enabled); } static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_async_enabled = val; return n; } power_attr(pm_async); #ifdef CONFIG_SUSPEND static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { char *s = buf; suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { if (i >= PM_SUSPEND_MEM && cxl_mem_active()) continue; if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; if (mem_sleep_current == i) s += sprintf(s, "[%s] ", label); else s += sprintf(s, "%s ", label); } } /* Convert the last space to a newline if needed. */ if (s != buf) *(s-1) = '\n'; return (s - buf); } static suspend_state_t decode_suspend_state(const char *buf, size_t n) { suspend_state_t state; char *p; int len; p = memchr(buf, '\n', n); len = p ? p - buf : n; for (state = PM_SUSPEND_MIN; state < PM_SUSPEND_MAX; state++) { const char *label = mem_sleep_states[state]; if (label && len == strlen(label) && !strncmp(buf, label, len)) return state; } return PM_SUSPEND_ON; } static ssize_t mem_sleep_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } state = decode_suspend_state(buf, n); if (state < PM_SUSPEND_MAX && state > PM_SUSPEND_ON) mem_sleep_current = state; else error = -EINVAL; out: pm_autosleep_unlock(); return error ? error : n; } power_attr(mem_sleep); /* * sync_on_suspend: invoke ksys_sync_helper() before suspend. * * show() returns whether ksys_sync_helper() is invoked before suspend. * store() accepts 0 or 1. 0 disables ksys_sync_helper() and 1 enables it. */ bool sync_on_suspend_enabled = !IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC); static ssize_t sync_on_suspend_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", sync_on_suspend_enabled); } static ssize_t sync_on_suspend_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; sync_on_suspend_enabled = !!val; return n; } power_attr(sync_on_suspend); #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_PM_SLEEP_DEBUG int pm_test_level = TEST_NONE; static const char * const pm_tests[__TEST_AFTER_LAST] = { [TEST_NONE] = "none", [TEST_CORE] = "core", [TEST_CPUS] = "processors", [TEST_PLATFORM] = "platform", [TEST_DEVICES] = "devices", [TEST_FREEZER] = "freezer", }; static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { char *s = buf; int level; for (level = TEST_FIRST; level <= TEST_MAX; level++) if (pm_tests[level]) { if (level == pm_test_level) s += sprintf(s, "[%s] ", pm_tests[level]); else s += sprintf(s, "%s ", pm_tests[level]); } if (s != buf) /* convert the last space to a newline */ *(s-1) = '\n'; return (s - buf); } static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned int sleep_flags; const char * const *s; int error = -EINVAL; int level; char *p; int len; p = memchr(buf, '\n', n); len = p ? 
p - buf : n; sleep_flags = lock_system_sleep(); level = TEST_FIRST; for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { pm_test_level = level; error = 0; break; } unlock_system_sleep(sleep_flags); return error ? error : n; } power_attr(pm_test); #endif /* CONFIG_PM_SLEEP_DEBUG */ #define SUSPEND_NR_STEPS SUSPEND_RESUME #define REC_FAILED_NUM 2 struct suspend_stats { unsigned int step_failures[SUSPEND_NR_STEPS]; unsigned int success; unsigned int fail; int last_failed_dev; char failed_devs[REC_FAILED_NUM][40]; int last_failed_errno; int errno[REC_FAILED_NUM]; int last_failed_step; u64 last_hw_sleep; u64 total_hw_sleep; u64 max_hw_sleep; enum suspend_stat_step failed_steps[REC_FAILED_NUM]; }; static struct suspend_stats suspend_stats; static DEFINE_MUTEX(suspend_stats_lock); void dpm_save_failed_dev(const char *name) { mutex_lock(&suspend_stats_lock); strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], name, sizeof(suspend_stats.failed_devs[0])); suspend_stats.last_failed_dev++; suspend_stats.last_failed_dev %= REC_FAILED_NUM; mutex_unlock(&suspend_stats_lock); } void dpm_save_failed_step(enum suspend_stat_step step) { suspend_stats.step_failures[step-1]++; suspend_stats.failed_steps[suspend_stats.last_failed_step] = step; suspend_stats.last_failed_step++; suspend_stats.last_failed_step %= REC_FAILED_NUM; } void dpm_save_errno(int err) { if (!err) { suspend_stats.success++; return; } suspend_stats.fail++; suspend_stats.errno[suspend_stats.last_failed_errno] = err; suspend_stats.last_failed_errno++; suspend_stats.last_failed_errno %= REC_FAILED_NUM; } void pm_report_hw_sleep_time(u64 t) { suspend_stats.last_hw_sleep = t; suspend_stats.total_hw_sleep += t; } EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time); void pm_report_max_hw_sleep(u64 t) { suspend_stats.max_hw_sleep = t; } EXPORT_SYMBOL_GPL(pm_report_max_hw_sleep); static const char * const suspend_step_names[] = { [SUSPEND_WORKING] = "", [SUSPEND_FREEZE] = "freeze", [SUSPEND_PREPARE] = "prepare", [SUSPEND_SUSPEND] = "suspend", [SUSPEND_SUSPEND_LATE] = "suspend_late", [SUSPEND_SUSPEND_NOIRQ] = "suspend_noirq", [SUSPEND_RESUME_NOIRQ] = "resume_noirq", [SUSPEND_RESUME_EARLY] = "resume_early", [SUSPEND_RESUME] = "resume", }; #define suspend_attr(_name, format_str) \ static ssize_t _name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ return sprintf(buf, format_str, suspend_stats._name); \ } \ static struct kobj_attribute _name = __ATTR_RO(_name) suspend_attr(success, "%u\n"); suspend_attr(fail, "%u\n"); suspend_attr(last_hw_sleep, "%llu\n"); suspend_attr(total_hw_sleep, "%llu\n"); suspend_attr(max_hw_sleep, "%llu\n"); #define suspend_step_attr(_name, step) \ static ssize_t _name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", \ suspend_stats.step_failures[step-1]); \ } \ static struct kobj_attribute _name = __ATTR_RO(_name) suspend_step_attr(failed_freeze, SUSPEND_FREEZE); suspend_step_attr(failed_prepare, SUSPEND_PREPARE); suspend_step_attr(failed_suspend, SUSPEND_SUSPEND); suspend_step_attr(failed_suspend_late, SUSPEND_SUSPEND_LATE); suspend_step_attr(failed_suspend_noirq, SUSPEND_SUSPEND_NOIRQ); suspend_step_attr(failed_resume, SUSPEND_RESUME); suspend_step_attr(failed_resume_early, SUSPEND_RESUME_EARLY); suspend_step_attr(failed_resume_noirq, SUSPEND_RESUME_NOIRQ); static ssize_t last_failed_dev_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int index; char 
*last_failed_dev = NULL; index = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; index %= REC_FAILED_NUM; last_failed_dev = suspend_stats.failed_devs[index]; return sprintf(buf, "%s\n", last_failed_dev); } static struct kobj_attribute last_failed_dev = __ATTR_RO(last_failed_dev); static ssize_t last_failed_errno_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int index; int last_failed_errno; index = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; index %= REC_FAILED_NUM; last_failed_errno = suspend_stats.errno[index]; return sprintf(buf, "%d\n", last_failed_errno); } static struct kobj_attribute last_failed_errno = __ATTR_RO(last_failed_errno); static ssize_t last_failed_step_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { enum suspend_stat_step step; int index; index = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; index %= REC_FAILED_NUM; step = suspend_stats.failed_steps[index]; return sprintf(buf, "%s\n", suspend_step_names[step]); } static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step); static struct attribute *suspend_attrs[] = { &success.attr, &fail.attr, &failed_freeze.attr, &failed_prepare.attr, &failed_suspend.attr, &failed_suspend_late.attr, &failed_suspend_noirq.attr, &failed_resume.attr, &failed_resume_early.attr, &failed_resume_noirq.attr, &last_failed_dev.attr, &last_failed_errno.attr, &last_failed_step.attr, &last_hw_sleep.attr, &total_hw_sleep.attr, &max_hw_sleep.attr, NULL, }; static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { if (attr != &last_hw_sleep.attr && attr != &total_hw_sleep.attr && attr != &max_hw_sleep.attr) return 0444; #ifdef CONFIG_ACPI if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) return 0444; #endif return 0; } static const struct attribute_group suspend_attr_group = { .name = "suspend_stats", .attrs = suspend_attrs, .is_visible = suspend_attr_is_visible, }; #ifdef CONFIG_DEBUG_FS static int suspend_stats_show(struct seq_file *s, void *unused) { int i, index, last_dev, last_errno, last_step; enum suspend_stat_step step; last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; last_dev %= REC_FAILED_NUM; last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; last_errno %= REC_FAILED_NUM; last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; last_step %= REC_FAILED_NUM; seq_printf(s, "success: %u\nfail: %u\n", suspend_stats.success, suspend_stats.fail); for (step = SUSPEND_FREEZE; step <= SUSPEND_NR_STEPS; step++) seq_printf(s, "failed_%s: %u\n", suspend_step_names[step], suspend_stats.step_failures[step-1]); seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", suspend_stats.failed_devs[last_dev]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_dev + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-s\n", suspend_stats.failed_devs[index]); } seq_printf(s, " last_failed_errno:\t%-d\n", suspend_stats.errno[last_errno]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_errno + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-d\n", suspend_stats.errno[index]); } seq_printf(s, " last_failed_step:\t%-s\n", suspend_step_names[suspend_stats.failed_steps[last_step]]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_step + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-s\n", suspend_step_names[suspend_stats.failed_steps[index]]); } return 0; } DEFINE_SHOW_ATTRIBUTE(suspend_stats); static int __init pm_debugfs_init(void) { 
debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO, NULL, NULL, &suspend_stats_fops); return 0; } late_initcall(pm_debugfs_init); #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_SLEEP_DEBUG /* * pm_print_times: print time taken by devices to suspend and resume. * * show() returns whether printing of suspend and resume times is enabled. * store() accepts 0 or 1. 0 disables printing and 1 enables it. */ bool pm_print_times_enabled; static ssize_t pm_print_times_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_print_times_enabled); } static ssize_t pm_print_times_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_print_times_enabled = !!val; return n; } power_attr(pm_print_times); static inline void pm_print_times_init(void) { pm_print_times_enabled = !!initcall_debug; } static ssize_t pm_wakeup_irq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { if (!pm_wakeup_irq()) return -ENODATA; return sprintf(buf, "%u\n", pm_wakeup_irq()); } power_attr_ro(pm_wakeup_irq); bool pm_debug_messages_on __read_mostly; bool pm_debug_messages_should_print(void) { return pm_debug_messages_on && pm_suspend_target_state != PM_SUSPEND_ON; } EXPORT_SYMBOL_GPL(pm_debug_messages_should_print); static ssize_t pm_debug_messages_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_debug_messages_on); } static ssize_t pm_debug_messages_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_debug_messages_on = !!val; return n; } power_attr(pm_debug_messages); static int __init pm_debug_messages_setup(char *str) { pm_debug_messages_on = true; return 1; } __setup("pm_debug_messages", pm_debug_messages_setup); #else /* !CONFIG_PM_SLEEP_DEBUG */ static inline void pm_print_times_init(void) {} #endif /* CONFIG_PM_SLEEP_DEBUG */ struct kobject *power_kobj; /* * state - control system sleep states. * * show() returns available sleep state labels, which may be "mem", "standby", * "freeze" and "disk" (hibernation). * See Documentation/admin-guide/pm/sleep-states.rst for a description of * what they mean. * * store() accepts one of those strings, translates it into the proper * enumerated value, and initiates a suspend transition. */ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { char *s = buf; #ifdef CONFIG_SUSPEND suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) if (pm_states[i]) s += sprintf(s,"%s ", pm_states[i]); #endif if (hibernation_available()) s += sprintf(s, "disk "); if (s != buf) /* convert the last space to a newline */ *(s-1) = '\n'; return (s - buf); } static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state; #endif char *p; int len; p = memchr(buf, '\n', n); len = p ? p - buf : n; /* Check hibernation first. 
*/ if (len == 4 && str_has_prefix(buf, "disk")) return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND for (state = PM_SUSPEND_MIN; state < PM_SUSPEND_MAX; state++) { const char *label = pm_states[state]; if (label && len == strlen(label) && !strncmp(buf, label, len)) return state; } #endif return PM_SUSPEND_ON; } static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } state = decode_state(buf, n); if (state < PM_SUSPEND_MAX) { if (state == PM_SUSPEND_MEM) state = mem_sleep_current; error = pm_suspend(state); } else if (state == PM_SUSPEND_MAX) { error = hibernate(); } else { error = -EINVAL; } out: pm_autosleep_unlock(); return error ? error : n; } power_attr(state); #ifdef CONFIG_PM_SLEEP /* * The 'wakeup_count' attribute, along with the functions defined in * drivers/base/power/wakeup.c, provides a means by which wakeup events can be * handled in a non-racy way. * * If a wakeup event occurs when the system is in a sleep state, it simply is * woken up. In turn, if an event that would wake the system up from a sleep * state occurs when it is undergoing a transition to that sleep state, the * transition should be aborted. Moreover, if such an event occurs when the * system is in the working state, an attempt to start a transition to the * given sleep state should fail during certain period after the detection of * the event. Using the 'state' attribute alone is not sufficient to satisfy * these requirements, because a wakeup event may occur exactly when 'state' * is being written to and may be delivered to user space right before it is * frozen, so the event will remain only partially processed until the system is * woken up by another event. In particular, it won't cause the transition to * a sleep state to be aborted. * * This difficulty may be overcome if user space uses 'wakeup_count' before * writing to 'state'. It first should read from 'wakeup_count' and store * the read value. Then, after carrying out its own preparations for the system * transition to a sleep state, it should write the stored value to * 'wakeup_count'. If that fails, at least one wakeup event has occurred since * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it * is allowed to write to 'state', but the transition will be aborted if there * are any wakeup events detected after 'wakeup_count' was written to. */ static ssize_t wakeup_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { unsigned int val; return pm_get_wakeup_count(&val, true) ? sprintf(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned int val; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } error = -EINVAL; if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) error = n; else pm_print_active_wakeup_sources(); } out: pm_autosleep_unlock(); return error; } power_attr(wakeup_count); #ifdef CONFIG_PM_AUTOSLEEP static ssize_t autosleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { suspend_state_t state = pm_autosleep_state(); if (state == PM_SUSPEND_ON) return sprintf(buf, "off\n"); #ifdef CONFIG_SUSPEND if (state < PM_SUSPEND_MAX) return sprintf(buf, "%s\n", pm_states[state] ? 
pm_states[state] : "error"); #endif #ifdef CONFIG_HIBERNATION return sprintf(buf, "disk\n"); #else return sprintf(buf, "error"); #endif } static ssize_t autosleep_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state = decode_state(buf, n); int error; if (state == PM_SUSPEND_ON && strcmp(buf, "off") && strcmp(buf, "off\n")) return -EINVAL; if (state == PM_SUSPEND_MEM) state = mem_sleep_current; error = pm_autosleep_set_state(state); return error ? error : n; } power_attr(autosleep); #endif /* CONFIG_PM_AUTOSLEEP */ #ifdef CONFIG_PM_WAKELOCKS static ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return pm_show_wakelocks(buf, true); } static ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int error = pm_wake_lock(buf); return error ? error : n; } power_attr(wake_lock); static ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return pm_show_wakelocks(buf, false); } static ssize_t wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int error = pm_wake_unlock(buf); return error ? error : n; } power_attr(wake_unlock); #endif /* CONFIG_PM_WAKELOCKS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE int pm_trace_enabled; static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_trace_enabled); } static ssize_t pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int val; if (sscanf(buf, "%d", &val) == 1) { pm_trace_enabled = !!val; if (pm_trace_enabled) { pr_warn("PM: Enabling pm_trace changes system date and time during resume.\n" "PM: Correct system time has to be restored manually after resume.\n"); } return n; } return -EINVAL; } power_attr(pm_trace); static ssize_t pm_trace_dev_match_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return show_trace_dev_match(buf, PAGE_SIZE); } power_attr_ro(pm_trace_dev_match); #endif /* CONFIG_PM_TRACE */ #ifdef CONFIG_FREEZER static ssize_t pm_freeze_timeout_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", freeze_timeout_msecs); } static ssize_t pm_freeze_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; freeze_timeout_msecs = val; return n; } power_attr(pm_freeze_timeout); #endif /* CONFIG_FREEZER*/ static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE &pm_trace_attr.attr, &pm_trace_dev_match_attr.attr, #endif #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, #ifdef CONFIG_SUSPEND &mem_sleep_attr.attr, &sync_on_suspend_attr.attr, #endif #ifdef CONFIG_PM_AUTOSLEEP &autosleep_attr.attr, #endif #ifdef CONFIG_PM_WAKELOCKS &wake_lock_attr.attr, &wake_unlock_attr.attr, #endif #ifdef CONFIG_PM_SLEEP_DEBUG &pm_test_attr.attr, &pm_print_times_attr.attr, &pm_wakeup_irq_attr.attr, &pm_debug_messages_attr.attr, #endif #endif #ifdef CONFIG_FREEZER &pm_freeze_timeout_attr.attr, #endif NULL, }; static const struct attribute_group attr_group = { .attrs = g, }; static const struct attribute_group *attr_groups[] = { &attr_group, #ifdef CONFIG_PM_SLEEP &suspend_attr_group, #endif NULL, }; struct workqueue_struct *pm_wq; EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { pm_wq = alloc_workqueue("pm", 
WQ_FREEZABLE, 0); return pm_wq ? 0 : -ENOMEM; } static int __init pm_init(void) { int error = pm_start_workqueue(); if (error) return error; hibernate_image_size_init(); hibernate_reserved_size_init(); pm_states_init(); power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; error = sysfs_create_groups(power_kobj, attr_groups); if (error) return error; pm_print_times_init(); return pm_autosleep_init(); } core_initcall(pm_init);
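The long comment above the 'wakeup_count' attribute describes the race-free suspend protocol expected from user space: read /sys/power/wakeup_count, write the stored value back, and only write /sys/power/state if that write succeeded. A minimal user-space sketch of that protocol follows (illustrative only; it assumes the standard sysfs paths, root privileges, and that suspend-to-RAM ("mem") is available):

#include <stdio.h>

int main(void)
{
	char count[32];
	FILE *f;

	/* 1. Read and remember the current wakeup count. */
	f = fopen("/sys/power/wakeup_count", "r");
	if (!f || !fgets(count, sizeof(count), f)) {
		perror("read wakeup_count");
		return 1;
	}
	fclose(f);

	/*
	 * 2. Write the value back; the write (reported by fclose() of the
	 *    buffered stream) fails if a wakeup event arrived in between.
	 */
	f = fopen("/sys/power/wakeup_count", "w");
	if (!f || fputs(count, f) == EOF || fclose(f) == EOF) {
		fprintf(stderr, "wakeup event detected, aborting suspend\n");
		return 1;
	}

	/* 3. Only now is it safe to request the sleep transition. */
	f = fopen("/sys/power/state", "w");
	if (!f || fputs("mem\n", f) == EOF || fclose(f) == EOF) {
		perror("write state");
		return 1;
	}
	return 0;
}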
/* SPDX-License-Identifier: GPL-2.0 */ /* thread_info.h: common low-level thread information accessors * * Copyright (C) 2002 David Howells (dhowells@redhat.com) * - Incorporating suggestions made by Linus Torvalds */ #ifndef _LINUX_THREAD_INFO_H #define _LINUX_THREAD_INFO_H #include <linux/types.h> #include <linux/limits.h> #include <linux/bug.h> #include <linux/restart_block.h> #include <linux/errno.h> #ifdef CONFIG_THREAD_INFO_IN_TASK /* * For CONFIG_THREAD_INFO_IN_TASK kernels we need <asm/current.h> for the * definition of current, but for !CONFIG_THREAD_INFO_IN_TASK kernels, * including <asm/current.h> can cause a circular dependency on some platforms. */ #include <asm/current.h> #define current_thread_info() ((struct thread_info *)current) #endif #include <linux/bitops.h> /* * For per-arch arch_within_stack_frames() implementations, defined in * asm/thread_info.h.
*/ enum { BAD_STACK = -1, NOT_STACK = 0, GOOD_FRAME, GOOD_STACK, }; #ifdef CONFIG_GENERIC_ENTRY enum syscall_work_bit { SYSCALL_WORK_BIT_SECCOMP, SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, SYSCALL_WORK_BIT_SYSCALL_TRACE, SYSCALL_WORK_BIT_SYSCALL_EMU, SYSCALL_WORK_BIT_SYSCALL_AUDIT, SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH, SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) #define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) #define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) #define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH) #define SYSCALL_WORK_SYSCALL_EXIT_TRAP BIT(SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP) #endif #include <asm/thread_info.h> #ifdef __KERNEL__ #ifndef arch_set_restart_data #define arch_set_restart_data(restart) do { } while (0) #endif static inline long set_restart_fn(struct restart_block *restart, long (*fn)(struct restart_block *)) { restart->fn = fn; arch_set_restart_data(restart); return -ERESTART_RESTARTBLOCK; } #ifndef THREAD_ALIGN #define THREAD_ALIGN THREAD_SIZE #endif #define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) /* * flag set/clear/test wrappers * - pass TIF_xxxx constants to these functions */ static inline void set_ti_thread_flag(struct thread_info *ti, int flag) { set_bit(flag, (unsigned long *)&ti->flags); } static inline void clear_ti_thread_flag(struct thread_info *ti, int flag) { clear_bit(flag, (unsigned long *)&ti->flags); } static inline void update_ti_thread_flag(struct thread_info *ti, int flag, bool value) { if (value) set_ti_thread_flag(ti, flag); else clear_ti_thread_flag(ti, flag); } static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag) { return test_and_set_bit(flag, (unsigned long *)&ti->flags); } static inline int test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) { return test_and_clear_bit(flag, (unsigned long *)&ti->flags); } static inline int test_ti_thread_flag(struct thread_info *ti, int flag) { return test_bit(flag, (unsigned long *)&ti->flags); } /* * This may be used in noinstr code, and needs to be __always_inline to prevent * inadvertent instrumentation. 
*/ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti) { return READ_ONCE(ti->flags); } #define set_thread_flag(flag) \ set_ti_thread_flag(current_thread_info(), flag) #define clear_thread_flag(flag) \ clear_ti_thread_flag(current_thread_info(), flag) #define update_thread_flag(flag, value) \ update_ti_thread_flag(current_thread_info(), flag, value) #define test_and_set_thread_flag(flag) \ test_and_set_ti_thread_flag(current_thread_info(), flag) #define test_and_clear_thread_flag(flag) \ test_and_clear_ti_thread_flag(current_thread_info(), flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) #define read_thread_flags() \ read_ti_thread_flags(current_thread_info()) #define read_task_thread_flags(t) \ read_ti_thread_flags(task_thread_info(t)) #ifdef CONFIG_GENERIC_ENTRY #define set_syscall_work(fl) \ set_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work) #define test_syscall_work(fl) \ test_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work) #define clear_syscall_work(fl) \ clear_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work) #define set_task_syscall_work(t, fl) \ set_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) #define test_task_syscall_work(t, fl) \ test_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) #define clear_task_syscall_work(t, fl) \ clear_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) #else /* CONFIG_GENERIC_ENTRY */ #define set_syscall_work(fl) \ set_ti_thread_flag(current_thread_info(), TIF_##fl) #define test_syscall_work(fl) \ test_ti_thread_flag(current_thread_info(), TIF_##fl) #define clear_syscall_work(fl) \ clear_ti_thread_flag(current_thread_info(), TIF_##fl) #define set_task_syscall_work(t, fl) \ set_ti_thread_flag(task_thread_info(t), TIF_##fl) #define test_task_syscall_work(t, fl) \ test_ti_thread_flag(task_thread_info(t), TIF_##fl) #define clear_task_syscall_work(t, fl) \ clear_ti_thread_flag(task_thread_info(t), TIF_##fl) #endif /* !CONFIG_GENERIC_ENTRY */ #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H static __always_inline bool tif_need_resched(void) { return arch_test_bit(TIF_NEED_RESCHED, (unsigned long *)(&current_thread_info()->flags)); } #else static __always_inline bool tif_need_resched(void) { return test_bit(TIF_NEED_RESCHED, (unsigned long *)(&current_thread_info()->flags)); } #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, const void * const stackend, const void *obj, unsigned long len) { return 0; } #endif #ifdef CONFIG_HARDENED_USERCOPY extern void __check_object_size(const void *ptr, unsigned long n, bool to_user); static __always_inline void check_object_size(const void *ptr, unsigned long n, bool to_user) { if (!__builtin_constant_p(n)) __check_object_size(ptr, n, to_user); } #else static inline void check_object_size(const void *ptr, unsigned long n, bool to_user) { } #endif /* CONFIG_HARDENED_USERCOPY */ extern void __compiletime_error("copy source size is too small") __bad_copy_from(void); extern void __compiletime_error("copy destination size is too small") __bad_copy_to(void); void __copy_overflow(int size, unsigned long count); static inline void copy_overflow(int size, unsigned long count) { if (IS_ENABLED(CONFIG_BUG)) __copy_overflow(size, count); } static __always_inline __must_check bool check_copy_size(const void *addr, size_t bytes, bool is_source) { int 
sz = __builtin_object_size(addr, 0); if (unlikely(sz >= 0 && sz < bytes)) { if (!__builtin_constant_p(bytes)) copy_overflow(sz, bytes); else if (is_source) __bad_copy_from(); else __bad_copy_to(); return false; } if (WARN_ON_ONCE(bytes > INT_MAX)) return false; check_object_size(addr, bytes, is_source); return true; } #ifndef arch_setup_new_exec static inline void arch_setup_new_exec(void) { } #endif void arch_task_cache_init(void); /* for CONFIG_SH */ void arch_release_task_struct(struct task_struct *tsk); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */
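The flag accessors above are thin wrappers around set_bit()/test_bit() on thread_info->flags: the plain macros implicitly operate on current, while the _ti_ and task variants take an explicit thread_info. A brief illustrative sketch, not part of thread_info.h; the example_* helpers are made up, and TIF_NEED_RESCHED/TIF_SIGPENDING are arch-defined flags assumed to be present (every architecture provides them):

/*
 * Illustrative only: how the wrappers above are typically used.
 */
#include <linux/thread_info.h>
#include <linux/sched.h>

static bool example_signal_pending_self(void)
{
	/* operates on current via current_thread_info() */
	return test_thread_flag(TIF_SIGPENDING);
}

static void example_poke_task(struct task_struct *tsk)
{
	/* the _ti_ variants take an explicit thread_info, e.g. a remote task's */
	set_ti_thread_flag(task_thread_info(tsk), TIF_NEED_RESCHED);
}

static unsigned long example_snapshot_flags(struct task_struct *tsk)
{
	/* lockless snapshot of all flags, as used by noinstr/entry code */
	return read_task_thread_flags(tsk);
}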
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net> * * Based on the original implementation which is: * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE * Copyright 2003 Andi Kleen, SuSE Labs. * * Parts of the original code have been moved to arch/x86/vdso/vma.c * * This file implements vsyscall emulation. vsyscalls are a legacy ABI: * Userspace can request certain kernel services by calling fixed * addresses. This concept is problematic: * * - It interferes with ASLR. * - It's awkward to write code that lives in kernel addresses but is * callable by userspace at fixed addresses. * - The whole concept is impossible for 32-bit compat userspace. * - UML cannot easily virtualize a vsyscall. * * As of mid-2014, I believe that there is no new userspace code that * will use a vsyscall if the vDSO is present. I hope that there will * soon be no new userspace code that will ever use a vsyscall. * * The code in this file emulates vsyscalls when notified of a page * fault to a vsyscall address.
*/ #include <linux/kernel.h> #include <linux/timer.h> #include <linux/sched/signal.h> #include <linux/mm_types.h> #include <linux/syscalls.h> #include <linux/ratelimit.h> #include <asm/vsyscall.h> #include <asm/unistd.h> #include <asm/fixmap.h> #include <asm/traps.h> #include <asm/paravirt.h> #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = #ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; #elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) XONLY; #else #error VSYSCALL config is broken #endif static int __init vsyscall_setup(char *str) { if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; else if (!strcmp("xonly", str)) vsyscall_mode = XONLY; else if (!strcmp("none", str)) vsyscall_mode = NONE; else return -EINVAL; return 0; } return -EINVAL; } early_param("vsyscall", vsyscall_setup); static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, const char *message) { if (!show_unhandled_signals) return; printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n", level, current->comm, task_pid_nr(current), message, regs->ip, regs->cs, regs->sp, regs->ax, regs->si, regs->di); } static int addr_to_vsyscall_nr(unsigned long addr) { int nr; if ((addr & ~0xC00UL) != VSYSCALL_ADDR) return -EINVAL; nr = (addr & 0xC00UL) >> 10; if (nr >= 3) return -EINVAL; return nr; } static bool write_ok_or_segv(unsigned long ptr, size_t size) { if (!access_ok((void __user *)ptr, size)) { struct thread_struct *thread = &current->thread; thread->error_code = X86_PF_USER | X86_PF_WRITE; thread->cr2 = ptr; thread->trap_nr = X86_TRAP_PF; force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr); return false; } else { return true; } } bool emulate_vsyscall(unsigned long error_code, struct pt_regs *regs, unsigned long address) { unsigned long caller; int vsyscall_nr, syscall_nr, tmp; long ret; unsigned long orig_dx; /* Write faults or kernel-privilege faults never get fixed up. */ if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) return false; if (!(error_code & X86_PF_INSTR)) { /* Failed vsyscall read */ if (vsyscall_mode == EMULATE) return false; /* * User code tried and failed to read the vsyscall page. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround"); return false; } /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. */ WARN_ON_ONCE(address != regs->ip); if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); return false; } vsyscall_nr = addr_to_vsyscall_nr(address); trace_emulate_vsyscall(vsyscall_nr); if (vsyscall_nr < 0) { warn_bad_vsyscall(KERN_WARNING, regs, "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); goto sigsegv; } if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { warn_bad_vsyscall(KERN_WARNING, regs, "vsyscall with bad stack (exploit attempt?)"); goto sigsegv; } /* * Check for access_ok violations and find the syscall nr. * * NULL is a valid user pointer (in the access_ok sense) on 32-bit and * 64-bit, so we don't need to special-case it here. For all the * vsyscalls, NULL means "don't write anything" not "write it at * address 0". 
*/ switch (vsyscall_nr) { case 0: if (!write_ok_or_segv(regs->di, sizeof(struct __kernel_old_timeval)) || !write_ok_or_segv(regs->si, sizeof(struct timezone))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_gettimeofday; break; case 1: if (!write_ok_or_segv(regs->di, sizeof(__kernel_old_time_t))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_time; break; case 2: if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || !write_ok_or_segv(regs->si, sizeof(unsigned))) { ret = -EFAULT; goto check_fault; } syscall_nr = __NR_getcpu; break; } /* * Handle seccomp. regs->ip must be the original value. * See seccomp_send_sigsys and Documentation/userspace-api/seccomp_filter.rst. * * We could optimize the seccomp disabled case, but performance * here doesn't matter. */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); force_exit_sig(SIGSYS); return true; } regs->orig_ax = -1; if (tmp) goto do_ret; /* skip requested */ /* * With a real vsyscall, page faults cause SIGSEGV. */ ret = -EFAULT; switch (vsyscall_nr) { case 0: /* this decodes regs->di and regs->si on its own */ ret = __x64_sys_gettimeofday(regs); break; case 1: /* this decodes regs->di on its own */ ret = __x64_sys_time(regs); break; case 2: /* while we could clobber regs->dx, we didn't in the past... */ orig_dx = regs->dx; regs->dx = 0; /* this decodes regs->di, regs->si and regs->dx on its own */ ret = __x64_sys_getcpu(regs); regs->dx = orig_dx; break; } check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); goto sigsegv; } regs->ax = ret; do_ret: /* Emulate a ret instruction. */ regs->ip = caller; regs->sp += 8; return true; sigsegv: force_sig(SIGSEGV); return true; } /* * A pseudo VMA to allow ptrace access for the vsyscall page. This only * covers the 64bit vsyscall page now. 32bit has a real VMA now and does * not need special handling anymore: */ static const char *gate_vma_name(struct vm_area_struct *vma) { return "[vsyscall]"; } static const struct vm_operations_struct gate_vma_ops = { .name = gate_vma_name, }; static struct vm_area_struct gate_vma __ro_after_init = { .vm_start = VSYSCALL_ADDR, .vm_end = VSYSCALL_ADDR + PAGE_SIZE, .vm_page_prot = PAGE_READONLY_EXEC, .vm_flags = VM_READ | VM_EXEC, .vm_ops = &gate_vma_ops, }; struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef CONFIG_COMPAT if (!mm || !test_bit(MM_CONTEXT_HAS_VSYSCALL, &mm->context.flags)) return NULL; #endif if (vsyscall_mode == NONE) return NULL; return &gate_vma; } int in_gate_area(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma = get_gate_vma(mm); if (!vma) return 0; return (addr >= vma->vm_start) && (addr < vma->vm_end); } /* * Use this when you have no reliable mm, typically from interrupt * context. It is less reliable than using a task's mm and may give * false positives. */ int in_gate_area_no_mm(unsigned long addr) { return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } /* * The VSYSCALL page is the only user-accessible page in the kernel address * range. Normally, the kernel page tables can have _PAGE_USER clear, but * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls * are enabled. 
* * Some day we may create a "minimal" vsyscall mode in which we emulate * vsyscalls but leave the page not present. If so, we skip calling * this. */ void __init set_vsyscall_pgtable_user_bits(pgd_t *root) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER)); #endif pud = pud_offset(p4d, VSYSCALL_ADDR); set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); pmd = pmd_offset(pud, VSYSCALL_ADDR); set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER)); } void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); /* * For full emulation, the page needs to exist for real. In * execute-only mode, there is no PTE at all backing the vsyscall * page. */ if (vsyscall_mode == EMULATE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } if (vsyscall_mode == XONLY) vm_flags_init(&gate_vma, VM_EXEC); BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); }
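For reference, here is what emulate_vsyscall() is servicing from the other side: legacy binaries call three fixed entry points in the vsyscall page, 1024 bytes apart, which addr_to_vsyscall_nr() maps to gettimeofday (offset 0x000), time (0x400) and getcpu (0x800). A hedged userspace sketch, assuming x86-64 where the vsyscall page sits at 0xffffffffff600000; depending on the vsyscall= mode the call is either emulated through the page-fault path above or raises SIGSEGV, so new code should use the vDSO or plain syscalls instead:

/*
 * Userspace sketch of the legacy vsyscall ABI (not from the kernel sources
 * above). Calls the gettimeofday entry point at its fixed address.
 */
#include <stdio.h>
#include <sys/time.h>

#define VSYSCALL_PAGE 0xffffffffff600000UL

typedef int (*vgettimeofday_t)(struct timeval *tv, struct timezone *tz);

int main(void)
{
	vgettimeofday_t vgettimeofday = (vgettimeofday_t)(VSYSCALL_PAGE + 0x000);
	struct timeval tv;

	if (vgettimeofday(&tv, NULL) == 0)
		printf("tv_sec=%lld\n", (long long)tv.tv_sec);
	return 0;
}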
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_MQ_H #define BLK_MQ_H #include <linux/blkdev.h> #include <linux/sbitmap.h> #include <linux/lockdep.h> #include <linux/scatterlist.h> #include <linux/prefetch.h> #include <linux/srcu.h> #include <linux/rw_hint.h> struct blk_mq_tags; struct blk_flush_queue; #define BLKDEV_MIN_RQ 4 #define BLKDEV_DEFAULT_RQ 128 enum rq_end_io_ret { RQ_END_IO_NONE, RQ_END_IO_FREE, }; typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); /* * request flags */ typedef __u32 __bitwise req_flags_t; /* drive already may have started this one */ #define RQF_STARTED ((__force req_flags_t)(1 << 1)) /* request for flush sequence */ #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) /* use hctx->sched_tags */ #define RQF_SCHED_TAGS ((__force req_flags_t)(1 << 8)) /* use an I/O scheduler for this request */ #define RQF_USE_SCHED ((__force req_flags_t)(1 << 9)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ #define RQF_QUIET ((__force req_flags_t)(1 << 11)) /* account into disk and partition IO statistics */ #define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) /* runtime pm request */ #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ #define RQF_HASHED ((__force req_flags_t)(1 << 16)) /* track IO completion time */ #define RQF_STATS ((__force req_flags_t)(1 << 17)) /* Look at ->special_vec for the actual data payload instead of the bio chain.
*/ #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) /* The per-zone write lock is held for this request */ #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) /* ->timeout has been called, don't expire again */ #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) #define RQF_RESV ((__force req_flags_t)(1 << 23)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ (RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) enum mq_rq_state { MQ_RQ_IDLE = 0, MQ_RQ_IN_FLIGHT = 1, MQ_RQ_COMPLETE = 2, }; /* * Try to put the fields that are referenced together in the same cacheline. * * If you modify this structure, make sure to update blk_rq_init() and * especially blk_mq_rq_ctx_init() to take care of the added fields. */ struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; struct blk_mq_hw_ctx *mq_hctx; blk_opf_t cmd_flags; /* op and common flags */ req_flags_t rq_flags; int tag; int internal_tag; unsigned int timeout; /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ sector_t __sector; /* sector cursor */ struct bio *bio; struct bio *biotail; union { struct list_head queuelist; struct request *rq_next; }; struct block_device *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. */ u64 alloc_time_ns; #endif /* Time that this request was allocated for this IO. */ u64 start_time_ns; /* Time that I/O was submitted to the device. */ u64 io_start_time_ns; #ifdef CONFIG_BLK_WBT unsigned short wbt_flags; #endif /* * rq sectors used for blk stats. It has the same value * with blk_rq_sectors(rq), except that it never be zeroed * by completion. */ unsigned short stats_sectors; /* * Number of scatter-gather DMA addr+len pairs after * physical address coalescing is performed. */ unsigned short nr_phys_segments; #ifdef CONFIG_BLK_DEV_INTEGRITY unsigned short nr_integrity_segments; #endif #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *crypt_ctx; struct blk_crypto_keyslot *crypt_keyslot; #endif enum rw_hint write_hint; unsigned short ioprio; enum mq_rq_state state; atomic_t ref; unsigned long deadline; /* * The hash is used inside the scheduler, and killed once the * request reaches the dispatch list. The ipi_list is only used * to queue the request for softirq completion, which is long * after the request has been unhashed (and even removed from * the dispatch list). */ union { struct hlist_node hash; /* merge hash */ struct llist_node ipi_list; }; /* * The rb_node is only used inside the io scheduler, requests * are pruned when moved to the dispatch queue. special_vec must * only be used if RQF_SPECIAL_PAYLOAD is set, and those cannot be * insert into an IO scheduler. */ union { struct rb_node rb_node; /* sort/lookup */ struct bio_vec special_vec; }; /* * Three pointers are available for the IO schedulers, if they need * more they have to dynamically allocate it. */ struct { struct io_cq *icq; void *priv[2]; } elv; struct { unsigned int seq; rq_end_io_fn *saved_end_io; } flush; u64 fifo_time; /* * completion callback. */ rq_end_io_fn *end_io; void *end_io_data; }; static inline enum req_op req_op(const struct request *req) { return req->cmd_flags & REQ_OP_MASK; } static inline bool blk_rq_is_passthrough(struct request *rq) { return blk_op_is_passthrough(rq->cmd_flags); } static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; } #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? 
WRITE : READ) #define rq_dma_dir(rq) \ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) #define rq_list_add(listptr, rq) do { \ (rq)->rq_next = *(listptr); \ *(listptr) = rq; \ } while (0) #define rq_list_add_tail(lastpptr, rq) do { \ (rq)->rq_next = NULL; \ **(lastpptr) = rq; \ *(lastpptr) = &rq->rq_next; \ } while (0) #define rq_list_pop(listptr) \ ({ \ struct request *__req = NULL; \ if ((listptr) && *(listptr)) { \ __req = *(listptr); \ *(listptr) = __req->rq_next; \ } \ __req; \ }) #define rq_list_peek(listptr) \ ({ \ struct request *__req = NULL; \ if ((listptr) && *(listptr)) \ __req = *(listptr); \ __req; \ }) #define rq_list_for_each(listptr, pos) \ for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) #define rq_list_for_each_safe(listptr, pos, nxt) \ for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \ pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL) #define rq_list_next(rq) (rq)->rq_next #define rq_list_empty(list) ((list) == (struct request *) NULL) /** * rq_list_move() - move a struct request from one list to another * @src: The source list @rq is currently in * @dst: The destination list that @rq will be appended to * @rq: The request to move * @prev: The request preceding @rq in @src (NULL if @rq is the head) */ static inline void rq_list_move(struct request **src, struct request **dst, struct request *rq, struct request *prev) { if (prev) prev->rq_next = rq->rq_next; else *src = rq->rq_next; rq_list_add(dst, rq); } /** * enum blk_eh_timer_return - How the timeout handler should proceed * @BLK_EH_DONE: The block driver completed the command or will complete it at * a later time. * @BLK_EH_RESET_TIMER: Reset the request timer and continue waiting for the * request to complete. */ enum blk_eh_timer_return { BLK_EH_DONE, BLK_EH_RESET_TIMER, }; #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device */ struct blk_mq_hw_ctx { struct { /** @lock: Protects the dispatch list. */ spinlock_t lock; /** * @dispatch: Used for requests that are ready to be * dispatched to the hardware but for some reason (e.g. lack of * resources) could not be sent to the hardware. As soon as the * driver can send new requests, requests at this list will * be sent first for a fairer dispatch. */ struct list_head dispatch; /** * @state: BLK_MQ_S_* flags. Defines the state of the hw * queue (active, scheduled to restart, stopped). */ unsigned long state; } ____cacheline_aligned_in_smp; /** * @run_work: Used for scheduling a hardware queue run at a later time. */ struct delayed_work run_work; /** @cpumask: Map of available CPUs where this hctx can run. */ cpumask_var_t cpumask; /** * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU * selection from @cpumask. */ int next_cpu; /** * @next_cpu_batch: Counter of how many works left in the batch before * changing to the next CPU. */ int next_cpu_batch; /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */ unsigned long flags; /** * @sched_data: Pointer owned by the IO scheduler attached to a request * queue. It's up to the IO scheduler how to use this pointer. */ void *sched_data; /** * @queue: Pointer to the request queue that owns this hardware context. */ struct request_queue *queue; /** @fq: Queue of requests that need to perform a flush operation. 
*/ struct blk_flush_queue *fq; /** * @driver_data: Pointer to data owned by the block driver that created * this hctx */ void *driver_data; /** * @ctx_map: Bitmap for each software queue. If bit is on, there is a * pending request in that software queue. */ struct sbitmap ctx_map; /** * @dispatch_from: Software queue to be used when no scheduler was * selected. */ struct blk_mq_ctx *dispatch_from; /** * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to * decide if the hw_queue is busy using Exponential Weighted Moving * Average algorithm. */ unsigned int dispatch_busy; /** @type: HCTX_TYPE_* flags. Type of hardware queue. */ unsigned short type; /** @nr_ctx: Number of software queues. */ unsigned short nr_ctx; /** @ctxs: Array of software queues. */ struct blk_mq_ctx **ctxs; /** @dispatch_wait_lock: Lock for dispatch_wait queue. */ spinlock_t dispatch_wait_lock; /** * @dispatch_wait: Waitqueue to put requests when there is no tag * available at the moment, to wait for another try in the future. */ wait_queue_entry_t dispatch_wait; /** * @wait_index: Index of next available dispatch_wait queue to insert * requests. */ atomic_t wait_index; /** * @tags: Tags owned by the block driver. A tag at this set is only * assigned when a request is dispatched from a hardware queue. */ struct blk_mq_tags *tags; /** * @sched_tags: Tags owned by I/O scheduler. If there is an I/O * scheduler associated with a request queue, a tag is assigned when * that request is allocated. Else, this member is not used. */ struct blk_mq_tags *sched_tags; /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; /** @queue_num: Index of this hardware queue. */ unsigned int queue_num; /** * @nr_active: Number of active requests. Only used when a tag set is * shared across request queues. */ atomic_t nr_active; /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; /** @cpuhp_dead: List to store request if some CPU die. */ struct hlist_node cpuhp_dead; /** @kobj: Kernel object for sysfs. */ struct kobject kobj; #ifdef CONFIG_BLK_DEBUG_FS /** * @debugfs_dir: debugfs directory for this hardware queue. Named * as cpu<cpu_number>. */ struct dentry *debugfs_dir; /** @sched_debugfs_dir: debugfs directory for the scheduler. */ struct dentry *sched_debugfs_dir; #endif /** * @hctx_list: if this hctx is not in use, this is an entry in * q->unused_hctx_list. */ struct list_head hctx_list; }; /** * struct blk_mq_queue_map - Map software queues to hardware queues * @mq_map: CPU ID to hardware queue index map. This is an array * with nr_cpu_ids elements. Each element has a value in the range * [@queue_offset, @queue_offset + @nr_queues). * @nr_queues: Number of hardware queues to map CPU IDs onto. * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe * driver to map each hardware queue type (enum hctx_type) onto a distinct * set of hardware queues. */ struct blk_mq_queue_map { unsigned int *mq_map; unsigned int nr_queues; unsigned int queue_offset; }; /** * enum hctx_type - Type of hardware queue * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for. * @HCTX_TYPE_READ: Just for READ I/O. * @HCTX_TYPE_POLL: Polled I/O of any kind. * @HCTX_MAX_TYPES: Number of types of hctx. */ enum hctx_type { HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL, HCTX_MAX_TYPES, }; /** * struct blk_mq_tag_set - tag set that can be shared between request queues * @ops: Pointers to functions that implement block driver behavior. 
* @map: One or more ctx -> hctx mappings. One map exists for each * hardware queue type (enum hctx_type) that the driver wishes * to support. There are no restrictions on maps being of the * same size, and it's perfectly legal to share maps between * types. * @nr_maps: Number of elements in the @map array. A number in the range * [1, HCTX_MAX_TYPES]. * @nr_hw_queues: Number of hardware queues supported by the block driver that * owns this data structure. * @queue_depth: Number of tags per hardware queue, reserved tags included. * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag * allocations. * @cmd_size: Number of additional bytes to allocate per request. The block * driver owns these additional bytes. * @numa_node: NUMA node the storage adapter has been connected to. * @timeout: Request processing timeout in jiffies. * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues * elements. * @shared_tags: * Shared set of tags. Has @nr_hw_queues elements. If set, * shared by all @tags. * @tag_list_lock: Serializes tag_list accesses. * @tag_list: List of the request queues that use this tag set. See also * request_queue.tag_set_list. * @srcu: Use as lock when type of the request queue is blocking * (BLK_MQ_F_BLOCKING). */ struct blk_mq_tag_set { const struct blk_mq_ops *ops; struct blk_mq_queue_map map[HCTX_MAX_TYPES]; unsigned int nr_maps; unsigned int nr_hw_queues; unsigned int queue_depth; unsigned int reserved_tags; unsigned int cmd_size; int numa_node; unsigned int timeout; unsigned int flags; void *driver_data; struct blk_mq_tags **tags; struct blk_mq_tags *shared_tags; struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; }; /** * struct blk_mq_queue_data - Data about a request inserted in a queue * * @rq: Request pointer. * @last: If it is the last request in the queue. */ struct blk_mq_queue_data { struct request *rq; bool last; }; typedef bool (busy_tag_iter_fn)(struct request *, void *); /** * struct blk_mq_ops - Callback functions that implements block driver * behaviour. */ struct blk_mq_ops { /** * @queue_rq: Queue a new request from block IO. */ blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); /** * @commit_rqs: If a driver uses bd->last to judge when to submit * requests to hardware, it must define this function. In case of errors * that make us stop issuing further requests, this hook serves the * purpose of kicking the hardware (which the last request otherwise * would have done). */ void (*commit_rqs)(struct blk_mq_hw_ctx *); /** * @queue_rqs: Queue a list of new requests. Driver is guaranteed * that each request belongs to the same queue. If the driver doesn't * empty the @rqlist completely, then the rest will be queued * individually by the block layer upon return. */ void (*queue_rqs)(struct request **rqlist); /** * @get_budget: Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. Also we have to handle failure case * of .get_budget for avoiding I/O deadlock. */ int (*get_budget)(struct request_queue *); /** * @put_budget: Release the reserved budget. 
*/ void (*put_budget)(struct request_queue *, int); /** * @set_rq_budget_token: store rq's budget token */ void (*set_rq_budget_token)(struct request *, int); /** * @get_rq_budget_token: retrieve rq's budget token */ int (*get_rq_budget_token)(struct request *); /** * @timeout: Called on request timeout. */ enum blk_eh_timer_return (*timeout)(struct request *); /** * @poll: Called to poll for completion of a specific tag. */ int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *); /** * @complete: Mark the request as complete. */ void (*complete)(struct request *); /** * @init_hctx: Called when the block layer side of a hardware queue has * been set up, allowing the driver to allocate/init matching * structures. */ int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int); /** * @exit_hctx: Ditto for exit/teardown. */ void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); /** * @init_request: Called for every command allocated by the block layer * to allow the driver to set up driver specific data. * * Tag greater than or equal to queue_depth is for setting up * flush request. */ int (*init_request)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); /** * @exit_request: Ditto for exit/teardown. */ void (*exit_request)(struct blk_mq_tag_set *set, struct request *, unsigned int); /** * @cleanup_rq: Called before freeing one request which isn't completed * yet, and usually for freeing the driver private data. */ void (*cleanup_rq)(struct request *); /** * @busy: If set, returns whether or not this queue currently is busy. */ bool (*busy)(struct request_queue *); /** * @map_queues: This allows drivers specify their own queue mapping by * overriding the setup-time function that builds the mq_map. */ void (*map_queues)(struct blk_mq_tag_set *set); #ifdef CONFIG_BLK_DEBUG_FS /** * @show_rq: Used by the debugfs implementation to show driver-specific * information about a request. */ void (*show_rq)(struct seq_file *m, struct request *rq); #endif }; enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for * completing IO: */ BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 5, /* Do not allow an I/O scheduler to be configured. */ BLK_MQ_F_NO_SCHED = 1 << 6, /* * Select 'none' during queue registration in case of a single hwq * or shared hwqs instead of 'mq-deadline'. 
*/ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, /* hw queue is inactive after all its CPUs become offline */ BLK_MQ_S_INACTIVE = 3, BLK_MQ_MAX_DEPTH = 10240, BLK_MQ_CPU_WORK_BATCH = 8, }; #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) #define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \ ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ << BLK_MQ_F_ALLOC_POLICY_START_BIT) #define BLK_MQ_NO_HCTX_IDX (-1U) struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); #define blk_mq_alloc_disk(set, lim, queuedata) \ ({ \ static struct lock_class_key __key; \ \ __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ }) struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_destroy_queue(struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int queue_depth, unsigned int set_flags); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_free_request(struct request *rq); int blk_rq_poll(struct request *rq, struct io_comp_batch *iob, unsigned int poll_flags); bool blk_mq_queue_inflight(struct request_queue *q); enum { /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), /* set RQF_PM */ BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), }; struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx); /* * Tag address space map. */ struct blk_mq_tags { unsigned int nr_tags; unsigned int nr_reserved_tags; unsigned int active_queues; struct sbitmap_queue bitmap_tags; struct sbitmap_queue breserved_tags; struct request **rqs; struct request **static_rqs; struct list_head page_list; /* * used to clear request reference in rqs[] before freeing one * request pool */ spinlock_t lock; }; static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { if (tag < tags->nr_tags) { prefetch(tags->rqs[tag]); return tags->rqs[tag]; } return NULL; } enum { BLK_MQ_UNIQUE_TAG_BITS = 16, BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1, }; u32 blk_mq_unique_tag(struct request *rq); static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag) { return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS; } static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) { return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } /** * blk_mq_rq_state() - read the current MQ_RQ_* state of a request * @rq: target request. 
*/ static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) { return READ_ONCE(rq->state); } static inline int blk_mq_request_started(struct request *rq) { return blk_mq_rq_state(rq) != MQ_RQ_IDLE; } static inline int blk_mq_request_completed(struct request *rq) { return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE; } /* * * Set the state to complete when completing a request from inside ->queue_rq. * This is used by drivers that want to ensure special complete actions that * need access to the request are called on failure, e.g. by nvme for * multipathing. */ static inline void blk_mq_set_request_complete(struct request *rq) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); } /* * Complete the request directly instead of deferring it to softirq or * completing it another CPU. Useful in preemptible instead of an interrupt. */ static inline void blk_mq_complete_request_direct(struct request *rq, void (*complete)(struct request *rq)) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); complete(rq); } void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); void blk_mq_end_request_batch(struct io_comp_batch *ib); /* * Only need start/end time stamping if we have iostat or * blk stats enabled, or using an IO scheduler. */ static inline bool blk_mq_need_time_stamp(struct request *rq) { /* * passthrough io doesn't use iostat accounting, cgroup stats * and io scheduler functionalities. */ if (blk_rq_is_passthrough(rq)) return false; return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } static inline bool blk_mq_is_reserved_rq(struct request *rq) { return rq->rq_flags & RQF_RESV; } /* * Batched completions only work when there is no I/O error and no special * ->end_io handler. 
*/ static inline bool blk_mq_add_to_batch(struct request *req, struct io_comp_batch *iob, int ioerror, void (*complete)(struct io_comp_batch *)) { /* * blk_mq_end_request_batch() can't end request allocated from * sched tags */ if (!iob || (req->rq_flags & RQF_SCHED_TAGS) || ioerror || (req->end_io && !blk_rq_is_passthrough(req))) return false; if (!iob->complete) iob->complete = complete; else if (iob->complete != complete) return false; iob->need_ts |= blk_mq_need_time_stamp(req); rq_list_add(&iob->req_list, req); return true; } void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set); void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set); void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); bool __blk_should_fake_timeout(struct request_queue *q); static inline bool blk_should_fake_timeout(struct request_queue *q) { if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) && test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) return __blk_should_fake_timeout(q); return false; } /** * blk_mq_rq_from_pdu - cast a PDU to a request * @pdu: the PDU (Protocol Data Unit) to be casted * * Return: request * * Driver command data is immediately after the request. So subtract request * size to get back to the original request. */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { return pdu - sizeof(struct request); } /** * blk_mq_rq_to_pdu - cast a request to a PDU * @rq: the request to be casted * * Return: pointer to the PDU * * Driver command data is immediately after the request. So add request to get * the PDU. 
*/ static inline void *blk_mq_rq_to_pdu(struct request *rq) { return rq + 1; } #define queue_for_each_hw_ctx(q, hctx, i) \ xa_for_each(&(q)->hctx_table, (i), (hctx)) #define hctx_for_each_ctx(hctx, ctx, i) \ for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) static inline void blk_mq_cleanup_rq(struct request *rq) { if (rq->q->mq_ops->cleanup_rq) rq->q->mq_ops->cleanup_rq(rq); } static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, unsigned int nr_segs) { rq->nr_phys_segments = nr_segs; rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; rq->ioprio = bio_prio(bio); } void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key); static inline bool rq_is_sync(struct request *rq) { return op_is_sync(rq->cmd_flags); } void blk_rq_init(struct request_queue *q, struct request *rq); int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); void blk_rq_unprep_clone(struct request *rq); blk_status_t blk_insert_cloned_request(struct request *rq); struct rq_map_data { struct page **pages; unsigned long offset; unsigned short page_order; unsigned short nr_entries; bool null_mapped; bool from_user; }; int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); int blk_rq_map_user_io(struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t, bool, int, bool, int); int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); int blk_rq_unmap_user(struct bio *); int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); int blk_rq_append_bio(struct request *rq, struct bio *bio); void blk_execute_rq_nowait(struct request *rq, bool at_head); blk_status_t blk_execute_rq(struct request *rq, bool at_head); bool blk_rq_is_poll(struct request *rq); struct req_iterator { struct bvec_iter iter; struct bio *bio; }; #define __rq_for_each_bio(_bio, rq) \ if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) #define rq_for_each_segment(bvl, _rq, _iter) \ __rq_for_each_bio(_iter.bio, _rq) \ bio_for_each_segment(bvl, _iter.bio, _iter.iter) #define rq_for_each_bvec(bvl, _rq, _iter) \ __rq_for_each_bio(_iter.bio, _rq) \ bio_for_each_bvec(bvl, _iter.bio, _iter.iter) #define rq_iter_last(bvec, _iter) \ (_iter.bio->bi_next == NULL && \ bio_iter_last(bvec, _iter.iter)) /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request * blk_rq_cur_bytes() : bytes left in the current segment * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment * blk_rq_stats_sectors() : sectors of the entire request used for stats */ static inline sector_t blk_rq_pos(const struct request *rq) { return rq->__sector; } static inline unsigned int blk_rq_bytes(const struct request *rq) { return rq->__data_len; } static inline int blk_rq_cur_bytes(const struct request *rq) { if (!rq->bio) return 0; if (!bio_has_data(rq->bio)) /* dataless requests such as discard */ return rq->bio->bi_iter.bi_size; return bio_iovec(rq->bio).bv_len; } static inline unsigned int blk_rq_sectors(const struct request *rq) { return blk_rq_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_rq_cur_sectors(const struct request *rq) { return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; } static inline 
unsigned int blk_rq_stats_sectors(const struct request *rq) { return rq->stats_sectors; } /* * Some commands like WRITE SAME have a payload or data transfer size which * is different from the size of the request. Any driver that supports such * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to * calculate the data transfer size. */ static inline unsigned int blk_rq_payload_bytes(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return rq->special_vec.bv_len; return blk_rq_bytes(rq); } /* * Return the first full biovec in the request. The caller needs to check that * there are any bvecs before calling this helper. */ static inline struct bio_vec req_bvec(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return rq->special_vec; return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); } static inline unsigned int blk_rq_count_bios(struct request *rq) { unsigned int nr_bios = 0; struct bio *bio; __rq_for_each_bio(bio, rq) nr_bios++; return nr_bios; } void blk_steal_bios(struct bio_list *list, struct request *rq); /* * Request completion related functions. * * blk_update_request() completes given number of bytes and updates * the request without completing it. */ bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); void blk_abort_request(struct request *); /* * Number of physical segments as sent to the device. * * Normally this is the number of discontiguous data segments sent by the * submitter. But for data-less command like discard we might have no * actual data segments submitted, but the driver might have to add it's * own special payload. In that case we still return 1 here so that this * special payload will be mapped. */ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) return 1; return rq->nr_phys_segments; } /* * Number of discard segments (or ranges) the driver needs to fill in. * Each discard bio merged into a request is counted as one segment. */ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) { return max_t(unsigned short, rq->nr_phys_segments, 1); } int __blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sglist, struct scatterlist **last_sg); static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sglist) { struct scatterlist *last_sg = NULL; return __blk_rq_map_sg(q, rq, sglist, &last_sg); } void blk_dump_rq_flags(struct request *, char *); #ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_rq_zone_no(struct request *rq) { return disk_zone_no(rq->q->disk, blk_rq_pos(rq)); } static inline unsigned int blk_rq_zone_is_seq(struct request *rq) { return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq)); } /** * blk_rq_is_seq_zoned_write() - Check if @rq requires write serialization. * @rq: Request to examine. * * Note: REQ_OP_ZONE_APPEND requests do not require serialization. 
*/ static inline bool blk_rq_is_seq_zoned_write(struct request *rq) { return op_needs_zoned_write_locking(req_op(rq)) && blk_rq_zone_is_seq(rq); } bool blk_req_needs_zone_write_lock(struct request *rq); bool blk_req_zone_write_trylock(struct request *rq); void __blk_req_zone_write_lock(struct request *rq); void __blk_req_zone_write_unlock(struct request *rq); static inline void blk_req_zone_write_lock(struct request *rq) { if (blk_req_needs_zone_write_lock(rq)) __blk_req_zone_write_lock(rq); } static inline void blk_req_zone_write_unlock(struct request *rq) { if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) __blk_req_zone_write_unlock(rq); } static inline bool blk_req_zone_is_write_locked(struct request *rq) { return rq->q->disk->seq_zones_wlock && test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock); } static inline bool blk_req_can_dispatch_to_zone(struct request *rq) { if (!blk_req_needs_zone_write_lock(rq)) return true; return !blk_req_zone_is_write_locked(rq); } #else /* CONFIG_BLK_DEV_ZONED */ static inline bool blk_rq_is_seq_zoned_write(struct request *rq) { return false; } static inline bool blk_req_needs_zone_write_lock(struct request *rq) { return false; } static inline void blk_req_zone_write_lock(struct request *rq) { } static inline void blk_req_zone_write_unlock(struct request *rq) { } static inline bool blk_req_zone_is_write_locked(struct request *rq) { return false; } static inline bool blk_req_can_dispatch_to_zone(struct request *rq) { return true; } #endif /* CONFIG_BLK_DEV_ZONED */ #endif /* BLK_MQ_H */
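Taken together, the header above defines the driver-facing contract: fill in a struct blk_mq_tag_set, supply at least blk_mq_ops::queue_rq, and allocate a gendisk with blk_mq_alloc_disk(). A minimal sketch of that flow, not a real driver: all example_* names are invented, requests are completed immediately with BLK_STS_OK, per-request driver data is reserved via cmd_size and reached with blk_mq_rq_to_pdu(), and capacity setup, add_disk() and teardown are omitted.

/*
 * Minimal sketch of the driver-facing blk-mq API declared above.
 */
#include <linux/module.h>
#include <linux/blk-mq.h>
#include <linux/numa.h>
#include <linux/err.h>

struct example_cmd {
	int status;		/* driver-private per-request data (cmd_size) */
};

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct example_cmd *cmd = blk_mq_rq_to_pdu(rq);

	blk_mq_start_request(rq);	/* start timeout accounting */
	cmd->status = 0;		/* the real I/O would happen here */
	blk_mq_end_request(rq, BLK_STS_OK);
	return BLK_STS_OK;
}

static const struct blk_mq_ops example_mq_ops = {
	.queue_rq = example_queue_rq,
};

static struct blk_mq_tag_set example_tag_set;

static int __init example_init(void)
{
	struct gendisk *disk;
	int ret;

	example_tag_set.ops = &example_mq_ops;
	example_tag_set.nr_hw_queues = 1;
	example_tag_set.queue_depth = BLKDEV_DEFAULT_RQ;
	example_tag_set.numa_node = NUMA_NO_NODE;
	example_tag_set.cmd_size = sizeof(struct example_cmd);
	example_tag_set.flags = BLK_MQ_F_SHOULD_MERGE;

	ret = blk_mq_alloc_tag_set(&example_tag_set);
	if (ret)
		return ret;

	/* a NULL queue_limits pointer requests the default limits */
	disk = blk_mq_alloc_disk(&example_tag_set, NULL, NULL);
	if (IS_ERR(disk)) {
		blk_mq_free_tag_set(&example_tag_set);
		return PTR_ERR(disk);
	}
	/* set_capacity(), disk->fops, add_disk() etc. would follow here */
	return 0;
}
module_init(example_init);
MODULE_LICENSE("GPL");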
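The rq_list_* macros earlier in the header thread a singly linked list through request->rq_next, and a driver that implements blk_mq_ops::queue_rqs() receives such a list in one call. A small sketch under the same assumptions as above: example_queue_rqs() is made up and completes everything synchronously, whereas a real driver would leave whatever it cannot issue on @rqlist for the block layer to queue individually.

/*
 * Sketch of consuming a request list with rq_list_pop() in a
 * queue_rqs()-style batch path.
 */
#include <linux/blk-mq.h>

static void example_queue_rqs(struct request **rqlist)
{
	struct request *rq;

	while ((rq = rq_list_pop(rqlist))) {
		blk_mq_start_request(rq);
		blk_mq_end_request(rq, BLK_STS_OK);
	}
}

/* wired up via .queue_rqs = example_queue_rqs in struct blk_mq_ops */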
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Abstract layer for MIDI v1.0 stream
 * Copyright (c) by Jaroslav Kysela <perex@perex.cz>
 */

#include <sound/core.h>
#include <linux/major.h>
#include <linux/init.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/nospec.h>
#include <sound/rawmidi.h>
#include <sound/info.h>
#include <sound/control.h>
#include <sound/minors.h>
#include <sound/initval.h>
#include <sound/ump.h>

MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
MODULE_DESCRIPTION("Midlevel RawMidi code for ALSA.");
MODULE_LICENSE("GPL");

#ifdef CONFIG_SND_OSSEMUL
static int midi_map[SNDRV_CARDS];
static int amidi_map[SNDRV_CARDS] = {[0 ...
(SNDRV_CARDS-1)] = 1}; module_param_array(midi_map, int, NULL, 0444); MODULE_PARM_DESC(midi_map, "Raw MIDI device number assigned to 1st OSS device."); module_param_array(amidi_map, int, NULL, 0444); MODULE_PARM_DESC(amidi_map, "Raw MIDI device number assigned to 2nd OSS device."); #endif /* CONFIG_SND_OSSEMUL */ static int snd_rawmidi_dev_free(struct snd_device *device); static int snd_rawmidi_dev_register(struct snd_device *device); static int snd_rawmidi_dev_disconnect(struct snd_device *device); static LIST_HEAD(snd_rawmidi_devices); static DEFINE_MUTEX(register_mutex); #define rmidi_err(rmidi, fmt, args...) \ dev_err((rmidi)->dev, fmt, ##args) #define rmidi_warn(rmidi, fmt, args...) \ dev_warn((rmidi)->dev, fmt, ##args) #define rmidi_dbg(rmidi, fmt, args...) \ dev_dbg((rmidi)->dev, fmt, ##args) struct snd_rawmidi_status32 { s32 stream; s32 tstamp_sec; /* Timestamp */ s32 tstamp_nsec; u32 avail; /* available bytes */ u32 xruns; /* count of overruns since last status (in bytes) */ unsigned char reserved[16]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_IOCTL_STATUS32 _IOWR('W', 0x20, struct snd_rawmidi_status32) struct snd_rawmidi_status64 { int stream; u8 rsvd[4]; /* alignment */ s64 tstamp_sec; /* Timestamp */ s64 tstamp_nsec; size_t avail; /* available bytes */ size_t xruns; /* count of overruns since last status (in bytes) */ unsigned char reserved[16]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_IOCTL_STATUS64 _IOWR('W', 0x20, struct snd_rawmidi_status64) #define rawmidi_is_ump(rmidi) \ (IS_ENABLED(CONFIG_SND_UMP) && ((rmidi)->info_flags & SNDRV_RAWMIDI_INFO_UMP)) static struct snd_rawmidi *snd_rawmidi_search(struct snd_card *card, int device) { struct snd_rawmidi *rawmidi; list_for_each_entry(rawmidi, &snd_rawmidi_devices, list) if (rawmidi->card == card && rawmidi->device == device) return rawmidi; return NULL; } static inline unsigned short snd_rawmidi_file_flags(struct file *file) { switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) { case FMODE_WRITE: return SNDRV_RAWMIDI_LFLG_OUTPUT; case FMODE_READ: return SNDRV_RAWMIDI_LFLG_INPUT; default: return SNDRV_RAWMIDI_LFLG_OPEN; } } static inline bool __snd_rawmidi_ready(struct snd_rawmidi_runtime *runtime) { return runtime->avail >= runtime->avail_min; } static bool snd_rawmidi_ready(struct snd_rawmidi_substream *substream) { guard(spinlock_irqsave)(&substream->lock); return __snd_rawmidi_ready(substream->runtime); } static inline int snd_rawmidi_ready_append(struct snd_rawmidi_substream *substream, size_t count) { struct snd_rawmidi_runtime *runtime = substream->runtime; return runtime->avail >= runtime->avail_min && (!substream->append || runtime->avail >= count); } static void snd_rawmidi_input_event_work(struct work_struct *work) { struct snd_rawmidi_runtime *runtime = container_of(work, struct snd_rawmidi_runtime, event_work); if (runtime->event) runtime->event(runtime->substream); } /* buffer refcount management: call with substream->lock held */ static inline void snd_rawmidi_buffer_ref(struct snd_rawmidi_runtime *runtime) { runtime->buffer_ref++; } static inline void snd_rawmidi_buffer_unref(struct snd_rawmidi_runtime *runtime) { runtime->buffer_ref--; } static void snd_rawmidi_buffer_ref_sync(struct snd_rawmidi_substream *substream) { int loop = HZ; spin_lock_irq(&substream->lock); while (substream->runtime->buffer_ref) { spin_unlock_irq(&substream->lock); if (!--loop) { rmidi_err(substream->rmidi, "Buffer ref sync timeout\n"); return; } schedule_timeout_uninterruptible(1); 
spin_lock_irq(&substream->lock); } spin_unlock_irq(&substream->lock); } static int snd_rawmidi_runtime_create(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; runtime = kzalloc(sizeof(*runtime), GFP_KERNEL); if (!runtime) return -ENOMEM; runtime->substream = substream; init_waitqueue_head(&runtime->sleep); INIT_WORK(&runtime->event_work, snd_rawmidi_input_event_work); runtime->event = NULL; runtime->buffer_size = PAGE_SIZE; runtime->avail_min = 1; if (substream->stream == SNDRV_RAWMIDI_STREAM_INPUT) runtime->avail = 0; else runtime->avail = runtime->buffer_size; runtime->buffer = kvzalloc(runtime->buffer_size, GFP_KERNEL); if (!runtime->buffer) { kfree(runtime); return -ENOMEM; } runtime->appl_ptr = runtime->hw_ptr = 0; substream->runtime = runtime; if (rawmidi_is_ump(substream->rmidi)) runtime->align = 3; return 0; } /* get the current alignment (either 0 or 3) */ static inline int get_align(struct snd_rawmidi_runtime *runtime) { if (IS_ENABLED(CONFIG_SND_UMP)) return runtime->align; else return 0; } /* get the trimmed size with the current alignment */ #define get_aligned_size(runtime, size) ((size) & ~get_align(runtime)) static int snd_rawmidi_runtime_free(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime = substream->runtime; kvfree(runtime->buffer); kfree(runtime); substream->runtime = NULL; return 0; } static inline void snd_rawmidi_output_trigger(struct snd_rawmidi_substream *substream, int up) { if (!substream->opened) return; substream->ops->trigger(substream, up); } static void snd_rawmidi_input_trigger(struct snd_rawmidi_substream *substream, int up) { if (!substream->opened) return; substream->ops->trigger(substream, up); if (!up) cancel_work_sync(&substream->runtime->event_work); } static void __reset_runtime_ptrs(struct snd_rawmidi_runtime *runtime, bool is_input) { runtime->drain = 0; runtime->appl_ptr = runtime->hw_ptr = 0; runtime->avail = is_input ? 
0 : runtime->buffer_size; } static void reset_runtime_ptrs(struct snd_rawmidi_substream *substream, bool is_input) { guard(spinlock_irqsave)(&substream->lock); if (substream->opened && substream->runtime) __reset_runtime_ptrs(substream->runtime, is_input); } int snd_rawmidi_drop_output(struct snd_rawmidi_substream *substream) { snd_rawmidi_output_trigger(substream, 0); reset_runtime_ptrs(substream, false); return 0; } EXPORT_SYMBOL(snd_rawmidi_drop_output); int snd_rawmidi_drain_output(struct snd_rawmidi_substream *substream) { int err = 0; long timeout; struct snd_rawmidi_runtime *runtime; scoped_guard(spinlock_irq, &substream->lock) { runtime = substream->runtime; if (!substream->opened || !runtime || !runtime->buffer) return -EINVAL; snd_rawmidi_buffer_ref(runtime); runtime->drain = 1; } timeout = wait_event_interruptible_timeout(runtime->sleep, (runtime->avail >= runtime->buffer_size), 10*HZ); scoped_guard(spinlock_irq, &substream->lock) { if (signal_pending(current)) err = -ERESTARTSYS; if (runtime->avail < runtime->buffer_size && !timeout) { rmidi_warn(substream->rmidi, "rawmidi drain error (avail = %li, buffer_size = %li)\n", (long)runtime->avail, (long)runtime->buffer_size); err = -EIO; } runtime->drain = 0; } if (err != -ERESTARTSYS) { /* we need wait a while to make sure that Tx FIFOs are empty */ if (substream->ops->drain) substream->ops->drain(substream); else msleep(50); snd_rawmidi_drop_output(substream); } scoped_guard(spinlock_irq, &substream->lock) snd_rawmidi_buffer_unref(runtime); return err; } EXPORT_SYMBOL(snd_rawmidi_drain_output); int snd_rawmidi_drain_input(struct snd_rawmidi_substream *substream) { snd_rawmidi_input_trigger(substream, 0); reset_runtime_ptrs(substream, true); return 0; } EXPORT_SYMBOL(snd_rawmidi_drain_input); /* look for an available substream for the given stream direction; * if a specific subdevice is given, try to assign it */ static int assign_substream(struct snd_rawmidi *rmidi, int subdevice, int stream, int mode, struct snd_rawmidi_substream **sub_ret) { struct snd_rawmidi_substream *substream; struct snd_rawmidi_str *s = &rmidi->streams[stream]; static const unsigned int info_flags[2] = { [SNDRV_RAWMIDI_STREAM_OUTPUT] = SNDRV_RAWMIDI_INFO_OUTPUT, [SNDRV_RAWMIDI_STREAM_INPUT] = SNDRV_RAWMIDI_INFO_INPUT, }; if (!(rmidi->info_flags & info_flags[stream])) return -ENXIO; if (subdevice >= 0 && subdevice >= s->substream_count) return -ENODEV; list_for_each_entry(substream, &s->substreams, list) { if (substream->opened) { if (stream == SNDRV_RAWMIDI_STREAM_INPUT || !(mode & SNDRV_RAWMIDI_LFLG_APPEND) || !substream->append) continue; } if (subdevice < 0 || subdevice == substream->number) { *sub_ret = substream; return 0; } } return -EAGAIN; } /* open and do ref-counting for the given substream */ static int open_substream(struct snd_rawmidi *rmidi, struct snd_rawmidi_substream *substream, int mode) { int err; if (substream->use_count == 0) { err = snd_rawmidi_runtime_create(substream); if (err < 0) return err; err = substream->ops->open(substream); if (err < 0) { snd_rawmidi_runtime_free(substream); return err; } guard(spinlock_irq)(&substream->lock); substream->opened = 1; substream->active_sensing = 0; if (mode & SNDRV_RAWMIDI_LFLG_APPEND) substream->append = 1; substream->pid = get_pid(task_pid(current)); rmidi->streams[substream->stream].substream_opened++; } substream->use_count++; return 0; } static void close_substream(struct snd_rawmidi *rmidi, struct snd_rawmidi_substream *substream, int cleanup); static int rawmidi_open_priv(struct 
snd_rawmidi *rmidi, int subdevice, int mode, struct snd_rawmidi_file *rfile) { struct snd_rawmidi_substream *sinput = NULL, *soutput = NULL; int err; rfile->input = rfile->output = NULL; if (mode & SNDRV_RAWMIDI_LFLG_INPUT) { err = assign_substream(rmidi, subdevice, SNDRV_RAWMIDI_STREAM_INPUT, mode, &sinput); if (err < 0) return err; } if (mode & SNDRV_RAWMIDI_LFLG_OUTPUT) { err = assign_substream(rmidi, subdevice, SNDRV_RAWMIDI_STREAM_OUTPUT, mode, &soutput); if (err < 0) return err; } if (sinput) { err = open_substream(rmidi, sinput, mode); if (err < 0) return err; } if (soutput) { err = open_substream(rmidi, soutput, mode); if (err < 0) { if (sinput) close_substream(rmidi, sinput, 0); return err; } } rfile->rmidi = rmidi; rfile->input = sinput; rfile->output = soutput; return 0; } /* called from sound/core/seq/seq_midi.c */ int snd_rawmidi_kernel_open(struct snd_rawmidi *rmidi, int subdevice, int mode, struct snd_rawmidi_file *rfile) { int err; if (snd_BUG_ON(!rfile)) return -EINVAL; if (!try_module_get(rmidi->card->module)) return -ENXIO; guard(mutex)(&rmidi->open_mutex); err = rawmidi_open_priv(rmidi, subdevice, mode, rfile); if (err < 0) module_put(rmidi->card->module); return err; } EXPORT_SYMBOL(snd_rawmidi_kernel_open); static int snd_rawmidi_open(struct inode *inode, struct file *file) { int maj = imajor(inode); struct snd_card *card; int subdevice; unsigned short fflags; int err; struct snd_rawmidi *rmidi; struct snd_rawmidi_file *rawmidi_file = NULL; wait_queue_entry_t wait; if ((file->f_flags & O_APPEND) && !(file->f_flags & O_NONBLOCK)) return -EINVAL; /* invalid combination */ err = stream_open(inode, file); if (err < 0) return err; if (maj == snd_major) { rmidi = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_RAWMIDI); #ifdef CONFIG_SND_OSSEMUL } else if (maj == SOUND_MAJOR) { rmidi = snd_lookup_oss_minor_data(iminor(inode), SNDRV_OSS_DEVICE_TYPE_MIDI); #endif } else return -ENXIO; if (rmidi == NULL) return -ENODEV; if (!try_module_get(rmidi->card->module)) { snd_card_unref(rmidi->card); return -ENXIO; } mutex_lock(&rmidi->open_mutex); card = rmidi->card; err = snd_card_file_add(card, file); if (err < 0) goto __error_card; fflags = snd_rawmidi_file_flags(file); if ((file->f_flags & O_APPEND) || maj == SOUND_MAJOR) /* OSS emul? 
*/ fflags |= SNDRV_RAWMIDI_LFLG_APPEND; rawmidi_file = kmalloc(sizeof(*rawmidi_file), GFP_KERNEL); if (rawmidi_file == NULL) { err = -ENOMEM; goto __error; } rawmidi_file->user_pversion = 0; init_waitqueue_entry(&wait, current); add_wait_queue(&rmidi->open_wait, &wait); while (1) { subdevice = snd_ctl_get_preferred_subdevice(card, SND_CTL_SUBDEV_RAWMIDI); err = rawmidi_open_priv(rmidi, subdevice, fflags, rawmidi_file); if (err >= 0) break; if (err == -EAGAIN) { if (file->f_flags & O_NONBLOCK) { err = -EBUSY; break; } } else break; set_current_state(TASK_INTERRUPTIBLE); mutex_unlock(&rmidi->open_mutex); schedule(); mutex_lock(&rmidi->open_mutex); if (rmidi->card->shutdown) { err = -ENODEV; break; } if (signal_pending(current)) { err = -ERESTARTSYS; break; } } remove_wait_queue(&rmidi->open_wait, &wait); if (err < 0) { kfree(rawmidi_file); goto __error; } #ifdef CONFIG_SND_OSSEMUL if (rawmidi_file->input && rawmidi_file->input->runtime) rawmidi_file->input->runtime->oss = (maj == SOUND_MAJOR); if (rawmidi_file->output && rawmidi_file->output->runtime) rawmidi_file->output->runtime->oss = (maj == SOUND_MAJOR); #endif file->private_data = rawmidi_file; mutex_unlock(&rmidi->open_mutex); snd_card_unref(rmidi->card); return 0; __error: snd_card_file_remove(card, file); __error_card: mutex_unlock(&rmidi->open_mutex); module_put(rmidi->card->module); snd_card_unref(rmidi->card); return err; } static void close_substream(struct snd_rawmidi *rmidi, struct snd_rawmidi_substream *substream, int cleanup) { if (--substream->use_count) return; if (cleanup) { if (substream->stream == SNDRV_RAWMIDI_STREAM_INPUT) snd_rawmidi_input_trigger(substream, 0); else { if (substream->active_sensing) { unsigned char buf = 0xfe; /* sending single active sensing message * to shut the device up */ snd_rawmidi_kernel_write(substream, &buf, 1); } if (snd_rawmidi_drain_output(substream) == -ERESTARTSYS) snd_rawmidi_output_trigger(substream, 0); } snd_rawmidi_buffer_ref_sync(substream); } scoped_guard(spinlock_irq, &substream->lock) { substream->opened = 0; substream->append = 0; } substream->ops->close(substream); if (substream->runtime->private_free) substream->runtime->private_free(substream); snd_rawmidi_runtime_free(substream); put_pid(substream->pid); substream->pid = NULL; rmidi->streams[substream->stream].substream_opened--; } static void rawmidi_release_priv(struct snd_rawmidi_file *rfile) { struct snd_rawmidi *rmidi; rmidi = rfile->rmidi; guard(mutex)(&rmidi->open_mutex); if (rfile->input) { close_substream(rmidi, rfile->input, 1); rfile->input = NULL; } if (rfile->output) { close_substream(rmidi, rfile->output, 1); rfile->output = NULL; } rfile->rmidi = NULL; wake_up(&rmidi->open_wait); } /* called from sound/core/seq/seq_midi.c */ int snd_rawmidi_kernel_release(struct snd_rawmidi_file *rfile) { struct snd_rawmidi *rmidi; if (snd_BUG_ON(!rfile)) return -ENXIO; rmidi = rfile->rmidi; rawmidi_release_priv(rfile); module_put(rmidi->card->module); return 0; } EXPORT_SYMBOL(snd_rawmidi_kernel_release); static int snd_rawmidi_release(struct inode *inode, struct file *file) { struct snd_rawmidi_file *rfile; struct snd_rawmidi *rmidi; struct module *module; rfile = file->private_data; rmidi = rfile->rmidi; rawmidi_release_priv(rfile); kfree(rfile); module = rmidi->card->module; snd_card_file_remove(rmidi->card, file); module_put(module); return 0; } static int snd_rawmidi_info(struct snd_rawmidi_substream *substream, struct snd_rawmidi_info *info) { struct snd_rawmidi *rmidi; if (substream == NULL) return -ENODEV; rmidi = 
substream->rmidi; memset(info, 0, sizeof(*info)); info->card = rmidi->card->number; info->device = rmidi->device; info->subdevice = substream->number; info->stream = substream->stream; info->flags = rmidi->info_flags; strcpy(info->id, rmidi->id); strcpy(info->name, rmidi->name); strcpy(info->subname, substream->name); info->subdevices_count = substream->pstr->substream_count; info->subdevices_avail = (substream->pstr->substream_count - substream->pstr->substream_opened); return 0; } static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream, struct snd_rawmidi_info __user *_info) { struct snd_rawmidi_info info; int err; err = snd_rawmidi_info(substream, &info); if (err < 0) return err; if (copy_to_user(_info, &info, sizeof(struct snd_rawmidi_info))) return -EFAULT; return 0; } static int __snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) { struct snd_rawmidi *rmidi; struct snd_rawmidi_str *pstr; struct snd_rawmidi_substream *substream; rmidi = snd_rawmidi_search(card, info->device); if (!rmidi) return -ENXIO; if (info->stream < 0 || info->stream > 1) return -EINVAL; info->stream = array_index_nospec(info->stream, 2); pstr = &rmidi->streams[info->stream]; if (pstr->substream_count == 0) return -ENOENT; if (info->subdevice >= pstr->substream_count) return -ENXIO; list_for_each_entry(substream, &pstr->substreams, list) { if ((unsigned int)substream->number == info->subdevice) return snd_rawmidi_info(substream, info); } return -ENXIO; } int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) { guard(mutex)(&register_mutex); return __snd_rawmidi_info_select(card, info); } EXPORT_SYMBOL(snd_rawmidi_info_select); static int snd_rawmidi_info_select_user(struct snd_card *card, struct snd_rawmidi_info __user *_info) { int err; struct snd_rawmidi_info info; if (get_user(info.device, &_info->device)) return -EFAULT; if (get_user(info.stream, &_info->stream)) return -EFAULT; if (get_user(info.subdevice, &_info->subdevice)) return -EFAULT; err = snd_rawmidi_info_select(card, &info); if (err < 0) return err; if (copy_to_user(_info, &info, sizeof(struct snd_rawmidi_info))) return -EFAULT; return 0; } static int resize_runtime_buffer(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params *params, bool is_input) { struct snd_rawmidi_runtime *runtime = substream->runtime; char *newbuf, *oldbuf; unsigned int framing = params->mode & SNDRV_RAWMIDI_MODE_FRAMING_MASK; if (params->buffer_size < 32 || params->buffer_size > 1024L * 1024L) return -EINVAL; if (framing == SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP && (params->buffer_size & 0x1f) != 0) return -EINVAL; if (params->avail_min < 1 || params->avail_min > params->buffer_size) return -EINVAL; if (params->buffer_size & get_align(runtime)) return -EINVAL; if (params->buffer_size != runtime->buffer_size) { newbuf = kvzalloc(params->buffer_size, GFP_KERNEL); if (!newbuf) return -ENOMEM; guard(spinlock_irq)(&substream->lock); if (runtime->buffer_ref) { kvfree(newbuf); return -EBUSY; } oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; __reset_runtime_ptrs(runtime, is_input); kvfree(oldbuf); } runtime->avail_min = params->avail_min; return 0; } int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params *params) { int err; snd_rawmidi_drain_output(substream); guard(mutex)(&substream->rmidi->open_mutex); if (substream->append && substream->use_count > 1) return -EBUSY; err = resize_runtime_buffer(substream, params, 
false); if (!err) substream->active_sensing = !params->no_active_sensing; return err; } EXPORT_SYMBOL(snd_rawmidi_output_params); int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params *params) { unsigned int framing = params->mode & SNDRV_RAWMIDI_MODE_FRAMING_MASK; unsigned int clock_type = params->mode & SNDRV_RAWMIDI_MODE_CLOCK_MASK; int err; snd_rawmidi_drain_input(substream); guard(mutex)(&substream->rmidi->open_mutex); if (framing == SNDRV_RAWMIDI_MODE_FRAMING_NONE && clock_type != SNDRV_RAWMIDI_MODE_CLOCK_NONE) err = -EINVAL; else if (clock_type > SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC_RAW) err = -EINVAL; else if (framing > SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP) err = -EINVAL; else err = resize_runtime_buffer(substream, params, true); if (!err) { substream->framing = framing; substream->clock_type = clock_type; } return 0; } EXPORT_SYMBOL(snd_rawmidi_input_params); static int snd_rawmidi_output_status(struct snd_rawmidi_substream *substream, struct snd_rawmidi_status64 *status) { struct snd_rawmidi_runtime *runtime = substream->runtime; memset(status, 0, sizeof(*status)); status->stream = SNDRV_RAWMIDI_STREAM_OUTPUT; guard(spinlock_irq)(&substream->lock); status->avail = runtime->avail; return 0; } static int snd_rawmidi_input_status(struct snd_rawmidi_substream *substream, struct snd_rawmidi_status64 *status) { struct snd_rawmidi_runtime *runtime = substream->runtime; memset(status, 0, sizeof(*status)); status->stream = SNDRV_RAWMIDI_STREAM_INPUT; guard(spinlock_irq)(&substream->lock); status->avail = runtime->avail; status->xruns = runtime->xruns; runtime->xruns = 0; return 0; } static int snd_rawmidi_ioctl_status32(struct snd_rawmidi_file *rfile, struct snd_rawmidi_status32 __user *argp) { int err = 0; struct snd_rawmidi_status32 __user *status = argp; struct snd_rawmidi_status32 status32; struct snd_rawmidi_status64 status64; if (copy_from_user(&status32, argp, sizeof(struct snd_rawmidi_status32))) return -EFAULT; switch (status32.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status64); break; case SNDRV_RAWMIDI_STREAM_INPUT: if (rfile->input == NULL) return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status64); break; default: return -EINVAL; } if (err < 0) return err; status32 = (struct snd_rawmidi_status32) { .stream = status64.stream, .tstamp_sec = status64.tstamp_sec, .tstamp_nsec = status64.tstamp_nsec, .avail = status64.avail, .xruns = status64.xruns, }; if (copy_to_user(status, &status32, sizeof(*status))) return -EFAULT; return 0; } static int snd_rawmidi_ioctl_status64(struct snd_rawmidi_file *rfile, struct snd_rawmidi_status64 __user *argp) { int err = 0; struct snd_rawmidi_status64 status; if (copy_from_user(&status, argp, sizeof(struct snd_rawmidi_status64))) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: if (rfile->input == NULL) return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: return -EINVAL; } if (err < 0) return err; if (copy_to_user(argp, &status, sizeof(struct snd_rawmidi_status64))) return -EFAULT; return 0; } static long snd_rawmidi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_rawmidi_file *rfile; struct snd_rawmidi *rmidi; void __user *argp = (void __user *)arg; rfile = file->private_data; if 
(((cmd >> 8) & 0xff) != 'W') return -ENOTTY; switch (cmd) { case SNDRV_RAWMIDI_IOCTL_PVERSION: return put_user(SNDRV_RAWMIDI_VERSION, (int __user *)argp) ? -EFAULT : 0; case SNDRV_RAWMIDI_IOCTL_INFO: { int stream; struct snd_rawmidi_info __user *info = argp; if (get_user(stream, &info->stream)) return -EFAULT; switch (stream) { case SNDRV_RAWMIDI_STREAM_INPUT: return snd_rawmidi_info_user(rfile->input, info); case SNDRV_RAWMIDI_STREAM_OUTPUT: return snd_rawmidi_info_user(rfile->output, info); default: return -EINVAL; } } case SNDRV_RAWMIDI_IOCTL_USER_PVERSION: if (get_user(rfile->user_pversion, (unsigned int __user *)arg)) return -EFAULT; return 0; case SNDRV_RAWMIDI_IOCTL_PARAMS: { struct snd_rawmidi_params params; if (copy_from_user(&params, argp, sizeof(struct snd_rawmidi_params))) return -EFAULT; if (rfile->user_pversion < SNDRV_PROTOCOL_VERSION(2, 0, 2)) { params.mode = 0; memset(params.reserved, 0, sizeof(params.reserved)); } switch (params.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; return snd_rawmidi_output_params(rfile->output, &params); case SNDRV_RAWMIDI_STREAM_INPUT: if (rfile->input == NULL) return -EINVAL; return snd_rawmidi_input_params(rfile->input, &params); default: return -EINVAL; } } case SNDRV_RAWMIDI_IOCTL_STATUS32: return snd_rawmidi_ioctl_status32(rfile, argp); case SNDRV_RAWMIDI_IOCTL_STATUS64: return snd_rawmidi_ioctl_status64(rfile, argp); case SNDRV_RAWMIDI_IOCTL_DROP: { int val; if (get_user(val, (int __user *) argp)) return -EFAULT; switch (val) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; return snd_rawmidi_drop_output(rfile->output); default: return -EINVAL; } } case SNDRV_RAWMIDI_IOCTL_DRAIN: { int val; if (get_user(val, (int __user *) argp)) return -EFAULT; switch (val) { case SNDRV_RAWMIDI_STREAM_OUTPUT: if (rfile->output == NULL) return -EINVAL; return snd_rawmidi_drain_output(rfile->output); case SNDRV_RAWMIDI_STREAM_INPUT: if (rfile->input == NULL) return -EINVAL; return snd_rawmidi_drain_input(rfile->input); default: return -EINVAL; } } default: rmidi = rfile->rmidi; if (rmidi->ops && rmidi->ops->ioctl) return rmidi->ops->ioctl(rmidi, cmd, argp); rmidi_dbg(rmidi, "rawmidi: unknown command = 0x%x\n", cmd); } return -ENOTTY; } /* ioctl to find the next device; either legacy or UMP depending on @find_ump */ static int snd_rawmidi_next_device(struct snd_card *card, int __user *argp, bool find_ump) { struct snd_rawmidi *rmidi; int device; bool is_ump; if (get_user(device, argp)) return -EFAULT; if (device >= SNDRV_RAWMIDI_DEVICES) /* next device is -1 */ device = SNDRV_RAWMIDI_DEVICES - 1; scoped_guard(mutex, &register_mutex) { device = device < 0 ? 
0 : device + 1; for (; device < SNDRV_RAWMIDI_DEVICES; device++) { rmidi = snd_rawmidi_search(card, device); if (!rmidi) continue; is_ump = rawmidi_is_ump(rmidi); if (find_ump == is_ump) break; } if (device == SNDRV_RAWMIDI_DEVICES) device = -1; } if (put_user(device, argp)) return -EFAULT; return 0; } #if IS_ENABLED(CONFIG_SND_UMP) /* inquiry of UMP endpoint and block info via control API */ static int snd_rawmidi_call_ump_ioctl(struct snd_card *card, int cmd, void __user *argp) { struct snd_ump_endpoint_info __user *info = argp; struct snd_rawmidi *rmidi; int device; if (get_user(device, &info->device)) return -EFAULT; guard(mutex)(&register_mutex); rmidi = snd_rawmidi_search(card, device); if (rmidi && rmidi->ops && rmidi->ops->ioctl) return rmidi->ops->ioctl(rmidi, cmd, argp); else return -ENXIO; } #endif static int snd_rawmidi_control_ioctl(struct snd_card *card, struct snd_ctl_file *control, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; switch (cmd) { case SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE: return snd_rawmidi_next_device(card, argp, false); #if IS_ENABLED(CONFIG_SND_UMP) case SNDRV_CTL_IOCTL_UMP_NEXT_DEVICE: return snd_rawmidi_next_device(card, argp, true); case SNDRV_CTL_IOCTL_UMP_ENDPOINT_INFO: return snd_rawmidi_call_ump_ioctl(card, SNDRV_UMP_IOCTL_ENDPOINT_INFO, argp); case SNDRV_CTL_IOCTL_UMP_BLOCK_INFO: return snd_rawmidi_call_ump_ioctl(card, SNDRV_UMP_IOCTL_BLOCK_INFO, argp); #endif case SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE: { int val; if (get_user(val, (int __user *)argp)) return -EFAULT; control->preferred_subdevice[SND_CTL_SUBDEV_RAWMIDI] = val; return 0; } case SNDRV_CTL_IOCTL_RAWMIDI_INFO: return snd_rawmidi_info_select_user(card, argp); } return -ENOIOCTLCMD; } static int receive_with_tstamp_framing(struct snd_rawmidi_substream *substream, const unsigned char *buffer, int src_count, const struct timespec64 *tstamp) { struct snd_rawmidi_runtime *runtime = substream->runtime; struct snd_rawmidi_framing_tstamp *dest_ptr; struct snd_rawmidi_framing_tstamp frame = { .tv_sec = tstamp->tv_sec, .tv_nsec = tstamp->tv_nsec }; int orig_count = src_count; int frame_size = sizeof(struct snd_rawmidi_framing_tstamp); int align = get_align(runtime); BUILD_BUG_ON(frame_size != 0x20); if (snd_BUG_ON((runtime->hw_ptr & 0x1f) != 0)) return -EINVAL; while (src_count > align) { if ((int)(runtime->buffer_size - runtime->avail) < frame_size) { runtime->xruns += src_count; break; } if (src_count >= SNDRV_RAWMIDI_FRAMING_DATA_LENGTH) frame.length = SNDRV_RAWMIDI_FRAMING_DATA_LENGTH; else { frame.length = get_aligned_size(runtime, src_count); if (!frame.length) break; memset(frame.data, 0, SNDRV_RAWMIDI_FRAMING_DATA_LENGTH); } memcpy(frame.data, buffer, frame.length); buffer += frame.length; src_count -= frame.length; dest_ptr = (struct snd_rawmidi_framing_tstamp *) (runtime->buffer + runtime->hw_ptr); *dest_ptr = frame; runtime->avail += frame_size; runtime->hw_ptr += frame_size; runtime->hw_ptr %= runtime->buffer_size; } return orig_count - src_count; } static struct timespec64 get_framing_tstamp(struct snd_rawmidi_substream *substream) { struct timespec64 ts64 = {0, 0}; switch (substream->clock_type) { case SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC_RAW: ktime_get_raw_ts64(&ts64); break; case SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC: ktime_get_ts64(&ts64); break; case SNDRV_RAWMIDI_MODE_CLOCK_REALTIME: ktime_get_real_ts64(&ts64); break; } return ts64; } /** * snd_rawmidi_receive - receive the input data from the device * @substream: the rawmidi substream * @buffer: the 
buffer pointer * @count: the data size to read * * Reads the data from the internal buffer. * * Return: The size of read data, or a negative error code on failure. */ int snd_rawmidi_receive(struct snd_rawmidi_substream *substream, const unsigned char *buffer, int count) { struct timespec64 ts64 = get_framing_tstamp(substream); int result = 0, count1; struct snd_rawmidi_runtime *runtime; guard(spinlock_irqsave)(&substream->lock); if (!substream->opened) return -EBADFD; runtime = substream->runtime; if (!runtime || !runtime->buffer) { rmidi_dbg(substream->rmidi, "snd_rawmidi_receive: input is not active!!!\n"); return -EINVAL; } count = get_aligned_size(runtime, count); if (!count) return result; if (substream->framing == SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP) { result = receive_with_tstamp_framing(substream, buffer, count, &ts64); } else if (count == 1) { /* special case, faster code */ substream->bytes++; if (runtime->avail < runtime->buffer_size) { runtime->buffer[runtime->hw_ptr++] = buffer[0]; runtime->hw_ptr %= runtime->buffer_size; runtime->avail++; result++; } else { runtime->xruns++; } } else { substream->bytes += count; count1 = runtime->buffer_size - runtime->hw_ptr; if (count1 > count) count1 = count; if (count1 > (int)(runtime->buffer_size - runtime->avail)) count1 = runtime->buffer_size - runtime->avail; count1 = get_aligned_size(runtime, count1); if (!count1) return result; memcpy(runtime->buffer + runtime->hw_ptr, buffer, count1); runtime->hw_ptr += count1; runtime->hw_ptr %= runtime->buffer_size; runtime->avail += count1; count -= count1; result += count1; if (count > 0) { buffer += count1; count1 = count; if (count1 > (int)(runtime->buffer_size - runtime->avail)) { count1 = runtime->buffer_size - runtime->avail; runtime->xruns += count - count1; } if (count1 > 0) { memcpy(runtime->buffer, buffer, count1); runtime->hw_ptr = count1; runtime->avail += count1; result += count1; } } } if (result > 0) { if (runtime->event) schedule_work(&runtime->event_work); else if (__snd_rawmidi_ready(runtime)) wake_up(&runtime->sleep); } return result; } EXPORT_SYMBOL(snd_rawmidi_receive); static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, unsigned char __user *userbuf, unsigned char *kernelbuf, long count) { unsigned long flags; long result = 0, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; unsigned long appl_ptr; int err = 0; spin_lock_irqsave(&substream->lock, flags); snd_rawmidi_buffer_ref(runtime); while (count > 0 && runtime->avail) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) count1 = count; if (count1 > (int)runtime->avail) count1 = runtime->avail; /* update runtime->appl_ptr before unlocking for userbuf */ appl_ptr = runtime->appl_ptr; runtime->appl_ptr += count1; runtime->appl_ptr %= runtime->buffer_size; runtime->avail -= count1; if (kernelbuf) memcpy(kernelbuf + result, runtime->buffer + appl_ptr, count1); if (userbuf) { spin_unlock_irqrestore(&substream->lock, flags); if (copy_to_user(userbuf + result, runtime->buffer + appl_ptr, count1)) err = -EFAULT; spin_lock_irqsave(&substream->lock, flags); if (err) goto out; } result += count1; count -= count1; } out: snd_rawmidi_buffer_unref(runtime); spin_unlock_irqrestore(&substream->lock, flags); return result > 0 ? 
result : err; } long snd_rawmidi_kernel_read(struct snd_rawmidi_substream *substream, unsigned char *buf, long count) { snd_rawmidi_input_trigger(substream, 1); return snd_rawmidi_kernel_read1(substream, NULL/*userbuf*/, buf, count); } EXPORT_SYMBOL(snd_rawmidi_kernel_read); static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { long result; int count1; struct snd_rawmidi_file *rfile; struct snd_rawmidi_substream *substream; struct snd_rawmidi_runtime *runtime; rfile = file->private_data; substream = rfile->input; if (substream == NULL) return -EIO; runtime = substream->runtime; snd_rawmidi_input_trigger(substream, 1); result = 0; while (count > 0) { spin_lock_irq(&substream->lock); while (!__snd_rawmidi_ready(runtime)) { wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { spin_unlock_irq(&substream->lock); return result > 0 ? result : -EAGAIN; } init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&substream->lock); schedule(); remove_wait_queue(&runtime->sleep, &wait); if (rfile->rmidi->card->shutdown) return -ENODEV; if (signal_pending(current)) return result > 0 ? result : -ERESTARTSYS; spin_lock_irq(&substream->lock); if (!runtime->avail) { spin_unlock_irq(&substream->lock); return result > 0 ? result : -EIO; } } spin_unlock_irq(&substream->lock); count1 = snd_rawmidi_kernel_read1(substream, (unsigned char __user *)buf, NULL/*kernelbuf*/, count); if (count1 < 0) return result > 0 ? result : count1; result += count1; buf += count1; count -= count1; } return result; } /** * snd_rawmidi_transmit_empty - check whether the output buffer is empty * @substream: the rawmidi substream * * Return: 1 if the internal output buffer is empty, 0 if not. */ int snd_rawmidi_transmit_empty(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; guard(spinlock_irqsave)(&substream->lock); runtime = substream->runtime; if (!substream->opened || !runtime || !runtime->buffer) { rmidi_dbg(substream->rmidi, "snd_rawmidi_transmit_empty: output is not active!!!\n"); return 1; } return (runtime->avail >= runtime->buffer_size); } EXPORT_SYMBOL(snd_rawmidi_transmit_empty); /* * __snd_rawmidi_transmit_peek - copy data from the internal buffer * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: data size to transfer * * This is a variant of snd_rawmidi_transmit_peek() without spinlock. 
*/ static int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { int result, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; if (runtime->buffer == NULL) { rmidi_dbg(substream->rmidi, "snd_rawmidi_transmit_peek: output is not active!!!\n"); return -EINVAL; } result = 0; if (runtime->avail >= runtime->buffer_size) { /* warning: lowlevel layer MUST trigger down the hardware */ goto __skip; } if (count == 1) { /* special case, faster code */ *buffer = runtime->buffer[runtime->hw_ptr]; result++; } else { count1 = runtime->buffer_size - runtime->hw_ptr; if (count1 > count) count1 = count; if (count1 > (int)(runtime->buffer_size - runtime->avail)) count1 = runtime->buffer_size - runtime->avail; count1 = get_aligned_size(runtime, count1); if (!count1) goto __skip; memcpy(buffer, runtime->buffer + runtime->hw_ptr, count1); count -= count1; result += count1; if (count > 0) { if (count > (int)(runtime->buffer_size - runtime->avail - count1)) count = runtime->buffer_size - runtime->avail - count1; count = get_aligned_size(runtime, count); if (!count) goto __skip; memcpy(buffer + count1, runtime->buffer, count); result += count; } } __skip: return result; } /** * snd_rawmidi_transmit_peek - copy data from the internal buffer * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: data size to transfer * * Copies data from the internal output buffer to the given buffer. * * Call this in the interrupt handler when the midi output is ready, * and call snd_rawmidi_transmit_ack() after the transmission is * finished. * * Return: The size of copied data, or a negative error code on failure. */ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { guard(spinlock_irqsave)(&substream->lock); if (!substream->opened || !substream->runtime) return -EBADFD; return __snd_rawmidi_transmit_peek(substream, buffer, count); } EXPORT_SYMBOL(snd_rawmidi_transmit_peek); /* * __snd_rawmidi_transmit_ack - acknowledge the transmission * @substream: the rawmidi substream * @count: the transferred count * * This is a variant of __snd_rawmidi_transmit_ack() without spinlock. */ static int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) { struct snd_rawmidi_runtime *runtime = substream->runtime; if (runtime->buffer == NULL) { rmidi_dbg(substream->rmidi, "snd_rawmidi_transmit_ack: output is not active!!!\n"); return -EINVAL; } snd_BUG_ON(runtime->avail + count > runtime->buffer_size); count = get_aligned_size(runtime, count); runtime->hw_ptr += count; runtime->hw_ptr %= runtime->buffer_size; runtime->avail += count; substream->bytes += count; if (count > 0) { if (runtime->drain || __snd_rawmidi_ready(runtime)) wake_up(&runtime->sleep); } return count; } /** * snd_rawmidi_transmit_ack - acknowledge the transmission * @substream: the rawmidi substream * @count: the transferred count * * Advances the hardware pointer for the internal output buffer with * the given size and updates the condition. * Call after the transmission is finished. * * Return: The advanced size if successful, or a negative error code on failure. 
*/ int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count) { guard(spinlock_irqsave)(&substream->lock); if (!substream->opened || !substream->runtime) return -EBADFD; return __snd_rawmidi_transmit_ack(substream, count); } EXPORT_SYMBOL(snd_rawmidi_transmit_ack); /** * snd_rawmidi_transmit - copy from the buffer to the device * @substream: the rawmidi substream * @buffer: the buffer pointer * @count: the data size to transfer * * Copies data from the buffer to the device and advances the pointer. * * Return: The copied size if successful, or a negative error code on failure. */ int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream, unsigned char *buffer, int count) { guard(spinlock_irqsave)(&substream->lock); if (!substream->opened) return -EBADFD; count = __snd_rawmidi_transmit_peek(substream, buffer, count); if (count <= 0) return count; return __snd_rawmidi_transmit_ack(substream, count); } EXPORT_SYMBOL(snd_rawmidi_transmit); /** * snd_rawmidi_proceed - Discard the all pending bytes and proceed * @substream: rawmidi substream * * Return: the number of discarded bytes */ int snd_rawmidi_proceed(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; int count = 0; guard(spinlock_irqsave)(&substream->lock); runtime = substream->runtime; if (substream->opened && runtime && runtime->avail < runtime->buffer_size) { count = runtime->buffer_size - runtime->avail; __snd_rawmidi_transmit_ack(substream, count); } return count; } EXPORT_SYMBOL(snd_rawmidi_proceed); static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, const unsigned char __user *userbuf, const unsigned char *kernelbuf, long count) { unsigned long flags; long count1, result; struct snd_rawmidi_runtime *runtime = substream->runtime; unsigned long appl_ptr; if (!kernelbuf && !userbuf) return -EINVAL; if (snd_BUG_ON(!runtime->buffer)) return -EINVAL; result = 0; spin_lock_irqsave(&substream->lock, flags); if (substream->append) { if ((long)runtime->avail < count) { spin_unlock_irqrestore(&substream->lock, flags); return -EAGAIN; } } snd_rawmidi_buffer_ref(runtime); while (count > 0 && runtime->avail > 0) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) count1 = count; if (count1 > (long)runtime->avail) count1 = runtime->avail; /* update runtime->appl_ptr before unlocking for userbuf */ appl_ptr = runtime->appl_ptr; runtime->appl_ptr += count1; runtime->appl_ptr %= runtime->buffer_size; runtime->avail -= count1; if (kernelbuf) memcpy(runtime->buffer + appl_ptr, kernelbuf + result, count1); else if (userbuf) { spin_unlock_irqrestore(&substream->lock, flags); if (copy_from_user(runtime->buffer + appl_ptr, userbuf + result, count1)) { spin_lock_irqsave(&substream->lock, flags); result = result > 0 ? 
result : -EFAULT; goto __end; } spin_lock_irqsave(&substream->lock, flags); } result += count1; count -= count1; } __end: count1 = runtime->avail < runtime->buffer_size; snd_rawmidi_buffer_unref(runtime); spin_unlock_irqrestore(&substream->lock, flags); if (count1) snd_rawmidi_output_trigger(substream, 1); return result; } long snd_rawmidi_kernel_write(struct snd_rawmidi_substream *substream, const unsigned char *buf, long count) { return snd_rawmidi_kernel_write1(substream, NULL, buf, count); } EXPORT_SYMBOL(snd_rawmidi_kernel_write); static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { long result, timeout; int count1; struct snd_rawmidi_file *rfile; struct snd_rawmidi_runtime *runtime; struct snd_rawmidi_substream *substream; rfile = file->private_data; substream = rfile->output; runtime = substream->runtime; /* we cannot put an atomic message to our buffer */ if (substream->append && count > runtime->buffer_size) return -EIO; result = 0; while (count > 0) { spin_lock_irq(&substream->lock); while (!snd_rawmidi_ready_append(substream, count)) { wait_queue_entry_t wait; if (file->f_flags & O_NONBLOCK) { spin_unlock_irq(&substream->lock); return result > 0 ? result : -EAGAIN; } init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&substream->lock); timeout = schedule_timeout(30 * HZ); remove_wait_queue(&runtime->sleep, &wait); if (rfile->rmidi->card->shutdown) return -ENODEV; if (signal_pending(current)) return result > 0 ? result : -ERESTARTSYS; spin_lock_irq(&substream->lock); if (!runtime->avail && !timeout) { spin_unlock_irq(&substream->lock); return result > 0 ? result : -EIO; } } spin_unlock_irq(&substream->lock); count1 = snd_rawmidi_kernel_write1(substream, buf, NULL, count); if (count1 < 0) return result > 0 ? result : count1; result += count1; buf += count1; if ((size_t)count1 < count && (file->f_flags & O_NONBLOCK)) break; count -= count1; } if (file->f_flags & O_DSYNC) { spin_lock_irq(&substream->lock); while (runtime->avail != runtime->buffer_size) { wait_queue_entry_t wait; unsigned int last_avail = runtime->avail; init_waitqueue_entry(&wait, current); add_wait_queue(&runtime->sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&substream->lock); timeout = schedule_timeout(30 * HZ); remove_wait_queue(&runtime->sleep, &wait); if (signal_pending(current)) return result > 0 ? result : -ERESTARTSYS; if (runtime->avail == last_avail && !timeout) return result > 0 ? 
result : -EIO; spin_lock_irq(&substream->lock); } spin_unlock_irq(&substream->lock); } return result; } static __poll_t snd_rawmidi_poll(struct file *file, poll_table *wait) { struct snd_rawmidi_file *rfile; struct snd_rawmidi_runtime *runtime; __poll_t mask; rfile = file->private_data; if (rfile->input != NULL) { runtime = rfile->input->runtime; snd_rawmidi_input_trigger(rfile->input, 1); poll_wait(file, &runtime->sleep, wait); } if (rfile->output != NULL) { runtime = rfile->output->runtime; poll_wait(file, &runtime->sleep, wait); } mask = 0; if (rfile->input != NULL) { if (snd_rawmidi_ready(rfile->input)) mask |= EPOLLIN | EPOLLRDNORM; } if (rfile->output != NULL) { if (snd_rawmidi_ready(rfile->output)) mask |= EPOLLOUT | EPOLLWRNORM; } return mask; } /* */ #ifdef CONFIG_COMPAT #include "rawmidi_compat.c" #else #define snd_rawmidi_ioctl_compat NULL #endif /* */ static void snd_rawmidi_proc_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_rawmidi *rmidi; struct snd_rawmidi_substream *substream; struct snd_rawmidi_runtime *runtime; unsigned long buffer_size, avail, xruns; unsigned int clock_type; static const char *clock_names[4] = { "none", "realtime", "monotonic", "monotonic raw" }; rmidi = entry->private_data; snd_iprintf(buffer, "%s\n\n", rmidi->name); if (IS_ENABLED(CONFIG_SND_UMP)) snd_iprintf(buffer, "Type: %s\n", rawmidi_is_ump(rmidi) ? "UMP" : "Legacy"); if (rmidi->ops && rmidi->ops->proc_read) rmidi->ops->proc_read(entry, buffer); guard(mutex)(&rmidi->open_mutex); if (rmidi->info_flags & SNDRV_RAWMIDI_INFO_OUTPUT) { list_for_each_entry(substream, &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams, list) { snd_iprintf(buffer, "Output %d\n" " Tx bytes : %lu\n", substream->number, (unsigned long) substream->bytes); if (substream->opened) { snd_iprintf(buffer, " Owner PID : %d\n", pid_vnr(substream->pid)); runtime = substream->runtime; scoped_guard(spinlock_irq, &substream->lock) { buffer_size = runtime->buffer_size; avail = runtime->avail; } snd_iprintf(buffer, " Mode : %s\n" " Buffer size : %lu\n" " Avail : %lu\n", runtime->oss ? 
"OSS compatible" : "native", buffer_size, avail); } } } if (rmidi->info_flags & SNDRV_RAWMIDI_INFO_INPUT) { list_for_each_entry(substream, &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams, list) { snd_iprintf(buffer, "Input %d\n" " Rx bytes : %lu\n", substream->number, (unsigned long) substream->bytes); if (substream->opened) { snd_iprintf(buffer, " Owner PID : %d\n", pid_vnr(substream->pid)); runtime = substream->runtime; scoped_guard(spinlock_irq, &substream->lock) { buffer_size = runtime->buffer_size; avail = runtime->avail; xruns = runtime->xruns; } snd_iprintf(buffer, " Buffer size : %lu\n" " Avail : %lu\n" " Overruns : %lu\n", buffer_size, avail, xruns); if (substream->framing == SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP) { clock_type = substream->clock_type >> SNDRV_RAWMIDI_MODE_CLOCK_SHIFT; if (!snd_BUG_ON(clock_type >= ARRAY_SIZE(clock_names))) snd_iprintf(buffer, " Framing : tstamp\n" " Clock type : %s\n", clock_names[clock_type]); } } } } } /* * Register functions */ static const struct file_operations snd_rawmidi_f_ops = { .owner = THIS_MODULE, .read = snd_rawmidi_read, .write = snd_rawmidi_write, .open = snd_rawmidi_open, .release = snd_rawmidi_release, .llseek = no_llseek, .poll = snd_rawmidi_poll, .unlocked_ioctl = snd_rawmidi_ioctl, .compat_ioctl = snd_rawmidi_ioctl_compat, }; static int snd_rawmidi_alloc_substreams(struct snd_rawmidi *rmidi, struct snd_rawmidi_str *stream, int direction, int count) { struct snd_rawmidi_substream *substream; int idx; for (idx = 0; idx < count; idx++) { substream = kzalloc(sizeof(*substream), GFP_KERNEL); if (!substream) return -ENOMEM; substream->stream = direction; substream->number = idx; substream->rmidi = rmidi; substream->pstr = stream; spin_lock_init(&substream->lock); list_add_tail(&substream->list, &stream->substreams); stream->substream_count++; } return 0; } /* used for both rawmidi and ump */ int snd_rawmidi_init(struct snd_rawmidi *rmidi, struct snd_card *card, char *id, int device, int output_count, int input_count, unsigned int info_flags) { int err; static const struct snd_device_ops ops = { .dev_free = snd_rawmidi_dev_free, .dev_register = snd_rawmidi_dev_register, .dev_disconnect = snd_rawmidi_dev_disconnect, }; rmidi->card = card; rmidi->device = device; mutex_init(&rmidi->open_mutex); init_waitqueue_head(&rmidi->open_wait); INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams); INIT_LIST_HEAD(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams); rmidi->info_flags = info_flags; if (id != NULL) strscpy(rmidi->id, id, sizeof(rmidi->id)); err = snd_device_alloc(&rmidi->dev, card); if (err < 0) return err; if (rawmidi_is_ump(rmidi)) dev_set_name(rmidi->dev, "umpC%iD%i", card->number, device); else dev_set_name(rmidi->dev, "midiC%iD%i", card->number, device); err = snd_rawmidi_alloc_substreams(rmidi, &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT], SNDRV_RAWMIDI_STREAM_INPUT, input_count); if (err < 0) return err; err = snd_rawmidi_alloc_substreams(rmidi, &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT], SNDRV_RAWMIDI_STREAM_OUTPUT, output_count); if (err < 0) return err; err = snd_device_new(card, SNDRV_DEV_RAWMIDI, rmidi, &ops); if (err < 0) return err; return 0; } EXPORT_SYMBOL_GPL(snd_rawmidi_init); /** * snd_rawmidi_new - create a rawmidi instance * @card: the card instance * @id: the id string * @device: the device index * @output_count: the number of output streams * @input_count: the number of input streams * @rrawmidi: the pointer to store the new rawmidi instance * * Creates a new rawmidi instance. 
* Use snd_rawmidi_set_ops() to set the operators to the new instance. * * Return: Zero if successful, or a negative error code on failure. */ int snd_rawmidi_new(struct snd_card *card, char *id, int device, int output_count, int input_count, struct snd_rawmidi **rrawmidi) { struct snd_rawmidi *rmidi; int err; if (rrawmidi) *rrawmidi = NULL; rmidi = kzalloc(sizeof(*rmidi), GFP_KERNEL); if (!rmidi) return -ENOMEM; err = snd_rawmidi_init(rmidi, card, id, device, output_count, input_count, 0); if (err < 0) { snd_rawmidi_free(rmidi); return err; } if (rrawmidi) *rrawmidi = rmidi; return 0; } EXPORT_SYMBOL(snd_rawmidi_new); static void snd_rawmidi_free_substreams(struct snd_rawmidi_str *stream) { struct snd_rawmidi_substream *substream; while (!list_empty(&stream->substreams)) { substream = list_entry(stream->substreams.next, struct snd_rawmidi_substream, list); list_del(&substream->list); kfree(substream); } } /* called from ump.c, too */ int snd_rawmidi_free(struct snd_rawmidi *rmidi) { if (!rmidi) return 0; snd_info_free_entry(rmidi->proc_entry); rmidi->proc_entry = NULL; if (rmidi->ops && rmidi->ops->dev_unregister) rmidi->ops->dev_unregister(rmidi); snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]); snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]); if (rmidi->private_free) rmidi->private_free(rmidi); put_device(rmidi->dev); kfree(rmidi); return 0; } EXPORT_SYMBOL_GPL(snd_rawmidi_free); static int snd_rawmidi_dev_free(struct snd_device *device) { struct snd_rawmidi *rmidi = device->device_data; return snd_rawmidi_free(rmidi); } #if IS_ENABLED(CONFIG_SND_SEQUENCER) static void snd_rawmidi_dev_seq_free(struct snd_seq_device *device) { struct snd_rawmidi *rmidi = device->private_data; rmidi->seq_dev = NULL; } #endif static int snd_rawmidi_dev_register(struct snd_device *device) { int err; struct snd_info_entry *entry; char name[16]; struct snd_rawmidi *rmidi = device->device_data; if (rmidi->device >= SNDRV_RAWMIDI_DEVICES) return -ENOMEM; err = 0; scoped_guard(mutex, &register_mutex) { if (snd_rawmidi_search(rmidi->card, rmidi->device)) err = -EBUSY; else list_add_tail(&rmidi->list, &snd_rawmidi_devices); } if (err < 0) return err; err = snd_register_device(SNDRV_DEVICE_TYPE_RAWMIDI, rmidi->card, rmidi->device, &snd_rawmidi_f_ops, rmidi, rmidi->dev); if (err < 0) { rmidi_err(rmidi, "unable to register\n"); goto error; } if (rmidi->ops && rmidi->ops->dev_register) { err = rmidi->ops->dev_register(rmidi); if (err < 0) goto error_unregister; } #ifdef CONFIG_SND_OSSEMUL rmidi->ossreg = 0; if (!rawmidi_is_ump(rmidi) && (int)rmidi->device == midi_map[rmidi->card->number]) { if (snd_register_oss_device(SNDRV_OSS_DEVICE_TYPE_MIDI, rmidi->card, 0, &snd_rawmidi_f_ops, rmidi) < 0) { rmidi_err(rmidi, "unable to register OSS rawmidi device %i:%i\n", rmidi->card->number, 0); } else { rmidi->ossreg++; #ifdef SNDRV_OSS_INFO_DEV_MIDI snd_oss_info_register(SNDRV_OSS_INFO_DEV_MIDI, rmidi->card->number, rmidi->name); #endif } } if (!rawmidi_is_ump(rmidi) && (int)rmidi->device == amidi_map[rmidi->card->number]) { if (snd_register_oss_device(SNDRV_OSS_DEVICE_TYPE_MIDI, rmidi->card, 1, &snd_rawmidi_f_ops, rmidi) < 0) { rmidi_err(rmidi, "unable to register OSS rawmidi device %i:%i\n", rmidi->card->number, 1); } else { rmidi->ossreg++; } } #endif /* CONFIG_SND_OSSEMUL */ sprintf(name, "midi%d", rmidi->device); entry = snd_info_create_card_entry(rmidi->card, name, rmidi->card->proc_root); if (entry) { entry->private_data = rmidi; entry->c.text.read = 
snd_rawmidi_proc_info_read; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } rmidi->proc_entry = entry; #if IS_ENABLED(CONFIG_SND_SEQUENCER) /* no own registration mechanism? */ if (!rmidi->ops || !rmidi->ops->dev_register) { if (snd_seq_device_new(rmidi->card, rmidi->device, SNDRV_SEQ_DEV_ID_MIDISYNTH, 0, &rmidi->seq_dev) >= 0) { rmidi->seq_dev->private_data = rmidi; rmidi->seq_dev->private_free = snd_rawmidi_dev_seq_free; sprintf(rmidi->seq_dev->name, "MIDI %d-%d", rmidi->card->number, rmidi->device); snd_device_register(rmidi->card, rmidi->seq_dev); } } #endif return 0; error_unregister: snd_unregister_device(rmidi->dev); error: scoped_guard(mutex, &register_mutex) list_del(&rmidi->list); return err; } static int snd_rawmidi_dev_disconnect(struct snd_device *device) { struct snd_rawmidi *rmidi = device->device_data; int dir; guard(mutex)(&register_mutex); guard(mutex)(&rmidi->open_mutex); wake_up(&rmidi->open_wait); list_del_init(&rmidi->list); for (dir = 0; dir < 2; dir++) { struct snd_rawmidi_substream *s; list_for_each_entry(s, &rmidi->streams[dir].substreams, list) { if (s->runtime) wake_up(&s->runtime->sleep); } } #ifdef CONFIG_SND_OSSEMUL if (rmidi->ossreg) { if ((int)rmidi->device == midi_map[rmidi->card->number]) { snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIDI, rmidi->card, 0); #ifdef SNDRV_OSS_INFO_DEV_MIDI snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIDI, rmidi->card->number); #endif } if ((int)rmidi->device == amidi_map[rmidi->card->number]) snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIDI, rmidi->card, 1); rmidi->ossreg = 0; } #endif /* CONFIG_SND_OSSEMUL */ snd_unregister_device(rmidi->dev); return 0; } /** * snd_rawmidi_set_ops - set the rawmidi operators * @rmidi: the rawmidi instance * @stream: the stream direction, SNDRV_RAWMIDI_STREAM_XXX * @ops: the operator table * * Sets the rawmidi operators for the given stream direction. */ void snd_rawmidi_set_ops(struct snd_rawmidi *rmidi, int stream, const struct snd_rawmidi_ops *ops) { struct snd_rawmidi_substream *substream; list_for_each_entry(substream, &rmidi->streams[stream].substreams, list) substream->ops = ops; } EXPORT_SYMBOL(snd_rawmidi_set_ops); /* * ENTRY functions */ static int __init alsa_rawmidi_init(void) { snd_ctl_register_ioctl(snd_rawmidi_control_ioctl); snd_ctl_register_ioctl_compat(snd_rawmidi_control_ioctl); #ifdef CONFIG_SND_OSSEMUL { int i; /* check device map table */ for (i = 0; i < SNDRV_CARDS; i++) { if (midi_map[i] < 0 || midi_map[i] >= SNDRV_RAWMIDI_DEVICES) { pr_err("ALSA: rawmidi: invalid midi_map[%d] = %d\n", i, midi_map[i]); midi_map[i] = 0; } if (amidi_map[i] < 0 || amidi_map[i] >= SNDRV_RAWMIDI_DEVICES) { pr_err("ALSA: rawmidi: invalid amidi_map[%d] = %d\n", i, amidi_map[i]); amidi_map[i] = 1; } } } #endif /* CONFIG_SND_OSSEMUL */ return 0; } static void __exit alsa_rawmidi_exit(void) { snd_ctl_unregister_ioctl(snd_rawmidi_control_ioctl); snd_ctl_unregister_ioctl_compat(snd_rawmidi_control_ioctl); } module_init(alsa_rawmidi_init) module_exit(alsa_rawmidi_exit)
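/*
 * Usage sketch (not part of the original file): roughly how a card driver
 * would create and wire up a rawmidi instance with snd_rawmidi_new() and
 * snd_rawmidi_set_ops(), per the kernel-doc above.  All foo_* names are
 * hypothetical placeholders; assumes <sound/core.h> and <sound/rawmidi.h>.
 */
static int foo_midi_output_open(struct snd_rawmidi_substream *substream)
{
	return 0;	/* power up / claim the UART here */
}

static int foo_midi_output_close(struct snd_rawmidi_substream *substream)
{
	return 0;
}

static void foo_midi_output_trigger(struct snd_rawmidi_substream *substream,
				    int up)
{
	/* when @up, start draining bytes via snd_rawmidi_transmit() */
}

static const struct snd_rawmidi_ops foo_midi_output_ops = {
	.open		= foo_midi_output_open,
	.close		= foo_midi_output_close,
	.trigger	= foo_midi_output_trigger,
};

static int foo_create_midi(struct snd_card *card)
{
	struct snd_rawmidi *rmidi;
	int err;

	/* one output substream, no input substreams, device index 0 */
	err = snd_rawmidi_new(card, "FOO MIDI", 0, 1, 0, &rmidi);
	if (err < 0)
		return err;
	strscpy(rmidi->name, "FOO MIDI", sizeof(rmidi->name));
	rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT;
	snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT,
			    &foo_midi_output_ops);
	return 0;	/* the instance is registered along with the card */
}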
// SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) "irq: " fmt #include <linux/acpi.h> #include <linux/debugfs.h> #include <linux/hardirq.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/irqdesc.h> #include <linux/irqdomain.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/topology.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/fs.h> static LIST_HEAD(irq_domain_list); static DEFINE_MUTEX(irq_domain_mutex); static struct irq_domain *irq_default_domain; static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity); static void irq_domain_check_hierarchy(struct irq_domain *domain); static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq); struct irqchip_fwid { struct fwnode_handle fwnode; unsigned int type; char *name; phys_addr_t *pa; }; #ifdef CONFIG_GENERIC_IRQ_DEBUGFS static void debugfs_add_domain_dir(struct irq_domain *d); static void debugfs_remove_domain_dir(struct irq_domain *d); #else static inline void debugfs_add_domain_dir(struct irq_domain *d) { } static inline void debugfs_remove_domain_dir(struct irq_domain *d) { } #endif static const char *irqchip_fwnode_get_name(const struct fwnode_handle *fwnode) { struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode); return fwid->name; } const struct fwnode_operations irqchip_fwnode_ops = { .get_name = irqchip_fwnode_get_name, }; EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); /** * __irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for * identifying an irq domain
* @type: Type of irqchip_fwnode. See linux/irqdomain.h * @id: Optional user provided id if name != NULL * @name: Optional user provided domain name * @pa: Optional user-provided physical address * * Allocate a struct irqchip_fwid, and return a pointer to the embedded * fwnode_handle (or NULL on failure). * * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are * solely to transport name information to irqdomain creation code. The * node is not stored. For other types the pointer is kept in the irq * domain struct. */ struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, const char *name, phys_addr_t *pa) { struct irqchip_fwid *fwid; char *n; fwid = kzalloc(sizeof(*fwid), GFP_KERNEL); switch (type) { case IRQCHIP_FWNODE_NAMED: n = kasprintf(GFP_KERNEL, "%s", name); break; case IRQCHIP_FWNODE_NAMED_ID: n = kasprintf(GFP_KERNEL, "%s-%d", name, id); break; default: n = kasprintf(GFP_KERNEL, "irqchip@%pa", pa); break; } if (!fwid || !n) { kfree(fwid); kfree(n); return NULL; } fwid->type = type; fwid->name = n; fwid->pa = pa; fwnode_init(&fwid->fwnode, &irqchip_fwnode_ops); return &fwid->fwnode; } EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode); /** * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle * * Free a fwnode_handle allocated with irq_domain_alloc_fwnode. */ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) { struct irqchip_fwid *fwid; if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode))) return; fwid = container_of(fwnode, struct irqchip_fwid, fwnode); kfree(fwid->name); kfree(fwid); } EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data) { struct irqchip_fwid *fwid; struct irq_domain *domain; static atomic_t unknown_domains; if (WARN_ON((size && direct_max) || (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max) || (direct_max && (direct_max != hwirq_max)))) return NULL; domain = kzalloc_node(struct_size(domain, revmap, size), GFP_KERNEL, of_node_to_nid(to_of_node(fwnode))); if (!domain) return NULL; if (is_fwnode_irqchip(fwnode)) { fwid = container_of(fwnode, struct irqchip_fwid, fwnode); switch (fwid->type) { case IRQCHIP_FWNODE_NAMED: case IRQCHIP_FWNODE_NAMED_ID: domain->fwnode = fwnode; domain->name = kstrdup(fwid->name, GFP_KERNEL); if (!domain->name) { kfree(domain); return NULL; } domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; break; default: domain->fwnode = fwnode; domain->name = fwid->name; break; } } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || is_software_node(fwnode)) { char *name; /* * fwnode paths contain '/', which debugfs is legitimately * unhappy about. Replace them with ':', which does * the trick and is not as offensive as '\'... 
*/ name = kasprintf(GFP_KERNEL, "%pfw", fwnode); if (!name) { kfree(domain); return NULL; } domain->name = strreplace(name, '/', ':'); domain->fwnode = fwnode; domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; } if (!domain->name) { if (fwnode) pr_err("Invalid fwnode type for irqdomain\n"); domain->name = kasprintf(GFP_KERNEL, "unknown-%d", atomic_inc_return(&unknown_domains)); if (!domain->name) { kfree(domain); return NULL; } domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; } fwnode_handle_get(fwnode); fwnode_dev_initialized(fwnode, true); /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); domain->ops = ops; domain->host_data = host_data; domain->hwirq_max = hwirq_max; if (direct_max) domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP; domain->revmap_size = size; /* * Hierarchical domains use the domain lock of the root domain * (innermost domain). * * For non-hierarchical domains (as for root domains), the root * pointer is set to the domain itself so that &domain->root->mutex * always points to the right lock. */ mutex_init(&domain->mutex); domain->root = domain; irq_domain_check_hierarchy(domain); return domain; } static void __irq_domain_publish(struct irq_domain *domain) { mutex_lock(&irq_domain_mutex); debugfs_add_domain_dir(domain); list_add(&domain->link, &irq_domain_list); mutex_unlock(&irq_domain_mutex); pr_debug("Added domain %s\n", domain->name); } /** * __irq_domain_add() - Allocate a new irq_domain data structure * @fwnode: firmware node for the interrupt controller * @size: Size of linear map; 0 for radix mapping only * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no * direct mapping * @ops: domain callbacks * @host_data: Controller private data pointer * * Allocates and initializes an irq_domain structure. * Returns pointer to IRQ domain, or NULL on failure. */ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain; domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max, ops, host_data); if (domain) __irq_domain_publish(domain); return domain; } EXPORT_SYMBOL_GPL(__irq_domain_add); /** * irq_domain_remove() - Remove an irq domain. * @domain: domain to remove * * This routine is used to remove an irq domain. The caller must ensure * that all mappings within the domain have been disposed of prior to * use, depending on the revmap type. */ void irq_domain_remove(struct irq_domain *domain) { mutex_lock(&irq_domain_mutex); debugfs_remove_domain_dir(domain); WARN_ON(!radix_tree_empty(&domain->revmap_tree)); list_del(&domain->link); /* * If the going away domain is the default one, reset it. 
*/ if (unlikely(irq_default_domain == domain)) irq_set_default_host(NULL); mutex_unlock(&irq_domain_mutex); pr_debug("Removed domain %s\n", domain->name); fwnode_dev_initialized(domain->fwnode, false); fwnode_handle_put(domain->fwnode); if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) kfree(domain->name); kfree(domain); } EXPORT_SYMBOL_GPL(irq_domain_remove); void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token) { char *name; if (domain->bus_token == bus_token) return; mutex_lock(&irq_domain_mutex); domain->bus_token = bus_token; name = kasprintf(GFP_KERNEL, "%s-%d", domain->name, bus_token); if (!name) { mutex_unlock(&irq_domain_mutex); return; } debugfs_remove_domain_dir(domain); if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) kfree(domain->name); else domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; domain->name = name; debugfs_add_domain_dir(domain); mutex_unlock(&irq_domain_mutex); } EXPORT_SYMBOL_GPL(irq_domain_update_bus_token); /** * irq_domain_create_simple() - Register an irq_domain and optionally map a range of irqs * @fwnode: firmware node for the interrupt controller * @size: total number of irqs in mapping * @first_irq: first number of irq block assigned to the domain, * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then * pre-map all of the irqs in the domain to virqs starting at first_irq. * @ops: domain callbacks * @host_data: Controller private data pointer * * Allocates an irq_domain, and optionally if first_irq is positive then also * allocate irq_descs and map all of the hwirqs to virqs starting at first_irq. * * This is intended to implement the expected behaviour for most * interrupt controllers. If device tree is used, then first_irq will be 0 and * irqs get mapped dynamically on the fly. However, if the controller requires * static virq assignments (non-DT boot) then it will set that up correctly. */ struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain; domain = __irq_domain_add(fwnode, size, size, 0, ops, host_data); if (!domain) return NULL; if (first_irq > 0) { if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { /* attempt to allocated irq_descs */ int rc = irq_alloc_descs(first_irq, first_irq, size, of_node_to_nid(to_of_node(fwnode))); if (rc < 0) pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", first_irq); } irq_domain_associate_many(domain, first_irq, 0, size); } return domain; } EXPORT_SYMBOL_GPL(irq_domain_create_simple); /** * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. * @of_node: pointer to interrupt controller's device tree node. * @size: total number of irqs in legacy mapping * @first_irq: first number of irq block assigned to the domain * @first_hwirq: first hwirq number to use for the translation. Should normally * be '0', but a positive integer can be used if the effective * hwirqs numbering does not begin at zero. * @ops: map/unmap domain callbacks * @host_data: Controller private data pointer * * Note: the map() callback will be called before this function returns * for all legacy interrupts except 0 (which is always the invalid irq for * a legacy controller). 
*/ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, unsigned int size, unsigned int first_irq, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data) { return irq_domain_create_legacy(of_node_to_fwnode(of_node), size, first_irq, first_hwirq, ops, host_data); } EXPORT_SYMBOL_GPL(irq_domain_add_legacy); struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain; domain = __irq_domain_add(fwnode, first_hwirq + size, first_hwirq + size, 0, ops, host_data); if (domain) irq_domain_associate_many(domain, first_irq, first_hwirq, size); return domain; } EXPORT_SYMBOL_GPL(irq_domain_create_legacy); /** * irq_find_matching_fwspec() - Locates a domain for a given fwspec * @fwspec: FW specifier for an interrupt * @bus_token: domain-specific data */ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token) { struct irq_domain *h, *found = NULL; struct fwnode_handle *fwnode = fwspec->fwnode; int rc; /* We might want to match the legacy controller last since * it might potentially be set to match all interrupts in * the absence of a device node. This isn't a problem so far * yet though... * * bus_token == DOMAIN_BUS_ANY matches any domain, any other * values must generate an exact match for the domain to be * selected. */ mutex_lock(&irq_domain_mutex); list_for_each_entry(h, &irq_domain_list, link) { if (h->ops->select && bus_token != DOMAIN_BUS_ANY) rc = h->ops->select(h, fwspec, bus_token); else if (h->ops->match) rc = h->ops->match(h, to_of_node(fwnode), bus_token); else rc = ((fwnode != NULL) && (h->fwnode == fwnode) && ((bus_token == DOMAIN_BUS_ANY) || (h->bus_token == bus_token))); if (rc) { found = h; break; } } mutex_unlock(&irq_domain_mutex); return found; } EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); /** * irq_set_default_host() - Set a "default" irq domain * @domain: default domain pointer * * For convenience, it's possible to set a "default" domain that will be used * whenever NULL is passed to irq_create_mapping(). It makes life easier for * platforms that want to manipulate a few hard coded interrupt numbers that * aren't properly represented in the device-tree. */ void irq_set_default_host(struct irq_domain *domain) { pr_debug("Default domain set to @0x%p\n", domain); irq_default_domain = domain; } EXPORT_SYMBOL_GPL(irq_set_default_host); /** * irq_get_default_host() - Retrieve the "default" irq domain * * Returns: the default domain, if any. * * Modern code should never use this. This should only be used on * systems that cannot implement a firmware->fwnode mapping (which * both DT and ACPI provide). 
*/ struct irq_domain *irq_get_default_host(void) { return irq_default_domain; } EXPORT_SYMBOL_GPL(irq_get_default_host); static bool irq_domain_is_nomap(struct irq_domain *domain) { return IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && (domain->flags & IRQ_DOMAIN_FLAG_NO_MAP); } static void irq_domain_clear_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { lockdep_assert_held(&domain->root->mutex); if (irq_domain_is_nomap(domain)) return; if (hwirq < domain->revmap_size) rcu_assign_pointer(domain->revmap[hwirq], NULL); else radix_tree_delete(&domain->revmap_tree, hwirq); } static void irq_domain_set_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, struct irq_data *irq_data) { /* * This also makes sure that all domains point to the same root when * called from irq_domain_insert_irq() for each domain in a hierarchy. */ lockdep_assert_held(&domain->root->mutex); if (irq_domain_is_nomap(domain)) return; if (hwirq < domain->revmap_size) rcu_assign_pointer(domain->revmap[hwirq], irq_data); else radix_tree_insert(&domain->revmap_tree, hwirq, irq_data); } static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) { struct irq_data *irq_data = irq_get_irq_data(irq); irq_hw_number_t hwirq; if (WARN(!irq_data || irq_data->domain != domain, "virq%i doesn't exist; cannot disassociate\n", irq)) return; hwirq = irq_data->hwirq; mutex_lock(&domain->root->mutex); irq_set_status_flags(irq, IRQ_NOREQUEST); /* remove chip and handler */ irq_set_chip_and_handler(irq, NULL, NULL); /* Make sure it's completed */ synchronize_irq(irq); /* Tell the PIC about it */ if (domain->ops->unmap) domain->ops->unmap(domain, irq); smp_mb(); irq_data->domain = NULL; irq_data->hwirq = 0; domain->mapcount--; /* Clear reverse map for this hwirq */ irq_domain_clear_mapping(domain, hwirq); mutex_unlock(&domain->root->mutex); } static int irq_domain_associate_locked(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { struct irq_data *irq_data = irq_get_irq_data(virq); int ret; if (WARN(hwirq >= domain->hwirq_max, "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name)) return -EINVAL; if (WARN(!irq_data, "error: virq%i is not allocated", virq)) return -EINVAL; if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) return -EINVAL; irq_data->hwirq = hwirq; irq_data->domain = domain; if (domain->ops->map) { ret = domain->ops->map(domain, virq, hwirq); if (ret != 0) { /* * If map() returns -EPERM, this interrupt is protected * by the firmware or some other service and shall not * be mapped. Don't bother telling the user about it. 
*/ if (ret != -EPERM) { pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n", domain->name, hwirq, virq, ret); } irq_data->domain = NULL; irq_data->hwirq = 0; return ret; } } domain->mapcount++; irq_domain_set_mapping(domain, hwirq, irq_data); irq_clear_status_flags(virq, IRQ_NOREQUEST); return 0; } int irq_domain_associate(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { int ret; mutex_lock(&domain->root->mutex); ret = irq_domain_associate_locked(domain, virq, hwirq); mutex_unlock(&domain->root->mutex); return ret; } EXPORT_SYMBOL_GPL(irq_domain_associate); void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count) { struct device_node *of_node; int i; of_node = irq_domain_get_of_node(domain); pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, of_node_full_name(of_node), irq_base, (int)hwirq_base, count); for (i = 0; i < count; i++) irq_domain_associate(domain, irq_base + i, hwirq_base + i); } EXPORT_SYMBOL_GPL(irq_domain_associate_many); #ifdef CONFIG_IRQ_DOMAIN_NOMAP /** * irq_create_direct_mapping() - Allocate an irq for direct mapping * @domain: domain to allocate the irq for or NULL for default domain * * This routine is used for irq controllers which can choose the hardware * interrupt numbers they generate. In such a case it's simplest to use * the linux irq as the hardware interrupt number. It still uses the linear * or radix tree to store the mapping, but the irq controller can optimize * the revmap path by using the hwirq directly. */ unsigned int irq_create_direct_mapping(struct irq_domain *domain) { struct device_node *of_node; unsigned int virq; if (domain == NULL) domain = irq_default_domain; of_node = irq_domain_get_of_node(domain); virq = irq_alloc_desc_from(1, of_node_to_nid(of_node)); if (!virq) { pr_debug("create_direct virq allocation failed\n"); return 0; } if (virq >= domain->hwirq_max) { pr_err("ERROR: no free irqs available below %lu maximum\n", domain->hwirq_max); irq_free_desc(virq); return 0; } pr_debug("create_direct obtained virq %d\n", virq); if (irq_domain_associate(domain, virq, virq)) { irq_free_desc(virq); return 0; } return virq; } EXPORT_SYMBOL_GPL(irq_create_direct_mapping); #endif static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity) { struct device_node *of_node = irq_domain_get_of_node(domain); int virq; pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); /* Allocate a virtual interrupt number */ virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), affinity); if (virq <= 0) { pr_debug("-> virq allocation failed\n"); return 0; } if (irq_domain_associate_locked(domain, virq, hwirq)) { irq_free_desc(virq); return 0; } pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", hwirq, of_node_full_name(of_node), virq); return virq; } /** * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space * @domain: domain owning this hardware interrupt or NULL for default domain * @hwirq: hardware irq number in that domain space * @affinity: irq affinity * * Only one mapping per hardware interrupt is permitted. Returns a linux * irq number. * If the sense/trigger is to be specified, set_irq_type() should be called * on the number returned from that call. 
*/ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity) { int virq; /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) { WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); return 0; } mutex_lock(&domain->root->mutex); /* Check if mapping already exists */ virq = irq_find_mapping(domain, hwirq); if (virq) { pr_debug("existing mapping on virq %d\n", virq); goto out; } virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity); out: mutex_unlock(&domain->root->mutex); return virq; } EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); static int irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, irq_hw_number_t *hwirq, unsigned int *type) { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (d->ops->translate) return d->ops->translate(d, fwspec, hwirq, type); #endif if (d->ops->xlate) return d->ops->xlate(d, to_of_node(fwspec->fwnode), fwspec->param, fwspec->param_count, hwirq, type); /* If domain has no translation, then we assume interrupt line */ *hwirq = fwspec->param[0]; return 0; } void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, unsigned int count, struct irq_fwspec *fwspec) { int i; fwspec->fwnode = of_node_to_fwnode(np); fwspec->param_count = count; for (i = 0; i < count; i++) fwspec->param[i] = args[i]; } EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) { struct irq_domain *domain; struct irq_data *irq_data; irq_hw_number_t hwirq; unsigned int type = IRQ_TYPE_NONE; int virq; if (fwspec->fwnode) { domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED); if (!domain) domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_ANY); } else { domain = irq_default_domain; } if (!domain) { pr_warn("no irq domain found for %s !\n", of_node_full_name(to_of_node(fwspec->fwnode))); return 0; } if (irq_domain_translate(domain, fwspec, &hwirq, &type)) return 0; /* * WARN if the irqchip returns a type with bits * outside the sense mask set and clear these bits. */ if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK)) type &= IRQ_TYPE_SENSE_MASK; mutex_lock(&domain->root->mutex); /* * If we've already configured this interrupt, * don't do it again, or hell will break loose. */ virq = irq_find_mapping(domain, hwirq); if (virq) { /* * If the trigger type is not specified or matches the * current trigger type then we are done so return the * interrupt number. */ if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) goto out; /* * If the trigger type has not been set yet, then set * it now and return the interrupt number. 
*/ if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) { irq_data = irq_get_irq_data(virq); if (!irq_data) { virq = 0; goto out; } irqd_set_trigger_type(irq_data, type); goto out; } pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n", hwirq, of_node_full_name(to_of_node(fwspec->fwnode))); virq = 0; goto out; } if (irq_domain_is_hierarchy(domain)) { if (irq_domain_is_msi_device(domain)) { mutex_unlock(&domain->root->mutex); virq = msi_device_domain_alloc_wired(domain, hwirq, type); mutex_lock(&domain->root->mutex); } else virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE, fwspec, false, NULL); if (virq <= 0) { virq = 0; goto out; } } else { /* Create mapping */ virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL); if (!virq) goto out; } irq_data = irq_get_irq_data(virq); if (WARN_ON(!irq_data)) { virq = 0; goto out; } /* Store trigger type */ irqd_set_trigger_type(irq_data, type); out: mutex_unlock(&domain->root->mutex); return virq; } EXPORT_SYMBOL_GPL(irq_create_fwspec_mapping); unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) { struct irq_fwspec fwspec; of_phandle_args_to_fwspec(irq_data->np, irq_data->args, irq_data->args_count, &fwspec); return irq_create_fwspec_mapping(&fwspec); } EXPORT_SYMBOL_GPL(irq_create_of_mapping); /** * irq_dispose_mapping() - Unmap an interrupt * @virq: linux irq number of the interrupt to unmap */ void irq_dispose_mapping(unsigned int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); struct irq_domain *domain; if (!virq || !irq_data) return; domain = irq_data->domain; if (WARN_ON(domain == NULL)) return; if (irq_domain_is_hierarchy(domain)) { irq_domain_free_one_irq(domain, virq); } else { irq_domain_disassociate(domain, virq); irq_free_desc(virq); } } EXPORT_SYMBOL_GPL(irq_dispose_mapping); /** * __irq_resolve_mapping() - Find a linux irq from a hw irq number. * @domain: domain owning this hardware interrupt * @hwirq: hardware irq number in that domain space * @irq: optional pointer to return the Linux irq if required * * Returns the interrupt descriptor. */ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, unsigned int *irq) { struct irq_desc *desc = NULL; struct irq_data *data; /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) return desc; if (irq_domain_is_nomap(domain)) { if (hwirq < domain->hwirq_max) { data = irq_domain_get_irq_data(domain, hwirq); if (data && data->hwirq == hwirq) desc = irq_data_to_desc(data); if (irq && desc) *irq = hwirq; } return desc; } rcu_read_lock(); /* Check if the hwirq is in the linear revmap. */ if (hwirq < domain->revmap_size) data = rcu_dereference(domain->revmap[hwirq]); else data = radix_tree_lookup(&domain->revmap_tree, hwirq); if (likely(data)) { desc = irq_data_to_desc(data); if (irq) *irq = data->irq; } rcu_read_unlock(); return desc; } EXPORT_SYMBOL_GPL(__irq_resolve_mapping); /** * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings * * Device Tree IRQ specifier translation function which works with one cell * bindings where the cell value maps directly to the hwirq number. 
*/ int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, unsigned long *out_hwirq, unsigned int *out_type) { if (WARN_ON(intsize < 1)) return -EINVAL; *out_hwirq = intspec[0]; *out_type = IRQ_TYPE_NONE; return 0; } EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell); /** * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings * * Device Tree IRQ specifier translation function which works with two cell * bindings where the cell values map directly to the hwirq number * and linux irq flags. */ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type) { struct irq_fwspec fwspec; of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec); return irq_domain_translate_twocell(d, &fwspec, out_hwirq, out_type); } EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); /** * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings * * Device Tree IRQ specifier translation function which works with either one * or two cell bindings where the cell values map directly to the hwirq number * and linux irq flags. * * Note: don't use this function unless your interrupt controller explicitly * supports both one and two cell bindings. For the majority of controllers * the _onecell() or _twocell() variants above should be used. */ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, unsigned long *out_hwirq, unsigned int *out_type) { if (WARN_ON(intsize < 1)) return -EINVAL; *out_hwirq = intspec[0]; if (intsize > 1) *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; else *out_type = IRQ_TYPE_NONE; return 0; } EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell); const struct irq_domain_ops irq_domain_simple_ops = { .xlate = irq_domain_xlate_onetwocell, }; EXPORT_SYMBOL_GPL(irq_domain_simple_ops); /** * irq_domain_translate_onecell() - Generic translate for direct one cell * bindings */ int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type) { if (WARN_ON(fwspec->param_count < 1)) return -EINVAL; *out_hwirq = fwspec->param[0]; *out_type = IRQ_TYPE_NONE; return 0; } EXPORT_SYMBOL_GPL(irq_domain_translate_onecell); /** * irq_domain_translate_twocell() - Generic translate for direct two cell * bindings * * Device Tree IRQ specifier translation function which works with two cell * bindings where the cell values map directly to the hwirq number * and linux irq flags. 
*/ int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type) { if (WARN_ON(fwspec->param_count < 2)) return -EINVAL; *out_hwirq = fwspec->param[0]; *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; return 0; } EXPORT_SYMBOL_GPL(irq_domain_translate_twocell); int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity) { unsigned int hint; if (virq >= 0) { virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, affinity); } else { hint = hwirq % nr_irqs; if (hint == 0) hint++; virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, affinity); if (virq <= 0 && hint > 1) { virq = __irq_alloc_descs(-1, 1, cnt, node, THIS_MODULE, affinity); } } return virq; } /** * irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data * @irq_data: The pointer to irq_data */ void irq_domain_reset_irq_data(struct irq_data *irq_data) { irq_data->hwirq = 0; irq_data->chip = &no_irq_chip; irq_data->chip_data = NULL; } EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy * @parent: Parent irq domain to associate with the new domain * @flags: Irq domain flags associated to the domain * @size: Size of the domain. See below * @fwnode: Optional fwnode of the interrupt controller * @ops: Pointer to the interrupt domain callbacks * @host_data: Controller private data pointer * * If @size is 0 a tree domain is created, otherwise a linear domain. * * If successful the parent is associated to the new domain and the * domain flags are set. * Returns pointer to IRQ domain, or NULL on failure. */ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain; if (size) domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data); else domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data); if (domain) { if (parent) domain->root = parent->root; domain->parent = parent; domain->flags |= flags; __irq_domain_publish(domain); } return domain; } EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy); static void irq_domain_insert_irq(int virq) { struct irq_data *data; for (data = irq_get_irq_data(virq); data; data = data->parent_data) { struct irq_domain *domain = data->domain; domain->mapcount++; irq_domain_set_mapping(domain, data->hwirq, data); } irq_clear_status_flags(virq, IRQ_NOREQUEST); } static void irq_domain_remove_irq(int virq) { struct irq_data *data; irq_set_status_flags(virq, IRQ_NOREQUEST); irq_set_chip_and_handler(virq, NULL, NULL); synchronize_irq(virq); smp_mb(); for (data = irq_get_irq_data(virq); data; data = data->parent_data) { struct irq_domain *domain = data->domain; irq_hw_number_t hwirq = data->hwirq; domain->mapcount--; irq_domain_clear_mapping(domain, hwirq); } } static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, struct irq_data *child) { struct irq_data *irq_data; irq_data = kzalloc_node(sizeof(*irq_data), GFP_KERNEL, irq_data_get_node(child)); if (irq_data) { child->parent_data = irq_data; irq_data->irq = child->irq; irq_data->common = child->common; irq_data->domain = domain; } return irq_data; } static void __irq_domain_free_hierarchy(struct irq_data *irq_data) { struct irq_data *tmp; while (irq_data) { tmp = irq_data; irq_data = 
irq_data->parent_data; kfree(tmp); } } static void irq_domain_free_irq_data(unsigned int virq, unsigned int nr_irqs) { struct irq_data *irq_data, *tmp; int i; for (i = 0; i < nr_irqs; i++) { irq_data = irq_get_irq_data(virq + i); tmp = irq_data->parent_data; irq_data->parent_data = NULL; irq_data->domain = NULL; __irq_domain_free_hierarchy(tmp); } } /** * irq_domain_disconnect_hierarchy - Mark the first unused level of a hierarchy * @domain: IRQ domain from which the hierarchy is to be disconnected * @virq: IRQ number where the hierarchy is to be trimmed * * Marks the @virq level belonging to @domain as disconnected. * Returns -EINVAL if @virq doesn't have a valid irq_data pointing * to @domain. * * Its only use is to be able to trim levels of hierarchy that do not * have any real meaning for this interrupt, and that the driver marks * as such from its .alloc() callback. */ int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq) { struct irq_data *irqd; irqd = irq_domain_get_irq_data(domain, virq); if (!irqd) return -EINVAL; irqd->chip = ERR_PTR(-ENOTCONN); return 0; } EXPORT_SYMBOL_GPL(irq_domain_disconnect_hierarchy); static int irq_domain_trim_hierarchy(unsigned int virq) { struct irq_data *tail, *irqd, *irq_data; irq_data = irq_get_irq_data(virq); tail = NULL; /* The first entry must have a valid irqchip */ if (!irq_data->chip || IS_ERR(irq_data->chip)) return -EINVAL; /* * Validate that the irq_data chain is sane in the presence of * a hierarchy trimming marker. */ for (irqd = irq_data->parent_data; irqd; irq_data = irqd, irqd = irqd->parent_data) { /* Can't have a valid irqchip after a trim marker */ if (irqd->chip && tail) return -EINVAL; /* Can't have an empty irqchip before a trim marker */ if (!irqd->chip && !tail) return -EINVAL; if (IS_ERR(irqd->chip)) { /* Only -ENOTCONN is a valid trim marker */ if (PTR_ERR(irqd->chip) != -ENOTCONN) return -EINVAL; tail = irq_data; } } /* No trim marker, nothing to do */ if (!tail) return 0; pr_info("IRQ%d: trimming hierarchy from %s\n", virq, tail->parent_data->domain->name); /* Sever the inner part of the hierarchy... 
*/ irqd = tail; tail = tail->parent_data; irqd->parent_data = NULL; __irq_domain_free_hierarchy(tail); return 0; } static int irq_domain_alloc_irq_data(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { struct irq_data *irq_data; struct irq_domain *parent; int i; /* The outermost irq_data is embedded in struct irq_desc */ for (i = 0; i < nr_irqs; i++) { irq_data = irq_get_irq_data(virq + i); irq_data->domain = domain; for (parent = domain->parent; parent; parent = parent->parent) { irq_data = irq_domain_insert_irq_data(parent, irq_data); if (!irq_data) { irq_domain_free_irq_data(virq, i + 1); return -ENOMEM; } } } return 0; } /** * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain * @domain: domain to match * @virq: IRQ number to get irq_data */ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq) { struct irq_data *irq_data; for (irq_data = irq_get_irq_data(virq); irq_data; irq_data = irq_data->parent_data) if (irq_data->domain == domain) return irq_data; return NULL; } EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); /** * irq_domain_set_hwirq_and_chip - Set hwirq and irqchip of @virq at @domain * @domain: Interrupt domain to match * @virq: IRQ number * @hwirq: The hwirq number * @chip: The associated interrupt chip * @chip_data: The associated chip data */ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data) { struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); if (!irq_data) return -ENOENT; irq_data->hwirq = hwirq; irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip); irq_data->chip_data = chip_data; return 0; } EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); /** * irq_domain_set_info - Set the complete data for a @virq in @domain * @domain: Interrupt domain to match * @virq: IRQ number * @hwirq: The hardware interrupt number * @chip: The associated interrupt chip * @chip_data: The associated interrupt chip data * @handler: The interrupt flow handler * @handler_data: The interrupt flow handler data * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { irq_domain_set_hwirq_and_chip(domain, virq, hwirq, chip, chip_data); __irq_set_handler(virq, handler, 0, handler_name); irq_set_handler_data(virq, handler_data); } EXPORT_SYMBOL(irq_domain_set_info); /** * irq_domain_free_irqs_common - Clear irq_data and free the parent * @domain: Interrupt domain to match * @virq: IRQ number to start with * @nr_irqs: The number of irqs to free */ void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { struct irq_data *irq_data; int i; for (i = 0; i < nr_irqs; i++) { irq_data = irq_domain_get_irq_data(domain, virq + i); if (irq_data) irq_domain_reset_irq_data(irq_data); } irq_domain_free_irqs_parent(domain, virq, nr_irqs); } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_common); /** * irq_domain_free_irqs_top - Clear handler and handler data, clear irqdata and free parent * @domain: Interrupt domain to match * @virq: IRQ number to start with * @nr_irqs: The number of irqs to free */ void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { int i; for (i = 0; i < nr_irqs; i++) { irq_set_handler_data(virq + i, NULL); 
irq_set_handler(virq + i, NULL); } irq_domain_free_irqs_common(domain, virq, nr_irqs); } static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) { unsigned int i; if (!domain->ops->free) return; for (i = 0; i < nr_irqs; i++) { if (irq_domain_get_irq_data(domain, irq_base + i)) domain->ops->free(domain, irq_base + i, 1); } } int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg) { if (!domain->ops->alloc) { pr_debug("domain->ops->alloc() is NULL\n"); return -ENOSYS; } return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity) { int i, ret, virq; if (realloc && irq_base >= 0) { virq = irq_base; } else { virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node, affinity); if (virq < 0) { pr_debug("cannot allocate IRQ(base %d, count %d)\n", irq_base, nr_irqs); return virq; } } if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) { pr_debug("cannot allocate memory for IRQ%d\n", virq); ret = -ENOMEM; goto out_free_desc; } ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); if (ret < 0) goto out_free_irq_data; for (i = 0; i < nr_irqs; i++) { ret = irq_domain_trim_hierarchy(virq + i); if (ret) goto out_free_irq_data; } for (i = 0; i < nr_irqs; i++) irq_domain_insert_irq(virq + i); return virq; out_free_irq_data: irq_domain_free_irq_data(virq, nr_irqs); out_free_desc: irq_free_descs(virq, nr_irqs); return ret; } /** * __irq_domain_alloc_irqs - Allocate IRQs from domain * @domain: domain to allocate from * @irq_base: allocate specified IRQ number if irq_base >= 0 * @nr_irqs: number of IRQs to allocate * @node: NUMA node id for memory allocation * @arg: domain specific argument * @realloc: IRQ descriptors have already been allocated if true * @affinity: Optional irq affinity mask for multiqueue devices * * Allocate IRQ numbers and initialized all data structures to support * hierarchy IRQ domains. * Parameter @realloc is mainly to support legacy IRQs. * Returns error code or allocated IRQ number * * The whole process to setup an IRQ has been split into two steps. * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ * descriptor and required hardware resources. The second step, * irq_domain_activate_irq(), is to program the hardware with preallocated * resources. In this way, it's easier to rollback when failing to * allocate resources. */ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity) { int ret; if (domain == NULL) { domain = irq_default_domain; if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) return -EINVAL; } mutex_lock(&domain->root->mutex); ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg, realloc, affinity); mutex_unlock(&domain->root->mutex); return ret; } EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs); /* The irq_data was moved, fix the revmap to refer to the new location */ static void irq_domain_fix_revmap(struct irq_data *d) { void __rcu **slot; lockdep_assert_held(&d->domain->root->mutex); if (irq_domain_is_nomap(d->domain)) return; /* Fix up the revmap. 
*/ if (d->hwirq < d->domain->revmap_size) { /* Not using radix tree */ rcu_assign_pointer(d->domain->revmap[d->hwirq], d); } else { slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq); if (slot) radix_tree_replace_slot(&d->domain->revmap_tree, slot, d); } } /** * irq_domain_push_irq() - Push a domain in to the top of a hierarchy. * @domain: Domain to push. * @virq: Irq to push the domain in to. * @arg: Passed to the irq_domain_ops alloc() function. * * For an already existing irqdomain hierarchy, as might be obtained * via a call to pci_enable_msix(), add an additional domain to the * head of the processing chain. Must be called before request_irq() * has been called. */ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) { struct irq_data *irq_data = irq_get_irq_data(virq); struct irq_data *parent_irq_data; struct irq_desc *desc; int rv = 0; /* * Check that no action has been set, which indicates the virq * is in a state where this function doesn't have to deal with * races between interrupt handling and maintaining the * hierarchy. This will catch gross misuse. Attempting to * make the check race free would require holding locks across * calls to struct irq_domain_ops->alloc(), which could lead * to deadlock, so we just do a simple check before starting. */ desc = irq_to_desc(virq); if (!desc) return -EINVAL; if (WARN_ON(desc->action)) return -EBUSY; if (domain == NULL) return -EINVAL; if (WARN_ON(!irq_domain_is_hierarchy(domain))) return -EINVAL; if (!irq_data) return -EINVAL; if (domain->parent != irq_data->domain) return -EINVAL; parent_irq_data = kzalloc_node(sizeof(*parent_irq_data), GFP_KERNEL, irq_data_get_node(irq_data)); if (!parent_irq_data) return -ENOMEM; mutex_lock(&domain->root->mutex); /* Copy the original irq_data. */ *parent_irq_data = *irq_data; /* * Overwrite the irq_data, which is embedded in struct irq_desc, with * values for this domain. */ irq_data->parent_data = parent_irq_data; irq_data->domain = domain; irq_data->mask = 0; irq_data->hwirq = 0; irq_data->chip = NULL; irq_data->chip_data = NULL; /* May (probably does) set hwirq, chip, etc. */ rv = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); if (rv) { /* Restore the original irq_data. */ *irq_data = *parent_irq_data; kfree(parent_irq_data); goto error; } irq_domain_fix_revmap(parent_irq_data); irq_domain_set_mapping(domain, irq_data->hwirq, irq_data); error: mutex_unlock(&domain->root->mutex); return rv; } EXPORT_SYMBOL_GPL(irq_domain_push_irq); /** * irq_domain_pop_irq() - Remove a domain from the top of a hierarchy. * @domain: Domain to remove. * @virq: Irq to remove the domain from. * * Undo the effects of a call to irq_domain_push_irq(). Must be * called either before request_irq() or after free_irq(). */ int irq_domain_pop_irq(struct irq_domain *domain, int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); struct irq_data *parent_irq_data; struct irq_data *tmp_irq_data; struct irq_desc *desc; /* * Check that no action is set, which indicates the virq is in * a state where this function doesn't have to deal with races * between interrupt handling and maintaining the hierarchy. * This will catch gross misuse. Attempting to make the check * race free would require holding locks across calls to * struct irq_domain_ops->free(), which could lead to * deadlock, so we just do a simple check before starting. 
*/ desc = irq_to_desc(virq); if (!desc) return -EINVAL; if (WARN_ON(desc->action)) return -EBUSY; if (domain == NULL) return -EINVAL; if (!irq_data) return -EINVAL; tmp_irq_data = irq_domain_get_irq_data(domain, virq); /* We can only "pop" if this domain is at the top of the list */ if (WARN_ON(irq_data != tmp_irq_data)) return -EINVAL; if (WARN_ON(irq_data->domain != domain)) return -EINVAL; parent_irq_data = irq_data->parent_data; if (WARN_ON(!parent_irq_data)) return -EINVAL; mutex_lock(&domain->root->mutex); irq_data->parent_data = NULL; irq_domain_clear_mapping(domain, irq_data->hwirq); irq_domain_free_irqs_hierarchy(domain, virq, 1); /* Restore the original irq_data. */ *irq_data = *parent_irq_data; irq_domain_fix_revmap(irq_data); mutex_unlock(&domain->root->mutex); kfree(parent_irq_data); return 0; } EXPORT_SYMBOL_GPL(irq_domain_pop_irq); /** * irq_domain_free_irqs - Free IRQ number and associated data structures * @virq: base IRQ number * @nr_irqs: number of IRQs to free */ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { struct irq_data *data = irq_get_irq_data(virq); struct irq_domain *domain; int i; if (WARN(!data || !data->domain || !data->domain->ops->free, "NULL pointer, cannot free irq\n")) return; domain = data->domain; mutex_lock(&domain->root->mutex); for (i = 0; i < nr_irqs; i++) irq_domain_remove_irq(virq + i); irq_domain_free_irqs_hierarchy(domain, virq, nr_irqs); mutex_unlock(&domain->root->mutex); irq_domain_free_irq_data(virq, nr_irqs); irq_free_descs(virq, nr_irqs); } static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) { if (irq_domain_is_msi_device(domain)) msi_device_domain_free_wired(domain, virq); else irq_domain_free_irqs(virq, 1); } /** * irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain * @domain: Domain below which interrupts must be allocated * @irq_base: Base IRQ number * @nr_irqs: Number of IRQs to allocate * @arg: Allocation data (arch/domain specific) */ int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg) { if (!domain->parent) return -ENOSYS; return irq_domain_alloc_irqs_hierarchy(domain->parent, irq_base, nr_irqs, arg); } EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent); /** * irq_domain_free_irqs_parent - Free interrupts from parent domain * @domain: Domain below which interrupts must be freed * @irq_base: Base IRQ number * @nr_irqs: Number of IRQs to free */ void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) { if (!domain->parent) return; irq_domain_free_irqs_hierarchy(domain->parent, irq_base, nr_irqs); } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); static void __irq_domain_deactivate_irq(struct irq_data *irq_data) { if (irq_data && irq_data->domain) { struct irq_domain *domain = irq_data->domain; if (domain->ops->deactivate) domain->ops->deactivate(domain, irq_data); if (irq_data->parent_data) __irq_domain_deactivate_irq(irq_data->parent_data); } } static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve) { int ret = 0; if (irqd && irqd->domain) { struct irq_domain *domain = irqd->domain; if (irqd->parent_data) ret = __irq_domain_activate_irq(irqd->parent_data, reserve); if (!ret && domain->ops->activate) { ret = domain->ops->activate(domain, irqd, reserve); /* Rollback in case of error */ if (ret && irqd->parent_data) __irq_domain_deactivate_irq(irqd->parent_data); } } return ret; } /** * irq_domain_activate_irq - Call domain_ops->activate 
recursively to activate * interrupt * @irq_data: Outermost irq_data associated with interrupt * @reserve: If set only reserve an interrupt vector instead of assigning one * * This is the second step to call domain_ops->activate to program interrupt * controllers, so the interrupt could actually get delivered. */ int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve) { int ret = 0; if (!irqd_is_activated(irq_data)) ret = __irq_domain_activate_irq(irq_data, reserve); if (!ret) irqd_set_activated(irq_data); return ret; } /** * irq_domain_deactivate_irq - Call domain_ops->deactivate recursively to * deactivate interrupt * @irq_data: outermost irq_data associated with interrupt * * It calls domain_ops->deactivate to program interrupt controllers to disable * interrupt delivery. */ void irq_domain_deactivate_irq(struct irq_data *irq_data) { if (irqd_is_activated(irq_data)) { __irq_domain_deactivate_irq(irq_data); irqd_clr_activated(irq_data); } } static void irq_domain_check_hierarchy(struct irq_domain *domain) { /* Hierarchy irq_domains must implement callback alloc() */ if (domain->ops->alloc) domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; } #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ /** * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain * @domain: domain to match * @virq: IRQ number to get irq_data */ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); return (irq_data && irq_data->domain == domain) ? irq_data : NULL; } EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); /** * irq_domain_set_info - Set the complete data for a @virq in @domain * @domain: Interrupt domain to match * @virq: IRQ number * @hwirq: The hardware interrupt number * @chip: The associated interrupt chip * @chip_data: The associated interrupt chip data * @handler: The interrupt flow handler * @handler_data: The interrupt flow handler data * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { irq_set_chip_and_handler_name(virq, chip, handler, handler_name); irq_set_chip_data(virq, chip_data); irq_set_handler_data(virq, handler_data); } static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity) { return -EINVAL; } static void irq_domain_check_hierarchy(struct irq_domain *domain) { } static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) { } #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_IRQ_DEBUGFS #include "internals.h" static struct dentry *domain_dir; static void irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) { seq_printf(m, "%*sname: %s\n", ind, "", d->name); seq_printf(m, "%*ssize: %u\n", ind + 1, "", d->revmap_size); seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount); seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags); if (d->ops && d->ops->debug_show) d->ops->debug_show(m, d, NULL, ind + 1); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (!d->parent) return; seq_printf(m, "%*sparent: %s\n", ind + 1, "", d->parent->name); irq_domain_debug_show_one(m, d->parent, ind + 4); #endif } static int irq_domain_debug_show(struct seq_file *m, void *p) { struct irq_domain *d = m->private; /* Default domain? 
Might be NULL */ if (!d) { if (!irq_default_domain) return 0; d = irq_default_domain; } irq_domain_debug_show_one(m, d, 0); return 0; } DEFINE_SHOW_ATTRIBUTE(irq_domain_debug); static void debugfs_add_domain_dir(struct irq_domain *d) { if (!d->name || !domain_dir) return; debugfs_create_file(d->name, 0444, domain_dir, d, &irq_domain_debug_fops); } static void debugfs_remove_domain_dir(struct irq_domain *d) { debugfs_lookup_and_remove(d->name, domain_dir); } void __init irq_domain_debugfs_init(struct dentry *root) { struct irq_domain *d; domain_dir = debugfs_create_dir("domains", root); debugfs_create_file("default", 0444, domain_dir, NULL, &irq_domain_debug_fops); mutex_lock(&irq_domain_mutex); list_for_each_entry(d, &irq_domain_list, link) debugfs_add_domain_dir(d); mutex_unlock(&irq_domain_mutex); } #endif
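A minimal usage sketch for the push/pop helpers documented above: they are meant to bracket request_irq()/free_irq() on a virq whose hierarchy already exists (e.g. an MSI-X vector). All foo_* names below are illustrative assumptions, not part of the file above; foo_domain is assumed to have been created as a child of the domain currently serving the virq.

#include <linux/interrupt.h>
#include <linux/irqdomain.h>

/* Illustrative handler; only here so the sketch is self-contained. */
static irqreturn_t foo_handler(int irq, void *dev_id)
{
        return IRQ_HANDLED;
}

/*
 * Interpose foo_domain on top of an already-allocated virq, then install
 * the handler.  irq_domain_push_irq() must run before request_irq(),
 * since it refuses to operate once desc->action is set.
 */
static int foo_attach(struct irq_domain *foo_domain, int virq,
                      void *alloc_arg, void *dev_id)
{
        int ret;

        ret = irq_domain_push_irq(foo_domain, virq, alloc_arg);
        if (ret)
                return ret;

        ret = request_irq(virq, foo_handler, 0, "foo", dev_id);
        if (ret)
                irq_domain_pop_irq(foo_domain, virq);

        return ret;
}

/* Tear-down mirrors the setup: free_irq() first, then pop the domain. */
static void foo_detach(struct irq_domain *foo_domain, int virq, void *dev_id)
{
        free_irq(virq, dev_id);
        irq_domain_pop_irq(foo_domain, virq);
}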
244 244 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 /* SPDX-License-Identifier: GPL-2.0-only */ /* * fs/kernfs/kernfs-internal.h - kernfs internal header file * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de> */ #ifndef __KERNFS_INTERNAL_H #define __KERNFS_INTERNAL_H #include <linux/lockdep.h> #include <linux/fs.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/xattr.h> #include <linux/kernfs.h> #include <linux/fs_context.h> struct kernfs_iattrs { kuid_t ia_uid; kgid_t ia_gid; struct timespec64 ia_atime; struct timespec64 ia_mtime; struct timespec64 ia_ctime; struct simple_xattrs xattrs; atomic_t nr_user_xattrs; atomic_t user_xattr_size; }; struct kernfs_root { /* published fields */ struct kernfs_node *kn; unsigned int flags; /* KERNFS_ROOT_* flags */ /* private fields, do not use outside kernfs proper */ struct idr ino_idr; u32 last_id_lowbits; u32 id_highbits; struct kernfs_syscall_ops *syscall_ops; /* list of kernfs_super_info of this root, protected by kernfs_rwsem */ struct list_head supers; wait_queue_head_t deactivate_waitq; struct rw_semaphore kernfs_rwsem; struct rw_semaphore kernfs_iattr_rwsem; struct rw_semaphore kernfs_supers_rwsem; struct rcu_head rcu; }; /* +1 to avoid triggering overflow warning when negating it */ #define KN_DEACTIVATED_BIAS (INT_MIN + 1) /* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ /** * kernfs_root - find out the kernfs_root a kernfs_node belongs to * @kn: kernfs_node of interest * * Return: the kernfs_root @kn belongs to. */ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) { /* if parent exists, it's always a dir; otherwise, @sd is a dir */ if (kn->parent) kn = kn->parent; return kn->dir.root; } /* * mount.c */ struct kernfs_super_info { struct super_block *sb; /* * The root associated with this super_block. Each super_block is * identified by the root and ns it's associated with. */ struct kernfs_root *root; /* * Each sb is associated with one namespace tag, currently the * network namespace of the task which mounted this kernfs * instance. If multiple tags become necessary, make the following * an array and compare kernfs_node tag against every entry. 
*/ const void *ns; /* anchored at kernfs_root->supers, protected by kernfs_rwsem */ struct list_head node; }; #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry) { if (d_really_is_negative(dentry)) return NULL; return d_inode(dentry)->i_private; } static inline void kernfs_set_rev(struct kernfs_node *parent, struct dentry *dentry) { dentry->d_time = parent->dir.rev; } static inline void kernfs_inc_rev(struct kernfs_node *parent) { parent->dir.rev++; } static inline bool kernfs_dir_changed(struct kernfs_node *parent, struct dentry *dentry) { if (parent->dir.rev != dentry->d_time) return true; return false; } extern const struct super_operations kernfs_sops; extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; /* * inode.c */ extern const struct xattr_handler * const kernfs_xattr_handlers[]; void kernfs_evict_inode(struct inode *inode); int kernfs_iop_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); int kernfs_iop_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr); int kernfs_iop_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); /* * dir.c */ extern const struct dentry_operations kernfs_dops; extern const struct file_operations kernfs_dir_fops; extern const struct inode_operations kernfs_dir_iops; struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); void kernfs_put_active(struct kernfs_node *kn); int kernfs_add_one(struct kernfs_node *kn); struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, unsigned flags); /* * file.c */ extern const struct file_operations kernfs_file_fops; bool kernfs_should_drain_open_files(struct kernfs_node *kn); void kernfs_drain_open_files(struct kernfs_node *kn); /* * symlink.c */ extern const struct inode_operations kernfs_symlink_iops; /* * kernfs locks */ extern struct kernfs_global_locks *kernfs_locks; #endif /* __KERNFS_INTERNAL_H */
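The dir.rev / d_time pairing in the header above implements a cheap negative-dentry invalidation scheme: the parent bumps its revision whenever its contents change, and a dentry records the revision it was instantiated under. A sketch of how the three inline helpers fit together, under the assumption that the caller holds the appropriate kernfs locks; the foo_* wrappers are illustrative only (the real callers live in kernfs proper, e.g. fs/kernfs/dir.c).

/* Modification side: after adding or removing a child under @parent,
 * make any negative dentries cached against the old contents stale. */
static void foo_notify_dir_changed(struct kernfs_node *parent)
{
        kernfs_inc_rev(parent);
}

/* Instantiation side: remember the parent's revision in the dentry. */
static void foo_record_rev(struct kernfs_node *parent, struct dentry *dentry)
{
        kernfs_set_rev(parent, dentry);
}

/* Revalidation side: a negative dentry is only trustworthy if the parent
 * directory has not changed since the dentry was instantiated. */
static bool foo_negative_dentry_still_valid(struct kernfs_node *parent,
                                            struct dentry *dentry)
{
        return !kernfs_dir_changed(parent, dentry);
}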
268 250 258 192 192 268 268 267 258 267 268 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Red Black Trees (C) 1999 Andrea Arcangeli <andrea@suse.de> linux/include/linux/rbtree.h To use rbtrees you'll have to implement your own insert and search cores. This will avoid us to use callbacks and to drop drammatically performances. I know it's not the cleaner way, but in C (not in C++) to get performances and genericity... See Documentation/core-api/rbtree.rst for documentation and samples. */ #ifndef _LINUX_RBTREE_H #define _LINUX_RBTREE_H #include <linux/container_of.h> #include <linux/rbtree_types.h> #include <linux/stddef.h> #include <linux/rcupdate.h> #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) /* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ #define RB_EMPTY_NODE(node) \ ((node)->__rb_parent_color == (unsigned long)(node)) #define RB_CLEAR_NODE(node) \ ((node)->__rb_parent_color = (unsigned long)(node)) extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); /* Find logical next and previous nodes in a tree */ extern struct rb_node *rb_next(const struct rb_node *); extern struct rb_node *rb_prev(const struct rb_node *); extern struct rb_node *rb_first(const struct rb_root *); extern struct rb_node *rb_last(const struct rb_root *); /* Postorder iteration - always visit the parent after its children */ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, struct rb_root *root); static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) { node->__rb_parent_color = (unsigned long)parent; node->rb_left = node->rb_right = NULL; *rb_link = node; } static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) { 
node->__rb_parent_color = (unsigned long)parent; node->rb_left = node->rb_right = NULL; rcu_assign_pointer(*rb_link, node); } #define rb_entry_safe(ptr, type, member) \ ({ typeof(ptr) ____ptr = (ptr); \ ____ptr ? rb_entry(____ptr, type, member) : NULL; \ }) /** * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of * given type allowing the backing memory of @pos to be invalidated * * @pos: the 'type *' to use as a loop cursor. * @n: another 'type *' to use as temporary storage * @root: 'rb_root *' of the rbtree. * @field: the name of the rb_node field within 'type'. * * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as * list_for_each_entry_safe() and allows the iteration to continue independent * of changes to @pos by the body of the loop. * * Note, however, that it cannot handle other modifications that re-order the * rbtree it is iterating over. This includes calling rb_erase() on @pos, as * rb_erase() may rebalance the tree, causing us to miss some nodes. */ #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ typeof(*pos), field); 1; }); \ pos = n) /* Same as rb_first(), but O(1) */ #define rb_first_cached(root) (root)->rb_leftmost static inline void rb_insert_color_cached(struct rb_node *node, struct rb_root_cached *root, bool leftmost) { if (leftmost) root->rb_leftmost = node; rb_insert_color(node, &root->rb_root); } static inline struct rb_node * rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) { struct rb_node *leftmost = NULL; if (root->rb_leftmost == node) leftmost = root->rb_leftmost = rb_next(node); rb_erase(node, &root->rb_root); return leftmost; } static inline void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, struct rb_root_cached *root) { if (root->rb_leftmost == victim) root->rb_leftmost = new; rb_replace_node(victim, new, &root->rb_root); } /* * The below helper functions use 2 operators with 3 different * calling conventions. The operators are related like: * * comp(a->key,b) < 0 := less(a,b) * comp(a->key,b) > 0 := less(b,a) * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) * * If these operators define a partial order on the elements we make no * guarantee on which of the elements matching the key is found. See * rb_find(). * * The reason for this is to allow the find() interface without requiring an * on-stack dummy object, which might not be feasible due to object size. */ /** * rb_add_cached() - insert @node into the leftmost cached tree @tree * @node: node to insert * @tree: leftmost cached tree to insert @node into * @less: operator defining the (partial) node order * * Returns @node when it is the new leftmost, or NULL. */ static __always_inline struct rb_node * rb_add_cached(struct rb_node *node, struct rb_root_cached *tree, bool (*less)(struct rb_node *, const struct rb_node *)) { struct rb_node **link = &tree->rb_root.rb_node; struct rb_node *parent = NULL; bool leftmost = true; while (*link) { parent = *link; if (less(node, parent)) { link = &parent->rb_left; } else { link = &parent->rb_right; leftmost = false; } } rb_link_node(node, parent, link); rb_insert_color_cached(node, tree, leftmost); return leftmost ? 
node : NULL; } /** * rb_add() - insert @node into @tree * @node: node to insert * @tree: tree to insert @node into * @less: operator defining the (partial) node order */ static __always_inline void rb_add(struct rb_node *node, struct rb_root *tree, bool (*less)(struct rb_node *, const struct rb_node *)) { struct rb_node **link = &tree->rb_node; struct rb_node *parent = NULL; while (*link) { parent = *link; if (less(node, parent)) link = &parent->rb_left; else link = &parent->rb_right; } rb_link_node(node, parent, link); rb_insert_color(node, tree); } /** * rb_find_add() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert * @tree: tree to search / modify * @cmp: operator defining the node order * * Returns the rb_node matching @node, or NULL when no match is found and @node * is inserted. */ static __always_inline struct rb_node * rb_find_add(struct rb_node *node, struct rb_root *tree, int (*cmp)(struct rb_node *, const struct rb_node *)) { struct rb_node **link = &tree->rb_node; struct rb_node *parent = NULL; int c; while (*link) { parent = *link; c = cmp(node, parent); if (c < 0) link = &parent->rb_left; else if (c > 0) link = &parent->rb_right; else return parent; } rb_link_node(node, parent, link); rb_insert_color(node, tree); return NULL; } /** * rb_find() - find @key in tree @tree * @key: key to match * @tree: tree to search * @cmp: operator defining the node order * * Returns the rb_node matching @key or NULL. */ static __always_inline struct rb_node * rb_find(const void *key, const struct rb_root *tree, int (*cmp)(const void *key, const struct rb_node *)) { struct rb_node *node = tree->rb_node; while (node) { int c = cmp(key, node); if (c < 0) node = node->rb_left; else if (c > 0) node = node->rb_right; else return node; } return NULL; } /** * rb_find_first() - find the first @key in @tree * @key: key to match * @tree: tree to search * @cmp: operator defining node order * * Returns the leftmost node matching @key, or NULL. */ static __always_inline struct rb_node * rb_find_first(const void *key, const struct rb_root *tree, int (*cmp)(const void *key, const struct rb_node *)) { struct rb_node *node = tree->rb_node; struct rb_node *match = NULL; while (node) { int c = cmp(key, node); if (c <= 0) { if (!c) match = node; node = node->rb_left; } else if (c > 0) { node = node->rb_right; } } return match; } /** * rb_next_match() - find the next @key in @tree * @key: key to match * @tree: tree to search * @cmp: operator defining node order * * Returns the next node matching @key, or NULL. */ static __always_inline struct rb_node * rb_next_match(const void *key, struct rb_node *node, int (*cmp)(const void *key, const struct rb_node *)) { node = rb_next(node); if (node && cmp(key, node)) node = NULL; return node; } /** * rb_for_each() - iterates a subtree matching @key * @node: iterator * @key: key to match * @tree: tree to search * @cmp: operator defining node order */ #define rb_for_each(node, key, tree, cmp) \ for ((node) = rb_find_first((key), (tree), (cmp)); \ (node); (node) = rb_next_match((key), (node), (cmp))) #endif /* _LINUX_RBTREE_H */
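A self-contained usage sketch for the comparator-based helpers above, pairing rb_add() for insertion with rb_find() for lookup. The struct and function names are illustrative assumptions.

#include <linux/rbtree.h>

struct foo_item {
        struct rb_node node;
        unsigned long key;
};

/* less(a, b): ordering operator consumed by rb_add(). */
static bool foo_less(struct rb_node *a, const struct rb_node *b)
{
        return rb_entry(a, struct foo_item, node)->key <
               rb_entry(b, struct foo_item, node)->key;
}

/* cmp(key, node): three-way comparison consumed by rb_find(). */
static int foo_cmp(const void *key, const struct rb_node *node)
{
        unsigned long k = *(const unsigned long *)key;
        const struct foo_item *item = rb_entry(node, struct foo_item, node);

        if (k < item->key)
                return -1;
        if (k > item->key)
                return 1;
        return 0;
}

static void foo_insert(struct rb_root *tree, struct foo_item *item)
{
        rb_add(&item->node, tree, foo_less);
}

static struct foo_item *foo_lookup(struct rb_root *tree, unsigned long key)
{
        struct rb_node *node = rb_find(&key, tree, foo_cmp);

        return node ? rb_entry(node, struct foo_item, node) : NULL;
}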
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1994 Linus Torvalds * * Pentium III FXSR, SSE support * General FPU state handling cleanups * Gareth Hughes <gareth@valinux.com>, May 2000 */ #include <asm/fpu/api.h> #include <asm/fpu/regset.h> #include <asm/fpu/sched.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> #include <asm/traps.h> #include <asm/irq_regs.h> #include <uapi/asm/kvm.h> #include <linux/hardirq.h> #include <linux/pkeys.h> #include <linux/vmalloc.h> #include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" #define CREATE_TRACE_POINTS #include <asm/trace/fpu.h> #ifdef CONFIG_X86_64 DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); DEFINE_PER_CPU(u64, xfd_state); #endif /* The FPU state configuration data for kernel and user space */ struct fpu_state_config fpu_kernel_cfg __ro_after_init; struct fpu_state_config fpu_user_cfg __ro_after_init; /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: */ struct fpstate init_fpstate __ro_after_init; /* Track in-kernel FPU usage */ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* * Track which context is using the FPU on the CPU: */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? */ bool irq_fpu_usable(void) { if (WARN_ON_ONCE(in_nmi())) return false; /* In kernel FPU usage already active? */ if (this_cpu_read(in_kernel_fpu)) return false; /* * When not in NMI or hard interrupt context, FPU can be used in: * * - Task context except from within fpregs_lock()'ed critical * regions. * * - Soft interrupt processing context which cannot happen * while in a fpregs_lock()'ed critical region. */ if (!in_hardirq()) return true; /* * In hard interrupt context it's safe when soft interrupts * are enabled, which means the interrupt did not hit in * a fpregs_lock()'ed critical region. */ return !softirq_count(); } EXPORT_SYMBOL(irq_fpu_usable); /* * Track AVX512 state use because it is known to slow the max clock * speed of the core. */ static void update_avx_timestamp(struct fpu *fpu) { #define AVX512_TRACKING_MASK (XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM) if (fpu->fpstate->regs.xsave.header.xfeatures & AVX512_TRACKING_MASK) fpu->avx512_timestamp = jiffies; } /* * Save the FPU register state in fpu->fpstate->regs. The register state is * preserved. * * Must be called with fpregs_lock() held. * * The legacy FNSAVE instruction clears all FPU state unconditionally, so * register state has to be reloaded. That might be a pointless exercise * when the FPU is going to be used by another task right after that. But * this only affects 20+ years old 32bit systems and avoids conditionals all * over the place. * * FXSAVE and all XSAVE variants preserve the FPU register state. */ void save_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { os_xsave(fpu->fpstate); update_avx_timestamp(fpu); return; } if (likely(use_fxsr())) { fxsave(&fpu->fpstate->regs.fxsave); return; } /* * Legacy FPU register saving, FNSAVE always clears FPU registers, * so we have to reload them from the memory state. */ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave)); frstor(&fpu->fpstate->regs.fsave); } void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) { /* * AMD K7/K8 and later CPUs up to Zen don't save/restore * FDP/FIP/FOP unless an exception is pending. 
Clear the x87 state * here by setting it to fixed values. "m" is a random variable * that should be in L1. */ if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { asm volatile( "fnclex\n\t" "emms\n\t" "fildl %P[addr]" /* set F?P to defined value */ : : [addr] "m" (fpstate)); } if (use_xsave()) { /* * Dynamically enabled features are enabled in XCR0, but * usage requires also that the corresponding bits in XFD * are cleared. If the bits are set then using a related * instruction will raise #NM. This allows to do the * allocation of the larger FPU buffer lazy from #NM or if * the task has no permission to kill it which would happen * via #UD if the feature is disabled in XCR0. * * XFD state is following the same life time rules as * XSTATE and to restore state correctly XFD has to be * updated before XRSTORS otherwise the component would * stay in or go into init state even if the bits are set * in fpstate::regs::xsave::xfeatures. */ xfd_update_state(fpstate); /* * Restoring state always needs to modify all features * which are in @mask even if the current task cannot use * extended features. * * So fpstate->xfeatures cannot be used here, because then * a feature for which the task has no permission but was * used by the previous task would not go into init state. */ mask = fpu_kernel_cfg.max_features & mask; os_xrstor(fpstate, mask); } else { if (use_fxsr()) fxrstor(&fpstate->regs.fxsave); else frstor(&fpstate->regs.fsave); } } void fpu_reset_from_exception_fixup(void) { restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE); } #if IS_ENABLED(CONFIG_KVM) static void __fpstate_reset(struct fpstate *fpstate, u64 xfd); static void fpu_init_guest_permissions(struct fpu_guest *gfpu) { struct fpu_state_perm *fpuperm; u64 perm; if (!IS_ENABLED(CONFIG_X86_64)) return; spin_lock_irq(&current->sighand->siglock); fpuperm = &current->group_leader->thread.fpu.guest_perm; perm = fpuperm->__state_perm; /* First fpstate allocation locks down permissions. */ WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED); spin_unlock_irq(&current->sighand->siglock); gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED; } bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) { struct fpstate *fpstate; unsigned int size; size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64); fpstate = vzalloc(size); if (!fpstate) return false; /* Leave xfd to 0 (the reset value defined by spec) */ __fpstate_reset(fpstate, 0); fpstate_init_user(fpstate); fpstate->is_valloc = true; fpstate->is_guest = true; gfpu->fpstate = fpstate; gfpu->xfeatures = fpu_user_cfg.default_features; gfpu->perm = fpu_user_cfg.default_features; /* * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state * to userspace, even when XSAVE is unsupported, so that restoring FPU * state on a different CPU that does support XSAVE can cleanly load * the incoming state using its natural XSAVE. In other words, KVM's * uABI size may be larger than this host's default size. Conversely, * the default size should never be larger than KVM's base uABI size; * all features that can expand the uABI size must be opt-in. 
*/ gfpu->uabi_size = sizeof(struct kvm_xsave); if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size)) gfpu->uabi_size = fpu_user_cfg.default_size; fpu_init_guest_permissions(gfpu); return true; } EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate); void fpu_free_guest_fpstate(struct fpu_guest *gfpu) { struct fpstate *fps = gfpu->fpstate; if (!fps) return; if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use)) return; gfpu->fpstate = NULL; vfree(fps); } EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); /* * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable * @guest_fpu: Pointer to the guest FPU container * @xfeatures: Features requested by guest CPUID * * Enable all dynamic xfeatures according to guest perm and requested CPUID. * * Return: 0 on success, error code otherwise */ int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures) { lockdep_assert_preemption_enabled(); /* Nothing to do if all requested features are already enabled. */ xfeatures &= ~guest_fpu->xfeatures; if (!xfeatures) return 0; return __xfd_enable_feature(xfeatures, guest_fpu); } EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features); #ifdef CONFIG_X86_64 void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { fpregs_lock(); guest_fpu->fpstate->xfd = xfd; if (guest_fpu->fpstate->in_use) xfd_update_state(guest_fpu->fpstate); fpregs_unlock(); } EXPORT_SYMBOL_GPL(fpu_update_guest_xfd); /** * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state * * Must be invoked from KVM after a VMEXIT before enabling interrupts when * XFD write emulation is disabled. This is required because the guest can * freely modify XFD and the state at VMEXIT is not guaranteed to be the * same as the state on VMENTER. So software state has to be updated before * any operation which depends on it can take place. * * Note: It can be invoked unconditionally even when write emulation is * enabled for the price of a then pointless MSR read. */ void fpu_sync_guest_vmexit_xfd_state(void) { struct fpstate *fps = current->thread.fpu.fpstate; lockdep_assert_irqs_disabled(); if (fpu_state_size_dynamic()) { rdmsrl(MSR_IA32_XFD, fps->xfd); __this_cpu_write(xfd_state, fps->xfd); } } EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state); #endif /* CONFIG_X86_64 */ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) { struct fpstate *guest_fps = guest_fpu->fpstate; struct fpu *fpu = &current->thread.fpu; struct fpstate *cur_fps = fpu->fpstate; fpregs_lock(); if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD)) save_fpregs_to_fpstate(fpu); /* Swap fpstate */ if (enter_guest) { fpu->__task_fpstate = cur_fps; fpu->fpstate = guest_fps; guest_fps->in_use = true; } else { guest_fps->in_use = false; fpu->fpstate = fpu->__task_fpstate; fpu->__task_fpstate = NULL; } cur_fps = fpu->fpstate; if (!cur_fps->is_confidential) { /* Includes XFD update */ restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE); } else { /* * XSTATE is restored by firmware from encrypted * memory. 
Make sure XFD state is correct while * running with guest fpstate */ xfd_update_state(cur_fps); } fpregs_mark_activate(); fpregs_unlock(); return 0; } EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u64 xfeatures, u32 pkru) { struct fpstate *kstate = gfpu->fpstate; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru, XSTATE_COPY_XSAVE); } else { memcpy(&ustate->fxsave, &kstate->regs.fxsave, sizeof(ustate->fxsave)); /* Make it restorable on a XSAVE enabled host */ ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE; } } EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi); int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru) { struct fpstate *kstate = gfpu->fpstate; const union fpregs_state *ustate = buf; if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) { if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE) return -EINVAL; if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask) return -EINVAL; memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave)); return 0; } if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; /* * Nullify @vpkru to preserve its current value if PKRU's bit isn't set * in the header. KVM's odd ABI is to leave PKRU untouched in this * case (all other components are eventually re-initialized). */ if (!(ustate->xsave.header.xfeatures & XFEATURE_MASK_PKRU)) vpkru = NULL; return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); } EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); #endif /* CONFIG_KVM */ void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); WARN_ON_FPU(!irq_fpu_usable()); WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); save_fpregs_to_fpstate(&current->thread.fpu); } __cpu_invalidate_fpregs_state(); /* Put sane initial values into the control registers. */ if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) ldmxcsr(MXCSR_DEFAULT); if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); } EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, false); preempt_enable(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); /* * Sync the FPU register state to current's memory register state when the * current task owns the FPU. The hardware register state is preserved. 
*/ void fpu_sync_fpstate(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); fpregs_lock(); trace_x86_fpu_before_save(fpu); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) save_fpregs_to_fpstate(fpu); trace_x86_fpu_after_save(fpu); fpregs_unlock(); } static inline unsigned int init_fpstate_copy_size(void) { if (!use_xsave()) return fpu_kernel_cfg.default_size; /* XSAVE(S) just needs the legacy and the xstate header part */ return sizeof(init_fpstate.regs.xsave); } static inline void fpstate_init_fxstate(struct fpstate *fpstate) { fpstate->regs.fxsave.cwd = 0x37f; fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT; } /* * Legacy x87 fpstate state init: */ static inline void fpstate_init_fstate(struct fpstate *fpstate) { fpstate->regs.fsave.cwd = 0xffff037fu; fpstate->regs.fsave.swd = 0xffff0000u; fpstate->regs.fsave.twd = 0xffffffffu; fpstate->regs.fsave.fos = 0xffff0000u; } /* * Used in two places: * 1) Early boot to setup init_fpstate for non XSAVE systems * 2) fpu_init_fpstate_user() which is invoked from KVM */ void fpstate_init_user(struct fpstate *fpstate) { if (!cpu_feature_enabled(X86_FEATURE_FPU)) { fpstate_init_soft(&fpstate->regs.soft); return; } xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures); if (cpu_feature_enabled(X86_FEATURE_FXSR)) fpstate_init_fxstate(fpstate); else fpstate_init_fstate(fpstate); } static void __fpstate_reset(struct fpstate *fpstate, u64 xfd) { /* Initialize sizes and feature masks */ fpstate->size = fpu_kernel_cfg.default_size; fpstate->user_size = fpu_user_cfg.default_size; fpstate->xfeatures = fpu_kernel_cfg.default_features; fpstate->user_xfeatures = fpu_user_cfg.default_features; fpstate->xfd = xfd; } void fpstate_reset(struct fpu *fpu) { /* Set the fpstate pointer to the default fpstate */ fpu->fpstate = &fpu->__fpstate; __fpstate_reset(fpu->fpstate, init_fpstate.xfd); /* Initialize the permission related info in fpu */ fpu->perm.__state_perm = fpu_kernel_cfg.default_features; fpu->perm.__state_size = fpu_kernel_cfg.default_size; fpu->perm.__user_state_size = fpu_user_cfg.default_size; /* Same defaults for guests */ fpu->guest_perm = fpu->perm; } static inline void fpu_inherit_perms(struct fpu *dst_fpu) { if (fpu_state_size_dynamic()) { struct fpu *src_fpu = &current->group_leader->thread.fpu; spin_lock_irq(&current->sighand->siglock); /* Fork also inherits the permissions of the parent */ dst_fpu->perm = src_fpu->perm; dst_fpu->guest_perm = src_fpu->guest_perm; spin_unlock_irq(&current->sighand->siglock); } } /* A passed ssp of zero will not cause any update */ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp) { #ifdef CONFIG_X86_USER_SHADOW_STACK struct cet_user_state *xstate; /* If ssp update is not needed. */ if (!ssp) return 0; xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave, XFEATURE_CET_USER); /* * If there is a non-zero ssp, then 'dst' must be configured with a shadow * stack and the fpu state should be up to date since it was just copied * from the parent in fpu_clone(). So there must be a valid non-init CET * state location in the buffer. */ if (WARN_ON_ONCE(!xstate)) return 1; xstate->user_ssp = (u64)ssp; #endif return 0; } /* Clone current's FPU state on fork */ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal, unsigned long ssp) { struct fpu *src_fpu = &current->thread.fpu; struct fpu *dst_fpu = &dst->thread.fpu; /* The new task's FPU state cannot be valid in the hardware. 
*/ dst_fpu->last_cpu = -1; fpstate_reset(dst_fpu); if (!cpu_feature_enabled(X86_FEATURE_FPU)) return 0; /* * Enforce reload for user space tasks and prevent kernel threads * from trying to save the FPU registers on context switch. */ set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); /* * No FPU state inheritance for kernel threads and IO * worker threads. */ if (minimal) { /* Clear out the minimal state */ memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size()); return 0; } /* * If a new feature is added, ensure all dynamic features are * caller-saved from here! */ BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA); /* * Save the default portion of the current FPU state into the * clone. Assume all dynamic features to be defined as caller- * saved, which enables skipping both the expansion of fpstate * and the copying of any dynamic state. * * Do not use memcpy() when TIF_NEED_FPU_LOAD is set because * copying is not valid when current uses non-default states. */ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); save_fpregs_to_fpstate(dst_fpu); fpregs_unlock(); if (!(clone_flags & CLONE_THREAD)) fpu_inherit_perms(dst_fpu); /* * Children never inherit PASID state. * Force it to have its init value: */ if (use_xsave()) dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID; /* * Update shadow stack pointer, in case it changed during clone. */ if (update_fpu_shstk(dst, ssp)) return 1; trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); return 0; } /* * Whitelist the FPU register state embedded into task_struct for hardened * usercopy. */ void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { *offset = offsetof(struct thread_struct, fpu.__fpstate.regs); *size = fpu_kernel_cfg.default_size; } /* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents * in the fpregs in the eager-FPU case. * * This function can be used in cases where we know that * a state-restore is coming: either an explicit one, * or a reschedule. */ void fpu__drop(struct fpu *fpu) { preempt_disable(); if (fpu == &current->thread.fpu) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); fpregs_deactivate(fpu); } trace_x86_fpu_dropped(fpu); preempt_enable(); } /* * Clear FPU registers by setting them up from the init fpstate. * Caller must do fpregs_[un]lock() around it. */ static inline void restore_fpregs_from_init_fpstate(u64 features_mask) { if (use_xsave()) os_xrstor(&init_fpstate, features_mask); else if (use_fxsr()) fxrstor(&init_fpstate.regs.fxsave); else frstor(&init_fpstate.regs.fsave); pkru_write_default(); } /* * Reset current->fpu memory state to the init values. */ static void fpu_reset_fpregs(void) { struct fpu *fpu = &current->thread.fpu; fpregs_lock(); __fpu_invalidate_fpregs_state(fpu); /* * This does not change the actual hardware registers. It just * resets the memory image and sets TIF_NEED_FPU_LOAD so a * subsequent return to usermode will reload the registers from the * task's memory image. * * Do not use fpstate_init() here. Just copy init_fpstate which has * the correct content already except for PKRU. * * PKRU handling does not rely on the xstate when restoring for * user space as PKRU is eagerly written in switch_to() and * flush_thread(). 
*/ memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size()); set_thread_flag(TIF_NEED_FPU_LOAD); fpregs_unlock(); } /* * Reset current's user FPU states to the init states. current's * supervisor states, if any, are not modified by this function. The * caller guarantees that the XSTATE header in memory is intact. */ void fpu__clear_user_states(struct fpu *fpu) { WARN_ON_FPU(fpu != &current->thread.fpu); fpregs_lock(); if (!cpu_feature_enabled(X86_FEATURE_FPU)) { fpu_reset_fpregs(); fpregs_unlock(); return; } /* * Ensure that current's supervisor states are loaded into their * corresponding registers. */ if (xfeatures_mask_supervisor() && !fpregs_state_valid(fpu, smp_processor_id())) os_xrstor_supervisor(fpu->fpstate); /* Reset user states in registers. */ restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE); /* * Now all FPU registers have their desired values. Inform the FPU * state machine that current's FPU registers are in the hardware * registers. The memory image does not need to be updated because * any operation relying on it has to save the registers first when * current's FPU is marked active. */ fpregs_mark_activate(); fpregs_unlock(); } void fpu_flush_thread(void) { fpstate_reset(&current->thread.fpu); fpu_reset_fpregs(); } /* * Load FPU context before returning to userspace. */ void switch_fpu_return(void) { if (!static_cpu_has(X86_FEATURE_FPU)) return; fpregs_restore_userregs(); } EXPORT_SYMBOL_GPL(switch_fpu_return); void fpregs_lock_and_load(void) { /* * fpregs_lock() only disables preemption (mostly). So modifying state * in an interrupt could screw up some in progress fpregs operation. * Warn about it. */ WARN_ON_ONCE(!irq_fpu_usable()); WARN_ON_ONCE(current->flags & PF_KTHREAD); fpregs_lock(); fpregs_assert_state_consistent(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); } #ifdef CONFIG_X86_DEBUG_FPU /* * If current FPU state according to its tracking (loaded FPU context on this * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is * loaded on return to userland. */ void fpregs_assert_state_consistent(void) { struct fpu *fpu = &current->thread.fpu; if (test_thread_flag(TIF_NEED_FPU_LOAD)) return; WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); } EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); #endif void fpregs_mark_activate(void) { struct fpu *fpu = &current->thread.fpu; fpregs_activate(fpu); fpu->last_cpu = smp_processor_id(); clear_thread_flag(TIF_NEED_FPU_LOAD); } /* * x87 math exception handling: */ int fpu__exception_code(struct fpu *fpu, int trap_nr) { int err; if (trap_nr == X86_TRAP_MF) { unsigned short cwd, swd; /* * (~cwd & swd) will mask out exceptions that are not set to unmasked * status. 0x3f is the exception bits in these regs, 0x200 is the * C1 reg you need in case of a stack fault, 0x040 is the stack * fault bit. We should only be taking one exception at a time, * so if this combination doesn't produce any single exception, * then we have a bad program that isn't synchronizing its FPU usage * and it will suffer the consequences since we won't be able to * fully reproduce the context of the exception. */ if (boot_cpu_has(X86_FEATURE_FXSR)) { cwd = fpu->fpstate->regs.fxsave.cwd; swd = fpu->fpstate->regs.fxsave.swd; } else { cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd; swd = (unsigned short)fpu->fpstate->regs.fsave.swd; } err = swd & ~cwd; } else { /* * The SIMD FPU exceptions are handled a little differently, as there * is only a single status/control register. 
Thus, to determine which * unmasked exception was caught we must mask the exception mask bits * at 0x1f80, and then use these to mask the exception bits at 0x3f. */ unsigned short mxcsr = MXCSR_DEFAULT; if (boot_cpu_has(X86_FEATURE_XMM)) mxcsr = fpu->fpstate->regs.fxsave.mxcsr; err = ~(mxcsr >> 7) & mxcsr; } if (err & 0x001) { /* Invalid op */ /* * swd & 0x240 == 0x040: Stack Underflow * swd & 0x240 == 0x240: Stack Overflow * User must clear the SF bit (0x40) if set */ return FPE_FLTINV; } else if (err & 0x004) { /* Divide by Zero */ return FPE_FLTDIV; } else if (err & 0x008) { /* Overflow */ return FPE_FLTOVF; } else if (err & 0x012) { /* Denormal, Underflow */ return FPE_FLTUND; } else if (err & 0x020) { /* Precision */ return FPE_FLTRES; } /* * If we're using IRQ 13, or supposedly even some trap * X86_TRAP_MF implementations, it's possible * we get a spurious trap, which is not an error. */ return 0; } /* * Initialize register state that may prevent from entering low-power idle. * This function will be invoked from the cpuidle driver only when needed. */ noinstr void fpu_idle_fpregs(void) { /* Note: AMX_TILE being enabled implies XGETBV1 support */ if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) && (xfeatures_in_use() & XFEATURE_MASK_XTILE)) { tile_release(); __this_cpu_write(fpu_fpregs_owner_ctx, NULL); } }
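kernel_fpu_begin_mask()/kernel_fpu_end() above are the primitives behind the kernel_fpu_begin() wrapper declared in asm/fpu/api.h; a typical in-kernel SIMD user brackets its vector code with them and falls back to a scalar path when irq_fpu_usable() says no. A sketch under those assumptions; foo_checksum and its placeholder loop are illustrative, not an actual SIMD implementation.

#include <linux/types.h>
#include <asm/fpu/api.h>

/*
 * Sum @len bytes, entering a kernel_fpu_begin()/end() section when the
 * current context allows FPU use.  The real SIMD body is elided; the
 * point is the bracketing and the irq_fpu_usable() fallback.
 */
static u32 foo_checksum(const u8 *data, size_t len)
{
        u32 sum = 0;
        size_t i;

        if (irq_fpu_usable()) {
                kernel_fpu_begin();
                /* ... SSE/AVX implementation would go here ... */
                for (i = 0; i < len; i++)       /* placeholder scalar loop */
                        sum += data[i];
                kernel_fpu_end();
        } else {
                /* Scalar fallback: safe in any context. */
                for (i = 0; i < len; i++)
                        sum += data[i];
        }

        return sum;
}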
2 2 14 2 2 2 2 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007, 2008, 2009 Siemens AG */ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/device.h> #include <net/cfg802154.h> #include <net/rtnetlink.h> #include "ieee802154.h" #include "nl802154.h" #include "sysfs.h" #include "core.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" /* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg802154_rdev_list); int cfg802154_rdev_list_generation; struct wpan_phy *wpan_phy_find(const char *str) { struct device *dev; if (WARN_ON(!str)) return NULL; dev = class_find_device_by_name(&wpan_phy_class, str); if (!dev) return NULL; return container_of(dev, struct wpan_phy, dev); } EXPORT_SYMBOL(wpan_phy_find); struct wpan_phy_iter_data { int (*fn)(struct wpan_phy *phy, void *data); void *data; }; static int wpan_phy_iter(struct device *dev, void *_data) { struct wpan_phy_iter_data *wpid = _data; struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); return wpid->fn(phy, wpid->data); } int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data), void *data) { struct wpan_phy_iter_data wpid = { .fn = fn, .data = data, }; return class_for_each_device(&wpan_phy_class, NULL, &wpid, wpan_phy_iter); } EXPORT_SYMBOL(wpan_phy_for_each); struct cfg802154_registered_device * cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx) { struct cfg802154_registered_device *result = NULL, *rdev; ASSERT_RTNL(); list_for_each_entry(rdev, &cfg802154_rdev_list, list) { if (rdev->wpan_phy_idx == wpan_phy_idx) { result = rdev; break; } } return result; } struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx) { struct cfg802154_registered_device *rdev; ASSERT_RTNL(); rdev = cfg802154_rdev_by_wpan_phy_idx(wpan_phy_idx); if (!rdev) return NULL; return &rdev->wpan_phy; } struct wpan_phy * wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) { static atomic_t wpan_phy_counter = ATOMIC_INIT(0); struct 
cfg802154_registered_device *rdev; size_t alloc_size; alloc_size = sizeof(*rdev) + priv_size; rdev = kzalloc(alloc_size, GFP_KERNEL); if (!rdev) return NULL; rdev->ops = ops; rdev->wpan_phy_idx = atomic_inc_return(&wpan_phy_counter); if (unlikely(rdev->wpan_phy_idx < 0)) { /* ugh, wrapped! */ atomic_dec(&wpan_phy_counter); kfree(rdev); return NULL; } /* atomic_inc_return makes it start at 1, make it start at 0 */ rdev->wpan_phy_idx--; INIT_LIST_HEAD(&rdev->wpan_dev_list); device_initialize(&rdev->wpan_phy.dev); dev_set_name(&rdev->wpan_phy.dev, PHY_NAME "%d", rdev->wpan_phy_idx); rdev->wpan_phy.dev.class = &wpan_phy_class; rdev->wpan_phy.dev.platform_data = rdev; wpan_phy_net_set(&rdev->wpan_phy, &init_net); init_waitqueue_head(&rdev->dev_wait); init_waitqueue_head(&rdev->wpan_phy.sync_txq); spin_lock_init(&rdev->wpan_phy.queue_lock); return &rdev->wpan_phy; } EXPORT_SYMBOL(wpan_phy_new); int wpan_phy_register(struct wpan_phy *phy) { struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(phy); int ret; rtnl_lock(); ret = device_add(&phy->dev); if (ret) { rtnl_unlock(); return ret; } list_add_rcu(&rdev->list, &cfg802154_rdev_list); cfg802154_rdev_list_generation++; /* TODO phy registered lock */ rtnl_unlock(); /* TODO nl802154 phy notify */ return 0; } EXPORT_SYMBOL(wpan_phy_register); void wpan_phy_unregister(struct wpan_phy *phy) { struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(phy); wait_event(rdev->dev_wait, ({ int __count; rtnl_lock(); __count = rdev->opencount; rtnl_unlock(); __count == 0; })); rtnl_lock(); /* TODO nl802154 phy notify */ /* TODO phy registered lock */ WARN_ON(!list_empty(&rdev->wpan_dev_list)); /* First remove the hardware from everywhere, this makes * it impossible to find from userspace. */ list_del_rcu(&rdev->list); synchronize_rcu(); cfg802154_rdev_list_generation++; device_del(&phy->dev); rtnl_unlock(); } EXPORT_SYMBOL(wpan_phy_unregister); void wpan_phy_free(struct wpan_phy *phy) { put_device(&phy->dev); } EXPORT_SYMBOL(wpan_phy_free); static void cfg802154_free_peer_structures(struct wpan_dev *wpan_dev) { struct ieee802154_pan_device *child, *tmp; mutex_lock(&wpan_dev->association_lock); kfree(wpan_dev->parent); wpan_dev->parent = NULL; list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) { list_del(&child->node); kfree(child); } wpan_dev->nchildren = 0; mutex_unlock(&wpan_dev->association_lock); } int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, struct net *net) { struct wpan_dev *wpan_dev; int err = 0; list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) { if (!wpan_dev->netdev) continue; wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL; err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d"); if (err) break; wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL; } if (err) { /* failed -- clean up to old netns */ net = wpan_phy_net(&rdev->wpan_phy); list_for_each_entry_continue_reverse(wpan_dev, &rdev->wpan_dev_list, list) { if (!wpan_dev->netdev) continue; wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL; err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d"); WARN_ON(err); wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL; } return err; } wpan_phy_net_set(&rdev->wpan_phy, net); err = device_rename(&rdev->wpan_phy.dev, dev_name(&rdev->wpan_phy.dev)); WARN_ON(err); return 0; } void cfg802154_dev_free(struct cfg802154_registered_device *rdev) { kfree(rdev); } static void cfg802154_update_iface_num(struct cfg802154_registered_device *rdev, int iftype, int num) { ASSERT_RTNL(); 
rdev->num_running_ifaces += num; } static int cfg802154_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wpan_dev *wpan_dev = dev->ieee802154_ptr; struct cfg802154_registered_device *rdev; if (!wpan_dev) return NOTIFY_DONE; rdev = wpan_phy_to_rdev(wpan_dev->wpan_phy); /* TODO WARN_ON unspec type */ switch (state) { /* TODO NETDEV_DEVTYPE */ case NETDEV_REGISTER: dev->features |= NETIF_F_NETNS_LOCAL; wpan_dev->identifier = ++rdev->wpan_dev_id; list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list); rdev->devlist_generation++; mutex_init(&wpan_dev->association_lock); INIT_LIST_HEAD(&wpan_dev->children); wpan_dev->max_associations = SZ_16K; wpan_dev->netdev = dev; break; case NETDEV_DOWN: cfg802154_update_iface_num(rdev, wpan_dev->iftype, -1); rdev->opencount--; wake_up(&rdev->dev_wait); break; case NETDEV_UP: cfg802154_update_iface_num(rdev, wpan_dev->iftype, 1); rdev->opencount++; break; case NETDEV_UNREGISTER: cfg802154_free_peer_structures(wpan_dev); /* It is possible to get NETDEV_UNREGISTER * multiple times. To detect that, check * that the interface is still on the list * of registered interfaces, and only then * remove and clean it up. */ if (!list_empty(&wpan_dev->list)) { list_del_rcu(&wpan_dev->list); rdev->devlist_generation++; } /* synchronize (so that we won't find this netdev * from other code any more) and then clear the list * head so that the above code can safely check for * !list_empty() to avoid double-cleanup. */ synchronize_rcu(); INIT_LIST_HEAD(&wpan_dev->list); break; default: return NOTIFY_DONE; } return NOTIFY_OK; } static struct notifier_block cfg802154_netdev_notifier = { .notifier_call = cfg802154_netdev_notifier_call, }; static void __net_exit cfg802154_pernet_exit(struct net *net) { struct cfg802154_registered_device *rdev; rtnl_lock(); list_for_each_entry(rdev, &cfg802154_rdev_list, list) { if (net_eq(wpan_phy_net(&rdev->wpan_phy), net)) WARN_ON(cfg802154_switch_netns(rdev, &init_net)); } rtnl_unlock(); } static struct pernet_operations cfg802154_pernet_ops = { .exit = cfg802154_pernet_exit, }; static int __init wpan_phy_class_init(void) { int rc; rc = register_pernet_device(&cfg802154_pernet_ops); if (rc) goto err; rc = wpan_phy_sysfs_init(); if (rc) goto err_sysfs; rc = register_netdevice_notifier(&cfg802154_netdev_notifier); if (rc) goto err_nl; rc = ieee802154_nl_init(); if (rc) goto err_notifier; rc = nl802154_init(); if (rc) goto err_ieee802154_nl; return 0; err_ieee802154_nl: ieee802154_nl_exit(); err_notifier: unregister_netdevice_notifier(&cfg802154_netdev_notifier); err_nl: wpan_phy_sysfs_exit(); err_sysfs: unregister_pernet_device(&cfg802154_pernet_ops); err: return rc; } subsys_initcall(wpan_phy_class_init); static void __exit wpan_phy_class_exit(void) { nl802154_exit(); ieee802154_nl_exit(); unregister_netdevice_notifier(&cfg802154_netdev_notifier); wpan_phy_sysfs_exit(); unregister_pernet_device(&cfg802154_pernet_ops); } module_exit(wpan_phy_class_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("IEEE 802.15.4 configuration interface"); MODULE_AUTHOR("Dmitry Eremin-Solenikov");
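The registration lifecycle implemented above pairs up as wpan_phy_new() -> wpan_phy_register() -> wpan_phy_unregister() -> wpan_phy_free(). A sketch of a hypothetical full-MAC driver walking that lifecycle directly; foo_ops and foo_priv are assumptions, and most real drivers instead go through mac802154's ieee802154_alloc_hw(), which wraps these calls.

#include <net/cfg802154.h>

struct foo_priv {
        int dummy;
};

/* Would carry the driver's cfg802154 callbacks; left empty in this sketch. */
static const struct cfg802154_ops foo_ops;

static struct wpan_phy *foo_create_phy(void)
{
        struct wpan_phy *phy;
        int err;

        phy = wpan_phy_new(&foo_ops, sizeof(struct foo_priv));
        if (!phy)
                return NULL;

        err = wpan_phy_register(phy);
        if (err) {
                wpan_phy_free(phy);
                return NULL;
        }

        return phy;
}

static void foo_destroy_phy(struct wpan_phy *phy)
{
        wpan_phy_unregister(phy);
        wpan_phy_free(phy);
}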
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COMPLETION_H #define __LINUX_COMPLETION_H /* * (C) Copyright 2001 Linus Torvalds * * Atomic wait-for-completion handler data structures. * See kernel/sched/completion.c for details. */ #include <linux/swait.h> /* * struct completion - structure used to maintain state for a "completion" * * This is the opaque structure used to maintain the state for a "completion". * Completions currently use a FIFO to queue threads that have to wait for * the "completion" event. * * See also: complete(), wait_for_completion() (and friends _timeout, * _interruptible, _interruptible_timeout, and _killable), init_completion(), * reinit_completion(), and macros DECLARE_COMPLETION(), * DECLARE_COMPLETION_ONSTACK(). */ struct completion { unsigned int done; struct swait_queue_head wait; }; #define init_completion_map(x, m) init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} #define COMPLETION_INITIALIZER(work) \ { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \ (*({ init_completion_map(&(work), &(map)); &(work); })) #define COMPLETION_INITIALIZER_ONSTACK(work) \ (*({ init_completion(&work); &work; })) /** * DECLARE_COMPLETION - declare and initialize a completion structure * @work: identifier for the completion structure * * This macro declares and initializes a completion structure. Generally used * for static declarations. You should use the _ONSTACK variant for automatic * variables. */ #define DECLARE_COMPLETION(work) \ struct completion work = COMPLETION_INITIALIZER(work) /* * Lockdep needs to run a non-constant initializer for on-stack * completions - so we use the _ONSTACK() variant for those that * are on the kernel stack: */ /** * DECLARE_COMPLETION_ONSTACK - declare and initialize a completion structure * @work: identifier for the completion structure * * This macro declares and initializes a completion structure on the kernel * stack. */ #ifdef CONFIG_LOCKDEP # define DECLARE_COMPLETION_ONSTACK(work) \ struct completion work = COMPLETION_INITIALIZER_ONSTACK(work) # define DECLARE_COMPLETION_ONSTACK_MAP(work, map) \ struct completion work = COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) #else # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) # define DECLARE_COMPLETION_ONSTACK_MAP(work, map) DECLARE_COMPLETION(work) #endif /** * init_completion - Initialize a dynamically allocated completion * @x: pointer to completion structure that is to be initialized * * This inline function will initialize a dynamically created completion * structure. */ static inline void init_completion(struct completion *x) { x->done = 0; init_swait_queue_head(&x->wait); } /** * reinit_completion - reinitialize a completion structure * @x: pointer to completion structure that is to be reinitialized * * This inline function should be used to reinitialize a completion structure so it can * be reused. This is especially important after complete_all() is used.
*/ static inline void reinit_completion(struct completion *x) { x->done = 0; } extern void wait_for_completion(struct completion *); extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); extern int wait_for_completion_state(struct completion *x, unsigned int state); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); extern unsigned long wait_for_completion_io_timeout(struct completion *x, unsigned long timeout); extern long wait_for_completion_interruptible_timeout( struct completion *x, unsigned long timeout); extern long wait_for_completion_killable_timeout( struct completion *x, unsigned long timeout); extern bool try_wait_for_completion(struct completion *x); extern bool completion_done(struct completion *x); extern void complete(struct completion *); extern void complete_on_current_cpu(struct completion *x); extern void complete_all(struct completion *); #endif
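As a quick illustration of the API declared above (not part of the header): a producer signals a completion that a consumer waits on with a timeout. The names setup_done, producer() and consumer() are made up, and the sketch assumes normal kernel context with msecs_to_jiffies() and -ETIMEDOUT available from the usual headers.

static struct completion setup_done;	/* or: static DECLARE_COMPLETION(setup_done); */

static void producer(void)
{
	/* ... finish whatever the waiter depends on ... */
	complete(&setup_done);			/* wakes one waiter */
}

static int consumer(void)
{
	unsigned long left;

	init_completion(&setup_done);
	/* ... arrange for producer() to run in another context ... */
	left = wait_for_completion_timeout(&setup_done, msecs_to_jiffies(100));

	return left ? 0 : -ETIMEDOUT;		/* 0 means the timeout elapsed */
}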
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_KCOV_H #define _LINUX_KCOV_H #include <linux/sched.h> #include <uapi/linux/kcov.h> struct task_struct; #ifdef CONFIG_KCOV enum kcov_mode { /* Coverage collection is not enabled yet. */ KCOV_MODE_DISABLED = 0, /* KCOV was initialized, but tracing mode hasn't been chosen yet. */ KCOV_MODE_INIT = 1, /* * Tracing coverage collection mode. * Covered PCs are collected in a per-task buffer. */ KCOV_MODE_TRACE_PC = 2, /* Collecting comparison operands mode. */ KCOV_MODE_TRACE_CMP = 3, }; #define KCOV_IN_CTXSW (1 << 30) void kcov_task_init(struct task_struct *t); void kcov_task_exit(struct task_struct *t); #define kcov_prepare_switch(t) \ do { \ (t)->kcov_mode |= KCOV_IN_CTXSW; \ } while (0) #define kcov_finish_switch(t) \ do { \ (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ } while (0) /* See Documentation/dev-tools/kcov.rst for usage details. */ void kcov_remote_start(u64 handle); void kcov_remote_stop(void); u64 kcov_common_handle(void); static inline void kcov_remote_start_common(u64 id) { kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id)); } static inline void kcov_remote_start_usb(u64 id) { kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); } /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary * work around for kcov's lack of nested remote coverage sections support in * task context. Adding support for nested sections is tracked in: * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ static inline void kcov_remote_start_usb_softirq(u64 id) { if (in_serving_softirq()) kcov_remote_start_usb(id); } static inline void kcov_remote_stop_softirq(void) { if (in_serving_softirq()) kcov_remote_stop(); } #ifdef CONFIG_64BIT typedef unsigned long kcov_u64; #else typedef unsigned long long kcov_u64; #endif void __sanitizer_cov_trace_pc(void); void __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2); void __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2); void __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2); void __sanitizer_cov_trace_cmp8(kcov_u64 arg1, kcov_u64 arg2); void __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2); void __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2); void __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2); void __sanitizer_cov_trace_const_cmp8(kcov_u64 arg1, kcov_u64 arg2); void __sanitizer_cov_trace_switch(kcov_u64 val, void *cases); #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} static inline void kcov_prepare_switch(struct task_struct *t) {} static inline void kcov_finish_switch(struct task_struct *t) {} static inline void kcov_remote_start(u64 handle) {} static inline void kcov_remote_stop(void) {} static inline u64 kcov_common_handle(void) { return 0; } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} static inline void kcov_remote_start_usb_softirq(u64 id) {} static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */
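A brief usage sketch of the remote-coverage helpers declared above (illustrative only, not from the header): a background handler brackets its work with kcov_remote_start_common()/kcov_remote_stop() so coverage can be attributed to the userspace process that registered the matching handle. MY_KCOV_ID and my_background_handler() are hypothetical names.

#define MY_KCOV_ID	0x42	/* hypothetical subsystem-local instance id */

static void my_background_handler(void)
{
	/* export coverage of this span to the remote handle for MY_KCOV_ID */
	kcov_remote_start_common(MY_KCOV_ID);

	/* ... code executed on behalf of a userspace fuzzer ... */

	kcov_remote_stop();
}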
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_ATOMIC64_64_H #define _ASM_X86_ATOMIC64_64_H #include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> /* The 64-bit atomic type */ #define ATOMIC64_INIT(i) { (i) } static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __READ_ONCE((v)->counter); } static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __WRITE_ONCE(v->counter, i); } static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) : "er" (i), "m" (v->counter) : "memory"); } static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) : "er" (i), "m" (v->counter) : "memory"); } static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter) : "memory"); } #define arch_atomic64_inc arch_atomic64_inc static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) : "m" (v->counter) : "memory"); } #define arch_atomic64_dec arch_atomic64_dec static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } #define arch_atomic64_add_negative arch_atomic64_add_negative static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic64_add_return arch_atomic64_add_return static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { return arch_atomic64_add_return(-i, v); } #define arch_atomic64_sub_return arch_atomic64_sub_return static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { return xadd(&v->counter, -i); } #define arch_atomic64_fetch_sub arch_atomic64_fetch_sub static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { return
arch_try_cmpxchg(&v->counter, old, new); } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } #define arch_atomic64_xchg arch_atomic64_xchg static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) : "er" (i) : "memory"); } static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); return val; } #define arch_atomic64_fetch_and arch_atomic64_fetch_and static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) : "er" (i) : "memory"); } static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); return val; } #define arch_atomic64_fetch_or arch_atomic64_fetch_or static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) : "er" (i) : "memory"); } static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); return val; } #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor #endif /* _ASM_X86_ATOMIC64_64_H */
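To make the return-value conventions above concrete, here is a small illustrative sketch (not part of the header). Ordinary kernel code goes through the generic atomic64_*() wrappers in <linux/atomic.h> rather than calling the arch_ helpers directly; my_counter and example() are made-up names. The loop at the end is the same pattern the fetch_and/fetch_or/fetch_xor helpers above use internally.

static atomic64_t my_counter = ATOMIC64_INIT(0);

static void example(void)
{
	s64 old, new;

	arch_atomic64_add(3, &my_counter);		/* counter == 3 */
	new = arch_atomic64_add_return(2, &my_counter);	/* returns the new value, 5 */
	old = arch_atomic64_fetch_or(0x10, &my_counter);/* returns the old value, 5; counter == 0x15 */

	/* try_cmpxchg loop: on failure, "old" is refreshed with the current value */
	old = arch_atomic64_read(&my_counter);
	do {
		new = old * 2;
	} while (!arch_atomic64_try_cmpxchg(&my_counter, &old, new));
}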
// SPDX-License-Identifier: GPL-2.0-only /* * scsi_sysfs.c * * SCSI sysfs interface routines. * * Created to pull SCSI mid layer sysfs routines into one file. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/device.h> #include <linux/pm_runtime.h> #include <linux/bsg.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_tcq.h> #include <scsi/scsi_dh.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_devinfo.h> #include "scsi_priv.h" #include "scsi_logging.h" static const struct device_type scsi_dev_type; static const struct { enum scsi_device_state value; char *name; } sdev_states[] = { { SDEV_CREATED, "created" }, { SDEV_RUNNING, "running" }, { SDEV_CANCEL, "cancel" }, { SDEV_DEL, "deleted" }, { SDEV_QUIESCE, "quiesce" }, { SDEV_OFFLINE, "offline" }, { SDEV_TRANSPORT_OFFLINE, "transport-offline" }, { SDEV_BLOCK, "blocked" }, { SDEV_CREATED_BLOCK, "created-blocked" }, }; const char *scsi_device_state_name(enum scsi_device_state state) { int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(sdev_states); i++) { if (sdev_states[i].value == state) { name = sdev_states[i].name; break; } } return name; } static const struct { enum scsi_host_state value; char *name; } shost_states[] = { { SHOST_CREATED, "created" }, { SHOST_RUNNING, "running" }, { SHOST_CANCEL, "cancel" }, { SHOST_DEL, "deleted" }, { SHOST_RECOVERY, "recovery" }, { SHOST_CANCEL_RECOVERY, "cancel/recovery" }, { SHOST_DEL_RECOVERY, "deleted/recovery", }, }; const char *scsi_host_state_name(enum scsi_host_state state) { int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(shost_states); i++) { if (shost_states[i].value == state) { name = shost_states[i].name; break; } } return name; } #ifdef CONFIG_SCSI_DH static const struct { unsigned char value; char *name; } sdev_access_states[] = { { SCSI_ACCESS_STATE_OPTIMAL, "active/optimized" }, { SCSI_ACCESS_STATE_ACTIVE, "active/non-optimized" }, { SCSI_ACCESS_STATE_STANDBY, "standby" }, { SCSI_ACCESS_STATE_UNAVAILABLE, "unavailable" }, { SCSI_ACCESS_STATE_LBA, "lba-dependent" }, { SCSI_ACCESS_STATE_OFFLINE, "offline" }, { SCSI_ACCESS_STATE_TRANSITIONING, "transitioning" }, }; static const char *scsi_access_state_name(unsigned char state) { int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(sdev_access_states); i++) { if (sdev_access_states[i].value == state) { name = sdev_access_states[i].name; break; } } return name; } #endif static int check_set(unsigned long long *val, char *src) { char *last; if (strcmp(src, "-") == 0) { *val = SCAN_WILD_CARD; } else { /* * Doesn't check for int overflow */ *val = simple_strtoull(src, &last, 0); if (*last != '\0') return 1; } return 0; } static int scsi_scan(struct Scsi_Host *shost, const char *str) { char s1[15], s2[15], s3[17], junk; unsigned long long channel, id, lun; int res; res = sscanf(str, "%10s %10s %16s %c", s1, s2, s3, &junk); if (res != 3) return -EINVAL; if (check_set(&channel, s1)) return -EINVAL; if (check_set(&id, s2)) return -EINVAL; if (check_set(&lun, s3)) return -EINVAL; if (shost->transportt->user_scan) res = shost->transportt->user_scan(shost, channel, id, lun); else res =
scsi_scan_host_selected(shost, channel, id, lun, SCSI_SCAN_MANUAL); return res; } /* * shost_show_function: macro to create an attr function that can be used to * show a non-bit field. */ #define shost_show_function(name, field, format_string) \ static ssize_t \ show_##name (struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct Scsi_Host *shost = class_to_shost(dev); \ return snprintf (buf, 20, format_string, shost->field); \ } /* * shost_rd_attr: macro to create a function and attribute variable for a * read only field. */ #define shost_rd_attr2(name, field, format_string) \ shost_show_function(name, field, format_string) \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL); #define shost_rd_attr(field, format_string) \ shost_rd_attr2(field, field, format_string) /* * Create the actual show/store functions and data structures. */ static ssize_t store_scan(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(dev); int res; res = scsi_scan(shost, buf); if (res == 0) res = count; return res; }; static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan); static ssize_t store_shost_state(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int i; struct Scsi_Host *shost = class_to_shost(dev); enum scsi_host_state state = 0; for (i = 0; i < ARRAY_SIZE(shost_states); i++) { const int len = strlen(shost_states[i].name); if (strncmp(shost_states[i].name, buf, len) == 0 && buf[len] == '\n') { state = shost_states[i].value; break; } } if (!state) return -EINVAL; if (scsi_host_set_state(shost, state)) return -EINVAL; return count; } static ssize_t show_shost_state(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); const char *name = scsi_host_state_name(shost->shost_state); if (!name) return -EINVAL; return snprintf(buf, 20, "%s\n", name); } /* DEVICE_ATTR(state) clashes with dev_attr_state for sdev */ static struct device_attribute dev_attr_hstate = __ATTR(state, S_IRUGO | S_IWUSR, show_shost_state, store_shost_state); static ssize_t show_shost_mode(unsigned int mode, char *buf) { ssize_t len = 0; if (mode & MODE_INITIATOR) len = sprintf(buf, "%s", "Initiator"); if (mode & MODE_TARGET) len += sprintf(buf + len, "%s%s", len ? 
", " : "", "Target"); len += sprintf(buf + len, "\n"); return len; } static ssize_t show_shost_supported_mode(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); unsigned int supported_mode = shost->hostt->supported_mode; if (supported_mode == MODE_UNKNOWN) /* by default this should be initiator */ supported_mode = MODE_INITIATOR; return show_shost_mode(supported_mode, buf); } static DEVICE_ATTR(supported_mode, S_IRUGO | S_IWUSR, show_shost_supported_mode, NULL); static ssize_t show_shost_active_mode(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); if (shost->active_mode == MODE_UNKNOWN) return snprintf(buf, 20, "unknown\n"); else return show_shost_mode(shost->active_mode, buf); } static DEVICE_ATTR(active_mode, S_IRUGO | S_IWUSR, show_shost_active_mode, NULL); static int check_reset_type(const char *str) { if (sysfs_streq(str, "adapter")) return SCSI_ADAPTER_RESET; else if (sysfs_streq(str, "firmware")) return SCSI_FIRMWARE_RESET; else return 0; } static ssize_t store_host_reset(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(dev); const struct scsi_host_template *sht = shost->hostt; int ret = -EINVAL; int type; type = check_reset_type(buf); if (!type) goto exit_store_host_reset; if (sht->host_reset) ret = sht->host_reset(shost, type); else ret = -EOPNOTSUPP; exit_store_host_reset: if (ret == 0) ret = count; return ret; } static DEVICE_ATTR(host_reset, S_IWUSR, NULL, store_host_reset); static ssize_t show_shost_eh_deadline(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); if (shost->eh_deadline == -1) return snprintf(buf, strlen("off") + 2, "off\n"); return sprintf(buf, "%u\n", shost->eh_deadline / HZ); } static ssize_t store_shost_eh_deadline(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(dev); int ret = -EINVAL; unsigned long deadline, flags; if (shost->transportt && (shost->transportt->eh_strategy_handler || !shost->hostt->eh_host_reset_handler)) return ret; if (!strncmp(buf, "off", strlen("off"))) deadline = -1; else { ret = kstrtoul(buf, 10, &deadline); if (ret) return ret; if (deadline * HZ > UINT_MAX) return -EINVAL; } spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_in_recovery(shost)) ret = -EBUSY; else { if (deadline == -1) shost->eh_deadline = -1; else shost->eh_deadline = deadline * HZ; ret = count; } spin_unlock_irqrestore(shost->host_lock, flags); return ret; } static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline); shost_rd_attr(unique_id, "%u\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); shost_rd_attr(can_queue, "%d\n"); shost_rd_attr(sg_tablesize, "%hu\n"); shost_rd_attr(sg_prot_tablesize, "%hu\n"); shost_rd_attr(prot_capabilities, "%u\n"); shost_rd_attr(prot_guard_type, "%hd\n"); shost_rd_attr2(proc_name, hostt->proc_name, "%s\n"); static ssize_t show_host_busy(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); return snprintf(buf, 20, "%d\n", scsi_host_busy(shost)); } static DEVICE_ATTR(host_busy, S_IRUGO, show_host_busy, NULL); static ssize_t show_use_blk_mq(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "1\n"); } static DEVICE_ATTR(use_blk_mq, S_IRUGO, show_use_blk_mq, NULL); static ssize_t 
show_nr_hw_queues(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); struct blk_mq_tag_set *tag_set = &shost->tag_set; return snprintf(buf, 20, "%d\n", tag_set->nr_hw_queues); } static DEVICE_ATTR(nr_hw_queues, S_IRUGO, show_nr_hw_queues, NULL); static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_use_blk_mq.attr, &dev_attr_unique_id.attr, &dev_attr_host_busy.attr, &dev_attr_cmd_per_lun.attr, &dev_attr_can_queue.attr, &dev_attr_sg_tablesize.attr, &dev_attr_sg_prot_tablesize.attr, &dev_attr_proc_name.attr, &dev_attr_scan.attr, &dev_attr_hstate.attr, &dev_attr_supported_mode.attr, &dev_attr_active_mode.attr, &dev_attr_prot_capabilities.attr, &dev_attr_prot_guard_type.attr, &dev_attr_host_reset.attr, &dev_attr_eh_deadline.attr, &dev_attr_nr_hw_queues.attr, NULL }; static const struct attribute_group scsi_shost_attr_group = { .attrs = scsi_sysfs_shost_attrs, }; const struct attribute_group *scsi_shost_groups[] = { &scsi_shost_attr_group, NULL }; static void scsi_device_cls_release(struct device *class_dev) { struct scsi_device *sdev; sdev = class_to_sdev(class_dev); put_device(&sdev->sdev_gendev); } static void scsi_device_dev_release(struct device *dev) { struct scsi_device *sdev = to_scsi_device(dev); struct device *parent; struct list_head *this, *tmp; struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL; struct scsi_vpd *vpd_pgb0 = NULL, *vpd_pgb1 = NULL, *vpd_pgb2 = NULL; struct scsi_vpd *vpd_pgb7 = NULL; unsigned long flags; might_sleep(); scsi_dh_release_device(sdev); parent = sdev->sdev_gendev.parent; spin_lock_irqsave(sdev->host->host_lock, flags); list_del(&sdev->siblings); list_del(&sdev->same_target_siblings); list_del(&sdev->starved_entry); spin_unlock_irqrestore(sdev->host->host_lock, flags); cancel_work_sync(&sdev->event_work); list_for_each_safe(this, tmp, &sdev->event_list) { struct scsi_event *evt; evt = list_entry(this, struct scsi_event, node); list_del(&evt->node); kfree(evt); } blk_put_queue(sdev->request_queue); /* NULL queue means the device can't be used */ sdev->request_queue = NULL; sbitmap_free(&sdev->budget_map); mutex_lock(&sdev->inquiry_mutex); vpd_pg0 = rcu_replace_pointer(sdev->vpd_pg0, vpd_pg0, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pg80 = rcu_replace_pointer(sdev->vpd_pg80, vpd_pg80, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pg83 = rcu_replace_pointer(sdev->vpd_pg83, vpd_pg83, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pg89 = rcu_replace_pointer(sdev->vpd_pg89, vpd_pg89, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pgb0 = rcu_replace_pointer(sdev->vpd_pgb0, vpd_pgb0, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pgb1 = rcu_replace_pointer(sdev->vpd_pgb1, vpd_pgb1, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pgb2 = rcu_replace_pointer(sdev->vpd_pgb2, vpd_pgb2, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pgb7 = rcu_replace_pointer(sdev->vpd_pgb7, vpd_pgb7, lockdep_is_held(&sdev->inquiry_mutex)); mutex_unlock(&sdev->inquiry_mutex); if (vpd_pg0) kfree_rcu(vpd_pg0, rcu); if (vpd_pg83) kfree_rcu(vpd_pg83, rcu); if (vpd_pg80) kfree_rcu(vpd_pg80, rcu); if (vpd_pg89) kfree_rcu(vpd_pg89, rcu); if (vpd_pgb0) kfree_rcu(vpd_pgb0, rcu); if (vpd_pgb1) kfree_rcu(vpd_pgb1, rcu); if (vpd_pgb2) kfree_rcu(vpd_pgb2, rcu); if (vpd_pgb7) kfree_rcu(vpd_pgb7, rcu); kfree(sdev->inquiry); kfree(sdev); if (parent) put_device(parent); } static struct class sdev_class = { .name = "scsi_device", .dev_release = scsi_device_cls_release, }; /* all probing is done in the 
individual ->probe routines */ static int scsi_bus_match(struct device *dev, struct device_driver *gendrv) { struct scsi_device *sdp; if (dev->type != &scsi_dev_type) return 0; sdp = to_scsi_device(dev); if (sdp->no_uld_attach) return 0; return (sdp->inq_periph_qual == SCSI_INQ_PQ_CON)? 1: 0; } static int scsi_bus_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct scsi_device *sdev; if (dev->type != &scsi_dev_type) return 0; sdev = to_scsi_device(dev); add_uevent_var(env, "MODALIAS=" SCSI_DEVICE_MODALIAS_FMT, sdev->type); return 0; } const struct bus_type scsi_bus_type = { .name = "scsi", .match = scsi_bus_match, .uevent = scsi_bus_uevent, #ifdef CONFIG_PM .pm = &scsi_bus_pm_ops, #endif }; int scsi_sysfs_register(void) { int error; error = bus_register(&scsi_bus_type); if (!error) { error = class_register(&sdev_class); if (error) bus_unregister(&scsi_bus_type); } return error; } void scsi_sysfs_unregister(void) { class_unregister(&sdev_class); bus_unregister(&scsi_bus_type); } /* * sdev_show_function: macro to create an attr function that can be used to * show a non-bit field. */ #define sdev_show_function(field, format_string) \ static ssize_t \ sdev_show_##field (struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct scsi_device *sdev; \ sdev = to_scsi_device(dev); \ return snprintf (buf, 20, format_string, sdev->field); \ } \ /* * sdev_rd_attr: macro to create a function and attribute variable for a * read only field. */ #define sdev_rd_attr(field, format_string) \ sdev_show_function(field, format_string) \ static DEVICE_ATTR(field, S_IRUGO, sdev_show_##field, NULL); /* * sdev_rw_attr: create a function and attribute variable for a * read/write field. */ #define sdev_rw_attr(field, format_string) \ sdev_show_function(field, format_string) \ \ static ssize_t \ sdev_store_##field (struct device *dev, struct device_attribute *attr, \ const char *buf, size_t count) \ { \ struct scsi_device *sdev; \ sdev = to_scsi_device(dev); \ sscanf (buf, format_string, &sdev->field); \ return count; \ } \ static DEVICE_ATTR(field, S_IRUGO | S_IWUSR, sdev_show_##field, sdev_store_##field); /* Currently we don't export bit fields, but we might in future, * so leave this code in */ #if 0 /* * sdev_rd_attr: create a function and attribute variable for a * read/write bit field. */ #define sdev_rw_attr_bit(field) \ sdev_show_function(field, "%d\n") \ \ static ssize_t \ sdev_store_##field (struct device *dev, struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int ret; \ struct scsi_device *sdev; \ ret = scsi_sdev_check_buf_bit(buf); \ if (ret >= 0) { \ sdev = to_scsi_device(dev); \ sdev->field = ret; \ ret = count; \ } \ return ret; \ } \ static DEVICE_ATTR(field, S_IRUGO | S_IWUSR, sdev_show_##field, sdev_store_##field); /* * scsi_sdev_check_buf_bit: return 0 if buf is "0", return 1 if buf is "1", * else return -EINVAL. */ static int scsi_sdev_check_buf_bit(const char *buf) { if ((buf[1] == '\0') || ((buf[1] == '\n') && (buf[2] == '\0'))) { if (buf[0] == '1') return 1; else if (buf[0] == '0') return 0; else return -EINVAL; } else return -EINVAL; } #endif /* * Create the actual show/store functions and data structures. 
*/ sdev_rd_attr (type, "%d\n"); sdev_rd_attr (scsi_level, "%d\n"); sdev_rd_attr (vendor, "%.8s\n"); sdev_rd_attr (model, "%.16s\n"); sdev_rd_attr (rev, "%.4s\n"); sdev_rd_attr (cdl_supported, "%d\n"); static ssize_t sdev_show_device_busy(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); return snprintf(buf, 20, "%d\n", scsi_device_busy(sdev)); } static DEVICE_ATTR(device_busy, S_IRUGO, sdev_show_device_busy, NULL); static ssize_t sdev_show_device_blocked(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_blocked)); } static DEVICE_ATTR(device_blocked, S_IRUGO, sdev_show_device_blocked, NULL); /* * TODO: can we make these symlinks to the block layer ones? */ static ssize_t sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ); } static ssize_t sdev_store_timeout (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev; int timeout; sdev = to_scsi_device(dev); sscanf (buf, "%d\n", &timeout); blk_queue_rq_timeout(sdev->request_queue, timeout * HZ); return count; } static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout); static ssize_t sdev_show_eh_timeout(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); return snprintf(buf, 20, "%u\n", sdev->eh_timeout / HZ); } static ssize_t sdev_store_eh_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev; unsigned int eh_timeout; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; sdev = to_scsi_device(dev); err = kstrtouint(buf, 10, &eh_timeout); if (err) return err; sdev->eh_timeout = eh_timeout * HZ; return count; } static DEVICE_ATTR(eh_timeout, S_IRUGO | S_IWUSR, sdev_show_eh_timeout, sdev_store_eh_timeout); static ssize_t store_rescan_field (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { scsi_rescan_device(to_scsi_device(dev)); return count; } static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field); static ssize_t sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kernfs_node *kn; struct scsi_device *sdev = to_scsi_device(dev); /* * We need to try to get module, avoiding the module been removed * during delete. */ if (scsi_device_get(sdev)) return -ENODEV; kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); /* * Concurrent writes into the "delete" sysfs attribute may trigger * concurrent calls to device_remove_file() and scsi_remove_device(). * device_remove_file() handles concurrent removal calls by * serializing these and by ignoring the second and later removal * attempts. Concurrent calls of scsi_remove_device() are * serialized. The second and later calls of scsi_remove_device() are * ignored because the first call of that function changes the device * state into SDEV_DEL. 
*/ device_remove_file(dev, attr); scsi_remove_device(sdev); if (kn) sysfs_unbreak_active_protection(kn); scsi_device_put(sdev); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); static ssize_t store_state_field(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int i, ret; struct scsi_device *sdev = to_scsi_device(dev); enum scsi_device_state state = 0; bool rescan_dev = false; for (i = 0; i < ARRAY_SIZE(sdev_states); i++) { const int len = strlen(sdev_states[i].name); if (strncmp(sdev_states[i].name, buf, len) == 0 && buf[len] == '\n') { state = sdev_states[i].value; break; } } switch (state) { case SDEV_RUNNING: case SDEV_OFFLINE: break; default: return -EINVAL; } mutex_lock(&sdev->state_mutex); switch (sdev->sdev_state) { case SDEV_RUNNING: case SDEV_OFFLINE: break; default: mutex_unlock(&sdev->state_mutex); return -EINVAL; } if (sdev->sdev_state == SDEV_RUNNING && state == SDEV_RUNNING) { ret = 0; } else { ret = scsi_device_set_state(sdev, state); if (ret == 0 && state == SDEV_RUNNING) rescan_dev = true; } mutex_unlock(&sdev->state_mutex); if (rescan_dev) { /* * If the device state changes to SDEV_RUNNING, we need to * run the queue to avoid I/O hang, and rescan the device * to revalidate it. Running the queue first is necessary * because another thread may be waiting inside * blk_mq_freeze_queue_wait() and because that call may be * waiting for pending I/O to finish. */ blk_mq_run_hw_queues(sdev->request_queue, true); scsi_rescan_device(sdev); } return ret == 0 ? count : -EINVAL; } static ssize_t show_state_field(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); const char *name = scsi_device_state_name(sdev->sdev_state); if (!name) return -EINVAL; return snprintf(buf, 20, "%s\n", name); } static DEVICE_ATTR(state, S_IRUGO | S_IWUSR, show_state_field, store_state_field); static ssize_t show_queue_type_field(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); const char *name = "none"; if (sdev->simple_tags) name = "simple"; return snprintf(buf, 20, "%s\n", name); } static ssize_t store_queue_type_field(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev = to_scsi_device(dev); if (!sdev->tagged_supported) return -EINVAL; sdev_printk(KERN_INFO, sdev, "ignoring write to deprecated queue_type attribute"); return count; } static DEVICE_ATTR(queue_type, S_IRUGO | S_IWUSR, show_queue_type_field, store_queue_type_field); #define sdev_vpd_pg_attr(_page) \ static ssize_t \ show_vpd_##_page(struct file *filp, struct kobject *kobj, \ struct bin_attribute *bin_attr, \ char *buf, loff_t off, size_t count) \ { \ struct device *dev = kobj_to_dev(kobj); \ struct scsi_device *sdev = to_scsi_device(dev); \ struct scsi_vpd *vpd_page; \ int ret = -EINVAL; \ \ rcu_read_lock(); \ vpd_page = rcu_dereference(sdev->vpd_##_page); \ if (vpd_page) \ ret = memory_read_from_buffer(buf, count, &off, \ vpd_page->data, vpd_page->len); \ rcu_read_unlock(); \ return ret; \ } \ static struct bin_attribute dev_attr_vpd_##_page = { \ .attr = {.name = __stringify(vpd_##_page), .mode = S_IRUGO }, \ .size = 0, \ .read = show_vpd_##_page, \ }; sdev_vpd_pg_attr(pg83); sdev_vpd_pg_attr(pg80); sdev_vpd_pg_attr(pg89); sdev_vpd_pg_attr(pgb0); sdev_vpd_pg_attr(pgb1); sdev_vpd_pg_attr(pgb2); sdev_vpd_pg_attr(pgb7); sdev_vpd_pg_attr(pg0); static ssize_t show_inquiry(struct file *filep, struct 
kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct scsi_device *sdev = to_scsi_device(dev); if (!sdev->inquiry) return -EINVAL; return memory_read_from_buffer(buf, count, &off, sdev->inquiry, sdev->inquiry_len); } static struct bin_attribute dev_attr_inquiry = { .attr = { .name = "inquiry", .mode = S_IRUGO, }, .size = 0, .read = show_inquiry, }; static ssize_t show_iostat_counterbits(struct device *dev, struct device_attribute *attr, char *buf) { return snprintf(buf, 20, "%d\n", (int)sizeof(atomic_t) * 8); } static DEVICE_ATTR(iocounterbits, S_IRUGO, show_iostat_counterbits, NULL); #define show_sdev_iostat(field) \ static ssize_t \ show_iostat_##field(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct scsi_device *sdev = to_scsi_device(dev); \ unsigned long long count = atomic_read(&sdev->field); \ return snprintf(buf, 20, "0x%llx\n", count); \ } \ static DEVICE_ATTR(field, S_IRUGO, show_iostat_##field, NULL) show_sdev_iostat(iorequest_cnt); show_sdev_iostat(iodone_cnt); show_sdev_iostat(ioerr_cnt); show_sdev_iostat(iotmo_cnt); static ssize_t sdev_show_modalias(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); return snprintf (buf, 20, SCSI_DEVICE_MODALIAS_FMT "\n", sdev->type); } static DEVICE_ATTR(modalias, S_IRUGO, sdev_show_modalias, NULL); #define DECLARE_EVT_SHOW(name, Cap_name) \ static ssize_t \ sdev_show_evt_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct scsi_device *sdev = to_scsi_device(dev); \ int val = test_bit(SDEV_EVT_##Cap_name, sdev->supported_events);\ return snprintf(buf, 20, "%d\n", val); \ } #define DECLARE_EVT_STORE(name, Cap_name) \ static ssize_t \ sdev_store_evt_##name(struct device *dev, struct device_attribute *attr,\ const char *buf, size_t count) \ { \ struct scsi_device *sdev = to_scsi_device(dev); \ int val = simple_strtoul(buf, NULL, 0); \ if (val == 0) \ clear_bit(SDEV_EVT_##Cap_name, sdev->supported_events); \ else if (val == 1) \ set_bit(SDEV_EVT_##Cap_name, sdev->supported_events); \ else \ return -EINVAL; \ return count; \ } #define DECLARE_EVT(name, Cap_name) \ DECLARE_EVT_SHOW(name, Cap_name) \ DECLARE_EVT_STORE(name, Cap_name) \ static DEVICE_ATTR(evt_##name, S_IRUGO, sdev_show_evt_##name, \ sdev_store_evt_##name); #define REF_EVT(name) &dev_attr_evt_##name.attr DECLARE_EVT(media_change, MEDIA_CHANGE) DECLARE_EVT(inquiry_change_reported, INQUIRY_CHANGE_REPORTED) DECLARE_EVT(capacity_change_reported, CAPACITY_CHANGE_REPORTED) DECLARE_EVT(soft_threshold_reached, SOFT_THRESHOLD_REACHED_REPORTED) DECLARE_EVT(mode_parameter_change_reported, MODE_PARAMETER_CHANGE_REPORTED) DECLARE_EVT(lun_change_reported, LUN_CHANGE_REPORTED) static ssize_t sdev_store_queue_depth(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int depth, retval; struct scsi_device *sdev = to_scsi_device(dev); const struct scsi_host_template *sht = sdev->host->hostt; if (!sht->change_queue_depth) return -EINVAL; depth = simple_strtoul(buf, NULL, 0); if (depth < 1 || depth > sdev->host->can_queue) return -EINVAL; retval = sht->change_queue_depth(sdev, depth); if (retval < 0) return retval; sdev->max_queue_depth = sdev->queue_depth; return count; } sdev_show_function(queue_depth, "%d\n"); static DEVICE_ATTR(queue_depth, S_IRUGO | S_IWUSR, sdev_show_queue_depth, sdev_store_queue_depth); static ssize_t sdev_show_wwid(struct device *dev, struct 
device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); ssize_t count; count = scsi_vpd_lun_id(sdev, buf, PAGE_SIZE); if (count > 0) { buf[count] = '\n'; count++; } return count; } static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL); #define BLIST_FLAG_NAME(name) \ [const_ilog2((__force __u64)BLIST_##name)] = #name static const char *const sdev_bflags_name[] = { #include "scsi_devinfo_tbl.c" }; #undef BLIST_FLAG_NAME static ssize_t sdev_show_blacklist(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); int i; ssize_t len = 0; for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) { const char *name = NULL; if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i))) continue; if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i]) name = sdev_bflags_name[i]; if (name) len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? " " : "", name); else len += scnprintf(buf + len, PAGE_SIZE - len, "%sINVALID_BIT(%d)", len ? " " : "", i); } if (len) len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } static DEVICE_ATTR(blacklist, S_IRUGO, sdev_show_blacklist, NULL); #ifdef CONFIG_SCSI_DH static ssize_t sdev_show_dh_state(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); if (!sdev->handler) return snprintf(buf, 20, "detached\n"); return snprintf(buf, 20, "%s\n", sdev->handler->name); } static ssize_t sdev_store_dh_state(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev = to_scsi_device(dev); int err = -EINVAL; if (sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL) return -ENODEV; if (!sdev->handler) { /* * Attach to a device handler */ err = scsi_dh_attach(sdev->request_queue, buf); } else if (!strncmp(buf, "activate", 8)) { /* * Activate a device handler */ if (sdev->handler->activate) err = sdev->handler->activate(sdev, NULL, NULL); else err = 0; } else if (!strncmp(buf, "detach", 6)) { /* * Detach from a device handler */ sdev_printk(KERN_WARNING, sdev, "can't detach handler %s.\n", sdev->handler->name); err = -EINVAL; } return err < 0 ? err : count; } static DEVICE_ATTR(dh_state, S_IRUGO | S_IWUSR, sdev_show_dh_state, sdev_store_dh_state); static ssize_t sdev_show_access_state(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); unsigned char access_state; const char *access_state_name; if (!sdev->handler) return -EINVAL; access_state = (sdev->access_state & SCSI_ACCESS_STATE_MASK); access_state_name = scsi_access_state_name(access_state); return sprintf(buf, "%s\n", access_state_name ? 
access_state_name : "unknown"); } static DEVICE_ATTR(access_state, S_IRUGO, sdev_show_access_state, NULL); static ssize_t sdev_show_preferred_path(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); if (!sdev->handler) return -EINVAL; if (sdev->access_state & SCSI_ACCESS_STATE_PREFERRED) return sprintf(buf, "1\n"); else return sprintf(buf, "0\n"); } static DEVICE_ATTR(preferred_path, S_IRUGO, sdev_show_preferred_path, NULL); #endif static ssize_t sdev_show_queue_ramp_up_period(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); return snprintf(buf, 20, "%u\n", jiffies_to_msecs(sdev->queue_ramp_up_period)); } static ssize_t sdev_store_queue_ramp_up_period(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev = to_scsi_device(dev); unsigned int period; if (kstrtouint(buf, 10, &period)) return -EINVAL; sdev->queue_ramp_up_period = msecs_to_jiffies(period); return count; } static DEVICE_ATTR(queue_ramp_up_period, S_IRUGO | S_IWUSR, sdev_show_queue_ramp_up_period, sdev_store_queue_ramp_up_period); static ssize_t sdev_show_cdl_enable(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); return sysfs_emit(buf, "%d\n", (int)sdev->cdl_enable); } static ssize_t sdev_store_cdl_enable(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ret; bool v; if (kstrtobool(buf, &v)) return -EINVAL; ret = scsi_cdl_enable(to_scsi_device(dev), v); if (ret) return ret; return count; } static DEVICE_ATTR(cdl_enable, S_IRUGO | S_IWUSR, sdev_show_cdl_enable, sdev_store_cdl_enable); static umode_t scsi_sdev_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = kobj_to_dev(kobj); struct scsi_device *sdev = to_scsi_device(dev); if (attr == &dev_attr_queue_depth.attr && !sdev->host->hostt->change_queue_depth) return S_IRUGO; if (attr == &dev_attr_queue_ramp_up_period.attr && !sdev->host->hostt->change_queue_depth) return 0; return attr->mode; } static umode_t scsi_sdev_bin_attr_is_visible(struct kobject *kobj, struct bin_attribute *attr, int i) { struct device *dev = kobj_to_dev(kobj); struct scsi_device *sdev = to_scsi_device(dev); if (attr == &dev_attr_vpd_pg0 && !sdev->vpd_pg0) return 0; if (attr == &dev_attr_vpd_pg80 && !sdev->vpd_pg80) return 0; if (attr == &dev_attr_vpd_pg83 && !sdev->vpd_pg83) return 0; if (attr == &dev_attr_vpd_pg89 && !sdev->vpd_pg89) return 0; if (attr == &dev_attr_vpd_pgb0 && !sdev->vpd_pgb0) return 0; if (attr == &dev_attr_vpd_pgb1 && !sdev->vpd_pgb1) return 0; if (attr == &dev_attr_vpd_pgb2 && !sdev->vpd_pgb2) return 0; if (attr == &dev_attr_vpd_pgb7 && !sdev->vpd_pgb7) return 0; return S_IRUGO; } /* Default template for device attributes. 
May NOT be modified */ static struct attribute *scsi_sdev_attrs[] = { &dev_attr_device_blocked.attr, &dev_attr_type.attr, &dev_attr_scsi_level.attr, &dev_attr_device_busy.attr, &dev_attr_vendor.attr, &dev_attr_model.attr, &dev_attr_rev.attr, &dev_attr_rescan.attr, &dev_attr_delete.attr, &dev_attr_state.attr, &dev_attr_timeout.attr, &dev_attr_eh_timeout.attr, &dev_attr_iocounterbits.attr, &dev_attr_iorequest_cnt.attr, &dev_attr_iodone_cnt.attr, &dev_attr_ioerr_cnt.attr, &dev_attr_iotmo_cnt.attr, &dev_attr_modalias.attr, &dev_attr_queue_depth.attr, &dev_attr_queue_type.attr, &dev_attr_wwid.attr, &dev_attr_blacklist.attr, #ifdef CONFIG_SCSI_DH &dev_attr_dh_state.attr, &dev_attr_access_state.attr, &dev_attr_preferred_path.attr, #endif &dev_attr_queue_ramp_up_period.attr, &dev_attr_cdl_supported.attr, &dev_attr_cdl_enable.attr, REF_EVT(media_change), REF_EVT(inquiry_change_reported), REF_EVT(capacity_change_reported), REF_EVT(soft_threshold_reached), REF_EVT(mode_parameter_change_reported), REF_EVT(lun_change_reported), NULL }; static struct bin_attribute *scsi_sdev_bin_attrs[] = { &dev_attr_vpd_pg0, &dev_attr_vpd_pg83, &dev_attr_vpd_pg80, &dev_attr_vpd_pg89, &dev_attr_vpd_pgb0, &dev_attr_vpd_pgb1, &dev_attr_vpd_pgb2, &dev_attr_vpd_pgb7, &dev_attr_inquiry, NULL }; static struct attribute_group scsi_sdev_attr_group = { .attrs = scsi_sdev_attrs, .bin_attrs = scsi_sdev_bin_attrs, .is_visible = scsi_sdev_attr_is_visible, .is_bin_visible = scsi_sdev_bin_attr_is_visible, }; static const struct attribute_group *scsi_sdev_attr_groups[] = { &scsi_sdev_attr_group, NULL }; static int scsi_target_add(struct scsi_target *starget) { int error; if (starget->state != STARGET_CREATED) return 0; error = device_add(&starget->dev); if (error) { dev_err(&starget->dev, "target device_add failed, error %d\n", error); return error; } transport_add_device(&starget->dev); starget->state = STARGET_RUNNING; pm_runtime_set_active(&starget->dev); pm_runtime_enable(&starget->dev); device_enable_async_suspend(&starget->dev); return 0; } /** * scsi_sysfs_add_sdev - add scsi device to sysfs * @sdev: scsi_device to add * * Return value: * 0 on Success / non-zero on Failure **/ int scsi_sysfs_add_sdev(struct scsi_device *sdev) { int error; struct scsi_target *starget = sdev->sdev_target; error = scsi_target_add(starget); if (error) return error; transport_configure_device(&starget->dev); device_enable_async_suspend(&sdev->sdev_gendev); scsi_autopm_get_target(starget); pm_runtime_set_active(&sdev->sdev_gendev); if (!sdev->rpm_autosuspend) pm_runtime_forbid(&sdev->sdev_gendev); pm_runtime_enable(&sdev->sdev_gendev); scsi_autopm_put_target(starget); scsi_autopm_get_device(sdev); scsi_dh_add_device(sdev); error = device_add(&sdev->sdev_gendev); if (error) { sdev_printk(KERN_INFO, sdev, "failed to add device: %d\n", error); return error; } device_enable_async_suspend(&sdev->sdev_dev); error = device_add(&sdev->sdev_dev); if (error) { sdev_printk(KERN_INFO, sdev, "failed to add class device: %d\n", error); device_del(&sdev->sdev_gendev); return error; } transport_add_device(&sdev->sdev_gendev); sdev->is_visible = 1; if (IS_ENABLED(CONFIG_BLK_DEV_BSG)) { sdev->bsg_dev = scsi_bsg_register_queue(sdev); if (IS_ERR(sdev->bsg_dev)) { error = PTR_ERR(sdev->bsg_dev); sdev_printk(KERN_INFO, sdev, "Failed to register bsg queue, errno=%d\n", error); sdev->bsg_dev = NULL; } } scsi_autopm_put_device(sdev); return error; } void __scsi_remove_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; int res; /* * This cleanup path 
is not reentrant and while it is impossible * to get a new reference with scsi_device_get() someone can still * hold a previously acquired one. */ if (sdev->sdev_state == SDEV_DEL) return; if (sdev->is_visible) { /* * If scsi_internal_target_block() is running concurrently, * wait until it has finished before changing the device state. */ mutex_lock(&sdev->state_mutex); /* * If blocked, we go straight to DEL and restart the queue so * any commands issued during driver shutdown (like sync * cache) are errored immediately. */ res = scsi_device_set_state(sdev, SDEV_CANCEL); if (res != 0) { res = scsi_device_set_state(sdev, SDEV_DEL); if (res == 0) scsi_start_queue(sdev); } mutex_unlock(&sdev->state_mutex); if (res != 0) return; if (IS_ENABLED(CONFIG_BLK_DEV_BSG) && sdev->bsg_dev) bsg_unregister_queue(sdev->bsg_dev); device_unregister(&sdev->sdev_dev); transport_remove_device(dev); device_del(dev); } else put_device(&sdev->sdev_dev); /* * Stop accepting new requests and wait until all queuecommand() and * scsi_run_queue() invocations have finished before tearing down the * device. */ mutex_lock(&sdev->state_mutex); scsi_device_set_state(sdev, SDEV_DEL); mutex_unlock(&sdev->state_mutex); blk_mq_destroy_queue(sdev->request_queue); kref_put(&sdev->host->tagset_refcnt, scsi_mq_free_tags); cancel_work_sync(&sdev->requeue_work); if (sdev->host->hostt->slave_destroy) sdev->host->hostt->slave_destroy(sdev); transport_destroy_device(dev); /* * Paired with the kref_get() in scsi_sysfs_initialize(). We have * removed sysfs visibility from the device, so make the target * invisible if this was the last device underneath it. */ scsi_target_reap(scsi_target(sdev)); put_device(dev); } /** * scsi_remove_device - unregister a device from the scsi bus * @sdev: scsi_device to unregister **/ void scsi_remove_device(struct scsi_device *sdev) { struct Scsi_Host *shost = sdev->host; mutex_lock(&shost->scan_mutex); __scsi_remove_device(sdev); mutex_unlock(&shost->scan_mutex); } EXPORT_SYMBOL(scsi_remove_device); static void __scsi_remove_target(struct scsi_target *starget) { struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); unsigned long flags; struct scsi_device *sdev; spin_lock_irqsave(shost->host_lock, flags); restart: list_for_each_entry(sdev, &shost->__devices, siblings) { /* * We cannot call scsi_device_get() here, as * we might've been called from rmmod() causing * scsi_device_get() to fail the module_is_live() * check. */ if (sdev->channel != starget->channel || sdev->id != starget->id) continue; if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL || !get_device(&sdev->sdev_gendev)) continue; spin_unlock_irqrestore(shost->host_lock, flags); scsi_remove_device(sdev); put_device(&sdev->sdev_gendev); spin_lock_irqsave(shost->host_lock, flags); goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); } /** * scsi_remove_target - try to remove a target and all its devices * @dev: generic starget or parent of generic stargets to be removed * * Note: This is slightly racy. It is possible that if the user * requests the addition of another device then the target won't be * removed. 
*/ void scsi_remove_target(struct device *dev) { struct Scsi_Host *shost = dev_to_shost(dev->parent); struct scsi_target *starget; unsigned long flags; restart: spin_lock_irqsave(shost->host_lock, flags); list_for_each_entry(starget, &shost->__targets, siblings) { if (starget->state == STARGET_DEL || starget->state == STARGET_REMOVE || starget->state == STARGET_CREATED_REMOVE) continue; if (starget->dev.parent == dev || &starget->dev == dev) { kref_get(&starget->reap_ref); if (starget->state == STARGET_CREATED) starget->state = STARGET_CREATED_REMOVE; else starget->state = STARGET_REMOVE; spin_unlock_irqrestore(shost->host_lock, flags); __scsi_remove_target(starget); scsi_target_reap(starget); goto restart; } } spin_unlock_irqrestore(shost->host_lock, flags); } EXPORT_SYMBOL(scsi_remove_target); int scsi_register_driver(struct device_driver *drv) { drv->bus = &scsi_bus_type; return driver_register(drv); } EXPORT_SYMBOL(scsi_register_driver); int scsi_register_interface(struct class_interface *intf) { intf->class = &sdev_class; return class_interface_register(intf); } EXPORT_SYMBOL(scsi_register_interface); /** * scsi_sysfs_add_host - add scsi host to subsystem * @shost: scsi host struct to add to subsystem **/ int scsi_sysfs_add_host(struct Scsi_Host *shost) { transport_register_device(&shost->shost_gendev); transport_configure_device(&shost->shost_gendev); return 0; } static const struct device_type scsi_dev_type = { .name = "scsi_device", .release = scsi_device_dev_release, .groups = scsi_sdev_attr_groups, }; void scsi_sysfs_device_initialize(struct scsi_device *sdev) { unsigned long flags; struct Scsi_Host *shost = sdev->host; const struct scsi_host_template *hostt = shost->hostt; struct scsi_target *starget = sdev->sdev_target; device_initialize(&sdev->sdev_gendev); sdev->sdev_gendev.bus = &scsi_bus_type; sdev->sdev_gendev.type = &scsi_dev_type; scsi_enable_async_suspend(&sdev->sdev_gendev); dev_set_name(&sdev->sdev_gendev, "%d:%d:%d:%llu", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); sdev->sdev_gendev.groups = hostt->sdev_groups; device_initialize(&sdev->sdev_dev); sdev->sdev_dev.parent = get_device(&sdev->sdev_gendev); sdev->sdev_dev.class = &sdev_class; dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%llu", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); /* * Get a default scsi_level from the target (derived from sibling * devices). This is the best we can do for guessing how to set * sdev->lun_in_cdb for the initial INQUIRY command. For LUN 0 the * setting doesn't matter, because all the bits are zero anyway. * But it does matter for higher LUNs. */ sdev->scsi_level = starget->scsi_level; if (sdev->scsi_level <= SCSI_2 && sdev->scsi_level != SCSI_UNKNOWN && !shost->no_scsi2_lun_in_cdb) sdev->lun_in_cdb = 1; transport_setup_device(&sdev->sdev_gendev); spin_lock_irqsave(shost->host_lock, flags); list_add_tail(&sdev->same_target_siblings, &starget->devices); list_add_tail(&sdev->siblings, &shost->__devices); spin_unlock_irqrestore(shost->host_lock, flags); /* * device can now only be removed via __scsi_remove_device() so hold * the target. 
Target will be held in CREATED state until something * beneath it becomes visible (in which case it moves to RUNNING) */ kref_get(&starget->reap_ref); } int scsi_is_sdev_device(const struct device *dev) { return dev->type == &scsi_dev_type; } EXPORT_SYMBOL(scsi_is_sdev_device); /* A blank transport template that is used in drivers that don't * yet implement Transport Attributes */ struct scsi_transport_template blank_transport_template = { { { {NULL, }, }, }, };
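/*
 * Illustrative sketch (not part of scsi_sysfs.c): how an upper-level SCSI
 * driver might hook into scsi_bus_type via scsi_register_driver() declared
 * above. The "example_uld" names and the probe callback are hypothetical;
 * in-tree users such as sd/sr/st follow the same pattern.
 */
#include <linux/module.h>
#include <linux/device.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_driver.h>

static int example_uld_probe(struct device *dev)
{
        struct scsi_device *sdev = to_scsi_device(dev);

        sdev_printk(KERN_INFO, sdev, "bound to example ULD\n");
        return 0;
}

static struct scsi_driver example_uld_template = {
        .gendrv = {
                .name   = "example_uld",
                .owner  = THIS_MODULE,
                .probe  = example_uld_probe,
        },
};

static int __init example_uld_init(void)
{
        /* Places the driver on scsi_bus_type and registers it with the core. */
        return scsi_register_driver(&example_uld_template.gendrv);
}

static void __exit example_uld_exit(void)
{
        driver_unregister(&example_uld_template.gendrv);
}

module_init(example_uld_init);
module_exit(example_uld_exit);
MODULE_LICENSE("GPL");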
/* SPDX-License-Identifier: GPL-2.0 */ #include <linux/fs.h> #define DEVCG_ACC_MKNOD 1 #define DEVCG_ACC_READ 2 #define DEVCG_ACC_WRITE 4 #define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE) #define DEVCG_DEV_BLOCK 1 #define DEVCG_DEV_CHAR 2 #define DEVCG_DEV_ALL 4 /* this represents all devices */ #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) int devcgroup_check_permission(short type, u32 major, u32 minor, short access); static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; if (likely(!inode->i_rdev)) return 0; if (S_ISBLK(inode->i_mode)) type = DEVCG_DEV_BLOCK; else if (S_ISCHR(inode->i_mode)) type = DEVCG_DEV_CHAR; else return 0; if (mask & MAY_WRITE) access |= DEVCG_ACC_WRITE; if (mask & MAY_READ) access |= DEVCG_ACC_READ; return devcgroup_check_permission(type, imajor(inode), iminor(inode), access); } static inline int devcgroup_inode_mknod(int mode, dev_t dev) { short type; if (!S_ISBLK(mode) && !S_ISCHR(mode)) return 0; if (S_ISCHR(mode) && dev == WHITEOUT_DEV) return 0; if (S_ISBLK(mode)) type = DEVCG_DEV_BLOCK; else type = DEVCG_DEV_CHAR; return devcgroup_check_permission(type, MAJOR(dev), MINOR(dev), DEVCG_ACC_MKNOD); } #else static inline int devcgroup_check_permission(short type, u32 major, u32 minor, short access) { return 0; } static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } static inline int devcgroup_inode_mknod(int mode, dev_t dev) { return 0; } #endif
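/*
 * Illustrative sketch (not part of this header): how an open-time caller
 * might translate file mode flags into the MAY_* mask that
 * devcgroup_inode_permission() expects. The helper name is hypothetical;
 * in-tree block and character open paths do the equivalent mapping.
 */
static inline int example_devcg_may_open(struct inode *inode, fmode_t f_mode)
{
        int mask = 0;

        if (f_mode & FMODE_READ)
                mask |= MAY_READ;
        if (f_mode & FMODE_WRITE)
                mask |= MAY_WRITE;

        /* 0 when the device cgroup allows the access, a negative errno otherwise. */
        return devcgroup_inode_permission(inode, mask);
}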
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_BITOPS_H #define _ASM_X86_BITOPS_H /* * Copyright 1992, Linus Torvalds. * * Note: inlines with more than a single statement should be marked * __always_inline to avoid problems with older gcc's inlining heuristics. */ #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif #include <linux/compiler.h> #include <asm/alternative.h> #include <asm/rmwcc.h> #include <asm/barrier.h> #if BITS_PER_LONG == 32 # define _BITOPS_LONG_SHIFT 5 #elif BITS_PER_LONG == 64 # define _BITOPS_LONG_SHIFT 6 #else # error "Unexpected BITS_PER_LONG" #endif #define BIT_64(n) (U64_C(1) << (n)) /* * These have to be done with inline assembly: that way the bit-setting * is guaranteed to be atomic. All bit operations return 0 if the bit * was cleared before the operation and != 0 if it was not. * * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ #define RLONG_ADDR(x) "m" (*(volatile long *) (x)) #define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) #define ADDR RLONG_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte.
*/ #define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) static __always_inline void arch_set_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "orb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (CONST_MASK(nr)) : "memory"); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } static __always_inline void arch___set_bit(unsigned long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline void arch_clear_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "andb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (~CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } static __always_inline void arch_clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); arch_clear_bit(nr, addr); } static __always_inline void arch___clear_bit(unsigned long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, volatile unsigned long *addr) { bool negative; asm volatile(LOCK_PREFIX "xorb %2,%1" CC_SET(s) : CC_OUT(s) (negative), WBYTE_ADDR(addr) : "iq" ((char)mask) : "memory"); return negative; } #define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte static __always_inline void arch___clear_bit_unlock(long nr, volatile unsigned long *addr) { arch___clear_bit(nr, addr); } static __always_inline void arch___change_bit(unsigned long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline void arch_change_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "xorb %b1,%0" : CONST_MASK_ADDR(nr, addr) : "iq" (CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } static __always_inline bool arch_test_and_set_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr); } static __always_inline bool arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return arch_test_and_set_bit(nr, addr); } static __always_inline bool arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm(__ASM_SIZE(bts) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } static __always_inline bool arch_test_and_clear_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr); } /* * Note: the operation is performed atomically with respect to * the local CPU, but not other CPUs. Portable code should not * rely on this behaviour. 
* KVM relies on this behaviour on x86 for modifying memory that is also * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ static __always_inline bool arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btr) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } static __always_inline bool arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(btc) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } static __always_inline bool arch_test_and_change_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) { bool oldbit; asm volatile("testb %2,%1" CC_SET(nz) : CC_OUT(nz) (oldbit) : "m" (((unsigned char *)addr)[nr >> 3]), "i" (1 << (nr & 7)) :"memory"); return oldbit; } static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; asm volatile(__ASM_SIZE(bt) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; } static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsigned long *addr) { return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) : variable_test_bit(nr, addr); } static __always_inline bool arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) { return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : variable_test_bit(nr, addr); } static __always_inline unsigned long variable__ffs(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; } /** * __ffs - find first set bit in word * @word: The word to search * * Undefined if no bit exists, so code should check against 0 first. */ #define __ffs(word) \ (__builtin_constant_p(word) ? \ (unsigned long)__builtin_ctzl(word) : \ variable__ffs(word)) static __always_inline unsigned long variable_ffz(unsigned long word) { asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; } /** * ffz - find first zero bit in word * @word: The word to search * * Undefined if no zero exists, so code should check against ~0UL first. */ #define ffz(word) \ (__builtin_constant_p(word) ? \ (unsigned long)__builtin_ctzl(~word) : \ variable_ffz(word)) /* * __fls: find last set bit in word * @word: The word to search * * Undefined if no set bit exists, so code should check against 0 first. */ static __always_inline unsigned long __fls(unsigned long word) { if (__builtin_constant_p(word)) return BITS_PER_LONG - 1 - __builtin_clzl(word); asm("bsr %1,%0" : "=r" (word) : "rm" (word)); return word; } #undef ADDR #ifdef __KERNEL__ static __always_inline int variable_ffs(int x) { int r; #ifdef CONFIG_X86_64 /* * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before, except that the * top 32 bits will be cleared. * * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. 
*/ asm("bsfl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" : "=&r" (r) : "rm" (x), "r" (-1)); #else asm("bsfl %1,%0\n\t" "jnz 1f\n\t" "movl $-1,%0\n" "1:" : "=r" (r) : "rm" (x)); #endif return r + 1; } /** * ffs - find first set bit in word * @x: the word to search * * This is defined the same way as the libc and compiler builtin ffs * routines, therefore differs in spirit from the other bitops. * * ffs(value) returns 0 if value is 0 or the position of the first * set bit if value is nonzero. The first (least significant) bit * is at position 1. */ #define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) /** * fls - find last set bit in word * @x: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffs, but returns the position of the most significant set bit. * * fls(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ static __always_inline int fls(unsigned int x) { int r; if (__builtin_constant_p(x)) return x ? 32 - __builtin_clz(x) : 0; #ifdef CONFIG_X86_64 /* * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before, except that the * top 32 bits will be cleared. * * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ asm("bsrl %1,%0" : "=r" (r) : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" : "=&r" (r) : "rm" (x), "rm" (-1)); #else asm("bsrl %1,%0\n\t" "jnz 1f\n\t" "movl $-1,%0\n" "1:" : "=r" (r) : "rm" (x)); #endif return r + 1; } /** * fls64 - find last set bit in a 64-bit word * @x: the word to search * * This is defined in a similar way as the libc and compiler builtin * ffsll, but returns the position of the most significant set bit. * * fls64(value) returns 0 if value is 0 or the position of the last * set bit if value is nonzero. The last (most significant) bit is * at position 64. */ #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { int bitpos = -1; if (__builtin_constant_p(x)) return x ? 64 - __builtin_clzll(x) : 0; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } #else #include <asm-generic/bitops/fls64.h> #endif #include <asm-generic/bitops/sched.h> #include <asm/arch_hweight.h> #include <asm-generic/bitops/const_hweight.h> #include <asm-generic/bitops/instrumented-atomic.h> #include <asm-generic/bitops/instrumented-non-atomic.h> #include <asm-generic/bitops/instrumented-lock.h> #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic-setbit.h> #endif /* __KERNEL__ */ #endif /* _ASM_X86_BITOPS_H */
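/*
 * Illustrative sketch (not part of this header): expected results of the
 * word-scanning helpers above plus a typical bitmap update. The function
 * and bitmap names are hypothetical; note that ffs()/fls() are 1-based and
 * return 0 for a zero argument, while __ffs()/ffz() are 0-based and
 * undefined when no matching bit exists.
 */
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/printk.h>

static void __maybe_unused example_bitops_usage(void)
{
        DECLARE_BITMAP(flags, 64);

        bitmap_zero(flags, 64);

        set_bit(5, flags);                      /* atomic RMW (LOCK BTS/ORB) */
        __set_bit(6, flags);                    /* non-atomic variant */

        if (test_and_clear_bit(5, flags))       /* returns the old bit value */
                pr_debug("bit 5 was set\n");

        pr_debug("ffs=%d fls=%d __ffs=%lu ffz=%lu\n",
                 ffs(0x10), fls(0x10),          /* both 5 (1-based) */
                 __ffs(0x10UL), ffz(0x0fUL));   /* both 4 (0-based) */
}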
// SPDX-License-Identifier: GPL-2.0 /* * bus.c - bus driver management * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2007 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2007 Novell Inc.
* Copyright (c) 2023 Greg Kroah-Hartman <gregkh@linuxfoundation.org> */ #include <linux/async.h> #include <linux/device/bus.h> #include <linux/device.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/string.h> #include <linux/mutex.h> #include <linux/sysfs.h> #include "base.h" #include "power/power.h" /* /sys/devices/system */ static struct kset *system_kset; /* /sys/bus */ static struct kset *bus_kset; #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr) /* * sysfs bindings for drivers */ #define to_drv_attr(_attr) container_of(_attr, struct driver_attribute, attr) #define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ struct driver_attribute driver_attr_##_name = \ __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) static int __must_check bus_rescan_devices_helper(struct device *dev, void *data); /** * bus_to_subsys - Turn a struct bus_type into a struct subsys_private * * @bus: pointer to the struct bus_type to look up * * The driver core internals needs to work on the subsys_private structure, not * the external struct bus_type pointer. This function walks the list of * registered busses in the system and finds the matching one and returns the * internal struct subsys_private that relates to that bus. * * Note, the reference count of the return value is INCREMENTED if it is not * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ static struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; if (!bus || !bus_kset) return NULL; spin_lock(&bus_kset->list_lock); if (list_empty(&bus_kset->list)) goto done; list_for_each_entry(kobj, &bus_kset->list, entry) { struct kset *kset = container_of(kobj, struct kset, kobj); sp = container_of_const(kset, struct subsys_private, subsys); if (sp->bus == bus) goto done; } sp = NULL; done: sp = subsys_get(sp); spin_unlock(&bus_kset->list_lock); return sp; } static const struct bus_type *bus_get(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); if (sp) return bus; return NULL; } static void bus_put(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); /* two puts are required as the call to bus_to_subsys incremented it again */ subsys_put(sp); subsys_put(sp); } static ssize_t drv_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct driver_attribute *drv_attr = to_drv_attr(attr); struct driver_private *drv_priv = to_driver(kobj); ssize_t ret = -EIO; if (drv_attr->show) ret = drv_attr->show(drv_priv->driver, buf); return ret; } static ssize_t drv_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct driver_attribute *drv_attr = to_drv_attr(attr); struct driver_private *drv_priv = to_driver(kobj); ssize_t ret = -EIO; if (drv_attr->store) ret = drv_attr->store(drv_priv->driver, buf, count); return ret; } static const struct sysfs_ops driver_sysfs_ops = { .show = drv_attr_show, .store = drv_attr_store, }; static void driver_release(struct kobject *kobj) { struct driver_private *drv_priv = to_driver(kobj); pr_debug("driver: '%s': %s\n", kobject_name(kobj), __func__); kfree(drv_priv); } static const struct kobj_type driver_ktype = { .sysfs_ops = &driver_sysfs_ops, .release = driver_release, }; /* * sysfs bindings for buses */ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { 
struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); ssize_t ret = 0; if (bus_attr->show) ret = bus_attr->show(subsys_priv->bus, buf); return ret; } static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); ssize_t ret = 0; if (bus_attr->store) ret = bus_attr->store(subsys_priv->bus, buf, count); return ret; } static const struct sysfs_ops bus_sysfs_ops = { .show = bus_attr_show, .store = bus_attr_store, }; int bus_create_file(const struct bus_type *bus, struct bus_attribute *attr) { struct subsys_private *sp = bus_to_subsys(bus); int error; if (!sp) return -EINVAL; error = sysfs_create_file(&sp->subsys.kobj, &attr->attr); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_create_file); void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr) { struct subsys_private *sp = bus_to_subsys(bus); if (!sp) return; sysfs_remove_file(&sp->subsys.kobj, &attr->attr); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_remove_file); static void bus_release(struct kobject *kobj) { struct subsys_private *priv = to_subsys_private(kobj); lockdep_unregister_key(&priv->lock_key); kfree(priv); } static const struct kobj_type bus_ktype = { .sysfs_ops = &bus_sysfs_ops, .release = bus_release, }; static int bus_uevent_filter(const struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &bus_ktype) return 1; return 0; } static const struct kset_uevent_ops bus_uevent_ops = { .filter = bus_uevent_filter, }; /* Manually detach a device from its associated driver. */ static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count) { const struct bus_type *bus = bus_get(drv->bus); struct device *dev; int err = -ENODEV; dev = bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == drv) { device_driver_detach(dev); err = count; } put_device(dev); bus_put(bus); return err; } static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store); /* * Manually attach a device to a driver. * Note: the driver must want to bind to the device, * it is not possible to override the driver's id table. 
*/ static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count) { const struct bus_type *bus = bus_get(drv->bus); struct device *dev; int err = -ENODEV; dev = bus_find_device_by_name(bus, NULL, buf); if (dev && driver_match_device(drv, dev)) { err = device_driver_attach(drv, dev); if (!err) { /* success */ err = count; } } put_device(dev); bus_put(bus); return err; } static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store); static ssize_t drivers_autoprobe_show(const struct bus_type *bus, char *buf) { struct subsys_private *sp = bus_to_subsys(bus); int ret; if (!sp) return -EINVAL; ret = sysfs_emit(buf, "%d\n", sp->drivers_autoprobe); subsys_put(sp); return ret; } static ssize_t drivers_autoprobe_store(const struct bus_type *bus, const char *buf, size_t count) { struct subsys_private *sp = bus_to_subsys(bus); if (!sp) return -EINVAL; if (buf[0] == '0') sp->drivers_autoprobe = 0; else sp->drivers_autoprobe = 1; subsys_put(sp); return count; } static ssize_t drivers_probe_store(const struct bus_type *bus, const char *buf, size_t count) { struct device *dev; int err = -EINVAL; dev = bus_find_device_by_name(bus, NULL, buf); if (!dev) return -ENODEV; if (bus_rescan_devices_helper(dev, NULL) == 0) err = count; put_device(dev); return err; } static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *dev_prv; if (n) { dev_prv = to_device_private_bus(n); dev = dev_prv->device; } return dev; } /** * bus_for_each_dev - device iterator. * @bus: bus type. * @start: device to start iterating from. * @data: data for the callback. * @fn: function to be called for each device. * * Iterate over @bus's list of devices, and call @fn for each, * passing it @data. If @start is not NULL, we use that device to * begin iterating from. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * * NOTE: The device that returns a non-zero value is not retained * in any way, nor is its refcount incremented. If the caller needs * to retain this data, it should do so, and increment the reference * count in the supplied callback. */ int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *, void *)) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; int error = 0; if (!sp) return -EINVAL; klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while (!error && (dev = next_device(&i))) error = fn(dev, data); klist_iter_exit(&i); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_dev); /** * bus_find_device - device iterator for locating a particular device. * @bus: bus type * @start: Device to begin with * @data: Data to pass to match function * @match: Callback function to check device * * This is similar to the bus_for_each_dev() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. 
*/ struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, int (*match)(struct device *dev, const void *data)) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; if (!sp) return NULL; klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) break; klist_iter_exit(&i); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(bus_find_device); static struct device_driver *next_driver(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct driver_private *drv_priv; if (n) { drv_priv = container_of(n, struct driver_private, knode_bus); return drv_priv->driver; } return NULL; } /** * bus_for_each_drv - driver iterator * @bus: bus we're dealing with. * @start: driver to start iterating on. * @data: data to pass to the callback. * @fn: function to call for each driver. * * This is nearly identical to the device iterator above. * We iterate over each driver that belongs to @bus, and call * @fn for each. If @fn returns anything but 0, we break out * and return it. If @start is not NULL, we use it as the head * of the list. * * NOTE: we don't return the driver that returns a non-zero * value, nor do we leave the reference count incremented for that * driver. If the caller needs to know that info, it must set it * in the callback. It must also be sure to increment the refcount * so it doesn't disappear before returning to the caller. */ int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device_driver *drv; int error = 0; if (!sp) return -EINVAL; klist_iter_init_node(&sp->klist_drivers, &i, start ? &start->p->knode_bus : NULL); while ((drv = next_driver(&i)) && !error) error = fn(drv, data); klist_iter_exit(&i); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_drv); /** * bus_add_device - add device to bus * @dev: device being added * * - Add device's bus attributes. * - Create links to device's bus. * - Add the device to its bus's list of devices. */ int bus_add_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); int error; if (!sp) { /* * This is a normal operation for many devices that do not * have a bus assigned to them, just say that all went * well. */ return 0; } /* * Reference in sp is now incremented and will be dropped when * the device is removed from the bus */ pr_debug("bus: '%s': add device %s\n", sp->bus->name, dev_name(dev)); error = device_add_groups(dev, sp->bus->dev_groups); if (error) goto out_put; error = sysfs_create_link(&sp->devices_kset->kobj, &dev->kobj, dev_name(dev)); if (error) goto out_groups; error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) goto out_subsys; klist_add_tail(&dev->p->knode_bus, &sp->klist_devices); return 0; out_subsys: sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); out_groups: device_remove_groups(dev, sp->bus->dev_groups); out_put: subsys_put(sp); return error; } /** * bus_probe_device - probe drivers for a new device * @dev: device to probe * * - Automatically probe for a driver if the bus allows it. 
*/ void bus_probe_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; if (!sp) return; if (sp->drivers_autoprobe) device_initial_probe(dev); mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) if (sif->add_dev) sif->add_dev(dev, sif); mutex_unlock(&sp->mutex); subsys_put(sp); } /** * bus_remove_device - remove device from bus * @dev: device to be removed * * - Remove device from all interfaces. * - Remove symlink from bus' directory. * - Delete device from bus's list. * - Detach from its driver. * - Drop reference taken in bus_add_device(). */ void bus_remove_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; if (!sp) return; mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) if (sif->remove_dev) sif->remove_dev(dev, sif); mutex_unlock(&sp->mutex); sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); device_remove_groups(dev, dev->bus->dev_groups); if (klist_node_attached(&dev->p->knode_bus)) klist_del(&dev->p->knode_bus); pr_debug("bus: '%s': remove device %s\n", dev->bus->name, dev_name(dev)); device_release_driver(dev); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in bus_add_device() */ subsys_put(sp); subsys_put(sp); } static int __must_check add_bind_files(struct device_driver *drv) { int ret; ret = driver_create_file(drv, &driver_attr_unbind); if (ret == 0) { ret = driver_create_file(drv, &driver_attr_bind); if (ret) driver_remove_file(drv, &driver_attr_unbind); } return ret; } static void remove_bind_files(struct device_driver *drv) { driver_remove_file(drv, &driver_attr_bind); driver_remove_file(drv, &driver_attr_unbind); } static BUS_ATTR_WO(drivers_probe); static BUS_ATTR_RW(drivers_autoprobe); static int add_probe_files(const struct bus_type *bus) { int retval; retval = bus_create_file(bus, &bus_attr_drivers_probe); if (retval) goto out; retval = bus_create_file(bus, &bus_attr_drivers_autoprobe); if (retval) bus_remove_file(bus, &bus_attr_drivers_probe); out: return retval; } static void remove_probe_files(const struct bus_type *bus) { bus_remove_file(bus, &bus_attr_drivers_autoprobe); bus_remove_file(bus, &bus_attr_drivers_probe); } static ssize_t uevent_store(struct device_driver *drv, const char *buf, size_t count) { int rc; rc = kobject_synth_uevent(&drv->p->kobj, buf, count); return rc ? rc : count; } static DRIVER_ATTR_WO(uevent); /** * bus_add_driver - Add a driver to the bus. * @drv: driver. 
*/ int bus_add_driver(struct device_driver *drv) { struct subsys_private *sp = bus_to_subsys(drv->bus); struct driver_private *priv; int error = 0; if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the driver is removed from the bus */ pr_debug("bus: '%s': add driver %s\n", sp->bus->name, drv->name); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { error = -ENOMEM; goto out_put_bus; } klist_init(&priv->klist_devices, NULL, NULL); priv->driver = drv; drv->p = priv; priv->kobj.kset = sp->drivers_kset; error = kobject_init_and_add(&priv->kobj, &driver_ktype, NULL, "%s", drv->name); if (error) goto out_unregister; klist_add_tail(&priv->knode_bus, &sp->klist_drivers); if (sp->drivers_autoprobe) { error = driver_attach(drv); if (error) goto out_del_list; } module_add_driver(drv->owner, drv); error = driver_create_file(drv, &driver_attr_uevent); if (error) { printk(KERN_ERR "%s: uevent attr (%s) failed\n", __func__, drv->name); } error = driver_add_groups(drv, sp->bus->drv_groups); if (error) { /* How the hell do we get out of this pickle? Give up */ printk(KERN_ERR "%s: driver_add_groups(%s) failed\n", __func__, drv->name); } if (!drv->suppress_bind_attrs) { error = add_bind_files(drv); if (error) { /* Ditto */ printk(KERN_ERR "%s: add_bind_files(%s) failed\n", __func__, drv->name); } } return 0; out_del_list: klist_del(&priv->knode_bus); out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ drv->p = NULL; out_put_bus: subsys_put(sp); return error; } /** * bus_remove_driver - delete driver from bus's knowledge. * @drv: driver. * * Detach the driver from the devices it controls, and remove * it from its bus's list of drivers. Finally, we drop the reference * to the bus we took in bus_add_driver(). */ void bus_remove_driver(struct device_driver *drv) { struct subsys_private *sp = bus_to_subsys(drv->bus); if (!sp) return; pr_debug("bus: '%s': remove driver %s\n", sp->bus->name, drv->name); if (!drv->suppress_bind_attrs) remove_bind_files(drv); driver_remove_groups(drv, sp->bus->drv_groups); driver_remove_file(drv, &driver_attr_uevent); klist_remove(&drv->p->knode_bus); driver_detach(drv); module_remove_driver(drv); kobject_put(&drv->p->kobj); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in bus_add_driver() */ subsys_put(sp); subsys_put(sp); } /* Helper for bus_rescan_devices's iter */ static int __must_check bus_rescan_devices_helper(struct device *dev, void *data) { int ret = 0; if (!dev->driver) { if (dev->parent && dev->bus->need_parent_lock) device_lock(dev->parent); ret = device_attach(dev); if (dev->parent && dev->bus->need_parent_lock) device_unlock(dev->parent); } return ret < 0 ? ret : 0; } /** * bus_rescan_devices - rescan devices on the bus for possible drivers * @bus: the bus to scan. * * This function will look for devices on the bus with no driver * attached and rescan it against existing drivers to see if it matches * any by calling device_attach() for the unbound devices. */ int bus_rescan_devices(const struct bus_type *bus) { return bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); } EXPORT_SYMBOL_GPL(bus_rescan_devices); /** * device_reprobe - remove driver for a device and probe for a new driver * @dev: the device to reprobe * * This function detaches the attached driver (if any) for the given * device and restarts the driver probing process. 
It is intended * to use if probing criteria changed during a devices lifetime and * driver attachment should change accordingly. */ int device_reprobe(struct device *dev) { if (dev->driver) device_driver_detach(dev); return bus_rescan_devices_helper(dev, NULL); } EXPORT_SYMBOL_GPL(device_reprobe); static void klist_devices_get(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); struct device *dev = dev_prv->device; get_device(dev); } static void klist_devices_put(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); struct device *dev = dev_prv->device; put_device(dev); } static ssize_t bus_uevent_store(const struct bus_type *bus, const char *buf, size_t count) { struct subsys_private *sp = bus_to_subsys(bus); int ret; if (!sp) return -EINVAL; ret = kobject_synth_uevent(&sp->subsys.kobj, buf, count); subsys_put(sp); if (ret) return ret; return count; } /* * "open code" the old BUS_ATTR() macro here. We want to use BUS_ATTR_WO() * here, but can not use it as earlier in the file we have * DEVICE_ATTR_WO(uevent), which would cause a clash with the with the store * function name. */ static struct bus_attribute bus_attr_uevent = __ATTR(uevent, 0200, NULL, bus_uevent_store); /** * bus_register - register a driver-core subsystem * @bus: bus to register * * Once we have that, we register the bus with the kobject * infrastructure, then register the children subsystems it has: * the devices and drivers that belong to the subsystem. */ int bus_register(const struct bus_type *bus) { int retval; struct subsys_private *priv; struct kobject *bus_kobj; struct lock_class_key *key; priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL); if (!priv) return -ENOMEM; priv->bus = bus; BLOCKING_INIT_NOTIFIER_HEAD(&priv->bus_notifier); bus_kobj = &priv->subsys.kobj; retval = kobject_set_name(bus_kobj, "%s", bus->name); if (retval) goto out; bus_kobj->kset = bus_kset; bus_kobj->ktype = &bus_ktype; priv->drivers_autoprobe = 1; retval = kset_register(&priv->subsys); if (retval) goto out; retval = bus_create_file(bus, &bus_attr_uevent); if (retval) goto bus_uevent_fail; priv->devices_kset = kset_create_and_add("devices", NULL, bus_kobj); if (!priv->devices_kset) { retval = -ENOMEM; goto bus_devices_fail; } priv->drivers_kset = kset_create_and_add("drivers", NULL, bus_kobj); if (!priv->drivers_kset) { retval = -ENOMEM; goto bus_drivers_fail; } INIT_LIST_HEAD(&priv->interfaces); key = &priv->lock_key; lockdep_register_key(key); __mutex_init(&priv->mutex, "subsys mutex", key); klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put); klist_init(&priv->klist_drivers, NULL, NULL); retval = add_probe_files(bus); if (retval) goto bus_probe_files_fail; retval = sysfs_create_groups(bus_kobj, bus->bus_groups); if (retval) goto bus_groups_fail; pr_debug("bus: '%s': registered\n", bus->name); return 0; bus_groups_fail: remove_probe_files(bus); bus_probe_files_fail: kset_unregister(priv->drivers_kset); bus_drivers_fail: kset_unregister(priv->devices_kset); bus_devices_fail: bus_remove_file(bus, &bus_attr_uevent); bus_uevent_fail: kset_unregister(&priv->subsys); out: kfree(priv); return retval; } EXPORT_SYMBOL_GPL(bus_register); /** * bus_unregister - remove a bus from the system * @bus: bus. * * Unregister the child subsystems and the bus itself. 
* Finally, we call bus_put() to release the refcount */ void bus_unregister(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kobject *bus_kobj; if (!sp) return; pr_debug("bus: '%s': unregistering\n", bus->name); if (sp->dev_root) device_unregister(sp->dev_root); bus_kobj = &sp->subsys.kobj; sysfs_remove_groups(bus_kobj, bus->bus_groups); remove_probe_files(bus); bus_remove_file(bus, &bus_attr_uevent); kset_unregister(sp->drivers_kset); kset_unregister(sp->devices_kset); kset_unregister(&sp->subsys); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_unregister); int bus_register_notifier(const struct bus_type *bus, struct notifier_block *nb) { struct subsys_private *sp = bus_to_subsys(bus); int retval; if (!sp) return -EINVAL; retval = blocking_notifier_chain_register(&sp->bus_notifier, nb); subsys_put(sp); return retval; } EXPORT_SYMBOL_GPL(bus_register_notifier); int bus_unregister_notifier(const struct bus_type *bus, struct notifier_block *nb) { struct subsys_private *sp = bus_to_subsys(bus); int retval; if (!sp) return -EINVAL; retval = blocking_notifier_chain_unregister(&sp->bus_notifier, nb); subsys_put(sp); return retval; } EXPORT_SYMBOL_GPL(bus_unregister_notifier); void bus_notify(struct device *dev, enum bus_notifier_event value) { struct subsys_private *sp = bus_to_subsys(dev->bus); if (!sp) return; blocking_notifier_call_chain(&sp->bus_notifier, value, dev); subsys_put(sp); } struct kset *bus_get_kset(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kset *kset; if (!sp) return NULL; kset = &sp->subsys; subsys_put(sp); return kset; } EXPORT_SYMBOL_GPL(bus_get_kset); /* * Yes, this forcibly breaks the klist abstraction temporarily. It * just wants to sort the klist, not change reference counts and * take/drop locks rapidly in the process. It does all this while * holding the lock for the list, so objects can't otherwise be * added/removed while we're swizzling. */ static void device_insertion_sort_klist(struct device *a, struct list_head *list, int (*compare)(const struct device *a, const struct device *b)) { struct klist_node *n; struct device_private *dev_prv; struct device *b; list_for_each_entry(n, list, n_node) { dev_prv = to_device_private_bus(n); b = dev_prv->device; if (compare(a, b) <= 0) { list_move_tail(&a->p->knode_bus.n_node, &b->p->knode_bus.n_node); return; } } list_move_tail(&a->p->knode_bus.n_node, list); } void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)) { struct subsys_private *sp = bus_to_subsys(bus); LIST_HEAD(sorted_devices); struct klist_node *n, *tmp; struct device_private *dev_prv; struct device *dev; struct klist *device_klist; if (!sp) return; device_klist = &sp->klist_devices; spin_lock(&device_klist->k_lock); list_for_each_entry_safe(n, tmp, &device_klist->k_list, n_node) { dev_prv = to_device_private_bus(n); dev = dev_prv->device; device_insertion_sort_klist(dev, &sorted_devices, compare); } list_splice(&sorted_devices, &device_klist->k_list); spin_unlock(&device_klist->k_lock); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_sort_breadthfirst); struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; /** * subsys_dev_iter_init - initialize subsys device iterator * @iter: subsys iterator to initialize * @sp: the subsys private (i.e. 
bus) we wanna iterate over * @start: the device to start iterating from, if any * @type: device_type of the devices to iterate over, NULL for all * * Initialize subsys iterator @iter such that it iterates over devices * of @subsys. If @start is set, the list iteration will start there, * otherwise if it is NULL, the iteration starts at the beginning of * the list. */ static void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct subsys_private *sp, struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; if (start) start_knode = &start->p->knode_bus; klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; } /** * subsys_dev_iter_next - iterate to the next device * @iter: subsys iterator to proceed * * Proceed @iter to the next device and return it. Returns NULL if * iteration is complete. * * The returned device is referenced and won't be released till * iterator is proceed to the next device or exited. The caller is * free to do whatever it wants to do with the device including * calling back into subsys code. */ static struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter) { struct klist_node *knode; struct device *dev; for (;;) { knode = klist_next(&iter->ki); if (!knode) return NULL; dev = to_device_private_bus(knode)->device; if (!iter->type || iter->type == dev->type) return dev; } } /** * subsys_dev_iter_exit - finish iteration * @iter: subsys iterator to finish * * Finish an iteration. Always call this function after iteration is * complete whether the iteration ran till the end or not. */ static void subsys_dev_iter_exit(struct subsys_dev_iter *iter) { klist_iter_exit(&iter->ki); } int subsys_interface_register(struct subsys_interface *sif) { struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return -ENODEV; sp = bus_to_subsys(sif->subsys); if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the interface is removed from the bus */ mutex_lock(&sp->mutex); list_add_tail(&sif->node, &sp->interfaces); if (sif->add_dev) { subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->add_dev(dev, sif); subsys_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); return 0; } EXPORT_SYMBOL_GPL(subsys_interface_register); void subsys_interface_unregister(struct subsys_interface *sif) { struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return; sp = bus_to_subsys(sif->subsys); if (!sp) return; mutex_lock(&sp->mutex); list_del_init(&sif->node); if (sif->remove_dev) { subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->remove_dev(dev, sif); subsys_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in subsys_interface_register() */ subsys_put(sp); subsys_put(sp); } EXPORT_SYMBOL_GPL(subsys_interface_unregister); static void system_root_device_release(struct device *dev) { kfree(dev); } static int subsys_register(const struct bus_type *subsys, const struct attribute_group **groups, struct kobject *parent_of_root) { struct subsys_private *sp; struct device *dev; int err; err = bus_register(subsys); if (err < 0) return err; sp = bus_to_subsys(subsys); if (!sp) { err = -EINVAL; goto err_sp; } dev = kzalloc(sizeof(struct device), GFP_KERNEL); if 
(!dev) { err = -ENOMEM; goto err_dev; } err = dev_set_name(dev, "%s", subsys->name); if (err < 0) goto err_name; dev->kobj.parent = parent_of_root; dev->groups = groups; dev->release = system_root_device_release; err = device_register(dev); if (err < 0) goto err_dev_reg; sp->dev_root = dev; subsys_put(sp); return 0; err_dev_reg: put_device(dev); dev = NULL; err_name: kfree(dev); err_dev: subsys_put(sp); err_sp: bus_unregister(subsys); return err; } /** * subsys_system_register - register a subsystem at /sys/devices/system/ * @subsys: system subsystem * @groups: default attributes for the root device * * All 'system' subsystems have a /sys/devices/system/<name> root device * with the name of the subsystem. The root device can carry subsystem- * wide attributes. All registered devices are below this single root * device and are named after the subsystem with a simple enumeration * number appended. The registered devices are not explicitly named; * only 'id' in the device needs to be set. * * Do not use this interface for anything new, it exists for compatibility * with bad ideas only. New subsystems should use plain subsystems; * subsystem-wide attributes should be added to the subsystem directory * itself rather than to a fake root device created in * /sys/devices/system/<name>. */ int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups) { return subsys_register(subsys, groups, &system_kset->kobj); } EXPORT_SYMBOL_GPL(subsys_system_register); /** * subsys_virtual_register - register a subsystem at /sys/devices/virtual/ * @subsys: virtual subsystem * @groups: default attributes for the root device * * All 'virtual' subsystems have a /sys/devices/virtual/<name> root device * with the name of the subsystem. The root device can carry subsystem-wide * attributes. All registered devices are below this single root device. * There's no restriction on device naming. This is for kernel software * constructs which need a sysfs interface. */ int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups) { struct kobject *virtual_dir; virtual_dir = virtual_device_parent(NULL); if (!virtual_dir) return -ENOMEM; return subsys_register(subsys, groups, virtual_dir); } EXPORT_SYMBOL_GPL(subsys_virtual_register); /** * driver_find - locate driver on a bus by its name. * @name: name of the driver. * @bus: bus to scan for the driver. * * Call kset_find_obj() to iterate over the list of drivers on * a bus to find the driver by name. Return the driver if found. * * This routine provides no locking to prevent the driver it returns * from being unregistered or unloaded while the caller is using it. * The caller is responsible for preventing this. */ struct device_driver *driver_find(const char *name, const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kobject *k; struct driver_private *priv; if (!sp) return NULL; k = kset_find_obj(sp->drivers_kset, name); subsys_put(sp); if (!k) return NULL; priv = to_driver(k); /* Drop reference added by kset_find_obj() */ kobject_put(k); return priv->driver; } EXPORT_SYMBOL_GPL(driver_find); /* * Warning, the value could go to "removed" instantly after calling this function, so be very * careful when calling it...
*/ bool bus_is_registered(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); bool is_initialized = false; if (sp) { is_initialized = true; subsys_put(sp); } return is_initialized; } /** * bus_get_dev_root - return a pointer to the "device root" of a bus * @bus: bus to return the device root of. * * If a bus has a "device root" structure, return it, WITH THE REFERENCE * COUNT INCREMENTED. * * Note, when finished with the device, a call to put_device() is required. * * If the device root is not present (or bus is not a valid pointer), NULL * will be returned. */ struct device *bus_get_dev_root(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct device *dev_root; if (!sp) return NULL; dev_root = get_device(sp->dev_root); subsys_put(sp); return dev_root; } EXPORT_SYMBOL_GPL(bus_get_dev_root); int __init buses_init(void) { bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL); if (!bus_kset) return -ENOMEM; system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj); if (!system_kset) return -ENOMEM; return 0; }
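/*
 * Illustrative sketch (not part of bus.c): a minimal client of the bus API
 * above. It registers a bus and then walks its (still empty) device list
 * with bus_for_each_dev(). The "example_bus" names are hypothetical.
 */
#include <linux/device.h>
#include <linux/device/bus.h>
#include <linux/init.h>
#include <linux/module.h>

static const struct bus_type example_bus_type = {
        .name = "example",
};

static int example_count_dev(struct device *dev, void *data)
{
        (*(int *)data)++;
        return 0;               /* a non-zero return would stop the iteration */
}

static int __init example_bus_init(void)
{
        int count = 0;
        int err;

        err = bus_register(&example_bus_type);
        if (err)
                return err;

        bus_for_each_dev(&example_bus_type, NULL, &count, example_count_dev);
        pr_info("example bus registered with %d device(s)\n", count);
        return 0;
}

static void __exit example_bus_exit(void)
{
        bus_unregister(&example_bus_type);
}

module_init(example_bus_init);
module_exit(example_bus_exit);
MODULE_LICENSE("GPL");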
/*
 * hw_random/core.c: HWRNG core API
 *
 * Copyright 2006 Michael Buesch <m@bues.ch>
 * Copyright 2005 (c) MontaVista Software, Inc.
 *
 * Please read Documentation/admin-guide/hw_random.rst for details on use.
 *
 * This software may be used and distributed according to the terms
 * of the GNU General Public License, incorporated herein by reference.
*/ #include <linux/delay.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/hw_random.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/uaccess.h> #define RNG_MODULE_NAME "hw_random" #define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES) static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; static struct task_struct *hwrng_fill; /* list of registered rngs */ static LIST_HEAD(rng_list); /* Protects rng_list and current_rng */ static DEFINE_MUTEX(rng_mutex); /* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */ static DEFINE_MUTEX(reading_mutex); static int data_avail; static u8 *rng_buffer, *rng_fillbuf; static unsigned short current_quality; static unsigned short default_quality = 1024; /* default to maximum */ module_param(current_quality, ushort, 0644); MODULE_PARM_DESC(current_quality, "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead"); module_param(default_quality, ushort, 0644); MODULE_PARM_DESC(default_quality, "default maximum entropy content of hwrng per 1024 bits of input"); static void drop_current_rng(void); static int hwrng_init(struct hwrng *rng); static int hwrng_fillfn(void *unused); static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, int wait); static size_t rng_buffer_size(void) { return RNG_BUFFER_SIZE; } static void add_early_randomness(struct hwrng *rng) { int bytes_read; mutex_lock(&reading_mutex); bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0); mutex_unlock(&reading_mutex); if (bytes_read > 0) { size_t entropy = bytes_read * 8 * rng->quality / 1024; add_hwgenerator_randomness(rng_fillbuf, bytes_read, entropy, false); } } static inline void cleanup_rng(struct kref *kref) { struct hwrng *rng = container_of(kref, struct hwrng, ref); if (rng->cleanup) rng->cleanup(rng); complete(&rng->cleanup_done); } static int set_current_rng(struct hwrng *rng) { int err; BUG_ON(!mutex_is_locked(&rng_mutex)); err = hwrng_init(rng); if (err) return err; drop_current_rng(); current_rng = rng; /* if necessary, start hwrng thread */ if (!hwrng_fill) { hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); if (IS_ERR(hwrng_fill)) { pr_err("hwrng_fill thread creation failed\n"); hwrng_fill = NULL; } } return 0; } static void drop_current_rng(void) { BUG_ON(!mutex_is_locked(&rng_mutex)); if (!current_rng) return; /* decrease last reference for triggering the cleanup */ kref_put(&current_rng->ref, cleanup_rng); current_rng = NULL; } /* Returns ERR_PTR(), NULL or refcounted hwrng */ static struct hwrng *get_current_rng_nolock(void) { if (current_rng) kref_get(&current_rng->ref); return current_rng; } static struct hwrng *get_current_rng(void) { struct hwrng *rng; if (mutex_lock_interruptible(&rng_mutex)) return ERR_PTR(-ERESTARTSYS); rng = get_current_rng_nolock(); mutex_unlock(&rng_mutex); return rng; } static void put_rng(struct hwrng *rng) { /* * Hold rng_mutex here so we serialize in case they set_current_rng * on rng again immediately. 
*/ mutex_lock(&rng_mutex); if (rng) kref_put(&rng->ref, cleanup_rng); mutex_unlock(&rng_mutex); } static int hwrng_init(struct hwrng *rng) { if (kref_get_unless_zero(&rng->ref)) goto skip_init; if (rng->init) { int ret; ret = rng->init(rng); if (ret) return ret; } kref_init(&rng->ref); reinit_completion(&rng->cleanup_done); skip_init: rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024); current_quality = rng->quality; /* obsolete */ return 0; } static int rng_dev_open(struct inode *inode, struct file *filp) { /* enforce read-only access to this chrdev */ if ((filp->f_mode & FMODE_READ) == 0) return -EINVAL; if (filp->f_mode & FMODE_WRITE) return -EINVAL; return 0; } static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, int wait) { int present; BUG_ON(!mutex_is_locked(&reading_mutex)); if (rng->read) return rng->read(rng, (void *)buffer, size, wait); if (rng->data_present) present = rng->data_present(rng, wait); else present = 1; if (present) return rng->data_read(rng, (u32 *)buffer); return 0; } static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { u8 buffer[RNG_BUFFER_SIZE]; ssize_t ret = 0; int err = 0; int bytes_read, len; struct hwrng *rng; while (size) { rng = get_current_rng(); if (IS_ERR(rng)) { err = PTR_ERR(rng); goto out; } if (!rng) { err = -ENODEV; goto out; } if (mutex_lock_interruptible(&reading_mutex)) { err = -ERESTARTSYS; goto out_put; } if (!data_avail) { bytes_read = rng_get_data(rng, rng_buffer, rng_buffer_size(), !(filp->f_flags & O_NONBLOCK)); if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; } else if (bytes_read == 0 && (filp->f_flags & O_NONBLOCK)) { err = -EAGAIN; goto out_unlock_reading; } data_avail = bytes_read; } len = data_avail; if (len) { if (len > size) len = size; data_avail -= len; memcpy(buffer, rng_buffer + data_avail, len); } mutex_unlock(&reading_mutex); put_rng(rng); if (len) { if (copy_to_user(buf + ret, buffer, len)) { err = -EFAULT; goto out; } size -= len; ret += len; } if (need_resched()) schedule_timeout_interruptible(1); if (signal_pending(current)) { err = -ERESTARTSYS; goto out; } } out: memzero_explicit(buffer, sizeof(buffer)); return ret ? : err; out_unlock_reading: mutex_unlock(&reading_mutex); out_put: put_rng(rng); goto out; } static const struct file_operations rng_chrdev_ops = { .owner = THIS_MODULE, .open = rng_dev_open, .read = rng_dev_read, .llseek = noop_llseek, }; static const struct attribute_group *rng_dev_groups[]; static struct miscdevice rng_miscdev = { .minor = HWRNG_MINOR, .name = RNG_MODULE_NAME, .nodename = "hwrng", .fops = &rng_chrdev_ops, .groups = rng_dev_groups, }; static int enable_best_rng(void) { struct hwrng *rng, *new_rng = NULL; int ret = -ENODEV; BUG_ON(!mutex_is_locked(&rng_mutex)); /* no rng to use? */ if (list_empty(&rng_list)) { drop_current_rng(); cur_rng_set_by_user = 0; return 0; } /* use the rng which offers the best quality */ list_for_each_entry(rng, &rng_list, list) { if (!new_rng || rng->quality > new_rng->quality) new_rng = rng; } ret = ((new_rng == current_rng) ? 
0 : set_current_rng(new_rng)); if (!ret) cur_rng_set_by_user = 0; return ret; } static ssize_t rng_current_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int err; struct hwrng *rng, *old_rng, *new_rng; err = mutex_lock_interruptible(&rng_mutex); if (err) return -ERESTARTSYS; old_rng = current_rng; if (sysfs_streq(buf, "")) { err = enable_best_rng(); } else { list_for_each_entry(rng, &rng_list, list) { if (sysfs_streq(rng->name, buf)) { err = set_current_rng(rng); if (!err) cur_rng_set_by_user = 1; break; } } } new_rng = get_current_rng_nolock(); mutex_unlock(&rng_mutex); if (new_rng) { if (new_rng != old_rng) add_early_randomness(new_rng); put_rng(new_rng); } return err ? : len; } static ssize_t rng_current_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t ret; struct hwrng *rng; rng = get_current_rng(); if (IS_ERR(rng)) return PTR_ERR(rng); ret = snprintf(buf, PAGE_SIZE, "%s\n", rng ? rng->name : "none"); put_rng(rng); return ret; } static ssize_t rng_available_show(struct device *dev, struct device_attribute *attr, char *buf) { int err; struct hwrng *rng; err = mutex_lock_interruptible(&rng_mutex); if (err) return -ERESTARTSYS; buf[0] = '\0'; list_for_each_entry(rng, &rng_list, list) { strlcat(buf, rng->name, PAGE_SIZE); strlcat(buf, " ", PAGE_SIZE); } strlcat(buf, "\n", PAGE_SIZE); mutex_unlock(&rng_mutex); return strlen(buf); } static ssize_t rng_selected_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", cur_rng_set_by_user); } static ssize_t rng_quality_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t ret; struct hwrng *rng; rng = get_current_rng(); if (IS_ERR(rng)) return PTR_ERR(rng); if (!rng) /* no need to put_rng */ return -ENODEV; ret = sysfs_emit(buf, "%hu\n", rng->quality); put_rng(rng); return ret; } static ssize_t rng_quality_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { u16 quality; int ret = -EINVAL; if (len < 2) return -EINVAL; ret = mutex_lock_interruptible(&rng_mutex); if (ret) return -ERESTARTSYS; ret = kstrtou16(buf, 0, &quality); if (ret || quality > 1024) { ret = -EINVAL; goto out; } if (!current_rng) { ret = -ENODEV; goto out; } current_rng->quality = quality; current_quality = quality; /* obsolete */ /* the best available RNG may have changed */ ret = enable_best_rng(); out: mutex_unlock(&rng_mutex); return ret ? 
ret : len; } static DEVICE_ATTR_RW(rng_current); static DEVICE_ATTR_RO(rng_available); static DEVICE_ATTR_RO(rng_selected); static DEVICE_ATTR_RW(rng_quality); static struct attribute *rng_dev_attrs[] = { &dev_attr_rng_current.attr, &dev_attr_rng_available.attr, &dev_attr_rng_selected.attr, &dev_attr_rng_quality.attr, NULL }; ATTRIBUTE_GROUPS(rng_dev); static void __exit unregister_miscdev(void) { misc_deregister(&rng_miscdev); } static int __init register_miscdev(void) { return misc_register(&rng_miscdev); } static int hwrng_fillfn(void *unused) { size_t entropy, entropy_credit = 0; /* in 1/1024 of a bit */ long rc; while (!kthread_should_stop()) { unsigned short quality; struct hwrng *rng; rng = get_current_rng(); if (IS_ERR(rng) || !rng) break; mutex_lock(&reading_mutex); rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1); if (current_quality != rng->quality) rng->quality = current_quality; /* obsolete */ quality = rng->quality; mutex_unlock(&reading_mutex); if (rc <= 0) hwrng_msleep(rng, 10000); put_rng(rng); if (rc <= 0) continue; /* If we cannot credit at least one bit of entropy, * keep track of the remainder for the next iteration */ entropy = rc * quality * 8 + entropy_credit; if ((entropy >> 10) == 0) entropy_credit = entropy; /* Outside lock, sure, but y'know: randomness. */ add_hwgenerator_randomness((void *)rng_fillbuf, rc, entropy >> 10, true); } hwrng_fill = NULL; return 0; } int hwrng_register(struct hwrng *rng) { int err = -EINVAL; struct hwrng *tmp; bool is_new_current = false; if (!rng->name || (!rng->data_read && !rng->read)) goto out; mutex_lock(&rng_mutex); /* Must not register two RNGs with the same name. */ err = -EEXIST; list_for_each_entry(tmp, &rng_list, list) { if (strcmp(tmp->name, rng->name) == 0) goto out_unlock; } list_add_tail(&rng->list, &rng_list); init_completion(&rng->cleanup_done); complete(&rng->cleanup_done); init_completion(&rng->dying); if (!current_rng || (!cur_rng_set_by_user && rng->quality > current_rng->quality)) { /* * Set new rng as current as the new rng source * provides better entropy quality and was not * chosen by userspace. */ err = set_current_rng(rng); if (err) goto out_unlock; /* to use current_rng in add_early_randomness() we need * to take a ref */ is_new_current = true; kref_get(&rng->ref); } mutex_unlock(&rng_mutex); if (is_new_current || !rng->init) { /* * Use a new device's input to add some randomness to * the system. 
If this rng device isn't going to be * used right away, its init function hasn't been * called yet by set_current_rng(); so only use the * randomness from devices that don't need an init callback */ add_early_randomness(rng); } if (is_new_current) put_rng(rng); return 0; out_unlock: mutex_unlock(&rng_mutex); out: return err; } EXPORT_SYMBOL_GPL(hwrng_register); void hwrng_unregister(struct hwrng *rng) { struct hwrng *old_rng, *new_rng; int err; mutex_lock(&rng_mutex); old_rng = current_rng; list_del(&rng->list); complete_all(&rng->dying); if (current_rng == rng) { err = enable_best_rng(); if (err) { drop_current_rng(); cur_rng_set_by_user = 0; } } new_rng = get_current_rng_nolock(); if (list_empty(&rng_list)) { mutex_unlock(&rng_mutex); if (hwrng_fill) kthread_stop(hwrng_fill); } else mutex_unlock(&rng_mutex); if (new_rng) { if (old_rng != new_rng) add_early_randomness(new_rng); put_rng(new_rng); } wait_for_completion(&rng->cleanup_done); } EXPORT_SYMBOL_GPL(hwrng_unregister); static void devm_hwrng_release(struct device *dev, void *res) { hwrng_unregister(*(struct hwrng **)res); } static int devm_hwrng_match(struct device *dev, void *res, void *data) { struct hwrng **r = res; if (WARN_ON(!r || !*r)) return 0; return *r == data; } int devm_hwrng_register(struct device *dev, struct hwrng *rng) { struct hwrng **ptr; int error; ptr = devres_alloc(devm_hwrng_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return -ENOMEM; error = hwrng_register(rng); if (error) { devres_free(ptr); return error; } *ptr = rng; devres_add(dev, ptr); return 0; } EXPORT_SYMBOL_GPL(devm_hwrng_register); void devm_hwrng_unregister(struct device *dev, struct hwrng *rng) { devres_release(dev, devm_hwrng_release, devm_hwrng_match, rng); } EXPORT_SYMBOL_GPL(devm_hwrng_unregister); long hwrng_msleep(struct hwrng *rng, unsigned int msecs) { unsigned long timeout = msecs_to_jiffies(msecs) + 1; return wait_for_completion_interruptible_timeout(&rng->dying, timeout); } EXPORT_SYMBOL_GPL(hwrng_msleep); long hwrng_yield(struct hwrng *rng) { return wait_for_completion_interruptible_timeout(&rng->dying, 1); } EXPORT_SYMBOL_GPL(hwrng_yield); static int __init hwrng_modinit(void) { int ret; /* kmalloc makes this safe for virt_to_page() in virtio_rng.c */ rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL); if (!rng_buffer) return -ENOMEM; rng_fillbuf = kmalloc(rng_buffer_size(), GFP_KERNEL); if (!rng_fillbuf) { kfree(rng_buffer); return -ENOMEM; } ret = register_miscdev(); if (ret) { kfree(rng_fillbuf); kfree(rng_buffer); } return ret; } static void __exit hwrng_modexit(void) { mutex_lock(&rng_mutex); BUG_ON(current_rng); kfree(rng_buffer); kfree(rng_fillbuf); mutex_unlock(&rng_mutex); unregister_miscdev(); } fs_initcall(hwrng_modinit); /* depends on misc_register() */ module_exit(hwrng_modexit); MODULE_DESCRIPTION("H/W Random Number Generator (RNG) driver"); MODULE_LICENSE("GPL");
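/*
 * Example (not part of the original file): a minimal sketch of a backend
 * driver registering with this core. The names and the platform_device
 * glue are placeholders; what hwrng_register() above actually requires is
 * a .name plus either a .read or a .data_read callback, and .quality is
 * optional (capped at 1024 by hwrng_init()).
 */
#if 0	/* illustration only */
static int example_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
	/* Fill @buf with up to @max bytes and return the number written. */
	memset(buf, 0, max);	/* placeholder for real hardware access */
	return max;
}

static struct hwrng example_rng = {
	.name		= "example-rng",
	.read		= example_rng_read,
	.quality	= 1024,	/* claimed entropy per 1024 bits of input */
};

static int example_rng_probe(struct platform_device *pdev)
{
	/* devm_hwrng_register() calls hwrng_unregister() on driver detach. */
	return devm_hwrng_register(&pdev->dev, &example_rng);
}
#endif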
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Marvell NFC-over-USB driver: USB interface related functions
 *
 * Copyright (C) 2014, Marvell International Ltd.
*/ #include <linux/module.h> #include <linux/usb.h> #include <linux/nfc.h> #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include "nfcmrvl.h" static struct usb_device_id nfcmrvl_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x1286, 0x2046, USB_CLASS_VENDOR_SPEC, 4, 1) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, nfcmrvl_table); #define NFCMRVL_USB_BULK_RUNNING 1 #define NFCMRVL_USB_SUSPENDING 2 struct nfcmrvl_usb_drv_data { struct usb_device *udev; struct usb_interface *intf; unsigned long flags; struct work_struct waker; struct usb_anchor tx_anchor; struct usb_anchor bulk_anchor; struct usb_anchor deferred; int tx_in_flight; /* protects tx_in_flight */ spinlock_t txlock; struct usb_endpoint_descriptor *bulk_tx_ep; struct usb_endpoint_descriptor *bulk_rx_ep; int suspend_count; struct nfcmrvl_private *priv; }; static int nfcmrvl_inc_tx(struct nfcmrvl_usb_drv_data *drv_data) { unsigned long flags; int rv; spin_lock_irqsave(&drv_data->txlock, flags); rv = test_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags); if (!rv) drv_data->tx_in_flight++; spin_unlock_irqrestore(&drv_data->txlock, flags); return rv; } static void nfcmrvl_bulk_complete(struct urb *urb) { struct nfcmrvl_usb_drv_data *drv_data = urb->context; int err; dev_dbg(&drv_data->udev->dev, "urb %p status %d count %d\n", urb, urb->status, urb->actual_length); if (!test_bit(NFCMRVL_NCI_RUNNING, &drv_data->flags)) return; if (!urb->status) { struct sk_buff *skb; skb = nci_skb_alloc(drv_data->priv->ndev, urb->actual_length, GFP_ATOMIC); if (!skb) { nfc_err(&drv_data->udev->dev, "failed to alloc mem\n"); } else { skb_put_data(skb, urb->transfer_buffer, urb->actual_length); if (nfcmrvl_nci_recv_frame(drv_data->priv, skb) < 0) nfc_err(&drv_data->udev->dev, "corrupted Rx packet\n"); } } if (!test_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags)) return; usb_anchor_urb(urb, &drv_data->bulk_anchor); usb_mark_last_busy(drv_data->udev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) nfc_err(&drv_data->udev->dev, "urb %p failed to resubmit (%d)\n", urb, -err); usb_unanchor_urb(urb); } } static int nfcmrvl_submit_bulk_urb(struct nfcmrvl_usb_drv_data *drv_data, gfp_t mem_flags) { struct urb *urb; unsigned char *buf; unsigned int pipe; int err, size = NFCMRVL_NCI_MAX_EVENT_SIZE; if (!drv_data->bulk_rx_ep) return -ENODEV; urb = usb_alloc_urb(0, mem_flags); if (!urb) return -ENOMEM; buf = kmalloc(size, mem_flags); if (!buf) { usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvbulkpipe(drv_data->udev, drv_data->bulk_rx_ep->bEndpointAddress); usb_fill_bulk_urb(urb, drv_data->udev, pipe, buf, size, nfcmrvl_bulk_complete, drv_data); urb->transfer_flags |= URB_FREE_BUFFER; usb_mark_last_busy(drv_data->udev); usb_anchor_urb(urb, &drv_data->bulk_anchor); err = usb_submit_urb(urb, mem_flags); if (err) { if (err != -EPERM && err != -ENODEV) nfc_err(&drv_data->udev->dev, "urb %p submission failed (%d)\n", urb, -err); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static void nfcmrvl_tx_complete(struct urb *urb) { struct sk_buff *skb = urb->context; struct nci_dev *ndev = (struct nci_dev *)skb->dev; struct nfcmrvl_private *priv = nci_get_drvdata(ndev); struct nfcmrvl_usb_drv_data *drv_data = priv->drv_data; unsigned long flags; nfc_info(priv->dev, "urb %p status %d count %d\n", urb, urb->status, urb->actual_length); spin_lock_irqsave(&drv_data->txlock, flags); drv_data->tx_in_flight--; spin_unlock_irqrestore(&drv_data->txlock, 
flags); kfree(urb->setup_packet); kfree_skb(skb); } static int nfcmrvl_usb_nci_open(struct nfcmrvl_private *priv) { struct nfcmrvl_usb_drv_data *drv_data = priv->drv_data; int err; err = usb_autopm_get_interface(drv_data->intf); if (err) return err; drv_data->intf->needs_remote_wakeup = 1; err = nfcmrvl_submit_bulk_urb(drv_data, GFP_KERNEL); if (err) goto failed; set_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags); nfcmrvl_submit_bulk_urb(drv_data, GFP_KERNEL); usb_autopm_put_interface(drv_data->intf); return 0; failed: usb_autopm_put_interface(drv_data->intf); return err; } static void nfcmrvl_usb_stop_traffic(struct nfcmrvl_usb_drv_data *drv_data) { usb_kill_anchored_urbs(&drv_data->bulk_anchor); } static int nfcmrvl_usb_nci_close(struct nfcmrvl_private *priv) { struct nfcmrvl_usb_drv_data *drv_data = priv->drv_data; int err; cancel_work_sync(&drv_data->waker); clear_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags); nfcmrvl_usb_stop_traffic(drv_data); usb_kill_anchored_urbs(&drv_data->tx_anchor); err = usb_autopm_get_interface(drv_data->intf); if (err) goto failed; drv_data->intf->needs_remote_wakeup = 0; usb_autopm_put_interface(drv_data->intf); failed: usb_scuttle_anchored_urbs(&drv_data->deferred); return 0; } static int nfcmrvl_usb_nci_send(struct nfcmrvl_private *priv, struct sk_buff *skb) { struct nfcmrvl_usb_drv_data *drv_data = priv->drv_data; struct urb *urb; unsigned int pipe; int err; if (!drv_data->bulk_tx_ep) return -ENODEV; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return -ENOMEM; pipe = usb_sndbulkpipe(drv_data->udev, drv_data->bulk_tx_ep->bEndpointAddress); usb_fill_bulk_urb(urb, drv_data->udev, pipe, skb->data, skb->len, nfcmrvl_tx_complete, skb); err = nfcmrvl_inc_tx(drv_data); if (err) { usb_anchor_urb(urb, &drv_data->deferred); schedule_work(&drv_data->waker); err = 0; goto done; } usb_anchor_urb(urb, &drv_data->tx_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { if (err != -EPERM && err != -ENODEV) nfc_err(&drv_data->udev->dev, "urb %p submission failed (%d)\n", urb, -err); kfree(urb->setup_packet); usb_unanchor_urb(urb); } else { usb_mark_last_busy(drv_data->udev); } done: usb_free_urb(urb); return err; } static const struct nfcmrvl_if_ops usb_ops = { .nci_open = nfcmrvl_usb_nci_open, .nci_close = nfcmrvl_usb_nci_close, .nci_send = nfcmrvl_usb_nci_send, }; static void nfcmrvl_waker(struct work_struct *work) { struct nfcmrvl_usb_drv_data *drv_data = container_of(work, struct nfcmrvl_usb_drv_data, waker); int err; err = usb_autopm_get_interface(drv_data->intf); if (err) return; usb_autopm_put_interface(drv_data->intf); } static int nfcmrvl_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct nfcmrvl_usb_drv_data *drv_data; struct nfcmrvl_private *priv; int i; struct usb_device *udev = interface_to_usbdev(intf); struct nfcmrvl_platform_data config; /* No configuration for USB */ memset(&config, 0, sizeof(config)); config.reset_n_io = -EINVAL; nfc_info(&udev->dev, "intf %p id %p\n", intf, id); drv_data = devm_kzalloc(&intf->dev, sizeof(*drv_data), GFP_KERNEL); if (!drv_data) return -ENOMEM; for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { struct usb_endpoint_descriptor *ep_desc; ep_desc = &intf->cur_altsetting->endpoint[i].desc; if (!drv_data->bulk_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) { drv_data->bulk_tx_ep = ep_desc; } else if (!drv_data->bulk_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) { drv_data->bulk_rx_ep = ep_desc; } } if (!drv_data->bulk_tx_ep || !drv_data->bulk_rx_ep) return -ENODEV; drv_data->udev = udev; 
drv_data->intf = intf; INIT_WORK(&drv_data->waker, nfcmrvl_waker); spin_lock_init(&drv_data->txlock); init_usb_anchor(&drv_data->tx_anchor); init_usb_anchor(&drv_data->bulk_anchor); init_usb_anchor(&drv_data->deferred); priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_USB, drv_data, &usb_ops, &intf->dev, &config); if (IS_ERR(priv)) return PTR_ERR(priv); drv_data->priv = priv; drv_data->priv->support_fw_dnld = false; usb_set_intfdata(intf, drv_data); return 0; } static void nfcmrvl_disconnect(struct usb_interface *intf) { struct nfcmrvl_usb_drv_data *drv_data = usb_get_intfdata(intf); if (!drv_data) return; nfc_info(&drv_data->udev->dev, "intf %p\n", intf); nfcmrvl_nci_unregister_dev(drv_data->priv); usb_set_intfdata(drv_data->intf, NULL); } #ifdef CONFIG_PM static int nfcmrvl_suspend(struct usb_interface *intf, pm_message_t message) { struct nfcmrvl_usb_drv_data *drv_data = usb_get_intfdata(intf); nfc_info(&drv_data->udev->dev, "intf %p\n", intf); if (drv_data->suspend_count++) return 0; spin_lock_irq(&drv_data->txlock); if (!(PMSG_IS_AUTO(message) && drv_data->tx_in_flight)) { set_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags); spin_unlock_irq(&drv_data->txlock); } else { spin_unlock_irq(&drv_data->txlock); drv_data->suspend_count--; return -EBUSY; } nfcmrvl_usb_stop_traffic(drv_data); usb_kill_anchored_urbs(&drv_data->tx_anchor); return 0; } static void nfcmrvl_play_deferred(struct nfcmrvl_usb_drv_data *drv_data) { struct urb *urb; int err; while ((urb = usb_get_from_anchor(&drv_data->deferred))) { usb_anchor_urb(urb, &drv_data->tx_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { kfree(urb->setup_packet); usb_unanchor_urb(urb); usb_free_urb(urb); break; } drv_data->tx_in_flight++; usb_free_urb(urb); } /* Cleanup the rest deferred urbs. */ while ((urb = usb_get_from_anchor(&drv_data->deferred))) { kfree(urb->setup_packet); usb_free_urb(urb); } } static int nfcmrvl_resume(struct usb_interface *intf) { struct nfcmrvl_usb_drv_data *drv_data = usb_get_intfdata(intf); int err = 0; nfc_info(&drv_data->udev->dev, "intf %p\n", intf); if (--drv_data->suspend_count) return 0; if (!test_bit(NFCMRVL_NCI_RUNNING, &drv_data->flags)) goto done; if (test_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags)) { err = nfcmrvl_submit_bulk_urb(drv_data, GFP_NOIO); if (err) { clear_bit(NFCMRVL_USB_BULK_RUNNING, &drv_data->flags); goto failed; } nfcmrvl_submit_bulk_urb(drv_data, GFP_NOIO); } spin_lock_irq(&drv_data->txlock); nfcmrvl_play_deferred(drv_data); clear_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags); spin_unlock_irq(&drv_data->txlock); return 0; failed: usb_scuttle_anchored_urbs(&drv_data->deferred); done: spin_lock_irq(&drv_data->txlock); clear_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags); spin_unlock_irq(&drv_data->txlock); return err; } #endif static struct usb_driver nfcmrvl_usb_driver = { .name = "nfcmrvl", .probe = nfcmrvl_probe, .disconnect = nfcmrvl_disconnect, #ifdef CONFIG_PM .suspend = nfcmrvl_suspend, .resume = nfcmrvl_resume, .reset_resume = nfcmrvl_resume, #endif .id_table = nfcmrvl_table, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, .soft_unbind = 1, }; module_usb_driver(nfcmrvl_usb_driver); MODULE_AUTHOR("Marvell International Ltd."); MODULE_DESCRIPTION("Marvell NFC-over-USB driver"); MODULE_LICENSE("GPL v2");
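/*
 * Example (not part of the original driver): a condensed sketch of the
 * anchor-based TX deferral used above. While NFCMRVL_USB_SUSPENDING is
 * set, outgoing URBs are parked on the "deferred" anchor and the waker
 * work resumes the interface via runtime PM; nfcmrvl_play_deferred()
 * resubmits them on resume. The function below only illustrates that
 * flow and is not driver code.
 */
#if 0	/* illustration only */
static int example_queue_tx(struct nfcmrvl_usb_drv_data *drv_data,
			    struct urb *urb)
{
	if (test_bit(NFCMRVL_USB_SUSPENDING, &drv_data->flags)) {
		/* Interface is suspending: park the URB and wake it later. */
		usb_anchor_urb(urb, &drv_data->deferred);
		schedule_work(&drv_data->waker);
		return 0;
	}

	/* Normal path: anchor so close/suspend can kill in-flight URBs. */
	usb_anchor_urb(urb, &drv_data->tx_anchor);
	return usb_submit_urb(urb, GFP_ATOMIC);
}
#endif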
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/lib/vsprintf.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */
/*
 * Wirzenius wrote this portably, Torvalds fucked it up :-)
 */

/*
 * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
 * - changed to provide snprintf and vsnprintf functions
 * So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de>
 * - scnprintf and vscnprintf
 */

#include <linux/stdarg.h>
#include <linux/build_bug.h>
#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/errname.h>
#include <linux/module.h>	/* for KSYM_SYMBOL_LEN */
#include <linux/types.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/math64.h>
#include <linux/uaccess.h>
#include <linux/ioport.h>
#include <linux/dcache.h>
#include <linux/cred.h>
#include <linux/rtc.h>
#include <linux/sprintf.h>
#include <linux/time.h>
#include <linux/uuid.h>
#include <linux/of.h>
#include <net/addrconf.h>
#include <linux/siphash.h>
#include <linux/compiler.h>
#include <linux/property.h>
#include <linux/notifier.h>
#ifdef CONFIG_BLOCK
#include <linux/blkdev.h>
#endif

#include "../mm/internal.h"	/* For the trace_print_flags arrays */

#include <asm/page.h>		/* for PAGE_SIZE */
#include <asm/byteorder.h>	/* cpu_to_le16 */
#include <asm/unaligned.h>

#include <linux/string_helpers.h>
#include "kstrtox.h"

/* Disable pointer hashing if requested */
bool no_hash_pointers __ro_after_init;
EXPORT_SYMBOL_GPL(no_hash_pointers);

noinline
static unsigned long long simple_strntoull(const char *startp, char **endp, unsigned int base, size_t max_chars)
{
	const char *cp;
	unsigned long long result = 0ULL;
	size_t prefix_chars;
	unsigned int rv;

	cp = _parse_integer_fixup_radix(startp, &base);
	prefix_chars = cp - startp;
	if (prefix_chars < max_chars) {
		rv = _parse_integer_limit(cp, base, &result, max_chars - prefix_chars);
		/* FIXME */
		cp += (rv & ~KSTRTOX_OVERFLOW);
	} else {
		/* Field too short for prefix + digit, skip over without converting */
		cp = startp + max_chars;
	}

	if (endp)
		*endp = (char *)cp;

	return result;
}

/**
 * simple_strtoull - convert a string to an unsigned long long
 * @cp: The start of the string
 * @endp: A pointer to the end of the parsed string will be placed here
 * @base: The number base to use
 *
 * This function has caveats. Please use kstrtoull instead.
 */
noinline
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
	return simple_strntoull(cp, endp, base, INT_MAX);
}
EXPORT_SYMBOL(simple_strtoull);

/**
 * simple_strtoul - convert a string to an unsigned long
 * @cp: The start of the string
 * @endp: A pointer to the end of the parsed string will be placed here
 * @base: The number base to use
 *
 * This function has caveats. Please use kstrtoul instead.
*/ unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) { return simple_strtoull(cp, endp, base); } EXPORT_SYMBOL(simple_strtoul); /** * simple_strtol - convert a string to a signed long * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use * * This function has caveats. Please use kstrtol instead. */ long simple_strtol(const char *cp, char **endp, unsigned int base) { if (*cp == '-') return -simple_strtoul(cp + 1, endp, base); return simple_strtoul(cp, endp, base); } EXPORT_SYMBOL(simple_strtol); noinline static long long simple_strntoll(const char *cp, char **endp, unsigned int base, size_t max_chars) { /* * simple_strntoull() safely handles receiving max_chars==0 in the * case cp[0] == '-' && max_chars == 1. * If max_chars == 0 we can drop through and pass it to simple_strntoull() * and the content of *cp is irrelevant. */ if (*cp == '-' && max_chars > 0) return -simple_strntoull(cp + 1, endp, base, max_chars - 1); return simple_strntoull(cp, endp, base, max_chars); } /** * simple_strtoll - convert a string to a signed long long * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use * * This function has caveats. Please use kstrtoll instead. */ long long simple_strtoll(const char *cp, char **endp, unsigned int base) { return simple_strntoll(cp, endp, base, INT_MAX); } EXPORT_SYMBOL(simple_strtoll); static noinline_for_stack int skip_atoi(const char **s) { int i = 0; do { i = i*10 + *((*s)++) - '0'; } while (isdigit(**s)); return i; } /* * Decimal conversion is by far the most typical, and is used for * /proc and /sys data. This directly impacts e.g. top performance * with many processes running. We optimize it for speed by emitting * two characters at a time, using a 200 byte lookup table. This * roughly halves the number of multiplications compared to computing * the digits one at a time. Implementation strongly inspired by the * previous version, which in turn used ideas described at * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission * from the author, Douglas W. Jones). * * It turns out there is precisely one 26 bit fixed-point * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32 * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual * range happens to be somewhat larger (x <= 1073741898), but that's * irrelevant for our purpose. * * For dividing a number in the range [10^4, 10^6-1] by 100, we still * need a 32x32->64 bit multiply, so we simply use the same constant. * * For dividing a number in the range [100, 10^4-1] by 100, there are * several options. The simplest is (x * 0x147b) >> 19, which is valid * for all x <= 43698. 
*/ static const u16 decpair[100] = { #define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030) _( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9), _(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19), _(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29), _(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39), _(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49), _(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59), _(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69), _(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79), _(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89), _(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99), #undef _ }; /* * This will print a single '0' even if r == 0, since we would * immediately jump to out_r where two 0s would be written but only * one of them accounted for in buf. This is needed by ip4_string * below. All other callers pass a non-zero value of r. */ static noinline_for_stack char *put_dec_trunc8(char *buf, unsigned r) { unsigned q; /* 1 <= r < 10^8 */ if (r < 100) goto out_r; /* 100 <= r < 10^8 */ q = (r * (u64)0x28f5c29) >> 32; *((u16 *)buf) = decpair[r - 100*q]; buf += 2; /* 1 <= q < 10^6 */ if (q < 100) goto out_q; /* 100 <= q < 10^6 */ r = (q * (u64)0x28f5c29) >> 32; *((u16 *)buf) = decpair[q - 100*r]; buf += 2; /* 1 <= r < 10^4 */ if (r < 100) goto out_r; /* 100 <= r < 10^4 */ q = (r * 0x147b) >> 19; *((u16 *)buf) = decpair[r - 100*q]; buf += 2; out_q: /* 1 <= q < 100 */ r = q; out_r: /* 1 <= r < 100 */ *((u16 *)buf) = decpair[r]; buf += r < 10 ? 1 : 2; return buf; } #if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64 static noinline_for_stack char *put_dec_full8(char *buf, unsigned r) { unsigned q; /* 0 <= r < 10^8 */ q = (r * (u64)0x28f5c29) >> 32; *((u16 *)buf) = decpair[r - 100*q]; buf += 2; /* 0 <= q < 10^6 */ r = (q * (u64)0x28f5c29) >> 32; *((u16 *)buf) = decpair[q - 100*r]; buf += 2; /* 0 <= r < 10^4 */ q = (r * 0x147b) >> 19; *((u16 *)buf) = decpair[r - 100*q]; buf += 2; /* 0 <= q < 100 */ *((u16 *)buf) = decpair[q]; buf += 2; return buf; } static noinline_for_stack char *put_dec(char *buf, unsigned long long n) { if (n >= 100*1000*1000) buf = put_dec_full8(buf, do_div(n, 100*1000*1000)); /* 1 <= n <= 1.6e11 */ if (n >= 100*1000*1000) buf = put_dec_full8(buf, do_div(n, 100*1000*1000)); /* 1 <= n < 1e8 */ return put_dec_trunc8(buf, n); } #elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64 static void put_dec_full4(char *buf, unsigned r) { unsigned q; /* 0 <= r < 10^4 */ q = (r * 0x147b) >> 19; *((u16 *)buf) = decpair[r - 100*q]; buf += 2; /* 0 <= q < 100 */ *((u16 *)buf) = decpair[q]; } /* * Call put_dec_full4 on x % 10000, return x / 10000. * The approximation x/10000 == (x * 0x346DC5D7) >> 43 * holds for all x < 1,128,869,999. The largest value this * helper will ever be asked to convert is 1,125,520,955. * (second call in the put_dec code, assuming n is all-ones). */ static noinline_for_stack unsigned put_dec_helper4(char *buf, unsigned x) { uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; put_dec_full4(buf, x - q * 10000); return q; } /* Based on code by Douglas W. Jones found at * <http://www.cs.uiowa.edu/~jones/bcd/decimal.html#sixtyfour> * (with permission from the author). * Performs no 64-bit division and hence should be fast on 32-bit machines. 
*/ static char *put_dec(char *buf, unsigned long long n) { uint32_t d3, d2, d1, q, h; if (n < 100*1000*1000) return put_dec_trunc8(buf, n); d1 = ((uint32_t)n >> 16); /* implicit "& 0xffff" */ h = (n >> 32); d2 = (h ) & 0xffff; d3 = (h >> 16); /* implicit "& 0xffff" */ /* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0 = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */ q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); q = put_dec_helper4(buf, q); q += 7671 * d3 + 9496 * d2 + 6 * d1; q = put_dec_helper4(buf+4, q); q += 4749 * d3 + 42 * d2; q = put_dec_helper4(buf+8, q); q += 281 * d3; buf += 12; if (q) buf = put_dec_trunc8(buf, q); else while (buf[-1] == '0') --buf; return buf; } #endif /* * Convert passed number to decimal string. * Returns the length of string. On buffer overflow, returns 0. * * If speed is not important, use snprintf(). It's easy to read the code. */ int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) { /* put_dec requires 2-byte alignment of the buffer. */ char tmp[sizeof(num) * 3] __aligned(2); int idx, len; /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ if (num <= 9) { tmp[0] = '0' + num; len = 1; } else { len = put_dec(tmp, num) - tmp; } if (len > size || width > size) return 0; if (width > len) { width = width - len; for (idx = 0; idx < width; idx++) buf[idx] = ' '; } else { width = 0; } for (idx = 0; idx < len; ++idx) buf[idx + width] = tmp[len - idx - 1]; return len + width; } #define SIGN 1 /* unsigned/signed, must be 1 */ #define LEFT 2 /* left justified */ #define PLUS 4 /* show plus */ #define SPACE 8 /* space if plus */ #define ZEROPAD 16 /* pad with zero, must be 16 == '0' - ' ' */ #define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ #define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ static_assert(SIGN == 1); static_assert(ZEROPAD == ('0' - ' ')); static_assert(SMALL == ('a' ^ 'A')); enum format_type { FORMAT_TYPE_NONE, /* Just a string part */ FORMAT_TYPE_WIDTH, FORMAT_TYPE_PRECISION, FORMAT_TYPE_CHAR, FORMAT_TYPE_STR, FORMAT_TYPE_PTR, FORMAT_TYPE_PERCENT_CHAR, FORMAT_TYPE_INVALID, FORMAT_TYPE_LONG_LONG, FORMAT_TYPE_ULONG, FORMAT_TYPE_LONG, FORMAT_TYPE_UBYTE, FORMAT_TYPE_BYTE, FORMAT_TYPE_USHORT, FORMAT_TYPE_SHORT, FORMAT_TYPE_UINT, FORMAT_TYPE_INT, FORMAT_TYPE_SIZE_T, FORMAT_TYPE_PTRDIFF }; struct printf_spec { unsigned int type:8; /* format_type enum */ signed int field_width:24; /* width of output field */ unsigned int flags:8; /* flags to number() */ unsigned int base:8; /* number base, 8, 10 or 16 only */ signed int precision:16; /* # of digits/chars */ } __packed; static_assert(sizeof(struct printf_spec) == 8); #define FIELD_WIDTH_MAX ((1 << 23) - 1) #define PRECISION_MAX ((1 << 15) - 1) static noinline_for_stack char *number(char *buf, char *end, unsigned long long num, struct printf_spec spec) { /* put_dec requires 2-byte alignment of the buffer. */ char tmp[3 * sizeof(num)] __aligned(2); char sign; char locase; int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); int i; bool is_zero = num == 0LL; int field_width = spec.field_width; int precision = spec.precision; /* locase = 0 or 0x20. 
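 * The ASCII digits 0x30-0x39 already have bit 0x20 set, while the
 * upper-case hex letters 0x41-0x46 ('A'-'F') do not, so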
ORing digits or letters with 'locase' * produces same digits or (maybe lowercased) letters */ locase = (spec.flags & SMALL); if (spec.flags & LEFT) spec.flags &= ~ZEROPAD; sign = 0; if (spec.flags & SIGN) { if ((signed long long)num < 0) { sign = '-'; num = -(signed long long)num; field_width--; } else if (spec.flags & PLUS) { sign = '+'; field_width--; } else if (spec.flags & SPACE) { sign = ' '; field_width--; } } if (need_pfx) { if (spec.base == 16) field_width -= 2; else if (!is_zero) field_width--; } /* generate full string in tmp[], in reverse order */ i = 0; if (num < spec.base) tmp[i++] = hex_asc_upper[num] | locase; else if (spec.base != 10) { /* 8 or 16 */ int mask = spec.base - 1; int shift = 3; if (spec.base == 16) shift = 4; do { tmp[i++] = (hex_asc_upper[((unsigned char)num) & mask] | locase); num >>= shift; } while (num); } else { /* base 10 */ i = put_dec(tmp, num) - tmp; } /* printing 100 using %2d gives "100", not "00" */ if (i > precision) precision = i; /* leading space padding */ field_width -= precision; if (!(spec.flags & (ZEROPAD | LEFT))) { while (--field_width >= 0) { if (buf < end) *buf = ' '; ++buf; } } /* sign */ if (sign) { if (buf < end) *buf = sign; ++buf; } /* "0x" / "0" prefix */ if (need_pfx) { if (spec.base == 16 || !is_zero) { if (buf < end) *buf = '0'; ++buf; } if (spec.base == 16) { if (buf < end) *buf = ('X' | locase); ++buf; } } /* zero or space padding */ if (!(spec.flags & LEFT)) { char c = ' ' + (spec.flags & ZEROPAD); while (--field_width >= 0) { if (buf < end) *buf = c; ++buf; } } /* hmm even more zero padding? */ while (i <= --precision) { if (buf < end) *buf = '0'; ++buf; } /* actual digits of result */ while (--i >= 0) { if (buf < end) *buf = tmp[i]; ++buf; } /* trailing space padding */ while (--field_width >= 0) { if (buf < end) *buf = ' '; ++buf; } return buf; } static noinline_for_stack char *special_hex_number(char *buf, char *end, unsigned long long num, int size) { struct printf_spec spec; spec.type = FORMAT_TYPE_PTR; spec.field_width = 2 + 2 * size; /* 0x + hex */ spec.flags = SPECIAL | SMALL | ZEROPAD; spec.base = 16; spec.precision = -1; return number(buf, end, num, spec); } static void move_right(char *buf, char *end, unsigned len, unsigned spaces) { size_t size; if (buf >= end) /* nowhere to put anything */ return; size = end - buf; if (size <= spaces) { memset(buf, ' ', size); return; } if (len) { if (len > size - spaces) len = size - spaces; memmove(buf + spaces, buf, len); } memset(buf, ' ', spaces); } /* * Handle field width padding for a string. * @buf: current buffer position * @n: length of string * @end: end of output buffer * @spec: for field width and flags * Returns: new buffer position after padding. */ static noinline_for_stack char *widen_string(char *buf, int n, char *end, struct printf_spec spec) { unsigned spaces; if (likely(n >= spec.field_width)) return buf; /* we want to pad the sucker */ spaces = spec.field_width - n; if (!(spec.flags & LEFT)) { move_right(buf - n, end, n, spaces); return buf + spaces; } while (spaces--) { if (buf < end) *buf = ' '; ++buf; } return buf; } /* Handle string from a well known address. 
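 * No check_pointer() here - the caller guarantees @s is valid. The
 * precision, when non-negative, bounds how many characters are copied
 * (e.g. a "%.4s" spec turns "sneaky" into "snea"); the result is then
 * padded up to the field width by widen_string().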
*/ static char *string_nocheck(char *buf, char *end, const char *s, struct printf_spec spec) { int len = 0; int lim = spec.precision; while (lim--) { char c = *s++; if (!c) break; if (buf < end) *buf = c; ++buf; ++len; } return widen_string(buf, len, end, spec); } static char *err_ptr(char *buf, char *end, void *ptr, struct printf_spec spec) { int err = PTR_ERR(ptr); const char *sym = errname(err); if (sym) return string_nocheck(buf, end, sym, spec); /* * Somebody passed ERR_PTR(-1234) or some other non-existing * Efoo - or perhaps CONFIG_SYMBOLIC_ERRNAME=n. Fall back to * printing it as its decimal representation. */ spec.flags |= SIGN; spec.base = 10; return number(buf, end, err, spec); } /* Be careful: error messages must fit into the given buffer. */ static char *error_string(char *buf, char *end, const char *s, struct printf_spec spec) { /* * Hard limit to avoid a completely insane messages. It actually * works pretty well because most error messages are in * the many pointer format modifiers. */ if (spec.precision == -1) spec.precision = 2 * sizeof(void *); return string_nocheck(buf, end, s, spec); } /* * Do not call any complex external code here. Nested printk()/vsprintf() * might cause infinite loops. Failures might break printk() and would * be hard to debug. */ static const char *check_pointer_msg(const void *ptr) { if (!ptr) return "(null)"; if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr)) return "(efault)"; return NULL; } static int check_pointer(char **buf, char *end, const void *ptr, struct printf_spec spec) { const char *err_msg; err_msg = check_pointer_msg(ptr); if (err_msg) { *buf = error_string(*buf, end, err_msg, spec); return -EFAULT; } return 0; } static noinline_for_stack char *string(char *buf, char *end, const char *s, struct printf_spec spec) { if (check_pointer(&buf, end, s, spec)) return buf; return string_nocheck(buf, end, s, spec); } static char *pointer_string(char *buf, char *end, const void *ptr, struct printf_spec spec) { spec.base = 16; spec.flags |= SMALL; if (spec.field_width == -1) { spec.field_width = 2 * sizeof(ptr); spec.flags |= ZEROPAD; } return number(buf, end, (unsigned long int)ptr, spec); } /* Make pointers available for printing early in the boot sequence. */ static int debug_boot_weak_hash __ro_after_init; static int __init debug_boot_weak_hash_enable(char *str) { debug_boot_weak_hash = 1; pr_info("debug_boot_weak_hash enabled\n"); return 0; } early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); static bool filled_random_ptr_key __read_mostly; static siphash_key_t ptr_key __read_mostly; static int fill_ptr_key(struct notifier_block *nb, unsigned long action, void *data) { get_random_bytes(&ptr_key, sizeof(ptr_key)); /* Pairs with smp_rmb() before reading ptr_key. */ smp_wmb(); WRITE_ONCE(filled_random_ptr_key, true); return NOTIFY_DONE; } static int __init vsprintf_init_hashval(void) { static struct notifier_block fill_ptr_key_nb = { .notifier_call = fill_ptr_key }; execute_with_initialized_rng(&fill_ptr_key_nb); return 0; } subsys_initcall(vsprintf_init_hashval) /* Maps a pointer to a 32 bit unique identifier. */ static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out) { unsigned long hashval; if (!READ_ONCE(filled_random_ptr_key)) return -EBUSY; /* Pairs with smp_wmb() after writing ptr_key. 
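 * The write side publishes ptr_key first and only then sets
 * filled_random_ptr_key, so once the flag has been observed above, the
 * smp_rmb() below guarantees we also observe a fully initialised key.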
*/ smp_rmb(); #ifdef CONFIG_64BIT hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); /* * Mask off the first 32 bits, this makes explicit that we have * modified the address (and 32 bits is plenty for a unique ID). */ hashval = hashval & 0xffffffff; #else hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key); #endif *hashval_out = hashval; return 0; } int ptr_to_hashval(const void *ptr, unsigned long *hashval_out) { return __ptr_to_hashval(ptr, hashval_out); } static char *ptr_to_id(char *buf, char *end, const void *ptr, struct printf_spec spec) { const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)"; unsigned long hashval; int ret; /* * Print the real pointer value for NULL and error pointers, * as they are not actual addresses. */ if (IS_ERR_OR_NULL(ptr)) return pointer_string(buf, end, ptr, spec); /* When debugging early boot use non-cryptographically secure hash. */ if (unlikely(debug_boot_weak_hash)) { hashval = hash_long((unsigned long)ptr, 32); return pointer_string(buf, end, (const void *)hashval, spec); } ret = __ptr_to_hashval(ptr, &hashval); if (ret) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ return error_string(buf, end, str, spec); } return pointer_string(buf, end, (const void *)hashval, spec); } static char *default_pointer(char *buf, char *end, const void *ptr, struct printf_spec spec) { /* * default is to _not_ leak addresses, so hash before printing, * unless no_hash_pointers is specified on the command line. */ if (unlikely(no_hash_pointers)) return pointer_string(buf, end, ptr, spec); return ptr_to_id(buf, end, ptr, spec); } int kptr_restrict __read_mostly; static noinline_for_stack char *restricted_pointer(char *buf, char *end, const void *ptr, struct printf_spec spec) { switch (kptr_restrict) { case 0: /* Handle as %p, hash and do _not_ leak addresses. */ return default_pointer(buf, end, ptr, spec); case 1: { const struct cred *cred; /* * kptr_restrict==1 cannot be used in IRQ context * because its test for CAP_SYSLOG would be meaningless. */ if (in_hardirq() || in_serving_softirq() || in_nmi()) { if (spec.field_width == -1) spec.field_width = 2 * sizeof(ptr); return error_string(buf, end, "pK-error", spec); } /* * Only print the real pointer value if the current * process has CAP_SYSLOG and is running with the * same credentials it started with. This is because * access to files is checked at open() time, but %pK * checks permission at read() time. We don't want to * leak pointer values if a binary opens a file using * %pK and then elevates privileges before reading it. 
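 * If either check fails, @ptr is replaced with NULL below and the value
 * is rendered as a zero-padded run of '0's, exactly as for
 * kptr_restrict == 2.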
*/ cred = current_cred(); if (!has_capability_noaudit(current, CAP_SYSLOG) || !uid_eq(cred->euid, cred->uid) || !gid_eq(cred->egid, cred->gid)) ptr = NULL; break; } case 2: default: /* Always print 0's for %pK */ ptr = NULL; break; } return pointer_string(buf, end, ptr, spec); } static noinline_for_stack char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, const char *fmt) { const char *array[4], *s; const struct dentry *p; int depth; int i, n; switch (fmt[1]) { case '2': case '3': case '4': depth = fmt[1] - '0'; break; default: depth = 1; } rcu_read_lock(); for (i = 0; i < depth; i++, d = p) { if (check_pointer(&buf, end, d, spec)) { rcu_read_unlock(); return buf; } p = READ_ONCE(d->d_parent); array[i] = READ_ONCE(d->d_name.name); if (p == d) { if (i) array[i] = ""; i++; break; } } s = array[--i]; for (n = 0; n != spec.precision; n++, buf++) { char c = *s++; if (!c) { if (!i) break; c = '/'; s = array[--i]; } if (buf < end) *buf = c; } rcu_read_unlock(); return widen_string(buf, n, end, spec); } static noinline_for_stack char *file_dentry_name(char *buf, char *end, const struct file *f, struct printf_spec spec, const char *fmt) { if (check_pointer(&buf, end, f, spec)) return buf; return dentry_name(buf, end, f->f_path.dentry, spec, fmt); } #ifdef CONFIG_BLOCK static noinline_for_stack char *bdev_name(char *buf, char *end, struct block_device *bdev, struct printf_spec spec, const char *fmt) { struct gendisk *hd; if (check_pointer(&buf, end, bdev, spec)) return buf; hd = bdev->bd_disk; buf = string(buf, end, hd->disk_name, spec); if (bdev->bd_partno) { if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) { if (buf < end) *buf = 'p'; buf++; } buf = number(buf, end, bdev->bd_partno, spec); } return buf; } #endif static noinline_for_stack char *symbol_string(char *buf, char *end, void *ptr, struct printf_spec spec, const char *fmt) { unsigned long value; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; #endif if (fmt[1] == 'R') ptr = __builtin_extract_return_addr(ptr); value = (unsigned long)ptr; #ifdef CONFIG_KALLSYMS if (*fmt == 'B' && fmt[1] == 'b') sprint_backtrace_build_id(sym, value); else if (*fmt == 'B') sprint_backtrace(sym, value); else if (*fmt == 'S' && (fmt[1] == 'b' || (fmt[1] == 'R' && fmt[2] == 'b'))) sprint_symbol_build_id(sym, value); else if (*fmt != 's') sprint_symbol(sym, value); else sprint_symbol_no_offset(sym, value); return string_nocheck(buf, end, sym, spec); #else return special_hex_number(buf, end, value, sizeof(void *)); #endif } static const struct printf_spec default_str_spec = { .field_width = -1, .precision = -1, }; static const struct printf_spec default_flag_spec = { .base = 16, .precision = -1, .flags = SPECIAL | SMALL, }; static const struct printf_spec default_dec_spec = { .base = 10, .precision = -1, }; static const struct printf_spec default_dec02_spec = { .base = 10, .field_width = 2, .precision = -1, .flags = ZEROPAD, }; static const struct printf_spec default_dec04_spec = { .base = 10, .field_width = 4, .precision = -1, .flags = ZEROPAD, }; static noinline_for_stack char *resource_string(char *buf, char *end, struct resource *res, struct printf_spec spec, const char *fmt) { #ifndef IO_RSRC_PRINTK_SIZE #define IO_RSRC_PRINTK_SIZE 6 #endif #ifndef MEM_RSRC_PRINTK_SIZE #define MEM_RSRC_PRINTK_SIZE 10 #endif static const struct printf_spec io_spec = { .base = 16, .field_width = IO_RSRC_PRINTK_SIZE, .precision = -1, .flags = SPECIAL | SMALL | ZEROPAD, }; static const struct printf_spec mem_spec = { .base = 16, 
.field_width = MEM_RSRC_PRINTK_SIZE, .precision = -1, .flags = SPECIAL | SMALL | ZEROPAD, }; static const struct printf_spec bus_spec = { .base = 16, .field_width = 2, .precision = -1, .flags = SMALL | ZEROPAD, }; static const struct printf_spec str_spec = { .field_width = -1, .precision = 10, .flags = LEFT, }; /* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8) * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */ #define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4) #define FLAG_BUF_SIZE (2 * sizeof(res->flags)) #define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]") #define RAW_BUF_SIZE sizeof("[mem - flags 0x]") char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; char *p = sym, *pend = sym + sizeof(sym); int decode = (fmt[0] == 'R') ? 1 : 0; const struct printf_spec *specp; if (check_pointer(&buf, end, res, spec)) return buf; *p++ = '['; if (res->flags & IORESOURCE_IO) { p = string_nocheck(p, pend, "io ", str_spec); specp = &io_spec; } else if (res->flags & IORESOURCE_MEM) { p = string_nocheck(p, pend, "mem ", str_spec); specp = &mem_spec; } else if (res->flags & IORESOURCE_IRQ) { p = string_nocheck(p, pend, "irq ", str_spec); specp = &default_dec_spec; } else if (res->flags & IORESOURCE_DMA) { p = string_nocheck(p, pend, "dma ", str_spec); specp = &default_dec_spec; } else if (res->flags & IORESOURCE_BUS) { p = string_nocheck(p, pend, "bus ", str_spec); specp = &bus_spec; } else { p = string_nocheck(p, pend, "??? ", str_spec); specp = &mem_spec; decode = 0; } if (decode && res->flags & IORESOURCE_UNSET) { p = string_nocheck(p, pend, "size ", str_spec); p = number(p, pend, resource_size(res), *specp); } else { p = number(p, pend, res->start, *specp); if (res->start != res->end) { *p++ = '-'; p = number(p, pend, res->end, *specp); } } if (decode) { if (res->flags & IORESOURCE_MEM_64) p = string_nocheck(p, pend, " 64bit", str_spec); if (res->flags & IORESOURCE_PREFETCH) p = string_nocheck(p, pend, " pref", str_spec); if (res->flags & IORESOURCE_WINDOW) p = string_nocheck(p, pend, " window", str_spec); if (res->flags & IORESOURCE_DISABLED) p = string_nocheck(p, pend, " disabled", str_spec); } else { p = string_nocheck(p, pend, " flags ", str_spec); p = number(p, pend, res->flags, default_flag_spec); } *p++ = ']'; *p = '\0'; return string_nocheck(buf, end, sym, spec); } static noinline_for_stack char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) { int i, len = 1; /* if we pass '%ph[CDN]', field width remains negative value, fallback to the default */ char separator; if (spec.field_width == 0) /* nothing to print */ return buf; if (check_pointer(&buf, end, addr, spec)) return buf; switch (fmt[1]) { case 'C': separator = ':'; break; case 'D': separator = '-'; break; case 'N': separator = 0; break; default: separator = ' '; break; } if (spec.field_width > 0) len = min_t(int, spec.field_width, 64); for (i = 0; i < len; ++i) { if (buf < end) *buf = hex_asc_hi(addr[i]); ++buf; if (buf < end) *buf = hex_asc_lo(addr[i]); ++buf; if (separator && i != len - 1) { if (buf < end) *buf = separator; ++buf; } } return buf; } static noinline_for_stack char *bitmap_string(char *buf, char *end, const unsigned long *bitmap, struct printf_spec spec, const char *fmt) { const int CHUNKSZ = 32; int nr_bits = max_t(int, spec.field_width, 0); int i, chunksz; bool first = true; if (check_pointer(&buf, end, bitmap, spec)) return buf; /* reused to print numbers */ spec = (struct 
printf_spec){ .flags = SMALL | ZEROPAD, .base = 16 }; chunksz = nr_bits & (CHUNKSZ - 1); if (chunksz == 0) chunksz = CHUNKSZ; i = ALIGN(nr_bits, CHUNKSZ) - CHUNKSZ; for (; i >= 0; i -= CHUNKSZ) { u32 chunkmask, val; int word, bit; chunkmask = ((1ULL << chunksz) - 1); word = i / BITS_PER_LONG; bit = i % BITS_PER_LONG; val = (bitmap[word] >> bit) & chunkmask; if (!first) { if (buf < end) *buf = ','; buf++; } first = false; spec.field_width = DIV_ROUND_UP(chunksz, 4); buf = number(buf, end, val, spec); chunksz = CHUNKSZ; } return buf; } static noinline_for_stack char *bitmap_list_string(char *buf, char *end, const unsigned long *bitmap, struct printf_spec spec, const char *fmt) { int nr_bits = max_t(int, spec.field_width, 0); bool first = true; int rbot, rtop; if (check_pointer(&buf, end, bitmap, spec)) return buf; for_each_set_bitrange(rbot, rtop, bitmap, nr_bits) { if (!first) { if (buf < end) *buf = ','; buf++; } first = false; buf = number(buf, end, rbot, default_dec_spec); if (rtop == rbot + 1) continue; if (buf < end) *buf = '-'; buf = number(++buf, end, rtop - 1, default_dec_spec); } return buf; } static noinline_for_stack char *mac_address_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) { char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")]; char *p = mac_addr; int i; char separator; bool reversed = false; if (check_pointer(&buf, end, addr, spec)) return buf; switch (fmt[1]) { case 'F': separator = '-'; break; case 'R': reversed = true; fallthrough; default: separator = ':'; break; } for (i = 0; i < 6; i++) { if (reversed) p = hex_byte_pack(p, addr[5 - i]); else p = hex_byte_pack(p, addr[i]); if (fmt[0] == 'M' && i != 5) *p++ = separator; } *p = '\0'; return string_nocheck(buf, end, mac_addr, spec); } static noinline_for_stack char *ip4_string(char *p, const u8 *addr, const char *fmt) { int i; bool leading_zeros = (fmt[0] == 'i'); int index; int step; switch (fmt[2]) { case 'h': #ifdef __BIG_ENDIAN index = 0; step = 1; #else index = 3; step = -1; #endif break; case 'l': index = 3; step = -1; break; case 'n': case 'b': default: index = 0; step = 1; break; } for (i = 0; i < 4; i++) { char temp[4] __aligned(2); /* hold each IP quad in reverse order */ int digits = put_dec_trunc8(temp, addr[index]) - temp; if (leading_zeros) { if (digits < 3) *p++ = '0'; if (digits < 2) *p++ = '0'; } /* reverse the digits in the quad */ while (digits--) *p++ = temp[digits]; if (i < 3) *p++ = '.'; index += step; } *p = '\0'; return p; } static noinline_for_stack char *ip6_compressed_string(char *p, const char *addr) { int i, j, range; unsigned char zerolength[8]; int longest = 1; int colonpos = -1; u16 word; u8 hi, lo; bool needcolon = false; bool useIPv4; struct in6_addr in6; memcpy(&in6, addr, sizeof(struct in6_addr)); useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); memset(zerolength, 0, sizeof(zerolength)); if (useIPv4) range = 6; else range = 8; /* find position of longest 0 run */ for (i = 0; i < range; i++) { for (j = i; j < range; j++) { if (in6.s6_addr16[j] != 0) break; zerolength[i]++; } } for (i = 0; i < range; i++) { if (zerolength[i] > longest) { longest = zerolength[i]; colonpos = i; } } if (longest == 1) /* don't compress a single 0 */ colonpos = -1; /* emit address */ for (i = 0; i < range; i++) { if (i == colonpos) { if (needcolon || i == 0) *p++ = ':'; *p++ = ':'; needcolon = false; i += longest - 1; continue; } if (needcolon) { *p++ = ':'; needcolon = false; } /* hex u16 without leading 0s */ word = ntohs(in6.s6_addr16[i]); hi = word >> 8; lo = word 
& 0xff; if (hi) { if (hi > 0x0f) p = hex_byte_pack(p, hi); else *p++ = hex_asc_lo(hi); p = hex_byte_pack(p, lo); } else if (lo > 0x0f) p = hex_byte_pack(p, lo); else *p++ = hex_asc_lo(lo); needcolon = true; } if (useIPv4) { if (needcolon) *p++ = ':'; p = ip4_string(p, &in6.s6_addr[12], "I4"); } *p = '\0'; return p; } static noinline_for_stack char *ip6_string(char *p, const char *addr, const char *fmt) { int i; for (i = 0; i < 8; i++) { p = hex_byte_pack(p, *addr++); p = hex_byte_pack(p, *addr++); if (fmt[0] == 'I' && i != 7) *p++ = ':'; } *p = '\0'; return p; } static noinline_for_stack char *ip6_addr_string(char *buf, char *end, const u8 *addr, struct printf_spec spec, const char *fmt) { char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; if (fmt[0] == 'I' && fmt[2] == 'c') ip6_compressed_string(ip6_addr, addr); else ip6_string(ip6_addr, addr, fmt); return string_nocheck(buf, end, ip6_addr, spec); } static noinline_for_stack char *ip4_addr_string(char *buf, char *end, const u8 *addr, struct printf_spec spec, const char *fmt) { char ip4_addr[sizeof("255.255.255.255")]; ip4_string(ip4_addr, addr, fmt); return string_nocheck(buf, end, ip4_addr, spec); } static noinline_for_stack char *ip6_addr_string_sa(char *buf, char *end, const struct sockaddr_in6 *sa, struct printf_spec spec, const char *fmt) { bool have_p = false, have_s = false, have_f = false, have_c = false; char ip6_addr[sizeof("[xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255]") + sizeof(":12345") + sizeof("/123456789") + sizeof("%1234567890")]; char *p = ip6_addr, *pend = ip6_addr + sizeof(ip6_addr); const u8 *addr = (const u8 *) &sa->sin6_addr; char fmt6[2] = { fmt[0], '6' }; u8 off = 0; fmt++; while (isalpha(*++fmt)) { switch (*fmt) { case 'p': have_p = true; break; case 'f': have_f = true; break; case 's': have_s = true; break; case 'c': have_c = true; break; } } if (have_p || have_s || have_f) { *p = '['; off = 1; } if (fmt6[0] == 'I' && have_c) p = ip6_compressed_string(ip6_addr + off, addr); else p = ip6_string(ip6_addr + off, addr, fmt6); if (have_p || have_s || have_f) *p++ = ']'; if (have_p) { *p++ = ':'; p = number(p, pend, ntohs(sa->sin6_port), spec); } if (have_f) { *p++ = '/'; p = number(p, pend, ntohl(sa->sin6_flowinfo & IPV6_FLOWINFO_MASK), spec); } if (have_s) { *p++ = '%'; p = number(p, pend, sa->sin6_scope_id, spec); } *p = '\0'; return string_nocheck(buf, end, ip6_addr, spec); } static noinline_for_stack char *ip4_addr_string_sa(char *buf, char *end, const struct sockaddr_in *sa, struct printf_spec spec, const char *fmt) { bool have_p = false; char *p, ip4_addr[sizeof("255.255.255.255") + sizeof(":12345")]; char *pend = ip4_addr + sizeof(ip4_addr); const u8 *addr = (const u8 *) &sa->sin_addr.s_addr; char fmt4[3] = { fmt[0], '4', 0 }; fmt++; while (isalpha(*++fmt)) { switch (*fmt) { case 'p': have_p = true; break; case 'h': case 'l': case 'n': case 'b': fmt4[2] = *fmt; break; } } p = ip4_string(ip4_addr, addr, fmt4); if (have_p) { *p++ = ':'; p = number(p, pend, ntohs(sa->sin_port), spec); } *p = '\0'; return string_nocheck(buf, end, ip4_addr, spec); } static noinline_for_stack char *ip_addr_string(char *buf, char *end, const void *ptr, struct printf_spec spec, const char *fmt) { char *err_fmt_msg; if (check_pointer(&buf, end, ptr, spec)) return buf; switch (fmt[1]) { case '6': return ip6_addr_string(buf, end, ptr, spec, fmt); case '4': return ip4_addr_string(buf, end, ptr, spec, fmt); case 'S': { const union { struct sockaddr raw; struct sockaddr_in v4; struct sockaddr_in6 v6; } *sa = ptr; 
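		/*
		 * Illustrative output (addresses and port made up): "%pISpc"
		 * renders an AF_INET sockaddr as "1.2.3.4:12345" and an
		 * AF_INET6 one as "[1::2:3]:12345" - the brackets appear
		 * whenever a port, flowinfo or scope id is requested
		 * alongside the address.
		 */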
switch (sa->raw.sa_family) { case AF_INET: return ip4_addr_string_sa(buf, end, &sa->v4, spec, fmt); case AF_INET6: return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); default: return error_string(buf, end, "(einval)", spec); }} } err_fmt_msg = fmt[0] == 'i' ? "(%pi?)" : "(%pI?)"; return error_string(buf, end, err_fmt_msg, spec); } static noinline_for_stack char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) { bool found = true; int count = 1; unsigned int flags = 0; int len; if (spec.field_width == 0) return buf; /* nothing to print */ if (check_pointer(&buf, end, addr, spec)) return buf; do { switch (fmt[count++]) { case 'a': flags |= ESCAPE_ANY; break; case 'c': flags |= ESCAPE_SPECIAL; break; case 'h': flags |= ESCAPE_HEX; break; case 'n': flags |= ESCAPE_NULL; break; case 'o': flags |= ESCAPE_OCTAL; break; case 'p': flags |= ESCAPE_NP; break; case 's': flags |= ESCAPE_SPACE; break; default: found = false; break; } } while (found); if (!flags) flags = ESCAPE_ANY_NP; len = spec.field_width < 0 ? 1 : spec.field_width; /* * string_escape_mem() writes as many characters as it can to * the given buffer, and returns the total size of the output * had the buffer been big enough. */ buf += string_escape_mem(addr, len, buf, buf < end ? end - buf : 0, flags, NULL); return buf; } static char *va_format(char *buf, char *end, struct va_format *va_fmt, struct printf_spec spec, const char *fmt) { va_list va; if (check_pointer(&buf, end, va_fmt, spec)) return buf; va_copy(va, *va_fmt->va); buf += vsnprintf(buf, end > buf ? end - buf : 0, va_fmt->fmt, va); va_end(va); return buf; } static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, struct printf_spec spec, const char *fmt) { char uuid[UUID_STRING_LEN + 1]; char *p = uuid; int i; const u8 *index = uuid_index; bool uc = false; if (check_pointer(&buf, end, addr, spec)) return buf; switch (*(++fmt)) { case 'L': uc = true; fallthrough; case 'l': index = guid_index; break; case 'B': uc = true; break; } for (i = 0; i < 16; i++) { if (uc) p = hex_byte_pack_upper(p, addr[index[i]]); else p = hex_byte_pack(p, addr[index[i]]); switch (i) { case 3: case 5: case 7: case 9: *p++ = '-'; break; } } *p = 0; return string_nocheck(buf, end, uuid, spec); } static noinline_for_stack char *netdev_bits(char *buf, char *end, const void *addr, struct printf_spec spec, const char *fmt) { unsigned long long num; int size; if (check_pointer(&buf, end, addr, spec)) return buf; switch (fmt[1]) { case 'F': num = *(const netdev_features_t *)addr; size = sizeof(netdev_features_t); break; default: return error_string(buf, end, "(%pN?)", spec); } return special_hex_number(buf, end, num, size); } static noinline_for_stack char *fourcc_string(char *buf, char *end, const u32 *fourcc, struct printf_spec spec, const char *fmt) { char output[sizeof("0123 little-endian (0x01234567)")]; char *p = output; unsigned int i; u32 orig, val; if (fmt[1] != 'c' || fmt[2] != 'c') return error_string(buf, end, "(%p4?)", spec); if (check_pointer(&buf, end, fourcc, spec)) return buf; orig = get_unaligned(fourcc); val = orig & ~BIT(31); for (i = 0; i < sizeof(u32); i++) { unsigned char c = val >> (i * 8); /* Print non-control ASCII characters as-is, dot otherwise */ *p++ = isascii(c) && isprint(c) ? c : '.'; } *p++ = ' '; strcpy(p, orig & BIT(31) ? 
"big-endian" : "little-endian"); p += strlen(p); *p++ = ' '; *p++ = '('; p = special_hex_number(p, output + sizeof(output) - 2, orig, sizeof(u32)); *p++ = ')'; *p = '\0'; return string(buf, end, output, spec); } static noinline_for_stack char *address_val(char *buf, char *end, const void *addr, struct printf_spec spec, const char *fmt) { unsigned long long num; int size; if (check_pointer(&buf, end, addr, spec)) return buf; switch (fmt[1]) { case 'd': num = *(const dma_addr_t *)addr; size = sizeof(dma_addr_t); break; case 'p': default: num = *(const phys_addr_t *)addr; size = sizeof(phys_addr_t); break; } return special_hex_number(buf, end, num, size); } static noinline_for_stack char *date_str(char *buf, char *end, const struct rtc_time *tm, bool r) { int year = tm->tm_year + (r ? 0 : 1900); int mon = tm->tm_mon + (r ? 0 : 1); buf = number(buf, end, year, default_dec04_spec); if (buf < end) *buf = '-'; buf++; buf = number(buf, end, mon, default_dec02_spec); if (buf < end) *buf = '-'; buf++; return number(buf, end, tm->tm_mday, default_dec02_spec); } static noinline_for_stack char *time_str(char *buf, char *end, const struct rtc_time *tm, bool r) { buf = number(buf, end, tm->tm_hour, default_dec02_spec); if (buf < end) *buf = ':'; buf++; buf = number(buf, end, tm->tm_min, default_dec02_spec); if (buf < end) *buf = ':'; buf++; return number(buf, end, tm->tm_sec, default_dec02_spec); } static noinline_for_stack char *rtc_str(char *buf, char *end, const struct rtc_time *tm, struct printf_spec spec, const char *fmt) { bool have_t = true, have_d = true; bool raw = false, iso8601_separator = true; bool found = true; int count = 2; if (check_pointer(&buf, end, tm, spec)) return buf; switch (fmt[count]) { case 'd': have_t = false; count++; break; case 't': have_d = false; count++; break; } do { switch (fmt[count++]) { case 'r': raw = true; break; case 's': iso8601_separator = false; break; default: found = false; break; } } while (found); if (have_d) buf = date_str(buf, end, tm, raw); if (have_d && have_t) { if (buf < end) *buf = iso8601_separator ? 
'T' : ' '; buf++; } if (have_t) buf = time_str(buf, end, tm, raw); return buf; } static noinline_for_stack char *time64_str(char *buf, char *end, const time64_t time, struct printf_spec spec, const char *fmt) { struct rtc_time rtc_time; struct tm tm; time64_to_tm(time, 0, &tm); rtc_time.tm_sec = tm.tm_sec; rtc_time.tm_min = tm.tm_min; rtc_time.tm_hour = tm.tm_hour; rtc_time.tm_mday = tm.tm_mday; rtc_time.tm_mon = tm.tm_mon; rtc_time.tm_year = tm.tm_year; rtc_time.tm_wday = tm.tm_wday; rtc_time.tm_yday = tm.tm_yday; rtc_time.tm_isdst = 0; return rtc_str(buf, end, &rtc_time, spec, fmt); } static noinline_for_stack char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, const char *fmt) { switch (fmt[1]) { case 'R': return rtc_str(buf, end, (const struct rtc_time *)ptr, spec, fmt); case 'T': return time64_str(buf, end, *(const time64_t *)ptr, spec, fmt); default: return error_string(buf, end, "(%pt?)", spec); } } static noinline_for_stack char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, const char *fmt) { if (!IS_ENABLED(CONFIG_HAVE_CLK)) return error_string(buf, end, "(%pC?)", spec); if (check_pointer(&buf, end, clk, spec)) return buf; switch (fmt[1]) { case 'n': default: #ifdef CONFIG_COMMON_CLK return string(buf, end, __clk_get_name(clk), spec); #else return ptr_to_id(buf, end, clk, spec); #endif } } static char *format_flags(char *buf, char *end, unsigned long flags, const struct trace_print_flags *names) { unsigned long mask; for ( ; flags && names->name; names++) { mask = names->mask; if ((flags & mask) != mask) continue; buf = string(buf, end, names->name, default_str_spec); flags &= ~mask; if (flags) { if (buf < end) *buf = '|'; buf++; } } if (flags) buf = number(buf, end, flags, default_flag_spec); return buf; } struct page_flags_fields { int width; int shift; int mask; const struct printf_spec *spec; const char *name; }; static const struct page_flags_fields pff[] = { {SECTIONS_WIDTH, SECTIONS_PGSHIFT, SECTIONS_MASK, &default_dec_spec, "section"}, {NODES_WIDTH, NODES_PGSHIFT, NODES_MASK, &default_dec_spec, "node"}, {ZONES_WIDTH, ZONES_PGSHIFT, ZONES_MASK, &default_dec_spec, "zone"}, {LAST_CPUPID_WIDTH, LAST_CPUPID_PGSHIFT, LAST_CPUPID_MASK, &default_flag_spec, "lastcpupid"}, {KASAN_TAG_WIDTH, KASAN_TAG_PGSHIFT, KASAN_TAG_MASK, &default_flag_spec, "kasantag"}, }; static char *format_page_flags(char *buf, char *end, unsigned long flags) { unsigned long main_flags = flags & PAGEFLAGS_MASK; bool append = false; int i; buf = number(buf, end, flags, default_flag_spec); if (buf < end) *buf = '('; buf++; /* Page flags from the main area. */ if (main_flags) { buf = format_flags(buf, end, main_flags, pageflag_names); append = true; } /* Page flags from the fields area */ for (i = 0; i < ARRAY_SIZE(pff); i++) { /* Skip undefined fields. 
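 * A field is "undefined" when its *_WIDTH is configured to 0, e.g.
 * kasantag on kernels built without the KASAN tag-based modes.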
*/ if (!pff[i].width) continue; /* Format: Flag Name + '=' (equals sign) + Number + '|' (separator) */ if (append) { if (buf < end) *buf = '|'; buf++; } buf = string(buf, end, pff[i].name, default_str_spec); if (buf < end) *buf = '='; buf++; buf = number(buf, end, (flags >> pff[i].shift) & pff[i].mask, *pff[i].spec); append = true; } if (buf < end) *buf = ')'; buf++; return buf; } static char *format_page_type(char *buf, char *end, unsigned int page_type) { buf = number(buf, end, page_type, default_flag_spec); if (buf < end) *buf = '('; buf++; if (page_type_has_type(page_type)) buf = format_flags(buf, end, ~page_type, pagetype_names); if (buf < end) *buf = ')'; buf++; return buf; } static noinline_for_stack char *flags_string(char *buf, char *end, void *flags_ptr, struct printf_spec spec, const char *fmt) { unsigned long flags; const struct trace_print_flags *names; if (check_pointer(&buf, end, flags_ptr, spec)) return buf; switch (fmt[1]) { case 'p': return format_page_flags(buf, end, *(unsigned long *)flags_ptr); case 't': return format_page_type(buf, end, *(unsigned int *)flags_ptr); case 'v': flags = *(unsigned long *)flags_ptr; names = vmaflag_names; break; case 'g': flags = (__force unsigned long)(*(gfp_t *)flags_ptr); names = gfpflag_names; break; default: return error_string(buf, end, "(%pG?)", spec); } return format_flags(buf, end, flags, names); } static noinline_for_stack char *fwnode_full_name_string(struct fwnode_handle *fwnode, char *buf, char *end) { int depth; /* Loop starting from the root node to the current node. */ for (depth = fwnode_count_parents(fwnode); depth >= 0; depth--) { /* * Only get a reference for other nodes (i.e. parent nodes). * fwnode refcount may be 0 here. */ struct fwnode_handle *__fwnode = depth ? fwnode_get_nth_parent(fwnode, depth) : fwnode; buf = string(buf, end, fwnode_get_name_prefix(__fwnode), default_str_spec); buf = string(buf, end, fwnode_get_name(__fwnode), default_str_spec); if (depth) fwnode_handle_put(__fwnode); } return buf; } static noinline_for_stack char *device_node_string(char *buf, char *end, struct device_node *dn, struct printf_spec spec, const char *fmt) { char tbuf[sizeof("xxxx") + 1]; const char *p; int ret; char *buf_start = buf; struct property *prop; bool has_mult, pass; struct printf_spec str_spec = spec; str_spec.field_width = -1; if (fmt[0] != 'F') return error_string(buf, end, "(%pO?)", spec); if (!IS_ENABLED(CONFIG_OF)) return error_string(buf, end, "(%pOF?)", spec); if (check_pointer(&buf, end, dn, spec)) return buf; /* simple case without anything any more format specifiers */ fmt++; if (fmt[0] == '\0' || strcspn(fmt,"fnpPFcC") > 0) fmt = "f"; for (pass = false; strspn(fmt,"fnpPFcC"); fmt++, pass = true) { int precision; if (pass) { if (buf < end) *buf = ':'; buf++; } switch (*fmt) { case 'f': /* full_name */ buf = fwnode_full_name_string(of_fwnode_handle(dn), buf, end); break; case 'n': /* name */ p = fwnode_get_name(of_fwnode_handle(dn)); precision = str_spec.precision; str_spec.precision = strchrnul(p, '@') - p; buf = string(buf, end, p, str_spec); str_spec.precision = precision; break; case 'p': /* phandle */ buf = number(buf, end, (unsigned int)dn->phandle, default_dec_spec); break; case 'P': /* path-spec */ p = fwnode_get_name(of_fwnode_handle(dn)); if (!p[1]) p = "/"; buf = string(buf, end, p, str_spec); break; case 'F': /* flags */ tbuf[0] = of_node_check_flag(dn, OF_DYNAMIC) ? 'D' : '-'; tbuf[1] = of_node_check_flag(dn, OF_DETACHED) ? 'd' : '-'; tbuf[2] = of_node_check_flag(dn, OF_POPULATED) ? 
'P' : '-'; tbuf[3] = of_node_check_flag(dn, OF_POPULATED_BUS) ? 'B' : '-'; tbuf[4] = 0; buf = string_nocheck(buf, end, tbuf, str_spec); break; case 'c': /* major compatible string */ ret = of_property_read_string(dn, "compatible", &p); if (!ret) buf = string(buf, end, p, str_spec); break; case 'C': /* full compatible string */ has_mult = false; of_property_for_each_string(dn, "compatible", prop, p) { if (has_mult) buf = string_nocheck(buf, end, ",", str_spec); buf = string_nocheck(buf, end, "\"", str_spec); buf = string(buf, end, p, str_spec); buf = string_nocheck(buf, end, "\"", str_spec); has_mult = true; } break; default: break; } } return widen_string(buf, buf - buf_start, end, spec); } static noinline_for_stack char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, struct printf_spec spec, const char *fmt) { struct printf_spec str_spec = spec; char *buf_start = buf; str_spec.field_width = -1; if (*fmt != 'w') return error_string(buf, end, "(%pf?)", spec); if (check_pointer(&buf, end, fwnode, spec)) return buf; fmt++; switch (*fmt) { case 'P': /* name */ buf = string(buf, end, fwnode_get_name(fwnode), str_spec); break; case 'f': /* full_name */ default: buf = fwnode_full_name_string(fwnode, buf, end); break; } return widen_string(buf, buf - buf_start, end, spec); } int __init no_hash_pointers_enable(char *str) { if (no_hash_pointers) return 0; no_hash_pointers = true; pr_warn("**********************************************************\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("** **\n"); pr_warn("** This system shows unhashed kernel memory addresses **\n"); pr_warn("** via the console, logs, and other interfaces. This **\n"); pr_warn("** might reduce the security of your system. **\n"); pr_warn("** **\n"); pr_warn("** If you see this message and you are not debugging **\n"); pr_warn("** the kernel, report this immediately to your system **\n"); pr_warn("** administrator! **\n"); pr_warn("** **\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("**********************************************************\n"); return 0; } early_param("no_hash_pointers", no_hash_pointers_enable); /* Used for Rust formatting ('%pA'). */ char *rust_fmt_argument(char *buf, char *end, void *ptr); /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format * specifiers. * * Please update scripts/checkpatch.pl when adding/removing conversion * characters. (Search for "check for vsprintf extension"). * * Right now we handle: * * - 'S' For symbolic direct pointers (or function descriptors) with offset * - 's' For symbolic direct pointers (or function descriptors) without offset * - '[Ss]R' as above with __builtin_extract_return_addr() translation * - 'S[R]b' as above with module build ID (for use in backtraces) * - '[Ff]' %pf and %pF were obsoleted and later removed in favor of * %ps and %pS. Be careful when re-using these specifiers. 
* - 'B' For backtraced symbolic direct pointers with offset * - 'Bb' as above with module build ID (for use in backtraces) * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] * - 'b[l]' For a bitmap, the number of bits is determined by the field * width which must be explicitly specified either as part of the * format string '%32b[l]' or through '%*b[l]', [l] selects * range-list format instead of hex format * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation * - 'm' For a 6-byte MAC address, it prints the hex address without colons * - 'MF' For a 6-byte MAC FDDI address, it prints the address * with a dash-separated hex notation * - '[mM]R' For a 6-byte MAC address, Reverse order (Bluetooth) * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4) * IPv6 uses colon separated network-order 16 bit hex with leading 0's * [S][pfs] * Generic IPv4/IPv6 address (struct sockaddr *) that falls back to * [4] or [6] and is able to print port [p], flowinfo [f], scope [s] * - 'i' [46] for 'raw' IPv4/IPv6 addresses * IPv6 omits the colons (01020304...0f) * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006) * [S][pfs] * Generic IPv4/IPv6 address (struct sockaddr *) that falls back to * [4] or [6] and is able to print port [p], flowinfo [f], scope [s] * - '[Ii][4S][hnbl]' IPv4 addresses in host, network, big or little endian order * - 'I[6S]c' for IPv6 addresses printed as specified by * https://tools.ietf.org/html/rfc5952 * - 'E[achnops]' For an escaped buffer, where rules are defined by combination * of the following flags (see string_escape_mem() for the * details): * a - ESCAPE_ANY * c - ESCAPE_SPECIAL * h - ESCAPE_HEX * n - ESCAPE_NULL * o - ESCAPE_OCTAL * p - ESCAPE_NP * s - ESCAPE_SPACE * By default ESCAPE_ANY_NP is used. * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" * Options for %pU are: * b big endian lower case hex (default) * B big endian UPPER case hex * l little endian lower case hex * L little endian UPPER case hex * big endian output byte order is: * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] * little endian output byte order is: * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] * - 'V' For a struct va_format which contains a format string * and va_list *, * call vsnprintf(->format, *->va_list). * Implements a "recursive vsnprintf". * Do not use this feature without some mechanism to verify the * correctness of the format string and va_list arguments. * - 'K' For a kernel pointer that should be hidden from unprivileged users. * Use only for procfs, sysfs and similar files, not printk(); please * read the documentation (path below) first. * - 'NF' For a netdev_features_t * - '4cc' V4L2 or DRM FourCC code, with endianness and raw numerical value. * - 'h[CDN]' For a variable-length buffer, it prints it as a hex string with * a certain separator (' ' by default): * C colon * D dash * N no separator * The maximum supported length is 64 bytes of the input. Consider * to use print_hex_dump() for the larger input. 
* - 'a[pd]' For address types [p] phys_addr_t, [d] dma_addr_t and derivatives * (default assumed to be phys_addr_t, passed by reference) * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file * - 'g' For block_device name (gendisk + partition number) * - 't[RT][dt][r][s]' For time and date as represented by: * R struct rtc_time * T time64_t * - 'C' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock * - 'G' For flags to be printed as a collection of symbolic strings that would * construct the specific value. Supported flags given by option: * p page flags (see struct page) given as pointer to unsigned long * g gfp flags (GFP_* and __GFP_*) given as pointer to gfp_t * v vma flags (VM_*) given as pointer to unsigned long * - 'OF[fnpPcCF]' For a device tree object * Without any optional arguments prints the full_name * f device node full_name * n device node name * p device node phandle * P device node path spec (name + @unit) * F device node flags * c major compatible string * C full compatible string * - 'fw[fP]' For a firmware node (struct fwnode_handle) pointer * Without an option prints the full name of the node * f full name * P node name, including a possible unit address * - 'x' For printing the address unmodified. Equivalent to "%lx". * Please read the documentation (path below) before using! * - '[ku]s' For a BPF/tracing related format specifier, e.g. used out of * bpf_trace_printk() where [ku] prefix specifies either kernel (k) * or user (u) memory to probe, and: * s a string, equivalent to "%s" on direct vsnprintf() use * * ** When making changes please also update: * Documentation/core-api/printk-formats.rst * * Note: The default behaviour (unadorned %p) is to hash the address, * rendering it useful as a unique identifier. * * There is also a '%pA' format specifier, but it is only intended to be used * from Rust code to format core::fmt::Arguments. Do *not* use it from C. * See rust/kernel/print.rs for details. 
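 *
 * A handful of illustrative calls (the variable names and the printed
 * values are made up; the shapes of the output follow the rules above):
 *
 *	pr_info("%pS\n", (void *)ret_ip);	e.g. "do_sys_open+0x1b/0x20"
 *	pr_info("%pI4\n", &ip4_be32);		e.g. "192.168.0.1"
 *	pr_info("%pUb\n", uuid_bytes);		e.g. "00112233-4455-6677-8899-aabbccddeeff"
 *	pr_info("%pR\n", &mem_res);		e.g. "[mem 0xc0000000-0xc0ffffff pref]"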
*/ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, struct printf_spec spec) { switch (*fmt) { case 'S': case 's': ptr = dereference_symbol_descriptor(ptr); fallthrough; case 'B': return symbol_string(buf, end, ptr, spec, fmt); case 'R': case 'r': return resource_string(buf, end, ptr, spec, fmt); case 'h': return hex_string(buf, end, ptr, spec, fmt); case 'b': switch (fmt[1]) { case 'l': return bitmap_list_string(buf, end, ptr, spec, fmt); default: return bitmap_string(buf, end, ptr, spec, fmt); } case 'M': /* Colon separated: 00:01:02:03:04:05 */ case 'm': /* Contiguous: 000102030405 */ /* [mM]F (FDDI) */ /* [mM]R (Reverse order; Bluetooth) */ return mac_address_string(buf, end, ptr, spec, fmt); case 'I': /* Formatted IP supported * 4: 1.2.3.4 * 6: 0001:0203:...:0708 * 6c: 1::708 or 1::1.2.3.4 */ case 'i': /* Contiguous: * 4: 001.002.003.004 * 6: 000102...0f */ return ip_addr_string(buf, end, ptr, spec, fmt); case 'E': return escaped_string(buf, end, ptr, spec, fmt); case 'U': return uuid_string(buf, end, ptr, spec, fmt); case 'V': return va_format(buf, end, ptr, spec, fmt); case 'K': return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, spec, fmt); case '4': return fourcc_string(buf, end, ptr, spec, fmt); case 'a': return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 't': return time_and_date(buf, end, ptr, spec, fmt); case 'C': return clock(buf, end, ptr, spec, fmt); case 'D': return file_dentry_name(buf, end, ptr, spec, fmt); #ifdef CONFIG_BLOCK case 'g': return bdev_name(buf, end, ptr, spec, fmt); #endif case 'G': return flags_string(buf, end, ptr, spec, fmt); case 'O': return device_node_string(buf, end, ptr, spec, fmt + 1); case 'f': return fwnode_string(buf, end, ptr, spec, fmt + 1); case 'A': if (!IS_ENABLED(CONFIG_RUST)) { WARN_ONCE(1, "Please remove %%pA from non-Rust code\n"); return error_string(buf, end, "(%pA?)", spec); } return rust_fmt_argument(buf, end, ptr); case 'x': return pointer_string(buf, end, ptr, spec); case 'e': /* %pe with a non-ERR_PTR gets treated as plain %p */ if (!IS_ERR(ptr)) return default_pointer(buf, end, ptr, spec); return err_ptr(buf, end, ptr, spec); case 'u': case 'k': switch (fmt[1]) { case 's': return string(buf, end, ptr, spec); default: return error_string(buf, end, "(einval)", spec); } default: return default_pointer(buf, end, ptr, spec); } } /* * Helper function to decode printf style format. * Each call decode a token from the format and return the * number of characters read (or likely the delta where it wants * to go on the next call). * The decoded token is returned through the parameters * * 'h', 'l', or 'L' for integer fields * 'z' support added 23/7/1999 S.H. * 'z' changed to 'Z' --davidm 1/25/99 * 'Z' changed to 'z' --adobriyan 2017-01-25 * 't' added for ptrdiff_t * * @fmt: the format string * @type of the token returned * @flags: various flags such as +, -, # tokens.. * @field_width: overwritten width * @base: base of the number (octal, hex, ...) * @precision: precision of a number * @qualifier: qualifier of a number (long, size_t, ...) 
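 *
 * For example, decoding "x=%08lx" takes two calls: the first consumes
 * the literal "x=" (FORMAT_TYPE_NONE, return value 2), the second
 * consumes all of "%08lx" and leaves base 16, the ZEROPAD and SMALL
 * flags, a field width of 8 and FORMAT_TYPE_ULONG in @spec.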
*/ static noinline_for_stack int format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; char qualifier; /* we finished early by reading the field width */ if (spec->type == FORMAT_TYPE_WIDTH) { if (spec->field_width < 0) { spec->field_width = -spec->field_width; spec->flags |= LEFT; } spec->type = FORMAT_TYPE_NONE; goto precision; } /* we finished early by reading the precision */ if (spec->type == FORMAT_TYPE_PRECISION) { if (spec->precision < 0) spec->precision = 0; spec->type = FORMAT_TYPE_NONE; goto qualifier; } /* By default */ spec->type = FORMAT_TYPE_NONE; for (; *fmt ; ++fmt) { if (*fmt == '%') break; } /* Return the current non-format string */ if (fmt != start || !*fmt) return fmt - start; /* Process flags */ spec->flags = 0; while (1) { /* this also skips first '%' */ bool found = true; ++fmt; switch (*fmt) { case '-': spec->flags |= LEFT; break; case '+': spec->flags |= PLUS; break; case ' ': spec->flags |= SPACE; break; case '#': spec->flags |= SPECIAL; break; case '0': spec->flags |= ZEROPAD; break; default: found = false; } if (!found) break; } /* get field width */ spec->field_width = -1; if (isdigit(*fmt)) spec->field_width = skip_atoi(&fmt); else if (*fmt == '*') { /* it's the next argument */ spec->type = FORMAT_TYPE_WIDTH; return ++fmt - start; } precision: /* get the precision */ spec->precision = -1; if (*fmt == '.') { ++fmt; if (isdigit(*fmt)) { spec->precision = skip_atoi(&fmt); if (spec->precision < 0) spec->precision = 0; } else if (*fmt == '*') { /* it's the next argument */ spec->type = FORMAT_TYPE_PRECISION; return ++fmt - start; } } qualifier: /* get the conversion qualifier */ qualifier = 0; if (*fmt == 'h' || _tolower(*fmt) == 'l' || *fmt == 'z' || *fmt == 't') { qualifier = *fmt++; if (unlikely(qualifier == *fmt)) { if (qualifier == 'l') { qualifier = 'L'; ++fmt; } else if (qualifier == 'h') { qualifier = 'H'; ++fmt; } } } /* default base */ spec->base = 10; switch (*fmt) { case 'c': spec->type = FORMAT_TYPE_CHAR; return ++fmt - start; case 's': spec->type = FORMAT_TYPE_STR; return ++fmt - start; case 'p': spec->type = FORMAT_TYPE_PTR; return ++fmt - start; case '%': spec->type = FORMAT_TYPE_PERCENT_CHAR; return ++fmt - start; /* integer number formats - set up the flags and "break" */ case 'o': spec->base = 8; break; case 'x': spec->flags |= SMALL; fallthrough; case 'X': spec->base = 16; break; case 'd': case 'i': spec->flags |= SIGN; break; case 'u': break; case 'n': /* * Since %n poses a greater security risk than * utility, treat it as any other invalid or * unsupported format specifier. 
*/ fallthrough; default: WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt); spec->type = FORMAT_TYPE_INVALID; return fmt - start; } if (qualifier == 'L') spec->type = FORMAT_TYPE_LONG_LONG; else if (qualifier == 'l') { BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG); spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN); } else if (qualifier == 'z') { spec->type = FORMAT_TYPE_SIZE_T; } else if (qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; } else if (qualifier == 'H') { BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE); spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN); } else if (qualifier == 'h') { BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT); spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN); } else { BUILD_BUG_ON(FORMAT_TYPE_UINT + SIGN != FORMAT_TYPE_INT); spec->type = FORMAT_TYPE_UINT + (spec->flags & SIGN); } return ++fmt - start; } static void set_field_width(struct printf_spec *spec, int width) { spec->field_width = width; if (WARN_ONCE(spec->field_width != width, "field width %d too large", width)) { spec->field_width = clamp(width, -FIELD_WIDTH_MAX, FIELD_WIDTH_MAX); } } static void set_precision(struct printf_spec *spec, int prec) { spec->precision = prec; if (WARN_ONCE(spec->precision != prec, "precision %d too large", prec)) { spec->precision = clamp(prec, 0, PRECISION_MAX); } } /** * vsnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space * @fmt: The format string to use * @args: Arguments for the format string * * This function generally follows C99 vsnprintf, but has some * extensions and a few limitations: * * - ``%n`` is unsupported * - ``%p*`` is handled by pointer() * * See pointer() or Documentation/core-api/printk-formats.rst for more * extensive description. * * **Please update the documentation in both places when making changes** * * The return value is the number of characters which would * be generated for the given input, excluding the trailing * '\0', as per ISO C99. If you want to have the exact * number of characters written into @buf as return value * (not including the trailing '\0'), use vscnprintf(). If the * return is greater than or equal to @size, the resulting * string is truncated. * * If you're not already dealing with a va_list consider using snprintf(). */ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) { unsigned long long num; char *str, *end; struct printf_spec spec = {0}; /* Reject out-of-range values early. Large positive sizes are used for unknown buffer sizes. 
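 * (sprintf() and vsprintf() pass INT_MAX, and vsnprintf(NULL, 0, ...)
 * is a legitimate way of asking only for the would-be length.)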
*/ if (WARN_ON_ONCE(size > INT_MAX)) return 0; str = buf; end = buf + size; /* Make sure end is always >= buf */ if (end < buf) { end = ((void *)-1); size = end - buf; } while (*fmt) { const char *old_fmt = fmt; int read = format_decode(fmt, &spec); fmt += read; switch (spec.type) { case FORMAT_TYPE_NONE: { int copy = read; if (str < end) { if (copy > end - str) copy = end - str; memcpy(str, old_fmt, copy); } str += read; break; } case FORMAT_TYPE_WIDTH: set_field_width(&spec, va_arg(args, int)); break; case FORMAT_TYPE_PRECISION: set_precision(&spec, va_arg(args, int)); break; case FORMAT_TYPE_CHAR: { char c; if (!(spec.flags & LEFT)) { while (--spec.field_width > 0) { if (str < end) *str = ' '; ++str; } } c = (unsigned char) va_arg(args, int); if (str < end) *str = c; ++str; while (--spec.field_width > 0) { if (str < end) *str = ' '; ++str; } break; } case FORMAT_TYPE_STR: str = string(str, end, va_arg(args, char *), spec); break; case FORMAT_TYPE_PTR: str = pointer(fmt, str, end, va_arg(args, void *), spec); while (isalnum(*fmt)) fmt++; break; case FORMAT_TYPE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; break; case FORMAT_TYPE_INVALID: /* * Presumably the arguments passed gcc's type * checking, but there is no safe or sane way * for us to continue parsing the format and * fetching from the va_list; the remaining * specifiers and arguments would be out of * sync. */ goto out; default: switch (spec.type) { case FORMAT_TYPE_LONG_LONG: num = va_arg(args, long long); break; case FORMAT_TYPE_ULONG: num = va_arg(args, unsigned long); break; case FORMAT_TYPE_LONG: num = va_arg(args, long); break; case FORMAT_TYPE_SIZE_T: if (spec.flags & SIGN) num = va_arg(args, ssize_t); else num = va_arg(args, size_t); break; case FORMAT_TYPE_PTRDIFF: num = va_arg(args, ptrdiff_t); break; case FORMAT_TYPE_UBYTE: num = (unsigned char) va_arg(args, int); break; case FORMAT_TYPE_BYTE: num = (signed char) va_arg(args, int); break; case FORMAT_TYPE_USHORT: num = (unsigned short) va_arg(args, int); break; case FORMAT_TYPE_SHORT: num = (short) va_arg(args, int); break; case FORMAT_TYPE_INT: num = (int) va_arg(args, int); break; default: num = va_arg(args, unsigned int); } str = number(str, end, num, spec); } } out: if (size > 0) { if (str < end) *str = '\0'; else end[-1] = '\0'; } /* the trailing null byte doesn't count towards the total */ return str-buf; } EXPORT_SYMBOL(vsnprintf); /** * vscnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space * @fmt: The format string to use * @args: Arguments for the format string * * The return value is the number of characters which have been written into * the @buf not including the trailing '\0'. If @size is == 0 the function * returns 0. * * If you're not already dealing with a va_list consider using scnprintf(). * * See the vsnprintf() documentation for format string extensions over C99. 
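 *
 * To illustrate the difference from vsnprintf(): formatting a
 * 40-character result into a 16-byte buffer makes vsnprintf() return
 * 40 (the untruncated length), while vscnprintf() returns 15 (what
 * actually fits, excluding the trailing '\0').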
*/ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) { int i; if (unlikely(!size)) return 0; i = vsnprintf(buf, size, fmt, args); if (likely(i < size)) return i; return size - 1; } EXPORT_SYMBOL(vscnprintf); /** * snprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space * @fmt: The format string to use * @...: Arguments for the format string * * The return value is the number of characters which would be * generated for the given input, excluding the trailing null, * as per ISO C99. If the return is greater than or equal to * @size, the resulting string is truncated. * * See the vsnprintf() documentation for format string extensions over C99. */ int snprintf(char *buf, size_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); i = vsnprintf(buf, size, fmt, args); va_end(args); return i; } EXPORT_SYMBOL(snprintf); /** * scnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space * @fmt: The format string to use * @...: Arguments for the format string * * The return value is the number of characters written into @buf not including * the trailing '\0'. If @size is == 0 the function returns 0. */ int scnprintf(char *buf, size_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); i = vscnprintf(buf, size, fmt, args); va_end(args); return i; } EXPORT_SYMBOL(scnprintf); /** * vsprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into * @fmt: The format string to use * @args: Arguments for the format string * * The function returns the number of characters written * into @buf. Use vsnprintf() or vscnprintf() in order to avoid * buffer overflows. * * If you're not already dealing with a va_list consider using sprintf(). * * See the vsnprintf() documentation for format string extensions over C99. */ int vsprintf(char *buf, const char *fmt, va_list args) { return vsnprintf(buf, INT_MAX, fmt, args); } EXPORT_SYMBOL(vsprintf); /** * sprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into * @fmt: The format string to use * @...: Arguments for the format string * * The function returns the number of characters written * into @buf. Use snprintf() or scnprintf() in order to avoid * buffer overflows. * * See the vsnprintf() documentation for format string extensions over C99. */ int sprintf(char *buf, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); i = vsnprintf(buf, INT_MAX, fmt, args); va_end(args); return i; } EXPORT_SYMBOL(sprintf); #ifdef CONFIG_BINARY_PRINTF /* * bprintf service: * vbin_printf() - VA arguments to binary data * bstr_printf() - Binary data to text string */ /** * vbin_printf - Parse a format string and place args' binary value in a buffer * @bin_buf: The buffer to place args' binary value * @size: The size of the buffer(by words(32bits), not characters) * @fmt: The format string to use * @args: Arguments for the format string * * The format follows C99 vsnprintf, except %n is ignored, and its argument * is skipped. * * The return value is the number of words(32bits) which would be generated for * the given input. * * NOTE: * If the return value is greater than @size, the resulting bin_buf is NOT * valid for bstr_printf(). 
*/ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) { struct printf_spec spec = {0}; char *str, *end; int width; str = (char *)bin_buf; end = (char *)(bin_buf + size); #define save_arg(type) \ ({ \ unsigned long long value; \ if (sizeof(type) == 8) { \ unsigned long long val8; \ str = PTR_ALIGN(str, sizeof(u32)); \ val8 = va_arg(args, unsigned long long); \ if (str + sizeof(type) <= end) { \ *(u32 *)str = *(u32 *)&val8; \ *(u32 *)(str + 4) = *((u32 *)&val8 + 1); \ } \ value = val8; \ } else { \ unsigned int val4; \ str = PTR_ALIGN(str, sizeof(type)); \ val4 = va_arg(args, int); \ if (str + sizeof(type) <= end) \ *(typeof(type) *)str = (type)(long)val4; \ value = (unsigned long long)val4; \ } \ str += sizeof(type); \ value; \ }) while (*fmt) { int read = format_decode(fmt, &spec); fmt += read; switch (spec.type) { case FORMAT_TYPE_NONE: case FORMAT_TYPE_PERCENT_CHAR: break; case FORMAT_TYPE_INVALID: goto out; case FORMAT_TYPE_WIDTH: case FORMAT_TYPE_PRECISION: width = (int)save_arg(int); /* Pointers may require the width */ if (*fmt == 'p') set_field_width(&spec, width); break; case FORMAT_TYPE_CHAR: save_arg(char); break; case FORMAT_TYPE_STR: { const char *save_str = va_arg(args, char *); const char *err_msg; size_t len; err_msg = check_pointer_msg(save_str); if (err_msg) save_str = err_msg; len = strlen(save_str) + 1; if (str + len < end) memcpy(str, save_str, len); str += len; break; } case FORMAT_TYPE_PTR: /* Dereferenced pointers must be done now */ switch (*fmt) { /* Dereference of functions is still OK */ case 'S': case 's': case 'x': case 'K': case 'e': save_arg(void *); break; default: if (!isalnum(*fmt)) { save_arg(void *); break; } str = pointer(fmt, str, end, va_arg(args, void *), spec); if (str + 1 < end) *str++ = '\0'; else end[-1] = '\0'; /* Must be nul terminated */ } /* skip all alphanumeric pointer suffixes */ while (isalnum(*fmt)) fmt++; break; default: switch (spec.type) { case FORMAT_TYPE_LONG_LONG: save_arg(long long); break; case FORMAT_TYPE_ULONG: case FORMAT_TYPE_LONG: save_arg(unsigned long); break; case FORMAT_TYPE_SIZE_T: save_arg(size_t); break; case FORMAT_TYPE_PTRDIFF: save_arg(ptrdiff_t); break; case FORMAT_TYPE_UBYTE: case FORMAT_TYPE_BYTE: save_arg(char); break; case FORMAT_TYPE_USHORT: case FORMAT_TYPE_SHORT: save_arg(short); break; default: save_arg(int); } } } out: return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; #undef save_arg } EXPORT_SYMBOL_GPL(vbin_printf); /** * bstr_printf - Format a string from binary arguments and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space * @fmt: The format string to use * @bin_buf: Binary arguments for the format string * * This function like C99 vsnprintf, but the difference is that vsnprintf gets * arguments from stack, and bstr_printf gets arguments from @bin_buf which is * a binary buffer that generated by vbin_printf. * * The format follows C99 vsnprintf, but has some extensions: * see vsnprintf comment for details. * * The return value is the number of characters which would * be generated for the given input, excluding the trailing * '\0', as per ISO C99. If you want to have the exact * number of characters written into @buf as return value * (not including the trailing '\0'), use vscnprintf(). If the * return is greater than or equal to @size, the resulting * string is truncated. 
*/ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) { struct printf_spec spec = {0}; char *str, *end; const char *args = (const char *)bin_buf; if (WARN_ON_ONCE(size > INT_MAX)) return 0; str = buf; end = buf + size; #define get_arg(type) \ ({ \ typeof(type) value; \ if (sizeof(type) == 8) { \ args = PTR_ALIGN(args, sizeof(u32)); \ *(u32 *)&value = *(u32 *)args; \ *((u32 *)&value + 1) = *(u32 *)(args + 4); \ } else { \ args = PTR_ALIGN(args, sizeof(type)); \ value = *(typeof(type) *)args; \ } \ args += sizeof(type); \ value; \ }) /* Make sure end is always >= buf */ if (end < buf) { end = ((void *)-1); size = end - buf; } while (*fmt) { const char *old_fmt = fmt; int read = format_decode(fmt, &spec); fmt += read; switch (spec.type) { case FORMAT_TYPE_NONE: { int copy = read; if (str < end) { if (copy > end - str) copy = end - str; memcpy(str, old_fmt, copy); } str += read; break; } case FORMAT_TYPE_WIDTH: set_field_width(&spec, get_arg(int)); break; case FORMAT_TYPE_PRECISION: set_precision(&spec, get_arg(int)); break; case FORMAT_TYPE_CHAR: { char c; if (!(spec.flags & LEFT)) { while (--spec.field_width > 0) { if (str < end) *str = ' '; ++str; } } c = (unsigned char) get_arg(char); if (str < end) *str = c; ++str; while (--spec.field_width > 0) { if (str < end) *str = ' '; ++str; } break; } case FORMAT_TYPE_STR: { const char *str_arg = args; args += strlen(str_arg) + 1; str = string(str, end, (char *)str_arg, spec); break; } case FORMAT_TYPE_PTR: { bool process = false; int copy, len; /* Non function dereferences were already done */ switch (*fmt) { case 'S': case 's': case 'x': case 'K': case 'e': process = true; break; default: if (!isalnum(*fmt)) { process = true; break; } /* Pointer dereference was already processed */ if (str < end) { len = copy = strlen(args); if (copy > end - str) copy = end - str; memcpy(str, args, copy); str += len; args += len + 1; } } if (process) str = pointer(fmt, str, end, get_arg(void *), spec); while (isalnum(*fmt)) fmt++; break; } case FORMAT_TYPE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; break; case FORMAT_TYPE_INVALID: goto out; default: { unsigned long long num; switch (spec.type) { case FORMAT_TYPE_LONG_LONG: num = get_arg(long long); break; case FORMAT_TYPE_ULONG: case FORMAT_TYPE_LONG: num = get_arg(unsigned long); break; case FORMAT_TYPE_SIZE_T: num = get_arg(size_t); break; case FORMAT_TYPE_PTRDIFF: num = get_arg(ptrdiff_t); break; case FORMAT_TYPE_UBYTE: num = get_arg(unsigned char); break; case FORMAT_TYPE_BYTE: num = get_arg(signed char); break; case FORMAT_TYPE_USHORT: num = get_arg(unsigned short); break; case FORMAT_TYPE_SHORT: num = get_arg(short); break; case FORMAT_TYPE_UINT: num = get_arg(unsigned int); break; default: num = get_arg(int); } str = number(str, end, num, spec); } /* default: */ } /* switch(spec.type) */ } /* while(*fmt) */ out: if (size > 0) { if (str < end) *str = '\0'; else end[-1] = '\0'; } #undef get_arg /* the trailing null byte doesn't count towards the total */ return str - buf; } EXPORT_SYMBOL_GPL(bstr_printf); /** * bprintf - Parse a format string and place args' binary value in a buffer * @bin_buf: The buffer to place args' binary value * @size: The size of the buffer(by words(32bits), not characters) * @fmt: The format string to use * @...: Arguments for the format string * * The function returns the number of words(u32) written * into @bin_buf. */ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) 
{ va_list args; int ret; va_start(args, fmt); ret = vbin_printf(bin_buf, size, fmt, args); va_end(args); return ret; } EXPORT_SYMBOL_GPL(bprintf); #endif /* CONFIG_BINARY_PRINTF */ /** * vsscanf - Unformat a buffer into a list of arguments * @buf: input buffer * @fmt: format of buffer * @args: arguments */ int vsscanf(const char *buf, const char *fmt, va_list args) { const char *str = buf; char *next; char digit; int num = 0; u8 qualifier; unsigned int base; union { long long s; unsigned long long u; } val; s16 field_width; bool is_sign; while (*fmt) { /* skip any white space in format */ /* white space in format matches any amount of * white space, including none, in the input. */ if (isspace(*fmt)) { fmt = skip_spaces(++fmt); str = skip_spaces(str); } /* anything that is not a conversion must match exactly */ if (*fmt != '%' && *fmt) { if (*fmt++ != *str++) break; continue; } if (!*fmt) break; ++fmt; /* skip this conversion. * advance both strings to next white space */ if (*fmt == '*') { if (!*str) break; while (!isspace(*fmt) && *fmt != '%' && *fmt) { /* '%*[' not yet supported, invalid format */ if (*fmt == '[') return num; fmt++; } while (!isspace(*str) && *str) str++; continue; } /* get field width */ field_width = -1; if (isdigit(*fmt)) { field_width = skip_atoi(&fmt); if (field_width <= 0) break; } /* get conversion qualifier */ qualifier = -1; if (*fmt == 'h' || _tolower(*fmt) == 'l' || *fmt == 'z') { qualifier = *fmt++; if (unlikely(qualifier == *fmt)) { if (qualifier == 'h') { qualifier = 'H'; fmt++; } else if (qualifier == 'l') { qualifier = 'L'; fmt++; } } } if (!*fmt) break; if (*fmt == 'n') { /* return number of characters read so far */ *va_arg(args, int *) = str - buf; ++fmt; continue; } if (!*str) break; base = 10; is_sign = false; switch (*fmt++) { case 'c': { char *s = (char *)va_arg(args, char*); if (field_width == -1) field_width = 1; do { *s++ = *str++; } while (--field_width > 0 && *str); num++; } continue; case 's': { char *s = (char *)va_arg(args, char *); if (field_width == -1) field_width = SHRT_MAX; /* first, skip leading white space in buffer */ str = skip_spaces(str); /* now copy until next white space */ while (*str && !isspace(*str) && field_width--) *s++ = *str++; *s = '\0'; num++; } continue; /* * Warning: This implementation of the '[' conversion specifier * deviates from its glibc counterpart in the following ways: * (1) It does NOT support ranges i.e. '-' is NOT a special * character * (2) It cannot match the closing bracket ']' itself * (3) A field width is required * (4) '%*[' (discard matching input) is currently not supported * * Example usage: * ret = sscanf("00:0a:95","%2[^:]:%2[^:]:%2[^:]", * buf1, buf2, buf3); * if (ret < 3) * // etc.. 
*/ case '[': { char *s = (char *)va_arg(args, char *); DECLARE_BITMAP(set, 256) = {0}; unsigned int len = 0; bool negate = (*fmt == '^'); /* field width is required */ if (field_width == -1) return num; if (negate) ++fmt; for ( ; *fmt && *fmt != ']'; ++fmt, ++len) __set_bit((u8)*fmt, set); /* no ']' or no character set found */ if (!*fmt || !len) return num; ++fmt; if (negate) { bitmap_complement(set, set, 256); /* exclude null '\0' byte */ __clear_bit(0, set); } /* match must be non-empty */ if (!test_bit((u8)*str, set)) return num; while (test_bit((u8)*str, set) && field_width--) *s++ = *str++; *s = '\0'; ++num; } continue; case 'o': base = 8; break; case 'x': case 'X': base = 16; break; case 'i': base = 0; fallthrough; case 'd': is_sign = true; fallthrough; case 'u': break; case '%': /* looking for '%' in str */ if (*str++ != '%') return num; continue; default: /* invalid format; stop here */ return num; } /* have some sort of integer conversion. * first, skip white space in buffer. */ str = skip_spaces(str); digit = *str; if (is_sign && digit == '-') { if (field_width == 1) break; digit = *(str + 1); } if (!digit || (base == 16 && !isxdigit(digit)) || (base == 10 && !isdigit(digit)) || (base == 8 && !isodigit(digit)) || (base == 0 && !isdigit(digit))) break; if (is_sign) val.s = simple_strntoll(str, &next, base, field_width >= 0 ? field_width : INT_MAX); else val.u = simple_strntoull(str, &next, base, field_width >= 0 ? field_width : INT_MAX); switch (qualifier) { case 'H': /* that's 'hh' in format */ if (is_sign) *va_arg(args, signed char *) = val.s; else *va_arg(args, unsigned char *) = val.u; break; case 'h': if (is_sign) *va_arg(args, short *) = val.s; else *va_arg(args, unsigned short *) = val.u; break; case 'l': if (is_sign) *va_arg(args, long *) = val.s; else *va_arg(args, unsigned long *) = val.u; break; case 'L': if (is_sign) *va_arg(args, long long *) = val.s; else *va_arg(args, unsigned long long *) = val.u; break; case 'z': *va_arg(args, size_t *) = val.u; break; default: if (is_sign) *va_arg(args, int *) = val.s; else *va_arg(args, unsigned int *) = val.u; break; } num++; if (!next) break; str = next; } return num; } EXPORT_SYMBOL(vsscanf); /** * sscanf - Unformat a buffer into a list of arguments * @buf: input buffer * @fmt: formatting of buffer * @...: resulting arguments */ int sscanf(const char *buf, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); i = vsscanf(buf, fmt, args); va_end(args); return i; } EXPORT_SYMBOL(sscanf);
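/*
 * Illustrative sketch, not part of the original file: shows the difference
 * between the snprintf() and scnprintf() return values on truncation, as
 * described in the kernel-doc above.  The buffer size and test string are
 * arbitrary; the function name is made up for the example.
 */
static void __maybe_unused snprintf_return_value_example(void)
{
	char buf[8];
	int would_have, written;

	/* "0123456789" needs 10 characters plus '\0'; buf holds only 8. */
	would_have = snprintf(buf, sizeof(buf), "%s", "0123456789");
	written = scnprintf(buf, sizeof(buf), "%s", "0123456789");

	/* would_have == 10 (the C99 value), written == 7 (chars stored). */
	pr_debug("snprintf=%d scnprintf=%d buf=\"%s\"\n",
		 would_have, written, buf);
}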
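/*
 * Illustrative sketch, not part of the original file: packs arguments into a
 * u32 word buffer with bprintf() and expands them into text later with
 * bstr_printf(), the deferred-formatting pattern used by the tracing code.
 * Only meaningful under CONFIG_BINARY_PRINTF; buffer sizes and the function
 * name are made up for the example.  The same format string must be used for
 * both steps so the argument layout matches.
 */
#ifdef CONFIG_BINARY_PRINTF
static void __maybe_unused binary_printf_example(void)
{
	static const char fmt[] = "pid %d comm %s";
	u32 bin[32];		/* @size for bprintf() is in 32-bit words */
	char text[64];
	int words, len;

	/* Step 1: capture the arguments in binary form (cheap). */
	words = bprintf(bin, ARRAY_SIZE(bin), fmt, 42, "example");

	/* Step 2: later, format the saved arguments into readable text. */
	if (words > 0 && words <= (int)ARRAY_SIZE(bin)) {
		len = bstr_printf(text, sizeof(text), fmt, bin);
		pr_debug("formatted %d chars: %s\n", len, text);
	}
}
#endif /* CONFIG_BINARY_PRINTF */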
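/*
 * Illustrative sketch, not part of the original file: parses a small
 * "reg=<hex> id=<dec>" string with sscanf(), using a field width, the 'x'
 * and 'd' conversions and %n to report how many characters were consumed.
 * The input string, format and names are made up for the example.
 */
static void __maybe_unused vsscanf_usage_example(void)
{
	unsigned int reg;
	int id, consumed, ret;

	ret = sscanf("reg=0x1a4 id=7", "reg=0x%3x id=%d%n",
		     &reg, &id, &consumed);

	/* ret == 2; %n is not counted as a conversion, it just stores 14. */
	if (ret == 2)
		pr_debug("reg=%#x id=%d consumed=%d\n", reg, id, consumed);
}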
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2012 Hans Verkuil <hverkuil@xs4all.nl> */ /* kernel includes */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/videodev2.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> #include <linux/usb.h> #include <linux/mutex.h> /* driver and module definitions */ MODULE_AUTHOR("Hans Verkuil <hverkuil@xs4all.nl>"); MODULE_DESCRIPTION("Keene FM Transmitter driver"); MODULE_LICENSE("GPL"); /* Actually, it advertises itself as a Logitech */ #define USB_KEENE_VENDOR 0x046d #define USB_KEENE_PRODUCT 0x0a0e /* Probably USB_TIMEOUT should be modified in module parameter */ #define BUFFER_LENGTH 8 #define USB_TIMEOUT 500 /* Frequency limits in MHz */ #define FREQ_MIN 76U #define FREQ_MAX 108U #define FREQ_MUL 16000U /* USB Device ID List */ static const struct usb_device_id usb_keene_device_table[] = { {USB_DEVICE_AND_INTERFACE_INFO(USB_KEENE_VENDOR, USB_KEENE_PRODUCT, USB_CLASS_HID, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_keene_device_table); struct keene_device { struct usb_device *usbdev; struct usb_interface *intf; struct video_device vdev; struct v4l2_device v4l2_dev; struct v4l2_ctrl_handler hdl; struct mutex lock; u8 *buffer; unsigned curfreq; u8 tx; u8 pa; bool stereo; bool muted; bool preemph_75_us; }; static inline struct keene_device *to_keene_dev(struct v4l2_device *v4l2_dev) { return container_of(v4l2_dev, struct keene_device, v4l2_dev); } /* Set frequency (if non-0), PA, mute and turn on/off the FM transmitter. */ static int keene_cmd_main(struct keene_device *radio, unsigned freq, bool play) { unsigned short freq_send = freq ?
(freq - 76 * 16000) / 800 : 0; int ret; radio->buffer[0] = 0x00; radio->buffer[1] = 0x50; radio->buffer[2] = (freq_send >> 8) & 0xff; radio->buffer[3] = freq_send & 0xff; radio->buffer[4] = radio->pa; /* If bit 4 is set, then tune to the frequency. If bit 3 is set, then unmute; if bit 2 is set, then mute. If bit 1 is set, then enter idle mode; if bit 0 is set, then enter transmit mode. */ radio->buffer[5] = (radio->muted ? 4 : 8) | (play ? 1 : 2) | (freq ? 0x10 : 0); radio->buffer[6] = 0x00; radio->buffer[7] = 0x00; ret = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), 9, 0x21, 0x200, 2, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (ret < 0) { dev_warn(&radio->vdev.dev, "%s failed (%d)\n", __func__, ret); return ret; } if (freq) radio->curfreq = freq; return 0; } /* Set TX, stereo and preemphasis mode (50 us vs 75 us). */ static int keene_cmd_set(struct keene_device *radio) { int ret; radio->buffer[0] = 0x00; radio->buffer[1] = 0x51; radio->buffer[2] = radio->tx; /* If bit 0 is set, then transmit mono, otherwise stereo. If bit 2 is set, then enable 75 us preemphasis, otherwise it is 50 us. */ radio->buffer[3] = (radio->stereo ? 0 : 1) | (radio->preemph_75_us ? 4 : 0); radio->buffer[4] = 0x00; radio->buffer[5] = 0x00; radio->buffer[6] = 0x00; radio->buffer[7] = 0x00; ret = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), 9, 0x21, 0x200, 2, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (ret < 0) { dev_warn(&radio->vdev.dev, "%s failed (%d)\n", __func__, ret); return ret; } return 0; } /* Handle unplugging the device. * We call video_unregister_device in any case. * The last function called in this procedure is * usb_keene_device_release. */ static void usb_keene_disconnect(struct usb_interface *intf) { struct keene_device *radio = to_keene_dev(usb_get_intfdata(intf)); mutex_lock(&radio->lock); usb_set_intfdata(intf, NULL); video_unregister_device(&radio->vdev); v4l2_device_disconnect(&radio->v4l2_dev); mutex_unlock(&radio->lock); v4l2_device_put(&radio->v4l2_dev); } static int usb_keene_suspend(struct usb_interface *intf, pm_message_t message) { struct keene_device *radio = to_keene_dev(usb_get_intfdata(intf)); return keene_cmd_main(radio, 0, false); } static int usb_keene_resume(struct usb_interface *intf) { struct keene_device *radio = to_keene_dev(usb_get_intfdata(intf)); mdelay(50); keene_cmd_set(radio); keene_cmd_main(radio, radio->curfreq, true); return 0; } static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *v) { struct keene_device *radio = video_drvdata(file); strscpy(v->driver, "radio-keene", sizeof(v->driver)); strscpy(v->card, "Keene FM Transmitter", sizeof(v->card)); usb_make_path(radio->usbdev, v->bus_info, sizeof(v->bus_info)); return 0; } static int vidioc_g_modulator(struct file *file, void *priv, struct v4l2_modulator *v) { struct keene_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; strscpy(v->name, "FM", sizeof(v->name)); v->rangelow = FREQ_MIN * FREQ_MUL; v->rangehigh = FREQ_MAX * FREQ_MUL; v->txsubchans = radio->stereo ? 
V4L2_TUNER_SUB_STEREO : V4L2_TUNER_SUB_MONO; v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO; return 0; } static int vidioc_s_modulator(struct file *file, void *priv, const struct v4l2_modulator *v) { struct keene_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; radio->stereo = (v->txsubchans == V4L2_TUNER_SUB_STEREO); return keene_cmd_set(radio); } static int vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct keene_device *radio = video_drvdata(file); unsigned freq = f->frequency; if (f->tuner != 0 || f->type != V4L2_TUNER_RADIO) return -EINVAL; freq = clamp(freq, FREQ_MIN * FREQ_MUL, FREQ_MAX * FREQ_MUL); return keene_cmd_main(radio, freq, true); } static int vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct keene_device *radio = video_drvdata(file); if (f->tuner != 0) return -EINVAL; f->type = V4L2_TUNER_RADIO; f->frequency = radio->curfreq; return 0; } static int keene_s_ctrl(struct v4l2_ctrl *ctrl) { static const u8 db2tx[] = { /* -15, -12, -9, -6, -3, 0 dB */ 0x03, 0x13, 0x02, 0x12, 0x22, 0x32, /* 3, 6, 9, 12, 15, 18 dB */ 0x21, 0x31, 0x20, 0x30, 0x40, 0x50 }; struct keene_device *radio = container_of(ctrl->handler, struct keene_device, hdl); switch (ctrl->id) { case V4L2_CID_AUDIO_MUTE: radio->muted = ctrl->val; return keene_cmd_main(radio, 0, true); case V4L2_CID_TUNE_POWER_LEVEL: /* To go from dBuV to the register value we apply the following formula: */ radio->pa = (ctrl->val - 71) * 100 / 62; return keene_cmd_main(radio, 0, true); case V4L2_CID_TUNE_PREEMPHASIS: radio->preemph_75_us = ctrl->val == V4L2_PREEMPHASIS_75_uS; return keene_cmd_set(radio); case V4L2_CID_AUDIO_COMPRESSION_GAIN: radio->tx = db2tx[(ctrl->val - (s32)ctrl->minimum) / (s32)ctrl->step]; return keene_cmd_set(radio); } return -EINVAL; } /* File system interface */ static const struct v4l2_file_operations usb_keene_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = v4l2_fh_release, .poll = v4l2_ctrl_poll, .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ctrl_ops keene_ctrl_ops = { .s_ctrl = keene_s_ctrl, }; static const struct v4l2_ioctl_ops usb_keene_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_g_modulator = vidioc_g_modulator, .vidioc_s_modulator = vidioc_s_modulator, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, .vidioc_log_status = v4l2_ctrl_log_status, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; static void usb_keene_video_device_release(struct v4l2_device *v4l2_dev) { struct keene_device *radio = to_keene_dev(v4l2_dev); /* free rest memory */ v4l2_ctrl_handler_free(&radio->hdl); kfree(radio->buffer); kfree(radio); } /* check if the device is present and register with v4l and usb if it is */ static int usb_keene_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); struct keene_device *radio; struct v4l2_ctrl_handler *hdl; int retval = 0; /* * The Keene FM transmitter USB device has the same USB ID as * the Logitech AudioHub Speaker, but it should ignore the hid. * Check if the name is that of the Keene device. * If not, then someone connected the AudioHub and we shouldn't * attempt to handle this driver. * For reference: the product name of the AudioHub is * "AudioHub Speaker". 
*/ if (dev->product && strcmp(dev->product, "B-LINK USB Audio ")) return -ENODEV; radio = kzalloc(sizeof(struct keene_device), GFP_KERNEL); if (radio) radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); if (!radio || !radio->buffer) { dev_err(&intf->dev, "kmalloc for keene_device failed\n"); kfree(radio); retval = -ENOMEM; goto err; } hdl = &radio->hdl; v4l2_ctrl_handler_init(hdl, 4); v4l2_ctrl_new_std(hdl, &keene_ctrl_ops, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0); v4l2_ctrl_new_std_menu(hdl, &keene_ctrl_ops, V4L2_CID_TUNE_PREEMPHASIS, V4L2_PREEMPHASIS_75_uS, 1, V4L2_PREEMPHASIS_50_uS); v4l2_ctrl_new_std(hdl, &keene_ctrl_ops, V4L2_CID_TUNE_POWER_LEVEL, 84, 118, 1, 118); v4l2_ctrl_new_std(hdl, &keene_ctrl_ops, V4L2_CID_AUDIO_COMPRESSION_GAIN, -15, 18, 3, 0); radio->pa = 118; radio->tx = 0x32; radio->stereo = true; if (hdl->error) { retval = hdl->error; v4l2_ctrl_handler_free(hdl); goto err_v4l2; } retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); if (retval < 0) { dev_err(&intf->dev, "couldn't register v4l2_device\n"); goto err_v4l2; } mutex_init(&radio->lock); radio->v4l2_dev.ctrl_handler = hdl; radio->v4l2_dev.release = usb_keene_video_device_release; strscpy(radio->vdev.name, radio->v4l2_dev.name, sizeof(radio->vdev.name)); radio->vdev.v4l2_dev = &radio->v4l2_dev; radio->vdev.fops = &usb_keene_fops; radio->vdev.ioctl_ops = &usb_keene_ioctl_ops; radio->vdev.lock = &radio->lock; radio->vdev.release = video_device_release_empty; radio->vdev.vfl_dir = VFL_DIR_TX; radio->vdev.device_caps = V4L2_CAP_RADIO | V4L2_CAP_MODULATOR; radio->usbdev = interface_to_usbdev(intf); radio->intf = intf; usb_set_intfdata(intf, &radio->v4l2_dev); video_set_drvdata(&radio->vdev, radio); /* at least 11ms is needed in order to settle hardware */ msleep(20); keene_cmd_main(radio, 95.16 * FREQ_MUL, false); retval = video_register_device(&radio->vdev, VFL_TYPE_RADIO, -1); if (retval < 0) { dev_err(&intf->dev, "could not register video device\n"); goto err_vdev; } v4l2_ctrl_handler_setup(hdl); dev_info(&intf->dev, "V4L2 device registered as %s\n", video_device_node_name(&radio->vdev)); return 0; err_vdev: v4l2_device_unregister(&radio->v4l2_dev); err_v4l2: kfree(radio->buffer); kfree(radio); err: return retval; } /* USB subsystem interface */ static struct usb_driver usb_keene_driver = { .name = "radio-keene", .probe = usb_keene_probe, .disconnect = usb_keene_disconnect, .id_table = usb_keene_device_table, .suspend = usb_keene_suspend, .resume = usb_keene_resume, .reset_resume = usb_keene_resume, }; module_usb_driver(usb_keene_driver);
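/*
 * Illustrative sketch, not part of the original file: spells out the register
 * encodings used above.  keene_cmd_main() sends (freq - 76 * 16000) / 800 as
 * the tuning word, i.e. the V4L2 frequency (in units of 1/16000 MHz) offset
 * from 76 MHz in 50 kHz steps, and keene_s_ctrl() maps the power level in
 * dBuV to the PA register with (val - 71) * 100 / 62.  The helper names are
 * made up for the example.
 */
static inline unsigned int keene_freq_to_reg_example(unsigned int freq)
{
	/* e.g. 95.16 MHz -> (1522560 - 1216000) / 800 = 383 (integer div) */
	return (freq - FREQ_MIN * FREQ_MUL) / 800;
}

static inline unsigned int keene_reg_to_freq_example(unsigned int reg)
{
	/* inverse mapping back to V4L2 frequency units */
	return FREQ_MIN * FREQ_MUL + reg * 800;
}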
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2008 IBM Corporation * * Authors: * Mimi Zohar <zohar@us.ibm.com> * * File: ima_iint.c * - implements the IMA hook: ima_inode_free * - cache integrity information in the inode security blob */ #include <linux/slab.h> #include "ima.h" static struct kmem_cache *ima_iint_cache __ro_after_init; /** * ima_iint_find - Return the iint associated with an inode * @inode: Pointer to the inode * * Return the IMA integrity information (iint) associated with an inode, if the * inode was processed by IMA. * * Return: Found iint or NULL. */ struct ima_iint_cache *ima_iint_find(struct inode *inode) { if (!IS_IMA(inode)) return NULL; return ima_inode_get_iint(inode); } #define IMA_MAX_NESTING (FILESYSTEM_MAX_STACK_DEPTH + 1) /* * It is not clear that IMA should be nested at all, but as long as it measures * files both on overlayfs and on underlying fs, we need to annotate the iint * mutex to avoid lockdep false positives related to IMA + overlayfs. * See ovl_lockdep_annotate_inode_mutex_key() for more details. */ static inline void ima_iint_lockdep_annotate(struct ima_iint_cache *iint, struct inode *inode) { #ifdef CONFIG_LOCKDEP static struct lock_class_key ima_iint_mutex_key[IMA_MAX_NESTING]; int depth = inode->i_sb->s_stack_depth; if (WARN_ON_ONCE(depth < 0 || depth >= IMA_MAX_NESTING)) depth = 0; lockdep_set_class(&iint->mutex, &ima_iint_mutex_key[depth]); #endif } static void ima_iint_init_always(struct ima_iint_cache *iint, struct inode *inode) { iint->ima_hash = NULL; iint->version = 0; iint->flags = 0UL; iint->atomic_flags = 0UL; iint->ima_file_status = INTEGRITY_UNKNOWN; iint->ima_mmap_status = INTEGRITY_UNKNOWN; iint->ima_bprm_status = INTEGRITY_UNKNOWN; iint->ima_read_status = INTEGRITY_UNKNOWN; iint->ima_creds_status = INTEGRITY_UNKNOWN; iint->measured_pcrs = 0; mutex_init(&iint->mutex); ima_iint_lockdep_annotate(iint, inode); } static void ima_iint_free(struct ima_iint_cache *iint) { kfree(iint->ima_hash); mutex_destroy(&iint->mutex); kmem_cache_free(ima_iint_cache, iint); } /** * ima_inode_get - Find or allocate an iint associated with an inode * @inode: Pointer to the inode * * Find an iint associated with an inode, and allocate a new one if not found. * Caller must lock i_mutex. * * Return: An iint on success, NULL on error. */ struct ima_iint_cache *ima_inode_get(struct inode *inode) { struct ima_iint_cache *iint; iint = ima_iint_find(inode); if (iint) return iint; iint = kmem_cache_alloc(ima_iint_cache, GFP_NOFS); if (!iint) return NULL; ima_iint_init_always(iint, inode); inode->i_flags |= S_IMA; ima_inode_set_iint(inode, iint); return iint; } /** * ima_inode_free - Called on inode free * @inode: Pointer to the inode * * Free the iint associated with an inode.
*/ void ima_inode_free(struct inode *inode) { struct ima_iint_cache *iint; if (!IS_IMA(inode)) return; iint = ima_iint_find(inode); ima_inode_set_iint(inode, NULL); ima_iint_free(iint); } static void ima_iint_init_once(void *foo) { struct ima_iint_cache *iint = (struct ima_iint_cache *)foo; memset(iint, 0, sizeof(*iint)); } void __init ima_iintcache_init(void) { ima_iint_cache = kmem_cache_create("ima_iint_cache", sizeof(struct ima_iint_cache), 0, SLAB_PANIC, ima_iint_init_once); }
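/*
 * Illustrative sketch, not part of the original file: the typical
 * lookup-or-allocate pattern for the iint cache.  ima_inode_get() must be
 * called with the inode lock held, as noted in its kernel-doc above; the
 * function name below is made up for the example.
 */
static int __maybe_unused ima_iint_get_example(struct inode *inode)
{
	struct ima_iint_cache *iint;

	inode_lock(inode);
	iint = ima_inode_get(inode);	/* finds or allocates the iint */
	inode_unlock(inode);

	if (!iint)
		return -ENOMEM;

	/* iint->mutex serializes updates to the cached integrity state */
	mutex_lock(&iint->mutex);
	/* ... inspect or update iint->flags, iint->ima_file_status, ... */
	mutex_unlock(&iint->mutex);

	return 0;
}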
// SPDX-License-Identifier: GPL-2.0-or-later /* *
Information interface for ALSA driver * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/module.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/info.h> #include <linux/utsname.h> #include <linux/proc_fs.h> #include <linux/mutex.h> int snd_info_check_reserved_words(const char *str) { static const char * const reserved[] = { "version", "meminfo", "memdebug", "detect", "devices", "oss", "cards", "timers", "synth", "pcm", "seq", NULL }; const char * const *xstr = reserved; while (*xstr) { if (!strcmp(*xstr, str)) return 0; xstr++; } if (!strncmp(str, "card", 4)) return 0; return 1; } static DEFINE_MUTEX(info_mutex); struct snd_info_private_data { struct snd_info_buffer *rbuffer; struct snd_info_buffer *wbuffer; struct snd_info_entry *entry; void *file_private_data; }; static int snd_info_version_init(void); static void snd_info_clear_entries(struct snd_info_entry *entry); /* */ static struct snd_info_entry *snd_proc_root; struct snd_info_entry *snd_seq_root; EXPORT_SYMBOL(snd_seq_root); #ifdef CONFIG_SND_OSSEMUL struct snd_info_entry *snd_oss_root; #endif static int alloc_info_private(struct snd_info_entry *entry, struct snd_info_private_data **ret) { struct snd_info_private_data *data; if (!entry || !entry->p) return -ENODEV; if (!try_module_get(entry->module)) return -EFAULT; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) { module_put(entry->module); return -ENOMEM; } data->entry = entry; *ret = data; return 0; } static bool valid_pos(loff_t pos, size_t count) { if (pos < 0 || (long) pos != pos || (ssize_t) count < 0) return false; if ((unsigned long) pos + (unsigned long) count < (unsigned long) pos) return false; return true; } /* * file ops for binary proc files */ static loff_t snd_info_entry_llseek(struct file *file, loff_t offset, int orig) { struct snd_info_private_data *data; struct snd_info_entry *entry; loff_t size; data = file->private_data; entry = data->entry; guard(mutex)(&entry->access); if (entry->c.ops->llseek) return entry->c.ops->llseek(entry, data->file_private_data, file, offset, orig); size = entry->size; switch (orig) { case SEEK_SET: break; case SEEK_CUR: offset += file->f_pos; break; case SEEK_END: if (!size) return -EINVAL; offset += size; break; default: return -EINVAL; } if (offset < 0) return -EINVAL; if (size && offset > size) offset = size; file->f_pos = offset; return offset; } static ssize_t snd_info_entry_read(struct file *file, char __user *buffer, size_t count, loff_t * offset) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; size_t size; loff_t pos; pos = *offset; if (!valid_pos(pos, count)) return -EIO; if (pos >= entry->size) return 0; size = entry->size - pos; size = min(count, size); size = entry->c.ops->read(entry, data->file_private_data, file, buffer, size, pos); if ((ssize_t) size > 0) *offset = pos + size; return size; } static ssize_t snd_info_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t * offset) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; ssize_t size = 0; loff_t pos; pos = *offset; if (!valid_pos(pos, count)) return -EIO; if (count > 0) { size_t maxsize = entry->size - pos; count = min(count, maxsize); size = entry->c.ops->write(entry, data->file_private_data, file, buffer, count, pos); } if (size > 0) *offset = pos + size; return 
size; } static __poll_t snd_info_entry_poll(struct file *file, poll_table *wait) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; __poll_t mask = 0; if (entry->c.ops->poll) return entry->c.ops->poll(entry, data->file_private_data, file, wait); if (entry->c.ops->read) mask |= EPOLLIN | EPOLLRDNORM; if (entry->c.ops->write) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } static long snd_info_entry_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; if (!entry->c.ops->ioctl) return -ENOTTY; return entry->c.ops->ioctl(entry, data->file_private_data, file, cmd, arg); } static int snd_info_entry_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); struct snd_info_private_data *data; struct snd_info_entry *entry; data = file->private_data; if (data == NULL) return 0; entry = data->entry; if (!entry->c.ops->mmap) return -ENXIO; return entry->c.ops->mmap(entry, data->file_private_data, inode, file, vma); } static int snd_info_entry_open(struct inode *inode, struct file *file) { struct snd_info_entry *entry = pde_data(inode); struct snd_info_private_data *data; int mode, err; guard(mutex)(&info_mutex); err = alloc_info_private(entry, &data); if (err < 0) return err; mode = file->f_flags & O_ACCMODE; if (((mode == O_RDONLY || mode == O_RDWR) && !entry->c.ops->read) || ((mode == O_WRONLY || mode == O_RDWR) && !entry->c.ops->write)) { err = -ENODEV; goto error; } if (entry->c.ops->open) { err = entry->c.ops->open(entry, mode, &data->file_private_data); if (err < 0) goto error; } file->private_data = data; return 0; error: kfree(data); module_put(entry->module); return err; } static int snd_info_entry_release(struct inode *inode, struct file *file) { struct snd_info_private_data *data = file->private_data; struct snd_info_entry *entry = data->entry; if (entry->c.ops->release) entry->c.ops->release(entry, file->f_flags & O_ACCMODE, data->file_private_data); module_put(entry->module); kfree(data); return 0; } static const struct proc_ops snd_info_entry_operations = { .proc_lseek = snd_info_entry_llseek, .proc_read = snd_info_entry_read, .proc_write = snd_info_entry_write, .proc_poll = snd_info_entry_poll, .proc_ioctl = snd_info_entry_ioctl, .proc_mmap = snd_info_entry_mmap, .proc_open = snd_info_entry_open, .proc_release = snd_info_entry_release, }; /* * file ops for text proc files */ static ssize_t snd_info_text_entry_write(struct file *file, const char __user *buffer, size_t count, loff_t *offset) { struct seq_file *m = file->private_data; struct snd_info_private_data *data = m->private; struct snd_info_entry *entry = data->entry; struct snd_info_buffer *buf; loff_t pos; size_t next; if (!entry->c.text.write) return -EIO; pos = *offset; if (!valid_pos(pos, count)) return -EIO; next = pos + count; /* don't handle too large text inputs */ if (next > 16 * 1024) return -EIO; guard(mutex)(&entry->access); buf = data->wbuffer; if (!buf) { data->wbuffer = buf = kzalloc(sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; } if (next > buf->len) { char *nbuf = kvzalloc(PAGE_ALIGN(next), GFP_KERNEL); if (!nbuf) return -ENOMEM; kvfree(buf->buffer); buf->buffer = nbuf; buf->len = PAGE_ALIGN(next); } if (copy_from_user(buf->buffer + pos, buffer, count)) return -EFAULT; buf->size = next; *offset = next; return count; } static int snd_info_seq_show(struct seq_file *seq, void *p) { struct snd_info_private_data 
*data = seq->private; struct snd_info_entry *entry = data->entry; if (!entry->c.text.read) { return -EIO; } else { data->rbuffer->buffer = (char *)seq; /* XXX hack! */ entry->c.text.read(entry, data->rbuffer); } return 0; } static int snd_info_text_entry_open(struct inode *inode, struct file *file) { struct snd_info_entry *entry = pde_data(inode); struct snd_info_private_data *data; int err; guard(mutex)(&info_mutex); err = alloc_info_private(entry, &data); if (err < 0) return err; data->rbuffer = kzalloc(sizeof(*data->rbuffer), GFP_KERNEL); if (!data->rbuffer) { err = -ENOMEM; goto error; } if (entry->size) err = single_open_size(file, snd_info_seq_show, data, entry->size); else err = single_open(file, snd_info_seq_show, data); if (err < 0) goto error; return 0; error: kfree(data->rbuffer); kfree(data); module_put(entry->module); return err; } static int snd_info_text_entry_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; struct snd_info_private_data *data = m->private; struct snd_info_entry *entry = data->entry; if (data->wbuffer && entry->c.text.write) entry->c.text.write(entry, data->wbuffer); single_release(inode, file); kfree(data->rbuffer); if (data->wbuffer) { kvfree(data->wbuffer->buffer); kfree(data->wbuffer); } module_put(entry->module); kfree(data); return 0; } static const struct proc_ops snd_info_text_entry_ops = { .proc_open = snd_info_text_entry_open, .proc_release = snd_info_text_entry_release, .proc_write = snd_info_text_entry_write, .proc_lseek = seq_lseek, .proc_read = seq_read, }; static struct snd_info_entry *create_subdir(struct module *mod, const char *name) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(mod, name, NULL); if (!entry) return NULL; entry->mode = S_IFDIR | 0555; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); return NULL; } return entry; } static struct snd_info_entry * snd_info_create_entry(const char *name, struct snd_info_entry *parent, struct module *module); int __init snd_info_init(void) { snd_proc_root = snd_info_create_entry("asound", NULL, THIS_MODULE); if (!snd_proc_root) return -ENOMEM; snd_proc_root->mode = S_IFDIR | 0555; snd_proc_root->p = proc_mkdir("asound", NULL); if (!snd_proc_root->p) goto error; #ifdef CONFIG_SND_OSSEMUL snd_oss_root = create_subdir(THIS_MODULE, "oss"); if (!snd_oss_root) goto error; #endif #if IS_ENABLED(CONFIG_SND_SEQUENCER) snd_seq_root = create_subdir(THIS_MODULE, "seq"); if (!snd_seq_root) goto error; #endif if (snd_info_version_init() < 0 || snd_minor_info_init() < 0 || snd_minor_info_oss_init() < 0 || snd_card_info_init() < 0 || snd_info_minor_register() < 0) goto error; return 0; error: snd_info_free_entry(snd_proc_root); return -ENOMEM; } int __exit snd_info_done(void) { snd_info_free_entry(snd_proc_root); return 0; } static void snd_card_id_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_card *card = entry->private_data; snd_iprintf(buffer, "%s\n", card->id); } /* * create a card proc file * called from init.c */ int snd_info_card_create(struct snd_card *card) { char str[8]; struct snd_info_entry *entry; if (snd_BUG_ON(!card)) return -ENXIO; sprintf(str, "card%i", card->number); entry = create_subdir(card->module, str); if (!entry) return -ENOMEM; card->proc_root = entry; return snd_card_ro_proc_new(card, "id", card, snd_card_id_read); } /* * register the card proc file * called from init.c * can be called multiple times for reinitialization */ int snd_info_card_register(struct snd_card *card) { 
struct proc_dir_entry *p; int err; if (snd_BUG_ON(!card)) return -ENXIO; err = snd_info_register(card->proc_root); if (err < 0) return err; if (!strcmp(card->id, card->proc_root->name)) return 0; if (card->proc_root_link) return 0; p = proc_symlink(card->id, snd_proc_root->p, card->proc_root->name); if (!p) return -ENOMEM; card->proc_root_link = p; return 0; } /* * called on card->id change */ void snd_info_card_id_change(struct snd_card *card) { guard(mutex)(&info_mutex); if (card->proc_root_link) { proc_remove(card->proc_root_link); card->proc_root_link = NULL; } if (strcmp(card->id, card->proc_root->name)) card->proc_root_link = proc_symlink(card->id, snd_proc_root->p, card->proc_root->name); } /* * de-register the card proc file * called from init.c */ void snd_info_card_disconnect(struct snd_card *card) { if (!card) return; proc_remove(card->proc_root_link); if (card->proc_root) proc_remove(card->proc_root->p); guard(mutex)(&info_mutex); if (card->proc_root) snd_info_clear_entries(card->proc_root); card->proc_root_link = NULL; card->proc_root = NULL; } /* * release the card proc file resources * called from init.c */ int snd_info_card_free(struct snd_card *card) { if (!card) return 0; snd_info_free_entry(card->proc_root); card->proc_root = NULL; return 0; } /** * snd_info_get_line - read one line from the procfs buffer * @buffer: the procfs buffer * @line: the buffer to store * @len: the max. buffer size * * Reads one line from the buffer and stores the string. * * Return: Zero if successful, or 1 if error or EOF. */ int snd_info_get_line(struct snd_info_buffer *buffer, char *line, int len) { int c; if (snd_BUG_ON(!buffer)) return 1; if (!buffer->buffer) return 1; if (len <= 0 || buffer->stop || buffer->error) return 1; while (!buffer->stop) { c = buffer->buffer[buffer->curr++]; if (buffer->curr >= buffer->size) buffer->stop = 1; if (c == '\n') break; if (len > 1) { len--; *line++ = c; } } *line = '\0'; return 0; } EXPORT_SYMBOL(snd_info_get_line); /** * snd_info_get_str - parse a string token * @dest: the buffer to store the string token * @src: the original string * @len: the max. length of token - 1 * * Parses the original string and copy a token to the given * string buffer. * * Return: The updated pointer of the original string so that * it can be used for the next call. */ const char *snd_info_get_str(char *dest, const char *src, int len) { int c; while (*src == ' ' || *src == '\t') src++; if (*src == '"' || *src == '\'') { c = *src++; while (--len > 0 && *src && *src != c) { *dest++ = *src++; } if (*src == c) src++; } else { while (--len > 0 && *src && *src != ' ' && *src != '\t') { *dest++ = *src++; } } *dest = 0; while (*src == ' ' || *src == '\t') src++; return src; } EXPORT_SYMBOL(snd_info_get_str); /* * snd_info_create_entry - create an info entry * @name: the proc file name * @parent: the parent directory * * Creates an info entry with the given file name and initializes as * the default state. * * Usually called from other functions such as * snd_info_create_card_entry(). * * Return: The pointer of the new instance, or %NULL on failure. 
*/ static struct snd_info_entry * snd_info_create_entry(const char *name, struct snd_info_entry *parent, struct module *module) { struct snd_info_entry *entry; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return NULL; entry->name = kstrdup(name, GFP_KERNEL); if (entry->name == NULL) { kfree(entry); return NULL; } entry->mode = S_IFREG | 0444; entry->content = SNDRV_INFO_CONTENT_TEXT; mutex_init(&entry->access); INIT_LIST_HEAD(&entry->children); INIT_LIST_HEAD(&entry->list); entry->parent = parent; entry->module = module; if (parent) { guard(mutex)(&parent->access); list_add_tail(&entry->list, &parent->children); } return entry; } /** * snd_info_create_module_entry - create an info entry for the given module * @module: the module pointer * @name: the file name * @parent: the parent directory * * Creates a new info entry and assigns it to the given module. * * Return: The pointer of the new instance, or %NULL on failure. */ struct snd_info_entry *snd_info_create_module_entry(struct module * module, const char *name, struct snd_info_entry *parent) { if (!parent) parent = snd_proc_root; return snd_info_create_entry(name, parent, module); } EXPORT_SYMBOL(snd_info_create_module_entry); /** * snd_info_create_card_entry - create an info entry for the given card * @card: the card instance * @name: the file name * @parent: the parent directory * * Creates a new info entry and assigns it to the given card. * * Return: The pointer of the new instance, or %NULL on failure. */ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, const char *name, struct snd_info_entry * parent) { if (!parent) parent = card->proc_root; return snd_info_create_entry(name, parent, card->module); } EXPORT_SYMBOL(snd_info_create_card_entry); static void snd_info_clear_entries(struct snd_info_entry *entry) { struct snd_info_entry *p; if (!entry->p) return; list_for_each_entry(p, &entry->children, list) snd_info_clear_entries(p); entry->p = NULL; } /** * snd_info_free_entry - release the info entry * @entry: the info entry * * Releases the info entry. */ void snd_info_free_entry(struct snd_info_entry * entry) { struct snd_info_entry *p, *n; if (!entry) return; if (entry->p) { proc_remove(entry->p); guard(mutex)(&info_mutex); snd_info_clear_entries(entry); } /* free all children at first */ list_for_each_entry_safe(p, n, &entry->children, list) snd_info_free_entry(p); p = entry->parent; if (p) { guard(mutex)(&p->access); list_del(&entry->list); } kfree(entry->name); if (entry->private_free) entry->private_free(entry); kfree(entry); } EXPORT_SYMBOL(snd_info_free_entry); static int __snd_info_register(struct snd_info_entry *entry) { struct proc_dir_entry *root, *p = NULL; if (snd_BUG_ON(!entry)) return -ENXIO; root = entry->parent == NULL ? snd_proc_root->p : entry->parent->p; guard(mutex)(&info_mutex); if (entry->p || !root) return 0; if (S_ISDIR(entry->mode)) { p = proc_mkdir_mode(entry->name, entry->mode, root); if (!p) return -ENOMEM; } else { const struct proc_ops *ops; if (entry->content == SNDRV_INFO_CONTENT_DATA) ops = &snd_info_entry_operations; else ops = &snd_info_text_entry_ops; p = proc_create_data(entry->name, entry->mode, root, ops, entry); if (!p) return -ENOMEM; proc_set_size(p, entry->size); } entry->p = p; return 0; } /** * snd_info_register - register the info entry * @entry: the info entry * * Registers the proc info entry. * The all children entries are registered recursively. * * Return: Zero if successful, or a negative error code on failure. 
*/ int snd_info_register(struct snd_info_entry *entry) { struct snd_info_entry *p; int err; if (!entry->p) { err = __snd_info_register(entry); if (err < 0) return err; } list_for_each_entry(p, &entry->children, list) { err = snd_info_register(p); if (err < 0) return err; } return 0; } EXPORT_SYMBOL(snd_info_register); /** * snd_card_rw_proc_new - Create a read/write text proc file entry for the card * @card: the card instance * @name: the file name * @private_data: the arbitrary private data * @read: the read callback * @write: the write callback, NULL for read-only * * This proc file entry will be registered via snd_card_register() call, and * it will be removed automatically at the card removal, too. * * Return: zero if successful, or a negative error code */ int snd_card_rw_proc_new(struct snd_card *card, const char *name, void *private_data, void (*read)(struct snd_info_entry *, struct snd_info_buffer *), void (*write)(struct snd_info_entry *entry, struct snd_info_buffer *buffer)) { struct snd_info_entry *entry; entry = snd_info_create_card_entry(card, name, card->proc_root); if (!entry) return -ENOMEM; snd_info_set_text_ops(entry, private_data, read); if (write) { entry->mode |= 0200; entry->c.text.write = write; } return 0; } EXPORT_SYMBOL_GPL(snd_card_rw_proc_new); /* */ static void snd_info_version_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { snd_iprintf(buffer, "Advanced Linux Sound Architecture Driver Version k%s.\n", init_utsname()->release); } static int __init snd_info_version_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "version", NULL); if (entry == NULL) return -ENOMEM; entry->c.text.read = snd_info_version_read; return snd_info_register(entry); /* freed in error path */ }
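/*
 * Illustrative sketch, not part of the original file: a driver-style use of
 * snd_card_rw_proc_new() above, creating a "state" text entry with a read
 * and a write callback.  The entry name and callback names are made up for
 * the example; snd_iprintf() and snd_info_get_line() are used as documented.
 */
static void example_proc_read(struct snd_info_entry *entry,
			      struct snd_info_buffer *buffer)
{
	struct snd_card *card = entry->private_data;

	snd_iprintf(buffer, "card %i: %s\n", card->number, card->id);
}

static void example_proc_write(struct snd_info_entry *entry,
			       struct snd_info_buffer *buffer)
{
	char line[64];

	/* consume the written text line by line */
	while (!snd_info_get_line(buffer, line, sizeof(line)))
		pr_debug("proc write: %s\n", line);
}

static int __maybe_unused example_create_proc(struct snd_card *card)
{
	/* registered by snd_card_register() and removed with the card */
	return snd_card_rw_proc_new(card, "state", card,
				    example_proc_read, example_proc_write);
}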
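/*
 * Illustrative sketch, not part of the original file: tokenizing proc input
 * with snd_info_get_str(), which copies the next (optionally quoted) token
 * and returns the advanced source pointer.  The sample input, token buffer
 * size and function name are made up for the example.
 */
static void __maybe_unused example_parse_tokens(void)
{
	const char *src = "name \"PCM Playback\" 100";
	char token[32];
	int val;

	src = snd_info_get_str(token, src, sizeof(token));	/* "name" */
	src = snd_info_get_str(token, src, sizeof(token));	/* "PCM Playback" */
	src = snd_info_get_str(token, src, sizeof(token));	/* "100" */

	if (!kstrtoint(token, 0, &val))
		pr_debug("numeric token: %d\n", val);
}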
// SPDX-License-Identifier: GPL-2.0 /* * Released under the GPLv2 only. */ #include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/usb/hcd.h> #include <linux/usb/quirks.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/device.h> #include <asm/byteorder.h> #include "usb.h" #define USB_MAXALTSETTING 128 /* Hard limit */ #define USB_MAXCONFIG 8 /* Arbitrary limit */ static inline const char *plural(int n) { return (n == 1 ?
"" : "s"); } static int find_next_descriptor(unsigned char *buffer, int size, int dt1, int dt2, int *num_skipped) { struct usb_descriptor_header *h; int n = 0; unsigned char *buffer0 = buffer; /* Find the next descriptor of type dt1 or dt2 */ while (size > 0) { h = (struct usb_descriptor_header *) buffer; if (h->bDescriptorType == dt1 || h->bDescriptorType == dt2) break; buffer += h->bLength; size -= h->bLength; ++n; } /* Store the number of descriptors skipped and return the * number of bytes skipped */ if (num_skipped) *num_skipped = n; return buffer - buffer0; } static void usb_parse_ssp_isoc_endpoint_companion(struct device *ddev, int cfgno, int inum, int asnum, struct usb_host_endpoint *ep, unsigned char *buffer, int size) { struct usb_ssp_isoc_ep_comp_descriptor *desc; /* * The SuperSpeedPlus Isoc endpoint companion descriptor immediately * follows the SuperSpeed Endpoint Companion descriptor */ desc = (struct usb_ssp_isoc_ep_comp_descriptor *) buffer; if (desc->bDescriptorType != USB_DT_SSP_ISOC_ENDPOINT_COMP || size < USB_DT_SSP_ISOC_EP_COMP_SIZE) { dev_notice(ddev, "Invalid SuperSpeedPlus isoc endpoint companion" "for config %d interface %d altsetting %d ep %d.\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); return; } memcpy(&ep->ssp_isoc_ep_comp, desc, USB_DT_SSP_ISOC_EP_COMP_SIZE); } static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, int inum, int asnum, struct usb_host_endpoint *ep, unsigned char *buffer, int size) { struct usb_ss_ep_comp_descriptor *desc; int max_tx; /* The SuperSpeed endpoint companion descriptor is supposed to * be the first thing immediately following the endpoint descriptor. */ desc = (struct usb_ss_ep_comp_descriptor *) buffer; if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP || size < USB_DT_SS_EP_COMP_SIZE) { dev_notice(ddev, "No SuperSpeed endpoint companion for config %d " " interface %d altsetting %d ep %d: " "using minimum values\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); /* Fill in some default values. * Leave bmAttributes as zero, which will mean no streams for * bulk, and isoc won't support multiple bursts of packets. * With bursts of only one packet, and a Mult of 1, the max * amount of data moved per endpoint service interval is one * packet. */ ep->ss_ep_comp.bLength = USB_DT_SS_EP_COMP_SIZE; ep->ss_ep_comp.bDescriptorType = USB_DT_SS_ENDPOINT_COMP; if (usb_endpoint_xfer_isoc(&ep->desc) || usb_endpoint_xfer_int(&ep->desc)) ep->ss_ep_comp.wBytesPerInterval = ep->desc.wMaxPacketSize; return; } buffer += desc->bLength; size -= desc->bLength; memcpy(&ep->ss_ep_comp, desc, USB_DT_SS_EP_COMP_SIZE); /* Check the various values */ if (usb_endpoint_xfer_control(&ep->desc) && desc->bMaxBurst != 0) { dev_notice(ddev, "Control endpoint with bMaxBurst = %d in " "config %d interface %d altsetting %d ep %d: " "setting to zero\n", desc->bMaxBurst, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bMaxBurst = 0; } else if (desc->bMaxBurst > 15) { dev_notice(ddev, "Endpoint with bMaxBurst = %d in " "config %d interface %d altsetting %d ep %d: " "setting to 15\n", desc->bMaxBurst, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bMaxBurst = 15; } if ((usb_endpoint_xfer_control(&ep->desc) || usb_endpoint_xfer_int(&ep->desc)) && desc->bmAttributes != 0) { dev_notice(ddev, "%s endpoint with bmAttributes = %d in " "config %d interface %d altsetting %d ep %d: " "setting to zero\n", usb_endpoint_xfer_control(&ep->desc) ? 
"Control" : "Bulk", desc->bmAttributes, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 0; } else if (usb_endpoint_xfer_bulk(&ep->desc) && desc->bmAttributes > 16) { dev_notice(ddev, "Bulk endpoint with more than 65536 streams in " "config %d interface %d altsetting %d ep %d: " "setting to max\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 16; } else if (usb_endpoint_xfer_isoc(&ep->desc) && !USB_SS_SSP_ISOC_COMP(desc->bmAttributes) && USB_SS_MULT(desc->bmAttributes) > 3) { dev_notice(ddev, "Isoc endpoint has Mult of %d in " "config %d interface %d altsetting %d ep %d: " "setting to 3\n", USB_SS_MULT(desc->bmAttributes), cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 2; } if (usb_endpoint_xfer_isoc(&ep->desc)) max_tx = (desc->bMaxBurst + 1) * (USB_SS_MULT(desc->bmAttributes)) * usb_endpoint_maxp(&ep->desc); else if (usb_endpoint_xfer_int(&ep->desc)) max_tx = usb_endpoint_maxp(&ep->desc) * (desc->bMaxBurst + 1); else max_tx = 999999; if (le16_to_cpu(desc->wBytesPerInterval) > max_tx) { dev_notice(ddev, "%s endpoint with wBytesPerInterval of %d in " "config %d interface %d altsetting %d ep %d: " "setting to %d\n", usb_endpoint_xfer_isoc(&ep->desc) ? "Isoc" : "Int", le16_to_cpu(desc->wBytesPerInterval), cfgno, inum, asnum, ep->desc.bEndpointAddress, max_tx); ep->ss_ep_comp.wBytesPerInterval = cpu_to_le16(max_tx); } /* Parse a possible SuperSpeedPlus isoc ep companion descriptor */ if (usb_endpoint_xfer_isoc(&ep->desc) && USB_SS_SSP_ISOC_COMP(desc->bmAttributes)) usb_parse_ssp_isoc_endpoint_companion(ddev, cfgno, inum, asnum, ep, buffer, size); } static const unsigned short low_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 8, [USB_ENDPOINT_XFER_ISOC] = 0, [USB_ENDPOINT_XFER_BULK] = 0, [USB_ENDPOINT_XFER_INT] = 8, }; static const unsigned short full_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 64, [USB_ENDPOINT_XFER_ISOC] = 1023, [USB_ENDPOINT_XFER_BULK] = 64, [USB_ENDPOINT_XFER_INT] = 64, }; static const unsigned short high_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 64, [USB_ENDPOINT_XFER_ISOC] = 1024, /* Bulk should be 512, but some devices use 1024: we will warn below */ [USB_ENDPOINT_XFER_BULK] = 1024, [USB_ENDPOINT_XFER_INT] = 1024, }; static const unsigned short super_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 512, [USB_ENDPOINT_XFER_ISOC] = 1024, [USB_ENDPOINT_XFER_BULK] = 1024, [USB_ENDPOINT_XFER_INT] = 1024, }; static bool endpoint_is_duplicate(struct usb_endpoint_descriptor *e1, struct usb_endpoint_descriptor *e2) { if (e1->bEndpointAddress == e2->bEndpointAddress) return true; if (usb_endpoint_xfer_control(e1) || usb_endpoint_xfer_control(e2)) { if (usb_endpoint_num(e1) == usb_endpoint_num(e2)) return true; } return false; } /* * Check for duplicate endpoint addresses in other interfaces and in the * altsetting currently being parsed. 
*/ static bool config_endpoint_is_duplicate(struct usb_host_config *config, int inum, int asnum, struct usb_endpoint_descriptor *d) { struct usb_endpoint_descriptor *epd; struct usb_interface_cache *intfc; struct usb_host_interface *alt; int i, j, k; for (i = 0; i < config->desc.bNumInterfaces; ++i) { intfc = config->intf_cache[i]; for (j = 0; j < intfc->num_altsetting; ++j) { alt = &intfc->altsetting[j]; if (alt->desc.bInterfaceNumber == inum && alt->desc.bAlternateSetting != asnum) continue; for (k = 0; k < alt->desc.bNumEndpoints; ++k) { epd = &alt->endpoint[k].desc; if (endpoint_is_duplicate(epd, d)) return true; } } } return false; } static int usb_parse_endpoint(struct device *ddev, int cfgno, struct usb_host_config *config, int inum, int asnum, struct usb_host_interface *ifp, int num_ep, unsigned char *buffer, int size) { struct usb_device *udev = to_usb_device(ddev); unsigned char *buffer0 = buffer; struct usb_endpoint_descriptor *d; struct usb_host_endpoint *endpoint; int n, i, j, retval; unsigned int maxp; const unsigned short *maxpacket_maxes; d = (struct usb_endpoint_descriptor *) buffer; buffer += d->bLength; size -= d->bLength; if (d->bLength >= USB_DT_ENDPOINT_AUDIO_SIZE) n = USB_DT_ENDPOINT_AUDIO_SIZE; else if (d->bLength >= USB_DT_ENDPOINT_SIZE) n = USB_DT_ENDPOINT_SIZE; else { dev_notice(ddev, "config %d interface %d altsetting %d has an " "invalid endpoint descriptor of length %d, skipping\n", cfgno, inum, asnum, d->bLength); goto skip_to_next_endpoint_or_interface_descriptor; } i = d->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; if (i == 0) { dev_notice(ddev, "config %d interface %d altsetting %d has an " "invalid descriptor for endpoint zero, skipping\n", cfgno, inum, asnum); goto skip_to_next_endpoint_or_interface_descriptor; } /* Only store as many endpoints as we have room for */ if (ifp->desc.bNumEndpoints >= num_ep) goto skip_to_next_endpoint_or_interface_descriptor; /* Check for duplicate endpoint addresses */ if (config_endpoint_is_duplicate(config, inum, asnum, d)) { dev_notice(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", cfgno, inum, asnum, d->bEndpointAddress); goto skip_to_next_endpoint_or_interface_descriptor; } /* Ignore some endpoints */ if (udev->quirks & USB_QUIRK_ENDPOINT_IGNORE) { if (usb_endpoint_is_ignored(udev, ifp, d)) { dev_notice(ddev, "config %d interface %d altsetting %d has an ignored endpoint with address 0x%X, skipping\n", cfgno, inum, asnum, d->bEndpointAddress); goto skip_to_next_endpoint_or_interface_descriptor; } } endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; ++ifp->desc.bNumEndpoints; memcpy(&endpoint->desc, d, n); INIT_LIST_HEAD(&endpoint->urb_list); /* * Fix up bInterval values outside the legal range. * Use 10 or 8 ms if no proper value can be guessed. */ i = 0; /* i = min, j = max, n = default */ j = 255; if (usb_endpoint_xfer_int(d)) { i = 1; switch (udev->speed) { case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: case USB_SPEED_HIGH: /* * Many device manufacturers are using full-speed * bInterval values in high-speed interrupt endpoint * descriptors. Try to fix those and fall back to an * 8-ms default value otherwise. */ n = fls(d->bInterval*8); if (n == 0) n = 7; /* 8 ms = 2^(7-1) uframes */ j = 16; /* * Adjust bInterval for quirked devices. */ /* * This quirk fixes bIntervals reported in ms. 
*/ if (udev->quirks & USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) { n = clamp(fls(d->bInterval) + 3, i, j); i = j = n; } /* * This quirk fixes bIntervals reported in * linear microframes. */ if (udev->quirks & USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL) { n = clamp(fls(d->bInterval), i, j); i = j = n; } break; default: /* USB_SPEED_FULL or _LOW */ /* * For low-speed, 10 ms is the official minimum. * But some "overclocked" devices might want faster * polling so we'll allow it. */ n = 10; break; } } else if (usb_endpoint_xfer_isoc(d)) { i = 1; j = 16; switch (udev->speed) { case USB_SPEED_HIGH: n = 7; /* 8 ms = 2^(7-1) uframes */ break; default: /* USB_SPEED_FULL */ n = 4; /* 8 ms = 2^(4-1) frames */ break; } } if (d->bInterval < i || d->bInterval > j) { dev_notice(ddev, "config %d interface %d altsetting %d " "endpoint 0x%X has an invalid bInterval %d, " "changing to %d\n", cfgno, inum, asnum, d->bEndpointAddress, d->bInterval, n); endpoint->desc.bInterval = n; } /* Some buggy low-speed devices have Bulk endpoints, which is * explicitly forbidden by the USB spec. In an attempt to make * them usable, we will try treating them as Interrupt endpoints. */ if (udev->speed == USB_SPEED_LOW && usb_endpoint_xfer_bulk(d)) { dev_notice(ddev, "config %d interface %d altsetting %d " "endpoint 0x%X is Bulk; changing to Interrupt\n", cfgno, inum, asnum, d->bEndpointAddress); endpoint->desc.bmAttributes = USB_ENDPOINT_XFER_INT; endpoint->desc.bInterval = 1; if (usb_endpoint_maxp(&endpoint->desc) > 8) endpoint->desc.wMaxPacketSize = cpu_to_le16(8); } /* * Validate the wMaxPacketSize field. * Some devices have isochronous endpoints in altsetting 0; * the USB-2 spec requires such endpoints to have wMaxPacketSize = 0 * (see the end of section 5.6.3), so don't warn about them. */ maxp = le16_to_cpu(endpoint->desc.wMaxPacketSize); if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) { dev_notice(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", cfgno, inum, asnum, d->bEndpointAddress); } /* Find the highest legal maxpacket size for this endpoint */ i = 0; /* additional transactions per microframe */ switch (udev->speed) { case USB_SPEED_LOW: maxpacket_maxes = low_speed_maxpacket_maxes; break; case USB_SPEED_FULL: maxpacket_maxes = full_speed_maxpacket_maxes; break; case USB_SPEED_HIGH: /* Multiple-transactions bits are allowed only for HS periodic endpoints */ if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) { i = maxp & USB_EP_MAXP_MULT_MASK; maxp &= ~i; } fallthrough; default: maxpacket_maxes = high_speed_maxpacket_maxes; break; case USB_SPEED_SUPER: case USB_SPEED_SUPER_PLUS: maxpacket_maxes = super_speed_maxpacket_maxes; break; } j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)]; if (maxp > j) { dev_notice(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n", cfgno, inum, asnum, d->bEndpointAddress, maxp, j); maxp = j; endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp); } /* * Some buggy high speed devices have bulk endpoints using * maxpacket sizes other than 512. High speed HCDs may not * be able to handle that particular bug, so let's warn... 
*/ if (udev->speed == USB_SPEED_HIGH && usb_endpoint_xfer_bulk(d)) { if (maxp != 512) dev_notice(ddev, "config %d interface %d altsetting %d " "bulk endpoint 0x%X has invalid maxpacket %d\n", cfgno, inum, asnum, d->bEndpointAddress, maxp); } /* Parse a possible SuperSpeed endpoint companion descriptor */ if (udev->speed >= USB_SPEED_SUPER) usb_parse_ss_endpoint_companion(ddev, cfgno, inum, asnum, endpoint, buffer, size); /* Skip over any Class Specific or Vendor Specific descriptors; * find the next endpoint or interface descriptor */ endpoint->extra = buffer; i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT, USB_DT_INTERFACE, &n); endpoint->extralen = i; retval = buffer - buffer0 + i; if (n > 0) dev_dbg(ddev, "skipped %d descriptor%s after %s\n", n, plural(n), "endpoint"); return retval; skip_to_next_endpoint_or_interface_descriptor: i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT, USB_DT_INTERFACE, NULL); return buffer - buffer0 + i; } void usb_release_interface_cache(struct kref *ref) { struct usb_interface_cache *intfc = ref_to_usb_interface_cache(ref); int j; for (j = 0; j < intfc->num_altsetting; j++) { struct usb_host_interface *alt = &intfc->altsetting[j]; kfree(alt->endpoint); kfree(alt->string); } kfree(intfc); } static int usb_parse_interface(struct device *ddev, int cfgno, struct usb_host_config *config, unsigned char *buffer, int size, u8 inums[], u8 nalts[]) { unsigned char *buffer0 = buffer; struct usb_interface_descriptor *d; int inum, asnum; struct usb_interface_cache *intfc; struct usb_host_interface *alt; int i, n; int len, retval; int num_ep, num_ep_orig; d = (struct usb_interface_descriptor *) buffer; buffer += d->bLength; size -= d->bLength; if (d->bLength < USB_DT_INTERFACE_SIZE) goto skip_to_next_interface_descriptor; /* Which interface entry is this? */ intfc = NULL; inum = d->bInterfaceNumber; for (i = 0; i < config->desc.bNumInterfaces; ++i) { if (inums[i] == inum) { intfc = config->intf_cache[i]; break; } } if (!intfc || intfc->num_altsetting >= nalts[i]) goto skip_to_next_interface_descriptor; /* Check for duplicate altsetting entries */ asnum = d->bAlternateSetting; for ((i = 0, alt = &intfc->altsetting[0]); i < intfc->num_altsetting; (++i, ++alt)) { if (alt->desc.bAlternateSetting == asnum) { dev_notice(ddev, "Duplicate descriptor for config %d " "interface %d altsetting %d, skipping\n", cfgno, inum, asnum); goto skip_to_next_interface_descriptor; } } ++intfc->num_altsetting; memcpy(&alt->desc, d, USB_DT_INTERFACE_SIZE); /* Skip over any Class Specific or Vendor Specific descriptors; * find the first endpoint or interface descriptor */ alt->extra = buffer; i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT, USB_DT_INTERFACE, &n); alt->extralen = i; if (n > 0) dev_dbg(ddev, "skipped %d descriptor%s after %s\n", n, plural(n), "interface"); buffer += i; size -= i; /* Allocate space for the right(?) 
number of endpoints */ num_ep = num_ep_orig = alt->desc.bNumEndpoints; alt->desc.bNumEndpoints = 0; /* Use as a counter */ if (num_ep > USB_MAXENDPOINTS) { dev_notice(ddev, "too many endpoints for config %d interface %d " "altsetting %d: %d, using maximum allowed: %d\n", cfgno, inum, asnum, num_ep, USB_MAXENDPOINTS); num_ep = USB_MAXENDPOINTS; } if (num_ep > 0) { /* Can't allocate 0 bytes */ len = sizeof(struct usb_host_endpoint) * num_ep; alt->endpoint = kzalloc(len, GFP_KERNEL); if (!alt->endpoint) return -ENOMEM; } /* Parse all the endpoint descriptors */ n = 0; while (size > 0) { if (((struct usb_descriptor_header *) buffer)->bDescriptorType == USB_DT_INTERFACE) break; retval = usb_parse_endpoint(ddev, cfgno, config, inum, asnum, alt, num_ep, buffer, size); if (retval < 0) return retval; ++n; buffer += retval; size -= retval; } if (n != num_ep_orig) dev_notice(ddev, "config %d interface %d altsetting %d has %d " "endpoint descriptor%s, different from the interface " "descriptor's value: %d\n", cfgno, inum, asnum, n, plural(n), num_ep_orig); return buffer - buffer0; skip_to_next_interface_descriptor: i = find_next_descriptor(buffer, size, USB_DT_INTERFACE, USB_DT_INTERFACE, NULL); return buffer - buffer0 + i; } static int usb_parse_configuration(struct usb_device *dev, int cfgidx, struct usb_host_config *config, unsigned char *buffer, int size) { struct device *ddev = &dev->dev; unsigned char *buffer0 = buffer; int cfgno; int nintf, nintf_orig; int i, j, n; struct usb_interface_cache *intfc; unsigned char *buffer2; int size2; struct usb_descriptor_header *header; int retval; u8 inums[USB_MAXINTERFACES], nalts[USB_MAXINTERFACES]; unsigned iad_num = 0; memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE); nintf = nintf_orig = config->desc.bNumInterfaces; config->desc.bNumInterfaces = 0; // Adjusted later if (config->desc.bDescriptorType != USB_DT_CONFIG || config->desc.bLength < USB_DT_CONFIG_SIZE || config->desc.bLength > size) { dev_notice(ddev, "invalid descriptor for config index %d: " "type = 0x%X, length = %d\n", cfgidx, config->desc.bDescriptorType, config->desc.bLength); return -EINVAL; } cfgno = config->desc.bConfigurationValue; buffer += config->desc.bLength; size -= config->desc.bLength; if (nintf > USB_MAXINTERFACES) { dev_notice(ddev, "config %d has too many interfaces: %d, " "using maximum allowed: %d\n", cfgno, nintf, USB_MAXINTERFACES); nintf = USB_MAXINTERFACES; } /* Go through the descriptors, checking their length and counting the * number of altsettings for each interface */ n = 0; for ((buffer2 = buffer, size2 = size); size2 > 0; (buffer2 += header->bLength, size2 -= header->bLength)) { if (size2 < sizeof(struct usb_descriptor_header)) { dev_notice(ddev, "config %d descriptor has %d excess " "byte%s, ignoring\n", cfgno, size2, plural(size2)); break; } header = (struct usb_descriptor_header *) buffer2; if ((header->bLength > size2) || (header->bLength < 2)) { dev_notice(ddev, "config %d has an invalid descriptor " "of length %d, skipping remainder of the config\n", cfgno, header->bLength); break; } if (header->bDescriptorType == USB_DT_INTERFACE) { struct usb_interface_descriptor *d; int inum; d = (struct usb_interface_descriptor *) header; if (d->bLength < USB_DT_INTERFACE_SIZE) { dev_notice(ddev, "config %d has an invalid " "interface descriptor of length %d, " "skipping\n", cfgno, d->bLength); continue; } inum = d->bInterfaceNumber; if ((dev->quirks & USB_QUIRK_HONOR_BNUMINTERFACES) && n >= nintf_orig) { dev_notice(ddev, "config %d has more interface " "descriptors, 
than it declares in " "bNumInterfaces, ignoring interface " "number: %d\n", cfgno, inum); continue; } if (inum >= nintf_orig) dev_notice(ddev, "config %d has an invalid " "interface number: %d but max is %d\n", cfgno, inum, nintf_orig - 1); /* Have we already encountered this interface? * Count its altsettings */ for (i = 0; i < n; ++i) { if (inums[i] == inum) break; } if (i < n) { if (nalts[i] < 255) ++nalts[i]; } else if (n < USB_MAXINTERFACES) { inums[n] = inum; nalts[n] = 1; ++n; } } else if (header->bDescriptorType == USB_DT_INTERFACE_ASSOCIATION) { struct usb_interface_assoc_descriptor *d; d = (struct usb_interface_assoc_descriptor *)header; if (d->bLength < USB_DT_INTERFACE_ASSOCIATION_SIZE) { dev_notice(ddev, "config %d has an invalid interface association descriptor of length %d, skipping\n", cfgno, d->bLength); continue; } if (iad_num == USB_MAXIADS) { dev_notice(ddev, "found more Interface " "Association Descriptors " "than allocated for in " "configuration %d\n", cfgno); } else { config->intf_assoc[iad_num] = d; iad_num++; } } else if (header->bDescriptorType == USB_DT_DEVICE || header->bDescriptorType == USB_DT_CONFIG) dev_notice(ddev, "config %d contains an unexpected " "descriptor of type 0x%X, skipping\n", cfgno, header->bDescriptorType); } /* for ((buffer2 = buffer, size2 = size); ...) */ size = buffer2 - buffer; config->desc.wTotalLength = cpu_to_le16(buffer2 - buffer0); if (n != nintf) dev_notice(ddev, "config %d has %d interface%s, different from " "the descriptor's value: %d\n", cfgno, n, plural(n), nintf_orig); else if (n == 0) dev_notice(ddev, "config %d has no interfaces?\n", cfgno); config->desc.bNumInterfaces = nintf = n; /* Check for missing interface numbers */ for (i = 0; i < nintf; ++i) { for (j = 0; j < nintf; ++j) { if (inums[j] == i) break; } if (j >= nintf) dev_notice(ddev, "config %d has no interface number " "%d\n", cfgno, i); } /* Allocate the usb_interface_caches and altsetting arrays */ for (i = 0; i < nintf; ++i) { j = nalts[i]; if (j > USB_MAXALTSETTING) { dev_notice(ddev, "too many alternate settings for " "config %d interface %d: %d, " "using maximum allowed: %d\n", cfgno, inums[i], j, USB_MAXALTSETTING); nalts[i] = j = USB_MAXALTSETTING; } intfc = kzalloc(struct_size(intfc, altsetting, j), GFP_KERNEL); config->intf_cache[i] = intfc; if (!intfc) return -ENOMEM; kref_init(&intfc->ref); } /* FIXME: parse the BOS descriptor */ /* Skip over any Class Specific or Vendor Specific descriptors; * find the first interface descriptor */ config->extra = buffer; i = find_next_descriptor(buffer, size, USB_DT_INTERFACE, USB_DT_INTERFACE, &n); config->extralen = i; if (n > 0) dev_dbg(ddev, "skipped %d descriptor%s after %s\n", n, plural(n), "configuration"); buffer += i; size -= i; /* Parse all the interface/altsetting descriptors */ while (size > 0) { retval = usb_parse_interface(ddev, cfgno, config, buffer, size, inums, nalts); if (retval < 0) return retval; buffer += retval; size -= retval; } /* Check for missing altsettings */ for (i = 0; i < nintf; ++i) { intfc = config->intf_cache[i]; for (j = 0; j < intfc->num_altsetting; ++j) { for (n = 0; n < intfc->num_altsetting; ++n) { if (intfc->altsetting[n].desc. bAlternateSetting == j) break; } if (n >= intfc->num_altsetting) dev_notice(ddev, "config %d interface %d has no " "altsetting %d\n", cfgno, inums[i], j); } } return 0; } /* hub-only!! ... and only exported for reset/reinit path. 
* otherwise used internally on disconnect/destroy path */ void usb_destroy_configuration(struct usb_device *dev) { int c, i; if (!dev->config) return; if (dev->rawdescriptors) { for (i = 0; i < dev->descriptor.bNumConfigurations; i++) kfree(dev->rawdescriptors[i]); kfree(dev->rawdescriptors); dev->rawdescriptors = NULL; } for (c = 0; c < dev->descriptor.bNumConfigurations; c++) { struct usb_host_config *cf = &dev->config[c]; kfree(cf->string); for (i = 0; i < cf->desc.bNumInterfaces; i++) { if (cf->intf_cache[i]) kref_put(&cf->intf_cache[i]->ref, usb_release_interface_cache); } } kfree(dev->config); dev->config = NULL; } /* * Get the USB config descriptors, cache and parse'em * * hub-only!! ... and only in reset path, or usb_new_device() * (used by real hubs and virtual root hubs) */ int usb_get_configuration(struct usb_device *dev) { struct device *ddev = &dev->dev; int ncfg = dev->descriptor.bNumConfigurations; unsigned int cfgno, length; unsigned char *bigbuffer; struct usb_config_descriptor *desc; int result; if (ncfg > USB_MAXCONFIG) { dev_notice(ddev, "too many configurations: %d, " "using maximum allowed: %d\n", ncfg, USB_MAXCONFIG); dev->descriptor.bNumConfigurations = ncfg = USB_MAXCONFIG; } if (ncfg < 1) { dev_err(ddev, "no configurations\n"); return -EINVAL; } length = ncfg * sizeof(struct usb_host_config); dev->config = kzalloc(length, GFP_KERNEL); if (!dev->config) return -ENOMEM; length = ncfg * sizeof(char *); dev->rawdescriptors = kzalloc(length, GFP_KERNEL); if (!dev->rawdescriptors) return -ENOMEM; desc = kmalloc(USB_DT_CONFIG_SIZE, GFP_KERNEL); if (!desc) return -ENOMEM; for (cfgno = 0; cfgno < ncfg; cfgno++) { /* We grab just the first descriptor so we know how long * the whole configuration is */ result = usb_get_descriptor(dev, USB_DT_CONFIG, cfgno, desc, USB_DT_CONFIG_SIZE); if (result < 0) { dev_err(ddev, "unable to read config index %d " "descriptor/%s: %d\n", cfgno, "start", result); if (result != -EPIPE) goto err; dev_notice(ddev, "chopping to %d config(s)\n", cfgno); dev->descriptor.bNumConfigurations = cfgno; break; } else if (result < 4) { dev_err(ddev, "config index %d descriptor too short " "(expected %i, got %i)\n", cfgno, USB_DT_CONFIG_SIZE, result); result = -EINVAL; goto err; } length = max((int) le16_to_cpu(desc->wTotalLength), USB_DT_CONFIG_SIZE); /* Now that we know the length, get the whole thing */ bigbuffer = kmalloc(length, GFP_KERNEL); if (!bigbuffer) { result = -ENOMEM; goto err; } if (dev->quirks & USB_QUIRK_DELAY_INIT) msleep(200); result = usb_get_descriptor(dev, USB_DT_CONFIG, cfgno, bigbuffer, length); if (result < 0) { dev_err(ddev, "unable to read config index %d " "descriptor/%s\n", cfgno, "all"); kfree(bigbuffer); goto err; } if (result < length) { dev_notice(ddev, "config index %d descriptor too short " "(expected %i, got %i)\n", cfgno, length, result); length = result; } dev->rawdescriptors[cfgno] = bigbuffer; result = usb_parse_configuration(dev, cfgno, &dev->config[cfgno], bigbuffer, length); if (result < 0) { ++cfgno; goto err; } } err: kfree(desc); dev->descriptor.bNumConfigurations = cfgno; return result; } void usb_release_bos_descriptor(struct usb_device *dev) { if (dev->bos) { kfree(dev->bos->desc); kfree(dev->bos); dev->bos = NULL; } } static const __u8 bos_desc_len[256] = { [USB_CAP_TYPE_WIRELESS_USB] = USB_DT_USB_WIRELESS_CAP_SIZE, [USB_CAP_TYPE_EXT] = USB_DT_USB_EXT_CAP_SIZE, [USB_SS_CAP_TYPE] = USB_DT_USB_SS_CAP_SIZE, [USB_SSP_CAP_TYPE] = USB_DT_USB_SSP_CAP_SIZE(1), [CONTAINER_ID_TYPE] = USB_DT_USB_SS_CONTN_ID_SIZE, 
[USB_PTM_CAP_TYPE] = USB_DT_USB_PTM_ID_SIZE, }; /* Get BOS descriptor set */ int usb_get_bos_descriptor(struct usb_device *dev) { struct device *ddev = &dev->dev; struct usb_bos_descriptor *bos; struct usb_dev_cap_header *cap; struct usb_ssp_cap_descriptor *ssp_cap; unsigned char *buffer, *buffer0; int length, total_len, num, i, ssac; __u8 cap_type; int ret; bos = kzalloc(sizeof(*bos), GFP_KERNEL); if (!bos) return -ENOMEM; /* Get BOS descriptor */ ret = usb_get_descriptor(dev, USB_DT_BOS, 0, bos, USB_DT_BOS_SIZE); if (ret < USB_DT_BOS_SIZE || bos->bLength < USB_DT_BOS_SIZE) { dev_notice(ddev, "unable to get BOS descriptor or descriptor too short\n"); if (ret >= 0) ret = -ENOMSG; kfree(bos); return ret; } length = bos->bLength; total_len = le16_to_cpu(bos->wTotalLength); num = bos->bNumDeviceCaps; kfree(bos); if (total_len < length) return -EINVAL; dev->bos = kzalloc(sizeof(*dev->bos), GFP_KERNEL); if (!dev->bos) return -ENOMEM; /* Now let's get the whole BOS descriptor set */ buffer = kzalloc(total_len, GFP_KERNEL); if (!buffer) { ret = -ENOMEM; goto err; } dev->bos->desc = (struct usb_bos_descriptor *)buffer; ret = usb_get_descriptor(dev, USB_DT_BOS, 0, buffer, total_len); if (ret < total_len) { dev_notice(ddev, "unable to get BOS descriptor set\n"); if (ret >= 0) ret = -ENOMSG; goto err; } buffer0 = buffer; total_len -= length; buffer += length; for (i = 0; i < num; i++) { cap = (struct usb_dev_cap_header *)buffer; if (total_len < sizeof(*cap) || total_len < cap->bLength) { dev->bos->desc->bNumDeviceCaps = i; break; } cap_type = cap->bDevCapabilityType; length = cap->bLength; if (bos_desc_len[cap_type] && length < bos_desc_len[cap_type]) { dev->bos->desc->bNumDeviceCaps = i; break; } if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { dev_notice(ddev, "descriptor type invalid, skip\n"); goto skip_to_next_descriptor; } switch (cap_type) { case USB_CAP_TYPE_EXT: dev->bos->ext_cap = (struct usb_ext_cap_descriptor *)buffer; break; case USB_SS_CAP_TYPE: dev->bos->ss_cap = (struct usb_ss_cap_descriptor *)buffer; break; case USB_SSP_CAP_TYPE: ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; ssac = (le32_to_cpu(ssp_cap->bmAttributes) & USB_SSP_SUBLINK_SPEED_ATTRIBS); if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) dev->bos->ssp_cap = ssp_cap; break; case CONTAINER_ID_TYPE: dev->bos->ss_id = (struct usb_ss_container_id_descriptor *)buffer; break; case USB_PTM_CAP_TYPE: dev->bos->ptm_cap = (struct usb_ptm_cap_descriptor *)buffer; break; default: break; } skip_to_next_descriptor: total_len -= length; buffer += length; } dev->bos->desc->wTotalLength = cpu_to_le16(buffer - buffer0); return 0; err: usb_release_bos_descriptor(dev); return ret; }
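/*
 * Illustrative sketch (not called anywhere in this file): the per-service-
 * interval byte ceiling that usb_parse_ss_endpoint_companion() enforces on
 * wBytesPerInterval.  For SuperSpeed isochronous endpoints the limit is
 * (bMaxBurst + 1) * Mult * wMaxPacketSize, for interrupt endpoints it is
 * (bMaxBurst + 1) * wMaxPacketSize, and other transfer types get no real
 * ceiling.  The helper name is an assumption made up for this example.
 */
static inline unsigned int example_ss_bytes_per_interval_limit(
		const struct usb_endpoint_descriptor *d,
		const struct usb_ss_ep_comp_descriptor *comp)
{
	unsigned int maxp = usb_endpoint_maxp(d);
	unsigned int burst = comp->bMaxBurst + 1;

	if (usb_endpoint_xfer_isoc(d))
		return burst * USB_SS_MULT(comp->bmAttributes) * maxp;
	if (usb_endpoint_xfer_int(d))
		return burst * maxp;
	return 999999;	/* mirrors the "no limit" value used by the parser above */
}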
// SPDX-License-Identifier: GPL-2.0-only /*************************************************************************** * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * * * * Based on Logitech G13 driver (v0.4) * * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * * * ***************************************************************************/ #include <linux/hid.h> #include <linux/hid-debug.h> #include <linux/input.h> #include "hid-ids.h" #include <linux/fb.h> #include <linux/vmalloc.h> #include <linux/backlight.h> #include <linux/lcd.h> #include <linux/leds.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/completion.h> #include <linux/uaccess.h> #include <linux/module.h> #include "hid-picolcd.h" void picolcd_leds_set(struct picolcd_data *data) { struct hid_report *report; unsigned long flags; if (!data->led[0]) return; report = picolcd_out_report(REPORT_LED_STATE, data->hdev); if (!report || report->maxfield != 1 || report->field[0]->report_count != 1) return; spin_lock_irqsave(&data->lock, flags); hid_set_field(report->field[0], 0, data->led_state); if (!(data->status & PICOLCD_FAILED)) hid_hw_request(data->hdev, report, HID_REQ_SET_REPORT); spin_unlock_irqrestore(&data->lock, flags); } static void picolcd_led_set_brightness(struct led_classdev *led_cdev, enum led_brightness value) { struct device *dev; struct hid_device *hdev; struct picolcd_data *data; int i, state = 0; dev = led_cdev->dev->parent; hdev = to_hid_device(dev); data = hid_get_drvdata(hdev); if (!data) return; for (i = 0; i < 8; i++) { if (led_cdev != data->led[i]) continue; state = (data->led_state >> i) & 1; if (value == LED_OFF && state) { data->led_state &= ~(1 << i); picolcd_leds_set(data); } else if (value != LED_OFF && !state) { data->led_state |= 1 << i; picolcd_leds_set(data); } break; } } static enum led_brightness picolcd_led_get_brightness(struct led_classdev *led_cdev) { struct device *dev; struct hid_device *hdev; struct picolcd_data *data; int i, value = 0; dev = led_cdev->dev->parent; hdev = to_hid_device(dev); data = hid_get_drvdata(hdev); for (i = 0; i < 8; i++) if (led_cdev == data->led[i]) { value = (data->led_state >> i) & 1; break; } return value ?
LED_FULL : LED_OFF; } int picolcd_init_leds(struct picolcd_data *data, struct hid_report *report) { struct device *dev = &data->hdev->dev; struct led_classdev *led; size_t name_sz = strlen(dev_name(dev)) + 8; char *name; int i, ret = 0; if (!report) return -ENODEV; if (report->maxfield != 1 || report->field[0]->report_count != 1 || report->field[0]->report_size != 8) { dev_err(dev, "unsupported LED_STATE report"); return -EINVAL; } for (i = 0; i < 8; i++) { led = kzalloc(sizeof(struct led_classdev)+name_sz, GFP_KERNEL); if (!led) { dev_err(dev, "can't allocate memory for LED %d\n", i); ret = -ENOMEM; goto err; } name = (void *)(&led[1]); snprintf(name, name_sz, "%s::GPO%d", dev_name(dev), i); led->name = name; led->brightness = 0; led->max_brightness = 1; led->brightness_get = picolcd_led_get_brightness; led->brightness_set = picolcd_led_set_brightness; data->led[i] = led; ret = led_classdev_register(dev, data->led[i]); if (ret) { data->led[i] = NULL; kfree(led); dev_err(dev, "can't register LED %d\n", i); goto err; } } return 0; err: for (i = 0; i < 8; i++) if (data->led[i]) { led = data->led[i]; data->led[i] = NULL; led_classdev_unregister(led); kfree(led); } return ret; } void picolcd_exit_leds(struct picolcd_data *data) { struct led_classdev *led; int i; for (i = 0; i < 8; i++) { led = data->led[i]; data->led[i] = NULL; if (!led) continue; led_classdev_unregister(led); kfree(led); } }
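/*
 * Illustrative sketch only (not used by the driver): each registered
 * "<dev>::GPOn" LED above maps to bit n of data->led_state, which
 * picolcd_leds_set() pushes to the device in a single REPORT_LED_STATE
 * output report.  A hypothetical helper that turns a set of GPO indices
 * into that bitmask could look like this.
 */
static inline u8 example_picolcd_gpo_mask(const int *gpo, int count)
{
	u8 mask = 0;
	int i;

	for (i = 0; i < count; i++)
		if (gpo[i] >= 0 && gpo[i] < 8)
			mask |= 1 << gpo[i];
	return mask;
}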
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/locks.c * * We implement four types of file locks: BSD locks, posix locks, open * file description locks, and leases. For details about BSD locks, * see the flock(2) man page; for details about the other three, see * fcntl(2).
* * * Locking conflicts and dependencies: * If multiple threads attempt to lock the same byte (or flock the same file) * only one can be granted the lock, and other must wait their turn. * The first lock has been "applied" or "granted", the others are "waiting" * and are "blocked" by the "applied" lock.. * * Waiting and applied locks are all kept in trees whose properties are: * * - the root of a tree may be an applied or waiting lock. * - every other node in the tree is a waiting lock that * conflicts with every ancestor of that node. * * Every such tree begins life as a waiting singleton which obviously * satisfies the above properties. * * The only ways we modify trees preserve these properties: * * 1. We may add a new leaf node, but only after first verifying that it * conflicts with all of its ancestors. * 2. We may remove the root of a tree, creating a new singleton * tree from the root and N new trees rooted in the immediate * children. * 3. If the root of a tree is not currently an applied lock, we may * apply it (if possible). * 4. We may upgrade the root of the tree (either extend its range, * or upgrade its entire range from read to write). * * When an applied lock is modified in a way that reduces or downgrades any * part of its range, we remove all its children (2 above). This particularly * happens when a lock is unlocked. * * For each of those child trees we "wake up" the thread which is * waiting for the lock so it can continue handling as follows: if the * root of the tree applies, we do so (3). If it doesn't, it must * conflict with some applied lock. We remove (wake up) all of its children * (2), and add it is a new leaf to the tree rooted in the applied * lock (1). We then repeat the process recursively with those * children. * */ #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/filelock.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/security.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/time.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> #include <linux/hashtable.h> #include <linux/percpu.h> #include <linux/sysctl.h> #define CREATE_TRACE_POINTS #include <trace/events/filelock.h> #include <linux/uaccess.h> static struct file_lock *file_lock(struct file_lock_core *flc) { return container_of(flc, struct file_lock, c); } static struct file_lease *file_lease(struct file_lock_core *flc) { return container_of(flc, struct file_lease, c); } static bool lease_breaking(struct file_lease *fl) { return fl->c.flc_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); } static int target_leasetype(struct file_lease *fl) { if (fl->c.flc_flags & FL_UNLOCK_PENDING) return F_UNLCK; if (fl->c.flc_flags & FL_DOWNGRADE_PENDING) return F_RDLCK; return fl->c.flc_type; } static int leases_enable = 1; static int lease_break_time = 45; #ifdef CONFIG_SYSCTL static struct ctl_table locks_sysctls[] = { { .procname = "leases-enable", .data = &leases_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #ifdef CONFIG_MMU { .procname = "lease-break-time", .data = &lease_break_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif /* CONFIG_MMU */ }; static int __init init_fs_locks_sysctls(void) { register_sysctl_init("fs", locks_sysctls); return 0; } early_initcall(init_fs_locks_sysctls); #endif /* CONFIG_SYSCTL */ /* * The global file_lock_list is only used for displaying /proc/locks, so we * keep a list on each CPU, with each list protected by 
its own spinlock. * Global serialization is done using file_rwsem. * * Note that alterations to the list also require that the relevant flc_lock is * held. */ struct file_lock_list_struct { spinlock_t lock; struct hlist_head hlist; }; static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list); DEFINE_STATIC_PERCPU_RWSEM(file_rwsem); /* * The blocked_hash is used to find POSIX lock loops for deadlock detection. * It is protected by blocked_lock_lock. * * We hash locks by lockowner in order to optimize searching for the lock a * particular lockowner is waiting on. * * FIXME: make this value scale via some heuristic? We generally will want more * buckets when we have more lockowners holding locks, but that's a little * difficult to determine without knowing what the workload will look like. */ #define BLOCKED_HASH_BITS 7 static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); /* * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * * In addition, it also protects the fl->fl_blocked_requests list, and the * fl->fl_blocker pointer for file_lock structures that are acting as lock * requests (in contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, * we often hold the flc_lock as well. In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the * flc_lock. */ static DEFINE_SPINLOCK(blocked_lock_lock); static struct kmem_cache *flctx_cache __ro_after_init; static struct kmem_cache *filelock_cache __ro_after_init; static struct kmem_cache *filelease_cache __ro_after_init; static struct file_lock_context * locks_get_lock_context(struct inode *inode, int type) { struct file_lock_context *ctx; /* paired with cmpxchg() below */ ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL); if (!ctx) goto out; spin_lock_init(&ctx->flc_lock); INIT_LIST_HEAD(&ctx->flc_flock); INIT_LIST_HEAD(&ctx->flc_posix); INIT_LIST_HEAD(&ctx->flc_lease); /* * Assign the pointer if it's not already assigned. If it is, then * free the context we just allocated. 
*/ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); return ctx; } static void locks_dump_ctx_list(struct list_head *list, char *list_type) { struct file_lock_core *flc; list_for_each_entry(flc, list, flc_list) pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, flc->flc_owner, flc->flc_flags, flc->flc_type, flc->flc_pid); } static void locks_check_ctx_lists(struct inode *inode) { struct file_lock_context *ctx = inode->i_flctx; if (unlikely(!list_empty(&ctx->flc_flock) || !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_lease))) { pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); locks_dump_ctx_list(&ctx->flc_flock, "FLOCK"); locks_dump_ctx_list(&ctx->flc_posix, "POSIX"); locks_dump_ctx_list(&ctx->flc_lease, "LEASE"); } } static void locks_check_ctx_file_list(struct file *filp, struct list_head *list, char *list_type) { struct file_lock_core *flc; struct inode *inode = file_inode(filp); list_for_each_entry(flc, list, flc_list) if (flc->flc_file == filp) pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx " " fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino, flc->flc_owner, flc->flc_flags, flc->flc_type, flc->flc_pid); } void locks_free_lock_context(struct inode *inode) { struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); kmem_cache_free(flctx_cache, ctx); } } static void locks_init_lock_heads(struct file_lock_core *flc) { INIT_HLIST_NODE(&flc->flc_link); INIT_LIST_HEAD(&flc->flc_list); INIT_LIST_HEAD(&flc->flc_blocked_requests); INIT_LIST_HEAD(&flc->flc_blocked_member); init_waitqueue_head(&flc->flc_wait); } /* Allocate an empty lock structure. */ struct file_lock *locks_alloc_lock(void) { struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); if (fl) locks_init_lock_heads(&fl->c); return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lock); /* Allocate an empty lock structure. 
*/ struct file_lease *locks_alloc_lease(void) { struct file_lease *fl = kmem_cache_zalloc(filelease_cache, GFP_KERNEL); if (fl) locks_init_lock_heads(&fl->c); return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lease); void locks_release_private(struct file_lock *fl) { struct file_lock_core *flc = &fl->c; BUG_ON(waitqueue_active(&flc->flc_wait)); BUG_ON(!list_empty(&flc->flc_list)); BUG_ON(!list_empty(&flc->flc_blocked_requests)); BUG_ON(!list_empty(&flc->flc_blocked_member)); BUG_ON(!hlist_unhashed(&flc->flc_link)); if (fl->fl_ops) { if (fl->fl_ops->fl_release_private) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } if (fl->fl_lmops) { if (fl->fl_lmops->lm_put_owner) { fl->fl_lmops->lm_put_owner(flc->flc_owner); flc->flc_owner = NULL; } fl->fl_lmops = NULL; } } EXPORT_SYMBOL_GPL(locks_release_private); /** * locks_owner_has_blockers - Check for blocking lock requests * @flctx: file lock context * @owner: lock owner * * Return values: * %true: @owner has at least one blocker * %false: @owner has no blockers */ bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner) { struct file_lock_core *flc; spin_lock(&flctx->flc_lock); list_for_each_entry(flc, &flctx->flc_posix, flc_list) { if (flc->flc_owner != owner) continue; if (!list_empty(&flc->flc_blocked_requests)) { spin_unlock(&flctx->flc_lock); return true; } } spin_unlock(&flctx->flc_lock); return false; } EXPORT_SYMBOL_GPL(locks_owner_has_blockers); /* Free a lock which is not in use. */ void locks_free_lock(struct file_lock *fl) { locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } EXPORT_SYMBOL(locks_free_lock); /* Free a lease which is not in use. */ void locks_free_lease(struct file_lease *fl) { kmem_cache_free(filelease_cache, fl); } EXPORT_SYMBOL(locks_free_lease); static void locks_dispose_list(struct list_head *dispose) { struct file_lock_core *flc; while (!list_empty(dispose)) { flc = list_first_entry(dispose, struct file_lock_core, flc_list); list_del_init(&flc->flc_list); if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) locks_free_lease(file_lease(flc)); else locks_free_lock(file_lock(flc)); } } void locks_init_lock(struct file_lock *fl) { memset(fl, 0, sizeof(struct file_lock)); locks_init_lock_heads(&fl->c); } EXPORT_SYMBOL(locks_init_lock); void locks_init_lease(struct file_lease *fl) { memset(fl, 0, sizeof(*fl)); locks_init_lock_heads(&fl->c); } EXPORT_SYMBOL(locks_init_lease); /* * Initialize a new lock from an existing file_lock structure. 
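 * Only the fields needed to report a conflicting lock are copied: owner,
 * pid, flags, type and the byte range. flc_file and fl_ops are deliberately
 * cleared, and an extra reference is taken on the lock manager's owner when
 * lm_get_owner is defined. See locks_copy_lock() below for a full copy.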
*/ void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { new->c.flc_owner = fl->c.flc_owner; new->c.flc_pid = fl->c.flc_pid; new->c.flc_file = NULL; new->c.flc_flags = fl->c.flc_flags; new->c.flc_type = fl->c.flc_type; new->fl_start = fl->fl_start; new->fl_end = fl->fl_end; new->fl_lmops = fl->fl_lmops; new->fl_ops = NULL; if (fl->fl_lmops) { if (fl->fl_lmops->lm_get_owner) fl->fl_lmops->lm_get_owner(fl->c.flc_owner); } } EXPORT_SYMBOL(locks_copy_conflock); void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { /* "new" must be a freshly-initialized lock */ WARN_ON_ONCE(new->fl_ops); locks_copy_conflock(new, fl); new->c.flc_file = fl->c.flc_file; new->fl_ops = fl->fl_ops; if (fl->fl_ops) { if (fl->fl_ops->fl_copy_lock) fl->fl_ops->fl_copy_lock(new, fl); } } EXPORT_SYMBOL(locks_copy_lock); static void locks_move_blocks(struct file_lock *new, struct file_lock *fl) { struct file_lock *f; /* * As ctx->flc_lock is held, new requests cannot be added to * ->flc_blocked_requests, so we don't need a lock to check if it * is empty. */ if (list_empty(&fl->c.flc_blocked_requests)) return; spin_lock(&blocked_lock_lock); list_splice_init(&fl->c.flc_blocked_requests, &new->c.flc_blocked_requests); list_for_each_entry(f, &new->c.flc_blocked_requests, c.flc_blocked_member) f->c.flc_blocker = &new->c; spin_unlock(&blocked_lock_lock); } static inline int flock_translate_cmd(int cmd) { switch (cmd) { case LOCK_SH: return F_RDLCK; case LOCK_EX: return F_WRLCK; case LOCK_UN: return F_UNLCK; } return -EINVAL; } /* Fill in a file_lock structure with an appropriate FLOCK lock. */ static void flock_make_lock(struct file *filp, struct file_lock *fl, int type) { locks_init_lock(fl); fl->c.flc_file = filp; fl->c.flc_owner = filp; fl->c.flc_pid = current->tgid; fl->c.flc_flags = FL_FLOCK; fl->c.flc_type = type; fl->fl_end = OFFSET_MAX; } static int assign_type(struct file_lock_core *flc, int type) { switch (type) { case F_RDLCK: case F_WRLCK: case F_UNLCK: flc->flc_type = type; break; default: return -EINVAL; } return 0; } static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock64 *l) { switch (l->l_whence) { case SEEK_SET: fl->fl_start = 0; break; case SEEK_CUR: fl->fl_start = filp->f_pos; break; case SEEK_END: fl->fl_start = i_size_read(file_inode(filp)); break; default: return -EINVAL; } if (l->l_start > OFFSET_MAX - fl->fl_start) return -EOVERFLOW; fl->fl_start += l->l_start; if (fl->fl_start < 0) return -EINVAL; /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. */ if (l->l_len > 0) { if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) return -EOVERFLOW; fl->fl_end = fl->fl_start + (l->l_len - 1); } else if (l->l_len < 0) { if (fl->fl_start + l->l_len < 0) return -EINVAL; fl->fl_end = fl->fl_start - 1; fl->fl_start += l->l_len; } else fl->fl_end = OFFSET_MAX; fl->c.flc_owner = current->files; fl->c.flc_pid = current->tgid; fl->c.flc_file = filp; fl->c.flc_flags = FL_POSIX; fl->fl_ops = NULL; fl->fl_lmops = NULL; return assign_type(&fl->c, l->l_type); } /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX * style lock. 
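 * For example: l_whence = SEEK_SET, l_start = 100, l_len = -10 locks bytes
 * 90..99 (fl_start = 90, fl_end = 99), while l_len = 0 means "from l_start
 * to end of file" (fl_end = OFFSET_MAX).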
*/ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock *l) { struct flock64 ll = { .l_type = l->l_type, .l_whence = l->l_whence, .l_start = l->l_start, .l_len = l->l_len, }; return flock64_to_posix_lock(filp, fl, &ll); } /* default lease lock manager operations */ static bool lease_break_callback(struct file_lease *fl) { kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); return false; } static void lease_setup(struct file_lease *fl, void **priv) { struct file *filp = fl->c.flc_file; struct fasync_struct *fa = *priv; /* * fasync_insert_entry() returns the old entry if any. If there was no * old entry, then it used "priv" and inserted it into the fasync list. * Clear the pointer to indicate that it shouldn't be freed. */ if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa)) *priv = NULL; __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0); } static const struct lease_manager_operations lease_manager_ops = { .lm_break = lease_break_callback, .lm_change = lease_modify, .lm_setup = lease_setup, }; /* * Initialize a lease, use the default lock manager operations */ static int lease_init(struct file *filp, int type, struct file_lease *fl) { if (assign_type(&fl->c, type) != 0) return -EINVAL; fl->c.flc_owner = filp; fl->c.flc_pid = current->tgid; fl->c.flc_file = filp; fl->c.flc_flags = FL_LEASE; fl->fl_lmops = &lease_manager_ops; return 0; } /* Allocate a file_lock initialised to this type of lease */ static struct file_lease *lease_alloc(struct file *filp, int type) { struct file_lease *fl = locks_alloc_lease(); int error = -ENOMEM; if (fl == NULL) return ERR_PTR(error); error = lease_init(filp, type, fl); if (error) { locks_free_lease(fl); return ERR_PTR(error); } return fl; } /* Check if two locks overlap each other. */ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) { return ((fl1->fl_end >= fl2->fl_start) && (fl2->fl_end >= fl1->fl_start)); } /* * Check whether two locks have the same owner. */ static int posix_same_owner(struct file_lock_core *fl1, struct file_lock_core *fl2) { return fl1->flc_owner == fl2->flc_owner; } /* Must be called with the flc_lock held! */ static void locks_insert_global_locks(struct file_lock_core *flc) { struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list); percpu_rwsem_assert_held(&file_rwsem); spin_lock(&fll->lock); flc->flc_link_cpu = smp_processor_id(); hlist_add_head(&flc->flc_link, &fll->hlist); spin_unlock(&fll->lock); } /* Must be called with the flc_lock held! */ static void locks_delete_global_locks(struct file_lock_core *flc) { struct file_lock_list_struct *fll; percpu_rwsem_assert_held(&file_rwsem); /* * Avoid taking lock if already unhashed. This is safe since this check * is done while holding the flc_lock, and new insertions into the list * also require that it be held. */ if (hlist_unhashed(&flc->flc_link)) return; fll = per_cpu_ptr(&file_lock_list, flc->flc_link_cpu); spin_lock(&fll->lock); hlist_del_init(&flc->flc_link); spin_unlock(&fll->lock); } static unsigned long posix_owner_key(struct file_lock_core *flc) { return (unsigned long) flc->flc_owner; } static void locks_insert_global_blocked(struct file_lock_core *waiter) { lockdep_assert_held(&blocked_lock_lock); hash_add(blocked_hash, &waiter->flc_link, posix_owner_key(waiter)); } static void locks_delete_global_blocked(struct file_lock_core *waiter) { lockdep_assert_held(&blocked_lock_lock); hash_del(&waiter->flc_link); } /* Remove waiter from blocker's block list. 
* When blocker ends up pointing to itself then the list is empty. * * Must be called with blocked_lock_lock held. */ static void __locks_unlink_block(struct file_lock_core *waiter) { locks_delete_global_blocked(waiter); list_del_init(&waiter->flc_blocked_member); } static void __locks_wake_up_blocks(struct file_lock_core *blocker) { while (!list_empty(&blocker->flc_blocked_requests)) { struct file_lock_core *waiter; struct file_lock *fl; waiter = list_first_entry(&blocker->flc_blocked_requests, struct file_lock_core, flc_blocked_member); fl = file_lock(waiter); __locks_unlink_block(waiter); if ((waiter->flc_flags & (FL_POSIX | FL_FLOCK)) && fl->fl_lmops && fl->fl_lmops->lm_notify) fl->fl_lmops->lm_notify(fl); else locks_wake_up(fl); /* * The setting of flc_blocker to NULL marks the "done" * point in deleting a block. Paired with acquire at the top * of locks_delete_block(). */ smp_store_release(&waiter->flc_blocker, NULL); } } static int __locks_delete_block(struct file_lock_core *waiter) { int status = -ENOENT; /* * If fl_blocker is NULL, it won't be set again as this thread "owns" * the lock and is the only one that might try to claim the lock. * * We use acquire/release to manage fl_blocker so that we can * optimize away taking the blocked_lock_lock in many cases. * * The smp_load_acquire guarantees two things: * * 1/ that fl_blocked_requests can be tested locklessly. If something * was recently added to that list it must have been in a locked region * *before* the locked region when fl_blocker was set to NULL. * * 2/ that no other thread is accessing 'waiter', so it is safe to free * it. __locks_wake_up_blocks is careful not to touch waiter after * fl_blocker is released. * * If a lockless check of fl_blocker shows it to be NULL, we know that * no new locks can be inserted into its fl_blocked_requests list, and * can avoid doing anything further if the list is empty. */ if (!smp_load_acquire(&waiter->flc_blocker) && list_empty(&waiter->flc_blocked_requests)) return status; spin_lock(&blocked_lock_lock); if (waiter->flc_blocker) status = 0; __locks_wake_up_blocks(waiter); __locks_unlink_block(waiter); /* * The setting of fl_blocker to NULL marks the "done" point in deleting * a block. Paired with acquire at the top of this function. */ smp_store_release(&waiter->flc_blocker, NULL); spin_unlock(&blocked_lock_lock); return status; } /** * locks_delete_block - stop waiting for a file lock * @waiter: the lock which was waiting * * lockd/nfsd need to disconnect the lock while working on it. */ int locks_delete_block(struct file_lock *waiter) { return __locks_delete_block(&waiter->c); } EXPORT_SYMBOL(locks_delete_block); /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The * fl_blocked_requests list itself is protected by the blocked_lock_lock, * but by ensuring that the flc_lock is also held on insertions we can avoid * taking the blocked_lock_lock in some cases when we see that the * fl_blocked_requests list is empty. * * Rather than just adding to the list, we check for conflicts with any existing * waiters, and add beneath any waiter that blocks the new waiter. * Thus wakeups don't happen until needed. 
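 *
 * For illustration: if lock A is held, and requests B and C both conflict
 * with A while C also conflicts with B, the resulting tree is
 *
 *	A <- B <- C
 *
 * so releasing A wakes only B; C remains queued behind B rather than being
 * woken needlessly.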
*/ static void __locks_insert_block(struct file_lock_core *blocker, struct file_lock_core *waiter, bool conflict(struct file_lock_core *, struct file_lock_core *)) { struct file_lock_core *flc; BUG_ON(!list_empty(&waiter->flc_blocked_member)); new_blocker: list_for_each_entry(flc, &blocker->flc_blocked_requests, flc_blocked_member) if (conflict(flc, waiter)) { blocker = flc; goto new_blocker; } waiter->flc_blocker = blocker; list_add_tail(&waiter->flc_blocked_member, &blocker->flc_blocked_requests); if ((blocker->flc_flags & (FL_POSIX|FL_OFDLCK)) == FL_POSIX) locks_insert_global_blocked(waiter); /* The requests in waiter->flc_blocked are known to conflict with * waiter, but might not conflict with blocker, or the requests * and lock which block it. So they all need to be woken. */ __locks_wake_up_blocks(waiter); } /* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock_core *blocker, struct file_lock_core *waiter, bool conflict(struct file_lock_core *, struct file_lock_core *)) { spin_lock(&blocked_lock_lock); __locks_insert_block(blocker, waiter, conflict); spin_unlock(&blocked_lock_lock); } /* * Wake up processes blocked waiting for blocker. * * Must be called with the inode->flc_lock held! */ static void locks_wake_up_blocks(struct file_lock_core *blocker) { /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and * the flc_lock is always held here. Note that removal from the * fl_blocked_requests list does not require the flc_lock, so we must * recheck list_empty() after acquiring the blocked_lock_lock. */ if (list_empty(&blocker->flc_blocked_requests)) return; spin_lock(&blocked_lock_lock); __locks_wake_up_blocks(blocker); spin_unlock(&blocked_lock_lock); } static void locks_insert_lock_ctx(struct file_lock_core *fl, struct list_head *before) { list_add_tail(&fl->flc_list, before); locks_insert_global_locks(fl); } static void locks_unlink_lock_ctx(struct file_lock_core *fl) { locks_delete_global_locks(fl); list_del_init(&fl->flc_list); locks_wake_up_blocks(fl); } static void locks_delete_lock_ctx(struct file_lock_core *fl, struct list_head *dispose) { locks_unlink_lock_ctx(fl); if (dispose) list_add(&fl->flc_list, dispose); else locks_free_lock(file_lock(fl)); } /* Determine if lock sys_fl blocks lock caller_fl. Common functionality * checks for shared/exclusive status of overlapping locks. */ static bool locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { if (sys_flc->flc_type == F_WRLCK) return true; if (caller_flc->flc_type == F_WRLCK) return true; return false; } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict(). */ static bool posix_locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { struct file_lock *caller_fl = file_lock(caller_flc); struct file_lock *sys_fl = file_lock(sys_flc); /* POSIX locks owned by the same process do not conflict with * each other. */ if (posix_same_owner(caller_flc, sys_flc)) return false; /* Check whether they overlap */ if (!locks_overlap(caller_fl, sys_fl)) return false; return locks_conflict(caller_flc, sys_flc); } /* Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK * path so checks for additional GETLK-specific things like F_UNLCK. 
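 * In particular, a GETLK request with l_type == F_UNLCK reports any
 * overlapping lock held by the same owner (for OFD locks, the same open
 * file description), regardless of the lock's type.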
*/ static bool posix_test_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) { struct file_lock_core *caller = &caller_fl->c; struct file_lock_core *sys = &sys_fl->c; /* F_UNLCK checks any locks on the same fd. */ if (lock_is_unlock(caller_fl)) { if (!posix_same_owner(caller, sys)) return false; return locks_overlap(caller_fl, sys_fl); } return posix_locks_conflict(caller, sys); } /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific * checking before calling the locks_conflict(). */ static bool flock_locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { /* FLOCK locks referring to the same filp do not conflict with * each other. */ if (caller_flc->flc_file == sys_flc->flc_file) return false; return locks_conflict(caller_flc, sys_flc); } void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; struct file_lock_context *ctx; struct inode *inode = file_inode(filp); void *owner; void (*func)(void); ctx = locks_inode_context(inode); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->c.flc_type = F_UNLCK; return; } retry: spin_lock(&ctx->flc_lock); list_for_each_entry(cfl, &ctx->flc_posix, c.flc_list) { if (!posix_test_locks_conflict(fl, cfl)) continue; if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) { owner = cfl->fl_lmops->lm_mod_owner; func = cfl->fl_lmops->lm_expire_lock; __module_get(owner); spin_unlock(&ctx->flc_lock); (*func)(); module_put(owner); goto retry; } locks_copy_conflock(fl, cfl); goto out; } fl->c.flc_type = F_UNLCK; out: spin_unlock(&ctx->flc_lock); return; } EXPORT_SYMBOL(posix_test_lock); /* * Deadlock detection: * * We attempt to detect deadlocks that are due purely to posix file * locks. * * We assume that a task can be waiting for at most one lock at a time. * So for any acquired lock, the process holding that lock may be * waiting on at most one other lock. That lock in turns may be held by * someone waiting for at most one other lock. Given a requested lock * caller_fl which is about to wait for a conflicting lock block_fl, we * follow this chain of waiters to ensure we are not about to create a * cycle. * * Since we do this before we ever put a process to sleep on a lock, we * are ensured that there is never a cycle; that is what guarantees that * the while() loop in posix_locks_deadlock() eventually completes. * * Note: the above assumption may not be true when handling lock * requests from a broken NFS client. It may also fail in the presence * of tasks (such as posix threads) sharing the same open file table. * To handle those cases, we just bail out after a few iterations. * * For FL_OFDLCK locks, the owner is the filp, not the files_struct. * Because the owner is not even nominally tied to a thread of * execution, the deadlock detection below can't reasonably work well. Just * skip it for those. * * In principle, we could do a more limited deadlock detection on FL_OFDLCK * locks that just checks for the case where two tasks are attempting to * upgrade from read to write locks on the same inode. */ #define MAX_DEADLK_ITERATIONS 10 /* Find a lock that the owner of the given @blocker is blocking on. 
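 * For example: task A holds lock LA and is blocked waiting on lock LB,
 * which task B holds. If B now requests a range covered by LA, the walk
 * goes LA -> (A is waiting on) LB -> owned by B, i.e. back to the caller,
 * so the request fails with -EDEADLK instead of blocking forever.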
*/ static struct file_lock_core *what_owner_is_waiting_for(struct file_lock_core *blocker) { struct file_lock_core *flc; hash_for_each_possible(blocked_hash, flc, flc_link, posix_owner_key(blocker)) { if (posix_same_owner(flc, blocker)) { while (flc->flc_blocker) flc = flc->flc_blocker; return flc; } } return NULL; } /* Must be called with the blocked_lock_lock held! */ static bool posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { struct file_lock_core *caller = &caller_fl->c; struct file_lock_core *blocker = &block_fl->c; int i = 0; lockdep_assert_held(&blocked_lock_lock); /* * This deadlock detector can't reasonably detect deadlocks with * FL_OFDLCK locks, since they aren't owned by a process, per-se. */ if (caller->flc_flags & FL_OFDLCK) return false; while ((blocker = what_owner_is_waiting_for(blocker))) { if (i++ > MAX_DEADLK_ITERATIONS) return false; if (posix_same_owner(caller, blocker)) return true; } return false; } /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks * after any leases, but before any posix locks. * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; int error = 0; bool found = false; LIST_HEAD(dispose); ctx = locks_get_lock_context(inode, request->c.flc_type); if (!ctx) { if (request->c.flc_type != F_UNLCK) return -ENOMEM; return (request->c.flc_flags & FL_EXISTS) ? -ENOENT : 0; } if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK)) { new_fl = locks_alloc_lock(); if (!new_fl) return -ENOMEM; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); if (request->c.flc_flags & FL_ACCESS) goto find_conflict; list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) { if (request->c.flc_file != fl->c.flc_file) continue; if (request->c.flc_type == fl->c.flc_type) goto out; found = true; locks_delete_lock_ctx(&fl->c, &dispose); break; } if (lock_is_unlock(request)) { if ((request->c.flc_flags & FL_EXISTS) && !found) error = -ENOENT; goto out; } find_conflict: list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) { if (!flock_locks_conflict(&request->c, &fl->c)) continue; error = -EAGAIN; if (!(request->c.flc_flags & FL_SLEEP)) goto out; error = FILE_LOCK_DEFERRED; locks_insert_block(&fl->c, &request->c, flock_locks_conflict); goto out; } if (request->c.flc_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); locks_insert_lock_ctx(&new_fl->c, &ctx->flc_flock); new_fl = NULL; error = 0; out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); trace_flock_lock_inode(inode, request, error); return error; } static int posix_lock_inode(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { struct file_lock *fl, *tmp; struct file_lock *new_fl = NULL; struct file_lock *new_fl2 = NULL; struct file_lock *left = NULL; struct file_lock *right = NULL; struct file_lock_context *ctx; int error; bool added = false; LIST_HEAD(dispose); void *owner; void (*func)(void); ctx = locks_get_lock_context(inode, request->c.flc_type); if (!ctx) return lock_is_unlock(request) ? 0 : -ENOMEM; /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. 
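 * (For example, setting a write lock in the middle of a wider read lock
 * held by the same owner inserts the new lock and splits the old one in
 * two, consuming both structures.)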
* * In some cases we can be sure, that no new locks will be needed */ if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK || request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { new_fl = locks_alloc_lock(); new_fl2 = locks_alloc_lock(); } retry: percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. If * there are any, either return error or put the request on the * blocker's list of waiters and the global blocked_hash. */ if (request->c.flc_type != F_UNLCK) { list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) { if (!posix_locks_conflict(&request->c, &fl->c)) continue; if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable && (*fl->fl_lmops->lm_lock_expirable)(fl)) { owner = fl->fl_lmops->lm_mod_owner; func = fl->fl_lmops->lm_expire_lock; __module_get(owner); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); (*func)(); module_put(owner); goto retry; } if (conflock) locks_copy_conflock(conflock, fl); error = -EAGAIN; if (!(request->c.flc_flags & FL_SLEEP)) goto out; /* * Deadlock detection and insertion into the blocked * locks list must be done while holding the same lock! */ error = -EDEADLK; spin_lock(&blocked_lock_lock); /* * Ensure that we don't find any locks blocked on this * request during deadlock detection. */ __locks_wake_up_blocks(&request->c); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; __locks_insert_block(&fl->c, &request->c, posix_locks_conflict); } spin_unlock(&blocked_lock_lock); goto out; } } /* If we're just looking for a conflict, we're done. */ error = 0; if (request->c.flc_flags & FL_ACCESS) goto out; /* Find the first old lock with the same owner as the new lock */ list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) { if (posix_same_owner(&request->c, &fl->c)) break; } /* Process locks with this owner. */ list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, c.flc_list) { if (!posix_same_owner(&request->c, &fl->c)) break; /* Detect adjacent or overlapping regions (if same lock type) */ if (request->c.flc_type == fl->c.flc_type) { /* In all comparisons of start vs end, use * "start - 1" rather than "end + 1". If end * is OFFSET_MAX, end + 1 will become negative. */ if (fl->fl_end < request->fl_start - 1) continue; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here. */ if (fl->fl_start - 1 > request->fl_end) break; /* If we come here, the new and old lock are of the * same type and adjacent or overlapping. Make one * lock yielding from the lower start address of both * locks to the higher end address. */ if (fl->fl_start > request->fl_start) fl->fl_start = request->fl_start; else request->fl_start = fl->fl_start; if (fl->fl_end < request->fl_end) fl->fl_end = request->fl_end; else request->fl_end = fl->fl_end; if (added) { locks_delete_lock_ctx(&fl->c, &dispose); continue; } request = fl; added = true; } else { /* Processing for different lock types is a bit * more complex. */ if (fl->fl_end < request->fl_start) continue; if (fl->fl_start > request->fl_end) break; if (lock_is_unlock(request)) added = true; if (fl->fl_start < request->fl_start) left = fl; /* If the next lock in the list has a higher end * address than the new one, insert the new one here. */ if (fl->fl_end > request->fl_end) { right = fl; break; } if (fl->fl_start >= request->fl_start) { /* The new lock completely replaces an old * one (This may happen several times). 
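 * (e.g. a write lock over 0..199 from the same owner swallows an existing
 * read lock over 50..99 outright)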
*/ if (added) { locks_delete_lock_ctx(&fl->c, &dispose); continue; } /* * Replace the old lock with new_fl, and * remove the old one. It's safe to do the * insert here since we know that we won't be * using new_fl later, and that the lock is * just replacing an existing lock. */ error = -ENOLCK; if (!new_fl) goto out; locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); request = new_fl; new_fl = NULL; locks_insert_lock_ctx(&request->c, &fl->c.flc_list); locks_delete_lock_ctx(&fl->c, &dispose); added = true; } } } /* * The above code only modifies existing locks in case of merging or * replacing. If new lock(s) need to be inserted all modifications are * done below this, so it's safe yet to bail out. */ error = -ENOLCK; /* "no luck" */ if (right && left == right && !new_fl2) goto out; error = 0; if (!added) { if (lock_is_unlock(request)) { if (request->c.flc_flags & FL_EXISTS) error = -ENOENT; goto out; } if (!new_fl) { error = -ENOLCK; goto out; } locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); locks_insert_lock_ctx(&new_fl->c, &fl->c.flc_list); fl = new_fl; new_fl = NULL; } if (right) { if (left == right) { /* The new lock breaks the old one in two pieces, * so we have to use the second new lock. */ left = new_fl2; new_fl2 = NULL; locks_copy_lock(left, right); locks_insert_lock_ctx(&left->c, &fl->c.flc_list); } right->fl_start = request->fl_end + 1; locks_wake_up_blocks(&right->c); } if (left) { left->fl_end = request->fl_start - 1; locks_wake_up_blocks(&left->c); } out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); trace_posix_lock_inode(inode, request, error); /* * Free any unused locks. */ if (new_fl) locks_free_lock(new_fl); if (new_fl2) locks_free_lock(new_fl2); locks_dispose_list(&dispose); return error; } /** * posix_lock_file - Apply a POSIX-style lock to a file * @filp: The file to apply the lock to * @fl: The lock to be applied * @conflock: Place to return a copy of the conflicting lock, if found. * * Add a POSIX style lock to a file. * We merge adjacent & overlapping locks whenever possible. * POSIX locks are sorted by owner task, then by starting address * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { return posix_lock_inode(file_inode(filp), fl, conflock); } EXPORT_SYMBOL(posix_lock_file); /** * posix_lock_inode_wait - Apply a POSIX-style lock to a file * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * * Apply a POSIX style lock request to an inode. 
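 * The request is re-attempted each time this waiter is woken by the holder
 * of a conflicting lock, until it is granted or a signal interrupts the
 * sleep; in either case any pending block entry is removed with
 * locks_delete_block() before returning.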
*/ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } static void lease_clear_pending(struct file_lease *fl, int arg) { switch (arg) { case F_UNLCK: fl->c.flc_flags &= ~FL_UNLOCK_PENDING; fallthrough; case F_RDLCK: fl->c.flc_flags &= ~FL_DOWNGRADE_PENDING; } } /* We already had a lease on this file; just change its type */ int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { int error = assign_type(&fl->c, arg); if (error) return error; lease_clear_pending(fl, arg); locks_wake_up_blocks(&fl->c); if (arg == F_UNLCK) { struct file *filp = fl->c.flc_file; f_delown(filp); filp->f_owner.signum = 0; fasync_helper(0, fl->c.flc_file, 0, &fl->fl_fasync); if (fl->fl_fasync != NULL) { printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); fl->fl_fasync = NULL; } locks_delete_lock_ctx(&fl->c, dispose); } return 0; } EXPORT_SYMBOL(lease_modify); static bool past_time(unsigned long then) { if (!then) /* 0 is a special value meaning "this never expires": */ return false; return time_after(jiffies, then); } static void time_out_leases(struct inode *inode, struct list_head *dispose) { struct file_lock_context *ctx = inode->i_flctx; struct file_lease *fl, *tmp; lockdep_assert_held(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) { trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) lease_modify(fl, F_RDLCK, dispose); if (past_time(fl->fl_break_time)) lease_modify(fl, F_UNLCK, dispose); } } static bool leases_conflict(struct file_lock_core *lc, struct file_lock_core *bc) { bool rc; struct file_lease *lease = file_lease(lc); struct file_lease *breaker = file_lease(bc); if (lease->fl_lmops->lm_breaker_owns_lease && lease->fl_lmops->lm_breaker_owns_lease(lease)) return false; if ((bc->flc_flags & FL_LAYOUT) != (lc->flc_flags & FL_LAYOUT)) { rc = false; goto trace; } if ((bc->flc_flags & FL_DELEG) && (lc->flc_flags & FL_LEASE)) { rc = false; goto trace; } rc = locks_conflict(bc, lc); trace: trace_leases_conflict(rc, lease, breaker); return rc; } static bool any_leases_conflict(struct inode *inode, struct file_lease *breaker) { struct file_lock_context *ctx = inode->i_flctx; struct file_lock_core *flc; lockdep_assert_held(&ctx->flc_lock); list_for_each_entry(flc, &ctx->flc_lease, flc_list) { if (leases_conflict(flc, &breaker->c)) return true; } return false; } /** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: * break all leases * @type: FL_LEASE: break leases and delegations; FL_DELEG: break * only delegations * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; struct file_lock_context *ctx; struct file_lease *new_fl, *fl, *tmp; unsigned long break_time; int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); new_fl = lease_alloc(NULL, want_write ? 
F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); new_fl->c.flc_flags = type; /* typically we will check that ctx is non-NULL before calling */ ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); if (!any_leases_conflict(inode, new_fl)) goto out; break_time = 0; if (lease_break_time > 0) { break_time = jiffies + lease_break_time * HZ; if (break_time == 0) break_time++; /* so that 0 means no break time */ } list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) { if (!leases_conflict(&fl->c, &new_fl->c)) continue; if (want_write) { if (fl->c.flc_flags & FL_UNLOCK_PENDING) continue; fl->c.flc_flags |= FL_UNLOCK_PENDING; fl->fl_break_time = break_time; } else { if (lease_breaking(fl)) continue; fl->c.flc_flags |= FL_DOWNGRADE_PENDING; fl->fl_downgrade_time = break_time; } if (fl->fl_lmops->lm_break(fl)) locks_delete_lock_ctx(&fl->c, &dispose); } if (list_empty(&ctx->flc_lease)) goto out; if (mode & O_NONBLOCK) { trace_break_lease_noblock(inode, new_fl); error = -EWOULDBLOCK; goto out; } restart: fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list); break_time = fl->fl_break_time; if (break_time != 0) break_time -= jiffies; if (break_time == 0) break_time++; locks_insert_block(&fl->c, &new_fl->c, leases_conflict); trace_break_lease_block(inode, new_fl); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->c.flc_wait, list_empty(&new_fl->c.flc_blocked_member), break_time); percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); trace_break_lease_unblock(inode, new_fl); __locks_delete_block(&new_fl->c); if (error >= 0) { /* * Wait for the next conflicting lease that has not been * broken yet */ if (error == 0) time_out_leases(inode, &dispose); if (any_leases_conflict(inode, new_fl)) goto restart; error = 0; } out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); free_lock: locks_free_lease(new_fl); return error; } EXPORT_SYMBOL(__break_lease); /** * lease_get_mtime - update modified time of an inode with exclusive lease * @inode: the inode * @time: pointer to a timespec which contains the last modified time * * This is to force NFS clients to flush their caches for files with * exclusive leases. The justification is that if someone has an * exclusive lease, then they could be modifying it. */ void lease_get_mtime(struct inode *inode, struct timespec64 *time) { bool has_lease = false; struct file_lock_context *ctx; struct file_lock_core *flc; ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); flc = list_first_entry_or_null(&ctx->flc_lease, struct file_lock_core, flc_list); if (flc && flc->flc_type == F_WRLCK) has_lease = true; spin_unlock(&ctx->flc_lock); } if (has_lease) *time = current_time(inode); } EXPORT_SYMBOL(lease_get_mtime); /** * fcntl_getlease - Enquire what lease is currently active * @filp: the file * * The value returned by this function will be one of * (if no lease break is pending): * * %F_RDLCK to indicate a shared lease is held. * * %F_WRLCK to indicate an exclusive lease is held. * * %F_UNLCK to indicate no lease is held. * * (if a lease break is pending): * * %F_RDLCK to indicate an exclusive lease needs to be * changed to a shared lease (or removed). * * %F_UNLCK to indicate the lease needs to be removed. 
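 *
 * A rough sketch of the userspace side (illustrative only, error handling
 * omitted; the file name is made up):
 *
 *	int fd = open("/some/file", O_RDONLY);
 *	fcntl(fd, F_SETLEASE, F_RDLCK);
 *	int held = fcntl(fd, F_GETLEASE);
 *
 * where "held" ends up as F_RDLCK, F_WRLCK or F_UNLCK as described above.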
* * XXX: sfr & willy disagree over whether F_INPROGRESS * should be returned to userspace. */ int fcntl_getlease(struct file *filp) { struct file_lease *fl; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; int type = F_UNLCK; LIST_HEAD(dispose); ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file != filp) continue; type = target_leasetype(fl); break; } spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); } return type; } /** * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the * desired lease. * @filp: file to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags * * Check to see if there's an existing open fd on this file that would * conflict with the lease we're trying to set. */ static int check_conflicting_open(struct file *filp, const int arg, int flags) { struct inode *inode = file_inode(filp); int self_wcount = 0, self_rcount = 0; if (flags & FL_LAYOUT) return 0; if (flags & FL_DELEG) /* We leave these checks to the caller */ return 0; if (arg == F_RDLCK) return inode_is_open_for_write(inode) ? -EAGAIN : 0; else if (arg != F_WRLCK) return 0; /* * Make sure that only read/write count is from lease requestor. * Note that this will result in denying write leases when i_writecount * is negative, which is what we want. (We shouldn't grant write leases * on files open for execution.) */ if (filp->f_mode & FMODE_WRITE) self_wcount = 1; else if (filp->f_mode & FMODE_READ) self_rcount = 1; if (atomic_read(&inode->i_writecount) != self_wcount || atomic_read(&inode->i_readcount) != self_rcount) return -EAGAIN; return 0; } static int generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **priv) { struct file_lease *fl, *my_fl = NULL, *lease; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->c.flc_flags & FL_DELEG; int error; LIST_HEAD(dispose); lease = *flp; trace_generic_add_lease(inode, lease); /* Note that arg is never F_UNLCK here */ ctx = locks_get_lock_context(inode, arg); if (!ctx) return -ENOMEM; /* * In the delegation case we need mutual exclusion with * a number of operations that take the i_mutex. We trylock * because delegations are an optional optimization, and if * there's some chance of a conflict--we'd rather not * bother, maybe that's a sign this just isn't a good file to * hand out a delegation on. */ if (is_deleg && !inode_trylock(inode)) return -EAGAIN; percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(filp, arg, lease->c.flc_flags); if (error) goto out; /* * At this point, we know that if there is an exclusive * lease on this file, then we hold it on this filp * (otherwise our open of this file would have blocked). * And if we are trying to acquire an exclusive lease, * then the file is not open by anyone (including us) * except for this filp. 
*/ error = -EAGAIN; list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file == filp && fl->c.flc_owner == lease->c.flc_owner) { my_fl = fl; continue; } /* * No exclusive leases if someone else has a lease on * this file: */ if (arg == F_WRLCK) goto out; /* * Modifying our existing lease is OK, but no getting a * new lease if someone else is opening for write: */ if (fl->c.flc_flags & FL_UNLOCK_PENDING) goto out; } if (my_fl != NULL) { lease = my_fl; error = lease->fl_lmops->lm_change(lease, arg, &dispose); if (error) goto out; goto out_setup; } error = -EINVAL; if (!leases_enable) goto out; locks_insert_lock_ctx(&lease->c, &ctx->flc_lease); /* * The check in break_lease() is lockless. It's possible for another * open to race in after we did the earlier check for a conflicting * open but before the lease was inserted. Check again for a * conflicting open and cancel the lease if there is one. * * We also add a barrier here to ensure that the insertion of the lock * precedes these checks. */ smp_mb(); error = check_conflicting_open(filp, arg, lease->c.flc_flags); if (error) { locks_unlink_lock_ctx(&lease->c); goto out; } out_setup: if (lease->fl_lmops->lm_setup) lease->fl_lmops->lm_setup(lease, priv); out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); if (is_deleg) inode_unlock(inode); if (!error && !my_fl) *flp = NULL; return error; } static int generic_delete_lease(struct file *filp, void *owner) { int error = -EAGAIN; struct file_lease *fl, *victim = NULL; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; LIST_HEAD(dispose); ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file == filp && fl->c.flc_owner == owner) { victim = fl; break; } } trace_generic_delete_lease(inode, victim); if (victim) error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); return error; } /** * generic_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @flp: input - file_lock to use, output - file_lock inserted * @priv: private data for lm_setup (may be NULL if lm_setup * doesn't require it) * * The (input) flp->fl_lmops->lm_break function is required * by break_lease(). */ int generic_setlease(struct file *filp, int arg, struct file_lease **flp, void **priv) { switch (arg) { case F_UNLCK: return generic_delete_lease(filp, *priv); case F_RDLCK: case F_WRLCK: if (!(*flp)->fl_lmops->lm_break) { WARN_ON_ONCE(1); return -ENOLCK; } return generic_add_lease(filp, arg, flp, priv); default: return -EINVAL; } } EXPORT_SYMBOL(generic_setlease); /* * Kernel subsystems can register to be notified on any attempt to set * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd * to close files that it may have cached when there is an attempt to set a * conflicting lease. 
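 *
 * A minimal sketch of how a subsystem might hook in (illustrative only;
 * my_lease_cb and my_nb are made-up names, error handling omitted):
 *
 *	static int my_lease_cb(struct notifier_block *nb,
 *			       unsigned long arg, void *data)
 *	{
 *		struct file_lease *lease = data;
 *		... close any cached opens of lease->c.flc_file ...
 *		return NOTIFY_OK;
 *	}
 *	static struct notifier_block my_nb = { .notifier_call = my_lease_cb };
 *
 * registered with lease_register_notifier(&my_nb) and torn down again with
 * lease_unregister_notifier(&my_nb).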
*/ static struct srcu_notifier_head lease_notifier_chain; static inline void lease_notifier_chain_init(void) { srcu_init_notifier_head(&lease_notifier_chain); } static inline void setlease_notifier(int arg, struct file_lease *lease) { if (arg != F_UNLCK) srcu_notifier_call_chain(&lease_notifier_chain, arg, lease); } int lease_register_notifier(struct notifier_block *nb) { return srcu_notifier_chain_register(&lease_notifier_chain, nb); } EXPORT_SYMBOL_GPL(lease_register_notifier); void lease_unregister_notifier(struct notifier_block *nb) { srcu_notifier_chain_unregister(&lease_notifier_chain, nb); } EXPORT_SYMBOL_GPL(lease_unregister_notifier); int kernel_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { if (lease) setlease_notifier(arg, *lease); if (filp->f_op->setlease) return filp->f_op->setlease(filp, arg, lease, priv); else return generic_setlease(filp, arg, lease, priv); } EXPORT_SYMBOL_GPL(kernel_setlease); /** * vfs_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @lease: file_lock to use when adding a lease * @priv: private info for lm_setup when adding a lease (may be * NULL if lm_setup doesn't require it) * * Call this to establish a lease on the file. The "lease" argument is not * used for F_UNLCK requests and may be NULL. For commands that set or alter * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be * set; if not, this function will return -ENOLCK (and generate a scary-looking * stack trace). * * The "priv" pointer is passed directly to the lm_setup function as-is. It * may be NULL if the lm_setup operation doesn't require it. */ int vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { struct inode *inode = file_inode(filp); vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode); int error; if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE)) return -EACCES; if (!S_ISREG(inode->i_mode)) return -EINVAL; error = security_file_lock(filp, arg); if (error) return error; return kernel_setlease(filp, arg, lease, priv); } EXPORT_SYMBOL_GPL(vfs_setlease); static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg) { struct file_lease *fl; struct fasync_struct *new; int error; fl = lease_alloc(filp, arg); if (IS_ERR(fl)) return PTR_ERR(fl); new = fasync_alloc(); if (!new) { locks_free_lease(fl); return -ENOMEM; } new->fa_fd = fd; error = vfs_setlease(filp, arg, &fl, (void **)&new); if (fl) locks_free_lease(fl); if (new) fasync_free(new); return error; } /** * fcntl_setlease - sets a lease on an open file * @fd: open file descriptor * @filp: file pointer * @arg: type of lease to obtain * * Call this fcntl to establish a lease on the file. * Note that you also need to call %F_SETSIG to * receive a signal when the lease is broken. */ int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { if (arg == F_UNLCK) return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); return do_fcntl_add_lease(fd, filp, arg); } /** * flock_lock_inode_wait - Apply a FLOCK-style lock to a file * @inode: inode of the file to apply to * @fl: The lock to be applied * * Apply a FLOCK style lock request to an inode. 
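 * This is the helper behind flock(2): a userspace call such as
 * flock(fd, LOCK_EX | LOCK_NB) ends up here (via locks_lock_inode_wait())
 * whenever the filesystem does not provide its own ->flock() method.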
*/ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } /** * locks_lock_inode_wait - Apply a lock to an inode * @inode: inode of the file to apply to * @fl: The lock to be applied * * Apply a POSIX or FLOCK style lock request to an inode. */ int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int res = 0; switch (fl->c.flc_flags & (FL_POSIX|FL_FLOCK)) { case FL_POSIX: res = posix_lock_inode_wait(inode, fl); break; case FL_FLOCK: res = flock_lock_inode_wait(inode, fl); break; default: BUG(); } return res; } EXPORT_SYMBOL(locks_lock_inode_wait); /** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. * @cmd: the type of lock to apply. * * Apply a %FL_FLOCK style lock to an open file descriptor. * The @cmd can be one of: * * - %LOCK_SH -- a shared lock. * - %LOCK_EX -- an exclusive lock. * - %LOCK_UN -- remove an existing lock. * - %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED) * * %LOCK_MAND support has been removed from the kernel. */ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { int can_sleep, error, type; struct file_lock fl; struct fd f; /* * LOCK_MAND locks were broken for a long time in that they never * conflicted with one another and didn't prevent any sort of open, * read or write activity. * * Just ignore these requests now, to preserve legacy behavior, but * throw a warning to let people know that they don't actually work. */ if (cmd & LOCK_MAND) { pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid); return 0; } type = flock_translate_cmd(cmd & ~LOCK_NB); if (type < 0) return type; error = -EBADF; f = fdget(fd); if (!f.file) return error; if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE))) goto out_putf; flock_make_lock(f.file, &fl, type); error = security_file_lock(f.file, fl.c.flc_type); if (error) goto out_putf; can_sleep = !(cmd & LOCK_NB); if (can_sleep) fl.c.flc_flags |= FL_SLEEP; if (f.file->f_op->flock) error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, &fl); else error = locks_lock_file_wait(f.file, &fl); locks_release_private(&fl); out_putf: fdput(f); return error; } /** * vfs_test_lock - test file byte range lock * @filp: The file to test lock for * @fl: The lock to test; also used to hold result * * Returns -ERRNO on failure. Indicates presence of conflicting lock by * setting conf->fl_type to something other than F_UNLCK. 
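 * (The "conf" mentioned above is the @fl argument itself: callers fill in
 * the lock to test and read the result back from the same structure, whose
 * flc_type is left as F_UNLCK when no conflicting lock exists.)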
*/ int vfs_test_lock(struct file *filp, struct file_lock *fl) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; } EXPORT_SYMBOL_GPL(vfs_test_lock); /** * locks_translate_pid - translate a file_lock's fl_pid number into a namespace * @fl: The file_lock who's fl_pid should be translated * @ns: The namespace into which the pid should be translated * * Used to translate a fl_pid into a namespace virtual pid number */ static pid_t locks_translate_pid(struct file_lock_core *fl, struct pid_namespace *ns) { pid_t vnr; struct pid *pid; if (fl->flc_flags & FL_OFDLCK) return -1; /* Remote locks report a negative pid value */ if (fl->flc_pid <= 0) return fl->flc_pid; /* * If the flock owner process is dead and its pid has been already * freed, the translation below won't work, but we still want to show * flock owner pid number in init pidns. */ if (ns == &init_pid_ns) return (pid_t) fl->flc_pid; rcu_read_lock(); pid = find_pid_ns(fl->flc_pid, &init_pid_ns); vnr = pid_nr_ns(pid, ns); rcu_read_unlock(); return vnr; } static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current)); #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via * legacy 32bit flock. */ if (fl->fl_start > OFFT_OFFSET_MAX) return -EOVERFLOW; if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) return -EOVERFLOW; #endif flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; flock->l_whence = 0; flock->l_type = fl->c.flc_type; return 0; } #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current)); flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; flock->l_whence = 0; flock->l_type = fl->c.flc_type; } #endif /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock) { struct file_lock *fl; int error; fl = locks_alloc_lock(); if (fl == NULL) return -ENOMEM; error = -EINVAL; if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK && flock->l_type != F_WRLCK) goto out; error = flock_to_posix_lock(filp, fl, flock); if (error) goto out; if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock->l_pid != 0) goto out; fl->c.flc_flags |= FL_OFDLCK; fl->c.flc_owner = filp; } error = vfs_test_lock(filp, fl); if (error) goto out; flock->l_type = fl->c.flc_type; if (fl->c.flc_type != F_UNLCK) { error = posix_lock_to_flock(flock, fl); if (error) goto out; } out: locks_free_lock(fl); return error; } /** * vfs_lock_file - file byte range lock * @filp: The file to apply the lock to * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) * @fl: The lock to be applied * @conf: Place to return a copy of the conflicting lock, if found. * * A caller that doesn't care about the conflicting lock may pass NULL * as the final argument. * * If the filesystem defines a private ->lock() method, then @conf will * be left unchanged; so a caller that cares should initialize it to * some acceptable default. 
* * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX * locks, the ->lock() interface may return asynchronously, before the lock has * been granted or denied by the underlying filesystem, if (and only if) * lm_grant is set. Additionally EXPORT_OP_ASYNC_LOCK in export_operations * flags need to be set. * * Callers expecting ->lock() to return asynchronously will only use F_SETLK, * not F_SETLKW; they will set FL_SLEEP if (and only if) the request is for a * blocking lock. When ->lock() does return asynchronously, it must return * FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock request completes. * If the request is for non-blocking lock the file system should return * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine * with the result. If the request timed out the callback routine will return a * nonzero return code and the file system should release the lock. The file * system is also responsible to keep a corresponding posix lock when it * grants a lock so the VFS can find out which locks are locally held and do * the correct lock cleanup when required. * The underlying filesystem must not drop the kernel lock or call * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED * return code. */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); } EXPORT_SYMBOL_GPL(vfs_lock_file); static int do_lock_file_wait(struct file *filp, unsigned int cmd, struct file_lock *fl) { int error; error = security_file_lock(filp, fl->c.flc_type); if (error) return error; for (;;) { error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } /* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */ static int check_fmode_for_setlk(struct file_lock *fl) { switch (fl->c.flc_type) { case F_RDLCK: if (!(fl->c.flc_file->f_mode & FMODE_READ)) return -EBADF; break; case F_WRLCK: if (!(fl->c.flc_file->f_mode & FMODE_WRITE)) return -EBADF; } return 0; } /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, struct flock *flock) { struct file_lock *file_lock = locks_alloc_lock(); struct inode *inode = file_inode(filp); struct file *f; int error; if (file_lock == NULL) return -ENOLCK; error = flock_to_posix_lock(filp, file_lock, flock); if (error) goto out; error = check_fmode_for_setlk(file_lock); if (error) goto out; /* * If the cmd is requesting file-private locks, then set the * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLK; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; break; case F_OFD_SETLKW: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLKW; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; fallthrough; case F_SETLKW: file_lock->c.flc_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); /* * Attempt to detect a close/fcntl race and recover by releasing the * lock that was just acquired. 
There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->c.flc_type != F_UNLCK && !(file_lock->c.flc_flags & FL_OFDLCK)) { struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ spin_lock(&files->file_lock); f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { file_lock->c.flc_type = F_UNLCK; error = do_lock_file_wait(filp, cmd, file_lock); WARN_ON_ONCE(error); error = -EBADF; } } out: trace_fcntl_setlk(inode, file_lock, error); locks_free_lock(file_lock); return error; } #if BITS_PER_LONG == 32 /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock) { struct file_lock *fl; int error; fl = locks_alloc_lock(); if (fl == NULL) return -ENOMEM; error = -EINVAL; if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK && flock->l_type != F_WRLCK) goto out; error = flock64_to_posix_lock(filp, fl, flock); if (error) goto out; if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock->l_pid != 0) goto out; fl->c.flc_flags |= FL_OFDLCK; fl->c.flc_owner = filp; } error = vfs_test_lock(filp, fl); if (error) goto out; flock->l_type = fl->c.flc_type; if (fl->c.flc_type != F_UNLCK) posix_lock_to_flock64(flock, fl); out: locks_free_lock(fl); return error; } /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, struct flock64 *flock) { struct file_lock *file_lock = locks_alloc_lock(); struct file *f; int error; if (file_lock == NULL) return -ENOLCK; error = flock64_to_posix_lock(filp, file_lock, flock); if (error) goto out; error = check_fmode_for_setlk(file_lock); if (error) goto out; /* * If the cmd is requesting file-private locks, then set the * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLK64; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; break; case F_OFD_SETLKW: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLKW64; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; fallthrough; case F_SETLKW64: file_lock->c.flc_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); /* * Attempt to detect a close/fcntl race and recover by releasing the * lock that was just acquired. There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->c.flc_type != F_UNLCK && !(file_lock->c.flc_flags & FL_OFDLCK)) { struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ spin_lock(&files->file_lock); f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { file_lock->c.flc_type = F_UNLCK; error = do_lock_file_wait(filp, cmd, file_lock); WARN_ON_ONCE(error); error = -EBADF; } } out: locks_free_lock(file_lock); return error; } #endif /* BITS_PER_LONG == 32 */ /* * This function is called when the file is being removed * from the task's fd array. POSIX locks belonging to this task * are deleted at this time. 
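 * Locks set with F_OFD_SETLK are owned by the open file description rather
 * than the process, so they are not dropped here; they go away on the last
 * close of that description, via locks_remove_file() below.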
*/ void locks_remove_posix(struct file *filp, fl_owner_t owner) { int error; struct inode *inode = file_inode(filp); struct file_lock lock; struct file_lock_context *ctx; /* * If there are no locks held on this file, we don't need to call * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; locks_init_lock(&lock); lock.c.flc_type = F_UNLCK; lock.c.flc_flags = FL_POSIX | FL_CLOSE; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; lock.c.flc_owner = owner; lock.c.flc_pid = current->tgid; lock.c.flc_file = filp; lock.fl_ops = NULL; lock.fl_lmops = NULL; error = vfs_lock_file(filp, F_SETLK, &lock, NULL); if (lock.fl_ops && lock.fl_ops->fl_release_private) lock.fl_ops->fl_release_private(&lock); trace_locks_remove_posix(inode, &lock, error); } EXPORT_SYMBOL(locks_remove_posix); /* The i_flctx must be valid when calling into here */ static void locks_remove_flock(struct file *filp, struct file_lock_context *flctx) { struct file_lock fl; struct inode *inode = file_inode(filp); if (list_empty(&flctx->flc_flock)) return; flock_make_lock(filp, &fl, F_UNLCK); fl.c.flc_flags |= FL_CLOSE; if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else flock_lock_inode(inode, &fl); if (fl.fl_ops && fl.fl_ops->fl_release_private) fl.fl_ops->fl_release_private(&fl); } /* The i_flctx must be valid when calling into here */ static void locks_remove_lease(struct file *filp, struct file_lock_context *ctx) { struct file_lease *fl, *tmp; LIST_HEAD(dispose); if (list_empty(&ctx->flc_lease)) return; percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) if (filp == fl->c.flc_file) lease_modify(fl, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); } /* * This function is called on the last close of an open file. */ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; ctx = locks_inode_context(file_inode(filp)); if (!ctx) return; /* remove any OFD locks */ locks_remove_posix(filp, filp); /* remove flock locks */ locks_remove_flock(filp, ctx); /* remove any leases */ locks_remove_lease(filp, ctx); spin_lock(&ctx->flc_lock); locks_check_ctx_file_list(filp, &ctx->flc_posix, "POSIX"); locks_check_ctx_file_list(filp, &ctx->flc_flock, "FLOCK"); locks_check_ctx_file_list(filp, &ctx->flc_lease, "LEASE"); spin_unlock(&ctx->flc_lock); } /** * vfs_cancel_lock - file byte range unblock lock * @filp: The file to apply the unblock to * @fl: The lock to be unblocked * * Used by lock managers to cancel blocked requests */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } EXPORT_SYMBOL_GPL(vfs_cancel_lock); /** * vfs_inode_has_locks - are any file locks held on @inode? * @inode: inode to check for locks * * Return true if there are any FL_POSIX or FL_FLOCK locks currently * set on @inode. 
*/ bool vfs_inode_has_locks(struct inode *inode) { struct file_lock_context *ctx; bool ret; ctx = locks_inode_context(inode); if (!ctx) return false; spin_lock(&ctx->flc_lock); ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock); spin_unlock(&ctx->flc_lock); return ret; } EXPORT_SYMBOL_GPL(vfs_inode_has_locks); #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> #include <linux/seq_file.h> struct locks_iterator { int li_cpu; loff_t li_pos; }; static void lock_get_status(struct seq_file *f, struct file_lock_core *flc, loff_t id, char *pfx, int repeat) { struct inode *inode = NULL; unsigned int pid; struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb); int type = flc->flc_type; struct file_lock *fl = file_lock(flc); pid = locks_translate_pid(flc, proc_pidns); /* * If lock owner is dead (and pid is freed) or not visible in current * pidns, zero is shown as a pid value. Check lock info from * init_pid_ns to get saved lock pid value. */ if (flc->flc_file != NULL) inode = file_inode(flc->flc_file); seq_printf(f, "%lld: ", id); if (repeat) seq_printf(f, "%*s", repeat - 1 + (int)strlen(pfx), pfx); if (flc->flc_flags & FL_POSIX) { if (flc->flc_flags & FL_ACCESS) seq_puts(f, "ACCESS"); else if (flc->flc_flags & FL_OFDLCK) seq_puts(f, "OFDLCK"); else seq_puts(f, "POSIX "); seq_printf(f, " %s ", (inode == NULL) ? "*NOINODE*" : "ADVISORY "); } else if (flc->flc_flags & FL_FLOCK) { seq_puts(f, "FLOCK ADVISORY "); } else if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) { struct file_lease *lease = file_lease(flc); type = target_leasetype(lease); if (flc->flc_flags & FL_DELEG) seq_puts(f, "DELEG "); else seq_puts(f, "LEASE "); if (lease_breaking(lease)) seq_puts(f, "BREAKING "); else if (flc->flc_file) seq_puts(f, "ACTIVE "); else seq_puts(f, "BREAKER "); } else { seq_puts(f, "UNKNOWN UNKNOWN "); } seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" : (type == F_RDLCK) ? 
"READ" : "UNLCK"); if (inode) { /* userspace relies on this representation of dev_t */ seq_printf(f, "%d %02x:%02x:%lu ", pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); } else { seq_printf(f, "%d <none>:0 ", pid); } if (flc->flc_flags & FL_POSIX) { if (fl->fl_end == OFFSET_MAX) seq_printf(f, "%Ld EOF\n", fl->fl_start); else seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); } else { seq_puts(f, "0 EOF\n"); } } static struct file_lock_core *get_next_blocked_member(struct file_lock_core *node) { struct file_lock_core *tmp; /* NULL node or root node */ if (node == NULL || node->flc_blocker == NULL) return NULL; /* Next member in the linked list could be itself */ tmp = list_next_entry(node, flc_blocked_member); if (list_entry_is_head(tmp, &node->flc_blocker->flc_blocked_requests, flc_blocked_member) || tmp == node) { return NULL; } return tmp; } static int locks_show(struct seq_file *f, void *v) { struct locks_iterator *iter = f->private; struct file_lock_core *cur, *tmp; struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb); int level = 0; cur = hlist_entry(v, struct file_lock_core, flc_link); if (locks_translate_pid(cur, proc_pidns) == 0) return 0; /* View this crossed linked list as a binary tree, the first member of flc_blocked_requests * is the left child of current node, the next silibing in flc_blocked_member is the * right child, we can alse get the parent of current node from flc_blocker, so this * question becomes traversal of a binary tree */ while (cur != NULL) { if (level) lock_get_status(f, cur, iter->li_pos, "-> ", level); else lock_get_status(f, cur, iter->li_pos, "", level); if (!list_empty(&cur->flc_blocked_requests)) { /* Turn left */ cur = list_first_entry_or_null(&cur->flc_blocked_requests, struct file_lock_core, flc_blocked_member); level++; } else { /* Turn right */ tmp = get_next_blocked_member(cur); /* Fall back to parent node */ while (tmp == NULL && cur->flc_blocker != NULL) { cur = cur->flc_blocker; level--; tmp = get_next_blocked_member(cur); } cur = tmp; } } return 0; } static void __show_fd_locks(struct seq_file *f, struct list_head *head, int *id, struct file *filp, struct files_struct *files) { struct file_lock_core *fl; list_for_each_entry(fl, head, flc_list) { if (filp != fl->flc_file) continue; if (fl->flc_owner != files && fl->flc_owner != filp) continue; (*id)++; seq_puts(f, "lock:\t"); lock_get_status(f, fl, *id, "", 0); } } void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) { struct inode *inode = file_inode(filp); struct file_lock_context *ctx; int id = 0; ctx = locks_inode_context(inode); if (!ctx) return; spin_lock(&ctx->flc_lock); __show_fd_locks(f, &ctx->flc_flock, &id, filp, files); __show_fd_locks(f, &ctx->flc_posix, &id, filp, files); __show_fd_locks(f, &ctx->flc_lease, &id, filp, files); spin_unlock(&ctx->flc_lock); } static void *locks_start(struct seq_file *f, loff_t *pos) __acquires(&blocked_lock_lock) { struct locks_iterator *iter = f->private; iter->li_pos = *pos + 1; percpu_down_write(&file_rwsem); spin_lock(&blocked_lock_lock); return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos); } static void *locks_next(struct seq_file *f, void *v, loff_t *pos) { struct locks_iterator *iter = f->private; ++iter->li_pos; return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos); } static void locks_stop(struct seq_file *f, void *v) __releases(&blocked_lock_lock) { spin_unlock(&blocked_lock_lock); percpu_up_write(&file_rwsem); } 
static const struct seq_operations locks_seq_operations = { .start = locks_start, .next = locks_next, .stop = locks_stop, .show = locks_show, };
static int __init proc_locks_init(void) { proc_create_seq_private("locks", 0, NULL, &locks_seq_operations, sizeof(struct locks_iterator), NULL); return 0; } fs_initcall(proc_locks_init); #endif
static int __init filelock_init(void) { int i; flctx_cache = kmem_cache_create("file_lock_ctx", sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL); filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); filelease_cache = kmem_cache_create("file_lease_cache", sizeof(struct file_lease), 0, SLAB_PANIC, NULL); for_each_possible_cpu(i) { struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i); spin_lock_init(&fll->lock); INIT_HLIST_HEAD(&fll->hlist); } lease_notifier_chain_init(); return 0; } core_initcall(filelock_init);
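/*
 * Illustrative userspace sketch, not part of the kernel sources: the
 * F_OFD_SETLK path handled by fcntl_setlk() above requires l_pid == 0 and
 * attaches the lock to the open file description rather than the process.
 * The path /tmp/example.lock is a hypothetical example.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct flock fl;
	int fd = open("/tmp/example.lock", O_RDWR | O_CREAT, 0600);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;	/* exclusive lock */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;		/* whole file */
	fl.l_len = 0;
	fl.l_pid = 0;		/* must be 0 for OFD locks, see fcntl_setlk() */

	/* Non-blocking request; F_OFD_SETLKW would wait (FL_SLEEP). */
	if (fcntl(fd, F_OFD_SETLK, &fl) == -1)
		perror("fcntl(F_OFD_SETLK)");
	else
		puts("got OFD write lock");

	close(fd);	/* the lock is owned by the open file description */
	return 0;
}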
// SPDX-License-Identifier: GPL-2.0 // rc-ir-raw.c - handle IR pulse/space events // // Copyright (C) 2010 by Mauro Carvalho Chehab #include <linux/export.h> #include <linux/kthread.h> #include <linux/mutex.h> #include <linux/kmod.h> #include <linux/sched.h> #include "rc-core-priv.h" /* Used to keep track of IR raw clients, protected by ir_raw_handler_lock */ static LIST_HEAD(ir_raw_client_list); /* Used to handle IR raw handler extensions */ DEFINE_MUTEX(ir_raw_handler_lock); static LIST_HEAD(ir_raw_handler_list); static atomic64_t available_protocols = ATOMIC64_INIT(0); static int ir_raw_event_thread(void *data) { struct ir_raw_event ev; struct ir_raw_handler *handler; struct ir_raw_event_ctrl *raw = data; struct rc_dev *dev = raw->dev; while (1) {
mutex_lock(&ir_raw_handler_lock); while (kfifo_out(&raw->kfifo, &ev, 1)) { if (is_timing_event(ev)) { if (ev.duration == 0) dev_warn_once(&dev->dev, "nonsensical timing event of duration 0"); if (is_timing_event(raw->prev_ev) && !is_transition(&ev, &raw->prev_ev)) dev_warn_once(&dev->dev, "two consecutive events of type %s", TO_STR(ev.pulse)); } list_for_each_entry(handler, &ir_raw_handler_list, list) if (dev->enabled_protocols & handler->protocols || !handler->protocols) handler->decode(dev, ev); lirc_raw_event(dev, ev); raw->prev_ev = ev; } mutex_unlock(&ir_raw_handler_lock); set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); break; } else if (!kfifo_is_empty(&raw->kfifo)) set_current_state(TASK_RUNNING); schedule(); } return 0; } /** * ir_raw_event_store() - pass a pulse/space duration to the raw ir decoders * @dev: the struct rc_dev device descriptor * @ev: the struct ir_raw_event descriptor of the pulse/space * * This routine (which may be called from an interrupt context) stores a * pulse/space duration for the raw ir decoding state machines. Pulses are * signalled as positive values and spaces as negative values. A zero value * will reset the decoding state machines. */ int ir_raw_event_store(struct rc_dev *dev, struct ir_raw_event *ev) { if (!dev->raw) return -EINVAL; dev_dbg(&dev->dev, "sample: (%05dus %s)\n", ev->duration, TO_STR(ev->pulse)); if (!kfifo_put(&dev->raw->kfifo, *ev)) { dev_err(&dev->dev, "IR event FIFO is full!\n"); return -ENOSPC; } return 0; } EXPORT_SYMBOL_GPL(ir_raw_event_store); /** * ir_raw_event_store_edge() - notify raw ir decoders of the start of a pulse/space * @dev: the struct rc_dev device descriptor * @pulse: true for pulse, false for space * * This routine (which may be called from an interrupt context) is used to * store the beginning of an ir pulse or space (or the start/end of ir * reception) for the raw ir decoding state machines. This is used by * hardware which does not provide durations directly but only interrupts * (or similar events) on state change. */ int ir_raw_event_store_edge(struct rc_dev *dev, bool pulse) { ktime_t now; struct ir_raw_event ev = {}; if (!dev->raw) return -EINVAL; now = ktime_get(); ev.duration = ktime_to_us(ktime_sub(now, dev->raw->last_event)); ev.pulse = !pulse; return ir_raw_event_store_with_timeout(dev, &ev); } EXPORT_SYMBOL_GPL(ir_raw_event_store_edge); /* * ir_raw_event_store_with_timeout() - pass a pulse/space duration to the raw * ir decoders, schedule decoding and * timeout * @dev: the struct rc_dev device descriptor * @ev: the struct ir_raw_event descriptor of the pulse/space * * This routine (which may be called from an interrupt context) stores a * pulse/space duration for the raw ir decoding state machines, schedules * decoding and generates a timeout. 
*/ int ir_raw_event_store_with_timeout(struct rc_dev *dev, struct ir_raw_event *ev) { ktime_t now; int rc = 0; if (!dev->raw) return -EINVAL; now = ktime_get(); spin_lock(&dev->raw->edge_spinlock); rc = ir_raw_event_store(dev, ev); dev->raw->last_event = now; /* timer could be set to timeout (125ms by default) */ if (!timer_pending(&dev->raw->edge_handle) || time_after(dev->raw->edge_handle.expires, jiffies + msecs_to_jiffies(15))) { mod_timer(&dev->raw->edge_handle, jiffies + msecs_to_jiffies(15)); } spin_unlock(&dev->raw->edge_spinlock); return rc; } EXPORT_SYMBOL_GPL(ir_raw_event_store_with_timeout); /** * ir_raw_event_store_with_filter() - pass next pulse/space to decoders with some processing * @dev: the struct rc_dev device descriptor * @ev: the event that has occurred * * This routine (which may be called from an interrupt context) works * in similar manner to ir_raw_event_store_edge. * This routine is intended for devices with limited internal buffer * It automerges samples of same type, and handles timeouts. Returns non-zero * if the event was added, and zero if the event was ignored due to idle * processing. */ int ir_raw_event_store_with_filter(struct rc_dev *dev, struct ir_raw_event *ev) { if (!dev->raw) return -EINVAL; /* Ignore spaces in idle mode */ if (dev->idle && !ev->pulse) return 0; else if (dev->idle) ir_raw_event_set_idle(dev, false); if (!dev->raw->this_ev.duration) dev->raw->this_ev = *ev; else if (ev->pulse == dev->raw->this_ev.pulse) dev->raw->this_ev.duration += ev->duration; else { ir_raw_event_store(dev, &dev->raw->this_ev); dev->raw->this_ev = *ev; } /* Enter idle mode if necessary */ if (!ev->pulse && dev->timeout && dev->raw->this_ev.duration >= dev->timeout) ir_raw_event_set_idle(dev, true); return 1; } EXPORT_SYMBOL_GPL(ir_raw_event_store_with_filter); /** * ir_raw_event_set_idle() - provide hint to rc-core when the device is idle or not * @dev: the struct rc_dev device descriptor * @idle: whether the device is idle or not */ void ir_raw_event_set_idle(struct rc_dev *dev, bool idle) { if (!dev->raw) return; dev_dbg(&dev->dev, "%s idle mode\n", idle ? "enter" : "leave"); if (idle) { dev->raw->this_ev.timeout = true; ir_raw_event_store(dev, &dev->raw->this_ev); dev->raw->this_ev = (struct ir_raw_event) {}; } if (dev->s_idle) dev->s_idle(dev, idle); dev->idle = idle; } EXPORT_SYMBOL_GPL(ir_raw_event_set_idle); /** * ir_raw_event_handle() - schedules the decoding of stored ir data * @dev: the struct rc_dev device descriptor * * This routine will tell rc-core to start decoding stored ir data. 
*/ void ir_raw_event_handle(struct rc_dev *dev) { if (!dev->raw || !dev->raw->thread) return; wake_up_process(dev->raw->thread); } EXPORT_SYMBOL_GPL(ir_raw_event_handle); /* used internally by the sysfs interface */ u64 ir_raw_get_allowed_protocols(void) { return atomic64_read(&available_protocols); } static int change_protocol(struct rc_dev *dev, u64 *rc_proto) { struct ir_raw_handler *handler; u32 timeout = 0; mutex_lock(&ir_raw_handler_lock); list_for_each_entry(handler, &ir_raw_handler_list, list) { if (!(dev->enabled_protocols & handler->protocols) && (*rc_proto & handler->protocols) && handler->raw_register) handler->raw_register(dev); if ((dev->enabled_protocols & handler->protocols) && !(*rc_proto & handler->protocols) && handler->raw_unregister) handler->raw_unregister(dev); } mutex_unlock(&ir_raw_handler_lock); if (!dev->max_timeout) return 0; mutex_lock(&ir_raw_handler_lock); list_for_each_entry(handler, &ir_raw_handler_list, list) { if (handler->protocols & *rc_proto) { if (timeout < handler->min_timeout) timeout = handler->min_timeout; } } mutex_unlock(&ir_raw_handler_lock); if (timeout == 0) timeout = IR_DEFAULT_TIMEOUT; else timeout += MS_TO_US(10); if (timeout < dev->min_timeout) timeout = dev->min_timeout; else if (timeout > dev->max_timeout) timeout = dev->max_timeout; if (dev->s_timeout) dev->s_timeout(dev, timeout); else dev->timeout = timeout; return 0; } static void ir_raw_disable_protocols(struct rc_dev *dev, u64 protocols) { mutex_lock(&dev->lock); dev->enabled_protocols &= ~protocols; mutex_unlock(&dev->lock); } /** * ir_raw_gen_manchester() - Encode data with Manchester (bi-phase) modulation. * @ev: Pointer to pointer to next free event. *@ev is incremented for * each raw event filled. * @max: Maximum number of raw events to fill. * @timings: Manchester modulation timings. * @n: Number of bits of data. * @data: Data bits to encode. * * Encodes the @n least significant bits of @data using Manchester (bi-phase) * modulation with the timing characteristics described by @timings, writing up * to @max raw IR events using the *@ev pointer. * * Returns: 0 on success. * -ENOBUFS if there isn't enough space in the array to fit the * full encoded data. In this case all @max events will have been * written. 
*/ int ir_raw_gen_manchester(struct ir_raw_event **ev, unsigned int max, const struct ir_raw_timings_manchester *timings, unsigned int n, u64 data) { bool need_pulse; u64 i; int ret = -ENOBUFS; i = BIT_ULL(n - 1); if (timings->leader_pulse) { if (!max--) return ret; init_ir_raw_event_duration((*ev), 1, timings->leader_pulse); if (timings->leader_space) { if (!max--) return ret; init_ir_raw_event_duration(++(*ev), 0, timings->leader_space); } } else { /* continue existing signal */ --(*ev); } /* from here on *ev will point to the last event rather than the next */ while (n && i > 0) { need_pulse = !(data & i); if (timings->invert) need_pulse = !need_pulse; if (need_pulse == !!(*ev)->pulse) { (*ev)->duration += timings->clock; } else { if (!max--) goto nobufs; init_ir_raw_event_duration(++(*ev), need_pulse, timings->clock); } if (!max--) goto nobufs; init_ir_raw_event_duration(++(*ev), !need_pulse, timings->clock); i >>= 1; } if (timings->trailer_space) { if (!(*ev)->pulse) (*ev)->duration += timings->trailer_space; else if (!max--) goto nobufs; else init_ir_raw_event_duration(++(*ev), 0, timings->trailer_space); } ret = 0; nobufs: /* point to the next event rather than last event before returning */ ++(*ev); return ret; } EXPORT_SYMBOL(ir_raw_gen_manchester); /** * ir_raw_gen_pd() - Encode data to raw events with pulse-distance modulation. * @ev: Pointer to pointer to next free event. *@ev is incremented for * each raw event filled. * @max: Maximum number of raw events to fill. * @timings: Pulse distance modulation timings. * @n: Number of bits of data. * @data: Data bits to encode. * * Encodes the @n least significant bits of @data using pulse-distance * modulation with the timing characteristics described by @timings, writing up * to @max raw IR events using the *@ev pointer. * * Returns: 0 on success. * -ENOBUFS if there isn't enough space in the array to fit the * full encoded data. In this case all @max events will have been * written. */ int ir_raw_gen_pd(struct ir_raw_event **ev, unsigned int max, const struct ir_raw_timings_pd *timings, unsigned int n, u64 data) { int i; int ret; unsigned int space; if (timings->header_pulse) { ret = ir_raw_gen_pulse_space(ev, &max, timings->header_pulse, timings->header_space); if (ret) return ret; } if (timings->msb_first) { for (i = n - 1; i >= 0; --i) { space = timings->bit_space[(data >> i) & 1]; ret = ir_raw_gen_pulse_space(ev, &max, timings->bit_pulse, space); if (ret) return ret; } } else { for (i = 0; i < n; ++i, data >>= 1) { space = timings->bit_space[data & 1]; ret = ir_raw_gen_pulse_space(ev, &max, timings->bit_pulse, space); if (ret) return ret; } } ret = ir_raw_gen_pulse_space(ev, &max, timings->trailer_pulse, timings->trailer_space); return ret; } EXPORT_SYMBOL(ir_raw_gen_pd); /** * ir_raw_gen_pl() - Encode data to raw events with pulse-length modulation. * @ev: Pointer to pointer to next free event. *@ev is incremented for * each raw event filled. * @max: Maximum number of raw events to fill. * @timings: Pulse distance modulation timings. * @n: Number of bits of data. * @data: Data bits to encode. * * Encodes the @n least significant bits of @data using space-distance * modulation with the timing characteristics described by @timings, writing up * to @max raw IR events using the *@ev pointer. * * Returns: 0 on success. * -ENOBUFS if there isn't enough space in the array to fit the * full encoded data. In this case all @max events will have been * written. 
*/ int ir_raw_gen_pl(struct ir_raw_event **ev, unsigned int max, const struct ir_raw_timings_pl *timings, unsigned int n, u64 data) { int i; int ret = -ENOBUFS; unsigned int pulse; if (!max--) return ret; init_ir_raw_event_duration((*ev)++, 1, timings->header_pulse); if (timings->msb_first) { for (i = n - 1; i >= 0; --i) { if (!max--) return ret; init_ir_raw_event_duration((*ev)++, 0, timings->bit_space); if (!max--) return ret; pulse = timings->bit_pulse[(data >> i) & 1]; init_ir_raw_event_duration((*ev)++, 1, pulse); } } else { for (i = 0; i < n; ++i, data >>= 1) { if (!max--) return ret; init_ir_raw_event_duration((*ev)++, 0, timings->bit_space); if (!max--) return ret; pulse = timings->bit_pulse[data & 1]; init_ir_raw_event_duration((*ev)++, 1, pulse); } } if (!max--) return ret; init_ir_raw_event_duration((*ev)++, 0, timings->trailer_space); return 0; } EXPORT_SYMBOL(ir_raw_gen_pl); /** * ir_raw_encode_scancode() - Encode a scancode as raw events * * @protocol: protocol * @scancode: scancode filter describing a single scancode * @events: array of raw events to write into * @max: max number of raw events * * Attempts to encode the scancode as raw events. * * Returns: The number of events written. * -ENOBUFS if there isn't enough space in the array to fit the * encoding. In this case all @max events will have been written. * -EINVAL if the scancode is ambiguous or invalid, or if no * compatible encoder was found. */ int ir_raw_encode_scancode(enum rc_proto protocol, u32 scancode, struct ir_raw_event *events, unsigned int max) { struct ir_raw_handler *handler; int ret = -EINVAL; u64 mask = 1ULL << protocol; ir_raw_load_modules(&mask); mutex_lock(&ir_raw_handler_lock); list_for_each_entry(handler, &ir_raw_handler_list, list) { if (handler->protocols & mask && handler->encode) { ret = handler->encode(protocol, scancode, events, max); if (ret >= 0 || ret == -ENOBUFS) break; } } mutex_unlock(&ir_raw_handler_lock); return ret; } EXPORT_SYMBOL(ir_raw_encode_scancode); /** * ir_raw_edge_handle() - Handle ir_raw_event_store_edge() processing * * @t: timer_list * * This callback is armed by ir_raw_event_store_edge(). It does two things: * first of all, rather than calling ir_raw_event_handle() for each * edge and waking up the rc thread, 15 ms after the first edge * ir_raw_event_handle() is called. Secondly, generate a timeout event * no more IR is received after the rc_dev timeout. */ static void ir_raw_edge_handle(struct timer_list *t) { struct ir_raw_event_ctrl *raw = from_timer(raw, t, edge_handle); struct rc_dev *dev = raw->dev; unsigned long flags; ktime_t interval; spin_lock_irqsave(&dev->raw->edge_spinlock, flags); interval = ktime_sub(ktime_get(), dev->raw->last_event); if (ktime_to_us(interval) >= dev->timeout) { struct ir_raw_event ev = { .timeout = true, .duration = ktime_to_us(interval) }; ir_raw_event_store(dev, &ev); } else { mod_timer(&dev->raw->edge_handle, jiffies + usecs_to_jiffies(dev->timeout - ktime_to_us(interval))); } spin_unlock_irqrestore(&dev->raw->edge_spinlock, flags); ir_raw_event_handle(dev); } /** * ir_raw_encode_carrier() - Get carrier used for protocol * * @protocol: protocol * * Attempts to find the carrier for the specified protocol * * Returns: The carrier in Hz * -EINVAL if the protocol is invalid, or if no * compatible encoder was found. 
*/ int ir_raw_encode_carrier(enum rc_proto protocol) { struct ir_raw_handler *handler; int ret = -EINVAL; u64 mask = BIT_ULL(protocol); mutex_lock(&ir_raw_handler_lock); list_for_each_entry(handler, &ir_raw_handler_list, list) { if (handler->protocols & mask && handler->encode) { ret = handler->carrier; break; } } mutex_unlock(&ir_raw_handler_lock); return ret; } EXPORT_SYMBOL(ir_raw_encode_carrier); /* * Used to (un)register raw event clients */ int ir_raw_event_prepare(struct rc_dev *dev) { if (!dev) return -EINVAL; dev->raw = kzalloc(sizeof(*dev->raw), GFP_KERNEL); if (!dev->raw) return -ENOMEM; dev->raw->dev = dev; dev->change_protocol = change_protocol; dev->idle = true; spin_lock_init(&dev->raw->edge_spinlock); timer_setup(&dev->raw->edge_handle, ir_raw_edge_handle, 0); INIT_KFIFO(dev->raw->kfifo); return 0; } int ir_raw_event_register(struct rc_dev *dev) { struct task_struct *thread; thread = kthread_run(ir_raw_event_thread, dev->raw, "rc%u", dev->minor); if (IS_ERR(thread)) return PTR_ERR(thread); dev->raw->thread = thread; mutex_lock(&ir_raw_handler_lock); list_add_tail(&dev->raw->list, &ir_raw_client_list); mutex_unlock(&ir_raw_handler_lock); return 0; } void ir_raw_event_free(struct rc_dev *dev) { if (!dev) return; kfree(dev->raw); dev->raw = NULL; } void ir_raw_event_unregister(struct rc_dev *dev) { struct ir_raw_handler *handler; if (!dev || !dev->raw) return; kthread_stop(dev->raw->thread); del_timer_sync(&dev->raw->edge_handle); mutex_lock(&ir_raw_handler_lock); list_del(&dev->raw->list); list_for_each_entry(handler, &ir_raw_handler_list, list) if (handler->raw_unregister && (handler->protocols & dev->enabled_protocols)) handler->raw_unregister(dev); lirc_bpf_free(dev); ir_raw_event_free(dev); /* * A user can be calling bpf(BPF_PROG_{QUERY|ATTACH|DETACH}), so * ensure that the raw member is null on unlock; this is how * "device gone" is checked. */ mutex_unlock(&ir_raw_handler_lock); } /* * Extension interface - used to register the IR decoders */ int ir_raw_handler_register(struct ir_raw_handler *ir_raw_handler) { mutex_lock(&ir_raw_handler_lock); list_add_tail(&ir_raw_handler->list, &ir_raw_handler_list); atomic64_or(ir_raw_handler->protocols, &available_protocols); mutex_unlock(&ir_raw_handler_lock); return 0; } EXPORT_SYMBOL(ir_raw_handler_register); void ir_raw_handler_unregister(struct ir_raw_handler *ir_raw_handler) { struct ir_raw_event_ctrl *raw; u64 protocols = ir_raw_handler->protocols; mutex_lock(&ir_raw_handler_lock); list_del(&ir_raw_handler->list); list_for_each_entry(raw, &ir_raw_client_list, list) { if (ir_raw_handler->raw_unregister && (raw->dev->enabled_protocols & protocols)) ir_raw_handler->raw_unregister(raw->dev); ir_raw_disable_protocols(raw->dev, protocols); } atomic64_andnot(protocols, &available_protocols); mutex_unlock(&ir_raw_handler_lock); } EXPORT_SYMBOL(ir_raw_handler_unregister);
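/*
 * Illustrative sketch, not part of this file: a receiver that only raises an
 * interrupt on each level change can feed rc-core through
 * ir_raw_event_store_edge(), as documented above; rc-core then computes the
 * pulse/space durations and schedules decoding. The gpio descriptor, handler
 * name and driver data layout below are hypothetical.
 */
#include <linux/gpio/consumer.h>
#include <linux/interrupt.h>
#include <media/rc-core.h>

struct example_ir {
	struct rc_dev *rcdev;
	struct gpio_desc *gpiod;
};

static irqreturn_t example_ir_isr(int irq, void *dev_id)
{
	struct example_ir *ir = dev_id;
	int level = gpiod_get_value(ir->gpiod);

	/* Report the edge; the duration since the previous edge is derived by rc-core. */
	ir_raw_event_store_edge(ir->rcdev, level == 1);

	return IRQ_HANDLED;
}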
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_IRQDESC_H #define _LINUX_IRQDESC_H #include <linux/rcupdate.h> #include <linux/kobject.h> #include <linux/mutex.h> /* * Core internal functions to deal with irq descriptors */ struct irq_affinity_notify; struct proc_dir_entry; struct module; struct irq_desc; struct irq_domain; struct pt_regs; /** * struct irq_desc - interrupt descriptor * @irq_common_data: per irq and chip data passed down to chip functions * @kstat_irqs: irq stats per cpu * @handle_irq: highlevel irq-events handler * @action: the irq action chain * @status_use_accessors: status information * @core_internal_state__do_not_mess_with_it: core internal status information * @depth: disable-depth, for nested irq_disable() calls * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers * @tot_count: stats field for non-percpu irqs * @irq_count: stats field to detect stalled irqs * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @threads_handled: stats field for deferred spurious detection of threaded handlers * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers * @lock: locking for SMP * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes * @pending_mask: pending rebalanced interrupts * @threads_oneshot: bitfield to handle shared oneshot threads * @threads_active: number of irqaction threads currently running * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers * @nr_actions: number of installed actions on this descriptor * @no_suspend_depth: number of irqactions on a irq descriptor with * IRQF_NO_SUSPEND set * @force_resume_depth: number of irqactions on a irq descriptor with * IRQF_FORCE_RESUME set * @rcu: rcu head for delayed free * @kobj: kobject used to represent this struct in sysfs * @request_mutex: mutex to protect request/free before locking desc->lock * @dir: /proc/irq/ procfs entry * @debugfs_file: dentry for the debugfs file * @name: flow handler name for /proc/interrupts output */ struct irq_desc { struct irq_common_data irq_common_data; struct irq_data irq_data; unsigned int __percpu *kstat_irqs; irq_flow_handler_t handle_irq; struct irqaction *action; /* IRQ action list */ unsigned int status_use_accessors; unsigned int core_internal_state__do_not_mess_with_it; unsigned int depth; /* nested irq disables */ unsigned int wake_depth; /* nested wake enables */ unsigned int tot_count; unsigned int irq_count; /* For detecting broken IRQs */ unsigned long
last_unhandled; /* Aging timer for unhandled count */ unsigned int irqs_unhandled; atomic_t threads_handled; int threads_handled_last; raw_spinlock_t lock; struct cpumask *percpu_enabled; const struct cpumask *percpu_affinity; #ifdef CONFIG_SMP const struct cpumask *affinity_hint; struct irq_affinity_notify *affinity_notify; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif #endif unsigned long threads_oneshot; atomic_t threads_active; wait_queue_head_t wait_for_threads; #ifdef CONFIG_PM_SLEEP unsigned int nr_actions; unsigned int no_suspend_depth; unsigned int cond_suspend_depth; unsigned int force_resume_depth; #endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS struct dentry *debugfs_file; const char *dev_name; #endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; struct kobject kobj; #endif struct mutex request_mutex; int parent_irq; struct module *owner; const char *name; #ifdef CONFIG_HARDIRQS_SW_RESEND struct hlist_node resend_node; #endif } ____cacheline_internodealigned_in_smp; #ifdef CONFIG_SPARSE_IRQ extern void irq_lock_sparse(void); extern void irq_unlock_sparse(void); #else static inline void irq_lock_sparse(void) { } static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, unsigned int cpu) { return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { return container_of(data->common, struct irq_desc, irq_common_data); } static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) { return desc->irq_data.irq; } static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) { return &desc->irq_data; } static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc) { return desc->irq_data.chip; } static inline void *irq_desc_get_chip_data(struct irq_desc *desc) { return desc->irq_data.chip_data; } static inline void *irq_desc_get_handler_data(struct irq_desc *desc) { return desc->irq_common_data.handler_data; } /* * Architectures call this to let the generic IRQ layer * handle an interrupt. */ static inline void generic_handle_irq_desc(struct irq_desc *desc) { desc->handle_irq(desc); } int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); int generic_handle_irq_safe(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* * Convert a HW interrupt number to a logical one using a IRQ domain, * and handle the result interrupt number. Return -EINVAL if * conversion failed. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq); int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq); #endif /* Test to see if a driver has successfully requested an irq */ static inline int irq_desc_has_action(struct irq_desc *desc) { return desc && desc->action != NULL; } /** * irq_set_handler_locked - Set irq handler from a locked region * @data: Pointer to the irq_data structure which identifies the irq * @handler: Flow control handler function for this interrupt * * Sets the handler in the irq descriptor associated to @data. * * Must be called with irq_desc locked and valid parameters. Typical * call site is the irq_set_type() callback. 
*/ static inline void irq_set_handler_locked(struct irq_data *data, irq_flow_handler_t handler) { struct irq_desc *desc = irq_data_to_desc(data); desc->handle_irq = handler; } /** * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region * @data: Pointer to the irq_data structure for which the chip is set * @chip: Pointer to the new irq chip * @handler: Flow control handler function for this interrupt * @name: Name of the interrupt * * Replace the irq chip at the proper hierarchy level in @data and * sets the handler and name in the associated irq descriptor. * * Must be called with irq_desc locked and valid parameters. */ static inline void irq_set_chip_handler_name_locked(struct irq_data *data, const struct irq_chip *chip, irq_flow_handler_t handler, const char *name) { struct irq_desc *desc = irq_data_to_desc(data); desc->handle_irq = handler; desc->name = name; data->chip = (struct irq_chip *)chip; } bool irq_check_status_bit(unsigned int irq, unsigned int bitmask); static inline bool irq_balancing_disabled(unsigned int irq) { return irq_check_status_bit(irq, IRQ_NO_BALANCING_MASK); } static inline bool irq_is_percpu(unsigned int irq) { return irq_check_status_bit(irq, IRQ_PER_CPU); } static inline bool irq_is_percpu_devid(unsigned int irq) { return irq_check_status_bit(irq, IRQ_PER_CPU_DEVID); } void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class); static inline void irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, struct lock_class_key *request_class) { if (IS_ENABLED(CONFIG_LOCKDEP)) __irq_set_lockdep_class(irq, lock_class, request_class); } #endif
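/*
 * Illustrative sketch, not part of this header: irq_set_handler_locked() is
 * meant to be called from a locked region such as an irqchip's irq_set_type()
 * callback, as noted in its kernel-doc above. The chip callback below is
 * hypothetical; handle_edge_irq() and handle_level_irq() are the generic flow
 * handlers from <linux/irq.h>.
 */
#include <linux/irq.h>

static int example_chip_set_type(struct irq_data *data, unsigned int type)
{
	switch (type) {
	case IRQ_TYPE_EDGE_RISING:
	case IRQ_TYPE_EDGE_FALLING:
		/* desc->lock is already held here, so the _locked variant is safe */
		irq_set_handler_locked(data, handle_edge_irq);
		break;
	case IRQ_TYPE_LEVEL_HIGH:
	case IRQ_TYPE_LEVEL_LOW:
		irq_set_handler_locked(data, handle_level_irq);
		break;
	default:
		return -EINVAL;
	}

	/* ... program the hypothetical hardware trigger registers here ... */
	return 0;
}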
/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth HCI sockets. */ #include <linux/compat.h> #include <linux/export.h> #include <linux/utsname.h> #include <linux/sched.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/hci_mon.h> #include <net/bluetooth/mgmt.h> #include "mgmt_util.h" static LIST_HEAD(mgmt_chan_list); static DEFINE_MUTEX(mgmt_chan_list_lock); static DEFINE_IDA(sock_cookie_ida); static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ /* Socket info */ #define hci_pi(sk) ((struct hci_pinfo *) sk) struct hci_pinfo { struct bt_sock bt; struct hci_dev *hdev; struct hci_filter filter; __u8 cmsg_mask; unsigned short channel; unsigned long flags; __u32 cookie; char comm[TASK_COMM_LEN]; __u16 mtu; }; static struct hci_dev *hci_hdev_from_sock(struct sock *sk) { struct hci_dev *hdev = hci_pi(sk)->hdev; if (!hdev) return ERR_PTR(-EBADFD); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) return ERR_PTR(-EPIPE); return hdev; } void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); } void hci_sock_clear_flag(struct sock *sk, int nr) { clear_bit(nr, &hci_pi(sk)->flags); } int hci_sock_test_flag(struct sock *sk, int nr) { return test_bit(nr, &hci_pi(sk)->flags); } unsigned short hci_sock_get_channel(struct sock *sk) { return hci_pi(sk)->channel; } u32 hci_sock_get_cookie(struct sock *sk) { return hci_pi(sk)->cookie; } static bool hci_sock_gen_cookie(struct sock *sk) { int id = hci_pi(sk)->cookie; if (!id) { id = ida_alloc_min(&sock_cookie_ida, 1, GFP_KERNEL); if (id < 0) id = 0xffffffff; hci_pi(sk)->cookie = id; get_task_comm(hci_pi(sk)->comm, current); return true; } return false; } static void hci_sock_free_cookie(struct sock *sk) { int id = hci_pi(sk)->cookie; if (id) { hci_pi(sk)->cookie = 0xffffffff; ida_free(&sock_cookie_ida, id); } } static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); } /* Security filter */ #define HCI_SFLT_MAX_OGF 5 struct hci_sec_filter { __u32 type_mask; __u32 event_mask[2]; __u32 ocf_mask[HCI_SFLT_MAX_OGF + 1][4]; }; static const struct hci_sec_filter hci_sec_filter = { /* Packet types */ 0x10, /* Events */ { 0x1000d9fe, 0x0000b00c }, /* Commands */ { { 0x0 }, /* OGF_LINK_CTL */ { 0xbe000006, 0x00000001, 0x00000000, 0x00 }, /* OGF_LINK_POLICY */ { 0x00005200, 0x00000000, 0x00000000, 0x00 }, /* OGF_HOST_CTL */ { 0xaab00200, 0x2b402aaa, 0x05220154, 0x00 }, /* OGF_INFO_PARAM */ { 0x000002be, 0x00000000, 0x00000000, 0x00 }, /* OGF_STATUS_PARAM */ { 0x000000ea, 0x00000000, 0x00000000, 0x00 } } }; static struct bt_sock_list hci_sk_list = { .lock = __RW_LOCK_UNLOCKED(hci_sk_list.lock) }; static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb) { struct hci_filter *flt; int flt_type, flt_event; /* Apply filter */ flt = &hci_pi(sk)->filter; flt_type = hci_skb_pkt_type(skb) & 
HCI_FLT_TYPE_BITS; if (!test_bit(flt_type, &flt->type_mask)) return true; /* Extra filter for event packets only */ if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT) return false; flt_event = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); if (!hci_test_bit(flt_event, &flt->event_mask)) return true; /* Check filter only when opcode is set */ if (!flt->opcode) return false; if (flt_event == HCI_EV_CMD_COMPLETE && flt->opcode != get_unaligned((__le16 *)(skb->data + 3))) return true; if (flt_event == HCI_EV_CMD_STATUS && flt->opcode != get_unaligned((__le16 *)(skb->data + 4))) return true; return false; } /* Send frame to RAW socket */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; struct sk_buff *skb_copy = NULL; BT_DBG("hdev %p len %d", hdev, skb->len); read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev) continue; /* Don't send frame to the socket it came from */ if (skb->sk == sk) continue; if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) { if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && hci_skb_pkt_type(skb) != HCI_EVENT_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) continue; if (is_filtered_packet(sk, skb)) continue; } else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { if (!bt_cb(skb)->incoming) continue; if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) continue; } else { /* Don't send frame to other channel types */ continue; } if (!skb_copy) { /* Create a private copy with headroom */ skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true); if (!skb_copy) continue; /* Put type byte before the data */ memcpy(skb_push(skb_copy, 1), &hci_skb_pkt_type(skb), 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); if (!nskb) continue; if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); } read_unlock(&hci_sk_list.lock); kfree_skb(skb_copy); } static void hci_sock_copy_creds(struct sock *sk, struct sk_buff *skb) { struct scm_creds *creds; if (!sk || WARN_ON(!skb)) return; creds = &bt_cb(skb)->creds; /* Check if peer credentials is set */ if (!sk->sk_peer_pid) { /* Check if parent peer credentials is set */ if (bt_sk(sk)->parent && bt_sk(sk)->parent->sk_peer_pid) sk = bt_sk(sk)->parent; else return; } /* Check if scm_creds already set */ if (creds->pid == pid_vnr(sk->sk_peer_pid)) return; memset(creds, 0, sizeof(*creds)); creds->pid = pid_vnr(sk->sk_peer_pid); if (sk->sk_peer_cred) { creds->uid = sk->sk_peer_cred->uid; creds->gid = sk->sk_peer_cred->gid; } } static struct sk_buff *hci_skb_clone(struct sk_buff *skb) { struct sk_buff *nskb; if (!skb) return NULL; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return NULL; hci_sock_copy_creds(skb->sk, nskb); return nskb; } /* Send frame to sockets with specific channel */ static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk) { struct sock *sk; BT_DBG("channel %u len %d", channel, skb->len); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; /* Ignore socket without the flag set */ if (!hci_sock_test_flag(sk, flag)) continue; /* Skip the original socket */ if (sk == skip_sk) continue; if (sk->sk_state != BT_BOUND) continue; if (hci_pi(sk)->channel != channel) continue; nskb = hci_skb_clone(skb); if (!nskb) continue; if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); } 
} void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk) { read_lock(&hci_sk_list.lock); __hci_send_to_channel(channel, skb, flag, skip_sk); read_unlock(&hci_sk_list.lock); } /* Send frame to monitor socket */ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { struct sk_buff *skb_copy = NULL; struct hci_mon_hdr *hdr; __le16 opcode; if (!atomic_read(&monitor_promisc)) return; BT_DBG("hdev %p len %d", hdev, skb->len); switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: opcode = cpu_to_le16(HCI_MON_COMMAND_PKT); break; case HCI_EVENT_PKT: opcode = cpu_to_le16(HCI_MON_EVENT_PKT); break; case HCI_ACLDATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_ACL_RX_PKT); else opcode = cpu_to_le16(HCI_MON_ACL_TX_PKT); break; case HCI_SCODATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_SCO_RX_PKT); else opcode = cpu_to_le16(HCI_MON_SCO_TX_PKT); break; case HCI_ISODATA_PKT: if (bt_cb(skb)->incoming) opcode = cpu_to_le16(HCI_MON_ISO_RX_PKT); else opcode = cpu_to_le16(HCI_MON_ISO_TX_PKT); break; case HCI_DIAG_PKT: opcode = cpu_to_le16(HCI_MON_VENDOR_DIAG); break; default: return; } /* Create a private copy with headroom */ skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, GFP_ATOMIC, true); if (!skb_copy) return; hci_sock_copy_creds(skb->sk, skb_copy); /* Put header before the data */ hdr = skb_push(skb_copy, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); hci_send_to_channel(HCI_CHANNEL_MONITOR, skb_copy, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb_copy); } void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, void *data, u16 data_len, ktime_t tstamp, int flag, struct sock *skip_sk) { struct sock *sk; __le16 index; if (hdev) index = cpu_to_le16(hdev->id); else index = cpu_to_le16(MGMT_INDEX_NONE); read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct hci_mon_hdr *hdr; struct sk_buff *skb; if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) continue; /* Ignore socket without the flag set */ if (!hci_sock_test_flag(sk, flag)) continue; /* Skip the original socket */ if (sk == skip_sk) continue; skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC); if (!skb) continue; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(event, skb_put(skb, 2)); if (data) skb_put_data(skb, data, data_len); skb->tstamp = tstamp; hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); hdr->index = index; hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } read_unlock(&hci_sk_list.lock); } static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; struct hci_mon_new_index *ni; struct hci_mon_index_info *ii; struct sk_buff *skb; __le16 opcode; switch (event) { case HCI_DEV_REG: skb = bt_skb_alloc(HCI_MON_NEW_INDEX_SIZE, GFP_ATOMIC); if (!skb) return NULL; ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE); ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, strnlen(hdev->name, sizeof(ni->name)), '\0'); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; case HCI_DEV_UNREG: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_DEL_INDEX); break; case HCI_DEV_SETUP: if (hdev->manufacturer == 0xffff) return NULL; fallthrough; case HCI_DEV_UP: skb = 
bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC); if (!skb) return NULL; ii = skb_put(skb, HCI_MON_INDEX_INFO_SIZE); bacpy(&ii->bdaddr, &hdev->bdaddr); ii->manufacturer = cpu_to_le16(hdev->manufacturer); opcode = cpu_to_le16(HCI_MON_INDEX_INFO); break; case HCI_DEV_OPEN: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_OPEN_INDEX); break; case HCI_DEV_CLOSE: skb = bt_skb_alloc(0, GFP_ATOMIC); if (!skb) return NULL; opcode = cpu_to_le16(HCI_MON_CLOSE_INDEX); break; default: return NULL; } __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; u16 format; u8 ver[3]; u32 flags; /* No message needed when cookie is not present */ if (!hci_pi(sk)->cookie) return NULL; switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: format = 0x0000; ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; case HCI_CHANNEL_USER: format = 0x0001; ver[0] = BT_SUBSYS_VERSION; put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); break; case HCI_CHANNEL_CONTROL: format = 0x0002; mgmt_fill_version_info(ver); break; default: /* No message for unsupported format */ return NULL; } skb = bt_skb_alloc(14 + TASK_COMM_LEN, GFP_ATOMIC); if (!skb) return NULL; hci_sock_copy_creds(sk, skb); flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0; put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(format, skb_put(skb, 2)); skb_put_data(skb, ver, sizeof(ver)); put_unaligned_le32(flags, skb_put(skb, 4)); skb_put_u8(skb, TASK_COMM_LEN); skb_put_data(skb, hci_pi(sk)->comm, TASK_COMM_LEN); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); if (hci_pi(sk)->hdev) hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); else hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) { struct hci_mon_hdr *hdr; struct sk_buff *skb; /* No message needed when cookie is not present */ if (!hci_pi(sk)->cookie) return NULL; switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: break; default: /* No message for unsupported format */ return NULL; } skb = bt_skb_alloc(4, GFP_ATOMIC); if (!skb) return NULL; hci_sock_copy_creds(sk, skb); put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); if (hci_pi(sk)->hdev) hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); else hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, u16 opcode, u16 len, const void *buf) { struct hci_mon_hdr *hdr; struct sk_buff *skb; skb = bt_skb_alloc(6 + len, GFP_ATOMIC); if (!skb) return NULL; hci_sock_copy_creds(sk, skb); put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); put_unaligned_le16(opcode, skb_put(skb, 2)); if (buf) skb_put_data(skb, buf, len); __net_timestamp(skb); hdr = skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); return skb; } static void __printf(2, 3) 
send_monitor_note(struct sock *sk, const char *fmt, ...) { size_t len; struct hci_mon_hdr *hdr; struct sk_buff *skb; va_list args; va_start(args, fmt); len = vsnprintf(NULL, 0, fmt, args); va_end(args); skb = bt_skb_alloc(len + 1, GFP_ATOMIC); if (!skb) return; hci_sock_copy_creds(sk, skb); va_start(args, fmt); vsprintf(skb_put(skb, len), fmt, args); *(u8 *)skb_put(skb, 1) = 0; va_end(args); __net_timestamp(skb); hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = cpu_to_le16(HCI_MON_SYSTEM_NOTE); hdr->index = cpu_to_le16(HCI_DEV_NONE); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); } static void send_monitor_replay(struct sock *sk) { struct hci_dev *hdev; read_lock(&hci_dev_list_lock); list_for_each_entry(hdev, &hci_dev_list, list) { struct sk_buff *skb; skb = create_monitor_event(hdev, HCI_DEV_REG); if (!skb) continue; if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); if (!test_bit(HCI_RUNNING, &hdev->flags)) continue; skb = create_monitor_event(hdev, HCI_DEV_OPEN); if (!skb) continue; if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); if (test_bit(HCI_UP, &hdev->flags)) skb = create_monitor_event(hdev, HCI_DEV_UP); else if (hci_dev_test_flag(hdev, HCI_SETUP)) skb = create_monitor_event(hdev, HCI_DEV_SETUP); else skb = NULL; if (skb) { if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); } } read_unlock(&hci_dev_list_lock); } static void send_monitor_control_replay(struct sock *mon_sk) { struct sock *sk; read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *skb; skb = create_monitor_ctrl_open(sk); if (!skb) continue; if (sock_queue_rcv_skb(mon_sk, skb)) kfree_skb(skb); } read_unlock(&hci_sk_list.lock); } /* Generate internal stack event */ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) { struct hci_event_hdr *hdr; struct hci_ev_stack_internal *ev; struct sk_buff *skb; skb = bt_skb_alloc(HCI_EVENT_HDR_SIZE + sizeof(*ev) + dlen, GFP_ATOMIC); if (!skb) return; hdr = skb_put(skb, HCI_EVENT_HDR_SIZE); hdr->evt = HCI_EV_STACK_INTERNAL; hdr->plen = sizeof(*ev) + dlen; ev = skb_put(skb, sizeof(*ev) + dlen); ev->type = type; memcpy(ev->data, data, dlen); bt_cb(skb)->incoming = 1; __net_timestamp(skb); hci_skb_pkt_type(skb) = HCI_EVENT_PKT; hci_send_to_sock(hdev, skb); kfree_skb(skb); } void hci_sock_dev_event(struct hci_dev *hdev, int event) { BT_DBG("hdev %s event %d", hdev->name, event); if (atomic_read(&monitor_promisc)) { struct sk_buff *skb; /* Send event to monitor */ skb = create_monitor_event(hdev, event); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } if (event <= HCI_DEV_DOWN) { struct hci_ev_si_device ev; /* Send event to sockets */ ev.event = event; ev.dev_id = hdev->id; hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev); } if (event == HCI_DEV_UNREG) { struct sock *sk; /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { if (hci_pi(sk)->hdev == hdev) { sk->sk_err = EPIPE; sk->sk_state_change(sk); } } read_unlock(&hci_sk_list.lock); } } static struct hci_mgmt_chan *__hci_mgmt_chan_find(unsigned short channel) { struct hci_mgmt_chan *c; list_for_each_entry(c, &mgmt_chan_list, list) { if (c->channel == channel) return c; } return NULL; } static struct hci_mgmt_chan *hci_mgmt_chan_find(unsigned short channel) { struct hci_mgmt_chan *c; mutex_lock(&mgmt_chan_list_lock); c = __hci_mgmt_chan_find(channel); mutex_unlock(&mgmt_chan_list_lock); return c; } int 
hci_mgmt_chan_register(struct hci_mgmt_chan *c) { if (c->channel < HCI_CHANNEL_CONTROL) return -EINVAL; mutex_lock(&mgmt_chan_list_lock); if (__hci_mgmt_chan_find(c->channel)) { mutex_unlock(&mgmt_chan_list_lock); return -EALREADY; } list_add_tail(&c->list, &mgmt_chan_list); mutex_unlock(&mgmt_chan_list_lock); return 0; } EXPORT_SYMBOL(hci_mgmt_chan_register); void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c) { mutex_lock(&mgmt_chan_list_lock); list_del(&c->list); mutex_unlock(&mgmt_chan_list_lock); } EXPORT_SYMBOL(hci_mgmt_chan_unregister); static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; struct sk_buff *skb; BT_DBG("sock %p sk %p", sock, sk); if (!sk) return 0; lock_sock(sk); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); break; case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: /* Send event to monitor */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } hci_sock_free_cookie(sk); break; } bt_sock_unlink(&hci_sk_list, sk); hdev = hci_pi(sk)->hdev; if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER && !hci_dev_test_flag(hdev, HCI_UNREGISTER)) { /* When releasing a user channel exclusive access, * call hci_dev_do_close directly instead of calling * hci_dev_close to ensure the exclusive access will * be released and the controller brought back down. * * The checking of HCI_AUTO_OFF is not needed in this * case since it will have been cleared already when * opening the user channel. * * Make sure to also check that we haven't already * unregistered since all the cleanup will have already * been complete and hdev will get released when we put * below. 
*/ hci_dev_do_close(hdev); hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); } atomic_dec(&hdev->promisc); hci_dev_put(hdev); } sock_orphan(sk); release_sock(sk); sock_put(sk); return 0; } static int hci_sock_reject_list_add(struct hci_dev *hdev, void __user *arg) { bdaddr_t bdaddr; int err; if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) return -EFAULT; hci_dev_lock(hdev); err = hci_bdaddr_list_add(&hdev->reject_list, &bdaddr, BDADDR_BREDR); hci_dev_unlock(hdev); return err; } static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg) { bdaddr_t bdaddr; int err; if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) return -EFAULT; hci_dev_lock(hdev); err = hci_bdaddr_list_del(&hdev->reject_list, &bdaddr, BDADDR_BREDR); hci_dev_unlock(hdev); return err; } /* Ioctls that require bound socket */ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { struct hci_dev *hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) return PTR_ERR(hdev); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return -EOPNOTSUPP; if (hdev->dev_type != HCI_PRIMARY) return -EOPNOTSUPP; switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) return -EPERM; return -EOPNOTSUPP; case HCIGETCONNINFO: return hci_get_conn_info(hdev, (void __user *)arg); case HCIGETAUTHINFO: return hci_get_auth_info(hdev, (void __user *)arg); case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_sock_reject_list_add(hdev, (void __user *)arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_sock_reject_list_del(hdev, (void __user *)arg); } return -ENOIOCTLCMD; } static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; int err; BT_DBG("cmd %x arg %lx", cmd, arg); /* Make sure the cmd is valid before doing anything */ switch (cmd) { case HCIGETDEVLIST: case HCIGETDEVINFO: case HCIGETCONNLIST: case HCIDEVUP: case HCIDEVDOWN: case HCIDEVRESET: case HCIDEVRESTAT: case HCISETSCAN: case HCISETAUTH: case HCISETENCRYPT: case HCISETPTYPE: case HCISETLINKPOL: case HCISETLINKMODE: case HCISETACLMTU: case HCISETSCOMTU: case HCIINQUIRY: case HCISETRAW: case HCIGETCONNINFO: case HCIGETAUTHINFO: case HCIBLOCKADDR: case HCIUNBLOCKADDR: break; default: return -ENOIOCTLCMD; } lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } /* When calling an ioctl on an unbound raw socket, then ensure * that the monitor gets informed. Ensure that the resulting event * is only send once by checking if the cookie exists or not. The * socket cookie will be only ever generated once for the lifetime * of a given socket. */ if (hci_sock_gen_cookie(sk)) { struct sk_buff *skb; /* Perform careful checks before setting the HCI_SOCK_TRUSTED * flag. Make sure that not only the current task but also * the socket opener has the required capability, since * privileged programs can be tricked into making ioctl calls * on HCI sockets, and the socket should not be marked as * trusted simply because the ioctl caller is privileged. 
*/ if (sk_capable(sk, CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } release_sock(sk); switch (cmd) { case HCIGETDEVLIST: return hci_get_dev_list(argp); case HCIGETDEVINFO: return hci_get_dev_info(argp); case HCIGETCONNLIST: return hci_get_conn_list(argp); case HCIDEVUP: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_open(arg); case HCIDEVDOWN: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_close(arg); case HCIDEVRESET: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_reset(arg); case HCIDEVRESTAT: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_reset_stat(arg); case HCISETSCAN: case HCISETAUTH: case HCISETENCRYPT: case HCISETPTYPE: case HCISETLINKPOL: case HCISETLINKMODE: case HCISETACLMTU: case HCISETSCOMTU: if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_dev_cmd(cmd, argp); case HCIINQUIRY: return hci_inquiry(argp); } lock_sock(sk); err = hci_sock_bound_ioctl(sk, cmd, arg); done: release_sock(sk); return err; } #ifdef CONFIG_COMPAT static int hci_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { switch (cmd) { case HCIDEVUP: case HCIDEVDOWN: case HCIDEVRESET: case HCIDEVRESTAT: return hci_sock_ioctl(sock, cmd, arg); } return hci_sock_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } #endif static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; struct sk_buff *skb; int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); if (!addr) return -EINVAL; memset(&haddr, 0, sizeof(haddr)); len = min_t(unsigned int, sizeof(haddr), addr_len); memcpy(&haddr, addr, len); if (haddr.hci_family != AF_BLUETOOTH) return -EINVAL; lock_sock(sk); /* Allow detaching from dead device and attaching to alive device, if * the caller wants to re-bind (instead of close) this socket in * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. */ hdev = hci_pi(sk)->hdev; if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { hci_pi(sk)->hdev = NULL; sk->sk_state = BT_OPEN; hci_dev_put(hdev); } hdev = NULL; if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; } switch (haddr.hci_channel) { case HCI_CHANNEL_RAW: if (hci_pi(sk)->hdev) { err = -EALREADY; goto done; } if (haddr.hci_dev != HCI_DEV_NONE) { hdev = hci_dev_get(haddr.hci_dev); if (!hdev) { err = -ENODEV; goto done; } atomic_inc(&hdev->promisc); } hci_pi(sk)->channel = haddr.hci_channel; if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been assigned, * then there has been already an ioctl issued against * an unbound socket and with that triggered an open * notification. Send a close notification first to * allow the state transition to bounded. 
*/ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->hdev = hdev; /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } break; case HCI_CHANNEL_USER: if (hci_pi(sk)->hdev) { err = -EALREADY; goto done; } if (haddr.hci_dev == HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_ADMIN)) { err = -EPERM; goto done; } hdev = hci_dev_get(haddr.hci_dev); if (!hdev) { err = -ENODEV; goto done; } if (test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && test_bit(HCI_UP, &hdev->flags))) { err = -EBUSY; hci_dev_put(hdev); goto done; } if (hci_dev_test_and_set_flag(hdev, HCI_USER_CHANNEL)) { err = -EUSERS; hci_dev_put(hdev); goto done; } mgmt_index_removed(hdev); err = hci_dev_open(hdev->id); if (err) { if (err == -EALREADY) { /* In case the transport is already up and * running, clear the error here. * * This can happen when opening a user * channel and HCI_AUTO_OFF grace period * is still active. */ err = 0; } else { hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); hci_dev_put(hdev); goto done; } } hci_pi(sk)->channel = haddr.hci_channel; if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been assigned, * this socket will transition from a raw socket into * a user channel socket. For a clean transition, send * the close notification first. */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } /* The user channel is restricted to CAP_NET_ADMIN * capabilities and with that implicitly trusted. */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->hdev = hdev; /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } atomic_inc(&hdev->promisc); break; case HCI_CHANNEL_MONITOR: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_RAW)) { err = -EPERM; goto done; } hci_pi(sk)->channel = haddr.hci_channel; /* The monitor interface is restricted to CAP_NET_RAW * capabilities and with that implicitly trusted. */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); send_monitor_note(sk, "Linux version %s (%s)", init_utsname()->release, init_utsname()->machine); send_monitor_note(sk, "Bluetooth subsystem version %u.%u", BT_SUBSYS_VERSION, BT_SUBSYS_REVISION); send_monitor_replay(sk); send_monitor_control_replay(sk); atomic_inc(&monitor_promisc); break; case HCI_CHANNEL_LOGGING: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } if (!capable(CAP_NET_ADMIN)) { err = -EPERM; goto done; } hci_pi(sk)->channel = haddr.hci_channel; break; default: if (!hci_mgmt_chan_find(haddr.hci_channel)) { err = -EINVAL; goto done; } if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; goto done; } /* Users with CAP_NET_ADMIN capabilities are allowed * access to all management commands and events. For * untrusted users the interface is restricted and * also only untrusted events are sent. 
*/ if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->channel = haddr.hci_channel; /* At the moment the index and unconfigured index events * are enabled unconditionally. Setting them on each * socket when binding keeps this functionality. They * however might be cleared later and then sending of these * events will be disabled, but that is then intentional. * * This also enables generic events that are safe to be * received by untrusted users. Example for such events * are changes to settings, class of device, name etc. */ if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been * assigned, this socket will transition from * a raw socket into a control socket. To * allow for a clean transition, send the * close notification first. */ skb = create_monitor_ctrl_close(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } } /* Send event to monitor */ skb = create_monitor_ctrl_open(sk); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); } break; } /* Default MTU to HCI_MAX_FRAME_SIZE if not set */ if (!hci_pi(sk)->mtu) hci_pi(sk)->mtu = HCI_MAX_FRAME_SIZE; sk->sk_state = BT_BOUND; done: release_sock(sk); return err; } static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int peer) { struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr; struct sock *sk = sock->sk; struct hci_dev *hdev; int err = 0; BT_DBG("sock %p sk %p", sock, sk); if (peer) return -EOPNOTSUPP; lock_sock(sk); hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) { err = PTR_ERR(hdev); goto done; } haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; haddr->hci_channel= hci_pi(sk)->channel; err = sizeof(*haddr); done: release_sock(sk); return err; } static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { __u8 mask = hci_pi(sk)->cmsg_mask; if (mask & HCI_CMSG_DIR) { int incoming = bt_cb(skb)->incoming; put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming); } if (mask & HCI_CMSG_TSTAMP) { #ifdef CONFIG_COMPAT struct old_timeval32 ctv; #endif struct __kernel_old_timeval tv; void *data; int len; skb_get_timestamp(skb, &tv); data = &tv; len = sizeof(tv); #ifdef CONFIG_COMPAT if (!COMPAT_USE_64BIT_TIME && (msg->msg_flags & MSG_CMSG_COMPAT)) { ctv.tv_sec = tv.tv_sec; ctv.tv_usec = tv.tv_usec; data = &ctv; len = sizeof(ctv); } #endif put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, len, data); } } static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; unsigned int skblen; BT_DBG("sock %p, sk %p", sock, sk); if (flags & MSG_OOB) return -EOPNOTSUPP; if (hci_pi(sk)->channel == HCI_CHANNEL_LOGGING) return -EOPNOTSUPP; if (sk->sk_state == BT_CLOSED) return 0; skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; skblen = skb->len; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: 
hci_sock_cmsg(sk, msg, skb); break; case HCI_CHANNEL_USER: case HCI_CHANNEL_MONITOR: sock_recv_timestamp(msg, sk, skb); break; default: if (hci_mgmt_chan_find(hci_pi(sk)->channel)) sock_recv_timestamp(msg, sk, skb); break; } memset(&scm, 0, sizeof(scm)); scm.creds = bt_cb(skb)->creds; skb_free_datagram(sk, skb); if (flags & MSG_TRUNC) copied = skblen; scm_recv(sock, msg, &scm, flags); return err ? : copied; } static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, struct sk_buff *skb) { u8 *cp; struct mgmt_hdr *hdr; u16 opcode, index, len; struct hci_dev *hdev = NULL; const struct hci_mgmt_handler *handler; bool var_len, no_hdev; int err; BT_DBG("got %d bytes", skb->len); if (skb->len < sizeof(*hdr)) return -EINVAL; hdr = (void *)skb->data; opcode = __le16_to_cpu(hdr->opcode); index = __le16_to_cpu(hdr->index); len = __le16_to_cpu(hdr->len); if (len != skb->len - sizeof(*hdr)) { err = -EINVAL; goto done; } if (chan->channel == HCI_CHANNEL_CONTROL) { struct sk_buff *cmd; /* Send event to monitor */ cmd = create_monitor_ctrl_command(sk, index, opcode, len, skb->data + sizeof(*hdr)); if (cmd) { hci_send_to_channel(HCI_CHANNEL_MONITOR, cmd, HCI_SOCK_TRUSTED, NULL); kfree_skb(cmd); } } if (opcode >= chan->handler_count || chan->handlers[opcode].func == NULL) { BT_DBG("Unknown op %u", opcode); err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_UNKNOWN_COMMAND); goto done; } handler = &chan->handlers[opcode]; if (!hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) && !(handler->flags & HCI_MGMT_UNTRUSTED)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_PERMISSION_DENIED); goto done; } if (index != MGMT_INDEX_NONE) { hdev = hci_dev_get(index); if (!hdev) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } if (hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !(handler->flags & HCI_MGMT_UNCONFIGURED)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } } if (!(handler->flags & HCI_MGMT_HDEV_OPTIONAL)) { no_hdev = (handler->flags & HCI_MGMT_NO_HDEV); if (no_hdev != !hdev) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_INDEX); goto done; } } var_len = (handler->flags & HCI_MGMT_VAR_LEN); if ((var_len && len < handler->data_len) || (!var_len && len != handler->data_len)) { err = mgmt_cmd_status(sk, index, opcode, MGMT_STATUS_INVALID_PARAMS); goto done; } if (hdev && chan->hdev_init) chan->hdev_init(sk, hdev); cp = skb->data + sizeof(*hdr); err = handler->func(sk, hdev, cp, len); if (err < 0) goto done; err = skb->len; done: if (hdev) hci_dev_put(hdev); return err; } static int hci_logging_frame(struct sock *sk, struct sk_buff *skb, unsigned int flags) { struct hci_mon_hdr *hdr; struct hci_dev *hdev; u16 index; int err; /* The logging frame consists at minimum of the standard header, * the priority byte, the ident length byte and at least one string * terminator NUL byte. Anything shorter are invalid packets. */ if (skb->len < sizeof(*hdr) + 3) return -EINVAL; hdr = (void *)skb->data; if (__le16_to_cpu(hdr->len) != skb->len - sizeof(*hdr)) return -EINVAL; if (__le16_to_cpu(hdr->opcode) == 0x0000) { __u8 priority = skb->data[sizeof(*hdr)]; __u8 ident_len = skb->data[sizeof(*hdr) + 1]; /* Only the priorities 0-7 are valid and with that any other * value results in an invalid packet. 
* * The priority byte is followed by an ident length byte and * the NUL terminated ident string. Check that the ident * length is not overflowing the packet and also that the * ident string itself is NUL terminated. In case the ident * length is zero, the length value actually doubles as NUL * terminator identifier. * * The message follows the ident string (if present) and * must be NUL terminated. Otherwise it is not a valid packet. */ if (priority > 7 || skb->data[skb->len - 1] != 0x00 || ident_len > skb->len - sizeof(*hdr) - 3 || skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) return -EINVAL; } else { return -EINVAL; } index = __le16_to_cpu(hdr->index); if (index != MGMT_INDEX_NONE) { hdev = hci_dev_get(index); if (!hdev) return -ENODEV; } else { hdev = NULL; } hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING); hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); err = skb->len; if (hdev) hci_dev_put(hdev); return err; } static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct hci_mgmt_chan *chan; struct hci_dev *hdev; struct sk_buff *skb; int err; const unsigned int flags = msg->msg_flags; BT_DBG("sock %p sk %p", sock, sk); if (flags & MSG_OOB) return -EOPNOTSUPP; if (flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL | MSG_ERRQUEUE | MSG_CMSG_COMPAT)) return -EINVAL; if (len < 4 || len > hci_pi(sk)->mtu) return -EINVAL; skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); if (IS_ERR(skb)) return PTR_ERR(skb); lock_sock(sk); switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: break; case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; goto drop; case HCI_CHANNEL_LOGGING: err = hci_logging_frame(sk, skb, flags); goto drop; default: mutex_lock(&mgmt_chan_list_lock); chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); if (chan) err = hci_mgmt_cmd(chan, sk, skb); else err = -EINVAL; mutex_unlock(&mgmt_chan_list_lock); goto drop; } hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) { err = PTR_ERR(hdev); goto drop; } if (!test_bit(HCI_UP, &hdev->flags)) { err = -ENETDOWN; goto drop; } hci_skb_pkt_type(skb) = skb->data[0]; skb_pull(skb, 1); if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { /* No permission check is needed for user channel * since that gets enforced when binding the socket. * * However check that the packet type is valid. */ if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { err = -EINVAL; goto drop; } skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else if (hci_skb_pkt_type(skb) == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); if (((ogf > HCI_SFLT_MAX_OGF) || !hci_test_bit(ocf & HCI_FLT_OCF_BITS, &hci_sec_filter.ocf_mask[ogf])) && !capable(CAP_NET_RAW)) { err = -EPERM; goto drop; } /* Since the opcode has already been extracted here, store * a copy of the value for later use by the drivers. */ hci_skb_opcode(skb) = opcode; if (ogf == 0x3f) { skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } else { /* Stand-alone HCI commands must be flagged as * single-command requests. 
*/ bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); } } else { if (!capable(CAP_NET_RAW)) { err = -EPERM; goto drop; } if (hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { err = -EINVAL; goto drop; } skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } err = len; done: release_sock(sk); return err; drop: kfree_skb(skb); goto done; } static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int len) { struct hci_ufilter uf = { .opcode = 0 }; struct sock *sk = sock->sk; int err = 0, opt = 0; BT_DBG("sk %p, opt %d", sk, optname); lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } switch (optname) { case HCI_DATA_DIR: err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len); if (err) break; if (opt) hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR; else hci_pi(sk)->cmsg_mask &= ~HCI_CMSG_DIR; break; case HCI_TIME_STAMP: err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len); if (err) break; if (opt) hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP; else hci_pi(sk)->cmsg_mask &= ~HCI_CMSG_TSTAMP; break; case HCI_FILTER: { struct hci_filter *f = &hci_pi(sk)->filter; uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); uf.event_mask[1] = *((u32 *) f->event_mask + 1); } err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len); if (err) break; if (!capable(CAP_NET_RAW)) { uf.type_mask &= hci_sec_filter.type_mask; uf.event_mask[0] &= *((u32 *) hci_sec_filter.event_mask + 0); uf.event_mask[1] &= *((u32 *) hci_sec_filter.event_mask + 1); } { struct hci_filter *f = &hci_pi(sk)->filter; f->type_mask = uf.type_mask; f->opcode = uf.opcode; *((u32 *) f->event_mask + 0) = uf.event_mask[0]; *((u32 *) f->event_mask + 1) = uf.event_mask[1]; } break; default: err = -ENOPROTOOPT; break; } done: release_sock(sk); return err; } static int hci_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int len) { struct sock *sk = sock->sk; int err = 0; u16 opt; BT_DBG("sk %p, opt %d", sk, optname); if (level == SOL_HCI) return hci_sock_setsockopt_old(sock, level, optname, optval, len); if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; lock_sock(sk); switch (optname) { case BT_SNDMTU: case BT_RCVMTU: switch (hci_pi(sk)->channel) { /* Don't allow changing MTU for channels that are meant for HCI * traffic only. 
*/ case HCI_CHANNEL_RAW: case HCI_CHANNEL_USER: err = -ENOPROTOOPT; goto done; } err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len); if (err) break; hci_pi(sk)->mtu = opt; break; default: err = -ENOPROTOOPT; break; } done: release_sock(sk); return err; } static int hci_sock_getsockopt_old(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct hci_ufilter uf; struct sock *sk = sock->sk; int len, opt, err = 0; BT_DBG("sk %p, opt %d", sk, optname); if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { err = -EBADFD; goto done; } switch (optname) { case HCI_DATA_DIR: if (hci_pi(sk)->cmsg_mask & HCI_CMSG_DIR) opt = 1; else opt = 0; if (put_user(opt, optval)) err = -EFAULT; break; case HCI_TIME_STAMP: if (hci_pi(sk)->cmsg_mask & HCI_CMSG_TSTAMP) opt = 1; else opt = 0; if (put_user(opt, optval)) err = -EFAULT; break; case HCI_FILTER: { struct hci_filter *f = &hci_pi(sk)->filter; memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); uf.event_mask[1] = *((u32 *) f->event_mask + 1); } len = min_t(unsigned int, len, sizeof(uf)); if (copy_to_user(optval, &uf, len)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } done: release_sock(sk); return err; } static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; int err = 0; BT_DBG("sk %p, opt %d", sk, optname); if (level == SOL_HCI) return hci_sock_getsockopt_old(sock, level, optname, optval, optlen); if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; lock_sock(sk); switch (optname) { case BT_SNDMTU: case BT_RCVMTU: if (put_user(hci_pi(sk)->mtu, (u16 __user *)optval)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static void hci_sock_destruct(struct sock *sk) { mgmt_cleanup(sk); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); } static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hci_sock_release, .bind = hci_sock_bind, .getname = hci_sock_getname, .sendmsg = hci_sock_sendmsg, .recvmsg = hci_sock_recvmsg, .ioctl = hci_sock_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = hci_sock_compat_ioctl, #endif .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = hci_sock_setsockopt, .getsockopt = hci_sock_getsockopt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .mmap = sock_no_mmap }; static struct proto hci_sk_proto = { .name = "HCI", .owner = THIS_MODULE, .obj_size = sizeof(struct hci_pinfo) }; static int hci_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; BT_DBG("sock %p", sock); if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; sock->ops = &hci_sock_ops; sk = bt_sock_alloc(net, sock, &hci_sk_proto, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; sock->state = SS_UNCONNECTED; sk->sk_destruct = hci_sock_destruct; bt_sock_link(&hci_sk_list, sk); return 0; } static const struct net_proto_family hci_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = hci_sock_create, }; int __init hci_sock_init(void) { int err; BUILD_BUG_ON(sizeof(struct sockaddr_hci) > sizeof(struct sockaddr)); err = proto_register(&hci_sk_proto, 0); if (err < 0) return err; err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops); if (err < 0) { 
BT_ERR("HCI socket registration failed"); goto error; } err = bt_procfs_init(&init_net, "hci", &hci_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create HCI proc file"); bt_sock_unregister(BTPROTO_HCI); goto error; } BT_INFO("HCI socket layer initialized"); return 0; error: proto_unregister(&hci_sk_proto); return err; } void hci_sock_cleanup(void) { bt_procfs_cleanup(&init_net, "hci"); bt_sock_unregister(BTPROTO_HCI); proto_unregister(&hci_sk_proto); }
// SPDX-License-Identifier: GPL-2.0-or-later /* * net-sysfs.c - network device class and attributes * * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org> */ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/sched/isolation.h> #include <linux/nsproxy.h> #include <net/sock.h> #include <net/net_namespace.h> #include <linux/rtnetlink.h> #include <linux/vmalloc.h> #include <linux/export.h> #include <linux/jiffies.h> #include <linux/pm_runtime.h> #include <linux/of.h> #include <linux/of_net.h> #include <linux/cpu.h> #include <net/netdev_rx_queue.h> #include 
<net/rps.h> #include "dev.h" #include "net-sysfs.h" #ifdef CONFIG_SYSFS static const char fmt_hex[] = "%#x\n"; static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; /* Caller holds RTNL or RCU */ static inline int dev_isalive(const struct net_device *dev) { return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; } /* use same locking rules as GIF* ioctl's */ static ssize_t netdev_show(const struct device *dev, struct device_attribute *attr, char *buf, ssize_t (*format)(const struct net_device *, char *)) { struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; rcu_read_lock(); if (dev_isalive(ndev)) ret = (*format)(ndev, buf); rcu_read_unlock(); return ret; } /* generate a show function for simple field */ #define NETDEVICE_SHOW(field, format_string) \ static ssize_t format_##field(const struct net_device *dev, char *buf) \ { \ return sysfs_emit(buf, format_string, READ_ONCE(dev->field)); \ } \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ return netdev_show(dev, attr, buf, format_##field); \ } \ #define NETDEVICE_SHOW_RO(field, format_string) \ NETDEVICE_SHOW(field, format_string); \ static DEVICE_ATTR_RO(field) #define NETDEVICE_SHOW_RW(field, format_string) \ NETDEVICE_SHOW(field, format_string); \ static DEVICE_ATTR_RW(field) /* use same locking and permission rules as SIF* ioctl's */ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len, int (*set)(struct net_device *, unsigned long)) { struct net_device *netdev = to_net_dev(dev); struct net *net = dev_net(netdev); unsigned long new; int ret; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = kstrtoul(buf, 0, &new); if (ret) goto err; if (!rtnl_trylock()) return restart_syscall(); if (dev_isalive(netdev)) { ret = (*set)(netdev, new); if (ret == 0) ret = len; } rtnl_unlock(); err: return ret; } NETDEVICE_SHOW_RO(dev_id, fmt_hex); NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); NETDEVICE_SHOW_RO(addr_len, fmt_dec); NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); static ssize_t iflink_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *ndev = to_net_dev(dev); return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev)); } static DEVICE_ATTR_RO(iflink); static ssize_t format_name_assign_type(const struct net_device *dev, char *buf) { return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type)); } static ssize_t name_assign_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN) ret = netdev_show(dev, attr, buf, format_name_assign_type); return ret; } static DEVICE_ATTR_RO(name_assign_type); /* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; down_read(&dev_addr_sem); rcu_read_lock(); if (dev_isalive(ndev)) ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len); rcu_read_unlock(); up_read(&dev_addr_sem); return ret; } static DEVICE_ATTR_RO(address); static ssize_t broadcast_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *ndev = to_net_dev(dev); int ret = 
-EINVAL; rcu_read_lock(); if (dev_isalive(ndev)) ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len); rcu_read_unlock(); return ret; } static DEVICE_ATTR_RO(broadcast); static int change_carrier(struct net_device *dev, unsigned long new_carrier) { if (!netif_running(dev)) return -EINVAL; return dev_change_carrier(dev, (bool)new_carrier); } static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct net_device *netdev = to_net_dev(dev); /* The check is also done in change_carrier; this helps returning early * without hitting the trylock/restart in netdev_store. */ if (!netdev->netdev_ops->ndo_change_carrier) return -EOPNOTSUPP; return netdev_store(dev, attr, buf, len, change_carrier); } static ssize_t carrier_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; if (!rtnl_trylock()) return restart_syscall(); if (netif_running(netdev)) { /* Synchronize carrier state with link watch, * see also rtnl_getlink(). */ linkwatch_sync_dev(netdev); ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); } rtnl_unlock(); return ret; } static DEVICE_ATTR_RW(carrier); static ssize_t speed_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps * returning early without hitting the trylock/restart below. */ if (!netdev->ethtool_ops->get_link_ksettings) return ret; if (!rtnl_trylock()) return restart_syscall(); if (netif_running(netdev) && netif_device_present(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) ret = sysfs_emit(buf, fmt_dec, cmd.base.speed); } rtnl_unlock(); return ret; } static DEVICE_ATTR_RO(speed); static ssize_t duplex_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps * returning early without hitting the trylock/restart below. 
*/ if (!netdev->ethtool_ops->get_link_ksettings) return ret; if (!rtnl_trylock()) return restart_syscall(); if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) { const char *duplex; switch (cmd.base.duplex) { case DUPLEX_HALF: duplex = "half"; break; case DUPLEX_FULL: duplex = "full"; break; default: duplex = "unknown"; break; } ret = sysfs_emit(buf, "%s\n", duplex); } } rtnl_unlock(); return ret; } static DEVICE_ATTR_RO(duplex); static ssize_t testing_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); if (netif_running(netdev)) return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev)); return -EINVAL; } static DEVICE_ATTR_RO(testing); static ssize_t dormant_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); if (netif_running(netdev)) return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev)); return -EINVAL; } static DEVICE_ATTR_RO(dormant); static const char *const operstates[] = { "unknown", "notpresent", /* currently unused */ "down", "lowerlayerdown", "testing", "dormant", "up" }; static ssize_t operstate_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct net_device *netdev = to_net_dev(dev); unsigned char operstate; operstate = READ_ONCE(netdev->operstate); if (!netif_running(netdev)) operstate = IF_OPER_DOWN; if (operstate >= ARRAY_SIZE(operstates)) return -EINVAL; /* should not happen */ return sysfs_emit(buf, "%s\n", operstates[operstate]); } static DEVICE_ATTR_RO(operstate); static ssize_t carrier_changes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count) + atomic_read(&netdev->carrier_down_count)); } static DEVICE_ATTR_RO(carrier_changes); static ssize_t carrier_up_count_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count)); } static DEVICE_ATTR_RO(carrier_up_count); static ssize_t carrier_down_count_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count)); } static DEVICE_ATTR_RO(carrier_down_count); /* read-write attributes */ static int change_mtu(struct net_device *dev, unsigned long new_mtu) { return dev_set_mtu(dev, (int)new_mtu); } static ssize_t mtu_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { return netdev_store(dev, attr, buf, len, change_mtu); } NETDEVICE_SHOW_RW(mtu, fmt_dec); static int change_flags(struct net_device *dev, unsigned long new_flags) { return dev_change_flags(dev, (unsigned int)new_flags, NULL); } static ssize_t flags_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { return netdev_store(dev, attr, buf, len, change_flags); } NETDEVICE_SHOW_RW(flags, fmt_hex); static ssize_t tx_queue_len_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { if (!capable(CAP_NET_ADMIN)) return -EPERM; return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len); } NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) { WRITE_ONCE(dev->gro_flush_timeout, val); return 0; } static ssize_t 
gro_flush_timeout_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { if (!capable(CAP_NET_ADMIN)) return -EPERM; return netdev_store(dev, attr, buf, len, change_gro_flush_timeout); } NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong); static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val) { WRITE_ONCE(dev->napi_defer_hard_irqs, val); return 0; } static ssize_t napi_defer_hard_irqs_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { if (!capable(CAP_NET_ADMIN)) return -EPERM; return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs); } NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec); static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct net_device *netdev = to_net_dev(dev); struct net *net = dev_net(netdev); size_t count = len; ssize_t ret = 0; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* ignore trailing newline */ if (len > 0 && buf[len - 1] == '\n') --count; if (!rtnl_trylock()) return restart_syscall(); if (dev_isalive(netdev)) { ret = dev_set_alias(netdev, buf, count); if (ret < 0) goto err; ret = len; netdev_state_change(netdev); } err: rtnl_unlock(); return ret; } static ssize_t ifalias_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct net_device *netdev = to_net_dev(dev); char tmp[IFALIASZ]; ssize_t ret = 0; ret = dev_get_alias(netdev, tmp, sizeof(tmp)); if (ret > 0) ret = sysfs_emit(buf, "%s\n", tmp); return ret; } static DEVICE_ATTR_RW(ifalias); static int change_group(struct net_device *dev, unsigned long new_group) { dev_set_group(dev, (int)new_group); return 0; } static ssize_t group_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { return netdev_store(dev, attr, buf, len, change_group); } NETDEVICE_SHOW(group, fmt_dec); static DEVICE_ATTR(netdev_group, 0644, group_show, group_store); static int change_proto_down(struct net_device *dev, unsigned long proto_down) { return dev_change_proto_down(dev, (bool)proto_down); } static ssize_t proto_down_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { return netdev_store(dev, attr, buf, len, change_proto_down); } NETDEVICE_SHOW_RW(proto_down, fmt_dec); static ssize_t phys_port_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; /* The check is also done in dev_get_phys_port_id; this helps returning * early without hitting the trylock/restart below. */ if (!netdev->netdev_ops->ndo_get_phys_port_id) return -EOPNOTSUPP; if (!rtnl_trylock()) return restart_syscall(); if (dev_isalive(netdev)) { struct netdev_phys_item_id ppid; ret = dev_get_phys_port_id(netdev, &ppid); if (!ret) ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); } rtnl_unlock(); return ret; } static DEVICE_ATTR_RO(phys_port_id); static ssize_t phys_port_name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; /* The checks are also done in dev_get_phys_port_name; this helps * returning early without hitting the trylock/restart below. 
	 */
	if (!netdev->netdev_ops->ndo_get_phys_port_name &&
	    !netdev->devlink_port)
		return -EOPNOTSUPP;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		char name[IFNAMSIZ];

		ret = dev_get_phys_port_name(netdev, name, sizeof(name));
		if (!ret)
			ret = sysfs_emit(buf, "%s\n", name);
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_name);

static ssize_t phys_switch_id_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	/* The checks are also done in dev_get_port_parent_id; this helps
	 * returning early without hitting the trylock/restart below. This works
	 * because recurse is false when calling dev_get_port_parent_id.
	 */
	if (!netdev->netdev_ops->ndo_get_port_parent_id &&
	    !netdev->devlink_port)
		return -EOPNOTSUPP;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		struct netdev_phys_item_id ppid = { };

		ret = dev_get_port_parent_id(netdev, &ppid, false);
		if (!ret)
			ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_switch_id);

static ssize_t threaded_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev))
		ret = sysfs_emit(buf, fmt_dec, netdev->threaded);

	rtnl_unlock();
	return ret;
}

static int modify_napi_threaded(struct net_device *dev, unsigned long val)
{
	int ret;

	if (list_empty(&dev->napi_list))
		return -EOPNOTSUPP;

	if (val != 0 && val != 1)
		return -EOPNOTSUPP;

	ret = dev_set_threaded(dev, val);

	return ret;
}

static ssize_t threaded_store(struct device *dev,
			      struct device_attribute *attr,
			      const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, modify_napi_threaded);
}
static DEVICE_ATTR_RW(threaded);

static struct attribute *net_class_attrs[] __ro_after_init = {
	&dev_attr_netdev_group.attr,
	&dev_attr_type.attr,
	&dev_attr_dev_id.attr,
	&dev_attr_dev_port.attr,
	&dev_attr_iflink.attr,
	&dev_attr_ifindex.attr,
	&dev_attr_name_assign_type.attr,
	&dev_attr_addr_assign_type.attr,
	&dev_attr_addr_len.attr,
	&dev_attr_link_mode.attr,
	&dev_attr_address.attr,
	&dev_attr_broadcast.attr,
	&dev_attr_speed.attr,
	&dev_attr_duplex.attr,
	&dev_attr_dormant.attr,
	&dev_attr_testing.attr,
	&dev_attr_operstate.attr,
	&dev_attr_carrier_changes.attr,
	&dev_attr_ifalias.attr,
	&dev_attr_carrier.attr,
	&dev_attr_mtu.attr,
	&dev_attr_flags.attr,
	&dev_attr_tx_queue_len.attr,
	&dev_attr_gro_flush_timeout.attr,
	&dev_attr_napi_defer_hard_irqs.attr,
	&dev_attr_phys_port_id.attr,
	&dev_attr_phys_port_name.attr,
	&dev_attr_phys_switch_id.attr,
	&dev_attr_proto_down.attr,
	&dev_attr_carrier_up_count.attr,
	&dev_attr_carrier_down_count.attr,
	&dev_attr_threaded.attr,
	NULL,
};
ATTRIBUTE_GROUPS(net_class);

/* Show a given attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
			    struct device_attribute *attr, char *buf,
			    unsigned long offset)
{
	struct net_device *dev = to_net_dev(d);
	ssize_t ret = -EINVAL;

	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
		offset % sizeof(u64) != 0);

	rcu_read_lock();
	if (dev_isalive(dev)) {
		struct rtnl_link_stats64 temp;
		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

		ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
	}
	rcu_read_unlock();
	return ret;
}

/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name)						\
static ssize_t name##_show(struct device *d,				\
			   struct
device_attribute *attr, char *buf) \ { \ return netstat_show(d, attr, buf, \ offsetof(struct rtnl_link_stats64, name)); \ } \ static DEVICE_ATTR_RO(name) NETSTAT_ENTRY(rx_packets); NETSTAT_ENTRY(tx_packets); NETSTAT_ENTRY(rx_bytes); NETSTAT_ENTRY(tx_bytes); NETSTAT_ENTRY(rx_errors); NETSTAT_ENTRY(tx_errors); NETSTAT_ENTRY(rx_dropped); NETSTAT_ENTRY(tx_dropped); NETSTAT_ENTRY(multicast); NETSTAT_ENTRY(collisions); NETSTAT_ENTRY(rx_length_errors); NETSTAT_ENTRY(rx_over_errors); NETSTAT_ENTRY(rx_crc_errors); NETSTAT_ENTRY(rx_frame_errors); NETSTAT_ENTRY(rx_fifo_errors); NETSTAT_ENTRY(rx_missed_errors); NETSTAT_ENTRY(tx_aborted_errors); NETSTAT_ENTRY(tx_carrier_errors); NETSTAT_ENTRY(tx_fifo_errors); NETSTAT_ENTRY(tx_heartbeat_errors); NETSTAT_ENTRY(tx_window_errors); NETSTAT_ENTRY(rx_compressed); NETSTAT_ENTRY(tx_compressed); NETSTAT_ENTRY(rx_nohandler); static struct attribute *netstat_attrs[] __ro_after_init = { &dev_attr_rx_packets.attr, &dev_attr_tx_packets.attr, &dev_attr_rx_bytes.attr, &dev_attr_tx_bytes.attr, &dev_attr_rx_errors.attr, &dev_attr_tx_errors.attr, &dev_attr_rx_dropped.attr, &dev_attr_tx_dropped.attr, &dev_attr_multicast.attr, &dev_attr_collisions.attr, &dev_attr_rx_length_errors.attr, &dev_attr_rx_over_errors.attr, &dev_attr_rx_crc_errors.attr, &dev_attr_rx_frame_errors.attr, &dev_attr_rx_fifo_errors.attr, &dev_attr_rx_missed_errors.attr, &dev_attr_tx_aborted_errors.attr, &dev_attr_tx_carrier_errors.attr, &dev_attr_tx_fifo_errors.attr, &dev_attr_tx_heartbeat_errors.attr, &dev_attr_tx_window_errors.attr, &dev_attr_rx_compressed.attr, &dev_attr_tx_compressed.attr, &dev_attr_rx_nohandler.attr, NULL }; static const struct attribute_group netstat_group = { .name = "statistics", .attrs = netstat_attrs, }; static struct attribute *wireless_attrs[] = { NULL }; static const struct attribute_group wireless_group = { .name = "wireless", .attrs = wireless_attrs, }; static bool wireless_group_needed(struct net_device *ndev) { #if IS_ENABLED(CONFIG_CFG80211) if (ndev->ieee80211_ptr) return true; #endif #if IS_ENABLED(CONFIG_WIRELESS_EXT) if (ndev->wireless_handlers) return true; #endif return false; } #else /* CONFIG_SYSFS */ #define net_class_groups NULL #endif /* CONFIG_SYSFS */ #ifdef CONFIG_SYSFS #define to_rx_queue_attr(_attr) \ container_of(_attr, struct rx_queue_attribute, attr) #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); struct netdev_rx_queue *queue = to_rx_queue(kobj); if (!attribute->show) return -EIO; return attribute->show(queue, buf); } static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); struct netdev_rx_queue *queue = to_rx_queue(kobj); if (!attribute->store) return -EIO; return attribute->store(queue, buf, count); } static const struct sysfs_ops rx_queue_sysfs_ops = { .show = rx_queue_attr_show, .store = rx_queue_attr_store, }; #ifdef CONFIG_RPS static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf) { struct rps_map *map; cpumask_var_t mask; int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; rcu_read_lock(); map = rcu_dereference(queue->rps_map); if (map) for (i = 0; i < map->len; i++) cpumask_set_cpu(map->cpus[i], mask); len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask)); rcu_read_unlock(); free_cpumask_var(mask); 
return len < PAGE_SIZE ? len : -EINVAL; } static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue, cpumask_var_t mask) { static DEFINE_MUTEX(rps_map_mutex); struct rps_map *old_map, *map; int cpu, i; map = kzalloc(max_t(unsigned int, RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), GFP_KERNEL); if (!map) return -ENOMEM; i = 0; for_each_cpu_and(cpu, mask, cpu_online_mask) map->cpus[i++] = cpu; if (i) { map->len = i; } else { kfree(map); map = NULL; } mutex_lock(&rps_map_mutex); old_map = rcu_dereference_protected(queue->rps_map, mutex_is_locked(&rps_map_mutex)); rcu_assign_pointer(queue->rps_map, map); if (map) static_branch_inc(&rps_needed); if (old_map) static_branch_dec(&rps_needed); mutex_unlock(&rps_map_mutex); if (old_map) kfree_rcu(old_map, rcu); return 0; } int rps_cpumask_housekeeping(struct cpumask *mask) { if (!cpumask_empty(mask)) { cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN)); cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ)); if (cpumask_empty(mask)) return -EINVAL; } return 0; } static ssize_t store_rps_map(struct netdev_rx_queue *queue, const char *buf, size_t len) { cpumask_var_t mask; int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); if (err) goto out; err = rps_cpumask_housekeeping(mask); if (err) goto out; err = netdev_rx_queue_set_rps_mask(queue, mask); out: free_cpumask_var(mask); return err ? : len; } static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, char *buf) { struct rps_dev_flow_table *flow_table; unsigned long val = 0; rcu_read_lock(); flow_table = rcu_dereference(queue->rps_flow_table); if (flow_table) val = (unsigned long)flow_table->mask + 1; rcu_read_unlock(); return sysfs_emit(buf, "%lu\n", val); } static void rps_dev_flow_table_release(struct rcu_head *rcu) { struct rps_dev_flow_table *table = container_of(rcu, struct rps_dev_flow_table, rcu); vfree(table); } static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, const char *buf, size_t len) { unsigned long mask, count; struct rps_dev_flow_table *table, *old_table; static DEFINE_SPINLOCK(rps_dev_flow_lock); int rc; if (!capable(CAP_NET_ADMIN)) return -EPERM; rc = kstrtoul(buf, 0, &count); if (rc < 0) return rc; if (count) { mask = count - 1; /* mask = roundup_pow_of_two(count) - 1; * without overflows... */ while ((mask | (mask >> 1)) != mask) mask |= (mask >> 1); /* On 64 bit arches, must check mask fits in table->mask (u32), * and on 32bit arches, must check * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow. 
*/ #if BITS_PER_LONG > 32 if (mask > (unsigned long)(u32)mask) return -EINVAL; #else if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) / sizeof(struct rps_dev_flow)) { /* Enforce a limit to prevent overflow */ return -EINVAL; } #endif table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); if (!table) return -ENOMEM; table->mask = mask; for (count = 0; count <= mask; count++) table->flows[count].cpu = RPS_NO_CPU; } else { table = NULL; } spin_lock(&rps_dev_flow_lock); old_table = rcu_dereference_protected(queue->rps_flow_table, lockdep_is_held(&rps_dev_flow_lock)); rcu_assign_pointer(queue->rps_flow_table, table); spin_unlock(&rps_dev_flow_lock); if (old_table) call_rcu(&old_table->rcu, rps_dev_flow_table_release); return len; } static struct rx_queue_attribute rps_cpus_attribute __ro_after_init = __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map); static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init = __ATTR(rps_flow_cnt, 0644, show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); #endif /* CONFIG_RPS */ static struct attribute *rx_queue_default_attrs[] __ro_after_init = { #ifdef CONFIG_RPS &rps_cpus_attribute.attr, &rps_dev_flow_table_cnt_attribute.attr, #endif NULL }; ATTRIBUTE_GROUPS(rx_queue_default); static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); #ifdef CONFIG_RPS struct rps_map *map; struct rps_dev_flow_table *flow_table; map = rcu_dereference_protected(queue->rps_map, 1); if (map) { RCU_INIT_POINTER(queue->rps_map, NULL); kfree_rcu(map, rcu); } flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); if (flow_table) { RCU_INIT_POINTER(queue->rps_flow_table, NULL); call_rcu(&flow_table->rcu, rps_dev_flow_table_release); } #endif memset(kobj, 0, sizeof(*kobj)); netdev_put(queue->dev, &queue->dev_tracker); } static const void *rx_queue_namespace(const struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); struct device *dev = &queue->dev->dev; const void *ns = NULL; if (dev->class && dev->class->ns_type) ns = dev->class->namespace(dev); return ns; } static void rx_queue_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { const struct net *net = rx_queue_namespace(kobj); net_ns_get_ownership(net, uid, gid); } static const struct kobj_type rx_queue_ktype = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, .default_groups = rx_queue_default_groups, .namespace = rx_queue_namespace, .get_ownership = rx_queue_get_ownership, }; static int rx_queue_default_mask(struct net_device *dev, struct netdev_rx_queue *queue) { #if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL) struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask); if (rps_default_mask && !cpumask_empty(rps_default_mask)) return netdev_rx_queue_set_rps_mask(queue, rps_default_mask); #endif return 0; } static int rx_queue_add_kobject(struct net_device *dev, int index) { struct netdev_rx_queue *queue = dev->_rx + index; struct kobject *kobj = &queue->kobj; int error = 0; /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) goto err; if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) goto err; } error = rx_queue_default_mask(dev, queue); if (error) goto err; 
kobject_uevent(kobj, KOBJ_ADD); return error; err: kobject_put(kobj); return error; } static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid, kgid_t kgid) { struct netdev_rx_queue *queue = dev->_rx + index; struct kobject *kobj = &queue->kobj; int error; error = sysfs_change_owner(kobj, kuid, kgid); if (error) return error; if (dev->sysfs_rx_queue_group) error = sysfs_group_change_owner( kobj, dev->sysfs_rx_queue_group, kuid, kgid); return error; } #endif /* CONFIG_SYSFS */ int net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) { #ifdef CONFIG_SYSFS int i; int error = 0; #ifndef CONFIG_RPS if (!dev->sysfs_rx_queue_group) return 0; #endif for (i = old_num; i < new_num; i++) { error = rx_queue_add_kobject(dev, i); if (error) { new_num = old_num; break; } } while (--i >= new_num) { struct kobject *kobj = &dev->_rx[i].kobj; if (!refcount_read(&dev_net(dev)->ns.count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); kobject_put(kobj); } return error; #else return 0; #endif } static int net_rx_queue_change_owner(struct net_device *dev, int num, kuid_t kuid, kgid_t kgid) { #ifdef CONFIG_SYSFS int error = 0; int i; #ifndef CONFIG_RPS if (!dev->sysfs_rx_queue_group) return 0; #endif for (i = 0; i < num; i++) { error = rx_queue_change_owner(dev, i, kuid, kgid); if (error) break; } return error; #else return 0; #endif } #ifdef CONFIG_SYSFS /* * netdev_queue sysfs structures and functions. */ struct netdev_queue_attribute { struct attribute attr; ssize_t (*show)(struct netdev_queue *queue, char *buf); ssize_t (*store)(struct netdev_queue *queue, const char *buf, size_t len); }; #define to_netdev_queue_attr(_attr) \ container_of(_attr, struct netdev_queue_attribute, attr) #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) static ssize_t netdev_queue_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { const struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); struct netdev_queue *queue = to_netdev_queue(kobj); if (!attribute->show) return -EIO; return attribute->show(queue, buf); } static ssize_t netdev_queue_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { const struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); struct netdev_queue *queue = to_netdev_queue(kobj); if (!attribute->store) return -EIO; return attribute->store(queue, buf, count); } static const struct sysfs_ops netdev_queue_sysfs_ops = { .show = netdev_queue_attr_show, .store = netdev_queue_attr_store, }; static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) { unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout); return sysfs_emit(buf, fmt_ulong, trans_timeout); } static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; unsigned int i; i = queue - dev->_tx; BUG_ON(i >= dev->num_tx_queues); return i; } static ssize_t traffic_class_show(struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; int num_tc, tc; int index; if (!netif_is_multiqueue(dev)) return -ENOENT; if (!rtnl_trylock()) return restart_syscall(); index = get_netdev_queue_index(queue); /* If queue belongs to subordinate dev use its TC mapping */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? 
: dev; num_tc = dev->num_tc; tc = netdev_txq_to_tc(dev, index); rtnl_unlock(); if (tc < 0) return -EINVAL; /* We can report the traffic class one of two ways: * Subordinate device traffic classes are reported with the traffic * class first, and then the subordinate class so for example TC0 on * subordinate device 2 will be reported as "0-2". If the queue * belongs to the root device it will be reported with just the * traffic class, so just "0" for TC 0 for example. */ return num_tc < 0 ? sysfs_emit(buf, "%d%d\n", tc, num_tc) : sysfs_emit(buf, "%d\n", tc); } #ifdef CONFIG_XPS static ssize_t tx_maxrate_show(struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%lu\n", queue->tx_maxrate); } static ssize_t tx_maxrate_store(struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; int err, index = get_netdev_queue_index(queue); u32 rate = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; /* The check is also done later; this helps returning early without * hitting the trylock/restart below. */ if (!dev->netdev_ops->ndo_set_tx_maxrate) return -EOPNOTSUPP; err = kstrtou32(buf, 10, &rate); if (err < 0) return err; if (!rtnl_trylock()) return restart_syscall(); err = -EOPNOTSUPP; if (dev->netdev_ops->ndo_set_tx_maxrate) err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); rtnl_unlock(); if (!err) { queue->tx_maxrate = rate; return len; } return err; } static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init = __ATTR_RW(tx_maxrate); #endif static struct netdev_queue_attribute queue_trans_timeout __ro_after_init = __ATTR_RO(tx_timeout); static struct netdev_queue_attribute queue_traffic_class __ro_after_init = __ATTR_RO(traffic_class); #ifdef CONFIG_BQL /* * Byte queue limits sysfs structures and functions. 
*/ static ssize_t bql_show(char *buf, unsigned int value) { return sysfs_emit(buf, "%u\n", value); } static ssize_t bql_set(const char *buf, const size_t count, unsigned int *pvalue) { unsigned int value; int err; if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) { value = DQL_MAX_LIMIT; } else { err = kstrtouint(buf, 10, &value); if (err < 0) return err; if (value > DQL_MAX_LIMIT) return -EINVAL; } *pvalue = value; return count; } static ssize_t bql_show_hold_time(struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); } static ssize_t bql_set_hold_time(struct netdev_queue *queue, const char *buf, size_t len) { struct dql *dql = &queue->dql; unsigned int value; int err; err = kstrtouint(buf, 10, &value); if (err < 0) return err; dql->slack_hold_time = msecs_to_jiffies(value); return len; } static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init = __ATTR(hold_time, 0644, bql_show_hold_time, bql_set_hold_time); static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sprintf(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs)); } static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, const char *buf, size_t len) { struct dql *dql = &queue->dql; unsigned int value; int err; err = kstrtouint(buf, 10, &value); if (err < 0) return err; value = msecs_to_jiffies(value); if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG)) return -ERANGE; if (!dql->stall_thrs && value) dql->last_reap = jiffies; /* Force last_reap to be live */ smp_wmb(); dql->stall_thrs = value; return len; } static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init = __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs); static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf) { return sprintf(buf, "%u\n", READ_ONCE(queue->dql.stall_max)); } static ssize_t bql_set_stall_max(struct netdev_queue *queue, const char *buf, size_t len) { WRITE_ONCE(queue->dql.stall_max, 0); return len; } static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init = __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max); static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sprintf(buf, "%lu\n", dql->stall_cnt); } static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init = __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL); static ssize_t bql_show_inflight(struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed); } static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = __ATTR(inflight, 0444, bql_show_inflight, NULL); #define BQL_ATTR(NAME, FIELD) \ static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ char *buf) \ { \ return bql_show(buf, queue->dql.FIELD); \ } \ \ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ const char *buf, size_t len) \ { \ return bql_set(buf, len, &queue->dql.FIELD); \ } \ \ static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \ = __ATTR(NAME, 0644, \ bql_show_ ## NAME, bql_set_ ## NAME) BQL_ATTR(limit, limit); BQL_ATTR(limit_max, max_limit); BQL_ATTR(limit_min, min_limit); static struct attribute *dql_attrs[] __ro_after_init = { &bql_limit_attribute.attr, &bql_limit_max_attribute.attr, &bql_limit_min_attribute.attr, &bql_hold_time_attribute.attr, 
&bql_inflight_attribute.attr, &bql_stall_thrs_attribute.attr, &bql_stall_cnt_attribute.attr, &bql_stall_max_attribute.attr, NULL }; static const struct attribute_group dql_group = { .name = "byte_queue_limits", .attrs = dql_attrs, }; #else /* Fake declaration, all the code using it should be dead */ extern const struct attribute_group dql_group; #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS static ssize_t xps_queue_show(struct net_device *dev, unsigned int index, int tc, char *buf, enum xps_map_type type) { struct xps_dev_maps *dev_maps; unsigned long *mask; unsigned int nr_ids; int j, len; rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_maps[type]); /* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0 * when dev_maps hasn't been allocated yet, to be backward compatible. */ nr_ids = dev_maps ? dev_maps->nr_ids : (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues); mask = bitmap_zalloc(nr_ids, GFP_NOWAIT); if (!mask) { rcu_read_unlock(); return -ENOMEM; } if (!dev_maps || tc >= dev_maps->num_tc) goto out_no_maps; for (j = 0; j < nr_ids; j++) { int i, tci = j * dev_maps->num_tc + tc; struct xps_map *map; map = rcu_dereference(dev_maps->attr_map[tci]); if (!map) continue; for (i = map->len; i--;) { if (map->queues[i] == index) { __set_bit(j, mask); break; } } } out_no_maps: rcu_read_unlock(); len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids); bitmap_free(mask); return len < PAGE_SIZE ? len : -EINVAL; } static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; int len, tc; if (!netif_is_multiqueue(dev)) return -ENOENT; index = get_netdev_queue_index(queue); if (!rtnl_trylock()) return restart_syscall(); /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; tc = netdev_txq_to_tc(dev, index); if (tc < 0) { rtnl_unlock(); return -EINVAL; } /* Make sure the subordinate device can't be freed */ get_device(&dev->dev); rtnl_unlock(); len = xps_queue_show(dev, index, tc, buf, XPS_CPUS); put_device(&dev->dev); return len; } static ssize_t xps_cpus_store(struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; unsigned int index; cpumask_var_t mask; int err; if (!netif_is_multiqueue(dev)) return -ENOENT; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; index = get_netdev_queue_index(queue); err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); if (err) { free_cpumask_var(mask); return err; } if (!rtnl_trylock()) { free_cpumask_var(mask); return restart_syscall(); } err = netif_set_xps_queue(dev, mask, index); rtnl_unlock(); free_cpumask_var(mask); return err ? 
: len; } static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init = __ATTR_RW(xps_cpus); static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; int tc; index = get_netdev_queue_index(queue); if (!rtnl_trylock()) return restart_syscall(); tc = netdev_txq_to_tc(dev, index); rtnl_unlock(); if (tc < 0) return -EINVAL; return xps_queue_show(dev, index, tc, buf, XPS_RXQS); } static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; struct net *net = dev_net(dev); unsigned long *mask; unsigned int index; int err; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL); if (!mask) return -ENOMEM; index = get_netdev_queue_index(queue); err = bitmap_parse(buf, len, mask, dev->num_rx_queues); if (err) { bitmap_free(mask); return err; } if (!rtnl_trylock()) { bitmap_free(mask); return restart_syscall(); } cpus_read_lock(); err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS); cpus_read_unlock(); rtnl_unlock(); bitmap_free(mask); return err ? : len; } static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init = __ATTR_RW(xps_rxqs); #endif /* CONFIG_XPS */ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = { &queue_trans_timeout.attr, &queue_traffic_class.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, &xps_rxqs_attribute.attr, &queue_tx_maxrate.attr, #endif NULL }; ATTRIBUTE_GROUPS(netdev_queue_default); static void netdev_queue_release(struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); memset(kobj, 0, sizeof(*kobj)); netdev_put(queue->dev, &queue->dev_tracker); } static const void *netdev_queue_namespace(const struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); struct device *dev = &queue->dev->dev; const void *ns = NULL; if (dev->class && dev->class->ns_type) ns = dev->class->namespace(dev); return ns; } static void netdev_queue_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { const struct net *net = netdev_queue_namespace(kobj); net_ns_get_ownership(net, uid, gid); } static const struct kobj_type netdev_queue_ktype = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, .default_groups = netdev_queue_default_groups, .namespace = netdev_queue_namespace, .get_ownership = netdev_queue_get_ownership, }; static bool netdev_uses_bql(const struct net_device *dev) { if (dev->features & NETIF_F_LLTX || dev->priv_flags & IFF_NO_QUEUE) return false; return IS_ENABLED(CONFIG_BQL); } static int netdev_queue_add_kobject(struct net_device *dev, int index) { struct netdev_queue *queue = dev->_tx + index; struct kobject *kobj = &queue->kobj; int error = 0; /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, "tx-%u", index); if (error) goto err; if (netdev_uses_bql(dev)) { error = sysfs_create_group(kobj, &dql_group); if (error) goto err; } kobject_uevent(kobj, KOBJ_ADD); return 0; err: kobject_put(kobj); return error; } static int tx_queue_change_owner(struct net_device *ndev, int index, kuid_t kuid, kgid_t kgid) { struct netdev_queue *queue = ndev->_tx + index; struct kobject *kobj = &queue->kobj; int error; error = sysfs_change_owner(kobj, kuid, kgid); if (error) 
return error; if (netdev_uses_bql(ndev)) error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid); return error; } #endif /* CONFIG_SYSFS */ int netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) { #ifdef CONFIG_SYSFS int i; int error = 0; /* Tx queue kobjects are allowed to be updated when a device is being * unregistered, but solely to remove queues from qdiscs. Any path * adding queues should be fixed. */ WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num, "New queues can't be registered after device unregistration."); for (i = old_num; i < new_num; i++) { error = netdev_queue_add_kobject(dev, i); if (error) { new_num = old_num; break; } } while (--i >= new_num) { struct netdev_queue *queue = dev->_tx + i; if (!refcount_read(&dev_net(dev)->ns.count)) queue->kobj.uevent_suppress = 1; if (netdev_uses_bql(dev)) sysfs_remove_group(&queue->kobj, &dql_group); kobject_put(&queue->kobj); } return error; #else return 0; #endif /* CONFIG_SYSFS */ } static int net_tx_queue_change_owner(struct net_device *dev, int num, kuid_t kuid, kgid_t kgid) { #ifdef CONFIG_SYSFS int error = 0; int i; for (i = 0; i < num; i++) { error = tx_queue_change_owner(dev, i, kuid, kgid); if (error) break; } return error; #else return 0; #endif /* CONFIG_SYSFS */ } static int register_queue_kobjects(struct net_device *dev) { int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; #ifdef CONFIG_SYSFS dev->queues_kset = kset_create_and_add("queues", NULL, &dev->dev.kobj); if (!dev->queues_kset) return -ENOMEM; real_rx = dev->real_num_rx_queues; #endif real_tx = dev->real_num_tx_queues; error = net_rx_queue_update_kobjects(dev, 0, real_rx); if (error) goto error; rxq = real_rx; error = netdev_queue_update_kobjects(dev, 0, real_tx); if (error) goto error; txq = real_tx; return 0; error: netdev_queue_update_kobjects(dev, txq, 0); net_rx_queue_update_kobjects(dev, rxq, 0); #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif return error; } static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid) { int error = 0, real_rx = 0, real_tx = 0; #ifdef CONFIG_SYSFS if (ndev->queues_kset) { error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid); if (error) return error; } real_rx = ndev->real_num_rx_queues; #endif real_tx = ndev->real_num_tx_queues; error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid); if (error) return error; error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid); if (error) return error; return 0; } static void remove_queue_kobjects(struct net_device *dev) { int real_rx = 0, real_tx = 0; #ifdef CONFIG_SYSFS real_rx = dev->real_num_rx_queues; #endif real_tx = dev->real_num_tx_queues; net_rx_queue_update_kobjects(dev, real_rx, 0); netdev_queue_update_kobjects(dev, real_tx, 0); dev->real_num_rx_queues = 0; dev->real_num_tx_queues = 0; #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif } static bool net_current_may_mount(void) { struct net *net = current->nsproxy->net_ns; return ns_capable(net->user_ns, CAP_SYS_ADMIN); } static void *net_grab_current_ns(void) { struct net *ns = current->nsproxy->net_ns; #ifdef CONFIG_NET_NS if (ns) refcount_inc(&ns->passive); #endif return ns; } static const void *net_initial_ns(void) { return &init_net; } static const void *net_netlink_ns(struct sock *sk) { return sock_net(sk); } const struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, .current_may_mount = net_current_may_mount, .grab_current_ns = net_grab_current_ns, .netlink_ns = 
net_netlink_ns, .initial_ns = net_initial_ns, .drop_ns = net_drop_ns, }; EXPORT_SYMBOL_GPL(net_ns_type_operations); static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env) { const struct net_device *dev = to_net_dev(d); int retval; /* pass interface to uevent. */ retval = add_uevent_var(env, "INTERFACE=%s", dev->name); if (retval) goto exit; /* pass ifindex to uevent. * ifindex is useful as it won't change (interface name may change) * and is what RtNetlink uses natively. */ retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex); exit: return retval; } /* * netdev_release -- destroy and free a dead device. * Called when last reference to device kobject is gone. */ static void netdev_release(struct device *d) { struct net_device *dev = to_net_dev(d); BUG_ON(dev->reg_state != NETREG_RELEASED); /* no need to wait for rcu grace period: * device is dead and about to be freed. */ kfree(rcu_access_pointer(dev->ifalias)); netdev_freemem(dev); } static const void *net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d); return dev_net(dev); } static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid) { const struct net_device *dev = to_net_dev(d); const struct net *net = dev_net(dev); net_ns_get_ownership(net, uid, gid); } static struct class net_class __ro_after_init = { .name = "net", .dev_release = netdev_release, .dev_groups = net_class_groups, .dev_uevent = netdev_uevent, .ns_type = &net_ns_type_operations, .namespace = net_namespace, .get_ownership = net_get_ownership, }; #ifdef CONFIG_OF static int of_dev_node_match(struct device *dev, const void *data) { for (; dev; dev = dev->parent) { if (dev->of_node == data) return 1; } return 0; } /* * of_find_net_device_by_node - lookup the net device for the device node * @np: OF device node * * Looks up the net_device structure corresponding with the device node. * If successful, returns a pointer to the net_device with the embedded * struct device refcount incremented by one, or NULL on failure. The * refcount must be dropped when done with the net_device. */ struct net_device *of_find_net_device_by_node(struct device_node *np) { struct device *dev; dev = class_find_device(&net_class, NULL, np, of_dev_node_match); if (!dev) return NULL; return to_net_dev(dev); } EXPORT_SYMBOL(of_find_net_device_by_node); #endif /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ void netdev_unregister_kobject(struct net_device *ndev) { struct device *dev = &ndev->dev; if (!refcount_read(&dev_net(ndev)->ns.count)) dev_set_uevent_suppress(dev, 1); kobject_get(&dev->kobj); remove_queue_kobjects(ndev); pm_runtime_set_memalloc_noio(dev, false); device_del(dev); } /* Create sysfs entries for network device. 
 */
int netdev_register_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;
	const struct attribute_group **groups = ndev->sysfs_groups;
	int error = 0;

	device_initialize(dev);
	dev->class = &net_class;
	dev->platform_data = ndev;
	dev->groups = groups;

	dev_set_name(dev, "%s", ndev->name);

#ifdef CONFIG_SYSFS
	/* Allow for a device specific group */
	if (*groups)
		groups++;

	*groups++ = &netstat_group;

	if (wireless_group_needed(ndev))
		*groups++ = &wireless_group;
#endif /* CONFIG_SYSFS */

	error = device_add(dev);
	if (error)
		return error;

	error = register_queue_kobjects(ndev);
	if (error) {
		device_del(dev);
		return error;
	}

	pm_runtime_set_memalloc_noio(dev, true);

	return error;
}

/* Change owner for sysfs entries when moving network devices across network
 * namespaces owned by different user namespaces.
 */
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
			const struct net *net_new)
{
	kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
	kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
	struct device *dev = &ndev->dev;
	int error;

	net_ns_get_ownership(net_old, &old_uid, &old_gid);
	net_ns_get_ownership(net_new, &new_uid, &new_gid);

	/* The network namespace was changed but the owning user namespace is
	 * identical so there's no need to change the owner of sysfs entries.
	 */
	if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid))
		return 0;

	error = device_change_owner(dev, new_uid, new_gid);
	if (error)
		return error;

	error = queue_change_owner(ndev, new_uid, new_gid);
	if (error)
		return error;

	return 0;
}

int netdev_class_create_file_ns(const struct class_attribute *class_attr,
				const void *ns)
{
	return class_create_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_create_file_ns);

void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
				 const void *ns)
{
	class_remove_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_remove_file_ns);

int __init netdev_kobject_init(void)
{
	kobj_ns_type_register(&net_ns_type_operations);
	return class_register(&net_class);
}
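/*
 * Illustration (not part of the kernel source above): the attributes
 * registered by net-sysfs.c surface under /sys/class/net/<iface>/.  The
 * minimal userspace sketch below reads a few of them; "lo" is only an
 * assumed interface name, and error handling is kept to the bare minimum.
 */
#include <stdio.h>

static int show_net_attr(const char *ifname, const char *attr)
{
	char path[256];
	char value[128];
	FILE *f;

	/* e.g. /sys/class/net/lo/mtu, backed by mtu_show() above */
	snprintf(path, sizeof(path), "/sys/class/net/%s/%s", ifname, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fgets(value, sizeof(value), f))
		printf("%s: %s", path, value);	/* value already ends in '\n' */
	fclose(f);
	return 0;
}

int main(void)
{
	show_net_attr("lo", "mtu");		/* mtu_show(), decimal */
	show_net_attr("lo", "operstate");	/* operstate_show(), string */
	show_net_attr("lo", "carrier_changes");	/* carrier_changes_show() */
	return 0;
}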
// SPDX-License-Identifier: GPL-2.0-only
/*
 * 32bit compatibility wrappers for the input subsystem.
 *
 * Very heavily based on evdev.c - Copyright (c) 1999-2002 Vojtech Pavlik
 */

#include <linux/export.h>
#include <linux/uaccess.h>

#include "input-compat.h"

#ifdef CONFIG_COMPAT

int input_event_from_user(const char __user *buffer,
			  struct input_event *event)
{
	if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct input_event_compat compat_event;

		if (copy_from_user(&compat_event, buffer,
				   sizeof(struct input_event_compat)))
			return -EFAULT;

		event->input_event_sec = compat_event.sec;
		event->input_event_usec = compat_event.usec;
		event->type = compat_event.type;
		event->code = compat_event.code;
		event->value = compat_event.value;
	} else {
		if (copy_from_user(event, buffer, sizeof(struct input_event)))
			return -EFAULT;
	}

	return 0;
}

int input_event_to_user(char __user *buffer,
			const struct input_event *event)
{
	if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct input_event_compat compat_event;

		compat_event.sec = event->input_event_sec;
		compat_event.usec = event->input_event_usec;
		compat_event.type = event->type;
		compat_event.code = event->code;
		compat_event.value = event->value;

		if (copy_to_user(buffer, &compat_event,
				 sizeof(struct input_event_compat)))
			return -EFAULT;
	} else {
		if (copy_to_user(buffer, event, sizeof(struct input_event)))
			return -EFAULT;
	}

	return 0;
}

int input_ff_effect_from_user(const char __user *buffer, size_t size,
			      struct ff_effect *effect)
{
	if (in_compat_syscall()) {
		struct ff_effect_compat *compat_effect;

		if (size != sizeof(struct ff_effect_compat))
			return -EINVAL;

		/*
		 * It so happens that the pointer which needs to be changed
		 * is the last field in the structure, so we can retrieve the
		 * whole thing and replace just the pointer.
		 */

		compat_effect = (struct ff_effect_compat *)effect;

		if (copy_from_user(compat_effect, buffer,
				   sizeof(struct ff_effect_compat)))
			return -EFAULT;

		if (compat_effect->type == FF_PERIODIC &&
		    compat_effect->u.periodic.waveform == FF_CUSTOM)
			effect->u.periodic.custom_data =
				compat_ptr(compat_effect->u.periodic.custom_data);
	} else {
		if (size != sizeof(struct ff_effect))
			return -EINVAL;

		if (copy_from_user(effect, buffer, sizeof(struct ff_effect)))
			return -EFAULT;
	}

	return 0;
}

#else

int input_event_from_user(const char __user *buffer,
			  struct input_event *event)
{
	if (copy_from_user(event, buffer, sizeof(struct input_event)))
		return -EFAULT;

	return 0;
}

int input_event_to_user(char __user *buffer,
			const struct input_event *event)
{
	if (copy_to_user(buffer, event, sizeof(struct input_event)))
		return -EFAULT;

	return 0;
}

int input_ff_effect_from_user(const char __user *buffer, size_t size,
			      struct ff_effect *effect)
{
	if (size != sizeof(struct ff_effect))
		return -EINVAL;

	if (copy_from_user(effect, buffer, sizeof(struct ff_effect)))
		return -EFAULT;

	return 0;
}

#endif /* CONFIG_COMPAT */

EXPORT_SYMBOL_GPL(input_event_from_user);
EXPORT_SYMBOL_GPL(input_event_to_user);
EXPORT_SYMBOL_GPL(input_ff_effect_from_user);
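/*
 * Illustration (not part of input-compat.c): the wrappers above exist so
 * that a 32-bit process on a 64-bit kernel still receives input_event
 * records of the size it expects.  The sketch below is an ordinary
 * userspace reader of such records; /dev/input/event0 is an assumed
 * device node and may differ on a given system.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/input.h>

int main(void)
{
	struct input_event ev;
	int fd = open("/dev/input/event0", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Each read() returns whole events; on the kernel side the native
	 * or compat layout is chosen when copying to this buffer. */
	while (read(fd, &ev, sizeof(ev)) == sizeof(ev))
		printf("type %u code %u value %d\n",
		       ev.type, ev.code, ev.value);

	close(fd);
	return 0;
}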
// SPDX-License-Identifier: GPL-2.0-only
/*
 * hid-cp2112.c - Silicon Labs HID USB to SMBus master bridge
 * Copyright (c) 2013,2014 Uplogix, Inc.
 * David Barksdale <dbarksdale@uplogix.com>
 */

/*
 * The Silicon Labs CP2112 chip is a USB HID device which provides an
 * SMBus controller for talking to slave devices and 8 GPIO pins. The
 * host communicates with the CP2112 via raw HID reports.
* * Data Sheet: * https://www.silabs.com/Support%20Documents/TechnicalDocs/CP2112.pdf * Programming Interface Specification: * https://www.silabs.com/documents/public/application-notes/an495-cp2112-interface-specification.pdf */ #include <linux/bitops.h> #include <linux/gpio/driver.h> #include <linux/hid.h> #include <linux/hidraw.h> #include <linux/i2c.h> #include <linux/module.h> #include <linux/nls.h> #include <linux/string_choices.h> #include <linux/usb/ch9.h> #include "hid-ids.h" #define CP2112_REPORT_MAX_LENGTH 64 #define CP2112_GPIO_CONFIG_LENGTH 5 #define CP2112_GPIO_GET_LENGTH 2 #define CP2112_GPIO_SET_LENGTH 3 #define CP2112_GPIO_MAX_GPIO 8 #define CP2112_GPIO_ALL_GPIO_MASK GENMASK(7, 0) enum { CP2112_GPIO_CONFIG = 0x02, CP2112_GPIO_GET = 0x03, CP2112_GPIO_SET = 0x04, CP2112_GET_VERSION_INFO = 0x05, CP2112_SMBUS_CONFIG = 0x06, CP2112_DATA_READ_REQUEST = 0x10, CP2112_DATA_WRITE_READ_REQUEST = 0x11, CP2112_DATA_READ_FORCE_SEND = 0x12, CP2112_DATA_READ_RESPONSE = 0x13, CP2112_DATA_WRITE_REQUEST = 0x14, CP2112_TRANSFER_STATUS_REQUEST = 0x15, CP2112_TRANSFER_STATUS_RESPONSE = 0x16, CP2112_CANCEL_TRANSFER = 0x17, CP2112_LOCK_BYTE = 0x20, CP2112_USB_CONFIG = 0x21, CP2112_MANUFACTURER_STRING = 0x22, CP2112_PRODUCT_STRING = 0x23, CP2112_SERIAL_STRING = 0x24, }; enum { STATUS0_IDLE = 0x00, STATUS0_BUSY = 0x01, STATUS0_COMPLETE = 0x02, STATUS0_ERROR = 0x03, }; enum { STATUS1_TIMEOUT_NACK = 0x00, STATUS1_TIMEOUT_BUS = 0x01, STATUS1_ARBITRATION_LOST = 0x02, STATUS1_READ_INCOMPLETE = 0x03, STATUS1_WRITE_INCOMPLETE = 0x04, STATUS1_SUCCESS = 0x05, }; struct cp2112_smbus_config_report { u8 report; /* CP2112_SMBUS_CONFIG */ __be32 clock_speed; /* Hz */ u8 device_address; /* Stored in the upper 7 bits */ u8 auto_send_read; /* 1 = enabled, 0 = disabled */ __be16 write_timeout; /* ms, 0 = no timeout */ __be16 read_timeout; /* ms, 0 = no timeout */ u8 scl_low_timeout; /* 1 = enabled, 0 = disabled */ __be16 retry_time; /* # of retries, 0 = no limit */ } __packed; struct cp2112_usb_config_report { u8 report; /* CP2112_USB_CONFIG */ __le16 vid; /* Vendor ID */ __le16 pid; /* Product ID */ u8 max_power; /* Power requested in 2mA units */ u8 power_mode; /* 0x00 = bus powered 0x01 = self powered & regulator off 0x02 = self powered & regulator on */ u8 release_major; u8 release_minor; u8 mask; /* What fields to program */ } __packed; struct cp2112_read_req_report { u8 report; /* CP2112_DATA_READ_REQUEST */ u8 slave_address; __be16 length; } __packed; struct cp2112_write_read_req_report { u8 report; /* CP2112_DATA_WRITE_READ_REQUEST */ u8 slave_address; __be16 length; u8 target_address_length; u8 target_address[16]; } __packed; struct cp2112_write_req_report { u8 report; /* CP2112_DATA_WRITE_REQUEST */ u8 slave_address; u8 length; u8 data[61]; } __packed; struct cp2112_force_read_report { u8 report; /* CP2112_DATA_READ_FORCE_SEND */ __be16 length; } __packed; struct cp2112_xfer_status_report { u8 report; /* CP2112_TRANSFER_STATUS_RESPONSE */ u8 status0; /* STATUS0_* */ u8 status1; /* STATUS1_* */ __be16 retries; __be16 length; } __packed; struct cp2112_string_report { u8 dummy; /* force .string to be aligned */ struct_group_attr(contents, __packed, u8 report; /* CP2112_*_STRING */ u8 length; /* length in bytes of everything after .report */ u8 type; /* USB_DT_STRING */ wchar_t string[30]; /* UTF16_LITTLE_ENDIAN string */ ); } __packed; /* Number of times to request transfer status before giving up waiting for a transfer to complete. 
This may need to be changed if SMBUS clock, retries, or read/write/scl_low timeout settings are changed. */ static const int XFER_STATUS_RETRIES = 10; /* Time in ms to wait for a CP2112_DATA_READ_RESPONSE or CP2112_TRANSFER_STATUS_RESPONSE. */ static const int RESPONSE_TIMEOUT = 50; static const struct hid_device_id cp2112_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, { } }; MODULE_DEVICE_TABLE(hid, cp2112_devices); struct cp2112_device { struct i2c_adapter adap; struct hid_device *hdev; wait_queue_head_t wait; u8 read_data[61]; u8 read_length; u8 hwversion; int xfer_status; atomic_t read_avail; atomic_t xfer_avail; struct gpio_chip gc; u8 *in_out_buffer; struct mutex lock; bool gpio_poll; struct delayed_work gpio_poll_worker; unsigned long irq_mask; u8 gpio_prev_state; }; static int gpio_push_pull = CP2112_GPIO_ALL_GPIO_MASK; module_param(gpio_push_pull, int, 0644); MODULE_PARM_DESC(gpio_push_pull, "GPIO push-pull configuration bitmask"); static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; int ret; mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret != CP2112_GPIO_CONFIG_LENGTH) { hid_err(hdev, "error requesting GPIO config: %d\n", ret); if (ret >= 0) ret = -EIO; goto exit; } buf[1] &= ~BIT(offset); buf[2] = gpio_push_pull; ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret != CP2112_GPIO_CONFIG_LENGTH) { hid_err(hdev, "error setting GPIO config: %d\n", ret); if (ret >= 0) ret = -EIO; goto exit; } ret = 0; exit: mutex_unlock(&dev->lock); return ret; } static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) { struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; int ret; mutex_lock(&dev->lock); buf[0] = CP2112_GPIO_SET; buf[1] = value ? CP2112_GPIO_ALL_GPIO_MASK : 0; buf[2] = BIT(offset); ret = hid_hw_raw_request(hdev, CP2112_GPIO_SET, buf, CP2112_GPIO_SET_LENGTH, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret < 0) hid_err(hdev, "error setting GPIO values: %d\n", ret); mutex_unlock(&dev->lock); } static int cp2112_gpio_get_all(struct gpio_chip *chip) { struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; int ret; mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf, CP2112_GPIO_GET_LENGTH, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret != CP2112_GPIO_GET_LENGTH) { hid_err(hdev, "error requesting GPIO values: %d\n", ret); ret = ret < 0 ? 
ret : -EIO; goto exit; } ret = buf[1]; exit: mutex_unlock(&dev->lock); return ret; } static int cp2112_gpio_get(struct gpio_chip *chip, unsigned int offset) { int ret; ret = cp2112_gpio_get_all(chip); if (ret < 0) return ret; return (ret >> offset) & 1; } static int cp2112_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; int ret; mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret != CP2112_GPIO_CONFIG_LENGTH) { hid_err(hdev, "error requesting GPIO config: %d\n", ret); goto fail; } buf[1] |= 1 << offset; buf[2] = gpio_push_pull; ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret < 0) { hid_err(hdev, "error setting GPIO config: %d\n", ret); goto fail; } mutex_unlock(&dev->lock); /* * Set gpio value when output direction is already set, * as specified in AN495, Rev. 0.2, cpt. 4.4 */ cp2112_gpio_set(chip, offset, value); return 0; fail: mutex_unlock(&dev->lock); return ret < 0 ? ret : -EIO; } static int cp2112_hid_get(struct hid_device *hdev, unsigned char report_number, u8 *data, size_t count, unsigned char report_type) { u8 *buf; int ret; buf = kmalloc(count, GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(hdev, report_number, buf, count, report_type, HID_REQ_GET_REPORT); memcpy(data, buf, count); kfree(buf); return ret; } static int cp2112_hid_output(struct hid_device *hdev, u8 *data, size_t count, unsigned char report_type) { u8 *buf; int ret; buf = kmemdup(data, count, GFP_KERNEL); if (!buf) return -ENOMEM; if (report_type == HID_OUTPUT_REPORT) ret = hid_hw_output_report(hdev, buf, count); else ret = hid_hw_raw_request(hdev, buf[0], buf, count, report_type, HID_REQ_SET_REPORT); kfree(buf); return ret; } static int cp2112_wait(struct cp2112_device *dev, atomic_t *avail) { int ret = 0; /* We have sent either a CP2112_TRANSFER_STATUS_REQUEST or a * CP2112_DATA_READ_FORCE_SEND and we are waiting for the response to * come in cp2112_raw_event or timeout. There will only be one of these * in flight at any one time. The timeout is extremely large and is a * last resort if the CP2112 has died. If we do timeout we don't expect * to receive the response which would cause data races, it's not like * we can do anything about it anyway. 
*/ ret = wait_event_interruptible_timeout(dev->wait, atomic_read(avail), msecs_to_jiffies(RESPONSE_TIMEOUT)); if (-ERESTARTSYS == ret) return ret; if (!ret) return -ETIMEDOUT; atomic_set(avail, 0); return 0; } static int cp2112_xfer_status(struct cp2112_device *dev) { struct hid_device *hdev = dev->hdev; u8 buf[2]; int ret; buf[0] = CP2112_TRANSFER_STATUS_REQUEST; buf[1] = 0x01; atomic_set(&dev->xfer_avail, 0); ret = cp2112_hid_output(hdev, buf, 2, HID_OUTPUT_REPORT); if (ret < 0) { hid_warn(hdev, "Error requesting status: %d\n", ret); return ret; } ret = cp2112_wait(dev, &dev->xfer_avail); if (ret) return ret; return dev->xfer_status; } static int cp2112_read(struct cp2112_device *dev, u8 *data, size_t size) { struct hid_device *hdev = dev->hdev; struct cp2112_force_read_report report; int ret; if (size > sizeof(dev->read_data)) size = sizeof(dev->read_data); report.report = CP2112_DATA_READ_FORCE_SEND; report.length = cpu_to_be16(size); atomic_set(&dev->read_avail, 0); ret = cp2112_hid_output(hdev, &report.report, sizeof(report), HID_OUTPUT_REPORT); if (ret < 0) { hid_warn(hdev, "Error requesting data: %d\n", ret); return ret; } ret = cp2112_wait(dev, &dev->read_avail); if (ret) return ret; hid_dbg(hdev, "read %d of %zd bytes requested\n", dev->read_length, size); if (size > dev->read_length) size = dev->read_length; memcpy(data, dev->read_data, size); return dev->read_length; } static int cp2112_read_req(void *buf, u8 slave_address, u16 length) { struct cp2112_read_req_report *report = buf; if (length < 1 || length > 512) return -EINVAL; report->report = CP2112_DATA_READ_REQUEST; report->slave_address = slave_address << 1; report->length = cpu_to_be16(length); return sizeof(*report); } static int cp2112_write_read_req(void *buf, u8 slave_address, u16 length, u8 command, u8 *data, u8 data_length) { struct cp2112_write_read_req_report *report = buf; if (length < 1 || length > 512 || data_length > sizeof(report->target_address) - 1) return -EINVAL; report->report = CP2112_DATA_WRITE_READ_REQUEST; report->slave_address = slave_address << 1; report->length = cpu_to_be16(length); report->target_address_length = data_length + 1; report->target_address[0] = command; memcpy(&report->target_address[1], data, data_length); return data_length + 6; } static int cp2112_write_req(void *buf, u8 slave_address, u8 command, u8 *data, u8 data_length) { struct cp2112_write_req_report *report = buf; if (data_length > sizeof(report->data) - 1) return -EINVAL; report->report = CP2112_DATA_WRITE_REQUEST; report->slave_address = slave_address << 1; report->length = data_length + 1; report->data[0] = command; memcpy(&report->data[1], data, data_length); return data_length + 4; } static int cp2112_i2c_write_req(void *buf, u8 slave_address, u8 *data, u8 data_length) { struct cp2112_write_req_report *report = buf; if (data_length > sizeof(report->data)) return -EINVAL; report->report = CP2112_DATA_WRITE_REQUEST; report->slave_address = slave_address << 1; report->length = data_length; memcpy(report->data, data, data_length); return data_length + 3; } static int cp2112_i2c_write_read_req(void *buf, u8 slave_address, u8 *addr, int addr_length, int read_length) { struct cp2112_write_read_req_report *report = buf; if (read_length < 1 || read_length > 512 || addr_length > sizeof(report->target_address)) return -EINVAL; report->report = CP2112_DATA_WRITE_READ_REQUEST; report->slave_address = slave_address << 1; report->length = cpu_to_be16(read_length); report->target_address_length = addr_length; 
memcpy(report->target_address, addr, addr_length); return addr_length + 5; } static int cp2112_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { struct cp2112_device *dev = (struct cp2112_device *)adap->algo_data; struct hid_device *hdev = dev->hdev; u8 buf[64]; ssize_t count; ssize_t read_length = 0; u8 *read_buf = NULL; unsigned int retries; int ret; hid_dbg(hdev, "I2C %d messages\n", num); if (num == 1) { hid_dbg(hdev, "I2C %s %#04x len %d\n", str_read_write(msgs->flags & I2C_M_RD), msgs->addr, msgs->len); if (msgs->flags & I2C_M_RD) { read_length = msgs->len; read_buf = msgs->buf; count = cp2112_read_req(buf, msgs->addr, msgs->len); } else { count = cp2112_i2c_write_req(buf, msgs->addr, msgs->buf, msgs->len); } if (count < 0) return count; } else if (dev->hwversion > 1 && /* no repeated start in rev 1 */ num == 2 && msgs[0].addr == msgs[1].addr && !(msgs[0].flags & I2C_M_RD) && (msgs[1].flags & I2C_M_RD)) { hid_dbg(hdev, "I2C write-read %#04x wlen %d rlen %d\n", msgs[0].addr, msgs[0].len, msgs[1].len); read_length = msgs[1].len; read_buf = msgs[1].buf; count = cp2112_i2c_write_read_req(buf, msgs[0].addr, msgs[0].buf, msgs[0].len, msgs[1].len); if (count < 0) return count; } else { hid_err(hdev, "Multi-message I2C transactions not supported\n"); return -EOPNOTSUPP; } ret = hid_hw_power(hdev, PM_HINT_FULLON); if (ret < 0) { hid_err(hdev, "power management error: %d\n", ret); return ret; } ret = cp2112_hid_output(hdev, buf, count, HID_OUTPUT_REPORT); if (ret < 0) { hid_warn(hdev, "Error starting transaction: %d\n", ret); goto power_normal; } for (retries = 0; retries < XFER_STATUS_RETRIES; ++retries) { ret = cp2112_xfer_status(dev); if (-EBUSY == ret) continue; if (ret < 0) goto power_normal; break; } if (XFER_STATUS_RETRIES <= retries) { hid_warn(hdev, "Transfer timed out, cancelling.\n"); buf[0] = CP2112_CANCEL_TRANSFER; buf[1] = 0x01; ret = cp2112_hid_output(hdev, buf, 2, HID_OUTPUT_REPORT); if (ret < 0) hid_warn(hdev, "Error cancelling transaction: %d\n", ret); ret = -ETIMEDOUT; goto power_normal; } for (count = 0; count < read_length;) { ret = cp2112_read(dev, read_buf + count, read_length - count); if (ret < 0) goto power_normal; if (ret == 0) { hid_err(hdev, "read returned 0\n"); ret = -EIO; goto power_normal; } count += ret; if (count > read_length) { /* * The hardware returned too much data. * This is mostly harmless because cp2112_read() * has a limit check so didn't overrun our * buffer. Nevertheless, we return an error * because something is seriously wrong and * it shouldn't go unnoticed. 
*/ hid_err(hdev, "long read: %d > %zd\n", ret, read_length - count + ret); ret = -EIO; goto power_normal; } } /* return the number of transferred messages */ ret = num; power_normal: hid_hw_power(hdev, PM_HINT_NORMAL); hid_dbg(hdev, "I2C transfer finished: %d\n", ret); return ret; } static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data) { struct cp2112_device *dev = (struct cp2112_device *)adap->algo_data; struct hid_device *hdev = dev->hdev; u8 buf[64]; __le16 word; ssize_t count; size_t read_length = 0; unsigned int retries; int ret; hid_dbg(hdev, "%s addr 0x%x flags 0x%x cmd 0x%x size %d\n", str_write_read(read_write == I2C_SMBUS_WRITE), addr, flags, command, size); switch (size) { case I2C_SMBUS_BYTE: read_length = 1; if (I2C_SMBUS_READ == read_write) count = cp2112_read_req(buf, addr, read_length); else count = cp2112_write_req(buf, addr, command, NULL, 0); break; case I2C_SMBUS_BYTE_DATA: read_length = 1; if (I2C_SMBUS_READ == read_write) count = cp2112_write_read_req(buf, addr, read_length, command, NULL, 0); else count = cp2112_write_req(buf, addr, command, &data->byte, 1); break; case I2C_SMBUS_WORD_DATA: read_length = 2; word = cpu_to_le16(data->word); if (I2C_SMBUS_READ == read_write) count = cp2112_write_read_req(buf, addr, read_length, command, NULL, 0); else count = cp2112_write_req(buf, addr, command, (u8 *)&word, 2); break; case I2C_SMBUS_PROC_CALL: size = I2C_SMBUS_WORD_DATA; read_write = I2C_SMBUS_READ; read_length = 2; word = cpu_to_le16(data->word); count = cp2112_write_read_req(buf, addr, read_length, command, (u8 *)&word, 2); break; case I2C_SMBUS_I2C_BLOCK_DATA: if (read_write == I2C_SMBUS_READ) { read_length = data->block[0]; count = cp2112_write_read_req(buf, addr, read_length, command, NULL, 0); } else { count = cp2112_write_req(buf, addr, command, data->block + 1, data->block[0]); } break; case I2C_SMBUS_BLOCK_DATA: if (I2C_SMBUS_READ == read_write) { count = cp2112_write_read_req(buf, addr, I2C_SMBUS_BLOCK_MAX, command, NULL, 0); } else { count = cp2112_write_req(buf, addr, command, data->block, data->block[0] + 1); } break; case I2C_SMBUS_BLOCK_PROC_CALL: size = I2C_SMBUS_BLOCK_DATA; read_write = I2C_SMBUS_READ; count = cp2112_write_read_req(buf, addr, I2C_SMBUS_BLOCK_MAX, command, data->block, data->block[0] + 1); break; default: hid_warn(hdev, "Unsupported transaction %d\n", size); return -EOPNOTSUPP; } if (count < 0) return count; ret = hid_hw_power(hdev, PM_HINT_FULLON); if (ret < 0) { hid_err(hdev, "power management error: %d\n", ret); return ret; } ret = cp2112_hid_output(hdev, buf, count, HID_OUTPUT_REPORT); if (ret < 0) { hid_warn(hdev, "Error starting transaction: %d\n", ret); goto power_normal; } for (retries = 0; retries < XFER_STATUS_RETRIES; ++retries) { ret = cp2112_xfer_status(dev); if (-EBUSY == ret) continue; if (ret < 0) goto power_normal; break; } if (XFER_STATUS_RETRIES <= retries) { hid_warn(hdev, "Transfer timed out, cancelling.\n"); buf[0] = CP2112_CANCEL_TRANSFER; buf[1] = 0x01; ret = cp2112_hid_output(hdev, buf, 2, HID_OUTPUT_REPORT); if (ret < 0) hid_warn(hdev, "Error cancelling transaction: %d\n", ret); ret = -ETIMEDOUT; goto power_normal; } if (I2C_SMBUS_WRITE == read_write) { ret = 0; goto power_normal; } if (I2C_SMBUS_BLOCK_DATA == size) read_length = ret; ret = cp2112_read(dev, buf, read_length); if (ret < 0) goto power_normal; if (ret != read_length) { hid_warn(hdev, "short read: %d < %zd\n", ret, read_length); ret = -EIO; goto 
power_normal; } switch (size) { case I2C_SMBUS_BYTE: case I2C_SMBUS_BYTE_DATA: data->byte = buf[0]; break; case I2C_SMBUS_WORD_DATA: data->word = le16_to_cpup((__le16 *)buf); break; case I2C_SMBUS_I2C_BLOCK_DATA: if (read_length > I2C_SMBUS_BLOCK_MAX) { ret = -EINVAL; goto power_normal; } memcpy(data->block + 1, buf, read_length); break; case I2C_SMBUS_BLOCK_DATA: if (read_length > I2C_SMBUS_BLOCK_MAX) { ret = -EPROTO; goto power_normal; } memcpy(data->block, buf, read_length); break; } ret = 0; power_normal: hid_hw_power(hdev, PM_HINT_NORMAL); hid_dbg(hdev, "transfer finished: %d\n", ret); return ret; } static u32 cp2112_functionality(struct i2c_adapter *adap) { return I2C_FUNC_I2C | I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_BLOCK_DATA | I2C_FUNC_SMBUS_I2C_BLOCK | I2C_FUNC_SMBUS_PROC_CALL | I2C_FUNC_SMBUS_BLOCK_PROC_CALL; } static const struct i2c_algorithm smbus_algorithm = { .master_xfer = cp2112_i2c_xfer, .smbus_xfer = cp2112_xfer, .functionality = cp2112_functionality, }; static int cp2112_get_usb_config(struct hid_device *hdev, struct cp2112_usb_config_report *cfg) { int ret; ret = cp2112_hid_get(hdev, CP2112_USB_CONFIG, (u8 *)cfg, sizeof(*cfg), HID_FEATURE_REPORT); if (ret != sizeof(*cfg)) { hid_err(hdev, "error reading usb config: %d\n", ret); if (ret < 0) return ret; return -EIO; } return 0; } static int cp2112_set_usb_config(struct hid_device *hdev, struct cp2112_usb_config_report *cfg) { int ret; BUG_ON(cfg->report != CP2112_USB_CONFIG); ret = cp2112_hid_output(hdev, (u8 *)cfg, sizeof(*cfg), HID_FEATURE_REPORT); if (ret != sizeof(*cfg)) { hid_err(hdev, "error writing usb config: %d\n", ret); if (ret < 0) return ret; return -EIO; } return 0; } static void chmod_sysfs_attrs(struct hid_device *hdev); #define CP2112_CONFIG_ATTR(name, store, format, ...) 
\ static ssize_t name##_store(struct device *kdev, \ struct device_attribute *attr, const char *buf, \ size_t count) \ { \ struct hid_device *hdev = to_hid_device(kdev); \ struct cp2112_usb_config_report cfg; \ int ret = cp2112_get_usb_config(hdev, &cfg); \ if (ret) \ return ret; \ store; \ ret = cp2112_set_usb_config(hdev, &cfg); \ if (ret) \ return ret; \ chmod_sysfs_attrs(hdev); \ return count; \ } \ static ssize_t name##_show(struct device *kdev, \ struct device_attribute *attr, char *buf) \ { \ struct hid_device *hdev = to_hid_device(kdev); \ struct cp2112_usb_config_report cfg; \ int ret = cp2112_get_usb_config(hdev, &cfg); \ if (ret) \ return ret; \ return sysfs_emit(buf, format, ##__VA_ARGS__); \ } \ static DEVICE_ATTR_RW(name); CP2112_CONFIG_ATTR(vendor_id, ({ u16 vid; if (sscanf(buf, "%hi", &vid) != 1) return -EINVAL; cfg.vid = cpu_to_le16(vid); cfg.mask = 0x01; }), "0x%04x\n", le16_to_cpu(cfg.vid)); CP2112_CONFIG_ATTR(product_id, ({ u16 pid; if (sscanf(buf, "%hi", &pid) != 1) return -EINVAL; cfg.pid = cpu_to_le16(pid); cfg.mask = 0x02; }), "0x%04x\n", le16_to_cpu(cfg.pid)); CP2112_CONFIG_ATTR(max_power, ({ int mA; if (sscanf(buf, "%i", &mA) != 1) return -EINVAL; cfg.max_power = (mA + 1) / 2; cfg.mask = 0x04; }), "%u mA\n", cfg.max_power * 2); CP2112_CONFIG_ATTR(power_mode, ({ if (sscanf(buf, "%hhi", &cfg.power_mode) != 1) return -EINVAL; cfg.mask = 0x08; }), "%u\n", cfg.power_mode); CP2112_CONFIG_ATTR(release_version, ({ if (sscanf(buf, "%hhi.%hhi", &cfg.release_major, &cfg.release_minor) != 2) return -EINVAL; cfg.mask = 0x10; }), "%u.%u\n", cfg.release_major, cfg.release_minor); #undef CP2112_CONFIG_ATTR static ssize_t pstr_store(struct device *kdev, struct device_attribute *kattr, const char *buf, size_t count, int number) { struct hid_device *hdev = to_hid_device(kdev); struct cp2112_string_report report; int ret; memset(&report, 0, sizeof(report)); ret = utf8s_to_utf16s(buf, count, UTF16_LITTLE_ENDIAN, report.string, ARRAY_SIZE(report.string)); report.report = number; report.length = ret * sizeof(report.string[0]) + 2; report.type = USB_DT_STRING; ret = cp2112_hid_output(hdev, &report.report, report.length + 1, HID_FEATURE_REPORT); if (ret != report.length + 1) { hid_err(hdev, "error writing %s string: %d\n", kattr->attr.name, ret); if (ret < 0) return ret; return -EIO; } chmod_sysfs_attrs(hdev); return count; } static ssize_t pstr_show(struct device *kdev, struct device_attribute *kattr, char *buf, int number) { struct hid_device *hdev = to_hid_device(kdev); struct cp2112_string_report report; u8 length; int ret; ret = cp2112_hid_get(hdev, number, (u8 *)&report.contents, sizeof(report.contents), HID_FEATURE_REPORT); if (ret < 3) { hid_err(hdev, "error reading %s string: %d\n", kattr->attr.name, ret); if (ret < 0) return ret; return -EIO; } if (report.length < 2) { hid_err(hdev, "invalid %s string length: %d\n", kattr->attr.name, report.length); return -EIO; } length = report.length > ret - 1 ? 
ret - 1 : report.length; length = (length - 2) / sizeof(report.string[0]); ret = utf16s_to_utf8s(report.string, length, UTF16_LITTLE_ENDIAN, buf, PAGE_SIZE - 1); buf[ret++] = '\n'; return ret; } #define CP2112_PSTR_ATTR(name, _report) \ static ssize_t name##_store(struct device *kdev, struct device_attribute *kattr, \ const char *buf, size_t count) \ { \ return pstr_store(kdev, kattr, buf, count, _report); \ } \ static ssize_t name##_show(struct device *kdev, struct device_attribute *kattr, char *buf) \ { \ return pstr_show(kdev, kattr, buf, _report); \ } \ static DEVICE_ATTR_RW(name); CP2112_PSTR_ATTR(manufacturer, CP2112_MANUFACTURER_STRING); CP2112_PSTR_ATTR(product, CP2112_PRODUCT_STRING); CP2112_PSTR_ATTR(serial, CP2112_SERIAL_STRING); #undef CP2112_PSTR_ATTR static const struct attribute_group cp2112_attr_group = { .attrs = (struct attribute *[]){ &dev_attr_vendor_id.attr, &dev_attr_product_id.attr, &dev_attr_max_power.attr, &dev_attr_power_mode.attr, &dev_attr_release_version.attr, &dev_attr_manufacturer.attr, &dev_attr_product.attr, &dev_attr_serial.attr, NULL } }; /* Chmoding our sysfs attributes is simply a way to expose which fields in the * PROM have already been programmed. We do not depend on this preventing * writing to these attributes since the CP2112 will simply ignore writes to * already-programmed fields. This is why there is no sense in fixing this * racy behaviour. */ static void chmod_sysfs_attrs(struct hid_device *hdev) { struct attribute **attr; u8 buf[2]; int ret; ret = cp2112_hid_get(hdev, CP2112_LOCK_BYTE, buf, sizeof(buf), HID_FEATURE_REPORT); if (ret != sizeof(buf)) { hid_err(hdev, "error reading lock byte: %d\n", ret); return; } for (attr = cp2112_attr_group.attrs; *attr; ++attr) { umode_t mode = (buf[1] & 1) ? 0644 : 0444; ret = sysfs_chmod_file(&hdev->dev.kobj, *attr, mode); if (ret < 0) hid_err(hdev, "error chmoding sysfs file %s\n", (*attr)->name); buf[1] >>= 1; } } static void cp2112_gpio_irq_ack(struct irq_data *d) { } static void cp2112_gpio_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct cp2112_device *dev = gpiochip_get_data(gc); irq_hw_number_t hwirq = irqd_to_hwirq(d); __clear_bit(hwirq, &dev->irq_mask); gpiochip_disable_irq(gc, hwirq); } static void cp2112_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct cp2112_device *dev = gpiochip_get_data(gc); irq_hw_number_t hwirq = irqd_to_hwirq(d); gpiochip_enable_irq(gc, hwirq); __set_bit(hwirq, &dev->irq_mask); } static void cp2112_gpio_poll_callback(struct work_struct *work) { struct cp2112_device *dev = container_of(work, struct cp2112_device, gpio_poll_worker.work); struct irq_data *d; u8 gpio_mask; u32 irq_type; int irq, virq, ret; ret = cp2112_gpio_get_all(&dev->gc); if (ret == -ENODEV) /* the hardware has been disconnected */ return; if (ret < 0) goto exit; gpio_mask = ret; for_each_set_bit(virq, &dev->irq_mask, CP2112_GPIO_MAX_GPIO) { irq = irq_find_mapping(dev->gc.irq.domain, virq); if (!irq) continue; d = irq_get_irq_data(irq); if (!d) continue; irq_type = irqd_get_trigger_type(d); if (gpio_mask & BIT(virq)) { /* Level High */ if (irq_type & IRQ_TYPE_LEVEL_HIGH) handle_nested_irq(irq); if ((irq_type & IRQ_TYPE_EDGE_RISING) && !(dev->gpio_prev_state & BIT(virq))) handle_nested_irq(irq); } else { /* Level Low */ if (irq_type & IRQ_TYPE_LEVEL_LOW) handle_nested_irq(irq); if ((irq_type & IRQ_TYPE_EDGE_FALLING) && (dev->gpio_prev_state & BIT(virq))) handle_nested_irq(irq); } } dev->gpio_prev_state = 
gpio_mask; exit: if (dev->gpio_poll) schedule_delayed_work(&dev->gpio_poll_worker, 10); } static unsigned int cp2112_gpio_irq_startup(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct cp2112_device *dev = gpiochip_get_data(gc); if (!dev->gpio_poll) { dev->gpio_poll = true; schedule_delayed_work(&dev->gpio_poll_worker, 0); } cp2112_gpio_irq_unmask(d); return 0; } static void cp2112_gpio_irq_shutdown(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct cp2112_device *dev = gpiochip_get_data(gc); cp2112_gpio_irq_mask(d); if (!dev->irq_mask) { dev->gpio_poll = false; cancel_delayed_work_sync(&dev->gpio_poll_worker); } } static int cp2112_gpio_irq_type(struct irq_data *d, unsigned int type) { return 0; } static const struct irq_chip cp2112_gpio_irqchip = { .name = "cp2112-gpio", .irq_startup = cp2112_gpio_irq_startup, .irq_shutdown = cp2112_gpio_irq_shutdown, .irq_ack = cp2112_gpio_irq_ack, .irq_mask = cp2112_gpio_irq_mask, .irq_unmask = cp2112_gpio_irq_unmask, .irq_set_type = cp2112_gpio_irq_type, .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE, GPIOCHIP_IRQ_RESOURCE_HELPERS, }; static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct cp2112_device *dev; u8 buf[3]; struct cp2112_smbus_config_report config; struct gpio_irq_chip *girq; int ret; dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; dev->in_out_buffer = devm_kzalloc(&hdev->dev, CP2112_REPORT_MAX_LENGTH, GFP_KERNEL); if (!dev->in_out_buffer) return -ENOMEM; mutex_init(&dev->lock); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "hw open failed\n"); goto err_hid_stop; } ret = hid_hw_power(hdev, PM_HINT_FULLON); if (ret < 0) { hid_err(hdev, "power management error: %d\n", ret); goto err_hid_close; } ret = cp2112_hid_get(hdev, CP2112_GET_VERSION_INFO, buf, sizeof(buf), HID_FEATURE_REPORT); if (ret != sizeof(buf)) { hid_err(hdev, "error requesting version\n"); if (ret >= 0) ret = -EIO; goto err_power_normal; } hid_info(hdev, "Part Number: 0x%02X Device Version: 0x%02X\n", buf[1], buf[2]); ret = cp2112_hid_get(hdev, CP2112_SMBUS_CONFIG, (u8 *)&config, sizeof(config), HID_FEATURE_REPORT); if (ret != sizeof(config)) { hid_err(hdev, "error requesting SMBus config\n"); if (ret >= 0) ret = -EIO; goto err_power_normal; } config.retry_time = cpu_to_be16(1); ret = cp2112_hid_output(hdev, (u8 *)&config, sizeof(config), HID_FEATURE_REPORT); if (ret != sizeof(config)) { hid_err(hdev, "error setting SMBus config\n"); if (ret >= 0) ret = -EIO; goto err_power_normal; } hid_set_drvdata(hdev, (void *)dev); dev->hdev = hdev; dev->adap.owner = THIS_MODULE; dev->adap.class = I2C_CLASS_HWMON; dev->adap.algo = &smbus_algorithm; dev->adap.algo_data = dev; dev->adap.dev.parent = &hdev->dev; snprintf(dev->adap.name, sizeof(dev->adap.name), "CP2112 SMBus Bridge on hidraw%d", ((struct hidraw *)hdev->hidraw)->minor); dev->hwversion = buf[2]; init_waitqueue_head(&dev->wait); hid_device_io_start(hdev); ret = i2c_add_adapter(&dev->adap); hid_device_io_stop(hdev); if (ret) { hid_err(hdev, "error registering i2c adapter\n"); goto err_power_normal; } hid_dbg(hdev, "adapter registered\n"); dev->gc.label = "cp2112_gpio"; dev->gc.direction_input = cp2112_gpio_direction_input; dev->gc.direction_output = cp2112_gpio_direction_output; dev->gc.set = 
cp2112_gpio_set; dev->gc.get = cp2112_gpio_get; dev->gc.base = -1; dev->gc.ngpio = CP2112_GPIO_MAX_GPIO; dev->gc.can_sleep = 1; dev->gc.parent = &hdev->dev; girq = &dev->gc.irq; gpio_irq_chip_set_chip(girq, &cp2112_gpio_irqchip); /* The event comes from the outside so no parent handler */ girq->parent_handler = NULL; girq->num_parents = 0; girq->parents = NULL; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_simple_irq; girq->threaded = true; INIT_DELAYED_WORK(&dev->gpio_poll_worker, cp2112_gpio_poll_callback); ret = gpiochip_add_data(&dev->gc, dev); if (ret < 0) { hid_err(hdev, "error registering gpio chip\n"); goto err_free_i2c; } ret = sysfs_create_group(&hdev->dev.kobj, &cp2112_attr_group); if (ret < 0) { hid_err(hdev, "error creating sysfs attrs\n"); goto err_gpiochip_remove; } chmod_sysfs_attrs(hdev); hid_hw_power(hdev, PM_HINT_NORMAL); return ret; err_gpiochip_remove: gpiochip_remove(&dev->gc); err_free_i2c: i2c_del_adapter(&dev->adap); err_power_normal: hid_hw_power(hdev, PM_HINT_NORMAL); err_hid_close: hid_hw_close(hdev); err_hid_stop: hid_hw_stop(hdev); return ret; } static void cp2112_remove(struct hid_device *hdev) { struct cp2112_device *dev = hid_get_drvdata(hdev); sysfs_remove_group(&hdev->dev.kobj, &cp2112_attr_group); i2c_del_adapter(&dev->adap); if (dev->gpio_poll) { dev->gpio_poll = false; cancel_delayed_work_sync(&dev->gpio_poll_worker); } gpiochip_remove(&dev->gc); /* i2c_del_adapter has finished removing all i2c devices from our * adapter. Well behaved devices should no longer call our cp2112_xfer * and should have waited for any pending calls to finish. It has also * waited for device_unregister(&adap->dev) to complete. Therefore we * can safely free our struct cp2112_device. */ hid_hw_close(hdev); hid_hw_stop(hdev); } static int cp2112_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct cp2112_device *dev = hid_get_drvdata(hdev); struct cp2112_xfer_status_report *xfer = (void *)data; switch (data[0]) { case CP2112_TRANSFER_STATUS_RESPONSE: hid_dbg(hdev, "xfer status: %02x %02x %04x %04x\n", xfer->status0, xfer->status1, be16_to_cpu(xfer->retries), be16_to_cpu(xfer->length)); switch (xfer->status0) { case STATUS0_IDLE: dev->xfer_status = -EAGAIN; break; case STATUS0_BUSY: dev->xfer_status = -EBUSY; break; case STATUS0_COMPLETE: dev->xfer_status = be16_to_cpu(xfer->length); break; case STATUS0_ERROR: switch (xfer->status1) { case STATUS1_TIMEOUT_NACK: case STATUS1_TIMEOUT_BUS: dev->xfer_status = -ETIMEDOUT; break; default: dev->xfer_status = -EIO; break; } break; default: dev->xfer_status = -EINVAL; break; } atomic_set(&dev->xfer_avail, 1); break; case CP2112_DATA_READ_RESPONSE: hid_dbg(hdev, "read response: %02x %02x\n", data[1], data[2]); dev->read_length = data[2]; if (dev->read_length > sizeof(dev->read_data)) dev->read_length = sizeof(dev->read_data); memcpy(dev->read_data, &data[3], dev->read_length); atomic_set(&dev->read_avail, 1); break; default: hid_err(hdev, "unknown report\n"); return 0; } wake_up_interruptible(&dev->wait); return 1; } static struct hid_driver cp2112_driver = { .name = "cp2112", .id_table = cp2112_devices, .probe = cp2112_probe, .remove = cp2112_remove, .raw_event = cp2112_raw_event, }; module_hid_driver(cp2112_driver); MODULE_DESCRIPTION("Silicon Labs HID USB to SMBus master bridge"); MODULE_AUTHOR("David Barksdale <dbarksdale@uplogix.com>"); MODULE_LICENSE("GPL");
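/*
 * Usage sketch (not part of the driver): once cp2112_probe() has registered
 * dev->adap, the bridge appears as a regular /dev/i2c-N character device, and
 * the SMBus transfer below reaches cp2112_xfer() as an I2C_SMBUS_BYTE_DATA
 * transaction (plain read()/write() on the same fd would instead go through
 * cp2112_i2c_xfer() as raw I2C).  The bus number, the 0x50 slave address and
 * register 0x00 are placeholders chosen for illustration, not values implied
 * by the driver.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/i2c.h>
#include <linux/i2c-dev.h>

int main(void)
{
	union i2c_smbus_data data;
	struct i2c_smbus_ioctl_data args;
	int fd = open("/dev/i2c-3", O_RDWR);		/* assumed bus number */

	if (fd < 0)
		return 1;
	if (ioctl(fd, I2C_SLAVE, 0x50) < 0)		/* assumed 7-bit address */
		goto err;

	args.read_write = I2C_SMBUS_READ;		/* SMBus "read byte data" */
	args.command = 0x00;				/* register to read */
	args.size = I2C_SMBUS_BYTE_DATA;
	args.data = &data;
	if (ioctl(fd, I2C_SMBUS, &args) < 0)
		goto err;

	printf("reg 0x00 = 0x%02x\n", data.byte);
	close(fd);
	return 0;
err:
	close(fd);
	return 1;
}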
/* * mm/rmap.c - physical to virtual reverse mappings * * Copyright 2001, Rik van Riel <riel@conectiva.com.br> * Released under the General Public License (GPL). * * Simple, low overhead reverse mapping scheme. * Please try to keep this thing as modular as possible. * * Provides methods for unmapping each kind of mapped page: * the anon methods track anonymous pages, and * the file methods track pages belonging to an inode.
* * Original design by Rik van Riel <riel@conectiva.com.br> 2001 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004 * Contributions by Hugh Dickins 2003, 2004 */ /* * Lock ordering in mm: * * inode->i_rwsem (while writing or truncating, not reading or faulting) * mm->mmap_lock * mapping->invalidate_lock (in filemap_fault) * page->flags PG_locked (lock_page) * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below) * vma_start_write * mapping->i_mmap_rwsem * anon_vma->rwsem * mm->page_table_lock or pte_lock * swap_lock (in swap_duplicate, swap_info_get) * mmlist_lock (in mmput, drain_mmlist and others) * mapping->private_lock (in block_dirty_folio) * folio_lock_memcg move_lock (in block_dirty_folio) * i_pages lock (widely used) * lruvec->lru_lock (in folio_lruvec_lock_irq) * inode->i_lock (in set_page_dirty's __mark_inode_dirty) * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) * sb_lock (within inode_lock in fs/fs-writeback.c) * i_pages lock (widely used, in set_page_dirty, * in arch-dependent flush_dcache_mmap_lock, * within bdi.wb->list_lock in __sync_single_inode) * * anon_vma->rwsem,mapping->i_mmap_rwsem (memory_failure, collect_procs_anon) * ->tasklist_lock * pte map lock * * hugetlbfs PageHuge() take locks in this order: * hugetlb_fault_mutex (hugetlbfs specific page fault mutex) * vma_lock (hugetlb specific lock for pmd_sharing) * mapping->i_mmap_rwsem (also used for hugetlb pmd sharing) * page->flags PG_locked (lock_page) */ #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/rcupdate.h> #include <linux/export.h> #include <linux/memcontrol.h> #include <linux/mmu_notifier.h> #include <linux/migrate.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> #include <linux/backing-dev.h> #include <linux/page_idle.h> #include <linux/memremap.h> #include <linux/userfaultfd_k.h> #include <linux/mm_inline.h> #include <asm/tlbflush.h> #define CREATE_TRACE_POINTS #include <trace/events/tlb.h> #include <trace/events/migrate.h> #include "internal.h" static struct kmem_cache *anon_vma_cachep; static struct kmem_cache *anon_vma_chain_cachep; static inline struct anon_vma *anon_vma_alloc(void) { struct anon_vma *anon_vma; anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); if (anon_vma) { atomic_set(&anon_vma->refcount, 1); anon_vma->num_children = 0; anon_vma->num_active_vmas = 0; anon_vma->parent = anon_vma; /* * Initialise the anon_vma root to point to itself. If called * from fork, the root will be reset to the parents anon_vma. */ anon_vma->root = anon_vma; } return anon_vma; } static inline void anon_vma_free(struct anon_vma *anon_vma) { VM_BUG_ON(atomic_read(&anon_vma->refcount)); /* * Synchronize against folio_lock_anon_vma_read() such that * we can safely hold the lock without the anon_vma getting * freed. * * Relies on the full mb implied by the atomic_dec_and_test() from * put_anon_vma() against the acquire barrier implied by * down_read_trylock() from folio_lock_anon_vma_read(). This orders: * * folio_lock_anon_vma_read() VS put_anon_vma() * down_read_trylock() atomic_dec_and_test() * LOCK MB * atomic_read() rwsem_is_locked() * * LOCK should suffice since the actual taking of the lock must * happen _before_ what follows. 
*/ might_sleep(); if (rwsem_is_locked(&anon_vma->root->rwsem)) { anon_vma_lock_write(anon_vma); anon_vma_unlock_write(anon_vma); } kmem_cache_free(anon_vma_cachep, anon_vma); } static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp) { return kmem_cache_alloc(anon_vma_chain_cachep, gfp); } static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain) { kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain); } static void anon_vma_chain_link(struct vm_area_struct *vma, struct anon_vma_chain *avc, struct anon_vma *anon_vma) { avc->vma = vma; avc->anon_vma = anon_vma; list_add(&avc->same_vma, &vma->anon_vma_chain); anon_vma_interval_tree_insert(avc, &anon_vma->rb_root); } /** * __anon_vma_prepare - attach an anon_vma to a memory region * @vma: the memory region in question * * This makes sure the memory mapping described by 'vma' has * an 'anon_vma' attached to it, so that we can associate the * anonymous pages mapped into it with that anon_vma. * * The common case will be that we already have one, which * is handled inline by anon_vma_prepare(). But if * not we either need to find an adjacent mapping that we * can re-use the anon_vma from (very common when the only * reason for splitting a vma has been mprotect()), or we * allocate a new one. * * Anon-vma allocations are very subtle, because we may have * optimistically looked up an anon_vma in folio_lock_anon_vma_read() * and that may actually touch the rwsem even in the newly * allocated vma (it depends on RCU to make sure that the * anon_vma isn't actually destroyed). * * As a result, we need to do proper anon_vma locking even * for the new allocation. At the same time, we do not want * to do any locking for the common case of already having * an anon_vma. * * This must be called with the mmap_lock held for reading. */ int __anon_vma_prepare(struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct anon_vma *anon_vma, *allocated; struct anon_vma_chain *avc; might_sleep(); avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_enomem; anon_vma = find_mergeable_anon_vma(vma); allocated = NULL; if (!anon_vma) { anon_vma = anon_vma_alloc(); if (unlikely(!anon_vma)) goto out_enomem_free_avc; anon_vma->num_children++; /* self-parent link for new root */ allocated = anon_vma; } anon_vma_lock_write(anon_vma); /* page_table_lock to protect against threads */ spin_lock(&mm->page_table_lock); if (likely(!vma->anon_vma)) { vma->anon_vma = anon_vma; anon_vma_chain_link(vma, avc, anon_vma); anon_vma->num_active_vmas++; allocated = NULL; avc = NULL; } spin_unlock(&mm->page_table_lock); anon_vma_unlock_write(anon_vma); if (unlikely(allocated)) put_anon_vma(allocated); if (unlikely(avc)) anon_vma_chain_free(avc); return 0; out_enomem_free_avc: anon_vma_chain_free(avc); out_enomem: return -ENOMEM; } /* * This is a useful helper function for locking the anon_vma root as * we traverse the vma->anon_vma_chain, looping over anon_vma's that * have the same vma. * * Such anon_vma's should have the same root, so you'd expect to see * just a single mutex_lock for the whole traversal. */ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma) { struct anon_vma *new_root = anon_vma->root; if (new_root != root) { if (WARN_ON_ONCE(root)) up_write(&root->rwsem); root = new_root; down_write(&root->rwsem); } return root; } static inline void unlock_anon_vma_root(struct anon_vma *root) { if (root) up_write(&root->rwsem); } /* * Attach the anon_vmas from src to dst. 
* Returns 0 on success, -ENOMEM on failure. * * anon_vma_clone() is called by vma_expand(), vma_merge(), __split_vma(), * copy_vma() and anon_vma_fork(). The first four want an exact copy of src, * while the last one, anon_vma_fork(), may try to reuse an existing anon_vma to * prevent endless growth of anon_vma. Since dst->anon_vma is set to NULL before * call, we can identify this case by checking (!dst->anon_vma && * src->anon_vma). * * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find * and reuse existing anon_vma which has no vmas and only one child anon_vma. * This prevents degradation of anon_vma hierarchy to endless linear chain in * case of constantly forking task. On the other hand, an anon_vma with more * than one child isn't reused even if there was no alive vma, thus rmap * walker has a good chance of avoiding scanning the whole hierarchy when it * searches where page is mapped. */ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) { struct anon_vma_chain *avc, *pavc; struct anon_vma *root = NULL; list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { struct anon_vma *anon_vma; avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN); if (unlikely(!avc)) { unlock_anon_vma_root(root); root = NULL; avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto enomem_failure; } anon_vma = pavc->anon_vma; root = lock_anon_vma_root(root, anon_vma); anon_vma_chain_link(dst, avc, anon_vma); /* * Reuse existing anon_vma if it has no vma and only one * anon_vma child. * * Root anon_vma is never reused: * it has self-parent reference and at least one child. */ if (!dst->anon_vma && src->anon_vma && anon_vma->num_children < 2 && anon_vma->num_active_vmas == 0) dst->anon_vma = anon_vma; } if (dst->anon_vma) dst->anon_vma->num_active_vmas++; unlock_anon_vma_root(root); return 0; enomem_failure: /* * dst->anon_vma is dropped here otherwise its num_active_vmas can * be incorrectly decremented in unlink_anon_vmas(). * We can safely do this because callers of anon_vma_clone() don't care * about dst->anon_vma if anon_vma_clone() failed. */ dst->anon_vma = NULL; unlink_anon_vmas(dst); return -ENOMEM; } /* * Attach vma to its own anon_vma, as well as to the anon_vmas that * the corresponding VMA in the parent process is attached to. * Returns 0 on success, non-zero on failure. */ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) { struct anon_vma_chain *avc; struct anon_vma *anon_vma; int error; /* Don't bother if the parent process has no anon_vma here. */ if (!pvma->anon_vma) return 0; /* Drop inherited anon_vma, we'll reuse existing or allocate new. */ vma->anon_vma = NULL; /* * First, attach the new VMA to the parent VMA's anon_vmas, * so rmap can find non-COWed pages in child processes. */ error = anon_vma_clone(vma, pvma); if (error) return error; /* An existing anon_vma has been reused, all done then. */ if (vma->anon_vma) return 0; /* Then add our own anon_vma. */ anon_vma = anon_vma_alloc(); if (!anon_vma) goto out_error; anon_vma->num_active_vmas++; avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_error_free_anon_vma; /* * The root anon_vma's rwsem is the lock actually used when we * lock any of the anon_vmas in this anon_vma tree. */ anon_vma->root = pvma->anon_vma->root; anon_vma->parent = pvma->anon_vma; /* * With refcounts, an anon_vma can stay around longer than the * process it belongs to. The root anon_vma needs to be pinned until * this anon_vma is freed, because the lock lives in the root. 
*/ get_anon_vma(anon_vma->root); /* Mark this anon_vma as the one where our new (COWed) pages go. */ vma->anon_vma = anon_vma; anon_vma_lock_write(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); anon_vma->parent->num_children++; anon_vma_unlock_write(anon_vma); return 0; out_error_free_anon_vma: put_anon_vma(anon_vma); out_error: unlink_anon_vmas(vma); return -ENOMEM; } void unlink_anon_vmas(struct vm_area_struct *vma) { struct anon_vma_chain *avc, *next; struct anon_vma *root = NULL; /* * Unlink each anon_vma chained to the VMA. This list is ordered * from newest to oldest, ensuring the root anon_vma gets freed last. */ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; root = lock_anon_vma_root(root, anon_vma); anon_vma_interval_tree_remove(avc, &anon_vma->rb_root); /* * Leave empty anon_vmas on the list - we'll need * to free them outside the lock. */ if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) { anon_vma->parent->num_children--; continue; } list_del(&avc->same_vma); anon_vma_chain_free(avc); } if (vma->anon_vma) { vma->anon_vma->num_active_vmas--; /* * vma would still be needed after unlink, and anon_vma will be prepared * when handle fault. */ vma->anon_vma = NULL; } unlock_anon_vma_root(root); /* * Iterate the list once more, it now only contains empty and unlinked * anon_vmas, destroy them. Could not do before due to __put_anon_vma() * needing to write-acquire the anon_vma->root->rwsem. */ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; VM_WARN_ON(anon_vma->num_children); VM_WARN_ON(anon_vma->num_active_vmas); put_anon_vma(anon_vma); list_del(&avc->same_vma); anon_vma_chain_free(avc); } } static void anon_vma_ctor(void *data) { struct anon_vma *anon_vma = data; init_rwsem(&anon_vma->rwsem); atomic_set(&anon_vma->refcount, 0); anon_vma->rb_root = RB_ROOT_CACHED; } void __init anon_vma_init(void) { anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, anon_vma_ctor); anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC|SLAB_ACCOUNT); } /* * Getting a lock on a stable anon_vma from a page off the LRU is tricky! * * Since there is no serialization what so ever against folio_remove_rmap_*() * the best this function can do is return a refcount increased anon_vma * that might have been relevant to this page. * * The page might have been remapped to a different anon_vma or the anon_vma * returned may already be freed (and even reused). * * In case it was remapped to a different anon_vma, the new anon_vma will be a * child of the old anon_vma, and the anon_vma lifetime rules will therefore * ensure that any anon_vma obtained from the page will still be valid for as * long as we observe page_mapped() [ hence all those page_mapped() tests ]. * * All users of this function must be very careful when walking the anon_vma * chain and verify that the page in question is indeed mapped in it * [ something equivalent to page_mapped_in_vma() ]. * * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from * folio_remove_rmap_*() that the anon_vma pointer from page->mapping is valid * if there is a mapcount, we can dereference the anon_vma after observing * those. * * NOTE: the caller should normally hold folio lock when calling this. 
If * not, the caller needs to double check the anon_vma didn't change after * taking the anon_vma lock for either read or write (UFFDIO_MOVE can modify it * concurrently without folio lock protection). See folio_lock_anon_vma_read() * which has already covered that, and comment above remap_pages(). */ struct anon_vma *folio_get_anon_vma(struct folio *folio) { struct anon_vma *anon_vma = NULL; unsigned long anon_mapping; rcu_read_lock(); anon_mapping = (unsigned long)READ_ONCE(folio->mapping); if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) goto out; if (!folio_mapped(folio)) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); if (!atomic_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; goto out; } /* * If this folio is still mapped, then its anon_vma cannot have been * freed. But if it has been unmapped, we have no security against the * anon_vma structure being freed and reused (for another anon_vma: * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero() * above cannot corrupt). */ if (!folio_mapped(folio)) { rcu_read_unlock(); put_anon_vma(anon_vma); return NULL; } out: rcu_read_unlock(); return anon_vma; } /* * Similar to folio_get_anon_vma() except it locks the anon_vma. * * Its a little more complex as it tries to keep the fast path to a single * atomic op -- the trylock. If we fail the trylock, we fall back to getting a * reference like with folio_get_anon_vma() and then block on the mutex * on !rwc->try_lock case. */ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, struct rmap_walk_control *rwc) { struct anon_vma *anon_vma = NULL; struct anon_vma *root_anon_vma; unsigned long anon_mapping; retry: rcu_read_lock(); anon_mapping = (unsigned long)READ_ONCE(folio->mapping); if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) goto out; if (!folio_mapped(folio)) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); root_anon_vma = READ_ONCE(anon_vma->root); if (down_read_trylock(&root_anon_vma->rwsem)) { /* * folio_move_anon_rmap() might have changed the anon_vma as we * might not hold the folio lock here. */ if (unlikely((unsigned long)READ_ONCE(folio->mapping) != anon_mapping)) { up_read(&root_anon_vma->rwsem); rcu_read_unlock(); goto retry; } /* * If the folio is still mapped, then this anon_vma is still * its anon_vma, and holding the mutex ensures that it will * not go away, see anon_vma_free(). */ if (!folio_mapped(folio)) { up_read(&root_anon_vma->rwsem); anon_vma = NULL; } goto out; } if (rwc && rwc->try_lock) { anon_vma = NULL; rwc->contended = true; goto out; } /* trylock failed, we got to sleep */ if (!atomic_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; goto out; } if (!folio_mapped(folio)) { rcu_read_unlock(); put_anon_vma(anon_vma); return NULL; } /* we pinned the anon_vma, its safe to sleep */ rcu_read_unlock(); anon_vma_lock_read(anon_vma); /* * folio_move_anon_rmap() might have changed the anon_vma as we might * not hold the folio lock here. */ if (unlikely((unsigned long)READ_ONCE(folio->mapping) != anon_mapping)) { anon_vma_unlock_read(anon_vma); put_anon_vma(anon_vma); anon_vma = NULL; goto retry; } if (atomic_dec_and_test(&anon_vma->refcount)) { /* * Oops, we held the last refcount, release the lock * and bail -- can't simply use put_anon_vma() because * we'll deadlock on the anon_vma_lock_write() recursion. 
*/ anon_vma_unlock_read(anon_vma); __put_anon_vma(anon_vma); anon_vma = NULL; } return anon_vma; out: rcu_read_unlock(); return anon_vma; } #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* * Flush TLB entries for recently unmapped pages from remote CPUs. It is * important if a PTE was dirty when it was unmapped that it's flushed * before any IO is initiated on the page to prevent lost writes. Similarly, * it must be flushed before freeing to prevent data leakage. */ void try_to_unmap_flush(void) { struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc; if (!tlb_ubc->flush_required) return; arch_tlbbatch_flush(&tlb_ubc->arch); tlb_ubc->flush_required = false; tlb_ubc->writable = false; } /* Flush iff there are potentially writable TLB entries that can race with IO */ void try_to_unmap_flush_dirty(void) { struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc; if (tlb_ubc->writable) try_to_unmap_flush(); } /* * Bits 0-14 of mm->tlb_flush_batched record pending generations. * Bits 16-30 of mm->tlb_flush_batched bit record flushed generations. */ #define TLB_FLUSH_BATCH_FLUSHED_SHIFT 16 #define TLB_FLUSH_BATCH_PENDING_MASK \ ((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1) #define TLB_FLUSH_BATCH_PENDING_LARGE \ (TLB_FLUSH_BATCH_PENDING_MASK / 2) static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval, unsigned long uaddr) { struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc; int batch; bool writable = pte_dirty(pteval); if (!pte_accessible(mm, pteval)) return; arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr); tlb_ubc->flush_required = true; /* * Ensure compiler does not re-order the setting of tlb_flush_batched * before the PTE is cleared. */ barrier(); batch = atomic_read(&mm->tlb_flush_batched); retry: if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) > TLB_FLUSH_BATCH_PENDING_LARGE) { /* * Prevent `pending' from catching up with `flushed' because of * overflow. Reset `pending' and `flushed' to be 1 and 0 if * `pending' becomes large. */ if (!atomic_try_cmpxchg(&mm->tlb_flush_batched, &batch, 1)) goto retry; } else { atomic_inc(&mm->tlb_flush_batched); } /* * If the PTE was dirty then it's best to assume it's writable. The * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush() * before the page is queued for IO. */ if (writable) tlb_ubc->writable = true; } /* * Returns true if the TLB flush should be deferred to the end of a batch of * unmap operations to reduce IPIs. */ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) { if (!(flags & TTU_BATCH_FLUSH)) return false; return arch_tlbbatch_should_defer(mm); } /* * Reclaim unmaps pages under the PTL but do not flush the TLB prior to * releasing the PTL if TLB flushes are batched. It's possible for a parallel * operation such as mprotect or munmap to race between reclaim unmapping * the page and flushing the page. If this race occurs, it potentially allows * access to data via a stale TLB entry. Tracking all mm's that have TLB * batching in flight would be expensive during reclaim so instead track * whether TLB batching occurred in the past and if so then do a flush here * if required. This will cost one additional flush per reclaim cycle paid * by the first operation at risk such as mprotect and mumap. * * This must be called under the PTL so that an access to tlb_flush_batched * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise * via the PTL. 
*/ void flush_tlb_batched_pending(struct mm_struct *mm) { int batch = atomic_read(&mm->tlb_flush_batched); int pending = batch & TLB_FLUSH_BATCH_PENDING_MASK; int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT; if (pending != flushed) { arch_flush_tlb_batched_pending(mm); /* * If the new TLB flushing is pending during flushing, leave * mm->tlb_flush_batched as is, to avoid losing flushing. */ atomic_cmpxchg(&mm->tlb_flush_batched, batch, pending | (pending << TLB_FLUSH_BATCH_FLUSHED_SHIFT)); } } #else static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval, unsigned long uaddr) { } static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) { return false; } #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ /* * At what user virtual address is page expected in vma? * Caller should check the page is actually part of the vma. */ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { struct folio *folio = page_folio(page); if (folio_test_anon(folio)) { struct anon_vma *page__anon_vma = folio_anon_vma(folio); /* * Note: swapoff's unuse_vma() is more efficient with this * check, and needs it to match anon_vma when KSM is active. */ if (!vma->anon_vma || !page__anon_vma || vma->anon_vma->root != page__anon_vma->root) return -EFAULT; } else if (!vma->vm_file) { return -EFAULT; } else if (vma->vm_file->f_mapping != folio->mapping) { return -EFAULT; } return vma_address(page, vma); } /* * Returns the actual pmd_t* where we expect 'address' to be mapped from, or * NULL if it doesn't exist. No guarantees / checks on what the pmd_t* * represents. */ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto out; p4d = p4d_offset(pgd, address); if (!p4d_present(*p4d)) goto out; pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; pmd = pmd_offset(pud, address); out: return pmd; } struct folio_referenced_arg { int mapcount; int referenced; unsigned long vm_flags; struct mem_cgroup *memcg; }; /* * arg: folio_referenced_arg will be passed */ static bool folio_referenced_one(struct folio *folio, struct vm_area_struct *vma, unsigned long address, void *arg) { struct folio_referenced_arg *pra = arg; DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); int referenced = 0; unsigned long start = address, ptes = 0; while (page_vma_mapped_walk(&pvmw)) { address = pvmw.address; if (vma->vm_flags & VM_LOCKED) { if (!folio_test_large(folio) || !pvmw.pte) { /* Restore the mlock which got missed */ mlock_vma_folio(folio, vma); page_vma_mapped_walk_done(&pvmw); pra->vm_flags |= VM_LOCKED; return false; /* To break the loop */ } /* * For large folio fully mapped to VMA, will * be handled after the pvmw loop. * * For large folio cross VMA boundaries, it's * expected to be picked by page reclaim. But * should skip reference of pages which are in * the range of VM_LOCKED vma. As page reclaim * should just count the reference of pages out * the range of VM_LOCKED vma. */ ptes++; pra->mapcount--; continue; } if (pvmw.pte) { if (lru_gen_enabled() && pte_young(ptep_get(pvmw.pte))) { lru_gen_look_around(&pvmw); referenced++; } if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) referenced++; } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { if (pmdp_clear_flush_young_notify(vma, address, pvmw.pmd)) referenced++; } else { /* unexpected pmd-mapped folio? 
*/ WARN_ON_ONCE(1); } pra->mapcount--; } if ((vma->vm_flags & VM_LOCKED) && folio_test_large(folio) && folio_within_vma(folio, vma)) { unsigned long s_align, e_align; s_align = ALIGN_DOWN(start, PMD_SIZE); e_align = ALIGN_DOWN(start + folio_size(folio) - 1, PMD_SIZE); /* folio doesn't cross page table boundary and fully mapped */ if ((s_align == e_align) && (ptes == folio_nr_pages(folio))) { /* Restore the mlock which got missed */ mlock_vma_folio(folio, vma); pra->vm_flags |= VM_LOCKED; return false; /* To break the loop */ } } if (referenced) folio_clear_idle(folio); if (folio_test_clear_young(folio)) referenced++; if (referenced) { pra->referenced++; pra->vm_flags |= vma->vm_flags & ~VM_LOCKED; } if (!pra->mapcount) return false; /* To break the loop */ return true; } static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg) { struct folio_referenced_arg *pra = arg; struct mem_cgroup *memcg = pra->memcg; /* * Ignore references from this mapping if it has no recency. If the * folio has been used in another mapping, we will catch it; if this * other mapping is already gone, the unmap path will have set the * referenced flag or activated the folio in zap_pte_range(). */ if (!vma_has_recency(vma)) return true; /* * If we are reclaiming on behalf of a cgroup, skip counting on behalf * of references from different cgroups. */ if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) return true; return false; } /** * folio_referenced() - Test if the folio was referenced. * @folio: The folio to test. * @is_locked: Caller holds lock on the folio. * @memcg: target memory cgroup * @vm_flags: A combination of all the vma->vm_flags which referenced the folio. * * Quick test_and_clear_referenced for all mappings of a folio, * * Return: The number of mappings which referenced the folio. Return -1 if * the function bailed out due to rmap lock contention. */ int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags) { int we_locked = 0; struct folio_referenced_arg pra = { .mapcount = folio_mapcount(folio), .memcg = memcg, }; struct rmap_walk_control rwc = { .rmap_one = folio_referenced_one, .arg = (void *)&pra, .anon_lock = folio_lock_anon_vma_read, .try_lock = true, .invalid_vma = invalid_folio_referenced_vma, }; *vm_flags = 0; if (!pra.mapcount) return 0; if (!folio_raw_mapping(folio)) return 0; if (!is_locked && (!folio_test_anon(folio) || folio_test_ksm(folio))) { we_locked = folio_trylock(folio); if (!we_locked) return 1; } rmap_walk(folio, &rwc); *vm_flags = pra.vm_flags; if (we_locked) folio_unlock(folio); return rwc.contended ? -1 : pra.referenced; } static int page_vma_mkclean_one(struct page_vma_mapped_walk *pvmw) { int cleaned = 0; struct vm_area_struct *vma = pvmw->vma; struct mmu_notifier_range range; unsigned long address = pvmw->address; /* * We have to assume the worse case ie pmd for invalidation. Note that * the folio can not be freed from this function. 
*/ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, vma->vm_mm, address, vma_address_end(pvmw)); mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(pvmw)) { int ret = 0; address = pvmw->address; if (pvmw->pte) { pte_t *pte = pvmw->pte; pte_t entry = ptep_get(pte); if (!pte_dirty(entry) && !pte_write(entry)) continue; flush_cache_page(vma, address, pte_pfn(entry)); entry = ptep_clear_flush(vma, address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); set_pte_at(vma->vm_mm, address, pte, entry); ret = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE pmd_t *pmd = pvmw->pmd; pmd_t entry; if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) continue; flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); entry = pmdp_invalidate(vma, address, pmd); entry = pmd_wrprotect(entry); entry = pmd_mkclean(entry); set_pmd_at(vma->vm_mm, address, pmd, entry); ret = 1; #else /* unexpected pmd-mapped folio? */ WARN_ON_ONCE(1); #endif } if (ret) cleaned++; } mmu_notifier_invalidate_range_end(&range); return cleaned; } static bool page_mkclean_one(struct folio *folio, struct vm_area_struct *vma, unsigned long address, void *arg) { DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_SYNC); int *cleaned = arg; *cleaned += page_vma_mkclean_one(&pvmw); return true; } static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) { if (vma->vm_flags & VM_SHARED) return false; return true; } int folio_mkclean(struct folio *folio) { int cleaned = 0; struct address_space *mapping; struct rmap_walk_control rwc = { .arg = (void *)&cleaned, .rmap_one = page_mkclean_one, .invalid_vma = invalid_mkclean_vma, }; BUG_ON(!folio_test_locked(folio)); if (!folio_mapped(folio)) return 0; mapping = folio_mapping(folio); if (!mapping) return 0; rmap_walk(folio, &rwc); return cleaned; } EXPORT_SYMBOL_GPL(folio_mkclean); /** * pfn_mkclean_range - Cleans the PTEs (including PMDs) mapped with range of * [@pfn, @pfn + @nr_pages) at the specific offset (@pgoff) * within the @vma of shared mappings. And since clean PTEs * should also be readonly, write protects them too. * @pfn: start pfn. * @nr_pages: number of physically contiguous pages srarting with @pfn. * @pgoff: page offset that the @pfn mapped with. * @vma: vma that @pfn mapped within. * * Returns the number of cleaned PTEs (including PMDs). */ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma) { struct page_vma_mapped_walk pvmw = { .pfn = pfn, .nr_pages = nr_pages, .pgoff = pgoff, .vma = vma, .flags = PVMW_SYNC, }; if (invalid_mkclean_vma(vma, NULL)) return 0; pvmw.address = vma_pgoff_address(pgoff, nr_pages, vma); VM_BUG_ON_VMA(pvmw.address == -EFAULT, vma); return page_vma_mkclean_one(&pvmw); } int folio_total_mapcount(struct folio *folio) { int mapcount = folio_entire_mapcount(folio); int nr_pages; int i; /* In the common case, avoid the loop when no pages mapped by PTE */ if (folio_nr_pages_mapped(folio) == 0) return mapcount; /* * Add all the PTE mappings of those pages mapped by PTE. * Limit the loop to folio_nr_pages_mapped()? * Perhaps: given all the raciness, that may be a good or a bad idea. 
*/ nr_pages = folio_nr_pages(folio); for (i = 0; i < nr_pages; i++) mapcount += atomic_read(&folio_page(folio, i)->_mapcount); /* But each of those _mapcounts was based on -1 */ mapcount += nr_pages; return mapcount; } static __always_inline unsigned int __folio_add_rmap(struct folio *folio, struct page *page, int nr_pages, enum rmap_level level, int *nr_pmdmapped) { atomic_t *mapped = &folio->_nr_pages_mapped; int first, nr = 0; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { case RMAP_LEVEL_PTE: do { first = atomic_inc_and_test(&page->_mapcount); if (first && folio_test_large(folio)) { first = atomic_inc_return_relaxed(mapped); first = (first < ENTIRELY_MAPPED); } if (first) nr++; } while (page++, --nr_pages > 0); break; case RMAP_LEVEL_PMD: first = atomic_inc_and_test(&folio->_entire_mapcount); if (first) { nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped); if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) { *nr_pmdmapped = folio_nr_pages(folio); nr = *nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED); /* Raced ahead of a remove and another add? */ if (unlikely(nr < 0)) nr = 0; } else { /* Raced ahead of a remove of ENTIRELY_MAPPED */ nr = 0; } } break; } return nr; } /** * folio_move_anon_rmap - move a folio to our anon_vma * @folio: The folio to move to our anon_vma * @vma: The vma the folio belongs to * * When a folio belongs exclusively to one process after a COW event, * that folio can be moved into the anon_vma that belongs to just that * process, so the rmap code will not search the parent or sibling processes. */ void folio_move_anon_rmap(struct folio *folio, struct vm_area_struct *vma) { void *anon_vma = vma->anon_vma; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_VMA(!anon_vma, vma); anon_vma += PAGE_MAPPING_ANON; /* * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written * simultaneously, so a concurrent reader (eg folio_referenced()'s * folio_test_anon()) will not see one without the other. */ WRITE_ONCE(folio->mapping, anon_vma); } /** * __folio_set_anon - set up a new anonymous rmap for a folio * @folio: The folio to set up the new anonymous rmap for. * @vma: VM area to add the folio to. * @address: User virtual address of the mapping * @exclusive: Whether the folio is exclusive to the process. */ static void __folio_set_anon(struct folio *folio, struct vm_area_struct *vma, unsigned long address, bool exclusive) { struct anon_vma *anon_vma = vma->anon_vma; BUG_ON(!anon_vma); /* * If the folio isn't exclusive to this vma, we must use the _oldest_ * possible anon_vma for the folio mapping! */ if (!exclusive) anon_vma = anon_vma->root; /* * page_idle does a lockless/optimistic rmap scan on folio->mapping. * Make sure the compiler doesn't split the stores of anon_vma and * the PAGE_MAPPING_ANON type identifier, otherwise the rmap code * could mistake the mapping for a struct address_space and crash. */ anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; WRITE_ONCE(folio->mapping, (struct address_space *) anon_vma); folio->index = linear_page_index(vma, address); } /** * __page_check_anon_rmap - sanity check anonymous rmap addition * @folio: The folio containing @page. 
* @page: the page to check the mapping of * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped */ static void __page_check_anon_rmap(struct folio *folio, struct page *page, struct vm_area_struct *vma, unsigned long address) { /* * The page's anon-rmap details (mapping and index) are guaranteed to * be set up correctly at this point. * * We have exclusion against folio_add_anon_rmap_*() because the caller * always holds the page locked. * * We have exclusion against folio_add_new_anon_rmap because those pages * are initially only visible via the pagetables, and the pte is locked * over the call to folio_add_new_anon_rmap. */ VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root, folio); VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address), page); } static __always_inline void __folio_add_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *vma, unsigned long address, rmap_t flags, enum rmap_level level) { int i, nr, nr_pmdmapped = 0; nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped); if (nr_pmdmapped) __lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped); if (nr) __lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr); if (unlikely(!folio_test_anon(folio))) { VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); /* * For a PTE-mapped large folio, we only know that the single * PTE is exclusive. Further, __folio_set_anon() might not get * folio->index right when not given the address of the head * page. */ VM_WARN_ON_FOLIO(folio_test_large(folio) && level != RMAP_LEVEL_PMD, folio); __folio_set_anon(folio, vma, address, !!(flags & RMAP_EXCLUSIVE)); } else if (likely(!folio_test_ksm(folio))) { __page_check_anon_rmap(folio, page, vma, address); } if (flags & RMAP_EXCLUSIVE) { switch (level) { case RMAP_LEVEL_PTE: for (i = 0; i < nr_pages; i++) SetPageAnonExclusive(page + i); break; case RMAP_LEVEL_PMD: SetPageAnonExclusive(page); break; } } for (i = 0; i < nr_pages; i++) { struct page *cur_page = page + i; /* While PTE-mapping a THP we have a PMD and a PTE mapping. */ VM_WARN_ON_FOLIO((atomic_read(&cur_page->_mapcount) > 0 || (folio_test_large(folio) && folio_entire_mapcount(folio) > 1)) && PageAnonExclusive(cur_page), folio); } /* * For large folio, only mlock it if it's fully mapped to VMA. It's * not easy to check whether the large folio is fully mapped to VMA * here. Only mlock normal 4K folio and leave page reclaim to handle * large folio. */ if (!folio_test_large(folio)) mlock_vma_folio(folio, vma); } /** * folio_add_anon_rmap_ptes - add PTE mappings to a page range of an anon folio * @folio: The folio to add the mappings to * @page: The first page to add * @nr_pages: The number of pages which will be mapped * @vma: The vm area in which the mappings are added * @address: The user virtual address of the first page to map * @flags: The rmap flags * * The page range of folio is defined by [first_page, first_page + nr_pages) * * The caller needs to hold the page table lock, and the page must be locked in * the anon_vma case: to serialize mapping,index checking after setting, * and to ensure that an anon folio is not being upgraded racily to a KSM folio * (but KSM folios are never downgraded). 
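 *
 * An illustrative call site (hypothetical), mapping nr_pages PTEs of an
 * exclusive anonymous folio during fault handling:
 *
 *	folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address,
 *				 RMAP_EXCLUSIVE);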
*/ void folio_add_anon_rmap_ptes(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *vma, unsigned long address, rmap_t flags) { __folio_add_anon_rmap(folio, page, nr_pages, vma, address, flags, RMAP_LEVEL_PTE); } /** * folio_add_anon_rmap_pmd - add a PMD mapping to a page range of an anon folio * @folio: The folio to add the mapping to * @page: The first page to add * @vma: The vm area in which the mapping is added * @address: The user virtual address of the first page to map * @flags: The rmap flags * * The page range of folio is defined by [first_page, first_page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock, and the page must be locked in * the anon_vma case: to serialize mapping,index checking after setting. */ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *vma, unsigned long address, rmap_t flags) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE __folio_add_anon_rmap(folio, page, HPAGE_PMD_NR, vma, address, flags, RMAP_LEVEL_PMD); #else WARN_ON_ONCE(true); #endif } /** * folio_add_new_anon_rmap - Add mapping to a new anonymous folio. * @folio: The folio to add the mapping to. * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped * * Like folio_add_anon_rmap_*() but must only be called on *new* folios. * This means the inc-and-test can be bypassed. * The folio does not have to be locked. * * If the folio is pmd-mappable, it is accounted as a THP. As the folio * is new, it's assumed to be mapped exclusively by a single process. */ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, unsigned long address) { int nr = folio_nr_pages(folio); VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); VM_BUG_ON_VMA(address < vma->vm_start || address + (nr << PAGE_SHIFT) > vma->vm_end, vma); __folio_set_swapbacked(folio); __folio_set_anon(folio, vma, address, true); if (likely(!folio_test_large(folio))) { /* increment count (starts at -1) */ atomic_set(&folio->_mapcount, 0); SetPageAnonExclusive(&folio->page); } else if (!folio_test_pmd_mappable(folio)) { int i; for (i = 0; i < nr; i++) { struct page *page = folio_page(folio, i); /* increment count (starts at -1) */ atomic_set(&page->_mapcount, 0); SetPageAnonExclusive(page); } atomic_set(&folio->_nr_pages_mapped, nr); } else { /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED); SetPageAnonExclusive(&folio->page); __lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr); } __lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr); } static __always_inline void __folio_add_file_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *vma, enum rmap_level level) { int nr, nr_pmdmapped = 0; VM_WARN_ON_FOLIO(folio_test_anon(folio), folio); nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped); if (nr_pmdmapped) __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ? 
NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped); if (nr) __lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr); /* See comments in folio_add_anon_rmap_*() */ if (!folio_test_large(folio)) mlock_vma_folio(folio, vma); } /** * folio_add_file_rmap_ptes - add PTE mappings to a page range of a folio * @folio: The folio to add the mappings to * @page: The first page to add * @nr_pages: The number of pages that will be mapped using PTEs * @vma: The vm area in which the mappings are added * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock. */ void folio_add_file_rmap_ptes(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *vma) { __folio_add_file_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE); } /** * folio_add_file_rmap_pmd - add a PMD mapping to a page range of a folio * @folio: The folio to add the mapping to * @page: The first page to add * @vma: The vm area in which the mapping is added * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock. */ void folio_add_file_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE __folio_add_file_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD); #else WARN_ON_ONCE(true); #endif } static __always_inline void __folio_remove_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *vma, enum rmap_level level) { atomic_t *mapped = &folio->_nr_pages_mapped; int last, nr = 0, nr_pmdmapped = 0; enum node_stat_item idx; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { case RMAP_LEVEL_PTE: do { last = atomic_add_negative(-1, &page->_mapcount); if (last && folio_test_large(folio)) { last = atomic_dec_return_relaxed(mapped); last = (last < ENTIRELY_MAPPED); } if (last) nr++; } while (page++, --nr_pages > 0); break; case RMAP_LEVEL_PMD: last = atomic_add_negative(-1, &folio->_entire_mapcount); if (last) { nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped); if (likely(nr < ENTIRELY_MAPPED)) { nr_pmdmapped = folio_nr_pages(folio); nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED); /* Raced ahead of another remove and an add? */ if (unlikely(nr < 0)) nr = 0; } else { /* An add of ENTIRELY_MAPPED raced ahead */ nr = 0; } } break; } if (nr_pmdmapped) { if (folio_test_anon(folio)) idx = NR_ANON_THPS; else if (folio_test_swapbacked(folio)) idx = NR_SHMEM_PMDMAPPED; else idx = NR_FILE_PMDMAPPED; __lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped); } if (nr) { idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED; __lruvec_stat_mod_folio(folio, idx, -nr); /* * Queue anon large folio for deferred split if at least one * page of the folio is unmapped and at least one page * is still mapped. */ if (folio_test_large(folio) && folio_test_anon(folio)) if (level == RMAP_LEVEL_PTE || nr < nr_pmdmapped) deferred_split_folio(folio); } /* * It would be tidy to reset folio_test_anon mapping when fully * unmapped, but that might overwrite a racing folio_add_anon_rmap_*() * which increments mapcount after us but sets mapping before us: * so leave the reset to free_pages_prepare, and remember that * it's only reliable while mapped. 
*/ munlock_vma_folio(folio, vma); } /** * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio * @folio: The folio to remove the mappings from * @page: The first page to remove * @nr_pages: The number of pages that will be removed from the mapping * @vma: The vm area from which the mappings are removed * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock. */ void folio_remove_rmap_ptes(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *vma) { __folio_remove_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE); } /** * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio * @folio: The folio to remove the mapping from * @page: The first page to remove * @vma: The vm area from which the mapping is removed * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock. */ void folio_remove_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE __folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD); #else WARN_ON_ONCE(true); #endif } /* * @arg: enum ttu_flags will be passed to this argument */ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, unsigned long address, void *arg) { struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); pte_t pteval; struct page *subpage; bool anon_exclusive, ret = true; struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; unsigned long hsz = 0; /* * When racing against e.g. zap_pte_range() on another cpu, * in between its ptep_get_and_clear_full() and folio_remove_rmap_*(), * try_to_unmap() may return before page_mapped() has become false, * if page table locking is skipped: use TTU_SYNC to wait for that. */ if (flags & TTU_SYNC) pvmw.flags = PVMW_SYNC; if (flags & TTU_SPLIT_HUGE_PMD) split_huge_pmd_address(vma, address, false, folio); /* * For THP, we have to assume the worse case ie pmd for invalidation. * For hugetlb, it could be much worse if we need to do pud * invalidation in the case of pmd sharing. * * Note that the folio can not be freed in this function as call of * try_to_unmap() must hold a reference on the folio. */ range.end = vma_address_end(&pvmw); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, address, range.end); if (folio_test_hugetlb(folio)) { /* * If sharing is possible, start and end will be adjusted * accordingly. */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); /* We need the huge page size for set_huge_pte_at() */ hsz = huge_page_size(hstate_vma(vma)); } mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); /* * If the folio is in an mlock()d vma, we must not swap it out. 
*/ if (!(flags & TTU_IGNORE_MLOCK) && (vma->vm_flags & VM_LOCKED)) { /* Restore the mlock which got missed */ if (!folio_test_large(folio)) mlock_vma_folio(folio, vma); page_vma_mapped_walk_done(&pvmw); ret = false; break; } pfn = pte_pfn(ptep_get(pvmw.pte)); subpage = folio_page(folio, pfn - folio_pfn(folio)); address = pvmw.address; anon_exclusive = folio_test_anon(folio) && PageAnonExclusive(subpage); if (folio_test_hugetlb(folio)) { bool anon = folio_test_anon(folio); /* * The try_to_unmap() is only passed a hugetlb page * in the case where the hugetlb page is poisoned. */ VM_BUG_ON_PAGE(!PageHWPoison(subpage), subpage); /* * huge_pmd_unshare may unmap an entire PMD page. * There is no way of knowing exactly which PMDs may * be cached for this mm, so we must flush them all. * start/end were already adjusted above to cover this * range. */ flush_cache_range(vma, range.start, range.end); /* * To call huge_pmd_unshare, i_mmap_rwsem must be * held in write mode. Caller needs to explicitly * do this outside rmap routines. * * We also must hold hugetlb vma_lock in write mode. * Lock order dictates acquiring vma_lock BEFORE * i_mmap_rwsem. We can only try lock here and fail * if unsuccessful. */ if (!anon) { VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); if (!hugetlb_vma_trylock_write(vma)) { page_vma_mapped_walk_done(&pvmw); ret = false; break; } if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) { hugetlb_vma_unlock_write(vma); flush_tlb_range(vma, range.start, range.end); /* * The ref count of the PMD page was * dropped which is part of the way map * counting is done for shared PMDs. * Return 'true' here. When there is * no other sharing, huge_pmd_unshare * returns false and we will unmap the * actual page and drop map count * to zero. */ page_vma_mapped_walk_done(&pvmw); break; } hugetlb_vma_unlock_write(vma); } pteval = huge_ptep_clear_flush(vma, address, pvmw.pte); } else { flush_cache_page(vma, address, pfn); /* Nuke the page table entry. */ if (should_defer_flush(mm, flags)) { /* * We clear the PTE but do not flush so potentially * a remote CPU could still be writing to the folio. * If the entry was previously clean then the * architecture must guarantee that a clear->dirty * transition on a cached TLB entry is written through * and traps if the PTE is unmapped. */ pteval = ptep_get_and_clear(mm, address, pvmw.pte); set_tlb_ubc_flush_pending(mm, pteval, address); } else { pteval = ptep_clear_flush(vma, address, pvmw.pte); } } /* * Now the pte is cleared. If this pte was uffd-wp armed, * we may want to replace a none pte with a marker pte if * it's file-backed, so we don't lose the tracking info. */ pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval); /* Set the dirty flag on the folio now the pte is gone. */ if (pte_dirty(pteval)) folio_mark_dirty(folio); /* Update high watermark before we lower rss */ update_hiwater_rss(mm); if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) { pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); set_huge_pte_at(mm, address, pvmw.pte, pteval, hsz); } else { dec_mm_counter(mm, mm_counter(folio)); set_pte_at(mm, address, pvmw.pte, pteval); } } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan * will take care of the rest. * A future reference will then fault in a new zero * page. 
When userfaultfd is active, we must not drop * this page though, as its main user (postcopy * migration) will not expect userfaults on already * copied pages. */ dec_mm_counter(mm, mm_counter(folio)); } else if (folio_test_anon(folio)) { swp_entry_t entry = page_swap_entry(subpage); pte_t swp_pte; /* * Store the swap location in the pte. * See handle_pte_fault() ... */ if (unlikely(folio_test_swapbacked(folio) != folio_test_swapcache(folio))) { WARN_ON_ONCE(1); ret = false; page_vma_mapped_walk_done(&pvmw); break; } /* MADV_FREE page check */ if (!folio_test_swapbacked(folio)) { int ref_count, map_count; /* * Synchronize with gup_pte_range(): * - clear PTE; barrier; read refcount * - inc refcount; barrier; read PTE */ smp_mb(); ref_count = folio_ref_count(folio); map_count = folio_mapcount(folio); /* * Order reads for page refcount and dirty flag * (see comments in __remove_mapping()). */ smp_rmb(); /* * The only page refs must be one from isolation * plus the rmap(s) (dropped by discard:). */ if (ref_count == 1 + map_count && !folio_test_dirty(folio)) { dec_mm_counter(mm, MM_ANONPAGES); goto discard; } /* * If the folio was redirtied, it cannot be * discarded. Remap the page to page table. */ set_pte_at(mm, address, pvmw.pte, pteval); folio_set_swapbacked(folio); ret = false; page_vma_mapped_walk_done(&pvmw); break; } if (swap_duplicate(entry) < 0) { set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; } if (arch_unmap_one(mm, vma, address, pteval) < 0) { swap_free(entry); set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; } /* See folio_try_share_anon_rmap(): clear PTE first. */ if (anon_exclusive && folio_try_share_anon_rmap_pte(folio, subpage)) { swap_free(entry); set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; } if (list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); if (list_empty(&mm->mmlist)) list_add(&mm->mmlist, &init_mm.mmlist); spin_unlock(&mmlist_lock); } dec_mm_counter(mm, MM_ANONPAGES); inc_mm_counter(mm, MM_SWAPENTS); swp_pte = swp_entry_to_pte(entry); if (anon_exclusive) swp_pte = pte_swp_mkexclusive(swp_pte); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); if (pte_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, address, pvmw.pte, swp_pte); } else { /* * This is a locked file-backed folio, * so it cannot be removed from the page * cache and replaced by a new folio before * mmu_notifier_invalidate_range_end, so no * concurrent thread might update its page table * to point at a new folio while a device is * still using this folio. * * See Documentation/mm/mmu_notifier.rst */ dec_mm_counter(mm, mm_counter_file(folio)); } discard: if (unlikely(folio_test_hugetlb(folio))) hugetlb_remove_rmap(folio); else folio_remove_rmap_pte(folio, subpage, vma); if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); folio_put(folio); } mmu_notifier_invalidate_range_end(&range); return ret; } static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) { return vma_is_temporary_stack(vma); } static int folio_not_mapped(struct folio *folio) { return !folio_mapped(folio); } /** * try_to_unmap - Try to remove all page table mappings to a folio. * @folio: The folio to unmap. * @flags: action and flags * * Tries to remove all the page table entries which are mapping this * folio. It is the caller's responsibility to check if the folio is * still mapped if needed (use TTU_SYNC to prevent accounting races). 
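 *
 * An illustrative reclaim-style caller (hypothetical), deferring the TLB
 * flush with TTU_BATCH_FLUSH the way vmscan does:
 *
 *	folio_lock(folio);
 *	try_to_unmap(folio, TTU_BATCH_FLUSH | TTU_SYNC);
 *	if (!folio_mapped(folio)) {
 *		try_to_unmap_flush();
 *		(now safe to start IO on, or free, the folio)
 *	}
 *	folio_unlock(folio);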
* * Context: Caller must hold the folio lock. */ void try_to_unmap(struct folio *folio, enum ttu_flags flags) { struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, .arg = (void *)flags, .done = folio_not_mapped, .anon_lock = folio_lock_anon_vma_read, }; if (flags & TTU_RMAP_LOCKED) rmap_walk_locked(folio, &rwc); else rmap_walk(folio, &rwc); } /* * @arg: enum ttu_flags will be passed to this argument. * * If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs * containing migration entries. */ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, unsigned long address, void *arg) { struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); pte_t pteval; struct page *subpage; bool anon_exclusive, ret = true; struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; unsigned long hsz = 0; /* * When racing against e.g. zap_pte_range() on another cpu, * in between its ptep_get_and_clear_full() and folio_remove_rmap_*(), * try_to_migrate() may return before page_mapped() has become false, * if page table locking is skipped: use TTU_SYNC to wait for that. */ if (flags & TTU_SYNC) pvmw.flags = PVMW_SYNC; /* * unmap_page() in mm/huge_memory.c is the only user of migration with * TTU_SPLIT_HUGE_PMD and it wants to freeze. */ if (flags & TTU_SPLIT_HUGE_PMD) split_huge_pmd_address(vma, address, true, folio); /* * For THP, we have to assume the worse case ie pmd for invalidation. * For hugetlb, it could be much worse if we need to do pud * invalidation in the case of pmd sharing. * * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ range.end = vma_address_end(&pvmw); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, address, range.end); if (folio_test_hugetlb(folio)) { /* * If sharing is possible, start and end will be adjusted * accordingly. */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); /* We need the huge page size for set_huge_pte_at() */ hsz = huge_page_size(hstate_vma(vma)); } mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION /* PMD-mapped THP migration entry */ if (!pvmw.pte) { subpage = folio_page(folio, pmd_pfn(*pvmw.pmd) - folio_pfn(folio)); VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) || !folio_test_pmd_mappable(folio), folio); if (set_pmd_migration_entry(&pvmw, subpage)) { ret = false; page_vma_mapped_walk_done(&pvmw); break; } continue; } #endif /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); pfn = pte_pfn(ptep_get(pvmw.pte)); if (folio_is_zone_device(folio)) { /* * Our PTE is a non-present device exclusive entry and * calculating the subpage as for the common case would * result in an invalid pointer. * * Since only PAGE_SIZE pages can currently be * migrated, just set it to page. This will need to be * changed when hugepage migrations to device private * memory are supported. */ VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio); subpage = &folio->page; } else { subpage = folio_page(folio, pfn - folio_pfn(folio)); } address = pvmw.address; anon_exclusive = folio_test_anon(folio) && PageAnonExclusive(subpage); if (folio_test_hugetlb(folio)) { bool anon = folio_test_anon(folio); /* * huge_pmd_unshare may unmap an entire PMD page. * There is no way of knowing exactly which PMDs may * be cached for this mm, so we must flush them all. 
* start/end were already adjusted above to cover this * range. */ flush_cache_range(vma, range.start, range.end); /* * To call huge_pmd_unshare, i_mmap_rwsem must be * held in write mode. Caller needs to explicitly * do this outside rmap routines. * * We also must hold hugetlb vma_lock in write mode. * Lock order dictates acquiring vma_lock BEFORE * i_mmap_rwsem. We can only try lock here and * fail if unsuccessful. */ if (!anon) { VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); if (!hugetlb_vma_trylock_write(vma)) { page_vma_mapped_walk_done(&pvmw); ret = false; break; } if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) { hugetlb_vma_unlock_write(vma); flush_tlb_range(vma, range.start, range.end); /* * The ref count of the PMD page was * dropped which is part of the way map * counting is done for shared PMDs. * Return 'true' here. When there is * no other sharing, huge_pmd_unshare * returns false and we will unmap the * actual page and drop map count * to zero. */ page_vma_mapped_walk_done(&pvmw); break; } hugetlb_vma_unlock_write(vma); } /* Nuke the hugetlb page table entry */ pteval = huge_ptep_clear_flush(vma, address, pvmw.pte); } else { flush_cache_page(vma, address, pfn); /* Nuke the page table entry. */ if (should_defer_flush(mm, flags)) { /* * We clear the PTE but do not flush so potentially * a remote CPU could still be writing to the folio. * If the entry was previously clean then the * architecture must guarantee that a clear->dirty * transition on a cached TLB entry is written through * and traps if the PTE is unmapped. */ pteval = ptep_get_and_clear(mm, address, pvmw.pte); set_tlb_ubc_flush_pending(mm, pteval, address); } else { pteval = ptep_clear_flush(vma, address, pvmw.pte); } } /* Set the dirty flag on the folio now the pte is gone. */ if (pte_dirty(pteval)) folio_mark_dirty(folio); /* Update high watermark before we lower rss */ update_hiwater_rss(mm); if (folio_is_device_private(folio)) { unsigned long pfn = folio_pfn(folio); swp_entry_t entry; pte_t swp_pte; if (anon_exclusive) WARN_ON_ONCE(folio_try_share_anon_rmap_pte(folio, subpage)); /* * Store the pfn of the page in a special migration * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ entry = pte_to_swp_entry(pteval); if (is_writable_device_private_entry(entry)) entry = make_writable_migration_entry(pfn); else if (anon_exclusive) entry = make_readable_exclusive_migration_entry(pfn); else entry = make_readable_migration_entry(pfn); swp_pte = swp_entry_to_pte(entry); /* * pteval maps a zone device page and is therefore * a swap pte. */ if (pte_swp_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); if (pte_swp_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); trace_set_migration_pte(pvmw.address, pte_val(swp_pte), folio_order(folio)); /* * No need to invalidate here it will synchronize on * against the special swap migration pte. */ } else if (PageHWPoison(subpage)) { pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); set_huge_pte_at(mm, address, pvmw.pte, pteval, hsz); } else { dec_mm_counter(mm, mm_counter(folio)); set_pte_at(mm, address, pvmw.pte, pteval); } } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan * will take care of the rest. * A future reference will then fault in a new zero * page. 
When userfaultfd is active, we must not drop * this page though, as its main user (postcopy * migration) will not expect userfaults on already * copied pages. */ dec_mm_counter(mm, mm_counter(folio)); } else { swp_entry_t entry; pte_t swp_pte; if (arch_unmap_one(mm, vma, address, pteval) < 0) { if (folio_test_hugetlb(folio)) set_huge_pte_at(mm, address, pvmw.pte, pteval, hsz); else set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; } VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) && !anon_exclusive, subpage); /* See folio_try_share_anon_rmap_pte(): clear PTE first. */ if (folio_test_hugetlb(folio)) { if (anon_exclusive && hugetlb_try_share_anon_rmap(folio)) { set_huge_pte_at(mm, address, pvmw.pte, pteval, hsz); ret = false; page_vma_mapped_walk_done(&pvmw); break; } } else if (anon_exclusive && folio_try_share_anon_rmap_pte(folio, subpage)) { set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; } /* * Store the pfn of the page in a special migration * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ if (pte_write(pteval)) entry = make_writable_migration_entry( page_to_pfn(subpage)); else if (anon_exclusive) entry = make_readable_exclusive_migration_entry( page_to_pfn(subpage)); else entry = make_readable_migration_entry( page_to_pfn(subpage)); if (pte_young(pteval)) entry = make_migration_entry_young(entry); if (pte_dirty(pteval)) entry = make_migration_entry_dirty(entry); swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); if (pte_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); if (folio_test_hugetlb(folio)) set_huge_pte_at(mm, address, pvmw.pte, swp_pte, hsz); else set_pte_at(mm, address, pvmw.pte, swp_pte); trace_set_migration_pte(address, pte_val(swp_pte), folio_order(folio)); /* * No need to invalidate here it will synchronize on * against the special swap migration pte. */ } if (unlikely(folio_test_hugetlb(folio))) hugetlb_remove_rmap(folio); else folio_remove_rmap_pte(folio, subpage, vma); if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); folio_put(folio); } mmu_notifier_invalidate_range_end(&range); return ret; } /** * try_to_migrate - try to replace all page table mappings with swap entries * @folio: the folio to replace page table entries for * @flags: action and flags * * Tries to remove all the page table entries which are mapping this folio and * replace them with special swap entries. Caller must hold the folio lock. */ void try_to_migrate(struct folio *folio, enum ttu_flags flags) { struct rmap_walk_control rwc = { .rmap_one = try_to_migrate_one, .arg = (void *)flags, .done = folio_not_mapped, .anon_lock = folio_lock_anon_vma_read, }; /* * Migration always ignores mlock and only supports TTU_RMAP_LOCKED and * TTU_SPLIT_HUGE_PMD, TTU_SYNC, and TTU_BATCH_FLUSH flags. */ if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC | TTU_BATCH_FLUSH))) return; if (folio_is_zone_device(folio) && (!folio_is_device_private(folio) && !folio_is_device_coherent(folio))) return; /* * During exec, a temporary VMA is setup and later moved. * The VMA is moved under the anon_vma lock but not the * page tables leading to a race where migration cannot * find the migration ptes. Rather than increasing the * locking requirements of exec(), migration skips * temporary VMAs until after exec() completes. 
*/ if (!folio_test_ksm(folio) && folio_test_anon(folio)) rwc.invalid_vma = invalid_migration_vma; if (flags & TTU_RMAP_LOCKED) rmap_walk_locked(folio, &rwc); else rmap_walk(folio, &rwc); } #ifdef CONFIG_DEVICE_PRIVATE struct make_exclusive_args { struct mm_struct *mm; unsigned long address; void *owner; bool valid; }; static bool page_make_device_exclusive_one(struct folio *folio, struct vm_area_struct *vma, unsigned long address, void *priv) { struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0); struct make_exclusive_args *args = priv; pte_t pteval; struct page *subpage; bool ret = true; struct mmu_notifier_range range; swp_entry_t entry; pte_t swp_pte; pte_t ptent; mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma->vm_mm, address, min(vma->vm_end, address + folio_size(folio)), args->owner); mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); ptent = ptep_get(pvmw.pte); if (!pte_present(ptent)) { ret = false; page_vma_mapped_walk_done(&pvmw); break; } subpage = folio_page(folio, pte_pfn(ptent) - folio_pfn(folio)); address = pvmw.address; /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(ptent)); pteval = ptep_clear_flush(vma, address, pvmw.pte); /* Set the dirty flag on the folio now the pte is gone. */ if (pte_dirty(pteval)) folio_mark_dirty(folio); /* * Check that our target page is still mapped at the expected * address. */ if (args->mm == mm && args->address == address && pte_write(pteval)) args->valid = true; /* * Store the pfn of the page in a special migration * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ if (pte_write(pteval)) entry = make_writable_device_exclusive_entry( page_to_pfn(subpage)); else entry = make_readable_device_exclusive_entry( page_to_pfn(subpage)); swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); if (pte_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, address, pvmw.pte, swp_pte); /* * There is a reference on the page for the swap entry which has * been removed, so shouldn't take another. */ folio_remove_rmap_pte(folio, subpage, vma); } mmu_notifier_invalidate_range_end(&range); return ret; } /** * folio_make_device_exclusive - Mark the folio exclusively owned by a device. * @folio: The folio to replace page table entries for. * @mm: The mm_struct where the folio is expected to be mapped. * @address: Address where the folio is expected to be mapped. * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier callbacks * * Tries to remove all the page table entries which are mapping this * folio and replace them with special device exclusive swap entries to * grant a device exclusive access to the folio. * * Context: Caller must hold the folio lock. * Return: false if the page is still mapped, or if it could not be unmapped * from the expected address. Otherwise returns true (success). */ static bool folio_make_device_exclusive(struct folio *folio, struct mm_struct *mm, unsigned long address, void *owner) { struct make_exclusive_args args = { .mm = mm, .address = address, .owner = owner, .valid = false, }; struct rmap_walk_control rwc = { .rmap_one = page_make_device_exclusive_one, .done = folio_not_mapped, .anon_lock = folio_lock_anon_vma_read, .arg = &args, }; /* * Restrict to anonymous folios for now to avoid potential writeback * issues. 
*/ if (!folio_test_anon(folio)) return false; rmap_walk(folio, &rwc); return args.valid && !folio_mapcount(folio); } /** * make_device_exclusive_range() - Mark a range for exclusive use by a device * @mm: mm_struct of associated target process * @start: start of the region to mark for exclusive device access * @end: end address of region * @pages: returns the pages which were successfully marked for exclusive access * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering * * Returns: number of pages found in the range by GUP. A page is marked for * exclusive access only if the page pointer is non-NULL. * * This function finds ptes mapping page(s) to the given address range, locks * them and replaces mappings with special swap entries preventing userspace CPU * access. On fault these entries are replaced with the original mapping after * calling MMU notifiers. * * A driver using this to program access from a device must use a mmu notifier * critical section to hold a device specific lock during programming. Once * programming is complete it should drop the page lock and reference after * which point CPU access to the page will revoke the exclusive access. */ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct page **pages, void *owner) { long npages = (end - start) >> PAGE_SHIFT; long i; npages = get_user_pages_remote(mm, start, npages, FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD, pages, NULL); if (npages < 0) return npages; for (i = 0; i < npages; i++, start += PAGE_SIZE) { struct folio *folio = page_folio(pages[i]); if (PageTail(pages[i]) || !folio_trylock(folio)) { folio_put(folio); pages[i] = NULL; continue; } if (!folio_make_device_exclusive(folio, mm, start, owner)) { folio_unlock(folio); folio_put(folio); pages[i] = NULL; } } return npages; } EXPORT_SYMBOL_GPL(make_device_exclusive_range); #endif void __put_anon_vma(struct anon_vma *anon_vma) { struct anon_vma *root = anon_vma->root; anon_vma_free(anon_vma); if (root != anon_vma && atomic_dec_and_test(&root->refcount)) anon_vma_free(root); } static struct anon_vma *rmap_walk_anon_lock(struct folio *folio, struct rmap_walk_control *rwc) { struct anon_vma *anon_vma; if (rwc->anon_lock) return rwc->anon_lock(folio, rwc); /* * Note: remove_migration_ptes() cannot use folio_lock_anon_vma_read() * because that depends on page_mapped(); but not all its usages * are holding mmap_lock. Users without mmap_lock are required to * take a reference count to prevent the anon_vma disappearing */ anon_vma = folio_anon_vma(folio); if (!anon_vma) return NULL; if (anon_vma_trylock_read(anon_vma)) goto out; if (rwc->try_lock) { anon_vma = NULL; rwc->contended = true; goto out; } anon_vma_lock_read(anon_vma); out: return anon_vma; } /* * rmap_walk_anon - do something to anonymous page using the object-based * rmap method * @folio: the folio to be handled * @rwc: control variable according to each walk type * @locked: caller holds relevant rmap lock * * Find all the mappings of a folio using the mapping pointer and the vma * chains contained in the anon_vma struct it points to. */ static void rmap_walk_anon(struct folio *folio, struct rmap_walk_control *rwc, bool locked) { struct anon_vma *anon_vma; pgoff_t pgoff_start, pgoff_end; struct anon_vma_chain *avc; if (locked) { anon_vma = folio_anon_vma(folio); /* anon_vma disappear under us? 
*/ VM_BUG_ON_FOLIO(!anon_vma, folio); } else { anon_vma = rmap_walk_anon_lock(folio, rwc); } if (!anon_vma) return; pgoff_start = folio_pgoff(folio); pgoff_end = pgoff_start + folio_nr_pages(folio) - 1; anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff_start, pgoff_end) { struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(&folio->page, vma); VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; if (!rwc->rmap_one(folio, vma, address, rwc->arg)) break; if (rwc->done && rwc->done(folio)) break; } if (!locked) anon_vma_unlock_read(anon_vma); } /* * rmap_walk_file - do something to file page using the object-based rmap method * @folio: the folio to be handled * @rwc: control variable according to each walk type * @locked: caller holds relevant rmap lock * * Find all the mappings of a folio using the mapping pointer and the vma chains * contained in the address_space struct it points to. */ static void rmap_walk_file(struct folio *folio, struct rmap_walk_control *rwc, bool locked) { struct address_space *mapping = folio_mapping(folio); pgoff_t pgoff_start, pgoff_end; struct vm_area_struct *vma; /* * The page lock not only makes sure that page->mapping cannot * suddenly be NULLified by truncation, it makes sure that the * structure at mapping cannot be freed and reused yet, * so we can safely take mapping->i_mmap_rwsem. */ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (!mapping) return; pgoff_start = folio_pgoff(folio); pgoff_end = pgoff_start + folio_nr_pages(folio) - 1; if (!locked) { if (i_mmap_trylock_read(mapping)) goto lookup; if (rwc->try_lock) { rwc->contended = true; return; } i_mmap_lock_read(mapping); } lookup: vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff_start, pgoff_end) { unsigned long address = vma_address(&folio->page, vma); VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; if (!rwc->rmap_one(folio, vma, address, rwc->arg)) goto done; if (rwc->done && rwc->done(folio)) goto done; } done: if (!locked) i_mmap_unlock_read(mapping); } void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc) { if (unlikely(folio_test_ksm(folio))) rmap_walk_ksm(folio, rwc); else if (folio_test_anon(folio)) rmap_walk_anon(folio, rwc, false); else rmap_walk_file(folio, rwc, false); } /* Like rmap_walk, but caller holds relevant rmap lock */ void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc) { /* no ksm support for now */ VM_BUG_ON_FOLIO(folio_test_ksm(folio), folio); if (folio_test_anon(folio)) rmap_walk_anon(folio, rwc, true); else rmap_walk_file(folio, rwc, true); } #ifdef CONFIG_HUGETLB_PAGE /* * The following two functions are for anonymous (private mapped) hugepages. * Unlike common anonymous pages, anonymous hugepages have no accounting code * and no lru code, because we handle hugepages differently from common pages. 
*/ void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma, unsigned long address, rmap_t flags) { VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); atomic_inc(&folio->_entire_mapcount); if (flags & RMAP_EXCLUSIVE) SetPageAnonExclusive(&folio->page); VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 && PageAnonExclusive(&folio->page), folio); } void hugetlb_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, unsigned long address) { VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); BUG_ON(address < vma->vm_start || address >= vma->vm_end); /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); folio_clear_hugetlb_restore_reserve(folio); __folio_set_anon(folio, vma, address, true); SetPageAnonExclusive(&folio->page); } #endif /* CONFIG_HUGETLB_PAGE */
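/*
 * Illustrative sketch (not part of the file above): how a caller is
 * typically expected to drive rmap_walk().  The helper and callback names
 * (count_locked_mappings, count_locked_one, struct count_locked_arg) are
 * hypothetical; the rmap_walk_control usage mirrors folio_referenced().
 */
struct count_locked_arg {
	int nr_locked;
};

/* Called once per VMA that maps the folio; return true to keep walking. */
static bool count_locked_one(struct folio *folio, struct vm_area_struct *vma,
			     unsigned long address, void *arg)
{
	struct count_locked_arg *cla = arg;

	if (vma->vm_flags & VM_LOCKED)
		cla->nr_locked++;
	return true;
}

static int count_locked_mappings(struct folio *folio)
{
	struct count_locked_arg cla = { .nr_locked = 0 };
	struct rmap_walk_control rwc = {
		.rmap_one = count_locked_one,
		.arg = &cla,
		.anon_lock = folio_lock_anon_vma_read,
	};

	/* As with folio_referenced(), the caller keeps the folio stable. */
	rmap_walk(folio, &rwc);
	return cla.nr_locked;
}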
// SPDX-License-Identifier: GPL-2.0
/*
 * transport_class.c - implementation of generic transport classes
 *                     using attribute_containers
 *
 * Copyright (c) 2005 - James Bottomley <James.Bottomley@steeleye.com>
 *
 * The basic idea here is to allow any "device controller" (which
 * would most often be a Host Bus Adapter) to use the services of one
 * or more transport classes for performing transport specific
 * services.  Transport specific services are things that the generic
 * command layer doesn't want to know about (speed settings, line
 * conditioning, etc), but which the user might be interested in.
 * Thus, the HBAs use the routines exported by the transport classes
 * to perform these functions.  The transport classes export certain
 * values to the user via sysfs using attribute containers.
 *
 * Note: because not every HBA will care about every transport
 * attribute, there's a many-to-one relationship that goes like this:
 *
 * transport class<-----attribute container<----class device
 *
 * Usually the attribute container is per-HBA, but the design doesn't
 * mandate that.  Although most of the services will be specific to
 * the actual external storage connection used by the HBA, the generic
 * transport class is framed entirely in terms of generic devices to
 * allow it to be used by any physical HBA in the system.
 */
#include <linux/export.h>
#include <linux/attribute_container.h>
#include <linux/transport_class.h>

static int transport_remove_classdev(struct attribute_container *cont,
				     struct device *dev,
				     struct device *classdev);

/**
 * transport_class_register - register an initial transport class
 *
 * @tclass: a pointer to the transport class structure to be initialised
 *
 * The transport class contains an embedded class which is used to
 * identify it.  The caller should zero this structure and initialise
 * the embedded generic class with the transport class's unique name.
 * There's a macro DECLARE_TRANSPORT_CLASS() to do this (declared
 * classes still must be registered).
 *
 * Returns 0 on success or error on failure.
 */
int transport_class_register(struct transport_class *tclass)
{
	return class_register(&tclass->class);
}
EXPORT_SYMBOL_GPL(transport_class_register);

/**
 * transport_class_unregister - unregister a previously registered class
 *
 * @tclass: The transport class to unregister
 *
 * Must be called prior to deallocating the memory for the transport
 * class.
 */
void transport_class_unregister(struct transport_class *tclass)
{
	class_unregister(&tclass->class);
}
EXPORT_SYMBOL_GPL(transport_class_unregister);

static int anon_transport_dummy_function(struct transport_container *tc,
					 struct device *dev,
					 struct device *cdev)
{
	/* do nothing */
	return 0;
}

/**
 * anon_transport_class_register - register an anonymous class
 *
 * @atc: The anon transport class to register
 *
 * The anonymous transport class contains both a transport class and a
 * container.  The idea of an anonymous class is that it never
 * actually has any device attributes associated with it (and thus
 * saves on container storage).  So it can only be used for triggering
 * events.  Pre-zero the structure and then use
 * DECLARE_ANON_TRANSPORT_CLASS() to initialise the anon transport
 * class storage.
 */
int anon_transport_class_register(struct anon_transport_class *atc)
{
	int error;

	atc->container.class = &atc->tclass.class;
	attribute_container_set_no_classdevs(&atc->container);
	error = attribute_container_register(&atc->container);
	if (error)
		return error;
	atc->tclass.setup = anon_transport_dummy_function;
	atc->tclass.remove = anon_transport_dummy_function;
	return 0;
}
EXPORT_SYMBOL_GPL(anon_transport_class_register);

/**
 * anon_transport_class_unregister - unregister an anon class
 *
 * @atc: Pointer to the anon transport class to unregister
 *
 * Must be called prior to deallocating the memory for the anon
 * transport class.
 */
void anon_transport_class_unregister(struct anon_transport_class *atc)
{
	if (unlikely(attribute_container_unregister(&atc->container)))
		BUG();
}
EXPORT_SYMBOL_GPL(anon_transport_class_unregister);

static int transport_setup_classdev(struct attribute_container *cont,
				    struct device *dev,
				    struct device *classdev)
{
	struct transport_class *tclass = class_to_transport_class(cont->class);
	struct transport_container *tcont =
		attribute_container_to_transport_container(cont);

	if (tclass->setup)
		tclass->setup(tcont, dev, classdev);
	return 0;
}

/**
 * transport_setup_device - declare a new dev for transport class
 *			    association but don't make it visible yet.
 * @dev: the generic device representing the entity being added
 *
 * Usually, dev represents some component in the HBA system (either
 * the HBA itself or a device remote across the HBA bus).  This
 * routine is simply a trigger point to see if any set of transport
 * classes wishes to associate with the added device.  This allocates
 * storage for the class device and initialises it, but does not yet
 * add it to the system or add attributes to it (you do this with
 * transport_add_device).  If you have no need for separate setup
 * and add operations, use transport_register_device (see
 * transport_class.h).
 */
void transport_setup_device(struct device *dev)
{
	attribute_container_add_device(dev, transport_setup_classdev);
}
EXPORT_SYMBOL_GPL(transport_setup_device);

static int transport_add_class_device(struct attribute_container *cont,
				      struct device *dev,
				      struct device *classdev)
{
	struct transport_class *tclass = class_to_transport_class(cont->class);
	int error = attribute_container_add_class_device(classdev);
	struct transport_container *tcont =
		attribute_container_to_transport_container(cont);

	if (error)
		goto err_remove;

	if (tcont->statistics) {
		error = sysfs_create_group(&classdev->kobj, tcont->statistics);
		if (error)
			goto err_del;
	}

	return 0;

err_del:
	attribute_container_class_device_del(classdev);
err_remove:
	if (tclass->remove)
		tclass->remove(tcont, dev, classdev);

	return error;
}

/**
 * transport_add_device - declare a new dev for transport class association
 *
 * @dev: the generic device representing the entity being added
 *
 * Usually, dev represents some component in the HBA system (either
 * the HBA itself or a device remote across the HBA bus).  This
 * routine is simply a trigger point used to add the device to the
 * system and register attributes for it.
 */
int transport_add_device(struct device *dev)
{
	return attribute_container_device_trigger_safe(dev,
					transport_add_class_device,
					transport_remove_classdev);
}
EXPORT_SYMBOL_GPL(transport_add_device);

static int transport_configure(struct attribute_container *cont,
			       struct device *dev,
			       struct device *cdev)
{
	struct transport_class *tclass = class_to_transport_class(cont->class);
	struct transport_container *tcont =
		attribute_container_to_transport_container(cont);

	if (tclass->configure)
		tclass->configure(tcont, dev, cdev);
	return 0;
}

/**
 * transport_configure_device - configure an already set up device
 *
 * @dev: generic device representing device to be configured
 *
 * The idea of configure is simply to provide a point within the setup
 * process to allow the transport class to extract information from a
 * device after it has been set up.  This is used in SCSI because we
 * have to have a setup device to begin using the HBA, but after we
 * send the initial inquiry, we use configure to extract the device
 * parameters.  The device need not have been added to be configured.
 */
void transport_configure_device(struct device *dev)
{
	attribute_container_device_trigger(dev, transport_configure);
}
EXPORT_SYMBOL_GPL(transport_configure_device);

static int transport_remove_classdev(struct attribute_container *cont,
				     struct device *dev,
				     struct device *classdev)
{
	struct transport_container *tcont =
		attribute_container_to_transport_container(cont);
	struct transport_class *tclass = class_to_transport_class(cont->class);

	if (tclass->remove)
		tclass->remove(tcont, dev, classdev);

	if (tclass->remove != anon_transport_dummy_function) {
		if (tcont->statistics)
			sysfs_remove_group(&classdev->kobj, tcont->statistics);
		attribute_container_class_device_del(classdev);
	}
	return 0;
}

/**
 * transport_remove_device - remove the visibility of a device
 *
 * @dev: generic device to remove
 *
 * This call removes the visibility of the device (to the user from
 * sysfs), but does not destroy it.  To eliminate a device entirely
 * you must also call transport_destroy_device.  If you don't need to
 * do remove and destroy as separate operations, use
 * transport_unregister_device() (see transport_class.h) which will
 * perform both calls for you.
*/ void transport_remove_device(struct device *dev) { attribute_container_device_trigger(dev, transport_remove_classdev); } EXPORT_SYMBOL_GPL(transport_remove_device); static void transport_destroy_classdev(struct attribute_container *cont, struct device *dev, struct device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); if (tclass->remove != anon_transport_dummy_function) put_device(classdev); } /** * transport_destroy_device - destroy a removed device * * @dev: device to eliminate from the transport class. * * This call triggers the elimination of storage associated with the * transport classdev. Note: all it really does is relinquish a * reference to the classdev. The memory will not be freed until the * last reference goes to zero. Note also that the classdev retains a * reference count on dev, so dev too will remain for as long as the * transport class device remains around. */ void transport_destroy_device(struct device *dev) { attribute_container_remove_device(dev, transport_destroy_classdev); } EXPORT_SYMBOL_GPL(transport_destroy_device);
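/*
 * Illustrative sketch (not part of the file above, and not upstream code):
 * roughly how a subsystem consumes the transport class API defined here.
 * DECLARE_TRANSPORT_CLASS(), transport_class_register() and
 * transport_container_register() come from <linux/transport_class.h>;
 * everything prefixed "example_" is a hypothetical name for this sketch.
 * The split setup/add and remove/destroy calls below are the ones exported
 * above; transport_register_device()/transport_unregister_device() in
 * transport_class.h wrap these pairs for the common case.
 */
#include <linux/init.h>
#include <linux/device.h>
#include <linux/attribute_container.h>
#include <linux/transport_class.h>

/* Per-classdev callbacks wired into the transport class. */
static int example_setup(struct transport_container *tc,
			 struct device *dev, struct device *cdev)
{
	/* allocate and initialise per-device transport state here */
	return 0;
}

static int example_configure(struct transport_container *tc,
			     struct device *dev, struct device *cdev)
{
	/* pull parameters out of the device once it has been probed */
	return 0;
}

static int example_remove(struct transport_container *tc,
			  struct device *dev, struct device *cdev)
{
	/* undo whatever example_setup() allocated */
	return 0;
}

static DECLARE_TRANSPORT_CLASS(example_transport_class, "example_transport",
			       example_setup, example_remove,
			       example_configure);

/* Only devices accepted by the match routine get a transport classdev. */
static int example_match(struct attribute_container *cont, struct device *dev)
{
	return 1;	/* a real transport checks the bus/driver of @dev */
}

static struct transport_container example_container;

static int __init example_transport_init(void)
{
	int error;

	error = transport_class_register(&example_transport_class);
	if (error)
		return error;

	example_container.ac.class = &example_transport_class.class;
	example_container.ac.match = example_match;
	return transport_container_register(&example_container);
}

/* Hot-plug path: allocate the classdev, then make it visible ... */
static void example_device_found(struct device *dev)
{
	transport_setup_device(dev);
	transport_add_device(dev);
}

/* ... and hide it, then drop the reference, on the way out. */
static void example_device_gone(struct device *dev)
{
	transport_remove_device(dev);
	transport_destroy_device(dev);
}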
// SPDX-License-Identifier: GPL-2.0+ /* *
dummy_hcd.c -- Dummy/Loopback USB host and device emulator driver. * * Maintainer: Alan Stern <stern@rowland.harvard.edu> * * Copyright (C) 2003 David Brownell * Copyright (C) 2003-2005 Alan Stern */ /* * This exposes a device side "USB gadget" API, driven by requests to a * Linux-USB host controller driver. USB traffic is simulated; there's * no need for USB hardware. Use this with two other drivers: * * - Gadget driver, responding to requests (device); * - Host-side device driver, as already familiar in Linux. * * Having this all in one kernel can help some stages of development, * bypassing some hardware (and driver) issues. UML could help too. * * Note: The emulation does not include isochronous transfers! */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/ioport.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/hrtimer.h> #include <linux/list.h> #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/usb.h> #include <linux/usb/gadget.h> #include <linux/usb/hcd.h> #include <linux/scatterlist.h> #include <asm/byteorder.h> #include <linux/io.h> #include <asm/irq.h> #include <asm/unaligned.h> #define DRIVER_DESC "USB Host+Gadget Emulator" #define DRIVER_VERSION "02 May 2005" #define POWER_BUDGET 500 /* in mA; use 8 for low-power port testing */ #define POWER_BUDGET_3 900 /* in mA */ #define DUMMY_TIMER_INT_NSECS 125000 /* 1 microframe */ static const char driver_name[] = "dummy_hcd"; static const char driver_desc[] = "USB Host+Gadget Emulator"; static const char gadget_name[] = "dummy_udc"; MODULE_DESCRIPTION(DRIVER_DESC); MODULE_AUTHOR("David Brownell"); MODULE_LICENSE("GPL"); struct dummy_hcd_module_parameters { bool is_super_speed; bool is_high_speed; unsigned int num; }; static struct dummy_hcd_module_parameters mod_data = { .is_super_speed = false, .is_high_speed = true, .num = 1, }; module_param_named(is_super_speed, mod_data.is_super_speed, bool, S_IRUGO); MODULE_PARM_DESC(is_super_speed, "true to simulate SuperSpeed connection"); module_param_named(is_high_speed, mod_data.is_high_speed, bool, S_IRUGO); MODULE_PARM_DESC(is_high_speed, "true to simulate HighSpeed connection"); module_param_named(num, mod_data.num, uint, S_IRUGO); MODULE_PARM_DESC(num, "number of emulated controllers"); /*-------------------------------------------------------------------------*/ /* gadget side driver data structres */ struct dummy_ep { struct list_head queue; unsigned long last_io; /* jiffies timestamp */ struct usb_gadget *gadget; const struct usb_endpoint_descriptor *desc; struct usb_ep ep; unsigned halted:1; unsigned wedged:1; unsigned already_seen:1; unsigned setup_stage:1; unsigned stream_en:1; }; struct dummy_request { struct list_head queue; /* ep's requests */ struct usb_request req; }; static inline struct dummy_ep *usb_ep_to_dummy_ep(struct usb_ep *_ep) { return container_of(_ep, struct dummy_ep, ep); } static inline struct dummy_request *usb_request_to_dummy_request (struct usb_request *_req) { return container_of(_req, struct dummy_request, req); } /*-------------------------------------------------------------------------*/ /* * Every device has ep0 for control requests, plus up to 30 more endpoints, * in one of two types: * * - Configurable: direction (in/out), type (bulk, iso, etc), and endpoint * number can be changed. Names like "ep-a" are used for this type. * * - Fixed Function: in other cases. some characteristics may be mutable; * that'd be hardware-specific. 
Names like "ep12out-bulk" are used. * * Gadget drivers are responsible for not setting up conflicting endpoint * configurations, illegal or unsupported packet lengths, and so on. */ static const char ep0name[] = "ep0"; static const struct { const char *name; const struct usb_ep_caps caps; } ep_info[] = { #define EP_INFO(_name, _caps) \ { \ .name = _name, \ .caps = _caps, \ } /* we don't provide isochronous endpoints since we don't support them */ #define TYPE_BULK_OR_INT (USB_EP_CAPS_TYPE_BULK | USB_EP_CAPS_TYPE_INT) /* everyone has ep0 */ EP_INFO(ep0name, USB_EP_CAPS(USB_EP_CAPS_TYPE_CONTROL, USB_EP_CAPS_DIR_ALL)), /* act like a pxa250: fifteen fixed function endpoints */ EP_INFO("ep1in-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)), EP_INFO("ep2out-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)), /* EP_INFO("ep3in-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)), EP_INFO("ep4out-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)), */ EP_INFO("ep5in-int", USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep6in-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)), EP_INFO("ep7out-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)), /* EP_INFO("ep8in-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)), EP_INFO("ep9out-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)), */ EP_INFO("ep10in-int", USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep11in-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)), EP_INFO("ep12out-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)), /* EP_INFO("ep13in-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_IN)), EP_INFO("ep14out-iso", USB_EP_CAPS(USB_EP_CAPS_TYPE_ISO, USB_EP_CAPS_DIR_OUT)), */ EP_INFO("ep15in-int", USB_EP_CAPS(USB_EP_CAPS_TYPE_INT, USB_EP_CAPS_DIR_IN)), /* or like sa1100: two fixed function endpoints */ EP_INFO("ep1out-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep2in-bulk", USB_EP_CAPS(USB_EP_CAPS_TYPE_BULK, USB_EP_CAPS_DIR_IN)), /* and now some generic EPs so we have enough in multi config */ EP_INFO("ep-aout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-bin", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-cout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-dout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-ein", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-fout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-gin", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-hout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-iout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-jin", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-kout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), EP_INFO("ep-lin", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_IN)), EP_INFO("ep-mout", USB_EP_CAPS(TYPE_BULK_OR_INT, USB_EP_CAPS_DIR_OUT)), #undef EP_INFO }; #define DUMMY_ENDPOINTS ARRAY_SIZE(ep_info) /*-------------------------------------------------------------------------*/ #define FIFO_SIZE 64 struct urbp { struct urb *urb; struct list_head urbp_list; struct sg_mapping_iter miter; u32 miter_started; }; enum dummy_rh_state { DUMMY_RH_RESET, DUMMY_RH_SUSPENDED, DUMMY_RH_RUNNING }; struct dummy_hcd { struct dummy *dum; enum dummy_rh_state rh_state; struct hrtimer timer; u32 port_status; u32 
old_status; unsigned long re_timeout; struct usb_device *udev; struct list_head urbp_list; struct urbp *next_frame_urbp; u32 stream_en_ep; u8 num_stream[30 / 2]; unsigned active:1; unsigned old_active:1; unsigned resuming:1; }; struct dummy { spinlock_t lock; /* * DEVICE/GADGET side support */ struct dummy_ep ep[DUMMY_ENDPOINTS]; int address; int callback_usage; struct usb_gadget gadget; struct usb_gadget_driver *driver; struct dummy_request fifo_req; u8 fifo_buf[FIFO_SIZE]; u16 devstatus; unsigned ints_enabled:1; unsigned udc_suspended:1; unsigned pullup:1; /* * HOST side support */ struct dummy_hcd *hs_hcd; struct dummy_hcd *ss_hcd; }; static inline struct dummy_hcd *hcd_to_dummy_hcd(struct usb_hcd *hcd) { return (struct dummy_hcd *) (hcd->hcd_priv); } static inline struct usb_hcd *dummy_hcd_to_hcd(struct dummy_hcd *dum) { return container_of((void *) dum, struct usb_hcd, hcd_priv); } static inline struct device *dummy_dev(struct dummy_hcd *dum) { return dummy_hcd_to_hcd(dum)->self.controller; } static inline struct device *udc_dev(struct dummy *dum) { return dum->gadget.dev.parent; } static inline struct dummy *ep_to_dummy(struct dummy_ep *ep) { return container_of(ep->gadget, struct dummy, gadget); } static inline struct dummy_hcd *gadget_to_dummy_hcd(struct usb_gadget *gadget) { struct dummy *dum = container_of(gadget, struct dummy, gadget); if (dum->gadget.speed == USB_SPEED_SUPER) return dum->ss_hcd; else return dum->hs_hcd; } static inline struct dummy *gadget_dev_to_dummy(struct device *dev) { return container_of(dev, struct dummy, gadget.dev); } /*-------------------------------------------------------------------------*/ /* DEVICE/GADGET SIDE UTILITY ROUTINES */ /* called with spinlock held */ static void nuke(struct dummy *dum, struct dummy_ep *ep) { while (!list_empty(&ep->queue)) { struct dummy_request *req; req = list_entry(ep->queue.next, struct dummy_request, queue); list_del_init(&req->queue); req->req.status = -ESHUTDOWN; spin_unlock(&dum->lock); usb_gadget_giveback_request(&ep->ep, &req->req); spin_lock(&dum->lock); } } /* caller must hold lock */ static void stop_activity(struct dummy *dum) { int i; /* prevent any more requests */ dum->address = 0; /* The timer is left running so that outstanding URBs can fail */ /* nuke any pending requests first, so driver i/o is quiesced */ for (i = 0; i < DUMMY_ENDPOINTS; ++i) nuke(dum, &dum->ep[i]); /* driver now does any non-usb quiescing necessary */ } /** * set_link_state_by_speed() - Sets the current state of the link according to * the hcd speed * @dum_hcd: pointer to the dummy_hcd structure to update the link state for * * This function updates the port_status according to the link state and the * speed of the hcd. 
*/ static void set_link_state_by_speed(struct dummy_hcd *dum_hcd) { struct dummy *dum = dum_hcd->dum; if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) { if ((dum_hcd->port_status & USB_SS_PORT_STAT_POWER) == 0) { dum_hcd->port_status = 0; } else if (!dum->pullup || dum->udc_suspended) { /* UDC suspend must cause a disconnect */ dum_hcd->port_status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE); if ((dum_hcd->old_status & USB_PORT_STAT_CONNECTION) != 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); } else { /* device is connected and not suspended */ dum_hcd->port_status |= (USB_PORT_STAT_CONNECTION | USB_PORT_STAT_SPEED_5GBPS) ; if ((dum_hcd->old_status & USB_PORT_STAT_CONNECTION) == 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) && (dum_hcd->port_status & USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_U0 && dum_hcd->rh_state != DUMMY_RH_SUSPENDED) dum_hcd->active = 1; } } else { if ((dum_hcd->port_status & USB_PORT_STAT_POWER) == 0) { dum_hcd->port_status = 0; } else if (!dum->pullup || dum->udc_suspended) { /* UDC suspend must cause a disconnect */ dum_hcd->port_status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED | USB_PORT_STAT_SUSPEND); if ((dum_hcd->old_status & USB_PORT_STAT_CONNECTION) != 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); } else { dum_hcd->port_status |= USB_PORT_STAT_CONNECTION; if ((dum_hcd->old_status & USB_PORT_STAT_CONNECTION) == 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) == 0) dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND; else if ((dum_hcd->port_status & USB_PORT_STAT_SUSPEND) == 0 && dum_hcd->rh_state != DUMMY_RH_SUSPENDED) dum_hcd->active = 1; } } } /* caller must hold lock */ static void set_link_state(struct dummy_hcd *dum_hcd) __must_hold(&dum->lock) { struct dummy *dum = dum_hcd->dum; unsigned int power_bit; dum_hcd->active = 0; if (dum->pullup) if ((dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 && dum->gadget.speed != USB_SPEED_SUPER) || (dummy_hcd_to_hcd(dum_hcd)->speed != HCD_USB3 && dum->gadget.speed == USB_SPEED_SUPER)) return; set_link_state_by_speed(dum_hcd); power_bit = (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 ? 
USB_SS_PORT_STAT_POWER : USB_PORT_STAT_POWER); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) == 0 || dum_hcd->active) dum_hcd->resuming = 0; /* Currently !connected or in reset */ if ((dum_hcd->port_status & power_bit) == 0 || (dum_hcd->port_status & USB_PORT_STAT_RESET) != 0) { unsigned int disconnect = power_bit & dum_hcd->old_status & (~dum_hcd->port_status); unsigned int reset = USB_PORT_STAT_RESET & (~dum_hcd->old_status) & dum_hcd->port_status; /* Report reset and disconnect events to the driver */ if (dum->ints_enabled && (disconnect || reset)) { stop_activity(dum); ++dum->callback_usage; spin_unlock(&dum->lock); if (reset) usb_gadget_udc_reset(&dum->gadget, dum->driver); else dum->driver->disconnect(&dum->gadget); spin_lock(&dum->lock); --dum->callback_usage; } } else if (dum_hcd->active != dum_hcd->old_active && dum->ints_enabled) { ++dum->callback_usage; spin_unlock(&dum->lock); if (dum_hcd->old_active && dum->driver->suspend) dum->driver->suspend(&dum->gadget); else if (!dum_hcd->old_active && dum->driver->resume) dum->driver->resume(&dum->gadget); spin_lock(&dum->lock); --dum->callback_usage; } dum_hcd->old_status = dum_hcd->port_status; dum_hcd->old_active = dum_hcd->active; } /*-------------------------------------------------------------------------*/ /* DEVICE/GADGET SIDE DRIVER * * This only tracks gadget state. All the work is done when the host * side tries some (emulated) i/o operation. Real device controller * drivers would do real i/o using dma, fifos, irqs, timers, etc. */ #define is_enabled(dum) \ (dum->port_status & USB_PORT_STAT_ENABLE) static int dummy_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) { struct dummy *dum; struct dummy_hcd *dum_hcd; struct dummy_ep *ep; unsigned max; int retval; ep = usb_ep_to_dummy_ep(_ep); if (!_ep || !desc || ep->desc || _ep->name == ep0name || desc->bDescriptorType != USB_DT_ENDPOINT) return -EINVAL; dum = ep_to_dummy(ep); if (!dum->driver) return -ESHUTDOWN; dum_hcd = gadget_to_dummy_hcd(&dum->gadget); if (!is_enabled(dum_hcd)) return -ESHUTDOWN; /* * For HS/FS devices only bits 0..10 of the wMaxPacketSize represent the * maximum packet size. * For SS devices the wMaxPacketSize is limited by 1024. */ max = usb_endpoint_maxp(desc); /* drivers must not request bad settings, since lower levels * (hardware or its drivers) may not check. some endpoints * can't do iso, many have maxpacket limitations, etc. * * since this "hardware" driver is here to help debugging, we * have some extra sanity checks. (there could be more though, * especially for "ep9out" style fixed function ones.) 
*/ retval = -EINVAL; switch (usb_endpoint_type(desc)) { case USB_ENDPOINT_XFER_BULK: if (strstr(ep->ep.name, "-iso") || strstr(ep->ep.name, "-int")) { goto done; } switch (dum->gadget.speed) { case USB_SPEED_SUPER: if (max == 1024) break; goto done; case USB_SPEED_HIGH: if (max == 512) break; goto done; case USB_SPEED_FULL: if (max == 8 || max == 16 || max == 32 || max == 64) /* we'll fake any legal size */ break; /* save a return statement */ fallthrough; default: goto done; } break; case USB_ENDPOINT_XFER_INT: if (strstr(ep->ep.name, "-iso")) /* bulk is ok */ goto done; /* real hardware might not handle all packet sizes */ switch (dum->gadget.speed) { case USB_SPEED_SUPER: case USB_SPEED_HIGH: if (max <= 1024) break; /* save a return statement */ fallthrough; case USB_SPEED_FULL: if (max <= 64) break; /* save a return statement */ fallthrough; default: if (max <= 8) break; goto done; } break; case USB_ENDPOINT_XFER_ISOC: if (strstr(ep->ep.name, "-bulk") || strstr(ep->ep.name, "-int")) goto done; /* real hardware might not handle all packet sizes */ switch (dum->gadget.speed) { case USB_SPEED_SUPER: case USB_SPEED_HIGH: if (max <= 1024) break; /* save a return statement */ fallthrough; case USB_SPEED_FULL: if (max <= 1023) break; /* save a return statement */ fallthrough; default: goto done; } break; default: /* few chips support control except on ep0 */ goto done; } _ep->maxpacket = max; if (usb_ss_max_streams(_ep->comp_desc)) { if (!usb_endpoint_xfer_bulk(desc)) { dev_err(udc_dev(dum), "Can't enable stream support on " "non-bulk ep %s\n", _ep->name); return -EINVAL; } ep->stream_en = 1; } ep->desc = desc; dev_dbg(udc_dev(dum), "enabled %s (ep%d%s-%s) maxpacket %d stream %s\n", _ep->name, desc->bEndpointAddress & 0x0f, (desc->bEndpointAddress & USB_DIR_IN) ? "in" : "out", usb_ep_type_string(usb_endpoint_type(desc)), max, ep->stream_en ? "enabled" : "disabled"); /* at this point real hardware should be NAKing transfers * to that endpoint, until a buffer is queued to it. 
*/ ep->halted = ep->wedged = 0; retval = 0; done: return retval; } static int dummy_disable(struct usb_ep *_ep) { struct dummy_ep *ep; struct dummy *dum; unsigned long flags; ep = usb_ep_to_dummy_ep(_ep); if (!_ep || !ep->desc || _ep->name == ep0name) return -EINVAL; dum = ep_to_dummy(ep); spin_lock_irqsave(&dum->lock, flags); ep->desc = NULL; ep->stream_en = 0; nuke(dum, ep); spin_unlock_irqrestore(&dum->lock, flags); dev_dbg(udc_dev(dum), "disabled %s\n", _ep->name); return 0; } static struct usb_request *dummy_alloc_request(struct usb_ep *_ep, gfp_t mem_flags) { struct dummy_request *req; if (!_ep) return NULL; req = kzalloc(sizeof(*req), mem_flags); if (!req) return NULL; INIT_LIST_HEAD(&req->queue); return &req->req; } static void dummy_free_request(struct usb_ep *_ep, struct usb_request *_req) { struct dummy_request *req; if (!_ep || !_req) { WARN_ON(1); return; } req = usb_request_to_dummy_request(_req); WARN_ON(!list_empty(&req->queue)); kfree(req); } static void fifo_complete(struct usb_ep *ep, struct usb_request *req) { } static int dummy_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t mem_flags) { struct dummy_ep *ep; struct dummy_request *req; struct dummy *dum; struct dummy_hcd *dum_hcd; unsigned long flags; req = usb_request_to_dummy_request(_req); if (!_req || !list_empty(&req->queue) || !_req->complete) return -EINVAL; ep = usb_ep_to_dummy_ep(_ep); if (!_ep || (!ep->desc && _ep->name != ep0name)) return -EINVAL; dum = ep_to_dummy(ep); dum_hcd = gadget_to_dummy_hcd(&dum->gadget); if (!dum->driver || !is_enabled(dum_hcd)) return -ESHUTDOWN; #if 0 dev_dbg(udc_dev(dum), "ep %p queue req %p to %s, len %d buf %p\n", ep, _req, _ep->name, _req->length, _req->buf); #endif _req->status = -EINPROGRESS; _req->actual = 0; spin_lock_irqsave(&dum->lock, flags); /* implement an emulated single-request FIFO */ if (ep->desc && (ep->desc->bEndpointAddress & USB_DIR_IN) && list_empty(&dum->fifo_req.queue) && list_empty(&ep->queue) && _req->length <= FIFO_SIZE) { req = &dum->fifo_req; req->req = *_req; req->req.buf = dum->fifo_buf; memcpy(dum->fifo_buf, _req->buf, _req->length); req->req.context = dum; req->req.complete = fifo_complete; list_add_tail(&req->queue, &ep->queue); spin_unlock(&dum->lock); _req->actual = _req->length; _req->status = 0; usb_gadget_giveback_request(_ep, _req); spin_lock(&dum->lock); } else list_add_tail(&req->queue, &ep->queue); spin_unlock_irqrestore(&dum->lock, flags); /* real hardware would likely enable transfers here, in case * it'd been left NAKing. 
*/ return 0; } static int dummy_dequeue(struct usb_ep *_ep, struct usb_request *_req) { struct dummy_ep *ep; struct dummy *dum; int retval = -EINVAL; unsigned long flags; struct dummy_request *req = NULL, *iter; if (!_ep || !_req) return retval; ep = usb_ep_to_dummy_ep(_ep); dum = ep_to_dummy(ep); if (!dum->driver) return -ESHUTDOWN; local_irq_save(flags); spin_lock(&dum->lock); list_for_each_entry(iter, &ep->queue, queue) { if (&iter->req != _req) continue; list_del_init(&iter->queue); _req->status = -ECONNRESET; req = iter; retval = 0; break; } spin_unlock(&dum->lock); if (retval == 0) { dev_dbg(udc_dev(dum), "dequeued req %p from %s, len %d buf %p\n", req, _ep->name, _req->length, _req->buf); usb_gadget_giveback_request(_ep, _req); } local_irq_restore(flags); return retval; } static int dummy_set_halt_and_wedge(struct usb_ep *_ep, int value, int wedged) { struct dummy_ep *ep; struct dummy *dum; if (!_ep) return -EINVAL; ep = usb_ep_to_dummy_ep(_ep); dum = ep_to_dummy(ep); if (!dum->driver) return -ESHUTDOWN; if (!value) ep->halted = ep->wedged = 0; else if (ep->desc && (ep->desc->bEndpointAddress & USB_DIR_IN) && !list_empty(&ep->queue)) return -EAGAIN; else { ep->halted = 1; if (wedged) ep->wedged = 1; } /* FIXME clear emulated data toggle too */ return 0; } static int dummy_set_halt(struct usb_ep *_ep, int value) { return dummy_set_halt_and_wedge(_ep, value, 0); } static int dummy_set_wedge(struct usb_ep *_ep) { if (!_ep || _ep->name == ep0name) return -EINVAL; return dummy_set_halt_and_wedge(_ep, 1, 1); } static const struct usb_ep_ops dummy_ep_ops = { .enable = dummy_enable, .disable = dummy_disable, .alloc_request = dummy_alloc_request, .free_request = dummy_free_request, .queue = dummy_queue, .dequeue = dummy_dequeue, .set_halt = dummy_set_halt, .set_wedge = dummy_set_wedge, }; /*-------------------------------------------------------------------------*/ /* there are both host and device side versions of this call ... */ static int dummy_g_get_frame(struct usb_gadget *_gadget) { struct timespec64 ts64; ktime_get_ts64(&ts64); return ts64.tv_nsec / NSEC_PER_MSEC; } static int dummy_wakeup(struct usb_gadget *_gadget) { struct dummy_hcd *dum_hcd; dum_hcd = gadget_to_dummy_hcd(_gadget); if (!(dum_hcd->dum->devstatus & ((1 << USB_DEVICE_B_HNP_ENABLE) | (1 << USB_DEVICE_REMOTE_WAKEUP)))) return -EINVAL; if ((dum_hcd->port_status & USB_PORT_STAT_CONNECTION) == 0) return -ENOLINK; if ((dum_hcd->port_status & USB_PORT_STAT_SUSPEND) == 0 && dum_hcd->rh_state != DUMMY_RH_SUSPENDED) return -EIO; /* FIXME: What if the root hub is suspended but the port isn't? 
*/ /* hub notices our request, issues downstream resume, etc */ dum_hcd->resuming = 1; dum_hcd->re_timeout = jiffies + msecs_to_jiffies(20); mod_timer(&dummy_hcd_to_hcd(dum_hcd)->rh_timer, dum_hcd->re_timeout); return 0; } static int dummy_set_selfpowered(struct usb_gadget *_gadget, int value) { struct dummy *dum; _gadget->is_selfpowered = (value != 0); dum = gadget_to_dummy_hcd(_gadget)->dum; if (value) dum->devstatus |= (1 << USB_DEVICE_SELF_POWERED); else dum->devstatus &= ~(1 << USB_DEVICE_SELF_POWERED); return 0; } static void dummy_udc_update_ep0(struct dummy *dum) { if (dum->gadget.speed == USB_SPEED_SUPER) dum->ep[0].ep.maxpacket = 9; else dum->ep[0].ep.maxpacket = 64; } static int dummy_pullup(struct usb_gadget *_gadget, int value) { struct dummy_hcd *dum_hcd; struct dummy *dum; unsigned long flags; dum = gadget_dev_to_dummy(&_gadget->dev); dum_hcd = gadget_to_dummy_hcd(_gadget); spin_lock_irqsave(&dum->lock, flags); dum->pullup = (value != 0); set_link_state(dum_hcd); if (value == 0) { /* * Emulate synchronize_irq(): wait for callbacks to finish. * This seems to be the best place to emulate the call to * synchronize_irq() that's in usb_gadget_remove_driver(). * Doing it in dummy_udc_stop() would be too late since it * is called after the unbind callback and unbind shouldn't * be invoked until all the other callbacks are finished. */ while (dum->callback_usage > 0) { spin_unlock_irqrestore(&dum->lock, flags); usleep_range(1000, 2000); spin_lock_irqsave(&dum->lock, flags); } } spin_unlock_irqrestore(&dum->lock, flags); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static void dummy_udc_set_speed(struct usb_gadget *_gadget, enum usb_device_speed speed) { struct dummy *dum; dum = gadget_dev_to_dummy(&_gadget->dev); dum->gadget.speed = speed; dummy_udc_update_ep0(dum); } static void dummy_udc_async_callbacks(struct usb_gadget *_gadget, bool enable) { struct dummy *dum = gadget_dev_to_dummy(&_gadget->dev); spin_lock_irq(&dum->lock); dum->ints_enabled = enable; spin_unlock_irq(&dum->lock); } static int dummy_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver); static int dummy_udc_stop(struct usb_gadget *g); static const struct usb_gadget_ops dummy_ops = { .get_frame = dummy_g_get_frame, .wakeup = dummy_wakeup, .set_selfpowered = dummy_set_selfpowered, .pullup = dummy_pullup, .udc_start = dummy_udc_start, .udc_stop = dummy_udc_stop, .udc_set_speed = dummy_udc_set_speed, .udc_async_callbacks = dummy_udc_async_callbacks, }; /*-------------------------------------------------------------------------*/ /* "function" sysfs attribute */ static ssize_t function_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dummy *dum = gadget_dev_to_dummy(dev); if (!dum->driver || !dum->driver->function) return 0; return scnprintf(buf, PAGE_SIZE, "%s\n", dum->driver->function); } static DEVICE_ATTR_RO(function); /*-------------------------------------------------------------------------*/ /* * Driver registration/unregistration. * * This is basically hardware-specific; there's usually only one real USB * device (not host) controller since that's how USB devices are intended * to work. So most implementations of these api calls will rely on the * fact that only one driver will ever bind to the hardware. But curious * hardware can be built with discrete components, so the gadget API doesn't * require that assumption. * * For this emulator, it might be convenient to create a usb device * for each driver that registers: just add to a big root hub. 
*/ static int dummy_udc_start(struct usb_gadget *g, struct usb_gadget_driver *driver) { struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; switch (g->speed) { /* All the speeds we support */ case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_SUPER: break; default: dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", driver->max_speed); return -EINVAL; } /* * DEVICE side init ... the layer above hardware, which * can't enumerate without help from the driver we're binding. */ spin_lock_irq(&dum->lock); dum->devstatus = 0; dum->driver = driver; spin_unlock_irq(&dum->lock); return 0; } static int dummy_udc_stop(struct usb_gadget *g) { struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; spin_lock_irq(&dum->lock); dum->ints_enabled = 0; stop_activity(dum); dum->driver = NULL; spin_unlock_irq(&dum->lock); return 0; } #undef is_enabled /* The gadget structure is stored inside the hcd structure and will be * released along with it. */ static void init_dummy_udc_hw(struct dummy *dum) { int i; INIT_LIST_HEAD(&dum->gadget.ep_list); for (i = 0; i < DUMMY_ENDPOINTS; i++) { struct dummy_ep *ep = &dum->ep[i]; if (!ep_info[i].name) break; ep->ep.name = ep_info[i].name; ep->ep.caps = ep_info[i].caps; ep->ep.ops = &dummy_ep_ops; list_add_tail(&ep->ep.ep_list, &dum->gadget.ep_list); ep->halted = ep->wedged = ep->already_seen = ep->setup_stage = 0; usb_ep_set_maxpacket_limit(&ep->ep, ~0); ep->ep.max_streams = 16; ep->last_io = jiffies; ep->gadget = &dum->gadget; ep->desc = NULL; INIT_LIST_HEAD(&ep->queue); } dum->gadget.ep0 = &dum->ep[0].ep; list_del_init(&dum->ep[0].ep.ep_list); INIT_LIST_HEAD(&dum->fifo_req.queue); #ifdef CONFIG_USB_OTG dum->gadget.is_otg = 1; #endif } static int dummy_udc_probe(struct platform_device *pdev) { struct dummy *dum; int rc; dum = *((void **)dev_get_platdata(&pdev->dev)); /* Clear usb_gadget region for new registration to udc-core */ memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; if (mod_data.is_super_speed) dum->gadget.max_speed = USB_SPEED_SUPER; else if (mod_data.is_high_speed) dum->gadget.max_speed = USB_SPEED_HIGH; else dum->gadget.max_speed = USB_SPEED_FULL; dum->gadget.dev.parent = &pdev->dev; init_dummy_udc_hw(dum); rc = usb_add_gadget_udc(&pdev->dev, &dum->gadget); if (rc < 0) goto err_udc; rc = device_create_file(&dum->gadget.dev, &dev_attr_function); if (rc < 0) goto err_dev; platform_set_drvdata(pdev, dum); return rc; err_dev: usb_del_gadget_udc(&dum->gadget); err_udc: return rc; } static void dummy_udc_remove(struct platform_device *pdev) { struct dummy *dum = platform_get_drvdata(pdev); device_remove_file(&dum->gadget.dev, &dev_attr_function); usb_del_gadget_udc(&dum->gadget); } static void dummy_udc_pm(struct dummy *dum, struct dummy_hcd *dum_hcd, int suspend) { spin_lock_irq(&dum->lock); dum->udc_suspended = suspend; set_link_state(dum_hcd); spin_unlock_irq(&dum->lock); } static int dummy_udc_suspend(struct platform_device *pdev, pm_message_t state) { struct dummy *dum = platform_get_drvdata(pdev); struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(&dum->gadget); dev_dbg(&pdev->dev, "%s\n", __func__); dummy_udc_pm(dum, dum_hcd, 1); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static int dummy_udc_resume(struct platform_device *pdev) { struct dummy *dum = platform_get_drvdata(pdev); struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(&dum->gadget); dev_dbg(&pdev->dev, "%s\n", 
__func__); dummy_udc_pm(dum, dum_hcd, 0); usb_hcd_poll_rh_status(dummy_hcd_to_hcd(dum_hcd)); return 0; } static struct platform_driver dummy_udc_driver = { .probe = dummy_udc_probe, .remove_new = dummy_udc_remove, .suspend = dummy_udc_suspend, .resume = dummy_udc_resume, .driver = { .name = gadget_name, }, }; /*-------------------------------------------------------------------------*/ static unsigned int dummy_get_ep_idx(const struct usb_endpoint_descriptor *desc) { unsigned int index; index = usb_endpoint_num(desc) << 1; if (usb_endpoint_dir_in(desc)) index |= 1; return index; } /* HOST SIDE DRIVER * * this uses the hcd framework to hook up to host side drivers. * its root hub will only have one device, otherwise it acts like * a normal host controller. * * when urbs are queued, they're just stuck on a list that we * scan in a timer callback. that callback connects writes from * the host with reads from the device, and so on, based on the * usb 2.0 rules. */ static int dummy_ep_stream_en(struct dummy_hcd *dum_hcd, struct urb *urb) { const struct usb_endpoint_descriptor *desc = &urb->ep->desc; u32 index; if (!usb_endpoint_xfer_bulk(desc)) return 0; index = dummy_get_ep_idx(desc); return (1 << index) & dum_hcd->stream_en_ep; } /* * The max stream number is saved as a nibble so for the 30 possible endpoints * we only 15 bytes of memory. Therefore we are limited to max 16 streams (0 * means we use only 1 stream). The maximum according to the spec is 16bit so * if the 16 stream limit is about to go, the array size should be incremented * to 30 elements of type u16. */ static int get_max_streams_for_pipe(struct dummy_hcd *dum_hcd, unsigned int pipe) { int max_streams; max_streams = dum_hcd->num_stream[usb_pipeendpoint(pipe)]; if (usb_pipeout(pipe)) max_streams >>= 4; else max_streams &= 0xf; max_streams++; return max_streams; } static void set_max_streams_for_pipe(struct dummy_hcd *dum_hcd, unsigned int pipe, unsigned int streams) { int max_streams; streams--; max_streams = dum_hcd->num_stream[usb_pipeendpoint(pipe)]; if (usb_pipeout(pipe)) { streams <<= 4; max_streams &= 0xf; } else { max_streams &= 0xf0; } max_streams |= streams; dum_hcd->num_stream[usb_pipeendpoint(pipe)] = max_streams; } static int dummy_validate_stream(struct dummy_hcd *dum_hcd, struct urb *urb) { unsigned int max_streams; int enabled; enabled = dummy_ep_stream_en(dum_hcd, urb); if (!urb->stream_id) { if (enabled) return -EINVAL; return 0; } if (!enabled) return -EINVAL; max_streams = get_max_streams_for_pipe(dum_hcd, usb_pipeendpoint(urb->pipe)); if (urb->stream_id > max_streams) { dev_err(dummy_dev(dum_hcd), "Stream id %d is out of range.\n", urb->stream_id); BUG(); return -EINVAL; } return 0; } static int dummy_urb_enqueue( struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags ) { struct dummy_hcd *dum_hcd; struct urbp *urbp; unsigned long flags; int rc; urbp = kmalloc(sizeof *urbp, mem_flags); if (!urbp) return -ENOMEM; urbp->urb = urb; urbp->miter_started = 0; dum_hcd = hcd_to_dummy_hcd(hcd); spin_lock_irqsave(&dum_hcd->dum->lock, flags); rc = dummy_validate_stream(dum_hcd, urb); if (rc) { kfree(urbp); goto done; } rc = usb_hcd_link_urb_to_ep(hcd, urb); if (rc) { kfree(urbp); goto done; } if (!dum_hcd->udev) { dum_hcd->udev = urb->dev; usb_get_dev(dum_hcd->udev); } else if (unlikely(dum_hcd->udev != urb->dev)) dev_err(dummy_dev(dum_hcd), "usb_device address has changed!\n"); list_add_tail(&urbp->urbp_list, &dum_hcd->urbp_list); urb->hcpriv = urbp; if (!dum_hcd->next_frame_urbp) dum_hcd->next_frame_urbp = urbp; if 
(usb_pipetype(urb->pipe) == PIPE_CONTROL) urb->error_count = 1; /* mark as a new urb */ /* kick the scheduler, it'll do the rest */ if (!hrtimer_active(&dum_hcd->timer)) hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS), HRTIMER_MODE_REL); done: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return rc; } static int dummy_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) { struct dummy_hcd *dum_hcd; unsigned long flags; int rc; /* giveback happens automatically in timer callback, * so make sure the callback happens */ dum_hcd = hcd_to_dummy_hcd(hcd); spin_lock_irqsave(&dum_hcd->dum->lock, flags); rc = usb_hcd_check_unlink_urb(hcd, urb, status); if (!rc && dum_hcd->rh_state != DUMMY_RH_RUNNING && !list_empty(&dum_hcd->urbp_list)) hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL); spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return rc; } static int dummy_perform_transfer(struct urb *urb, struct dummy_request *req, u32 len) { void *ubuf, *rbuf; struct urbp *urbp = urb->hcpriv; int to_host; struct sg_mapping_iter *miter = &urbp->miter; u32 trans = 0; u32 this_sg; bool next_sg; to_host = usb_urb_dir_in(urb); rbuf = req->req.buf + req->req.actual; if (!urb->num_sgs) { ubuf = urb->transfer_buffer + urb->actual_length; if (to_host) memcpy(ubuf, rbuf, len); else memcpy(rbuf, ubuf, len); return len; } if (!urbp->miter_started) { u32 flags = SG_MITER_ATOMIC; if (to_host) flags |= SG_MITER_TO_SG; else flags |= SG_MITER_FROM_SG; sg_miter_start(miter, urb->sg, urb->num_sgs, flags); urbp->miter_started = 1; } next_sg = sg_miter_next(miter); if (next_sg == false) { WARN_ON_ONCE(1); return -EINVAL; } do { ubuf = miter->addr; this_sg = min_t(u32, len, miter->length); miter->consumed = this_sg; trans += this_sg; if (to_host) memcpy(ubuf, rbuf, this_sg); else memcpy(rbuf, ubuf, this_sg); len -= this_sg; if (!len) break; next_sg = sg_miter_next(miter); if (next_sg == false) { WARN_ON_ONCE(1); return -EINVAL; } rbuf += this_sg; } while (1); sg_miter_stop(miter); return trans; } /* transfer up to a frame's worth; caller must own lock */ static int transfer(struct dummy_hcd *dum_hcd, struct urb *urb, struct dummy_ep *ep, int limit, int *status) { struct dummy *dum = dum_hcd->dum; struct dummy_request *req; int sent = 0; top: /* if there's no request queued, the device is NAKing; return */ list_for_each_entry(req, &ep->queue, queue) { unsigned host_len, dev_len, len; int is_short, to_host; int rescan = 0; if (dummy_ep_stream_en(dum_hcd, urb)) { if ((urb->stream_id != req->req.stream_id)) continue; } /* 1..N packets of ep->ep.maxpacket each ... the last one * may be short (including zero length). * * writer can send a zlp explicitly (length 0) or implicitly * (length mod maxpacket zero, and 'zero' flag); they always * terminate reads. */ host_len = urb->transfer_buffer_length - urb->actual_length; dev_len = req->req.length - req->req.actual; len = min(host_len, dev_len); /* FIXME update emulated data toggle too */ to_host = usb_urb_dir_in(urb); if (unlikely(len == 0)) is_short = 1; else { /* not enough bandwidth left? 
*/ if (limit < ep->ep.maxpacket && limit < len) break; len = min_t(unsigned, len, limit); if (len == 0) break; /* send multiple of maxpacket first, then remainder */ if (len >= ep->ep.maxpacket) { is_short = 0; if (len % ep->ep.maxpacket) rescan = 1; len -= len % ep->ep.maxpacket; } else { is_short = 1; } len = dummy_perform_transfer(urb, req, len); ep->last_io = jiffies; if ((int)len < 0) { req->req.status = len; } else { limit -= len; sent += len; urb->actual_length += len; req->req.actual += len; } } /* short packets terminate, maybe with overflow/underflow. * it's only really an error to write too much. * * partially filling a buffer optionally blocks queue advances * (so completion handlers can clean up the queue) but we don't * need to emulate such data-in-flight. */ if (is_short) { if (host_len == dev_len) { req->req.status = 0; *status = 0; } else if (to_host) { req->req.status = 0; if (dev_len > host_len) *status = -EOVERFLOW; else *status = 0; } else { *status = 0; if (host_len > dev_len) req->req.status = -EOVERFLOW; else req->req.status = 0; } /* * many requests terminate without a short packet. * send a zlp if demanded by flags. */ } else { if (req->req.length == req->req.actual) { if (req->req.zero && to_host) rescan = 1; else req->req.status = 0; } if (urb->transfer_buffer_length == urb->actual_length) { if (urb->transfer_flags & URB_ZERO_PACKET && !to_host) rescan = 1; else *status = 0; } } /* device side completion --> continuable */ if (req->req.status != -EINPROGRESS) { list_del_init(&req->queue); spin_unlock(&dum->lock); usb_gadget_giveback_request(&ep->ep, &req->req); spin_lock(&dum->lock); /* requests might have been unlinked... */ rescan = 1; } /* host side completion --> terminate */ if (*status != -EINPROGRESS) break; /* rescan to continue with any other queued i/o */ if (rescan) goto top; } return sent; } static int periodic_bytes(struct dummy *dum, struct dummy_ep *ep) { int limit = ep->ep.maxpacket; if (dum->gadget.speed == USB_SPEED_HIGH) { int tmp; /* high bandwidth mode */ tmp = usb_endpoint_maxp_mult(ep->desc); tmp *= 8 /* applies to entire frame */; limit += limit * tmp; } if (dum->gadget.speed == USB_SPEED_SUPER) { switch (usb_endpoint_type(ep->desc)) { case USB_ENDPOINT_XFER_ISOC: /* Sec. 4.4.8.2 USB3.0 Spec */ limit = 3 * 16 * 1024 * 8; break; case USB_ENDPOINT_XFER_INT: /* Sec. 4.4.7.2 USB3.0 Spec */ limit = 3 * 1024 * 8; break; case USB_ENDPOINT_XFER_BULK: default: break; } } return limit; } #define is_active(dum_hcd) ((dum_hcd->port_status & \ (USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE | \ USB_PORT_STAT_SUSPEND)) \ == (USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE)) static struct dummy_ep *find_endpoint(struct dummy *dum, u8 address) { int i; if (!is_active((dum->gadget.speed == USB_SPEED_SUPER ? 
dum->ss_hcd : dum->hs_hcd))) return NULL; if (!dum->ints_enabled) return NULL; if ((address & ~USB_DIR_IN) == 0) return &dum->ep[0]; for (i = 1; i < DUMMY_ENDPOINTS; i++) { struct dummy_ep *ep = &dum->ep[i]; if (!ep->desc) continue; if (ep->desc->bEndpointAddress == address) return ep; } return NULL; } #undef is_active #define Dev_Request (USB_TYPE_STANDARD | USB_RECIP_DEVICE) #define Dev_InRequest (Dev_Request | USB_DIR_IN) #define Intf_Request (USB_TYPE_STANDARD | USB_RECIP_INTERFACE) #define Intf_InRequest (Intf_Request | USB_DIR_IN) #define Ep_Request (USB_TYPE_STANDARD | USB_RECIP_ENDPOINT) #define Ep_InRequest (Ep_Request | USB_DIR_IN) /** * handle_control_request() - handles all control transfers * @dum_hcd: pointer to dummy (the_controller) * @urb: the urb request to handle * @setup: pointer to the setup data for a USB device control * request * @status: pointer to request handling status * * Return 0 - if the request was handled * 1 - if the request wasn't handles * error code on error */ static int handle_control_request(struct dummy_hcd *dum_hcd, struct urb *urb, struct usb_ctrlrequest *setup, int *status) { struct dummy_ep *ep2; struct dummy *dum = dum_hcd->dum; int ret_val = 1; unsigned w_index; unsigned w_value; w_index = le16_to_cpu(setup->wIndex); w_value = le16_to_cpu(setup->wValue); switch (setup->bRequest) { case USB_REQ_SET_ADDRESS: if (setup->bRequestType != Dev_Request) break; dum->address = w_value; *status = 0; dev_dbg(udc_dev(dum), "set_address = %d\n", w_value); ret_val = 0; break; case USB_REQ_SET_FEATURE: if (setup->bRequestType == Dev_Request) { ret_val = 0; switch (w_value) { case USB_DEVICE_REMOTE_WAKEUP: break; case USB_DEVICE_B_HNP_ENABLE: dum->gadget.b_hnp_enable = 1; break; case USB_DEVICE_A_HNP_SUPPORT: dum->gadget.a_hnp_support = 1; break; case USB_DEVICE_A_ALT_HNP_SUPPORT: dum->gadget.a_alt_hnp_support = 1; break; case USB_DEVICE_U1_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_U1_ENABLED; else ret_val = -EOPNOTSUPP; break; case USB_DEVICE_U2_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_U2_ENABLED; else ret_val = -EOPNOTSUPP; break; case USB_DEVICE_LTM_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_LTM_ENABLED; else ret_val = -EOPNOTSUPP; break; default: ret_val = -EOPNOTSUPP; } if (ret_val == 0) { dum->devstatus |= (1 << w_value); *status = 0; } } else if (setup->bRequestType == Ep_Request) { /* endpoint halt */ ep2 = find_endpoint(dum, w_index); if (!ep2 || ep2->ep.name == ep0name) { ret_val = -EOPNOTSUPP; break; } ep2->halted = 1; ret_val = 0; *status = 0; } break; case USB_REQ_CLEAR_FEATURE: if (setup->bRequestType == Dev_Request) { ret_val = 0; switch (w_value) { case USB_DEVICE_REMOTE_WAKEUP: w_value = USB_DEVICE_REMOTE_WAKEUP; break; case USB_DEVICE_U1_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_U1_ENABLED; else ret_val = -EOPNOTSUPP; break; case USB_DEVICE_U2_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_U2_ENABLED; else ret_val = -EOPNOTSUPP; break; case USB_DEVICE_LTM_ENABLE: if (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3) w_value = USB_DEV_STAT_LTM_ENABLED; else ret_val = -EOPNOTSUPP; break; default: ret_val = -EOPNOTSUPP; break; } if (ret_val == 0) { dum->devstatus &= ~(1 << w_value); *status = 0; } } else if (setup->bRequestType == Ep_Request) { /* endpoint halt */ ep2 = find_endpoint(dum, w_index); if (!ep2) { ret_val = -EOPNOTSUPP; break; } if (!ep2->wedged) 
ep2->halted = 0; ret_val = 0; *status = 0; } break; case USB_REQ_GET_STATUS: if (setup->bRequestType == Dev_InRequest || setup->bRequestType == Intf_InRequest || setup->bRequestType == Ep_InRequest) { char *buf; /* * device: remote wakeup, selfpowered * interface: nothing * endpoint: halt */ buf = (char *)urb->transfer_buffer; if (urb->transfer_buffer_length > 0) { if (setup->bRequestType == Ep_InRequest) { ep2 = find_endpoint(dum, w_index); if (!ep2) { ret_val = -EOPNOTSUPP; break; } buf[0] = ep2->halted; } else if (setup->bRequestType == Dev_InRequest) { buf[0] = (u8)dum->devstatus; } else buf[0] = 0; } if (urb->transfer_buffer_length > 1) buf[1] = 0; urb->actual_length = min_t(u32, 2, urb->transfer_buffer_length); ret_val = 0; *status = 0; } break; } return ret_val; } /* * Drive both sides of the transfers; looks like irq handlers to both * drivers except that the callbacks are invoked from soft interrupt * context. */ static enum hrtimer_restart dummy_timer(struct hrtimer *t) { struct dummy_hcd *dum_hcd = from_timer(dum_hcd, t, timer); struct dummy *dum = dum_hcd->dum; struct urbp *urbp, *tmp; unsigned long flags; int limit, total; int i; /* simplistic model for one frame's bandwidth */ /* FIXME: account for transaction and packet overhead */ switch (dum->gadget.speed) { case USB_SPEED_LOW: total = 8/*bytes*/ * 12/*packets*/; break; case USB_SPEED_FULL: total = 64/*bytes*/ * 19/*packets*/; break; case USB_SPEED_HIGH: total = 512/*bytes*/ * 13/*packets*/ * 8/*uframes*/; break; case USB_SPEED_SUPER: /* Bus speed is 500000 bytes/ms, so use a little less */ total = 490000; break; default: /* Can't happen */ dev_err(dummy_dev(dum_hcd), "bogus device speed\n"); total = 0; break; } /* look at each urb queued by the host side driver */ spin_lock_irqsave(&dum->lock, flags); if (!dum_hcd->udev) { dev_err(dummy_dev(dum_hcd), "timer fired with no URBs pending?\n"); spin_unlock_irqrestore(&dum->lock, flags); return HRTIMER_NORESTART; } dum_hcd->next_frame_urbp = NULL; for (i = 0; i < DUMMY_ENDPOINTS; i++) { if (!ep_info[i].name) break; dum->ep[i].already_seen = 0; } restart: list_for_each_entry_safe(urbp, tmp, &dum_hcd->urbp_list, urbp_list) { struct urb *urb; struct dummy_request *req; u8 address; struct dummy_ep *ep = NULL; int status = -EINPROGRESS; /* stop when we reach URBs queued after the timer interrupt */ if (urbp == dum_hcd->next_frame_urbp) break; urb = urbp->urb; if (urb->unlinked) goto return_urb; else if (dum_hcd->rh_state != DUMMY_RH_RUNNING) continue; /* Used up this frame's bandwidth? */ if (total <= 0) continue; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); if (usb_urb_dir_in(urb)) address |= USB_DIR_IN; ep = find_endpoint(dum, address); if (!ep) { /* set_configuration() disagreement */ dev_dbg(dummy_dev(dum_hcd), "no ep configured for urb %p\n", urb); status = -EPROTO; goto return_urb; } if (ep->already_seen) continue; ep->already_seen = 1; if (ep == &dum->ep[0] && urb->error_count) { ep->setup_stage = 1; /* a new urb */ urb->error_count = 0; } if (ep->halted && !ep->setup_stage) { /* NOTE: must not be iso! 
*/ dev_dbg(dummy_dev(dum_hcd), "ep %s halted, urb %p\n", ep->ep.name, urb); status = -EPIPE; goto return_urb; } /* FIXME make sure both ends agree on maxpacket */ /* handle control requests */ if (ep == &dum->ep[0] && ep->setup_stage) { struct usb_ctrlrequest setup; int value; setup = *(struct usb_ctrlrequest *) urb->setup_packet; /* paranoia, in case of stale queued data */ list_for_each_entry(req, &ep->queue, queue) { list_del_init(&req->queue); req->req.status = -EOVERFLOW; dev_dbg(udc_dev(dum), "stale req = %p\n", req); spin_unlock(&dum->lock); usb_gadget_giveback_request(&ep->ep, &req->req); spin_lock(&dum->lock); ep->already_seen = 0; goto restart; } /* gadget driver never sees set_address or operations * on standard feature flags. some hardware doesn't * even expose them. */ ep->last_io = jiffies; ep->setup_stage = 0; ep->halted = 0; value = handle_control_request(dum_hcd, urb, &setup, &status); /* gadget driver handles all other requests. block * until setup() returns; no reentrancy issues etc. */ if (value > 0) { ++dum->callback_usage; spin_unlock(&dum->lock); value = dum->driver->setup(&dum->gadget, &setup); spin_lock(&dum->lock); --dum->callback_usage; if (value >= 0) { /* no delays (max 64KB data stage) */ limit = 64*1024; goto treat_control_like_bulk; } /* error, see below */ } if (value < 0) { if (value != -EOPNOTSUPP) dev_dbg(udc_dev(dum), "setup --> %d\n", value); status = -EPIPE; urb->actual_length = 0; } goto return_urb; } /* non-control requests */ limit = total; switch (usb_pipetype(urb->pipe)) { case PIPE_ISOCHRONOUS: /* * We don't support isochronous. But if we did, * here are some of the issues we'd have to face: * * Is it urb->interval since the last xfer? * Use urb->iso_frame_desc[i]. * Complete whether or not ep has requests queued. * Report random errors, to debug drivers. */ limit = max(limit, periodic_bytes(dum, ep)); status = -EINVAL; /* fail all xfers */ break; case PIPE_INTERRUPT: /* FIXME is it urb->interval since the last xfer? * this almost certainly polls too fast. */ limit = max(limit, periodic_bytes(dum, ep)); fallthrough; default: treat_control_like_bulk: ep->last_io = jiffies; total -= transfer(dum_hcd, urb, ep, limit, &status); break; } /* incomplete transfer? 
*/ if (status == -EINPROGRESS) continue; return_urb: list_del(&urbp->urbp_list); kfree(urbp); if (ep) ep->already_seen = ep->setup_stage = 0; usb_hcd_unlink_urb_from_ep(dummy_hcd_to_hcd(dum_hcd), urb); spin_unlock(&dum->lock); usb_hcd_giveback_urb(dummy_hcd_to_hcd(dum_hcd), urb, status); spin_lock(&dum->lock); goto restart; } if (list_empty(&dum_hcd->urbp_list)) { usb_put_dev(dum_hcd->udev); dum_hcd->udev = NULL; } else if (dum_hcd->rh_state == DUMMY_RH_RUNNING) { /* want a 1 msec delay here */ hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS), HRTIMER_MODE_REL); } spin_unlock_irqrestore(&dum->lock, flags); return HRTIMER_NORESTART; } /*-------------------------------------------------------------------------*/ #define PORT_C_MASK \ ((USB_PORT_STAT_C_CONNECTION \ | USB_PORT_STAT_C_ENABLE \ | USB_PORT_STAT_C_SUSPEND \ | USB_PORT_STAT_C_OVERCURRENT \ | USB_PORT_STAT_C_RESET) << 16) static int dummy_hub_status(struct usb_hcd *hcd, char *buf) { struct dummy_hcd *dum_hcd; unsigned long flags; int retval = 0; dum_hcd = hcd_to_dummy_hcd(hcd); spin_lock_irqsave(&dum_hcd->dum->lock, flags); if (!HCD_HW_ACCESSIBLE(hcd)) goto done; if (dum_hcd->resuming && time_after_eq(jiffies, dum_hcd->re_timeout)) { dum_hcd->port_status |= (USB_PORT_STAT_C_SUSPEND << 16); dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND; set_link_state(dum_hcd); } if ((dum_hcd->port_status & PORT_C_MASK) != 0) { *buf = (1 << 1); dev_dbg(dummy_dev(dum_hcd), "port status 0x%08x has changes\n", dum_hcd->port_status); retval = 1; if (dum_hcd->rh_state == DUMMY_RH_SUSPENDED) usb_hcd_resume_root_hub(hcd); } done: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return retval; } /* usb 3.0 root hub device descriptor */ static struct { struct usb_bos_descriptor bos; struct usb_ss_cap_descriptor ss_cap; } __packed usb3_bos_desc = { .bos = { .bLength = USB_DT_BOS_SIZE, .bDescriptorType = USB_DT_BOS, .wTotalLength = cpu_to_le16(sizeof(usb3_bos_desc)), .bNumDeviceCaps = 1, }, .ss_cap = { .bLength = USB_DT_USB_SS_CAP_SIZE, .bDescriptorType = USB_DT_DEVICE_CAPABILITY, .bDevCapabilityType = USB_SS_CAP_TYPE, .wSpeedSupported = cpu_to_le16(USB_5GBPS_OPERATION), .bFunctionalitySupport = ilog2(USB_5GBPS_OPERATION), }, }; static inline void ss_hub_descriptor(struct usb_hub_descriptor *desc) { memset(desc, 0, sizeof *desc); desc->bDescriptorType = USB_DT_SS_HUB; desc->bDescLength = 12; desc->wHubCharacteristics = cpu_to_le16( HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM); desc->bNbrPorts = 1; desc->u.ss.bHubHdrDecLat = 0x04; /* Worst case: 0.4 micro sec*/ desc->u.ss.DeviceRemovable = 0; } static inline void hub_descriptor(struct usb_hub_descriptor *desc) { memset(desc, 0, sizeof *desc); desc->bDescriptorType = USB_DT_HUB; desc->bDescLength = 9; desc->wHubCharacteristics = cpu_to_le16( HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM); desc->bNbrPorts = 1; desc->u.hs.DeviceRemovable[0] = 0; desc->u.hs.DeviceRemovable[1] = 0xff; /* PortPwrCtrlMask */ } static int dummy_hub_control( struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength ) { struct dummy_hcd *dum_hcd; int retval = 0; unsigned long flags; if (!HCD_HW_ACCESSIBLE(hcd)) return -ETIMEDOUT; dum_hcd = hcd_to_dummy_hcd(hcd); spin_lock_irqsave(&dum_hcd->dum->lock, flags); switch (typeReq) { case ClearHubFeature: break; case ClearPortFeature: switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_SUSPEND req not " "supported for USB 3.0 roothub\n"); goto error; } if (dum_hcd->port_status & 
USB_PORT_STAT_SUSPEND) { /* 20msec resume signaling */ dum_hcd->resuming = 1; dum_hcd->re_timeout = jiffies + msecs_to_jiffies(20); } break; case USB_PORT_FEAT_POWER: dev_dbg(dummy_dev(dum_hcd), "power-off\n"); if (hcd->speed == HCD_USB3) dum_hcd->port_status &= ~USB_SS_PORT_STAT_POWER; else dum_hcd->port_status &= ~USB_PORT_STAT_POWER; set_link_state(dum_hcd); break; case USB_PORT_FEAT_ENABLE: case USB_PORT_FEAT_C_ENABLE: case USB_PORT_FEAT_C_SUSPEND: /* Not allowed for USB-3 */ if (hcd->speed == HCD_USB3) goto error; fallthrough; case USB_PORT_FEAT_C_CONNECTION: case USB_PORT_FEAT_C_RESET: dum_hcd->port_status &= ~(1 << wValue); set_link_state(dum_hcd); break; default: /* Disallow INDICATOR and C_OVER_CURRENT */ goto error; } break; case GetHubDescriptor: if (hcd->speed == HCD_USB3 && (wLength < USB_DT_SS_HUB_SIZE || wValue != (USB_DT_SS_HUB << 8))) { dev_dbg(dummy_dev(dum_hcd), "Wrong hub descriptor type for " "USB 3.0 roothub.\n"); goto error; } if (hcd->speed == HCD_USB3) ss_hub_descriptor((struct usb_hub_descriptor *) buf); else hub_descriptor((struct usb_hub_descriptor *) buf); break; case DeviceRequest | USB_REQ_GET_DESCRIPTOR: if (hcd->speed != HCD_USB3) goto error; if ((wValue >> 8) != USB_DT_BOS) goto error; memcpy(buf, &usb3_bos_desc, sizeof(usb3_bos_desc)); retval = sizeof(usb3_bos_desc); break; case GetHubStatus: *(__le32 *) buf = cpu_to_le32(0); break; case GetPortStatus: if (wIndex != 1) retval = -EPIPE; /* whoever resets or resumes must GetPortStatus to * complete it!! */ if (dum_hcd->resuming && time_after_eq(jiffies, dum_hcd->re_timeout)) { dum_hcd->port_status |= (USB_PORT_STAT_C_SUSPEND << 16); dum_hcd->port_status &= ~USB_PORT_STAT_SUSPEND; } if ((dum_hcd->port_status & USB_PORT_STAT_RESET) != 0 && time_after_eq(jiffies, dum_hcd->re_timeout)) { dum_hcd->port_status |= (USB_PORT_STAT_C_RESET << 16); dum_hcd->port_status &= ~USB_PORT_STAT_RESET; if (dum_hcd->dum->pullup) { dum_hcd->port_status |= USB_PORT_STAT_ENABLE; if (hcd->speed < HCD_USB3) { switch (dum_hcd->dum->gadget.speed) { case USB_SPEED_HIGH: dum_hcd->port_status |= USB_PORT_STAT_HIGH_SPEED; break; case USB_SPEED_LOW: dum_hcd->dum->gadget.ep0-> maxpacket = 8; dum_hcd->port_status |= USB_PORT_STAT_LOW_SPEED; break; default: break; } } } } set_link_state(dum_hcd); ((__le16 *) buf)[0] = cpu_to_le16(dum_hcd->port_status); ((__le16 *) buf)[1] = cpu_to_le16(dum_hcd->port_status >> 16); break; case SetHubFeature: retval = -EPIPE; break; case SetPortFeature: switch (wValue) { case USB_PORT_FEAT_LINK_STATE: if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_LINK_STATE req not " "supported for USB 2.0 roothub\n"); goto error; } /* * Since this is dummy we don't have an actual link so * there is nothing to do for the SET_LINK_STATE cmd */ break; case USB_PORT_FEAT_U1_TIMEOUT: case USB_PORT_FEAT_U2_TIMEOUT: /* TODO: add suspend/resume support! */ if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_U1/2_TIMEOUT req not " "supported for USB 2.0 roothub\n"); goto error; } break; case USB_PORT_FEAT_SUSPEND: /* Applicable only for USB2.0 hub */ if (hcd->speed == HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_SUSPEND req not " "supported for USB 3.0 roothub\n"); goto error; } if (dum_hcd->active) { dum_hcd->port_status |= USB_PORT_STAT_SUSPEND; /* HNP would happen here; for now we * assume b_bus_req is always true. 
*/ set_link_state(dum_hcd); if (((1 << USB_DEVICE_B_HNP_ENABLE) & dum_hcd->dum->devstatus) != 0) dev_dbg(dummy_dev(dum_hcd), "no HNP yet!\n"); } break; case USB_PORT_FEAT_POWER: if (hcd->speed == HCD_USB3) dum_hcd->port_status |= USB_SS_PORT_STAT_POWER; else dum_hcd->port_status |= USB_PORT_STAT_POWER; set_link_state(dum_hcd); break; case USB_PORT_FEAT_BH_PORT_RESET: /* Applicable only for USB3.0 hub */ if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "USB_PORT_FEAT_BH_PORT_RESET req not " "supported for USB 2.0 roothub\n"); goto error; } fallthrough; case USB_PORT_FEAT_RESET: if (!(dum_hcd->port_status & USB_PORT_STAT_CONNECTION)) break; /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { dum_hcd->port_status = (USB_SS_PORT_STAT_POWER | USB_PORT_STAT_CONNECTION | USB_PORT_STAT_RESET); } else { dum_hcd->port_status &= ~(USB_PORT_STAT_ENABLE | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED); dum_hcd->port_status |= USB_PORT_STAT_RESET; } /* * We want to reset device status. All but the * Self powered feature */ dum_hcd->dum->devstatus &= (1 << USB_DEVICE_SELF_POWERED); /* * FIXME USB3.0: what is the correct reset signaling * interval? Is it still 50msec as for HS? */ dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50); set_link_state(dum_hcd); break; case USB_PORT_FEAT_C_CONNECTION: case USB_PORT_FEAT_C_RESET: case USB_PORT_FEAT_C_ENABLE: case USB_PORT_FEAT_C_SUSPEND: /* Not allowed for USB-3, and ignored for USB-2 */ if (hcd->speed == HCD_USB3) goto error; break; default: /* Disallow TEST, INDICATOR, and C_OVER_CURRENT */ goto error; } break; case GetPortErrorCount: if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "GetPortErrorCount req not " "supported for USB 2.0 roothub\n"); goto error; } /* We'll always return 0 since this is a dummy hub */ *(__le32 *) buf = cpu_to_le32(0); break; case SetHubDepth: if (hcd->speed != HCD_USB3) { dev_dbg(dummy_dev(dum_hcd), "SetHubDepth req not supported for " "USB 2.0 roothub\n"); goto error; } break; default: dev_dbg(dummy_dev(dum_hcd), "hub control req%04x v%04x i%04x l%d\n", typeReq, wValue, wIndex, wLength); error: /* "protocol stall" on error */ retval = -EPIPE; } spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); if ((dum_hcd->port_status & PORT_C_MASK) != 0) usb_hcd_poll_rh_status(hcd); return retval; } static int dummy_bus_suspend(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__); spin_lock_irq(&dum_hcd->dum->lock); dum_hcd->rh_state = DUMMY_RH_SUSPENDED; set_link_state(dum_hcd); hcd->state = HC_STATE_SUSPENDED; spin_unlock_irq(&dum_hcd->dum->lock); return 0; } static int dummy_bus_resume(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); int rc = 0; dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__); spin_lock_irq(&dum_hcd->dum->lock); if (!HCD_HW_ACCESSIBLE(hcd)) { rc = -ESHUTDOWN; } else { dum_hcd->rh_state = DUMMY_RH_RUNNING; set_link_state(dum_hcd); if (!list_empty(&dum_hcd->urbp_list)) hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL); hcd->state = HC_STATE_RUNNING; } spin_unlock_irq(&dum_hcd->dum->lock); return rc; } /*-------------------------------------------------------------------------*/ static inline ssize_t show_urb(char *buf, size_t size, struct urb *urb) { int ep = usb_pipeendpoint(urb->pipe); return scnprintf(buf, size, "urb/%p %s ep%d%s%s len %d/%d\n", urb, ({ char *s; switch (urb->dev->speed) { case USB_SPEED_LOW: s = "ls"; break; case USB_SPEED_FULL: s = "fs"; break; case 
USB_SPEED_HIGH: s = "hs"; break; case USB_SPEED_SUPER: s = "ss"; break; default: s = "?"; break; } s; }), ep, ep ? (usb_urb_dir_in(urb) ? "in" : "out") : "", ({ char *s; \ switch (usb_pipetype(urb->pipe)) { \ case PIPE_CONTROL: \ s = ""; \ break; \ case PIPE_BULK: \ s = "-bulk"; \ break; \ case PIPE_INTERRUPT: \ s = "-int"; \ break; \ default: \ s = "-iso"; \ break; \ } s; }), urb->actual_length, urb->transfer_buffer_length); } static ssize_t urbs_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); struct urbp *urbp; size_t size = 0; unsigned long flags; spin_lock_irqsave(&dum_hcd->dum->lock, flags); list_for_each_entry(urbp, &dum_hcd->urbp_list, urbp_list) { size_t temp; temp = show_urb(buf, PAGE_SIZE - size, urbp->urb); buf += temp; size += temp; } spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return size; } static DEVICE_ATTR_RO(urbs); static int dummy_start_ss(struct dummy_hcd *dum_hcd) { hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); dum_hcd->timer.function = dummy_timer; dum_hcd->rh_state = DUMMY_RH_RUNNING; dum_hcd->stream_en_ep = 0; INIT_LIST_HEAD(&dum_hcd->urbp_list); dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET_3; dummy_hcd_to_hcd(dum_hcd)->state = HC_STATE_RUNNING; dummy_hcd_to_hcd(dum_hcd)->uses_new_polling = 1; #ifdef CONFIG_USB_OTG dummy_hcd_to_hcd(dum_hcd)->self.otg_port = 1; #endif return 0; /* FIXME 'urbs' should be a per-device thing, maybe in usbcore */ return device_create_file(dummy_dev(dum_hcd), &dev_attr_urbs); } static int dummy_start(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); /* * HOST side init ... we emulate a root hub that'll only ever * talk to one device (the gadget side). Also appears in sysfs, * just like more familiar pci-based HCDs. */ if (!usb_hcd_is_primary_hcd(hcd)) return dummy_start_ss(dum_hcd); spin_lock_init(&dum_hcd->dum->lock); hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); dum_hcd->timer.function = dummy_timer; dum_hcd->rh_state = DUMMY_RH_RUNNING; INIT_LIST_HEAD(&dum_hcd->urbp_list); hcd->power_budget = POWER_BUDGET; hcd->state = HC_STATE_RUNNING; hcd->uses_new_polling = 1; #ifdef CONFIG_USB_OTG hcd->self.otg_port = 1; #endif /* FIXME 'urbs' should be a per-device thing, maybe in usbcore */ return device_create_file(dummy_dev(dum_hcd), &dev_attr_urbs); } static void dummy_stop(struct usb_hcd *hcd) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); hrtimer_cancel(&dum_hcd->timer); device_remove_file(dummy_dev(dum_hcd), &dev_attr_urbs); dev_info(dummy_dev(dum_hcd), "stopped\n"); } /*-------------------------------------------------------------------------*/ static int dummy_h_get_frame(struct usb_hcd *hcd) { return dummy_g_get_frame(NULL); } static int dummy_setup(struct usb_hcd *hcd) { struct dummy *dum; dum = *((void **)dev_get_platdata(hcd->self.controller)); hcd->self.sg_tablesize = ~0; if (usb_hcd_is_primary_hcd(hcd)) { dum->hs_hcd = hcd_to_dummy_hcd(hcd); dum->hs_hcd->dum = dum; /* * Mark the first roothub as being USB 2.0. 
* The USB 3.0 roothub will be registered later by * dummy_hcd_probe() */ hcd->speed = HCD_USB2; hcd->self.root_hub->speed = USB_SPEED_HIGH; } else { dum->ss_hcd = hcd_to_dummy_hcd(hcd); dum->ss_hcd->dum = dum; hcd->speed = HCD_USB3; hcd->self.root_hub->speed = USB_SPEED_SUPER; } return 0; } /* Change a group of bulk endpoints to support multiple stream IDs */ static int dummy_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, unsigned int num_streams, gfp_t mem_flags) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); unsigned long flags; int max_stream; int ret_streams = num_streams; unsigned int index; unsigned int i; if (!num_eps) return -EINVAL; spin_lock_irqsave(&dum_hcd->dum->lock, flags); for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); if ((1 << index) & dum_hcd->stream_en_ep) { ret_streams = -EINVAL; goto out; } max_stream = usb_ss_max_streams(&eps[i]->ss_ep_comp); if (!max_stream) { ret_streams = -EINVAL; goto out; } if (max_stream < ret_streams) { dev_dbg(dummy_dev(dum_hcd), "Ep 0x%x only supports %u " "stream IDs.\n", eps[i]->desc.bEndpointAddress, max_stream); ret_streams = max_stream; } } for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); dum_hcd->stream_en_ep |= 1 << index; set_max_streams_for_pipe(dum_hcd, usb_endpoint_num(&eps[i]->desc), ret_streams); } out: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return ret_streams; } /* Reverts a group of bulk endpoints back to not using stream IDs. */ static int dummy_free_streams(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, gfp_t mem_flags) { struct dummy_hcd *dum_hcd = hcd_to_dummy_hcd(hcd); unsigned long flags; int ret; unsigned int index; unsigned int i; spin_lock_irqsave(&dum_hcd->dum->lock, flags); for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); if (!((1 << index) & dum_hcd->stream_en_ep)) { ret = -EINVAL; goto out; } } for (i = 0; i < num_eps; i++) { index = dummy_get_ep_idx(&eps[i]->desc); dum_hcd->stream_en_ep &= ~(1 << index); set_max_streams_for_pipe(dum_hcd, usb_endpoint_num(&eps[i]->desc), 0); } ret = 0; out: spin_unlock_irqrestore(&dum_hcd->dum->lock, flags); return ret; } static struct hc_driver dummy_hcd = { .description = (char *) driver_name, .product_desc = "Dummy host controller", .hcd_priv_size = sizeof(struct dummy_hcd), .reset = dummy_setup, .start = dummy_start, .stop = dummy_stop, .urb_enqueue = dummy_urb_enqueue, .urb_dequeue = dummy_urb_dequeue, .get_frame_number = dummy_h_get_frame, .hub_status_data = dummy_hub_status, .hub_control = dummy_hub_control, .bus_suspend = dummy_bus_suspend, .bus_resume = dummy_bus_resume, .alloc_streams = dummy_alloc_streams, .free_streams = dummy_free_streams, }; static int dummy_hcd_probe(struct platform_device *pdev) { struct dummy *dum; struct usb_hcd *hs_hcd; struct usb_hcd *ss_hcd; int retval; dev_info(&pdev->dev, "%s, driver " DRIVER_VERSION "\n", driver_desc); dum = *((void **)dev_get_platdata(&pdev->dev)); if (mod_data.is_super_speed) dummy_hcd.flags = HCD_USB3 | HCD_SHARED; else if (mod_data.is_high_speed) dummy_hcd.flags = HCD_USB2; else dummy_hcd.flags = HCD_USB11; hs_hcd = usb_create_hcd(&dummy_hcd, &pdev->dev, dev_name(&pdev->dev)); if (!hs_hcd) return -ENOMEM; hs_hcd->has_tt = 1; retval = usb_add_hcd(hs_hcd, 0, 0); if (retval) goto put_usb2_hcd; if (mod_data.is_super_speed) { ss_hcd = usb_create_shared_hcd(&dummy_hcd, &pdev->dev, dev_name(&pdev->dev), hs_hcd); if 
(!ss_hcd) { retval = -ENOMEM; goto dealloc_usb2_hcd; } retval = usb_add_hcd(ss_hcd, 0, 0); if (retval) goto put_usb3_hcd; } return 0; put_usb3_hcd: usb_put_hcd(ss_hcd); dealloc_usb2_hcd: usb_remove_hcd(hs_hcd); put_usb2_hcd: usb_put_hcd(hs_hcd); dum->hs_hcd = dum->ss_hcd = NULL; return retval; } static void dummy_hcd_remove(struct platform_device *pdev) { struct dummy *dum; dum = hcd_to_dummy_hcd(platform_get_drvdata(pdev))->dum; if (dum->ss_hcd) { usb_remove_hcd(dummy_hcd_to_hcd(dum->ss_hcd)); usb_put_hcd(dummy_hcd_to_hcd(dum->ss_hcd)); } usb_remove_hcd(dummy_hcd_to_hcd(dum->hs_hcd)); usb_put_hcd(dummy_hcd_to_hcd(dum->hs_hcd)); dum->hs_hcd = NULL; dum->ss_hcd = NULL; } static int dummy_hcd_suspend(struct platform_device *pdev, pm_message_t state) { struct usb_hcd *hcd; struct dummy_hcd *dum_hcd; int rc = 0; dev_dbg(&pdev->dev, "%s\n", __func__); hcd = platform_get_drvdata(pdev); dum_hcd = hcd_to_dummy_hcd(hcd); if (dum_hcd->rh_state == DUMMY_RH_RUNNING) { dev_warn(&pdev->dev, "Root hub isn't suspended!\n"); rc = -EBUSY; } else clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); return rc; } static int dummy_hcd_resume(struct platform_device *pdev) { struct usb_hcd *hcd; dev_dbg(&pdev->dev, "%s\n", __func__); hcd = platform_get_drvdata(pdev); set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); usb_hcd_poll_rh_status(hcd); return 0; } static struct platform_driver dummy_hcd_driver = { .probe = dummy_hcd_probe, .remove_new = dummy_hcd_remove, .suspend = dummy_hcd_suspend, .resume = dummy_hcd_resume, .driver = { .name = driver_name, }, }; /*-------------------------------------------------------------------------*/ #define MAX_NUM_UDC 32 static struct platform_device *the_udc_pdev[MAX_NUM_UDC]; static struct platform_device *the_hcd_pdev[MAX_NUM_UDC]; static int __init dummy_hcd_init(void) { int retval = -ENOMEM; int i; struct dummy *dum[MAX_NUM_UDC] = {}; if (usb_disabled()) return -ENODEV; if (!mod_data.is_high_speed && mod_data.is_super_speed) return -EINVAL; if (mod_data.num < 1 || mod_data.num > MAX_NUM_UDC) { pr_err("Number of emulated UDC must be in range of 1...%d\n", MAX_NUM_UDC); return -EINVAL; } for (i = 0; i < mod_data.num; i++) { the_hcd_pdev[i] = platform_device_alloc(driver_name, i); if (!the_hcd_pdev[i]) { i--; while (i >= 0) platform_device_put(the_hcd_pdev[i--]); return retval; } } for (i = 0; i < mod_data.num; i++) { the_udc_pdev[i] = platform_device_alloc(gadget_name, i); if (!the_udc_pdev[i]) { i--; while (i >= 0) platform_device_put(the_udc_pdev[i--]); goto err_alloc_udc; } } for (i = 0; i < mod_data.num; i++) { dum[i] = kzalloc(sizeof(struct dummy), GFP_KERNEL); if (!dum[i]) { retval = -ENOMEM; goto err_add_pdata; } retval = platform_device_add_data(the_hcd_pdev[i], &dum[i], sizeof(void *)); if (retval) goto err_add_pdata; retval = platform_device_add_data(the_udc_pdev[i], &dum[i], sizeof(void *)); if (retval) goto err_add_pdata; } retval = platform_driver_register(&dummy_hcd_driver); if (retval < 0) goto err_add_pdata; retval = platform_driver_register(&dummy_udc_driver); if (retval < 0) goto err_register_udc_driver; for (i = 0; i < mod_data.num; i++) { retval = platform_device_add(the_hcd_pdev[i]); if (retval < 0) { i--; while (i >= 0) platform_device_del(the_hcd_pdev[i--]); goto err_add_hcd; } } for (i = 0; i < mod_data.num; i++) { if (!dum[i]->hs_hcd || (!dum[i]->ss_hcd && mod_data.is_super_speed)) { /* * The hcd was added successfully but its probe * function failed for some reason. 
*/ retval = -EINVAL; goto err_add_udc; } } for (i = 0; i < mod_data.num; i++) { retval = platform_device_add(the_udc_pdev[i]); if (retval < 0) { i--; while (i >= 0) platform_device_del(the_udc_pdev[i--]); goto err_add_udc; } } for (i = 0; i < mod_data.num; i++) { if (!platform_get_drvdata(the_udc_pdev[i])) { /* * The udc was added successfully but its probe * function failed for some reason. */ retval = -EINVAL; goto err_probe_udc; } } return retval; err_probe_udc: for (i = 0; i < mod_data.num; i++) platform_device_del(the_udc_pdev[i]); err_add_udc: for (i = 0; i < mod_data.num; i++) platform_device_del(the_hcd_pdev[i]); err_add_hcd: platform_driver_unregister(&dummy_udc_driver); err_register_udc_driver: platform_driver_unregister(&dummy_hcd_driver); err_add_pdata: for (i = 0; i < mod_data.num; i++) kfree(dum[i]); for (i = 0; i < mod_data.num; i++) platform_device_put(the_udc_pdev[i]); err_alloc_udc: for (i = 0; i < mod_data.num; i++) platform_device_put(the_hcd_pdev[i]); return retval; } module_init(dummy_hcd_init); static void __exit dummy_hcd_cleanup(void) { int i; for (i = 0; i < mod_data.num; i++) { struct dummy *dum; dum = *((void **)dev_get_platdata(&the_udc_pdev[i]->dev)); platform_device_unregister(the_udc_pdev[i]); platform_device_unregister(the_hcd_pdev[i]); kfree(dum); } platform_driver_unregister(&dummy_udc_driver); platform_driver_unregister(&dummy_hcd_driver); } module_exit(dummy_hcd_cleanup);
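/*
 * Illustrative sketch, not part of dummy_hcd.c: how a gadget function driver
 * would queue a request that the transfer() loop above then services once the
 * host side submits a matching URB. The names my_ep, my_complete and
 * my_queue_one are hypothetical; the calls are the standard gadget API from
 * <linux/usb/gadget.h> (usb_ep_alloc_request()/usb_ep_queue()).
 */
static void my_complete(struct usb_ep *ep, struct usb_request *req)
{
	/* req->status and req->actual are filled in by transfer() above,
	 * e.g. 0 on success or -EOVERFLOW on a device-side overflow. */
	pr_debug("done: %u/%u, status %d\n",
		 req->actual, req->length, req->status);
}

static int my_queue_one(struct usb_ep *my_ep, void *buf, unsigned int len)
{
	struct usb_request *req;

	req = usb_ep_alloc_request(my_ep, GFP_ATOMIC);
	if (!req)
		return -ENOMEM;

	req->buf = buf;
	req->length = len;
	req->zero = 1;		/* on an IN transfer, ask for a ZLP when len is
				 * a multiple of maxpacket (see the rescan logic
				 * in transfer() above) */
	req->complete = my_complete;

	return usb_ep_queue(my_ep, req, GFP_ATOMIC);
}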
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* I/O iterator iteration building functions.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#ifndef _LINUX_IOV_ITER_H
#define _LINUX_IOV_ITER_H

#include <linux/uio.h>
#include <linux/bvec.h>

typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
			     void *priv, void *priv2);
typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len,
			      void *priv, void *priv2);

/*
 * Handle ITER_UBUF.
 */
static __always_inline
size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_ustep_f step)
{
	void __user *base = iter->ubuf;
	size_t progress = 0, remain;

	remain = step(base + iter->iov_offset, 0, len, priv, priv2);
	progress = len - remain;
	iter->iov_offset += progress;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_IOVEC.
 */
static __always_inline
size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		     iov_ustep_f step)
{
	const struct iovec *p = iter->__iov;
	size_t progress = 0, skip = iter->iov_offset;

	do {
		size_t remain, consumed;
		size_t part = min(len, p->iov_len - skip);

		if (likely(part)) {
			remain = step(p->iov_base + skip, progress, part, priv, priv2);
			consumed = part - remain;
			progress += consumed;
			skip += consumed;
			len -= consumed;
			if (skip < p->iov_len)
				break;
		}

		p++;
		skip = 0;
	} while (len);

	iter->nr_segs -= p - iter->__iov;
	iter->__iov = p;
	iter->iov_offset = skip;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_KVEC.
 */
static __always_inline
size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_step_f step)
{
	const struct kvec *p = iter->kvec;
	size_t progress = 0, skip = iter->iov_offset;

	do {
		size_t remain, consumed;
		size_t part = min(len, p->iov_len - skip);

		if (likely(part)) {
			remain = step(p->iov_base + skip, progress, part, priv, priv2);
			consumed = part - remain;
			progress += consumed;
			skip += consumed;
			len -= consumed;
			if (skip < p->iov_len)
				break;
		}

		p++;
		skip = 0;
	} while (len);

	iter->nr_segs -= p - iter->kvec;
	iter->kvec = p;
	iter->iov_offset = skip;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_BVEC.
*/ static __always_inline size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, iov_step_f step) { const struct bio_vec *p = iter->bvec; size_t progress = 0, skip = iter->iov_offset; do { size_t remain, consumed; size_t offset = p->bv_offset + skip, part; void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE); part = min3(len, (size_t)(p->bv_len - skip), (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2); kunmap_local(kaddr); consumed = part - remain; len -= consumed; progress += consumed; skip += consumed; if (skip >= p->bv_len) { skip = 0; p++; } if (remain) break; } while (len); iter->nr_segs -= p - iter->bvec; iter->bvec = p; iter->iov_offset = skip; iter->count -= progress; return progress; } /* * Handle ITER_XARRAY. */ static __always_inline size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2, iov_step_f step) { struct folio *folio; size_t progress = 0; loff_t start = iter->xarray_start + iter->iov_offset; pgoff_t index = start / PAGE_SIZE; XA_STATE(xas, iter->xarray, index); rcu_read_lock(); xas_for_each(&xas, folio, ULONG_MAX) { size_t remain, consumed, offset, part, flen; if (xas_retry(&xas, folio)) continue; if (WARN_ON(xa_is_value(folio))) break; if (WARN_ON(folio_test_hugetlb(folio))) break; offset = offset_in_folio(folio, start + progress); flen = min(folio_size(folio) - offset, len); while (flen) { void *base = kmap_local_folio(folio, offset); part = min_t(size_t, flen, PAGE_SIZE - offset_in_page(offset)); remain = step(base, progress, part, priv, priv2); kunmap_local(base); consumed = part - remain; progress += consumed; len -= consumed; if (remain || len == 0) goto out; flen -= consumed; offset += consumed; } } out: rcu_read_unlock(); iter->iov_offset += progress; iter->count -= progress; return progress; } /* * Handle ITER_DISCARD. */ static __always_inline size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2, iov_step_f step) { size_t progress = len; iter->count -= progress; return progress; } /** * iterate_and_advance2 - Iterate over an iterator * @iter: The iterator to iterate over. * @len: The amount to iterate over. * @priv: Data for the step functions. * @priv2: More data for the step functions. * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. * @step: Function for other iterators; given kernel addresses. * * Iterate over the next part of an iterator, up to the specified length. The * buffer is presented in segments, which for kernel iteration are broken up by * physical pages and mapped, with the mapped address being presented. * * Two step functions, @step and @ustep, must be provided, one for handling * mapped kernel addresses and the other is given user addresses which have the * potential to fault since no pinning is performed. * * The step functions are passed the address and length of the segment, @priv, * @priv2 and the amount of data so far iterated over (which can, for example, * be added to @priv to point to the right part of a second buffer). The step * functions should return the amount of the segment they didn't process (ie. 0 * indicates complete processsing). * * This function returns the amount of data processed (ie. 0 means nothing was * processed and the value of @len means processes to completion). 
*/ static __always_inline size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, void *priv2, iov_ustep_f ustep, iov_step_f step) { if (unlikely(iter->count < len)) len = iter->count; if (unlikely(!len)) return 0; if (likely(iter_is_ubuf(iter))) return iterate_ubuf(iter, len, priv, priv2, ustep); if (likely(iter_is_iovec(iter))) return iterate_iovec(iter, len, priv, priv2, ustep); if (iov_iter_is_bvec(iter)) return iterate_bvec(iter, len, priv, priv2, step); if (iov_iter_is_kvec(iter)) return iterate_kvec(iter, len, priv, priv2, step); if (iov_iter_is_xarray(iter)) return iterate_xarray(iter, len, priv, priv2, step); return iterate_discard(iter, len, priv, priv2, step); } /** * iterate_and_advance - Iterate over an iterator * @iter: The iterator to iterate over. * @len: The amount to iterate over. * @priv: Data for the step functions. * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. * @step: Function for other iterators; given kernel addresses. * * As iterate_and_advance2(), but priv2 is always NULL. */ static __always_inline size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, iov_ustep_f ustep, iov_step_f step) { return iterate_and_advance2(iter, len, priv, NULL, ustep, step); } #endif /* _LINUX_IOV_ITER_H */
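
/*
 * Illustrative sketch, not part of this header: a pair of step functions in
 * the style of lib/iov_iter.c, showing the contract documented above. Here
 * @priv is the flat destination buffer, and each step returns how many bytes
 * of the segment it did NOT process (0 means the segment was fully consumed).
 * The example_* names are hypothetical, and the user-copy step is simplified
 * relative to lib/iov_iter.c, which uses raw_copy_from_user() plus
 * fault-injection hooks. Assumes <linux/uaccess.h> and <linux/string.h>.
 */
static size_t example_copy_from_user_step(void __user *iter_from, size_t progress,
					  size_t len, void *to, void *priv2)
{
	/* May fault; any uncopied remainder propagates back as "not processed". */
	return copy_from_user(to + progress, iter_from, len);
}

static size_t example_memcpy_step(void *iter_from, size_t progress,
				  size_t len, void *to, void *priv2)
{
	memcpy(to + progress, iter_from, len);
	return 0;
}

/*
 * Hypothetical caller: copy up to @bytes from @iter into @addr, advancing the
 * iterator, and return the amount actually copied.
 */
static size_t example_copy_from_iter(void *addr, size_t bytes, struct iov_iter *iter)
{
	return iterate_and_advance(iter, bytes, addr,
				   example_copy_from_user_step,
				   example_memcpy_step);
}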
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 */

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U

union nested_table {
	union nested_table __rcu *table;
	struct rhash_lock_head __rcu *bucket;
};

static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	if (!debug_locks)
		return 1;
	if (unlikely(tbl->nest))
		return 1;
	return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif

static inline union nested_table *nested_table_top(
	const struct bucket_table *tbl)
{
	/* The top-level bucket entry does not need RCU protection
	 * because it's set at the same time as tbl->nest.
*/ return (void *)rcu_dereference_protected(tbl->buckets[0], 1); } static void nested_table_free(union nested_table *ntbl, unsigned int size) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); const unsigned int len = 1 << shift; unsigned int i; ntbl = rcu_dereference_protected(ntbl->table, 1); if (!ntbl) return; if (size > len) { size >>= shift; for (i = 0; i < len; i++) nested_table_free(ntbl + i, size); } kfree(ntbl); } static void nested_bucket_table_free(const struct bucket_table *tbl) { unsigned int size = tbl->size >> tbl->nest; unsigned int len = 1 << tbl->nest; union nested_table *ntbl; unsigned int i; ntbl = nested_table_top(tbl); for (i = 0; i < len; i++) nested_table_free(ntbl + i, size); kfree(ntbl); } static void bucket_table_free(const struct bucket_table *tbl) { if (tbl->nest) nested_bucket_table_free(tbl); kvfree(tbl); } static void bucket_table_free_rcu(struct rcu_head *head) { bucket_table_free(container_of(head, struct bucket_table, rcu)); } static union nested_table *nested_table_alloc(struct rhashtable *ht, union nested_table __rcu **prev, bool leaf) { union nested_table *ntbl; int i; ntbl = rcu_dereference(*prev); if (ntbl) return ntbl; ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); if (ntbl && leaf) { for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++) INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL) return ntbl; /* Raced with another thread. */ kfree(ntbl); return rcu_dereference(*prev); } static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, size_t nbuckets, gfp_t gfp) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); struct bucket_table *tbl; size_t size; if (nbuckets < (1 << (shift + 1))) return NULL; size = sizeof(*tbl) + sizeof(tbl->buckets[0]); tbl = kzalloc(size, gfp); if (!tbl) return NULL; if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, false)) { kfree(tbl); return NULL; } tbl->nest = (ilog2(nbuckets) - 1) % shift + 1; return tbl; } static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size_t nbuckets, gfp_t gfp) { struct bucket_table *tbl = NULL; size_t size; int i; static struct lock_class_key __key; tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp); size = nbuckets; if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) { tbl = nested_bucket_table_alloc(ht, nbuckets, gfp); nbuckets = 0; } if (tbl == NULL) return NULL; lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0); tbl->size = size; rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); tbl->hash_rnd = get_random_u32(); for (i = 0; i < nbuckets; i++) INIT_RHT_NULLS_HEAD(tbl->buckets[i]); return tbl; } static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, struct bucket_table *tbl) { struct bucket_table *new_tbl; do { new_tbl = tbl; tbl = rht_dereference_rcu(tbl->future_tbl, ht); } while (tbl); return new_tbl; } static int rhashtable_rehash_one(struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); int err = -EAGAIN; struct rhash_head *head, *next, *entry; struct rhash_head __rcu **pprev = NULL; unsigned int new_hash; unsigned long flags; if (new_tbl->nest) goto out; err = -ENOENT; rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash), old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); if (rht_is_a_nulls(next)) break; pprev = 
&entry->next; } if (err) goto out; new_hash = head_hashfn(ht, new_tbl, entry); flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash); RCU_INIT_POINTER(entry->next, head); rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags); if (pprev) rcu_assign_pointer(*pprev, next); else /* Need to preserved the bit lock. */ rht_assign_locked(bkt, next); out: return err; } static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); unsigned long flags; int err; if (!bkt) return 0; flags = rht_lock(old_tbl, bkt); while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; if (err == -ENOENT) err = 0; rht_unlock(old_tbl, bkt, flags); return err; } static int rhashtable_rehash_attach(struct rhashtable *ht, struct bucket_table *old_tbl, struct bucket_table *new_tbl) { /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. * As cmpxchg() provides strong barriers, we do not need * rcu_assign_pointer(). */ if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL, new_tbl) != NULL) return -EEXIST; return 0; } static int rhashtable_rehash_table(struct rhashtable *ht) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl; struct rhashtable_walker *walker; unsigned int old_hash; int err; new_tbl = rht_dereference(old_tbl->future_tbl, ht); if (!new_tbl) return 0; for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { err = rhashtable_rehash_chain(ht, old_hash); if (err) return err; cond_resched(); } /* Publish the new table pointer. */ rcu_assign_pointer(ht->tbl, new_tbl); spin_lock(&ht->lock); list_for_each_entry(walker, &old_tbl->walkers, list) walker->tbl = NULL; /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. * We do this inside the locked region so that * rhashtable_walk_stop() can use rcu_head_after_call_rcu() * to check if it should not re-link the table. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); spin_unlock(&ht->lock); return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } static int rhashtable_rehash_alloc(struct rhashtable *ht, struct bucket_table *old_tbl, unsigned int size) { struct bucket_table *new_tbl; int err; ASSERT_RHT_MUTEX(ht); new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); if (err) bucket_table_free(new_tbl); return err; } /** * rhashtable_shrink - Shrink hash table while allowing concurrent lookups * @ht: the hash table to shrink * * This function shrinks the hash table to fit, i.e., the smallest * size would not cause it to expand right away automatically. * * The caller must ensure that no concurrent resizing occurs by holding * ht->mutex. * * The caller must ensure that no concurrent table mutations take place. * It is however valid to have concurrent lookups if they are RCU protected. * * It is valid to have concurrent insertions and deletions protected by per * bucket locks or concurrent RCU protected lookups and traversals. 
*/ static int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); unsigned int nelems = atomic_read(&ht->nelems); unsigned int size = 0; if (nelems) size = roundup_pow_of_two(nelems * 3 / 2); if (size < ht->p.min_size) size = ht->p.min_size; if (old_tbl->size <= size) return 0; if (rht_dereference(old_tbl->future_tbl, ht)) return -EEXIST; return rhashtable_rehash_alloc(ht, old_tbl, size); } static void rht_deferred_worker(struct work_struct *work) { struct rhashtable *ht; struct bucket_table *tbl; int err = 0; ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); tbl = rhashtable_last_table(ht, tbl); if (rht_grow_above_75(ht, tbl)) err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2); else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) err = rhashtable_shrink(ht); else if (tbl->nest) err = rhashtable_rehash_alloc(ht, tbl, tbl->size); if (!err || err == -EEXIST) { int nerr; nerr = rhashtable_rehash_table(ht); err = err ?: nerr; } mutex_unlock(&ht->mutex); if (err) schedule_work(&ht->run_work); } static int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl) { struct bucket_table *old_tbl; struct bucket_table *new_tbl; unsigned int size; int err; old_tbl = rht_dereference_rcu(ht->tbl, ht); size = tbl->size; err = -EBUSY; if (rht_grow_above_75(ht, tbl)) size *= 2; /* Do not schedule more than one rehash */ else if (old_tbl != tbl) goto fail; err = -ENOMEM; new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN); if (new_tbl == NULL) goto fail; err = rhashtable_rehash_attach(ht, tbl, new_tbl); if (err) { bucket_table_free(new_tbl); if (err == -EEXIST) err = 0; } else schedule_work(&ht->run_work); return err; fail: /* Do not fail the insert if someone else did a rehash. */ if (likely(rcu_access_pointer(tbl->future_tbl))) return 0; /* Schedule async rehash to retry allocation in process context. */ if (err == -ENOMEM) schedule_work(&ht->run_work); return err; } static void *rhashtable_lookup_one(struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, const void *key, struct rhash_head *obj) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; struct rhash_head __rcu **pprev = NULL; struct rhash_head *head; int elasticity; elasticity = RHT_ELASTICITY; rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; elasticity--; if (!key || (ht->p.obj_cmpfn ? 
ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head)))) { pprev = &head->next; continue; } if (!ht->rhlist) return rht_obj(ht, head); list = container_of(obj, struct rhlist_head, rhead); plist = container_of(head, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); if (pprev) rcu_assign_pointer(*pprev, obj); else /* Need to preserve the bit lock */ rht_assign_locked(bkt, obj); return NULL; } if (elasticity <= 0) return ERR_PTR(-EAGAIN); return ERR_PTR(-ENOENT); } static struct bucket_table *rhashtable_insert_one( struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, void *data) { struct bucket_table *new_tbl; struct rhash_head *head; if (!IS_ERR_OR_NULL(data)) return ERR_PTR(-EEXIST); if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) return ERR_CAST(data); new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (new_tbl) return new_tbl; if (PTR_ERR(data) != -ENOENT) return ERR_CAST(data); if (unlikely(rht_grow_above_max(ht, tbl))) return ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { struct rhlist_head *list; list = container_of(obj, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, NULL); } /* bkt is always the head of the list, so it holds * the lock, which we need to preserve */ rht_assign_locked(bkt, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); return NULL; } static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, struct rhash_head *obj) { struct bucket_table *new_tbl; struct bucket_table *tbl; struct rhash_lock_head __rcu **bkt; unsigned long flags; unsigned int hash; void *data; new_tbl = rcu_dereference(ht->tbl); do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); if (rcu_access_pointer(tbl->future_tbl)) /* Failure is OK */ bkt = rht_bucket_var(tbl, hash); else bkt = rht_bucket_insert(ht, tbl, hash); if (bkt == NULL) { new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); data = ERR_PTR(-EAGAIN); } else { flags = rht_lock(tbl, bkt); data = rhashtable_lookup_one(ht, bkt, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, bkt, tbl, hash, obj, data); if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); rht_unlock(tbl, bkt, flags); } } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: -EAGAIN); return data; } void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj) { void *data; do { rcu_read_lock(); data = rhashtable_try_insert(ht, key, obj); rcu_read_unlock(); } while (PTR_ERR(data) == -EAGAIN); return data; } EXPORT_SYMBOL_GPL(rhashtable_insert_slow); /** * rhashtable_walk_enter - Initialise an iterator * @ht: Table to walk over * @iter: Hash table Iterator * * This function prepares a hash table walk. * * Note that if you restart a walk after rhashtable_walk_stop you * may see the same object twice. Also, you may miss objects if * there are removals in between rhashtable_walk_stop and the next * call to rhashtable_walk_start. * * For a completely stable walk you should construct your own data * structure outside the hash table. 
* * This function may be called from any process context, including * non-preemptable context, but cannot be called from softirq or * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) { iter->ht = ht; iter->p = NULL; iter->slot = 0; iter->skip = 0; iter->end_of_table = 0; spin_lock(&ht->lock); iter->walker.tbl = rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); list_add(&iter->walker.list, &iter->walker.tbl->walkers); spin_unlock(&ht->lock); } EXPORT_SYMBOL_GPL(rhashtable_walk_enter); /** * rhashtable_walk_exit - Free an iterator * @iter: Hash table Iterator * * This function frees resources allocated by rhashtable_walk_enter. */ void rhashtable_walk_exit(struct rhashtable_iter *iter) { spin_lock(&iter->ht->lock); if (iter->walker.tbl) list_del(&iter->walker.list); spin_unlock(&iter->ht->lock); } EXPORT_SYMBOL_GPL(rhashtable_walk_exit); /** * rhashtable_walk_start_check - Start a hash table walk * @iter: Hash table iterator * * Start a hash table walk at the current iterator position. Note that we take * the RCU lock in all cases including when we return an error. So you must * always call rhashtable_walk_stop to clean up. * * Returns zero if successful. * * Returns -EAGAIN if resize event occurred. Note that the iterator * will rewind back to the beginning and you may use it immediately * by calling rhashtable_walk_next. * * rhashtable_walk_start is defined as an inline variant that returns * void. This is preferred in cases where the caller would ignore * resize events and always continue. */ int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; bool rhlist = ht->rhlist; rcu_read_lock(); spin_lock(&ht->lock); if (iter->walker.tbl) list_del(&iter->walker.list); spin_unlock(&ht->lock); if (iter->end_of_table) return 0; if (!iter->walker.tbl) { iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); iter->slot = 0; iter->skip = 0; return -EAGAIN; } if (iter->p && !rhlist) { /* * We need to validate that 'p' is still in the table, and * if so, update 'skip' */ struct rhash_head *p; int skip = 0; rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { skip++; if (p == iter->p) { iter->skip = skip; goto found; } } iter->p = NULL; } else if (iter->p && rhlist) { /* Need to validate that 'list' is still in the table, and * if so, update 'skip' and 'p'. */ struct rhash_head *p; struct rhlist_head *list; int skip = 0; rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { for (list = container_of(p, struct rhlist_head, rhead); list; list = rcu_dereference(list->next)) { skip++; if (list == iter->list) { iter->p = p; iter->skip = skip; goto found; } } } iter->p = NULL; } found: return 0; } EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); /** * __rhashtable_walk_find_next - Find the next element in a table (or the first * one in case of a new walk). * * @iter: Hash table iterator * * Returns the found object or NULL when the end of the table is reached. * * Returns -EAGAIN if resize event occurred. 
*/ static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; bool rhlist = ht->rhlist; if (!tbl) return NULL; for (; iter->slot < tbl->size; iter->slot++) { int skip = iter->skip; rht_for_each_rcu(p, tbl, iter->slot) { if (rhlist) { list = container_of(p, struct rhlist_head, rhead); do { if (!skip) goto next; skip--; list = rcu_dereference(list->next); } while (list); continue; } if (!skip) break; skip--; } next: if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; iter->list = list; return rht_obj(ht, rhlist ? &list->rhead : p); } iter->skip = 0; } iter->p = NULL; /* Ensure we see any new tables. */ smp_rmb(); iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (iter->walker.tbl) { iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); } else { iter->end_of_table = true; } return NULL; } /** * rhashtable_walk_next - Return the next object and advance the iterator * @iter: Hash table iterator * * Note that you must call rhashtable_walk_stop when you are finished * with the walk. * * Returns the next object or NULL when the end of the table is reached. * * Returns -EAGAIN if resize event occurred. Note that the iterator * will rewind back to the beginning and you may continue to use it. */ void *rhashtable_walk_next(struct rhashtable_iter *iter) { struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; bool rhlist = ht->rhlist; if (p) { if (!rhlist || !(list = rcu_dereference(list->next))) { p = rcu_dereference(p->next); list = container_of(p, struct rhlist_head, rhead); } if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; iter->list = list; return rht_obj(ht, rhlist ? &list->rhead : p); } /* At the end of this slot, switch to next one and then find * next entry from that point. */ iter->skip = 0; iter->slot++; } return __rhashtable_walk_find_next(iter); } EXPORT_SYMBOL_GPL(rhashtable_walk_next); /** * rhashtable_walk_peek - Return the next object but don't advance the iterator * @iter: Hash table iterator * * Returns the next object or NULL when the end of the table is reached. * * Returns -EAGAIN if resize event occurred. Note that the iterator * will rewind back to the beginning and you may continue to use it. */ void *rhashtable_walk_peek(struct rhashtable_iter *iter) { struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; if (p) return rht_obj(ht, ht->rhlist ? &list->rhead : p); /* No object found in current iter, find next one in the table. */ if (iter->skip) { /* A nonzero skip value points to the next entry in the table * beyond that last one that was found. Decrement skip so * we find the current value. __rhashtable_walk_find_next * will restore the original value of skip assuming that * the table hasn't changed. */ iter->skip--; } return __rhashtable_walk_find_next(iter); } EXPORT_SYMBOL_GPL(rhashtable_walk_peek); /** * rhashtable_walk_stop - Finish a hash table walk * @iter: Hash table iterator * * Finish a hash table walk. Does not reset the iterator to the start of the * hash table. */ void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU) { struct rhashtable *ht; struct bucket_table *tbl = iter->walker.tbl; if (!tbl) goto out; ht = iter->ht; spin_lock(&ht->lock); if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu)) /* This bucket table is being freed, don't re-link it. 
*/ iter->walker.tbl = NULL; else list_add(&iter->walker.list, &tbl->walkers); spin_unlock(&ht->lock); out: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(const struct rhashtable_params *params) { size_t retsize; if (params->nelem_hint) retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3), (unsigned long)params->min_size); else retsize = max(HASH_DEFAULT_SIZE, (unsigned long)params->min_size); return retsize; } static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) { return jhash2(key, length, seed); } /** * rhashtable_init - initialize a new hash table * @ht: hash table to be initialized * @params: configuration parameters * * Initializes a new hash table based on the provided configuration * parameters. A table can be configured either with a variable or * fixed length key: * * Configuration Example 1: Fixed length keys * struct test_obj { * int key; * void * my_member; * struct rhash_head node; * }; * * struct rhashtable_params params = { * .head_offset = offsetof(struct test_obj, node), * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, * }; * * Configuration Example 2: Variable length keys * struct test_obj { * [...] * struct rhash_head node; * }; * * u32 my_hash_fn(const void *data, u32 len, u32 seed) * { * struct test_obj *obj = data; * * return [... hash ...]; * } * * struct rhashtable_params params = { * .head_offset = offsetof(struct test_obj, node), * .hashfn = jhash, * .obj_hashfn = my_hash_fn, * }; */ int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params) { struct bucket_table *tbl; size_t size; if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); spin_lock_init(&ht->lock); memcpy(&ht->p, params, sizeof(*params)); if (params->min_size) ht->p.min_size = roundup_pow_of_two(params->min_size); /* Cap total entries at 2^31 to avoid nelems overflow. */ ht->max_elems = 1u << 31; if (params->max_size) { ht->p.max_size = rounddown_pow_of_two(params->max_size); if (ht->p.max_size < ht->max_elems / 2) ht->max_elems = ht->p.max_size * 2; } ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); size = rounded_hashtable_size(&ht->p); ht->key_len = ht->p.key_len; if (!params->hashfn) { ht->p.hashfn = jhash; if (!(ht->key_len & (sizeof(u32) - 1))) { ht->key_len /= sizeof(u32); ht->p.hashfn = rhashtable_jhash2; } } /* * This is api initialization and thus we need to guarantee the * initial rhashtable allocation. Upon failure, retry with the * smallest possible size with __GFP_NOFAIL semantics. */ tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (unlikely(tbl == NULL)) { size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL); } atomic_set(&ht->nelems, 0); RCU_INIT_POINTER(ht->tbl, tbl); INIT_WORK(&ht->run_work, rht_deferred_worker); return 0; } EXPORT_SYMBOL_GPL(rhashtable_init); /** * rhltable_init - initialize a new hash list table * @hlt: hash list table to be initialized * @params: configuration parameters * * Initializes a new hash list table. * * See documentation for rhashtable_init. 
*/ int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) { int err; err = rhashtable_init(&hlt->ht, params); hlt->ht.rhlist = true; return err; } EXPORT_SYMBOL_GPL(rhltable_init); static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, void (*free_fn)(void *ptr, void *arg), void *arg) { struct rhlist_head *list; if (!ht->rhlist) { free_fn(rht_obj(ht, obj), arg); return; } list = container_of(obj, struct rhlist_head, rhead); do { obj = &list->rhead; list = rht_dereference(list->next, ht); free_fn(rht_obj(ht, obj), arg); } while (list); } /** * rhashtable_free_and_destroy - free elements and destroy hash table * @ht: the hash table to destroy * @free_fn: callback to release resources of element * @arg: pointer passed to free_fn * * Stops an eventual async resize. If defined, invokes free_fn for each * element to releasal resources. Please note that RCU protected * readers may still be accessing the elements. Releasing of resources * must occur in a compatible manner. Then frees the bucket array. * * This function will eventually sleep to wait for an async resize * to complete. The caller is responsible that no further write operations * occurs in parallel. */ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { struct bucket_table *tbl, *next_tbl; unsigned int i; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); restart: if (free_fn) { for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; cond_resched(); for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); pos = next, next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL) rhashtable_free_one(ht, pos, free_fn, arg); } } next_tbl = rht_dereference(tbl->future_tbl, ht); bucket_table_free(tbl); if (next_tbl) { tbl = next_tbl; goto restart; } mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); void rhashtable_destroy(struct rhashtable *ht) { return rhashtable_free_and_destroy(ht, NULL, NULL); } EXPORT_SYMBOL_GPL(rhashtable_destroy); struct rhash_lock_head __rcu **__rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; unsigned int subhash = hash; union nested_table *ntbl; ntbl = nested_table_top(tbl); ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash); subhash >>= tbl->nest; while (ntbl && size > (1 << shift)) { index = subhash & ((1 << shift) - 1); ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash); size >>= shift; subhash >>= shift; } if (!ntbl) return NULL; return &ntbl[subhash].bucket; } EXPORT_SYMBOL_GPL(__rht_bucket_nested); struct rhash_lock_head __rcu **rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash) { static struct rhash_lock_head __rcu *rhnull; if (!rhnull) INIT_RHT_NULLS_HEAD(rhnull); return __rht_bucket_nested(tbl, hash) ?: &rhnull; } EXPORT_SYMBOL_GPL(rht_bucket_nested); struct rhash_lock_head __rcu **rht_bucket_nested_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; union nested_table *ntbl; ntbl = nested_table_top(tbl); hash >>= tbl->nest; ntbl = 
nested_table_alloc(ht, &ntbl[index].table, size <= (1 << shift)); while (ntbl && size > (1 << shift)) { index = hash & ((1 << shift) - 1); size >>= shift; hash >>= shift; ntbl = nested_table_alloc(ht, &ntbl[index].table, size <= (1 << shift)); } if (!ntbl) return NULL; return &ntbl[hash].bucket; } EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
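/*
 * A minimal, hypothetical client of the rhashtable API whose slow paths are
 * implemented above. struct demo_obj, demo_params and demo_use_rhashtable()
 * are illustrative names only; the fast-path wrappers used here
 * (rhashtable_insert_fast() and friends) live in <linux/rhashtable.h>.
 */
#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/printk.h>

struct demo_obj {
	u32 key;			/* hashed key */
	struct rhash_head node;		/* table linkage */
};

static const struct rhashtable_params demo_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct demo_obj, key),
	.head_offset	= offsetof(struct demo_obj, node),
	.automatic_shrinking = true,
};

static void demo_free_obj(void *ptr, void *arg)
{
	kfree(ptr);
}

static int demo_use_rhashtable(void)
{
	struct demo_obj *obj, *found;
	struct rhashtable ht;
	u32 key = 42;
	int err;

	err = rhashtable_init(&ht, &demo_params);
	if (err)
		return err;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj) {
		err = -ENOMEM;
		goto out;
	}
	obj->key = key;

	/* May drop into rhashtable_insert_slow() while a resize is running. */
	err = rhashtable_insert_fast(&ht, &obj->node, demo_params);
	if (err) {
		kfree(obj);
		goto out;
	}

	rcu_read_lock();
	found = rhashtable_lookup_fast(&ht, &key, demo_params);
	if (found)
		pr_info("demo: found key %u\n", found->key);
	rcu_read_unlock();

out:
	/* Frees each remaining element via demo_free_obj(), then the buckets. */
	rhashtable_free_and_destroy(&ht, demo_free_obj, NULL);
	return err;
}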
197 198 197 197 16 197 54 55 55 56 54 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 // SPDX-License-Identifier: GPL-2.0 #include <linux/bitops.h> #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/uaccess.h> #include <linux/nospec.h> /* out-of-line parts */ #ifndef INLINE_COPY_FROM_USER unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { /* * Ensure that bad access_ok() speculation will not * lead to nasty side effects *after* the copy is * finished: */ barrier_nospec(); instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } EXPORT_SYMBOL(_copy_from_user); #endif #ifndef INLINE_COPY_TO_USER unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; if (likely(access_ok(to, n))) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); } return n; } EXPORT_SYMBOL(_copy_to_user); #endif /** * check_zeroed_user: check if a userspace buffer only contains zero bytes * @from: Source address, in userspace. * @size: Size of buffer. * * This is effectively shorthand for "memchr_inv(from, 0, size) == NULL" for * userspace addresses (and is more efficient because we don't care where the * first non-zero byte is). * * Returns: * * 0: There were non-zero bytes present in the buffer. * * 1: The buffer was full of zero bytes. * * -EFAULT: access to userspace failed. */ int check_zeroed_user(const void __user *from, size_t size) { unsigned long val; uintptr_t align = (uintptr_t) from % sizeof(unsigned long); if (unlikely(size == 0)) return 1; from -= align; size += align; if (!user_read_access_begin(from, size)) return -EFAULT; unsafe_get_user(val, (unsigned long __user *) from, err_fault); if (align) val &= ~aligned_byte_mask(align); while (size > sizeof(unsigned long)) { if (unlikely(val)) goto done; from += sizeof(unsigned long); size -= sizeof(unsigned long); unsafe_get_user(val, (unsigned long __user *) from, err_fault); } if (size < sizeof(unsigned long)) val &= aligned_byte_mask(size); done: user_read_access_end(); return (val == 0); err_fault: user_read_access_end(); return -EFAULT; } EXPORT_SYMBOL(check_zeroed_user);
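/*
 * A sketch of the pattern check_zeroed_user() exists for: accepting a
 * userspace struct that may be larger than the kernel's definition, provided
 * the unknown tail is all zeroes (the convention copy_struct_from_user()
 * implements). struct demo_args and demo_copy_args() are hypothetical names.
 */
#include <linux/uaccess.h>
#include <linux/minmax.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/types.h>

struct demo_args {
	__u64 flags;
	__u64 addr;
};

static int demo_copy_args(struct demo_args *karg,
			  const void __user *uarg, size_t usize)
{
	size_t ksize = sizeof(*karg);

	memset(karg, 0, ksize);

	if (usize > ksize) {
		int ret = check_zeroed_user(uarg + ksize, usize - ksize);

		if (ret < 0)
			return ret;		/* -EFAULT */
		if (ret == 0)
			return -E2BIG;		/* unknown non-zero tail */
	}

	/* Copy only the part both sides know about. */
	if (copy_from_user(karg, uarg, min(usize, ksize)))
		return -EFAULT;

	return 0;
}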
4 254 251 233 268 192 192 192 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM timer #if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_TIMER_H #include <linux/tracepoint.h> #include <linux/hrtimer.h> #include <linux/timer.h> DECLARE_EVENT_CLASS(timer_class, TP_PROTO(struct timer_list *timer), TP_ARGS(timer), TP_STRUCT__entry( __field( void *, timer ) ), TP_fast_assign( __entry->timer = timer; ), TP_printk("timer=%p", __entry->timer) ); /** * timer_init - called when the timer is initialized * @timer: pointer to struct timer_list */ DEFINE_EVENT(timer_class, timer_init, TP_PROTO(struct timer_list *timer), TP_ARGS(timer) ); #define decode_timer_flags(flags) \ __print_flags(flags, "|", \ { TIMER_MIGRATING, "M" }, \ { TIMER_DEFERRABLE, "D" }, \ { TIMER_PINNED, "P" }, \ { TIMER_IRQSAFE, "I" }) /** * timer_start - called when the timer is started * @timer: pointer to struct timer_list * @bucket_expiry: the bucket expiry time */ TRACE_EVENT(timer_start, TP_PROTO(struct timer_list *timer, unsigned long bucket_expiry), TP_ARGS(timer, bucket_expiry), TP_STRUCT__entry( __field( void *, timer ) __field( void *, function ) __field( unsigned long, expires ) __field( unsigned long, bucket_expiry ) __field( unsigned long, now ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->timer = timer; __entry->function = timer->function; __entry->expires = timer->expires; __entry->bucket_expiry = bucket_expiry; __entry->now = jiffies; __entry->flags = timer->flags; ), TP_printk("timer=%p function=%ps expires=%lu [timeout=%ld] bucket_expiry=%lu cpu=%u idx=%u flags=%s", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now, __entry->bucket_expiry, __entry->flags & TIMER_CPUMASK, __entry->flags >> TIMER_ARRAYSHIFT, decode_timer_flags(__entry->flags & 
TIMER_TRACE_FLAGMASK)) ); /** * timer_expire_entry - called immediately before the timer callback * @timer: pointer to struct timer_list * @baseclk: value of timer_base::clk when timer expires * * Allows to determine the timer latency. */ TRACE_EVENT(timer_expire_entry, TP_PROTO(struct timer_list *timer, unsigned long baseclk), TP_ARGS(timer, baseclk), TP_STRUCT__entry( __field( void *, timer ) __field( unsigned long, now ) __field( void *, function) __field( unsigned long, baseclk ) ), TP_fast_assign( __entry->timer = timer; __entry->now = jiffies; __entry->function = timer->function; __entry->baseclk = baseclk; ), TP_printk("timer=%p function=%ps now=%lu baseclk=%lu", __entry->timer, __entry->function, __entry->now, __entry->baseclk) ); /** * timer_expire_exit - called immediately after the timer callback returns * @timer: pointer to struct timer_list * * When used in combination with the timer_expire_entry tracepoint we can * determine the runtime of the timer callback function. * * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might * be invalid. We solely track the pointer. */ DEFINE_EVENT(timer_class, timer_expire_exit, TP_PROTO(struct timer_list *timer), TP_ARGS(timer) ); /** * timer_cancel - called when the timer is canceled * @timer: pointer to struct timer_list */ DEFINE_EVENT(timer_class, timer_cancel, TP_PROTO(struct timer_list *timer), TP_ARGS(timer) ); TRACE_EVENT(timer_base_idle, TP_PROTO(bool is_idle, unsigned int cpu), TP_ARGS(is_idle, cpu), TP_STRUCT__entry( __field( bool, is_idle ) __field( unsigned int, cpu ) ), TP_fast_assign( __entry->is_idle = is_idle; __entry->cpu = cpu; ), TP_printk("is_idle=%d cpu=%d", __entry->is_idle, __entry->cpu) ); #define decode_clockid(type) \ __print_symbolic(type, \ { CLOCK_REALTIME, "CLOCK_REALTIME" }, \ { CLOCK_MONOTONIC, "CLOCK_MONOTONIC" }, \ { CLOCK_BOOTTIME, "CLOCK_BOOTTIME" }, \ { CLOCK_TAI, "CLOCK_TAI" }) #define decode_hrtimer_mode(mode) \ __print_symbolic(mode, \ { HRTIMER_MODE_ABS, "ABS" }, \ { HRTIMER_MODE_REL, "REL" }, \ { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }, \ { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \ { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \ { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \ { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \ { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) /** * hrtimer_init - called when the hrtimer is initialized * @hrtimer: pointer to struct hrtimer * @clockid: the hrtimers clock * @mode: the hrtimers mode */ TRACE_EVENT(hrtimer_init, TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), TP_ARGS(hrtimer, clockid, mode), TP_STRUCT__entry( __field( void *, hrtimer ) __field( clockid_t, clockid ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( __entry->hrtimer = hrtimer; __entry->clockid = clockid; __entry->mode = mode; ), TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, decode_clockid(__entry->clockid), decode_hrtimer_mode(__entry->mode)) ); /** * hrtimer_start - called when the hrtimer is started * @hrtimer: pointer to struct hrtimer * @mode: the hrtimers mode */ TRACE_EVENT(hrtimer_start, TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode), TP_ARGS(hrtimer, mode), TP_STRUCT__entry( __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) __field( enum hrtimer_mode, mode ) ), 
TP_fast_assign( __entry->hrtimer = hrtimer; __entry->function = hrtimer->function; __entry->expires = hrtimer_get_expires(hrtimer); __entry->softexpires = hrtimer_get_softexpires(hrtimer); __entry->mode = mode; ), TP_printk("hrtimer=%p function=%ps expires=%llu softexpires=%llu " "mode=%s", __entry->hrtimer, __entry->function, (unsigned long long) __entry->expires, (unsigned long long) __entry->softexpires, decode_hrtimer_mode(__entry->mode)) ); /** * hrtimer_expire_entry - called immediately before the hrtimer callback * @hrtimer: pointer to struct hrtimer * @now: pointer to variable which contains current time of the * timers base. * * Allows to determine the timer latency. */ TRACE_EVENT(hrtimer_expire_entry, TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), TP_ARGS(hrtimer, now), TP_STRUCT__entry( __field( void *, hrtimer ) __field( s64, now ) __field( void *, function) ), TP_fast_assign( __entry->hrtimer = hrtimer; __entry->now = *now; __entry->function = hrtimer->function; ), TP_printk("hrtimer=%p function=%ps now=%llu", __entry->hrtimer, __entry->function, (unsigned long long) __entry->now) ); DECLARE_EVENT_CLASS(hrtimer_class, TP_PROTO(struct hrtimer *hrtimer), TP_ARGS(hrtimer), TP_STRUCT__entry( __field( void *, hrtimer ) ), TP_fast_assign( __entry->hrtimer = hrtimer; ), TP_printk("hrtimer=%p", __entry->hrtimer) ); /** * hrtimer_expire_exit - called immediately after the hrtimer callback returns * @hrtimer: pointer to struct hrtimer * * When used in combination with the hrtimer_expire_entry tracepoint we can * determine the runtime of the callback function. */ DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit, TP_PROTO(struct hrtimer *hrtimer), TP_ARGS(hrtimer) ); /** * hrtimer_cancel - called when the hrtimer is canceled * @hrtimer: pointer to struct hrtimer */ DEFINE_EVENT(hrtimer_class, hrtimer_cancel, TP_PROTO(struct hrtimer *hrtimer), TP_ARGS(hrtimer) ); /** * itimer_state - called when itimer is started or canceled * @which: name of the interval timer * @value: the itimers value, itimer is canceled if value->it_value is * zero, otherwise it is started * @expires: the itimers expiry time */ TRACE_EVENT(itimer_state, TP_PROTO(int which, const struct itimerspec64 *const value, unsigned long long expires), TP_ARGS(which, value, expires), TP_STRUCT__entry( __field( int, which ) __field( unsigned long long, expires ) __field( long, value_sec ) __field( long, value_nsec ) __field( long, interval_sec ) __field( long, interval_nsec ) ), TP_fast_assign( __entry->which = which; __entry->expires = expires; __entry->value_sec = value->it_value.tv_sec; __entry->value_nsec = value->it_value.tv_nsec; __entry->interval_sec = value->it_interval.tv_sec; __entry->interval_nsec = value->it_interval.tv_nsec; ), TP_printk("which=%d expires=%llu it_value=%ld.%06ld it_interval=%ld.%06ld", __entry->which, __entry->expires, __entry->value_sec, __entry->value_nsec / NSEC_PER_USEC, __entry->interval_sec, __entry->interval_nsec / NSEC_PER_USEC) ); /** * itimer_expire - called when itimer expires * @which: type of the interval timer * @pid: pid of the process which owns the timer * @now: current time, used to calculate the latency of itimer */ TRACE_EVENT(itimer_expire, TP_PROTO(int which, struct pid *pid, unsigned long long now), TP_ARGS(which, pid, now), TP_STRUCT__entry( __field( int , which ) __field( pid_t, pid ) __field( unsigned long long, now ) ), TP_fast_assign( __entry->which = which; __entry->now = now; __entry->pid = pid_nr(pid); ), TP_printk("which=%d pid=%d now=%llu", __entry->which, (int) 
__entry->pid, __entry->now) ); #ifdef CONFIG_NO_HZ_COMMON #define TICK_DEP_NAMES \ tick_dep_mask_name(NONE) \ tick_dep_name(POSIX_TIMER) \ tick_dep_name(PERF_EVENTS) \ tick_dep_name(SCHED) \ tick_dep_name(CLOCK_UNSTABLE) \ tick_dep_name(RCU) \ tick_dep_name_end(RCU_EXP) #undef tick_dep_name #undef tick_dep_mask_name #undef tick_dep_name_end /* The MASK will convert to their bits and they need to be processed too */ #define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \ TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep); #define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \ TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep); /* NONE only has a mask defined for it */ #define tick_dep_mask_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep); TICK_DEP_NAMES #undef tick_dep_name #undef tick_dep_mask_name #undef tick_dep_name_end #define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep }, #define tick_dep_mask_name(sdep) { TICK_DEP_MASK_##sdep, #sdep }, #define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep } #define show_tick_dep_name(val) \ __print_symbolic(val, TICK_DEP_NAMES) TRACE_EVENT(tick_stop, TP_PROTO(int success, int dependency), TP_ARGS(success, dependency), TP_STRUCT__entry( __field( int , success ) __field( int , dependency ) ), TP_fast_assign( __entry->success = success; __entry->dependency = dependency; ), TP_printk("success=%d dependency=%s", __entry->success, \ show_tick_dep_name(__entry->dependency)) ); #endif #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
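/*
 * A minimal sketch of the same conventions for a hypothetical "demo"
 * subsystem: one DECLARE_EVENT_CLASS shared by several DEFINE_EVENT sites,
 * exactly as timer_class/timer_init/timer_cancel are structured above. The
 * event and field names are illustrative; a real header would live at
 * include/trace/events/demo.h (or define TRACE_INCLUDE_PATH/FILE).
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM demo

#if !defined(_TRACE_DEMO_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_DEMO_H

#include <linux/tracepoint.h>

DECLARE_EVENT_CLASS(demo_class,

	TP_PROTO(unsigned int id, unsigned long value),

	TP_ARGS(id, value),

	TP_STRUCT__entry(
		__field(	unsigned int,	id	)
		__field(	unsigned long,	value	)
	),

	TP_fast_assign(
		__entry->id	= id;
		__entry->value	= value;
	),

	TP_printk("id=%u value=%lu", __entry->id, __entry->value)
);

DEFINE_EVENT(demo_class, demo_start,
	TP_PROTO(unsigned int id, unsigned long value),
	TP_ARGS(id, value)
);

DEFINE_EVENT(demo_class, demo_stop,
	TP_PROTO(unsigned int id, unsigned long value),
	TP_ARGS(id, value)
);

#endif /* _TRACE_DEMO_H */

/* This part must be outside protection */
#include <trace/define_trace.h>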
/* SPDX-License-Identifier: GPL-2.0 */

/*
 * This file provides wrappers with sanitizer instrumentation for bit
 * locking operations.
 *
 * To use this functionality, an arch's bitops.h file needs to define each of
 * the below bit operations with an arch_ prefix (e.g. arch_set_bit(),
 * arch___set_bit(), etc.).
 */
#ifndef _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H
#define _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H

#include <linux/instrumented.h>

/**
 * clear_bit_unlock - Clear a bit in memory, for unlock
 * @nr: the bit to clear
 * @addr: the address to start counting from
 *
 * This operation is atomic and provides release barrier semantics.
 */
static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	kcsan_release();
	instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
	arch_clear_bit_unlock(nr, addr);
}

/**
 * __clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * This is a non-atomic operation but implies a release barrier before the
 * memory operation. It can be used for an unlock if no other CPUs can
 * concurrently modify other bits in the word.
 */
static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	kcsan_release();
	instrument_write(addr + BIT_WORD(nr), sizeof(long));
	arch___clear_bit_unlock(nr, addr);
}

/**
 * test_and_set_bit_lock - Set a bit and return its old value, for lock
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and provides acquire barrier semantics if
 * the returned value is 0.
 * It can be used to implement bit locks.
 */
static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
	instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
	return arch_test_and_set_bit_lock(nr, addr);
}

/**
 * xor_unlock_is_negative_byte - XOR a single byte in memory and test if
 * it is negative, for unlock.
 * @mask: Change the bits which are set in this mask.
 * @addr: The address of the word containing the byte to change.
 *
 * Changes some of bits 0-6 in the word pointed to by @addr.
 * This operation is atomic and provides release barrier semantics.
 * Used to optimise some folio operations which are commonly paired
 * with an unlock or end of writeback. Bit 7 is used as PG_waiters to
 * indicate whether anybody is waiting for the unlock.
 *
 * Return: Whether the top bit of the byte is set.
 */
static inline bool xor_unlock_is_negative_byte(unsigned long mask,
		volatile unsigned long *addr)
{
	kcsan_release();
	instrument_atomic_write(addr, sizeof(long));
	return arch_xor_unlock_is_negative_byte(mask, addr);
}

#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H */
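/*
 * A minimal sketch of the bit-lock pattern these wrappers are designed for:
 * one bit of a flags word used as a tiny lock, taken with
 * test_and_set_bit_lock() (acquire on success) and dropped with
 * clear_bit_unlock() (release). demo_lock()/demo_unlock() and DEMO_LOCK_BIT
 * are illustrative; <linux/bit_spinlock.h> provides a ready-made version of
 * this idea as bit_spin_lock()/bit_spin_unlock().
 */
#include <linux/bitops.h>
#include <linux/processor.h>	/* cpu_relax() */

#define DEMO_LOCK_BIT	0

static inline void demo_lock(unsigned long *flags)
{
	/* Spin until the lock bit was previously clear; acquire on success. */
	while (test_and_set_bit_lock(DEMO_LOCK_BIT, flags))
		cpu_relax();
}

static inline void demo_unlock(unsigned long *flags)
{
	/* Release; pairs with the acquire in demo_lock(). */
	clear_bit_unlock(DEMO_LOCK_BIT, flags);
}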
236 242 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UACCESS_H #define _ASM_X86_UACCESS_H /* * User space memory access functions */ #include <linux/compiler.h> #include <linux/instrumented.h> #include <linux/kasan-checks.h> #include <linux/mm_types.h> #include <linux/string.h> #include <linux/mmap_lock.h> #include <asm/asm.h> #include <asm/page.h> #include <asm/smap.h> #include <asm/extable.h> #include <asm/tlbflush.h> #ifdef CONFIG_X86_32 # include <asm/uaccess_32.h> #else # include <asm/uaccess_64.h> #endif #include <asm-generic/access_ok.h> extern int __get_user_1(void); extern int __get_user_2(void); extern int __get_user_4(void); extern int __get_user_8(void); extern int __get_user_nocheck_1(void); extern int __get_user_nocheck_2(void); extern int __get_user_nocheck_4(void); extern int __get_user_nocheck_8(void); extern int __get_user_bad(void); #define __uaccess_begin() stac() #define __uaccess_end() clac() #define __uaccess_begin_nospec() \ ({ \ stac(); \ barrier_nospec(); \ }) /* * This is the smallest unsigned integer type that can fit a value * (up to 
'long long') */ #define __inttype(x) __typeof__( \ __typefits(x,char, \ __typefits(x,short, \ __typefits(x,int, \ __typefits(x,long,0ULL))))) #define __typefits(x,type,not) \ __builtin_choose_expr(sizeof(x)<=sizeof(type),(unsigned type)0,not) /* * This is used for both get_user() and __get_user() to expand to * the proper special function call that has odd calling conventions * due to returning both a value and an error, and that depends on * the size of the pointer passed in. * * Careful: we have to cast the result to the type of the pointer * for sign reasons. * * The use of _ASM_DX as the register specifier is a bit of a * simplification, as gcc only cares about it as the starting point * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits * (%ecx being the next register in gcc's x86 register sequence), and * %rdx on 64 bits. * * Clang/LLVM cares about the size of the register, but still wants * the base register for something that ends up being a pair. */ #define do_get_user_call(fn,x,ptr) \ ({ \ int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ __chk_user_ptr(ptr); \ asm volatile("call __" #fn "_%P4" \ : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ instrument_get_user(__val_gu); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ }) /** * get_user - Get a simple variable from user space. * @x: Variable to store result. * @ptr: Source address, in user space. * * Context: User context only. This function may sleep if pagefaults are * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger * data types like structures or arrays. * * @ptr must have pointer-to-simple-variable type, and the result of * dereferencing @ptr must be assignable to @x without a cast. * * Return: zero on success, or -EFAULT on error. * On error, the variable @x is set to zero. */ #define get_user(x,ptr) ({ might_fault(); do_get_user_call(get_user,x,ptr); }) /** * __get_user - Get a simple variable from user space, with less checking. * @x: Variable to store result. * @ptr: Source address, in user space. * * Context: User context only. This function may sleep if pagefaults are * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger * data types like structures or arrays. * * @ptr must have pointer-to-simple-variable type, and the result of * dereferencing @ptr must be assignable to @x without a cast. * * Caller must check the pointer with access_ok() before calling this * function. * * Return: zero on success, or -EFAULT on error. * On error, the variable @x is set to zero. */ #define __get_user(x,ptr) do_get_user_call(get_user_nocheck,x,ptr) #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ _ASM_EXTABLE_UA(2b, %l2) \ : : "A" (x), "r" (addr) \ : : label) #else #define __put_user_goto_u64(x, ptr, label) \ __put_user_goto(x, ptr, "q", "er", label) #endif extern void __put_user_bad(void); /* * Strange magic calling convention: pointer in %ecx, * value in %eax(:%edx), return value in %ecx. 
clobbers %rbx */ extern void __put_user_1(void); extern void __put_user_2(void); extern void __put_user_4(void); extern void __put_user_8(void); extern void __put_user_nocheck_1(void); extern void __put_user_nocheck_2(void); extern void __put_user_nocheck_4(void); extern void __put_user_nocheck_8(void); /* * ptr must be evaluated and assigned to the temporary __ptr_pu before * the assignment of x to __val_pu, to avoid any function calls * involved in the ptr expression (possibly implicitly generated due * to KASAN) from clobbering %ax. */ #define do_put_user_call(fn,x,ptr) \ ({ \ int __ret_pu; \ void __user *__ptr_pu; \ register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \ __typeof__(*(ptr)) __x = (x); /* eval x once */ \ __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ __chk_user_ptr(__ptr); \ __ptr_pu = __ptr; \ __val_pu = __x; \ asm volatile("call __" #fn "_%P[size]" \ : "=c" (__ret_pu), \ ASM_CALL_CONSTRAINT \ : "0" (__ptr_pu), \ "r" (__val_pu), \ [size] "i" (sizeof(*(ptr))) \ :"ebx"); \ instrument_put_user(__x, __ptr, sizeof(*(ptr))); \ __builtin_expect(__ret_pu, 0); \ }) /** * put_user - Write a simple value into user space. * @x: Value to copy to user space. * @ptr: Destination address, in user space. * * Context: User context only. This function may sleep if pagefaults are * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger * data types like structures or arrays. * * @ptr must have pointer-to-simple-variable type, and @x must be assignable * to the result of dereferencing @ptr. * * Return: zero on success, or -EFAULT on error. */ #define put_user(x, ptr) ({ might_fault(); do_put_user_call(put_user,x,ptr); }) /** * __put_user - Write a simple value into user space, with less checking. * @x: Value to copy to user space. * @ptr: Destination address, in user space. * * Context: User context only. This function may sleep if pagefaults are * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger * data types like structures or arrays. * * @ptr must have pointer-to-simple-variable type, and @x must be assignable * to the result of dereferencing @ptr. * * Caller must check the pointer with access_ok() before calling this * function. * * Return: zero on success, or -EFAULT on error. 
*/ #define __put_user(x, ptr) do_put_user_call(put_user_nocheck,x,ptr) #define __put_user_size(x, ptr, size, label) \ do { \ __typeof__(*(ptr)) __x = (x); /* eval x once */ \ __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ __chk_user_ptr(__ptr); \ switch (size) { \ case 1: \ __put_user_goto(__x, __ptr, "b", "iq", label); \ break; \ case 2: \ __put_user_goto(__x, __ptr, "w", "ir", label); \ break; \ case 4: \ __put_user_goto(__x, __ptr, "l", "ir", label); \ break; \ case 8: \ __put_user_goto_u64(__x, __ptr, label); \ break; \ default: \ __put_user_bad(); \ } \ instrument_put_user(__x, __ptr, size); \ } while (0) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #ifdef CONFIG_X86_32 #define __get_user_asm_u64(x, ptr, label) do { \ unsigned int __gu_low, __gu_high; \ const unsigned int __user *__gu_ptr; \ __gu_ptr = (const void __user *)(ptr); \ __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \ __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \ (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \ } while (0) #else #define __get_user_asm_u64(x, ptr, label) \ __get_user_asm(x, ptr, "q", "=r", label) #endif #define __get_user_size(x, ptr, size, label) \ do { \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: { \ unsigned char x_u8__; \ __get_user_asm(x_u8__, ptr, "b", "=q", label); \ (x) = x_u8__; \ break; \ } \ case 2: \ __get_user_asm(x, ptr, "w", "=r", label); \ break; \ case 4: \ __get_user_asm(x, ptr, "l", "=r", label); \ break; \ case 8: \ __get_user_asm_u64(x, ptr, label); \ break; \ default: \ (x) = __get_user_bad(); \ } \ instrument_get_user(x); \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \ asm_goto_output("\n" \ "1: mov"itype" %[umem],%[output]\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : [output] ltype(x) \ : [umem] "m" (__m(addr)) \ : : label) #else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT #ifdef CONFIG_X86_32 #define __get_user_asm_u64(x, ptr, retval) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ asm volatile("\n" \ "1: movl %[lowbits],%%eax\n" \ "2: movl %[highbits],%%edx\n" \ "3:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG | \ EX_FLAG_CLEAR_AX_DX, \ %[errout]) \ _ASM_EXTABLE_TYPE_REG(2b, 3b, EX_TYPE_EFAULT_REG | \ EX_FLAG_CLEAR_AX_DX, \ %[errout]) \ : [errout] "=r" (retval), \ [output] "=&A"(x) \ : [lowbits] "m" (__m(__ptr)), \ [highbits] "m" __m(((u32 __user *)(__ptr)) + 1), \ "0" (retval)); \ }) #else #define __get_user_asm_u64(x, ptr, retval) \ __get_user_asm(x, ptr, retval, "q") #endif #define __get_user_size(x, ptr, size, retval) \ do { \ unsigned char x_u8__; \ \ retval = 0; \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ __get_user_asm(x_u8__, ptr, retval, "b"); \ (x) = x_u8__; \ break; \ case 2: \ __get_user_asm(x, ptr, retval, "w"); \ break; \ case 4: \ __get_user_asm(x, ptr, retval, "l"); \ break; \ case 8: \ __get_user_asm_u64(x, ptr, retval); \ break; \ default: \ (x) = __get_user_bad(); \ } \ } while (0) #define __get_user_asm(x, addr, err, itype) \ asm volatile("\n" \ "1: mov"itype" %[umem],%[output]\n" \ "2:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG | \ EX_FLAG_CLEAR_AX, \ %[errout]) \ : [errout] "=r" (err), \ [output] "=a" (x) \ : [umem] "m" (__m(addr)), \ "0" (err)) #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT #ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT #define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ bool success; \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], 
%[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ [ptr] "+m" (*_ptr), \ [old] "+a" (__old) \ : [new] ltype (__new) \ : "memory" \ : label); \ if (unlikely(!success)) \ *_old = __old; \ likely(success); }) #ifdef CONFIG_X86_32 #define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ bool success; \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ "+A" (__old), \ [ptr] "+m" (*_ptr) \ : "b" ((u32)__new), \ "c" ((u32)((u64)__new >> 32)) \ : "memory" \ : label); \ if (unlikely(!success)) \ *_old = __old; \ likely(success); }) #endif // CONFIG_X86_32 #else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT #define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ int __err = 0; \ bool success; \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ asm volatile("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ CC_SET(z) \ "2:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ %[errout]) \ : CC_OUT(z) (success), \ [errout] "+r" (__err), \ [ptr] "+m" (*_ptr), \ [old] "+a" (__old) \ : [new] ltype (__new) \ : "memory"); \ if (unlikely(__err)) \ goto label; \ if (unlikely(!success)) \ *_old = __old; \ likely(success); }) #ifdef CONFIG_X86_32 /* * Unlike the normal CMPXCHG, use output GPR for both success/fail and error. * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses * both ESI and EDI for the memory operand, compilation will fail if the error * is an input+output as there will be no register available for input. */ #define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ int __result; \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ asm volatile("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ "mov $0, %[result]\n\t" \ "setz %b[result]\n" \ "2:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ %[result]) \ : [result] "=q" (__result), \ "+A" (__old), \ [ptr] "+m" (*_ptr) \ : "b" ((u32)__new), \ "c" ((u32)((u64)__new >> 32)) \ : "memory", "cc"); \ if (unlikely(__result < 0)) \ goto label; \ if (unlikely(!__result)) \ *_old = __old; \ likely(__result); }) #endif // CONFIG_X86_32 #endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct __user *)(x)) /* * Tell gcc we read from memory instead of writing: this is because * we do not write to any memory gcc knows about, so there are no * aliasing issues. 
*/ #define __put_user_goto(x, addr, itype, ltype, label) \ asm goto("\n" \ "1: mov"itype" %0,%1\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ : : label) extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); extern __must_check long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); #ifdef CONFIG_ARCH_HAS_COPY_MC unsigned long __must_check copy_mc_to_kernel(void *to, const void *from, unsigned len); #define copy_mc_to_kernel copy_mc_to_kernel unsigned long __must_check copy_mc_to_user(void __user *to, const void *from, unsigned len); #endif /* * movsl can be slow when source and dest are not both 8-byte aligned */ #ifdef CONFIG_X86_INTEL_USERCOPY extern struct movsl_mask { int mask; } ____cacheline_aligned_in_smp movsl_mask; #endif #define ARCH_HAS_NOCACHE_UACCESS 1 /* * The "unsafe" user accesses aren't really "unsafe", but the naming * is a big fat warning: you have to not only do the access_ok() * checking before using them, but you have to surround them with the * user_access_begin/end() pair. */ static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr,len))) return 0; __uaccess_begin_nospec(); return 1; } #define user_access_begin(a,b) user_access_begin(a,b) #define user_access_end() __uaccess_end() #define user_access_save() smap_save() #define user_access_restore(x) smap_restore(x) #define unsafe_put_user(x, ptr, label) \ __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define unsafe_get_user(x, ptr, err_label) \ do { \ __inttype(*(ptr)) __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) #else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define unsafe_get_user(x, ptr, err_label) \ do { \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ if (unlikely(__gu_err)) goto err_label; \ } while (0) #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT extern void __try_cmpxchg_user_wrong_size(void); #ifndef CONFIG_X86_32 #define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) #endif /* * Force the pointer to u<size> to match the size expected by the asm helper. * clang/LLVM compiles all cases and only discards the unused paths after * processing errors, which breaks i386 if the pointer is an 8-byte value. */ #define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ bool __ret; \ __chk_user_ptr(_ptr); \ switch (sizeof(*(_ptr))) { \ case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ (__force u8 *)(_ptr), (_oldp), \ (_nval), _label); \ break; \ case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ (__force u16 *)(_ptr), (_oldp), \ (_nval), _label); \ break; \ case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ (__force u32 *)(_ptr), (_oldp), \ (_nval), _label); \ break; \ case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ (_nval), _label); \ break; \ default: __try_cmpxchg_user_wrong_size(); \ } \ __ret; }) /* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. 
*/ #define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ int __ret = -EFAULT; \ __uaccess_begin_nospec(); \ __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ _label: \ __uaccess_end(); \ __ret; \ }) /* * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. */ #define unsafe_copy_loop(dst, src, len, type, label) \ while (len >= sizeof(type)) { \ unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } #define unsafe_copy_to_user(_dst,_src,_len,label) \ do { \ char __user *__ucu_dst = (_dst); \ const char *__ucu_src = (_src); \ size_t __ucu_len = (_len); \ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ } while (0) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_kernel_nofault(dst, src, type, err_label) \ __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ sizeof(type), err_label) #else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ int __kr_err; \ \ __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ sizeof(type), __kr_err); \ if (unlikely(__kr_err)) \ goto err_label; \ } while (0) #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __put_kernel_nofault(dst, src, type, err_label) \ __put_user_size(*((type *)(src)), (__force type __user *)(dst), \ sizeof(type), err_label) #endif /* _ASM_X86_UACCESS_H */
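/*
 * A sketch of how the accessors above are typically consumed, assuming a
 * hypothetical handler that reads two user fields and writes back their sum.
 * struct demo_pair and demo_sum_pair() are illustrative names; the batched
 * unsafe_get_user() calls must sit between user_access_begin() and
 * user_access_end(), with the fault label cleaning up the access window.
 */
#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/errno.h>

struct demo_pair {
	u32 a;
	u32 b;
};

static int demo_sum_pair(const struct demo_pair __user *upair,
			 u64 __user *usum)
{
	u32 a, b;

	/* Batched reads: one access_ok()/STAC window covers both fields. */
	if (!user_access_begin(upair, sizeof(*upair)))
		return -EFAULT;
	unsafe_get_user(a, &upair->a, efault);
	unsafe_get_user(b, &upair->b, efault);
	user_access_end();

	/* One-off write: put_user() performs its own access_ok() check. */
	if (put_user((u64)a + b, usum))
		return -EFAULT;

	return 0;

efault:
	user_access_end();
	return -EFAULT;
}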
84 82 84 84 3 3 3 3 3 3 3 84 83 82 84 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 #ifndef _LINUX_MMAP_LOCK_H #define _LINUX_MMAP_LOCK_H #include <linux/lockdep.h> #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/rwsem.h> #include <linux/tracepoint-defs.h> #include <linux/types.h> #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), DECLARE_TRACEPOINT(mmap_lock_start_locking); DECLARE_TRACEPOINT(mmap_lock_acquire_returned); DECLARE_TRACEPOINT(mmap_lock_released); #ifdef CONFIG_TRACING void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write); void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, bool success); void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write); static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm, bool write) { if (tracepoint_enabled(mmap_lock_start_locking)) __mmap_lock_do_trace_start_locking(mm, write); } static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm, bool write, bool success) { if (tracepoint_enabled(mmap_lock_acquire_returned)) __mmap_lock_do_trace_acquire_returned(mm, write, success); } static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write) { if (tracepoint_enabled(mmap_lock_released)) __mmap_lock_do_trace_released(mm, write); } #else /* !CONFIG_TRACING */ static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm, bool write) { } static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm, bool write, bool success) { } static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write) { } #endif /* CONFIG_TRACING */ static inline void mmap_assert_locked(struct mm_struct *mm) { lockdep_assert_held(&mm->mmap_lock); VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } static inline void mmap_assert_write_locked(struct mm_struct *mm) { lockdep_assert_held_write(&mm->mmap_lock); VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } #ifdef CONFIG_PER_VMA_LOCK /* * Drop all currently-held per-VMA locks. * This is called from the mmap_lock implementation directly before releasing * a write-locked mmap_lock (or downgrading it to read-locked). * This should normally NOT be called manually from other places. * If you want to call this manually anyway, keep in mind that this will release * *all* VMA write locks, including ones from further up the stack. */ static inline void vma_end_write_all(struct mm_struct *mm) { mmap_assert_write_locked(mm); /* * Nobody can concurrently modify mm->mm_lock_seq due to exclusive * mmap_lock being held. * We need RELEASE semantics here to ensure that preceding stores into * the VMA take effect before we unlock it with this store. * Pairs with ACQUIRE semantics in vma_start_read(). 
*/ smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1); } #else static inline void vma_end_write_all(struct mm_struct *mm) {} #endif static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); } static inline void mmap_write_lock(struct mm_struct *mm) { __mmap_lock_trace_start_locking(mm, true); down_write(&mm->mmap_lock); __mmap_lock_trace_acquire_returned(mm, true, true); } static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) { __mmap_lock_trace_start_locking(mm, true); down_write_nested(&mm->mmap_lock, subclass); __mmap_lock_trace_acquire_returned(mm, true, true); } static inline int mmap_write_lock_killable(struct mm_struct *mm) { int ret; __mmap_lock_trace_start_locking(mm, true); ret = down_write_killable(&mm->mmap_lock); __mmap_lock_trace_acquire_returned(mm, true, ret == 0); return ret; } static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); vma_end_write_all(mm); up_write(&mm->mmap_lock); } static inline void mmap_write_downgrade(struct mm_struct *mm) { __mmap_lock_trace_acquire_returned(mm, false, true); vma_end_write_all(mm); downgrade_write(&mm->mmap_lock); } static inline void mmap_read_lock(struct mm_struct *mm) { __mmap_lock_trace_start_locking(mm, false); down_read(&mm->mmap_lock); __mmap_lock_trace_acquire_returned(mm, false, true); } static inline int mmap_read_lock_killable(struct mm_struct *mm) { int ret; __mmap_lock_trace_start_locking(mm, false); ret = down_read_killable(&mm->mmap_lock); __mmap_lock_trace_acquire_returned(mm, false, ret == 0); return ret; } static inline bool mmap_read_trylock(struct mm_struct *mm) { bool ret; __mmap_lock_trace_start_locking(mm, false); ret = down_read_trylock(&mm->mmap_lock) != 0; __mmap_lock_trace_acquire_returned(mm, false, ret); return ret; } static inline void mmap_read_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, false); up_read(&mm->mmap_lock); } static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { __mmap_lock_trace_released(mm, false); up_read_non_owner(&mm->mmap_lock); } static inline int mmap_lock_is_contended(struct mm_struct *mm) { return rwsem_is_contended(&mm->mmap_lock); } #endif /* _LINUX_MMAP_LOCK_H */
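/*
 * A minimal sketch of the locking discipline these wrappers encode: hold the
 * mmap lock for read around a VMA lookup, preferring the killable variant on
 * user-triggered paths. demo_addr_is_mapped() is an illustrative helper, not
 * part of mmap_lock.h.
 */
#include <linux/mm.h>
#include <linux/mmap_lock.h>

static int demo_addr_is_mapped(struct mm_struct *mm, unsigned long addr,
			       bool *mapped)
{
	struct vm_area_struct *vma;
	int err;

	err = mmap_read_lock_killable(mm);
	if (err)
		return err;	/* a fatal signal interrupted the wait */

	vma = find_vma(mm, addr);	/* requires the mmap lock to be held */
	*mapped = vma && addr >= vma->vm_start;

	mmap_read_unlock(mm);
	return 0;
}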
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  USB HID support for Linux
 *
 *  Copyright (c) 1999 Andreas Gal
 *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
 *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
 *  Copyright (c) 2007-2008 Oliver Neukum
 *  Copyright (c) 2006-2010 Jiri Kosina
 */

/*
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <asm/byteorder.h>
#include <linux/input.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/string.h>

#include <linux/usb.h>

#include <linux/hid.h>
#include <linux/hiddev.h>
#include <linux/hid-debug.h>
#include <linux/hidraw.h>
#include "usbhid.h"

/*
 * Version Information
 */

#define DRIVER_DESC "USB HID core driver"

/*
 * Module parameters.
 */

static unsigned int hid_mousepoll_interval;
module_param_named(mousepoll, hid_mousepoll_interval, uint, 0644);
MODULE_PARM_DESC(mousepoll, "Polling interval of mice");

static unsigned int hid_jspoll_interval;
module_param_named(jspoll, hid_jspoll_interval, uint, 0644);
MODULE_PARM_DESC(jspoll, "Polling interval of joysticks");

static unsigned int hid_kbpoll_interval;
module_param_named(kbpoll, hid_kbpoll_interval, uint, 0644);
MODULE_PARM_DESC(kbpoll, "Polling interval of keyboards");

static unsigned int ignoreled;
module_param_named(ignoreled, ignoreled, uint, 0644);
MODULE_PARM_DESC(ignoreled, "Autosuspend with active leds");

/* Quirks specified at module load time */
static char *quirks_param[MAX_USBHID_BOOT_QUIRKS];
module_param_array_named(quirks, quirks_param, charp, NULL, 0444);
MODULE_PARM_DESC(quirks, "Add/modify USB HID quirks by specifying "
		" quirks=vendorID:productID:quirks"
		" where vendorID, productID, and quirks are all in"
		" 0x-prefixed hex");

/*
 * Input submission and I/O error handler.
*/ static void hid_io_error(struct hid_device *hid); static int hid_submit_out(struct hid_device *hid); static int hid_submit_ctrl(struct hid_device *hid); static void hid_cancel_delayed_stuff(struct usbhid_device *usbhid); /* Start up the input URB */ static int hid_start_in(struct hid_device *hid) { unsigned long flags; int rc = 0; struct usbhid_device *usbhid = hid->driver_data; spin_lock_irqsave(&usbhid->lock, flags); if (test_bit(HID_IN_POLLING, &usbhid->iofl) && !test_bit(HID_DISCONNECTED, &usbhid->iofl) && !test_bit(HID_SUSPENDED, &usbhid->iofl) && !test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) { rc = usb_submit_urb(usbhid->urbin, GFP_ATOMIC); if (rc != 0) { clear_bit(HID_IN_RUNNING, &usbhid->iofl); if (rc == -ENOSPC) set_bit(HID_NO_BANDWIDTH, &usbhid->iofl); } else { clear_bit(HID_NO_BANDWIDTH, &usbhid->iofl); } } spin_unlock_irqrestore(&usbhid->lock, flags); return rc; } /* I/O retry timer routine */ static void hid_retry_timeout(struct timer_list *t) { struct usbhid_device *usbhid = from_timer(usbhid, t, io_retry); struct hid_device *hid = usbhid->hid; dev_dbg(&usbhid->intf->dev, "retrying intr urb\n"); if (hid_start_in(hid)) hid_io_error(hid); } /* Workqueue routine to reset the device or clear a halt */ static void hid_reset(struct work_struct *work) { struct usbhid_device *usbhid = container_of(work, struct usbhid_device, reset_work); struct hid_device *hid = usbhid->hid; int rc; if (test_bit(HID_CLEAR_HALT, &usbhid->iofl)) { dev_dbg(&usbhid->intf->dev, "clear halt\n"); rc = usb_clear_halt(hid_to_usb_dev(hid), usbhid->urbin->pipe); clear_bit(HID_CLEAR_HALT, &usbhid->iofl); if (rc == 0) { hid_start_in(hid); } else { dev_dbg(&usbhid->intf->dev, "clear-halt failed: %d\n", rc); set_bit(HID_RESET_PENDING, &usbhid->iofl); } } if (test_bit(HID_RESET_PENDING, &usbhid->iofl)) { dev_dbg(&usbhid->intf->dev, "resetting device\n"); usb_queue_reset_device(usbhid->intf); } } /* Main I/O error handler */ static void hid_io_error(struct hid_device *hid) { unsigned long flags; struct usbhid_device *usbhid = hid->driver_data; spin_lock_irqsave(&usbhid->lock, flags); /* Stop when disconnected */ if (test_bit(HID_DISCONNECTED, &usbhid->iofl)) goto done; /* If it has been a while since the last error, we'll assume * this a brand new error and reset the retry timeout. */ if (time_after(jiffies, usbhid->stop_retry + HZ/2)) usbhid->retry_delay = 0; /* When an error occurs, retry at increasing intervals */ if (usbhid->retry_delay == 0) { usbhid->retry_delay = 13; /* Then 26, 52, 104, 104, ... 
*/ usbhid->stop_retry = jiffies + msecs_to_jiffies(1000); } else if (usbhid->retry_delay < 100) usbhid->retry_delay *= 2; if (time_after(jiffies, usbhid->stop_retry)) { /* Retries failed, so do a port reset unless we lack bandwidth*/ if (!test_bit(HID_NO_BANDWIDTH, &usbhid->iofl) && !test_and_set_bit(HID_RESET_PENDING, &usbhid->iofl)) { schedule_work(&usbhid->reset_work); goto done; } } mod_timer(&usbhid->io_retry, jiffies + msecs_to_jiffies(usbhid->retry_delay)); done: spin_unlock_irqrestore(&usbhid->lock, flags); } static void usbhid_mark_busy(struct usbhid_device *usbhid) { struct usb_interface *intf = usbhid->intf; usb_mark_last_busy(interface_to_usbdev(intf)); } static int usbhid_restart_out_queue(struct usbhid_device *usbhid) { struct hid_device *hid = usb_get_intfdata(usbhid->intf); int kicked; int r; if (!hid || test_bit(HID_RESET_PENDING, &usbhid->iofl) || test_bit(HID_SUSPENDED, &usbhid->iofl)) return 0; if ((kicked = (usbhid->outhead != usbhid->outtail))) { hid_dbg(hid, "Kicking head %d tail %d", usbhid->outhead, usbhid->outtail); /* Try to wake up from autosuspend... */ r = usb_autopm_get_interface_async(usbhid->intf); if (r < 0) return r; /* * If still suspended, don't submit. Submission will * occur if/when resume drains the queue. */ if (test_bit(HID_SUSPENDED, &usbhid->iofl)) { usb_autopm_put_interface_no_suspend(usbhid->intf); return r; } /* Asynchronously flush queue. */ set_bit(HID_OUT_RUNNING, &usbhid->iofl); if (hid_submit_out(hid)) { clear_bit(HID_OUT_RUNNING, &usbhid->iofl); usb_autopm_put_interface_async(usbhid->intf); } wake_up(&usbhid->wait); } return kicked; } static int usbhid_restart_ctrl_queue(struct usbhid_device *usbhid) { struct hid_device *hid = usb_get_intfdata(usbhid->intf); int kicked; int r; WARN_ON(hid == NULL); if (!hid || test_bit(HID_RESET_PENDING, &usbhid->iofl) || test_bit(HID_SUSPENDED, &usbhid->iofl)) return 0; if ((kicked = (usbhid->ctrlhead != usbhid->ctrltail))) { hid_dbg(hid, "Kicking head %d tail %d", usbhid->ctrlhead, usbhid->ctrltail); /* Try to wake up from autosuspend... */ r = usb_autopm_get_interface_async(usbhid->intf); if (r < 0) return r; /* * If still suspended, don't submit. Submission will * occur if/when resume drains the queue. */ if (test_bit(HID_SUSPENDED, &usbhid->iofl)) { usb_autopm_put_interface_no_suspend(usbhid->intf); return r; } /* Asynchronously flush queue. */ set_bit(HID_CTRL_RUNNING, &usbhid->iofl); if (hid_submit_ctrl(hid)) { clear_bit(HID_CTRL_RUNNING, &usbhid->iofl); usb_autopm_put_interface_async(usbhid->intf); } wake_up(&usbhid->wait); } return kicked; } /* * Input interrupt completion handler. 
*/ static void hid_irq_in(struct urb *urb) { struct hid_device *hid = urb->context; struct usbhid_device *usbhid = hid->driver_data; int status; switch (urb->status) { case 0: /* success */ usbhid->retry_delay = 0; if (!test_bit(HID_OPENED, &usbhid->iofl)) break; usbhid_mark_busy(usbhid); if (!test_bit(HID_RESUME_RUNNING, &usbhid->iofl)) { hid_input_report(urb->context, HID_INPUT_REPORT, urb->transfer_buffer, urb->actual_length, 1); /* * autosuspend refused while keys are pressed * because most keyboards don't wake up when * a key is released */ if (hid_check_keys_pressed(hid)) set_bit(HID_KEYS_PRESSED, &usbhid->iofl); else clear_bit(HID_KEYS_PRESSED, &usbhid->iofl); } break; case -EPIPE: /* stall */ usbhid_mark_busy(usbhid); clear_bit(HID_IN_RUNNING, &usbhid->iofl); set_bit(HID_CLEAR_HALT, &usbhid->iofl); schedule_work(&usbhid->reset_work); return; case -ECONNRESET: /* unlink */ case -ENOENT: case -ESHUTDOWN: /* unplug */ clear_bit(HID_IN_RUNNING, &usbhid->iofl); return; case -EILSEQ: /* protocol error or unplug */ case -EPROTO: /* protocol error or unplug */ case -ETIME: /* protocol error or unplug */ case -ETIMEDOUT: /* Should never happen, but... */ usbhid_mark_busy(usbhid); clear_bit(HID_IN_RUNNING, &usbhid->iofl); hid_io_error(hid); return; default: /* error */ hid_warn(urb->dev, "input irq status %d received\n", urb->status); } status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { clear_bit(HID_IN_RUNNING, &usbhid->iofl); if (status != -EPERM) { hid_err(hid, "can't resubmit intr, %s-%s/input%d, status %d\n", hid_to_usb_dev(hid)->bus->bus_name, hid_to_usb_dev(hid)->devpath, usbhid->ifnum, status); hid_io_error(hid); } } } static int hid_submit_out(struct hid_device *hid) { struct hid_report *report; char *raw_report; struct usbhid_device *usbhid = hid->driver_data; int r; report = usbhid->out[usbhid->outtail].report; raw_report = usbhid->out[usbhid->outtail].raw_report; usbhid->urbout->transfer_buffer_length = hid_report_len(report); usbhid->urbout->dev = hid_to_usb_dev(hid); if (raw_report) { memcpy(usbhid->outbuf, raw_report, usbhid->urbout->transfer_buffer_length); kfree(raw_report); usbhid->out[usbhid->outtail].raw_report = NULL; } dbg_hid("submitting out urb\n"); r = usb_submit_urb(usbhid->urbout, GFP_ATOMIC); if (r < 0) { hid_err(hid, "usb_submit_urb(out) failed: %d\n", r); return r; } usbhid->last_out = jiffies; return 0; } static int hid_submit_ctrl(struct hid_device *hid) { struct hid_report *report; unsigned char dir; char *raw_report; int len, r; struct usbhid_device *usbhid = hid->driver_data; report = usbhid->ctrl[usbhid->ctrltail].report; raw_report = usbhid->ctrl[usbhid->ctrltail].raw_report; dir = usbhid->ctrl[usbhid->ctrltail].dir; len = hid_report_len(report); if (dir == USB_DIR_OUT) { usbhid->urbctrl->pipe = usb_sndctrlpipe(hid_to_usb_dev(hid), 0); if (raw_report) { memcpy(usbhid->ctrlbuf, raw_report, len); kfree(raw_report); usbhid->ctrl[usbhid->ctrltail].raw_report = NULL; } } else { int maxpacket; usbhid->urbctrl->pipe = usb_rcvctrlpipe(hid_to_usb_dev(hid), 0); maxpacket = usb_maxpacket(hid_to_usb_dev(hid), usbhid->urbctrl->pipe); len += (len == 0); /* Don't allow 0-length reports */ len = round_up(len, maxpacket); if (len > usbhid->bufsize) len = usbhid->bufsize; } usbhid->urbctrl->transfer_buffer_length = len; usbhid->urbctrl->dev = hid_to_usb_dev(hid); usbhid->cr->bRequestType = USB_TYPE_CLASS | USB_RECIP_INTERFACE | dir; usbhid->cr->bRequest = (dir == USB_DIR_OUT) ? 
HID_REQ_SET_REPORT : HID_REQ_GET_REPORT; usbhid->cr->wValue = cpu_to_le16(((report->type + 1) << 8) | report->id); usbhid->cr->wIndex = cpu_to_le16(usbhid->ifnum); usbhid->cr->wLength = cpu_to_le16(len); dbg_hid("submitting ctrl urb: %s wValue=0x%04x wIndex=0x%04x wLength=%u\n", usbhid->cr->bRequest == HID_REQ_SET_REPORT ? "Set_Report" : "Get_Report", usbhid->cr->wValue, usbhid->cr->wIndex, usbhid->cr->wLength); r = usb_submit_urb(usbhid->urbctrl, GFP_ATOMIC); if (r < 0) { hid_err(hid, "usb_submit_urb(ctrl) failed: %d\n", r); return r; } usbhid->last_ctrl = jiffies; return 0; } /* * Output interrupt completion handler. */ static void hid_irq_out(struct urb *urb) { struct hid_device *hid = urb->context; struct usbhid_device *usbhid = hid->driver_data; unsigned long flags; int unplug = 0; switch (urb->status) { case 0: /* success */ break; case -ESHUTDOWN: /* unplug */ unplug = 1; break; case -EILSEQ: /* protocol error or unplug */ case -EPROTO: /* protocol error or unplug */ case -ECONNRESET: /* unlink */ case -ENOENT: break; default: /* error */ hid_warn(urb->dev, "output irq status %d received\n", urb->status); } spin_lock_irqsave(&usbhid->lock, flags); if (unplug) { usbhid->outtail = usbhid->outhead; } else { usbhid->outtail = (usbhid->outtail + 1) & (HID_OUTPUT_FIFO_SIZE - 1); if (usbhid->outhead != usbhid->outtail && hid_submit_out(hid) == 0) { /* Successfully submitted next urb in queue */ spin_unlock_irqrestore(&usbhid->lock, flags); return; } } clear_bit(HID_OUT_RUNNING, &usbhid->iofl); spin_unlock_irqrestore(&usbhid->lock, flags); usb_autopm_put_interface_async(usbhid->intf); wake_up(&usbhid->wait); } /* * Control pipe completion handler. */ static void hid_ctrl(struct urb *urb) { struct hid_device *hid = urb->context; struct usbhid_device *usbhid = hid->driver_data; unsigned long flags; int unplug = 0, status = urb->status; switch (status) { case 0: /* success */ if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_IN) hid_input_report(urb->context, usbhid->ctrl[usbhid->ctrltail].report->type, urb->transfer_buffer, urb->actual_length, 0); break; case -ESHUTDOWN: /* unplug */ unplug = 1; break; case -EILSEQ: /* protocol error or unplug */ case -EPROTO: /* protocol error or unplug */ case -ECONNRESET: /* unlink */ case -ENOENT: case -EPIPE: /* report not available */ break; default: /* error */ hid_warn(urb->dev, "ctrl urb status %d received\n", status); } spin_lock_irqsave(&usbhid->lock, flags); if (unplug) { usbhid->ctrltail = usbhid->ctrlhead; } else if (usbhid->ctrlhead != usbhid->ctrltail) { usbhid->ctrltail = (usbhid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1); if (usbhid->ctrlhead != usbhid->ctrltail && hid_submit_ctrl(hid) == 0) { /* Successfully submitted next urb in queue */ spin_unlock_irqrestore(&usbhid->lock, flags); return; } } clear_bit(HID_CTRL_RUNNING, &usbhid->iofl); spin_unlock_irqrestore(&usbhid->lock, flags); usb_autopm_put_interface_async(usbhid->intf); wake_up(&usbhid->wait); } static void __usbhid_submit_report(struct hid_device *hid, struct hid_report *report, unsigned char dir) { int head; struct usbhid_device *usbhid = hid->driver_data; if (((hid->quirks & HID_QUIRK_NOGET) && dir == USB_DIR_IN) || test_bit(HID_DISCONNECTED, &usbhid->iofl)) return; if (usbhid->urbout && dir == USB_DIR_OUT && report->type == HID_OUTPUT_REPORT) { if ((head = (usbhid->outhead + 1) & (HID_OUTPUT_FIFO_SIZE - 1)) == usbhid->outtail) { hid_warn(hid, "output queue full\n"); return; } usbhid->out[usbhid->outhead].raw_report = hid_alloc_report_buf(report, GFP_ATOMIC); if 
(!usbhid->out[usbhid->outhead].raw_report) { hid_warn(hid, "output queueing failed\n"); return; } hid_output_report(report, usbhid->out[usbhid->outhead].raw_report); usbhid->out[usbhid->outhead].report = report; usbhid->outhead = head; /* If the queue isn't running, restart it */ if (!test_bit(HID_OUT_RUNNING, &usbhid->iofl)) { usbhid_restart_out_queue(usbhid); /* Otherwise see if an earlier request has timed out */ } else if (time_after(jiffies, usbhid->last_out + HZ * 5)) { /* Prevent autosuspend following the unlink */ usb_autopm_get_interface_no_resume(usbhid->intf); /* * Prevent resubmission in case the URB completes * before we can unlink it. We don't want to cancel * the wrong transfer! */ usb_block_urb(usbhid->urbout); /* Drop lock to avoid deadlock if the callback runs */ spin_unlock(&usbhid->lock); usb_unlink_urb(usbhid->urbout); spin_lock(&usbhid->lock); usb_unblock_urb(usbhid->urbout); /* Unlink might have stopped the queue */ if (!test_bit(HID_OUT_RUNNING, &usbhid->iofl)) usbhid_restart_out_queue(usbhid); /* Now we can allow autosuspend again */ usb_autopm_put_interface_async(usbhid->intf); } return; } if ((head = (usbhid->ctrlhead + 1) & (HID_CONTROL_FIFO_SIZE - 1)) == usbhid->ctrltail) { hid_warn(hid, "control queue full\n"); return; } if (dir == USB_DIR_OUT) { usbhid->ctrl[usbhid->ctrlhead].raw_report = hid_alloc_report_buf(report, GFP_ATOMIC); if (!usbhid->ctrl[usbhid->ctrlhead].raw_report) { hid_warn(hid, "control queueing failed\n"); return; } hid_output_report(report, usbhid->ctrl[usbhid->ctrlhead].raw_report); } usbhid->ctrl[usbhid->ctrlhead].report = report; usbhid->ctrl[usbhid->ctrlhead].dir = dir; usbhid->ctrlhead = head; /* If the queue isn't running, restart it */ if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) { usbhid_restart_ctrl_queue(usbhid); /* Otherwise see if an earlier request has timed out */ } else if (time_after(jiffies, usbhid->last_ctrl + HZ * 5)) { /* Prevent autosuspend following the unlink */ usb_autopm_get_interface_no_resume(usbhid->intf); /* * Prevent resubmission in case the URB completes * before we can unlink it. We don't want to cancel * the wrong transfer! 
*/ usb_block_urb(usbhid->urbctrl); /* Drop lock to avoid deadlock if the callback runs */ spin_unlock(&usbhid->lock); usb_unlink_urb(usbhid->urbctrl); spin_lock(&usbhid->lock); usb_unblock_urb(usbhid->urbctrl); /* Unlink might have stopped the queue */ if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) usbhid_restart_ctrl_queue(usbhid); /* Now we can allow autosuspend again */ usb_autopm_put_interface_async(usbhid->intf); } } static void usbhid_submit_report(struct hid_device *hid, struct hid_report *report, unsigned char dir) { struct usbhid_device *usbhid = hid->driver_data; unsigned long flags; spin_lock_irqsave(&usbhid->lock, flags); __usbhid_submit_report(hid, report, dir); spin_unlock_irqrestore(&usbhid->lock, flags); } static int usbhid_wait_io(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; if (!wait_event_timeout(usbhid->wait, (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl) && !test_bit(HID_OUT_RUNNING, &usbhid->iofl)), 10*HZ)) { dbg_hid("timeout waiting for ctrl or out queue to clear\n"); return -1; } return 0; } static int hid_set_idle(struct usb_device *dev, int ifnum, int report, int idle) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), HID_REQ_SET_IDLE, USB_TYPE_CLASS | USB_RECIP_INTERFACE, (idle << 8) | report, ifnum, NULL, 0, USB_CTRL_SET_TIMEOUT); } static int hid_get_class_descriptor(struct usb_device *dev, int ifnum, unsigned char type, void *buf, int size) { int result, retries = 4; memset(buf, 0, size); do { result = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_DESCRIPTOR, USB_RECIP_INTERFACE | USB_DIR_IN, (type << 8), ifnum, buf, size, USB_CTRL_GET_TIMEOUT); retries--; } while (result < size && retries); return result; } static int usbhid_open(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; int res; mutex_lock(&usbhid->mutex); set_bit(HID_OPENED, &usbhid->iofl); if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { res = 0; goto Done; } res = usb_autopm_get_interface(usbhid->intf); /* the device must be awake to reliably request remote wakeup */ if (res < 0) { clear_bit(HID_OPENED, &usbhid->iofl); res = -EIO; goto Done; } usbhid->intf->needs_remote_wakeup = 1; set_bit(HID_RESUME_RUNNING, &usbhid->iofl); set_bit(HID_IN_POLLING, &usbhid->iofl); res = hid_start_in(hid); if (res) { if (res != -ENOSPC) { hid_io_error(hid); res = 0; } else { /* no use opening if resources are insufficient */ res = -EBUSY; clear_bit(HID_OPENED, &usbhid->iofl); clear_bit(HID_IN_POLLING, &usbhid->iofl); usbhid->intf->needs_remote_wakeup = 0; } } usb_autopm_put_interface(usbhid->intf); /* * In case events are generated while nobody was listening, * some are released when the device is re-opened. * Wait 50 msec for the queue to empty before allowing events * to go through hid. */ if (res == 0) msleep(50); clear_bit(HID_RESUME_RUNNING, &usbhid->iofl); Done: mutex_unlock(&usbhid->mutex); return res; } static void usbhid_close(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; mutex_lock(&usbhid->mutex); /* * Make sure we don't restart data acquisition due to * a resumption we no longer care about by avoiding racing * with hid_start_in(). 
*/ spin_lock_irq(&usbhid->lock); clear_bit(HID_OPENED, &usbhid->iofl); if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) clear_bit(HID_IN_POLLING, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) { hid_cancel_delayed_stuff(usbhid); usb_kill_urb(usbhid->urbin); usbhid->intf->needs_remote_wakeup = 0; } mutex_unlock(&usbhid->mutex); } /* * Initialize all reports */ void usbhid_init_reports(struct hid_device *hid) { struct hid_report *report; struct usbhid_device *usbhid = hid->driver_data; struct hid_report_enum *report_enum; int err, ret; report_enum = &hid->report_enum[HID_INPUT_REPORT]; list_for_each_entry(report, &report_enum->report_list, list) usbhid_submit_report(hid, report, USB_DIR_IN); report_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(report, &report_enum->report_list, list) usbhid_submit_report(hid, report, USB_DIR_IN); err = 0; ret = usbhid_wait_io(hid); while (ret) { err |= ret; if (test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) usb_kill_urb(usbhid->urbctrl); if (test_bit(HID_OUT_RUNNING, &usbhid->iofl)) usb_kill_urb(usbhid->urbout); ret = usbhid_wait_io(hid); } if (err) hid_warn(hid, "timeout initializing reports\n"); } /* * Reset LEDs which BIOS might have left on. For now, just NumLock (0x01). */ static int hid_find_field_early(struct hid_device *hid, unsigned int page, unsigned int hid_code, struct hid_field **pfield) { struct hid_report *report; struct hid_field *field; struct hid_usage *usage; int i, j; list_for_each_entry(report, &hid->report_enum[HID_OUTPUT_REPORT].report_list, list) { for (i = 0; i < report->maxfield; i++) { field = report->field[i]; for (j = 0; j < field->maxusage; j++) { usage = &field->usage[j]; if ((usage->hid & HID_USAGE_PAGE) == page && (usage->hid & 0xFFFF) == hid_code) { *pfield = field; return j; } } } } return -1; } static void usbhid_set_leds(struct hid_device *hid) { struct hid_field *field; int offset; if ((offset = hid_find_field_early(hid, HID_UP_LED, 0x01, &field)) != -1) { hid_set_field(field, offset, 0); usbhid_submit_report(hid, field->report, USB_DIR_OUT); } } /* * Traverse the supplied list of reports and find the longest */ static void hid_find_max_report(struct hid_device *hid, unsigned int type, unsigned int *max) { struct hid_report *report; unsigned int size; list_for_each_entry(report, &hid->report_enum[type].report_list, list) { size = ((report->size - 1) >> 3) + 1 + hid->report_enum[type].numbered; if (*max < size) *max = size; } } static int hid_alloc_buffers(struct usb_device *dev, struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; usbhid->inbuf = usb_alloc_coherent(dev, usbhid->bufsize, GFP_KERNEL, &usbhid->inbuf_dma); usbhid->outbuf = usb_alloc_coherent(dev, usbhid->bufsize, GFP_KERNEL, &usbhid->outbuf_dma); usbhid->cr = kmalloc(sizeof(*usbhid->cr), GFP_KERNEL); usbhid->ctrlbuf = usb_alloc_coherent(dev, usbhid->bufsize, GFP_KERNEL, &usbhid->ctrlbuf_dma); if (!usbhid->inbuf || !usbhid->outbuf || !usbhid->cr || !usbhid->ctrlbuf) return -1; return 0; } static int usbhid_get_raw_report(struct hid_device *hid, unsigned char report_number, __u8 *buf, size_t count, unsigned char report_type) { struct usbhid_device *usbhid = hid->driver_data; struct usb_device *dev = hid_to_usb_dev(hid); struct usb_interface *intf = usbhid->intf; struct usb_host_interface *interface = intf->cur_altsetting; int skipped_report_id = 0; int ret; /* Byte 0 is the report number. 
Report data starts at byte 1.*/ buf[0] = report_number; if (report_number == 0x0) { /* Offset the return buffer by 1, so that the report ID will remain in byte 0. */ buf++; count--; skipped_report_id = 1; } ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), HID_REQ_GET_REPORT, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, ((report_type + 1) << 8) | report_number, interface->desc.bInterfaceNumber, buf, count, USB_CTRL_SET_TIMEOUT); /* count also the report id */ if (ret > 0 && skipped_report_id) ret++; return ret; } static int usbhid_set_raw_report(struct hid_device *hid, unsigned int reportnum, __u8 *buf, size_t count, unsigned char rtype) { struct usbhid_device *usbhid = hid->driver_data; struct usb_device *dev = hid_to_usb_dev(hid); struct usb_interface *intf = usbhid->intf; struct usb_host_interface *interface = intf->cur_altsetting; int ret, skipped_report_id = 0; /* Byte 0 is the report number. Report data starts at byte 1.*/ if ((rtype == HID_OUTPUT_REPORT) && (hid->quirks & HID_QUIRK_SKIP_OUTPUT_REPORT_ID)) buf[0] = 0; else buf[0] = reportnum; if (buf[0] == 0x0) { /* Don't send the Report ID */ buf++; count--; skipped_report_id = 1; } ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), HID_REQ_SET_REPORT, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, ((rtype + 1) << 8) | reportnum, interface->desc.bInterfaceNumber, buf, count, USB_CTRL_SET_TIMEOUT); /* count also the report id, if this was a numbered report. */ if (ret > 0 && skipped_report_id) ret++; return ret; } static int usbhid_output_report(struct hid_device *hid, __u8 *buf, size_t count) { struct usbhid_device *usbhid = hid->driver_data; struct usb_device *dev = hid_to_usb_dev(hid); int actual_length, skipped_report_id = 0, ret; if (!usbhid->urbout) return -ENOSYS; if (buf[0] == 0x0) { /* Don't send the Report ID */ buf++; count--; skipped_report_id = 1; } ret = usb_interrupt_msg(dev, usbhid->urbout->pipe, buf, count, &actual_length, USB_CTRL_SET_TIMEOUT); /* return the number of bytes transferred */ if (ret == 0) { ret = actual_length; /* count also the report id */ if (skipped_report_id) ret++; } return ret; } static void hid_free_buffers(struct usb_device *dev, struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; usb_free_coherent(dev, usbhid->bufsize, usbhid->inbuf, usbhid->inbuf_dma); usb_free_coherent(dev, usbhid->bufsize, usbhid->outbuf, usbhid->outbuf_dma); kfree(usbhid->cr); usb_free_coherent(dev, usbhid->bufsize, usbhid->ctrlbuf, usbhid->ctrlbuf_dma); } static int usbhid_parse(struct hid_device *hid) { struct usb_interface *intf = to_usb_interface(hid->dev.parent); struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev (intf); struct hid_descriptor *hdesc; u32 quirks = 0; unsigned int rsize = 0; char *rdesc; int ret, n; int num_descriptors; size_t offset = offsetof(struct hid_descriptor, desc); quirks = hid_lookup_quirk(hid); if (quirks & HID_QUIRK_IGNORE) return -ENODEV; /* Many keyboards and mice don't like to be polled for reports, * so we will always set the HID_QUIRK_NOGET flag for them. 
*/ if (interface->desc.bInterfaceSubClass == USB_INTERFACE_SUBCLASS_BOOT) { if (interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_KEYBOARD || interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) quirks |= HID_QUIRK_NOGET; } if (usb_get_extra_descriptor(interface, HID_DT_HID, &hdesc) && (!interface->desc.bNumEndpoints || usb_get_extra_descriptor(&interface->endpoint[0], HID_DT_HID, &hdesc))) { dbg_hid("class descriptor not present\n"); return -ENODEV; } if (hdesc->bLength < sizeof(struct hid_descriptor)) { dbg_hid("hid descriptor is too short\n"); return -EINVAL; } hid->version = le16_to_cpu(hdesc->bcdHID); hid->country = hdesc->bCountryCode; num_descriptors = min_t(int, hdesc->bNumDescriptors, (hdesc->bLength - offset) / sizeof(struct hid_class_descriptor)); for (n = 0; n < num_descriptors; n++) if (hdesc->desc[n].bDescriptorType == HID_DT_REPORT) rsize = le16_to_cpu(hdesc->desc[n].wDescriptorLength); if (!rsize || rsize > HID_MAX_DESCRIPTOR_SIZE) { dbg_hid("weird size of report descriptor (%u)\n", rsize); return -EINVAL; } rdesc = kmalloc(rsize, GFP_KERNEL); if (!rdesc) return -ENOMEM; hid_set_idle(dev, interface->desc.bInterfaceNumber, 0, 0); ret = hid_get_class_descriptor(dev, interface->desc.bInterfaceNumber, HID_DT_REPORT, rdesc, rsize); if (ret < 0) { dbg_hid("reading report descriptor failed\n"); kfree(rdesc); goto err; } ret = hid_parse_report(hid, rdesc, rsize); kfree(rdesc); if (ret) { dbg_hid("parsing report descriptor failed\n"); goto err; } hid->quirks |= quirks; return 0; err: return ret; } static int usbhid_start(struct hid_device *hid) { struct usb_interface *intf = to_usb_interface(hid->dev.parent); struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev(intf); struct usbhid_device *usbhid = hid->driver_data; unsigned int n, insize = 0; int ret; mutex_lock(&usbhid->mutex); clear_bit(HID_DISCONNECTED, &usbhid->iofl); usbhid->bufsize = HID_MIN_BUFFER_SIZE; hid_find_max_report(hid, HID_INPUT_REPORT, &usbhid->bufsize); hid_find_max_report(hid, HID_OUTPUT_REPORT, &usbhid->bufsize); hid_find_max_report(hid, HID_FEATURE_REPORT, &usbhid->bufsize); if (usbhid->bufsize > HID_MAX_BUFFER_SIZE) usbhid->bufsize = HID_MAX_BUFFER_SIZE; hid_find_max_report(hid, HID_INPUT_REPORT, &insize); if (insize > HID_MAX_BUFFER_SIZE) insize = HID_MAX_BUFFER_SIZE; if (hid_alloc_buffers(dev, hid)) { ret = -ENOMEM; goto fail; } for (n = 0; n < interface->desc.bNumEndpoints; n++) { struct usb_endpoint_descriptor *endpoint; int pipe; int interval; endpoint = &interface->endpoint[n].desc; if (!usb_endpoint_xfer_int(endpoint)) continue; interval = endpoint->bInterval; /* Some vendors give fullspeed interval on highspeed devides */ if (hid->quirks & HID_QUIRK_FULLSPEED_INTERVAL && dev->speed == USB_SPEED_HIGH) { interval = fls(endpoint->bInterval*8); pr_info("%s: Fixing fullspeed to highspeed interval: %d -> %d\n", hid->name, endpoint->bInterval, interval); } /* Change the polling interval of mice, joysticks * and keyboards. 
*/ switch (hid->collection->usage) { case HID_GD_MOUSE: if (hid_mousepoll_interval > 0) interval = hid_mousepoll_interval; break; case HID_GD_JOYSTICK: if (hid_jspoll_interval > 0) interval = hid_jspoll_interval; break; case HID_GD_KEYBOARD: if (hid_kbpoll_interval > 0) interval = hid_kbpoll_interval; break; } ret = -ENOMEM; if (usb_endpoint_dir_in(endpoint)) { if (usbhid->urbin) continue; if (!(usbhid->urbin = usb_alloc_urb(0, GFP_KERNEL))) goto fail; pipe = usb_rcvintpipe(dev, endpoint->bEndpointAddress); usb_fill_int_urb(usbhid->urbin, dev, pipe, usbhid->inbuf, insize, hid_irq_in, hid, interval); usbhid->urbin->transfer_dma = usbhid->inbuf_dma; usbhid->urbin->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; } else { if (usbhid->urbout) continue; if (!(usbhid->urbout = usb_alloc_urb(0, GFP_KERNEL))) goto fail; pipe = usb_sndintpipe(dev, endpoint->bEndpointAddress); usb_fill_int_urb(usbhid->urbout, dev, pipe, usbhid->outbuf, 0, hid_irq_out, hid, interval); usbhid->urbout->transfer_dma = usbhid->outbuf_dma; usbhid->urbout->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; } } usbhid->urbctrl = usb_alloc_urb(0, GFP_KERNEL); if (!usbhid->urbctrl) { ret = -ENOMEM; goto fail; } usb_fill_control_urb(usbhid->urbctrl, dev, 0, (void *) usbhid->cr, usbhid->ctrlbuf, 1, hid_ctrl, hid); usbhid->urbctrl->transfer_dma = usbhid->ctrlbuf_dma; usbhid->urbctrl->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; set_bit(HID_STARTED, &usbhid->iofl); if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { ret = usb_autopm_get_interface(usbhid->intf); if (ret) goto fail; set_bit(HID_IN_POLLING, &usbhid->iofl); usbhid->intf->needs_remote_wakeup = 1; ret = hid_start_in(hid); if (ret) { dev_err(&hid->dev, "failed to start in urb: %d\n", ret); } usb_autopm_put_interface(usbhid->intf); } /* Some keyboards don't work until their LEDs have been set. * Since BIOSes do set the LEDs, it must be safe for any device * that supports the keyboard boot protocol. * In addition, enable remote wakeup by default for all keyboard * devices supporting the boot protocol. 
*/ if (interface->desc.bInterfaceSubClass == USB_INTERFACE_SUBCLASS_BOOT && interface->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_KEYBOARD) { usbhid_set_leds(hid); device_set_wakeup_enable(&dev->dev, 1); } mutex_unlock(&usbhid->mutex); return 0; fail: usb_free_urb(usbhid->urbin); usb_free_urb(usbhid->urbout); usb_free_urb(usbhid->urbctrl); usbhid->urbin = NULL; usbhid->urbout = NULL; usbhid->urbctrl = NULL; hid_free_buffers(dev, hid); mutex_unlock(&usbhid->mutex); return ret; } static void usbhid_stop(struct hid_device *hid) { struct usbhid_device *usbhid = hid->driver_data; if (WARN_ON(!usbhid)) return; if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { clear_bit(HID_IN_POLLING, &usbhid->iofl); usbhid->intf->needs_remote_wakeup = 0; } mutex_lock(&usbhid->mutex); clear_bit(HID_STARTED, &usbhid->iofl); spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); while (usbhid->ctrltail != usbhid->ctrlhead) { if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_OUT) { kfree(usbhid->ctrl[usbhid->ctrltail].raw_report); usbhid->ctrl[usbhid->ctrltail].raw_report = NULL; } usbhid->ctrltail = (usbhid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1); } spin_unlock_irq(&usbhid->lock); usb_kill_urb(usbhid->urbin); usb_kill_urb(usbhid->urbout); usb_kill_urb(usbhid->urbctrl); hid_cancel_delayed_stuff(usbhid); hid->claimed = 0; usb_free_urb(usbhid->urbin); usb_free_urb(usbhid->urbctrl); usb_free_urb(usbhid->urbout); usbhid->urbin = NULL; /* don't mess up next start */ usbhid->urbctrl = NULL; usbhid->urbout = NULL; hid_free_buffers(hid_to_usb_dev(hid), hid); mutex_unlock(&usbhid->mutex); } static int usbhid_power(struct hid_device *hid, int lvl) { struct usbhid_device *usbhid = hid->driver_data; int r = 0; switch (lvl) { case PM_HINT_FULLON: r = usb_autopm_get_interface(usbhid->intf); break; case PM_HINT_NORMAL: usb_autopm_put_interface(usbhid->intf); break; } return r; } static void usbhid_request(struct hid_device *hid, struct hid_report *rep, int reqtype) { switch (reqtype) { case HID_REQ_GET_REPORT: usbhid_submit_report(hid, rep, USB_DIR_IN); break; case HID_REQ_SET_REPORT: usbhid_submit_report(hid, rep, USB_DIR_OUT); break; } } static int usbhid_raw_request(struct hid_device *hid, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) { switch (reqtype) { case HID_REQ_GET_REPORT: return usbhid_get_raw_report(hid, reportnum, buf, len, rtype); case HID_REQ_SET_REPORT: return usbhid_set_raw_report(hid, reportnum, buf, len, rtype); default: return -EIO; } } static int usbhid_idle(struct hid_device *hid, int report, int idle, int reqtype) { struct usb_device *dev = hid_to_usb_dev(hid); struct usb_interface *intf = to_usb_interface(hid->dev.parent); struct usb_host_interface *interface = intf->cur_altsetting; int ifnum = interface->desc.bInterfaceNumber; if (reqtype != HID_REQ_SET_IDLE) return -EINVAL; return hid_set_idle(dev, ifnum, report, idle); } static bool usbhid_may_wakeup(struct hid_device *hid) { struct usb_device *dev = hid_to_usb_dev(hid); return device_may_wakeup(&dev->dev); } static const struct hid_ll_driver usb_hid_driver = { .parse = usbhid_parse, .start = usbhid_start, .stop = usbhid_stop, .open = usbhid_open, .close = usbhid_close, .power = usbhid_power, .request = usbhid_request, .wait = usbhid_wait_io, .raw_request = usbhid_raw_request, .output_report = usbhid_output_report, .idle = usbhid_idle, .may_wakeup = usbhid_may_wakeup, }; bool hid_is_usb(const struct hid_device *hdev) { return hdev->ll_driver == 
&usb_hid_driver; } EXPORT_SYMBOL_GPL(hid_is_usb); static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev(intf); struct usbhid_device *usbhid; struct hid_device *hid; unsigned int n, has_in = 0; size_t len; int ret; dbg_hid("HID probe called for ifnum %d\n", intf->altsetting->desc.bInterfaceNumber); for (n = 0; n < interface->desc.bNumEndpoints; n++) if (usb_endpoint_is_int_in(&interface->endpoint[n].desc)) has_in++; if (!has_in) { hid_err(intf, "couldn't find an input interrupt endpoint\n"); return -ENODEV; } hid = hid_allocate_device(); if (IS_ERR(hid)) return PTR_ERR(hid); usb_set_intfdata(intf, hid); hid->ll_driver = &usb_hid_driver; hid->ff_init = hid_pidff_init; #ifdef CONFIG_USB_HIDDEV hid->hiddev_connect = hiddev_connect; hid->hiddev_disconnect = hiddev_disconnect; hid->hiddev_hid_event = hiddev_hid_event; hid->hiddev_report_event = hiddev_report_event; #endif hid->dev.parent = &intf->dev; hid->bus = BUS_USB; hid->vendor = le16_to_cpu(dev->descriptor.idVendor); hid->product = le16_to_cpu(dev->descriptor.idProduct); hid->version = le16_to_cpu(dev->descriptor.bcdDevice); hid->name[0] = 0; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) hid->type = HID_TYPE_USBMOUSE; else if (intf->cur_altsetting->desc.bInterfaceProtocol == 0) hid->type = HID_TYPE_USBNONE; if (dev->manufacturer) strscpy(hid->name, dev->manufacturer, sizeof(hid->name)); if (dev->product) { if (dev->manufacturer) strlcat(hid->name, " ", sizeof(hid->name)); strlcat(hid->name, dev->product, sizeof(hid->name)); } if (!strlen(hid->name)) snprintf(hid->name, sizeof(hid->name), "HID %04x:%04x", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); usb_make_path(dev, hid->phys, sizeof(hid->phys)); strlcat(hid->phys, "/input", sizeof(hid->phys)); len = strlen(hid->phys); if (len < sizeof(hid->phys) - 1) snprintf(hid->phys + len, sizeof(hid->phys) - len, "%d", intf->altsetting[0].desc.bInterfaceNumber); if (usb_string(dev, dev->descriptor.iSerialNumber, hid->uniq, 64) <= 0) hid->uniq[0] = 0; usbhid = kzalloc(sizeof(*usbhid), GFP_KERNEL); if (usbhid == NULL) { ret = -ENOMEM; goto err; } hid->driver_data = usbhid; usbhid->hid = hid; usbhid->intf = intf; usbhid->ifnum = interface->desc.bInterfaceNumber; init_waitqueue_head(&usbhid->wait); INIT_WORK(&usbhid->reset_work, hid_reset); timer_setup(&usbhid->io_retry, hid_retry_timeout, 0); spin_lock_init(&usbhid->lock); mutex_init(&usbhid->mutex); ret = hid_add_device(hid); if (ret) { if (ret != -ENODEV) hid_err(intf, "can't add hid device: %d\n", ret); goto err_free; } return 0; err_free: kfree(usbhid); err: hid_destroy_device(hid); return ret; } static void usbhid_disconnect(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); struct usbhid_device *usbhid; if (WARN_ON(!hid)) return; usbhid = hid->driver_data; spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); hid_destroy_device(hid); kfree(usbhid); } static void hid_cancel_delayed_stuff(struct usbhid_device *usbhid) { del_timer_sync(&usbhid->io_retry); cancel_work_sync(&usbhid->reset_work); } static void hid_cease_io(struct usbhid_device *usbhid) { del_timer_sync(&usbhid->io_retry); usb_kill_urb(usbhid->urbin); usb_kill_urb(usbhid->urbctrl); usb_kill_urb(usbhid->urbout); } static void hid_restart_io(struct hid_device *hid) { 
struct usbhid_device *usbhid = hid->driver_data; int clear_halt = test_bit(HID_CLEAR_HALT, &usbhid->iofl); int reset_pending = test_bit(HID_RESET_PENDING, &usbhid->iofl); spin_lock_irq(&usbhid->lock); clear_bit(HID_SUSPENDED, &usbhid->iofl); usbhid_mark_busy(usbhid); if (clear_halt || reset_pending) schedule_work(&usbhid->reset_work); usbhid->retry_delay = 0; spin_unlock_irq(&usbhid->lock); if (reset_pending || !test_bit(HID_STARTED, &usbhid->iofl)) return; if (!clear_halt) { if (hid_start_in(hid) < 0) hid_io_error(hid); } spin_lock_irq(&usbhid->lock); if (usbhid->urbout && !test_bit(HID_OUT_RUNNING, &usbhid->iofl)) usbhid_restart_out_queue(usbhid); if (!test_bit(HID_CTRL_RUNNING, &usbhid->iofl)) usbhid_restart_ctrl_queue(usbhid); spin_unlock_irq(&usbhid->lock); } /* Treat USB reset pretty much the same as suspend/resume */ static int hid_pre_reset(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); struct usbhid_device *usbhid = hid->driver_data; spin_lock_irq(&usbhid->lock); set_bit(HID_RESET_PENDING, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); hid_cease_io(usbhid); return 0; } /* Same routine used for post_reset and reset_resume */ static int hid_post_reset(struct usb_interface *intf) { struct usb_device *dev = interface_to_usbdev (intf); struct hid_device *hid = usb_get_intfdata(intf); struct usbhid_device *usbhid = hid->driver_data; struct usb_host_interface *interface = intf->cur_altsetting; int status; char *rdesc; /* Fetch and examine the HID report descriptor. If this * has changed, then rebind. Since usbcore's check of the * configuration descriptors passed, we already know that * the size of the HID report descriptor has not changed. */ rdesc = kmalloc(hid->dev_rsize, GFP_KERNEL); if (!rdesc) return -ENOMEM; status = hid_get_class_descriptor(dev, interface->desc.bInterfaceNumber, HID_DT_REPORT, rdesc, hid->dev_rsize); if (status < 0) { dbg_hid("reading report descriptor failed (post_reset)\n"); kfree(rdesc); return status; } status = memcmp(rdesc, hid->dev_rdesc, hid->dev_rsize); kfree(rdesc); if (status != 0) { dbg_hid("report descriptor changed\n"); return -EPERM; } /* No need to do another reset or clear a halted endpoint */ spin_lock_irq(&usbhid->lock); clear_bit(HID_RESET_PENDING, &usbhid->iofl); clear_bit(HID_CLEAR_HALT, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); hid_set_idle(dev, intf->cur_altsetting->desc.bInterfaceNumber, 0, 0); hid_restart_io(hid); return 0; } static int hid_resume_common(struct hid_device *hid, bool driver_suspended) { int status = 0; hid_restart_io(hid); if (driver_suspended) status = hid_driver_resume(hid); return status; } static int hid_suspend(struct usb_interface *intf, pm_message_t message) { struct hid_device *hid = usb_get_intfdata(intf); struct usbhid_device *usbhid = hid->driver_data; int status = 0; bool driver_suspended = false; unsigned int ledcount; if (PMSG_IS_AUTO(message)) { ledcount = hidinput_count_leds(hid); spin_lock_irq(&usbhid->lock); /* Sync with error handler */ if (!test_bit(HID_RESET_PENDING, &usbhid->iofl) && !test_bit(HID_CLEAR_HALT, &usbhid->iofl) && !test_bit(HID_OUT_RUNNING, &usbhid->iofl) && !test_bit(HID_CTRL_RUNNING, &usbhid->iofl) && !test_bit(HID_KEYS_PRESSED, &usbhid->iofl) && (!ledcount || ignoreled)) { set_bit(HID_SUSPENDED, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); status = hid_driver_suspend(hid, message); if (status < 0) goto failed; driver_suspended = true; } else { usbhid_mark_busy(usbhid); spin_unlock_irq(&usbhid->lock); return -EBUSY; } } else { /* TODO: resume() 
might need to handle suspend failure */ status = hid_driver_suspend(hid, message); driver_suspended = true; spin_lock_irq(&usbhid->lock); set_bit(HID_SUSPENDED, &usbhid->iofl); spin_unlock_irq(&usbhid->lock); if (usbhid_wait_io(hid) < 0) status = -EIO; } hid_cancel_delayed_stuff(usbhid); hid_cease_io(usbhid); if (PMSG_IS_AUTO(message) && test_bit(HID_KEYS_PRESSED, &usbhid->iofl)) { /* lost race against keypresses */ status = -EBUSY; goto failed; } dev_dbg(&intf->dev, "suspend\n"); return status; failed: hid_resume_common(hid, driver_suspended); return status; } static int hid_resume(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata (intf); int status; status = hid_resume_common(hid, true); dev_dbg(&intf->dev, "resume status %d\n", status); return 0; } static int hid_reset_resume(struct usb_interface *intf) { struct hid_device *hid = usb_get_intfdata(intf); int status; status = hid_post_reset(intf); if (status >= 0) { int ret = hid_driver_reset_resume(hid); if (ret < 0) status = ret; } return status; } static const struct usb_device_id hid_usb_ids[] = { { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, .bInterfaceClass = USB_INTERFACE_CLASS_HID }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE (usb, hid_usb_ids); static struct usb_driver hid_driver = { .name = "usbhid", .probe = usbhid_probe, .disconnect = usbhid_disconnect, .suspend = pm_ptr(hid_suspend), .resume = pm_ptr(hid_resume), .reset_resume = pm_ptr(hid_reset_resume), .pre_reset = hid_pre_reset, .post_reset = hid_post_reset, .id_table = hid_usb_ids, .supports_autosuspend = 1, }; struct usb_interface *usbhid_find_interface(int minor) { return usb_find_interface(&hid_driver, minor); } static int __init hid_init(void) { int retval; retval = hid_quirks_init(quirks_param, BUS_USB, MAX_USBHID_BOOT_QUIRKS); if (retval) goto usbhid_quirks_init_fail; retval = usb_register(&hid_driver); if (retval) goto usb_register_fail; pr_info(KBUILD_MODNAME ": " DRIVER_DESC "\n"); return 0; usb_register_fail: hid_quirks_exit(BUS_USB); usbhid_quirks_init_fail: return retval; } static void __exit hid_exit(void) { usb_deregister(&hid_driver); hid_quirks_exit(BUS_USB); } module_init(hid_init); module_exit(hid_exit); MODULE_AUTHOR("Andreas Gal"); MODULE_AUTHOR("Vojtech Pavlik"); MODULE_AUTHOR("Jiri Kosina"); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
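/*
 * Illustrative sketch (editor's addition, not part of the driver above):
 * how a HID client driver fetches a feature report. For USB-backed devices,
 * hid_hw_raw_request() is dispatched through the usb_hid_driver ll_driver,
 * i.e. usbhid_raw_request() and usbhid_get_raw_report() shown earlier.
 * The report number 0x01 and the 8-byte buffer size are placeholders.
 */
#include <linux/hid.h>
#include <linux/slab.h>

static int example_get_feature_report(struct hid_device *hdev)
{
	u8 *buf;
	int ret;

	buf = kzalloc(8, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = hid_hw_raw_request(hdev, 0x01, buf, 8,
				 HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
	if (ret > 0)
		hid_info(hdev, "feature report 0x01: %*ph\n", ret, buf);

	kfree(buf);
	return ret < 0 ? ret : 0;
}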
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2020 ARM Ltd.
 */
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H

#ifndef __ASSEMBLY__

/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static __always_inline void rep_nop(void)
{
	asm volatile("rep; nop" ::: "memory");
}

static __always_inline void cpu_relax(void)
{
	rep_nop();
}

struct getcpu_cache;

notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused);

#endif /* __ASSEMBLY__ */

#endif /* __ASM_VDSO_PROCESSOR_H */
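/*
 * Illustrative sketch (editor's addition, not part of the header above):
 * the usual cpu_relax() pattern in a bounded busy-wait loop, per the
 * "REP NOP (PAUSE)" comment in the header. The helper example_spin_for_flag()
 * and its arguments are hypothetical.
 */
static inline int example_spin_for_flag(volatile int *flag, unsigned int spins)
{
	while (spins--) {
		if (*flag)
			return 1;
		cpu_relax();	/* emits REP;NOP (PAUSE) to ease the spin */
	}
	return 0;
}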
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  HID driver for N-Trig touchscreens
 *
 *  Copyright (c) 2008-2010 Rafi Rubin
 *  Copyright (c) 2009-2010 Stephane Chatty
 */

/*
 */

#include <linux/device.h>
#include <linux/hid.h>
#include <linux/usb.h>
#include "usbhid/usbhid.h"
#include <linux/module.h>
#include <linux/slab.h>

#include "hid-ids.h"

#define NTRIG_DUPLICATE_USAGES	0x001

static unsigned int min_width;
module_param(min_width, uint, 0644);
MODULE_PARM_DESC(min_width, "Minimum touch contact width to accept.");

static unsigned int min_height;
module_param(min_height, uint, 0644);
MODULE_PARM_DESC(min_height, "Minimum touch contact height to accept.");

static unsigned int activate_slack = 1;
module_param(activate_slack, uint, 0644);
MODULE_PARM_DESC(activate_slack, "Number of touch frames to ignore at "
		 "the start of touch input.");

static unsigned int deactivate_slack = 4;
module_param(deactivate_slack, uint, 0644);
MODULE_PARM_DESC(deactivate_slack, "Number of empty frames to ignore before "
		 "deactivating touch.");

static unsigned int activation_width = 64;
module_param(activation_width, uint, 0644);
MODULE_PARM_DESC(activation_width, "Width threshold to immediately start "
		 "processing touch events.");

static unsigned int activation_height = 32;
module_param(activation_height, uint, 0644);
MODULE_PARM_DESC(activation_height, "Height threshold to immediately start "
		 "processing touch events.");

struct ntrig_data {
	/* Incoming raw values for a single contact */
	__u16 x, y, w, h;
	__u16 id;

	bool tipswitch;
	bool confidence;
	bool first_contact_touch;

	bool reading_mt;

	__u8 mt_footer[4];
	__u8 mt_foot_count;

	/* The current activation state. */
	__s8 act_state;

	/* Empty frames to ignore before recognizing the end of activity */
	__s8 deactivate_slack;

	/* Frames to ignore before acknowledging the start of activity */
	__s8 activate_slack;

	/* Minimum size contact to accept */
	__u16 min_width;
	__u16 min_height;

	/* Threshold to override activation slack */
	__u16 activation_width;
	__u16 activation_height;

	__u16 sensor_logical_width;
	__u16 sensor_logical_height;
	__u16 sensor_physical_width;
	__u16 sensor_physical_height;
};

/*
 * This function converts the 4 byte raw firmware code into
 * a string containing 5 comma separated numbers.
 */
static int ntrig_version_string(unsigned char *raw, char *buf)
{
	__u8 a = (raw[1] & 0x0e) >> 1;
	__u8 b = (raw[0] & 0x3c) >> 2;
	__u8 c = ((raw[0] & 0x03) << 3) | ((raw[3] & 0xe0) >> 5);
	__u8 d = ((raw[3] & 0x07) << 3) | ((raw[2] & 0xe0) >> 5);
	__u8 e = raw[2] & 0x07;

	/*
	 *  As yet unmapped bits:
	 *  0b11000000 0b11110001 0b00011000 0b00011000
	 */

	return sprintf(buf, "%u.%u.%u.%u.%u", a, b, c, d, e);
}

static inline int ntrig_get_mode(struct hid_device *hdev)
{
	struct hid_report *report = hdev->report_enum[HID_FEATURE_REPORT].
report_id_hash[0x0d]; if (!report || report->maxfield < 1 || report->field[0]->report_count < 1) return -EINVAL; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); hid_hw_wait(hdev); return (int)report->field[0]->value[0]; } static inline void ntrig_set_mode(struct hid_device *hdev, const int mode) { struct hid_report *report; __u8 mode_commands[4] = { 0xe, 0xf, 0x1b, 0x10 }; if (mode < 0 || mode > 3) return; report = hdev->report_enum[HID_FEATURE_REPORT]. report_id_hash[mode_commands[mode]]; if (!report) return; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); } static void ntrig_report_version(struct hid_device *hdev) { int ret; char buf[20]; struct usb_device *usb_dev = hid_to_usb_dev(hdev); unsigned char *data = kmalloc(8, GFP_KERNEL); if (!data) goto err_free; ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), USB_REQ_CLEAR_FEATURE, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, 0x30c, 1, data, 8, USB_CTRL_SET_TIMEOUT); if (ret == 8) { ret = ntrig_version_string(&data[2], buf); hid_info(hdev, "Firmware version: %s (%02x%02x %02x%02x)\n", buf, data[2], data[3], data[4], data[5]); } err_free: kfree(data); } static ssize_t show_phys_width(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->sensor_physical_width); } static DEVICE_ATTR(sensor_physical_width, S_IRUGO, show_phys_width, NULL); static ssize_t show_phys_height(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->sensor_physical_height); } static DEVICE_ATTR(sensor_physical_height, S_IRUGO, show_phys_height, NULL); static ssize_t show_log_width(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->sensor_logical_width); } static DEVICE_ATTR(sensor_logical_width, S_IRUGO, show_log_width, NULL); static ssize_t show_log_height(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->sensor_logical_height); } static DEVICE_ATTR(sensor_logical_height, S_IRUGO, show_log_height, NULL); static ssize_t show_min_width(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->min_width * nd->sensor_physical_width / nd->sensor_logical_width); } static ssize_t set_min_width(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > nd->sensor_physical_width) return -EINVAL; nd->min_width = val * nd->sensor_logical_width / nd->sensor_physical_width; return count; } static DEVICE_ATTR(min_width, S_IWUSR | S_IRUGO, show_min_width, set_min_width); static ssize_t show_min_height(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->min_height * nd->sensor_physical_height / nd->sensor_logical_height); } static ssize_t set_min_height(struct device *dev, 
struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > nd->sensor_physical_height) return -EINVAL; nd->min_height = val * nd->sensor_logical_height / nd->sensor_physical_height; return count; } static DEVICE_ATTR(min_height, S_IWUSR | S_IRUGO, show_min_height, set_min_height); static ssize_t show_activate_slack(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->activate_slack); } static ssize_t set_activate_slack(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > 0x7f) return -EINVAL; nd->activate_slack = val; return count; } static DEVICE_ATTR(activate_slack, S_IWUSR | S_IRUGO, show_activate_slack, set_activate_slack); static ssize_t show_activation_width(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->activation_width * nd->sensor_physical_width / nd->sensor_logical_width); } static ssize_t set_activation_width(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > nd->sensor_physical_width) return -EINVAL; nd->activation_width = val * nd->sensor_logical_width / nd->sensor_physical_width; return count; } static DEVICE_ATTR(activation_width, S_IWUSR | S_IRUGO, show_activation_width, set_activation_width); static ssize_t show_activation_height(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->activation_height * nd->sensor_physical_height / nd->sensor_logical_height); } static ssize_t set_activation_height(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > nd->sensor_physical_height) return -EINVAL; nd->activation_height = val * nd->sensor_logical_height / nd->sensor_physical_height; return count; } static DEVICE_ATTR(activation_height, S_IWUSR | S_IRUGO, show_activation_height, set_activation_height); static ssize_t show_deactivate_slack(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", -nd->deactivate_slack); } static ssize_t set_deactivate_slack(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; /* * No more than 8 terminal frames have been observed so far * and higher slack is highly likely to leave the single * touch emulation stuck down. 
*/ if (val > 7) return -EINVAL; nd->deactivate_slack = -val; return count; } static DEVICE_ATTR(deactivate_slack, S_IWUSR | S_IRUGO, show_deactivate_slack, set_deactivate_slack); static struct attribute *sysfs_attrs[] = { &dev_attr_sensor_physical_width.attr, &dev_attr_sensor_physical_height.attr, &dev_attr_sensor_logical_width.attr, &dev_attr_sensor_logical_height.attr, &dev_attr_min_height.attr, &dev_attr_min_width.attr, &dev_attr_activate_slack.attr, &dev_attr_activation_width.attr, &dev_attr_activation_height.attr, &dev_attr_deactivate_slack.attr, NULL }; static const struct attribute_group ntrig_attribute_group = { .attrs = sysfs_attrs }; /* * this driver is aimed at two firmware versions in circulation: * - dual pen/finger single touch * - finger multitouch, pen not working */ static int ntrig_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct ntrig_data *nd = hid_get_drvdata(hdev); /* No special mappings needed for the pen and single touch */ if (field->physical) return 0; switch (usage->hid & HID_USAGE_PAGE) { case HID_UP_GENDESK: switch (usage->hid) { case HID_GD_X: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_POSITION_X); input_set_abs_params(hi->input, ABS_X, field->logical_minimum, field->logical_maximum, 0, 0); if (!nd->sensor_logical_width) { nd->sensor_logical_width = field->logical_maximum - field->logical_minimum; nd->sensor_physical_width = field->physical_maximum - field->physical_minimum; nd->activation_width = activation_width * nd->sensor_logical_width / nd->sensor_physical_width; nd->min_width = min_width * nd->sensor_logical_width / nd->sensor_physical_width; } return 1; case HID_GD_Y: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_POSITION_Y); input_set_abs_params(hi->input, ABS_Y, field->logical_minimum, field->logical_maximum, 0, 0); if (!nd->sensor_logical_height) { nd->sensor_logical_height = field->logical_maximum - field->logical_minimum; nd->sensor_physical_height = field->physical_maximum - field->physical_minimum; nd->activation_height = activation_height * nd->sensor_logical_height / nd->sensor_physical_height; nd->min_height = min_height * nd->sensor_logical_height / nd->sensor_physical_height; } return 1; } return 0; case HID_UP_DIGITIZER: switch (usage->hid) { /* we do not want to map these for now */ case HID_DG_CONTACTID: /* Not trustworthy, squelch for now */ case HID_DG_INPUTMODE: case HID_DG_DEVICEINDEX: case HID_DG_CONTACTMAX: return -1; /* width/height mapped on TouchMajor/TouchMinor/Orientation */ case HID_DG_WIDTH: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_TOUCH_MAJOR); return 1; case HID_DG_HEIGHT: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_TOUCH_MINOR); input_set_abs_params(hi->input, ABS_MT_ORIENTATION, 0, 1, 0, 0); return 1; } return 0; case 0xff000000: /* we do not want to map these: no input-oriented meaning */ return -1; } return 0; } static int ntrig_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { /* No special mappings needed for the pen and single touch */ if (field->physical) return 0; if (usage->type == EV_KEY || usage->type == EV_REL || usage->type == EV_ABS) clear_bit(usage->code, *bit); return 0; } /* * this function is called upon all reports * so that we can filter contact point information, * decide whether we are in multi or single touch mode * and call input_mt_sync after each point if necessary */ static int 
ntrig_event (struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct ntrig_data *nd = hid_get_drvdata(hid); struct input_dev *input; /* Skip processing if not a claimed input */ if (!(hid->claimed & HID_CLAIMED_INPUT)) goto not_claimed_input; /* This function is being called before the structures are fully * initialized */ if(!(field->hidinput && field->hidinput->input)) return -EINVAL; input = field->hidinput->input; /* No special handling needed for the pen */ if (field->application == HID_DG_PEN) return 0; switch (usage->hid) { case 0xff000001: /* Tag indicating the start of a multitouch group */ nd->reading_mt = true; nd->first_contact_touch = false; break; case HID_DG_TIPSWITCH: nd->tipswitch = value; /* Prevent emission of touch until validated */ return 1; case HID_DG_CONFIDENCE: nd->confidence = value; break; case HID_GD_X: nd->x = value; /* Clear the contact footer */ nd->mt_foot_count = 0; break; case HID_GD_Y: nd->y = value; break; case HID_DG_CONTACTID: nd->id = value; break; case HID_DG_WIDTH: nd->w = value; break; case HID_DG_HEIGHT: nd->h = value; /* * when in single touch mode, this is the last * report received in a finger event. We want * to emit a normal (X, Y) position */ if (!nd->reading_mt) { /* * TipSwitch indicates the presence of a * finger in single touch mode. */ input_report_key(input, BTN_TOUCH, nd->tipswitch); input_report_key(input, BTN_TOOL_DOUBLETAP, nd->tipswitch); input_event(input, EV_ABS, ABS_X, nd->x); input_event(input, EV_ABS, ABS_Y, nd->y); } break; case 0xff000002: /* * we receive this when the device is in multitouch * mode. The first of the three values tagged with * this usage tells if the contact point is real * or a placeholder */ /* Shouldn't get more than 4 footer packets, so skip */ if (nd->mt_foot_count >= 4) break; nd->mt_footer[nd->mt_foot_count++] = value; /* if the footer isn't complete break */ if (nd->mt_foot_count != 4) break; /* Pen activity signal. */ if (nd->mt_footer[2]) { /* * When the pen deactivates touch, we see a * bogus frame with ContactCount > 0. * We can * save a bit of work by ensuring act_state < 0 * even if deactivation slack is turned off. */ nd->act_state = deactivate_slack - 1; nd->confidence = false; break; } /* * The first footer value indicates the presence of a * finger. */ if (nd->mt_footer[0]) { /* * We do not want to process contacts under * the size threshold, but do not want to * ignore them for activation state */ if (nd->w < nd->min_width || nd->h < nd->min_height) nd->confidence = false; } else break; if (nd->act_state > 0) { /* * Contact meets the activation size threshold */ if (nd->w >= nd->activation_width && nd->h >= nd->activation_height) { if (nd->id) /* * first contact, activate now */ nd->act_state = 0; else { /* * avoid corrupting this frame * but ensure next frame will * be active */ nd->act_state = 1; break; } } else /* * Defer adjusting the activation state * until the end of the frame. */ break; } /* Discarding this contact */ if (!nd->confidence) break; /* emit a normal (X, Y) for the first point only */ if (nd->id == 0) { /* * TipSwitch is superfluous in multitouch * mode. The footer events tell us * if there is a finger on the screen or * not. 
*/ nd->first_contact_touch = nd->confidence; input_event(input, EV_ABS, ABS_X, nd->x); input_event(input, EV_ABS, ABS_Y, nd->y); } /* Emit MT events */ input_event(input, EV_ABS, ABS_MT_POSITION_X, nd->x); input_event(input, EV_ABS, ABS_MT_POSITION_Y, nd->y); /* * Translate from height and width to size * and orientation. */ if (nd->w > nd->h) { input_event(input, EV_ABS, ABS_MT_ORIENTATION, 1); input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, nd->w); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, nd->h); } else { input_event(input, EV_ABS, ABS_MT_ORIENTATION, 0); input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, nd->h); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, nd->w); } input_mt_sync(field->hidinput->input); break; case HID_DG_CONTACTCOUNT: /* End of a multitouch group */ if (!nd->reading_mt) /* Just to be sure */ break; nd->reading_mt = false; /* * Activation state machine logic: * * Fundamental states: * state > 0: Inactive * state <= 0: Active * state < -deactivate_slack: * Pen termination of touch * * Specific values of interest * state == activate_slack * no valid input since the last reset * * state == 0 * general operational state * * state == -deactivate_slack * read sufficient empty frames to accept * the end of input and reset */ if (nd->act_state > 0) { /* Currently inactive */ if (value) /* * Consider each live contact as * evidence of intentional activity. */ nd->act_state = (nd->act_state > value) ? nd->act_state - value : 0; else /* * Empty frame before we hit the * activity threshold, reset. */ nd->act_state = nd->activate_slack; /* * Entered this block inactive and no * coordinates sent this frame, so hold off * on button state. */ break; } else { /* Currently active */ if (value && nd->act_state >= nd->deactivate_slack) /* * Live point: clear accumulated * deactivation count. */ nd->act_state = 0; else if (nd->act_state <= nd->deactivate_slack) /* * We've consumed the deactivation * slack, time to deactivate and reset. */ nd->act_state = nd->activate_slack; else { /* Move towards deactivation */ nd->act_state--; break; } } if (nd->first_contact_touch && nd->act_state <= 0) { /* * Check to see if we're ready to start * emitting touch events. * * Note: activation slack will decrease over * the course of the frame, and it will be * inconsistent from the start to the end of * the frame. However if the frame starts * with slack, first_contact_touch will still * be 0 and we will not get to this point. 
*/ input_report_key(input, BTN_TOOL_DOUBLETAP, 1); input_report_key(input, BTN_TOUCH, 1); } else { input_report_key(input, BTN_TOOL_DOUBLETAP, 0); input_report_key(input, BTN_TOUCH, 0); } break; default: /* fall-back to the generic hidinput handling */ return 0; } not_claimed_input: /* we have handled the hidinput part, now remains hiddev */ if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_hid_event) hid->hiddev_hid_event(hid, field, usage, value); return 1; } static int ntrig_input_configured(struct hid_device *hid, struct hid_input *hidinput) { struct input_dev *input = hidinput->input; if (hidinput->report->maxfield < 1) return 0; switch (hidinput->report->field[0]->application) { case HID_DG_PEN: input->name = "N-Trig Pen"; break; case HID_DG_TOUCHSCREEN: /* These keys are redundant for fingers, clear them * to prevent incorrect identification */ __clear_bit(BTN_TOOL_PEN, input->keybit); __clear_bit(BTN_TOOL_FINGER, input->keybit); __clear_bit(BTN_0, input->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); /* * The physical touchscreen (single touch) * input has a value for physical, whereas * the multitouch only has logical input * fields. */ input->name = (hidinput->report->field[0]->physical) ? "N-Trig Touchscreen" : "N-Trig MultiTouch"; break; } return 0; } static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; struct ntrig_data *nd; struct hid_report *report; if (id->driver_data) hdev->quirks |= HID_QUIRK_MULTI_INPUT | HID_QUIRK_NO_INIT_REPORTS; nd = kmalloc(sizeof(struct ntrig_data), GFP_KERNEL); if (!nd) { hid_err(hdev, "cannot allocate N-Trig data\n"); return -ENOMEM; } nd->reading_mt = false; nd->min_width = 0; nd->min_height = 0; nd->activate_slack = activate_slack; nd->act_state = activate_slack; nd->deactivate_slack = -deactivate_slack; nd->sensor_logical_width = 1; nd->sensor_logical_height = 1; nd->sensor_physical_width = 1; nd->sensor_physical_height = 1; hid_set_drvdata(hdev, nd); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } /* This is needed for devices with more recent firmware versions */ report = hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0x0a]; if (report) { /* Let the device settle to ensure the wakeup message gets * through */ hid_hw_wait(hdev); hid_hw_request(hdev, report, HID_REQ_GET_REPORT); /* * Sanity check: if the current mode is invalid reset it to * something reasonable. 
*/ if (ntrig_get_mode(hdev) >= 4) ntrig_set_mode(hdev, 3); } ntrig_report_version(hdev); ret = sysfs_create_group(&hdev->dev.kobj, &ntrig_attribute_group); if (ret) hid_err(hdev, "cannot create sysfs group\n"); return 0; err_free: kfree(nd); return ret; } static void ntrig_remove(struct hid_device *hdev) { sysfs_remove_group(&hdev->dev.kobj, &ntrig_attribute_group); hid_hw_stop(hdev); kfree(hid_get_drvdata(hdev)); } static const struct hid_device_id ntrig_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_1), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_2), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_3), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_4), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_5), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_6), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_7), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_8), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_9), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_10), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_11), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_12), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_13), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_14), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_15), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_16), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_17), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_18), .driver_data = NTRIG_DUPLICATE_USAGES }, { } }; MODULE_DEVICE_TABLE(hid, ntrig_devices); static const struct hid_usage_id ntrig_grabbed_usages[] = { { HID_ANY_ID, HID_ANY_ID, HID_ANY_ID }, { HID_ANY_ID - 1, HID_ANY_ID - 1, HID_ANY_ID - 1 } }; static struct hid_driver ntrig_driver = { .name = "ntrig", .id_table = ntrig_devices, .probe = ntrig_probe, .remove = ntrig_remove, .input_mapping = ntrig_input_mapping, .input_mapped = ntrig_input_mapped, .input_configured = ntrig_input_configured, .usage_table = ntrig_grabbed_usages, .event = ntrig_event, }; module_hid_driver(ntrig_driver); MODULE_LICENSE("GPL");
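The sysfs attributes registered in ntrig_probe() (sensor_physical_width, min_width, activation_width, deactivate_slack, and so on) hang off the HID device's kobject, so the activation heuristics can be inspected and tuned from user space after the driver is loaded. The following user-space sketch is not part of the driver; the device directory name and the helper names (read_attr/write_attr) are placeholders of mine, and the chosen values are only examples. Writes require root, since the writable attributes are S_IWUSR only.

/*
 * User-space sketch (assumption: device path must be adjusted to the real
 * HID bus ID of the N-Trig touchscreen). Reads one attribute created by
 * ntrig_probe() and raises the activation width threshold.
 */
#include <stdio.h>

#define SYSFS_DIR "/sys/bus/hid/devices/0003:1B96:0001.0001"	/* placeholder */

static int read_attr(const char *name, long *val)
{
	char path[256];
	FILE *f;
	int ok;

	snprintf(path, sizeof(path), "%s/%s", SYSFS_DIR, name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	ok = (fscanf(f, "%ld", val) == 1) ? 0 : -1;
	fclose(f);
	return ok;
}

static int write_attr(const char *name, long val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", SYSFS_DIR, name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%ld\n", val);
	fclose(f);
	return 0;
}

int main(void)
{
	long w;

	if (read_attr("sensor_physical_width", &w) == 0)
		printf("sensor_physical_width: %ld\n", w);

	/* Example only: require larger contacts before touch activates. */
	if (write_attr("activation_width", 80) != 0)
		perror("activation_width");

	return 0;
}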
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_SCHED_GENERIC_H #define __NET_SCHED_GENERIC_H #include <linux/netdevice.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/pkt_sched.h> #include <linux/pkt_cls.h> #include <linux/percpu.h> #include <linux/dynamic_queue_limits.h> #include <linux/list.h> #include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/atomic.h> #include <linux/hashtable.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> #include <net/flow_offload.h> #include <linux/xarray.h> struct Qdisc_ops; struct qdisc_walker; struct tcf_walker; struct module; struct bpf_flow_keys; struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; struct qdisc_rate_table *next; int refcnt; }; enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, __QDISC_STATE_MISSED, __QDISC_STATE_DRAINING, }; enum qdisc_state2_t { /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. * Use qdisc_run_begin/end() or qdisc_is_running() instead.
*/ __QDISC_STATE2_RUNNING, }; #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) #define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING) #define QDISC_STATE_NON_EMPTY (QDISC_STATE_MISSED | \ QDISC_STATE_DRAINING) struct qdisc_size_table { struct rcu_head rcu; struct list_head list; struct tc_sizespec szopts; int refcnt; u16 data[]; }; /* similar to sk_buff_head, but skb->prev pointer is undefined. */ struct qdisc_skb_head { struct sk_buff *head; struct sk_buff *tail; __u32 qlen; spinlock_t lock; }; struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *sch); unsigned int flags; #define TCQ_F_BUILTIN 1 #define TCQ_F_INGRESS 2 #define TCQ_F_CAN_BYPASS 4 #define TCQ_F_MQROOT 8 #define TCQ_F_ONETXQUEUE 0x10 /* dequeue_skb() can assume all skbs are for * q->dev_queue : It can test * netif_xmit_frozen_or_stopped() before * dequeueing next packet. * Its true for MQ/MQPRIO slaves, or non * multiqueue device. */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ #define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : * qdisc_tree_decrease_qlen() should stop. */ #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ #define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; struct hlist_node hash; u32 handle; u32 parent; struct netdev_queue *dev_queue; struct net_rate_estimator __rcu *rate_est; struct gnet_stats_basic_sync __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; int pad; refcount_t refcnt; /* * For performance sake on SMP, we put highly modified fields at the end */ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; int owner; unsigned long state; unsigned long state2; /* must be written under qdisc spinlock */ struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; struct rcu_head rcu; netdevice_tracker dev_tracker; /* private data */ long privdata[] ____cacheline_aligned; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return; refcount_inc(&qdisc->refcnt); } static inline bool qdisc_refcount_dec_if_one(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return true; return refcount_dec_if_one(&qdisc->refcnt); } /* Intended to be used by unlocked users, when concurrent qdisc release is * possible. */ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_BUILTIN) return qdisc; if (refcount_inc_not_zero(&qdisc->refcnt)) return qdisc; return NULL; } /* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc * root_lock section, or provide their own memory barriers -- ordering * against qdisc_run_begin/end() atomic bit operations. 
*/ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) { return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY); } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; } static inline bool qdisc_is_empty(const struct Qdisc *qdisc) { if (qdisc_is_percpu_stats(qdisc)) return nolock_qdisc_is_empty(qdisc); return !READ_ONCE(qdisc->q.qlen); } /* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with * the qdisc root lock acquired. */ static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { if (spin_trylock(&qdisc->seqlock)) return true; /* No need to insist if the MISSED flag was already set. * Note that test_and_set_bit() also gives us memory ordering * guarantees wrt potential earlier enqueue() and below * spin_trylock(), both of which are necessary to prevent races */ if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state)) return false; /* Try to take the lock again to make sure that we will either * grab it or the CPU that still has it will see MISSED set * when testing it in qdisc_run_end() */ return spin_trylock(&qdisc->seqlock); } return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } static inline void qdisc_run_end(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); /* spin_unlock() only has store-release semantic. The unlock * and test_bit() ordering is a store-load ordering, so a full * memory barrier is needed here. */ smp_mb(); if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) __netif_schedule(qdisc); } else { __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); } } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) { return qdisc->flags & TCQ_F_ONETXQUEUE; } static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) { return netdev_queue_dql_avail(txq); } struct Qdisc_class_ops { unsigned int flags; /* Child qdisc manipulation */ struct netdev_queue * (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **, struct netlink_ext_ack *extack); struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl); void (*qlen_notify)(struct Qdisc *, unsigned long); /* Class manipulation routines */ unsigned long (*find)(struct Qdisc *, u32 classid); int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *, struct netlink_ext_ack *); int (*delete)(struct Qdisc *, unsigned long, struct netlink_ext_ack *); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ struct tcf_block * (*tcf_block)(struct Qdisc *sch, unsigned long arg, struct netlink_ext_ack *extack); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); /* rtnetlink specific */ int (*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*); int (*dump_stats)(struct Qdisc *, unsigned long, struct gnet_dump *); }; /* Qdisc_class_ops flag values */ /* Implements API that doesn't require rtnl lock */ enum qdisc_class_ops_flags { QDISC_CLASS_OPS_DOIT_UNLOCKED = 1, }; struct Qdisc_ops { struct Qdisc_ops *next; const struct Qdisc_class_ops *cl_ops; char id[IFNAMSIZ]; int priv_size; unsigned int static_flags; int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, 
struct sk_buff **to_free); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); int (*init)(struct Qdisc *sch, struct nlattr *arg, struct netlink_ext_ack *extack); void (*reset)(struct Qdisc *); void (*destroy)(struct Qdisc *); int (*change)(struct Qdisc *sch, struct nlattr *arg, struct netlink_ext_ack *extack); void (*attach)(struct Qdisc *sch); int (*change_tx_queue_len)(struct Qdisc *, unsigned int); void (*change_real_num_tx)(struct Qdisc *sch, unsigned int new_real_tx); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); void (*ingress_block_set)(struct Qdisc *sch, u32 block_index); void (*egress_block_set)(struct Qdisc *sch, u32 block_index); u32 (*ingress_block_get)(struct Qdisc *sch); u32 (*egress_block_get)(struct Qdisc *sch); struct module *owner; }; struct tcf_result { union { struct { unsigned long class; u32 classid; }; const struct tcf_proto *goto_tp; }; }; struct tcf_chain; struct tcf_proto_ops { struct list_head head; char kind[IFNAMSIZ]; int (*classify)(struct sk_buff *, const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); void (*destroy)(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack); void* (*get)(struct tcf_proto*, u32 handle); void (*put)(struct tcf_proto *tp, void *f); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, void **, u32, struct netlink_ext_ack *); int (*delete)(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *); bool (*delete_empty)(struct tcf_proto *tp); void (*walk)(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held); int (*reoffload)(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); void (*hw_add)(struct tcf_proto *tp, void *type_data); void (*hw_del)(struct tcf_proto *tp, void *type_data); void (*bind_class)(void *, u32, unsigned long, void *, unsigned long); void * (*tmplt_create)(struct net *net, struct tcf_chain *chain, struct nlattr **tca, struct netlink_ext_ack *extack); void (*tmplt_destroy)(void *tmplt_priv); void (*tmplt_reoffload)(struct tcf_chain *chain, bool add, flow_setup_cb_t *cb, void *cb_priv); struct tcf_exts * (*get_exts)(const struct tcf_proto *tp, u32 handle); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, struct sk_buff *skb, struct tcmsg*, bool); int (*terse_dump)(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held); int (*tmplt_dump)(struct sk_buff *skb, struct net *net, void *tmplt_priv); struct module *owner; int flags; }; /* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags * are expected to implement tcf_proto_ops->delete_empty(), otherwise race * conditions can occur when filters are inserted/deleted simultaneously. */ enum tcf_proto_ops_flags { TCF_PROTO_OPS_DOIT_UNLOCKED = 1, }; struct tcf_proto { /* Fast access part */ struct tcf_proto __rcu *next; void __rcu *root; /* called under RCU BH lock*/ int (*classify)(struct sk_buff *, const struct tcf_proto *, struct tcf_result *); __be16 protocol; /* All the rest */ u32 prio; void *data; const struct tcf_proto_ops *ops; struct tcf_chain *chain; /* Lock protects tcf_proto shared state and can be used by unlocked * classifiers to protect their private data. 
*/ spinlock_t lock; bool deleting; refcount_t refcnt; struct rcu_head rcu; struct hlist_node destroy_ht_node; }; struct qdisc_skb_cb { struct { unsigned int pkt_len; u16 slave_dev_queue_mapping; u16 tc_classid; }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); struct tcf_chain { /* Protects filter_chain. */ struct mutex filter_chain_lock; struct tcf_proto __rcu *filter_chain; struct list_head list; struct tcf_block *block; u32 index; /* chain index */ unsigned int refcnt; unsigned int action_refcnt; bool explicitly_created; bool flushing; const struct tcf_proto_ops *tmplt_ops; void *tmplt_priv; struct rcu_head rcu; }; struct tcf_block { struct xarray ports; /* datapath accessible */ /* Lock protects tcf_block and lifetime-management data of chains * attached to the block (refcnt, action_refcnt, explicitly_created). */ struct mutex lock; struct list_head chain_list; u32 index; /* block index for shared blocks */ u32 classid; /* which class this block belongs to */ refcount_t refcnt; struct net *net; struct Qdisc *q; struct rw_semaphore cb_lock; /* protects cb_list and offload counters */ struct flow_block flow_block; struct list_head owner_list; bool keep_dst; atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ struct { struct tcf_chain *chain; struct list_head filter_chain_list; } chain0; struct rcu_head rcu; DECLARE_HASHTABLE(proto_destroy_ht, 7); struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */ }; struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index); static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) { return lockdep_is_held(&chain->filter_chain_lock); } static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp) { return lockdep_is_held(&tp->lock); } #define tcf_chain_dereference(p, chain) \ rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain)) #define tcf_proto_dereference(p, tp) \ rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp)) static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb)); BUILD_BUG_ON(sizeof(qcb->data) < sz); } static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; } static inline int qdisc_qlen_sum(const struct Qdisc *q) { __u32 qlen = q->qstats.qlen; int i; if (qdisc_is_percpu_stats(q)) { for_each_possible_cpu(i) qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; } else { qlen += q->q.qlen; } return qlen; } static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb) { return (struct qdisc_skb_cb *)skb->cb; } static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) { return &qdisc->q.lock; } static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc) { struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc); return q; } static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc) { return rcu_dereference_bh(qdisc->dev_queue->qdisc); } static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) { return rcu_dereference_rtnl(qdisc->dev_queue->qdisc_sleeping); } static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) { struct Qdisc *root = qdisc_root_sleeping(qdisc); ASSERT_RTNL(); return qdisc_lock(root); } static inline struct net_device *qdisc_dev(const 
struct Qdisc *qdisc) { return qdisc->dev_queue->dev; } static inline void sch_tree_lock(struct Qdisc *q) { if (q->flags & TCQ_F_MQROOT) spin_lock_bh(qdisc_lock(q)); else spin_lock_bh(qdisc_root_sleeping_lock(q)); } static inline void sch_tree_unlock(struct Qdisc *q) { if (q->flags & TCQ_F_MQROOT) spin_unlock_bh(qdisc_lock(q)); else spin_unlock_bh(qdisc_root_sleeping_lock(q)); } extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; extern const u8 sch_default_prio2band[TC_PRIO_MAX + 1]; extern struct Qdisc_ops mq_qdisc_ops; extern struct Qdisc_ops noqueue_qdisc_ops; extern const struct Qdisc_ops *default_qdisc_ops; static inline const struct Qdisc_ops * get_default_qdisc_ops(const struct net_device *dev, int ntx) { return ntx < dev->real_num_tx_queues ? default_qdisc_ops : &pfifo_fast_ops; } struct Qdisc_class_common { u32 classid; unsigned int filter_cnt; struct hlist_node hnode; }; struct Qdisc_class_hash { struct hlist_head *hash; unsigned int hashsize; unsigned int hashmask; unsigned int hashelems; }; static inline unsigned int qdisc_class_hash(u32 id, u32 mask) { id ^= id >> 8; id ^= id >> 4; return id & mask; } static inline struct Qdisc_class_common * qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) { struct Qdisc_class_common *cl; unsigned int h; if (!id) return NULL; h = qdisc_class_hash(id, hash->hashmask); hlist_for_each_entry(cl, &hash->hash[h], hnode) { if (cl->classid == id) return cl; } return NULL; } static inline bool qdisc_class_in_use(const struct Qdisc_class_common *cl) { return cl->filter_cnt > 0; } static inline void qdisc_class_get(struct Qdisc_class_common *cl) { unsigned int res; if (check_add_overflow(cl->filter_cnt, 1, &res)) WARN(1, "Qdisc class overflow"); cl->filter_cnt = res; } static inline void qdisc_class_put(struct Qdisc_class_common *cl) { unsigned int res; if (check_sub_overflow(cl->filter_cnt, 1, &res)) WARN(1, "Qdisc class underflow"); cl->filter_cnt = res; } static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid) { u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY; return (hwtc < netdev_get_num_tc(dev)) ? 
hwtc : -EINVAL; } int qdisc_class_hash_init(struct Qdisc_class_hash *); void qdisc_class_hash_insert(struct Qdisc_class_hash *, struct Qdisc_class_common *); void qdisc_class_hash_remove(struct Qdisc_class_hash *, struct Qdisc_class_common *); void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); void dev_qdisc_change_real_num_tx(struct net_device *dev, unsigned int new_real_tx); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); void dev_deactivate(struct net_device *dev); void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); void qdisc_destroy(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); #ifdef CONFIG_NET_SCHED int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data); void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, enum tc_setup_type type, void *type_data, struct netlink_ext_ack *extack); #else static inline int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data) { q->flags &= ~TCQ_F_OFFLOADED; return 0; } static inline void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, enum tc_setup_type type, void *type_data, struct netlink_ext_ack *extack) { } #endif void qdisc_offload_query_caps(struct net_device *dev, enum tc_setup_type type, void *caps, size_t caps_len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, struct netlink_ext_ack *extack); void qdisc_free(struct Qdisc *qdisc); struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, u32 parentid, struct netlink_ext_ack *extack); void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); int skb_do_redirect(struct sk_buff *); static inline bool skb_at_tc_ingress(const struct sk_buff *skb) { #ifdef CONFIG_NET_XGRESS return skb->tc_at_ingress; #else return false; #endif } static inline bool skb_skip_tc_classify(struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT if (skb->tc_skip_classify) { skb->tc_skip_classify = 0; return true; } #endif return false; } /* Reset all TX qdiscs greater than index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; for (; i < dev->num_tx_queues; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } } } /* Are all TX queues of the device empty? */ static inline bool qdisc_all_tx_empty(const struct net_device *dev) { unsigned int i; rcu_read_lock(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); const struct Qdisc *q = rcu_dereference(txq->qdisc); if (!qdisc_is_empty(q)) { rcu_read_unlock(); return false; } } rcu_read_unlock(); return true; } /* Are any of the TX qdiscs changing? 
*/ static inline bool qdisc_tx_changing(const struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); if (rcu_access_pointer(txq->qdisc) != rcu_access_pointer(txq->qdisc_sleeping)) return true; } return false; } /* Is the device using the noop qdisc on all queues? */ static inline bool qdisc_tx_is_noop(const struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); if (rcu_access_pointer(txq->qdisc) != &noop_qdisc) return false; } return true; } static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb) { return qdisc_skb_cb(skb)->pkt_len; } /* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */ enum net_xmit_qdisc_t { __NET_XMIT_STOLEN = 0x00010000, __NET_XMIT_BYPASS = 0x00020000, }; #ifdef CONFIG_NET_CLS_ACT #define net_xmit_drop_count(e) ((e) & __NET_XMIT_STOLEN ? 0 : 1) #else #define net_xmit_drop_count(e) (1) #endif static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, const struct Qdisc *sch) { #ifdef CONFIG_NET_SCHED struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab); if (stab) __qdisc_calculate_pkt_len(skb, stab); #endif } static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { qdisc_calculate_pkt_len(skb, sch); return sch->enqueue(skb, sch, to_free); } static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, __u64 bytes, __u32 packets) { u64_stats_update_begin(&bstats->syncp); u64_stats_add(&bstats->bytes, bytes); u64_stats_add(&bstats->packets, packets); u64_stats_update_end(&bstats->syncp); } static inline void bstats_update(struct gnet_stats_basic_sync *bstats, const struct sk_buff *skb) { _bstats_update(bstats, qdisc_pkt_len(skb), skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1); } static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, const struct sk_buff *skb) { bstats_update(this_cpu_ptr(sch->cpu_bstats), skb); } static inline void qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb) { bstats_update(&sch->bstats, skb); } static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch, const struct sk_buff *skb) { sch->qstats.backlog -= qdisc_pkt_len(skb); } static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch, const struct sk_buff *skb) { this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); } static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch, const struct sk_buff *skb) { sch->qstats.backlog += qdisc_pkt_len(skb); } static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch, const struct sk_buff *skb) { this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); } static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch) { this_cpu_inc(sch->cpu_qstats->qlen); } static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch) { this_cpu_dec(sch->cpu_qstats->qlen); } static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) { this_cpu_inc(sch->cpu_qstats->requeues); } static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count) { sch->qstats.drops += count; } static inline void qstats_drop_inc(struct gnet_stats_queue *qstats) { qstats->drops++; } static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats) { qstats->overlimits++; } static inline void qdisc_qstats_drop(struct Qdisc *sch) { qstats_drop_inc(&sch->qstats); } static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) { this_cpu_inc(sch->cpu_qstats->drops); } static inline void qdisc_qstats_overlimit(struct Qdisc *sch) { sch->qstats.overlimits++; } static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch) { __u32 qlen = qdisc_qlen_sum(sch); return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen); } static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, __u32 *backlog) { struct gnet_stats_queue qstats = { 0 }; gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats); *qlen = qstats.qlen + qdisc_qlen(sch); *backlog = qstats.backlog; } static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) { __u32 qlen, backlog; qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); qdisc_tree_reduce_backlog(sch, qlen, backlog); } static inline void qdisc_purge_queue(struct Qdisc *sch) { __u32 qlen, backlog; qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); qdisc_reset(sch); qdisc_tree_reduce_backlog(sch, qlen, backlog); } static inline void __qdisc_enqueue_tail(struct sk_buff *skb, struct qdisc_skb_head *qh) { struct sk_buff *last = qh->tail; if (last) { skb->next = NULL; last->next = skb; qh->tail = skb; } else { qh->tail = skb; qh->head = skb; } qh->qlen++; } static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) { __qdisc_enqueue_tail(skb, &sch->q); qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; } static inline void __qdisc_enqueue_head(struct sk_buff *skb, struct qdisc_skb_head *qh) { skb->next = qh->head; if (!qh->head) qh->tail = skb; qh->head = skb; qh->qlen++; } static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) { struct sk_buff *skb = qh->head; if (likely(skb != NULL)) { qh->head = skb->next; qh->qlen--; if (qh->head == NULL) qh->tail = NULL; skb->next = NULL; } return skb; } static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { struct sk_buff *skb = 
__qdisc_dequeue_head(&sch->q); if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); } return skb; } struct tc_skb_cb { struct qdisc_skb_cb qdisc_cb; u32 drop_reason; u16 zone; /* Only valid if post_ct = true */ u16 mru; u8 post_ct:1; u8 post_ct_snat:1; u8 post_ct_dnat:1; }; static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) { struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); return cb; } static inline enum skb_drop_reason tcf_get_drop_reason(const struct sk_buff *skb) { return tc_skb_cb(skb)->drop_reason; } static inline void tcf_set_drop_reason(const struct sk_buff *skb, enum skb_drop_reason reason) { tc_skb_cb(skb)->drop_reason = reason; } /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) { skb->next = *to_free; *to_free = skb; } static inline void __qdisc_drop_all(struct sk_buff *skb, struct sk_buff **to_free) { if (skb->prev) skb->prev->next = *to_free; else skb->next = *to_free; *to_free = skb; } static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, struct qdisc_skb_head *qh, struct sk_buff **to_free) { struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); __qdisc_drop(skb, to_free); return len; } return 0; } static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { const struct qdisc_skb_head *qh = &sch->q; return qh->head; } /* generic pseudo peek method for non-work-conserving qdisc */ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) { struct sk_buff *skb = skb_peek(&sch->gso_skb); /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ if (!skb) { skb = sch->dequeue(sch); if (skb) { __skb_queue_head(&sch->gso_skb, skb); /* it's still part of the queue */ qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } } return skb; } static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch, struct sk_buff *skb) { if (qdisc_is_percpu_stats(sch)) { qdisc_qstats_cpu_backlog_dec(sch, skb); qdisc_bstats_cpu_update(sch, skb); qdisc_qstats_cpu_qlen_dec(sch); } else { qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); sch->q.qlen--; } } static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch, unsigned int pkt_len) { if (qdisc_is_percpu_stats(sch)) { qdisc_qstats_cpu_qlen_inc(sch); this_cpu_add(sch->cpu_qstats->backlog, pkt_len); } else { sch->qstats.backlog += pkt_len; sch->q.qlen++; } } /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { struct sk_buff *skb = skb_peek(&sch->gso_skb); if (skb) { skb = __skb_dequeue(&sch->gso_skb); if (qdisc_is_percpu_stats(sch)) { qdisc_qstats_cpu_backlog_dec(sch, skb); qdisc_qstats_cpu_qlen_dec(sch); } else { qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } } else { skb = sch->dequeue(sch); } return skb; } static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ ASSERT_RTNL(); if (qh->qlen) { rtnl_kfree_skbs(qh->head, qh->tail); qh->head = NULL; qh->tail = NULL; qh->qlen = 0; } } static inline void qdisc_reset_queue(struct Qdisc *sch) { __qdisc_reset_queue(&sch->q); } static inline struct Qdisc 
*qdisc_replace(struct Qdisc *sch, struct Qdisc *new, struct Qdisc **pold) { struct Qdisc *old; sch_tree_lock(sch); old = *pold; *pold = new; if (old != NULL) qdisc_purge_queue(old); sch_tree_unlock(sch); return old; } static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) { rtnl_kfree_skbs(skb, skb); qdisc_qstats_drop(sch); } static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { __qdisc_drop(skb, to_free); qdisc_qstats_cpu_drop(sch); return NET_XMIT_DROP; } static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { __qdisc_drop(skb, to_free); qdisc_qstats_drop(sch); return NET_XMIT_DROP; } static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { __qdisc_drop_all(skb, to_free); qdisc_qstats_drop(sch); return NET_XMIT_DROP; } struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; u16 mpu; u8 linklayer; u8 shift; }; static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, unsigned int len) { len += r->overhead; if (len < r->mpu) len = r->mpu; if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; return ((u64)len * r->mult) >> r->shift; } void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf, u64 rate64); static inline void psched_ratecfg_getrate(struct tc_ratespec *res, const struct psched_ratecfg *r) { memset(res, 0, sizeof(*res)); /* legacy struct tc_ratespec has a 32bit @rate field * Qdisc using 64bit rate should add new attributes * in order to maintain compatibility. */ res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } struct psched_pktrate { u64 rate_pkts_ps; /* packets per second */ u32 mult; u8 shift; }; static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r, unsigned int pkt_num) { return ((u64)pkt_num * r->mult) >> r->shift; } void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64); /* Mini Qdisc serves for specific needs of ingress/clsact Qdisc. * The fast path only needs to access filter list and to update stats */ struct mini_Qdisc { struct tcf_proto *filter_list; struct tcf_block *block; struct gnet_stats_basic_sync __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; unsigned long rcu_state; }; static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, const struct sk_buff *skb) { bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb); } static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) { this_cpu_inc(miniq->cpu_qstats->drops); } struct mini_Qdisc_pair { struct mini_Qdisc miniq1; struct mini_Qdisc miniq2; struct mini_Qdisc __rcu **p_miniq; }; void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head); void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, struct mini_Qdisc __rcu **p_miniq); void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx); int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); /* Make sure qdisc is no longer in SCHED state. */ static inline void qdisc_synchronize(const struct Qdisc *q) { while (test_bit(__QDISC_STATE_SCHED, &q->state)) msleep(1); } #endif
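The Qdisc_ops table and the queue helpers declared above (qdisc_enqueue_tail(), qdisc_dequeue_head(), qdisc_peek_head(), qdisc_drop(), qdisc_reset_queue()) are enough to express a trivial tail-drop FIFO. The module below is an illustrative sketch only, not an in-tree scheduler: the name "sketchfifo", the fixed default limit, and the absence of netlink option parsing are my simplifications, and register_qdisc()/unregister_qdisc() come from net/pkt_sched.h rather than this header. If built as a module it could in principle be attached as a root qdisc with tc, but the point here is only to show how the ops table and the helpers fit together.

/*
 * Illustrative sketch: a bare-bones FIFO qdisc wired up with the helpers
 * declared in net/sch_generic.h. "sketchfifo" is a made-up name.
 */
#include <linux/module.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>	/* register_qdisc()/unregister_qdisc() */

static int sketchfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			      struct sk_buff **to_free)
{
	/* Tail-drop once the configured packet limit is reached. */
	if (unlikely(qdisc_qlen(sch) >= sch->limit))
		return qdisc_drop(skb, sch, to_free);

	/* Updates backlog stats and returns NET_XMIT_SUCCESS. */
	return qdisc_enqueue_tail(skb, sch);
}

static struct sk_buff *sketchfifo_dequeue(struct Qdisc *sch)
{
	/* Decrements backlog and updates bstats for us. */
	return qdisc_dequeue_head(sch);
}

static int sketchfifo_init(struct Qdisc *sch, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	sch->limit = 1000;	/* arbitrary default; no options parsed */
	return 0;
}

static void sketchfifo_reset(struct Qdisc *sch)
{
	qdisc_reset_queue(sch);
}

static struct Qdisc_ops sketchfifo_qdisc_ops __read_mostly = {
	.id		= "sketchfifo",
	.priv_size	= 0,
	.enqueue	= sketchfifo_enqueue,
	.dequeue	= sketchfifo_dequeue,
	.peek		= qdisc_peek_head,
	.init		= sketchfifo_init,
	.reset		= sketchfifo_reset,
	.owner		= THIS_MODULE,
};

static int __init sketchfifo_module_init(void)
{
	return register_qdisc(&sketchfifo_qdisc_ops);
}

static void __exit sketchfifo_module_exit(void)
{
	unregister_qdisc(&sketchfifo_qdisc_ops);
}

module_init(sketchfifo_module_init);
module_exit(sketchfifo_module_exit);
MODULE_LICENSE("GPL");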
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_IRQ_WORK_H
#define _ASM_IRQ_WORK_H

#include <asm/cpufeature.h>

#ifdef CONFIG_X86_LOCAL_APIC
static inline bool arch_irq_work_has_interrupt(void)
{
	return boot_cpu_has(X86_FEATURE_APIC);
}
#else
static inline bool arch_irq_work_has_interrupt(void)
{
	return false;
}
#endif

#endif /* _ASM_IRQ_WORK_H */
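arch_irq_work_has_interrupt() only reports a capability: whether this CPU can raise an interrupt on itself (here, whether a local APIC is present), which decides whether queued irq_work can run almost immediately or must wait for the next timer tick. The generic consumer of this predicate (kernel/irq_work.c) is not part of this excerpt, so the sketch below is purely illustrative and raise_self_ipi()/run_on_next_tick() are hypothetical stand-ins, not kernel functions.

/* Illustrative only: branching on the capability reported above. */
#include <stdbool.h>
#include <stdio.h>

static bool arch_irq_work_has_interrupt(void)
{
	return true;	/* pretend boot_cpu_has(X86_FEATURE_APIC) succeeded */
}

static void raise_self_ipi(void)	/* hypothetical stand-in */
{
	puts("self-IPI available: queued work runs almost immediately");
}

static void run_on_next_tick(void)	/* hypothetical stand-in */
{
	puts("no self-IPI: queued work is picked up on the next timer tick");
}

int main(void)
{
	if (arch_irq_work_has_interrupt())
		raise_self_ipi();
	else
		run_on_next_tick();
	return 0;
}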
1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 // SPDX-License-Identifier: ISC /* * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018, The Linux Foundation. All rights reserved. * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "coredump.h" #include <linux/devcoredump.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/utsname.h> #include "debug.h" #include "hw.h" static const struct ath10k_mem_section qca6174_hw21_register_sections[] = { {0x800, 0x810}, {0x820, 0x82C}, {0x830, 0x8F4}, {0x90C, 0x91C}, {0xA14, 0xA18}, {0xA84, 0xA94}, {0xAA8, 0xAD4}, {0xADC, 0xB40}, {0x1000, 0x10A4}, {0x10BC, 0x111C}, {0x1134, 0x1138}, {0x1144, 0x114C}, {0x1150, 0x115C}, {0x1160, 0x1178}, {0x1240, 0x1260}, {0x2000, 0x207C}, {0x3000, 0x3014}, {0x4000, 0x4014}, {0x5000, 0x5124}, {0x6000, 0x6040}, {0x6080, 0x60CC}, {0x6100, 0x611C}, {0x6140, 0x61D8}, {0x6200, 0x6238}, {0x6240, 0x628C}, {0x62C0, 0x62EC}, {0x6380, 0x63E8}, {0x6400, 0x6440}, {0x6480, 0x64CC}, {0x6500, 0x651C}, {0x6540, 0x6580}, {0x6600, 0x6638}, {0x6640, 0x668C}, {0x66C0, 0x66EC}, {0x6780, 0x67E8}, {0x7080, 0x708C}, {0x70C0, 0x70C8}, {0x7400, 0x741C}, {0x7440, 0x7454}, {0x7800, 0x7818}, {0x8000, 0x8004}, {0x8010, 0x8064}, {0x8080, 0x8084}, {0x80A0, 0x80A4}, {0x80C0, 0x80C4}, {0x80E0, 0x80F4}, {0x8100, 0x8104}, {0x8110, 0x812C}, {0x9000, 0x9004}, {0x9800, 0x982C}, {0x9830, 0x9838}, {0x9840, 0x986C}, {0x9870, 0x9898}, {0x9A00, 0x9C00}, {0xD580, 0xD59C}, {0xF000, 0xF0E0}, {0xF140, 0xF190}, {0xF250, 0xF25C}, {0xF260, 0xF268}, {0xF26C, 0xF2A8}, {0x10008, 0x1000C}, {0x10014, 0x10018}, {0x1001C, 0x10020}, {0x10024, 0x10028}, {0x10030, 0x10034}, {0x10040, 0x10054}, {0x10058, 0x1007C}, {0x10080, 0x100C4}, {0x100C8, 0x10114}, {0x1012C, 0x10130}, {0x10138, 0x10144}, {0x10200, 0x10220}, {0x10230, 0x10250}, {0x10260, 0x10280}, {0x10290, 0x102B0}, {0x102C0, 0x102DC}, {0x102E0, 0x102F4}, {0x102FC, 0x1037C}, {0x10380, 0x10390}, {0x10800, 0x10828}, {0x10840, 0x10844}, {0x10880, 0x10884}, {0x108C0, 0x108E8}, {0x10900, 0x10928}, {0x10940, 0x10944}, {0x10980, 0x10984}, {0x109C0, 0x109E8}, {0x10A00, 0x10A28}, {0x10A40, 0x10A50}, {0x11000, 0x11028}, {0x11030, 0x11034}, {0x11038, 0x11068}, {0x11070, 0x11074}, {0x11078, 0x110A8}, {0x110B0, 0x110B4}, {0x110B8, 0x110E8}, {0x110F0, 0x110F4}, {0x110F8, 0x11128}, {0x11138, 0x11144}, {0x11178, 0x11180}, {0x111B8, 0x111C0}, {0x111F8, 0x11200}, {0x11238, 0x1123C}, {0x11270, 0x11274}, {0x11278, 0x1127C}, {0x112B0, 0x112B4}, {0x112B8, 0x112BC}, {0x112F0, 0x112F4}, {0x112F8, 0x112FC}, {0x11338, 0x1133C}, {0x11378, 0x1137C}, {0x113B8, 0x113BC}, {0x113F8, 0x113FC}, {0x11438, 0x11440}, {0x11478, 0x11480}, {0x114B8, 0x114BC}, {0x114F8, 0x114FC}, {0x11538, 0x1153C}, {0x11578, 0x1157C}, {0x115B8, 0x115BC}, {0x115F8, 0x115FC}, {0x11638, 0x1163C}, {0x11678, 0x1167C}, {0x116B8, 0x116BC}, {0x116F8, 0x116FC}, {0x11738, 0x1173C}, {0x11778, 0x1177C}, {0x117B8, 0x117BC}, {0x117F8, 0x117FC}, {0x17000, 0x1701C}, {0x17020, 0x170AC}, {0x18000, 0x18050}, {0x18054, 0x18074}, {0x18080, 0x180D4}, {0x180DC, 0x18104}, {0x18108, 0x1813C}, {0x18144, 0x18148}, {0x18168, 0x18174}, {0x18178, 0x18180}, {0x181C8, 0x181E0}, {0x181E4, 0x181E8}, {0x181EC, 0x1820C}, {0x1825C, 0x18280}, {0x18284, 0x18290}, {0x18294, 0x182A0}, {0x18300, 0x18304}, {0x18314, 0x18320}, {0x18328, 0x18350}, {0x1835C, 0x1836C}, {0x18370, 0x18390}, {0x18398, 0x183AC}, {0x183BC, 0x183D8}, {0x183DC, 0x183F4}, {0x18400, 0x186F4}, {0x186F8, 0x1871C}, {0x18720, 
0x18790}, {0x19800, 0x19830}, {0x19834, 0x19840}, {0x19880, 0x1989C}, {0x198A4, 0x198B0}, {0x198BC, 0x19900}, {0x19C00, 0x19C88}, {0x19D00, 0x19D20}, {0x19E00, 0x19E7C}, {0x19E80, 0x19E94}, {0x19E98, 0x19EAC}, {0x19EB0, 0x19EBC}, {0x19F70, 0x19F74}, {0x19F80, 0x19F8C}, {0x19FA0, 0x19FB4}, {0x19FC0, 0x19FD8}, {0x1A000, 0x1A200}, {0x1A204, 0x1A210}, {0x1A228, 0x1A22C}, {0x1A230, 0x1A248}, {0x1A250, 0x1A270}, {0x1A280, 0x1A290}, {0x1A2A0, 0x1A2A4}, {0x1A2C0, 0x1A2EC}, {0x1A300, 0x1A3BC}, {0x1A3F0, 0x1A3F4}, {0x1A3F8, 0x1A434}, {0x1A438, 0x1A444}, {0x1A448, 0x1A468}, {0x1A580, 0x1A58C}, {0x1A644, 0x1A654}, {0x1A670, 0x1A698}, {0x1A6AC, 0x1A6B0}, {0x1A6D0, 0x1A6D4}, {0x1A6EC, 0x1A70C}, {0x1A710, 0x1A738}, {0x1A7C0, 0x1A7D0}, {0x1A7D4, 0x1A7D8}, {0x1A7DC, 0x1A7E4}, {0x1A7F0, 0x1A7F8}, {0x1A888, 0x1A89C}, {0x1A8A8, 0x1A8AC}, {0x1A8C0, 0x1A8DC}, {0x1A8F0, 0x1A8FC}, {0x1AE04, 0x1AE08}, {0x1AE18, 0x1AE24}, {0x1AF80, 0x1AF8C}, {0x1AFA0, 0x1AFB4}, {0x1B000, 0x1B200}, {0x1B284, 0x1B288}, {0x1B2D0, 0x1B2D8}, {0x1B2DC, 0x1B2EC}, {0x1B300, 0x1B340}, {0x1B374, 0x1B378}, {0x1B380, 0x1B384}, {0x1B388, 0x1B38C}, {0x1B404, 0x1B408}, {0x1B420, 0x1B428}, {0x1B440, 0x1B444}, {0x1B448, 0x1B44C}, {0x1B450, 0x1B458}, {0x1B45C, 0x1B468}, {0x1B584, 0x1B58C}, {0x1B68C, 0x1B690}, {0x1B6AC, 0x1B6B0}, {0x1B7F0, 0x1B7F8}, {0x1C800, 0x1CC00}, {0x1CE00, 0x1CE04}, {0x1CF80, 0x1CF84}, {0x1D200, 0x1D800}, {0x1E000, 0x20014}, {0x20100, 0x20124}, {0x21400, 0x217A8}, {0x21800, 0x21BA8}, {0x21C00, 0x21FA8}, {0x22000, 0x223A8}, {0x22400, 0x227A8}, {0x22800, 0x22BA8}, {0x22C00, 0x22FA8}, {0x23000, 0x233A8}, {0x24000, 0x24034}, {0x26000, 0x26064}, {0x27000, 0x27024}, {0x34000, 0x3400C}, {0x34400, 0x3445C}, {0x34800, 0x3485C}, {0x34C00, 0x34C5C}, {0x35000, 0x3505C}, {0x35400, 0x3545C}, {0x35800, 0x3585C}, {0x35C00, 0x35C5C}, {0x36000, 0x3605C}, {0x38000, 0x38064}, {0x38070, 0x380E0}, {0x3A000, 0x3A064}, {0x40000, 0x400A4}, {0x80000, 0x8000C}, {0x80010, 0x80020}, }; static const struct ath10k_mem_section qca6174_hw30_sdio_register_sections[] = { {0x800, 0x810}, {0x820, 0x82C}, {0x830, 0x8F4}, {0x90C, 0x91C}, {0xA14, 0xA18}, {0xA84, 0xA94}, {0xAA8, 0xAD4}, {0xADC, 0xB40}, {0x1000, 0x10A4}, {0x10BC, 0x111C}, {0x1134, 0x1138}, {0x1144, 0x114C}, {0x1150, 0x115C}, {0x1160, 0x1178}, {0x1240, 0x1260}, {0x2000, 0x207C}, {0x3000, 0x3014}, {0x4000, 0x4014}, {0x5000, 0x5124}, {0x6000, 0x6040}, {0x6080, 0x60CC}, {0x6100, 0x611C}, {0x6140, 0x61D8}, {0x6200, 0x6238}, {0x6240, 0x628C}, {0x62C0, 0x62EC}, {0x6380, 0x63E8}, {0x6400, 0x6440}, {0x6480, 0x64CC}, {0x6500, 0x651C}, {0x6540, 0x6580}, {0x6600, 0x6638}, {0x6640, 0x668C}, {0x66C0, 0x66EC}, {0x6780, 0x67E8}, {0x7080, 0x708C}, {0x70C0, 0x70C8}, {0x7400, 0x741C}, {0x7440, 0x7454}, {0x7800, 0x7818}, {0x8010, 0x8060}, {0x8080, 0x8084}, {0x80A0, 0x80A4}, {0x80C0, 0x80C4}, {0x80E0, 0x80ec}, {0x8110, 0x8128}, {0x9000, 0x9004}, {0xF000, 0xF0E0}, {0xF140, 0xF190}, {0xF250, 0xF25C}, {0xF260, 0xF268}, {0xF26C, 0xF2A8}, {0x10008, 0x1000C}, {0x10014, 0x10018}, {0x1001C, 0x10020}, {0x10024, 0x10028}, {0x10030, 0x10034}, {0x10040, 0x10054}, {0x10058, 0x1007C}, {0x10080, 0x100C4}, {0x100C8, 0x10114}, {0x1012C, 0x10130}, {0x10138, 0x10144}, {0x10200, 0x10220}, {0x10230, 0x10250}, {0x10260, 0x10280}, {0x10290, 0x102B0}, {0x102C0, 0x102DC}, {0x102E0, 0x102F4}, {0x102FC, 0x1037C}, {0x10380, 0x10390}, {0x10800, 0x10828}, {0x10840, 0x10844}, {0x10880, 0x10884}, {0x108C0, 0x108E8}, {0x10900, 0x10928}, {0x10940, 0x10944}, {0x10980, 0x10984}, {0x109C0, 0x109E8}, {0x10A00, 0x10A28}, {0x10A40, 0x10A50}, {0x11000, 0x11028}, 
{0x11030, 0x11034}, {0x11038, 0x11068}, {0x11070, 0x11074}, {0x11078, 0x110A8}, {0x110B0, 0x110B4}, {0x110B8, 0x110E8}, {0x110F0, 0x110F4}, {0x110F8, 0x11128}, {0x11138, 0x11144}, {0x11178, 0x11180}, {0x111B8, 0x111C0}, {0x111F8, 0x11200}, {0x11238, 0x1123C}, {0x11270, 0x11274}, {0x11278, 0x1127C}, {0x112B0, 0x112B4}, {0x112B8, 0x112BC}, {0x112F0, 0x112F4}, {0x112F8, 0x112FC}, {0x11338, 0x1133C}, {0x11378, 0x1137C}, {0x113B8, 0x113BC}, {0x113F8, 0x113FC}, {0x11438, 0x11440}, {0x11478, 0x11480}, {0x114B8, 0x114BC}, {0x114F8, 0x114FC}, {0x11538, 0x1153C}, {0x11578, 0x1157C}, {0x115B8, 0x115BC}, {0x115F8, 0x115FC}, {0x11638, 0x1163C}, {0x11678, 0x1167C}, {0x116B8, 0x116BC}, {0x116F8, 0x116FC}, {0x11738, 0x1173C}, {0x11778, 0x1177C}, {0x117B8, 0x117BC}, {0x117F8, 0x117FC}, {0x17000, 0x1701C}, {0x17020, 0x170AC}, {0x18000, 0x18050}, {0x18054, 0x18074}, {0x18080, 0x180D4}, {0x180DC, 0x18104}, {0x18108, 0x1813C}, {0x18144, 0x18148}, {0x18168, 0x18174}, {0x18178, 0x18180}, {0x181C8, 0x181E0}, {0x181E4, 0x181E8}, {0x181EC, 0x1820C}, {0x1825C, 0x18280}, {0x18284, 0x18290}, {0x18294, 0x182A0}, {0x18300, 0x18304}, {0x18314, 0x18320}, {0x18328, 0x18350}, {0x1835C, 0x1836C}, {0x18370, 0x18390}, {0x18398, 0x183AC}, {0x183BC, 0x183D8}, {0x183DC, 0x183F4}, {0x18400, 0x186F4}, {0x186F8, 0x1871C}, {0x18720, 0x18790}, {0x19800, 0x19830}, {0x19834, 0x19840}, {0x19880, 0x1989C}, {0x198A4, 0x198B0}, {0x198BC, 0x19900}, {0x19C00, 0x19C88}, {0x19D00, 0x19D20}, {0x19E00, 0x19E7C}, {0x19E80, 0x19E94}, {0x19E98, 0x19EAC}, {0x19EB0, 0x19EBC}, {0x19F70, 0x19F74}, {0x19F80, 0x19F8C}, {0x19FA0, 0x19FB4}, {0x19FC0, 0x19FD8}, {0x1A000, 0x1A200}, {0x1A204, 0x1A210}, {0x1A228, 0x1A22C}, {0x1A230, 0x1A248}, {0x1A250, 0x1A270}, {0x1A280, 0x1A290}, {0x1A2A0, 0x1A2A4}, {0x1A2C0, 0x1A2EC}, {0x1A300, 0x1A3BC}, {0x1A3F0, 0x1A3F4}, {0x1A3F8, 0x1A434}, {0x1A438, 0x1A444}, {0x1A448, 0x1A468}, {0x1A580, 0x1A58C}, {0x1A644, 0x1A654}, {0x1A670, 0x1A698}, {0x1A6AC, 0x1A6B0}, {0x1A6D0, 0x1A6D4}, {0x1A6EC, 0x1A70C}, {0x1A710, 0x1A738}, {0x1A7C0, 0x1A7D0}, {0x1A7D4, 0x1A7D8}, {0x1A7DC, 0x1A7E4}, {0x1A7F0, 0x1A7F8}, {0x1A888, 0x1A89C}, {0x1A8A8, 0x1A8AC}, {0x1A8C0, 0x1A8DC}, {0x1A8F0, 0x1A8FC}, {0x1AE04, 0x1AE08}, {0x1AE18, 0x1AE24}, {0x1AF80, 0x1AF8C}, {0x1AFA0, 0x1AFB4}, {0x1B000, 0x1B200}, {0x1B284, 0x1B288}, {0x1B2D0, 0x1B2D8}, {0x1B2DC, 0x1B2EC}, {0x1B300, 0x1B340}, {0x1B374, 0x1B378}, {0x1B380, 0x1B384}, {0x1B388, 0x1B38C}, {0x1B404, 0x1B408}, {0x1B420, 0x1B428}, {0x1B440, 0x1B444}, {0x1B448, 0x1B44C}, {0x1B450, 0x1B458}, {0x1B45C, 0x1B468}, {0x1B584, 0x1B58C}, {0x1B68C, 0x1B690}, {0x1B6AC, 0x1B6B0}, {0x1B7F0, 0x1B7F8}, {0x1C800, 0x1CC00}, {0x1CE00, 0x1CE04}, {0x1CF80, 0x1CF84}, {0x1D200, 0x1D800}, {0x1E000, 0x20014}, {0x20100, 0x20124}, {0x21400, 0x217A8}, {0x21800, 0x21BA8}, {0x21C00, 0x21FA8}, {0x22000, 0x223A8}, {0x22400, 0x227A8}, {0x22800, 0x22BA8}, {0x22C00, 0x22FA8}, {0x23000, 0x233A8}, {0x24000, 0x24034}, /* EFUSE0,1,2 is disabled here * because its state may be reset * * {0x24800, 0x24804}, * {0x25000, 0x25004}, * {0x25800, 0x25804}, */ {0x26000, 0x26064}, {0x27000, 0x27024}, {0x34000, 0x3400C}, {0x34400, 0x3445C}, {0x34800, 0x3485C}, {0x34C00, 0x34C5C}, {0x35000, 0x3505C}, {0x35400, 0x3545C}, {0x35800, 0x3585C}, {0x35C00, 0x35C5C}, {0x36000, 0x3605C}, {0x38000, 0x38064}, {0x38070, 0x380E0}, {0x3A000, 0x3A074}, /* DBI windows is skipped here, it can be only accessed when pcie * is active (not in reset) and CORE_CTRL_PCIE_LTSSM_EN = 0 && * PCIE_CTRL_APP_LTSSM_ENALBE=0. 
* {0x3C000 , 0x3C004}, */ {0x40000, 0x400A4}, /* SI register is skipped here. * Because it will cause bus hang * * {0x50000, 0x50018}, */ {0x80000, 0x8000C}, {0x80010, 0x80020}, }; static const struct ath10k_mem_section qca6174_hw30_register_sections[] = { {0x800, 0x810}, {0x820, 0x82C}, {0x830, 0x8F4}, {0x90C, 0x91C}, {0xA14, 0xA18}, {0xA84, 0xA94}, {0xAA8, 0xAD4}, {0xADC, 0xB40}, {0x1000, 0x10A4}, {0x10BC, 0x111C}, {0x1134, 0x1138}, {0x1144, 0x114C}, {0x1150, 0x115C}, {0x1160, 0x1178}, {0x1240, 0x1260}, {0x2000, 0x207C}, {0x3000, 0x3014}, {0x4000, 0x4014}, {0x5000, 0x5124}, {0x6000, 0x6040}, {0x6080, 0x60CC}, {0x6100, 0x611C}, {0x6140, 0x61D8}, {0x6200, 0x6238}, {0x6240, 0x628C}, {0x62C0, 0x62EC}, {0x6380, 0x63E8}, {0x6400, 0x6440}, {0x6480, 0x64CC}, {0x6500, 0x651C}, {0x6540, 0x6580}, {0x6600, 0x6638}, {0x6640, 0x668C}, {0x66C0, 0x66EC}, {0x6780, 0x67E8}, {0x7080, 0x708C}, {0x70C0, 0x70C8}, {0x7400, 0x741C}, {0x7440, 0x7454}, {0x7800, 0x7818}, {0x8000, 0x8004}, {0x8010, 0x8064}, {0x8080, 0x8084}, {0x80A0, 0x80A4}, {0x80C0, 0x80C4}, {0x80E0, 0x80F4}, {0x8100, 0x8104}, {0x8110, 0x812C}, {0x9000, 0x9004}, {0x9800, 0x982C}, {0x9830, 0x9838}, {0x9840, 0x986C}, {0x9870, 0x9898}, {0x9A00, 0x9C00}, {0xD580, 0xD59C}, {0xF000, 0xF0E0}, {0xF140, 0xF190}, {0xF250, 0xF25C}, {0xF260, 0xF268}, {0xF26C, 0xF2A8}, {0x10008, 0x1000C}, {0x10014, 0x10018}, {0x1001C, 0x10020}, {0x10024, 0x10028}, {0x10030, 0x10034}, {0x10040, 0x10054}, {0x10058, 0x1007C}, {0x10080, 0x100C4}, {0x100C8, 0x10114}, {0x1012C, 0x10130}, {0x10138, 0x10144}, {0x10200, 0x10220}, {0x10230, 0x10250}, {0x10260, 0x10280}, {0x10290, 0x102B0}, {0x102C0, 0x102DC}, {0x102E0, 0x102F4}, {0x102FC, 0x1037C}, {0x10380, 0x10390}, {0x10800, 0x10828}, {0x10840, 0x10844}, {0x10880, 0x10884}, {0x108C0, 0x108E8}, {0x10900, 0x10928}, {0x10940, 0x10944}, {0x10980, 0x10984}, {0x109C0, 0x109E8}, {0x10A00, 0x10A28}, {0x10A40, 0x10A50}, {0x11000, 0x11028}, {0x11030, 0x11034}, {0x11038, 0x11068}, {0x11070, 0x11074}, {0x11078, 0x110A8}, {0x110B0, 0x110B4}, {0x110B8, 0x110E8}, {0x110F0, 0x110F4}, {0x110F8, 0x11128}, {0x11138, 0x11144}, {0x11178, 0x11180}, {0x111B8, 0x111C0}, {0x111F8, 0x11200}, {0x11238, 0x1123C}, {0x11270, 0x11274}, {0x11278, 0x1127C}, {0x112B0, 0x112B4}, {0x112B8, 0x112BC}, {0x112F0, 0x112F4}, {0x112F8, 0x112FC}, {0x11338, 0x1133C}, {0x11378, 0x1137C}, {0x113B8, 0x113BC}, {0x113F8, 0x113FC}, {0x11438, 0x11440}, {0x11478, 0x11480}, {0x114B8, 0x114BC}, {0x114F8, 0x114FC}, {0x11538, 0x1153C}, {0x11578, 0x1157C}, {0x115B8, 0x115BC}, {0x115F8, 0x115FC}, {0x11638, 0x1163C}, {0x11678, 0x1167C}, {0x116B8, 0x116BC}, {0x116F8, 0x116FC}, {0x11738, 0x1173C}, {0x11778, 0x1177C}, {0x117B8, 0x117BC}, {0x117F8, 0x117FC}, {0x17000, 0x1701C}, {0x17020, 0x170AC}, {0x18000, 0x18050}, {0x18054, 0x18074}, {0x18080, 0x180D4}, {0x180DC, 0x18104}, {0x18108, 0x1813C}, {0x18144, 0x18148}, {0x18168, 0x18174}, {0x18178, 0x18180}, {0x181C8, 0x181E0}, {0x181E4, 0x181E8}, {0x181EC, 0x1820C}, {0x1825C, 0x18280}, {0x18284, 0x18290}, {0x18294, 0x182A0}, {0x18300, 0x18304}, {0x18314, 0x18320}, {0x18328, 0x18350}, {0x1835C, 0x1836C}, {0x18370, 0x18390}, {0x18398, 0x183AC}, {0x183BC, 0x183D8}, {0x183DC, 0x183F4}, {0x18400, 0x186F4}, {0x186F8, 0x1871C}, {0x18720, 0x18790}, {0x19800, 0x19830}, {0x19834, 0x19840}, {0x19880, 0x1989C}, {0x198A4, 0x198B0}, {0x198BC, 0x19900}, {0x19C00, 0x19C88}, {0x19D00, 0x19D20}, {0x19E00, 0x19E7C}, {0x19E80, 0x19E94}, {0x19E98, 0x19EAC}, {0x19EB0, 0x19EBC}, {0x19F70, 0x19F74}, {0x19F80, 0x19F8C}, {0x19FA0, 0x19FB4}, {0x19FC0, 0x19FD8}, {0x1A000, 
0x1A200}, {0x1A204, 0x1A210}, {0x1A228, 0x1A22C}, {0x1A230, 0x1A248}, {0x1A250, 0x1A270}, {0x1A280, 0x1A290}, {0x1A2A0, 0x1A2A4}, {0x1A2C0, 0x1A2EC}, {0x1A300, 0x1A3BC}, {0x1A3F0, 0x1A3F4}, {0x1A3F8, 0x1A434}, {0x1A438, 0x1A444}, {0x1A448, 0x1A468}, {0x1A580, 0x1A58C}, {0x1A644, 0x1A654}, {0x1A670, 0x1A698}, {0x1A6AC, 0x1A6B0}, {0x1A6D0, 0x1A6D4}, {0x1A6EC, 0x1A70C}, {0x1A710, 0x1A738}, {0x1A7C0, 0x1A7D0}, {0x1A7D4, 0x1A7D8}, {0x1A7DC, 0x1A7E4}, {0x1A7F0, 0x1A7F8}, {0x1A888, 0x1A89C}, {0x1A8A8, 0x1A8AC}, {0x1A8C0, 0x1A8DC}, {0x1A8F0, 0x1A8FC}, {0x1AE04, 0x1AE08}, {0x1AE18, 0x1AE24}, {0x1AF80, 0x1AF8C}, {0x1AFA0, 0x1AFB4}, {0x1B000, 0x1B200}, {0x1B284, 0x1B288}, {0x1B2D0, 0x1B2D8}, {0x1B2DC, 0x1B2EC}, {0x1B300, 0x1B340}, {0x1B374, 0x1B378}, {0x1B380, 0x1B384}, {0x1B388, 0x1B38C}, {0x1B404, 0x1B408}, {0x1B420, 0x1B428}, {0x1B440, 0x1B444}, {0x1B448, 0x1B44C}, {0x1B450, 0x1B458}, {0x1B45C, 0x1B468}, {0x1B584, 0x1B58C}, {0x1B68C, 0x1B690}, {0x1B6AC, 0x1B6B0}, {0x1B7F0, 0x1B7F8}, {0x1C800, 0x1CC00}, {0x1CE00, 0x1CE04}, {0x1CF80, 0x1CF84}, {0x1D200, 0x1D800}, {0x1E000, 0x20014}, {0x20100, 0x20124}, {0x21400, 0x217A8}, {0x21800, 0x21BA8}, {0x21C00, 0x21FA8}, {0x22000, 0x223A8}, {0x22400, 0x227A8}, {0x22800, 0x22BA8}, {0x22C00, 0x22FA8}, {0x23000, 0x233A8}, {0x24000, 0x24034}, {0x26000, 0x26064}, {0x27000, 0x27024}, {0x34000, 0x3400C}, {0x34400, 0x3445C}, {0x34800, 0x3485C}, {0x34C00, 0x34C5C}, {0x35000, 0x3505C}, {0x35400, 0x3545C}, {0x35800, 0x3585C}, {0x35C00, 0x35C5C}, {0x36000, 0x3605C}, {0x38000, 0x38064}, {0x38070, 0x380E0}, {0x3A000, 0x3A074}, {0x40000, 0x400A4}, {0x80000, 0x8000C}, {0x80010, 0x80020}, }; static const struct ath10k_mem_region qca6174_hw10_mem_regions[] = { { .type = ATH10K_MEM_REGION_TYPE_DRAM, .start = 0x400000, .len = 0x70000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, /* RTC_SOC_BASE_ADDRESS */ .start = 0x0, /* WLAN_MBOX_BASE_ADDRESS - RTC_SOC_BASE_ADDRESS */ .len = 0x800 - 0x0, .name = "REG_PART1", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, /* STEREO_BASE_ADDRESS */ .start = 0x27000, /* USB_BASE_ADDRESS - STEREO_BASE_ADDRESS */ .len = 0x60000 - 0x27000, .name = "REG_PART2", .section_table = { .sections = NULL, .size = 0, }, }, }; static const struct ath10k_mem_region qca6174_hw21_mem_regions[] = { { .type = ATH10K_MEM_REGION_TYPE_DRAM, .start = 0x400000, .len = 0x70000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_AXI, .start = 0xa0000, .len = 0x18000, .name = "AXI", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0x800, .len = 0x80020 - 0x800, .name = "REG_TOTAL", .section_table = { .sections = qca6174_hw21_register_sections, .size = ARRAY_SIZE(qca6174_hw21_register_sections), }, }, }; static const struct ath10k_mem_region qca6174_hw30_sdio_mem_regions[] = { { .type = ATH10K_MEM_REGION_TYPE_DRAM, .start = 0x400000, .len = 0xa8000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_AXI, .start = 0xa0000, .len = 0x18000, .name = "AXI", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IRAM1, .start = 0x00980000, .len = 0x00080000, .name = "IRAM1", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IRAM2, .start = 0x00a00000, .len = 0x00040000, .name = "IRAM2", .section_table = { .sections = NULL, .size = 0, }, }, { .type = 
ATH10K_MEM_REGION_TYPE_REG, .start = 0x800, .len = 0x80020 - 0x800, .name = "REG_TOTAL", .section_table = { .sections = qca6174_hw30_sdio_register_sections, .size = ARRAY_SIZE(qca6174_hw30_sdio_register_sections), }, }, }; static const struct ath10k_mem_region qca6174_hw30_mem_regions[] = { { .type = ATH10K_MEM_REGION_TYPE_DRAM, .start = 0x400000, .len = 0xa8000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_AXI, .start = 0xa0000, .len = 0x18000, .name = "AXI", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0x800, .len = 0x80020 - 0x800, .name = "REG_TOTAL", .section_table = { .sections = qca6174_hw30_register_sections, .size = ARRAY_SIZE(qca6174_hw30_register_sections), }, }, /* IRAM dump must be put last */ { .type = ATH10K_MEM_REGION_TYPE_IRAM1, .start = 0x00980000, .len = 0x00080000, .name = "IRAM1", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IRAM2, .start = 0x00a00000, .len = 0x00040000, .name = "IRAM2", .section_table = { .sections = NULL, .size = 0, }, }, }; static const struct ath10k_mem_region qca988x_hw20_mem_regions[] = { { .type = ATH10K_MEM_REGION_TYPE_DRAM, .start = 0x400000, .len = 0x50000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0x4000, .len = 0x2000, .name = "REG_PART1", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0x8000, .len = 0x58000, .name = "REG_PART2", .section_table = { .sections = NULL, .size = 0, }, }, }; static const struct ath10k_mem_region qca99x0_hw20_mem_regions[] = { { .type = ATH10K_MEM_REGION_TYPE_DRAM, .start = 0x400000, .len = 0x60000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0x980000, .len = 0x50000, .name = "IRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOSRAM, .start = 0xC0000, .len = 0x40000, .name = "SRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x30000, .len = 0x7000, .name = "APB REG 1", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x3f000, .len = 0x3000, .name = "APB REG 2", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x43000, .len = 0x3000, .name = "WIFI REG", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x4A000, .len = 0x5000, .name = "CE REG", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x80000, .len = 0x6000, .name = "SOC REG", .section_table = { .sections = NULL, .size = 0, }, }, }; static const struct ath10k_mem_region qca9984_hw10_mem_regions[] = { { .type = ATH10K_MEM_REGION_TYPE_DRAM, .start = 0x400000, .len = 0x80000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0x980000, .len = 0x50000, .name = "IRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOSRAM, .start = 0xC0000, .len = 0x40000, .name = "SRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x30000, .len = 0x7000, .name = "APB REG 1", .section_table = { .sections = NULL, .size = 0, }, }, { .type = 
ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x3f000, .len = 0x3000, .name = "APB REG 2", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x43000, .len = 0x3000, .name = "WIFI REG", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x4A000, .len = 0x5000, .name = "CE REG", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x80000, .len = 0x6000, .name = "SOC REG", .section_table = { .sections = NULL, .size = 0, }, }, }; static const struct ath10k_mem_section ipq4019_soc_reg_range[] = { {0x080000, 0x080004}, {0x080020, 0x080024}, {0x080028, 0x080050}, {0x0800d4, 0x0800ec}, {0x08010c, 0x080118}, {0x080284, 0x080290}, {0x0802a8, 0x0802b8}, {0x0802dc, 0x08030c}, {0x082000, 0x083fff} }; static const struct ath10k_mem_region qca4019_hw10_mem_regions[] = { { .type = ATH10K_MEM_REGION_TYPE_DRAM, .start = 0x400000, .len = 0x68000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0xC0000, .len = 0x40000, .name = "SRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0x980000, .len = 0x50000, .name = "IRAM", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x30000, .len = 0x7000, .name = "APB REG 1", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x3f000, .len = 0x3000, .name = "APB REG 2", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x43000, .len = 0x3000, .name = "WIFI REG", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_IOREG, .start = 0x4A000, .len = 0x5000, .name = "CE REG", .section_table = { .sections = NULL, .size = 0, }, }, { .type = ATH10K_MEM_REGION_TYPE_REG, .start = 0x080000, .len = 0x083fff - 0x080000, .name = "REG_TOTAL", .section_table = { .sections = ipq4019_soc_reg_range, .size = ARRAY_SIZE(ipq4019_soc_reg_range), }, }, }; static const struct ath10k_mem_region wcn399x_hw10_mem_regions[] = { { /* MSA region start is not fixed, hence it is assigned at runtime */ .type = ATH10K_MEM_REGION_TYPE_MSA, .len = 0x100000, .name = "DRAM", .section_table = { .sections = NULL, .size = 0, }, }, }; static const struct ath10k_hw_mem_layout hw_mem_layouts[] = { { .hw_id = QCA6174_HW_1_0_VERSION, .hw_rev = ATH10K_HW_QCA6174, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca6174_hw10_mem_regions, .size = ARRAY_SIZE(qca6174_hw10_mem_regions), }, }, { .hw_id = QCA6174_HW_1_1_VERSION, .hw_rev = ATH10K_HW_QCA6174, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca6174_hw10_mem_regions, .size = ARRAY_SIZE(qca6174_hw10_mem_regions), }, }, { .hw_id = QCA6174_HW_1_3_VERSION, .hw_rev = ATH10K_HW_QCA6174, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca6174_hw10_mem_regions, .size = ARRAY_SIZE(qca6174_hw10_mem_regions), }, }, { .hw_id = QCA6174_HW_2_1_VERSION, .hw_rev = ATH10K_HW_QCA6174, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca6174_hw21_mem_regions, .size = ARRAY_SIZE(qca6174_hw21_mem_regions), }, }, { .hw_id = QCA6174_HW_3_0_VERSION, .hw_rev = ATH10K_HW_QCA6174, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca6174_hw30_mem_regions, .size = ARRAY_SIZE(qca6174_hw30_mem_regions), }, }, { .hw_id = QCA6174_HW_3_2_VERSION, .hw_rev = ATH10K_HW_QCA6174, .bus = ATH10K_BUS_PCI, .region_table = { 
.regions = qca6174_hw30_mem_regions, .size = ARRAY_SIZE(qca6174_hw30_mem_regions), }, }, { .hw_id = QCA6174_HW_3_2_VERSION, .hw_rev = ATH10K_HW_QCA6174, .bus = ATH10K_BUS_SDIO, .region_table = { .regions = qca6174_hw30_sdio_mem_regions, .size = ARRAY_SIZE(qca6174_hw30_sdio_mem_regions), }, }, { .hw_id = QCA9377_HW_1_1_DEV_VERSION, .hw_rev = ATH10K_HW_QCA9377, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca6174_hw30_mem_regions, .size = ARRAY_SIZE(qca6174_hw30_mem_regions), }, }, { .hw_id = QCA988X_HW_2_0_VERSION, .hw_rev = ATH10K_HW_QCA988X, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca988x_hw20_mem_regions, .size = ARRAY_SIZE(qca988x_hw20_mem_regions), }, }, { .hw_id = QCA9984_HW_1_0_DEV_VERSION, .hw_rev = ATH10K_HW_QCA9984, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca9984_hw10_mem_regions, .size = ARRAY_SIZE(qca9984_hw10_mem_regions), }, }, { .hw_id = QCA9888_HW_2_0_DEV_VERSION, .hw_rev = ATH10K_HW_QCA9888, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca9984_hw10_mem_regions, .size = ARRAY_SIZE(qca9984_hw10_mem_regions), }, }, { .hw_id = QCA99X0_HW_2_0_DEV_VERSION, .hw_rev = ATH10K_HW_QCA99X0, .bus = ATH10K_BUS_PCI, .region_table = { .regions = qca99x0_hw20_mem_regions, .size = ARRAY_SIZE(qca99x0_hw20_mem_regions), }, }, { .hw_id = QCA4019_HW_1_0_DEV_VERSION, .hw_rev = ATH10K_HW_QCA4019, .bus = ATH10K_BUS_AHB, .region_table = { .regions = qca4019_hw10_mem_regions, .size = ARRAY_SIZE(qca4019_hw10_mem_regions), }, }, { .hw_id = WCN3990_HW_1_0_DEV_VERSION, .hw_rev = ATH10K_HW_WCN3990, .bus = ATH10K_BUS_SNOC, .region_table = { .regions = wcn399x_hw10_mem_regions, .size = ARRAY_SIZE(wcn399x_hw10_mem_regions), }, }, }; static u32 ath10k_coredump_get_ramdump_size(struct ath10k *ar) { const struct ath10k_hw_mem_layout *hw; const struct ath10k_mem_region *mem_region; size_t size = 0; int i; hw = ath10k_coredump_get_mem_layout(ar); if (!hw) return 0; mem_region = &hw->region_table.regions[0]; for (i = 0; i < hw->region_table.size; i++) { size += mem_region->len; mem_region++; } /* reserve space for the headers */ size += hw->region_table.size * sizeof(struct ath10k_dump_ram_data_hdr); /* make sure it is aligned 16 bytes for debug message print out */ size = ALIGN(size, 16); return size; } const struct ath10k_hw_mem_layout *ath10k_coredump_get_mem_layout(struct ath10k *ar) { if (!test_bit(ATH10K_FW_CRASH_DUMP_RAM_DATA, &ath10k_coredump_mask)) return NULL; return _ath10k_coredump_get_mem_layout(ar); } EXPORT_SYMBOL(ath10k_coredump_get_mem_layout); const struct ath10k_hw_mem_layout *_ath10k_coredump_get_mem_layout(struct ath10k *ar) { int i; if (WARN_ON(ar->target_version == 0)) return NULL; for (i = 0; i < ARRAY_SIZE(hw_mem_layouts); i++) { if (ar->target_version == hw_mem_layouts[i].hw_id && ar->hw_rev == hw_mem_layouts[i].hw_rev && hw_mem_layouts[i].bus == ar->hif.bus) return &hw_mem_layouts[i]; } return NULL; } struct ath10k_fw_crash_data *ath10k_coredump_new(struct ath10k *ar) { struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data; lockdep_assert_held(&ar->dump_mutex); if (ath10k_coredump_mask == 0) /* coredump disabled */ return NULL; guid_gen(&crash_data->guid); ktime_get_real_ts64(&crash_data->timestamp); return crash_data; } EXPORT_SYMBOL(ath10k_coredump_new); static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar) { struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data; struct ath10k_ce_crash_hdr *ce_hdr; struct ath10k_dump_file_data *dump_data; struct ath10k_tlv_dump_data *dump_tlv; size_t hdr_len = 
sizeof(*dump_data); size_t len, sofar = 0; unsigned char *buf; len = hdr_len; if (test_bit(ATH10K_FW_CRASH_DUMP_REGISTERS, &ath10k_coredump_mask)) len += sizeof(*dump_tlv) + sizeof(crash_data->registers); if (test_bit(ATH10K_FW_CRASH_DUMP_CE_DATA, &ath10k_coredump_mask)) len += sizeof(*dump_tlv) + sizeof(*ce_hdr) + CE_COUNT * sizeof(ce_hdr->entries[0]); if (test_bit(ATH10K_FW_CRASH_DUMP_RAM_DATA, &ath10k_coredump_mask)) len += sizeof(*dump_tlv) + crash_data->ramdump_buf_len; sofar += hdr_len; /* This is going to get big when we start dumping FW RAM and such, * so go ahead and use vmalloc. */ buf = vzalloc(len); if (!buf) return NULL; mutex_lock(&ar->dump_mutex); dump_data = (struct ath10k_dump_file_data *)(buf); strscpy(dump_data->df_magic, "ATH10K-FW-DUMP", sizeof(dump_data->df_magic)); dump_data->len = cpu_to_le32(len); dump_data->version = cpu_to_le32(ATH10K_FW_CRASH_DUMP_VERSION); guid_copy(&dump_data->guid, &crash_data->guid); dump_data->chip_id = cpu_to_le32(ar->bus_param.chip_id); dump_data->bus_type = cpu_to_le32(0); dump_data->target_version = cpu_to_le32(ar->target_version); dump_data->fw_version_major = cpu_to_le32(ar->fw_version_major); dump_data->fw_version_minor = cpu_to_le32(ar->fw_version_minor); dump_data->fw_version_release = cpu_to_le32(ar->fw_version_release); dump_data->fw_version_build = cpu_to_le32(ar->fw_version_build); dump_data->phy_capability = cpu_to_le32(ar->phy_capability); dump_data->hw_min_tx_power = cpu_to_le32(ar->hw_min_tx_power); dump_data->hw_max_tx_power = cpu_to_le32(ar->hw_max_tx_power); dump_data->ht_cap_info = cpu_to_le32(ar->ht_cap_info); dump_data->vht_cap_info = cpu_to_le32(ar->vht_cap_info); dump_data->num_rf_chains = cpu_to_le32(ar->num_rf_chains); strscpy(dump_data->fw_ver, ar->hw->wiphy->fw_version, sizeof(dump_data->fw_ver)); dump_data->kernel_ver_code = 0; strscpy(dump_data->kernel_ver, init_utsname()->release, sizeof(dump_data->kernel_ver)); dump_data->tv_sec = cpu_to_le64(crash_data->timestamp.tv_sec); dump_data->tv_nsec = cpu_to_le64(crash_data->timestamp.tv_nsec); if (test_bit(ATH10K_FW_CRASH_DUMP_REGISTERS, &ath10k_coredump_mask)) { dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar); dump_tlv->type = cpu_to_le32(ATH10K_FW_CRASH_DUMP_REGISTERS); dump_tlv->tlv_len = cpu_to_le32(sizeof(crash_data->registers)); memcpy(dump_tlv->tlv_data, &crash_data->registers, sizeof(crash_data->registers)); sofar += sizeof(*dump_tlv) + sizeof(crash_data->registers); } if (test_bit(ATH10K_FW_CRASH_DUMP_CE_DATA, &ath10k_coredump_mask)) { dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar); dump_tlv->type = cpu_to_le32(ATH10K_FW_CRASH_DUMP_CE_DATA); dump_tlv->tlv_len = cpu_to_le32(struct_size(ce_hdr, entries, CE_COUNT)); ce_hdr = (struct ath10k_ce_crash_hdr *)(dump_tlv->tlv_data); ce_hdr->ce_count = cpu_to_le32(CE_COUNT); memset(ce_hdr->reserved, 0, sizeof(ce_hdr->reserved)); memcpy(ce_hdr->entries, crash_data->ce_crash_data, CE_COUNT * sizeof(ce_hdr->entries[0])); sofar += sizeof(*dump_tlv) + sizeof(*ce_hdr) + CE_COUNT * sizeof(ce_hdr->entries[0]); } /* Gather ram dump */ if (test_bit(ATH10K_FW_CRASH_DUMP_RAM_DATA, &ath10k_coredump_mask)) { dump_tlv = (struct ath10k_tlv_dump_data *)(buf + sofar); dump_tlv->type = cpu_to_le32(ATH10K_FW_CRASH_DUMP_RAM_DATA); dump_tlv->tlv_len = cpu_to_le32(crash_data->ramdump_buf_len); if (crash_data->ramdump_buf_len) { memcpy(dump_tlv->tlv_data, crash_data->ramdump_buf, crash_data->ramdump_buf_len); sofar += sizeof(*dump_tlv) + crash_data->ramdump_buf_len; } } mutex_unlock(&ar->dump_mutex); return dump_data; } int 
ath10k_coredump_submit(struct ath10k *ar) { struct ath10k_dump_file_data *dump; if (ath10k_coredump_mask == 0) /* coredump disabled */ return 0; dump = ath10k_coredump_build(ar); if (!dump) { ath10k_warn(ar, "no crash dump data found for devcoredump"); return -ENODATA; } dev_coredumpv(ar->dev, dump, le32_to_cpu(dump->len), GFP_KERNEL); return 0; } int ath10k_coredump_create(struct ath10k *ar) { if (ath10k_coredump_mask == 0) /* coredump disabled */ return 0; ar->coredump.fw_crash_data = vzalloc(sizeof(*ar->coredump.fw_crash_data)); if (!ar->coredump.fw_crash_data) return -ENOMEM; return 0; } int ath10k_coredump_register(struct ath10k *ar) { struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data; if (test_bit(ATH10K_FW_CRASH_DUMP_RAM_DATA, &ath10k_coredump_mask)) { crash_data->ramdump_buf_len = ath10k_coredump_get_ramdump_size(ar); if (!crash_data->ramdump_buf_len) return 0; crash_data->ramdump_buf = vzalloc(crash_data->ramdump_buf_len); if (!crash_data->ramdump_buf) return -ENOMEM; } return 0; } void ath10k_coredump_unregister(struct ath10k *ar) { struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data; vfree(crash_data->ramdump_buf); } void ath10k_coredump_destroy(struct ath10k *ar) { if (ar->coredump.fw_crash_data->ramdump_buf) { vfree(ar->coredump.fw_crash_data->ramdump_buf); ar->coredump.fw_crash_data->ramdump_buf = NULL; ar->coredump.fw_crash_data->ramdump_buf_len = 0; } vfree(ar->coredump.fw_crash_data); ar->coredump.fw_crash_data = NULL; }
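ath10k_coredump_get_ramdump_size() above sizes the RAM-dump buffer as the sum of all region lengths for the detected hardware, plus one ath10k_dump_ram_data_hdr per region, rounded up to a 16-byte boundary. The standalone sketch below reproduces that arithmetic using the qca988x_hw20 region lengths from the table above; the ram_data_hdr struct here is only a stand-in, not the driver's real ath10k_dump_ram_data_hdr layout.

/* Userspace sketch of the ramdump size computation (not driver code). */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define ALIGN16(x)	(((x) + 15u) & ~(size_t)15)

struct ram_data_hdr {		/* stand-in header, not the driver's layout */
	uint32_t region_type;
	uint32_t start;
	uint32_t length;
	uint32_t reserved;
};

int main(void)
{
	/* qca988x_hw20_mem_regions lengths: DRAM, REG_PART1, REG_PART2 */
	const size_t region_len[] = { 0x50000, 0x2000, 0x58000 };
	const size_t nregions = sizeof(region_len) / sizeof(region_len[0]);
	size_t size = 0;

	for (size_t i = 0; i < nregions; i++)
		size += region_len[i];

	size += nregions * sizeof(struct ram_data_hdr);	/* per-region headers */
	size = ALIGN16(size);				/* keep the dump 16-byte aligned */

	printf("ramdump buffer: %zu bytes (0x%zx)\n", size, size);
	return 0;
}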
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PGTABLE_64_H #define _ASM_X86_PGTABLE_64_H #include <linux/const.h> #include <asm/pgtable_64_types.h> #ifndef __ASSEMBLY__ /* * This file contains the functions and defines necessary to modify and use * the x86-64 page table tree. */ #include <asm/processor.h> #include <linux/bitops.h> #include <linux/threads.h> #include <asm/fixmap.h> extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt extern void paging_init(void); static inline void sync_initial_page_table(void) { } #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pmd_val(e)) #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pud_val(e)) #if CONFIG_PGTABLE_LEVELS >= 5 #define p4d_ERROR(e) \ pr_err("%s:%d: bad p4d %p(%016lx)\n", \ __FILE__, __LINE__, &(e), p4d_val(e)) #endif #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) struct mm_struct; #define mm_p4d_folded mm_p4d_folded static inline bool mm_p4d_folded(struct mm_struct *mm) { return !pgtable_l5_enabled(); } void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); static inline void native_set_pte(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); } static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { native_set_pte(ptep, native_make_pte(0)); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { native_set_pte(ptep, pte); } static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { WRITE_ONCE(*pmdp, pmd); } static inline void native_pmd_clear(pmd_t *pmd) { native_set_pmd(pmd, native_make_pmd(0)); } static inline pte_t native_ptep_get_and_clear(pte_t *xp) { #ifdef CONFIG_SMP return native_make_pte(xchg(&xp->pte, 0)); #else /* native_local_ptep_get_and_clear, but duplicated because of cyclic dependency */ pte_t ret = *xp; native_pte_clear(NULL, 0, xp); return ret; #endif } static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) { #ifdef CONFIG_SMP return 
native_make_pmd(xchg(&xp->pmd, 0)); #else /* native_local_pmdp_get_and_clear, but duplicated because of cyclic dependency */ pmd_t ret = *xp; native_pmd_clear(xp); return ret; #endif } static inline void native_set_pud(pud_t *pudp, pud_t pud) { WRITE_ONCE(*pudp, pud); } static inline void native_pud_clear(pud_t *pud) { native_set_pud(pud, native_make_pud(0)); } static inline pud_t native_pudp_get_and_clear(pud_t *xp) { #ifdef CONFIG_SMP return native_make_pud(xchg(&xp->pud, 0)); #else /* native_local_pudp_get_and_clear, * but duplicated because of cyclic dependency */ pud_t ret = *xp; native_pud_clear(xp); return ret; #endif } static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) { WRITE_ONCE(*p4dp, p4d); return; } pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); } static inline void native_p4d_clear(p4d_t *p4d) { native_set_p4d(p4d, native_make_p4d(0)); } static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); } static inline void native_pgd_clear(pgd_t *pgd) { native_set_pgd(pgd, native_make_pgd(0)); } /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ /* PGD - Level 4 access */ /* PUD - Level 3 access */ /* PMD - Level 2 access */ /* PTE - Level 1 access */ /* * Encode and de-code a swap entry * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| E|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above * there. We also need to avoid using A and D because of an * erratum where they can be incorrectly set by hardware on * non-present PTEs. * * SD Bits 1-4 are not used in non-present format and available for * special use described below: * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * * F (2) in swp entry is used to record when a pagetable is * writeprotected by userfaultfd WP support. * * E (3) in swp entry is used to remember PG_anon_exclusive. * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * * The offset is inverted by a binary not operation to make the high * physical bits set. */ #define SWP_TYPE_BITS 5 #define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) /* We always extract/encode the offset by shifting it all the way up, and then down again */ #define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) /* Extract the high bits for type */ #define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS)) /* Shift up (to get rid of type), then down to get value */ #define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT) /* * Shift the offset up "too far" by TYPE bits, then down again * The offset is inverted by a binary not operation to make the high * physical bits set. 
*/ #define __swp_entry(type, offset) ((swp_entry_t) { \ (~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \ | ((unsigned long)(type) << (64-SWP_TYPE_BITS)) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) #define __swp_entry_to_pte(x) (__pte((x).val)) #define __swp_entry_to_pmd(x) (__pmd((x).val)) extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 /* fs/proc/kcore.c */ #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) #define __HAVE_ARCH_PTE_SAME #define vmemmap ((struct page *)VMEMMAP_START) extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #define gup_fast_permitted gup_fast_permitted static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { if (end >> __VIRTUAL_MASK_SHIFT) return false; return true; } #include <asm/pgtable-invert.h> #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */
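The swap-entry layout documented in the comment above can be checked in isolation: the type occupies the top SWP_TYPE_BITS bits, the offset is stored inverted in bits 9-58, and the low bits that present/PROT_NONE detection cares about stay clear, leaving the SD/F/E software bits free for the uses described in the comment. Note that _PAGE_BIT_PROTNONE is not defined in this excerpt; the value 8 (the aliased Global bit) used below is an assumption made so the example is self-contained, and the program assumes an LP64 userspace where unsigned long is 64 bits.

/* Standalone round-trip check of the swap-entry encoding above.
 * _PAGE_BIT_PROTNONE = 8 is an assumption (see lead-in). */
#include <assert.h>
#include <stdio.h>

#define _PAGE_BIT_PROTNONE	8	/* assumed value, not defined in this header */
#define SWP_TYPE_BITS		5
#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)
#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)

typedef struct { unsigned long val; } swp_entry_t;

#define __swp_type(x)	((x).val >> (64 - SWP_TYPE_BITS))
#define __swp_offset(x)	(~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) { \
	(~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
	| ((unsigned long)(type) << (64 - SWP_TYPE_BITS)) })

int main(void)
{
	swp_entry_t e = __swp_entry(3, 0x12345);

	/* Type lands in the top 5 bits, the inverted offset in bits 9-58;
	 * bits 0-8 are left clear by this encoding, so the entry can never
	 * look like a present or PROT_NONE pte. */
	printf("entry=%#lx type=%lu offset=%#lx\n",
	       e.val, __swp_type(e), __swp_offset(e));
	assert(__swp_type(e) == 3);
	assert(__swp_offset(e) == 0x12345);
	assert((e.val & 0x1ff) == 0);	/* low 9 bits clear */
	return 0;
}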
82 299 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_MSR_H #define _ASM_X86_MSR_H #include "msr-index.h" #ifndef __ASSEMBLY__ #include <asm/asm.h> #include <asm/errno.h> #include <asm/cpumask.h> #include <uapi/asm/msr.h> #include <asm/shared/msr.h> #include <linux/percpu.h> struct msr_info { u32 msr_no; struct msr reg; struct msr __percpu *msrs; int err; }; struct msr_regs_info { u32 *regs; int err; }; struct saved_msr { bool valid; struct msr_info info; }; struct saved_msrs { unsigned int num; struct saved_msr *array; }; /* * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" * constraint has different meanings. For i386, "A" means exactly * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, * it means rax *or* rdx. */ #ifdef CONFIG_X86_64 /* Using 64-bit values saves one instruction clearing the high half of low */ #define DECLARE_ARGS(val, low, high) unsigned long low, high #define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32) #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) #else #define DECLARE_ARGS(val, low, high) unsigned long long val #define EAX_EDX_VAL(val, low, high) (val) #define EAX_EDX_RET(val, low, high) "=A" (val) #endif /* * Be very careful with includes. This header is prone to include loops. 
*/ #include <asm/atomic.h> #include <linux/tracepoint-defs.h> #ifdef CONFIG_TRACEPOINTS DECLARE_TRACEPOINT(read_msr); DECLARE_TRACEPOINT(write_msr); DECLARE_TRACEPOINT(rdpmc); extern void do_trace_write_msr(unsigned int msr, u64 val, int failed); extern void do_trace_read_msr(unsigned int msr, u64 val, int failed); extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed); #else static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} #endif /* * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR * accessors and should not have any tracing or other functionality piggybacking * on them - those are *purely* for accessing MSRs and nothing more. So don't even * think of extending them - you will be slapped with a stinking trout or a frozen * shark will reach you, wherever you are! You've been warned. */ static __always_inline unsigned long long __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); asm volatile("1: rdmsr\n" "2:\n" _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); } static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { asm volatile("1: wrmsr\n" "2:\n" _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) : : "c" (msr), "a"(low), "d" (high) : "memory"); } /* * WRMSRNS behaves exactly like WRMSR with the only difference being * that it is not a serializing instruction by default. */ static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high) { /* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */ asm volatile("1: .byte 0x0f,0x01,0xc6\n" "2:\n" _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) : : "c" (msr), "a"(low), "d" (high)); } #define native_rdmsr(msr, val1, val2) \ do { \ u64 __val = __rdmsr((msr)); \ (void)((val1) = (u32)__val); \ (void)((val2) = (u32)(__val >> 32)); \ } while (0) #define native_wrmsr(msr, low, high) \ __wrmsr(msr, low, high) #define native_wrmsrl(msr, val) \ __wrmsr((msr), (u32)((u64)(val)), \ (u32)((u64)(val) >> 32)) static inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; val = __rdmsr(msr); if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, val, 0); return val; } static inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { DECLARE_ARGS(val, low, high); asm volatile("1: rdmsr ; xor %[err],%[err]\n" "2:\n\t" _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err]) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) : "c" (msr)); if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); return EAX_EDX_VAL(val, low, high); } /* Can be uninlined because referenced by paravirt */ static inline void notrace native_write_msr(unsigned int msr, u32 low, u32 high) { __wrmsr(msr, low, high); if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } /* Can be uninlined because referenced by paravirt */ static inline int notrace native_write_msr_safe(unsigned int msr, u32 low, u32 high) { int err; asm volatile("1: wrmsr ; xor %[err],%[err]\n" "2:\n\t" _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err]) : [err] "=a" (err) : "c" (msr), "0" (low), "d" (high) : "memory"); if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); return err; } extern int rdmsr_safe_regs(u32 regs[8]); extern int 
wrmsr_safe_regs(u32 regs[8]); /** * rdtsc() - returns the current TSC without ordering constraints * * rdtsc() returns the result of RDTSC as a 64-bit integer. The * only ordering constraint it supplies is the ordering implied by * "asm volatile": it will put the RDTSC in the place you expect. The * CPU can and will speculatively execute that RDTSC, though, so the * results can be non-monotonic if compared on different CPUs. */ static __always_inline unsigned long long rdtsc(void) { DECLARE_ARGS(val, low, high); asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); return EAX_EDX_VAL(val, low, high); } /** * rdtsc_ordered() - read the current TSC in program order * * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. * It is ordered like a load to a global in-memory counter. It should * be impossible to observe non-monotonic rdtsc_unordered() behavior * across multiple CPUs as long as the TSC is synced. */ static __always_inline unsigned long long rdtsc_ordered(void) { DECLARE_ARGS(val, low, high); /* * The RDTSC instruction is not ordered relative to memory * access. The Intel SDM and the AMD APM are both vague on this * point, but empirically an RDTSC instruction can be * speculatively executed before prior loads. An RDTSC * immediately after an appropriate barrier appears to be * ordered as a normal load, that is, it provides the same * ordering guarantees as reading from a global memory location * that some other imaginary CPU is updating continuously with a * time stamp. * * Thus, use the preferred barrier on the respective CPU, aiming for * RDTSCP as the default. */ asm volatile(ALTERNATIVE_2("rdtsc", "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, "rdtscp", X86_FEATURE_RDTSCP) : EAX_EDX_RET(val, low, high) /* RDTSCP clobbers ECX with MSR_TSC_AUX. 
*/ :: "ecx"); return EAX_EDX_VAL(val, low, high); } static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); if (tracepoint_enabled(rdpmc)) do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #include <linux/errno.h> /* * Access to machine-specific registers (available on 586 and better only) * Note: the rd* operations modify the parameters directly (without using * pointer indirection), this allows gcc to optimize better */ #define rdmsr(msr, low, high) \ do { \ u64 __val = native_read_msr((msr)); \ (void)((low) = (u32)__val); \ (void)((high) = (u32)(__val >> 32)); \ } while (0) static inline void wrmsr(unsigned int msr, u32 low, u32 high) { native_write_msr(msr, low, high); } #define rdmsrl(msr, val) \ ((val) = native_read_msr((msr))) static inline void wrmsrl(unsigned int msr, u64 val) { native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32)); } /* wrmsr with exception handling */ static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high) { return native_write_msr_safe(msr, low, high); } /* rdmsr with exception handling */ #define rdmsr_safe(msr, low, high) \ ({ \ int __err; \ u64 __val = native_read_msr_safe((msr), &__err); \ (*low) = (u32)__val; \ (*high) = (u32)(__val >> 32); \ __err; \ }) static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p) { int err; *p = native_read_msr_safe(msr, &err); return err; } #define rdpmc(counter, low, high) \ do { \ u64 _l = native_read_pmc((counter)); \ (low) = (u32)_l; \ (high) = (u32)(_l >> 32); \ } while (0) #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) #endif /* !CONFIG_PARAVIRT_XXL */ static __always_inline void wrmsrns(u32 msr, u64 val) { __wrmsrns(msr, val, val >> 32); } /* * 64-bit version of wrmsr_safe(): */ static inline int wrmsrl_safe(u32 msr, u64 val) { return wrmsr_safe(msr, (u32)val, (u32)(val >> 32)); } struct msr __percpu *msrs_alloc(void); void msrs_free(struct msr __percpu *msrs); int msr_set_bit(u32 msr, u8 bit); int msr_clear_bit(u32 msr, u8 bit); #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q); void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs); void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q); int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); #else /* CONFIG_SMP */ static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { rdmsr(msr_no, *l, *h); return 0; } static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { wrmsr(msr_no, l, h); return 0; } static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { rdmsrl(msr_no, *q); return 0; } static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { wrmsrl(msr_no, q); return 0; } static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr __percpu 
*msrs) { rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h)); } static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr __percpu *msrs) { wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h)); } static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { return rdmsr_safe(msr_no, l, h); } static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { return wrmsr_safe(msr_no, l, h); } static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { return rdmsrl_safe(msr_no, q); } static inline int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q) { return wrmsrl_safe(msr_no, q); } static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { return rdmsr_safe_regs(regs); } static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]) { return wrmsr_safe_regs(regs); } #endif /* CONFIG_SMP */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_MSR_H */
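/*
 * Illustrative sketch, not part of <asm/msr.h>: how the non-paravirt MSR
 * accessors declared above are typically used.  The MSR index 0x1234 and the
 * function example_msr_usage() are hypothetical placeholders; real callers
 * use constants from <asm/msr-index.h>.
 */
static void example_msr_usage(void)
{
	u64 val;
	int err;

	/*
	 * Unchecked accessors: the exception table entries in __rdmsr() and
	 * __wrmsr() keep a stray #GP from being fatal.
	 */
	rdmsrl(0x1234, val);
	wrmsrl(0x1234, val | 1);

	/* Checked variants return non-zero on a faulting access instead. */
	err = rdmsrl_safe(0x1234, &val);
	if (!err)
		err = wrmsrl_safe(0x1234, val & ~1ULL);
	(void)err;	/* quiet "set but not used" in this sketch */

	/* Cross-CPU access goes through the *_on_cpu() helpers (IPI on SMP). */
	rdmsrl_on_cpu(1, 0x1234, &val);

	/* Ordered TSC read for timing measurements, as documented above. */
	val = rdtsc_ordered();
	(void)val;
}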
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/linux/eventpoll.h ( Efficient event polling implementation ) * Copyright (C) 2001,...,2006 Davide Libenzi * * Davide Libenzi <davidel@xmailserver.org> */ #ifndef _LINUX_EVENTPOLL_H #define _LINUX_EVENTPOLL_H #include <uapi/linux/eventpoll.h> #include <uapi/linux/kcmp.h> /* Forward declarations to avoid compiler errors */ struct file; #ifdef CONFIG_EPOLL #ifdef CONFIG_KCMP struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); #endif /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); /* * This is called from inside fs/file_table.c:__fput() to unlink files * from the eventpoll interface. We need to have this facility to correctly * clean up files that are closed without being removed from the eventpoll * interface. */ static inline void eventpoll_release(struct file *file) { /* * Fast check to avoid the get/release of the semaphore. Since * we're doing this outside the semaphore lock, it might return * false negatives, but we don't care. It'll help in 99.99% of cases * to avoid the semaphore lock. False positives simply cannot happen * because the file is on the way to be removed and nobody ( but * eventpoll ) still has a reference to this file. */ if (likely(!file->f_ep)) return; /* * The file is being closed while it is still linked to an epoll * descriptor. We need to handle this by correctly unlinking it * from its containers. */ eventpoll_release_file(file); } int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, bool nonblock); /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) { return op != EPOLL_CTL_DEL; } #else static inline void eventpoll_release(struct file *file) {} #endif #if defined(CONFIG_ARM) && defined(CONFIG_OABI_COMPAT) /* ARM OABI has an incompatible struct layout and needs a special handler */ extern struct epoll_event __user * epoll_put_uevent(__poll_t revents, __u64 data, struct epoll_event __user *uevent); #else static inline struct epoll_event __user * epoll_put_uevent(__poll_t revents, __u64 data, struct epoll_event __user *uevent) { if (__put_user(revents, &uevent->events) || __put_user(data, &uevent->data)) return NULL; return uevent+1; } #endif #endif /* #ifndef _LINUX_EVENTPOLL_H */
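/*
 * Illustrative sketch, not part of eventpoll.h: how a caller that reports
 * ready events to userspace might use epoll_put_uevent() above.  The list
 * handling (struct example_item, example_send_events(), ready_list) is
 * hypothetical; the real transfer loop lives in fs/eventpoll.c and is more
 * involved.
 */
#include <linux/list.h>

struct example_item {
	struct list_head link;
	__poll_t revents;
	__u64 data;
};

static int example_send_events(struct list_head *ready_list,
			       struct epoll_event __user *uevent,
			       int maxevents)
{
	struct example_item *item;
	int copied = 0;

	list_for_each_entry(item, ready_list, link) {
		if (copied >= maxevents)
			break;
		/*
		 * epoll_put_uevent() returns the next free user slot, or
		 * NULL if the copy to userspace faulted.
		 */
		uevent = epoll_put_uevent(item->revents, item->data, uevent);
		if (!uevent)
			return copied ? copied : -EFAULT;
		copied++;
	}
	return copied;
}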
// SPDX-License-Identifier: GPL-2.0 /****************************************************************************** * usb_intf.c * * Copyright(c) 2007 - 2010 Realtek Corporation. All rights reserved. * Linux device driver for RTL8192SU * * Modifications for inclusion into the Linux staging tree are * Copyright(c) 2010 Larry Finger. All rights reserved.
* * Contact information: * WLAN FAE <wlanfae@realtek.com> * Larry Finger <Larry.Finger@lwfinger.net> * ******************************************************************************/ #define _HCI_INTF_C_ #include <linux/usb.h> #include <linux/module.h> #include <linux/firmware.h> #include "osdep_service.h" #include "drv_types.h" #include "recv_osdep.h" #include "xmit_osdep.h" #include "rtl8712_efuse.h" #include "usb_ops.h" #include "usb_osintf.h" static struct usb_interface *pintf; static int r871xu_drv_init(struct usb_interface *pusb_intf, const struct usb_device_id *pdid); static void r871xu_dev_remove(struct usb_interface *pusb_intf); static const struct usb_device_id rtl871x_usb_id_tbl[] = { /* RTL8188SU */ /* Realtek */ {USB_DEVICE(0x0BDA, 0x8171)}, {USB_DEVICE(0x0bda, 0x8173)}, {USB_DEVICE(0x0bda, 0x8712)}, {USB_DEVICE(0x0bda, 0x8713)}, {USB_DEVICE(0x0bda, 0xC512)}, /* Abocom */ {USB_DEVICE(0x07B8, 0x8188)}, /* ASUS */ {USB_DEVICE(0x0B05, 0x1786)}, {USB_DEVICE(0x0B05, 0x1791)}, /* 11n mode disable */ /* Belkin */ {USB_DEVICE(0x050D, 0x945A)}, /* ISY IWL - Belkin clone */ {USB_DEVICE(0x050D, 0x11F1)}, /* Corega */ {USB_DEVICE(0x07AA, 0x0047)}, /* D-Link */ {USB_DEVICE(0x2001, 0x3306)}, {USB_DEVICE(0x07D1, 0x3306)}, /* 11n mode disable */ /* Edimax */ {USB_DEVICE(0x7392, 0x7611)}, /* EnGenius */ {USB_DEVICE(0x1740, 0x9603)}, /* Hawking */ {USB_DEVICE(0x0E66, 0x0016)}, /* Hercules */ {USB_DEVICE(0x06F8, 0xE034)}, {USB_DEVICE(0x06F8, 0xE032)}, /* Logitec */ {USB_DEVICE(0x0789, 0x0167)}, /* PCI */ {USB_DEVICE(0x2019, 0xAB28)}, {USB_DEVICE(0x2019, 0xED16)}, /* Sitecom */ {USB_DEVICE(0x0DF6, 0x0057)}, {USB_DEVICE(0x0DF6, 0x0045)}, {USB_DEVICE(0x0DF6, 0x0059)}, /* 11n mode disable */ {USB_DEVICE(0x0DF6, 0x004B)}, {USB_DEVICE(0x0DF6, 0x005B)}, {USB_DEVICE(0x0DF6, 0x005D)}, {USB_DEVICE(0x0DF6, 0x0063)}, /* Sweex */ {USB_DEVICE(0x177F, 0x0154)}, /* Thinkware */ {USB_DEVICE(0x0BDA, 0x5077)}, /* Toshiba */ {USB_DEVICE(0x1690, 0x0752)}, /* - */ {USB_DEVICE(0x20F4, 0x646B)}, {USB_DEVICE(0x083A, 0xC512)}, {USB_DEVICE(0x25D4, 0x4CA1)}, {USB_DEVICE(0x25D4, 0x4CAB)}, /* RTL8191SU */ /* Realtek */ {USB_DEVICE(0x0BDA, 0x8172)}, {USB_DEVICE(0x0BDA, 0x8192)}, /* Amigo */ {USB_DEVICE(0x0EB0, 0x9061)}, /* ASUS/EKB */ {USB_DEVICE(0x13D3, 0x3323)}, {USB_DEVICE(0x13D3, 0x3311)}, /* 11n mode disable */ {USB_DEVICE(0x13D3, 0x3342)}, /* ASUS/EKBLenovo */ {USB_DEVICE(0x13D3, 0x3333)}, {USB_DEVICE(0x13D3, 0x3334)}, {USB_DEVICE(0x13D3, 0x3335)}, /* 11n mode disable */ {USB_DEVICE(0x13D3, 0x3336)}, /* 11n mode disable */ /* ASUS/Media BOX */ {USB_DEVICE(0x13D3, 0x3309)}, /* Belkin */ {USB_DEVICE(0x050D, 0x815F)}, /* D-Link */ {USB_DEVICE(0x07D1, 0x3302)}, {USB_DEVICE(0x07D1, 0x3300)}, {USB_DEVICE(0x07D1, 0x3303)}, /* Edimax */ {USB_DEVICE(0x7392, 0x7612)}, /* EnGenius */ {USB_DEVICE(0x1740, 0x9605)}, /* Guillemot */ {USB_DEVICE(0x06F8, 0xE031)}, /* Hawking */ {USB_DEVICE(0x0E66, 0x0015)}, /* Mediao */ {USB_DEVICE(0x13D3, 0x3306)}, /* PCI */ {USB_DEVICE(0x2019, 0xED18)}, {USB_DEVICE(0x2019, 0x4901)}, /* Sitecom */ {USB_DEVICE(0x0DF6, 0x0058)}, {USB_DEVICE(0x0DF6, 0x0049)}, {USB_DEVICE(0x0DF6, 0x004C)}, {USB_DEVICE(0x0DF6, 0x006C)}, {USB_DEVICE(0x0DF6, 0x0064)}, /* Skyworth */ {USB_DEVICE(0x14b2, 0x3300)}, {USB_DEVICE(0x14b2, 0x3301)}, {USB_DEVICE(0x14B2, 0x3302)}, /* - */ {USB_DEVICE(0x04F2, 0xAFF2)}, {USB_DEVICE(0x04F2, 0xAFF5)}, {USB_DEVICE(0x04F2, 0xAFF6)}, {USB_DEVICE(0x13D3, 0x3339)}, {USB_DEVICE(0x13D3, 0x3340)}, /* 11n mode disable */ {USB_DEVICE(0x13D3, 0x3341)}, /* 11n mode disable */ {USB_DEVICE(0x13D3, 
0x3310)}, {USB_DEVICE(0x13D3, 0x3325)}, /* RTL8192SU */ /* Realtek */ {USB_DEVICE(0x0BDA, 0x8174)}, /* Belkin */ {USB_DEVICE(0x050D, 0x845A)}, /* Corega */ {USB_DEVICE(0x07AA, 0x0051)}, /* Edimax */ {USB_DEVICE(0x7392, 0x7622)}, /* NEC */ {USB_DEVICE(0x0409, 0x02B6)}, {} }; MODULE_DEVICE_TABLE(usb, rtl871x_usb_id_tbl); static struct specific_device_id specific_device_id_tbl[] = { {.idVendor = 0x0b05, .idProduct = 0x1791, .flags = SPEC_DEV_ID_DISABLE_HT}, {.idVendor = 0x0df6, .idProduct = 0x0059, .flags = SPEC_DEV_ID_DISABLE_HT}, {.idVendor = 0x13d3, .idProduct = 0x3306, .flags = SPEC_DEV_ID_DISABLE_HT}, {.idVendor = 0x13D3, .idProduct = 0x3311, .flags = SPEC_DEV_ID_DISABLE_HT}, {.idVendor = 0x13d3, .idProduct = 0x3335, .flags = SPEC_DEV_ID_DISABLE_HT}, {.idVendor = 0x13d3, .idProduct = 0x3336, .flags = SPEC_DEV_ID_DISABLE_HT}, {.idVendor = 0x13d3, .idProduct = 0x3340, .flags = SPEC_DEV_ID_DISABLE_HT}, {.idVendor = 0x13d3, .idProduct = 0x3341, .flags = SPEC_DEV_ID_DISABLE_HT}, {} }; struct drv_priv { struct usb_driver r871xu_drv; int drv_registered; }; #ifdef CONFIG_PM static int r871x_suspend(struct usb_interface *pusb_intf, pm_message_t state) { struct net_device *pnetdev = usb_get_intfdata(pusb_intf); struct _adapter *padapter = netdev_priv(pnetdev); netdev_info(pnetdev, "Suspending...\n"); padapter->suspended = true; rtl871x_intf_stop(padapter); if (pnetdev->netdev_ops->ndo_stop) pnetdev->netdev_ops->ndo_stop(pnetdev); mdelay(10); netif_device_detach(pnetdev); return 0; } static void rtl871x_intf_resume(struct _adapter *padapter) { if (padapter->dvobjpriv.inirp_init) padapter->dvobjpriv.inirp_init(padapter); } static int r871x_resume(struct usb_interface *pusb_intf) { struct net_device *pnetdev = usb_get_intfdata(pusb_intf); struct _adapter *padapter = netdev_priv(pnetdev); netdev_info(pnetdev, "Resuming...\n"); netif_device_attach(pnetdev); if (pnetdev->netdev_ops->ndo_open) pnetdev->netdev_ops->ndo_open(pnetdev); padapter->suspended = false; rtl871x_intf_resume(padapter); return 0; } #endif static struct drv_priv drvpriv = { .r871xu_drv.name = "r8712u", .r871xu_drv.id_table = rtl871x_usb_id_tbl, .r871xu_drv.probe = r871xu_drv_init, .r871xu_drv.disconnect = r871xu_dev_remove, #ifdef CONFIG_PM .r871xu_drv.suspend = r871x_suspend, .r871xu_drv.resume = r871x_resume, #endif }; static uint r8712_usb_dvobj_init(struct _adapter *padapter) { uint status = _SUCCESS; struct usb_host_interface *phost_iface; struct usb_interface_descriptor *piface_desc; struct dvobj_priv *pdvobjpriv = &padapter->dvobjpriv; struct usb_device *pusbd = pdvobjpriv->pusbdev; pdvobjpriv->padapter = padapter; padapter->eeprom_address_size = 6; phost_iface = pintf->cur_altsetting; piface_desc = &phost_iface->desc; pdvobjpriv->nr_endpoint = piface_desc->bNumEndpoints; if (pusbd->speed == USB_SPEED_HIGH) { pdvobjpriv->ishighspeed = true; dev_info(&pusbd->dev, "r8712u: USB_SPEED_HIGH with %d endpoints\n", pdvobjpriv->nr_endpoint); } else { pdvobjpriv->ishighspeed = false; dev_info(&pusbd->dev, "r8712u: USB_SPEED_LOW with %d endpoints\n", pdvobjpriv->nr_endpoint); } if ((r8712_alloc_io_queue(padapter)) == _FAIL) status = _FAIL; return status; } static void r8712_usb_dvobj_deinit(struct _adapter *padapter) { r8712_free_io_queue(padapter); } void rtl871x_intf_stop(struct _adapter *padapter) { /*disable_hw_interrupt*/ if (!padapter->surprise_removed) { /*device still exists, so driver can do i/o operation * TODO: */ } /* cancel in irp */ if (padapter->dvobjpriv.inirp_deinit) padapter->dvobjpriv.inirp_deinit(padapter); /* cancel 
out irp */ r8712_usb_write_port_cancel(padapter); /* TODO:cancel other irps */ } void r871x_dev_unload(struct _adapter *padapter) { if (padapter->bup) { /*s1.*/ padapter->driver_stopped = true; /*s3.*/ rtl871x_intf_stop(padapter); /*s4.*/ r8712_stop_drv_threads(padapter); /*s5.*/ if (!padapter->surprise_removed) { padapter->hw_init_completed = false; rtl8712_hal_deinit(padapter); } padapter->bup = false; } } static void disable_ht_for_spec_devid(const struct usb_device_id *pdid, struct _adapter *padapter) { u16 vid, pid; u32 flags; int i; int num = ARRAY_SIZE(specific_device_id_tbl); for (i = 0; i < num; i++) { vid = specific_device_id_tbl[i].idVendor; pid = specific_device_id_tbl[i].idProduct; flags = specific_device_id_tbl[i].flags; if ((pdid->idVendor == vid) && (pdid->idProduct == pid) && (flags & SPEC_DEV_ID_DISABLE_HT)) { padapter->registrypriv.ht_enable = 0; padapter->registrypriv.cbw40_enable = 0; padapter->registrypriv.ampdu_enable = 0; } } } static const struct device_type wlan_type = { .name = "wlan", }; /* * drv_init() - a device potentially for us * * notes: drv_init() is called when the bus driver has located a card for us * to support. We accept the new device by returning 0. */ static int r871xu_drv_init(struct usb_interface *pusb_intf, const struct usb_device_id *pdid) { uint status; struct _adapter *padapter = NULL; struct dvobj_priv *pdvobjpriv; struct net_device *pnetdev; struct usb_device *udev; /* In this probe function, O.S. will provide the usb interface pointer * to driver. We have to increase the reference count of the usb device * structure by using the usb_get_dev function. */ udev = interface_to_usbdev(pusb_intf); usb_get_dev(udev); pintf = pusb_intf; /* step 1. */ pnetdev = r8712_init_netdev(); if (!pnetdev) goto put_dev; padapter = netdev_priv(pnetdev); disable_ht_for_spec_devid(pdid, padapter); pdvobjpriv = &padapter->dvobjpriv; pdvobjpriv->padapter = padapter; padapter->dvobjpriv.pusbdev = udev; padapter->pusb_intf = pusb_intf; usb_set_intfdata(pusb_intf, pnetdev); SET_NETDEV_DEV(pnetdev, &pusb_intf->dev); pnetdev->dev.type = &wlan_type; /* step 2. */ padapter->dvobj_init = r8712_usb_dvobj_init; padapter->dvobj_deinit = r8712_usb_dvobj_deinit; padapter->halpriv.hal_bus_init = r8712_usb_hal_bus_init; padapter->dvobjpriv.inirp_init = r8712_usb_inirp_init; padapter->dvobjpriv.inirp_deinit = r8712_usb_inirp_deinit; /* step 3. * initialize the dvobj_priv */ status = padapter->dvobj_init(padapter); if (status != _SUCCESS) goto free_netdev; /* step 4. */ status = r8712_init_drv_sw(padapter); if (status) goto dvobj_deinit; /* step 5. read efuse/eeprom data and get mac_addr */ { int i, offset; u8 mac[6]; u8 tmpU1b, AutoloadFail, eeprom_CustomerID; u8 *pdata = padapter->eeprompriv.efuse_eeprom_data; tmpU1b = r8712_read8(padapter, EE_9346CR);/*CR9346*/ /* To check system boot selection.*/ dev_info(&udev->dev, "r8712u: Boot from %s: Autoload %s\n", (tmpU1b & _9356SEL) ? "EEPROM" : "EFUSE", (tmpU1b & _EEPROM_EN) ? "OK" : "Failed"); /* To check autoload success or not.*/ if (tmpU1b & _EEPROM_EN) { AutoloadFail = true; /* The following operations prevent Efuse leakage by * turning on 2.5V. */ tmpU1b = r8712_read8(padapter, EFUSE_TEST + 3); r8712_write8(padapter, EFUSE_TEST + 3, tmpU1b | 0x80); msleep(20); r8712_write8(padapter, EFUSE_TEST + 3, (tmpU1b & (~BIT(7)))); /* Retrieve Chip version. * Recognize IC version by Reg0x4 BIT15. 
*/ tmpU1b = (u8)((r8712_read32(padapter, PMC_FSM) >> 15) & 0x1F); if (tmpU1b == 0x3) padapter->registrypriv.chip_version = RTL8712_3rdCUT; else padapter->registrypriv.chip_version = (tmpU1b >> 1) + 1; switch (padapter->registrypriv.chip_version) { case RTL8712_1stCUT: case RTL8712_2ndCUT: case RTL8712_3rdCUT: break; default: padapter->registrypriv.chip_version = RTL8712_2ndCUT; break; } for (i = 0, offset = 0; i < 128; i += 8, offset++) r8712_efuse_pg_packet_read(padapter, offset, &pdata[i]); if (!r8712_initmac || !mac_pton(r8712_initmac, mac)) { /* Use the mac address stored in the Efuse * offset = 0x12 for usb in efuse */ ether_addr_copy(mac, &pdata[0x12]); } eeprom_CustomerID = pdata[0x52]; switch (eeprom_CustomerID) { case EEPROM_CID_ALPHA: padapter->eeprompriv.CustomerID = RT_CID_819x_ALPHA; break; case EEPROM_CID_CAMEO: padapter->eeprompriv.CustomerID = RT_CID_819x_CAMEO; break; case EEPROM_CID_SITECOM: padapter->eeprompriv.CustomerID = RT_CID_819x_Sitecom; break; case EEPROM_CID_COREGA: padapter->eeprompriv.CustomerID = RT_CID_COREGA; break; case EEPROM_CID_Senao: padapter->eeprompriv.CustomerID = RT_CID_819x_Senao; break; case EEPROM_CID_EDIMAX_BELKIN: padapter->eeprompriv.CustomerID = RT_CID_819x_Edimax_Belkin; break; case EEPROM_CID_SERCOMM_BELKIN: padapter->eeprompriv.CustomerID = RT_CID_819x_Sercomm_Belkin; break; case EEPROM_CID_WNC_COREGA: padapter->eeprompriv.CustomerID = RT_CID_819x_WNC_COREGA; break; case EEPROM_CID_WHQL: break; case EEPROM_CID_NetCore: padapter->eeprompriv.CustomerID = RT_CID_819x_Netcore; break; case EEPROM_CID_CAMEO1: padapter->eeprompriv.CustomerID = RT_CID_819x_CAMEO1; break; case EEPROM_CID_CLEVO: padapter->eeprompriv.CustomerID = RT_CID_819x_CLEVO; break; default: padapter->eeprompriv.CustomerID = RT_CID_DEFAULT; break; } dev_info(&udev->dev, "r8712u: CustomerID = 0x%.4x\n", padapter->eeprompriv.CustomerID); /* Led mode */ switch (padapter->eeprompriv.CustomerID) { case RT_CID_DEFAULT: case RT_CID_819x_ALPHA: case RT_CID_819x_CAMEO: padapter->ledpriv.LedStrategy = SW_LED_MODE1; padapter->ledpriv.bRegUseLed = true; break; case RT_CID_819x_Sitecom: padapter->ledpriv.LedStrategy = SW_LED_MODE2; padapter->ledpriv.bRegUseLed = true; break; case RT_CID_COREGA: case RT_CID_819x_Senao: padapter->ledpriv.LedStrategy = SW_LED_MODE3; padapter->ledpriv.bRegUseLed = true; break; case RT_CID_819x_Edimax_Belkin: padapter->ledpriv.LedStrategy = SW_LED_MODE4; padapter->ledpriv.bRegUseLed = true; break; case RT_CID_819x_Sercomm_Belkin: padapter->ledpriv.LedStrategy = SW_LED_MODE5; padapter->ledpriv.bRegUseLed = true; break; case RT_CID_819x_WNC_COREGA: padapter->ledpriv.LedStrategy = SW_LED_MODE6; padapter->ledpriv.bRegUseLed = true; break; default: padapter->ledpriv.LedStrategy = SW_LED_MODE0; padapter->ledpriv.bRegUseLed = false; break; } } else { AutoloadFail = false; } if ((!AutoloadFail) || ((mac[0] == 0xff) && (mac[1] == 0xff) && (mac[2] == 0xff) && (mac[3] == 0xff) && (mac[4] == 0xff) && (mac[5] == 0xff)) || ((mac[0] == 0x00) && (mac[1] == 0x00) && (mac[2] == 0x00) && (mac[3] == 0x00) && (mac[4] == 0x00) && (mac[5] == 0x00))) { mac[0] = 0x00; mac[1] = 0xe0; mac[2] = 0x4c; mac[3] = 0x87; mac[4] = 0x00; mac[5] = 0x00; } if (r8712_initmac) { /* Make sure the user did not select a multicast * address by setting bit 1 of first octet. */ mac[0] &= 0xFE; dev_info(&udev->dev, "r8712u: MAC Address from user = %pM\n", mac); } else { dev_info(&udev->dev, "r8712u: MAC Address from efuse = %pM\n", mac); } eth_hw_addr_set(pnetdev, mac); } /* step 6. 
Load the firmware asynchronously */ if (rtl871x_load_fw(padapter)) goto deinit_drv_sw; init_completion(&padapter->rx_filter_ready); return 0; deinit_drv_sw: r8712_free_drv_sw(padapter); dvobj_deinit: padapter->dvobj_deinit(padapter); free_netdev: free_netdev(pnetdev); put_dev: usb_put_dev(udev); usb_set_intfdata(pusb_intf, NULL); return -ENODEV; } /* rmmod module & unplug(SurpriseRemoved) will call r871xu_dev_remove() * => how to recognize both */ static void r871xu_dev_remove(struct usb_interface *pusb_intf) { struct net_device *pnetdev = usb_get_intfdata(pusb_intf); struct usb_device *udev = interface_to_usbdev(pusb_intf); struct _adapter *padapter = netdev_priv(pnetdev); /* never exit with a firmware callback pending */ wait_for_completion(&padapter->rtl8712_fw_ready); if (pnetdev->reg_state != NETREG_UNINITIALIZED) unregister_netdev(pnetdev); /* will call netdev_close() */ usb_set_intfdata(pusb_intf, NULL); release_firmware(padapter->fw); if (drvpriv.drv_registered) padapter->surprise_removed = true; r8712_flush_rwctrl_works(padapter); r8712_flush_led_works(padapter); udelay(1); /* Stop driver mlme relation timer */ r8712_stop_drv_timers(padapter); r871x_dev_unload(padapter); if (padapter->dvobj_deinit) padapter->dvobj_deinit(padapter); r8712_free_drv_sw(padapter); free_netdev(pnetdev); /* decrease the reference count of the usb device structure * when disconnect */ usb_put_dev(udev); /* If we didn't unplug usb dongle and remove/insert module, driver * fails on sitesurvey for the first time when device is up. * Reset usb port for sitesurvey fail issue. */ if (udev->state != USB_STATE_NOTATTACHED) usb_reset_device(udev); } static int __init r8712u_drv_entry(void) { drvpriv.drv_registered = true; return usb_register(&drvpriv.r871xu_drv); } static void __exit r8712u_drv_halt(void) { drvpriv.drv_registered = false; usb_deregister(&drvpriv.r871xu_drv); } module_init(r8712u_drv_entry); module_exit(r8712u_drv_halt);
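/*
 * Illustrative sketch, not part of usb_intf.c: the accept/unwind shape that
 * r871xu_drv_init() above follows when the USB core offers us a device.
 * example_probe() is hypothetical; the real probe interleaves these steps
 * with efuse/EEPROM reads, dvobj setup and firmware loading.
 */
static int example_probe(struct usb_interface *intf,
			 const struct usb_device_id *id)
{
	struct usb_device *udev = interface_to_usbdev(intf);
	struct net_device *ndev;

	/* Hold a reference on the usb_device for the lifetime of the bind. */
	usb_get_dev(udev);

	ndev = r8712_init_netdev();
	if (!ndev)
		goto err_put_dev;

	/* Stash the netdev so disconnect/suspend/resume can find it again. */
	usb_set_intfdata(intf, ndev);
	SET_NETDEV_DEV(ndev, &intf->dev);

	/* ... device-specific init; each failing step unwinds the earlier ones ... */

	return 0;	/* returning 0 tells the USB core we accepted the device */

err_put_dev:
	usb_put_dev(udev);
	usb_set_intfdata(intf, NULL);
	return -ENODEV;
}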
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/read_write.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/slab.h> #include <linux/stat.h> #include <linux/sched/xacct.h> #include <linux/fcntl.h> #include <linux/file.h> #include <linux/uio.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/export.h> #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/splice.h> #include <linux/compat.h> #include <linux/mount.h> #include <linux/fs.h> #include "internal.h" #include <linux/uaccess.h> #include <asm/unistd.h> const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .mmap = generic_file_readonly_mmap, .splice_read = filemap_splice_read, }; EXPORT_SYMBOL(generic_ro_fops); static inline bool unsigned_offsets(struct file *file) { return file->f_mode & FMODE_UNSIGNED_OFFSET; } /** * vfs_setpos - update the file offset for lseek * @file: file structure in question * @offset: file offset to seek to * @maxsize: maximum file size * * This is a low-level filesystem helper for updating the file offset to * the value specified by @offset if the given offset is valid and it is * not equal to the current file offset. * * Return the specified offset on success and -EINVAL on invalid offset. */ loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) { if (offset < 0 && !unsigned_offsets(file)) return -EINVAL; if (offset > maxsize) return -EINVAL; if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; } return offset; } EXPORT_SYMBOL(vfs_setpos); /** * generic_file_llseek_size - generic llseek implementation for regular files * @file: file structure to seek on * @offset: file offset to seek to * @whence: type of seek * @maxsize: max size of this file in file system * @eof: offset used for SEEK_END position * * This is a variant of generic_file_llseek that allows passing in a custom * maximum file size and a custom EOF position, for e.g. hashed directories * * Synchronization: * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. * read/writes behave like SEEK_SET against seeks. */ loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof) { switch (whence) { case SEEK_END: offset += eof; break; case SEEK_CUR: /* * Here we special-case the lseek(fd, 0, SEEK_CUR) * position-querying operation. Avoid rewriting the "same" * f_pos value back to the file because a concurrent read(), * write() or lseek() might have altered it */ if (offset == 0) return file->f_pos; /* * f_lock protects against read/modify/write race with other * SEEK_CURs. Note that parallel writes and reads behave * like SEEK_SET.
*/ spin_lock(&file->f_lock); offset = vfs_setpos(file, file->f_pos + offset, maxsize); spin_unlock(&file->f_lock); return offset; case SEEK_DATA: /* * In the generic case the entire file is data, so as long as * offset isn't at the end of the file then the offset is data. */ if ((unsigned long long)offset >= eof) return -ENXIO; break; case SEEK_HOLE: /* * There is a virtual hole at the end of the file, so as long as * offset isn't i_size or larger, return i_size. */ if ((unsigned long long)offset >= eof) return -ENXIO; offset = eof; break; } return vfs_setpos(file, offset, maxsize); } EXPORT_SYMBOL(generic_file_llseek_size); /** * generic_file_llseek - generic llseek implementation for regular files * @file: file structure to seek on * @offset: file offset to seek to * @whence: type of seek * * This is a generic implementation of ->llseek usable for all normal local * filesystems. It just updates the file offset to the value specified by * @offset and @whence. */ loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; return generic_file_llseek_size(file, offset, whence, inode->i_sb->s_maxbytes, i_size_read(inode)); } EXPORT_SYMBOL(generic_file_llseek); /** * fixed_size_llseek - llseek implementation for fixed-sized devices * @file: file structure to seek on * @offset: file offset to seek to * @whence: type of seek * @size: size of the file * */ loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) { switch (whence) { case SEEK_SET: case SEEK_CUR: case SEEK_END: return generic_file_llseek_size(file, offset, whence, size, size); default: return -EINVAL; } } EXPORT_SYMBOL(fixed_size_llseek); /** * no_seek_end_llseek - llseek implementation for fixed-sized devices * @file: file structure to seek on * @offset: file offset to seek to * @whence: type of seek * */ loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) { switch (whence) { case SEEK_SET: case SEEK_CUR: return generic_file_llseek_size(file, offset, whence, OFFSET_MAX, 0); default: return -EINVAL; } } EXPORT_SYMBOL(no_seek_end_llseek); /** * no_seek_end_llseek_size - llseek implementation for fixed-sized devices * @file: file structure to seek on * @offset: file offset to seek to * @whence: type of seek * @size: maximal offset allowed * */ loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size) { switch (whence) { case SEEK_SET: case SEEK_CUR: return generic_file_llseek_size(file, offset, whence, size, 0); default: return -EINVAL; } } EXPORT_SYMBOL(no_seek_end_llseek_size); /** * noop_llseek - No Operation Performed llseek implementation * @file: file structure to seek on * @offset: file offset to seek to * @whence: type of seek * * This is an implementation of ->llseek usable for the rare special case when * userspace expects the seek to succeed but the (device) file is actually not * able to perform the seek. In this case you use noop_llseek() instead of * falling back to the default implementation of ->llseek.
*/ loff_t noop_llseek(struct file *file, loff_t offset, int whence) { return file->f_pos; } EXPORT_SYMBOL(noop_llseek); loff_t default_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file_inode(file); loff_t retval; inode_lock(inode); switch (whence) { case SEEK_END: offset += i_size_read(inode); break; case SEEK_CUR: if (offset == 0) { retval = file->f_pos; goto out; } offset += file->f_pos; break; case SEEK_DATA: /* * In the generic case the entire file is data, so as * long as offset isn't at the end of the file then the * offset is data. */ if (offset >= inode->i_size) { retval = -ENXIO; goto out; } break; case SEEK_HOLE: /* * There is a virtual hole at the end of the file, so * as long as offset isn't i_size or larger, return * i_size. */ if (offset >= inode->i_size) { retval = -ENXIO; goto out; } offset = inode->i_size; break; } retval = -EINVAL; if (offset >= 0 || unsigned_offsets(file)) { if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; } retval = offset; } out: inode_unlock(inode); return retval; } EXPORT_SYMBOL(default_llseek); loff_t vfs_llseek(struct file *file, loff_t offset, int whence) { if (!(file->f_mode & FMODE_LSEEK)) return -ESPIPE; return file->f_op->llseek(file, offset, whence); } EXPORT_SYMBOL(vfs_llseek); static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) { off_t retval; struct fd f = fdget_pos(fd); if (!f.file) return -EBADF; retval = -EINVAL; if (whence <= SEEK_MAX) { loff_t res = vfs_llseek(f.file, offset, whence); retval = res; if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } fdput_pos(f); return retval; } SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) { return ksys_lseek(fd, offset, whence); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) { return ksys_lseek(fd, offset, whence); } #endif #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \ defined(__ARCH_WANT_SYS_LLSEEK) SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned long, offset_low, loff_t __user *, result, unsigned int, whence) { int retval; struct fd f = fdget_pos(fd); loff_t offset; if (!f.file) return -EBADF; retval = -EINVAL; if (whence > SEEK_MAX) goto out_putf; offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, whence); retval = (int)offset; if (offset >= 0) { retval = -EFAULT; if (!copy_to_user(result, &offset, sizeof(offset))) retval = 0; } out_putf: fdput_pos(f); return retval; } #endif int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) { int mask = read_write == READ ? MAY_READ : MAY_WRITE; int ret; if (unlikely((ssize_t) count < 0)) return -EINVAL; if (ppos) { loff_t pos = *ppos; if (unlikely(pos < 0)) { if (!unsigned_offsets(file)) return -EINVAL; if (count >= -pos) /* both values are in 0..LLONG_MAX */ return -EOVERFLOW; } else if (unlikely((loff_t) (pos + count) < 0)) { if (!unsigned_offsets(file)) return -EINVAL; } } ret = security_file_permission(file, mask); if (ret) return ret; return fsnotify_file_area_perm(file, mask, ppos, count); } EXPORT_SYMBOL(rw_verify_area); static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = (ppos ? 
*ppos : 0); iov_iter_ubuf(&iter, ITER_DEST, buf, len); ret = call_read_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); if (ppos) *ppos = kiocb.ki_pos; return ret; } static int warn_unsupported(struct file *file, const char *op) { pr_warn_ratelimited( "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n", op, file, current->pid, current->comm); return -EINVAL; } ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { struct kvec iov = { .iov_base = buf, .iov_len = min_t(size_t, count, MAX_RW_COUNT), }; struct kiocb kiocb; struct iov_iter iter; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) return -EINVAL; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; /* * Also fail if ->read_iter and ->read are both wired up as that * implies very convoluted semantics. */ if (unlikely(!file->f_op->read_iter || file->f_op->read)) return warn_unsupported(file, "read"); init_sync_kiocb(&kiocb, file); kiocb.ki_pos = pos ? *pos : 0; iov_iter_kvec(&iter, ITER_DEST, &iov, 1, iov.iov_len); ret = file->f_op->read_iter(&kiocb, &iter); if (ret > 0) { if (pos) *pos = kiocb.ki_pos; fsnotify_access(file); add_rchar(current, ret); } inc_syscr(current); return ret; } ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { ssize_t ret; ret = rw_verify_area(READ, file, pos, count); if (ret) return ret; return __kernel_read(file, buf, count, pos); } EXPORT_SYMBOL(kernel_read); ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { ssize_t ret; if (!(file->f_mode & FMODE_READ)) return -EBADF; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; if (unlikely(!access_ok(buf, count))) return -EFAULT; ret = rw_verify_area(READ, file, pos, count); if (ret) return ret; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; if (file->f_op->read) ret = file->f_op->read(file, buf, count, pos); else if (file->f_op->read_iter) ret = new_sync_read(file, buf, count, pos); else ret = -EINVAL; if (ret > 0) { fsnotify_access(file); add_rchar(current, ret); } inc_syscr(current); return ret; } static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = (ppos ? *ppos : 0); iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)buf, len); ret = call_write_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0 && ppos) *ppos = kiocb.ki_pos; return ret; } /* caller is responsible for file_start_write/file_end_write */ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos) { struct kiocb kiocb; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; /* * Also fail if ->write_iter and ->write are both wired up as that * implies very convoluted semantics. */ if (unlikely(!file->f_op->write_iter || file->f_op->write)) return warn_unsupported(file, "write"); init_sync_kiocb(&kiocb, file); kiocb.ki_pos = pos ? 
*pos : 0; ret = file->f_op->write_iter(&kiocb, from); if (ret > 0) { if (pos) *pos = kiocb.ki_pos; fsnotify_modify(file); add_wchar(current, ret); } inc_syscw(current); return ret; } /* caller is responsible for file_start_write/file_end_write */ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { struct kvec iov = { .iov_base = (void *)buf, .iov_len = min_t(size_t, count, MAX_RW_COUNT), }; struct iov_iter iter; iov_iter_kvec(&iter, ITER_SOURCE, &iov, 1, iov.iov_len); return __kernel_write_iter(file, &iter, pos); } /* * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", * but autofs is one of the few internal kernel users that actually * wants this _and_ can be built as a module. So we need to export * this symbol for autofs, even though it really isn't appropriate * for any other kernel modules. */ EXPORT_SYMBOL_GPL(__kernel_write); ssize_t kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) { ssize_t ret; ret = rw_verify_area(WRITE, file, pos, count); if (ret) return ret; file_start_write(file); ret = __kernel_write(file, buf, count, pos); file_end_write(file); return ret; } EXPORT_SYMBOL(kernel_write); ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { ssize_t ret; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (unlikely(!access_ok(buf, count))) return -EFAULT; ret = rw_verify_area(WRITE, file, pos, count); if (ret) return ret; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; file_start_write(file); if (file->f_op->write) ret = file->f_op->write(file, buf, count, pos); else if (file->f_op->write_iter) ret = new_sync_write(file, buf, count, pos); else ret = -EINVAL; if (ret > 0) { fsnotify_modify(file); add_wchar(current, ret); } inc_syscw(current); file_end_write(file); return ret; } /* file_ppos returns &file->f_pos or NULL if file is stream */ static inline loff_t *file_ppos(struct file *file) { return file->f_mode & FMODE_STREAM ? 
NULL : &file->f_pos; } ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { loff_t pos, *ppos = file_ppos(f.file); if (ppos) { pos = *ppos; ppos = &pos; } ret = vfs_read(f.file, buf, count, ppos); if (ret >= 0 && ppos) f.file->f_pos = pos; fdput_pos(f); } return ret; } SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { return ksys_read(fd, buf, count); } ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { loff_t pos, *ppos = file_ppos(f.file); if (ppos) { pos = *ppos; ppos = &pos; } ret = vfs_write(f.file, buf, count, ppos); if (ret >= 0 && ppos) f.file->f_pos = pos; fdput_pos(f); } return ret; } SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) { return ksys_write(fd, buf, count); } ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos) { struct fd f; ssize_t ret = -EBADF; if (pos < 0) return -EINVAL; f = fdget(fd); if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PREAD) ret = vfs_read(f.file, buf, count, &pos); fdput(f); } return ret; } SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, size_t, count, loff_t, pos) { return ksys_pread64(fd, buf, count, pos); } #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PREAD64) COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf, size_t, count, compat_arg_u64_dual(pos)) { return ksys_pread64(fd, buf, count, compat_arg_u64_glue(pos)); } #endif ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos) { struct fd f; ssize_t ret = -EBADF; if (pos < 0) return -EINVAL; f = fdget(fd); if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PWRITE) ret = vfs_write(f.file, buf, count, &pos); fdput(f); } return ret; } SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, size_t, count, loff_t, pos) { return ksys_pwrite64(fd, buf, count, pos); } #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PWRITE64) COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, const char __user *, buf, size_t, count, compat_arg_u64_dual(pos)) { return ksys_pwrite64(fd, buf, count, compat_arg_u64_glue(pos)); } #endif static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, loff_t *ppos, int type, rwf_t flags) { struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); ret = kiocb_set_rw_flags(&kiocb, flags); if (ret) return ret; kiocb.ki_pos = (ppos ? 
*ppos : 0); if (type == READ) ret = call_read_iter(filp, &kiocb, iter); else ret = call_write_iter(filp, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ppos) *ppos = kiocb.ki_pos; return ret; } /* Do it by hand, with file-ops */ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, loff_t *ppos, int type, rwf_t flags) { ssize_t ret = 0; if (flags & ~RWF_HIPRI) return -EOPNOTSUPP; while (iov_iter_count(iter)) { ssize_t nr; if (type == READ) { nr = filp->f_op->read(filp, iter_iov_addr(iter), iter_iov_len(iter), ppos); } else { nr = filp->f_op->write(filp, iter_iov_addr(iter), iter_iov_len(iter), ppos); } if (nr < 0) { if (!ret) ret = nr; break; } ret += nr; if (nr != iter_iov_len(iter)) break; iov_iter_advance(iter, nr); } return ret; } ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, struct iov_iter *iter) { size_t tot_len; ssize_t ret = 0; if (!file->f_op->read_iter) return -EINVAL; if (!(file->f_mode & FMODE_READ)) return -EBADF; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; tot_len = iov_iter_count(iter); if (!tot_len) goto out; ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len); if (ret < 0) return ret; ret = call_read_iter(file, iocb, iter); out: if (ret >= 0) fsnotify_access(file); return ret; } EXPORT_SYMBOL(vfs_iocb_iter_read); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags) { size_t tot_len; ssize_t ret = 0; if (!file->f_op->read_iter) return -EINVAL; if (!(file->f_mode & FMODE_READ)) return -EBADF; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; tot_len = iov_iter_count(iter); if (!tot_len) goto out; ret = rw_verify_area(READ, file, ppos, tot_len); if (ret < 0) return ret; ret = do_iter_readv_writev(file, iter, ppos, READ, flags); out: if (ret >= 0) fsnotify_access(file); return ret; } EXPORT_SYMBOL(vfs_iter_read); /* * Caller is responsible for calling kiocb_end_write() on completion * if async iocb was queued. 
*/ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, struct iov_iter *iter) { size_t tot_len; ssize_t ret = 0; if (!file->f_op->write_iter) return -EINVAL; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; tot_len = iov_iter_count(iter); if (!tot_len) return 0; ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len); if (ret < 0) return ret; kiocb_start_write(iocb); ret = call_write_iter(file, iocb, iter); if (ret != -EIOCBQUEUED) kiocb_end_write(iocb); if (ret > 0) fsnotify_modify(file); return ret; } EXPORT_SYMBOL(vfs_iocb_iter_write); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags) { size_t tot_len; ssize_t ret; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (!file->f_op->write_iter) return -EINVAL; tot_len = iov_iter_count(iter); if (!tot_len) return 0; ret = rw_verify_area(WRITE, file, ppos, tot_len); if (ret < 0) return ret; file_start_write(file); ret = do_iter_readv_writev(file, iter, ppos, WRITE, flags); if (ret > 0) fsnotify_modify(file); file_end_write(file); return ret; } EXPORT_SYMBOL(vfs_iter_write); static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; size_t tot_len; ssize_t ret = 0; if (!(file->f_mode & FMODE_READ)) return -EBADF; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); if (ret < 0) return ret; tot_len = iov_iter_count(&iter); if (!tot_len) goto out; ret = rw_verify_area(READ, file, pos, tot_len); if (ret < 0) goto out; if (file->f_op->read_iter) ret = do_iter_readv_writev(file, &iter, pos, READ, flags); else ret = do_loop_readv_writev(file, &iter, pos, READ, flags); out: if (ret >= 0) fsnotify_access(file); kfree(iov); return ret; } static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; size_t tot_len; ssize_t ret = 0; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; ret = import_iovec(ITER_SOURCE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); if (ret < 0) return ret; tot_len = iov_iter_count(&iter); if (!tot_len) goto out; ret = rw_verify_area(WRITE, file, pos, tot_len); if (ret < 0) goto out; file_start_write(file); if (file->f_op->write_iter) ret = do_iter_readv_writev(file, &iter, pos, WRITE, flags); else ret = do_loop_readv_writev(file, &iter, pos, WRITE, flags); if (ret > 0) fsnotify_modify(file); file_end_write(file); out: kfree(iov); return ret; } static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { loff_t pos, *ppos = file_ppos(f.file); if (ppos) { pos = *ppos; ppos = &pos; } ret = vfs_readv(f.file, vec, vlen, ppos, flags); if (ret >= 0 && ppos) f.file->f_pos = pos; fdput_pos(f); } if (ret > 0) add_rchar(current, ret); inc_syscr(current); return ret; } static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { loff_t pos, *ppos = file_ppos(f.file); if (ppos) { pos = *ppos; ppos = &pos; } ret = 
vfs_writev(f.file, vec, vlen, ppos, flags); if (ret >= 0 && ppos) f.file->f_pos = pos; fdput_pos(f); } if (ret > 0) add_wchar(current, ret); inc_syscw(current); return ret; } static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) { #define HALF_LONG_BITS (BITS_PER_LONG / 2) return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; } static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret = -EBADF; if (pos < 0) return -EINVAL; f = fdget(fd); if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PREAD) ret = vfs_readv(f.file, vec, vlen, &pos, flags); fdput(f); } if (ret > 0) add_rchar(current, ret); inc_syscr(current); return ret; } static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret = -EBADF; if (pos < 0) return -EINVAL; f = fdget(fd); if (f.file) { ret = -ESPIPE; if (f.file->f_mode & FMODE_PWRITE) ret = vfs_writev(f.file, vec, vlen, &pos, flags); fdput(f); } if (ret > 0) add_wchar(current, ret); inc_syscw(current); return ret; } SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { return do_readv(fd, vec, vlen, 0); } SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { return do_writev(fd, vec, vlen, 0); } SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) { loff_t pos = pos_from_hilo(pos_h, pos_l); return do_preadv(fd, vec, vlen, pos, 0); } SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, rwf_t, flags) { loff_t pos = pos_from_hilo(pos_h, pos_l); if (pos == -1) return do_readv(fd, vec, vlen, flags); return do_preadv(fd, vec, vlen, pos, flags); } SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) { loff_t pos = pos_from_hilo(pos_h, pos_l); return do_pwritev(fd, vec, vlen, pos, 0); } SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, rwf_t, flags) { loff_t pos = pos_from_hilo(pos_h, pos_l); if (pos == -1) return do_writev(fd, vec, vlen, flags); return do_pwritev(fd, vec, vlen, pos, flags); } /* * Various compat syscalls. Note that they all pretend to take a native * iovec - import_iovec will properly treat those as compat_iovecs based on * in_compat_syscall(). 
*/ #ifdef CONFIG_COMPAT #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, loff_t, pos) { return do_preadv(fd, vec, vlen, pos, 0); } #endif COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, const struct iovec __user *, vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; return do_preadv(fd, vec, vlen, pos, 0); } #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, loff_t, pos, rwf_t, flags) { if (pos == -1) return do_readv(fd, vec, vlen, flags); return do_preadv(fd, vec, vlen, pos, flags); } #endif COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, const struct iovec __user *, vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; if (pos == -1) return do_readv(fd, vec, vlen, flags); return do_preadv(fd, vec, vlen, pos, flags); } #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, loff_t, pos) { return do_pwritev(fd, vec, vlen, pos, 0); } #endif COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, const struct iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; return do_pwritev(fd, vec, vlen, pos, 0); } #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, loff_t, pos, rwf_t, flags) { if (pos == -1) return do_writev(fd, vec, vlen, flags); return do_pwritev(fd, vec, vlen, pos, flags); } #endif COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, const struct iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; if (pos == -1) return do_writev(fd, vec, vlen, flags); return do_pwritev(fd, vec, vlen, pos, flags); } #endif /* CONFIG_COMPAT */ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { struct fd in, out; struct inode *in_inode, *out_inode; struct pipe_inode_info *opipe; loff_t pos; loff_t out_pos; ssize_t retval; int fl; /* * Get input file, and verify that it is ok.. */ retval = -EBADF; in = fdget(in_fd); if (!in.file) goto out; if (!(in.file->f_mode & FMODE_READ)) goto fput_in; retval = -ESPIPE; if (!ppos) { pos = in.file->f_pos; } else { pos = *ppos; if (!(in.file->f_mode & FMODE_PREAD)) goto fput_in; } retval = rw_verify_area(READ, in.file, &pos, count); if (retval < 0) goto fput_in; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; /* * Get output file, and verify that it is ok.. */ retval = -EBADF; out = fdget(out_fd); if (!out.file) goto fput_in; if (!(out.file->f_mode & FMODE_WRITE)) goto fput_out; in_inode = file_inode(in.file); out_inode = file_inode(out.file); out_pos = out.file->f_pos; if (!max) max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); if (unlikely(pos + count > max)) { retval = -EOVERFLOW; if (pos >= max) goto fput_out; count = max - pos; } fl = 0; #if 0 /* * We need to debate whether we can enable this or not. The * man page documents EAGAIN return for the output at least, * and the application is arguably buggy if it doesn't expect * EAGAIN on a non-blocking file descriptor. 
*/ if (in.file->f_flags & O_NONBLOCK) fl = SPLICE_F_NONBLOCK; #endif opipe = get_pipe_info(out.file, true); if (!opipe) { retval = rw_verify_area(WRITE, out.file, &out_pos, count); if (retval < 0) goto fput_out; retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); } else { if (out.file->f_flags & O_NONBLOCK) fl |= SPLICE_F_NONBLOCK; retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); } if (retval > 0) { add_rchar(current, retval); add_wchar(current, retval); fsnotify_access(in.file); fsnotify_modify(out.file); out.file->f_pos = out_pos; if (ppos) *ppos = pos; else in.file->f_pos = pos; } inc_syscr(current); inc_syscw(current); if (pos > max) retval = -EOVERFLOW; fput_out: fdput(out); fput_in: fdput(in); out: return retval; } SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) { loff_t pos; off_t off; ssize_t ret; if (offset) { if (unlikely(get_user(off, offset))) return -EFAULT; pos = off; ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); if (unlikely(put_user(pos, offset))) return -EFAULT; return ret; } return do_sendfile(out_fd, in_fd, NULL, count, 0); } SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) { loff_t pos; ssize_t ret; if (offset) { if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) return -EFAULT; ret = do_sendfile(out_fd, in_fd, &pos, count, 0); if (unlikely(put_user(pos, offset))) return -EFAULT; return ret; } return do_sendfile(out_fd, in_fd, NULL, count, 0); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, compat_off_t __user *, offset, compat_size_t, count) { loff_t pos; off_t off; ssize_t ret; if (offset) { if (unlikely(get_user(off, offset))) return -EFAULT; pos = off; ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); if (unlikely(put_user(pos, offset))) return -EFAULT; return ret; } return do_sendfile(out_fd, in_fd, NULL, count, 0); } COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, compat_loff_t __user *, offset, compat_size_t, count) { loff_t pos; ssize_t ret; if (offset) { if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) return -EFAULT; ret = do_sendfile(out_fd, in_fd, &pos, count, 0); if (unlikely(put_user(pos, offset))) return -EFAULT; return ret; } return do_sendfile(out_fd, in_fd, NULL, count, 0); } #endif /* * Performs necessary checks before doing a file copy * * Can adjust amount of bytes to copy via @req_count argument. * Returns appropriate error code that caller should return or * zero in case the copy should be allowed. */ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t *req_count, unsigned int flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); uint64_t count = *req_count; loff_t size_in; int ret; ret = generic_file_rw_checks(file_in, file_out); if (ret) return ret; /* * We allow some filesystems to handle cross sb copy, but passing * a file of the wrong filesystem type to filesystem driver can result * in an attempt to dereference the wrong type of ->private_data, so * avoid doing that until we really have a good reason. * * nfs and cifs define several different file_system_type structures * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. 
*/ if (flags & COPY_FILE_SPLICE) { /* cross sb splice is allowed */ } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) return -EXDEV; } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { return -EXDEV; } /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode_out)) return -EPERM; if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) return -ETXTBSY; /* Ensure offsets don't wrap. */ if (pos_in + count < pos_in || pos_out + count < pos_out) return -EOVERFLOW; /* Shorten the copy to EOF */ size_in = i_size_read(inode_in); if (pos_in >= size_in) count = 0; else count = min(count, size_in - (uint64_t)pos_in); ret = generic_write_check_limits(file_out, pos_out, &count); if (ret) return ret; /* Don't allow overlapped copying within the same file. */ if (inode_in == inode_out && pos_out + count > pos_in && pos_out < pos_in + count) return -EINVAL; *req_count = count; return 0; } /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. When it does so is up to * the copy_file_range method. */ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { ssize_t ret; bool splice = flags & COPY_FILE_SPLICE; bool samesb = file_inode(file_in)->i_sb == file_inode(file_out)->i_sb; if (flags & ~COPY_FILE_SPLICE) return -EINVAL; ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, flags); if (unlikely(ret)) return ret; ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) return ret; ret = rw_verify_area(WRITE, file_out, &pos_out, len); if (unlikely(ret)) return ret; if (len == 0) return 0; file_start_write(file_out); /* * Cloning is supported by more file systems, so we implement copy on * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. */ if (!splice && file_out->f_op->copy_file_range) { ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); } else if (!splice && file_in->f_op->remap_file_range && samesb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, min_t(loff_t, MAX_RW_COUNT, len), REMAP_FILE_CAN_SHORTEN); /* fallback to splice */ if (ret <= 0) splice = true; } else if (samesb) { /* Fallback to splice for same sb copy for backward compat */ splice = true; } file_end_write(file_out); if (!splice) goto done; /* * We can get here for same sb copy of filesystems that do not implement * ->copy_file_range() in case filesystem does not support clone or in * case filesystem supports clone but rejected the clone request (e.g. * because it was not block aligned). * * In both cases, fall back to kernel copy so we are able to maintain a * consistent story about which filesystems support copy_file_range() * and which filesystems do not, that will allow userspace tools to * make consistent desicions w.r.t using copy_file_range(). * * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE * for server-side-copy between any two sb. * * In any case, we call do_splice_direct() and not splice_file_range(), * without file_start_write() held, to avoid possible deadlocks related * to splicing from input file, while file_start_write() is held on * the output file on a different sb. 
*/ ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, min_t(size_t, len, MAX_RW_COUNT), 0); done: if (ret > 0) { fsnotify_access(file_in); add_rchar(current, ret); fsnotify_modify(file_out); add_wchar(current, ret); } inc_syscr(current); inc_syscw(current); return ret; } EXPORT_SYMBOL(vfs_copy_file_range); SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags) { loff_t pos_in; loff_t pos_out; struct fd f_in; struct fd f_out; ssize_t ret = -EBADF; f_in = fdget(fd_in); if (!f_in.file) goto out2; f_out = fdget(fd_out); if (!f_out.file) goto out1; ret = -EFAULT; if (off_in) { if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) goto out; } else { pos_in = f_in.file->f_pos; } if (off_out) { if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) goto out; } else { pos_out = f_out.file->f_pos; } ret = -EINVAL; if (flags != 0) goto out; ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, flags); if (ret > 0) { pos_in += ret; pos_out += ret; if (off_in) { if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) ret = -EFAULT; } else { f_in.file->f_pos = pos_in; } if (off_out) { if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) ret = -EFAULT; } else { f_out.file->f_pos = pos_out; } } out: fdput(f_out); out1: fdput(f_in); out2: return ret; } /* * Don't operate on ranges the page cache doesn't support, and don't exceed the * LFS limits. If pos is under the limit it becomes a short access. If it * exceeds the limit we return -EFBIG. */ int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count) { struct inode *inode = file->f_mapping->host; loff_t max_size = inode->i_sb->s_maxbytes; loff_t limit = rlimit(RLIMIT_FSIZE); if (limit != RLIM_INFINITY) { if (pos >= limit) { send_sig(SIGXFSZ, current, 0); return -EFBIG; } *count = min(*count, limit - pos); } if (!(file->f_flags & O_LARGEFILE)) max_size = MAX_NON_LFS; if (unlikely(pos >= max_size)) return -EFBIG; *count = min(*count, max_size - pos); return 0; } /* Like generic_write_checks(), but takes size of write instead of iter. */ int generic_write_checks_count(struct kiocb *iocb, loff_t *count) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; if (IS_SWAPFILE(inode)) return -ETXTBSY; if (!*count) return 0; if (iocb->ki_flags & IOCB_APPEND) iocb->ki_pos = i_size_read(inode); if ((iocb->ki_flags & IOCB_NOWAIT) && !((iocb->ki_flags & IOCB_DIRECT) || (file->f_mode & FMODE_BUF_WASYNC))) return -EINVAL; return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count); } EXPORT_SYMBOL(generic_write_checks_count); /* * Performs necessary checks before doing a write * * Can adjust writing position or amount of bytes to write. * Returns appropriate error code that caller should return or * zero in case that write should be allowed. */ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) { loff_t count = iov_iter_count(from); int ret; ret = generic_write_checks_count(iocb, &count); if (ret) return ret; iov_iter_truncate(from, count); return iov_iter_count(from); } EXPORT_SYMBOL(generic_write_checks); /* * Performs common checks before doing a file copy/clone * from @file_in to @file_out. */ int generic_file_rw_checks(struct file *file_in, struct file *file_out) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); /* Don't copy dirs, pipes, sockets... 
*/ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) return -EINVAL; if (!(file_in->f_mode & FMODE_READ) || !(file_out->f_mode & FMODE_WRITE) || (file_out->f_flags & O_APPEND)) return -EBADF; return 0; }
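/*
 * Userspace sketch (not part of the kernel sources above): copying a whole
 * file with copy_file_range(2), looping because the call may return a
 * partial count, which vfs_copy_file_range() explicitly allows. Assumes the
 * glibc wrapper is available; the file names are placeholders.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	int in = open("input.dat", O_RDONLY);	/* hypothetical paths */
	int out = open("output.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	struct stat st;
	ssize_t n;

	if (in < 0 || out < 0 || fstat(in, &st) < 0) {
		perror("setup");
		return EXIT_FAILURE;
	}

	off_t remaining = st.st_size;
	while (remaining > 0) {
		/* NULL offsets: the kernel advances both file positions. */
		n = copy_file_range(in, NULL, out, NULL, remaining, 0);
		if (n < 0) {
			perror("copy_file_range");
			return EXIT_FAILURE;
		}
		if (n == 0)	/* unexpected EOF on the input */
			break;
		remaining -= n;
	}
	close(in);
	close(out);
	return EXIT_SUCCESS;
}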
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * workqueue.h --- work queue handling for Linux.
*/ #ifndef _LINUX_WORKQUEUE_H #define _LINUX_WORKQUEUE_H #include <linux/timer.h> #include <linux/linkage.h> #include <linux/bitops.h> #include <linux/lockdep.h> #include <linux/threads.h> #include <linux/atomic.h> #include <linux/cpumask.h> #include <linux/rcupdate.h> #include <linux/workqueue_types.h> /* * The first word is the work queue pointer and the flags rolled into * one */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) enum work_bits { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */ WORK_STRUCT_PWQ_BIT, /* data points to pwq */ WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */ #endif WORK_STRUCT_FLAG_BITS, /* color for workqueue flushing */ WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_STRUCT_COLOR_BITS = 4, /* * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/ * debugobjects turned off. This makes pwqs aligned to 256 bytes (512 * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors. * * MSB * [ pwq pointer ] [ flush color ] [ STRUCT flags ] * 4 bits 4 or 5 bits */ WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, /* * data contains off-queue information when !WORK_STRUCT_PWQ. * * MSB * [ pool ID ] [ OFFQ flags ] [ STRUCT flags ] * 1 bit 4 or 5 bits */ WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_FLAG_END, WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, /* * When a work item is off queue, the high bits encode off-queue flags * and the last pool it was on. Cap pool ID to 31 bits and use the * highest number to indicate that no pool is associated. */ WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, }; enum work_flags { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT, WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, #else WORK_STRUCT_STATIC = 0, #endif }; enum wq_misc_consts { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS), /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ WORKER_DESC_LEN = 24, }; /* Convenience constants - of type 'unsigned long', not 'enum'! 
*/ #define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) #define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) #define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)) struct delayed_work { struct work_struct work; struct timer_list timer; /* target workqueue and CPU ->timer uses to queue ->work */ struct workqueue_struct *wq; int cpu; }; struct rcu_work { struct work_struct work; struct rcu_head rcu; /* target workqueue ->rcu uses to queue ->work */ struct workqueue_struct *wq; }; enum wq_affn_scope { WQ_AFFN_DFL, /* use system default */ WQ_AFFN_CPU, /* one pod per CPU */ WQ_AFFN_SMT, /* one pod poer SMT */ WQ_AFFN_CACHE, /* one pod per LLC */ WQ_AFFN_NUMA, /* one pod per NUMA node */ WQ_AFFN_SYSTEM, /* one pod across the whole system */ WQ_AFFN_NR_TYPES, }; /** * struct workqueue_attrs - A struct for workqueue attributes. * * This can be used to change attributes of an unbound workqueue. */ struct workqueue_attrs { /** * @nice: nice level */ int nice; /** * @cpumask: allowed CPUs * * Work items in this workqueue are affine to these CPUs and not allowed * to execute on other CPUs. A pool serving a workqueue must have the * same @cpumask. */ cpumask_var_t cpumask; /** * @__pod_cpumask: internal attribute used to create per-pod pools * * Internal use only. * * Per-pod unbound worker pools are used to improve locality. Always a * subset of ->cpumask. A workqueue can be associated with multiple * worker pools with disjoint @__pod_cpumask's. Whether the enforcement * of a pool's @__pod_cpumask is strict depends on @affn_strict. */ cpumask_var_t __pod_cpumask; /** * @affn_strict: affinity scope is strict * * If clear, workqueue will make a best-effort attempt at starting the * worker inside @__pod_cpumask but the scheduler is free to migrate it * outside. * * If set, workers are only allowed to run inside @__pod_cpumask. */ bool affn_strict; /* * Below fields aren't properties of a worker_pool. They only modify how * :c:func:`apply_workqueue_attrs` select pools and thus don't * participate in pool hash calculations or equality comparisons. */ /** * @affn_scope: unbound CPU affinity scope * * CPU pods are used to improve execution locality of unbound work * items. There are multiple pod types, one for each wq_affn_scope, and * every CPU in the system belongs to one pod in every pod type. CPUs * that belong to the same pod share the worker pool. For example, * selecting %WQ_AFFN_NUMA makes the workqueue use a separate worker * pool for each NUMA node. */ enum wq_affn_scope affn_scope; /** * @ordered: work items must be executed one by one in queueing order */ bool ordered; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) { return container_of(work, struct delayed_work, work); } static inline struct rcu_work *to_rcu_work(struct work_struct *work) { return container_of(work, struct rcu_work, work); } struct execute_work { struct work_struct work; }; #ifdef CONFIG_LOCKDEP /* * NB: because we have to copy the lockdep_map, setting _key * here is required, otherwise it could get initialised to the * copy of the lockdep_map! 
*/ #define __WORK_INIT_LOCKDEP_MAP(n, k) \ .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k), #else #define __WORK_INIT_LOCKDEP_MAP(n, k) #endif #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_STATIC_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ } #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ .timer = __TIMER_INITIALIZER(delayed_work_timer_fn,\ (tflags) | TIMER_IRQSAFE), \ } #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0) #define DECLARE_DEFERRABLE_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE) #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); extern void destroy_delayed_work_on_stack(struct delayed_work *work); static inline unsigned int work_static(struct work_struct *work) { return *work_data_bits(work) & WORK_STRUCT_STATIC; } #else static inline void __init_work(struct work_struct *work, int onstack) { } static inline void destroy_work_on_stack(struct work_struct *work) { } static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { } static inline unsigned int work_static(struct work_struct *work) { return 0; } #endif /* * initialize all of a work item in one go * * NOTE! No point in using "atomic_long_set()": using a direct * assignment of the work data initializer allows the compiler * to generate better code. */ #ifdef CONFIG_LOCKDEP #define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #else #define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #endif #define __INIT_WORK(_work, _func, _onstack) \ do { \ static __maybe_unused struct lock_class_key __key; \ \ __INIT_WORK_KEY(_work, _func, _onstack, &__key); \ } while (0) #define INIT_WORK(_work, _func) \ __INIT_WORK((_work), (_func), 0) #define INIT_WORK_ONSTACK(_work, _func) \ __INIT_WORK((_work), (_func), 1) #define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \ __INIT_WORK_KEY((_work), (_func), 1, _key) #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ __init_timer(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ __init_timer_on_stack(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) #define INIT_DELAYED_WORK(_work, _func) \ __INIT_DELAYED_WORK(_work, _func, 0) #define INIT_DELAYED_WORK_ONSTACK(_work, _func) \ __INIT_DELAYED_WORK_ONSTACK(_work, _func, 0) #define INIT_DEFERRABLE_WORK(_work, _func) \ __INIT_DELAYED_WORK(_work, _func, TIMER_DEFERRABLE) #define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func) \ __INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE) #define INIT_RCU_WORK(_work, _func) \ INIT_WORK(&(_work)->work, (_func)) #define INIT_RCU_WORK_ONSTACK(_work, _func) \ 
INIT_WORK_ONSTACK(&(_work)->work, (_func)) /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question */ #define work_pending(work) \ test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) /** * delayed_work_pending - Find out whether a delayable work item is currently * pending * @w: The work item in question */ #define delayed_work_pending(w) \ work_pending(&(w)->work) /* * Workqueue flags and constants. For details, please refer to * Documentation/core-api/workqueue.rst. */ enum wq_flags { WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */ WQ_SYSFS = 1 << 6, /* visible in sysfs, see workqueue_sysfs_register() */ /* * Per-cpu workqueues are generally preferred because they tend to * show better performance thanks to cache locality. Per-cpu * workqueues exclude the scheduler from choosing the CPU to * execute the worker threads, which has an unfortunate side effect * of increasing power consumption. * * The scheduler considers a CPU idle if it doesn't have any task * to execute and tries to keep idle cores idle to conserve power; * however, for example, a per-cpu work item scheduled from an * interrupt handler on an idle CPU will force the scheduler to * execute the work item on that CPU breaking the idleness, which in * turn may lead to more scheduling choices which are sub-optimal * in terms of power consumption. * * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default * but become unbound if workqueue.power_efficient kernel param is * specified. Per-cpu workqueues which are identified to * contribute significantly to power-consumption are identified and * marked with this flag and enabling the power_efficient mode * leads to noticeable power saving at the cost of small * performance disadvantage. * * http://thread.gmane.org/gmane.linux.kernel/1480396 */ WQ_POWER_EFFICIENT = 1 << 7, __WQ_DESTROYING = 1 << 15, /* internal: workqueue is destroying */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ /* BH wq only allows the following flags */ __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, }; enum wq_consts { WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, /* * Per-node default cap on min_active. Unless explicitly set, min_active * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see * workqueue_struct->min_active definition. */ WQ_DFL_MIN_ACTIVE = 8, }; /* * System-wide workqueues which are always present. * * system_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. * * system_highpri_wq is similar to system_wq but for work items which * require WQ_HIGHPRI. * * system_long_wq is similar to system_wq but may host long running * works. Queue flushing might take relatively long. * * system_unbound_wq is unbound workqueue. Workers are not bound to * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and * resources are available. 
* * system_freezable_wq is equivalent to system_wq except that it's * freezable. * * *_power_efficient_wq are inclined towards saving power and converted * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, * they are same as their non-power-efficient counterparts - e.g. * system_power_efficient_wq is identical to system_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. * * system_bh[_highpri]_wq are convenience interface to softirq. BH work items * are executed in the queueing CPU's BH context in the queueing order. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_highpri_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; extern struct workqueue_struct *system_bh_wq; extern struct workqueue_struct *system_bh_highpri_wq; void workqueue_softirq_action(bool highpri); void workqueue_softirq_dead(unsigned int cpu); /** * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags * @max_active: max in-flight work items, 0 for default * remaining args: args for @fmt * * For a per-cpu workqueue, @max_active limits the number of in-flight work * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be * executing at most one work item for the workqueue. * * For unbound workqueues, @max_active limits the number of in-flight work items * for the whole system. e.g. @max_active of 16 indicates that that there can be * at most 16 work items executing for the workqueue in the whole system. * * As sharing the same active counter for an unbound workqueue across multiple * NUMA nodes can be expensive, @max_active is distributed to each NUMA node * according to the proportion of the number of online CPUs and enforced * independently. * * Depending on online CPU distribution, a node may end up with per-node * max_active which is significantly lower than @max_active, which can lead to * deadlocks if the per-node concurrency limit is lower than the maximum number * of interdependent work items for the workqueue. * * To guarantee forward progress regardless of online CPU distribution, the * concurrency limit on every node is guaranteed to be equal to or greater than * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means * that the sum of per-node max_active's may be larger than @max_active. * * For detailed information on %WQ_* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(1, 4) struct workqueue_struct * alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); /** * alloc_ordered_workqueue - allocate an ordered workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @args: args for @fmt * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. They are * implemented as unbound workqueues with @max_active of one. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) 
\ alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) #define create_freezable_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ WQ_MEM_RECLAIM, 1, (name)) #define create_singlethread_workqueue(name) \ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) #define from_work(var, callback_work, work_fieldname) \ container_of(callback_work, typeof(*var), work_fieldname) extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs(void); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); extern bool queue_work_node(int node, struct workqueue_struct *wq, struct work_struct *work); extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay); extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork); extern void __flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern bool flush_rcu_work(struct rcu_work *rwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); extern void workqueue_set_min_active(struct workqueue_struct *wq, int min_active); extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); extern void print_worker_info(const char *log_lvl, struct task_struct *task); extern void show_all_workqueues(void); extern void show_freezable_workqueues(void); extern void show_one_workqueue(struct workqueue_struct *wq); extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); /** * queue_work - queue work on a workqueue * @wq: workqueue to use * @work: work to queue * * Returns %false if @work was already on a queue, %true otherwise. * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. 
* * Memory-ordering properties: If it returns %true, guarantees that all stores * preceding the call to queue_work() in the program order will be visible from * the CPU which will execute @work by the time such work executes, e.g., * * { x is initially 0 } * * CPU0 CPU1 * * WRITE_ONCE(x, 1); [ @work is being executed ] * r0 = queue_work(wq, work); r1 = READ_ONCE(x); * * Forbids: r0 == true && r1 == 0 */ static inline bool queue_work(struct workqueue_struct *wq, struct work_struct *work) { return queue_work_on(WORK_CPU_UNBOUND, wq, work); } /** * queue_delayed_work - queue work on a workqueue after delay * @wq: workqueue to use * @dwork: delayable work to queue * @delay: number of jiffies to wait before queueing * * Equivalent to queue_delayed_work_on() but tries to use the local CPU. */ static inline bool queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); } /** * mod_delayed_work - modify delay of or queue a delayed work * @wq: workqueue to use * @dwork: work to queue * @delay: number of jiffies to wait before queueing * * mod_delayed_work_on() on local CPU. */ static inline bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); } /** * schedule_work_on - put work task on a specific cpu * @cpu: cpu to put the work task on * @work: job to be done * * This puts a job on a specific cpu */ static inline bool schedule_work_on(int cpu, struct work_struct *work) { return queue_work_on(cpu, system_wq, work); } /** * schedule_work - put work task in global workqueue * @work: job to be done * * Returns %false if @work was already on the kernel-global workqueue and * %true otherwise. * * This puts a job in the kernel-global workqueue if it was not already * queued and leaves it in the same position on the kernel-global * workqueue otherwise. * * Shares the same memory-ordering properties of queue_work(), cf. the * DocBook header of queue_work(). */ static inline bool schedule_work(struct work_struct *work) { return queue_work(system_wq, work); } /* * Detect attempt to flush system-wide workqueues at compile time when possible. * Warn attempt to flush system-wide workqueues at runtime. * * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp * for reasons and steps for converting system-wide workqueues into local workqueues. */ extern void __warn_flushing_systemwide_wq(void) __compiletime_warning("Please avoid flushing system-wide workqueues."); /* Please stop using this function, for this function will be removed in near future. 
*/ #define flush_scheduled_work() \ ({ \ __warn_flushing_systemwide_wq(); \ __flush_workqueue(system_wq); \ }) #define flush_workqueue(wq) \ ({ \ struct workqueue_struct *_wq = (wq); \ \ if ((__builtin_constant_p(_wq == system_wq) && \ _wq == system_wq) || \ (__builtin_constant_p(_wq == system_highpri_wq) && \ _wq == system_highpri_wq) || \ (__builtin_constant_p(_wq == system_long_wq) && \ _wq == system_long_wq) || \ (__builtin_constant_p(_wq == system_unbound_wq) && \ _wq == system_unbound_wq) || \ (__builtin_constant_p(_wq == system_freezable_wq) && \ _wq == system_freezable_wq) || \ (__builtin_constant_p(_wq == system_power_efficient_wq) && \ _wq == system_power_efficient_wq) || \ (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \ _wq == system_freezable_power_efficient_wq)) \ __warn_flushing_systemwide_wq(); \ __flush_workqueue(_wq); \ }) /** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use * @dwork: job to be done * @delay: number of jiffies to wait * * After waiting for a given time this puts a job in the kernel-global * workqueue on the specified CPU. */ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work_on(cpu, system_wq, dwork, delay); } /** * schedule_delayed_work - put work task in global workqueue after delay * @dwork: job to be done * @delay: number of jiffies to wait or 0 for immediate execution * * After waiting for a given time this puts a job in the kernel-global * workqueue. */ static inline bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work(system_wq, dwork, delay); } #ifndef CONFIG_SMP static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } #else long work_on_cpu_key(int cpu, long (*fn)(void *), void *arg, struct lock_class_key *key); /* * A new key is defined for each caller to make sure the work * associated with the function doesn't share its locking class. */ #define work_on_cpu(_cpu, _fn, _arg) \ ({ \ static struct lock_class_key __key; \ \ work_on_cpu_key(_cpu, _fn, _arg, &__key); \ }) long work_on_cpu_safe_key(int cpu, long (*fn)(void *), void *arg, struct lock_class_key *key); /* * A new key is defined for each caller to make sure the work * associated with the function doesn't share its locking class. */ #define work_on_cpu_safe(_cpu, _fn, _arg) \ ({ \ static struct lock_class_key __key; \ \ work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \ }) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER extern void freeze_workqueues_begin(void); extern bool freeze_workqueues_busy(void); extern void thaw_workqueues(void); #endif /* CONFIG_FREEZER */ #ifdef CONFIG_SYSFS int workqueue_sysfs_register(struct workqueue_struct *wq); #else /* CONFIG_SYSFS */ static inline int workqueue_sysfs_register(struct workqueue_struct *wq) { return 0; } #endif /* CONFIG_SYSFS */ #ifdef CONFIG_WQ_WATCHDOG void wq_watchdog_touch(int cpu); #else /* CONFIG_WQ_WATCHDOG */ static inline void wq_watchdog_touch(int cpu) { } #endif /* CONFIG_WQ_WATCHDOG */ #ifdef CONFIG_SMP int workqueue_prepare_cpu(unsigned int cpu); int workqueue_online_cpu(unsigned int cpu); int workqueue_offline_cpu(unsigned int cpu); #endif void __init workqueue_init_early(void); void __init workqueue_init(void); void __init workqueue_init_topology(void); #endif
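/*
 * Minimal usage sketch for the API declared above (not part of workqueue.h).
 * A hypothetical driver embeds a work_struct in its own state, queues it
 * from an event path, and cancels it on teardown. Names such as my_dev,
 * my_event and my_refresh_fn are placeholders.
 */
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_dev {
	struct work_struct refresh_work;
	int pending_events;
};

static void my_refresh_fn(struct work_struct *work)
{
	/* container_of() (or the from_work() helper) recovers the embedding structure. */
	struct my_dev *dev = container_of(work, struct my_dev, refresh_work);

	pr_info("handling %d pending events\n", dev->pending_events);
}

static struct my_dev *my_dev_create(void)
{
	struct my_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;
	INIT_WORK(&dev->refresh_work, my_refresh_fn);
	return dev;
}

static void my_event(struct my_dev *dev)
{
	dev->pending_events++;
	/* Runs on system_wq; returns false if the work was already queued. */
	schedule_work(&dev->refresh_work);
}

static void my_dev_destroy(struct my_dev *dev)
{
	cancel_work_sync(&dev->refresh_work);	/* wait out a running instance */
	kfree(dev);
}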
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Lock-less NULL terminated single linked list
 *
 * The basic atomic operation of this list is cmpxchg on long.  On
 * architectures that don't have NMI-safe cmpxchg implementation, the
 * list can NOT be used in NMI handlers.  So code that uses the list in
 * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
 *
 * Copyright 2010,2011 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/llist.h>

/**
 * llist_add_batch - add several linked entries in batch
 * @new_first:	first entry in batch to be added
 * @new_last:	last entry in batch to be added
 * @head:	the head for your lock-less list
 *
 * Return whether list is empty before adding.
 */
bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
		     struct llist_head *head)
{
	struct llist_node *first = READ_ONCE(head->first);

	do {
		new_last->next = first;
	} while (!try_cmpxchg(&head->first, &first, new_first));

	return !first;
}
EXPORT_SYMBOL_GPL(llist_add_batch);

/**
 * llist_del_first - delete the first entry of lock-less list
 * @head:	the head for your lock-less list
 *
 * If list is empty, return NULL, otherwise, return the first entry
 * deleted, this is the newest added one.
 *
 * Only one llist_del_first user can be used simultaneously with
 * multiple llist_add users without lock.  Because otherwise
 * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add,
 * llist_add) sequence in another user may change @head->first->next,
 * but keep @head->first.  If multiple consumers are needed, please
 * use llist_del_all or use lock between consumers.
 */
struct llist_node *llist_del_first(struct llist_head *head)
{
	struct llist_node *entry, *next;

	entry = smp_load_acquire(&head->first);
	do {
		if (entry == NULL)
			return NULL;
		next = READ_ONCE(entry->next);
	} while (!try_cmpxchg(&head->first, &entry, next));

	return entry;
}
EXPORT_SYMBOL_GPL(llist_del_first);

/**
 * llist_del_first_this - delete given entry of lock-less list if it is first
 * @head:	the head for your lock-less list
 * @this:	a list entry.
 *
 * If head of the list is given entry, delete and return %true else
 * return %false.
 *
 * Multiple callers can safely call this concurrently with multiple
 * llist_add() callers, providing all the callers offer a different @this.
 */
bool llist_del_first_this(struct llist_head *head, struct llist_node *this)
{
	struct llist_node *entry, *next;

	/* acquire ensures ordering wrt try_cmpxchg() in llist_del_first() */
	entry = smp_load_acquire(&head->first);
	do {
		if (entry != this)
			return false;
		next = READ_ONCE(entry->next);
	} while (!try_cmpxchg(&head->first, &entry, next));

	return true;
}
EXPORT_SYMBOL_GPL(llist_del_first_this);

/**
 * llist_reverse_order - reverse order of a llist chain
 * @head:	first item of the list to be reversed
 *
 * Reverse the order of a chain of llist entries and return the
 * new first entry.
*/ struct llist_node *llist_reverse_order(struct llist_node *head) { struct llist_node *new_head = NULL; while (head) { struct llist_node *tmp = head; head = head->next; tmp->next = new_head; new_head = tmp; } return new_head; } EXPORT_SYMBOL_GPL(llist_reverse_order);
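/*
 * Usage sketch for the functions above (not part of llist.c). A producer
 * pushes nodes with llist_add_batch() (a single entry is just a batch of
 * one, which is how llist_add() in <linux/llist.h> is built), and a single
 * consumer pops them with llist_del_first(). struct my_item is a
 * placeholder; the LLIST_HEAD() and llist_entry() helpers are assumed to
 * come from <linux/llist.h>.
 */
#include <linux/llist.h>
#include <linux/slab.h>

struct my_item {
	int value;
	struct llist_node node;
};

static LLIST_HEAD(my_stack);

static bool my_push(int value)
{
	struct my_item *item = kmalloc(sizeof(*item), GFP_ATOMIC);

	if (!item)
		return false;
	item->value = value;
	/* Single-entry batch: first == last. Returns true if the list was empty. */
	return llist_add_batch(&item->node, &item->node, &my_stack);
}

/* Only one concurrent consumer may use llist_del_first(), see above. */
static struct my_item *my_pop(void)
{
	struct llist_node *node = llist_del_first(&my_stack);

	return node ? llist_entry(node, struct my_item, node) : NULL;
}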
/*
 * Copyright (c) 2016 Intel Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting documentation, and
 * that the name of the copyright holders not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  The copyright holders make no representations
 * about the suitability of this software for any purpose.  It is provided "as
 * is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 */

#include <linux/export.h>

#include <drm/drm_bridge.h>
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_encoder.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "drm_crtc_internal.h"
#include "drm_internal.h"

/**
 * DOC: overview
 *
 * Encoders represent the connecting element between the CRTC (as the overall
 * pixel pipeline, represented by &struct drm_crtc) and the connectors (as the
 * generic sink entity, represented by &struct drm_connector). An encoder takes
 * pixel data from a CRTC and converts it to a format suitable for any attached
 * connector. Encoders are objects exposed to userspace, originally to allow
 * userspace to infer cloning and connector/CRTC restrictions. Unfortunately
 * almost all drivers get this wrong, making the uabi pretty much useless. On
 * top of that the exposed restrictions are too simple for today's hardware, and
 * the recommended way to infer restrictions is by using the
 * DRM_MODE_ATOMIC_TEST_ONLY flag for the atomic IOCTL.
* * Otherwise encoders aren't used in the uapi at all (any modeset request from * userspace directly connects a connector with a CRTC), drivers are therefore * free to use them however they wish. Modeset helper libraries make strong use * of encoders to facilitate code sharing. But for more complex settings it is * usually better to move shared code into a separate &drm_bridge. Compared to * encoders, bridges also have the benefit of being purely an internal * abstraction since they are not exposed to userspace at all. * * Encoders are initialized with drm_encoder_init() and cleaned up using * drm_encoder_cleanup(). */ static const struct drm_prop_enum_list drm_encoder_enum_list[] = { { DRM_MODE_ENCODER_NONE, "None" }, { DRM_MODE_ENCODER_DAC, "DAC" }, { DRM_MODE_ENCODER_TMDS, "TMDS" }, { DRM_MODE_ENCODER_LVDS, "LVDS" }, { DRM_MODE_ENCODER_TVDAC, "TV" }, { DRM_MODE_ENCODER_VIRTUAL, "Virtual" }, { DRM_MODE_ENCODER_DSI, "DSI" }, { DRM_MODE_ENCODER_DPMST, "DP MST" }, { DRM_MODE_ENCODER_DPI, "DPI" }, }; int drm_encoder_register_all(struct drm_device *dev) { struct drm_encoder *encoder; int ret = 0; drm_for_each_encoder(encoder, dev) { drm_debugfs_encoder_add(encoder); if (encoder->funcs && encoder->funcs->late_register) ret = encoder->funcs->late_register(encoder); if (ret) return ret; } return 0; } void drm_encoder_unregister_all(struct drm_device *dev) { struct drm_encoder *encoder; drm_for_each_encoder(encoder, dev) { if (encoder->funcs && encoder->funcs->early_unregister) encoder->funcs->early_unregister(encoder); drm_debugfs_encoder_remove(encoder); } } __printf(5, 0) static int __drm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, va_list ap) { int ret; /* encoder index is used with 32bit bitmasks */ if (WARN_ON(dev->mode_config.num_encoder >= 32)) return -EINVAL; ret = drm_mode_object_add(dev, &encoder->base, DRM_MODE_OBJECT_ENCODER); if (ret) return ret; encoder->dev = dev; encoder->encoder_type = encoder_type; encoder->funcs = funcs; if (name) { encoder->name = kvasprintf(GFP_KERNEL, name, ap); } else { encoder->name = kasprintf(GFP_KERNEL, "%s-%d", drm_encoder_enum_list[encoder_type].name, encoder->base.id); } if (!encoder->name) { ret = -ENOMEM; goto out_put; } INIT_LIST_HEAD(&encoder->bridge_chain); list_add_tail(&encoder->head, &dev->mode_config.encoder_list); encoder->index = dev->mode_config.num_encoder++; out_put: if (ret) drm_mode_object_unregister(dev, &encoder->base); return ret; } /** * drm_encoder_init - Init a preallocated encoder * @dev: drm device * @encoder: the encoder to init * @funcs: callbacks for this encoder * @encoder_type: user visible type of the encoder * @name: printf style format string for the encoder name, or NULL for default name * * Initializes a preallocated encoder. Encoder should be subclassed as part of * driver encoder objects. At driver unload time the driver's * &drm_encoder_funcs.destroy hook should call drm_encoder_cleanup() and kfree() * the encoder structure. The encoder structure should not be allocated with * devm_kzalloc(). * * Note: consider using drmm_encoder_alloc() or drmm_encoder_init() * instead of drm_encoder_init() to let the DRM managed resource * infrastructure take care of cleanup and deallocation. * * Returns: * Zero on success, error code on failure. */ int drm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...) 
{ va_list ap; int ret; WARN_ON(!funcs->destroy); va_start(ap, name); ret = __drm_encoder_init(dev, encoder, funcs, encoder_type, name, ap); va_end(ap); return ret; } EXPORT_SYMBOL(drm_encoder_init); /** * drm_encoder_cleanup - cleans up an initialised encoder * @encoder: encoder to cleanup * * Cleans up the encoder but doesn't free the object. */ void drm_encoder_cleanup(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct drm_bridge *bridge, *next; /* Note that the encoder_list is considered to be static; should we * remove the drm_encoder at runtime we would have to decrement all * the indices on the drm_encoder after us in the encoder_list. */ list_for_each_entry_safe(bridge, next, &encoder->bridge_chain, chain_node) drm_bridge_detach(bridge); drm_mode_object_unregister(dev, &encoder->base); kfree(encoder->name); list_del(&encoder->head); dev->mode_config.num_encoder--; memset(encoder, 0, sizeof(*encoder)); } EXPORT_SYMBOL(drm_encoder_cleanup); static void drmm_encoder_alloc_release(struct drm_device *dev, void *ptr) { struct drm_encoder *encoder = ptr; if (WARN_ON(!encoder->dev)) return; drm_encoder_cleanup(encoder); } __printf(5, 0) static int __drmm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, va_list args) { int ret; if (drm_WARN_ON(dev, funcs && funcs->destroy)) return -EINVAL; ret = __drm_encoder_init(dev, encoder, funcs, encoder_type, name, args); if (ret) return ret; ret = drmm_add_action_or_reset(dev, drmm_encoder_alloc_release, encoder); if (ret) return ret; return 0; } void *__drmm_encoder_alloc(struct drm_device *dev, size_t size, size_t offset, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...) { void *container; struct drm_encoder *encoder; va_list ap; int ret; container = drmm_kzalloc(dev, size, GFP_KERNEL); if (!container) return ERR_PTR(-ENOMEM); encoder = container + offset; va_start(ap, name); ret = __drmm_encoder_init(dev, encoder, funcs, encoder_type, name, ap); va_end(ap); if (ret) return ERR_PTR(ret); return container; } EXPORT_SYMBOL(__drmm_encoder_alloc); /** * drmm_encoder_init - Initialize a preallocated encoder * @dev: drm device * @encoder: the encoder to init * @funcs: callbacks for this encoder (optional) * @encoder_type: user visible type of the encoder * @name: printf style format string for the encoder name, or NULL for default name * * Initializes a preallocated encoder. Encoder should be subclassed as * part of driver encoder objects. Cleanup is automatically handled * through registering drm_encoder_cleanup() with drmm_add_action(). The * encoder structure should be allocated with drmm_kzalloc(). * * The @drm_encoder_funcs.destroy hook must be NULL. * * Returns: * Zero on success, error code on failure. */ int drmm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...) { va_list ap; int ret; va_start(ap, name); ret = __drmm_encoder_init(dev, encoder, funcs, encoder_type, name, ap); va_end(ap); if (ret) return ret; return 0; } EXPORT_SYMBOL(drmm_encoder_init); static struct drm_crtc *drm_encoder_get_crtc(struct drm_encoder *encoder) { struct drm_connector *connector; struct drm_device *dev = encoder->dev; bool uses_atomic = false; struct drm_connector_list_iter conn_iter; /* For atomic drivers only state objects are synchronously updated and * protected by modeset locks, so check those first. 
*/ drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (!connector->state) continue; uses_atomic = true; if (connector->state->best_encoder != encoder) continue; drm_connector_list_iter_end(&conn_iter); return connector->state->crtc; } drm_connector_list_iter_end(&conn_iter); /* Don't return stale data (e.g. pending async disable). */ if (uses_atomic) return NULL; return encoder->crtc; } int drm_mode_getencoder(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_get_encoder *enc_resp = data; struct drm_encoder *encoder; struct drm_crtc *crtc; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; encoder = drm_encoder_find(dev, file_priv, enc_resp->encoder_id); if (!encoder) return -ENOENT; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); crtc = drm_encoder_get_crtc(encoder); if (crtc && drm_lease_held(file_priv, crtc->base.id)) enc_resp->crtc_id = crtc->base.id; else enc_resp->crtc_id = 0; drm_modeset_unlock(&dev->mode_config.connection_mutex); enc_resp->encoder_type = encoder->encoder_type; enc_resp->encoder_id = encoder->base.id; enc_resp->possible_crtcs = drm_lease_filter_crtcs(file_priv, encoder->possible_crtcs); enc_resp->possible_clones = encoder->possible_clones; return 0; }
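To make the drm_encoder_init()/drmm_encoder_init() contract above concrete, here is a minimal, hypothetical driver-side sketch using the DRM-managed allocation path recommended in the kernel-doc. "struct foo_encoder" and "foo_encoder_create()" are illustrative names only, not part of this file.

/*
 * Illustrative sketch: a driver subclassing drm_encoder and letting the DRM
 * managed infrastructure handle cleanup. "foo" names are hypothetical.
 */
#include <linux/err.h>
#include <drm/drm_device.h>
#include <drm/drm_encoder.h>

struct foo_encoder {
	struct drm_encoder base;
	/* driver-private state would follow */
};

static int foo_encoder_create(struct drm_device *dev)
{
	struct foo_encoder *fenc;

	/*
	 * drmm_encoder_alloc() allocates the container with drmm_kzalloc()
	 * and registers drm_encoder_cleanup() as a managed release action,
	 * so no &drm_encoder_funcs.destroy hook is provided (it must be NULL).
	 */
	fenc = drmm_encoder_alloc(dev, struct foo_encoder, base,
				  NULL, DRM_MODE_ENCODER_TMDS, NULL);
	if (IS_ERR(fenc))
		return PTR_ERR(fenc);

	/* Restrict the encoder to the first CRTC; adjust for the real pipeline. */
	fenc->base.possible_crtcs = BIT(0);

	return 0;
}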
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NF_CONNTRACK_COMMON_H
#define _NF_CONNTRACK_COMMON_H

#include <linux/refcount.h>
#include <uapi/linux/netfilter/nf_conntrack_common.h>

struct ip_conntrack_stat {
	unsigned int found;
	unsigned int invalid;
	unsigned int insert;
	unsigned int insert_failed;
	unsigned int clash_resolve;
	unsigned int drop;
	unsigned int early_drop;
	unsigned int error;
	unsigned int expect_new;
	unsigned int expect_create;
	unsigned int expect_delete;
	unsigned int search_restart;
	unsigned int chaintoolong;
};

#define NFCT_INFOMASK	7UL
#define NFCT_PTRMASK	~(NFCT_INFOMASK)

struct nf_conntrack {
	refcount_t use;
};

void nf_conntrack_destroy(struct nf_conntrack *nfct);

/* like nf_ct_put, but without module dependency on nf_conntrack */
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
{
	if (nfct && refcount_dec_and_test(&nfct->use))
		nf_conntrack_destroy(nfct);
}

static inline void nf_conntrack_get(struct nf_conntrack *nfct)
{
	if (nfct)
		refcount_inc(&nfct->use);
}

#endif /* _NF_CONNTRACK_COMMON_H */
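As a brief illustration of how the two NULL-safe helpers above are meant to be paired, here is a hypothetical caller that pins a conntrack entry across out-of-line use; the function name is illustrative only.

/* Hypothetical example: hold a reference while the entry is used elsewhere. */
static void example_pin_conntrack(struct nf_conntrack *nfct)
{
	nf_conntrack_get(nfct);		/* NULL-safe refcount_inc() */

	/* ... use the conntrack entry here ... */

	/*
	 * NULL-safe drop of the reference; when the count hits zero this calls
	 * nf_conntrack_destroy() through the declaration above, avoiding a
	 * module dependency on nf_conntrack.
	 */
	nf_conntrack_put(nfct);
}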
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io */ /* Devmap's primary use is as a backend map for the XDP BPF helper call * bpf_redirect_map(). Because XDP is mostly concerned with performance we * spent some effort to ensure the datapath with redirect maps does not use * any locking. This is a quick note on the details. * * We have three possible paths to get into the devmap control plane: bpf * syscalls, bpf programs, and driver side xmit/flush operations. A bpf syscall * will invoke an update, delete, or lookup operation. To ensure updates and * deletes appear atomic from the datapath side xchg() is used to modify the * netdev_map array. Then because the datapath does a lookup into the netdev_map * array (read-only) from an RCU critical section we use call_rcu() to wait for * an rcu grace period before free'ing the old data structures. This ensures the * datapath always has a valid copy. However, the datapath does a "flush" * operation that pushes any pending packets in the driver outside the RCU * critical section. Each bpf_dtab_netdev tracks these pending operations using * a per-cpu flush list. The bpf_dtab_netdev object will not be destroyed until * this list is empty, indicating outstanding flush operations have completed. * * BPF syscalls may race with BPF program calls on any of the update, delete * or lookup operations. As noted above, the xchg() operation also keeps the * netdev_map consistent in this case. From the devmap side BPF programs * calling into these operations are the same as multiple user space threads * making system calls. * * Finally, any of the above may race with a netdev_unregister notifier. The * unregister notifier must search for net devices in the map structure that * contain a reference to the net device and remove them. This is a two-step * process: (a) dereference the bpf_dtab_netdev object in netdev_map and (b) * check to see if the ifindex is the same as the net_device being removed. * When removing the dev a cmpxchg() is used to ensure the correct dev is * removed; in the case of a concurrent update or delete operation it is * possible that the initially referenced dev is no longer in the map.
As the * notifier hook walks the map we know that new dev references can not be * added by the user because core infrastructure ensures dev_get_by_index() * calls will fail at this point. * * The devmap_hash type is a map type which interprets keys as ifindexes and * indexes these using a hashmap. This allows maps that use ifindex as key to be * densely packed instead of having holes in the lookup array for unused * ifindexes. The setup and packet enqueue/send code is shared between the two * types of devmap; only the lookup and insertion is different. */ #include <linux/bpf.h> #include <net/xdp.h> #include <linux/filter.h> #include <trace/events/xdp.h> #include <linux/btf_ids.h> #define DEV_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) struct xdp_dev_bulk_queue { struct xdp_frame *q[DEV_MAP_BULK_SIZE]; struct list_head flush_node; struct net_device *dev; struct net_device *dev_rx; struct bpf_prog *xdp_prog; unsigned int count; }; struct bpf_dtab_netdev { struct net_device *dev; /* must be first member, due to tracepoint */ struct hlist_node index_hlist; struct bpf_prog *xdp_prog; struct rcu_head rcu; unsigned int idx; struct bpf_devmap_val val; }; struct bpf_dtab { struct bpf_map map; struct bpf_dtab_netdev __rcu **netdev_map; /* DEVMAP type only */ struct list_head list; /* these are only used for DEVMAP_HASH type maps */ struct hlist_head *dev_index_head; spinlock_t index_lock; unsigned int items; u32 n_buckets; }; static DEFINE_PER_CPU(struct list_head, dev_flush_list); static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); static struct hlist_head *dev_map_create_hash(unsigned int entries, int numa_node) { int i; struct hlist_head *hash; hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node); if (hash != NULL) for (i = 0; i < entries; i++) INIT_HLIST_HEAD(&hash[i]); return hash; } static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, int idx) { return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)]; } static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { u32 valsize = attr->value_size; /* check sanity of attributes. 
2 value sizes supported: * 4 bytes: ifindex * 8 bytes: ifindex + prog fd */ if (attr->max_entries == 0 || attr->key_size != 4 || (valsize != offsetofend(struct bpf_devmap_val, ifindex) && valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) || attr->map_flags & ~DEV_CREATE_FLAG_MASK) return -EINVAL; /* Lookup returns a pointer straight to dev->ifindex, so make sure the * verifier prevents writes from the BPF side */ attr->map_flags |= BPF_F_RDONLY_PROG; bpf_map_init_from_attr(&dtab->map, attr); if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { /* hash table size must be power of 2; roundup_pow_of_two() can * overflow into UB on 32-bit arches, so check that first */ if (dtab->map.max_entries > 1UL << 31) return -EINVAL; dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) return -ENOMEM; spin_lock_init(&dtab->index_lock); } else { dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map) return -ENOMEM; } return 0; } static struct bpf_map *dev_map_alloc(union bpf_attr *attr) { struct bpf_dtab *dtab; int err; dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE); if (!dtab) return ERR_PTR(-ENOMEM); err = dev_map_init_map(dtab, attr); if (err) { bpf_map_area_free(dtab); return ERR_PTR(err); } spin_lock(&dev_map_lock); list_add_tail_rcu(&dtab->list, &dev_map_list); spin_unlock(&dev_map_lock); return &dtab->map; } static void dev_map_free(struct bpf_map *map) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); int i; /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, * so the programs (can be more than one that used this map) were * disconnected from events. The following synchronize_rcu() guarantees * both rcu read critical sections complete and waits for * preempt-disable regions (NAPI being the relevant context here) so we * are certain there will be no further reads against the netdev_map and * all flush operations are complete. Flush operations can only be done * from NAPI context for this reason. */ spin_lock(&dev_map_lock); list_del_rcu(&dtab->list); spin_unlock(&dev_map_lock); bpf_clear_redirect_map(map); synchronize_rcu(); /* Make sure prior __dev_map_entry_free() have completed. */ rcu_barrier(); if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) { for (i = 0; i < dtab->n_buckets; i++) { struct bpf_dtab_netdev *dev; struct hlist_head *head; struct hlist_node *next; head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dev, next, head, index_hlist) { hlist_del_rcu(&dev->index_hlist); if (dev->xdp_prog) bpf_prog_put(dev->xdp_prog); dev_put(dev->dev); kfree(dev); } } bpf_map_area_free(dtab->dev_index_head); } else { for (i = 0; i < dtab->map.max_entries; i++) { struct bpf_dtab_netdev *dev; dev = rcu_dereference_raw(dtab->netdev_map[i]); if (!dev) continue; if (dev->xdp_prog) bpf_prog_put(dev->xdp_prog); dev_put(dev->dev); kfree(dev); } bpf_map_area_free(dtab->netdev_map); } bpf_map_area_free(dtab); } static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); u32 index = key ? 
*(u32 *)key : U32_MAX; u32 *next = next_key; if (index >= dtab->map.max_entries) { *next = 0; return 0; } if (index == dtab->map.max_entries - 1) return -ENOENT; *next = index + 1; return 0; } /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or * by local_bh_disable() (from XDP calls inside NAPI). The * rcu_read_lock_bh_held() below makes lockdep accept both. */ static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct hlist_head *head = dev_map_index_hash(dtab, key); struct bpf_dtab_netdev *dev; hlist_for_each_entry_rcu(dev, head, index_hlist, lockdep_is_held(&dtab->index_lock)) if (dev->idx == key) return dev; return NULL; } static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); u32 idx, *next = next_key; struct bpf_dtab_netdev *dev, *next_dev; struct hlist_head *head; int i = 0; if (!key) goto find_first; idx = *(u32 *)key; dev = __dev_map_hash_lookup_elem(map, idx); if (!dev) goto find_first; next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)), struct bpf_dtab_netdev, index_hlist); if (next_dev) { *next = next_dev->idx; return 0; } i = idx & (dtab->n_buckets - 1); i++; find_first: for (; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), struct bpf_dtab_netdev, index_hlist); if (next_dev) { *next = next_dev->idx; return 0; } } return -ENOENT; } static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, struct xdp_frame **frames, int n, struct net_device *dev) { struct xdp_txq_info txq = { .dev = dev }; struct xdp_buff xdp; int i, nframes = 0; for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; u32 act; int err; xdp_convert_frame_to_buff(xdpf, &xdp); xdp.txq = &txq; act = bpf_prog_run_xdp(xdp_prog, &xdp); switch (act) { case XDP_PASS: err = xdp_update_frame_from_buff(&xdp, xdpf); if (unlikely(err < 0)) xdp_return_frame_rx_napi(xdpf); else frames[nframes++] = xdpf; break; default: bpf_warn_invalid_xdp_action(NULL, xdp_prog, act); fallthrough; case XDP_ABORTED: trace_xdp_exception(dev, xdp_prog, act); fallthrough; case XDP_DROP: xdp_return_frame_rx_napi(xdpf); break; } } return nframes; /* sent frames count */ } static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags) { struct net_device *dev = bq->dev; unsigned int cnt = bq->count; int sent = 0, err = 0; int to_send = cnt; int i; if (unlikely(!cnt)) return; for (i = 0; i < cnt; i++) { struct xdp_frame *xdpf = bq->q[i]; prefetch(xdpf); } if (bq->xdp_prog) { to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev); if (!to_send) goto out; } sent = dev->netdev_ops->ndo_xdp_xmit(dev, to_send, bq->q, flags); if (sent < 0) { /* If ndo_xdp_xmit fails with an errno, no frames have * been xmit'ed. */ err = sent; sent = 0; } /* If not all frames have been transmitted, it is our * responsibility to free them */ for (i = sent; unlikely(i < to_send); i++) xdp_return_frame_rx_napi(bq->q[i]); out: bq->count = 0; trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, cnt - sent, err); } /* __dev_flush is called from xdp_do_flush() which _must_ be signalled from the * driver before returning from its napi->poll() routine. See the comment above * xdp_do_flush() in filter.c. 
*/ void __dev_flush(void) { struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { bq_xmit_all(bq, XDP_XMIT_FLUSH); bq->dev_rx = NULL; bq->xdp_prog = NULL; __list_del_clearprev(&bq->flush_node); } } #ifdef CONFIG_DEBUG_NET bool dev_check_flush(void) { if (list_empty(this_cpu_ptr(&dev_flush_list))) return false; __dev_flush(); return true; } #endif /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or * by local_bh_disable() (from XDP calls inside NAPI). The * rcu_read_lock_bh_held() below makes lockdep accept both. */ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *obj; if (key >= map->max_entries) return NULL; obj = rcu_dereference_check(dtab->netdev_map[key], rcu_read_lock_bh_held()); return obj; } /* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu * variable access, and map elements stick around. See comment above * xdp_do_flush() in filter.c. */ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) bq_xmit_all(bq, 0); /* Ingress dev_rx will be the same for all xdp_frame's in * bulk_queue, because bq stored per-CPU and must be flushed * from net_device drivers NAPI func end. * * Do the same with xdp_prog and flush_list since these fields * are only ever modified together. */ if (!bq->dev_rx) { bq->dev_rx = dev_rx; bq->xdp_prog = xdp_prog; list_add(&bq->flush_node, flush_list); } bq->q[bq->count++] = xdpf; } static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { int err; if (!(dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT)) return -EOPNOTSUPP; if (unlikely(!(dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT_SG) && xdp_frame_has_frags(xdpf))) return -EOPNOTSUPP; err = xdp_ok_fwd_dev(dev, xdp_get_frame_len(xdpf)); if (unlikely(err)) return err; bq_enqueue(dev, xdpf, dev_rx, xdp_prog); return 0; } static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst) { struct xdp_txq_info txq = { .dev = dst->dev }; struct xdp_buff xdp; u32 act; if (!dst->xdp_prog) return XDP_PASS; __skb_pull(skb, skb->mac_len); xdp.txq = &txq; act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog); switch (act) { case XDP_PASS: __skb_push(skb, skb->mac_len); break; default: bpf_warn_invalid_xdp_action(NULL, dst->xdp_prog, act); fallthrough; case XDP_ABORTED: trace_xdp_exception(dst->dev, dst->xdp_prog, act); fallthrough; case XDP_DROP: kfree_skb(skb); break; } return act; } int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx) { return __xdp_enqueue(dev, xdpf, dev_rx, NULL); } int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, struct net_device *dev_rx) { struct net_device *dev = dst->dev; return __xdp_enqueue(dev, xdpf, dev_rx, dst->xdp_prog); } static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) { if (!obj) return false; if (!(obj->dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT)) return false; if (unlikely(!(obj->dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT_SG) && xdp_frame_has_frags(xdpf))) return false; if (xdp_ok_fwd_dev(obj->dev, 
xdp_get_frame_len(xdpf))) return false; return true; } static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj, struct net_device *dev_rx, struct xdp_frame *xdpf) { struct xdp_frame *nxdpf; nxdpf = xdpf_clone(xdpf); if (!nxdpf) return -ENOMEM; bq_enqueue(obj->dev, nxdpf, dev_rx, obj->xdp_prog); return 0; } static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex) { while (num_excluded--) { if (ifindex == excluded[num_excluded]) return true; } return false; } /* Get ifindex of each upper device. 'indexes' must be able to hold at * least MAX_NEST_DEV elements. * Returns the number of ifindexes added. */ static int get_upper_ifindexes(struct net_device *dev, int *indexes) { struct net_device *upper; struct list_head *iter; int n = 0; netdev_for_each_upper_dev_rcu(dev, upper, iter) { indexes[n++] = upper->ifindex; } return n; } int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dst, *last_dst = NULL; int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; int num_excluded = 0; unsigned int i; int err; if (exclude_ingress) { num_excluded = get_upper_ifindexes(dev_rx, excluded_devices); excluded_devices[num_excluded++] = dev_rx->ifindex; } if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); if (!is_valid_dst(dst, xdpf)) continue; if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ if (!last_dst) { last_dst = dst; continue; } err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf); if (err) return err; last_dst = dst; } } else { /* BPF_MAP_TYPE_DEVMAP_HASH */ for (i = 0; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); hlist_for_each_entry_rcu(dst, head, index_hlist, lockdep_is_held(&dtab->index_lock)) { if (!is_valid_dst(dst, xdpf)) continue; if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ if (!last_dst) { last_dst = dst; continue; } err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf); if (err) return err; last_dst = dst; } } } /* consume the last copy of the frame */ if (last_dst) bq_enqueue(last_dst->dev, xdpf, dev_rx, last_dst->xdp_prog); else xdp_return_frame_rx_napi(xdpf); /* dtab is empty */ return 0; } int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog) { int err; err = xdp_ok_fwd_dev(dst->dev, skb->len); if (unlikely(err)) return err; /* Redirect has already succeeded semantically at this point, so we just * return 0 even if packet is dropped. Helper below takes care of * freeing skb. 
*/ if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS) return 0; skb->dev = dst->dev; generic_xdp_tx(skb, xdp_prog); return 0; } static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog) { struct sk_buff *nskb; int err; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; err = dev_map_generic_redirect(dst, nskb, xdp_prog); if (unlikely(err)) { consume_skb(nskb); return err; } return 0; } int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dst, *last_dst = NULL; int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct hlist_node *next; int num_excluded = 0; unsigned int i; int err; if (exclude_ingress) { num_excluded = get_upper_ifindexes(dev, excluded_devices); excluded_devices[num_excluded++] = dev->ifindex; } if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); if (!dst) continue; if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ if (!last_dst) { last_dst = dst; continue; } err = dev_map_redirect_clone(last_dst, skb, xdp_prog); if (err) return err; last_dst = dst; } } else { /* BPF_MAP_TYPE_DEVMAP_HASH */ for (i = 0; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dst, next, head, index_hlist) { if (!dst) continue; if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ if (!last_dst) { last_dst = dst; continue; } err = dev_map_redirect_clone(last_dst, skb, xdp_prog); if (err) return err; last_dst = dst; } } } /* consume the first skb and return */ if (last_dst) return dev_map_generic_redirect(last_dst, skb, xdp_prog); /* dtab is empty */ consume_skb(skb); return 0; } static void *dev_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); return obj ? &obj->val : NULL; } static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key) { struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map, *(u32 *)key); return obj ? 
&obj->val : NULL; } static void __dev_map_entry_free(struct rcu_head *rcu) { struct bpf_dtab_netdev *dev; dev = container_of(rcu, struct bpf_dtab_netdev, rcu); if (dev->xdp_prog) bpf_prog_put(dev->xdp_prog); dev_put(dev->dev); kfree(dev); } static long dev_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; int k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; old_dev = unrcu_pointer(xchg(&dtab->netdev_map[k], NULL)); if (old_dev) { call_rcu(&old_dev->rcu, __dev_map_entry_free); atomic_dec((atomic_t *)&dtab->items); } return 0; } static long dev_map_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; int k = *(u32 *)key; unsigned long flags; int ret = -ENOENT; spin_lock_irqsave(&dtab->index_lock, flags); old_dev = __dev_map_hash_lookup_elem(map, k); if (old_dev) { dtab->items--; hlist_del_init_rcu(&old_dev->index_hlist); call_rcu(&old_dev->rcu, __dev_map_entry_free); ret = 0; } spin_unlock_irqrestore(&dtab->index_lock, flags); return ret; } static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, struct bpf_dtab *dtab, struct bpf_devmap_val *val, unsigned int idx) { struct bpf_prog *prog = NULL; struct bpf_dtab_netdev *dev; dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev), GFP_NOWAIT | __GFP_NOWARN, dtab->map.numa_node); if (!dev) return ERR_PTR(-ENOMEM); dev->dev = dev_get_by_index(net, val->ifindex); if (!dev->dev) goto err_out; if (val->bpf_prog.fd > 0) { prog = bpf_prog_get_type_dev(val->bpf_prog.fd, BPF_PROG_TYPE_XDP, false); if (IS_ERR(prog)) goto err_put_dev; if (prog->expected_attach_type != BPF_XDP_DEVMAP || !bpf_prog_map_compatible(&dtab->map, prog)) goto err_put_prog; } dev->idx = idx; if (prog) { dev->xdp_prog = prog; dev->val.bpf_prog.id = prog->aux->id; } else { dev->xdp_prog = NULL; dev->val.bpf_prog.id = 0; } dev->val.ifindex = val->ifindex; return dev; err_put_prog: bpf_prog_put(prog); err_put_dev: dev_put(dev->dev); err_out: kfree(dev); return ERR_PTR(-EINVAL); } static long __dev_map_update_elem(struct net *net, struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dev, *old_dev; struct bpf_devmap_val val = {}; u32 i = *(u32 *)key; if (unlikely(map_flags > BPF_EXIST)) return -EINVAL; if (unlikely(i >= dtab->map.max_entries)) return -E2BIG; if (unlikely(map_flags == BPF_NOEXIST)) return -EEXIST; /* already verified value_size <= sizeof val */ memcpy(&val, value, map->value_size); if (!val.ifindex) { dev = NULL; /* can not specify fd if ifindex is 0 */ if (val.bpf_prog.fd > 0) return -EINVAL; } else { dev = __dev_map_alloc_node(net, dtab, &val, i); if (IS_ERR(dev)) return PTR_ERR(dev); } /* Use call_rcu() here to ensure rcu critical sections have completed * Remembering the driver side flush operation will happen before the * net device is removed. 
*/ old_dev = unrcu_pointer(xchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev))); if (old_dev) call_rcu(&old_dev->rcu, __dev_map_entry_free); else atomic_inc((atomic_t *)&dtab->items); return 0; } static long dev_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { return __dev_map_update_elem(current->nsproxy->net_ns, map, key, value, map_flags); } static long __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dev, *old_dev; struct bpf_devmap_val val = {}; u32 idx = *(u32 *)key; unsigned long flags; int err = -EEXIST; /* already verified value_size <= sizeof val */ memcpy(&val, value, map->value_size); if (unlikely(map_flags > BPF_EXIST || !val.ifindex)) return -EINVAL; spin_lock_irqsave(&dtab->index_lock, flags); old_dev = __dev_map_hash_lookup_elem(map, idx); if (old_dev && (map_flags & BPF_NOEXIST)) goto out_err; dev = __dev_map_alloc_node(net, dtab, &val, idx); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out_err; } if (old_dev) { hlist_del_rcu(&old_dev->index_hlist); } else { if (dtab->items >= dtab->map.max_entries) { spin_unlock_irqrestore(&dtab->index_lock, flags); call_rcu(&dev->rcu, __dev_map_entry_free); return -E2BIG; } dtab->items++; } hlist_add_head_rcu(&dev->index_hlist, dev_map_index_hash(dtab, idx)); spin_unlock_irqrestore(&dtab->index_lock, flags); if (old_dev) call_rcu(&old_dev->rcu, __dev_map_entry_free); return 0; out_err: spin_unlock_irqrestore(&dtab->index_lock, flags); return err; } static long dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { return __dev_map_hash_update_elem(current->nsproxy->net_ns, map, key, value, map_flags); } static long dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) { return __bpf_xdp_redirect_map(map, ifindex, flags, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, __dev_map_lookup_elem); } static long dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) { return __bpf_xdp_redirect_map(map, ifindex, flags, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, __dev_map_hash_lookup_elem); } static u64 dev_map_mem_usage(const struct bpf_map *map) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); u64 usage = sizeof(struct bpf_dtab); if (map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) usage += (u64)dtab->n_buckets * sizeof(struct hlist_head); else usage += (u64)map->max_entries * sizeof(struct bpf_dtab_netdev *); usage += atomic_read((atomic_t *)&dtab->items) * (u64)sizeof(struct bpf_dtab_netdev); return usage; } BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab) const struct bpf_map_ops dev_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = dev_map_alloc, .map_free = dev_map_free, .map_get_next_key = dev_map_get_next_key, .map_lookup_elem = dev_map_lookup_elem, .map_update_elem = dev_map_update_elem, .map_delete_elem = dev_map_delete_elem, .map_check_btf = map_check_no_btf, .map_mem_usage = dev_map_mem_usage, .map_btf_id = &dev_map_btf_ids[0], .map_redirect = dev_map_redirect, }; const struct bpf_map_ops dev_map_hash_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc = dev_map_alloc, .map_free = dev_map_free, .map_get_next_key = dev_map_hash_get_next_key, .map_lookup_elem = dev_map_hash_lookup_elem, .map_update_elem = dev_map_hash_update_elem, .map_delete_elem = dev_map_hash_delete_elem, .map_check_btf = map_check_no_btf, .map_mem_usage = dev_map_mem_usage, .map_btf_id = &dev_map_btf_ids[0], .map_redirect 
= dev_hash_map_redirect, }; static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab, struct net_device *netdev) { unsigned long flags; u32 i; spin_lock_irqsave(&dtab->index_lock, flags); for (i = 0; i < dtab->n_buckets; i++) { struct bpf_dtab_netdev *dev; struct hlist_head *head; struct hlist_node *next; head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dev, next, head, index_hlist) { if (netdev != dev->dev) continue; dtab->items--; hlist_del_rcu(&dev->index_hlist); call_rcu(&dev->rcu, __dev_map_entry_free); } } spin_unlock_irqrestore(&dtab->index_lock, flags); } static int dev_map_notification(struct notifier_block *notifier, ulong event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); struct bpf_dtab *dtab; int i, cpu; switch (event) { case NETDEV_REGISTER: if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq) break; /* will be freed in free_netdev() */ netdev->xdp_bulkq = alloc_percpu(struct xdp_dev_bulk_queue); if (!netdev->xdp_bulkq) return NOTIFY_BAD; for_each_possible_cpu(cpu) per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev; break; case NETDEV_UNREGISTER: /* This rcu_read_lock/unlock pair is needed because * dev_map_list is an RCU list AND to ensure a delete * operation does not free a netdev_map entry while we * are comparing it against the netdev being unregistered. */ rcu_read_lock(); list_for_each_entry_rcu(dtab, &dev_map_list, list) { if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) { dev_map_hash_remove_netdev(dtab, netdev); continue; } for (i = 0; i < dtab->map.max_entries; i++) { struct bpf_dtab_netdev *dev, *odev; dev = rcu_dereference(dtab->netdev_map[i]); if (!dev || netdev != dev->dev) continue; odev = unrcu_pointer(cmpxchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev), NULL)); if (dev == odev) { call_rcu(&dev->rcu, __dev_map_entry_free); atomic_dec((atomic_t *)&dtab->items); } } } rcu_read_unlock(); break; default: break; } return NOTIFY_OK; } static struct notifier_block dev_map_notifier = { .notifier_call = dev_map_notification, }; static int __init dev_map_init(void) { int cpu; /* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */ BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) != offsetof(struct _bpf_dtab_netdev, dev)); register_netdevice_notifier(&dev_map_notifier); for_each_possible_cpu(cpu) INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu)); return 0; } subsys_initcall(dev_map_init);
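For context, the BPF-program side that this map type is built to serve (per the comment at the top of the file) looks roughly like the following libbpf-style sketch; the map name, section names, and entry count are illustrative and not taken from this file.

/*
 * Illustrative XDP program using a DEVMAP as the backend for
 * bpf_redirect_map(). Built with clang -target bpf; names are examples.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct bpf_devmap_val)); /* ifindex + prog fd */
	__uint(max_entries, 64);
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_devmap_redirect(struct xdp_md *ctx)
{
	/*
	 * Redirect through slot 0 of the map (populated from user space);
	 * XDP_PASS in the flags is the fallback action if the lookup fails.
	 */
	return bpf_redirect_map(&tx_ports, 0, XDP_PASS);
}

char _license[] SEC("license") = "GPL";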
// SPDX-License-Identifier: GPL-2.0 /* * Devices PM QoS constraints management * * Copyright (C) 2011 Texas Instruments, Inc. * * This module exposes the interface to kernel space for specifying * per-device PM QoS dependencies. It provides infrastructure for registration * of: * * Dependents on a QoS value : register requests * Watchers of QoS value : get notified when target QoS value changes * * This QoS design is best effort based. Dependents register their QoS needs. * Watchers register to keep track of the current QoS needs of the system. * Watchers can register a per-device notification callback using the * dev_pm_qos_*_notifier API. The notification chain data is stored in the * per-device constraint data struct. * * Note about the per-device constraint data struct allocation: * . The per-device constraints data struct ptr is stored into the device * dev_pm_info. * . To minimize the data usage by the per-device constraints, the data struct * is only allocated at the first call to dev_pm_qos_add_request. * . The data is later free'd when the device is removed from the system. * . A global mutex protects the constraints users from the data being * allocated and free'd. */ #include <linux/pm_qos.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/device.h> #include <linux/mutex.h> #include <linux/export.h> #include <linux/pm_runtime.h> #include <linux/err.h> #include <trace/events/power.h> #include "power.h" static DEFINE_MUTEX(dev_pm_qos_mtx); static DEFINE_MUTEX(dev_pm_qos_sysfs_mtx); /** * __dev_pm_qos_flags - Check PM QoS flags for a given device. * @dev: Device to check the PM QoS flags for. * @mask: Flags to check against. * * This routine must be called with dev->power.lock held. */ enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) { struct dev_pm_qos *qos = dev->power.qos; struct pm_qos_flags *pqf; s32 val; lockdep_assert_held(&dev->power.lock); if (IS_ERR_OR_NULL(qos)) return PM_QOS_FLAGS_UNDEFINED; pqf = &qos->flags; if (list_empty(&pqf->list)) return PM_QOS_FLAGS_UNDEFINED; val = pqf->effective_flags & mask; if (val) return (val == mask) ? PM_QOS_FLAGS_ALL : PM_QOS_FLAGS_SOME; return PM_QOS_FLAGS_NONE; } /** * dev_pm_qos_flags - Check PM QoS flags for a given device (locked). * @dev: Device to check the PM QoS flags for. * @mask: Flags to check against. */ enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) { unsigned long irqflags; enum pm_qos_flags_status ret; spin_lock_irqsave(&dev->power.lock, irqflags); ret = __dev_pm_qos_flags(dev, mask); spin_unlock_irqrestore(&dev->power.lock, irqflags); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_flags); /** * __dev_pm_qos_resume_latency - Get resume latency constraint for a given device. * @dev: Device to get the PM QoS constraint value for. * * This routine must be called with dev->power.lock held. */ s32 __dev_pm_qos_resume_latency(struct device *dev) { lockdep_assert_held(&dev->power.lock); return dev_pm_qos_raw_resume_latency(dev); } /** * dev_pm_qos_read_value - Get PM QoS constraint for a given device (locked). * @dev: Device to get the PM QoS constraint value for. * @type: QoS request type.
*/ s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type) { struct dev_pm_qos *qos = dev->power.qos; unsigned long flags; s32 ret; spin_lock_irqsave(&dev->power.lock, flags); switch (type) { case DEV_PM_QOS_RESUME_LATENCY: ret = IS_ERR_OR_NULL(qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : pm_qos_read_value(&qos->resume_latency); break; case DEV_PM_QOS_MIN_FREQUENCY: ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE : freq_qos_read_value(&qos->freq, FREQ_QOS_MIN); break; case DEV_PM_QOS_MAX_FREQUENCY: ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE : freq_qos_read_value(&qos->freq, FREQ_QOS_MAX); break; default: WARN_ON(1); ret = 0; } spin_unlock_irqrestore(&dev->power.lock, flags); return ret; } /** * apply_constraint - Add/modify/remove device PM QoS request. * @req: Constraint request to apply * @action: Action to perform (add/update/remove). * @value: Value to assign to the QoS request. * * Internal function to update the constraints list using the PM QoS core * code and if needed call the per-device callbacks. */ static int apply_constraint(struct dev_pm_qos_request *req, enum pm_qos_req_action action, s32 value) { struct dev_pm_qos *qos = req->dev->power.qos; int ret; switch(req->type) { case DEV_PM_QOS_RESUME_LATENCY: if (WARN_ON(action != PM_QOS_REMOVE_REQ && value < 0)) value = 0; ret = pm_qos_update_target(&qos->resume_latency, &req->data.pnode, action, value); break; case DEV_PM_QOS_LATENCY_TOLERANCE: ret = pm_qos_update_target(&qos->latency_tolerance, &req->data.pnode, action, value); if (ret) { value = pm_qos_read_value(&qos->latency_tolerance); req->dev->power.set_latency_tolerance(req->dev, value); } break; case DEV_PM_QOS_MIN_FREQUENCY: case DEV_PM_QOS_MAX_FREQUENCY: ret = freq_qos_apply(&req->data.freq, action, value); break; case DEV_PM_QOS_FLAGS: ret = pm_qos_update_flags(&qos->flags, &req->data.flr, action, value); break; default: ret = -EINVAL; } return ret; } /* * dev_pm_qos_constraints_allocate * @dev: device to allocate data for * * Called at the first call to add_request, for constraint data allocation * Must be called with the dev_pm_qos_mtx mutex held */ static int dev_pm_qos_constraints_allocate(struct device *dev) { struct dev_pm_qos *qos; struct pm_qos_constraints *c; struct blocking_notifier_head *n; qos = kzalloc(sizeof(*qos), GFP_KERNEL); if (!qos) return -ENOMEM; n = kcalloc(3, sizeof(*n), GFP_KERNEL); if (!n) { kfree(qos); return -ENOMEM; } c = &qos->resume_latency; plist_head_init(&c->list); c->target_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->default_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->no_constraint_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; c->type = PM_QOS_MIN; c->notifiers = n; BLOCKING_INIT_NOTIFIER_HEAD(n); c = &qos->latency_tolerance; plist_head_init(&c->list); c->target_value = PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE; c->default_value = PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE; c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; c->type = PM_QOS_MIN; freq_constraints_init(&qos->freq); INIT_LIST_HEAD(&qos->flags.list); spin_lock_irq(&dev->power.lock); dev->power.qos = qos; spin_unlock_irq(&dev->power.lock); return 0; } static void __dev_pm_qos_hide_latency_limit(struct device *dev); static void __dev_pm_qos_hide_flags(struct device *dev); /** * dev_pm_qos_constraints_destroy * @dev: target device * * Called from the device PM subsystem on device removal under device_pm_lock(). 
*/ void dev_pm_qos_constraints_destroy(struct device *dev) { struct dev_pm_qos *qos; struct dev_pm_qos_request *req, *tmp; struct pm_qos_constraints *c; struct pm_qos_flags *f; mutex_lock(&dev_pm_qos_sysfs_mtx); /* * If the device's PM QoS resume latency limit or PM QoS flags have been * exposed to user space, they have to be hidden at this point. */ pm_qos_sysfs_remove_resume_latency(dev); pm_qos_sysfs_remove_flags(dev); mutex_lock(&dev_pm_qos_mtx); __dev_pm_qos_hide_latency_limit(dev); __dev_pm_qos_hide_flags(dev); qos = dev->power.qos; if (!qos) goto out; /* Flush the constraints lists for the device. */ c = &qos->resume_latency; plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { /* * Update constraints list and call the notification * callbacks if needed */ apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } c = &qos->latency_tolerance; plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } c = &qos->freq.min_freq; plist_for_each_entry_safe(req, tmp, &c->list, data.freq.pnode) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } c = &qos->freq.max_freq; plist_for_each_entry_safe(req, tmp, &c->list, data.freq.pnode) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } f = &qos->flags; list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } spin_lock_irq(&dev->power.lock); dev->power.qos = ERR_PTR(-ENODEV); spin_unlock_irq(&dev->power.lock); kfree(qos->resume_latency.notifiers); kfree(qos); out: mutex_unlock(&dev_pm_qos_mtx); mutex_unlock(&dev_pm_qos_sysfs_mtx); } static bool dev_pm_qos_invalid_req_type(struct device *dev, enum dev_pm_qos_req_type type) { return type == DEV_PM_QOS_LATENCY_TOLERANCE && !dev->power.set_latency_tolerance; } static int __dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { int ret = 0; if (!dev || !req || dev_pm_qos_invalid_req_type(dev, type)) return -EINVAL; if (WARN(dev_pm_qos_request_active(req), "%s() called for already added request\n", __func__)) return -EINVAL; if (IS_ERR(dev->power.qos)) ret = -ENODEV; else if (!dev->power.qos) ret = dev_pm_qos_constraints_allocate(dev); trace_dev_pm_qos_add_request(dev_name(dev), type, value); if (ret) return ret; req->dev = dev; req->type = type; if (req->type == DEV_PM_QOS_MIN_FREQUENCY) ret = freq_qos_add_request(&dev->power.qos->freq, &req->data.freq, FREQ_QOS_MIN, value); else if (req->type == DEV_PM_QOS_MAX_FREQUENCY) ret = freq_qos_add_request(&dev->power.qos->freq, &req->data.freq, FREQ_QOS_MAX, value); else ret = apply_constraint(req, PM_QOS_ADD_REQ, value); return ret; } /** * dev_pm_qos_add_request - inserts new qos request into the list * @dev: target device for the constraint * @req: pointer to a preallocated handle * @type: type of the request * @value: defines the qos request * * This function inserts a new entry in the device constraints list of * requested qos performance characteristics. It recomputes the aggregate * QoS expectations of parameters and initializes the dev_pm_qos_request * handle. Caller needs to save this handle for later use in updates and * removal. 
* * Returns 1 if the aggregated constraint value has changed, * 0 if the aggregated constraint value has not changed, * -EINVAL in case of wrong parameters, -ENOMEM if there's not enough memory * to allocate for data structures, -ENODEV if the device has just been removed * from the system. * * Callers should ensure that the target device is not RPM_SUSPENDED before * using this function for requests of type DEV_PM_QOS_FLAGS. */ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { int ret; mutex_lock(&dev_pm_qos_mtx); ret = __dev_pm_qos_add_request(dev, req, type, value); mutex_unlock(&dev_pm_qos_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); /** * __dev_pm_qos_update_request - Modify an existing device PM QoS request. * @req : PM QoS request to modify. * @new_value: New value to request. */ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { s32 curr_value; int ret = 0; if (!req) /*guard against callers passing in null */ return -EINVAL; if (WARN(!dev_pm_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; if (IS_ERR_OR_NULL(req->dev->power.qos)) return -ENODEV; switch(req->type) { case DEV_PM_QOS_RESUME_LATENCY: case DEV_PM_QOS_LATENCY_TOLERANCE: curr_value = req->data.pnode.prio; break; case DEV_PM_QOS_MIN_FREQUENCY: case DEV_PM_QOS_MAX_FREQUENCY: curr_value = req->data.freq.pnode.prio; break; case DEV_PM_QOS_FLAGS: curr_value = req->data.flr.flags; break; default: return -EINVAL; } trace_dev_pm_qos_update_request(dev_name(req->dev), req->type, new_value); if (curr_value != new_value) ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value); return ret; } /** * dev_pm_qos_update_request - modifies an existing qos request * @req : handle to list element holding a dev_pm_qos request to use * @new_value: defines the qos request * * Updates an existing dev PM qos request along with updating the * target value. * * Attempts are made to make this code callable on hot code paths. * * Returns 1 if the aggregated constraint value has changed, * 0 if the aggregated constraint value has not changed, * -EINVAL in case of wrong parameters, -ENODEV if the device has been * removed from the system * * Callers should ensure that the target device is not RPM_SUSPENDED before * using this function for requests of type DEV_PM_QOS_FLAGS. */ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { int ret; mutex_lock(&dev_pm_qos_mtx); ret = __dev_pm_qos_update_request(req, new_value); mutex_unlock(&dev_pm_qos_mtx); return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); static int __dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { int ret; if (!req) /*guard against callers passing in null */ return -EINVAL; if (WARN(!dev_pm_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; if (IS_ERR_OR_NULL(req->dev->power.qos)) return -ENODEV; trace_dev_pm_qos_remove_request(dev_name(req->dev), req->type, PM_QOS_DEFAULT_VALUE); ret = apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); return ret; } /** * dev_pm_qos_remove_request - modifies an existing qos request * @req: handle to request list element * * Will remove pm qos request from the list of constraints and * recompute the current target value. Call this on slow code paths. 
 *
 * Returns 1 if the aggregated constraint value has changed,
 * 0 if the aggregated constraint value has not changed,
 * -EINVAL in case of wrong parameters, -ENODEV if the device has been
 * removed from the system
 *
 * Callers should ensure that the target device is not RPM_SUSPENDED before
 * using this function for requests of type DEV_PM_QOS_FLAGS.
 */
int dev_pm_qos_remove_request(struct dev_pm_qos_request *req)
{
	int ret;

	mutex_lock(&dev_pm_qos_mtx);
	ret = __dev_pm_qos_remove_request(req);
	mutex_unlock(&dev_pm_qos_mtx);
	return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request);

/**
 * dev_pm_qos_add_notifier - sets notification entry for changes to target value
 * of per-device PM QoS constraints
 *
 * @dev: target device for the constraint
 * @notifier: notifier block managed by caller.
 * @type: request type.
 *
 * Will register the notifier into a notification chain that gets called
 * upon changes to the target value for the device.
 *
 * If the device's constraints object doesn't exist when this routine is called,
 * it will be created (or error code will be returned if that fails).
 */
int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier,
			    enum dev_pm_qos_req_type type)
{
	int ret = 0;

	mutex_lock(&dev_pm_qos_mtx);

	if (IS_ERR(dev->power.qos))
		ret = -ENODEV;
	else if (!dev->power.qos)
		ret = dev_pm_qos_constraints_allocate(dev);

	if (ret)
		goto unlock;

	switch (type) {
	case DEV_PM_QOS_RESUME_LATENCY:
		ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers,
						       notifier);
		break;
	case DEV_PM_QOS_MIN_FREQUENCY:
		ret = freq_qos_add_notifier(&dev->power.qos->freq,
					    FREQ_QOS_MIN, notifier);
		break;
	case DEV_PM_QOS_MAX_FREQUENCY:
		ret = freq_qos_add_notifier(&dev->power.qos->freq,
					    FREQ_QOS_MAX, notifier);
		break;
	default:
		WARN_ON(1);
		ret = -EINVAL;
	}

unlock:
	mutex_unlock(&dev_pm_qos_mtx);
	return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier);

/**
 * dev_pm_qos_remove_notifier - deletes notification for changes to target value
 * of per-device PM QoS constraints
 *
 * @dev: target device for the constraint
 * @notifier: notifier block to be removed.
 * @type: request type.
 *
 * Will remove the notifier from the notification chain that gets called
 * upon changes to the target value.
 */
int dev_pm_qos_remove_notifier(struct device *dev,
			       struct notifier_block *notifier,
			       enum dev_pm_qos_req_type type)
{
	int ret = 0;

	mutex_lock(&dev_pm_qos_mtx);

	/* Silently return if the constraints object is not present. */
	if (IS_ERR_OR_NULL(dev->power.qos))
		goto unlock;

	switch (type) {
	case DEV_PM_QOS_RESUME_LATENCY:
		ret = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers,
							 notifier);
		break;
	case DEV_PM_QOS_MIN_FREQUENCY:
		ret = freq_qos_remove_notifier(&dev->power.qos->freq,
					       FREQ_QOS_MIN, notifier);
		break;
	case DEV_PM_QOS_MAX_FREQUENCY:
		ret = freq_qos_remove_notifier(&dev->power.qos->freq,
					       FREQ_QOS_MAX, notifier);
		break;
	default:
		WARN_ON(1);
		ret = -EINVAL;
	}

unlock:
	mutex_unlock(&dev_pm_qos_mtx);
	return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_qos_remove_notifier);

/**
 * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor.
 * @dev: Device whose ancestor to add the request for.
 * @req: Pointer to the preallocated handle.
 * @type: Type of the request.
 * @value: Constraint latency value.
 */
int dev_pm_qos_add_ancestor_request(struct device *dev,
				    struct dev_pm_qos_request *req,
				    enum dev_pm_qos_req_type type, s32 value)
{
	struct device *ancestor = dev->parent;
	int ret = -ENODEV;

	switch (type) {
	case DEV_PM_QOS_RESUME_LATENCY:
		while (ancestor && !ancestor->power.ignore_children)
			ancestor = ancestor->parent;

		break;
	case DEV_PM_QOS_LATENCY_TOLERANCE:
		while (ancestor && !ancestor->power.set_latency_tolerance)
			ancestor = ancestor->parent;

		break;
	default:
		ancestor = NULL;
	}
	if (ancestor)
		ret = dev_pm_qos_add_request(ancestor, req, type, value);

	if (ret < 0)
		req->dev = NULL;

	return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request);

static void __dev_pm_qos_drop_user_request(struct device *dev,
					   enum dev_pm_qos_req_type type)
{
	struct dev_pm_qos_request *req = NULL;

	switch(type) {
	case DEV_PM_QOS_RESUME_LATENCY:
		req = dev->power.qos->resume_latency_req;
		dev->power.qos->resume_latency_req = NULL;
		break;
	case DEV_PM_QOS_LATENCY_TOLERANCE:
		req = dev->power.qos->latency_tolerance_req;
		dev->power.qos->latency_tolerance_req = NULL;
		break;
	case DEV_PM_QOS_FLAGS:
		req = dev->power.qos->flags_req;
		dev->power.qos->flags_req = NULL;
		break;
	default:
		WARN_ON(1);
		return;
	}
	__dev_pm_qos_remove_request(req);
	kfree(req);
}

static void dev_pm_qos_drop_user_request(struct device *dev,
					 enum dev_pm_qos_req_type type)
{
	mutex_lock(&dev_pm_qos_mtx);
	__dev_pm_qos_drop_user_request(dev, type);
	mutex_unlock(&dev_pm_qos_mtx);
}

/**
 * dev_pm_qos_expose_latency_limit - Expose PM QoS latency limit to user space.
 * @dev: Device whose PM QoS latency limit is to be exposed to user space.
 * @value: Initial value of the latency limit.
 */
int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value)
{
	struct dev_pm_qos_request *req;
	int ret;

	if (!device_is_registered(dev) || value < 0)
		return -EINVAL;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_RESUME_LATENCY, value);
	if (ret < 0) {
		kfree(req);
		return ret;
	}

	mutex_lock(&dev_pm_qos_sysfs_mtx);

	mutex_lock(&dev_pm_qos_mtx);

	if (IS_ERR_OR_NULL(dev->power.qos))
		ret = -ENODEV;
	else if (dev->power.qos->resume_latency_req)
		ret = -EEXIST;

	if (ret < 0) {
		__dev_pm_qos_remove_request(req);
		kfree(req);
		mutex_unlock(&dev_pm_qos_mtx);
		goto out;
	}
	dev->power.qos->resume_latency_req = req;

	mutex_unlock(&dev_pm_qos_mtx);

	ret = pm_qos_sysfs_add_resume_latency(dev);
	if (ret)
		dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_RESUME_LATENCY);

 out:
	mutex_unlock(&dev_pm_qos_sysfs_mtx);
	return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit);

static void __dev_pm_qos_hide_latency_limit(struct device *dev)
{
	if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->resume_latency_req)
		__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_RESUME_LATENCY);
}

/**
 * dev_pm_qos_hide_latency_limit - Hide PM QoS latency limit from user space.
 * @dev: Device whose PM QoS latency limit is to be hidden from user space.
 */
void dev_pm_qos_hide_latency_limit(struct device *dev)
{
	mutex_lock(&dev_pm_qos_sysfs_mtx);

	pm_qos_sysfs_remove_resume_latency(dev);

	mutex_lock(&dev_pm_qos_mtx);
	__dev_pm_qos_hide_latency_limit(dev);
	mutex_unlock(&dev_pm_qos_mtx);

	mutex_unlock(&dev_pm_qos_sysfs_mtx);
}
EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit);

/**
 * dev_pm_qos_expose_flags - Expose PM QoS flags of a device to user space.
 * @dev: Device whose PM QoS flags are to be exposed to user space.
 * @val: Initial values of the flags.
 */
int dev_pm_qos_expose_flags(struct device *dev, s32 val)
{
	struct dev_pm_qos_request *req;
	int ret;

	if (!device_is_registered(dev))
		return -EINVAL;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_FLAGS, val);
	if (ret < 0) {
		kfree(req);
		return ret;
	}

	pm_runtime_get_sync(dev);
	mutex_lock(&dev_pm_qos_sysfs_mtx);

	mutex_lock(&dev_pm_qos_mtx);

	if (IS_ERR_OR_NULL(dev->power.qos))
		ret = -ENODEV;
	else if (dev->power.qos->flags_req)
		ret = -EEXIST;

	if (ret < 0) {
		__dev_pm_qos_remove_request(req);
		kfree(req);
		mutex_unlock(&dev_pm_qos_mtx);
		goto out;
	}
	dev->power.qos->flags_req = req;

	mutex_unlock(&dev_pm_qos_mtx);

	ret = pm_qos_sysfs_add_flags(dev);
	if (ret)
		dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS);

 out:
	mutex_unlock(&dev_pm_qos_sysfs_mtx);
	pm_runtime_put(dev);
	return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags);

static void __dev_pm_qos_hide_flags(struct device *dev)
{
	if (!IS_ERR_OR_NULL(dev->power.qos) && dev->power.qos->flags_req)
		__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS);
}

/**
 * dev_pm_qos_hide_flags - Hide PM QoS flags of a device from user space.
 * @dev: Device whose PM QoS flags are to be hidden from user space.
 */
void dev_pm_qos_hide_flags(struct device *dev)
{
	pm_runtime_get_sync(dev);

	mutex_lock(&dev_pm_qos_sysfs_mtx);

	pm_qos_sysfs_remove_flags(dev);

	mutex_lock(&dev_pm_qos_mtx);
	__dev_pm_qos_hide_flags(dev);
	mutex_unlock(&dev_pm_qos_mtx);

	mutex_unlock(&dev_pm_qos_sysfs_mtx);
	pm_runtime_put(dev);
}
EXPORT_SYMBOL_GPL(dev_pm_qos_hide_flags);

/**
 * dev_pm_qos_update_flags - Update PM QoS flags request owned by user space.
 * @dev: Device to update the PM QoS flags request for.
 * @mask: Flags to set/clear.
 * @set: Whether to set or clear the flags (true means set).
 */
int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set)
{
	s32 value;
	int ret;

	pm_runtime_get_sync(dev);
	mutex_lock(&dev_pm_qos_mtx);

	if (IS_ERR_OR_NULL(dev->power.qos) || !dev->power.qos->flags_req) {
		ret = -EINVAL;
		goto out;
	}

	value = dev_pm_qos_requested_flags(dev);
	if (set)
		value |= mask;
	else
		value &= ~mask;

	ret = __dev_pm_qos_update_request(dev->power.qos->flags_req, value);

 out:
	mutex_unlock(&dev_pm_qos_mtx);
	pm_runtime_put(dev);
	return ret;
}

/**
 * dev_pm_qos_get_user_latency_tolerance - Get user space latency tolerance.
 * @dev: Device to obtain the user space latency tolerance for.
 */
s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev)
{
	s32 ret;

	mutex_lock(&dev_pm_qos_mtx);
	ret = IS_ERR_OR_NULL(dev->power.qos)
		|| !dev->power.qos->latency_tolerance_req ?
			PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT :
			dev->power.qos->latency_tolerance_req->data.pnode.prio;
	mutex_unlock(&dev_pm_qos_mtx);
	return ret;
}

/**
 * dev_pm_qos_update_user_latency_tolerance - Update user space latency tolerance.
 * @dev: Device to update the user space latency tolerance for.
 * @val: New user space latency tolerance for @dev (negative values disable).
 */
int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val)
{
	int ret;

	mutex_lock(&dev_pm_qos_mtx);

	if (IS_ERR_OR_NULL(dev->power.qos)
	    || !dev->power.qos->latency_tolerance_req) {
		struct dev_pm_qos_request *req;

		if (val < 0) {
			if (val == PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT)
				ret = 0;
			else
				ret = -EINVAL;
			goto out;
		}
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req) {
			ret = -ENOMEM;
			goto out;
		}
		ret = __dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY_TOLERANCE, val);
		if (ret < 0) {
			kfree(req);
			goto out;
		}
		dev->power.qos->latency_tolerance_req = req;
	} else {
		if (val < 0) {
			__dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY_TOLERANCE);
			ret = 0;
		} else {
			ret = __dev_pm_qos_update_request(dev->power.qos->latency_tolerance_req,
							  val);
		}
	}

 out:
	mutex_unlock(&dev_pm_qos_mtx);
	return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_qos_update_user_latency_tolerance);

/**
 * dev_pm_qos_expose_latency_tolerance - Expose latency tolerance to userspace
 * @dev: Device whose latency tolerance to expose
 */
int dev_pm_qos_expose_latency_tolerance(struct device *dev)
{
	int ret;

	if (!dev->power.set_latency_tolerance)
		return -EINVAL;

	mutex_lock(&dev_pm_qos_sysfs_mtx);
	ret = pm_qos_sysfs_add_latency_tolerance(dev);
	mutex_unlock(&dev_pm_qos_sysfs_mtx);

	return ret;
}
EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_tolerance);

/**
 * dev_pm_qos_hide_latency_tolerance - Hide latency tolerance from userspace
 * @dev: Device whose latency tolerance to hide
 */
void dev_pm_qos_hide_latency_tolerance(struct device *dev)
{
	mutex_lock(&dev_pm_qos_sysfs_mtx);
	pm_qos_sysfs_remove_latency_tolerance(dev);
	mutex_unlock(&dev_pm_qos_sysfs_mtx);

	/* Remove the request from user space now */
	pm_runtime_get_sync(dev);
	dev_pm_qos_update_user_latency_tolerance(dev,
		PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT);
	pm_runtime_put(dev);
}
EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_tolerance);
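/*
 * Illustrative usage sketch (editor's addition, not part of qos.c): how a
 * driver might use the per-device PM QoS request API documented above.
 * The names my_probe, my_dev and my_latency_req are hypothetical; only the
 * dev_pm_qos_*() calls are taken from the interface implemented in this
 * file, assuming dev_pm_qos_update_request() keeps its (req, new_value)
 * signature.
 */
#include <linux/pm_qos.h>

static struct dev_pm_qos_request my_latency_req;

static int my_probe(struct device *my_dev)
{
	int ret;

	/* Ask for a device resume latency of no more than 100 us. */
	ret = dev_pm_qos_add_request(my_dev, &my_latency_req,
				     DEV_PM_QOS_RESUME_LATENCY, 100);
	if (ret < 0)
		return ret;

	/* Relax the constraint to 500 us once the latency-critical phase ends. */
	dev_pm_qos_update_request(&my_latency_req, 500);

	/* Drop the request when it is no longer needed. */
	dev_pm_qos_remove_request(&my_latency_req);

	return 0;
}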
/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  include/linux/signalfd.h
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 */
#ifndef _LINUX_SIGNALFD_H
#define _LINUX_SIGNALFD_H

#include <uapi/linux/signalfd.h>
#include <linux/sched/signal.h>

#ifdef CONFIG_SIGNALFD

/*
 * Deliver the signal to listening signalfd.
 */
static inline void signalfd_notify(struct task_struct *tsk, int sig)
{
	if (unlikely(waitqueue_active(&tsk->sighand->signalfd_wqh)))
		wake_up(&tsk->sighand->signalfd_wqh);
}

extern void signalfd_cleanup(struct sighand_struct *sighand);

#else /* CONFIG_SIGNALFD */

static inline void signalfd_notify(struct task_struct *tsk, int sig) { }

static inline void signalfd_cleanup(struct sighand_struct *sighand) { }

#endif /* CONFIG_SIGNALFD */

#endif /* _LINUX_SIGNALFD_H */
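/*
 * Illustrative sketch (editor's addition, not part of signalfd.h): the signal
 * delivery path is expected to call signalfd_notify() under the target's
 * sighand->siglock, just before the signal is queued, so that a task sleeping
 * in a signalfd read() is woken up.  The helper below is hypothetical; only
 * signalfd_notify() comes from this header.
 */
#include <linux/signalfd.h>
#include <linux/sched/signal.h>

static void queue_signal_sketch(struct task_struct *tsk, int sig)
{
	/* Caller is assumed to hold tsk->sighand->siglock. */
	signalfd_notify(tsk, sig);

	/*
	 * The real delivery path then marks the signal pending and wakes the
	 * task, roughly as sketched here.
	 */
	sigaddset(&tsk->pending.signal, sig);
	signal_wake_up(tsk, sig == SIGKILL);
}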
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMZONE_H
#define _LINUX_MMZONE_H

#ifndef __ASSEMBLY__
#ifndef __GENERATING_BOUNDS_H

#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/list_nulls.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/numa.h>
#include <linux/init.h>
#include <linux/seqlock.h>
#include <linux/nodemask.h>
#include <linux/pageblock-flags.h>
#include <linux/page-flags-layout.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/local_lock.h>
#include <linux/zswap.h>
#include <asm/page.h>

/* Free memory management - zoned buddy allocator.  */
#ifndef CONFIG_ARCH_FORCE_MAX_ORDER
#define MAX_PAGE_ORDER 10
#else
#define MAX_PAGE_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
#endif
#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER)

#define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES)

#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1)

/*
 * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
 * costly to service.  That is between allocation orders which should
 * coalesce naturally under reasonable reclaim pressure and those which
 * will not.
*/ #define PAGE_ALLOC_COSTLY_ORDER 3 enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way * ZONE_MOVABLE works. Only movable pages can be allocated * from MIGRATE_CMA pageblocks and page allocator never * implicitly change migration type of MIGRATE_CMA pageblock. * * The way to use it is to change migratetype of a range of * pageblocks to MIGRATE_CMA which can be done by * __free_pageblock_cma() function. */ MIGRATE_CMA, #endif #ifdef CONFIG_MEMORY_ISOLATION MIGRATE_ISOLATE, /* can't allocate from here */ #endif MIGRATE_TYPES }; /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) # define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) #else # define is_migrate_cma(migratetype) false # define is_migrate_cma_page(_page) false # define is_migrate_cma_folio(folio, pfn) false #endif static inline bool is_migrate_movable(int mt) { return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE; } /* * Check whether a migratetype can be merged with another migratetype. * * It is only mergeable when it can fall back to other migratetypes for * allocation. See fallbacks[MIGRATE_TYPES][3] in page_alloc.c. */ static inline bool migratetype_is_mergeable(int mt) { return mt < MIGRATE_PCPTYPES; } #define for_each_migratetype_order(order, type) \ for (order = 0; order < NR_PAGE_ORDERS; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) extern int page_group_by_mobility_disabled; #define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1) #define get_pageblock_migratetype(page) \ get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK) #define folio_migratetype(folio) \ get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \ MIGRATETYPE_MASK) struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; }; struct pglist_data; #ifdef CONFIG_NUMA enum numa_stat_item { NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ NUMA_FOREIGN, /* was intended here, hit elsewhere */ NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ NUMA_LOCAL, /* allocation from local node */ NUMA_OTHER, /* allocation from other node */ NR_VM_NUMA_EVENT_ITEMS }; #else #define NR_VM_NUMA_EVENT_ITEMS 0 #endif enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, NR_ZONE_ACTIVE_ANON, NR_ZONE_INACTIVE_FILE, NR_ZONE_ACTIVE_FILE, NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif NR_FREE_CMA_PAGES, #ifdef CONFIG_UNACCEPTED_MEMORY NR_UNACCEPTED, #endif NR_VM_ZONE_STAT_ITEMS }; enum node_stat_item { NR_LRU_BASE, NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ 
NR_UNEVICTABLE, /* " " " " " */ NR_SLAB_RECLAIMABLE_B, NR_SLAB_UNRECLAIMABLE_B, NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_NODES, WORKINGSET_REFAULT_BASE, WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE, WORKINGSET_REFAULT_FILE, WORKINGSET_ACTIVATE_BASE, WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE, WORKINGSET_ACTIVATE_FILE, WORKINGSET_RESTORE_BASE, WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE, WORKINGSET_RESTORE_FILE, WORKINGSET_NODERECLAIM, NR_ANON_MAPPED, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, NR_FILE_DIRTY, NR_WRITEBACK, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_SHMEM_THPS, NR_SHMEM_PMDMAPPED, NR_FILE_THPS, NR_FILE_PMDMAPPED, NR_ANON_THPS, NR_VMSCAN_WRITE, NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ NR_THROTTLED_WRITTEN, /* NR_WRITTEN while reclaim throttled */ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ NR_KERNEL_STACK_KB, /* measured in KiB */ #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) NR_KERNEL_SCS_KB, /* measured in KiB */ #endif NR_PAGETABLE, /* used for pagetables */ NR_SECONDARY_PAGETABLE, /* secondary pagetables, e.g. KVM pagetables */ #ifdef CONFIG_SWAP NR_SWAPCACHE, #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ PGPROMOTE_CANDIDATE, /* candidate pages to promote */ #endif /* PGDEMOTE_*: pages demoted */ PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, NR_VM_NODE_STAT_ITEMS }; /* * Returns true if the item should be printed in THPs (/proc/vmstat * currently prints number of anon, file and shmem THPs. But the item * is charged in pages). */ static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item) { if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return false; return item == NR_ANON_THPS || item == NR_FILE_THPS || item == NR_SHMEM_THPS || item == NR_SHMEM_PMDMAPPED || item == NR_FILE_PMDMAPPED; } /* * Returns true if the value is measured in bytes (most vmstat values are * measured in pages). This defines the API part, the internal representation * might be different. */ static __always_inline bool vmstat_item_in_bytes(int idx) { /* * Global and per-node slab counters track slab pages. * It's expected that changes are multiples of PAGE_SIZE. * Internally values are stored in pages. * * Per-memcg and per-lruvec counters track memory, consumed * by individual slab objects. These counters are actually * byte-precise. */ return (idx == NR_SLAB_RECLAIMABLE_B || idx == NR_SLAB_UNRECLAIMABLE_B); } /* * We do arithmetic on the LRU lists in various places in the code, * so it is important to keep the active lists LRU_ACTIVE higher in * the array than the corresponding inactive lists, and to keep * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists. 
* * This has to be kept in sync with the statistics in zone_stat_item * above and the descriptions in vmstat_text in mm/vmstat.c */ #define LRU_BASE 0 #define LRU_ACTIVE 1 #define LRU_FILE 2 enum lru_list { LRU_INACTIVE_ANON = LRU_BASE, LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, LRU_UNEVICTABLE, NR_LRU_LISTS }; enum vmscan_throttle_state { VMSCAN_THROTTLE_WRITEBACK, VMSCAN_THROTTLE_ISOLATED, VMSCAN_THROTTLE_NOPROGRESS, VMSCAN_THROTTLE_CONGESTED, NR_VMSCAN_THROTTLE, }; #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) static inline bool is_file_lru(enum lru_list lru) { return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE); } static inline bool is_active_lru(enum lru_list lru) { return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } #define WORKINGSET_ANON 0 #define WORKINGSET_FILE 1 #define ANON_AND_FILE 2 enum lruvec_flags { /* * An lruvec has many dirty pages backed by a congested BDI: * 1. LRUVEC_CGROUP_CONGESTED is set by cgroup-level reclaim. * It can be cleared by cgroup reclaim or kswapd. * 2. LRUVEC_NODE_CONGESTED is set by kswapd node-level reclaim. * It can only be cleared by kswapd. * * Essentially, kswapd can unthrottle an lruvec throttled by cgroup * reclaim, but not vice versa. This only applies to the root cgroup. * The goal is to prevent cgroup reclaim on the root cgroup (e.g. * memory.reclaim) to unthrottle an unbalanced node (that was throttled * by kswapd). */ LRUVEC_CGROUP_CONGESTED, LRUVEC_NODE_CONGESTED, }; #endif /* !__GENERATING_BOUNDS_H */ /* * Evictable pages are divided into multiple generations. The youngest and the * oldest generation numbers, max_seq and min_seq, are monotonically increasing. * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the * corresponding generation. The gen counter in folio->flags stores gen+1 while * a page is on one of lrugen->folios[]. Otherwise it stores 0. * * A page is added to the youngest generation on faulting. The aging needs to * check the accessed bit at least twice before handing this page over to the * eviction. The first check takes care of the accessed bit set on the initial * fault; the second check makes sure this page hasn't been used since then. * This process, AKA second chance, requires a minimum of two generations, * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive * LRU, e.g., /proc/vmstat, these two generations are considered active; the * rest of generations, if they exist, are considered inactive. See * lru_gen_is_active(). * * PG_active is always cleared while a page is on one of lrugen->folios[] so * that the aging needs not to worry about it. And it's set again when a page * considered active is isolated for non-reclaiming purposes, e.g., migration. * See lru_gen_add_folio() and lru_gen_del_folio(). * * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits * in folio->flags. */ #define MIN_NR_GENS 2U #define MAX_NR_GENS 4U /* * Each generation is divided into multiple tiers. A page accessed N times * through file descriptors is in tier order_base_2(N). 
A page in the first tier * (N=0,1) is marked by PG_referenced unless it was faulted in through page * tables or read ahead. A page in any other tier (N>1) is marked by * PG_referenced and PG_workingset. This implies a minimum of two tiers is * supported without using additional bits in folio->flags. * * In contrast to moving across generations which requires the LRU lock, moving * across tiers only involves atomic operations on folio->flags and therefore * has a negligible cost in the buffered access path. In the eviction path, * comparisons of refaulted/(evicted+protected) from the first tier and the * rest infer whether pages accessed multiple times through file descriptors * are statistically hot and thus worth protecting. * * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in * folio->flags. */ #define MAX_NR_TIERS 4U #ifndef __GENERATING_BOUNDS_H struct lruvec; struct page_vma_mapped_walk; #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) #ifdef CONFIG_LRU_GEN enum { LRU_GEN_ANON, LRU_GEN_FILE, }; enum { LRU_GEN_CORE, LRU_GEN_MM_WALK, LRU_GEN_NONLEAF_YOUNG, NR_LRU_GEN_CAPS }; #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) /* whether to keep historical stats from evicted generations */ #ifdef CONFIG_LRU_GEN_STATS #define NR_HIST_GENS MAX_NR_GENS #else #define NR_HIST_GENS 1U #endif /* * The youngest generation number is stored in max_seq for both anon and file * types as they are aged on an equal footing. The oldest generation numbers are * stored in min_seq[] separately for anon and file types as clean file pages * can be evicted regardless of swap constraints. * * Normally anon and file min_seq are in sync. But if swapping is constrained, * e.g., out of swap space, file min_seq is allowed to advance and leave anon * min_seq behind. * * The number of pages in each generation is eventually consistent and therefore * can be transiently negative when reset_batch_size() is pending. 
*/ struct lru_gen_folio { /* the aging increments the youngest generation number */ unsigned long max_seq; /* the eviction increments the oldest generation numbers */ unsigned long min_seq[ANON_AND_FILE]; /* the birth time of each generation in jiffies */ unsigned long timestamps[MAX_NR_GENS]; /* the multi-gen LRU lists, lazily sorted on eviction */ struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the multi-gen LRU sizes, eventually consistent */ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the exponential moving average of refaulted */ unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; /* the exponential moving average of evicted+protected */ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; /* the first tier doesn't need protection, hence the minus one */ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; /* can be modified without holding the LRU lock */ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* whether the multi-gen LRU is enabled */ bool enabled; /* the memcg generation this lru_gen_folio belongs to */ u8 gen; /* the list segment this lru_gen_folio belongs to */ u8 seg; /* per-node lru_gen_folio list for global reclaim */ struct hlist_nulls_node list; }; enum { MM_LEAF_TOTAL, /* total leaf entries */ MM_LEAF_OLD, /* old leaf entries */ MM_LEAF_YOUNG, /* young leaf entries */ MM_NONLEAF_TOTAL, /* total non-leaf entries */ MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ NR_MM_STATS }; /* double-buffering Bloom filters */ #define NR_BLOOM_FILTERS 2 struct lru_gen_mm_state { /* synced with max_seq after each iteration */ unsigned long seq; /* where the current iteration continues after */ struct list_head *head; /* where the last iteration ended before */ struct list_head *tail; /* Bloom filters flip after each iteration */ unsigned long *filters[NR_BLOOM_FILTERS]; /* the mm stats for debugging */ unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; }; struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; /* max_seq from lru_gen_folio: can be out of date */ unsigned long seq; /* the next address within an mm to scan */ unsigned long next_addr; /* to batch promoted pages */ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* to batch the mm stats */ int mm_stats[NR_MM_STATS]; /* total batched items */ int batched; bool can_swap; bool force_scan; }; /* * For each node, memcgs are divided into two generations: the old and the * young. For each generation, memcgs are randomly sharded into multiple bins * to improve scalability. For each bin, the hlist_nulls is virtually divided * into three segments: the head, the tail and the default. * * An onlining memcg is added to the tail of a random bin in the old generation. * The eviction starts at the head of a random bin in the old generation. The * per-node memcg generation counter, whose reminder (mod MEMCG_NR_GENS) indexes * the old generation, is incremented when all its bins become empty. * * There are four operations: * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; * 3. 
MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; * 3. The first attempt to reclaim a memcg offlined or below reclaimable size * threshold, which triggers MEMCG_LRU_TAIL; * 4. The second attempt to reclaim a memcg offlined or below reclaimable size * threshold, which triggers MEMCG_LRU_YOUNG; * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * * Notes: * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing * of their max_seq counters ensures the eventual fairness to all eligible * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). * 2. There are only two valid generations: old (seq) and young (seq+1). * MEMCG_NR_GENS is set to three so that when reading the generation counter * locklessly, a stale value (seq-1) does not wraparound to young. */ #define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { /* the per-node memcg generation counter */ unsigned long seq; /* each memcg has one lru_gen_folio per node */ unsigned long nr_memcgs[MEMCG_NR_GENS]; /* per-node lru_gen_folio list for global reclaim */ struct hlist_nulls_head fifo[MEMCG_NR_GENS][MEMCG_NR_BINS]; /* protects the above */ spinlock_t lock; }; void lru_gen_init_pgdat(struct pglist_data *pgdat); void lru_gen_init_lruvec(struct lruvec *lruvec); void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); void lru_gen_online_memcg(struct mem_cgroup *memcg); void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); #else /* !CONFIG_LRU_GEN */ static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) { } static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) { } static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_online_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) { } static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { } #endif /* CONFIG_LRU_GEN */ struct lruvec { struct list_head lists[NR_LRU_LISTS]; /* per lruvec lru_lock for memcg */ spinlock_t lru_lock; /* * These track the cost of reclaiming one LRU - file or anon - * over the other. As the observed cost of reclaiming one LRU * increases, the reclaim scan balance tips toward the other. 
*/ unsigned long anon_cost; unsigned long file_cost; /* Non-resident age, driven by LRU movement */ atomic_long_t nonresident_age; /* Refaults at the time of last reclaim cycle */ unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ struct lru_gen_folio lrugen; #ifdef CONFIG_LRU_GEN_WALKS_MMU /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif #endif /* CONFIG_LRU_GEN */ #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif struct zswap_lruvec_state zswap_lruvec_state; }; /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ #define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8) /* LRU Isolation modes. */ typedef unsigned __bitwise isolate_mode_t; enum zone_watermarks { WMARK_MIN, WMARK_LOW, WMARK_HIGH, WMARK_PROMO, NR_WMARK }; /* * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list * for THP which will usually be GFP_MOVABLE. Even if it is another type, * it should not contribute to serious fragmentation causing THP allocation * failures. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define NR_PCP_THP 1 #else #define NR_PCP_THP 0 #endif #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) /* * Flags used in pcp->flags field. * * PCPF_PREV_FREE_HIGH_ORDER: a high-order page is freed in the * previous page freeing. To avoid to drain PCP for an accident * high-order page freeing. * * PCPF_FREE_HIGH_BATCH: preserve "pcp->batch" pages in PCP before * draining PCP for consecutive high-order pages freeing without * allocation if data cache slice of CPU is large enough. To reduce * zone lock contention and keep cache-hot pages reusing. */ #define PCPF_PREV_FREE_HIGH_ORDER BIT(0) #define PCPF_FREE_HIGH_BATCH BIT(1) struct per_cpu_pages { spinlock_t lock; /* Protects lists field */ int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ int high_min; /* min high watermark */ int high_max; /* max high watermark */ int batch; /* chunk size for buddy add/remove */ u8 flags; /* protected by pcp->lock */ u8 alloc_factor; /* batch scaling factor during allocate */ #ifdef CONFIG_NUMA u8 expire; /* When 0, remote pagesets are drained */ #endif short free_count; /* consecutive free count */ /* Lists of pages, one per migrate type stored on the pcp-lists */ struct list_head lists[NR_PCP_LISTS]; } ____cacheline_aligned_in_smp; struct per_cpu_zonestat { #ifdef CONFIG_SMP s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; s8 stat_threshold; #endif #ifdef CONFIG_NUMA /* * Low priority inaccurate counters that are only folded * on demand. Use a large type to avoid the overhead of * folding during refresh_cpu_vm_stats. */ unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; #endif }; struct per_cpu_nodestat { s8 stat_threshold; s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS]; }; #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { /* * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able * to DMA to all of the addressable memory (ZONE_NORMAL). 
* On architectures where this area covers the whole 32 bit address * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller * DMA addressing constraints. This distinction is important as a 32bit * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit * platforms may need both zones as they support peripherals with * different DMA addressing limitations. */ #ifdef CONFIG_ZONE_DMA ZONE_DMA, #endif #ifdef CONFIG_ZONE_DMA32 ZONE_DMA32, #endif /* * Normal addressable memory is in ZONE_NORMAL. DMA operations can be * performed on pages in ZONE_NORMAL if the DMA devices support * transfers to all addressable memory. */ ZONE_NORMAL, #ifdef CONFIG_HIGHMEM /* * A memory area that is only addressable by the kernel through * mapping portions into its own address space. This is for example * used by i386 to allow the kernel to address the memory beyond * 900MB. The kernel will set up special mappings (page * table entries on i386) for each page that the kernel needs to * access. */ ZONE_HIGHMEM, #endif /* * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains * movable pages with few exceptional cases described below. Main use * cases for ZONE_MOVABLE are to make memory offlining/unplug more * likely to succeed, and to locally limit unmovable allocations - e.g., * to increase the number of THP/huge pages. Notable special cases are: * * 1. Pinned pages: (long-term) pinning of movable pages might * essentially turn such pages unmovable. Therefore, we do not allow * pinning long-term pages in ZONE_MOVABLE. When pages are pinned and * faulted, they come from the right zone right away. However, it is * still possible that address space already has pages in * ZONE_MOVABLE at the time when pages are pinned (i.e. user has * touches that memory before pinning). In such case we migrate them * to a different zone. When migration fails - pinning fails. * 2. memblock allocations: kernelcore/movablecore setups might create * situations where ZONE_MOVABLE contains unmovable allocations * after boot. Memory offlining and allocations fail early. * 3. Memory holes: kernelcore/movablecore setups might create very rare * situations where ZONE_MOVABLE contains memory holes after boot, * for example, if we have sections that are only partially * populated. Memory offlining and allocations fail early. * 4. PG_hwpoison pages: while poisoned pages can be skipped during * memory offlining, such pages cannot be allocated. * 5. Unmovable PG_offline pages: in paravirtualized environments, * hotplugged memory blocks might only partially be managed by the * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The * parts not manged by the buddy are unmovable PG_offline pages. In * some cases (virtio-mem), such pages can be skipped during * memory offlining, however, cannot be moved/allocated. These * techniques might use alloc_contig_range() to hide previously * exposed pages from the buddy again (e.g., to implement some sort * of memory unplug in virtio-mem). * 6. ZERO_PAGE(0), kernelcore/movablecore setups might create * situations where ZERO_PAGE(0) which is allocated differently * on different platforms may end up in a movable zone. ZERO_PAGE(0) * cannot be migrated. * 7. Memory-hotplug: when using memmap_on_memory and onlining the * memory to the MOVABLE zone, the vmemmap pages are also placed in * such zone. Such pages cannot be really moved around as they are * self-stored in the range, but they are treated as movable when * the range they describe is about to be offlined. 
* * In general, no unmovable allocations that degrade memory offlining * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) * have to expect that migrating pages in ZONE_MOVABLE can fail (even * if has_unmovable_pages() states that there are no unmovable pages, * there can be false negatives). */ ZONE_MOVABLE, #ifdef CONFIG_ZONE_DEVICE ZONE_DEVICE, #endif __MAX_NR_ZONES }; #ifndef __GENERATING_BOUNDS_H #define ASYNC_AND_SYNC 2 struct zone { /* Read-mostly fields */ /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long _watermark[NR_WMARK]; unsigned long watermark_boost; unsigned long nr_reserved_highatomic; /* * We don't know if the memory that we're going to allocate will be * freeable or/and it will be released eventually, so to avoid totally * wasting several GB of ram we must reserve some of the lower zone * memory (otherwise we risk to run OOM on the lower zones despite * there being tons of freeable ram on the higher zones). This array is * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl * changes. */ long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA int node; #endif struct pglist_data *zone_pgdat; struct per_cpu_pages __percpu *per_cpu_pageset; struct per_cpu_zonestat __percpu *per_cpu_zonestats; /* * the high and batch values are copied to individual pagesets for * faster access */ int pageset_high_min; int pageset_high_max; int pageset_batch; #ifndef CONFIG_SPARSEMEM /* * Flags for a pageblock_nr_pages block. See pageblock-flags.h. * In SPARSEMEM, this map is stored in struct mem_section */ unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; /* * spanned_pages is the total pages spanned by the zone, including * holes, which is calculated as: * spanned_pages = zone_end_pfn - zone_start_pfn; * * present_pages is physical pages existing within the zone, which * is calculated as: * present_pages = spanned_pages - absent_pages(pages in holes); * * present_early_pages is present pages existing within the zone * located on memory available since early boot, excluding hotplugged * memory. * * managed_pages is present pages managed by the buddy system, which * is calculated as (reserved_pages includes pages allocated by the * bootmem allocator): * managed_pages = present_pages - reserved_pages; * * cma pages is present pages that are assigned for CMA use * (MIGRATE_CMA). * * So present_pages may be used by memory hotplug or memory power * management logic to figure out unmanaged pages by checking * (present_pages - managed_pages). And managed_pages should be used * by page allocator and vm scanner to calculate all kinds of watermarks * and thresholds. * * Locking rules: * * zone_start_pfn and spanned_pages are protected by span_seqlock. * It is a seqlock because it has to be read outside of zone->lock, * and it is done in the main allocator path. But, it is written * quite infrequently. * * The span_seq lock is declared along with zone->lock because it is * frequently read in proximity to zone->lock. It's good to * give them a chance of being in the same cacheline. * * Write access to present_pages at runtime should be protected by * mem_hotplug_begin/done(). Any reader who can't tolerant drift of * present_pages should use get_online_mems() to get a stable value. 
*/ atomic_long_t managed_pages; unsigned long spanned_pages; unsigned long present_pages; #if defined(CONFIG_MEMORY_HOTPLUG) unsigned long present_early_pages; #endif #ifdef CONFIG_CMA unsigned long cma_pages; #endif const char *name; #ifdef CONFIG_MEMORY_ISOLATION /* * Number of isolated pageblock. It is used to solve incorrect * freepage counting problem due to racy retrieving migratetype * of pageblock. Protected by zone->lock. */ unsigned long nr_isolate_pageblock; #endif #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; #endif int initialized; /* Write-intensive fields used from the page allocator */ CACHELINE_PADDING(_pad1_); /* free areas of different sizes */ struct free_area free_area[NR_PAGE_ORDERS]; #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; #endif /* zone flags, see below */ unsigned long flags; /* Primarily protects free_area */ spinlock_t lock; /* Write-intensive fields used by compaction and vmstats. */ CACHELINE_PADDING(_pad2_); /* * When free pages are below this point, additional steps are taken * when reading the number of free pages to avoid per-cpu counter * drift allowing watermarks to be breached */ unsigned long percpu_drift_mark; #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* pfn where compaction free scanner should start */ unsigned long compact_cached_free_pfn; /* pfn where compaction migration scanner should start */ unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC]; unsigned long compact_init_migrate_pfn; unsigned long compact_init_free_pfn; #endif #ifdef CONFIG_COMPACTION /* * On compaction failure, 1<<compact_defer_shift compactions * are skipped before trying again. The number attempted since * last failure is tracked with compact_considered. * compact_order_failed is the minimum compaction failed order. */ unsigned int compact_considered; unsigned int compact_defer_shift; int compact_order_failed; #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* Set to true when the PG_migrate_skip bits should be cleared */ bool compact_blockskip_flush; #endif bool contiguous; CACHELINE_PADDING(_pad3_); /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; } ____cacheline_internodealigned_in_smp; enum pgdat_flags { PGDAT_DIRTY, /* reclaim scanning has recently found * many dirty file pages at the tail * of the LRU. */ PGDAT_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ }; enum zone_flags { ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks. * Cleared when kswapd is woken. */ ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */ ZONE_BELOW_HIGH, /* zone is below high watermark. 
*/ }; static inline unsigned long zone_managed_pages(struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); } static inline unsigned long zone_cma_pages(struct zone *zone) { #ifdef CONFIG_CMA return zone->cma_pages; #else return 0; #endif } static inline unsigned long zone_end_pfn(const struct zone *zone) { return zone->zone_start_pfn + zone->spanned_pages; } static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) { return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone); } static inline bool zone_is_initialized(struct zone *zone) { return zone->initialized; } static inline bool zone_is_empty(struct zone *zone) { return zone->spanned_pages == 0; } #ifndef BUILD_VDSO32_64 /* * The zone field is never updated after free_area_init_core() * sets it, so none of the operations on it need to be atomic. */ /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) #define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) #define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) /* * Define the bit shifts to access each section. For non-existent * sections we define the shift as 0; that plus a 0 mask ensures * the compiler will optimise away reference to them. */ #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) #define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) #define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0)) /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS #define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) #define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF) ? \ SECTIONS_PGOFF : ZONES_PGOFF) #else #define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) #define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF) ? \ NODES_PGOFF : ZONES_PGOFF) #endif #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) #define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) { ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } static inline enum zone_type folio_zonenum(const struct folio *folio) { return page_zonenum(&folio->page); } #ifdef CONFIG_ZONE_DEVICE static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } /* * Consecutive zone device pages should not be merged into the same sgl * or bvec segment with other types of pages or if they belong to different * pgmaps. Otherwise getting the pgmap of a given segment is not possible * without scanning the entire segment. This helper returns true either if * both pages are not zone device pages or both pages are zone device pages * with the same pgmap. 
*/ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { if (is_zone_device_page(a) != is_zone_device_page(b)) return false; if (!is_zone_device_page(a)) return true; return a->pgmap == b->pgmap; } extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else static inline bool is_zone_device_page(const struct page *page) { return false; } static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { return true; } #endif static inline bool folio_is_zone_device(const struct folio *folio) { return is_zone_device_page(&folio->page); } static inline bool is_zone_movable_page(const struct page *page) { return page_zonenum(page) == ZONE_MOVABLE; } static inline bool folio_is_zone_movable(const struct folio *folio) { return folio_zonenum(folio) == ZONE_MOVABLE; } #endif /* * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty * intersection with the given zone */ static inline bool zone_intersects(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages) { if (zone_is_empty(zone)) return false; if (start_pfn >= zone_end_pfn(zone) || start_pfn + nr_pages <= zone->zone_start_pfn) return false; return true; } /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the * queues ("queue_length >> 12") during an aging round. */ #define DEF_PRIORITY 12 /* Maximum number of zones on a zonelist */ #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) enum { ZONELIST_FALLBACK, /* zonelist with fallback */ #ifdef CONFIG_NUMA /* * The NUMA zonelists are doubled because we need zonelists that * restrict the allocations to a single node for __GFP_THISNODE. */ ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */ #endif MAX_ZONELISTS }; /* * This struct contains information about a zone in a zonelist. It is stored * here to avoid dereferences into large structures and lookups of tables */ struct zoneref { struct zone *zone; /* Pointer to actual zone */ int zone_idx; /* zone_idx(zoneref->zone) */ }; /* * One allocation request operates on a zonelist. A zonelist * is a list of zones, the first one is the 'goal' of the * allocation, the other zones are fallback zones, in decreasing * priority. * * To speed the reading of the zonelist, the zonerefs contain the zone index * of the entry being read. Helper functions to access information given * a struct zoneref are * * zonelist_zone() - Return the struct zone * for an entry in _zonerefs * zonelist_zone_idx() - Return the index of the zone for an entry * zonelist_node_idx() - Return the index of the node for an entry */ struct zonelist { struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; }; /* * The array of struct pages for flatmem. * It must be declared for SPARSEMEM as well because there are configurations * that rely on that. */ extern struct page *mem_map; #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split { spinlock_t split_queue_lock; struct list_head split_queue; unsigned long split_queue_len; }; #endif #ifdef CONFIG_MEMORY_FAILURE /* * Per NUMA node memory failure handling statistics. */ struct memory_failure_stats { /* * Number of raw pages poisoned. * Cases not accounted: memory outside kernel control, offline page, * arch-specific memory_failure (SGX), hwpoison_filter() filtered * error events, and unpoison actions from hwpoison_unpoison. 
*/ unsigned long total; /* * Recovery results of poisoned raw pages handled by memory_failure, * in sync with mf_result. * total = ignored + failed + delayed + recovered. * total * PAGE_SIZE * #nodes = /proc/meminfo/HardwareCorrupted. */ unsigned long ignored; unsigned long failed; unsigned long delayed; unsigned long recovered; }; #endif /* * On NUMA machines, each NUMA node would have a pg_data_t to describe * it's memory layout. On UMA machines there is a single pglist_data which * describes the whole memory. * * Memory statistics and page replacement data structures are maintained on a * per-zone basis. */ typedef struct pglist_data { /* * node_zones contains just the zones for THIS node. Not all of the * zones may be populated, but it is the full list. It is referenced by * this node's node_zonelists as well as other node's node_zonelists. */ struct zone node_zones[MAX_NR_ZONES]; /* * node_zonelists contains references to all zones in all nodes. * Generally the first zones will be references to this node's * node_zones. */ struct zonelist node_zonelists[MAX_ZONELISTS]; int nr_zones; /* number of populated zones in this node */ #ifdef CONFIG_FLATMEM /* means !SPARSEMEM */ struct page *node_mem_map; #ifdef CONFIG_PAGE_EXTENSION struct page_ext *node_page_ext; #endif #endif #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * Must be held any time you expect node_start_pfn, * node_present_pages, node_spanned_pages or nr_zones to stay constant. * Also synchronizes pgdat->first_deferred_pfn during deferred page * init. * * pgdat_resize_lock() and pgdat_resize_unlock() are provided to * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG * or CONFIG_DEFERRED_STRUCT_PAGE_INIT. * * Nests above zone->lock and zone->span_seqlock */ spinlock_t node_size_lock; #endif unsigned long node_start_pfn; unsigned long node_present_pages; /* total number of physical pages */ unsigned long node_spanned_pages; /* total size of physical page range, including holes */ int node_id; wait_queue_head_t kswapd_wait; wait_queue_head_t pfmemalloc_wait; /* workqueues for throttling reclaim for different reasons. */ wait_queue_head_t reclaim_wait[NR_VMSCAN_THROTTLE]; atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */ unsigned long nr_reclaim_start; /* nr pages written while throttled * when throttling started. */ #ifdef CONFIG_MEMORY_HOTPLUG struct mutex kswapd_lock; #endif struct task_struct *kswapd; /* Protected by kswapd_lock */ int kswapd_order; enum zone_type kswapd_highest_zoneidx; int kswapd_failures; /* Number of 'reclaimed == 0' runs */ #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_highest_zoneidx; wait_queue_head_t kcompactd_wait; struct task_struct *kcompactd; bool proactive_compact_trigger; #endif /* * This is a per-node reserve of pages that are not available * to userspace allocations. */ unsigned long totalreserve_pages; #ifdef CONFIG_NUMA /* * node reclaim becomes active if more unmapped pages exist. */ unsigned long min_unmapped_pages; unsigned long min_slab_pages; #endif /* CONFIG_NUMA */ /* Write-intensive fields used by page reclaim */ CACHELINE_PADDING(_pad1_); #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* * If memory initialisation on large machines is deferred then this * is the first PFN that needs to be initialised. 
*/ unsigned long first_deferred_pfn; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split deferred_split_queue; #endif #ifdef CONFIG_NUMA_BALANCING /* start time in ms of current promote rate limit period */ unsigned int nbp_rl_start; /* number of promote candidate pages at start time of current rate limit period */ unsigned long nbp_rl_nr_cand; /* promote threshold in ms */ unsigned int nbp_threshold; /* start time in ms of current promote threshold adjustment period */ unsigned int nbp_th_start; /* * number of promote candidate pages at start time of current promote * threshold adjustment period */ unsigned long nbp_th_nr_cand; #endif /* Fields commonly accessed by the page reclaim scanner */ /* * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED. * * Use mem_cgroup_lruvec() to look up lruvecs. */ struct lruvec __lruvec; unsigned long flags; #ifdef CONFIG_LRU_GEN /* kswap mm walk data */ struct lru_gen_mm_walk mm_walk; /* lru_gen_folio list */ struct lru_gen_memcg memcg_lru; #endif CACHELINE_PADDING(_pad2_); /* Per-node vmstats */ struct per_cpu_nodestat __percpu *per_cpu_nodestats; atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; #ifdef CONFIG_NUMA struct memory_tier __rcu *memtier; #endif #ifdef CONFIG_MEMORY_FAILURE struct memory_failure_stats mf_stats; #endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { return pgdat->node_start_pfn + pgdat->node_spanned_pages; } #include <linux/memory_hotplug.h> void build_all_zonelists(pg_data_t *pgdat); void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order, enum zone_type highest_zoneidx); bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags, long free_pages); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. */ enum meminit_context { MEMINIT_EARLY, MEMINIT_HOTPLUG, }; extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size); extern void lruvec_init(struct lruvec *lruvec); static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) { #ifdef CONFIG_MEMCG return lruvec->pgdat; #else return container_of(lruvec, struct pglist_data, __lruvec); #endif } #ifdef CONFIG_HAVE_MEMORYLESS_NODES int local_memory_node(int node_id); #else static inline int local_memory_node(int node_id) { return node_id; }; #endif /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) #ifdef CONFIG_ZONE_DEVICE static inline bool zone_is_zone_device(struct zone *zone) { return zone_idx(zone) == ZONE_DEVICE; } #else static inline bool zone_is_zone_device(struct zone *zone) { return false; } #endif /* * Returns true if a zone has pages managed by the buddy allocator. * All the reclaim decisions have to use this function rather than * populated_zone(). 
If the whole zone is reserved then we can easily * end up with populated_zone() && !managed_zone(). */ static inline bool managed_zone(struct zone *zone) { return zone_managed_pages(zone); } /* Returns true if a zone has memory */ static inline bool populated_zone(struct zone *zone) { return zone->present_pages; } #ifdef CONFIG_NUMA static inline int zone_to_nid(struct zone *zone) { return zone->node; } static inline void zone_set_nid(struct zone *zone, int nid) { zone->node = nid; } #else static inline int zone_to_nid(struct zone *zone) { return 0; } static inline void zone_set_nid(struct zone *zone, int nid) {} #endif extern int movable_zone; static inline int is_highmem_idx(enum zone_type idx) { #ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM || (idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM)); #else return 0; #endif } /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. * @zone: pointer to struct zone variable * Return: 1 for a highmem zone, 0 otherwise */ static inline int is_highmem(struct zone *zone) { return is_highmem_idx(zone_idx(zone)); } #ifdef CONFIG_ZONE_DMA bool has_managed_dma(void); #else static inline bool has_managed_dma(void) { return false; } #endif #ifndef CONFIG_NUMA extern struct pglist_data contig_page_data; static inline struct pglist_data *NODE_DATA(int nid) { return &contig_page_data; } #else /* CONFIG_NUMA */ #include <asm/mmzone.h> #endif /* !CONFIG_NUMA */ extern struct pglist_data *first_online_pgdat(void); extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); extern struct zone *next_zone(struct zone *zone); /** * for_each_online_pgdat - helper macro to iterate over all online nodes * @pgdat: pointer to a pg_data_t variable */ #define for_each_online_pgdat(pgdat) \ for (pgdat = first_online_pgdat(); \ pgdat; \ pgdat = next_online_pgdat(pgdat)) /** * for_each_zone - helper macro to iterate over all memory zones * @zone: pointer to struct zone variable * * The user only needs to declare the zone variable, for_each_zone * fills it in. */ #define for_each_zone(zone) \ for (zone = (first_online_pgdat())->node_zones; \ zone; \ zone = next_zone(zone)) #define for_each_populated_zone(zone) \ for (zone = (first_online_pgdat())->node_zones; \ zone; \ zone = next_zone(zone)) \ if (!populated_zone(zone)) \ ; /* do nothing */ \ else static inline struct zone *zonelist_zone(struct zoneref *zoneref) { return zoneref->zone; } static inline int zonelist_zone_idx(struct zoneref *zoneref) { return zoneref->zone_idx; } static inline int zonelist_node_idx(struct zoneref *zoneref) { return zone_to_nid(zoneref->zone); } struct zoneref *__next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes); /** * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point * @z: The cursor used as a starting point for the search * @highest_zoneidx: The zone index of the highest zone to return * @nodes: An optional nodemask to filter the zonelist with * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the * search. The zoneref returned is a cursor that represents the current zone * being examined. It should be advanced by one before calling * next_zones_zonelist again. 
* * Return: the next zone at or below highest_zoneidx within the allowed * nodemask using a cursor within a zonelist as a starting point */ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, nodemask_t *nodes) { if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx)) return z; return __next_zones_zonelist(z, highest_zoneidx, nodes); } /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist * @zonelist: The zonelist to search for a suitable zone * @highest_zoneidx: The zone index of the highest zone to return * @nodes: An optional nodemask to filter the zonelist with * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. * * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is * never NULL). This may happen either genuinely, or due to concurrent nodemask * update due to cpuset modification. * * Return: Zoneref pointer for the first suitable zone found */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, nodemask_t *nodes) { return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes); } /** * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask * @zone: The current zone in the iterator * @z: The current pointer within zonelist->_zonerefs being iterated * @zlist: The zonelist being iterated * @highidx: The zone index of the highest zone to return * @nodemask: Nodemask allowed by the allocator * * This iterator iterates though all zones at or below a given zone index and * within a given nodemask */ #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ for (zone = z->zone; \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index * @zone: The current zone in the iterator * @z: The current pointer within zonelist->zones being iterated * @zlist: The zonelist being iterated * @highidx: The zone index of the highest zone to return * * This iterator iterates though all zones at or below a given zone index. */ #define for_each_zone_zonelist(zone, z, zlist, highidx) \ for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL) /* Whether the 'nodes' are all movable nodes */ static inline bool movable_only_nodes(nodemask_t *nodes) { struct zonelist *zonelist; struct zoneref *z; int nid; if (nodes_empty(*nodes)) return false; /* * We can chose arbitrary node from the nodemask to get a * zonelist as they are interlinked. We just need to find * at least one zone that can satisfy kernel allocations. */ nid = first_node(*nodes); zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK]; z = first_zones_zonelist(zonelist, ZONE_NORMAL, nodes); return (!z->zone) ? 
true : false; } #ifdef CONFIG_SPARSEMEM #include <asm/sparsemem.h> #endif #ifdef CONFIG_FLATMEM #define pfn_to_nid(pfn) (0) #endif #ifdef CONFIG_SPARSEMEM /* * PA_SECTION_SHIFT physical address to/from section number * PFN_SECTION_SHIFT pfn to/from section number */ #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) #define SECTION_BLOCKFLAGS_BITS \ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) #if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS #error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE #endif static inline unsigned long pfn_to_section_nr(unsigned long pfn) { return pfn >> PFN_SECTION_SHIFT; } static inline unsigned long section_nr_to_pfn(unsigned long sec) { return sec << PFN_SECTION_SHIFT; } #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) #define SUBSECTION_SHIFT 21 #define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT) #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) #define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1)) #if SUBSECTION_SHIFT > SECTION_SIZE_BITS #error Subsection size exceeds section size #else #define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT)) #endif #define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { struct rcu_head rcu; #ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif /* See declaration of similar field in struct zone */ unsigned long pageblock_flags[0]; }; void subsection_map_init(unsigned long pfn, unsigned long nr_pages); struct page; struct page_ext; struct mem_section { /* * This is, logically, a pointer to an array of struct * pages. However, it is stored with some other magic. * (see sparse.c::sparse_init_one_section()) * * Additionally during early boot we encode node id of * the location of the section here to guide allocation. * (see sparse.c::memory_present()) * * Making it a UL at least makes someone do a cast * before using it wrong. */ unsigned long section_mem_map; struct mem_section_usage *usage; #ifdef CONFIG_PAGE_EXTENSION /* * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use * section. (see page_ext.h about this.) */ struct page_ext *page_ext; unsigned long pad; #endif /* * WARNING: mem_section must be a power-of-2 in size for the * calculation and use of SECTION_ROOT_MASK to make sense. 
*/ }; #ifdef CONFIG_SPARSEMEM_EXTREME #define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) #else #define SECTIONS_PER_ROOT 1 #endif #define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT) #define NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) #ifdef CONFIG_SPARSEMEM_EXTREME extern struct mem_section **mem_section; #else extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif static inline unsigned long *section_to_usemap(struct mem_section *ms) { return ms->usage->pageblock_flags; } static inline struct mem_section *__nr_to_section(unsigned long nr) { unsigned long root = SECTION_NR_TO_ROOT(nr); if (unlikely(root >= NR_SECTION_ROOTS)) return NULL; #ifdef CONFIG_SPARSEMEM_EXTREME if (!mem_section || !mem_section[root]) return NULL; #endif return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern size_t mem_section_usage_size(void); /* * We use the lower bits of the mem_map pointer to store * a little bit of information. The pointer is calculated * as mem_map - section_nr_to_pfn(pnum). The result is * aligned to the minimum alignment of the two values: * 1. All mem_map arrays are page-aligned. * 2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT * lowest bits. PFN_SECTION_SHIFT is arch-specific * (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the * worst combination is powerpc with 256k pages, * which results in PFN_SECTION_SHIFT equal 6. * To sum it up, at least 6 bits are available on all architectures. * However, we can exceed 6 bits on some other architectures except * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available * with the worst case of 64K pages on arm64) if we make sure the * exceeded bit is not applicable to powerpc. 
*/ enum { SECTION_MARKED_PRESENT_BIT, SECTION_HAS_MEM_MAP_BIT, SECTION_IS_ONLINE_BIT, SECTION_IS_EARLY_BIT, #ifdef CONFIG_ZONE_DEVICE SECTION_TAINT_ZONE_DEVICE_BIT, #endif SECTION_MAP_LAST_BIT, }; #define SECTION_MARKED_PRESENT BIT(SECTION_MARKED_PRESENT_BIT) #define SECTION_HAS_MEM_MAP BIT(SECTION_HAS_MEM_MAP_BIT) #define SECTION_IS_ONLINE BIT(SECTION_IS_ONLINE_BIT) #define SECTION_IS_EARLY BIT(SECTION_IS_EARLY_BIT) #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT static inline struct page *__section_mem_map_addr(struct mem_section *section) { unsigned long map = section->section_mem_map; map &= SECTION_MAP_MASK; return (struct page *)map; } static inline int present_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } static inline int present_section_nr(unsigned long nr) { return present_section(__nr_to_section(nr)); } static inline int valid_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } static inline int early_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_EARLY)); } static inline int valid_section_nr(unsigned long nr) { return valid_section(__nr_to_section(nr)); } static inline int online_section(struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_ONLINE)); } #ifdef CONFIG_ZONE_DEVICE static inline int online_device_section(struct mem_section *section) { unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE; return section && ((section->section_mem_map & flags) == flags); } #else static inline int online_device_section(struct mem_section *section) { return 0; } #endif static inline int online_section_nr(unsigned long nr) { return online_section(__nr_to_section(nr)); } #ifdef CONFIG_MEMORY_HOTPLUG void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn); void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn); #endif static inline struct mem_section *__pfn_to_section(unsigned long pfn) { return __nr_to_section(pfn_to_section_nr(pfn)); } extern unsigned long __highest_present_section_nr; static inline int subsection_map_index(unsigned long pfn) { return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION; } #ifdef CONFIG_SPARSEMEM_VMEMMAP static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { return 1; } #endif #ifndef CONFIG_HAVE_ARCH_PFN_VALID /** * pfn_valid - check if there is a valid memory map entry for a PFN * @pfn: the page frame number to check * * Check if there is a valid memory map entry aka struct page for the @pfn. * Note, that availability of the memory map entry does not imply that * there is actual usable memory at that @pfn. The struct page may * represent a hole or an unusable page frame. * * Return: 1 for PFNs that have memory map entries and 0 otherwise */ static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; int ret; /* * Ensure the upper PAGE_SHIFT bits are clear in the * pfn. Else it might lead to false positives when * some of the upper bits are set, but the lower bits * match a valid pfn. 
*/ if (PHYS_PFN(PFN_PHYS(pfn)) != pfn) return 0; if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); rcu_read_lock_sched(); if (!valid_section(ms)) { rcu_read_unlock_sched(); return 0; } /* * Traditionally early sections always returned pfn_valid() for * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); rcu_read_unlock_sched(); return ret; } #endif static inline int pfn_in_present_section(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; return present_section(__pfn_to_section(pfn)); } static inline unsigned long next_present_section_nr(unsigned long section_nr) { while (++section_nr <= __highest_present_section_nr) { if (present_section_nr(section_nr)) return section_nr; } return -1; } /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate * this restriction. */ #ifdef CONFIG_NUMA #define pfn_to_nid(pfn) \ ({ \ unsigned long __pfn_to_nid_pfn = (pfn); \ page_to_nid(pfn_to_page(__pfn_to_nid_pfn)); \ }) #else #define pfn_to_nid(pfn) (0) #endif void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) #define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */
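/*
 * Illustrative sketch (not part of the original header): how the zone helpers
 * above are typically combined. count_managed_pages_per_node() is a
 * hypothetical example function, not a kernel API; it only uses helpers
 * declared above (for_each_populated_zone(), zone_to_nid(),
 * zone_managed_pages()).
 */
static unsigned long count_managed_pages_per_node(int nid)
{
	struct zone *zone;
	unsigned long pages = 0;

	/* Walk every populated zone and sum buddy-managed pages on @nid. */
	for_each_populated_zone(zone) {
		if (zone_to_nid(zone) != nid)
			continue;
		pages += zone_managed_pages(zone);
	}
	return pages;
}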
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_SMP_H #define _ASM_X86_SMP_H #ifndef __ASSEMBLY__ #include <linux/cpumask.h> #include <asm/cpumask.h> #include <asm/current.h> #include <asm/thread_info.h> DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); struct task_struct; struct smp_ops { void (*smp_prepare_boot_cpu)(void); void (*smp_prepare_cpus)(unsigned max_cpus); void (*smp_cpus_done)(unsigned max_cpus); void (*stop_other_cpus)(int wait); void (*crash_stop_other_cpus)(void); void (*smp_send_reschedule)(int cpu); void (*cleanup_dead_cpu)(unsigned cpu); void (*poll_sync_state)(void); int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle); int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); }; /* Globals due to paravirt */ extern void set_cpu_sibling_map(int cpu); #ifdef CONFIG_SMP extern struct smp_ops smp_ops; static inline void smp_send_stop(void) { smp_ops.stop_other_cpus(0); } static inline void stop_other_cpus(void) { smp_ops.stop_other_cpus(1); } static inline void smp_prepare_cpus(unsigned int max_cpus) { smp_ops.smp_prepare_cpus(max_cpus); } static inline void smp_cpus_done(unsigned int max_cpus) { smp_ops.smp_cpus_done(max_cpus); } static inline int __cpu_disable(void) { return smp_ops.cpu_disable(); } static inline void __cpu_die(unsigned int cpu) { if (smp_ops.cpu_die) smp_ops.cpu_die(cpu); } static inline void __noreturn play_dead(void) { smp_ops.play_dead(); BUG(); } static inline void arch_smp_send_reschedule(int cpu) { smp_ops.smp_send_reschedule(cpu); } static inline void arch_send_call_function_single_ipi(int cpu) { smp_ops.send_call_func_single_ipi(cpu); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { smp_ops.send_call_func_ipi(mask); } void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); void smp_prepare_cpus_common(void); void native_smp_prepare_cpus(unsigned int max_cpus); void native_smp_cpus_done(unsigned int max_cpus); int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_kick_ap(unsigned int cpu, struct task_struct *tidle); int native_cpu_disable(void); void __noreturn hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); void smp_kick_mwait_play_dead(void); void native_smp_send_reschedule(int cpu); void
native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); void smp_store_cpu_info(int id); asmlinkage __visible void smp_reboot_interrupt(void); __visible void smp_reschedule_interrupt(struct pt_regs *regs); __visible void smp_call_function_interrupt(struct pt_regs *regs); __visible void smp_call_function_single_interrupt(struct pt_regs *r); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) #define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu) /* * This function is needed by all SMP systems. It must _always_ be valid * from the initial startup. */ #define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number) #define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number) #ifdef CONFIG_X86_32 extern int safe_smp_processor_id(void); #else # define safe_smp_processor_id() smp_processor_id() #endif static inline struct cpumask *cpu_llc_shared_mask(int cpu) { return per_cpu(cpu_llc_shared_map, cpu); } static inline struct cpumask *cpu_l2c_shared_mask(int cpu) { return per_cpu(cpu_l2c_shared_map, cpu); } #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() static inline int wbinvd_on_all_cpus(void) { wbinvd(); return 0; } static inline struct cpumask *cpu_llc_shared_mask(int cpu) { return (struct cpumask *)cpumask_of(0); } #endif /* CONFIG_SMP */ #ifdef CONFIG_DEBUG_NMI_SELFTEST extern void nmi_selftest(void); #else #define nmi_selftest() do { } while (0) #endif extern unsigned int smpboot_control; extern unsigned long apic_mmio_base; #endif /* !__ASSEMBLY__ */ /* Control bits for startup_64 */ #define STARTUP_READ_APICID 0x80000000 /* Top 8 bits are reserved for control */ #define STARTUP_PARALLEL_MASK 0xFF000000 #endif /* _ASM_X86_SMP_H */
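/*
 * Illustrative sketch (not part of the original header): the arch_* wrappers
 * above are thin dispatches through the smp_ops table. resched_llc_siblings()
 * is a hypothetical example, not a kernel API; it assumes CONFIG_SMP and only
 * uses cpu_llc_shared_mask() and arch_smp_send_reschedule() declared above,
 * plus for_each_cpu() from <linux/cpumask.h>, which this header includes.
 */
static inline void resched_llc_siblings(int this_cpu)
{
	int cpu;

	/* Ask every CPU sharing the last level cache with @this_cpu to reschedule. */
	for_each_cpu(cpu, cpu_llc_shared_mask(this_cpu)) {
		if (cpu != this_cpu)
			arch_smp_send_reschedule(cpu); /* -> smp_ops.smp_send_reschedule() */
	}
}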
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PKRU_H #define _ASM_X86_PKRU_H #include <asm/cpufeature.h> #define PKRU_AD_BIT 0x1u #define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS extern u32 init_pkru_value; #define pkru_get_init_value() READ_ONCE(init_pkru_value) #else #define init_pkru_value 0 #define pkru_get_init_value() 0 #endif static inline bool __pkru_allows_read(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); } static inline bool __pkru_allows_write(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; /* * Access-disable disables writes too so we need to check * both bits here. */ return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); } static inline u32 read_pkru(void) { if (cpu_feature_enabled(X86_FEATURE_OSPKE)) return rdpkru(); return 0; } static inline void write_pkru(u32 pkru) { if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; /* * WRPKRU is relatively expensive compared to RDPKRU. * Avoid WRPKRU when it would not change the value. */ if (pkru != rdpkru()) wrpkru(pkru); } static inline void pkru_write_default(void) { if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; wrpkru(pkru_get_init_value()); } #endif
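/*
 * Illustrative sketch (not part of the original header): each protection key
 * owns two adjacent PKRU bits (PKRU_BITS_PER_PKEY == 2), an access-disable
 * bit and a write-disable bit. pkru_make_readonly() is a hypothetical helper,
 * not a kernel API; it only uses the macros and accessors defined above.
 */
static inline void pkru_make_readonly(u16 pkey)
{
	u32 pkru = read_pkru();

	/* Clear both bits for @pkey, then set write-disable only. */
	pkru &= ~((PKRU_AD_BIT | PKRU_WD_BIT) << (pkey * PKRU_BITS_PER_PKEY));
	pkru |= PKRU_WD_BIT << (pkey * PKRU_BITS_PER_PKEY);

	/* write_pkru() is a no-op unless OSPKE is enabled and the value changes. */
	write_pkru(pkru);
}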
/* SPDX-License-Identifier: GPL-2.0 */ /* * Filesystem access notification for Linux * * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> */ #ifndef
__LINUX_FSNOTIFY_BACKEND_H #define __LINUX_FSNOTIFY_BACKEND_H #ifdef __KERNEL__ #include <linux/idr.h> /* inotify uses this */ #include <linux/fs.h> /* struct inode */ #include <linux/list.h> #include <linux/path.h> /* struct path */ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/atomic.h> #include <linux/user_namespace.h> #include <linux/refcount.h> #include <linux/mempool.h> #include <linux/sched/mm.h> /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily * convert between them. dnotify only needs conversion at watch creation * so no perf loss there. fanotify isn't defined yet, so it can use the * wholes if it needs more events. */ #define FS_ACCESS 0x00000001 /* File was accessed */ #define FS_MODIFY 0x00000002 /* File was modified */ #define FS_ATTRIB 0x00000004 /* Metadata changed */ #define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */ #define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ #define FS_OPEN 0x00000020 /* File was opened */ #define FS_MOVED_FROM 0x00000040 /* File was moved from X */ #define FS_MOVED_TO 0x00000080 /* File was moved to Y */ #define FS_CREATE 0x00000100 /* Subfile was created */ #define FS_DELETE 0x00000200 /* Subfile was deleted */ #define FS_DELETE_SELF 0x00000400 /* Self was deleted */ #define FS_MOVE_SELF 0x00000800 /* Self was moved */ #define FS_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FS_UNMOUNT 0x00002000 /* inode on umount fs */ #define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FS_ERROR 0x00008000 /* Filesystem Error (fanotify) */ /* * FS_IN_IGNORED overloads FS_ERROR. It is only used internally by inotify * which does not support FS_ERROR. */ #define FS_IN_IGNORED 0x00008000 /* last inotify event here */ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. * Set on inode/sb/mount marks that care about parent/name info. */ #define FS_EVENT_ON_CHILD 0x08000000 #define FS_RENAME 0x10000000 /* File was renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) /* * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) /* * This is a list of all events that may get sent to a parent that is watching * with flag FS_EVENT_ON_CHILD based on fs event on a child of that directory. */ #define FS_EVENTS_POSS_ON_CHILD (ALL_FSNOTIFY_PERM_EVENTS | \ FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \ FS_OPEN | FS_OPEN_EXEC) /* * This is a list of all events that may get sent with the parent inode as the * @to_tell argument of fsnotify(). * It may include events that can be sent to an inode/sb/mount mark, but cannot * be sent to a parent watching children. 
*/ #define FS_EVENTS_POSS_TO_PARENT (FS_EVENTS_POSS_ON_CHILD) /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ FS_ERROR) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT) #define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark; struct fsnotify_event_private_data; struct fsnotify_fname; struct fsnotify_iter_info; struct mem_cgroup; /* * Each group much define these ops. The fsnotify infrastructure will call * these operations for each relevant group. * * handle_event - main call for a group to handle an fs event * @group: group to notify * @mask: event type and flags * @data: object that event happened on * @data_type: type of object for fanotify_data_XXX() accessors * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to * @file_name: optional file name associated with event * @cookie: inotify rename cookie * @iter_info: array of marks from this group that are interested in the event * * handle_inode_event - simple variant of handle_event() for groups that only * have inode marks and don't have ignore mask * @mark: mark to notify * @mask: event type and flags * @inode: inode that event happened on * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to. * Either @inode or @dir must be non-NULL. * @file_name: optional file name associated with event * @cookie: inotify rename cookie * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group * MUST be holding a reference on each mark and that reference must be * dropped in this function. inotify uses this function to send * userspace messages that marks have been removed. */ struct fsnotify_ops { int (*handle_event)(struct fsnotify_group *group, u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_group *group, struct fsnotify_event *event); /* called on final put+free to free memory */ void (*free_mark)(struct fsnotify_mark *mark); }; /* * all of the information about the original object we want to now send to * a group. If you want to carry more info from the accessing task to the * listener this structure is where you need to be adding fields. */ struct fsnotify_event { struct list_head list; }; /* * A group is a "thing" that wants to receive notification about filesystem * events. The mask holds the subset of event types this group cares about. * refcnt on a group is up to the implementor and at any moment if it goes 0 * everything will be cleaned up. */ struct fsnotify_group { const struct fsnotify_ops *ops; /* how this group handles things */ /* * How the refcnt is used is up to each group. 
When the refcnt hits 0 * fsnotify will clean up all of the resources associated with this group. * As an example, the dnotify group will always have a refcnt=1 and that * will never change. Inotify, on the other hand, has a group per * inotify_init() and the refcnt will hit 0 only when that fd has been * closed. */ refcount_t refcnt; /* things with interest in this group */ /* needed to send notification to userspace */ spinlock_t notification_lock; /* protect the notification_list */ struct list_head notification_list; /* list of event_holder this group needs to send to userspace */ wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ unsigned int max_events; /* maximum events allowed on the list */ /* * Valid fsnotify group priorities. Events are send in order from highest * priority to lowest priority. We default to the lowest priority. */ #define FS_PRIO_0 0 /* normal notifiers, no permissions */ #define FS_PRIO_1 1 /* fanotify content based access control */ #define FS_PRIO_2 2 /* fanotify pre-content access */ unsigned int priority; bool shutdown; /* group is being shut down, don't queue more events */ #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ #define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */ #define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */ int flags; unsigned int owner_flags; /* stored flags of mark_mutex owner */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ atomic_t user_waits; /* Number of tasks waiting for user * response */ struct list_head marks_list; /* all inode marks for this group */ struct fasync_struct *fsn_fa; /* async notification */ struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ struct mem_cgroup *memcg; /* memcg to charge allocations */ /* groups can define private fields here or use the void *private */ union { void *private; #ifdef CONFIG_INOTIFY_USER struct inotify_group_private_data { spinlock_t idr_lock; struct idr idr; struct ucounts *ucounts; } inotify_data; #endif #ifdef CONFIG_FANOTIFY struct fanotify_group_private_data { /* Hash table of events for merge */ struct hlist_head *merge_hash; /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; int flags; /* flags from fanotify_init() */ int f_flags; /* event_f_flags from fanotify_init() */ struct ucounts *ucounts; mempool_t error_events_pool; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; }; /* * These helpers are used to prevent deadlock when reclaiming inodes with * evictable marks of the same group that is allocating a new mark. 
*/ static inline void fsnotify_group_lock(struct fsnotify_group *group) { mutex_lock(&group->mark_mutex); if (group->flags & FSNOTIFY_GROUP_NOFS) group->owner_flags = memalloc_nofs_save(); } static inline void fsnotify_group_unlock(struct fsnotify_group *group) { if (group->flags & FSNOTIFY_GROUP_NOFS) memalloc_nofs_restore(group->owner_flags); mutex_unlock(&group->mark_mutex); } static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) { WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); if (group->flags & FSNOTIFY_GROUP_NOFS) WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); } /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, FSNOTIFY_EVENT_DENTRY, FSNOTIFY_EVENT_ERROR, }; struct fs_error_report { int error; struct inode *inode; struct super_block *sb; }; static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_INODE: return (struct inode *)data; case FSNOTIFY_EVENT_DENTRY: return d_inode(data); case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *)data)->inode; default: return NULL; } } static inline struct dentry *fsnotify_data_dentry(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_DENTRY: /* Non const is needed for dget() */ return (struct dentry *)data; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry; default: return NULL; } } static inline const struct path *fsnotify_data_path(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_PATH: return data; default: return NULL; } } static inline struct super_block *fsnotify_data_sb(const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_INODE: return ((struct inode *)data)->i_sb; case FSNOTIFY_EVENT_DENTRY: return ((struct dentry *)data)->d_sb; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry->d_sb; case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *) data)->sb; default: return NULL; } } static inline struct fs_error_report *fsnotify_data_error_report( const void *data, int data_type) { switch (data_type) { case FSNOTIFY_EVENT_ERROR: return (struct fs_error_report *) data; default: return NULL; } } /* * Index to merged marks iterator array that correlates to a type of watch. * The type of watched object can be deduced from the iterator type, but not * the other way around, because an event can match different watched objects * of the same object type. * For example, both parent and child are watching an object of type inode. 
*/ enum fsnotify_iter_type { FSNOTIFY_ITER_TYPE_INODE, FSNOTIFY_ITER_TYPE_VFSMOUNT, FSNOTIFY_ITER_TYPE_SB, FSNOTIFY_ITER_TYPE_PARENT, FSNOTIFY_ITER_TYPE_INODE2, FSNOTIFY_ITER_TYPE_COUNT }; /* The type of object that a mark is attached to */ enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_ANY = -1, FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; static inline bool fsnotify_valid_obj_type(unsigned int obj_type) { return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT); } struct fsnotify_iter_info { struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT]; struct fsnotify_group *current_group; unsigned int report_mask; int srcu_idx; }; static inline bool fsnotify_iter_should_report_type( struct fsnotify_iter_info *iter_info, int iter_type) { return (iter_info->report_mask & (1U << iter_type)); } static inline void fsnotify_iter_set_report_type( struct fsnotify_iter_info *iter_info, int iter_type) { iter_info->report_mask |= (1U << iter_type); } static inline struct fsnotify_mark *fsnotify_iter_mark( struct fsnotify_iter_info *iter_info, int iter_type) { if (fsnotify_iter_should_report_type(iter_info, iter_type)) return iter_info->marks[iter_type]; return NULL; } static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type, struct fsnotify_mark **markp) { while (type < FSNOTIFY_ITER_TYPE_COUNT) { *markp = fsnotify_iter_mark(iter, type); if (*markp) break; type++; } return type; } #define FSNOTIFY_ITER_FUNCS(name, NAME) \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \ } FSNOTIFY_ITER_FUNCS(inode, INODE) FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) #define fsnotify_foreach_iter_type(type) \ for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++) #define fsnotify_foreach_iter_mark_type(iter, mark, type) \ for (type = 0; \ type = fsnotify_iter_step(iter, type, &mark), \ type < FSNOTIFY_ITER_TYPE_COUNT; \ type++) /* * fsnotify_connp_t is what we embed in objects which connector can be attached * to. fsnotify_connp_t * is how we refer from connector back to object. */ struct fsnotify_mark_connector; typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; /* * Inode/vfsmount/sb point to this structure which tracks all marks attached to * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this * structure. We destroy this structure when there are no more marks attached * to it. The structure is protected by fsnotify_mark_srcu. */ struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ union { /* Object pointer [lock] */ fsnotify_connp_t *obj; /* Used listing heads to free after srcu period expires */ struct fsnotify_mark_connector *destroy_next; }; struct hlist_head list; }; /* * A mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events * of a type matching mask or only interested in those events. * * These are flushed when an inode is evicted from core and may be flushed * when the inode is modified (as seen by fsnotify_access). Some fsnotify * users (such as dnotify) will flush these when the open fd is closed and not * at inode eviction or modification. 
* * Text in brackets is showing the lock(s) protecting modifications of a * particular entry. obj_lock means either inode->i_lock or * mnt->mnt_root->d_lock depending on the mark type. */ struct fsnotify_mark { /* Mask this mark is for [mark->lock, group->mark_mutex] */ __u32 mask; /* We hold one for presence in g_list. Also one ref for each 'thing' * in kernel that found and may be using this mark. */ refcount_t refcnt; /* Group this mark is for. Set on mark creation, stable until last ref * is dropped */ struct fsnotify_group *group; /* List of marks by group->marks_list. Also reused for queueing * mark into destroy_list when it's waiting for the end of SRCU period * before it can be freed. [group->mark_mutex] */ struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; /* List of marks for inode / vfsmount [connector->lock, mark ref] */ struct hlist_node obj_list; /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; /* Events types and flags to ignore [mark->lock, group->mark_mutex] */ __u32 ignore_mask; /* General fsnotify mark flags */ #define FSNOTIFY_MARK_FLAG_ALIVE 0x0001 #define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002 /* inotify mark flags */ #define FSNOTIFY_MARK_FLAG_EXCL_UNLINK 0x0010 #define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020 /* fanotify mark flags */ #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 #define FSNOTIFY_MARK_FLAG_HAS_FSID 0x0800 #define FSNOTIFY_MARK_FLAG_WEAK_FSID 0x1000 unsigned int flags; /* flags [mark->lock] */ }; #ifdef CONFIG_FSNOTIFY /* called from the vfs helpers */ /* main fsnotify call to send events */ extern int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie); extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) { /* FS_EVENT_ON_CHILD is set on marks that want parent/name info */ if (!(mask & FS_EVENT_ON_CHILD)) return 0; /* * This object might be watched by a mark that cares about parent/name * info, does it care about the specific set of events that can be * reported with parent/name info? */ return mask & FS_EVENTS_POSS_TO_PARENT; } static inline int fsnotify_inode_watches_children(struct inode *inode) { /* FS_EVENT_ON_CHILD is set if the inode may care */ if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD)) return 0; /* this inode might care about child events, does it care about the * specific set of events that can happen on a child? */ return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD; } /* * Update the dentry with a flag indicating the interest of its parent to receive * filesystem events when those events happens to this dentry->d_inode. */ static inline void fsnotify_update_flags(struct dentry *dentry) { assert_spin_locked(&dentry->d_lock); /* * Serialisation of setting PARENT_WATCHED on the dentries is provided * by d_lock. If inotify_inode_watched changes after we have taken * d_lock, the following __fsnotify_update_child_dentry_flags call will * find our entry, so it will spin until we complete here, and update * us with the new state. 
*/ if (fsnotify_inode_watches_children(dentry->d_parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; } /* called from fsnotify listeners, such as fanotify or dnotify */ /* create a new group */ extern struct fsnotify_group *fsnotify_alloc_group( const struct fsnotify_ops *ops, int flags); /* get reference to a group */ extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); /* group destruction begins, stop queuing new events */ extern void fsnotify_group_stop_queueing(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); /* fasync handler function */ extern int fsnotify_fasync(int fd, struct file *file, int on); /* Free event from memory */ extern void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event); /* attach the event to the group notification queue */ extern int fsnotify_insert_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct fsnotify_group *, struct fsnotify_event *), void (*insert)(struct fsnotify_group *, struct fsnotify_event *)); static inline int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct fsnotify_group *, struct fsnotify_event *)) { return fsnotify_insert_event(group, event, merge, NULL); } /* Queue overflow event to a notification group */ static inline void fsnotify_queue_overflow(struct fsnotify_group *group) { fsnotify_add_event(group, group->overflow_event, NULL); } static inline bool fsnotify_is_overflow_event(u32 mask) { return mask & FS_Q_OVERFLOW; } static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) { assert_spin_locked(&group->notification_lock); return list_empty(&group->notification_list); } /* return, but do not dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); /* return AND dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group); /* Remove event queued in the notification list */ extern void fsnotify_remove_queued_event(struct fsnotify_group *group, struct fsnotify_event *event); /* functions used to manipulate the marks attached to inodes */ /* * Canonical "ignore mask" including event flags. * * Note the subtle semantic difference from the legacy ->ignored_mask. * ->ignored_mask traditionally only meant which events should be ignored, * while ->ignore_mask also includes flags regarding the type of objects on * which events should be ignored.
*/ static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark) { __u32 ignore_mask = mark->ignore_mask; /* The event flags in ignore mask take effect */ if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) return ignore_mask; /* * Legacy behavior: * - Always ignore events on dir * - Ignore events on child if parent is watching children */ ignore_mask |= FS_ISDIR; ignore_mask &= ~FS_EVENT_ON_CHILD; ignore_mask |= mark->mask & FS_EVENT_ON_CHILD; return ignore_mask; } /* Legacy ignored_mask - only event types to ignore */ static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark) { return mark->ignore_mask & ALL_FSNOTIFY_EVENTS; } /* * Check if mask (or ignore mask) should be applied depending if victim is a * directory and whether it is reported to a watching parent. */ static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir, int iter_type) { /* Should mask be applied to a directory? */ if (is_dir && !(mask & FS_ISDIR)) return false; /* Should mask be applied to a child? */ if (iter_type == FSNOTIFY_ITER_TYPE_PARENT && !(mask & FS_EVENT_ON_CHILD)) return false; return true; } /* * Effective ignore mask taking into account if event victim is a * directory and whether it is reported to a watching parent. */ static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark, bool is_dir, int iter_type) { __u32 ignore_mask = fsnotify_ignored_events(mark); if (!ignore_mask) return 0; /* For non-dir and non-child, no need to consult the event flags */ if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT) return ignore_mask; ignore_mask = fsnotify_ignore_mask(mark); if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type)) return 0; return ignore_mask & ALL_FSNOTIFY_EVENTS; } /* Get mask for calculating object interest taking ignore mask into account */ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) { __u32 mask = mark->mask; if (!fsnotify_ignored_events(mark)) return mask; /* Interest in FS_MODIFY may be needed for clearing ignore mask */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mask |= FS_MODIFY; /* * If mark is interested in ignoring events on children, the object must * show interest in those events for fsnotify_parent() to notice it. 
*/ return mask | mark->ignore_mask; } /* Get mask of events for a list of marks */ extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn); /* Calculate mask of events for a list of marks */ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); extern void fsnotify_init_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, int add_flags); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, int add_flags); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, FSNOTIFY_OBJ_TYPE_INODE, add_flags); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, FSNOTIFY_OBJ_TYPE_INODE, add_flags); } /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* detach mark from inode / mount list, group list, drop inode reference */ extern void fsnotify_detach_mark(struct fsnotify_mark *mark); /* free mark */ extern void fsnotify_free_mark(struct fsnotify_mark *mark); /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); /* run all the marks in a group, and clear all of the vfsmount marks */ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); } /* run all the marks in a group, and clear all of the inode marks */ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); } /* run all the marks in a group, and clear all of the sn marks */ static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); } extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); static inline void fsnotify_init_event(struct fsnotify_event *event) { INIT_LIST_HEAD(&event->list); } #else static inline int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie) { return 0; } static inline int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { return 0; } static inline void __fsnotify_inode_delete(struct inode *inode) {} static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) {} static inline void fsnotify_sb_delete(struct super_block *sb) {} static inline void fsnotify_update_flags(struct dentry *dentry) {} static 
inline u32 fsnotify_get_cookie(void) { return 0; } static inline void fsnotify_unmount_inodes(struct super_block *sb) {} #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL__ */ #endif /* __LINUX_FSNOTIFY_BACKEND_H */
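/*
 * Editor's example (illustrative sketch, not part of the original header):
 * a hypothetical backend helper that attaches an inode mark listening for
 * open events on the inode and its children while suppressing modify
 * events. Setting FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS opts into the
 * behaviour documented above where the event flags inside ignore_mask are
 * honoured. A real backend would also allocate the mark from its own cache
 * and provide free_mark() in its fsnotify_ops; example_watch_inode() is a
 * made-up name.
 */
static int example_watch_inode(struct fsnotify_group *group,
			       struct inode *inode,
			       struct fsnotify_mark *mark)
{
	int ret;

	fsnotify_init_mark(mark, group);

	/* Interested in open events, also when reported on children. */
	mark->mask = FS_OPEN | FS_EVENT_ON_CHILD;

	/* Suppress modify events; honour event flags in the ignore mask. */
	mark->ignore_mask = FS_MODIFY;
	mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS;

	ret = fsnotify_add_inode_mark(mark, inode, 0);
	if (ret)
		fsnotify_put_mark(mark);	/* drop the initial reference */

	return ret;
}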
// SPDX-License-Identifier: GPL-2.0 /* * Wakeup statistics in sysfs * * Copyright (c) 2019 Linux Foundation * Copyright (c) 2019 Greg Kroah-Hartman <gregkh@linuxfoundation.org> * Copyright (c) 2019 Google Inc. */ #include <linux/device.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/kdev_t.h> #include <linux/kernel.h> #include <linux/kobject.h> #include <linux/slab.h> #include <linux/timekeeping.h> #include "power.h" static struct class *wakeup_class; #define wakeup_attr(_name) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct wakeup_source *ws = dev_get_drvdata(dev); \ \ return sysfs_emit(buf, "%lu\n", ws->_name); \ } \ static DEVICE_ATTR_RO(_name) wakeup_attr(active_count); wakeup_attr(event_count); wakeup_attr(wakeup_count); wakeup_attr(expire_count); static ssize_t active_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); ktime_t active_time = ws->active ?
ktime_sub(ktime_get(), ws->last_time) : 0; return sysfs_emit(buf, "%lld\n", ktime_to_ms(active_time)); } static DEVICE_ATTR_RO(active_time_ms); static ssize_t total_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); ktime_t active_time; ktime_t total_time = ws->total_time; if (ws->active) { active_time = ktime_sub(ktime_get(), ws->last_time); total_time = ktime_add(total_time, active_time); } return sysfs_emit(buf, "%lld\n", ktime_to_ms(total_time)); } static DEVICE_ATTR_RO(total_time_ms); static ssize_t max_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); ktime_t active_time; ktime_t max_time = ws->max_time; if (ws->active) { active_time = ktime_sub(ktime_get(), ws->last_time); if (active_time > max_time) max_time = active_time; } return sysfs_emit(buf, "%lld\n", ktime_to_ms(max_time)); } static DEVICE_ATTR_RO(max_time_ms); static ssize_t last_change_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); return sysfs_emit(buf, "%lld\n", ktime_to_ms(ws->last_time)); } static DEVICE_ATTR_RO(last_change_ms); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", ws->name); } static DEVICE_ATTR_RO(name); static ssize_t prevent_suspend_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); ktime_t prevent_sleep_time = ws->prevent_sleep_time; if (ws->active && ws->autosleep_enabled) { prevent_sleep_time = ktime_add(prevent_sleep_time, ktime_sub(ktime_get(), ws->start_prevent_time)); } return sysfs_emit(buf, "%lld\n", ktime_to_ms(prevent_sleep_time)); } static DEVICE_ATTR_RO(prevent_suspend_time_ms); static struct attribute *wakeup_source_attrs[] = { &dev_attr_name.attr, &dev_attr_active_count.attr, &dev_attr_event_count.attr, &dev_attr_wakeup_count.attr, &dev_attr_expire_count.attr, &dev_attr_active_time_ms.attr, &dev_attr_total_time_ms.attr, &dev_attr_max_time_ms.attr, &dev_attr_last_change_ms.attr, &dev_attr_prevent_suspend_time_ms.attr, NULL, }; ATTRIBUTE_GROUPS(wakeup_source); static void device_create_release(struct device *dev) { kfree(dev); } static struct device *wakeup_source_device_create(struct device *parent, struct wakeup_source *ws) { struct device *dev = NULL; int retval; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto error; } device_initialize(dev); dev->devt = MKDEV(0, 0); dev->class = wakeup_class; dev->parent = parent; dev->groups = wakeup_source_groups; dev->release = device_create_release; dev_set_drvdata(dev, ws); device_set_pm_not_required(dev); retval = dev_set_name(dev, "wakeup%d", ws->id); if (retval) goto error; retval = device_add(dev); if (retval) goto error; return dev; error: put_device(dev); return ERR_PTR(retval); } /** * wakeup_source_sysfs_add - Add wakeup_source attributes to sysfs. * @parent: Device given wakeup source is associated with (or NULL if virtual). * @ws: Wakeup source to be added in sysfs. */ int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws) { struct device *dev; dev = wakeup_source_device_create(parent, ws); if (IS_ERR(dev)) return PTR_ERR(dev); ws->dev = dev; return 0; } /** * pm_wakeup_source_sysfs_add - Add wakeup_source attributes to sysfs * for a device if they're missing. 
* @parent: Device given wakeup source is associated with */ int pm_wakeup_source_sysfs_add(struct device *parent) { if (!parent->power.wakeup || parent->power.wakeup->dev) return 0; return wakeup_source_sysfs_add(parent, parent->power.wakeup); } /** * wakeup_source_sysfs_remove - Remove wakeup_source attributes from sysfs. * @ws: Wakeup source to be removed from sysfs. */ void wakeup_source_sysfs_remove(struct wakeup_source *ws) { device_unregister(ws->dev); } static int __init wakeup_sources_sysfs_init(void) { wakeup_class = class_create("wakeup"); return PTR_ERR_OR_ZERO(wakeup_class); } postcore_initcall(wakeup_sources_sysfs_init);
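/*
 * Editor's example (hedged sketch, not part of this file): a driver that
 * registers a named wakeup source gets a /sys/class/wakeup/wakeupN
 * directory populated with the attributes defined above. The example_*
 * identifiers and the "example-event" name are hypothetical.
 */
#include <linux/pm_wakeup.h>

static struct wakeup_source *example_ws;

static int example_enable_wakeup_stats(struct device *dev)
{
	/* Creates /sys/class/wakeup/wakeupN via wakeup_source_sysfs_add(). */
	example_ws = wakeup_source_register(dev, "example-event");
	return example_ws ? 0 : -ENOMEM;
}

static void example_report_wakeup_event(void)
{
	/* Updates event_count/active_count and the time statistics shown
	 * above; the source stays active for roughly 50 ms. */
	pm_wakeup_ws_event(example_ws, 50, false);
}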
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HUGETLB_INLINE_H #define _LINUX_HUGETLB_INLINE_H #ifdef CONFIG_HUGETLB_PAGE #include <linux/mm.h> static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) { return !!(vma->vm_flags & VM_HUGETLB); } #else static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) { return false; } #endif #endif
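/*
 * Editor's example (illustrative sketch, not from the header): because
 * is_vm_hugetlb_page() degrades to a constant false without
 * CONFIG_HUGETLB_PAGE, callers can test it in VMA walks without any
 * configuration guards. example_count_hugetlb_vmas() is a made-up helper
 * and assumes the caller holds mmap_read_lock(mm).
 */
#include <linux/mm.h>
#include <linux/hugetlb_inline.h>

static unsigned long example_count_hugetlb_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	unsigned long count = 0;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma)
		if (is_vm_hugetlb_page(vma))
			count++;

	return count;
}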
// SPDX-License-Identifier: GPL-2.0 #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/xarray.h> #include <net/net_debug.h> #include <net/page_pool/types.h> #include <net/page_pool/helpers.h> #include <net/sock.h> #include "page_pool_priv.h" #include "netdev-genl-gen.h" static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1); /* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user. * Ordering: inside rtnl_lock */ static DEFINE_MUTEX(page_pools_lock); /* Page pools are only reachable from user space (via netlink) if they are * linked to a netdev at creation time.
Following page pool "visibility" * states are possible: * - normal * - user.list: linked to real netdev, netdev: real netdev * - orphaned - real netdev has disappeared * - user.list: linked to lo, netdev: lo * - invisible - either (a) created without netdev linking, (b) unlisted due * to error, or (c) the entire namespace which owned this pool disappeared * - user.list: unhashed, netdev: unknown */ typedef int (*pp_nl_fill_cb)(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info); static int netdev_nl_page_pool_get_do(struct genl_info *info, u32 id, pp_nl_fill_cb fill) { struct page_pool *pool; struct sk_buff *rsp; int err; mutex_lock(&page_pools_lock); pool = xa_load(&page_pools, id); if (!pool || hlist_unhashed(&pool->user.list) || !net_eq(dev_net(pool->slow.netdev), genl_info_net(info))) { err = -ENOENT; goto err_unlock; } rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) { err = -ENOMEM; goto err_unlock; } err = fill(rsp, pool, info); if (err) goto err_free_msg; mutex_unlock(&page_pools_lock); return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); err_unlock: mutex_unlock(&page_pools_lock); return err; } struct page_pool_dump_cb { unsigned long ifindex; u32 pp_id; }; static int netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb, pp_nl_fill_cb fill) { struct page_pool_dump_cb *state = (void *)cb->ctx; const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; struct page_pool *pool; int err = 0; rtnl_lock(); mutex_lock(&page_pools_lock); for_each_netdev_dump(net, netdev, state->ifindex) { hlist_for_each_entry(pool, &netdev->page_pools, user.list) { if (state->pp_id && state->pp_id < pool->user.id) continue; state->pp_id = pool->user.id; err = fill(skb, pool, info); if (err) goto out; } state->pp_id = 0; } out: mutex_unlock(&page_pools_lock); rtnl_unlock(); return err; } static int page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { #ifdef CONFIG_PAGE_POOL_STATS struct page_pool_stats stats = {}; struct nlattr *nest; void *hdr; if (!page_pool_get_stats(pool, &stats)) return 0; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO); if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) || (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, pool->slow.netdev->ifindex))) goto err_cancel_nest; nla_nest_end(rsp, nest); if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST, stats.alloc_stats.fast) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW, stats.alloc_stats.slow) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER, stats.alloc_stats.slow_high_order) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY, stats.alloc_stats.empty) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL, stats.alloc_stats.refill) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE, stats.alloc_stats.waive) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED, stats.recycle_stats.cached) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL, stats.recycle_stats.cache_full) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING, stats.recycle_stats.ring) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL, stats.recycle_stats.ring_full) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT, 
stats.recycle_stats.released_refcnt)) goto err_cancel_msg; genlmsg_end(rsp, hdr); return 0; err_cancel_nest: nla_nest_cancel(rsp, nest); err_cancel_msg: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; #else GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS"); return -EOPNOTSUPP; #endif } int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)]; struct nlattr *nest; int err; u32 id; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO)) return -EINVAL; nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO]; err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest, netdev_page_pool_info_nl_policy, info->extack); if (err) return err; if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID)) return -EINVAL; if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) { NL_SET_ERR_MSG_ATTR(info->extack, tb[NETDEV_A_PAGE_POOL_IFINDEX], "selecting by ifindex not supported"); return -EINVAL; } id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]); return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill); } int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill); } static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { size_t inflight, refsz; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id)) goto err_cancel; if (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, pool->slow.netdev->ifindex)) goto err_cancel; if (pool->user.napi_id && nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id)) goto err_cancel; inflight = page_pool_inflight(pool, false); refsz = PAGE_SIZE << pool->p.order; if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) || nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, inflight * refsz)) goto err_cancel; if (pool->user.detach_time && nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME, pool->user.detach_time)) goto err_cancel; genlmsg_end(rsp, hdr); return 0; err_cancel: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd) { struct genl_info info; struct sk_buff *ntf; struct net *net; lockdep_assert_held(&page_pools_lock); /* 'invisible' page pools don't matter */ if (hlist_unhashed(&pool->user.list)) return; net = dev_net(pool->slow.netdev); if (!genl_has_listeners(&netdev_nl_family, net, NETDEV_NLGRP_PAGE_POOL)) return; genl_info_init_ntf(&info, &netdev_nl_family, cmd); ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!ntf) return; if (page_pool_nl_fill(ntf, pool, &info)) { nlmsg_free(ntf); return; } genlmsg_multicast_netns(&netdev_nl_family, net, ntf, 0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL); } int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info) { u32 id; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_ID)) return -EINVAL; id = nla_get_uint(info->attrs[NETDEV_A_PAGE_POOL_ID]); return netdev_nl_page_pool_get_do(info, id, page_pool_nl_fill); } int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_fill); } int page_pool_list(struct page_pool *pool) { static u32 id_alloc_next; int err; mutex_lock(&page_pools_lock); err = xa_alloc_cyclic(&page_pools, &pool->user.id, pool, xa_limit_32b, &id_alloc_next, 
GFP_KERNEL); if (err < 0) goto err_unlock; INIT_HLIST_NODE(&pool->user.list); if (pool->slow.netdev) { hlist_add_head(&pool->user.list, &pool->slow.netdev->page_pools); pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0; netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF); } mutex_unlock(&page_pools_lock); return 0; err_unlock: mutex_unlock(&page_pools_lock); return err; } void page_pool_detached(struct page_pool *pool) { mutex_lock(&page_pools_lock); pool->user.detach_time = ktime_get_boottime_seconds(); netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF); mutex_unlock(&page_pools_lock); } void page_pool_unlist(struct page_pool *pool) { mutex_lock(&page_pools_lock); netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF); xa_erase(&page_pools, pool->user.id); if (!hlist_unhashed(&pool->user.list)) hlist_del(&pool->user.list); mutex_unlock(&page_pools_lock); } static void page_pool_unreg_netdev_wipe(struct net_device *netdev) { struct page_pool *pool; struct hlist_node *n; mutex_lock(&page_pools_lock); hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) { hlist_del_init(&pool->user.list); pool->slow.netdev = NET_PTR_POISON; } mutex_unlock(&page_pools_lock); } static void page_pool_unreg_netdev(struct net_device *netdev) { struct page_pool *pool, *last; struct net_device *lo; lo = dev_net(netdev)->loopback_dev; mutex_lock(&page_pools_lock); last = NULL; hlist_for_each_entry(pool, &netdev->page_pools, user.list) { pool->slow.netdev = lo; netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF); last = pool; } if (last) hlist_splice_init(&netdev->page_pools, &last->user.list, &lo->page_pools); mutex_unlock(&page_pools_lock); } static int page_pool_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; if (hlist_empty(&netdev->page_pools)) return NOTIFY_OK; if (netdev->ifindex != LOOPBACK_IFINDEX) page_pool_unreg_netdev(netdev); else page_pool_unreg_netdev_wipe(netdev); return NOTIFY_OK; } static struct notifier_block page_pool_netdevice_nb = { .notifier_call = page_pool_netdevice_event, }; static int __init page_pool_user_init(void) { return register_netdevice_notifier(&page_pool_netdevice_nb); } subsys_initcall(page_pool_user_init);
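/*
 * Editor's example (hedged sketch of driver-side code, not part of this
 * file): only because .netdev is filled in at pool creation time does
 * page_pool_list() hash the pool onto netdev->page_pools, which is what
 * makes it visible to the netlink GET/DUMP handlers above (the "normal"
 * state); .napi additionally gets reported as NETDEV_A_PAGE_POOL_NAPI_ID.
 * The function name and sizes below are made up.
 */
#include <linux/netdevice.h>
#include <linux/numa.h>
#include <net/page_pool/helpers.h>

static struct page_pool *example_create_listed_pool(struct net_device *netdev,
						    struct napi_struct *napi)
{
	struct page_pool_params pp = {
		.order		= 0,
		.pool_size	= 256,
		.nid		= NUMA_NO_NODE,
		.dev		= netdev->dev.parent,
		.napi		= napi,		/* reported via netlink */
		.netdev		= netdev,	/* makes the pool user-visible */
	};

	return page_pool_create(&pp);
}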
// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/common.c - Common device power management code. * * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp. */ #include <linux/kernel.h> #include <linux/device.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/pm_clock.h> #include <linux/acpi.h> #include <linux/pm_domain.h> #include "power.h" /** * dev_pm_get_subsys_data - Create or refcount power.subsys_data for device. * @dev: Device to handle. * * If power.subsys_data is NULL, point it to a new object, otherwise increment * its reference counter. Return 0 if new object has been created or refcount * increased, otherwise negative error code. */ int dev_pm_get_subsys_data(struct device *dev) { struct pm_subsys_data *psd; psd = kzalloc(sizeof(*psd), GFP_KERNEL); if (!psd) return -ENOMEM; spin_lock_irq(&dev->power.lock); if (dev->power.subsys_data) { dev->power.subsys_data->refcount++; } else { spin_lock_init(&psd->lock); psd->refcount = 1; dev->power.subsys_data = psd; pm_clk_init(dev); psd = NULL; } spin_unlock_irq(&dev->power.lock); /* kfree() verifies that its argument is nonzero. */ kfree(psd); return 0; } EXPORT_SYMBOL_GPL(dev_pm_get_subsys_data); /** * dev_pm_put_subsys_data - Drop reference to power.subsys_data. * @dev: Device to handle. * * If the reference counter of power.subsys_data is zero after dropping the * reference, power.subsys_data is removed. */ void dev_pm_put_subsys_data(struct device *dev) { struct pm_subsys_data *psd; spin_lock_irq(&dev->power.lock); psd = dev_to_psd(dev); if (!psd) goto out; if (--psd->refcount == 0) dev->power.subsys_data = NULL; else psd = NULL; out: spin_unlock_irq(&dev->power.lock); kfree(psd); } EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data); /** * dev_pm_domain_attach - Attach a device to its PM domain. * @dev: Device to attach. * @power_on: Used to indicate whether we should power on the device. * * The @dev may only be attached to a single PM domain. By iterating through * the available alternatives we try to find a valid PM domain for the device.
* When attachment succeeds, the ->detach() callback in the struct dev_pm_domain * should be assigned by the corresponding attach function. * * This function should typically be invoked from subsystem level code during * the probe phase, especially for subsystems that hold devices which require * power management through PM domains. * * Callers must ensure proper synchronization of this function with power * management callbacks. * * Returns 0 when a PM domain is successfully attached, or when it is found that * the device doesn't need one, else a negative error code. */ int dev_pm_domain_attach(struct device *dev, bool power_on) { int ret; if (dev->pm_domain) return 0; ret = acpi_dev_pm_attach(dev, power_on); if (!ret) ret = genpd_dev_pm_attach(dev); return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(dev_pm_domain_attach); /** * dev_pm_domain_attach_by_id - Associate a device with one of its PM domains. * @dev: The device used to look up the PM domain. * @index: The index of the PM domain. * * As @dev may only be attached to a single PM domain, the backend PM domain * provider creates a virtual device to attach instead. If attachment succeeds, * the ->detach() callback in the struct dev_pm_domain is assigned by the * corresponding backend attach function, so as to deal with detaching of the * created virtual device. * * This function should typically be invoked by a driver during the probe phase, * in case its device requires power management through multiple PM domains. The * driver may benefit from using the received device to configure device-links * towards its original device. Depending on the use-case and if needed, the * links may be dynamically changed by the driver, which allows it to control * the power to the PM domains independently from each other. * * Callers must ensure proper synchronization of this function with power * management callbacks. * * Returns the virtually created device when it is successfully attached to its * PM domain, NULL in case @dev doesn't need a PM domain, else an ERR_PTR(). * Note that, to detach the returned virtual device, the driver shall call * dev_pm_domain_detach() on it, typically during the remove phase. */ struct device *dev_pm_domain_attach_by_id(struct device *dev, unsigned int index) { if (dev->pm_domain) return ERR_PTR(-EEXIST); return genpd_dev_pm_attach_by_id(dev, index); } EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id); /** * dev_pm_domain_attach_by_name - Associate a device with one of its PM domains. * @dev: The device used to look up the PM domain. * @name: The name of the PM domain. * * For a detailed function description, see dev_pm_domain_attach_by_id(). */ struct device *dev_pm_domain_attach_by_name(struct device *dev, const char *name) { if (dev->pm_domain) return ERR_PTR(-EEXIST); return genpd_dev_pm_attach_by_name(dev, name); } EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name); /** * dev_pm_domain_attach_list - Associate a device with its PM domains. * @dev: The device used to look up the PM domains for. * @data: The data used for attaching to the PM domains. * @list: An out-parameter with an allocated list of attached PM domains. * * This function helps to attach a device to its multiple PM domains. The * caller, which is typically a driver's probe function, may provide a list of * names for the PM domains that we should try to attach the device to, but it * may also provide an empty list, in which case the device is attached to all * of the available PM domains.
* * Callers must ensure proper synchronization of this function with power * management callbacks. * * Returns the number of attached PM domains or a negative error code in case of * a failure. Note that, to detach the list of PM domains, the driver shall call * dev_pm_domain_detach_list(), typically during the remove phase. */ int dev_pm_domain_attach_list(struct device *dev, const struct dev_pm_domain_attach_data *data, struct dev_pm_domain_list **list) { struct device_node *np = dev->of_node; struct dev_pm_domain_list *pds; struct device *pd_dev = NULL; int ret, i, num_pds = 0; bool by_id = true; u32 pd_flags = data ? data->pd_flags : 0; u32 link_flags = pd_flags & PD_FLAG_NO_DEV_LINK ? 0 : DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME; if (dev->pm_domain) return -EEXIST; /* For now this is limited to OF based platforms. */ if (!np) return 0; if (data && data->pd_names) { num_pds = data->num_pd_names; by_id = false; } else { num_pds = of_count_phandle_with_args(np, "power-domains", "#power-domain-cells"); } if (num_pds <= 0) return 0; pds = devm_kzalloc(dev, sizeof(*pds), GFP_KERNEL); if (!pds) return -ENOMEM; pds->pd_devs = devm_kcalloc(dev, num_pds, sizeof(*pds->pd_devs), GFP_KERNEL); if (!pds->pd_devs) return -ENOMEM; pds->pd_links = devm_kcalloc(dev, num_pds, sizeof(*pds->pd_links), GFP_KERNEL); if (!pds->pd_links) return -ENOMEM; if (link_flags && pd_flags & PD_FLAG_DEV_LINK_ON) link_flags |= DL_FLAG_RPM_ACTIVE; for (i = 0; i < num_pds; i++) { if (by_id) pd_dev = dev_pm_domain_attach_by_id(dev, i); else pd_dev = dev_pm_domain_attach_by_name(dev, data->pd_names[i]); if (IS_ERR_OR_NULL(pd_dev)) { ret = pd_dev ? PTR_ERR(pd_dev) : -ENODEV; goto err_attach; } if (link_flags) { struct device_link *link; link = device_link_add(dev, pd_dev, link_flags); if (!link) { ret = -ENODEV; goto err_link; } pds->pd_links[i] = link; } pds->pd_devs[i] = pd_dev; } pds->num_pds = num_pds; *list = pds; return num_pds; err_link: dev_pm_domain_detach(pd_dev, true); err_attach: while (--i >= 0) { if (pds->pd_links[i]) device_link_del(pds->pd_links[i]); dev_pm_domain_detach(pds->pd_devs[i], true); } return ret; } EXPORT_SYMBOL_GPL(dev_pm_domain_attach_list); /** * dev_pm_domain_detach - Detach a device from its PM domain. * @dev: Device to detach. * @power_off: Used to indicate whether we should power off the device. * * This function will reverse the actions from dev_pm_domain_attach(), * dev_pm_domain_attach_by_id() and dev_pm_domain_attach_by_name(), thus it * detaches @dev from its PM domain. Typically it should be invoked during the * remove phase, either from subsystem level code or from drivers. * * Callers must ensure proper synchronization of this function with power * management callbacks. */ void dev_pm_domain_detach(struct device *dev, bool power_off) { if (dev->pm_domain && dev->pm_domain->detach) dev->pm_domain->detach(dev, power_off); } EXPORT_SYMBOL_GPL(dev_pm_domain_detach); /** * dev_pm_domain_detach_list - Detach a list of PM domains. * @list: The list of PM domains to detach. * * This function reverses the actions from dev_pm_domain_attach_list(). * Typically it should be invoked during the remove phase from drivers. * * Callers must ensure proper synchronization of this function with power * management callbacks.
*/ void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) { int i; if (!list) return; for (i = 0; i < list->num_pds; i++) { if (list->pd_links[i]) device_link_del(list->pd_links[i]); dev_pm_domain_detach(list->pd_devs[i], true); } } EXPORT_SYMBOL_GPL(dev_pm_domain_detach_list); /** * dev_pm_domain_start - Start the device through its PM domain. * @dev: Device to start. * * This function should typically be called during probe by a subsystem/driver, * when it needs to start its device from the PM domain's perspective. Note * that, it's assumed that the PM domain is already powered on when this * function is called. * * Returns 0 on success and negative error values on failures. */ int dev_pm_domain_start(struct device *dev) { if (dev->pm_domain && dev->pm_domain->start) return dev->pm_domain->start(dev); return 0; } EXPORT_SYMBOL_GPL(dev_pm_domain_start); /** * dev_pm_domain_set - Set PM domain of a device. * @dev: Device whose PM domain is to be set. * @pd: PM domain to be set, or NULL. * * Sets the PM domain the device belongs to. The PM domain of a device needs * to be set before its probe finishes (it's bound to a driver). * * This function must be called with the device lock held. */ void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) { if (dev->pm_domain == pd) return; WARN(pd && device_is_bound(dev), "PM domains can only be changed for unbound devices\n"); dev->pm_domain = pd; device_pm_check_callbacks(dev); } EXPORT_SYMBOL_GPL(dev_pm_domain_set); /** * dev_pm_domain_set_performance_state - Request a new performance state. * @dev: The device to make the request for. * @state: Target performance state for the device. * * This function should be called when a new performance state needs to be * requested for a device that is attached to a PM domain. Note that, the * support for performance scaling for PM domains is optional. * * Returns 0 on success and when performance scaling isn't supported, negative * error code on failure. */ int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state) { if (dev->pm_domain && dev->pm_domain->set_performance_state) return dev->pm_domain->set_performance_state(dev, state); return 0; } EXPORT_SYMBOL_GPL(dev_pm_domain_set_performance_state);
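/*
 * Editor's example (hypothetical consumer driver, not part of this file;
 * the domain names and example_* identifiers are made up): attach to two
 * named power domains during probe and keep the returned list so that
 * remove() can pass it to dev_pm_domain_detach_list(). A return value of
 * 0 from dev_pm_domain_attach_list() means the device needs no PM domains.
 */
#include <linux/pm_domain.h>

static const char * const example_pd_names[] = { "perf", "mem" };

static int example_attach_pm_domains(struct device *dev,
				     struct dev_pm_domain_list **pd_list)
{
	const struct dev_pm_domain_attach_data pd_data = {
		.pd_names = example_pd_names,
		.num_pd_names = ARRAY_SIZE(example_pd_names),
		.pd_flags = PD_FLAG_DEV_LINK_ON, /* device links, RPM active */
	};
	int ret;

	ret = dev_pm_domain_attach_list(dev, &pd_data, pd_list);
	if (ret < 0)
		return ret;

	/* ret is the number of attached domains; detach with
	 * dev_pm_domain_detach_list(*pd_list) in the remove path. */
	return 0;
}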
/* SPDX-License-Identifier: GPL-2.0 */ /* * * V 4 L 2 D R I V E R H E L P E R A P I * * Moved from videodev2.h * * Some commonly needed functions for drivers (v4l2-common.o module) */ #ifndef _V4L2_DEV_H #define _V4L2_DEV_H #include <linux/poll.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/cdev.h> #include <linux/mutex.h> #include <linux/videodev2.h> #include <media/media-entity.h> #define VIDEO_MAJOR 81 /** * enum vfl_devnode_type - type of V4L2 device node * * @VFL_TYPE_VIDEO: for video input/output devices * @VFL_TYPE_VBI: for vertical blank data (i.e.
closed captions, teletext) * @VFL_TYPE_RADIO: for radio tuners * @VFL_TYPE_SUBDEV: for V4L2 subdevices * @VFL_TYPE_SDR: for Software Defined Radio tuners * @VFL_TYPE_TOUCH: for touch sensors * @VFL_TYPE_MAX: number of VFL types, must always be last in the enum */ enum vfl_devnode_type { VFL_TYPE_VIDEO, VFL_TYPE_VBI, VFL_TYPE_RADIO, VFL_TYPE_SUBDEV, VFL_TYPE_SDR, VFL_TYPE_TOUCH, VFL_TYPE_MAX /* Shall be the last one */ }; /** * enum vfl_devnode_direction - Identifies if a &struct video_device * corresponds to a receiver, a transmitter or a mem-to-mem device. * * @VFL_DIR_RX: device is a receiver. * @VFL_DIR_TX: device is a transmitter. * @VFL_DIR_M2M: device is a memory to memory device. * * Note: Ignored if &enum vfl_devnode_type is %VFL_TYPE_SUBDEV. */ enum vfl_devnode_direction { VFL_DIR_RX, VFL_DIR_TX, VFL_DIR_M2M, }; struct v4l2_ioctl_callbacks; struct video_device; struct v4l2_device; struct v4l2_ctrl_handler; /** * enum v4l2_video_device_flags - Flags used by &struct video_device * * @V4L2_FL_REGISTERED: * indicates that a &struct video_device is registered. * Drivers can clear this flag if they want to block all future * device access. It is cleared by video_unregister_device. * @V4L2_FL_USES_V4L2_FH: * indicates that file->private_data points to &struct v4l2_fh. * This flag is set by the core when v4l2_fh_init() is called. * All new drivers should use it. * @V4L2_FL_QUIRK_INVERTED_CROP: * some old M2M drivers use g/s_crop/cropcap incorrectly: crop and * compose are swapped. If this flag is set, then the selection * targets are swapped in the g/s_crop/cropcap functions in v4l2-ioctl.c. * This allows those drivers to correctly implement the selection API, * but the old crop API will still work as expected in order to preserve * backwards compatibility. * Never set this flag for new drivers. * @V4L2_FL_SUBDEV_RO_DEVNODE: * indicates that the video device node is registered in read-only mode. * The flag only applies to device nodes registered for sub-devices, it is * set by the core when the sub-devices device nodes are registered with * v4l2_device_register_ro_subdev_nodes() and used by the sub-device ioctl * handler to restrict access to some ioctl calls. */ enum v4l2_video_device_flags { V4L2_FL_REGISTERED = 0, V4L2_FL_USES_V4L2_FH = 1, V4L2_FL_QUIRK_INVERTED_CROP = 2, V4L2_FL_SUBDEV_RO_DEVNODE = 3, }; /* Priority helper functions */ /** * struct v4l2_prio_state - stores the priority states * * @prios: array with elements to store the array priorities * * * .. note:: * The size of @prios array matches the number of priority types defined * by enum &v4l2_priority. */ struct v4l2_prio_state { atomic_t prios[4]; }; /** * v4l2_prio_init - initializes a struct v4l2_prio_state * * @global: pointer to &struct v4l2_prio_state */ void v4l2_prio_init(struct v4l2_prio_state *global); /** * v4l2_prio_change - changes the v4l2 file handler priority * * @global: pointer to the &struct v4l2_prio_state of the device node. * @local: pointer to the desired priority, as defined by enum &v4l2_priority * @new: Priority type requested, as defined by enum &v4l2_priority. * * .. note:: * This function should be used only by the V4L2 core. */ int v4l2_prio_change(struct v4l2_prio_state *global, enum v4l2_priority *local, enum v4l2_priority new); /** * v4l2_prio_open - Implements the priority logic for a file handler open * * @global: pointer to the &struct v4l2_prio_state of the device node. * @local: pointer to the desired priority, as defined by enum &v4l2_priority * * .. 
note:: * This function should be used only by the V4L2 core. */ void v4l2_prio_open(struct v4l2_prio_state *global, enum v4l2_priority *local); /** * v4l2_prio_close - Implements the priority logic for a file handler close * * @global: pointer to the &struct v4l2_prio_state of the device node. * @local: priority to be released, as defined by enum &v4l2_priority * * .. note:: * This function should be used only by the V4L2 core. */ void v4l2_prio_close(struct v4l2_prio_state *global, enum v4l2_priority local); /** * v4l2_prio_max - Return the maximum priority, as stored at the @global array. * * @global: pointer to the &struct v4l2_prio_state of the device node. * * .. note:: * This function should be used only by the V4L2 core. */ enum v4l2_priority v4l2_prio_max(struct v4l2_prio_state *global); /** * v4l2_prio_check - Implements the priority logic for a file handler close * * @global: pointer to the &struct v4l2_prio_state of the device node. * @local: desired priority, as defined by enum &v4l2_priority local * * .. note:: * This function should be used only by the V4L2 core. */ int v4l2_prio_check(struct v4l2_prio_state *global, enum v4l2_priority local); /** * struct v4l2_file_operations - fs operations used by a V4L2 device * * @owner: pointer to struct module * @read: operations needed to implement the read() syscall * @write: operations needed to implement the write() syscall * @poll: operations needed to implement the poll() syscall * @unlocked_ioctl: operations needed to implement the ioctl() syscall * @compat_ioctl32: operations needed to implement the ioctl() syscall for * the special case where the Kernel uses 64 bits instructions, but * the userspace uses 32 bits. * @get_unmapped_area: called by the mmap() syscall, used when %!CONFIG_MMU * @mmap: operations needed to implement the mmap() syscall * @open: operations needed to implement the open() syscall * @release: operations needed to implement the release() syscall * * .. note:: * * Those operations are used to implemente the fs struct file_operations * at the V4L2 drivers. The V4L2 core overrides the fs ops with some * extra logic needed by the subsystem. */ struct v4l2_file_operations { struct module *owner; ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); #ifdef CONFIG_COMPAT long (*compat_ioctl32) (struct file *, unsigned int, unsigned long); #endif unsigned long (*get_unmapped_area) (struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); int (*open) (struct file *); int (*release) (struct file *); }; /* * Newer version of video_device, handled by videodev2.c * This version moves redundant code from video device code to * the common handler */ /** * struct video_device - Structure used to create and manage the V4L2 device * nodes. * * @entity: &struct media_entity * @intf_devnode: pointer to &struct media_intf_devnode * @pipe: &struct media_pipeline * @fops: pointer to &struct v4l2_file_operations for the video device * @device_caps: device capabilities as used in v4l2_capabilities * @dev: &struct device for the video device * @cdev: character device * @v4l2_dev: pointer to &struct v4l2_device parent * @dev_parent: pointer to &struct device parent * @ctrl_handler: Control handler associated with this device node. * May be NULL. 
* @queue: &struct vb2_queue associated with this device node. May be NULL. * @prio: pointer to &struct v4l2_prio_state with device's Priority state. * If NULL, then v4l2_dev->prio will be used. * @name: video device name * @vfl_type: V4L device type, as defined by &enum vfl_devnode_type * @vfl_dir: V4L receiver, transmitter or m2m * @minor: device node 'minor'. It is set to -1 if the registration failed * @num: number of the video device node * @flags: video device flags. Use bitops to set/clear/test flags. * Contains a set of &enum v4l2_video_device_flags. * @index: attribute to differentiate multiple indices on one physical device * @fh_lock: Lock for all v4l2_fhs * @fh_list: List of &struct v4l2_fh * @dev_debug: Internal device debug flags, not for use by drivers * @tvnorms: Supported tv norms * * @release: video device release() callback * @ioctl_ops: pointer to &struct v4l2_ioctl_ops with ioctl callbacks * * @valid_ioctls: bitmap with the valid ioctls for this device * @lock: pointer to &struct mutex serialization lock * * .. note:: * Only set @dev_parent if that can't be deduced from @v4l2_dev. */ struct video_device { #if defined(CONFIG_MEDIA_CONTROLLER) struct media_entity entity; struct media_intf_devnode *intf_devnode; struct media_pipeline pipe; #endif const struct v4l2_file_operations *fops; u32 device_caps; /* sysfs */ struct device dev; struct cdev *cdev; struct v4l2_device *v4l2_dev; struct device *dev_parent; struct v4l2_ctrl_handler *ctrl_handler; struct vb2_queue *queue; struct v4l2_prio_state *prio; /* device info */ char name[64]; enum vfl_devnode_type vfl_type; enum vfl_devnode_direction vfl_dir; int minor; u16 num; unsigned long flags; int index; /* V4L2 file handles */ spinlock_t fh_lock; struct list_head fh_list; int dev_debug; v4l2_std_id tvnorms; /* callbacks */ void (*release)(struct video_device *vdev); const struct v4l2_ioctl_ops *ioctl_ops; DECLARE_BITMAP(valid_ioctls, BASE_VIDIOC_PRIVATE); struct mutex *lock; }; /** * media_entity_to_video_device - Returns a &struct video_device from * the &struct media_entity embedded on it. * * @__entity: pointer to &struct media_entity */ #define media_entity_to_video_device(__entity) \ container_of(__entity, struct video_device, entity) /** * to_video_device - Returns a &struct video_device from the * &struct device embedded on it. * * @cd: pointer to &struct device */ #define to_video_device(cd) container_of(cd, struct video_device, dev) /** * __video_register_device - register video4linux devices * * @vdev: struct video_device to register * @type: type of device to register, as defined by &enum vfl_devnode_type * @nr: which device node number is desired: * (0 == /dev/video0, 1 == /dev/video1, ..., -1 == first free) * @warn_if_nr_in_use: warn if the desired device node number * was already in use and another number was chosen instead. * @owner: module that owns the video device node * * The registration code assigns minor numbers and device node numbers * based on the requested type and registers the new device node with * the kernel. * * This function assumes that struct video_device was zeroed when it * was allocated and does not contain any stale date. * * An error is returned if no free minor or device node number could be * found, or if the registration of the device node failed. * * Returns 0 on success. * * .. note:: * * This function is meant to be used only inside the V4L2 core. * Drivers should use video_register_device() or * video_register_device_no_warn(). 
*/ int __must_check __video_register_device(struct video_device *vdev, enum vfl_devnode_type type, int nr, int warn_if_nr_in_use, struct module *owner); /** * video_register_device - register video4linux devices * * @vdev: struct video_device to register * @type: type of device to register, as defined by &enum vfl_devnode_type * @nr: which device node number is desired: * (0 == /dev/video0, 1 == /dev/video1, ..., -1 == first free) * * Internally, it calls __video_register_device(). Please see its * documentation for more details. * * .. note:: * if video_register_device fails, the release() callback of * &struct video_device structure is *not* called, so the caller * is responsible for freeing any data. Usually that means that * video_device_release() should be called on failure. */ static inline int __must_check video_register_device(struct video_device *vdev, enum vfl_devnode_type type, int nr) { return __video_register_device(vdev, type, nr, 1, vdev->fops->owner); } /** * video_register_device_no_warn - register video4linux devices * * @vdev: struct video_device to register * @type: type of device to register, as defined by &enum vfl_devnode_type * @nr: which device node number is desired: * (0 == /dev/video0, 1 == /dev/video1, ..., -1 == first free) * * This function is identical to video_register_device() except that no * warning is issued if the desired device node number was already in use. * * Internally, it calls __video_register_device(). Please see its * documentation for more details. * * .. note:: * if video_register_device fails, the release() callback of * &struct video_device structure is *not* called, so the caller * is responsible for freeing any data. Usually that means that * video_device_release() should be called on failure. */ static inline int __must_check video_register_device_no_warn(struct video_device *vdev, enum vfl_devnode_type type, int nr) { return __video_register_device(vdev, type, nr, 0, vdev->fops->owner); } /** * video_unregister_device - Unregister video devices. * * @vdev: &struct video_device to unregister * * Does nothing if vdev == NULL or if video_is_registered() returns false. */ void video_unregister_device(struct video_device *vdev); /** * video_device_alloc - helper function to alloc &struct video_device * * Returns NULL if %-ENOMEM or a &struct video_device on success. */ struct video_device * __must_check video_device_alloc(void); /** * video_device_release - helper function to release &struct video_device * * @vdev: pointer to &struct video_device * * Can also be used for video_device->release\(\). */ void video_device_release(struct video_device *vdev); /** * video_device_release_empty - helper function to implement the * video_device->release\(\) callback. * * @vdev: pointer to &struct video_device * * This release function does nothing. * * It should be used when the video_device is a static global struct. * * .. note:: * Having a static video_device is a dubious construction at best. */ void video_device_release_empty(struct video_device *vdev); /** * v4l2_disable_ioctl - mark that a given command isn't implemented and * shouldn't use core locking * * @vdev: pointer to &struct video_device * @cmd: ioctl command * * This function allows drivers to provide just one v4l2_ioctl_ops struct, but * disable ioctls based on the specific card that is actually found. * * .. note:: * * This must be called before video_register_device. * See also the comments for determine_valid_ioctls().
*/ static inline void v4l2_disable_ioctl(struct video_device *vdev, unsigned int cmd) { if (_IOC_NR(cmd) < BASE_VIDIOC_PRIVATE) set_bit(_IOC_NR(cmd), vdev->valid_ioctls); } /** * video_get_drvdata - gets private data from &struct video_device. * * @vdev: pointer to &struct video_device * * returns a pointer to the private data */ static inline void *video_get_drvdata(struct video_device *vdev) { return dev_get_drvdata(&vdev->dev); } /** * video_set_drvdata - sets private data from &struct video_device. * * @vdev: pointer to &struct video_device * @data: private data pointer */ static inline void video_set_drvdata(struct video_device *vdev, void *data) { dev_set_drvdata(&vdev->dev, data); } /** * video_devdata - gets &struct video_device from struct file. * * @file: pointer to struct file */ struct video_device *video_devdata(struct file *file); /** * video_drvdata - gets private data from &struct video_device using the * struct file. * * @file: pointer to struct file * * This is function combines both video_get_drvdata() and video_devdata() * as this is used very often. */ static inline void *video_drvdata(struct file *file) { return video_get_drvdata(video_devdata(file)); } /** * video_device_node_name - returns the video device name * * @vdev: pointer to &struct video_device * * Returns the device name string */ static inline const char *video_device_node_name(struct video_device *vdev) { return dev_name(&vdev->dev); } /** * video_is_registered - returns true if the &struct video_device is registered. * * * @vdev: pointer to &struct video_device */ static inline int video_is_registered(struct video_device *vdev) { return test_bit(V4L2_FL_REGISTERED, &vdev->flags); } #if defined(CONFIG_MEDIA_CONTROLLER) /** * video_device_pipeline_start - Mark a pipeline as streaming * @vdev: Starting video device * @pipe: Media pipeline to be assigned to all entities in the pipeline. * * Mark all entities connected to a given video device through enabled links, * either directly or indirectly, as streaming. The given pipeline object is * assigned to every pad in the pipeline and stored in the media_pad pipe * field. * * Calls to this function can be nested, in which case the same number of * video_device_pipeline_stop() calls will be required to stop streaming. The * pipeline pointer must be identical for all nested calls to * video_device_pipeline_start(). * * The video device must contain a single pad. * * This is a convenience wrapper around media_pipeline_start(). */ __must_check int video_device_pipeline_start(struct video_device *vdev, struct media_pipeline *pipe); /** * __video_device_pipeline_start - Mark a pipeline as streaming * @vdev: Starting video device * @pipe: Media pipeline to be assigned to all entities in the pipeline. * * ..note:: This is the non-locking version of video_device_pipeline_start() * * The video device must contain a single pad. * * This is a convenience wrapper around __media_pipeline_start(). */ __must_check int __video_device_pipeline_start(struct video_device *vdev, struct media_pipeline *pipe); /** * video_device_pipeline_stop - Mark a pipeline as not streaming * @vdev: Starting video device * * Mark all entities connected to a given video device through enabled links, * either directly or indirectly, as not streaming. The media_pad pipe field * is reset to %NULL. * * If multiple calls to media_pipeline_start() have been made, the same * number of calls to this function are required to mark the pipeline as not * streaming. 
* * The video device must contain a single pad. * * This is a convenience wrapper around media_pipeline_stop(). */ void video_device_pipeline_stop(struct video_device *vdev); /** * __video_device_pipeline_stop - Mark a pipeline as not streaming * @vdev: Starting video device * * .. note:: This is the non-locking version of media_pipeline_stop() * * The video device must contain a single pad. * * This is a convenience wrapper around __media_pipeline_stop(). */ void __video_device_pipeline_stop(struct video_device *vdev); /** * video_device_pipeline_alloc_start - Mark a pipeline as streaming * @vdev: Starting video device * * video_device_pipeline_alloc_start() is similar to video_device_pipeline_start() * but instead of working on a given pipeline the function will use an * existing pipeline if the video device is already part of a pipeline, or * allocate a new pipeline. * * Calls to video_device_pipeline_alloc_start() must be matched with * video_device_pipeline_stop(). */ __must_check int video_device_pipeline_alloc_start(struct video_device *vdev); /** * video_device_pipeline - Get the media pipeline a video device is part of * @vdev: The video device * * This function returns the media pipeline that a video device has been * associated with when constructing the pipeline with * video_device_pipeline_start(). The pointer remains valid until * video_device_pipeline_stop() is called. * * Return: The media_pipeline the video device is part of, or NULL if the video * device is not part of any pipeline. * * The video device must contain a single pad. * * This is a convenience wrapper around media_entity_pipeline(). */ struct media_pipeline *video_device_pipeline(struct video_device *vdev); #endif /* CONFIG_MEDIA_CONTROLLER */ #endif /* _V4L2_DEV_H */
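/*
 * Editor's example (hedged sketch, not part of the header): minimal
 * registration of a capture video node, illustrating the documented rule
 * that release() is not invoked when registration fails. The caller is
 * assumed to pass valid fops/ioctl_ops; example_register_video_node() and
 * the "example-cap" name are made up.
 */
static int example_register_video_node(struct v4l2_device *v4l2_dev,
				       const struct v4l2_file_operations *fops,
				       const struct v4l2_ioctl_ops *ioctl_ops,
				       struct video_device **out)
{
	struct video_device *vdev;
	int ret;

	vdev = video_device_alloc();
	if (!vdev)
		return -ENOMEM;

	strscpy(vdev->name, "example-cap", sizeof(vdev->name));
	vdev->v4l2_dev = v4l2_dev;
	vdev->fops = fops;
	vdev->ioctl_ops = ioctl_ops;
	vdev->vfl_dir = VFL_DIR_RX;
	vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
	vdev->release = video_device_release;

	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
	if (ret) {
		/* release() is not called on failure, so free explicitly. */
		video_device_release(vdev);
		return ret;
	}

	*out = vdev;
	return 0;
}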
// SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for memoryless devices * * Copyright (c) 2006 Anssi Hannula <anssi.hannula@gmail.com> * Copyright (c) 2006 Dmitry Torokhov <dtor@mail.ru> */ /* #define DEBUG */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include <linux/input.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/jiffies.h> #include <linux/fixp-arith.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Anssi Hannula <anssi.hannula@gmail.com>"); MODULE_DESCRIPTION("Force feedback support for memoryless devices"); /* Number of effects handled with memoryless devices */ #define FF_MEMLESS_EFFECTS 16 /* Envelope update interval in ms */ #define FF_ENVELOPE_INTERVAL 50 #define FF_EFFECT_STARTED 0 #define FF_EFFECT_PLAYING 1 #define FF_EFFECT_ABORTING 2 struct ml_effect_state { struct ff_effect *effect; unsigned long flags; /* effect state (STARTED, PLAYING, etc) */ int count; /* loop count of the effect */ unsigned long play_at; /* start time */ unsigned long stop_at; /* stop time */ unsigned long adj_at; /* last time the effect was sent */ }; struct ml_device { void *private; struct ml_effect_state states[FF_MEMLESS_EFFECTS]; int gain; struct timer_list timer; struct input_dev *dev; int (*play_effect)(struct input_dev *dev, void *data, struct ff_effect *effect); }; static const struct ff_envelope *get_envelope(const struct ff_effect
*effect) { static const struct ff_envelope empty_envelope; switch (effect->type) { case FF_PERIODIC: return &effect->u.periodic.envelope; case FF_CONSTANT: return &effect->u.constant.envelope; default: return &empty_envelope; } } /* * Check for the next time envelope requires an update on memoryless devices */ static unsigned long calculate_next_time(struct ml_effect_state *state) { const struct ff_envelope *envelope = get_envelope(state->effect); unsigned long attack_stop, fade_start, next_fade; if (envelope->attack_length) { attack_stop = state->play_at + msecs_to_jiffies(envelope->attack_length); if (time_before(state->adj_at, attack_stop)) return state->adj_at + msecs_to_jiffies(FF_ENVELOPE_INTERVAL); } if (state->effect->replay.length) { if (envelope->fade_length) { /* check when fading should start */ fade_start = state->stop_at - msecs_to_jiffies(envelope->fade_length); if (time_before(state->adj_at, fade_start)) return fade_start; /* already fading, advance to next checkpoint */ next_fade = state->adj_at + msecs_to_jiffies(FF_ENVELOPE_INTERVAL); if (time_before(next_fade, state->stop_at)) return next_fade; } return state->stop_at; } return state->play_at; } static void ml_schedule_timer(struct ml_device *ml) { struct ml_effect_state *state; unsigned long now = jiffies; unsigned long earliest = 0; unsigned long next_at; int events = 0; int i; pr_debug("calculating next timer\n"); for (i = 0; i < FF_MEMLESS_EFFECTS; i++) { state = &ml->states[i]; if (!test_bit(FF_EFFECT_STARTED, &state->flags)) continue; if (test_bit(FF_EFFECT_PLAYING, &state->flags)) next_at = calculate_next_time(state); else next_at = state->play_at; if (time_before_eq(now, next_at) && (++events == 1 || time_before(next_at, earliest))) earliest = next_at; } if (!events) { pr_debug("no actions\n"); del_timer(&ml->timer); } else { pr_debug("timer set\n"); mod_timer(&ml->timer, earliest); } } /* * Apply an envelope to a value */ static int apply_envelope(struct ml_effect_state *state, int value, struct ff_envelope *envelope) { struct ff_effect *effect = state->effect; unsigned long now = jiffies; int time_from_level; int time_of_envelope; int envelope_level; int difference; if (envelope->attack_length && time_before(now, state->play_at + msecs_to_jiffies(envelope->attack_length))) { pr_debug("value = 0x%x, attack_level = 0x%x\n", value, envelope->attack_level); time_from_level = jiffies_to_msecs(now - state->play_at); time_of_envelope = envelope->attack_length; envelope_level = min_t(u16, envelope->attack_level, 0x7fff); } else if (envelope->fade_length && effect->replay.length && time_after(now, state->stop_at - msecs_to_jiffies(envelope->fade_length)) && time_before(now, state->stop_at)) { time_from_level = jiffies_to_msecs(state->stop_at - now); time_of_envelope = envelope->fade_length; envelope_level = min_t(u16, envelope->fade_level, 0x7fff); } else return value; difference = abs(value) - envelope_level; pr_debug("difference = %d\n", difference); pr_debug("time_from_level = 0x%x\n", time_from_level); pr_debug("time_of_envelope = 0x%x\n", time_of_envelope); difference = difference * time_from_level / time_of_envelope; pr_debug("difference = %d\n", difference); return value < 0 ? 
-(difference + envelope_level) : (difference + envelope_level); } /* * Return the type the effect has to be converted into (memless devices) */ static int get_compatible_type(struct ff_device *ff, int effect_type) { if (test_bit(effect_type, ff->ffbit)) return effect_type; if (effect_type == FF_PERIODIC && test_bit(FF_RUMBLE, ff->ffbit)) return FF_RUMBLE; pr_err("invalid type in get_compatible_type()\n"); return 0; } /* * Only left/right direction should be used (under/over 0x8000) for * forward/reverse motor direction (to keep calculation fast & simple). */ static u16 ml_calculate_direction(u16 direction, u16 force, u16 new_direction, u16 new_force) { if (!force) return new_direction; if (!new_force) return direction; return (((u32)(direction >> 1) * force + (new_direction >> 1) * new_force) / (force + new_force)) << 1; } #define FRAC_N 8 static inline s16 fixp_new16(s16 a) { return ((s32)a) >> (16 - FRAC_N); } static inline s16 fixp_mult(s16 a, s16 b) { a = ((s32)a * 0x100) / 0x7fff; return ((s32)(a * b)) >> FRAC_N; } /* * Combine two effects and apply gain. */ static void ml_combine_effects(struct ff_effect *effect, struct ml_effect_state *state, int gain) { struct ff_effect *new = state->effect; unsigned int strong, weak, i; int x, y; s16 level; switch (new->type) { case FF_CONSTANT: i = new->direction * 360 / 0xffff; level = fixp_new16(apply_envelope(state, new->u.constant.level, &new->u.constant.envelope)); x = fixp_mult(fixp_sin16(i), level) * gain / 0xffff; y = fixp_mult(-fixp_cos16(i), level) * gain / 0xffff; /* * here we abuse ff_ramp to hold x and y of constant force * If in future any driver wants something else than x and y * in s8, this should be changed to something more generic */ effect->u.ramp.start_level = clamp_val(effect->u.ramp.start_level + x, -0x80, 0x7f); effect->u.ramp.end_level = clamp_val(effect->u.ramp.end_level + y, -0x80, 0x7f); break; case FF_RUMBLE: strong = (u32)new->u.rumble.strong_magnitude * gain / 0xffff; weak = (u32)new->u.rumble.weak_magnitude * gain / 0xffff; if (effect->u.rumble.strong_magnitude + strong) effect->direction = ml_calculate_direction( effect->direction, effect->u.rumble.strong_magnitude, new->direction, strong); else if (effect->u.rumble.weak_magnitude + weak) effect->direction = ml_calculate_direction( effect->direction, effect->u.rumble.weak_magnitude, new->direction, weak); else effect->direction = 0; effect->u.rumble.strong_magnitude = min(strong + effect->u.rumble.strong_magnitude, 0xffffU); effect->u.rumble.weak_magnitude = min(weak + effect->u.rumble.weak_magnitude, 0xffffU); break; case FF_PERIODIC: i = apply_envelope(state, abs(new->u.periodic.magnitude), &new->u.periodic.envelope); /* here we also scale it 0x7fff => 0xffff */ i = i * gain / 0x7fff; if (effect->u.rumble.strong_magnitude + i) effect->direction = ml_calculate_direction( effect->direction, effect->u.rumble.strong_magnitude, new->direction, i); else effect->direction = 0; effect->u.rumble.strong_magnitude = min(i + effect->u.rumble.strong_magnitude, 0xffffU); effect->u.rumble.weak_magnitude = min(i + effect->u.rumble.weak_magnitude, 0xffffU); break; default: pr_err("invalid type in ml_combine_effects()\n"); break; } } /* * Because memoryless devices have only one effect per effect type active * at one time we have to combine multiple effects into one */ static int ml_get_combo_effect(struct ml_device *ml, unsigned long *effect_handled, struct ff_effect *combo_effect) { struct ff_effect *effect; struct ml_effect_state *state; int effect_type; int i; 
memset(combo_effect, 0, sizeof(struct ff_effect)); for (i = 0; i < FF_MEMLESS_EFFECTS; i++) { if (__test_and_set_bit(i, effect_handled)) continue; state = &ml->states[i]; effect = state->effect; if (!test_bit(FF_EFFECT_STARTED, &state->flags)) continue; if (time_before(jiffies, state->play_at)) continue; /* * here we have started effects that are either * currently playing (and may need be aborted) * or need to start playing. */ effect_type = get_compatible_type(ml->dev->ff, effect->type); if (combo_effect->type != effect_type) { if (combo_effect->type != 0) { __clear_bit(i, effect_handled); continue; } combo_effect->type = effect_type; } if (__test_and_clear_bit(FF_EFFECT_ABORTING, &state->flags)) { __clear_bit(FF_EFFECT_PLAYING, &state->flags); __clear_bit(FF_EFFECT_STARTED, &state->flags); } else if (effect->replay.length && time_after_eq(jiffies, state->stop_at)) { __clear_bit(FF_EFFECT_PLAYING, &state->flags); if (--state->count <= 0) { __clear_bit(FF_EFFECT_STARTED, &state->flags); } else { state->play_at = jiffies + msecs_to_jiffies(effect->replay.delay); state->stop_at = state->play_at + msecs_to_jiffies(effect->replay.length); } } else { __set_bit(FF_EFFECT_PLAYING, &state->flags); state->adj_at = jiffies; ml_combine_effects(combo_effect, state, ml->gain); } } return combo_effect->type != 0; } static void ml_play_effects(struct ml_device *ml) { struct ff_effect effect; DECLARE_BITMAP(handled_bm, FF_MEMLESS_EFFECTS); memset(handled_bm, 0, sizeof(handled_bm)); while (ml_get_combo_effect(ml, handled_bm, &effect)) ml->play_effect(ml->dev, ml->private, &effect); ml_schedule_timer(ml); } static void ml_effect_timer(struct timer_list *t) { struct ml_device *ml = from_timer(ml, t, timer); struct input_dev *dev = ml->dev; unsigned long flags; pr_debug("timer: updating effects\n"); spin_lock_irqsave(&dev->event_lock, flags); ml_play_effects(ml); spin_unlock_irqrestore(&dev->event_lock, flags); } /* * Sets requested gain for FF effects. Called with dev->event_lock held. */ static void ml_ff_set_gain(struct input_dev *dev, u16 gain) { struct ml_device *ml = dev->ff->private; int i; ml->gain = gain; for (i = 0; i < FF_MEMLESS_EFFECTS; i++) __clear_bit(FF_EFFECT_PLAYING, &ml->states[i].flags); ml_play_effects(ml); } /* * Start/stop specified FF effect. Called with dev->event_lock held. 
*/ static int ml_ff_playback(struct input_dev *dev, int effect_id, int value) { struct ml_device *ml = dev->ff->private; struct ml_effect_state *state = &ml->states[effect_id]; if (value > 0) { pr_debug("initiated play\n"); __set_bit(FF_EFFECT_STARTED, &state->flags); state->count = value; state->play_at = jiffies + msecs_to_jiffies(state->effect->replay.delay); state->stop_at = state->play_at + msecs_to_jiffies(state->effect->replay.length); state->adj_at = state->play_at; } else { pr_debug("initiated stop\n"); if (test_bit(FF_EFFECT_PLAYING, &state->flags)) __set_bit(FF_EFFECT_ABORTING, &state->flags); else __clear_bit(FF_EFFECT_STARTED, &state->flags); } ml_play_effects(ml); return 0; } static int ml_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct ff_effect *old) { struct ml_device *ml = dev->ff->private; struct ml_effect_state *state = &ml->states[effect->id]; spin_lock_irq(&dev->event_lock); if (test_bit(FF_EFFECT_STARTED, &state->flags)) { __clear_bit(FF_EFFECT_PLAYING, &state->flags); state->play_at = jiffies + msecs_to_jiffies(state->effect->replay.delay); state->stop_at = state->play_at + msecs_to_jiffies(state->effect->replay.length); state->adj_at = state->play_at; ml_schedule_timer(ml); } spin_unlock_irq(&dev->event_lock); return 0; } static void ml_ff_destroy(struct ff_device *ff) { struct ml_device *ml = ff->private; /* * Even though we stop all playing effects when tearing down * an input device (via input_device_flush() that calls into * input_ff_flush() that stops and erases all effects), we * do not actually stop the timer, and therefore we should * do it here. */ del_timer_sync(&ml->timer); kfree(ml->private); } /** * input_ff_create_memless() - create memoryless force-feedback device * @dev: input device supporting force-feedback * @data: driver-specific data to be passed into @play_effect * @play_effect: driver-specific method for playing FF effect */ int input_ff_create_memless(struct input_dev *dev, void *data, int (*play_effect)(struct input_dev *, void *, struct ff_effect *)) { struct ml_device *ml; struct ff_device *ff; int error; int i; ml = kzalloc(sizeof(struct ml_device), GFP_KERNEL); if (!ml) return -ENOMEM; ml->dev = dev; ml->private = data; ml->play_effect = play_effect; ml->gain = 0xffff; timer_setup(&ml->timer, ml_effect_timer, 0); set_bit(FF_GAIN, dev->ffbit); error = input_ff_create(dev, FF_MEMLESS_EFFECTS); if (error) { kfree(ml); return error; } ff = dev->ff; ff->private = ml; ff->upload = ml_ff_upload; ff->playback = ml_ff_playback; ff->set_gain = ml_ff_set_gain; ff->destroy = ml_ff_destroy; /* we can emulate periodic effects with RUMBLE */ if (test_bit(FF_RUMBLE, ff->ffbit)) { set_bit(FF_PERIODIC, dev->ffbit); set_bit(FF_SINE, dev->ffbit); set_bit(FF_TRIANGLE, dev->ffbit); set_bit(FF_SQUARE, dev->ffbit); } for (i = 0; i < FF_MEMLESS_EFFECTS; i++) ml->states[i].effect = &ff->effects[i]; return 0; } EXPORT_SYMBOL_GPL(input_ff_create_memless);
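The memoryless core above is only exercised once a driver registers a play_effect callback for it. Below is a minimal sketch of such a consumer, assuming a rumble-only device: the my_ff_priv structure, the my_send_rumble() stub and the my_register_ff() helper are hypothetical placeholders, while input_set_capability() and input_ff_create_memless() are the real entry points shown in the file above.

/* Hedged sketch of a hypothetical rumble-only consumer of ff-memless. */
#include <linux/input.h>
#include <linux/types.h>

struct my_ff_priv {
	void *hw;		/* hypothetical per-device transport handle */
};

/* Placeholder for the real transport write (USB/HID/I2C/...). */
static int my_send_rumble(void *hw, u16 strong, u16 weak)
{
	return 0;
}

/*
 * ff-memless hands over one already-combined effect; on rumble-only
 * hardware it is always FF_RUMBLE (periodic effects are converted by
 * get_compatible_type() above).
 */
static int my_ff_play(struct input_dev *dev, void *data,
		      struct ff_effect *effect)
{
	struct my_ff_priv *priv = data;

	return my_send_rumble(priv->hw,
			      effect->u.rumble.strong_magnitude,
			      effect->u.rumble.weak_magnitude);
}

static int my_register_ff(struct input_dev *dev, struct my_ff_priv *priv)
{
	/* Advertise rumble; FF_PERIODIC emulation is added automatically. */
	input_set_capability(dev, EV_FF, FF_RUMBLE);

	return input_ff_create_memless(dev, priv, my_ff_play);
}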
// SPDX-License-Identifier: GPL-2.0
/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/sched/user.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h> #include <linux/pagewalk.h> #include <linux/mempolicy.h> #include <linux/syscalls.h> #include <linux/sched.h> #include <linux/export.h> #include <linux/rmap.h> #include <linux/mmzone.h> #include <linux/hugetlb.h> #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/secretmem.h> #include "internal.h" struct mlock_fbatch { local_lock_t lock; struct folio_batch fbatch; }; static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch) = { .lock = INIT_LOCAL_LOCK(lock), }; bool can_do_mlock(void) { if (rlimit(RLIMIT_MEMLOCK) != 0) return true; if (capable(CAP_IPC_LOCK)) return true; return false; } EXPORT_SYMBOL(can_do_mlock); /* * Mlocked folios are marked with the PG_mlocked flag for efficient testing * in vmscan and, possibly, the fault path; and to support semi-accurate * statistics. * * An mlocked folio [folio_test_mlocked(folio)] is unevictable. As such, it * will be ostensibly placed on the LRU "unevictable" list (actually no such * list exists), rather than the [in]active lists. PG_unevictable is set to * indicate the unevictable state. */ static struct lruvec *__mlock_folio(struct folio *folio, struct lruvec *lruvec) { /* There is nothing more we can do while it's off LRU */ if (!folio_test_clear_lru(folio)) return lruvec; lruvec = folio_lruvec_relock_irq(folio, lruvec); if (unlikely(folio_evictable(folio))) { /* * This is a little surprising, but quite possible: PG_mlocked * must have got cleared already by another CPU. Could this * folio be unevictable? I'm not sure, but move it now if so. */ if (folio_test_unevictable(folio)) { lruvec_del_folio(lruvec, folio); folio_clear_unevictable(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(UNEVICTABLE_PGRESCUED, folio_nr_pages(folio)); } goto out; } if (folio_test_unevictable(folio)) { if (folio_test_mlocked(folio)) folio->mlock_count++; goto out; } lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_set_unevictable(folio); folio->mlock_count = !!folio_test_mlocked(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio)); out: folio_set_lru(folio); return lruvec; } static struct lruvec *__mlock_new_folio(struct folio *folio, struct lruvec *lruvec) { VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); lruvec = folio_lruvec_relock_irq(folio, lruvec); /* As above, this is a little surprising, but possible */ if (unlikely(folio_evictable(folio))) goto out; folio_set_unevictable(folio); folio->mlock_count = !!folio_test_mlocked(folio); __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio)); out: lruvec_add_folio(lruvec, folio); folio_set_lru(folio); return lruvec; } static struct lruvec *__munlock_folio(struct folio *folio, struct lruvec *lruvec) { int nr_pages = folio_nr_pages(folio); bool isolated = false; if (!folio_test_clear_lru(folio)) goto munlock; isolated = true; lruvec = folio_lruvec_relock_irq(folio, lruvec); if (folio_test_unevictable(folio)) { /* Then mlock_count is maintained, but might undercount */ if (folio->mlock_count) folio->mlock_count--; if (folio->mlock_count) goto out; } /* else assume that was the last mlock: reclaim will fix it if not */ munlock: if (folio_test_clear_mlocked(folio)) { __zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages); if (isolated || !folio_test_unevictable(folio)) __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages); else __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages); } /* folio_evictable() has to be checked *after* clearing Mlocked */ if (isolated && folio_test_unevictable(folio) 
&& folio_evictable(folio)) { lruvec_del_folio(lruvec, folio); folio_clear_unevictable(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } out: if (isolated) folio_set_lru(folio); return lruvec; } /* * Flags held in the low bits of a struct folio pointer on the mlock_fbatch. */ #define LRU_FOLIO 0x1 #define NEW_FOLIO 0x2 static inline struct folio *mlock_lru(struct folio *folio) { return (struct folio *)((unsigned long)folio + LRU_FOLIO); } static inline struct folio *mlock_new(struct folio *folio) { return (struct folio *)((unsigned long)folio + NEW_FOLIO); } /* * mlock_folio_batch() is derived from folio_batch_move_lru(): perhaps that can * make use of such folio pointer flags in future, but for now just keep it for * mlock. We could use three separate folio batches instead, but one feels * better (munlocking a full folio batch does not need to drain mlocking folio * batches first). */ static void mlock_folio_batch(struct folio_batch *fbatch) { struct lruvec *lruvec = NULL; unsigned long mlock; struct folio *folio; int i; for (i = 0; i < folio_batch_count(fbatch); i++) { folio = fbatch->folios[i]; mlock = (unsigned long)folio & (LRU_FOLIO | NEW_FOLIO); folio = (struct folio *)((unsigned long)folio - mlock); fbatch->folios[i] = folio; if (mlock & LRU_FOLIO) lruvec = __mlock_folio(folio, lruvec); else if (mlock & NEW_FOLIO) lruvec = __mlock_new_folio(folio, lruvec); else lruvec = __munlock_folio(folio, lruvec); } if (lruvec) unlock_page_lruvec_irq(lruvec); folios_put(fbatch); } void mlock_drain_local(void) { struct folio_batch *fbatch; local_lock(&mlock_fbatch.lock); fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); if (folio_batch_count(fbatch)) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } void mlock_drain_remote(int cpu) { struct folio_batch *fbatch; WARN_ON_ONCE(cpu_online(cpu)); fbatch = &per_cpu(mlock_fbatch.fbatch, cpu); if (folio_batch_count(fbatch)) mlock_folio_batch(fbatch); } bool need_mlock_drain(int cpu) { return folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu)); } /** * mlock_folio - mlock a folio already on (or temporarily off) LRU * @folio: folio to be mlocked. */ void mlock_folio(struct folio *folio) { struct folio_batch *fbatch; local_lock(&mlock_fbatch.lock); fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); if (!folio_test_set_mlocked(folio)) { int nr_pages = folio_nr_pages(folio); zone_stat_mod_folio(folio, NR_MLOCK, nr_pages); __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); } folio_get(folio); if (!folio_batch_add(fbatch, mlock_lru(folio)) || folio_test_large(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } /** * mlock_new_folio - mlock a newly allocated folio not yet on LRU * @folio: folio to be mlocked, either normal or a THP head. */ void mlock_new_folio(struct folio *folio) { struct folio_batch *fbatch; int nr_pages = folio_nr_pages(folio); local_lock(&mlock_fbatch.lock); fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); folio_set_mlocked(folio); zone_stat_mod_folio(folio, NR_MLOCK, nr_pages); __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); folio_get(folio); if (!folio_batch_add(fbatch, mlock_new(folio)) || folio_test_large(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } /** * munlock_folio - munlock a folio * @folio: folio to be munlocked, either normal or a THP head. 
*/ void munlock_folio(struct folio *folio) { struct folio_batch *fbatch; local_lock(&mlock_fbatch.lock); fbatch = this_cpu_ptr(&mlock_fbatch.fbatch); /* * folio_test_clear_mlocked(folio) must be left to __munlock_folio(), * which will check whether the folio is multiply mlocked. */ folio_get(folio); if (!folio_batch_add(fbatch, folio) || folio_test_large(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } static inline unsigned int folio_mlock_step(struct folio *folio, pte_t *pte, unsigned long addr, unsigned long end) { unsigned int count, i, nr = folio_nr_pages(folio); unsigned long pfn = folio_pfn(folio); pte_t ptent = ptep_get(pte); if (!folio_test_large(folio)) return 1; count = pfn + nr - pte_pfn(ptent); count = min_t(unsigned int, count, (end - addr) >> PAGE_SHIFT); for (i = 0; i < count; i++, pte++) { pte_t entry = ptep_get(pte); if (!pte_present(entry)) break; if (pte_pfn(entry) - pfn >= nr) break; } return i; } static inline bool allow_mlock_munlock(struct folio *folio, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned int step) { /* * For unlock, allow munlock large folio which is partially * mapped to VMA. As it's possible that large folio is * mlocked and VMA is split later. * * During memory pressure, such kind of large folio can * be split. And the pages are not in VM_LOCKed VMA * can be reclaimed. */ if (!(vma->vm_flags & VM_LOCKED)) return true; /* folio_within_range() cannot take KSM, but any small folio is OK */ if (!folio_test_large(folio)) return true; /* folio not in range [start, end), skip mlock */ if (!folio_within_range(folio, vma, start, end)) return false; /* folio is not fully mapped, skip mlock */ if (step != folio_nr_pages(folio)) return false; return true; } static int mlock_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; spinlock_t *ptl; pte_t *start_pte, *pte; pte_t ptent; struct folio *folio; unsigned int step = 1; unsigned long start = addr; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { if (!pmd_present(*pmd)) goto out; if (is_huge_zero_pmd(*pmd)) goto out; folio = page_folio(pmd_page(*pmd)); if (vma->vm_flags & VM_LOCKED) mlock_folio(folio); else munlock_folio(folio); goto out; } start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (!start_pte) { walk->action = ACTION_AGAIN; return 0; } for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) { ptent = ptep_get(pte); if (!pte_present(ptent)) continue; folio = vm_normal_folio(vma, addr, ptent); if (!folio || folio_is_zone_device(folio)) continue; step = folio_mlock_step(folio, pte, addr, end); if (!allow_mlock_munlock(folio, vma, start, end, step)) goto next_entry; if (vma->vm_flags & VM_LOCKED) mlock_folio(folio); else munlock_folio(folio); next_entry: pte += step - 1; addr += (step - 1) << PAGE_SHIFT; } pte_unmap(start_pte); out: spin_unlock(ptl); cond_resched(); return 0; } /* * mlock_vma_pages_range() - mlock any pages already in the range, * or munlock all pages in the range. * @vma - vma containing range to be mlock()ed or munlock()ed * @start - start address in @vma of the range * @end - end of range in @vma * @newflags - the new set of flags for @vma. * * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED; * called for munlock() and munlockall(), to clear VM_LOCKED from @vma. 
*/ static void mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, vm_flags_t newflags) { static const struct mm_walk_ops mlock_walk_ops = { .pmd_entry = mlock_pte_range, .walk_lock = PGWALK_WRLOCK_VERIFY, }; /* * There is a slight chance that concurrent page migration, * or page reclaim finding a page of this now-VM_LOCKED vma, * will call mlock_vma_folio() and raise page's mlock_count: * double counting, leaving the page unevictable indefinitely. * Communicate this danger to mlock_vma_folio() with VM_IO, * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas. * mmap_lock is held in write mode here, so this weird * combination should not be visible to other mmap_lock users; * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED. */ if (newflags & VM_LOCKED) newflags |= VM_IO; vma_start_write(vma); vm_flags_reset_once(vma, newflags); lru_add_drain(); walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL); lru_add_drain(); if (newflags & VM_IO) { newflags &= ~VM_IO; vm_flags_reset_once(vma, newflags); } } /* * mlock_fixup - handle mlock[all]/munlock[all] requests. * * Filters out "special" vmas -- VM_LOCKED never gets set for these, and * munlock is a no-op. However, for some special vmas, we go ahead and * populate the ptes. * * For vmas that pass the filters, merge/split as appropriate. */ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) { struct mm_struct *mm = vma->vm_mm; int nr_pages; int ret = 0; vm_flags_t oldflags = vma->vm_flags; if (newflags == oldflags || (oldflags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || vma_is_dax(vma) || vma_is_secretmem(vma)) /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ goto out; vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; } /* * Keep track of amount of locked VM. */ nr_pages = (end - start) >> PAGE_SHIFT; if (!(newflags & VM_LOCKED)) nr_pages = -nr_pages; else if (oldflags & VM_LOCKED) nr_pages = 0; mm->locked_vm += nr_pages; /* * vm_flags is protected by the mmap_lock held in write mode. * It's okay if try_to_unmap_one unmaps a page just after we * set VM_LOCKED, populate_vma_page_range will bring it back. */ if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) { /* No work to do, and mlocking twice would be wrong */ vma_start_write(vma); vm_flags_reset(vma, newflags); } else { mlock_vma_pages_range(vma, start, end, newflags); } out: *prev = vma; return ret; } static int apply_vma_lock_flags(unsigned long start, size_t len, vm_flags_t flags) { unsigned long nstart, end, tmp; struct vm_area_struct *vma, *prev; VMA_ITERATOR(vmi, current->mm, start); VM_BUG_ON(offset_in_page(start)); VM_BUG_ON(len != PAGE_ALIGN(len)); end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; vma = vma_iter_load(&vmi); if (!vma) return -ENOMEM; prev = vma_prev(&vmi); if (start > vma->vm_start) prev = vma; nstart = start; tmp = vma->vm_start; for_each_vma_range(vmi, vma, end) { int error; vm_flags_t newflags; if (vma->vm_start != tmp) return -ENOMEM; newflags = vma->vm_flags & ~VM_LOCKED_MASK; newflags |= flags; /* Here we know that vma->vm_start <= nstart < vma->vm_end. 
*/ tmp = vma->vm_end; if (tmp > end) tmp = end; error = mlock_fixup(&vmi, vma, &prev, nstart, tmp, newflags); if (error) return error; tmp = vma_iter_end(&vmi); nstart = tmp; } if (tmp < end) return -ENOMEM; return 0; } /* * Go through vma areas and sum size of mlocked * vma pages, as return value. * Note deferred memory locking case(mlock2(,,MLOCK_ONFAULT) * is also counted. * Return value: previously mlocked page counts */ static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm, unsigned long start, size_t len) { struct vm_area_struct *vma; unsigned long count = 0; unsigned long end; VMA_ITERATOR(vmi, mm, start); /* Don't overflow past ULONG_MAX */ if (unlikely(ULONG_MAX - len < start)) end = ULONG_MAX; else end = start + len; for_each_vma_range(vmi, vma, end) { if (vma->vm_flags & VM_LOCKED) { if (start > vma->vm_start) count -= (start - vma->vm_start); if (end < vma->vm_end) { count += end - vma->vm_start; break; } count += vma->vm_end - vma->vm_start; } } return count >> PAGE_SHIFT; } /* * convert get_user_pages() return value to posix mlock() error */ static int __mlock_posix_error_return(long retval) { if (retval == -EFAULT) retval = -ENOMEM; else if (retval == -ENOMEM) retval = -EAGAIN; return retval; } static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags) { unsigned long locked; unsigned long lock_limit; int error = -ENOMEM; start = untagged_addr(start); if (!can_do_mlock()) return -EPERM; len = PAGE_ALIGN(len + (offset_in_page(start))); start &= PAGE_MASK; lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; locked = len >> PAGE_SHIFT; if (mmap_write_lock_killable(current->mm)) return -EINTR; locked += current->mm->locked_vm; if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) { /* * It is possible that the regions requested intersect with * previously mlocked areas, that part area in "mm->locked_vm" * should not be counted to new mlock increment count. So check * and adjust locked count if necessary. */ locked -= count_mm_mlocked_page_nr(current->mm, start, len); } /* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = apply_vma_lock_flags(start, len, flags); mmap_write_unlock(current->mm); if (error) return error; error = __mm_populate(start, len, 0); if (error) return __mlock_posix_error_return(error); return 0; } SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) { return do_mlock(start, len, VM_LOCKED); } SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) { vm_flags_t vm_flags = VM_LOCKED; if (flags & ~MLOCK_ONFAULT) return -EINVAL; if (flags & MLOCK_ONFAULT) vm_flags |= VM_LOCKONFAULT; return do_mlock(start, len, vm_flags); } SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; start = untagged_addr(start); len = PAGE_ALIGN(len + (offset_in_page(start))); start &= PAGE_MASK; if (mmap_write_lock_killable(current->mm)) return -EINTR; ret = apply_vma_lock_flags(start, len, 0); mmap_write_unlock(current->mm); return ret; } /* * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall) * and translate into the appropriate modifications to mm->def_flags and/or the * flags for all current VMAs. * * There are a couple of subtleties with this. If mlockall() is called multiple * times with different flags, the values do not necessarily stack. If mlockall * is called once including the MCL_FUTURE flag and then a second time without * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags. 
*/ static int apply_mlockall_flags(int flags) { VMA_ITERATOR(vmi, current->mm, 0); struct vm_area_struct *vma, *prev = NULL; vm_flags_t to_add = 0; current->mm->def_flags &= ~VM_LOCKED_MASK; if (flags & MCL_FUTURE) { current->mm->def_flags |= VM_LOCKED; if (flags & MCL_ONFAULT) current->mm->def_flags |= VM_LOCKONFAULT; if (!(flags & MCL_CURRENT)) goto out; } if (flags & MCL_CURRENT) { to_add |= VM_LOCKED; if (flags & MCL_ONFAULT) to_add |= VM_LOCKONFAULT; } for_each_vma(vmi, vma) { vm_flags_t newflags; newflags = vma->vm_flags & ~VM_LOCKED_MASK; newflags |= to_add; /* Ignore errors */ mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end, newflags); cond_resched(); } out: return 0; } SYSCALL_DEFINE1(mlockall, int, flags) { unsigned long lock_limit; int ret; if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) || flags == MCL_ONFAULT) return -EINVAL; if (!can_do_mlock()) return -EPERM; lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; if (mmap_write_lock_killable(current->mm)) return -EINTR; ret = -ENOMEM; if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = apply_mlockall_flags(flags); mmap_write_unlock(current->mm); if (!ret && (flags & MCL_CURRENT)) mm_populate(0, TASK_SIZE); return ret; } SYSCALL_DEFINE0(munlockall) { int ret; if (mmap_write_lock_killable(current->mm)) return -EINTR; ret = apply_mlockall_flags(0); mmap_write_unlock(current->mm); return ret; } /* * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB * shm segments) get accounted against the user_struct instead. */ static DEFINE_SPINLOCK(shmlock_user_lock); int user_shm_lock(size_t size, struct ucounts *ucounts) { unsigned long lock_limit, locked; long memlock; int allowed = 0; locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; lock_limit = rlimit(RLIMIT_MEMLOCK); if (lock_limit != RLIM_INFINITY) lock_limit >>= PAGE_SHIFT; spin_lock(&shmlock_user_lock); memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) { dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); goto out; } if (!get_ucounts(ucounts)) { dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); allowed = 0; goto out; } allowed = 1; out: spin_unlock(&shmlock_user_lock); return allowed; } void user_shm_unlock(size_t size, struct ucounts *ucounts) { spin_lock(&shmlock_user_lock); dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); spin_unlock(&shmlock_user_lock); put_ucounts(ucounts); }
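Seen from userspace, the syscall paths above (do_mlock(), apply_vma_lock_flags() and the munlock path) reduce to a few library calls. The sketch below is illustrative only; it assumes a glibc new enough (2.27+) to expose mlock2() and MLOCK_ONFAULT, otherwise the raw syscall would be needed.

/* Hedged userspace sketch of mlock2(MLOCK_ONFAULT)/munlock; assumes glibc 2.27+. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4UL << 20;		/* 4 MiB */
	unsigned char *buf;

	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * Marks the VMA VM_LOCKED|VM_LOCKONFAULT via do_mlock(); pages are
	 * mlocked as they are faulted in. Subject to RLIMIT_MEMLOCK unless
	 * the caller has CAP_IPC_LOCK, as checked in do_mlock() above.
	 */
	if (mlock2(buf, len, MLOCK_ONFAULT)) {
		perror("mlock2");
		return 1;
	}

	memset(buf, 0xff, len);	/* fault the pages in; they become unevictable */

	/* Clears VM_LOCKED again (apply_vma_lock_flags(start, len, 0)). */
	if (munlock(buf, len)) {
		perror("munlock");
		return 1;
	}

	munmap(buf, len);
	return 0;
}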
// SPDX-License-Identifier: GPL-2.0
/*
 * media.c - Media Controller specific ALSA driver code
 *
 * Copyright (c) 2019 Shuah Khan <shuah@kernel.org>
 *
 */

/*
 * This file adds Media Controller support to the ALSA driver
 * to use the Media Controller API to share the tuner with DVB
 * and V4L2 drivers that control the media device.
 *
 * The media device is created based on the existing quirks framework.
 * Using this approach, the media controller API usage can be added for
 * a specific device.
 */

#include <linux/init.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/usb.h>

#include <sound/pcm.h>
#include <sound/core.h>

#include "usbaudio.h"
#include "card.h"
#include "mixer.h"
#include "media.h"

int snd_media_stream_init(struct snd_usb_substream *subs, struct snd_pcm *pcm,
			  int stream)
{
	struct media_device *mdev;
	struct media_ctl *mctl;
	struct device *pcm_dev = pcm->streams[stream].dev;
	u32 intf_type;
	int ret = 0;
	u16 mixer_pad;
	struct media_entity *entity;

	mdev = subs->stream->chip->media_dev;
	if (!mdev)
		return 0;

	if (subs->media_ctl)
		return 0;

	/* allocate media_ctl */
	mctl = kzalloc(sizeof(*mctl), GFP_KERNEL);
	if (!mctl)
		return -ENOMEM;

	mctl->media_dev = mdev;
	if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
		intf_type = MEDIA_INTF_T_ALSA_PCM_PLAYBACK;
		mctl->media_entity.function = MEDIA_ENT_F_AUDIO_PLAYBACK;
		mctl->media_pad.flags = MEDIA_PAD_FL_SOURCE;
		mixer_pad = 1;
	} else {
		intf_type = MEDIA_INTF_T_ALSA_PCM_CAPTURE;
		mctl->media_entity.function = MEDIA_ENT_F_AUDIO_CAPTURE;
		mctl->media_pad.flags = MEDIA_PAD_FL_SINK;
		mixer_pad = 2;
	}
	mctl->media_entity.name = pcm->name;
	media_entity_pads_init(&mctl->media_entity, 1, &mctl->media_pad);
	ret = media_device_register_entity(mctl->media_dev,
					   &mctl->media_entity);
	if (ret)
		goto free_mctl;

	mctl->intf_devnode = media_devnode_create(mdev, intf_type, 0,
						  MAJOR(pcm_dev->devt),
						  MINOR(pcm_dev->devt));
	if (!mctl->intf_devnode) {
		ret = -ENOMEM;
		goto unregister_entity;
	}
	mctl->intf_link = media_create_intf_link(&mctl->media_entity,
						 &mctl->intf_devnode->intf,
						 MEDIA_LNK_FL_ENABLED);
	if (!mctl->intf_link) {
		ret = -ENOMEM;
		goto devnode_remove;
	}

	/* create link between mixer and audio */
	media_device_for_each_entity(entity, mdev) {
		switch (entity->function) {
		case MEDIA_ENT_F_AUDIO_MIXER:
			ret = media_create_pad_link(entity, mixer_pad,
&mctl->media_entity, 0, MEDIA_LNK_FL_ENABLED); if (ret) goto remove_intf_link; break; } } subs->media_ctl = mctl; return 0; remove_intf_link: media_remove_intf_link(mctl->intf_link); devnode_remove: media_devnode_remove(mctl->intf_devnode); unregister_entity: media_device_unregister_entity(&mctl->media_entity); free_mctl: kfree(mctl); return ret; } void snd_media_stream_delete(struct snd_usb_substream *subs) { struct media_ctl *mctl = subs->media_ctl; if (mctl) { struct media_device *mdev; mdev = mctl->media_dev; if (mdev && media_devnode_is_registered(mdev->devnode)) { media_devnode_remove(mctl->intf_devnode); media_device_unregister_entity(&mctl->media_entity); media_entity_cleanup(&mctl->media_entity); } kfree(mctl); subs->media_ctl = NULL; } } int snd_media_start_pipeline(struct snd_usb_substream *subs) { struct media_ctl *mctl = subs->media_ctl; int ret = 0; if (!mctl) return 0; mutex_lock(&mctl->media_dev->graph_mutex); if (mctl->media_dev->enable_source) ret = mctl->media_dev->enable_source(&mctl->media_entity, &mctl->media_pipe); mutex_unlock(&mctl->media_dev->graph_mutex); return ret; } void snd_media_stop_pipeline(struct snd_usb_substream *subs) { struct media_ctl *mctl = subs->media_ctl; if (!mctl) return; mutex_lock(&mctl->media_dev->graph_mutex); if (mctl->media_dev->disable_source) mctl->media_dev->disable_source(&mctl->media_entity); mutex_unlock(&mctl->media_dev->graph_mutex); } static int snd_media_mixer_init(struct snd_usb_audio *chip) { struct device *ctl_dev = chip->card->ctl_dev; struct media_intf_devnode *ctl_intf; struct usb_mixer_interface *mixer; struct media_device *mdev = chip->media_dev; struct media_mixer_ctl *mctl; u32 intf_type = MEDIA_INTF_T_ALSA_CONTROL; int ret; if (!mdev) return -ENODEV; ctl_intf = chip->ctl_intf_media_devnode; if (!ctl_intf) { ctl_intf = media_devnode_create(mdev, intf_type, 0, MAJOR(ctl_dev->devt), MINOR(ctl_dev->devt)); if (!ctl_intf) return -ENOMEM; chip->ctl_intf_media_devnode = ctl_intf; } list_for_each_entry(mixer, &chip->mixer_list, list) { if (mixer->media_mixer_ctl) continue; /* allocate media_mixer_ctl */ mctl = kzalloc(sizeof(*mctl), GFP_KERNEL); if (!mctl) return -ENOMEM; mctl->media_dev = mdev; mctl->media_entity.function = MEDIA_ENT_F_AUDIO_MIXER; mctl->media_entity.name = chip->card->mixername; mctl->media_pad[0].flags = MEDIA_PAD_FL_SINK; mctl->media_pad[1].flags = MEDIA_PAD_FL_SOURCE; mctl->media_pad[2].flags = MEDIA_PAD_FL_SOURCE; media_entity_pads_init(&mctl->media_entity, MEDIA_MIXER_PAD_MAX, mctl->media_pad); ret = media_device_register_entity(mctl->media_dev, &mctl->media_entity); if (ret) { kfree(mctl); return ret; } mctl->intf_link = media_create_intf_link(&mctl->media_entity, &ctl_intf->intf, MEDIA_LNK_FL_ENABLED); if (!mctl->intf_link) { media_device_unregister_entity(&mctl->media_entity); media_entity_cleanup(&mctl->media_entity); kfree(mctl); return -ENOMEM; } mctl->intf_devnode = ctl_intf; mixer->media_mixer_ctl = mctl; } return 0; } static void snd_media_mixer_delete(struct snd_usb_audio *chip) { struct usb_mixer_interface *mixer; struct media_device *mdev = chip->media_dev; if (!mdev) return; list_for_each_entry(mixer, &chip->mixer_list, list) { struct media_mixer_ctl *mctl; mctl = mixer->media_mixer_ctl; if (!mixer->media_mixer_ctl) continue; if (media_devnode_is_registered(mdev->devnode)) { media_device_unregister_entity(&mctl->media_entity); media_entity_cleanup(&mctl->media_entity); } kfree(mctl); mixer->media_mixer_ctl = NULL; } if (media_devnode_is_registered(mdev->devnode)) 
media_devnode_remove(chip->ctl_intf_media_devnode); chip->ctl_intf_media_devnode = NULL; } int snd_media_device_create(struct snd_usb_audio *chip, struct usb_interface *iface) { struct media_device *mdev; struct usb_device *usbdev = interface_to_usbdev(iface); int ret = 0; /* usb-audio driver is probed for each usb interface, and * there are multiple interfaces per device. Avoid calling * media_device_usb_allocate() each time usb_audio_probe() * is called. Do it only once. */ if (chip->media_dev) { mdev = chip->media_dev; goto snd_mixer_init; } mdev = media_device_usb_allocate(usbdev, KBUILD_MODNAME, THIS_MODULE); if (IS_ERR(mdev)) return -ENOMEM; /* save media device - avoid lookups */ chip->media_dev = mdev; snd_mixer_init: /* Create media entities for mixer and control dev */ ret = snd_media_mixer_init(chip); /* media_device might be registered, print error and continue */ if (ret) dev_err(&usbdev->dev, "Couldn't create media mixer entities. Error: %d\n", ret); if (!media_devnode_is_registered(mdev->devnode)) { /* don't register if snd_media_mixer_init() failed */ if (ret) goto create_fail; /* register media_device */ ret = media_device_register(mdev); create_fail: if (ret) { snd_media_mixer_delete(chip); media_device_delete(mdev, KBUILD_MODNAME, THIS_MODULE); /* clear saved media_dev */ chip->media_dev = NULL; dev_err(&usbdev->dev, "Couldn't register media device. Error: %d\n", ret); return ret; } } return ret; } void snd_media_device_delete(struct snd_usb_audio *chip) { struct media_device *mdev = chip->media_dev; struct snd_usb_stream *stream; /* release resources */ list_for_each_entry(stream, &chip->pcm_list, list) { snd_media_stream_delete(&stream->substream[0]); snd_media_stream_delete(&stream->substream[1]); } snd_media_mixer_delete(chip); if (mdev) { media_device_delete(mdev, KBUILD_MODNAME, THIS_MODULE); chip->media_dev = NULL; } }
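To keep the helpers above in context, the sketch below illustrates the order in which the rest of the usb-audio driver is expected to call them (probe, per-stream setup, start/stop around streaming, teardown). It is an illustrative composite only, not a copy of the real call sites, and the example_media_lifecycle() wrapper is hypothetical.

/*
 * Illustrative call order only; assumes the same headers as media.c above
 * ("usbaudio.h", "card.h", "media.h", <sound/pcm.h>). Real call sites pass
 * actual chip/iface/subs/pcm objects from the probe and PCM code.
 */
static int example_media_lifecycle(struct snd_usb_audio *chip,
				   struct usb_interface *iface,
				   struct snd_usb_substream *subs,
				   struct snd_pcm *pcm)
{
	int err;

	/* Once per chip: allocate the media_device and mixer entities. */
	err = snd_media_device_create(chip, iface);
	if (err)
		return err;

	/* Per PCM substream: create the ALSA PCM interface, entity, links. */
	err = snd_media_stream_init(subs, pcm, SNDRV_PCM_STREAM_PLAYBACK);
	if (err)
		return err;

	/* Around streaming: claim and release the shared tuner source. */
	err = snd_media_start_pipeline(subs);
	if (err)
		return err;
	snd_media_stop_pipeline(subs);

	/* Teardown: per-stream entities first, then the media device. */
	snd_media_stream_delete(subs);
	snd_media_device_delete(chip);
	return 0;
}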
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/stat.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/mm.h>
#include
<linux/errno.h> #include <linux/file.h> #include <linux/highuid.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/compat.h> #include <linux/iversion.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include "internal.h" #include "mount.h" /** * generic_fillattr - Fill in the basic attributes from the inode struct * @idmap: idmap of the mount the inode was found from * @request_mask: statx request_mask * @inode: Inode to use as the source * @stat: Where to fill in the attributes * * Fill in the basic attributes in the kstat structure from data that's to be * found on the VFS inode structure. This is the default if no getattr inode * operation is supplied. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before filling in the * uid and gid filds. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, struct inode *inode, struct kstat *stat) { vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; stat->uid = vfsuid_into_kuid(vfsuid); stat->gid = vfsgid_into_kgid(vfsgid); stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode_get_atime(inode); stat->mtime = inode_get_mtime(inode); stat->ctime = inode_get_ctime(inode); stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { stat->result_mask |= STATX_CHANGE_COOKIE; stat->change_cookie = inode_query_iversion(inode); } } EXPORT_SYMBOL(generic_fillattr); /** * generic_fill_statx_attr - Fill in the statx attributes from the inode flags * @inode: Inode to use as the source * @stat: Where to fill in the attribute flags * * Fill in the STATX_ATTR_* flags in the kstat structure for properties of the * inode that are published on i_flags and enforced by the VFS. */ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat) { if (inode->i_flags & S_IMMUTABLE) stat->attributes |= STATX_ATTR_IMMUTABLE; if (inode->i_flags & S_APPEND) stat->attributes |= STATX_ATTR_APPEND; stat->attributes_mask |= KSTAT_ATTR_VFS_FLAGS; } EXPORT_SYMBOL(generic_fill_statx_attr); /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from * @stat: structure to return attributes in * @request_mask: STATX_xxx flags indicating what the caller wants * @query_flags: Query mode (AT_STATX_SYNC_TYPE) * * Get attributes without calling security_inode_getattr. * * Currently the only caller other than vfs_getattr is internal to the * filehandle lookup code, which uses only the inode number and returns no * attributes to any user. Any other code probably wants vfs_getattr. 
*/ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct mnt_idmap *idmap; struct inode *inode = d_backing_inode(path->dentry); memset(stat, 0, sizeof(*stat)); stat->result_mask |= STATX_BASIC_STATS; query_flags &= AT_STATX_SYNC_TYPE; /* allow the fs to override these if it really wants to */ /* SB_NOATIME means filesystem supplies dummy atime value */ if (inode->i_sb->s_flags & SB_NOATIME) stat->result_mask &= ~STATX_ATIME; /* * Note: If you add another clause to set an attribute flag, please * update attributes_mask below. */ if (IS_AUTOMOUNT(inode)) stat->attributes |= STATX_ATTR_AUTOMOUNT; if (IS_DAX(inode)) stat->attributes |= STATX_ATTR_DAX; stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | STATX_ATTR_DAX); idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) return inode->i_op->getattr(idmap, path, stat, request_mask, query_flags | AT_GETATTR_NOSEC); generic_fillattr(idmap, request_mask, inode, stat); return 0; } EXPORT_SYMBOL(vfs_getattr_nosec); /* * vfs_getattr - Get the enhanced basic attributes of a file * @path: The file of interest * @stat: Where to return the statistics * @request_mask: STATX_xxx flags indicating what the caller wants * @query_flags: Query mode (AT_STATX_SYNC_TYPE) * * Ask the filesystem for a file's attributes. The caller must indicate in * request_mask and query_flags to indicate what they want. * * If the file is remote, the filesystem can be forced to update the attributes * from the backing store by passing AT_STATX_FORCE_SYNC in query_flags or can * suppress the update by passing AT_STATX_DONT_SYNC. * * Bits must have been set in request_mask to indicate which attributes the * caller wants retrieving. Any such attribute not requested may be returned * anyway, but the value may be approximate, and, if remote, may not have been * synchronised with the server. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { int retval; if (WARN_ON_ONCE(query_flags & AT_GETATTR_NOSEC)) return -EPERM; retval = security_inode_getattr(path); if (retval) return retval; return vfs_getattr_nosec(path, stat, request_mask, query_flags); } EXPORT_SYMBOL(vfs_getattr); /** * vfs_fstat - Get the basic attributes by file descriptor * @fd: The file descriptor referring to the file of interest * @stat: The result structure to fill in. * * This function is a wrapper around vfs_getattr(). The main difference is * that it uses a file descriptor to determine the file location. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_fstat(int fd, struct kstat *stat) { struct fd f; int error; f = fdget_raw(fd); if (!f.file) return -EBADF; error = vfs_getattr(&f.file->f_path, stat, STATX_BASIC_STATS, 0); fdput(f); return error; } int getname_statx_lookup_flags(int flags) { int lookup_flags = 0; if (!(flags & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; if (!(flags & AT_NO_AUTOMOUNT)) lookup_flags |= LOOKUP_AUTOMOUNT; if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; return lookup_flags; } /** * vfs_statx - Get basic and extra attributes by filename * @dfd: A file descriptor representing the base dir for a relative filename * @filename: The name of the file of interest * @flags: Flags to control the query * @stat: The result structure to fill in. 
* @request_mask: STATX_xxx flags indicating what the caller wants * * This function is a wrapper around vfs_getattr(). The main difference is * that it uses a filename and base directory to determine the file location. * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink * at the given name from being referenced. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ static int vfs_statx(int dfd, struct filename *filename, int flags, struct kstat *stat, u32 request_mask) { struct path path; unsigned int lookup_flags = getname_statx_lookup_flags(flags); int error; if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH | AT_STATX_SYNC_TYPE)) return -EINVAL; retry: error = filename_lookup(dfd, filename, lookup_flags, &path, NULL); if (error) goto out; error = vfs_getattr(&path, stat, request_mask, flags); if (request_mask & STATX_MNT_ID_UNIQUE) { stat->mnt_id = real_mount(path.mnt)->mnt_id_unique; stat->result_mask |= STATX_MNT_ID_UNIQUE; } else { stat->mnt_id = real_mount(path.mnt)->mnt_id; stat->result_mask |= STATX_MNT_ID; } if (path.mnt->mnt_root == path.dentry) stat->attributes |= STATX_ATTR_MOUNT_ROOT; stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; /* Handle STATX_DIOALIGN for block devices. */ if (request_mask & STATX_DIOALIGN) { struct inode *inode = d_backing_inode(path.dentry); if (S_ISBLK(inode->i_mode)) bdev_statx_dioalign(inode, stat); } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out: return error; } int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { int ret; int statx_flags = flags | AT_NO_AUTOMOUNT; struct filename *name; /* * Work around glibc turning fstat() into fstatat(AT_EMPTY_PATH) * * If AT_EMPTY_PATH is set, we expect the common case to be that * empty path, and avoid doing all the extra pathname work. */ if (dfd >= 0 && flags == AT_EMPTY_PATH) { char c; ret = get_user(c, filename); if (unlikely(ret)) return ret; if (likely(!c)) return vfs_fstat(dfd, stat); } name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL); ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS); putname(name); return ret; } #ifdef __ARCH_WANT_OLD_STAT /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * statbuf) { static int warncount = 5; struct __old_kernel_stat tmp; if (warncount > 0) { warncount--; printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n", current->comm); } else if (warncount < 0) { /* it's laughable, but... */ warncount = 0; } memset(&tmp, 0, sizeof(struct __old_kernel_stat)); tmp.st_dev = old_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = old_encode_dev(stat->rdev); #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; #endif tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_ctime = stat->ctime.tv_sec; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? 
-EFAULT : 0; } SYSCALL_DEFINE2(stat, const char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (error) return error; return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(lstat, const char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (error) return error; return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_old_stat(&stat, statbuf); return error; } #endif /* __ARCH_WANT_OLD_STAT */ #ifdef __ARCH_WANT_NEW_STAT #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) { struct stat tmp; if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) return -EOVERFLOW; if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; #endif INIT_STRUCT_STAT_PADDING(tmp); tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = new_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_ctime = stat->ctime.tv_sec; #ifdef STAT_HAVE_NSEC tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; #endif tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } SYSCALL_DEFINE2(newstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error = vfs_stat(filename, &stat); if (error) return error; return cp_new_stat(&stat, statbuf); } SYSCALL_DEFINE2(newlstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (error) return error; return cp_new_stat(&stat, statbuf); } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_new_stat(&stat, statbuf); } #endif SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_new_stat(&stat, statbuf); return error; } #endif static int do_readlinkat(int dfd, const char __user *pathname, char __user *buf, int bufsiz) { struct path path; int error; int empty = 0; unsigned int lookup_flags = LOOKUP_EMPTY; if (bufsiz <= 0) return -EINVAL; retry: error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty); if (!error) { struct inode *inode = d_backing_inode(path.dentry); error = empty ? 
-ENOENT : -EINVAL; /* * AFS mountpoints allow readlink(2) but are not symlinks */ if (d_is_symlink(path.dentry) || inode->i_op->readlink) { error = security_inode_readlink(path.dentry); if (!error) { touch_atime(&path); error = vfs_readlink(path.dentry, buf, bufsiz); } } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } } return error; } SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, char __user *, buf, int, bufsiz) { return do_readlinkat(dfd, pathname, buf, bufsiz); } SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, int, bufsiz) { return do_readlinkat(AT_FDCWD, path, buf, bufsiz); } /* ---------- LFS-64 ----------- */ #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) #ifndef INIT_STRUCT_STAT64_PADDING # define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) #endif static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) { struct stat64 tmp; INIT_STRUCT_STAT64_PADDING(tmp); #ifdef CONFIG_MIPS /* mips has weird padding, so we don't get 64 bits there */ tmp.st_dev = new_encode_dev(stat->dev); tmp.st_rdev = new_encode_dev(stat->rdev); #else tmp.st_dev = huge_encode_dev(stat->dev); tmp.st_rdev = huge_encode_dev(stat->rdev); #endif tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; #ifdef STAT64_HAS_BROKEN_ST_INO tmp.__st_ino = stat->ino; #endif tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime = stat->ctime.tv_sec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; tmp.st_size = stat->size; tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? 
-EFAULT : 0; } SYSCALL_DEFINE2(stat64, const char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_stat(filename, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE2(lstat64, const char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_lstat(filename, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ static noinline_for_stack int cp_statx(const struct kstat *stat, struct statx __user *buffer) { struct statx tmp; memset(&tmp, 0, sizeof(tmp)); /* STATX_CHANGE_COOKIE is kernel-only for now */ tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE; tmp.stx_blksize = stat->blksize; /* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */ tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC; tmp.stx_nlink = stat->nlink; tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.stx_mode = stat->mode; tmp.stx_ino = stat->ino; tmp.stx_size = stat->size; tmp.stx_blocks = stat->blocks; tmp.stx_attributes_mask = stat->attributes_mask; tmp.stx_atime.tv_sec = stat->atime.tv_sec; tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; tmp.stx_btime.tv_sec = stat->btime.tv_sec; tmp.stx_btime.tv_nsec = stat->btime.tv_nsec; tmp.stx_ctime.tv_sec = stat->ctime.tv_sec; tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec; tmp.stx_mtime.tv_sec = stat->mtime.tv_sec; tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec; tmp.stx_rdev_major = MAJOR(stat->rdev); tmp.stx_rdev_minor = MINOR(stat->rdev); tmp.stx_dev_major = MAJOR(stat->dev); tmp.stx_dev_minor = MINOR(stat->dev); tmp.stx_mnt_id = stat->mnt_id; tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_offset_align = stat->dio_offset_align; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } int do_statx(int dfd, struct filename *filename, unsigned int flags, unsigned int mask, struct statx __user *buffer) { struct kstat stat; int error; if (mask & STATX__RESERVED) return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; /* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests * from userland. */ mask &= ~STATX_CHANGE_COOKIE; error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error; return cp_statx(&stat, buffer); } /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. * @filename: File to stat or "" with AT_EMPTY_PATH * @flags: AT_* flags to control pathwalk. * @mask: Parts of statx struct actually required. * @buffer: Result buffer. * * Note that fstat() can be emulated by setting dfd to the fd of interest, * supplying "" as the filename and setting AT_EMPTY_PATH in the flags. 
*/ SYSCALL_DEFINE5(statx, int, dfd, const char __user *, filename, unsigned, flags, unsigned int, mask, struct statx __user *, buffer) { int ret; struct filename *name; name = getname_flags(filename, getname_statx_lookup_flags(flags), NULL); ret = do_statx(dfd, name, flags, mask, buffer); putname(name); return ret; } #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_STAT) static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { struct compat_stat tmp; if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) return -EOVERFLOW; if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; memset(&tmp, 0, sizeof(tmp)); tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = new_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime = stat->ctime.tv_sec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, struct compat_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (error) return error; return cp_compat_stat(&stat, statbuf); } COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, struct compat_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (error) return error; return cp_compat_stat(&stat, statbuf); } #ifndef __ARCH_WANT_STAT64 COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, const char __user *, filename, struct compat_stat __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_compat_stat(&stat, statbuf); } #endif COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct compat_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_compat_stat(&stat, statbuf); return error; } #endif /* Caller is here responsible for sufficient locking (ie. 
inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) { inode->i_blocks += bytes >> 9; bytes &= 511; inode->i_bytes += bytes; if (inode->i_bytes >= 512) { inode->i_blocks++; inode->i_bytes -= 512; } } EXPORT_SYMBOL(__inode_add_bytes); void inode_add_bytes(struct inode *inode, loff_t bytes) { spin_lock(&inode->i_lock); __inode_add_bytes(inode, bytes); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(inode_add_bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes) { inode->i_blocks -= bytes >> 9; bytes &= 511; if (inode->i_bytes < bytes) { inode->i_blocks--; inode->i_bytes += 512; } inode->i_bytes -= bytes; } EXPORT_SYMBOL(__inode_sub_bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes) { spin_lock(&inode->i_lock); __inode_sub_bytes(inode, bytes); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(inode_sub_bytes); loff_t inode_get_bytes(struct inode *inode) { loff_t ret; spin_lock(&inode->i_lock); ret = __inode_get_bytes(inode); spin_unlock(&inode->i_lock); return ret; } EXPORT_SYMBOL(inode_get_bytes); void inode_set_bytes(struct inode *inode, loff_t bytes) { /* Caller is here responsible for sufficient locking * (ie. inode->i_lock) */ inode->i_blocks = bytes >> 9; inode->i_bytes = bytes & 511; } EXPORT_SYMBOL(inode_set_bytes);
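/*
 * Illustrative sketch, not part of the original file: a minimal example of
 * how a filesystem might use the exported byte-accounting helpers above.
 * The function name example_account_extent() and its parameters are
 * hypothetical; inode_add_bytes(), inode_sub_bytes() and inode_get_bytes()
 * are the helpers defined above (declared in <linux/fs.h>).  i_blocks is
 * kept in 512-byte units, with the sub-block remainder carried in i_bytes,
 * so callers pass plain byte counts and let the helpers do the splitting.
 */
static void example_account_extent(struct inode *inode, loff_t allocated,
				   loff_t released)
{
	loff_t total;

	inode_add_bytes(inode, allocated);	/* takes inode->i_lock internally */
	inode_sub_bytes(inode, released);

	total = inode_get_bytes(inode);		/* (i_blocks << 9) + i_bytes */
	pr_debug("inode %lu now accounts %lld bytes\n", inode->i_ino, total);
}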
// SPDX-License-Identifier: GPL-2.0-only /* * This implements the various checks for CONFIG_HARDENED_USERCOPY*, * which are designed to protect kernel memory from needless exposure * and overwrite under many unintended conditions. This code is based * on PAX_USERCOPY, which is: * * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source * Security Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mm.h> #include <linux/highmem.h> #include <linux/kstrtox.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/thread_info.h> #include <linux/vmalloc.h> #include <linux/atomic.h> #include <linux/jump_label.h> #include <asm/sections.h> #include "slab.h" /* * Checks if a given pointer and length is contained by the current * stack frame (if possible). * * Returns: * NOT_STACK: not at all on the stack * GOOD_FRAME: fully within a valid stack frame * GOOD_STACK: within the current stack (when can't frame-check exactly) * BAD_STACK: error condition (invalid stack position or bad stack frame) */ static noinline int check_stack_object(const void *obj, unsigned long len) { const void * const stack = task_stack_page(current); const void * const stackend = stack + THREAD_SIZE; int ret; /* Object is not on the stack at all. */ if (obj + len <= stack || stackend <= obj) return NOT_STACK; /* * Reject: object partially overlaps the stack (passing the * check above means at least one end is within the stack, * so if this check fails, the other end is outside the stack). */ if (obj < stack || stackend < obj + len) return BAD_STACK; /* Check if object is safely within a valid frame. */ ret = arch_within_stack_frames(stack, stackend, obj, len); if (ret) return ret; /* Finally, check stack depth if possible. */ #ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER if (IS_ENABLED(CONFIG_STACK_GROWSUP)) { if ((void *)current_stack_pointer < obj + len) return BAD_STACK; } else { if (obj < (void *)current_stack_pointer) return BAD_STACK; } #endif return GOOD_STACK; } /* * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found * an unexpected state during a copy_from_user() or copy_to_user() call. * There are several checks being performed on the buffer by the * __check_object_size() function. Normal stack buffer usage should never * trip the checks, and kernel text addressing will always trip the check. 
* For cache objects, it is checking that only the whitelisted range of * bytes for a given cache is being accessed (via the cache's usersize and * useroffset fields). To adjust a cache whitelist, use the usercopy-aware * kmem_cache_create_usercopy() function to create the cache (and * carefully audit the whitelist range). */ void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len) { pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", to_user ? "exposure" : "overwrite", to_user ? "from" : "to", name ? : "unknown?!", detail ? " '" : "", detail ? : "", detail ? "'" : "", offset, len); /* * For greater effect, it would be nice to do do_group_exit(), * but BUG() actually hooks all the lock-breaking and per-arch * Oops code, so that is used here instead. */ BUG(); } /* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */ static bool overlaps(const unsigned long ptr, unsigned long n, unsigned long low, unsigned long high) { const unsigned long check_low = ptr; unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ if (check_low >= high || check_high <= low) return false; return true; } /* Is this address range in the kernel text area? */ static inline void check_kernel_text_object(const unsigned long ptr, unsigned long n, bool to_user) { unsigned long textlow = (unsigned long)_stext; unsigned long texthigh = (unsigned long)_etext; unsigned long textlow_linear, texthigh_linear; if (overlaps(ptr, n, textlow, texthigh)) usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n); /* * Some architectures have virtual memory mappings with a secondary * mapping of the kernel text, i.e. there is more than one virtual * kernel address that points to the kernel image. It is usually * when there is a separate linear physical memory mapping, in that * __pa() is not just the reverse of __va(). This can be detected * and checked: */ textlow_linear = (unsigned long)lm_alias(textlow); /* No different mapping: we're done. */ if (textlow_linear == textlow) return; /* Check the secondary mapping... */ texthigh_linear = (unsigned long)lm_alias(texthigh); if (overlaps(ptr, n, textlow_linear, texthigh_linear)) usercopy_abort("linear kernel text", NULL, to_user, ptr - textlow_linear, n); } static inline void check_bogus_address(const unsigned long ptr, unsigned long n, bool to_user) { /* Reject if object wraps past end of memory. */ if (ptr + (n - 1) < ptr) usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n); /* Reject if NULL or ZERO-allocation. */ if (ZERO_OR_NULL_PTR(ptr)) usercopy_abort("null address", NULL, to_user, ptr, n); } static inline void check_heap_object(const void *ptr, unsigned long n, bool to_user) { unsigned long addr = (unsigned long)ptr; unsigned long offset; struct folio *folio; if (is_kmap_addr(ptr)) { offset = offset_in_page(ptr); if (n > PAGE_SIZE - offset) usercopy_abort("kmap", NULL, to_user, offset, n); return; } if (is_vmalloc_addr(ptr) && !pagefault_disabled()) { struct vmap_area *area = find_vmap_area(addr); if (!area) usercopy_abort("vmalloc", "no area", to_user, 0, n); if (n > area->va_end - addr) { offset = addr - area->va_start; usercopy_abort("vmalloc", NULL, to_user, offset, n); } return; } if (!virt_addr_valid(ptr)) return; folio = virt_to_folio(ptr); if (folio_test_slab(folio)) { /* Check slab allocator for flags and size. 
*/ __check_heap_object(ptr, n, folio_slab(folio), to_user); } else if (folio_test_large(folio)) { offset = ptr - folio_address(folio); if (n > folio_size(folio) - offset) usercopy_abort("page alloc", NULL, to_user, offset, n); } } static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks); /* * Validates that the given object is: * - not bogus address * - fully contained by stack (or stack frame, when available) * - fully within SLAB object (or object whitelist area, when available) * - not in kernel text */ void __check_object_size(const void *ptr, unsigned long n, bool to_user) { if (static_branch_unlikely(&bypass_usercopy_checks)) return; /* Skip all tests if size is zero. */ if (!n) return; /* Check for invalid addresses. */ check_bogus_address((const unsigned long)ptr, n, to_user); /* Check for bad stack object. */ switch (check_stack_object(ptr, n)) { case NOT_STACK: /* Object is not touching the current process stack. */ break; case GOOD_FRAME: case GOOD_STACK: /* * Object is either in the correct frame (when it * is possible to check) or just generally on the * process stack (when frame checking not available). */ return; default: usercopy_abort("process stack", NULL, to_user, #ifdef CONFIG_ARCH_HAS_CURRENT_STACK_POINTER IS_ENABLED(CONFIG_STACK_GROWSUP) ? ptr - (void *)current_stack_pointer : (void *)current_stack_pointer - ptr, #else 0, #endif n); } /* Check for bad heap object. */ check_heap_object(ptr, n, to_user); /* Check for object in kernel to avoid text exposure. */ check_kernel_text_object((const unsigned long)ptr, n, to_user); } EXPORT_SYMBOL(__check_object_size); static bool enable_checks __initdata = true; static int __init parse_hardened_usercopy(char *str) { if (kstrtobool(str, &enable_checks)) pr_warn("Invalid option string for hardened_usercopy: '%s'\n", str); return 1; } __setup("hardened_usercopy=", parse_hardened_usercopy); static int __init set_hardened_usercopy(void) { if (enable_checks == false) static_branch_enable(&bypass_usercopy_checks); return 1; } late_initcall(set_hardened_usercopy);
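/*
 * Illustrative sketch, not part of the original file: how a caller stays
 * inside the whitelist that check_heap_object()/__check_heap_object()
 * enforce for slab memory (assumes <linux/slab.h> and <linux/uaccess.h>).
 * Only the "payload" member of the hypothetical struct example_obj is
 * whitelisted via kmem_cache_create_usercopy(), the helper recommended in
 * the comment above, so copying that member with copy_to_user() passes
 * __check_object_size(), while copying the whole object would include
 * "secret" and trigger usercopy_abort().  All example_* names are made up
 * for illustration.
 */
struct example_obj {
	u64 secret;			/* outside the usercopy whitelist */
	char payload[64];		/* whitelisted window */
};

static struct kmem_cache *example_cache;

static int __init example_cache_init(void)
{
	example_cache = kmem_cache_create_usercopy("example_obj",
				sizeof(struct example_obj), 0, 0,
				offsetof(struct example_obj, payload),
				sizeof_field(struct example_obj, payload),
				NULL);
	return example_cache ? 0 : -ENOMEM;
}

static long example_copy_payload(char __user *ubuf, struct example_obj *obj)
{
	/* Entirely within [useroffset, useroffset + usersize): check passes. */
	if (copy_to_user(ubuf, obj->payload, sizeof(obj->payload)))
		return -EFAULT;
	return 0;
}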
// SPDX-License-Identifier: GPL-2.0-only /* * The industrial I/O core * * Copyright (c) 2008 Jonathan Cameron * * Based on elements of hwmon and input subsystems. 
*/ #define pr_fmt(fmt) "iio-core: " fmt #include <linux/anon_inodes.h> #include <linux/cdev.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/idr.h> #include <linux/kdev_t.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/property.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/wait.h> #include <linux/iio/buffer.h> #include <linux/iio/buffer_impl.h> #include <linux/iio/events.h> #include <linux/iio/iio-opaque.h> #include <linux/iio/iio.h> #include <linux/iio/sysfs.h> #include "iio_core.h" #include "iio_core_trigger.h" /* IDA to assign each registered device a unique id */ static DEFINE_IDA(iio_ida); static dev_t iio_devt; #define IIO_DEV_MAX 256 const struct bus_type iio_bus_type = { .name = "iio", }; EXPORT_SYMBOL(iio_bus_type); static struct dentry *iio_debugfs_dentry; static const char * const iio_direction[] = { [0] = "in", [1] = "out", }; static const char * const iio_chan_type_name_spec[] = { [IIO_VOLTAGE] = "voltage", [IIO_CURRENT] = "current", [IIO_POWER] = "power", [IIO_ACCEL] = "accel", [IIO_ANGL_VEL] = "anglvel", [IIO_MAGN] = "magn", [IIO_LIGHT] = "illuminance", [IIO_INTENSITY] = "intensity", [IIO_PROXIMITY] = "proximity", [IIO_TEMP] = "temp", [IIO_INCLI] = "incli", [IIO_ROT] = "rot", [IIO_ANGL] = "angl", [IIO_TIMESTAMP] = "timestamp", [IIO_CAPACITANCE] = "capacitance", [IIO_ALTVOLTAGE] = "altvoltage", [IIO_CCT] = "cct", [IIO_PRESSURE] = "pressure", [IIO_HUMIDITYRELATIVE] = "humidityrelative", [IIO_ACTIVITY] = "activity", [IIO_STEPS] = "steps", [IIO_ENERGY] = "energy", [IIO_DISTANCE] = "distance", [IIO_VELOCITY] = "velocity", [IIO_CONCENTRATION] = "concentration", [IIO_RESISTANCE] = "resistance", [IIO_PH] = "ph", [IIO_UVINDEX] = "uvindex", [IIO_ELECTRICALCONDUCTIVITY] = "electricalconductivity", [IIO_COUNT] = "count", [IIO_INDEX] = "index", [IIO_GRAVITY] = "gravity", [IIO_POSITIONRELATIVE] = "positionrelative", [IIO_PHASE] = "phase", [IIO_MASSCONCENTRATION] = "massconcentration", [IIO_DELTA_ANGL] = "deltaangl", [IIO_DELTA_VELOCITY] = "deltavelocity", [IIO_COLORTEMP] = "colortemp", [IIO_CHROMATICITY] = "chromaticity", }; static const char * const iio_modifier_names[] = { [IIO_MOD_X] = "x", [IIO_MOD_Y] = "y", [IIO_MOD_Z] = "z", [IIO_MOD_X_AND_Y] = "x&y", [IIO_MOD_X_AND_Z] = "x&z", [IIO_MOD_Y_AND_Z] = "y&z", [IIO_MOD_X_AND_Y_AND_Z] = "x&y&z", [IIO_MOD_X_OR_Y] = "x|y", [IIO_MOD_X_OR_Z] = "x|z", [IIO_MOD_Y_OR_Z] = "y|z", [IIO_MOD_X_OR_Y_OR_Z] = "x|y|z", [IIO_MOD_ROOT_SUM_SQUARED_X_Y] = "sqrt(x^2+y^2)", [IIO_MOD_SUM_SQUARED_X_Y_Z] = "x^2+y^2+z^2", [IIO_MOD_LIGHT_BOTH] = "both", [IIO_MOD_LIGHT_IR] = "ir", [IIO_MOD_LIGHT_CLEAR] = "clear", [IIO_MOD_LIGHT_RED] = "red", [IIO_MOD_LIGHT_GREEN] = "green", [IIO_MOD_LIGHT_BLUE] = "blue", [IIO_MOD_LIGHT_UV] = "uv", [IIO_MOD_LIGHT_UVA] = "uva", [IIO_MOD_LIGHT_UVB] = "uvb", [IIO_MOD_LIGHT_DUV] = "duv", [IIO_MOD_QUATERNION] = "quaternion", [IIO_MOD_TEMP_AMBIENT] = "ambient", [IIO_MOD_TEMP_OBJECT] = "object", [IIO_MOD_NORTH_MAGN] = "from_north_magnetic", [IIO_MOD_NORTH_TRUE] = "from_north_true", [IIO_MOD_NORTH_MAGN_TILT_COMP] = "from_north_magnetic_tilt_comp", [IIO_MOD_NORTH_TRUE_TILT_COMP] = "from_north_true_tilt_comp", [IIO_MOD_RUNNING] = "running", [IIO_MOD_JOGGING] = "jogging", [IIO_MOD_WALKING] = "walking", [IIO_MOD_STILL] = "still", [IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z] = "sqrt(x^2+y^2+z^2)", [IIO_MOD_I] = "i", [IIO_MOD_Q] = "q", [IIO_MOD_CO2] = "co2", [IIO_MOD_VOC] = "voc", [IIO_MOD_PM1] = 
"pm1", [IIO_MOD_PM2P5] = "pm2p5", [IIO_MOD_PM4] = "pm4", [IIO_MOD_PM10] = "pm10", [IIO_MOD_ETHANOL] = "ethanol", [IIO_MOD_H2] = "h2", [IIO_MOD_O2] = "o2", [IIO_MOD_LINEAR_X] = "linear_x", [IIO_MOD_LINEAR_Y] = "linear_y", [IIO_MOD_LINEAR_Z] = "linear_z", [IIO_MOD_PITCH] = "pitch", [IIO_MOD_YAW] = "yaw", [IIO_MOD_ROLL] = "roll", }; /* relies on pairs of these shared then separate */ static const char * const iio_chan_info_postfix[] = { [IIO_CHAN_INFO_RAW] = "raw", [IIO_CHAN_INFO_PROCESSED] = "input", [IIO_CHAN_INFO_SCALE] = "scale", [IIO_CHAN_INFO_OFFSET] = "offset", [IIO_CHAN_INFO_CALIBSCALE] = "calibscale", [IIO_CHAN_INFO_CALIBBIAS] = "calibbias", [IIO_CHAN_INFO_PEAK] = "peak_raw", [IIO_CHAN_INFO_PEAK_SCALE] = "peak_scale", [IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW] = "quadrature_correction_raw", [IIO_CHAN_INFO_AVERAGE_RAW] = "mean_raw", [IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY] = "filter_low_pass_3db_frequency", [IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY] = "filter_high_pass_3db_frequency", [IIO_CHAN_INFO_SAMP_FREQ] = "sampling_frequency", [IIO_CHAN_INFO_FREQUENCY] = "frequency", [IIO_CHAN_INFO_PHASE] = "phase", [IIO_CHAN_INFO_HARDWAREGAIN] = "hardwaregain", [IIO_CHAN_INFO_HYSTERESIS] = "hysteresis", [IIO_CHAN_INFO_HYSTERESIS_RELATIVE] = "hysteresis_relative", [IIO_CHAN_INFO_INT_TIME] = "integration_time", [IIO_CHAN_INFO_ENABLE] = "en", [IIO_CHAN_INFO_CALIBHEIGHT] = "calibheight", [IIO_CHAN_INFO_CALIBWEIGHT] = "calibweight", [IIO_CHAN_INFO_DEBOUNCE_COUNT] = "debounce_count", [IIO_CHAN_INFO_DEBOUNCE_TIME] = "debounce_time", [IIO_CHAN_INFO_CALIBEMISSIVITY] = "calibemissivity", [IIO_CHAN_INFO_OVERSAMPLING_RATIO] = "oversampling_ratio", [IIO_CHAN_INFO_THERMOCOUPLE_TYPE] = "thermocouple_type", [IIO_CHAN_INFO_CALIBAMBIENT] = "calibambient", [IIO_CHAN_INFO_ZEROPOINT] = "zeropoint", [IIO_CHAN_INFO_TROUGH] = "trough_raw", }; /** * iio_device_id() - query the unique ID for the device * @indio_dev: Device structure whose ID is being queried * * The IIO device ID is a unique index used for example for the naming * of the character device /dev/iio\:device[ID]. * * Returns: Unique ID for the device. */ int iio_device_id(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); return iio_dev_opaque->id; } EXPORT_SYMBOL_GPL(iio_device_id); /** * iio_buffer_enabled() - helper function to test if the buffer is enabled * @indio_dev: IIO device structure for device * * Returns: True, if the buffer is enabled. */ bool iio_buffer_enabled(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); return iio_dev_opaque->currentmode & INDIO_ALL_BUFFER_MODES; } EXPORT_SYMBOL_GPL(iio_buffer_enabled); #if defined(CONFIG_DEBUG_FS) /* * There's also a CONFIG_DEBUG_FS guard in include/linux/iio/iio.h for * iio_get_debugfs_dentry() to make it inline if CONFIG_DEBUG_FS is undefined */ struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); return iio_dev_opaque->debugfs_dentry; } EXPORT_SYMBOL_GPL(iio_get_debugfs_dentry); #endif /** * iio_find_channel_from_si() - get channel from its scan index * @indio_dev: device * @si: scan index to match * * Returns: * Constant pointer to iio_chan_spec, if scan index matches, NULL on failure. 
*/ const struct iio_chan_spec *iio_find_channel_from_si(struct iio_dev *indio_dev, int si) { int i; for (i = 0; i < indio_dev->num_channels; i++) if (indio_dev->channels[i].scan_index == si) return &indio_dev->channels[i]; return NULL; } /* This turns up an awful lot */ ssize_t iio_read_const_attr(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", to_iio_const_attr(attr)->string); } EXPORT_SYMBOL(iio_read_const_attr); /** * iio_device_set_clock() - Set current timestamping clock for the device * @indio_dev: IIO device structure containing the device * @clock_id: timestamping clock POSIX identifier to set. * * Returns: 0 on success, or a negative error code. */ int iio_device_set_clock(struct iio_dev *indio_dev, clockid_t clock_id) { int ret; struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); const struct iio_event_interface *ev_int = iio_dev_opaque->event_interface; ret = mutex_lock_interruptible(&iio_dev_opaque->mlock); if (ret) return ret; if ((ev_int && iio_event_enabled(ev_int)) || iio_buffer_enabled(indio_dev)) { mutex_unlock(&iio_dev_opaque->mlock); return -EBUSY; } iio_dev_opaque->clock_id = clock_id; mutex_unlock(&iio_dev_opaque->mlock); return 0; } EXPORT_SYMBOL(iio_device_set_clock); /** * iio_device_get_clock() - Retrieve current timestamping clock for the device * @indio_dev: IIO device structure containing the device * * Returns: Clock ID of the current timestamping clock for the device. */ clockid_t iio_device_get_clock(const struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); return iio_dev_opaque->clock_id; } EXPORT_SYMBOL(iio_device_get_clock); /** * iio_get_time_ns() - utility function to get a time stamp for events etc * @indio_dev: device * * Returns: Timestamp of the event in nanoseconds. 
*/ s64 iio_get_time_ns(const struct iio_dev *indio_dev) { struct timespec64 tp; switch (iio_device_get_clock(indio_dev)) { case CLOCK_REALTIME: return ktime_get_real_ns(); case CLOCK_MONOTONIC: return ktime_get_ns(); case CLOCK_MONOTONIC_RAW: return ktime_get_raw_ns(); case CLOCK_REALTIME_COARSE: return ktime_to_ns(ktime_get_coarse_real()); case CLOCK_MONOTONIC_COARSE: ktime_get_coarse_ts64(&tp); return timespec64_to_ns(&tp); case CLOCK_BOOTTIME: return ktime_get_boottime_ns(); case CLOCK_TAI: return ktime_get_clocktai_ns(); default: BUG(); } } EXPORT_SYMBOL(iio_get_time_ns); static int __init iio_init(void) { int ret; /* Register sysfs bus */ ret = bus_register(&iio_bus_type); if (ret < 0) { pr_err("could not register bus type\n"); goto error_nothing; } ret = alloc_chrdev_region(&iio_devt, 0, IIO_DEV_MAX, "iio"); if (ret < 0) { pr_err("failed to allocate char dev region\n"); goto error_unregister_bus_type; } iio_debugfs_dentry = debugfs_create_dir("iio", NULL); return 0; error_unregister_bus_type: bus_unregister(&iio_bus_type); error_nothing: return ret; } static void __exit iio_exit(void) { if (iio_devt) unregister_chrdev_region(iio_devt, IIO_DEV_MAX); bus_unregister(&iio_bus_type); debugfs_remove(iio_debugfs_dentry); } #if defined(CONFIG_DEBUG_FS) static ssize_t iio_debugfs_read_reg(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { struct iio_dev *indio_dev = file->private_data; struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); unsigned int val = 0; int ret; if (*ppos > 0) return simple_read_from_buffer(userbuf, count, ppos, iio_dev_opaque->read_buf, iio_dev_opaque->read_buf_len); ret = indio_dev->info->debugfs_reg_access(indio_dev, iio_dev_opaque->cached_reg_addr, 0, &val); if (ret) { dev_err(indio_dev->dev.parent, "%s: read failed\n", __func__); return ret; } iio_dev_opaque->read_buf_len = snprintf(iio_dev_opaque->read_buf, sizeof(iio_dev_opaque->read_buf), "0x%X\n", val); return simple_read_from_buffer(userbuf, count, ppos, iio_dev_opaque->read_buf, iio_dev_opaque->read_buf_len); } static ssize_t iio_debugfs_write_reg(struct file *file, const char __user *userbuf, size_t count, loff_t *ppos) { struct iio_dev *indio_dev = file->private_data; struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); unsigned int reg, val; char buf[80]; int ret; count = min(count, sizeof(buf) - 1); if (copy_from_user(buf, userbuf, count)) return -EFAULT; buf[count] = 0; ret = sscanf(buf, "%i %i", &reg, &val); switch (ret) { case 1: iio_dev_opaque->cached_reg_addr = reg; break; case 2: iio_dev_opaque->cached_reg_addr = reg; ret = indio_dev->info->debugfs_reg_access(indio_dev, reg, val, NULL); if (ret) { dev_err(indio_dev->dev.parent, "%s: write failed\n", __func__); return ret; } break; default: return -EINVAL; } return count; } static const struct file_operations iio_debugfs_reg_fops = { .open = simple_open, .read = iio_debugfs_read_reg, .write = iio_debugfs_write_reg, }; static void iio_device_unregister_debugfs(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); debugfs_remove_recursive(iio_dev_opaque->debugfs_dentry); } static void iio_device_register_debugfs(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque; if (indio_dev->info->debugfs_reg_access == NULL) return; if (!iio_debugfs_dentry) return; iio_dev_opaque = to_iio_dev_opaque(indio_dev); iio_dev_opaque->debugfs_dentry = debugfs_create_dir(dev_name(&indio_dev->dev), iio_debugfs_dentry); debugfs_create_file("direct_reg_access", 
0644, iio_dev_opaque->debugfs_dentry, indio_dev, &iio_debugfs_reg_fops); } #else static void iio_device_register_debugfs(struct iio_dev *indio_dev) { } static void iio_device_unregister_debugfs(struct iio_dev *indio_dev) { } #endif /* CONFIG_DEBUG_FS */ static ssize_t iio_read_channel_ext_info(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); const struct iio_chan_spec_ext_info *ext_info; ext_info = &this_attr->c->ext_info[this_attr->address]; return ext_info->read(indio_dev, ext_info->private, this_attr->c, buf); } static ssize_t iio_write_channel_ext_info(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); const struct iio_chan_spec_ext_info *ext_info; ext_info = &this_attr->c->ext_info[this_attr->address]; return ext_info->write(indio_dev, ext_info->private, this_attr->c, buf, len); } ssize_t iio_enum_available_read(struct iio_dev *indio_dev, uintptr_t priv, const struct iio_chan_spec *chan, char *buf) { const struct iio_enum *e = (const struct iio_enum *)priv; unsigned int i; size_t len = 0; if (!e->num_items) return 0; for (i = 0; i < e->num_items; ++i) { if (!e->items[i]) continue; len += sysfs_emit_at(buf, len, "%s ", e->items[i]); } /* replace last space with a newline */ buf[len - 1] = '\n'; return len; } EXPORT_SYMBOL_GPL(iio_enum_available_read); ssize_t iio_enum_read(struct iio_dev *indio_dev, uintptr_t priv, const struct iio_chan_spec *chan, char *buf) { const struct iio_enum *e = (const struct iio_enum *)priv; int i; if (!e->get) return -EINVAL; i = e->get(indio_dev, chan); if (i < 0) return i; if (i >= e->num_items || !e->items[i]) return -EINVAL; return sysfs_emit(buf, "%s\n", e->items[i]); } EXPORT_SYMBOL_GPL(iio_enum_read); ssize_t iio_enum_write(struct iio_dev *indio_dev, uintptr_t priv, const struct iio_chan_spec *chan, const char *buf, size_t len) { const struct iio_enum *e = (const struct iio_enum *)priv; int ret; if (!e->set) return -EINVAL; ret = __sysfs_match_string(e->items, e->num_items, buf); if (ret < 0) return ret; ret = e->set(indio_dev, chan, ret); return ret ? ret : len; } EXPORT_SYMBOL_GPL(iio_enum_write); static const struct iio_mount_matrix iio_mount_idmatrix = { .rotation = { "1", "0", "0", "0", "1", "0", "0", "0", "1" } }; static int iio_setup_mount_idmatrix(const struct device *dev, struct iio_mount_matrix *matrix) { *matrix = iio_mount_idmatrix; dev_info(dev, "mounting matrix not found: using identity...\n"); return 0; } ssize_t iio_show_mount_matrix(struct iio_dev *indio_dev, uintptr_t priv, const struct iio_chan_spec *chan, char *buf) { const struct iio_mount_matrix *mtx; mtx = ((iio_get_mount_matrix_t *)priv)(indio_dev, chan); if (IS_ERR(mtx)) return PTR_ERR(mtx); if (!mtx) mtx = &iio_mount_idmatrix; return sysfs_emit(buf, "%s, %s, %s; %s, %s, %s; %s, %s, %s\n", mtx->rotation[0], mtx->rotation[1], mtx->rotation[2], mtx->rotation[3], mtx->rotation[4], mtx->rotation[5], mtx->rotation[6], mtx->rotation[7], mtx->rotation[8]); } EXPORT_SYMBOL_GPL(iio_show_mount_matrix); /** * iio_read_mount_matrix() - retrieve iio device mounting matrix from * device "mount-matrix" property * @dev: device the mounting matrix property is assigned to * @matrix: where to store retrieved matrix * * If device is assigned no mounting matrix property, a default 3x3 identity * matrix will be filled in. 
* * Returns: 0 if success, or a negative error code on failure. */ int iio_read_mount_matrix(struct device *dev, struct iio_mount_matrix *matrix) { size_t len = ARRAY_SIZE(iio_mount_idmatrix.rotation); int err; err = device_property_read_string_array(dev, "mount-matrix", matrix->rotation, len); if (err == len) return 0; if (err >= 0) /* Invalid number of matrix entries. */ return -EINVAL; if (err != -EINVAL) /* Invalid matrix declaration format. */ return err; /* Matrix was not declared at all: fallback to identity. */ return iio_setup_mount_idmatrix(dev, matrix); } EXPORT_SYMBOL(iio_read_mount_matrix); static ssize_t __iio_format_value(char *buf, size_t offset, unsigned int type, int size, const int *vals) { int tmp0, tmp1; s64 tmp2; bool scale_db = false; switch (type) { case IIO_VAL_INT: return sysfs_emit_at(buf, offset, "%d", vals[0]); case IIO_VAL_INT_PLUS_MICRO_DB: scale_db = true; fallthrough; case IIO_VAL_INT_PLUS_MICRO: if (vals[1] < 0) return sysfs_emit_at(buf, offset, "-%d.%06u%s", abs(vals[0]), -vals[1], scale_db ? " dB" : ""); else return sysfs_emit_at(buf, offset, "%d.%06u%s", vals[0], vals[1], scale_db ? " dB" : ""); case IIO_VAL_INT_PLUS_NANO: if (vals[1] < 0) return sysfs_emit_at(buf, offset, "-%d.%09u", abs(vals[0]), -vals[1]); else return sysfs_emit_at(buf, offset, "%d.%09u", vals[0], vals[1]); case IIO_VAL_FRACTIONAL: tmp2 = div_s64((s64)vals[0] * 1000000000LL, vals[1]); tmp1 = vals[1]; tmp0 = (int)div_s64_rem(tmp2, 1000000000, &tmp1); if ((tmp2 < 0) && (tmp0 == 0)) return sysfs_emit_at(buf, offset, "-0.%09u", abs(tmp1)); else return sysfs_emit_at(buf, offset, "%d.%09u", tmp0, abs(tmp1)); case IIO_VAL_FRACTIONAL_LOG2: tmp2 = shift_right((s64)vals[0] * 1000000000LL, vals[1]); tmp0 = (int)div_s64_rem(tmp2, 1000000000LL, &tmp1); if (tmp0 == 0 && tmp2 < 0) return sysfs_emit_at(buf, offset, "-0.%09u", abs(tmp1)); else return sysfs_emit_at(buf, offset, "%d.%09u", tmp0, abs(tmp1)); case IIO_VAL_INT_MULTIPLE: { int i; int l = 0; for (i = 0; i < size; ++i) l += sysfs_emit_at(buf, offset + l, "%d ", vals[i]); return l; } case IIO_VAL_CHAR: return sysfs_emit_at(buf, offset, "%c", (char)vals[0]); case IIO_VAL_INT_64: tmp2 = (s64)((((u64)vals[1]) << 32) | (u32)vals[0]); return sysfs_emit_at(buf, offset, "%lld", tmp2); default: return 0; } } /** * iio_format_value() - Formats a IIO value into its string representation * @buf: The buffer to which the formatted value gets written * which is assumed to be big enough (i.e. PAGE_SIZE). * @type: One of the IIO_VAL_* constants. This decides how the val * and val2 parameters are formatted. * @size: Number of IIO value entries contained in vals * @vals: Pointer to the values, exact meaning depends on the * type parameter. * * Returns: * 0 by default, a negative number on failure or the total number of characters * written for a type that belongs to the IIO_VAL_* constant. 
*/ ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals) { ssize_t len; len = __iio_format_value(buf, 0, type, size, vals); if (len >= PAGE_SIZE - 1) return -EFBIG; return len + sysfs_emit_at(buf, len, "\n"); } EXPORT_SYMBOL_GPL(iio_format_value); static ssize_t iio_read_channel_label(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); if (indio_dev->info->read_label) return indio_dev->info->read_label(indio_dev, this_attr->c, buf); if (this_attr->c->extend_name) return sysfs_emit(buf, "%s\n", this_attr->c->extend_name); return -EINVAL; } static ssize_t iio_read_channel_info(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); int vals[INDIO_MAX_RAW_ELEMENTS]; int ret; int val_len = 2; if (indio_dev->info->read_raw_multi) ret = indio_dev->info->read_raw_multi(indio_dev, this_attr->c, INDIO_MAX_RAW_ELEMENTS, vals, &val_len, this_attr->address); else ret = indio_dev->info->read_raw(indio_dev, this_attr->c, &vals[0], &vals[1], this_attr->address); if (ret < 0) return ret; return iio_format_value(buf, ret, val_len, vals); } static ssize_t iio_format_list(char *buf, const int *vals, int type, int length, const char *prefix, const char *suffix) { ssize_t len; int stride; int i; switch (type) { case IIO_VAL_INT: stride = 1; break; default: stride = 2; break; } len = sysfs_emit(buf, prefix); for (i = 0; i <= length - stride; i += stride) { if (i != 0) { len += sysfs_emit_at(buf, len, " "); if (len >= PAGE_SIZE) return -EFBIG; } len += __iio_format_value(buf, len, type, stride, &vals[i]); if (len >= PAGE_SIZE) return -EFBIG; } len += sysfs_emit_at(buf, len, "%s\n", suffix); return len; } static ssize_t iio_format_avail_list(char *buf, const int *vals, int type, int length) { return iio_format_list(buf, vals, type, length, "", ""); } static ssize_t iio_format_avail_range(char *buf, const int *vals, int type) { int length; /* * length refers to the array size , not the number of elements. * The purpose is to print the range [min , step ,max] so length should * be 3 in case of int, and 6 for other types. */ switch (type) { case IIO_VAL_INT: length = 3; break; default: length = 6; break; } return iio_format_list(buf, vals, type, length, "[", "]"); } static ssize_t iio_read_channel_info_avail(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); const int *vals; int ret; int length; int type; ret = indio_dev->info->read_avail(indio_dev, this_attr->c, &vals, &type, &length, this_attr->address); if (ret < 0) return ret; switch (ret) { case IIO_AVAIL_LIST: return iio_format_avail_list(buf, vals, type, length); case IIO_AVAIL_RANGE: return iio_format_avail_range(buf, vals, type); default: return -EINVAL; } } /** * __iio_str_to_fixpoint() - Parse a fixed-point number from a string * @str: The string to parse * @fract_mult: Multiplier for the first decimal place, should be a power of 10 * @integer: The integer part of the number * @fract: The fractional part of the number * @scale_db: True if this should parse as dB * * Returns: * 0 on success, or a negative error code if the string could not be parsed. 
*/ static int __iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, int *fract, bool scale_db) { int i = 0, f = 0; bool integer_part = true, negative = false; if (fract_mult == 0) { *fract = 0; return kstrtoint(str, 0, integer); } if (str[0] == '-') { negative = true; str++; } else if (str[0] == '+') { str++; } while (*str) { if ('0' <= *str && *str <= '9') { if (integer_part) { i = i * 10 + *str - '0'; } else { f += fract_mult * (*str - '0'); fract_mult /= 10; } } else if (*str == '\n') { if (*(str + 1) == '\0') break; return -EINVAL; } else if (!strncmp(str, " dB", sizeof(" dB") - 1) && scale_db) { /* Ignore the dB suffix */ str += sizeof(" dB") - 1; continue; } else if (!strncmp(str, "dB", sizeof("dB") - 1) && scale_db) { /* Ignore the dB suffix */ str += sizeof("dB") - 1; continue; } else if (*str == '.' && integer_part) { integer_part = false; } else { return -EINVAL; } str++; } if (negative) { if (i) i = -i; else f = -f; } *integer = i; *fract = f; return 0; } /** * iio_str_to_fixpoint() - Parse a fixed-point number from a string * @str: The string to parse * @fract_mult: Multiplier for the first decimal place, should be a power of 10 * @integer: The integer part of the number * @fract: The fractional part of the number * * Returns: * 0 on success, or a negative error code if the string could not be parsed. */ int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, int *fract) { return __iio_str_to_fixpoint(str, fract_mult, integer, fract, false); } EXPORT_SYMBOL_GPL(iio_str_to_fixpoint); static ssize_t iio_write_channel_info(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); int ret, fract_mult = 100000; int integer, fract = 0; bool is_char = false; bool scale_db = false; /* Assumes decimal - precision based on number of digits */ if (!indio_dev->info->write_raw) return -EINVAL; if (indio_dev->info->write_raw_get_fmt) switch (indio_dev->info->write_raw_get_fmt(indio_dev, this_attr->c, this_attr->address)) { case IIO_VAL_INT: fract_mult = 0; break; case IIO_VAL_INT_PLUS_MICRO_DB: scale_db = true; fallthrough; case IIO_VAL_INT_PLUS_MICRO: fract_mult = 100000; break; case IIO_VAL_INT_PLUS_NANO: fract_mult = 100000000; break; case IIO_VAL_CHAR: is_char = true; break; default: return -EINVAL; } if (is_char) { char ch; if (sscanf(buf, "%c", &ch) != 1) return -EINVAL; integer = ch; } else { ret = __iio_str_to_fixpoint(buf, fract_mult, &integer, &fract, scale_db); if (ret) return ret; } ret = indio_dev->info->write_raw(indio_dev, this_attr->c, integer, fract, this_attr->address); if (ret) return ret; return len; } static int __iio_device_attr_init(struct device_attribute *dev_attr, const char *postfix, struct iio_chan_spec const *chan, ssize_t (*readfunc)(struct device *dev, struct device_attribute *attr, char *buf), ssize_t (*writefunc)(struct device *dev, struct device_attribute *attr, const char *buf, size_t len), enum iio_shared_by shared_by) { int ret = 0; char *name = NULL; char *full_postfix; sysfs_attr_init(&dev_attr->attr); /* Build up postfix of <extend_name>_<modifier>_postfix */ if (chan->modified && (shared_by == IIO_SEPARATE)) { if (chan->extend_name) full_postfix = kasprintf(GFP_KERNEL, "%s_%s_%s", iio_modifier_names[chan->channel2], chan->extend_name, postfix); else full_postfix = kasprintf(GFP_KERNEL, "%s_%s", iio_modifier_names[chan->channel2], postfix); } else { if (chan->extend_name == NULL || shared_by != 
IIO_SEPARATE) full_postfix = kstrdup(postfix, GFP_KERNEL); else full_postfix = kasprintf(GFP_KERNEL, "%s_%s", chan->extend_name, postfix); } if (full_postfix == NULL) return -ENOMEM; if (chan->differential) { /* Differential can not have modifier */ switch (shared_by) { case IIO_SHARED_BY_ALL: name = kasprintf(GFP_KERNEL, "%s", full_postfix); break; case IIO_SHARED_BY_DIR: name = kasprintf(GFP_KERNEL, "%s_%s", iio_direction[chan->output], full_postfix); break; case IIO_SHARED_BY_TYPE: name = kasprintf(GFP_KERNEL, "%s_%s-%s_%s", iio_direction[chan->output], iio_chan_type_name_spec[chan->type], iio_chan_type_name_spec[chan->type], full_postfix); break; case IIO_SEPARATE: if (!chan->indexed) { WARN(1, "Differential channels must be indexed\n"); ret = -EINVAL; goto error_free_full_postfix; } name = kasprintf(GFP_KERNEL, "%s_%s%d-%s%d_%s", iio_direction[chan->output], iio_chan_type_name_spec[chan->type], chan->channel, iio_chan_type_name_spec[chan->type], chan->channel2, full_postfix); break; } } else { /* Single ended */ switch (shared_by) { case IIO_SHARED_BY_ALL: name = kasprintf(GFP_KERNEL, "%s", full_postfix); break; case IIO_SHARED_BY_DIR: name = kasprintf(GFP_KERNEL, "%s_%s", iio_direction[chan->output], full_postfix); break; case IIO_SHARED_BY_TYPE: name = kasprintf(GFP_KERNEL, "%s_%s_%s", iio_direction[chan->output], iio_chan_type_name_spec[chan->type], full_postfix); break; case IIO_SEPARATE: if (chan->indexed) name = kasprintf(GFP_KERNEL, "%s_%s%d_%s", iio_direction[chan->output], iio_chan_type_name_spec[chan->type], chan->channel, full_postfix); else name = kasprintf(GFP_KERNEL, "%s_%s_%s", iio_direction[chan->output], iio_chan_type_name_spec[chan->type], full_postfix); break; } } if (name == NULL) { ret = -ENOMEM; goto error_free_full_postfix; } dev_attr->attr.name = name; if (readfunc) { dev_attr->attr.mode |= 0444; dev_attr->show = readfunc; } if (writefunc) { dev_attr->attr.mode |= 0200; dev_attr->store = writefunc; } error_free_full_postfix: kfree(full_postfix); return ret; } static void __iio_device_attr_deinit(struct device_attribute *dev_attr) { kfree(dev_attr->attr.name); } int __iio_add_chan_devattr(const char *postfix, struct iio_chan_spec const *chan, ssize_t (*readfunc)(struct device *dev, struct device_attribute *attr, char *buf), ssize_t (*writefunc)(struct device *dev, struct device_attribute *attr, const char *buf, size_t len), u64 mask, enum iio_shared_by shared_by, struct device *dev, struct iio_buffer *buffer, struct list_head *attr_list) { int ret; struct iio_dev_attr *iio_attr, *t; iio_attr = kzalloc(sizeof(*iio_attr), GFP_KERNEL); if (iio_attr == NULL) return -ENOMEM; ret = __iio_device_attr_init(&iio_attr->dev_attr, postfix, chan, readfunc, writefunc, shared_by); if (ret) goto error_iio_dev_attr_free; iio_attr->c = chan; iio_attr->address = mask; iio_attr->buffer = buffer; list_for_each_entry(t, attr_list, l) if (strcmp(t->dev_attr.attr.name, iio_attr->dev_attr.attr.name) == 0) { if (shared_by == IIO_SEPARATE) dev_err(dev, "tried to double register : %s\n", t->dev_attr.attr.name); ret = -EBUSY; goto error_device_attr_deinit; } list_add(&iio_attr->l, attr_list); return 0; error_device_attr_deinit: __iio_device_attr_deinit(&iio_attr->dev_attr); error_iio_dev_attr_free: kfree(iio_attr); return ret; } static int iio_device_add_channel_label(struct iio_dev *indio_dev, struct iio_chan_spec const *chan) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int ret; if (!indio_dev->info->read_label && !chan->extend_name) return 0; ret = 
__iio_add_chan_devattr("label", chan, &iio_read_channel_label, NULL, 0, IIO_SEPARATE, &indio_dev->dev, NULL, &iio_dev_opaque->channel_attr_list); if (ret < 0) return ret; return 1; } static int iio_device_add_info_mask_type(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, enum iio_shared_by shared_by, const long *infomask) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int i, ret, attrcount = 0; for_each_set_bit(i, infomask, sizeof(*infomask)*8) { if (i >= ARRAY_SIZE(iio_chan_info_postfix)) return -EINVAL; ret = __iio_add_chan_devattr(iio_chan_info_postfix[i], chan, &iio_read_channel_info, &iio_write_channel_info, i, shared_by, &indio_dev->dev, NULL, &iio_dev_opaque->channel_attr_list); if ((ret == -EBUSY) && (shared_by != IIO_SEPARATE)) continue; if (ret < 0) return ret; attrcount++; } return attrcount; } static int iio_device_add_info_mask_type_avail(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, enum iio_shared_by shared_by, const long *infomask) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int i, ret, attrcount = 0; char *avail_postfix; for_each_set_bit(i, infomask, sizeof(*infomask) * 8) { if (i >= ARRAY_SIZE(iio_chan_info_postfix)) return -EINVAL; avail_postfix = kasprintf(GFP_KERNEL, "%s_available", iio_chan_info_postfix[i]); if (!avail_postfix) return -ENOMEM; ret = __iio_add_chan_devattr(avail_postfix, chan, &iio_read_channel_info_avail, NULL, i, shared_by, &indio_dev->dev, NULL, &iio_dev_opaque->channel_attr_list); kfree(avail_postfix); if ((ret == -EBUSY) && (shared_by != IIO_SEPARATE)) continue; if (ret < 0) return ret; attrcount++; } return attrcount; } static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev, struct iio_chan_spec const *chan) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int ret, attrcount = 0; const struct iio_chan_spec_ext_info *ext_info; if (chan->channel < 0) return 0; ret = iio_device_add_info_mask_type(indio_dev, chan, IIO_SEPARATE, &chan->info_mask_separate); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_info_mask_type_avail(indio_dev, chan, IIO_SEPARATE, &chan->info_mask_separate_available); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_info_mask_type(indio_dev, chan, IIO_SHARED_BY_TYPE, &chan->info_mask_shared_by_type); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_info_mask_type_avail(indio_dev, chan, IIO_SHARED_BY_TYPE, &chan->info_mask_shared_by_type_available); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_info_mask_type(indio_dev, chan, IIO_SHARED_BY_DIR, &chan->info_mask_shared_by_dir); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_info_mask_type_avail(indio_dev, chan, IIO_SHARED_BY_DIR, &chan->info_mask_shared_by_dir_available); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_info_mask_type(indio_dev, chan, IIO_SHARED_BY_ALL, &chan->info_mask_shared_by_all); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_info_mask_type_avail(indio_dev, chan, IIO_SHARED_BY_ALL, &chan->info_mask_shared_by_all_available); if (ret < 0) return ret; attrcount += ret; ret = iio_device_add_channel_label(indio_dev, chan); if (ret < 0) return ret; attrcount += ret; if (chan->ext_info) { unsigned int i = 0; for (ext_info = chan->ext_info; ext_info->name; ext_info++) { ret = __iio_add_chan_devattr(ext_info->name, chan, ext_info->read ? &iio_read_channel_ext_info : NULL, ext_info->write ? 
&iio_write_channel_ext_info : NULL, i, ext_info->shared, &indio_dev->dev, NULL, &iio_dev_opaque->channel_attr_list); i++; if (ret == -EBUSY && ext_info->shared) continue; if (ret) return ret; attrcount++; } } return attrcount; } /** * iio_free_chan_devattr_list() - Free a list of IIO device attributes * @attr_list: List of IIO device attributes * * This function frees the memory allocated for each of the IIO device * attributes in the list. */ void iio_free_chan_devattr_list(struct list_head *attr_list) { struct iio_dev_attr *p, *n; list_for_each_entry_safe(p, n, attr_list, l) { kfree_const(p->dev_attr.attr.name); list_del(&p->l); kfree(p); } } static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); return sysfs_emit(buf, "%s\n", indio_dev->name); } static DEVICE_ATTR_RO(name); static ssize_t label_show(struct device *dev, struct device_attribute *attr, char *buf) { struct iio_dev *indio_dev = dev_to_iio_dev(dev); return sysfs_emit(buf, "%s\n", indio_dev->label); } static DEVICE_ATTR_RO(label); static const char * const clock_names[] = { [CLOCK_REALTIME] = "realtime", [CLOCK_MONOTONIC] = "monotonic", [CLOCK_PROCESS_CPUTIME_ID] = "process_cputime_id", [CLOCK_THREAD_CPUTIME_ID] = "thread_cputime_id", [CLOCK_MONOTONIC_RAW] = "monotonic_raw", [CLOCK_REALTIME_COARSE] = "realtime_coarse", [CLOCK_MONOTONIC_COARSE] = "monotonic_coarse", [CLOCK_BOOTTIME] = "boottime", [CLOCK_REALTIME_ALARM] = "realtime_alarm", [CLOCK_BOOTTIME_ALARM] = "boottime_alarm", [CLOCK_SGI_CYCLE] = "sgi_cycle", [CLOCK_TAI] = "tai", }; static ssize_t current_timestamp_clock_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct iio_dev *indio_dev = dev_to_iio_dev(dev); const clockid_t clk = iio_device_get_clock(indio_dev); switch (clk) { case CLOCK_REALTIME: case CLOCK_MONOTONIC: case CLOCK_MONOTONIC_RAW: case CLOCK_REALTIME_COARSE: case CLOCK_MONOTONIC_COARSE: case CLOCK_BOOTTIME: case CLOCK_TAI: break; default: BUG(); } return sysfs_emit(buf, "%s\n", clock_names[clk]); } static ssize_t current_timestamp_clock_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { clockid_t clk; int ret; ret = sysfs_match_string(clock_names, buf); if (ret < 0) return ret; clk = ret; switch (clk) { case CLOCK_REALTIME: case CLOCK_MONOTONIC: case CLOCK_MONOTONIC_RAW: case CLOCK_REALTIME_COARSE: case CLOCK_MONOTONIC_COARSE: case CLOCK_BOOTTIME: case CLOCK_TAI: break; default: return -EINVAL; } ret = iio_device_set_clock(dev_to_iio_dev(dev), clk); if (ret) return ret; return len; } int iio_device_register_sysfs_group(struct iio_dev *indio_dev, const struct attribute_group *group) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); const struct attribute_group **new, **old = iio_dev_opaque->groups; unsigned int cnt = iio_dev_opaque->groupcounter; new = krealloc_array(old, cnt + 2, sizeof(*new), GFP_KERNEL); if (!new) return -ENOMEM; new[iio_dev_opaque->groupcounter++] = group; new[iio_dev_opaque->groupcounter] = NULL; iio_dev_opaque->groups = new; return 0; } static DEVICE_ATTR_RW(current_timestamp_clock); static int iio_device_register_sysfs(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); int i, ret = 0, attrcount, attrn, attrcount_orig = 0; struct iio_dev_attr *p; struct attribute **attr, *clk = NULL; /* First count elements in any existing group */ if (indio_dev->info->attrs) { attr = indio_dev->info->attrs->attrs; while (*attr++ != 
NULL) attrcount_orig++; } attrcount = attrcount_orig; /* * New channel registration method - relies on the fact a group does * not need to be initialized if its name is NULL. */ if (indio_dev->channels) for (i = 0; i < indio_dev->num_channels; i++) { const struct iio_chan_spec *chan = &indio_dev->channels[i]; if (chan->type == IIO_TIMESTAMP) clk = &dev_attr_current_timestamp_clock.attr; ret = iio_device_add_channel_sysfs(indio_dev, chan); if (ret < 0) goto error_clear_attrs; attrcount += ret; } if (iio_dev_opaque->event_interface) clk = &dev_attr_current_timestamp_clock.attr; if (indio_dev->name) attrcount++; if (indio_dev->label) attrcount++; if (clk) attrcount++; iio_dev_opaque->chan_attr_group.attrs = kcalloc(attrcount + 1, sizeof(iio_dev_opaque->chan_attr_group.attrs[0]), GFP_KERNEL); if (iio_dev_opaque->chan_attr_group.attrs == NULL) { ret = -ENOMEM; goto error_clear_attrs; } /* Copy across original attributes, and point to original binary attributes */ if (indio_dev->info->attrs) { memcpy(iio_dev_opaque->chan_attr_group.attrs, indio_dev->info->attrs->attrs, sizeof(iio_dev_opaque->chan_attr_group.attrs[0]) *attrcount_orig); iio_dev_opaque->chan_attr_group.is_visible = indio_dev->info->attrs->is_visible; iio_dev_opaque->chan_attr_group.bin_attrs = indio_dev->info->attrs->bin_attrs; } attrn = attrcount_orig; /* Add all elements from the list. */ list_for_each_entry(p, &iio_dev_opaque->channel_attr_list, l) iio_dev_opaque->chan_attr_group.attrs[attrn++] = &p->dev_attr.attr; if (indio_dev->name) iio_dev_opaque->chan_attr_group.attrs[attrn++] = &dev_attr_name.attr; if (indio_dev->label) iio_dev_opaque->chan_attr_group.attrs[attrn++] = &dev_attr_label.attr; if (clk) iio_dev_opaque->chan_attr_group.attrs[attrn++] = clk; ret = iio_device_register_sysfs_group(indio_dev, &iio_dev_opaque->chan_attr_group); if (ret) goto error_free_chan_attrs; return 0; error_free_chan_attrs: kfree(iio_dev_opaque->chan_attr_group.attrs); iio_dev_opaque->chan_attr_group.attrs = NULL; error_clear_attrs: iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list); return ret; } static void iio_device_unregister_sysfs(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list); kfree(iio_dev_opaque->chan_attr_group.attrs); iio_dev_opaque->chan_attr_group.attrs = NULL; kfree(iio_dev_opaque->groups); iio_dev_opaque->groups = NULL; } static void iio_dev_release(struct device *device) { struct iio_dev *indio_dev = dev_to_iio_dev(device); struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); if (indio_dev->modes & INDIO_ALL_TRIGGERED_MODES) iio_device_unregister_trigger_consumer(indio_dev); iio_device_unregister_eventset(indio_dev); iio_device_unregister_sysfs(indio_dev); iio_device_detach_buffers(indio_dev); lockdep_unregister_key(&iio_dev_opaque->mlock_key); ida_free(&iio_ida, iio_dev_opaque->id); kfree(iio_dev_opaque); } const struct device_type iio_device_type = { .name = "iio_device", .release = iio_dev_release, }; /** * iio_device_alloc() - allocate an iio_dev from a driver * @parent: Parent device. * @sizeof_priv: Space to allocate for private structure. * * Returns: * Pointer to allocated iio_dev on success, NULL on failure. 
*/ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv) { struct iio_dev_opaque *iio_dev_opaque; struct iio_dev *indio_dev; size_t alloc_size; alloc_size = sizeof(struct iio_dev_opaque); if (sizeof_priv) { alloc_size = ALIGN(alloc_size, IIO_DMA_MINALIGN); alloc_size += sizeof_priv; } iio_dev_opaque = kzalloc(alloc_size, GFP_KERNEL); if (!iio_dev_opaque) return NULL; indio_dev = &iio_dev_opaque->indio_dev; indio_dev->priv = (char *)iio_dev_opaque + ALIGN(sizeof(struct iio_dev_opaque), IIO_DMA_MINALIGN); indio_dev->dev.parent = parent; indio_dev->dev.type = &iio_device_type; indio_dev->dev.bus = &iio_bus_type; device_initialize(&indio_dev->dev); mutex_init(&iio_dev_opaque->mlock); mutex_init(&iio_dev_opaque->info_exist_lock); INIT_LIST_HEAD(&iio_dev_opaque->channel_attr_list); iio_dev_opaque->id = ida_alloc(&iio_ida, GFP_KERNEL); if (iio_dev_opaque->id < 0) { /* cannot use a dev_err as the name isn't available */ pr_err("failed to get device id\n"); kfree(iio_dev_opaque); return NULL; } if (dev_set_name(&indio_dev->dev, "iio:device%d", iio_dev_opaque->id)) { ida_free(&iio_ida, iio_dev_opaque->id); kfree(iio_dev_opaque); return NULL; } INIT_LIST_HEAD(&iio_dev_opaque->buffer_list); INIT_LIST_HEAD(&iio_dev_opaque->ioctl_handlers); lockdep_register_key(&iio_dev_opaque->mlock_key); lockdep_set_class(&iio_dev_opaque->mlock, &iio_dev_opaque->mlock_key); return indio_dev; } EXPORT_SYMBOL(iio_device_alloc); /** * iio_device_free() - free an iio_dev from a driver * @dev: the iio_dev associated with the device */ void iio_device_free(struct iio_dev *dev) { if (dev) put_device(&dev->dev); } EXPORT_SYMBOL(iio_device_free); static void devm_iio_device_release(void *iio_dev) { iio_device_free(iio_dev); } /** * devm_iio_device_alloc - Resource-managed iio_device_alloc() * @parent: Device to allocate iio_dev for, and parent for this IIO device * @sizeof_priv: Space to allocate for private structure. * * Managed iio_device_alloc. iio_dev allocated with this function is * automatically freed on driver detach. * * Returns: * Pointer to allocated iio_dev on success, NULL on failure. 
*/ struct iio_dev *devm_iio_device_alloc(struct device *parent, int sizeof_priv) { struct iio_dev *iio_dev; int ret; iio_dev = iio_device_alloc(parent, sizeof_priv); if (!iio_dev) return NULL; ret = devm_add_action_or_reset(parent, devm_iio_device_release, iio_dev); if (ret) return NULL; return iio_dev; } EXPORT_SYMBOL_GPL(devm_iio_device_alloc); /** * iio_chrdev_open() - chrdev file open for buffer access and ioctls * @inode: Inode structure for identifying the device in the file system * @filp: File structure for iio device used to keep and later access * private data * * Returns: 0 on success or -EBUSY if the device is already opened */ static int iio_chrdev_open(struct inode *inode, struct file *filp) { struct iio_dev_opaque *iio_dev_opaque = container_of(inode->i_cdev, struct iio_dev_opaque, chrdev); struct iio_dev *indio_dev = &iio_dev_opaque->indio_dev; struct iio_dev_buffer_pair *ib; if (test_and_set_bit(IIO_BUSY_BIT_POS, &iio_dev_opaque->flags)) return -EBUSY; iio_device_get(indio_dev); ib = kmalloc(sizeof(*ib), GFP_KERNEL); if (!ib) { iio_device_put(indio_dev); clear_bit(IIO_BUSY_BIT_POS, &iio_dev_opaque->flags); return -ENOMEM; } ib->indio_dev = indio_dev; ib->buffer = indio_dev->buffer; filp->private_data = ib; return 0; } /** * iio_chrdev_release() - chrdev file close buffer access and ioctls * @inode: Inode structure pointer for the char device * @filp: File structure pointer for the char device * * Returns: 0 for successful release. */ static int iio_chrdev_release(struct inode *inode, struct file *filp) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_dev_opaque *iio_dev_opaque = container_of(inode->i_cdev, struct iio_dev_opaque, chrdev); struct iio_dev *indio_dev = &iio_dev_opaque->indio_dev; kfree(ib); clear_bit(IIO_BUSY_BIT_POS, &iio_dev_opaque->flags); iio_device_put(indio_dev); return 0; } void iio_device_ioctl_handler_register(struct iio_dev *indio_dev, struct iio_ioctl_handler *h) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); list_add_tail(&h->entry, &iio_dev_opaque->ioctl_handlers); } void iio_device_ioctl_handler_unregister(struct iio_ioctl_handler *h) { list_del(&h->entry); } static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct iio_dev_buffer_pair *ib = filp->private_data; struct iio_dev *indio_dev = ib->indio_dev; struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct iio_ioctl_handler *h; int ret = -ENODEV; mutex_lock(&iio_dev_opaque->info_exist_lock); /* * The NULL check here is required to prevent crashing when a device * is being removed while userspace would still have open file handles * to try to access this device. 
*/ if (!indio_dev->info) goto out_unlock; list_for_each_entry(h, &iio_dev_opaque->ioctl_handlers, entry) { ret = h->ioctl(indio_dev, filp, cmd, arg); if (ret != IIO_IOCTL_UNHANDLED) break; } if (ret == IIO_IOCTL_UNHANDLED) ret = -ENODEV; out_unlock: mutex_unlock(&iio_dev_opaque->info_exist_lock); return ret; } static const struct file_operations iio_buffer_fileops = { .owner = THIS_MODULE, .llseek = noop_llseek, .read = iio_buffer_read_outer_addr, .write = iio_buffer_write_outer_addr, .poll = iio_buffer_poll_addr, .unlocked_ioctl = iio_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = iio_chrdev_open, .release = iio_chrdev_release, }; static const struct file_operations iio_event_fileops = { .owner = THIS_MODULE, .llseek = noop_llseek, .unlocked_ioctl = iio_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = iio_chrdev_open, .release = iio_chrdev_release, }; static int iio_check_unique_scan_index(struct iio_dev *indio_dev) { int i, j; const struct iio_chan_spec *channels = indio_dev->channels; if (!(indio_dev->modes & INDIO_ALL_BUFFER_MODES)) return 0; for (i = 0; i < indio_dev->num_channels - 1; i++) { if (channels[i].scan_index < 0) continue; for (j = i + 1; j < indio_dev->num_channels; j++) if (channels[i].scan_index == channels[j].scan_index) { dev_err(&indio_dev->dev, "Duplicate scan index %d\n", channels[i].scan_index); return -EINVAL; } } return 0; } static int iio_check_extended_name(const struct iio_dev *indio_dev) { unsigned int i; if (!indio_dev->info->read_label) return 0; for (i = 0; i < indio_dev->num_channels; i++) { if (indio_dev->channels[i].extend_name) { dev_err(&indio_dev->dev, "Cannot use labels and extend_name at the same time\n"); return -EINVAL; } } return 0; } static const struct iio_buffer_setup_ops noop_ring_setup_ops; static void iio_sanity_check_avail_scan_masks(struct iio_dev *indio_dev) { unsigned int num_masks, masklength, longs_per_mask; const unsigned long *av_masks; int i; av_masks = indio_dev->available_scan_masks; masklength = indio_dev->masklength; longs_per_mask = BITS_TO_LONGS(masklength); /* * The code determining how many available_scan_masks is in the array * will be assuming the end of masks when first long with all bits * zeroed is encountered. This is incorrect for masks where mask * consists of more than one long, and where some of the available masks * has long worth of bits zeroed (but has subsequent bit(s) set). This * is a safety measure against bug where array of masks is terminated by * a single zero while mask width is greater than width of a long. */ if (longs_per_mask > 1) dev_warn(indio_dev->dev.parent, "multi long available scan masks not fully supported\n"); if (bitmap_empty(av_masks, masklength)) dev_warn(indio_dev->dev.parent, "empty scan mask\n"); for (num_masks = 0; *av_masks; num_masks++) av_masks += longs_per_mask; if (num_masks < 2) return; av_masks = indio_dev->available_scan_masks; /* * Go through all the masks from first to one before the last, and see * that no mask found later from the available_scan_masks array is a * subset of mask found earlier. If this happens, then the mask found * later will never get used because scanning the array is stopped when * the first suitable mask is found. Drivers should order the array of * available masks in the order of preference (presumably the least * costy to access masks first). 
*/ for (i = 0; i < num_masks - 1; i++) { const unsigned long *mask1; int j; mask1 = av_masks + i * longs_per_mask; for (j = i + 1; j < num_masks; j++) { const unsigned long *mask2; mask2 = av_masks + j * longs_per_mask; if (bitmap_subset(mask2, mask1, masklength)) dev_warn(indio_dev->dev.parent, "available_scan_mask %d subset of %d. Never used\n", j, i); } } } int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); struct fwnode_handle *fwnode = NULL; int ret; if (!indio_dev->info) return -EINVAL; iio_dev_opaque->driver_module = this_mod; /* If the calling driver did not initialize firmware node, do it here */ if (dev_fwnode(&indio_dev->dev)) fwnode = dev_fwnode(&indio_dev->dev); /* The default dummy IIO device has no parent */ else if (indio_dev->dev.parent) fwnode = dev_fwnode(indio_dev->dev.parent); device_set_node(&indio_dev->dev, fwnode); fwnode_property_read_string(fwnode, "label", &indio_dev->label); ret = iio_check_unique_scan_index(indio_dev); if (ret < 0) return ret; ret = iio_check_extended_name(indio_dev); if (ret < 0) return ret; iio_device_register_debugfs(indio_dev); ret = iio_buffers_alloc_sysfs_and_mask(indio_dev); if (ret) { dev_err(indio_dev->dev.parent, "Failed to create buffer sysfs interfaces\n"); goto error_unreg_debugfs; } if (indio_dev->available_scan_masks) iio_sanity_check_avail_scan_masks(indio_dev); ret = iio_device_register_sysfs(indio_dev); if (ret) { dev_err(indio_dev->dev.parent, "Failed to register sysfs interfaces\n"); goto error_buffer_free_sysfs; } ret = iio_device_register_eventset(indio_dev); if (ret) { dev_err(indio_dev->dev.parent, "Failed to register event set\n"); goto error_free_sysfs; } if (indio_dev->modes & INDIO_ALL_TRIGGERED_MODES) iio_device_register_trigger_consumer(indio_dev); if ((indio_dev->modes & INDIO_ALL_BUFFER_MODES) && indio_dev->setup_ops == NULL) indio_dev->setup_ops = &noop_ring_setup_ops; if (iio_dev_opaque->attached_buffers_cnt) cdev_init(&iio_dev_opaque->chrdev, &iio_buffer_fileops); else if (iio_dev_opaque->event_interface) cdev_init(&iio_dev_opaque->chrdev, &iio_event_fileops); if (iio_dev_opaque->attached_buffers_cnt || iio_dev_opaque->event_interface) { indio_dev->dev.devt = MKDEV(MAJOR(iio_devt), iio_dev_opaque->id); iio_dev_opaque->chrdev.owner = this_mod; } /* assign device groups now; they should be all registered now */ indio_dev->dev.groups = iio_dev_opaque->groups; ret = cdev_device_add(&iio_dev_opaque->chrdev, &indio_dev->dev); if (ret < 0) goto error_unreg_eventset; return 0; error_unreg_eventset: iio_device_unregister_eventset(indio_dev); error_free_sysfs: iio_device_unregister_sysfs(indio_dev); error_buffer_free_sysfs: iio_buffers_free_sysfs_and_mask(indio_dev); error_unreg_debugfs: iio_device_unregister_debugfs(indio_dev); return ret; } EXPORT_SYMBOL(__iio_device_register); /** * iio_device_unregister() - unregister a device from the IIO subsystem * @indio_dev: Device structure representing the device. 
*/ void iio_device_unregister(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); cdev_device_del(&iio_dev_opaque->chrdev, &indio_dev->dev); mutex_lock(&iio_dev_opaque->info_exist_lock); iio_device_unregister_debugfs(indio_dev); iio_disable_all_buffers(indio_dev); indio_dev->info = NULL; iio_device_wakeup_eventset(indio_dev); iio_buffer_wakeup_poll(indio_dev); mutex_unlock(&iio_dev_opaque->info_exist_lock); iio_buffers_free_sysfs_and_mask(indio_dev); } EXPORT_SYMBOL(iio_device_unregister); static void devm_iio_device_unreg(void *indio_dev) { iio_device_unregister(indio_dev); } int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, struct module *this_mod) { int ret; ret = __iio_device_register(indio_dev, this_mod); if (ret) return ret; return devm_add_action_or_reset(dev, devm_iio_device_unreg, indio_dev); } EXPORT_SYMBOL_GPL(__devm_iio_device_register); /** * iio_device_claim_direct_mode - Keep device in direct mode * @indio_dev: the iio_dev associated with the device * * If the device is in direct mode it is guaranteed to stay * that way until iio_device_release_direct_mode() is called. * * Use with iio_device_release_direct_mode() * * Returns: 0 on success, -EBUSY on failure. */ int iio_device_claim_direct_mode(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_enabled(indio_dev)) { mutex_unlock(&iio_dev_opaque->mlock); return -EBUSY; } return 0; } EXPORT_SYMBOL_GPL(iio_device_claim_direct_mode); /** * iio_device_release_direct_mode - releases claim on direct mode * @indio_dev: the iio_dev associated with the device * * Release the claim. Device is no longer guaranteed to stay * in direct mode. * * Use with iio_device_claim_direct_mode() */ void iio_device_release_direct_mode(struct iio_dev *indio_dev) { mutex_unlock(&to_iio_dev_opaque(indio_dev)->mlock); } EXPORT_SYMBOL_GPL(iio_device_release_direct_mode); /** * iio_device_claim_buffer_mode - Keep device in buffer mode * @indio_dev: the iio_dev associated with the device * * If the device is in buffer mode it is guaranteed to stay * that way until iio_device_release_buffer_mode() is called. * * Use with iio_device_release_buffer_mode(). * * Returns: 0 on success, -EBUSY on failure. */ int iio_device_claim_buffer_mode(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); mutex_lock(&iio_dev_opaque->mlock); if (iio_buffer_enabled(indio_dev)) return 0; mutex_unlock(&iio_dev_opaque->mlock); return -EBUSY; } EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode); /** * iio_device_release_buffer_mode - releases claim on buffer mode * @indio_dev: the iio_dev associated with the device * * Release the claim. Device is no longer guaranteed to stay * in buffer mode. * * Use with iio_device_claim_buffer_mode(). 
*/ void iio_device_release_buffer_mode(struct iio_dev *indio_dev) { mutex_unlock(&to_iio_dev_opaque(indio_dev)->mlock); } EXPORT_SYMBOL_GPL(iio_device_release_buffer_mode); /** * iio_device_get_current_mode() - helper function providing read-only access to * the opaque @currentmode variable * @indio_dev: IIO device structure for device */ int iio_device_get_current_mode(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); return iio_dev_opaque->currentmode; } EXPORT_SYMBOL_GPL(iio_device_get_current_mode); subsys_initcall(iio_init); module_exit(iio_exit); MODULE_AUTHOR("Jonathan Cameron <jic23@kernel.org>"); MODULE_DESCRIPTION("Industrial I/O core"); MODULE_LICENSE("GPL");
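/*
 * Example usage (not part of the IIO core above): a minimal sketch of how a
 * driver sits on top of the registration and claim helpers in this file.
 * The foo_* names are hypothetical; a real driver would hook foo_probe() up
 * to its bus (platform, SPI, I2C, ...) probe path and read real hardware.
 * Assumes the usual driver includes shown below.
 */
#include <linux/bits.h>
#include <linux/iio/iio.h>

static const struct iio_chan_spec foo_channels[] = {
	{
		.type = IIO_VOLTAGE,
		.indexed = 1,
		.channel = 0,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
	},
};

static int foo_read_raw(struct iio_dev *indio_dev,
			struct iio_chan_spec const *chan,
			int *val, int *val2, long mask)
{
	int ret;

	switch (mask) {
	case IIO_CHAN_INFO_RAW:
		/* Keep the device out of buffered mode while sampling. */
		ret = iio_device_claim_direct_mode(indio_dev);
		if (ret)
			return ret;
		*val = 0;	/* would be read from the hardware here */
		iio_device_release_direct_mode(indio_dev);
		return IIO_VAL_INT;
	case IIO_CHAN_INFO_SCALE:
		/* Scale of 1.25, shown to userspace as "1.250000". */
		*val = 1;
		*val2 = 250000;
		return IIO_VAL_INT_PLUS_MICRO;
	default:
		return -EINVAL;
	}
}

static const struct iio_info foo_info = {
	.read_raw = foo_read_raw,
};

static int foo_probe(struct device *dev)
{
	struct iio_dev *indio_dev;

	/* Managed allocation: freed automatically on driver detach. */
	indio_dev = devm_iio_device_alloc(dev, 0);
	if (!indio_dev)
		return -ENOMEM;

	indio_dev->name = "foo";
	indio_dev->info = &foo_info;
	indio_dev->channels = foo_channels;
	indio_dev->num_channels = ARRAY_SIZE(foo_channels);
	indio_dev->modes = INDIO_DIRECT_MODE;

	/* Creates the sysfs, chrdev and event interfaces built above. */
	return devm_iio_device_register(dev, indio_dev);
}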
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* delayacct.h - per-task delay accounting * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 */ #ifndef _LINUX_DELAYACCT_H #define _LINUX_DELAYACCT_H #include <uapi/linux/taskstats.h> #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { raw_spinlock_t lock; /* For each stat XXX, add following, aligned appropriately * * struct timespec XXX_start, XXX_end; * u64 XXX_delay; * u32 XXX_count; * * Atomicity of updates to XXX_delay, XXX_count protected by * single lock above (split into XXX_lock if contention is an issue). */ /* * XXX_count is incremented on every XXX operation, the delay * associated with the operation is added to XXX_delay. * XXX_delay contains the accumulated delay time in nanoseconds. */ u64 blkio_start; u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_start; u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ u32 swapin_count; /* total count of swapin */ u64 freepages_start; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; u64 thrashing_delay; /* wait for thrashing page */ u64 compact_start; u64 compact_delay; /* wait for memory compact */ u64 wpcopy_start; u64 wpcopy_delay; /* wait for write-protect copy */ u64 irq_delay; /* wait for IRQ/SOFTIRQ */ u32 freepages_count; /* total count of memory reclaim */ u32 thrashing_count; /* total count of thrash waits */ u32 compact_count; /* total count of memory compact */ u32 wpcopy_count; /* total count of write-protect copy */ u32 irq_count; /* total count of IRQ/SOFTIRQ */ }; #endif #include <linux/sched.h> #include <linux/slab.h> #include <linux/jump_label.h> #ifdef CONFIG_TASK_DELAY_ACCT DECLARE_STATIC_KEY_FALSE(delayacct_key); extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(struct task_struct *); extern int delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); extern void __delayacct_freepages_end(void); extern void __delayacct_thrashing_start(bool *in_thrashing); extern void __delayacct_thrashing_end(bool *in_thrashing); extern void __delayacct_swapin_start(void); extern void
__delayacct_swapin_end(void); extern void __delayacct_compact_start(void); extern void __delayacct_compact_end(void); extern void __delayacct_wpcopy_start(void); extern void __delayacct_wpcopy_end(void); extern void __delayacct_irq(struct task_struct *task, u32 delta); static inline void delayacct_tsk_init(struct task_struct *tsk) { /* reinitialize in case parent's non-null pointer was dup'ed*/ tsk->delays = NULL; if (delayacct_on) __delayacct_tsk_init(tsk); } /* Free tsk->delays. Called from bad fork and __put_task_struct * where there's no risk of tsk->delays being accessed elsewhere */ static inline void delayacct_tsk_free(struct task_struct *tsk) { if (tsk->delays) kmem_cache_free(delayacct_cache, tsk->delays); tsk->delays = NULL; } static inline void delayacct_blkio_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_blkio_start(); } static inline void delayacct_blkio_end(struct task_struct *p) { if (!static_branch_unlikely(&delayacct_key)) return; if (p->delays) __delayacct_blkio_end(p); } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { if (tsk->delays) return __delayacct_blkio_ticks(tsk); return 0; } static inline void delayacct_freepages_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_freepages_start(); } static inline void delayacct_freepages_end(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_freepages_end(); } static inline void delayacct_thrashing_start(bool *in_thrashing) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_thrashing_start(in_thrashing); } static inline void delayacct_thrashing_end(bool *in_thrashing) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_thrashing_end(in_thrashing); } static inline void delayacct_swapin_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_swapin_start(); } static inline void delayacct_swapin_end(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_swapin_end(); } static inline void delayacct_compact_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_compact_start(); } static inline void delayacct_compact_end(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_compact_end(); } static inline void delayacct_wpcopy_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_wpcopy_start(); } static inline void delayacct_wpcopy_end(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_wpcopy_end(); } static inline void delayacct_irq(struct task_struct *task, u32 delta) { if (!static_branch_unlikely(&delayacct_key)) return; if (task->delays) __delayacct_irq(task, delta); } #else static inline void delayacct_init(void) {} static inline void delayacct_tsk_init(struct task_struct *tsk) {} static inline void delayacct_tsk_free(struct task_struct *tsk) {} static inline void delayacct_blkio_start(void) {} static inline void delayacct_blkio_end(struct task_struct *p) {} static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { return 0; } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { return 0; } static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) { return 0; } static inline void 
delayacct_freepages_start(void) {} static inline void delayacct_freepages_end(void) {} static inline void delayacct_thrashing_start(bool *in_thrashing) {} static inline void delayacct_thrashing_end(bool *in_thrashing) {} static inline void delayacct_swapin_start(void) {} static inline void delayacct_swapin_end(void) {} static inline void delayacct_compact_start(void) {} static inline void delayacct_compact_end(void) {} static inline void delayacct_wpcopy_start(void) {} static inline void delayacct_wpcopy_end(void) {} static inline void delayacct_irq(struct task_struct *task, u32 delta) {} #endif /* CONFIG_TASK_DELAY_ACCT */ #endif
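/*
 * Example usage (not part of delayacct.h): a minimal sketch of how callers
 * bracket a blocking operation with the wrappers above so the wait time is
 * charged to current's task_delay_info. The example_* helpers are
 * hypothetical; when CONFIG_TASK_DELAY_ACCT is off, or the delayacct_key
 * static key is not enabled, the wrappers compile down to (almost) nothing.
 */
static inline void example_account_thrashing_wait(void)
{
	bool in_thrashing;

	delayacct_thrashing_start(&in_thrashing);
	/* ... block here waiting for the thrashing page ... */
	delayacct_thrashing_end(&in_thrashing);
}

static inline void example_account_swapin_wait(void)
{
	delayacct_swapin_start();
	/* ... issue and wait for the swap-in I/O ... */
	delayacct_swapin_end();
}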
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HIGHMEM_INTERNAL_H #define _LINUX_HIGHMEM_INTERNAL_H /* * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft. */ #ifdef CONFIG_KMAP_LOCAL void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); void *__kmap_local_page_prot(struct page *page, pgprot_t prot); void kunmap_local_indexed(const void *vaddr); void kmap_local_fork(struct task_struct *tsk); void __kmap_local_sched_out(void); void __kmap_local_sched_in(void); static inline void kmap_assert_nomap(void) { DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx); } #else static inline void kmap_local_fork(struct task_struct *tsk) { } static inline void kmap_assert_nomap(void) { } #endif #ifdef CONFIG_HIGHMEM #include <asm/highmem.h> #ifndef ARCH_HAS_KMAP_FLUSH_TLB static inline void kmap_flush_tlb(unsigned long addr) { } #endif #ifndef kmap_prot #define kmap_prot PAGE_KERNEL #endif void *kmap_high(struct page *page); void kunmap_high(struct page *page); void __kmap_flush_unused(void); struct page *__kmap_to_page(void *addr); static inline void *kmap(struct page *page) { void *addr; might_sleep(); if (!PageHighMem(page)) addr = page_address(page); else addr = kmap_high(page); kmap_flush_tlb((unsigned long)addr); return addr; } static inline void kunmap(struct page *page) { might_sleep(); if (!PageHighMem(page)) return; kunmap_high(page); } static inline struct page *kmap_to_page(void *addr) { return __kmap_to_page(addr); } static inline void kmap_flush_unused(void) { __kmap_flush_unused(); } static inline void *kmap_local_page(struct page *page) { return __kmap_local_page_prot(page, kmap_prot); } static inline void *kmap_local_folio(struct folio *folio, size_t offset) { struct page *page = folio_page(folio, offset / PAGE_SIZE); return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE; } static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return __kmap_local_page_prot(page, prot); } static inline void *kmap_local_pfn(unsigned long pfn) { return __kmap_local_pfn_prot(pfn, kmap_prot); } static inline void __kunmap_local(const void *vaddr) { kunmap_local_indexed(vaddr); } static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); else preempt_disable(); pagefault_disable(); return __kmap_local_page_prot(page, prot); } static inline void *kmap_atomic(struct page *page) { return kmap_atomic_prot(page, kmap_prot); } static inline void
*kmap_atomic_pfn(unsigned long pfn) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); else preempt_disable(); pagefault_disable(); return __kmap_local_pfn_prot(pfn, kmap_prot); } static inline void __kunmap_atomic(const void *addr) { kunmap_local_indexed(addr); pagefault_enable(); if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_enable(); else preempt_enable(); } unsigned int __nr_free_highpages(void); extern atomic_long_t _totalhigh_pages; static inline unsigned int nr_free_highpages(void) { return __nr_free_highpages(); } static inline unsigned long totalhigh_pages(void) { return (unsigned long)atomic_long_read(&_totalhigh_pages); } static inline void totalhigh_pages_add(long count) { atomic_long_add(count, &_totalhigh_pages); } static inline bool is_kmap_addr(const void *x) { unsigned long addr = (unsigned long)x; return (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) || (addr >= __fix_to_virt(FIX_KMAP_END) && addr < __fix_to_virt(FIX_KMAP_BEGIN)); } #else /* CONFIG_HIGHMEM */ static inline struct page *kmap_to_page(void *addr) { return virt_to_page(addr); } static inline void *kmap(struct page *page) { might_sleep(); return page_address(page); } static inline void kunmap_high(struct page *page) { } static inline void kmap_flush_unused(void) { } static inline void kunmap(struct page *page) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(page_address(page)); #endif } static inline void *kmap_local_page(struct page *page) { return page_address(page); } static inline void *kmap_local_folio(struct folio *folio, size_t offset) { return page_address(&folio->page) + offset; } static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return kmap_local_page(page); } static inline void *kmap_local_pfn(unsigned long pfn) { return kmap_local_page(pfn_to_page(pfn)); } static inline void __kunmap_local(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); #endif } static inline void *kmap_atomic(struct page *page) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); else preempt_disable(); pagefault_disable(); return page_address(page); } static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { return kmap_atomic(page); } static inline void *kmap_atomic_pfn(unsigned long pfn) { return kmap_atomic(pfn_to_page(pfn)); } static inline void __kunmap_atomic(const void *addr) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); #endif pagefault_enable(); if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_enable(); else preempt_enable(); } static inline unsigned int nr_free_highpages(void) { return 0; } static inline unsigned long totalhigh_pages(void) { return 0UL; } static inline bool is_kmap_addr(const void *x) { return false; } #endif /* CONFIG_HIGHMEM */ /** * kunmap_atomic - Unmap the virtual address mapped by kmap_atomic() - deprecated! * @__addr: Virtual address to be unmapped * * Unmaps an address previously mapped by kmap_atomic() and re-enables * pagefaults. Depending on PREEMP_RT configuration, re-enables also * migration and preemption. Users should not count on these side effects. * * Mappings should be unmapped in the reverse order that they were mapped. * See kmap_local_page() for details on nesting. * * @__addr can be any address within the mapped page, so there is no need * to subtract any offset that has been added. In contrast to kunmap(), * this function takes the address returned from kmap_atomic(), not the * page passed to it. 
The compiler will warn you if you pass the page. */ #define kunmap_atomic(__addr) \ do { \ BUILD_BUG_ON(__same_type((__addr), struct page *)); \ __kunmap_atomic(__addr); \ } while (0) /** * kunmap_local - Unmap a page mapped via kmap_local_page(). * @__addr: An address within the page mapped * * @__addr can be any address within the mapped page. Commonly it is the * address return from kmap_local_page(), but it can also include offsets. * * Unmapping should be done in the reverse order of the mapping. See * kmap_local_page() for details. */ #define kunmap_local(__addr) \ do { \ BUILD_BUG_ON(__same_type((__addr), struct page *)); \ __kunmap_local(__addr); \ } while (0) #endif
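/*
 * Example usage (not part of this header): a minimal sketch of the
 * kmap_local_page() / kunmap_local() pairing documented above. The helper
 * name is hypothetical, and memcpy()/PAGE_SIZE are assumed to be available
 * via the usual mm and string headers. Local mappings are stack-like, so
 * they must be released in the reverse order they were taken.
 */
static inline void example_copy_highpage(struct page *dst, struct page *src)
{
	char *vto = kmap_local_page(dst);
	const char *vfrom = kmap_local_page(src);

	memcpy(vto, vfrom, PAGE_SIZE);

	/* Unmap in reverse order of mapping. */
	kunmap_local(vfrom);
	kunmap_local(vto);
}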
// SPDX-License-Identifier: GPL-2.0+ /* * XArray implementation * Copyright (c) 2017-2018 Microsoft Corporation * Copyright (c) 2018-2020 Oracle * Author: Matthew Wilcox <willy@infradead.org> */ #include <linux/bitmap.h> #include <linux/export.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/xarray.h> #include "radix-tree.h" /* * Coding conventions in this file: * * @xa is used to refer to the entire xarray. * @xas is the 'xarray operation state'. It may be either a pointer to * an xa_state, or an xa_state stored on the stack. This is an unfortunate * ambiguity. * @index is the index of the entry being operated on * @mark is an xa_mark_t; a small number indicating one of the mark bits. * @node refers to an xa_node; usually the primary one being operated on by * this function. * @offset is the index into the slots array inside an xa_node. * @parent refers to the @xa_node closer to the head than @node. * @entry refers to something stored in a slot in the xarray */ static inline unsigned int xa_lock_type(const struct xarray *xa) { return (__force unsigned int)xa->xa_flags & 3; } static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type) { if (lock_type == XA_LOCK_IRQ) xas_lock_irq(xas); else if (lock_type == XA_LOCK_BH) xas_lock_bh(xas); else xas_lock(xas); } static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type) { if (lock_type == XA_LOCK_IRQ) xas_unlock_irq(xas); else if (lock_type == XA_LOCK_BH) xas_unlock_bh(xas); else xas_unlock(xas); } static inline bool xa_track_free(const struct xarray *xa) { return xa->xa_flags & XA_FLAGS_TRACK_FREE; } static inline bool xa_zero_busy(const struct xarray *xa) { return xa->xa_flags & XA_FLAGS_ZERO_BUSY; } static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) xa->xa_flags |= XA_FLAGS_MARK(mark); } static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark) { if (xa->xa_flags & XA_FLAGS_MARK(mark)) xa->xa_flags &= ~(XA_FLAGS_MARK(mark)); } static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark) { return node->marks[(__force unsigned)mark]; } static inline bool node_get_mark(struct xa_node *node, unsigned int offset, xa_mark_t mark) { return test_bit(offset, node_marks(node, mark)); } /* returns true if the bit was set */ static inline bool node_set_mark(struct xa_node *node, unsigned int offset, xa_mark_t mark) { return __test_and_set_bit(offset, node_marks(node, mark)); } /* returns true if the bit was set */ static inline bool node_clear_mark(struct xa_node *node, unsigned int offset, xa_mark_t mark) { return __test_and_clear_bit(offset, node_marks(node, mark)); } static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) { return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); } static inline void node_mark_all(struct xa_node *node, xa_mark_t mark) { bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE); } #define mark_inc(mark) do { \ mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \ } while (0) /* * xas_squash_marks() - Merge all marks to the first entry * @xas: Array operation state. * * Set a mark on the first entry if any entry has it set. Clear marks on * all sibling entries.
*/ static void xas_squash_marks(const struct xa_state *xas) { unsigned int mark = 0; unsigned int limit = xas->xa_offset + xas->xa_sibs + 1; if (!xas->xa_sibs) return; do { unsigned long *marks = xas->xa_node->marks[mark]; if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit) continue; __set_bit(xas->xa_offset, marks); bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs); } while (mark++ != (__force unsigned)XA_MARK_MAX); } /* extracts the offset within this node from the index */ static unsigned int get_offset(unsigned long index, struct xa_node *node) { return (index >> node->shift) & XA_CHUNK_MASK; } static void xas_set_offset(struct xa_state *xas) { xas->xa_offset = get_offset(xas->xa_index, xas->xa_node); } /* move the index either forwards (find) or backwards (sibling slot) */ static void xas_move_index(struct xa_state *xas, unsigned long offset) { unsigned int shift = xas->xa_node->shift; xas->xa_index &= ~XA_CHUNK_MASK << shift; xas->xa_index += offset << shift; } static void xas_next_offset(struct xa_state *xas) { xas->xa_offset++; xas_move_index(xas, xas->xa_offset); } static void *set_bounds(struct xa_state *xas) { xas->xa_node = XAS_BOUNDS; return NULL; } /* * Starts a walk. If the @xas is already valid, we assume that it's on * the right path and just return where we've got to. If we're in an * error state, return NULL. If the index is outside the current scope * of the xarray, return NULL without changing @xas->xa_node. Otherwise * set @xas->xa_node to NULL and return the current head of the array. */ static void *xas_start(struct xa_state *xas) { void *entry; if (xas_valid(xas)) return xas_reload(xas); if (xas_error(xas)) return NULL; entry = xa_head(xas->xa); if (!xa_is_node(entry)) { if (xas->xa_index) return set_bounds(xas); } else { if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK) return set_bounds(xas); } xas->xa_node = NULL; return entry; } static void *xas_descend(struct xa_state *xas, struct xa_node *node) { unsigned int offset = get_offset(xas->xa_index, node); void *entry = xa_entry(xas->xa, node, offset); xas->xa_node = node; while (xa_is_sibling(entry)) { offset = xa_to_sibling(entry); entry = xa_entry(xas->xa, node, offset); if (node->shift && xa_is_node(entry)) entry = XA_RETRY_ENTRY; } xas->xa_offset = offset; return entry; } /** * xas_load() - Load an entry from the XArray (advanced). * @xas: XArray operation state. * * Usually walks the @xas to the appropriate state to load the entry * stored at xa_index. However, it will do nothing and return %NULL if * @xas is in an error state. xas_load() will never expand the tree. * * If the xa_state is set up to operate on a multi-index entry, xas_load() * may return %NULL or an internal entry, even if there are entries * present within the range specified by @xas. * * Context: Any context. The caller should hold the xa_lock or the RCU lock. * Return: Usually an entry in the XArray, but see description for exceptions. 
*/ void *xas_load(struct xa_state *xas) { void *entry = xas_start(xas); while (xa_is_node(entry)) { struct xa_node *node = xa_to_node(entry); if (xas->xa_shift > node->shift) break; entry = xas_descend(xas, node); if (node->shift == 0) break; } return entry; } EXPORT_SYMBOL_GPL(xas_load); #define XA_RCU_FREE ((struct xarray *)1) static void xa_node_free(struct xa_node *node) { XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); node->array = XA_RCU_FREE; call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } /* * xas_destroy() - Free any resources allocated during the XArray operation. * @xas: XArray operation state. * * Most users will not need to call this function; it is called for you * by xas_nomem(). */ void xas_destroy(struct xa_state *xas) { struct xa_node *next, *node = xas->xa_alloc; while (node) { XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); next = rcu_dereference_raw(node->parent); radix_tree_node_rcu_free(&node->rcu_head); xas->xa_alloc = node = next; } } /** * xas_nomem() - Allocate memory if needed. * @xas: XArray operation state. * @gfp: Memory allocation flags. * * If we need to add new nodes to the XArray, we try to allocate memory * with GFP_NOWAIT while holding the lock, which will usually succeed. * If it fails, @xas is flagged as needing memory to continue. The caller * should drop the lock and call xas_nomem(). If xas_nomem() succeeds, * the caller should retry the operation. * * Forward progress is guaranteed as one node is allocated here and * stored in the xa_state where it will be found by xas_alloc(). More * nodes will likely be found in the slab allocator, but we do not tie * them up here. * * Return: true if memory was needed, and was successfully allocated. */ bool xas_nomem(struct xa_state *xas, gfp_t gfp) { if (xas->xa_node != XA_ERROR(-ENOMEM)) { xas_destroy(xas); return false; } if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!xas->xa_alloc) return false; xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; } EXPORT_SYMBOL_GPL(xas_nomem); /* * __xas_nomem() - Drop locks and allocate memory if needed. * @xas: XArray operation state. * @gfp: Memory allocation flags. * * Internal variant of xas_nomem(). * * Return: true if memory was needed, and was successfully allocated. 
*/ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) __must_hold(xas->xa->xa_lock) { unsigned int lock_type = xa_lock_type(xas->xa); if (xas->xa_node != XA_ERROR(-ENOMEM)) { xas_destroy(xas); return false; } if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; if (gfpflags_allow_blocking(gfp)) { xas_unlock_type(xas, lock_type); xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); xas_lock_type(xas, lock_type); } else { xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); } if (!xas->xa_alloc) return false; xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; } static void xas_update(struct xa_state *xas, struct xa_node *node) { if (xas->xa_update) xas->xa_update(node); else XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); } static void *xas_alloc(struct xa_state *xas, unsigned int shift) { struct xa_node *parent = xas->xa_node; struct xa_node *node = xas->xa_alloc; if (xas_invalid(xas)) return NULL; if (node) { xas->xa_alloc = NULL; } else { gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!node) { xas_set_err(xas, -ENOMEM); return NULL; } } if (parent) { node->offset = xas->xa_offset; parent->count++; XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE); xas_update(xas, parent); } XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); node->shift = shift; node->count = 0; node->nr_values = 0; RCU_INIT_POINTER(node->parent, xas->xa_node); node->array = xas->xa; return node; } #ifdef CONFIG_XARRAY_MULTI /* Returns the number of indices covered by a given xa_state */ static unsigned long xas_size(const struct xa_state *xas) { return (xas->xa_sibs + 1UL) << xas->xa_shift; } #endif /* * Use this to calculate the maximum index that will need to be created * in order to add the entry described by @xas. Because we cannot store a * multi-index entry at index 0, the calculation is a little more complex * than you might expect. */ static unsigned long xas_max(struct xa_state *xas) { unsigned long max = xas->xa_index; #ifdef CONFIG_XARRAY_MULTI if (xas->xa_shift || xas->xa_sibs) { unsigned long mask = xas_size(xas) - 1; max |= mask; if (mask == max) max++; } #endif return max; } /* The maximum index that can be contained in the array without expanding it */ static unsigned long max_index(void *entry) { if (!xa_is_node(entry)) return 0; return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1; } static void xas_shrink(struct xa_state *xas) { struct xarray *xa = xas->xa; struct xa_node *node = xas->xa_node; for (;;) { void *entry; XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); if (node->count != 1) break; entry = xa_entry_locked(xa, node, 0); if (!entry) break; if (!xa_is_node(entry) && node->shift) break; if (xa_is_zero(entry) && xa_zero_busy(xa)) entry = NULL; xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK)) xa_mark_clear(xa, XA_FREE_MARK); node->count = 0; node->nr_values = 0; if (!xa_is_node(entry)) RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY); xas_update(xas, node); xa_node_free(node); if (!xa_is_node(entry)) break; node = xa_to_node(entry); node->parent = NULL; } } /* * xas_delete_node() - Attempt to delete an xa_node * @xas: Array operation state. 
* * Attempts to delete the @xas->xa_node. This will fail if xa->node has * a non-zero reference count. */ static void xas_delete_node(struct xa_state *xas) { struct xa_node *node = xas->xa_node; for (;;) { struct xa_node *parent; XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); if (node->count) break; parent = xa_parent_locked(xas->xa, node); xas->xa_node = parent; xas->xa_offset = node->offset; xa_node_free(node); if (!parent) { xas->xa->xa_head = NULL; xas->xa_node = XAS_BOUNDS; return; } parent->slots[xas->xa_offset] = NULL; parent->count--; XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE); node = parent; xas_update(xas, node); } if (!node->parent) xas_shrink(xas); } /** * xas_free_nodes() - Free this node and all nodes that it references * @xas: Array operation state. * @top: Node to free * * This node has been removed from the tree. We must now free it and all * of its subnodes. There may be RCU walkers with references into the tree, * so we must replace all entries with retry markers. */ static void xas_free_nodes(struct xa_state *xas, struct xa_node *top) { unsigned int offset = 0; struct xa_node *node = top; for (;;) { void *entry = xa_entry_locked(xas->xa, node, offset); if (node->shift && xa_is_node(entry)) { node = xa_to_node(entry); offset = 0; continue; } if (entry) RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY); offset++; while (offset == XA_CHUNK_SIZE) { struct xa_node *parent; parent = xa_parent_locked(xas->xa, node); offset = node->offset + 1; node->count = 0; node->nr_values = 0; xas_update(xas, node); xa_node_free(node); if (node == top) return; node = parent; } } } /* * xas_expand adds nodes to the head of the tree until it has reached * sufficient height to be able to contain @xas->xa_index */ static int xas_expand(struct xa_state *xas, void *head) { struct xarray *xa = xas->xa; struct xa_node *node = NULL; unsigned int shift = 0; unsigned long max = xas_max(xas); if (!head) { if (max == 0) return 0; while ((max >> shift) >= XA_CHUNK_SIZE) shift += XA_CHUNK_SHIFT; return shift + XA_CHUNK_SHIFT; } else if (xa_is_node(head)) { node = xa_to_node(head); shift = node->shift + XA_CHUNK_SHIFT; } xas->xa_node = NULL; while (max > max_index(head)) { xa_mark_t mark = 0; XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); node = xas_alloc(xas, shift); if (!node) return -ENOMEM; node->count = 1; if (xa_is_value(head)) node->nr_values = 1; RCU_INIT_POINTER(node->slots[0], head); /* Propagate the aggregated mark info to the new child */ for (;;) { if (xa_track_free(xa) && mark == XA_FREE_MARK) { node_mark_all(node, XA_FREE_MARK); if (!xa_marked(xa, XA_FREE_MARK)) { node_clear_mark(node, 0, XA_FREE_MARK); xa_mark_set(xa, XA_FREE_MARK); } } else if (xa_marked(xa, mark)) { node_set_mark(node, 0, mark); } if (mark == XA_MARK_MAX) break; mark_inc(mark); } /* * Now that the new node is fully initialised, we can add * it to the tree */ if (xa_is_node(head)) { xa_to_node(head)->offset = 0; rcu_assign_pointer(xa_to_node(head)->parent, node); } head = xa_mk_node(node); rcu_assign_pointer(xa->xa_head, head); xas_update(xas, node); shift += XA_CHUNK_SHIFT; } xas->xa_node = node; return shift; } /* * xas_create() - Create a slot to store an entry in. * @xas: XArray operation state. * @allow_root: %true if we can store the entry in the root directly * * Most users will not need to call this function directly, as it is called * by xas_store(). It is useful for doing conditional store operations * (see the xa_cmpxchg() implementation for an example). 
* * Return: If the slot already existed, returns the contents of this slot. * If the slot was newly created, returns %NULL. If it failed to create the * slot, returns %NULL and indicates the error in @xas. */ static void *xas_create(struct xa_state *xas, bool allow_root) { struct xarray *xa = xas->xa; void *entry; void __rcu **slot; struct xa_node *node = xas->xa_node; int shift; unsigned int order = xas->xa_shift; if (xas_top(node)) { entry = xa_head_locked(xa); xas->xa_node = NULL; if (!entry && xa_zero_busy(xa)) entry = XA_ZERO_ENTRY; shift = xas_expand(xas, entry); if (shift < 0) return NULL; if (!shift && !allow_root) shift = XA_CHUNK_SHIFT; entry = xa_head_locked(xa); slot = &xa->xa_head; } else if (xas_error(xas)) { return NULL; } else if (node) { unsigned int offset = xas->xa_offset; shift = node->shift; entry = xa_entry_locked(xa, node, offset); slot = &node->slots[offset]; } else { shift = 0; entry = xa_head_locked(xa); slot = &xa->xa_head; } while (shift > order) { shift -= XA_CHUNK_SHIFT; if (!entry) { node = xas_alloc(xas, shift); if (!node) break; if (xa_track_free(xa)) node_mark_all(node, XA_FREE_MARK); rcu_assign_pointer(*slot, xa_mk_node(node)); } else if (xa_is_node(entry)) { node = xa_to_node(entry); } else { break; } entry = xas_descend(xas, node); slot = &node->slots[xas->xa_offset]; } return entry; } /** * xas_create_range() - Ensure that stores to this range will succeed * @xas: XArray operation state. * * Creates all of the slots in the range covered by @xas. Sets @xas to * create single-index entries and positions it at the beginning of the * range. This is for the benefit of users which have not yet been * converted to use multi-index entries. */ void xas_create_range(struct xa_state *xas) { unsigned long index = xas->xa_index; unsigned char shift = xas->xa_shift; unsigned char sibs = xas->xa_sibs; xas->xa_index |= ((sibs + 1UL) << shift) - 1; if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) xas->xa_offset |= sibs; xas->xa_shift = 0; xas->xa_sibs = 0; for (;;) { xas_create(xas, true); if (xas_error(xas)) goto restore; if (xas->xa_index <= (index | XA_CHUNK_MASK)) goto success; xas->xa_index -= XA_CHUNK_SIZE; for (;;) { struct xa_node *node = xas->xa_node; if (node->shift >= shift) break; xas->xa_node = xa_parent_locked(xas->xa, node); xas->xa_offset = node->offset - 1; if (node->offset != 0) break; } } restore: xas->xa_shift = shift; xas->xa_sibs = sibs; xas->xa_index = index; return; success: xas->xa_index = index; if (xas->xa_node) xas_set_offset(xas); } EXPORT_SYMBOL_GPL(xas_create_range); static void update_node(struct xa_state *xas, struct xa_node *node, int count, int values) { if (!node || (!count && !values)) return; node->count += count; node->nr_values += values; XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE); xas_update(xas, node); if (count < 0) xas_delete_node(xas); } /** * xas_store() - Store this entry in the XArray. * @xas: XArray operation state. * @entry: New entry. * * If @xas is operating on a multi-index entry, the entry returned by this * function is essentially meaningless (it may be an internal entry or it * may be %NULL, even if there are non-NULL entries at some of the indices * covered by the range). This is not a problem for any current users, * and can be changed if needed. * * Return: The old entry at this index. 
*/ void *xas_store(struct xa_state *xas, void *entry) { struct xa_node *node; void __rcu **slot = &xas->xa->xa_head; unsigned int offset, max; int count = 0; int values = 0; void *first, *next; bool value = xa_is_value(entry); if (entry) { bool allow_root = !xa_is_node(entry) && !xa_is_zero(entry); first = xas_create(xas, allow_root); } else { first = xas_load(xas); } if (xas_invalid(xas)) return first; node = xas->xa_node; if (node && (xas->xa_shift < node->shift)) xas->xa_sibs = 0; if ((first == entry) && !xas->xa_sibs) return first; next = first; offset = xas->xa_offset; max = xas->xa_offset + xas->xa_sibs; if (node) { slot = &node->slots[offset]; if (xas->xa_sibs) xas_squash_marks(xas); } if (!entry) xas_init_marks(xas); for (;;) { /* * Must clear the marks before setting the entry to NULL, * otherwise xas_for_each_marked may find a NULL entry and * stop early. rcu_assign_pointer contains a release barrier * so the mark clearing will appear to happen before the * entry is set to NULL. */ rcu_assign_pointer(*slot, entry); if (xa_is_node(next) && (!node || node->shift)) xas_free_nodes(xas, xa_to_node(next)); if (!node) break; count += !next - !entry; values += !xa_is_value(first) - !value; if (entry) { if (offset == max) break; if (!xa_is_sibling(entry)) entry = xa_mk_sibling(xas->xa_offset); } else { if (offset == XA_CHUNK_MASK) break; } next = xa_entry_locked(xas->xa, node, ++offset); if (!xa_is_sibling(next)) { if (!entry && (offset > max)) break; first = next; } slot++; } update_node(xas, node, count, values); return first; } EXPORT_SYMBOL_GPL(xas_store); /** * xas_get_mark() - Returns the state of this mark. * @xas: XArray operation state. * @mark: Mark number. * * Return: true if the mark is set, false if the mark is clear or @xas * is in an error state. */ bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark) { if (xas_invalid(xas)) return false; if (!xas->xa_node) return xa_marked(xas->xa, mark); return node_get_mark(xas->xa_node, xas->xa_offset, mark); } EXPORT_SYMBOL_GPL(xas_get_mark); /** * xas_set_mark() - Sets the mark on this entry and its parents. * @xas: XArray operation state. * @mark: Mark number. * * Sets the specified mark on this entry, and walks up the tree setting it * on all the ancestor entries. Does nothing if @xas has not been walked to * an entry, or is in an error state. */ void xas_set_mark(const struct xa_state *xas, xa_mark_t mark) { struct xa_node *node = xas->xa_node; unsigned int offset = xas->xa_offset; if (xas_invalid(xas)) return; while (node) { if (node_set_mark(node, offset, mark)) return; offset = node->offset; node = xa_parent_locked(xas->xa, node); } if (!xa_marked(xas->xa, mark)) xa_mark_set(xas->xa, mark); } EXPORT_SYMBOL_GPL(xas_set_mark); /** * xas_clear_mark() - Clears the mark on this entry and its parents. * @xas: XArray operation state. * @mark: Mark number. * * Clears the specified mark on this entry, and walks back to the head * attempting to clear it on all the ancestor entries. Does nothing if * @xas has not been walked to an entry, or is in an error state. 
*/ void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark) { struct xa_node *node = xas->xa_node; unsigned int offset = xas->xa_offset; if (xas_invalid(xas)) return; while (node) { if (!node_clear_mark(node, offset, mark)) return; if (node_any_mark(node, mark)) return; offset = node->offset; node = xa_parent_locked(xas->xa, node); } if (xa_marked(xas->xa, mark)) xa_mark_clear(xas->xa, mark); } EXPORT_SYMBOL_GPL(xas_clear_mark); /** * xas_init_marks() - Initialise all marks for the entry * @xas: Array operations state. * * Initialise all marks for the entry specified by @xas. If we're tracking * free entries with a mark, we need to set it on all entries. All other * marks are cleared. * * This implementation is not as efficient as it could be; we may walk * up the tree multiple times. */ void xas_init_marks(const struct xa_state *xas) { xa_mark_t mark = 0; for (;;) { if (xa_track_free(xas->xa) && mark == XA_FREE_MARK) xas_set_mark(xas, mark); else xas_clear_mark(xas, mark); if (mark == XA_MARK_MAX) break; mark_inc(mark); } } EXPORT_SYMBOL_GPL(xas_init_marks); #ifdef CONFIG_XARRAY_MULTI static unsigned int node_get_marks(struct xa_node *node, unsigned int offset) { unsigned int marks = 0; xa_mark_t mark = XA_MARK_0; for (;;) { if (node_get_mark(node, offset, mark)) marks |= 1 << (__force unsigned int)mark; if (mark == XA_MARK_MAX) break; mark_inc(mark); } return marks; } static void node_set_marks(struct xa_node *node, unsigned int offset, struct xa_node *child, unsigned int marks) { xa_mark_t mark = XA_MARK_0; for (;;) { if (marks & (1 << (__force unsigned int)mark)) { node_set_mark(node, offset, mark); if (child) node_mark_all(child, mark); } if (mark == XA_MARK_MAX) break; mark_inc(mark); } } /** * xas_split_alloc() - Allocate memory for splitting an entry. * @xas: XArray operation state. * @entry: New entry which will be stored in the array. * @order: Current entry order. * @gfp: Memory allocation flags. * * This function should be called before calling xas_split(). * If necessary, it will allocate new nodes (and fill them with @entry) * to prepare for the upcoming split of an entry of @order size into * entries of the order stored in the @xas. * * Context: May sleep if @gfp flags permit. */ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; unsigned int mask = xas->xa_sibs; /* XXX: no support for splitting really large entries yet */ if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order)) goto nomem; if (xas->xa_shift + XA_CHUNK_SHIFT > order) return; do { unsigned int i; void *sibling = NULL; struct xa_node *node; node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!node) goto nomem; node->array = xas->xa; for (i = 0; i < XA_CHUNK_SIZE; i++) { if ((i & mask) == 0) { RCU_INIT_POINTER(node->slots[i], entry); sibling = xa_mk_sibling(i); } else { RCU_INIT_POINTER(node->slots[i], sibling); } } RCU_INIT_POINTER(node->parent, xas->xa_alloc); xas->xa_alloc = node; } while (sibs-- > 0); return; nomem: xas_destroy(xas); xas_set_err(xas, -ENOMEM); } EXPORT_SYMBOL_GPL(xas_split_alloc); /** * xas_split() - Split a multi-index entry into smaller entries. * @xas: XArray operation state. * @entry: New entry to store in the array. * @order: Current entry order. * * The size of the new entries is set in @xas. The value in @entry is * copied to all the replacement entries. * * Context: Any context. The caller should hold the xa_lock. 
*/ void xas_split(struct xa_state *xas, void *entry, unsigned int order) { unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; unsigned int offset, marks; struct xa_node *node; void *curr = xas_load(xas); int values = 0; node = xas->xa_node; if (xas_top(node)) return; marks = node_get_marks(node, xas->xa_offset); offset = xas->xa_offset + sibs; do { if (xas->xa_shift < node->shift) { struct xa_node *child = xas->xa_alloc; xas->xa_alloc = rcu_dereference_raw(child->parent); child->shift = node->shift - XA_CHUNK_SHIFT; child->offset = offset; child->count = XA_CHUNK_SIZE; child->nr_values = xa_is_value(entry) ? XA_CHUNK_SIZE : 0; RCU_INIT_POINTER(child->parent, node); node_set_marks(node, offset, child, marks); rcu_assign_pointer(node->slots[offset], xa_mk_node(child)); if (xa_is_value(curr)) values--; xas_update(xas, child); } else { unsigned int canon = offset - xas->xa_sibs; node_set_marks(node, canon, NULL, marks); rcu_assign_pointer(node->slots[canon], entry); while (offset > canon) rcu_assign_pointer(node->slots[offset--], xa_mk_sibling(canon)); values += (xa_is_value(entry) - xa_is_value(curr)) * (xas->xa_sibs + 1); } } while (offset-- > xas->xa_offset); node->nr_values += values; xas_update(xas, node); } EXPORT_SYMBOL_GPL(xas_split); #endif /** * xas_pause() - Pause a walk to drop a lock. * @xas: XArray operation state. * * Some users need to pause a walk and drop the lock they're holding in * order to yield to a higher priority thread or carry out an operation * on an entry. Those users should call this function before they drop * the lock. It resets the @xas to be suitable for the next iteration * of the loop after the user has reacquired the lock. If most entries * found during a walk require you to call xas_pause(), the xa_for_each() * iterator may be more appropriate. * * Note that xas_pause() only works for forward iteration. If a user needs * to pause a reverse iteration, we will need a xas_pause_rev(). */ void xas_pause(struct xa_state *xas) { struct xa_node *node = xas->xa_node; if (xas_invalid(xas)) return; xas->xa_node = XAS_RESTART; if (node) { unsigned long offset = xas->xa_offset; while (++offset < XA_CHUNK_SIZE) { if (!xa_is_sibling(xa_entry(xas->xa, node, offset))) break; } xas->xa_index += (offset - xas->xa_offset) << node->shift; if (xas->xa_index == 0) xas->xa_node = XAS_BOUNDS; } else { xas->xa_index++; } } EXPORT_SYMBOL_GPL(xas_pause); /* * __xas_prev() - Find the previous entry in the XArray. * @xas: XArray operation state. * * Helper function for xas_prev() which handles all the complex cases * out of line. */ void *__xas_prev(struct xa_state *xas) { void *entry; if (!xas_frozen(xas->xa_node)) xas->xa_index--; if (!xas->xa_node) return set_bounds(xas); if (xas_not_node(xas->xa_node)) return xas_load(xas); if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) xas->xa_offset--; while (xas->xa_offset == 255) { xas->xa_offset = xas->xa_node->offset - 1; xas->xa_node = xa_parent(xas->xa, xas->xa_node); if (!xas->xa_node) return set_bounds(xas); } for (;;) { entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (!xa_is_node(entry)) return entry; xas->xa_node = xa_to_node(entry); xas_set_offset(xas); } } EXPORT_SYMBOL_GPL(__xas_prev); /* * __xas_next() - Find the next entry in the XArray. * @xas: XArray operation state. * * Helper function for xas_next() which handles all the complex cases * out of line. 
*/ void *__xas_next(struct xa_state *xas) { void *entry; if (!xas_frozen(xas->xa_node)) xas->xa_index++; if (!xas->xa_node) return set_bounds(xas); if (xas_not_node(xas->xa_node)) return xas_load(xas); if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) xas->xa_offset++; while (xas->xa_offset == XA_CHUNK_SIZE) { xas->xa_offset = xas->xa_node->offset + 1; xas->xa_node = xa_parent(xas->xa, xas->xa_node); if (!xas->xa_node) return set_bounds(xas); } for (;;) { entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (!xa_is_node(entry)) return entry; xas->xa_node = xa_to_node(entry); xas_set_offset(xas); } } EXPORT_SYMBOL_GPL(__xas_next); /** * xas_find() - Find the next present entry in the XArray. * @xas: XArray operation state. * @max: Highest index to return. * * If the @xas has not yet been walked to an entry, return the entry * which has an index >= xas.xa_index. If it has been walked, the entry * currently being pointed at has been processed, and so we move to the * next entry. * * If no entry is found and the array is smaller than @max, the iterator * is set to the smallest index not yet in the array. This allows @xas * to be immediately passed to xas_store(). * * Return: The entry, if found, otherwise %NULL. */ void *xas_find(struct xa_state *xas, unsigned long max) { void *entry; if (xas_error(xas) || xas->xa_node == XAS_BOUNDS) return NULL; if (xas->xa_index > max) return set_bounds(xas); if (!xas->xa_node) { xas->xa_index = 1; return set_bounds(xas); } else if (xas->xa_node == XAS_RESTART) { entry = xas_load(xas); if (entry || xas_not_node(xas->xa_node)) return entry; } else if (!xas->xa_node->shift && xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) { xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; } xas_next_offset(xas); while (xas->xa_node && (xas->xa_index <= max)) { if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { xas->xa_offset = xas->xa_node->offset + 1; xas->xa_node = xa_parent(xas->xa, xas->xa_node); continue; } entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (xa_is_node(entry)) { xas->xa_node = xa_to_node(entry); xas->xa_offset = 0; continue; } if (entry && !xa_is_sibling(entry)) return entry; xas_next_offset(xas); } if (!xas->xa_node) xas->xa_node = XAS_BOUNDS; return NULL; } EXPORT_SYMBOL_GPL(xas_find); /** * xas_find_marked() - Find the next marked entry in the XArray. * @xas: XArray operation state. * @max: Highest index to return. * @mark: Mark number to search for. * * If the @xas has not yet been walked to an entry, return the marked entry * which has an index >= xas.xa_index. If it has been walked, the entry * currently being pointed at has been processed, and so we return the * first marked entry with an index > xas.xa_index. * * If no marked entry is found and the array is smaller than @max, @xas is * set to the bounds state and xas->xa_index is set to the smallest index * not yet in the array. This allows @xas to be immediately passed to * xas_store(). * * If no entry is found before @max is reached, @xas is set to the restart * state. * * Return: The entry, if found, otherwise %NULL. 
*/ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) { bool advance = true; unsigned int offset; void *entry; if (xas_error(xas)) return NULL; if (xas->xa_index > max) goto max; if (!xas->xa_node) { xas->xa_index = 1; goto out; } else if (xas_top(xas->xa_node)) { advance = false; entry = xa_head(xas->xa); xas->xa_node = NULL; if (xas->xa_index > max_index(entry)) goto out; if (!xa_is_node(entry)) { if (xa_marked(xas->xa, mark)) return entry; xas->xa_index = 1; goto out; } xas->xa_node = xa_to_node(entry); xas->xa_offset = xas->xa_index >> xas->xa_node->shift; } while (xas->xa_index <= max) { if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { xas->xa_offset = xas->xa_node->offset + 1; xas->xa_node = xa_parent(xas->xa, xas->xa_node); if (!xas->xa_node) break; advance = false; continue; } if (!advance) { entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (xa_is_sibling(entry)) { xas->xa_offset = xa_to_sibling(entry); xas_move_index(xas, xas->xa_offset); } } offset = xas_find_chunk(xas, advance, mark); if (offset > xas->xa_offset) { advance = false; xas_move_index(xas, offset); /* Mind the wrap */ if ((xas->xa_index - 1) >= max) goto max; xas->xa_offset = offset; if (offset == XA_CHUNK_SIZE) continue; } entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK)) continue; if (!xa_is_node(entry)) return entry; xas->xa_node = xa_to_node(entry); xas_set_offset(xas); } out: if (xas->xa_index > max) goto max; return set_bounds(xas); max: xas->xa_node = XAS_RESTART; return NULL; } EXPORT_SYMBOL_GPL(xas_find_marked); /** * xas_find_conflict() - Find the next present entry in a range. * @xas: XArray operation state. * * The @xas describes both a range and a position within that range. * * Context: Any context. Expects xa_lock to be held. * Return: The next entry in the range covered by @xas or %NULL. */ void *xas_find_conflict(struct xa_state *xas) { void *curr; if (xas_error(xas)) return NULL; if (!xas->xa_node) return NULL; if (xas_top(xas->xa_node)) { curr = xas_start(xas); if (!curr) return NULL; while (xa_is_node(curr)) { struct xa_node *node = xa_to_node(curr); curr = xas_descend(xas, node); } if (curr) return curr; } if (xas->xa_node->shift > xas->xa_shift) return NULL; for (;;) { if (xas->xa_node->shift == xas->xa_shift) { if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs) break; } else if (xas->xa_offset == XA_CHUNK_MASK) { xas->xa_offset = xas->xa_node->offset; xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node); if (!xas->xa_node) break; continue; } curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset); if (xa_is_sibling(curr)) continue; while (xa_is_node(curr)) { xas->xa_node = xa_to_node(curr); xas->xa_offset = 0; curr = xa_entry_locked(xas->xa, xas->xa_node, 0); } if (curr) return curr; } xas->xa_offset -= xas->xa_sibs; return NULL; } EXPORT_SYMBOL_GPL(xas_find_conflict); /** * xa_load() - Load an entry from an XArray. * @xa: XArray. * @index: index into array. * * Context: Any context. Takes and releases the RCU lock. * Return: The entry at @index in @xa. 
*/ void *xa_load(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); void *entry; rcu_read_lock(); do { entry = xas_load(&xas); if (xa_is_zero(entry)) entry = NULL; } while (xas_retry(&xas, entry)); rcu_read_unlock(); return entry; } EXPORT_SYMBOL(xa_load); static void *xas_result(struct xa_state *xas, void *curr) { if (xa_is_zero(curr)) return NULL; if (xas_error(xas)) curr = xas->xa_node; return curr; } /** * __xa_erase() - Erase this entry from the XArray while locked. * @xa: XArray. * @index: Index into array. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Any context. Expects xa_lock to be held on entry. * Return: The entry which used to be at this index. */ void *__xa_erase(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); return xas_result(&xas, xas_store(&xas, NULL)); } EXPORT_SYMBOL(__xa_erase); /** * xa_erase() - Erase this entry from the XArray. * @xa: XArray. * @index: Index of entry. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock. * Return: The entry which used to be at this index. */ void *xa_erase(struct xarray *xa, unsigned long index) { void *entry; xa_lock(xa); entry = __xa_erase(xa, index); xa_unlock(xa); return entry; } EXPORT_SYMBOL(xa_erase); /** * __xa_store() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * You must already be holding the xa_lock when calling this function. * It will drop the lock if needed to allocate memory, and then reacquire * it afterwards. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: The old entry at this index or xa_err() if an error happened. */ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); if (xa_track_free(xa) && !entry) entry = XA_ZERO_ENTRY; do { curr = xas_store(&xas, entry); if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); } EXPORT_SYMBOL(__xa_store); /** * xa_store() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * After this function returns, loads from this index will return @entry. * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Any context. Takes and releases the xa_lock. * May sleep if the @gfp flags permit. * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation * failed. */ void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { void *curr; xa_lock(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock(xa); return curr; } EXPORT_SYMBOL(xa_store); /** * __xa_cmpxchg() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New entry. 
* @gfp: Memory allocation flags. * * You must already be holding the xa_lock when calling this function. * It will drop the lock if needed to allocate memory, and then reacquire * it afterwards. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: The old entry at this index or xa_err() if an error happened. */ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); do { curr = xas_load(&xas); if (curr == old) { xas_store(&xas, entry); if (xa_track_free(xa) && entry && !curr) xas_clear_mark(&xas, XA_FREE_MARK); } } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); } EXPORT_SYMBOL(__xa_cmpxchg); /** * __xa_insert() - Store this entry in the XArray if no entry is present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. Inserting will fail if a reserved entry is * present, even though loading from this index will return NULL. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (!entry) entry = XA_ZERO_ENTRY; do { curr = xas_load(&xas); if (!curr) { xas_store(&xas, entry); if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); } else { xas_set_err(&xas, -EBUSY); } } while (__xas_nomem(&xas, gfp)); return xas_error(&xas); } EXPORT_SYMBOL(__xa_insert); #ifdef CONFIG_XARRAY_MULTI static void xas_set_range(struct xa_state *xas, unsigned long first, unsigned long last) { unsigned int shift = 0; unsigned long sibs = last - first; unsigned int offset = XA_CHUNK_MASK; xas_set(xas, first); while ((first & XA_CHUNK_MASK) == 0) { if (sibs < XA_CHUNK_MASK) break; if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK)) break; shift += XA_CHUNK_SHIFT; if (offset == XA_CHUNK_MASK) offset = sibs & XA_CHUNK_MASK; sibs >>= XA_CHUNK_SHIFT; first >>= XA_CHUNK_SHIFT; } offset = first & XA_CHUNK_MASK; if (offset + sibs > XA_CHUNK_MASK) sibs = XA_CHUNK_MASK - offset; if ((((first + sibs + 1) << shift) - 1) > last) sibs -= 1; xas->xa_shift = shift; xas->xa_sibs = sibs; } /** * xa_store_range() - Store this entry at a range of indices in the XArray. * @xa: XArray. * @first: First index to affect. * @last: Last index to affect. * @entry: New entry. * @gfp: Memory allocation flags. * * After this function returns, loads from any index between @first and @last, * inclusive will return @entry. * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Process context. Takes and releases the xa_lock. May sleep * if the @gfp flags permit. * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in * an XArray, or xa_err(-ENOMEM) if memory allocation failed. 
*/ void *xa_store_range(struct xarray *xa, unsigned long first, unsigned long last, void *entry, gfp_t gfp) { XA_STATE(xas, xa, 0); if (WARN_ON_ONCE(xa_is_internal(entry))) return XA_ERROR(-EINVAL); if (last < first) return XA_ERROR(-EINVAL); do { xas_lock(&xas); if (entry) { unsigned int order = BITS_PER_LONG; if (last + 1) order = __ffs(last + 1); xas_set_order(&xas, last, order); xas_create(&xas, true); if (xas_error(&xas)) goto unlock; } do { xas_set_range(&xas, first, last); xas_store(&xas, entry); if (xas_error(&xas)) goto unlock; first += xas_size(&xas); } while (first <= last); unlock: xas_unlock(&xas); } while (xas_nomem(&xas, gfp)); return xas_result(&xas, NULL); } EXPORT_SYMBOL(xa_store_range); /** * xa_get_order() - Get the order of an entry. * @xa: XArray. * @index: Index of the entry. * * Return: A number between 0 and 63 indicating the order of the entry. */ int xa_get_order(struct xarray *xa, unsigned long index) { XA_STATE(xas, xa, index); void *entry; int order = 0; rcu_read_lock(); entry = xas_load(&xas); if (!entry) goto unlock; if (!xas.xa_node) goto unlock; for (;;) { unsigned int slot = xas.xa_offset + (1 << order); if (slot >= XA_CHUNK_SIZE) break; if (!xa_is_sibling(xas.xa_node->slots[slot])) break; order++; } order += xas.xa_node->shift; unlock: rcu_read_unlock(); return order; } EXPORT_SYMBOL(xa_get_order); #endif /* CONFIG_XARRAY_MULTI */ /** * __xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @limit: Range for allocated ID. * @entry: New entry. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ int __xa_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { XA_STATE(xas, xa, 0); if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (WARN_ON_ONCE(!xa_track_free(xa))) return -EINVAL; if (!entry) entry = XA_ZERO_ENTRY; do { xas.xa_index = limit.min; xas_find_marked(&xas, limit.max, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) xas_set_err(&xas, -EBUSY); else *id = xas.xa_index; xas_store(&xas, entry); xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); return xas_error(&xas); } EXPORT_SYMBOL(__xa_alloc); /** * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 
1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { u32 min = limit.min; int ret; limit.min = max(min, *next); ret = __xa_alloc(xa, id, entry, limit, gfp); if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) { xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED; ret = 1; } if (ret < 0 && limit.min > min) { limit.min = min; ret = __xa_alloc(xa, id, entry, limit, gfp); if (ret == 0) ret = 1; } if (ret >= 0) { *next = *id + 1; if (*next == 0) xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED; } return ret; } EXPORT_SYMBOL(__xa_alloc_cyclic); /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Attempting to set a mark on a %NULL entry does not succeed. * * Context: Any context. Expects xa_lock to be held on entry. */ void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry = xas_load(&xas); if (entry) xas_set_mark(&xas, mark); } EXPORT_SYMBOL(__xa_set_mark); /** * __xa_clear_mark() - Clear this mark on this entry while locked. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Context: Any context. Expects xa_lock to be held on entry. */ void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry = xas_load(&xas); if (entry) xas_clear_mark(&xas, mark); } EXPORT_SYMBOL(__xa_clear_mark); /** * xa_get_mark() - Inquire whether this mark is set on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * This function uses the RCU read lock, so the result may be out of date * by the time it returns. If you need the result to be stable, use a lock. * * Context: Any context. Takes and releases the RCU lock. * Return: True if the entry at @index has this mark set, false if it doesn't. */ bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { XA_STATE(xas, xa, index); void *entry; rcu_read_lock(); entry = xas_start(&xas); while (xas_get_mark(&xas, mark)) { if (!xa_is_node(entry)) goto found; entry = xas_descend(&xas, xa_to_node(entry)); } rcu_read_unlock(); return false; found: rcu_read_unlock(); return true; } EXPORT_SYMBOL(xa_get_mark); /** * xa_set_mark() - Set this mark on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Attempting to set a mark on a %NULL entry does not succeed. * * Context: Process context. Takes and releases the xa_lock. */ void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { xa_lock(xa); __xa_set_mark(xa, index, mark); xa_unlock(xa); } EXPORT_SYMBOL(xa_set_mark); /** * xa_clear_mark() - Clear this mark on this entry. * @xa: XArray. * @index: Index of entry. * @mark: Mark number. * * Clearing a mark always succeeds. * * Context: Process context. Takes and releases the xa_lock. */ void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) { xa_lock(xa); __xa_clear_mark(xa, index, mark); xa_unlock(xa); } EXPORT_SYMBOL(xa_clear_mark); /** * xa_find() - Search the XArray for an entry. * @xa: XArray. * @indexp: Pointer to an index. * @max: Maximum index to search to. * @filter: Selection criterion. * * Finds the entry in @xa which matches the @filter, and has the lowest * index that is at least @indexp and no more than @max. 
* If an entry is found, @indexp is updated to be the index of the entry. * This function is protected by the RCU read lock, so it may not find * entries which are being simultaneously added. It will not return an * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). * * Context: Any context. Takes and releases the RCU lock. * Return: The entry, if found, otherwise %NULL. */ void *xa_find(struct xarray *xa, unsigned long *indexp, unsigned long max, xa_mark_t filter) { XA_STATE(xas, xa, *indexp); void *entry; rcu_read_lock(); do { if ((__force unsigned int)filter < XA_MAX_MARKS) entry = xas_find_marked(&xas, max, filter); else entry = xas_find(&xas, max); } while (xas_retry(&xas, entry)); rcu_read_unlock(); if (entry) *indexp = xas.xa_index; return entry; } EXPORT_SYMBOL(xa_find); static bool xas_sibling(struct xa_state *xas) { struct xa_node *node = xas->xa_node; unsigned long mask; if (!IS_ENABLED(CONFIG_XARRAY_MULTI) || !node) return false; mask = (XA_CHUNK_SIZE << node->shift) - 1; return (xas->xa_index & mask) > ((unsigned long)xas->xa_offset << node->shift); } /** * xa_find_after() - Search the XArray for a present entry. * @xa: XArray. * @indexp: Pointer to an index. * @max: Maximum index to search to. * @filter: Selection criterion. * * Finds the entry in @xa which matches the @filter and has the lowest * index that is above @indexp and no more than @max. * If an entry is found, @indexp is updated to be the index of the entry. * This function is protected by the RCU read lock, so it may miss entries * which are being simultaneously added. It will not return an * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). * * Context: Any context. Takes and releases the RCU lock. * Return: The pointer, if found, otherwise %NULL. */ void *xa_find_after(struct xarray *xa, unsigned long *indexp, unsigned long max, xa_mark_t filter) { XA_STATE(xas, xa, *indexp + 1); void *entry; if (xas.xa_index == 0) return NULL; rcu_read_lock(); for (;;) { if ((__force unsigned int)filter < XA_MAX_MARKS) entry = xas_find_marked(&xas, max, filter); else entry = xas_find(&xas, max); if (xas_invalid(&xas)) break; if (xas_sibling(&xas)) continue; if (!xas_retry(&xas, entry)) break; } rcu_read_unlock(); if (entry) *indexp = xas.xa_index; return entry; } EXPORT_SYMBOL(xa_find_after); static unsigned int xas_extract_present(struct xa_state *xas, void **dst, unsigned long max, unsigned int n) { void *entry; unsigned int i = 0; rcu_read_lock(); xas_for_each(xas, entry, max) { if (xas_retry(xas, entry)) continue; dst[i++] = entry; if (i == n) break; } rcu_read_unlock(); return i; } static unsigned int xas_extract_marked(struct xa_state *xas, void **dst, unsigned long max, unsigned int n, xa_mark_t mark) { void *entry; unsigned int i = 0; rcu_read_lock(); xas_for_each_marked(xas, entry, max, mark) { if (xas_retry(xas, entry)) continue; dst[i++] = entry; if (i == n) break; } rcu_read_unlock(); return i; } /** * xa_extract() - Copy selected entries from the XArray into a normal array. * @xa: The source XArray to copy from. * @dst: The buffer to copy entries into. * @start: The first index in the XArray eligible to be selected. * @max: The last index in the XArray eligible to be selected. * @n: The maximum number of entries to copy. * @filter: Selection criterion. * * Copies up to @n entries that match @filter from the XArray. The * copied entries will have indices between @start and @max, inclusive. 
* * The @filter may be an XArray mark value, in which case entries which are * marked with that mark will be copied. It may also be %XA_PRESENT, in * which case all entries which are not %NULL will be copied. * * The entries returned may not represent a snapshot of the XArray at a * moment in time. For example, if another thread stores to index 5, then * index 10, calling xa_extract() may return the old contents of index 5 * and the new contents of index 10. Indices not modified while this * function is running will not be skipped. * * If you need stronger guarantees, holding the xa_lock across calls to this * function will prevent concurrent modification. * * Context: Any context. Takes and releases the RCU lock. * Return: The number of entries copied. */ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, unsigned long max, unsigned int n, xa_mark_t filter) { XA_STATE(xas, xa, start); if (!n) return 0; if ((__force unsigned int)filter < XA_MAX_MARKS) return xas_extract_marked(&xas, dst, max, n, filter); return xas_extract_present(&xas, dst, max, n); } EXPORT_SYMBOL(xa_extract); /** * xa_delete_node() - Private interface for workingset code. * @node: Node to be removed from the tree. * @update: Function to call to update ancestor nodes. * * Context: xa_lock must be held on entry and will not be released. */ void xa_delete_node(struct xa_node *node, xa_update_node_t update) { struct xa_state xas = { .xa = node->array, .xa_index = (unsigned long)node->offset << (node->shift + XA_CHUNK_SHIFT), .xa_shift = node->shift + XA_CHUNK_SHIFT, .xa_offset = node->offset, .xa_node = xa_parent_locked(node->array, node), .xa_update = update, }; xas_store(&xas, NULL); } EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */ /** * xa_destroy() - Free all internal data structures. * @xa: XArray. * * After calling this function, the XArray is empty and has freed all memory * allocated for its internal data structures. You are responsible for * freeing the objects referenced by the XArray. * * Context: Any context. Takes and releases the xa_lock, interrupt-safe. */ void xa_destroy(struct xarray *xa) { XA_STATE(xas, xa, 0); unsigned long flags; void *entry; xas.xa_node = NULL; xas_lock_irqsave(&xas, flags); entry = xa_head_locked(xa); RCU_INIT_POINTER(xa->xa_head, NULL); xas_init_marks(&xas); if (xa_zero_busy(xa)) xa_mark_clear(xa, XA_FREE_MARK); /* lockdep checks we're still holding the lock in xas_free_nodes() */ if (xa_is_node(entry)) xas_free_nodes(&xas, xa_to_node(entry)); xas_unlock_irqrestore(&xas, flags); } EXPORT_SYMBOL(xa_destroy); #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { unsigned i, j; if (!node) return; if ((unsigned long)node & 3) { pr_cont("node %px\n", node); return; } pr_cont("node %px %s %d parent %px shift %d count %d values %d " "array %px list %px %px marks", node, node->parent ? 
"offset" : "max", node->offset, node->parent, node->shift, node->count, node->nr_values, node->array, node->private_list.prev, node->private_list.next); for (i = 0; i < XA_MAX_MARKS; i++) for (j = 0; j < XA_MARK_LONGS; j++) pr_cont(" %lx", node->marks[i][j]); pr_cont("\n"); } void xa_dump_index(unsigned long index, unsigned int shift) { if (!shift) pr_info("%lu: ", index); else if (shift >= BITS_PER_LONG) pr_info("0-%lu: ", ~0UL); else pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1)); } void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift) { if (!entry) return; xa_dump_index(index, shift); if (xa_is_node(entry)) { if (shift == 0) { pr_cont("%px\n", entry); } else { unsigned long i; struct xa_node *node = xa_to_node(entry); xa_dump_node(node); for (i = 0; i < XA_CHUNK_SIZE; i++) xa_dump_entry(node->slots[i], index + (i << node->shift), node->shift); } } else if (xa_is_value(entry)) pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry), xa_to_value(entry), entry); else if (!xa_is_internal(entry)) pr_cont("%px\n", entry); else if (xa_is_retry(entry)) pr_cont("retry (%ld)\n", xa_to_internal(entry)); else if (xa_is_sibling(entry)) pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry)); else if (xa_is_zero(entry)) pr_cont("zero (%ld)\n", xa_to_internal(entry)); else pr_cont("UNKNOWN ENTRY (%px)\n", entry); } void xa_dump(const struct xarray *xa) { void *entry = xa->xa_head; unsigned int shift = 0; pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry, xa->xa_flags, xa_marked(xa, XA_MARK_0), xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2)); if (xa_is_node(entry)) shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT; xa_dump_entry(entry, 0, shift); } #endif
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_DCACHE_H
#define __LINUX_DCACHE_H

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/math.h>
#include <linux/rculist.h>
#include <linux/rculist_bl.h>
#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/cache.h>
#include <linux/rcupdate.h>
#include <linux/lockref.h>
#include <linux/stringhash.h>
#include <linux/wait.h>

struct path;
struct file;
struct vfsmount;

/*
 * linux/include/linux/dcache.h
 *
 * Dirent cache data structures
 *
 * (C) Copyright 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

#define IS_ROOT(x) ((x) == (x)->d_parent)

/* The hash is always the low bits of hash_len */
#ifdef __LITTLE_ENDIAN
#define HASH_LEN_DECLARE u32 hash; u32 len
#define bytemask_from_count(cnt)        (~(~0ul << (cnt)*8))
#else
#define HASH_LEN_DECLARE u32 len; u32 hash
#define bytemask_from_count(cnt)        (~(~0ul >> (cnt)*8))
#endif

/*
 * "quick string" -- eases parameter passing, but more importantly
 * saves "metadata" about the string (ie length and the hash).
 *
 * hash comes first so it snuggles against d_parent in the
 * dentry.
*/ struct qstr { union { struct { HASH_LEN_DECLARE; }; u64 hash_len; }; const unsigned char *name; }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } extern const struct qstr empty_name; extern const struct qstr slash_name; extern const struct qstr dotdot_name; /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the * large memory footprint increase). */ #ifdef CONFIG_64BIT # define DNAME_INLINE_LEN 40 /* 192 bytes */ #else # ifdef CONFIG_SMP # define DNAME_INLINE_LEN 40 /* 128 bytes */ # else # define DNAME_INLINE_LEN 44 /* 128 bytes */ # endif #endif #define d_lock d_lockref.lock struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ seqcount_spinlock_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ /* Ref lookup also touches following */ struct lockref d_lockref; /* per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ union { struct list_head d_lru; /* LRU list */ wait_queue_head_t *d_wait; /* in-lookup ones only */ }; struct hlist_node d_sib; /* child of parent list */ struct hlist_head d_children; /* our children */ /* * d_alias and d_rcu can share memory */ union { struct hlist_node d_alias; /* inode alias list */ struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; }; /* * dentry->d_lock spinlock nesting subclasses: * * 0: normal * 1: nested */ enum dentry_d_lock_class { DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */ DENTRY_D_LOCK_NESTED }; enum d_real_type { D_REAL_DATA, D_REAL_METADATA, }; struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); int (*d_init)(struct dentry *); void (*d_release)(struct dentry *); void (*d_prune)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); } ____cacheline_aligned; /* * Locking rules for dentry_operations callbacks are to be found in * Documentation/filesystems/locking.rst. Keep it updated! * * FUrther descriptions are found in Documentation/filesystems/vfs.rst. * Keep it updated too! */ /* d_flags entries */ #define DCACHE_OP_HASH BIT(0) #define DCACHE_OP_COMPARE BIT(1) #define DCACHE_OP_REVALIDATE BIT(2) #define DCACHE_OP_DELETE BIT(3) #define DCACHE_OP_PRUNE BIT(4) #define DCACHE_DISCONNECTED BIT(5) /* This dentry is possibly not currently connected to the dcache tree, in * which case its parent will either be itself, or will have this flag as * well. nfsd will not use a dentry with this bit set, but will first * endeavour to clear the bit either by discovering that it is connected, * or by performing lookup operations. 
Any filesystem which supports * nfsd_operations MUST have a lookup function which, if it finds a * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that * dentry into place and return that dentry rather than the passed one, * typically using d_splice_alias. */ #define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ #define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ #define DCACHE_CANT_MOUNT BIT(8) #define DCACHE_GENOCIDE BIT(9) #define DCACHE_SHRINK_LIST BIT(10) #define DCACHE_OP_WEAK_REVALIDATE BIT(11) #define DCACHE_NFSFS_RENAMED BIT(12) /* this dentry has been "silly renamed" and has to be deleted on the last * dput() */ #define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(14) /* Parent inode is watched by some fsnotify listener */ #define DCACHE_DENTRY_KILLED BIT(15) #define DCACHE_MOUNTED BIT(16) /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT BIT(17) /* handle automount on this dir */ #define DCACHE_MANAGE_TRANSIT BIT(18) /* manage transit from this dirent */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) #define DCACHE_LRU_LIST BIT(19) #define DCACHE_ENTRY_TYPE (7 << 20) /* bits 20..22 are for storing type: */ #define DCACHE_MISS_TYPE (0 << 20) /* Negative dentry */ #define DCACHE_WHITEOUT_TYPE (1 << 20) /* Whiteout dentry (stop pathwalk) */ #define DCACHE_DIRECTORY_TYPE (2 << 20) /* Normal directory */ #define DCACHE_AUTODIR_TYPE (3 << 20) /* Lookupless directory (presumed automount) */ #define DCACHE_REGULAR_TYPE (4 << 20) /* Regular file type */ #define DCACHE_SPECIAL_TYPE (5 << 20) /* Other file type */ #define DCACHE_SYMLINK_TYPE (6 << 20) /* Symlink */ #define DCACHE_NOKEY_NAME BIT(25) /* Encrypted name encoded without key */ #define DCACHE_OP_REAL BIT(26) #define DCACHE_PAR_LOOKUP BIT(28) /* being looked up (with parent locked shared) */ #define DCACHE_DENTRY_CURSOR BIT(29) #define DCACHE_NORCU BIT(30) /* No RCU delay for freeing */ extern seqlock_t rename_lock; /* * These are the low-level FS interfaces to the dcache.. 
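 *
 * Illustrative sketch of the most common pattern, a filesystem ->create()
 * method publishing a freshly set-up inode (myfs_new_inode() is a
 * hypothetical helper standing in for the filesystem's own allocation):
 *
 *	inode = myfs_new_inode(dir, mode);
 *	if (IS_ERR(inode))
 *		return PTR_ERR(inode);
 *	d_instantiate(dentry, inode);
 *	return 0;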
*/ extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate_new(struct dentry *, struct inode *); extern void __d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_make_root(struct inode *); extern void d_mark_tmpfile(struct file *, struct inode *); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); extern struct dentry *d_find_alias_rcu(struct inode *); /* test whether we have any submounts in a subdir tree */ extern int path_has_submounts(const struct path *); /* * This adds the entry to the hash queues. */ extern void d_rehash(struct dentry *); extern void d_add(struct dentry *, struct inode *); /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); static inline unsigned d_count(const struct dentry *dentry) { return dentry->d_lockref.count; } /* * helper function for dentry_operations.d_dname() members */ extern __printf(3, 4) char *dynamic_dname(char *, int, const char *, ...); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); extern char *dentry_path_raw(const struct dentry *, char *, int); extern char *dentry_path(const struct dentry *, char *, int); /* Allocation counts.. */ /** * dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a live dentry, increment the reference count and return the dentry. * Caller must hold @dentry->d_lock. Making sure that dentry is alive is * caller's resonsibility. There are many conditions sufficient to guarantee * that; e.g. anything with non-negative refcount is alive, so's anything * hashed, anything positive, anyone's parent, etc. */ static inline struct dentry *dget_dlock(struct dentry *dentry) { dentry->d_lockref.count++; return dentry; } /** * dget - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. 
A dentry will not be * destroyed when it has references. Conversely, a dentry with * no references can disappear for any number of reasons, starting * with memory pressure. In other words, that primitive is * used to clone an existing reference; using it on something with * zero refcount is a bug. * * NOTE: it will spin if @dentry->d_lock is held. From the deadlock * avoidance point of view it is equivalent to spin_lock()/increment * refcount/spin_unlock(), so calling it under @dentry->d_lock is * always a bug; so's calling it under ->d_lock on any of its descendents. * */ static inline struct dentry *dget(struct dentry *dentry) { if (dentry) lockref_get(&dentry->d_lockref); return dentry; } extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed * @dentry: entry to check * * Returns true if the dentry passed is not currently hashed. */ static inline int d_unhashed(const struct dentry *dentry) { return hlist_bl_unhashed(&dentry->d_hash); } static inline int d_unlinked(const struct dentry *dentry) { return d_unhashed(dentry) && !IS_ROOT(dentry); } static inline int cant_mount(const struct dentry *dentry) { return (dentry->d_flags & DCACHE_CANT_MOUNT); } static inline void dont_mount(struct dentry *dentry) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_CANT_MOUNT; spin_unlock(&dentry->d_lock); } extern void __d_lookup_unhash_wake(struct dentry *dentry); static inline int d_in_lookup(const struct dentry *dentry) { return dentry->d_flags & DCACHE_PAR_LOOKUP; } static inline void d_lookup_done(struct dentry *dentry) { if (unlikely(d_in_lookup(dentry))) __d_lookup_unhash_wake(dentry); } extern void dput(struct dentry *); static inline bool d_managed(const struct dentry *dentry) { return dentry->d_flags & DCACHE_MANAGED_DENTRY; } static inline bool d_mountpoint(const struct dentry *dentry) { return dentry->d_flags & DCACHE_MOUNTED; } /* * Directory cache entry type accessor functions. */ static inline unsigned __d_entry_type(const struct dentry *dentry) { return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_MISS_TYPE; } static inline bool d_is_whiteout(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_WHITEOUT_TYPE; } static inline bool d_can_lookup(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE; } static inline bool d_is_autodir(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_AUTODIR_TYPE; } static inline bool d_is_dir(const struct dentry *dentry) { return d_can_lookup(dentry) || d_is_autodir(dentry); } static inline bool d_is_symlink(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_SYMLINK_TYPE; } static inline bool d_is_reg(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_REGULAR_TYPE; } static inline bool d_is_special(const struct dentry *dentry) { return __d_entry_type(dentry) == DCACHE_SPECIAL_TYPE; } static inline bool d_is_file(const struct dentry *dentry) { return d_is_reg(dentry) || d_is_special(dentry); } static inline bool d_is_negative(const struct dentry *dentry) { // TODO: check d_is_whiteout(dentry) also. 
return d_is_miss(dentry); } static inline bool d_flags_negative(unsigned flags) { return (flags & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE; } static inline bool d_is_positive(const struct dentry *dentry) { return !d_is_negative(dentry); } /** * d_really_is_negative - Determine if a dentry is really negative (ignoring fallthroughs) * @dentry: The dentry in question * * Returns true if the dentry represents either an absent name or a name that * doesn't map to an inode (ie. ->d_inode is NULL). The dentry could represent * a true miss, a whiteout that isn't represented by a 0,0 chardev or a * fallthrough marker in an opaque directory. * * Note! (1) This should be used *only* by a filesystem to examine its own * dentries. It should not be used to look at some other filesystem's * dentries. (2) It should also be used in combination with d_inode() to get * the inode. (3) The dentry may have something attached to ->d_lower and the * type field of the flags may be set to something other than miss or whiteout. */ static inline bool d_really_is_negative(const struct dentry *dentry) { return dentry->d_inode == NULL; } /** * d_really_is_positive - Determine if a dentry is really positive (ignoring fallthroughs) * @dentry: The dentry in question * * Returns true if the dentry represents a name that maps to an inode * (ie. ->d_inode is not NULL). The dentry might still represent a whiteout if * that is represented on medium as a 0,0 chardev. * * Note! (1) This should be used *only* by a filesystem to examine its own * dentries. It should not be used to look at some other filesystem's * dentries. (2) It should also be used in combination with d_inode() to get * the inode. */ static inline bool d_really_is_positive(const struct dentry *dentry) { return dentry->d_inode != NULL; } static inline int simple_positive(const struct dentry *dentry) { return d_really_is_positive(dentry) && !d_unhashed(dentry); } extern int sysctl_vfs_cache_pressure; static inline unsigned long vfs_pressure_ratio(unsigned long val) { return mult_frac(val, sysctl_vfs_cache_pressure, 100); } /** * d_inode - Get the actual inode of this dentry * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes * in their own dentries and ignore the layering superimposed upon them. */ static inline struct inode *d_inode(const struct dentry *dentry) { return dentry->d_inode; } /** * d_inode_rcu - Get the actual inode of this dentry with READ_ONCE() * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes * in their own dentries and ignore the layering superimposed upon them. */ static inline struct inode *d_inode_rcu(const struct dentry *dentry) { return READ_ONCE(dentry->d_inode); } /** * d_backing_inode - Get upper or lower inode we should be using * @upper: The upper layer * * This is the helper that should be used to get at the inode that will be used * if this dentry were to be opened as a file. The inode may be on the upper * dentry or it may be on a lower dentry pinned by the upper. * * Normal filesystems should not use this to access their own inodes. */ static inline struct inode *d_backing_inode(const struct dentry *upper) { struct inode *inode = upper->d_inode; return inode; } /** * d_real - Return the real dentry * @dentry: the dentry to query * @type: the type of real dentry (data or metadata) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. 
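 *
 * Illustrative sketch of a caller that needs the inode actually holding
 * the data of a possibly-overlayed dentry (this is what d_real_inode()
 * below does):
 *
 *	struct dentry *real = d_real(dentry, D_REAL_DATA);
 *	struct inode *inode = d_inode(real);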
* * See also: Documentation/filesystems/vfs.rst */ static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) return dentry->d_op->d_real(dentry, type); else return dentry; } /** * d_real_inode - Return the real inode hosting the data * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. * Otherwise return d_inode(). */ static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA)); } struct name_snapshot { struct qstr name; unsigned char inline_name[DNAME_INLINE_LEN]; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); static inline struct dentry *d_first_child(const struct dentry *dentry) { return hlist_entry_safe(dentry->d_children.first, struct dentry, d_sib); } static inline struct dentry *d_next_sibling(const struct dentry *dentry) { return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib); } #endif /* __LINUX_DCACHE_H */
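
/*
 * Illustrative sketch (assumptions: myfs_find_entry() and myfs_iget() are
 * hypothetical placeholders for a filesystem's own directory search and
 * inode-reading helpers): how a typical ->lookup() method hands its result
 * to the dcache interfaces declared above.
 */
#if 0	/* example only, never built */
static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
				  unsigned int flags)
{
	struct inode *inode = NULL;
	u64 ino;

	/* Search the on-disk directory for dentry->d_name. */
	ino = myfs_find_entry(dir, &dentry->d_name);
	if (ino)
		inode = myfs_iget(dir->i_sb, ino);

	/*
	 * d_splice_alias() attaches the inode to the dentry (hashing it as
	 * negative when inode is NULL).  If a disconnected alias already
	 * exists it is moved into place and returned, so the VFS must be
	 * given the returned dentry rather than the one passed in.
	 */
	return d_splice_alias(inode, dentry);
}
#endif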
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch.
See 'mm/memory.c': 'copy_page_range()' */ #include <linux/anon_inodes.h> #include <linux/slab.h> #include <linux/sched/autogroup.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/user.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/stat.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/sched/cputime.h> #include <linux/seq_file.h> #include <linux/rtmutex.h> #include <linux/init.h> #include <linux/unistd.h> #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/completion.h> #include <linux/personality.h> #include <linux/mempolicy.h> #include <linux/sem.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/iocontext.h> #include <linux/key.h> #include <linux/kmsan.h> #include <linux/binfmts.h> #include <linux/mman.h> #include <linux/mmu_notifier.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/nsproxy.h> #include <linux/capability.h> #include <linux/cpu.h> #include <linux/cgroup.h> #include <linux/security.h> #include <linux/hugetlb.h> #include <linux/seccomp.h> #include <linux/swap.h> #include <linux/syscalls.h> #include <linux/syscall_user_dispatch.h> #include <linux/jiffies.h> #include <linux/futex.h> #include <linux/compat.h> #include <linux/kthread.h> #include <linux/task_io_accounting_ops.h> #include <linux/rcupdate.h> #include <linux/ptrace.h> #include <linux/mount.h> #include <linux/audit.h> #include <linux/memcontrol.h> #include <linux/ftrace.h> #include <linux/proc_fs.h> #include <linux/profile.h> #include <linux/rmap.h> #include <linux/ksm.h> #include <linux/acct.h> #include <linux/userfaultfd_k.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/freezer.h> #include <linux/delayacct.h> #include <linux/taskstats_kern.h> #include <linux/tty.h> #include <linux/fs_struct.h> #include <linux/magic.h> #include <linux/perf_event.h> #include <linux/posix-timers.h> #include <linux/user-return-notifier.h> #include <linux/oom.h> #include <linux/khugepaged.h> #include <linux/signalfd.h> #include <linux/uprobes.h> #include <linux/aio.h> #include <linux/compiler.h> #include <linux/sysctl.h> #include <linux/kcov.h> #include <linux/livepatch.h> #include <linux/thread_info.h> #include <linux/stackleak.h> #include <linux/kasan.h> #include <linux/scs.h> #include <linux/io_uring.h> #include <linux/bpf.h> #include <linux/stackprotector.h> #include <linux/user_events.h> #include <linux/iommu.h> #include <linux/rseq.h> #include <uapi/linux/pidfd.h> #include <linux/pidfs.h> #include <asm/pgalloc.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <trace/events/sched.h> #define CREATE_TRACE_POINTS #include <trace/events/task.h> /* * Minimum number of threads to boot the kernel */ #define MIN_THREADS 20 /* * Maximum number of threads */ #define MAX_THREADS FUTEX_TID_MASK /* * Protected counters by write_lock_irq(&tasklist_lock) */ unsigned long total_forks; /* Handle normal Linux uptimes. */ int nr_threads; /* The idle threads do not count.. 
*/ static int max_threads; /* tunable limit on nr_threads */ #define NAMED_ARRAY_INDEX(x) [x] = __stringify(x) static const char * const resident_page_types[] = { NAMED_ARRAY_INDEX(MM_FILEPAGES), NAMED_ARRAY_INDEX(MM_ANONPAGES), NAMED_ARRAY_INDEX(MM_SWAPENTS), NAMED_ARRAY_INDEX(MM_SHMEMPAGES), }; DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ #ifdef CONFIG_PROVE_RCU int lockdep_tasklist_lock_is_held(void) { return lockdep_is_held(&tasklist_lock); } EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held); #endif /* #ifdef CONFIG_PROVE_RCU */ int nr_processes(void) { int cpu; int total = 0; for_each_possible_cpu(cpu) total += per_cpu(process_counts, cpu); return total; } void __weak arch_release_task_struct(struct task_struct *tsk) { } static struct kmem_cache *task_struct_cachep; static inline struct task_struct *alloc_task_struct_node(int node) { return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); } static inline void free_task_struct(struct task_struct *tsk) { kmem_cache_free(task_struct_cachep, tsk); } /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) # ifdef CONFIG_VMAP_STACK /* * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB * flush. Try to minimize the number of calls by caching stacks. */ #define NR_CACHED_STACKS 2 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); struct vm_stack { struct rcu_head rcu; struct vm_struct *stack_vm_area; }; static bool try_release_thread_stack_to_cache(struct vm_struct *vm) { unsigned int i; for (i = 0; i < NR_CACHED_STACKS; i++) { if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL) continue; return true; } return false; } static void thread_stack_free_rcu(struct rcu_head *rh) { struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu); if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area)) return; vfree(vm_stack); } static void thread_stack_delayed_free(struct task_struct *tsk) { struct vm_stack *vm_stack = tsk->stack; vm_stack->stack_vm_area = tsk->stack_vm_area; call_rcu(&vm_stack->rcu, thread_stack_free_rcu); } static int free_vm_stack_cache(unsigned int cpu) { struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); int i; for (i = 0; i < NR_CACHED_STACKS; i++) { struct vm_struct *vm_stack = cached_vm_stacks[i]; if (!vm_stack) continue; vfree(vm_stack->addr); cached_vm_stacks[i] = NULL; } return 0; } static int memcg_charge_kernel_stack(struct vm_struct *vm) { int i; int ret; int nr_charged = 0; BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0); if (ret) goto err; nr_charged++; } return 0; err: for (i = 0; i < nr_charged; i++) memcg_kmem_uncharge_page(vm->pages[i], 0); return ret; } static int alloc_thread_stack_node(struct task_struct *tsk, int node) { struct vm_struct *vm; void *stack; int i; for (i = 0; i < NR_CACHED_STACKS; i++) { struct vm_struct *s; s = this_cpu_xchg(cached_stacks[i], NULL); if (!s) continue; /* Reset stack metadata. */ kasan_unpoison_range(s->addr, THREAD_SIZE); stack = kasan_reset_tag(s->addr); /* Clear stale pointers from reused stack. 
*/ memset(stack, 0, THREAD_SIZE); if (memcg_charge_kernel_stack(s)) { vfree(s->addr); return -ENOMEM; } tsk->stack_vm_area = s; tsk->stack = stack; return 0; } /* * Allocated stacks are cached and later reused by new threads, * so memcg accounting is performed manually on assigning/releasing * stacks to tasks. Drop __GFP_ACCOUNT. */ stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, VMALLOC_END, THREADINFO_GFP & ~__GFP_ACCOUNT, PAGE_KERNEL, 0, node, __builtin_return_address(0)); if (!stack) return -ENOMEM; vm = find_vm_area(stack); if (memcg_charge_kernel_stack(vm)) { vfree(stack); return -ENOMEM; } /* * We can't call find_vm_area() in interrupt context, and * free_thread_stack() can be called in interrupt context, * so cache the vm_struct. */ tsk->stack_vm_area = vm; stack = kasan_reset_tag(stack); tsk->stack = stack; return 0; } static void free_thread_stack(struct task_struct *tsk) { if (!try_release_thread_stack_to_cache(tsk->stack_vm_area)) thread_stack_delayed_free(tsk); tsk->stack = NULL; tsk->stack_vm_area = NULL; } # else /* !CONFIG_VMAP_STACK */ static void thread_stack_free_rcu(struct rcu_head *rh) { __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER); } static void thread_stack_delayed_free(struct task_struct *tsk) { struct rcu_head *rh = tsk->stack; call_rcu(rh, thread_stack_free_rcu); } static int alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); if (likely(page)) { tsk->stack = kasan_reset_tag(page_address(page)); return 0; } return -ENOMEM; } static void free_thread_stack(struct task_struct *tsk) { thread_stack_delayed_free(tsk); tsk->stack = NULL; } # endif /* CONFIG_VMAP_STACK */ # else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */ static struct kmem_cache *thread_stack_cache; static void thread_stack_free_rcu(struct rcu_head *rh) { kmem_cache_free(thread_stack_cache, rh); } static void thread_stack_delayed_free(struct task_struct *tsk) { struct rcu_head *rh = tsk->stack; call_rcu(rh, thread_stack_free_rcu); } static int alloc_thread_stack_node(struct task_struct *tsk, int node) { unsigned long *stack; stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); stack = kasan_reset_tag(stack); tsk->stack = stack; return stack ? 
0 : -ENOMEM; } static void free_thread_stack(struct task_struct *tsk) { thread_stack_delayed_free(tsk); tsk->stack = NULL; } void thread_stack_cache_init(void) { thread_stack_cache = kmem_cache_create_usercopy("thread_stack", THREAD_SIZE, THREAD_SIZE, 0, 0, THREAD_SIZE, NULL); BUG_ON(thread_stack_cache == NULL); } # endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ /* SLAB cache for signal_struct structures (tsk->signal) */ static struct kmem_cache *signal_cachep; /* SLAB cache for sighand_struct structures (tsk->sighand) */ struct kmem_cache *sighand_cachep; /* SLAB cache for files_struct structures (tsk->files) */ struct kmem_cache *files_cachep; /* SLAB cache for fs_struct structures (tsk->fs) */ struct kmem_cache *fs_cachep; /* SLAB cache for vm_area_struct structures */ static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; #ifdef CONFIG_PER_VMA_LOCK /* SLAB cache for vm_area_struct.lock */ static struct kmem_cache *vma_lock_cachep; static bool vma_lock_alloc(struct vm_area_struct *vma) { vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL); if (!vma->vm_lock) return false; init_rwsem(&vma->vm_lock->lock); vma->vm_lock_seq = -1; return true; } static inline void vma_lock_free(struct vm_area_struct *vma) { kmem_cache_free(vma_lock_cachep, vma->vm_lock); } #else /* CONFIG_PER_VMA_LOCK */ static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; } static inline void vma_lock_free(struct vm_area_struct *vma) {} #endif /* CONFIG_PER_VMA_LOCK */ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) { struct vm_area_struct *vma; vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) return NULL; vma_init(vma, mm); if (!vma_lock_alloc(vma)) { kmem_cache_free(vm_area_cachep, vma); return NULL; } return vma; } struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) { struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) return NULL; ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); ASSERT_EXCLUSIVE_WRITER(orig->vm_file); /* * orig->shared.rb may be modified concurrently, but the clone * will be reinitialized. */ data_race(memcpy(new, orig, sizeof(*new))); if (!vma_lock_alloc(new)) { kmem_cache_free(vm_area_cachep, new); return NULL; } INIT_LIST_HEAD(&new->anon_vma_chain); vma_numab_state_init(new); dup_anon_vma_name(orig, new); return new; } void __vm_area_free(struct vm_area_struct *vma) { vma_numab_state_free(vma); free_anon_vma_name(vma); vma_lock_free(vma); kmem_cache_free(vm_area_cachep, vma); } #ifdef CONFIG_PER_VMA_LOCK static void vm_area_free_rcu_cb(struct rcu_head *head) { struct vm_area_struct *vma = container_of(head, struct vm_area_struct, vm_rcu); /* The vma should not be locked while being destroyed. */ VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma); __vm_area_free(vma); } #endif void vm_area_free(struct vm_area_struct *vma) { #ifdef CONFIG_PER_VMA_LOCK call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb); #else __vm_area_free(vma); #endif } static void account_kernel_stack(struct task_struct *tsk, int account) { if (IS_ENABLED(CONFIG_VMAP_STACK)) { struct vm_struct *vm = task_stack_vm_area(tsk); int i; for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB, account * (PAGE_SIZE / 1024)); } else { void *stack = task_stack_page(tsk); /* All stack pages are in the same node. 
*/ mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB, account * (THREAD_SIZE / 1024)); } } void exit_task_stack_account(struct task_struct *tsk) { account_kernel_stack(tsk, -1); if (IS_ENABLED(CONFIG_VMAP_STACK)) { struct vm_struct *vm; int i; vm = task_stack_vm_area(tsk); for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) memcg_kmem_uncharge_page(vm->pages[i], 0); } } static void release_task_stack(struct task_struct *tsk) { if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD)) return; /* Better to leak the stack than to free prematurely */ free_thread_stack(tsk); } #ifdef CONFIG_THREAD_INFO_IN_TASK void put_task_stack(struct task_struct *tsk) { if (refcount_dec_and_test(&tsk->stack_refcount)) release_task_stack(tsk); } #endif void free_task(struct task_struct *tsk) { #ifdef CONFIG_SECCOMP WARN_ON_ONCE(tsk->seccomp.filter); #endif release_user_cpus_ptr(tsk); scs_release(tsk); #ifndef CONFIG_THREAD_INFO_IN_TASK /* * The task is finally done with both the stack and thread_info, * so free both. */ release_task_stack(tsk); #else /* * If the task had a separate stack allocation, it should be gone * by now. */ WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0); #endif rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); arch_release_task_struct(tsk); if (tsk->flags & PF_KTHREAD) free_kthread_struct(tsk); bpf_task_storage_free(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm) { struct file *exe_file; exe_file = get_mm_exe_file(oldmm); RCU_INIT_POINTER(mm->exe_file, exe_file); /* * We depend on the oldmm having properly denied write access to the * exe_file already. */ if (exe_file && deny_write_access(exe_file)) pr_warn_once("deny_write_access() failed in %s\n", __func__); } #ifdef CONFIG_MMU static __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) { struct vm_area_struct *mpnt, *tmp; int retval; unsigned long charge = 0; LIST_HEAD(uf); VMA_ITERATOR(vmi, mm, 0); uprobe_start_dup_mmap(); if (mmap_write_lock_killable(oldmm)) { retval = -EINTR; goto fail_uprobe_end; } flush_cache_dup_mm(oldmm); uprobe_dup_mmap(oldmm, mm); /* * Not linked in yet - no deadlock potential: */ mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING); /* No ordering required: file already has been exposed. */ dup_mm_exe_file(mm, oldmm); mm->total_vm = oldmm->total_vm; mm->data_vm = oldmm->data_vm; mm->exec_vm = oldmm->exec_vm; mm->stack_vm = oldmm->stack_vm; retval = ksm_fork(mm, oldmm); if (retval) goto out; khugepaged_fork(mm, oldmm); /* Use __mt_dup() to efficiently build an identical maple tree. 
*/ retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL); if (unlikely(retval)) goto out; mt_clear_in_rcu(vmi.mas.tree); for_each_vma(vmi, mpnt) { struct file *file; vma_start_write(mpnt); if (mpnt->vm_flags & VM_DONTCOPY) { retval = vma_iter_clear_gfp(&vmi, mpnt->vm_start, mpnt->vm_end, GFP_KERNEL); if (retval) goto loop_out; vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); continue; } charge = 0; /* * Don't duplicate many vmas if we've been oom-killed (for * example) */ if (fatal_signal_pending(current)) { retval = -EINTR; goto loop_out; } if (mpnt->vm_flags & VM_ACCOUNT) { unsigned long len = vma_pages(mpnt); if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ goto fail_nomem; charge = len; } tmp = vm_area_dup(mpnt); if (!tmp) goto fail_nomem; retval = vma_dup_policy(mpnt, tmp); if (retval) goto fail_nomem_policy; tmp->vm_mm = mm; retval = dup_userfaultfd(tmp, &uf); if (retval) goto fail_nomem_anon_vma_fork; if (tmp->vm_flags & VM_WIPEONFORK) { /* * VM_WIPEONFORK gets a clean slate in the child. * Don't prepare anon_vma until fault since we don't * copy page for current vma. */ tmp->anon_vma = NULL; } else if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; vm_flags_clear(tmp, VM_LOCKED_MASK); /* * Copy/update hugetlb private vma information. */ if (is_vm_hugetlb_page(tmp)) hugetlb_dup_vma_private(tmp); /* * Link the vma into the MT. After using __mt_dup(), memory * allocation is not necessary here, so it cannot fail. */ vma_iter_bulk_store(&vmi, tmp); mm->map_count++; if (tmp->vm_ops && tmp->vm_ops->open) tmp->vm_ops->open(tmp); file = tmp->vm_file; if (file) { struct address_space *mapping = file->f_mapping; get_file(file); i_mmap_lock_write(mapping); if (vma_is_shared_maywrite(tmp)) mapping_allow_writable(mapping); flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ vma_interval_tree_insert_after(tmp, mpnt, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); i_mmap_unlock_write(mapping); } if (!(tmp->vm_flags & VM_WIPEONFORK)) retval = copy_page_range(tmp, mpnt); if (retval) { mpnt = vma_next(&vmi); goto loop_out; } } /* a new mm has just been created */ retval = arch_dup_mmap(oldmm, mm); loop_out: vma_iter_free(&vmi); if (!retval) { mt_set_in_rcu(vmi.mas.tree); } else if (mpnt) { /* * The entire maple tree has already been duplicated. If the * mmap duplication fails, mark the failure point with * XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered, * stop releasing VMAs that have not been duplicated after this * point. 
*/ mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); mas_store(&vmi.mas, XA_ZERO_ENTRY); } out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); mmap_write_unlock(oldmm); dup_userfaultfd_complete(&uf); fail_uprobe_end: uprobe_end_dup_mmap(); return retval; fail_nomem_anon_vma_fork: mpol_put(vma_policy(tmp)); fail_nomem_policy: vm_area_free(tmp); fail_nomem: retval = -ENOMEM; vm_unacct_memory(charge); goto loop_out; } static inline int mm_alloc_pgd(struct mm_struct *mm) { mm->pgd = pgd_alloc(mm); if (unlikely(!mm->pgd)) return -ENOMEM; return 0; } static inline void mm_free_pgd(struct mm_struct *mm) { pgd_free(mm, mm->pgd); } #else static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) { mmap_write_lock(oldmm); dup_mm_exe_file(mm, oldmm); mmap_write_unlock(oldmm); return 0; } #define mm_alloc_pgd(mm) (0) #define mm_free_pgd(mm) #endif /* CONFIG_MMU */ static void check_mm(struct mm_struct *mm) { int i; BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS, "Please make sure 'struct resident_page_types[]' is updated as well"); for (i = 0; i < NR_MM_COUNTERS; i++) { long x = percpu_counter_sum(&mm->rss_stat[i]); if (unlikely(x)) pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", mm, resident_page_types[i], x); } if (mm_pgtables_bytes(mm)) pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", mm_pgtables_bytes(mm)); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS VM_BUG_ON_MM(mm->pmd_huge_pte, mm); #endif } #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) static void do_check_lazy_tlb(void *arg) { struct mm_struct *mm = arg; WARN_ON_ONCE(current->active_mm == mm); } static void do_shoot_lazy_tlb(void *arg) { struct mm_struct *mm = arg; if (current->active_mm == mm) { WARN_ON_ONCE(current->mm); current->active_mm = &init_mm; switch_mm(mm, &init_mm, current); } } static void cleanup_lazy_tlbs(struct mm_struct *mm) { if (!IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) { /* * In this case, lazy tlb mms are refounted and would not reach * __mmdrop until all CPUs have switched away and mmdrop()ed. */ return; } /* * Lazy mm shootdown does not refcount "lazy tlb mm" usage, rather it * requires lazy mm users to switch to another mm when the refcount * drops to zero, before the mm is freed. This requires IPIs here to * switch kernel threads to init_mm. * * archs that use IPIs to flush TLBs can piggy-back that lazy tlb mm * switch with the final userspace teardown TLB flush which leaves the * mm lazy on this CPU but no others, reducing the need for additional * IPIs here. There are cases where a final IPI is still required here, * such as the final mmdrop being performed on a different CPU than the * one exiting, or kernel threads using the mm when userspace exits. * * IPI overheads have not found to be expensive, but they could be * reduced in a number of possible ways, for example (roughly * increasing order of complexity): * - The last lazy reference created by exit_mm() could instead switch * to init_mm, however it's probable this will run on the same CPU * immediately afterwards, so this may not reduce IPIs much. * - A batch of mms requiring IPIs could be gathered and freed at once. * - CPUs store active_mm where it can be remotely checked without a * lock, to filter out false-positives in the cpumask. 
* - After mm_users or mm_count reaches zero, switching away from the * mm could clear mm_cpumask to reduce some IPIs, perhaps together * with some batching or delaying of the final IPIs. * - A delayed freeing and RCU-like quiescing sequence based on mm * switching to avoid IPIs completely. */ on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1); if (IS_ENABLED(CONFIG_DEBUG_VM_SHOOT_LAZIES)) on_each_cpu(do_check_lazy_tlb, (void *)mm, 1); } /* * Called when the last reference to the mm * is dropped: either by a lazy thread or by * mmput. Free the page directory and the mm. */ void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); WARN_ON_ONCE(mm == current->mm); /* Ensure no CPUs are using this as their lazy tlb mm */ cleanup_lazy_tlbs(mm); WARN_ON_ONCE(mm == current->active_mm); mm_free_pgd(mm); destroy_context(mm); mmu_notifier_subscriptions_destroy(mm); check_mm(mm); put_user_ns(mm->user_ns); mm_pasid_drop(mm); mm_destroy_cid(mm); percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); static void mmdrop_async_fn(struct work_struct *work) { struct mm_struct *mm; mm = container_of(work, struct mm_struct, async_put_work); __mmdrop(mm); } static void mmdrop_async(struct mm_struct *mm) { if (unlikely(atomic_dec_and_test(&mm->mm_count))) { INIT_WORK(&mm->async_put_work, mmdrop_async_fn); schedule_work(&mm->async_put_work); } } static inline void free_signal_struct(struct signal_struct *sig) { taskstats_tgid_free(sig); sched_autogroup_exit(sig); /* * __mmdrop is not safe to call from softirq context on x86 due to * pgd_dtor so postpone it to the async context */ if (sig->oom_mm) mmdrop_async(sig->oom_mm); kmem_cache_free(signal_cachep, sig); } static inline void put_signal_struct(struct signal_struct *sig) { if (refcount_dec_and_test(&sig->sigcnt)) free_signal_struct(sig); } void __put_task_struct(struct task_struct *tsk) { WARN_ON(!tsk->exit_state); WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); io_uring_free(tsk); cgroup_free(tsk); task_numa_free(tsk, true); security_task_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); sched_core_free(tsk); free_task(tsk); } EXPORT_SYMBOL_GPL(__put_task_struct); void __put_task_struct_rcu_cb(struct rcu_head *rhp) { struct task_struct *task = container_of(rhp, struct task_struct, rcu); __put_task_struct(task); } EXPORT_SYMBOL_GPL(__put_task_struct_rcu_cb); void __init __weak arch_task_cache_init(void) { } /* * set_max_threads */ static void set_max_threads(unsigned int max_threads_suggested) { u64 threads; unsigned long nr_pages = totalram_pages(); /* * The number of threads shall be limited such that the thread * structures may only consume a small part of the available memory. */ if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64) threads = MAX_THREADS; else threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE, (u64) THREAD_SIZE * 8UL); if (threads > max_threads_suggested) threads = max_threads_suggested; max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); } #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT /* Initialized by the architecture: */ int arch_task_struct_size __read_mostly; #endif static void task_struct_whitelist(unsigned long *offset, unsigned long *size) { /* Fetch thread_struct whitelist for the architecture. */ arch_thread_struct_whitelist(offset, size); /* * Handle zero-sized whitelist or empty thread_struct, otherwise * adjust offset to position of thread_struct in task_struct. 
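 *
 * For reference, an illustrative sketch of what an architecture hook
 * usually reports (the field name here is made up, not any particular
 * architecture's):
 *
 *	*offset = offsetof(struct thread_struct, uregs);
 *	*size = sizeof_field(struct thread_struct, uregs);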
*/ if (unlikely(*size == 0)) *offset = 0; else *offset += offsetof(struct task_struct, thread); } void __init fork_init(void) { int i; #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN 0 #endif int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); unsigned long useroffset, usersize; /* create a slab on which task_structs can be allocated */ task_struct_whitelist(&useroffset, &usersize); task_struct_cachep = kmem_cache_create_usercopy("task_struct", arch_task_struct_size, align, SLAB_PANIC|SLAB_ACCOUNT, useroffset, usersize, NULL); /* do the arch specific task caches init */ arch_task_cache_init(); set_max_threads(MAX_THREADS); init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; for (i = 0; i < UCOUNT_COUNTS; i++) init_user_ns.ucount_max[i] = max_threads/2; set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_NPROC, RLIM_INFINITY); set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE, RLIM_INFINITY); set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, RLIM_INFINITY); set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, RLIM_INFINITY); #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", NULL, free_vm_stack_cache); #endif scs_init(); lockdep_init_task(&init_task); uprobes_init(); } int __weak arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; return 0; } void set_task_stack_end_magic(struct task_struct *tsk) { unsigned long *stackend; stackend = end_of_stack(tsk); *stackend = STACK_END_MAGIC; /* for overflow detection */ } static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; int err; if (node == NUMA_NO_NODE) node = tsk_fork_get_node(orig); tsk = alloc_task_struct_node(node); if (!tsk) return NULL; err = arch_dup_task_struct(tsk, orig); if (err) goto free_tsk; err = alloc_thread_stack_node(tsk, node); if (err) goto free_tsk; #ifdef CONFIG_THREAD_INFO_IN_TASK refcount_set(&tsk->stack_refcount, 1); #endif account_kernel_stack(tsk, 1); err = scs_prepare(tsk, node); if (err) goto free_stack; #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under * the sighand lock in case orig has changed between now and * then. Until then, filter must be NULL to avoid messing up * the usage counts on the error path calling free_task. */ tsk->seccomp.filter = NULL; #endif setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); set_task_stack_end_magic(tsk); clear_syscall_work_syscall_user_dispatch(tsk); #ifdef CONFIG_STACKPROTECTOR tsk->stack_canary = get_random_canary(); #endif if (orig->cpus_ptr == &orig->cpus_mask) tsk->cpus_ptr = &tsk->cpus_mask; dup_user_cpus_ptr(tsk, orig, node); /* * One for the user space visible state that goes away when reaped. * One for the scheduler. 
*/ refcount_set(&tsk->rcu_users, 2); /* One for the rcu users */ refcount_set(&tsk->usage, 1); #ifdef CONFIG_BLK_DEV_IO_TRACE tsk->btrace_seq = 0; #endif tsk->splice_pipe = NULL; tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; tsk->worker_private = NULL; kcov_task_init(tsk); kmsan_task_create(tsk); kmap_local_fork(tsk); #ifdef CONFIG_FAULT_INJECTION tsk->fail_nth = 0; #endif #ifdef CONFIG_BLK_CGROUP tsk->throttle_disk = NULL; tsk->use_memdelay = 0; #endif #ifdef CONFIG_ARCH_HAS_CPU_PASID tsk->pasid_activated = 0; #endif #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif #ifdef CONFIG_CPU_SUP_INTEL tsk->reported_split_lock = 0; #endif #ifdef CONFIG_SCHED_MM_CID tsk->mm_cid = -1; tsk->last_mm_cid = -1; tsk->mm_cid_active = 0; tsk->migrate_from_cpu = -1; #endif return tsk; free_stack: exit_task_stack_account(tsk); free_thread_stack(tsk); free_tsk: free_task_struct(tsk); return NULL; } __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; static int __init coredump_filter_setup(char *s) { default_dump_filter = (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & MMF_DUMP_FILTER_MASK; return 1; } __setup("coredump_filter=", coredump_filter_setup); #include <linux/init_task.h> static void mm_init_aio(struct mm_struct *mm) { #ifdef CONFIG_AIO spin_lock_init(&mm->ioctx_lock); mm->ioctx_table = NULL; #endif } static __always_inline void mm_clear_owner(struct mm_struct *mm, struct task_struct *p) { #ifdef CONFIG_MEMCG if (mm->owner == p) WRITE_ONCE(mm->owner, NULL); #endif } static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { #ifdef CONFIG_MEMCG mm->owner = p; #endif } static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES mm->uprobes_state.xol_area = NULL; #endif } static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, struct user_namespace *user_ns) { mt_init_flags(&mm->mm_mt, MM_MT_FLAGS); mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock); atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); seqcount_init(&mm->write_protect_seq); mmap_init_lock(mm); INIT_LIST_HEAD(&mm->mmlist); #ifdef CONFIG_PER_VMA_LOCK mm->mm_lock_seq = 0; #endif mm_pgtables_bytes_init(mm); mm->map_count = 0; mm->locked_vm = 0; atomic64_set(&mm->pinned_vm, 0); memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); spin_lock_init(&mm->arg_lock); mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); mm_pasid_init(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif mm_init_uprobes_state(mm); hugetlb_count_init(mm); if (current->mm) { mm->flags = mmf_init_flags(current->mm->flags); mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; } else { mm->flags = default_dump_filter; mm->def_flags = 0; } if (mm_alloc_pgd(mm)) goto fail_nopgd; if (init_new_context(p, mm)) goto fail_nocontext; if (mm_alloc_cid(mm)) goto fail_cid; if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT, NR_MM_COUNTERS)) goto fail_pcpu; mm->user_ns = get_user_ns(user_ns); lru_gen_init_mm(mm); return mm; fail_pcpu: mm_destroy_cid(mm); fail_cid: destroy_context(mm); fail_nocontext: mm_free_pgd(mm); fail_nopgd: free_mm(mm); return NULL; } /* * Allocate and initialize an mm_struct. 
*/ struct mm_struct *mm_alloc(void) { struct mm_struct *mm; mm = allocate_mm(); if (!mm) return NULL; memset(mm, 0, sizeof(*mm)); return mm_init(mm, current, current_user_ns()); } static inline void __mmput(struct mm_struct *mm) { VM_BUG_ON(atomic_read(&mm->mm_users)); uprobe_clear_state(mm); exit_aio(mm); ksm_exit(mm); khugepaged_exit(mm); /* must run before exit_mmap */ exit_mmap(mm); mm_put_huge_zero_page(mm); set_mm_exe_file(mm, NULL); if (!list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); list_del(&mm->mmlist); spin_unlock(&mmlist_lock); } if (mm->binfmt) module_put(mm->binfmt->module); lru_gen_del_mm(mm); mmdrop(mm); } /* * Decrement the use count and release all resources for an mm. */ void mmput(struct mm_struct *mm) { might_sleep(); if (atomic_dec_and_test(&mm->mm_users)) __mmput(mm); } EXPORT_SYMBOL_GPL(mmput); #ifdef CONFIG_MMU static void mmput_async_fn(struct work_struct *work) { struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); __mmput(mm); } void mmput_async(struct mm_struct *mm) { if (atomic_dec_and_test(&mm->mm_users)) { INIT_WORK(&mm->async_put_work, mmput_async_fn); schedule_work(&mm->async_put_work); } } EXPORT_SYMBOL_GPL(mmput_async); #endif /** * set_mm_exe_file - change a reference to the mm's executable file * @mm: The mm to change. * @new_exe_file: The new file to use. * * This changes mm's executable file (shown as symlink /proc/[pid]/exe). * * Main users are mmput() and sys_execve(). Callers prevent concurrent * invocations: in mmput() nobody alive left, in execve it happens before * the new mm is made visible to anyone. * * Can only fail if new_exe_file != NULL. */ int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) { struct file *old_exe_file; /* * It is safe to dereference the exe_file without RCU as * this function is only called if nobody else can access * this mm -- see comment above for justification. */ old_exe_file = rcu_dereference_raw(mm->exe_file); if (new_exe_file) { /* * We expect the caller (i.e., sys_execve) to already denied * write access, so this is unlikely to fail. */ if (unlikely(deny_write_access(new_exe_file))) return -EACCES; get_file(new_exe_file); } rcu_assign_pointer(mm->exe_file, new_exe_file); if (old_exe_file) { allow_write_access(old_exe_file); fput(old_exe_file); } return 0; } /** * replace_mm_exe_file - replace a reference to the mm's executable file * @mm: The mm to change. * @new_exe_file: The new file to use. * * This changes mm's executable file (shown as symlink /proc/[pid]/exe). * * Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE). */ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) { struct vm_area_struct *vma; struct file *old_exe_file; int ret = 0; /* Forbid mm->exe_file change if old file still mapped. */ old_exe_file = get_mm_exe_file(mm); if (old_exe_file) { VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); for_each_vma(vmi, vma) { if (!vma->vm_file) continue; if (path_equal(&vma->vm_file->f_path, &old_exe_file->f_path)) { ret = -EBUSY; break; } } mmap_read_unlock(mm); fput(old_exe_file); if (ret) return ret; } ret = deny_write_access(new_exe_file); if (ret) return -EACCES; get_file(new_exe_file); /* set the new file */ mmap_write_lock(mm); old_exe_file = rcu_dereference_raw(mm->exe_file); rcu_assign_pointer(mm->exe_file, new_exe_file); mmap_write_unlock(mm); if (old_exe_file) { allow_write_access(old_exe_file); fput(old_exe_file); } return 0; } /** * get_mm_exe_file - acquire a reference to the mm's executable file * @mm: The mm of interest. 
* * Returns %NULL if mm has no associated executable file. * User must release file via fput(). */ struct file *get_mm_exe_file(struct mm_struct *mm) { struct file *exe_file; rcu_read_lock(); exe_file = get_file_rcu(&mm->exe_file); rcu_read_unlock(); return exe_file; } /** * get_task_exe_file - acquire a reference to the task's executable file * @task: The task. * * Returns %NULL if task's mm (if any) has no associated executable file or * this is a kernel thread with borrowed mm (see the comment above get_task_mm). * User must release file via fput(). */ struct file *get_task_exe_file(struct task_struct *task) { struct file *exe_file = NULL; struct mm_struct *mm; task_lock(task); mm = task->mm; if (mm) { if (!(task->flags & PF_KTHREAD)) exe_file = get_mm_exe_file(mm); } task_unlock(task); return exe_file; } /** * get_task_mm - acquire a reference to the task's mm * @task: The task. * * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning * this kernel workthread has transiently adopted a user mm with use_mm, * to do its AIO) is not set and if so returns a reference to it, after * bumping up the use count. User must release the mm via mmput() * after use. Typically used by /proc and ptrace. */ struct mm_struct *get_task_mm(struct task_struct *task) { struct mm_struct *mm; task_lock(task); mm = task->mm; if (mm) { if (task->flags & PF_KTHREAD) mm = NULL; else mmget(mm); } task_unlock(task); return mm; } EXPORT_SYMBOL_GPL(get_task_mm); struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) { struct mm_struct *mm; int err; err = down_read_killable(&task->signal->exec_update_lock); if (err) return ERR_PTR(err); mm = get_task_mm(task); if (mm && mm != current->mm && !ptrace_may_access(task, mode)) { mmput(mm); mm = ERR_PTR(-EACCES); } up_read(&task->signal->exec_update_lock); return mm; } static void complete_vfork_done(struct task_struct *tsk) { struct completion *vfork; task_lock(tsk); vfork = tsk->vfork_done; if (likely(vfork)) { tsk->vfork_done = NULL; complete(vfork); } task_unlock(tsk); } static int wait_for_vfork_done(struct task_struct *child, struct completion *vfork) { unsigned int state = TASK_KILLABLE|TASK_FREEZABLE; int killed; cgroup_enter_frozen(); killed = wait_for_completion_state(vfork, state); cgroup_leave_frozen(false); if (killed) { task_lock(child); child->vfork_done = NULL; task_unlock(child); } put_task_struct(child); return killed; } /* Please note the differences between mmput and mm_release. * mmput is called whenever we stop holding onto a mm_struct, * error success whatever. * * mm_release is called after a mm_struct has been removed * from the current process. * * This difference is important for error handling, when we * only half set up a mm_struct for a new process and need to restore * the old one. Because we mmput the new mm_struct before * restoring the old one. . . * Eric Biederman 10 January 1998 */ static void mm_release(struct task_struct *tsk, struct mm_struct *mm) { uprobe_free_utask(tsk); /* Get rid of any cached register state */ deactivate_mm(tsk, mm); /* * Signal userspace if we're not exiting with a core dump * because we want to leave the value intact for debugging * purposes. */ if (tsk->clear_child_tid) { if (atomic_read(&mm->mm_users) > 1) { /* * We don't check the error code - if userspace has * not set up a proper pointer then tough luck. 
*/ put_user(0, tsk->clear_child_tid); do_futex(tsk->clear_child_tid, FUTEX_WAKE, 1, NULL, NULL, 0, 0); } tsk->clear_child_tid = NULL; } /* * All done, finally we can wake up parent and return this mm to him. * Also kthread_stop() uses this completion for synchronization. */ if (tsk->vfork_done) complete_vfork_done(tsk); } void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) { futex_exit_release(tsk); mm_release(tsk, mm); } void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) { futex_exec_release(tsk); mm_release(tsk, mm); } /** * dup_mm() - duplicates an existing mm structure * @tsk: the task_struct with which the new mm will be associated. * @oldmm: the mm to duplicate. * * Allocates a new mm structure and duplicates the provided @oldmm structure * content into it. * * Return: the duplicated mm or NULL on failure. */ static struct mm_struct *dup_mm(struct task_struct *tsk, struct mm_struct *oldmm) { struct mm_struct *mm; int err; mm = allocate_mm(); if (!mm) goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; err = dup_mmap(mm, oldmm); if (err) goto free_pt; mm->hiwater_rss = get_mm_rss(mm); mm->hiwater_vm = mm->total_vm; if (mm->binfmt && !try_module_get(mm->binfmt->module)) goto free_pt; return mm; free_pt: /* don't put binfmt in mmput, we haven't got module yet */ mm->binfmt = NULL; mm_init_owner(mm, NULL); mmput(mm); fail_nomem: return NULL; } static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) { struct mm_struct *mm, *oldmm; tsk->min_flt = tsk->maj_flt = 0; tsk->nvcsw = tsk->nivcsw = 0; #ifdef CONFIG_DETECT_HUNG_TASK tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; tsk->last_switch_time = 0; #endif tsk->mm = NULL; tsk->active_mm = NULL; /* * Are we cloning a kernel thread? * * We need to steal a active VM for that.. */ oldmm = current->mm; if (!oldmm) return 0; if (clone_flags & CLONE_VM) { mmget(oldmm); mm = oldmm; } else { mm = dup_mm(tsk, current->mm); if (!mm) return -ENOMEM; } tsk->mm = mm; tsk->active_mm = mm; sched_mm_cid_fork(tsk); return 0; } static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) { struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ spin_lock(&fs->lock); /* "users" and "in_exec" locked for check_unsafe_exec() */ if (fs->in_exec) { spin_unlock(&fs->lock); return -EAGAIN; } fs->users++; spin_unlock(&fs->lock); return 0; } tsk->fs = copy_fs_struct(fs); if (!tsk->fs) return -ENOMEM; return 0; } static int copy_files(unsigned long clone_flags, struct task_struct *tsk, int no_files) { struct files_struct *oldf, *newf; int error = 0; /* * A background process may not have any files ... 
*/ oldf = current->files; if (!oldf) goto out; if (no_files) { tsk->files = NULL; goto out; } if (clone_flags & CLONE_FILES) { atomic_inc(&oldf->count); goto out; } newf = dup_fd(oldf, NR_OPEN_MAX, &error); if (!newf) goto out; tsk->files = newf; error = 0; out: return error; } static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) { struct sighand_struct *sig; if (clone_flags & CLONE_SIGHAND) { refcount_inc(&current->sighand->count); return 0; } sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); RCU_INIT_POINTER(tsk->sighand, sig); if (!sig) return -ENOMEM; refcount_set(&sig->count, 1); spin_lock_irq(&current->sighand->siglock); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); spin_unlock_irq(&current->sighand->siglock); /* Reset all signal handler not set to SIG_IGN to SIG_DFL. */ if (clone_flags & CLONE_CLEAR_SIGHAND) flush_signal_handlers(tsk, 0); return 0; } void __cleanup_sighand(struct sighand_struct *sighand) { if (refcount_dec_and_test(&sighand->count)) { signalfd_cleanup(sighand); /* * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it * without an RCU grace period, see __lock_task_sighand(). */ kmem_cache_free(sighand_cachep, sighand); } } /* * Initialize POSIX timer handling for a thread group. */ static void posix_cpu_timers_init_group(struct signal_struct *sig) { struct posix_cputimers *pct = &sig->posix_cputimers; unsigned long cpu_limit; cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); posix_cputimers_group_init(pct, cpu_limit); } static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { struct signal_struct *sig; if (clone_flags & CLONE_THREAD) return 0; sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; if (!sig) return -ENOMEM; sig->nr_threads = 1; sig->quick_threads = 1; atomic_set(&sig->live, 1); refcount_set(&sig->sigcnt, 1); /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); INIT_HLIST_HEAD(&sig->multiprocess); seqlock_init(&sig->stats_lock); prev_cputime_init(&sig->prev_cputime); #ifdef CONFIG_POSIX_TIMERS INIT_LIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; #endif task_lock(current->group_leader); memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); posix_cpu_timers_init_group(sig); tty_audit_fork(sig); sched_autogroup_fork(sig); sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; mutex_init(&sig->cred_guard_mutex); init_rwsem(&sig->exec_update_lock); return 0; } static void copy_seccomp(struct task_struct *p) { #ifdef CONFIG_SECCOMP /* * Must be called with sighand->lock held, which is common to * all threads in the group. Holding cred_guard_mutex is not * needed because this new task is not yet running and cannot * be racing exec. */ assert_spin_locked(&current->sighand->siglock); /* Ref-count the new filter user, and assign it. */ get_seccomp_filter(current); p->seccomp = current->seccomp; /* * Explicitly enable no_new_privs here in case it got set * between the task_struct being duplicated and holding the * sighand lock. The seccomp state and nnp must be in sync. 
*/ if (task_no_new_privs(current)) task_set_no_new_privs(p); /* * If the parent gained a seccomp mode after copying thread * flags and between before we held the sighand lock, we have * to manually enable the seccomp thread flag here. */ if (p->seccomp.mode != SECCOMP_MODE_DISABLED) set_task_syscall_work(p, SECCOMP); #endif } SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; return task_pid_vnr(current); } static void rt_mutex_init_task(struct task_struct *p) { raw_spin_lock_init(&p->pi_lock); #ifdef CONFIG_RT_MUTEXES p->pi_waiters = RB_ROOT_CACHED; p->pi_top_task = NULL; p->pi_blocked_on = NULL; #endif } static inline void init_task_pid_links(struct task_struct *task) { enum pid_type type; for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) INIT_HLIST_NODE(&task->pid_links[type]); } static inline void init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) { if (type == PIDTYPE_PID) task->thread_pid = pid; else task->signal->pids[type] = pid; } static inline void rcu_copy_process(struct task_struct *p) { #ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special.s = 0; p->rcu_blocked_node = NULL; INIT_LIST_HEAD(&p->rcu_node_entry); #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TASKS_RCU p->rcu_tasks_holdout = false; INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); p->rcu_tasks_idle_cpu = -1; INIT_LIST_HEAD(&p->rcu_tasks_exit_list); #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU p->trc_reader_nesting = 0; p->trc_reader_special.s = 0; INIT_LIST_HEAD(&p->trc_holdout_list); INIT_LIST_HEAD(&p->trc_blkd_node); #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ } /** * __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd * @pid: the struct pid for which to create a pidfd * @flags: flags of the new @pidfd * @ret: Where to return the file for the pidfd. * * Allocate a new file that stashes @pid and reserve a new pidfd number in the * caller's file descriptor table. The pidfd is reserved but not installed yet. * * The helper doesn't perform checks on @pid which makes it useful for pidfds * created via CLONE_PIDFD where @pid has no task attached when the pidfd and * pidfd file are prepared. * * If this function returns successfully the caller is responsible to either * call fd_install() passing the returned pidfd and pidfd file as arguments in * order to install the pidfd into its file descriptor table or they must use * put_unused_fd() and fput() on the returned pidfd and pidfd file * respectively. * * This function is useful when a pidfd must already be reserved but there * might still be points of failure afterwards and the caller wants to ensure * that no pidfd is leaked into its file descriptor table. * * Return: On success, a reserved pidfd is returned from the function and a new * pidfd file is returned in the last argument to the function. On * error, a negative error code is returned from the function and the * last argument remains unchanged. */ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { int pidfd; struct file *pidfd_file; pidfd = get_unused_fd_flags(O_CLOEXEC); if (pidfd < 0) return pidfd; pidfd_file = pidfs_alloc_file(pid, flags | O_RDWR); if (IS_ERR(pidfd_file)) { put_unused_fd(pidfd); return PTR_ERR(pidfd_file); } /* * anon_inode_getfile() ignores everything outside of the * O_ACCMODE | O_NONBLOCK mask, set PIDFD_THREAD manually. 
*/ pidfd_file->f_flags |= (flags & PIDFD_THREAD); *ret = pidfd_file; return pidfd; } /** * pidfd_prepare - allocate a new pidfd_file and reserve a pidfd * @pid: the struct pid for which to create a pidfd * @flags: flags of the new @pidfd * @ret: Where to return the pidfd. * * Allocate a new file that stashes @pid and reserve a new pidfd number in the * caller's file descriptor table. The pidfd is reserved but not installed yet. * * The helper verifies that @pid is still in use, without PIDFD_THREAD the * task identified by @pid must be a thread-group leader. * * If this function returns successfully the caller is responsible to either * call fd_install() passing the returned pidfd and pidfd file as arguments in * order to install the pidfd into its file descriptor table or they must use * put_unused_fd() and fput() on the returned pidfd and pidfd file * respectively. * * This function is useful when a pidfd must already be reserved but there * might still be points of failure afterwards and the caller wants to ensure * that no pidfd is leaked into its file descriptor table. * * Return: On success, a reserved pidfd is returned from the function and a new * pidfd file is returned in the last argument to the function. On * error, a negative error code is returned from the function and the * last argument remains unchanged. */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { bool thread = flags & PIDFD_THREAD; if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) return -EINVAL; return __pidfd_prepare(pid, flags, ret); } static void __delayed_free_task(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); free_task(tsk); } static __always_inline void delayed_free_task(struct task_struct *tsk) { if (IS_ENABLED(CONFIG_MEMCG)) call_rcu(&tsk->rcu, __delayed_free_task); else free_task(tsk); } static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) { /* Skip if kernel thread */ if (!tsk->mm) return; /* Skip if spawning a thread or using vfork */ if ((clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK)) != CLONE_VM) return; /* We need to synchronize with __set_oom_adj */ mutex_lock(&oom_adj_mutex); set_bit(MMF_MULTIPROCESS, &tsk->mm->flags); /* Update the values in case they were changed after copy_signal */ tsk->signal->oom_score_adj = current->signal->oom_score_adj; tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min; mutex_unlock(&oom_adj_mutex); } #ifdef CONFIG_RV static void rv_task_fork(struct task_struct *p) { int i; for (i = 0; i < RV_PER_TASK_MONITORS; i++) p->rv[i].da_mon.monitoring = false; } #else #define rv_task_fork(p) do {} while (0) #endif /* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. 
*/ __latent_entropy struct task_struct *copy_process( struct pid *pid, int trace, int node, struct kernel_clone_args *args) { int pidfd = -1, retval; struct task_struct *p; struct multiprocess_signals delayed; struct file *pidfile = NULL; const u64 clone_flags = args->flags; struct nsproxy *nsp = current->nsproxy; /* * Don't allow sharing the root directory with processes in a different * namespace */ if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); /* * If the new process will be in a different pid or user namespace * do not allow it to share a thread group with the forking task. */ if (clone_flags & CLONE_THREAD) { if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || (task_active_pid_ns(current) != nsp->pid_ns_for_children)) return ERR_PTR(-EINVAL); } if (clone_flags & CLONE_PIDFD) { /* * - CLONE_DETACHED is blocked so that we can potentially * reuse it later for CLONE_PIDFD. */ if (clone_flags & CLONE_DETACHED) return ERR_PTR(-EINVAL); } /* * Force any signals received before this point to be delivered * before the fork happens. Collect up signals sent to multiple * processes that happen during the fork and delay them so that * they appear to happen after the fork. */ sigemptyset(&delayed.signal); INIT_HLIST_NODE(&delayed.node); spin_lock_irq(&current->sighand->siglock); if (!(clone_flags & CLONE_THREAD)) hlist_add_head(&delayed.node, &current->signal->multiprocess); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); retval = -ERESTARTNOINTR; if (task_sigpending(current)) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current, node); if (!p) goto fork_out; p->flags &= ~PF_KTHREAD; if (args->kthread) p->flags |= PF_KTHREAD; if (args->user_worker) { /* * Mark us a user worker, and block any signal that isn't * fatal or STOP */ p->flags |= PF_USER_WORKER; siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); } if (args->io_thread) p->flags |= PF_IO_WORKER; if (args->name) strscpy_pad(p->comm, args->name, sizeof(p->comm)); p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
args->child_tid : NULL; ftrace_graph_init_task(p); rt_mutex_init_task(p); lockdep_assert_irqs_enabled(); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; retval = -EAGAIN; if (is_rlimit_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_cleanup_count; } current->flags &= ~PF_NPROC_EXCEEDED; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ retval = -EAGAIN; if (data_race(nr_threads >= max_threads)) goto bad_fork_cleanup_count; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE | PF_NO_SETAFFINITY); p->flags |= PF_FORKNOEXEC; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = p->stime = p->gtime = 0; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME p->utimescaled = p->stimescaled = 0; #endif prev_cputime_init(&p->prev_cputime); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqcount_init(&p->vtime.seqcount); p->vtime.starttime = 0; p->vtime.state = VTIME_INACTIVE; #endif #ifdef CONFIG_IO_URING p->io_uring = NULL; #endif p->default_timer_slack_ns = current->timer_slack_ns; #ifdef CONFIG_PSI p->psi_flags = 0; #endif task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cputimers_init(&p->posix_cputimers); p->io_context = NULL; audit_set_context(p, NULL); cgroup_fork(p); if (args->kthread) { if (!set_kthread_struct(p)) goto bad_fork_cleanup_delayacct; } #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_delayacct; } #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; p->cpuset_slab_spread_rotor = NUMA_NO_NODE; seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock); #endif #ifdef CONFIG_TRACE_IRQFLAGS memset(&p->irqtrace, 0, sizeof(p->irqtrace)); p->irqtrace.hardirq_disable_ip = _THIS_IP_; p->irqtrace.softirq_enable_ip = _THIS_IP_; p->softirqs_enabled = 1; p->softirq_context = 0; #endif p->pagefault_disabled = 0; #ifdef CONFIG_LOCKDEP lockdep_init_task(p); #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_BCACHE p->sequential_io = 0; p->sequential_io_avg = 0; #endif #ifdef CONFIG_BPF_SYSCALL RCU_INIT_POINTER(p->bpf_storage, NULL); p->bpf_ctx = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. 
*/ retval = sched_fork(clone_flags, p); if (retval) goto bad_fork_cleanup_policy; retval = perf_event_init_task(p, clone_flags); if (retval) goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_perf; /* copy all the process information */ shm_init_task(p); retval = security_task_alloc(p, clone_flags); if (retval) goto bad_fork_cleanup_audit; retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_security; retval = copy_files(clone_flags, p, args->no_files); if (retval) goto bad_fork_cleanup_semundo; retval = copy_fs(clone_flags, p); if (retval) goto bad_fork_cleanup_files; retval = copy_sighand(clone_flags, p); if (retval) goto bad_fork_cleanup_fs; retval = copy_signal(clone_flags, p); if (retval) goto bad_fork_cleanup_sighand; retval = copy_mm(clone_flags, p); if (retval) goto bad_fork_cleanup_signal; retval = copy_namespaces(clone_flags, p); if (retval) goto bad_fork_cleanup_mm; retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; retval = copy_thread(p, args); if (retval) goto bad_fork_cleanup_io; stackleak_task_init(p); if (pid != &init_struct_pid) { pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid, args->set_tid_size); if (IS_ERR(pid)) { retval = PTR_ERR(pid); goto bad_fork_cleanup_thread; } } /* * This has to happen after we've potentially unshared the file * descriptor table (so that the pidfd doesn't leak into the child * if the fd table isn't shared). */ if (clone_flags & CLONE_PIDFD) { int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0; /* Note that no task has been attached to @pid yet. */ retval = __pidfd_prepare(pid, flags, &pidfile); if (retval < 0) goto bad_fork_free_pid; pidfd = retval; retval = put_user(pidfd, args->pidfd); if (retval) goto bad_fork_put_pidfd; } #ifdef CONFIG_BLOCK p->plug = NULL; #endif futex_init_task(p); /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) sas_ss_reset(p); /* * Syscall tracing and stepping should be turned off in the * child regardless of CLONE_PTRACE. */ user_disable_single_step(p); clear_task_syscall_work(p, SYSCALL_TRACE); #if defined(CONFIG_GENERIC_ENTRY) || defined(TIF_SYSCALL_EMU) clear_task_syscall_work(p, SYSCALL_EMU); #endif clear_tsk_latency_tracing(p); /* ok, now we should be set up.. */ p->pid = pid_nr(pid); if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; p->tgid = current->tgid; } else { p->group_leader = p; p->tgid = p->pid; } p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; p->pdeath_signal = 0; p->task_works = NULL; clear_posix_cputimers_work(p); #ifdef CONFIG_KRETPROBES p->kretprobe_instances.first = NULL; #endif #ifdef CONFIG_RETHOOK p->rethooks.first = NULL; #endif /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. It should be noted that the new process's css_set can be changed * between here and cgroup_post_fork() if an organisation operation is in * progress. */ retval = cgroup_can_fork(p, args); if (retval) goto bad_fork_put_pidfd; /* * Now that the cgroups are pinned, re-clone the parent cgroup and put * the new task on the correct runqueue. All this *before* the task * becomes visible. * * This isn't part of ->can_fork() because while the re-cloning is * cgroup specific, it unconditionally needs to place the task on a * runqueue. 
*/ sched_cgroup_fork(p, args); /* * From this point on we must avoid any synchronous user-space * communication until we take the tasklist-lock. In particular, we do * not want user-space to be able to predict the process start-time by * stalling fork(2) after we recorded the start_time but before it is * visible to the system. */ p->start_time = ktime_get_ns(); p->start_boottime = ktime_get_boottime_ns(); /* * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; if (clone_flags & CLONE_THREAD) p->exit_signal = -1; else p->exit_signal = current->group_leader->exit_signal; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; p->exit_signal = args->exit_signal; } klp_copy_process(p); sched_core_fork(p); spin_lock(&current->sighand->siglock); rv_task_fork(p); rseq_fork(p, clone_flags); /* Don't start children in a dying pid namespace */ if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { retval = -ENOMEM; goto bad_fork_cancel_cgroup; } /* Let kill terminate clone/fork in the middle */ if (fatal_signal_pending(current)) { retval = -EINTR; goto bad_fork_cancel_cgroup; } /* No more failure paths after this point. */ /* * Copy seccomp details explicitly here, in case they were changed * before holding sighand lock. */ copy_seccomp(p); init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { init_task_pid(p, PIDTYPE_TGID, pid); init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); init_task_pid(p, PIDTYPE_SID, task_session(current)); if (is_child_reaper(pid)) { ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; } p->signal->shared_pending.signal = delayed.signal; p->signal->tty = tty_kref_get(current->signal->tty); /* * Inherit has_child_subreaper flag under the same * tasklist_lock with adding child to the process tree * for propagate_has_child_subreaper optimization. 
*/ p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || p->real_parent->signal->is_child_subreaper; list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); attach_pid(p, PIDTYPE_TGID); attach_pid(p, PIDTYPE_PGID); attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); } else { current->signal->nr_threads++; current->signal->quick_threads++; atomic_inc(&current->signal->live); refcount_inc(&current->signal->sigcnt); task_join_group_stop(p); list_add_tail_rcu(&p->thread_node, &p->signal->thread_head); } attach_pid(p, PIDTYPE_PID); nr_threads++; } total_forks++; hlist_del_init(&delayed.node); spin_unlock(&current->sighand->siglock); syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); if (pidfile) fd_install(pidfd, pidfile); proc_fork_connector(p); sched_post_fork(p); cgroup_post_fork(p, args); perf_event_fork(p); trace_task_newtask(p, clone_flags); uprobe_copy_process(p, clone_flags); user_events_fork(p, clone_flags); copy_oom_score_adj(clone_flags, p); return p; bad_fork_cancel_cgroup: sched_core_free(p); spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); cgroup_cancel_fork(p, args); bad_fork_put_pidfd: if (clone_flags & CLONE_PIDFD) { fput(pidfile); put_unused_fd(pidfd); } bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_thread: exit_thread(p); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) { mm_clear_owner(p->mm, p); mmput(p->mm); } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_perf: perf_event_free_task(p); bad_fork_cleanup_policy: lockdep_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); #endif bad_fork_cleanup_delayacct: delayacct_tsk_free(p); bad_fork_cleanup_count: dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); exit_creds(p); bad_fork_free: WRITE_ONCE(p->__state, TASK_DEAD); exit_task_stack_account(p); put_task_stack(p); delayed_free_task(p); fork_out: spin_lock_irq(&current->sighand->siglock); hlist_del_init(&delayed.node); spin_unlock_irq(&current->sighand->siglock); return ERR_PTR(retval); } static inline void init_idle_pids(struct task_struct *idle) { enum pid_type type; for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ init_task_pid(idle, type, &init_struct_pid); } } static int idle_dummy(void *dummy) { /* This function is never called */ return 0; } struct task_struct * __init fork_idle(int cpu) { struct task_struct *task; struct kernel_clone_args args = { .flags = CLONE_VM, .fn = &idle_dummy, .fn_arg = NULL, .kthread = 1, .idle = 1, }; task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args); if (!IS_ERR(task)) { init_idle_pids(task); init_idle(task, cpu); } return task; } /* * This is like kernel_clone(), but shaved down and tailored to just * creating io_uring workers. It returns a created task, or an error pointer. * The returned task is inactive, and the caller must fire it up through * wake_up_new_task(p). All signals are blocked in the created task. 
*/ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) { unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| CLONE_IO; struct kernel_clone_args args = { .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), .exit_signal = (lower_32_bits(flags) & CSIGNAL), .fn = fn, .fn_arg = arg, .io_thread = 1, .user_worker = 1, }; return copy_process(NULL, 0, node, &args); } /* * Ok, this is the main fork-routine. * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. * * args->exit_signal is expected to be checked for sanity by the caller. */ pid_t kernel_clone(struct kernel_clone_args *args) { u64 clone_flags = args->flags; struct completion vfork; struct pid *pid; struct task_struct *p; int trace = 0; pid_t nr; /* * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument * to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate * field in struct clone_args and it still doesn't make sense to have * them both point at the same memory location. Performing this check * here has the advantage that we don't need to have a separate helper * to check for legacy clone(). */ if ((clone_flags & CLONE_PIDFD) && (clone_flags & CLONE_PARENT_SETTID) && (args->pidfd == args->parent_tid)) return -EINVAL; /* * Determine whether and which event to report to ptracer. When * called from kernel_thread or CLONE_UNTRACED is explicitly * requested, no event is reported; otherwise, report if the event * for the type of forking is enabled. */ if (!(clone_flags & CLONE_UNTRACED)) { if (clone_flags & CLONE_VFORK) trace = PTRACE_EVENT_VFORK; else if (args->exit_signal != SIGCHLD) trace = PTRACE_EVENT_CLONE; else trace = PTRACE_EVENT_FORK; if (likely(!ptrace_event_enabled(current, trace))) trace = 0; } p = copy_process(NULL, trace, NUMA_NO_NODE, args); add_latent_entropy(); if (IS_ERR(p)) return PTR_ERR(p); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. */ trace_sched_process_fork(current, p); pid = get_task_pid(p, PIDTYPE_PID); nr = pid_vnr(pid); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, args->parent_tid); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); get_task_struct(p); } if (IS_ENABLED(CONFIG_LRU_GEN_WALKS_MMU) && !(clone_flags & CLONE_VM)) { /* lock the task to synchronize with memcg migration */ task_lock(p); lru_gen_add_mm(p->mm); task_unlock(p); } wake_up_new_task(p); /* forking complete and child started to run, tell ptracer */ if (unlikely(trace)) ptrace_event_pid(trace, pid); if (clone_flags & CLONE_VFORK) { if (!wait_for_vfork_done(p, &vfork)) ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); } put_pid(pid); return nr; } /* * Create a kernel thread. */ pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name, unsigned long flags) { struct kernel_clone_args args = { .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), .exit_signal = (lower_32_bits(flags) & CSIGNAL), .fn = fn, .fn_arg = arg, .name = name, .kthread = 1, }; return kernel_clone(&args); } /* * Create a user mode thread. 
*/ pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct kernel_clone_args args = { .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), .exit_signal = (lower_32_bits(flags) & CSIGNAL), .fn = fn, .fn_arg = arg, }; return kernel_clone(&args); } #ifdef __ARCH_WANT_SYS_FORK SYSCALL_DEFINE0(fork) { #ifdef CONFIG_MMU struct kernel_clone_args args = { .exit_signal = SIGCHLD, }; return kernel_clone(&args); #else /* can not support in nommu mode */ return -EINVAL; #endif } #endif #ifdef __ARCH_WANT_SYS_VFORK SYSCALL_DEFINE0(vfork) { struct kernel_clone_args args = { .flags = CLONE_VFORK | CLONE_VM, .exit_signal = SIGCHLD, }; return kernel_clone(&args); } #endif #ifdef __ARCH_WANT_SYS_CLONE #ifdef CONFIG_CLONE_BACKWARDS SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, unsigned long, tls, int __user *, child_tidptr) #elif defined(CONFIG_CLONE_BACKWARDS2) SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls) #elif defined(CONFIG_CLONE_BACKWARDS3) SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, int, stack_size, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls) #endif { struct kernel_clone_args args = { .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), .pidfd = parent_tidptr, .child_tid = child_tidptr, .parent_tid = parent_tidptr, .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), .stack = newsp, .tls = tls, }; return kernel_clone(&args); } #endif #ifdef __ARCH_WANT_SYS_CLONE3 noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, struct clone_args __user *uargs, size_t usize) { int err; struct clone_args args; pid_t *kset_tid = kargs->set_tid; BUILD_BUG_ON(offsetofend(struct clone_args, tls) != CLONE_ARGS_SIZE_VER0); BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) != CLONE_ARGS_SIZE_VER1); BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) != CLONE_ARGS_SIZE_VER2); BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2); if (unlikely(usize > PAGE_SIZE)) return -E2BIG; if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) return -EINVAL; err = copy_struct_from_user(&args, sizeof(args), uargs, usize); if (err) return err; if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL)) return -EINVAL; if (unlikely(!args.set_tid && args.set_tid_size > 0)) return -EINVAL; if (unlikely(args.set_tid && args.set_tid_size == 0)) return -EINVAL; /* * Verify that higher 32bits of exit_signal are unset and that * it is a valid signal */ if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) || !valid_signal(args.exit_signal))) return -EINVAL; if ((args.flags & CLONE_INTO_CGROUP) && (args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2)) return -EINVAL; *kargs = (struct kernel_clone_args){ .flags = args.flags, .pidfd = u64_to_user_ptr(args.pidfd), .child_tid = u64_to_user_ptr(args.child_tid), .parent_tid = u64_to_user_ptr(args.parent_tid), .exit_signal = args.exit_signal, .stack = args.stack, .stack_size = args.stack_size, .tls = args.tls, .set_tid_size = args.set_tid_size, .cgroup = args.cgroup, }; if (args.set_tid && copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), (kargs->set_tid_size * sizeof(pid_t)))) return -EFAULT; kargs->set_tid = kset_tid; return 0; } /** * clone3_stack_valid - 
check and prepare stack * @kargs: kernel clone args * * Verify that the stack arguments userspace gave us are sane. * In addition, set the stack direction for userspace since it's easy for us to * determine. */ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) { if (kargs->stack == 0) { if (kargs->stack_size > 0) return false; } else { if (kargs->stack_size == 0) return false; if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) return false; #if !defined(CONFIG_STACK_GROWSUP) kargs->stack += kargs->stack_size; #endif } return true; } static bool clone3_args_valid(struct kernel_clone_args *kargs) { /* Verify that no unknown flags are passed along. */ if (kargs->flags & ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP)) return false; /* * - make the CLONE_DETACHED bit reusable for clone3 * - make the CSIGNAL bits reusable for clone3 */ if (kargs->flags & (CLONE_DETACHED | (CSIGNAL & (~CLONE_NEWTIME)))) return false; if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) == (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) return false; if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && kargs->exit_signal) return false; if (!clone3_stack_valid(kargs)) return false; return true; } /** * sys_clone3 - create a new process with specific properties * @uargs: argument structure * @size: size of @uargs * * clone3() is the extensible successor to clone()/clone2(). * It takes a struct as argument that is versioned by its size. * * Return: On success, a positive PID for the child process. * On error, a negative errno number. */ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) { int err; struct kernel_clone_args kargs; pid_t set_tid[MAX_PID_NS_LEVEL]; kargs.set_tid = set_tid; err = copy_clone_args_from_user(&kargs, uargs, size); if (err) return err; if (!clone3_args_valid(&kargs)) return -EINVAL; return kernel_clone(&kargs); } #endif void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data) { struct task_struct *leader, *parent, *child; int res; read_lock(&tasklist_lock); leader = top = top->group_leader; down: for_each_thread(leader, parent) { list_for_each_entry(child, &parent->children, sibling) { res = visitor(child, data); if (res) { if (res < 0) goto out; leader = child; goto down; } up: ; } } if (leader != top) { child = leader; parent = child->real_parent; leader = parent->group_leader; goto up; } out: read_unlock(&tasklist_lock); } #ifndef ARCH_MIN_MMSTRUCT_ALIGN #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif static void sighand_ctor(void *data) { struct sighand_struct *sighand = data; spin_lock_init(&sighand->siglock); init_waitqueue_head(&sighand->signalfd_wqh); } void __init mm_cache_init(void) { unsigned int mm_size; /* * The mm_cpumask is located at the end of mm_struct, and is * dynamically sized based on the maximum CPU number this system * can have, taking hotplug into account (nr_cpu_ids). 
*/ mm_size = sizeof(struct mm_struct) + cpumask_size() + mm_cid_size(); mm_cachep = kmem_cache_create_usercopy("mm_struct", mm_size, ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, offsetof(struct mm_struct, saved_auxv), sizeof_field(struct mm_struct, saved_auxv), NULL); } void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| SLAB_ACCOUNT, sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); files_cachep = kmem_cache_create("files_cache", sizeof(struct files_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); #ifdef CONFIG_PER_VMA_LOCK vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT); #endif mmap_init(); nsproxy_cache_init(); } /* * Check constraints on flags passed to the unshare system call. */ static int check_unshare_flags(unsigned long unshare_flags) { if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP| CLONE_NEWTIME)) return -EINVAL; /* * Not implemented, but pretend it works if there is nothing * to unshare. Note that unsharing the address space or the * signal handlers also need to unshare the signal queues (aka * CLONE_THREAD). */ if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { if (!thread_group_empty(current)) return -EINVAL; } if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { if (refcount_read(&current->sighand->count) > 1) return -EINVAL; } if (unshare_flags & CLONE_VM) { if (!current_is_single_threaded()) return -EINVAL; } return 0; } /* * Unshare the filesystem structure if it is being shared */ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) { struct fs_struct *fs = current->fs; if (!(unshare_flags & CLONE_FS) || !fs) return 0; /* don't need lock here; in the worst case we'll do useless copy */ if (fs->users == 1) return 0; *new_fsp = copy_fs_struct(fs); if (!*new_fsp) return -ENOMEM; return 0; } /* * Unshare file descriptor table if it is being shared */ int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, struct files_struct **new_fdp) { struct files_struct *fd = current->files; int error = 0; if ((unshare_flags & CLONE_FILES) && (fd && atomic_read(&fd->count) > 1)) { *new_fdp = dup_fd(fd, max_fds, &error); if (!*new_fdp) return error; } return 0; } /* * unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by kernel_clone() cannot be used here directly * because they modify an inactive task_struct that is being * constructed. Here we are modifying the current, active, * task_struct. */ int ksys_unshare(unsigned long unshare_flags) { struct fs_struct *fs, *new_fs = NULL; struct files_struct *new_fd = NULL; struct cred *new_cred = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; int err; /* * If unsharing a user namespace must also unshare the thread group * and unshare the filesystem root and working directories. 
*/ if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; /* * If unsharing vm, must also unshare signal handlers. */ if (unshare_flags & CLONE_VM) unshare_flags |= CLONE_SIGHAND; /* * If unsharing a signal handlers, must also unshare the signal queues. */ if (unshare_flags & CLONE_SIGHAND) unshare_flags |= CLONE_THREAD; /* * If unsharing namespace, must also unshare filesystem information. */ if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; /* * CLONE_NEWIPC must also detach from the undolist: after switching * to a new ipc namespace, the semaphore arrays from the old * namespace are unreachable. */ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) do_sysvsem = 1; err = unshare_fs(unshare_flags, &new_fs); if (err) goto bad_unshare_out; err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd); if (err) goto bad_unshare_cleanup_fs; err = unshare_userns(unshare_flags, &new_cred); if (err) goto bad_unshare_cleanup_fd; err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_cred, new_fs); if (err) goto bad_unshare_cleanup_cred; if (new_cred) { err = set_cred_ucounts(new_cred); if (err) goto bad_unshare_cleanup_cred; } if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { if (do_sysvsem) { /* * CLONE_SYSVSEM is equivalent to sys_exit(). */ exit_sem(current); } if (unshare_flags & CLONE_NEWIPC) { /* Orphan segments in old ns (see sem above). */ exit_shm(current); shm_init_task(current); } if (new_nsproxy) switch_task_namespaces(current, new_nsproxy); task_lock(current); if (new_fs) { fs = current->fs; spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; spin_unlock(&fs->lock); } if (new_fd) swap(current->files, new_fd); task_unlock(current); if (new_cred) { /* Install the new user namespace */ commit_creds(new_cred); new_cred = NULL; } } perf_event_namespaces(current); bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); bad_unshare_cleanup_fs: if (new_fs) free_fs_struct(new_fs); bad_unshare_out: return err; } SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { return ksys_unshare(unshare_flags); } /* * Helper to unshare the files of the current task. * We don't want to expose copy_files internals to * the exec layer of the kernel. */ int unshare_files(void) { struct task_struct *task = current; struct files_struct *old, *copy = NULL; int error; error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy); if (error || !copy) return error; old = task->files; task_lock(task); task->files = copy; task_unlock(task); put_files_struct(old); return 0; } int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; int threads = max_threads; int min = 1; int max = MAX_THREADS; t = *table; t.data = &threads; t.extra1 = &min; t.extra2 = &max; ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); if (ret || !write) return ret; max_threads = threads; return 0; }
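/*
 * Illustrative userspace sketch, not part of the kernel sources above: it
 * exercises the clone3()/CLONE_PIDFD path implemented by copy_process() and
 * __pidfd_prepare(), then polls the returned pidfd for child exit.  glibc has
 * no clone3() wrapper, so the raw syscall is used; a kernel and uapi headers
 * new enough to provide SYS_clone3 and struct clone_args (v5.3+) are assumed,
 * and the helper name run_clone3_pidfd() is invented for this sketch.
 */
#define _GNU_SOURCE
#include <linux/sched.h>	/* struct clone_args, CLONE_PIDFD */
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <poll.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static pid_t run_clone3_pidfd(int *pidfd)
{
	struct clone_args args;

	memset(&args, 0, sizeof(args));
	args.flags	 = CLONE_PIDFD;		/* kernel stores the new fd in *pidfd */
	args.pidfd	 = (__u64)(uintptr_t)pidfd;
	args.exit_signal = SIGCHLD;

	/* The struct is versioned by size: pass the size actually filled in. */
	return syscall(SYS_clone3, &args, sizeof(args));
}

int main(void)
{
	int pidfd = -1;
	pid_t pid = run_clone3_pidfd(&pidfd);

	if (pid < 0) {
		perror("clone3");
		return 1;
	}
	if (pid == 0)			/* child */
		_exit(42);

	/* The pidfd signals POLLIN once the child has terminated. */
	struct pollfd pfd = { .fd = pidfd, .events = POLLIN };
	poll(&pfd, 1, -1);
	waitpid(pid, NULL, 0);
	close(pidfd);
	printf("child %d exited; pidfd reported POLLIN\n", pid);
	return 0;
}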
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 */

#ifndef _ASM_X86_STACKTRACE_H
#define _ASM_X86_STACKTRACE_H

#include <linux/uaccess.h>
#include <linux/ptrace.h>

#include <asm/cpu_entry_area.h>
#include <asm/switch_to.h>

enum stack_type {
	STACK_TYPE_UNKNOWN,
	STACK_TYPE_TASK,
	STACK_TYPE_IRQ,
	STACK_TYPE_SOFTIRQ,
	STACK_TYPE_ENTRY,
	STACK_TYPE_EXCEPTION,
	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
};

struct stack_info {
	enum stack_type type;
	unsigned long *begin, *end, *next_sp;
};

bool in_task_stack(unsigned long *stack, struct task_struct *task,
		   struct stack_info *info);

bool in_entry_stack(unsigned long *stack, struct stack_info *info);

int get_stack_info(unsigned long *stack, struct task_struct *task,
		   struct stack_info *info, unsigned long *visit_mask);
bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task,
			    struct stack_info *info);

static __always_inline
bool get_stack_guard_info(unsigned long *stack, struct stack_info *info)
{
	/* make sure it's not in the stack proper */
	if (get_stack_info_noinstr(stack, current, info))
		return false;
	/* but if it is in the page below it, we hit a guard */
	return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info);
}

const char *stack_type_name(enum stack_type type);

static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
{
	void *begin = info->begin;
	void *end = info->end;

	return (info->type != STACK_TYPE_UNKNOWN &&
		addr >= begin && addr < end &&
		addr + len > begin && addr + len <= end);
}

#ifdef CONFIG_X86_32
#define STACKSLOTS_PER_LINE 8
#else
#define STACKSLOTS_PER_LINE 4
#endif

#ifdef CONFIG_FRAME_POINTER
static inline unsigned long *
get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
{
	if (regs)
		return (unsigned long *)regs->bp;

	if (task == current)
		return __builtin_frame_address(0);

	return &((struct inactive_task_frame *)task->thread.sp)->bp;
}
#else
static inline unsigned long *
get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
{
	return NULL;
}
#endif /* CONFIG_FRAME_POINTER */

static inline unsigned long *
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
{
	if (regs)
		return (unsigned long *)regs->sp;

	if (task == current)
		return __builtin_frame_address(0);

	return (unsigned long *)task->thread.sp;
}

/* The form of the top of the frame on the stack */
struct stack_frame {
	struct stack_frame *next_frame;
	unsigned long return_address;
};

struct stack_frame_ia32 {
	u32 next_frame;
	u32 return_address;
};

void show_opcodes(struct pt_regs *regs, const char *loglvl);
void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
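/*
 * Illustrative sketch, not part of the header above: a minimal frame-pointer
 * walk using the same two-word layout that struct stack_frame describes (the
 * saved frame pointer followed by the return address).  It assumes frames are
 * actually built that way (compile with -fno-omit-frame-pointer) and stops at
 * a NULL frame pointer or a fixed depth; the names stack_frame_example,
 * dump_frames() and MAX_DEPTH are invented for this sketch.
 */
#include <stdio.h>

struct stack_frame_example {
	struct stack_frame_example *next_frame;	/* saved caller frame pointer */
	unsigned long return_address;		/* address to return to */
};

#define MAX_DEPTH 16

static void dump_frames(void)
{
	/* __builtin_frame_address(0) points at this function's frame record. */
	struct stack_frame_example *frame = __builtin_frame_address(0);
	int depth;

	for (depth = 0; frame && depth < MAX_DEPTH; depth++) {
		printf("#%d  return address %p\n",
		       depth, (void *)frame->return_address);
		/* The saved frame pointer links to the caller's frame. */
		frame = frame->next_frame;
	}
}

int main(void)
{
	dump_frames();
	return 0;
}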
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MM_TYPES_H #define _LINUX_MM_TYPES_H #include <linux/mm_types_task.h> #include <linux/auxvec.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> #include <linux/maple_tree.h> #include <linux/rwsem.h> #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/uprobes.h> #include <linux/rcupdate.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> #include <linux/percpu_counter.h> #include <asm/mmu.h> #ifndef AT_VECTOR_SIZE_ARCH #define AT_VECTOR_SIZE_ARCH 0 #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) #define INIT_PASID 0 struct address_space; struct mem_cgroup; /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are
using the page for at the * moment. Note that we have no way to track which tasks are using * a page, though if it is a pagecache page, rmap structures can tell us * who is mapping it. * * If you allocate the page using alloc_pages(), you can use some of the * space in struct page for your own purposes. The five words in the main * union are available, except for bit 0 of the first word which must be * kept clear. Many users use this word to store a pointer to an object * which is guaranteed to be aligned. If you use the same storage as * page->mapping, you must restore it to NULL before freeing the page. * * If your page will not be mapped to userspace, you can also use the four * bytes in the mapcount union, but you must call page_mapcount_reset() * before freeing it. * * If you want to use the refcount field, it must be used in such a way * that other CPUs temporarily incrementing and then decrementing the * refcount does not cause problems. On receiving the page from * alloc_pages(), the refcount will be positive. * * If you allocate pages of order > 0, you can use some of the fields * in each subpage, but you may need to restore some of their values * afterwards. * * SLUB uses cmpxchg_double() to atomically update its freelist and counters. * That requires that freelist & counters in struct slab be adjacent and * double-word aligned. Because struct slab currently just reinterprets the * bits of struct page, we align all struct pages to double-word boundaries, * and ensure that 'freelist' is aligned within struct slab. */ #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) #else #define _struct_page_alignment __aligned(sizeof(unsigned long)) #endif struct page { unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ /* * Five words (20/40 bytes) are available in this union. * WARNING: bit 0 of the first word is used for PageTail(). That * means the other users of this union MUST NOT use the bit to * avoid collision and false-positive PageTail(). */ union { struct { /* Page cache and anonymous pages */ /** * @lru: Pageout list, eg. active_list protected by * lruvec->lru_lock. Sometimes used as a generic list * by the page owner. */ union { struct list_head lru; /* Or, for the Unevictable "LRU list" slot */ struct { /* Always even, to negate PageTail */ void *__filler; /* Count page's or folio's mlocks */ unsigned int mlock_count; }; /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; union { pgoff_t index; /* Our offset within mapping. */ unsigned long share; /* share count for fsdax */ }; /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. * Used for swp_entry_t if PageSwapCache. * Indicates order in the buddy system if PageBuddy. */ unsigned long private; }; struct { /* page_pool used by netstack */ /** * @pp_magic: magic value to avoid recycling non * page_pool allocated pages. */ unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; atomic_long_t pp_ref_count; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; struct { /* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. 
*/ struct dev_pagemap *pgmap; void *zone_device_data; /* * ZONE_DEVICE private pages are counted as being * mapped so the next 3 words hold the mapping, index, * and private fields from the source anonymous or * page cache page while the page is migrated to device * private memory. * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also * use the mapping, index, and private fields when * pmem backed DAX files are mapped. */ }; /** @rcu_head: You can use this to free a page by RCU. */ struct rcu_head rcu_head; }; union { /* This union is 4 bytes in size. */ /* * If the page can be mapped to userspace, encodes the number * of times this page is referenced by a page table. */ atomic_t _mapcount; /* * If the page is neither PageSlab nor mappable to userspace, * the value stored here may help determine what this page * is used for. See page-flags.h for a list of page types * which are currently stored here. */ unsigned int page_type; }; /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; #endif /* * On machines where all RAM is mapped into kernel address space, * we can simply calculate the virtual address. On machines with * highmem some memory is mapped into kernel virtual memory * dynamically, so we need a place to store that address. * Note that this field could be 16 bits on x86 ... ;) * * Architectures with slow multiplication can define * WANT_PAGE_VIRTUAL in asm/page.h */ #if defined(WANT_PAGE_VIRTUAL) void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif #ifdef CONFIG_KMSAN /* * KMSAN metadata for this page: * - shadow page: every bit indicates whether the corresponding * bit of the original page is initialized (0) or not (1); * - origin page: every 4 bytes contain an id of the stack trace * where the uninitialized value was created. */ struct page *kmsan_shadow; struct page *kmsan_origin; #endif } _struct_page_alignment; /* * struct encoded_page - a nonexistent type marking this pointer * * An 'encoded_page' pointer is a pointer to a regular 'struct page', but * with the low bits of the pointer indicating extra context-dependent * information. Only used in mmu_gather handling, and this acts as a type * system check on that use. * * We only really have two guaranteed bits in general, although you could * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE) * for more. * * Use the supplied helper functions to endcode/decode the pointer and bits. */ struct encoded_page; #define ENCODED_PAGE_BITS 3ul /* Perform rmap removal after we have flushed the TLB. */ #define ENCODED_PAGE_BIT_DELAY_RMAP 1ul /* * The next item in an encoded_page array is the "nr_pages" argument, specifying * the number of consecutive pages starting from this page, that all belong to * the same folio. For example, "nr_pages" corresponds to the number of folio * references that must be dropped. If this bit is not set, "nr_pages" is * implicitly 1. 
*/ #define ENCODED_PAGE_BIT_NR_PAGES_NEXT 2ul static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags) { BUILD_BUG_ON(flags > ENCODED_PAGE_BITS); return (struct encoded_page *)(flags | (unsigned long)page); } static inline unsigned long encoded_page_flags(struct encoded_page *page) { return ENCODED_PAGE_BITS & (unsigned long)page; } static inline struct page *encoded_page_ptr(struct encoded_page *page) { return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page); } static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr) { VM_WARN_ON_ONCE((nr << 2) >> 2 != nr); return (struct encoded_page *)(nr << 2); } static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page) { return ((unsigned long)page) >> 2; } /* * A swap entry has to fit into a "unsigned long", as the entry is hidden * in the "index" field of the swapper address space. */ typedef struct { unsigned long val; } swp_entry_t; /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. * @lru: Least Recently Used list; tracks how recently this folio was used. * @mlock_count: Number of times this folio has been pinned by mlock(). * @mapping: The file this page belongs to, or refers to the anon_vma for * anonymous memory. * @index: Offset within the file, in units of pages. For anonymous memory, * this is the index from the beginning of the mmap. * @private: Filesystem per-folio data (see folio_attach_private()). * @swap: Used for swp_entry_t if folio_test_swapcache(). * @_mapcount: Do not access this member directly. Use folio_mapcount() to * find out how many times this folio is mapped by userspace. * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. * @virtual: Virtual address in the kernel direct map. * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head(). * @_deferred_list: Folios to be split under memory pressure. * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that * same power-of-two. It is at least as large as %PAGE_SIZE. If it is * in the page cache, it is at a file offset which is a multiple of that * power-of-two. It may be mapped into userspace at an address which is * at an arbitrary page offset, but its kernel virtual address is aligned * to its size. 
*/ struct folio { /* private: don't document the anon union */ union { struct { /* public: */ unsigned long flags; union { struct list_head lru; /* private: avoid cluttering the output */ struct { void *__filler; /* public: */ unsigned int mlock_count; /* private: */ }; /* public: */ }; struct address_space *mapping; pgoff_t index; union { void *private; swp_entry_t swap; }; atomic_t _mapcount; atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; #endif #if defined(WANT_PAGE_VIRTUAL) void *virtual; #endif #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif /* private: the union with struct page is transitional */ }; struct page page; }; union { struct { unsigned long _flags_1; unsigned long _head_1; unsigned long _folio_avail; /* public: */ atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; atomic_t _pincount; #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; #endif /* private: the union with struct page is transitional */ }; struct page __page_1; }; union { struct { unsigned long _flags_2; unsigned long _head_2; /* public: */ void *_hugetlb_subpool; void *_hugetlb_cgroup; void *_hugetlb_cgroup_rsvd; void *_hugetlb_hwpoison; /* private: the union with struct page is transitional */ }; struct { unsigned long _flags_2a; unsigned long _head_2a; /* public: */ struct list_head _deferred_list; /* private: the union with struct page is transitional */ }; struct page __page_2; }; }; #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); FOLIO_MATCH(index, index); FOLIO_MATCH(private, private); FOLIO_MATCH(_mapcount, _mapcount); FOLIO_MATCH(_refcount, _refcount); #ifdef CONFIG_MEMCG FOLIO_MATCH(memcg_data, memcg_data); #endif #if defined(WANT_PAGE_VIRTUAL) FOLIO_MATCH(virtual, virtual); #endif #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS FOLIO_MATCH(_last_cpupid, _last_cpupid); #endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); FOLIO_MATCH(flags, _flags_2a); FOLIO_MATCH(compound_head, _head_2a); #undef FOLIO_MATCH /** * struct ptdesc - Memory descriptor for page tables. * @__page_flags: Same as page flags. Powerpc only. * @pt_rcu_head: For freeing page table pages. * @pt_list: List of used page tables. Used for s390 and x86. * @_pt_pad_1: Padding that aliases with page's compound head. * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. * @__page_mapping: Aliases with page->mapping. Unused for page tables. * @pt_index: Used for s390 gmap. * @pt_mm: Used for x86 pgds. * @pt_frag_refcount: For fragmented page table tracking. Powerpc only. * @_pt_pad_2: Padding to ensure proper alignment. * @ptl: Lock for the page table. * @__page_type: Same as page->page_type. Unused for page tables. * @__page_refcount: Same as page refcount. * @pt_memcg_data: Memcg data. Tracked for page tables here. * * This struct overlays struct page for now. Do not modify without a good * understanding of the issues. 
*/ struct ptdesc { unsigned long __page_flags; union { struct rcu_head pt_rcu_head; struct list_head pt_list; struct { unsigned long _pt_pad_1; pgtable_t pmd_huge_pte; }; }; unsigned long __page_mapping; union { pgoff_t pt_index; struct mm_struct *pt_mm; atomic_t pt_frag_refcount; }; union { unsigned long _pt_pad_2; #if ALLOC_SPLIT_PTLOCKS spinlock_t *ptl; #else spinlock_t ptl; #endif }; unsigned int __page_type; atomic_t __page_refcount; #ifdef CONFIG_MEMCG unsigned long pt_memcg_data; #endif }; #define TABLE_MATCH(pg, pt) \ static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) TABLE_MATCH(flags, __page_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); TABLE_MATCH(index, pt_index); TABLE_MATCH(rcu_head, pt_rcu_head); TABLE_MATCH(page_type, __page_type); TABLE_MATCH(_refcount, __page_refcount); #ifdef CONFIG_MEMCG TABLE_MATCH(memcg_data, pt_memcg_data); #endif #undef TABLE_MATCH static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); #define ptdesc_page(pt) (_Generic((pt), \ const struct ptdesc *: (const struct page *)(pt), \ struct ptdesc *: (struct page *)(pt))) #define ptdesc_folio(pt) (_Generic((pt), \ const struct ptdesc *: (const struct folio *)(pt), \ struct ptdesc *: (struct folio *)(pt))) #define page_ptdesc(p) (_Generic((p), \ const struct page *: (const struct ptdesc *)(p), \ struct page *: (struct ptdesc *)(p))) /* * Used for sizing the vmemmap region on some architectures */ #define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page))) #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) /* * page_private can be used on tail pages. However, PagePrivate is only * checked by the VM on the head page. So page_private on the tail pages * should be used for data that's ancillary to the head page (eg attaching * buffer heads to tail pages after attaching buffer heads to the head page) */ #define page_private(page) ((page)->private) static inline void set_page_private(struct page *page, unsigned long private) { page->private = private; } static inline void *folio_get_private(struct folio *folio) { return folio->private; } struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) __u16 offset; __u16 size; #else __u32 offset; #endif /* we maintain a pagecount bias, so that we dont dirty cache line * containing page->_refcount every time we allocate a fragment. */ unsigned int pagecnt_bias; bool pfmemalloc; }; typedef unsigned long vm_flags_t; /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that * map parts of them. 
*/ struct vm_region { struct rb_node vm_rb; /* link in global region tree */ vm_flags_t vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for * this region */ }; #ifdef CONFIG_USERFAULTFD #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, }) struct vm_userfaultfd_ctx { struct userfaultfd_ctx *ctx; }; #else /* CONFIG_USERFAULTFD */ #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {}) struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ struct anon_vma_name { struct kref kref; /* The name needs to be at the end because it is dynamically sized. */ char name[]; }; #ifdef CONFIG_ANON_VMA_NAME /* * mmap_lock should be read-locked when calling anon_vma_name(). Caller should * either keep holding the lock while using the returned pointer or it should * raise anon_vma_name refcount before releasing the lock. */ struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); struct anon_vma_name *anon_vma_name_alloc(const char *name); void anon_vma_name_free(struct kref *kref); #else /* CONFIG_ANON_VMA_NAME */ static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) { return NULL; } static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) { return NULL; } #endif struct vma_lock { struct rw_semaphore lock; }; struct vma_numab_state { /* * Initialised as time in 'jiffies' after which VMA * should be scanned. Delays first scan of new VMA by at * least sysctl_numa_balancing_scan_delay: */ unsigned long next_scan; /* * Time in jiffies when pids_active[] is reset to * detect phase change behaviour: */ unsigned long pids_active_reset; /* * Approximate tracking of PIDs that trapped a NUMA hinting * fault. May produce false positives due to hash collisions. * * [0] Previous PID tracking * [1] Current PID tracking * * Window moves after next_pid_reset has expired approximately * every VMA_PID_RESET_PERIOD jiffies: */ unsigned long pids_active[2]; /* MM scan sequence ID when scan first started after VMA creation */ int start_scan_seq; /* * MM scan sequence ID when the VMA was last completely scanned. * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq */ int prev_scan_seq; }; /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ union { struct { /* VMA covers [vm_start; vm_end) addresses within mm */ unsigned long vm_start; unsigned long vm_end; }; #ifdef CONFIG_PER_VMA_LOCK struct rcu_head vm_rcu; /* Used for deferred freeing. */ #endif }; struct mm_struct *vm_mm; /* The address space we belong to. */ pgprot_t vm_page_prot; /* Access permissions of this VMA. */ /* * Flags, see mm.h. * To modify use vm_flags_{init|reset|set|clear|mod} functions. 
*/ union { const vm_flags_t vm_flags; vm_flags_t __private __vm_flags; }; #ifdef CONFIG_PER_VMA_LOCK /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) * - vm_lock->lock (in write mode) * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) * - vm_lock->lock (in read or write mode) * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * * This sequence counter is explicitly allowed to overflow; sequence * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ int vm_lock_seq; struct vma_lock *vm_lock; /* Flag to indicate areas detached from the mm->mm_mt tree */ bool detached; #endif /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. * */ struct { struct rb_node rb; unsigned long rb_subtree_last; } shared; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack * or brk vma (with NULL file) can only be in an anon_vma list. */ struct list_head anon_vma_chain; /* Serialized by mmap_lock & * page_table_lock */ struct anon_vma *anon_vma; /* Serialized by page_table_lock */ /* Function pointers to deal with this struct. */ const struct vm_operations_struct *vm_ops; /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ #ifdef CONFIG_ANON_VMA_NAME /* * For private and shared anonymous mappings, a pointer to a null * terminated string containing the name given to the vma, or NULL if * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. */ struct anon_vma_name *anon_name; #endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif #ifndef CONFIG_MMU struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif #ifdef CONFIG_NUMA_BALANCING struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; #ifdef CONFIG_NUMA #define vma_policy(vma) ((vma)->vm_policy) #else #define vma_policy(vma) NULL #endif #ifdef CONFIG_SCHED_MM_CID struct mm_cid { u64 time; int cid; }; #endif struct kioctx_table; struct iommu_mm_data; struct mm_struct { struct { /* * Fields which are often written to are placed in a separate * cache line. */ struct { /** * @mm_count: The number of references to &struct * mm_struct (@mm_users count as 1). * * Use mmgrab()/mmdrop() to modify. When this drops to * 0, the &struct mm_struct is freed. 
*/ atomic_t mm_count; } ____cacheline_aligned_in_smp; struct maple_tree mm_mt; #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif unsigned long mmap_base; /* base of mmap area */ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES /* Base addresses for compatible mmap() */ unsigned long mmap_compat_base; unsigned long mmap_compat_legacy_base; #endif unsigned long task_size; /* size of task vm space */ pgd_t * pgd; #ifdef CONFIG_MEMBARRIER /** * @membarrier_state: Flags controlling membarrier behavior. * * This field is close to @pgd to hopefully fit in the same * cache-line, which needs to be touched by switch_mm(). */ atomic_t membarrier_state; #endif /** * @mm_users: The number of users including userspace. * * Use mmget()/mmget_not_zero()/mmput() to modify. When this * drops to 0 (i.e. when the task exits and there are no other * temporary reference holders), we also release a reference on * @mm_count (which may then free the &struct mm_struct if * @mm_count also drops to 0). */ atomic_t mm_users; #ifdef CONFIG_SCHED_MM_CID /** * @pcpu_cid: Per-cpu current cid. * * Keep track of the currently allocated mm_cid for each cpu. * The per-cpu mm_cid values are serialized by their respective * runqueue locks. */ struct mm_cid __percpu *pcpu_cid; /* * @mm_cid_next_scan: Next mm_cid scan (in jiffies). * * When the next mm_cid scan is due (in jiffies). */ unsigned long mm_cid_next_scan; #endif #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* size of all page tables */ #endif int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some * counters */ /* * With some kernel config, the current mmap_lock's offset * inside 'mm_struct' is at 0x120, which is very optimal, as * its two hot fields 'count' and 'owner' sit in 2 different * cachelines, and when mmap_lock is highly contended, both * of the 2 fields will be accessed frequently, current layout * will help to reduce cache bouncing. * * So please be careful with adding new fields before * mmap_lock, which can easily push the 2 fields into one * cacheline. */ struct rw_semaphore mmap_lock; struct list_head mmlist; /* List of maybe swapped mm's. These * are globally strung together off * init_mm.mmlist, and are protected * by mmlist_lock */ #ifdef CONFIG_PER_VMA_LOCK /* * This field has lock-like semantics, meaning it is sometimes * accessed with ACQUIRE/RELEASE semantics. * Roughly speaking, incrementing the sequence number is * equivalent to releasing locks on VMAs; reading the sequence * number can be part of taking a read lock on a VMA. * * Can be modified under write mmap_lock using RELEASE * semantics. * Can be read with no other protection when holding write * mmap_lock. * Can be read with ACQUIRE semantics if not holding write * mmap_lock. 
*/ int mm_lock_seq; #endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ unsigned long total_vm; /* Total pages mapped */ unsigned long locked_vm; /* Pages that have PG_mlocked set */ atomic64_t pinned_vm; /* Refcount permanently increased */ unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long stack_vm; /* VM_STACK */ unsigned long def_flags; /** * @write_protect_seq: Locked when any thread is write * protecting pages mapped by this mm to enforce a later COW, * for instance during page table copying for fork(). */ seqcount_t write_protect_seq; spinlock_t arg_lock; /* protect the below fields */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ struct percpu_counter rss_stat[NR_MM_COUNTERS]; struct linux_binfmt *binfmt; /* Architecture-specific MM context */ mm_context_t context; unsigned long flags; /* Must use atomic bitops to access */ #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; #endif #ifdef CONFIG_MEMCG /* * "owner" points to a task that is regarded as the canonical * user/owner of this mm. All of the following must be true in * order for it to be changed: * * current == mm->owner * current->mm != mm * new_owner->mm == mm * new_owner->alloc_lock is held */ struct task_struct __rcu *owner; #endif struct user_namespace *user_ns; /* store ref to file /proc/<pid>/exe symlink points to */ struct file __rcu *exe_file; #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_subscriptions *notifier_subscriptions; #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING /* * numa_next_scan is the next time that PTEs will be remapped * PROT_NONE to trigger NUMA hinting faults; such faults gather * statistics and migrate pages to new nodes if necessary. */ unsigned long numa_next_scan; /* Restart point for scanning and remapping PTEs. */ unsigned long numa_scan_offset; /* numa_scan_seq prevents two threads remapping PTEs. */ int numa_scan_seq; #endif /* * An operation with batched TLB flushing is going on. Anything * that can move process memory needs to flush the TLB when * moving a PROT_NONE mapped page. */ atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ atomic_t tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_PREEMPT_RT struct rcu_head delayed_drop; #endif #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; #ifdef CONFIG_IOMMU_MM_DATA struct iommu_mm_data *iommu_mm; #endif #ifdef CONFIG_KSM /* * Represent how many pages of this process are involved in KSM * merging (not including ksm_zero_pages). */ unsigned long ksm_merging_pages; /* * Represent how many pages are checked for ksm merging * including merged and not merged. */ unsigned long ksm_rmap_items; /* * Represent how many empty pages are merged with kernel zero * pages when enabling KSM use_zero_pages. 
*/ unsigned long ksm_zero_pages; #endif /* CONFIG_KSM */ #ifdef CONFIG_LRU_GEN_WALKS_MMU struct { /* this mm_struct is on lru_gen_mm_list */ struct list_head list; /* * Set when switching to this mm_struct, as a hint of * whether it has been used since the last time per-node * page table walkers cleared the corresponding bits. */ unsigned long bitmap; #ifdef CONFIG_MEMCG /* points to the memcg of "owner" above */ struct mem_cgroup *memcg; #endif } lru_gen; #endif /* CONFIG_LRU_GEN_WALKS_MMU */ } __randomize_layout; /* * The mm_cpumask needs to be at the end of mm_struct, because it * is dynamically sized based on nr_cpu_ids. */ unsigned long cpu_bitmap[]; }; #define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ static inline void mm_init_cpumask(struct mm_struct *mm) { unsigned long cpu_bitmap = (unsigned long)mm; cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { return (struct cpumask *)&mm->cpu_bitmap; } #ifdef CONFIG_LRU_GEN struct lru_gen_mm_list { /* mm_struct list for page table walkers */ struct list_head fifo; /* protects the list above */ spinlock_t lock; }; #endif /* CONFIG_LRU_GEN */ #ifdef CONFIG_LRU_GEN_WALKS_MMU void lru_gen_add_mm(struct mm_struct *mm); void lru_gen_del_mm(struct mm_struct *mm); void lru_gen_migrate_mm(struct mm_struct *mm); static inline void lru_gen_init_mm(struct mm_struct *mm) { INIT_LIST_HEAD(&mm->lru_gen.list); mm->lru_gen.bitmap = 0; #ifdef CONFIG_MEMCG mm->lru_gen.memcg = NULL; #endif } static inline void lru_gen_use_mm(struct mm_struct *mm) { /* * When the bitmap is set, page reclaim knows this mm_struct has been * used since the last time it cleared the bitmap. So it might be worth * walking the page tables of this mm_struct to clear the accessed bit. */ WRITE_ONCE(mm->lru_gen.bitmap, -1); } #else /* !CONFIG_LRU_GEN_WALKS_MMU */ static inline void lru_gen_add_mm(struct mm_struct *mm) { } static inline void lru_gen_del_mm(struct mm_struct *mm) { } static inline void lru_gen_migrate_mm(struct mm_struct *mm) { } static inline void lru_gen_init_mm(struct mm_struct *mm) { } static inline void lru_gen_use_mm(struct mm_struct *mm) { } #endif /* CONFIG_LRU_GEN_WALKS_MMU */ struct vma_iterator { struct ma_state mas; }; #define VMA_ITERATOR(name, __mm, __addr) \ struct vma_iterator name = { \ .mas = { \ .tree = &(__mm)->mm_mt, \ .index = __addr, \ .node = NULL, \ .status = ma_start, \ }, \ } static inline void vma_iter_init(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long addr) { mas_init(&vmi->mas, &mm->mm_mt, addr); } #ifdef CONFIG_SCHED_MM_CID enum mm_cid_state { MM_CID_UNSET = -1U, /* Unset state has lazy_put flag set. */ MM_CID_LAZY_PUT = (1U << 31), }; static inline bool mm_cid_is_unset(int cid) { return cid == MM_CID_UNSET; } static inline bool mm_cid_is_lazy_put(int cid) { return !mm_cid_is_unset(cid) && (cid & MM_CID_LAZY_PUT); } static inline bool mm_cid_is_valid(int cid) { return !(cid & MM_CID_LAZY_PUT); } static inline int mm_cid_set_lazy_put(int cid) { return cid | MM_CID_LAZY_PUT; } static inline int mm_cid_clear_lazy_put(int cid) { return cid & ~MM_CID_LAZY_PUT; } /* Accessor for struct mm_struct's cidmask. 
*/ static inline cpumask_t *mm_cidmask(struct mm_struct *mm) { unsigned long cid_bitmap = (unsigned long)mm; cid_bitmap += offsetof(struct mm_struct, cpu_bitmap); /* Skip cpu_bitmap */ cid_bitmap += cpumask_size(); return (struct cpumask *)cid_bitmap; } static inline void mm_init_cid(struct mm_struct *mm) { int i; for_each_possible_cpu(i) { struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i); pcpu_cid->cid = MM_CID_UNSET; pcpu_cid->time = 0; } cpumask_clear(mm_cidmask(mm)); } static inline int mm_alloc_cid(struct mm_struct *mm) { mm->pcpu_cid = alloc_percpu(struct mm_cid); if (!mm->pcpu_cid) return -ENOMEM; mm_init_cid(mm); return 0; } static inline void mm_destroy_cid(struct mm_struct *mm) { free_percpu(mm->pcpu_cid); mm->pcpu_cid = NULL; } static inline unsigned int mm_cid_size(void) { return cpumask_size(); } #else /* CONFIG_SCHED_MM_CID */ static inline void mm_init_cid(struct mm_struct *mm) { } static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; } static inline void mm_destroy_cid(struct mm_struct *mm) { } static inline unsigned int mm_cid_size(void) { return 0; } #endif /* CONFIG_SCHED_MM_CID */ struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_finish_mmu(struct mmu_gather *tlb); struct vm_fault; /** * typedef vm_fault_t - Return type for page fault handlers. * * Page fault handlers return a bitmask of %VM_FAULT values. */ typedef __bitwise unsigned int vm_fault_t; /** * enum vm_fault_reason - Page fault handlers return a bitmask of * these values to tell the core VM what happened when handling the * fault. Used to decide whether a process gets delivered SIGBUS or * just gets major/minor fault counters bumped up. * * @VM_FAULT_OOM: Out Of Memory * @VM_FAULT_SIGBUS: Bad access * @VM_FAULT_MAJOR: Page read from storage * @VM_FAULT_HWPOISON: Hit poisoned small page * @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. 
Index encoded * in upper bits * @VM_FAULT_SIGSEGV: segmentation fault * @VM_FAULT_NOPAGE: ->fault installed the pte, not return page * @VM_FAULT_LOCKED: ->fault locked the returned page * @VM_FAULT_RETRY: ->fault blocked, must retry * @VM_FAULT_FALLBACK: huge page fault failed, fall back to small * @VM_FAULT_DONE_COW: ->fault has fully handled COW * @VM_FAULT_NEEDDSYNC: ->fault did not modify page tables and needs * fsync() to complete (for synchronous page faults * in DAX) * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released * @VM_FAULT_HINDEX_MASK: mask HINDEX value * */ enum vm_fault_reason { VM_FAULT_OOM = (__force vm_fault_t)0x000001, VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002, VM_FAULT_MAJOR = (__force vm_fault_t)0x000004, VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010, VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020, VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040, VM_FAULT_NOPAGE = (__force vm_fault_t)0x000100, VM_FAULT_LOCKED = (__force vm_fault_t)0x000200, VM_FAULT_RETRY = (__force vm_fault_t)0x000400, VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800, VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000, VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, }; /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16)) #define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \ VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \ VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK) #define VM_FAULT_RESULT_TRACE \ { VM_FAULT_OOM, "OOM" }, \ { VM_FAULT_SIGBUS, "SIGBUS" }, \ { VM_FAULT_MAJOR, "MAJOR" }, \ { VM_FAULT_HWPOISON, "HWPOISON" }, \ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ { VM_FAULT_NOPAGE, "NOPAGE" }, \ { VM_FAULT_LOCKED, "LOCKED" }, \ { VM_FAULT_RETRY, "RETRY" }, \ { VM_FAULT_FALLBACK, "FALLBACK" }, \ { VM_FAULT_DONE_COW, "DONE_COW" }, \ { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }, \ { VM_FAULT_COMPLETED, "COMPLETED" } struct vm_special_mapping { const char *name; /* The name, e.g. "[vdso]". */ /* * If .fault is not provided, this points to a * NULL-terminated array of pages that back the special mapping. * * This must not be NULL unless .fault is provided. */ struct page **pages; /* * If non-NULL, then this is called to resolve page faults * on the special mapping. If used, .pages is not checked. */ vm_fault_t (*fault)(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf); int (*mremap)(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma); }; enum tlb_flush_reason { TLB_FLUSH_ON_TASK_SWITCH, TLB_REMOTE_SHOOTDOWN, TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, NR_TLB_FLUSH_REASONS, }; /** * enum fault_flag - Fault flag definitions. * @FAULT_FLAG_WRITE: Fault was a write fault. * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying. * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. * @FAULT_FLAG_TRIED: The fault has been tried once. * @FAULT_FLAG_USER: The fault originated in userspace. * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. 
* @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to break COW in a * COW mapping, making sure that an exclusive anon page is * mapped after the fault. * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached. * We should only access orig_pte if this flag set. * @FAULT_FLAG_VMA_LOCK: The fault is handled under VMA lock. * * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify * whether we would allow page faults to retry by specifying these two * fault flags correctly. Currently there can be three legal combinations: * * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and * this is the first try * * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and * we've already tried at least once * * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry * * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never * be used. Note that page faults can be allowed to retry for multiple times, * in which case we'll have an initial fault with flags (a) then later on * continuous faults with flags (b). We should always try to detect pending * signals before a retry to make sure the continuous page faults can still be * interrupted if necessary. * * The combination FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE is illegal. * FAULT_FLAG_UNSHARE is ignored and treated like an ordinary read fault when * applied to mappings that are not COW mappings. */ enum fault_flag { FAULT_FLAG_WRITE = 1 << 0, FAULT_FLAG_MKWRITE = 1 << 1, FAULT_FLAG_ALLOW_RETRY = 1 << 2, FAULT_FLAG_RETRY_NOWAIT = 1 << 3, FAULT_FLAG_KILLABLE = 1 << 4, FAULT_FLAG_TRIED = 1 << 5, FAULT_FLAG_USER = 1 << 6, FAULT_FLAG_REMOTE = 1 << 7, FAULT_FLAG_INSTRUCTION = 1 << 8, FAULT_FLAG_INTERRUPTIBLE = 1 << 9, FAULT_FLAG_UNSHARE = 1 << 10, FAULT_FLAG_ORIG_PTE_VALID = 1 << 11, FAULT_FLAG_VMA_LOCK = 1 << 12, }; typedef unsigned int __bitwise zap_flags_t; /* * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each * other. Here is what they mean, and how to use them: * * * FIXME: For pages which are part of a filesystem, mappings are subject to the * lifetime enforced by the filesystem and we need guarantees that longterm * users like RDMA and V4L2 only establish mappings which coordinate usage with * the filesystem. Ideas for this coordination include revoking the longterm * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was * added after the problem with filesystems was found FS DAX VMAs are * specifically failed. Filesystem pages are still subject to bugs and use of * FOLL_LONGTERM should be avoided on those pages. * * In the CMA case: long term pins in a CMA region would unnecessarily fragment * that region. And so, CMA attempts to migrate the page before pinning, when * FOLL_LONGTERM is specified. * * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount, * but an additional pin counting system) will be invoked. This is intended for * anything that gets a page reference and then touches page data (for example, * Direct IO). This lets the filesystem know that some non-file-system entity is * potentially changing the pages' data. In contrast to FOLL_GET (whose pages * are released via put_page()), FOLL_PIN pages must be released, ultimately, by * a call to unpin_user_page(). * * FOLL_PIN is similar to FOLL_GET: both of these pin pages. 
They use different * and separate refcounting mechanisms, however, and that means that each has * its own acquire and release mechanisms: * * FOLL_GET: get_user_pages*() to acquire, and put_page() to release. * * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release. * * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call. * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based * calls applied to them, and that's perfectly OK. This is a constraint on the * callers, not on the pages.) * * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never * directly by the caller. That's in order to help avoid mismatches when * releasing pages: get_user_pages*() pages must be released via put_page(), * while pin_user_pages*() pages must be released via unpin_user_page(). * * Please see Documentation/core-api/pin_user_pages.rst for more information. */ enum { /* check pte is writable */ FOLL_WRITE = 1 << 0, /* do get_page on page */ FOLL_GET = 1 << 1, /* give error on hole if it would be zero */ FOLL_DUMP = 1 << 2, /* get_user_pages read/write w/o permission */ FOLL_FORCE = 1 << 3, /* * if a disk transfer is needed, start the IO and return without waiting * upon it */ FOLL_NOWAIT = 1 << 4, /* do not fault in pages */ FOLL_NOFAULT = 1 << 5, /* check page is hwpoisoned */ FOLL_HWPOISON = 1 << 6, /* don't do file mappings */ FOLL_ANON = 1 << 7, /* * FOLL_LONGTERM indicates that the page will be held for an indefinite * time period _often_ under userspace control. This is in contrast to * iov_iter_get_pages(), whose usages are transient. */ FOLL_LONGTERM = 1 << 8, /* split huge pmd before returning */ FOLL_SPLIT_PMD = 1 << 9, /* allow returning PCI P2PDMA pages */ FOLL_PCI_P2PDMA = 1 << 10, /* allow interrupts from generic signals */ FOLL_INTERRUPTIBLE = 1 << 11, /* * Always honor (trigger) NUMA hinting faults. * * FOLL_WRITE implicitly honors NUMA hinting faults because a * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE * apply). get_user_pages_fast_only() always implicitly honors NUMA * hinting faults. */ FOLL_HONOR_NUMA_FAULT = 1 << 12, /* See also internal only FOLL flags in mm/internal.h */ }; #endif /* _LINUX_MM_TYPES_H */
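Several of the tricks in mm_types.h above rely on pointer alignment freeing up the low bits of an address: encode_page() stores up to two flag bits (the ENCODED_PAGE_BITS mask) directly inside a struct page pointer because struct page is at least word aligned. The short sketch below reproduces that tagging idea in plain C; all demo_* identifiers are made up for illustration, and the alignment assumption is stated explicitly in the code.

/*
 * Userspace sketch of the pointer-tagging idea behind encoded_page
 * (demo_* names are illustrative, not kernel API). Because the object is
 * at least 4-byte aligned, the two low bits of its address are free to
 * carry flags, mirroring encode_page()/encoded_page_ptr() above.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_TAG_BITS   3ul             /* mask covering the two low bits */
#define DEMO_TAG_DIRTY  1ul

struct demo_obj {
        int payload;
} __attribute__((aligned(4)));

static void *demo_encode(struct demo_obj *obj, unsigned long flags)
{
        assert(flags <= DEMO_TAG_BITS);                 /* cf. BUILD_BUG_ON() */
        assert(((uintptr_t)obj & DEMO_TAG_BITS) == 0);  /* alignment gives us the bits */
        return (void *)((uintptr_t)obj | flags);
}

static unsigned long demo_flags(void *enc)
{
        return (uintptr_t)enc & DEMO_TAG_BITS;
}

static struct demo_obj *demo_ptr(void *enc)
{
        return (struct demo_obj *)((uintptr_t)enc & ~DEMO_TAG_BITS);
}

int main(void)
{
        struct demo_obj obj = { .payload = 42 };
        void *enc = demo_encode(&obj, DEMO_TAG_DIRTY);

        printf("flags=%lu payload=%d\n", demo_flags(enc), demo_ptr(enc)->payload);
        return 0;
}

The same masking discipline applies as in the kernel helpers: a reader must strip the tag with demo_ptr() before dereferencing, just as callers of encoded_page_ptr() must.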
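Another pattern worth calling out from mm_types.h is the dynamically sized tail of mm_struct: cpu_bitmap[] is a flexible array member sized at runtime from nr_cpu_ids, and mm_init_cpumask()/mm_cidmask() reach data past the fixed part of the struct with offsetof() arithmetic. A small self-contained sketch of that layout trick follows; the demo_* names and the four-word bitmap size are assumptions made purely for the example.

/*
 * Userspace sketch of the trailing flexible-array pattern used for
 * mm_struct::cpu_bitmap above (demo_* names are illustrative only).
 * The bitmap lives past the fixed part of the struct and is located
 * with offsetof(), much like mm_init_cpumask()/mm_cpumask() do.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_mm {
        int map_count;
        unsigned long cpu_bitmap[];     /* dynamically sized tail */
};

static unsigned long *demo_cpumask(struct demo_mm *mm)
{
        return (unsigned long *)((char *)mm +
                                 offsetof(struct demo_mm, cpu_bitmap));
}

int main(void)
{
        size_t nr_words = 4;    /* assume the CPU mask fits in 4 words */
        struct demo_mm *mm = malloc(sizeof(*mm) +
                                    nr_words * sizeof(unsigned long));

        if (!mm)
                return 1;
        memset(demo_cpumask(mm), 0, nr_words * sizeof(unsigned long));
        demo_cpumask(mm)[0] |= 1ul << 3;        /* mark CPU 3 */
        printf("word0 = %#lx\n", demo_cpumask(mm)[0]);
        free(mm);
        return 0;
}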
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
 *			unnecessary i-cache flushing.
 * 04/07/.. ak		Better overflow handling. Assorted fixes.
 * 05/09/10 linville	Add support for syncing ranges, support syncing for
 *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
 * 08/12/11 beckyb	Add highmem support
 */

#define pr_fmt(fmt) "software IO TLB: " fmt

#include <linux/cache.h>
#include <linux/cc_platform.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/iommu-helper.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/rculist.h>
#include <linux/scatterlist.h>
#include <linux/set_memory.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/swiotlb.h>
#include <linux/types.h>
#ifdef CONFIG_DMA_RESTRICTED_POOL
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/slab.h>
#endif

#define CREATE_TRACE_POINTS
#include <trace/events/swiotlb.h>

#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))

/*
 * Minimum IO TLB size to bother booting with. Systems with mainly
 * 64bit capable cards will only lightly use the swiotlb. If we can't
 * allocate a contiguous 1MB, we're probably in trouble anyway.
 */
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)

#define INVALID_PHYS_ADDR (~(phys_addr_t)0)

/**
 * struct io_tlb_slot - IO TLB slot descriptor
 * @orig_addr:	The original address corresponding to a mapped entry.
 * @alloc_size:	Size of the allocated buffer.
 * @list:	The free list describing the number of free entries available
 *		from each index.
 * @pad_slots:	Number of preceding padding slots. Valid only in the first
 *		allocated non-padding slot.
 */
struct io_tlb_slot {
	phys_addr_t orig_addr;
	size_t alloc_size;
	unsigned short list;
	unsigned short pad_slots;
};

static bool swiotlb_force_bounce;
static bool swiotlb_force_disable;

#ifdef CONFIG_SWIOTLB_DYNAMIC

static void swiotlb_dyn_alloc(struct work_struct *work);

static struct io_tlb_mem io_tlb_default_mem = {
	.lock = __SPIN_LOCK_UNLOCKED(io_tlb_default_mem.lock),
	.pools = LIST_HEAD_INIT(io_tlb_default_mem.pools),
	.dyn_alloc = __WORK_INITIALIZER(io_tlb_default_mem.dyn_alloc,
					swiotlb_dyn_alloc),
};

#else  /* !CONFIG_SWIOTLB_DYNAMIC */

static struct io_tlb_mem io_tlb_default_mem;

#endif	/* CONFIG_SWIOTLB_DYNAMIC */

static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
static unsigned long default_nareas;

/**
 * struct io_tlb_area - IO TLB memory area descriptor
 *
 * This is a single area with a single lock.
 *
 * @used:	The number of used IO TLB block.
 * @index:	The slot index to start searching in this area for next round.
 * @lock:	The lock to protect the above data structures in the map and
 *		unmap calls.
 */
struct io_tlb_area {
	unsigned long used;
	unsigned int index;
	spinlock_t lock;
};

/*
 * Round up number of slabs to the next power of 2. The last area is going
 * be smaller than the rest if default_nslabs is not power of two.
 * The number of slot in an area should be a multiple of IO_TLB_SEGSIZE,
 * otherwise a segment may span two or more areas. It conflicts with free
 * contiguous slots tracking: free slots are treated contiguous no matter
 * whether they cross an area boundary.
 *
 * Return true if default_nslabs is rounded up.
 */
static bool round_up_default_nslabs(void)
{
	if (!default_nareas)
		return false;

	if (default_nslabs < IO_TLB_SEGSIZE * default_nareas)
		default_nslabs = IO_TLB_SEGSIZE * default_nareas;
	else if (is_power_of_2(default_nslabs))
		return false;
	default_nslabs = roundup_pow_of_two(default_nslabs);
	return true;
}

/**
 * swiotlb_adjust_nareas() - adjust the number of areas and slots
 * @nareas:	Desired number of areas. Zero is treated as 1.
 *
 * Adjust the default number of areas in a memory pool.
 * The default size of the memory pool may also change to meet minimum area
 * size requirements.
 */
static void swiotlb_adjust_nareas(unsigned int nareas)
{
	if (!nareas)
		nareas = 1;
	else if (!is_power_of_2(nareas))
		nareas = roundup_pow_of_two(nareas);

	default_nareas = nareas;

	pr_info("area num %d.\n", nareas);
	if (round_up_default_nslabs())
		pr_info("SWIOTLB bounce buffer size roundup to %luMB",
			(default_nslabs << IO_TLB_SHIFT) >> 20);
}

/**
 * limit_nareas() - get the maximum number of areas for a given memory pool size
 * @nareas:	Desired number of areas.
 * @nslots:	Total number of slots in the memory pool.
 *
 * Limit the number of areas to the maximum possible number of areas in
 * a memory pool of the given size.
 *
 * Return: Maximum possible number of areas.
*/ static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) { if (nslots < nareas * IO_TLB_SEGSIZE) return nslots / IO_TLB_SEGSIZE; return nareas; } static int __init setup_io_tlb_npages(char *str) { if (isdigit(*str)) { /* avoid tail segment of size < IO_TLB_SEGSIZE */ default_nslabs = ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE); } if (*str == ',') ++str; if (isdigit(*str)) swiotlb_adjust_nareas(simple_strtoul(str, &str, 0)); if (*str == ',') ++str; if (!strcmp(str, "force")) swiotlb_force_bounce = true; else if (!strcmp(str, "noforce")) swiotlb_force_disable = true; return 0; } early_param("swiotlb", setup_io_tlb_npages); unsigned long swiotlb_size_or_default(void) { return default_nslabs << IO_TLB_SHIFT; } void __init swiotlb_adjust_size(unsigned long size) { /* * If swiotlb parameter has not been specified, give a chance to * architectures such as those supporting memory encryption to * adjust/expand SWIOTLB size for their use. */ if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT) return; size = ALIGN(size, IO_TLB_SIZE); default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); if (round_up_default_nslabs()) size = default_nslabs << IO_TLB_SHIFT; pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); } void swiotlb_print_info(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; if (!mem->nslabs) { pr_warn("No low mem\n"); return; } pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end, (mem->nslabs << IO_TLB_SHIFT) >> 20); } static inline unsigned long io_tlb_offset(unsigned long val) { return val & (IO_TLB_SEGSIZE - 1); } static inline unsigned long nr_slots(u64 val) { return DIV_ROUND_UP(val, IO_TLB_SIZE); } /* * Early SWIOTLB allocation may be too early to allow an architecture to * perform the desired operations. This function allows the architecture to * call SWIOTLB when the operations are possible. It needs to be called * before the SWIOTLB memory is used. */ void __init swiotlb_update_mem_attributes(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long bytes; if (!mem->nslabs || mem->late_alloc) return; bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT); } static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, unsigned long nslabs, bool late_alloc, unsigned int nareas) { void *vaddr = phys_to_virt(start); unsigned long bytes = nslabs << IO_TLB_SHIFT, i; mem->nslabs = nslabs; mem->start = start; mem->end = mem->start + bytes; mem->late_alloc = late_alloc; mem->nareas = nareas; mem->area_nslabs = nslabs / mem->nareas; for (i = 0; i < mem->nareas; i++) { spin_lock_init(&mem->areas[i].lock); mem->areas[i].index = 0; mem->areas[i].used = 0; } for (i = 0; i < mem->nslabs; i++) { mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; mem->slots[i].pad_slots = 0; } memset(vaddr, 0, bytes); mem->vaddr = vaddr; return; } /** * add_mem_pool() - add a memory pool to the allocator * @mem: Software IO TLB allocator. * @pool: Memory pool to be added. 
*/ static void add_mem_pool(struct io_tlb_mem *mem, struct io_tlb_pool *pool) { #ifdef CONFIG_SWIOTLB_DYNAMIC spin_lock(&mem->lock); list_add_rcu(&pool->node, &mem->pools); mem->nslabs += pool->nslabs; spin_unlock(&mem->lock); #else mem->nslabs = pool->nslabs; #endif } static void __init *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT); void *tlb; /* * By default allocate the bounce buffer memory from low memory, but * allow to pick a location everywhere for hypervisors with guest * memory encryption. */ if (flags & SWIOTLB_ANY) tlb = memblock_alloc(bytes, PAGE_SIZE); else tlb = memblock_alloc_low(bytes, PAGE_SIZE); if (!tlb) { pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", __func__, bytes); return NULL; } if (remap && remap(tlb, nslabs) < 0) { memblock_free(tlb, PAGE_ALIGN(bytes)); pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); return NULL; } return tlb; } /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long nslabs; unsigned int nareas; size_t alloc_size; void *tlb; if (!addressing_limit && !swiotlb_force_bounce) return; if (swiotlb_force_disable) return; io_tlb_default_mem.force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); #ifdef CONFIG_SWIOTLB_DYNAMIC if (!remap) io_tlb_default_mem.can_grow = true; if (flags & SWIOTLB_ANY) io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); else io_tlb_default_mem.phys_limit = ARCH_LOW_ADDRESS_LIMIT; #endif if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); nslabs = default_nslabs; nareas = limit_nareas(default_nareas, nslabs); while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) { if (nslabs <= IO_TLB_MIN_SLABS) return; nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); nareas = limit_nareas(nareas, nslabs); } if (default_nslabs != nslabs) { pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs", default_nslabs, nslabs); default_nslabs = nslabs; } alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); if (!mem->slots) { pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n", __func__, alloc_size, PAGE_SIZE); return; } mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area), nareas), SMP_CACHE_BYTES); if (!mem->areas) { pr_warn("%s: Failed to allocate mem->areas.\n", __func__); return; } swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas); add_mem_pool(&io_tlb_default_mem, mem); if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); } void __init swiotlb_init(bool addressing_limit, unsigned int flags) { swiotlb_init_remap(addressing_limit, flags, NULL); } /* * Systems with larger DMA zones (those that don't support ISA) can * initialize the swiotlb later using the slab allocator if needed. * This should be just like above, but with some error catching. 
*/ int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); unsigned int nareas; unsigned char *vstart = NULL; unsigned int order, area_order; bool retried = false; int rc = 0; if (io_tlb_default_mem.nslabs) return 0; if (swiotlb_force_disable) return 0; io_tlb_default_mem.force_bounce = swiotlb_force_bounce; #ifdef CONFIG_SWIOTLB_DYNAMIC if (!remap) io_tlb_default_mem.can_grow = true; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) io_tlb_default_mem.phys_limit = DMA_BIT_MASK(zone_dma_bits); else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32); else io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); #endif if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); retry: order = get_order(nslabs << IO_TLB_SHIFT); nslabs = SLABS_PER_PAGE << order; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, order); if (vstart) break; order--; nslabs = SLABS_PER_PAGE << order; retried = true; } if (!vstart) return -ENOMEM; if (remap) rc = remap(vstart, nslabs); if (rc) { free_pages((unsigned long)vstart, order); nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); if (nslabs < IO_TLB_MIN_SLABS) return rc; retried = true; goto retry; } if (retried) { pr_warn("only able to allocate %ld MB\n", (PAGE_SIZE << order) >> 20); } nareas = limit_nareas(default_nareas, nslabs); area_order = get_order(array_size(sizeof(*mem->areas), nareas)); mem->areas = (struct io_tlb_area *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order); if (!mem->areas) goto error_area; mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(array_size(sizeof(*mem->slots), nslabs))); if (!mem->slots) goto error_slots; set_memory_decrypted((unsigned long)vstart, (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true, nareas); add_mem_pool(&io_tlb_default_mem, mem); swiotlb_print_info(); return 0; error_slots: free_pages((unsigned long)mem->areas, area_order); error_area: free_pages((unsigned long)vstart, order); return -ENOMEM; } void __init swiotlb_exit(void) { struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long tbl_vaddr; size_t tbl_size, slots_size; unsigned int area_order; if (swiotlb_force_bounce) return; if (!mem->nslabs) return; pr_info("tearing down default memory pool\n"); tbl_vaddr = (unsigned long)phys_to_virt(mem->start); tbl_size = PAGE_ALIGN(mem->end - mem->start); slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs)); set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT); if (mem->late_alloc) { area_order = get_order(array_size(sizeof(*mem->areas), mem->nareas)); free_pages((unsigned long)mem->areas, area_order); free_pages(tbl_vaddr, get_order(tbl_size)); free_pages((unsigned long)mem->slots, get_order(slots_size)); } else { memblock_free_late(__pa(mem->areas), array_size(sizeof(*mem->areas), mem->nareas)); memblock_free_late(mem->start, tbl_size); memblock_free_late(__pa(mem->slots), slots_size); } memset(mem, 0, sizeof(*mem)); } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * alloc_dma_pages() - allocate pages to be used for DMA * @gfp: GFP flags for the allocation. * @bytes: Size of the buffer. * @phys_limit: Maximum allowed physical address of the buffer. * * Allocate pages from the buddy allocator. 
If successful, make the allocated * pages decrypted that they can be used for DMA. * * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) * if the allocated physical address was above @phys_limit. */ static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { unsigned int order = get_order(bytes); struct page *page; phys_addr_t paddr; void *vaddr; page = alloc_pages(gfp, order); if (!page) return NULL; paddr = page_to_phys(page); if (paddr + bytes - 1 > phys_limit) { __free_pages(page, order); return ERR_PTR(-EAGAIN); } vaddr = phys_to_virt(paddr); if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) goto error; return page; error: /* Intentional leak if pages cannot be encrypted again. */ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) __free_pages(page, order); return NULL; } /** * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer * @dev: Device for which a memory pool is allocated. * @bytes: Size of the buffer. * @phys_limit: Maximum allowed physical address of the buffer. * @gfp: GFP flags for the allocation. * * Return: Allocated pages, or %NULL on allocation failure. */ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, u64 phys_limit, gfp_t gfp) { struct page *page; /* * Allocate from the atomic pools if memory is encrypted and * the allocation is atomic, because decrypting may block. */ if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { void *vaddr; if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) return NULL; return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, dma_coherent_ok); } gfp &= ~GFP_ZONEMASK; if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) gfp |= __GFP_DMA; else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (__GFP_DMA32 | __GFP_DMA))) gfp |= __GFP_DMA32; else if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & __GFP_DMA)) gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; else return NULL; } return page; } /** * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer * @vaddr: Virtual address of the buffer. * @bytes: Size of the buffer. */ static void swiotlb_free_tlb(void *vaddr, size_t bytes) { if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && dma_free_from_pool(NULL, vaddr, bytes)) return; /* Intentional leak if pages cannot be encrypted again. */ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) __free_pages(virt_to_page(vaddr), get_order(bytes)); } /** * swiotlb_alloc_pool() - allocate a new IO TLB memory pool * @dev: Device for which a memory pool is allocated. * @minslabs: Minimum number of slabs. * @nslabs: Desired (maximum) number of slabs. * @nareas: Number of areas. * @phys_limit: Maximum DMA buffer physical address. * @gfp: GFP flags for the allocations. * * Allocate and initialize a new IO TLB memory pool. The actual number of * slabs may be reduced if allocation of @nslabs fails. If even * @minslabs cannot be allocated, this function fails. * * Return: New memory pool, or %NULL on allocation failure. 
*/ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, unsigned long minslabs, unsigned long nslabs, unsigned int nareas, u64 phys_limit, gfp_t gfp) { struct io_tlb_pool *pool; unsigned int slot_order; struct page *tlb; size_t pool_size; size_t tlb_size; if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) { nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER; nareas = limit_nareas(nareas, nslabs); } pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); pool = kzalloc(pool_size, gfp); if (!pool) goto error; pool->areas = (void *)pool + sizeof(*pool); tlb_size = nslabs << IO_TLB_SHIFT; while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) { if (nslabs <= minslabs) goto error_tlb; nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); nareas = limit_nareas(nareas, nslabs); tlb_size = nslabs << IO_TLB_SHIFT; } slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); pool->slots = (struct io_tlb_slot *) __get_free_pages(gfp, slot_order); if (!pool->slots) goto error_slots; swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas); return pool; error_slots: swiotlb_free_tlb(page_address(tlb), tlb_size); error_tlb: kfree(pool); error: return NULL; } /** * swiotlb_dyn_alloc() - dynamic memory pool allocation worker * @work: Pointer to dyn_alloc in struct io_tlb_mem. */ static void swiotlb_dyn_alloc(struct work_struct *work) { struct io_tlb_mem *mem = container_of(work, struct io_tlb_mem, dyn_alloc); struct io_tlb_pool *pool; pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs, default_nareas, mem->phys_limit, GFP_KERNEL); if (!pool) { pr_warn_ratelimited("Failed to allocate new pool"); return; } add_mem_pool(mem, pool); } /** * swiotlb_dyn_free() - RCU callback to free a memory pool * @rcu: RCU head in the corresponding struct io_tlb_pool. */ static void swiotlb_dyn_free(struct rcu_head *rcu) { struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu); size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); size_t tlb_size = pool->end - pool->start; free_pages((unsigned long)pool->slots, get_order(slots_size)); swiotlb_free_tlb(pool->vaddr, tlb_size); kfree(pool); } /** * swiotlb_find_pool() - find the IO TLB pool for a physical address * @dev: Device which has mapped the DMA buffer. * @paddr: Physical address within the DMA buffer. * * Find the IO TLB memory pool descriptor which contains the given physical * address, if any. * * Return: Memory pool which contains @paddr, or %NULL if none. */ struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) { if (paddr >= pool->start && paddr < pool->end) goto out; } list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) { if (paddr >= pool->start && paddr < pool->end) goto out; } pool = NULL; out: rcu_read_unlock(); return pool; } /** * swiotlb_del_pool() - remove an IO TLB pool from a device * @dev: Owning device. * @pool: Memory pool to be removed. */ static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool) { unsigned long flags; spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); list_del_rcu(&pool->node); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); call_rcu(&pool->rcu, swiotlb_dyn_free); } #endif /* CONFIG_SWIOTLB_DYNAMIC */ /** * swiotlb_dev_init() - initialize swiotlb fields in &struct device * @dev: Device to be initialized. 
*/ void swiotlb_dev_init(struct device *dev) { dev->dma_io_tlb_mem = &io_tlb_default_mem; #ifdef CONFIG_SWIOTLB_DYNAMIC INIT_LIST_HEAD(&dev->dma_io_tlb_pools); spin_lock_init(&dev->dma_io_tlb_lock); dev->dma_uses_io_tlb = false; #endif } /** * swiotlb_align_offset() - Get required offset into an IO TLB allocation. * @dev: Owning device. * @align_mask: Allocation alignment mask. * @addr: DMA address. * * Return the minimum offset from the start of an IO TLB allocation which is * required for a given buffer address and allocation alignment to keep the * device happy. * * First, the address bits covered by min_align_mask must be identical in the * original address and the bounce buffer address. High bits are preserved by * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra * padding bytes before the bounce buffer. * * Second, @align_mask specifies which bits of the first allocated slot must * be zero. This may require allocating additional padding slots, and then the * offset (in bytes) from the first such padding slot is returned. */ static unsigned int swiotlb_align_offset(struct device *dev, unsigned int align_mask, u64 addr) { return addr & dma_get_min_align_mask(dev) & (align_mask | (IO_TLB_SIZE - 1)); } /* * Bounce: copy the swiotlb buffer from or back to the original dma location */ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; int tlb_offset; if (orig_addr == INVALID_PHYS_ADDR) return; /* * It's valid for tlb_offset to be negative. This can happen when the * "offset" returned by swiotlb_align_offset() is non-zero, and the * tlb_addr is pointing within the first "offset" bytes of the second * or subsequent slots of the allocated swiotlb area. While it's not * valid for tlb_addr to be pointing within the first "offset" bytes * of the first slot, there's no way to check for such an error since * this function can't distinguish the first slot from the second and * subsequent slots. */ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - swiotlb_align_offset(dev, 0, orig_addr); orig_addr += tlb_offset; alloc_size -= tlb_offset; if (size > alloc_size) { dev_WARN_ONCE(dev, 1, "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", alloc_size, size); size = alloc_size; } if (PageHighMem(pfn_to_page(pfn))) { unsigned int offset = orig_addr & ~PAGE_MASK; struct page *page; unsigned int sz = 0; unsigned long flags; while (size) { sz = min_t(size_t, PAGE_SIZE - offset, size); local_irq_save(flags); page = pfn_to_page(pfn); if (dir == DMA_TO_DEVICE) memcpy_from_page(vaddr, page, offset, sz); else memcpy_to_page(page, offset, vaddr, sz); local_irq_restore(flags); size -= sz; pfn++; vaddr += sz; offset = 0; } } else if (dir == DMA_TO_DEVICE) { memcpy(vaddr, phys_to_virt(orig_addr), size); } else { memcpy(phys_to_virt(orig_addr), vaddr, size); } } static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) { return start + (idx << IO_TLB_SHIFT); } /* * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. 
*/ static inline unsigned long get_max_slots(unsigned long boundary_mask) { return (boundary_mask >> IO_TLB_SHIFT) + 1; } static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index) { if (index >= mem->area_nslabs) return 0; return index; } /* * Track the total used slots with a global atomic value in order to have * correct information to determine the high water mark. The mem_used() * function gives imprecise results because there's no locking across * multiple areas. */ #ifdef CONFIG_DEBUG_FS static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) { unsigned long old_hiwater, new_used; new_used = atomic_long_add_return(nslots, &mem->total_used); old_hiwater = atomic_long_read(&mem->used_hiwater); do { if (new_used <= old_hiwater) break; } while (!atomic_long_try_cmpxchg(&mem->used_hiwater, &old_hiwater, new_used)); } static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_sub(nslots, &mem->total_used); } #else /* !CONFIG_DEBUG_FS */ static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) { } static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) { } #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_SWIOTLB_DYNAMIC #ifdef CONFIG_DEBUG_FS static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_add(nslots, &mem->transient_nslabs); } static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { atomic_long_sub(nslots, &mem->transient_nslabs); } #else /* !CONFIG_DEBUG_FS */ static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { } static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) { } #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_SWIOTLB_DYNAMIC */ /** * swiotlb_search_pool_area() - search one memory area in one pool * @dev: Device which maps the buffer. * @pool: Memory pool to be searched. * @area_index: Index of the IO TLB memory area to be searched. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * * Find a suitable sequence of IO TLB entries for the request and allocate * a buffer from the given IO TLB memory area. * This function takes care of locking. * * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool, int area_index, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask) { struct io_tlb_area *area = pool->areas + area_index; unsigned long boundary_mask = dma_get_seg_boundary(dev); dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; unsigned int slot_index; BUG_ON(!nslots); BUG_ON(area_index >= pool->nareas); /* * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be * page-aligned in the absence of any other alignment requirements. * 'alloc_align_mask' was later introduced to specify the alignment * explicitly, however this is passed as zero for streaming mappings * and so we preserve the old behaviour there in case any drivers are * relying on it. 
*/ if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE) alloc_align_mask = PAGE_SIZE - 1; /* * Ensure that the allocation is at least slot-aligned and update * 'iotlb_align_mask' to ignore bits that will be preserved when * offsetting into the allocation. */ alloc_align_mask |= (IO_TLB_SIZE - 1); iotlb_align_mask &= ~alloc_align_mask; /* * For mappings with an alignment requirement don't bother looping to * unaligned slots once we found an aligned one. */ stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask)); spin_lock_irqsave(&area->lock, flags); if (unlikely(nslots > pool->area_nslabs - area->used)) goto not_found; slot_base = area_index * pool->area_nslabs; index = area->index; for (slots_checked = 0; slots_checked < pool->area_nslabs; ) { phys_addr_t tlb_addr; slot_index = slot_base + index; tlb_addr = slot_addr(tbl_dma_addr, slot_index); if ((tlb_addr & alloc_align_mask) || (orig_addr && (tlb_addr & iotlb_align_mask) != (orig_addr & iotlb_align_mask))) { index = wrap_area_index(pool, index + 1); slots_checked++; continue; } if (!iommu_is_span_boundary(slot_index, nslots, nr_slots(tbl_dma_addr), max_slots)) { if (pool->slots[slot_index].list >= nslots) goto found; } index = wrap_area_index(pool, index + stride); slots_checked += stride; } not_found: spin_unlock_irqrestore(&area->lock, flags); return -1; found: /* * If we find a slot that indicates we have 'nslots' number of * contiguous buffers, we allocate the buffers from that slot onwards * and set the list of free entries to '0' indicating unavailable. */ for (i = slot_index; i < slot_index + nslots; i++) { pool->slots[i].list = 0; pool->slots[i].alloc_size = alloc_size - (offset + ((i - slot_index) << IO_TLB_SHIFT)); } for (i = slot_index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && pool->slots[i].list; i--) pool->slots[i].list = ++count; /* * Update the indices to avoid searching in the next round. */ area->index = wrap_area_index(pool, index + nslots); area->used += nslots; spin_unlock_irqrestore(&area->lock, flags); inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots); return slot_index; } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * swiotlb_search_area() - search one memory area in all pools * @dev: Device which maps the buffer. * @start_cpu: Start CPU number. * @cpu_offset: Offset from @start_cpu. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * @retpool: Used memory pool, updated on return. * * Search one memory area in all pools for a sequence of slots that match the * allocation constraints. * * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_search_area(struct device *dev, int start_cpu, int cpu_offset, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; int area_index; int index = -1; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) { if (cpu_offset >= pool->nareas) continue; area_index = (start_cpu + cpu_offset) & (pool->nareas - 1); index = swiotlb_search_pool_area(dev, pool, area_index, orig_addr, alloc_size, alloc_align_mask); if (index >= 0) { *retpool = pool; break; } } rcu_read_unlock(); return index; } /** * swiotlb_find_slots() - search for slots in the whole swiotlb * @dev: Device which maps the buffer. 
* @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. * @retpool: Used memory pool, updated on return. * * Search through the whole software IO TLB to find a sequence of slots that * match the allocation constraints. * * Return: Index of the first allocated slot, or -1 on error. */ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; unsigned long nslabs; unsigned long flags; u64 phys_limit; int cpu, i; int index; if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE) return -1; cpu = raw_smp_processor_id(); for (i = 0; i < default_nareas; ++i) { index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size, alloc_align_mask, &pool); if (index >= 0) goto found; } if (!mem->can_grow) return -1; schedule_work(&mem->dyn_alloc); nslabs = nr_slots(alloc_size); phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit, GFP_NOWAIT | __GFP_NOWARN); if (!pool) return -1; index = swiotlb_search_pool_area(dev, pool, 0, orig_addr, alloc_size, alloc_align_mask); if (index < 0) { swiotlb_dyn_free(&pool->rcu); return -1; } pool->transient = true; spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); inc_transient_used(mem, pool->nslabs); found: WRITE_ONCE(dev->dma_uses_io_tlb, true); /* * The general barrier orders reads and writes against a presumed store * of the SWIOTLB buffer address by a device driver (to a driver private * data structure). It serves two purposes. * * First, the store to dev->dma_uses_io_tlb must be ordered before the * presumed store. This guarantees that the returned buffer address * cannot be passed to another CPU before updating dev->dma_uses_io_tlb. * * Second, the load from mem->pools must be ordered before the same * presumed store. This guarantees that the returned buffer address * cannot be observed by another CPU before an update of the RCU list * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy * atomicity). * * See also the comment in is_swiotlb_buffer(). */ smp_mb(); *retpool = pool; return index; } #else /* !CONFIG_SWIOTLB_DYNAMIC */ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { struct io_tlb_pool *pool; int start, i; int index; *retpool = pool = &dev->dma_io_tlb_mem->defpool; i = start = raw_smp_processor_id() & (pool->nareas - 1); do { index = swiotlb_search_pool_area(dev, pool, i, orig_addr, alloc_size, alloc_align_mask); if (index >= 0) return index; if (++i >= pool->nareas) i = 0; } while (i != start); return -1; } #endif /* CONFIG_SWIOTLB_DYNAMIC */ #ifdef CONFIG_DEBUG_FS /** * mem_used() - get number of used slots in an allocator * @mem: Software IO TLB allocator. * * The result is accurate in this version of the function, because an atomic * counter is available if CONFIG_DEBUG_FS is set. * * Return: Number of used slots. 
*/ static unsigned long mem_used(struct io_tlb_mem *mem) { return atomic_long_read(&mem->total_used); } #else /* !CONFIG_DEBUG_FS */ /** * mem_pool_used() - get number of used slots in a memory pool * @pool: Software IO TLB memory pool. * * The result is not accurate, see mem_used(). * * Return: Approximate number of used slots. */ static unsigned long mem_pool_used(struct io_tlb_pool *pool) { int i; unsigned long used = 0; for (i = 0; i < pool->nareas; i++) used += pool->areas[i].used; return used; } /** * mem_used() - get number of used slots in an allocator * @mem: Software IO TLB allocator. * * The result is not accurate, because there is no locking of individual * areas. * * Return: Approximate number of used slots. */ static unsigned long mem_used(struct io_tlb_mem *mem) { #ifdef CONFIG_SWIOTLB_DYNAMIC struct io_tlb_pool *pool; unsigned long used = 0; rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) used += mem_pool_used(pool); rcu_read_unlock(); return used; #else return mem_pool_used(&mem->defpool); #endif } #endif /* CONFIG_DEBUG_FS */ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, size_t mapping_size, size_t alloc_size, unsigned int alloc_align_mask, enum dma_data_direction dir, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; unsigned int offset; struct io_tlb_pool *pool; unsigned int i; int index; phys_addr_t tlb_addr; unsigned short pad_slots; if (!mem || !mem->nslabs) { dev_warn_ratelimited(dev, "Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); return (phys_addr_t)DMA_MAPPING_ERROR; } if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); if (mapping_size > alloc_size) { dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", mapping_size, alloc_size); return (phys_addr_t)DMA_MAPPING_ERROR; } offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset, alloc_align_mask, &pool); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", alloc_size, mem->nslabs, mem_used(mem)); return (phys_addr_t)DMA_MAPPING_ERROR; } /* * Save away the mapping from the original address to the DMA address. * This is needed when we sync the memory. Then we sync the buffer if * needed. */ pad_slots = offset >> IO_TLB_SHIFT; offset &= (IO_TLB_SIZE - 1); index += pad_slots; pool->slots[index].pad_slots = pad_slots; for (i = 0; i < nr_slots(alloc_size + offset); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; /* * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy * the original buffer to the TLB buffer before initiating DMA in order * to preserve the original's data if the device does a partial write, * i.e. if the device doesn't overwrite the entire buffer. Preserving * the original data, even if it's garbage, is necessary to match * hardware behavior. Use of swiotlb is supposed to be transparent, * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. 
*/ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr; } static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) { struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); int index, nslots, aindex; struct io_tlb_area *area; int count, i; index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; index -= mem->slots[index].pad_slots; nslots = nr_slots(mem->slots[index].alloc_size + offset); aindex = index / mem->area_nslabs; area = &mem->areas[aindex]; /* * Return the buffer to the free list by setting the corresponding * entries to indicate the number of contiguous entries available. * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. */ BUG_ON(aindex >= mem->nareas); spin_lock_irqsave(&area->lock, flags); if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) count = mem->slots[index + nslots].list; else count = 0; /* * Step 1: return the slots to the free list, merging the slots with * superceeding slots */ for (i = index + nslots - 1; i >= index; i--) { mem->slots[i].list = ++count; mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; mem->slots[i].pad_slots = 0; } /* * Step 2: merge the returned slots with the preceding slots, if * available (non zero) */ for (i = index - 1; io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; i--) mem->slots[i].list = ++count; area->used -= nslots; spin_unlock_irqrestore(&area->lock, flags); dec_used(dev->dma_io_tlb_mem, nslots); } #ifdef CONFIG_SWIOTLB_DYNAMIC /** * swiotlb_del_transient() - delete a transient memory pool * @dev: Device which mapped the buffer. * @tlb_addr: Physical address within a bounce buffer. * * Check whether the address belongs to a transient SWIOTLB memory pool. * If yes, then delete the pool. * * Return: %true if @tlb_addr belonged to a transient pool that was released. */ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) { struct io_tlb_pool *pool; pool = swiotlb_find_pool(dev, tlb_addr); if (!pool->transient) return false; dec_used(dev->dma_io_tlb_mem, pool->nslabs); swiotlb_del_pool(dev, pool); dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs); return true; } #else /* !CONFIG_SWIOTLB_DYNAMIC */ static inline bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) { return false; } #endif /* CONFIG_SWIOTLB_DYNAMIC */ /* * tlb_addr is the physical address of the bounce buffer to unmap. 
*/ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, size_t mapping_size, enum dma_data_direction dir, unsigned long attrs) { /* * First, sync the memory before unmapping the entry */ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); if (swiotlb_del_transient(dev, tlb_addr)) return; swiotlb_release_slots(dev, tlb_addr); } void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); else BUG_ON(dir != DMA_FROM_DEVICE); } void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir) { if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE); else BUG_ON(dir != DMA_TO_DEVICE); } /* * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing * to the device copy the data into it as well. */ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir, unsigned long attrs) { phys_addr_t swiotlb_addr; dma_addr_t dma_addr; trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); return DMA_MAPPING_ERROR; } if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) arch_sync_dma_for_device(swiotlb_addr, size, dir); return dma_addr; } size_t swiotlb_max_mapping_size(struct device *dev) { int min_align_mask = dma_get_min_align_mask(dev); int min_align = 0; /* * swiotlb_find_slots() skips slots according to * min align mask. This affects max mapping size. * Take it into acount here. */ if (min_align_mask) min_align = roundup(min_align_mask, IO_TLB_SIZE); return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; } /** * is_swiotlb_allocated() - check if the default software IO TLB is initialized */ bool is_swiotlb_allocated(void) { return io_tlb_default_mem.nslabs; } bool is_swiotlb_active(struct device *dev) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; return mem && mem->nslabs; } /** * default_swiotlb_base() - get the base address of the default SWIOTLB * * Get the lowest physical address used by the default software IO TLB pool. */ phys_addr_t default_swiotlb_base(void) { #ifdef CONFIG_SWIOTLB_DYNAMIC io_tlb_default_mem.can_grow = false; #endif return io_tlb_default_mem.defpool.start; } /** * default_swiotlb_limit() - get the address limit of the default SWIOTLB * * Get the highest physical address used by the default software IO TLB pool. 
*/ phys_addr_t default_swiotlb_limit(void) { #ifdef CONFIG_SWIOTLB_DYNAMIC return io_tlb_default_mem.phys_limit; #else return io_tlb_default_mem.defpool.end - 1; #endif } #ifdef CONFIG_DEBUG_FS #ifdef CONFIG_SWIOTLB_DYNAMIC static unsigned long mem_transient_used(struct io_tlb_mem *mem) { return atomic_long_read(&mem->transient_nslabs); } static int io_tlb_transient_used_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = mem_transient_used(mem); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get, NULL, "%llu\n"); #endif /* CONFIG_SWIOTLB_DYNAMIC */ static int io_tlb_used_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = mem_used(mem); return 0; } static int io_tlb_hiwater_get(void *data, u64 *val) { struct io_tlb_mem *mem = data; *val = atomic_long_read(&mem->used_hiwater); return 0; } static int io_tlb_hiwater_set(void *data, u64 val) { struct io_tlb_mem *mem = data; /* Only allow setting to zero */ if (val != 0) return -EINVAL; atomic_long_set(&mem->used_hiwater, val); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get, io_tlb_hiwater_set, "%llu\n"); static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); if (!mem->nslabs) return; debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem, &fops_io_tlb_used); debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, &fops_io_tlb_hiwater); #ifdef CONFIG_SWIOTLB_DYNAMIC debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, mem, &fops_io_tlb_transient_used); #endif } static int __init swiotlb_create_default_debugfs(void) { swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb"); return 0; } late_initcall(swiotlb_create_default_debugfs); #else /* !CONFIG_DEBUG_FS */ static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { } #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_DMA_RESTRICTED_POOL struct page *swiotlb_alloc(struct device *dev, size_t size) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; phys_addr_t tlb_addr; unsigned int align; int index; if (!mem) return NULL; align = (1 << (get_order(size) + PAGE_SHIFT)) - 1; index = swiotlb_find_slots(dev, 0, size, align, &pool); if (index == -1) return NULL; tlb_addr = slot_addr(pool->start, index); if (unlikely(!PAGE_ALIGNED(tlb_addr))) { dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", &tlb_addr); swiotlb_release_slots(dev, tlb_addr); return NULL; } return pfn_to_page(PFN_DOWN(tlb_addr)); } bool swiotlb_free(struct device *dev, struct page *page, size_t size) { phys_addr_t tlb_addr = page_to_phys(page); if (!is_swiotlb_buffer(dev, tlb_addr)) return false; swiotlb_release_slots(dev, tlb_addr); return true; } static int rmem_swiotlb_device_init(struct reserved_mem *rmem, struct device *dev) { struct io_tlb_mem *mem = rmem->priv; unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; /* Set Per-device io tlb area to one */ unsigned int nareas = 1; if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) { dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping."); return -EINVAL; } /* * Since multiple devices can share the same pool, the private data, * io_tlb_mem struct, will be initialized by the first device 
attached
	 * to it.
	 */
	if (!mem) {
		struct io_tlb_pool *pool;

		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
		if (!mem)
			return -ENOMEM;
		pool = &mem->defpool;

		pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL);
		if (!pool->slots) {
			kfree(mem);
			return -ENOMEM;
		}

		pool->areas = kcalloc(nareas, sizeof(*pool->areas),
				GFP_KERNEL);
		if (!pool->areas) {
			kfree(pool->slots);
			kfree(mem);
			return -ENOMEM;
		}

		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
				     rmem->size >> PAGE_SHIFT);
		swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs,
					 false, nareas);
		mem->force_bounce = true;
		mem->for_alloc = true;
#ifdef CONFIG_SWIOTLB_DYNAMIC
		spin_lock_init(&mem->lock);
		INIT_LIST_HEAD_RCU(&mem->pools);
#endif
		add_mem_pool(mem, pool);

		rmem->priv = mem;

		swiotlb_create_debugfs_files(mem, rmem->name);
	}

	dev->dma_io_tlb_mem = mem;

	return 0;
}

static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
					struct device *dev)
{
	dev->dma_io_tlb_mem = &io_tlb_default_mem;
}

static const struct reserved_mem_ops rmem_swiotlb_ops = {
	.device_init = rmem_swiotlb_device_init,
	.device_release = rmem_swiotlb_device_release,
};

static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
{
	unsigned long node = rmem->fdt_node;

	if (of_get_flat_dt_prop(node, "reusable", NULL) ||
	    of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
	    of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
	    of_get_flat_dt_prop(node, "no-map", NULL))
		return -EINVAL;

	rmem->ops = &rmem_swiotlb_ops;
	pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
		&rmem->base, (unsigned long)rmem->size / SZ_1M);
	return 0;
}

RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
#endif /* CONFIG_DMA_RESTRICTED_POOL */
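/*
 * Illustrative sketch, not part of the upstream file: one way a caller on
 * the dma-direct path might exercise the bounce-buffer lifecycle defined
 * above for a device that cannot reach the original buffer.  The helper
 * example_bounce_rx() and its surrounding flow are hypothetical; only
 * swiotlb_map(), dma_to_phys() and swiotlb_tbl_unmap_single() are real
 * interfaces from this file and <linux/dma-direct.h>.  Kept under #if 0
 * so it is never compiled.
 */
#if 0
static int example_bounce_rx(struct device *dev, struct page *page,
			     unsigned long offset, size_t size)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dma_addr;

	/*
	 * Find free slots and bounce the original buffer into the swiotlb.
	 * Even for DMA_FROM_DEVICE the original data is copied first, so a
	 * partial device write cannot clobber unwritten bytes.
	 */
	dma_addr = swiotlb_map(dev, phys, size, DMA_FROM_DEVICE, 0);
	if (dma_addr == DMA_MAPPING_ERROR)
		return -ENOMEM;

	/* ... program the device with dma_addr and wait for completion ... */

	/*
	 * Copy the device-written data back to the original page and return
	 * the slots to the free list.  The bounce buffer's physical address
	 * is recovered from the DMA address, as dma-direct does on unmap.
	 */
	swiotlb_tbl_unmap_single(dev, dma_to_phys(dev, dma_addr), size,
				 DMA_FROM_DEVICE, 0);
	return 0;
}
#endif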
// SPDX-License-Identifier: GPL-2.0-only
/*
 * (C) 1997 Linus Torvalds
 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
 */
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/cdev.h>
#include <linux/memblock.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/posix_acl.h>
#include <linux/buffer_head.h> /* for inode_has_buffers */
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include <linux/iversion.h>
#include <linux/rw_hint.h>
#include <trace/events/writeback.h>
#include "internal.h"

/*
 * Inode locking rules:
 *
 * inode->i_lock protects:
 *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
 * Inode LRU list locks protect:
 *   inode->i_sb->s_inode_lru, inode->i_lru
 * inode->i_sb->s_inode_list_lock protects:
 *   inode->i_sb->s_inodes, inode->i_sb_list
 * bdi->wb.list_lock protects:
 *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
 * inode_hash_lock protects:
 *   inode_hashtable, inode->i_hash
 *
 * Lock ordering:
 *
 * inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *     Inode LRU list locks
 *
 * bdi->wb.list_lock
 *   inode->i_lock
 *
 * inode_hash_lock
 *   inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *
 * iunique_lock
 *   inode_hash_lock
 */

static unsigned int i_hash_mask __ro_after_init;
static unsigned int i_hash_shift __ro_after_init;
static struct hlist_head *inode_hashtable __ro_after_init;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);

/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
 */
const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);

static DEFINE_PER_CPU(unsigned long, nr_inodes);
static DEFINE_PER_CPU(unsigned long, nr_unused);

static struct kmem_cache *inode_cachep __ro_after_init;

static long get_nr_inodes(void)
{
	int i;
	long sum = 0;

	for_each_possible_cpu(i)
		sum += per_cpu(nr_inodes, i);
	return sum < 0 ? 0 : sum;
}

static inline long get_nr_inodes_unused(void)
{
	int i;
	long sum = 0;

	for_each_possible_cpu(i)
		sum += per_cpu(nr_unused, i);
	return sum < 0 ? 0 : sum;
}

long get_nr_dirty_inodes(void)
{
	/* not actually dirty inodes, but a wild approximation */
	long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();

	return nr_dirty > 0 ? nr_dirty : 0;
}

/*
 * Handle nr_inode sysctl
 */
#ifdef CONFIG_SYSCTL
/*
 * Statistics gathering..
 */
static struct inodes_stat_t inodes_stat;

static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	inodes_stat.nr_unused = get_nr_inodes_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}

static struct ctl_table inodes_sysctls[] = {
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
};

static int __init init_fs_inode_sysctls(void)
{
	register_sysctl_init("fs", inodes_sysctls);
	return 0;
}
early_initcall(init_fs_inode_sysctls);
#endif

static int no_open(struct inode *inode, struct file *file)
{
	return -ENXIO;
}

/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
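 *
 * Filesystems that set up inodes outside the generic alloc_inode() path
 * (for example when recycling inodes from a cache of their own) can call
 * this helper directly; it returns 0 on success and -ENOMEM on failure.
 * A minimal sketch, where struct foo_inode is a hypothetical filesystem
 * inode that embeds a struct inode as vfs_inode and the foo_* helpers
 * are made up for illustration:
 *
 *	struct foo_inode *fi = foo_grab_cached_inode(sb);
 *
 *	if (inode_init_always(sb, &fi->vfs_inode)) {
 *		foo_release_cached_inode(fi);
 *		return -ENOMEM;
 *	}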
*/ int inode_init_always(struct super_block *sb, struct inode *inode) { static const struct inode_operations empty_iops; static const struct file_operations no_open_fops = {.open = no_open}; struct address_space *const mapping = &inode->i_data; inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; inode->i_fop = &no_open_fops; inode->i_ino = 0; inode->__i_nlink = 1; inode->i_opflags = 0; if (sb->s_xattr) inode->i_opflags |= IOP_XATTR; i_uid_write(inode, 0); i_gid_write(inode, 0); atomic_set(&inode->i_writecount, 0); inode->i_size = 0; inode->i_write_hint = WRITE_LIFE_NOT_SET; inode->i_blocks = 0; inode->i_bytes = 0; inode->i_generation = 0; inode->i_pipe = NULL; inode->i_cdev = NULL; inode->i_link = NULL; inode->i_dir_seq = 0; inode->i_rdev = 0; inode->dirtied_when = 0; #ifdef CONFIG_CGROUP_WRITEBACK inode->i_wb_frn_winner = 0; inode->i_wb_frn_avg_time = 0; inode->i_wb_frn_history = 0; #endif spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); init_rwsem(&inode->i_rwsem); lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key); atomic_set(&inode->i_dio_count, 0); mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); #ifdef CONFIG_READ_ONLY_THP_FOR_FS atomic_set(&mapping->nr_thps, 0); #endif mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->i_private_data = NULL; mapping->writeback_index = 0; init_rwsem(&mapping->invalidate_lock); lockdep_set_class_and_name(&mapping->invalidate_lock, &sb->s_type->invalidate_lock_key, "mapping.invalidate_lock"); if (sb->s_iflags & SB_I_STABLE_WRITES) mapping_set_stable_writes(mapping); inode->i_private = NULL; inode->i_mapping = mapping; INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ #ifdef CONFIG_FS_POSIX_ACL inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; #endif #ifdef CONFIG_FSNOTIFY inode->i_fsnotify_mask = 0; #endif inode->i_flctx = NULL; if (unlikely(security_inode_alloc(inode))) return -ENOMEM; this_cpu_inc(nr_inodes); return 0; } EXPORT_SYMBOL(inode_init_always); void free_inode_nonrcu(struct inode *inode) { kmem_cache_free(inode_cachep, inode); } EXPORT_SYMBOL(free_inode_nonrcu); static void i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); if (inode->free_inode) inode->free_inode(inode); else free_inode_nonrcu(inode); } static struct inode *alloc_inode(struct super_block *sb) { const struct super_operations *ops = sb->s_op; struct inode *inode; if (ops->alloc_inode) inode = ops->alloc_inode(sb); else inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL); if (!inode) return NULL; if (unlikely(inode_init_always(sb, inode))) { if (ops->destroy_inode) { ops->destroy_inode(inode); if (!ops->free_inode) return NULL; } inode->free_inode = ops->free_inode; i_callback(&inode->i_rcu); return NULL; } return inode; } void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); inode_detach_wb(inode); security_inode_free(inode); fsnotify_inode_delete(inode); locks_free_lock_context(inode); if (!inode->i_nlink) { WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); atomic_long_dec(&inode->i_sb->s_remove_count); } #ifdef CONFIG_FS_POSIX_ACL if (inode->i_acl && !is_uncached_acl(inode->i_acl)) posix_acl_release(inode->i_acl); if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl)) 
posix_acl_release(inode->i_default_acl); #endif this_cpu_dec(nr_inodes); } EXPORT_SYMBOL(__destroy_inode); static void destroy_inode(struct inode *inode) { const struct super_operations *ops = inode->i_sb->s_op; BUG_ON(!list_empty(&inode->i_lru)); __destroy_inode(inode); if (ops->destroy_inode) { ops->destroy_inode(inode); if (!ops->free_inode) return; } inode->free_inode = ops->free_inode; call_rcu(&inode->i_rcu, i_callback); } /** * drop_nlink - directly drop an inode's link count * @inode: inode * * This is a low-level filesystem helper to replace any * direct filesystem manipulation of i_nlink. In cases * where we are attempting to track writes to the * filesystem, a decrement to zero means an imminent * write when the file is truncated and actually unlinked * on the filesystem. */ void drop_nlink(struct inode *inode) { WARN_ON(inode->i_nlink == 0); inode->__i_nlink--; if (!inode->i_nlink) atomic_long_inc(&inode->i_sb->s_remove_count); } EXPORT_SYMBOL(drop_nlink); /** * clear_nlink - directly zero an inode's link count * @inode: inode * * This is a low-level filesystem helper to replace any * direct filesystem manipulation of i_nlink. See * drop_nlink() for why we care about i_nlink hitting zero. */ void clear_nlink(struct inode *inode) { if (inode->i_nlink) { inode->__i_nlink = 0; atomic_long_inc(&inode->i_sb->s_remove_count); } } EXPORT_SYMBOL(clear_nlink); /** * set_nlink - directly set an inode's link count * @inode: inode * @nlink: new nlink (should be non-zero) * * This is a low-level filesystem helper to replace any * direct filesystem manipulation of i_nlink. */ void set_nlink(struct inode *inode, unsigned int nlink) { if (!nlink) { clear_nlink(inode); } else { /* Yes, some filesystems do change nlink from zero to one */ if (inode->i_nlink == 0) atomic_long_dec(&inode->i_sb->s_remove_count); inode->__i_nlink = nlink; } } EXPORT_SYMBOL(set_nlink); /** * inc_nlink - directly increment an inode's link count * @inode: inode * * This is a low-level filesystem helper to replace any * direct filesystem manipulation of i_nlink. Currently, * it is only here for parity with dec_nlink(). */ void inc_nlink(struct inode *inode) { if (unlikely(inode->i_nlink == 0)) { WARN_ON(!(inode->i_state & I_LINKABLE)); atomic_long_dec(&inode->i_sb->s_remove_count); } inode->__i_nlink++; } EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->i_private_list); spin_lock_init(&mapping->i_private_lock); mapping->i_mmap = RB_ROOT_CACHED; } void address_space_init_once(struct address_space *mapping) { memset(mapping, 0, sizeof(*mapping)); __address_space_init_once(mapping); } EXPORT_SYMBOL(address_space_init_once); /* * These are initializations that only need to be done * once, because the fields are idempotent across use * of the inode, so let the slab aware of that. 
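 *
 * A filesystem with its own inode slab normally calls this from its
 * kmem_cache constructor, much like init_once() below does for the
 * plain inode cache. Sketch only; the foo_* names are hypothetical:
 *
 *	static void foo_init_once(void *p)
 *	{
 *		struct foo_inode *fi = p;
 *
 *		inode_init_once(&fi->vfs_inode);
 *	}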
*/ void inode_init_once(struct inode *inode) { memset(inode, 0, sizeof(*inode)); INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_devices); INIT_LIST_HEAD(&inode->i_io_list); INIT_LIST_HEAD(&inode->i_wb_list); INIT_LIST_HEAD(&inode->i_lru); INIT_LIST_HEAD(&inode->i_sb_list); __address_space_init_once(&inode->i_data); i_size_ordered_init(inode); } EXPORT_SYMBOL(inode_init_once); static void init_once(void *foo) { struct inode *inode = (struct inode *) foo; inode_init_once(inode); } /* * inode->i_lock must be held */ void __iget(struct inode *inode) { atomic_inc(&inode->i_count); } /* * get additional reference to inode; caller must already hold one. */ void ihold(struct inode *inode) { WARN_ON(atomic_inc_return(&inode->i_count) < 2); } EXPORT_SYMBOL(ihold); static void __inode_add_lru(struct inode *inode, bool rotate) { if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) return; if (atomic_read(&inode->i_count)) return; if (!(inode->i_sb->s_flags & SB_ACTIVE)) return; if (!mapping_shrinkable(&inode->i_data)) return; if (list_lru_add_obj(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); else if (rotate) inode->i_state |= I_REFERENCED; } /* * Add inode to LRU if needed (inode is unused and clean). * * Needs inode->i_lock held. */ void inode_add_lru(struct inode *inode) { __inode_add_lru(inode, false); } static void inode_lru_list_del(struct inode *inode) { if (list_lru_del_obj(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_dec(nr_unused); } /** * inode_sb_list_add - add inode to the superblock list of inodes * @inode: inode to add */ void inode_sb_list_add(struct inode *inode) { spin_lock(&inode->i_sb->s_inode_list_lock); list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); spin_unlock(&inode->i_sb->s_inode_list_lock); } EXPORT_SYMBOL_GPL(inode_sb_list_add); static inline void inode_sb_list_del(struct inode *inode) { if (!list_empty(&inode->i_sb_list)) { spin_lock(&inode->i_sb->s_inode_list_lock); list_del_init(&inode->i_sb_list); spin_unlock(&inode->i_sb->s_inode_list_lock); } } static unsigned long hash(struct super_block *sb, unsigned long hashval) { unsigned long tmp; tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / L1_CACHE_BYTES; tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift); return tmp & i_hash_mask; } /** * __insert_inode_hash - hash an inode * @inode: unhashed inode * @hashval: unsigned long value used to locate this object in the * inode_hashtable. * * Add an inode to the inode hash for this superblock. */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); hlist_add_head_rcu(&inode->i_hash, b); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } EXPORT_SYMBOL(__insert_inode_hash); /** * __remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * * Remove an inode from the superblock. 
*/ void __remove_inode_hash(struct inode *inode) { spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); hlist_del_init_rcu(&inode->i_hash); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } EXPORT_SYMBOL(__remove_inode_hash); void dump_mapping(const struct address_space *mapping) { struct inode *host; const struct address_space_operations *a_ops; struct hlist_node *dentry_first; struct dentry *dentry_ptr; struct dentry dentry; unsigned long ino; /* * If mapping is an invalid pointer, we don't want to crash * accessing it, so probe everything depending on it carefully. */ if (get_kernel_nofault(host, &mapping->host) || get_kernel_nofault(a_ops, &mapping->a_ops)) { pr_warn("invalid mapping:%px\n", mapping); return; } if (!host) { pr_warn("aops:%ps\n", a_ops); return; } if (get_kernel_nofault(dentry_first, &host->i_dentry.first) || get_kernel_nofault(ino, &host->i_ino)) { pr_warn("aops:%ps invalid inode:%px\n", a_ops, host); return; } if (!dentry_first) { pr_warn("aops:%ps ino:%lx\n", a_ops, ino); return; } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); if (get_kernel_nofault(dentry, dentry_ptr) || !dentry.d_parent || !dentry.d_name.name) { pr_warn("aops:%ps ino:%lx invalid dentry:%px\n", a_ops, ino, dentry_ptr); return; } /* * if dentry is corrupted, the %pd handler may still crash, * but it's unlikely that we reach here with a corrupt mapping */ pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry); } void clear_inode(struct inode *inode) { /* * We have to cycle the i_pages lock here because reclaim can be in the * process of removing the last page (in __filemap_remove_folio()) * and we must not free the mapping under it. */ xa_lock_irq(&inode->i_data.i_pages); BUG_ON(inode->i_data.nrpages); /* * Almost always, mapping_empty(&inode->i_data) here; but there are * two known and long-standing ways in which nodes may get left behind * (when deep radix-tree node allocation failed partway; or when THP * collapse_file() failed). Until those two known cases are cleaned up, * or a cleanup function is called here, do not BUG_ON(!mapping_empty), * nor even WARN_ON(!mapping_empty). */ xa_unlock_irq(&inode->i_data.i_pages); BUG_ON(!list_empty(&inode->i_data.i_private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); BUG_ON(!list_empty(&inode->i_wb_list)); /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } EXPORT_SYMBOL(clear_inode); /* * Free the inode passed in, removing it from the lists it is still connected * to. We remove any pages still attached to the inode and wait for any IO that * is still in progress before finally destroying the inode. * * An inode must already be marked I_FREEING so that we avoid the inode being * moved back onto lists if we race with other code that manipulates the lists * (e.g. writeback_single_inode). The caller is responsible for setting this. * * An inode must already be removed from the LRU list before being evicted from * the cache. This should occur atomically with setting the I_FREEING state * flag, so no inodes here should ever be on the LRU when being evicted. 
*/ static void evict(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); if (!list_empty(&inode->i_io_list)) inode_io_list_del(inode); inode_sb_list_del(inode); /* * Wait for flusher thread to be done with the inode so that filesystem * does not start destroying it while writeback is still running. Since * the inode has I_FREEING set, flusher thread won't start new work on * the inode. We just have to wait for running writeback to finish. */ inode_wait_for_writeback(inode); if (op->evict_inode) { op->evict_inode(inode); } else { truncate_inode_pages_final(&inode->i_data); clear_inode(inode); } if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); remove_inode_hash(inode); spin_lock(&inode->i_lock); wake_up_bit(&inode->i_state, __I_NEW); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); spin_unlock(&inode->i_lock); destroy_inode(inode); } /* * dispose_list - dispose of the contents of a local list * @head: the head of the list to free * * Dispose-list gets a local list with local inodes in it, so it doesn't * need to worry about list corruption and SMP locks. */ static void dispose_list(struct list_head *head) { while (!list_empty(head)) { struct inode *inode; inode = list_first_entry(head, struct inode, i_lru); list_del_init(&inode->i_lru); evict(inode); cond_resched(); } } /** * evict_inodes - evict all evictable inodes for a superblock * @sb: superblock to operate on * * Make sure that no inodes with zero refcount are retained. This is * called by superblock shutdown after having SB_ACTIVE flag removed, * so any inode reaching zero refcount during or after that call will * be immediately evicted. */ void evict_inodes(struct super_block *sb) { struct inode *inode, *next; LIST_HEAD(dispose); again: spin_lock(&sb->s_inode_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { if (atomic_read(&inode->i_count)) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { spin_unlock(&inode->i_lock); continue; } inode->i_state |= I_FREEING; inode_lru_list_del(inode); spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); /* * We can have a ton of inodes to evict at unmount time given * enough memory, check to see if we need to go to sleep for a * bit so we don't livelock. */ if (need_resched()) { spin_unlock(&sb->s_inode_list_lock); cond_resched(); dispose_list(&dispose); goto again; } } spin_unlock(&sb->s_inode_list_lock); dispose_list(&dispose); } EXPORT_SYMBOL_GPL(evict_inodes); /** * invalidate_inodes - attempt to free all inodes on a superblock * @sb: superblock to operate on * * Attempts to free all inodes (including dirty inodes) for a given superblock. */ void invalidate_inodes(struct super_block *sb) { struct inode *inode, *next; LIST_HEAD(dispose); again: spin_lock(&sb->s_inode_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { spin_unlock(&inode->i_lock); continue; } if (atomic_read(&inode->i_count)) { spin_unlock(&inode->i_lock); continue; } inode->i_state |= I_FREEING; inode_lru_list_del(inode); spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); if (need_resched()) { spin_unlock(&sb->s_inode_list_lock); cond_resched(); dispose_list(&dispose); goto again; } } spin_unlock(&sb->s_inode_list_lock); dispose_list(&dispose); } /* * Isolate the inode from the LRU in preparation for freeing it. 
* * If the inode has the I_REFERENCED flag set, then it means that it has been * used recently - the flag is set in iput_final(). When we encounter such an * inode, clear the flag and move it to the back of the LRU so it gets another * pass through the LRU before it gets reclaimed. This is necessary because of * the fact we are doing lazy LRU updates to minimise lock contention so the * LRU does not have strict ordering. Hence we don't want to reclaim inodes * with this flag set because they are the inodes that are out of order. */ static enum lru_status inode_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) { struct list_head *freeable = arg; struct inode *inode = container_of(item, struct inode, i_lru); /* * We are inverting the lru lock/inode->i_lock here, so use a * trylock. If we fail to get the lock, just skip it. */ if (!spin_trylock(&inode->i_lock)) return LRU_SKIP; /* * Inodes can get referenced, redirtied, or repopulated while * they're already on the LRU, and this can make them * unreclaimable for a while. Remove them lazily here; iput, * sync, or the last page cache deletion will requeue them. */ if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED) || !mapping_shrinkable(&inode->i_data)) { list_lru_isolate(lru, &inode->i_lru); spin_unlock(&inode->i_lock); this_cpu_dec(nr_unused); return LRU_REMOVED; } /* Recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; } /* * On highmem systems, mapping_shrinkable() permits dropping * page cache in order to free up struct inodes: lowmem might * be under pressure before the cache inside the highmem zone. */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(lru_lock); if (remove_inode_buffers(inode)) { unsigned long reap; reap = invalidate_mapping_pages(&inode->i_data, 0, -1); if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); mm_account_reclaimed_pages(reap); } iput(inode); spin_lock(lru_lock); return LRU_RETRY; } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; list_lru_isolate_move(lru, &inode->i_lru, freeable); spin_unlock(&inode->i_lock); this_cpu_dec(nr_unused); return LRU_REMOVED; } /* * Walk the superblock inode LRU for freeable inodes and attempt to free them. * This is called from the superblock shrinker function with a number of inodes * to trim from the LRU. Inodes to be freed are moved to a temporary list and * then are freed outside inode_lock by dispose_list(). */ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) { LIST_HEAD(freeable); long freed; freed = list_lru_shrink_walk(&sb->s_inode_lru, sc, inode_lru_isolate, &freeable); dispose_list(&freeable); return freed; } static void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. 
*/ static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data) { struct inode *inode = NULL; repeat: hlist_for_each_entry(inode, head, i_hash) { if (inode->i_sb != sb) continue; if (!test(inode, data)) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { spin_unlock(&inode->i_lock); return ERR_PTR(-ESTALE); } __iget(inode); spin_unlock(&inode->i_lock); return inode; } return NULL; } /* * find_inode_fast is the fast path version of find_inode, see the comment at * iget_locked for details. */ static struct inode *find_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) { struct inode *inode = NULL; repeat: hlist_for_each_entry(inode, head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { spin_unlock(&inode->i_lock); return ERR_PTR(-ESTALE); } __iget(inode); spin_unlock(&inode->i_lock); return inode; } return NULL; } /* * Each cpu owns a range of LAST_INO_BATCH numbers. * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations, * to renew the exhausted range. * * This does not significantly increase overflow rate because every CPU can * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the * 2^32 range, and is a worst-case. Even a 50% wastage would only increase * overflow rate by 2x, which does not seem too significant. * * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW * error if st_ino won't fit in target struct field. Use 32bit counter * here to attempt to avoid that. */ #define LAST_INO_BATCH 1024 static DEFINE_PER_CPU(unsigned int, last_ino); unsigned int get_next_ino(void) { unsigned int *p = &get_cpu_var(last_ino); unsigned int res = *p; #ifdef CONFIG_SMP if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) { static atomic_t shared_last_ino; int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino); res = next - LAST_INO_BATCH; } #endif res++; /* get_next_ino should not provide a 0 inode number */ if (unlikely(!res)) res++; *p = res; put_cpu_var(last_ino); return res; } EXPORT_SYMBOL(get_next_ino); /** * new_inode_pseudo - obtain an inode * @sb: superblock * * Allocates a new inode for given superblock. * Inode wont be chained in superblock s_inodes list * This means : * - fs can't be unmount * - quotas, fsnotify, writeback can't work */ struct inode *new_inode_pseudo(struct super_block *sb) { struct inode *inode = alloc_inode(sb); if (inode) { spin_lock(&inode->i_lock); inode->i_state = 0; spin_unlock(&inode->i_lock); } return inode; } /** * new_inode - obtain an inode * @sb: superblock * * Allocates a new inode for given superblock. The default gfp_mask * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE. 
* If HIGHMEM pages are unsuitable or it is known that pages allocated * for the page cache are not reclaimable or migratable, * mapping_set_gfp_mask() must be called with suitable flags on the * newly created inode's mapping * */ struct inode *new_inode(struct super_block *sb) { struct inode *inode; inode = new_inode_pseudo(sb); if (inode) inode_sb_list_add(inode); return inode; } EXPORT_SYMBOL(new_inode); #ifdef CONFIG_DEBUG_LOCK_ALLOC void lockdep_annotate_inode_mutex_key(struct inode *inode) { if (S_ISDIR(inode->i_mode)) { struct file_system_type *type = inode->i_sb->s_type; /* Set new key only if filesystem hasn't already changed it */ if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) { /* * ensure nobody is actually holding i_mutex */ // mutex_destroy(&inode->i_mutex); init_rwsem(&inode->i_rwsem); lockdep_set_class(&inode->i_rwsem, &type->i_mutex_dir_key); } } } EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key); #endif /** * unlock_new_inode - clear the I_NEW state and wake up any waiters * @inode: new inode to unlock * * Called when the inode is fully initialised to clear the new state of the * inode and wake up anyone waiting for the inode to finish initialisation. */ void unlock_new_inode(struct inode *inode) { lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(unlock_new_inode); void discard_new_inode(struct inode *inode) { lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); iput(inode); } EXPORT_SYMBOL(discard_new_inode); /** * lock_two_nondirectories - take two i_mutexes on non-directory objects * * Lock any non-NULL argument. Passed objects must not be directories. * Zero, one or two objects may be locked by this function. * * @inode1: first inode to lock * @inode2: second inode to lock */ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) { if (inode1) WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); if (inode2) WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); if (inode1 > inode2) swap(inode1, inode2); if (inode1) inode_lock(inode1); if (inode2 && inode2 != inode1) inode_lock_nested(inode2, I_MUTEX_NONDIR2); } EXPORT_SYMBOL(lock_two_nondirectories); /** * unlock_two_nondirectories - release locks from lock_two_nondirectories() * @inode1: first inode to unlock * @inode2: second inode to unlock */ void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) { if (inode1) { WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); inode_unlock(inode1); } if (inode2 && inode2 != inode1) { WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); inode_unlock(inode2); } } EXPORT_SYMBOL(unlock_two_nondirectories); /** * inode_insert5 - obtain an inode from a mounted file system * @inode: pre-allocated inode to use for insert to cache * @hashval: hash value (usually inode number) to get * @test: callback used for comparisons between inodes * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set * * Search for the inode specified by @hashval and @data in the inode cache, * and if present it is return it with an increased reference count. This is * a variant of iget5_locked() for callers that don't want to fail on memory * allocation of inode. 
* * If the inode is not in cache, insert the pre-allocated inode to cache and * return it locked, hashed, and with the I_NEW flag set. The file system gets * to fill it in before unlocking it via unlock_new_inode(). * * Note both @test and @set are called with the inode_hash_lock held, so can't * sleep. */ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); struct inode *old; again: spin_lock(&inode_hash_lock); old = find_inode(inode->i_sb, head, test, data); if (unlikely(old)) { /* * Uhhuh, somebody else created the same inode under us. * Use the old inode instead of the preallocated one. */ spin_unlock(&inode_hash_lock); if (IS_ERR(old)) return NULL; wait_on_inode(old); if (unlikely(inode_unhashed(old))) { iput(old); goto again; } return old; } if (set && unlikely(set(inode, data))) { inode = NULL; goto unlock; } /* * Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents */ spin_lock(&inode->i_lock); inode->i_state |= I_NEW; hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); /* * Add inode to the sb list if it's not already. It has I_NEW at this * point, so it should be safe to test i_sb_list locklessly. */ if (list_empty(&inode->i_sb_list)) inode_sb_list_add(inode); unlock: spin_unlock(&inode_hash_lock); return inode; } EXPORT_SYMBOL(inode_insert5); /** * iget5_locked - obtain an inode from a mounted file system * @sb: super block of file system * @hashval: hash value (usually inode number) to get * @test: callback used for comparisons between inodes * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set * * Search for the inode specified by @hashval and @data in the inode cache, * and if present it is return it with an increased reference count. This is * a generalized version of iget_locked() for file systems where the inode * number is not sufficient for unique identification of an inode. * * If the inode is not in cache, allocate a new inode and return it locked, * hashed, and with the I_NEW flag set. The file system gets to fill it in * before unlocking it via unlock_new_inode(). * * Note both @test and @set are called with the inode_hash_lock held, so can't * sleep. */ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct inode *inode = ilookup5(sb, hashval, test, data); if (!inode) { struct inode *new = alloc_inode(sb); if (new) { new->i_state = 0; inode = inode_insert5(new, hashval, test, set, data); if (unlikely(inode != new)) destroy_inode(new); } } return inode; } EXPORT_SYMBOL(iget5_locked); /** * iget_locked - obtain an inode from a mounted file system * @sb: super block of file system * @ino: inode number to get * * Search for the inode specified by @ino in the inode cache and if present * return it with an increased reference count. This is for file systems * where the inode number is sufficient for unique identification of an inode. * * If the inode is not in cache, allocate a new inode and return it locked, * hashed, and with the I_NEW flag set. The file system gets to fill it in * before unlocking it via unlock_new_inode(). 
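 *
 * The usual calling pattern is therefore (sketch only; foo_read_inode()
 * is a hypothetical helper that reads the on-disk inode and fills in
 * the VFS inode):
 *
 *	inode = iget_locked(sb, ino);
 *	if (!inode)
 *		return ERR_PTR(-ENOMEM);
 *	if (!(inode->i_state & I_NEW))
 *		return inode;
 *	foo_read_inode(inode);
 *	unlock_new_inode(inode);
 *	return inode;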
*/ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; again: spin_lock(&inode_hash_lock); inode = find_inode_fast(sb, head, ino); spin_unlock(&inode_hash_lock); if (inode) { if (IS_ERR(inode)) return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } return inode; } inode = alloc_inode(sb); if (inode) { struct inode *old; spin_lock(&inode_hash_lock); /* We released the lock, so.. */ old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; spin_lock(&inode->i_lock); inode->i_state = I_NEW; hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); inode_sb_list_add(inode); spin_unlock(&inode_hash_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents */ return inode; } /* * Uhhuh, somebody else created the same inode under * us. Use the old inode instead of the one we just * allocated. */ spin_unlock(&inode_hash_lock); destroy_inode(inode); if (IS_ERR(old)) return NULL; inode = old; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } } return inode; } EXPORT_SYMBOL(iget_locked); /* * search the inode cache for a matching inode number. * If we find one, then the inode number we are trying to * allocate is not unique and so we should not use it. * * Returns 1 if the inode number is unique, 0 if it is not. */ static int test_inode_iunique(struct super_block *sb, unsigned long ino) { struct hlist_head *b = inode_hashtable + hash(sb, ino); struct inode *inode; hlist_for_each_entry_rcu(inode, b, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb) return 0; } return 1; } /** * iunique - get a unique inode number * @sb: superblock * @max_reserved: highest reserved inode number * * Obtain an inode number that is unique on the system for a given * superblock. This is used by file systems that have no natural * permanent inode numbering system. An inode number is returned that * is higher than the reserved limit but unique. * * BUGS: * With a large number of inodes live on the file system this function * currently becomes quite slow. */ ino_t iunique(struct super_block *sb, ino_t max_reserved) { /* * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW * error if st_ino won't fit in target struct field. Use 32bit counter * here to attempt to avoid that. */ static DEFINE_SPINLOCK(iunique_lock); static unsigned int counter; ino_t res; rcu_read_lock(); spin_lock(&iunique_lock); do { if (counter <= max_reserved) counter = max_reserved + 1; res = counter++; } while (!test_inode_iunique(sb, res)); spin_unlock(&iunique_lock); rcu_read_unlock(); return res; } EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode->i_lock); if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { __iget(inode); spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); /* * Handle the case where s_op->clear_inode is not been * called yet, and somebody is calling igrab * while the inode is getting freed. */ inode = NULL; } return inode; } EXPORT_SYMBOL(igrab); /** * ilookup5_nowait - search for an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * * Search for the inode specified by @hashval and @data in the inode cache. 
* If the inode is in the cache, the inode is returned with an incremented * reference count. * * Note: I_NEW is not waited upon so you have to be very careful what you do * with the returned inode. You probably should be using ilookup5() instead. * * Note2: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; spin_lock(&inode_hash_lock); inode = find_inode(sb, head, test, data); spin_unlock(&inode_hash_lock); return IS_ERR(inode) ? NULL : inode; } EXPORT_SYMBOL(ilookup5_nowait); /** * ilookup5 - search for an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @test: callback used for comparisons between inodes * @data: opaque data pointer to pass to @test * * Search for the inode specified by @hashval and @data in the inode cache, * and if the inode is in the cache, return the inode with an incremented * reference count. Waits on I_NEW before returning the inode. * returned with an incremented reference count. * * This is a generalized version of ilookup() for file systems where the * inode number is not sufficient for unique identification of an inode. * * Note: @test is called with the inode_hash_lock held, so can't sleep. */ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct inode *inode; again: inode = ilookup5_nowait(sb, hashval, test, data); if (inode) { wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } } return inode; } EXPORT_SYMBOL(ilookup5); /** * ilookup - search for an inode in the inode cache * @sb: super block of file system to search * @ino: inode number to search for * * Search for the inode @ino in the inode cache, and if the inode is in the * cache, the inode is returned with an incremented reference count. */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; again: spin_lock(&inode_hash_lock); inode = find_inode_fast(sb, head, ino); spin_unlock(&inode_hash_lock); if (inode) { if (IS_ERR(inode)) return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; } } return inode; } EXPORT_SYMBOL(ilookup); /** * find_inode_nowait - find an inode in the inode cache * @sb: super block of file system to search * @hashval: hash value (usually inode number) to search for * @match: callback used for comparisons between inodes * @data: opaque data pointer to pass to @match * * Search for the inode specified by @hashval and @data in the inode * cache, where the helper function @match will return 0 if the inode * does not match, 1 if the inode does match, and -1 if the search * should be stopped. The @match function must be responsible for * taking the i_lock spin_lock and checking i_state for an inode being * freed or being initialized, and incrementing the reference count * before returning 1. It also must not sleep, since it is called with * the inode_hash_lock spinlock held. 
* * This is a even more generalized version of ilookup5() when the * function must never block --- find_inode() can block in * __wait_on_freeing_inode() --- or when the caller can not increment * the reference count because the resulting iput() might cause an * inode eviction. The tradeoff is that the @match funtion must be * very carefully implemented. */ struct inode *find_inode_nowait(struct super_block *sb, unsigned long hashval, int (*match)(struct inode *, unsigned long, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode, *ret_inode = NULL; int mval; spin_lock(&inode_hash_lock); hlist_for_each_entry(inode, head, i_hash) { if (inode->i_sb != sb) continue; mval = match(inode, hashval, data); if (mval == 0) continue; if (mval == 1) ret_inode = inode; goto out; } out: spin_unlock(&inode_hash_lock); return ret_inode; } EXPORT_SYMBOL(find_inode_nowait); /** * find_inode_rcu - find an inode in the inode cache * @sb: Super block of file system to search * @hashval: Key to hash * @test: Function to test match on an inode * @data: Data for test function * * Search for the inode specified by @hashval and @data in the inode cache, * where the helper function @test will return 0 if the inode does not match * and 1 if it does. The @test function must be responsible for taking the * i_lock spin_lock and checking i_state for an inode being freed or being * initialized. * * If successful, this will return the inode for which the @test function * returned 1 and NULL otherwise. * * The @test function is not permitted to take a ref on any inode presented. * It is also not permitted to sleep. * * The caller must hold the RCU read lock. */ struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious find_inode_rcu() usage"); hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_sb == sb && !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) && test(inode, data)) return inode; } return NULL; } EXPORT_SYMBOL(find_inode_rcu); /** * find_inode_by_ino_rcu - Find an inode in the inode cache * @sb: Super block of file system to search * @ino: The inode number to match * * Search for the inode specified by @hashval and @data in the inode cache, * where the helper function @test will return 0 if the inode does not match * and 1 if it does. The @test function must be responsible for taking the * i_lock spin_lock and checking i_state for an inode being freed or being * initialized. * * If successful, this will return the inode for which the @test function * returned 1 and NULL otherwise. * * The @test function is not permitted to take a ref on any inode presented. * It is also not permitted to sleep. * * The caller must hold the RCU read lock. 
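 *
 * No reference is taken on the returned inode, so it may only be
 * inspected while the RCU read lock remains held, unless the caller
 * separately pins it (for example with igrab()) and then revalidates
 * it. Sketch of a caller, with foo_peek_at_inode() being hypothetical:
 *
 *	rcu_read_lock();
 *	inode = find_inode_by_ino_rcu(sb, ino);
 *	if (inode)
 *		foo_peek_at_inode(inode);
 *	rcu_read_unlock();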
*/ struct inode *find_inode_by_ino_rcu(struct super_block *sb, unsigned long ino) { struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious find_inode_by_ino_rcu() usage"); hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb && !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE))) return inode; } return NULL; } EXPORT_SYMBOL(find_inode_by_ino_rcu); int insert_inode_locked(struct inode *inode) { struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); while (1) { struct inode *old = NULL; spin_lock(&inode_hash_lock); hlist_for_each_entry(old, head, i_hash) { if (old->i_ino != ino) continue; if (old->i_sb != sb) continue; spin_lock(&old->i_lock); if (old->i_state & (I_FREEING|I_WILL_FREE)) { spin_unlock(&old->i_lock); continue; } break; } if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW | I_CREATING; hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); return 0; } if (unlikely(old->i_state & I_CREATING)) { spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); return -EBUSY; } __iget(old); spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); wait_on_inode(old); if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; } iput(old); } } EXPORT_SYMBOL(insert_inode_locked); int insert_inode_locked4(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct inode *old; inode->i_state |= I_CREATING; old = inode_insert5(inode, hashval, test, NULL, data); if (old != inode) { iput(old); return -EBUSY; } return 0; } EXPORT_SYMBOL(insert_inode_locked4); int generic_delete_inode(struct inode *inode) { return 1; } EXPORT_SYMBOL(generic_delete_inode); /* * Called when we're dropping the last reference * to an inode. * * Call the FS "drop_inode()" function, defaulting to * the legacy UNIX filesystem behaviour. If it tells * us to evict inode, do so. Otherwise, retain inode * in cache if fs is alive, sync and evict if fs is * shutting down. */ static void iput_final(struct inode *inode) { struct super_block *sb = inode->i_sb; const struct super_operations *op = inode->i_sb->s_op; unsigned long state; int drop; WARN_ON(inode->i_state & I_NEW); if (op->drop_inode) drop = op->drop_inode(inode); else drop = generic_drop_inode(inode); if (!drop && !(inode->i_state & I_DONTCACHE) && (sb->s_flags & SB_ACTIVE)) { __inode_add_lru(inode, true); spin_unlock(&inode->i_lock); return; } state = inode->i_state; if (!drop) { WRITE_ONCE(inode->i_state, state | I_WILL_FREE); spin_unlock(&inode->i_lock); write_inode_now(inode, 1); spin_lock(&inode->i_lock); state = inode->i_state; WARN_ON(state & I_NEW); state &= ~I_WILL_FREE; } WRITE_ONCE(inode->i_state, state | I_FREEING); if (!list_empty(&inode->i_lru)) inode_lru_list_del(inode); spin_unlock(&inode->i_lock); evict(inode); } /** * iput - put an inode * @inode: inode to put * * Puts an inode, dropping its usage count. If the inode use count hits * zero, the inode is then freed and may also be destroyed. * * Consequently, iput() can sleep. 
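 *
 * A minimal sketch of the usual pairing:
 *
 *	ihold(inode);
 *	... use the inode ...
 *	iput(inode);
 *
 * The extra reference taken with ihold() keeps the inode alive until
 * the matching iput(); because the final iput() may write the inode
 * back and evict it, it must not be called from atomic context.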
*/ void iput(struct inode *inode) { if (!inode) return; BUG_ON(inode->i_state & I_CLEAR); retry: if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) { if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) { atomic_inc(&inode->i_count); spin_unlock(&inode->i_lock); trace_writeback_lazytime_iput(inode); mark_inode_dirty_sync(inode); goto retry; } iput_final(inode); } } EXPORT_SYMBOL(iput); #ifdef CONFIG_BLOCK /** * bmap - find a block number in a file * @inode: inode owning the block number being requested * @block: pointer containing the block to find * * Replaces the value in ``*block`` with the block number on the device holding * corresponding to the requested block number in the file. * That is, asked for block 4 of inode 1 the function will replace the * 4 in ``*block``, with disk block relative to the disk start that holds that * block of the file. * * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a * hole, returns 0 and ``*block`` is also set to 0. */ int bmap(struct inode *inode, sector_t *block) { if (!inode->i_mapping->a_ops->bmap) return -EINVAL; *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block); return 0; } EXPORT_SYMBOL(bmap); #endif /* * With relative atime, only update atime if the previous atime is * earlier than or equal to either the ctime or mtime, * or if at least a day has passed since the last atime update. */ static bool relatime_need_update(struct vfsmount *mnt, struct inode *inode, struct timespec64 now) { struct timespec64 atime, mtime, ctime; if (!(mnt->mnt_flags & MNT_RELATIME)) return true; /* * Is mtime younger than or equal to atime? If yes, update atime: */ atime = inode_get_atime(inode); mtime = inode_get_mtime(inode); if (timespec64_compare(&mtime, &atime) >= 0) return true; /* * Is ctime younger than or equal to atime? If yes, update atime: */ ctime = inode_get_ctime(inode); if (timespec64_compare(&ctime, &atime) >= 0) return true; /* * Is the previous atime value older than a day? If yes, * update atime: */ if ((long)(now.tv_sec - atime.tv_sec) >= 24*60*60) return true; /* * Good, we can skip the atime update: */ return false; } /** * inode_update_timestamps - update the timestamps on the inode * @inode: inode to be updated * @flags: S_* flags that needed to be updated * * The update_time function is called when an inode's timestamps need to be * updated for a read or write operation. This function handles updating the * actual timestamps. It's up to the caller to ensure that the inode is marked * dirty appropriately. * * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated, * attempt to update all three of them. S_ATIME updates can be handled * independently of the rest. * * Returns a set of S_* flags indicating which values changed. 
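 *
 * generic_update_time() below shows the expected calling pattern: the
 * returned mask is translated into I_DIRTY_SYNC / I_DIRTY_TIME dirtying
 * flags and handed to __mark_inode_dirty().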
*/ int inode_update_timestamps(struct inode *inode, int flags) { int updated = 0; struct timespec64 now; if (flags & (S_MTIME|S_CTIME|S_VERSION)) { struct timespec64 ctime = inode_get_ctime(inode); struct timespec64 mtime = inode_get_mtime(inode); now = inode_set_ctime_current(inode); if (!timespec64_equal(&now, &ctime)) updated |= S_CTIME; if (!timespec64_equal(&now, &mtime)) { inode_set_mtime_to_ts(inode, now); updated |= S_MTIME; } if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated)) updated |= S_VERSION; } else { now = current_time(inode); } if (flags & S_ATIME) { struct timespec64 atime = inode_get_atime(inode); if (!timespec64_equal(&now, &atime)) { inode_set_atime_to_ts(inode, now); updated |= S_ATIME; } } return updated; } EXPORT_SYMBOL(inode_update_timestamps); /** * generic_update_time - update the timestamps on the inode * @inode: inode to be updated * @flags: S_* flags that needed to be updated * * The update_time function is called when an inode's timestamps need to be * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME, * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME * updates can be handled done independently of the rest. * * Returns a S_* mask indicating which fields were updated. */ int generic_update_time(struct inode *inode, int flags) { int updated = inode_update_timestamps(inode, flags); int dirty_flags = 0; if (updated & (S_ATIME|S_MTIME|S_CTIME)) dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC; if (updated & S_VERSION) dirty_flags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, dirty_flags); return updated; } EXPORT_SYMBOL(generic_update_time); /* * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ int inode_update_time(struct inode *inode, int flags) { if (inode->i_op->update_time) return inode->i_op->update_time(inode, flags); generic_update_time(inode, flags); return 0; } EXPORT_SYMBOL(inode_update_time); /** * atime_needs_update - update the access time * @path: the &struct path to update * @inode: inode to update * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ bool atime_needs_update(const struct path *path, struct inode *inode) { struct vfsmount *mnt = path->mnt; struct timespec64 now, atime; if (inode->i_flags & S_NOATIME) return false; /* Atime updates will likely cause i_uid and i_gid to be written * back improprely if their true value is unknown to the vfs. 
*/ if (HAS_UNMAPPED_ID(mnt_idmap(mnt), inode)) return false; if (IS_NOATIME(inode)) return false; if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)) return false; if (mnt->mnt_flags & MNT_NOATIME) return false; if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) return false; now = current_time(inode); if (!relatime_need_update(mnt, inode, now)) return false; atime = inode_get_atime(inode); if (timespec64_equal(&atime, &now)) return false; return true; } void touch_atime(const struct path *path) { struct vfsmount *mnt = path->mnt; struct inode *inode = d_inode(path->dentry); if (!atime_needs_update(path, inode)) return; if (!sb_start_write_trylock(inode->i_sb)) return; if (mnt_get_write_access(mnt) != 0) goto skip_update; /* * File systems can error out when updating inodes if they need to * allocate new space to modify an inode (such is the case for * Btrfs), but since we touch atime while walking down the path we * really don't care if we failed to update the atime of the file, * so just ignore the return value. * We may also fail on filesystems that have the ability to make parts * of the fs read only, e.g. subvolumes in Btrfs. */ inode_update_time(inode, S_ATIME); mnt_put_write_access(mnt); skip_update: sb_end_write(inode->i_sb); } EXPORT_SYMBOL(touch_atime); /* * Return mask of changes for notify_change() that need to be done as a * response to write or truncate. Return 0 if nothing has to be changed. * Negative value on error (change should be denied). */ int dentry_needs_remove_privs(struct mnt_idmap *idmap, struct dentry *dentry) { struct inode *inode = d_inode(dentry); int mask = 0; int ret; if (IS_NOSEC(inode)) return 0; mask = setattr_should_drop_suidgid(idmap, inode); ret = security_inode_need_killpriv(dentry); if (ret < 0) return ret; if (ret) mask |= ATTR_KILL_PRIV; return mask; } static int __remove_privs(struct mnt_idmap *idmap, struct dentry *dentry, int kill) { struct iattr newattrs; newattrs.ia_valid = ATTR_FORCE | kill; /* * Note we call this on write, so notify_change will not * encounter any conflicting delegations: */ return notify_change(idmap, dentry, &newattrs, NULL); } int file_remove_privs_flags(struct file *file, unsigned int flags) { struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); int error = 0; int kill; if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode)) return 0; kill = dentry_needs_remove_privs(file_mnt_idmap(file), dentry); if (kill < 0) return kill; if (kill) { if (flags & IOCB_NOWAIT) return -EAGAIN; error = __remove_privs(file_mnt_idmap(file), dentry, kill); } if (!error) inode_has_no_xattr(inode); return error; } EXPORT_SYMBOL_GPL(file_remove_privs_flags); /** * file_remove_privs - remove special file privileges (suid, capabilities) * @file: file to remove privileges from * * When file is modified by a write or truncation ensure that special * file privileges are removed. * * Return: 0 on success, negative errno on failure. 
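 *
 * Write paths that also need timestamp updates typically use
 * file_modified() or kiocb_modified() below instead, which remove the
 * privileges through the same underlying helper.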
*/ int file_remove_privs(struct file *file) { return file_remove_privs_flags(file, 0); } EXPORT_SYMBOL(file_remove_privs); static int inode_needs_update_time(struct inode *inode) { int sync_it = 0; struct timespec64 now = current_time(inode); struct timespec64 ts; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return 0; ts = inode_get_mtime(inode); if (!timespec64_equal(&ts, &now)) sync_it = S_MTIME; ts = inode_get_ctime(inode); if (!timespec64_equal(&ts, &now)) sync_it |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) sync_it |= S_VERSION; return sync_it; } static int __file_update_time(struct file *file, int sync_mode) { int ret = 0; struct inode *inode = file_inode(file); /* try to update time settings */ if (!mnt_get_write_access_file(file)) { ret = inode_update_time(inode, sync_mode); mnt_put_write_access_file(file); } return ret; } /** * file_update_time - update mtime and ctime * @file: file accessed * * Update the mtime and ctime members of an inode and mark the inode for * writeback. Note that this function is meant exclusively for usage in * the file write path of filesystems, and filesystems may choose to * explicitly ignore updates via this function with the S_NOCMTIME inode * flag, e.g. for network filesystems where these timestamps are handled * by the server. This can return an error for file systems that need to * allocate space in order to update an inode. * * Return: 0 on success, negative errno on failure. */ int file_update_time(struct file *file) { int ret; struct inode *inode = file_inode(file); ret = inode_needs_update_time(inode); if (ret <= 0) return ret; return __file_update_time(file, ret); } EXPORT_SYMBOL(file_update_time); /** * file_modified_flags - handle mandated vfs changes when modifying a file * @file: file that was modified * @flags: kiocb flags * * When file has been modified ensure that special * file privileges are removed and time settings are updated. * * If IOCB_NOWAIT is set, special file privileges will not be removed and * time settings will not be updated. It will return -EAGAIN. * * Context: Caller must hold the file's inode lock. * * Return: 0 on success, negative errno on failure. */ static int file_modified_flags(struct file *file, int flags) { int ret; struct inode *inode = file_inode(file); /* * Clear the security bits if the process is not being run by root. * This keeps people from modifying setuid and setgid binaries. */ ret = file_remove_privs_flags(file, flags); if (ret) return ret; if (unlikely(file->f_mode & FMODE_NOCMTIME)) return 0; ret = inode_needs_update_time(inode); if (ret <= 0) return ret; if (flags & IOCB_NOWAIT) return -EAGAIN; return __file_update_time(file, ret); } /** * file_modified - handle mandated vfs changes when modifying a file * @file: file that was modified * * When file has been modified ensure that special * file privileges are removed and time settings are updated. * * Context: Caller must hold the file's inode lock. * * Return: 0 on success, negative errno on failure. */ int file_modified(struct file *file) { return file_modified_flags(file, 0); } EXPORT_SYMBOL(file_modified); /** * kiocb_modified - handle mandated vfs changes when modifying a file * @iocb: iocb that was modified * * When file has been modified ensure that special * file privileges are removed and time settings are updated. * * Context: Caller must hold the file's inode lock. * * Return: 0 on success, negative errno on failure. 
*/ int kiocb_modified(struct kiocb *iocb) { return file_modified_flags(iocb->ki_filp, iocb->ki_flags); } EXPORT_SYMBOL_GPL(kiocb_modified); int inode_needs_sync(struct inode *inode) { if (IS_SYNC(inode)) return 1; if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) return 1; return 0; } EXPORT_SYMBOL(inode_needs_sync); /* * If we try to find an inode in the inode hash while it is being * deleted, we have to wait until the filesystem completes its * deletion before reporting that it isn't found. This function waits * until the deletion _might_ have completed. Callers are responsible * to recheck inode state. * * It doesn't matter if I_NEW is not set initially, a call to * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list * will DTRT. */ static void __wait_on_freeing_inode(struct inode *inode) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); schedule(); finish_wait(wq, &wait.wq_entry); spin_lock(&inode_hash_lock); } static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { if (!str) return 0; ihash_entries = simple_strtoul(str, &str, 0); return 1; } __setup("ihash_entries=", set_ihash_entries); /* * Initialize the waitqueues and inode hash table. */ void __init inode_init_early(void) { /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ if (hashdist) return; inode_hashtable = alloc_large_system_hash("Inode-cache", sizeof(struct hlist_head), ihash_entries, 14, HASH_EARLY | HASH_ZERO, &i_hash_shift, &i_hash_mask, 0, 0); } void __init inode_init(void) { /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), 0, (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| SLAB_ACCOUNT), init_once); /* Hash may have been set up in inode_init_early */ if (!hashdist) return; inode_hashtable = alloc_large_system_hash("Inode-cache", sizeof(struct hlist_head), ihash_entries, 14, HASH_ZERO, &i_hash_shift, &i_hash_mask, 0, 0); } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_mode = mode; if (S_ISCHR(mode)) { inode->i_fop = &def_chr_fops; inode->i_rdev = rdev; } else if (S_ISBLK(mode)) { if (IS_ENABLED(CONFIG_BLOCK)) inode->i_fop = &def_blk_fops; inode->i_rdev = rdev; } else if (S_ISFIFO(mode)) inode->i_fop = &pipefifo_fops; else if (S_ISSOCK(mode)) ; /* leave it no_open_fops */ else printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" " inode %s:%lu\n", mode, inode->i_sb->s_id, inode->i_ino); } EXPORT_SYMBOL(init_special_inode); /** * inode_init_owner - Init uid,gid,mode for new inode according to posix standards * @idmap: idmap of the mount the inode was created from * @inode: New inode * @dir: Directory inode * @mode: mode of the new inode * * If the inode has been created through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions * and initializing i_uid and i_gid. On non-idmapped mounts or if permission * checking is to be performed on the raw inode simply pass @nop_mnt_idmap. 
*/ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, const struct inode *dir, umode_t mode) { inode_fsuid_set(inode, idmap); if (dir && dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; /* Directories are special, and always inherit S_ISGID */ if (S_ISDIR(mode)) mode |= S_ISGID; } else inode_fsgid_set(inode, idmap); inode->i_mode = mode; } EXPORT_SYMBOL(inode_init_owner); /** * inode_owner_or_capable - check current task permissions to inode * @idmap: idmap of the mount the inode was found from * @inode: inode being checked * * Return true if current either has CAP_FOWNER in a namespace with the * inode owner uid mapped, or owns the file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ bool inode_owner_or_capable(struct mnt_idmap *idmap, const struct inode *inode) { vfsuid_t vfsuid; struct user_namespace *ns; vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) return true; ns = current_user_ns(); if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER)) return true; return false; } EXPORT_SYMBOL(inode_owner_or_capable); /* * Direct i/o helper functions */ static void __inode_dio_wait(struct inode *inode) { wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); do { prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); if (atomic_read(&inode->i_dio_count)) schedule(); } while (atomic_read(&inode->i_dio_count)); finish_wait(wq, &q.wq_entry); } /** * inode_dio_wait - wait for outstanding DIO requests to finish * @inode: inode to wait for * * Waits for all pending direct I/O requests to finish so that we can * proceed with a truncate or equivalent operation. * * Must be called under a lock that serializes taking new references * to i_dio_count, usually by inode->i_mutex. */ void inode_dio_wait(struct inode *inode) { if (atomic_read(&inode->i_dio_count)) __inode_dio_wait(inode); } EXPORT_SYMBOL(inode_dio_wait); /* * inode_set_flags - atomically set some inode flags * * Note: the caller should be holding i_mutex, or else be sure that * they have exclusive access to the inode structure (i.e., while the * inode is being instantiated). The reason for the cmpxchg() loop * --- which wouldn't be necessary if all code paths which modify * i_flags actually followed this rule, is that there is at least one * code path which doesn't today so we use cmpxchg() out of an abundance * of caution. * * In the long run, i_mutex is overkill, and we should probably look * at using the i_lock spinlock to protect i_flags, and then make sure * it is so documented in include/linux/fs.h and that all code follows * the locking convention!! */ void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask) { WARN_ON_ONCE(flags & ~mask); set_mask_bits(&inode->i_flags, mask, flags); } EXPORT_SYMBOL(inode_set_flags); void inode_nohighmem(struct inode *inode) { mapping_set_gfp_mask(inode->i_mapping, GFP_USER); } EXPORT_SYMBOL(inode_nohighmem); /** * timestamp_truncate - Truncate timespec to a granularity * @t: Timespec * @inode: inode being updated * * Truncate a timespec to the granularity supported by the fs * containing the inode. Always rounds down. 
gran must * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns). */ struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode) { struct super_block *sb = inode->i_sb; unsigned int gran = sb->s_time_gran; t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max); if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min)) t.tv_nsec = 0; /* Avoid division in the common cases 1 ns and 1 s. */ if (gran == 1) ; /* nothing */ else if (gran == NSEC_PER_SEC) t.tv_nsec = 0; else if (gran > 1 && gran < NSEC_PER_SEC) t.tv_nsec -= t.tv_nsec % gran; else WARN(1, "invalid file time granularity: %u", gran); return t; } EXPORT_SYMBOL(timestamp_truncate); /** * current_time - Return FS time * @inode: inode. * * Return the current time truncated to the time granularity supported by * the fs. * * Note that inode and inode->sb cannot be NULL. * Otherwise, the function warns and returns time without truncation. */ struct timespec64 current_time(struct inode *inode) { struct timespec64 now; ktime_get_coarse_real_ts64(&now); return timestamp_truncate(now, inode); } EXPORT_SYMBOL(current_time); /** * inode_set_ctime_current - set the ctime to current_time * @inode: inode * * Set the inode->i_ctime to the current value for the inode. Returns * the current value that was assigned to i_ctime. */ struct timespec64 inode_set_ctime_current(struct inode *inode) { struct timespec64 now = current_time(inode); inode_set_ctime_to_ts(inode, now); return now; } EXPORT_SYMBOL(inode_set_ctime_current); /** * in_group_or_capable - check whether caller is CAP_FSETID privileged * @idmap: idmap of the mount @inode was found from * @inode: inode to check * @vfsgid: the new/current vfsgid of @inode * * Check whether @vfsgid is in the caller's group list or if the caller is * privileged with CAP_FSETID over @inode. This can be used to determine * whether the setgid bit can be kept or must be dropped. * * Return: true if the caller is sufficiently privileged, false if not. */ bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid) { if (vfsgid_in_group_p(vfsgid)) return true; if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) return true; return false; } /** * mode_strip_sgid - handle the sgid bit for non-directories * @idmap: idmap of the mount the inode was created from * @dir: parent directory inode * @mode: mode of the file to be created in @dir * * If the @mode of the new file has both the S_ISGID and S_IXGRP bit * raised and @dir has the S_ISGID bit raised ensure that the caller is * either in the group of the parent directory or they have CAP_FSETID * in their user namespace and are privileged over the parent directory. * In all other cases, strip the S_ISGID bit from @mode. * * Return: the new mode to use for the file */ umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode) { if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP)) return mode; if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID)) return mode; if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir))) return mode; return mode & ~S_ISGID; } EXPORT_SYMBOL(mode_strip_sgid);
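/*
 * Hedged usage sketch (not part of the VFS inode code above): one way a
 * filesystem's ->write_iter method might honour the "Caller must hold the
 * file's inode lock" contract of kiocb_modified()/file_modified() documented
 * above. "examplefs" and examplefs_do_write() are hypothetical names used
 * purely for illustration; generic_write_checks(), kiocb_modified(),
 * inode_lock()/inode_unlock() and file_inode() are the real kernel APIs.
 */
static ssize_t examplefs_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;
	/*
	 * Strip SUID/SGID/capabilities and bump mtime/ctime before writing
	 * data; with IOCB_NOWAIT this returns -EAGAIN instead of blocking.
	 */
	ret = kiocb_modified(iocb);
	if (ret)
		goto out_unlock;
	ret = examplefs_do_write(iocb, from);	/* hypothetical data path */
out_unlock:
	inode_unlock(inode);
	return ret;
}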
// SPDX-License-Identifier: GPL-2.0 /* * kobject.c - library routines for handling generic kernel objects * * Copyright (c) 2002-2003 Patrick Mochel <mochel@osdl.org> * Copyright (c) 2006-2007 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2006-2007 Novell Inc. * * Please see the file Documentation/core-api/kobject.rst for critical information * about using the kobject interface. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> #include <linux/stat.h> #include <linux/slab.h> #include <linux/random.h> /** * kobject_namespace() - Return @kobj's namespace tag. * @kobj: kobject in question * * Returns namespace tag of @kobj if its parent has namespace ops enabled * and thus @kobj should have a namespace tag associated with it. Returns * %NULL otherwise. */ const void *kobject_namespace(const struct kobject *kobj) { const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj); if (!ns_ops || ns_ops->type == KOBJ_NS_TYPE_NONE) return NULL; return kobj->ktype->namespace(kobj); } /** * kobject_get_ownership() - Get sysfs ownership data for @kobj. * @kobj: kobject in question * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects * * Returns initial uid/gid pair that should be used when creating sysfs * representation of given kobject. Normally used to adjust ownership of * objects in a container. 
*/ void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; if (kobj->ktype->get_ownership) kobj->ktype->get_ownership(kobj, uid, gid); } static bool kobj_ns_type_is_valid(enum kobj_ns_type type) { if ((type <= KOBJ_NS_TYPE_NONE) || (type >= KOBJ_NS_TYPES)) return false; return true; } static int create_dir(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); const struct kobj_ns_type_operations *ops; int error; error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); if (error) return error; if (ktype) { error = sysfs_create_groups(kobj, ktype->default_groups); if (error) { sysfs_remove_dir(kobj); return error; } } /* * @kobj->sd may be deleted by an ancestor going away. Hold an * extra reference so that it stays until @kobj is gone. */ sysfs_get(kobj->sd); /* * If @kobj has ns_ops, its children need to be filtered based on * their namespace tags. Enable namespace support on @kobj->sd. */ ops = kobj_child_ns_ops(kobj); if (ops) { BUG_ON(!kobj_ns_type_is_valid(ops->type)); BUG_ON(!kobj_ns_type_registered(ops->type)); sysfs_enable_ns(kobj->sd); } return 0; } static int get_kobj_path_length(const struct kobject *kobj) { int length = 1; const struct kobject *parent = kobj; /* walk up the ancestors until we hit the one pointing to the * root. * Add 1 to strlen for leading '/' of each level. */ do { if (kobject_name(parent) == NULL) return 0; length += strlen(kobject_name(parent)) + 1; parent = parent->parent; } while (parent); return length; } static int fill_kobj_path(const struct kobject *kobj, char *path, int length) { const struct kobject *parent; --length; for (parent = kobj; parent; parent = parent->parent) { int cur = strlen(kobject_name(parent)); /* back up enough to print this name with '/' */ length -= cur; if (length <= 0) return -EINVAL; memcpy(path + length, kobject_name(parent), cur); *(path + --length) = '/'; } pr_debug("'%s' (%p): %s: path = '%s'\n", kobject_name(kobj), kobj, __func__, path); return 0; } /** * kobject_get_path() - Allocate memory and fill in the path for @kobj. * @kobj: kobject in question, with which to build the path * @gfp_mask: the allocation type used to allocate the path * * Return: The newly allocated memory, caller must free with kfree(). 
*/ char *kobject_get_path(const struct kobject *kobj, gfp_t gfp_mask) { char *path; int len; retry: len = get_kobj_path_length(kobj); if (len == 0) return NULL; path = kzalloc(len, gfp_mask); if (!path) return NULL; if (fill_kobj_path(kobj, path, len)) { kfree(path); goto retry; } return path; } EXPORT_SYMBOL_GPL(kobject_get_path); /* add the kobject to its kset's list */ static void kobj_kset_join(struct kobject *kobj) { if (!kobj->kset) return; kset_get(kobj->kset); spin_lock(&kobj->kset->list_lock); list_add_tail(&kobj->entry, &kobj->kset->list); spin_unlock(&kobj->kset->list_lock); } /* remove the kobject from its kset's list */ static void kobj_kset_leave(struct kobject *kobj) { if (!kobj->kset) return; spin_lock(&kobj->kset->list_lock); list_del_init(&kobj->entry); spin_unlock(&kobj->kset->list_lock); kset_put(kobj->kset); } static void kobject_init_internal(struct kobject *kobj) { if (!kobj) return; kref_init(&kobj->kref); INIT_LIST_HEAD(&kobj->entry); kobj->state_in_sysfs = 0; kobj->state_add_uevent_sent = 0; kobj->state_remove_uevent_sent = 0; kobj->state_initialized = 1; } static int kobject_add_internal(struct kobject *kobj) { int error = 0; struct kobject *parent; if (!kobj) return -ENOENT; if (!kobj->name || !kobj->name[0]) { WARN(1, "kobject: (%p): attempted to be registered with empty name!\n", kobj); return -EINVAL; } parent = kobject_get(kobj->parent); /* join kset if set, use it as parent if we do not already have one */ if (kobj->kset) { if (!parent) parent = kobject_get(&kobj->kset->kobj); kobj_kset_join(kobj); kobj->parent = parent; } pr_debug("'%s' (%p): %s: parent: '%s', set: '%s'\n", kobject_name(kobj), kobj, __func__, parent ? kobject_name(parent) : "<NULL>", kobj->kset ? kobject_name(&kobj->kset->kobj) : "<NULL>"); error = create_dir(kobj); if (error) { kobj_kset_leave(kobj); kobject_put(parent); kobj->parent = NULL; /* be noisy on error issues */ if (error == -EEXIST) pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", __func__, kobject_name(kobj)); else pr_err("%s failed for %s (error: %d parent: %s)\n", __func__, kobject_name(kobj), error, parent ? kobject_name(parent) : "'none'"); } else kobj->state_in_sysfs = 1; return error; } /** * kobject_set_name_vargs() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @vargs: vargs to format the string. */ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs) { const char *s; if (kobj->name && !fmt) return 0; s = kvasprintf_const(GFP_KERNEL, fmt, vargs); if (!s) return -ENOMEM; /* * ewww... some of these buggers have '/' in the name ... If * that's the case, we need to make sure we have an actual * allocated copy to modify, since kvasprintf_const may have * returned something from .rodata. */ if (strchr(s, '/')) { char *t; t = kstrdup(s, GFP_KERNEL); kfree_const(s); if (!t) return -ENOMEM; s = strreplace(t, '/', '!'); } kfree_const(kobj->name); kobj->name = s; return 0; } /** * kobject_set_name() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * * This sets the name of the kobject. If you have already added the * kobject to the system, you must call kobject_rename() in order to * change the name of the kobject. */ int kobject_set_name(struct kobject *kobj, const char *fmt, ...) 
{ va_list vargs; int retval; va_start(vargs, fmt); retval = kobject_set_name_vargs(kobj, fmt, vargs); va_end(vargs); return retval; } EXPORT_SYMBOL(kobject_set_name); /** * kobject_init() - Initialize a kobject structure. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * * This function will properly initialize a kobject such that it can then * be passed to the kobject_add() call. * * After this function is called, the kobject MUST be cleaned up by a call * to kobject_put(), not by a call to kfree directly to ensure that all of * the memory is cleaned up properly. */ void kobject_init(struct kobject *kobj, const struct kobj_type *ktype) { char *err_str; if (!kobj) { err_str = "invalid kobject pointer!"; goto error; } if (!ktype) { err_str = "must have a ktype to be initialized properly!\n"; goto error; } if (kobj->state_initialized) { /* do not error out as sometimes we can recover */ pr_err("kobject (%p): tried to init an initialized object, something is seriously wrong.\n", kobj); dump_stack_lvl(KERN_ERR); } kobject_init_internal(kobj); kobj->ktype = ktype; return; error: pr_err("kobject (%p): %s\n", kobj, err_str); dump_stack_lvl(KERN_ERR); } EXPORT_SYMBOL(kobject_init); static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, struct kobject *parent, const char *fmt, va_list vargs) { int retval; retval = kobject_set_name_vargs(kobj, fmt, vargs); if (retval) { pr_err("can not set name properly!\n"); return retval; } kobj->parent = parent; return kobject_add_internal(kobj); } /** * kobject_add() - The main kobject add function. * @kobj: the kobject to add * @parent: pointer to the parent of the kobject. * @fmt: format to name the kobject with. * * The kobject name is set and added to the kobject hierarchy in this * function. * * If @parent is set, then the parent of the @kobj will be set to it. * If @parent is NULL, then the parent of the @kobj will be set to the * kobject associated with the kset assigned to this kobject. If no kset * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * * Note, no "add" uevent will be created with this call, the caller should set * up all of the necessary sysfs files for the object and then call * kobject_uevent() with the UEVENT_ADD parameter to ensure that * userspace is properly notified of this kobject's creation. * * Return: If this function returns an error, kobject_put() must be * called to properly clean up the memory associated with the * object. Under no instance should the kobject that is passed * to this function be directly freed with a call to kfree(), * that can leak memory. * * If this function returns success, kobject_put() must also be called * in order to properly clean up the memory associated with the object. * * In short, once this function is called, kobject_put() MUST be called * when the use of the object is finished in order to properly free * everything. */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) { va_list args; int retval; if (!kobj) return -EINVAL; if (!kobj->state_initialized) { pr_err("kobject '%s' (%p): tried to add an uninitialized object, something is seriously wrong.\n", kobject_name(kobj), kobj); dump_stack_lvl(KERN_ERR); return -EINVAL; } va_start(args, fmt); retval = kobject_add_varg(kobj, parent, fmt, args); va_end(args); return retval; } EXPORT_SYMBOL(kobject_add); /** * kobject_init_and_add() - Initialize a kobject structure and add it to * the kobject hierarchy. 
* @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @parent: pointer to the parent of this kobject. * @fmt: the name of the kobject. * * This function combines the call to kobject_init() and kobject_add(). * * If this function returns an error, kobject_put() must be called to * properly clean up the memory associated with the object. This is the * same type of error handling after a call to kobject_add() and kobject * lifetime rules are the same here. */ int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) { va_list args; int retval; kobject_init(kobj, ktype); va_start(args, fmt); retval = kobject_add_varg(kobj, parent, fmt, args); va_end(args); return retval; } EXPORT_SYMBOL_GPL(kobject_init_and_add); /** * kobject_rename() - Change the name of an object. * @kobj: object in question. * @new_name: object's new name * * It is the responsibility of the caller to provide mutual * exclusion between two different calls of kobject_rename * on the same kobject and to ensure that new_name is valid and * won't conflict with other kobjects. */ int kobject_rename(struct kobject *kobj, const char *new_name) { int error = 0; const char *devpath = NULL; const char *dup_name = NULL, *name; char *devpath_string = NULL; char *envp[2]; kobj = kobject_get(kobj); if (!kobj) return -EINVAL; if (!kobj->parent) { kobject_put(kobj); return -EINVAL; } devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { error = -ENOMEM; goto out; } devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); if (!devpath_string) { error = -ENOMEM; goto out; } sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); envp[0] = devpath_string; envp[1] = NULL; name = dup_name = kstrdup_const(new_name, GFP_KERNEL); if (!name) { error = -ENOMEM; goto out; } error = sysfs_rename_dir_ns(kobj, new_name, kobject_namespace(kobj)); if (error) goto out; /* Install the new kobject name */ dup_name = kobj->name; kobj->name = name; /* This function is mostly/only used for network interface. * Some hotplug package track interfaces by their name and * therefore want to know when the name is changed by the user. */ kobject_uevent_env(kobj, KOBJ_MOVE, envp); out: kfree_const(dup_name); kfree(devpath_string); kfree(devpath); kobject_put(kobj); return error; } EXPORT_SYMBOL_GPL(kobject_rename); /** * kobject_move() - Move object to another parent. * @kobj: object in question. 
* @new_parent: object's new parent (can be NULL) */ int kobject_move(struct kobject *kobj, struct kobject *new_parent) { int error; struct kobject *old_parent; const char *devpath = NULL; char *devpath_string = NULL; char *envp[2]; kobj = kobject_get(kobj); if (!kobj) return -EINVAL; new_parent = kobject_get(new_parent); if (!new_parent) { if (kobj->kset) new_parent = kobject_get(&kobj->kset->kobj); } /* old object path */ devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { error = -ENOMEM; goto out; } devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); if (!devpath_string) { error = -ENOMEM; goto out; } sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); envp[0] = devpath_string; envp[1] = NULL; error = sysfs_move_dir_ns(kobj, new_parent, kobject_namespace(kobj)); if (error) goto out; old_parent = kobj->parent; kobj->parent = new_parent; new_parent = NULL; kobject_put(old_parent); kobject_uevent_env(kobj, KOBJ_MOVE, envp); out: kobject_put(new_parent); kobject_put(kobj); kfree(devpath_string); kfree(devpath); return error; } EXPORT_SYMBOL_GPL(kobject_move); static void __kobject_del(struct kobject *kobj) { struct kernfs_node *sd; const struct kobj_type *ktype; sd = kobj->sd; ktype = get_ktype(kobj); if (ktype) sysfs_remove_groups(kobj, ktype->default_groups); /* send "remove" if the caller did not do it but sent "add" */ if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) { pr_debug("'%s' (%p): auto cleanup 'remove' event\n", kobject_name(kobj), kobj); kobject_uevent(kobj, KOBJ_REMOVE); } sysfs_remove_dir(kobj); sysfs_put(sd); kobj->state_in_sysfs = 0; kobj_kset_leave(kobj); kobj->parent = NULL; } /** * kobject_del() - Unlink kobject from hierarchy. * @kobj: object. * * This is the function that should be called to delete an object * successfully added via kobject_add(). */ void kobject_del(struct kobject *kobj) { struct kobject *parent; if (!kobj) return; parent = kobj->parent; __kobject_del(kobj); kobject_put(parent); } EXPORT_SYMBOL(kobject_del); /** * kobject_get() - Increment refcount for object. * @kobj: object. */ struct kobject *kobject_get(struct kobject *kobj) { if (kobj) { if (!kobj->state_initialized) WARN(1, KERN_WARNING "kobject: '%s' (%p): is not initialized, yet kobject_get() is being called.\n", kobject_name(kobj), kobj); kref_get(&kobj->kref); } return kobj; } EXPORT_SYMBOL(kobject_get); struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj) { if (!kobj) return NULL; if (!kref_get_unless_zero(&kobj->kref)) kobj = NULL; return kobj; } EXPORT_SYMBOL(kobject_get_unless_zero); /* * kobject_cleanup - free kobject resources. * @kobj: object to cleanup */ static void kobject_cleanup(struct kobject *kobj) { struct kobject *parent = kobj->parent; const struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; pr_debug("'%s' (%p): %s, parent %p\n", kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. 
See Documentation/core-api/kobject.rst.\n", kobject_name(kobj), kobj); /* remove from sysfs if the caller did not do it */ if (kobj->state_in_sysfs) { pr_debug("'%s' (%p): auto cleanup kobject_del\n", kobject_name(kobj), kobj); __kobject_del(kobj); } else { /* avoid dropping the parent reference unnecessarily */ parent = NULL; } if (t && t->release) { pr_debug("'%s' (%p): calling ktype release\n", kobject_name(kobj), kobj); t->release(kobj); } /* free name if we allocated it */ if (name) { pr_debug("'%s': free name\n", name); kfree_const(name); } kobject_put(parent); } #ifdef CONFIG_DEBUG_KOBJECT_RELEASE static void kobject_delayed_cleanup(struct work_struct *work) { kobject_cleanup(container_of(to_delayed_work(work), struct kobject, release)); } #endif static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE unsigned long delay = HZ + HZ * get_random_u32_below(4); pr_info("'%s' (%p): %s, parent %p (delayed %ld)\n", kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); schedule_delayed_work(&kobj->release, delay); #else kobject_cleanup(kobj); #endif } /** * kobject_put() - Decrement refcount for object. * @kobj: object. * * Decrement the refcount, and if 0, call kobject_cleanup(). */ void kobject_put(struct kobject *kobj) { if (kobj) { if (!kobj->state_initialized) WARN(1, KERN_WARNING "kobject: '%s' (%p): is not initialized, yet kobject_put() is being called.\n", kobject_name(kobj), kobj); kref_put(&kobj->kref, kobject_release); } } EXPORT_SYMBOL(kobject_put); static void dynamic_kobj_release(struct kobject *kobj) { pr_debug("(%p): %s\n", kobj, __func__); kfree(kobj); } static const struct kobj_type dynamic_kobj_ktype = { .release = dynamic_kobj_release, .sysfs_ops = &kobj_sysfs_ops, }; /** * kobject_create() - Create a struct kobject dynamically. * * This function creates a kobject structure dynamically and sets it up * to be a "dynamic" kobject with a default release function set up. * * If the kobject was not able to be created, NULL will be returned. * The kobject structure returned from here must be cleaned up with a * call to kobject_put() and not kfree(), as kobject_init() has * already been called on this structure. */ static struct kobject *kobject_create(void) { struct kobject *kobj; kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); if (!kobj) return NULL; kobject_init(kobj, &dynamic_kobj_ktype); return kobj; } /** * kobject_create_and_add() - Create a struct kobject dynamically and * register it with sysfs. * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * * This function creates a kobject structure dynamically and registers it * with sysfs. When you are finished with this structure, call * kobject_put() and the structure will be dynamically freed when * it is no longer being used. * * If the kobject was not able to be created, NULL will be returned. */ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) { struct kobject *kobj; int retval; kobj = kobject_create(); if (!kobj) return NULL; retval = kobject_add(kobj, parent, "%s", name); if (retval) { pr_warn("%s: kobject_add error: %d\n", __func__, retval); kobject_put(kobj); kobj = NULL; } return kobj; } EXPORT_SYMBOL_GPL(kobject_create_and_add); /** * kset_init() - Initialize a kset for use. 
* @k: kset */ void kset_init(struct kset *k) { kobject_init_internal(&k->kobj); INIT_LIST_HEAD(&k->list); spin_lock_init(&k->list_lock); } /* default kobject attribute operations */ static ssize_t kobj_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct kobj_attribute *kattr; ssize_t ret = -EIO; kattr = container_of(attr, struct kobj_attribute, attr); if (kattr->show) ret = kattr->show(kobj, kattr, buf); return ret; } static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct kobj_attribute *kattr; ssize_t ret = -EIO; kattr = container_of(attr, struct kobj_attribute, attr); if (kattr->store) ret = kattr->store(kobj, kattr, buf, count); return ret; } const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** * kset_register() - Initialize and add a kset. * @k: kset. * * NOTE: On error, the kset.kobj.name allocated by() kobj_set_name() * is freed, it can not be used any more. */ int kset_register(struct kset *k) { int err; if (!k) return -EINVAL; if (!k->kobj.ktype) { pr_err("must have a ktype to be initialized properly!\n"); return -EINVAL; } kset_init(k); err = kobject_add_internal(&k->kobj); if (err) { kfree_const(k->kobj.name); /* Set it to NULL to avoid accessing bad pointer in callers. */ k->kobj.name = NULL; return err; } kobject_uevent(&k->kobj, KOBJ_ADD); return 0; } EXPORT_SYMBOL(kset_register); /** * kset_unregister() - Remove a kset. * @k: kset. */ void kset_unregister(struct kset *k) { if (!k) return; kobject_del(&k->kobj); kobject_put(&k->kobj); } EXPORT_SYMBOL(kset_unregister); /** * kset_find_obj() - Search for object in kset. * @kset: kset we're looking in. * @name: object's name. * * Lock kset via @kset->subsys, and iterate over @kset->list, * looking for a matching kobject. If matching object is found * take a reference and return the object. */ struct kobject *kset_find_obj(struct kset *kset, const char *name) { struct kobject *k; struct kobject *ret = NULL; spin_lock(&kset->list_lock); list_for_each_entry(k, &kset->list, entry) { if (kobject_name(k) && !strcmp(kobject_name(k), name)) { ret = kobject_get_unless_zero(k); break; } } spin_unlock(&kset->list_lock); return ret; } EXPORT_SYMBOL_GPL(kset_find_obj); static void kset_release(struct kobject *kobj) { struct kset *kset = container_of(kobj, struct kset, kobj); pr_debug("'%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); kfree(kset); } static void kset_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { if (kobj->parent) kobject_get_ownership(kobj->parent, uid, gid); } static const struct kobj_type kset_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = kset_release, .get_ownership = kset_get_ownership, }; /** * kset_create() - Create a struct kset dynamically. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset * @parent_kobj: the parent kobject of this kset, if any. * * This function creates a kset structure dynamically. This structure can * then be registered with the system and show up in sysfs with a call to * kset_register(). When you are finished with this structure, if * kset_register() has been called, call kset_unregister() and the * structure will be dynamically freed when it is no longer being used. * * If the kset was not able to be created, NULL will be returned. 
*/ static struct kset *kset_create(const char *name, const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; int retval; kset = kzalloc(sizeof(*kset), GFP_KERNEL); if (!kset) return NULL; retval = kobject_set_name(&kset->kobj, "%s", name); if (retval) { kfree(kset); return NULL; } kset->uevent_ops = uevent_ops; kset->kobj.parent = parent_kobj; /* * The kobject of this kset will have a type of kset_ktype and belong to * no kset itself. That way we can properly free it when it is * finished being used. */ kset->kobj.ktype = &kset_ktype; kset->kobj.kset = NULL; return kset; } /** * kset_create_and_add() - Create a struct kset dynamically and add it to sysfs. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset * @parent_kobj: the parent kobject of this kset, if any. * * This function creates a kset structure dynamically and registers it * with sysfs. When you are finished with this structure, call * kset_unregister() and the structure will be dynamically freed when it * is no longer being used. * * If the kset was not able to be created, NULL will be returned. */ struct kset *kset_create_and_add(const char *name, const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; int error; kset = kset_create(name, uevent_ops, parent_kobj); if (!kset) return NULL; error = kset_register(kset); if (error) { kfree(kset); return NULL; } return kset; } EXPORT_SYMBOL_GPL(kset_create_and_add); static DEFINE_SPINLOCK(kobj_ns_type_lock); static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) { enum kobj_ns_type type = ops->type; int error; spin_lock(&kobj_ns_type_lock); error = -EINVAL; if (!kobj_ns_type_is_valid(type)) goto out; error = -EBUSY; if (kobj_ns_ops_tbl[type]) goto out; error = 0; kobj_ns_ops_tbl[type] = ops; out: spin_unlock(&kobj_ns_type_lock); return error; } int kobj_ns_type_registered(enum kobj_ns_type type) { int registered = 0; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type)) registered = kobj_ns_ops_tbl[type] != NULL; spin_unlock(&kobj_ns_type_lock); return registered; } const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent) { const struct kobj_ns_type_operations *ops = NULL; if (parent && parent->ktype && parent->ktype->child_ns_type) ops = parent->ktype->child_ns_type(parent); return ops; } const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj) { return kobj_child_ns_ops(kobj->parent); } bool kobj_ns_current_may_mount(enum kobj_ns_type type) { bool may_mount = true; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) may_mount = kobj_ns_ops_tbl[type]->current_may_mount(); spin_unlock(&kobj_ns_type_lock); return may_mount; } void *kobj_ns_grab_current(enum kobj_ns_type type) { void *ns = NULL; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); return ns; } EXPORT_SYMBOL_GPL(kobj_ns_grab_current); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) { const void *ns = NULL; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); spin_unlock(&kobj_ns_type_lock); return ns; } const void *kobj_ns_initial(enum kobj_ns_type type) { const void *ns = NULL; spin_lock(&kobj_ns_type_lock); if 
(kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->initial_ns(); spin_unlock(&kobj_ns_type_lock); return ns; } void kobj_ns_drop(enum kobj_ns_type type, void *ns) { spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) kobj_ns_ops_tbl[type]->drop_ns(ns); spin_unlock(&kobj_ns_type_lock); } EXPORT_SYMBOL_GPL(kobj_ns_drop);
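/*
 * Hedged usage sketch (not part of the kobject.c code above): a minimal
 * module-style example of kobject_create_and_add() plus a kobj_attribute,
 * loosely modelled on samples/kobject/kobject-example.c. The names "example",
 * "foo" and example_init()/example_exit() are illustrative only. Because the
 * kobject comes from kobject_create_and_add(), it uses the dynamic ktype and
 * must be released with kobject_put(), never kfree().
 */
#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/sysfs.h>

static int foo;
static struct kobject *example_kobj;

static ssize_t foo_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	return sysfs_emit(buf, "%d\n", foo);
}

static ssize_t foo_store(struct kobject *kobj, struct kobj_attribute *attr,
			 const char *buf, size_t count)
{
	int ret = kstrtoint(buf, 10, &foo);

	return ret ? ret : count;
}

/* Expands to a 0644 attribute wired to foo_show()/foo_store(). */
static struct kobj_attribute foo_attribute = __ATTR_RW(foo);

static int __init example_init(void)
{
	int error;

	/* Creates /sys/kernel/example/ as a dynamically allocated kobject. */
	example_kobj = kobject_create_and_add("example", kernel_kobj);
	if (!example_kobj)
		return -ENOMEM;

	/* Creates /sys/kernel/example/foo. */
	error = sysfs_create_file(example_kobj, &foo_attribute.attr);
	if (error)
		kobject_put(example_kobj);
	return error;
}

static void __exit example_exit(void)
{
	kobject_put(example_kobj);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");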
// SPDX-License-Identifier: GPL-2.0-only /* * Common interrupt code for 32 and 64 bit */ #include <linux/cpu.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/of.h> #include <linux/seq_file.h> #include <linux/smp.h> #include <linux/ftrace.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/irq.h> #include <asm/irq_stack.h> #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/irq.h> #include <asm/mce.h> #include <asm/hw_irq.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/thermal.h> #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); atomic_t irq_err_count; /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. */ void ack_bad_irq(unsigned int irq) { if (printk_ratelimit()) pr_err("unexpected IRQ trap at vector %02x\n", irq); /* * Currently unexpected vectors happen only on SMP and APIC. * We _must_ ack these because every local APIC has only N * irq slots per priority level, and a 'hanging, unacked' IRQ * holds up an irq slot - in excessive cases (when multiple * unexpected vectors occur) that might lock up the APIC * completely. 
* But only ack when the APIC is enabled -AK */ apic_eoi(); } #define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing for arch specific interrupts */ int arch_show_interrupts(struct seq_file *p, int prec) { int j; seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); seq_puts(p, " Non-maskable interrupts\n"); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); seq_puts(p, " Local timer interrupts\n"); seq_printf(p, "%*s: ", prec, "SPU"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_puts(p, " Spurious interrupts\n"); seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_puts(p, " Performance monitoring interrupts\n"); seq_printf(p, "%*s: ", prec, "IWI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); seq_puts(p, " IRQ work interrupts\n"); seq_printf(p, "%*s: ", prec, "RTR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); seq_puts(p, " APIC ICR read retries\n"); if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); seq_puts(p, " Platform interrupts\n"); } #endif #ifdef CONFIG_SMP seq_printf(p, "%*s: ", prec, "RES"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); seq_puts(p, " Rescheduling interrupts\n"); seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); seq_puts(p, " Function call interrupts\n"); seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_puts(p, " TLB shootdowns\n"); #endif #ifdef CONFIG_X86_THERMAL_VECTOR seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_puts(p, " Thermal event interrupts\n"); #endif #ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_puts(p, " Threshold APIC interrupts\n"); #endif #ifdef CONFIG_X86_MCE_AMD seq_printf(p, "%*s: ", prec, "DFR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); seq_puts(p, " Deferred Error APIC interrupts\n"); #endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); seq_puts(p, " Machine check exceptions\n"); seq_printf(p, "%*s: ", prec, "MCP"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); seq_puts(p, " Machine check polls\n"); #endif #ifdef CONFIG_X86_HV_CALLBACK_VECTOR if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); seq_puts(p, " Hypervisor callback interrupts\n"); } #endif #if IS_ENABLED(CONFIG_HYPERV) if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HRE"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_hv_reenlightenment_count); seq_puts(p, " Hyper-V reenlightenment interrupts\n"); } if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HVS"); for_each_online_cpu(j) seq_printf(p, "%10u ", 
irq_stats(j)->hyperv_stimer0_count); seq_puts(p, " Hyper-V stimer0 interrupts\n"); } #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); #endif #if IS_ENABLED(CONFIG_KVM) seq_printf(p, "%*s: ", prec, "PIN"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); seq_puts(p, " Posted-interrupt notification event\n"); seq_printf(p, "%*s: ", prec, "NPI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_nested_ipis); seq_puts(p, " Nested posted-interrupt event\n"); seq_printf(p, "%*s: ", prec, "PIW"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_wakeup_ipis); seq_puts(p, " Posted-interrupt wakeup event\n"); #endif return 0; } /* * /proc/stat helpers */ u64 arch_irq_stat_cpu(unsigned int cpu) { u64 sum = irq_stats(cpu)->__nmi_count; #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->apic_perf_irqs; sum += irq_stats(cpu)->apic_irq_work_irqs; sum += irq_stats(cpu)->icr_read_retry_count; if (x86_platform_ipi_callback) sum += irq_stats(cpu)->x86_platform_ipis; #endif #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; #endif #ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; #endif #ifdef CONFIG_X86_HV_CALLBACK_VECTOR sum += irq_stats(cpu)->irq_hv_callback_count; #endif #if IS_ENABLED(CONFIG_HYPERV) sum += irq_stats(cpu)->irq_hv_reenlightenment_count; sum += irq_stats(cpu)->hyperv_stimer0_count; #endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); sum += per_cpu(mce_poll_count, cpu); #endif return sum; } u64 arch_irq_stat(void) { u64 sum = atomic_read(&irq_err_count); return sum; } static __always_inline void handle_irq(struct irq_desc *desc, struct pt_regs *regs) { if (IS_ENABLED(CONFIG_X86_64)) generic_handle_irq_desc(desc); else __handle_irq(desc, regs); } /* * common_interrupt() handles all normal device IRQ's (the special SMP * cross-CPU interrupts have their own entry points). */ DEFINE_IDTENTRY_IRQ(common_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); struct irq_desc *desc; /* entry code tells RCU that we're not quiescent. Check it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); desc = __this_cpu_read(vector_irq[vector]); if (likely(!IS_ERR_OR_NULL(desc))) { handle_irq(desc, regs); } else { apic_eoi(); if (desc == VECTOR_UNUSED) { pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", __func__, smp_processor_id(), vector); } else { __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); } } set_irq_regs(old_regs); } #ifdef CONFIG_X86_LOCAL_APIC /* Function pointer for generic interrupt vector handling */ void (*x86_platform_ipi_callback)(void) = NULL; /* * Handler for X86_PLATFORM_IPI_VECTOR. 
*/ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) { struct pt_regs *old_regs = set_irq_regs(regs); apic_eoi(); trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); inc_irq_stat(x86_platform_ipis); if (x86_platform_ipi_callback) x86_platform_ipi_callback(); trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); set_irq_regs(old_regs); } #endif #if IS_ENABLED(CONFIG_KVM) static void dummy_handler(void) {} static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) { if (handler) kvm_posted_intr_wakeup_handler = handler; else { kvm_posted_intr_wakeup_handler = dummy_handler; synchronize_rcu(); } } EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); /* * Handler for POSTED_INTERRUPT_VECTOR. */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_ipis); } /* * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. */ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_wakeup_ipis); kvm_posted_intr_wakeup_handler(); } /* * Handler for POSTED_INTERRUPT_NESTED_VECTOR. */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_nested_ipis); } #endif #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { unsigned int irr, vector; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; irq_migrate_all_off_this_cpu(); /* * We can remove mdelay() and then send spurious interrupts to * new cpu targets for all the irqs that were handled previously by * this cpu. While it works, I have seen spurious interrupt messages * (nothing wrong but still...). * * So for now, retain mdelay(1) and check the IRR and then send those * interrupts to new targets as this cpu is already offlined... */ mdelay(1); /* * We can walk the vector array of this cpu without holding * vector_lock because the cpu is already marked !online, so * nothing else will touch it. */ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) continue; irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); if (irr & (1 << (vector % 32))) { desc = __this_cpu_read(vector_irq[vector]); raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); chip = irq_data_get_irq_chip(data); if (chip->irq_retrigger) { chip->irq_retrigger(data); __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); } raw_spin_unlock(&desc->lock); } if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); } } #endif #ifdef CONFIG_X86_THERMAL_VECTOR static void smp_thermal_vector(void) { if (x86_thermal_enabled()) intel_thermal_interrupt(); else pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", smp_processor_id()); } DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) { trace_thermal_apic_entry(THERMAL_APIC_VECTOR); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); trace_thermal_apic_exit(THERMAL_APIC_VECTOR); apic_eoi(); } #endif
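/*
 * Hedged usage sketch (not part of the x86 interrupt code above): how a
 * hypervisor module might install and tear down the posted-interrupt wakeup
 * handler via kvm_set_posted_intr_wakeup_handler(), which is defined above
 * and assumed here to be declared in <asm/irq.h>. example_pi_wakeup_handler()
 * and the module hooks are hypothetical names; KVM's VMX code registers its
 * real handler in essentially this way.
 */
#include <linux/module.h>
#include <asm/irq.h>

static void example_pi_wakeup_handler(void)
{
	/* Wake whatever is blocked waiting on posted interrupts on this CPU. */
}

static int __init example_pi_init(void)
{
	kvm_set_posted_intr_wakeup_handler(example_pi_wakeup_handler);
	return 0;
}

static void __exit example_pi_exit(void)
{
	/*
	 * Passing NULL restores the dummy handler and, as the code above shows,
	 * waits for an RCU grace period before the old handler can go away.
	 */
	kvm_set_posted_intr_wakeup_handler(NULL);
}

module_init(example_pi_init);
module_exit(example_pi_exit);
MODULE_LICENSE("GPL");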
// SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for the Conexant CX2584x Audio/Video decoder chip and related cores * * Integrated Consumer Infrared Controller * * Copyright (C) 2010 Andy Walls <awalls@md.metrocast.net> */ #include <linux/slab.h> #include <linux/kfifo.h> #include <linux/module.h> #include <media/drv-intf/cx25840.h> #include <media/rc-core.h> #include "cx25840-core.h" static unsigned int ir_debug; module_param(ir_debug, int, 0644); MODULE_PARM_DESC(ir_debug, "enable integrated IR debug messages"); #define CX25840_IR_REG_BASE 0x200 #define CX25840_IR_CNTRL_REG 0x200 #define CNTRL_WIN_3_3 0x00000000 #define CNTRL_WIN_4_3 0x00000001 #define CNTRL_WIN_3_4 0x00000002 #define CNTRL_WIN_4_4 0x00000003 #define CNTRL_WIN 0x00000003 #define CNTRL_EDG_NONE 0x00000000 #define CNTRL_EDG_FALL 0x00000004 #define CNTRL_EDG_RISE 0x00000008 #define CNTRL_EDG_BOTH 0x0000000C #define CNTRL_EDG 0x0000000C #define CNTRL_DMD 0x00000010 #define CNTRL_MOD 0x00000020 #define CNTRL_RFE 0x00000040 #define CNTRL_TFE 0x00000080 #define CNTRL_RXE 0x00000100 #define CNTRL_TXE 0x00000200 #define CNTRL_RIC 0x00000400 #define CNTRL_TIC 0x00000800 #define CNTRL_CPL 0x00001000 #define CNTRL_LBM 0x00002000 #define CNTRL_R 0x00004000 #define CX25840_IR_TXCLK_REG 0x204 #define TXCLK_TCD 0x0000FFFF #define CX25840_IR_RXCLK_REG 0x208 #define RXCLK_RCD 0x0000FFFF #define CX25840_IR_CDUTY_REG 0x20C #define CDUTY_CDC 0x0000000F #define CX25840_IR_STATS_REG 0x210 #define STATS_RTO 0x00000001 #define STATS_ROR 0x00000002 #define STATS_RBY 0x00000004 #define STATS_TBY 0x00000008 #define STATS_RSR 0x00000010 #define STATS_TSR 0x00000020 #define CX25840_IR_IRQEN_REG 0x214 #define IRQEN_RTE 0x00000001 #define IRQEN_ROE 0x00000002 #define IRQEN_RSE 0x00000010 #define IRQEN_TSE 0x00000020 #define IRQEN_MSK 0x00000033 #define CX25840_IR_FILTR_REG 0x218 #define FILTR_LPF 0x0000FFFF #define CX25840_IR_FIFO_REG 0x23C #define FIFO_RXTX 0x0000FFFF #define FIFO_RXTX_LVL 0x00010000 #define
FIFO_RXTX_RTO 0x0001FFFF #define FIFO_RX_NDV 0x00020000 #define FIFO_RX_DEPTH 8 #define FIFO_TX_DEPTH 8 #define CX25840_VIDCLK_FREQ 108000000 /* 108 MHz, BT.656 */ #define CX25840_IR_REFCLK_FREQ (CX25840_VIDCLK_FREQ / 2) /* * We use this union internally for convenience, but callers to tx_write * and rx_read will be expecting records of type struct ir_raw_event. * Always ensure the size of this union is dictated by struct ir_raw_event. */ union cx25840_ir_fifo_rec { u32 hw_fifo_data; struct ir_raw_event ir_core_data; }; #define CX25840_IR_RX_KFIFO_SIZE (256 * sizeof(union cx25840_ir_fifo_rec)) #define CX25840_IR_TX_KFIFO_SIZE (256 * sizeof(union cx25840_ir_fifo_rec)) struct cx25840_ir_state { struct i2c_client *c; struct v4l2_subdev_ir_parameters rx_params; struct mutex rx_params_lock; /* protects Rx parameter settings cache */ atomic_t rxclk_divider; atomic_t rx_invert; struct kfifo rx_kfifo; spinlock_t rx_kfifo_lock; /* protect Rx data kfifo */ struct v4l2_subdev_ir_parameters tx_params; struct mutex tx_params_lock; /* protects Tx parameter settings cache */ atomic_t txclk_divider; }; static inline struct cx25840_ir_state *to_ir_state(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); return state ? state->ir_state : NULL; } /* * Rx and Tx Clock Divider register computations * * Note the largest clock divider value of 0xffff corresponds to: * (0xffff + 1) * 1000 / 108/2 MHz = 1,213,629.629... ns * which fits in 21 bits, so we'll use unsigned int for time arguments. */ static inline u16 count_to_clock_divider(unsigned int d) { if (d > RXCLK_RCD + 1) d = RXCLK_RCD; else if (d < 2) d = 1; else d--; return (u16) d; } static inline u16 carrier_freq_to_clock_divider(unsigned int freq) { return count_to_clock_divider( DIV_ROUND_CLOSEST(CX25840_IR_REFCLK_FREQ, freq * 16)); } static inline unsigned int clock_divider_to_carrier_freq(unsigned int divider) { return DIV_ROUND_CLOSEST(CX25840_IR_REFCLK_FREQ, (divider + 1) * 16); } static inline unsigned int clock_divider_to_freq(unsigned int divider, unsigned int rollovers) { return DIV_ROUND_CLOSEST(CX25840_IR_REFCLK_FREQ, (divider + 1) * rollovers); } /* * Low Pass Filter register calculations * * Note the largest count value of 0xffff corresponds to: * 0xffff * 1000 / 108/2 MHz = 1,213,611.11... ns * which fits in 21 bits, so we'll use unsigned int for time arguments. */ static inline u16 count_to_lpf_count(unsigned int d) { if (d > FILTR_LPF) d = FILTR_LPF; else if (d < 4) d = 0; return (u16) d; } static inline u16 ns_to_lpf_count(unsigned int ns) { return count_to_lpf_count( DIV_ROUND_CLOSEST(CX25840_IR_REFCLK_FREQ / 1000000 * ns, 1000)); } static inline unsigned int lpf_count_to_ns(unsigned int count) { /* Duration of the Low Pass Filter rejection window in ns */ return DIV_ROUND_CLOSEST(count * 1000, CX25840_IR_REFCLK_FREQ / 1000000); } static inline unsigned int lpf_count_to_us(unsigned int count) { /* Duration of the Low Pass Filter rejection window in us */ return DIV_ROUND_CLOSEST(count, CX25840_IR_REFCLK_FREQ / 1000000); } /* * FIFO register pulse width count computations */ static u32 clock_divider_to_resolution(u16 divider) { /* * Resolution is the duration of 1 tick of the readable portion of * the pulse width counter as read from the FIFO. The two lsb's are * not readable, hence the << 2. This function returns ns. 
*/ return DIV_ROUND_CLOSEST((1 << 2) * ((u32) divider + 1) * 1000, CX25840_IR_REFCLK_FREQ / 1000000); } static u64 pulse_width_count_to_ns(u16 count, u16 divider) { u64 n; u32 rem; /* * The 2 lsb's of the pulse width timer count are not readable, hence * the (count << 2) | 0x3 */ n = (((u64) count << 2) | 0x3) * (divider + 1) * 1000; /* millicycles */ rem = do_div(n, CX25840_IR_REFCLK_FREQ / 1000000); /* / MHz => ns */ if (rem >= CX25840_IR_REFCLK_FREQ / 1000000 / 2) n++; return n; } #if 0 /* Keep as we will need this for Transmit functionality */ static u16 ns_to_pulse_width_count(u32 ns, u16 divider) { u64 n; u32 d; u32 rem; /* * The 2 lsb's of the pulse width timer count are not accessible, hence * the (1 << 2) */ n = ((u64) ns) * CX25840_IR_REFCLK_FREQ / 1000000; /* millicycles */ d = (1 << 2) * ((u32) divider + 1) * 1000; /* millicycles/count */ rem = do_div(n, d); if (rem >= d / 2) n++; if (n > FIFO_RXTX) n = FIFO_RXTX; else if (n == 0) n = 1; return (u16) n; } #endif static unsigned int pulse_width_count_to_us(u16 count, u16 divider) { u64 n; u32 rem; /* * The 2 lsb's of the pulse width timer count are not readable, hence * the (count << 2) | 0x3 */ n = (((u64) count << 2) | 0x3) * (divider + 1); /* cycles */ rem = do_div(n, CX25840_IR_REFCLK_FREQ / 1000000); /* / MHz => us */ if (rem >= CX25840_IR_REFCLK_FREQ / 1000000 / 2) n++; return (unsigned int) n; } /* * Pulse Clocks computations: Combined Pulse Width Count & Rx Clock Counts * * The total pulse clock count is an 18 bit pulse width timer count as the most * significant part and (up to) 16 bit clock divider count as a modulus. * When the Rx clock divider ticks down to 0, it increments the 18 bit pulse * width timer count's least significant bit. */ static u64 ns_to_pulse_clocks(u32 ns) { u64 clocks; u32 rem; clocks = CX25840_IR_REFCLK_FREQ / 1000000 * (u64) ns; /* millicycles */ rem = do_div(clocks, 1000); /* /1000 = cycles */ if (rem >= 1000 / 2) clocks++; return clocks; } static u16 pulse_clocks_to_clock_divider(u64 count) { do_div(count, (FIFO_RXTX << 2) | 0x3); /* net result needs to be rounded down and decremented by 1 */ if (count > RXCLK_RCD + 1) count = RXCLK_RCD; else if (count < 2) count = 1; else count--; return (u16) count; } /* * IR Control Register helpers */ enum tx_fifo_watermark { TX_FIFO_HALF_EMPTY = 0, TX_FIFO_EMPTY = CNTRL_TIC, }; enum rx_fifo_watermark { RX_FIFO_HALF_FULL = 0, RX_FIFO_NOT_EMPTY = CNTRL_RIC, }; static inline void control_tx_irq_watermark(struct i2c_client *c, enum tx_fifo_watermark level) { cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~CNTRL_TIC, level); } static inline void control_rx_irq_watermark(struct i2c_client *c, enum rx_fifo_watermark level) { cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~CNTRL_RIC, level); } static inline void control_tx_enable(struct i2c_client *c, bool enable) { cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~(CNTRL_TXE | CNTRL_TFE), enable ? (CNTRL_TXE | CNTRL_TFE) : 0); } static inline void control_rx_enable(struct i2c_client *c, bool enable) { cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~(CNTRL_RXE | CNTRL_RFE), enable ? (CNTRL_RXE | CNTRL_RFE) : 0); } static inline void control_tx_modulation_enable(struct i2c_client *c, bool enable) { cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~CNTRL_MOD, enable ? CNTRL_MOD : 0); } static inline void control_rx_demodulation_enable(struct i2c_client *c, bool enable) { cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~CNTRL_DMD, enable ? 
CNTRL_DMD : 0); } static inline void control_rx_s_edge_detection(struct i2c_client *c, u32 edge_types) { cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~CNTRL_EDG_BOTH, edge_types & CNTRL_EDG_BOTH); } static void control_rx_s_carrier_window(struct i2c_client *c, unsigned int carrier, unsigned int *carrier_range_low, unsigned int *carrier_range_high) { u32 v; unsigned int c16 = carrier * 16; if (*carrier_range_low < DIV_ROUND_CLOSEST(c16, 16 + 3)) { v = CNTRL_WIN_3_4; *carrier_range_low = DIV_ROUND_CLOSEST(c16, 16 + 4); } else { v = CNTRL_WIN_3_3; *carrier_range_low = DIV_ROUND_CLOSEST(c16, 16 + 3); } if (*carrier_range_high > DIV_ROUND_CLOSEST(c16, 16 - 3)) { v |= CNTRL_WIN_4_3; *carrier_range_high = DIV_ROUND_CLOSEST(c16, 16 - 4); } else { v |= CNTRL_WIN_3_3; *carrier_range_high = DIV_ROUND_CLOSEST(c16, 16 - 3); } cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~CNTRL_WIN, v); } static inline void control_tx_polarity_invert(struct i2c_client *c, bool invert) { cx25840_and_or4(c, CX25840_IR_CNTRL_REG, ~CNTRL_CPL, invert ? CNTRL_CPL : 0); } /* * IR Rx & Tx Clock Register helpers */ static unsigned int txclk_tx_s_carrier(struct i2c_client *c, unsigned int freq, u16 *divider) { *divider = carrier_freq_to_clock_divider(freq); cx25840_write4(c, CX25840_IR_TXCLK_REG, *divider); return clock_divider_to_carrier_freq(*divider); } static unsigned int rxclk_rx_s_carrier(struct i2c_client *c, unsigned int freq, u16 *divider) { *divider = carrier_freq_to_clock_divider(freq); cx25840_write4(c, CX25840_IR_RXCLK_REG, *divider); return clock_divider_to_carrier_freq(*divider); } static u32 txclk_tx_s_max_pulse_width(struct i2c_client *c, u32 ns, u16 *divider) { u64 pulse_clocks; if (ns > IR_MAX_DURATION) ns = IR_MAX_DURATION; pulse_clocks = ns_to_pulse_clocks(ns); *divider = pulse_clocks_to_clock_divider(pulse_clocks); cx25840_write4(c, CX25840_IR_TXCLK_REG, *divider); return (u32) pulse_width_count_to_ns(FIFO_RXTX, *divider); } static u32 rxclk_rx_s_max_pulse_width(struct i2c_client *c, u32 ns, u16 *divider) { u64 pulse_clocks; if (ns > IR_MAX_DURATION) ns = IR_MAX_DURATION; pulse_clocks = ns_to_pulse_clocks(ns); *divider = pulse_clocks_to_clock_divider(pulse_clocks); cx25840_write4(c, CX25840_IR_RXCLK_REG, *divider); return (u32) pulse_width_count_to_ns(FIFO_RXTX, *divider); } /* * IR Tx Carrier Duty Cycle register helpers */ static unsigned int cduty_tx_s_duty_cycle(struct i2c_client *c, unsigned int duty_cycle) { u32 n; n = DIV_ROUND_CLOSEST(duty_cycle * 100, 625); /* 16ths of 100% */ if (n != 0) n--; if (n > 15) n = 15; cx25840_write4(c, CX25840_IR_CDUTY_REG, n); return DIV_ROUND_CLOSEST((n + 1) * 100, 16); } /* * IR Filter Register helpers */ static u32 filter_rx_s_min_width(struct i2c_client *c, u32 min_width_ns) { u32 count = ns_to_lpf_count(min_width_ns); cx25840_write4(c, CX25840_IR_FILTR_REG, count); return lpf_count_to_ns(count); } /* * IR IRQ Enable Register helpers */ static inline void irqenable_rx(struct v4l2_subdev *sd, u32 mask) { struct cx25840_state *state = to_state(sd); if (is_cx23885(state) || is_cx23887(state)) mask ^= IRQEN_MSK; mask &= (IRQEN_RTE | IRQEN_ROE | IRQEN_RSE); cx25840_and_or4(state->c, CX25840_IR_IRQEN_REG, ~(IRQEN_RTE | IRQEN_ROE | IRQEN_RSE), mask); } static inline void irqenable_tx(struct v4l2_subdev *sd, u32 mask) { struct cx25840_state *state = to_state(sd); if (is_cx23885(state) || is_cx23887(state)) mask ^= IRQEN_MSK; mask &= IRQEN_TSE; cx25840_and_or4(state->c, CX25840_IR_IRQEN_REG, ~IRQEN_TSE, mask); } /* * V4L2 Subdevice IR Ops */ int cx25840_ir_irq_handler(struct 
v4l2_subdev *sd, u32 status, bool *handled) { struct cx25840_state *state = to_state(sd); struct cx25840_ir_state *ir_state = to_ir_state(sd); struct i2c_client *c = NULL; unsigned long flags; union cx25840_ir_fifo_rec rx_data[FIFO_RX_DEPTH]; unsigned int i, j, k; u32 events, v; int tsr, rsr, rto, ror, tse, rse, rte, roe, kror; u32 cntrl, irqen, stats; *handled = false; if (ir_state == NULL) return -ENODEV; c = ir_state->c; /* Only support the IR controller for the CX2388[57] AV Core for now */ if (!(is_cx23885(state) || is_cx23887(state))) return -ENODEV; cntrl = cx25840_read4(c, CX25840_IR_CNTRL_REG); irqen = cx25840_read4(c, CX25840_IR_IRQEN_REG); if (is_cx23885(state) || is_cx23887(state)) irqen ^= IRQEN_MSK; stats = cx25840_read4(c, CX25840_IR_STATS_REG); tsr = stats & STATS_TSR; /* Tx FIFO Service Request */ rsr = stats & STATS_RSR; /* Rx FIFO Service Request */ rto = stats & STATS_RTO; /* Rx Pulse Width Timer Time Out */ ror = stats & STATS_ROR; /* Rx FIFO Over Run */ tse = irqen & IRQEN_TSE; /* Tx FIFO Service Request IRQ Enable */ rse = irqen & IRQEN_RSE; /* Rx FIFO Service Request IRQ Enable */ rte = irqen & IRQEN_RTE; /* Rx Pulse Width Timer Time Out IRQ Enable */ roe = irqen & IRQEN_ROE; /* Rx FIFO Over Run IRQ Enable */ v4l2_dbg(2, ir_debug, sd, "IR IRQ Status: %s %s %s %s %s %s\n", tsr ? "tsr" : " ", rsr ? "rsr" : " ", rto ? "rto" : " ", ror ? "ror" : " ", stats & STATS_TBY ? "tby" : " ", stats & STATS_RBY ? "rby" : " "); v4l2_dbg(2, ir_debug, sd, "IR IRQ Enables: %s %s %s %s\n", tse ? "tse" : " ", rse ? "rse" : " ", rte ? "rte" : " ", roe ? "roe" : " "); /* * Transmitter interrupt service */ if (tse && tsr) { /* * TODO: * Check the watermark threshold setting * Pull FIFO_TX_DEPTH or FIFO_TX_DEPTH/2 entries from tx_kfifo * Push the data to the hardware FIFO. * If there was nothing more to send in the tx_kfifo, disable * the TSR IRQ and notify the v4l2_device. * If there was something in the tx_kfifo, check the tx_kfifo * level and notify the v4l2_device, if it is low. */ /* For now, inhibit TSR interrupt until Tx is implemented */ irqenable_tx(sd, 0); events = V4L2_SUBDEV_IR_TX_FIFO_SERVICE_REQ; v4l2_subdev_notify(sd, V4L2_SUBDEV_IR_TX_NOTIFY, &events); *handled = true; } /* * Receiver interrupt service */ kror = 0; if ((rse && rsr) || (rte && rto)) { /* * Receive data on RSR to clear the STATS_RSR. * Receive data on RTO, since we may not have yet hit the RSR * watermark when we receive the RTO. 
*/ for (i = 0, v = FIFO_RX_NDV; (v & FIFO_RX_NDV) && !kror; i = 0) { for (j = 0; (v & FIFO_RX_NDV) && j < FIFO_RX_DEPTH; j++) { v = cx25840_read4(c, CX25840_IR_FIFO_REG); rx_data[i].hw_fifo_data = v & ~FIFO_RX_NDV; i++; } if (i == 0) break; j = i * sizeof(union cx25840_ir_fifo_rec); k = kfifo_in_locked(&ir_state->rx_kfifo, (unsigned char *) rx_data, j, &ir_state->rx_kfifo_lock); if (k != j) kror++; /* rx_kfifo over run */ } *handled = true; } events = 0; v = 0; if (kror) { events |= V4L2_SUBDEV_IR_RX_SW_FIFO_OVERRUN; v4l2_err(sd, "IR receiver software FIFO overrun\n"); } if (roe && ror) { /* * The RX FIFO Enable (CNTRL_RFE) must be toggled to clear * the Rx FIFO Over Run status (STATS_ROR) */ v |= CNTRL_RFE; events |= V4L2_SUBDEV_IR_RX_HW_FIFO_OVERRUN; v4l2_err(sd, "IR receiver hardware FIFO overrun\n"); } if (rte && rto) { /* * The IR Receiver Enable (CNTRL_RXE) must be toggled to clear * the Rx Pulse Width Timer Time Out (STATS_RTO) */ v |= CNTRL_RXE; events |= V4L2_SUBDEV_IR_RX_END_OF_RX_DETECTED; } if (v) { /* Clear STATS_ROR & STATS_RTO as needed by resetting hardware */ cx25840_write4(c, CX25840_IR_CNTRL_REG, cntrl & ~v); cx25840_write4(c, CX25840_IR_CNTRL_REG, cntrl); *handled = true; } spin_lock_irqsave(&ir_state->rx_kfifo_lock, flags); if (kfifo_len(&ir_state->rx_kfifo) >= CX25840_IR_RX_KFIFO_SIZE / 2) events |= V4L2_SUBDEV_IR_RX_FIFO_SERVICE_REQ; spin_unlock_irqrestore(&ir_state->rx_kfifo_lock, flags); if (events) v4l2_subdev_notify(sd, V4L2_SUBDEV_IR_RX_NOTIFY, &events); return 0; } /* Receiver */ static int cx25840_ir_rx_read(struct v4l2_subdev *sd, u8 *buf, size_t count, ssize_t *num) { struct cx25840_ir_state *ir_state = to_ir_state(sd); bool invert; u16 divider; unsigned int i, n; union cx25840_ir_fifo_rec *p; unsigned u, v, w; if (ir_state == NULL) return -ENODEV; invert = (bool) atomic_read(&ir_state->rx_invert); divider = (u16) atomic_read(&ir_state->rxclk_divider); n = count / sizeof(union cx25840_ir_fifo_rec) * sizeof(union cx25840_ir_fifo_rec); if (n == 0) { *num = 0; return 0; } n = kfifo_out_locked(&ir_state->rx_kfifo, buf, n, &ir_state->rx_kfifo_lock); n /= sizeof(union cx25840_ir_fifo_rec); *num = n * sizeof(union cx25840_ir_fifo_rec); for (p = (union cx25840_ir_fifo_rec *) buf, i = 0; i < n; p++, i++) { if ((p->hw_fifo_data & FIFO_RXTX_RTO) == FIFO_RXTX_RTO) { /* Assume RTO was because of no IR light input */ u = 0; w = 1; } else { u = (p->hw_fifo_data & FIFO_RXTX_LVL) ? 1 : 0; if (invert) u = u ? 0 : 1; w = 0; } v = (unsigned) pulse_width_count_to_ns( (u16)(p->hw_fifo_data & FIFO_RXTX), divider) / 1000; if (v > IR_MAX_DURATION) v = IR_MAX_DURATION; p->ir_core_data = (struct ir_raw_event) { .pulse = u, .duration = v, .timeout = w }; v4l2_dbg(2, ir_debug, sd, "rx read: %10u ns %s %s\n", v, u ? "mark" : "space", w ? 
"(timed out)" : ""); if (w) v4l2_dbg(2, ir_debug, sd, "rx read: end of rx\n"); } return 0; } static int cx25840_ir_rx_g_parameters(struct v4l2_subdev *sd, struct v4l2_subdev_ir_parameters *p) { struct cx25840_ir_state *ir_state = to_ir_state(sd); if (ir_state == NULL) return -ENODEV; mutex_lock(&ir_state->rx_params_lock); memcpy(p, &ir_state->rx_params, sizeof(struct v4l2_subdev_ir_parameters)); mutex_unlock(&ir_state->rx_params_lock); return 0; } static int cx25840_ir_rx_shutdown(struct v4l2_subdev *sd) { struct cx25840_ir_state *ir_state = to_ir_state(sd); struct i2c_client *c; if (ir_state == NULL) return -ENODEV; c = ir_state->c; mutex_lock(&ir_state->rx_params_lock); /* Disable or slow down all IR Rx circuits and counters */ irqenable_rx(sd, 0); control_rx_enable(c, false); control_rx_demodulation_enable(c, false); control_rx_s_edge_detection(c, CNTRL_EDG_NONE); filter_rx_s_min_width(c, 0); cx25840_write4(c, CX25840_IR_RXCLK_REG, RXCLK_RCD); ir_state->rx_params.shutdown = true; mutex_unlock(&ir_state->rx_params_lock); return 0; } static int cx25840_ir_rx_s_parameters(struct v4l2_subdev *sd, struct v4l2_subdev_ir_parameters *p) { struct cx25840_ir_state *ir_state = to_ir_state(sd); struct i2c_client *c; struct v4l2_subdev_ir_parameters *o; u16 rxclk_divider; if (ir_state == NULL) return -ENODEV; if (p->shutdown) return cx25840_ir_rx_shutdown(sd); if (p->mode != V4L2_SUBDEV_IR_MODE_PULSE_WIDTH) return -ENOSYS; c = ir_state->c; o = &ir_state->rx_params; mutex_lock(&ir_state->rx_params_lock); o->shutdown = p->shutdown; p->mode = V4L2_SUBDEV_IR_MODE_PULSE_WIDTH; o->mode = p->mode; p->bytes_per_data_element = sizeof(union cx25840_ir_fifo_rec); o->bytes_per_data_element = p->bytes_per_data_element; /* Before we tweak the hardware, we have to disable the receiver */ irqenable_rx(sd, 0); control_rx_enable(c, false); control_rx_demodulation_enable(c, p->modulation); o->modulation = p->modulation; if (p->modulation) { p->carrier_freq = rxclk_rx_s_carrier(c, p->carrier_freq, &rxclk_divider); o->carrier_freq = p->carrier_freq; p->duty_cycle = 50; o->duty_cycle = p->duty_cycle; control_rx_s_carrier_window(c, p->carrier_freq, &p->carrier_range_lower, &p->carrier_range_upper); o->carrier_range_lower = p->carrier_range_lower; o->carrier_range_upper = p->carrier_range_upper; p->max_pulse_width = (u32) pulse_width_count_to_ns(FIFO_RXTX, rxclk_divider); } else { p->max_pulse_width = rxclk_rx_s_max_pulse_width(c, p->max_pulse_width, &rxclk_divider); } o->max_pulse_width = p->max_pulse_width; atomic_set(&ir_state->rxclk_divider, rxclk_divider); p->noise_filter_min_width = filter_rx_s_min_width(c, p->noise_filter_min_width); o->noise_filter_min_width = p->noise_filter_min_width; p->resolution = clock_divider_to_resolution(rxclk_divider); o->resolution = p->resolution; /* FIXME - make this dependent on resolution for better performance */ control_rx_irq_watermark(c, RX_FIFO_HALF_FULL); control_rx_s_edge_detection(c, CNTRL_EDG_BOTH); o->invert_level = p->invert_level; atomic_set(&ir_state->rx_invert, p->invert_level); o->interrupt_enable = p->interrupt_enable; o->enable = p->enable; if (p->enable) { unsigned long flags; spin_lock_irqsave(&ir_state->rx_kfifo_lock, flags); kfifo_reset(&ir_state->rx_kfifo); spin_unlock_irqrestore(&ir_state->rx_kfifo_lock, flags); if (p->interrupt_enable) irqenable_rx(sd, IRQEN_RSE | IRQEN_RTE | IRQEN_ROE); control_rx_enable(c, p->enable); } mutex_unlock(&ir_state->rx_params_lock); return 0; } /* Transmitter */ static int cx25840_ir_tx_write(struct v4l2_subdev *sd, u8 *buf, 
size_t count, ssize_t *num) { struct cx25840_ir_state *ir_state = to_ir_state(sd); if (ir_state == NULL) return -ENODEV; #if 0 /* * FIXME - the code below is an incomplete and untested sketch of what * may need to be done. The critical part is to get 4 (or 8) pulses * from the tx_kfifo, or converted from ns to the proper units from the * input, and push them off to the hardware Tx FIFO right away, if the * HW TX fifo needs service. The rest can be pushed to the tx_kfifo in * a less critical timeframe. Also watch out for overruning the * tx_kfifo - don't let it happen and let the caller know not all his * pulses were written. */ u32 *ns_pulse = (u32 *) buf; unsigned int n; u32 fifo_pulse[FIFO_TX_DEPTH]; u32 mark; /* Compute how much we can fit in the tx kfifo */ n = CX25840_IR_TX_KFIFO_SIZE - kfifo_len(ir_state->tx_kfifo); n = min(n, (unsigned int) count); n /= sizeof(u32); /* FIXME - turn on Tx Fifo service interrupt * check hardware fifo level, and other stuff */ for (i = 0; i < n; ) { for (j = 0; j < FIFO_TX_DEPTH / 2 && i < n; j++) { mark = ns_pulse[i] & LEVEL_MASK; fifo_pulse[j] = ns_to_pulse_width_count( ns_pulse[i] & ~LEVEL_MASK, ir_state->txclk_divider); if (mark) fifo_pulse[j] &= FIFO_RXTX_LVL; i++; } kfifo_put(ir_state->tx_kfifo, (u8 *) fifo_pulse, j * sizeof(u32)); } *num = n * sizeof(u32); #else /* For now enable the Tx FIFO Service interrupt & pretend we did work */ irqenable_tx(sd, IRQEN_TSE); *num = count; #endif return 0; } static int cx25840_ir_tx_g_parameters(struct v4l2_subdev *sd, struct v4l2_subdev_ir_parameters *p) { struct cx25840_ir_state *ir_state = to_ir_state(sd); if (ir_state == NULL) return -ENODEV; mutex_lock(&ir_state->tx_params_lock); memcpy(p, &ir_state->tx_params, sizeof(struct v4l2_subdev_ir_parameters)); mutex_unlock(&ir_state->tx_params_lock); return 0; } static int cx25840_ir_tx_shutdown(struct v4l2_subdev *sd) { struct cx25840_ir_state *ir_state = to_ir_state(sd); struct i2c_client *c; if (ir_state == NULL) return -ENODEV; c = ir_state->c; mutex_lock(&ir_state->tx_params_lock); /* Disable or slow down all IR Tx circuits and counters */ irqenable_tx(sd, 0); control_tx_enable(c, false); control_tx_modulation_enable(c, false); cx25840_write4(c, CX25840_IR_TXCLK_REG, TXCLK_TCD); ir_state->tx_params.shutdown = true; mutex_unlock(&ir_state->tx_params_lock); return 0; } static int cx25840_ir_tx_s_parameters(struct v4l2_subdev *sd, struct v4l2_subdev_ir_parameters *p) { struct cx25840_ir_state *ir_state = to_ir_state(sd); struct i2c_client *c; struct v4l2_subdev_ir_parameters *o; u16 txclk_divider; if (ir_state == NULL) return -ENODEV; if (p->shutdown) return cx25840_ir_tx_shutdown(sd); if (p->mode != V4L2_SUBDEV_IR_MODE_PULSE_WIDTH) return -ENOSYS; c = ir_state->c; o = &ir_state->tx_params; mutex_lock(&ir_state->tx_params_lock); o->shutdown = p->shutdown; p->mode = V4L2_SUBDEV_IR_MODE_PULSE_WIDTH; o->mode = p->mode; p->bytes_per_data_element = sizeof(union cx25840_ir_fifo_rec); o->bytes_per_data_element = p->bytes_per_data_element; /* Before we tweak the hardware, we have to disable the transmitter */ irqenable_tx(sd, 0); control_tx_enable(c, false); control_tx_modulation_enable(c, p->modulation); o->modulation = p->modulation; if (p->modulation) { p->carrier_freq = txclk_tx_s_carrier(c, p->carrier_freq, &txclk_divider); o->carrier_freq = p->carrier_freq; p->duty_cycle = cduty_tx_s_duty_cycle(c, p->duty_cycle); o->duty_cycle = p->duty_cycle; p->max_pulse_width = (u32) pulse_width_count_to_ns(FIFO_RXTX, txclk_divider); } else { p->max_pulse_width = 
txclk_tx_s_max_pulse_width(c, p->max_pulse_width, &txclk_divider); } o->max_pulse_width = p->max_pulse_width; atomic_set(&ir_state->txclk_divider, txclk_divider); p->resolution = clock_divider_to_resolution(txclk_divider); o->resolution = p->resolution; /* FIXME - make this dependent on resolution for better performance */ control_tx_irq_watermark(c, TX_FIFO_HALF_EMPTY); control_tx_polarity_invert(c, p->invert_carrier_sense); o->invert_carrier_sense = p->invert_carrier_sense; /* * FIXME: we don't have hardware help for IO pin level inversion * here like we have on the CX23888. * Act on this with some mix of logical inversion of data levels, * carrier polarity, and carrier duty cycle. */ o->invert_level = p->invert_level; o->interrupt_enable = p->interrupt_enable; o->enable = p->enable; if (p->enable) { /* reset tx_fifo here */ if (p->interrupt_enable) irqenable_tx(sd, IRQEN_TSE); control_tx_enable(c, p->enable); } mutex_unlock(&ir_state->tx_params_lock); return 0; } /* * V4L2 Subdevice Core Ops support */ int cx25840_ir_log_status(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); struct i2c_client *c = state->c; char *s; int i, j; u32 cntrl, txclk, rxclk, cduty, stats, irqen, filtr; /* The CX23888 chip doesn't have an IR controller on the A/V core */ if (is_cx23888(state)) return 0; cntrl = cx25840_read4(c, CX25840_IR_CNTRL_REG); txclk = cx25840_read4(c, CX25840_IR_TXCLK_REG) & TXCLK_TCD; rxclk = cx25840_read4(c, CX25840_IR_RXCLK_REG) & RXCLK_RCD; cduty = cx25840_read4(c, CX25840_IR_CDUTY_REG) & CDUTY_CDC; stats = cx25840_read4(c, CX25840_IR_STATS_REG); irqen = cx25840_read4(c, CX25840_IR_IRQEN_REG); if (is_cx23885(state) || is_cx23887(state)) irqen ^= IRQEN_MSK; filtr = cx25840_read4(c, CX25840_IR_FILTR_REG) & FILTR_LPF; v4l2_info(sd, "IR Receiver:\n"); v4l2_info(sd, "\tEnabled: %s\n", cntrl & CNTRL_RXE ? "yes" : "no"); v4l2_info(sd, "\tDemodulation from a carrier: %s\n", cntrl & CNTRL_DMD ? "enabled" : "disabled"); v4l2_info(sd, "\tFIFO: %s\n", cntrl & CNTRL_RFE ? "enabled" : "disabled"); switch (cntrl & CNTRL_EDG) { case CNTRL_EDG_NONE: s = "disabled"; break; case CNTRL_EDG_FALL: s = "falling edge"; break; case CNTRL_EDG_RISE: s = "rising edge"; break; case CNTRL_EDG_BOTH: s = "rising & falling edges"; break; default: s = "??? edge"; break; } v4l2_info(sd, "\tPulse timers' start/stop trigger: %s\n", s); v4l2_info(sd, "\tFIFO data on pulse timer overflow: %s\n", cntrl & CNTRL_R ? "not loaded" : "overflow marker"); v4l2_info(sd, "\tFIFO interrupt watermark: %s\n", cntrl & CNTRL_RIC ? "not empty" : "half full or greater"); v4l2_info(sd, "\tLoopback mode: %s\n", cntrl & CNTRL_LBM ? "loopback active" : "normal receive"); if (cntrl & CNTRL_DMD) { v4l2_info(sd, "\tExpected carrier (16 clocks): %u Hz\n", clock_divider_to_carrier_freq(rxclk)); switch (cntrl & CNTRL_WIN) { case CNTRL_WIN_3_3: i = 3; j = 3; break; case CNTRL_WIN_4_3: i = 4; j = 3; break; case CNTRL_WIN_3_4: i = 3; j = 4; break; case CNTRL_WIN_4_4: i = 4; j = 4; break; default: i = 0; j = 0; break; } v4l2_info(sd, "\tNext carrier edge window: 16 clocks -%1d/+%1d, %u to %u Hz\n", i, j, clock_divider_to_freq(rxclk, 16 + j), clock_divider_to_freq(rxclk, 16 - i)); } v4l2_info(sd, "\tMax measurable pulse width: %u us, %llu ns\n", pulse_width_count_to_us(FIFO_RXTX, rxclk), pulse_width_count_to_ns(FIFO_RXTX, rxclk)); v4l2_info(sd, "\tLow pass filter: %s\n", filtr ? 
"enabled" : "disabled"); if (filtr) v4l2_info(sd, "\tMin acceptable pulse width (LPF): %u us, %u ns\n", lpf_count_to_us(filtr), lpf_count_to_ns(filtr)); v4l2_info(sd, "\tPulse width timer timed-out: %s\n", stats & STATS_RTO ? "yes" : "no"); v4l2_info(sd, "\tPulse width timer time-out intr: %s\n", irqen & IRQEN_RTE ? "enabled" : "disabled"); v4l2_info(sd, "\tFIFO overrun: %s\n", stats & STATS_ROR ? "yes" : "no"); v4l2_info(sd, "\tFIFO overrun interrupt: %s\n", irqen & IRQEN_ROE ? "enabled" : "disabled"); v4l2_info(sd, "\tBusy: %s\n", stats & STATS_RBY ? "yes" : "no"); v4l2_info(sd, "\tFIFO service requested: %s\n", stats & STATS_RSR ? "yes" : "no"); v4l2_info(sd, "\tFIFO service request interrupt: %s\n", irqen & IRQEN_RSE ? "enabled" : "disabled"); v4l2_info(sd, "IR Transmitter:\n"); v4l2_info(sd, "\tEnabled: %s\n", cntrl & CNTRL_TXE ? "yes" : "no"); v4l2_info(sd, "\tModulation onto a carrier: %s\n", cntrl & CNTRL_MOD ? "enabled" : "disabled"); v4l2_info(sd, "\tFIFO: %s\n", cntrl & CNTRL_TFE ? "enabled" : "disabled"); v4l2_info(sd, "\tFIFO interrupt watermark: %s\n", cntrl & CNTRL_TIC ? "not empty" : "half full or less"); v4l2_info(sd, "\tCarrier polarity: %s\n", cntrl & CNTRL_CPL ? "space:burst mark:noburst" : "space:noburst mark:burst"); if (cntrl & CNTRL_MOD) { v4l2_info(sd, "\tCarrier (16 clocks): %u Hz\n", clock_divider_to_carrier_freq(txclk)); v4l2_info(sd, "\tCarrier duty cycle: %2u/16\n", cduty + 1); } v4l2_info(sd, "\tMax pulse width: %u us, %llu ns\n", pulse_width_count_to_us(FIFO_RXTX, txclk), pulse_width_count_to_ns(FIFO_RXTX, txclk)); v4l2_info(sd, "\tBusy: %s\n", stats & STATS_TBY ? "yes" : "no"); v4l2_info(sd, "\tFIFO service requested: %s\n", stats & STATS_TSR ? "yes" : "no"); v4l2_info(sd, "\tFIFO service request interrupt: %s\n", irqen & IRQEN_TSE ? 
"enabled" : "disabled"); return 0; } const struct v4l2_subdev_ir_ops cx25840_ir_ops = { .rx_read = cx25840_ir_rx_read, .rx_g_parameters = cx25840_ir_rx_g_parameters, .rx_s_parameters = cx25840_ir_rx_s_parameters, .tx_write = cx25840_ir_tx_write, .tx_g_parameters = cx25840_ir_tx_g_parameters, .tx_s_parameters = cx25840_ir_tx_s_parameters, }; static const struct v4l2_subdev_ir_parameters default_rx_params = { .bytes_per_data_element = sizeof(union cx25840_ir_fifo_rec), .mode = V4L2_SUBDEV_IR_MODE_PULSE_WIDTH, .enable = false, .interrupt_enable = false, .shutdown = true, .modulation = true, .carrier_freq = 36000, /* 36 kHz - RC-5, and RC-6 carrier */ /* RC-5: 666,667 ns = 1/36 kHz * 32 cycles * 1 mark * 0.75 */ /* RC-6: 333,333 ns = 1/36 kHz * 16 cycles * 1 mark * 0.75 */ .noise_filter_min_width = 333333, /* ns */ .carrier_range_lower = 35000, .carrier_range_upper = 37000, .invert_level = false, }; static const struct v4l2_subdev_ir_parameters default_tx_params = { .bytes_per_data_element = sizeof(union cx25840_ir_fifo_rec), .mode = V4L2_SUBDEV_IR_MODE_PULSE_WIDTH, .enable = false, .interrupt_enable = false, .shutdown = true, .modulation = true, .carrier_freq = 36000, /* 36 kHz - RC-5 carrier */ .duty_cycle = 25, /* 25 % - RC-5 carrier */ .invert_level = false, .invert_carrier_sense = false, }; int cx25840_ir_probe(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); struct cx25840_ir_state *ir_state; struct v4l2_subdev_ir_parameters default_params; /* Only init the IR controller for the CX2388[57] AV Core for now */ if (!(is_cx23885(state) || is_cx23887(state))) return 0; ir_state = devm_kzalloc(&state->c->dev, sizeof(*ir_state), GFP_KERNEL); if (ir_state == NULL) return -ENOMEM; spin_lock_init(&ir_state->rx_kfifo_lock); if (kfifo_alloc(&ir_state->rx_kfifo, CX25840_IR_RX_KFIFO_SIZE, GFP_KERNEL)) return -ENOMEM; ir_state->c = state->c; state->ir_state = ir_state; /* Ensure no interrupts arrive yet */ if (is_cx23885(state) || is_cx23887(state)) cx25840_write4(ir_state->c, CX25840_IR_IRQEN_REG, IRQEN_MSK); else cx25840_write4(ir_state->c, CX25840_IR_IRQEN_REG, 0); mutex_init(&ir_state->rx_params_lock); default_params = default_rx_params; v4l2_subdev_call(sd, ir, rx_s_parameters, &default_params); mutex_init(&ir_state->tx_params_lock); default_params = default_tx_params; v4l2_subdev_call(sd, ir, tx_s_parameters, &default_params); return 0; } int cx25840_ir_remove(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); struct cx25840_ir_state *ir_state = to_ir_state(sd); if (ir_state == NULL) return -ENODEV; cx25840_ir_rx_shutdown(sd); cx25840_ir_tx_shutdown(sd); kfifo_free(&ir_state->rx_kfifo); state->ir_state = NULL; return 0; }
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/module.h> #include <linux/init.h> #include <linux/etherdevice.h> #include <linux/netdevice.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/timer.h> #include <linux/rtnetlink.h> #include <net/codel.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "sta_info.h" #include "debugfs_sta.h" #include "mesh.h" #include "wme.h" /** * DOC: STA information lifetime rules * * STA info structures (&struct sta_info) are managed in a hash table * for faster lookup and a list for iteration. They are managed using * RCU, i.e. access to the list and hash table is protected by RCU. * * Upon allocating a STA info structure with sta_info_alloc(), the caller * owns that structure. It must then insert it into the hash table using * either sta_info_insert() or sta_info_insert_rcu(); only in the latter * case (which acquires an rcu read section but must not be called from * within one) will the pointer still be valid after the call. Note that * the caller may not do much with the STA info before inserting it; in * particular, it may not start any mesh peer link management or add * encryption keys. * * When the insertion fails (sta_info_insert() returns non-zero), the * structure will have been freed by sta_info_insert()! * * Station entries are added by mac80211 when you establish a link with a * peer. This means different things for the different types of interfaces * we support. For a regular station this means we add the AP sta when we * receive an association response from the AP. For IBSS this occurs when * we get to know about a peer on the same IBSS. For WDS we add the sta for * the peer immediately upon device open. When using AP mode we add stations * for each respective station upon request from userspace through nl80211. * * In order to remove a STA info structure, various sta_info_destroy_*() * calls are available. * * There is no concept of ownership on a STA entry; each structure is * owned by the global hash table/list until it is removed. All users of * the structure need to be RCU protected so that the structure won't be * freed before they are done using it.
*/ struct sta_link_alloc { struct link_sta_info info; struct ieee80211_link_sta sta; struct rcu_head rcu_head; }; static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), .key_len = ETH_ALEN, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; static const struct rhashtable_params link_sta_rht_params = { .nelem_hint = 3, /* start small */ .automatic_shrinking = true, .head_offset = offsetof(struct link_sta_info, link_hash_node), .key_offset = offsetof(struct link_sta_info, addr), .key_len = ETH_ALEN, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { return rhltable_remove(&local->sta_hash, &sta->hash_node, sta_rht_params); } static int link_sta_info_hash_add(struct ieee80211_local *local, struct link_sta_info *link_sta) { lockdep_assert_wiphy(local->hw.wiphy); return rhltable_insert(&local->link_sta_hash, &link_sta->link_hash_node, link_sta_rht_params); } static int link_sta_info_hash_del(struct ieee80211_local *local, struct link_sta_info *link_sta) { lockdep_assert_wiphy(local->hw.wiphy); return rhltable_remove(&local->link_sta_hash, &link_sta->link_hash_node, link_sta_rht_params); } void ieee80211_purge_sta_txqs(struct sta_info *sta) { struct ieee80211_local *local = sta->sdata->local; int i; for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { struct txq_info *txqi; if (!sta->sta.txq[i]) continue; txqi = to_txq_info(sta->sta.txq[i]); ieee80211_txq_purge(local, txqi); } } static void __cleanup_single_sta(struct sta_info *sta) { int ac, i; struct tid_ampdu_tx *tid_tx; struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ps_data *ps; if (test_sta_flag(sta, WLAN_STA_PS_STA) || test_sta_flag(sta, WLAN_STA_PS_DRIVER) || test_sta_flag(sta, WLAN_STA_PS_DELIVER)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; else if (ieee80211_vif_is_mesh(&sdata->vif)) ps = &sdata->u.mesh.ps; else return; clear_sta_flag(sta, WLAN_STA_PS_STA); clear_sta_flag(sta, WLAN_STA_PS_DRIVER); clear_sta_flag(sta, WLAN_STA_PS_DELIVER); atomic_dec(&ps->num_sta_ps); } ieee80211_purge_sta_txqs(sta); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]); ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]); } if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_sta_cleanup(sta); cancel_work_sync(&sta->drv_deliver_wk); /* * Destroy aggregation state here. It would be nice to wait for the * driver to finish aggregation stop and then clean up, but for now * drivers have to handle aggregation stop being requested, followed * directly by station destruction. 
*/ for (i = 0; i < IEEE80211_NUM_TIDS; i++) { kfree(sta->ampdu_mlme.tid_start_tx[i]); tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); if (!tid_tx) continue; ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending); kfree(tid_tx); } } static void cleanup_single_sta(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; __cleanup_single_sta(sta); sta_info_free(local, sta); } struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, const u8 *addr) { return rhltable_lookup(&local->sta_hash, addr, sta_rht_params); } /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct sta_info *sta; rcu_read_lock(); for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return sta; } } rcu_read_unlock(); return NULL; } /* * Get sta info either from the specified interface * or from one of its vlans */ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct sta_info *sta; rcu_read_lock(); for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return sta; } } rcu_read_unlock(); return NULL; } struct rhlist_head *link_sta_info_hash_lookup(struct ieee80211_local *local, const u8 *addr) { return rhltable_lookup(&local->link_sta_hash, addr, link_sta_rht_params); } struct link_sta_info * link_sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct link_sta_info *link_sta; rcu_read_lock(); for_each_link_sta_info(local, addr, link_sta, tmp) { struct sta_info *sta = link_sta->sta; if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return link_sta; } } rcu_read_unlock(); return NULL; } struct ieee80211_sta * ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw, const u8 *addr, const u8 *localaddr, unsigned int *link_id) { struct ieee80211_local *local = hw_to_local(hw); struct link_sta_info *link_sta; struct rhlist_head *tmp; for_each_link_sta_info(local, addr, link_sta, tmp) { struct sta_info *sta = link_sta->sta; struct ieee80211_link_data *link; u8 _link_id = link_sta->link_id; if (!localaddr) { if (link_id) *link_id = _link_id; return &sta->sta; } link = rcu_dereference(sta->sdata->link[_link_id]); if (!link) continue; if (memcmp(link->conf->addr, localaddr, ETH_ALEN)) continue; if (link_id) *link_id = _link_id; return &sta->sta; } return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_link_addrs); struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local, const u8 *sta_addr, const u8 *vif_addr) { struct rhlist_head *tmp; struct sta_info *sta; for_each_sta_info(local, sta_addr, sta, tmp) { if (ether_addr_equal(vif_addr, sta->sdata->vif.addr)) return sta; } return NULL; } struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; int i = 0; 
list_for_each_entry_rcu(sta, &local->sta_list, list, lockdep_is_held(&local->hw.wiphy->mtx)) { if (sdata != sta->sdata) continue; if (i < idx) { ++i; continue; } return sta; } return NULL; } static void sta_info_free_link(struct link_sta_info *link_sta) { free_percpu(link_sta->pcpu_rx_stats); } static void sta_remove_link(struct sta_info *sta, unsigned int link_id, bool unhash) { struct sta_link_alloc *alloc = NULL; struct link_sta_info *link_sta; lockdep_assert_wiphy(sta->local->hw.wiphy); link_sta = rcu_access_pointer(sta->link[link_id]); if (WARN_ON(!link_sta)) return; if (unhash) link_sta_info_hash_del(sta->local, link_sta); if (test_sta_flag(sta, WLAN_STA_INSERTED)) ieee80211_link_sta_debugfs_remove(link_sta); if (link_sta != &sta->deflink) alloc = container_of(link_sta, typeof(*alloc), info); sta->sta.valid_links &= ~BIT(link_id); RCU_INIT_POINTER(sta->link[link_id], NULL); RCU_INIT_POINTER(sta->sta.link[link_id], NULL); if (alloc) { sta_info_free_link(&alloc->info); kfree_rcu(alloc, rcu_head); } ieee80211_sta_recalc_aggregates(&sta->sta); } /** * sta_info_free - free STA * * @local: pointer to the global information * @sta: STA info to free * * This function must undo everything done by sta_info_alloc() * that may happen before sta_info_insert(). It may only be * called when sta_info_insert() has not been attempted (and * if that fails, the station is freed anyway.) */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; link_sta = rcu_access_pointer(sta->link[i]); if (!link_sta) continue; sta_remove_link(sta, i, false); } /* * If we had used sta_info_pre_move_state() then we might not * have gone through the state transitions down again, so do * it here now (and warn if it's inserted). * * This will clear state such as fast TX/RX that may have been * allocated during state transitions. 
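 *
 * As an illustration: a station left in IEEE80211_STA_AUTH by
 * sta_info_pre_move_state() is walked back AUTH -> NONE by the loop
 * below, one sta_info_move_state() step at a time.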
*/ while (sta->sta_state > IEEE80211_STA_NONE) { int ret; WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); ret = sta_info_move_state(sta, sta->sta_state - 1); if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret)) break; } if (sta->rate_ctrl) rate_control_free_sta(sta); sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); kfree(to_txq_info(sta->sta.txq[0])); kfree(rcu_dereference_raw(sta->sta.rates)); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif sta_info_free_link(&sta->deflink); kfree(sta); } static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { return rhltable_insert(&local->sta_hash, &sta->hash_node, sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) { struct sta_info *sta; sta = container_of(wk, struct sta_info, drv_deliver_wk); if (sta->dead) return; local_bh_disable(); if (!test_sta_flag(sta, WLAN_STA_PS_STA)) ieee80211_sta_ps_deliver_wakeup(sta); else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL)) ieee80211_sta_ps_deliver_poll_response(sta); else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD)) ieee80211_sta_ps_deliver_uapsd(sta); local_bh_enable(); } static int sta_prepare_rate_control(struct ieee80211_local *local, struct sta_info *sta, gfp_t gfp) { if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) return 0; sta->rate_ctrl = local->rate_ctrl; sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, sta, gfp); if (!sta->rate_ctrl_priv) return -ENOMEM; return 0; } static int sta_info_alloc_link(struct ieee80211_local *local, struct link_sta_info *link_info, gfp_t gfp) { struct ieee80211_hw *hw = &local->hw; int i; if (ieee80211_hw_check(hw, USES_RSS)) { link_info->pcpu_rx_stats = alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp); if (!link_info->pcpu_rx_stats) return -ENOMEM; } link_info->rx_stats.last_rx = jiffies; u64_stats_init(&link_info->rx_stats.syncp); ewma_signal_init(&link_info->rx_stats_avg.signal); ewma_avg_signal_init(&link_info->status_stats.avg_ack_signal); for (i = 0; i < ARRAY_SIZE(link_info->rx_stats_avg.chain_signal); i++) ewma_signal_init(&link_info->rx_stats_avg.chain_signal[i]); return 0; } static void sta_info_add_link(struct sta_info *sta, unsigned int link_id, struct link_sta_info *link_info, struct ieee80211_link_sta *link_sta) { link_info->sta = sta; link_info->link_id = link_id; link_info->pub = link_sta; link_info->pub->sta = &sta->sta; link_sta->link_id = link_id; rcu_assign_pointer(sta->link[link_id], link_info); rcu_assign_pointer(sta->sta.link[link_id], link_sta); link_sta->smps_mode = IEEE80211_SMPS_OFF; link_sta->agg.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; } static struct sta_info * __sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, int link_id, const u8 *link_addr, gfp_t gfp) { struct ieee80211_local *local = sdata->local; struct ieee80211_hw *hw = &local->hw; struct sta_info *sta; void *txq_data; int size; int i; sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); if (!sta) return NULL; sta->local = local; sta->sdata = sdata; if (sta_info_alloc_link(local, &sta->deflink, gfp)) goto free; if (link_id >= 0) { sta_info_add_link(sta, link_id, &sta->deflink, &sta->sta.deflink); sta->sta.valid_links = BIT(link_id); } else { sta_info_add_link(sta, 0, &sta->deflink, &sta->sta.deflink); } sta->sta.cur = &sta->sta.deflink.agg; spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work); #ifdef 
CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) { sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); if (!sta->mesh) goto free; sta->mesh->plink_sta = sta; spin_lock_init(&sta->mesh->plink_lock); if (!sdata->u.mesh.user_mpm) timer_setup(&sta->mesh->plink_timer, mesh_plink_timer, 0); sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; } #endif memcpy(sta->addr, addr, ETH_ALEN); memcpy(sta->sta.addr, addr, ETH_ALEN); memcpy(sta->deflink.addr, link_addr, ETH_ALEN); memcpy(sta->sta.deflink.addr, link_addr, ETH_ALEN); sta->sta.max_rx_aggregation_subframes = local->hw.max_rx_aggregation_subframes; /* TODO link specific alloc and assignments for MLO Link STA */ /* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only. * The Tx path starts to use a key as soon as the key slot ptk_idx * references to is not NULL. To not use the initial Rx-only key * prematurely for Tx initialize ptk_idx to an impossible PTK keyid * which always will refer to a NULL key. */ BUILD_BUG_ON(ARRAY_SIZE(sta->ptk) <= INVALID_PTK_KEYIDX); sta->ptk_idx = INVALID_PTK_KEYIDX; ieee80211_init_frag_cache(&sta->frags); sta->sta_state = IEEE80211_STA_NONE; if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) sta->amsdu_mesh_control = -1; /* Mark TID as unreserved */ sta->reserved_tid = IEEE80211_TID_UNRESERVED; sta->last_connected = ktime_get_seconds(); size = sizeof(struct txq_info) + ALIGN(hw->txq_data_size, sizeof(void *)); txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp); if (!txq_data) goto free; for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { struct txq_info *txq = txq_data + i * size; /* might not do anything for the (bufferable) MMPDU TXQ */ ieee80211_txq_init(sdata, sta, txq, i); } if (sta_prepare_rate_control(local, sta, gfp)) goto free_txq; sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT; for (i = 0; i < IEEE80211_NUM_ACS; i++) { skb_queue_head_init(&sta->ps_tx_buf[i]); skb_queue_head_init(&sta->tx_filtered[i]); sta->airtime[i].deficit = sta->airtime_weight; atomic_set(&sta->airtime[i].aql_tx_pending, 0); sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i]; sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i]; } for (i = 0; i < IEEE80211_NUM_TIDS; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); for (i = 0; i < NUM_NL80211_BANDS; i++) { u32 mandatory = 0; int r; if (!hw->wiphy->bands[i]) continue; switch (i) { case NL80211_BAND_2GHZ: case NL80211_BAND_LC: /* * We use both here, even if we cannot really know for * sure the station will support both, but the only use * for this is when we don't know anything yet and send * management frames, and then we'll pick the lowest * possible rate anyway. 
* If we don't include _G here, we cannot find a rate * in P2P, and thus trigger the WARN_ONCE() in rate.c */ mandatory = IEEE80211_RATE_MANDATORY_B | IEEE80211_RATE_MANDATORY_G; break; case NL80211_BAND_5GHZ: mandatory = IEEE80211_RATE_MANDATORY_A; break; case NL80211_BAND_60GHZ: WARN_ON(1); mandatory = 0; break; } for (r = 0; r < hw->wiphy->bands[i]->n_bitrates; r++) { struct ieee80211_rate *rate; rate = &hw->wiphy->bands[i]->bitrates[r]; if (!(rate->flags & mandatory)) continue; sta->sta.deflink.supp_rates[i] |= BIT(r); } } sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; sta->cparams.ce_threshold_selector = 0; sta->cparams.ce_threshold_mask = 0; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; free_txq: kfree(to_txq_info(sta->sta.txq[0])); free: sta_info_free_link(&sta->deflink); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif kfree(sta); return NULL; } struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, gfp_t gfp) { return __sta_info_alloc(sdata, addr, -1, addr, gfp); } struct sta_info *sta_info_alloc_with_link(struct ieee80211_sub_if_data *sdata, const u8 *mld_addr, unsigned int link_id, const u8 *link_addr, gfp_t gfp) { return __sta_info_alloc(sdata, mld_addr, link_id, link_addr, gfp); } static int sta_info_insert_check(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; lockdep_assert_wiphy(sdata->local->hw.wiphy); /* * Can't be a WARN_ON because it can be triggered through a race: * something inserts a STA (on one CPU) without holding the RTNL * and another CPU turns off the net device. */ if (unlikely(!ieee80211_sdata_running(sdata))) return -ENETDOWN; if (WARN_ON(ether_addr_equal(sta->sta.addr, sdata->vif.addr) || !is_valid_ether_addr(sta->sta.addr))) return -EINVAL; /* The RCU read lock is required by rhashtable due to * asynchronous resize/rehash. We also require the mutex * for correctness. */ rcu_read_lock(); if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { rcu_read_unlock(); return -ENOTUNIQ; } rcu_read_unlock(); return 0; } static int sta_info_insert_drv_state(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { enum ieee80211_sta_state state; int err = 0; for (state = IEEE80211_STA_NOTEXIST; state < sta->sta_state; state++) { err = drv_sta_state(local, sdata, sta, state, state + 1); if (err) break; } if (!err) { /* * Drivers using legacy sta_add/sta_remove callbacks only * get uploaded set to true after sta_add is called. 
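		 * (As an illustration of the loop above: for a station
		 * being inserted in IEEE80211_STA_ASSOC, the driver sees
		 * drv_sta_state() transitions NOTEXIST -> NONE,
		 * NONE -> AUTH and AUTH -> ASSOC, in that order.)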
*/ if (!local->ops->sta_add) sta->uploaded = true; return 0; } if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { sdata_info(sdata, "failed to move IBSS STA %pM to state %d (%d) - keeping it anyway\n", sta->sta.addr, state + 1, err); err = 0; } /* unwind on error */ for (; state > IEEE80211_STA_NOTEXIST; state--) WARN_ON(drv_sta_state(local, sdata, sta, state, state - 1)); return err; } static void ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; bool allow_p2p_go_ps = sdata->vif.p2p; struct sta_info *sta; rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sdata != sta->sdata || !test_sta_flag(sta, WLAN_STA_ASSOC)) continue; if (!sta->sta.support_p2p_ps) { allow_p2p_go_ps = false; break; } } rcu_read_unlock(); if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_P2P_PS); } } static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; struct station_info *sinfo = NULL; int err = 0; lockdep_assert_wiphy(local->hw.wiphy); /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; goto out_cleanup; } sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); if (!sinfo) { err = -ENOMEM; goto out_cleanup; } local->num_sta++; local->sta_generation++; smp_mb(); /* simplify things and don't accept BA sessions yet */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); /* make the station visible */ err = sta_info_hash_add(local, sta); if (err) goto out_drop_sta; if (sta->sta.valid_links) { err = link_sta_info_hash_add(local, &sta->deflink); if (err) { sta_info_hash_del(local, sta); goto out_drop_sta; } } list_add_tail_rcu(&sta->list, &local->sta_list); /* update channel context before notifying the driver about state * change, this enables driver using the updated channel context right away. 
*/ if (sta->sta_state >= IEEE80211_STA_ASSOC) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } /* notify driver */ err = sta_info_insert_drv_state(local, sdata, sta); if (err) goto out_remove; set_sta_flag(sta, WLAN_STA_INSERTED); /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); if (sta->sta.valid_links) { int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; link_sta = rcu_dereference_protected(sta->link[i], lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) continue; ieee80211_link_sta_debugfs_add(link_sta); if (sdata->vif.active_links & BIT(i)) ieee80211_link_sta_debugfs_drv_add(link_sta); } } else { ieee80211_link_sta_debugfs_add(&sta->deflink); ieee80211_link_sta_debugfs_drv_add(&sta->deflink); } sinfo->generation = local->sta_generation; cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); /* move reference to rcu-protected */ rcu_read_lock(); if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); ieee80211_check_fast_xmit(sta); return 0; out_remove: if (sta->sta.valid_links) link_sta_info_hash_del(local, &sta->deflink); sta_info_hash_del(local, sta); list_del_rcu(&sta->list); out_drop_sta: local->num_sta--; synchronize_net(); out_cleanup: cleanup_single_sta(sta); kfree(sinfo); rcu_read_lock(); return err; } int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; int err; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); err = sta_info_insert_check(sta); if (err) { sta_info_free(local, sta); rcu_read_lock(); return err; } return sta_info_insert_finish(sta); } int sta_info_insert(struct sta_info *sta) { int err = sta_info_insert_rcu(sta); rcu_read_unlock(); return err; } static inline void __bss_tim_set(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __set_bit() format. */ tim[id / 8] |= (1 << (id % 8)); } static inline void __bss_tim_clear(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __clear_bit() format. */ tim[id / 8] &= ~(1 << (id % 8)); } static inline bool __bss_tim_get(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the test_bit() format. 
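	 * As an illustrative example: for AID 13 the bit lives in
	 * tim[13 / 8], i.e. tim[1], under the mask 1 << (13 % 8) == 0x20.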
*/ return tim[id / 8] & (1 << (id % 8)); } static unsigned long ieee80211_tids_for_ac(int ac) { /* If we ever support TIDs > 7, this obviously needs to be adjusted */ switch (ac) { case IEEE80211_AC_VO: return BIT(6) | BIT(7); case IEEE80211_AC_VI: return BIT(4) | BIT(5); case IEEE80211_AC_BE: return BIT(0) | BIT(3); case IEEE80211_AC_BK: return BIT(1) | BIT(2); default: WARN_ON(1); return 0; } } static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) { struct ieee80211_local *local = sta->local; struct ps_data *ps; bool indicate_tim = false; u8 ignore_for_tim = sta->sta.uapsd_queues; int ac; u16 id = sta->sta.aid; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (WARN_ON_ONCE(!sta->sdata->bss)) return; ps = &sta->sdata->bss->ps; #ifdef CONFIG_MAC80211_MESH } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { ps = &sta->sdata->u.mesh.ps; #endif } else { return; } /* No need to do anything if the driver does all */ if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim) return; if (sta->dead) goto done; /* * If all ACs are delivery-enabled then we should build * the TIM bit for all ACs anyway; if only some are then * we ignore those and build the TIM bit using only the * non-enabled ones. */ if (ignore_for_tim == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_tim = 0; if (ignore_pending) ignore_for_tim = BIT(IEEE80211_NUM_ACS) - 1; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac]) continue; indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac]); if (indicate_tim) break; tids = ieee80211_tids_for_ac(ac); indicate_tim |= sta->driver_buffered_tids & tids; indicate_tim |= sta->txq_buffered_tids & tids; } done: spin_lock_bh(&local->tim_lock); if (indicate_tim == __bss_tim_get(ps->tim, id)) goto out_unlock; if (indicate_tim) __bss_tim_set(ps->tim, id); else __bss_tim_clear(ps->tim, id); if (local->ops->set_tim && !WARN_ON(sta->dead)) { local->tim_in_locked_section = true; drv_set_tim(local, &sta->sta, indicate_tim); local->tim_in_locked_section = false; } out_unlock: spin_unlock_bh(&local->tim_lock); } void sta_info_recalc_tim(struct sta_info *sta) { __sta_info_recalc_tim(sta, false); } static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_tx_info *info; int timeout; if (!skb) return false; info = IEEE80211_SKB_CB(skb); /* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec */ timeout = (sta->listen_interval * sta->sdata->vif.bss_conf.beacon_int * 32 / 15625) * HZ; if (timeout < STA_TX_BUFFER_EXPIRE) timeout = STA_TX_BUFFER_EXPIRE; return time_after(jiffies, info->control.jiffies + timeout); } static bool sta_info_cleanup_expire_buffered_ac(struct ieee80211_local *local, struct sta_info *sta, int ac) { unsigned long flags; struct sk_buff *skb; /* * First check for frames that should expire on the filtered * queue. Frames here were rejected by the driver and are on * a separate queue to avoid reordering with normal PS-buffered * frames. They also aren't accounted for right now in the * total_ps_buffered counter. 
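	 * (The expiry check used below, sta_info_buffer_expired(),
	 * converts two listen intervals worth of beacon intervals from
	 * TUs to jiffies; the 32 / 15625 factor there is simply
	 * 2 * 1024 / 1000000 reduced.)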
*/ for (;;) { spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); skb = skb_peek(&sta->tx_filtered[ac]); if (sta_info_buffer_expired(sta, skb)) skb = __skb_dequeue(&sta->tx_filtered[ac]); else skb = NULL; spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); /* * Frames are queued in order, so if this one * hasn't expired yet we can stop testing. If * we actually reached the end of the queue we * also need to stop, of course. */ if (!skb) break; ieee80211_free_txskb(&local->hw, skb); } /* * Now also check the normal PS-buffered queue, this will * only find something if the filtered queue was emptied * since the filtered frames are all before the normal PS * buffered frames. */ for (;;) { spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); skb = skb_peek(&sta->ps_tx_buf[ac]); if (sta_info_buffer_expired(sta, skb)) skb = __skb_dequeue(&sta->ps_tx_buf[ac]); else skb = NULL; spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); /* * frames are queued in order, so if this one * hasn't expired yet (or we reached the end of * the queue) we can stop testing */ if (!skb) break; local->total_ps_buffered--; ps_dbg(sta->sdata, "Buffered frame expired (STA %pM)\n", sta->sta.addr); ieee80211_free_txskb(&local->hw, skb); } /* * Finally, recalculate the TIM bit for this station -- it might * now be clear because the station was too slow to retrieve its * frames. */ sta_info_recalc_tim(sta); /* * Return whether there are any frames still buffered, this is * used to check whether the cleanup timer still needs to run, * if there are no frames we don't need to rearm the timer. */ return !(skb_queue_empty(&sta->ps_tx_buf[ac]) && skb_queue_empty(&sta->tx_filtered[ac])); } static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, struct sta_info *sta) { bool have_buffered = false; int ac; /* This is only necessary for stations on BSS/MBSS interfaces */ if (!sta->sdata->bss && !ieee80211_vif_is_mesh(&sta->sdata->vif)) return false; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) have_buffered |= sta_info_cleanup_expire_buffered_ac(local, sta, ac); return have_buffered; } static int __must_check __sta_info_destroy_part1(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; int ret, i; might_sleep(); if (!sta) return -ENOENT; local = sta->local; sdata = sta->sdata; lockdep_assert_wiphy(local->hw.wiphy); /* * Before removing the station from the driver and * rate control, it might still start new aggregation * sessions -- block that to make sure the tear-down * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); /* * Before removing the station from the driver there might be pending * rx frames on RSS queues sent prior to the disassociation - wait for * all such frames to be processed. */ drv_sync_rx_queues(local, sta); for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; if (!(sta->sta.valid_links & BIT(i))) continue; link_sta = rcu_dereference_protected(sta->link[i], lockdep_is_held(&local->hw.wiphy->mtx)); link_sta_info_hash_del(local, link_sta); } ret = sta_info_hash_del(local, sta); if (WARN_ON(ret)) return ret; /* * for TDLS peers, make sure to return to the base channel before * removal. 
*/ if (test_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL)) { drv_tdls_cancel_channel_switch(local, sdata, &sta->sta); clear_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); } list_del_rcu(&sta->list); sta->removed = true; if (sta->uploaded) drv_sta_pre_rcu_remove(local, sta->sdata, sta); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && rcu_access_pointer(sdata->u.vlan.sta) == sta) RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); return 0; } static int _sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state, bool recalc) { struct ieee80211_local *local = sta->local; might_sleep(); if (sta->sta_state == new_state) return 0; /* check allowed transitions first */ switch (new_state) { case IEEE80211_STA_NONE: if (sta->sta_state != IEEE80211_STA_AUTH) return -EINVAL; break; case IEEE80211_STA_AUTH: if (sta->sta_state != IEEE80211_STA_NONE && sta->sta_state != IEEE80211_STA_ASSOC) return -EINVAL; break; case IEEE80211_STA_ASSOC: if (sta->sta_state != IEEE80211_STA_AUTH && sta->sta_state != IEEE80211_STA_AUTHORIZED) return -EINVAL; break; case IEEE80211_STA_AUTHORIZED: if (sta->sta_state != IEEE80211_STA_ASSOC) return -EINVAL; break; default: WARN(1, "invalid state %d", new_state); return -EINVAL; } sta_dbg(sta->sdata, "moving STA %pM to state %d\n", sta->sta.addr, new_state); /* notify the driver before the actual changes so it can * fail the transition */ if (test_sta_flag(sta, WLAN_STA_INSERTED)) { int err = drv_sta_state(sta->local, sta->sdata, sta, sta->sta_state, new_state); if (err) return err; } /* reflect the change in all state variables */ switch (new_state) { case IEEE80211_STA_NONE: if (sta->sta_state == IEEE80211_STA_AUTH) clear_bit(WLAN_STA_AUTH, &sta->_flags); break; case IEEE80211_STA_AUTH: if (sta->sta_state == IEEE80211_STA_NONE) { set_bit(WLAN_STA_AUTH, &sta->_flags); } else if (sta->sta_state == IEEE80211_STA_ASSOC) { clear_bit(WLAN_STA_ASSOC, &sta->_flags); if (recalc) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } } break; case IEEE80211_STA_ASSOC: if (sta->sta_state == IEEE80211_STA_AUTH) { set_bit(WLAN_STA_ASSOC, &sta->_flags); sta->assoc_at = ktime_get_boottime_ns(); if (recalc) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); /* * If we have encryption offload, flush (station) queues * (after ensuring concurrent TX completed) so we won't * transmit anything later unencrypted if/when keys are * also removed, which might otherwise happen depending * on how the hardware offload works. 
*/ if (local->ops->set_key) { synchronize_net(); if (local->ops->flush_sta) drv_flush_sta(local, sta->sdata, sta); else ieee80211_flush_queues(local, sta->sdata, false); } ieee80211_clear_fast_xmit(sta); ieee80211_clear_fast_rx(sta); } break; case IEEE80211_STA_AUTHORIZED: if (sta->sta_state == IEEE80211_STA_ASSOC) { ieee80211_vif_inc_num_mcast(sta->sdata); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sta->sdata->vif.type == NL80211_IFTYPE_AP) cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); break; default: break; } sta->sta_state = new_state; return 0; } int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { return _sta_info_move_state(sta, new_state, true); } static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; struct station_info *sinfo; int ret; /* * NOTE: This assumes at least synchronize_net() was done * after _part1 and before _part2! */ /* * There's a potential race in _part1 where we set WLAN_STA_BLOCK_BA * but someone might have just gotten past a check, and not yet into * queuing the work/creating the data/etc. * * Do another round of destruction so that the worker is certainly * canceled before we later free the station. * * Since this is after synchronize_rcu()/synchronize_net() we're now * certain that nobody can actually hold a reference to the STA and * be calling e.g. ieee80211_start_tx_ba_session(). */ ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ret = _sta_info_move_state(sta, IEEE80211_STA_ASSOC, recalc); WARN_ON_ONCE(ret); } /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); /* disable TIM bit - last chance to tell driver */ __sta_info_recalc_tim(sta, true); sta->dead = true; local->num_sta--; local->sta_generation++; while (sta->sta_state > IEEE80211_STA_NONE) { ret = _sta_info_move_state(sta, sta->sta_state - 1, recalc); if (ret) { WARN_ON_ONCE(1); break; } } if (sta->uploaded) { ret = drv_sta_state(local, sdata, sta, IEEE80211_STA_NONE, IEEE80211_STA_NOTEXIST); WARN_ON_ONCE(ret != 0); } sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL); if (sinfo) sta_set_sinfo(sta, sinfo, true); cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); ieee80211_sta_debugfs_remove(sta); ieee80211_destroy_frag_cache(&sta->frags); cleanup_single_sta(sta); } int __must_check __sta_info_destroy(struct sta_info *sta) { int err = __sta_info_destroy_part1(sta); if (err) return err; synchronize_net(); __sta_info_destroy_part2(sta, true); return 0; } int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get(sdata, addr); return __sta_info_destroy(sta); } int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get_bss(sdata, addr); return __sta_info_destroy(sta); } static void sta_info_cleanup(struct timer_list *t) { struct ieee80211_local *local = from_timer(local, t, sta_cleanup); struct sta_info *sta; bool timer_needed = false; rcu_read_lock(); list_for_each_entry_rcu(sta, 
&local->sta_list, list) if (sta_info_cleanup_expire_buffered(local, sta)) timer_needed = true; rcu_read_unlock(); if (local->quiescing) return; if (!timer_needed) return; mod_timer(&local->sta_cleanup, round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } int sta_info_init(struct ieee80211_local *local) { int err; err = rhltable_init(&local->sta_hash, &sta_rht_params); if (err) return err; err = rhltable_init(&local->link_sta_hash, &link_sta_rht_params); if (err) { rhltable_destroy(&local->sta_hash); return err; } spin_lock_init(&local->tim_lock); INIT_LIST_HEAD(&local->sta_list); timer_setup(&local->sta_cleanup, sta_info_cleanup, 0); return 0; } void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); rhltable_destroy(&local->sta_hash); rhltable_destroy(&local->link_sta_hash); } int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans, int link_id) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; LIST_HEAD(free_list); int ret = 0; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP); WARN_ON(vlans && !sdata->bss); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { if (sdata != sta->sdata && (!vlans || sdata->bss != sta->sdata->bss)) continue; if (link_id >= 0 && sta->sta.valid_links && !(sta->sta.valid_links & BIT(link_id))) continue; if (!WARN_ON(__sta_info_destroy_part1(sta))) list_add(&sta->free_list, &free_list); ret++; } if (!list_empty(&free_list)) { bool support_p2p_ps = true; synchronize_net(); list_for_each_entry_safe(sta, tmp, &free_list, free_list) { if (!sta->sta.support_p2p_ps) support_p2p_ps = false; __sta_info_destroy_part2(sta, false); } ieee80211_recalc_min_chandef(sdata, -1); if (!support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sdata); } return ret; } void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); if (sdata != sta->sdata) continue; if (time_is_before_jiffies(last_active + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); if (ieee80211_vif_is_mesh(&sdata->vif) && test_sta_flag(sta, WLAN_STA_PS_STA)) atomic_dec(&sdata->u.mesh.ps.num_sta_ps); WARN_ON(__sta_info_destroy(sta)); } } } struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *addr, const u8 *localaddr) { struct ieee80211_local *local = hw_to_local(hw); struct rhlist_head *tmp; struct sta_info *sta; /* * Just return a random station if localaddr is NULL * ... first in list. 
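	 *
	 * (Illustrative driver-side sketch, not from any specific driver:
	 * both the lookup and the returned pointer are only valid under
	 * RCU protection, e.g.
	 *
	 *	rcu_read_lock();
	 *	pubsta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr2, vif->addr);
	 *	if (pubsta)
	 *		... use pubsta ...
	 *	rcu_read_unlock();
	 * )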
*/ for_each_sta_info(local, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; if (!sta->uploaded) return NULL; return &sta->sta; } return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_ifaddr); struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr) { struct sta_info *sta; if (!vif) return NULL; sta = sta_info_get_bss(vif_to_sdata(vif), addr); if (!sta) return NULL; if (!sta->uploaded) return NULL; return &sta->sta; } EXPORT_SYMBOL(ieee80211_find_sta); /* powersave support code */ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff_head pending; int filtered = 0, buffered = 0, ac, i; unsigned long flags; struct ps_data *ps; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; else if (ieee80211_vif_is_mesh(&sdata->vif)) ps = &sdata->u.mesh.ps; else return; clear_sta_flag(sta, WLAN_STA_SP); BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1); sta->driver_buffered_tids = 0; sta->txq_buffered_tids = 0; if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { if (!sta->sta.txq[i] || !txq_has_queue(sta->sta.txq[i])) continue; schedule_and_wake_txq(local, to_txq_info(sta->sta.txq[i])); } skb_queue_head_init(&pending); /* sync with ieee80211_tx_h_unicast_ps_buf */ spin_lock(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending); spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); tmp = skb_queue_len(&pending); filtered += tmp - count; count = tmp; spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending); spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); tmp = skb_queue_len(&pending); buffered += tmp - count; } ieee80211_add_pending_skbs(local, &pending); /* now we're no longer in the deliver code */ clear_sta_flag(sta, WLAN_STA_PS_DELIVER); /* The station might have polled and then woken up before we responded, * so clear these flags now to avoid them sticking around. 
*/ clear_sta_flag(sta, WLAN_STA_PSPOLL); clear_sta_flag(sta, WLAN_STA_UAPSD); spin_unlock(&sta->ps_lock); atomic_dec(&ps->num_sta_ps); local->total_ps_buffered -= buffered; sta_info_recalc_tim(sta); ps_dbg(sdata, "STA %pM aid %d sending %d filtered/%d PS frames since STA woke up\n", sta->sta.addr, sta->sta.aid, filtered, buffered); ieee80211_check_fast_xmit(sta); } static void ieee80211_send_null_response(struct sta_info *sta, int tid, enum ieee80211_frame_release_type reason, bool call_driver, bool more_data) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos = sta->sta.wme; struct ieee80211_tx_info *info; struct ieee80211_chanctx_conf *chanctx_conf; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | IEEE80211_FCTL_FROMDS); } else { size -= 2; fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS); } skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = skb_put(skb, size); nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); if (qos) { nullfunc->qos_ctrl = cpu_to_le16(tid); if (reason == IEEE80211_FRAME_RELEASE_UAPSD) { nullfunc->qos_ctrl |= cpu_to_le16(IEEE80211_QOS_CTL_EOSP); if (more_data) nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); } } info = IEEE80211_SKB_CB(skb); /* * Tell TX path to send this frame even though the * STA may still remain is PS mode after this frame * exchange. Also set EOSP to indicate this packet * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; if (call_driver) drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); skb->dev = sdata->dev; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); kfree_skb(skb); return; } info->band = chanctx_conf->def.chan->band; ieee80211_xmit(sdata, sta, skb); rcu_read_unlock(); } static int find_highest_prio_tid(unsigned long tids) { /* lower 3 TIDs aren't ordered perfectly */ if (tids & 0xF8) return fls(tids) - 1; /* TID 0 is BE just like TID 3 */ if (tids & BIT(0)) return 0; return fls(tids) - 1; } /* Indicates if the MORE_DATA bit should be set in the last * frame obtained by ieee80211_sta_ps_get_frames. * Note that driver_release_tids is relevant only if * reason = IEEE80211_FRAME_RELEASE_PSPOLL */ static bool ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, enum ieee80211_frame_release_type reason, unsigned long driver_release_tids) { int ac; /* If the driver has data on more than one TID then * certainly there's more data if we release just a * single frame now (from a single TID). This will * only happen for PS-Poll. 
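	 * For example: with frames buffered in the driver on both TID 3
	 * and TID 6, a PS-Poll releases a single frame from only one of
	 * them, so the response must carry the MoreData bit.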
*/ if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && hweight16(driver_release_tids) > 1) return true; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; if (!skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac])) return true; } return false; } static void ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, enum ieee80211_frame_release_type reason, struct sk_buff_head *frames, unsigned long *driver_release_tids) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; int ac; /* Get response frame(s) and more data bit for the last one. */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; tids = ieee80211_tids_for_ac(ac); /* if we already have frames from software, then we can't also * release from hardware queues */ if (skb_queue_empty(frames)) { *driver_release_tids |= sta->driver_buffered_tids & tids; *driver_release_tids |= sta->txq_buffered_tids & tids; } if (!*driver_release_tids) { struct sk_buff *skb; while (n_frames > 0) { skb = skb_dequeue(&sta->tx_filtered[ac]); if (!skb) { skb = skb_dequeue( &sta->ps_tx_buf[ac]); if (skb) local->total_ps_buffered--; } if (!skb) break; n_frames--; __skb_queue_tail(frames, skb); } } /* If we have more frames buffered on this AC, then abort the * loop since we can't send more data from other ACs before * the buffered frames from this. */ if (!skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac])) break; } } static void ieee80211_sta_ps_deliver_response(struct sta_info *sta, int n_frames, u8 ignored_acs, enum ieee80211_frame_release_type reason) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; unsigned long driver_release_tids = 0; struct sk_buff_head frames; bool more_data; /* Service or PS-Poll period starts */ set_sta_flag(sta, WLAN_STA_SP); __skb_queue_head_init(&frames); ieee80211_sta_ps_get_frames(sta, n_frames, ignored_acs, reason, &frames, &driver_release_tids); more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids); if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL) driver_release_tids = BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { int tid, ac; /* * For PS-Poll, this can only happen due to a race condition * when we set the TIM bit and the station notices it, but * before it can poll for the frame we expire it. * * For uAPSD, this is said in the standard (11.2.1.5 h): * At each unscheduled SP for a non-AP STA, the AP shall * attempt to transmit at least one MSDU or MMPDU, but no * more than the value specified in the Max SP Length field * in the QoS Capability element from delivery-enabled ACs, * that are destined for the non-AP STA. * * Since we have no other MSDU/MMPDU, transmit a QoS null frame. */ /* This will evaluate to 1, 3, 5 or 7. 
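		 * The loop below picks the highest-priority AC that is
		 * not ignored; tid = 7 - 2 * ac then yields TID 7 for
		 * IEEE80211_AC_VO (0), TID 5 for AC_VI (1), TID 3 for
		 * AC_BE (2) and TID 1 for AC_BK (3), each of which maps
		 * back onto the same AC.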
*/ for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++) if (!(ignored_acs & ieee80211_ac_to_qos_mask[ac])) break; tid = 7 - 2 * ac; ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { struct sk_buff_head pending; struct sk_buff *skb; int num = 0; u16 tids = 0; bool need_null = false; skb_queue_head_init(&pending); while ((skb = __skb_dequeue(&frames))) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (void *) skb->data; u8 *qoshdr = NULL; num++; /* * Tell TX path to send this frame even though the * STA may still remain is PS mode after this frame * exchange. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are * more buffered frames for this STA */ if (more_data || !skb_queue_empty(&frames)) hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); else hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); if (ieee80211_is_data_qos(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control)) qoshdr = ieee80211_get_qos_ctl(hdr); tids |= BIT(skb->priority); __skb_queue_tail(&pending, skb); /* end service period after last frame or add one */ if (!skb_queue_empty(&frames)) continue; if (reason != IEEE80211_FRAME_RELEASE_UAPSD) { /* for PS-Poll, there's only one frame */ info->flags |= IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; break; } /* For uAPSD, things are a bit more complicated. If the * last frame has a QoS header (i.e. is a QoS-data or * QoS-nulldata frame) then just set the EOSP bit there * and be done. * If the frame doesn't have a QoS header (which means * it should be a bufferable MMPDU) then we can't set * the EOSP bit in the QoS header; add a QoS-nulldata * frame to the list to send it after the MMPDU. * * Note that this code is only in the mac80211-release * code path, we assume that the driver will not buffer * anything but QoS-data frames, or if it does, will * create the QoS-nulldata frame by itself if needed. * * Cf. 802.11-2012 10.2.1.10 (c). */ if (qoshdr) { *qoshdr |= IEEE80211_QOS_CTL_EOSP; info->flags |= IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; } else { /* The standard isn't completely clear on this * as it says the more-data bit should be set * if there are more BUs. The QoS-Null frame * we're about to send isn't buffered yet, we * only create it below, but let's pretend it * was buffered just in case some clients only * expect more-data=0 when eosp=1. */ hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); need_null = true; num++; } break; } drv_allow_buffered_frames(local, sta, tids, num, reason, more_data); ieee80211_add_pending_skbs(local, &pending); if (need_null) ieee80211_send_null_response( sta, find_highest_prio_tid(tids), reason, false, false); sta_info_recalc_tim(sta); } else { int tid; /* * We need to release a frame that is buffered somewhere in the * driver ... it'll have to handle that. * Note that the driver also has to check the number of frames * on the TIDs we're releasing from - if there are more than * n_frames it has to set the more-data bit (if we didn't ask * it to set it anyway due to other buffered frames); if there * are fewer than n_frames it has to make sure to adjust that * to allow the service period to end properly. 
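		 * For example (illustrative numbers): with n_frames == 2
		 * and three frames buffered on the released TID, the
		 * driver delivers two frames and must set the more-data
		 * bit itself; with a single buffered frame it delivers
		 * just that one and must still end the service period
		 * (EOSP) correctly on it.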
*/ drv_release_buffered_frames(local, sta, driver_release_tids, n_frames, reason, more_data); /* * Note that we don't recalculate the TIM bit here as it would * most likely have no effect at all unless the driver told us * that the TID(s) became empty before returning here from the * release function. * Either way, however, when the driver tells us that the TID(s) * became empty or we find that a txq became empty, we'll do the * TIM recalculation. */ for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { if (!sta->sta.txq[tid] || !(driver_release_tids & BIT(tid)) || txq_has_queue(sta->sta.txq[tid])) continue; sta_info_recalc_tim(sta); break; } } } void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) { u8 ignore_for_response = sta->sta.uapsd_queues; /* * If all ACs are delivery-enabled then we should reply * from any of them, if only some are enabled we reply * only from the non-enabled ones. */ if (ignore_for_response == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_response = 0; ieee80211_sta_ps_deliver_response(sta, 1, ignore_for_response, IEEE80211_FRAME_RELEASE_PSPOLL); } void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta) { int n_frames = sta->sta.max_sp; u8 delivery_enabled = sta->sta.uapsd_queues; /* * If we ever grow support for TSPEC this might happen if * the TSPEC update from hostapd comes in between a trigger * frame setting WLAN_STA_UAPSD in the RX path and this * actually getting called. */ if (!delivery_enabled) return; switch (sta->sta.max_sp) { case 1: n_frames = 2; break; case 2: n_frames = 4; break; case 3: n_frames = 6; break; case 0: /* XXX: what is a good value? */ n_frames = 128; break; } ieee80211_sta_ps_deliver_response(sta, n_frames, ~delivery_enabled, IEEE80211_FRAME_RELEASE_UAPSD); } void ieee80211_sta_block_awake(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, bool block) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); trace_api_sta_block_awake(sta->local, pubsta, block); if (block) { set_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_clear_fast_xmit(sta); return; } if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER)) return; if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { set_sta_flag(sta, WLAN_STA_PS_DELIVER); clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else if (test_sta_flag(sta, WLAN_STA_PSPOLL) || test_sta_flag(sta, WLAN_STA_UAPSD)) { /* must be asleep in this case */ clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else { clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_check_fast_xmit(sta); } } EXPORT_SYMBOL(ieee80211_sta_block_awake); void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->local; trace_api_eosp(local, pubsta); clear_sta_flag(sta, WLAN_STA_SP); } EXPORT_SYMBOL(ieee80211_sta_eosp); void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); enum ieee80211_frame_release_type reason; bool more_data; trace_api_send_eosp_nullfunc(sta->local, pubsta, tid); reason = IEEE80211_FRAME_RELEASE_UAPSD; more_data = ieee80211_sta_ps_more_data(sta, ~sta->sta.uapsd_queues, reason, 0); ieee80211_send_null_response(sta, tid, reason, false, more_data); } EXPORT_SYMBOL(ieee80211_send_eosp_nullfunc); void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); if 
(WARN_ON(tid >= IEEE80211_NUM_TIDS)) return; trace_api_sta_set_buffered(sta->local, pubsta, tid, buffered); if (buffered) set_bit(tid, &sta->driver_buffered_tids); else clear_bit(tid, &sta->driver_buffered_tids); sta_info_recalc_tim(sta); } EXPORT_SYMBOL(ieee80211_sta_set_buffered); void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, u32 tx_airtime, u32 rx_airtime) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->sdata->local; u8 ac = ieee80211_ac_from_tid(tid); u32 airtime = 0; if (sta->local->airtime_flags & AIRTIME_USE_TX) airtime += tx_airtime; if (sta->local->airtime_flags & AIRTIME_USE_RX) airtime += rx_airtime; spin_lock_bh(&local->active_txq_lock[ac]); sta->airtime[ac].tx_airtime += tx_airtime; sta->airtime[ac].rx_airtime += rx_airtime; if (ieee80211_sta_keep_active(sta, ac)) sta->airtime[ac].deficit -= airtime; spin_unlock_bh(&local->active_txq_lock[ac]); } EXPORT_SYMBOL(ieee80211_sta_register_airtime); void __ieee80211_sta_recalc_aggregates(struct sta_info *sta, u16 active_links) { bool first = true; int link_id; if (!sta->sta.valid_links || !sta->sta.mlo) { sta->sta.cur = &sta->sta.deflink.agg; return; } rcu_read_lock(); for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) { struct ieee80211_link_sta *link_sta; int i; if (!(active_links & BIT(link_id))) continue; link_sta = rcu_dereference(sta->sta.link[link_id]); if (!link_sta) continue; if (first) { sta->cur = sta->sta.deflink.agg; first = false; continue; } sta->cur.max_amsdu_len = min(sta->cur.max_amsdu_len, link_sta->agg.max_amsdu_len); sta->cur.max_rc_amsdu_len = min(sta->cur.max_rc_amsdu_len, link_sta->agg.max_rc_amsdu_len); for (i = 0; i < ARRAY_SIZE(sta->cur.max_tid_amsdu_len); i++) sta->cur.max_tid_amsdu_len[i] = min(sta->cur.max_tid_amsdu_len[i], link_sta->agg.max_tid_amsdu_len[i]); } rcu_read_unlock(); sta->sta.cur = &sta->cur; } void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); __ieee80211_sta_recalc_aggregates(sta, sta->sdata->vif.active_links); } EXPORT_SYMBOL(ieee80211_sta_recalc_aggregates); void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, struct sta_info *sta, u8 ac, u16 tx_airtime, bool tx_completed) { int tx_pending; if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) return; if (!tx_completed) { if (sta) atomic_add(tx_airtime, &sta->airtime[ac].aql_tx_pending); atomic_add(tx_airtime, &local->aql_total_pending_airtime); atomic_add(tx_airtime, &local->aql_ac_pending_airtime[ac]); return; } if (sta) { tx_pending = atomic_sub_return(tx_airtime, &sta->airtime[ac].aql_tx_pending); if (tx_pending < 0) atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending, tx_pending, 0); } atomic_sub(tx_airtime, &local->aql_total_pending_airtime); tx_pending = atomic_sub_return(tx_airtime, &local->aql_ac_pending_airtime[ac]); if (WARN_ONCE(tx_pending < 0, "Device %s AC %d pending airtime underflow: %u, %u", wiphy_name(local->hw.wiphy), ac, tx_pending, tx_airtime)) { atomic_cmpxchg(&local->aql_ac_pending_airtime[ac], tx_pending, 0); atomic_sub(tx_pending, &local->aql_total_pending_airtime); } } static struct ieee80211_sta_rx_stats * sta_get_last_rx_stats(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats; int cpu; if (!sta->deflink.pcpu_rx_stats) return stats; for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpustats; cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); if 
(time_after(cpustats->last_rx, stats->last_rx)) stats = cpustats; } return stats; } static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate, struct rate_info *rinfo) { rinfo->bw = STA_STATS_GET(BW, rate); switch (STA_STATS_GET(TYPE, rate)) { case STA_STATS_RATE_TYPE_VHT: rinfo->flags = RATE_INFO_FLAGS_VHT_MCS; rinfo->mcs = STA_STATS_GET(VHT_MCS, rate); rinfo->nss = STA_STATS_GET(VHT_NSS, rate); if (STA_STATS_GET(SGI, rate)) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; break; case STA_STATS_RATE_TYPE_HT: rinfo->flags = RATE_INFO_FLAGS_MCS; rinfo->mcs = STA_STATS_GET(HT_MCS, rate); if (STA_STATS_GET(SGI, rate)) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; break; case STA_STATS_RATE_TYPE_LEGACY: { struct ieee80211_supported_band *sband; u16 brate; unsigned int shift; int band = STA_STATS_GET(LEGACY_BAND, rate); int rate_idx = STA_STATS_GET(LEGACY_IDX, rate); sband = local->hw.wiphy->bands[band]; if (WARN_ON_ONCE(!sband->bitrates)) break; brate = sband->bitrates[rate_idx].bitrate; if (rinfo->bw == RATE_INFO_BW_5) shift = 2; else if (rinfo->bw == RATE_INFO_BW_10) shift = 1; else shift = 0; rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); break; } case STA_STATS_RATE_TYPE_HE: rinfo->flags = RATE_INFO_FLAGS_HE_MCS; rinfo->mcs = STA_STATS_GET(HE_MCS, rate); rinfo->nss = STA_STATS_GET(HE_NSS, rate); rinfo->he_gi = STA_STATS_GET(HE_GI, rate); rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate); rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate); break; case STA_STATS_RATE_TYPE_EHT: rinfo->flags = RATE_INFO_FLAGS_EHT_MCS; rinfo->mcs = STA_STATS_GET(EHT_MCS, rate); rinfo->nss = STA_STATS_GET(EHT_NSS, rate); rinfo->eht_gi = STA_STATS_GET(EHT_GI, rate); rinfo->eht_ru_alloc = STA_STATS_GET(EHT_RU, rate); break; } } static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) { u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); if (rate == STA_STATS_RATE_INVALID) return -EINVAL; sta_stats_decode_rate(sta->local, rate, rinfo); return 0; } static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, int tid) { unsigned int start; u64 value; do { start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->msdu[tid]; } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } static void sta_set_tidstats(struct sta_info *sta, struct cfg80211_tid_stats *tidstats, int tid) { struct ieee80211_local *local = sta->local; int cpu; if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats, tid); if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); tidstats->rx_msdu += sta_get_tidstats_msdu(cpurxs, tid); } } tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid]; } if (tid < IEEE80211_NUM_TIDS) { 
spin_lock_bh(&local->fq.lock); rcu_read_lock(); tidstats->filled |= BIT(NL80211_TID_STATS_TXQ_STATS); ieee80211_fill_txq_stats(&tidstats->txq_stats, to_txq_info(sta->sta.txq[tid])); rcu_read_unlock(); spin_unlock_bh(&local->fq.lock); } } static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) { unsigned int start; u64 value; do { start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->bytes; } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, bool tidstats) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; u32 thr = 0; int i, ac, cpu; struct ieee80211_sta_rx_stats *last_rxstats; last_rxstats = sta_get_last_rx_stats(sta); sinfo->generation = sdata->local->sta_generation; /* do before driver, so beacon filtering drivers have a * chance to e.g. just add the number of filtered beacons * (or just modify the value entirely, of course) */ if (sdata->vif.type == NL80211_IFTYPE_STATION) sinfo->rx_beacon = sdata->deflink.u.mgd.count_beacon_signal; drv_sta_statistics(local, sdata, &sta->sta, sinfo); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) | BIT_ULL(NL80211_STA_INFO_STA_FLAGS) | BIT_ULL(NL80211_STA_INFO_BSS_PARAM) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) | BIT_ULL(NL80211_STA_INFO_ASSOC_AT_BOOTTIME) | BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sinfo->beacon_loss_count = sdata->deflink.u.mgd.beacon_loss_count; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS); } sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->assoc_at = sta->assoc_at; sinfo->inactive_time = jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_bytes += sta->deflink.tx_stats.bytes[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_packets += sta->deflink.tx_stats.packets[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); } if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { sinfo->rx_bytes += sta_get_stats_bytes(&sta->deflink.rx_stats); if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_bytes += sta_get_stats_bytes(cpurxs); } } sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) { sinfo->rx_packets = sta->deflink.rx_stats.packets; if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_packets += cpurxs->packets; } } sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) { sinfo->tx_retries = sta->deflink.status_stats.retry_count; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) { sinfo->tx_failed = sta->deflink.status_stats.retry_failed; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) { for (ac = 0; ac < 
IEEE80211_NUM_ACS; ac++) sinfo->rx_duration += sta->airtime[ac].rx_airtime; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) { for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_duration += sta->airtime[ac].tx_airtime; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_DURATION); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) { sinfo->airtime_weight = sta->airtime_weight; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT); } sinfo->rx_dropped_misc = sta->deflink.rx_stats.dropped; if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_dropped_misc += cpurxs->dropped; } } if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) | BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG); sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); } if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)last_rxstats->last_signal; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); } if (!sta->deflink.pcpu_rx_stats && !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = -ewma_signal_read(&sta->deflink.rx_stats_avg.signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); } } /* for the average - if pcpu_rx_stats isn't set - rxstats must point to * the sta->rx_stats struct, so the check here is fine with and without * pcpu statistics */ if (last_rxstats->chains && !(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); if (!sta->deflink.pcpu_rx_stats) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = last_rxstats->chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { sinfo->chain_signal[i] = last_rxstats->chain_signal_last[i]; sinfo->chain_signal_avg[i] = -ewma_signal_read(&sta->deflink.rx_stats_avg.chain_signal[i]); } } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) && !sta->sta.valid_links && ieee80211_rate_valid(&sta->deflink.tx_stats.last_rate)) { sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) && !sta->sta.valid_links) { if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); } if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) sta_set_tidstats(sta, &sinfo->pertid[i], i); } if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH sinfo->filled |= BIT_ULL(NL80211_STA_INFO_LLID) | BIT_ULL(NL80211_STA_INFO_PLID) | BIT_ULL(NL80211_STA_INFO_PLINK_STATE) | BIT_ULL(NL80211_STA_INFO_LOCAL_PM) | BIT_ULL(NL80211_STA_INFO_PEER_PM) | BIT_ULL(NL80211_STA_INFO_NONPEER_PM) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_AS); sinfo->llid = sta->mesh->llid; sinfo->plid = sta->mesh->plid; sinfo->plink_state = sta->mesh->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_T_OFFSET); sinfo->t_offset = sta->mesh->t_offset; } sinfo->local_pm 
= sta->mesh->local_pm; sinfo->peer_pm = sta->mesh->peer_pm; sinfo->nonpeer_pm = sta->mesh->nonpeer_pm; sinfo->connected_to_gate = sta->mesh->connected_to_gate; sinfo->connected_to_as = sta->mesh->connected_to_as; #endif } sinfo->bss_param.flags = 0; if (sdata->vif.bss_conf.use_cts_prot) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT; if (sdata->vif.bss_conf.use_short_preamble) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; if (sdata->vif.bss_conf.use_short_slot) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; sinfo->bss_param.dtim_period = sdata->vif.bss_conf.dtim_period; sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; sinfo->sta_flags.set = 0; sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE); if (sta->sta.wme) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME); if (test_sta_flag(sta, WLAN_STA_MFP)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); if (test_sta_flag(sta, WLAN_STA_ASSOC)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); thr = sta_get_expected_throughput(sta); if (thr != 0) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) && sta->deflink.status_stats.ack_signal_filled) { sinfo->ack_signal = sta->deflink.status_stats.last_ack_signal; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) && sta->deflink.status_stats.ack_signal_filled) { sinfo->avg_ack_signal = -(s8)ewma_avg_signal_read( &sta->deflink.status_stats.avg_ack_signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); } if (ieee80211_vif_is_mesh(&sdata->vif)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_LINK_METRIC); sinfo->airtime_link_metric = airtime_link_metric_get(local, sta); } } u32 sta_get_expected_throughput(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; u32 thr = 0; if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) ref = local->rate_ctrl; /* check if the driver has a SW RC implementation */ if (ref && ref->ops->get_expected_throughput) thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); else thr = drv_get_expected_throughput(local, sta); return thr; } unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); if (!sta->deflink.status_stats.last_ack || time_after(stats->last_rx, sta->deflink.status_stats.last_ack)) return stats->last_rx; return sta->deflink.status_stats.last_ack; } static void sta_update_codel_params(struct sta_info *sta, u32 thr) { if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { sta->cparams.target = MS2TIME(50); sta->cparams.interval = MS2TIME(300); sta->cparams.ecn = false; } else { sta->cparams.target = MS2TIME(20); 
		sta->cparams.interval = MS2TIME(100);
		sta->cparams.ecn = true;
	}
}

void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
					   u32 thr)
{
	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);

	sta_update_codel_params(sta, thr);
}

int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id)
{
	struct ieee80211_sub_if_data *sdata = sta->sdata;
	struct sta_link_alloc *alloc;
	int ret;

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED));

	/* must represent an MLD from the start */
	if (WARN_ON(!sta->sta.valid_links))
		return -EINVAL;

	if (WARN_ON(sta->sta.valid_links & BIT(link_id) ||
		    sta->link[link_id]))
		return -EBUSY;

	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
	if (!alloc)
		return -ENOMEM;

	ret = sta_info_alloc_link(sdata->local, &alloc->info, GFP_KERNEL);
	if (ret) {
		kfree(alloc);
		return ret;
	}

	sta_info_add_link(sta, link_id, &alloc->info, &alloc->sta);

	ieee80211_link_sta_debugfs_add(&alloc->info);

	return 0;
}

void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id)
{
	lockdep_assert_wiphy(sta->sdata->local->hw.wiphy);

	WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED));

	sta_remove_link(sta, link_id, false);
}

int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id)
{
	struct ieee80211_sub_if_data *sdata = sta->sdata;
	struct link_sta_info *link_sta;
	u16 old_links = sta->sta.valid_links;
	u16 new_links = old_links | BIT(link_id);
	int ret;

	link_sta = rcu_dereference_protected(sta->link[link_id],
					     lockdep_is_held(&sdata->local->hw.wiphy->mtx));

	if (WARN_ON(old_links == new_links || !link_sta))
		return -EINVAL;

	rcu_read_lock();
	if (link_sta_info_hash_lookup(sdata->local, link_sta->addr)) {
		rcu_read_unlock();
		return -EALREADY;
	}
	/* we only modify under the mutex so this is fine */
	rcu_read_unlock();

	sta->sta.valid_links = new_links;

	if (WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)))
		goto hash;

	ieee80211_recalc_min_chandef(sdata, link_id);

	/* Ensure the values are updated for the driver,
	 * redone by sta_remove_link on failure.
	 */
	ieee80211_sta_recalc_aggregates(&sta->sta);

	ret = drv_change_sta_links(sdata->local, sdata, &sta->sta,
				   old_links, new_links);
	if (ret) {
		sta->sta.valid_links = old_links;
		sta_remove_link(sta, link_id, false);
		return ret;
	}

hash:
	ret = link_sta_info_hash_add(sdata->local, link_sta);
	WARN_ON(ret);
	return 0;
}

void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id)
{
	struct ieee80211_sub_if_data *sdata = sta->sdata;
	u16 old_links = sta->sta.valid_links;

	lockdep_assert_wiphy(sdata->local->hw.wiphy);

	sta->sta.valid_links &= ~BIT(link_id);

	if (!WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)))
		drv_change_sta_links(sdata->local, sdata, &sta->sta,
				     old_links, sta->sta.valid_links);

	sta_remove_link(sta, link_id, true);
}

void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta,
					   const u8 *ext_capab,
					   unsigned int ext_capab_len)
{
	u8 val;

	sta->sta.max_amsdu_subframes = 0;

	if (ext_capab_len < 8)
		return;

	/* The sender might not have sent the last bit, consider it to be 0 */
	val = u8_get_bits(ext_capab[7], WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB);

	/* we did get all the bits, take the MSB as well */
	if (ext_capab_len >= 9)
		val |= u8_get_bits(ext_capab[8],
				   WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB) << 1;

	if (val)
		sta->sta.max_amsdu_subframes = 4 << (4 - val);
}

#ifdef CONFIG_LOCKDEP
bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta)
{
	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);

	return lockdep_is_held(&sta->local->hw.wiphy->mtx);
}
EXPORT_SYMBOL(lockdep_sta_mutex_held);
#endif
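/*
 * Illustrative sketch (not part of either source file above): a minimal
 * example of how a low-level driver might call the two helpers exported
 * above, ieee80211_sta_register_airtime() and ieee80211_sta_set_buffered().
 * The drv_tx_done structure and drv_report_tx_done() are hypothetical
 * stand-ins for whatever completion record a real driver keeps; only the
 * two mac80211 calls are taken from the code above.
 */
#include <net/mac80211.h>

struct drv_tx_done {			/* hypothetical driver completion record */
	struct ieee80211_sta *sta;	/* station the frame was sent to */
	u8 tid;				/* TID the frame belonged to */
	u32 airtime_us;			/* airtime reported by the hardware */
	bool ps_buffered;		/* more frames held in a driver PS queue */
};

static void drv_report_tx_done(struct drv_tx_done *done)
{
	if (!done->sta)
		return;

	/* Charge the measured TX airtime to this station/TID; mac80211
	 * adds it to the per-AC accounting read back in sta_set_sinfo().
	 */
	ieee80211_sta_register_airtime(done->sta, done->tid,
				       done->airtime_us, 0);

	/* If the driver still buffers frames for this TID while the peer
	 * sleeps, tell mac80211 so its TIM handling stays correct.
	 */
	ieee80211_sta_set_buffered(done->sta, done->tid, done->ps_buffered);
}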
// SPDX-License-Identifier: ISC
/*
 * Copyright (c) 2005-2011 Atheros Communications Inc.
 * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
 * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
 * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc.
All rights reserved. */ #include <linux/module.h> #include <linux/firmware.h> #include <linux/of.h> #include <linux/property.h> #include <linux/dmi.h> #include <linux/ctype.h> #include <linux/pm_qos.h> #include <linux/nvmem-consumer.h> #include <asm/byteorder.h> #include "core.h" #include "mac.h" #include "htc.h" #include "hif.h" #include "wmi.h" #include "bmi.h" #include "debug.h" #include "htt.h" #include "testmode.h" #include "wmi-ops.h" #include "coredump.h" unsigned int ath10k_debug_mask; EXPORT_SYMBOL(ath10k_debug_mask); static unsigned int ath10k_cryptmode_param; static bool uart_print; static bool skip_otp; static bool fw_diag_log; /* frame mode values are mapped as per enum ath10k_hw_txrx_mode */ unsigned int ath10k_frame_mode = ATH10K_HW_TXRX_NATIVE_WIFI; unsigned long ath10k_coredump_mask = BIT(ATH10K_FW_CRASH_DUMP_REGISTERS) | BIT(ATH10K_FW_CRASH_DUMP_CE_DATA); /* FIXME: most of these should be readonly */ module_param_named(debug_mask, ath10k_debug_mask, uint, 0644); module_param_named(cryptmode, ath10k_cryptmode_param, uint, 0644); module_param(uart_print, bool, 0644); module_param(skip_otp, bool, 0644); module_param(fw_diag_log, bool, 0644); module_param_named(frame_mode, ath10k_frame_mode, uint, 0644); module_param_named(coredump_mask, ath10k_coredump_mask, ulong, 0444); MODULE_PARM_DESC(debug_mask, "Debugging mask"); MODULE_PARM_DESC(uart_print, "Uart target debugging"); MODULE_PARM_DESC(skip_otp, "Skip otp failure for calibration in testmode"); MODULE_PARM_DESC(cryptmode, "Crypto mode: 0-hardware, 1-software"); MODULE_PARM_DESC(frame_mode, "Datapath frame mode (0: raw, 1: native wifi (default), 2: ethernet)"); MODULE_PARM_DESC(coredump_mask, "Bitfield of what to include in firmware crash file"); MODULE_PARM_DESC(fw_diag_log, "Diag based fw log debugging"); static const struct ath10k_hw_params ath10k_hw_params_list[] = { { .id = QCA988X_HW_2_0_VERSION, .dev_id = QCA988X_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca988x hw2.0", .patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, .board = QCA988X_HW_2_0_BOARD_DATA_FILE, .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA988X_HW_2_0_VERSION, .dev_id = QCA988X_2_0_DEVICE_ID_UBNT, .name = "qca988x hw2.0 ubiquiti", .patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, .board = QCA988X_HW_2_0_BOARD_DATA_FILE, .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = 
&qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9887_HW_1_0_VERSION, .dev_id = QCA9887_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9887 hw1.0", .patch_load_addr = QCA9887_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA9887_HW_1_0_FW_DIR, .board = QCA9887_HW_1_0_BOARD_DATA_FILE, .board_size = QCA9887_BOARD_DATA_SZ, .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_2_VERSION, .dev_id = QCA6174_3_2_DEVICE_ID, .bus = ATH10K_BUS_SDIO, .name = "qca6174 hw3.2 sdio", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 19, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 0, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, .board = QCA6174_HW_3_0_BOARD_DATA_FILE, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_sdio_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .n_cipher_suites = 8, .num_peers = 10, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .uart_pin_workaround = true, .tx_stats_over_pktlog = false, .credit_size_workaround = false, .bmi_large_size_download = true, .supports_peer_stats_info = true, .dynamic_sar_support = true, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_2_1_VERSION, .dev_id = QCA6164_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6164 hw2.1", .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, .uart_pin = 6, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, .board = QCA6174_HW_2_1_BOARD_DATA_FILE, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, 
.target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_2_1_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw2.1", .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, .uart_pin = 6, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, .board = QCA6174_HW_2_1_BOARD_DATA_FILE, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_0_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw3.0", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, .board = QCA6174_HW_3_0_BOARD_DATA_FILE, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_2_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw3.2", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { /* uses same binaries as hw3.0 */ .dir = QCA6174_HW_3_0_FW_DIR, .board = QCA6174_HW_3_0_BOARD_DATA_FILE, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, 
.rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = true, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .supports_peer_stats_info = true, .dynamic_sar_support = true, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = true, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, .dev_id = QCA99X0_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca99x0 hw2.0", .patch_load_addr = QCA99X0_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, .cal_data_len = 12064, .fw = { .dir = QCA99X0_HW_2_0_FW_DIR, .board = QCA99X0_HW_2_0_BOARD_DATA_FILE, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 4, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9984_HW_1_0_DEV_VERSION, .dev_id = QCA9984_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9984/qca9994 hw1.0", .patch_load_addr = QCA9984_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, .cal_data_len = 12064, .fw = { .dir = QCA9984_HW_1_0_FW_DIR, .board = QCA9984_HW_1_0_BOARD_DATA_FILE, .eboard = QCA9984_HW_1_0_EBOARD_DATA_FILE, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, .ext_board_size = QCA99X0_EXT_BOARD_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 12, .spectral_bin_offset = 8, /* Can do only 2x2 VHT160 or 80+80. 1560Mbps is 4x4 80Mhz * or 2x2 160Mhz, long-guard-interval. 
*/ .vht160_mcs_rx_highest = 1560, .vht160_mcs_tx_highest = 1560, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9888_HW_2_0_DEV_VERSION, .dev_id = QCA9888_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9888 hw2.0", .patch_load_addr = QCA9888_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 3, .rx_chain_mask = 3, .max_spatial_stream = 2, .cal_data_len = 12064, .fw = { .dir = QCA9888_HW_2_0_FW_DIR, .board = QCA9888_HW_2_0_BOARD_DATA_FILE, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 12, .spectral_bin_offset = 8, /* Can do only 1x1 VHT160 or 80+80. 780Mbps is 2x2 80Mhz or * 1x1 160Mhz, long-guard-interval. */ .vht160_mcs_rx_highest = 780, .vht160_mcs_tx_highest = 780, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_0_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9377 hw1.0", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 6, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board = QCA9377_HW_1_0_BOARD_DATA_FILE, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_1_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9377 hw1.1", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 6, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board = QCA9377_HW_1_0_BOARD_DATA_FILE, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = 
&qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = true, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_1_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_SDIO, .name = "qca9377 hw1.1 sdio", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 19, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board = QCA9377_HW_1_0_BOARD_DATA_FILE, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .n_cipher_suites = 8, .num_peers = TARGET_QCA9377_HL_NUM_PEERS, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .uart_pin_workaround = true, .credit_size_workaround = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA4019_HW_1_0_DEV_VERSION, .dev_id = 0, .bus = ATH10K_BUS_AHB, .name = "qca4019 hw1.0", .patch_load_addr = QCA4019_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x0010000, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 125000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0x3, .rx_chain_mask = 0x3, .max_spatial_stream = 2, .cal_data_len = 12064, .fw = { .dir = QCA4019_HW_1_0_FW_DIR, .board = QCA4019_HW_1_0_BOARD_DATA_FILE, .board_size = QCA4019_BOARD_DATA_SZ, .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 4, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = WCN3990_HW_1_0_DEV_VERSION, .dev_id = 0, .bus = ATH10K_BUS_SNOC, .name = "wcn3990 hw1.0", .continuous_frag_desc = true, .tx_chain_mask = 0x7, .rx_chain_mask = 0x7, .max_spatial_stream = 4, .fw = { .dir = WCN3990_HW_1_0_FW_DIR, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &wcn3990_rx_desc_ops, .hw_ops = &wcn3990_ops, .decap_align_bytes = 1, .num_peers = TARGET_HL_TLV_NUM_PEERS, .n_cipher_suites = 11, .ast_skid_limit = TARGET_HL_TLV_AST_SKID_LIMIT, .num_wds_entries = TARGET_HL_TLV_NUM_WDS_ENTRIES, .target_64bit = true, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL_DUAL_MAC, 
.shadow_reg_support = true, .rri_on_ddr = true, .hw_filter_reset_required = false, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = true, .hw_restart_disconnect = true, .use_fw_tx_credits = false, .delay_unmap_buffer = true, .mcast_frame_registration = false, }, }; static const char *const ath10k_core_fw_feature_str[] = { [ATH10K_FW_FEATURE_EXT_WMI_MGMT_RX] = "wmi-mgmt-rx", [ATH10K_FW_FEATURE_WMI_10X] = "wmi-10.x", [ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX] = "has-wmi-mgmt-tx", [ATH10K_FW_FEATURE_NO_P2P] = "no-p2p", [ATH10K_FW_FEATURE_WMI_10_2] = "wmi-10.2", [ATH10K_FW_FEATURE_MULTI_VIF_PS_SUPPORT] = "multi-vif-ps", [ATH10K_FW_FEATURE_WOWLAN_SUPPORT] = "wowlan", [ATH10K_FW_FEATURE_IGNORE_OTP_RESULT] = "ignore-otp", [ATH10K_FW_FEATURE_NO_NWIFI_DECAP_4ADDR_PADDING] = "no-4addr-pad", [ATH10K_FW_FEATURE_SUPPORTS_SKIP_CLOCK_INIT] = "skip-clock-init", [ATH10K_FW_FEATURE_RAW_MODE_SUPPORT] = "raw-mode", [ATH10K_FW_FEATURE_SUPPORTS_ADAPTIVE_CCA] = "adaptive-cca", [ATH10K_FW_FEATURE_MFP_SUPPORT] = "mfp", [ATH10K_FW_FEATURE_PEER_FLOW_CONTROL] = "peer-flow-ctrl", [ATH10K_FW_FEATURE_BTCOEX_PARAM] = "btcoex-param", [ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR] = "skip-null-func-war", [ATH10K_FW_FEATURE_ALLOWS_MESH_BCAST] = "allows-mesh-bcast", [ATH10K_FW_FEATURE_NO_PS] = "no-ps", [ATH10K_FW_FEATURE_MGMT_TX_BY_REF] = "mgmt-tx-by-reference", [ATH10K_FW_FEATURE_NON_BMI] = "non-bmi", [ATH10K_FW_FEATURE_SINGLE_CHAN_INFO_PER_CHANNEL] = "single-chan-info-per-channel", [ATH10K_FW_FEATURE_PEER_FIXED_RATE] = "peer-fixed-rate", [ATH10K_FW_FEATURE_IRAM_RECOVERY] = "iram-recovery", }; static unsigned int ath10k_core_get_fw_feature_str(char *buf, size_t buf_len, enum ath10k_fw_features feat) { /* make sure that ath10k_core_fw_feature_str[] gets updated */ BUILD_BUG_ON(ARRAY_SIZE(ath10k_core_fw_feature_str) != ATH10K_FW_FEATURE_COUNT); if (feat >= ARRAY_SIZE(ath10k_core_fw_feature_str) || WARN_ON(!ath10k_core_fw_feature_str[feat])) { return scnprintf(buf, buf_len, "bit%d", feat); } return scnprintf(buf, buf_len, "%s", ath10k_core_fw_feature_str[feat]); } void ath10k_core_get_fw_features_str(struct ath10k *ar, char *buf, size_t buf_len) { size_t len = 0; int i; for (i = 0; i < ATH10K_FW_FEATURE_COUNT; i++) { if (test_bit(i, ar->normal_mode_fw.fw_file.fw_features)) { if (len > 0) len += scnprintf(buf + len, buf_len - len, ","); len += ath10k_core_get_fw_feature_str(buf + len, buf_len - len, i); } } } static void ath10k_send_suspend_complete(struct ath10k *ar) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot suspend complete\n"); complete(&ar->target_suspend); } static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode) { bool mtu_workaround = ar->hw_params.credit_size_workaround; int ret; u32 param = 0; ret = ath10k_bmi_write32(ar, hi_mbox_io_block_sz, 256); if (ret) return ret; ret = ath10k_bmi_write32(ar, hi_mbox_isr_yield_limit, 99); if (ret) return ret; ret = ath10k_bmi_read32(ar, hi_acs_flags, &param); if (ret) return ret; param |= HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_SET; if (mode == ATH10K_FIRMWARE_MODE_NORMAL && !mtu_workaround) param |= HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE; else param &= ~HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE; if (mode == ATH10K_FIRMWARE_MODE_UTF) param &= ~HI_ACS_FLAGS_SDIO_SWAP_MAILBOX_SET; else param |= HI_ACS_FLAGS_SDIO_SWAP_MAILBOX_SET; ret = ath10k_bmi_write32(ar, hi_acs_flags, param); if (ret) return ret; ret = ath10k_bmi_read32(ar, hi_option_flag2, &param); if (ret) return ret; param |= HI_OPTION_SDIO_CRASH_DUMP_ENHANCEMENT_HOST; 
ret = ath10k_bmi_write32(ar, hi_option_flag2, param); if (ret) return ret; return 0; } static int ath10k_init_configure_target(struct ath10k *ar) { u32 param_host; int ret; /* tell target which HTC version it is used*/ ret = ath10k_bmi_write32(ar, hi_app_host_interest, HTC_PROTOCOL_VERSION); if (ret) { ath10k_err(ar, "settings HTC version failed\n"); return ret; } /* set the firmware mode to STA/IBSS/AP */ ret = ath10k_bmi_read32(ar, hi_option_flag, &param_host); if (ret) { ath10k_err(ar, "setting firmware mode (1/2) failed\n"); return ret; } /* TODO following parameters need to be re-visited. */ /* num_device */ param_host |= (1 << HI_OPTION_NUM_DEV_SHIFT); /* Firmware mode */ /* FIXME: Why FW_MODE_AP ??.*/ param_host |= (HI_OPTION_FW_MODE_AP << HI_OPTION_FW_MODE_SHIFT); /* mac_addr_method */ param_host |= (1 << HI_OPTION_MAC_ADDR_METHOD_SHIFT); /* firmware_bridge */ param_host |= (0 << HI_OPTION_FW_BRIDGE_SHIFT); /* fwsubmode */ param_host |= (0 << HI_OPTION_FW_SUBMODE_SHIFT); ret = ath10k_bmi_write32(ar, hi_option_flag, param_host); if (ret) { ath10k_err(ar, "setting firmware mode (2/2) failed\n"); return ret; } /* We do all byte-swapping on the host */ ret = ath10k_bmi_write32(ar, hi_be, 0); if (ret) { ath10k_err(ar, "setting host CPU BE mode failed\n"); return ret; } /* FW descriptor/Data swap flags */ ret = ath10k_bmi_write32(ar, hi_fw_swap, 0); if (ret) { ath10k_err(ar, "setting FW data/desc swap flags failed\n"); return ret; } /* Some devices have a special sanity check that verifies the PCI * Device ID is written to this host interest var. It is known to be * required to boot QCA6164. */ ret = ath10k_bmi_write32(ar, hi_hci_uart_pwr_mgmt_params_ext, ar->dev_id); if (ret) { ath10k_err(ar, "failed to set pwr_mgmt_params: %d\n", ret); return ret; } return 0; } static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar, const char *dir, const char *file) { char filename[100]; const struct firmware *fw; int ret; if (file == NULL) return ERR_PTR(-ENOENT); if (dir == NULL) dir = "."; snprintf(filename, sizeof(filename), "%s/%s", dir, file); ret = firmware_request_nowarn(&fw, filename, ar->dev); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n", filename, ret); if (ret) return ERR_PTR(ret); return fw; } static int ath10k_push_board_ext_data(struct ath10k *ar, const void *data, size_t data_len) { u32 board_data_size = ar->hw_params.fw.board_size; u32 board_ext_data_size = ar->hw_params.fw.board_ext_size; u32 board_ext_data_addr; int ret; ret = ath10k_bmi_read32(ar, hi_board_ext_data, &board_ext_data_addr); if (ret) { ath10k_err(ar, "could not read board ext data addr (%d)\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot push board extended data addr 0x%x\n", board_ext_data_addr); if (board_ext_data_addr == 0) return 0; if (data_len != (board_data_size + board_ext_data_size)) { ath10k_err(ar, "invalid board (ext) data sizes %zu != %d+%d\n", data_len, board_data_size, board_ext_data_size); return -EINVAL; } ret = ath10k_bmi_write_memory(ar, board_ext_data_addr, data + board_data_size, board_ext_data_size); if (ret) { ath10k_err(ar, "could not write board ext data (%d)\n", ret); return ret; } ret = ath10k_bmi_write32(ar, hi_board_ext_data_config, (board_ext_data_size << 16) | 1); if (ret) { ath10k_err(ar, "could not write board ext data bit (%d)\n", ret); return ret; } return 0; } static int ath10k_core_get_board_id_from_otp(struct ath10k *ar) { u32 result, address; u8 board_id, chip_id; bool ext_bid_support; int ret, bmi_board_id_param; address = 
ar->hw_params.patch_load_addr; if (!ar->normal_mode_fw.fw_file.otp_data || !ar->normal_mode_fw.fw_file.otp_len) { ath10k_warn(ar, "failed to retrieve board id because of invalid otp\n"); return -ENODATA; } if (ar->id.bmi_ids_valid) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot already acquired valid otp board id,skip download, board_id %d chip_id %d\n", ar->id.bmi_board_id, ar->id.bmi_chip_id); goto skip_otp_download; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd for board id\n", address, ar->normal_mode_fw.fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->normal_mode_fw.fw_file.otp_data, ar->normal_mode_fw.fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp for board id check: %d\n", ret); return ret; } if (ar->cal_mode == ATH10K_PRE_CAL_MODE_DT || ar->cal_mode == ATH10K_PRE_CAL_MODE_FILE || ar->cal_mode == ATH10K_PRE_CAL_MODE_NVMEM) bmi_board_id_param = BMI_PARAM_GET_FLASH_BOARD_ID; else bmi_board_id_param = BMI_PARAM_GET_EEPROM_BOARD_ID; ret = ath10k_bmi_execute(ar, address, bmi_board_id_param, &result); if (ret) { ath10k_err(ar, "could not execute otp for board id check: %d\n", ret); return ret; } board_id = MS(result, ATH10K_BMI_BOARD_ID_FROM_OTP); chip_id = MS(result, ATH10K_BMI_CHIP_ID_FROM_OTP); ext_bid_support = (result & ATH10K_BMI_EXT_BOARD_ID_SUPPORT); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot get otp board id result 0x%08x board_id %d chip_id %d ext_bid_support %d\n", result, board_id, chip_id, ext_bid_support); ar->id.ext_bid_supported = ext_bid_support; if ((result & ATH10K_BMI_BOARD_ID_STATUS_MASK) != 0 || (board_id == 0)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "board id does not exist in otp, ignore it\n"); return -EOPNOTSUPP; } ar->id.bmi_ids_valid = true; ar->id.bmi_board_id = board_id; ar->id.bmi_chip_id = chip_id; skip_otp_download: return 0; } static void ath10k_core_check_bdfext(const struct dmi_header *hdr, void *data) { struct ath10k *ar = data; const char *bdf_ext; const char *magic = ATH10K_SMBIOS_BDF_EXT_MAGIC; u8 bdf_enabled; int i; if (hdr->type != ATH10K_SMBIOS_BDF_EXT_TYPE) return; if (hdr->length != ATH10K_SMBIOS_BDF_EXT_LENGTH) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "wrong smbios bdf ext type length (%d).\n", hdr->length); return; } bdf_enabled = *((u8 *)hdr + ATH10K_SMBIOS_BDF_EXT_OFFSET); if (!bdf_enabled) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name not found.\n"); return; } /* Only one string exists (per spec) */ bdf_ext = (char *)hdr + hdr->length; if (memcmp(bdf_ext, magic, strlen(magic)) != 0) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant magic does not match.\n"); return; } for (i = 0; i < strlen(bdf_ext); i++) { if (!isascii(bdf_ext[i]) || !isprint(bdf_ext[i])) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name contains non ascii chars.\n"); return; } } /* Copy extension name without magic suffix */ if (strscpy(ar->id.bdf_ext, bdf_ext + strlen(magic), sizeof(ar->id.bdf_ext)) < 0) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant string is longer than the buffer can accommodate (variant: %s)\n", bdf_ext); return; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "found and validated bdf variant smbios_type 0x%x bdf %s\n", ATH10K_SMBIOS_BDF_EXT_TYPE, bdf_ext); } static int ath10k_core_check_smbios(struct ath10k *ar) { ar->id.bdf_ext[0] = '\0'; dmi_walk(ath10k_core_check_bdfext, ar); if (ar->id.bdf_ext[0] == '\0') return -ENODATA; return 0; } int ath10k_core_check_dt(struct ath10k *ar) { struct device_node *node; const char *variant = NULL; node = ar->dev->of_node; if (!node) return -ENOENT; of_property_read_string(node, 
"qcom,ath10k-calibration-variant", &variant); if (!variant) return -ENODATA; if (strscpy(ar->id.bdf_ext, variant, sizeof(ar->id.bdf_ext)) < 0) ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant string is longer than the buffer can accommodate (variant: %s)\n", variant); return 0; } EXPORT_SYMBOL(ath10k_core_check_dt); static int ath10k_download_fw(struct ath10k *ar) { u32 address, data_len; const void *data; int ret; struct pm_qos_request latency_qos; address = ar->hw_params.patch_load_addr; data = ar->running_fw->fw_file.firmware_data; data_len = ar->running_fw->fw_file.firmware_len; ret = ath10k_swap_code_seg_configure(ar, &ar->running_fw->fw_file); if (ret) { ath10k_err(ar, "failed to configure fw code swap: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot uploading firmware image %pK len %d\n", data, data_len); /* Check if device supports to download firmware via * diag copy engine. Downloading firmware via diag CE * greatly reduces the time to download firmware. */ if (ar->hw_params.fw_diag_ce_download) { ret = ath10k_hw_diag_fast_download(ar, address, data, data_len); if (ret == 0) /* firmware upload via diag ce was successful */ return 0; ath10k_warn(ar, "failed to upload firmware via diag ce, trying BMI: %d", ret); } memset(&latency_qos, 0, sizeof(latency_qos)); cpu_latency_qos_add_request(&latency_qos, 0); ret = ath10k_bmi_fast_download(ar, address, data, data_len); cpu_latency_qos_remove_request(&latency_qos); return ret; } void ath10k_core_free_board_files(struct ath10k *ar) { if (!IS_ERR(ar->normal_mode_fw.board)) release_firmware(ar->normal_mode_fw.board); if (!IS_ERR(ar->normal_mode_fw.ext_board)) release_firmware(ar->normal_mode_fw.ext_board); ar->normal_mode_fw.board = NULL; ar->normal_mode_fw.board_data = NULL; ar->normal_mode_fw.board_len = 0; ar->normal_mode_fw.ext_board = NULL; ar->normal_mode_fw.ext_board_data = NULL; ar->normal_mode_fw.ext_board_len = 0; } EXPORT_SYMBOL(ath10k_core_free_board_files); static void ath10k_core_free_firmware_files(struct ath10k *ar) { if (!IS_ERR(ar->normal_mode_fw.fw_file.firmware)) release_firmware(ar->normal_mode_fw.fw_file.firmware); if (!IS_ERR(ar->cal_file)) release_firmware(ar->cal_file); if (!IS_ERR(ar->pre_cal_file)) release_firmware(ar->pre_cal_file); ath10k_swap_code_seg_release(ar, &ar->normal_mode_fw.fw_file); ar->normal_mode_fw.fw_file.otp_data = NULL; ar->normal_mode_fw.fw_file.otp_len = 0; ar->normal_mode_fw.fw_file.firmware = NULL; ar->normal_mode_fw.fw_file.firmware_data = NULL; ar->normal_mode_fw.fw_file.firmware_len = 0; ar->cal_file = NULL; ar->pre_cal_file = NULL; } static int ath10k_fetch_cal_file(struct ath10k *ar) { char filename[100]; /* pre-cal-<bus>-<id>.bin */ scnprintf(filename, sizeof(filename), "pre-cal-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->pre_cal_file = ath10k_fetch_fw_file(ar, ATH10K_FW_DIR, filename); if (!IS_ERR(ar->pre_cal_file)) goto success; /* cal-<bus>-<id>.bin */ scnprintf(filename, sizeof(filename), "cal-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->cal_file = ath10k_fetch_fw_file(ar, ATH10K_FW_DIR, filename); if (IS_ERR(ar->cal_file)) /* calibration file is optional, don't print any warnings */ return PTR_ERR(ar->cal_file); success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found calibration file %s/%s\n", ATH10K_FW_DIR, filename); return 0; } static int ath10k_core_fetch_board_data_api_1(struct ath10k *ar, int bd_ie_type) { const struct firmware *fw; char boardname[100]; if (bd_ie_type == ATH10K_BD_IE_BOARD) { if (!ar->hw_params.fw.board) { 
ath10k_err(ar, "failed to find board file fw entry\n"); return -EINVAL; } scnprintf(boardname, sizeof(boardname), "board-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->normal_mode_fw.board = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, boardname); if (IS_ERR(ar->normal_mode_fw.board)) { fw = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, ar->hw_params.fw.board); ar->normal_mode_fw.board = fw; } if (IS_ERR(ar->normal_mode_fw.board)) return PTR_ERR(ar->normal_mode_fw.board); ar->normal_mode_fw.board_data = ar->normal_mode_fw.board->data; ar->normal_mode_fw.board_len = ar->normal_mode_fw.board->size; } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { if (!ar->hw_params.fw.eboard) { ath10k_err(ar, "failed to find eboard file fw entry\n"); return -EINVAL; } fw = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, ar->hw_params.fw.eboard); ar->normal_mode_fw.ext_board = fw; if (IS_ERR(ar->normal_mode_fw.ext_board)) return PTR_ERR(ar->normal_mode_fw.ext_board); ar->normal_mode_fw.ext_board_data = ar->normal_mode_fw.ext_board->data; ar->normal_mode_fw.ext_board_len = ar->normal_mode_fw.ext_board->size; } return 0; } static int ath10k_core_parse_bd_ie_board(struct ath10k *ar, const void *buf, size_t buf_len, const char *boardname, int bd_ie_type) { const struct ath10k_fw_ie *hdr; bool name_match_found; int ret, board_ie_id; size_t board_ie_len; const void *board_ie_data; name_match_found = false; /* go through ATH10K_BD_IE_BOARD_ elements */ while (buf_len > sizeof(struct ath10k_fw_ie)) { hdr = buf; board_ie_id = le32_to_cpu(hdr->id); board_ie_len = le32_to_cpu(hdr->len); board_ie_data = hdr->data; buf_len -= sizeof(*hdr); buf += sizeof(*hdr); if (buf_len < ALIGN(board_ie_len, 4)) { ath10k_err(ar, "invalid ATH10K_BD_IE_BOARD length: %zu < %zu\n", buf_len, ALIGN(board_ie_len, 4)); ret = -EINVAL; goto out; } switch (board_ie_id) { case ATH10K_BD_IE_BOARD_NAME: ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "board name", "", board_ie_data, board_ie_len); if (board_ie_len != strlen(boardname)) break; ret = memcmp(board_ie_data, boardname, strlen(boardname)); if (ret) break; name_match_found = true; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found match for name '%s'", boardname); break; case ATH10K_BD_IE_BOARD_DATA: if (!name_match_found) /* no match found */ break; if (bd_ie_type == ATH10K_BD_IE_BOARD) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found board data for '%s'", boardname); ar->normal_mode_fw.board_data = board_ie_data; ar->normal_mode_fw.board_len = board_ie_len; } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found eboard data for '%s'", boardname); ar->normal_mode_fw.ext_board_data = board_ie_data; ar->normal_mode_fw.ext_board_len = board_ie_len; } ret = 0; goto out; default: ath10k_warn(ar, "unknown ATH10K_BD_IE_BOARD found: %d\n", board_ie_id); break; } /* jump over the padding */ board_ie_len = ALIGN(board_ie_len, 4); buf_len -= board_ie_len; buf += board_ie_len; } /* no match found */ ret = -ENOENT; out: return ret; } static int ath10k_core_search_bd(struct ath10k *ar, const char *boardname, const u8 *data, size_t len) { size_t ie_len; struct ath10k_fw_ie *hdr; int ret = -ENOENT, ie_id; while (len > sizeof(struct ath10k_fw_ie)) { hdr = (struct ath10k_fw_ie *)data; ie_id = le32_to_cpu(hdr->id); ie_len = le32_to_cpu(hdr->len); len -= sizeof(*hdr); data = hdr->data; if (len < ALIGN(ie_len, 4)) { ath10k_err(ar, "invalid length for board ie_id %d ie_len %zu len %zu\n", ie_id, ie_len, len); return -EINVAL; } switch (ie_id) { case ATH10K_BD_IE_BOARD: ret 
= ath10k_core_parse_bd_ie_board(ar, data, ie_len, boardname, ATH10K_BD_IE_BOARD); if (ret == -ENOENT) /* no match found, continue */ break; /* either found or error, so stop searching */ goto out; case ATH10K_BD_IE_BOARD_EXT: ret = ath10k_core_parse_bd_ie_board(ar, data, ie_len, boardname, ATH10K_BD_IE_BOARD_EXT); if (ret == -ENOENT) /* no match found, continue */ break; /* either found or error, so stop searching */ goto out; } /* jump over the padding */ ie_len = ALIGN(ie_len, 4); len -= ie_len; data += ie_len; } out: /* return result of parse_bd_ie_board() or -ENOENT */ return ret; } static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, const char *boardname, const char *fallback_boardname1, const char *fallback_boardname2, const char *filename) { size_t len, magic_len; const u8 *data; int ret; /* Skip if already fetched during board data download */ if (!ar->normal_mode_fw.board) ar->normal_mode_fw.board = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, filename); if (IS_ERR(ar->normal_mode_fw.board)) return PTR_ERR(ar->normal_mode_fw.board); data = ar->normal_mode_fw.board->data; len = ar->normal_mode_fw.board->size; /* magic has extra null byte padded */ magic_len = strlen(ATH10K_BOARD_MAGIC) + 1; if (len < magic_len) { ath10k_err(ar, "failed to find magic value in %s/%s, file too short: %zu\n", ar->hw_params.fw.dir, filename, len); ret = -EINVAL; goto err; } if (memcmp(data, ATH10K_BOARD_MAGIC, magic_len)) { ath10k_err(ar, "found invalid board magic\n"); ret = -EINVAL; goto err; } /* magic is padded to 4 bytes */ magic_len = ALIGN(magic_len, 4); if (len < magic_len) { ath10k_err(ar, "failed: %s/%s too small to contain board data, len: %zu\n", ar->hw_params.fw.dir, filename, len); ret = -EINVAL; goto err; } data += magic_len; len -= magic_len; /* attempt to find boardname in the IE list */ ret = ath10k_core_search_bd(ar, boardname, data, len); /* if we didn't find it and have a fallback name, try that */ if (ret == -ENOENT && fallback_boardname1) ret = ath10k_core_search_bd(ar, fallback_boardname1, data, len); if (ret == -ENOENT && fallback_boardname2) ret = ath10k_core_search_bd(ar, fallback_boardname2, data, len); if (ret == -ENOENT) { ath10k_err(ar, "failed to fetch board data for %s from %s/%s\n", boardname, ar->hw_params.fw.dir, filename); ret = -ENODATA; } if (ret) goto err; return 0; err: ath10k_core_free_board_files(ar); return ret; } static int ath10k_core_create_board_name(struct ath10k *ar, char *name, size_t name_len, bool with_variant, bool with_chip_id) { /* strlen(',variant=') + strlen(ar->id.bdf_ext) */ char variant[9 + ATH10K_SMBIOS_BDF_EXT_STR_LENGTH] = { 0 }; if (with_variant && ar->id.bdf_ext[0] != '\0') scnprintf(variant, sizeof(variant), ",variant=%s", ar->id.bdf_ext); if (ar->id.bmi_ids_valid) { scnprintf(name, name_len, "bus=%s,bmi-chip-id=%d,bmi-board-id=%d%s", ath10k_bus_str(ar->hif.bus), ar->id.bmi_chip_id, ar->id.bmi_board_id, variant); goto out; } if (ar->id.qmi_ids_valid) { if (with_chip_id) scnprintf(name, name_len, "bus=%s,qmi-board-id=%x,qmi-chip-id=%x%s", ath10k_bus_str(ar->hif.bus), ar->id.qmi_board_id, ar->id.qmi_chip_id, variant); else scnprintf(name, name_len, "bus=%s,qmi-board-id=%x", ath10k_bus_str(ar->hif.bus), ar->id.qmi_board_id); goto out; } scnprintf(name, name_len, "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x%s", ath10k_bus_str(ar->hif.bus), ar->id.vendor, ar->id.device, ar->id.subsystem_vendor, ar->id.subsystem_device, variant); out: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using board name '%s'\n", 
name); return 0; } static int ath10k_core_create_eboard_name(struct ath10k *ar, char *name, size_t name_len) { if (ar->id.bmi_ids_valid) { scnprintf(name, name_len, "bus=%s,bmi-chip-id=%d,bmi-eboard-id=%d", ath10k_bus_str(ar->hif.bus), ar->id.bmi_chip_id, ar->id.bmi_eboard_id); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using eboard name '%s'\n", name); return 0; } /* Fallback if returned board id is zero */ return -1; } int ath10k_core_fetch_board_file(struct ath10k *ar, int bd_ie_type) { char boardname[100], fallback_boardname1[100], fallback_boardname2[100]; int ret; if (bd_ie_type == ATH10K_BD_IE_BOARD) { /* With variant and chip id */ ret = ath10k_core_create_board_name(ar, boardname, sizeof(boardname), true, true); if (ret) { ath10k_err(ar, "failed to create board name: %d", ret); return ret; } /* Without variant and only chip-id */ ret = ath10k_core_create_board_name(ar, fallback_boardname1, sizeof(boardname), false, true); if (ret) { ath10k_err(ar, "failed to create 1st fallback board name: %d", ret); return ret; } /* Without variant and without chip-id */ ret = ath10k_core_create_board_name(ar, fallback_boardname2, sizeof(boardname), false, false); if (ret) { ath10k_err(ar, "failed to create 2nd fallback board name: %d", ret); return ret; } } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { ret = ath10k_core_create_eboard_name(ar, boardname, sizeof(boardname)); if (ret) { ath10k_err(ar, "fallback to eboard.bin since board id 0"); goto fallback; } } ar->bd_api = 2; ret = ath10k_core_fetch_board_data_api_n(ar, boardname, fallback_boardname1, fallback_boardname2, ATH10K_BOARD_API2_FILE); if (!ret) goto success; fallback: ar->bd_api = 1; ret = ath10k_core_fetch_board_data_api_1(ar, bd_ie_type); if (ret) { ath10k_err(ar, "failed to fetch board-2.bin or board.bin from %s\n", ar->hw_params.fw.dir); return ret; } success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "using board api %d\n", ar->bd_api); return 0; } EXPORT_SYMBOL(ath10k_core_fetch_board_file); static int ath10k_core_get_ext_board_id_from_otp(struct ath10k *ar) { u32 result, address; u8 ext_board_id; int ret; address = ar->hw_params.patch_load_addr; if (!ar->normal_mode_fw.fw_file.otp_data || !ar->normal_mode_fw.fw_file.otp_len) { ath10k_warn(ar, "failed to retrieve extended board id due to otp binary missing\n"); return -ENODATA; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd for ext board id\n", address, ar->normal_mode_fw.fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->normal_mode_fw.fw_file.otp_data, ar->normal_mode_fw.fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp for ext board id check: %d\n", ret); return ret; } ret = ath10k_bmi_execute(ar, address, BMI_PARAM_GET_EXT_BOARD_ID, &result); if (ret) { ath10k_err(ar, "could not execute otp for ext board id check: %d\n", ret); return ret; } if (!result) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "ext board id does not exist in otp, ignore it\n"); return -EOPNOTSUPP; } ext_board_id = result & ATH10K_BMI_EBOARD_ID_STATUS_MASK; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot get otp ext board id result 0x%08x ext_board_id %d\n", result, ext_board_id); ar->id.bmi_eboard_id = ext_board_id; return 0; } static int ath10k_download_board_data(struct ath10k *ar, const void *data, size_t data_len) { u32 board_data_size = ar->hw_params.fw.board_size; u32 eboard_data_size = ar->hw_params.fw.ext_board_size; u32 board_address; u32 ext_board_address; int ret; ret = ath10k_push_board_ext_data(ar, data, data_len); if (ret) { ath10k_err(ar, "could not push board ext data 
(%d)\n", ret); goto exit; } ret = ath10k_bmi_read32(ar, hi_board_data, &board_address); if (ret) { ath10k_err(ar, "could not read board data addr (%d)\n", ret); goto exit; } ret = ath10k_bmi_write_memory(ar, board_address, data, min_t(u32, board_data_size, data_len)); if (ret) { ath10k_err(ar, "could not write board data (%d)\n", ret); goto exit; } ret = ath10k_bmi_write32(ar, hi_board_data_initialized, 1); if (ret) { ath10k_err(ar, "could not write board data bit (%d)\n", ret); goto exit; } if (!ar->id.ext_bid_supported) goto exit; /* Extended board data download */ ret = ath10k_core_get_ext_board_id_from_otp(ar); if (ret == -EOPNOTSUPP) { /* Not fetching ext_board_data if ext board id is 0 */ ath10k_dbg(ar, ATH10K_DBG_BOOT, "otp returned ext board id 0\n"); return 0; } else if (ret) { ath10k_err(ar, "failed to get extended board id: %d\n", ret); goto exit; } ret = ath10k_core_fetch_board_file(ar, ATH10K_BD_IE_BOARD_EXT); if (ret) goto exit; if (ar->normal_mode_fw.ext_board_data) { ext_board_address = board_address + EXT_BOARD_ADDRESS_OFFSET; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot writing ext board data to addr 0x%x", ext_board_address); ret = ath10k_bmi_write_memory(ar, ext_board_address, ar->normal_mode_fw.ext_board_data, min_t(u32, eboard_data_size, data_len)); if (ret) ath10k_err(ar, "failed to write ext board data: %d\n", ret); } exit: return ret; } static int ath10k_download_and_run_otp(struct ath10k *ar) { u32 result, address = ar->hw_params.patch_load_addr; u32 bmi_otp_exe_param = ar->hw_params.otp_exe_param; int ret; ret = ath10k_download_board_data(ar, ar->running_fw->board_data, ar->running_fw->board_len); if (ret) { ath10k_err(ar, "failed to download board data: %d\n", ret); return ret; } /* OTP is optional */ if (!ar->running_fw->fw_file.otp_data || !ar->running_fw->fw_file.otp_len) { ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %pK otp_len %zd)!\n", ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); return 0; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd\n", address, ar->running_fw->fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp (%d)\n", ret); return ret; } /* As of now pre-cal is valid for 10_4 variants */ if (ar->cal_mode == ATH10K_PRE_CAL_MODE_DT || ar->cal_mode == ATH10K_PRE_CAL_MODE_FILE || ar->cal_mode == ATH10K_PRE_CAL_MODE_NVMEM) bmi_otp_exe_param = BMI_PARAM_FLASH_SECTION_ALL; ret = ath10k_bmi_execute(ar, address, bmi_otp_exe_param, &result); if (ret) { ath10k_err(ar, "could not execute otp (%d)\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot otp execute result %d\n", result); if (!(skip_otp || test_bit(ATH10K_FW_FEATURE_IGNORE_OTP_RESULT, ar->running_fw->fw_file.fw_features)) && result != 0) { ath10k_err(ar, "otp calibration failed: %d", result); return -EINVAL; } return 0; } static int ath10k_download_cal_file(struct ath10k *ar, const struct firmware *file) { int ret; if (!file) return -ENOENT; if (IS_ERR(file)) return PTR_ERR(file); ret = ath10k_download_board_data(ar, file->data, file->size); if (ret) { ath10k_err(ar, "failed to download cal_file data: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot cal file downloaded\n"); return 0; } static int ath10k_download_cal_dt(struct ath10k *ar, const char *dt_name) { struct device_node *node; int data_len; void *data; int ret; node = ar->dev->of_node; if (!node) /* Device Tree is optional, don't 
print any warnings if * there's no node for ath10k. */ return -ENOENT; if (!of_get_property(node, dt_name, &data_len)) { /* The calibration data node is optional */ return -ENOENT; } if (data_len != ar->hw_params.cal_data_len) { ath10k_warn(ar, "invalid calibration data length in DT: %d\n", data_len); ret = -EMSGSIZE; goto out; } data = kmalloc(data_len, GFP_KERNEL); if (!data) { ret = -ENOMEM; goto out; } ret = of_property_read_u8_array(node, dt_name, data, data_len); if (ret) { ath10k_warn(ar, "failed to read calibration data from DT: %d\n", ret); goto out_free; } ret = ath10k_download_board_data(ar, data, data_len); if (ret) { ath10k_warn(ar, "failed to download calibration data from Device Tree: %d\n", ret); goto out_free; } ret = 0; out_free: kfree(data); out: return ret; } static int ath10k_download_cal_eeprom(struct ath10k *ar) { size_t data_len; void *data = NULL; int ret; ret = ath10k_hif_fetch_cal_eeprom(ar, &data, &data_len); if (ret) { if (ret != -EOPNOTSUPP) ath10k_warn(ar, "failed to read calibration data from EEPROM: %d\n", ret); goto out_free; } ret = ath10k_download_board_data(ar, data, data_len); if (ret) { ath10k_warn(ar, "failed to download calibration data from EEPROM: %d\n", ret); goto out_free; } ret = 0; out_free: kfree(data); return ret; } static int ath10k_download_cal_nvmem(struct ath10k *ar, const char *cell_name) { struct nvmem_cell *cell; void *buf; size_t len; int ret; cell = devm_nvmem_cell_get(ar->dev, cell_name); if (IS_ERR(cell)) { ret = PTR_ERR(cell); return ret; } buf = nvmem_cell_read(cell, &len); if (IS_ERR(buf)) return PTR_ERR(buf); if (ar->hw_params.cal_data_len != len) { kfree(buf); ath10k_warn(ar, "invalid calibration data length in nvmem-cell '%s': %zu != %u\n", cell_name, len, ar->hw_params.cal_data_len); return -EMSGSIZE; } ret = ath10k_download_board_data(ar, buf, len); kfree(buf); if (ret) ath10k_warn(ar, "failed to download calibration data from nvmem-cell '%s': %d\n", cell_name, ret); return ret; } int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name, struct ath10k_fw_file *fw_file) { size_t magic_len, len, ie_len; int ie_id, i, index, bit, ret; struct ath10k_fw_ie *hdr; const u8 *data; __le32 *timestamp, *version; /* first fetch the firmware file (firmware-*.bin) */ fw_file->firmware = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, name); if (IS_ERR(fw_file->firmware)) return PTR_ERR(fw_file->firmware); data = fw_file->firmware->data; len = fw_file->firmware->size; /* magic also includes the null byte, check that as well */ magic_len = strlen(ATH10K_FIRMWARE_MAGIC) + 1; if (len < magic_len) { ath10k_err(ar, "firmware file '%s/%s' too small to contain magic: %zu\n", ar->hw_params.fw.dir, name, len); ret = -EINVAL; goto err; } if (memcmp(data, ATH10K_FIRMWARE_MAGIC, magic_len) != 0) { ath10k_err(ar, "invalid firmware magic\n"); ret = -EINVAL; goto err; } /* jump over the padding */ magic_len = ALIGN(magic_len, 4); len -= magic_len; data += magic_len; /* loop elements */ while (len > sizeof(struct ath10k_fw_ie)) { hdr = (struct ath10k_fw_ie *)data; ie_id = le32_to_cpu(hdr->id); ie_len = le32_to_cpu(hdr->len); len -= sizeof(*hdr); data += sizeof(*hdr); if (len < ie_len) { ath10k_err(ar, "invalid length for FW IE %d (%zu < %zu)\n", ie_id, len, ie_len); ret = -EINVAL; goto err; } switch (ie_id) { case ATH10K_FW_IE_FW_VERSION: if (ie_len > sizeof(fw_file->fw_version) - 1) break; memcpy(fw_file->fw_version, data, ie_len); fw_file->fw_version[ie_len] = '\0'; ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw version %s\n", 
fw_file->fw_version); break; case ATH10K_FW_IE_TIMESTAMP: if (ie_len != sizeof(u32)) break; timestamp = (__le32 *)data; ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw timestamp %d\n", le32_to_cpup(timestamp)); break; case ATH10K_FW_IE_FEATURES: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found firmware features ie (%zd B)\n", ie_len); for (i = 0; i < ATH10K_FW_FEATURE_COUNT; i++) { index = i / 8; bit = i % 8; if (index == ie_len) break; if (data[index] & (1 << bit)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "Enabling feature bit: %i\n", i); __set_bit(i, fw_file->fw_features); } } ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "features", "", fw_file->fw_features, sizeof(fw_file->fw_features)); break; case ATH10K_FW_IE_FW_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw image ie (%zd B)\n", ie_len); fw_file->firmware_data = data; fw_file->firmware_len = ie_len; break; case ATH10K_FW_IE_OTP_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found otp image ie (%zd B)\n", ie_len); fw_file->otp_data = data; fw_file->otp_len = ie_len; break; case ATH10K_FW_IE_WMI_OP_VERSION: if (ie_len != sizeof(u32)) break; version = (__le32 *)data; fw_file->wmi_op_version = le32_to_cpup(version); ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw ie wmi op version %d\n", fw_file->wmi_op_version); break; case ATH10K_FW_IE_HTT_OP_VERSION: if (ie_len != sizeof(u32)) break; version = (__le32 *)data; fw_file->htt_op_version = le32_to_cpup(version); ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw ie htt op version %d\n", fw_file->htt_op_version); break; case ATH10K_FW_IE_FW_CODE_SWAP_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw code swap image ie (%zd B)\n", ie_len); fw_file->codeswap_data = data; fw_file->codeswap_len = ie_len; break; default: ath10k_warn(ar, "Unknown FW IE: %u\n", le32_to_cpu(hdr->id)); break; } /* jump over the padding */ ie_len = ALIGN(ie_len, 4); len -= ie_len; data += ie_len; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, fw_file->fw_features) && (!fw_file->firmware_data || !fw_file->firmware_len)) { ath10k_warn(ar, "No ATH10K_FW_IE_FW_IMAGE found from '%s/%s', skipping\n", ar->hw_params.fw.dir, name); ret = -ENOMEDIUM; goto err; } return 0; err: ath10k_core_free_firmware_files(ar); return ret; } static void ath10k_core_get_fw_name(struct ath10k *ar, char *fw_name, size_t fw_name_len, int fw_api) { switch (ar->hif.bus) { case ATH10K_BUS_SDIO: case ATH10K_BUS_USB: scnprintf(fw_name, fw_name_len, "%s-%s-%d.bin", ATH10K_FW_FILE_BASE, ath10k_bus_str(ar->hif.bus), fw_api); break; case ATH10K_BUS_PCI: case ATH10K_BUS_AHB: case ATH10K_BUS_SNOC: scnprintf(fw_name, fw_name_len, "%s-%d.bin", ATH10K_FW_FILE_BASE, fw_api); break; } } static int ath10k_core_fetch_firmware_files(struct ath10k *ar) { int ret, i; char fw_name[100]; /* calibration file is optional, don't check for any errors */ ath10k_fetch_cal_file(ar); for (i = ATH10K_FW_API_MAX; i >= ATH10K_FW_API_MIN; i--) { ar->fw_api = i; ath10k_dbg(ar, ATH10K_DBG_BOOT, "trying fw api %d\n", ar->fw_api); ath10k_core_get_fw_name(ar, fw_name, sizeof(fw_name), ar->fw_api); ret = ath10k_core_fetch_firmware_api_n(ar, fw_name, &ar->normal_mode_fw.fw_file); if (!ret) goto success; } /* we end up here if we couldn't fetch any firmware */ ath10k_err(ar, "Failed to find firmware-N.bin (N between %d and %d) from %s: %d", ATH10K_FW_API_MIN, ATH10K_FW_API_MAX, ar->hw_params.fw.dir, ret); return ret; success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "using fw api %d\n", ar->fw_api); return 0; } static int ath10k_core_pre_cal_download(struct ath10k *ar) { int ret; ret = ath10k_download_cal_nvmem(ar, "pre-calibration"); if (ret == 0) { 
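		/* Remember where the pre-calibration data came from; the OTP
		 * helpers above key off ATH10K_PRE_CAL_MODE_* to pick the
		 * flash-based BMI parameters when executing the OTP image.
		 */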
ar->cal_mode = ATH10K_PRE_CAL_MODE_NVMEM; goto success; } else if (ret == -EPROBE_DEFER) { return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a pre-calibration nvmem-cell, try file next: %d\n", ret); ret = ath10k_download_cal_file(ar, ar->pre_cal_file); if (ret == 0) { ar->cal_mode = ATH10K_PRE_CAL_MODE_FILE; goto success; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a pre calibration file, try DT next: %d\n", ret); ret = ath10k_download_cal_dt(ar, "qcom,ath10k-pre-calibration-data"); if (ret) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "unable to load pre cal data from DT: %d\n", ret); return ret; } ar->cal_mode = ATH10K_PRE_CAL_MODE_DT; success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using calibration mode %s\n", ath10k_cal_mode_str(ar->cal_mode)); return 0; } static int ath10k_core_pre_cal_config(struct ath10k *ar) { int ret; ret = ath10k_core_pre_cal_download(ar); if (ret) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "failed to load pre cal data: %d\n", ret); return ret; } ret = ath10k_core_get_board_id_from_otp(ar); if (ret) { ath10k_err(ar, "failed to get board id: %d\n", ret); return ret; } ret = ath10k_download_and_run_otp(ar); if (ret) { ath10k_err(ar, "failed to run otp: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "pre cal configuration done successfully\n"); return 0; } static int ath10k_download_cal_data(struct ath10k *ar) { int ret; ret = ath10k_core_pre_cal_config(ar); if (ret == 0) return 0; ath10k_dbg(ar, ATH10K_DBG_BOOT, "pre cal download procedure failed, try cal file: %d\n", ret); ret = ath10k_download_cal_nvmem(ar, "calibration"); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_NVMEM; goto done; } else if (ret == -EPROBE_DEFER) { return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a calibration nvmem-cell, try file next: %d\n", ret); ret = ath10k_download_cal_file(ar, ar->cal_file); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_FILE; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a calibration file, try DT next: %d\n", ret); ret = ath10k_download_cal_dt(ar, "qcom,ath10k-calibration-data"); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_DT; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find DT entry, try target EEPROM next: %d\n", ret); ret = ath10k_download_cal_eeprom(ar); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_EEPROM; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find target EEPROM entry, try OTP next: %d\n", ret); ret = ath10k_download_and_run_otp(ar); if (ret) { ath10k_err(ar, "failed to run otp: %d\n", ret); return ret; } ar->cal_mode = ATH10K_CAL_MODE_OTP; done: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using calibration mode %s\n", ath10k_cal_mode_str(ar->cal_mode)); return 0; } static void ath10k_core_fetch_btcoex_dt(struct ath10k *ar) { struct device_node *node; u8 coex_support = 0; int ret; node = ar->dev->of_node; if (!node) goto out; ret = of_property_read_u8(node, "qcom,coexist-support", &coex_support); if (ret) { ar->coex_support = true; goto out; } if (coex_support) { ar->coex_support = true; } else { ar->coex_support = false; ar->coex_gpio_pin = -1; goto out; } ret = of_property_read_u32(node, "qcom,coexist-gpio-pin", &ar->coex_gpio_pin); if (ret) ar->coex_gpio_pin = -1; out: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot coex_support %d coex_gpio_pin %d\n", ar->coex_support, ar->coex_gpio_pin); } static int ath10k_init_uart(struct ath10k *ar) { int ret; /* * Explicitly setting UART prints to zero as target turns it on * based on scratch registers. 
*/ ret = ath10k_bmi_write32(ar, hi_serial_enable, 0); if (ret) { ath10k_warn(ar, "could not disable UART prints (%d)\n", ret); return ret; } if (!uart_print) { if (ar->hw_params.uart_pin_workaround) { ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, ar->hw_params.uart_pin); if (ret) { ath10k_warn(ar, "failed to set UART TX pin: %d", ret); return ret; } } return 0; } ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, ar->hw_params.uart_pin); if (ret) { ath10k_warn(ar, "could not enable UART prints (%d)\n", ret); return ret; } ret = ath10k_bmi_write32(ar, hi_serial_enable, 1); if (ret) { ath10k_warn(ar, "could not enable UART prints (%d)\n", ret); return ret; } /* Set the UART baud rate to 19200. */ ret = ath10k_bmi_write32(ar, hi_desired_baud_rate, 19200); if (ret) { ath10k_warn(ar, "could not set the baud rate (%d)\n", ret); return ret; } ath10k_info(ar, "UART prints enabled\n"); return 0; } static int ath10k_init_hw_params(struct ath10k *ar) { const struct ath10k_hw_params *hw_params; int i; for (i = 0; i < ARRAY_SIZE(ath10k_hw_params_list); i++) { hw_params = &ath10k_hw_params_list[i]; if (hw_params->bus == ar->hif.bus && hw_params->id == ar->target_version && hw_params->dev_id == ar->dev_id) break; } if (i == ARRAY_SIZE(ath10k_hw_params_list)) { ath10k_err(ar, "Unsupported hardware version: 0x%x\n", ar->target_version); return -EINVAL; } ar->hw_params = *hw_params; ath10k_dbg(ar, ATH10K_DBG_BOOT, "Hardware name %s version 0x%x\n", ar->hw_params.name, ar->target_version); return 0; } void ath10k_core_start_recovery(struct ath10k *ar) { if (test_and_set_bit(ATH10K_FLAG_RESTARTING, &ar->dev_flags)) { ath10k_warn(ar, "already restarting\n"); return; } queue_work(ar->workqueue, &ar->restart_work); } EXPORT_SYMBOL(ath10k_core_start_recovery); void ath10k_core_napi_enable(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); if (test_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags)) return; napi_enable(&ar->napi); set_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags); } EXPORT_SYMBOL(ath10k_core_napi_enable); void ath10k_core_napi_sync_disable(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); if (!test_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags)) return; napi_synchronize(&ar->napi); napi_disable(&ar->napi); clear_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags); } EXPORT_SYMBOL(ath10k_core_napi_sync_disable); static void ath10k_core_restart(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, restart_work); int ret; set_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags); /* Place a barrier to make sure the compiler doesn't reorder * CRASH_FLUSH and calling other functions. */ barrier(); ieee80211_stop_queues(ar->hw); ath10k_drain_tx(ar); complete(&ar->scan.started); complete(&ar->scan.completed); complete(&ar->scan.on_channel); complete(&ar->offchan_tx_completed); complete(&ar->install_key_done); complete(&ar->vdev_setup_done); complete(&ar->vdev_delete_done); complete(&ar->thermal.wmi_sync); complete(&ar->bss_survey_done); wake_up(&ar->htt.empty_tx_wq); wake_up(&ar->wmi.tx_credits_wq); wake_up(&ar->peer_mapping_wq); /* TODO: We can have one instance of cancelling coverage_class_work by * moving it to ath10k_halt(), so that both stop() and restart() would * call that but it takes conf_mutex() and if we call cancel_work_sync() * with conf_mutex it will deadlock. 
*/ cancel_work_sync(&ar->set_coverage_class_work); mutex_lock(&ar->conf_mutex); switch (ar->state) { case ATH10K_STATE_ON: ar->state = ATH10K_STATE_RESTARTING; ath10k_halt(ar); ath10k_scan_finish(ar); ieee80211_restart_hw(ar->hw); break; case ATH10K_STATE_OFF: /* this can happen if driver is being unloaded * or if the crash happens during FW probing */ ath10k_warn(ar, "cannot restart a device that hasn't been started\n"); break; case ATH10K_STATE_RESTARTING: /* hw restart might be requested from multiple places */ break; case ATH10K_STATE_RESTARTED: ar->state = ATH10K_STATE_WEDGED; fallthrough; case ATH10K_STATE_WEDGED: ath10k_warn(ar, "device is wedged, will not restart\n"); break; case ATH10K_STATE_UTF: ath10k_warn(ar, "firmware restart in UTF mode not supported\n"); break; } mutex_unlock(&ar->conf_mutex); ret = ath10k_coredump_submit(ar); if (ret) ath10k_warn(ar, "failed to send firmware crash dump via devcoredump: %d", ret); complete(&ar->driver_recovery); } static void ath10k_core_set_coverage_class_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, set_coverage_class_work); if (ar->hw_params.hw_ops->set_coverage_class) ar->hw_params.hw_ops->set_coverage_class(ar, -1); } static int ath10k_core_init_firmware_features(struct ath10k *ar) { struct ath10k_fw_file *fw_file = &ar->normal_mode_fw.fw_file; int max_num_peers; if (test_bit(ATH10K_FW_FEATURE_WMI_10_2, fw_file->fw_features) && !test_bit(ATH10K_FW_FEATURE_WMI_10X, fw_file->fw_features)) { ath10k_err(ar, "feature bits corrupted: 10.2 feature requires 10.x feature to be set as well"); return -EINVAL; } if (fw_file->wmi_op_version >= ATH10K_FW_WMI_OP_VERSION_MAX) { ath10k_err(ar, "unsupported WMI OP version (max %d): %d\n", ATH10K_FW_WMI_OP_VERSION_MAX, fw_file->wmi_op_version); return -EINVAL; } ar->wmi.rx_decap_mode = ATH10K_HW_TXRX_NATIVE_WIFI; switch (ath10k_cryptmode_param) { case ATH10K_CRYPT_MODE_HW: clear_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); clear_bit(ATH10K_FLAG_HW_CRYPTO_DISABLED, &ar->dev_flags); break; case ATH10K_CRYPT_MODE_SW: if (!test_bit(ATH10K_FW_FEATURE_RAW_MODE_SUPPORT, fw_file->fw_features)) { ath10k_err(ar, "cryptmode > 0 requires raw mode support from firmware"); return -EINVAL; } set_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); set_bit(ATH10K_FLAG_HW_CRYPTO_DISABLED, &ar->dev_flags); break; default: ath10k_info(ar, "invalid cryptmode: %d\n", ath10k_cryptmode_param); return -EINVAL; } ar->htt.max_num_amsdu = ATH10K_HTT_MAX_NUM_AMSDU_DEFAULT; ar->htt.max_num_ampdu = ATH10K_HTT_MAX_NUM_AMPDU_DEFAULT; if (ath10k_frame_mode == ATH10K_HW_TXRX_RAW) { if (!test_bit(ATH10K_FW_FEATURE_RAW_MODE_SUPPORT, fw_file->fw_features)) { ath10k_err(ar, "rawmode = 1 requires support from firmware"); return -EINVAL; } set_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); } if (test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { ar->wmi.rx_decap_mode = ATH10K_HW_TXRX_RAW; /* Workaround: * * Firmware A-MSDU aggregation breaks with RAW Tx encap mode * and causes enormous performance issues (malformed frames, * etc). * * Disabling A-MSDU makes RAW mode stable with heavy traffic * albeit a bit slower compared to regular operation. */ ar->htt.max_num_amsdu = 1; } /* Backwards compatibility for firmwares without * ATH10K_FW_IE_WMI_OP_VERSION. 
*/ if (fw_file->wmi_op_version == ATH10K_FW_WMI_OP_VERSION_UNSET) { if (test_bit(ATH10K_FW_FEATURE_WMI_10X, fw_file->fw_features)) { if (test_bit(ATH10K_FW_FEATURE_WMI_10_2, fw_file->fw_features)) fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_10_2; else fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_10_1; } else { fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_MAIN; } } switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_MAIN: max_num_peers = TARGET_NUM_PEERS; ar->max_num_stations = TARGET_NUM_STATIONS; ar->max_num_vdevs = TARGET_NUM_VDEVS; ar->htt.max_num_pending_tx = TARGET_NUM_MSDU_DESC; ar->fw_stats_req_mask = WMI_STAT_PDEV | WMI_STAT_VDEV | WMI_STAT_PEER; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; break; case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: if (ath10k_peer_stats_enabled(ar)) { max_num_peers = TARGET_10X_TX_STATS_NUM_PEERS; ar->max_num_stations = TARGET_10X_TX_STATS_NUM_STATIONS; } else { max_num_peers = TARGET_10X_NUM_PEERS; ar->max_num_stations = TARGET_10X_NUM_STATIONS; } ar->max_num_vdevs = TARGET_10X_NUM_VDEVS; ar->htt.max_num_pending_tx = TARGET_10X_NUM_MSDU_DESC; ar->fw_stats_req_mask = WMI_STAT_PEER; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; break; case ATH10K_FW_WMI_OP_VERSION_TLV: max_num_peers = TARGET_TLV_NUM_PEERS; ar->max_num_stations = TARGET_TLV_NUM_STATIONS; ar->max_num_vdevs = TARGET_TLV_NUM_VDEVS; ar->max_num_tdls_vdevs = TARGET_TLV_NUM_TDLS_VDEVS; if (ar->hif.bus == ATH10K_BUS_SDIO) ar->htt.max_num_pending_tx = TARGET_TLV_NUM_MSDU_DESC_HL; else ar->htt.max_num_pending_tx = TARGET_TLV_NUM_MSDU_DESC; ar->wow.max_num_patterns = TARGET_TLV_NUM_WOW_PATTERNS; ar->fw_stats_req_mask = WMI_TLV_STAT_PDEV | WMI_TLV_STAT_VDEV | WMI_TLV_STAT_PEER | WMI_TLV_STAT_PEER_EXTD; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; ar->wmi.mgmt_max_num_pending_tx = TARGET_TLV_MGMT_NUM_MSDU_DESC; break; case ATH10K_FW_WMI_OP_VERSION_10_4: max_num_peers = TARGET_10_4_NUM_PEERS; ar->max_num_stations = TARGET_10_4_NUM_STATIONS; ar->num_active_peers = TARGET_10_4_ACTIVE_PEERS; ar->max_num_vdevs = TARGET_10_4_NUM_VDEVS; ar->num_tids = TARGET_10_4_TGT_NUM_TIDS; ar->fw_stats_req_mask = WMI_10_4_STAT_PEER | WMI_10_4_STAT_PEER_EXTD | WMI_10_4_STAT_VDEV_EXTD; ar->max_spatial_stream = ar->hw_params.max_spatial_stream; ar->max_num_tdls_vdevs = TARGET_10_4_NUM_TDLS_VDEVS; if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, fw_file->fw_features)) ar->htt.max_num_pending_tx = TARGET_10_4_NUM_MSDU_DESC_PFC; else ar->htt.max_num_pending_tx = TARGET_10_4_NUM_MSDU_DESC; break; case ATH10K_FW_WMI_OP_VERSION_UNSET: case ATH10K_FW_WMI_OP_VERSION_MAX: default: WARN_ON(1); return -EINVAL; } if (ar->hw_params.num_peers) ar->max_num_peers = ar->hw_params.num_peers; else ar->max_num_peers = max_num_peers; /* Backwards compatibility for firmwares without * ATH10K_FW_IE_HTT_OP_VERSION. 
*/ if (fw_file->htt_op_version == ATH10K_FW_HTT_OP_VERSION_UNSET) { switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_MAIN: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_MAIN; break; case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_10_1; break; case ATH10K_FW_WMI_OP_VERSION_TLV: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_TLV; break; case ATH10K_FW_WMI_OP_VERSION_10_4: case ATH10K_FW_WMI_OP_VERSION_UNSET: case ATH10K_FW_WMI_OP_VERSION_MAX: ath10k_err(ar, "htt op version not found from fw meta data"); return -EINVAL; } } return 0; } static int ath10k_core_reset_rx_filter(struct ath10k *ar) { int ret; int vdev_id; int vdev_type; int vdev_subtype; const u8 *vdev_addr; vdev_id = 0; vdev_type = WMI_VDEV_TYPE_STA; vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE); vdev_addr = ar->mac_addr; ret = ath10k_wmi_vdev_create(ar, vdev_id, vdev_type, vdev_subtype, vdev_addr); if (ret) { ath10k_err(ar, "failed to create dummy vdev: %d\n", ret); return ret; } ret = ath10k_wmi_vdev_delete(ar, vdev_id); if (ret) { ath10k_err(ar, "failed to delete dummy vdev: %d\n", ret); return ret; } /* WMI and HTT may use separate HIF pipes and are not guaranteed to be * serialized properly implicitly. * * Moreover (most) WMI commands have no explicit acknowledges. It is * possible to infer it implicitly by poking firmware with echo * command - getting a reply means all preceding comments have been * (mostly) processed. * * In case of vdev create/delete this is sufficient. * * Without this it's possible to end up with a race when HTT Rx ring is * started before vdev create/delete hack is complete allowing a short * window of opportunity to receive (and Tx ACK) a bunch of frames. 
*/ ret = ath10k_wmi_barrier(ar); if (ret) { ath10k_err(ar, "failed to ping firmware: %d\n", ret); return ret; } return 0; } static int ath10k_core_compat_services(struct ath10k *ar) { struct ath10k_fw_file *fw_file = &ar->normal_mode_fw.fw_file; /* all 10.x firmware versions support thermal throttling but don't * advertise the support via service flags so we have to hardcode * it here */ switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: case ATH10K_FW_WMI_OP_VERSION_10_4: set_bit(WMI_SERVICE_THERM_THROT, ar->wmi.svc_map); break; default: break; } return 0; } #define TGT_IRAM_READ_PER_ITR (8 * 1024) static int ath10k_core_copy_target_iram(struct ath10k *ar) { const struct ath10k_hw_mem_layout *hw_mem; const struct ath10k_mem_region *tmp, *mem_region = NULL; dma_addr_t paddr; void *vaddr = NULL; u8 num_read_itr; int i, ret; u32 len, remaining_len; /* copy target iram feature must work also when * ATH10K_FW_CRASH_DUMP_RAM_DATA is disabled, so * _ath10k_coredump_get_mem_layout() to accomplist that */ hw_mem = _ath10k_coredump_get_mem_layout(ar); if (!hw_mem) /* if CONFIG_DEV_COREDUMP is disabled we get NULL, then * just silently disable the feature by doing nothing */ return 0; for (i = 0; i < hw_mem->region_table.size; i++) { tmp = &hw_mem->region_table.regions[i]; if (tmp->type == ATH10K_MEM_REGION_TYPE_REG) { mem_region = tmp; break; } } if (!mem_region) return -ENOMEM; for (i = 0; i < ar->wmi.num_mem_chunks; i++) { if (ar->wmi.mem_chunks[i].req_id == WMI_IRAM_RECOVERY_HOST_MEM_REQ_ID) { vaddr = ar->wmi.mem_chunks[i].vaddr; len = ar->wmi.mem_chunks[i].len; break; } } if (!vaddr || !len) { ath10k_warn(ar, "No allocated memory for IRAM back up"); return -ENOMEM; } len = (len < mem_region->len) ? len : mem_region->len; paddr = mem_region->start; num_read_itr = len / TGT_IRAM_READ_PER_ITR; remaining_len = len % TGT_IRAM_READ_PER_ITR; for (i = 0; i < num_read_itr; i++) { ret = ath10k_hif_diag_read(ar, paddr, vaddr, TGT_IRAM_READ_PER_ITR); if (ret) { ath10k_warn(ar, "failed to copy firmware IRAM contents: %d", ret); return ret; } paddr += TGT_IRAM_READ_PER_ITR; vaddr += TGT_IRAM_READ_PER_ITR; } if (remaining_len) { ret = ath10k_hif_diag_read(ar, paddr, vaddr, remaining_len); if (ret) { ath10k_warn(ar, "failed to copy firmware IRAM contents: %d", ret); return ret; } } ath10k_dbg(ar, ATH10K_DBG_BOOT, "target IRAM back up completed\n"); return 0; } int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, const struct ath10k_fw_components *fw) { int status; u32 val; lockdep_assert_held(&ar->conf_mutex); clear_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags); ar->running_fw = fw; if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->running_fw->fw_file.fw_features)) { ath10k_bmi_start(ar); /* Enable hardware clock to speed up firmware download */ if (ar->hw_params.hw_ops->enable_pll_clk) { status = ar->hw_params.hw_ops->enable_pll_clk(ar); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot enable pll ret %d\n", status); } if (ath10k_init_configure_target(ar)) { status = -EINVAL; goto err; } status = ath10k_download_cal_data(ar); if (status) goto err; /* Some of qca988x solutions are having global reset issue * during target initialization. Bypassing PLL setting before * downloading firmware and letting the SoC run on REF_CLK is * fixing the problem. Corresponding firmware change is also * needed to set the clock source once the target is * initialized. 
*/ if (test_bit(ATH10K_FW_FEATURE_SUPPORTS_SKIP_CLOCK_INIT, ar->running_fw->fw_file.fw_features)) { status = ath10k_bmi_write32(ar, hi_skip_clock_init, 1); if (status) { ath10k_err(ar, "could not write to skip_clock_init: %d\n", status); goto err; } } status = ath10k_download_fw(ar); if (status) goto err; status = ath10k_init_uart(ar); if (status) goto err; if (ar->hif.bus == ATH10K_BUS_SDIO) { status = ath10k_init_sdio(ar, mode); if (status) { ath10k_err(ar, "failed to init SDIO: %d\n", status); goto err; } } } ar->htc.htc_ops.target_send_suspend_complete = ath10k_send_suspend_complete; status = ath10k_htc_init(ar); if (status) { ath10k_err(ar, "could not init HTC (%d)\n", status); goto err; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->running_fw->fw_file.fw_features)) { status = ath10k_bmi_done(ar); if (status) goto err; } status = ath10k_wmi_attach(ar); if (status) { ath10k_err(ar, "WMI attach failed: %d\n", status); goto err; } status = ath10k_htt_init(ar); if (status) { ath10k_err(ar, "failed to init htt: %d\n", status); goto err_wmi_detach; } status = ath10k_htt_tx_start(&ar->htt); if (status) { ath10k_err(ar, "failed to alloc htt tx: %d\n", status); goto err_wmi_detach; } /* If firmware indicates Full Rx Reorder support it must be used in a * slightly different manner. Let HTT code know. */ ar->htt.rx_ring.in_ord_rx = !!(test_bit(WMI_SERVICE_RX_FULL_REORDER, ar->wmi.svc_map)); status = ath10k_htt_rx_alloc(&ar->htt); if (status) { ath10k_err(ar, "failed to alloc htt rx: %d\n", status); goto err_htt_tx_detach; } status = ath10k_hif_start(ar); if (status) { ath10k_err(ar, "could not start HIF: %d\n", status); goto err_htt_rx_detach; } status = ath10k_htc_wait_target(&ar->htc); if (status) { ath10k_err(ar, "failed to connect to HTC: %d\n", status); goto err_hif_stop; } status = ath10k_hif_start_post(ar); if (status) { ath10k_err(ar, "failed to swap mailbox: %d\n", status); goto err_hif_stop; } if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_htt_connect(&ar->htt); if (status) { ath10k_err(ar, "failed to connect htt (%d)\n", status); goto err_hif_stop; } } status = ath10k_wmi_connect(ar); if (status) { ath10k_err(ar, "could not connect wmi: %d\n", status); goto err_hif_stop; } status = ath10k_htc_start(&ar->htc); if (status) { ath10k_err(ar, "failed to start htc: %d\n", status); goto err_hif_stop; } if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_wmi_wait_for_service_ready(ar); if (status) { ath10k_warn(ar, "wmi service ready event not received"); goto err_hif_stop; } } ath10k_dbg(ar, ATH10K_DBG_BOOT, "firmware %s booted\n", ar->hw->wiphy->fw_version); if (test_bit(ATH10K_FW_FEATURE_IRAM_RECOVERY, ar->running_fw->fw_file.fw_features)) { status = ath10k_core_copy_target_iram(ar); if (status) { ath10k_warn(ar, "failed to copy target iram contents: %d", status); goto err_hif_stop; } } if (test_bit(WMI_SERVICE_EXT_RES_CFG_SUPPORT, ar->wmi.svc_map) && mode == ATH10K_FIRMWARE_MODE_NORMAL) { val = 0; if (ath10k_peer_stats_enabled(ar)) val = WMI_10_4_PEER_STATS; /* Enable vdev stats by default */ val |= WMI_10_4_VDEV_STATS; if (test_bit(WMI_SERVICE_BSS_CHANNEL_INFO_64, ar->wmi.svc_map)) val |= WMI_10_4_BSS_CHANNEL_INFO_64; ath10k_core_fetch_btcoex_dt(ar); /* 10.4 firmware supports BT-Coex without reloading firmware * via pdev param. To support Bluetooth coexistence pdev param, * WMI_COEX_GPIO_SUPPORT of extended resource config should be * enabled always. 
* * We can still enable BTCOEX if firmware has the support * even though btceox_support value is * ATH10K_DT_BTCOEX_NOT_FOUND */ if (test_bit(WMI_SERVICE_COEX_GPIO, ar->wmi.svc_map) && test_bit(ATH10K_FW_FEATURE_BTCOEX_PARAM, ar->running_fw->fw_file.fw_features) && ar->coex_support) val |= WMI_10_4_COEX_GPIO_SUPPORT; if (test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map)) val |= WMI_10_4_TDLS_EXPLICIT_MODE_ONLY; if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map)) val |= WMI_10_4_TDLS_UAPSD_BUFFER_STA; if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map)) val |= WMI_10_4_TX_DATA_ACK_RSSI; if (test_bit(WMI_SERVICE_REPORT_AIRTIME, ar->wmi.svc_map)) val |= WMI_10_4_REPORT_AIRTIME; if (test_bit(WMI_SERVICE_EXT_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) val |= WMI_10_4_EXT_PEER_TID_CONFIGS_SUPPORT; status = ath10k_mac_ext_resource_config(ar, val); if (status) { ath10k_err(ar, "failed to send ext resource cfg command : %d\n", status); goto err_hif_stop; } } status = ath10k_wmi_cmd_init(ar); if (status) { ath10k_err(ar, "could not send WMI init command (%d)\n", status); goto err_hif_stop; } status = ath10k_wmi_wait_for_unified_ready(ar); if (status) { ath10k_err(ar, "wmi unified ready event not received\n"); goto err_hif_stop; } status = ath10k_core_compat_services(ar); if (status) { ath10k_err(ar, "compat services failed: %d\n", status); goto err_hif_stop; } status = ath10k_wmi_pdev_set_base_macaddr(ar, ar->mac_addr); if (status && status != -EOPNOTSUPP) { ath10k_err(ar, "failed to set base mac address: %d\n", status); goto err_hif_stop; } /* Some firmware revisions do not properly set up hardware rx filter * registers. * * A known example from QCA9880 and 10.2.4 is that MAC_PCU_ADDR1_MASK * is filled with 0s instead of 1s allowing HW to respond with ACKs to * any frames that matches MAC_PCU_RX_FILTER which is also * misconfigured to accept anything. * * The ADDR1 is programmed using internal firmware structure field and * can't be (easily/sanely) reached from the driver explicitly. It is * possible to implicitly make it correct by creating a dummy vdev and * then deleting it. 
*/ if (ar->hw_params.hw_filter_reset_required && mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_core_reset_rx_filter(ar); if (status) { ath10k_err(ar, "failed to reset rx filter: %d\n", status); goto err_hif_stop; } } status = ath10k_htt_rx_ring_refill(ar); if (status) { ath10k_err(ar, "failed to refill htt rx ring: %d\n", status); goto err_hif_stop; } if (ar->max_num_vdevs >= 64) ar->free_vdev_map = 0xFFFFFFFFFFFFFFFFLL; else ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; INIT_LIST_HEAD(&ar->arvifs); /* we don't care about HTT in UTF mode */ if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_htt_setup(&ar->htt); if (status) { ath10k_err(ar, "failed to setup htt: %d\n", status); goto err_hif_stop; } } status = ath10k_debug_start(ar); if (status) goto err_hif_stop; status = ath10k_hif_set_target_log_mode(ar, fw_diag_log); if (status && status != -EOPNOTSUPP) { ath10k_warn(ar, "set target log mode failed: %d\n", status); goto err_hif_stop; } return 0; err_hif_stop: ath10k_hif_stop(ar); err_htt_rx_detach: ath10k_htt_rx_free(&ar->htt); err_htt_tx_detach: ath10k_htt_tx_free(&ar->htt); err_wmi_detach: ath10k_wmi_detach(ar); err: return status; } EXPORT_SYMBOL(ath10k_core_start); int ath10k_wait_for_suspend(struct ath10k *ar, u32 suspend_opt) { int ret; unsigned long time_left; reinit_completion(&ar->target_suspend); ret = ath10k_wmi_pdev_suspend_target(ar, suspend_opt); if (ret) { ath10k_warn(ar, "could not suspend target (%d)\n", ret); return ret; } time_left = wait_for_completion_timeout(&ar->target_suspend, 1 * HZ); if (!time_left) { ath10k_warn(ar, "suspend timed out - target pause event never came\n"); return -ETIMEDOUT; } return 0; } void ath10k_core_stop(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); ath10k_debug_stop(ar); /* try to suspend target */ if (ar->state != ATH10K_STATE_RESTARTING && ar->state != ATH10K_STATE_UTF) ath10k_wait_for_suspend(ar, WMI_PDEV_SUSPEND_AND_DISABLE_INTR); ath10k_hif_stop(ar); ath10k_htt_tx_stop(&ar->htt); ath10k_htt_rx_free(&ar->htt); ath10k_wmi_detach(ar); ar->id.bmi_ids_valid = false; } EXPORT_SYMBOL(ath10k_core_stop); /* mac80211 manages fw/hw initialization through start/stop hooks. 
However in * order to know what hw capabilities should be advertised to mac80211 it is * necessary to load the firmware (and tear it down immediately since start * hook will try to init it again) before registering */ static int ath10k_core_probe_fw(struct ath10k *ar) { struct bmi_target_info target_info; int ret = 0; ret = ath10k_hif_power_up(ar, ATH10K_FIRMWARE_MODE_NORMAL); if (ret) { ath10k_err(ar, "could not power on hif bus (%d)\n", ret); return ret; } switch (ar->hif.bus) { case ATH10K_BUS_SDIO: memset(&target_info, 0, sizeof(target_info)); ret = ath10k_bmi_get_target_info_sdio(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; case ATH10K_BUS_PCI: case ATH10K_BUS_AHB: case ATH10K_BUS_USB: memset(&target_info, 0, sizeof(target_info)); ret = ath10k_bmi_get_target_info(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; case ATH10K_BUS_SNOC: memset(&target_info, 0, sizeof(target_info)); ret = ath10k_hif_get_target_info(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; default: ath10k_err(ar, "incorrect hif bus type: %d\n", ar->hif.bus); } ret = ath10k_init_hw_params(ar); if (ret) { ath10k_err(ar, "could not get hw params (%d)\n", ret); goto err_power_down; } ret = ath10k_core_fetch_firmware_files(ar); if (ret) { ath10k_err(ar, "could not fetch firmware files (%d)\n", ret); goto err_power_down; } BUILD_BUG_ON(sizeof(ar->hw->wiphy->fw_version) != sizeof(ar->normal_mode_fw.fw_file.fw_version)); memcpy(ar->hw->wiphy->fw_version, ar->normal_mode_fw.fw_file.fw_version, sizeof(ar->hw->wiphy->fw_version)); ath10k_debug_print_hwfw_info(ar); if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->normal_mode_fw.fw_file.fw_features)) { ret = ath10k_core_pre_cal_download(ar); if (ret) { /* pre calibration data download is not necessary * for all the chipsets. Ignore failures and continue. 
*/ ath10k_dbg(ar, ATH10K_DBG_BOOT, "could not load pre cal data: %d\n", ret); } ret = ath10k_core_get_board_id_from_otp(ar); if (ret && ret != -EOPNOTSUPP) { ath10k_err(ar, "failed to get board id from otp: %d\n", ret); goto err_free_firmware_files; } ret = ath10k_core_check_smbios(ar); if (ret) ath10k_dbg(ar, ATH10K_DBG_BOOT, "SMBIOS bdf variant name not set.\n"); ret = ath10k_core_check_dt(ar); if (ret) ath10k_dbg(ar, ATH10K_DBG_BOOT, "DT bdf variant name not set.\n"); ret = ath10k_core_fetch_board_file(ar, ATH10K_BD_IE_BOARD); if (ret) { ath10k_err(ar, "failed to fetch board file: %d\n", ret); goto err_free_firmware_files; } ath10k_debug_print_board_info(ar); } device_get_mac_address(ar->dev, ar->mac_addr); ret = ath10k_core_init_firmware_features(ar); if (ret) { ath10k_err(ar, "fatal problem with firmware features: %d\n", ret); goto err_free_firmware_files; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->normal_mode_fw.fw_file.fw_features)) { ret = ath10k_swap_code_seg_init(ar, &ar->normal_mode_fw.fw_file); if (ret) { ath10k_err(ar, "failed to initialize code swap segment: %d\n", ret); goto err_free_firmware_files; } } mutex_lock(&ar->conf_mutex); ret = ath10k_core_start(ar, ATH10K_FIRMWARE_MODE_NORMAL, &ar->normal_mode_fw); if (ret) { ath10k_err(ar, "could not init core (%d)\n", ret); goto err_unlock; } ath10k_debug_print_boot_info(ar); ath10k_core_stop(ar); mutex_unlock(&ar->conf_mutex); ath10k_hif_power_down(ar); return 0; err_unlock: mutex_unlock(&ar->conf_mutex); err_free_firmware_files: ath10k_core_free_firmware_files(ar); err_power_down: ath10k_hif_power_down(ar); return ret; } static void ath10k_core_register_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, register_work); int status; /* peer stats are enabled by default */ set_bit(ATH10K_FLAG_PEER_STATS, &ar->dev_flags); status = ath10k_core_probe_fw(ar); if (status) { ath10k_err(ar, "could not probe fw (%d)\n", status); goto err; } status = ath10k_mac_register(ar); if (status) { ath10k_err(ar, "could not register to mac80211 (%d)\n", status); goto err_release_fw; } status = ath10k_coredump_register(ar); if (status) { ath10k_err(ar, "unable to register coredump\n"); goto err_unregister_mac; } status = ath10k_debug_register(ar); if (status) { ath10k_err(ar, "unable to initialize debugfs\n"); goto err_unregister_coredump; } status = ath10k_spectral_create(ar); if (status) { ath10k_err(ar, "failed to initialize spectral\n"); goto err_debug_destroy; } status = ath10k_thermal_register(ar); if (status) { ath10k_err(ar, "could not register thermal device: %d\n", status); goto err_spectral_destroy; } set_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags); return; err_spectral_destroy: ath10k_spectral_destroy(ar); err_debug_destroy: ath10k_debug_destroy(ar); err_unregister_coredump: ath10k_coredump_unregister(ar); err_unregister_mac: ath10k_mac_unregister(ar); err_release_fw: ath10k_core_free_firmware_files(ar); err: /* TODO: It's probably a good idea to release device from the driver * but calling device_release_driver() here will cause a deadlock. 
*/ return; } int ath10k_core_register(struct ath10k *ar, const struct ath10k_bus_params *bus_params) { ar->bus_param = *bus_params; queue_work(ar->workqueue, &ar->register_work); return 0; } EXPORT_SYMBOL(ath10k_core_register); void ath10k_core_unregister(struct ath10k *ar) { cancel_work_sync(&ar->register_work); if (!test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags)) return; ath10k_thermal_unregister(ar); /* Stop spectral before unregistering from mac80211 to remove the * relayfs debugfs file cleanly. Otherwise the parent debugfs tree * would be already be free'd recursively, leading to a double free. */ ath10k_spectral_destroy(ar); /* We must unregister from mac80211 before we stop HTC and HIF. * Otherwise we will fail to submit commands to FW and mac80211 will be * unhappy about callback failures. */ ath10k_mac_unregister(ar); ath10k_testmode_destroy(ar); ath10k_core_free_firmware_files(ar); ath10k_core_free_board_files(ar); ath10k_debug_unregister(ar); } EXPORT_SYMBOL(ath10k_core_unregister); struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, enum ath10k_bus bus, enum ath10k_hw_rev hw_rev, const struct ath10k_hif_ops *hif_ops) { struct ath10k *ar; int ret; ar = ath10k_mac_create(priv_size); if (!ar) return NULL; ar->ath_common.priv = ar; ar->ath_common.hw = ar->hw; ar->dev = dev; ar->hw_rev = hw_rev; ar->hif.ops = hif_ops; ar->hif.bus = bus; switch (hw_rev) { case ATH10K_HW_QCA988X: case ATH10K_HW_QCA9887: ar->regs = &qca988x_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca988x_values; break; case ATH10K_HW_QCA6174: case ATH10K_HW_QCA9377: ar->regs = &qca6174_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca6174_values; break; case ATH10K_HW_QCA99X0: case ATH10K_HW_QCA9984: ar->regs = &qca99x0_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca99x0_values; break; case ATH10K_HW_QCA9888: ar->regs = &qca99x0_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca9888_values; break; case ATH10K_HW_QCA4019: ar->regs = &qca4019_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca4019_values; break; case ATH10K_HW_WCN3990: ar->regs = &wcn3990_regs; ar->hw_ce_regs = &wcn3990_ce_regs; ar->hw_values = &wcn3990_values; break; default: ath10k_err(ar, "unsupported core hardware revision %d\n", hw_rev); ret = -EOPNOTSUPP; goto err_free_mac; } init_completion(&ar->scan.started); init_completion(&ar->scan.completed); init_completion(&ar->scan.on_channel); init_completion(&ar->target_suspend); init_completion(&ar->driver_recovery); init_completion(&ar->wow.wakeup_completed); init_completion(&ar->install_key_done); init_completion(&ar->vdev_setup_done); init_completion(&ar->vdev_delete_done); init_completion(&ar->thermal.wmi_sync); init_completion(&ar->bss_survey_done); init_completion(&ar->peer_delete_done); init_completion(&ar->peer_stats_info_complete); INIT_DELAYED_WORK(&ar->scan.timeout, ath10k_scan_timeout_work); ar->workqueue = create_singlethread_workqueue("ath10k_wq"); if (!ar->workqueue) goto err_free_mac; ar->workqueue_aux = create_singlethread_workqueue("ath10k_aux_wq"); if (!ar->workqueue_aux) goto err_free_wq; ar->workqueue_tx_complete = create_singlethread_workqueue("ath10k_tx_complete_wq"); if (!ar->workqueue_tx_complete) goto err_free_aux_wq; mutex_init(&ar->conf_mutex); mutex_init(&ar->dump_mutex); spin_lock_init(&ar->data_lock); for (int ac = 0; ac < IEEE80211_NUM_ACS; ac++) spin_lock_init(&ar->queue_lock[ac]); INIT_LIST_HEAD(&ar->peers); init_waitqueue_head(&ar->peer_mapping_wq); init_waitqueue_head(&ar->htt.empty_tx_wq); 
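/* tx_credits_wq (initialised just below) is woken from the HTC endpoint
 * tx-credits callback when the firmware returns WMI tx credits;
 * ath10k_wmi_cmd_send() sleeps on it when the credit pool is exhausted.
 */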
init_waitqueue_head(&ar->wmi.tx_credits_wq); skb_queue_head_init(&ar->htt.rx_indication_head); init_completion(&ar->offchan_tx_completed); INIT_WORK(&ar->offchan_tx_work, ath10k_offchan_tx_work); skb_queue_head_init(&ar->offchan_tx_queue); INIT_WORK(&ar->wmi_mgmt_tx_work, ath10k_mgmt_over_wmi_tx_work); skb_queue_head_init(&ar->wmi_mgmt_tx_queue); INIT_WORK(&ar->register_work, ath10k_core_register_work); INIT_WORK(&ar->restart_work, ath10k_core_restart); INIT_WORK(&ar->set_coverage_class_work, ath10k_core_set_coverage_class_work); init_dummy_netdev(&ar->napi_dev); ret = ath10k_coredump_create(ar); if (ret) goto err_free_tx_complete; ret = ath10k_debug_create(ar); if (ret) goto err_free_coredump; return ar; err_free_coredump: ath10k_coredump_destroy(ar); err_free_tx_complete: destroy_workqueue(ar->workqueue_tx_complete); err_free_aux_wq: destroy_workqueue(ar->workqueue_aux); err_free_wq: destroy_workqueue(ar->workqueue); err_free_mac: ath10k_mac_destroy(ar); return NULL; } EXPORT_SYMBOL(ath10k_core_create); void ath10k_core_destroy(struct ath10k *ar) { destroy_workqueue(ar->workqueue); destroy_workqueue(ar->workqueue_aux); destroy_workqueue(ar->workqueue_tx_complete); ath10k_debug_destroy(ar); ath10k_coredump_destroy(ar); ath10k_htt_tx_destroy(&ar->htt); ath10k_wmi_free_host_mem(ar); ath10k_mac_destroy(ar); } EXPORT_SYMBOL(ath10k_core_destroy); MODULE_AUTHOR("Qualcomm Atheros"); MODULE_DESCRIPTION("Core module for Qualcomm Atheros 802.11ac wireless LAN cards."); MODULE_LICENSE("Dual BSD/GPL");
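/*
 * Illustrative sketch (not part of the driver): roughly how a bus glue layer
 * such as pci.c is expected to drive the core API above. The hif ops table
 * (example_hif_ops), the private-data struct (example_bus_priv) and the
 * hardware revision below are placeholders, and <linux/pci.h> is assumed;
 * real glue code also fills in ath10k_bus_params (chip id, device type, ...)
 * from the hardware before registering.
 */
#if 0	/* example only, does not build as-is */
static int example_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct ath10k_bus_params bus_params = {};
	struct ath10k *ar;
	int ret;

	/* Allocate the core context; priv_size reserves room for bus-private data. */
	ar = ath10k_core_create(sizeof(struct example_bus_priv), &pdev->dev,
				ATH10K_BUS_PCI, ATH10K_HW_QCA988X,
				&example_hif_ops);
	if (!ar)
		return -ENOMEM;

	pci_set_drvdata(pdev, ar);

	/* ... map BARs, request IRQs, read the chip id into bus_params ... */

	/* Queues ath10k_core_register_work(): firmware probe + mac80211 registration. */
	ret = ath10k_core_register(ar, &bus_params);
	if (ret) {
		ath10k_core_destroy(ar);
		return ret;
	}

	return 0;
}

static void example_bus_remove(struct pci_dev *pdev)
{
	struct ath10k *ar = pci_get_drvdata(pdev);

	ath10k_core_unregister(ar);	/* tears down mac80211, debugfs, spectral, ... */
	ath10k_core_destroy(ar);	/* frees the workqueues and the core context */
}
#endif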
// SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/printk.c * * Copyright (C) 1991, 1992 Linus Torvalds * * Modified to make sys_syslog() more flexible: added commands to * return the last 4k of kernel messages, regardless of whether * they've been read or not. Added option to suppress kernel printk's * to the console. Added hook for sending the console messages * elsewhere, in preparation for a serial line console (someday). * Ted Ts'o, 2/11/93. * Modified for sysctl support, 1/8/97, Chris Horn. * Fixed SMP synchronization, 08/08/99, Manfred Spraul * manfred@colorfullife.com * Rewrote bits to get rid of console_lock * 01Mar01 Andrew Morton */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/mm.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/console.h> #include <linux/init.h> #include <linux/jiffies.h> #include <linux/nmi.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/delay.h> #include <linux/smp.h> #include <linux/security.h> #include <linux/memblock.h> #include <linux/syscalls.h> #include <linux/vmcore_info.h> #include <linux/ratelimit.h> #include <linux/kmsg_dump.h> #include <linux/syslog.h> #include <linux/cpu.h> #include <linux/rculist.h> #include <linux/poll.h> #include <linux/irq_work.h> #include <linux/ctype.h> #include <linux/uio.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> #include <linux/uaccess.h> #include <asm/sections.h> #include <trace/events/initcall.h> #define CREATE_TRACE_POINTS #include <trace/events/printk.h> #include "printk_ringbuffer.h" #include "console_cmdline.h" #include "braille.h" #include "internal.h" int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ }; EXPORT_SYMBOL_GPL(console_printk); atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); EXPORT_SYMBOL(ignore_console_lock_warning); EXPORT_TRACEPOINT_SYMBOL_GPL(console); /* * Low level drivers may need that to know if they can schedule in * their unblank() callback or not. So let's export it. */ int oops_in_progress; EXPORT_SYMBOL(oops_in_progress); /* * console_mutex protects console_list updates and console->flags updates. * The flags are synchronized only for consoles that are registered, i.e. * accessible via the console list. */ static DEFINE_MUTEX(console_mutex); /* * console_sem protects updates to console->seq * and also provides serialization for console printing. */ static DEFINE_SEMAPHORE(console_sem, 1); HLIST_HEAD(console_list); EXPORT_SYMBOL_GPL(console_list); DEFINE_STATIC_SRCU(console_srcu); /* * System may need to suppress printk message under certain * circumstances, like after kernel panic happens. 
*/ int __read_mostly suppress_printk; #ifdef CONFIG_LOCKDEP static struct lockdep_map console_lock_dep_map = { .name = "console_lock" }; void lockdep_assert_console_list_lock_held(void) { lockdep_assert_held(&console_mutex); } EXPORT_SYMBOL(lockdep_assert_console_list_lock_held); #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC bool console_srcu_read_lock_is_held(void) { return srcu_read_lock_held(&console_srcu); } EXPORT_SYMBOL(console_srcu_read_lock_is_held); #endif enum devkmsg_log_bits { __DEVKMSG_LOG_BIT_ON = 0, __DEVKMSG_LOG_BIT_OFF, __DEVKMSG_LOG_BIT_LOCK, }; enum devkmsg_log_masks { DEVKMSG_LOG_MASK_ON = BIT(__DEVKMSG_LOG_BIT_ON), DEVKMSG_LOG_MASK_OFF = BIT(__DEVKMSG_LOG_BIT_OFF), DEVKMSG_LOG_MASK_LOCK = BIT(__DEVKMSG_LOG_BIT_LOCK), }; /* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */ #define DEVKMSG_LOG_MASK_DEFAULT 0 static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; static int __control_devkmsg(char *str) { size_t len; if (!str) return -EINVAL; len = str_has_prefix(str, "on"); if (len) { devkmsg_log = DEVKMSG_LOG_MASK_ON; return len; } len = str_has_prefix(str, "off"); if (len) { devkmsg_log = DEVKMSG_LOG_MASK_OFF; return len; } len = str_has_prefix(str, "ratelimit"); if (len) { devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; return len; } return -EINVAL; } static int __init control_devkmsg(char *str) { if (__control_devkmsg(str) < 0) { pr_warn("printk.devkmsg: bad option string '%s'\n", str); return 1; } /* * Set sysctl string accordingly: */ if (devkmsg_log == DEVKMSG_LOG_MASK_ON) strcpy(devkmsg_log_str, "on"); else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) strcpy(devkmsg_log_str, "off"); /* else "ratelimit" which is set by default. */ /* * Sysctl cannot change it anymore. The kernel command line setting of * this parameter is to force the setting to be permanent throughout the * runtime of the system. This is a precation measure against userspace * trying to be a smarta** and attempting to change it up on us. */ devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; return 1; } __setup("printk.devkmsg=", control_devkmsg); char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { char old_str[DEVKMSG_STR_MAX_SIZE]; unsigned int old; int err; if (write) { if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK) return -EINVAL; old = devkmsg_log; strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE); } err = proc_dostring(table, write, buffer, lenp, ppos); if (err) return err; if (write) { err = __control_devkmsg(devkmsg_log_str); /* * Do not accept an unknown string OR a known string with * trailing crap... */ if (err < 0 || (err + 1 != *lenp)) { /* ... and restore old setting. */ devkmsg_log = old; strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE); return -EINVAL; } } return 0; } #endif /* CONFIG_PRINTK && CONFIG_SYSCTL */ /** * console_list_lock - Lock the console list * * For console list or console->flags updates */ void console_list_lock(void) { /* * In unregister_console() and console_force_preferred_locked(), * synchronize_srcu() is called with the console_list_lock held. * Therefore it is not allowed that the console_list_lock is taken * with the srcu_lock held. * * Detecting if this context is really in the read-side critical * section is only possible if the appropriate debug options are * enabled. 
*/ WARN_ON_ONCE(debug_lockdep_rcu_enabled() && srcu_read_lock_held(&console_srcu)); mutex_lock(&console_mutex); } EXPORT_SYMBOL(console_list_lock); /** * console_list_unlock - Unlock the console list * * Counterpart to console_list_lock() */ void console_list_unlock(void) { mutex_unlock(&console_mutex); } EXPORT_SYMBOL(console_list_unlock); /** * console_srcu_read_lock - Register a new reader for the * SRCU-protected console list * * Use for_each_console_srcu() to iterate the console list * * Context: Any context. * Return: A cookie to pass to console_srcu_read_unlock(). */ int console_srcu_read_lock(void) { return srcu_read_lock_nmisafe(&console_srcu); } EXPORT_SYMBOL(console_srcu_read_lock); /** * console_srcu_read_unlock - Unregister an old reader from * the SRCU-protected console list * @cookie: cookie returned from console_srcu_read_lock() * * Counterpart to console_srcu_read_lock() */ void console_srcu_read_unlock(int cookie) { srcu_read_unlock_nmisafe(&console_srcu, cookie); } EXPORT_SYMBOL(console_srcu_read_unlock); /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. */ #define down_console_sem() do { \ down(&console_sem);\ mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\ } while (0) static int __down_trylock_console_sem(unsigned long ip) { int lock_failed; unsigned long flags; /* * Here and in __up_console_sem() we need to be in safe mode, * because spindump/WARN/etc from under console ->lock will * deadlock in printk()->down_trylock_console_sem() otherwise. */ printk_safe_enter_irqsave(flags); lock_failed = down_trylock(&console_sem); printk_safe_exit_irqrestore(flags); if (lock_failed) return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; } #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) static void __up_console_sem(unsigned long ip) { unsigned long flags; mutex_release(&console_lock_dep_map, ip); printk_safe_enter_irqsave(flags); up(&console_sem); printk_safe_exit_irqrestore(flags); } #define up_console_sem() __up_console_sem(_RET_IP_) static bool panic_in_progress(void) { return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); } /* Return true if a panic is in progress on the current CPU. */ bool this_cpu_in_panic(void) { /* * We can use raw_smp_processor_id() here because it is impossible for * the task to be migrated to the panic_cpu, or away from it. If * panic_cpu has already been set, and we're not currently executing on * that CPU, then we never will be. */ return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); } /* * Return true if a panic is in progress on a remote CPU. * * On true, the local CPU should immediately release any printing resources * that may be needed by the panic CPU. */ bool other_cpu_in_panic(void) { return (panic_in_progress() && !this_cpu_in_panic()); } /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. It's * definitely not the perfect debug tool (we don't know if _WE_ * hold it and are racing, but it helps tracking those weird code * paths in the console code where we end up in places I want * locked without the console semaphore held). 
*/ static int console_locked; /* * Array of consoles built from command line options (console=) */ #define MAX_CMDLINECONSOLES 8 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int preferred_console = -1; int console_set_on_cmdline; EXPORT_SYMBOL(console_set_on_cmdline); /* Flag: console code may call schedule() */ static int console_may_schedule; enum con_msg_format_flags { MSG_FORMAT_DEFAULT = 0, MSG_FORMAT_SYSLOG = (1 << 0), }; static int console_msg_format = MSG_FORMAT_DEFAULT; /* * The printk log buffer consists of a sequenced collection of records, each * containing variable length message text. Every record also contains its * own meta-data (@info). * * Every record meta-data carries the timestamp in microseconds, as well as * the standard userspace syslog level and syslog facility. The usual kernel * messages use LOG_KERN; userspace-injected messages always carry a matching * syslog facility, by default LOG_USER. The origin of every message can be * reliably determined that way. * * The human readable log message of a record is available in @text, the * length of the message text in @text_len. The stored message is not * terminated. * * Optionally, a record can carry a dictionary of properties (key/value * pairs), to provide userspace with a machine-readable message context. * * Examples for well-defined, commonly used property names are: * DEVICE=b12:8 device identifier * b12:8 block dev_t * c127:3 char dev_t * n8 netdev ifindex * +sound:card0 subsystem:devname * SUBSYSTEM=pci driver-core subsystem name * * Valid characters in property names are [a-zA-Z0-9.-_]. Property names * and values are terminated by a '\0' character. * * Example of record values: * record.text_buf = "it's a line" (unterminated) * record.info.seq = 56 * record.info.ts_nsec = 36863 * record.info.text_len = 11 * record.info.facility = 0 (LOG_KERN) * record.info.flags = 0 * record.info.level = 3 (LOG_ERR) * record.info.caller_id = 299 (task 299) * record.info.dev_info.subsystem = "pci" (terminated) * record.info.dev_info.device = "+pci:0000:00:01.0" (terminated) * * The 'struct printk_info' buffer must never be directly exported to * userspace, it is a kernel-private implementation detail that might * need to be changed in the future, when the requirements change. * * /dev/kmsg exports the structured data in the following line format: * "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n" * * Users of the export format should ignore possible additional values * separated by ',', and find the message after the ';' character. * * The optional key/value pairs are attached as continuation lines starting * with a space character and terminated by a newline. All possible * non-prinatable characters are escaped in the "\xff" notation. */ /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; struct latched_seq { seqcount_latch_t latch; u64 val[2]; }; /* * The next printk record to read after the last 'clear' command. There are * two copies (updated with seqcount_latch) so that reads can locklessly * access a valid value. Writers are synchronized by @syslog_lock. 
*/ static struct latched_seq clear_seq = { .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), .val[0] = 0, .val[1] = 0, }; #define LOG_LEVEL(v) ((v) & 0x07) #define LOG_FACILITY(v) ((v) >> 3 & 0xff) /* record buffer */ #define LOG_ALIGN __alignof__(unsigned long) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) #define LOG_BUF_LEN_MAX (u32)(1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; /* * Define the average message size. This only affects the number of * descriptors that will be available. Underestimating is better than * overestimating (too many available descriptors is better than not enough). */ #define PRB_AVGBITS 5 /* 32 character average length */ #if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS #error CONFIG_LOG_BUF_SHIFT value too small. #endif _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, PRB_AVGBITS, &__log_buf[0]); static struct printk_ringbuffer printk_rb_dynamic; struct printk_ringbuffer *prb = &printk_rb_static; /* * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before * per_cpu_areas are initialised. This variable is set to true when * it's safe to access per-CPU data. */ static bool __printk_percpu_data_ready __ro_after_init; bool printk_percpu_data_ready(void) { return __printk_percpu_data_ready; } /* Must be called under syslog_lock. */ static void latched_seq_write(struct latched_seq *ls, u64 val) { raw_write_seqcount_latch(&ls->latch); ls->val[0] = val; raw_write_seqcount_latch(&ls->latch); ls->val[1] = val; } /* Can be called from any context. */ static u64 latched_seq_read_nolock(struct latched_seq *ls) { unsigned int seq; unsigned int idx; u64 val; do { seq = raw_read_seqcount_latch(&ls->latch); idx = seq & 0x1; val = ls->val[idx]; } while (raw_read_seqcount_latch_retry(&ls->latch, seq)); return val; } /* Return log buffer address */ char *log_buf_addr_get(void) { return log_buf; } /* Return log buffer size */ u32 log_buf_len_get(void) { return log_buf_len; } /* * Define how much of the log buffer we could take at maximum. The value * must be greater than two. Note that only half of the buffer is available * when the index points to the middle. */ #define MAX_LOG_TAKE_PART 4 static const char trunc_msg[] = "<truncated>"; static void truncate_msg(u16 *text_len, u16 *trunc_msg_len) { /* * The message should not take the whole buffer. Otherwise, it might * get removed too soon. */ u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; if (*text_len > max_text_len) *text_len = max_text_len; /* enable the warning message (if there is room) */ *trunc_msg_len = strlen(trunc_msg); if (*text_len >= *trunc_msg_len) *text_len -= *trunc_msg_len; else *trunc_msg_len = 0; } int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); static int syslog_action_restricted(int type) { if (dmesg_restrict) return 1; /* * Unless restricted, we allow "read all" and "get buffer size" * for everybody. */ return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; } static int check_syslog_permissions(int type, int source) { /* * If this is from /proc/kmsg and we've already opened it, then we've * already done the capabilities checks at open time. 
*/ if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN) goto ok; if (syslog_action_restricted(type)) { if (capable(CAP_SYSLOG)) goto ok; return -EPERM; } ok: return security_syslog(type); } static void append_char(char **pp, char *e, char c) { if (*pp < e) *(*pp)++ = c; } static ssize_t info_print_ext_header(char *buf, size_t size, struct printk_info *info) { u64 ts_usec = info->ts_nsec; char caller[20]; #ifdef CONFIG_PRINTK_CALLER u32 id = info->caller_id; snprintf(caller, sizeof(caller), ",caller=%c%u", id & 0x80000000 ? 'C' : 'T', id & ~0x80000000); #else caller[0] = '\0'; #endif do_div(ts_usec, 1000); return scnprintf(buf, size, "%u,%llu,%llu,%c%s;", (info->facility << 3) | info->level, info->seq, ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller); } static ssize_t msg_add_ext_text(char *buf, size_t size, const char *text, size_t text_len, unsigned char endc) { char *p = buf, *e = buf + size; size_t i; /* escape non-printable characters */ for (i = 0; i < text_len; i++) { unsigned char c = text[i]; if (c < ' ' || c >= 127 || c == '\\') p += scnprintf(p, e - p, "\\x%02x", c); else append_char(&p, e, c); } append_char(&p, e, endc); return p - buf; } static ssize_t msg_add_dict_text(char *buf, size_t size, const char *key, const char *val) { size_t val_len = strlen(val); ssize_t len; if (!val_len) return 0; len = msg_add_ext_text(buf, size, "", 0, ' '); /* dict prefix */ len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '='); len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n'); return len; } static ssize_t msg_print_ext_body(char *buf, size_t size, char *text, size_t text_len, struct dev_printk_info *dev_info) { ssize_t len; len = msg_add_ext_text(buf, size, text, text_len, '\n'); if (!dev_info) goto out; len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM", dev_info->subsystem); len += msg_add_dict_text(buf + len, size - len, "DEVICE", dev_info->device); out: return len; } /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { atomic64_t seq; struct ratelimit_state rs; struct mutex lock; struct printk_buffers pbufs; }; static __printf(3, 4) __cold int devkmsg_emit(int facility, int level, const char *fmt, ...) { va_list args; int r; va_start(args, fmt); r = vprintk_emit(facility, level, NULL, fmt, args); va_end(args); return r; } static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) { char *buf, *line; int level = default_message_loglevel; int facility = 1; /* LOG_USER */ struct file *file = iocb->ki_filp; struct devkmsg_user *user = file->private_data; size_t len = iov_iter_count(from); ssize_t ret = len; if (len > PRINTKRB_RECORD_MAX) return -EINVAL; /* Ignore when user logging is disabled. */ if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) return len; /* Ratelimit when not explicitly enabled. */ if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) { if (!___ratelimit(&user->rs, current->comm)) return ret; } buf = kmalloc(len+1, GFP_KERNEL); if (buf == NULL) return -ENOMEM; buf[len] = '\0'; if (!copy_from_iter_full(buf, len, from)) { kfree(buf); return -EFAULT; } /* * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace * the decimal value represents 32bit, the lower 3 bit are the log * level, the rest are the log facility. * * If no prefix or no userspace facility is specified, we * enforce LOG_USER, to be able to reliably distinguish * kernel-generated messages from userspace-injected ones. 
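 *
 * For example, a write of "<14>hello" carries prefix value 14, i.e.
 * facility 1 (LOG_USER, 14 >> 3) and level 6 (LOG_INFO, 14 & 7); a bare
 * "hello" keeps the defaults set above (facility LOG_USER,
 * default_message_loglevel).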
*/ line = buf; if (line[0] == '<') { char *endp = NULL; unsigned int u; u = simple_strtoul(line + 1, &endp, 10); if (endp && endp[0] == '>') { level = LOG_LEVEL(u); if (LOG_FACILITY(u) != 0) facility = LOG_FACILITY(u); endp++; line = endp; } } devkmsg_emit(facility, level, "%s", line); kfree(buf); return ret; } static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct devkmsg_user *user = file->private_data; char *outbuf = &user->pbufs.outbuf[0]; struct printk_message pmsg = { .pbufs = &user->pbufs, }; ssize_t ret; ret = mutex_lock_interruptible(&user->lock); if (ret) return ret; if (!printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, false)) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; goto out; } /* * Guarantee this task is visible on the waitqueue before * checking the wake condition. * * The full memory barrier within set_current_state() of * prepare_to_wait_event() pairs with the full memory barrier * within wq_has_sleeper(). * * This pairs with __wake_up_klogd:A. */ ret = wait_event_interruptible(log_wait, printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, false)); /* LMM(devkmsg_read:A) */ if (ret) goto out; } if (pmsg.dropped) { /* our last seen message is gone, return error and reset */ atomic64_set(&user->seq, pmsg.seq); ret = -EPIPE; goto out; } atomic64_set(&user->seq, pmsg.seq + 1); if (pmsg.outbuf_len > count) { ret = -EINVAL; goto out; } if (copy_to_user(buf, outbuf, pmsg.outbuf_len)) { ret = -EFAULT; goto out; } ret = pmsg.outbuf_len; out: mutex_unlock(&user->lock); return ret; } /* * Be careful when modifying this function!!! * * Only few operations are supported because the device works only with the * entire variable length messages (records). Non-standard values are * returned in the other cases and has been this way for quite some time. * User space applications might depend on this behavior. */ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) { struct devkmsg_user *user = file->private_data; loff_t ret = 0; if (offset) return -ESPIPE; switch (whence) { case SEEK_SET: /* the first record */ atomic64_set(&user->seq, prb_first_valid_seq(prb)); break; case SEEK_DATA: /* * The first record after the last SYSLOG_ACTION_CLEAR, * like issued by 'dmesg -c'. Reading /dev/kmsg itself * changes no global state, and does not clear anything. 
*/ atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); break; case SEEK_END: /* after the last record */ atomic64_set(&user->seq, prb_next_seq(prb)); break; default: ret = -EINVAL; } return ret; } static __poll_t devkmsg_poll(struct file *file, poll_table *wait) { struct devkmsg_user *user = file->private_data; struct printk_info info; __poll_t ret = 0; poll_wait(file, &log_wait, wait); if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { /* return error when data has vanished underneath us */ if (info.seq != atomic64_read(&user->seq)) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; } return ret; } static int devkmsg_open(struct inode *inode, struct file *file) { struct devkmsg_user *user; int err; if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) return -EPERM; /* write-only does not need any file context */ if ((file->f_flags & O_ACCMODE) != O_WRONLY) { err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, SYSLOG_FROM_READER); if (err) return err; } user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); if (!user) return -ENOMEM; ratelimit_default_init(&user->rs); ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE); mutex_init(&user->lock); atomic64_set(&user->seq, prb_first_valid_seq(prb)); file->private_data = user; return 0; } static int devkmsg_release(struct inode *inode, struct file *file) { struct devkmsg_user *user = file->private_data; ratelimit_state_exit(&user->rs); mutex_destroy(&user->lock); kvfree(user); return 0; } const struct file_operations kmsg_fops = { .open = devkmsg_open, .read = devkmsg_read, .write_iter = devkmsg_write, .llseek = devkmsg_llseek, .poll = devkmsg_poll, .release = devkmsg_release, }; #ifdef CONFIG_VMCORE_INFO /* * This appends the listed symbols to /proc/vmcore * * /proc/vmcore is used by various utilities, like crash and makedumpfile to * obtain access to symbols that are otherwise very difficult to locate. These * symbols are specifically used so that utilities can access and extract the * dmesg log from a vmcore file after a crash. */ void log_buf_vmcoreinfo_setup(void) { struct dev_printk_info *dev_info = NULL; VMCOREINFO_SYMBOL(prb); VMCOREINFO_SYMBOL(printk_rb_static); VMCOREINFO_SYMBOL(clear_seq); /* * Export struct size and field offsets. User space tools can * parse it and detect any changes to structure down the line. 
*/ VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); VMCOREINFO_OFFSET(printk_ringbuffer, fail); VMCOREINFO_STRUCT_SIZE(prb_desc_ring); VMCOREINFO_OFFSET(prb_desc_ring, count_bits); VMCOREINFO_OFFSET(prb_desc_ring, descs); VMCOREINFO_OFFSET(prb_desc_ring, infos); VMCOREINFO_OFFSET(prb_desc_ring, head_id); VMCOREINFO_OFFSET(prb_desc_ring, tail_id); VMCOREINFO_STRUCT_SIZE(prb_desc); VMCOREINFO_OFFSET(prb_desc, state_var); VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); VMCOREINFO_OFFSET(prb_data_blk_lpos, next); VMCOREINFO_STRUCT_SIZE(printk_info); VMCOREINFO_OFFSET(printk_info, seq); VMCOREINFO_OFFSET(printk_info, ts_nsec); VMCOREINFO_OFFSET(printk_info, text_len); VMCOREINFO_OFFSET(printk_info, caller_id); VMCOREINFO_OFFSET(printk_info, dev_info); VMCOREINFO_STRUCT_SIZE(dev_printk_info); VMCOREINFO_OFFSET(dev_printk_info, subsystem); VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); VMCOREINFO_OFFSET(dev_printk_info, device); VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); VMCOREINFO_STRUCT_SIZE(prb_data_ring); VMCOREINFO_OFFSET(prb_data_ring, size_bits); VMCOREINFO_OFFSET(prb_data_ring, data); VMCOREINFO_OFFSET(prb_data_ring, head_lpos); VMCOREINFO_OFFSET(prb_data_ring, tail_lpos); VMCOREINFO_SIZE(atomic_long_t); VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); VMCOREINFO_STRUCT_SIZE(latched_seq); VMCOREINFO_OFFSET(latched_seq, val); } #endif /* requested log_buf_len from kernel cmdline */ static unsigned long __initdata new_log_buf_len; /* we practice scaling the ring buffer by powers of 2 */ static void __init log_buf_len_update(u64 size) { if (size > (u64)LOG_BUF_LEN_MAX) { size = (u64)LOG_BUF_LEN_MAX; pr_err("log_buf over 2G is not supported.\n"); } if (size) size = roundup_pow_of_two(size); if (size > log_buf_len) new_log_buf_len = (unsigned long)size; } /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { u64 size; if (!str) return -EINVAL; size = memparse(str, &str); log_buf_len_update(size); return 0; } early_param("log_buf_len", log_buf_len_setup); #ifdef CONFIG_SMP #define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) static void __init log_buf_add_cpu(void) { unsigned int cpu_extra; /* * archs should set up cpu_possible_bits properly with * set_cpu_possible() after setup_arch() but just in * case lets ensure this is valid. 
*/ if (num_possible_cpus() == 1) return; cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; /* by default this will only continue through for large > 64 CPUs */ if (cpu_extra <= __LOG_BUF_LEN / 2) return; pr_info("log_buf_len individual max cpu contribution: %d bytes\n", __LOG_CPU_MAX_BUF_LEN); pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", cpu_extra); pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); log_buf_len_update(cpu_extra + __LOG_BUF_LEN); } #else /* !CONFIG_SMP */ static inline void log_buf_add_cpu(void) {} #endif /* CONFIG_SMP */ static void __init set_percpu_data_ready(void) { __printk_percpu_data_ready = true; } static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, struct printk_record *r) { struct prb_reserved_entry e; struct printk_record dest_r; prb_rec_init_wr(&dest_r, r->info->text_len); if (!prb_reserve(&e, rb, &dest_r)) return 0; memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); dest_r.info->text_len = r->info->text_len; dest_r.info->facility = r->info->facility; dest_r.info->level = r->info->level; dest_r.info->flags = r->info->flags; dest_r.info->ts_nsec = r->info->ts_nsec; dest_r.info->caller_id = r->info->caller_id; memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); prb_final_commit(&e); return prb_record_text_space(&e); } static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata; void __init setup_log_buf(int early) { struct printk_info *new_infos; unsigned int new_descs_count; struct prb_desc *new_descs; struct printk_info info; struct printk_record r; unsigned int text_size; size_t new_descs_size; size_t new_infos_size; unsigned long flags; char *new_log_buf; unsigned int free; u64 seq; /* * Some archs call setup_log_buf() multiple times - first is very * early, e.g. from setup_arch(), and second - when percpu_areas * are initialised. */ if (!early) set_percpu_data_ready(); if (log_buf != __log_buf) return; if (!early && !new_log_buf_len) log_buf_add_cpu(); if (!new_log_buf_len) return; new_descs_count = new_log_buf_len >> PRB_AVGBITS; if (new_descs_count == 0) { pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); return; } new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); if (unlikely(!new_log_buf)) { pr_err("log_buf_len: %lu text bytes not available\n", new_log_buf_len); return; } new_descs_size = new_descs_count * sizeof(struct prb_desc); new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); if (unlikely(!new_descs)) { pr_err("log_buf_len: %zu desc bytes not available\n", new_descs_size); goto err_free_log_buf; } new_infos_size = new_descs_count * sizeof(struct printk_info); new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); if (unlikely(!new_infos)) { pr_err("log_buf_len: %zu info bytes not available\n", new_infos_size); goto err_free_descs; } prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf)); prb_init(&printk_rb_dynamic, new_log_buf, ilog2(new_log_buf_len), new_descs, ilog2(new_descs_count), new_infos); local_irq_save(flags); log_buf_len = new_log_buf_len; log_buf = new_log_buf; new_log_buf_len = 0; free = __LOG_BUF_LEN; prb_for_each_record(0, &printk_rb_static, seq, &r) { text_size = add_to_rb(&printk_rb_dynamic, &r); if (text_size > free) free = 0; else free -= text_size; } prb = &printk_rb_dynamic; local_irq_restore(flags); /* * Copy any remaining messages that might have appeared from * NMI context after copying but before switching to the * dynamic buffer. 
*/ prb_for_each_record(seq, &printk_rb_static, seq, &r) { text_size = add_to_rb(&printk_rb_dynamic, &r); if (text_size > free) free = 0; else free -= text_size; } if (seq != prb_next_seq(&printk_rb_static)) { pr_err("dropped %llu messages\n", prb_next_seq(&printk_rb_static) - seq); } pr_info("log_buf_len: %u bytes\n", log_buf_len); pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); return; err_free_descs: memblock_free(new_descs, new_descs_size); err_free_log_buf: memblock_free(new_log_buf, new_log_buf_len); } static bool __read_mostly ignore_loglevel; static int __init ignore_loglevel_setup(char *str) { ignore_loglevel = true; pr_info("debug: ignoring loglevel setting.\n"); return 0; } early_param("ignore_loglevel", ignore_loglevel_setup); module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting (prints all kernel messages to the console)"); static bool suppress_message_printing(int level) { return (level >= console_loglevel && !ignore_loglevel); } #ifdef CONFIG_BOOT_PRINTK_DELAY static int boot_delay; /* msecs delay after each printk during bootup */ static unsigned long long loops_per_msec; /* based on boot_delay */ static int __init boot_delay_setup(char *str) { unsigned long lpj; lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */ loops_per_msec = (unsigned long long)lpj / 1000 * HZ; get_option(&str, &boot_delay); if (boot_delay > 10 * 1000) boot_delay = 0; pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, " "HZ: %d, loops_per_msec: %llu\n", boot_delay, preset_lpj, lpj, HZ, loops_per_msec); return 0; } early_param("boot_delay", boot_delay_setup); static void boot_delay_msec(int level) { unsigned long long k; unsigned long timeout; if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) || suppress_message_printing(level)) { return; } k = (unsigned long long)loops_per_msec * boot_delay; timeout = jiffies + msecs_to_jiffies(boot_delay); while (k) { k--; cpu_relax(); /* * use (volatile) jiffies to prevent * compiler reduction; loop termination via jiffies * is secondary and may or may not happen. */ if (time_after(jiffies, timeout)) break; touch_nmi_watchdog(); } } #else static inline void boot_delay_msec(int level) { } #endif static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); static size_t print_syslog(unsigned int level, char *buf) { return sprintf(buf, "<%u>", level); } static size_t print_time(u64 ts, char *buf) { unsigned long rem_nsec = do_div(ts, 1000000000); return sprintf(buf, "[%5lu.%06lu]", (unsigned long)ts, rem_nsec / 1000); } #ifdef CONFIG_PRINTK_CALLER static size_t print_caller(u32 id, char *buf) { char caller[12]; snprintf(caller, sizeof(caller), "%c%u", id & 0x80000000 ? 'C' : 'T', id & ~0x80000000); return sprintf(buf, "[%6s]", caller); } #else #define print_caller(id, buf) 0 #endif static size_t info_print_prefix(const struct printk_info *info, bool syslog, bool time, char *buf) { size_t len = 0; if (syslog) len = print_syslog((info->facility << 3) | info->level, buf); if (time) len += print_time(info->ts_nsec, buf + len); len += print_caller(info->caller_id, buf + len); if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) { buf[len++] = ' '; buf[len] = '\0'; } return len; } /* * Prepare the record for printing. The text is shifted within the given * buffer to avoid a need for another one. The following operations are * done: * * - Add prefix for each line. 
* - Drop truncated lines that no longer fit into the buffer. * - Add the trailing newline that has been removed in vprintk_store(). * - Add a string terminator. * * Since the produced string is always terminated, the maximum possible * return value is @r->text_buf_size - 1; * * Return: The length of the updated/prepared text, including the added * prefixes and the newline. The terminator is not counted. The dropped * line(s) are not counted. */ static size_t record_print_text(struct printk_record *r, bool syslog, bool time) { size_t text_len = r->info->text_len; size_t buf_size = r->text_buf_size; char *text = r->text_buf; char prefix[PRINTK_PREFIX_MAX]; bool truncated = false; size_t prefix_len; size_t line_len; size_t len = 0; char *next; /* * If the message was truncated because the buffer was not large * enough, treat the available text as if it were the full text. */ if (text_len > buf_size) text_len = buf_size; prefix_len = info_print_prefix(r->info, syslog, time, prefix); /* * @text_len: bytes of unprocessed text * @line_len: bytes of current line _without_ newline * @text: pointer to beginning of current line * @len: number of bytes prepared in r->text_buf */ for (;;) { next = memchr(text, '\n', text_len); if (next) { line_len = next - text; } else { /* Drop truncated line(s). */ if (truncated) break; line_len = text_len; } /* * Truncate the text if there is not enough space to add the * prefix and a trailing newline and a terminator. */ if (len + prefix_len + text_len + 1 + 1 > buf_size) { /* Drop even the current line if no space. */ if (len + prefix_len + line_len + 1 + 1 > buf_size) break; text_len = buf_size - len - prefix_len - 1 - 1; truncated = true; } memmove(text + prefix_len, text, text_len); memcpy(text, prefix, prefix_len); /* * Increment the prepared length to include the text and * prefix that were just moved+copied. Also increment for the * newline at the end of this line. If this is the last line, * there is no newline, but it will be added immediately below. */ len += prefix_len + line_len + 1; if (text_len == line_len) { /* * This is the last line. Add the trailing newline * removed in vprintk_store(). */ text[prefix_len + line_len] = '\n'; break; } /* * Advance beyond the added prefix and the related line with * its newline. */ text += prefix_len + line_len + 1; /* * The remaining text has only decreased by the line with its * newline. * * Note that @text_len can become zero. It happens when @text * ended with a newline (either due to truncation or the * original string ending with "\n\n"). The loop is correctly * repeated and (if not truncated) an empty line with a prefix * will be prepared. */ text_len -= line_len + 1; } /* * If a buffer was provided, it will be terminated. Space for the * string terminator is guaranteed to be available. The terminator is * not counted in the return value. */ if (buf_size > 0) r->text_buf[len] = 0; return len; } static size_t get_record_print_text_size(struct printk_info *info, unsigned int line_count, bool syslog, bool time) { char prefix[PRINTK_PREFIX_MAX]; size_t prefix_len; prefix_len = info_print_prefix(info, syslog, time, prefix); /* * Each line will be preceded with a prefix. The intermediate * newlines are already within the text, but a final trailing * newline will be added. */ return ((prefix_len * line_count) + info->text_len + 1); } /* * Beginning with @start_seq, find the first record where it and all following * records up to (but not including) @max_seq fit into @size. 
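 *
 * (For instance, syslog_print_all() uses this with the size of the
 * user-supplied read buffer, so that only the newest records that fully
 * fit are copied out.)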
* * @max_seq is simply an upper bound and does not need to exist. If the caller * does not require an upper bound, -1 can be used for @max_seq. */ static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, bool syslog, bool time) { struct printk_info info; unsigned int line_count; size_t len = 0; u64 seq; /* Determine the size of the records up to @max_seq. */ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { if (info.seq >= max_seq) break; len += get_record_print_text_size(&info, line_count, syslog, time); } /* * Adjust the upper bound for the next loop to avoid subtracting * lengths that were never added. */ if (seq < max_seq) max_seq = seq; /* * Move first record forward until length fits into the buffer. Ignore * newest messages that were not counted in the above cycle. Messages * might appear and get lost in the meantime. This is a best effort * that prevents an infinite loop that could occur with a retry. */ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { if (len <= size || info.seq >= max_seq) break; len -= get_record_print_text_size(&info, line_count, syslog, time); } return seq; } /* The caller is responsible for making sure @size is greater than 0. */ static int syslog_print(char __user *buf, int size) { struct printk_info info; struct printk_record r; char *text; int len = 0; u64 seq; text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL); if (!text) return -ENOMEM; prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX); mutex_lock(&syslog_lock); /* * Wait for the @syslog_seq record to be available. @syslog_seq may * change while waiting. */ do { seq = syslog_seq; mutex_unlock(&syslog_lock); /* * Guarantee this task is visible on the waitqueue before * checking the wake condition. * * The full memory barrier within set_current_state() of * prepare_to_wait_event() pairs with the full memory barrier * within wq_has_sleeper(). * * This pairs with __wake_up_klogd:A. */ len = wait_event_interruptible(log_wait, prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */ mutex_lock(&syslog_lock); if (len) goto out; } while (syslog_seq != seq); /* * Copy records that fit into the buffer. The above cycle makes sure * that the first record is always available. */ do { size_t n; size_t skip; int err; if (!prb_read_valid(prb, syslog_seq, &r)) break; if (r.info->seq != syslog_seq) { /* message is gone, move to next valid one */ syslog_seq = r.info->seq; syslog_partial = 0; } /* * To keep reading/counting partial line consistent, * use printk_time value as of the beginning of a line. 
*/ if (!syslog_partial) syslog_time = printk_time; skip = syslog_partial; n = record_print_text(&r, true, syslog_time); if (n - syslog_partial <= size) { /* message fits into buffer, move forward */ syslog_seq = r.info->seq + 1; n -= syslog_partial; syslog_partial = 0; } else if (!len){ /* partial read(), remember position */ n = size; syslog_partial += n; } else n = 0; if (!n) break; mutex_unlock(&syslog_lock); err = copy_to_user(buf, text + skip, n); mutex_lock(&syslog_lock); if (err) { if (!len) len = -EFAULT; break; } len += n; size -= n; buf += n; } while (size); out: mutex_unlock(&syslog_lock); kfree(text); return len; } static int syslog_print_all(char __user *buf, int size, bool clear) { struct printk_info info; struct printk_record r; char *text; int len = 0; u64 seq; bool time; text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL); if (!text) return -ENOMEM; time = printk_time; /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. */ seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, size, true, time); prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX); prb_for_each_record(seq, prb, seq, &r) { int textlen; textlen = record_print_text(&r, true, time); if (len + textlen > size) { seq--; break; } if (copy_to_user(buf + len, text, textlen)) len = -EFAULT; else len += textlen; if (len < 0) break; } if (clear) { mutex_lock(&syslog_lock); latched_seq_write(&clear_seq, seq); mutex_unlock(&syslog_lock); } kfree(text); return len; } static void syslog_clear(void) { mutex_lock(&syslog_lock); latched_seq_write(&clear_seq, prb_next_seq(prb)); mutex_unlock(&syslog_lock); } int do_syslog(int type, char __user *buf, int len, int source) { struct printk_info info; bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; error = check_syslog_permissions(type, source); if (error) return error; switch (type) { case SYSLOG_ACTION_CLOSE: /* Close log */ break; case SYSLOG_ACTION_OPEN: /* Open log */ break; case SYSLOG_ACTION_READ: /* Read from log */ if (!buf || len < 0) return -EINVAL; if (!len) return 0; if (!access_ok(buf, len)) return -EFAULT; error = syslog_print(buf, len); break; /* Read/clear last kernel messages */ case SYSLOG_ACTION_READ_CLEAR: clear = true; fallthrough; /* Read last kernel messages */ case SYSLOG_ACTION_READ_ALL: if (!buf || len < 0) return -EINVAL; if (!len) return 0; if (!access_ok(buf, len)) return -EFAULT; error = syslog_print_all(buf, len, clear); break; /* Clear ring buffer */ case SYSLOG_ACTION_CLEAR: syslog_clear(); break; /* Disable logging to console */ case SYSLOG_ACTION_CONSOLE_OFF: if (saved_console_loglevel == LOGLEVEL_DEFAULT) saved_console_loglevel = console_loglevel; console_loglevel = minimum_console_loglevel; break; /* Enable logging to console */ case SYSLOG_ACTION_CONSOLE_ON: if (saved_console_loglevel != LOGLEVEL_DEFAULT) { console_loglevel = saved_console_loglevel; saved_console_loglevel = LOGLEVEL_DEFAULT; } break; /* Set level of messages printed to console */ case SYSLOG_ACTION_CONSOLE_LEVEL: if (len < 1 || len > 8) return -EINVAL; if (len < minimum_console_loglevel) len = minimum_console_loglevel; console_loglevel = len; /* Implicitly re-enable logging to console */ saved_console_loglevel = LOGLEVEL_DEFAULT; break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: mutex_lock(&syslog_lock); if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { /* No unread messages. 
*/ mutex_unlock(&syslog_lock); return 0; } if (info.seq != syslog_seq) { /* messages are gone, move to first one */ syslog_seq = info.seq; syslog_partial = 0; } if (source == SYSLOG_FROM_PROC) { /* * Short-cut for poll(/"proc/kmsg") which simply checks * for pending data, not the size; return the count of * records, not the length. */ error = prb_next_seq(prb) - syslog_seq; } else { bool time = syslog_partial ? syslog_time : printk_time; unsigned int line_count; u64 seq; prb_for_each_info(syslog_seq, prb, seq, &info, &line_count) { error += get_record_print_text_size(&info, line_count, true, time); time = printk_time; } error -= syslog_partial; } mutex_unlock(&syslog_lock); break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: error = log_buf_len; break; default: error = -EINVAL; break; } return error; } SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) { return do_syslog(type, buf, len, SYSLOG_FROM_READER); } /* * Special console_lock variants that help to reduce the risk of soft-lockups. * They allow to pass console_lock to another printk() call using a busy wait. */ #ifdef CONFIG_LOCKDEP static struct lockdep_map console_owner_dep_map = { .name = "console_owner" }; #endif static DEFINE_RAW_SPINLOCK(console_owner_lock); static struct task_struct *console_owner; static bool console_waiter; /** * console_lock_spinning_enable - mark beginning of code where another * thread might safely busy wait * * This basically converts console_lock into a spinlock. This marks * the section where the console_lock owner can not sleep, because * there may be a waiter spinning (like a spinlock). Also it must be * ready to hand over the lock at the end of the section. */ static void console_lock_spinning_enable(void) { /* * Do not use spinning in panic(). The panic CPU wants to keep the lock. * Non-panic CPUs abandon the flush anyway. * * Just keep the lockdep annotation. The panic-CPU should avoid * taking console_owner_lock because it might cause a deadlock. * This looks like the easiest way how to prevent false lockdep * reports without handling races a lockless way. */ if (panic_in_progress()) goto lockdep; raw_spin_lock(&console_owner_lock); console_owner = current; raw_spin_unlock(&console_owner_lock); lockdep: /* The waiter may spin on us after setting console_owner */ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); } /** * console_lock_spinning_disable_and_check - mark end of code where another * thread was able to busy wait and check if there is a waiter * @cookie: cookie returned from console_srcu_read_lock() * * This is called at the end of the section where spinning is allowed. * It has two functions. First, it is a signal that it is no longer * safe to start busy waiting for the lock. Second, it checks if * there is a busy waiter and passes the lock rights to her. * * Important: Callers lose both the console_lock and the SRCU read lock if * there was a busy waiter. They must not touch items synchronized by * console_lock or SRCU read lock in this case. * * Return: 1 if the lock rights were passed, 0 otherwise. */ static int console_lock_spinning_disable_and_check(int cookie) { int waiter; /* * Ignore spinning waiters during panic() because they might get stopped * or blocked at any time, * * It is safe because nobody is allowed to start spinning during panic * in the first place. If there has been a waiter then non panic CPUs * might stay spinning. They would get stopped anyway. 
The panic context * will never start spinning and an interrupted spin on panic CPU will * never continue. */ if (panic_in_progress()) { /* Keep lockdep happy. */ spin_release(&console_owner_dep_map, _THIS_IP_); return 0; } raw_spin_lock(&console_owner_lock); waiter = READ_ONCE(console_waiter); console_owner = NULL; raw_spin_unlock(&console_owner_lock); if (!waiter) { spin_release(&console_owner_dep_map, _THIS_IP_); return 0; } /* The waiter is now free to continue */ WRITE_ONCE(console_waiter, false); spin_release(&console_owner_dep_map, _THIS_IP_); /* * Preserve lockdep lock ordering. Release the SRCU read lock before * releasing the console_lock. */ console_srcu_read_unlock(cookie); /* * Hand off console_lock to waiter. The waiter will perform * the up(). After this, the waiter is the console_lock owner. */ mutex_release(&console_lock_dep_map, _THIS_IP_); return 1; } /** * console_trylock_spinning - try to get console_lock by busy waiting * * This allows to busy wait for the console_lock when the current * owner is running in specially marked sections. It means that * the current owner is running and cannot reschedule until it * is ready to lose the lock. * * Return: 1 if we got the lock, 0 othrewise */ static int console_trylock_spinning(void) { struct task_struct *owner = NULL; bool waiter; bool spin = false; unsigned long flags; if (console_trylock()) return 1; /* * It's unsafe to spin once a panic has begun. If we are the * panic CPU, we may have already halted the owner of the * console_sem. If we are not the panic CPU, then we should * avoid taking console_sem, so the panic CPU has a better * chance of cleanly acquiring it later. */ if (panic_in_progress()) return 0; printk_safe_enter_irqsave(flags); raw_spin_lock(&console_owner_lock); owner = READ_ONCE(console_owner); waiter = READ_ONCE(console_waiter); if (!waiter && owner && owner != current) { WRITE_ONCE(console_waiter, true); spin = true; } raw_spin_unlock(&console_owner_lock); /* * If there is an active printk() writing to the * consoles, instead of having it write our data too, * see if we can offload that load from the active * printer, and do some printing ourselves. * Go into a spin only if there isn't already a waiter * spinning, and there is an active printer, and * that active printer isn't us (recursive printk?). */ if (!spin) { printk_safe_exit_irqrestore(flags); return 0; } /* We spin waiting for the owner to release us */ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); /* Owner will clear console_waiter on hand off */ while (READ_ONCE(console_waiter)) cpu_relax(); spin_release(&console_owner_dep_map, _THIS_IP_); printk_safe_exit_irqrestore(flags); /* * The owner passed the console lock to us. * Since we did not spin on console lock, annotate * this as a trylock. Otherwise lockdep will * complain. */ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); /* * Update @console_may_schedule for trylock because the previous * owner may have been schedulable. */ console_may_schedule = 0; return 1; } /* * Recursion is tracked separately on each CPU. If NMIs are supported, an * additional NMI context per CPU is also separately tracked. Until per-CPU * is available, a separate "early tracking" is performed. */ static DEFINE_PER_CPU(u8, printk_count); static u8 printk_count_early; #ifdef CONFIG_HAVE_NMI static DEFINE_PER_CPU(u8, printk_count_nmi); static u8 printk_count_nmi_early; #endif /* * Recursion is limited to keep the output sane. 
printk() should not require * more than 1 level of recursion (allowing, for example, printk() to trigger * a WARN), but a higher value is used in case some printk-internal errors * exist, such as the ringbuffer validation checks failing. */ #define PRINTK_MAX_RECURSION 3 /* * Return a pointer to the dedicated counter for the CPU+context of the * caller. */ static u8 *__printk_recursion_counter(void) { #ifdef CONFIG_HAVE_NMI if (in_nmi()) { if (printk_percpu_data_ready()) return this_cpu_ptr(&printk_count_nmi); return &printk_count_nmi_early; } #endif if (printk_percpu_data_ready()) return this_cpu_ptr(&printk_count); return &printk_count_early; } /* * Enter recursion tracking. Interrupts are disabled to simplify tracking. * The caller must check the boolean return value to see if the recursion is * allowed. On failure, interrupts are not disabled. * * @recursion_ptr must be a variable of type (u8 *) and is the same variable * that is passed to printk_exit_irqrestore(). */ #define printk_enter_irqsave(recursion_ptr, flags) \ ({ \ bool success = true; \ \ typecheck(u8 *, recursion_ptr); \ local_irq_save(flags); \ (recursion_ptr) = __printk_recursion_counter(); \ if (*(recursion_ptr) > PRINTK_MAX_RECURSION) { \ local_irq_restore(flags); \ success = false; \ } else { \ (*(recursion_ptr))++; \ } \ success; \ }) /* Exit recursion tracking, restoring interrupts. */ #define printk_exit_irqrestore(recursion_ptr, flags) \ do { \ typecheck(u8 *, recursion_ptr); \ (*(recursion_ptr))--; \ local_irq_restore(flags); \ } while (0) int printk_delay_msec __read_mostly; static inline void printk_delay(int level) { boot_delay_msec(level); if (unlikely(printk_delay_msec)) { int m = printk_delay_msec; while (m--) { mdelay(1); touch_nmi_watchdog(); } } } static inline u32 printk_caller_id(void) { return in_task() ? task_pid_nr(current) : 0x80000000 + smp_processor_id(); } /** * printk_parse_prefix - Parse level and control flags. * * @text: The terminated text message. * @level: A pointer to the current level value, will be updated. * @flags: A pointer to the current printk_info flags, will be updated. * * @level may be NULL if the caller is not interested in the parsed value. * Otherwise the variable pointed to by @level must be set to * LOGLEVEL_DEFAULT in order to be updated with the parsed value. * * @flags may be NULL if the caller is not interested in the parsed value. * Otherwise the variable pointed to by @flags will be OR'd with the parsed * value. * * Return: The length of the parsed level and control flags. */ u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags) { u16 prefix_len = 0; int kern_level; while (*text) { kern_level = printk_get_level(text); if (!kern_level) break; switch (kern_level) { case '0' ... '7': if (level && *level == LOGLEVEL_DEFAULT) *level = kern_level - '0'; break; case 'c': /* KERN_CONT */ if (flags) *flags |= LOG_CONT; } prefix_len += 2; text += 2; } return prefix_len; } __printf(5, 0) static u16 printk_sprint(char *text, u16 size, int facility, enum printk_info_flags *flags, const char *fmt, va_list args) { u16 text_len; text_len = vscnprintf(text, size, fmt, args); /* Mark and strip a trailing newline. */ if (text_len && text[text_len - 1] == '\n') { text_len--; *flags |= LOG_NEWLINE; } /* Strip log level and control flags. 
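	 *
	 * For example, printk(KERN_ERR "foo\n") arrives here as the text
	 * "\0013foo" once the trailing newline has been stripped above;
	 * for the kernel facility (0) the two-byte "\0013" prefix is
	 * removed below, leaving just "foo" (the level itself was already
	 * parsed by the caller).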
*/ if (facility == 0) { u16 prefix_len; prefix_len = printk_parse_prefix(text, NULL, NULL); if (prefix_len) { text_len -= prefix_len; memmove(text, text + prefix_len, text_len); } } trace_console(text, text_len); return text_len; } __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { struct prb_reserved_entry e; enum printk_info_flags flags = 0; struct printk_record r; unsigned long irqflags; u16 trunc_msg_len = 0; char prefix_buf[8]; u8 *recursion_ptr; u16 reserve_size; va_list args2; u32 caller_id; u16 text_len; int ret = 0; u64 ts_nsec; if (!printk_enter_irqsave(recursion_ptr, irqflags)) return 0; /* * Since the duration of printk() can vary depending on the message * and state of the ringbuffer, grab the timestamp now so that it is * close to the call of printk(). This provides a more deterministic * timestamp with respect to the caller. */ ts_nsec = local_clock(); caller_id = printk_caller_id(); /* * The sprintf needs to come first since the syslog prefix might be * passed in as a parameter. An extra byte must be reserved so that * later the vscnprintf() into the reserved buffer has room for the * terminating '\0', which is not counted by vsnprintf(). */ va_copy(args2, args); reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1; va_end(args2); if (reserve_size > PRINTKRB_RECORD_MAX) reserve_size = PRINTKRB_RECORD_MAX; /* Extract log level or control flags. */ if (facility == 0) printk_parse_prefix(&prefix_buf[0], &level, &flags); if (level == LOGLEVEL_DEFAULT) level = default_message_loglevel; if (dev_info) flags |= LOG_NEWLINE; if (flags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) { text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, facility, &flags, fmt, args); r.info->text_len += text_len; if (flags & LOG_NEWLINE) { r.info->flags |= LOG_NEWLINE; prb_final_commit(&e); } else { prb_commit(&e); } ret = text_len; goto out; } } /* * Explicitly initialize the record before every prb_reserve() call. * prb_reserve_in_last() and prb_reserve() purposely invalidate the * structure when they fail. */ prb_rec_init_wr(&r, reserve_size); if (!prb_reserve(&e, prb, &r)) { /* truncate the message if it is too long for empty buffer */ truncate_msg(&reserve_size, &trunc_msg_len); prb_rec_init_wr(&r, reserve_size + trunc_msg_len); if (!prb_reserve(&e, prb, &r)) goto out; } /* fill message */ text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args); if (trunc_msg_len) memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); r.info->text_len = text_len + trunc_msg_len; r.info->facility = facility; r.info->level = level & 7; r.info->flags = flags & 0x1f; r.info->ts_nsec = ts_nsec; r.info->caller_id = caller_id; if (dev_info) memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); /* A message without a trailing newline can be continued. */ if (!(flags & LOG_NEWLINE)) prb_commit(&e); else prb_final_commit(&e); ret = text_len + trunc_msg_len; out: printk_exit_irqrestore(recursion_ptr, irqflags); return ret; } asmlinkage int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { int printed_len; bool in_sched = false; /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) return 0; /* * The messages on the panic CPU are the most important. 
If * non-panic CPUs are generating any messages, they will be * silently dropped. */ if (other_cpu_in_panic()) return 0; if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; } printk_delay(level); printed_len = vprintk_store(facility, level, dev_info, fmt, args); /* If called from the scheduler, we can not call up(). */ if (!in_sched) { /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during * printing of all remaining records to all consoles so that * this context can return as soon as possible. Hopefully * another printk() caller will take over the printing. */ preempt_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers. With the * spinning variant, this context tries to take over the * printing from another printing context. */ if (console_trylock_spinning()) console_unlock(); preempt_enable(); } if (in_sched) defer_console_output(); else wake_up_klogd(); return printed_len; } EXPORT_SYMBOL(vprintk_emit); int vprintk_default(const char *fmt, va_list args) { return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); } EXPORT_SYMBOL_GPL(vprintk_default); asmlinkage __visible int _printk(const char *fmt, ...) { va_list args; int r; va_start(args, fmt); r = vprintk(fmt, args); va_end(args); return r; } EXPORT_SYMBOL(_printk); static bool pr_flush(int timeout_ms, bool reset_on_progress); static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); #else /* CONFIG_PRINTK */ #define printk_time false #define prb_read_valid(rb, seq, r) false #define prb_first_valid_seq(rb) 0 #define prb_next_seq(rb) 0 static u64 syslog_seq; static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } #endif /* CONFIG_PRINTK */ #ifdef CONFIG_EARLY_PRINTK struct console *early_console; asmlinkage __visible void early_printk(const char *fmt, ...) { va_list ap; char buf[512]; int n; if (!early_console) return; va_start(ap, fmt); n = vscnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); early_console->write(early_console, buf, n); } #endif static void set_user_specified(struct console_cmdline *c, bool user_specified) { if (!user_specified) return; /* * @c console was defined by the user on the command line. * Do not clear when added twice also by SPCR or the device tree. */ c->user_specified = true; /* At least one console defined by the user on the command line. */ console_set_on_cmdline = 1; } static int __add_preferred_console(const char *name, const short idx, char *options, char *brl_options, bool user_specified) { struct console_cmdline *c; int i; /* * We use a signed short index for struct console for device drivers to * indicate a not yet assigned index or port. However, a negative index * value is not valid for preferred console. */ if (idx < 0) return -EINVAL; /* * See if this tty is not yet registered, and * if we have a slot free. 
*/ for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && c->name[0]; i++, c++) { if (strcmp(c->name, name) == 0 && c->index == idx) { if (!brl_options) preferred_console = i; set_user_specified(c, user_specified); return 0; } } if (i == MAX_CMDLINECONSOLES) return -E2BIG; if (!brl_options) preferred_console = i; strscpy(c->name, name, sizeof(c->name)); c->options = options; set_user_specified(c, user_specified); braille_set_options(c, brl_options); c->index = idx; return 0; } static int __init console_msg_format_setup(char *str) { if (!strcmp(str, "syslog")) console_msg_format = MSG_FORMAT_SYSLOG; if (!strcmp(str, "default")) console_msg_format = MSG_FORMAT_DEFAULT; return 1; } __setup("console_msg_format=", console_msg_format_setup); /* * Set up a console. Called via do_early_param() in init/main.c * for each "console=" parameter in the boot command line. */ static int __init console_setup(char *str) { char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ char *s, *options, *brl_options = NULL; int idx; /* * console="" or console=null have been suggested as a way to * disable console output. Use ttynull that has been created * for exactly this purpose. */ if (str[0] == 0 || strcmp(str, "null") == 0) { __add_preferred_console("ttynull", 0, NULL, NULL, true); return 1; } if (_braille_console_setup(&str, &brl_options)) return 1; /* * Decode str into name, index, options. */ if (str[0] >= '0' && str[0] <= '9') { strcpy(buf, "ttyS"); strncpy(buf + 4, str, sizeof(buf) - 5); } else { strncpy(buf, str, sizeof(buf) - 1); } buf[sizeof(buf) - 1] = 0; options = strchr(str, ','); if (options) *(options++) = 0; #ifdef __sparc__ if (!strcmp(str, "ttya")) strcpy(buf, "ttyS0"); if (!strcmp(str, "ttyb")) strcpy(buf, "ttyS1"); #endif for (s = buf; *s; s++) if (isdigit(*s) || *s == ',') break; idx = simple_strtoul(s, NULL, 10); *s = 0; __add_preferred_console(buf, idx, options, brl_options, true); return 1; } __setup("console=", console_setup); /** * add_preferred_console - add a device to the list of preferred consoles. * @name: device name * @idx: device index * @options: options for this console * * The last preferred console added will be used for kernel messages * and stdin/out/err for init. Normally this is used by console_setup * above to handle user-supplied console arguments; however it can also * be used by arch-specific code either to override the user or more * commonly to provide a default console (ie from PROM variables) when * the user has not supplied one. 
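 *
 * An illustrative call from architecture setup code might look like:
 *
 *	add_preferred_console("ttyS", 0, "115200n8");
 *
 * which selects ttyS0 with options "115200n8", much as console=ttyS0,115200n8
 * on the command line would (though not marked as user specified).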
*/ int add_preferred_console(const char *name, const short idx, char *options) { return __add_preferred_console(name, idx, options, NULL, false); } bool console_suspend_enabled = true; EXPORT_SYMBOL(console_suspend_enabled); static int __init console_suspend_disable(char *str) { console_suspend_enabled = false; return 1; } __setup("no_console_suspend", console_suspend_disable); module_param_named(console_suspend, console_suspend_enabled, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(console_suspend, "suspend console during suspend" " and hibernate operations"); static bool printk_console_no_auto_verbose; void console_verbose(void) { if (console_loglevel && !printk_console_no_auto_verbose) console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; } EXPORT_SYMBOL_GPL(console_verbose); module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644); MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc"); /** * suspend_console - suspend the console subsystem * * This disables printk() while we go into suspend states */ void suspend_console(void) { struct console *con; if (!console_suspend_enabled) return; pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); pr_flush(1000, true); console_list_lock(); for_each_console(con) console_srcu_write_flags(con, con->flags | CON_SUSPENDED); console_list_unlock(); /* * Ensure that all SRCU list walks have completed. All printing * contexts must be able to see that they are suspended so that it * is guaranteed that all printing has stopped when this function * completes. */ synchronize_srcu(&console_srcu); } void resume_console(void) { struct console *con; if (!console_suspend_enabled) return; console_list_lock(); for_each_console(con) console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); console_list_unlock(); /* * Ensure that all SRCU list walks have completed. All printing * contexts must be able to see they are no longer suspended so * that they are guaranteed to wake up and resume printing. */ synchronize_srcu(&console_srcu); pr_flush(1000, true); } /** * console_cpu_notify - print deferred console messages after CPU hotplug * @cpu: unused * * If printk() is called from a CPU that is not online yet, the messages * will be printed on the console only if there are CON_ANYTIME consoles. * This function is called when a new CPU comes online (or fails to come * up) or goes offline. */ static int console_cpu_notify(unsigned int cpu) { if (!cpuhp_tasks_frozen) { /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); } return 0; } /** * console_lock - block the console subsystem from printing * * Acquires a lock which guarantees that no consoles will * be in or enter their write() callback. * * Can sleep, returns nothing. */ void console_lock(void) { might_sleep(); /* On panic, the console_lock must be left to the panic cpu. */ while (other_cpu_in_panic()) msleep(1000); down_console_sem(); console_locked = 1; console_may_schedule = 1; } EXPORT_SYMBOL(console_lock); /** * console_trylock - try to block the console subsystem from printing * * Try to acquire a lock which guarantees that no consoles will * be in or enter their write() callback. * * returns 1 on success, and 0 on failure to acquire the lock. */ int console_trylock(void) { /* On panic, the console_lock must be left to the panic cpu. 
	 */
	if (other_cpu_in_panic())
		return 0;

	if (down_trylock_console_sem())
		return 0;
	console_locked = 1;
	console_may_schedule = 0;
	return 1;
}
EXPORT_SYMBOL(console_trylock);

int is_console_locked(void)
{
	return console_locked;
}
EXPORT_SYMBOL(is_console_locked);

/*
 * Check if the given console is currently capable and allowed to print
 * records.
 *
 * Requires the console_srcu_read_lock.
 */
static inline bool console_is_usable(struct console *con)
{
	short flags = console_srcu_read_flags(con);

	if (!(flags & CON_ENABLED))
		return false;

	if ((flags & CON_SUSPENDED))
		return false;

	if (!con->write)
		return false;

	/*
	 * Console drivers may assume that per-cpu resources have been
	 * allocated. So unless they're explicitly marked as being able to
	 * cope (CON_ANYTIME) don't call them until this CPU is officially up.
	 */
	if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
		return false;

	return true;
}

static void __console_unlock(void)
{
	console_locked = 0;
	up_console_sem();
}

#ifdef CONFIG_PRINTK

/*
 * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This
 * is achieved by shifting the existing message over and inserting the dropped
 * message.
 *
 * @pmsg is the printk message to prepend.
 *
 * @dropped is the dropped count to report in the dropped message.
 *
 * If the message text in @pmsg->pbufs->outbuf does not have enough space for
 * the dropped message, the message text will be sufficiently truncated.
 *
 * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
 */
void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
{
	struct printk_buffers *pbufs = pmsg->pbufs;
	const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
	const size_t outbuf_sz = sizeof(pbufs->outbuf);
	char *scratchbuf = &pbufs->scratchbuf[0];
	char *outbuf = &pbufs->outbuf[0];
	size_t len;

	len = scnprintf(scratchbuf, scratchbuf_sz,
			"** %lu printk messages dropped **\n", dropped);

	/*
	 * Make sure outbuf is sufficiently large before prepending.
	 * Keep at least the prefix when the message must be truncated.
	 * It is a rather theoretical problem when someone tries to
	 * use a minimalist buffer.
	 */
	if (WARN_ON_ONCE(len + PRINTK_PREFIX_MAX >= outbuf_sz))
		return;

	if (pmsg->outbuf_len + len >= outbuf_sz) {
		/* Truncate the message, but keep it terminated. */
		pmsg->outbuf_len = outbuf_sz - (len + 1);
		outbuf[pmsg->outbuf_len] = 0;
	}

	memmove(outbuf + len, outbuf, pmsg->outbuf_len + 1);
	memcpy(outbuf, scratchbuf, len);
	pmsg->outbuf_len += len;
}

/*
 * Read and format the specified record (or a later record if the specified
 * record is not available).
 *
 * @pmsg will contain the formatted result. @pmsg->pbufs must point to a
 * struct printk_buffers.
 *
 * @seq is the record to read and format. If it is not available, the next
 * valid record is read.
 *
 * @is_extended specifies if the message should be formatted for extended
 * console output.
 *
 * @may_suppress specifies if records may be skipped based on loglevel.
 *
 * Returns false if no record is available. Otherwise true and all fields
 * of @pmsg are valid. (See the documentation of struct printk_message
 * for information about the @pmsg fields.)
*/ bool printk_get_next_message(struct printk_message *pmsg, u64 seq, bool is_extended, bool may_suppress) { struct printk_buffers *pbufs = pmsg->pbufs; const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); const size_t outbuf_sz = sizeof(pbufs->outbuf); char *scratchbuf = &pbufs->scratchbuf[0]; char *outbuf = &pbufs->outbuf[0]; struct printk_info info; struct printk_record r; size_t len = 0; /* * Formatting extended messages requires a separate buffer, so use the * scratch buffer to read in the ringbuffer text. * * Formatting normal messages is done in-place, so read the ringbuffer * text directly into the output buffer. */ if (is_extended) prb_rec_init_rd(&r, &info, scratchbuf, scratchbuf_sz); else prb_rec_init_rd(&r, &info, outbuf, outbuf_sz); if (!prb_read_valid(prb, seq, &r)) return false; pmsg->seq = r.info->seq; pmsg->dropped = r.info->seq - seq; /* Skip record that has level above the console loglevel. */ if (may_suppress && suppress_message_printing(r.info->level)) goto out; if (is_extended) { len = info_print_ext_header(outbuf, outbuf_sz, r.info); len += msg_print_ext_body(outbuf + len, outbuf_sz - len, &r.text_buf[0], r.info->text_len, &r.info->dev_info); } else { len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); } out: pmsg->outbuf_len = len; return true; } /* * Used as the printk buffers for non-panic, serialized console printing. * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles. * Its usage requires the console_lock held. */ struct printk_buffers printk_shared_pbufs; /* * Print one record for the given console. The record printed is whatever * record is the next available record for the given console. * * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding both the * console_lock and the SRCU read lock. Otherwise it is set to false. * * @cookie is the cookie from the SRCU read lock. * * Returns false if the given console has no next record to print, otherwise * true. * * Requires the console_lock and the SRCU read lock. */ static bool console_emit_next_record(struct console *con, bool *handover, int cookie) { bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; char *outbuf = &printk_shared_pbufs.outbuf[0]; struct printk_message pmsg = { .pbufs = &printk_shared_pbufs, }; unsigned long flags; *handover = false; if (!printk_get_next_message(&pmsg, con->seq, is_extended, true)) return false; con->dropped += pmsg.dropped; /* Skip messages of formatted length 0. */ if (pmsg.outbuf_len == 0) { con->seq = pmsg.seq + 1; goto skip; } if (con->dropped && !is_extended) { console_prepend_dropped(&pmsg, con->dropped); con->dropped = 0; } /* * While actively printing out messages, if another printk() * were to occur on another CPU, it may wait for this one to * finish. This task can not be preempted if there is a * waiter waiting to take over. * * Interrupts are disabled because the hand over to a waiter * must not be interrupted until the hand over is completed * (@console_waiter is cleared). */ printk_safe_enter_irqsave(flags); console_lock_spinning_enable(); /* Do not trace print latency. */ stop_critical_timings(); /* Write everything out to the hardware. 
*/ con->write(con, outbuf, pmsg.outbuf_len); start_critical_timings(); con->seq = pmsg.seq + 1; *handover = console_lock_spinning_disable_and_check(cookie); printk_safe_exit_irqrestore(flags); skip: return true; } #else static bool console_emit_next_record(struct console *con, bool *handover, int cookie) { *handover = false; return false; } #endif /* CONFIG_PRINTK */ /* * Print out all remaining records to all consoles. * * @do_cond_resched is set by the caller. It can be true only in schedulable * context. * * @next_seq is set to the sequence number after the last available record. * The value is valid only when this function returns true. It means that all * usable consoles are completely flushed. * * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding the * console_lock. Otherwise it is set to false. * * Returns true when there was at least one usable console and all messages * were flushed to all usable consoles. A returned false informs the caller * that everything was not flushed (either there were no usable consoles or * another context has taken over printing or it is a panic situation and this * is not the panic CPU). Regardless the reason, the caller should assume it * is not useful to immediately try again. * * Requires the console_lock. */ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) { bool any_usable = false; struct console *con; bool any_progress; int cookie; *next_seq = 0; *handover = false; do { any_progress = false; cookie = console_srcu_read_lock(); for_each_console_srcu(con) { bool progress; if (!console_is_usable(con)) continue; any_usable = true; progress = console_emit_next_record(con, handover, cookie); /* * If a handover has occurred, the SRCU read lock * is already released. */ if (*handover) return false; /* Track the next of the highest seq flushed. */ if (con->seq > *next_seq) *next_seq = con->seq; if (!progress) continue; any_progress = true; /* Allow panic_cpu to take over the consoles safely. */ if (other_cpu_in_panic()) goto abandon; if (do_cond_resched) cond_resched(); } console_srcu_read_unlock(cookie); } while (any_progress); return any_usable; abandon: console_srcu_read_unlock(cookie); return false; } /** * console_unlock - unblock the console subsystem from printing * * Releases the console_lock which the caller holds to block printing of * the console subsystem. * * While the console_lock was held, console output may have been buffered * by printk(). If this is the case, console_unlock(); emits * the output prior to releasing the lock. * * console_unlock(); may be called from any context. */ void console_unlock(void) { bool do_cond_resched; bool handover; bool flushed; u64 next_seq; /* * Console drivers are called with interrupts disabled, so * @console_may_schedule should be cleared before; however, we may * end up dumping a lot of lines, for example, if called from * console registration path, and should invoke cond_resched() * between lines if allowable. Not doing so can cause a very long * scheduling stall on a slow console leading to RCU stall and * softlockup warnings which exacerbate the issue with more * messages practically incapacitating the system. Therefore, create * a local to use for the printing loop. 
*/ do_cond_resched = console_may_schedule; do { console_may_schedule = 0; flushed = console_flush_all(do_cond_resched, &next_seq, &handover); if (!handover) __console_unlock(); /* * Abort if there was a failure to flush all messages to all * usable consoles. Either it is not possible to flush (in * which case it would be an infinite loop of retrying) or * another context has taken over printing. */ if (!flushed) break; /* * Some context may have added new records after * console_flush_all() but before unlocking the console. * Re-check if there is a new record to flush. If the trylock * fails, another context is already handling the printing. */ } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); } EXPORT_SYMBOL(console_unlock); /** * console_conditional_schedule - yield the CPU if required * * If the console code is currently allowed to sleep, and * if this CPU should yield the CPU to another task, do * so here. * * Must be called within console_lock();. */ void __sched console_conditional_schedule(void) { if (console_may_schedule) cond_resched(); } EXPORT_SYMBOL(console_conditional_schedule); void console_unblank(void) { bool found_unblank = false; struct console *c; int cookie; /* * First check if there are any consoles implementing the unblank() * callback. If not, there is no reason to continue and take the * console lock, which in particular can be dangerous if * @oops_in_progress is set. */ cookie = console_srcu_read_lock(); for_each_console_srcu(c) { if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) { found_unblank = true; break; } } console_srcu_read_unlock(cookie); if (!found_unblank) return; /* * Stop console printing because the unblank() callback may * assume the console is not within its write() callback. * * If @oops_in_progress is set, this may be an atomic context. * In that case, attempt a trylock as best-effort. */ if (oops_in_progress) { /* Semaphores are not NMI-safe. */ if (in_nmi()) return; /* * Attempting to trylock the console lock can deadlock * if another CPU was stopped while modifying the * semaphore. "Hope and pray" that this is not the * current situation. */ if (down_trylock_console_sem() != 0) return; } else console_lock(); console_locked = 1; console_may_schedule = 0; cookie = console_srcu_read_lock(); for_each_console_srcu(c) { if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) c->unblank(); } console_srcu_read_unlock(cookie); console_unlock(); if (!oops_in_progress) pr_flush(1000, true); } /** * console_flush_on_panic - flush console content on panic * @mode: flush all messages in buffer or just the pending ones * * Immediately output all pending messages no matter what. */ void console_flush_on_panic(enum con_flush_mode mode) { bool handover; u64 next_seq; /* * Ignore the console lock and flush out the messages. Attempting a * trylock would not be useful because: * * - if it is contended, it must be ignored anyway * - console_lock() and console_trylock() block and fail * respectively in panic for non-panic CPUs * - semaphores are not NMI-safe */ /* * If another context is holding the console lock, * @console_may_schedule might be set. Clear it so that * this context does not call cond_resched() while flushing. 
*/ console_may_schedule = 0; if (mode == CONSOLE_REPLAY_ALL) { struct console *c; short flags; int cookie; u64 seq; seq = prb_first_valid_seq(prb); cookie = console_srcu_read_lock(); for_each_console_srcu(c) { flags = console_srcu_read_flags(c); if (flags & CON_NBCON) { nbcon_seq_force(c, seq); } else { /* * This is an unsynchronized assignment. On * panic legacy consoles are only best effort. */ c->seq = seq; } } console_srcu_read_unlock(cookie); } console_flush_all(false, &next_seq, &handover); } /* * Return the console tty driver structure and its associated index */ struct tty_driver *console_device(int *index) { struct console *c; struct tty_driver *driver = NULL; int cookie; /* * Take console_lock to serialize device() callback with * other console operations. For example, fg_console is * modified under console_lock when switching vt. */ console_lock(); cookie = console_srcu_read_lock(); for_each_console_srcu(c) { if (!c->device) continue; driver = c->device(c, index); if (driver) break; } console_srcu_read_unlock(cookie); console_unlock(); return driver; } /* * Prevent further output on the passed console device so that (for example) * serial drivers can disable console output before suspending a port, and can * re-enable output afterwards. */ void console_stop(struct console *console) { __pr_flush(console, 1000, true); console_list_lock(); console_srcu_write_flags(console, console->flags & ~CON_ENABLED); console_list_unlock(); /* * Ensure that all SRCU list walks have completed. All contexts must * be able to see that this console is disabled so that (for example) * the caller can suspend the port without risk of another context * using the port. */ synchronize_srcu(&console_srcu); } EXPORT_SYMBOL(console_stop); void console_start(struct console *console) { console_list_lock(); console_srcu_write_flags(console, console->flags | CON_ENABLED); console_list_unlock(); __pr_flush(console, 1000, true); } EXPORT_SYMBOL(console_start); static int __read_mostly keep_bootcon; static int __init keep_bootcon_setup(char *str) { keep_bootcon = 1; pr_info("debug: skip boot console de-registration.\n"); return 0; } early_param("keep_bootcon", keep_bootcon_setup); static int console_call_setup(struct console *newcon, char *options) { int err; if (!newcon->setup) return 0; /* Synchronize with possible boot console. */ console_lock(); err = newcon->setup(newcon, options); console_unlock(); return err; } /* * This is called by register_console() to try to match * the newly registered console with any of the ones selected * by either the command line or add_preferred_console() and * setup/enable it. 
* * Care need to be taken with consoles that are statically * enabled such as netconsole */ static int try_enable_preferred_console(struct console *newcon, bool user_specified) { struct console_cmdline *c; int i, err; for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && c->name[0]; i++, c++) { if (c->user_specified != user_specified) continue; if (!newcon->match || newcon->match(newcon, c->name, c->index, c->options) != 0) { /* default matching */ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); if (strcmp(c->name, newcon->name) != 0) continue; if (newcon->index >= 0 && newcon->index != c->index) continue; if (newcon->index < 0) newcon->index = c->index; if (_braille_register_console(newcon, c)) return 0; err = console_call_setup(newcon, c->options); if (err) return err; } newcon->flags |= CON_ENABLED; if (i == preferred_console) newcon->flags |= CON_CONSDEV; return 0; } /* * Some consoles, such as pstore and netconsole, can be enabled even * without matching. Accept the pre-enabled consoles only when match() * and setup() had a chance to be called. */ if (newcon->flags & CON_ENABLED && c->user_specified == user_specified) return 0; return -ENOENT; } /* Try to enable the console unconditionally */ static void try_enable_default_console(struct console *newcon) { if (newcon->index < 0) newcon->index = 0; if (console_call_setup(newcon, NULL) != 0) return; newcon->flags |= CON_ENABLED; if (newcon->device) newcon->flags |= CON_CONSDEV; } static void console_init_seq(struct console *newcon, bool bootcon_registered) { struct console *con; bool handover; if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); newcon->seq = syslog_seq; mutex_unlock(&syslog_lock); } else { /* Begin with next message added to ringbuffer. */ newcon->seq = prb_next_seq(prb); /* * If any enabled boot consoles are due to be unregistered * shortly, some may not be caught up and may be the same * device as @newcon. Since it is not known which boot console * is the same device, flush all consoles and, if necessary, * start with the message of the enabled boot console that is * the furthest behind. */ if (bootcon_registered && !keep_bootcon) { /* * Hold the console_lock to stop console printing and * guarantee safe access to console->seq. */ console_lock(); /* * Flush all consoles and set the console to start at * the next unprinted sequence number. */ if (!console_flush_all(true, &newcon->seq, &handover)) { /* * Flushing failed. Just choose the lowest * sequence of the enabled boot consoles. */ /* * If there was a handover, this context no * longer holds the console_lock. */ if (handover) console_lock(); newcon->seq = prb_next_seq(prb); for_each_console(con) { if ((con->flags & CON_BOOT) && (con->flags & CON_ENABLED) && con->seq < newcon->seq) { newcon->seq = con->seq; } } } console_unlock(); } } } #define console_first() \ hlist_entry(console_list.first, struct console, node) static int unregister_console_locked(struct console *console); /* * The console driver calls this routine during kernel initialization * to register the console printing procedure with printk() and to * print any messages that were printed by the kernel before the * console driver was initialized. * * This can happen pretty early during the boot process (because of * early_printk) - sometimes before setup_arch() completes - be careful * of what kernel features are used - they may not be initialised yet. 
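 *
 * A bare-bones driver registration (illustrative sketch only; my_console,
 * my_console_write and "mycon" are placeholder names) typically looks like:
 *
 *	static struct console my_console = {
 *		.name	= "mycon",
 *		.write	= my_console_write,
 *		.flags	= CON_PRINTBUFFER,
 *		.index	= -1,
 *	};
 *	register_console(&my_console);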
* * There are two types of consoles - bootconsoles (early_printk) and * "real" consoles (everything which is not a bootconsole) which are * handled differently. * - Any number of bootconsoles can be registered at any time. * - As soon as a "real" console is registered, all bootconsoles * will be unregistered automatically. * - Once a "real" console is registered, any attempt to register a * bootconsoles will be rejected */ void register_console(struct console *newcon) { struct console *con; bool bootcon_registered = false; bool realcon_registered = false; int err; console_list_lock(); for_each_console(con) { if (WARN(con == newcon, "console '%s%d' already registered\n", con->name, con->index)) { goto unlock; } if (con->flags & CON_BOOT) bootcon_registered = true; else realcon_registered = true; } /* Do not register boot consoles when there already is a real one. */ if ((newcon->flags & CON_BOOT) && realcon_registered) { pr_info("Too late to register bootconsole %s%d\n", newcon->name, newcon->index); goto unlock; } if (newcon->flags & CON_NBCON) { /* * Ensure the nbcon console buffers can be allocated * before modifying any global data. */ if (!nbcon_alloc(newcon)) goto unlock; } /* * See if we want to enable this console driver by default. * * Nope when a console is preferred by the command line, device * tree, or SPCR. * * The first real console with tty binding (driver) wins. More * consoles might get enabled before the right one is found. * * Note that a console with tty binding will have CON_CONSDEV * flag set and will be first in the list. */ if (preferred_console < 0) { if (hlist_empty(&console_list) || !console_first()->device || console_first()->flags & CON_BOOT) { try_enable_default_console(newcon); } } /* See if this console matches one we selected on the command line */ err = try_enable_preferred_console(newcon, true); /* If not, try to match against the platform default(s) */ if (err == -ENOENT) err = try_enable_preferred_console(newcon, false); /* printk() messages are not printed to the Braille console. */ if (err || newcon->flags & CON_BRL) { if (newcon->flags & CON_NBCON) nbcon_free(newcon); goto unlock; } /* * If we have a bootconsole, and are switching to a real console, * don't print everything out again, since when the boot console, and * the real console are the same physical device, it's annoying to * see the beginning boot messages twice */ if (bootcon_registered && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { newcon->flags &= ~CON_PRINTBUFFER; } newcon->dropped = 0; console_init_seq(newcon, bootcon_registered); if (newcon->flags & CON_NBCON) nbcon_init(newcon); /* * Put this console in the list - keep the * preferred driver at the head of the list. */ if (hlist_empty(&console_list)) { /* Ensure CON_CONSDEV is always set for the head. */ newcon->flags |= CON_CONSDEV; hlist_add_head_rcu(&newcon->node, &console_list); } else if (newcon->flags & CON_CONSDEV) { /* Only the new head can have CON_CONSDEV set. */ console_srcu_write_flags(console_first(), console_first()->flags & ~CON_CONSDEV); hlist_add_head_rcu(&newcon->node, &console_list); } else { hlist_add_behind_rcu(&newcon->node, console_list.first); } /* * No need to synchronize SRCU here! The caller does not rely * on all contexts being able to see the new console before * register_console() completes. 
*/ console_sysfs_notify(); /* * By unregistering the bootconsoles after we enable the real console * we get the "console xxx enabled" message on all the consoles - * boot consoles, real consoles, etc - this is to ensure that end * users know there might be something in the kernel's log buffer that * went to the bootconsole (that they do not see on the real console) */ con_printk(KERN_INFO, newcon, "enabled\n"); if (bootcon_registered && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && !keep_bootcon) { struct hlist_node *tmp; hlist_for_each_entry_safe(con, tmp, &console_list, node) { if (con->flags & CON_BOOT) unregister_console_locked(con); } } unlock: console_list_unlock(); } EXPORT_SYMBOL(register_console); /* Must be called under console_list_lock(). */ static int unregister_console_locked(struct console *console) { int res; lockdep_assert_console_list_lock_held(); con_printk(KERN_INFO, console, "disabled\n"); res = _braille_unregister_console(console); if (res < 0) return res; if (res > 0) return 0; /* Disable it unconditionally */ console_srcu_write_flags(console, console->flags & ~CON_ENABLED); if (!console_is_registered_locked(console)) return -ENODEV; hlist_del_init_rcu(&console->node); /* * <HISTORICAL> * If this isn't the last console and it has CON_CONSDEV set, we * need to set it on the next preferred console. * </HISTORICAL> * * The above makes no sense as there is no guarantee that the next * console has any device attached. Oh well.... */ if (!hlist_empty(&console_list) && console->flags & CON_CONSDEV) console_srcu_write_flags(console_first(), console_first()->flags | CON_CONSDEV); /* * Ensure that all SRCU list walks have completed. All contexts * must not be able to see this console in the list so that any * exit/cleanup routines can be performed safely. */ synchronize_srcu(&console_srcu); if (console->flags & CON_NBCON) nbcon_free(console); console_sysfs_notify(); if (console->exit) res = console->exit(console); return res; } int unregister_console(struct console *console) { int res; console_list_lock(); res = unregister_console_locked(console); console_list_unlock(); return res; } EXPORT_SYMBOL(unregister_console); /** * console_force_preferred_locked - force a registered console preferred * @con: The registered console to force preferred. * * Must be called under console_list_lock(). */ void console_force_preferred_locked(struct console *con) { struct console *cur_pref_con; if (!console_is_registered_locked(con)) return; cur_pref_con = console_first(); /* Already preferred? */ if (cur_pref_con == con) return; /* * Delete, but do not re-initialize the entry. This allows the console * to continue to appear registered (via any hlist_unhashed_lockless() * checks), even though it was briefly removed from the console list. */ hlist_del_rcu(&con->node); /* * Ensure that all SRCU list walks have completed so that the console * can be added to the beginning of the console list and its forward * list pointer can be re-initialized. */ synchronize_srcu(&console_srcu); con->flags |= CON_CONSDEV; WARN_ON(!con->device); /* Only the new head can have CON_CONSDEV set. */ console_srcu_write_flags(cur_pref_con, cur_pref_con->flags & ~CON_CONSDEV); hlist_add_head_rcu(&con->node, &console_list); } EXPORT_SYMBOL(console_force_preferred_locked); /* * Initialize the console device. This is called *early*, so * we can't necessarily depend on lots of kernel help here. * Just do some early initializations, and do the complex setup * later. 
*/ void __init console_init(void) { int ret; initcall_t call; initcall_entry_t *ce; /* Setup the default TTY line discipline. */ n_tty_init(); /* * set up the console device so that later boot sequences can * inform about problems etc.. */ ce = __con_initcall_start; trace_initcall_level("console"); while (ce < __con_initcall_end) { call = initcall_from_entry(ce); trace_initcall_start(call); ret = call(); trace_initcall_finish(call, ret); ce++; } } /* * Some boot consoles access data that is in the init section and which will * be discarded after the initcalls have been run. To make sure that no code * will access this data, unregister the boot consoles in a late initcall. * * If for some reason, such as deferred probe or the driver being a loadable * module, the real console hasn't registered yet at this point, there will * be a brief interval in which no messages are logged to the console, which * makes it difficult to diagnose problems that occur during this time. * * To mitigate this problem somewhat, only unregister consoles whose memory * intersects with the init section. Note that all other boot consoles will * get unregistered when the real preferred console is registered. */ static int __init printk_late_init(void) { struct hlist_node *tmp; struct console *con; int ret; console_list_lock(); hlist_for_each_entry_safe(con, tmp, &console_list, node) { if (!(con->flags & CON_BOOT)) continue; /* Check addresses that might be used for enabled consoles. */ if (init_section_intersects(con, sizeof(*con)) || init_section_contains(con->write, 0) || init_section_contains(con->read, 0) || init_section_contains(con->device, 0) || init_section_contains(con->unblank, 0) || init_section_contains(con->data, 0)) { /* * Please, consider moving the reported consoles out * of the init section. */ pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n", con->name, con->index); unregister_console_locked(con); } } console_list_unlock(); ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, console_cpu_notify); WARN_ON(ret < 0); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", console_cpu_notify, NULL); WARN_ON(ret < 0); printk_sysctl_init(); return 0; } late_initcall(printk_late_init); #if defined CONFIG_PRINTK /* If @con is specified, only wait for that console. Otherwise wait for all. */ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms); unsigned long remaining_jiffies = timeout_jiffies; struct console *c; u64 last_diff = 0; u64 printk_seq; short flags; int cookie; u64 diff; u64 seq; might_sleep(); seq = prb_next_reserve_seq(prb); /* Flush the consoles so that records up to @seq are printed. */ console_lock(); console_unlock(); for (;;) { unsigned long begin_jiffies; unsigned long slept_jiffies; diff = 0; /* * Hold the console_lock to guarantee safe access to * console->seq. Releasing console_lock flushes more * records in case @seq is still not printed on all * usable consoles. */ console_lock(); cookie = console_srcu_read_lock(); for_each_console_srcu(c) { if (con && con != c) continue; flags = console_srcu_read_flags(c); /* * If consoles are not usable, it cannot be expected * that they make forward progress, so only increment * @diff for usable consoles. 
*/ if (!console_is_usable(c)) continue; if (flags & CON_NBCON) { printk_seq = nbcon_seq_read(c); } else { printk_seq = c->seq; } if (printk_seq < seq) diff += seq - printk_seq; } console_srcu_read_unlock(cookie); if (diff != last_diff && reset_on_progress) remaining_jiffies = timeout_jiffies; console_unlock(); /* Note: @diff is 0 if there are no usable consoles. */ if (diff == 0 || remaining_jiffies == 0) break; /* msleep(1) might sleep much longer. Check time by jiffies. */ begin_jiffies = jiffies; msleep(1); slept_jiffies = jiffies - begin_jiffies; remaining_jiffies -= min(slept_jiffies, remaining_jiffies); last_diff = diff; } return (diff == 0); } /** * pr_flush() - Wait for printing threads to catch up. * * @timeout_ms: The maximum time (in ms) to wait. * @reset_on_progress: Reset the timeout if forward progress is seen. * * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 * represents infinite waiting. * * If @reset_on_progress is true, the timeout will be reset whenever any * printer has been seen to make some forward progress. * * Context: Process context. May sleep while acquiring console lock. * Return: true if all usable printers are caught up. */ static bool pr_flush(int timeout_ms, bool reset_on_progress) { return __pr_flush(NULL, timeout_ms, reset_on_progress); } /* * Delayed printk version, for scheduler-internal messages: */ #define PRINTK_PENDING_WAKEUP 0x01 #define PRINTK_PENDING_OUTPUT 0x02 static DEFINE_PER_CPU(int, printk_pending); static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = this_cpu_xchg(printk_pending, 0); if (pending & PRINTK_PENDING_OUTPUT) { /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); } if (pending & PRINTK_PENDING_WAKEUP) wake_up_interruptible(&log_wait); } static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func); static void __wake_up_klogd(int val) { if (!printk_percpu_data_ready()) return; preempt_disable(); /* * Guarantee any new records can be seen by tasks preparing to wait * before this context checks if the wait queue is empty. * * The full memory barrier within wq_has_sleeper() pairs with the full * memory barrier within set_current_state() of * prepare_to_wait_event(), which is called after ___wait_event() adds * the waiter but before it has checked the wait condition. * * This pairs with devkmsg_read:A and syslog_print:A. */ if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ (val & PRINTK_PENDING_OUTPUT)) { this_cpu_or(printk_pending, val); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } preempt_enable(); } /** * wake_up_klogd - Wake kernel logging daemon * * Use this function when new records have been added to the ringbuffer * and the console printing of those records has already occurred or is * known to be handled by some other context. This function will only * wake the logging daemon. * * Context: Any context. */ void wake_up_klogd(void) { __wake_up_klogd(PRINTK_PENDING_WAKEUP); } /** * defer_console_output - Wake kernel logging daemon and trigger * console printing in a deferred context * * Use this function when new records have been added to the ringbuffer, * this context is responsible for console printing those records, but * the current context is not allowed to perform the console printing. * Trigger an irq_work context to perform the console printing. This * function also wakes the logging daemon. * * Context: Any context. 
*/ void defer_console_output(void) { /* * New messages may have been added directly to the ringbuffer * using vprintk_store(), so wake any waiters as well. */ __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); } void printk_trigger_flush(void) { defer_console_output(); } int vprintk_deferred(const char *fmt, va_list args) { return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); } int _printk_deferred(const char *fmt, ...) { va_list args; int r; va_start(args, fmt); r = vprintk_deferred(fmt, args); va_end(args); return r; } /* * printk rate limiting, lifted from the networking subsystem. * * This enforces a rate limit: not more than 10 kernel messages * every 5s to make a denial-of-service attack impossible. */ DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); int __printk_ratelimit(const char *func) { return ___ratelimit(&printk_ratelimit_state, func); } EXPORT_SYMBOL(__printk_ratelimit); /** * printk_timed_ratelimit - caller-controlled printk ratelimiting * @caller_jiffies: pointer to caller's state * @interval_msecs: minimum interval between prints * * printk_timed_ratelimit() returns true if more than @interval_msecs * milliseconds have elapsed since the last time printk_timed_ratelimit() * returned true. */ bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msecs) { unsigned long elapsed = jiffies - *caller_jiffies; if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs)) return false; *caller_jiffies = jiffies; return true; } EXPORT_SYMBOL(printk_timed_ratelimit); static DEFINE_SPINLOCK(dump_list_lock); static LIST_HEAD(dump_list); /** * kmsg_dump_register - register a kernel log dumper. * @dumper: pointer to the kmsg_dumper structure * * Adds a kernel log dumper to the system. The dump callback in the * structure will be called when the kernel oopses or panics and must be * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise. */ int kmsg_dump_register(struct kmsg_dumper *dumper) { unsigned long flags; int err = -EBUSY; /* The dump callback needs to be set */ if (!dumper->dump) return -EINVAL; spin_lock_irqsave(&dump_list_lock, flags); /* Don't allow registering multiple times */ if (!dumper->registered) { dumper->registered = 1; list_add_tail_rcu(&dumper->list, &dump_list); err = 0; } spin_unlock_irqrestore(&dump_list_lock, flags); return err; } EXPORT_SYMBOL_GPL(kmsg_dump_register); /** * kmsg_dump_unregister - unregister a kmsg dumper. * @dumper: pointer to the kmsg_dumper structure * * Removes a dump device from the system. Returns zero on success and * %-EINVAL otherwise. */ int kmsg_dump_unregister(struct kmsg_dumper *dumper) { unsigned long flags; int err = -EINVAL; spin_lock_irqsave(&dump_list_lock, flags); if (dumper->registered) { dumper->registered = 0; list_del_rcu(&dumper->list); err = 0; } spin_unlock_irqrestore(&dump_list_lock, flags); synchronize_rcu(); return err; } EXPORT_SYMBOL_GPL(kmsg_dump_unregister); static bool always_kmsg_dump; module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) { switch (reason) { case KMSG_DUMP_PANIC: return "Panic"; case KMSG_DUMP_OOPS: return "Oops"; case KMSG_DUMP_EMERG: return "Emergency"; case KMSG_DUMP_SHUTDOWN: return "Shutdown"; default: return "Unknown"; } } EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); /** * kmsg_dump - dump kernel log to kernel message dumpers. 
* @reason: the reason (oops, panic etc) for dumping * * Call each of the registered dumper's dump() callback, which can * retrieve the kmsg records with kmsg_dump_get_line() or * kmsg_dump_get_buffer(). */ void kmsg_dump(enum kmsg_dump_reason reason) { struct kmsg_dumper *dumper; rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { enum kmsg_dump_reason max_reason = dumper->max_reason; /* * If client has not provided a specific max_reason, default * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set. */ if (max_reason == KMSG_DUMP_UNDEF) { max_reason = always_kmsg_dump ? KMSG_DUMP_MAX : KMSG_DUMP_OOPS; } if (reason > max_reason) continue; /* invoke dumper which will iterate over records */ dumper->dump(dumper, reason); } rcu_read_unlock(); } /** * kmsg_dump_get_line - retrieve one kmsg log line * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to * @size: maximum size of the buffer * @len: length of line placed into buffer * * Start at the beginning of the kmsg buffer, with the oldest kmsg * record, and copy one record into the provided buffer. * * Consecutive calls will return the next available record moving * towards the end of the buffer with the youngest messages. * * A return value of FALSE indicates that there are no more records to * read. */ bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len) { u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; unsigned int line_count; struct printk_record r; size_t l = 0; bool ret = false; if (iter->cur_seq < min_seq) iter->cur_seq = min_seq; prb_rec_init_rd(&r, &info, line, size); /* Read text or count text lines? */ if (line) { if (!prb_read_valid(prb, iter->cur_seq, &r)) goto out; l = record_print_text(&r, syslog, printk_time); } else { if (!prb_read_valid_info(prb, iter->cur_seq, &info, &line_count)) { goto out; } l = get_record_print_text_size(&info, line_count, syslog, printk_time); } iter->cur_seq = r.info->seq + 1; ret = true; out: if (len) *len = l; return ret; } EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** * kmsg_dump_get_buffer - copy kmsg log lines * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @buf: buffer to copy the line to * @size: maximum size of the buffer * @len_out: length of line placed into buffer * * Start at the end of the kmsg buffer and fill the provided buffer * with as many of the *youngest* kmsg records that fit into it. * If the buffer is large enough, all available kmsg records will be * copied with a single call. * * Consecutive calls will fill the buffer with the next block of * available older records, not including the earlier retrieved ones. * * A return value of FALSE indicates that there are no more records to * read. */ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len_out) { u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; struct printk_record r; u64 seq; u64 next_seq; size_t len = 0; bool ret = false; bool time = printk_time; if (!buf || !size) goto out; if (iter->cur_seq < min_seq) iter->cur_seq = min_seq; if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { if (info.seq != iter->cur_seq) { /* messages are gone, move to first available one */ iter->cur_seq = info.seq; } } /* last entry */ if (iter->cur_seq >= iter->next_seq) goto out; /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. 
Pass in size-1 * because this function (by way of record_print_text()) will * not write more than size-1 bytes of text into @buf. */ seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, size - 1, syslog, time); /* * Next kmsg_dump_get_buffer() invocation will dump block of * older records stored right before this one. */ next_seq = seq; prb_rec_init_rd(&r, &info, buf, size); prb_for_each_record(seq, prb, seq, &r) { if (r.info->seq >= iter->next_seq) break; len += record_print_text(&r, syslog, time); /* Adjust record to store to remaining buffer space. */ prb_rec_init_rd(&r, &info, buf + len, size - len); } iter->next_seq = next_seq; ret = true; out: if (len_out) *len_out = len; return ret; } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); /** * kmsg_dump_rewind - reset the iterator * @iter: kmsg dump iterator * * Reset the dumper's iterator so that kmsg_dump_get_line() and * kmsg_dump_get_buffer() can be called again and used multiple * times within the same dumper.dump() callback. */ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { iter->cur_seq = latched_seq_read_nolock(&clear_seq); iter->next_seq = prb_next_seq(prb); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); #endif #ifdef CONFIG_SMP static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); /** * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant * spinning lock is not owned by any CPU. * * Context: Any context. */ void __printk_cpu_sync_wait(void) { do { cpu_relax(); } while (atomic_read(&printk_cpu_sync_owner) != -1); } EXPORT_SYMBOL(__printk_cpu_sync_wait); /** * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant * spinning lock. * * If no processor has the lock, the calling processor takes the lock and * becomes the owner. If the calling processor is already the owner of the * lock, this function succeeds immediately. * * Context: Any context. Expects interrupts to be disabled. * Return: 1 on success, otherwise 0. */ int __printk_cpu_sync_try_get(void) { int cpu; int old; cpu = smp_processor_id(); /* * Guarantee loads and stores from this CPU when it is the lock owner * are _not_ visible to the previous lock owner. This pairs with * __printk_cpu_sync_put:B. * * Memory barrier involvement: * * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, * then __printk_cpu_sync_put:A can never read from * __printk_cpu_sync_try_get:B. * * Relies on: * * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B * of the previous CPU * matching * ACQUIRE from __printk_cpu_sync_try_get:A to * __printk_cpu_sync_try_get:B of this CPU */ old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1, cpu); /* LMM(__printk_cpu_sync_try_get:A) */ if (old == -1) { /* * This CPU is now the owner and begins loading/storing * data: LMM(__printk_cpu_sync_try_get:B) */ return 1; } else if (old == cpu) { /* This CPU is already the owner. */ atomic_inc(&printk_cpu_sync_nested); return 1; } return 0; } EXPORT_SYMBOL(__printk_cpu_sync_try_get); /** * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock. * * The calling processor must be the owner of the lock. * * Context: Any context. Expects interrupts to be disabled. */ void __printk_cpu_sync_put(void) { if (atomic_read(&printk_cpu_sync_nested)) { atomic_dec(&printk_cpu_sync_nested); return; } /* * This CPU is finished loading/storing data: * LMM(__printk_cpu_sync_put:A) */ /* * Guarantee loads and stores from this CPU when it was the * lock owner are visible to the next lock owner. 
This pairs * with __printk_cpu_sync_try_get:A. * * Memory barrier involvement: * * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A. * * Relies on: * * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B * of this CPU * matching * ACQUIRE from __printk_cpu_sync_try_get:A to * __printk_cpu_sync_try_get:B of the next CPU */ atomic_set_release(&printk_cpu_sync_owner, -1); /* LMM(__printk_cpu_sync_put:B) */ } EXPORT_SYMBOL(__printk_cpu_sync_put); #endif /* CONFIG_SMP */
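/*
 * Illustrative sketch, not part of the file above: one way a caller can
 * pair the cpu-reentrant spinning lock primitives. The upstream
 * printk_cpu_sync_get_irqsave()/printk_cpu_sync_put_irqrestore() helpers
 * in <linux/printk.h> follow the same pattern; the example_* names here
 * are invented for illustration only and assume CONFIG_SMP, where the
 * primitives above are built.
 */
#ifdef CONFIG_SMP
static inline void example_printk_cpu_sync_lock(unsigned long *flags)
{
	for (;;) {
		local_irq_save(*flags);

		/* The try_get/put primitives expect interrupts disabled. */
		if (__printk_cpu_sync_try_get())
			break;

		/* Re-enable interrupts while busy-waiting for the owner. */
		local_irq_restore(*flags);
		__printk_cpu_sync_wait();
	}
}

static inline void example_printk_cpu_sync_unlock(unsigned long *flags)
{
	__printk_cpu_sync_put();
	local_irq_restore(*flags);
}
#endif /* CONFIG_SMP */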
/* SPDX-License-Identifier: GPL-2.0 */ /* * IRQ subsystem internal functions and variables: * * Do not ever include this file from anything else than * kernel/irq/. Do not even think about using any information outside * of this file for your non core code. */ #include <linux/irqdesc.h> #include <linux/kernel_stat.h> #include <linux/pm_runtime.h> #include <linux/sched/clock.h> #ifdef CONFIG_SPARSE_IRQ # define MAX_SPARSE_IRQS INT_MAX #else # define MAX_SPARSE_IRQS NR_IRQS #endif #define istate core_internal_state__do_not_mess_with_it extern bool noirqdebug; extern struct irqaction chained_action; /* * Bits used by threaded handlers: * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed * IRQTF_AFFINITY - irq thread is requested to adjust affinity * IRQTF_FORCED_THREAD - irq action is force threaded * IRQTF_READY - signals that irq thread is ready */ enum { IRQTF_RUNTHREAD, IRQTF_WARNED, IRQTF_AFFINITY, IRQTF_FORCED_THREAD, IRQTF_READY, }; /* * Bit masks for desc->core_internal_state__do_not_mess_with_it * * IRQS_AUTODETECT - autodetection in progress * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt * detection * IRQS_POLL_INPROGRESS - polling in progress * IRQS_ONESHOT - irq is not unmasked in primary handler * IRQS_REPLAY - irq has been resent and will not be resent * again until the handler has run and cleared * this flag.
* IRQS_WAITING - irq is waiting * IRQS_PENDING - irq needs to be resent and should be resent * at the next available opportunity. * IRQS_SUSPENDED - irq is suspended * IRQS_NMI - irq line is used to deliver NMIs * IRQS_SYSFS - descriptor has been added to sysfs */ enum { IRQS_AUTODETECT = 0x00000001, IRQS_SPURIOUS_DISABLED = 0x00000002, IRQS_POLL_INPROGRESS = 0x00000008, IRQS_ONESHOT = 0x00000020, IRQS_REPLAY = 0x00000040, IRQS_WAITING = 0x00000080, IRQS_PENDING = 0x00000200, IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, IRQS_NMI = 0x00002000, IRQS_SYSFS = 0x00004000, }; #include "debug.h" #include "settings.h" extern int __irq_set_trigger(struct irq_desc *desc, unsigned long flags); extern void __disable_irq(struct irq_desc *desc); extern void __enable_irq(struct irq_desc *desc); #define IRQ_RESEND true #define IRQ_NORESEND false #define IRQ_START_FORCE true #define IRQ_START_COND false extern int irq_activate(struct irq_desc *desc); extern int irq_activate_and_startup(struct irq_desc *desc, bool resend); extern int irq_startup(struct irq_desc *desc, bool resend, bool force); extern void irq_shutdown(struct irq_desc *desc); extern void irq_shutdown_and_deactivate(struct irq_desc *desc); extern void irq_enable(struct irq_desc *desc); extern void irq_disable(struct irq_desc *desc); extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); extern void mask_irq(struct irq_desc *desc); extern void unmask_irq(struct irq_desc *desc); extern void unmask_threaded_irq(struct irq_desc *desc); #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } #else extern void irq_mark_irq(unsigned int irq); #endif extern int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state); irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event(struct irq_desc *desc); /* Resending of interrupts :*/ int check_irq_resend(struct irq_desc *desc, bool inject); void clear_irq_resend(struct irq_desc *desc); void irq_resend_init(struct irq_desc *desc); bool irq_wait_for_poll(struct irq_desc *desc); void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); void wake_threads_waitq(struct irq_desc *desc); #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc); extern void register_handler_proc(unsigned int irq, struct irqaction *action); extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); #else static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { } static inline void register_handler_proc(unsigned int irq, struct irqaction *action) { } static inline void unregister_handler_proc(unsigned int irq, struct irqaction *action) { } #endif extern bool irq_can_set_affinity_usr(unsigned int irq); extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force); #ifdef CONFIG_SMP extern int irq_setup_affinity(struct irq_desc *desc); #else static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } #endif /* Inline functions for support of irq chips on slow busses */ static inline void chip_bus_lock(struct irq_desc *desc) { if 
(unlikely(desc->irq_data.chip->irq_bus_lock)) desc->irq_data.chip->irq_bus_lock(&desc->irq_data); } static inline void chip_bus_sync_unlock(struct irq_desc *desc) { if (unlikely(desc->irq_data.chip->irq_bus_sync_unlock)) desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); } #define _IRQ_DESC_CHECK (1 << 0) #define _IRQ_DESC_PERCPU (1 << 1) #define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) #define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) #define for_each_action_of_desc(desc, act) \ for (act = desc->action; act; act = act->next) struct irq_desc * __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, unsigned int check); void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus); static inline struct irq_desc * irq_get_desc_buslock(unsigned int irq, unsigned long *flags, unsigned int check) { return __irq_get_desc_lock(irq, flags, true, check); } static inline void irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags) { __irq_put_desc_unlock(desc, flags, true); } static inline struct irq_desc * irq_get_desc_lock(unsigned int irq, unsigned long *flags, unsigned int check) { return __irq_get_desc_lock(irq, flags, false, check); } static inline void irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) { __irq_put_desc_unlock(desc, flags, false); } #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) static inline unsigned int irqd_get(struct irq_data *d) { return __irqd_to_state(d); } /* * Manipulation functions for irq_data.state */ static inline void irqd_set_move_pending(struct irq_data *d) { __irqd_to_state(d) |= IRQD_SETAFFINITY_PENDING; } static inline void irqd_clr_move_pending(struct irq_data *d) { __irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING; } static inline void irqd_set_managed_shutdown(struct irq_data *d) { __irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN; } static inline void irqd_clr_managed_shutdown(struct irq_data *d) { __irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN; } static inline void irqd_clear(struct irq_data *d, unsigned int mask) { __irqd_to_state(d) &= ~mask; } static inline void irqd_set(struct irq_data *d, unsigned int mask) { __irqd_to_state(d) |= mask; } static inline bool irqd_has_set(struct irq_data *d, unsigned int mask) { return __irqd_to_state(d) & mask; } static inline void irq_state_set_disabled(struct irq_desc *desc) { irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); } static inline void irq_state_set_masked(struct irq_desc *desc) { irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); } #undef __irqd_to_state static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc) { __this_cpu_inc(*desc->kstat_irqs); __this_cpu_inc(kstat.irqs_sum); } static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) { __kstat_incr_irqs_this_cpu(desc); desc->tot_count++; } static inline int irq_desc_get_node(struct irq_desc *desc) { return irq_common_data_get_node(&desc->irq_common_data); } static inline int irq_desc_is_chained(struct irq_desc *desc) { return (desc->action && desc->action == &chained_action); } #ifdef CONFIG_PM_SLEEP bool irq_pm_check_wakeup(struct irq_desc *desc); void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); #else static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } static inline void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } static inline void irq_pm_remove_action(struct irq_desc 
*desc, struct irqaction *action) { } #endif #ifdef CONFIG_IRQ_TIMINGS #define IRQ_TIMINGS_SHIFT 5 #define IRQ_TIMINGS_SIZE (1 << IRQ_TIMINGS_SHIFT) #define IRQ_TIMINGS_MASK (IRQ_TIMINGS_SIZE - 1) /** * struct irq_timings - irq timings storing structure * @values: a circular buffer of u64 encoded <timestamp,irq> values * @count: the number of elements in the array */ struct irq_timings { u64 values[IRQ_TIMINGS_SIZE]; int count; }; DECLARE_PER_CPU(struct irq_timings, irq_timings); extern void irq_timings_free(int irq); extern int irq_timings_alloc(int irq); static inline void irq_remove_timings(struct irq_desc *desc) { desc->istate &= ~IRQS_TIMINGS; irq_timings_free(irq_desc_get_irq(desc)); } static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act) { int irq = irq_desc_get_irq(desc); int ret; /* * We don't need the measurement because the idle code already * knows the next expiry event. */ if (act->flags & __IRQF_TIMER) return; /* * In case the timing allocation fails, we just want to warn, * not fail, so letting the system boot anyway. */ ret = irq_timings_alloc(irq); if (ret) { pr_warn("Failed to allocate irq timing stats for irq%d (%d)", irq, ret); return; } desc->istate |= IRQS_TIMINGS; } extern void irq_timings_enable(void); extern void irq_timings_disable(void); DECLARE_STATIC_KEY_FALSE(irq_timing_enabled); /* * The interrupt number and the timestamp are encoded into a single * u64 variable to optimize the size. * 48 bit time stamp and 16 bit IRQ number is way sufficient. * Who cares an IRQ after 78 hours of idle time? */ static inline u64 irq_timing_encode(u64 timestamp, int irq) { return (timestamp << 16) | irq; } static inline int irq_timing_decode(u64 value, u64 *timestamp) { *timestamp = value >> 16; return value & U16_MAX; } static __always_inline void irq_timings_push(u64 ts, int irq) { struct irq_timings *timings = this_cpu_ptr(&irq_timings); timings->values[timings->count & IRQ_TIMINGS_MASK] = irq_timing_encode(ts, irq); timings->count++; } /* * The function record_irq_time is only called in one place in the * interrupts handler. We want this function always inline so the code * inside is embedded in the function and the static key branching * code can act at the higher level. Without the explicit * __always_inline we can end up with a function call and a small * overhead in the hotpath for nothing. 
*/ static __always_inline void record_irq_time(struct irq_desc *desc) { if (!static_branch_likely(&irq_timing_enabled)) return; if (desc->istate & IRQS_TIMINGS) irq_timings_push(local_clock(), irq_desc_get_irq(desc)); } #else static inline void irq_remove_timings(struct irq_desc *desc) {} static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act) {}; static inline void record_irq_time(struct irq_desc *desc) {} #endif /* CONFIG_IRQ_TIMINGS */ #ifdef CONFIG_GENERIC_IRQ_CHIP void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, int num_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler); #else static inline void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, int num_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler) { } #endif /* CONFIG_GENERIC_IRQ_CHIP */ #ifdef CONFIG_GENERIC_PENDING_IRQ static inline bool irq_can_move_pcntxt(struct irq_data *data) { return irqd_can_move_in_process_context(data); } static inline bool irq_move_pending(struct irq_data *data) { return irqd_is_setaffinity_pending(data); } static inline void irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { cpumask_copy(desc->pending_mask, mask); } static inline void irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { cpumask_copy(mask, desc->pending_mask); } static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) { return desc->pending_mask; } static inline bool handle_enforce_irqctx(struct irq_data *data) { return irqd_is_handle_enforce_irqctx(data); } bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else /* CONFIG_GENERIC_PENDING_IRQ */ static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; } static inline bool irq_move_pending(struct irq_data *data) { return false; } static inline void irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { } static inline void irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) { return NULL; } static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) { return false; } static inline bool handle_enforce_irqctx(struct irq_data *data) { return false; } #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) { irqd_set_activated(data); return 0; } static inline void irq_domain_deactivate_irq(struct irq_data *data) { irqd_clr_activated(data); } #endif static inline struct irq_data *irqd_get_parent_data(struct irq_data *irqd) { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY return irqd->parent_data; #else return NULL; #endif } #ifdef CONFIG_GENERIC_IRQ_DEBUGFS #include <linux/debugfs.h> void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc); static inline void irq_remove_debugfs_entry(struct irq_desc *desc) { debugfs_remove(desc->debugfs_file); kfree(desc->dev_name); } void irq_debugfs_copy_devname(int irq, struct device *dev); # ifdef CONFIG_IRQ_DOMAIN void irq_domain_debugfs_init(struct dentry *root); # else static inline void irq_domain_debugfs_init(struct dentry *root) { } # endif #else /* CONFIG_GENERIC_IRQ_DEBUGFS */ static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d) { } static inline void irq_remove_debugfs_entry(struct irq_desc *d) { } static inline void irq_debugfs_copy_devname(int irq, 
struct device *dev) { } #endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
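/*
 * Illustrative sketch, not part of the header above: the <timestamp,irq>
 * packing used by the CONFIG_IRQ_TIMINGS helpers. The encoded u64 keeps
 * the low 48 bits of the timestamp and the low 16 bits of the interrupt
 * number, so decoding recovers the timestamp modulo 2^48. The example_*
 * name is invented for illustration only.
 */
#ifdef CONFIG_IRQ_TIMINGS
static inline void example_irq_timing_roundtrip(struct irq_desc *desc)
{
	u64 value = irq_timing_encode(local_clock(), irq_desc_get_irq(desc));
	u64 ts;
	int irq;

	irq = irq_timing_decode(value, &ts);

	/* irq is the original number (low 16 bits), ts its 48-bit clock. */
	(void)irq;
	(void)ts;
}
#endif /* CONFIG_IRQ_TIMINGS */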
/* SPDX-License-Identifier: GPL-2.0 OR MIT */ #ifndef __LINUX_OVERFLOW_H #define __LINUX_OVERFLOW_H #include <linux/compiler.h> #include <linux/limits.h> #include <linux/const.h> /* * We need to compute the minimum and maximum values representable in a given * type. These macros may also be useful elsewhere. It would seem more obvious * to do something like: * * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) * * Unfortunately, the middle expressions, strictly speaking, have * undefined behaviour, and at least some versions of gcc warn about * the type_max expression (but not if -fsanitize=undefined is in * effect; in that case, the warning is deferred to runtime...). * * The slightly excessive casting in type_min is to make sure the * macros also produce sensible values for the exotic type _Bool. [The * overflow checkers only almost work for _Bool, but that's * a-feature-not-a-bug, since people shouldn't be doing arithmetic on * _Bools. Besides, the gcc builtins don't allow _Bool* as third * argument.] * * Idea stolen from * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - * credit to Christian Biere. */ #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) #define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_max(t) __type_max(typeof(t)) #define __type_min(T) ((T)((T)-type_max(T)-(T)1)) #define type_min(t) __type_min(typeof(t)) /* * Avoids triggering -Wtype-limits compilation warning, * while using unsigned data types to check a < 0.
*/ #define is_non_negative(a) ((a) > 0 || (a) == 0) #define is_negative(a) (!(is_non_negative(a))) /* * Allows for effectively applying __must_check to a macro so we can have * both the type-agnostic benefits of the macros while also being able to * enforce that the return value is, in fact, checked. */ static inline bool __must_check __must_check_overflow(bool overflow) { return unlikely(overflow); } /** * check_add_overflow() - Calculate addition with overflow checking * @a: first addend * @b: second addend * @d: pointer to store sum * * Returns true on wrap-around, false otherwise. * * *@d holds the results of the attempted addition, regardless of whether * wrap-around occurred. */ #define check_add_overflow(a, b, d) \ __must_check_overflow(__builtin_add_overflow(a, b, d)) /** * wrapping_add() - Intentionally perform a wrapping addition * @type: type for result of calculation * @a: first addend * @b: second addend * * Return the potentially wrapped-around addition without * tripping any wrap-around sanitizers that may be enabled. */ #define wrapping_add(type, a, b) \ ({ \ type __val; \ __builtin_add_overflow(a, b, &__val); \ __val; \ }) /** * wrapping_assign_add() - Intentionally perform a wrapping increment assignment * @var: variable to be incremented * @offset: amount to add * * Increments @var by @offset with wrap-around. Returns the resulting * value of @var. Will not trip any wrap-around sanitizers. * * Returns the new value of @var. */ #define wrapping_assign_add(var, offset) \ ({ \ typeof(var) *__ptr = &(var); \ *__ptr = wrapping_add(typeof(var), *__ptr, offset); \ }) /** * check_sub_overflow() - Calculate subtraction with overflow checking * @a: minuend; value to subtract from * @b: subtrahend; value to subtract from @a * @d: pointer to store difference * * Returns true on wrap-around, false otherwise. * * *@d holds the results of the attempted subtraction, regardless of whether * wrap-around occurred. */ #define check_sub_overflow(a, b, d) \ __must_check_overflow(__builtin_sub_overflow(a, b, d)) /** * wrapping_sub() - Intentionally perform a wrapping subtraction * @type: type for result of calculation * @a: minuend; value to subtract from * @b: subtrahend; value to subtract from @a * * Return the potentially wrapped-around subtraction without * tripping any wrap-around sanitizers that may be enabled. */ #define wrapping_sub(type, a, b) \ ({ \ type __val; \ __builtin_sub_overflow(a, b, &__val); \ __val; \ }) /** * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign * @var: variable to be decremented * @offset: amount to subtract * * Decrements @var by @offset with wrap-around. Returns the resulting * value of @var. Will not trip any wrap-around sanitizers. * * Returns the new value of @var. */ #define wrapping_assign_sub(var, offset) \ ({ \ typeof(var) *__ptr = &(var); \ *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \ }) /** * check_mul_overflow() - Calculate multiplication with overflow checking * @a: first factor * @b: second factor * @d: pointer to store product * * Returns true on wrap-around, false otherwise. * * *@d holds the results of the attempted multiplication, regardless of whether * wrap-around occurred. 
*/ #define check_mul_overflow(a, b, d) \ __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** * wrapping_mul() - Intentionally perform a wrapping multiplication * @type: type for result of calculation * @a: first factor * @b: second factor * * Return the potentially wrapped-around multiplication without * tripping any wrap-around sanitizers that may be enabled. */ #define wrapping_mul(type, a, b) \ ({ \ type __val; \ __builtin_mul_overflow(a, b, &__val); \ __val; \ }) /** * check_shl_overflow() - Calculate a left-shifted value and check overflow * @a: Value to be shifted * @s: How many bits left to shift * @d: Pointer to where to store the result * * Computes *@d = (@a << @s) * * Returns true if '*@d' cannot hold the result or when '@a << @s' doesn't * make sense. Example conditions: * * - '@a << @s' causes bits to be lost when stored in *@d. * - '@s' is garbage (e.g. negative) or so large that the result of * '@a << @s' is guaranteed to be 0. * - '@a' is negative. * - '@a << @s' sets the sign bit, if any, in '*@d'. * * '*@d' will hold the results of the attempted shift, but is not * considered "safe for use" if true is returned. */ #define check_shl_overflow(a, s, d) __must_check_overflow(({ \ typeof(a) _a = a; \ typeof(s) _s = s; \ typeof(d) _d = d; \ unsigned long long _a_full = _a; \ unsigned int _to_shift = \ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ *_d = (_a_full << _to_shift); \ (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \ (*_d >> _to_shift) != _a); \ })) #define __overflows_type_constexpr(x, T) ( \ is_unsigned_type(typeof(x)) ? \ (x) > type_max(T) : \ is_unsigned_type(typeof(T)) ? \ (x) < 0 || (x) > type_max(T) : \ (x) < type_min(T) || (x) > type_max(T)) #define __overflows_type(x, T) ({ \ typeof(T) v = 0; \ check_add_overflow((x), v, &v); \ }) /** * overflows_type - helper for checking the overflows between value, variables, * or data type * * @n: source constant value or variable to be checked * @T: destination variable or data type proposed to store @x * * Compares the @x expression for whether or not it can safely fit in * the storage of the type in @T. @x and @T can have different types. * If @x is a constant expression, this will also resolve to a constant * expression. * * Returns: true if overflow can occur, false otherwise. */ #define overflows_type(n, T) \ __builtin_choose_expr(__is_constexpr(n), \ __overflows_type_constexpr(n, T), \ __overflows_type(n, T)) /** * castable_to_type - like __same_type(), but also allows for casted literals * * @n: variable or constant value * @T: variable or data type * * Unlike the __same_type() macro, this allows a constant value as the * first argument. If this value would not overflow into an assignment * of the second argument's type, it returns true. Otherwise, this falls * back to __same_type(). */ #define castable_to_type(n, T) \ __builtin_choose_expr(__is_constexpr(n), \ !__overflows_type_constexpr(n, T), \ __same_type(n, T)) /** * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX * @factor1: first factor * @factor2: second factor * * Returns: calculate @factor1 * @factor2, both promoted to size_t, * with any overflow causing the return value to be SIZE_MAX. The * lvalue must be size_t to avoid implicit type conversion. 
*/ static inline size_t __must_check size_mul(size_t factor1, size_t factor2) { size_t bytes; if (check_mul_overflow(factor1, factor2, &bytes)) return SIZE_MAX; return bytes; } /** * size_add() - Calculate size_t addition with saturation at SIZE_MAX * @addend1: first addend * @addend2: second addend * * Returns: calculate @addend1 + @addend2, both promoted to size_t, * with any overflow causing the return value to be SIZE_MAX. The * lvalue must be size_t to avoid implicit type conversion. */ static inline size_t __must_check size_add(size_t addend1, size_t addend2) { size_t bytes; if (check_add_overflow(addend1, addend2, &bytes)) return SIZE_MAX; return bytes; } /** * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX * @minuend: value to subtract from * @subtrahend: value to subtract from @minuend * * Returns: calculate @minuend - @subtrahend, both promoted to size_t, * with any overflow causing the return value to be SIZE_MAX. For * composition with the size_add() and size_mul() helpers, neither * argument may be SIZE_MAX (or the result with be forced to SIZE_MAX). * The lvalue must be size_t to avoid implicit type conversion. */ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) { size_t bytes; if (minuend == SIZE_MAX || subtrahend == SIZE_MAX || check_sub_overflow(minuend, subtrahend, &bytes)) return SIZE_MAX; return bytes; } /** * array_size() - Calculate size of 2-dimensional array. * @a: dimension one * @b: dimension two * * Calculates size of 2-dimensional array: @a * @b. * * Returns: number of bytes needed to represent the array or SIZE_MAX on * overflow. */ #define array_size(a, b) size_mul(a, b) /** * array3_size() - Calculate size of 3-dimensional array. * @a: dimension one * @b: dimension two * @c: dimension three * * Calculates size of 3-dimensional array: @a * @b * @c. * * Returns: number of bytes needed to represent the array or SIZE_MAX on * overflow. */ #define array3_size(a, b, c) size_mul(size_mul(a, b), c) /** * flex_array_size() - Calculate size of a flexible array member * within an enclosing structure. * @p: Pointer to the structure. * @member: Name of the flexible array member. * @count: Number of elements in the array. * * Calculates size of a flexible array of @count number of @member * elements, at the end of structure @p. * * Return: number of bytes needed or SIZE_MAX on overflow. */ #define flex_array_size(p, member, count) \ __builtin_choose_expr(__is_constexpr(count), \ (count) * sizeof(*(p)->member) + __must_be_array((p)->member), \ size_mul(count, sizeof(*(p)->member) + __must_be_array((p)->member))) /** * struct_size() - Calculate size of structure with trailing flexible array. * @p: Pointer to the structure. * @member: Name of the array member. * @count: Number of elements in the array. * * Calculates size of memory needed for structure of @p followed by an * array of @count number of @member elements. * * Return: number of bytes needed or SIZE_MAX on overflow. */ #define struct_size(p, member, count) \ __builtin_choose_expr(__is_constexpr(count), \ sizeof(*(p)) + flex_array_size(p, member, count), \ size_add(sizeof(*(p)), flex_array_size(p, member, count))) /** * struct_size_t() - Calculate size of structure with trailing flexible array * @type: structure type name. * @member: Name of the array member. * @count: Number of elements in the array. * * Calculates size of memory needed for structure @type followed by an * array of @count number of @member elements. 
Prefer using struct_size() * when possible instead, to keep calculations associated with a specific * instance variable of type @type. * * Return: number of bytes needed or SIZE_MAX on overflow. */ #define struct_size_t(type, member, count) \ struct_size((type *)NULL, member, count) /** * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. * Enables caller macro to pass (different) initializer. * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. * @initializer: initializer expression (could be empty for no init). */ #define _DEFINE_FLEX(type, name, member, count, initializer...) \ _Static_assert(__builtin_constant_p(count), \ "onstack flex array members require compile-time const count"); \ union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ } name##_u initializer; \ type *name = (type *)&name##_u /** * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing * flexible array member, when it does not have a __counted_by annotation. * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. * * Define a zeroed, on-stack, instance of @type structure with a trailing * flexible array member. * Use __struct_size(@name) to get compile-time size of it afterwards. */ #define DEFINE_RAW_FLEX(type, name, member, count) \ _DEFINE_FLEX(type, name, member, count, = {}) /** * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing * flexible array member. * * @TYPE: structure type name, including "struct" keyword. * @NAME: Name for a variable to define. * @MEMBER: Name of the array member. * @COUNTER: Name of the __counted_by member. * @COUNT: Number of elements in the array; must be compile-time const. * * Define a zeroed, on-stack, instance of @TYPE structure with a trailing * flexible array member. * Use __struct_size(@NAME) to get compile-time size of it afterwards. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) #endif /* __LINUX_OVERFLOW_H */
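/*
 * Illustrative sketch, not part of the header above: typical use of
 * struct_size() when sizing an allocation that ends in a flexible array
 * member. The struct and function names are invented for the example,
 * and the sketch assumes <linux/slab.h> and <linux/types.h> for
 * kmalloc(), gfp_t and u32.
 */
struct example_buf {
	size_t count;
	u32 data[];			/* trailing flexible array member */
};

static inline struct example_buf *example_buf_alloc(size_t count, gfp_t gfp)
{
	struct example_buf *buf;

	/*
	 * struct_size() saturates to SIZE_MAX on overflow, so an absurd
	 * @count turns into a failed allocation rather than a short one.
	 */
	buf = kmalloc(struct_size(buf, data, count), gfp);
	if (!buf)
		return NULL;

	buf->count = count;
	return buf;
}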
1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 
2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 
3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 
// SPDX-License-Identifier: GPL-2.0-or-later
/* cx25840 - Conexant CX25840 audio/video decoder driver
 *
 * Copyright (C) 2004 Ulf Eklund
 *
 * Based on the saa7115 driver and on the first version of Chris Kennedy's
 * cx25840 driver.
 *
 * Changes by Tyler Trafford <tatrafford@comcast.net>
 *    - cleanup/rewrite for V4L2 API (2005)
 *
 * VBI support by Hans Verkuil <hverkuil@xs4all.nl>.
 *
 * NTSC sliced VBI support by Christopher Neufeld <television@cneufeld.ca>
 * with additional fixes by Hans Verkuil <hverkuil@xs4all.nl>.
 *
 * CX23885 support by Steven Toth <stoth@linuxtv.org>.
 *
 * CX2388[578] IRQ handling, IO Pin mux configuration and other small fixes are
 * Copyright (C) 2010 Andy Walls <awalls@md.metrocast.net>
 *
 * CX23888 DIF support for the HVR1850
 * Copyright (C) 2011 Steven Toth <stoth@kernellabs.com>
 *
 * CX2584x pin to pad mapping and output format configuration support are
 * Copyright (C) 2011 Maciej S. Szmigiero <mail@maciej.szmigiero.name>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/videodev2.h>
#include <linux/i2c.h>
#include <linux/delay.h>
#include <linux/math64.h>
#include <media/v4l2-common.h>
#include <media/drv-intf/cx25840.h>

#include "cx25840-core.h"

MODULE_DESCRIPTION("Conexant CX25840 audio/video decoder driver");
MODULE_AUTHOR("Ulf Eklund, Chris Kennedy, Hans Verkuil, Tyler Trafford");
MODULE_LICENSE("GPL");

#define CX25840_VID_INT_STAT_REG 0x410
#define CX25840_VID_INT_STAT_BITS 0x0000ffff
#define CX25840_VID_INT_MASK_BITS 0xffff0000
#define CX25840_VID_INT_MASK_SHFT 16
#define CX25840_VID_INT_MASK_REG 0x412

#define CX23885_AUD_MC_INT_MASK_REG 0x80c
#define CX23885_AUD_MC_INT_STAT_BITS 0xffff0000
#define CX23885_AUD_MC_INT_CTRL_BITS 0x0000ffff
#define CX23885_AUD_MC_INT_STAT_SHFT 16

#define CX25840_AUD_INT_CTRL_REG 0x812
#define CX25840_AUD_INT_STAT_REG 0x813

#define CX23885_PIN_CTRL_IRQ_REG 0x123
#define CX23885_PIN_CTRL_IRQ_IR_STAT 0x40
#define CX23885_PIN_CTRL_IRQ_AUD_STAT 0x20
#define CX23885_PIN_CTRL_IRQ_VID_STAT 0x10

#define CX25840_IR_STATS_REG 0x210
#define CX25840_IR_IRQEN_REG 0x214

static int cx25840_debug;

module_param_named(debug, cx25840_debug, int, 0644);

MODULE_PARM_DESC(debug, "Debugging messages [0=Off (default) 1=On]");

/* ----------------------------------------------------------------------- */

static void cx23888_std_setup(struct i2c_client *client);

int cx25840_write(struct i2c_client *client, u16 addr, u8 value)
{
        u8 buffer[3];

        buffer[0] = addr >> 8;
        buffer[1] = addr & 0xff;
        buffer[2] = value;
        return i2c_master_send(client, buffer, 3);
}

int cx25840_write4(struct i2c_client *client, u16 addr, u32 value)
{
        u8 buffer[6];

        buffer[0] = addr >> 8;
        buffer[1] = addr & 0xff;
        buffer[2] = value & 0xff;
        buffer[3] = (value >> 8) & 0xff;
        buffer[4] = (value >> 16) & 0xff;
        buffer[5] = value >> 24;
        return i2c_master_send(client, buffer, 6);
}

u8 cx25840_read(struct i2c_client *client, u16 addr)
{
        struct i2c_msg msgs[2];
        u8 tx_buf[2], rx_buf[1];

        /* Write register address */
        tx_buf[0] = addr >> 8;
        tx_buf[1] = addr & 0xff;
        msgs[0].addr = client->addr;
        msgs[0].flags = 0;
        msgs[0].len = 2;
        msgs[0].buf = (char *)tx_buf;

        /* Read data from register */
        msgs[1].addr = client->addr;
        msgs[1].flags = I2C_M_RD;
        msgs[1].len = 1;
        msgs[1].buf = (char *)rx_buf;

        if (i2c_transfer(client->adapter, msgs, 2) < 2)
                return 0;

        return rx_buf[0];
}

u32 cx25840_read4(struct i2c_client *client, u16 addr)
{
        struct i2c_msg msgs[2];
        u8 tx_buf[2], rx_buf[4];

        /* Write register address */
        tx_buf[0] = addr >> 8;
        tx_buf[1] = addr & 0xff;
        msgs[0].addr = client->addr;
        msgs[0].flags = 0;
        msgs[0].len = 2;
        msgs[0].buf = (char *)tx_buf;

        /* Read data from registers */
        msgs[1].addr = client->addr;
        msgs[1].flags = I2C_M_RD;
        msgs[1].len = 4;
        msgs[1].buf = (char *)rx_buf;

        if (i2c_transfer(client->adapter, msgs, 2) < 2)
                return 0;

        return (rx_buf[3] << 24) | (rx_buf[2] << 16) |
               (rx_buf[1] << 8) | rx_buf[0];
}

int cx25840_and_or(struct i2c_client *client, u16 addr, unsigned int and_mask,
                   u8 or_value)
{
        return cx25840_write(client, addr,
                             (cx25840_read(client, addr) & and_mask) |
                             or_value);
}

int cx25840_and_or4(struct i2c_client *client, u16 addr, u32 and_mask,
                    u32 or_value)
{
        return cx25840_write4(client, addr,
                              (cx25840_read4(client, addr) & and_mask) |
                              or_value);
}

/* ----------------------------------------------------------------------- */

static int set_input(struct i2c_client *client,
                     enum cx25840_video_input vid_input,
                     enum cx25840_audio_input aud_input);

/*
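 * Register access sketch (illustrative note, using only the helpers
 * defined above): every access sends the 16-bit register address high
 * byte first, followed by the data byte(s), so changing a single bit is
 * a read plus a write through cx25840_and_or().  For example, the
 * microcontroller run bit (bit 4 of register 0x803) is toggled later in
 * this file as:
 *
 *      cx25840_and_or(client, 0x803, ~0x10, 0x00);  (stop the micro)
 *      cx25840_and_or(client, 0x803, ~0x10, 0x10);  (start the micro)
 *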
----------------------------------------------------------------------- */ static int cx23885_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *p) { struct i2c_client *client = v4l2_get_subdevdata(sd); int i; u32 pin_ctrl; u8 gpio_oe, gpio_data, strength; pin_ctrl = cx25840_read4(client, 0x120); gpio_oe = cx25840_read(client, 0x160); gpio_data = cx25840_read(client, 0x164); for (i = 0; i < n; i++) { strength = p[i].strength; if (strength > CX25840_PIN_DRIVE_FAST) strength = CX25840_PIN_DRIVE_FAST; switch (p[i].pin) { case CX23885_PIN_IRQ_N_GPIO16: if (p[i].function != CX23885_PAD_IRQ_N) { /* GPIO16 */ pin_ctrl &= ~(0x1 << 25); } else { /* IRQ_N */ if (p[i].flags & (BIT(V4L2_SUBDEV_IO_PIN_DISABLE) | BIT(V4L2_SUBDEV_IO_PIN_INPUT))) { pin_ctrl &= ~(0x1 << 25); } else { pin_ctrl |= (0x1 << 25); } if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)) { pin_ctrl &= ~(0x1 << 24); } else { pin_ctrl |= (0x1 << 24); } } break; case CX23885_PIN_IR_RX_GPIO19: if (p[i].function != CX23885_PAD_GPIO19) { /* IR_RX */ gpio_oe |= (0x1 << 0); pin_ctrl &= ~(0x3 << 18); pin_ctrl |= (strength << 18); } else { /* GPIO19 */ gpio_oe &= ~(0x1 << 0); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 0); gpio_data |= ((p[i].value & 0x1) << 0); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_IR_TX_GPIO20: if (p[i].function != CX23885_PAD_GPIO20) { /* IR_TX */ gpio_oe |= (0x1 << 1); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pin_ctrl &= ~(0x1 << 10); else pin_ctrl |= (0x1 << 10); pin_ctrl &= ~(0x3 << 18); pin_ctrl |= (strength << 18); } else { /* GPIO20 */ gpio_oe &= ~(0x1 << 1); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 1); gpio_data |= ((p[i].value & 0x1) << 1); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_SDAT_GPIO21: if (p[i].function != CX23885_PAD_GPIO21) { /* I2S_SDAT */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 2); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO21 */ gpio_oe &= ~(0x1 << 2); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 2); gpio_data |= ((p[i].value & 0x1) << 2); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_WCLK_GPIO22: if (p[i].function != CX23885_PAD_GPIO22) { /* I2S_WCLK */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 3); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO22 */ gpio_oe &= ~(0x1 << 3); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 3); gpio_data |= ((p[i].value & 0x1) << 3); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_BCLK_GPIO23: if (p[i].function != CX23885_PAD_GPIO23) { /* I2S_BCLK */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 4); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO23 */ gpio_oe &= ~(0x1 << 4); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 4); gpio_data |= ((p[i].value & 0x1) << 4); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; } } cx25840_write(client, 0x164, gpio_data); cx25840_write(client, 0x160, gpio_oe); cx25840_write4(client, 0x120, pin_ctrl); return 0; } static u8 cx25840_function_to_pad(struct i2c_client *client, u8 function) { if (function > CX25840_PAD_VRESET) { v4l_err(client, "invalid function %u, assuming default\n", (unsigned int)function); return 0; } return function; } static 
void cx25840_set_invert(u8 *pinctrl3, u8 *voutctrl4, u8 function, u8 pin, bool invert) { switch (function) { case CX25840_PAD_IRQ_N: if (invert) *pinctrl3 &= ~2; else *pinctrl3 |= 2; break; case CX25840_PAD_ACTIVE: if (invert) *voutctrl4 |= BIT(2); else *voutctrl4 &= ~BIT(2); break; case CX25840_PAD_VACTIVE: if (invert) *voutctrl4 |= BIT(5); else *voutctrl4 &= ~BIT(5); break; case CX25840_PAD_CBFLAG: if (invert) *voutctrl4 |= BIT(4); else *voutctrl4 &= ~BIT(4); break; case CX25840_PAD_VRESET: if (invert) *voutctrl4 |= BIT(0); else *voutctrl4 &= ~BIT(0); break; } if (function != CX25840_PAD_DEFAULT) return; switch (pin) { case CX25840_PIN_DVALID_PRGM0: if (invert) *voutctrl4 |= BIT(6); else *voutctrl4 &= ~BIT(6); break; case CX25840_PIN_HRESET_PRGM2: if (invert) *voutctrl4 |= BIT(1); else *voutctrl4 &= ~BIT(1); break; } } static int cx25840_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *p) { struct i2c_client *client = v4l2_get_subdevdata(sd); unsigned int i; u8 pinctrl[6], pinconf[10], voutctrl4; for (i = 0; i < 6; i++) pinctrl[i] = cx25840_read(client, 0x114 + i); for (i = 0; i < 10; i++) pinconf[i] = cx25840_read(client, 0x11c + i); voutctrl4 = cx25840_read(client, 0x407); for (i = 0; i < n; i++) { u8 strength = p[i].strength; if (strength != CX25840_PIN_DRIVE_SLOW && strength != CX25840_PIN_DRIVE_MEDIUM && strength != CX25840_PIN_DRIVE_FAST) { v4l_err(client, "invalid drive speed for pin %u (%u), assuming fast\n", (unsigned int)p[i].pin, (unsigned int)strength); strength = CX25840_PIN_DRIVE_FAST; } switch (p[i].pin) { case CX25840_PIN_DVALID_PRGM0: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[0] &= ~BIT(6); else pinctrl[0] |= BIT(6); pinconf[3] &= 0xf0; pinconf[3] |= cx25840_function_to_pad(client, p[i].function); cx25840_set_invert(&pinctrl[3], &voutctrl4, p[i].function, CX25840_PIN_DVALID_PRGM0, p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)); pinctrl[4] &= ~(3 << 2); /* CX25840_PIN_DRIVE_MEDIUM */ switch (strength) { case CX25840_PIN_DRIVE_SLOW: pinctrl[4] |= 1 << 2; break; case CX25840_PIN_DRIVE_FAST: pinctrl[4] |= 2 << 2; break; } break; case CX25840_PIN_HRESET_PRGM2: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[1] &= ~BIT(0); else pinctrl[1] |= BIT(0); pinconf[4] &= 0xf0; pinconf[4] |= cx25840_function_to_pad(client, p[i].function); cx25840_set_invert(&pinctrl[3], &voutctrl4, p[i].function, CX25840_PIN_HRESET_PRGM2, p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)); pinctrl[4] &= ~(3 << 2); /* CX25840_PIN_DRIVE_MEDIUM */ switch (strength) { case CX25840_PIN_DRIVE_SLOW: pinctrl[4] |= 1 << 2; break; case CX25840_PIN_DRIVE_FAST: pinctrl[4] |= 2 << 2; break; } break; case CX25840_PIN_PLL_CLK_PRGM7: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[2] &= ~BIT(2); else pinctrl[2] |= BIT(2); switch (p[i].function) { case CX25840_PAD_XTI_X5_DLL: pinconf[6] = 0; break; case CX25840_PAD_AUX_PLL: pinconf[6] = 1; break; case CX25840_PAD_VID_PLL: pinconf[6] = 5; break; case CX25840_PAD_XTI: pinconf[6] = 2; break; default: pinconf[6] = 3; pinconf[6] |= cx25840_function_to_pad(client, p[i].function) << 4; } break; default: v4l_err(client, "invalid or unsupported pin %u\n", (unsigned int)p[i].pin); break; } } cx25840_write(client, 0x407, voutctrl4); for (i = 0; i < 6; i++) cx25840_write(client, 0x114 + i, pinctrl[i]); for (i = 0; i < 10; i++) cx25840_write(client, 0x11c + i, pinconf[i]); return 0; } static int common_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *pincfg) { struct 
cx25840_state *state = to_state(sd); if (is_cx2388x(state)) return cx23885_s_io_pin_config(sd, n, pincfg); else if (is_cx2584x(state)) return cx25840_s_io_pin_config(sd, n, pincfg); return 0; } /* ----------------------------------------------------------------------- */ static void init_dll1(struct i2c_client *client) { /* * This is the Hauppauge sequence used to * initialize the Delay Lock Loop 1 (ADC DLL). */ cx25840_write(client, 0x159, 0x23); cx25840_write(client, 0x15a, 0x87); cx25840_write(client, 0x15b, 0x06); udelay(10); cx25840_write(client, 0x159, 0xe1); udelay(10); cx25840_write(client, 0x15a, 0x86); cx25840_write(client, 0x159, 0xe0); cx25840_write(client, 0x159, 0xe1); cx25840_write(client, 0x15b, 0x10); } static void init_dll2(struct i2c_client *client) { /* * This is the Hauppauge sequence used to * initialize the Delay Lock Loop 2 (ADC DLL). */ cx25840_write(client, 0x15d, 0xe3); cx25840_write(client, 0x15e, 0x86); cx25840_write(client, 0x15f, 0x06); udelay(10); cx25840_write(client, 0x15d, 0xe1); cx25840_write(client, 0x15d, 0xe0); cx25840_write(client, 0x15d, 0xe1); } static void cx25836_initialize(struct i2c_client *client) { /* *reset configuration is described on page 3-77 * of the CX25836 datasheet */ /* 2. */ cx25840_and_or(client, 0x000, ~0x01, 0x01); cx25840_and_or(client, 0x000, ~0x01, 0x00); /* 3a. */ cx25840_and_or(client, 0x15a, ~0x70, 0x00); /* 3b. */ cx25840_and_or(client, 0x15b, ~0x1e, 0x06); /* 3c. */ cx25840_and_or(client, 0x159, ~0x02, 0x02); /* 3d. */ udelay(10); /* 3e. */ cx25840_and_or(client, 0x159, ~0x02, 0x00); /* 3f. */ cx25840_and_or(client, 0x159, ~0xc0, 0xc0); /* 3g. */ cx25840_and_or(client, 0x159, ~0x01, 0x00); cx25840_and_or(client, 0x159, ~0x01, 0x01); /* 3h. */ cx25840_and_or(client, 0x15b, ~0x1e, 0x10); } static void cx25840_work_handler(struct work_struct *work) { struct cx25840_state *state = container_of(work, struct cx25840_state, fw_work); cx25840_loadfw(state->c); wake_up(&state->fw_wait); } #define CX25840_VCONFIG_SET_BIT(state, opt_msk, voc, idx, bit, oneval) \ do { \ if ((state)->vid_config & (opt_msk)) { \ if (((state)->vid_config & (opt_msk)) == \ (oneval)) \ (voc)[idx] |= BIT(bit); \ else \ (voc)[idx] &= ~BIT(bit); \ } \ } while (0) /* apply current vconfig to hardware regs */ static void cx25840_vconfig_apply(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 voutctrl[3]; unsigned int i; for (i = 0; i < 3; i++) voutctrl[i] = cx25840_read(client, 0x404 + i); if (state->vid_config & CX25840_VCONFIG_FMT_MASK) voutctrl[0] &= ~3; switch (state->vid_config & CX25840_VCONFIG_FMT_MASK) { case CX25840_VCONFIG_FMT_BT656: voutctrl[0] |= 1; break; case CX25840_VCONFIG_FMT_VIP11: voutctrl[0] |= 2; break; case CX25840_VCONFIG_FMT_VIP2: voutctrl[0] |= 3; break; case CX25840_VCONFIG_FMT_BT601: /* zero */ default: break; } CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_RES_MASK, voutctrl, 0, 2, CX25840_VCONFIG_RES_10BIT); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VBIRAW_MASK, voutctrl, 0, 3, CX25840_VCONFIG_VBIRAW_ENABLED); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_ANCDATA_MASK, voutctrl, 0, 4, CX25840_VCONFIG_ANCDATA_ENABLED); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_TASKBIT_MASK, voutctrl, 0, 5, CX25840_VCONFIG_TASKBIT_ONE); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_ACTIVE_MASK, voutctrl, 1, 2, CX25840_VCONFIG_ACTIVE_HORIZONTAL); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VALID_MASK, voutctrl, 1, 3, CX25840_VCONFIG_VALID_ANDACTIVE); CX25840_VCONFIG_SET_BIT(state, 
CX25840_VCONFIG_HRESETW_MASK, voutctrl, 1, 4, CX25840_VCONFIG_HRESETW_PIXCLK); if (state->vid_config & CX25840_VCONFIG_CLKGATE_MASK) voutctrl[1] &= ~(3 << 6); switch (state->vid_config & CX25840_VCONFIG_CLKGATE_MASK) { case CX25840_VCONFIG_CLKGATE_VALID: voutctrl[1] |= 2; break; case CX25840_VCONFIG_CLKGATE_VALIDACTIVE: voutctrl[1] |= 3; break; case CX25840_VCONFIG_CLKGATE_NONE: /* zero */ default: break; } CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_DCMODE_MASK, voutctrl, 2, 0, CX25840_VCONFIG_DCMODE_BYTES); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_IDID0S_MASK, voutctrl, 2, 1, CX25840_VCONFIG_IDID0S_LINECNT); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VIPCLAMP_MASK, voutctrl, 2, 4, CX25840_VCONFIG_VIPCLAMP_ENABLED); for (i = 0; i < 3; i++) cx25840_write(client, 0x404 + i, voutctrl[i]); } static void cx25840_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); struct workqueue_struct *q; /* datasheet startup in numbered steps, refer to page 3-77 */ /* 2. */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* * The default of this register should be 4, but I get 0 instead. * Set this register to 4 manually. */ cx25840_write(client, 0x000, 0x04); /* 3. */ init_dll1(client); init_dll2(client); cx25840_write(client, 0x136, 0x0a); /* 4. */ cx25840_write(client, 0x13c, 0x01); cx25840_write(client, 0x13c, 0x00); /* 5. */ /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } /* 6. */ cx25840_write(client, 0x115, 0x8c); cx25840_write(client, 0x116, 0x07); cx25840_write(client, 0x118, 0x02); /* 7. */ cx25840_write(client, 0x4a5, 0x80); cx25840_write(client, 0x4a5, 0x00); cx25840_write(client, 0x402, 0x00); /* 8. */ cx25840_and_or(client, 0x401, ~0x18, 0); cx25840_and_or(client, 0x4a2, ~0x10, 0x10); /* steps 8c and 8d are done in change_input() */ /* 10. */ cx25840_write(client, 0x8d3, 0x1f); cx25840_write(client, 0x8e3, 0x03); cx25840_std_setup(client); /* trial and error says these are needed to get audio */ cx25840_write(client, 0x914, 0xa0); cx25840_write(client, 0x918, 0xa0); cx25840_write(client, 0x919, 0x01); /* stereo preferred */ cx25840_write(client, 0x809, 0x04); /* AC97 shift */ cx25840_write(client, 0x8cf, 0x0f); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); if (state->generic_mode) cx25840_vconfig_apply(client); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); } static void cx23885_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u32 clk_freq = 0; struct workqueue_struct *q; /* cx23885 sets hostdata to clk_freq pointer */ if (v4l2_get_subdev_hostdata(&state->sd)) clk_freq = *((u32 *)v4l2_get_subdev_hostdata(&state->sd)); /* * Come out of digital power down * The CX23888, at least, needs this, otherwise registers aside from * 0x0-0x2 can't be read or written. 
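 *
 * (The write of 0 to register 0x000 just below is what brings the part
 * out of that power down, which is why it is done before the other
 * register writes in this init sequence.)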
*/ cx25840_write(client, 0x000, 0); /* Internal Reset */ cx25840_and_or(client, 0x102, ~0x01, 0x01); cx25840_and_or(client, 0x102, ~0x01, 0x00); /* Stop microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* DIF in reset? */ cx25840_write(client, 0x398, 0); /* * Trust the default xtal, no division * '885: 28.636363... MHz * '887: 25.000000 MHz * '888: 50.000000 MHz */ cx25840_write(client, 0x2, 0x76); /* Power up all the PLL's and DLL */ cx25840_write(client, 0x1, 0x40); /* Sys PLL */ switch (state->id) { case CX23888_AV: /* * 50.0 MHz * (0xb + 0xe8ba26/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ if (clk_freq == 25000000) { /* 888/ImpactVCBe or 25Mhz xtal */ ; /* nothing to do */ } else { /* HVR1850 or 50MHz xtal */ cx25840_write(client, 0x2, 0x71); } cx25840_write4(client, 0x11c, 0x01d1744c); cx25840_write4(client, 0x118, 0x00000416); cx25840_write4(client, 0x404, 0x0010253e); cx25840_write4(client, 0x42c, 0x42600000); cx25840_write4(client, 0x44c, 0x161f1000); break; case CX23887_AV: /* * 25.0 MHz * (0x16 + 0x1d1744c/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ cx25840_write4(client, 0x11c, 0x01d1744c); cx25840_write4(client, 0x118, 0x00000416); break; case CX23885_AV: default: /* * 28.636363 MHz * (0x14 + 0x0/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ cx25840_write4(client, 0x11c, 0x00000000); cx25840_write4(client, 0x118, 0x00000414); break; } /* Disable DIF bypass */ cx25840_write4(client, 0x33c, 0x00000001); /* DIF Src phase inc */ cx25840_write4(client, 0x340, 0x0df7df83); /* * Vid PLL * Setup for a BT.656 pixel clock of 13.5 Mpixels/second * * 28.636363 MHz * (0xf + 0x02be2c9/0x2000000)/4 = 8 * 13.5 MHz * 432.0 MHz before post divide */ /* HVR1850 */ switch (state->id) { case CX23888_AV: if (clk_freq == 25000000) { /* 888/ImpactVCBe or 25MHz xtal */ cx25840_write4(client, 0x10c, 0x01b6db7b); cx25840_write4(client, 0x108, 0x00000512); } else { /* 888/HVR1250 or 50MHz xtal */ cx25840_write4(client, 0x10c, 0x13333333); cx25840_write4(client, 0x108, 0x00000515); } break; default: cx25840_write4(client, 0x10c, 0x002be2c9); cx25840_write4(client, 0x108, 0x0000040f); } /* Luma */ cx25840_write4(client, 0x414, 0x00107d12); /* Chroma */ if (is_cx23888(state)) cx25840_write4(client, 0x418, 0x1d008282); else cx25840_write4(client, 0x420, 0x3d008282); /* * Aux PLL * Initial setup for audio sample clock: * 48 ksps, 16 bits/sample, x160 multiplier = 122.88 MHz * Initial I2S output/master clock(?): * 48 ksps, 16 bits/sample, x16 multiplier = 12.288 MHz */ switch (state->id) { case CX23888_AV: /* * 50.0 MHz * (0x7 + 0x0bedfa4/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ /* HVR1850 or 50MHz xtal or 25MHz xtal */ cx25840_write4(client, 0x114, 0x017dbf48); cx25840_write4(client, 0x110, 0x000a030e); break; case CX23887_AV: /* * 25.0 MHz * (0xe + 0x17dbf48/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ cx25840_write4(client, 0x114, 0x017dbf48); cx25840_write4(client, 0x110, 0x000a030e); break; case CX23885_AV: default: /* * 28.636363 MHz * (0xc + 0x1bf0c9e/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ cx25840_write4(client, 0x114, 0x01bf0c9e); cx25840_write4(client, 0x110, 0x000a030c); break; } /* ADC2 input select */ cx25840_write(client, 0x102, 0x10); /* VIN1 & VIN5 */ cx25840_write(client, 0x103, 0x11); /* Enable format auto detect */ cx25840_write(client, 0x400, 0); /* Fast 
subchroma lock */ /* White crush, Chroma AGC & Chroma Killer enabled */ cx25840_write(client, 0x401, 0xe8); /* Select AFE clock pad output source */ cx25840_write(client, 0x144, 0x05); /* Drive GPIO2 direction and values for HVR1700 * where an onboard mux selects the output of demodulator * vs the 417. Failure to set this results in no DTV. * It's safe to set this across all Hauppauge boards * currently, regardless of the board type. */ cx25840_write(client, 0x160, 0x1d); cx25840_write(client, 0x164, 0x00); /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } /* * Call the cx23888 specific std setup func, we no longer rely on * the generic cx24840 func. */ if (is_cx23888(state)) cx23888_std_setup(client); else cx25840_std_setup(client); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); /* Disable and clear video interrupts - we don't use them */ cx25840_write4(client, CX25840_VID_INT_STAT_REG, 0xffffffff); /* Disable and clear audio interrupts - we don't use them */ cx25840_write(client, CX25840_AUD_INT_CTRL_REG, 0xff); cx25840_write(client, CX25840_AUD_INT_STAT_REG, 0xff); /* CC raw enable */ /* * - VIP 1.1 control codes - 10bit, blue field enable. * - enable raw data during vertical blanking. * - enable ancillary Data insertion for 656 or VIP. */ cx25840_write4(client, 0x404, 0x0010253e); /* CC on - VBI_LINE_CTRL3, FLD_VBI_MD_LINE12 */ cx25840_write(client, state->vbi_regs_offset + 0x42f, 0x66); /* HVR-1250 / HVR1850 DIF related */ /* Power everything up */ cx25840_write4(client, 0x130, 0x0); /* SRC_COMB_CFG */ if (is_cx23888(state)) cx25840_write4(client, 0x454, 0x6628021F); else cx25840_write4(client, 0x478, 0x6628021F); /* AFE_CLK_OUT_CTRL - Select the clock output source as output */ cx25840_write4(client, 0x144, 0x5); /* I2C_OUT_CTL - I2S output configuration as * Master, Sony, Left justified, left sample on WS=1 */ cx25840_write4(client, 0x918, 0x1a0); /* AFE_DIAG_CTRL1 */ cx25840_write4(client, 0x134, 0x000a1800); /* AFE_DIAG_CTRL3 - Inverted Polarity for Audio and Video */ cx25840_write4(client, 0x13c, 0x00310000); } /* ----------------------------------------------------------------------- */ static void cx231xx_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); struct workqueue_struct *q; /* Internal Reset */ cx25840_and_or(client, 0x102, ~0x01, 0x01); cx25840_and_or(client, 0x102, ~0x01, 0x00); /* Stop microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* DIF in reset? 
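 * (Same step as in cx23885_initialize() above; register 0x398 is cleared
 * just below, as it also is in set_input() under its "Reset the DIF"
 * comment.)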
*/ cx25840_write(client, 0x398, 0); /* Trust the default xtal, no division */ /* This changes for the cx23888 products */ cx25840_write(client, 0x2, 0x76); /* Bring down the regulator for AUX clk */ cx25840_write(client, 0x1, 0x40); /* Disable DIF bypass */ cx25840_write4(client, 0x33c, 0x00000001); /* DIF Src phase inc */ cx25840_write4(client, 0x340, 0x0df7df83); /* Luma */ cx25840_write4(client, 0x414, 0x00107d12); /* Chroma */ cx25840_write4(client, 0x420, 0x3d008282); /* ADC2 input select */ cx25840_write(client, 0x102, 0x10); /* VIN1 & VIN5 */ cx25840_write(client, 0x103, 0x11); /* Enable format auto detect */ cx25840_write(client, 0x400, 0); /* Fast subchroma lock */ /* White crush, Chroma AGC & Chroma Killer enabled */ cx25840_write(client, 0x401, 0xe8); /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } cx25840_std_setup(client); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); /* CC raw enable */ cx25840_write(client, 0x404, 0x0b); /* CC on */ cx25840_write(client, 0x42f, 0x66); cx25840_write4(client, 0x474, 0x1e1e601a); } /* ----------------------------------------------------------------------- */ void cx25840_std_setup(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); v4l2_std_id std = state->std; int hblank, hactive, burst, vblank, vactive, sc; int vblank656, src_decimation; int luma_lpf, uv_lpf, comb; u32 pll_int, pll_frac, pll_post; /* datasheet startup, step 8d */ if (std & ~V4L2_STD_NTSC) cx25840_write(client, 0x49f, 0x11); else cx25840_write(client, 0x49f, 0x14); /* generic mode uses the values that the chip autoconfig would set */ if (std & V4L2_STD_625_50) { hblank = 132; hactive = 720; burst = 93; if (state->generic_mode) { vblank = 34; vactive = 576; vblank656 = 38; } else { vblank = 36; vactive = 580; vblank656 = 40; } src_decimation = 0x21f; luma_lpf = 2; if (std & V4L2_STD_SECAM) { uv_lpf = 0; comb = 0; sc = 0x0a425f; } else if (std == V4L2_STD_PAL_Nc) { if (state->generic_mode) { burst = 95; luma_lpf = 1; } uv_lpf = 1; comb = 0x20; sc = 556453; } else { uv_lpf = 1; comb = 0x20; sc = 688739; } } else { hactive = 720; hblank = 122; vactive = 487; luma_lpf = 1; uv_lpf = 1; if (state->generic_mode) { vblank = 20; vblank656 = 24; } src_decimation = 0x21f; if (std == V4L2_STD_PAL_60) { if (!state->generic_mode) { vblank = 26; vblank656 = 26; burst = 0x5b; } else { burst = 0x59; } luma_lpf = 2; comb = 0x20; sc = 688739; } else if (std == V4L2_STD_PAL_M) { vblank = 20; vblank656 = 24; burst = 0x61; comb = 0x20; sc = 555452; } else { if (!state->generic_mode) { vblank = 26; vblank656 = 26; } burst = 0x5b; comb = 0x66; sc = 556063; } } /* DEBUG: Displays configured PLL frequency */ if (!is_cx231xx(state)) { pll_int = cx25840_read(client, 0x108); pll_frac = cx25840_read4(client, 0x10c) & 0x1ffffff; pll_post = cx25840_read(client, 0x109); v4l_dbg(1, cx25840_debug, client, "PLL regs = int: %u, frac: %u, post: %u\n", pll_int, pll_frac, pll_post); if (pll_post) { int fin, fsc; int pll = (28636363L * 
((((u64)pll_int) << 25L) + pll_frac)) >> 25L; pll /= pll_post; v4l_dbg(1, cx25840_debug, client, "PLL = %d.%06d MHz\n", pll / 1000000, pll % 1000000); v4l_dbg(1, cx25840_debug, client, "PLL/8 = %d.%06d MHz\n", pll / 8000000, (pll / 8) % 1000000); fin = ((u64)src_decimation * pll) >> 12; v4l_dbg(1, cx25840_debug, client, "ADC Sampling freq = %d.%06d MHz\n", fin / 1000000, fin % 1000000); fsc = (((u64)sc) * pll) >> 24L; v4l_dbg(1, cx25840_debug, client, "Chroma sub-carrier freq = %d.%06d MHz\n", fsc / 1000000, fsc % 1000000); v4l_dbg(1, cx25840_debug, client, "hblank %i, hactive %i, vblank %i, vactive %i, vblank656 %i, src_dec %i, burst 0x%02x, luma_lpf %i, uv_lpf %i, comb 0x%02x, sc 0x%06x\n", hblank, hactive, vblank, vactive, vblank656, src_decimation, burst, luma_lpf, uv_lpf, comb, sc); } } /* Sets horizontal blanking delay and active lines */ cx25840_write(client, 0x470, hblank); cx25840_write(client, 0x471, (((hblank >> 8) & 0x3) | (hactive << 4)) & 0xff); cx25840_write(client, 0x472, hactive >> 4); /* Sets burst gate delay */ cx25840_write(client, 0x473, burst); /* Sets vertical blanking delay and active duration */ cx25840_write(client, 0x474, vblank); cx25840_write(client, 0x475, (((vblank >> 8) & 0x3) | (vactive << 4)) & 0xff); cx25840_write(client, 0x476, vactive >> 4); cx25840_write(client, 0x477, vblank656); /* Sets src decimation rate */ cx25840_write(client, 0x478, src_decimation & 0xff); cx25840_write(client, 0x479, (src_decimation >> 8) & 0xff); /* Sets Luma and UV Low pass filters */ cx25840_write(client, 0x47a, luma_lpf << 6 | ((uv_lpf << 4) & 0x30)); /* Enables comb filters */ cx25840_write(client, 0x47b, comb); /* Sets SC Step*/ cx25840_write(client, 0x47c, sc); cx25840_write(client, 0x47d, (sc >> 8) & 0xff); cx25840_write(client, 0x47e, (sc >> 16) & 0xff); /* Sets VBI parameters */ if (std & V4L2_STD_625_50) { cx25840_write(client, 0x47f, 0x01); state->vbi_line_offset = 5; } else { cx25840_write(client, 0x47f, 0x00); state->vbi_line_offset = 8; } } /* ----------------------------------------------------------------------- */ static void input_change(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); v4l2_std_id std = state->std; /* Follow step 8c and 8d of section 3.16 in the cx25840 datasheet */ if (std & V4L2_STD_SECAM) { cx25840_write(client, 0x402, 0); } else { cx25840_write(client, 0x402, 0x04); cx25840_write(client, 0x49f, (std & V4L2_STD_NTSC) ? 0x14 : 0x11); } cx25840_and_or(client, 0x401, ~0x60, 0); cx25840_and_or(client, 0x401, ~0x60, 0x60); /* Don't write into audio registers on cx2583x chips */ if (is_cx2583x(state)) return; cx25840_and_or(client, 0x810, ~0x01, 1); if (state->radio) { cx25840_write(client, 0x808, 0xf9); cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_525_60) { /* * Certain Hauppauge PVR150 models have a hardware bug * that causes audio to drop out. For these models the * audio standard must be set explicitly. * To be precise: it affects cards with tuner models * 85, 99 and 112 (model numbers from tveeprom). */ int hw_fix = state->pvr150_workaround; if (std == V4L2_STD_NTSC_M_JP) { /* Japan uses EIAJ audio standard */ cx25840_write(client, 0x808, hw_fix ? 0x2f : 0xf7); } else if (std == V4L2_STD_NTSC_M_KR) { /* South Korea uses A2 audio standard */ cx25840_write(client, 0x808, hw_fix ? 0x3f : 0xf8); } else { /* Others use the BTSC audio standard */ cx25840_write(client, 0x808, hw_fix ? 
0x1f : 0xf6); } cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_PAL) { /* Autodetect audio standard and audio system */ cx25840_write(client, 0x808, 0xff); /* * Since system PAL-L is pretty much non-existent and * not used by any public broadcast network, force * 6.5 MHz carrier to be interpreted as System DK, * this avoids DK audio detection instability */ cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_SECAM) { /* Autodetect audio standard and audio system */ cx25840_write(client, 0x808, 0xff); /* * If only one of SECAM-DK / SECAM-L is required, then force * 6.5MHz carrier, else autodetect it */ if ((std & V4L2_STD_SECAM_DK) && !(std & (V4L2_STD_SECAM_L | V4L2_STD_SECAM_LC))) { /* 6.5 MHz carrier to be interpreted as System DK */ cx25840_write(client, 0x80b, 0x00); } else if (!(std & V4L2_STD_SECAM_DK) && (std & (V4L2_STD_SECAM_L | V4L2_STD_SECAM_LC))) { /* 6.5 MHz carrier to be interpreted as System L */ cx25840_write(client, 0x80b, 0x08); } else { /* 6.5 MHz carrier to be autodetected */ cx25840_write(client, 0x80b, 0x10); } } cx25840_and_or(client, 0x810, ~0x01, 0); } static int set_input(struct i2c_client *client, enum cx25840_video_input vid_input, enum cx25840_audio_input aud_input) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 is_composite = (vid_input >= CX25840_COMPOSITE1 && vid_input <= CX25840_COMPOSITE8); u8 is_component = (vid_input & CX25840_COMPONENT_ON) == CX25840_COMPONENT_ON; u8 is_dif = (vid_input & CX25840_DIF_ON) == CX25840_DIF_ON; u8 is_svideo = (vid_input & CX25840_SVIDEO_ON) == CX25840_SVIDEO_ON; int luma = vid_input & 0xf0; int chroma = vid_input & 0xf00; u8 reg; u32 val; v4l_dbg(1, cx25840_debug, client, "decoder set video input %d, audio input %d\n", vid_input, aud_input); if (vid_input >= CX25840_VIN1_CH1) { v4l_dbg(1, cx25840_debug, client, "vid_input 0x%x\n", vid_input); reg = vid_input & 0xff; is_composite = !is_component && ((vid_input & CX25840_SVIDEO_ON) != CX25840_SVIDEO_ON); v4l_dbg(1, cx25840_debug, client, "mux cfg 0x%x comp=%d\n", reg, is_composite); } else if (is_composite) { reg = 0xf0 + (vid_input - CX25840_COMPOSITE1); } else { if ((vid_input & ~0xff0) || luma < CX25840_SVIDEO_LUMA1 || luma > CX25840_SVIDEO_LUMA8 || chroma < CX25840_SVIDEO_CHROMA4 || chroma > CX25840_SVIDEO_CHROMA8) { v4l_err(client, "0x%04x is not a valid video input!\n", vid_input); return -EINVAL; } reg = 0xf0 + ((luma - CX25840_SVIDEO_LUMA1) >> 4); if (chroma >= CX25840_SVIDEO_CHROMA7) { reg &= 0x3f; reg |= (chroma - CX25840_SVIDEO_CHROMA7) >> 2; } else { reg &= 0xcf; reg |= (chroma - CX25840_SVIDEO_CHROMA4) >> 4; } } /* The caller has previously prepared the correct routing * configuration in reg (for the cx23885) so we have no * need to attempt to flip bits for earlier av decoders. */ if (!is_cx2388x(state) && !is_cx231xx(state)) { switch (aud_input) { case CX25840_AUDIO_SERIAL: /* do nothing, use serial audio input */ break; case CX25840_AUDIO4: reg &= ~0x30; break; case CX25840_AUDIO5: reg &= ~0x30; reg |= 0x10; break; case CX25840_AUDIO6: reg &= ~0x30; reg |= 0x20; break; case CX25840_AUDIO7: reg &= ~0xc0; break; case CX25840_AUDIO8: reg &= ~0xc0; reg |= 0x40; break; default: v4l_err(client, "0x%04x is not a valid audio input!\n", aud_input); return -EINVAL; } } cx25840_write(client, 0x103, reg); /* Set INPUT_MODE to Composite, S-Video or Component */ if (is_component) cx25840_and_or(client, 0x401, ~0x6, 0x6); else cx25840_and_or(client, 0x401, ~0x6, is_composite ? 
0 : 0x02); if (is_cx2388x(state)) { /* Enable or disable the DIF for tuner use */ if (is_dif) { cx25840_and_or(client, 0x102, ~0x80, 0x80); /* Set of defaults for NTSC and PAL */ cx25840_write4(client, 0x31c, 0xc2262600); cx25840_write4(client, 0x320, 0xc2262600); /* 18271 IF - Nobody else yet uses a different * tuner with the DIF, so these are reasonable * assumptions (HVR1250 and HVR1850 specific). */ cx25840_write4(client, 0x318, 0xda262600); cx25840_write4(client, 0x33c, 0x2a24c800); cx25840_write4(client, 0x104, 0x0704dd00); } else { cx25840_write4(client, 0x300, 0x015c28f5); cx25840_and_or(client, 0x102, ~0x80, 0); cx25840_write4(client, 0x340, 0xdf7df83); cx25840_write4(client, 0x104, 0x0704dd80); cx25840_write4(client, 0x314, 0x22400600); cx25840_write4(client, 0x318, 0x40002600); cx25840_write4(client, 0x324, 0x40002600); cx25840_write4(client, 0x32c, 0x0250e620); cx25840_write4(client, 0x39c, 0x01FF0B00); cx25840_write4(client, 0x410, 0xffff0dbf); cx25840_write4(client, 0x414, 0x00137d03); if (is_cx23888(state)) { /* 888 MISC_TIM_CTRL */ cx25840_write4(client, 0x42c, 0x42600000); /* 888 FIELD_COUNT */ cx25840_write4(client, 0x430, 0x0000039b); /* 888 VSCALE_CTRL */ cx25840_write4(client, 0x438, 0x00000000); /* 888 DFE_CTRL1 */ cx25840_write4(client, 0x440, 0xF8E3E824); /* 888 DFE_CTRL2 */ cx25840_write4(client, 0x444, 0x401040dc); /* 888 DFE_CTRL3 */ cx25840_write4(client, 0x448, 0xcd3f02a0); /* 888 PLL_CTRL */ cx25840_write4(client, 0x44c, 0x161f1000); /* 888 HTL_CTRL */ cx25840_write4(client, 0x450, 0x00000802); } cx25840_write4(client, 0x91c, 0x01000000); cx25840_write4(client, 0x8e0, 0x03063870); cx25840_write4(client, 0x8d4, 0x7FFF0024); cx25840_write4(client, 0x8d0, 0x00063073); cx25840_write4(client, 0x8c8, 0x00010000); cx25840_write4(client, 0x8cc, 0x00080023); /* DIF BYPASS */ cx25840_write4(client, 0x33c, 0x2a04c800); } /* Reset the DIF */ cx25840_write4(client, 0x398, 0); } if (!is_cx2388x(state) && !is_cx231xx(state)) { /* Set CH_SEL_ADC2 to 1 if input comes from CH3 */ cx25840_and_or(client, 0x102, ~0x2, (reg & 0x80) == 0 ? 2 : 0); /* Set DUAL_MODE_ADC2 to 1 if input comes from both CH2&CH3 */ if ((reg & 0xc0) != 0xc0 && (reg & 0x30) != 0x30) cx25840_and_or(client, 0x102, ~0x4, 4); else cx25840_and_or(client, 0x102, ~0x4, 0); } else { /* Set DUAL_MODE_ADC2 to 1 if component*/ cx25840_and_or(client, 0x102, ~0x4, is_component ? 0x4 : 0x0); if (is_composite) { /* ADC2 input select channel 2 */ cx25840_and_or(client, 0x102, ~0x2, 0); } else if (!is_component) { /* S-Video */ if (chroma >= CX25840_SVIDEO_CHROMA7) { /* ADC2 input select channel 3 */ cx25840_and_or(client, 0x102, ~0x2, 2); } else { /* ADC2 input select channel 2 */ cx25840_and_or(client, 0x102, ~0x2, 0); } } /* cx23885 / SVIDEO */ if (is_cx2388x(state) && is_svideo) { #define AFE_CTRL (0x104) #define MODE_CTRL (0x400) cx25840_and_or(client, 0x102, ~0x2, 0x2); val = cx25840_read4(client, MODE_CTRL); val &= 0xFFFFF9FF; /* YC */ val |= 0x00000200; val &= ~0x2000; cx25840_write4(client, MODE_CTRL, val); val = cx25840_read4(client, AFE_CTRL); /* Chroma in select */ val |= 0x00001000; val &= 0xfffffe7f; /* Clear VGA_SEL_CH2 and VGA_SEL_CH3 (bits 7 and 8). * This sets them to use video rather than audio. * Only one of the two will be in use. 
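 *
 * (The 0xfffffe7f mask applied above is ~(BIT(7) | BIT(8)) = ~0x180,
 * so exactly those two bits are cleared.)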
*/ cx25840_write4(client, AFE_CTRL, val); } else { cx25840_and_or(client, 0x102, ~0x2, 0); } } state->vid_input = vid_input; state->aud_input = aud_input; cx25840_audio_set_path(client); input_change(client); if (is_cx2388x(state)) { /* Audio channel 1 src : Parallel 1 */ cx25840_write(client, 0x124, 0x03); /* Select AFE clock pad output source */ cx25840_write(client, 0x144, 0x05); /* I2S_IN_CTL: I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 */ cx25840_write(client, 0x914, 0xa0); /* I2S_OUT_CTL: * I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 * I2S_OUT_MASTER_MODE = Master */ cx25840_write(client, 0x918, 0xa0); cx25840_write(client, 0x919, 0x01); } else if (is_cx231xx(state)) { /* Audio channel 1 src : Parallel 1 */ cx25840_write(client, 0x124, 0x03); /* I2S_IN_CTL: I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 */ cx25840_write(client, 0x914, 0xa0); /* I2S_OUT_CTL: * I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 * I2S_OUT_MASTER_MODE = Master */ cx25840_write(client, 0x918, 0xa0); cx25840_write(client, 0x919, 0x01); } if (is_cx2388x(state) && ((aud_input == CX25840_AUDIO7) || (aud_input == CX25840_AUDIO6))) { /* Configure audio from LR1 or LR2 input */ cx25840_write4(client, 0x910, 0); cx25840_write4(client, 0x8d0, 0x63073); } else if (is_cx2388x(state) && (aud_input == CX25840_AUDIO8)) { /* Configure audio from tuner/sif input */ cx25840_write4(client, 0x910, 0x12b000c9); cx25840_write4(client, 0x8d0, 0x1f063870); } if (is_cx23888(state)) { /* * HVR1850 * * AUD_IO_CTRL - I2S Input, Parallel1 * - Channel 1 src - Parallel1 (Merlin out) * - Channel 2 src - Parallel2 (Merlin out) * - Channel 3 src - Parallel3 (Merlin AC97 out) * - I2S source and dir - Merlin, output */ cx25840_write4(client, 0x124, 0x100); if (!is_dif) { /* * Stop microcontroller if we don't need it * to avoid audio popping on svideo/composite use. */ cx25840_and_or(client, 0x803, ~0x10, 0x00); } } return 0; } /* ----------------------------------------------------------------------- */ static int set_v4lstd(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 fmt = 0; /* zero is autodetect */ u8 pal_m = 0; /* First tests should be against specific std */ if (state->std == V4L2_STD_NTSC_M_JP) { fmt = 0x2; } else if (state->std == V4L2_STD_NTSC_443) { fmt = 0x3; } else if (state->std == V4L2_STD_PAL_M) { pal_m = 1; fmt = 0x5; } else if (state->std == V4L2_STD_PAL_N) { fmt = 0x6; } else if (state->std == V4L2_STD_PAL_Nc) { fmt = 0x7; } else if (state->std == V4L2_STD_PAL_60) { fmt = 0x8; } else { /* Then, test against generic ones */ if (state->std & V4L2_STD_NTSC) fmt = 0x1; else if (state->std & V4L2_STD_PAL) fmt = 0x4; else if (state->std & V4L2_STD_SECAM) fmt = 0xc; } v4l_dbg(1, cx25840_debug, client, "changing video std to fmt %i\n", fmt); /* * Follow step 9 of section 3.16 in the cx25840 datasheet. * Without this PAL may display a vertical ghosting effect. * This happens for example with the Yuan MPC622. 
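 *
 * (The fmt values 4..7 tested below are the PAL-BDGHI/M/N/Nc codes
 * assigned above; PAL-60 (8), NTSC and SECAM are left as-is.)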
*/ if (fmt >= 4 && fmt < 8) { /* Set format to NTSC-M */ cx25840_and_or(client, 0x400, ~0xf, 1); /* Turn off LCOMB */ cx25840_and_or(client, 0x47b, ~6, 0); } cx25840_and_or(client, 0x400, ~0xf, fmt); cx25840_and_or(client, 0x403, ~0x3, pal_m); if (is_cx23888(state)) cx23888_std_setup(client); else cx25840_std_setup(client); if (!is_cx2583x(state)) input_change(client); return 0; } /* ----------------------------------------------------------------------- */ static int cx25840_s_ctrl(struct v4l2_ctrl *ctrl) { struct v4l2_subdev *sd = to_sd(ctrl); struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: cx25840_write(client, 0x414, ctrl->val - 128); break; case V4L2_CID_CONTRAST: cx25840_write(client, 0x415, ctrl->val << 1); break; case V4L2_CID_SATURATION: if (is_cx23888(state)) { cx25840_write(client, 0x418, ctrl->val << 1); cx25840_write(client, 0x419, ctrl->val << 1); } else { cx25840_write(client, 0x420, ctrl->val << 1); cx25840_write(client, 0x421, ctrl->val << 1); } break; case V4L2_CID_HUE: if (is_cx23888(state)) cx25840_write(client, 0x41a, ctrl->val); else cx25840_write(client, 0x422, ctrl->val); break; default: return -EINVAL; } return 0; } /* ----------------------------------------------------------------------- */ static int cx25840_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_state *sd_state, struct v4l2_subdev_format *format) { struct v4l2_mbus_framefmt *fmt = &format->format; struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); u32 hsc, vsc, v_src, h_src, v_add; int filter; int is_50hz = !(state->std & V4L2_STD_525_60); if (format->pad || fmt->code != MEDIA_BUS_FMT_FIXED) return -EINVAL; fmt->field = V4L2_FIELD_INTERLACED; fmt->colorspace = V4L2_COLORSPACE_SMPTE170M; if (is_cx23888(state)) { v_src = (cx25840_read(client, 0x42a) & 0x3f) << 4; v_src |= (cx25840_read(client, 0x429) & 0xf0) >> 4; } else { v_src = (cx25840_read(client, 0x476) & 0x3f) << 4; v_src |= (cx25840_read(client, 0x475) & 0xf0) >> 4; } if (is_cx23888(state)) { h_src = (cx25840_read(client, 0x426) & 0x3f) << 4; h_src |= (cx25840_read(client, 0x425) & 0xf0) >> 4; } else { h_src = (cx25840_read(client, 0x472) & 0x3f) << 4; h_src |= (cx25840_read(client, 0x471) & 0xf0) >> 4; } if (!state->generic_mode) { v_add = is_50hz ? 4 : 7; /* * cx23888 in 525-line mode is programmed for 486 active lines * while other chips use 487 active lines. * * See reg 0x428 bits [21:12] in cx23888_std_setup() vs * vactive in cx25840_std_setup(). 
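 *
 * (Editorial note, not in the original source: the net result is
 * v_add = 4 for 50 Hz standards and v_add = 7 for 60 Hz, reduced to 6 on
 * the cx23888; generic mode below uses v_add = 0.  v_add is then folded
 * into the scale factors further down, e.g. scaling a 720x487 source
 * (v_add = 7) to 720x480 gives hsc = 0 and vsc = 0, i.e. no scaling.)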
*/ if (is_cx23888(state) && !is_50hz) v_add--; } else { v_add = 0; } if (h_src == 0 || v_src <= v_add) { v4l_err(client, "chip reported picture size (%u x %u) is far too small\n", (unsigned int)h_src, (unsigned int)v_src); /* * that's the best we can do since the output picture * size is completely unknown in this case */ return -EINVAL; } fmt->width = clamp(fmt->width, (h_src + 15) / 16, h_src); if (v_add * 8 >= v_src) fmt->height = clamp(fmt->height, (u32)1, v_src - v_add); else fmt->height = clamp(fmt->height, (v_src - v_add * 8 + 7) / 8, v_src - v_add); if (format->which == V4L2_SUBDEV_FORMAT_TRY) return 0; hsc = (h_src * (1 << 20)) / fmt->width - (1 << 20); vsc = (1 << 16) - (v_src * (1 << 9) / (fmt->height + v_add) - (1 << 9)); vsc &= 0x1fff; if (fmt->width >= 385) filter = 0; else if (fmt->width > 192) filter = 1; else if (fmt->width > 96) filter = 2; else filter = 3; v4l_dbg(1, cx25840_debug, client, "decoder set size %u x %u with scale %x x %x\n", (unsigned int)fmt->width, (unsigned int)fmt->height, (unsigned int)hsc, (unsigned int)vsc); /* HSCALE=hsc */ if (is_cx23888(state)) { cx25840_write4(client, 0x434, hsc | (1 << 24)); /* VSCALE=vsc VS_INTRLACE=1 VFILT=filter */ cx25840_write4(client, 0x438, vsc | (1 << 19) | (filter << 16)); } else { cx25840_write(client, 0x418, hsc & 0xff); cx25840_write(client, 0x419, (hsc >> 8) & 0xff); cx25840_write(client, 0x41a, hsc >> 16); /* VSCALE=vsc */ cx25840_write(client, 0x41c, vsc & 0xff); cx25840_write(client, 0x41d, vsc >> 8); /* VS_INTRLACE=1 VFILT=filter */ cx25840_write(client, 0x41e, 0x8 | filter); } return 0; } /* ----------------------------------------------------------------------- */ static void log_video_status(struct i2c_client *client) { static const char *const fmt_strs[] = { "0x0", "NTSC-M", "NTSC-J", "NTSC-4.43", "PAL-BDGHI", "PAL-M", "PAL-N", "PAL-Nc", "PAL-60", "0x9", "0xA", "0xB", "SECAM", "0xD", "0xE", "0xF" }; struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 vidfmt_sel = cx25840_read(client, 0x400) & 0xf; u8 gen_stat1 = cx25840_read(client, 0x40d); u8 gen_stat2 = cx25840_read(client, 0x40e); int vid_input = state->vid_input; v4l_info(client, "Video signal: %spresent\n", (gen_stat2 & 0x20) ? "" : "not "); v4l_info(client, "Detected format: %s\n", fmt_strs[gen_stat1 & 0xf]); v4l_info(client, "Specified standard: %s\n", vidfmt_sel ? 
fmt_strs[vidfmt_sel] : "automatic detection"); if (vid_input >= CX25840_COMPOSITE1 && vid_input <= CX25840_COMPOSITE8) { v4l_info(client, "Specified video input: Composite %d\n", vid_input - CX25840_COMPOSITE1 + 1); } else { v4l_info(client, "Specified video input: S-Video (Luma In%d, Chroma In%d)\n", (vid_input & 0xf0) >> 4, (vid_input & 0xf00) >> 8); } v4l_info(client, "Specified audioclock freq: %d Hz\n", state->audclk_freq); } /* ----------------------------------------------------------------------- */ static void log_audio_status(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 download_ctl = cx25840_read(client, 0x803); u8 mod_det_stat0 = cx25840_read(client, 0x804); u8 mod_det_stat1 = cx25840_read(client, 0x805); u8 audio_config = cx25840_read(client, 0x808); u8 pref_mode = cx25840_read(client, 0x809); u8 afc0 = cx25840_read(client, 0x80b); u8 mute_ctl = cx25840_read(client, 0x8d3); int aud_input = state->aud_input; char *p; switch (mod_det_stat0) { case 0x00: p = "mono"; break; case 0x01: p = "stereo"; break; case 0x02: p = "dual"; break; case 0x04: p = "tri"; break; case 0x10: p = "mono with SAP"; break; case 0x11: p = "stereo with SAP"; break; case 0x12: p = "dual with SAP"; break; case 0x14: p = "tri with SAP"; break; case 0xfe: p = "forced mode"; break; default: p = "not defined"; } v4l_info(client, "Detected audio mode: %s\n", p); switch (mod_det_stat1) { case 0x00: p = "not defined"; break; case 0x01: p = "EIAJ"; break; case 0x02: p = "A2-M"; break; case 0x03: p = "A2-BG"; break; case 0x04: p = "A2-DK1"; break; case 0x05: p = "A2-DK2"; break; case 0x06: p = "A2-DK3"; break; case 0x07: p = "A1 (6.0 MHz FM Mono)"; break; case 0x08: p = "AM-L"; break; case 0x09: p = "NICAM-BG"; break; case 0x0a: p = "NICAM-DK"; break; case 0x0b: p = "NICAM-I"; break; case 0x0c: p = "NICAM-L"; break; case 0x0d: p = "BTSC/EIAJ/A2-M Mono (4.5 MHz FMMono)"; break; case 0x0e: p = "IF FM Radio"; break; case 0x0f: p = "BTSC"; break; case 0x10: p = "high-deviation FM"; break; case 0x11: p = "very high-deviation FM"; break; case 0xfd: p = "unknown audio standard"; break; case 0xfe: p = "forced audio standard"; break; case 0xff: p = "no detected audio standard"; break; default: p = "not defined"; } v4l_info(client, "Detected audio standard: %s\n", p); v4l_info(client, "Audio microcontroller: %s\n", (download_ctl & 0x10) ? ((mute_ctl & 0x2) ? 
"detecting" : "running") : "stopped"); switch (audio_config >> 4) { case 0x00: p = "undefined"; break; case 0x01: p = "BTSC"; break; case 0x02: p = "EIAJ"; break; case 0x03: p = "A2-M"; break; case 0x04: p = "A2-BG"; break; case 0x05: p = "A2-DK1"; break; case 0x06: p = "A2-DK2"; break; case 0x07: p = "A2-DK3"; break; case 0x08: p = "A1 (6.0 MHz FM Mono)"; break; case 0x09: p = "AM-L"; break; case 0x0a: p = "NICAM-BG"; break; case 0x0b: p = "NICAM-DK"; break; case 0x0c: p = "NICAM-I"; break; case 0x0d: p = "NICAM-L"; break; case 0x0e: p = "FM radio"; break; case 0x0f: p = "automatic detection"; break; default: p = "undefined"; } v4l_info(client, "Configured audio standard: %s\n", p); if ((audio_config >> 4) < 0xF) { switch (audio_config & 0xF) { case 0x00: p = "MONO1 (LANGUAGE A/Mono L+R channel for BTSC, EIAJ, A2)"; break; case 0x01: p = "MONO2 (LANGUAGE B)"; break; case 0x02: p = "MONO3 (STEREO forced MONO)"; break; case 0x03: p = "MONO4 (NICAM ANALOG-Language C/Analog Fallback)"; break; case 0x04: p = "STEREO"; break; case 0x05: p = "DUAL1 (AB)"; break; case 0x06: p = "DUAL2 (AC) (FM)"; break; case 0x07: p = "DUAL3 (BC) (FM)"; break; case 0x08: p = "DUAL4 (AC) (AM)"; break; case 0x09: p = "DUAL5 (BC) (AM)"; break; case 0x0a: p = "SAP"; break; default: p = "undefined"; } v4l_info(client, "Configured audio mode: %s\n", p); } else { switch (audio_config & 0xF) { case 0x00: p = "BG"; break; case 0x01: p = "DK1"; break; case 0x02: p = "DK2"; break; case 0x03: p = "DK3"; break; case 0x04: p = "I"; break; case 0x05: p = "L"; break; case 0x06: p = "BTSC"; break; case 0x07: p = "EIAJ"; break; case 0x08: p = "A2-M"; break; case 0x09: p = "FM Radio"; break; case 0x0f: p = "automatic standard and mode detection"; break; default: p = "undefined"; } v4l_info(client, "Configured audio system: %s\n", p); } if (aud_input) { v4l_info(client, "Specified audio input: Tuner (In%d)\n", aud_input); } else { v4l_info(client, "Specified audio input: External\n"); } switch (pref_mode & 0xf) { case 0: p = "mono/language A"; break; case 1: p = "language B"; break; case 2: p = "language C"; break; case 3: p = "analog fallback"; break; case 4: p = "stereo"; break; case 5: p = "language AC"; break; case 6: p = "language BC"; break; case 7: p = "language AB"; break; default: p = "undefined"; } v4l_info(client, "Preferred audio mode: %s\n", p); if ((audio_config & 0xf) == 0xf) { switch ((afc0 >> 3) & 0x3) { case 0: p = "system DK"; break; case 1: p = "system L"; break; case 2: p = "autodetect"; break; default: p = "undefined"; } v4l_info(client, "Selected 65 MHz format: %s\n", p); switch (afc0 & 0x7) { case 0: p = "chroma"; break; case 1: p = "BTSC"; break; case 2: p = "EIAJ"; break; case 3: p = "A2-M"; break; case 4: p = "autodetect"; break; default: p = "undefined"; } v4l_info(client, "Selected 45 MHz format: %s\n", p); } } #define CX25840_VCONFIG_OPTION(state, cfg_in, opt_msk) \ do { \ if ((cfg_in) & (opt_msk)) { \ (state)->vid_config &= ~(opt_msk); \ (state)->vid_config |= (cfg_in) & (opt_msk); \ } \ } while (0) /* apply incoming options to the current vconfig */ static void cx25840_vconfig_add(struct cx25840_state *state, u32 cfg_in) { CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_FMT_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_RES_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_VBIRAW_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_ANCDATA_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_TASKBIT_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, 
CX25840_VCONFIG_ACTIVE_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_VALID_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_HRESETW_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_CLKGATE_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_DCMODE_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_IDID0S_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_VIPCLAMP_MASK); } /* ----------------------------------------------------------------------- */ /* * Initializes the device in the generic mode. * For cx2584x chips also adds additional video output settings provided * in @val parameter (CX25840_VCONFIG_*). * * The generic mode disables some of the ivtv-related hacks in this driver. * For cx2584x chips it also enables setting video output configuration while * setting it according to datasheet defaults by default. */ static int cx25840_init(struct v4l2_subdev *sd, u32 val) { struct cx25840_state *state = to_state(sd); state->generic_mode = true; if (is_cx2584x(state)) { /* set datasheet video output defaults */ state->vid_config = CX25840_VCONFIG_FMT_BT656 | CX25840_VCONFIG_RES_8BIT | CX25840_VCONFIG_VBIRAW_DISABLED | CX25840_VCONFIG_ANCDATA_ENABLED | CX25840_VCONFIG_TASKBIT_ONE | CX25840_VCONFIG_ACTIVE_HORIZONTAL | CX25840_VCONFIG_VALID_NORMAL | CX25840_VCONFIG_HRESETW_NORMAL | CX25840_VCONFIG_CLKGATE_NONE | CX25840_VCONFIG_DCMODE_DWORDS | CX25840_VCONFIG_IDID0S_NORMAL | CX25840_VCONFIG_VIPCLAMP_DISABLED; /* add additional settings */ cx25840_vconfig_add(state, val); } else { /* TODO: generic mode needs to be developed for other chips */ WARN_ON(1); } return 0; } static int cx25840_reset(struct v4l2_subdev *sd, u32 val) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (is_cx2583x(state)) cx25836_initialize(client); else if (is_cx2388x(state)) cx23885_initialize(client); else if (is_cx231xx(state)) cx231xx_initialize(client); else cx25840_initialize(client); state->is_initialized = 1; return 0; } /* * This load_fw operation must be called to load the driver's firmware. * This will load the firmware on the first invocation (further ones are NOP). * Without this the audio standard detection will fail and you will * only get mono. * Alternatively, you can call the reset operation instead of this one. * * Since loading the firmware is often problematic when the driver is * compiled into the kernel I recommend postponing calling this function * until the first open of the video device. Another reason for * postponing it is that loading this firmware takes a long time (seconds) * due to the slow i2c bus speed. So it will speed up the boot process if * you can avoid loading the fw as long as the video device isn't used. 
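 *
 * (Editorial note, illustrative only: a bridge driver would typically
 * trigger this through the subdev core op, e.g.
 *   v4l2_subdev_call(sd, core, load_fw);
 * on the first open of its video device node.)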
*/ static int cx25840_load_fw(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); if (!state->is_initialized) { /* initialize and load firmware */ cx25840_reset(sd, 0); } return 0; } #ifdef CONFIG_VIDEO_ADV_DEBUG static int cx25840_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); reg->size = 1; reg->val = cx25840_read(client, reg->reg & 0x0fff); return 0; } static int cx25840_s_register(struct v4l2_subdev *sd, const struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); cx25840_write(client, reg->reg & 0x0fff, reg->val & 0xff); return 0; } #endif static int cx25840_s_audio_stream(struct v4l2_subdev *sd, int enable) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); u8 v; if (is_cx2583x(state) || is_cx2388x(state) || is_cx231xx(state)) return 0; v4l_dbg(1, cx25840_debug, client, "%s audio output\n", enable ? "enable" : "disable"); if (enable) { v = cx25840_read(client, 0x115) | 0x80; cx25840_write(client, 0x115, v); v = cx25840_read(client, 0x116) | 0x03; cx25840_write(client, 0x116, v); } else { v = cx25840_read(client, 0x115) & ~(0x80); cx25840_write(client, 0x115, v); v = cx25840_read(client, 0x116) & ~(0x03); cx25840_write(client, 0x116, v); } return 0; } static int cx25840_s_stream(struct v4l2_subdev *sd, int enable) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); u8 v; v4l_dbg(1, cx25840_debug, client, "%s video output\n", enable ? "enable" : "disable"); /* * It's not clear what should be done for these devices. * The original code used the same addresses as for the cx25840, but * those addresses do something else entirely on the cx2388x and * cx231xx. Since it never did anything in the first place, just do * nothing. */ if (is_cx2388x(state) || is_cx231xx(state)) return 0; if (enable) { v = cx25840_read(client, 0x115) | 0x0c; cx25840_write(client, 0x115, v); v = cx25840_read(client, 0x116) | 0x04; cx25840_write(client, 0x116, v); } else { v = cx25840_read(client, 0x115) & ~(0x0c); cx25840_write(client, 0x115, v); v = cx25840_read(client, 0x116) & ~(0x04); cx25840_write(client, 0x116, v); } return 0; } /* Query the current detected video format */ static int cx25840_querystd(struct v4l2_subdev *sd, v4l2_std_id *std) { struct i2c_client *client = v4l2_get_subdevdata(sd); static const v4l2_std_id stds[] = { /* 0000 */ V4L2_STD_UNKNOWN, /* 0001 */ V4L2_STD_NTSC_M, /* 0010 */ V4L2_STD_NTSC_M_JP, /* 0011 */ V4L2_STD_NTSC_443, /* 0100 */ V4L2_STD_PAL, /* 0101 */ V4L2_STD_PAL_M, /* 0110 */ V4L2_STD_PAL_N, /* 0111 */ V4L2_STD_PAL_Nc, /* 1000 */ V4L2_STD_PAL_60, /* 1001 */ V4L2_STD_UNKNOWN, /* 1010 */ V4L2_STD_UNKNOWN, /* 1011 */ V4L2_STD_UNKNOWN, /* 1100 */ V4L2_STD_SECAM, /* 1101 */ V4L2_STD_UNKNOWN, /* 1110 */ V4L2_STD_UNKNOWN, /* 1111 */ V4L2_STD_UNKNOWN }; u32 fmt = (cx25840_read4(client, 0x40c) >> 8) & 0xf; *std = stds[fmt]; v4l_dbg(1, cx25840_debug, client, "querystd fmt = %x, v4l2_std_id = 0x%x\n", fmt, (unsigned int)stds[fmt]); return 0; } static int cx25840_g_input_status(struct v4l2_subdev *sd, u32 *status) { struct i2c_client *client = v4l2_get_subdevdata(sd); /* * A limited function that checks for signal status and returns * the state. 
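 *
 * (Editorial note, not in the original source: only V4L2_IN_ST_NO_SIGNAL
 * is ever reported, and it is OR-ed into *status, so the caller is
 * assumed to have cleared *status beforehand.)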
*/ /* Check for status of Horizontal lock (SRC lock isn't reliable) */ if ((cx25840_read4(client, 0x40c) & 0x00010000) == 0) *status |= V4L2_IN_ST_NO_SIGNAL; return 0; } static int cx25840_g_std(struct v4l2_subdev *sd, v4l2_std_id *std) { struct cx25840_state *state = to_state(sd); *std = state->std; return 0; } static int cx25840_s_std(struct v4l2_subdev *sd, v4l2_std_id std) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (state->radio == 0 && state->std == std) return 0; state->radio = 0; state->std = std; return set_v4lstd(client); } static int cx25840_s_radio(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); state->radio = 1; return 0; } static int cx25840_s_video_routing(struct v4l2_subdev *sd, u32 input, u32 output, u32 config) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (is_cx23888(state)) cx23888_std_setup(client); if (is_cx2584x(state) && state->generic_mode && config) { cx25840_vconfig_add(state, config); cx25840_vconfig_apply(client); } return set_input(client, input, state->aud_input); } static int cx25840_s_audio_routing(struct v4l2_subdev *sd, u32 input, u32 output, u32 config) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (is_cx23888(state)) cx23888_std_setup(client); return set_input(client, state->vid_input, input); } static int cx25840_s_frequency(struct v4l2_subdev *sd, const struct v4l2_frequency *freq) { struct i2c_client *client = v4l2_get_subdevdata(sd); input_change(client); return 0; } static int cx25840_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *vt) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); u8 vpres = cx25840_read(client, 0x40e) & 0x20; u8 mode; int val = 0; if (state->radio) return 0; vt->signal = vpres ? 
0xffff : 0x0; if (is_cx2583x(state)) return 0; vt->capability |= V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_LANG1 | V4L2_TUNER_CAP_LANG2 | V4L2_TUNER_CAP_SAP; mode = cx25840_read(client, 0x804); /* get rxsubchans and audmode */ if ((mode & 0xf) == 1) val |= V4L2_TUNER_SUB_STEREO; else val |= V4L2_TUNER_SUB_MONO; if (mode == 2 || mode == 4) val = V4L2_TUNER_SUB_LANG1 | V4L2_TUNER_SUB_LANG2; if (mode & 0x10) val |= V4L2_TUNER_SUB_SAP; vt->rxsubchans = val; vt->audmode = state->audmode; return 0; } static int cx25840_s_tuner(struct v4l2_subdev *sd, const struct v4l2_tuner *vt) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (state->radio || is_cx2583x(state)) return 0; switch (vt->audmode) { case V4L2_TUNER_MODE_MONO: /* * mono -> mono * stereo -> mono * bilingual -> lang1 */ cx25840_and_or(client, 0x809, ~0xf, 0x00); break; case V4L2_TUNER_MODE_STEREO: case V4L2_TUNER_MODE_LANG1: /* * mono -> mono * stereo -> stereo * bilingual -> lang1 */ cx25840_and_or(client, 0x809, ~0xf, 0x04); break; case V4L2_TUNER_MODE_LANG1_LANG2: /* * mono -> mono * stereo -> stereo * bilingual -> lang1/lang2 */ cx25840_and_or(client, 0x809, ~0xf, 0x07); break; case V4L2_TUNER_MODE_LANG2: /* * mono -> mono * stereo -> stereo * bilingual -> lang2 */ cx25840_and_or(client, 0x809, ~0xf, 0x01); break; default: return -EINVAL; } state->audmode = vt->audmode; return 0; } static int cx25840_log_status(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); log_video_status(client); if (!is_cx2583x(state)) log_audio_status(client); cx25840_ir_log_status(sd); v4l2_ctrl_handler_log_status(&state->hdl, sd->name); return 0; } static int cx23885_irq_handler(struct v4l2_subdev *sd, u32 status, bool *handled) { struct cx25840_state *state = to_state(sd); struct i2c_client *c = v4l2_get_subdevdata(sd); u8 irq_stat, aud_stat, aud_en, ir_stat, ir_en; u32 vid_stat, aud_mc_stat; bool block_handled; int ret = 0; irq_stat = cx25840_read(c, CX23885_PIN_CTRL_IRQ_REG); v4l_dbg(2, cx25840_debug, c, "AV Core IRQ status (entry): %s %s %s\n", irq_stat & CX23885_PIN_CTRL_IRQ_IR_STAT ? "ir" : " ", irq_stat & CX23885_PIN_CTRL_IRQ_AUD_STAT ? "aud" : " ", irq_stat & CX23885_PIN_CTRL_IRQ_VID_STAT ? 
"vid" : " "); if ((is_cx23885(state) || is_cx23887(state))) { ir_stat = cx25840_read(c, CX25840_IR_STATS_REG); ir_en = cx25840_read(c, CX25840_IR_IRQEN_REG); v4l_dbg(2, cx25840_debug, c, "AV Core ir IRQ status: %#04x disables: %#04x\n", ir_stat, ir_en); if (irq_stat & CX23885_PIN_CTRL_IRQ_IR_STAT) { block_handled = false; ret = cx25840_ir_irq_handler(sd, status, &block_handled); if (block_handled) *handled = true; } } aud_stat = cx25840_read(c, CX25840_AUD_INT_STAT_REG); aud_en = cx25840_read(c, CX25840_AUD_INT_CTRL_REG); v4l_dbg(2, cx25840_debug, c, "AV Core audio IRQ status: %#04x disables: %#04x\n", aud_stat, aud_en); aud_mc_stat = cx25840_read4(c, CX23885_AUD_MC_INT_MASK_REG); v4l_dbg(2, cx25840_debug, c, "AV Core audio MC IRQ status: %#06x enables: %#06x\n", aud_mc_stat >> CX23885_AUD_MC_INT_STAT_SHFT, aud_mc_stat & CX23885_AUD_MC_INT_CTRL_BITS); if (irq_stat & CX23885_PIN_CTRL_IRQ_AUD_STAT) { if (aud_stat) { cx25840_write(c, CX25840_AUD_INT_STAT_REG, aud_stat); *handled = true; } } vid_stat = cx25840_read4(c, CX25840_VID_INT_STAT_REG); v4l_dbg(2, cx25840_debug, c, "AV Core video IRQ status: %#06x disables: %#06x\n", vid_stat & CX25840_VID_INT_STAT_BITS, vid_stat >> CX25840_VID_INT_MASK_SHFT); if (irq_stat & CX23885_PIN_CTRL_IRQ_VID_STAT) { if (vid_stat & CX25840_VID_INT_STAT_BITS) { cx25840_write4(c, CX25840_VID_INT_STAT_REG, vid_stat); *handled = true; } } irq_stat = cx25840_read(c, CX23885_PIN_CTRL_IRQ_REG); v4l_dbg(2, cx25840_debug, c, "AV Core IRQ status (exit): %s %s %s\n", irq_stat & CX23885_PIN_CTRL_IRQ_IR_STAT ? "ir" : " ", irq_stat & CX23885_PIN_CTRL_IRQ_AUD_STAT ? "aud" : " ", irq_stat & CX23885_PIN_CTRL_IRQ_VID_STAT ? "vid" : " "); return ret; } static int cx25840_irq_handler(struct v4l2_subdev *sd, u32 status, bool *handled) { struct cx25840_state *state = to_state(sd); *handled = false; /* Only support the CX2388[578] AV Core for now */ if (is_cx2388x(state)) return cx23885_irq_handler(sd, status, handled); return -ENODEV; } /* ----------------------------------------------------------------------- */ #define DIF_PLL_FREQ_WORD (0x300) #define DIF_BPF_COEFF01 (0x348) #define DIF_BPF_COEFF23 (0x34c) #define DIF_BPF_COEFF45 (0x350) #define DIF_BPF_COEFF67 (0x354) #define DIF_BPF_COEFF89 (0x358) #define DIF_BPF_COEFF1011 (0x35c) #define DIF_BPF_COEFF1213 (0x360) #define DIF_BPF_COEFF1415 (0x364) #define DIF_BPF_COEFF1617 (0x368) #define DIF_BPF_COEFF1819 (0x36c) #define DIF_BPF_COEFF2021 (0x370) #define DIF_BPF_COEFF2223 (0x374) #define DIF_BPF_COEFF2425 (0x378) #define DIF_BPF_COEFF2627 (0x37c) #define DIF_BPF_COEFF2829 (0x380) #define DIF_BPF_COEFF3031 (0x384) #define DIF_BPF_COEFF3233 (0x388) #define DIF_BPF_COEFF3435 (0x38c) #define DIF_BPF_COEFF36 (0x390) static const u32 ifhz_coeffs[][19] = { { // 3.0 MHz 0x00000002, 0x00080012, 0x001e0024, 0x001bfff8, 0xffb4ff50, 0xfed8fe68, 0xfe24fe34, 0xfebaffc7, 0x014d031f, 0x04f0065d, 0x07010688, 0x04c901d6, 0xfe00f9d3, 0xf600f342, 0xf235f337, 0xf64efb22, 0x0105070f, 0x0c460fce, 0x110d0000, }, { // 3.1 MHz 0x00000001, 0x00070012, 0x00220032, 0x00370026, 0xfff0ff91, 0xff0efe7c, 0xfe01fdcc, 0xfe0afedb, 0x00440224, 0x0434060c, 0x0738074e, 0x06090361, 0xff99fb39, 0xf6fef3b6, 0xf21af2a5, 0xf573fa33, 0x0034067d, 0x0bfb0fb9, 0x110d0000, }, { // 3.2 MHz 0x00000000, 0x0004000e, 0x00200038, 0x004c004f, 0x002fffdf, 0xff5cfeb6, 0xfe0dfd92, 0xfd7ffe03, 0xff36010a, 0x03410575, 0x072607d2, 0x071804d5, 0x0134fcb7, 0xf81ff451, 0xf223f22e, 0xf4a7f94b, 0xff6405e8, 0x0bae0fa4, 0x110d0000, }, { // 3.3 MHz 0x0000ffff, 0x00000008, 0x001a0036, 
0x0056006d, 0x00670030, 0xffbdff10, 0xfe46fd8d, 0xfd25fd4f, 0xfe35ffe0, 0x0224049f, 0x06c9080e, 0x07ef0627, 0x02c9fe45, 0xf961f513, 0xf250f1d2, 0xf3ecf869, 0xfe930552, 0x0b5f0f8f, 0x110d0000, }, { // 3.4 MHz 0xfffffffe, 0xfffd0001, 0x000f002c, 0x0054007d, 0x0093007c, 0x0024ff82, 0xfea6fdbb, 0xfd03fcca, 0xfd51feb9, 0x00eb0392, 0x06270802, 0x08880750, 0x044dffdb, 0xfabdf5f8, 0xf2a0f193, 0xf342f78f, 0xfdc404b9, 0x0b0e0f78, 0x110d0000, }, { // 3.5 MHz 0xfffffffd, 0xfffafff9, 0x0002001b, 0x0046007d, 0x00ad00ba, 0x00870000, 0xff26fe1a, 0xfd1bfc7e, 0xfc99fda4, 0xffa5025c, 0x054507ad, 0x08dd0847, 0x05b80172, 0xfc2ef6ff, 0xf313f170, 0xf2abf6bd, 0xfcf6041f, 0x0abc0f61, 0x110d0000, }, { // 3.6 MHz 0xfffffffd, 0xfff8fff3, 0xfff50006, 0x002f006c, 0x00b200e3, 0x00dc007e, 0xffb9fea0, 0xfd6bfc71, 0xfc17fcb1, 0xfe65010b, 0x042d0713, 0x08ec0906, 0x07020302, 0xfdaff823, 0xf3a7f16a, 0xf228f5f5, 0xfc2a0384, 0x0a670f4a, 0x110d0000, }, { // 3.7 MHz 0x0000fffd, 0xfff7ffef, 0xffe9fff1, 0x0010004d, 0x00a100f2, 0x011a00f0, 0x0053ff44, 0xfdedfca2, 0xfbd3fbef, 0xfd39ffae, 0x02ea0638, 0x08b50987, 0x08230483, 0xff39f960, 0xf45bf180, 0xf1b8f537, 0xfb6102e7, 0x0a110f32, 0x110d0000, }, { // 3.8 MHz 0x0000fffe, 0xfff9ffee, 0xffe1ffdd, 0xfff00024, 0x007c00e5, 0x013a014a, 0x00e6fff8, 0xfe98fd0f, 0xfbd3fb67, 0xfc32fe54, 0x01880525, 0x083909c7, 0x091505ee, 0x00c7fab3, 0xf52df1b4, 0xf15df484, 0xfa9b0249, 0x09ba0f19, 0x110d0000, }, { // 3.9 MHz 0x00000000, 0xfffbfff0, 0xffdeffcf, 0xffd1fff6, 0x004800be, 0x01390184, 0x016300ac, 0xff5efdb1, 0xfc17fb23, 0xfb5cfd0d, 0x001703e4, 0x077b09c4, 0x09d2073c, 0x0251fc18, 0xf61cf203, 0xf118f3dc, 0xf9d801aa, 0x09600eff, 0x110d0000, }, { // 4.0 MHz 0x00000001, 0xfffefff4, 0xffe1ffc8, 0xffbaffca, 0x000b0082, 0x01170198, 0x01c10152, 0x0030fe7b, 0xfc99fb24, 0xfac3fbe9, 0xfea5027f, 0x0683097f, 0x0a560867, 0x03d2fd89, 0xf723f26f, 0xf0e8f341, 0xf919010a, 0x09060ee5, 0x110d0000, }, { // 4.1 MHz 0x00010002, 0x0002fffb, 0xffe8ffca, 0xffacffa4, 0xffcd0036, 0x00d70184, 0x01f601dc, 0x00ffff60, 0xfd51fb6d, 0xfa6efaf5, 0xfd410103, 0x055708f9, 0x0a9e0969, 0x0543ff02, 0xf842f2f5, 0xf0cef2b2, 0xf85e006b, 0x08aa0ecb, 0x110d0000, }, { // 4.2 MHz 0x00010003, 0x00050003, 0xfff3ffd3, 0xffaaff8b, 0xff95ffe5, 0x0080014a, 0x01fe023f, 0x01ba0050, 0xfe35fbf8, 0xfa62fa3b, 0xfbf9ff7e, 0x04010836, 0x0aa90a3d, 0x069f007f, 0xf975f395, 0xf0cbf231, 0xf7a9ffcb, 0x084c0eaf, 0x110d0000, }, { // 4.3 MHz 0x00010003, 0x0008000a, 0x0000ffe4, 0xffb4ff81, 0xff6aff96, 0x001c00f0, 0x01d70271, 0x0254013b, 0xff36fcbd, 0xfa9ff9c5, 0xfadbfdfe, 0x028c073b, 0x0a750adf, 0x07e101fa, 0xfab8f44e, 0xf0ddf1be, 0xf6f9ff2b, 0x07ed0e94, 0x110d0000, }, { // 4.4 MHz 0x00000003, 0x0009000f, 0x000efff8, 0xffc9ff87, 0xff52ff54, 0xffb5007e, 0x01860270, 0x02c00210, 0x0044fdb2, 0xfb22f997, 0xf9f2fc90, 0x0102060f, 0x0a050b4c, 0x0902036e, 0xfc0af51e, 0xf106f15a, 0xf64efe8b, 0x078d0e77, 0x110d0000, }, { // 4.5 MHz 0x00000002, 0x00080012, 0x0019000e, 0xffe5ff9e, 0xff4fff25, 0xff560000, 0x0112023b, 0x02f702c0, 0x014dfec8, 0xfbe5f9b3, 0xf947fb41, 0xff7004b9, 0x095a0b81, 0x0a0004d8, 0xfd65f603, 0xf144f104, 0xf5aafdec, 0x072b0e5a, 0x110d0000, }, { // 4.6 MHz 0x00000001, 0x00060012, 0x00200022, 0x0005ffc1, 0xff61ff10, 0xff09ff82, 0x008601d7, 0x02f50340, 0x0241fff0, 0xfcddfa19, 0xf8e2fa1e, 0xfde30343, 0x08790b7f, 0x0ad50631, 0xfec7f6fc, 0xf198f0bd, 0xf50dfd4e, 0x06c90e3d, 0x110d0000, }, { // 4.7 MHz 0x0000ffff, 0x0003000f, 0x00220030, 0x0025ffed, 0xff87ff15, 0xfed6ff10, 0xffed014c, 0x02b90386, 0x03110119, 0xfdfefac4, 0xf8c6f92f, 0xfc6701b7, 0x07670b44, 0x0b7e0776, 
0x002df807, 0xf200f086, 0xf477fcb1, 0x06650e1e, 0x110d0000, }, { // 4.8 MHz 0xfffffffe, 0xffff0009, 0x001e0038, 0x003f001b, 0xffbcff36, 0xfec2feb6, 0xff5600a5, 0x0248038d, 0x03b00232, 0xff39fbab, 0xf8f4f87f, 0xfb060020, 0x062a0ad2, 0x0bf908a3, 0x0192f922, 0xf27df05e, 0xf3e8fc14, 0x06000e00, 0x110d0000, }, { // 4.9 MHz 0xfffffffd, 0xfffc0002, 0x00160037, 0x00510046, 0xfff9ff6d, 0xfed0fe7c, 0xfecefff0, 0x01aa0356, 0x0413032b, 0x007ffcc5, 0xf96cf812, 0xf9cefe87, 0x04c90a2c, 0x0c4309b4, 0x02f3fa4a, 0xf30ef046, 0xf361fb7a, 0x059b0de0, 0x110d0000, }, { // 5.0 MHz 0xfffffffd, 0xfff9fffa, 0x000a002d, 0x00570067, 0x0037ffb5, 0xfefffe68, 0xfe62ff3d, 0x00ec02e3, 0x043503f6, 0x01befe05, 0xfa27f7ee, 0xf8c6fcf8, 0x034c0954, 0x0c5c0aa4, 0x044cfb7e, 0xf3b1f03f, 0xf2e2fae1, 0x05340dc0, 0x110d0000, }, { // 5.1 MHz 0x0000fffd, 0xfff8fff4, 0xfffd001e, 0x0051007b, 0x006e0006, 0xff48fe7c, 0xfe1bfe9a, 0x001d023e, 0x04130488, 0x02e6ff5b, 0xfb1ef812, 0xf7f7fb7f, 0x01bc084e, 0x0c430b72, 0x059afcba, 0xf467f046, 0xf26cfa4a, 0x04cd0da0, 0x110d0000, }, { // 5.2 MHz 0x0000fffe, 0xfff8ffef, 0xfff00009, 0x003f007f, 0x00980056, 0xffa5feb6, 0xfe00fe15, 0xff4b0170, 0x03b004d7, 0x03e800b9, 0xfc48f87f, 0xf768fa23, 0x0022071f, 0x0bf90c1b, 0x06dafdfd, 0xf52df05e, 0xf1fef9b5, 0x04640d7f, 0x110d0000, }, { // 5.3 MHz 0x0000ffff, 0xfff9ffee, 0xffe6fff3, 0x00250072, 0x00af009c, 0x000cff10, 0xfe13fdb8, 0xfe870089, 0x031104e1, 0x04b8020f, 0xfd98f92f, 0xf71df8f0, 0xfe8805ce, 0x0b7e0c9c, 0x0808ff44, 0xf603f086, 0xf19af922, 0x03fb0d5e, 0x110d0000, }, { // 5.4 MHz 0x00000001, 0xfffcffef, 0xffe0ffe0, 0x00050056, 0x00b000d1, 0x0071ff82, 0xfe53fd8c, 0xfddfff99, 0x024104a3, 0x054a034d, 0xff01fa1e, 0xf717f7ed, 0xfcf50461, 0x0ad50cf4, 0x0921008d, 0xf6e7f0bd, 0xf13ff891, 0x03920d3b, 0x110d0000, }, { // 5.5 MHz 0x00010002, 0xfffffff3, 0xffdeffd1, 0xffe5002f, 0x009c00ed, 0x00cb0000, 0xfebafd94, 0xfd61feb0, 0x014d0422, 0x05970464, 0x0074fb41, 0xf759f721, 0xfb7502de, 0x0a000d21, 0x0a2201d4, 0xf7d9f104, 0xf0edf804, 0x03280d19, 0x110d0000, }, { // 5.6 MHz 0x00010003, 0x0003fffa, 0xffe3ffc9, 0xffc90002, 0x007500ef, 0x010e007e, 0xff3dfdcf, 0xfd16fddd, 0x00440365, 0x059b0548, 0x01e3fc90, 0xf7dff691, 0xfa0f014d, 0x09020d23, 0x0b0a0318, 0xf8d7f15a, 0xf0a5f779, 0x02bd0cf6, 0x110d0000, }, { // 5.7 MHz 0x00010003, 0x00060001, 0xffecffc9, 0xffb4ffd4, 0x004000d5, 0x013600f0, 0xffd3fe39, 0xfd04fd31, 0xff360277, 0x055605ef, 0x033efdfe, 0xf8a5f642, 0xf8cbffb6, 0x07e10cfb, 0x0bd50456, 0xf9dff1be, 0xf067f6f2, 0x02520cd2, 0x110d0000, }, { // 5.8 MHz 0x00000003, 0x00080009, 0xfff8ffd2, 0xffaaffac, 0x000200a3, 0x013c014a, 0x006dfec9, 0xfd2bfcb7, 0xfe350165, 0x04cb0651, 0x0477ff7e, 0xf9a5f635, 0xf7b1fe20, 0x069f0ca8, 0x0c81058b, 0xfaf0f231, 0xf033f66d, 0x01e60cae, 0x110d0000, }, { // 5.9 MHz 0x00000002, 0x0009000e, 0x0005ffe1, 0xffacff90, 0xffc5005f, 0x01210184, 0x00fcff72, 0xfd8afc77, 0xfd51003f, 0x04020669, 0x05830103, 0xfad7f66b, 0xf6c8fc93, 0x05430c2b, 0x0d0d06b5, 0xfc08f2b2, 0xf00af5ec, 0x017b0c89, 0x110d0000, }, { // 6.0 MHz 0x00000001, 0x00070012, 0x0012fff5, 0xffbaff82, 0xff8e000f, 0x00e80198, 0x01750028, 0xfe18fc75, 0xfc99ff15, 0x03050636, 0x0656027f, 0xfc32f6e2, 0xf614fb17, 0x03d20b87, 0x0d7707d2, 0xfd26f341, 0xefeaf56f, 0x010f0c64, 0x110d0000, }, { // 6.1 MHz 0xffff0000, 0x00050012, 0x001c000b, 0xffd1ff84, 0xff66ffbe, 0x00960184, 0x01cd00da, 0xfeccfcb2, 0xfc17fdf9, 0x01e005bc, 0x06e703e4, 0xfdabf798, 0xf599f9b3, 0x02510abd, 0x0dbf08df, 0xfe48f3dc, 0xefd5f4f6, 0x00a20c3e, 0x110d0000, }, { // 6.2 MHz 0xfffffffe, 0x0002000f, 0x0021001f, 0xfff0ff97, 0xff50ff74, 
0x0034014a, 0x01fa0179, 0xff97fd2a, 0xfbd3fcfa, 0x00a304fe, 0x07310525, 0xff37f886, 0xf55cf86e, 0x00c709d0, 0x0de209db, 0xff6df484, 0xefcbf481, 0x00360c18, 0x110d0000, }, { // 6.3 MHz 0xfffffffd, 0xfffe000a, 0x0021002f, 0x0010ffb8, 0xff50ff3b, 0xffcc00f0, 0x01fa01fa, 0x0069fdd4, 0xfbd3fc26, 0xff5d0407, 0x07310638, 0x00c9f9a8, 0xf55cf74e, 0xff3908c3, 0x0de20ac3, 0x0093f537, 0xefcbf410, 0xffca0bf2, 0x110d0000, }, { // 6.4 MHz 0xfffffffd, 0xfffb0003, 0x001c0037, 0x002fffe2, 0xff66ff17, 0xff6a007e, 0x01cd0251, 0x0134fea5, 0xfc17fb8b, 0xfe2002e0, 0x06e70713, 0x0255faf5, 0xf599f658, 0xfdaf0799, 0x0dbf0b96, 0x01b8f5f5, 0xefd5f3a3, 0xff5e0bca, 0x110d0000, }, { // 6.5 MHz 0x0000fffd, 0xfff9fffb, 0x00120037, 0x00460010, 0xff8eff0f, 0xff180000, 0x01750276, 0x01e8ff8d, 0xfc99fb31, 0xfcfb0198, 0x065607ad, 0x03cefc64, 0xf614f592, 0xfc2e0656, 0x0d770c52, 0x02daf6bd, 0xefeaf33b, 0xfef10ba3, 0x110d0000, }, { // 6.6 MHz 0x0000fffe, 0xfff7fff5, 0x0005002f, 0x0054003c, 0xffc5ff22, 0xfedfff82, 0x00fc0267, 0x0276007e, 0xfd51fb1c, 0xfbfe003e, 0x05830802, 0x0529fdec, 0xf6c8f4fe, 0xfabd04ff, 0x0d0d0cf6, 0x03f8f78f, 0xf00af2d7, 0xfe850b7b, 0x110d0000, }, { // 6.7 MHz 0x0000ffff, 0xfff8fff0, 0xfff80020, 0x00560060, 0x0002ff4e, 0xfec4ff10, 0x006d0225, 0x02d50166, 0xfe35fb4e, 0xfb35fee1, 0x0477080e, 0x065bff82, 0xf7b1f4a0, 0xf9610397, 0x0c810d80, 0x0510f869, 0xf033f278, 0xfe1a0b52, 0x110d0000, }, { // 6.8 MHz 0x00010000, 0xfffaffee, 0xffec000c, 0x004c0078, 0x0040ff8e, 0xfecafeb6, 0xffd301b6, 0x02fc0235, 0xff36fbc5, 0xfaaafd90, 0x033e07d2, 0x075b011b, 0xf8cbf47a, 0xf81f0224, 0x0bd50def, 0x0621f94b, 0xf067f21e, 0xfdae0b29, 0x110d0000, }, { // 6.9 MHz 0x00010001, 0xfffdffef, 0xffe3fff6, 0x0037007f, 0x0075ffdc, 0xfef2fe7c, 0xff3d0122, 0x02ea02dd, 0x0044fc79, 0xfa65fc5d, 0x01e3074e, 0x082102ad, 0xfa0ff48c, 0xf6fe00a9, 0x0b0a0e43, 0x0729fa33, 0xf0a5f1c9, 0xfd430b00, 0x110d0000, }, { // 7.0 MHz 0x00010002, 0x0001fff3, 0xffdeffe2, 0x001b0076, 0x009c002d, 0xff35fe68, 0xfeba0076, 0x029f0352, 0x014dfd60, 0xfa69fb53, 0x00740688, 0x08a7042d, 0xfb75f4d6, 0xf600ff2d, 0x0a220e7a, 0x0827fb22, 0xf0edf17a, 0xfcd80ad6, 0x110d0000, }, { // 7.1 MHz 0x00000003, 0x0004fff9, 0xffe0ffd2, 0xfffb005e, 0x00b0007a, 0xff8ffe7c, 0xfe53ffc1, 0x0221038c, 0x0241fe6e, 0xfab6fa80, 0xff010587, 0x08e90590, 0xfcf5f556, 0xf52bfdb3, 0x09210e95, 0x0919fc15, 0xf13ff12f, 0xfc6e0aab, 0x110d0000, }, { // 7.2 MHz 0x00000003, 0x00070000, 0xffe6ffc9, 0xffdb0039, 0x00af00b8, 0xfff4feb6, 0xfe13ff10, 0x01790388, 0x0311ff92, 0xfb48f9ed, 0xfd980453, 0x08e306cd, 0xfe88f60a, 0xf482fc40, 0x08080e93, 0x09fdfd0c, 0xf19af0ea, 0xfc050a81, 0x110d0000, }, { // 7.3 MHz 0x00000002, 0x00080008, 0xfff0ffc9, 0xffc1000d, 0x009800e2, 0x005bff10, 0xfe00fe74, 0x00b50345, 0x03b000bc, 0xfc18f9a1, 0xfc4802f9, 0x089807dc, 0x0022f6f0, 0xf407fada, 0x06da0e74, 0x0ad3fe06, 0xf1fef0ab, 0xfb9c0a55, 0x110d0000, }, { // 7.4 MHz 0x00000001, 0x0008000e, 0xfffdffd0, 0xffafffdf, 0x006e00f2, 0x00b8ff82, 0xfe1bfdf8, 0xffe302c8, 0x041301dc, 0xfd1af99e, 0xfb1e0183, 0x080908b5, 0x01bcf801, 0xf3bdf985, 0x059a0e38, 0x0b99ff03, 0xf26cf071, 0xfb330a2a, 0x110d0000, }, { // 7.5 MHz 0xffff0000, 0x00070011, 0x000affdf, 0xffa9ffb5, 0x003700e6, 0x01010000, 0xfe62fda8, 0xff140219, 0x043502e1, 0xfe42f9e6, 0xfa270000, 0x073a0953, 0x034cf939, 0xf3a4f845, 0x044c0de1, 0x0c4f0000, 0xf2e2f03c, 0xfacc09fe, 0x110d0000, }, { // 7.6 MHz 0xffffffff, 0x00040012, 0x0016fff3, 0xffafff95, 0xfff900c0, 0x0130007e, 0xfecefd89, 0xfe560146, 0x041303bc, 0xff81fa76, 0xf96cfe7d, 0x063209b1, 0x04c9fa93, 0xf3bdf71e, 0x02f30d6e, 0x0cf200fd, 
0xf361f00e, 0xfa6509d1, 0x110d0000, }, { // 7.7 MHz 0xfffffffe, 0x00010010, 0x001e0008, 0xffc1ff84, 0xffbc0084, 0x013e00f0, 0xff56fd9f, 0xfdb8005c, 0x03b00460, 0x00c7fb45, 0xf8f4fd07, 0x04fa09ce, 0x062afc07, 0xf407f614, 0x01920ce0, 0x0d8301fa, 0xf3e8efe5, 0xfa0009a4, 0x110d0000, }, { // 7.8 MHz 0x0000fffd, 0xfffd000b, 0x0022001d, 0xffdbff82, 0xff870039, 0x012a014a, 0xffedfde7, 0xfd47ff6b, 0x031104c6, 0x0202fc4c, 0xf8c6fbad, 0x039909a7, 0x0767fd8e, 0xf482f52b, 0x002d0c39, 0x0e0002f4, 0xf477efc2, 0xf99b0977, 0x110d0000, }, { // 7.9 MHz 0x0000fffd, 0xfffa0004, 0x0020002d, 0xfffbff91, 0xff61ffe8, 0x00f70184, 0x0086fe5c, 0xfd0bfe85, 0x024104e5, 0x0323fd7d, 0xf8e2fa79, 0x021d093f, 0x0879ff22, 0xf52bf465, 0xfec70b79, 0x0e6803eb, 0xf50defa5, 0xf937094a, 0x110d0000, }, { // 8.0 MHz 0x0000fffe, 0xfff8fffd, 0x00190036, 0x001bffaf, 0xff4fff99, 0x00aa0198, 0x0112fef3, 0xfd09fdb9, 0x014d04be, 0x041bfecc, 0xf947f978, 0x00900897, 0x095a00b9, 0xf600f3c5, 0xfd650aa3, 0x0ebc04de, 0xf5aaef8e, 0xf8d5091c, 0x110d0000, }, { // 8.1 MHz 0x0000ffff, 0xfff7fff6, 0x000e0038, 0x0037ffd7, 0xff52ff56, 0x004b0184, 0x0186ffa1, 0xfd40fd16, 0x00440452, 0x04de0029, 0xf9f2f8b2, 0xfefe07b5, 0x0a05024d, 0xf6fef34d, 0xfc0a09b8, 0x0efa05cd, 0xf64eef7d, 0xf87308ed, 0x110d0000, }, { // 8.2 MHz 0x00010000, 0xfff8fff0, 0x00000031, 0x004c0005, 0xff6aff27, 0xffe4014a, 0x01d70057, 0xfdacfca6, 0xff3603a7, 0x05610184, 0xfadbf82e, 0xfd74069f, 0x0a7503d6, 0xf81ff2ff, 0xfab808b9, 0x0f2306b5, 0xf6f9ef72, 0xf81308bf, 0x110d0000, }, { // 8.3 MHz 0x00010001, 0xfffbffee, 0xfff30022, 0x00560032, 0xff95ff10, 0xff8000f0, 0x01fe0106, 0xfe46fc71, 0xfe3502c7, 0x059e02ce, 0xfbf9f7f2, 0xfbff055b, 0x0aa9054c, 0xf961f2db, 0xf97507aa, 0x0f350797, 0xf7a9ef6d, 0xf7b40890, 0x110d0000, }, { // 8.4 MHz 0x00010002, 0xfffeffee, 0xffe8000f, 0x00540058, 0xffcdff14, 0xff29007e, 0x01f6019e, 0xff01fc7c, 0xfd5101bf, 0x059203f6, 0xfd41f7fe, 0xfaa903f3, 0x0a9e06a9, 0xfabdf2e2, 0xf842068b, 0x0f320871, 0xf85eef6e, 0xf7560860, 0x110d0000, }, { // 8.5 MHz 0x00000003, 0x0002fff2, 0xffe1fff9, 0x00460073, 0x000bff34, 0xfee90000, 0x01c10215, 0xffd0fcc5, 0xfc99009d, 0x053d04f1, 0xfea5f853, 0xf97d0270, 0x0a5607e4, 0xfc2ef314, 0xf723055f, 0x0f180943, 0xf919ef75, 0xf6fa0830, 0x110d0000, }, { // 8.6 MHz 0x00000003, 0x0005fff8, 0xffdeffe4, 0x002f007f, 0x0048ff6b, 0xfec7ff82, 0x0163025f, 0x00a2fd47, 0xfc17ff73, 0x04a405b2, 0x0017f8ed, 0xf88500dc, 0x09d208f9, 0xfdaff370, 0xf61c0429, 0x0ee80a0b, 0xf9d8ef82, 0xf6a00800, 0x110d0000, }, { // 8.7 MHz 0x00000003, 0x0007ffff, 0xffe1ffd4, 0x0010007a, 0x007cffb2, 0xfec6ff10, 0x00e60277, 0x0168fdf9, 0xfbd3fe50, 0x03ce0631, 0x0188f9c8, 0xf7c7ff43, 0x091509e3, 0xff39f3f6, 0xf52d02ea, 0x0ea30ac9, 0xfa9bef95, 0xf64607d0, 0x110d0000, }, { // 8.8 MHz 0x00000002, 0x00090007, 0xffe9ffca, 0xfff00065, 0x00a10003, 0xfee6feb6, 0x0053025b, 0x0213fed0, 0xfbd3fd46, 0x02c70668, 0x02eafadb, 0xf74bfdae, 0x08230a9c, 0x00c7f4a3, 0xf45b01a6, 0x0e480b7c, 0xfb61efae, 0xf5ef079f, 0x110d0000, }, { // 8.9 MHz 0xffff0000, 0x0008000d, 0xfff5ffc8, 0xffd10043, 0x00b20053, 0xff24fe7c, 0xffb9020c, 0x0295ffbb, 0xfc17fc64, 0x019b0654, 0x042dfc1c, 0xf714fc2a, 0x07020b21, 0x0251f575, 0xf3a7005e, 0x0dd80c24, 0xfc2aefcd, 0xf599076e, 0x110d0000, }, { // 9.0 MHz 0xffffffff, 0x00060011, 0x0002ffcf, 0xffba0018, 0x00ad009a, 0xff79fe68, 0xff260192, 0x02e500ab, 0xfc99fbb6, 0x005b05f7, 0x0545fd81, 0xf723fabf, 0x05b80b70, 0x03d2f669, 0xf313ff15, 0x0d550cbf, 0xfcf6eff2, 0xf544073d, 0x110d0000, }, { // 9.1 MHz 0xfffffffe, 0x00030012, 0x000fffdd, 0xffacffea, 0x009300cf, 0xffdcfe7c, 0xfea600f7, 
0x02fd0190, 0xfd51fb46, 0xff150554, 0x0627fefd, 0xf778f978, 0x044d0b87, 0x0543f77d, 0xf2a0fdcf, 0x0cbe0d4e, 0xfdc4f01d, 0xf4f2070b, 0x110d0000, }, { // 9.2 MHz 0x0000fffd, 0x00000010, 0x001afff0, 0xffaaffbf, 0x006700ed, 0x0043feb6, 0xfe460047, 0x02db0258, 0xfe35fb1b, 0xfddc0473, 0x06c90082, 0xf811f85e, 0x02c90b66, 0x069ff8ad, 0xf250fc8d, 0x0c140dcf, 0xfe93f04d, 0xf4a106d9, 0x110d0000, }, { // 9.3 MHz 0x0000fffd, 0xfffc000c, 0x00200006, 0xffb4ff9c, 0x002f00ef, 0x00a4ff10, 0xfe0dff92, 0x028102f7, 0xff36fb37, 0xfcbf035e, 0x07260202, 0xf8e8f778, 0x01340b0d, 0x07e1f9f4, 0xf223fb51, 0x0b590e42, 0xff64f083, 0xf45206a7, 0x110d0000, }, { // 9.4 MHz 0x0000fffd, 0xfff90005, 0x0022001a, 0xffc9ff86, 0xfff000d7, 0x00f2ff82, 0xfe01fee5, 0x01f60362, 0x0044fb99, 0xfbcc0222, 0x07380370, 0xf9f7f6cc, 0xff990a7e, 0x0902fb50, 0xf21afa1f, 0x0a8d0ea6, 0x0034f0bf, 0xf4050675, 0x110d0000, }, { // 9.5 MHz 0x0000fffe, 0xfff8fffe, 0x001e002b, 0xffe5ff81, 0xffb400a5, 0x01280000, 0xfe24fe50, 0x01460390, 0x014dfc3a, 0xfb1000ce, 0x070104bf, 0xfb37f65f, 0xfe0009bc, 0x0a00fcbb, 0xf235f8f8, 0x09b20efc, 0x0105f101, 0xf3ba0642, 0x110d0000, }, { // 9.6 MHz 0x0001ffff, 0xfff8fff7, 0x00150036, 0x0005ff8c, 0xff810061, 0x013d007e, 0xfe71fddf, 0x007c0380, 0x0241fd13, 0xfa94ff70, 0x068005e2, 0xfc9bf633, 0xfc7308ca, 0x0ad5fe30, 0xf274f7e0, 0x08c90f43, 0x01d4f147, 0xf371060f, 0x110d0000, }, { // 9.7 MHz 0x00010001, 0xfff9fff1, 0x00090038, 0x0025ffa7, 0xff5e0012, 0x013200f0, 0xfee3fd9b, 0xffaa0331, 0x0311fe15, 0xfa60fe18, 0x05bd06d1, 0xfe1bf64a, 0xfafa07ae, 0x0b7effab, 0xf2d5f6d7, 0x07d30f7a, 0x02a3f194, 0xf32905dc, 0x110d0000, }, { // 9.8 MHz 0x00010002, 0xfffcffee, 0xfffb0032, 0x003fffcd, 0xff4effc1, 0x0106014a, 0xff6efd8a, 0xfedd02aa, 0x03b0ff34, 0xfa74fcd7, 0x04bf0781, 0xffaaf6a3, 0xf99e066b, 0x0bf90128, 0xf359f5e1, 0x06d20fa2, 0x0370f1e5, 0xf2e405a8, 0x110d0000, }, { // 9.9 MHz 0x00000003, 0xffffffee, 0xffef0024, 0x0051fffa, 0xff54ff77, 0x00be0184, 0x0006fdad, 0xfe2701f3, 0x0413005e, 0xfad1fbba, 0x039007ee, 0x013bf73d, 0xf868050a, 0x0c4302a1, 0xf3fdf4fe, 0x05c70fba, 0x043bf23c, 0xf2a10575, 0x110d0000, }, { // 10.0 MHz 0x00000003, 0x0003fff1, 0xffe50011, 0x00570027, 0xff70ff3c, 0x00620198, 0x009efe01, 0xfd95011a, 0x04350183, 0xfb71fad0, 0x023c0812, 0x02c3f811, 0xf75e0390, 0x0c5c0411, 0xf4c1f432, 0x04b30fc1, 0x0503f297, 0xf2610541, 0x110d0000, }, { // 10.1 MHz 0x00000003, 0x0006fff7, 0xffdffffc, 0x00510050, 0xff9dff18, 0xfffc0184, 0x0128fe80, 0xfd32002e, 0x04130292, 0xfc4dfa21, 0x00d107ee, 0x0435f91c, 0xf6850205, 0x0c430573, 0xf5a1f37d, 0x03990fba, 0x05c7f2f8, 0xf222050d, 0x110d0000, }, { // 10.2 MHz 0x00000002, 0x0008fffe, 0xffdfffe7, 0x003f006e, 0xffd6ff0f, 0xff96014a, 0x0197ff1f, 0xfd05ff3e, 0x03b0037c, 0xfd59f9b7, 0xff5d0781, 0x0585fa56, 0xf5e4006f, 0x0bf906c4, 0xf69df2e0, 0x02790fa2, 0x0688f35d, 0xf1e604d8, 0x110d0000, }, { // 10.3 MHz 0xffff0001, 0x00090005, 0xffe4ffd6, 0x0025007e, 0x0014ff20, 0xff3c00f0, 0x01e1ffd0, 0xfd12fe5c, 0x03110433, 0xfe88f996, 0xfdf106d1, 0x06aafbb7, 0xf57efed8, 0x0b7e07ff, 0xf7b0f25e, 0x01560f7a, 0x0745f3c7, 0xf1ac04a4, 0x110d0000, }, { // 10.4 MHz 0xffffffff, 0x0008000c, 0xffedffcb, 0x0005007d, 0x0050ff4c, 0xfef6007e, 0x01ff0086, 0xfd58fd97, 0x024104ad, 0xffcaf9c0, 0xfc9905e2, 0x079afd35, 0xf555fd46, 0x0ad50920, 0xf8d9f1f6, 0x00310f43, 0x07fdf435, 0xf174046f, 0x110d0000, }, { // 10.5 MHz 0xfffffffe, 0x00050011, 0xfffaffc8, 0xffe5006b, 0x0082ff8c, 0xfecc0000, 0x01f00130, 0xfdd2fcfc, 0x014d04e3, 0x010efa32, 0xfb6404bf, 0x084efec5, 0xf569fbc2, 0x0a000a23, 0xfa15f1ab, 0xff0b0efc, 0x08b0f4a7, 0xf13f043a, 
0x110d0000, }, { // 10.6 MHz 0x0000fffd, 0x00020012, 0x0007ffcd, 0xffc9004c, 0x00a4ffd9, 0xfec3ff82, 0x01b401c1, 0xfe76fc97, 0x004404d2, 0x0245fae8, 0xfa5f0370, 0x08c1005f, 0xf5bcfa52, 0x09020b04, 0xfb60f17b, 0xfde70ea6, 0x095df51e, 0xf10c0405, 0x110d0000, }, { // 10.7 MHz 0x0000fffd, 0xffff0011, 0x0014ffdb, 0xffb40023, 0x00b2002a, 0xfedbff10, 0x0150022d, 0xff38fc6f, 0xff36047b, 0x035efbda, 0xf9940202, 0x08ee01f5, 0xf649f8fe, 0x07e10bc2, 0xfcb6f169, 0xfcc60e42, 0x0a04f599, 0xf0db03d0, 0x110d0000, }, { // 10.8 MHz 0x0000fffd, 0xfffb000d, 0x001dffed, 0xffaafff5, 0x00aa0077, 0xff13feb6, 0x00ce026b, 0x000afc85, 0xfe3503e3, 0x044cfcfb, 0xf90c0082, 0x08d5037f, 0xf710f7cc, 0x069f0c59, 0xfe16f173, 0xfbaa0dcf, 0x0aa5f617, 0xf0ad039b, 0x110d0000, }, { // 10.9 MHz 0x0000fffe, 0xfff90006, 0x00210003, 0xffacffc8, 0x008e00b6, 0xff63fe7c, 0x003a0275, 0x00dafcda, 0xfd510313, 0x0501fe40, 0xf8cbfefd, 0x087604f0, 0xf80af6c2, 0x05430cc8, 0xff7af19a, 0xfa940d4e, 0x0b3ff699, 0xf0810365, 0x110d0000, }, { // 11.0 MHz 0x0001ffff, 0xfff8ffff, 0x00210018, 0xffbaffa3, 0x006000e1, 0xffc4fe68, 0xffa0024b, 0x019afd66, 0xfc990216, 0x0575ff99, 0xf8d4fd81, 0x07d40640, 0xf932f5e6, 0x03d20d0d, 0x00dff1de, 0xf9860cbf, 0x0bd1f71e, 0xf058032f, 0x110d0000, }, { // 11.1 MHz 0x00010000, 0xfff8fff8, 0x001b0029, 0xffd1ff8a, 0x002600f2, 0x002cfe7c, 0xff0f01f0, 0x023bfe20, 0xfc1700fa, 0x05a200f7, 0xf927fc1c, 0x06f40765, 0xfa82f53b, 0x02510d27, 0x0243f23d, 0xf8810c24, 0x0c5cf7a7, 0xf03102fa, 0x110d0000, }, { // 11.2 MHz 0x00010002, 0xfffafff2, 0x00110035, 0xfff0ff81, 0xffe700e7, 0x008ffeb6, 0xfe94016d, 0x02b0fefb, 0xfbd3ffd1, 0x05850249, 0xf9c1fadb, 0x05de0858, 0xfbf2f4c4, 0x00c70d17, 0x03a0f2b8, 0xf7870b7c, 0x0cdff833, 0xf00d02c4, 0x110d0000, }, { // 11.3 MHz 0x00000003, 0xfffdffee, 0x00040038, 0x0010ff88, 0xffac00c2, 0x00e2ff10, 0xfe3900cb, 0x02f1ffe9, 0xfbd3feaa, 0x05210381, 0xfa9cf9c8, 0x04990912, 0xfd7af484, 0xff390cdb, 0x04f4f34d, 0xf69a0ac9, 0x0d5af8c1, 0xefec028e, 0x110d0000, }, { // 11.4 MHz 0x00000003, 0x0000ffee, 0xfff60033, 0x002fff9f, 0xff7b0087, 0x011eff82, 0xfe080018, 0x02f900d8, 0xfc17fd96, 0x04790490, 0xfbadf8ed, 0x032f098e, 0xff10f47d, 0xfdaf0c75, 0x063cf3fc, 0xf5ba0a0b, 0x0dccf952, 0xefcd0258, 0x110d0000, }, { // 11.5 MHz 0x00000003, 0x0004fff1, 0xffea0026, 0x0046ffc3, 0xff5a003c, 0x013b0000, 0xfe04ff63, 0x02c801b8, 0xfc99fca6, 0x0397056a, 0xfcecf853, 0x01ad09c9, 0x00acf4ad, 0xfc2e0be7, 0x0773f4c2, 0xf4e90943, 0x0e35f9e6, 0xefb10221, 0x110d0000, }, { // 11.6 MHz 0x00000002, 0x0007fff6, 0xffe20014, 0x0054ffee, 0xff4effeb, 0x0137007e, 0xfe2efebb, 0x0260027a, 0xfd51fbe6, 0x02870605, 0xfe4af7fe, 0x001d09c1, 0x0243f515, 0xfabd0b32, 0x0897f59e, 0xf4280871, 0x0e95fa7c, 0xef9701eb, 0x110d0000, }, { // 11.7 MHz 0xffff0001, 0x0008fffd, 0xffdeffff, 0x0056001d, 0xff57ff9c, 0x011300f0, 0xfe82fe2e, 0x01ca0310, 0xfe35fb62, 0x0155065a, 0xffbaf7f2, 0xfe8c0977, 0x03cef5b2, 0xf9610a58, 0x09a5f68f, 0xf3790797, 0x0eebfb14, 0xef8001b5, 0x110d0000, }, { // 11.8 MHz 0xffff0000, 0x00080004, 0xffe0ffe9, 0x004c0047, 0xff75ff58, 0x00d1014a, 0xfef9fdc8, 0x0111036f, 0xff36fb21, 0x00120665, 0x012df82e, 0xfd0708ec, 0x0542f682, 0xf81f095c, 0x0a9af792, 0xf2db06b5, 0x0f38fbad, 0xef6c017e, 0x110d0000, }, { // 11.9 MHz 0xffffffff, 0x0007000b, 0xffe7ffd8, 0x00370068, 0xffa4ff28, 0x00790184, 0xff87fd91, 0x00430392, 0x0044fb26, 0xfece0626, 0x0294f8b2, 0xfb990825, 0x0698f77f, 0xf6fe0842, 0x0b73f8a7, 0xf25105cd, 0x0f7bfc48, 0xef5a0148, 0x110d0000, }, { // 12.0 MHz 0x0000fffe, 0x00050010, 0xfff2ffcc, 0x001b007b, 0xffdfff10, 0x00140198, 0x0020fd8e, 0xff710375, 
0x014dfb73, 0xfd9a059f, 0x03e0f978, 0xfa4e0726, 0x07c8f8a7, 0xf600070c, 0x0c2ff9c9, 0xf1db04de, 0x0fb4fce5, 0xef4b0111, 0x110d0000, }, { // 12.1 MHz 0x0000fffd, 0x00010012, 0xffffffc8, 0xfffb007e, 0x001dff14, 0xffad0184, 0x00b7fdbe, 0xfea9031b, 0x0241fc01, 0xfc8504d6, 0x0504fa79, 0xf93005f6, 0x08caf9f2, 0xf52b05c0, 0x0ccbfaf9, 0xf17903eb, 0x0fe3fd83, 0xef3f00db, 0x110d0000, }, { // 12.2 MHz 0x0000fffd, 0xfffe0011, 0x000cffcc, 0xffdb0071, 0x0058ff32, 0xff4f014a, 0x013cfe1f, 0xfdfb028a, 0x0311fcc9, 0xfb9d03d6, 0x05f4fbad, 0xf848049d, 0x0999fb5b, 0xf4820461, 0x0d46fc32, 0xf12d02f4, 0x1007fe21, 0xef3600a4, 0x110d0000, }, { // 12.3 MHz 0x0000fffe, 0xfffa000e, 0x0017ffd9, 0xffc10055, 0x0088ff68, 0xff0400f0, 0x01a6fea7, 0xfd7501cc, 0x03b0fdc0, 0xfaef02a8, 0x06a7fd07, 0xf79d0326, 0x0a31fcda, 0xf40702f3, 0x0d9ffd72, 0xf0f601fa, 0x1021fec0, 0xef2f006d, 0x110d0000, }, { // 12.4 MHz 0x0001ffff, 0xfff80007, 0x001fffeb, 0xffaf002d, 0x00a8ffb0, 0xfed3007e, 0x01e9ff4c, 0xfd2000ee, 0x0413fed8, 0xfa82015c, 0x0715fe7d, 0xf7340198, 0x0a8dfe69, 0xf3bd017c, 0x0dd5feb8, 0xf0d500fd, 0x1031ff60, 0xef2b0037, 0x110d0000, }, { // 12.5 MHz 0x00010000, 0xfff70000, 0x00220000, 0xffa90000, 0x00b30000, 0xfec20000, 0x02000000, 0xfd030000, 0x04350000, 0xfa5e0000, 0x073b0000, 0xf7110000, 0x0aac0000, 0xf3a40000, 0x0de70000, 0xf0c90000, 0x10360000, 0xef290000, 0x110d0000, }, { // 12.6 MHz 0x00010001, 0xfff8fff9, 0x001f0015, 0xffafffd3, 0x00a80050, 0xfed3ff82, 0x01e900b4, 0xfd20ff12, 0x04130128, 0xfa82fea4, 0x07150183, 0xf734fe68, 0x0a8d0197, 0xf3bdfe84, 0x0dd50148, 0xf0d5ff03, 0x103100a0, 0xef2bffc9, 0x110d0000, }, { // 12.7 MHz 0x00000002, 0xfffafff2, 0x00170027, 0xffc1ffab, 0x00880098, 0xff04ff10, 0x01a60159, 0xfd75fe34, 0x03b00240, 0xfaeffd58, 0x06a702f9, 0xf79dfcda, 0x0a310326, 0xf407fd0d, 0x0d9f028e, 0xf0f6fe06, 0x10210140, 0xef2fff93, 0x110d0000, }, { // 12.8 MHz 0x00000003, 0xfffeffef, 0x000c0034, 0xffdbff8f, 0x005800ce, 0xff4ffeb6, 0x013c01e1, 0xfdfbfd76, 0x03110337, 0xfb9dfc2a, 0x05f40453, 0xf848fb63, 0x099904a5, 0xf482fb9f, 0x0d4603ce, 0xf12dfd0c, 0x100701df, 0xef36ff5c, 0x110d0000, }, { // 12.9 MHz 0x00000003, 0x0001ffee, 0xffff0038, 0xfffbff82, 0x001d00ec, 0xffadfe7c, 0x00b70242, 0xfea9fce5, 0x024103ff, 0xfc85fb2a, 0x05040587, 0xf930fa0a, 0x08ca060e, 0xf52bfa40, 0x0ccb0507, 0xf179fc15, 0x0fe3027d, 0xef3fff25, 0x110d0000, }, { // 13.0 MHz 0x00000002, 0x0005fff0, 0xfff20034, 0x001bff85, 0xffdf00f0, 0x0014fe68, 0x00200272, 0xff71fc8b, 0x014d048d, 0xfd9afa61, 0x03e00688, 0xfa4ef8da, 0x07c80759, 0xf600f8f4, 0x0c2f0637, 0xf1dbfb22, 0x0fb4031b, 0xef4bfeef, 0x110d0000, }, { // 13.1 MHz 0xffff0001, 0x0007fff5, 0xffe70028, 0x0037ff98, 0xffa400d8, 0x0079fe7c, 0xff87026f, 0x0043fc6e, 0x004404da, 0xfecef9da, 0x0294074e, 0xfb99f7db, 0x06980881, 0xf6fef7be, 0x0b730759, 0xf251fa33, 0x0f7b03b8, 0xef5afeb8, 0x110d0000, }, { // 13.2 MHz 0xffff0000, 0x0008fffc, 0xffe00017, 0x004cffb9, 0xff7500a8, 0x00d1feb6, 0xfef90238, 0x0111fc91, 0xff3604df, 0x0012f99b, 0x012d07d2, 0xfd07f714, 0x0542097e, 0xf81ff6a4, 0x0a9a086e, 0xf2dbf94b, 0x0f380453, 0xef6cfe82, 0x110d0000, }, { // 13.3 MHz 0xffffffff, 0x00080003, 0xffde0001, 0x0056ffe3, 0xff570064, 0x0113ff10, 0xfe8201d2, 0x01cafcf0, 0xfe35049e, 0x0155f9a6, 0xffba080e, 0xfe8cf689, 0x03ce0a4e, 0xf961f5a8, 0x09a50971, 0xf379f869, 0x0eeb04ec, 0xef80fe4b, 0x110d0000, }, { // 13.4 MHz 0x0000fffe, 0x0007000a, 0xffe2ffec, 0x00540012, 0xff4e0015, 0x0137ff82, 0xfe2e0145, 0x0260fd86, 0xfd51041a, 0x0287f9fb, 0xfe4a0802, 0x001df63f, 0x02430aeb, 0xfabdf4ce, 0x08970a62, 0xf428f78f, 0x0e950584, 0xef97fe15, 
0x110d0000, }, { // 13.5 MHz 0x0000fffd, 0x0004000f, 0xffeaffda, 0x0046003d, 0xff5affc4, 0x013b0000, 0xfe04009d, 0x02c8fe48, 0xfc99035a, 0x0397fa96, 0xfcec07ad, 0x01adf637, 0x00ac0b53, 0xfc2ef419, 0x07730b3e, 0xf4e9f6bd, 0x0e35061a, 0xefb1fddf, 0x110d0000, }, { // 13.6 MHz 0x0000fffd, 0x00000012, 0xfff6ffcd, 0x002f0061, 0xff7bff79, 0x011e007e, 0xfe08ffe8, 0x02f9ff28, 0xfc17026a, 0x0479fb70, 0xfbad0713, 0x032ff672, 0xff100b83, 0xfdaff38b, 0x063c0c04, 0xf5baf5f5, 0x0dcc06ae, 0xefcdfda8, 0x110d0000, }, { // 13.7 MHz 0x0000fffd, 0xfffd0012, 0x0004ffc8, 0x00100078, 0xffacff3e, 0x00e200f0, 0xfe39ff35, 0x02f10017, 0xfbd30156, 0x0521fc7f, 0xfa9c0638, 0x0499f6ee, 0xfd7a0b7c, 0xff39f325, 0x04f40cb3, 0xf69af537, 0x0d5a073f, 0xefecfd72, 0x110d0000, }, { // 13.8 MHz 0x0001fffe, 0xfffa000e, 0x0011ffcb, 0xfff0007f, 0xffe7ff19, 0x008f014a, 0xfe94fe93, 0x02b00105, 0xfbd3002f, 0x0585fdb7, 0xf9c10525, 0x05def7a8, 0xfbf20b3c, 0x00c7f2e9, 0x03a00d48, 0xf787f484, 0x0cdf07cd, 0xf00dfd3c, 0x110d0000, }, { // 13.9 MHz 0x00010000, 0xfff80008, 0x001bffd7, 0xffd10076, 0x0026ff0e, 0x002c0184, 0xff0ffe10, 0x023b01e0, 0xfc17ff06, 0x05a2ff09, 0xf92703e4, 0x06f4f89b, 0xfa820ac5, 0x0251f2d9, 0x02430dc3, 0xf881f3dc, 0x0c5c0859, 0xf031fd06, 0x110d0000, }, { // 14.0 MHz 0x00010001, 0xfff80001, 0x0021ffe8, 0xffba005d, 0x0060ff1f, 0xffc40198, 0xffa0fdb5, 0x019a029a, 0xfc99fdea, 0x05750067, 0xf8d4027f, 0x07d4f9c0, 0xf9320a1a, 0x03d2f2f3, 0x00df0e22, 0xf986f341, 0x0bd108e2, 0xf058fcd1, 0x110d0000, }, { // 14.1 MHz 0x00000002, 0xfff9fffa, 0x0021fffd, 0xffac0038, 0x008eff4a, 0xff630184, 0x003afd8b, 0x00da0326, 0xfd51fced, 0x050101c0, 0xf8cb0103, 0x0876fb10, 0xf80a093e, 0x0543f338, 0xff7a0e66, 0xfa94f2b2, 0x0b3f0967, 0xf081fc9b, 0x110d0000, }, { // 14.2 MHz 0x00000003, 0xfffbfff3, 0x001d0013, 0xffaa000b, 0x00aaff89, 0xff13014a, 0x00cefd95, 0x000a037b, 0xfe35fc1d, 0x044c0305, 0xf90cff7e, 0x08d5fc81, 0xf7100834, 0x069ff3a7, 0xfe160e8d, 0xfbaaf231, 0x0aa509e9, 0xf0adfc65, 0x110d0000, }, { // 14.3 MHz 0x00000003, 0xffffffef, 0x00140025, 0xffb4ffdd, 0x00b2ffd6, 0xfedb00f0, 0x0150fdd3, 0xff380391, 0xff36fb85, 0x035e0426, 0xf994fdfe, 0x08eefe0b, 0xf6490702, 0x07e1f43e, 0xfcb60e97, 0xfcc6f1be, 0x0a040a67, 0xf0dbfc30, 0x110d0000, }, { // 14.4 MHz 0x00000003, 0x0002ffee, 0x00070033, 0xffc9ffb4, 0x00a40027, 0xfec3007e, 0x01b4fe3f, 0xfe760369, 0x0044fb2e, 0x02450518, 0xfa5ffc90, 0x08c1ffa1, 0xf5bc05ae, 0x0902f4fc, 0xfb600e85, 0xfde7f15a, 0x095d0ae2, 0xf10cfbfb, 0x110d0000, }, { // 14.5 MHz 0xffff0002, 0x0005ffef, 0xfffa0038, 0xffe5ff95, 0x00820074, 0xfecc0000, 0x01f0fed0, 0xfdd20304, 0x014dfb1d, 0x010e05ce, 0xfb64fb41, 0x084e013b, 0xf569043e, 0x0a00f5dd, 0xfa150e55, 0xff0bf104, 0x08b00b59, 0xf13ffbc6, 0x110d0000, }, { // 14.6 MHz 0xffff0001, 0x0008fff4, 0xffed0035, 0x0005ff83, 0x005000b4, 0xfef6ff82, 0x01ffff7a, 0xfd580269, 0x0241fb53, 0xffca0640, 0xfc99fa1e, 0x079a02cb, 0xf55502ba, 0x0ad5f6e0, 0xf8d90e0a, 0x0031f0bd, 0x07fd0bcb, 0xf174fb91, 0x110d0000, }, { // 14.7 MHz 0xffffffff, 0x0009fffb, 0xffe4002a, 0x0025ff82, 0x001400e0, 0xff3cff10, 0x01e10030, 0xfd1201a4, 0x0311fbcd, 0xfe88066a, 0xfdf1f92f, 0x06aa0449, 0xf57e0128, 0x0b7ef801, 0xf7b00da2, 0x0156f086, 0x07450c39, 0xf1acfb5c, 0x110d0000, }, { // 14.8 MHz 0x0000fffe, 0x00080002, 0xffdf0019, 0x003fff92, 0xffd600f1, 0xff96feb6, 0x019700e1, 0xfd0500c2, 0x03b0fc84, 0xfd590649, 0xff5df87f, 0x058505aa, 0xf5e4ff91, 0x0bf9f93c, 0xf69d0d20, 0x0279f05e, 0x06880ca3, 0xf1e6fb28, 0x110d0000, }, { // 14.9 MHz 0x0000fffd, 0x00060009, 0xffdf0004, 0x0051ffb0, 0xff9d00e8, 0xfffcfe7c, 0x01280180, 0xfd32ffd2, 
0x0413fd6e, 0xfc4d05df, 0x00d1f812, 0x043506e4, 0xf685fdfb, 0x0c43fa8d, 0xf5a10c83, 0x0399f046, 0x05c70d08, 0xf222faf3, 0x110d0000, }, { // 15.0 MHz 0x0000fffd, 0x0003000f, 0xffe5ffef, 0x0057ffd9, 0xff7000c4, 0x0062fe68, 0x009e01ff, 0xfd95fee6, 0x0435fe7d, 0xfb710530, 0x023cf7ee, 0x02c307ef, 0xf75efc70, 0x0c5cfbef, 0xf4c10bce, 0x04b3f03f, 0x05030d69, 0xf261fabf, 0x110d0000, }, { // 15.1 MHz 0x0000fffd, 0xffff0012, 0xffefffdc, 0x00510006, 0xff540089, 0x00befe7c, 0x00060253, 0xfe27fe0d, 0x0413ffa2, 0xfad10446, 0x0390f812, 0x013b08c3, 0xf868faf6, 0x0c43fd5f, 0xf3fd0b02, 0x05c7f046, 0x043b0dc4, 0xf2a1fa8b, 0x110d0000, }, { // 15.2 MHz 0x0001fffe, 0xfffc0012, 0xfffbffce, 0x003f0033, 0xff4e003f, 0x0106feb6, 0xff6e0276, 0xfeddfd56, 0x03b000cc, 0xfa740329, 0x04bff87f, 0xffaa095d, 0xf99ef995, 0x0bf9fed8, 0xf3590a1f, 0x06d2f05e, 0x03700e1b, 0xf2e4fa58, 0x110d0000, }, { // 15.3 MHz 0x0001ffff, 0xfff9000f, 0x0009ffc8, 0x00250059, 0xff5effee, 0x0132ff10, 0xfee30265, 0xffaafccf, 0x031101eb, 0xfa6001e8, 0x05bdf92f, 0xfe1b09b6, 0xfafaf852, 0x0b7e0055, 0xf2d50929, 0x07d3f086, 0x02a30e6c, 0xf329fa24, 0x110d0000, }, { // 15.4 MHz 0x00010001, 0xfff80009, 0x0015ffca, 0x00050074, 0xff81ff9f, 0x013dff82, 0xfe710221, 0x007cfc80, 0x024102ed, 0xfa940090, 0x0680fa1e, 0xfc9b09cd, 0xfc73f736, 0x0ad501d0, 0xf2740820, 0x08c9f0bd, 0x01d40eb9, 0xf371f9f1, 0x110d0000, }, { // 15.5 MHz 0x00000002, 0xfff80002, 0x001effd5, 0xffe5007f, 0xffb4ff5b, 0x01280000, 0xfe2401b0, 0x0146fc70, 0x014d03c6, 0xfb10ff32, 0x0701fb41, 0xfb3709a1, 0xfe00f644, 0x0a000345, 0xf2350708, 0x09b2f104, 0x01050eff, 0xf3baf9be, 0x110d0000, }, { // 15.6 MHz 0x00000003, 0xfff9fffb, 0x0022ffe6, 0xffc9007a, 0xfff0ff29, 0x00f2007e, 0xfe01011b, 0x01f6fc9e, 0x00440467, 0xfbccfdde, 0x0738fc90, 0xf9f70934, 0xff99f582, 0x090204b0, 0xf21a05e1, 0x0a8df15a, 0x00340f41, 0xf405f98b, 0x110d0000, }, { // 15.7 MHz 0x00000003, 0xfffcfff4, 0x0020fffa, 0xffb40064, 0x002fff11, 0x00a400f0, 0xfe0d006e, 0x0281fd09, 0xff3604c9, 0xfcbffca2, 0x0726fdfe, 0xf8e80888, 0x0134f4f3, 0x07e1060c, 0xf22304af, 0x0b59f1be, 0xff640f7d, 0xf452f959, 0x110d0000, }, { // 15.8 MHz 0x00000003, 0x0000fff0, 0x001a0010, 0xffaa0041, 0x0067ff13, 0x0043014a, 0xfe46ffb9, 0x02dbfda8, 0xfe3504e5, 0xfddcfb8d, 0x06c9ff7e, 0xf81107a2, 0x02c9f49a, 0x069f0753, 0xf2500373, 0x0c14f231, 0xfe930fb3, 0xf4a1f927, 0x110d0000, }, { // 15.9 MHz 0xffff0002, 0x0003ffee, 0x000f0023, 0xffac0016, 0x0093ff31, 0xffdc0184, 0xfea6ff09, 0x02fdfe70, 0xfd5104ba, 0xff15faac, 0x06270103, 0xf7780688, 0x044df479, 0x05430883, 0xf2a00231, 0x0cbef2b2, 0xfdc40fe3, 0xf4f2f8f5, 0x110d0000, }, { // 16.0 MHz 0xffff0001, 0x0006ffef, 0x00020031, 0xffbaffe8, 0x00adff66, 0xff790198, 0xff26fe6e, 0x02e5ff55, 0xfc99044a, 0x005bfa09, 0x0545027f, 0xf7230541, 0x05b8f490, 0x03d20997, 0xf31300eb, 0x0d55f341, 0xfcf6100e, 0xf544f8c3, 0x110d0000, } }; static void cx23885_dif_setup(struct i2c_client *client, u32 ifHz) { u64 pll_freq; u32 pll_freq_word; const u32 *coeffs; v4l_dbg(1, cx25840_debug, client, "%s(%d)\n", __func__, ifHz); /* Assuming TV */ /* Calculate the PLL frequency word based on the adjusted ifHz */ pll_freq = div_u64((u64)ifHz * 268435456, 50000000); pll_freq_word = (u32)pll_freq; cx25840_write4(client, DIF_PLL_FREQ_WORD, pll_freq_word); /* Round down to the nearest 100KHz */ ifHz = (ifHz / 100000) * 100000; if (ifHz < 3000000) ifHz = 3000000; if (ifHz > 16000000) ifHz = 16000000; v4l_dbg(1, cx25840_debug, client, "%s(%d) again\n", __func__, ifHz); coeffs = ifhz_coeffs[(ifHz - 3000000) / 100000]; cx25840_write4(client, DIF_BPF_COEFF01, 
coeffs[0]); cx25840_write4(client, DIF_BPF_COEFF23, coeffs[1]); cx25840_write4(client, DIF_BPF_COEFF45, coeffs[2]); cx25840_write4(client, DIF_BPF_COEFF67, coeffs[3]); cx25840_write4(client, DIF_BPF_COEFF89, coeffs[4]); cx25840_write4(client, DIF_BPF_COEFF1011, coeffs[5]); cx25840_write4(client, DIF_BPF_COEFF1213, coeffs[6]); cx25840_write4(client, DIF_BPF_COEFF1415, coeffs[7]); cx25840_write4(client, DIF_BPF_COEFF1617, coeffs[8]); cx25840_write4(client, DIF_BPF_COEFF1819, coeffs[9]); cx25840_write4(client, DIF_BPF_COEFF2021, coeffs[10]); cx25840_write4(client, DIF_BPF_COEFF2223, coeffs[11]); cx25840_write4(client, DIF_BPF_COEFF2425, coeffs[12]); cx25840_write4(client, DIF_BPF_COEFF2627, coeffs[13]); cx25840_write4(client, DIF_BPF_COEFF2829, coeffs[14]); cx25840_write4(client, DIF_BPF_COEFF3031, coeffs[15]); cx25840_write4(client, DIF_BPF_COEFF3233, coeffs[16]); cx25840_write4(client, DIF_BPF_COEFF3435, coeffs[17]); cx25840_write4(client, DIF_BPF_COEFF36, coeffs[18]); } static void cx23888_std_setup(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); v4l2_std_id std = state->std; u32 ifHz; cx25840_write4(client, 0x478, 0x6628021F); cx25840_write4(client, 0x400, 0x0); cx25840_write4(client, 0x4b4, 0x20524030); cx25840_write4(client, 0x47c, 0x010a8263); if (std & V4L2_STD_525_60) { v4l_dbg(1, cx25840_debug, client, "%s() Selecting NTSC", __func__); /* Horiz / vert timing */ cx25840_write4(client, 0x428, 0x1e1e601a); cx25840_write4(client, 0x424, 0x5b2d007a); /* DIF NTSC */ cx25840_write4(client, 0x304, 0x6503bc0c); cx25840_write4(client, 0x308, 0xbd038c85); cx25840_write4(client, 0x30c, 0x1db4640a); cx25840_write4(client, 0x310, 0x00008800); cx25840_write4(client, 0x314, 0x44400400); cx25840_write4(client, 0x32c, 0x0c800800); cx25840_write4(client, 0x330, 0x27000100); cx25840_write4(client, 0x334, 0x1f296e1f); cx25840_write4(client, 0x338, 0x009f50c1); cx25840_write4(client, 0x340, 0x1befbf06); cx25840_write4(client, 0x344, 0x000035e8); /* DIF I/F */ ifHz = 5400000; } else { v4l_dbg(1, cx25840_debug, client, "%s() Selecting PAL-BG", __func__); /* Horiz / vert timing */ cx25840_write4(client, 0x428, 0x28244024); cx25840_write4(client, 0x424, 0x5d2d0084); /* DIF */ cx25840_write4(client, 0x304, 0x6503bc0c); cx25840_write4(client, 0x308, 0xbd038c85); cx25840_write4(client, 0x30c, 0x1db4640a); cx25840_write4(client, 0x310, 0x00008800); cx25840_write4(client, 0x314, 0x44400600); cx25840_write4(client, 0x32c, 0x0c800800); cx25840_write4(client, 0x330, 0x27000100); cx25840_write4(client, 0x334, 0x213530ec); cx25840_write4(client, 0x338, 0x00a65ba8); cx25840_write4(client, 0x340, 0x1befbf06); cx25840_write4(client, 0x344, 0x000035e8); /* DIF I/F */ ifHz = 6000000; } cx23885_dif_setup(client, ifHz); /* Explicitly ensure the inputs are reconfigured after * a standard change. 
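 *
 * (Editorial note with a worked example, not in the original source: for
 * the NTSC branch above ifHz = 5400000, so cx23885_dif_setup() programs
 *   DIF_PLL_FREQ_WORD = 5400000 * 2^28 / 50000000 = 0x01ba5e35 (truncated)
 * and loads row (5400000 - 3000000) / 100000 = 24 of ifhz_coeffs[], i.e.
 * the "5.4 MHz" entry.  The set_input() call below then re-runs the
 * DIF/AFE routing for the new standard.)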
*/ set_input(client, state->vid_input, state->aud_input); } /* ----------------------------------------------------------------------- */ static const struct v4l2_ctrl_ops cx25840_ctrl_ops = { .s_ctrl = cx25840_s_ctrl, }; static const struct v4l2_subdev_core_ops cx25840_core_ops = { .log_status = cx25840_log_status, .reset = cx25840_reset, /* calling the (optional) init op will turn on the generic mode */ .init = cx25840_init, .load_fw = cx25840_load_fw, .s_io_pin_config = common_s_io_pin_config, #ifdef CONFIG_VIDEO_ADV_DEBUG .g_register = cx25840_g_register, .s_register = cx25840_s_register, #endif .interrupt_service_routine = cx25840_irq_handler, }; static const struct v4l2_subdev_tuner_ops cx25840_tuner_ops = { .s_frequency = cx25840_s_frequency, .s_radio = cx25840_s_radio, .g_tuner = cx25840_g_tuner, .s_tuner = cx25840_s_tuner, }; static const struct v4l2_subdev_audio_ops cx25840_audio_ops = { .s_clock_freq = cx25840_s_clock_freq, .s_routing = cx25840_s_audio_routing, .s_stream = cx25840_s_audio_stream, }; static const struct v4l2_subdev_video_ops cx25840_video_ops = { .g_std = cx25840_g_std, .s_std = cx25840_s_std, .querystd = cx25840_querystd, .s_routing = cx25840_s_video_routing, .s_stream = cx25840_s_stream, .g_input_status = cx25840_g_input_status, }; static const struct v4l2_subdev_vbi_ops cx25840_vbi_ops = { .decode_vbi_line = cx25840_decode_vbi_line, .s_raw_fmt = cx25840_s_raw_fmt, .s_sliced_fmt = cx25840_s_sliced_fmt, .g_sliced_fmt = cx25840_g_sliced_fmt, }; static const struct v4l2_subdev_pad_ops cx25840_pad_ops = { .set_fmt = cx25840_set_fmt, }; static const struct v4l2_subdev_ops cx25840_ops = { .core = &cx25840_core_ops, .tuner = &cx25840_tuner_ops, .audio = &cx25840_audio_ops, .video = &cx25840_video_ops, .vbi = &cx25840_vbi_ops, .pad = &cx25840_pad_ops, .ir = &cx25840_ir_ops, }; /* ----------------------------------------------------------------------- */ static u32 get_cx2388x_ident(struct i2c_client *client) { u32 ret; /* Come out of digital power down */ cx25840_write(client, 0x000, 0); /* * Detecting whether the part is cx23885/7/8 is more * difficult than it needs to be. No ID register. Instead we * probe certain registers indicated in the datasheets to look * for specific defaults that differ between the silicon designs. */ /* It's either 885/7 if the IR Tx Clk Divider register exists */ if (cx25840_read4(client, 0x204) & 0xffff) { /* * CX23885 returns bogus repetitive byte values for the DIF, * which doesn't exist for it. (Ex. 8a8a8a8a or 31313131) */ ret = cx25840_read4(client, 0x300); if (((ret & 0xffff0000) >> 16) == (ret & 0xffff)) { /* No DIF */ ret = CX23885_AV; } else { /* * CX23887 has a broken DIF, but the registers * appear valid (but unused), good enough to detect. 
*/ ret = CX23887_AV; } } else if (cx25840_read4(client, 0x300) & 0x0fffffff) { /* DIF PLL Freq Word reg exists; chip must be a CX23888 */ ret = CX23888_AV; } else { v4l_err(client, "Unable to detect h/w, assuming cx23887\n"); ret = CX23887_AV; } /* Back into digital power down */ cx25840_write(client, 0x000, 2); return ret; } static int cx25840_probe(struct i2c_client *client) { struct cx25840_state *state; struct v4l2_subdev *sd; int default_volume; u32 id; u16 device_id; #if defined(CONFIG_MEDIA_CONTROLLER) int ret; #endif /* Check if the adapter supports the needed features */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) return -EIO; v4l_dbg(1, cx25840_debug, client, "detecting cx25840 client on address 0x%x\n", client->addr << 1); device_id = cx25840_read(client, 0x101) << 8; device_id |= cx25840_read(client, 0x100); v4l_dbg(1, cx25840_debug, client, "device_id = 0x%04x\n", device_id); /* * The high byte of the device ID should be * 0x83 for the cx2583x and 0x84 for the cx2584x */ if ((device_id & 0xff00) == 0x8300) { id = CX25836 + ((device_id >> 4) & 0xf) - 6; } else if ((device_id & 0xff00) == 0x8400) { id = CX25840 + ((device_id >> 4) & 0xf); } else if (device_id == 0x0000) { id = get_cx2388x_ident(client); } else if ((device_id & 0xfff0) == 0x5A30) { /* The CX23100 (0x5A3C = 23100) doesn't have an A/V decoder */ id = CX2310X_AV; } else if ((device_id & 0xff) == (device_id >> 8)) { v4l_err(client, "likely a confused/unresponsive cx2388[578] A/V decoder found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); v4l_err(client, "A method to reset it from the cx25840 driver software is not known at this time\n"); return -ENODEV; } else { v4l_dbg(1, cx25840_debug, client, "cx25840 not found\n"); return -ENODEV; } state = devm_kzalloc(&client->dev, sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; sd = &state->sd; v4l2_i2c_subdev_init(sd, client, &cx25840_ops); #if defined(CONFIG_MEDIA_CONTROLLER) /* * TODO: add media controller support for analog video inputs like * composite, svideo, etc. * A real input pad for this analog demod would be like: * ___________ * TUNER --------> | | * | | * SVIDEO .......> | cx25840 | * | | * COMPOSITE1 ...> |_________| * * However, at least for now, there's no much gain on modelling * those extra inputs. So, let's add it only when needed. */ state->pads[CX25840_PAD_INPUT].flags = MEDIA_PAD_FL_SINK; state->pads[CX25840_PAD_INPUT].sig_type = PAD_SIGNAL_ANALOG; state->pads[CX25840_PAD_VID_OUT].flags = MEDIA_PAD_FL_SOURCE; state->pads[CX25840_PAD_VID_OUT].sig_type = PAD_SIGNAL_DV; sd->entity.function = MEDIA_ENT_F_ATV_DECODER; ret = media_entity_pads_init(&sd->entity, ARRAY_SIZE(state->pads), state->pads); if (ret < 0) { v4l_info(client, "failed to initialize media entity!\n"); return ret; } #endif switch (id) { case CX23885_AV: v4l_info(client, "cx23885 A/V decoder found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); break; case CX23887_AV: v4l_info(client, "cx23887 A/V decoder found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); break; case CX23888_AV: v4l_info(client, "cx23888 A/V decoder found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); break; case CX2310X_AV: v4l_info(client, "cx%d A/V decoder found @ 0x%x (%s)\n", device_id, client->addr << 1, client->adapter->name); break; case CX25840: case CX25841: case CX25842: case CX25843: /* * Note: revision '(device_id & 0x0f) == 2' was never built. * The marking skips from 0x1 == 22 to 0x3 == 23. 
*/ v4l_info(client, "cx25%3x-2%x found @ 0x%x (%s)\n", (device_id & 0xfff0) >> 4, (device_id & 0x0f) < 3 ? (device_id & 0x0f) + 1 : (device_id & 0x0f), client->addr << 1, client->adapter->name); break; case CX25836: case CX25837: default: v4l_info(client, "cx25%3x-%x found @ 0x%x (%s)\n", (device_id & 0xfff0) >> 4, device_id & 0x0f, client->addr << 1, client->adapter->name); break; } state->c = client; state->vid_input = CX25840_COMPOSITE7; state->aud_input = CX25840_AUDIO8; state->audclk_freq = 48000; state->audmode = V4L2_TUNER_MODE_LANG1; state->vbi_line_offset = 8; state->id = id; state->rev = device_id; state->vbi_regs_offset = id == CX23888_AV ? 0x500 - 0x424 : 0; state->std = V4L2_STD_NTSC_M; v4l2_ctrl_handler_init(&state->hdl, 9); v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_CONTRAST, 0, 127, 1, 64); v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_SATURATION, 0, 127, 1, 64); v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_HUE, -128, 127, 1, 0); if (!is_cx2583x(state)) { default_volume = cx25840_read(client, 0x8d4); /* * Enforce the legacy PVR-350/MSP3400 to PVR-150/CX25843 volume * scale mapping limits to avoid -ERANGE errors when * initializing the volume control */ if (default_volume > 228) { /* Bottom out at -96 dB, v4l2 vol range 0x2e00-0x2fff */ default_volume = 228; cx25840_write(client, 0x8d4, 228); } else if (default_volume < 20) { /* Top out at + 8 dB, v4l2 vol range 0xfe00-0xffff */ default_volume = 20; cx25840_write(client, 0x8d4, 20); } default_volume = (((228 - default_volume) >> 1) + 23) << 9; state->volume = v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_VOLUME, 0, 65535, 65535 / 100, default_volume); state->mute = v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0); v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_BALANCE, 0, 65535, 65535 / 100, 32768); v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_BASS, 0, 65535, 65535 / 100, 32768); v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_TREBLE, 0, 65535, 65535 / 100, 32768); } sd->ctrl_handler = &state->hdl; if (state->hdl.error) { int err = state->hdl.error; v4l2_ctrl_handler_free(&state->hdl); return err; } if (!is_cx2583x(state)) v4l2_ctrl_cluster(2, &state->volume); v4l2_ctrl_handler_setup(&state->hdl); if (client->dev.platform_data) { struct cx25840_platform_data *pdata = client->dev.platform_data; state->pvr150_workaround = pdata->pvr150_workaround; } cx25840_ir_probe(sd); return 0; } static void cx25840_remove(struct i2c_client *client) { struct v4l2_subdev *sd = i2c_get_clientdata(client); struct cx25840_state *state = to_state(sd); cx25840_ir_remove(sd); v4l2_device_unregister_subdev(sd); v4l2_ctrl_handler_free(&state->hdl); } static const struct i2c_device_id cx25840_id[] = { { "cx25840", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, cx25840_id); static struct i2c_driver cx25840_driver = { .driver = { .name = "cx25840", }, .probe = cx25840_probe, .remove = cx25840_remove, .id_table = cx25840_id, }; module_i2c_driver(cx25840_driver);
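/*
 * Editor's note: the block below is an illustrative userspace sketch, not
 * part of the driver. It models the arithmetic that cx23885_dif_setup()
 * performs above: the DIF PLL frequency word is ifHz scaled by 2^28 / 50 MHz,
 * and the band-pass coefficient table is indexed in 100 kHz steps between
 * 3 MHz and 16 MHz. The helper names (dif_pll_freq_word, dif_coeff_index)
 * are hypothetical.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t dif_pll_freq_word(uint32_t if_hz)
{
	/* same scaling the driver uses before writing DIF_PLL_FREQ_WORD */
	return (uint32_t)(((uint64_t)if_hz * 268435456ULL) / 50000000ULL);
}

static unsigned int dif_coeff_index(uint32_t if_hz)
{
	if_hz = (if_hz / 100000) * 100000;	/* round down to 100 kHz */
	if (if_hz < 3000000)
		if_hz = 3000000;
	if (if_hz > 16000000)
		if_hz = 16000000;
	return (if_hz - 3000000) / 100000;	/* row 0..130 of ifhz_coeffs[] */
}

int main(void)
{
	/* cx23888_std_setup() selects 5.4 MHz for NTSC and 6.0 MHz for PAL-BG */
	printf("5.4 MHz: word=0x%08" PRIx32 " row=%u\n",
	       dif_pll_freq_word(5400000), dif_coeff_index(5400000));
	printf("6.0 MHz: word=0x%08" PRIx32 " row=%u\n",
	       dif_pll_freq_word(6000000), dif_coeff_index(6000000));
	return 0;
}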
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H /* * If more than 16 keys are ever supported, a thorough audit * will be necessary to ensure that the types that store key * numbers and masks have sufficient capacity. */ #define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); static inline bool arch_pkeys_enabled(void) { return cpu_feature_enabled(X86_FEATURE_OSPKE); } /* * Try to dedicate one of the protection keys to be used as an * execute-only protection key. */ extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); } extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey); static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return 0; return __arch_override_mprotect_pkey(vma, prot, pkey); } #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3) #define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) #define mm_set_pkey_allocated(mm, pkey) do { \ mm_pkey_allocation_map(mm) |= (1U << pkey); \ } while (0) #define mm_set_pkey_free(mm, pkey) do { \ mm_pkey_allocation_map(mm) &= ~(1U << pkey); \ } while (0) static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { /* * "Allocated" pkeys are those that have been returned * from pkey_alloc() or pkey 0 which is allocated * implicitly when the mm is created. */ if (pkey < 0) return false; if (pkey >= arch_max_pkey()) return false; /* * The exec-only pkey is set in the allocation map, but * is not available to any of the user interfaces like * mprotect_pkey(). */ if (pkey == mm->context.execute_only_pkey) return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } /* * Returns a positive, 4-bit key on success, or -1 on failure. */ static inline int mm_pkey_alloc(struct mm_struct *mm) { /* * Note: this is the one and only place we make sure * that the pkey is valid as far as the hardware is * concerned. The rest of the kernel trusts that * only good, valid pkeys come out of here. */ u16 all_pkeys_mask = ((1U << arch_max_pkey()) - 1); int ret; /* * Are we out of pkeys? We must handle this specially * because ffz() behavior is undefined if there are no * zeros. */ if (mm_pkey_allocation_map(mm) == all_pkeys_mask) return -1; ret = ffz(mm_pkey_allocation_map(mm)); mm_set_pkey_allocated(mm, ret); return ret; } static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; mm_set_pkey_free(mm, pkey); return 0; } static inline int vma_pkey(struct vm_area_struct *vma) { unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3; return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT; } #endif /*_ASM_X86_PKEYS_H */
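/*
 * Editor's note: a minimal userspace model (assumption: illustrative only,
 * not kernel code) of the pkey allocation bitmap logic declared above.
 * Sixteen keys are tracked in a 16-bit mask; allocation hands out the first
 * zero bit (mirroring the ffz() use in mm_pkey_alloc()) and freeing clears
 * it again. The model_* names are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define MODEL_MAX_PKEY 16

static int model_pkey_alloc(uint16_t *map)
{
	const uint16_t all_pkeys_mask = (uint16_t)((1U << MODEL_MAX_PKEY) - 1);
	int pkey;

	if (*map == all_pkeys_mask)	/* out of pkeys, like the ffz() guard */
		return -1;
	for (pkey = 0; pkey < MODEL_MAX_PKEY; pkey++) {
		if (!(*map & (1U << pkey))) {
			*map |= (1U << pkey);
			return pkey;
		}
	}
	return -1;
}

static int model_pkey_free(uint16_t *map, int pkey)
{
	if (pkey < 0 || pkey >= MODEL_MAX_PKEY || !(*map & (1U << pkey)))
		return -1;	/* stands in for -EINVAL */
	*map &= ~(1U << pkey);
	return 0;
}

int main(void)
{
	uint16_t map = 1U;	/* pkey 0 is allocated implicitly at mm creation */
	int pkey = model_pkey_alloc(&map);

	printf("allocated pkey %d, map=0x%04x\n", pkey, map);
	model_pkey_free(&map, pkey);
	printf("freed pkey %d, map=0x%04x\n", pkey, map);
	return 0;
}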
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> */ /* * fsnotify inode mark 
locking/lifetime/and refcnting * * REFCNT: * The group->refcnt and mark->refcnt tell how many "things" in the kernel * currently are referencing the objects. Both kinds of objects typically will * live inside the kernel with a refcnt of 2, one for its creation and one for * the reference a group and a mark hold to each other. * If you are holding the appropriate locks, you can take a reference and the * object itself is guaranteed to survive until the reference is dropped. * * LOCKING: * There are 3 locks involved with fsnotify inode marks and they MUST be taken * in order as follows: * * group->mark_mutex * mark->lock * mark->connector->lock * * group->mark_mutex protects the marks_list anchored inside a given group and * each mark is hooked via the g_list. It also protects the group's private * data (i.e. group limits). * mark->lock protects the mark's attributes like its masks and flags. * Furthermore it protects the access to a reference of the group that the mark * is assigned to as well as the access to a reference of the inode/vfsmount * that is being watched by the mark. * * mark->connector->lock protects the list of marks anchored inside an * inode / vfsmount and each mark is hooked via the i_list. * * A list of notification marks relating to inode / mnt is contained in * fsnotify_mark_connector. That structure is alive as long as there are any * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets * detached from fsnotify_mark_connector when the last reference to the mark is * dropped. Thus having a mark reference is enough to protect mark->connector * pointer and to make sure fsnotify_mark_connector cannot disappear. Also * because we remove mark from g_list before dropping mark reference associated * with that, any mark found through g_list is guaranteed to have * mark->connector set until we drop group->mark_mutex. * * LIFETIME: * Inode marks survive between when they are added to an inode and when their * refcnt==0. Marks are also protected by fsnotify_mark_srcu. * * The inode mark can be cleared for a number of different reasons including: * - The inode is unlinked for the last time. (fsnotify_inode_remove) * - The inode is being evicted from cache. (fsnotify_inode_delete) * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes) * - Something explicitly requests that it be removed. (fsnotify_destroy_mark) * - The fsnotify_group associated with the mark is going away and all such marks * need to be cleaned up. (fsnotify_clear_marks_by_group) * * This has the very interesting property of being able to run concurrently with * any (or all) other directions. 
*/ #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/srcu.h> #include <linux/ratelimit.h> #include <linux/atomic.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ struct srcu_struct fsnotify_mark_srcu; struct kmem_cache *fsnotify_mark_connector_cachep; static DEFINE_SPINLOCK(destroy_lock); static LIST_HEAD(destroy_list); static struct fsnotify_mark_connector *connector_destroy_list; static void fsnotify_mark_destroy_workfn(struct work_struct *work); static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn); static void fsnotify_connector_destroy_workfn(struct work_struct *work); static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); void fsnotify_get_mark(struct fsnotify_mark *mark) { WARN_ON_ONCE(!refcount_read(&mark->refcnt)); refcount_inc(&mark->refcnt); } static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) { if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) return &fsnotify_conn_inode(conn)->i_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) return &fsnotify_conn_sb(conn)->s_fsnotify_mask; return NULL; } __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) { if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) return 0; return *fsnotify_conn_mask_p(conn); } static void fsnotify_get_inode_ref(struct inode *inode) { ihold(inode); atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); } /* * Grab or drop inode reference for the connector if needed. * * When it's time to drop the reference, we only clear the HAS_IREF flag and * return the inode object. fsnotify_drop_object() will be resonsible for doing * iput() outside of spinlocks. This happens when last mark that wanted iref is * detached. */ static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn, bool want_iref) { bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF; struct inode *inode = NULL; if (conn->type != FSNOTIFY_OBJ_TYPE_INODE || want_iref == has_iref) return NULL; if (want_iref) { /* Pin inode if any mark wants inode refcount held */ fsnotify_get_inode_ref(fsnotify_conn_inode(conn)); conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF; } else { /* Unpin inode after detach of last mark that wanted iref */ inode = fsnotify_conn_inode(conn); conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF; } return inode; } static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; bool want_iref = false; struct fsnotify_mark *mark; assert_spin_locked(&conn->lock); /* We can get detached connector here when inode is getting unlinked. */ if (!fsnotify_valid_obj_type(conn->type)) return NULL; hlist_for_each_entry(mark, &conn->list, obj_list) { if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) continue; new_mask |= fsnotify_calc_mask(mark); if (conn->type == FSNOTIFY_OBJ_TYPE_INODE && !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) want_iref = true; } *fsnotify_conn_mask_p(conn) = new_mask; return fsnotify_update_iref(conn, want_iref); } /* * Calculate mask of events for a list of marks. The caller must make sure * connector and connector->obj cannot disappear under us. Callers achieve * this by holding a mark->lock or mark->group->mark_mutex for a mark on this * list. 
*/ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { if (!conn) return; spin_lock(&conn->lock); __fsnotify_recalc_mask(conn); spin_unlock(&conn->lock); if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) __fsnotify_update_child_dentry_flags( fsnotify_conn_inode(conn)); } /* Free all connectors queued for freeing once SRCU period ends */ static void fsnotify_connector_destroy_workfn(struct work_struct *work) { struct fsnotify_mark_connector *conn, *free; spin_lock(&destroy_lock); conn = connector_destroy_list; connector_destroy_list = NULL; spin_unlock(&destroy_lock); synchronize_srcu(&fsnotify_mark_srcu); while (conn) { free = conn; conn = conn->destroy_next; kmem_cache_free(fsnotify_mark_connector_cachep, free); } } static void fsnotify_put_inode_ref(struct inode *inode) { struct super_block *sb = inode->i_sb; iput(inode); if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) wake_up_var(&sb->s_fsnotify_connectors); } static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn) { struct super_block *sb = fsnotify_connector_sb(conn); if (sb) atomic_long_inc(&sb->s_fsnotify_connectors); } static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn) { struct super_block *sb = fsnotify_connector_sb(conn); if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) wake_up_var(&sb->s_fsnotify_connectors); } static void *fsnotify_detach_connector_from_object( struct fsnotify_mark_connector *conn, unsigned int *type) { struct inode *inode = NULL; *type = conn->type; if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) return NULL; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; /* Unpin inode when detaching from connector */ if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF)) inode = NULL; } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } fsnotify_put_sb_connectors(conn); rcu_assign_pointer(*(conn->obj), NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; return inode; } static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) { struct fsnotify_group *group = mark->group; if (WARN_ON_ONCE(!group)) return; group->ops->free_mark(mark); fsnotify_put_group(group); } /* Drop object reference originally held by a connector */ static void fsnotify_drop_object(unsigned int type, void *objp) { if (!objp) return; /* Currently only inode references are passed to be dropped */ if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE)) return; fsnotify_put_inode_ref(objp); } void fsnotify_put_mark(struct fsnotify_mark *mark) { struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector); void *objp = NULL; unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED; bool free_conn = false; /* Catch marks that were actually never attached to object */ if (!conn) { if (refcount_dec_and_test(&mark->refcnt)) fsnotify_final_mark_destroy(mark); return; } /* * We have to be careful so that traversals of obj_list under lock can * safely grab mark reference. 
*/ if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock)) return; hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&conn->list)) { objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { objp = __fsnotify_recalc_mask(conn); type = conn->type; } WRITE_ONCE(mark->connector, NULL); spin_unlock(&conn->lock); fsnotify_drop_object(type, objp); if (free_conn) { spin_lock(&destroy_lock); conn->destroy_next = connector_destroy_list; connector_destroy_list = conn; spin_unlock(&destroy_lock); queue_work(system_unbound_wq, &connector_reaper_work); } /* * Note that we didn't update flags telling whether inode cares about * what's happening with children. We update these flags from * __fsnotify_parent() lazily when next event happens on one of our * children. */ spin_lock(&destroy_lock); list_add(&mark->g_list, &destroy_list); spin_unlock(&destroy_lock); queue_delayed_work(system_unbound_wq, &reaper_work, FSNOTIFY_REAPER_DELAY); } EXPORT_SYMBOL_GPL(fsnotify_put_mark); /* * Get mark reference when we found the mark via lockless traversal of object * list. Mark can be already removed from the list by now and on its way to be * destroyed once SRCU period ends. * * Also pin the group so it doesn't disappear under us. */ static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { if (!mark) return true; if (refcount_inc_not_zero(&mark->refcnt)) { spin_lock(&mark->lock); if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { /* mark is attached, group is still alive then */ atomic_inc(&mark->group->user_waits); spin_unlock(&mark->lock); return true; } spin_unlock(&mark->lock); fsnotify_put_mark(mark); } return false; } /* * Puts marks and wakes up group destruction if necessary. * * Pairs with fsnotify_get_mark_safe() */ static void fsnotify_put_mark_wake(struct fsnotify_mark *mark) { if (mark) { struct fsnotify_group *group = mark->group; fsnotify_put_mark(mark); /* * We abuse notification_waitq on group shutdown for waiting for * all marks pinned when waiting for userspace. */ if (atomic_dec_and_test(&group->user_waits) && group->shutdown) wake_up(&group->notification_waitq); } } bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) __releases(&fsnotify_mark_srcu) { int type; fsnotify_foreach_iter_type(type) { /* This can fail if mark is being removed */ if (!fsnotify_get_mark_safe(iter_info->marks[type])) { __release(&fsnotify_mark_srcu); goto fail; } } /* * Now that both marks are pinned by refcount in the inode / vfsmount * lists, we can drop SRCU lock, and safely resume the list iteration * once userspace returns. */ srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); return true; fail: for (type--; type >= 0; type--) fsnotify_put_mark_wake(iter_info->marks[type]); return false; } void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) __acquires(&fsnotify_mark_srcu) { int type; iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); fsnotify_foreach_iter_type(type) fsnotify_put_mark_wake(iter_info->marks[type]); } /* * Mark mark as detached, remove it from group list. Mark still stays in object * list until its last reference is dropped. Note that we rely on mark being * removed from group list before corresponding reference to it is dropped. In * particular we rely on mark->connector being valid while we hold * group->mark_mutex if we found the mark through g_list. * * Must be called with group->mark_mutex held. The caller must either hold * reference to the mark or be protected by fsnotify_mark_srcu. 
*/ void fsnotify_detach_mark(struct fsnotify_mark *mark) { fsnotify_group_assert_locked(mark->group); WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && refcount_read(&mark->refcnt) < 1 + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); spin_lock(&mark->lock); /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { spin_unlock(&mark->lock); return; } mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; list_del_init(&mark->g_list); spin_unlock(&mark->lock); /* Drop mark reference acquired in fsnotify_add_mark_locked() */ fsnotify_put_mark(mark); } /* * Free fsnotify mark. The mark is actually only marked as being freed. The * freeing is actually happening only once last reference to the mark is * dropped from a workqueue which first waits for srcu period end. * * Caller must have a reference to the mark or be protected by * fsnotify_mark_srcu. */ void fsnotify_free_mark(struct fsnotify_mark *mark) { struct fsnotify_group *group = mark->group; spin_lock(&mark->lock); /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { spin_unlock(&mark->lock); return; } mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; spin_unlock(&mark->lock); /* * Some groups like to know that marks are being freed. This is a * callback to the group function to let it know that this mark * is being freed. */ if (group->ops->freeing_mark) group->ops->freeing_mark(mark, group); } void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { fsnotify_group_lock(group); fsnotify_detach_mark(mark); fsnotify_group_unlock(group); fsnotify_free_mark(mark); } EXPORT_SYMBOL_GPL(fsnotify_destroy_mark); /* * Sorting function for lists of fsnotify marks. * * Fanotify supports different notification classes (reflected as priority of * notification group). Events shall be passed to notification groups in * decreasing priority order. To achieve this marks in notification lists for * inodes and vfsmounts are sorted so that priorities of corresponding groups * are descending. * * Furthermore correct handling of the ignore mask requires processing inode * and vfsmount marks of each group together. Using the group address as * further sort criterion provides a unique sorting order and thus we can * merge inode and vfsmount lists of marks in linear time and find groups * present in both lists. * * A return value of 1 signifies that b has priority over a. * A return value of 0 signifies that the two marks have to be handled together. * A return value of -1 signifies that a has priority over b. 
*/ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) { if (a == b) return 0; if (!a) return 1; if (!b) return -1; if (a->priority < b->priority) return 1; if (a->priority > b->priority) return -1; if (a < b) return 1; return -1; } static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, unsigned int obj_type) { struct fsnotify_mark_connector *conn; conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); if (!conn) return -ENOMEM; spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); conn->flags = 0; conn->type = obj_type; conn->obj = connp; conn->flags = 0; fsnotify_get_sb_connectors(conn); /* * cmpxchg() provides the barrier so that readers of *connp can see * only initialized structure */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ fsnotify_put_sb_connectors(conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn); } return 0; } /* * Get mark connector, make sure it is alive and return with its lock held. * This is for users that get connector pointer from inode or mount. Users that * hold reference to a mark on the list may directly lock connector->lock as * they are sure list cannot go away under them. */ static struct fsnotify_mark_connector *fsnotify_grab_connector( fsnotify_connp_t *connp) { struct fsnotify_mark_connector *conn; int idx; idx = srcu_read_lock(&fsnotify_mark_srcu); conn = srcu_dereference(*connp, &fsnotify_mark_srcu); if (!conn) goto out; spin_lock(&conn->lock); if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) { spin_unlock(&conn->lock); srcu_read_unlock(&fsnotify_mark_srcu, idx); return NULL; } out: srcu_read_unlock(&fsnotify_mark_srcu, idx); return conn; } /* * Add mark into proper place in given list of marks. These marks may be used * for the fsnotify backend to determine which event types should be delivered * to which group and for which inodes. These marks are ordered according to * priority, highest number first, and then by the group's location in memory. */ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, int add_flags) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; int cmp; int err = 0; if (WARN_ON(!fsnotify_valid_obj_type(obj_type))) return -EINVAL; restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); err = fsnotify_attach_connector_to_object(connp, obj_type); if (err) return err; goto restart; } /* is mark the first mark? */ if (hlist_empty(&conn->list)) { hlist_add_head_rcu(&mark->obj_list, &conn->list); goto added; } /* should mark be in the middle of the current list? */ hlist_for_each_entry(lmark, &conn->list, obj_list) { last = lmark; if ((lmark->group == mark->group) && (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) { err = -EEXIST; goto out_err; } cmp = fsnotify_compare_groups(lmark->group, mark->group); if (cmp >= 0) { hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list); goto added; } } BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: /* * Since connector is attached to object using cmpxchg() we are * guaranteed that connector initialization is fully visible by anyone * seeing mark->connector set. 
*/ WRITE_ONCE(mark->connector, conn); out_err: spin_unlock(&conn->lock); spin_unlock(&mark->lock); return err; } /* * Attach an initialized mark to a given group and fs object. * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, int add_flags) { struct fsnotify_group *group = mark->group; int ret = 0; fsnotify_group_assert_locked(group); /* * LOCKING ORDER!!!! * group->mark_mutex * mark->lock * mark->connector->lock */ spin_lock(&mark->lock); mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; list_add(&mark->g_list, &group->marks_list); fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags); if (ret) goto err; fsnotify_recalc_mask(mark->connector); return ret; err: spin_lock(&mark->lock); mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED); list_del_init(&mark->g_list); spin_unlock(&mark->lock); fsnotify_put_mark(mark); return ret; } int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, unsigned int obj_type, int add_flags) { int ret; struct fsnotify_group *group = mark->group; fsnotify_group_lock(group); ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags); fsnotify_group_unlock(group); return ret; } EXPORT_SYMBOL_GPL(fsnotify_add_mark); /* * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, struct fsnotify_group *group) { struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; conn = fsnotify_grab_connector(connp); if (!conn) return NULL; hlist_for_each_entry(mark, &conn->list, obj_list) { if (mark->group == group && (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { fsnotify_get_mark(mark); spin_unlock(&conn->lock); return mark; } } spin_unlock(&conn->lock); return NULL; } EXPORT_SYMBOL_GPL(fsnotify_find_mark); /* Clear any marks in a group with given type mask */ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); struct list_head *head = &to_free; /* Skip selection step if we want to clear all marks. */ if (obj_type == FSNOTIFY_OBJ_TYPE_ANY) { head = &group->marks_list; goto clear; } /* * We have to be really careful here. Anytime we drop mark_mutex, e.g. * fsnotify_clear_marks_by_inode() can come and free marks. Even in our * to_free list so we have to use mark_mutex even when accessing that * list. And freeing mark requires us to drop mark_mutex. So we can * reliably free only the first mark in the list. That's why we first * move marks to free to to_free list in one go and then free marks in * to_free list one by one. 
*/ fsnotify_group_lock(group); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { if (mark->connector->type == obj_type) list_move(&mark->g_list, &to_free); } fsnotify_group_unlock(group); clear: while (1) { fsnotify_group_lock(group); if (list_empty(head)) { fsnotify_group_unlock(group); break; } mark = list_first_entry(head, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); fsnotify_detach_mark(mark); fsnotify_group_unlock(group); fsnotify_free_mark(mark); fsnotify_put_mark(mark); } } /* Destroy all marks attached to an object via connector */ void fsnotify_destroy_marks(fsnotify_connp_t *connp) { struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark, *old_mark = NULL; void *objp; unsigned int type; conn = fsnotify_grab_connector(connp); if (!conn) return; /* * We have to be careful since we can race with e.g. * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the * list can get modified. However we are holding mark reference and * thus our mark cannot be removed from obj_list so we can continue * iteration after regaining conn->lock. */ hlist_for_each_entry(mark, &conn->list, obj_list) { fsnotify_get_mark(mark); spin_unlock(&conn->lock); if (old_mark) fsnotify_put_mark(old_mark); old_mark = mark; fsnotify_destroy_mark(mark, mark->group); spin_lock(&conn->lock); } /* * Detach list from object now so that we don't pin inode until all * mark references get dropped. It would lead to strange results such * as delaying inode deletion or blocking unmount. */ objp = fsnotify_detach_connector_from_object(conn, &type); spin_unlock(&conn->lock); if (old_mark) fsnotify_put_mark(old_mark); fsnotify_drop_object(type, objp); } /* * Nothing fancy, just initialize lists and locks and counters. */ void fsnotify_init_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { memset(mark, 0, sizeof(*mark)); spin_lock_init(&mark->lock); refcount_set(&mark->refcnt, 1); fsnotify_get_group(group); mark->group = group; WRITE_ONCE(mark->connector, NULL); } EXPORT_SYMBOL_GPL(fsnotify_init_mark); /* * Destroy all marks in destroy_list, waits for SRCU period to finish before * actually freeing marks. */ static void fsnotify_mark_destroy_workfn(struct work_struct *work) { struct fsnotify_mark *mark, *next; struct list_head private_destroy_list; spin_lock(&destroy_lock); /* exchange the list head */ list_replace_init(&destroy_list, &private_destroy_list); spin_unlock(&destroy_lock); synchronize_srcu(&fsnotify_mark_srcu); list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { list_del_init(&mark->g_list); fsnotify_final_mark_destroy(mark); } } /* Wait for all marks queued for destruction to be actually destroyed */ void fsnotify_wait_marks_destroyed(void) { flush_delayed_work(&reaper_work); } EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
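/*
 * Editor's note: a hedged userspace illustration, not part of mark.c. Each
 * inotify watch added below is backed by an fsnotify mark that the code
 * above links into the watched inode's connector list; removing the watch
 * detaches the mark, which is then freed only after the SRCU grace period
 * handled by the destroy workqueue.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/inotify.h>

int main(void)
{
	char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
	ssize_t len;
	int fd, wd;

	fd = inotify_init1(IN_CLOEXEC);
	if (fd < 0) {
		perror("inotify_init1");
		return 1;
	}
	/* adding a watch allocates and attaches a mark for /tmp's inode */
	wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);
	if (wd < 0) {
		perror("inotify_add_watch");
		return 1;
	}
	len = read(fd, buf, sizeof(buf));	/* blocks until an event arrives */
	if (len > 0) {
		const struct inotify_event *ev = (const struct inotify_event *)buf;

		printf("event mask=0x%x on wd=%d\n", ev->mask, ev->wd);
	}
	inotify_rm_watch(fd, wd);	/* detaches and frees the mark */
	close(fd);
	return 0;
}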
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/gen_estimator.c Simple rate estimator. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Eric Dumazet <edumazet@google.com> * * Changes: * Jamal Hadi Salim - moved it to net/core and reshuffled * names to make it usable in general net subsystem. */ #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/in.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/seqlock.h> #include <net/sock.h> #include <net/gen_stats.h> /* This code is NOT intended to be used for statistics collection, * its purpose is to provide a base for statistical multiplexing * for controlled load service. * If you need only statistics, run a user level daemon which * periodically reads byte counters. 
*/ struct net_rate_estimator { struct gnet_stats_basic_sync *bstats; spinlock_t *stats_lock; bool running; struct gnet_stats_basic_sync __percpu *cpu_bstats; u8 ewma_log; u8 intvl_log; /* period : (250ms << intvl_log) */ seqcount_t seq; u64 last_packets; u64 last_bytes; u64 avpps; u64 avbps; unsigned long next_jiffies; struct timer_list timer; struct rcu_head rcu; }; static void est_fetch_counters(struct net_rate_estimator *e, struct gnet_stats_basic_sync *b) { gnet_stats_basic_sync_init(b); if (e->stats_lock) spin_lock(e->stats_lock); gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running); if (e->stats_lock) spin_unlock(e->stats_lock); } static void est_timer(struct timer_list *t) { struct net_rate_estimator *est = from_timer(est, t, timer); struct gnet_stats_basic_sync b; u64 b_bytes, b_packets; u64 rate, brate; est_fetch_counters(est, &b); b_bytes = u64_stats_read(&b.bytes); b_packets = u64_stats_read(&b.packets); brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log); brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log); rate = (b_packets - est->last_packets) << (10 - est->intvl_log); rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); write_seqcount_begin(&est->seq); est->avbps += brate; est->avpps += rate; write_seqcount_end(&est->seq); est->last_bytes = b_bytes; est->last_packets = b_packets; est->next_jiffies += ((HZ/4) << est->intvl_log); if (unlikely(time_after_eq(jiffies, est->next_jiffies))) { /* Ouch... timer was delayed. */ est->next_jiffies = jiffies + 1; } mod_timer(&est->timer, est->next_jiffies); } /** * gen_new_estimator - create a new rate estimator * @bstats: basic statistics * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @lock: lock for statistics and control path * @running: true if @bstats represents a running qdisc, thus @bstats' * internal values might change during basic reads. Only used * if @bstats_cpu is NULL * @opt: rate estimator configuration TLV * * Creates a new rate estimator with &bstats as source and &rate_est * as destination. A new timer with the interval specified in the * configuration TLV is created. Upon each interval, the latest statistics * will be read from &bstats and the estimated rate will be stored in * &rate_est with the statistics lock grabbed during this period. * * Returns 0 on success or a negative error code. 
* */ int gen_new_estimator(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, bool running, struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); struct net_rate_estimator *old, *est; struct gnet_stats_basic_sync b; int intvl_log; if (nla_len(opt) < sizeof(*parm)) return -EINVAL; /* allowed timer periods are : * -2 : 250ms, -1 : 500ms, 0 : 1 sec * 1 : 2 sec, 2 : 4 sec, 3 : 8 sec */ if (parm->interval < -2 || parm->interval > 3) return -EINVAL; if (parm->ewma_log == 0 || parm->ewma_log >= 31) return -EINVAL; est = kzalloc(sizeof(*est), GFP_KERNEL); if (!est) return -ENOBUFS; seqcount_init(&est->seq); intvl_log = parm->interval + 2; est->bstats = bstats; est->stats_lock = lock; est->running = running; est->ewma_log = parm->ewma_log; est->intvl_log = intvl_log; est->cpu_bstats = cpu_bstats; if (lock) local_bh_disable(); est_fetch_counters(est, &b); if (lock) local_bh_enable(); est->last_bytes = u64_stats_read(&b.bytes); est->last_packets = u64_stats_read(&b.packets); if (lock) spin_lock_bh(lock); old = rcu_dereference_protected(*rate_est, 1); if (old) { del_timer_sync(&old->timer); est->avbps = old->avbps; est->avpps = old->avpps; } est->next_jiffies = jiffies + ((HZ/4) << intvl_log); timer_setup(&est->timer, est_timer, 0); mod_timer(&est->timer, est->next_jiffies); rcu_assign_pointer(*rate_est, est); if (lock) spin_unlock_bh(lock); if (old) kfree_rcu(old, rcu); return 0; } EXPORT_SYMBOL(gen_new_estimator); /** * gen_kill_estimator - remove a rate estimator * @rate_est: rate estimator * * Removes the rate estimator. * */ void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est) { struct net_rate_estimator *est; est = xchg((__force struct net_rate_estimator **)rate_est, NULL); if (est) { timer_shutdown_sync(&est->timer); kfree_rcu(est, rcu); } } EXPORT_SYMBOL(gen_kill_estimator); /** * gen_replace_estimator - replace rate estimator configuration * @bstats: basic statistics * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @lock: lock for statistics and control path * @running: true if @bstats represents a running qdisc, thus @bstats' * internal values might change during basic reads. Only used * if @cpu_bstats is NULL * @opt: rate estimator configuration TLV * * Replaces the configuration of a rate estimator by calling * gen_kill_estimator() and gen_new_estimator(). * * Returns 0 on success or a negative error code. */ int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, bool running, struct nlattr *opt) { return gen_new_estimator(bstats, cpu_bstats, rate_est, lock, running, opt); } EXPORT_SYMBOL(gen_replace_estimator); /** * gen_estimator_active - test if estimator is currently in use * @rate_est: rate estimator * * Returns true if estimator is active, and false if not. 
*/ bool gen_estimator_active(struct net_rate_estimator __rcu **rate_est) { return !!rcu_access_pointer(*rate_est); } EXPORT_SYMBOL(gen_estimator_active); bool gen_estimator_read(struct net_rate_estimator __rcu **rate_est, struct gnet_stats_rate_est64 *sample) { struct net_rate_estimator *est; unsigned seq; rcu_read_lock(); est = rcu_dereference(*rate_est); if (!est) { rcu_read_unlock(); return false; } do { seq = read_seqcount_begin(&est->seq); sample->bps = est->avbps >> 8; sample->pps = est->avpps >> 8; } while (read_seqcount_retry(&est->seq, seq)); rcu_read_unlock(); return true; } EXPORT_SYMBOL(gen_estimator_read);
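/*
 * Editor's note: a small userspace model (assumption: illustrative only) of
 * the EWMA update performed in est_timer() above. The averages are kept
 * scaled by 2^8, which is why gen_estimator_read() shifts them right by 8,
 * and the sampling period is (250 ms << intvl_log). The model_* names are
 * hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

struct model_est {
	uint8_t ewma_log;	/* smoothing factor of 1/2^ewma_log */
	uint8_t intvl_log;	/* period = 250 ms << intvl_log */
	uint64_t last_bytes;
	uint64_t avbps;		/* (bytes per second) << 8 */
};

static void model_est_timer(struct model_est *e, uint64_t bytes_now)
{
	/* bytes per interval -> (bytes per second) << 8, as in est_timer() */
	uint64_t brate = (bytes_now - e->last_bytes) << (10 - e->intvl_log);

	e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
	e->last_bytes = bytes_now;
}

int main(void)
{
	struct model_est e = { .ewma_log = 3, .intvl_log = 2 };	/* 1 s period */
	uint64_t bytes = 0;
	int i;

	for (i = 0; i < 10; i++) {
		bytes += 1000000;	/* 1 MB of traffic per 1 s interval */
		model_est_timer(&e, bytes);
		printf("tick %d: ~%llu bytes/sec\n", i,
		       (unsigned long long)(e.avbps >> 8));
	}
	return 0;
}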
/* SPDX-License-Identifier: GPL-2.0 */ /* rwsem.h: R/W semaphores, public interface * * Written by David Howells (dhowells@redhat.com). * Derived from asm-i386/semaphore.h */ #ifndef _LINUX_RWSEM_H #define _LINUX_RWSEM_H #include <linux/linkage.h> #include <linux/types.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/err.h> #include <linux/cleanup.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_SLEEP, \ }, #else # define __RWSEM_DEP_MAP_INIT(lockname) #endif #ifndef CONFIG_PREEMPT_RT #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #include <linux/osq_lock.h> #endif /* * For an uncontended rwsem, count and owner are the only fields a task * needs to touch when acquiring the rwsem. So they are put next to each * other to increase the chance that they will share the same cacheline. * * In a contended rwsem, the owner is likely the most frequently accessed * field in the structure as the optimistic waiter that holds the osq lock * will spin on owner. For an embedded rwsem, other hot fields in the * containing structure should be moved further away from the rwsem to * reduce the chance that they will share the same cacheline causing * cacheline bouncing problem. */ struct rw_semaphore { atomic_long_t count; /* * Write owner or one of the read owners as well flags regarding * the current state of the rwsem. Can be used as a speculative * check to see if the write owner is running on the cpu. 
*/ atomic_long_t owner; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_RWSEMS void *magic; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif }; #define RWSEM_UNLOCKED_VALUE 0UL #define RWSEM_WRITER_LOCKED (1UL << 0) #define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) static inline int rwsem_is_locked(struct rw_semaphore *sem) { return atomic_long_read(&sem->count) != RWSEM_UNLOCKED_VALUE; } static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) { WARN_ON(atomic_long_read(&sem->count) == RWSEM_UNLOCKED_VALUE); } static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) { WARN_ON(!(atomic_long_read(&sem->count) & RWSEM_WRITER_LOCKED)); } /* Common initializer macros and functions */ #ifdef CONFIG_DEBUG_RWSEMS # define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname, #else # define __RWSEM_DEBUG_INIT(lockname) #endif #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED, #else #define __RWSEM_OPT_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ { __RWSEM_COUNT_INIT(name), \ .owner = ATOMIC_LONG_INIT(0), \ __RWSEM_OPT_INIT(name) \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ .wait_list = LIST_HEAD_INIT((name).wait_list), \ __RWSEM_DEBUG_INIT(name) \ __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) extern void __init_rwsem(struct rw_semaphore *sem, const char *name, struct lock_class_key *key); #define init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ __init_rwsem((sem), #sem, &__key); \ } while (0) /* * This is the same regardless of which rwsem implementation that is being used. * It is just a heuristic meant to be called by somebody already holding the * rwsem to see if somebody from an incompatible type is wanting access to the * lock. */ static inline int rwsem_is_contended(struct rw_semaphore *sem) { return !list_empty(&sem->wait_list); } #else /* !CONFIG_PREEMPT_RT */ #include <linux/rwbase_rt.h> struct rw_semaphore { struct rwbase_rt rwbase; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif }; #define __RWSEM_INITIALIZER(name) \ { \ .rwbase = __RWBASE_INITIALIZER(name), \ __RWSEM_DEP_MAP_INIT(name) \ } #define DECLARE_RWSEM(lockname) \ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) extern void __init_rwsem(struct rw_semaphore *rwsem, const char *name, struct lock_class_key *key); #define init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ __init_rwsem((sem), #sem, &__key); \ } while (0) static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem) { return rw_base_is_locked(&sem->rwbase); } static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) { WARN_ON(!rwsem_is_locked(sem)); } static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) { WARN_ON(!rw_base_is_write_locked(&sem->rwbase)); } static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) { return rw_base_is_contended(&sem->rwbase); } #endif /* CONFIG_PREEMPT_RT */ /* * The functions below are the same for all rwsem implementations including * the RT specific variant. 
*/ static inline void rwsem_assert_held(const struct rw_semaphore *sem) { if (IS_ENABLED(CONFIG_LOCKDEP)) lockdep_assert_held(sem); else rwsem_assert_held_nolockdep(sem); } static inline void rwsem_assert_held_write(const struct rw_semaphore *sem) { if (IS_ENABLED(CONFIG_LOCKDEP)) lockdep_assert_held_write(sem); else rwsem_assert_held_write_nolockdep(sem); } /* * lock for reading */ extern void down_read(struct rw_semaphore *sem); extern int __must_check down_read_interruptible(struct rw_semaphore *sem); extern int __must_check down_read_killable(struct rw_semaphore *sem); /* * trylock for reading -- returns 1 if successful, 0 if contention */ extern int down_read_trylock(struct rw_semaphore *sem); /* * lock for writing */ extern void down_write(struct rw_semaphore *sem); extern int __must_check down_write_killable(struct rw_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention */ extern int down_write_trylock(struct rw_semaphore *sem); /* * release a read lock */ extern void up_read(struct rw_semaphore *sem); /* * release a write lock */ extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) /* * downgrade write lock to read lock */ extern void downgrade_write(struct rw_semaphore *sem); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* * nested locking. NOTE: rwsems are not allowed to recurse * (which occurs if the same task tries to acquire the same * lock instance multiple times), but multiple locks of the * same lock class might be taken, if the order of the locks * is always the same. This ordering rule can be expressed * to lockdep via the _nested() APIs, but enumerating the * subclasses that are used. (If the nesting relationship is * static then another method for expressing nested locking is * the explicit definition of lock class keys and the use of * lockdep_set_class() at lock initialization time. * See Documentation/locking/lockdep-design.rst for more details.) */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); # define down_write_nest_lock(sem, nest_lock) \ do { \ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ } while (0) /* * Take/release a lock when not the owner will release it. * * [ This API should be avoided as much as possible - the * proper abstraction for this case is completions. 
] */ extern void down_read_non_owner(struct rw_semaphore *sem); extern void up_read_non_owner(struct rw_semaphore *sem); #else # define down_read_nested(sem, subclass) down_read(sem) # define down_read_killable_nested(sem, subclass) down_read_killable(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) # define down_write_killable_nested(sem, subclass) down_write_killable(sem) # define down_read_non_owner(sem) down_read(sem) # define up_read_non_owner(sem) up_read(sem) #endif #endif /* _LINUX_RWSEM_H */
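/*
 * To make the rwsem API above concrete, the sketch below shows the usual
 * pattern: shared readers via down_read()/up_read(), an exclusive writer
 * via down_write()/up_write(). It is illustrative only; example_lock,
 * example_list, struct example_item and both helpers are hypothetical
 * names, not part of the header. The DEFINE_GUARD() lines above also
 * permit scope-based locking, e.g. guard(rwsem_read)(&example_lock),
 * which drops the lock automatically when the scope ends.
 */
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/types.h>

struct example_item {
	struct list_head node;
};

static DECLARE_RWSEM(example_lock);
static LIST_HEAD(example_list);

/* Readers may run concurrently with each other. */
static bool example_is_empty(void)
{
	bool empty;

	down_read(&example_lock);
	empty = list_empty(&example_list);
	up_read(&example_lock);

	return empty;
}

/* A writer excludes both readers and other writers. */
static void example_insert(struct example_item *item)
{
	down_write(&example_lock);
	list_add_tail(&item->node, &example_list);
	up_write(&example_lock);
}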
/* SPDX-License-Identifier: GPL-2.0+ */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rseq

#if !defined(_TRACE_RSEQ_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_RSEQ_H

#include <linux/tracepoint.h>
#include <linux/types.h>

TRACE_EVENT(rseq_update,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_STRUCT__entry(
		__field(s32, cpu_id)
		__field(s32, node_id)
		__field(s32, mm_cid)
	),

	TP_fast_assign(
		__entry->cpu_id = raw_smp_processor_id();
		__entry->node_id = cpu_to_node(__entry->cpu_id);
		__entry->mm_cid = task_mm_cid(t);
	),

	TP_printk("cpu_id=%d node_id=%d mm_cid=%d", __entry->cpu_id,
		  __entry->node_id, __entry->mm_cid)
);

TRACE_EVENT(rseq_ip_fixup,

	TP_PROTO(unsigned long regs_ip, unsigned long start_ip,
		 unsigned long post_commit_offset, unsigned long abort_ip),

	TP_ARGS(regs_ip, start_ip, post_commit_offset, abort_ip),

	TP_STRUCT__entry(
		__field(unsigned long, regs_ip)
		__field(unsigned long, start_ip)
		__field(unsigned long, post_commit_offset)
		__field(unsigned long, abort_ip)
	),

	TP_fast_assign(
		__entry->regs_ip = regs_ip;
		__entry->start_ip = start_ip;
		__entry->post_commit_offset = post_commit_offset;
		__entry->abort_ip = abort_ip;
	),

	TP_printk("regs_ip=0x%lx start_ip=0x%lx post_commit_offset=%lu abort_ip=0x%lx",
		  __entry->regs_ip, __entry->start_ip, __entry->post_commit_offset,
		  __entry->abort_ip)
);

#endif /* _TRACE_RSEQ_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
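/*
 * TRACE_EVENT(rseq_update, ...) and TRACE_EVENT(rseq_ip_fixup, ...) above
 * expand into trace_rseq_update()/trace_rseq_ip_fixup() wrappers. The
 * hypothetical caller below is only a sketch of how those wrappers are
 * invoked; exactly one .c file defines CREATE_TRACE_POINTS before
 * including the trace header so that define_trace.h emits the tracepoint
 * bodies, while every other user just includes the header.
 */
#include <linux/ptrace.h>
#include <linux/sched.h>

#include <trace/events/rseq.h>

static void example_emit_rseq_events(struct task_struct *t,
				     struct pt_regs *regs,
				     unsigned long start_ip,
				     unsigned long post_commit_offset,
				     unsigned long abort_ip)
{
	trace_rseq_update(t);
	trace_rseq_ip_fixup(instruction_pointer(regs), start_ip,
			    post_commit_offset, abort_ip);
}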
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_RT_H
#define _LINUX_SCHED_RT_H

#include <linux/sched.h>

struct task_struct;

static inline int rt_prio(int prio)
{
	if (unlikely(prio < MAX_RT_PRIO))
		return 1;
	return 0;
}

static inline int rt_task(struct task_struct *p)
{
	return rt_prio(p->prio);
}

static inline bool task_is_realtime(struct task_struct *tsk)
{
	int policy = tsk->policy;

	if (policy == SCHED_FIFO || policy == SCHED_RR)
		return true;
	if (policy == SCHED_DEADLINE)
		return true;
	return false;
}

#ifdef CONFIG_RT_MUTEXES
extern void rt_mutex_pre_schedule(void);
extern void rt_mutex_schedule(void);
extern void rt_mutex_post_schedule(void);

/*
 * Must hold either p->pi_lock or task_rq(p)->lock.
 */
static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
{
	return p->pi_top_task;
}
extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
extern void rt_mutex_adjust_pi(struct task_struct *p);
#else
static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	return NULL;
}
# define rt_mutex_adjust_pi(p)		do { } while (0)
#endif

extern void normalize_rt_tasks(void);

/*
 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
 * Timeslices get refilled after they expire.
 */
#define RR_TIMESLICE		(100 * HZ / 1000)

#endif /* _LINUX_SCHED_RT_H */
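/*
 * A quick, hypothetical illustration of the helpers above: rt_task() keys
 * off the effective priority in p->prio (so it also covers tasks that are
 * temporarily PI-boosted into the real-time range), whereas
 * task_is_realtime() looks only at the configured scheduling policy.
 */
#include <linux/sched.h>
#include <linux/sched/rt.h>

static bool example_needs_rt_treatment(struct task_struct *p)
{
	if (rt_task(p))			/* effective prio is in the RT range */
		return true;

	return task_is_realtime(p);	/* SCHED_FIFO, SCHED_RR or SCHED_DEADLINE */
}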
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2014 Felix Fietkau <nbd@nbd.name> * Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com> */ #ifndef _LINUX_BITFIELD_H #define _LINUX_BITFIELD_H #include <linux/build_bug.h> #include <asm/byteorder.h> /* * Bitfield access macros * * FIELD_{GET,PREP} macros take as first parameter shifted mask * from which they extract the base mask and shift amount. * Mask must be a compilation time constant. * * Example: * * #include <linux/bitfield.h> * #include <linux/bits.h> * * #define REG_FIELD_A GENMASK(6, 0) * #define REG_FIELD_B BIT(7) * #define REG_FIELD_C GENMASK(15, 8) * #define REG_FIELD_D GENMASK(31, 16) * * Get: * a = FIELD_GET(REG_FIELD_A, reg); * b = FIELD_GET(REG_FIELD_B, reg); * * Set: * reg = FIELD_PREP(REG_FIELD_A, 1) | * FIELD_PREP(REG_FIELD_B, 0) | * FIELD_PREP(REG_FIELD_C, c) | * FIELD_PREP(REG_FIELD_D, 0x40); * * Modify: * reg &= ~REG_FIELD_C; * reg |= FIELD_PREP(REG_FIELD_C, c); */ #define __bf_shf(x) (__builtin_ffsll(x) - 1) #define __scalar_type_to_unsigned_cases(type) \ unsigned type: (unsigned type)0, \ signed type: (unsigned type)0 #define __unsigned_scalar_typeof(x) typeof( \ _Generic((x), \ char: (unsigned char)0, \ __scalar_type_to_unsigned_cases(char), \ __scalar_type_to_unsigned_cases(short), \ __scalar_type_to_unsigned_cases(int), \ __scalar_type_to_unsigned_cases(long), \ __scalar_type_to_unsigned_cases(long long), \ default: (x))) #define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ ({ \ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ _pfx "mask is not constant"); \ BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ~((_mask) >> __bf_shf(_mask)) & \ (0 + (_val)) : 0, \ _pfx "value too large for the field"); \ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ __bf_cast_unsigned(_reg, ~0ull), \ _pfx "type of reg too small for mask"); \ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ (1ULL << __bf_shf(_mask))); \ }) /** * FIELD_MAX() - produce the maximum value representable by a field * @_mask: shifted mask defining the field's length and position * * FIELD_MAX() returns the maximum value that can be held in the field * specified by @_mask. */ #define FIELD_MAX(_mask) \ ({ \ __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ }) /** * FIELD_FIT() - check if value fits in the field * @_mask: shifted mask defining the field's length and position * @_val: value to test against the field * * Return: true if @_val can fit inside @_mask, false if @_val is too big. 
*/ #define FIELD_FIT(_mask, _val) \ ({ \ __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ }) /** * FIELD_PREP() - prepare a bitfield element * @_mask: shifted mask defining the field's length and position * @_val: value to put in the field * * FIELD_PREP() masks and shifts up the value. The result should * be combined with other fields of the bitfield using logical OR. */ #define FIELD_PREP(_mask, _val) \ ({ \ __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ }) #define __BF_CHECK_POW2(n) BUILD_BUG_ON_ZERO(((n) & ((n) - 1)) != 0) /** * FIELD_PREP_CONST() - prepare a constant bitfield element * @_mask: shifted mask defining the field's length and position * @_val: value to put in the field * * FIELD_PREP_CONST() masks and shifts up the value. The result should * be combined with other fields of the bitfield using logical OR. * * Unlike FIELD_PREP() this is a constant expression and can therefore * be used in initializers. Error checking is less comfortable for this * version, and non-constant masks cannot be used. */ #define FIELD_PREP_CONST(_mask, _val) \ ( \ /* mask must be non-zero */ \ BUILD_BUG_ON_ZERO((_mask) == 0) + \ /* check if value fits */ \ BUILD_BUG_ON_ZERO(~((_mask) >> __bf_shf(_mask)) & (_val)) + \ /* check if mask is contiguous */ \ __BF_CHECK_POW2((_mask) + (1ULL << __bf_shf(_mask))) + \ /* and create the value */ \ (((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask)) \ ) /** * FIELD_GET() - extract a bitfield element * @_mask: shifted mask defining the field's length and position * @_reg: value of entire bitfield * * FIELD_GET() extracts the field specified by @_mask from the * bitfield passed in as @_reg by masking and shifting it down. */ #define FIELD_GET(_mask, _reg) \ ({ \ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ }) extern void __compiletime_error("value doesn't fit into mask") __field_overflow(void); extern void __compiletime_error("bad bitfield mask") __bad_mask(void); static __always_inline u64 field_multiplier(u64 field) { if ((field | (field - 1)) & ((field | (field - 1)) + 1)) __bad_mask(); return field & -field; } static __always_inline u64 field_mask(u64 field) { return field / field_multiplier(field); } #define field_max(field) ((typeof(field))field_mask(field)) #define ____MAKE_OP(type,base,to,from) \ static __always_inline __##type type##_encode_bits(base v, base field) \ { \ if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ __field_overflow(); \ return to((v & field_mask(field)) * field_multiplier(field)); \ } \ static __always_inline __##type type##_replace_bits(__##type old, \ base val, base field) \ { \ return (old & ~to(field)) | type##_encode_bits(val, field); \ } \ static __always_inline void type##p_replace_bits(__##type *p, \ base val, base field) \ { \ *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ } \ static __always_inline base type##_get_bits(__##type v, base field) \ { \ return (from(v) & field)/field_multiplier(field); \ } #define __MAKE_OP(size) \ ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \ ____MAKE_OP(u##size,u##size,,) ____MAKE_OP(u8,u8,,) __MAKE_OP(16) __MAKE_OP(32) __MAKE_OP(64) #undef __MAKE_OP #undef ____MAKE_OP #endif
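/*
 * Besides FIELD_GET()/FIELD_PREP() shown in the header comment, the
 * __MAKE_OP() block above generates fixed-width, endianness-aware helpers
 * such as le32_encode_bits() and le32_get_bits(). The sketch below shows
 * how they are typically used; the EXAMPLE_* masks and both helpers are
 * hypothetical.
 */
#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define EXAMPLE_STATUS	GENMASK(3, 0)
#define EXAMPLE_CHANNEL	GENMASK(11, 4)

static __le32 example_pack(u32 status, u32 channel)
{
	/* Mask, shift and convert host-order values into a __le32 word. */
	return le32_encode_bits(status, EXAMPLE_STATUS) |
	       le32_encode_bits(channel, EXAMPLE_CHANNEL);
}

static u32 example_channel(__le32 word)
{
	/* Convert from little endian, then mask and shift the field down. */
	return le32_get_bits(word, EXAMPLE_CHANNEL);
}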
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 // SPDX-License-Identifier: GPL-2.0-or-later /* * Apple "Magic" Wireless Mouse driver * * Copyright (c) 2010 Michael Poole <mdpoole@troilus.org> * Copyright (c) 2010 Chase Douglas <chase.douglas@canonical.com> */ /* */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> #include <linux/hid.h> #include <linux/input/mt.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/workqueue.h> #include "hid-ids.h" static bool emulate_3button = true; module_param(emulate_3button, bool, 0644); MODULE_PARM_DESC(emulate_3button, "Emulate a middle button"); static int middle_button_start = -350; static int middle_button_stop = +350; static bool emulate_scroll_wheel = true; module_param(emulate_scroll_wheel, bool, 0644); MODULE_PARM_DESC(emulate_scroll_wheel, "Emulate a scroll wheel"); static unsigned int scroll_speed = 32; static int param_set_scroll_speed(const char *val, const struct kernel_param *kp) { unsigned long speed; if (!val || kstrtoul(val, 0, &speed) || speed > 63) return -EINVAL; scroll_speed = speed; return 0; } module_param_call(scroll_speed, param_set_scroll_speed, param_get_uint, &scroll_speed, 0644); MODULE_PARM_DESC(scroll_speed, "Scroll speed, value from 0 (slow) to 63 (fast)"); static bool scroll_acceleration = false; module_param(scroll_acceleration, bool, 0644); MODULE_PARM_DESC(scroll_acceleration, "Accelerate sequential scroll events"); static bool report_undeciphered; module_param(report_undeciphered, bool, 0644); MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state field using a MSC_RAW event"); #define TRACKPAD2_2021_BT_VERSION 0x110 #define TRACKPAD_REPORT_ID 0x28 #define TRACKPAD2_USB_REPORT_ID 0x02 #define TRACKPAD2_BT_REPORT_ID 0x31 #define MOUSE_REPORT_ID 0x29 #define MOUSE2_REPORT_ID 0x12 #define DOUBLE_REPORT_ID 0xf7 #define USB_BATTERY_TIMEOUT_MS 60000 /* These definitions are not precise, but they're close enough. (Bits * 0x03 seem to indicate the aspect ratio of the touch, bits 0x70 seem * to be some kind of bit mask -- 0x20 may be a near-field reading, * and 0x40 is actual contact, and 0x10 may be a start/stop or change * indication.) */ #define TOUCH_STATE_MASK 0xf0 #define TOUCH_STATE_NONE 0x00 #define TOUCH_STATE_START 0x30 #define TOUCH_STATE_DRAG 0x40 /* Number of high-resolution events for each low-resolution detent. */ #define SCROLL_HR_STEPS 10 #define SCROLL_HR_MULT (120 / SCROLL_HR_STEPS) #define SCROLL_HR_THRESHOLD 90 /* units */ #define SCROLL_ACCEL_DEFAULT 7 /* Touch surface information. Dimension is in hundredths of a mm, min and max * are in units. 
*/ #define MOUSE_DIMENSION_X (float)9056 #define MOUSE_MIN_X -1100 #define MOUSE_MAX_X 1258 #define MOUSE_RES_X ((MOUSE_MAX_X - MOUSE_MIN_X) / (MOUSE_DIMENSION_X / 100)) #define MOUSE_DIMENSION_Y (float)5152 #define MOUSE_MIN_Y -1589 #define MOUSE_MAX_Y 2047 #define MOUSE_RES_Y ((MOUSE_MAX_Y - MOUSE_MIN_Y) / (MOUSE_DIMENSION_Y / 100)) #define TRACKPAD_DIMENSION_X (float)13000 #define TRACKPAD_MIN_X -2909 #define TRACKPAD_MAX_X 3167 #define TRACKPAD_RES_X \ ((TRACKPAD_MAX_X - TRACKPAD_MIN_X) / (TRACKPAD_DIMENSION_X / 100)) #define TRACKPAD_DIMENSION_Y (float)11000 #define TRACKPAD_MIN_Y -2456 #define TRACKPAD_MAX_Y 2565 #define TRACKPAD_RES_Y \ ((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100)) #define TRACKPAD2_DIMENSION_X (float)16000 #define TRACKPAD2_MIN_X -3678 #define TRACKPAD2_MAX_X 3934 #define TRACKPAD2_RES_X \ ((TRACKPAD2_MAX_X - TRACKPAD2_MIN_X) / (TRACKPAD2_DIMENSION_X / 100)) #define TRACKPAD2_DIMENSION_Y (float)11490 #define TRACKPAD2_MIN_Y -2478 #define TRACKPAD2_MAX_Y 2587 #define TRACKPAD2_RES_Y \ ((TRACKPAD2_MAX_Y - TRACKPAD2_MIN_Y) / (TRACKPAD2_DIMENSION_Y / 100)) /** * struct magicmouse_sc - Tracks Magic Mouse-specific data. * @input: Input device through which we report events. * @quirks: Currently unused. * @ntouches: Number of touches in most recent touch report. * @scroll_accel: Number of consecutive scroll motions. * @scroll_jiffies: Time of last scroll motion. * @touches: Most recent data for a touch, indexed by tracking ID. * @tracking_ids: Mapping of current touch input data to @touches. * @hdev: Pointer to the underlying HID device. * @work: Workqueue to handle initialization retry for quirky devices. * @battery_timer: Timer for obtaining battery level information. */ struct magicmouse_sc { struct input_dev *input; unsigned long quirks; int ntouches; int scroll_accel; unsigned long scroll_jiffies; struct { short x; short y; short scroll_x; short scroll_y; short scroll_x_hr; short scroll_y_hr; u8 size; bool scroll_x_active; bool scroll_y_active; } touches[16]; int tracking_ids[16]; struct hid_device *hdev; struct delayed_work work; struct timer_list battery_timer; }; static int magicmouse_firm_touch(struct magicmouse_sc *msc) { int touch = -1; int ii; /* If there is only one "firm" touch, set touch to its * tracking ID. */ for (ii = 0; ii < msc->ntouches; ii++) { int idx = msc->tracking_ids[ii]; if (msc->touches[idx].size < 8) { /* Ignore this touch. */ } else if (touch >= 0) { touch = -1; break; } else { touch = idx; } } return touch; } static void magicmouse_emit_buttons(struct magicmouse_sc *msc, int state) { int last_state = test_bit(BTN_LEFT, msc->input->key) << 0 | test_bit(BTN_RIGHT, msc->input->key) << 1 | test_bit(BTN_MIDDLE, msc->input->key) << 2; if (emulate_3button) { int id; /* If some button was pressed before, keep it held * down. Otherwise, if there's exactly one firm * touch, use that to override the mouse's guess. */ if (state == 0) { /* The button was released. 
*/ } else if (last_state != 0) { state = last_state; } else if ((id = magicmouse_firm_touch(msc)) >= 0) { int x = msc->touches[id].x; if (x < middle_button_start) state = 1; else if (x > middle_button_stop) state = 2; else state = 4; } /* else: we keep the mouse's guess */ input_report_key(msc->input, BTN_MIDDLE, state & 4); } input_report_key(msc->input, BTN_LEFT, state & 1); input_report_key(msc->input, BTN_RIGHT, state & 2); if (state != last_state) msc->scroll_accel = SCROLL_ACCEL_DEFAULT; } static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tdata) { struct input_dev *input = msc->input; int id, x, y, size, orientation, touch_major, touch_minor, state, down; int pressure = 0; if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf; x = (tdata[1] << 28 | tdata[0] << 20) >> 20; y = -((tdata[2] << 24 | tdata[1] << 16) >> 20); size = tdata[5] & 0x3f; orientation = (tdata[6] >> 2) - 32; touch_major = tdata[3]; touch_minor = tdata[4]; state = tdata[7] & TOUCH_STATE_MASK; down = state != TOUCH_STATE_NONE; } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { id = tdata[8] & 0xf; x = (tdata[1] << 27 | tdata[0] << 19) >> 19; y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19); size = tdata[6]; orientation = (tdata[8] >> 5) - 4; touch_major = tdata[4]; touch_minor = tdata[5]; pressure = tdata[7]; state = tdata[3] & 0xC0; down = state == 0x80; } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ id = (tdata[7] << 2 | tdata[6] >> 6) & 0xf; x = (tdata[1] << 27 | tdata[0] << 19) >> 19; y = -((tdata[3] << 30 | tdata[2] << 22 | tdata[1] << 14) >> 19); size = tdata[6] & 0x3f; orientation = (tdata[7] >> 2) - 32; touch_major = tdata[4]; touch_minor = tdata[5]; state = tdata[8] & TOUCH_STATE_MASK; down = state != TOUCH_STATE_NONE; } /* Store tracking ID and other fields. */ msc->tracking_ids[raw_id] = id; msc->touches[id].x = x; msc->touches[id].y = y; msc->touches[id].size = size; /* If requested, emulate a scroll wheel by detecting small * vertical touch motions. */ if (emulate_scroll_wheel && (input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2)) { unsigned long now = jiffies; int step_x = msc->touches[id].scroll_x - x; int step_y = msc->touches[id].scroll_y - y; int step_hr = max_t(int, ((64 - (int)scroll_speed) * msc->scroll_accel) / SCROLL_HR_STEPS, 1); int step_x_hr = msc->touches[id].scroll_x_hr - x; int step_y_hr = msc->touches[id].scroll_y_hr - y; /* Calculate and apply the scroll motion. */ switch (state) { case TOUCH_STATE_START: msc->touches[id].scroll_x = x; msc->touches[id].scroll_y = y; msc->touches[id].scroll_x_hr = x; msc->touches[id].scroll_y_hr = y; msc->touches[id].scroll_x_active = false; msc->touches[id].scroll_y_active = false; /* Reset acceleration after half a second. 
*/ if (scroll_acceleration && time_before(now, msc->scroll_jiffies + HZ / 2)) msc->scroll_accel = max_t(int, msc->scroll_accel - 1, 1); else msc->scroll_accel = SCROLL_ACCEL_DEFAULT; break; case TOUCH_STATE_DRAG: step_x /= (64 - (int)scroll_speed) * msc->scroll_accel; if (step_x != 0) { msc->touches[id].scroll_x -= step_x * (64 - scroll_speed) * msc->scroll_accel; msc->scroll_jiffies = now; input_report_rel(input, REL_HWHEEL, -step_x); } step_y /= (64 - (int)scroll_speed) * msc->scroll_accel; if (step_y != 0) { msc->touches[id].scroll_y -= step_y * (64 - scroll_speed) * msc->scroll_accel; msc->scroll_jiffies = now; input_report_rel(input, REL_WHEEL, step_y); } if (!msc->touches[id].scroll_x_active && abs(step_x_hr) > SCROLL_HR_THRESHOLD) { msc->touches[id].scroll_x_active = true; msc->touches[id].scroll_x_hr = x; step_x_hr = 0; } step_x_hr /= step_hr; if (step_x_hr != 0 && msc->touches[id].scroll_x_active) { msc->touches[id].scroll_x_hr -= step_x_hr * step_hr; input_report_rel(input, REL_HWHEEL_HI_RES, -step_x_hr * SCROLL_HR_MULT); } if (!msc->touches[id].scroll_y_active && abs(step_y_hr) > SCROLL_HR_THRESHOLD) { msc->touches[id].scroll_y_active = true; msc->touches[id].scroll_y_hr = y; step_y_hr = 0; } step_y_hr /= step_hr; if (step_y_hr != 0 && msc->touches[id].scroll_y_active) { msc->touches[id].scroll_y_hr -= step_y_hr * step_hr; input_report_rel(input, REL_WHEEL_HI_RES, step_y_hr * SCROLL_HR_MULT); } break; } } if (down) msc->ntouches++; input_mt_slot(input, id); input_mt_report_slot_state(input, MT_TOOL_FINGER, down); /* Generate the input events for this touch. */ if (down) { input_report_abs(input, ABS_MT_TOUCH_MAJOR, touch_major << 2); input_report_abs(input, ABS_MT_TOUCH_MINOR, touch_minor << 2); input_report_abs(input, ABS_MT_ORIENTATION, -orientation); input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) input_report_abs(input, ABS_MT_PRESSURE, pressure); if (report_undeciphered) { if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) input_event(input, EV_MSC, MSC_RAW, tdata[7]); else if (input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) input_event(input, EV_MSC, MSC_RAW, tdata[8]); } } } static int magicmouse_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); struct input_dev *input = msc->input; int x = 0, y = 0, ii, clicks = 0, npoints; switch (data[0]) { case TRACKPAD_REPORT_ID: case TRACKPAD2_BT_REPORT_ID: /* Expect four bytes of prefix, and N*9 bytes of touch data. */ if (size < 4 || ((size - 4) % 9) != 0) return 0; npoints = (size - 4) / 9; if (npoints > 15) { hid_warn(hdev, "invalid size value (%d) for TRACKPAD_REPORT_ID\n", size); return 0; } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 9 + 4); clicks = data[1]; /* The following bits provide a device specific timestamp. They * are unused here. * * ts = data[1] >> 6 | data[2] << 2 | data[3] << 10; */ break; case TRACKPAD2_USB_REPORT_ID: /* Expect twelve bytes of prefix and N*9 bytes of touch data. 
*/ if (size < 12 || ((size - 12) % 9) != 0) return 0; npoints = (size - 12) / 9; if (npoints > 15) { hid_warn(hdev, "invalid size value (%d) for TRACKPAD2_USB_REPORT_ID\n", size); return 0; } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 9 + 12); clicks = data[1]; break; case MOUSE_REPORT_ID: /* Expect six bytes of prefix, and N*8 bytes of touch data. */ if (size < 6 || ((size - 6) % 8) != 0) return 0; npoints = (size - 6) / 8; if (npoints > 15) { hid_warn(hdev, "invalid size value (%d) for MOUSE_REPORT_ID\n", size); return 0; } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 8 + 6); /* When emulating three-button mode, it is important * to have the current touch information before * generating a click event. */ x = (int)(((data[3] & 0x0c) << 28) | (data[1] << 22)) >> 22; y = (int)(((data[3] & 0x30) << 26) | (data[2] << 22)) >> 22; clicks = data[3]; /* The following bits provide a device specific timestamp. They * are unused here. * * ts = data[3] >> 6 | data[4] << 2 | data[5] << 10; */ break; case MOUSE2_REPORT_ID: /* Size is either 8 or (14 + 8 * N) */ if (size != 8 && (size < 14 || (size - 14) % 8 != 0)) return 0; npoints = (size - 14) / 8; if (npoints > 15) { hid_warn(hdev, "invalid size value (%d) for MOUSE2_REPORT_ID\n", size); return 0; } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 8 + 14); /* When emulating three-button mode, it is important * to have the current touch information before * generating a click event. */ x = (int)((data[3] << 24) | (data[2] << 16)) >> 16; y = (int)((data[5] << 24) | (data[4] << 16)) >> 16; clicks = data[1]; /* The following bits provide a device specific timestamp. They * are unused here. * * ts = data[11] >> 6 | data[12] << 2 | data[13] << 10; */ break; case DOUBLE_REPORT_ID: /* Sometimes the trackpad sends two touch reports in one * packet. */ magicmouse_raw_event(hdev, report, data + 2, data[1]); magicmouse_raw_event(hdev, report, data + 2 + data[1], size - 2 - data[1]); return 0; default: return 0; } if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { magicmouse_emit_buttons(msc, clicks & 3); input_report_rel(input, REL_X, x); input_report_rel(input, REL_Y, y); } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { input_mt_sync_frame(input); input_report_key(input, BTN_MOUSE, clicks & 1); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ input_report_key(input, BTN_MOUSE, clicks & 1); input_mt_report_pointer_emulation(input, true); } input_sync(input); return 1; } static int magicmouse_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); if (msc->input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 && field->report->id == MOUSE2_REPORT_ID) { /* * magic_mouse_raw_event has done all the work. Skip hidinput. * * Specifically, hidinput may modify BTN_LEFT and BTN_RIGHT, * breaking emulate_3button. 
*/ return 1; } return 0; } static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hdev) { int error; int mt_flags = 0; __set_bit(EV_KEY, input->evbit); if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { __set_bit(BTN_LEFT, input->keybit); __set_bit(BTN_RIGHT, input->keybit); if (emulate_3button) __set_bit(BTN_MIDDLE, input->keybit); __set_bit(EV_REL, input->evbit); __set_bit(REL_X, input->relbit); __set_bit(REL_Y, input->relbit); if (emulate_scroll_wheel) { __set_bit(REL_WHEEL, input->relbit); __set_bit(REL_HWHEEL, input->relbit); __set_bit(REL_WHEEL_HI_RES, input->relbit); __set_bit(REL_HWHEEL_HI_RES, input->relbit); } } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { /* If the trackpad has been connected to a Mac, the name is * automatically personalized, e.g., "José Expósito's Trackpad". * When connected through Bluetooth, the personalized name is * reported, however, when connected through USB the generic * name is reported. * Set the device name to ensure the same driver settings get * loaded, whether connected through bluetooth or USB. */ if (hdev->vendor == BT_VENDOR_ID_APPLE) { if (input->id.version == TRACKPAD2_2021_BT_VERSION) input->name = "Apple Inc. Magic Trackpad"; else input->name = "Apple Inc. Magic Trackpad 2"; } else { /* USB_VENDOR_ID_APPLE */ input->name = hdev->name; } __clear_bit(EV_MSC, input->evbit); __clear_bit(BTN_0, input->keybit); __clear_bit(BTN_RIGHT, input->keybit); __clear_bit(BTN_MIDDLE, input->keybit); __set_bit(BTN_MOUSE, input->keybit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); __set_bit(BTN_TOOL_FINGER, input->keybit); mt_flags = INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED | INPUT_MT_TRACK; } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ /* input->keybit is initialized with incorrect button info * for Magic Trackpad. There really is only one physical * button (BTN_LEFT == BTN_MOUSE). Make sure we don't * advertise buttons that don't exist... */ __clear_bit(BTN_RIGHT, input->keybit); __clear_bit(BTN_MIDDLE, input->keybit); __set_bit(BTN_MOUSE, input->keybit); __set_bit(BTN_TOOL_FINGER, input->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); __set_bit(BTN_TOOL_TRIPLETAP, input->keybit); __set_bit(BTN_TOOL_QUADTAP, input->keybit); __set_bit(BTN_TOOL_QUINTTAP, input->keybit); __set_bit(BTN_TOUCH, input->keybit); __set_bit(INPUT_PROP_POINTER, input->propbit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); } __set_bit(EV_ABS, input->evbit); error = input_mt_init_slots(input, 16, mt_flags); if (error) return error; input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 255 << 2, 4, 0); input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, 255 << 2, 4, 0); /* Note: Touch Y position from the device is inverted relative * to how pointer motion is reported (and relative to how USB * HID recommends the coordinates work). This driver keeps * the origin at the same position, and just uses the additive * inverse of the reported Y. 
*/ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { input_set_abs_params(input, ABS_MT_ORIENTATION, -31, 32, 1, 0); input_set_abs_params(input, ABS_MT_POSITION_X, MOUSE_MIN_X, MOUSE_MAX_X, 4, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, MOUSE_MIN_Y, MOUSE_MAX_Y, 4, 0); input_abs_set_res(input, ABS_MT_POSITION_X, MOUSE_RES_X); input_abs_set_res(input, ABS_MT_POSITION_Y, MOUSE_RES_Y); } else if (input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { input_set_abs_params(input, ABS_MT_PRESSURE, 0, 253, 0, 0); input_set_abs_params(input, ABS_PRESSURE, 0, 253, 0, 0); input_set_abs_params(input, ABS_MT_ORIENTATION, -3, 4, 0, 0); input_set_abs_params(input, ABS_X, TRACKPAD2_MIN_X, TRACKPAD2_MAX_X, 0, 0); input_set_abs_params(input, ABS_Y, TRACKPAD2_MIN_Y, TRACKPAD2_MAX_Y, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_X, TRACKPAD2_MIN_X, TRACKPAD2_MAX_X, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, TRACKPAD2_MIN_Y, TRACKPAD2_MAX_Y, 0, 0); input_abs_set_res(input, ABS_X, TRACKPAD2_RES_X); input_abs_set_res(input, ABS_Y, TRACKPAD2_RES_Y); input_abs_set_res(input, ABS_MT_POSITION_X, TRACKPAD2_RES_X); input_abs_set_res(input, ABS_MT_POSITION_Y, TRACKPAD2_RES_Y); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ input_set_abs_params(input, ABS_MT_ORIENTATION, -31, 32, 1, 0); input_set_abs_params(input, ABS_X, TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0); input_set_abs_params(input, ABS_Y, TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0); input_set_abs_params(input, ABS_MT_POSITION_X, TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0); input_abs_set_res(input, ABS_X, TRACKPAD_RES_X); input_abs_set_res(input, ABS_Y, TRACKPAD_RES_Y); input_abs_set_res(input, ABS_MT_POSITION_X, TRACKPAD_RES_X); input_abs_set_res(input, ABS_MT_POSITION_Y, TRACKPAD_RES_Y); } input_set_events_per_packet(input, 60); if (report_undeciphered && input->id.product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { __set_bit(EV_MSC, input->evbit); __set_bit(MSC_RAW, input->mscbit); } /* * hid-input may mark device as using autorepeat, but neither * the trackpad, nor the mouse actually want it. 
*/ __clear_bit(EV_REP, input->evbit); return 0; } static int magicmouse_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); if (!msc->input) msc->input = hi->input; /* Magic Trackpad does not give relative data after switching to MT */ if ((hi->input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD || hi->input->id.product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) && field->flags & HID_MAIN_ITEM_RELATIVE) return -1; return 0; } static int magicmouse_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); int ret; ret = magicmouse_setup_input(msc->input, hdev); if (ret) { hid_err(hdev, "magicmouse setup input failed (%d)\n", ret); /* clean msc->input to notify probe() of the failure */ msc->input = NULL; return ret; } return 0; } static int magicmouse_enable_multitouch(struct hid_device *hdev) { const u8 *feature; const u8 feature_mt[] = { 0xD7, 0x01 }; const u8 feature_mt_mouse2[] = { 0xF1, 0x02, 0x01 }; const u8 feature_mt_trackpad2_usb[] = { 0x02, 0x01 }; const u8 feature_mt_trackpad2_bt[] = { 0xF1, 0x02, 0x01 }; u8 *buf; int ret; int feature_size; if (hdev->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { if (hdev->vendor == BT_VENDOR_ID_APPLE) { feature_size = sizeof(feature_mt_trackpad2_bt); feature = feature_mt_trackpad2_bt; } else { /* USB_VENDOR_ID_APPLE */ feature_size = sizeof(feature_mt_trackpad2_usb); feature = feature_mt_trackpad2_usb; } } else if (hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { feature_size = sizeof(feature_mt_mouse2); feature = feature_mt_mouse2; } else { feature_size = sizeof(feature_mt); feature = feature_mt; } buf = kmemdup(feature, feature_size, GFP_KERNEL); if (!buf) return -ENOMEM; ret = hid_hw_raw_request(hdev, buf[0], buf, feature_size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); kfree(buf); return ret; } static void magicmouse_enable_mt_work(struct work_struct *work) { struct magicmouse_sc *msc = container_of(work, struct magicmouse_sc, work.work); int ret; ret = magicmouse_enable_multitouch(msc->hdev); if (ret < 0) hid_err(msc->hdev, "unable to request touch data (%d)\n", ret); } static int magicmouse_fetch_battery(struct hid_device *hdev) { #ifdef CONFIG_HID_BATTERY_STRENGTH struct hid_report_enum *report_enum; struct hid_report *report; if (!hdev->battery || hdev->vendor != USB_VENDOR_ID_APPLE || (hdev->product != USB_DEVICE_ID_APPLE_MAGICMOUSE2 && hdev->product != USB_DEVICE_ID_APPLE_MAGICTRACKPAD2)) return -1; report_enum = &hdev->report_enum[hdev->battery_report_type]; report = report_enum->report_id_hash[hdev->battery_report_id]; if (!report || report->maxfield < 1) return -1; if (hdev->battery_capacity == hdev->battery_max) return -1; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); return 0; #else return -1; #endif } static void magicmouse_battery_timer_tick(struct timer_list *t) { struct magicmouse_sc *msc = from_timer(msc, t, battery_timer); struct hid_device *hdev = msc->hdev; if (magicmouse_fetch_battery(hdev) == 0) { mod_timer(&msc->battery_timer, jiffies + msecs_to_jiffies(USB_BATTERY_TIMEOUT_MS)); } } static int magicmouse_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct magicmouse_sc *msc; struct hid_report *report; int ret; msc = devm_kzalloc(&hdev->dev, sizeof(*msc), GFP_KERNEL); if (msc == NULL) { hid_err(hdev, "can't alloc magicmouse descriptor\n"); return -ENOMEM; } msc->scroll_accel = 
SCROLL_ACCEL_DEFAULT; msc->hdev = hdev; INIT_DEFERRABLE_WORK(&msc->work, magicmouse_enable_mt_work); msc->quirks = id->driver_data; hid_set_drvdata(hdev, msc); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "magicmouse hid parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "magicmouse hw start failed\n"); return ret; } timer_setup(&msc->battery_timer, magicmouse_battery_timer_tick, 0); mod_timer(&msc->battery_timer, jiffies + msecs_to_jiffies(USB_BATTERY_TIMEOUT_MS)); magicmouse_fetch_battery(hdev); if (id->vendor == USB_VENDOR_ID_APPLE && (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || (id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 && hdev->type != HID_TYPE_USBMOUSE))) return 0; if (!msc->input) { hid_err(hdev, "magicmouse input not registered\n"); ret = -ENOMEM; goto err_stop_hw; } if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE) report = hid_register_report(hdev, HID_INPUT_REPORT, MOUSE_REPORT_ID, 0); else if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) report = hid_register_report(hdev, HID_INPUT_REPORT, MOUSE2_REPORT_ID, 0); else if (id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { if (id->vendor == BT_VENDOR_ID_APPLE) report = hid_register_report(hdev, HID_INPUT_REPORT, TRACKPAD2_BT_REPORT_ID, 0); else /* USB_VENDOR_ID_APPLE */ report = hid_register_report(hdev, HID_INPUT_REPORT, TRACKPAD2_USB_REPORT_ID, 0); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ report = hid_register_report(hdev, HID_INPUT_REPORT, TRACKPAD_REPORT_ID, 0); report = hid_register_report(hdev, HID_INPUT_REPORT, DOUBLE_REPORT_ID, 0); } if (!report) { hid_err(hdev, "unable to register touch report\n"); ret = -ENOMEM; goto err_stop_hw; } report->size = 6; /* * Some devices repond with 'invalid report id' when feature * report switching it into multitouch mode is sent to it. * * This results in -EIO from the _raw low-level transport callback, * but there seems to be no other way of switching the mode. * Thus the super-ugly hacky success check below. 
*/ ret = magicmouse_enable_multitouch(hdev); if (ret != -EIO && ret < 0) { hid_err(hdev, "unable to request touch data (%d)\n", ret); goto err_stop_hw; } if (ret == -EIO && id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { schedule_delayed_work(&msc->work, msecs_to_jiffies(500)); } return 0; err_stop_hw: del_timer_sync(&msc->battery_timer); hid_hw_stop(hdev); return ret; } static void magicmouse_remove(struct hid_device *hdev) { struct magicmouse_sc *msc = hid_get_drvdata(hdev); if (msc) { cancel_delayed_work_sync(&msc->work); del_timer_sync(&msc->battery_timer); } hid_hw_stop(hdev); } static __u8 *magicmouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { /* * Change the usage from: * 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page 1) 0 * 0x09, 0x0b, // Usage (Vendor Usage 0x0b) 3 * To: * 0x05, 0x01, // Usage Page (Generic Desktop) 0 * 0x09, 0x02, // Usage (Mouse) 2 */ if (hdev->vendor == USB_VENDOR_ID_APPLE && (hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 || hdev->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) && *rsize == 83 && rdesc[46] == 0x84 && rdesc[58] == 0x85) { hid_info(hdev, "fixing up magicmouse battery report descriptor\n"); *rsize = *rsize - 1; rdesc = kmemdup(rdesc + 1, *rsize, GFP_KERNEL); if (!rdesc) return NULL; rdesc[0] = 0x05; rdesc[1] = 0x01; rdesc[2] = 0x09; rdesc[3] = 0x02; } return rdesc; } static const struct hid_device_id magic_mice[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE), .driver_data = 0 }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE2), .driver_data = 0 }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE2), .driver_data = 0 }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD), .driver_data = 0 }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2), .driver_data = 0 }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2), .driver_data = 0 }, { } }; MODULE_DEVICE_TABLE(hid, magic_mice); static struct hid_driver magicmouse_driver = { .name = "magicmouse", .id_table = magic_mice, .probe = magicmouse_probe, .remove = magicmouse_remove, .report_fixup = magicmouse_report_fixup, .raw_event = magicmouse_raw_event, .event = magicmouse_event, .input_mapping = magicmouse_input_mapping, .input_configured = magicmouse_input_configured, }; module_hid_driver(magicmouse_driver); MODULE_LICENSE("GPL");
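/*
 * One detail of magicmouse_emit_touch() above that is easy to miss: the
 * 12-bit signed coordinates are decoded by parking the packed bytes in
 * the top bits of a 32-bit int and arithmetically shifting back down so
 * the sign bit propagates, e.g. x = (tdata[1] << 28 | tdata[0] << 20) >> 20.
 * The standalone helper below is an illustrative sketch of that idiom
 * only; its name and field layout are hypothetical, not the exact report
 * format.
 */
#include <linux/types.h>

static int example_sign_extend_12bit(u8 low_byte, u8 high_nibble)
{
	/* Field occupies bits 31..20; only the low nibble of high_nibble is used. */
	int packed = (high_nibble << 28) | (low_byte << 20);

	return packed >> 20;	/* arithmetic shift sign-extends the 12-bit value */
}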
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM printk

#if !defined(_TRACE_PRINTK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PRINTK_H

#include <linux/tracepoint.h>

TRACE_EVENT(console,
	TP_PROTO(const char *text, size_t len),

	TP_ARGS(text, len),

	TP_STRUCT__entry(
		__dynamic_array(char, msg, len + 1)
	),

	TP_fast_assign(
		/*
		 * Each trace entry is printed in a new line.
		 * If the msg finishes with '\n', cut it off
		 * to avoid blank lines in the trace.
		 */
		if ((len > 0) && (text[len-1] == '\n'))
			len -= 1;

		memcpy(__get_str(msg), text, len);
		__get_str(msg)[len] = 0;
	),

	TP_printk("%s", __get_str(msg))
);
#endif /* _TRACE_PRINTK_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
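/*
 * TRACE_EVENT(console, ...) generates both a trace_console() wrapper and
 * a trace_console_enabled() predicate. The hypothetical helper below is a
 * sketch of a call site that only touches the tracepoint when tracing is
 * actually enabled.
 */
#include <linux/string.h>

#include <trace/events/printk.h>

static void example_trace_console_text(const char *text)
{
	if (trace_console_enabled())
		trace_console(text, strlen(text));
}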
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_WAIT_H #define _LINUX_WAIT_H /* * Linux wait queue related types and methods */ #include <linux/list.h> #include <linux/stddef.h> #include <linux/spinlock.h> #include <asm/current.h> typedef struct wait_queue_entry wait_queue_entry_t; typedef int (*wait_queue_func_t)(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key); int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key); /* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 #define WQ_FLAG_CUSTOM 0x04 #define WQ_FLAG_DONE 0x08 #define WQ_FLAG_PRIORITY 0x10 /* * A single wait-queue entry structure: */ struct wait_queue_entry { unsigned int flags; void *private; wait_queue_func_t func; struct list_head entry; }; struct wait_queue_head { spinlock_t lock; struct list_head head; }; typedef struct wait_queue_head wait_queue_head_t; struct task_struct; /* * Macros for declaration and initialisaton of the datatypes */ #define __WAITQUEUE_INITIALIZER(name, tsk) { \ .private = tsk, \ .func = default_wake_function, \ .entry = { NULL, NULL } } #define DECLARE_WAITQUEUE(name, tsk) \ struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .head = LIST_HEAD_INIT(name.head) } #define DECLARE_WAIT_QUEUE_HEAD(name) \ struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *); #define init_waitqueue_head(wq_head) \ do { \ static struct lock_class_key __key; \ \ __init_waitqueue_head((wq_head), #wq_head, &__key); \ } while (0) #ifdef CONFIG_LOCKDEP # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ ({ init_waitqueue_head(&name); name; }) # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \ struct wait_queue_head name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) #else # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) #endif static inline void 
init_waitqueue_entry(struct wait_queue_entry *wq_entry, struct task_struct *p) { wq_entry->flags = 0; wq_entry->private = p; wq_entry->func = default_wake_function; } static inline void init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func) { wq_entry->flags = 0; wq_entry->private = NULL; wq_entry->func = func; } /** * waitqueue_active -- locklessly test for waiters on the queue * @wq_head: the waitqueue to test for waiters * * returns true if the wait list is not empty * * NOTE: this function is lockless and requires care, incorrect usage _will_ * lead to sporadic and non-obvious failure. * * Use either while holding wait_queue_head::lock or when used for wakeups * with an extra smp_mb() like:: * * CPU0 - waker CPU1 - waiter * * for (;;) { * @cond = true; prepare_to_wait(&wq_head, &wait, state); * smp_mb(); // smp_mb() from set_current_state() * if (waitqueue_active(wq_head)) if (@cond) * wake_up(wq_head); break; * schedule(); * } * finish_wait(&wq_head, &wait); * * Because without the explicit smp_mb() it's possible for the * waitqueue_active() load to get hoisted over the @cond store such that we'll * observe an empty wait list while the waiter might not observe @cond. * * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), * which (when the lock is uncontended) are of roughly equal cost. */ static inline int waitqueue_active(struct wait_queue_head *wq_head) { return !list_empty(&wq_head->head); } /** * wq_has_single_sleeper - check if there is only one sleeper * @wq_head: wait queue head * * Returns true of wq_head has only one sleeper on the list. * * Please refer to the comment for waitqueue_active. */ static inline bool wq_has_single_sleeper(struct wait_queue_head *wq_head) { return list_is_singular(&wq_head->head); } /** * wq_has_sleeper - check if there are any waiting processes * @wq_head: wait queue head * * Returns true if wq_head has waiting processes * * Please refer to the comment for waitqueue_active. */ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) { /* * We need to be sure we are in sync with the * add_wait_queue modifications to the wait queue. * * This memory barrier should be paired with one on the * waiting side. 
*/ smp_mb(); return waitqueue_active(wq_head); } extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { struct list_head *head = &wq_head->head; struct wait_queue_entry *wq; list_for_each_entry(wq, &wq_head->head, entry) { if (!(wq->flags & WQ_FLAG_PRIORITY)) break; head = &wq->entry; } list_add(&wq_entry->entry, head); } /* * Used for wake-one threads: */ static inline void __add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { wq_entry->flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue(wq_head, wq_entry); } static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { list_add_tail(&wq_entry->entry, &wq_head->head); } static inline void __add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { wq_entry->flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue_entry_tail(wq_head, wq_entry); } static inline void __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { list_del(&wq_entry->entry); } int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) #define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL) #define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1) #define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0) #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE) /* * Wakeup macros to be used to report events to the targets. 
*/ #define poll_to_key(m) ((void *)(__force uintptr_t)(__poll_t)(m)) #define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m)) #define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, poll_to_key(m)) #define wake_up_poll_on_current_cpu(x, m) \ __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m)) #define wake_up_locked_poll(x, m) \ __wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m)) #define wake_up_interruptible_poll(x, m) \ __wake_up(x, TASK_INTERRUPTIBLE, 1, poll_to_key(m)) #define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) #define wake_up_interruptible_sync_poll_locked(x, m) \ __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) /** * wake_up_pollfree - signal that a polled waitqueue is going away * @wq_head: the wait queue head * * In the very rare cases where a ->poll() implementation uses a waitqueue whose * lifetime is tied to a task rather than to the 'struct file' being polled, * this function must be called before the waitqueue is freed so that * non-blocking polls (e.g. epoll) are notified that the queue is going away. * * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. */ static inline void wake_up_pollfree(struct wait_queue_head *wq_head) { /* * For performance reasons, we don't always take the queue lock here. * Therefore, we might race with someone removing the last entry from * the queue, and proceed while they still hold the queue lock. * However, rcu_read_lock() is required to be held in such cases, so we * can safely proceed with an RCU-delayed free. */ if (waitqueue_active(wq_head)) __wake_up_pollfree(wq_head); } #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ if (__cond && !__ret) \ __ret = 1; \ __cond || !__ret; \ }) #define ___wait_is_interruptible(state) \ (!__builtin_constant_p(state) || \ (state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); /* * The below macro ___wait_event() has an explicit shadow of the __ret * variable when used from the wait_event_*() macros. * * This is so that both can use the ___wait_cond_timeout() construct * to wrap the condition. * * The type inconsistency of the wait_event_*() __ret variable is also * on purpose; we use long where we can return timeout values and int * otherwise. */ #define ___wait_event(wq_head, condition, state, exclusive, ret, cmd) \ ({ \ __label__ __out; \ struct wait_queue_entry __wq_entry; \ long __ret = ret; /* explicit shadow */ \ \ init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \ for (;;) { \ long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\ \ if (condition) \ break; \ \ if (___wait_is_interruptible(state) && __int) { \ __ret = __int; \ goto __out; \ } \ \ cmd; \ } \ finish_wait(&wq_head, &__wq_entry); \ __out: __ret; \ }) #define __wait_event(wq_head, condition) \ (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) /** * wait_event - sleep until a condition gets true * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. 
*/ #define wait_event(wq_head, condition) \ do { \ might_sleep(); \ if (condition) \ break; \ __wait_event(wq_head, condition); \ } while (0) #define __io_wait_event(wq_head, condition) \ (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ io_schedule()) /* * io_wait_event() -- like wait_event() but with io_schedule() */ #define io_wait_event(wq_head, condition) \ do { \ might_sleep(); \ if (condition) \ break; \ __io_wait_event(wq_head, condition); \ } while (0) #define __wait_event_freezable(wq_head, condition) \ ___wait_event(wq_head, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), \ 0, 0, schedule()) /** * wait_event_freezable - sleep (or freeze) until a condition gets true * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute * to system load) until the @condition evaluates to true. The * @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. */ #define wait_event_freezable(wq_head, condition) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_freezable(wq_head, condition); \ __ret; \ }) #define __wait_event_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_UNINTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) /** * wait_event_timeout - sleep until a condition gets true or a timeout elapses * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, in jiffies * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * Returns: * 0 if the @condition evaluated to %false after the @timeout elapsed, * 1 if the @condition evaluated to %true after the @timeout elapsed, * or the remaining jiffies (at least 1) if the @condition evaluated * to %true before the @timeout elapsed. */ #define wait_event_timeout(wq_head, condition, timeout) \ ({ \ long __ret = timeout; \ might_sleep(); \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_timeout(wq_head, condition, timeout); \ __ret; \ }) #define __wait_event_freezable_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 0, timeout, \ __ret = schedule_timeout(__ret)) /* * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid * increasing load and is freezable. 
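 *
 * The return value follows wait_event_timeout(); a sketch with hypothetical
 * @dev fields:
 *
 *	long left = wait_event_freezable_timeout(dev->wq, dev->done, HZ);
 *
 *	if (left < 0)
 *		return left;		// -ERESTARTSYS: interrupted by a signal
 *	if (left == 0)
 *		return -ETIMEDOUT;	// condition still false after one second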
*/ #define wait_event_freezable_timeout(wq_head, condition, timeout) \ ({ \ long __ret = timeout; \ might_sleep(); \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_freezable_timeout(wq_head, condition, timeout); \ __ret; \ }) #define __wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2) \ (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 1, 0, \ cmd1; schedule(); cmd2) /* * Just like wait_event_cmd(), except it sets exclusive flag */ #define wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2) \ do { \ if (condition) \ break; \ __wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2); \ } while (0) #define __wait_event_cmd(wq_head, condition, cmd1, cmd2) \ (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ cmd1; schedule(); cmd2) /** * wait_event_cmd - sleep until a condition gets true * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @cmd1: the command will be executed before sleep * @cmd2: the command will be executed after sleep * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. */ #define wait_event_cmd(wq_head, condition, cmd1, cmd2) \ do { \ if (condition) \ break; \ __wait_event_cmd(wq_head, condition, cmd1, cmd2); \ } while (0) #define __wait_event_interruptible(wq_head, condition) \ ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \ schedule()) /** * wait_event_interruptible - sleep until a condition gets true * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ #define wait_event_interruptible(wq_head, condition) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_interruptible(wq_head, condition); \ __ret; \ }) #define __wait_event_interruptible_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_INTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) /** * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, in jiffies * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * Returns: * 0 if the @condition evaluated to %false after the @timeout elapsed, * 1 if the @condition evaluated to %true after the @timeout elapsed, * the remaining jiffies (at least 1) if the @condition evaluated * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was * interrupted by a signal. 
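 *
 * For example (hypothetical @dev fields), the three outcomes can be told
 * apart as follows:
 *
 *	long ret = wait_event_interruptible_timeout(dev->wq, dev->done, HZ);
 *
 *	if (ret == -ERESTARTSYS)
 *		return ret;		// interrupted by a signal
 *	if (ret == 0)
 *		return -ETIMEDOUT;	// timed out, @dev->done still false
 *	// otherwise @dev->done became true; ret is at least 1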
*/ #define wait_event_interruptible_timeout(wq_head, condition, timeout) \ ({ \ long __ret = timeout; \ might_sleep(); \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_interruptible_timeout(wq_head, \ condition, timeout); \ __ret; \ }) #define __wait_event_hrtimeout(wq_head, condition, timeout, state) \ ({ \ int __ret = 0; \ struct hrtimer_sleeper __t; \ \ hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ HRTIMER_MODE_REL); \ if ((timeout) != KTIME_MAX) { \ hrtimer_set_expires_range_ns(&__t.timer, timeout, \ current->timer_slack_ns); \ hrtimer_sleeper_start_expires(&__t, HRTIMER_MODE_REL); \ } \ \ __ret = ___wait_event(wq_head, condition, state, 0, 0, \ if (!__t.task) { \ __ret = -ETIME; \ break; \ } \ schedule()); \ \ hrtimer_cancel(&__t.timer); \ destroy_hrtimer_on_stack(&__t.timer); \ __ret; \ }) /** * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, as a ktime_t * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * The function returns 0 if @condition became true, or -ETIME if the timeout * elapsed. */ #define wait_event_hrtimeout(wq_head, condition, timeout) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_hrtimeout(wq_head, condition, timeout, \ TASK_UNINTERRUPTIBLE); \ __ret; \ }) /** * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, as a ktime_t * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. * The @condition is checked each time the waitqueue @wq is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * The function returns 0 if @condition became true, -ERESTARTSYS if it was * interrupted by a signal, or -ETIME if the timeout elapsed.
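 *
 * For example, waiting at most five milliseconds for a hypothetical
 * @dev->done flag:
 *
 *	long err = wait_event_interruptible_hrtimeout(dev->wq, dev->done,
 *						      ms_to_ktime(5));
 *	if (err == -ETIME)
 *		...;	// timed out
 *	else if (err == -ERESTARTSYS)
 *		...;	// interrupted by a signal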
*/ #define wait_event_interruptible_hrtimeout(wq, condition, timeout) \ ({ \ long __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_hrtimeout(wq, condition, timeout, \ TASK_INTERRUPTIBLE); \ __ret; \ }) #define __wait_event_interruptible_exclusive(wq, condition) \ ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ schedule()) #define wait_event_interruptible_exclusive(wq, condition) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_interruptible_exclusive(wq, condition); \ __ret; \ }) #define __wait_event_killable_exclusive(wq, condition) \ ___wait_event(wq, condition, TASK_KILLABLE, 1, 0, \ schedule()) #define wait_event_killable_exclusive(wq, condition) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_killable_exclusive(wq, condition); \ __ret; \ }) #define __wait_event_freezable_exclusive(wq, condition) \ ___wait_event(wq, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 1, 0,\ schedule()) #define wait_event_freezable_exclusive(wq, condition) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_freezable_exclusive(wq, condition); \ __ret; \ }) /** * wait_event_idle - wait for a condition without contributing to system load * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_IDLE) until the * @condition evaluates to true. * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * */ #define wait_event_idle(wq_head, condition) \ do { \ might_sleep(); \ if (!(condition)) \ ___wait_event(wq_head, condition, TASK_IDLE, 0, 0, schedule()); \ } while (0) /** * wait_event_idle_exclusive - wait exclusively for a condition without contributing to system load * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_IDLE) until the * @condition evaluates to true. * The @condition is checked each time the waitqueue @wq_head is woken up. * * The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag * set, so when this process is woken, further processes waiting on the * same list are not considered. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * */ #define wait_event_idle_exclusive(wq_head, condition) \ do { \ might_sleep(); \ if (!(condition)) \ ___wait_event(wq_head, condition, TASK_IDLE, 1, 0, schedule()); \ } while (0) #define __wait_event_idle_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_IDLE, 0, timeout, \ __ret = schedule_timeout(__ret)) /** * wait_event_idle_timeout - sleep without load until a condition becomes true or a timeout elapses * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, in jiffies * * The process is put to sleep (TASK_IDLE) until the * @condition evaluates to true. The @condition is checked each time * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition.
* * Returns: * 0 if the @condition evaluated to %false after the @timeout elapsed, * 1 if the @condition evaluated to %true after the @timeout elapsed, * or the remaining jiffies (at least 1) if the @condition evaluated * to %true before the @timeout elapsed. */ #define wait_event_idle_timeout(wq_head, condition, timeout) \ ({ \ long __ret = timeout; \ might_sleep(); \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_idle_timeout(wq_head, condition, timeout); \ __ret; \ }) #define __wait_event_idle_exclusive_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_IDLE, 1, timeout, \ __ret = schedule_timeout(__ret)) /** * wait_event_idle_exclusive_timeout - sleep without load until a condition becomes true or a timeout elapses * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, in jiffies * * The process is put to sleep (TASK_IDLE) until the * @condition evaluates to true. The @condition is checked each time * the waitqueue @wq_head is woken up. * * The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag * set, so when this process is woken, further processes waiting on the * same list are not considered. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * Returns: * 0 if the @condition evaluated to %false after the @timeout elapsed, * 1 if the @condition evaluated to %true after the @timeout elapsed, * or the remaining jiffies (at least 1) if the @condition evaluated * to %true before the @timeout elapsed. */ #define wait_event_idle_exclusive_timeout(wq_head, condition, timeout) \ ({ \ long __ret = timeout; \ might_sleep(); \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_idle_exclusive_timeout(wq_head, condition, timeout);\ __ret; \ }) extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *); extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); #define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ int __ret; \ DEFINE_WAIT(__wait); \ if (exclusive) \ __wait.flags |= WQ_FLAG_EXCLUSIVE; \ do { \ __ret = fn(&(wq), &__wait); \ if (__ret) \ break; \ } while (!(condition)); \ __remove_wait_queue(&(wq), &__wait); \ __set_current_state(TASK_RUNNING); \ __ret; \ }) /** * wait_event_interruptible_locked - sleep until a condition gets true * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. * The @condition is checked each time the waitqueue @wq is woken up. * * It must be called with wq.lock held. The spinlock is dropped while * sleeping, but @condition is tested with the lock held, and the lock * is held again when this macro returns. * * The lock is locked/unlocked using spin_lock()/spin_unlock() * functions which must match the way they are locked/unlocked outside * of this macro. * * wake_up_locked() has to be called after changing any variable that could * change the result of the wait condition. * * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ #define wait_event_interruptible_locked(wq, condition) \ ((condition) \ ?
0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr)) /** * wait_event_interruptible_locked_irq - sleep until a condition gets true * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. * The @condition is checked each time the waitqueue @wq is woken up. * * It must be called with wq.lock held. The spinlock is dropped while * sleeping, but @condition is tested with the lock held, and the lock * is held again when this macro returns. * * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq() * functions which must match the way they are locked/unlocked outside * of this macro. * * wake_up_locked() has to be called after changing any variable that could * change the result of the wait condition. * * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ #define wait_event_interruptible_locked_irq(wq, condition) \ ((condition) \ ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq)) /** * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. * The @condition is checked each time the waitqueue @wq is woken up. * * It must be called with wq.lock held. The spinlock is dropped while * sleeping, but @condition is tested with the lock held, and the lock * is held again when this macro returns. * * The lock is locked/unlocked using spin_lock()/spin_unlock() * functions which must match the way they are locked/unlocked outside * of this macro. * * The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag * set, so when this process is woken, further processes waiting on the * same list are not considered. * * wake_up_locked() has to be called after changing any variable that could * change the result of the wait condition. * * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ #define wait_event_interruptible_exclusive_locked(wq, condition) \ ((condition) \ ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr)) /** * wait_event_interruptible_exclusive_locked_irq - sleep exclusively until a condition gets true * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. * The @condition is checked each time the waitqueue @wq is woken up. * * It must be called with wq.lock held. The spinlock is dropped while * sleeping, but @condition is tested with the lock held, and the lock * is held again when this macro returns. * * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq() * functions which must match the way they are locked/unlocked outside * of this macro. * * The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag * set, so when this process is woken, further processes waiting on the * same list are not considered. * * wake_up_locked() has to be called after changing any variable that could * change the result of the wait condition.
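 *
 * A usage sketch with a hypothetical @dev whose condition is protected by
 * the waitqueue lock itself:
 *
 *	spin_lock_irq(&dev->wq.lock);
 *	ret = wait_event_interruptible_exclusive_locked_irq(dev->wq, dev->done);
 *	// the lock is held again here; on success @dev->done is true
 *	spin_unlock_irq(&dev->wq.lock);
 *
 * with the waker doing:
 *
 *	spin_lock_irq(&dev->wq.lock);
 *	dev->done = true;
 *	wake_up_locked(&dev->wq);
 *	spin_unlock_irq(&dev->wq.lock);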
* * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ #define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ ((condition) \ ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq)) #define __wait_event_killable(wq, condition) \ ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule()) /** * wait_event_killable - sleep until a condition gets true * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_KILLABLE) until the * @condition evaluates to true or a signal is received. * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ #define wait_event_killable(wq_head, condition) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_killable(wq_head, condition); \ __ret; \ }) #define __wait_event_state(wq, condition, state) \ ___wait_event(wq, condition, state, 0, 0, schedule()) /** * wait_event_state - sleep until a condition gets true * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @state: state to sleep in * * The process is put to sleep (@state) until the @condition evaluates to true * or a signal is received (when allowed by @state). The @condition is checked * each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * The function will return -ERESTARTSYS if it was interrupted by a signal * (when allowed by @state) and 0 if @condition evaluated to true. */ #define wait_event_state(wq_head, condition, state) \ ({ \ int __ret = 0; \ might_sleep(); \ if (!(condition)) \ __ret = __wait_event_state(wq_head, condition, state); \ __ret; \ }) #define __wait_event_killable_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_KILLABLE, 0, timeout, \ __ret = schedule_timeout(__ret)) /** * wait_event_killable_timeout - sleep until a condition gets true or a timeout elapses * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, in jiffies * * The process is put to sleep (TASK_KILLABLE) until the * @condition evaluates to true or a kill signal is received. * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * Returns: * 0 if the @condition evaluated to %false after the @timeout elapsed, * 1 if the @condition evaluated to %true after the @timeout elapsed, * the remaining jiffies (at least 1) if the @condition evaluated * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was * interrupted by a kill signal. * * Only kill signals interrupt this process. 
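 *
 * For example, a helper that should give up only on a fatal signal or after
 * ten seconds (hypothetical @dev fields):
 *
 *	long ret = wait_event_killable_timeout(dev->wq, dev->done, 10 * HZ);
 *
 *	if (ret == -ERESTARTSYS)
 *		return ret;		// the task is being killed
 *	if (ret == 0)
 *		return -ETIMEDOUT;	// timed out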
*/ #define wait_event_killable_timeout(wq_head, condition, timeout) \ ({ \ long __ret = timeout; \ might_sleep(); \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_killable_timeout(wq_head, \ condition, timeout); \ __ret; \ }) #define __wait_event_lock_irq(wq_head, condition, lock, cmd) \ (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ spin_unlock_irq(&lock); \ cmd; \ schedule(); \ spin_lock_irq(&lock)) /** * wait_event_lock_irq_cmd - sleep until a condition gets true. The * condition is checked under the lock. This * is expected to be called with the lock * taken. * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before cmd * and schedule() and reacquired afterwards. * @cmd: a command which is invoked outside the critical section before * sleep * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * This is supposed to be called while holding the lock. The lock is * dropped before invoking the cmd and going to sleep and is reacquired * afterwards. */ #define wait_event_lock_irq_cmd(wq_head, condition, lock, cmd) \ do { \ if (condition) \ break; \ __wait_event_lock_irq(wq_head, condition, lock, cmd); \ } while (0) /** * wait_event_lock_irq - sleep until a condition gets true. The * condition is checked under the lock. This * is expected to be called with the lock * taken. * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before schedule() * and reacquired afterwards. * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * This is supposed to be called while holding the lock. The lock is * dropped before going to sleep and is reacquired afterwards. */ #define wait_event_lock_irq(wq_head, condition, lock) \ do { \ if (condition) \ break; \ __wait_event_lock_irq(wq_head, condition, lock, ); \ } while (0) #define __wait_event_interruptible_lock_irq(wq_head, condition, lock, cmd) \ ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \ spin_unlock_irq(&lock); \ cmd; \ schedule(); \ spin_lock_irq(&lock)) /** * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true. * The condition is checked under the lock. This is expected to * be called with the lock taken. * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before cmd and * schedule() and reacquired afterwards. * @cmd: a command which is invoked outside the critical section before * sleep * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. The @condition is * checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * This is supposed to be called while holding the lock. The lock is * dropped before invoking the cmd and going to sleep and is reacquired * afterwards. 
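 *
 * A usage sketch (the @dev fields and kick_hardware() helper are
 * hypothetical, with @dev->lock protecting @dev->done):
 *
 *	spin_lock_irq(&dev->lock);
 *	ret = wait_event_interruptible_lock_irq_cmd(dev->wq, dev->done,
 *						    dev->lock,
 *						    kick_hardware(dev));
 *	spin_unlock_irq(&dev->lock);
 *
 * Here kick_hardware() runs with @dev->lock dropped, before each sleep.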
* * The macro will return -ERESTARTSYS if it was interrupted by a signal * and 0 if @condition evaluated to true. */ #define wait_event_interruptible_lock_irq_cmd(wq_head, condition, lock, cmd) \ ({ \ int __ret = 0; \ if (!(condition)) \ __ret = __wait_event_interruptible_lock_irq(wq_head, \ condition, lock, cmd); \ __ret; \ }) /** * wait_event_interruptible_lock_irq - sleep until a condition gets true. * The condition is checked under the lock. This is expected * to be called with the lock taken. * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before schedule() * and reacquired afterwards. * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or signal is received. The @condition is * checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * This is supposed to be called while holding the lock. The lock is * dropped before going to sleep and is reacquired afterwards. * * The macro will return -ERESTARTSYS if it was interrupted by a signal * and 0 if @condition evaluated to true. */ #define wait_event_interruptible_lock_irq(wq_head, condition, lock) \ ({ \ int __ret = 0; \ if (!(condition)) \ __ret = __wait_event_interruptible_lock_irq(wq_head, \ condition, lock,); \ __ret; \ }) #define __wait_event_lock_irq_timeout(wq_head, condition, lock, timeout, state) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ state, 0, timeout, \ spin_unlock_irq(&lock); \ __ret = schedule_timeout(__ret); \ spin_lock_irq(&lock)); /** * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets * true or a timeout elapses. The condition is checked under * the lock. This is expected to be called with the lock taken. * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before schedule() * and reacquired afterwards. * @timeout: timeout, in jiffies * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or signal is received. The @condition is * checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * * This is supposed to be called while holding the lock. The lock is * dropped before going to sleep and is reacquired afterwards. * * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it * was interrupted by a signal, and the remaining jiffies otherwise * if the condition evaluated to true before the timeout elapsed. 
*/ #define wait_event_interruptible_lock_irq_timeout(wq_head, condition, lock, \ timeout) \ ({ \ long __ret = timeout; \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_lock_irq_timeout( \ wq_head, condition, lock, timeout, \ TASK_INTERRUPTIBLE); \ __ret; \ }) #define wait_event_lock_irq_timeout(wq_head, condition, lock, timeout) \ ({ \ long __ret = timeout; \ if (!___wait_cond_timeout(condition)) \ __ret = __wait_event_lock_irq_timeout( \ wq_head, condition, lock, timeout, \ TASK_UNINTERRUPTIBLE); \ __ret; \ }) /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); #define DEFINE_WAIT_FUNC(name, function) \ struct wait_queue_entry name = { \ .private = current, \ .func = function, \ .entry = LIST_HEAD_INIT((name).entry), \ } #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) #define init_wait(wait) \ do { \ (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ INIT_LIST_HEAD(&(wait)->entry); \ (wait)->flags = 0; \ } while (0) typedef int (*task_call_f)(struct task_struct *p, void *arg); extern int task_call_func(struct task_struct *p, task_call_f func, void *arg); #endif /* _LINUX_WAIT_H */
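/*
 * Illustrative sketch of the basic wait/wake pattern provided by the API
 * above; every identifier here except the wait.h interfaces themselves is
 * hypothetical, and the block is guarded out since it is an example only.
 */
#if 0
#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq);
static bool example_ready;

/* consumer: sleep interruptibly until the producer signals readiness */
static int example_consumer(void)
{
	int err = wait_event_interruptible(example_wq, example_ready);

	if (err)
		return err;	/* -ERESTARTSYS: interrupted by a signal */
	/* example_ready is true here */
	return 0;
}

/* producer: update the condition first, then wake any sleepers */
static void example_producer(void)
{
	example_ready = true;
	wake_up(&example_wq);
}
#endif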
// SPDX-License-Identifier: GPL-2.0-or-later /* * Timers abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/delay.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/mutex.h> #include <linux/device.h> #include <linux/module.h> #include <linux/string.h> #include <linux/sched/signal.h>
#include <sound/core.h> #include <sound/timer.h> #include <sound/control.h> #include <sound/info.h> #include <sound/minors.h> #include <sound/initval.h> #include <linux/kmod.h> /* internal flags */ #define SNDRV_TIMER_IFLG_PAUSED 0x00010000 #define SNDRV_TIMER_IFLG_DEAD 0x00020000 #if IS_ENABLED(CONFIG_SND_HRTIMER) #define DEFAULT_TIMER_LIMIT 4 #else #define DEFAULT_TIMER_LIMIT 1 #endif static int timer_limit = DEFAULT_TIMER_LIMIT; static int timer_tstamp_monotonic = 1; MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("ALSA timer interface"); MODULE_LICENSE("GPL"); module_param(timer_limit, int, 0444); MODULE_PARM_DESC(timer_limit, "Maximum global timers in system."); module_param(timer_tstamp_monotonic, int, 0444); MODULE_PARM_DESC(timer_tstamp_monotonic, "Use posix monotonic clock source for timestamps (default)."); MODULE_ALIAS_CHARDEV(CONFIG_SND_MAJOR, SNDRV_MINOR_TIMER); MODULE_ALIAS("devname:snd/timer"); enum timer_tread_format { TREAD_FORMAT_NONE = 0, TREAD_FORMAT_TIME64, TREAD_FORMAT_TIME32, }; struct snd_timer_tread32 { int event; s32 tstamp_sec; s32 tstamp_nsec; unsigned int val; }; struct snd_timer_tread64 { int event; u8 pad1[4]; s64 tstamp_sec; s64 tstamp_nsec; unsigned int val; u8 pad2[4]; }; struct snd_timer_user { struct snd_timer_instance *timeri; int tread; /* enhanced read with timestamps and events */ unsigned long ticks; unsigned long overrun; int qhead; int qtail; int qused; int queue_size; bool disconnected; struct snd_timer_read *queue; struct snd_timer_tread64 *tqueue; spinlock_t qlock; unsigned long last_resolution; unsigned int filter; struct timespec64 tstamp; /* trigger tstamp */ wait_queue_head_t qchange_sleep; struct snd_fasync *fasync; struct mutex ioctl_lock; }; struct snd_timer_status32 { s32 tstamp_sec; /* Timestamp - last update */ s32 tstamp_nsec; unsigned int resolution; /* current period resolution in ns */ unsigned int lost; /* counter of master tick lost */ unsigned int overrun; /* count of read queue overruns */ unsigned int queue; /* used queue size */ unsigned char reserved[64]; /* reserved */ }; #define SNDRV_TIMER_IOCTL_STATUS32 _IOR('T', 0x14, struct snd_timer_status32) struct snd_timer_status64 { s64 tstamp_sec; /* Timestamp - last update */ s64 tstamp_nsec; unsigned int resolution; /* current period resolution in ns */ unsigned int lost; /* counter of master tick lost */ unsigned int overrun; /* count of read queue overruns */ unsigned int queue; /* used queue size */ unsigned char reserved[64]; /* reserved */ }; #define SNDRV_TIMER_IOCTL_STATUS64 _IOR('T', 0x14, struct snd_timer_status64) /* list of timers */ static LIST_HEAD(snd_timer_list); /* list of slave instances */ static LIST_HEAD(snd_timer_slave_list); /* lock for slave active lists */ static DEFINE_SPINLOCK(slave_active_lock); #define MAX_SLAVE_INSTANCES 1000 static int num_slaves; static DEFINE_MUTEX(register_mutex); static int snd_timer_free(struct snd_timer *timer); static int snd_timer_dev_free(struct snd_device *device); static int snd_timer_dev_register(struct snd_device *device); static int snd_timer_dev_disconnect(struct snd_device *device); static void snd_timer_reschedule(struct snd_timer * timer, unsigned long ticks_left); /* * create a timer instance with the given owner string. */ struct snd_timer_instance *snd_timer_instance_new(const char *owner) { struct snd_timer_instance *timeri; timeri = kzalloc(sizeof(*timeri), GFP_KERNEL); if (timeri == NULL) return NULL; timeri->owner = kstrdup(owner, GFP_KERNEL); if (! 
timeri->owner) { kfree(timeri); return NULL; } INIT_LIST_HEAD(&timeri->open_list); INIT_LIST_HEAD(&timeri->active_list); INIT_LIST_HEAD(&timeri->ack_list); INIT_LIST_HEAD(&timeri->slave_list_head); INIT_LIST_HEAD(&timeri->slave_active_head); return timeri; } EXPORT_SYMBOL(snd_timer_instance_new); void snd_timer_instance_free(struct snd_timer_instance *timeri) { if (timeri) { if (timeri->private_free) timeri->private_free(timeri); kfree(timeri->owner); kfree(timeri); } } EXPORT_SYMBOL(snd_timer_instance_free); /* * find a timer instance from the given timer id */ static struct snd_timer *snd_timer_find(struct snd_timer_id *tid) { struct snd_timer *timer; list_for_each_entry(timer, &snd_timer_list, device_list) { if (timer->tmr_class != tid->dev_class) continue; if ((timer->tmr_class == SNDRV_TIMER_CLASS_CARD || timer->tmr_class == SNDRV_TIMER_CLASS_PCM) && (timer->card == NULL || timer->card->number != tid->card)) continue; if (timer->tmr_device != tid->device) continue; if (timer->tmr_subdevice != tid->subdevice) continue; return timer; } return NULL; } #ifdef CONFIG_MODULES static void snd_timer_request(struct snd_timer_id *tid) { switch (tid->dev_class) { case SNDRV_TIMER_CLASS_GLOBAL: if (tid->device < timer_limit) request_module("snd-timer-%i", tid->device); break; case SNDRV_TIMER_CLASS_CARD: case SNDRV_TIMER_CLASS_PCM: if (tid->card < snd_ecards_limit) request_module("snd-card-%i", tid->card); break; default: break; } } #endif /* move the slave if it belongs to the master; return 1 if match */ static int check_matching_master_slave(struct snd_timer_instance *master, struct snd_timer_instance *slave) { if (slave->slave_class != master->slave_class || slave->slave_id != master->slave_id) return 0; if (master->timer->num_instances >= master->timer->max_instances) return -EBUSY; list_move_tail(&slave->open_list, &master->slave_list_head); master->timer->num_instances++; guard(spinlock_irq)(&slave_active_lock); guard(spinlock)(&master->timer->lock); slave->master = master; slave->timer = master->timer; if (slave->flags & SNDRV_TIMER_IFLG_RUNNING) list_add_tail(&slave->active_list, &master->slave_active_head); return 1; } /* * look for a master instance matching with the slave id of the given slave. * when found, relink the open_link of the slave. * * call this with register_mutex down. */ static int snd_timer_check_slave(struct snd_timer_instance *slave) { struct snd_timer *timer; struct snd_timer_instance *master; int err = 0; /* FIXME: it's really dumb to look up all entries.. */ list_for_each_entry(timer, &snd_timer_list, device_list) { list_for_each_entry(master, &timer->open_list_head, open_list) { err = check_matching_master_slave(master, slave); if (err != 0) /* match found or error */ goto out; } } out: return err < 0 ? err : 0; } /* * look for slave instances matching with the slave id of the given master. * when found, relink the open_link of slaves. * * call this with register_mutex down. */ static int snd_timer_check_master(struct snd_timer_instance *master) { struct snd_timer_instance *slave, *tmp; int err = 0; /* check all pending slaves */ list_for_each_entry_safe(slave, tmp, &snd_timer_slave_list, open_list) { err = check_matching_master_slave(master, slave); if (err < 0) break; } return err < 0 ? err : 0; } static void snd_timer_close_locked(struct snd_timer_instance *timeri, struct device **card_devp_to_put); /* * open a timer instance * when opening a master, the slave id must be here given. 
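 *
 * A usage sketch (the owner string and error handling are illustrative only):
 *
 *	struct snd_timer_id tid = {
 *		.dev_class = SNDRV_TIMER_CLASS_GLOBAL,
 *		.device = 0,
 *	};
 *	struct snd_timer_instance *ti = snd_timer_instance_new("example");
 *
 *	if (ti && snd_timer_open(ti, &tid, 0) < 0)
 *		snd_timer_instance_free(ti);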
*/ int snd_timer_open(struct snd_timer_instance *timeri, struct snd_timer_id *tid, unsigned int slave_id) { struct snd_timer *timer; struct device *card_dev_to_put = NULL; int err; mutex_lock(&register_mutex); if (tid->dev_class == SNDRV_TIMER_CLASS_SLAVE) { /* open a slave instance */ if (tid->dev_sclass <= SNDRV_TIMER_SCLASS_NONE || tid->dev_sclass > SNDRV_TIMER_SCLASS_OSS_SEQUENCER) { pr_debug("ALSA: timer: invalid slave class %i\n", tid->dev_sclass); err = -EINVAL; goto unlock; } if (num_slaves >= MAX_SLAVE_INSTANCES) { err = -EBUSY; goto unlock; } timeri->slave_class = tid->dev_sclass; timeri->slave_id = tid->device; timeri->flags |= SNDRV_TIMER_IFLG_SLAVE; list_add_tail(&timeri->open_list, &snd_timer_slave_list); num_slaves++; err = snd_timer_check_slave(timeri); goto list_added; } /* open a master instance */ timer = snd_timer_find(tid); #ifdef CONFIG_MODULES if (!timer) { mutex_unlock(&register_mutex); snd_timer_request(tid); mutex_lock(&register_mutex); timer = snd_timer_find(tid); } #endif if (!timer) { err = -ENODEV; goto unlock; } if (!list_empty(&timer->open_list_head)) { struct snd_timer_instance *t = list_entry(timer->open_list_head.next, struct snd_timer_instance, open_list); if (t->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) { err = -EBUSY; goto unlock; } } if (timer->num_instances >= timer->max_instances) { err = -EBUSY; goto unlock; } if (!try_module_get(timer->module)) { err = -EBUSY; goto unlock; } /* take a card refcount for safe disconnection */ if (timer->card) { get_device(&timer->card->card_dev); card_dev_to_put = &timer->card->card_dev; } if (list_empty(&timer->open_list_head) && timer->hw.open) { err = timer->hw.open(timer); if (err) { module_put(timer->module); goto unlock; } } timeri->timer = timer; timeri->slave_class = tid->dev_sclass; timeri->slave_id = slave_id; list_add_tail(&timeri->open_list, &timer->open_list_head); timer->num_instances++; err = snd_timer_check_master(timeri); list_added: if (err < 0) snd_timer_close_locked(timeri, &card_dev_to_put); unlock: mutex_unlock(&register_mutex); /* put_device() is called after unlock for avoiding deadlock */ if (err < 0 && card_dev_to_put) put_device(card_dev_to_put); return err; } EXPORT_SYMBOL(snd_timer_open); /* remove slave links, called from snd_timer_close_locked() below */ static void remove_slave_links(struct snd_timer_instance *timeri, struct snd_timer *timer) { struct snd_timer_instance *slave, *tmp; guard(spinlock_irq)(&slave_active_lock); guard(spinlock)(&timer->lock); timeri->timer = NULL; list_for_each_entry_safe(slave, tmp, &timeri->slave_list_head, open_list) { list_move_tail(&slave->open_list, &snd_timer_slave_list); timer->num_instances--; slave->master = NULL; slave->timer = NULL; list_del_init(&slave->ack_list); list_del_init(&slave->active_list); } } /* * close a timer instance * call this with register_mutex down. 
*/ static void snd_timer_close_locked(struct snd_timer_instance *timeri, struct device **card_devp_to_put) { struct snd_timer *timer = timeri->timer; if (timer) { guard(spinlock_irq)(&timer->lock); timeri->flags |= SNDRV_TIMER_IFLG_DEAD; } if (!list_empty(&timeri->open_list)) { list_del_init(&timeri->open_list); if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) num_slaves--; } /* force to stop the timer */ snd_timer_stop(timeri); if (timer) { timer->num_instances--; /* wait, until the active callback is finished */ spin_lock_irq(&timer->lock); while (timeri->flags & SNDRV_TIMER_IFLG_CALLBACK) { spin_unlock_irq(&timer->lock); udelay(10); spin_lock_irq(&timer->lock); } spin_unlock_irq(&timer->lock); remove_slave_links(timeri, timer); /* slave doesn't need to release timer resources below */ if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) timer = NULL; } if (timer) { if (list_empty(&timer->open_list_head) && timer->hw.close) timer->hw.close(timer); /* release a card refcount for safe disconnection */ if (timer->card) *card_devp_to_put = &timer->card->card_dev; module_put(timer->module); } } /* * close a timer instance */ void snd_timer_close(struct snd_timer_instance *timeri) { struct device *card_dev_to_put = NULL; if (snd_BUG_ON(!timeri)) return; scoped_guard(mutex, &register_mutex) snd_timer_close_locked(timeri, &card_dev_to_put); /* put_device() is called after unlock for avoiding deadlock */ if (card_dev_to_put) put_device(card_dev_to_put); } EXPORT_SYMBOL(snd_timer_close); static unsigned long snd_timer_hw_resolution(struct snd_timer *timer) { if (timer->hw.c_resolution) return timer->hw.c_resolution(timer); else return timer->hw.resolution; } unsigned long snd_timer_resolution(struct snd_timer_instance *timeri) { struct snd_timer * timer; unsigned long ret = 0; if (timeri == NULL) return 0; timer = timeri->timer; if (timer) { guard(spinlock_irqsave)(&timer->lock); ret = snd_timer_hw_resolution(timer); } return ret; } EXPORT_SYMBOL(snd_timer_resolution); static void snd_timer_notify1(struct snd_timer_instance *ti, int event) { struct snd_timer *timer = ti->timer; unsigned long resolution = 0; struct snd_timer_instance *ts; struct timespec64 tstamp; if (timer_tstamp_monotonic) ktime_get_ts64(&tstamp); else ktime_get_real_ts64(&tstamp); if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_START || event > SNDRV_TIMER_EVENT_PAUSE)) return; if (timer && (event == SNDRV_TIMER_EVENT_START || event == SNDRV_TIMER_EVENT_CONTINUE)) resolution = snd_timer_hw_resolution(timer); if (ti->ccallback) ti->ccallback(ti, event, &tstamp, resolution); if (ti->flags & SNDRV_TIMER_IFLG_SLAVE) return; if (timer == NULL) return; if (timer->hw.flags & SNDRV_TIMER_HW_SLAVE) return; event += 10; /* convert to SNDRV_TIMER_EVENT_MXXX */ list_for_each_entry(ts, &ti->slave_active_head, active_list) if (ts->ccallback) ts->ccallback(ts, event, &tstamp, resolution); } /* start/continue a master timer */ static int snd_timer_start1(struct snd_timer_instance *timeri, bool start, unsigned long ticks) { struct snd_timer *timer; int result; timer = timeri->timer; if (!timer) return -EINVAL; guard(spinlock_irqsave)(&timer->lock); if (timeri->flags & SNDRV_TIMER_IFLG_DEAD) return -EINVAL; if (timer->card && timer->card->shutdown) return -ENODEV; if (timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START)) return -EBUSY; if (start) timeri->ticks = timeri->cticks = ticks; else if (!timeri->cticks) timeri->cticks = 1; timeri->pticks = 0; list_move_tail(&timeri->active_list, &timer->active_list_head); if (timer->running) { if 
(timer->hw.flags & SNDRV_TIMER_HW_SLAVE) goto __start_now; timer->flags |= SNDRV_TIMER_FLG_RESCHED; timeri->flags |= SNDRV_TIMER_IFLG_START; result = 1; /* delayed start */ } else { if (start) timer->sticks = ticks; timer->hw.start(timer); __start_now: timer->running++; timeri->flags |= SNDRV_TIMER_IFLG_RUNNING; result = 0; } snd_timer_notify1(timeri, start ? SNDRV_TIMER_EVENT_START : SNDRV_TIMER_EVENT_CONTINUE); return result; } /* start/continue a slave timer */ static int snd_timer_start_slave(struct snd_timer_instance *timeri, bool start) { guard(spinlock_irqsave)(&slave_active_lock); if (timeri->flags & SNDRV_TIMER_IFLG_DEAD) return -EINVAL; if (timeri->flags & SNDRV_TIMER_IFLG_RUNNING) return -EBUSY; timeri->flags |= SNDRV_TIMER_IFLG_RUNNING; if (timeri->master && timeri->timer) { guard(spinlock)(&timeri->timer->lock); list_add_tail(&timeri->active_list, &timeri->master->slave_active_head); snd_timer_notify1(timeri, start ? SNDRV_TIMER_EVENT_START : SNDRV_TIMER_EVENT_CONTINUE); } return 1; /* delayed start */ } /* stop/pause a master timer */ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) { struct snd_timer *timer; timer = timeri->timer; if (!timer) return -EINVAL; guard(spinlock_irqsave)(&timer->lock); list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START))) return -EBUSY; if (timer->card && timer->card->shutdown) return 0; if (stop) { timeri->cticks = timeri->ticks; timeri->pticks = 0; } if ((timeri->flags & SNDRV_TIMER_IFLG_RUNNING) && !(--timer->running)) { timer->hw.stop(timer); if (timer->flags & SNDRV_TIMER_FLG_RESCHED) { timer->flags &= ~SNDRV_TIMER_FLG_RESCHED; snd_timer_reschedule(timer, 0); if (timer->flags & SNDRV_TIMER_FLG_CHANGE) { timer->flags &= ~SNDRV_TIMER_FLG_CHANGE; timer->hw.start(timer); } } } timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); if (stop) timeri->flags &= ~SNDRV_TIMER_IFLG_PAUSED; else timeri->flags |= SNDRV_TIMER_IFLG_PAUSED; snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : SNDRV_TIMER_EVENT_PAUSE); return 0; } /* stop/pause a slave timer */ static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop) { bool running; guard(spinlock_irqsave)(&slave_active_lock); running = timeri->flags & SNDRV_TIMER_IFLG_RUNNING; timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; if (timeri->timer) { guard(spinlock)(&timeri->timer->lock); list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); if (running) snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : SNDRV_TIMER_EVENT_PAUSE); } return running ? 0 : -EBUSY; } /* * start the timer instance */ int snd_timer_start(struct snd_timer_instance *timeri, unsigned int ticks) { if (timeri == NULL || ticks < 1) return -EINVAL; if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_start_slave(timeri, true); else return snd_timer_start1(timeri, true, ticks); } EXPORT_SYMBOL(snd_timer_start); /* * stop the timer instance. * * do not call this from the timer callback! */ int snd_timer_stop(struct snd_timer_instance *timeri) { if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_stop_slave(timeri, true); else return snd_timer_stop1(timeri, true); } EXPORT_SYMBOL(snd_timer_stop); /* * start again.. the tick is kept. 
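 *
 * Pairs with snd_timer_pause(); a sketch with a hypothetical instance @ti:
 *
 *	snd_timer_start(ti, ticks);
 *	...
 *	snd_timer_pause(ti);	// remembers the ticks left
 *	...
 *	snd_timer_continue(ti);	// resumes with the remaining ticks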
*/ int snd_timer_continue(struct snd_timer_instance *timeri) { /* timer can continue only after pause */ if (!(timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) return -EINVAL; if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_start_slave(timeri, false); else return snd_timer_start1(timeri, false, 0); } EXPORT_SYMBOL(snd_timer_continue); /* * pause.. remember the ticks left */ int snd_timer_pause(struct snd_timer_instance * timeri) { if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_stop_slave(timeri, false); else return snd_timer_stop1(timeri, false); } EXPORT_SYMBOL(snd_timer_pause); /* * reschedule the timer * * start pending instances and check the scheduling ticks. * when the scheduling ticks is changed set CHANGE flag to reprogram the timer. */ static void snd_timer_reschedule(struct snd_timer * timer, unsigned long ticks_left) { struct snd_timer_instance *ti; unsigned long ticks = ~0UL; list_for_each_entry(ti, &timer->active_list_head, active_list) { if (ti->flags & SNDRV_TIMER_IFLG_START) { ti->flags &= ~SNDRV_TIMER_IFLG_START; ti->flags |= SNDRV_TIMER_IFLG_RUNNING; timer->running++; } if (ti->flags & SNDRV_TIMER_IFLG_RUNNING) { if (ticks > ti->cticks) ticks = ti->cticks; } } if (ticks == ~0UL) { timer->flags &= ~SNDRV_TIMER_FLG_RESCHED; return; } if (ticks > timer->hw.ticks) ticks = timer->hw.ticks; if (ticks_left != ticks) timer->flags |= SNDRV_TIMER_FLG_CHANGE; timer->sticks = ticks; } /* call callbacks in timer ack list */ static void snd_timer_process_callbacks(struct snd_timer *timer, struct list_head *head) { struct snd_timer_instance *ti; unsigned long resolution, ticks; while (!list_empty(head)) { ti = list_first_entry(head, struct snd_timer_instance, ack_list); /* remove from ack_list and make empty */ list_del_init(&ti->ack_list); if (!(ti->flags & SNDRV_TIMER_IFLG_DEAD)) { ticks = ti->pticks; ti->pticks = 0; resolution = ti->resolution; ti->flags |= SNDRV_TIMER_IFLG_CALLBACK; spin_unlock(&timer->lock); if (ti->callback) ti->callback(ti, resolution, ticks); spin_lock(&timer->lock); ti->flags &= ~SNDRV_TIMER_IFLG_CALLBACK; } } } /* clear pending instances from ack list */ static void snd_timer_clear_callbacks(struct snd_timer *timer, struct list_head *head) { guard(spinlock_irqsave)(&timer->lock); while (!list_empty(head)) list_del_init(head->next); } /* * timer work * */ static void snd_timer_work(struct work_struct *work) { struct snd_timer *timer = container_of(work, struct snd_timer, task_work); if (timer->card && timer->card->shutdown) { snd_timer_clear_callbacks(timer, &timer->sack_list_head); return; } guard(spinlock_irqsave)(&timer->lock); snd_timer_process_callbacks(timer, &timer->sack_list_head); } /* * timer interrupt * * ticks_left is usually equal to timer->sticks. * */ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) { struct snd_timer_instance *ti, *ts, *tmp; unsigned long resolution; struct list_head *ack_list_head; if (timer == NULL) return; if (timer->card && timer->card->shutdown) { snd_timer_clear_callbacks(timer, &timer->ack_list_head); return; } guard(spinlock_irqsave)(&timer->lock); /* remember the current resolution */ resolution = snd_timer_hw_resolution(timer); /* loop for all active instances * Here we cannot use list_for_each_entry because the active_list of a * processed instance is relinked to done_list_head before the callback * is called. 
*/ list_for_each_entry_safe(ti, tmp, &timer->active_list_head, active_list) { if (ti->flags & SNDRV_TIMER_IFLG_DEAD) continue; if (!(ti->flags & SNDRV_TIMER_IFLG_RUNNING)) continue; ti->pticks += ticks_left; ti->resolution = resolution; if (ti->cticks < ticks_left) ti->cticks = 0; else ti->cticks -= ticks_left; if (ti->cticks) /* not expired */ continue; if (ti->flags & SNDRV_TIMER_IFLG_AUTO) { ti->cticks = ti->ticks; } else { ti->flags &= ~SNDRV_TIMER_IFLG_RUNNING; --timer->running; list_del_init(&ti->active_list); } if ((timer->hw.flags & SNDRV_TIMER_HW_WORK) || (ti->flags & SNDRV_TIMER_IFLG_FAST)) ack_list_head = &timer->ack_list_head; else ack_list_head = &timer->sack_list_head; if (list_empty(&ti->ack_list)) list_add_tail(&ti->ack_list, ack_list_head); list_for_each_entry(ts, &ti->slave_active_head, active_list) { ts->pticks = ti->pticks; ts->resolution = resolution; if (list_empty(&ts->ack_list)) list_add_tail(&ts->ack_list, ack_list_head); } } if (timer->flags & SNDRV_TIMER_FLG_RESCHED) snd_timer_reschedule(timer, timer->sticks); if (timer->running) { if (timer->hw.flags & SNDRV_TIMER_HW_STOP) { timer->hw.stop(timer); timer->flags |= SNDRV_TIMER_FLG_CHANGE; } if (!(timer->hw.flags & SNDRV_TIMER_HW_AUTO) || (timer->flags & SNDRV_TIMER_FLG_CHANGE)) { /* restart timer */ timer->flags &= ~SNDRV_TIMER_FLG_CHANGE; timer->hw.start(timer); } } else { timer->hw.stop(timer); } /* now process all fast callbacks */ snd_timer_process_callbacks(timer, &timer->ack_list_head); /* do we have any slow callbacks? */ if (!list_empty(&timer->sack_list_head)) queue_work(system_highpri_wq, &timer->task_work); } EXPORT_SYMBOL(snd_timer_interrupt); /* */ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid, struct snd_timer **rtimer) { struct snd_timer *timer; int err; static const struct snd_device_ops ops = { .dev_free = snd_timer_dev_free, .dev_register = snd_timer_dev_register, .dev_disconnect = snd_timer_dev_disconnect, }; if (snd_BUG_ON(!tid)) return -EINVAL; if (tid->dev_class == SNDRV_TIMER_CLASS_CARD || tid->dev_class == SNDRV_TIMER_CLASS_PCM) { if (WARN_ON(!card)) return -EINVAL; } if (rtimer) *rtimer = NULL; timer = kzalloc(sizeof(*timer), GFP_KERNEL); if (!timer) return -ENOMEM; timer->tmr_class = tid->dev_class; timer->card = card; timer->tmr_device = tid->device; timer->tmr_subdevice = tid->subdevice; if (id) strscpy(timer->id, id, sizeof(timer->id)); timer->sticks = 1; INIT_LIST_HEAD(&timer->device_list); INIT_LIST_HEAD(&timer->open_list_head); INIT_LIST_HEAD(&timer->active_list_head); INIT_LIST_HEAD(&timer->ack_list_head); INIT_LIST_HEAD(&timer->sack_list_head); spin_lock_init(&timer->lock); INIT_WORK(&timer->task_work, snd_timer_work); timer->max_instances = 1000; /* default limit per timer */ if (card != NULL) { timer->module = card->module; err = snd_device_new(card, SNDRV_DEV_TIMER, timer, &ops); if (err < 0) { snd_timer_free(timer); return err; } } if (rtimer) *rtimer = timer; return 0; } EXPORT_SYMBOL(snd_timer_new); static int snd_timer_free(struct snd_timer *timer) { if (!timer) return 0; guard(mutex)(&register_mutex); if (! 
list_empty(&timer->open_list_head)) { struct list_head *p, *n; struct snd_timer_instance *ti; pr_warn("ALSA: timer %p is busy?\n", timer); list_for_each_safe(p, n, &timer->open_list_head) { list_del_init(p); ti = list_entry(p, struct snd_timer_instance, open_list); ti->timer = NULL; } } list_del(&timer->device_list); if (timer->private_free) timer->private_free(timer); kfree(timer); return 0; } static int snd_timer_dev_free(struct snd_device *device) { struct snd_timer *timer = device->device_data; return snd_timer_free(timer); } static int snd_timer_dev_register(struct snd_device *dev) { struct snd_timer *timer = dev->device_data; struct snd_timer *timer1; if (snd_BUG_ON(!timer || !timer->hw.start || !timer->hw.stop)) return -ENXIO; if (!(timer->hw.flags & SNDRV_TIMER_HW_SLAVE) && !timer->hw.resolution && timer->hw.c_resolution == NULL) return -EINVAL; guard(mutex)(&register_mutex); list_for_each_entry(timer1, &snd_timer_list, device_list) { if (timer1->tmr_class > timer->tmr_class) break; if (timer1->tmr_class < timer->tmr_class) continue; if (timer1->card && timer->card) { if (timer1->card->number > timer->card->number) break; if (timer1->card->number < timer->card->number) continue; } if (timer1->tmr_device > timer->tmr_device) break; if (timer1->tmr_device < timer->tmr_device) continue; if (timer1->tmr_subdevice > timer->tmr_subdevice) break; if (timer1->tmr_subdevice < timer->tmr_subdevice) continue; /* conflicts.. */ return -EBUSY; } list_add_tail(&timer->device_list, &timer1->device_list); return 0; } static int snd_timer_dev_disconnect(struct snd_device *device) { struct snd_timer *timer = device->device_data; struct snd_timer_instance *ti; guard(mutex)(&register_mutex); list_del_init(&timer->device_list); /* wake up pending sleepers */ list_for_each_entry(ti, &timer->open_list_head, open_list) { if (ti->disconnect) ti->disconnect(ti); } return 0; } void snd_timer_notify(struct snd_timer *timer, int event, struct timespec64 *tstamp) { unsigned long resolution = 0; struct snd_timer_instance *ti, *ts; if (timer->card && timer->card->shutdown) return; if (! 
(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) return; if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_MSTART || event > SNDRV_TIMER_EVENT_MRESUME)) return; guard(spinlock_irqsave)(&timer->lock); if (event == SNDRV_TIMER_EVENT_MSTART || event == SNDRV_TIMER_EVENT_MCONTINUE || event == SNDRV_TIMER_EVENT_MRESUME) resolution = snd_timer_hw_resolution(timer); list_for_each_entry(ti, &timer->active_list_head, active_list) { if (ti->ccallback) ti->ccallback(ti, event, tstamp, resolution); list_for_each_entry(ts, &ti->slave_active_head, active_list) if (ts->ccallback) ts->ccallback(ts, event, tstamp, resolution); } } EXPORT_SYMBOL(snd_timer_notify); /* * exported functions for global timers */ int snd_timer_global_new(char *id, int device, struct snd_timer **rtimer) { struct snd_timer_id tid; tid.dev_class = SNDRV_TIMER_CLASS_GLOBAL; tid.dev_sclass = SNDRV_TIMER_SCLASS_NONE; tid.card = -1; tid.device = device; tid.subdevice = 0; return snd_timer_new(NULL, id, &tid, rtimer); } EXPORT_SYMBOL(snd_timer_global_new); int snd_timer_global_free(struct snd_timer *timer) { return snd_timer_free(timer); } EXPORT_SYMBOL(snd_timer_global_free); int snd_timer_global_register(struct snd_timer *timer) { struct snd_device dev; memset(&dev, 0, sizeof(dev)); dev.device_data = timer; return snd_timer_dev_register(&dev); } EXPORT_SYMBOL(snd_timer_global_register); /* * System timer */ struct snd_timer_system_private { struct timer_list tlist; struct snd_timer *snd_timer; unsigned long last_expires; unsigned long last_jiffies; unsigned long correction; }; static void snd_timer_s_function(struct timer_list *t) { struct snd_timer_system_private *priv = from_timer(priv, t, tlist); struct snd_timer *timer = priv->snd_timer; unsigned long jiff = jiffies; if (time_after(jiff, priv->last_expires)) priv->correction += (long)jiff - (long)priv->last_expires; snd_timer_interrupt(timer, (long)jiff - (long)priv->last_jiffies); } static int snd_timer_s_start(struct snd_timer * timer) { struct snd_timer_system_private *priv; unsigned long njiff; priv = (struct snd_timer_system_private *) timer->private_data; njiff = (priv->last_jiffies = jiffies); if (priv->correction > timer->sticks - 1) { priv->correction -= timer->sticks - 1; njiff++; } else { njiff += timer->sticks - priv->correction; priv->correction = 0; } priv->last_expires = njiff; mod_timer(&priv->tlist, njiff); return 0; } static int snd_timer_s_stop(struct snd_timer * timer) { struct snd_timer_system_private *priv; unsigned long jiff; priv = (struct snd_timer_system_private *) timer->private_data; del_timer(&priv->tlist); jiff = jiffies; if (time_before(jiff, priv->last_expires)) timer->sticks = priv->last_expires - jiff; else timer->sticks = 1; priv->correction = 0; return 0; } static int snd_timer_s_close(struct snd_timer *timer) { struct snd_timer_system_private *priv; priv = (struct snd_timer_system_private *)timer->private_data; del_timer_sync(&priv->tlist); return 0; } static const struct snd_timer_hardware snd_timer_system = { .flags = SNDRV_TIMER_HW_FIRST | SNDRV_TIMER_HW_WORK, .resolution = 1000000000L / HZ, .ticks = 10000000L, .close = snd_timer_s_close, .start = snd_timer_s_start, .stop = snd_timer_s_stop }; static void snd_timer_free_system(struct snd_timer *timer) { kfree(timer->private_data); } static int snd_timer_register_system(void) { struct snd_timer *timer; struct snd_timer_system_private *priv; int err; err = snd_timer_global_new("system", SNDRV_TIMER_GLOBAL_SYSTEM, &timer); if (err < 0) return err; strcpy(timer->name, "system timer"); timer->hw = 
snd_timer_system; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) { snd_timer_free(timer); return -ENOMEM; } priv->snd_timer = timer; timer_setup(&priv->tlist, snd_timer_s_function, 0); timer->private_data = priv; timer->private_free = snd_timer_free_system; return snd_timer_global_register(timer); } #ifdef CONFIG_SND_PROC_FS /* * Info interface */ static void snd_timer_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_timer *timer; struct snd_timer_instance *ti; unsigned long resolution; guard(mutex)(&register_mutex); list_for_each_entry(timer, &snd_timer_list, device_list) { if (timer->card && timer->card->shutdown) continue; switch (timer->tmr_class) { case SNDRV_TIMER_CLASS_GLOBAL: snd_iprintf(buffer, "G%i: ", timer->tmr_device); break; case SNDRV_TIMER_CLASS_CARD: snd_iprintf(buffer, "C%i-%i: ", timer->card->number, timer->tmr_device); break; case SNDRV_TIMER_CLASS_PCM: snd_iprintf(buffer, "P%i-%i-%i: ", timer->card->number, timer->tmr_device, timer->tmr_subdevice); break; default: snd_iprintf(buffer, "?%i-%i-%i-%i: ", timer->tmr_class, timer->card ? timer->card->number : -1, timer->tmr_device, timer->tmr_subdevice); } snd_iprintf(buffer, "%s :", timer->name); scoped_guard(spinlock_irq, &timer->lock) resolution = snd_timer_hw_resolution(timer); if (resolution) snd_iprintf(buffer, " %lu.%03luus (%lu ticks)", resolution / 1000, resolution % 1000, timer->hw.ticks); if (timer->hw.flags & SNDRV_TIMER_HW_SLAVE) snd_iprintf(buffer, " SLAVE"); snd_iprintf(buffer, "\n"); list_for_each_entry(ti, &timer->open_list_head, open_list) snd_iprintf(buffer, " Client %s : %s\n", ti->owner ? ti->owner : "unknown", (ti->flags & (SNDRV_TIMER_IFLG_START | SNDRV_TIMER_IFLG_RUNNING)) ? "running" : "stopped"); } } static struct snd_info_entry *snd_timer_proc_entry; static void __init snd_timer_proc_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "timers", NULL); if (entry != NULL) { entry->c.text.read = snd_timer_proc_read; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } snd_timer_proc_entry = entry; } static void __exit snd_timer_proc_done(void) { snd_info_free_entry(snd_timer_proc_entry); } #else /* !CONFIG_SND_PROC_FS */ #define snd_timer_proc_init() #define snd_timer_proc_done() #endif /* * USER SPACE interface */ static void snd_timer_user_interrupt(struct snd_timer_instance *timeri, unsigned long resolution, unsigned long ticks) { struct snd_timer_user *tu = timeri->callback_data; struct snd_timer_read *r; int prev; guard(spinlock)(&tu->qlock); if (tu->qused > 0) { prev = tu->qtail == 0 ? 
tu->queue_size - 1 : tu->qtail - 1; r = &tu->queue[prev]; if (r->resolution == resolution) { r->ticks += ticks; goto __wake; } } if (tu->qused >= tu->queue_size) { tu->overrun++; } else { r = &tu->queue[tu->qtail++]; tu->qtail %= tu->queue_size; r->resolution = resolution; r->ticks = ticks; tu->qused++; } __wake: snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); } static void snd_timer_user_append_to_tqueue(struct snd_timer_user *tu, struct snd_timer_tread64 *tread) { if (tu->qused >= tu->queue_size) { tu->overrun++; } else { memcpy(&tu->tqueue[tu->qtail++], tread, sizeof(*tread)); tu->qtail %= tu->queue_size; tu->qused++; } } static void snd_timer_user_ccallback(struct snd_timer_instance *timeri, int event, struct timespec64 *tstamp, unsigned long resolution) { struct snd_timer_user *tu = timeri->callback_data; struct snd_timer_tread64 r1; if (event >= SNDRV_TIMER_EVENT_START && event <= SNDRV_TIMER_EVENT_PAUSE) tu->tstamp = *tstamp; if ((tu->filter & (1 << event)) == 0 || !tu->tread) return; memset(&r1, 0, sizeof(r1)); r1.event = event; r1.tstamp_sec = tstamp->tv_sec; r1.tstamp_nsec = tstamp->tv_nsec; r1.val = resolution; scoped_guard(spinlock_irqsave, &tu->qlock) snd_timer_user_append_to_tqueue(tu, &r1); snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); } static void snd_timer_user_disconnect(struct snd_timer_instance *timeri) { struct snd_timer_user *tu = timeri->callback_data; tu->disconnected = true; wake_up(&tu->qchange_sleep); } static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri, unsigned long resolution, unsigned long ticks) { struct snd_timer_user *tu = timeri->callback_data; struct snd_timer_tread64 *r, r1; struct timespec64 tstamp; int prev, append = 0; memset(&r1, 0, sizeof(r1)); memset(&tstamp, 0, sizeof(tstamp)); scoped_guard(spinlock, &tu->qlock) { if ((tu->filter & ((1 << SNDRV_TIMER_EVENT_RESOLUTION) | (1 << SNDRV_TIMER_EVENT_TICK))) == 0) return; if (tu->last_resolution != resolution || ticks > 0) { if (timer_tstamp_monotonic) ktime_get_ts64(&tstamp); else ktime_get_real_ts64(&tstamp); } if ((tu->filter & (1 << SNDRV_TIMER_EVENT_RESOLUTION)) && tu->last_resolution != resolution) { r1.event = SNDRV_TIMER_EVENT_RESOLUTION; r1.tstamp_sec = tstamp.tv_sec; r1.tstamp_nsec = tstamp.tv_nsec; r1.val = resolution; snd_timer_user_append_to_tqueue(tu, &r1); tu->last_resolution = resolution; append++; } if ((tu->filter & (1 << SNDRV_TIMER_EVENT_TICK)) == 0) break; if (ticks == 0) break; if (tu->qused > 0) { prev = tu->qtail == 0 ? 
tu->queue_size - 1 : tu->qtail - 1; r = &tu->tqueue[prev]; if (r->event == SNDRV_TIMER_EVENT_TICK) { r->tstamp_sec = tstamp.tv_sec; r->tstamp_nsec = tstamp.tv_nsec; r->val += ticks; append++; break; } } r1.event = SNDRV_TIMER_EVENT_TICK; r1.tstamp_sec = tstamp.tv_sec; r1.tstamp_nsec = tstamp.tv_nsec; r1.val = ticks; snd_timer_user_append_to_tqueue(tu, &r1); append++; } if (append == 0) return; snd_kill_fasync(tu->fasync, SIGIO, POLL_IN); wake_up(&tu->qchange_sleep); } static int realloc_user_queue(struct snd_timer_user *tu, int size) { struct snd_timer_read *queue = NULL; struct snd_timer_tread64 *tqueue = NULL; if (tu->tread) { tqueue = kcalloc(size, sizeof(*tqueue), GFP_KERNEL); if (!tqueue) return -ENOMEM; } else { queue = kcalloc(size, sizeof(*queue), GFP_KERNEL); if (!queue) return -ENOMEM; } guard(spinlock_irq)(&tu->qlock); kfree(tu->queue); kfree(tu->tqueue); tu->queue_size = size; tu->queue = queue; tu->tqueue = tqueue; tu->qhead = tu->qtail = tu->qused = 0; return 0; } static int snd_timer_user_open(struct inode *inode, struct file *file) { struct snd_timer_user *tu; int err; err = stream_open(inode, file); if (err < 0) return err; tu = kzalloc(sizeof(*tu), GFP_KERNEL); if (tu == NULL) return -ENOMEM; spin_lock_init(&tu->qlock); init_waitqueue_head(&tu->qchange_sleep); mutex_init(&tu->ioctl_lock); tu->ticks = 1; if (realloc_user_queue(tu, 128) < 0) { kfree(tu); return -ENOMEM; } file->private_data = tu; return 0; } static int snd_timer_user_release(struct inode *inode, struct file *file) { struct snd_timer_user *tu; if (file->private_data) { tu = file->private_data; file->private_data = NULL; scoped_guard(mutex, &tu->ioctl_lock) { if (tu->timeri) { snd_timer_close(tu->timeri); snd_timer_instance_free(tu->timeri); } } snd_fasync_free(tu->fasync); kfree(tu->queue); kfree(tu->tqueue); kfree(tu); } return 0; } static void snd_timer_user_zero_id(struct snd_timer_id *id) { id->dev_class = SNDRV_TIMER_CLASS_NONE; id->dev_sclass = SNDRV_TIMER_SCLASS_NONE; id->card = -1; id->device = -1; id->subdevice = -1; } static void snd_timer_user_copy_id(struct snd_timer_id *id, struct snd_timer *timer) { id->dev_class = timer->tmr_class; id->dev_sclass = SNDRV_TIMER_SCLASS_NONE; id->card = timer->card ? timer->card->number : -1; id->device = timer->tmr_device; id->subdevice = timer->tmr_subdevice; } static int snd_timer_user_next_device(struct snd_timer_id __user *_tid) { struct snd_timer_id id; struct snd_timer *timer; struct list_head *p; if (copy_from_user(&id, _tid, sizeof(id))) return -EFAULT; guard(mutex)(&register_mutex); if (id.dev_class < 0) { /* first item */ if (list_empty(&snd_timer_list)) snd_timer_user_zero_id(&id); else { timer = list_entry(snd_timer_list.next, struct snd_timer, device_list); snd_timer_user_copy_id(&id, timer); } } else { switch (id.dev_class) { case SNDRV_TIMER_CLASS_GLOBAL: id.device = id.device < 0 ? 
0 : id.device + 1; list_for_each(p, &snd_timer_list) { timer = list_entry(p, struct snd_timer, device_list); if (timer->tmr_class > SNDRV_TIMER_CLASS_GLOBAL) { snd_timer_user_copy_id(&id, timer); break; } if (timer->tmr_device >= id.device) { snd_timer_user_copy_id(&id, timer); break; } } if (p == &snd_timer_list) snd_timer_user_zero_id(&id); break; case SNDRV_TIMER_CLASS_CARD: case SNDRV_TIMER_CLASS_PCM: if (id.card < 0) { id.card = 0; } else { if (id.device < 0) { id.device = 0; } else { if (id.subdevice < 0) id.subdevice = 0; else if (id.subdevice < INT_MAX) id.subdevice++; } } list_for_each(p, &snd_timer_list) { timer = list_entry(p, struct snd_timer, device_list); if (timer->tmr_class > id.dev_class) { snd_timer_user_copy_id(&id, timer); break; } if (timer->tmr_class < id.dev_class) continue; if (timer->card->number > id.card) { snd_timer_user_copy_id(&id, timer); break; } if (timer->card->number < id.card) continue; if (timer->tmr_device > id.device) { snd_timer_user_copy_id(&id, timer); break; } if (timer->tmr_device < id.device) continue; if (timer->tmr_subdevice > id.subdevice) { snd_timer_user_copy_id(&id, timer); break; } if (timer->tmr_subdevice < id.subdevice) continue; snd_timer_user_copy_id(&id, timer); break; } if (p == &snd_timer_list) snd_timer_user_zero_id(&id); break; default: snd_timer_user_zero_id(&id); } } if (copy_to_user(_tid, &id, sizeof(*_tid))) return -EFAULT; return 0; } static int snd_timer_user_ginfo(struct file *file, struct snd_timer_ginfo __user *_ginfo) { struct snd_timer_ginfo *ginfo __free(kfree) = NULL; struct snd_timer_id tid; struct snd_timer *t; struct list_head *p; ginfo = memdup_user(_ginfo, sizeof(*ginfo)); if (IS_ERR(ginfo)) return PTR_ERR(no_free_ptr(ginfo)); tid = ginfo->tid; memset(ginfo, 0, sizeof(*ginfo)); ginfo->tid = tid; guard(mutex)(&register_mutex); t = snd_timer_find(&tid); if (!t) return -ENODEV; ginfo->card = t->card ? 
t->card->number : -1; if (t->hw.flags & SNDRV_TIMER_HW_SLAVE) ginfo->flags |= SNDRV_TIMER_FLG_SLAVE; strscpy(ginfo->id, t->id, sizeof(ginfo->id)); strscpy(ginfo->name, t->name, sizeof(ginfo->name)); scoped_guard(spinlock_irq, &t->lock) ginfo->resolution = snd_timer_hw_resolution(t); if (t->hw.resolution_min > 0) { ginfo->resolution_min = t->hw.resolution_min; ginfo->resolution_max = t->hw.resolution_max; } list_for_each(p, &t->open_list_head) { ginfo->clients++; } if (copy_to_user(_ginfo, ginfo, sizeof(*ginfo))) return -EFAULT; return 0; } static int timer_set_gparams(struct snd_timer_gparams *gparams) { struct snd_timer *t; guard(mutex)(&register_mutex); t = snd_timer_find(&gparams->tid); if (!t) return -ENODEV; if (!list_empty(&t->open_list_head)) return -EBUSY; if (!t->hw.set_period) return -ENOSYS; return t->hw.set_period(t, gparams->period_num, gparams->period_den); } static int snd_timer_user_gparams(struct file *file, struct snd_timer_gparams __user *_gparams) { struct snd_timer_gparams gparams; if (copy_from_user(&gparams, _gparams, sizeof(gparams))) return -EFAULT; return timer_set_gparams(&gparams); } static int snd_timer_user_gstatus(struct file *file, struct snd_timer_gstatus __user *_gstatus) { struct snd_timer_gstatus gstatus; struct snd_timer_id tid; struct snd_timer *t; int err = 0; if (copy_from_user(&gstatus, _gstatus, sizeof(gstatus))) return -EFAULT; tid = gstatus.tid; memset(&gstatus, 0, sizeof(gstatus)); gstatus.tid = tid; guard(mutex)(&register_mutex); t = snd_timer_find(&tid); if (t != NULL) { guard(spinlock_irq)(&t->lock); gstatus.resolution = snd_timer_hw_resolution(t); if (t->hw.precise_resolution) { t->hw.precise_resolution(t, &gstatus.resolution_num, &gstatus.resolution_den); } else { gstatus.resolution_num = gstatus.resolution; gstatus.resolution_den = 1000000000uL; } } else { err = -ENODEV; } if (err >= 0 && copy_to_user(_gstatus, &gstatus, sizeof(gstatus))) err = -EFAULT; return err; } static int snd_timer_user_tselect(struct file *file, struct snd_timer_select __user *_tselect) { struct snd_timer_user *tu; struct snd_timer_select tselect; char str[32]; int err = 0; tu = file->private_data; if (tu->timeri) { snd_timer_close(tu->timeri); snd_timer_instance_free(tu->timeri); tu->timeri = NULL; } if (copy_from_user(&tselect, _tselect, sizeof(tselect))) { err = -EFAULT; goto __err; } sprintf(str, "application %i", current->pid); if (tselect.id.dev_class != SNDRV_TIMER_CLASS_SLAVE) tselect.id.dev_sclass = SNDRV_TIMER_SCLASS_APPLICATION; tu->timeri = snd_timer_instance_new(str); if (!tu->timeri) { err = -ENOMEM; goto __err; } tu->timeri->flags |= SNDRV_TIMER_IFLG_FAST; tu->timeri->callback = tu->tread ? snd_timer_user_tinterrupt : snd_timer_user_interrupt; tu->timeri->ccallback = snd_timer_user_ccallback; tu->timeri->callback_data = (void *)tu; tu->timeri->disconnect = snd_timer_user_disconnect; err = snd_timer_open(tu->timeri, &tselect.id, current->pid); if (err < 0) { snd_timer_instance_free(tu->timeri); tu->timeri = NULL; } __err: return err; } static int snd_timer_user_info(struct file *file, struct snd_timer_info __user *_info) { struct snd_timer_user *tu; struct snd_timer_info *info __free(kfree) = NULL; struct snd_timer *t; tu = file->private_data; if (!tu->timeri) return -EBADFD; t = tu->timeri->timer; if (!t) return -EBADFD; info = kzalloc(sizeof(*info), GFP_KERNEL); if (! info) return -ENOMEM; info->card = t->card ? 
t->card->number : -1; if (t->hw.flags & SNDRV_TIMER_HW_SLAVE) info->flags |= SNDRV_TIMER_FLG_SLAVE; strscpy(info->id, t->id, sizeof(info->id)); strscpy(info->name, t->name, sizeof(info->name)); scoped_guard(spinlock_irq, &t->lock) info->resolution = snd_timer_hw_resolution(t); if (copy_to_user(_info, info, sizeof(*_info))) return -EFAULT; return 0; } static int snd_timer_user_params(struct file *file, struct snd_timer_params __user *_params) { struct snd_timer_user *tu; struct snd_timer_params params; struct snd_timer *t; int err; tu = file->private_data; if (!tu->timeri) return -EBADFD; t = tu->timeri->timer; if (!t) return -EBADFD; if (copy_from_user(&params, _params, sizeof(params))) return -EFAULT; if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE)) { u64 resolution; if (params.ticks < 1) { err = -EINVAL; goto _end; } /* Don't allow resolution less than 1ms */ resolution = snd_timer_resolution(tu->timeri); resolution *= params.ticks; if (resolution < 1000000) { err = -EINVAL; goto _end; } } if (params.queue_size > 0 && (params.queue_size < 32 || params.queue_size > 1024)) { err = -EINVAL; goto _end; } if (params.filter & ~((1<<SNDRV_TIMER_EVENT_RESOLUTION)| (1<<SNDRV_TIMER_EVENT_TICK)| (1<<SNDRV_TIMER_EVENT_START)| (1<<SNDRV_TIMER_EVENT_STOP)| (1<<SNDRV_TIMER_EVENT_CONTINUE)| (1<<SNDRV_TIMER_EVENT_PAUSE)| (1<<SNDRV_TIMER_EVENT_SUSPEND)| (1<<SNDRV_TIMER_EVENT_RESUME)| (1<<SNDRV_TIMER_EVENT_MSTART)| (1<<SNDRV_TIMER_EVENT_MSTOP)| (1<<SNDRV_TIMER_EVENT_MCONTINUE)| (1<<SNDRV_TIMER_EVENT_MPAUSE)| (1<<SNDRV_TIMER_EVENT_MSUSPEND)| (1<<SNDRV_TIMER_EVENT_MRESUME))) { err = -EINVAL; goto _end; } snd_timer_stop(tu->timeri); scoped_guard(spinlock_irq, &t->lock) { tu->timeri->flags &= ~(SNDRV_TIMER_IFLG_AUTO| SNDRV_TIMER_IFLG_EXCLUSIVE| SNDRV_TIMER_IFLG_EARLY_EVENT); if (params.flags & SNDRV_TIMER_PSFLG_AUTO) tu->timeri->flags |= SNDRV_TIMER_IFLG_AUTO; if (params.flags & SNDRV_TIMER_PSFLG_EXCLUSIVE) tu->timeri->flags |= SNDRV_TIMER_IFLG_EXCLUSIVE; if (params.flags & SNDRV_TIMER_PSFLG_EARLY_EVENT) tu->timeri->flags |= SNDRV_TIMER_IFLG_EARLY_EVENT; } if (params.queue_size > 0 && (unsigned int)tu->queue_size != params.queue_size) { err = realloc_user_queue(tu, params.queue_size); if (err < 0) goto _end; } scoped_guard(spinlock_irq, &tu->qlock) { tu->qhead = tu->qtail = tu->qused = 0; if (tu->timeri->flags & SNDRV_TIMER_IFLG_EARLY_EVENT) { if (tu->tread) { struct snd_timer_tread64 tread; memset(&tread, 0, sizeof(tread)); tread.event = SNDRV_TIMER_EVENT_EARLY; tread.tstamp_sec = 0; tread.tstamp_nsec = 0; tread.val = 0; snd_timer_user_append_to_tqueue(tu, &tread); } else { struct snd_timer_read *r = &tu->queue[0]; r->resolution = 0; r->ticks = 0; tu->qused++; tu->qtail++; } } tu->filter = params.filter; tu->ticks = params.ticks; } err = 0; _end: if (copy_to_user(_params, &params, sizeof(params))) return -EFAULT; return err; } static int snd_timer_user_status32(struct file *file, struct snd_timer_status32 __user *_status) { struct snd_timer_user *tu; struct snd_timer_status32 status; tu = file->private_data; if (!tu->timeri) return -EBADFD; memset(&status, 0, sizeof(status)); status.tstamp_sec = tu->tstamp.tv_sec; status.tstamp_nsec = tu->tstamp.tv_nsec; status.resolution = snd_timer_resolution(tu->timeri); status.lost = tu->timeri->lost; status.overrun = tu->overrun; scoped_guard(spinlock_irq, &tu->qlock) status.queue = tu->qused; if (copy_to_user(_status, &status, sizeof(status))) return -EFAULT; return 0; } static int snd_timer_user_status64(struct file *file, struct snd_timer_status64 __user *_status) { struct 
snd_timer_user *tu; struct snd_timer_status64 status; tu = file->private_data; if (!tu->timeri) return -EBADFD; memset(&status, 0, sizeof(status)); status.tstamp_sec = tu->tstamp.tv_sec; status.tstamp_nsec = tu->tstamp.tv_nsec; status.resolution = snd_timer_resolution(tu->timeri); status.lost = tu->timeri->lost; status.overrun = tu->overrun; scoped_guard(spinlock_irq, &tu->qlock) status.queue = tu->qused; if (copy_to_user(_status, &status, sizeof(status))) return -EFAULT; return 0; } static int snd_timer_user_start(struct file *file) { int err; struct snd_timer_user *tu; tu = file->private_data; if (!tu->timeri) return -EBADFD; snd_timer_stop(tu->timeri); tu->timeri->lost = 0; tu->last_resolution = 0; err = snd_timer_start(tu->timeri, tu->ticks); if (err < 0) return err; return 0; } static int snd_timer_user_stop(struct file *file) { int err; struct snd_timer_user *tu; tu = file->private_data; if (!tu->timeri) return -EBADFD; err = snd_timer_stop(tu->timeri); if (err < 0) return err; return 0; } static int snd_timer_user_continue(struct file *file) { int err; struct snd_timer_user *tu; tu = file->private_data; if (!tu->timeri) return -EBADFD; /* start timer instead of continue if it's not used before */ if (!(tu->timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) return snd_timer_user_start(file); tu->timeri->lost = 0; err = snd_timer_continue(tu->timeri); if (err < 0) return err; return 0; } static int snd_timer_user_pause(struct file *file) { int err; struct snd_timer_user *tu; tu = file->private_data; if (!tu->timeri) return -EBADFD; err = snd_timer_pause(tu->timeri); if (err < 0) return err; return 0; } static int snd_timer_user_tread(void __user *argp, struct snd_timer_user *tu, unsigned int cmd, bool compat) { int __user *p = argp; int xarg, old_tread; if (tu->timeri) /* too late */ return -EBUSY; if (get_user(xarg, p)) return -EFAULT; old_tread = tu->tread; if (!xarg) tu->tread = TREAD_FORMAT_NONE; else if (cmd == SNDRV_TIMER_IOCTL_TREAD64 || (IS_ENABLED(CONFIG_64BIT) && !compat)) tu->tread = TREAD_FORMAT_TIME64; else tu->tread = TREAD_FORMAT_TIME32; if (tu->tread != old_tread && realloc_user_queue(tu, tu->queue_size) < 0) { tu->tread = old_tread; return -ENOMEM; } return 0; } enum { SNDRV_TIMER_IOCTL_START_OLD = _IO('T', 0x20), SNDRV_TIMER_IOCTL_STOP_OLD = _IO('T', 0x21), SNDRV_TIMER_IOCTL_CONTINUE_OLD = _IO('T', 0x22), SNDRV_TIMER_IOCTL_PAUSE_OLD = _IO('T', 0x23), }; static long __snd_timer_user_ioctl(struct file *file, unsigned int cmd, unsigned long arg, bool compat) { struct snd_timer_user *tu; void __user *argp = (void __user *)arg; int __user *p = argp; tu = file->private_data; switch (cmd) { case SNDRV_TIMER_IOCTL_PVERSION: return put_user(SNDRV_TIMER_VERSION, p) ? 
-EFAULT : 0; case SNDRV_TIMER_IOCTL_NEXT_DEVICE: return snd_timer_user_next_device(argp); case SNDRV_TIMER_IOCTL_TREAD_OLD: case SNDRV_TIMER_IOCTL_TREAD64: return snd_timer_user_tread(argp, tu, cmd, compat); case SNDRV_TIMER_IOCTL_GINFO: return snd_timer_user_ginfo(file, argp); case SNDRV_TIMER_IOCTL_GPARAMS: return snd_timer_user_gparams(file, argp); case SNDRV_TIMER_IOCTL_GSTATUS: return snd_timer_user_gstatus(file, argp); case SNDRV_TIMER_IOCTL_SELECT: return snd_timer_user_tselect(file, argp); case SNDRV_TIMER_IOCTL_INFO: return snd_timer_user_info(file, argp); case SNDRV_TIMER_IOCTL_PARAMS: return snd_timer_user_params(file, argp); case SNDRV_TIMER_IOCTL_STATUS32: return snd_timer_user_status32(file, argp); case SNDRV_TIMER_IOCTL_STATUS64: return snd_timer_user_status64(file, argp); case SNDRV_TIMER_IOCTL_START: case SNDRV_TIMER_IOCTL_START_OLD: return snd_timer_user_start(file); case SNDRV_TIMER_IOCTL_STOP: case SNDRV_TIMER_IOCTL_STOP_OLD: return snd_timer_user_stop(file); case SNDRV_TIMER_IOCTL_CONTINUE: case SNDRV_TIMER_IOCTL_CONTINUE_OLD: return snd_timer_user_continue(file); case SNDRV_TIMER_IOCTL_PAUSE: case SNDRV_TIMER_IOCTL_PAUSE_OLD: return snd_timer_user_pause(file); } return -ENOTTY; } static long snd_timer_user_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_timer_user *tu = file->private_data; guard(mutex)(&tu->ioctl_lock); return __snd_timer_user_ioctl(file, cmd, arg, false); } static int snd_timer_user_fasync(int fd, struct file * file, int on) { struct snd_timer_user *tu; tu = file->private_data; return snd_fasync_helper(fd, file, on, &tu->fasync); } static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, size_t count, loff_t *offset) { struct snd_timer_tread64 *tread; struct snd_timer_tread32 tread32; struct snd_timer_user *tu; long result = 0, unit; int qhead; int err = 0; tu = file->private_data; switch (tu->tread) { case TREAD_FORMAT_TIME64: unit = sizeof(struct snd_timer_tread64); break; case TREAD_FORMAT_TIME32: unit = sizeof(struct snd_timer_tread32); break; case TREAD_FORMAT_NONE: unit = sizeof(struct snd_timer_read); break; default: WARN_ONCE(1, "Corrupt snd_timer_user\n"); return -ENOTSUPP; } mutex_lock(&tu->ioctl_lock); spin_lock_irq(&tu->qlock); while ((long)count - result >= unit) { while (!tu->qused) { wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; goto _error; } set_current_state(TASK_INTERRUPTIBLE); init_waitqueue_entry(&wait, current); add_wait_queue(&tu->qchange_sleep, &wait); spin_unlock_irq(&tu->qlock); mutex_unlock(&tu->ioctl_lock); schedule(); mutex_lock(&tu->ioctl_lock); spin_lock_irq(&tu->qlock); remove_wait_queue(&tu->qchange_sleep, &wait); if (tu->disconnected) { err = -ENODEV; goto _error; } if (signal_pending(current)) { err = -ERESTARTSYS; goto _error; } } qhead = tu->qhead++; tu->qhead %= tu->queue_size; tu->qused--; spin_unlock_irq(&tu->qlock); tread = &tu->tqueue[qhead]; switch (tu->tread) { case TREAD_FORMAT_TIME64: if (copy_to_user(buffer, tread, sizeof(struct snd_timer_tread64))) err = -EFAULT; break; case TREAD_FORMAT_TIME32: memset(&tread32, 0, sizeof(tread32)); tread32 = (struct snd_timer_tread32) { .event = tread->event, .tstamp_sec = tread->tstamp_sec, .tstamp_nsec = tread->tstamp_nsec, .val = tread->val, }; if (copy_to_user(buffer, &tread32, sizeof(tread32))) err = -EFAULT; break; case TREAD_FORMAT_NONE: if (copy_to_user(buffer, &tu->queue[qhead], sizeof(struct snd_timer_read))) err = -EFAULT; break; default: err = -ENOTSUPP; break; } 
spin_lock_irq(&tu->qlock); if (err < 0) goto _error; result += unit; buffer += unit; } _error: spin_unlock_irq(&tu->qlock); mutex_unlock(&tu->ioctl_lock); return result > 0 ? result : err; } static __poll_t snd_timer_user_poll(struct file *file, poll_table * wait) { __poll_t mask; struct snd_timer_user *tu; tu = file->private_data; poll_wait(file, &tu->qchange_sleep, wait); mask = 0; guard(spinlock_irq)(&tu->qlock); if (tu->qused) mask |= EPOLLIN | EPOLLRDNORM; if (tu->disconnected) mask |= EPOLLERR; return mask; } #ifdef CONFIG_COMPAT #include "timer_compat.c" #else #define snd_timer_user_ioctl_compat NULL #endif static const struct file_operations snd_timer_f_ops = { .owner = THIS_MODULE, .read = snd_timer_user_read, .open = snd_timer_user_open, .release = snd_timer_user_release, .llseek = no_llseek, .poll = snd_timer_user_poll, .unlocked_ioctl = snd_timer_user_ioctl, .compat_ioctl = snd_timer_user_ioctl_compat, .fasync = snd_timer_user_fasync, }; /* unregister the system timer */ static void snd_timer_free_all(void) { struct snd_timer *timer, *n; list_for_each_entry_safe(timer, n, &snd_timer_list, device_list) snd_timer_free(timer); } static struct device *timer_dev; /* * ENTRY functions */ static int __init alsa_timer_init(void) { int err; err = snd_device_alloc(&timer_dev, NULL); if (err < 0) return err; dev_set_name(timer_dev, "timer"); #ifdef SNDRV_OSS_INFO_DEV_TIMERS snd_oss_info_register(SNDRV_OSS_INFO_DEV_TIMERS, SNDRV_CARDS - 1, "system timer"); #endif err = snd_timer_register_system(); if (err < 0) { pr_err("ALSA: unable to register system timer (%i)\n", err); goto put_timer; } err = snd_register_device(SNDRV_DEVICE_TYPE_TIMER, NULL, 0, &snd_timer_f_ops, NULL, timer_dev); if (err < 0) { pr_err("ALSA: unable to register timer device (%i)\n", err); snd_timer_free_all(); goto put_timer; } snd_timer_proc_init(); return 0; put_timer: put_device(timer_dev); return err; } static void __exit alsa_timer_exit(void) { snd_unregister_device(timer_dev); snd_timer_free_all(); put_device(timer_dev); snd_timer_proc_done(); #ifdef SNDRV_OSS_INFO_DEV_TIMERS snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_TIMERS, SNDRV_CARDS - 1); #endif } module_init(alsa_timer_init) module_exit(alsa_timer_exit)
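/*
 * Editor's sketch (not part of the original file): a minimal in-kernel client
 * of the timer API implemented above, modelled on snd_timer_user_tselect()
 * and snd_timer_user_start(). The instance name, the callback body, the tick
 * count and the choice of the global system timer are illustrative
 * assumptions; the calls themselves (snd_timer_instance_new/open/start/stop/
 * close/instance_free) are the ones exported by this file.
 */
static struct snd_timer_instance *example_timeri;

/* Invoked from snd_timer_interrupt()/snd_timer_work() on every expiry. */
static void example_timer_callback(struct snd_timer_instance *timeri,
				   unsigned long resolution,
				   unsigned long ticks)
{
	pr_debug("example timer fired: %lu ns resolution, %lu ticks\n",
		 resolution, ticks);
}

static int __maybe_unused example_timer_setup(void)
{
	struct snd_timer_id tid;
	int err;

	/* Bind to the global system timer registered by this file. */
	memset(&tid, 0, sizeof(tid));
	tid.dev_class = SNDRV_TIMER_CLASS_GLOBAL;
	tid.dev_sclass = SNDRV_TIMER_SCLASS_APPLICATION;
	tid.card = -1;
	tid.device = SNDRV_TIMER_GLOBAL_SYSTEM;
	tid.subdevice = 0;

	example_timeri = snd_timer_instance_new("example client");
	if (!example_timeri)
		return -ENOMEM;
	example_timeri->callback = example_timer_callback;
	/* Auto-reload so the callback keeps firing until we stop it. */
	example_timeri->flags |= SNDRV_TIMER_IFLG_AUTO;

	err = snd_timer_open(example_timeri, &tid, 0);
	if (err < 0) {
		snd_timer_instance_free(example_timeri);
		example_timeri = NULL;
		return err;
	}

	/* One period is 10 ticks of the system timer's resolution. */
	return snd_timer_start(example_timeri, 10);
}

static void __maybe_unused example_timer_teardown(void)
{
	snd_timer_stop(example_timeri);
	snd_timer_close(example_timeri);
	snd_timer_instance_free(example_timeri);
	example_timeri = NULL;
}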
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/page_ext.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>
#include <linux/page_table_check.h>
#include <linux/rcupdate.h>

/*
 * struct page extension
 *
 * This is the feature to manage memory for extended data per page.
 *
 * Until now, we had to modify struct page itself to store extra data per
 * page. This requires rebuilding the kernel, which is a really time-consuming
 * process, and sometimes a rebuild is impossible due to third-party module
 * dependencies. Finally, enlarging struct page could cause unwanted changes
 * in system behaviour.
 *
 * This feature is intended to overcome the above problems. It allocates
 * memory for extended data per page in a separate place rather than in
 * struct page itself. This memory can be accessed by the accessor functions
 * provided by this code. During the boot process, it checks whether
 * allocation of a huge chunk of memory is needed or not. If not, it avoids
 * allocating memory at all. With this advantage, we can include this feature
 * in the kernel by default and avoid rebuilds and the related problems.
 *
 * To help these things work well, there are two callbacks for clients. One
 * is the need callback, which is mandatory if the user wants to avoid
 * useless memory allocation at boot time.
The other is optional, init callback, which * is used to do proper initialization after memory is allocated. * * The need callback is used to decide whether extended memory allocation is * needed or not. Sometimes users want to deactivate some features in this * boot and extra memory would be unnecessary. In this case, to avoid * allocating huge chunk of memory, each clients represent their need of * extra memory through the need callback. If one of the need callbacks * returns true, it means that someone needs extra memory so that * page extension core should allocates memory for page extension. If * none of need callbacks return true, memory isn't needed at all in this boot * and page extension core can skip to allocate memory. As result, * none of memory is wasted. * * When need callback returns true, page_ext checks if there is a request for * extra memory through size in struct page_ext_operations. If it is non-zero, * extra space is allocated for each page_ext entry and offset is returned to * user through offset in struct page_ext_operations. * * The init callback is used to do proper initialization after page extension * is completely initialized. In sparse memory system, extra memory is * allocated some time later than memmap is allocated. In other words, lifetime * of memory for page extension isn't same with memmap for struct page. * Therefore, clients can't store extra data until page extension is * initialized, even if pages are allocated and used freely. This could * cause inadequate state of extra data per page, so, to prevent it, client * can utilize this callback to initialize the state of it correctly. */ #ifdef CONFIG_SPARSEMEM #define PAGE_EXT_INVALID (0x1) #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) static bool need_page_idle(void) { return true; } static struct page_ext_operations page_idle_ops __initdata = { .need = need_page_idle, .need_shared_flags = true, }; #endif static struct page_ext_operations *page_ext_ops[] __initdata = { #ifdef CONFIG_PAGE_OWNER &page_owner_ops, #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif #ifdef CONFIG_PAGE_TABLE_CHECK &page_table_check_ops, #endif }; unsigned long page_ext_size; static unsigned long total_usage; bool early_page_ext __meminitdata; static int __init setup_early_page_ext(char *str) { early_page_ext = true; return 0; } early_param("early_page_ext", setup_early_page_ext); static bool __init invoke_need_callbacks(void) { int i; int entries = ARRAY_SIZE(page_ext_ops); bool need = false; for (i = 0; i < entries; i++) { if (page_ext_ops[i]->need()) { if (page_ext_ops[i]->need_shared_flags) { page_ext_size = sizeof(struct page_ext); break; } } } for (i = 0; i < entries; i++) { if (page_ext_ops[i]->need()) { page_ext_ops[i]->offset = page_ext_size; page_ext_size += page_ext_ops[i]->size; need = true; } } return need; } static void __init invoke_init_callbacks(void) { int i; int entries = ARRAY_SIZE(page_ext_ops); for (i = 0; i < entries; i++) { if (page_ext_ops[i]->init) page_ext_ops[i]->init(); } } static inline struct page_ext *get_entry(void *base, unsigned long index) { return base + page_ext_size * index; } #ifndef CONFIG_SPARSEMEM void __init page_ext_init_flatmem_late(void) { invoke_init_callbacks(); } void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) { pgdat->node_page_ext = NULL; } static struct page_ext *lookup_page_ext(const struct page *page) { unsigned long pfn = page_to_pfn(page); unsigned long index; struct page_ext *base; 
WARN_ON_ONCE(!rcu_read_lock_held()); base = NODE_DATA(page_to_nid(page))->node_page_ext; /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. */ if (unlikely(!base)) return NULL; index = pfn - round_down(node_start_pfn(page_to_nid(page)), MAX_ORDER_NR_PAGES); return get_entry(base, index); } static int __init alloc_node_page_ext(int nid) { struct page_ext *base; unsigned long table_size; unsigned long nr_pages; nr_pages = NODE_DATA(nid)->node_spanned_pages; if (!nr_pages) return 0; /* * Need extra space if node range is not aligned with * MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm * checks buddy's status, range could be out of exact node range. */ if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) || !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES)) nr_pages += MAX_ORDER_NR_PAGES; table_size = page_ext_size * nr_pages; base = memblock_alloc_try_nid( table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); if (!base) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; return 0; } void __init page_ext_init_flatmem(void) { int nid, fail; if (!invoke_need_callbacks()) return; for_each_online_node(nid) { fail = alloc_node_page_ext(nid); if (fail) goto fail; } pr_info("allocated %ld bytes of page_ext\n", total_usage); return; fail: pr_crit("allocation of page_ext failed.\n"); panic("Out of memory"); } #else /* CONFIG_SPARSEMEM */ static bool page_ext_invalid(struct page_ext *page_ext) { return !page_ext || (((unsigned long)page_ext & PAGE_EXT_INVALID) == PAGE_EXT_INVALID); } static struct page_ext *lookup_page_ext(const struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); struct page_ext *page_ext = READ_ONCE(section->page_ext); WARN_ON_ONCE(!rcu_read_lock_held()); /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. */ if (page_ext_invalid(page_ext)) return NULL; return get_entry(page_ext, pfn); } static void *__meminit alloc_page_ext(size_t size, int nid) { gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; void *addr = NULL; addr = alloc_pages_exact_nid(nid, size, flags); if (addr) { kmemleak_alloc(addr, size, 1, flags); return addr; } addr = vzalloc_node(size, nid); return addr; } static int __meminit init_section_page_ext(unsigned long pfn, int nid) { struct mem_section *section; struct page_ext *base; unsigned long table_size; section = __pfn_to_section(pfn); if (section->page_ext) return 0; table_size = page_ext_size * PAGES_PER_SECTION; base = alloc_page_ext(table_size, nid); /* * The value stored in section->page_ext is (base - pfn) * and it does not point to the memory block allocated above, * causing kmemleak false positives. */ kmemleak_not_leak(base); if (!base) { pr_err("page ext allocation failure\n"); return -ENOMEM; } /* * The passed "pfn" may not be aligned to SECTION. For the calculation * we need to apply a mask. 
*/ pfn &= PAGE_SECTION_MASK; section->page_ext = (void *)base - page_ext_size * pfn; total_usage += table_size; return 0; } static void free_page_ext(void *addr) { if (is_vmalloc_addr(addr)) { vfree(addr); } else { struct page *page = virt_to_page(addr); size_t table_size; table_size = page_ext_size * PAGES_PER_SECTION; BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } } static void __free_page_ext(unsigned long pfn) { struct mem_section *ms; struct page_ext *base; ms = __pfn_to_section(pfn); if (!ms || !ms->page_ext) return; base = READ_ONCE(ms->page_ext); /* * page_ext here can be valid while doing the roll back * operation in online_page_ext(). */ if (page_ext_invalid(base)) base = (void *)base - PAGE_EXT_INVALID; WRITE_ONCE(ms->page_ext, NULL); base = get_entry(base, pfn); free_page_ext(base); } static void __invalidate_page_ext(unsigned long pfn) { struct mem_section *ms; void *val; ms = __pfn_to_section(pfn); if (!ms || !ms->page_ext) return; val = (void *)ms->page_ext + PAGE_EXT_INVALID; WRITE_ONCE(ms->page_ext, val); } static int __meminit online_page_ext(unsigned long start_pfn, unsigned long nr_pages, int nid) { unsigned long start, end, pfn; int fail = 0; start = SECTION_ALIGN_DOWN(start_pfn); end = SECTION_ALIGN_UP(start_pfn + nr_pages); if (nid == NUMA_NO_NODE) { /* * In this case, "nid" already exists and contains valid memory. * "start_pfn" passed to us is a pfn which is an arg for * online__pages(), and start_pfn should exist. */ nid = pfn_to_nid(start_pfn); VM_BUG_ON(!node_online(nid)); } for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) fail = init_section_page_ext(pfn, nid); if (!fail) return 0; /* rollback */ end = pfn - PAGES_PER_SECTION; for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __free_page_ext(pfn); return -ENOMEM; } static void __meminit offline_page_ext(unsigned long start_pfn, unsigned long nr_pages) { unsigned long start, end, pfn; start = SECTION_ALIGN_DOWN(start_pfn); end = SECTION_ALIGN_UP(start_pfn + nr_pages); /* * Freeing of page_ext is done in 3 steps to avoid * use-after-free of it: * 1) Traverse all the sections and mark their page_ext * as invalid. * 2) Wait for all the existing users of page_ext who * started before invalidation to finish. * 3) Free the page_ext. */ for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __invalidate_page_ext(pfn); synchronize_rcu(); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __free_page_ext(pfn); } static int __meminit page_ext_callback(struct notifier_block *self, unsigned long action, void *arg) { struct memory_notify *mn = arg; int ret = 0; switch (action) { case MEM_GOING_ONLINE: ret = online_page_ext(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; case MEM_OFFLINE: offline_page_ext(mn->start_pfn, mn->nr_pages); break; case MEM_CANCEL_ONLINE: offline_page_ext(mn->start_pfn, mn->nr_pages); break; case MEM_GOING_OFFLINE: break; case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; } return notifier_from_errno(ret); } void __init page_ext_init(void) { unsigned long pfn; int nid; if (!invoke_need_callbacks()) return; for_each_node_state(nid, N_MEMORY) { unsigned long start_pfn, end_pfn; start_pfn = node_start_pfn(nid); end_pfn = node_end_pfn(nid); /* * start_pfn and end_pfn may not be aligned to SECTION and the * page->flags of out of node pages are not initialized. So we * scan [start_pfn, the biggest section's pfn < end_pfn) here. 
*/ for (pfn = start_pfn; pfn < end_pfn; pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) { if (!pfn_valid(pfn)) continue; /* * Nodes's pfns can be overlapping. * We know some arch can have a nodes layout such as * -------------pfn--------------> * N0 | N1 | N2 | N0 | N1 | N2|.... */ if (pfn_to_nid(pfn) != nid) continue; if (init_section_page_ext(pfn, nid)) goto oom; cond_resched(); } } hotplug_memory_notifier(page_ext_callback, DEFAULT_CALLBACK_PRI); pr_info("allocated %ld bytes of page_ext\n", total_usage); invoke_init_callbacks(); return; oom: panic("Out of memory"); } void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) { } #endif /** * page_ext_get() - Get the extended information for a page. * @page: The page we're interested in. * * Ensures that the page_ext will remain valid until page_ext_put() * is called. * * Return: NULL if no page_ext exists for this page. * Context: Any context. Caller may not sleep until they have called * page_ext_put(). */ struct page_ext *page_ext_get(struct page *page) { struct page_ext *page_ext; rcu_read_lock(); page_ext = lookup_page_ext(page); if (!page_ext) { rcu_read_unlock(); return NULL; } return page_ext; } /** * page_ext_put() - Working with page extended information is done. * @page_ext: Page extended information received from page_ext_get(). * * The page extended information of the page may not be valid after this * function is called. * * Return: None. * Context: Any context with corresponding page_ext_get() is called. */ void page_ext_put(struct page_ext *page_ext) { if (unlikely(!page_ext)) return; rcu_read_unlock(); }
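/*
 * Editor's sketch (not part of the original file): what a minimal page_ext
 * client looks like from the outside. The client name, its data layout and
 * the enable check are illustrative assumptions; the ops fields
 * (need/size/offset/need_shared_flags) and the page_ext_get()/page_ext_put()
 * accessor pattern are the ones used by this file. A real client would also
 * be listed in the page_ext_ops[] array near the top of this file.
 */
struct example_page_data {
	unsigned long stamp;	/* hypothetical per-page payload */
};

static bool example_enabled __initdata = true;

static bool __init need_example(void)
{
	/* Claim extra space only when the feature is actually enabled. */
	return example_enabled;
}

struct page_ext_operations example_page_ops = {
	.size = sizeof(struct example_page_data),
	.need = need_example,
	.need_shared_flags = false,
};

/* Translate a page_ext entry into this client's private area. */
static inline struct example_page_data *
get_example_data(struct page_ext *page_ext)
{
	return (void *)page_ext + example_page_ops.offset;
}

static void example_stamp_page(struct page *page, unsigned long stamp)
{
	struct page_ext *page_ext = page_ext_get(page);

	if (!page_ext)
		return;		/* page_ext not (yet) allocated for this page */
	get_example_data(page_ext)->stamp = stamp;
	page_ext_put(page_ext);
}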
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *	Definitions for the 'struct skb_array' datastructure.
 *
 *	Author:
 *		Michael S. Tsirkin <mst@redhat.com>
 *
 *	Copyright (C) 2016 Red Hat, Inc.
 *
 *	Limited-size FIFO of skbs. Can be used more or less whenever
 *	sk_buff_head can be used, except you need to know the queue size in
 *	advance.
 *	Implemented as a type-safe wrapper around ptr_ring.
 */

#ifndef _LINUX_SKB_ARRAY_H
#define _LINUX_SKB_ARRAY_H 1

#ifdef __KERNEL__
#include <linux/ptr_ring.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#endif

struct skb_array {
	struct ptr_ring ring;
};

/* Might be slightly faster than skb_array_full below, but callers invoking
 * this in a loop must use a compiler barrier, for example cpu_relax().
 */
static inline bool __skb_array_full(struct skb_array *a)
{
	return __ptr_ring_full(&a->ring);
}

static inline bool skb_array_full(struct skb_array *a)
{
	return ptr_ring_full(&a->ring);
}

static inline int skb_array_produce(struct skb_array *a, struct sk_buff *skb)
{
	return ptr_ring_produce(&a->ring, skb);
}

static inline int skb_array_produce_irq(struct skb_array *a, struct sk_buff *skb)
{
	return ptr_ring_produce_irq(&a->ring, skb);
}

static inline int skb_array_produce_bh(struct skb_array *a, struct sk_buff *skb)
{
	return ptr_ring_produce_bh(&a->ring, skb);
}

static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb)
{
	return ptr_ring_produce_any(&a->ring, skb);
}

/* Might be slightly faster than skb_array_empty below, but only safe if the
 * array is never resized. Also, callers invoking this in a loop must take care
 * to use a compiler barrier, for example cpu_relax().
*/ static inline bool __skb_array_empty(struct skb_array *a) { return __ptr_ring_empty(&a->ring); } static inline struct sk_buff *__skb_array_peek(struct skb_array *a) { return __ptr_ring_peek(&a->ring); } static inline bool skb_array_empty(struct skb_array *a) { return ptr_ring_empty(&a->ring); } static inline bool skb_array_empty_bh(struct skb_array *a) { return ptr_ring_empty_bh(&a->ring); } static inline bool skb_array_empty_irq(struct skb_array *a) { return ptr_ring_empty_irq(&a->ring); } static inline bool skb_array_empty_any(struct skb_array *a) { return ptr_ring_empty_any(&a->ring); } static inline struct sk_buff *__skb_array_consume(struct skb_array *a) { return __ptr_ring_consume(&a->ring); } static inline struct sk_buff *skb_array_consume(struct skb_array *a) { return ptr_ring_consume(&a->ring); } static inline int skb_array_consume_batched(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a) { return ptr_ring_consume_irq(&a->ring); } static inline int skb_array_consume_batched_irq(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_irq(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_any(struct skb_array *a) { return ptr_ring_consume_any(&a->ring); } static inline int skb_array_consume_batched_any(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_any(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a) { return ptr_ring_consume_bh(&a->ring); } static inline int skb_array_consume_batched_bh(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_bh(&a->ring, (void **)array, n); } static inline int __skb_array_len_with_tag(struct sk_buff *skb) { if (likely(skb)) { int len = skb->len; if (skb_vlan_tag_present(skb)) len += VLAN_HLEN; return len; } else { return 0; } } static inline int skb_array_peek_len(struct skb_array *a) { return PTR_RING_PEEK_CALL(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_irq(struct skb_array *a) { return PTR_RING_PEEK_CALL_IRQ(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_bh(struct skb_array *a) { return PTR_RING_PEEK_CALL_BH(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_any(struct skb_array *a) { return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_init(&a->ring, size, gfp); } static void __skb_array_destroy_skb(void *ptr) { kfree_skb(ptr); } static inline void skb_array_unconsume(struct skb_array *a, struct sk_buff **skbs, int n) { ptr_ring_unconsume(&a->ring, (void **)skbs, n, __skb_array_destroy_skb); } static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } static inline int skb_array_resize_multiple(struct skb_array **rings, int nrings, unsigned int size, gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); return ptr_ring_resize_multiple((struct ptr_ring **)rings, nrings, size, gfp, __skb_array_destroy_skb); } static inline void skb_array_cleanup(struct skb_array *a) { ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb); } #endif /* _LINUX_SKB_ARRAY_H */
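/*
 * Editor's sketch (not part of the original header): typical producer/consumer
 * usage of the API above. The queue size, the GFP flags and the surrounding
 * driver context are illustrative assumptions; every skb_array_*() call used
 * here is declared in this header.
 */
static int __maybe_unused example_queue_skbs(void)
{
	struct skb_array q;
	struct sk_buff *skb;
	int err;

	/* Fixed-size ring for 256 skbs; the size must be known up front. */
	err = skb_array_init(&q, 256, GFP_KERNEL);
	if (err)
		return err;

	/* Producer side: returns 0 on success, -ENOSPC if the ring is full. */
	skb = alloc_skb(128, GFP_KERNEL);
	if (skb && skb_array_produce(&q, skb))
		kfree_skb(skb);

	/* Consumer side: NULL means the ring is empty. */
	while ((skb = skb_array_consume(&q)))
		kfree_skb(skb);

	/* Frees any skbs still queued, via __skb_array_destroy_skb(). */
	skb_array_cleanup(&q);
	return 0;
}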
// SPDX-License-Identifier: GPL-2.0-only
/*
 * keyspan_remote: USB driver for the Keyspan DMR
 *
 * Copyright (C) 2005 Zymeta Corporation - Michael Downey (downey@zymeta.com)
 *
 * This driver has been put together with the support of Innosys, Inc.
 * and Keyspan, Inc the manufacturers of the Keyspan USB DMR product.
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/usb/input.h>

/* Parameters that can be passed to the driver. */
static int debug;
module_param(debug, int, 0444);
MODULE_PARM_DESC(debug, "Enable extra debug messages and information");

/* Vendor and product ids */
#define USB_KEYSPAN_VENDOR_ID		0x06CD
#define USB_KEYSPAN_PRODUCT_UIA11	0x0202

/* Defines for converting the data from the remote. */
#define ZERO		0x18
#define ZERO_MASK	0x1F	/* 5 bits for a 0 */
#define ONE		0x3C
#define ONE_MASK	0x3F	/* 6 bits for a 1 */
#define SYNC		0x3F80
#define SYNC_MASK	0x3FFF	/* 14 bits for a SYNC sequence */
#define STOP		0x00
#define STOP_MASK	0x1F	/* 5 bits for the STOP sequence */
#define GAP		0xFF

#define RECV_SIZE	8	/* The UIA-11 type have a 8 byte limit. */

/*
 * Table that maps the 31 possible keycodes to input keys.
 * Currently there are 15 and 17 button models so RESERVED codes
 * are blank areas in the mapping.
*/ static const unsigned short keyspan_key_table[] = { KEY_RESERVED, /* 0 is just a place holder. */ KEY_RESERVED, KEY_STOP, KEY_PLAYCD, KEY_RESERVED, KEY_PREVIOUSSONG, KEY_REWIND, KEY_FORWARD, KEY_NEXTSONG, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_PAUSE, KEY_VOLUMEUP, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_VOLUMEDOWN, KEY_RESERVED, KEY_UP, KEY_RESERVED, KEY_MUTE, KEY_LEFT, KEY_ENTER, KEY_RIGHT, KEY_RESERVED, KEY_RESERVED, KEY_DOWN, KEY_RESERVED, KEY_KPASTERISK, KEY_RESERVED, KEY_MENU }; /* table of devices that work with this driver */ static const struct usb_device_id keyspan_table[] = { { USB_DEVICE(USB_KEYSPAN_VENDOR_ID, USB_KEYSPAN_PRODUCT_UIA11) }, { } /* Terminating entry */ }; /* Structure to store all the real stuff that a remote sends to us. */ struct keyspan_message { u16 system; u8 button; u8 toggle; }; /* Structure used for all the bit testing magic needed to be done. */ struct bit_tester { u32 tester; int len; int pos; int bits_left; u8 buffer[32]; }; /* Structure to hold all of our driver specific stuff */ struct usb_keyspan { char name[128]; char phys[64]; unsigned short keymap[ARRAY_SIZE(keyspan_key_table)]; struct usb_device *udev; struct input_dev *input; struct usb_interface *interface; struct usb_endpoint_descriptor *in_endpoint; struct urb* irq_urb; int open; dma_addr_t in_dma; unsigned char *in_buffer; /* variables used to parse messages from remote. */ struct bit_tester data; int stage; int toggle; }; static struct usb_driver keyspan_driver; /* * Debug routine that prints out what we've received from the remote. */ static void keyspan_print(struct usb_keyspan* dev) /*unsigned char* data)*/ { char codes[4 * RECV_SIZE]; int i; for (i = 0; i < RECV_SIZE; i++) snprintf(codes + i * 3, 4, "%02x ", dev->in_buffer[i]); dev_info(&dev->udev->dev, "%s\n", codes); } /* * Routine that manages the bit_tester structure. It makes sure that there are * at least bits_needed bits loaded into the tester. */ static int keyspan_load_tester(struct usb_keyspan* dev, int bits_needed) { if (dev->data.bits_left >= bits_needed) return 0; /* * Somehow we've missed the last message. The message will be repeated * though so it's not too big a deal */ if (dev->data.pos >= dev->data.len) { dev_dbg(&dev->interface->dev, "%s - Error ran out of data. pos: %d, len: %d\n", __func__, dev->data.pos, dev->data.len); return -1; } /* Load as much as we can into the tester. */ while ((dev->data.bits_left + 7 < (sizeof(dev->data.tester) * 8)) && (dev->data.pos < dev->data.len)) { dev->data.tester += (dev->data.buffer[dev->data.pos++] << dev->data.bits_left); dev->data.bits_left += 8; } return 0; } static void keyspan_report_button(struct usb_keyspan *remote, int button, int press) { struct input_dev *input = remote->input; input_event(input, EV_MSC, MSC_SCAN, button); input_report_key(input, remote->keymap[button], press); input_sync(input); } /* * Routine that handles all the logic needed to parse out the message from the remote. */ static void keyspan_check_data(struct usb_keyspan *remote) { int i; int found = 0; struct keyspan_message message; switch(remote->stage) { case 0: /* * In stage 0 we want to find the start of a message. The remote sends a 0xFF as filler. * So the first byte that isn't a FF should be the start of a new message. 
*/ for (i = 0; i < RECV_SIZE && remote->in_buffer[i] == GAP; ++i); if (i < RECV_SIZE) { memcpy(remote->data.buffer, remote->in_buffer, RECV_SIZE); remote->data.len = RECV_SIZE; remote->data.pos = 0; remote->data.tester = 0; remote->data.bits_left = 0; remote->stage = 1; } break; case 1: /* * Stage 1 we should have 16 bytes and should be able to detect a * SYNC. The SYNC is 14 bits, 7 0's and then 7 1's. */ memcpy(remote->data.buffer + remote->data.len, remote->in_buffer, RECV_SIZE); remote->data.len += RECV_SIZE; found = 0; while ((remote->data.bits_left >= 14 || remote->data.pos < remote->data.len) && !found) { for (i = 0; i < 8; ++i) { if (keyspan_load_tester(remote, 14) != 0) { remote->stage = 0; return; } if ((remote->data.tester & SYNC_MASK) == SYNC) { remote->data.tester = remote->data.tester >> 14; remote->data.bits_left -= 14; found = 1; break; } else { remote->data.tester = remote->data.tester >> 1; --remote->data.bits_left; } } } if (!found) { remote->stage = 0; remote->data.len = 0; } else { remote->stage = 2; } break; case 2: /* * Stage 2 we should have 24 bytes which will be enough for a full * message. We need to parse out the system code, button code, * toggle code, and stop. */ memcpy(remote->data.buffer + remote->data.len, remote->in_buffer, RECV_SIZE); remote->data.len += RECV_SIZE; message.system = 0; for (i = 0; i < 9; i++) { keyspan_load_tester(remote, 6); if ((remote->data.tester & ZERO_MASK) == ZERO) { message.system = message.system << 1; remote->data.tester = remote->data.tester >> 5; remote->data.bits_left -= 5; } else if ((remote->data.tester & ONE_MASK) == ONE) { message.system = (message.system << 1) + 1; remote->data.tester = remote->data.tester >> 6; remote->data.bits_left -= 6; } else { dev_err(&remote->interface->dev, "%s - Unknown sequence found in system data.\n", __func__); remote->stage = 0; return; } } message.button = 0; for (i = 0; i < 5; i++) { keyspan_load_tester(remote, 6); if ((remote->data.tester & ZERO_MASK) == ZERO) { message.button = message.button << 1; remote->data.tester = remote->data.tester >> 5; remote->data.bits_left -= 5; } else if ((remote->data.tester & ONE_MASK) == ONE) { message.button = (message.button << 1) + 1; remote->data.tester = remote->data.tester >> 6; remote->data.bits_left -= 6; } else { dev_err(&remote->interface->dev, "%s - Unknown sequence found in button data.\n", __func__); remote->stage = 0; return; } } keyspan_load_tester(remote, 6); if ((remote->data.tester & ZERO_MASK) == ZERO) { message.toggle = 0; remote->data.tester = remote->data.tester >> 5; remote->data.bits_left -= 5; } else if ((remote->data.tester & ONE_MASK) == ONE) { message.toggle = 1; remote->data.tester = remote->data.tester >> 6; remote->data.bits_left -= 6; } else { dev_err(&remote->interface->dev, "%s - Error in message, invalid toggle.\n", __func__); remote->stage = 0; return; } keyspan_load_tester(remote, 5); if ((remote->data.tester & STOP_MASK) == STOP) { remote->data.tester = remote->data.tester >> 5; remote->data.bits_left -= 5; } else { dev_err(&remote->interface->dev, "Bad message received, no stop bit found.\n"); } dev_dbg(&remote->interface->dev, "%s found valid message: system: %d, button: %d, toggle: %d\n", __func__, message.system, message.button, message.toggle); if (message.toggle != remote->toggle) { keyspan_report_button(remote, message.button, 1); keyspan_report_button(remote, message.button, 0); remote->toggle = message.toggle; } remote->stage = 0; break; } } /* * Routine for sending all the initialization messages to the 
remote. */ static int keyspan_setup(struct usb_device* dev) { int retval = 0; retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x11, 0x40, 0x5601, 0x0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set bit rate due to error: %d\n", __func__, retval); return(retval); } retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x44, 0x40, 0x0, 0x0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set resume sensitivity due to error: %d\n", __func__, retval); return(retval); } retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x22, 0x40, 0x0, 0x0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to turn receive on due to error: %d\n", __func__, retval); return(retval); } dev_dbg(&dev->dev, "%s - Setup complete.\n", __func__); return(retval); } /* * Routine used to handle a new message that has come in. */ static void keyspan_irq_recv(struct urb *urb) { struct usb_keyspan *dev = urb->context; int retval; /* Check our status in case we need to bail out early. */ switch (urb->status) { case 0: break; /* Device went away so don't keep trying to read from it. */ case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: goto resubmit; } if (debug) keyspan_print(dev); keyspan_check_data(dev); resubmit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&dev->interface->dev, "%s - usb_submit_urb failed with result: %d\n", __func__, retval); } static int keyspan_open(struct input_dev *dev) { struct usb_keyspan *remote = input_get_drvdata(dev); remote->irq_urb->dev = remote->udev; if (usb_submit_urb(remote->irq_urb, GFP_KERNEL)) return -EIO; return 0; } static void keyspan_close(struct input_dev *dev) { struct usb_keyspan *remote = input_get_drvdata(dev); usb_kill_urb(remote->irq_urb); } static struct usb_endpoint_descriptor *keyspan_get_in_endpoint(struct usb_host_interface *iface) { struct usb_endpoint_descriptor *endpoint; int i; for (i = 0; i < iface->desc.bNumEndpoints; ++i) { endpoint = &iface->endpoint[i].desc; if (usb_endpoint_is_int_in(endpoint)) { /* we found our interrupt in endpoint */ return endpoint; } } return NULL; } /* * Routine that sets up the driver to handle a specific USB device detected on the bus. */ static int keyspan_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_endpoint_descriptor *endpoint; struct usb_keyspan *remote; struct input_dev *input_dev; int i, error; endpoint = keyspan_get_in_endpoint(interface->cur_altsetting); if (!endpoint) return -ENODEV; remote = kzalloc(sizeof(*remote), GFP_KERNEL); input_dev = input_allocate_device(); if (!remote || !input_dev) { error = -ENOMEM; goto fail1; } remote->udev = udev; remote->input = input_dev; remote->interface = interface; remote->in_endpoint = endpoint; remote->toggle = -1; /* Set to -1 so we will always not match the toggle from the first remote message. 
*/ remote->in_buffer = usb_alloc_coherent(udev, RECV_SIZE, GFP_KERNEL, &remote->in_dma); if (!remote->in_buffer) { error = -ENOMEM; goto fail1; } remote->irq_urb = usb_alloc_urb(0, GFP_KERNEL); if (!remote->irq_urb) { error = -ENOMEM; goto fail2; } error = keyspan_setup(udev); if (error) { error = -ENODEV; goto fail3; } if (udev->manufacturer) strscpy(remote->name, udev->manufacturer, sizeof(remote->name)); if (udev->product) { if (udev->manufacturer) strlcat(remote->name, " ", sizeof(remote->name)); strlcat(remote->name, udev->product, sizeof(remote->name)); } if (!strlen(remote->name)) snprintf(remote->name, sizeof(remote->name), "USB Keyspan Remote %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); usb_make_path(udev, remote->phys, sizeof(remote->phys)); strlcat(remote->phys, "/input0", sizeof(remote->phys)); memcpy(remote->keymap, keyspan_key_table, sizeof(remote->keymap)); input_dev->name = remote->name; input_dev->phys = remote->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &interface->dev; input_dev->keycode = remote->keymap; input_dev->keycodesize = sizeof(unsigned short); input_dev->keycodemax = ARRAY_SIZE(remote->keymap); input_set_capability(input_dev, EV_MSC, MSC_SCAN); __set_bit(EV_KEY, input_dev->evbit); for (i = 0; i < ARRAY_SIZE(keyspan_key_table); i++) __set_bit(keyspan_key_table[i], input_dev->keybit); __clear_bit(KEY_RESERVED, input_dev->keybit); input_set_drvdata(input_dev, remote); input_dev->open = keyspan_open; input_dev->close = keyspan_close; /* * Initialize the URB to access the device. * The urb gets sent to the device in keyspan_open() */ usb_fill_int_urb(remote->irq_urb, remote->udev, usb_rcvintpipe(remote->udev, endpoint->bEndpointAddress), remote->in_buffer, RECV_SIZE, keyspan_irq_recv, remote, endpoint->bInterval); remote->irq_urb->transfer_dma = remote->in_dma; remote->irq_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* we can register the device now, as it is ready */ error = input_register_device(remote->input); if (error) goto fail3; /* save our data pointer in this interface device */ usb_set_intfdata(interface, remote); return 0; fail3: usb_free_urb(remote->irq_urb); fail2: usb_free_coherent(udev, RECV_SIZE, remote->in_buffer, remote->in_dma); fail1: kfree(remote); input_free_device(input_dev); return error; } /* * Routine called when a device is disconnected from the USB. */ static void keyspan_disconnect(struct usb_interface *interface) { struct usb_keyspan *remote; remote = usb_get_intfdata(interface); usb_set_intfdata(interface, NULL); if (remote) { /* We have a valid driver structure so clean up everything we allocated. */ input_unregister_device(remote->input); usb_kill_urb(remote->irq_urb); usb_free_urb(remote->irq_urb); usb_free_coherent(remote->udev, RECV_SIZE, remote->in_buffer, remote->in_dma); kfree(remote); } } /* * Standard driver set up sections */ static struct usb_driver keyspan_driver = { .name = "keyspan_remote", .probe = keyspan_probe, .disconnect = keyspan_disconnect, .id_table = keyspan_table }; module_usb_driver(keyspan_driver); MODULE_DEVICE_TABLE(usb, keyspan_table); MODULE_AUTHOR("Michael Downey <downey@zymeta.com>"); MODULE_DESCRIPTION("Driver for the USB Keyspan remote control."); MODULE_LICENSE("GPL");
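The parser in keyspan_check_data() above keeps the most recently received bits in a 32-bit shift register (data.tester) and matches its low bits against the pulse-width patterns: a logical 0 occupies 5 bits (ZERO/ZERO_MASK), a logical 1 occupies 6 bits (ONE/ONE_MASK), so the register advances by 5 or 6 bits per decoded symbol, with the first symbol decoded ending up as the most significant bit of the field. The standalone sketch below is ordinary userspace C and not part of the driver; decode_bits() and the sample input are illustrative, but the constants and the match-and-shift logic mirror the driver's.

/* Standalone sketch of the match-and-shift decode used by keyspan_check_data(). */
#include <stdio.h>
#include <stdint.h>

#define ZERO      0x18
#define ZERO_MASK 0x1F	/* 5 bits encode a 0 */
#define ONE       0x3C
#define ONE_MASK  0x3F	/* 6 bits encode a 1 */

/* Decode nbits symbols from the low end of 'tester' (avail = valid bits). */
static int decode_bits(uint32_t tester, int avail, int nbits)
{
	int value = 0;

	while (nbits-- > 0) {
		if (avail >= 5 && (tester & ZERO_MASK) == ZERO) {
			value <<= 1;			/* a '0' symbol: consume 5 bits */
			tester >>= 5;
			avail -= 5;
		} else if (avail >= 6 && (tester & ONE_MASK) == ONE) {
			value = (value << 1) | 1;	/* a '1' symbol: consume 6 bits */
			tester >>= 6;
			avail -= 6;
		} else {
			return -1;			/* unknown sequence, as the driver reports */
		}
	}
	return value;
}

int main(void)
{
	/* Two symbols packed LSB-first: a ZERO, then a ONE starting at bit 5. */
	uint32_t sample = ZERO | ((uint32_t)ONE << 5);

	/* Prints 1, i.e. binary 01: first symbol 0, second symbol 1. */
	printf("decoded value: %d\n", decode_bits(sample, 11, 2));
	return 0;
}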
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* fs/ internal definitions
 *
 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

struct super_block;
struct file_system_type;
struct iomap;
struct iomap_ops;
struct linux_binprm;
struct path;
struct mount;
struct shrink_control;
struct fs_context;
struct pipe_inode_info;
struct iov_iter;
struct mnt_idmap;

/*
 * block/bdev.c
 */
#ifdef CONFIG_BLOCK
extern void __init bdev_cache_init(void);
#else
static inline void bdev_cache_init(void)
{
}
#endif /* CONFIG_BLOCK */

/*
 * buffer.c
 */
int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
		get_block_t *get_block, const struct iomap *iomap);

/*
 * char_dev.c
 */
extern void __init chrdev_init(void);

/*
 * fs_context.c
 */
extern const struct fs_context_operations legacy_fs_context_ops;
extern int parse_monolithic_mount_data(struct fs_context *, void *);
extern void vfs_clean_context(struct fs_context *fc);
extern int finish_clean_context(struct fs_context *fc);

/*
 * namei.c
 */
extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
			   struct path *path, struct path *root);
int do_rmdir(int dfd, struct filename *name);
int do_unlinkat(int dfd, struct filename *name);
int may_linkat(struct mnt_idmap *idmap, const struct path *link);
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
		 struct filename *newname, unsigned int flags);
int do_mkdirat(int dfd, struct filename *name, umode_t mode);
int do_symlinkat(struct filename *from, int newdfd, struct filename *to);
int do_linkat(int olddfd, struct filename *old, int newdfd,
	      struct filename *new, int flags);

/*
 * namespace.c
 */
extern struct vfsmount *lookup_mnt(const struct path *);
extern int finish_automount(struct vfsmount *, const struct path *);
extern int sb_prepare_remount_readonly(struct super_block *);
extern void __init mnt_init(void);
int mnt_get_write_access_file(struct file *file);
void mnt_put_write_access_file(struct file *file);
extern void dissolve_on_fput(struct vfsmount *);
extern bool may_mount(void);

int path_mount(const char *dev_name, struct path *path,
		const char *type_page, unsigned long flags, void *data_page);
int path_umount(struct path *path, int flags);

int show_path(struct seq_file *m, struct dentry *root);

/*
 * fs_struct.c
 */
extern void chroot_fs_refs(const
struct path *, const struct path *); /* * file_table.c */ struct file *alloc_empty_file(int flags, const struct cred *cred); struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred); struct file *alloc_empty_backing_file(int flags, const struct cred *cred); static inline void file_put_write_access(struct file *file) { put_write_access(file->f_inode); mnt_put_write_access(file->f_path.mnt); if (unlikely(file->f_mode & FMODE_BACKING)) mnt_put_write_access(backing_file_user_path(file)->mnt); } static inline void put_file_access(struct file *file) { if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) { i_readcount_dec(file->f_inode); } else if (file->f_mode & FMODE_WRITER) { file_put_write_access(file); } } /* * super.c */ extern int reconfigure_super(struct fs_context *); extern bool super_trylock_shared(struct super_block *sb); struct super_block *user_get_super(dev_t, bool excl); void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); int sb_init_dio_done_wq(struct super_block *sb); /* * Prepare superblock for changing its read-only state (i.e., either remount * read-write superblock read-only or vice versa). After this function returns * mnt_is_readonly() will return true for any mount of the superblock if its * caller is able to observe any changes done by the remount. This holds until * sb_end_ro_state_change() is called. */ static inline void sb_start_ro_state_change(struct super_block *sb) { WRITE_ONCE(sb->s_readonly_remount, 1); /* * For RO->RW transition, the barrier pairs with the barrier in * mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY * cleared, it will see s_readonly_remount set. * For RW->RO transition, the barrier pairs with the barrier in * mnt_get_write_access() before the mnt_is_readonly() check. * The barrier makes sure if mnt_get_write_access() sees MNT_WRITE_HOLD * already cleared, it will see s_readonly_remount set. */ smp_wmb(); } /* * Ends section changing read-only state of the superblock. After this function * returns if mnt_is_readonly() returns false, the caller will be able to * observe all the changes remount did to the superblock. */ static inline void sb_end_ro_state_change(struct super_block *sb) { /* * This barrier provides release semantics that pairs with * the smp_rmb() acquire semantics in mnt_is_readonly(). * This barrier pair ensure that when mnt_is_readonly() sees * 0 for sb->s_readonly_remount, it will also see all the * preceding flag changes that were made during the RO state * change. 
*/ smp_wmb(); WRITE_ONCE(sb->s_readonly_remount, 0); } /* * open.c */ struct open_flags { int open_flag; umode_t mode; int acc_mode; int intent; int lookup_flags; }; extern struct file *do_filp_open(int dfd, struct filename *pathname, const struct open_flags *op); extern struct file *do_file_open_root(const struct path *, const char *, const struct open_flags *); extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); struct file *file_close_fd_locked(struct files_struct *files, unsigned fd); long do_ftruncate(struct file *file, loff_t length, int small); long do_sys_ftruncate(unsigned int fd, loff_t length, int small); int chmod_common(const struct path *path, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); int chown_common(const struct path *path, uid_t user, gid_t group); extern int vfs_open(const struct path *, struct file *); /* * inode.c */ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry); bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid); /* * fs-writeback.c */ extern long get_nr_dirty_inodes(void); void invalidate_inodes(struct super_block *sb); /* * dcache.c */ extern int d_set_mounted(struct dentry *dentry); extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); extern struct dentry *d_alloc_cursor(struct dentry *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern char *simple_dname(struct dentry *, char *, int); extern void dput_to_list(struct dentry *, struct list_head *); extern void shrink_dentry_list(struct list_head *); extern void shrink_dcache_for_umount(struct super_block *); extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seq); extern void d_genocide(struct dentry *); /* * pipe.c */ extern const struct file_operations pipefifo_fops; /* * fs_pin.c */ extern void group_pin_kill(struct hlist_head *p); extern void mnt_pin_kill(struct mount *m); /* * fs/nsfs.c */ extern const struct dentry_operations ns_dentry_operations; /* * fs/stat.c: */ int getname_statx_lookup_flags(int flags); int do_statx(int dfd, struct filename *filename, unsigned int flags, unsigned int mask, struct statx __user *buffer); /* * fs/splice.c: */ ssize_t splice_file_to_pipe(struct file *in, struct pipe_inode_info *opipe, loff_t *offset, size_t len, unsigned int flags); /* * fs/xattr.c: */ struct xattr_name { char name[XATTR_NAME_MAX + 1]; }; struct xattr_ctx { /* Value of attribute */ union { const void __user *cvalue; void __user *value; }; void *kvalue; size_t size; /* Attribute name */ struct xattr_name *kname; unsigned int flags; }; ssize_t do_getxattr(struct mnt_idmap *idmap, struct dentry *d, struct xattr_ctx *ctx); int setxattr_copy(const char __user *name, struct xattr_ctx *ctx); int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_ctx *ctx); int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode); #ifdef CONFIG_FS_POSIX_ACL int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, const void *kvalue, size_t size); ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, void *kvalue, size_t size); #else static inline int 
do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, const void *kvalue, size_t size) { return -EOPNOTSUPP; } static inline ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, void *kvalue, size_t size) { return -EOPNOTSUPP; } #endif ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos); /* * fs/attr.c */ struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); struct stashed_operations { void (*put_data)(void *data); int (*init_inode)(struct inode *inode, void *data); }; int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data, struct path *path); void stashed_dentry_prune(struct dentry *dentry);
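The comments on sb_start_ro_state_change()/sb_end_ro_state_change() above describe a bracket around a superblock's read-only transition: s_readonly_remount is set before the SB_RDONLY state is touched and cleared only after the changes are done, so that mnt_is_readonly() and mnt_get_write_access() always observe a consistent combination. The sketch below is illustrative only and is not taken from fs/super.c; example_flip_ro_state() is hypothetical, it assumes the usual <linux/fs.h> definitions of struct super_block and SB_RDONLY, and the real reconfiguration (write freezing, error handling) lives in reconfigure_super().

/* Heavily simplified bracketing of a read-only state change (hypothetical). */
static void example_flip_ro_state(struct super_block *sb, bool ro)
{
	sb_start_ro_state_change(sb);	/* mount writers now see s_readonly_remount set */

	if (ro)				/* the actual RO/RW change happens inside the bracket */
		sb->s_flags |= SB_RDONLY;
	else
		sb->s_flags &= ~SB_RDONLY;

	sb_end_ro_state_change(sb);	/* publish the changes, then clear the flag */
}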
// SPDX-License-Identifier: GPL-2.0
/* Sysctl interface for parport devices.
* * Authors: David Campbell * Tim Waugh <tim@cyberelk.demon.co.uk> * Philip Blundell <philb@gnu.org> * Andrea Arcangeli * Riccardo Facchetti <fizban@tin.it> * * based on work by Grant Guenther <grant@torque.net> * and Philip Blundell * * Cleaned up include files - Russell King <linux@arm.uk.linux.org> */ #include <linux/string.h> #include <linux/init.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/parport.h> #include <linux/ctype.h> #include <linux/sysctl.h> #include <linux/device.h> #include <linux/uaccess.h> #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) #define PARPORT_MIN_TIMESLICE_VALUE 1ul #define PARPORT_MAX_TIMESLICE_VALUE ((unsigned long) HZ) #define PARPORT_MIN_SPINTIME_VALUE 1 #define PARPORT_MAX_SPINTIME_VALUE 1000 static int do_active_device(struct ctl_table *table, int write, void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[256]; struct pardevice *dev; int len = 0; if (write) /* can't happen anyway */ return -EACCES; if (*ppos) { *lenp = 0; return 0; } for (dev = port->devices; dev ; dev = dev->next) { if(dev == port->cad) { len += sprintf(buffer, "%s\n", dev->name); } } if(!len) { len += sprintf(buffer, "%s\n", "none"); } if (len > *lenp) len = *lenp; else *lenp = len; *ppos += len; memcpy(result, buffer, len); return 0; } #ifdef CONFIG_PARPORT_1284 static int do_autoprobe(struct ctl_table *table, int write, void *result, size_t *lenp, loff_t *ppos) { struct parport_device_info *info = table->extra2; const char *str; char buffer[256]; int len = 0; if (write) /* permissions stop this */ return -EACCES; if (*ppos) { *lenp = 0; return 0; } if ((str = info->class_name) != NULL) len += sprintf (buffer + len, "CLASS:%s;\n", str); if ((str = info->model) != NULL) len += sprintf (buffer + len, "MODEL:%s;\n", str); if ((str = info->mfr) != NULL) len += sprintf (buffer + len, "MANUFACTURER:%s;\n", str); if ((str = info->description) != NULL) len += sprintf (buffer + len, "DESCRIPTION:%s;\n", str); if ((str = info->cmdset) != NULL) len += sprintf (buffer + len, "COMMAND SET:%s;\n", str); if (len > *lenp) len = *lenp; else *lenp = len; *ppos += len; memcpy(result, buffer, len); return 0; } #endif /* IEEE1284.3 support. 
*/ static int do_hardware_base_addr(struct ctl_table *table, int write, void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[20]; int len = 0; if (*ppos) { *lenp = 0; return 0; } if (write) /* permissions prevent this anyway */ return -EACCES; len += sprintf (buffer, "%lu\t%lu\n", port->base, port->base_hi); if (len > *lenp) len = *lenp; else *lenp = len; *ppos += len; memcpy(result, buffer, len); return 0; } static int do_hardware_irq(struct ctl_table *table, int write, void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[20]; int len = 0; if (*ppos) { *lenp = 0; return 0; } if (write) /* permissions prevent this anyway */ return -EACCES; len += sprintf (buffer, "%d\n", port->irq); if (len > *lenp) len = *lenp; else *lenp = len; *ppos += len; memcpy(result, buffer, len); return 0; } static int do_hardware_dma(struct ctl_table *table, int write, void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[20]; int len = 0; if (*ppos) { *lenp = 0; return 0; } if (write) /* permissions prevent this anyway */ return -EACCES; len += sprintf (buffer, "%d\n", port->dma); if (len > *lenp) len = *lenp; else *lenp = len; *ppos += len; memcpy(result, buffer, len); return 0; } static int do_hardware_modes(struct ctl_table *table, int write, void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[40]; int len = 0; if (*ppos) { *lenp = 0; return 0; } if (write) /* permissions prevent this anyway */ return -EACCES; { #define printmode(x) \ do { \ if (port->modes & PARPORT_MODE_##x) \ len += sprintf(buffer + len, "%s%s", f++ ? "," : "", #x); \ } while (0) int f = 0; printmode(PCSPP); printmode(TRISTATE); printmode(COMPAT); printmode(EPP); printmode(ECP); printmode(DMA); #undef printmode } buffer[len++] = '\n'; if (len > *lenp) len = *lenp; else *lenp = len; *ppos += len; memcpy(result, buffer, len); return 0; } static const unsigned long parport_min_timeslice_value = PARPORT_MIN_TIMESLICE_VALUE; static const unsigned long parport_max_timeslice_value = PARPORT_MAX_TIMESLICE_VALUE; static const int parport_min_spintime_value = PARPORT_MIN_SPINTIME_VALUE; static const int parport_max_spintime_value = PARPORT_MAX_SPINTIME_VALUE; struct parport_sysctl_table { struct ctl_table_header *port_header; struct ctl_table_header *devices_header; #ifdef CONFIG_PARPORT_1284 struct ctl_table vars[10]; #else struct ctl_table vars[5]; #endif /* IEEE 1284 support */ struct ctl_table device_dir[1]; }; static const struct parport_sysctl_table parport_sysctl_template = { .port_header = NULL, .devices_header = NULL, { { .procname = "spintime", .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void*) &parport_min_spintime_value, .extra2 = (void*) &parport_max_spintime_value }, { .procname = "base-addr", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_hardware_base_addr }, { .procname = "irq", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_hardware_irq }, { .procname = "dma", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_hardware_dma }, { .procname = "modes", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_hardware_modes }, #ifdef CONFIG_PARPORT_1284 { .procname = "autoprobe", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_autoprobe }, { .procname = "autoprobe0", .data = NULL, .maxlen = 0, .mode = 
0444, .proc_handler = do_autoprobe }, { .procname = "autoprobe1", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_autoprobe }, { .procname = "autoprobe2", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_autoprobe }, { .procname = "autoprobe3", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_autoprobe }, #endif /* IEEE 1284 support */ }, { { .procname = "active", .data = NULL, .maxlen = 0, .mode = 0444, .proc_handler = do_active_device }, }, }; struct parport_device_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table vars[1]; struct ctl_table device_dir[1]; }; static const struct parport_device_sysctl_table parport_device_sysctl_template = { .sysctl_header = NULL, { { .procname = "timeslice", .data = NULL, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_ms_jiffies_minmax, .extra1 = (void*) &parport_min_timeslice_value, .extra2 = (void*) &parport_max_timeslice_value }, }, { { .procname = NULL, .data = NULL, .maxlen = 0, .mode = 0555, }, } }; struct parport_default_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table vars[2]; }; static struct parport_default_sysctl_table parport_default_sysctl_table = { .sysctl_header = NULL, { { .procname = "timeslice", .data = &parport_default_timeslice, .maxlen = sizeof(parport_default_timeslice), .mode = 0644, .proc_handler = proc_doulongvec_ms_jiffies_minmax, .extra1 = (void*) &parport_min_timeslice_value, .extra2 = (void*) &parport_max_timeslice_value }, { .procname = "spintime", .data = &parport_default_spintime, .maxlen = sizeof(parport_default_spintime), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void*) &parport_min_spintime_value, .extra2 = (void*) &parport_max_spintime_value }, } }; int parport_proc_register(struct parport *port) { struct parport_sysctl_table *t; char *tmp_dir_path; int i, err = 0; t = kmemdup(&parport_sysctl_template, sizeof(*t), GFP_KERNEL); if (t == NULL) return -ENOMEM; t->device_dir[0].extra1 = port; t->vars[0].data = &port->spintime; for (i = 0; i < 5; i++) { t->vars[i].extra1 = port; #ifdef CONFIG_PARPORT_1284 t->vars[5 + i].extra2 = &port->probe_info[i]; #endif /* IEEE 1284 support */ } tmp_dir_path = kasprintf(GFP_KERNEL, "dev/parport/%s/devices", port->name); if (!tmp_dir_path) { err = -ENOMEM; goto exit_free_t; } t->devices_header = register_sysctl(tmp_dir_path, t->device_dir); if (t->devices_header == NULL) { err = -ENOENT; goto exit_free_tmp_dir_path; } kfree(tmp_dir_path); tmp_dir_path = kasprintf(GFP_KERNEL, "dev/parport/%s", port->name); if (!tmp_dir_path) { err = -ENOMEM; goto unregister_devices_h; } t->port_header = register_sysctl(tmp_dir_path, t->vars); if (t->port_header == NULL) { err = -ENOENT; goto unregister_devices_h; } port->sysctl_table = t; kfree(tmp_dir_path); return 0; unregister_devices_h: unregister_sysctl_table(t->devices_header); exit_free_tmp_dir_path: kfree(tmp_dir_path); exit_free_t: kfree(t); return err; } int parport_proc_unregister(struct parport *port) { if (port->sysctl_table) { struct parport_sysctl_table *t = port->sysctl_table; port->sysctl_table = NULL; unregister_sysctl_table(t->devices_header); unregister_sysctl_table(t->port_header); kfree(t); } return 0; } int parport_device_proc_register(struct pardevice *device) { struct parport_device_sysctl_table *t; struct parport * port = device->port; char *tmp_dir_path; int err = 0; t = kmemdup(&parport_device_sysctl_template, sizeof(*t), GFP_KERNEL); if (t == NULL) return -ENOMEM; /* Allocate a buffer for two paths: 
dev/parport/PORT/devices/DEVICE. */ tmp_dir_path = kasprintf(GFP_KERNEL, "dev/parport/%s/devices/%s", port->name, device->name); if (!tmp_dir_path) { err = -ENOMEM; goto exit_free_t; } t->vars[0].data = &device->timeslice; t->sysctl_header = register_sysctl(tmp_dir_path, t->vars); if (t->sysctl_header == NULL) { kfree(t); t = NULL; } device->sysctl_table = t; kfree(tmp_dir_path); return 0; exit_free_t: kfree(t); return err; } int parport_device_proc_unregister(struct pardevice *device) { if (device->sysctl_table) { struct parport_device_sysctl_table *t = device->sysctl_table; device->sysctl_table = NULL; unregister_sysctl_table(t->sysctl_header); kfree(t); } return 0; } static int __init parport_default_proc_register(void) { int ret; parport_default_sysctl_table.sysctl_header = register_sysctl("dev/parport/default", parport_default_sysctl_table.vars); if (!parport_default_sysctl_table.sysctl_header) return -ENOMEM; ret = parport_bus_init(); if (ret) { unregister_sysctl_table(parport_default_sysctl_table. sysctl_header); return ret; } return 0; } static void __exit parport_default_proc_unregister(void) { if (parport_default_sysctl_table.sysctl_header) { unregister_sysctl_table(parport_default_sysctl_table. sysctl_header); parport_default_sysctl_table.sysctl_header = NULL; } parport_bus_exit(); } #else /* no sysctl or no procfs*/ int parport_proc_register(struct parport *pp) { return 0; } int parport_proc_unregister(struct parport *pp) { return 0; } int parport_device_proc_register(struct pardevice *device) { return 0; } int parport_device_proc_unregister(struct pardevice *device) { return 0; } static int __init parport_default_proc_register (void) { return parport_bus_init(); } static void __exit parport_default_proc_unregister (void) { parport_bus_exit(); } #endif subsys_initcall(parport_default_proc_register) module_exit(parport_default_proc_unregister)
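Once parport_proc_register(), parport_device_proc_register() and the default-table registration above have run, the ctl_table entries surface as ordinary files under /proc/sys/dev/parport/. The userspace sketch below is not kernel code; it assumes a port registered as "parport0" (the per-port names come from port->name), while default/timeslice exists whenever the module is loaded with sysctl and procfs enabled.

/* Userspace sketch: read a few of the files registered by this module. */
#include <stdio.h>

static void dump(const char *path)
{
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	fclose(f);
}

int main(void)
{
	dump("/proc/sys/dev/parport/default/timeslice");	/* writable, jiffies-backed ms value */
	dump("/proc/sys/dev/parport/parport0/base-addr");	/* read-only, do_hardware_base_addr() */
	dump("/proc/sys/dev/parport/parport0/irq");		/* read-only, do_hardware_irq() */
	dump("/proc/sys/dev/parport/parport0/modes");		/* read-only, do_hardware_modes() */
	return 0;
}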
4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the Interfaces handler.
 *
 * Version:	@(#)dev.h	1.0.10	08/12/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Bjorn Ekwall. <bj0rn@blox.se>
 *		Pekka Riikonen <priikone@poseidon.pspt.fi>
 *
 *		Moved to /usr/include/linux for NET3
 */
#ifndef _LINUX_NETDEVICE_H
#define _LINUX_NETDEVICE_H

#include <linux/timer.h>
#include <linux/bug.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/prefetch.h>
#include <asm/cache.h>
#include <asm/byteorder.h>
#include <asm/local.h>

#include <linux/percpu.h>
#include <linux/rculist.h>
#include <linux/workqueue.h>
#include <linux/dynamic_queue_limits.h>

#include <net/net_namespace.h>
#ifdef CONFIG_DCB
#include <net/dcbnl.h>
#endif
#include <net/netprio_cgroup.h>

#include <linux/netdev_features.h>
#include <linux/neighbour.h>
#include <uapi/linux/netdevice.h>
#include <uapi/linux/if_bonding.h>
#include <uapi/linux/pkt_cls.h>
#include <uapi/linux/netdev.h>
#include <linux/hashtable.h>
#include <linux/rbtree.h>
#include <net/net_trackers.h>
#include <net/net_debug.h>
#include <net/dropreason-core.h>

struct netpoll_info;
struct device;
struct ethtool_ops;
struct kernel_hwtstamp_config;
struct phy_device;
struct dsa_port;
struct ip_tunnel_parm;
struct macsec_context;
struct macsec_ops;
struct netdev_name_node;
struct sd_flow_limit;
struct sfp_bus;
/* 802.11 specific */
struct wireless_dev;
/* 802.15.4 specific */
struct wpan_dev;
struct mpls_dev;
/* UDP Tunnel offloads */
struct udp_tunnel_info;
struct udp_tunnel_nic_info;
struct udp_tunnel_nic;
struct bpf_prog;
struct xdp_buff;
struct xdp_frame;
struct xdp_metadata_ops;
struct xdp_md;
typedef u32 xdp_features_t;

void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev,
				    const struct ethtool_ops *ops);
void netdev_sw_irq_coalesce_default_on(struct net_device *dev);

/* Backlog congestion levels */
#define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
#define NET_RX_DROP		1	/* packet dropped */

#define MAX_NEST_DEV 8

/*
 * Transmit return codes: transmit return codes originate from three different
 * namespaces:
 *
 * - qdisc return codes
 * - driver transmit return codes
 * - errno values
 *
 * Drivers are allowed to return any one of those in their hard_start_xmit()
 * function. Real network devices commonly used with qdiscs should only return
 * the driver transmit return codes though - when qdiscs are used, the actual
 * transmission happens asynchronously, so the value is not propagated to
 * higher layers. Virtual network devices transmit synchronously; in this case
 * the driver transmit return codes are consumed by dev_queue_xmit(), and all
 * others are propagated to higher layers.
 */

/* qdisc ->enqueue() return codes.
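 *
 * Illustrative sketch (not part of the original header): a caller that hands
 * an skb to the qdisc layer typically folds NET_XMIT_CN back into "success"
 * with net_xmit_eval(), defined just below, since congestion notification
 * does not mean the packet was lost.  my_proto_xmit() is a made-up name.
 *
 *	static int my_proto_xmit(struct sk_buff *skb)
 *	{
 *		int rc = dev_queue_xmit(skb);
 *
 *		return net_xmit_eval(rc);
 *	}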
*/ #define NET_XMIT_SUCCESS 0x00 #define NET_XMIT_DROP 0x01 /* skb dropped */ #define NET_XMIT_CN 0x02 /* congestion notification */ #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It * indicates that the device will soon be dropping packets, or already drops * some packets of the same priority; prompting us to send less aggressively. */ #define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e)) #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) /* Driver transmit return codes */ #define NETDEV_TX_MASK 0xf0 enum netdev_tx { __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ NETDEV_TX_OK = 0x00, /* driver took care of packet */ NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ }; typedef enum netdev_tx netdev_tx_t; /* * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant; * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed. */ static inline bool dev_xmit_complete(int rc) { /* * Positive cases with an skb consumed by a driver: * - successful transmission (rc == NETDEV_TX_OK) * - error while transmitting (rc < 0) * - error while queueing to a different device (rc & NET_XMIT_MASK) */ if (likely(rc < NET_XMIT_MASK)) return true; return false; } /* * Compute the worst-case header length according to the protocols * used. */ #if defined(CONFIG_HYPERV_NET) # define LL_MAX_HEADER 128 #elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) # if defined(CONFIG_MAC80211_MESH) # define LL_MAX_HEADER 128 # else # define LL_MAX_HEADER 96 # endif #else # define LL_MAX_HEADER 32 #endif #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \ !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL) #define MAX_HEADER LL_MAX_HEADER #else #define MAX_HEADER (LL_MAX_HEADER + 48) #endif /* * Old network device statistics. Fields are native words * (unsigned long) so they can be read and written atomically. */ #define NET_DEV_STAT(FIELD) \ union { \ unsigned long FIELD; \ atomic_long_t __##FIELD; \ } struct net_device_stats { NET_DEV_STAT(rx_packets); NET_DEV_STAT(tx_packets); NET_DEV_STAT(rx_bytes); NET_DEV_STAT(tx_bytes); NET_DEV_STAT(rx_errors); NET_DEV_STAT(tx_errors); NET_DEV_STAT(rx_dropped); NET_DEV_STAT(tx_dropped); NET_DEV_STAT(multicast); NET_DEV_STAT(collisions); NET_DEV_STAT(rx_length_errors); NET_DEV_STAT(rx_over_errors); NET_DEV_STAT(rx_crc_errors); NET_DEV_STAT(rx_frame_errors); NET_DEV_STAT(rx_fifo_errors); NET_DEV_STAT(rx_missed_errors); NET_DEV_STAT(tx_aborted_errors); NET_DEV_STAT(tx_carrier_errors); NET_DEV_STAT(tx_fifo_errors); NET_DEV_STAT(tx_heartbeat_errors); NET_DEV_STAT(tx_window_errors); NET_DEV_STAT(rx_compressed); NET_DEV_STAT(tx_compressed); }; #undef NET_DEV_STAT /* per-cpu stats, allocated on demand. * Try to fit them in a single cache line, for dev_get_stats() sake. 
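 *
 * Illustrative note (not from the original header): these counters back the
 * rx_dropped/tx_dropped/rx_nohandler values reported by dev_get_stats() and
 * are normally bumped through the dev_core_stats_*_inc() helpers provided
 * further down in this header.  A hypothetical drop path might look like:
 *
 *	if (unlikely(!netif_running(dev))) {
 *		dev_core_stats_rx_dropped_inc(dev);
 *		kfree_skb(skb);
 *		return NET_RX_DROP;
 *	}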
*/ struct net_device_core_stats { unsigned long rx_dropped; unsigned long tx_dropped; unsigned long rx_nohandler; unsigned long rx_otherhost_dropped; } __aligned(4 * sizeof(unsigned long)); #include <linux/cache.h> #include <linux/skbuff.h> struct neighbour; struct neigh_parms; struct sk_buff; struct netdev_hw_addr { struct list_head list; struct rb_node node; unsigned char addr[MAX_ADDR_LEN]; unsigned char type; #define NETDEV_HW_ADDR_T_LAN 1 #define NETDEV_HW_ADDR_T_SAN 2 #define NETDEV_HW_ADDR_T_UNICAST 3 #define NETDEV_HW_ADDR_T_MULTICAST 4 bool global_use; int sync_cnt; int refcount; int synced; struct rcu_head rcu_head; }; struct netdev_hw_addr_list { struct list_head list; int count; /* Auxiliary tree for faster lookup on addition and deletion */ struct rb_root tree; }; #define netdev_hw_addr_list_count(l) ((l)->count) #define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0) #define netdev_hw_addr_list_for_each(ha, l) \ list_for_each_entry(ha, &(l)->list, list) #define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc) #define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc) #define netdev_for_each_uc_addr(ha, dev) \ netdev_hw_addr_list_for_each(ha, &(dev)->uc) #define netdev_for_each_synced_uc_addr(_ha, _dev) \ netdev_for_each_uc_addr((_ha), (_dev)) \ if ((_ha)->sync_cnt) #define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc) #define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc) #define netdev_for_each_mc_addr(ha, dev) \ netdev_hw_addr_list_for_each(ha, &(dev)->mc) #define netdev_for_each_synced_mc_addr(_ha, _dev) \ netdev_for_each_mc_addr((_ha), (_dev)) \ if ((_ha)->sync_cnt) struct hh_cache { unsigned int hh_len; seqlock_t hh_lock; /* cached hardware header; allow for machine alignment needs. */ #define HH_DATA_MOD 16 #define HH_DATA_OFF(__len) \ (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1)) #define HH_DATA_ALIGN(__len) \ (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1)) unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; }; /* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much. * Alternative is: * dev->hard_header_len ? (dev->hard_header_len + * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 * * We could use other alignment values, but we must maintain the * relationship HH alignment <= LL alignment. */ #define LL_RESERVED_SPACE(dev) \ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom) + (extra)) \ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len); int (*parse)(const struct sk_buff *skb, unsigned char *haddr); int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); bool (*validate)(const char *ll_header, unsigned int len); __be16 (*parse_protocol)(const struct sk_buff *skb); }; /* These flag bits are private to the generic network queueing * layer; they may not be explicitly referenced by any other * code. 
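 *
 * Illustrative note (not part of the original comment): other code observes
 * these bits only through accessors declared later in this header, such as
 * netif_running(), netif_carrier_ok() and netif_device_present().  For
 * example (my_refill_rx() is a made-up driver helper):
 *
 *	if (netif_running(dev) && netif_carrier_ok(dev))
 *		my_refill_rx(dev);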
*/ enum netdev_state_t { __LINK_STATE_START, __LINK_STATE_PRESENT, __LINK_STATE_NOCARRIER, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_TESTING, }; struct gro_list { struct list_head list; int count; }; /* * size of gro hash buckets, must less than bit number of * napi_struct::gro_bitmask */ #define GRO_HASH_BUCKETS 8 /* * Structure for NAPI scheduling similar to tasklet but with weighting */ struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means * whoever atomically sets that bit can add this napi_struct * to the per-CPU poll_list, and whoever clears that bit * can remove from the list right before clearing the bit. */ struct list_head poll_list; unsigned long state; int weight; int defer_hard_irqs_count; unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL /* CPU actively polling if netpoll is configured */ int poll_owner; #endif /* CPU on which NAPI has been scheduled for processing */ int list_owner; struct net_device *dev; struct gro_list gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; struct list_head rx_list; /* Pending GRO_NORMAL skbs */ int rx_count; /* length of rx_list */ unsigned int napi_id; struct hrtimer timer; struct task_struct *thread; /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; int irq; }; enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_LISTED, /* NAPI added to system lists */ NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ }; enum { NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), }; enum gro_result { GRO_MERGED, GRO_MERGED_FREE, GRO_HELD, GRO_NORMAL, GRO_CONSUMED, }; typedef enum gro_result gro_result_t; /* * enum rx_handler_result - Possible return values for rx_handlers. * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it * further. * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in * case skb->dev was changed by rx_handler. * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called. * * rx_handlers are functions called from inside __netif_receive_skb(), to do * special processing of the skb, prior to delivery to protocol handlers. * * Currently, a net_device can only have a single rx_handler registered. Trying * to register a second rx_handler will return -EBUSY. * * To register a rx_handler on a net_device, use netdev_rx_handler_register(). * To unregister a rx_handler on a net_device, use * netdev_rx_handler_unregister(). 
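 *
 * Illustrative sketch (not part of the original documentation): a minimal
 * pass-through handler and its registration under RTNL.  my_rx_handler(),
 * my_wants_skb() and my_priv are made-up names.
 *
 *	static rx_handler_result_t my_rx_handler(struct sk_buff **pskb)
 *	{
 *		struct sk_buff *skb = *pskb;
 *
 *		if (!my_wants_skb(skb))
 *			return RX_HANDLER_PASS;
 *		consume_skb(skb);
 *		return RX_HANDLER_CONSUMED;
 *	}
 *
 *	err = netdev_rx_handler_register(dev, my_rx_handler, my_priv);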
* * Upon return, rx_handler is expected to tell __netif_receive_skb() what to * do with the skb. * * If the rx_handler consumed the skb in some way, it should return * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for * the skb to be delivered in some other way. * * If the rx_handler changed skb->dev, to divert the skb to another * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the * new device will be called if it exists. * * If the rx_handler decides the skb should be ignored, it should return * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that * are registered on exact device (ptype->dev == skb->dev). * * If the rx_handler didn't change skb->dev, but wants the skb to be normally * delivered, it should return RX_HANDLER_PASS. * * A device without a registered rx_handler will behave as if rx_handler * returned RX_HANDLER_PASS. */ enum rx_handler_result { RX_HANDLER_CONSUMED, RX_HANDLER_ANOTHER, RX_HANDLER_EXACT, RX_HANDLER_PASS, }; typedef enum rx_handler_result rx_handler_result_t; typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); void __napi_schedule(struct napi_struct *n); void __napi_schedule_irqoff(struct napi_struct *n); static inline bool napi_disable_pending(struct napi_struct *n) { return test_bit(NAPI_STATE_DISABLE, &n->state); } static inline bool napi_prefer_busy_poll(struct napi_struct *n) { return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); } /** * napi_is_scheduled - test if NAPI is scheduled * @n: NAPI context * * This check is "best-effort". With no locking implemented, * a NAPI can be scheduled or terminate right after this check * and produce not precise results. * * NAPI_STATE_SCHED is an internal state, napi_is_scheduled * should not be used normally and napi_schedule should be * used instead. * * Use only if the driver really needs to check if a NAPI * is scheduled for example in the context of delayed timer * that can be skipped if a NAPI is already scheduled. * * Return True if NAPI is scheduled, False otherwise. */ static inline bool napi_is_scheduled(struct napi_struct *n) { return test_bit(NAPI_STATE_SCHED, &n->state); } bool napi_schedule_prep(struct napi_struct *n); /** * napi_schedule - schedule NAPI poll * @n: NAPI context * * Schedule NAPI poll routine to be called if it is not already * running. * Return true if we schedule a NAPI or false if not. * Refer to napi_schedule_prep() for additional reason on why * a NAPI might not be scheduled. */ static inline bool napi_schedule(struct napi_struct *n) { if (napi_schedule_prep(n)) { __napi_schedule(n); return true; } return false; } /** * napi_schedule_irqoff - schedule NAPI poll * @n: NAPI context * * Variant of napi_schedule(), assuming hard irqs are masked. */ static inline void napi_schedule_irqoff(struct napi_struct *n) { if (napi_schedule_prep(n)) __napi_schedule_irqoff(n); } /** * napi_complete_done - NAPI processing complete * @n: NAPI context * @work_done: number of packets processed * * Mark NAPI processing as complete. Should only be called if poll budget * has not been completely consumed. * Prefer over napi_complete(). * Return false if device should avoid rearming interrupts. 
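 *
 * Illustrative sketch (not part of the original kernel-doc): the usual
 * driver poll() pairing, where interrupts are re-enabled only if
 * napi_complete_done() accepted the completion.  my_clean_rx() and
 * my_enable_irq() are made-up driver helpers.
 *
 *	static int my_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = my_clean_rx(napi, budget);
 *
 *		if (work < budget && napi_complete_done(napi, work))
 *			my_enable_irq(napi);
 *		return work;
 *	}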
*/ bool napi_complete_done(struct napi_struct *n, int work_done); static inline bool napi_complete(struct napi_struct *n) { return napi_complete_done(n, 0); } int dev_set_threaded(struct net_device *dev, bool threaded); /** * napi_disable - prevent NAPI from scheduling * @n: NAPI context * * Stop NAPI from being scheduled on this context. * Waits till any outstanding processing completes. */ void napi_disable(struct napi_struct *n); void napi_enable(struct napi_struct *n); /** * napi_synchronize - wait until NAPI is not running * @n: NAPI context * * Wait until NAPI is done being scheduled on this context. * Waits till any outstanding processing completes but * does not disable future activations. */ static inline void napi_synchronize(const struct napi_struct *n) { if (IS_ENABLED(CONFIG_SMP)) while (test_bit(NAPI_STATE_SCHED, &n->state)) msleep(1); else barrier(); } /** * napi_if_scheduled_mark_missed - if napi is running, set the * NAPIF_STATE_MISSED * @n: NAPI context * * If napi is running, set the NAPIF_STATE_MISSED, and return true if * NAPI is scheduled. **/ static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) { unsigned long val, new; val = READ_ONCE(n->state); do { if (val & NAPIF_STATE_DISABLE) return true; if (!(val & NAPIF_STATE_SCHED)) return false; new = val | NAPIF_STATE_MISSED; } while (!try_cmpxchg(&n->state, &val, new)); return true; } enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, __QUEUE_STATE_FROZEN, }; #define QUEUE_STATE_DRV_XOFF (1 << __QUEUE_STATE_DRV_XOFF) #define QUEUE_STATE_STACK_XOFF (1 << __QUEUE_STATE_STACK_XOFF) #define QUEUE_STATE_FROZEN (1 << __QUEUE_STATE_FROZEN) #define QUEUE_STATE_ANY_XOFF (QUEUE_STATE_DRV_XOFF | QUEUE_STATE_STACK_XOFF) #define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ QUEUE_STATE_FROZEN) #define QUEUE_STATE_DRV_XOFF_OR_FROZEN (QUEUE_STATE_DRV_XOFF | \ QUEUE_STATE_FROZEN) /* * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The * netif_tx_* functions below are used to manipulate this flag. The * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit * queue independently. The netif_xmit_*stopped functions below are called * to check if the queue has been stopped by the driver or stack (either * of the XOFF bits are set in the state). Drivers should not need to call * netif_xmit*stopped functions, they should only be using netif_tx_*. 
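 *
 * Illustrative sketch (not part of the original comment), with my_ring_space()
 * and the ring/wake_thresh variables made up: the xmit path stops its queue
 * when the ring is nearly full,
 *
 *	struct netdev_queue *txq = netdev_get_tx_queue(dev, ring->index);
 *
 *	if (my_ring_space(ring) < MAX_SKB_FRAGS + 1)
 *		netif_tx_stop_queue(txq);
 *
 * and the TX completion path wakes it once descriptors have been reclaimed:
 *
 *	if (netif_tx_queue_stopped(txq) && my_ring_space(ring) > wake_thresh)
 *		netif_tx_wake_queue(txq);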
*/ struct netdev_queue { /* * read-mostly part */ struct net_device *dev; netdevice_tracker dev_tracker; struct Qdisc __rcu *qdisc; struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; #endif #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) int numa_node; #endif unsigned long tx_maxrate; /* * Number of TX timeouts for this queue * (/sys/class/net/DEV/Q/trans_timeout) */ atomic_long_t trans_timeout; /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif /* NAPI instance for the queue * Readers and writers must hold RTNL */ struct napi_struct *napi; /* * write-mostly part */ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; /* * Time (in jiffies) of last Tx */ unsigned long trans_start; unsigned long state; #ifdef CONFIG_BQL struct dql dql; #endif } ____cacheline_aligned_in_smp; extern int sysctl_fb_tunnels_only_for_init_net; extern int sysctl_devconf_inherit_init_net; /* * sysctl_fb_tunnels_only_for_init_net == 0 : For all netns * == 1 : For initns only * == 2 : For none. */ static inline bool net_has_fallback_tunnels(const struct net *net) { #if IS_ENABLED(CONFIG_SYSCTL) int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net); return !fb_tunnels_only_for_init_net || (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1); #else return true; #endif } static inline int net_inherit_devconf(void) { #if IS_ENABLED(CONFIG_SYSCTL) return READ_ONCE(sysctl_devconf_inherit_init_net); #else return 0; #endif } static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) return q->numa_node; #else return NUMA_NO_NODE; #endif } static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node) { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) q->numa_node = node; #endif } #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); #endif /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type { XPS_CPUS = 0, XPS_RXQS, XPS_MAPS_MAX, }; #ifdef CONFIG_XPS /* * This structure holds an XPS map which can be of variable length. The * map is an array of queues. */ struct xps_map { unsigned int len; unsigned int alloc_len; struct rcu_head rcu; u16 queues[]; }; #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) #define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \ - sizeof(struct xps_map)) / sizeof(u16)) /* * This structure holds all XPS maps for device. Maps are indexed by CPU. * * We keep track of the number of cpus/rxqs used when the struct is allocated, * in nr_ids. This will help not accessing out-of-bound memory. * * We keep track of the number of traffic classes used when the struct is * allocated, in num_tc. This will be used to navigate the maps, to ensure we're * not crossing its upper bound, as the original dev->num_tc can be updated in * the meantime. 
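 *
 * Illustrative note (not part of the original comment): drivers normally do
 * not touch these maps directly.  User space writes a CPU mask to
 * /sys/class/net/<dev>/queues/tx-<n>/xps_cpus, or the driver seeds an
 * initial affinity with netif_set_xps_queue(), e.g. one CPU per transmit
 * queue:
 *
 *	netif_set_xps_queue(dev, cpumask_of(i), i);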
*/ struct xps_dev_maps { struct rcu_head rcu; unsigned int nr_ids; s16 num_tc; struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ }; #define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) #define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\ (_rxqs * (_tcs) * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ #define TC_MAX_QUEUE 16 #define TC_BITMASK 15 /* HW offloaded queuing disciplines txq count and offset maps */ struct netdev_tc_txq { u16 count; u16 offset; }; #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) /* * This structure is to hold information about the device * configured to run FCoE protocol stack. */ struct netdev_fcoe_hbainfo { char manufacturer[64]; char serial_number[64]; char hardware_version[64]; char driver_version[64]; char optionrom_version[64]; char firmware_version[64]; char model[256]; char model_description[256]; }; #endif #define MAX_PHYS_ITEM_ID_LEN 32 /* This structure holds a unique identifier to identify some * physical item (port for example) used by a netdevice. */ struct netdev_phys_item_id { unsigned char id[MAX_PHYS_ITEM_ID_LEN]; unsigned char id_len; }; static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, struct netdev_phys_item_id *b) { return a->id_len == b->id_len && memcmp(a->id, b->id, a->id_len) == 0; } typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); enum net_device_path_type { DEV_PATH_ETHERNET = 0, DEV_PATH_VLAN, DEV_PATH_BRIDGE, DEV_PATH_PPPOE, DEV_PATH_DSA, DEV_PATH_MTK_WDMA, }; struct net_device_path { enum net_device_path_type type; const struct net_device *dev; union { struct { u16 id; __be16 proto; u8 h_dest[ETH_ALEN]; } encap; struct { enum { DEV_PATH_BR_VLAN_KEEP, DEV_PATH_BR_VLAN_TAG, DEV_PATH_BR_VLAN_UNTAG, DEV_PATH_BR_VLAN_UNTAG_HW, } vlan_mode; u16 vlan_id; __be16 vlan_proto; } bridge; struct { int port; u16 proto; } dsa; struct { u8 wdma_idx; u8 queue; u16 wcid; u8 bss; u8 amsdu; } mtk_wdma; }; }; #define NET_DEVICE_PATH_STACK_MAX 5 #define NET_DEVICE_PATH_VLAN_MAX 2 struct net_device_path_stack { int num_paths; struct net_device_path path[NET_DEVICE_PATH_STACK_MAX]; }; struct net_device_path_ctx { const struct net_device *dev; u8 daddr[ETH_ALEN]; int num_vlans; struct { u16 id; __be16 proto; } vlan[NET_DEVICE_PATH_VLAN_MAX]; }; enum tc_setup_type { TC_QUERY_CAPS, TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, TC_SETUP_BLOCK, TC_SETUP_QDISC_CBS, TC_SETUP_QDISC_RED, TC_SETUP_QDISC_PRIO, TC_SETUP_QDISC_MQ, TC_SETUP_QDISC_ETF, TC_SETUP_ROOT_QDISC, TC_SETUP_QDISC_GRED, TC_SETUP_QDISC_TAPRIO, TC_SETUP_FT, TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, TC_SETUP_QDISC_HTB, TC_SETUP_ACT, }; /* These structures hold the attributes of bpf state that are being passed * to the netdevice through the bpf op. */ enum bpf_netdev_command { /* Set or clear a bpf program used in the earliest stages of packet * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee * is responsible for calling bpf_prog_put on any old progs that are * stored. In case of error, the callee need not release the new prog * reference, but on success it takes ownership and must bpf_prog_put * when it is no longer used. */ XDP_SETUP_PROG, XDP_SETUP_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. 
*/ BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, XDP_SETUP_XSK_POOL, }; struct bpf_prog_offload_ops; struct netlink_ext_ack; struct xdp_umem; struct xdp_dev_bulk_queue; struct bpf_xdp_link; enum bpf_xdp_mode { XDP_MODE_SKB = 0, XDP_MODE_DRV = 1, XDP_MODE_HW = 2, __MAX_XDP_MODE }; struct bpf_xdp_entity { struct bpf_prog *prog; struct bpf_xdp_link *link; }; struct netdev_bpf { enum bpf_netdev_command command; union { /* XDP_SETUP_PROG */ struct { u32 flags; struct bpf_prog *prog; struct netlink_ext_ack *extack; }; /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */ struct { struct bpf_offloaded_map *offmap; }; /* XDP_SETUP_XSK_POOL */ struct { struct xsk_buff_pool *pool; u16 queue_id; } xsk; }; }; /* Flags for ndo_xsk_wakeup. */ #define XDP_WAKEUP_RX (1 << 0) #define XDP_WAKEUP_TX (1 << 1) #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); void (*xdo_dev_state_delete) (struct xfrm_state *x); void (*xdo_dev_state_free) (struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); void (*xdo_dev_state_update_stats) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); }; #endif struct dev_ifalias { struct rcu_head rcuhead; char ifalias[]; }; struct devlink; struct tlsdev_ops; struct netdev_net_notifier { struct list_head list; struct notifier_block *nb; }; /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); * This function is called once when a network device is registered. * The network device can use this for any late stage initialization * or semantic validation. It can fail with an error code which will * be propagated back to register_netdev. * * void (*ndo_uninit)(struct net_device *dev); * This function is called when device is unregistered or when registration * fails. It is not called if init fails. * * int (*ndo_open)(struct net_device *dev); * This function is called when a network device transitions to the up * state. * * int (*ndo_stop)(struct net_device *dev); * This function is called when a network device transitions to the down * state. * * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, * struct net_device *dev); * Called when a packet needs to be transmitted. * Returns NETDEV_TX_OK. Can return NETDEV_TX_BUSY, but you should stop * the queue before that can happen; it's for obsolete devices and weird * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. * Required; cannot be NULL. * * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, * struct net_device *dev * netdev_features_t features); * Called by core transmit path to determine if device is capable of * performing offload operations on a given packet. This is to give * the device an opportunity to implement any restrictions that cannot * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. 
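 *
 * Illustrative sketch (not part of the original documentation) for the
 * ndo_start_xmit() hook described above: post the frame, then stop the queue
 * while the ring is too full to take another worst-case packet, so that
 * NETDEV_TX_BUSY never needs to be returned.  struct my_priv, my_post_frame()
 * and my_ring_nearly_full() are made-up names.
 *
 *	static netdev_tx_t my_start_xmit(struct sk_buff *skb,
 *					 struct net_device *dev)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *
 *		my_post_frame(priv, skb);
 *		if (my_ring_nearly_full(priv))
 *			netif_stop_queue(dev);
 *		return NETDEV_TX_OK;
 *	}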
* * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * struct net_device *sb_dev); * Called to decide which queue to use when device supports multiple * transmit queues. * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make * changes to configuration when multicast or promiscuous is enabled. * * void (*ndo_set_rx_mode)(struct net_device *dev); * This function is called device changes address list filtering. * If driver handles unicast address filtering, it should set * IFF_UNICAST_FLT in its priv_flags. * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. If this interface is not defined, the * MAC address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Old-style ioctl entry point. This is used internally by the * appletalk and ieee802154 subsystems but is no longer called by * the device ioctl handler. * * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); * Used by the bonding driver for its device specific ioctls: * SIOCBONDENSLAVE, SIOCBONDRELEASE, SIOCBONDSETHWADDR, SIOCBONDCHANGEACTIVE, * SIOCBONDSLAVEINFOQUERY, and SIOCBONDINFOQUERY * * * int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Called for ethernet specific ioctls: SIOCGMIIPHY, SIOCGMIIREG, * SIOCSMIIREG, SIOCSHWTSTAMP and SIOCGHWTSTAMP. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface * is retained for legacy reasons; new devices should use the bus * interface (PCI) for low level management. * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); * Called when a user wants to change the Maximum Transfer Unit * of a device. * * void (*ndo_tx_timeout)(struct net_device *dev, unsigned int txqueue); * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * * void (*ndo_get_stats64)(struct net_device *dev, * struct rtnl_link_stats64 *storage); * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); * Called when a user wants to get the network device usage * statistics. Drivers must do one of the following: * 1. Define @ndo_get_stats64 to fill in a zero-initialised * rtnl_link_stats64 structure passed by the caller. * 2. Define @ndo_get_stats to update a net_device_stats structure * (which should normally be dev->stats) and return a pointer to * it. The structure may be changed asynchronously only if each * field is written atomically. * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id) * Return true if this device supports offload stats of this attr_id. * * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, * void *attr_data) * Get statistics for offload operations by attr_id. Write it into the * attr_data pointer. * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. 
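 *
 * Illustrative sketch (not part of the original documentation) for the
 * ndo_get_stats64() hook described above, for a driver keeping its counters
 * in dev->stats.  my_sync_counters() is a made-up helper that refreshes the
 * counters from hardware.
 *
 *	static void my_get_stats64(struct net_device *dev,
 *				   struct rtnl_link_stats64 *storage)
 *	{
 *		my_sync_counters(dev);
 *		netdev_stats_to_stats64(storage, &dev->stats);
 *	}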
* * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); * * Enable or disable the VF ability to query its RSS Redirection Table and * Hash Key. This is needed since on some devices VF share this information * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, * void *type_data); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif * tx queues stopped. This allows the netdevice to perform queue * management safely. * * Fiber Channel over Ethernet (FCoE) offload functions. * int (*ndo_fcoe_enable)(struct net_device *dev); * Called when the FCoE protocol stack wants to start using LLD for FCoE * so the underlying device can perform whatever needed configuration or * initialization to support acceleration of FCoE traffic. * * int (*ndo_fcoe_disable)(struct net_device *dev); * Called when the FCoE protocol stack wants to stop using LLD for FCoE * so the underlying device can perform whatever needed clean-ups to * stop supporting acceleration of FCoE traffic. * * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, * struct scatterlist *sgl, unsigned int sgc); * Called when the FCoE Initiator wants to initialize an I/O that * is a possible candidate for Direct Data Placement (DDP). The LLD can * perform necessary setup and returns 1 to indicate the device is set up * successfully to perform DDP on this I/O, otherwise this returns 0. * * int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); * Called when the FCoE Initiator/Target is done with the DDPed I/O as * indicated by the FC exchange id 'xid', so the underlying device can * clean up and reuse resources for later DDP requests. * * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, * struct scatterlist *sgl, unsigned int sgc); * Called when the FCoE Target wants to initialize an I/O that * is a possible candidate for Direct Data Placement (DDP). The LLD can * perform necessary setup and returns 1 to indicate the device is set up * successfully to perform DDP on this I/O, otherwise this returns 0. * * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, * struct netdev_fcoe_hbainfo *hbainfo); * Called when the FCoE Protocol stack wants information on the underlying * device. 
This information is utilized by the FCoE protocol stack to * register attributes with Fiber Channel management service as per the * FC-GS Fabric Device Management Information(FDMI) specification. * * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); * Called when the underlying device wants to override default World Wide * Name (WWN) generation mechanism in FCoE protocol stack to pass its own * World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE * protocol stack to use. * * RFS acceleration. * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, * u16 rxq_index, u32 flow_id); * Set hardware filter for RFS. rxq_index is the target queue index; * flow_id is a flow ID to be passed to rps_may_expire_flow() later. * Return the filter ID on success, or a negative error code. * * Slave management functions (for bridge, bonding, etc). * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to make another netdev an underling. * * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. * * struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev, * struct sk_buff *skb, * bool all_slaves); * Get the xmit slave of master device. If all_slaves is true, function * assume all the slaves can transmit. * * Feature/offload setting functions. * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); * Adjusts the requested feature flags according to device-specific * constraints, and returns the resulting flags. Must not modify * the device state. * * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); * Called to update device configuration to new features. Passed * feature set might be less than what was returned by ndo_fix_features()). * Must return >0 or -errno if it changed dev->features itself. * * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid, u16 flags, * struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * * int (*ndo_mdb_add)(struct net_device *dev, struct nlattr *tb[], * u16 nlmsg_flags, struct netlink_ext_ack *extack); * Adds an MDB entry to dev. * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Deletes the MDB entry from dev. * int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Bulk deletes MDB entries from dev. * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, * struct netlink_callback *cb); * Dumps MDB entries from dev. The first argument (marker) in the netlink * callback is used by core rtnetlink code. 
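 *
 * Illustrative sketch (not part of the original documentation) for the
 * ndo_fix_features() hook described above: clear feature bits the hardware
 * cannot honour together.  The specific dependency shown is made up for
 * illustration.
 *
 *	static netdev_features_t my_fix_features(struct net_device *dev,
 *						 netdev_features_t features)
 *	{
 *		if (!(features & NETIF_F_RXCSUM))
 *			features &= ~NETIF_F_LRO;
 *		return features;
 *	}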
* * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags, struct netlink_ext_ack *extack) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, * struct net_device *dev, u32 filter_mask, * int nlflags) * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags); * * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); * Called to change device carrier. Soft-devices (like dummy, team, etc) * which do not represent real hardware may define this to allow their * userspace components to manage their virtual carrier state. Devices * that determine carrier state from physical hardware properties (eg * network cables) or protocol-dependent mechanisms (eg * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function. * * int (*ndo_get_phys_port_id)(struct net_device *dev, * struct netdev_phys_item_id *ppid); * Called to get ID of physical port of this device. If driver does * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * * int (*ndo_get_port_parent_id)(struct net_device *dev, * struct netdev_phys_item_id *ppid) * Called to get the parent ID of the physical port of this device. * * void* (*ndo_dfwd_add_station)(struct net_device *pdev, * struct net_device *dev) * Called by upper layer devices to accelerate switching or other * station functionality into hardware. 'pdev is the lowerdev * to use for the offload and 'dev' is the net device that will * back the offload. Returns a pointer to the private structure * the upper layer will maintain. * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv) * Called by upper layer device to delete the station created * by 'ndo_dfwd_add_station'. 'pdev' is the net device backing * the station and priv is the structure returned by the add * operation. * int (*ndo_set_tx_maxrate)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific * TX queue. * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); * This function is used to get egress tunnel information for given skb. * This is useful for retrieving outer tunnel header parameters while * sampling packet. * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); * This function is used to specify the headroom that the skb must * consider when allocation skb during packet reception. Setting * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. * int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); * This function is used to set or query state related to XDP on the * netdevice and manage BPF offload. See definition of * enum bpf_netdev_command for details. * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, * u32 flags); * This function is used to submit @n XDP packets for transmit on a * netdevice. Returns number of frames successfully transmitted, frames * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, * struct xdp_buff *xdp); * Get the xmit slave of master device based on the xdp_buff. 
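 *
 * Illustrative sketch (not part of the original documentation) for the
 * ndo_xdp_xmit() hook described above: send what fits and report how many
 * frames were accepted so the caller can release the rest.  my_xmit_frame()
 * is a made-up helper; XDP_XMIT_FLAGS_MASK comes from net/xdp.h.
 *
 *	static int my_xdp_xmit(struct net_device *dev, int n,
 *			       struct xdp_frame **frames, u32 flags)
 *	{
 *		int i, sent = 0;
 *
 *		if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 *			return -EINVAL;
 *		for (i = 0; i < n; i++) {
 *			if (my_xmit_frame(dev, frames[i]))
 *				break;
 *			sent++;
 *		}
 *		return sent;
 *	}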
* int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); * This function is used to wake up the softirq, ksoftirqd or kthread * responsible for sending and/or receiving packets on a specific * queue id bound to an AF_XDP socket. The flags field specifies if * only RX, only Tx, or both should be woken up using the flags * XDP_WAKEUP_RX and XDP_WAKEUP_TX. * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); * If a device is paired with a peer device, return the peer instance. * The caller must be under RCU read context. * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); * Get the forwarding path to reach the real device from the HW destination address * ktime_t (*ndo_get_tstamp)(struct net_device *dev, * const struct skb_shared_hwtstamps *hwtstamps, * bool cycles); * Get hardware timestamp based on normal/adjustable time or free running * cycle counter. This function is required if physical clock supports a * free running cycle counter. * * int (*ndo_hwtstamp_get)(struct net_device *dev, * struct kernel_hwtstamp_config *kernel_config); * Get the currently configured hardware timestamping parameters for the * NIC device. * * int (*ndo_hwtstamp_set)(struct net_device *dev, * struct kernel_hwtstamp_config *kernel_config, * struct netlink_ext_ack *extack); * Change the hardware timestamping parameters for NIC device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); netdev_features_t (*ndo_features_check)(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); int (*ndo_set_mac_address)(struct net_device *dev, void *addr); int (*ndo_validate_addr)(struct net_device *dev); int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_siocwandev)(struct net_device *dev, struct if_settings *ifs); int (*ndo_siocdevprivate)(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *); void (*ndo_tx_timeout) (struct net_device *dev, unsigned int txqueue); void (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, void *attr_data); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); int (*ndo_netpoll_setup)(struct net_device *dev, struct netpoll_info *info); void 
(*ndo_netpoll_cleanup)(struct net_device *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, int queue, u16 vlan, u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, int vf, struct ifla_vf_info *ivf); int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); int (*ndo_get_vf_stats)(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats); int (*ndo_set_vf_port)(struct net_device *dev, int vf, struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); int (*ndo_get_vf_guid)(struct net_device *dev, int vf, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); int (*ndo_set_vf_guid)(struct net_device *dev, int vf, u64 guid, int guid_type); int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, void *type_data); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, struct scatterlist *sgl, unsigned int sgc); int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, struct scatterlist *sgl, unsigned int sgc); int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, struct netdev_fcoe_hbainfo *hbainfo); #endif #if IS_ENABLED(CONFIG_LIBFCOE) #define NETDEV_FCOE_WWNN 0 #define NETDEV_FCOE_WWPN 1 int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); #endif #ifdef CONFIG_RFS_ACCEL int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); #endif int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev, struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev, struct sk_buff *skb, bool all_slaves); struct net_device* (*ndo_sk_get_lower_dev)(struct net_device *dev, struct sock *sk); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); int (*ndo_neigh_construct)(struct net_device *dev, struct neighbour *n); void (*ndo_neigh_destroy)(struct net_device *dev, struct neighbour *n); int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags, struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx); int (*ndo_fdb_get)(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u32 portid, u32 seq, struct netlink_ext_ack *extack); int (*ndo_mdb_add)(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack); int 
(*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); int (*ndo_mdb_get)(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, struct netlink_ext_ack *extack); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, struct netlink_ext_ack *extack); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); int (*ndo_get_port_parent_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv); int (*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, u32 maxrate); int (*ndo_get_iflink)(const struct net_device *dev); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev, struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); ktime_t (*ndo_get_tstamp)(struct net_device *dev, const struct skb_shared_hwtstamps *hwtstamps, bool cycles); int (*ndo_hwtstamp_get)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config); int (*ndo_hwtstamp_set)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config, struct netlink_ext_ack *extack); }; /** * enum netdev_priv_flags - &struct net_device priv_flags * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to * userspace; this means that the order of these flags can change * during any kernel release. * * You should have a pretty good reason to be extending these flags. 
 *
 * @IFF_802_1Q_VLAN: 802.1Q VLAN device
 * @IFF_EBRIDGE: Ethernet bridging device
 * @IFF_BONDING: bonding master or slave
 * @IFF_ISATAP: ISATAP interface (RFC4214)
 * @IFF_WAN_HDLC: WAN HDLC device
 * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to
 *	release skb->dst
 * @IFF_DONT_BRIDGE: disallow bridging this ether dev
 * @IFF_DISABLE_NETPOLL: disable netpoll at run-time
 * @IFF_MACVLAN_PORT: device used as macvlan port
 * @IFF_BRIDGE_PORT: device used as bridge port
 * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port
 * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit
 * @IFF_UNICAST_FLT: Supports unicast filtering
 * @IFF_TEAM_PORT: device used as team port
 * @IFF_SUPP_NOFCS: device supports sending custom FCS
 * @IFF_LIVE_ADDR_CHANGE: device supports hardware address
 *	change when it's running
 * @IFF_MACVLAN: Macvlan device
 * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account
 *	underlying stacked devices
 * @IFF_L3MDEV_MASTER: device is an L3 master device
 * @IFF_NO_QUEUE: device can run without qdisc attached
 * @IFF_OPENVSWITCH: device is an Open vSwitch master
 * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
 * @IFF_TEAM: device is a team device
 * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured
 * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external
 *	entity (i.e. the master device for bridged veth)
 * @IFF_MACSEC: device is a MACsec device
 * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
 * @IFF_FAILOVER: device is a failover master device
 * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
 * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
 * @IFF_NO_ADDRCONF: prevent ipv6 addrconf
 * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
 *	skb_headlen(skb) == 0 (data starts from frag0)
 * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN
 * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to
 *	ndo_hwtstamp_set() for all timestamp requests regardless of source,
 *	even if those aren't HWTSTAMP_SOURCE_NETDEV.
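 *
 * A minimal sketch of how a driver might opt in to some of these flags;
 * foo_setup() is a hypothetical name, but the pattern (setting the bits in
 * the setup routine, before registration) is the usual one:
 *
 *	static void foo_setup(struct net_device *dev)
 *	{
 *		dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
 *	}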
*/ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, IFF_EBRIDGE = 1<<1, IFF_BONDING = 1<<2, IFF_ISATAP = 1<<3, IFF_WAN_HDLC = 1<<4, IFF_XMIT_DST_RELEASE = 1<<5, IFF_DONT_BRIDGE = 1<<6, IFF_DISABLE_NETPOLL = 1<<7, IFF_MACVLAN_PORT = 1<<8, IFF_BRIDGE_PORT = 1<<9, IFF_OVS_DATAPATH = 1<<10, IFF_TX_SKB_SHARING = 1<<11, IFF_UNICAST_FLT = 1<<12, IFF_TEAM_PORT = 1<<13, IFF_SUPP_NOFCS = 1<<14, IFF_LIVE_ADDR_CHANGE = 1<<15, IFF_MACVLAN = 1<<16, IFF_XMIT_DST_RELEASE_PERM = 1<<17, IFF_L3MDEV_MASTER = 1<<18, IFF_NO_QUEUE = 1<<19, IFF_OPENVSWITCH = 1<<20, IFF_L3MDEV_SLAVE = 1<<21, IFF_TEAM = 1<<22, IFF_RXFH_CONFIGURED = 1<<23, IFF_PHONY_HEADROOM = 1<<24, IFF_MACSEC = 1<<25, IFF_NO_RX_HANDLER = 1<<26, IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN #define IFF_EBRIDGE IFF_EBRIDGE #define IFF_BONDING IFF_BONDING #define IFF_ISATAP IFF_ISATAP #define IFF_WAN_HDLC IFF_WAN_HDLC #define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE #define IFF_DONT_BRIDGE IFF_DONT_BRIDGE #define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL #define IFF_MACVLAN_PORT IFF_MACVLAN_PORT #define IFF_BRIDGE_PORT IFF_BRIDGE_PORT #define IFF_OVS_DATAPATH IFF_OVS_DATAPATH #define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING #define IFF_UNICAST_FLT IFF_UNICAST_FLT #define IFF_TEAM_PORT IFF_TEAM_PORT #define IFF_SUPP_NOFCS IFF_SUPP_NOFCS #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE #define IFF_MACVLAN IFF_MACVLAN #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM #define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED #define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM #define IFF_MACSEC IFF_MACSEC #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR /* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { ML_PRIV_NONE, ML_PRIV_CAN, }; enum netdev_stat_type { NETDEV_PCPU_STAT_NONE, NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ }; enum netdev_reg_state { NETREG_UNINITIALIZED = 0, NETREG_REGISTERED, /* completed register_netdevice */ NETREG_UNREGISTERING, /* called unregister_netdevice */ NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ NETREG_DUMMY, /* dummy device for NAPI poll */ }; /** * struct net_device - The DEVICE structure. * * Actually, this whole structure is a big mistake. It mixes I/O * data with strictly "high-level" data, and it has to know about * almost every data structure used in the INET module. * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. 
* * @name_node: Name hashlist node * @ifalias: SNMP alias * @mem_end: Shared memory end * @mem_start: Shared memory start * @base_addr: Device I/O address * @irq: Device IRQ number * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices * @napi_list: List entry used for polling NAPI devices * @unreg_list: List entry when we are unregistering the * device; see the function unregister_netdev * @close_list: List entry used when we are closing the device * @ptype_all: Device-specific packet handlers for all protocols * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding * @features: Currently active device features * @hw_features: User-changeable features * * @wanted_features: User-requested features * @vlan_features: Mask of features inheritable by VLAN devices * * @hw_enc_features: Mask of features inherited by encapsulating devices * This field indicates what encapsulation * offloads the hardware is capable of doing, * and drivers will need to set them appropriately. * * @mpls_features: Mask of features inheritable by MPLS * @gso_partial_features: value(s) from NETIF_F_GSO\* * * @ifindex: interface index * @group: The group the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead * * @core_stats: core networking counters, * do not use this in drivers * @carrier_up_count: Number of times the carrier has been up * @carrier_down_count: Number of times the carrier has been down * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, * see <net/iw_handler.h> for details. * @wireless_data: Instance data managed by the core of wireless extensions * * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks. * @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour * discovery handling. Necessary for e.g. 6LoWPAN. * @xfrmdev_ops: Transformation offload operations * @tlsdev_ops: Transport Layer Security offload operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * * @flags: Interface flags (a la BSD) * @xdp_features: XDP capability supported by the device * @priv_flags: Like 'flags' but invisible to userspace, * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) * @padded: How much padding added by alloc_netdev() * @operstate: RFC2863 operstate * @link_mode: Mapping policy to operstate * @if_port: Selectable AUI, TP, ... * @dma: DMA channel * @mtu: Interface MTU value * @min_mtu: Interface Minimum MTU value * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. * @min_header_len: Minimum hardware header length * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed * @needed_tailroom: Extra tailroom the hardware may need, but not in all * cases can this be guaranteed. 
Some cases also use * LL_MAX_HEADER instead to allocate the skb * * interface address info: * * @perm_addr: Permanent hw address * @addr_assign_type: Hw address assignment type * @addr_len: Hardware address length * @upper_level: Maximum depth level of upper devices. * @lower_level: Maximum depth level of lower devices. * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one * @name_assign_type: network interface name assignment type * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() * @uc: unicast mac addresses * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses * @queues_kset: Group of all Kobjects in the Tx and RX queues * @promiscuity: Number of times the NIC is told to work in * promiscuous mode; if it becomes 0 the NIC will * exit promiscuous mode * @allmulti: Counter, enables or disables allmulticast mode * * @vlan_info: VLAN info * @dsa_ptr: dsa specific data * @tipc_ptr: TIPC specific data * @atalk_ptr: AppleTalk link * @ip_ptr: IPv4 specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network * device struct * @mpls_ptr: mpls_dev struct pointer * @mctp_ptr: MCTP specific data * * @dev_addr: Hw address (before bcast, * because most packets are unicast) * * @_rx: Array of RX queues * @num_rx_queues: Number of RX queues * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device * @xdp_prog: XDP sockets filter program pointer * @gro_flush_timeout: timeout for GRO layer in NAPI * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one * @tcx_ingress: BPF & clsact qdisc specific data for ingress processing * @ingress_queue: XXX: need comments on this one * @nf_hooks_ingress: netfilter hooks executed for ingress packets * @broadcast: hw bcast address * * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, * indexed by RX queue number. Assigned by driver. 
* This must only be set if the ndo_rx_flow_steer * operation is defined * @index_hlist: Device index hash chain * * @_tx: Array of TX queues * @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time * @real_num_tx_queues: Number of TX queues currently active in device * @qdisc: Root qdisc from userspace point of view * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one * @xdp_bulkq: XDP device bulk queue * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one * @tcx_egress: BPF & clsact qdisc specific data for egress processing * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device * @dev_refcnt: Number of references to this device * @refcnt_tracker: Tracker directory for tracked references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * * @reg_state: Register/unregister state machine * @dismantle: Device is going to be freed * @rtnl_link_state: This enum represents the phases of creating * a new link * * @needs_free_netdev: Should unregister perform free_netdev? * @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one * @nd_net: Network namespace this network device is inside * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type * * @pcpu_stat_type: Type of device statistics which the core should * allocate/free: none, lstats, tstats, dstats. none * means the driver is handling statistics allocation/ * freeing internally. * @lstats: Loopback statistics: packets, bytes * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP * * @dm_private: Drop monitor private * * @dev: Class/net/name entry * @sysfs_groups: Space for optional device, statistics and wireless * sysfs groups * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops * @stat_ops: Optional ops for queue-aware statistics * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO * @tso_max_segs: Device (as in HW) limit on the max TSO segment count * @gso_ipv4_max_size: Maximum size of generic segmentation offload, * for IPv4. * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device * @tc_to_txq: XXX: need comments on this one * @prio_tc_map: XXX: need comments on this one * * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. * * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the * switch port. 
* * @wol_enabled: Wake-on-LAN is enabled * * @threaded: napi threaded mode is enabled * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. * * @macsec_ops: MACsec offloading ops * * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state * @xdp_state: stores info on attached XDP BPF programs * * @nested_level: Used as a parameter of spin_lock_nested() of * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. * @gro_max_size: Maximum size of aggregated packet in generic * receive offload (GRO) * @gro_ipv4_max_size: Maximum size of aggregated packet in generic * receive offload (GRO), for IPv4. * @xdp_zc_max_segs: Maximum number of segments supported by AF_XDP * zero copy driver * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @watchdog_dev_tracker: refcount tracker used by watchdog. * @dev_registered_tracker: tracker for reference held while * registered * @offload_xstats_l3: L3 HW stats for this netdevice. * * @devlink_port: Pointer to related devlink port structure. * Assigned by a driver before netdev registration using * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, * where the clock is recovered. * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ struct net_device { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/net_device.rst. * Please update the document when adding new fields. */ /* TX read-mostly hotpath */ __cacheline_group_begin(net_device_read_tx); unsigned long long priv_flags; const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; netdev_features_t gso_partial_features; unsigned int real_num_tx_queues; unsigned int gso_max_size; unsigned int gso_ipv4_max_size; u16 gso_max_segs; s16 num_tc; /* Note : dev->mtu is often read without holding a lock. * Writers usually hold RTNL. * It is recommended to use READ_ONCE() to annotate the reads, * and to use WRITE_ONCE() to annotate the writes. 
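	 * For example, a lockless reader and an RTNL-protected writer would
	 * typically be annotated as (illustrative sketch only):
	 *
	 *	mtu = READ_ONCE(dev->mtu);
	 *	WRITE_ONCE(dev->mtu, new_mtu);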
*/ unsigned int mtu; unsigned short needed_headroom; struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; #endif #ifdef CONFIG_NETFILTER_EGRESS struct nf_hook_entries __rcu *nf_hooks_egress; #endif #ifdef CONFIG_NET_XGRESS struct bpf_mprog_entry __rcu *tcx_egress; #endif __cacheline_group_end(net_device_read_tx); /* TXRX read-mostly hotpath */ __cacheline_group_begin(net_device_read_txrx); union { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; }; unsigned long state; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; struct inet6_dev __rcu *ip6_ptr; __cacheline_group_end(net_device_read_txrx); /* RX read-mostly hotpath */ __cacheline_group_begin(net_device_read_rx); struct bpf_prog __rcu *xdp_prog; struct list_head ptype_specific; int ifindex; unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned long gro_flush_timeout; int napi_defer_hard_irqs; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; possible_net_t nd_net; #ifdef CONFIG_NETPOLL struct netpoll_info __rcu *npinfo; #endif #ifdef CONFIG_NET_XGRESS struct bpf_mprog_entry __rcu *tcx_ingress; #endif __cacheline_group_end(net_device_read_rx); char name[IFNAMSIZ]; struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; /* * I/O specific fields * FIXME: Merge these and struct ifmap into one */ unsigned long mem_end; unsigned long mem_start; unsigned long base_addr; /* * Some hardware also needs these fields (state,dev_list, * napi_list,unreg_list,close_list) but they are not * part of the usual set specified in Space.c. */ struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; struct list_head ptype_all; struct { struct list_head upper; struct list_head lower; } adj_list; /* Read-mostly cache-line for fast-path access */ xdp_features_t xdp_features; const struct xdp_metadata_ops *xdp_metadata_ops; const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; unsigned short gflags; unsigned short needed_tailroom; netdev_features_t hw_features; netdev_features_t wanted_features; netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; unsigned int min_mtu; unsigned int max_mtu; unsigned short type; unsigned char min_header_len; unsigned char name_assign_type; int group; struct net_device_stats stats; /* not used by modern drivers */ struct net_device_core_stats __percpu *core_stats; /* Stats to monitor link on/off, flapping */ atomic_t carrier_up_count; atomic_t carrier_down_count; #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def *wireless_handlers; struct iw_public_data *wireless_data; #endif const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_L3_MASTER_DEV const struct l3mdev_ops *l3mdev_ops; #endif #if IS_ENABLED(CONFIG_IPV6) const struct ndisc_ops *ndisc_ops; #endif #ifdef CONFIG_XFRM_OFFLOAD const struct xfrmdev_ops *xfrmdev_ops; #endif #if IS_ENABLED(CONFIG_TLS_DEVICE) const struct tlsdev_ops *tlsdev_ops; #endif unsigned int operstate; unsigned char link_mode; unsigned char if_port; unsigned char dma; /* Interface address info. 
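	 * (Illustrative note: @dev_addr below is const as far as drivers are
	 * concerned; MAC changes go through helpers such as
	 * eth_hw_addr_set(dev, addr) or dev_addr_set(), e.g. from a driver's
	 * ndo_set_mac_address() implementation.)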
*/ unsigned char perm_addr[MAX_ADDR_LEN]; unsigned char addr_assign_type; unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; unsigned short padded; spinlock_t addr_list_lock; int irq; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs; #ifdef CONFIG_SYSFS struct kset *queues_kset; #endif #ifdef CONFIG_LOCKDEP struct list_head unlink_list; #endif unsigned int promiscuity; unsigned int allmulti; bool uc_promisc; #ifdef CONFIG_LOCKDEP unsigned char nested_level; #endif /* Protocol-specific pointers */ struct in_device __rcu *ip_ptr; #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif #if IS_ENABLED(CONFIG_NET_DSA) struct dsa_port *dsa_ptr; #endif #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; #endif #if IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; #endif #if IS_ENABLED(CONFIG_AX25) void *ax25_ptr; #endif #if IS_ENABLED(CONFIG_CFG80211) struct wireless_dev *ieee80211_ptr; #endif #if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN) struct wpan_dev *ieee802154_ptr; #endif #if IS_ENABLED(CONFIG_MPLS_ROUTING) struct mpls_dev __rcu *mpls_ptr; #endif #if IS_ENABLED(CONFIG_MCTP) struct mctp_dev __rcu *mctp_ptr; #endif /* * Cache lines mostly used on receive path (including eth_type_trans()) */ /* Interface address info used in eth_type_trans() */ const unsigned char *dev_addr; unsigned int num_rx_queues; #define GRO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. */ #define GRO_MAX_SIZE (8 * 65535u) unsigned int xdp_zc_max_segs; struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS struct nf_hook_entries __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rx_cpu_rmap; #endif struct hlist_node index_hlist; /* * Cache lines mostly used on transmit path */ unsigned int num_tx_queues; struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; struct xdp_dev_bulk_queue __percpu *xdp_bulkq; #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); #endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; int watchdog_timeo; u32 proto_down_reason; struct list_head todo_list; #ifdef CONFIG_PCPU_DEV_REFCNT int __percpu *pcpu_refcnt; #else refcount_t dev_refcnt; #endif struct ref_tracker_dir refcnt_tracker; struct list_head link_watch_list; u8 reg_state; bool dismantle; enum { RTNL_LINK_INITIALIZED, RTNL_LINK_INITIALIZING, } rtnl_link_state:16; bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); /* mid-layer private */ void *ml_priv; enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; #endif #if IS_ENABLED(CONFIG_MRP) struct mrp_port __rcu *mrp_port; #endif #if IS_ENABLED(CONFIG_NET_DROP_MONITOR) struct dm_hw_stat_delta __rcu *dm_private; #endif struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; const struct rtnl_link_ops *rtnl_link_ops; const struct netdev_stat_ops *stat_ops; /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. 
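 * Hence the bound below: GSO_MAX_SIZE = 8 * GSO_MAX_SEGS = 8 * 65535 = 524280 bytes.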
*/ #define GSO_MAX_SIZE (8 * GSO_MAX_SEGS) #define TSO_LEGACY_MAX_SIZE 65536 #define TSO_MAX_SIZE UINT_MAX unsigned int tso_max_size; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) unsigned int fcoe_ddp_xid; #endif #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; unsigned wol_enabled:1; unsigned threaded:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) /* MACsec management functions */ const struct macsec_ops *macsec_ops; #endif const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; u8 dev_addr_shadow[MAX_ADDR_LEN]; netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; netdevice_tracker dev_registered_tracker; struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; #if IS_ENABLED(CONFIG_DPLL) struct dpll_pin __rcu *dpll_pin; #endif #if IS_ENABLED(CONFIG_PAGE_POOL) /** @page_pools: page pools created for this netdevice */ struct hlist_head page_pools; #endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) /* * Driver should use this to assign devlink port instance to a netdevice * before it registers the netdevice. Therefore devlink_port is static * during the netdev lifetime after it is registered. */ #define SET_NETDEV_DEVLINK_PORT(dev, port) \ ({ \ WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \ ((dev)->devlink_port = (port)); \ }) static inline bool netif_elide_gro(const struct net_device *dev) { if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) return true; return false; } #define NETDEV_ALIGN 32 static inline int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio) { return dev->prio_tc_map[prio & TC_BITMASK]; } static inline int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) { if (tc >= dev->num_tc) return -EINVAL; dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK; return 0; } int netdev_txq_to_tc(struct net_device *dev, unsigned int txq); void netdev_reset_tc(struct net_device *dev); int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); int netdev_set_num_tc(struct net_device *dev, u8 num_tc); static inline int netdev_get_num_tc(struct net_device *dev) { return dev->num_tc; } static inline void net_prefetch(void *p) { prefetch(p); #if L1_CACHE_BYTES < 128 prefetch((u8 *)p + L1_CACHE_BYTES); #endif } static inline void net_prefetchw(void *p) { prefetchw(p); #if L1_CACHE_BYTES < 128 prefetchw((u8 *)p + L1_CACHE_BYTES); #endif } void netdev_unbind_sb_channel(struct net_device *dev, struct net_device *sb_dev); int netdev_bind_sb_channel_queue(struct net_device *dev, struct net_device *sb_dev, u8 tc, u16 count, u16 offset); int netdev_set_sb_channel(struct net_device *dev, u16 channel); static inline int netdev_get_sb_channel(struct net_device *dev) { return max_t(int, -dev->num_tc, 0); } static inline struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, unsigned int index) { DEBUG_NET_WARN_ON_ONCE(index >= dev->num_tx_queues); return &dev->_tx[index]; } static inline struct netdev_queue *skb_get_tx_queue(const struct net_device *dev, const struct sk_buff *skb) { return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); } static inline void 
netdev_for_each_tx_queue(struct net_device *dev, void (*f)(struct net_device *, struct netdev_queue *, void *), void *arg) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) f(dev, &dev->_tx[i], arg); } #define netdev_lockdep_set_classes(dev) \ { \ static struct lock_class_key qdisc_tx_busylock_key; \ static struct lock_class_key qdisc_xmit_lock_key; \ static struct lock_class_key dev_addr_list_lock_key; \ unsigned int i; \ \ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ lockdep_set_class(&(dev)->addr_list_lock, \ &dev_addr_list_lock_key); \ for (i = 0; i < (dev)->num_tx_queues; i++) \ lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ &qdisc_xmit_lock_key); \ } u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); /* returns the headroom that the master device needs to take in account * when forwarding to this dev */ static inline unsigned netdev_get_fwd_headroom(struct net_device *dev) { return dev->priv_flags & IFF_PHONY_HEADROOM ? 0 : dev->needed_headroom; } static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr) { if (dev->netdev_ops->ndo_set_rx_headroom) dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr); } /* set the device rx headroom to the dev's default */ static inline void netdev_reset_rx_headroom(struct net_device *dev) { netdev_set_rx_headroom(dev, -1); } static inline void *netdev_get_ml_priv(struct net_device *dev, enum netdev_ml_priv_type type) { if (dev->ml_priv_type != type) return NULL; return dev->ml_priv; } static inline void netdev_set_ml_priv(struct net_device *dev, void *ml_priv, enum netdev_ml_priv_type type) { WARN(dev->ml_priv_type && dev->ml_priv_type != type, "Overwriting already set ml_priv_type (%u) with different ml_priv_type (%u)!\n", dev->ml_priv_type, type); WARN(!dev->ml_priv_type && dev->ml_priv, "Overwriting already set ml_priv and ml_priv_type is ML_PRIV_NONE!\n"); dev->ml_priv = ml_priv; dev->ml_priv_type = type; } /* * Net namespace inlines */ static inline struct net *dev_net(const struct net_device *dev) { return read_pnet(&dev->nd_net); } static inline void dev_net_set(struct net_device *dev, struct net *net) { write_pnet(&dev->nd_net, net); } /** * netdev_priv - access network device private data * @dev: network device * * Get network device private data */ static inline void *netdev_priv(const struct net_device *dev) { return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); } /* Set the sysfs physical device reference for the network logical device * if set prior to registration will cause a symlink during initialization. */ #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) /* Set the sysfs device type for the network logical device to allow * fine-grained identification of different network device types. For * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. 
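 *
 * Putting the helpers above together, a probe path might look roughly like
 * the minimal sketch below; struct foo_priv, foo_netdev_ops, foo_type and
 * foo_probe() are hypothetical, and error unwinding is omitted:
 *
 *	struct foo_priv { void __iomem *regs; };
 *
 *	static int foo_probe(struct device *parent)
 *	{
 *		struct net_device *dev;
 *		struct foo_priv *priv;
 *
 *		dev = alloc_etherdev(sizeof(*priv));
 *		if (!dev)
 *			return -ENOMEM;
 *
 *		priv = netdev_priv(dev);
 *		priv->regs = NULL;
 *		dev->netdev_ops = &foo_netdev_ops;
 *		SET_NETDEV_DEV(dev, parent);
 *		SET_NETDEV_DEVTYPE(dev, &foo_type);
 *		return register_netdev(dev);
 *	}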
*/ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, enum netdev_queue_type type, struct napi_struct *napi); static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) { napi->irq = irq; } /* Default NAPI poll() weight * Device drivers are strongly advised to not use bigger value */ #define NAPI_POLL_WEIGHT 64 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); /** * netif_napi_add() - initialize a NAPI context * @dev: network device * @napi: NAPI context * @poll: polling function * * netif_napi_add() must be used to initialize a NAPI context prior to calling * *any* of the other NAPI-related functions. */ static inline void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int)) { netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } static inline void netif_napi_add_tx_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); netif_napi_add_weight(dev, napi, poll, weight); } /** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device * @napi: NAPI context * @poll: polling function * * This variant of netif_napi_add() should be used from drivers using NAPI * to exclusively poll a TX queue. * This will avoid we add it into napi_hash[], thus polluting this hash table. */ static inline void netif_napi_add_tx(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int)) { netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } /** * __netif_napi_del - remove a NAPI context * @napi: NAPI context * * Warning: caller must observe RCU grace period before freeing memory * containing @napi. Drivers might want to call this helper to combine * all the needed RCU grace periods into a single one. */ void __netif_napi_del(struct napi_struct *napi); /** * netif_napi_del - remove a NAPI context * @napi: NAPI context * * netif_napi_del() removes a NAPI context from the network device NAPI list */ static inline void netif_napi_del(struct napi_struct *napi) { __netif_napi_del(napi); synchronize_net(); } struct packet_type { __be16 type; /* This is really htons(ether_type). */ bool ignore_outgoing; struct net_device *dev; /* NULL is wildcarded here */ netdevice_tracker dev_tracker; int (*func) (struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); void (*list_func) (struct list_head *, struct packet_type *, struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); struct net *af_packet_net; void *af_packet_priv; struct list_head list; }; struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); struct sk_buff *(*gro_receive)(struct list_head *head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb, int nhoff); }; struct packet_offload { __be16 type; /* This is really htons(ether_type). 
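 *
 * Tying the NAPI helpers above together: a minimal sketch in which a driver
 * embeds a napi_struct in its (hypothetical) struct foo_priv, registers
 * foo_poll() at probe time and completes NAPI once the ring is drained;
 * foo_clean_rx() and foo_enable_irq() are hypothetical as well.
 *
 *	static int foo_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
 *		int done = foo_clean_rx(priv, budget);
 *
 *		if (done < budget && napi_complete_done(napi, done))
 *			foo_enable_irq(priv);
 *		return done;
 *	}
 *
 *	and, at probe time:
 *
 *	netif_napi_add(dev, &priv->napi, foo_poll);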
*/ u16 priority; struct offload_callbacks callbacks; struct list_head list; }; /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; u64_stats_t tx_packets; u64_stats_t tx_bytes; struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); struct pcpu_dstats { u64 rx_packets; u64 rx_bytes; u64 rx_drops; u64 tx_packets; u64 tx_bytes; u64 tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes); static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int len) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); u64_stats_add(&tstats->rx_bytes, len); u64_stats_inc(&tstats->rx_packets); u64_stats_update_end(&tstats->syncp); } static inline void dev_sw_netstats_tx_add(struct net_device *dev, unsigned int packets, unsigned int len) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); u64_stats_add(&tstats->tx_bytes, len); u64_stats_add(&tstats->tx_packets, packets); u64_stats_update_end(&tstats->syncp); } static inline void dev_lstats_add(struct net_device *dev, unsigned int len) { struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats); u64_stats_update_begin(&lstats->syncp); u64_stats_add(&lstats->bytes, len); u64_stats_inc(&lstats->packets); u64_stats_update_end(&lstats->syncp); } #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ if (pcpu_stats) { \ int __cpu; \ for_each_possible_cpu(__cpu) { \ typeof(type) *stat; \ stat = per_cpu_ptr(pcpu_stats, __cpu); \ u64_stats_init(&stat->syncp); \ } \ } \ pcpu_stats; \ }) #define netdev_alloc_pcpu_stats(type) \ __netdev_alloc_pcpu_stats(type, GFP_KERNEL) #define devm_netdev_alloc_pcpu_stats(dev, type) \ ({ \ typeof(type) __percpu *pcpu_stats = devm_alloc_percpu(dev, type);\ if (pcpu_stats) { \ int __cpu; \ for_each_possible_cpu(__cpu) { \ typeof(type) *stat; \ stat = per_cpu_ptr(pcpu_stats, __cpu); \ u64_stats_init(&stat->syncp); \ } \ } \ pcpu_stats; \ }) enum netdev_lag_tx_type { NETDEV_LAG_TX_TYPE_UNKNOWN, NETDEV_LAG_TX_TYPE_RANDOM, NETDEV_LAG_TX_TYPE_BROADCAST, NETDEV_LAG_TX_TYPE_ROUNDROBIN, NETDEV_LAG_TX_TYPE_ACTIVEBACKUP, NETDEV_LAG_TX_TYPE_HASH, }; enum netdev_lag_hash { NETDEV_LAG_HASH_NONE, NETDEV_LAG_HASH_L2, NETDEV_LAG_HASH_L34, NETDEV_LAG_HASH_L23, NETDEV_LAG_HASH_E23, NETDEV_LAG_HASH_E34, NETDEV_LAG_HASH_VLAN_SRCMAC, NETDEV_LAG_HASH_UNKNOWN, }; struct netdev_lag_upper_info { enum netdev_lag_tx_type tx_type; enum netdev_lag_hash hash_type; }; struct netdev_lag_lower_state_info { u8 link_up : 1, tx_enabled : 1; }; #include <linux/notifier.h> /* netdevice notifier chain. Please remember to update netdev_cmd_to_name() * and the rtnetlink notification exclusion list in rtnetlink_event() when * adding new types. 
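 *
 * A minimal subscriber sketch (foo_netdev_event() and foo_nb are
 * hypothetical names):
 *
 *	static int foo_netdev_event(struct notifier_block *nb,
 *				    unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 *
 *		switch (event) {
 *		case NETDEV_UP:
 *		case NETDEV_CHANGEMTU:
 *			netdev_info(dev, "event %s\n", netdev_cmd_to_name(event));
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block foo_nb = {
 *		.notifier_call = foo_netdev_event,
 *	};
 *
 *	followed by register_netdevice_notifier(&foo_nb) at module init and
 *	unregister_netdevice_notifier(&foo_nb) at module exit.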
*/ enum netdev_cmd { NETDEV_UP = 1, /* For now you can't veto a device up/down */ NETDEV_DOWN, NETDEV_REBOOT, /* Tell a protocol stack a network interface detected a hardware crash and restarted - we can use this eg to kick tcp sessions once done */ NETDEV_CHANGE, /* Notify device state change */ NETDEV_REGISTER, NETDEV_UNREGISTER, NETDEV_CHANGEMTU, /* notify after mtu change happened */ NETDEV_CHANGEADDR, /* notify after the address change */ NETDEV_PRE_CHANGEADDR, /* notify before the address change */ NETDEV_GOING_DOWN, NETDEV_CHANGENAME, NETDEV_FEAT_CHANGE, NETDEV_BONDING_FAILOVER, NETDEV_PRE_UP, NETDEV_PRE_TYPE_CHANGE, NETDEV_POST_TYPE_CHANGE, NETDEV_POST_INIT, NETDEV_PRE_UNINIT, NETDEV_RELEASE, NETDEV_NOTIFY_PEERS, NETDEV_JOIN, NETDEV_CHANGEUPPER, NETDEV_RESEND_IGMP, NETDEV_PRECHANGEMTU, /* notify before mtu change happened */ NETDEV_CHANGEINFODATA, NETDEV_BONDING_INFO, NETDEV_PRECHANGEUPPER, NETDEV_CHANGELOWERSTATE, NETDEV_UDP_TUNNEL_PUSH_INFO, NETDEV_UDP_TUNNEL_DROP_INFO, NETDEV_CHANGE_TX_QUEUE_LEN, NETDEV_CVLAN_FILTER_PUSH_INFO, NETDEV_CVLAN_FILTER_DROP_INFO, NETDEV_SVLAN_FILTER_PUSH_INFO, NETDEV_SVLAN_FILTER_DROP_INFO, NETDEV_OFFLOAD_XSTATS_ENABLE, NETDEV_OFFLOAD_XSTATS_DISABLE, NETDEV_OFFLOAD_XSTATS_REPORT_USED, NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, NETDEV_XDP_FEAT_CHANGE, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); int unregister_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); struct netdev_notifier_info { struct net_device *dev; struct netlink_ext_ack *extack; }; struct netdev_notifier_info_ext { struct netdev_notifier_info info; /* must be first */ union { u32 mtu; } ext; }; struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsigned int flags_changed; }; struct netdev_notifier_changeupper_info { struct netdev_notifier_info info; /* must be first */ struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ bool linking; /* is the notification for link or unlink */ void *upper_info; /* upper dev info */ }; struct netdev_notifier_changelowerstate_info { struct netdev_notifier_info info; /* must be first */ void *lower_state_info; /* is lower dev state */ }; struct netdev_notifier_pre_changeaddr_info { struct netdev_notifier_info info; /* must be first */ const unsigned char *dev_addr; }; enum netdev_offload_xstats_type { NETDEV_OFFLOAD_XSTATS_TYPE_L3 = 1, }; struct netdev_notifier_offload_xstats_info { struct netdev_notifier_info info; /* must be first */ enum netdev_offload_xstats_type type; union { /* NETDEV_OFFLOAD_XSTATS_REPORT_DELTA */ struct netdev_notifier_offload_xstats_rd *report_delta; /* NETDEV_OFFLOAD_XSTATS_REPORT_USED */ struct netdev_notifier_offload_xstats_ru *report_used; }; }; int netdev_offload_xstats_enable(struct net_device *dev, enum netdev_offload_xstats_type type, struct netlink_ext_ack *extack); int netdev_offload_xstats_disable(struct net_device *dev, enum netdev_offload_xstats_type type); bool netdev_offload_xstats_enabled(const struct net_device *dev, enum netdev_offload_xstats_type type); int 
netdev_offload_xstats_get(struct net_device *dev, enum netdev_offload_xstats_type type, struct rtnl_hw_stats64 *stats, bool *used, struct netlink_ext_ack *extack); void netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *rd, const struct rtnl_hw_stats64 *stats); void netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *ru); void netdev_offload_xstats_push_delta(struct net_device *dev, enum netdev_offload_xstats_type type, const struct rtnl_hw_stats64 *stats); static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { info->dev = dev; info->extack = NULL; } static inline struct net_device * netdev_notifier_info_to_dev(const struct netdev_notifier_info *info) { return info->dev; } static inline struct netlink_ext_ack * netdev_notifier_info_to_extack(const struct netdev_notifier_info *info) { return info->extack; } int call_netdevice_notifiers(unsigned long val, struct net_device *dev); int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info); #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_reverse(net, d) \ list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_rcu(net, d) \ list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue_reverse(net, d) \ list_for_each_entry_continue_reverse(d, &(net)->dev_base_head, \ dev_list) #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ for_each_netdev_rcu(&init_net, slave) \ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) #define for_each_netdev_dump(net, d, ifindex) \ xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex)) static inline struct net_device *next_net_device(struct net_device *dev) { struct list_head *lh; struct net *net; net = dev_net(dev); lh = dev->dev_list.next; return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } static inline struct net_device *next_net_device_rcu(struct net_device *dev) { struct list_head *lh; struct net *net; net = dev_net(dev); lh = rcu_dereference(list_next_rcu(&dev->dev_list)); return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } static inline struct net_device *first_net_device(struct net *net) { return list_empty(&net->dev_base_head) ? NULL : net_device_entry(net->dev_base_head.next); } static inline struct net_device *first_net_device_rcu(struct net *net) { struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head)); return lh == &net->dev_base_head ? 
NULL : net_device_entry(lh); } int netdev_boot_setup_check(struct net_device *dev); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); void dev_add_pack(struct packet_type *pt); void dev_remove_pack(struct packet_type *pt); void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, struct net_device_path_stack *stack); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); bool netdev_name_in_use(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); static inline int dev_queue_xmit(struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } static inline int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) { return __dev_queue_xmit(skb, sb_dev); } static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { int ret; ret = __dev_direct_xmit(skb, queue_id); if (!dev_xmit_complete(ret)) kfree_skb(skb); return ret; } int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); static inline void unregister_netdevice(struct net_device *dev) { unregister_netdevice_queue(dev, NULL); } int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); void netdev_freemem(struct net_device *dev); void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, bool all_slaves); struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev, struct sock *sk); struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *netdev_get_by_index(struct net *net, int ifindex, netdevice_tracker *tracker, gfp_t gfp); struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { if (!dev->header_ops || 
!dev->header_ops->create) return 0; return dev->header_ops->create(skb, dev, type, daddr, saddr, len); } static inline int dev_parse_header(const struct sk_buff *skb, unsigned char *haddr) { const struct net_device *dev = skb->dev; if (!dev->header_ops || !dev->header_ops->parse) return 0; return dev->header_ops->parse(skb, haddr); } static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb) { const struct net_device *dev = skb->dev; if (!dev->header_ops || !dev->header_ops->parse_protocol) return 0; return dev->header_ops->parse_protocol(skb); } /* ll_header must have at least hard_header_len allocated */ static inline bool dev_validate_header(const struct net_device *dev, char *ll_header, int len) { if (likely(len >= dev->hard_header_len)) return true; if (len < dev->min_header_len) return false; if (capable(CAP_SYS_RAWIO)) { memset(ll_header + len, 0, dev->hard_header_len - len); return true; } if (dev->header_ops && dev->header_ops->validate) return dev->header_ops->validate(ll_header, len); return false; } static inline bool dev_has_header(const struct net_device *dev) { return dev->header_ops && dev->header_ops->create; } /* * Incoming packets are placed on per-CPU queues */ struct softnet_data { struct list_head poll_list; struct sk_buff_head process_queue; /* stats */ unsigned int processed; unsigned int time_squeeze; #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; #endif bool in_net_rx_action; bool in_napi_threaded_poll; #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit __rcu *flow_limit; #endif struct Qdisc *output_queue; struct Qdisc **output_queue_tailp; struct sk_buff *completion_queue; #ifdef CONFIG_XFRM_OFFLOAD struct sk_buff_head xfrm_backlog; #endif /* written and read only by owning cpu: */ struct { u16 recursion; u8 more; #ifdef CONFIG_NET_EGRESS u8 skip_txqueue; #endif } xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, * and only read by other cpus. Worth using a cache line. 
*/ unsigned int input_queue_head ____cacheline_aligned_in_smp; /* Elements below can be accessed between CPUs for RPS/RFS */ call_single_data_t csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsigned int cpu; unsigned int input_queue_tail; #endif unsigned int received_rps; unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; int defer_count; int defer_ipi_scheduled; struct sk_buff *defer_list; call_single_data_t defer_csd; }; static inline void input_queue_head_incr(struct softnet_data *sd) { #ifdef CONFIG_RPS sd->input_queue_head++; #endif } static inline void input_queue_tail_incr_save(struct softnet_data *sd, unsigned int *qtail) { #ifdef CONFIG_RPS *qtail = ++sd->input_queue_tail; #endif } DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); static inline int dev_recursion_level(void) { return this_cpu_read(softnet_data.xmit.recursion); } #define XMIT_RECURSION_LIMIT 8 static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > XMIT_RECURSION_LIMIT); } static inline void dev_xmit_recursion_inc(void) { __this_cpu_inc(softnet_data.xmit.recursion); } static inline void dev_xmit_recursion_dec(void) { __this_cpu_dec(softnet_data.xmit.recursion); } void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); static inline void netif_tx_schedule_all(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) netif_schedule_queue(netdev_get_tx_queue(dev, i)); } static __always_inline void netif_tx_start_queue(struct netdev_queue *dev_queue) { clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } /** * netif_start_queue - allow transmit * @dev: network device * * Allow upper layers to call the device hard_start_xmit routine. */ static inline void netif_start_queue(struct net_device *dev) { netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); } static inline void netif_tx_start_all_queues(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); netif_tx_start_queue(txq); } } void netif_tx_wake_queue(struct netdev_queue *dev_queue); /** * netif_wake_queue - restart transmit * @dev: network device * * Allow upper layers to call the device hard_start_xmit routine. * Used for flow control when transmit resources are available. */ static inline void netif_wake_queue(struct net_device *dev) { netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); } static inline void netif_tx_wake_all_queues(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); netif_tx_wake_queue(txq); } } static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { /* Must be an atomic op see netif_txq_try_stop() */ set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } /** * netif_stop_queue - stop transmitted packets * @dev: network device * * Stop upper layers calling the device hard_start_xmit routine. * Used for flow control when transmit resources are unavailable. 
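 *
 * A common single-queue flow-control pattern is sketched below; the ring
 * accounting helper foo_tx_ring_free() and the wake threshold are
 * hypothetical. The queue is stopped in ndo_start_xmit() when the TX ring
 * is nearly full and woken from the TX completion path once descriptors
 * have been reclaimed.
 *
 *	In ndo_start_xmit():
 *		if (foo_tx_ring_free(priv) < MAX_SKB_FRAGS + 1)
 *			netif_stop_queue(dev);
 *
 *	In the TX completion handler:
 *		if (netif_queue_stopped(dev) &&
 *		    foo_tx_ring_free(priv) > FOO_TX_WAKE_THRESH)
 *			netif_wake_queue(dev);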
 */
static inline void netif_stop_queue(struct net_device *dev)
{
	netif_tx_stop_queue(netdev_get_tx_queue(dev, 0));
}

void netif_tx_stop_all_queues(struct net_device *dev);

static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
{
	return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
}

/**
 * netif_queue_stopped - test if transmit queue is flowblocked
 * @dev: network device
 *
 * Test if transmit queue on device is currently unable to send.
 */
static inline bool netif_queue_stopped(const struct net_device *dev)
{
	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
}

static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue)
{
	return dev_queue->state & QUEUE_STATE_ANY_XOFF;
}

static inline bool
netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue)
{
	return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
}

static inline bool
netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue)
{
	return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN;
}

/**
 * netdev_queue_set_dql_min_limit - set dql minimum limit
 * @dev_queue: pointer to transmit queue
 * @min_limit: dql minimum limit
 *
 * Forces xmit_more() to return true until the minimum threshold
 * defined by @min_limit is reached (or until the tx queue is
 * empty). Warning: to be used with care, misuse will impact the
 * latency.
 */
static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue,
						  unsigned int min_limit)
{
#ifdef CONFIG_BQL
	dev_queue->dql.min_limit = min_limit;
#endif
}

static inline int netdev_queue_dql_avail(const struct netdev_queue *txq)
{
#ifdef CONFIG_BQL
	/* Non-BQL migrated drivers will return 0, too. */
	return dql_avail(&txq->dql);
#else
	return 0;
#endif
}

/**
 * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write
 * @dev_queue: pointer to transmit queue
 *
 * BQL enabled drivers might use this helper in their ndo_start_xmit(),
 * to give appropriate hint to the CPU.
 */
static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue)
{
#ifdef CONFIG_BQL
	prefetchw(&dev_queue->dql.num_queued);
#endif
}

/**
 * netdev_txq_bql_complete_prefetchw - prefetch bql data for write
 * @dev_queue: pointer to transmit queue
 *
 * BQL enabled drivers might use this helper in their TX completion path,
 * to give appropriate hint to the CPU.
 */
static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue)
{
#ifdef CONFIG_BQL
	prefetchw(&dev_queue->dql.limit);
#endif
}

/**
 * netdev_tx_sent_queue - report the number of bytes queued to a given tx queue
 * @dev_queue: network device queue
 * @bytes: number of bytes queued to the device queue
 *
 * Report the number of bytes queued for sending/completion to the network
 * device hardware queue. @bytes should be a good approximation and should
 * exactly match netdev_completed_queue() @bytes.
 * This is typically called once per packet, from ndo_start_xmit().
 */
static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
					unsigned int bytes)
{
#ifdef CONFIG_BQL
	dql_queued(&dev_queue->dql, bytes);

	if (likely(dql_avail(&dev_queue->dql) >= 0))
		return;

	set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);

	/*
	 * The XOFF flag must be set before checking the dql_avail below,
	 * because in netdev_tx_completed_queue we update the dql_completed
	 * before checking the XOFF flag.
*/ smp_mb(); /* check again in case another CPU has just made room avail */ if (unlikely(dql_avail(&dev_queue->dql) >= 0)) clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); #endif } /* Variant of netdev_tx_sent_queue() for drivers that are aware * that they should not test BQL status themselves. * We do want to change __QUEUE_STATE_STACK_XOFF only for the last * skb of a batch. * Returns true if the doorbell must be used to kick the NIC. */ static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes, bool xmit_more) { if (xmit_more) { #ifdef CONFIG_BQL dql_queued(&dev_queue->dql, bytes); #endif return netif_tx_queue_stopped(dev_queue); } netdev_tx_sent_queue(dev_queue, bytes); return true; } /** * netdev_sent_queue - report the number of bytes queued to hardware * @dev: network device * @bytes: number of bytes queued to the hardware device queue * * Report the number of bytes queued for sending/completion to the network * device hardware queue#0. @bytes should be a good approximation and should * exactly match netdev_completed_queue() @bytes. * This is typically called once per packet, from ndo_start_xmit(). */ static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) { netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes); } static inline bool __netdev_sent_queue(struct net_device *dev, unsigned int bytes, bool xmit_more) { return __netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes, xmit_more); } /** * netdev_tx_completed_queue - report number of packets/bytes at TX completion. * @dev_queue: network device queue * @pkts: number of packets (currently ignored) * @bytes: number of bytes dequeued from the device queue * * Must be called at most once per TX completion round (and not per * individual packet), so that BQL can adjust its limits appropriately. 
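 *
 * A typical BQL pairing is sketched below; txq would come from
 * netdev_get_tx_queue() or skb_get_tx_queue(), and pkts_done/bytes_done are
 * hypothetical totals accumulated while reclaiming the ring.
 *
 *	In ndo_start_xmit(), once per queued skb:
 *		netdev_tx_sent_queue(txq, skb->len);
 *
 *	In the TX completion routine, once per completion round:
 *		netdev_tx_completed_queue(txq, pkts_done, bytes_done);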
*/ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, unsigned int pkts, unsigned int bytes) { #ifdef CONFIG_BQL if (unlikely(!bytes)) return; dql_completed(&dev_queue->dql, bytes); /* * Without the memory barrier there is a small possiblity that * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever */ smp_mb(); /* NOTE: netdev_txq_completed_mb() assumes this exists */ if (unlikely(dql_avail(&dev_queue->dql) < 0)) return; if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state)) netif_schedule_queue(dev_queue); #endif } /** * netdev_completed_queue - report bytes and packets completed by device * @dev: network device * @pkts: actual number of packets sent over the medium * @bytes: actual number of bytes sent over the medium * * Report the number of bytes and packets transmitted by the network device * hardware queue over the physical medium, @bytes must exactly match the * @bytes amount passed to netdev_sent_queue() */ static inline void netdev_completed_queue(struct net_device *dev, unsigned int pkts, unsigned int bytes) { netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes); } static inline void netdev_tx_reset_queue(struct netdev_queue *q) { #ifdef CONFIG_BQL clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state); dql_reset(&q->dql); #endif } /** * netdev_reset_queue - reset the packets and bytes count of a network device * @dev_queue: network device * * Reset the bytes and packet count of a network device and clear the * software flow control OFF bit for this network device */ static inline void netdev_reset_queue(struct net_device *dev_queue) { netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); } /** * netdev_cap_txqueue - check if selected tx queue exceeds device queues * @dev: network device * @queue_index: given tx queue index * * Returns 0 if given tx queue index >= number of device tx queues, * otherwise returns the originally passed tx queue index. */ static inline u16 netdev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", dev->name, queue_index, dev->real_num_tx_queues); return 0; } return queue_index; } /** * netif_running - test if up * @dev: network device * * Test if the device has been brought up. */ static inline bool netif_running(const struct net_device *dev) { return test_bit(__LINK_STATE_START, &dev->state); } /* * Routines to manage the subqueues on a device. We only need start, * stop, and a check if it's stopped. All other device management is * done at the overall netdevice level. * Also test the device if we're multiqueue. */ /** * netif_start_subqueue - allow sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Start individual transmit queue of a device with multiple transmit queues. */ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); netif_tx_start_queue(txq); } /** * netif_stop_subqueue - stop sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Stop individual transmit queue of a device with multiple transmit queues. 
*/ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); netif_tx_stop_queue(txq); } /** * __netif_subqueue_stopped - test status of subqueue * @dev: network device * @queue_index: sub queue index * * Check individual transmit queue of a device with multiple transmit queues. */ static inline bool __netif_subqueue_stopped(const struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); return netif_tx_queue_stopped(txq); } /** * netif_subqueue_stopped - test status of subqueue * @dev: network device * @skb: sub queue buffer pointer * * Check individual transmit queue of a device with multiple transmit queues. */ static inline bool netif_subqueue_stopped(const struct net_device *dev, struct sk_buff *skb) { return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); } /** * netif_wake_subqueue - allow sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Resume individual transmit queue of a device with multiple transmit queues. */ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); netif_tx_wake_queue(txq); } #ifdef CONFIG_XPS int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index); int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, u16 index, enum xps_map_type type); /** * netif_attr_test_mask - Test a CPU or Rx queue set in a mask * @j: CPU/Rx queue index * @mask: bitmask of all cpus/rx queues * @nr_bits: number of bits in the bitmask * * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues. */ static inline bool netif_attr_test_mask(unsigned long j, const unsigned long *mask, unsigned int nr_bits) { cpu_max_bits_warn(j, nr_bits); return test_bit(j, mask); } /** * netif_attr_test_online - Test for online CPU/Rx queue * @j: CPU/Rx queue index * @online_mask: bitmask for CPUs/Rx queues that are online * @nr_bits: number of bits in the bitmask * * Returns true if a CPU/Rx queue is online. */ static inline bool netif_attr_test_online(unsigned long j, const unsigned long *online_mask, unsigned int nr_bits) { cpu_max_bits_warn(j, nr_bits); if (online_mask) return test_bit(j, online_mask); return (j < nr_bits); } /** * netif_attrmask_next - get the next CPU/Rx queue in a cpu/Rx queues mask * @n: CPU/Rx queue index * @srcp: the cpumask/Rx queue mask pointer * @nr_bits: number of bits in the bitmask * * Returns >= nr_bits if no further CPUs/Rx queues set. */ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) { /* -1 is a legal arg here. */ if (n != -1) cpu_max_bits_warn(n, nr_bits); if (srcp) return find_next_bit(srcp, nr_bits, n + 1); return n + 1; } /** * netif_attrmask_next_and - get the next CPU/Rx queue in \*src1p & \*src2p * @n: CPU/Rx queue index * @src1p: the first CPUs/Rx queues mask pointer * @src2p: the second CPUs/Rx queues mask pointer * @nr_bits: number of bits in the bitmask * * Returns >= nr_bits if no further CPUs/Rx queues set in both. */ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, unsigned int nr_bits) { /* -1 is a legal arg here. 
*/ if (n != -1) cpu_max_bits_warn(n, nr_bits); if (src1p && src2p) return find_next_and_bit(src1p, src2p, nr_bits, n + 1); else if (src1p) return find_next_bit(src1p, nr_bits, n + 1); else if (src2p) return find_next_bit(src2p, nr_bits, n + 1); return n + 1; } #else static inline int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index) { return 0; } static inline int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, u16 index, enum xps_map_type type) { return 0; } #endif /** * netif_is_multiqueue - test if device has multiple transmit queues * @dev: network device * * Check if device has multiple transmit queues */ static inline bool netif_is_multiqueue(const struct net_device *dev) { return dev->num_tx_queues > 1; } int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); #ifdef CONFIG_SYSFS int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); #else static inline int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxqs) { dev->real_num_rx_queues = rxqs; return 0; } #endif int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq); int netif_get_num_default_rss_queues(void); void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason); void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason); /* * It is not allowed to call kfree_skb() or consume_skb() from hardware * interrupt context or with hardware interrupts being disabled. * (in_hardirq() || irqs_disabled()) * * We provide four helpers that can be used in following contexts : * * dev_kfree_skb_irq(skb) when caller drops a packet from irq context, * replacing kfree_skb(skb) * * dev_consume_skb_irq(skb) when caller consumes a packet from irq context. * Typically used in place of consume_skb(skb) in TX completion path * * dev_kfree_skb_any(skb) when caller doesn't know its current irq context, * replacing kfree_skb(skb) * * dev_consume_skb_any(skb) when caller doesn't know its current irq context, * and consumed a packet. 
Used in place of consume_skb(skb) */ static inline void dev_kfree_skb_irq(struct sk_buff *skb) { dev_kfree_skb_irq_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } static inline void dev_consume_skb_irq(struct sk_buff *skb) { dev_kfree_skb_irq_reason(skb, SKB_CONSUMED); } static inline void dev_kfree_skb_any(struct sk_buff *skb) { dev_kfree_skb_any_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } static inline void dev_consume_skb_any(struct sk_buff *skb) { dev_kfree_skb_any_reason(skb, SKB_CONSUMED); } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) { kfree_skb(napi->skb); napi->skb = NULL; } bool netdev_is_rx_handler_busy(struct net_device *dev); int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, void *rx_handler_data); void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); static inline bool is_socket_ioctl_cmd(unsigned int cmd) { return _IOC_TYPE(cmd) == SOCK_IOC_TYPE; } int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg); int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf __user *ifc); int generic_hwtstamp_get_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg); int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex); static inline int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { return __dev_change_net_namespace(dev, net, pat, 0); } int __dev_set_mtu(struct net_device *, int); int dev_set_mtu(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct 
netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); static __always_inline bool __is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb, const bool check_mtu) { const u32 vlan_hdr_len = 4; /* VLAN_HLEN */ unsigned int len; if (!(dev->flags & IFF_UP)) return false; if (!check_mtu) return true; len = dev->mtu + dev->hard_header_len + vlan_hdr_len; if (skb->len <= len) return true; /* if TSO is enabled, we don't care about the length as the packet * could be forwarded without being segmented before */ if (skb_is_gso(skb)) return true; return false; } void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ netdev_core_stats_inc(dev, \ offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) #undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, const bool check_mtu) { if (skb_orphan_frags(skb, GFP_ATOMIC) || unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) { dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); return NET_RX_DROP; } skb_scrub_packet(skb, !net_eq(dev_net(dev), dev_net(skb->dev))); skb->priority = 0; return 0; } bool dev_nit_active(struct net_device *dev); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); static inline void __dev_put(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_dec(*dev->pcpu_refcnt); #else refcount_dec(&dev->dev_refcnt); #endif } } static inline void __dev_hold(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_inc(*dev->pcpu_refcnt); #else refcount_inc(&dev->dev_refcnt); #endif } } static inline void __netdev_tracker_alloc(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); #endif } /* netdev_tracker_alloc() can upgrade a prior untracked reference * taken by dev_get_by_name()/dev_get_by_index() to a tracked one. 
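 * Illustrative use: dev = dev_get_by_index(net, ifindex); if (dev) netdev_tracker_alloc(dev, &tracker, GFP_KERNEL); and later netdev_put(dev, &tracker) to drop both the tracker and the reference.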
*/ static inline void netdev_tracker_alloc(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER refcount_dec(&dev->refcnt_tracker.no_tracker); __netdev_tracker_alloc(dev, tracker, gfp); #endif } static inline void netdev_tracker_free(struct net_device *dev, netdevice_tracker *tracker) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER ref_tracker_free(&dev->refcnt_tracker, tracker); #endif } static inline void netdev_hold(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { if (dev) { __dev_hold(dev); __netdev_tracker_alloc(dev, tracker, gfp); } } static inline void netdev_put(struct net_device *dev, netdevice_tracker *tracker) { if (dev) { netdev_tracker_free(dev, tracker); __dev_put(dev); } } /** * dev_hold - get reference to device * @dev: network device * * Hold reference to device to keep it from being freed. * Try using netdev_hold() instead. */ static inline void dev_hold(struct net_device *dev) { netdev_hold(dev, NULL, GFP_ATOMIC); } /** * dev_put - release reference to device * @dev: network device * * Release reference to device to allow it to be freed. * Try using netdev_put() instead. */ static inline void dev_put(struct net_device *dev) { netdev_put(dev, NULL); } static inline void netdev_ref_replace(struct net_device *odev, struct net_device *ndev, netdevice_tracker *tracker, gfp_t gfp) { if (odev) netdev_tracker_free(odev, tracker); __dev_hold(ndev); __dev_put(odev); if (ndev) __netdev_tracker_alloc(ndev, tracker, gfp); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller * who is responsible for serialization of these calls. * * The name carrier is inappropriate, these functions should really be * called netif_lowerlayer_*() because they represent the state of any * kind of lower layer not just hardware media. */ void linkwatch_fire_event(struct net_device *dev); /** * linkwatch_sync_dev - sync linkwatch for the given device * @dev: network device to sync linkwatch for * * Sync linkwatch for the given device, removing it from the * pending work list (if queued). */ void linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present * @dev: network device * * Check if carrier is present on device */ static inline bool netif_carrier_ok(const struct net_device *dev) { return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); } unsigned long dev_trans_start(struct net_device *dev); void __netdev_watchdog_up(struct net_device *dev); void netif_carrier_on(struct net_device *dev); void netif_carrier_off(struct net_device *dev); void netif_carrier_event(struct net_device *dev); /** * netif_dormant_on - mark device as dormant. * @dev: network device * * Mark device as dormant (as per RFC2863). * * The dormant state indicates that the relevant interface is not * actually in a condition to pass packets (i.e., it is not 'up') but is * in a "pending" state, waiting for some external event. For "on- * demand" interfaces, this new state identifies the situation where the * interface is waiting for events to place it in the up state. */ static inline void netif_dormant_on(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state)) linkwatch_fire_event(dev); } /** * netif_dormant_off - set device as not dormant. * @dev: network device * * Device is not in dormant state. 
*/ static inline void netif_dormant_off(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state)) linkwatch_fire_event(dev); } /** * netif_dormant - test if device is dormant * @dev: network device * * Check if device is dormant. */ static inline bool netif_dormant(const struct net_device *dev) { return test_bit(__LINK_STATE_DORMANT, &dev->state); } /** * netif_testing_on - mark device as under test. * @dev: network device * * Mark device as under test (as per RFC2863). * * The testing state indicates that some test(s) must be performed on * the interface. After completion, of the test, the interface state * will change to up, dormant, or down, as appropriate. */ static inline void netif_testing_on(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_TESTING, &dev->state)) linkwatch_fire_event(dev); } /** * netif_testing_off - set device as not under test. * @dev: network device * * Device is not in testing state. */ static inline void netif_testing_off(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_TESTING, &dev->state)) linkwatch_fire_event(dev); } /** * netif_testing - test if device is under test * @dev: network device * * Check if device is under test */ static inline bool netif_testing(const struct net_device *dev) { return test_bit(__LINK_STATE_TESTING, &dev->state); } /** * netif_oper_up - test if device is operational * @dev: network device * * Check if carrier is operational */ static inline bool netif_oper_up(const struct net_device *dev) { unsigned int operstate = READ_ONCE(dev->operstate); return operstate == IF_OPER_UP || operstate == IF_OPER_UNKNOWN /* backward compat */; } /** * netif_device_present - is device available or removed * @dev: network device * * Check if device has not been removed from system. 
*/ static inline bool netif_device_present(const struct net_device *dev) { return test_bit(__LINK_STATE_PRESENT, &dev->state); } void netif_device_detach(struct net_device *dev); void netif_device_attach(struct net_device *dev); /* * Network interface message level settings */ enum { NETIF_MSG_DRV_BIT, NETIF_MSG_PROBE_BIT, NETIF_MSG_LINK_BIT, NETIF_MSG_TIMER_BIT, NETIF_MSG_IFDOWN_BIT, NETIF_MSG_IFUP_BIT, NETIF_MSG_RX_ERR_BIT, NETIF_MSG_TX_ERR_BIT, NETIF_MSG_TX_QUEUED_BIT, NETIF_MSG_INTR_BIT, NETIF_MSG_TX_DONE_BIT, NETIF_MSG_RX_STATUS_BIT, NETIF_MSG_PKTDATA_BIT, NETIF_MSG_HW_BIT, NETIF_MSG_WOL_BIT, /* When you add a new bit above, update netif_msg_class_names array * in net/ethtool/common.c */ NETIF_MSG_CLASS_COUNT, }; /* Both ethtool_ops interface and internal driver implementation use u32 */ static_assert(NETIF_MSG_CLASS_COUNT <= 32); #define __NETIF_MSG_BIT(bit) ((u32)1 << (bit)) #define __NETIF_MSG(name) __NETIF_MSG_BIT(NETIF_MSG_ ## name ## _BIT) #define NETIF_MSG_DRV __NETIF_MSG(DRV) #define NETIF_MSG_PROBE __NETIF_MSG(PROBE) #define NETIF_MSG_LINK __NETIF_MSG(LINK) #define NETIF_MSG_TIMER __NETIF_MSG(TIMER) #define NETIF_MSG_IFDOWN __NETIF_MSG(IFDOWN) #define NETIF_MSG_IFUP __NETIF_MSG(IFUP) #define NETIF_MSG_RX_ERR __NETIF_MSG(RX_ERR) #define NETIF_MSG_TX_ERR __NETIF_MSG(TX_ERR) #define NETIF_MSG_TX_QUEUED __NETIF_MSG(TX_QUEUED) #define NETIF_MSG_INTR __NETIF_MSG(INTR) #define NETIF_MSG_TX_DONE __NETIF_MSG(TX_DONE) #define NETIF_MSG_RX_STATUS __NETIF_MSG(RX_STATUS) #define NETIF_MSG_PKTDATA __NETIF_MSG(PKTDATA) #define NETIF_MSG_HW __NETIF_MSG(HW) #define NETIF_MSG_WOL __NETIF_MSG(WOL) #define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) #define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) #define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) #define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) #define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) #define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) #define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) #define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) #define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) #define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) #define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) #define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) #define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) #define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) #define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) { /* use default */ if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) return default_msg_enable_bits; if (debug_value == 0) /* no output */ return 0; /* set low N bits */ return (1U << debug_value) - 1; } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, cpu); } static inline bool __netif_tx_acquire(struct netdev_queue *txq) { __acquire(&txq->_xmit_lock); return true; } static inline void __netif_tx_release(struct netdev_queue *txq) { __release(&txq->_xmit_lock); } static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = 
spin_trylock(&txq->_xmit_lock); if (likely(ok)) { /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } /* * txq->trans_start can be read locklessly from dev_watchdog() */ static inline void txq_trans_update(struct netdev_queue *txq) { if (txq->xmit_lock_owner != -1) WRITE_ONCE(txq->trans_start, jiffies); } static inline void txq_trans_cond_update(struct netdev_queue *txq) { unsigned long now = jiffies; if (READ_ONCE(txq->trans_start) != now) WRITE_ONCE(txq->trans_start, now); } /* legacy drivers only, netdev_start_xmit() sets txq->trans_start */ static inline void netif_trans_update(struct net_device *dev) { struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); txq_trans_cond_update(txq); } /** * netif_tx_lock - grab network device transmit lock * @dev: network device * * Get network device transmit lock */ void netif_tx_lock(struct net_device *dev); static inline void netif_tx_lock_bh(struct net_device *dev) { local_bh_disable(); netif_tx_lock(dev); } void netif_tx_unlock(struct net_device *dev); static inline void netif_tx_unlock_bh(struct net_device *dev) { netif_tx_unlock(dev); local_bh_enable(); } #define HARD_TX_LOCK(dev, txq, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_lock(txq, cpu); \ } else { \ __netif_tx_acquire(txq); \ } \ } #define HARD_TX_TRYLOCK(dev, txq) \ (((dev->features & NETIF_F_LLTX) == 0) ? \ __netif_tx_trylock(txq) : \ __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_unlock(txq); \ } else { \ __netif_tx_release(txq); \ } \ } static inline void netif_tx_disable(struct net_device *dev) { unsigned int i; int cpu; local_bh_disable(); cpu = smp_processor_id(); spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); __netif_tx_lock(txq, cpu); netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } spin_unlock(&dev->tx_global_lock); local_bh_enable(); } static inline void netif_addr_lock(struct net_device *dev) { unsigned char nest_level = 0; #ifdef CONFIG_LOCKDEP nest_level = dev->nested_level; #endif spin_lock_nested(&dev->addr_list_lock, nest_level); } static inline void netif_addr_lock_bh(struct net_device *dev) { unsigned char nest_level = 0; #ifdef CONFIG_LOCKDEP nest_level = dev->nested_level; #endif local_bh_disable(); spin_lock_nested(&dev->addr_list_lock, nest_level); } static inline void netif_addr_unlock(struct net_device *dev) { spin_unlock(&dev->addr_list_lock); } static inline void netif_addr_unlock_bh(struct net_device *dev) { spin_unlock_bh(&dev->addr_list_lock); } /* * dev_addrs walker. Should be used only for read access. Call with * rcu_read_lock held. 
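 * Illustrative use: struct netdev_hw_addr *ha; rcu_read_lock(); for_each_dev_addr(dev, ha) pr_info("%pM\n", ha->addr); rcu_read_unlock();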
*/ #define for_each_dev_addr(dev, ha) \ list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list) /* These functions live elsewhere (drivers/net/net_init.c, but related) */ void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) #define alloc_netdev_mq(sizeof_priv, name, name_assign_type, setup, count) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, count, \ count) int register_netdev(struct net_device *dev); void unregister_netdev(struct net_device *dev); int devm_register_netdev(struct device *dev, struct net_device *ndev); /* General hardware address lists handling functions */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)); int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *, int), int (*unsync)(struct net_device *, const unsigned char *, int)); void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *, int)); void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)); void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ void dev_addr_mod(struct net_device *dev, unsigned int offset, const void *addr, size_t len); static inline void __dev_addr_set(struct net_device *dev, const void *addr, size_t len) { dev_addr_mod(dev, 0, addr, len); } static inline void dev_addr_set(struct net_device *dev, const u8 *addr) { __dev_addr_set(dev, addr, dev->addr_len); } int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); /* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, const unsigned char *addr); int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); int dev_uc_del(struct net_device *dev, const unsigned char *addr); int dev_uc_sync(struct net_device *to, struct net_device *from); int dev_uc_sync_multiple(struct net_device *to, struct net_device *from); void dev_uc_unsync(struct net_device *to, struct net_device *from); void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); /** * __dev_uc_sync - Synchonize device's unicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * Add newly added addresses to the interface, and release * addresses that have been deleted. 
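 * Typically called from the driver's ndo_set_rx_mode() handler, with @sync and @unsync adding/removing one hardware filter entry per address.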
*/ static inline int __dev_uc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)) { return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync); } /** * __dev_uc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_uc_sync(). */ static inline void __dev_uc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) { __hw_addr_unsync_dev(&dev->uc, dev, unsync); } /* Functions used for multicast addresses handling */ int dev_mc_add(struct net_device *dev, const unsigned char *addr); int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); int dev_mc_del(struct net_device *dev, const unsigned char *addr); int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); int dev_mc_sync(struct net_device *to, struct net_device *from); int dev_mc_sync_multiple(struct net_device *to, struct net_device *from); void dev_mc_unsync(struct net_device *to, struct net_device *from); void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); /** * __dev_mc_sync - Synchonize device's multicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * Add newly added addresses to the interface, and release * addresses that have been deleted. */ static inline int __dev_mc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)) { return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync); } /** * __dev_mc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_mc_sync(). 
*/ static inline void __dev_mc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) { __hw_addr_unsync_dev(&dev->mc, dev, unsync); } /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ void dev_load(struct net *net, const char *name); struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage); void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats); void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); enum { NESTED_SYNC_IMM_BIT, NESTED_SYNC_TODO_BIT, }; #define __NESTED_SYNC_BIT(bit) ((u32)1 << (bit)) #define __NESTED_SYNC(name) __NESTED_SYNC_BIT(NESTED_SYNC_ ## name ## _BIT) #define NESTED_SYNC_IMM __NESTED_SYNC(IMM) #define NESTED_SYNC_TODO __NESTED_SYNC(TODO) struct netdev_nested_priv { unsigned char flags; void *data; }; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->adj_list.upper, \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter)); \ updev; \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *upper_dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev); bool netdev_has_any_upper_dev(struct net_device *dev); void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); void *netdev_lower_get_next_private_rcu(struct net_device *dev, struct list_head **iter); #define netdev_for_each_lower_private(dev, priv, iter) \ for (iter = (dev)->adj_list.lower.next, \ priv = netdev_lower_get_next_private(dev, &(iter)); \ priv; \ priv = netdev_lower_get_next_private(dev, &(iter))) #define netdev_for_each_lower_private_rcu(dev, priv, iter) \ for (iter = &(dev)->adj_list.lower, \ priv = netdev_lower_get_next_private_rcu(dev, &(iter)); \ priv; \ priv = netdev_lower_get_next_private_rcu(dev, &(iter))) void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter); #define netdev_for_each_lower_dev(dev, ldev, iter) \ for (iter = (dev)->adj_list.lower.next, \ ldev = netdev_lower_get_next(dev, &(iter)); \ ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter); int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv); int netdev_walk_all_lower_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *lower_dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv); void *netdev_adjacent_get_private(struct list_head *adj_list); void 
*netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, struct netlink_ext_ack *extack); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, void *upper_priv, void *upper_info, struct netlink_ext_ack *extack); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); int netdev_adjacent_change_prepare(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev, struct netlink_ext_ack *extack); void netdev_adjacent_change_commit(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev); void netdev_adjacent_change_abort(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); int skb_checksum_help(struct sk_buff *skb); int skb_crc32c_csum_help(struct sk_buff *skb); int skb_csum_hwoffload_help(struct sk_buff *skb, const netdev_features_t features); struct netdev_bonding_info { ifslave slave; ifbond master; }; struct netdev_notifier_bonding_info { struct netdev_notifier_info info; /* must be first */ struct netdev_bonding_info bonding_info; }; void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info); #if IS_ENABLED(CONFIG_ETHTOOL_NETLINK) void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data); #else static inline void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) { } #endif __be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { if (protocol == htons(ETH_P_FCOE)) return !!(features & NETIF_F_FCOE_CRC); /* Assume this is an IP checksum (not SCTP CRC) */ if (features & NETIF_F_HW_CSUM) { /* Can checksum everything */ return true; } switch (protocol) { case htons(ETH_P_IP): return !!(features & NETIF_F_IP_CSUM); case htons(ETH_P_IPV6): return !!(features & NETIF_F_IPV6_CSUM); default: return false; } } #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb); #else static inline void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) { } #endif /* rx skb timestamps */ void net_enable_timestamp(void); void net_disable_timestamp(void); static inline ktime_t netdev_get_tstamp(struct net_device *dev, const struct skb_shared_hwtstamps *hwtstamps, bool cycles) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_get_tstamp) return ops->ndo_get_tstamp(dev, hwtstamps, cycles); return hwtstamps->hwtstamp; } static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev, bool more) { __this_cpu_write(softnet_data.xmit.more, more); return ops->ndo_start_xmit(skb, dev); } static inline bool netdev_xmit_more(void) { return __this_cpu_read(softnet_data.xmit.more); } static inline netdev_tx_t netdev_start_xmit(struct sk_buff 
*skb, struct net_device *dev, struct netdev_queue *txq, bool more) { const struct net_device_ops *ops = dev->netdev_ops; netdev_tx_t rc; rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; } int netdev_class_create_file_ns(const struct class_attribute *class_attr, const void *ns); void netdev_class_remove_file_ns(const struct class_attribute *class_attr, const void *ns); extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { if ((f1 ^ f2) & NETIF_F_HW_CSUM) { if (f1 & NETIF_F_HW_CSUM) f1 |= (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); else f2 |= (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } return f1 & f2; } static inline netdev_features_t netdev_get_wanted_features( struct net_device *dev) { return (dev->features & ~dev->hw_features) | dev->wanted_features; } netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask); /* Allow TSO being used on stacked device : * Performing the GSO segmentation before last device * is a performance improvement. */ static inline netdev_features_t netdev_add_tso_features(netdev_features_t features, netdev_features_t mask) { return netdev_increment_features(features, NETIF_F_ALL_TSO, mask); } int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); netdev_features_t passthru_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); void skb_warn_bad_offload(const struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE_CSUM != (NETIF_F_GSO_GRE_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_IPXIP4 != (NETIF_F_GSO_IPXIP4 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_IPXIP6 != (NETIF_F_GSO_IPXIP6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT)); return (features & feature) == 
feature; } static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } static inline bool netif_needs_gso(struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && (skb->ip_summed != CHECKSUM_UNNECESSARY))); } void netif_set_tso_max_size(struct net_device *dev, unsigned int size); void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs); void netif_inherit_tso_max(struct net_device *to, const struct net_device *from); static inline bool netif_is_macsec(const struct net_device *dev) { return dev->priv_flags & IFF_MACSEC; } static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; } static inline bool netif_is_macvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN_PORT; } static inline bool netif_is_bond_master(const struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; } static inline bool netif_is_bond_slave(const struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } static inline bool netif_supports_nofcs(struct net_device *dev) { return dev->priv_flags & IFF_SUPP_NOFCS; } static inline bool netif_has_l3_rx_handler(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; } static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; } static inline bool netif_is_l3_slave(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_SLAVE; } static inline int dev_sdif(const struct net_device *dev) { #ifdef CONFIG_NET_L3_MASTER_DEV if (netif_is_l3_slave(dev)) return dev->ifindex; #endif return 0; } static inline bool netif_is_bridge_master(const struct net_device *dev) { return dev->priv_flags & IFF_EBRIDGE; } static inline bool netif_is_bridge_port(const struct net_device *dev) { return dev->priv_flags & IFF_BRIDGE_PORT; } static inline bool netif_is_ovs_master(const struct net_device *dev) { return dev->priv_flags & IFF_OPENVSWITCH; } static inline bool netif_is_ovs_port(const struct net_device *dev) { return dev->priv_flags & IFF_OVS_DATAPATH; } static inline bool netif_is_any_bridge_master(const struct net_device *dev) { return netif_is_bridge_master(dev) || netif_is_ovs_master(dev); } static inline bool netif_is_any_bridge_port(const struct net_device *dev) { return netif_is_bridge_port(dev) || netif_is_ovs_port(dev); } static inline bool netif_is_team_master(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM; } static inline bool netif_is_team_port(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM_PORT; } static inline bool netif_is_lag_master(const struct net_device *dev) { return netif_is_bond_master(dev) || netif_is_team_master(dev); } static inline bool netif_is_lag_port(const struct net_device *dev) { return netif_is_bond_slave(dev) || netif_is_team_port(dev); } static inline bool netif_is_rxfh_configured(const struct net_device *dev) { return dev->priv_flags & IFF_RXFH_CONFIGURED; } static inline bool netif_is_failover(const struct net_device *dev) { return dev->priv_flags & IFF_FAILOVER; } static inline bool netif_is_failover_slave(const struct net_device *dev) { return dev->priv_flags & IFF_FAILOVER_SLAVE; } /* This device needs to keep skb dst for qdisc enqueue or 
ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM); } /* return true if dev can't cope with mtu frames that need vlan tag insertion */ static inline bool netif_reduces_vlan_mtu(struct net_device *dev) { /* TODO: reserve and use an additional IFF bit, if we get more users */ return netif_is_macsec(dev); } extern struct pernet_operations __net_initdata loopback_net_ops; /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* netdev_printk helpers, similar to dev_printk */ static inline const char *netdev_name(const struct net_device *dev) { if (!dev->name[0] || strchr(dev->name, '%')) return "(unnamed net_device)"; return dev->name; } static inline const char *netdev_reg_state(const struct net_device *dev) { u8 reg_state = READ_ONCE(dev->reg_state); switch (reg_state) { case NETREG_UNINITIALIZED: return " (uninitialized)"; case NETREG_REGISTERED: return ""; case NETREG_UNREGISTERING: return " (unregistering)"; case NETREG_UNREGISTERED: return " (unregistered)"; case NETREG_RELEASED: return " (released)"; case NETREG_DUMMY: return " (dummy)"; } WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state); return " (unknown)"; } #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) /* * netdev_WARN() acts like dev_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the * file/line information and a backtrace. */ #define netdev_WARN(dev, format, args...) \ WARN(1, "netdevice: %s%s: " format, netdev_name(dev), \ netdev_reg_state(dev), ##args) #define netdev_WARN_ONCE(dev, format, args...) \ WARN_ONCE(1, "netdevice: %s%s: " format, netdev_name(dev), \ netdev_reg_state(dev), ##args) /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. * * Why 16. Because with 16 the only overlap we get on a hash of the * low nibble of the protocol value is RARP/SNAP/X.25. * * 0800 IP * 0001 802.3 * 0002 AX.25 * 0004 802.2 * 8035 RARP * 0005 SNAP * 0805 X.25 * 0806 ARP * 8137 IPX * 0009 Localtalk * 86DD IPv6 */ #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; /* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. */ #define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) #define DEV_STATS_ADD(DEV, FIELD, VAL) \ atomic_long_add((VAL), &(DEV)->stats.__##FIELD) #define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD) #endif /* _LINUX_NETDEVICE_H */
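/*
 * Example sketch (not taken from any driver): one way the BQL and queue-state
 * helpers declared above can be wired into a driver's transmit and
 * TX-completion paths.  struct my_ring, struct my_priv and the my_*()
 * functions are hypothetical placeholders; only the netdev_ and netif_ calls
 * come from this header.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct my_ring {
	int head, tail;			/* hypothetical descriptor ring state */
};

struct my_priv {
	struct my_ring ring[8];		/* one ring per TX queue (hypothetical) */
};

/* Stubbed-out driver internals, only here so the sketch is self-contained. */
static unsigned int my_ring_space(struct my_ring *ring) { return 64; }
static void my_ring_post(struct my_ring *ring, struct sk_buff *skb) { }
static void my_ring_kick(struct my_ring *ring) { }
static void my_tx_clean(struct my_ring *ring, unsigned int *pkts,
			unsigned int *bytes) { *pkts = 0; *bytes = 0; }

static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct my_priv *priv = netdev_priv(dev);
	u16 qidx = skb_get_queue_mapping(skb);
	struct netdev_queue *txq = netdev_get_tx_queue(dev, qidx);
	struct my_ring *ring = &priv->ring[qidx];

	netdev_txq_bql_enqueue_prefetchw(txq);	/* we are about to touch the dql */

	my_ring_post(ring, skb);

	/* Account queued bytes; balanced by netdev_tx_completed_queue() below. */
	netdev_tx_sent_queue(txq, skb->len);

	/* Stop the queue while the descriptor ring cannot take another frame. */
	if (my_ring_space(ring) < MAX_SKB_FRAGS + 1)
		netif_tx_stop_queue(txq);

	/* Ring the doorbell only at the end of an xmit_more() batch. */
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		my_ring_kick(ring);

	return NETDEV_TX_OK;
}

static void my_tx_complete(struct net_device *dev, u16 qidx)
{
	struct my_priv *priv = netdev_priv(dev);
	struct netdev_queue *txq = netdev_get_tx_queue(dev, qidx);
	struct my_ring *ring = &priv->ring[qidx];
	unsigned int pkts = 0, bytes = 0;

	my_tx_clean(ring, &pkts, &bytes);

	/* Once per completion round, so BQL can adapt its byte limit. */
	netdev_tx_completed_queue(txq, pkts, bytes);

	/* Restart the queue once enough ring space has been reclaimed. */
	if (netif_tx_queue_stopped(txq) &&
	    my_ring_space(ring) > MAX_SKB_FRAGS + 1)
		netif_tx_wake_queue(txq);
}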
// SPDX-License-Identifier: GPL-2.0 /* * Tty buffer allocation management */ #include <linux/types.h> #include <linux/errno.h> #include <linux/minmax.h> #include <linux/tty.h> #include <linux/tty_buffer.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/ratelimit.h> #include "tty.h" #define MIN_TTYB_SIZE 256 #define TTYB_ALIGN_MASK 0xff /* * Byte threshold to limit memory consumption for flip buffers. * The actual memory limit is > 2x this amount. */ #define TTYB_DEFAULT_MEM_LIMIT (640 * 1024UL) /* * We default to dicing tty buffer allocations to this many characters * in order to avoid multiple page allocations. We know the size of * tty_buffer itself but it must also be taken into account that the * buffer is 256 byte aligned. See tty_buffer_alloc() for the allocation * logic this must match. 
*/ #define TTY_BUFFER_PAGE (((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~TTYB_ALIGN_MASK) /** * tty_buffer_lock_exclusive - gain exclusive access to buffer * @port: tty port owning the flip buffer * * Guarantees safe use of the &tty_ldisc_ops.receive_buf() method by excluding * the buffer work and any pending flush from using the flip buffer. Data can * continue to be added concurrently to the flip buffer from the driver side. * * See also tty_buffer_unlock_exclusive(). */ void tty_buffer_lock_exclusive(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; atomic_inc(&buf->priority); mutex_lock(&buf->lock); } EXPORT_SYMBOL_GPL(tty_buffer_lock_exclusive); /** * tty_buffer_unlock_exclusive - release exclusive access * @port: tty port owning the flip buffer * * The buffer work is restarted if there is data in the flip buffer. * * See also tty_buffer_lock_exclusive(). */ void tty_buffer_unlock_exclusive(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; bool restart = buf->head->commit != buf->head->read; atomic_dec(&buf->priority); mutex_unlock(&buf->lock); if (restart) queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL_GPL(tty_buffer_unlock_exclusive); /** * tty_buffer_space_avail - return unused buffer space * @port: tty port owning the flip buffer * * Returns: the # of bytes which can be written by the driver without reaching * the buffer limit. * * Note: this does not guarantee that memory is available to write the returned * # of bytes (use tty_prepare_flip_string() to pre-allocate if memory * guarantee is required). */ unsigned int tty_buffer_space_avail(struct tty_port *port) { int space = port->buf.mem_limit - atomic_read(&port->buf.mem_used); return max(space, 0); } EXPORT_SYMBOL_GPL(tty_buffer_space_avail); static void tty_buffer_reset(struct tty_buffer *p, size_t size) { p->used = 0; p->size = size; p->next = NULL; p->commit = 0; p->lookahead = 0; p->read = 0; p->flags = true; } /** * tty_buffer_free_all - free buffers used by a tty * @port: tty port to free from * * Remove all the buffers pending on a tty whether queued with data or in the * free ring. Must be called when the tty is no longer in use. */ void tty_buffer_free_all(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; struct tty_buffer *p, *next; struct llist_node *llist; unsigned int freed = 0; int still_used; while ((p = buf->head) != NULL) { buf->head = p->next; freed += p->size; if (p->size > 0) kfree(p); } llist = llist_del_all(&buf->free); llist_for_each_entry_safe(p, next, llist, free) kfree(p); tty_buffer_reset(&buf->sentinel, 0); buf->head = &buf->sentinel; buf->tail = &buf->sentinel; still_used = atomic_xchg(&buf->mem_used, 0); WARN(still_used != freed, "we still have not freed %d bytes!", still_used - freed); } /** * tty_buffer_alloc - allocate a tty buffer * @port: tty port * @size: desired size (characters) * * Allocate a new tty buffer to hold the desired number of characters. We * round our buffers off in 256 character chunks to get better allocation * behaviour. * * Returns: %NULL if out of memory or the allocation would exceed the per * device queue. 
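 * (The limit consulted is port->buf.mem_limit, normally TTYB_DEFAULT_MEM_LIMIT.)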
*/ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size) { struct llist_node *free; struct tty_buffer *p; /* Round the buffer size out */ size = __ALIGN_MASK(size, TTYB_ALIGN_MASK); if (size <= MIN_TTYB_SIZE) { free = llist_del_first(&port->buf.free); if (free) { p = llist_entry(free, struct tty_buffer, free); goto found; } } /* Should possibly check if this fails for the largest buffer we * have queued and recycle that ? */ if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit) return NULL; p = kmalloc(struct_size(p, data, 2 * size), GFP_ATOMIC | __GFP_NOWARN); if (p == NULL) return NULL; found: tty_buffer_reset(p, size); atomic_add(size, &port->buf.mem_used); return p; } /** * tty_buffer_free - free a tty buffer * @port: tty port owning the buffer * @b: the buffer to free * * Free a tty buffer, or add it to the free list according to our internal * strategy. */ static void tty_buffer_free(struct tty_port *port, struct tty_buffer *b) { struct tty_bufhead *buf = &port->buf; /* Dumb strategy for now - should keep some stats */ WARN_ON(atomic_sub_return(b->size, &buf->mem_used) < 0); if (b->size > MIN_TTYB_SIZE) kfree(b); else if (b->size > 0) llist_add(&b->free, &buf->free); } /** * tty_buffer_flush - flush full tty buffers * @tty: tty to flush * @ld: optional ldisc ptr (must be referenced) * * Flush all the buffers containing receive data. If @ld != %NULL, flush the * ldisc input buffer. * * Locking: takes buffer lock to ensure single-threaded flip buffer 'consumer'. */ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld) { struct tty_port *port = tty->port; struct tty_bufhead *buf = &port->buf; struct tty_buffer *next; atomic_inc(&buf->priority); mutex_lock(&buf->lock); /* paired w/ release in __tty_buffer_request_room; ensures there are * no pending memory accesses to the freed buffer */ while ((next = smp_load_acquire(&buf->head->next)) != NULL) { tty_buffer_free(port, buf->head); buf->head = next; } buf->head->read = buf->head->commit; buf->head->lookahead = buf->head->read; if (ld && ld->ops->flush_buffer) ld->ops->flush_buffer(tty); atomic_dec(&buf->priority); mutex_unlock(&buf->lock); } /** * __tty_buffer_request_room - grow tty buffer if needed * @port: tty port * @size: size desired * @flags: buffer has to store flags along character data * * Make at least @size bytes of linear space available for the tty buffer. * * Will change over to a new buffer if the current buffer is encoded as * %TTY_NORMAL (so has no flags buffer) and the new buffer requires a flags * buffer. * * Returns: the size we managed to find. */ static int __tty_buffer_request_room(struct tty_port *port, size_t size, bool flags) { struct tty_bufhead *buf = &port->buf; struct tty_buffer *n, *b = buf->tail; size_t left = (b->flags ? 1 : 2) * b->size - b->used; bool change = !b->flags && flags; if (!change && left >= size) return size; /* This is the slow path - looking for new buffers to use */ n = tty_buffer_alloc(port, size); if (n == NULL) return change ? 0 : left; n->flags = flags; buf->tail = n; /* * Paired w/ acquire in flush_to_ldisc() and lookahead_bufs() * ensures they see all buffer data. */ smp_store_release(&b->commit, b->used); /* * Paired w/ acquire in flush_to_ldisc() and lookahead_bufs() * ensures the latest commit value can be read before the head * is advanced to the next buffer. 
*/ smp_store_release(&b->next, n); return size; } int tty_buffer_request_room(struct tty_port *port, size_t size) { return __tty_buffer_request_room(port, size, true); } EXPORT_SYMBOL_GPL(tty_buffer_request_room); size_t __tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, const u8 *flags, bool mutable_flags, size_t size) { bool need_flags = mutable_flags || flags[0] != TTY_NORMAL; size_t copied = 0; do { size_t goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE); size_t space = __tty_buffer_request_room(port, goal, need_flags); struct tty_buffer *tb = port->buf.tail; if (unlikely(space == 0)) break; memcpy(char_buf_ptr(tb, tb->used), chars, space); if (mutable_flags) { memcpy(flag_buf_ptr(tb, tb->used), flags, space); flags += space; } else if (tb->flags) { memset(flag_buf_ptr(tb, tb->used), flags[0], space); } else { /* tb->flags should be available once requested */ WARN_ON_ONCE(need_flags); } tb->used += space; copied += space; chars += space; /* There is a small chance that we need to split the data over * several buffers. If this is the case we must loop. */ } while (unlikely(size > copied)); return copied; } EXPORT_SYMBOL(__tty_insert_flip_string_flags); /** * tty_prepare_flip_string - make room for characters * @port: tty port * @chars: return pointer for character write area * @size: desired size * * Prepare a block of space in the buffer for data. * * This is used for drivers that need their own block copy routines into the * buffer. There is no guarantee the buffer is a DMA target! * * Returns: the length available and buffer pointer (@chars) to the space which * is now allocated and accounted for as ready for normal characters. */ size_t tty_prepare_flip_string(struct tty_port *port, u8 **chars, size_t size) { size_t space = __tty_buffer_request_room(port, size, false); if (likely(space)) { struct tty_buffer *tb = port->buf.tail; *chars = char_buf_ptr(tb, tb->used); if (tb->flags) memset(flag_buf_ptr(tb, tb->used), TTY_NORMAL, space); tb->used += space; } return space; } EXPORT_SYMBOL_GPL(tty_prepare_flip_string); /** * tty_ldisc_receive_buf - forward data to line discipline * @ld: line discipline to process input * @p: char buffer * @f: %TTY_NORMAL, %TTY_BREAK, etc. flags buffer * @count: number of bytes to process * * Callers other than flush_to_ldisc() need to exclude the kworker from * concurrent use of the line discipline, see paste_selection(). * * Returns: the number of bytes processed. */ size_t tty_ldisc_receive_buf(struct tty_ldisc *ld, const u8 *p, const u8 *f, size_t count) { if (ld->ops->receive_buf2) count = ld->ops->receive_buf2(ld->tty, p, f, count); else { count = min_t(size_t, count, ld->tty->receive_room); if (count && ld->ops->receive_buf) ld->ops->receive_buf(ld->tty, p, f, count); } return count; } EXPORT_SYMBOL_GPL(tty_ldisc_receive_buf); static void lookahead_bufs(struct tty_port *port, struct tty_buffer *head) { head->lookahead = max(head->lookahead, head->read); while (head) { struct tty_buffer *next; unsigned int count; /* * Paired w/ release in __tty_buffer_request_room(); * ensures commit value read is not stale if the head * is advancing to the next buffer. */ next = smp_load_acquire(&head->next); /* * Paired w/ release in __tty_buffer_request_room() or in * tty_buffer_flush(); ensures we see the committed buffer data. 
*/ count = smp_load_acquire(&head->commit) - head->lookahead; if (!count) { head = next; continue; } if (port->client_ops->lookahead_buf) { u8 *p, *f = NULL; p = char_buf_ptr(head, head->lookahead); if (head->flags) f = flag_buf_ptr(head, head->lookahead); port->client_ops->lookahead_buf(port, p, f, count); } head->lookahead += count; } } static size_t receive_buf(struct tty_port *port, struct tty_buffer *head, size_t count) { u8 *p = char_buf_ptr(head, head->read); const u8 *f = NULL; size_t n; if (head->flags) f = flag_buf_ptr(head, head->read); n = port->client_ops->receive_buf(port, p, f, count); if (n > 0) memset(p, 0, n); return n; } /** * flush_to_ldisc - flush data from buffer to ldisc * @work: tty structure passed from work queue. * * This routine is called out of the software interrupt to flush data from the * buffer chain to the line discipline. * * The receive_buf() method is single threaded for each tty instance. * * Locking: takes buffer lock to ensure single-threaded flip buffer 'consumer'. */ static void flush_to_ldisc(struct work_struct *work) { struct tty_port *port = container_of(work, struct tty_port, buf.work); struct tty_bufhead *buf = &port->buf; mutex_lock(&buf->lock); while (1) { struct tty_buffer *head = buf->head; struct tty_buffer *next; size_t count, rcvd; /* Ldisc or user is trying to gain exclusive access */ if (atomic_read(&buf->priority)) break; /* paired w/ release in __tty_buffer_request_room(); * ensures commit value read is not stale if the head * is advancing to the next buffer */ next = smp_load_acquire(&head->next); /* paired w/ release in __tty_buffer_request_room() or in * tty_buffer_flush(); ensures we see the committed buffer data */ count = smp_load_acquire(&head->commit) - head->read; if (!count) { if (next == NULL) break; buf->head = next; tty_buffer_free(port, head); continue; } rcvd = receive_buf(port, head, count); head->read += rcvd; if (rcvd < count) lookahead_bufs(port, head); if (!rcvd) break; if (need_resched()) cond_resched(); } mutex_unlock(&buf->lock); } static inline void tty_flip_buffer_commit(struct tty_buffer *tail) { /* * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees * buffer data. */ smp_store_release(&tail->commit, tail->used); } /** * tty_flip_buffer_push - push terminal buffers * @port: tty port to push * * Queue a push of the terminal flip buffers to the line discipline. Can be * called from IRQ/atomic context. * * In the event of the queue being busy for flipping the work will be held off * and retried later. */ void tty_flip_buffer_push(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; tty_flip_buffer_commit(buf->tail); queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); /** * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and * push * @port: tty port * @chars: characters * @size: size * * The function combines tty_insert_flip_string() and tty_flip_buffer_push() * with the exception of properly holding the @port->lock. * * To be used only internally (by pty currently). * * Returns: the number added. 
*/ int tty_insert_flip_string_and_push_buffer(struct tty_port *port, const u8 *chars, size_t size) { struct tty_bufhead *buf = &port->buf; unsigned long flags; spin_lock_irqsave(&port->lock, flags); size = tty_insert_flip_string(port, chars, size); if (size) tty_flip_buffer_commit(buf->tail); spin_unlock_irqrestore(&port->lock, flags); queue_work(system_unbound_wq, &buf->work); return size; } /** * tty_buffer_init - prepare a tty buffer structure * @port: tty port to initialise * * Set up the initial state of the buffer management for a tty device. Must be * called before the other tty buffer functions are used. */ void tty_buffer_init(struct tty_port *port) { struct tty_bufhead *buf = &port->buf; mutex_init(&buf->lock); tty_buffer_reset(&buf->sentinel, 0); buf->head = &buf->sentinel; buf->tail = &buf->sentinel; init_llist_head(&buf->free); atomic_set(&buf->mem_used, 0); atomic_set(&buf->priority, 0); INIT_WORK(&buf->work, flush_to_ldisc); buf->mem_limit = TTYB_DEFAULT_MEM_LIMIT; } /** * tty_buffer_set_limit - change the tty buffer memory limit * @port: tty port to change * @limit: memory limit to set * * Change the tty buffer memory limit. * * Must be called before the other tty buffer functions are used. */ int tty_buffer_set_limit(struct tty_port *port, int limit) { if (limit < MIN_TTYB_SIZE) return -EINVAL; port->buf.mem_limit = limit; return 0; } EXPORT_SYMBOL_GPL(tty_buffer_set_limit); /* slave ptys can claim nested buffer lock when handling BRK and INTR */ void tty_buffer_set_lock_subclass(struct tty_port *port) { lockdep_set_subclass(&port->buf.lock, TTY_LOCK_SLAVE); } bool tty_buffer_restart_work(struct tty_port *port) { return queue_work(system_unbound_wq, &port->buf.work); } bool tty_buffer_cancel_work(struct tty_port *port) { return cancel_work_sync(&port->buf.work); } void tty_buffer_flush_work(struct tty_port *port) { flush_work(&port->buf.work); }
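/*
 * Usage sketch (not part of the file above): how a serial driver's receive
 * path typically feeds the flip-buffer machinery implemented in this file.
 * The example_* names are hypothetical; tty_insert_flip_string() and
 * tty_flip_buffer_push() are the real producer-side API, and the amount
 * actually copied may be less than requested when the buffer limit is hit.
 */
#include <linux/printk.h>
#include <linux/tty.h>
#include <linux/tty_flip.h>

static void example_rx_chunk(struct tty_port *port, const u8 *data, size_t len)
{
	/* Copy what fits into the tail buffer (may allocate a new one). */
	size_t copied = tty_insert_flip_string(port, data, len);

	if (copied < len)
		pr_warn_ratelimited("example: dropped %zu rx bytes\n",
				    len - copied);

	/* Commit the tail and schedule flush_to_ldisc() on the workqueue. */
	tty_flip_buffer_push(port);
}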
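/*
 * Second usage sketch (also not part of the file above): the pre-allocated
 * variant for drivers that run their own block copy into the flip buffer,
 * e.g. out of a DMA bounce buffer. Again, example_* is hypothetical; only
 * tty_prepare_flip_string() and tty_flip_buffer_push() are the real API,
 * and the space returned may be smaller than requested (or zero).
 */
#include <linux/string.h>
#include <linux/tty.h>
#include <linux/tty_flip.h>

static void example_rx_block(struct tty_port *port, const u8 *src, size_t len)
{
	u8 *dst;
	size_t space = tty_prepare_flip_string(port, &dst, len);

	if (!space)
		return;

	/* The reserved area is already accounted as TTY_NORMAL data. */
	memcpy(dst, src, space);
	tty_flip_buffer_push(port);
}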
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 *  Generic Bluetooth USB driver
 *
 *  Copyright (C) 2005-2008  Marcel Holtmann <marcel@holtmann.org>
 */

#include <linux/dmi.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/quirks.h>
#include <linux/firmware.h>
#include <linux/iopoll.h>
#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/suspend.h>
#include <linux/gpio/consumer.h>
#include <linux/debugfs.h>
#include <asm/unaligned.h>

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

#include "btintel.h"
#include "btbcm.h"
#include "btrtl.h"
#include "btmtk.h"

#define VERSION "0.8"

static bool disable_scofix;
static bool force_scofix;
static bool enable_autosuspend = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTOSUSPEND);
static bool enable_poll_sync = IS_ENABLED(CONFIG_BT_HCIBTUSB_POLL_SYNC);

static bool reset = true;

static struct usb_driver btusb_driver;

#define BTUSB_IGNORE			BIT(0)
#define BTUSB_DIGIANSWER		BIT(1)
#define BTUSB_CSR			BIT(2)
#define BTUSB_SNIFFER			BIT(3)
#define BTUSB_BCM92035			BIT(4)
#define BTUSB_BROKEN_ISOC		BIT(5)
#define BTUSB_WRONG_SCO_MTU		BIT(6)
#define BTUSB_ATH3012			BIT(7)
#define BTUSB_INTEL_COMBINED		BIT(8)
#define BTUSB_INTEL_BOOT		BIT(9)
#define BTUSB_BCM_PATCHRAM		BIT(10)
#define BTUSB_MARVELL			BIT(11)
#define BTUSB_SWAVE			BIT(12)
#define BTUSB_AMP			BIT(13)
#define BTUSB_QCA_ROME			BIT(14)
#define BTUSB_BCM_APPLE			BIT(15)
#define BTUSB_REALTEK			BIT(16)
#define BTUSB_BCM2045			BIT(17)
#define BTUSB_IFNUM_2			BIT(18)
#define BTUSB_CW6622			BIT(19)
#define BTUSB_MEDIATEK			BIT(20)
#define BTUSB_WIDEBAND_SPEECH		BIT(21)
#define BTUSB_VALID_LE_STATES		BIT(22)
#define BTUSB_QCA_WCN6855		BIT(23)
#define BTUSB_INTEL_BROKEN_SHUTDOWN_LED	BIT(24)
#define
BTUSB_INTEL_BROKEN_INITIAL_NCMD BIT(25) #define BTUSB_INTEL_NO_WBS_SUPPORT BIT(26) #define BTUSB_ACTIONS_SEMI BIT(27) static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ { USB_DEVICE_INFO(0xe0, 0x01, 0x01) }, /* Generic Bluetooth AMP device */ { USB_DEVICE_INFO(0xe0, 0x01, 0x04), .driver_info = BTUSB_AMP }, /* Generic Bluetooth USB interface */ { USB_INTERFACE_INFO(0xe0, 0x01, 0x01) }, /* Apple-specific (Broadcom) devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x05ac, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_APPLE | BTUSB_IFNUM_2 }, /* MediaTek MT76x0E */ { USB_DEVICE(0x0e8d, 0x763f) }, /* Broadcom SoftSailing reporting vendor specific */ { USB_DEVICE(0x0a5c, 0x21e1) }, /* Apple MacBookPro 7,1 */ { USB_DEVICE(0x05ac, 0x8213) }, /* Apple iMac11,1 */ { USB_DEVICE(0x05ac, 0x8215) }, /* Apple MacBookPro6,2 */ { USB_DEVICE(0x05ac, 0x8218) }, /* Apple MacBookAir3,1, MacBookAir3,2 */ { USB_DEVICE(0x05ac, 0x821b) }, /* Apple MacBookAir4,1 */ { USB_DEVICE(0x05ac, 0x821f) }, /* Apple MacBookPro8,2 */ { USB_DEVICE(0x05ac, 0x821a) }, /* Apple MacMini5,1 */ { USB_DEVICE(0x05ac, 0x8281) }, /* AVM BlueFRITZ! USB v2.0 */ { USB_DEVICE(0x057c, 0x3800), .driver_info = BTUSB_SWAVE }, /* Bluetooth Ultraport Module from IBM */ { USB_DEVICE(0x04bf, 0x030a) }, /* ALPS Modules with non-standard id */ { USB_DEVICE(0x044e, 0x3001) }, { USB_DEVICE(0x044e, 0x3002) }, /* Ericsson with non-standard id */ { USB_DEVICE(0x0bdb, 0x1002) }, /* Canyon CN-BTU1 with HID interfaces */ { USB_DEVICE(0x0c10, 0x0000) }, /* Broadcom BCM20702B0 (Dynex/Insignia) */ { USB_DEVICE(0x19ff, 0x0239), .driver_info = BTUSB_BCM_PATCHRAM }, /* Broadcom BCM43142A0 (Foxconn/Lenovo) */ { USB_VENDOR_AND_INTERFACE_INFO(0x105b, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* Broadcom BCM920703 (HTC Vive) */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bb4, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* Foxconn - Hon Hai */ { USB_VENDOR_AND_INTERFACE_INFO(0x0489, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* Lite-On Technology - Broadcom based */ { USB_VENDOR_AND_INTERFACE_INFO(0x04ca, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* Broadcom devices with vendor specific id */ { USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* ASUSTek Computer - Broadcom based */ { USB_VENDOR_AND_INTERFACE_INFO(0x0b05, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* Belkin F8065bf - Broadcom based */ { USB_VENDOR_AND_INTERFACE_INFO(0x050d, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* IMC Networks - Broadcom based */ { USB_VENDOR_AND_INTERFACE_INFO(0x13d3, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* Dell Computer - Broadcom based */ { USB_VENDOR_AND_INTERFACE_INFO(0x413c, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* Toshiba Corp - Broadcom based */ { USB_VENDOR_AND_INTERFACE_INFO(0x0930, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, /* Intel Bluetooth USB Bootloader (RAM module) */ { USB_DEVICE(0x8087, 0x0a5a), .driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, btusb_table); static const struct usb_device_id quirks_table[] = { /* CSR BlueCore devices */ { USB_DEVICE(0x0a12, 0x0001), .driver_info = BTUSB_CSR }, /* Broadcom BCM2033 without firmware */ { USB_DEVICE(0x0a5c, 0x2033), .driver_info = BTUSB_IGNORE }, /* Broadcom BCM2045 devices */ { USB_DEVICE(0x0a5c, 0x2045), .driver_info = BTUSB_BCM2045 }, /* Atheros 3011 with sflash 
firmware */ { USB_DEVICE(0x0489, 0xe027), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x0489, 0xe03d), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x04f2, 0xaff1), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x0930, 0x0215), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x0cf3, 0xe019), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x13d3, 0x3304), .driver_info = BTUSB_IGNORE }, /* Atheros AR9285 Malbec with sflash firmware */ { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE }, /* Atheros 3012 with sflash firmware */ { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe076), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe078), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe095), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3006), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x817b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3395), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3408), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3423), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3472), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3474), .driver_info = 
BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3487), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3490), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ { USB_DEVICE(0x0cf3, 0x535b), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cf3, 0xe301), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cf3, 0xe500), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe09f), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe0a2), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x3015), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x3016), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x301a), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x3021), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3491), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3496), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3501), .driver_info = BTUSB_QCA_ROME | BTUSB_WIDEBAND_SPEECH }, /* QCA WCN6855 chipset */ { USB_DEVICE(0x0cf3, 0xe600), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0cc), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0d6), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0e3), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9309), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9409), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0d0), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9108), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9109), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9208), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9209), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9308), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9408), .driver_info = BTUSB_QCA_WCN6855 | 
BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9508), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9509), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9608), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9609), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x10ab, 0x9f09), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3022), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0c7), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0c9), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0ca), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0cb), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0ce), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0de), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0df), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0e1), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0ea), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0ec), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3023), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3024), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3a22), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3a24), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3a26), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3a27), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, /* QCA WCN785x chipset */ { USB_DEVICE(0x0cf3, 0xe700), .driver_info = BTUSB_QCA_WCN6855 | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, /* Broadcom BCM2035 */ { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU }, { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Broadcom BCM2045 */ { USB_DEVICE(0x0a5c, 0x2039), .driver_info = BTUSB_WRONG_SCO_MTU }, { USB_DEVICE(0x0a5c, 0x2101), .driver_info = BTUSB_WRONG_SCO_MTU }, /* IBM/Lenovo ThinkPad with Broadcom chip */ { USB_DEVICE(0x0a5c, 0x201e), .driver_info = BTUSB_WRONG_SCO_MTU }, { USB_DEVICE(0x0a5c, 0x2110), .driver_info = BTUSB_WRONG_SCO_MTU }, /* HP laptop with Broadcom chip */ { USB_DEVICE(0x03f0, 0x171d), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Dell laptop with Broadcom chip */ { USB_DEVICE(0x413c, 0x8126), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Dell Wireless 370 and 410 devices */ { USB_DEVICE(0x413c, 0x8152), .driver_info = 
BTUSB_WRONG_SCO_MTU }, { USB_DEVICE(0x413c, 0x8156), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Belkin F8T012 and F8T013 devices */ { USB_DEVICE(0x050d, 0x0012), .driver_info = BTUSB_WRONG_SCO_MTU }, { USB_DEVICE(0x050d, 0x0013), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Asus WL-BTD202 device */ { USB_DEVICE(0x0b05, 0x1715), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Kensington Bluetooth USB adapter */ { USB_DEVICE(0x047d, 0x105e), .driver_info = BTUSB_WRONG_SCO_MTU }, /* RTX Telecom based adapters with buggy SCO support */ { USB_DEVICE(0x0400, 0x0807), .driver_info = BTUSB_BROKEN_ISOC }, { USB_DEVICE(0x0400, 0x080a), .driver_info = BTUSB_BROKEN_ISOC }, /* CONWISE Technology based adapters with buggy SCO support */ { USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC | BTUSB_CW6622}, /* Roper Class 1 Bluetooth Dongle (Silicon Wave based) */ { USB_DEVICE(0x1310, 0x0001), .driver_info = BTUSB_SWAVE }, /* Digianswer devices */ { USB_DEVICE(0x08fd, 0x0001), .driver_info = BTUSB_DIGIANSWER }, { USB_DEVICE(0x08fd, 0x0002), .driver_info = BTUSB_IGNORE }, /* CSR BlueCore Bluetooth Sniffer */ { USB_DEVICE(0x0a12, 0x0002), .driver_info = BTUSB_SNIFFER | BTUSB_BROKEN_ISOC }, /* Frontline ComProbe Bluetooth Sniffer */ { USB_DEVICE(0x16d3, 0x0002), .driver_info = BTUSB_SNIFFER | BTUSB_BROKEN_ISOC }, /* Marvell Bluetooth devices */ { USB_DEVICE(0x1286, 0x2044), .driver_info = BTUSB_MARVELL }, { USB_DEVICE(0x1286, 0x2046), .driver_info = BTUSB_MARVELL }, { USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL }, /* Intel Bluetooth devices */ { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0035), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0036), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0038), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED | BTUSB_INTEL_NO_WBS_SUPPORT | BTUSB_INTEL_BROKEN_INITIAL_NCMD | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED | BTUSB_INTEL_NO_WBS_SUPPORT | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_COMBINED }, /* Other Intel Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01), .driver_info = BTUSB_IGNORE }, /* Realtek 8821CE Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3529), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, /* Realtek 8822CE Bluetooth devices */ { USB_DEVICE(0x0bda, 0xb00c), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xc822), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, /* Realtek 8822CU Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3549), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, /* Realtek 8852AE Bluetooth devices */ { USB_DEVICE(0x0bda, 0x2852), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xc852), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x385a), .driver_info = 
BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x4852), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04c5, 0x165c), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cb8, 0xc549), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, /* Realtek 8852CE Bluetooth devices */ { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3592), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, /* Realtek 8852BE Bluetooth devices */ { USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3570), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3571), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3572), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, /* Realtek 8852BT/8852BE-VT Bluetooth devices */ { USB_DEVICE(0x0bda, 0x8520), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), .driver_info = BTUSB_REALTEK }, /* MediaTek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0e8d, 0xe0, 0x01, 0x01), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, /* Additional MediaTek MT7615E Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3560), .driver_info = BTUSB_MEDIATEK}, /* Additional MediaTek MT7663 Bluetooth devices */ { USB_DEVICE(0x043e, 0x310c), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3801), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, /* Additional MediaTek MT7668 Bluetooth devices */ { USB_DEVICE(0x043e, 0x3109), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, /* Additional MediaTek MT7921 Bluetooth devices */ { USB_DEVICE(0x0489, 0xe0c8), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0e0), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0f2), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3802), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x13d3, 0x3563), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x13d3, 0x3564), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x13d3, 0x3567), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x13d3, 0x3578), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x13d3, 0x3583), .driver_info = 
BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0cd), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0e8d, 0x0608), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, /* MediaTek MT7922A Bluetooth devices */ { USB_DEVICE(0x0489, 0xe0d8), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0f5), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0e2), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0e4), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0f1), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0f2), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0f5), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe0f6), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x0489, 0xe102), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x04ca, 0x3804), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, { USB_DEVICE(0x35f5, 0x7922), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, /* Additional MediaTek MT7925 Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3602), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, /* Additional Realtek 8723AE Bluetooth devices */ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3394), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8723BE Bluetooth devices */ { USB_DEVICE(0x0489, 0xe085), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x0489, 0xe08b), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x04f2, 0xb49f), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3410), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3416), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3459), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3494), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8723BU Bluetooth devices */ { USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8723DE Bluetooth devices */ { USB_DEVICE(0x0bda, 0xb009), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK }, /* Additional Realtek 8761BUV Bluetooth devices */ { USB_DEVICE(0x2357, 0x0604), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0b05, 0x190e), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x2550, 0x8761), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x8771), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x6655, 0x8771), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x7392, 0xc611), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x2b89, 0x8761), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, /* Additional Realtek 
8821AE Bluetooth devices */
	{ USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
	{ USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK },
	{ USB_DEVICE(0x13d3, 0x3458), .driver_info = BTUSB_REALTEK },
	{ USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
	{ USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },

	/* Additional Realtek 8822BE Bluetooth devices */
	{ USB_DEVICE(0x13d3, 0x3526), .driver_info = BTUSB_REALTEK },
	{ USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },

	/* Additional Realtek 8822CE Bluetooth devices */
	{ USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x04c5, 0x161f), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x0b05, 0x18ef), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x13d3, 0x3548), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x13d3, 0x3549), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x13d3, 0x3553), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x13d3, 0x3555), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x2ff8, 0x3051), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x1358, 0xc123), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x0bda, 0xc123), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },
	{ USB_DEVICE(0x0cb5, 0xc547), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH },

	/* Actions Semiconductor ATS2851 based devices */
	{ USB_DEVICE(0x10d7, 0xb012), .driver_info = BTUSB_ACTIONS_SEMI },

	/* Silicon Wave based devices */
	{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },

	{ }	/* Terminating entry */
};

/* The Bluetooth USB module built into some devices needs to be reset on
 * resume. This is a problem with the platform (likely shutting off all
 * power), not with the module itself, so we use a DMI list to match known
 * broken platforms.
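 */

/*
 * A minimal, standalone sketch (compiled out here) of the idea behind the
 * DMI quirk table that follows: a platform is flagged as "needs reset on
 * resume" when both its vendor and product strings match an entry.  The
 * struct and function names are hypothetical userspace stand-ins, not
 * driver or DMI core API.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct quirk_entry {
	const char *sys_vendor;
	const char *product_name;
};

static const struct quirk_entry needs_reset_resume[] = {
	{ "Dell Inc.", "OptiPlex 3060" },
	{ "Dell Inc.", "XPS 13 9360" },
	{ "Dell Inc.", "Inspiron 5565" },
	{ NULL, NULL }			/* terminating entry */
};

static bool platform_needs_reset_resume(const char *vendor, const char *product)
{
	const struct quirk_entry *q;

	for (q = needs_reset_resume; q->sys_vendor; q++) {
		if (!strcmp(q->sys_vendor, vendor) &&
		    !strcmp(q->product_name, product))
			return true;
	}
	return false;
}

int main(void)
{
	printf("%d\n", platform_needs_reset_resume("Dell Inc.", "XPS 13 9360"));
	return 0;
}
#endif
/*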
*/ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { { /* Dell OptiPlex 3060 (QCA ROME device 0cf3:e007) */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"), }, }, { /* Dell XPS 9360 (QCA ROME device 0cf3:e300) */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"), }, }, { /* Dell Inspiron 5565 (QCA ROME device 0cf3:e009) */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5565"), }, }, {} }; struct qca_dump_info { /* fields for dump collection */ u16 id_vendor; u16 id_product; u32 fw_version; u32 controller_id; u32 ram_dump_size; u16 ram_dump_seqno; }; #define BTUSB_MAX_ISOC_FRAMES 10 #define BTUSB_INTR_RUNNING 0 #define BTUSB_BULK_RUNNING 1 #define BTUSB_ISOC_RUNNING 2 #define BTUSB_SUSPENDING 3 #define BTUSB_DID_ISO_RESUME 4 #define BTUSB_BOOTLOADER 5 #define BTUSB_DOWNLOADING 6 #define BTUSB_FIRMWARE_LOADED 7 #define BTUSB_FIRMWARE_FAILED 8 #define BTUSB_BOOTING 9 #define BTUSB_DIAG_RUNNING 10 #define BTUSB_OOB_WAKE_ENABLED 11 #define BTUSB_HW_RESET_ACTIVE 12 #define BTUSB_TX_WAIT_VND_EVT 13 #define BTUSB_WAKEUP_AUTOSUSPEND 14 #define BTUSB_USE_ALT3_FOR_WBS 15 #define BTUSB_ALT6_CONTINUOUS_TX 16 #define BTUSB_HW_SSR_ACTIVE 17 struct btusb_data { struct hci_dev *hdev; struct usb_device *udev; struct usb_interface *intf; struct usb_interface *isoc; struct usb_interface *diag; unsigned isoc_ifnum; unsigned long flags; bool poll_sync; int intr_interval; struct work_struct work; struct work_struct waker; struct delayed_work rx_work; struct sk_buff_head acl_q; struct usb_anchor deferred; struct usb_anchor tx_anchor; int tx_in_flight; spinlock_t txlock; struct usb_anchor intr_anchor; struct usb_anchor bulk_anchor; struct usb_anchor isoc_anchor; struct usb_anchor diag_anchor; struct usb_anchor ctrl_anchor; spinlock_t rxlock; struct sk_buff *evt_skb; struct sk_buff *acl_skb; struct sk_buff *sco_skb; struct usb_endpoint_descriptor *intr_ep; struct usb_endpoint_descriptor *bulk_tx_ep; struct usb_endpoint_descriptor *bulk_rx_ep; struct usb_endpoint_descriptor *isoc_tx_ep; struct usb_endpoint_descriptor *isoc_rx_ep; struct usb_endpoint_descriptor *diag_tx_ep; struct usb_endpoint_descriptor *diag_rx_ep; struct gpio_desc *reset_gpio; __u8 cmdreq_type; __u8 cmdreq; unsigned int sco_num; unsigned int air_mode; bool usb_alt6_packet_flow; int isoc_altsetting; int suspend_count; int (*recv_event)(struct hci_dev *hdev, struct sk_buff *skb); int (*recv_acl)(struct hci_dev *hdev, struct sk_buff *skb); int (*recv_bulk)(struct btusb_data *data, void *buffer, int count); int (*setup_on_usb)(struct hci_dev *hdev); int oob_wake_irq; /* irq for out-of-band wake-on-bt */ unsigned cmd_timeout_cnt; struct qca_dump_info qca_dump; }; static void btusb_reset(struct hci_dev *hdev) { struct btusb_data *data; int err; if (hdev->reset) { hdev->reset(hdev); return; } data = hci_get_drvdata(hdev); /* This is not an unbalanced PM reference since the device will reset */ err = usb_autopm_get_interface(data->intf); if (err) { bt_dev_err(hdev, "Failed usb_autopm_get_interface: %d", err); return; } bt_dev_err(hdev, "Resetting usb device."); usb_queue_reset_device(data->intf); } static void btusb_intel_cmd_timeout(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); struct gpio_desc *reset_gpio = data->reset_gpio; struct btintel_data *intel_data = hci_get_priv(hdev); if (++data->cmd_timeout_cnt < 5) return; if (intel_data->acpi_reset_method) { if 
(test_and_set_bit(INTEL_ACPI_RESET_ACTIVE, intel_data->flags)) { bt_dev_err(hdev, "acpi: last reset failed ? Not resetting again"); return; } bt_dev_err(hdev, "Initiating acpi reset method"); /* If ACPI reset method fails, lets try with legacy GPIO * toggling */ if (!intel_data->acpi_reset_method(hdev)) { return; } } if (!reset_gpio) { btusb_reset(hdev); return; } /* * Toggle the hard reset line if the platform provides one. The reset * is going to yank the device off the USB and then replug. So doing * once is enough. The cleanup is handled correctly on the way out * (standard USB disconnect), and the new device is detected cleanly * and bound to the driver again like it should be. */ if (test_and_set_bit(BTUSB_HW_RESET_ACTIVE, &data->flags)) { bt_dev_err(hdev, "last reset failed? Not resetting again"); return; } bt_dev_err(hdev, "Initiating HW reset via gpio"); gpiod_set_value_cansleep(reset_gpio, 1); msleep(100); gpiod_set_value_cansleep(reset_gpio, 0); } #define RTK_DEVCOREDUMP_CODE_MEMDUMP 0x01 #define RTK_DEVCOREDUMP_CODE_HW_ERR 0x02 #define RTK_DEVCOREDUMP_CODE_CMD_TIMEOUT 0x03 #define RTK_SUB_EVENT_CODE_COREDUMP 0x34 struct rtk_dev_coredump_hdr { u8 type; u8 code; u8 reserved[2]; } __packed; static inline void btusb_rtl_alloc_devcoredump(struct hci_dev *hdev, struct rtk_dev_coredump_hdr *hdr, u8 *buf, u32 len) { struct sk_buff *skb; skb = alloc_skb(len + sizeof(*hdr), GFP_ATOMIC); if (!skb) return; skb_put_data(skb, hdr, sizeof(*hdr)); if (len) skb_put_data(skb, buf, len); if (!hci_devcd_init(hdev, skb->len)) { hci_devcd_append(hdev, skb); hci_devcd_complete(hdev); } else { bt_dev_err(hdev, "RTL: Failed to generate devcoredump"); kfree_skb(skb); } } static void btusb_rtl_cmd_timeout(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); struct gpio_desc *reset_gpio = data->reset_gpio; struct rtk_dev_coredump_hdr hdr = { .type = RTK_DEVCOREDUMP_CODE_CMD_TIMEOUT, }; btusb_rtl_alloc_devcoredump(hdev, &hdr, NULL, 0); if (++data->cmd_timeout_cnt < 5) return; if (!reset_gpio) { btusb_reset(hdev); return; } /* Toggle the hard reset line. The Realtek device is going to * yank itself off the USB and then replug. The cleanup is handled * correctly on the way out (standard USB disconnect), and the new * device is detected cleanly and bound to the driver again like * it should be. */ if (test_and_set_bit(BTUSB_HW_RESET_ACTIVE, &data->flags)) { bt_dev_err(hdev, "last reset failed? Not resetting again"); return; } bt_dev_err(hdev, "Reset Realtek device via gpio"); gpiod_set_value_cansleep(reset_gpio, 1); msleep(200); gpiod_set_value_cansleep(reset_gpio, 0); } static void btusb_rtl_hw_error(struct hci_dev *hdev, u8 code) { struct rtk_dev_coredump_hdr hdr = { .type = RTK_DEVCOREDUMP_CODE_HW_ERR, .code = code, }; bt_dev_err(hdev, "RTL: hw err, trigger devcoredump (%d)", code); btusb_rtl_alloc_devcoredump(hdev, &hdr, NULL, 0); } static void btusb_qca_cmd_timeout(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); struct gpio_desc *reset_gpio = data->reset_gpio; if (test_bit(BTUSB_HW_SSR_ACTIVE, &data->flags)) { bt_dev_info(hdev, "Ramdump in progress, defer cmd_timeout"); return; } if (++data->cmd_timeout_cnt < 5) return; if (reset_gpio) { bt_dev_err(hdev, "Reset qca device via bt_en gpio"); /* Toggle the hard reset line. The qca bt device is going to * yank itself off the USB and then replug. The cleanup is handled * correctly on the way out (standard USB disconnect), and the new * device is detected cleanly and bound to the driver again like * it should be. 
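 */

/*
 * The Intel, Realtek and QCA timeout handlers in this area all follow the
 * same recovery policy: count consecutive command timeouts and, only on the
 * fifth one, pulse the reset GPIO exactly once (a flag stops it from being
 * re-triggered while a reset is already in flight).  A minimal standalone
 * sketch of that policy, compiled out here; gpio_set() and sleep_ms() are
 * hypothetical stand-ins for gpiod_set_value_cansleep() and msleep(), and
 * the atomicity of test_and_set_bit() is not modelled.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

static unsigned int cmd_timeout_cnt;
static bool hw_reset_active;

static void gpio_set(int value) { printf("reset gpio -> %d\n", value); }
static void sleep_ms(unsigned int ms) { (void)ms; }

static void cmd_timeout(void)
{
	if (++cmd_timeout_cnt < 5)
		return;			/* tolerate sporadic timeouts */

	if (hw_reset_active)
		return;			/* last reset failed; don't retry */
	hw_reset_active = true;

	/* Pulse the reset line; the device drops off the bus and re-enumerates */
	gpio_set(1);
	sleep_ms(100);
	gpio_set(0);
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++)
		cmd_timeout();		/* only the fifth call toggles the line */
	return 0;
}
#endif
/*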
*/ if (test_and_set_bit(BTUSB_HW_RESET_ACTIVE, &data->flags)) { bt_dev_err(hdev, "last reset failed? Not resetting again"); return; } gpiod_set_value_cansleep(reset_gpio, 0); msleep(200); gpiod_set_value_cansleep(reset_gpio, 1); return; } btusb_reset(hdev); } static inline void btusb_free_frags(struct btusb_data *data) { unsigned long flags; spin_lock_irqsave(&data->rxlock, flags); dev_kfree_skb_irq(data->evt_skb); data->evt_skb = NULL; dev_kfree_skb_irq(data->acl_skb); data->acl_skb = NULL; dev_kfree_skb_irq(data->sco_skb); data->sco_skb = NULL; spin_unlock_irqrestore(&data->rxlock, flags); } static int btusb_recv_event(struct btusb_data *data, struct sk_buff *skb) { if (data->intr_interval) { /* Trigger dequeue immediatelly if an event is received */ schedule_delayed_work(&data->rx_work, 0); } return data->recv_event(data->hdev, skb); } static int btusb_recv_intr(struct btusb_data *data, void *buffer, int count) { struct sk_buff *skb; unsigned long flags; int err = 0; spin_lock_irqsave(&data->rxlock, flags); skb = data->evt_skb; while (count) { int len; if (!skb) { skb = bt_skb_alloc(HCI_MAX_EVENT_SIZE, GFP_ATOMIC); if (!skb) { err = -ENOMEM; break; } hci_skb_pkt_type(skb) = HCI_EVENT_PKT; hci_skb_expect(skb) = HCI_EVENT_HDR_SIZE; } len = min_t(uint, hci_skb_expect(skb), count); skb_put_data(skb, buffer, len); count -= len; buffer += len; hci_skb_expect(skb) -= len; if (skb->len == HCI_EVENT_HDR_SIZE) { /* Complete event header */ hci_skb_expect(skb) = hci_event_hdr(skb)->plen; if (skb_tailroom(skb) < hci_skb_expect(skb)) { kfree_skb(skb); skb = NULL; err = -EILSEQ; break; } } if (!hci_skb_expect(skb)) { /* Complete frame */ btusb_recv_event(data, skb); skb = NULL; } } data->evt_skb = skb; spin_unlock_irqrestore(&data->rxlock, flags); return err; } static int btusb_recv_acl(struct btusb_data *data, struct sk_buff *skb) { /* Only queue ACL packet if intr_interval is set as it means * force_poll_sync has been enabled. */ if (!data->intr_interval) return data->recv_acl(data->hdev, skb); skb_queue_tail(&data->acl_q, skb); schedule_delayed_work(&data->rx_work, data->intr_interval); return 0; } static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count) { struct sk_buff *skb; unsigned long flags; int err = 0; spin_lock_irqsave(&data->rxlock, flags); skb = data->acl_skb; while (count) { int len; if (!skb) { skb = bt_skb_alloc(HCI_MAX_FRAME_SIZE, GFP_ATOMIC); if (!skb) { err = -ENOMEM; break; } hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; hci_skb_expect(skb) = HCI_ACL_HDR_SIZE; } len = min_t(uint, hci_skb_expect(skb), count); skb_put_data(skb, buffer, len); count -= len; buffer += len; hci_skb_expect(skb) -= len; if (skb->len == HCI_ACL_HDR_SIZE) { __le16 dlen = hci_acl_hdr(skb)->dlen; /* Complete ACL header */ hci_skb_expect(skb) = __le16_to_cpu(dlen); if (skb_tailroom(skb) < hci_skb_expect(skb)) { kfree_skb(skb); skb = NULL; err = -EILSEQ; break; } } if (!hci_skb_expect(skb)) { /* Complete frame */ btusb_recv_acl(data, skb); skb = NULL; } } data->acl_skb = skb; spin_unlock_irqrestore(&data->rxlock, flags); return err; } static bool btusb_validate_sco_handle(struct hci_dev *hdev, struct hci_sco_hdr *hdr) { __u16 handle; if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) // Can't validate, userspace controls everything. return true; /* * USB isochronous transfers are not designed to be reliable and may * lose fragments. When this happens, the next first fragment * encountered might actually be a continuation fragment. 
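 */

/*
 * A standalone sketch (compiled out here) of the header-then-payload
 * reassembly that btusb_recv_intr() above performs on the interrupt
 * endpoint: collect the two-byte event header, learn the parameter length
 * from it, then keep copying until the frame is complete.  Locking,
 * allocation failures and the tailroom checks from the driver are omitted,
 * and the buffer sizes are illustrative.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EVENT_HDR_SIZE	2		/* event code + parameter length */
#define MAX_EVENT_SIZE	260

static uint8_t evt[MAX_EVENT_SIZE];
static size_t evt_len;			/* bytes collected so far */
static size_t expect = EVENT_HDR_SIZE;	/* bytes still missing */

static void deliver(const uint8_t *e, size_t len)
{
	printf("event 0x%02x, %zu bytes\n", e[0], len);
}

/* Feed one arbitrarily sized interrupt transfer into the reassembler */
static void recv_intr(const uint8_t *buf, size_t count)
{
	while (count) {
		size_t len = expect < count ? expect : count;

		memcpy(evt + evt_len, buf, len);
		evt_len += len;
		buf += len;
		count -= len;
		expect -= len;

		if (evt_len == EVENT_HDR_SIZE)
			expect = evt[1];	/* header complete: plen */

		if (!expect) {			/* complete frame */
			deliver(evt, evt_len);
			evt_len = 0;
			expect = EVENT_HDR_SIZE;
		}
	}
}

int main(void)
{
	/* A Command Complete event split across two transfers */
	static const uint8_t part1[] = { 0x0e, 0x04, 0x01 };
	static const uint8_t part2[] = { 0x03, 0x0c, 0x00 };

	recv_intr(part1, sizeof(part1));
	recv_intr(part2, sizeof(part2));
	return 0;
}
#endif
/*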
* Validate the handle to detect it and drop it, or else the upper * layer will get garbage for a while. */ handle = hci_handle(__le16_to_cpu(hdr->handle)); switch (hci_conn_lookup_type(hdev, handle)) { case SCO_LINK: case ESCO_LINK: return true; default: return false; } } static int btusb_recv_isoc(struct btusb_data *data, void *buffer, int count) { struct sk_buff *skb; unsigned long flags; int err = 0; spin_lock_irqsave(&data->rxlock, flags); skb = data->sco_skb; while (count) { int len; if (!skb) { skb = bt_skb_alloc(HCI_MAX_SCO_SIZE, GFP_ATOMIC); if (!skb) { err = -ENOMEM; break; } hci_skb_pkt_type(skb) = HCI_SCODATA_PKT; hci_skb_expect(skb) = HCI_SCO_HDR_SIZE; } len = min_t(uint, hci_skb_expect(skb), count); skb_put_data(skb, buffer, len); count -= len; buffer += len; hci_skb_expect(skb) -= len; if (skb->len == HCI_SCO_HDR_SIZE) { /* Complete SCO header */ struct hci_sco_hdr *hdr = hci_sco_hdr(skb); hci_skb_expect(skb) = hdr->dlen; if (skb_tailroom(skb) < hci_skb_expect(skb) || !btusb_validate_sco_handle(data->hdev, hdr)) { kfree_skb(skb); skb = NULL; err = -EILSEQ; break; } } if (!hci_skb_expect(skb)) { /* Complete frame */ hci_recv_frame(data->hdev, skb); skb = NULL; } } data->sco_skb = skb; spin_unlock_irqrestore(&data->rxlock, flags); return err; } static void btusb_intr_complete(struct urb *urb) { struct hci_dev *hdev = urb->context; struct btusb_data *data = hci_get_drvdata(hdev); int err; BT_DBG("%s urb %p status %d count %d", hdev->name, urb, urb->status, urb->actual_length); if (!test_bit(HCI_RUNNING, &hdev->flags)) return; if (urb->status == 0) { hdev->stat.byte_rx += urb->actual_length; if (btusb_recv_intr(data, urb->transfer_buffer, urb->actual_length) < 0) { bt_dev_err(hdev, "corrupted event packet"); hdev->stat.err_rx++; } } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ return; } if (!test_bit(BTUSB_INTR_RUNNING, &data->flags)) return; usb_mark_last_busy(data->udev); usb_anchor_urb(urb, &data->intr_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p failed to resubmit (%d)", urb, -err); if (err != -EPERM) hci_cmd_sync_cancel(hdev, -err); usb_unanchor_urb(urb); } } static int btusb_submit_intr_urb(struct hci_dev *hdev, gfp_t mem_flags) { struct btusb_data *data = hci_get_drvdata(hdev); struct urb *urb; unsigned char *buf; unsigned int pipe; int err, size; BT_DBG("%s", hdev->name); if (!data->intr_ep) return -ENODEV; urb = usb_alloc_urb(0, mem_flags); if (!urb) return -ENOMEM; size = le16_to_cpu(data->intr_ep->wMaxPacketSize); buf = kmalloc(size, mem_flags); if (!buf) { usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvintpipe(data->udev, data->intr_ep->bEndpointAddress); usb_fill_int_urb(urb, data->udev, pipe, buf, size, btusb_intr_complete, hdev, data->intr_ep->bInterval); urb->transfer_flags |= URB_FREE_BUFFER; usb_anchor_urb(urb, &data->intr_anchor); err = usb_submit_urb(urb, mem_flags); if (err < 0) { if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); if (err != -EPERM) hci_cmd_sync_cancel(hdev, -err); usb_unanchor_urb(urb); } /* Only initialize intr_interval if URB poll sync is enabled */ if (!data->poll_sync) goto done; /* The units are frames (milliseconds) for full and low speed devices, * and microframes (1/8 millisecond) for highspeed and SuperSpeed * devices. 
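 */

/*
 * The arithmetic behind the switch that follows, as a standalone sketch
 * (compiled out here): on SuperSpeed links urb->interval counts 125 us
 * microframes, otherwise the driver treats it as 1 ms frames, and the
 * result is what ends up in data->intr_interval (converted to jiffies in
 * the real code).  The enum is a simplified stand-in for the USB core's
 * speed constants.
 */
#if 0
#include <stdio.h>

enum usb_speed { SPEED_LOW, SPEED_FULL, SPEED_HIGH, SPEED_SUPER };

static unsigned int poll_interval_usecs(enum usb_speed speed, unsigned int interval)
{
	switch (speed) {
	case SPEED_SUPER:
		return interval * 125;		/* 125 us microframes */
	default:
		return interval * 1000;		/* 1 ms frames */
	}
}

int main(void)
{
	printf("%u us\n", poll_interval_usecs(SPEED_SUPER, 8));	/* 1000 us */
	printf("%u us\n", poll_interval_usecs(SPEED_FULL, 8));	/* 8000 us */
	return 0;
}
#endif
/*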
* * This is done once on open/resume so it shouldn't change even if * force_poll_sync changes. */ switch (urb->dev->speed) { case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: /* units are 125us */ data->intr_interval = usecs_to_jiffies(urb->interval * 125); break; default: data->intr_interval = msecs_to_jiffies(urb->interval); break; } done: usb_free_urb(urb); return err; } static void btusb_bulk_complete(struct urb *urb) { struct hci_dev *hdev = urb->context; struct btusb_data *data = hci_get_drvdata(hdev); int err; BT_DBG("%s urb %p status %d count %d", hdev->name, urb, urb->status, urb->actual_length); if (!test_bit(HCI_RUNNING, &hdev->flags)) return; if (urb->status == 0) { hdev->stat.byte_rx += urb->actual_length; if (data->recv_bulk(data, urb->transfer_buffer, urb->actual_length) < 0) { bt_dev_err(hdev, "corrupted ACL packet"); hdev->stat.err_rx++; } } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ return; } if (!test_bit(BTUSB_BULK_RUNNING, &data->flags)) return; usb_anchor_urb(urb, &data->bulk_anchor); usb_mark_last_busy(data->udev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p failed to resubmit (%d)", urb, -err); usb_unanchor_urb(urb); } } static int btusb_submit_bulk_urb(struct hci_dev *hdev, gfp_t mem_flags) { struct btusb_data *data = hci_get_drvdata(hdev); struct urb *urb; unsigned char *buf; unsigned int pipe; int err, size = HCI_MAX_FRAME_SIZE; BT_DBG("%s", hdev->name); if (!data->bulk_rx_ep) return -ENODEV; urb = usb_alloc_urb(0, mem_flags); if (!urb) return -ENOMEM; buf = kmalloc(size, mem_flags); if (!buf) { usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvbulkpipe(data->udev, data->bulk_rx_ep->bEndpointAddress); usb_fill_bulk_urb(urb, data->udev, pipe, buf, size, btusb_bulk_complete, hdev); urb->transfer_flags |= URB_FREE_BUFFER; usb_mark_last_busy(data->udev); usb_anchor_urb(urb, &data->bulk_anchor); err = usb_submit_urb(urb, mem_flags); if (err < 0) { if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static void btusb_isoc_complete(struct urb *urb) { struct hci_dev *hdev = urb->context; struct btusb_data *data = hci_get_drvdata(hdev); int i, err; BT_DBG("%s urb %p status %d count %d", hdev->name, urb, urb->status, urb->actual_length); if (!test_bit(HCI_RUNNING, &hdev->flags)) return; if (urb->status == 0) { for (i = 0; i < urb->number_of_packets; i++) { unsigned int offset = urb->iso_frame_desc[i].offset; unsigned int length = urb->iso_frame_desc[i].actual_length; if (urb->iso_frame_desc[i].status) continue; hdev->stat.byte_rx += length; if (btusb_recv_isoc(data, urb->transfer_buffer + offset, length) < 0) { bt_dev_err(hdev, "corrupted SCO packet"); hdev->stat.err_rx++; } } } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ return; } if (!test_bit(BTUSB_ISOC_RUNNING, &data->flags)) return; usb_anchor_urb(urb, &data->isoc_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p failed to resubmit (%d)", urb, -err); usb_unanchor_urb(urb); } } static inline void __fill_isoc_descriptor_msbc(struct urb *urb, int len, int mtu, struct btusb_data *data) { int i = 0, offset = 0; unsigned int interval; BT_DBG("len %d mtu %d", len, 
mtu); /* For mSBC ALT 6 settings some chips need to transmit the data * continuously without the zero length of USB packets. */ if (test_bit(BTUSB_ALT6_CONTINUOUS_TX, &data->flags)) goto ignore_usb_alt6_packet_flow; /* For mSBC ALT 6 setting the host will send the packet at continuous * flow. As per core spec 5, vol 4, part B, table 2.1. For ALT setting * 6 the HCI PACKET INTERVAL should be 7.5ms for every usb packets. * To maintain the rate we send 63bytes of usb packets alternatively for * 7ms and 8ms to maintain the rate as 7.5ms. */ if (data->usb_alt6_packet_flow) { interval = 7; data->usb_alt6_packet_flow = false; } else { interval = 6; data->usb_alt6_packet_flow = true; } for (i = 0; i < interval; i++) { urb->iso_frame_desc[i].offset = offset; urb->iso_frame_desc[i].length = offset; } ignore_usb_alt6_packet_flow: if (len && i < BTUSB_MAX_ISOC_FRAMES) { urb->iso_frame_desc[i].offset = offset; urb->iso_frame_desc[i].length = len; i++; } urb->number_of_packets = i; } static inline void __fill_isoc_descriptor(struct urb *urb, int len, int mtu) { int i, offset = 0; BT_DBG("len %d mtu %d", len, mtu); for (i = 0; i < BTUSB_MAX_ISOC_FRAMES && len >= mtu; i++, offset += mtu, len -= mtu) { urb->iso_frame_desc[i].offset = offset; urb->iso_frame_desc[i].length = mtu; } if (len && i < BTUSB_MAX_ISOC_FRAMES) { urb->iso_frame_desc[i].offset = offset; urb->iso_frame_desc[i].length = len; i++; } urb->number_of_packets = i; } static int btusb_submit_isoc_urb(struct hci_dev *hdev, gfp_t mem_flags) { struct btusb_data *data = hci_get_drvdata(hdev); struct urb *urb; unsigned char *buf; unsigned int pipe; int err, size; BT_DBG("%s", hdev->name); if (!data->isoc_rx_ep) return -ENODEV; urb = usb_alloc_urb(BTUSB_MAX_ISOC_FRAMES, mem_flags); if (!urb) return -ENOMEM; size = le16_to_cpu(data->isoc_rx_ep->wMaxPacketSize) * BTUSB_MAX_ISOC_FRAMES; buf = kmalloc(size, mem_flags); if (!buf) { usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvisocpipe(data->udev, data->isoc_rx_ep->bEndpointAddress); usb_fill_int_urb(urb, data->udev, pipe, buf, size, btusb_isoc_complete, hdev, data->isoc_rx_ep->bInterval); urb->transfer_flags = URB_FREE_BUFFER | URB_ISO_ASAP; __fill_isoc_descriptor(urb, size, le16_to_cpu(data->isoc_rx_ep->wMaxPacketSize)); usb_anchor_urb(urb, &data->isoc_anchor); err = usb_submit_urb(urb, mem_flags); if (err < 0) { if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static void btusb_diag_complete(struct urb *urb) { struct hci_dev *hdev = urb->context; struct btusb_data *data = hci_get_drvdata(hdev); int err; BT_DBG("%s urb %p status %d count %d", hdev->name, urb, urb->status, urb->actual_length); if (urb->status == 0) { struct sk_buff *skb; skb = bt_skb_alloc(urb->actual_length, GFP_ATOMIC); if (skb) { skb_put_data(skb, urb->transfer_buffer, urb->actual_length); hci_recv_diag(hdev, skb); } } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ return; } if (!test_bit(BTUSB_DIAG_RUNNING, &data->flags)) return; usb_anchor_urb(urb, &data->diag_anchor); usb_mark_last_busy(data->udev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p failed to resubmit (%d)", urb, -err); usb_unanchor_urb(urb); } } static int btusb_submit_diag_urb(struct hci_dev *hdev, gfp_t mem_flags) { struct btusb_data *data = hci_get_drvdata(hdev); struct urb *urb; 
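/*
 * A standalone sketch (compiled out here) of what __fill_isoc_descriptor()
 * above does: chop a transfer into MTU-sized isochronous frames plus one
 * trailing partial frame, capped at the per-URB frame limit.  The struct is
 * a hypothetical stand-in for the URB's iso_frame_desc array.
 */
#if 0
#include <stdio.h>

#define MAX_ISOC_FRAMES 10

struct frame_desc {
	int offset;
	int length;
};

static int fill_isoc_descriptor(struct frame_desc *desc, int len, int mtu)
{
	int i, offset = 0;

	for (i = 0; i < MAX_ISOC_FRAMES && len >= mtu;
	     i++, offset += mtu, len -= mtu) {
		desc[i].offset = offset;
		desc[i].length = mtu;
	}

	if (len && i < MAX_ISOC_FRAMES) {
		desc[i].offset = offset;
		desc[i].length = len;
		i++;
	}

	return i;			/* becomes urb->number_of_packets */
}

int main(void)
{
	struct frame_desc desc[MAX_ISOC_FRAMES];
	int i, n = fill_isoc_descriptor(desc, 147, 63);

	for (i = 0; i < n; i++)		/* 63 + 63 + 21 bytes */
		printf("frame %d: offset %d len %d\n",
		       i, desc[i].offset, desc[i].length);
	return 0;
}
#endif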
unsigned char *buf; unsigned int pipe; int err, size = HCI_MAX_FRAME_SIZE; BT_DBG("%s", hdev->name); if (!data->diag_rx_ep) return -ENODEV; urb = usb_alloc_urb(0, mem_flags); if (!urb) return -ENOMEM; buf = kmalloc(size, mem_flags); if (!buf) { usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvbulkpipe(data->udev, data->diag_rx_ep->bEndpointAddress); usb_fill_bulk_urb(urb, data->udev, pipe, buf, size, btusb_diag_complete, hdev); urb->transfer_flags |= URB_FREE_BUFFER; usb_mark_last_busy(data->udev); usb_anchor_urb(urb, &data->diag_anchor); err = usb_submit_urb(urb, mem_flags); if (err < 0) { if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static void btusb_tx_complete(struct urb *urb) { struct sk_buff *skb = urb->context; struct hci_dev *hdev = (struct hci_dev *)skb->dev; struct btusb_data *data = hci_get_drvdata(hdev); unsigned long flags; BT_DBG("%s urb %p status %d count %d", hdev->name, urb, urb->status, urb->actual_length); if (!test_bit(HCI_RUNNING, &hdev->flags)) goto done; if (!urb->status) { hdev->stat.byte_tx += urb->transfer_buffer_length; } else { if (hci_skb_pkt_type(skb) == HCI_COMMAND_PKT) hci_cmd_sync_cancel(hdev, -urb->status); hdev->stat.err_tx++; } done: spin_lock_irqsave(&data->txlock, flags); data->tx_in_flight--; spin_unlock_irqrestore(&data->txlock, flags); kfree(urb->setup_packet); kfree_skb(skb); } static void btusb_isoc_tx_complete(struct urb *urb) { struct sk_buff *skb = urb->context; struct hci_dev *hdev = (struct hci_dev *)skb->dev; BT_DBG("%s urb %p status %d count %d", hdev->name, urb, urb->status, urb->actual_length); if (!test_bit(HCI_RUNNING, &hdev->flags)) goto done; if (!urb->status) hdev->stat.byte_tx += urb->transfer_buffer_length; else hdev->stat.err_tx++; done: kfree(urb->setup_packet); kfree_skb(skb); } static int btusb_open(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); int err; BT_DBG("%s", hdev->name); err = usb_autopm_get_interface(data->intf); if (err < 0) return err; /* Patching USB firmware files prior to starting any URBs of HCI path * It is more safe to use USB bulk channel for downloading USB patch */ if (data->setup_on_usb) { err = data->setup_on_usb(hdev); if (err < 0) goto setup_fail; } data->intf->needs_remote_wakeup = 1; if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags)) goto done; err = btusb_submit_intr_urb(hdev, GFP_KERNEL); if (err < 0) goto failed; err = btusb_submit_bulk_urb(hdev, GFP_KERNEL); if (err < 0) { usb_kill_anchored_urbs(&data->intr_anchor); goto failed; } set_bit(BTUSB_BULK_RUNNING, &data->flags); btusb_submit_bulk_urb(hdev, GFP_KERNEL); if (data->diag) { if (!btusb_submit_diag_urb(hdev, GFP_KERNEL)) set_bit(BTUSB_DIAG_RUNNING, &data->flags); } done: usb_autopm_put_interface(data->intf); return 0; failed: clear_bit(BTUSB_INTR_RUNNING, &data->flags); setup_fail: usb_autopm_put_interface(data->intf); return err; } static void btusb_stop_traffic(struct btusb_data *data) { usb_kill_anchored_urbs(&data->intr_anchor); usb_kill_anchored_urbs(&data->bulk_anchor); usb_kill_anchored_urbs(&data->isoc_anchor); usb_kill_anchored_urbs(&data->diag_anchor); usb_kill_anchored_urbs(&data->ctrl_anchor); } static int btusb_close(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); int err; BT_DBG("%s", hdev->name); cancel_delayed_work(&data->rx_work); cancel_work_sync(&data->work); cancel_work_sync(&data->waker); skb_queue_purge(&data->acl_q); clear_bit(BTUSB_ISOC_RUNNING, 
&data->flags); clear_bit(BTUSB_BULK_RUNNING, &data->flags); clear_bit(BTUSB_INTR_RUNNING, &data->flags); clear_bit(BTUSB_DIAG_RUNNING, &data->flags); btusb_stop_traffic(data); btusb_free_frags(data); err = usb_autopm_get_interface(data->intf); if (err < 0) goto failed; data->intf->needs_remote_wakeup = 0; /* Enable remote wake up for auto-suspend */ if (test_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags)) data->intf->needs_remote_wakeup = 1; usb_autopm_put_interface(data->intf); failed: usb_scuttle_anchored_urbs(&data->deferred); return 0; } static int btusb_flush(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); BT_DBG("%s", hdev->name); cancel_delayed_work(&data->rx_work); skb_queue_purge(&data->acl_q); usb_kill_anchored_urbs(&data->tx_anchor); btusb_free_frags(data); return 0; } static struct urb *alloc_ctrl_urb(struct hci_dev *hdev, struct sk_buff *skb) { struct btusb_data *data = hci_get_drvdata(hdev); struct usb_ctrlrequest *dr; struct urb *urb; unsigned int pipe; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return ERR_PTR(-ENOMEM); dr = kmalloc(sizeof(*dr), GFP_KERNEL); if (!dr) { usb_free_urb(urb); return ERR_PTR(-ENOMEM); } dr->bRequestType = data->cmdreq_type; dr->bRequest = data->cmdreq; dr->wIndex = 0; dr->wValue = 0; dr->wLength = __cpu_to_le16(skb->len); pipe = usb_sndctrlpipe(data->udev, 0x00); usb_fill_control_urb(urb, data->udev, pipe, (void *)dr, skb->data, skb->len, btusb_tx_complete, skb); skb->dev = (void *)hdev; return urb; } static struct urb *alloc_bulk_urb(struct hci_dev *hdev, struct sk_buff *skb) { struct btusb_data *data = hci_get_drvdata(hdev); struct urb *urb; unsigned int pipe; if (!data->bulk_tx_ep) return ERR_PTR(-ENODEV); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return ERR_PTR(-ENOMEM); pipe = usb_sndbulkpipe(data->udev, data->bulk_tx_ep->bEndpointAddress); usb_fill_bulk_urb(urb, data->udev, pipe, skb->data, skb->len, btusb_tx_complete, skb); skb->dev = (void *)hdev; return urb; } static struct urb *alloc_isoc_urb(struct hci_dev *hdev, struct sk_buff *skb) { struct btusb_data *data = hci_get_drvdata(hdev); struct urb *urb; unsigned int pipe; if (!data->isoc_tx_ep) return ERR_PTR(-ENODEV); urb = usb_alloc_urb(BTUSB_MAX_ISOC_FRAMES, GFP_KERNEL); if (!urb) return ERR_PTR(-ENOMEM); pipe = usb_sndisocpipe(data->udev, data->isoc_tx_ep->bEndpointAddress); usb_fill_int_urb(urb, data->udev, pipe, skb->data, skb->len, btusb_isoc_tx_complete, skb, data->isoc_tx_ep->bInterval); urb->transfer_flags = URB_ISO_ASAP; if (data->isoc_altsetting == 6) __fill_isoc_descriptor_msbc(urb, skb->len, le16_to_cpu(data->isoc_tx_ep->wMaxPacketSize), data); else __fill_isoc_descriptor(urb, skb->len, le16_to_cpu(data->isoc_tx_ep->wMaxPacketSize)); skb->dev = (void *)hdev; return urb; } static int submit_tx_urb(struct hci_dev *hdev, struct urb *urb) { struct btusb_data *data = hci_get_drvdata(hdev); int err; usb_anchor_urb(urb, &data->tx_anchor); err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) { if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); kfree(urb->setup_packet); usb_unanchor_urb(urb); } else { usb_mark_last_busy(data->udev); } usb_free_urb(urb); return err; } static int submit_or_queue_tx_urb(struct hci_dev *hdev, struct urb *urb) { struct btusb_data *data = hci_get_drvdata(hdev); unsigned long flags; bool suspending; spin_lock_irqsave(&data->txlock, flags); suspending = test_bit(BTUSB_SUSPENDING, &data->flags); if (!suspending) data->tx_in_flight++; spin_unlock_irqrestore(&data->txlock, flags); if 
(!suspending) return submit_tx_urb(hdev, urb); usb_anchor_urb(urb, &data->deferred); schedule_work(&data->waker); usb_free_urb(urb); return 0; } static int btusb_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct urb *urb; BT_DBG("%s", hdev->name); switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: urb = alloc_ctrl_urb(hdev, skb); if (IS_ERR(urb)) return PTR_ERR(urb); hdev->stat.cmd_tx++; return submit_or_queue_tx_urb(hdev, urb); case HCI_ACLDATA_PKT: urb = alloc_bulk_urb(hdev, skb); if (IS_ERR(urb)) return PTR_ERR(urb); hdev->stat.acl_tx++; return submit_or_queue_tx_urb(hdev, urb); case HCI_SCODATA_PKT: if (hci_conn_num(hdev, SCO_LINK) < 1) return -ENODEV; urb = alloc_isoc_urb(hdev, skb); if (IS_ERR(urb)) return PTR_ERR(urb); hdev->stat.sco_tx++; return submit_tx_urb(hdev, urb); case HCI_ISODATA_PKT: urb = alloc_bulk_urb(hdev, skb); if (IS_ERR(urb)) return PTR_ERR(urb); return submit_or_queue_tx_urb(hdev, urb); } return -EILSEQ; } static void btusb_notify(struct hci_dev *hdev, unsigned int evt) { struct btusb_data *data = hci_get_drvdata(hdev); BT_DBG("%s evt %d", hdev->name, evt); if (hci_conn_num(hdev, SCO_LINK) != data->sco_num) { data->sco_num = hci_conn_num(hdev, SCO_LINK); data->air_mode = evt; schedule_work(&data->work); } } static inline int __set_isoc_interface(struct hci_dev *hdev, int altsetting) { struct btusb_data *data = hci_get_drvdata(hdev); struct usb_interface *intf = data->isoc; struct usb_endpoint_descriptor *ep_desc; int i, err; if (!data->isoc) return -ENODEV; err = usb_set_interface(data->udev, data->isoc_ifnum, altsetting); if (err < 0) { bt_dev_err(hdev, "setting interface failed (%d)", -err); return err; } data->isoc_altsetting = altsetting; data->isoc_tx_ep = NULL; data->isoc_rx_ep = NULL; for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { ep_desc = &intf->cur_altsetting->endpoint[i].desc; if (!data->isoc_tx_ep && usb_endpoint_is_isoc_out(ep_desc)) { data->isoc_tx_ep = ep_desc; continue; } if (!data->isoc_rx_ep && usb_endpoint_is_isoc_in(ep_desc)) { data->isoc_rx_ep = ep_desc; continue; } } if (!data->isoc_tx_ep || !data->isoc_rx_ep) { bt_dev_err(hdev, "invalid SCO descriptors"); return -ENODEV; } return 0; } static int btusb_switch_alt_setting(struct hci_dev *hdev, int new_alts) { struct btusb_data *data = hci_get_drvdata(hdev); int err; if (data->isoc_altsetting != new_alts) { unsigned long flags; clear_bit(BTUSB_ISOC_RUNNING, &data->flags); usb_kill_anchored_urbs(&data->isoc_anchor); /* When isochronous alternate setting needs to be * changed, because SCO connection has been added * or removed, a packet fragment may be left in the * reassembling state. This could lead to wrongly * assembled fragments. * * Clear outstanding fragment when selecting a new * alternate setting. 
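 */

/*
 * Dropping the half-reassembled SCO fragment above matters because the new
 * alternate setting changes the framing; the setting itself is chosen in
 * btusb_work() below.  A standalone sketch (compiled out here) of that
 * selection policy; the parameters are hypothetical simplifications of the
 * driver state (hdev->voice_setting, hdev->sco_mtu, the ALT3 flag and the
 * available altsettings).
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

static int pick_alt(bool transparent, int sco_num, unsigned int voice_setting,
		    bool have_alt6, bool have_alt3, int sco_mtu, bool use_alt3)
{
	if (!transparent) {			/* CVSD air mode */
		static const int alts[3] = { 2, 4, 5 };

		if (voice_setting & 0x0020)	/* 16-bit input coding */
			return alts[sco_num - 1];
		return sco_num;
	}

	/* Transparent (mSBC/WBS) air mode */
	if (have_alt6)
		return 6;
	if (have_alt3 && sco_mtu >= 72 && use_alt3)
		return 3;
	return 1;
}

int main(void)
{
	printf("CVSD, 1 link, 16-bit: alt %d\n",
	       pick_alt(false, 1, 0x0060, false, false, 0, false));
	printf("mSBC, no alt 6:       alt %d\n",
	       pick_alt(true, 1, 0, false, true, 72, true));
	return 0;
}
#endif
/*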
*/ spin_lock_irqsave(&data->rxlock, flags); dev_kfree_skb_irq(data->sco_skb); data->sco_skb = NULL; spin_unlock_irqrestore(&data->rxlock, flags); err = __set_isoc_interface(hdev, new_alts); if (err < 0) return err; } if (!test_and_set_bit(BTUSB_ISOC_RUNNING, &data->flags)) { if (btusb_submit_isoc_urb(hdev, GFP_KERNEL) < 0) clear_bit(BTUSB_ISOC_RUNNING, &data->flags); else btusb_submit_isoc_urb(hdev, GFP_KERNEL); } return 0; } static struct usb_host_interface *btusb_find_altsetting(struct btusb_data *data, int alt) { struct usb_interface *intf = data->isoc; int i; BT_DBG("Looking for Alt no :%d", alt); if (!intf) return NULL; for (i = 0; i < intf->num_altsetting; i++) { if (intf->altsetting[i].desc.bAlternateSetting == alt) return &intf->altsetting[i]; } return NULL; } static void btusb_work(struct work_struct *work) { struct btusb_data *data = container_of(work, struct btusb_data, work); struct hci_dev *hdev = data->hdev; int new_alts = 0; int err; if (data->sco_num > 0) { if (!test_bit(BTUSB_DID_ISO_RESUME, &data->flags)) { err = usb_autopm_get_interface(data->isoc ? data->isoc : data->intf); if (err < 0) { clear_bit(BTUSB_ISOC_RUNNING, &data->flags); usb_kill_anchored_urbs(&data->isoc_anchor); return; } set_bit(BTUSB_DID_ISO_RESUME, &data->flags); } if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_CVSD) { if (hdev->voice_setting & 0x0020) { static const int alts[3] = { 2, 4, 5 }; new_alts = alts[data->sco_num - 1]; } else { new_alts = data->sco_num; } } else if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_TRANSP) { /* Bluetooth USB spec recommends alt 6 (63 bytes), but * many adapters do not support it. Alt 1 appears to * work for all adapters that do not have alt 6, and * which work with WBS at all. Some devices prefer * alt 3 (HCI payload >= 60 Bytes let air packet * data satisfy 60 bytes), requiring * MTU >= 3 (packets) * 25 (size) - 3 (headers) = 72 * see also Core spec 5, vol 4, B 2.1.1 & Table 2.1. */ if (btusb_find_altsetting(data, 6)) new_alts = 6; else if (btusb_find_altsetting(data, 3) && hdev->sco_mtu >= 72 && test_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags)) new_alts = 3; else new_alts = 1; } if (btusb_switch_alt_setting(hdev, new_alts) < 0) bt_dev_err(hdev, "set USB alt:(%d) failed!", new_alts); } else { usb_kill_anchored_urbs(&data->isoc_anchor); if (test_and_clear_bit(BTUSB_ISOC_RUNNING, &data->flags)) __set_isoc_interface(hdev, 0); if (test_and_clear_bit(BTUSB_DID_ISO_RESUME, &data->flags)) usb_autopm_put_interface(data->isoc ? 
data->isoc : data->intf); } } static void btusb_waker(struct work_struct *work) { struct btusb_data *data = container_of(work, struct btusb_data, waker); int err; err = usb_autopm_get_interface(data->intf); if (err < 0) return; usb_autopm_put_interface(data->intf); } static void btusb_rx_work(struct work_struct *work) { struct btusb_data *data = container_of(work, struct btusb_data, rx_work.work); struct sk_buff *skb; /* Dequeue ACL data received during the interval */ while ((skb = skb_dequeue(&data->acl_q))) data->recv_acl(data->hdev, skb); } static int btusb_setup_bcm92035(struct hci_dev *hdev) { struct sk_buff *skb; u8 val = 0x00; BT_DBG("%s", hdev->name); skb = __hci_cmd_sync(hdev, 0xfc3b, 1, &val, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) bt_dev_err(hdev, "BCM92035 command failed (%ld)", PTR_ERR(skb)); else kfree_skb(skb); return 0; } static int btusb_setup_csr(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); u16 bcdDevice = le16_to_cpu(data->udev->descriptor.bcdDevice); struct hci_rp_read_local_version *rp; struct sk_buff *skb; bool is_fake = false; int ret; BT_DBG("%s", hdev->name); skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { int err = PTR_ERR(skb); bt_dev_err(hdev, "CSR: Local version failed (%d)", err); return err; } rp = skb_pull_data(skb, sizeof(*rp)); if (!rp) { bt_dev_err(hdev, "CSR: Local version length mismatch"); kfree_skb(skb); return -EIO; } bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x", rp->hci_ver, le16_to_cpu(rp->hci_rev)); bt_dev_info(hdev, "LMP ver=%u subver=%04x; manufacturer=%u", rp->lmp_ver, le16_to_cpu(rp->lmp_subver), le16_to_cpu(rp->manufacturer)); /* Detect a wide host of Chinese controllers that aren't CSR. * * Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891 * * The main thing they have in common is that these are really popular low-cost * options that support newer Bluetooth versions but rely on heavy VID/PID * squatting of this poor old Bluetooth 1.1 device. Even sold as such. * * We detect actual CSR devices by checking that the HCI manufacturer code * is Cambridge Silicon Radio (10) and ensuring that LMP sub-version and * HCI rev values always match. As they both store the firmware number. */ if (le16_to_cpu(rp->manufacturer) != 10 || le16_to_cpu(rp->hci_rev) != le16_to_cpu(rp->lmp_subver)) is_fake = true; /* Known legit CSR firmware build numbers and their supported BT versions: * - 1.1 (0x1) -> 0x0073, 0x020d, 0x033c, 0x034e * - 1.2 (0x2) -> 0x04d9, 0x0529 * - 2.0 (0x3) -> 0x07a6, 0x07ad, 0x0c5c * - 2.1 (0x4) -> 0x149c, 0x1735, 0x1899 (0x1899 is a BlueCore4-External) * - 4.0 (0x6) -> 0x1d86, 0x2031, 0x22bb * * e.g. Real CSR dongles with LMP subversion 0x73 are old enough that * support BT 1.1 only; so it's a dead giveaway when some * third-party BT 4.0 dongle reuses it. 
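 */

/*
 * The clone detection above, restated as a standalone sketch (compiled out
 * here): a genuine CSR part reports manufacturer 10 and identical HCI
 * revision / LMP sub-version (both carry the firmware build number), and a
 * given build number can only claim the Bluetooth version it shipped with.
 * The BT_VER_* values follow the HCI version codes; the extra bcdDevice
 * check from the driver is left out.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BT_VER_1_1 1
#define BT_VER_1_2 2
#define BT_VER_2_0 3
#define BT_VER_2_1 4
#define BT_VER_4_0 6

static bool is_fake_csr(uint16_t manufacturer, uint16_t hci_rev,
			uint16_t lmp_subver, uint8_t hci_ver)
{
	if (manufacturer != 10 || hci_rev != lmp_subver)
		return true;
	if (lmp_subver <= 0x034e && hci_ver > BT_VER_1_1)
		return true;
	if (lmp_subver <= 0x0529 && hci_ver > BT_VER_1_2)
		return true;
	if (lmp_subver <= 0x0c5c && hci_ver > BT_VER_2_0)
		return true;
	if (lmp_subver <= 0x1899 && hci_ver > BT_VER_2_1)
		return true;
	if (lmp_subver <= 0x22bb && hci_ver > BT_VER_4_0)
		return true;
	return false;
}

int main(void)
{
	/* BT 4.0 clone squatting on an old BT 1.1 build number */
	printf("%d\n", is_fake_csr(10, 0x0073, 0x0073, BT_VER_4_0));
	/* Plausible genuine BlueCore4-External */
	printf("%d\n", is_fake_csr(10, 0x1899, 0x1899, BT_VER_2_1));
	return 0;
}
#endif
/*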
*/ else if (le16_to_cpu(rp->lmp_subver) <= 0x034e && rp->hci_ver > BLUETOOTH_VER_1_1) is_fake = true; else if (le16_to_cpu(rp->lmp_subver) <= 0x0529 && rp->hci_ver > BLUETOOTH_VER_1_2) is_fake = true; else if (le16_to_cpu(rp->lmp_subver) <= 0x0c5c && rp->hci_ver > BLUETOOTH_VER_2_0) is_fake = true; else if (le16_to_cpu(rp->lmp_subver) <= 0x1899 && rp->hci_ver > BLUETOOTH_VER_2_1) is_fake = true; else if (le16_to_cpu(rp->lmp_subver) <= 0x22bb && rp->hci_ver > BLUETOOTH_VER_4_0) is_fake = true; /* Other clones which beat all the above checks */ else if (bcdDevice == 0x0134 && le16_to_cpu(rp->lmp_subver) == 0x0c5c && rp->hci_ver == BLUETOOTH_VER_2_0) is_fake = true; if (is_fake) { bt_dev_warn(hdev, "CSR: Unbranded CSR clone detected; adding workarounds and force-suspending once..."); /* Generally these clones have big discrepancies between * advertised features and what's actually supported. * Probably will need to be expanded in the future; * without these the controller will lock up. */ set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); /* Clear the reset quirk since this is not an actual * early Bluetooth 1.1 device from CSR. */ clear_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); clear_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); /* * Special workaround for these BT 4.0 chip clones, and potentially more: * * - 0x0134: a Barrot 8041a02 (HCI rev: 0x0810 sub: 0x1012) * - 0x7558: IC markings FR3191AHAL 749H15143 (HCI rev/sub-version: 0x0709) * * These controllers are really messed-up. * * 1. Their bulk RX endpoint will never report any data unless * the device was suspended at least once (yes, really). * 2. They will not wakeup when autosuspended and receiving data * on their bulk RX endpoint from e.g. a keyboard or mouse * (IOW remote-wakeup support is broken for the bulk endpoint). * * To fix 1. enable runtime-suspend, force-suspend the * HCI and then wake-it up by disabling runtime-suspend. * * To fix 2. clear the HCI's can_wake flag, this way the HCI * will still be autosuspended when it is not open. * * -- * * Because these are widespread problems we prefer generic solutions; so * apply this initialization quirk to every controller that gets here, * it should be harmless. The alternative is to not work at all. 
*/ pm_runtime_allow(&data->udev->dev); ret = pm_runtime_suspend(&data->udev->dev); if (ret >= 0) msleep(200); else bt_dev_warn(hdev, "CSR: Couldn't suspend the device for our Barrot 8041a02 receive-issue workaround"); pm_runtime_forbid(&data->udev->dev); device_set_wakeup_capable(&data->udev->dev, false); /* Re-enable autosuspend if this was requested */ if (enable_autosuspend) usb_enable_autosuspend(data->udev); } kfree_skb(skb); return 0; } static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode) { struct sk_buff *skb; struct hci_event_hdr *hdr; struct hci_ev_cmd_complete *evt; skb = bt_skb_alloc(sizeof(*hdr) + sizeof(*evt) + 1, GFP_KERNEL); if (!skb) return -ENOMEM; hdr = skb_put(skb, sizeof(*hdr)); hdr->evt = HCI_EV_CMD_COMPLETE; hdr->plen = sizeof(*evt) + 1; evt = skb_put(skb, sizeof(*evt)); evt->ncmd = 0x01; evt->opcode = cpu_to_le16(opcode); skb_put_u8(skb, 0x00); hci_skb_pkt_type(skb) = HCI_EVENT_PKT; return hci_recv_frame(hdev, skb); } static int btusb_recv_bulk_intel(struct btusb_data *data, void *buffer, int count) { struct hci_dev *hdev = data->hdev; /* When the device is in bootloader mode, then it can send * events via the bulk endpoint. These events are treated the * same way as the ones received from the interrupt endpoint. */ if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) return btusb_recv_intr(data, buffer, count); return btusb_recv_bulk(data, buffer, count); } static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb) { struct urb *urb; BT_DBG("%s", hdev->name); switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) { struct hci_command_hdr *cmd = (void *)skb->data; __u16 opcode = le16_to_cpu(cmd->opcode); /* When in bootloader mode and the command 0xfc09 * is received, it needs to be send down the * bulk endpoint. So allocate a bulk URB instead. */ if (opcode == 0xfc09) urb = alloc_bulk_urb(hdev, skb); else urb = alloc_ctrl_urb(hdev, skb); /* When the 0xfc01 command is issued to boot into * the operational firmware, it will actually not * send a command complete event. To keep the flow * control working inject that event here. 
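 */

/*
 * A standalone sketch (compiled out here) of the small fake Command
 * Complete event that inject_cmd_complete() above builds: event header
 * (code + parameter length), then ncmd, the echoed opcode in little-endian
 * order, and a zero status byte.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define EV_CMD_COMPLETE 0x0e

static size_t build_cmd_complete(uint8_t *buf, uint16_t opcode)
{
	size_t n = 0;

	buf[n++] = EV_CMD_COMPLETE;
	buf[n++] = 4;			/* plen: ncmd + opcode + status */
	buf[n++] = 0x01;		/* ncmd: allow one more command */
	buf[n++] = opcode & 0xff;	/* opcode, little endian */
	buf[n++] = opcode >> 8;
	buf[n++] = 0x00;		/* status: success */
	return n;
}

int main(void)
{
	uint8_t buf[8];
	size_t i, n = build_cmd_complete(buf, 0xfc01);

	for (i = 0; i < n; i++)		/* prints: 0e 04 01 01 fc 00 */
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}
#endif
/*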
*/ if (opcode == 0xfc01) inject_cmd_complete(hdev, opcode); } else { urb = alloc_ctrl_urb(hdev, skb); } if (IS_ERR(urb)) return PTR_ERR(urb); hdev->stat.cmd_tx++; return submit_or_queue_tx_urb(hdev, urb); case HCI_ACLDATA_PKT: urb = alloc_bulk_urb(hdev, skb); if (IS_ERR(urb)) return PTR_ERR(urb); hdev->stat.acl_tx++; return submit_or_queue_tx_urb(hdev, urb); case HCI_SCODATA_PKT: if (hci_conn_num(hdev, SCO_LINK) < 1) return -ENODEV; urb = alloc_isoc_urb(hdev, skb); if (IS_ERR(urb)) return PTR_ERR(urb); hdev->stat.sco_tx++; return submit_tx_urb(hdev, urb); case HCI_ISODATA_PKT: urb = alloc_bulk_urb(hdev, skb); if (IS_ERR(urb)) return PTR_ERR(urb); return submit_or_queue_tx_urb(hdev, urb); } return -EILSEQ; } static int btusb_setup_realtek(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); int ret; ret = btrtl_setup_realtek(hdev); if (btrealtek_test_flag(data->hdev, REALTEK_ALT6_CONTINUOUS_TX_CHIP)) set_bit(BTUSB_ALT6_CONTINUOUS_TX, &data->flags); return ret; } static int btusb_recv_event_realtek(struct hci_dev *hdev, struct sk_buff *skb) { if (skb->data[0] == HCI_VENDOR_PKT && skb->data[2] == RTK_SUB_EVENT_CODE_COREDUMP) { struct rtk_dev_coredump_hdr hdr = { .code = RTK_DEVCOREDUMP_CODE_MEMDUMP, }; bt_dev_dbg(hdev, "RTL: received coredump vendor evt, len %u", skb->len); btusb_rtl_alloc_devcoredump(hdev, &hdr, skb->data, skb->len); kfree_skb(skb); return 0; } return hci_recv_frame(hdev, skb); } /* UHW CR mapping */ #define MTK_BT_MISC 0x70002510 #define MTK_BT_SUBSYS_RST 0x70002610 #define MTK_UDMA_INT_STA_BT 0x74000024 #define MTK_UDMA_INT_STA_BT1 0x74000308 #define MTK_BT_WDT_STATUS 0x740003A0 #define MTK_EP_RST_OPT 0x74011890 #define MTK_EP_RST_IN_OUT_OPT 0x00010001 #define MTK_BT_RST_DONE 0x00000100 #define MTK_BT_RESET_REG_CONNV3 0x70028610 #define MTK_BT_READ_DEV_ID 0x70010200 static void btusb_mtk_wmt_recv(struct urb *urb) { struct hci_dev *hdev = urb->context; struct btusb_data *data = hci_get_drvdata(hdev); struct sk_buff *skb; int err; if (urb->status == 0 && urb->actual_length > 0) { hdev->stat.byte_rx += urb->actual_length; /* WMT event shouldn't be fragmented and the size should be * less than HCI_WMT_MAX_EVENT_SIZE. */ skb = bt_skb_alloc(HCI_WMT_MAX_EVENT_SIZE, GFP_ATOMIC); if (!skb) { hdev->stat.err_rx++; kfree(urb->setup_packet); return; } hci_skb_pkt_type(skb) = HCI_EVENT_PKT; skb_put_data(skb, urb->transfer_buffer, urb->actual_length); /* When someone waits for the WMT event, the skb is being cloned * and being processed the events from there then. */ if (test_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { data->evt_skb = skb_clone(skb, GFP_ATOMIC); if (!data->evt_skb) { kfree_skb(skb); kfree(urb->setup_packet); return; } } err = hci_recv_frame(hdev, skb); if (err < 0) { kfree_skb(data->evt_skb); data->evt_skb = NULL; kfree(urb->setup_packet); return; } if (test_and_clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { /* Barrier to sync with other CPUs */ smp_mb__after_atomic(); wake_up_bit(&data->flags, BTUSB_TX_WAIT_VND_EVT); } kfree(urb->setup_packet); return; } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ return; } usb_mark_last_busy(data->udev); /* The URB complete handler is still called with urb->actual_length = 0 * when the event is not available, so we should keep re-submitting * URB until WMT event returns, Also, It's necessary to wait some time * between the two consecutive control URBs to relax the target device * to generate the event. Otherwise, the WMT event cannot return from * the device successfully. 
*/ udelay(500); usb_anchor_urb(urb, &data->ctrl_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { kfree(urb->setup_packet); /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p failed to resubmit (%d)", urb, -err); usb_unanchor_urb(urb); } } static int btusb_mtk_submit_wmt_recv_urb(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); struct usb_ctrlrequest *dr; unsigned char *buf; int err, size = 64; unsigned int pipe; struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return -ENOMEM; dr = kmalloc(sizeof(*dr), GFP_KERNEL); if (!dr) { usb_free_urb(urb); return -ENOMEM; } dr->bRequestType = USB_TYPE_VENDOR | USB_DIR_IN; dr->bRequest = 1; dr->wIndex = cpu_to_le16(0); dr->wValue = cpu_to_le16(48); dr->wLength = cpu_to_le16(size); buf = kmalloc(size, GFP_KERNEL); if (!buf) { kfree(dr); usb_free_urb(urb); return -ENOMEM; } pipe = usb_rcvctrlpipe(data->udev, 0); usb_fill_control_urb(urb, data->udev, pipe, (void *)dr, buf, size, btusb_mtk_wmt_recv, hdev); urb->transfer_flags |= URB_FREE_BUFFER; usb_anchor_urb(urb, &data->ctrl_anchor); err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) { if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, struct btmtk_hci_wmt_params *wmt_params) { struct btusb_data *data = hci_get_drvdata(hdev); struct btmtk_hci_wmt_evt_funcc *wmt_evt_funcc; u32 hlen, status = BTMTK_WMT_INVALID; struct btmtk_hci_wmt_evt *wmt_evt; struct btmtk_hci_wmt_cmd *wc; struct btmtk_wmt_hdr *hdr; int err; /* Send the WMT command and wait until the WMT event returns */ hlen = sizeof(*hdr) + wmt_params->dlen; if (hlen > 255) return -EINVAL; wc = kzalloc(hlen, GFP_KERNEL); if (!wc) return -ENOMEM; hdr = &wc->hdr; hdr->dir = 1; hdr->op = wmt_params->op; hdr->dlen = cpu_to_le16(wmt_params->dlen + 1); hdr->flag = wmt_params->flag; memcpy(wc->data, wmt_params->data, wmt_params->dlen); set_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); /* WMT cmd/event doesn't follow up the generic HCI cmd/event handling, * it needs constantly polling control pipe until the host received the * WMT event, thus, we should require to specifically acquire PM counter * on the USB to prevent the interface from entering auto suspended * while WMT cmd/event in progress. */ err = usb_autopm_get_interface(data->intf); if (err < 0) goto err_free_wc; err = __hci_cmd_send(hdev, 0xfc6f, hlen, wc); if (err < 0) { clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); usb_autopm_put_interface(data->intf); goto err_free_wc; } /* Submit control IN URB on demand to process the WMT event */ err = btusb_mtk_submit_wmt_recv_urb(hdev); usb_autopm_put_interface(data->intf); if (err < 0) goto err_free_wc; /* The vendor specific WMT commands are all answered by a vendor * specific event and will have the Command Status or Command * Complete as with usual HCI command flow control. * * After sending the command, wait for BTUSB_TX_WAIT_VND_EVT * state to be cleared. The driver specific event receive routine * will clear that state and with that indicate completion of the * WMT command. 
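 */

/*
 * A standalone userspace model (compiled out here) of that handshake: the
 * sender raises a "waiting for vendor event" flag before issuing the WMT
 * command, the receive path clears it once the event arrives, and the
 * sender waits for the flag to drop or for a timeout.  The real driver uses
 * wait_on_bit_timeout()/wake_up_bit() on BTUSB_TX_WAIT_VND_EVT instead of
 * the crude polling shown here.
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static atomic_bool tx_wait_vnd_evt;

/* Called from the (hypothetical) receive path when the WMT event arrives */
static void wmt_event_received(void)
{
	atomic_store(&tx_wait_vnd_evt, false);
}

/* Crude stand-in for wait_on_bit_timeout(): poll until the flag clears */
static int wait_for_wmt_event(unsigned int timeout_ms)
{
	struct timespec step = { 0, 1000000 };	/* 1 ms */

	while (atomic_load(&tx_wait_vnd_evt)) {
		if (!timeout_ms--)
			return -1;		/* timed out */
		nanosleep(&step, NULL);
	}
	return 0;
}

int main(void)
{
	atomic_store(&tx_wait_vnd_evt, true);	/* "send" the WMT command */
	wmt_event_received();			/* pretend the event came back */
	printf("%d\n", wait_for_wmt_event(1000));
	return 0;
}
#endif
/*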
*/ err = wait_on_bit_timeout(&data->flags, BTUSB_TX_WAIT_VND_EVT, TASK_INTERRUPTIBLE, HCI_INIT_TIMEOUT); if (err == -EINTR) { bt_dev_err(hdev, "Execution of wmt command interrupted"); clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); goto err_free_wc; } if (err) { bt_dev_err(hdev, "Execution of wmt command timed out"); clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); err = -ETIMEDOUT; goto err_free_wc; } if (data->evt_skb == NULL) goto err_free_wc; /* Parse and handle the return WMT event */ wmt_evt = (struct btmtk_hci_wmt_evt *)data->evt_skb->data; if (wmt_evt->whdr.op != hdr->op) { bt_dev_err(hdev, "Wrong op received %d expected %d", wmt_evt->whdr.op, hdr->op); err = -EIO; goto err_free_skb; } switch (wmt_evt->whdr.op) { case BTMTK_WMT_SEMAPHORE: if (wmt_evt->whdr.flag == 2) status = BTMTK_WMT_PATCH_UNDONE; else status = BTMTK_WMT_PATCH_DONE; break; case BTMTK_WMT_FUNC_CTRL: wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt; if (be16_to_cpu(wmt_evt_funcc->status) == 0x404) status = BTMTK_WMT_ON_DONE; else if (be16_to_cpu(wmt_evt_funcc->status) == 0x420) status = BTMTK_WMT_ON_PROGRESS; else status = BTMTK_WMT_ON_UNDONE; break; case BTMTK_WMT_PATCH_DWNLD: if (wmt_evt->whdr.flag == 2) status = BTMTK_WMT_PATCH_DONE; else if (wmt_evt->whdr.flag == 1) status = BTMTK_WMT_PATCH_PROGRESS; else status = BTMTK_WMT_PATCH_UNDONE; break; } if (wmt_params->status) *wmt_params->status = status; err_free_skb: kfree_skb(data->evt_skb); data->evt_skb = NULL; err_free_wc: kfree(wc); return err; } static int btusb_mtk_func_query(struct hci_dev *hdev) { struct btmtk_hci_wmt_params wmt_params; int status, err; u8 param = 0; /* Query whether the function is enabled */ wmt_params.op = BTMTK_WMT_FUNC_CTRL; wmt_params.flag = 4; wmt_params.dlen = sizeof(param); wmt_params.data = &param; wmt_params.status = &status; err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to query function status (%d)", err); return err; } return status; } static int btusb_mtk_uhw_reg_write(struct btusb_data *data, u32 reg, u32 val) { struct hci_dev *hdev = data->hdev; int pipe, err; void *buf; buf = kzalloc(4, GFP_KERNEL); if (!buf) return -ENOMEM; put_unaligned_le32(val, buf); pipe = usb_sndctrlpipe(data->udev, 0); err = usb_control_msg(data->udev, pipe, 0x02, 0x5E, reg >> 16, reg & 0xffff, buf, 4, USB_CTRL_SET_TIMEOUT); if (err < 0) { bt_dev_err(hdev, "Failed to write uhw reg(%d)", err); goto err_free_buf; } err_free_buf: kfree(buf); return err; } static int btusb_mtk_uhw_reg_read(struct btusb_data *data, u32 reg, u32 *val) { struct hci_dev *hdev = data->hdev; int pipe, err; void *buf; buf = kzalloc(4, GFP_KERNEL); if (!buf) return -ENOMEM; pipe = usb_rcvctrlpipe(data->udev, 0); err = usb_control_msg(data->udev, pipe, 0x01, 0xDE, reg >> 16, reg & 0xffff, buf, 4, USB_CTRL_SET_TIMEOUT); if (err < 0) { bt_dev_err(hdev, "Failed to read uhw reg(%d)", err); goto err_free_buf; } *val = get_unaligned_le32(buf); bt_dev_dbg(hdev, "reg=%x, value=0x%08x", reg, *val); err_free_buf: kfree(buf); return err; } static int btusb_mtk_reg_read(struct btusb_data *data, u32 reg, u32 *val) { int pipe, err, size = sizeof(u32); void *buf; buf = kzalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; pipe = usb_rcvctrlpipe(data->udev, 0); err = usb_control_msg(data->udev, pipe, 0x63, USB_TYPE_VENDOR | USB_DIR_IN, reg >> 16, reg & 0xffff, buf, size, USB_CTRL_SET_TIMEOUT); if (err < 0) goto err_free_buf; *val = get_unaligned_le32(buf); err_free_buf: kfree(buf); return err; } static int btusb_mtk_id_get(struct btusb_data 
*data, u32 reg, u32 *id) { return btusb_mtk_reg_read(data, reg, id); } static u32 btusb_mtk_reset_done(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); u32 val = 0; btusb_mtk_uhw_reg_read(data, MTK_BT_MISC, &val); return val & MTK_BT_RST_DONE; } static int btusb_mtk_reset(struct hci_dev *hdev, void *rst_data) { struct btusb_data *data = hci_get_drvdata(hdev); struct btmediatek_data *mediatek; u32 val; int err; /* It's MediaTek specific bluetooth reset mechanism via USB */ if (test_and_set_bit(BTUSB_HW_RESET_ACTIVE, &data->flags)) { bt_dev_err(hdev, "last reset failed? Not resetting again"); return -EBUSY; } err = usb_autopm_get_interface(data->intf); if (err < 0) return err; btusb_stop_traffic(data); usb_kill_anchored_urbs(&data->tx_anchor); mediatek = hci_get_priv(hdev); if (mediatek->dev_id == 0x7925) { btusb_mtk_uhw_reg_read(data, MTK_BT_RESET_REG_CONNV3, &val); val |= (1 << 5); btusb_mtk_uhw_reg_write(data, MTK_BT_RESET_REG_CONNV3, val); btusb_mtk_uhw_reg_read(data, MTK_BT_RESET_REG_CONNV3, &val); val &= 0xFFFF00FF; val |= (1 << 13); btusb_mtk_uhw_reg_write(data, MTK_BT_RESET_REG_CONNV3, val); btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, 0x00010001); btusb_mtk_uhw_reg_read(data, MTK_BT_RESET_REG_CONNV3, &val); val |= (1 << 0); btusb_mtk_uhw_reg_write(data, MTK_BT_RESET_REG_CONNV3, val); btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT, 0x000000FF); btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT, &val); btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT1, 0x000000FF); btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT1, &val); msleep(100); } else { /* It's Device EndPoint Reset Option Register */ bt_dev_dbg(hdev, "Initiating reset mechanism via uhw"); btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT); btusb_mtk_uhw_reg_read(data, MTK_BT_WDT_STATUS, &val); /* Reset the bluetooth chip via USB interface. 
*/ btusb_mtk_uhw_reg_write(data, MTK_BT_SUBSYS_RST, 1); btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT, 0x000000FF); btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT, &val); btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT1, 0x000000FF); btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT1, &val); /* MT7921 need to delay 20ms between toggle reset bit */ msleep(20); btusb_mtk_uhw_reg_write(data, MTK_BT_SUBSYS_RST, 0); btusb_mtk_uhw_reg_read(data, MTK_BT_SUBSYS_RST, &val); } err = readx_poll_timeout(btusb_mtk_reset_done, hdev, val, val & MTK_BT_RST_DONE, 20000, 1000000); if (err < 0) bt_dev_err(hdev, "Reset timeout"); btusb_mtk_id_get(data, 0x70010200, &val); if (!val) bt_dev_err(hdev, "Can't get device id, subsys reset fail."); usb_queue_reset_device(data->intf); clear_bit(BTUSB_HW_RESET_ACTIVE, &data->flags); return err; } static int btusb_mtk_setup(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); struct btmtk_hci_wmt_params wmt_params; ktime_t calltime, delta, rettime; struct btmtk_tci_sleep tci_sleep; unsigned long long duration; struct sk_buff *skb; const char *fwname; int err, status; u32 dev_id = 0; char fw_bin_name[64]; u32 fw_version = 0, fw_flavor = 0; u8 param; struct btmediatek_data *mediatek; calltime = ktime_get(); err = btusb_mtk_id_get(data, 0x80000008, &dev_id); if (err < 0) { bt_dev_err(hdev, "Failed to get device id (%d)", err); return err; } if (!dev_id || dev_id != 0x7663) { err = btusb_mtk_id_get(data, 0x70010200, &dev_id); if (err < 0) { bt_dev_err(hdev, "Failed to get device id (%d)", err); return err; } err = btusb_mtk_id_get(data, 0x80021004, &fw_version); if (err < 0) { bt_dev_err(hdev, "Failed to get fw version (%d)", err); return err; } err = btusb_mtk_id_get(data, 0x70010020, &fw_flavor); if (err < 0) { bt_dev_err(hdev, "Failed to get fw flavor (%d)", err); return err; } } mediatek = hci_get_priv(hdev); mediatek->dev_id = dev_id; mediatek->reset_sync = btusb_mtk_reset; err = btmtk_register_coredump(hdev, btusb_driver.name, fw_version); if (err < 0) bt_dev_err(hdev, "Failed to register coredump (%d)", err); switch (dev_id) { case 0x7663: fwname = FIRMWARE_MT7663; break; case 0x7668: fwname = FIRMWARE_MT7668; break; case 0x7922: case 0x7961: case 0x7925: if (dev_id == 0x7925) snprintf(fw_bin_name, sizeof(fw_bin_name), "mediatek/mt%04x/BT_RAM_CODE_MT%04x_1_%x_hdr.bin", dev_id & 0xffff, dev_id & 0xffff, (fw_version & 0xff) + 1); else if (dev_id == 0x7961 && fw_flavor) snprintf(fw_bin_name, sizeof(fw_bin_name), "mediatek/BT_RAM_CODE_MT%04x_1a_%x_hdr.bin", dev_id & 0xffff, (fw_version & 0xff) + 1); else snprintf(fw_bin_name, sizeof(fw_bin_name), "mediatek/BT_RAM_CODE_MT%04x_1_%x_hdr.bin", dev_id & 0xffff, (fw_version & 0xff) + 1); err = btmtk_setup_firmware_79xx(hdev, fw_bin_name, btusb_mtk_hci_wmt_sync); if (err < 0) { bt_dev_err(hdev, "Failed to set up firmware (%d)", err); return err; } /* It's Device EndPoint Reset Option Register */ btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT); /* Enable Bluetooth protocol */ param = 1; wmt_params.op = BTMTK_WMT_FUNC_CTRL; wmt_params.flag = 0; wmt_params.dlen = sizeof(param); wmt_params.data = &param; wmt_params.status = NULL; err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err); return err; } hci_set_msft_opcode(hdev, 0xFD30); hci_set_aosp_capable(hdev); goto done; default: bt_dev_err(hdev, "Unsupported hardware variant (%08x)", dev_id); return -ENODEV; } /* Query whether the firmware is already download */ 
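/*
 * Before the semaphore query below runs for the older parts, the 79xx
 * branch above derives its firmware file name from the device id and the
 * reported firmware version.  A standalone sketch of that naming scheme
 * (compiled out here); fw_name_79xx() is a hypothetical helper, not a
 * driver function.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static void fw_name_79xx(char *buf, size_t len, uint32_t dev_id,
			 uint32_t fw_version, uint32_t fw_flavor)
{
	unsigned int rev = (fw_version & 0xff) + 1;

	if (dev_id == 0x7925)
		snprintf(buf, len,
			 "mediatek/mt%04x/BT_RAM_CODE_MT%04x_1_%x_hdr.bin",
			 dev_id & 0xffff, dev_id & 0xffff, rev);
	else if (dev_id == 0x7961 && fw_flavor)
		snprintf(buf, len,
			 "mediatek/BT_RAM_CODE_MT%04x_1a_%x_hdr.bin",
			 dev_id & 0xffff, rev);
	else
		snprintf(buf, len,
			 "mediatek/BT_RAM_CODE_MT%04x_1_%x_hdr.bin",
			 dev_id & 0xffff, rev);
}

int main(void)
{
	char name[64];

	fw_name_79xx(name, sizeof(name), 0x7922, 0x00000001, 0);
	printf("%s\n", name);	/* mediatek/BT_RAM_CODE_MT7922_1_2_hdr.bin */
	return 0;
}
#endif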
*/ wmt_params.op = BTMTK_WMT_SEMAPHORE; wmt_params.flag = 1; wmt_params.dlen = 0; wmt_params.data = NULL; wmt_params.status = &status; err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to query firmware status (%d)", err); return err; } if (status == BTMTK_WMT_PATCH_DONE) { bt_dev_info(hdev, "firmware already downloaded"); goto ignore_setup_fw; } /* Setup a firmware which the device definitely requires */ err = btmtk_setup_firmware(hdev, fwname, btusb_mtk_hci_wmt_sync); if (err < 0) return err; ignore_setup_fw: err = readx_poll_timeout(btusb_mtk_func_query, hdev, status, status < 0 || status != BTMTK_WMT_ON_PROGRESS, 2000, 5000000); /* -ETIMEDOUT happens */ if (err < 0) return err; /* The other errors happen in btusb_mtk_func_query */ if (status < 0) return status; if (status == BTMTK_WMT_ON_DONE) { bt_dev_info(hdev, "function already on"); goto ignore_func_on; } /* Enable Bluetooth protocol */ param = 1; wmt_params.op = BTMTK_WMT_FUNC_CTRL; wmt_params.flag = 0; wmt_params.dlen = sizeof(param); wmt_params.data = &param; wmt_params.status = NULL; err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err); return err; } ignore_func_on: /* Apply the low power environment setup */ tci_sleep.mode = 0x5; tci_sleep.duration = cpu_to_le16(0x640); tci_sleep.host_duration = cpu_to_le16(0x640); tci_sleep.host_wakeup_pin = 0; tci_sleep.time_compensation = 0; skb = __hci_cmd_sync(hdev, 0xfc7a, sizeof(tci_sleep), &tci_sleep, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "Failed to apply low power setting (%d)", err); return err; } kfree_skb(skb); done: rettime = ktime_get(); delta = ktime_sub(rettime, calltime); duration = (unsigned long long)ktime_to_ns(delta) >> 10; bt_dev_info(hdev, "Device setup in %llu usecs", duration); return 0; } static int btusb_mtk_shutdown(struct hci_dev *hdev) { struct btmtk_hci_wmt_params wmt_params; u8 param = 0; int err; /* Disable the device */ wmt_params.op = BTMTK_WMT_FUNC_CTRL; wmt_params.flag = 0; wmt_params.dlen = sizeof(param); wmt_params.data = &param; wmt_params.status = NULL; err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err); return err; } return 0; } static int btusb_recv_acl_mtk(struct hci_dev *hdev, struct sk_buff *skb) { struct btusb_data *data = hci_get_drvdata(hdev); u16 handle = le16_to_cpu(hci_acl_hdr(skb)->handle); switch (handle) { case 0xfc6f: /* Firmware dump from device */ /* When the firmware hangs, the device can no longer * suspend and thus disable auto-suspend. */ usb_disable_autosuspend(data->udev); /* We need to forward the diagnostic packet to userspace daemon * for backward compatibility, so we have to clone an extra copy of * the packet for the in-kernel coredump support. 
*/ if (IS_ENABLED(CONFIG_DEV_COREDUMP)) { struct sk_buff *skb_cd = skb_clone(skb, GFP_ATOMIC); if (skb_cd) btmtk_process_coredump(hdev, skb_cd); } fallthrough; case 0x05ff: /* Firmware debug logging 1 */ case 0x05fe: /* Firmware debug logging 2 */ return hci_recv_diag(hdev, skb); } return hci_recv_frame(hdev, skb); } #ifdef CONFIG_PM /* Configure an out-of-band gpio as wake-up pin, if specified in device tree */ static int marvell_config_oob_wake(struct hci_dev *hdev) { struct sk_buff *skb; struct btusb_data *data = hci_get_drvdata(hdev); struct device *dev = &data->udev->dev; u16 pin, gap, opcode; int ret; u8 cmd[5]; /* Move on if no wakeup pin specified */ if (of_property_read_u16(dev->of_node, "marvell,wakeup-pin", &pin) || of_property_read_u16(dev->of_node, "marvell,wakeup-gap-ms", &gap)) return 0; /* Vendor specific command to configure a GPIO as wake-up pin */ opcode = hci_opcode_pack(0x3F, 0x59); cmd[0] = opcode & 0xFF; cmd[1] = opcode >> 8; cmd[2] = 2; /* length of parameters that follow */ cmd[3] = pin; cmd[4] = gap; /* time in ms, for which wakeup pin should be asserted */ skb = bt_skb_alloc(sizeof(cmd), GFP_KERNEL); if (!skb) { bt_dev_err(hdev, "%s: No memory", __func__); return -ENOMEM; } skb_put_data(skb, cmd, sizeof(cmd)); hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; ret = btusb_send_frame(hdev, skb); if (ret) { bt_dev_err(hdev, "%s: configuration failed", __func__); kfree_skb(skb); return ret; } return 0; } #endif static int btusb_set_bdaddr_marvell(struct hci_dev *hdev, const bdaddr_t *bdaddr) { struct sk_buff *skb; u8 buf[8]; long ret; buf[0] = 0xfe; buf[1] = sizeof(bdaddr_t); memcpy(buf + 2, bdaddr, sizeof(bdaddr_t)); skb = __hci_cmd_sync(hdev, 0xfc22, sizeof(buf), buf, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { ret = PTR_ERR(skb); bt_dev_err(hdev, "changing Marvell device address failed (%ld)", ret); return ret; } kfree_skb(skb); return 0; } static int btusb_set_bdaddr_ath3012(struct hci_dev *hdev, const bdaddr_t *bdaddr) { struct sk_buff *skb; u8 buf[10]; long ret; buf[0] = 0x01; buf[1] = 0x01; buf[2] = 0x00; buf[3] = sizeof(bdaddr_t); memcpy(buf + 4, bdaddr, sizeof(bdaddr_t)); skb = __hci_cmd_sync(hdev, 0xfc0b, sizeof(buf), buf, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { ret = PTR_ERR(skb); bt_dev_err(hdev, "Change address command failed (%ld)", ret); return ret; } kfree_skb(skb); return 0; } static int btusb_set_bdaddr_wcn6855(struct hci_dev *hdev, const bdaddr_t *bdaddr) { struct sk_buff *skb; u8 buf[6]; long ret; memcpy(buf, bdaddr, sizeof(bdaddr_t)); skb = __hci_cmd_sync_ev(hdev, 0xfc14, sizeof(buf), buf, HCI_EV_CMD_COMPLETE, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { ret = PTR_ERR(skb); bt_dev_err(hdev, "Change address command failed (%ld)", ret); return ret; } kfree_skb(skb); return 0; } #define QCA_MEMDUMP_ACL_HANDLE 0x2EDD #define QCA_MEMDUMP_SIZE_MAX 0x100000 #define QCA_MEMDUMP_VSE_CLASS 0x01 #define QCA_MEMDUMP_MSG_TYPE 0x08 #define QCA_MEMDUMP_PKT_SIZE 248 #define QCA_LAST_SEQUENCE_NUM 0xffff struct qca_dump_hdr { u8 vse_class; u8 msg_type; __le16 seqno; u8 reserved; union { u8 data[0]; struct { __le32 ram_dump_size; u8 data0[0]; } __packed; }; } __packed; static void btusb_dump_hdr_qca(struct hci_dev *hdev, struct sk_buff *skb) { char buf[128]; struct btusb_data *btdata = hci_get_drvdata(hdev); snprintf(buf, sizeof(buf), "Controller Name: 0x%x\n", btdata->qca_dump.controller_id); skb_put_data(skb, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Firmware Version: 0x%x\n", btdata->qca_dump.fw_version); skb_put_data(skb, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Driver: 
%s\nVendor: qca\n", btusb_driver.name); skb_put_data(skb, buf, strlen(buf)); snprintf(buf, sizeof(buf), "VID: 0x%x\nPID:0x%x\n", btdata->qca_dump.id_vendor, btdata->qca_dump.id_product); skb_put_data(skb, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Lmp Subversion: 0x%x\n", hdev->lmp_subver); skb_put_data(skb, buf, strlen(buf)); } static void btusb_coredump_qca(struct hci_dev *hdev) { int err; static const u8 param[] = { 0x26 }; err = __hci_cmd_send(hdev, 0xfc0c, 1, param); if (err < 0) bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err); } /* * ==0: not a dump pkt. * < 0: fails to handle a dump pkt * > 0: otherwise. */ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) { int ret = 1; u8 pkt_type; u8 *sk_ptr; unsigned int sk_len; u16 seqno; u32 dump_size; struct hci_event_hdr *event_hdr; struct hci_acl_hdr *acl_hdr; struct qca_dump_hdr *dump_hdr; struct btusb_data *btdata = hci_get_drvdata(hdev); struct usb_device *udev = btdata->udev; pkt_type = hci_skb_pkt_type(skb); sk_ptr = skb->data; sk_len = skb->len; if (pkt_type == HCI_ACLDATA_PKT) { acl_hdr = hci_acl_hdr(skb); if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) return 0; sk_ptr += HCI_ACL_HDR_SIZE; sk_len -= HCI_ACL_HDR_SIZE; event_hdr = (struct hci_event_hdr *)sk_ptr; } else { event_hdr = hci_event_hdr(skb); } if ((event_hdr->evt != HCI_VENDOR_PKT) || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) return 0; sk_ptr += HCI_EVENT_HDR_SIZE; sk_len -= HCI_EVENT_HDR_SIZE; dump_hdr = (struct qca_dump_hdr *)sk_ptr; if ((sk_len < offsetof(struct qca_dump_hdr, data)) || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) return 0; /*it is dump pkt now*/ seqno = le16_to_cpu(dump_hdr->seqno); if (seqno == 0) { set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); dump_size = le32_to_cpu(dump_hdr->ram_dump_size); if (!dump_size || (dump_size > QCA_MEMDUMP_SIZE_MAX)) { ret = -EILSEQ; bt_dev_err(hdev, "Invalid memdump size(%u)", dump_size); goto out; } ret = hci_devcd_init(hdev, dump_size); if (ret < 0) { bt_dev_err(hdev, "memdump init error(%d)", ret); goto out; } btdata->qca_dump.ram_dump_size = dump_size; btdata->qca_dump.ram_dump_seqno = 0; sk_ptr += offsetof(struct qca_dump_hdr, data0); sk_len -= offsetof(struct qca_dump_hdr, data0); usb_disable_autosuspend(udev); bt_dev_info(hdev, "%s memdump size(%u)\n", (pkt_type == HCI_ACLDATA_PKT) ? 
"ACL" : "event", dump_size); } else { sk_ptr += offsetof(struct qca_dump_hdr, data); sk_len -= offsetof(struct qca_dump_hdr, data); } if (!btdata->qca_dump.ram_dump_size) { ret = -EINVAL; bt_dev_err(hdev, "memdump is not active"); goto out; } if ((seqno > btdata->qca_dump.ram_dump_seqno + 1) && (seqno != QCA_LAST_SEQUENCE_NUM)) { dump_size = QCA_MEMDUMP_PKT_SIZE * (seqno - btdata->qca_dump.ram_dump_seqno - 1); hci_devcd_append_pattern(hdev, 0x0, dump_size); bt_dev_err(hdev, "expected memdump seqno(%u) is not received(%u)\n", btdata->qca_dump.ram_dump_seqno, seqno); btdata->qca_dump.ram_dump_seqno = seqno; kfree_skb(skb); return ret; } skb_pull(skb, skb->len - sk_len); hci_devcd_append(hdev, skb); btdata->qca_dump.ram_dump_seqno++; if (seqno == QCA_LAST_SEQUENCE_NUM) { bt_dev_info(hdev, "memdump done: pkts(%u), total(%u)\n", btdata->qca_dump.ram_dump_seqno, btdata->qca_dump.ram_dump_size); hci_devcd_complete(hdev); goto out; } return ret; out: if (btdata->qca_dump.ram_dump_size) usb_enable_autosuspend(udev); btdata->qca_dump.ram_dump_size = 0; btdata->qca_dump.ram_dump_seqno = 0; clear_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); if (ret < 0) kfree_skb(skb); return ret; } static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) { if (handle_dump_pkt_qca(hdev, skb)) return 0; return hci_recv_frame(hdev, skb); } static int btusb_recv_evt_qca(struct hci_dev *hdev, struct sk_buff *skb) { if (handle_dump_pkt_qca(hdev, skb)) return 0; return hci_recv_frame(hdev, skb); } #define QCA_DFU_PACKET_LEN 4096 #define QCA_GET_TARGET_VERSION 0x09 #define QCA_CHECK_STATUS 0x05 #define QCA_DFU_DOWNLOAD 0x01 #define QCA_SYSCFG_UPDATED 0x40 #define QCA_PATCH_UPDATED 0x80 #define QCA_DFU_TIMEOUT 3000 #define QCA_FLAG_MULTI_NVM 0x80 #define QCA_BT_RESET_WAIT_MS 100 #define WCN6855_2_0_RAM_VERSION_GF 0x400c1200 #define WCN6855_2_1_RAM_VERSION_GF 0x400c1211 struct qca_version { __le32 rom_version; __le32 patch_version; __le32 ram_version; __u8 chip_id; __u8 platform_id; __le16 flag; __u8 reserved[4]; } __packed; struct qca_rampatch_version { __le16 rom_version_high; __le16 rom_version_low; __le16 patch_version; } __packed; struct qca_device_info { u32 rom_version; u8 rampatch_hdr; /* length of header in rampatch */ u8 nvm_hdr; /* length of header in NVM */ u8 ver_offset; /* offset of version structure in rampatch */ }; static const struct qca_device_info qca_devices_table[] = { { 0x00000100, 20, 4, 8 }, /* Rome 1.0 */ { 0x00000101, 20, 4, 8 }, /* Rome 1.1 */ { 0x00000200, 28, 4, 16 }, /* Rome 2.0 */ { 0x00000201, 28, 4, 16 }, /* Rome 2.1 */ { 0x00000300, 28, 4, 16 }, /* Rome 3.0 */ { 0x00000302, 28, 4, 16 }, /* Rome 3.2 */ { 0x00130100, 40, 4, 16 }, /* WCN6855 1.0 */ { 0x00130200, 40, 4, 16 }, /* WCN6855 2.0 */ { 0x00130201, 40, 4, 16 }, /* WCN6855 2.1 */ { 0x00190200, 40, 4, 16 }, /* WCN785x 2.0 */ }; static int btusb_qca_send_vendor_req(struct usb_device *udev, u8 request, void *data, u16 size) { int pipe, err; u8 *buf; buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; /* Found some of USB hosts have IOT issues with ours so that we should * not wait until HCI layer is ready. 
*/ pipe = usb_rcvctrlpipe(udev, 0); err = usb_control_msg(udev, pipe, request, USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, buf, size, USB_CTRL_SET_TIMEOUT); if (err < 0) { dev_err(&udev->dev, "Failed to access otp area (%d)", err); goto done; } memcpy(data, buf, size); done: kfree(buf); return err; } static int btusb_setup_qca_download_fw(struct hci_dev *hdev, const struct firmware *firmware, size_t hdr_size) { struct btusb_data *btdata = hci_get_drvdata(hdev); struct usb_device *udev = btdata->udev; size_t count, size, sent = 0; int pipe, len, err; u8 *buf; buf = kmalloc(QCA_DFU_PACKET_LEN, GFP_KERNEL); if (!buf) return -ENOMEM; count = firmware->size; size = min_t(size_t, count, hdr_size); memcpy(buf, firmware->data, size); /* USB patches should go down to controller through USB path * because binary format fits to go down through USB channel. * USB control path is for patching headers and USB bulk is for * patch body. */ pipe = usb_sndctrlpipe(udev, 0); err = usb_control_msg(udev, pipe, QCA_DFU_DOWNLOAD, USB_TYPE_VENDOR, 0, 0, buf, size, USB_CTRL_SET_TIMEOUT); if (err < 0) { bt_dev_err(hdev, "Failed to send headers (%d)", err); goto done; } sent += size; count -= size; /* ep2 need time to switch from function acl to function dfu, * so we add 20ms delay here. */ msleep(20); while (count) { size = min_t(size_t, count, QCA_DFU_PACKET_LEN); memcpy(buf, firmware->data + sent, size); pipe = usb_sndbulkpipe(udev, 0x02); err = usb_bulk_msg(udev, pipe, buf, size, &len, QCA_DFU_TIMEOUT); if (err < 0) { bt_dev_err(hdev, "Failed to send body at %zd of %zd (%d)", sent, firmware->size, err); break; } if (size != len) { bt_dev_err(hdev, "Failed to get bulk buffer"); err = -EILSEQ; break; } sent += size; count -= size; } done: kfree(buf); return err; } static int btusb_setup_qca_load_rampatch(struct hci_dev *hdev, struct qca_version *ver, const struct qca_device_info *info) { struct qca_rampatch_version *rver; const struct firmware *fw; u32 ver_rom, ver_patch, rver_rom; u16 rver_rom_low, rver_rom_high, rver_patch; char fwname[64]; int err; ver_rom = le32_to_cpu(ver->rom_version); ver_patch = le32_to_cpu(ver->patch_version); snprintf(fwname, sizeof(fwname), "qca/rampatch_usb_%08x.bin", ver_rom); err = request_firmware(&fw, fwname, &hdev->dev); if (err) { bt_dev_err(hdev, "failed to request rampatch file: %s (%d)", fwname, err); return err; } bt_dev_info(hdev, "using rampatch file: %s", fwname); rver = (struct qca_rampatch_version *)(fw->data + info->ver_offset); rver_rom_low = le16_to_cpu(rver->rom_version_low); rver_patch = le16_to_cpu(rver->patch_version); if (ver_rom & ~0xffffU) { rver_rom_high = le16_to_cpu(rver->rom_version_high); rver_rom = rver_rom_high << 16 | rver_rom_low; } else { rver_rom = rver_rom_low; } bt_dev_info(hdev, "QCA: patch rome 0x%x build 0x%x, " "firmware rome 0x%x build 0x%x", rver_rom, rver_patch, ver_rom, ver_patch); if (rver_rom != ver_rom || rver_patch <= ver_patch) { bt_dev_err(hdev, "rampatch file version did not match with firmware"); err = -EINVAL; goto done; } err = btusb_setup_qca_download_fw(hdev, fw, info->rampatch_hdr); done: release_firmware(fw); return err; } static void btusb_generate_qca_nvm_name(char *fwname, size_t max_size, const struct qca_version *ver) { u32 rom_version = le32_to_cpu(ver->rom_version); u16 flag = le16_to_cpu(ver->flag); if (((flag >> 8) & 0xff) == QCA_FLAG_MULTI_NVM) { /* The board_id should be split into two bytes * The 1st byte is chip ID, and the 2nd byte is platform ID * For example, board ID 0x010A, 0x01 is platform ID. 
0x0A is chip ID * we have several platforms, and platform IDs are continuously added * Platform ID: * 0x00 is for Mobile * 0x01 is for X86 * 0x02 is for Automotive * 0x03 is for Consumer electronic */ u16 board_id = (ver->chip_id << 8) + ver->platform_id; const char *variant; switch (le32_to_cpu(ver->ram_version)) { case WCN6855_2_0_RAM_VERSION_GF: case WCN6855_2_1_RAM_VERSION_GF: variant = "_gf"; break; default: variant = ""; break; } if (board_id == 0) { snprintf(fwname, max_size, "qca/nvm_usb_%08x%s.bin", rom_version, variant); } else { snprintf(fwname, max_size, "qca/nvm_usb_%08x%s_%04x.bin", rom_version, variant, board_id); } } else { snprintf(fwname, max_size, "qca/nvm_usb_%08x.bin", rom_version); } } static int btusb_setup_qca_load_nvm(struct hci_dev *hdev, struct qca_version *ver, const struct qca_device_info *info) { const struct firmware *fw; char fwname[64]; int err; btusb_generate_qca_nvm_name(fwname, sizeof(fwname), ver); err = request_firmware(&fw, fwname, &hdev->dev); if (err) { bt_dev_err(hdev, "failed to request NVM file: %s (%d)", fwname, err); return err; } bt_dev_info(hdev, "using NVM file: %s", fwname); err = btusb_setup_qca_download_fw(hdev, fw, info->nvm_hdr); release_firmware(fw); return err; } /* identify the ROM version and check whether patches are needed */ static bool btusb_qca_need_patch(struct usb_device *udev) { struct qca_version ver; if (btusb_qca_send_vendor_req(udev, QCA_GET_TARGET_VERSION, &ver, sizeof(ver)) < 0) return false; /* only low ROM versions need patches */ return !(le32_to_cpu(ver.rom_version) & ~0xffffU); } static int btusb_setup_qca(struct hci_dev *hdev) { struct btusb_data *btdata = hci_get_drvdata(hdev); struct usb_device *udev = btdata->udev; const struct qca_device_info *info = NULL; struct qca_version ver; u32 ver_rom; u8 status; int i, err; err = btusb_qca_send_vendor_req(udev, QCA_GET_TARGET_VERSION, &ver, sizeof(ver)); if (err < 0) return err; ver_rom = le32_to_cpu(ver.rom_version); for (i = 0; i < ARRAY_SIZE(qca_devices_table); i++) { if (ver_rom == qca_devices_table[i].rom_version) info = &qca_devices_table[i]; } if (!info) { /* If the rom_version is not matched in the qca_devices_table * and the high ROM version is not zero, we assume this chip no * need to load the rampatch and nvm. */ if (ver_rom & ~0xffffU) return 0; bt_dev_err(hdev, "don't support firmware rome 0x%x", ver_rom); return -ENODEV; } err = btusb_qca_send_vendor_req(udev, QCA_CHECK_STATUS, &status, sizeof(status)); if (err < 0) return err; if (!(status & QCA_PATCH_UPDATED)) { err = btusb_setup_qca_load_rampatch(hdev, &ver, info); if (err < 0) return err; } err = btusb_qca_send_vendor_req(udev, QCA_GET_TARGET_VERSION, &ver, sizeof(ver)); if (err < 0) return err; btdata->qca_dump.fw_version = le32_to_cpu(ver.patch_version); btdata->qca_dump.controller_id = le32_to_cpu(ver.rom_version); if (!(status & QCA_SYSCFG_UPDATED)) { err = btusb_setup_qca_load_nvm(hdev, &ver, info); if (err < 0) return err; /* WCN6855 2.1 and later will reset to apply firmware downloaded here, so * wait ~100ms for reset Done then go ahead, otherwise, it maybe * cause potential enable failure. */ if (info->rom_version >= 0x00130201) msleep(QCA_BT_RESET_WAIT_MS); } /* Mark HCI_OP_ENHANCED_SETUP_SYNC_CONN as broken as it doesn't seem to * work with the likes of HSP/HFP mSBC. 
*/ set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); return 0; } static inline int __set_diag_interface(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); struct usb_interface *intf = data->diag; int i; if (!data->diag) return -ENODEV; data->diag_tx_ep = NULL; data->diag_rx_ep = NULL; for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { struct usb_endpoint_descriptor *ep_desc; ep_desc = &intf->cur_altsetting->endpoint[i].desc; if (!data->diag_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) { data->diag_tx_ep = ep_desc; continue; } if (!data->diag_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) { data->diag_rx_ep = ep_desc; continue; } } if (!data->diag_tx_ep || !data->diag_rx_ep) { bt_dev_err(hdev, "invalid diagnostic descriptors"); return -ENODEV; } return 0; } static struct urb *alloc_diag_urb(struct hci_dev *hdev, bool enable) { struct btusb_data *data = hci_get_drvdata(hdev); struct sk_buff *skb; struct urb *urb; unsigned int pipe; if (!data->diag_tx_ep) return ERR_PTR(-ENODEV); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return ERR_PTR(-ENOMEM); skb = bt_skb_alloc(2, GFP_KERNEL); if (!skb) { usb_free_urb(urb); return ERR_PTR(-ENOMEM); } skb_put_u8(skb, 0xf0); skb_put_u8(skb, enable); pipe = usb_sndbulkpipe(data->udev, data->diag_tx_ep->bEndpointAddress); usb_fill_bulk_urb(urb, data->udev, pipe, skb->data, skb->len, btusb_tx_complete, skb); skb->dev = (void *)hdev; return urb; } static int btusb_bcm_set_diag(struct hci_dev *hdev, bool enable) { struct btusb_data *data = hci_get_drvdata(hdev); struct urb *urb; if (!data->diag) return -ENODEV; if (!test_bit(HCI_RUNNING, &hdev->flags)) return -ENETDOWN; urb = alloc_diag_urb(hdev, enable); if (IS_ERR(urb)) return PTR_ERR(urb); return submit_or_queue_tx_urb(hdev, urb); } #ifdef CONFIG_PM static irqreturn_t btusb_oob_wake_handler(int irq, void *priv) { struct btusb_data *data = priv; pm_wakeup_event(&data->udev->dev, 0); pm_system_wakeup(); /* Disable only if not already disabled (keep it balanced) */ if (test_and_clear_bit(BTUSB_OOB_WAKE_ENABLED, &data->flags)) { disable_irq_nosync(irq); disable_irq_wake(irq); } return IRQ_HANDLED; } static const struct of_device_id btusb_match_table[] = { { .compatible = "usb1286,204e" }, { .compatible = "usbcf3,e300" }, /* QCA6174A */ { .compatible = "usb4ca,301a" }, /* QCA6174A (Lite-On) */ { } }; MODULE_DEVICE_TABLE(of, btusb_match_table); /* Use an oob wakeup pin? 
*/ static int btusb_config_oob_wake(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); struct device *dev = &data->udev->dev; int irq, ret; clear_bit(BTUSB_OOB_WAKE_ENABLED, &data->flags); if (!of_match_device(btusb_match_table, dev)) return 0; /* Move on if no IRQ specified */ irq = of_irq_get_byname(dev->of_node, "wakeup"); if (irq <= 0) { bt_dev_dbg(hdev, "%s: no OOB Wakeup IRQ in DT", __func__); return 0; } irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_irq(&hdev->dev, irq, btusb_oob_wake_handler, 0, "OOB Wake-on-BT", data); if (ret) { bt_dev_err(hdev, "%s: IRQ request failed", __func__); return ret; } ret = device_init_wakeup(dev, true); if (ret) { bt_dev_err(hdev, "%s: failed to init_wakeup", __func__); return ret; } data->oob_wake_irq = irq; bt_dev_info(hdev, "OOB Wake-on-BT configured at IRQ %u", irq); return 0; } #endif static void btusb_check_needs_reset_resume(struct usb_interface *intf) { if (dmi_check_system(btusb_needs_reset_resume_table)) interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } static bool btusb_wakeup(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); return device_may_wakeup(&data->udev->dev); } static int btusb_shutdown_qca(struct hci_dev *hdev) { struct sk_buff *skb; skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { bt_dev_err(hdev, "HCI reset during shutdown failed"); return PTR_ERR(skb); } kfree_skb(skb); return 0; } static ssize_t force_poll_sync_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct btusb_data *data = file->private_data; char buf[3]; buf[0] = data->poll_sync ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } static ssize_t force_poll_sync_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct btusb_data *data = file->private_data; bool enable; int err; err = kstrtobool_from_user(user_buf, count, &enable); if (err) return err; /* Only allow changes while the adapter is down */ if (test_bit(HCI_UP, &data->hdev->flags)) return -EPERM; if (data->poll_sync == enable) return -EALREADY; data->poll_sync = enable; return count; } static const struct file_operations force_poll_sync_fops = { .open = simple_open, .read = force_poll_sync_read, .write = force_poll_sync_write, .llseek = default_llseek, }; static int btusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_endpoint_descriptor *ep_desc; struct gpio_desc *reset_gpio; struct btusb_data *data; struct hci_dev *hdev; unsigned ifnum_base; int i, err, priv_size; BT_DBG("intf %p id %p", intf, id); if ((id->driver_info & BTUSB_IFNUM_2) && (intf->cur_altsetting->desc.bInterfaceNumber != 0) && (intf->cur_altsetting->desc.bInterfaceNumber != 2)) return -ENODEV; ifnum_base = intf->cur_altsetting->desc.bInterfaceNumber; if (!id->driver_info) { const struct usb_device_id *match; match = usb_match_id(intf, quirks_table); if (match) id = match; } if (id->driver_info == BTUSB_IGNORE) return -ENODEV; if (id->driver_info & BTUSB_ATH3012) { struct usb_device *udev = interface_to_usbdev(intf); /* Old firmware would otherwise let ath3k driver load * patch and sysconfig files */ if (le16_to_cpu(udev->descriptor.bcdDevice) <= 0x0001 && !btusb_qca_need_patch(udev)) return -ENODEV; } data = devm_kzalloc(&intf->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { ep_desc = 
&intf->cur_altsetting->endpoint[i].desc; if (!data->intr_ep && usb_endpoint_is_int_in(ep_desc)) { data->intr_ep = ep_desc; continue; } if (!data->bulk_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) { data->bulk_tx_ep = ep_desc; continue; } if (!data->bulk_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) { data->bulk_rx_ep = ep_desc; continue; } } if (!data->intr_ep || !data->bulk_tx_ep || !data->bulk_rx_ep) return -ENODEV; if (id->driver_info & BTUSB_AMP) { data->cmdreq_type = USB_TYPE_CLASS | 0x01; data->cmdreq = 0x2b; } else { data->cmdreq_type = USB_TYPE_CLASS; data->cmdreq = 0x00; } data->udev = interface_to_usbdev(intf); data->intf = intf; INIT_WORK(&data->work, btusb_work); INIT_WORK(&data->waker, btusb_waker); INIT_DELAYED_WORK(&data->rx_work, btusb_rx_work); skb_queue_head_init(&data->acl_q); init_usb_anchor(&data->deferred); init_usb_anchor(&data->tx_anchor); spin_lock_init(&data->txlock); init_usb_anchor(&data->intr_anchor); init_usb_anchor(&data->bulk_anchor); init_usb_anchor(&data->isoc_anchor); init_usb_anchor(&data->diag_anchor); init_usb_anchor(&data->ctrl_anchor); spin_lock_init(&data->rxlock); priv_size = 0; data->recv_event = hci_recv_frame; data->recv_bulk = btusb_recv_bulk; if (id->driver_info & BTUSB_INTEL_COMBINED) { /* Allocate extra space for Intel device */ priv_size += sizeof(struct btintel_data); /* Override the rx handlers */ data->recv_event = btintel_recv_event; data->recv_bulk = btusb_recv_bulk_intel; } else if (id->driver_info & BTUSB_REALTEK) { /* Allocate extra space for Realtek device */ priv_size += sizeof(struct btrealtek_data); data->recv_event = btusb_recv_event_realtek; } else if (id->driver_info & BTUSB_MEDIATEK) { /* Allocate extra space for Mediatek device */ priv_size += sizeof(struct btmediatek_data); } data->recv_acl = hci_recv_frame; hdev = hci_alloc_dev_priv(priv_size); if (!hdev) return -ENOMEM; hdev->bus = HCI_USB; hci_set_drvdata(hdev, data); if (id->driver_info & BTUSB_AMP) hdev->dev_type = HCI_AMP; else hdev->dev_type = HCI_PRIMARY; data->hdev = hdev; SET_HCIDEV_DEV(hdev, &intf->dev); reset_gpio = gpiod_get_optional(&data->udev->dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(reset_gpio)) { err = PTR_ERR(reset_gpio); goto out_free_dev; } else if (reset_gpio) { data->reset_gpio = reset_gpio; } hdev->open = btusb_open; hdev->close = btusb_close; hdev->flush = btusb_flush; hdev->send = btusb_send_frame; hdev->notify = btusb_notify; hdev->wakeup = btusb_wakeup; #ifdef CONFIG_PM err = btusb_config_oob_wake(hdev); if (err) goto out_free_dev; /* Marvell devices may need a specific chip configuration */ if (id->driver_info & BTUSB_MARVELL && data->oob_wake_irq) { err = marvell_config_oob_wake(hdev); if (err) goto out_free_dev; } #endif if (id->driver_info & BTUSB_CW6622) set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); if (id->driver_info & BTUSB_BCM2045) set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); if (id->driver_info & BTUSB_BCM92035) hdev->setup = btusb_setup_bcm92035; if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) && (id->driver_info & BTUSB_BCM_PATCHRAM)) { hdev->manufacturer = 15; hdev->setup = btbcm_setup_patchram; hdev->set_diag = btusb_bcm_set_diag; hdev->set_bdaddr = btbcm_set_bdaddr; /* Broadcom LM_DIAG Interface numbers are hardcoded */ data->diag = usb_ifnum_to_if(data->udev, ifnum_base + 2); } if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) && (id->driver_info & BTUSB_BCM_APPLE)) { hdev->manufacturer = 15; hdev->setup = btbcm_setup_apple; hdev->set_diag = btusb_bcm_set_diag; /* Broadcom LM_DIAG Interface numbers are hardcoded */ data->diag = 
usb_ifnum_to_if(data->udev, ifnum_base + 2); } /* Combined Intel Device setup to support multiple setup routine */ if (id->driver_info & BTUSB_INTEL_COMBINED) { err = btintel_configure_setup(hdev, btusb_driver.name); if (err) goto out_free_dev; /* Transport specific configuration */ hdev->send = btusb_send_frame_intel; hdev->cmd_timeout = btusb_intel_cmd_timeout; if (id->driver_info & BTUSB_INTEL_NO_WBS_SUPPORT) btintel_set_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT); if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD) btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD); if (id->driver_info & BTUSB_INTEL_BROKEN_SHUTDOWN_LED) btintel_set_flag(hdev, INTEL_BROKEN_SHUTDOWN_LED); } if (id->driver_info & BTUSB_MARVELL) hdev->set_bdaddr = btusb_set_bdaddr_marvell; if (IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) && (id->driver_info & BTUSB_MEDIATEK)) { hdev->setup = btusb_mtk_setup; hdev->shutdown = btusb_mtk_shutdown; hdev->manufacturer = 70; hdev->cmd_timeout = btmtk_reset_sync; hdev->set_bdaddr = btmtk_set_bdaddr; set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); data->recv_acl = btusb_recv_acl_mtk; } if (id->driver_info & BTUSB_SWAVE) { set_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks); } if (id->driver_info & BTUSB_INTEL_BOOT) { hdev->manufacturer = 2; set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); } if (id->driver_info & BTUSB_ATH3012) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks); } if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; hdev->shutdown = btusb_shutdown_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; hdev->cmd_timeout = btusb_qca_cmd_timeout; set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); btusb_check_needs_reset_resume(intf); } if (id->driver_info & BTUSB_QCA_WCN6855) { data->qca_dump.id_vendor = id->idVendor; data->qca_dump.id_product = id->idProduct; data->recv_event = btusb_recv_evt_qca; data->recv_acl = btusb_recv_acl_qca; hci_devcd_register(hdev, btusb_coredump_qca, btusb_dump_hdr_qca, NULL); data->setup_on_usb = btusb_setup_qca; hdev->shutdown = btusb_shutdown_qca; hdev->set_bdaddr = btusb_set_bdaddr_wcn6855; hdev->cmd_timeout = btusb_qca_cmd_timeout; set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); hci_set_msft_opcode(hdev, 0xFD70); } if (id->driver_info & BTUSB_AMP) { /* AMP controllers do not support SCO packets */ data->isoc = NULL; } else { /* Interface orders are hardcoded in the specification */ data->isoc = usb_ifnum_to_if(data->udev, ifnum_base + 1); data->isoc_ifnum = ifnum_base + 1; } if (IS_ENABLED(CONFIG_BT_HCIBTUSB_RTL) && (id->driver_info & BTUSB_REALTEK)) { btrtl_set_driver_name(hdev, btusb_driver.name); hdev->setup = btusb_setup_realtek; hdev->shutdown = btrtl_shutdown_realtek; hdev->cmd_timeout = btusb_rtl_cmd_timeout; hdev->hw_error = btusb_rtl_hw_error; /* Realtek devices need to set remote wakeup on auto-suspend */ set_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags); set_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags); } if (id->driver_info & BTUSB_ACTIONS_SEMI) { /* Support is advertised, but not implemented */ set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks); 
set_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks); } if (!reset) set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); if (force_scofix || id->driver_info & BTUSB_WRONG_SCO_MTU) { if (!disable_scofix) set_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks); } if (id->driver_info & BTUSB_BROKEN_ISOC) data->isoc = NULL; if (id->driver_info & BTUSB_WIDEBAND_SPEECH) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); if (id->driver_info & BTUSB_VALID_LE_STATES) set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); } if (id->driver_info & BTUSB_CSR) { struct usb_device *udev = data->udev; u16 bcdDevice = le16_to_cpu(udev->descriptor.bcdDevice); /* Old firmware would otherwise execute USB reset */ if (bcdDevice < 0x117) set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); /* This must be set first in case we disable it for fakes */ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); /* Fake CSR devices with broken commands */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0a12 && le16_to_cpu(udev->descriptor.idProduct) == 0x0001) hdev->setup = btusb_setup_csr; } if (id->driver_info & BTUSB_SNIFFER) { struct usb_device *udev = data->udev; /* New sniffer firmware has crippled HCI interface */ if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x997) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); } if (id->driver_info & BTUSB_INTEL_BOOT) { /* A bug in the bootloader causes that interrupt interface is * only enabled after receiving SetInterface(0, AltSetting=0). */ err = usb_set_interface(data->udev, 0, 0); if (err < 0) { BT_ERR("failed to set interface 0, alt 0 %d", err); goto out_free_dev; } } if (data->isoc) { err = usb_driver_claim_interface(&btusb_driver, data->isoc, data); if (err < 0) goto out_free_dev; } if (IS_ENABLED(CONFIG_BT_HCIBTUSB_BCM) && data->diag) { if (!usb_driver_claim_interface(&btusb_driver, data->diag, data)) __set_diag_interface(hdev); else data->diag = NULL; } if (enable_autosuspend) usb_enable_autosuspend(data->udev); data->poll_sync = enable_poll_sync; err = hci_register_dev(hdev); if (err < 0) goto out_free_dev; usb_set_intfdata(intf, data); debugfs_create_file("force_poll_sync", 0644, hdev->debugfs, data, &force_poll_sync_fops); return 0; out_free_dev: if (data->reset_gpio) gpiod_put(data->reset_gpio); hci_free_dev(hdev); return err; } static void btusb_disconnect(struct usb_interface *intf) { struct btusb_data *data = usb_get_intfdata(intf); struct hci_dev *hdev; BT_DBG("intf %p", intf); if (!data) return; hdev = data->hdev; usb_set_intfdata(data->intf, NULL); if (data->isoc) usb_set_intfdata(data->isoc, NULL); if (data->diag) usb_set_intfdata(data->diag, NULL); hci_unregister_dev(hdev); if (intf == data->intf) { if (data->isoc) usb_driver_release_interface(&btusb_driver, data->isoc); if (data->diag) usb_driver_release_interface(&btusb_driver, data->diag); } else if (intf == data->isoc) { if (data->diag) usb_driver_release_interface(&btusb_driver, data->diag); usb_driver_release_interface(&btusb_driver, data->intf); } else if (intf == data->diag) { usb_driver_release_interface(&btusb_driver, data->intf); if (data->isoc) usb_driver_release_interface(&btusb_driver, data->isoc); } if (data->oob_wake_irq) device_init_wakeup(&data->udev->dev, false); if (data->reset_gpio) gpiod_put(data->reset_gpio); hci_free_dev(hdev); } #ifdef CONFIG_PM static int btusb_suspend(struct usb_interface *intf, pm_message_t message) { struct btusb_data *data = 
usb_get_intfdata(intf); BT_DBG("intf %p", intf); /* Don't suspend if there are connections */ if (hci_conn_count(data->hdev)) return -EBUSY; if (data->suspend_count++) return 0; spin_lock_irq(&data->txlock); if (!(PMSG_IS_AUTO(message) && data->tx_in_flight)) { set_bit(BTUSB_SUSPENDING, &data->flags); spin_unlock_irq(&data->txlock); } else { spin_unlock_irq(&data->txlock); data->suspend_count--; return -EBUSY; } cancel_work_sync(&data->work); btusb_stop_traffic(data); usb_kill_anchored_urbs(&data->tx_anchor); if (data->oob_wake_irq && device_may_wakeup(&data->udev->dev)) { set_bit(BTUSB_OOB_WAKE_ENABLED, &data->flags); enable_irq_wake(data->oob_wake_irq); enable_irq(data->oob_wake_irq); } /* For global suspend, Realtek devices lose the loaded fw * in them. But for autosuspend, firmware should remain. * Actually, it depends on whether the usb host sends * set feature (enable wakeup) or not. */ if (test_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags)) { if (PMSG_IS_AUTO(message) && device_can_wakeup(&data->udev->dev)) data->udev->do_remote_wakeup = 1; else if (!PMSG_IS_AUTO(message) && !device_may_wakeup(&data->udev->dev)) { data->udev->do_remote_wakeup = 0; data->udev->reset_resume = 1; } } return 0; } static void play_deferred(struct btusb_data *data) { struct urb *urb; int err; while ((urb = usb_get_from_anchor(&data->deferred))) { usb_anchor_urb(urb, &data->tx_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { if (err != -EPERM && err != -ENODEV) BT_ERR("%s urb %p submission failed (%d)", data->hdev->name, urb, -err); kfree(urb->setup_packet); usb_unanchor_urb(urb); usb_free_urb(urb); break; } data->tx_in_flight++; usb_free_urb(urb); } /* Cleanup the rest deferred urbs. */ while ((urb = usb_get_from_anchor(&data->deferred))) { kfree(urb->setup_packet); usb_free_urb(urb); } } static int btusb_resume(struct usb_interface *intf) { struct btusb_data *data = usb_get_intfdata(intf); struct hci_dev *hdev = data->hdev; int err = 0; BT_DBG("intf %p", intf); if (--data->suspend_count) return 0; /* Disable only if not already disabled (keep it balanced) */ if (test_and_clear_bit(BTUSB_OOB_WAKE_ENABLED, &data->flags)) { disable_irq(data->oob_wake_irq); disable_irq_wake(data->oob_wake_irq); } if (!test_bit(HCI_RUNNING, &hdev->flags)) goto done; if (test_bit(BTUSB_INTR_RUNNING, &data->flags)) { err = btusb_submit_intr_urb(hdev, GFP_NOIO); if (err < 0) { clear_bit(BTUSB_INTR_RUNNING, &data->flags); goto failed; } } if (test_bit(BTUSB_BULK_RUNNING, &data->flags)) { err = btusb_submit_bulk_urb(hdev, GFP_NOIO); if (err < 0) { clear_bit(BTUSB_BULK_RUNNING, &data->flags); goto failed; } btusb_submit_bulk_urb(hdev, GFP_NOIO); } if (test_bit(BTUSB_ISOC_RUNNING, &data->flags)) { if (btusb_submit_isoc_urb(hdev, GFP_NOIO) < 0) clear_bit(BTUSB_ISOC_RUNNING, &data->flags); else btusb_submit_isoc_urb(hdev, GFP_NOIO); } spin_lock_irq(&data->txlock); play_deferred(data); clear_bit(BTUSB_SUSPENDING, &data->flags); spin_unlock_irq(&data->txlock); schedule_work(&data->work); return 0; failed: usb_scuttle_anchored_urbs(&data->deferred); done: spin_lock_irq(&data->txlock); clear_bit(BTUSB_SUSPENDING, &data->flags); spin_unlock_irq(&data->txlock); return err; } #endif #ifdef CONFIG_DEV_COREDUMP static void btusb_coredump(struct device *dev) { struct btusb_data *data = dev_get_drvdata(dev); struct hci_dev *hdev = data->hdev; if (hdev->dump.coredump) hdev->dump.coredump(hdev); } #endif static struct usb_driver btusb_driver = { .name = "btusb", .probe = btusb_probe, .disconnect = btusb_disconnect, #ifdef CONFIG_PM 
.suspend = btusb_suspend, .resume = btusb_resume, #endif .id_table = btusb_table, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, #ifdef CONFIG_DEV_COREDUMP .driver = { .coredump = btusb_coredump, }, #endif }; module_usb_driver(btusb_driver); module_param(disable_scofix, bool, 0644); MODULE_PARM_DESC(disable_scofix, "Disable fixup of wrong SCO buffer size"); module_param(force_scofix, bool, 0644); MODULE_PARM_DESC(force_scofix, "Force fixup of wrong SCO buffers size"); module_param(enable_autosuspend, bool, 0644); MODULE_PARM_DESC(enable_autosuspend, "Enable USB autosuspend by default"); module_param(reset, bool, 0644); MODULE_PARM_DESC(reset, "Send HCI reset command on initialization"); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Generic Bluetooth USB driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL");
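[Illustrative aside, not part of btusb.c] The board-ID comment inside btusb_generate_qca_nvm_name() above describes how the NVM file name is derived from the ROM version, the multi-NVM flag, and a 16-bit board ID that the driver builds as (chip_id << 8) + platform_id. The stand-alone sketch below only mirrors that naming scheme as a worked example; the helper name example_qca_nvm_name() and the sample values in main() are hypothetical, while the WCN6855 2.1 ROM version 0x00130201 is taken from qca_devices_table above.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical user-space mirror of the naming rules shown above:
 * - multi-NVM flag (0x80 in the high byte of 'flag'):
 *       "qca/nvm_usb_<rom>[_gf][_<board>].bin"
 * - otherwise:
 *       "qca/nvm_usb_<rom>.bin"
 */
static void example_qca_nvm_name(char *out, size_t len, uint32_t rom_version,
				 uint16_t flag, uint16_t board_id, int gf)
{
	const char *variant = gf ? "_gf" : "";

	if (((flag >> 8) & 0xff) == 0x80) {	/* QCA_FLAG_MULTI_NVM */
		if (board_id)
			snprintf(out, len, "qca/nvm_usb_%08x%s_%04x.bin",
				 rom_version, variant, board_id);
		else
			snprintf(out, len, "qca/nvm_usb_%08x%s.bin",
				 rom_version, variant);
	} else {
		snprintf(out, len, "qca/nvm_usb_%08x.bin", rom_version);
	}
}

int main(void)
{
	char name[64];

	/* hypothetical board ID 0x010a on a WCN6855 2.1 (non-GF) part */
	example_qca_nvm_name(name, sizeof(name), 0x00130201, 0x8000, 0x010a, 0);
	printf("%s\n", name);	/* prints qca/nvm_usb_00130201_010a.bin */
	return 0;
}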
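[Illustrative aside] The file that follows, drivers/base/dd.c, implements the deferred-probe machinery that bus and driver code builds on. As a minimal sketch of the consumer side (the platform driver, example_probe(), and the "example-deferred-probe" name are hypothetical), a probe routine simply propagates -EPROBE_DEFER when a dependency is not ready, which places the device on the pending list described below:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/gpio/consumer.h>
#include <linux/err.h>

/* Hypothetical consumer of the deferred-probe machinery implemented below:
 * if the GPIO provider has not probed yet, devm_gpiod_get() may fail with
 * -EPROBE_DEFER; returning that value adds this device to the pending list,
 * and a later successful probe elsewhere triggers a retry via the workqueue.
 */
static int example_probe(struct platform_device *pdev)
{
	struct gpio_desc *reset;

	reset = devm_gpiod_get(&pdev->dev, "reset", GPIOD_OUT_LOW);
	if (IS_ERR(reset))
		/* dev_err_probe() also records the deferral reason that
		 * deferred_devs_show() below exposes via debugfs
		 */
		return dev_err_probe(&pdev->dev, PTR_ERR(reset),
				     "reset GPIO not ready\n");

	return 0;
}

static struct platform_driver example_driver = {
	.probe	= example_probe,
	.driver	= {
		.name = "example-deferred-probe",
	},
};
module_platform_driver(example_driver);

MODULE_DESCRIPTION("Illustrative deferred-probe consumer (sketch)");
MODULE_LICENSE("GPL");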
// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/dd.c - The core device/driver interactions. * * This file contains the (sometimes tricky) code that controls the * interactions between devices and drivers, which primarily includes * driver binding and unbinding. * * All of this code used to exist in drivers/base/bus.c, but was * relocated to here in the name of compartmentalization (since it wasn't * strictly code just for the 'struct bus_type'. * * Copyright (c) 2002-5 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2007-2009 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2007-2009 Novell Inc. */ #include <linux/debugfs.h> #include <linux/device.h> #include <linux/delay.h> #include <linux/dma-map-ops.h> #include <linux/init.h> #include <linux/module.h> #include <linux/kthread.h> #include <linux/wait.h> #include <linux/async.h> #include <linux/pm_runtime.h> #include <linux/pinctrl/devinfo.h> #include <linux/slab.h> #include "base.h" #include "power/power.h" /* * Deferred Probe infrastructure. 
* * Sometimes driver probe order matters, but the kernel doesn't always have * dependency information which means some drivers will get probed before a * resource it depends on is available. For example, an SDHCI driver may * first need a GPIO line from an i2c GPIO controller before it can be * initialized. If a required resource is not available yet, a driver can * request probing to be deferred by returning -EPROBE_DEFER from its probe hook * * Deferred probe maintains two lists of devices, a pending list and an active * list. A driver returning -EPROBE_DEFER causes the device to be added to the * pending list. A successful driver probe will trigger moving all devices * from the pending to the active list so that the workqueue will eventually * retry them. * * The deferred_probe_mutex must be held any time the deferred_probe_*_list * of the (struct device*)->p->deferred_probe pointers are manipulated */ static DEFINE_MUTEX(deferred_probe_mutex); static LIST_HEAD(deferred_probe_pending_list); static LIST_HEAD(deferred_probe_active_list); static atomic_t deferred_trigger_count = ATOMIC_INIT(0); static bool initcalls_done; /* Save the async probe drivers' name from kernel cmdline */ #define ASYNC_DRV_NAMES_MAX_LEN 256 static char async_probe_drv_names[ASYNC_DRV_NAMES_MAX_LEN]; static bool async_probe_default; /* * In some cases, like suspend to RAM or hibernation, It might be reasonable * to prohibit probing of devices as it could be unsafe. * Once defer_all_probes is true all drivers probes will be forcibly deferred. */ static bool defer_all_probes; static void __device_set_deferred_probe_reason(const struct device *dev, char *reason) { kfree(dev->p->deferred_probe_reason); dev->p->deferred_probe_reason = reason; } /* * deferred_probe_work_func() - Retry probing devices in the active list. */ static void deferred_probe_work_func(struct work_struct *work) { struct device *dev; struct device_private *private; /* * This block processes every device in the deferred 'active' list. * Each device is removed from the active list and passed to * bus_probe_device() to re-attempt the probe. The loop continues * until every device in the active list is removed and retried. * * Note: Once the device is removed from the list and the mutex is * released, it is possible for the device get freed by another thread * and cause a illegal pointer dereference. This code uses * get/put_device() to ensure the device structure cannot disappear * from under our feet. */ mutex_lock(&deferred_probe_mutex); while (!list_empty(&deferred_probe_active_list)) { private = list_first_entry(&deferred_probe_active_list, typeof(*dev->p), deferred_probe); dev = private->device; list_del_init(&private->deferred_probe); get_device(dev); __device_set_deferred_probe_reason(dev, NULL); /* * Drop the mutex while probing each device; the probe path may * manipulate the deferred list */ mutex_unlock(&deferred_probe_mutex); /* * Force the device to the end of the dpm_list since * the PM code assumes that the order we add things to * the list is a good order for suspend but deferred * probe makes that very unsafe. 
*/ device_pm_move_to_tail(dev); dev_dbg(dev, "Retrying from deferred list\n"); bus_probe_device(dev); mutex_lock(&deferred_probe_mutex); put_device(dev); } mutex_unlock(&deferred_probe_mutex); } static DECLARE_WORK(deferred_probe_work, deferred_probe_work_func); void driver_deferred_probe_add(struct device *dev) { if (!dev->can_match) return; mutex_lock(&deferred_probe_mutex); if (list_empty(&dev->p->deferred_probe)) { dev_dbg(dev, "Added to deferred list\n"); list_add_tail(&dev->p->deferred_probe, &deferred_probe_pending_list); } mutex_unlock(&deferred_probe_mutex); } void driver_deferred_probe_del(struct device *dev) { mutex_lock(&deferred_probe_mutex); if (!list_empty(&dev->p->deferred_probe)) { dev_dbg(dev, "Removed from deferred list\n"); list_del_init(&dev->p->deferred_probe); __device_set_deferred_probe_reason(dev, NULL); } mutex_unlock(&deferred_probe_mutex); } static bool driver_deferred_probe_enable; /** * driver_deferred_probe_trigger() - Kick off re-probing deferred devices * * This functions moves all devices from the pending list to the active * list and schedules the deferred probe workqueue to process them. It * should be called anytime a driver is successfully bound to a device. * * Note, there is a race condition in multi-threaded probe. In the case where * more than one device is probing at the same time, it is possible for one * probe to complete successfully while another is about to defer. If the second * depends on the first, then it will get put on the pending list after the * trigger event has already occurred and will be stuck there. * * The atomic 'deferred_trigger_count' is used to determine if a successful * trigger has occurred in the midst of probing a driver. If the trigger count * changes in the midst of a probe, then deferred processing should be triggered * again. */ void driver_deferred_probe_trigger(void) { if (!driver_deferred_probe_enable) return; /* * A successful probe means that all the devices in the pending list * should be triggered to be reprobed. Move all the deferred devices * into the active list so they can be retried by the workqueue */ mutex_lock(&deferred_probe_mutex); atomic_inc(&deferred_trigger_count); list_splice_tail_init(&deferred_probe_pending_list, &deferred_probe_active_list); mutex_unlock(&deferred_probe_mutex); /* * Kick the re-probe thread. It may already be scheduled, but it is * safe to kick it again. */ queue_work(system_unbound_wq, &deferred_probe_work); } /** * device_block_probing() - Block/defer device's probes * * It will disable probing of devices and defer their probes instead. */ void device_block_probing(void) { defer_all_probes = true; /* sync with probes to avoid races. */ wait_for_device_probe(); } /** * device_unblock_probing() - Unblock/enable device's probes * * It will restore normal behavior and trigger re-probing of deferred * devices. 
*/ void device_unblock_probing(void) { defer_all_probes = false; driver_deferred_probe_trigger(); } /** * device_set_deferred_probe_reason() - Set defer probe reason message for device * @dev: the pointer to the struct device * @vaf: the pointer to va_format structure with message */ void device_set_deferred_probe_reason(const struct device *dev, struct va_format *vaf) { const char *drv = dev_driver_string(dev); char *reason; mutex_lock(&deferred_probe_mutex); reason = kasprintf(GFP_KERNEL, "%s: %pV", drv, vaf); __device_set_deferred_probe_reason(dev, reason); mutex_unlock(&deferred_probe_mutex); } /* * deferred_devs_show() - Show the devices in the deferred probe pending list. */ static int deferred_devs_show(struct seq_file *s, void *data) { struct device_private *curr; mutex_lock(&deferred_probe_mutex); list_for_each_entry(curr, &deferred_probe_pending_list, deferred_probe) seq_printf(s, "%s\t%s", dev_name(curr->device), curr->device->p->deferred_probe_reason ?: "\n"); mutex_unlock(&deferred_probe_mutex); return 0; } DEFINE_SHOW_ATTRIBUTE(deferred_devs); #ifdef CONFIG_MODULES static int driver_deferred_probe_timeout = 10; #else static int driver_deferred_probe_timeout; #endif static int __init deferred_probe_timeout_setup(char *str) { int timeout; if (!kstrtoint(str, 10, &timeout)) driver_deferred_probe_timeout = timeout; return 1; } __setup("deferred_probe_timeout=", deferred_probe_timeout_setup); /** * driver_deferred_probe_check_state() - Check deferred probe state * @dev: device to check * * Return: * * -ENODEV if initcalls have completed and modules are disabled. * * -ETIMEDOUT if the deferred probe timeout was set and has expired * and modules are enabled. * * -EPROBE_DEFER in other cases. * * Drivers or subsystems can opt-in to calling this function instead of directly * returning -EPROBE_DEFER. */ int driver_deferred_probe_check_state(struct device *dev) { if (!IS_ENABLED(CONFIG_MODULES) && initcalls_done) { dev_warn(dev, "ignoring dependency for device, assuming no driver\n"); return -ENODEV; } if (!driver_deferred_probe_timeout && initcalls_done) { dev_warn(dev, "deferred probe timeout, ignoring dependency\n"); return -ETIMEDOUT; } return -EPROBE_DEFER; } EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); static void deferred_probe_timeout_work_func(struct work_struct *work) { struct device_private *p; fw_devlink_drivers_done(); driver_deferred_probe_timeout = 0; driver_deferred_probe_trigger(); flush_work(&deferred_probe_work); mutex_lock(&deferred_probe_mutex); list_for_each_entry(p, &deferred_probe_pending_list, deferred_probe) dev_warn(p->device, "deferred probe pending: %s", p->deferred_probe_reason ?: "(reason unknown)\n"); mutex_unlock(&deferred_probe_mutex); fw_devlink_probing_done(); } static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func); void deferred_probe_extend_timeout(void) { /* * If the work hasn't been queued yet or if the work expired, don't * start a new one. */ if (cancel_delayed_work(&deferred_probe_timeout_work)) { schedule_delayed_work(&deferred_probe_timeout_work, driver_deferred_probe_timeout * HZ); pr_debug("Extended deferred probe timeout by %d secs\n", driver_deferred_probe_timeout); } } /** * deferred_probe_initcall() - Enable probing of deferred devices * * We don't want to get in the way when the bulk of drivers are getting probed. * Instead, this initcall makes sure that deferred probing is delayed until * late_initcall time. 
*/ static int deferred_probe_initcall(void) { debugfs_create_file("devices_deferred", 0444, NULL, NULL, &deferred_devs_fops); driver_deferred_probe_enable = true; driver_deferred_probe_trigger(); /* Sort as many dependencies as possible before exiting initcalls */ flush_work(&deferred_probe_work); initcalls_done = true; if (!IS_ENABLED(CONFIG_MODULES)) fw_devlink_drivers_done(); /* * Trigger deferred probe again, this time we won't defer anything * that is optional */ driver_deferred_probe_trigger(); flush_work(&deferred_probe_work); if (driver_deferred_probe_timeout > 0) { schedule_delayed_work(&deferred_probe_timeout_work, driver_deferred_probe_timeout * HZ); } if (!IS_ENABLED(CONFIG_MODULES)) fw_devlink_probing_done(); return 0; } late_initcall(deferred_probe_initcall); static void __exit deferred_probe_exit(void) { debugfs_lookup_and_remove("devices_deferred", NULL); } __exitcall(deferred_probe_exit); /** * device_is_bound() - Check if device is bound to a driver * @dev: device to check * * Returns true if passed device has already finished probing successfully * against a driver. * * This function must be called with the device lock held. */ bool device_is_bound(struct device *dev) { return dev->p && klist_node_attached(&dev->p->knode_driver); } static void driver_bound(struct device *dev) { if (device_is_bound(dev)) { dev_warn(dev, "%s: device already bound\n", __func__); return; } dev_dbg(dev, "driver: '%s': %s: bound to device\n", dev->driver->name, __func__); klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices); device_links_driver_bound(dev); device_pm_check_callbacks(dev); /* * Make sure the device is no longer in one of the deferred lists and * kick off retrying all pending devices */ driver_deferred_probe_del(dev); driver_deferred_probe_trigger(); bus_notify(dev, BUS_NOTIFY_BOUND_DRIVER); kobject_uevent(&dev->kobj, KOBJ_BIND); } static ssize_t coredump_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { device_lock(dev); dev->driver->coredump(dev); device_unlock(dev); return count; } static DEVICE_ATTR_WO(coredump); static int driver_sysfs_add(struct device *dev) { int ret; bus_notify(dev, BUS_NOTIFY_BIND_DRIVER); ret = sysfs_create_link(&dev->driver->p->kobj, &dev->kobj, kobject_name(&dev->kobj)); if (ret) goto fail; ret = sysfs_create_link(&dev->kobj, &dev->driver->p->kobj, "driver"); if (ret) goto rm_dev; if (!IS_ENABLED(CONFIG_DEV_COREDUMP) || !dev->driver->coredump) return 0; ret = device_create_file(dev, &dev_attr_coredump); if (!ret) return 0; sysfs_remove_link(&dev->kobj, "driver"); rm_dev: sysfs_remove_link(&dev->driver->p->kobj, kobject_name(&dev->kobj)); fail: return ret; } static void driver_sysfs_remove(struct device *dev) { struct device_driver *drv = dev->driver; if (drv) { if (drv->coredump) device_remove_file(dev, &dev_attr_coredump); sysfs_remove_link(&drv->p->kobj, kobject_name(&dev->kobj)); sysfs_remove_link(&dev->kobj, "driver"); } } /** * device_bind_driver - bind a driver to one device. * @dev: device. * * Allow manual attachment of a driver to a device. * Caller must have already set @dev->driver. * * Note that this does not modify the bus reference count. * Please verify that is accounted for before calling this. * (It is ok to call with no other effort from a driver's probe() method.) * * This function must be called with the device lock held. * * Callers should prefer to use device_driver_attach() instead. 
*/ int device_bind_driver(struct device *dev) { int ret; ret = driver_sysfs_add(dev); if (!ret) { device_links_force_bind(dev); driver_bound(dev); } else bus_notify(dev, BUS_NOTIFY_DRIVER_NOT_BOUND); return ret; } EXPORT_SYMBOL_GPL(device_bind_driver); static atomic_t probe_count = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); static ssize_t state_synced_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ret = 0; if (strcmp("1", buf)) return -EINVAL; device_lock(dev); if (!dev->state_synced) { dev->state_synced = true; dev_sync_state(dev); } else { ret = -EINVAL; } device_unlock(dev); return ret ? ret : count; } static ssize_t state_synced_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); val = dev->state_synced; device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static DEVICE_ATTR_RW(state_synced); static void device_unbind_cleanup(struct device *dev) { devres_release_all(dev); arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; dev->driver = NULL; dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); pm_runtime_reinit(dev); dev_pm_set_driver_flags(dev, 0); } static void device_remove(struct device *dev) { device_remove_file(dev, &dev_attr_state_synced); device_remove_groups(dev, dev->driver->dev_groups); if (dev->bus && dev->bus->remove) dev->bus->remove(dev); else if (dev->driver->remove) dev->driver->remove(dev); } static int call_driver_probe(struct device *dev, struct device_driver *drv) { int ret = 0; if (dev->bus->probe) ret = dev->bus->probe(dev); else if (drv->probe) ret = drv->probe(dev); switch (ret) { case 0: break; case -EPROBE_DEFER: /* Driver requested deferred probing */ dev_dbg(dev, "Driver %s requests probe deferral\n", drv->name); break; case -ENODEV: case -ENXIO: dev_dbg(dev, "probe with driver %s rejects match %d\n", drv->name, ret); break; default: /* driver matched but the probe failed */ dev_err(dev, "probe with driver %s failed with error %d\n", drv->name, ret); break; } return ret; } static int really_probe(struct device *dev, struct device_driver *drv) { bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) && !drv->suppress_bind_attrs; int ret, link_ret; if (defer_all_probes) { /* * Value of defer_all_probes can be set only by * device_block_probing() which, in turn, will call * wait_for_device_probe() right after that to avoid any races. 
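 *
 * Illustrative sketch (not part of the original source) of that pairing,
 * e.g. around entering a system-wide low power state:
 *
 *	device_block_probing();
 *	// ... suspend devices, enter the sleep state, resume ...
 *	device_unblock_probing();
 *
 * Since device_block_probing() flushes running probes first, any probe that
 * already passed this check completes before probing is frozen.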
*/ dev_dbg(dev, "Driver %s force probe deferral\n", drv->name); return -EPROBE_DEFER; } link_ret = device_links_check_suppliers(dev); if (link_ret == -EPROBE_DEFER) return link_ret; dev_dbg(dev, "bus: '%s': %s: probing driver %s with device\n", drv->bus->name, __func__, drv->name); if (!list_empty(&dev->devres_head)) { dev_crit(dev, "Resources present before probing\n"); ret = -EBUSY; goto done; } re_probe: dev->driver = drv; /* If using pinctrl, bind pins now before probing */ ret = pinctrl_bind_pins(dev); if (ret) goto pinctrl_bind_failed; if (dev->bus->dma_configure) { ret = dev->bus->dma_configure(dev); if (ret) goto pinctrl_bind_failed; } ret = driver_sysfs_add(dev); if (ret) { dev_err(dev, "%s: driver_sysfs_add failed\n", __func__); goto sysfs_failed; } if (dev->pm_domain && dev->pm_domain->activate) { ret = dev->pm_domain->activate(dev); if (ret) goto probe_failed; } ret = call_driver_probe(dev, drv); if (ret) { /* * If fw_devlink_best_effort is active (denoted by -EAGAIN), the * device might actually probe properly once some of its missing * suppliers have probed. So, treat this as if the driver * returned -EPROBE_DEFER. */ if (link_ret == -EAGAIN) ret = -EPROBE_DEFER; /* * Return probe errors as positive values so that the callers * can distinguish them from other errors. */ ret = -ret; goto probe_failed; } ret = device_add_groups(dev, drv->dev_groups); if (ret) { dev_err(dev, "device_add_groups() failed\n"); goto dev_groups_failed; } if (dev_has_sync_state(dev)) { ret = device_create_file(dev, &dev_attr_state_synced); if (ret) { dev_err(dev, "state_synced sysfs add failed\n"); goto dev_sysfs_state_synced_failed; } } if (test_remove) { test_remove = false; device_remove(dev); driver_sysfs_remove(dev); if (dev->bus && dev->bus->dma_cleanup) dev->bus->dma_cleanup(dev); device_unbind_cleanup(dev); goto re_probe; } pinctrl_init_done(dev); if (dev->pm_domain && dev->pm_domain->sync) dev->pm_domain->sync(dev); driver_bound(dev); dev_dbg(dev, "bus: '%s': %s: bound device to driver %s\n", drv->bus->name, __func__, drv->name); goto done; dev_sysfs_state_synced_failed: dev_groups_failed: device_remove(dev); probe_failed: driver_sysfs_remove(dev); sysfs_failed: bus_notify(dev, BUS_NOTIFY_DRIVER_NOT_BOUND); if (dev->bus && dev->bus->dma_cleanup) dev->bus->dma_cleanup(dev); pinctrl_bind_failed: device_links_no_driver(dev); device_unbind_cleanup(dev); done: return ret; } /* * For initcall_debug, show the driver probe time. */ static int really_probe_debug(struct device *dev, struct device_driver *drv) { ktime_t calltime, rettime; int ret; calltime = ktime_get(); ret = really_probe(dev, drv); rettime = ktime_get(); /* * Don't change this to pr_debug() because that requires * CONFIG_DYNAMIC_DEBUG and we want a simple 'initcall_debug' on the * kernel commandline to print this all the time at the debug level. */ printk(KERN_DEBUG "probe of %s returned %d after %lld usecs\n", dev_name(dev), ret, ktime_us_delta(rettime, calltime)); return ret; } /** * driver_probe_done * Determine if the probe sequence is finished or not. * * Should somehow figure out how to use a semaphore, not an atomic variable... */ bool __init driver_probe_done(void) { int local_probe_count = atomic_read(&probe_count); pr_debug("%s: probe_count = %d\n", __func__, local_probe_count); return !local_probe_count; } /** * wait_for_device_probe * Wait for device probing to be completed. 
*/ void wait_for_device_probe(void) { /* wait for the deferred probe workqueue to finish */ flush_work(&deferred_probe_work); /* wait for the known devices to complete their probing */ wait_event(probe_waitqueue, atomic_read(&probe_count) == 0); async_synchronize_full(); } EXPORT_SYMBOL_GPL(wait_for_device_probe); static int __driver_probe_device(struct device_driver *drv, struct device *dev) { int ret = 0; if (dev->p->dead || !device_is_registered(dev)) return -ENODEV; if (dev->driver) return -EBUSY; dev->can_match = true; dev_dbg(dev, "bus: '%s': %s: matched device with driver %s\n", drv->bus->name, __func__, drv->name); pm_runtime_get_suppliers(dev); if (dev->parent) pm_runtime_get_sync(dev->parent); pm_runtime_barrier(dev); if (initcall_debug) ret = really_probe_debug(dev, drv); else ret = really_probe(dev, drv); pm_request_idle(dev); if (dev->parent) pm_runtime_put(dev->parent); pm_runtime_put_suppliers(dev); return ret; } /** * driver_probe_device - attempt to bind device & driver together * @drv: driver to bind a device to * @dev: device to try to bind to the driver * * This function returns -ENODEV if the device is not registered, -EBUSY if it * already has a driver, 0 if the device is bound successfully and a positive * (inverted) error code for failures from the ->probe method. * * This function must be called with @dev lock held. When called for a * USB interface, @dev->parent lock must be held as well. * * If the device has a parent, runtime-resume the parent before driver probing. */ static int driver_probe_device(struct device_driver *drv, struct device *dev) { int trigger_count = atomic_read(&deferred_trigger_count); int ret; atomic_inc(&probe_count); ret = __driver_probe_device(drv, dev); if (ret == -EPROBE_DEFER || ret == EPROBE_DEFER) { driver_deferred_probe_add(dev); /* * Did a trigger occur while probing? Need to re-trigger if yes */ if (trigger_count != atomic_read(&deferred_trigger_count) && !defer_all_probes) driver_deferred_probe_trigger(); } atomic_dec(&probe_count); wake_up_all(&probe_waitqueue); return ret; } static inline bool cmdline_requested_async_probing(const char *drv_name) { bool async_drv; async_drv = parse_option_str(async_probe_drv_names, drv_name); return (async_probe_default != async_drv); } /* The option format is "driver_async_probe=drv_name1,drv_name2,..." */ static int __init save_async_options(char *buf) { if (strlen(buf) >= ASYNC_DRV_NAMES_MAX_LEN) pr_warn("Too long list of driver names for 'driver_async_probe'!\n"); strscpy(async_probe_drv_names, buf, ASYNC_DRV_NAMES_MAX_LEN); async_probe_default = parse_option_str(async_probe_drv_names, "*"); return 1; } __setup("driver_async_probe=", save_async_options); static bool driver_allows_async_probing(struct device_driver *drv) { switch (drv->probe_type) { case PROBE_PREFER_ASYNCHRONOUS: return true; case PROBE_FORCE_SYNCHRONOUS: return false; default: if (cmdline_requested_async_probing(drv->name)) return true; if (module_requested_async_probing(drv->owner)) return true; return false; } } struct device_attach_data { struct device *dev; /* * Indicates whether we are considering asynchronous probing or * not. Only initial binding after device or driver registration * (including deferral processing) may be done asynchronously, the * rest is always synchronous, as we expect it is being done by * request from userspace. */ bool check_async; /* * Indicates if we are binding synchronous or asynchronous drivers. 
* When asynchronous probing is enabled we'll execute 2 passes * over drivers: first pass doing synchronous probing and second * doing asynchronous probing (if synchronous did not succeed - * most likely because there was no driver requiring synchronous * probing - and we found asynchronous driver during first pass). * The 2 passes are done because we can't shoot asynchronous * probe for given device and driver from bus_for_each_drv() since * driver pointer is not guaranteed to stay valid once * bus_for_each_drv() iterates to the next driver on the bus. */ bool want_async; /* * We'll set have_async to 'true' if, while scanning for matching * driver, we'll encounter one that requests asynchronous probing. */ bool have_async; }; static int __device_attach_driver(struct device_driver *drv, void *_data) { struct device_attach_data *data = _data; struct device *dev = data->dev; bool async_allowed; int ret; ret = driver_match_device(drv, dev); if (ret == 0) { /* no match */ return 0; } else if (ret == -EPROBE_DEFER) { dev_dbg(dev, "Device match requests probe deferral\n"); dev->can_match = true; driver_deferred_probe_add(dev); /* * Device can't match with a driver right now, so don't attempt * to match or bind with other drivers on the bus. */ return ret; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); return ret; } /* ret > 0 means positive match */ async_allowed = driver_allows_async_probing(drv); if (async_allowed) data->have_async = true; if (data->check_async && async_allowed != data->want_async) return 0; /* * Ignore errors returned by ->probe so that the next driver can try * its luck. */ ret = driver_probe_device(drv, dev); if (ret < 0) return ret; return ret == 0; } static void __device_attach_async_helper(void *_dev, async_cookie_t cookie) { struct device *dev = _dev; struct device_attach_data data = { .dev = dev, .check_async = true, .want_async = true, }; device_lock(dev); /* * Check if device has already been removed or claimed. This may * happen with driver loading, device discovery/registration, * and deferred probe processing happens all at once with * multiple threads. */ if (dev->p->dead || dev->driver) goto out_unlock; if (dev->parent) pm_runtime_get_sync(dev->parent); bus_for_each_drv(dev->bus, NULL, &data, __device_attach_driver); dev_dbg(dev, "async probe completed\n"); pm_request_idle(dev); if (dev->parent) pm_runtime_put(dev->parent); out_unlock: device_unlock(dev); put_device(dev); } static int __device_attach(struct device *dev, bool allow_async) { int ret = 0; bool async = false; device_lock(dev); if (dev->p->dead) { goto out_unlock; } else if (dev->driver) { if (device_is_bound(dev)) { ret = 1; goto out_unlock; } ret = device_bind_driver(dev); if (ret == 0) ret = 1; else { dev->driver = NULL; ret = 0; } } else { struct device_attach_data data = { .dev = dev, .check_async = allow_async, .want_async = false, }; if (dev->parent) pm_runtime_get_sync(dev->parent); ret = bus_for_each_drv(dev->bus, NULL, &data, __device_attach_driver); if (!ret && allow_async && data.have_async) { /* * If we could not find appropriate driver * synchronously and we are allowed to do * async probes and there are drivers that * want to probe asynchronously, we'll * try them. 
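 *
 * Illustrative note (not part of the original source): a driver typically
 * reaches this asynchronous path by opting in through its probe_type, e.g.:
 *
 *	static struct platform_driver foo_driver = {	// hypothetical driver
 *		.probe = foo_probe,
 *		.driver = {
 *			.name = "foo",
 *			.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 *		},
 *	};
 *
 * or by the administrator listing it in the driver_async_probe= option
 * handled above.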
*/ dev_dbg(dev, "scheduling asynchronous probe\n"); get_device(dev); async = true; } else { pm_request_idle(dev); } if (dev->parent) pm_runtime_put(dev->parent); } out_unlock: device_unlock(dev); if (async) async_schedule_dev(__device_attach_async_helper, dev); return ret; } /** * device_attach - try to attach device to a driver. * @dev: device. * * Walk the list of drivers that the bus has and call * driver_probe_device() for each pair. If a compatible * pair is found, break out and return. * * Returns 1 if the device was bound to a driver; * 0 if no matching driver was found; * -ENODEV if the device is not registered. * * When called for a USB interface, @dev->parent lock must be held. */ int device_attach(struct device *dev) { return __device_attach(dev, false); } EXPORT_SYMBOL_GPL(device_attach); void device_initial_probe(struct device *dev) { __device_attach(dev, true); } /* * __device_driver_lock - acquire locks needed to manipulate dev->drv * @dev: Device we will update driver info for * @parent: Parent device. Needed if the bus requires parent lock * * This function will take the required locks for manipulating dev->drv. * Normally this will just be the @dev lock, but when called for a USB * interface, @parent lock will be held as well. */ static void __device_driver_lock(struct device *dev, struct device *parent) { if (parent && dev->bus->need_parent_lock) device_lock(parent); device_lock(dev); } /* * __device_driver_unlock - release locks needed to manipulate dev->drv * @dev: Device we will update driver info for * @parent: Parent device. Needed if the bus requires parent lock * * This function will release the required locks for manipulating dev->drv. * Normally this will just be the @dev lock, but when called for a * USB interface, @parent lock will be released as well. */ static void __device_driver_unlock(struct device *dev, struct device *parent) { device_unlock(dev); if (parent && dev->bus->need_parent_lock) device_unlock(parent); } /** * device_driver_attach - attach a specific driver to a specific device * @drv: Driver to attach * @dev: Device to attach it to * * Manually attach driver to a device. Will acquire both @dev lock and * @dev->parent lock if needed. Returns 0 on success, -ERR on failure. */ int device_driver_attach(struct device_driver *drv, struct device *dev) { int ret; __device_driver_lock(dev, dev->parent); ret = __driver_probe_device(drv, dev); __device_driver_unlock(dev, dev->parent); /* also return probe errors as normal negative errnos */ if (ret > 0) ret = -ret; if (ret == -EPROBE_DEFER) return -EAGAIN; return ret; } EXPORT_SYMBOL_GPL(device_driver_attach); static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie) { struct device *dev = _dev; struct device_driver *drv; int ret; __device_driver_lock(dev, dev->parent); drv = dev->p->async_driver; dev->p->async_driver = NULL; ret = driver_probe_device(drv, dev); __device_driver_unlock(dev, dev->parent); dev_dbg(dev, "driver %s async attach completed: %d\n", drv->name, ret); put_device(dev); } static int __driver_attach(struct device *dev, void *data) { struct device_driver *drv = data; bool async = false; int ret; /* * Lock device and try to bind to it. We drop the error * here and always return 0, because we need to keep trying * to bind to devices and some drivers will return an error * simply if it didn't support the device. * * driver_probe_device() will spit a warning if there * is an error. 
*/ ret = driver_match_device(drv, dev); if (ret == 0) { /* no match */ return 0; } else if (ret == -EPROBE_DEFER) { dev_dbg(dev, "Device match requests probe deferral\n"); dev->can_match = true; driver_deferred_probe_add(dev); /* * Driver could not match with device, but may match with * another device on the bus. */ return 0; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); /* * Driver could not match with device, but may match with * another device on the bus. */ return 0; } /* ret > 0 means positive match */ if (driver_allows_async_probing(drv)) { /* * Instead of probing the device synchronously we will * probe it asynchronously to allow for more parallelism. * * We only take the device lock here in order to guarantee * that the dev->driver and async_driver fields are protected */ dev_dbg(dev, "probing driver %s asynchronously\n", drv->name); device_lock(dev); if (!dev->driver && !dev->p->async_driver) { get_device(dev); dev->p->async_driver = drv; async = true; } device_unlock(dev); if (async) async_schedule_dev(__driver_attach_async_helper, dev); return 0; } __device_driver_lock(dev, dev->parent); driver_probe_device(drv, dev); __device_driver_unlock(dev, dev->parent); return 0; } /** * driver_attach - try to bind driver to devices. * @drv: driver. * * Walk the list of devices that the bus has on it and try to * match the driver with each one. If driver_probe_device() * returns 0 and the @dev->driver is set, we've found a * compatible pair. */ int driver_attach(struct device_driver *drv) { return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); } EXPORT_SYMBOL_GPL(driver_attach); /* * __device_release_driver() must be called with @dev lock held. * When called for a USB interface, @dev->parent lock must be held as well. */ static void __device_release_driver(struct device *dev, struct device *parent) { struct device_driver *drv; drv = dev->driver; if (drv) { pm_runtime_get_sync(dev); while (device_links_busy(dev)) { __device_driver_unlock(dev, parent); device_links_unbind_consumers(dev); __device_driver_lock(dev, parent); /* * A concurrent invocation of the same function might * have released the driver successfully while this one * was waiting, so check for that. */ if (dev->driver != drv) { pm_runtime_put(dev); return; } } driver_sysfs_remove(dev); bus_notify(dev, BUS_NOTIFY_UNBIND_DRIVER); pm_runtime_put_sync(dev); device_remove(dev); if (dev->bus && dev->bus->dma_cleanup) dev->bus->dma_cleanup(dev); device_unbind_cleanup(dev); device_links_driver_cleanup(dev); klist_remove(&dev->p->knode_driver); device_pm_check_callbacks(dev); bus_notify(dev, BUS_NOTIFY_UNBOUND_DRIVER); kobject_uevent(&dev->kobj, KOBJ_UNBIND); } } void device_release_driver_internal(struct device *dev, struct device_driver *drv, struct device *parent) { __device_driver_lock(dev, parent); if (!drv || drv == dev->driver) __device_release_driver(dev, parent); __device_driver_unlock(dev, parent); } /** * device_release_driver - manually detach device from driver. * @dev: device. * * Manually detach device from driver. * When called for a USB interface, @dev->parent lock must be held. * * If this function is to be called with @dev->parent lock held, ensure that * the device's consumers are unbound in advance or that their locks can be * acquired under the @dev->parent lock. */ void device_release_driver(struct device *dev) { /* * If anyone calls device_release_driver() recursively from * within their ->remove callback for the same device, they * will deadlock right here. 
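 *
 * Illustrative anti-pattern (not part of the original source):
 *
 *	static void foo_remove(struct device *dev)	// hypothetical ->remove()
 *	{
 *		device_release_driver(dev);	// deadlocks: @dev is already locked here
 *	}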
*/ device_release_driver_internal(dev, NULL, NULL); } EXPORT_SYMBOL_GPL(device_release_driver); /** * device_driver_detach - detach driver from a specific device * @dev: device to detach driver from * * Detach driver from device. Will acquire both @dev lock and @dev->parent * lock if needed. */ void device_driver_detach(struct device *dev) { device_release_driver_internal(dev, NULL, dev->parent); } /** * driver_detach - detach driver from all devices it controls. * @drv: driver. */ void driver_detach(struct device_driver *drv) { struct device_private *dev_prv; struct device *dev; if (driver_allows_async_probing(drv)) async_synchronize_full(); for (;;) { spin_lock(&drv->p->klist_devices.k_lock); if (list_empty(&drv->p->klist_devices.k_list)) { spin_unlock(&drv->p->klist_devices.k_lock); break; } dev_prv = list_last_entry(&drv->p->klist_devices.k_list, struct device_private, knode_driver.n_node); dev = dev_prv->device; get_device(dev); spin_unlock(&drv->p->klist_devices.k_lock); device_release_driver_internal(dev, drv, dev->parent); put_device(dev); } }
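/*
 * Illustrative sketch (not part of the original source): the attach/detach
 * machinery above is normally reached indirectly.  A typical module only
 * registers a driver, e.g.:
 *
 *	static struct platform_driver foo_driver = {	// hypothetical driver
 *		.probe = foo_probe,
 *		.driver = { .name = "foo" },
 *	};
 *	module_platform_driver(foo_driver);
 *
 * Registration walks the bus through driver_attach(), unregistration tears
 * the bindings down through driver_detach(), and any probe that returned
 * -EPROBE_DEFER is retried by the deferred-probe machinery above.
 */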
// SPDX-License-Identifier: GPL-2.0-or-later /* * Main USB camera driver * * Copyright (C) 2008-2011 Jean-François Moine <http://moinejf.free.fr> * * Camera button input handling by Márton Németh * Copyright (C) 2009-2010 Márton Németh <nm127@freemail.hu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define GSPCA_VERSION "2.14.0" #include <linux/init.h> #include <linux/fs.h> #include <linux/vmalloc.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/pagemap.h> #include <linux/io.h> #include <asm/page.h> #include <linux/uaccess.h> #include <linux/ktime.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-fh.h> #include <media/v4l2-event.h> #include "gspca.h" #if IS_ENABLED(CONFIG_INPUT) #include <linux/input.h> #include <linux/usb/input.h> #endif /* global values */ #define DEF_NURBS 3 /* default number of URBs */ #if DEF_NURBS > MAX_NURBS #error "DEF_NURBS too big" #endif MODULE_AUTHOR("Jean-François Moine <http://moinejf.free.fr>"); MODULE_DESCRIPTION("GSPCA USB Camera Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(GSPCA_VERSION); int gspca_debug; EXPORT_SYMBOL(gspca_debug); static void PDEBUG_MODE(struct gspca_dev *gspca_dev, int debug, char *txt, __u32 pixfmt, int w, int h) { if ((pixfmt >> 24) >= '0' && (pixfmt >> 24) <= 'z') { gspca_dbg(gspca_dev, debug, "%s %c%c%c%c %dx%d\n", txt, pixfmt & 0xff, (pixfmt >> 8) & 0xff, (pixfmt >> 16) & 0xff, pixfmt >> 24, w, h); } else { gspca_dbg(gspca_dev, debug, "%s 0x%08x %dx%d\n", txt, pixfmt, w, h); } } /* specific memory types - !! should be different from V4L2_MEMORY_xxx */ #define GSPCA_MEMORY_NO 0 /* V4L2_MEMORY_xxx starts from 1 */ #define GSPCA_MEMORY_READ 7 /* * Input and interrupt endpoint handling functions */ #if IS_ENABLED(CONFIG_INPUT) static void int_irq(struct urb *urb) { struct gspca_dev *gspca_dev = (struct gspca_dev *) urb->context; int ret; ret = urb->status; switch (ret) { case 0: if (gspca_dev->sd_desc->int_pkt_scan(gspca_dev, urb->transfer_buffer, urb->actual_length) < 0) { gspca_err(gspca_dev, "Unknown packet received\n"); } break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: /* Stop is requested either by software or hardware is gone, * keep the ret value non-zero and don't resubmit later. 
*/ break; default: gspca_err(gspca_dev, "URB error %i, resubmitting\n", urb->status); urb->status = 0; ret = 0; } if (ret == 0) { ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) pr_err("Resubmit URB failed with error %i\n", ret); } } static int gspca_input_connect(struct gspca_dev *dev) { struct input_dev *input_dev; int err = 0; dev->input_dev = NULL; if (dev->sd_desc->int_pkt_scan || dev->sd_desc->other_input) { input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; usb_make_path(dev->dev, dev->phys, sizeof(dev->phys)); strlcat(dev->phys, "/input0", sizeof(dev->phys)); input_dev->name = dev->sd_desc->name; input_dev->phys = dev->phys; usb_to_input_id(dev->dev, &input_dev->id); input_dev->evbit[0] = BIT_MASK(EV_KEY); input_dev->keybit[BIT_WORD(KEY_CAMERA)] = BIT_MASK(KEY_CAMERA); input_dev->dev.parent = &dev->dev->dev; err = input_register_device(input_dev); if (err) { pr_err("Input device registration failed with error %i\n", err); input_dev->dev.parent = NULL; input_free_device(input_dev); } else { dev->input_dev = input_dev; } } return err; } static int alloc_and_submit_int_urb(struct gspca_dev *gspca_dev, struct usb_endpoint_descriptor *ep) { unsigned int buffer_len; int interval; struct urb *urb; struct usb_device *dev; void *buffer = NULL; int ret = -EINVAL; buffer_len = le16_to_cpu(ep->wMaxPacketSize); interval = ep->bInterval; gspca_dbg(gspca_dev, D_CONF, "found int in endpoint: 0x%x, buffer_len=%u, interval=%u\n", ep->bEndpointAddress, buffer_len, interval); dev = gspca_dev->dev; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { ret = -ENOMEM; goto error; } buffer = usb_alloc_coherent(dev, buffer_len, GFP_KERNEL, &urb->transfer_dma); if (!buffer) { ret = -ENOMEM; goto error_buffer; } usb_fill_int_urb(urb, dev, usb_rcvintpipe(dev, ep->bEndpointAddress), buffer, buffer_len, int_irq, (void *)gspca_dev, interval); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; ret = usb_submit_urb(urb, GFP_KERNEL); if (ret < 0) { gspca_err(gspca_dev, "submit int URB failed with error %i\n", ret); goto error_submit; } gspca_dev->int_urb = urb; return ret; error_submit: usb_free_coherent(dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); error_buffer: usb_free_urb(urb); error: return ret; } static void gspca_input_create_urb(struct gspca_dev *gspca_dev) { struct usb_interface *intf; struct usb_host_interface *intf_desc; struct usb_endpoint_descriptor *ep; int i; if (gspca_dev->sd_desc->int_pkt_scan) { intf = usb_ifnum_to_if(gspca_dev->dev, gspca_dev->iface); intf_desc = intf->cur_altsetting; for (i = 0; i < intf_desc->desc.bNumEndpoints; i++) { ep = &intf_desc->endpoint[i].desc; if (usb_endpoint_dir_in(ep) && usb_endpoint_xfer_int(ep)) { alloc_and_submit_int_urb(gspca_dev, ep); break; } } } } static void gspca_input_destroy_urb(struct gspca_dev *gspca_dev) { struct urb *urb; urb = gspca_dev->int_urb; if (urb) { gspca_dev->int_urb = NULL; usb_kill_urb(urb); usb_free_coherent(gspca_dev->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); } } #else static inline void gspca_input_destroy_urb(struct gspca_dev *gspca_dev) { } static inline void gspca_input_create_urb(struct gspca_dev *gspca_dev) { } static inline int gspca_input_connect(struct gspca_dev *dev) { return 0; } #endif /* * fill a video frame from an URB and resubmit */ static void fill_frame(struct gspca_dev *gspca_dev, struct urb *urb) { u8 *data; /* address of data in the iso message */ int i, len, st; cam_pkt_op pkt_scan; if (urb->status != 0) { if 
(urb->status == -ESHUTDOWN) return; /* disconnection */ #ifdef CONFIG_PM if (gspca_dev->frozen) return; #endif gspca_err(gspca_dev, "urb status: %d\n", urb->status); urb->status = 0; goto resubmit; } pkt_scan = gspca_dev->sd_desc->pkt_scan; for (i = 0; i < urb->number_of_packets; i++) { len = urb->iso_frame_desc[i].actual_length; /* check the packet status and length */ st = urb->iso_frame_desc[i].status; if (st) { gspca_dbg(gspca_dev, D_PACK, "ISOC data error: [%d] len=%d, status=%d\n", i, len, st); gspca_dev->last_packet_type = DISCARD_PACKET; continue; } if (len == 0) { if (gspca_dev->empty_packet == 0) gspca_dev->empty_packet = 1; continue; } /* let the packet be analyzed by the subdriver */ gspca_dbg(gspca_dev, D_PACK, "packet [%d] o:%d l:%d\n", i, urb->iso_frame_desc[i].offset, len); data = (u8 *) urb->transfer_buffer + urb->iso_frame_desc[i].offset; pkt_scan(gspca_dev, data, len); } resubmit: if (!gspca_dev->streaming) return; /* resubmit the URB */ st = usb_submit_urb(urb, GFP_ATOMIC); if (st < 0) pr_err("usb_submit_urb() ret %d\n", st); } /* * ISOC message interrupt from the USB device * * Analyse each packet and call the subdriver for copy to the frame buffer. */ static void isoc_irq(struct urb *urb) { struct gspca_dev *gspca_dev = (struct gspca_dev *) urb->context; gspca_dbg(gspca_dev, D_PACK, "isoc irq\n"); if (!gspca_dev->streaming) return; fill_frame(gspca_dev, urb); } /* * bulk message interrupt from the USB device */ static void bulk_irq(struct urb *urb) { struct gspca_dev *gspca_dev = (struct gspca_dev *) urb->context; int st; gspca_dbg(gspca_dev, D_PACK, "bulk irq\n"); if (!gspca_dev->streaming) return; switch (urb->status) { case 0: break; case -ESHUTDOWN: return; /* disconnection */ default: #ifdef CONFIG_PM if (gspca_dev->frozen) return; #endif gspca_err(gspca_dev, "urb status: %d\n", urb->status); urb->status = 0; goto resubmit; } gspca_dbg(gspca_dev, D_PACK, "packet l:%d\n", urb->actual_length); gspca_dev->sd_desc->pkt_scan(gspca_dev, urb->transfer_buffer, urb->actual_length); resubmit: if (!gspca_dev->streaming) return; /* resubmit the URB */ if (gspca_dev->cam.bulk_nurbs != 0) { st = usb_submit_urb(urb, GFP_ATOMIC); if (st < 0) pr_err("usb_submit_urb() ret %d\n", st); } } /* * add data to the current frame * * This function is called by the subdrivers at interrupt level. * * To build a frame, these ones must add * - one FIRST_PACKET * - 0 or many INTER_PACKETs * - one LAST_PACKET * DISCARD_PACKET invalidates the whole frame. 
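 *
 * Illustrative sketch (not part of the original source) of a minimal
 * pkt_scan callback for a camera whose frames start with the JPEG SOI
 * marker (an assumed framing scheme):
 *
 *	static void sd_pkt_scan(struct gspca_dev *gspca_dev,
 *				u8 *data, int len)
 *	{
 *		if (len >= 2 && data[0] == 0xff && data[1] == 0xd8) {
 *			// close the previous frame, then start a new one
 *			gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0);
 *			gspca_frame_add(gspca_dev, FIRST_PACKET, data, len);
 *		} else {
 *			gspca_frame_add(gspca_dev, INTER_PACKET, data, len);
 *		}
 *	}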
*/ void gspca_frame_add(struct gspca_dev *gspca_dev, enum gspca_packet_type packet_type, const u8 *data, int len) { struct gspca_buffer *buf; unsigned long flags; gspca_dbg(gspca_dev, D_PACK, "add t:%d l:%d\n", packet_type, len); spin_lock_irqsave(&gspca_dev->qlock, flags); buf = list_first_entry_or_null(&gspca_dev->buf_list, typeof(*buf), list); spin_unlock_irqrestore(&gspca_dev->qlock, flags); if (packet_type == FIRST_PACKET) { /* if there is no queued buffer, discard the whole frame */ if (!buf) { gspca_dev->last_packet_type = DISCARD_PACKET; gspca_dev->sequence++; return; } gspca_dev->image = vb2_plane_vaddr(&buf->vb.vb2_buf, 0); gspca_dev->image_len = 0; } else { switch (gspca_dev->last_packet_type) { case DISCARD_PACKET: if (packet_type == LAST_PACKET) { gspca_dev->last_packet_type = packet_type; gspca_dev->image = NULL; gspca_dev->image_len = 0; } return; case LAST_PACKET: return; } } /* append the packet to the frame buffer */ if (len > 0) { if (gspca_dev->image_len + len > PAGE_ALIGN(gspca_dev->pixfmt.sizeimage)) { gspca_err(gspca_dev, "frame overflow %d > %d\n", gspca_dev->image_len + len, PAGE_ALIGN(gspca_dev->pixfmt.sizeimage)); packet_type = DISCARD_PACKET; } else { /* !! image is NULL only when last pkt is LAST or DISCARD if (gspca_dev->image == NULL) { pr_err("gspca_frame_add() image == NULL\n"); return; } */ memcpy(gspca_dev->image + gspca_dev->image_len, data, len); gspca_dev->image_len += len; } } gspca_dev->last_packet_type = packet_type; /* if last packet, invalidate packet concatenation until * next first packet, wake up the application and advance * in the queue */ if (packet_type == LAST_PACKET) { if (gspca_dev->image_len > gspca_dev->pixfmt.sizeimage) gspca_dev->image_len = gspca_dev->pixfmt.sizeimage; spin_lock_irqsave(&gspca_dev->qlock, flags); list_del(&buf->list); spin_unlock_irqrestore(&gspca_dev->qlock, flags); buf->vb.vb2_buf.timestamp = ktime_get_ns(); vb2_set_plane_payload(&buf->vb.vb2_buf, 0, gspca_dev->image_len); buf->vb.sequence = gspca_dev->sequence++; buf->vb.field = V4L2_FIELD_NONE; gspca_dbg(gspca_dev, D_FRAM, "frame complete len:%d\n", gspca_dev->image_len); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_DONE); gspca_dev->image = NULL; gspca_dev->image_len = 0; } } EXPORT_SYMBOL(gspca_frame_add); static void destroy_urbs(struct gspca_dev *gspca_dev) { struct urb *urb; unsigned int i; gspca_dbg(gspca_dev, D_STREAM, "kill transfer\n"); /* Killing all URBs guarantee that no URB completion * handler is running. Therefore, there shouldn't * be anyone trying to access gspca_dev->urb[i] */ for (i = 0; i < MAX_NURBS; i++) usb_kill_urb(gspca_dev->urb[i]); gspca_dbg(gspca_dev, D_STREAM, "releasing urbs\n"); for (i = 0; i < MAX_NURBS; i++) { urb = gspca_dev->urb[i]; if (!urb) continue; gspca_dev->urb[i] = NULL; usb_free_coherent(gspca_dev->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); } } static int gspca_set_alt0(struct gspca_dev *gspca_dev) { int ret; if (gspca_dev->alt == 0) return 0; ret = usb_set_interface(gspca_dev->dev, gspca_dev->iface, 0); if (ret < 0) pr_err("set alt 0 err %d\n", ret); return ret; } /* * look for an input transfer endpoint in an alternate setting. * * If xfer_ep is invalid, return the first valid ep found, otherwise * look for exactly the ep with address equal to xfer_ep. 
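 *
 * For example (illustrative, not part of the original source):
 *
 *	ep = alt_xfer(alt, USB_ENDPOINT_XFER_ISOC, -1);
 *
 * returns the first IN isochronous endpoint of @alt, while passing an
 * explicit endpoint address matches only that endpoint.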
*/ static struct usb_host_endpoint *alt_xfer(struct usb_host_interface *alt, int xfer, int xfer_ep) { struct usb_host_endpoint *ep; int i, attr; for (i = 0; i < alt->desc.bNumEndpoints; i++) { ep = &alt->endpoint[i]; attr = ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK; if (attr == xfer && ep->desc.wMaxPacketSize != 0 && usb_endpoint_dir_in(&ep->desc) && (xfer_ep < 0 || ep->desc.bEndpointAddress == xfer_ep)) return ep; } return NULL; } /* compute the minimum bandwidth for the current transfer */ static u32 which_bandwidth(struct gspca_dev *gspca_dev) { u32 bandwidth; /* get the (max) image size */ bandwidth = gspca_dev->pixfmt.sizeimage; /* if the image is compressed, estimate its mean size */ if (!gspca_dev->cam.needs_full_bandwidth && bandwidth < gspca_dev->pixfmt.width * gspca_dev->pixfmt.height) bandwidth = bandwidth * 3 / 8; /* 0.375 */ /* estimate the frame rate */ if (gspca_dev->sd_desc->get_streamparm) { struct v4l2_streamparm parm; gspca_dev->sd_desc->get_streamparm(gspca_dev, &parm); bandwidth *= parm.parm.capture.timeperframe.denominator; bandwidth /= parm.parm.capture.timeperframe.numerator; } else { /* don't hope more than 15 fps with USB 1.1 and * image resolution >= 640x480 */ if (gspca_dev->pixfmt.width >= 640 && gspca_dev->dev->speed == USB_SPEED_FULL) bandwidth *= 15; /* 15 fps */ else bandwidth *= 30; /* 30 fps */ } gspca_dbg(gspca_dev, D_STREAM, "min bandwidth: %d\n", bandwidth); return bandwidth; } /* endpoint table */ #define MAX_ALT 16 struct ep_tb_s { u32 alt; u32 bandwidth; }; /* * build the table of the endpoints * and compute the minimum bandwidth for the image transfer */ static int build_isoc_ep_tb(struct gspca_dev *gspca_dev, struct usb_interface *intf, struct ep_tb_s *ep_tb) { struct usb_host_endpoint *ep; int i, j, nbalt, psize, found; u32 bandwidth, last_bw; nbalt = intf->num_altsetting; if (nbalt > MAX_ALT) nbalt = MAX_ALT; /* fixme: should warn */ /* build the endpoint table */ i = 0; last_bw = 0; for (;;) { ep_tb->bandwidth = 2000 * 2000 * 120; found = 0; for (j = 0; j < nbalt; j++) { ep = alt_xfer(&intf->altsetting[j], USB_ENDPOINT_XFER_ISOC, gspca_dev->xfer_ep); if (ep == NULL) continue; if (ep->desc.bInterval == 0) { pr_err("alt %d iso endp with 0 interval\n", j); continue; } psize = le16_to_cpu(ep->desc.wMaxPacketSize); psize = (psize & 0x07ff) * (1 + ((psize >> 11) & 3)); bandwidth = psize * 1000; if (gspca_dev->dev->speed == USB_SPEED_HIGH || gspca_dev->dev->speed >= USB_SPEED_SUPER) bandwidth *= 8; bandwidth /= 1 << (ep->desc.bInterval - 1); if (bandwidth <= last_bw) continue; if (bandwidth < ep_tb->bandwidth) { ep_tb->bandwidth = bandwidth; ep_tb->alt = j; found = 1; } } if (!found) break; gspca_dbg(gspca_dev, D_STREAM, "alt %d bandwidth %d\n", ep_tb->alt, ep_tb->bandwidth); last_bw = ep_tb->bandwidth; i++; ep_tb++; } /* * If the camera: * has a usb audio class interface (a built in usb mic); and * is a usb 1 full speed device; and * uses the max full speed iso bandwidth; and * and has more than 1 alt setting * then skip the highest alt setting to spare bandwidth for the mic */ if (gspca_dev->audio && gspca_dev->dev->speed == USB_SPEED_FULL && last_bw >= 1000000 && i > 1) { gspca_dbg(gspca_dev, D_STREAM, "dev has usb audio, skipping highest alt\n"); i--; ep_tb--; } /* get the requested bandwidth and start at the highest atlsetting */ bandwidth = which_bandwidth(gspca_dev); ep_tb--; while (i > 1) { ep_tb--; if (ep_tb->bandwidth < bandwidth) break; i--; } return i; } /* * create the URBs for image transfer */ static int create_urbs(struct 
gspca_dev *gspca_dev, struct usb_host_endpoint *ep) { struct urb *urb; int n, nurbs, i, psize, npkt, bsize; /* calculate the packet size and the number of packets */ psize = le16_to_cpu(ep->desc.wMaxPacketSize); if (!gspca_dev->cam.bulk) { /* isoc */ /* See paragraph 5.9 / table 5-11 of the usb 2.0 spec. */ if (gspca_dev->pkt_size == 0) psize = (psize & 0x07ff) * (1 + ((psize >> 11) & 3)); else psize = gspca_dev->pkt_size; npkt = gspca_dev->cam.npkt; if (npkt == 0) npkt = 32; /* default value */ bsize = psize * npkt; gspca_dbg(gspca_dev, D_STREAM, "isoc %d pkts size %d = bsize:%d\n", npkt, psize, bsize); nurbs = DEF_NURBS; } else { /* bulk */ npkt = 0; bsize = gspca_dev->cam.bulk_size; if (bsize == 0) bsize = psize; gspca_dbg(gspca_dev, D_STREAM, "bulk bsize:%d\n", bsize); if (gspca_dev->cam.bulk_nurbs != 0) nurbs = gspca_dev->cam.bulk_nurbs; else nurbs = 1; } for (n = 0; n < nurbs; n++) { urb = usb_alloc_urb(npkt, GFP_KERNEL); if (!urb) return -ENOMEM; gspca_dev->urb[n] = urb; urb->transfer_buffer = usb_alloc_coherent(gspca_dev->dev, bsize, GFP_KERNEL, &urb->transfer_dma); if (urb->transfer_buffer == NULL) { pr_err("usb_alloc_coherent failed\n"); return -ENOMEM; } urb->dev = gspca_dev->dev; urb->context = gspca_dev; urb->transfer_buffer_length = bsize; if (npkt != 0) { /* ISOC */ urb->pipe = usb_rcvisocpipe(gspca_dev->dev, ep->desc.bEndpointAddress); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->interval = 1 << (ep->desc.bInterval - 1); urb->complete = isoc_irq; urb->number_of_packets = npkt; for (i = 0; i < npkt; i++) { urb->iso_frame_desc[i].length = psize; urb->iso_frame_desc[i].offset = psize * i; } } else { /* bulk */ urb->pipe = usb_rcvbulkpipe(gspca_dev->dev, ep->desc.bEndpointAddress); urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; urb->complete = bulk_irq; } } return 0; } /* Note: both the queue and the usb locks should be held when calling this */ static void gspca_stream_off(struct gspca_dev *gspca_dev) { gspca_dev->streaming = false; gspca_dev->usb_err = 0; if (gspca_dev->sd_desc->stopN) gspca_dev->sd_desc->stopN(gspca_dev); destroy_urbs(gspca_dev); gspca_input_destroy_urb(gspca_dev); gspca_set_alt0(gspca_dev); if (gspca_dev->present) gspca_input_create_urb(gspca_dev); if (gspca_dev->sd_desc->stop0) gspca_dev->sd_desc->stop0(gspca_dev); gspca_dbg(gspca_dev, D_STREAM, "stream off OK\n"); } /* * start the USB transfer */ static int gspca_init_transfer(struct gspca_dev *gspca_dev) { struct usb_interface *intf; struct usb_host_endpoint *ep; struct urb *urb; struct ep_tb_s ep_tb[MAX_ALT]; int n, ret, xfer, alt, alt_idx; /* reset the streaming variables */ gspca_dev->image = NULL; gspca_dev->image_len = 0; gspca_dev->last_packet_type = DISCARD_PACKET; gspca_dev->usb_err = 0; /* do the specific subdriver stuff before endpoint selection */ intf = usb_ifnum_to_if(gspca_dev->dev, gspca_dev->iface); gspca_dev->alt = gspca_dev->cam.bulk ? intf->num_altsetting : 0; if (gspca_dev->sd_desc->isoc_init) { ret = gspca_dev->sd_desc->isoc_init(gspca_dev); if (ret < 0) return ret; } xfer = gspca_dev->cam.bulk ? 
USB_ENDPOINT_XFER_BULK : USB_ENDPOINT_XFER_ISOC; /* if bulk or the subdriver forced an altsetting, get the endpoint */ if (gspca_dev->alt != 0) { gspca_dev->alt--; /* (previous version compatibility) */ ep = alt_xfer(&intf->altsetting[gspca_dev->alt], xfer, gspca_dev->xfer_ep); if (ep == NULL) { pr_err("bad altsetting %d\n", gspca_dev->alt); return -EIO; } ep_tb[0].alt = gspca_dev->alt; alt_idx = 1; } else { /* else, compute the minimum bandwidth * and build the endpoint table */ alt_idx = build_isoc_ep_tb(gspca_dev, intf, ep_tb); if (alt_idx <= 0) { pr_err("no transfer endpoint found\n"); return -EIO; } } /* set the highest alternate setting and * loop until urb submit succeeds */ gspca_input_destroy_urb(gspca_dev); gspca_dev->alt = ep_tb[--alt_idx].alt; alt = -1; for (;;) { if (alt != gspca_dev->alt) { alt = gspca_dev->alt; if (intf->num_altsetting > 1) { ret = usb_set_interface(gspca_dev->dev, gspca_dev->iface, alt); if (ret < 0) { if (ret == -ENOSPC) goto retry; /*fixme: ugly*/ pr_err("set alt %d err %d\n", alt, ret); goto out; } } } if (!gspca_dev->cam.no_urb_create) { gspca_dbg(gspca_dev, D_STREAM, "init transfer alt %d\n", alt); ret = create_urbs(gspca_dev, alt_xfer(&intf->altsetting[alt], xfer, gspca_dev->xfer_ep)); if (ret < 0) { destroy_urbs(gspca_dev); goto out; } } /* clear the bulk endpoint */ if (gspca_dev->cam.bulk) usb_clear_halt(gspca_dev->dev, gspca_dev->urb[0]->pipe); /* start the cam */ ret = gspca_dev->sd_desc->start(gspca_dev); if (ret < 0) { destroy_urbs(gspca_dev); goto out; } v4l2_ctrl_handler_setup(gspca_dev->vdev.ctrl_handler); gspca_dev->streaming = true; /* some bulk transfers are started by the subdriver */ if (gspca_dev->cam.bulk && gspca_dev->cam.bulk_nurbs == 0) break; /* submit the URBs */ for (n = 0; n < MAX_NURBS; n++) { urb = gspca_dev->urb[n]; if (urb == NULL) break; ret = usb_submit_urb(urb, GFP_KERNEL); if (ret < 0) break; } if (ret >= 0) break; /* transfer is started */ /* something when wrong * stop the webcam and free the transfer resources */ gspca_stream_off(gspca_dev); if (ret != -ENOSPC) { pr_err("usb_submit_urb alt %d err %d\n", gspca_dev->alt, ret); goto out; } /* the bandwidth is not wide enough * negotiate or try a lower alternate setting */ retry: gspca_err(gspca_dev, "alt %d - bandwidth not wide enough, trying again\n", alt); msleep(20); /* wait for kill complete */ if (gspca_dev->sd_desc->isoc_nego) { ret = gspca_dev->sd_desc->isoc_nego(gspca_dev); if (ret < 0) goto out; } else { if (alt_idx <= 0) { pr_err("no transfer endpoint found\n"); ret = -EIO; goto out; } gspca_dev->alt = ep_tb[--alt_idx].alt; } } out: gspca_input_create_urb(gspca_dev); return ret; } static void gspca_set_default_mode(struct gspca_dev *gspca_dev) { int i; i = gspca_dev->cam.nmodes - 1; /* take the highest mode */ gspca_dev->curr_mode = i; gspca_dev->pixfmt = gspca_dev->cam.cam_mode[i]; /* does nothing if ctrl_handler == NULL */ v4l2_ctrl_handler_setup(gspca_dev->vdev.ctrl_handler); } static int wxh_to_mode(struct gspca_dev *gspca_dev, int width, int height, u32 pixelformat) { int i; for (i = 0; i < gspca_dev->cam.nmodes; i++) { if (width == gspca_dev->cam.cam_mode[i].width && height == gspca_dev->cam.cam_mode[i].height && pixelformat == gspca_dev->cam.cam_mode[i].pixelformat) return i; } return -EINVAL; } static int wxh_to_nearest_mode(struct gspca_dev *gspca_dev, int width, int height, u32 pixelformat) { int i; for (i = gspca_dev->cam.nmodes; --i >= 0; ) { if (width >= gspca_dev->cam.cam_mode[i].width && height >= gspca_dev->cam.cam_mode[i].height && pixelformat 
== gspca_dev->cam.cam_mode[i].pixelformat) return i; } for (i = gspca_dev->cam.nmodes; --i > 0; ) { if (width >= gspca_dev->cam.cam_mode[i].width && height >= gspca_dev->cam.cam_mode[i].height) break; } return i; } /* * search a mode with the right pixel format */ static int gspca_get_mode(struct gspca_dev *gspca_dev, int mode, int pixfmt) { int modeU, modeD; modeU = modeD = mode; while ((modeU < gspca_dev->cam.nmodes) || modeD >= 0) { if (--modeD >= 0) { if (gspca_dev->cam.cam_mode[modeD].pixelformat == pixfmt) return modeD; } if (++modeU < gspca_dev->cam.nmodes) { if (gspca_dev->cam.cam_mode[modeU].pixelformat == pixfmt) return modeU; } } return -EINVAL; } #ifdef CONFIG_VIDEO_ADV_DEBUG static int vidioc_g_chip_info(struct file *file, void *priv, struct v4l2_dbg_chip_info *chip) { struct gspca_dev *gspca_dev = video_drvdata(file); gspca_dev->usb_err = 0; if (gspca_dev->sd_desc->get_chip_info) return gspca_dev->sd_desc->get_chip_info(gspca_dev, chip); return chip->match.addr ? -EINVAL : 0; } static int vidioc_g_register(struct file *file, void *priv, struct v4l2_dbg_register *reg) { struct gspca_dev *gspca_dev = video_drvdata(file); gspca_dev->usb_err = 0; return gspca_dev->sd_desc->get_register(gspca_dev, reg); } static int vidioc_s_register(struct file *file, void *priv, const struct v4l2_dbg_register *reg) { struct gspca_dev *gspca_dev = video_drvdata(file); gspca_dev->usb_err = 0; return gspca_dev->sd_desc->set_register(gspca_dev, reg); } #endif static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fmtdesc *fmtdesc) { struct gspca_dev *gspca_dev = video_drvdata(file); int i, j, index; __u32 fmt_tb[8]; /* give an index to each format */ index = 0; for (i = gspca_dev->cam.nmodes; --i >= 0; ) { fmt_tb[index] = gspca_dev->cam.cam_mode[i].pixelformat; j = 0; for (;;) { if (fmt_tb[j] == fmt_tb[index]) break; j++; } if (j == index) { if (fmtdesc->index == index) break; /* new format */ index++; if (index >= ARRAY_SIZE(fmt_tb)) return -EINVAL; } } if (i < 0) return -EINVAL; /* no more format */ fmtdesc->pixelformat = fmt_tb[index]; return 0; } static int vidioc_g_fmt_vid_cap(struct file *file, void *_priv, struct v4l2_format *fmt) { struct gspca_dev *gspca_dev = video_drvdata(file); u32 priv = fmt->fmt.pix.priv; fmt->fmt.pix = gspca_dev->pixfmt; /* some drivers use priv internally, so keep the original value */ fmt->fmt.pix.priv = priv; return 0; } static int try_fmt_vid_cap(struct gspca_dev *gspca_dev, struct v4l2_format *fmt) { int w, h, mode, mode2; w = fmt->fmt.pix.width; h = fmt->fmt.pix.height; PDEBUG_MODE(gspca_dev, D_CONF, "try fmt cap", fmt->fmt.pix.pixelformat, w, h); /* search the nearest mode for width and height */ mode = wxh_to_nearest_mode(gspca_dev, w, h, fmt->fmt.pix.pixelformat); /* OK if right palette */ if (gspca_dev->cam.cam_mode[mode].pixelformat != fmt->fmt.pix.pixelformat) { /* else, search the closest mode with the same pixel format */ mode2 = gspca_get_mode(gspca_dev, mode, fmt->fmt.pix.pixelformat); if (mode2 >= 0) mode = mode2; } fmt->fmt.pix = gspca_dev->cam.cam_mode[mode]; if (gspca_dev->sd_desc->try_fmt) { /* pass original resolution to subdriver try_fmt */ fmt->fmt.pix.width = w; fmt->fmt.pix.height = h; gspca_dev->sd_desc->try_fmt(gspca_dev, fmt); } return mode; /* used when s_fmt */ } static int vidioc_try_fmt_vid_cap(struct file *file, void *_priv, struct v4l2_format *fmt) { struct gspca_dev *gspca_dev = video_drvdata(file); u32 priv = fmt->fmt.pix.priv; if (try_fmt_vid_cap(gspca_dev, fmt) < 0) return -EINVAL; /* some drivers use priv 
internally, so keep the original value */ fmt->fmt.pix.priv = priv; return 0; } static int vidioc_s_fmt_vid_cap(struct file *file, void *_priv, struct v4l2_format *fmt) { struct gspca_dev *gspca_dev = video_drvdata(file); u32 priv = fmt->fmt.pix.priv; int mode; if (vb2_is_busy(&gspca_dev->queue)) return -EBUSY; mode = try_fmt_vid_cap(gspca_dev, fmt); if (mode < 0) return -EINVAL; gspca_dev->curr_mode = mode; if (gspca_dev->sd_desc->try_fmt) /* subdriver try_fmt can modify format parameters */ gspca_dev->pixfmt = fmt->fmt.pix; else gspca_dev->pixfmt = gspca_dev->cam.cam_mode[mode]; /* some drivers use priv internally, so keep the original value */ fmt->fmt.pix.priv = priv; return 0; } static int vidioc_enum_framesizes(struct file *file, void *priv, struct v4l2_frmsizeenum *fsize) { struct gspca_dev *gspca_dev = video_drvdata(file); int i; __u32 index = 0; if (gspca_dev->sd_desc->enum_framesizes) return gspca_dev->sd_desc->enum_framesizes(gspca_dev, fsize); for (i = 0; i < gspca_dev->cam.nmodes; i++) { if (fsize->pixel_format != gspca_dev->cam.cam_mode[i].pixelformat) continue; if (fsize->index == index) { fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; fsize->discrete.width = gspca_dev->cam.cam_mode[i].width; fsize->discrete.height = gspca_dev->cam.cam_mode[i].height; return 0; } index++; } return -EINVAL; } static int vidioc_enum_frameintervals(struct file *filp, void *priv, struct v4l2_frmivalenum *fival) { struct gspca_dev *gspca_dev = video_drvdata(filp); int mode; __u32 i; mode = wxh_to_mode(gspca_dev, fival->width, fival->height, fival->pixel_format); if (mode < 0) return -EINVAL; if (gspca_dev->cam.mode_framerates == NULL || gspca_dev->cam.mode_framerates[mode].nrates == 0) return -EINVAL; if (fival->pixel_format != gspca_dev->cam.cam_mode[mode].pixelformat) return -EINVAL; for (i = 0; i < gspca_dev->cam.mode_framerates[mode].nrates; i++) { if (fival->index == i) { fival->type = V4L2_FRMIVAL_TYPE_DISCRETE; fival->discrete.numerator = 1; fival->discrete.denominator = gspca_dev->cam.mode_framerates[mode].rates[i]; return 0; } } return -EINVAL; } static void gspca_release(struct v4l2_device *v4l2_device) { struct gspca_dev *gspca_dev = container_of(v4l2_device, struct gspca_dev, v4l2_dev); v4l2_ctrl_handler_free(gspca_dev->vdev.ctrl_handler); v4l2_device_unregister(&gspca_dev->v4l2_dev); kfree(gspca_dev->usb_buf); kfree(gspca_dev); } static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { struct gspca_dev *gspca_dev = video_drvdata(file); strscpy((char *)cap->driver, gspca_dev->sd_desc->name, sizeof(cap->driver)); if (gspca_dev->dev->product != NULL) { strscpy((char *)cap->card, gspca_dev->dev->product, sizeof(cap->card)); } else { snprintf((char *) cap->card, sizeof cap->card, "USB Camera (%04x:%04x)", le16_to_cpu(gspca_dev->dev->descriptor.idVendor), le16_to_cpu(gspca_dev->dev->descriptor.idProduct)); } usb_make_path(gspca_dev->dev, (char *) cap->bus_info, sizeof(cap->bus_info)); return 0; } static int vidioc_enum_input(struct file *file, void *priv, struct v4l2_input *input) { struct gspca_dev *gspca_dev = video_drvdata(file); if (input->index != 0) return -EINVAL; input->type = V4L2_INPUT_TYPE_CAMERA; input->status = gspca_dev->cam.input_flags; strscpy(input->name, gspca_dev->sd_desc->name, sizeof input->name); return 0; } static int vidioc_g_input(struct file *file, void *priv, unsigned int *i) { *i = 0; return 0; } static int vidioc_s_input(struct file *file, void *priv, unsigned int i) { if (i > 0) return -EINVAL; return 0; } static int 
vidioc_g_jpegcomp(struct file *file, void *priv, struct v4l2_jpegcompression *jpegcomp) { struct gspca_dev *gspca_dev = video_drvdata(file); gspca_dev->usb_err = 0; return gspca_dev->sd_desc->get_jcomp(gspca_dev, jpegcomp); } static int vidioc_s_jpegcomp(struct file *file, void *priv, const struct v4l2_jpegcompression *jpegcomp) { struct gspca_dev *gspca_dev = video_drvdata(file); gspca_dev->usb_err = 0; return gspca_dev->sd_desc->set_jcomp(gspca_dev, jpegcomp); } static int vidioc_g_parm(struct file *filp, void *priv, struct v4l2_streamparm *parm) { struct gspca_dev *gspca_dev = video_drvdata(filp); parm->parm.capture.readbuffers = gspca_dev->queue.min_queued_buffers; if (!gspca_dev->sd_desc->get_streamparm) return 0; parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; gspca_dev->usb_err = 0; gspca_dev->sd_desc->get_streamparm(gspca_dev, parm); return gspca_dev->usb_err; } static int vidioc_s_parm(struct file *filp, void *priv, struct v4l2_streamparm *parm) { struct gspca_dev *gspca_dev = video_drvdata(filp); parm->parm.capture.readbuffers = gspca_dev->queue.min_queued_buffers; if (!gspca_dev->sd_desc->set_streamparm) { parm->parm.capture.capability = 0; return 0; } parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; gspca_dev->usb_err = 0; gspca_dev->sd_desc->set_streamparm(gspca_dev, parm); return gspca_dev->usb_err; } static int gspca_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct gspca_dev *gspca_dev = vb2_get_drv_priv(vq); unsigned int size = PAGE_ALIGN(gspca_dev->pixfmt.sizeimage); if (*nplanes) return sizes[0] < size ? -EINVAL : 0; *nplanes = 1; sizes[0] = size; return 0; } static int gspca_buffer_prepare(struct vb2_buffer *vb) { struct gspca_dev *gspca_dev = vb2_get_drv_priv(vb->vb2_queue); unsigned long size = PAGE_ALIGN(gspca_dev->pixfmt.sizeimage); if (vb2_plane_size(vb, 0) < size) { gspca_err(gspca_dev, "buffer too small (%lu < %lu)\n", vb2_plane_size(vb, 0), size); return -EINVAL; } return 0; } static void gspca_buffer_finish(struct vb2_buffer *vb) { struct gspca_dev *gspca_dev = vb2_get_drv_priv(vb->vb2_queue); if (!gspca_dev->sd_desc->dq_callback) return; gspca_dev->usb_err = 0; if (gspca_dev->present) gspca_dev->sd_desc->dq_callback(gspca_dev); } static void gspca_buffer_queue(struct vb2_buffer *vb) { struct gspca_dev *gspca_dev = vb2_get_drv_priv(vb->vb2_queue); struct gspca_buffer *buf = to_gspca_buffer(vb); unsigned long flags; spin_lock_irqsave(&gspca_dev->qlock, flags); list_add_tail(&buf->list, &gspca_dev->buf_list); spin_unlock_irqrestore(&gspca_dev->qlock, flags); } static void gspca_return_all_buffers(struct gspca_dev *gspca_dev, enum vb2_buffer_state state) { struct gspca_buffer *buf, *node; unsigned long flags; spin_lock_irqsave(&gspca_dev->qlock, flags); list_for_each_entry_safe(buf, node, &gspca_dev->buf_list, list) { vb2_buffer_done(&buf->vb.vb2_buf, state); list_del(&buf->list); } spin_unlock_irqrestore(&gspca_dev->qlock, flags); } static int gspca_start_streaming(struct vb2_queue *vq, unsigned int count) { struct gspca_dev *gspca_dev = vb2_get_drv_priv(vq); int ret; gspca_dev->sequence = 0; ret = gspca_init_transfer(gspca_dev); if (ret) gspca_return_all_buffers(gspca_dev, VB2_BUF_STATE_QUEUED); return ret; } static void gspca_stop_streaming(struct vb2_queue *vq) { struct gspca_dev *gspca_dev = vb2_get_drv_priv(vq); gspca_stream_off(gspca_dev); /* Release all active buffers */ gspca_return_all_buffers(gspca_dev, VB2_BUF_STATE_ERROR); } static const struct 
vb2_ops gspca_qops = { .queue_setup = gspca_queue_setup, .buf_prepare = gspca_buffer_prepare, .buf_finish = gspca_buffer_finish, .buf_queue = gspca_buffer_queue, .start_streaming = gspca_start_streaming, .stop_streaming = gspca_stop_streaming, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; static const struct v4l2_file_operations dev_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .unlocked_ioctl = video_ioctl2, .read = vb2_fop_read, .mmap = vb2_fop_mmap, .poll = vb2_fop_poll, }; static const struct v4l2_ioctl_ops dev_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_enum_fmt_vid_cap = vidioc_enum_fmt_vid_cap, .vidioc_try_fmt_vid_cap = vidioc_try_fmt_vid_cap, .vidioc_g_fmt_vid_cap = vidioc_g_fmt_vid_cap, .vidioc_s_fmt_vid_cap = vidioc_s_fmt_vid_cap, .vidioc_enum_input = vidioc_enum_input, .vidioc_g_input = vidioc_g_input, .vidioc_s_input = vidioc_s_input, .vidioc_g_jpegcomp = vidioc_g_jpegcomp, .vidioc_s_jpegcomp = vidioc_s_jpegcomp, .vidioc_g_parm = vidioc_g_parm, .vidioc_s_parm = vidioc_s_parm, .vidioc_enum_framesizes = vidioc_enum_framesizes, .vidioc_enum_frameintervals = vidioc_enum_frameintervals, .vidioc_reqbufs = vb2_ioctl_reqbufs, .vidioc_create_bufs = vb2_ioctl_create_bufs, .vidioc_querybuf = vb2_ioctl_querybuf, .vidioc_qbuf = vb2_ioctl_qbuf, .vidioc_dqbuf = vb2_ioctl_dqbuf, .vidioc_expbuf = vb2_ioctl_expbuf, .vidioc_streamon = vb2_ioctl_streamon, .vidioc_streamoff = vb2_ioctl_streamoff, #ifdef CONFIG_VIDEO_ADV_DEBUG .vidioc_g_chip_info = vidioc_g_chip_info, .vidioc_g_register = vidioc_g_register, .vidioc_s_register = vidioc_s_register, #endif .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; static const struct video_device gspca_template = { .name = "gspca main driver", .fops = &dev_fops, .ioctl_ops = &dev_ioctl_ops, .release = video_device_release_empty, /* We use v4l2_dev.release */ }; /* * probe and create a new gspca device * * This function must be called by the sub-driver when it is * called for probing a new device. 
*/ int gspca_dev_probe2(struct usb_interface *intf, const struct usb_device_id *id, const struct sd_desc *sd_desc, int dev_size, struct module *module) { struct gspca_dev *gspca_dev; struct usb_device *dev = interface_to_usbdev(intf); struct vb2_queue *q; int ret; pr_info("%s-" GSPCA_VERSION " probing %04x:%04x\n", sd_desc->name, id->idVendor, id->idProduct); /* create the device */ if (dev_size < sizeof *gspca_dev) dev_size = sizeof *gspca_dev; gspca_dev = kzalloc(dev_size, GFP_KERNEL); if (!gspca_dev) { pr_err("couldn't kzalloc gspca struct\n"); return -ENOMEM; } gspca_dev->usb_buf = kzalloc(USB_BUF_SZ, GFP_KERNEL); if (!gspca_dev->usb_buf) { pr_err("out of memory\n"); ret = -ENOMEM; goto out; } gspca_dev->dev = dev; gspca_dev->iface = intf->cur_altsetting->desc.bInterfaceNumber; gspca_dev->xfer_ep = -1; /* check if any audio device */ if (dev->actconfig->desc.bNumInterfaces != 1) { int i; struct usb_interface *intf2; for (i = 0; i < dev->actconfig->desc.bNumInterfaces; i++) { intf2 = dev->actconfig->interface[i]; if (intf2 != NULL && intf2->altsetting != NULL && intf2->altsetting->desc.bInterfaceClass == USB_CLASS_AUDIO) { gspca_dev->audio = 1; break; } } } gspca_dev->v4l2_dev.release = gspca_release; ret = v4l2_device_register(&intf->dev, &gspca_dev->v4l2_dev); if (ret) goto out; gspca_dev->present = true; gspca_dev->sd_desc = sd_desc; gspca_dev->empty_packet = -1; /* don't check the empty packets */ gspca_dev->vdev = gspca_template; gspca_dev->vdev.v4l2_dev = &gspca_dev->v4l2_dev; gspca_dev->vdev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; video_set_drvdata(&gspca_dev->vdev, gspca_dev); gspca_dev->module = module; mutex_init(&gspca_dev->usb_lock); gspca_dev->vdev.lock = &gspca_dev->usb_lock; init_waitqueue_head(&gspca_dev->wq); /* Initialize the vb2 queue */ q = &gspca_dev->queue; q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; q->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ; q->drv_priv = gspca_dev; q->buf_struct_size = sizeof(struct gspca_buffer); q->ops = &gspca_qops; q->mem_ops = &vb2_vmalloc_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->min_queued_buffers = 2; q->lock = &gspca_dev->usb_lock; ret = vb2_queue_init(q); if (ret) goto out; gspca_dev->vdev.queue = q; INIT_LIST_HEAD(&gspca_dev->buf_list); spin_lock_init(&gspca_dev->qlock); /* configure the subdriver and initialize the USB device */ ret = sd_desc->config(gspca_dev, id); if (ret < 0) goto out; ret = sd_desc->init(gspca_dev); if (ret < 0) goto out; if (sd_desc->init_controls) ret = sd_desc->init_controls(gspca_dev); if (ret < 0) goto out; gspca_set_default_mode(gspca_dev); ret = gspca_input_connect(gspca_dev); if (ret) goto out; #ifdef CONFIG_VIDEO_ADV_DEBUG if (!gspca_dev->sd_desc->get_register) v4l2_disable_ioctl(&gspca_dev->vdev, VIDIOC_DBG_G_REGISTER); if (!gspca_dev->sd_desc->set_register) v4l2_disable_ioctl(&gspca_dev->vdev, VIDIOC_DBG_S_REGISTER); #endif if (!gspca_dev->sd_desc->get_jcomp) v4l2_disable_ioctl(&gspca_dev->vdev, VIDIOC_G_JPEGCOMP); if (!gspca_dev->sd_desc->set_jcomp) v4l2_disable_ioctl(&gspca_dev->vdev, VIDIOC_S_JPEGCOMP); /* init video stuff */ ret = video_register_device(&gspca_dev->vdev, VFL_TYPE_VIDEO, -1); if (ret < 0) { pr_err("video_register_device err %d\n", ret); goto out; } usb_set_intfdata(intf, gspca_dev); gspca_dbg(gspca_dev, D_PROBE, "%s created\n", video_device_node_name(&gspca_dev->vdev)); gspca_input_create_urb(gspca_dev); return 0; out: #if IS_ENABLED(CONFIG_INPUT) if (gspca_dev->input_dev) 
input_unregister_device(gspca_dev->input_dev); #endif v4l2_ctrl_handler_free(gspca_dev->vdev.ctrl_handler); v4l2_device_unregister(&gspca_dev->v4l2_dev); if (sd_desc->probe_error) sd_desc->probe_error(gspca_dev); kfree(gspca_dev->usb_buf); kfree(gspca_dev); return ret; } EXPORT_SYMBOL(gspca_dev_probe2); /* same function as the previous one, but check the interface */ int gspca_dev_probe(struct usb_interface *intf, const struct usb_device_id *id, const struct sd_desc *sd_desc, int dev_size, struct module *module) { struct usb_device *dev = interface_to_usbdev(intf); /* we don't handle multi-config cameras */ if (dev->descriptor.bNumConfigurations != 1) { pr_err("%04x:%04x too many config\n", id->idVendor, id->idProduct); return -ENODEV; } /* the USB video interface must be the first one */ if (dev->actconfig->desc.bNumInterfaces != 1 && intf->cur_altsetting->desc.bInterfaceNumber != 0) return -ENODEV; return gspca_dev_probe2(intf, id, sd_desc, dev_size, module); } EXPORT_SYMBOL(gspca_dev_probe); /* * USB disconnection * * This function must be called by the sub-driver * when the device disconnects, after the specific resources are freed. */ void gspca_disconnect(struct usb_interface *intf) { struct gspca_dev *gspca_dev = usb_get_intfdata(intf); #if IS_ENABLED(CONFIG_INPUT) struct input_dev *input_dev; #endif gspca_dbg(gspca_dev, D_PROBE, "%s disconnect\n", video_device_node_name(&gspca_dev->vdev)); mutex_lock(&gspca_dev->usb_lock); gspca_dev->present = false; destroy_urbs(gspca_dev); gspca_input_destroy_urb(gspca_dev); vb2_queue_error(&gspca_dev->queue); #if IS_ENABLED(CONFIG_INPUT) input_dev = gspca_dev->input_dev; if (input_dev) { gspca_dev->input_dev = NULL; input_unregister_device(input_dev); } #endif v4l2_device_disconnect(&gspca_dev->v4l2_dev); video_unregister_device(&gspca_dev->vdev); mutex_unlock(&gspca_dev->usb_lock); /* (this will call gspca_release() immediately or on last close) */ v4l2_device_put(&gspca_dev->v4l2_dev); } EXPORT_SYMBOL(gspca_disconnect); #ifdef CONFIG_PM int gspca_suspend(struct usb_interface *intf, pm_message_t message) { struct gspca_dev *gspca_dev = usb_get_intfdata(intf); gspca_input_destroy_urb(gspca_dev); if (!vb2_start_streaming_called(&gspca_dev->queue)) return 0; mutex_lock(&gspca_dev->usb_lock); gspca_dev->frozen = 1; /* avoid urb error messages */ gspca_dev->usb_err = 0; if (gspca_dev->sd_desc->stopN) gspca_dev->sd_desc->stopN(gspca_dev); destroy_urbs(gspca_dev); gspca_set_alt0(gspca_dev); if (gspca_dev->sd_desc->stop0) gspca_dev->sd_desc->stop0(gspca_dev); mutex_unlock(&gspca_dev->usb_lock); return 0; } EXPORT_SYMBOL(gspca_suspend); int gspca_resume(struct usb_interface *intf) { struct gspca_dev *gspca_dev = usb_get_intfdata(intf); int streaming, ret = 0; mutex_lock(&gspca_dev->usb_lock); gspca_dev->frozen = 0; gspca_dev->usb_err = 0; gspca_dev->sd_desc->init(gspca_dev); /* * Most subdrivers send all ctrl values on sd_start and thus * only write to the device registers on s_ctrl when streaming -> * Clear streaming to avoid setting all ctrls twice. 
*/ streaming = vb2_start_streaming_called(&gspca_dev->queue); if (streaming) ret = gspca_init_transfer(gspca_dev); else gspca_input_create_urb(gspca_dev); mutex_unlock(&gspca_dev->usb_lock); return ret; } EXPORT_SYMBOL(gspca_resume); #endif /* -- module insert / remove -- */ static int __init gspca_init(void) { pr_info("v" GSPCA_VERSION " registered\n"); return 0; } static void __exit gspca_exit(void) { } module_init(gspca_init); module_exit(gspca_exit); module_param_named(debug, gspca_debug, int, 0644); MODULE_PARM_DESC(debug, "1:probe 2:config 3:stream 4:frame 5:packet 6:usbi 7:usbo");
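/*
 * Illustrative sketch, not part of gspca.c: the usual shape of a gspca
 * sub-driver built on gspca_dev_probe()/gspca_disconnect() above. The
 * "mycam" name, the 0x1234:0x5678 USB id, the mode table and the empty
 * callbacks are hypothetical placeholders, not a real device driver.
 */
#include <linux/module.h>
#include <linux/usb.h>
#include "gspca.h"

struct sd {
	struct gspca_dev gspca_dev;	/* !! must be the first item */
	/* sub-driver private state would follow here */
};

static const struct v4l2_pix_format mycam_modes[] = {
	{640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480 * 3 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG},
};

/* called from gspca_dev_probe2() before the USB device is initialized */
static int sd_config(struct gspca_dev *gspca_dev,
		     const struct usb_device_id *id)
{
	gspca_dev->cam.cam_mode = mycam_modes;
	gspca_dev->cam.nmodes = ARRAY_SIZE(mycam_modes);
	return 0;
}

static int sd_init(struct gspca_dev *gspca_dev)  { return 0; }
static int sd_start(struct gspca_dev *gspca_dev) { return 0; }

/* real drivers also bracket frames with FIRST_PACKET/LAST_PACKET */
static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, int len)
{
	gspca_frame_add(gspca_dev, INTER_PACKET, data, len);
}

static const struct sd_desc sd_desc = {
	.name     = "mycam",
	.config   = sd_config,
	.init     = sd_init,
	.start    = sd_start,
	.pkt_scan = sd_pkt_scan,
};

static const struct usb_device_id device_table[] = {
	{USB_DEVICE(0x1234, 0x5678)},	/* hypothetical vendor:product */
	{}
};
MODULE_DEVICE_TABLE(usb, device_table);

static int sd_probe(struct usb_interface *intf,
		    const struct usb_device_id *id)
{
	return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd),
			       THIS_MODULE);
}

static struct usb_driver sd_driver = {
	.name       = "mycam",
	.id_table   = device_table,
	.probe      = sd_probe,
	.disconnect = gspca_disconnect,
#ifdef CONFIG_PM
	.suspend    = gspca_suspend,
	.resume     = gspca_resume,
#endif
};
module_usb_driver(sd_driver);
MODULE_LICENSE("GPL");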
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Fast and scalable bitmaps. * * Copyright (C) 2016 Facebook * Copyright (C) 2013-2014 Jens Axboe */ #ifndef __LINUX_SCALE_BITMAP_H #define __LINUX_SCALE_BITMAP_H #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/cache.h> #include <linux/list.h> #include <linux/log2.h> #include <linux/minmax.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/types.h> #include <linux/wait.h> struct seq_file; /** * struct sbitmap_word - Word in a &struct sbitmap. */ struct sbitmap_word { /** * @word: word holding free bits */ unsigned long word; /** * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp; /** * struct sbitmap - Scalable bitmap. * * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong. This * trades off higher memory usage for better scalability. */ struct sbitmap { /** * @depth: Number of bits used in the whole bitmap.
*/ unsigned int depth; /** * @shift: log2(number of bits used per word) */ unsigned int shift; /** * @map_nr: Number of words (cachelines) being used for the bitmap. */ unsigned int map_nr; /** * @round_robin: Allocate bits in strict round-robin order. */ bool round_robin; /** * @map: Allocated bitmap. */ struct sbitmap_word *map; /* * @alloc_hint: Cache of last successfully allocated or freed bit. * * This is per-cpu, which allows multiple users to stick to different * cachelines until the map is exhausted. */ unsigned int __percpu *alloc_hint; }; #define SBQ_WAIT_QUEUES 8 #define SBQ_WAKE_BATCH 8 /** * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue. */ struct sbq_wait_state { /** * @wait: Wait queue. */ wait_queue_head_t wait; } ____cacheline_aligned_in_smp; /** * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free * bits. * * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to * avoid contention on the wait queue spinlock. This ensures that we don't hit a * scalability wall when we run out of free bits and have to start putting tasks * to sleep. */ struct sbitmap_queue { /** * @sb: Scalable bitmap. */ struct sbitmap sb; /** * @wake_batch: Number of bits which must be freed before we wake up any * waiters. */ unsigned int wake_batch; /** * @wake_index: Next wait queue in @ws to wake up. */ atomic_t wake_index; /** * @ws: Wait queues. */ struct sbq_wait_state *ws; /* * @ws_active: count of currently active ws waitqueues */ atomic_t ws_active; /** * @min_shallow_depth: The minimum shallow depth which may be passed to * sbitmap_queue_get_shallow() */ unsigned int min_shallow_depth; /** * @completion_cnt: Number of bits cleared passed to the * wakeup function. */ atomic_t completion_cnt; /** * @wakeup_cnt: Number of thread wake ups issued. */ atomic_t wakeup_cnt; }; /** * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node. * @sb: Bitmap to initialize. * @depth: Number of bits to allocate. * @shift: Use 2^@shift bits per word in the bitmap; if a negative number if * given, a good default is chosen. * @flags: Allocation flags. * @node: Memory node to allocate on. * @round_robin: If true, be stricter about allocation order; always allocate * starting from the last allocated bit. This is less efficient * than the default behavior (false). * @alloc_hint: If true, apply percpu hint for where to start searching for * a free bit. * * Return: Zero on success or negative errno on failure. */ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node, bool round_robin, bool alloc_hint); /* sbitmap internal helper */ static inline unsigned int __map_depth(const struct sbitmap *sb, int index) { if (index == sb->map_nr - 1) return sb->depth - (index << sb->shift); return 1U << sb->shift; } /** * sbitmap_free() - Free memory used by a &struct sbitmap. * @sb: Bitmap to free. */ static inline void sbitmap_free(struct sbitmap *sb) { free_percpu(sb->alloc_hint); kvfree(sb->map); sb->map = NULL; } /** * sbitmap_resize() - Resize a &struct sbitmap. * @sb: Bitmap to resize. * @depth: New number of bits to resize to. * * Doesn't reallocate anything. It's up to the caller to ensure that the new * depth doesn't exceed the depth that the sb was initialized with. */ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); /** * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. * @sb: Bitmap to allocate from. 
* * This operation provides acquire barrier semantics if it succeeds. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get(struct sbitmap *sb); /** * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. * @shallow_depth: The maximum number of bits to allocate from a single word. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth); /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. * * Return: true if any bit in the bitmap is set, false otherwise. */ bool sbitmap_any_bit_set(const struct sbitmap *sb); #define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) #define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); /** * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @start: Where to start the iteration. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. * * This is inline even though it's non-trivial so that the function calls to the * callback will hopefully get optimized away. */ static inline void __sbitmap_for_each_set(struct sbitmap *sb, unsigned int start, sb_for_each_fn fn, void *data) { unsigned int index; unsigned int nr; unsigned int scanned = 0; if (start >= sb->depth) start = 0; index = SB_NR_TO_INDEX(sb, start); nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, __map_depth(sb, index) - nr, sb->depth - scanned); scanned += depth; word = sb->map[index].word & ~sb->map[index].cleared; if (!word) goto next; /* * On the first iteration of the outer loop, we need to add the * bit offset back to the size of the word for find_next_bit(). * On all other iterations, nr is zero, so this is a noop. */ depth += nr; while (1) { nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) return; nr++; } next: nr = 0; if (++index >= sb->map_nr) index = 0; } } /** * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. 
*/ static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn, void *data) { __sbitmap_for_each_set(sb, 0, fn, data); } static inline unsigned long *__sbitmap_word(struct sbitmap *sb, unsigned int bitnr) { return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word; } /* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */ static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr) { set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) { clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } /* * This one is special, since it doesn't actually clear the bit, rather it * sets the corresponding bit in the ->cleared mask instead. Paired with * the caller doing sbitmap_deferred_clear() if a given index is full, which * will clear the previously freed entries in the corresponding ->word. */ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) { unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared; set_bit(SB_NR_TO_BIT(sb, bitnr), addr); } /* * Pair of sbitmap_get, and this one applies both cleared bit and * allocation hint. */ static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr) { sbitmap_deferred_clear_bit(sb, bitnr); if (likely(sb->alloc_hint && !sb->round_robin && bitnr < sb->depth)) *raw_cpu_ptr(sb->alloc_hint) = bitnr; } static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) { return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } static inline int sbitmap_calculate_shift(unsigned int depth) { int shift = ilog2(BITS_PER_LONG); /* * If the bitmap is small, shrink the number of bits per word so * we spread over a few cachelines, at least. If less than 4 * bits, just forget about it, it's not going to work optimally * anyway. */ if (depth >= 4) { while ((4U << shift) > depth) shift--; } return shift; } /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show. * @m: struct seq_file to write to. * * This is intended for debugging. The format may change at any time. */ void sbitmap_show(struct sbitmap *sb, struct seq_file *m); /** * sbitmap_weight() - Return how many set and not cleared bits in a &struct * sbitmap. * @sb: Bitmap to check. * * Return: How many set and not cleared bits set */ unsigned int sbitmap_weight(const struct sbitmap *sb); /** * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct * seq_file. * @sb: Bitmap to show. * @m: struct seq_file to write to. * * This is intended for debugging. The output isn't guaranteed to be internally * consistent. */ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m); /** * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific * memory node. * @sbq: Bitmap queue to initialize. * @depth: See sbitmap_init_node(). * @shift: See sbitmap_init_node(). * @round_robin: See sbitmap_get(). * @flags: Allocation flags. * @node: Memory node to allocate on. * * Return: Zero on success or negative errno on failure. */ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, int shift, bool round_robin, gfp_t flags, int node); /** * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue. * * @sbq: Bitmap queue to free. 
*/ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) { kfree(sbq->ws); sbitmap_free(&sbq->sb); } /** * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch * @sbq: Bitmap queue to recalculate wake batch. * @users: Number of shares. * * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch * by depth. This interface is for HCTX shared tags or queue shared tags. */ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users); /** * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. * @sbq: Bitmap queue to resize. * @depth: New number of bits to resize to. * * Like sbitmap_resize(), this doesn't reallocate anything. It has to do * some extra work on the &struct sbitmap_queue, so it's not safe to just * resize the underlying &struct sbitmap. */ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); /** * __sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue with preemption already disabled. * @sbq: Bitmap queue to allocate from. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int __sbitmap_queue_get(struct sbitmap_queue *sbq); /** * __sbitmap_queue_get_batch() - Try to allocate a batch of free bits * @sbq: Bitmap queue to allocate from. * @nr_tags: number of tags requested * @offset: offset to add to returned bits * * Return: Mask of allocated tags, 0 if none are found. Each tag allocated is * a bit in the mask returned, and the caller must add @offset to the value to * get the absolute tag value. */ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, unsigned int *offset); /** * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. * @shallow_depth: The maximum number of bits to allocate from a single word. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after * initializing @sbq. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int shallow_depth); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue. * @sbq: Bitmap queue to allocate from. * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to * sbitmap_queue_clear()). * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, unsigned int *cpu) { int nr; *cpu = get_cpu(); nr = __sbitmap_queue_get(sbq); put_cpu(); return nr; } /** * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the * minimum shallow depth that will be used. * @sbq: Bitmap queue in question. * @min_shallow_depth: The minimum shallow depth that will be passed to * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow(). * * sbitmap_queue_clear() batches wakeups as an optimization. The batch size * depends on the depth of the bitmap. Since the shallow allocation functions * effectively operate with a different depth, the shallow depth must be taken * into account when calculating the batch size. This function must be called * with the minimum shallow depth that will be used. Failure to do so can result * in missed wakeups. 
*/ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth); /** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @nr: Bit number to free. * @cpu: CPU the bit was allocated on. */ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu); /** * sbitmap_queue_clear_batch() - Free a batch of allocated bits * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @offset: offset for each tag in array * @tags: array of tags * @nr_tags: number of tags in array */ void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, int *tags, int nr_tags); static inline int sbq_index_inc(int index) { return (index + 1) & (SBQ_WAIT_QUEUES - 1); } static inline void sbq_index_atomic_inc(atomic_t *index) { int old = atomic_read(index); int new = sbq_index_inc(old); atomic_cmpxchg(index, old, new); } /** * sbq_wait_ptr() - Get the next wait queue to use for a &struct * sbitmap_queue. * @sbq: Bitmap queue to wait on. * @wait_index: A counter per "user" of @sbq. */ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, atomic_t *wait_index) { struct sbq_wait_state *ws; ws = &sbq->ws[atomic_read(wait_index)]; sbq_index_atomic_inc(wait_index); return ws; } /** * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct * sbitmap_queue. * @sbq: Bitmap queue to wake up. */ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); /** * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. * @nr: Number of bits cleared. */ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct * seq_file. * @sbq: Bitmap queue to show. * @m: struct seq_file to write to. * * This is intended for debugging. The format may change at any time. */ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); struct sbq_wait { struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */ struct wait_queue_entry wait; }; #define DEFINE_SBQ_WAIT(name) \ struct sbq_wait name = { \ .sbq = NULL, \ .wait = { \ .private = current, \ .func = autoremove_wake_function, \ .entry = LIST_HEAD_INIT((name).wait.entry), \ } \ } /* * Wrapper around prepare_to_wait_exclusive(), which maintains some extra * internal state. */ void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait, int state); /* * Must be paired with sbitmap_prepare_to_wait(). */ void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); /* * Wrapper around add_wait_queue(), which maintains some extra internal state */ void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); /* * Must be paired with sbitmap_add_wait_queue() */ void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait); #endif /* __LINUX_SCALE_BITMAP_H */
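/*
 * Usage sketch, not part of this header: a struct sbitmap_queue used as a
 * simple tag allocator. The my_tagset wrapper and its helpers are
 * hypothetical; only the sbitmap_queue_*() calls come from the API above.
 */
#include <linux/sbitmap.h>
#include <linux/numa.h>

struct my_tagset {
	struct sbitmap_queue sbq;
};

static int my_tagset_init(struct my_tagset *ts, unsigned int depth)
{
	/* shift == -1 asks the library to pick a good bits-per-word value */
	return sbitmap_queue_init_node(&ts->sbq, depth, -1,
				       false /* round_robin */,
				       GFP_KERNEL, NUMA_NO_NODE);
}

/* returns an allocated tag >= 0, or -1 when the bitmap is currently full */
static int my_tag_get(struct my_tagset *ts, unsigned int *cpu)
{
	return sbitmap_queue_get(&ts->sbq, cpu);
}

/* frees the tag; waiter wakeups are batched per the wake_batch comments */
static void my_tag_put(struct my_tagset *ts, int tag, unsigned int cpu)
{
	sbitmap_queue_clear(&ts->sbq, tag, cpu);
}

static void my_tagset_exit(struct my_tagset *ts)
{
	sbitmap_queue_free(&ts->sbq);
}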
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __INCLUDE_LINUX_OOM_H #define __INCLUDE_LINUX_OOM_H #include <linux/sched/signal.h> #include <linux/types.h> #include <linux/nodemask.h> #include <uapi/linux/oom.h> #include <linux/sched/coredump.h> /* MMF_* */ #include <linux/mm.h> /* VM_FAULT* */ struct zonelist; struct notifier_block; struct mem_cgroup; struct task_struct; enum oom_constraint { CONSTRAINT_NONE, CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY, CONSTRAINT_MEMCG, }; /* * Details of the page allocation that triggered the oom killer that are used to * determine what should be killed. */ struct oom_control { /* Used to determine cpuset */ struct zonelist *zonelist; /* Used to determine mempolicy */ nodemask_t *nodemask; /* Memory cgroup in which oom is invoked, or NULL for global oom */ struct mem_cgroup *memcg; /* Used to determine cpuset and node locality requirement */ const gfp_t gfp_mask; /* * order == -1 means the oom kill is required by sysrq, otherwise only * for display purposes. */ const int order; /* Used by oom implementation, do not set */ unsigned long totalpages; struct task_struct *chosen; long chosen_points; /* Used to print the constraint info. */ enum oom_constraint constraint; }; extern struct mutex oom_lock; extern struct mutex oom_adj_mutex; static inline void set_current_oom_origin(void) { current->signal->oom_flag_origin = true; } static inline void clear_current_oom_origin(void) { current->signal->oom_flag_origin = false; } static inline bool oom_task_origin(const struct task_struct *p) { return p->signal->oom_flag_origin; } static inline bool tsk_is_oom_victim(struct task_struct * tsk) { return tsk->signal->oom_mm; } /* * Checks whether a page fault on the given mm is still reliable. * This is no longer true if the oom reaper started to reap the * address space which is reflected by MMF_UNSTABLE flag set in * the mm. At that moment any !shared mapping would lose the content * and could cause a memory corruption (zero pages instead of the * original content). * * User should call this before establishing a page table entry for * a !shared mapping and under the proper page table lock. * * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise. */ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) { if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags))) return VM_FAULT_SIGBUS; return 0; } long oom_badness(struct task_struct *p, unsigned long totalpages); extern bool out_of_memory(struct oom_control *oc); extern void exit_oom_victim(void); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); extern bool oom_killer_disable(signed long timeout); extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); #endif /* _INCLUDE_LINUX_OOM_H */
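/*
 * Illustrative sketch, not from this header: the calling convention that the
 * check_stable_address_space() comment above describes. The fault handler
 * name is hypothetical and the actual PTE installation is elided.
 */
#include <linux/mm.h>
#include <linux/oom.h>

static vm_fault_t my_anon_fault(struct vm_fault *vmf)
{
	vm_fault_t ret;

	/* refuse to install a PTE if the oom reaper may have unmapped the mm */
	ret = check_stable_address_space(vmf->vma->vm_mm);
	if (ret)
		return ret;	/* VM_FAULT_SIGBUS */

	/* ... allocate the page and set the PTE under the page table lock ... */
	return 0;
}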
/* SPDX-License-Identifier: GPL-2.0-only */ /* * include/linux/idr.h * * 2002-10-18 written by Jim Houston jim.houston@ccur.com * Copyright (C) 2002 by Concurrent Computer Corporation * * Small id to pointer translation service avoiding fixed sized * tables. */ #ifndef __IDR_H__ #define __IDR_H__ #include <linux/radix-tree.h> #include <linux/gfp.h> #include <linux/percpu.h> struct idr { struct radix_tree_root idr_rt; unsigned int idr_base; unsigned int idr_next; }; /* * The IDR API does not expose the tagging functionality of the radix tree * to users. Use tag 0 to track whether a node has free space below it. */ #define IDR_FREE 0 /* Set the IDR flag and the IDR_FREE tag */ #define IDR_RT_MARKER (ROOT_IS_IDR | (__force gfp_t) \ (1 << (ROOT_TAG_SHIFT + IDR_FREE))) #define IDR_INIT_BASE(name, base) { \ .idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER), \ .idr_base = (base), \ .idr_next = 0, \ } /** * IDR_INIT() - Initialise an IDR. * @name: Name of IDR. * * A freshly-initialised IDR contains no IDs. */ #define IDR_INIT(name) IDR_INIT_BASE(name, 0) /** * DEFINE_IDR() - Define a statically-allocated IDR. * @name: Name of IDR. * * An IDR defined using this macro is ready for use with no additional * initialisation required. It contains no IDs. */ #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) /** * idr_get_cursor - Return the current position of the cyclic allocator * @idr: idr handle * * The value returned is the value that will be next returned from * idr_alloc_cyclic() if it is free (otherwise the search will start from * this position). */ static inline unsigned int idr_get_cursor(const struct idr *idr) { return READ_ONCE(idr->idr_next); } /** * idr_set_cursor - Set the current position of the cyclic allocator * @idr: idr handle * @val: new position * * The next call to idr_alloc_cyclic() will return @val if it is free * (otherwise the search will start from this position). */ static inline void idr_set_cursor(struct idr *idr, unsigned int val) { WRITE_ONCE(idr->idr_next, val); } /** * DOC: idr sync * idr synchronization (stolen from radix-tree.h) * * idr_find() is able to be called locklessly, using RCU. The caller must * ensure calls to this function are made within rcu_read_lock() regions. * Other readers (lock-free or otherwise) and modifications may be running * concurrently.
* * It is still required that the caller manage the synchronization and * lifetimes of the items. So if RCU lock-free lookups are used, typically * this would mean that the items have their own locks, or are amenable to * lock-free access; and that the items are freed by RCU (or only freed after * having been deleted from the idr tree *and* a synchronize_rcu() grace * period). */ #define idr_lock(idr) xa_lock(&(idr)->idr_rt) #define idr_unlock(idr) xa_unlock(&(idr)->idr_rt) #define idr_lock_bh(idr) xa_lock_bh(&(idr)->idr_rt) #define idr_unlock_bh(idr) xa_unlock_bh(&(idr)->idr_rt) #define idr_lock_irq(idr) xa_lock_irq(&(idr)->idr_rt) #define idr_unlock_irq(idr) xa_unlock_irq(&(idr)->idr_rt) #define idr_lock_irqsave(idr, flags) \ xa_lock_irqsave(&(idr)->idr_rt, flags) #define idr_unlock_irqrestore(idr, flags) \ xa_unlock_irqrestore(&(idr)->idr_rt, flags) void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t); int __must_check idr_alloc_u32(struct idr *, void *ptr, u32 *id, unsigned long max, gfp_t); int idr_alloc_cyclic(struct idr *, void *ptr, int start, int end, gfp_t); void *idr_remove(struct idr *, unsigned long id); void *idr_find(const struct idr *, unsigned long id); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); void *idr_get_next_ul(struct idr *, unsigned long *nextid); void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); /** * idr_init_base() - Initialise an IDR. * @idr: IDR handle. * @base: The base value for the IDR. * * This variation of idr_init() creates an IDR which will allocate IDs * starting at %base. */ static inline void idr_init_base(struct idr *idr, int base) { INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); idr->idr_base = base; idr->idr_next = 0; } /** * idr_init() - Initialise an IDR. * @idr: IDR handle. * * Initialise a dynamically allocated IDR. To initialise a * statically allocated IDR, use DEFINE_IDR(). */ static inline void idr_init(struct idr *idr) { idr_init_base(idr, 0); } /** * idr_is_empty() - Are there any IDs allocated? * @idr: IDR handle. * * Return: %true if any IDs have been allocated from this IDR. */ static inline bool idr_is_empty(const struct idr *idr) { return radix_tree_empty(&idr->idr_rt) && radix_tree_tagged(&idr->idr_rt, IDR_FREE); } /** * idr_preload_end - end preload section started with idr_preload() * * Each idr_preload() should be matched with an invocation of this * function. See idr_preload() for details. */ static inline void idr_preload_end(void) { local_unlock(&radix_tree_preloads.lock); } /** * idr_for_each_entry() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; id += 1U) /** * idr_for_each_entry_ul() - Iterate over an IDR's elements of a given type. * @idr: IDR handle. * @entry: The type * to use as cursor. * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * @entry and @id do not need to be initialized before the loop, and * after normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. 
*/ #define idr_for_each_entry_ul(idr, entry, tmp, id) \ for (tmp = 0, id = 0; \ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /** * idr_for_each_entry_continue() - Continue iteration over an IDR's elements of a given type * @idr: IDR handle. * @entry: The type * to use as a cursor. * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. */ #define idr_for_each_entry_continue(idr, entry, id) \ for ((entry) = idr_get_next((idr), &(id)); \ entry; \ ++id, (entry) = idr_get_next((idr), &(id))) /** * idr_for_each_entry_continue_ul() - Continue iteration over an IDR's elements of a given type * @idr: IDR handle. * @entry: The type * to use as a cursor. * @tmp: A temporary placeholder for ID. * @id: Entry ID. * * Continue to iterate over entries, continuing after the current position. * After normal termination @entry is left with the value NULL. This * is convenient for a "not found" value. */ #define idr_for_each_entry_continue_ul(idr, entry, tmp, id) \ for (tmp = id; \ ((entry) = tmp <= id ? idr_get_next_ul(idr, &(id)) : NULL) != NULL; \ tmp = id, ++id) /* * IDA - ID Allocator, use when translation from id to pointer isn't necessary. */ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ #define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long)) #define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) struct ida_bitmap { unsigned long bitmap[IDA_BITMAP_LONGS]; }; struct ida { struct xarray xa; }; #define IDA_INIT_FLAGS (XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC) #define IDA_INIT(name) { \ .xa = XARRAY_INIT(name, IDA_INIT_FLAGS) \ } #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); /** * ida_alloc() - Allocate an unused ID. * @ida: IDA handle. * @gfp: Memory allocation flags. * * Allocate an ID between 0 and %INT_MAX, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc(struct ida *ida, gfp_t gfp) { return ida_alloc_range(ida, 0, ~0, gfp); } /** * ida_alloc_min() - Allocate an unused ID. * @ida: IDA handle. * @min: Lowest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between @min and %INT_MAX, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp) { return ida_alloc_range(ida, min, ~0, gfp); } /** * ida_alloc_max() - Allocate an unused ID. * @ida: IDA handle. * @max: Highest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between 0 and @max, inclusive. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp) { return ida_alloc_range(ida, 0, max, gfp); } static inline void ida_init(struct ida *ida) { xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } /* * ida_simple_get() and ida_simple_remove() are deprecated. Use * ida_alloc() and ida_free() instead respectively. 
*/ #define ida_simple_get(ida, start, end, gfp) \ ida_alloc_range(ida, start, (end) - 1, gfp) #define ida_simple_remove(ida, id) ida_free(ida, id) static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } #endif /* __IDR_H__ */
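/*
 * Usage sketch, not part of idr.h: an IDR mapping small integer ids to
 * objects, plus a bare id from an IDA. "my_obj" and the helpers are
 * hypothetical, and the external locking that normally serializes IDR
 * modifications is omitted for brevity.
 */
#include <linux/idr.h>
#include <linux/slab.h>

static DEFINE_IDR(my_objs);	/* id -> struct my_obj * */
static DEFINE_IDA(my_ids);	/* bare id allocator, no pointer */

struct my_obj {
	int id;
};

static struct my_obj *my_obj_create(void)
{
	struct my_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	int id;

	if (!obj)
		return NULL;

	/* allocate an unused id >= 1 (end <= 0 means "up to INT_MAX") */
	id = idr_alloc(&my_objs, obj, 1, 0, GFP_KERNEL);
	if (id < 0) {
		kfree(obj);
		return NULL;
	}
	obj->id = id;
	return obj;
}

static void my_obj_destroy(struct my_obj *obj)
{
	idr_remove(&my_objs, obj->id);
	kfree(obj);
}

static void my_ida_example(void)
{
	int id = ida_alloc(&my_ids, GFP_KERNEL);	/* >= 0 or -errno */

	if (id >= 0)
		ida_free(&my_ids, id);
}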
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Internal procfs definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/binfmts.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> struct ctl_table_header; struct mempolicy; /* * This is not completely implemented yet. The idea is to * create an in-memory tree (like the actual /proc filesystem * tree) of these proc_dir_entries, so that we can dynamically * add new files to /proc. * * parent/subdir are used for the directory structure (every /proc file has a * parent, but "subdir" is empty for all non-directory entries). * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { /* * number of callers into module in progress; * negative -> it's going away RSN */ atomic_t in_use; refcount_t refcnt; struct list_head pde_openers; /* who did ->open, but not ->release */ /* protects ->pde_openers and all struct pde_opener instances */ spinlock_t pde_unload_lock; struct completion *pde_unload_completion; const struct inode_operations *proc_iops; union { const struct proc_ops *proc_ops; const struct file_operations *proc_dir_ops; }; const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); }; proc_write_t write; void *data; unsigned int state_size; unsigned int low_ino; nlink_t nlink; kuid_t uid; kgid_t gid; loff_t size; struct proc_dir_entry *parent; struct rb_root subdir; struct rb_node subdir_node; char *name; umode_t mode; u8 flags; u8 namelen; char inline_name[]; } __randomize_layout; #define SIZEOF_PDE ( \ sizeof(struct proc_dir_entry) < 128 ? 128 : \ sizeof(struct proc_dir_entry) < 192 ? 192 : \ sizeof(struct proc_dir_entry) < 256 ? 256 : \ sizeof(struct proc_dir_entry) < 512 ?
512 : \ 0) #define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry)) static inline bool pde_is_permanent(const struct proc_dir_entry *pde) { return pde->flags & PROC_ENTRY_PERMANENT; } static inline void pde_make_permanent(struct proc_dir_entry *pde) { pde->flags |= PROC_ENTRY_PERMANENT; } extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); union proc_op { int (*proc_get_link)(struct dentry *, struct path *); int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); int lsmid; }; struct proc_inode { struct pid *pid; unsigned int fd; union proc_op op; struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; struct hlist_node sibling_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; } __randomize_layout; /* * General functions */ static inline struct proc_inode *PROC_I(const struct inode *inode) { return container_of(inode, struct proc_inode, vfs_inode); } static inline struct proc_dir_entry *PDE(const struct inode *inode) { return PROC_I(inode)->pde; } static inline struct pid *proc_pid(const struct inode *inode) { return PROC_I(inode)->pid; } static inline struct task_struct *get_proc_task(const struct inode *inode) { return get_pid_task(proc_pid(inode), PIDTYPE_PID); } void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid); unsigned name_to_int(const struct qstr *qstr); /* * Offset of the first process in the /proc root directory.. */ #define FIRST_PROCESS_ENTRY 256 /* Worst case buffer size needed for holding an integer. */ #define PROC_NUMBUF 13 /* * array.c */ extern const struct file_operations proc_tid_children_operations; extern void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape); extern int proc_tid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_status(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); /* * base.c */ extern const struct dentry_operations pid_dentry_operations; extern int pid_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern void proc_pid_evict_inode(struct proc_inode *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern void pid_update_inode(struct task_struct *, struct inode *); extern int pid_delete_dentry(const struct dentry *); extern int proc_pid_readdir(struct file *, struct dir_context *); struct dentry *proc_pid_lookup(struct dentry *, unsigned int); extern loff_t mem_lseek(struct file *, loff_t, int); /* Lookups */ typedef struct dentry *instantiate_t(struct dentry *, struct task_struct *, const void *); bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int, instantiate_t, struct task_struct *, const void *); /* * generic.c */ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, struct proc_dir_entry **parent, void *data); struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, struct proc_dir_entry *dp); extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); struct dentry 
*proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *); static inline void pde_get(struct proc_dir_entry *pde) { refcount_inc(&pde->refcnt); } extern void pde_put(struct proc_dir_entry *); static inline bool is_empty_pde(const struct proc_dir_entry *pde) { return S_ISDIR(pde->mode) && !pde->proc_iops; } extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, loff_t *); /* * inode.c */ struct pde_opener { struct list_head lh; struct file *file; bool closing; struct completion *c; } __randomize_layout; extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; extern const struct super_operations proc_sops; void proc_init_kmemcache(void); void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern void proc_entry_rundown(struct proc_dir_entry *); /* * proc_namespaces.c */ extern const struct inode_operations proc_ns_dir_inode_operations; extern const struct file_operations proc_ns_dir_operations; /* * proc_net.c */ extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; #ifdef CONFIG_NET extern int proc_net_init(void); #else static inline int proc_net_init(void) { return 0; } #endif /* * proc_self.c */ extern int proc_setup_self(struct super_block *); /* * proc_thread_self.c */ extern int proc_setup_thread_self(struct super_block *); extern void proc_thread_self_init(void); /* * proc_sysctl.c */ #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); extern void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } static inline void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) { } #endif /* * proc_tty.c */ #ifdef CONFIG_TTY extern void proc_tty_init(void); #else static inline void proc_tty_init(void) {} #endif /* * root.c */ extern struct proc_dir_entry proc_root; extern void proc_self_init(void); /* * task_[no]mmu.c */ struct mem_size_stats; struct proc_maps_private { struct inode *inode; struct task_struct *task; struct mm_struct *mm; struct vma_iterator iter; #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif } __randomize_layout; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); extern const struct file_operations proc_pid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; extern const struct file_operations proc_pid_smaps_operations; extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); extern void task_mem(struct seq_file *, struct mm_struct *); extern const struct dentry_operations proc_net_dentry_ops; static inline void pde_force_lookup(struct proc_dir_entry *pde) { /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ pde->proc_dops = &proc_net_dentry_ops; }
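/*
 * Small sketch, not part of internal.h: going from a VFS inode to its
 * proc_dir_entry with the PDE() helper above. The open/release pair shown
 * here is hypothetical, purely to illustrate the accessors and refcounting.
 */
static int my_proc_open(struct inode *inode, struct file *file)
{
	struct proc_dir_entry *pde = PDE(inode);

	/* ->data is whatever the creator of the entry stored there */
	file->private_data = pde->data;

	/* hold a reference on the entry while this file stays open */
	pde_get(pde);
	return 0;
}

static int my_proc_release(struct inode *inode, struct file *file)
{
	pde_put(PDE(inode));	/* drop the reference taken in ->open */
	return 0;
}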
// SPDX-License-Identifier: GPL-2.0 /* * driver.c - centralized device driver management * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2007 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2007 Novell Inc. */ #include <linux/device/driver.h> #include <linux/device.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/sysfs.h> #include "base.h" static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *dev_prv; if (n) { dev_prv = to_device_private_driver(n); dev = dev_prv->device; } return dev; } /** * driver_set_override() - Helper to set or clear driver override. * @dev: Device to change * @override: Address of string to change (e.g. &device->driver_override); * The contents will be freed and hold newly allocated override. * @s: NUL-terminated string, new driver name to force a match, pass empty * string to clear it ("" or "\n", where the latter is only for sysfs * interface). * @len: length of @s * * Helper to set or clear driver override in a device, intended for the cases * when the driver_override field is allocated by driver/bus code. * * Returns: 0 on success or a negative error code on failure. */ int driver_set_override(struct device *dev, const char **override, const char *s, size_t len) { const char *new, *old; char *cp; if (!override || !s) return -EINVAL; /* * The stored value will be used in sysfs show callback (sysfs_emit()), * which has a length limit of PAGE_SIZE and adds a trailing newline. * Thus we can store one character less to avoid truncation during sysfs * show. */ if (len >= (PAGE_SIZE - 1)) return -EINVAL; /* * Compute the real length of the string in case userspace sends us a * bunch of \0 characters like python likes to do. */ len = strlen(s); if (!len) { /* Empty string passed - clear override */ device_lock(dev); old = *override; *override = NULL; device_unlock(dev); kfree(old); return 0; } cp = strnchr(s, len, '\n'); if (cp) len = cp - s; new = kstrndup(s, len, GFP_KERNEL); if (!new) return -ENOMEM; device_lock(dev); old = *override; if (cp != s) { *override = new; } else { /* "\n" passed - clear override */ kfree(new); *override = NULL; } device_unlock(dev); kfree(old); return 0; } EXPORT_SYMBOL_GPL(driver_set_override); /** * driver_for_each_device - Iterator for devices bound to a driver.
* @drv: Driver we're iterating. * @start: Device to begin with * @data: Data to pass to the callback. * @fn: Function to call for each device. * * Iterate over the @drv's list of devices calling @fn for each one. */ int driver_for_each_device(struct device_driver *drv, struct device *start, void *data, int (*fn)(struct device *, void *)) { struct klist_iter i; struct device *dev; int error = 0; if (!drv) return -EINVAL; klist_iter_init_node(&drv->p->klist_devices, &i, start ? &start->p->knode_driver : NULL); while (!error && (dev = next_device(&i))) error = fn(dev, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(driver_for_each_device); /** * driver_find_device - device iterator for locating a particular device. * @drv: The device's driver * @start: Device to begin with * @data: Data to pass to match function * @match: Callback function to check device * * This is similar to the driver_for_each_device() function above, but * it returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. */ struct device *driver_find_device(struct device_driver *drv, struct device *start, const void *data, int (*match)(struct device *dev, const void *data)) { struct klist_iter i; struct device *dev; if (!drv || !drv->p) return NULL; klist_iter_init_node(&drv->p->klist_devices, &i, (start ? &start->p->knode_driver : NULL)); while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) break; klist_iter_exit(&i); return dev; } EXPORT_SYMBOL_GPL(driver_find_device); /** * driver_create_file - create sysfs file for driver. * @drv: driver. * @attr: driver attribute descriptor. */ int driver_create_file(struct device_driver *drv, const struct driver_attribute *attr) { int error; if (drv) error = sysfs_create_file(&drv->p->kobj, &attr->attr); else error = -EINVAL; return error; } EXPORT_SYMBOL_GPL(driver_create_file); /** * driver_remove_file - remove sysfs file for driver. * @drv: driver. * @attr: driver attribute descriptor. */ void driver_remove_file(struct device_driver *drv, const struct driver_attribute *attr) { if (drv) sysfs_remove_file(&drv->p->kobj, &attr->attr); } EXPORT_SYMBOL_GPL(driver_remove_file); int driver_add_groups(struct device_driver *drv, const struct attribute_group **groups) { return sysfs_create_groups(&drv->p->kobj, groups); } void driver_remove_groups(struct device_driver *drv, const struct attribute_group **groups) { sysfs_remove_groups(&drv->p->kobj, groups); } /** * driver_register - register driver with bus * @drv: driver to register * * We pass off most of the work to the bus_add_driver() call, * since most of the things we have to do deal with the bus * structures. 
*/ int driver_register(struct device_driver *drv) { int ret; struct device_driver *other; if (!bus_is_registered(drv->bus)) { pr_err("Driver '%s' was unable to register with bus_type '%s' because the bus was not initialized.\n", drv->name, drv->bus->name); return -EINVAL; } if ((drv->bus->probe && drv->probe) || (drv->bus->remove && drv->remove) || (drv->bus->shutdown && drv->shutdown)) pr_warn("Driver '%s' needs updating - please use " "bus_type methods\n", drv->name); other = driver_find(drv->name, drv->bus); if (other) { pr_err("Error: Driver '%s' is already registered, " "aborting...\n", drv->name); return -EBUSY; } ret = bus_add_driver(drv); if (ret) return ret; ret = driver_add_groups(drv, drv->groups); if (ret) { bus_remove_driver(drv); return ret; } kobject_uevent(&drv->p->kobj, KOBJ_ADD); deferred_probe_extend_timeout(); return ret; } EXPORT_SYMBOL_GPL(driver_register); /** * driver_unregister - remove driver from system. * @drv: driver. * * Again, we pass off most of the work to the bus-level call. */ void driver_unregister(struct device_driver *drv) { if (!drv || !drv->p) { WARN(1, "Unexpected driver unregister!\n"); return; } driver_remove_groups(drv, drv->groups); bus_remove_driver(drv); } EXPORT_SYMBOL_GPL(driver_unregister);
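/*
 * Hedged usage sketch (not part of driver.c): how a driver or bus might walk
 * its bound devices with driver_for_each_device() and take a reference to a
 * particular one with driver_find_device(). "foo_driver", "foo0" and the
 * helper names are made-up placeholders; only the driver-core calls are real.
 */
#include <linux/device.h>
#include <linux/string.h>

static int foo_count_one(struct device *dev, void *data)
{
	unsigned int *count = data;

	(*count)++;
	return 0;	/* returning non-zero would stop the iteration */
}

static int foo_match_name(struct device *dev, const void *data)
{
	return sysfs_streq(dev_name(dev), data);	/* non-zero means "found" */
}

static void foo_scan(struct device_driver *foo_driver)
{
	unsigned int count = 0;
	struct device *dev;

	driver_for_each_device(foo_driver, NULL, &count, foo_count_one);
	pr_info("%s: %u device(s) currently bound\n", foo_driver->name, count);

	/* On a match, driver_find_device() returns with a reference held. */
	dev = driver_find_device(foo_driver, NULL, "foo0", foo_match_name);
	if (dev) {
		dev_info(dev, "found by name\n");
		put_device(dev);
	}
}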
// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2005 Mike Isely <isely@pobox.com> */ #include <linux/i2c.h> #include <linux/module.h> #include <media/i2c/ir-kbd-i2c.h> #include "pvrusb2-i2c-core.h" #include "pvrusb2-hdw-internal.h" #include "pvrusb2-debug.h" #include "pvrusb2-fx2-cmd.h" #include "pvrusb2.h" #define trace_i2c(...) pvr2_trace(PVR2_TRACE_I2C,__VA_ARGS__) /* This module attempts to implement a compliant I2C adapter for the pvrusb2 device. */ static unsigned int i2c_scan; module_param(i2c_scan, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(i2c_scan,"scan i2c bus at insmod time"); static int ir_mode[PVR_NUM] = { [0 ...
PVR_NUM-1] = 1 }; module_param_array(ir_mode, int, NULL, 0444); MODULE_PARM_DESC(ir_mode,"specify: 0=disable IR reception, 1=normal IR"); static int pvr2_disable_ir_video; module_param_named(disable_autoload_ir_video, pvr2_disable_ir_video, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(disable_autoload_ir_video, "1=do not try to autoload ir_video IR receiver"); static int pvr2_i2c_write(struct pvr2_hdw *hdw, /* Context */ u8 i2c_addr, /* I2C address we're talking to */ u8 *data, /* Data to write */ u16 length) /* Size of data to write */ { /* Return value - default 0 means success */ int ret; if (!data) length = 0; if (length > (sizeof(hdw->cmd_buffer) - 3)) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Killing an I2C write to %u that is too large (desired=%u limit=%u)", i2c_addr, length,(unsigned int)(sizeof(hdw->cmd_buffer) - 3)); return -ENOTSUPP; } LOCK_TAKE(hdw->ctl_lock); /* Clear the command buffer (likely to be paranoia) */ memset(hdw->cmd_buffer, 0, sizeof(hdw->cmd_buffer)); /* Set up command buffer for an I2C write */ hdw->cmd_buffer[0] = FX2CMD_I2C_WRITE; /* write prefix */ hdw->cmd_buffer[1] = i2c_addr; /* i2c addr of chip */ hdw->cmd_buffer[2] = length; /* length of what follows */ if (length) memcpy(hdw->cmd_buffer + 3, data, length); /* Do the operation */ ret = pvr2_send_request(hdw, hdw->cmd_buffer, length + 3, hdw->cmd_buffer, 1); if (!ret) { if (hdw->cmd_buffer[0] != 8) { ret = -EIO; if (hdw->cmd_buffer[0] != 7) { trace_i2c("unexpected status from i2_write[%d]: %d", i2c_addr,hdw->cmd_buffer[0]); } } } LOCK_GIVE(hdw->ctl_lock); return ret; } static int pvr2_i2c_read(struct pvr2_hdw *hdw, /* Context */ u8 i2c_addr, /* I2C address we're talking to */ u8 *data, /* Data to write */ u16 dlen, /* Size of data to write */ u8 *res, /* Where to put data we read */ u16 rlen) /* Amount of data to read */ { /* Return value - default 0 means success */ int ret; if (!data) dlen = 0; if (dlen > (sizeof(hdw->cmd_buffer) - 4)) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Killing an I2C read to %u that has wlen too large (desired=%u limit=%u)", i2c_addr, dlen,(unsigned int)(sizeof(hdw->cmd_buffer) - 4)); return -ENOTSUPP; } if (res && (rlen > (sizeof(hdw->cmd_buffer) - 1))) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Killing an I2C read to %u that has rlen too large (desired=%u limit=%u)", i2c_addr, rlen,(unsigned int)(sizeof(hdw->cmd_buffer) - 1)); return -ENOTSUPP; } LOCK_TAKE(hdw->ctl_lock); /* Clear the command buffer (likely to be paranoia) */ memset(hdw->cmd_buffer, 0, sizeof(hdw->cmd_buffer)); /* Set up command buffer for an I2C write followed by a read */ hdw->cmd_buffer[0] = FX2CMD_I2C_READ; /* read prefix */ hdw->cmd_buffer[1] = dlen; /* arg length */ hdw->cmd_buffer[2] = rlen; /* answer length. Device will send one more byte (status). */ hdw->cmd_buffer[3] = i2c_addr; /* i2c addr of chip */ if (dlen) memcpy(hdw->cmd_buffer + 4, data, dlen); /* Do the operation */ ret = pvr2_send_request(hdw, hdw->cmd_buffer, 4 + dlen, hdw->cmd_buffer, rlen + 1); if (!ret) { if (hdw->cmd_buffer[0] != 8) { ret = -EIO; if (hdw->cmd_buffer[0] != 7) { trace_i2c("unexpected status from i2_read[%d]: %d", i2c_addr,hdw->cmd_buffer[0]); } } } /* Copy back the result */ if (res && rlen) { if (ret) { /* Error, just blank out the return buffer */ memset(res, 0, rlen); } else { memcpy(res, hdw->cmd_buffer + 1, rlen); } } LOCK_GIVE(hdw->ctl_lock); return ret; } /* This is the common low level entry point for doing I2C operations to the hardware. 
*/ static int pvr2_i2c_basic_op(struct pvr2_hdw *hdw, u8 i2c_addr, u8 *wdata, u16 wlen, u8 *rdata, u16 rlen) { if (!rdata) rlen = 0; if (!wdata) wlen = 0; if (rlen || !wlen) { return pvr2_i2c_read(hdw,i2c_addr,wdata,wlen,rdata,rlen); } else { return pvr2_i2c_write(hdw,i2c_addr,wdata,wlen); } } /* This is a special entry point for cases of I2C transaction attempts to the IR receiver. The implementation here simulates the IR receiver by issuing a command to the FX2 firmware and using that response to return what the real I2C receiver would have returned. We use this for 24xxx devices, where the IR receiver chip has been removed and replaced with FX2 related logic. */ static int i2c_24xxx_ir(struct pvr2_hdw *hdw, u8 i2c_addr,u8 *wdata,u16 wlen,u8 *rdata,u16 rlen) { u8 dat[4]; unsigned int stat; if (!(rlen || wlen)) { /* This is a probe attempt. Just let it succeed. */ return 0; } /* We don't understand this kind of transaction */ if ((wlen != 0) || (rlen == 0)) return -EIO; if (rlen < 3) { /* Mike Isely <isely@pobox.com> Appears to be a probe attempt from lirc. Just fill in zeroes and return. If we try instead to do the full transaction here, then bad things seem to happen within the lirc driver module (version 0.8.0-7 sources from Debian, when run under vanilla 2.6.17.6 kernel) - and I don't have the patience to chase it down. */ if (rlen > 0) rdata[0] = 0; if (rlen > 1) rdata[1] = 0; return 0; } /* Issue a command to the FX2 to read the IR receiver. */ LOCK_TAKE(hdw->ctl_lock); do { hdw->cmd_buffer[0] = FX2CMD_GET_IR_CODE; stat = pvr2_send_request(hdw, hdw->cmd_buffer,1, hdw->cmd_buffer,4); dat[0] = hdw->cmd_buffer[0]; dat[1] = hdw->cmd_buffer[1]; dat[2] = hdw->cmd_buffer[2]; dat[3] = hdw->cmd_buffer[3]; } while (0); LOCK_GIVE(hdw->ctl_lock); /* Give up if that operation failed. */ if (stat != 0) return stat; /* Mangle the results into something that looks like the real IR receiver. */ rdata[2] = 0xc1; if (dat[0] != 1) { /* No code received. */ rdata[0] = 0; rdata[1] = 0; } else { u16 val; /* Mash the FX2 firmware-provided IR code into something that the normal i2c chip-level driver expects. */ val = dat[1]; val <<= 8; val |= dat[2]; val >>= 1; val &= ~0x0003; val |= 0x8000; rdata[0] = (val >> 8) & 0xffu; rdata[1] = val & 0xffu; } return 0; } /* This is a special entry point that is entered if an I2C operation is attempted to a wm8775 chip on model 24xxx hardware. Autodetect of this part doesn't work, but we know it is really there. So let's look for the autodetect attempt and just return success if we see that. */ static int i2c_hack_wm8775(struct pvr2_hdw *hdw, u8 i2c_addr,u8 *wdata,u16 wlen,u8 *rdata,u16 rlen) { if (!(rlen || wlen)) { // This is a probe attempt. Just let it succeed. return 0; } return pvr2_i2c_basic_op(hdw,i2c_addr,wdata,wlen,rdata,rlen); } /* This is an entry point designed to always fail any attempt to perform a transfer. We use this to cause certain I2C addresses to not be probed. */ static int i2c_black_hole(struct pvr2_hdw *hdw, u8 i2c_addr,u8 *wdata,u16 wlen,u8 *rdata,u16 rlen) { return -EIO; } /* This is a special entry point that is entered if an I2C operation is attempted to a cx25840 chip on model 24xxx hardware. This chip can sometimes wedge itself. Worse still, when this happens msp3400 can falsely detect this part and then the system gets hosed up after msp3400 gets confused and dies. What we want to do here is try to keep msp3400 away and also try to notice if the chip is wedged and send a warning to the system log. 
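A small state machine (hdw->i2c_cx25840_hack_state, used by the function below) tracks this: states 1 and 2 record that the most recent write selected revision register 0x0100 or 0x0101, and state 3 means the revision check passed, after which all later traffic is passed straight through to pvr2_i2c_basic_op().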
*/ static int i2c_hack_cx25840(struct pvr2_hdw *hdw, u8 i2c_addr,u8 *wdata,u16 wlen,u8 *rdata,u16 rlen) { int ret; unsigned int subaddr; u8 wbuf[2]; int state = hdw->i2c_cx25840_hack_state; if (!(rlen || wlen)) { // Probe attempt - always just succeed and don't bother the // hardware (this helps to make the state machine further // down somewhat easier). return 0; } if (state == 3) { return pvr2_i2c_basic_op(hdw,i2c_addr,wdata,wlen,rdata,rlen); } /* We're looking for the exact pattern where the revision register is being read. The cx25840 module will always look at the revision register first. Any other pattern of access therefore has to be a probe attempt from somebody else so we'll reject it. Normally we could just let each client just probe the part anyway, but when the cx25840 is wedged, msp3400 will get a false positive and that just screws things up... */ if (wlen == 0) { switch (state) { case 1: subaddr = 0x0100; break; case 2: subaddr = 0x0101; break; default: goto fail; } } else if (wlen == 2) { subaddr = (wdata[0] << 8) | wdata[1]; switch (subaddr) { case 0x0100: state = 1; break; case 0x0101: state = 2; break; default: goto fail; } } else { goto fail; } if (!rlen) goto success; state = 0; if (rlen != 1) goto fail; /* If we get to here then we have a legitimate read for one of the two revision bytes, so pass it through. */ wbuf[0] = subaddr >> 8; wbuf[1] = subaddr; ret = pvr2_i2c_basic_op(hdw,i2c_addr,wbuf,2,rdata,rlen); if ((ret != 0) || (*rdata == 0x04) || (*rdata == 0x0a)) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "***WARNING*** Detected a wedged cx25840 chip; the device will not work."); pvr2_trace(PVR2_TRACE_ERROR_LEGS, "***WARNING*** Try power cycling the pvrusb2 device."); pvr2_trace(PVR2_TRACE_ERROR_LEGS, "***WARNING*** Disabling further access to the device to prevent other foul-ups."); // This blocks all further communication with the part. hdw->i2c_func[0x44] = NULL; pvr2_hdw_render_useless(hdw); goto fail; } /* Success! */ pvr2_trace(PVR2_TRACE_CHIPS,"cx25840 appears to be OK."); state = 3; success: hdw->i2c_cx25840_hack_state = state; return 0; fail: hdw->i2c_cx25840_hack_state = state; return -EIO; } /* This is a very, very limited I2C adapter implementation. We can only support what we actually know will work on the device... */ static int pvr2_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg msgs[], int num) { int ret = -ENOTSUPP; pvr2_i2c_func funcp = NULL; struct pvr2_hdw *hdw = (struct pvr2_hdw *)(i2c_adap->algo_data); if (!num) { ret = -EINVAL; goto done; } if (msgs[0].addr < PVR2_I2C_FUNC_CNT) { funcp = hdw->i2c_func[msgs[0].addr]; } if (!funcp) { ret = -EIO; goto done; } if (num == 1) { if (msgs[0].flags & I2C_M_RD) { /* Simple read */ u16 tcnt,bcnt,offs; if (!msgs[0].len) { /* Length == 0 read. This is a probe. */ if (funcp(hdw,msgs[0].addr,NULL,0,NULL,0)) { ret = -EIO; goto done; } ret = 1; goto done; } /* If the read is short enough we'll do the whole thing atomically. Otherwise we have no choice but to break apart the reads. 
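For example, with a 64-byte cmd_buffer (the size here is only illustrative) the per-chunk limit is sizeof(cmd_buffer)-1 = 63 bytes, so a 200-byte read would be issued as chunks of 63, 63, 63 and 11 bytes.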
*/ tcnt = msgs[0].len; offs = 0; while (tcnt) { bcnt = tcnt; if (bcnt > sizeof(hdw->cmd_buffer)-1) { bcnt = sizeof(hdw->cmd_buffer)-1; } if (funcp(hdw,msgs[0].addr,NULL,0, msgs[0].buf+offs,bcnt)) { ret = -EIO; goto done; } offs += bcnt; tcnt -= bcnt; } ret = 1; goto done; } else { /* Simple write */ ret = 1; if (funcp(hdw,msgs[0].addr, msgs[0].buf,msgs[0].len,NULL,0)) { ret = -EIO; } goto done; } } else if (num == 2) { if (msgs[0].addr != msgs[1].addr) { trace_i2c("i2c refusing 2 phase transfer with conflicting target addresses"); ret = -ENOTSUPP; goto done; } if ((!((msgs[0].flags & I2C_M_RD))) && (msgs[1].flags & I2C_M_RD)) { u16 tcnt,bcnt,wcnt,offs; /* Write followed by atomic read. If the read portion is short enough we'll do the whole thing atomically. Otherwise we have no choice but to break apart the reads. */ tcnt = msgs[1].len; wcnt = msgs[0].len; offs = 0; while (tcnt || wcnt) { bcnt = tcnt; if (bcnt > sizeof(hdw->cmd_buffer)-1) { bcnt = sizeof(hdw->cmd_buffer)-1; } if (funcp(hdw,msgs[0].addr, msgs[0].buf,wcnt, msgs[1].buf+offs,bcnt)) { ret = -EIO; goto done; } offs += bcnt; tcnt -= bcnt; wcnt = 0; } ret = 2; goto done; } else { trace_i2c("i2c refusing complex transfer read0=%d read1=%d", (msgs[0].flags & I2C_M_RD), (msgs[1].flags & I2C_M_RD)); } } else { trace_i2c("i2c refusing %d phase transfer",num); } done: if (pvrusb2_debug & PVR2_TRACE_I2C_TRAF) { unsigned int idx,offs,cnt; for (idx = 0; idx < num; idx++) { cnt = msgs[idx].len; pr_info("pvrusb2 i2c xfer %u/%u: addr=0x%x len=%d %s", idx+1,num, msgs[idx].addr, cnt, (msgs[idx].flags & I2C_M_RD ? "read" : "write")); if ((ret > 0) || !(msgs[idx].flags & I2C_M_RD)) { if (cnt > 8) cnt = 8; pr_cont(" ["); for (offs = 0; offs < cnt; offs++) { if (offs) pr_cont(" "); pr_cont("%02x", msgs[idx].buf[offs]); } if (offs < cnt) pr_cont(" ..."); pr_cont("]"); } if (idx+1 == num) { pr_cont(" result=%d", ret); } pr_cont("\n"); } if (!num) { pr_info("pvrusb2 i2c xfer null transfer result=%d\n", ret); } } return ret; } static u32 pvr2_i2c_functionality(struct i2c_adapter *adap) { return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C; } static const struct i2c_algorithm pvr2_i2c_algo_template = { .master_xfer = pvr2_i2c_xfer, .functionality = pvr2_i2c_functionality, }; static const struct i2c_adapter pvr2_i2c_adap_template = { .owner = THIS_MODULE, .class = 0, }; /* Return true if device exists at given address */ static int do_i2c_probe(struct pvr2_hdw *hdw, int addr) { struct i2c_msg msg[1]; int rc; msg[0].addr = 0; msg[0].flags = I2C_M_RD; msg[0].len = 0; msg[0].buf = NULL; msg[0].addr = addr; rc = i2c_transfer(&hdw->i2c_adap, msg, ARRAY_SIZE(msg)); return rc == 1; } static void do_i2c_scan(struct pvr2_hdw *hdw) { int i; pr_info("%s: i2c scan beginning\n", hdw->name); for (i = 0; i < 128; i++) { if (do_i2c_probe(hdw, i)) { pr_info("%s: i2c scan: found device @ 0x%x\n", hdw->name, i); } } pr_info("%s: i2c scan done.\n", hdw->name); } static void pvr2_i2c_register_ir(struct pvr2_hdw *hdw) { struct i2c_board_info info; struct IR_i2c_init_data *init_data = &hdw->ir_init_data; if (pvr2_disable_ir_video) { pvr2_trace(PVR2_TRACE_INFO, "Automatic binding of ir_video has been disabled."); return; } memset(&info, 0, sizeof(struct i2c_board_info)); switch (hdw->ir_scheme_active) { case PVR2_IR_SCHEME_24XXX: /* FX2-controlled IR */ case PVR2_IR_SCHEME_29XXX: /* Original 29xxx device */ init_data->ir_codes = RC_MAP_HAUPPAUGE; init_data->internal_get_key_func = IR_KBD_GET_KEY_HAUP; init_data->type = RC_PROTO_BIT_RC5; init_data->name = hdw->hdw_desc->description; 
init_data->polling_interval = 100; /* ms From ir-kbd-i2c */ /* IR Receiver */ info.addr = 0x18; info.platform_data = init_data; strscpy(info.type, "ir_video", I2C_NAME_SIZE); pvr2_trace(PVR2_TRACE_INFO, "Binding %s to i2c address 0x%02x.", info.type, info.addr); i2c_new_client_device(&hdw->i2c_adap, &info); break; case PVR2_IR_SCHEME_ZILOG: /* HVR-1950 style */ case PVR2_IR_SCHEME_24XXX_MCE: /* 24xxx MCE device */ init_data->ir_codes = RC_MAP_HAUPPAUGE; init_data->internal_get_key_func = IR_KBD_GET_KEY_HAUP_XVR; init_data->type = RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_RC6_6A_32; init_data->name = hdw->hdw_desc->description; /* IR Transceiver */ info.addr = 0x71; info.platform_data = init_data; strscpy(info.type, "ir_z8f0811_haup", I2C_NAME_SIZE); pvr2_trace(PVR2_TRACE_INFO, "Binding %s to i2c address 0x%02x.", info.type, info.addr); i2c_new_client_device(&hdw->i2c_adap, &info); break; default: /* The device either doesn't support I2C-based IR or we don't know (yet) how to operate IR on the device. */ break; } } void pvr2_i2c_core_init(struct pvr2_hdw *hdw) { unsigned int idx; /* The default action for all possible I2C addresses is just to do the transfer normally. */ for (idx = 0; idx < PVR2_I2C_FUNC_CNT; idx++) { hdw->i2c_func[idx] = pvr2_i2c_basic_op; } /* However, deal with various special cases for 24xxx hardware. */ if (ir_mode[hdw->unit_number] == 0) { pr_info("%s: IR disabled\n", hdw->name); hdw->i2c_func[0x18] = i2c_black_hole; } else if (ir_mode[hdw->unit_number] == 1) { if (hdw->ir_scheme_active == PVR2_IR_SCHEME_24XXX) { /* Set up translation so that our IR looks like a 29xxx device */ hdw->i2c_func[0x18] = i2c_24xxx_ir; } } if (hdw->hdw_desc->flag_has_cx25840) { hdw->i2c_func[0x44] = i2c_hack_cx25840; } if (hdw->hdw_desc->flag_has_wm8775) { hdw->i2c_func[0x1b] = i2c_hack_wm8775; } // Configure the adapter and set up everything else related to it. hdw->i2c_adap = pvr2_i2c_adap_template; hdw->i2c_algo = pvr2_i2c_algo_template; strscpy(hdw->i2c_adap.name, hdw->name, sizeof(hdw->i2c_adap.name)); hdw->i2c_adap.dev.parent = &hdw->usb_dev->dev; hdw->i2c_adap.algo = &hdw->i2c_algo; hdw->i2c_adap.algo_data = hdw; hdw->i2c_linked = !0; i2c_set_adapdata(&hdw->i2c_adap, &hdw->v4l2_dev); i2c_add_adapter(&hdw->i2c_adap); if (hdw->i2c_func[0x18] == i2c_24xxx_ir) { /* Probe for a different type of IR receiver on this device. This is really the only way to differentiate older 24xxx devices from 24xxx variants that include an IR blaster. If the IR blaster is present, the IR receiver is part of that chip and thus we must disable the emulated IR receiver. */ if (do_i2c_probe(hdw, 0x71)) { pvr2_trace(PVR2_TRACE_INFO, "Device has newer IR hardware; disabling unneeded virtual IR device"); hdw->i2c_func[0x18] = NULL; /* Remember that this is a different device... */ hdw->ir_scheme_active = PVR2_IR_SCHEME_24XXX_MCE; } } if (i2c_scan) do_i2c_scan(hdw); pvr2_i2c_register_ir(hdw); } void pvr2_i2c_core_done(struct pvr2_hdw *hdw) { if (hdw->i2c_linked) { i2c_del_adapter(&hdw->i2c_adap); hdw->i2c_linked = 0; } }
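/*
 * Hedged sketch (not part of pvrusb2-i2c-core.c): the shape of the only
 * multi-message transaction pvr2_i2c_xfer() accepts, a plain write
 * immediately followed by an I2C_M_RD read from the same client address.
 * The address 0x44 and the 16-bit register layout are illustrative only;
 * messages with differing addresses would be rejected by the check above.
 */
#include <linux/errno.h>
#include <linux/i2c.h>

static int example_read_reg16(struct i2c_adapter *adap, u16 reg, u8 *val)
{
	u8 wbuf[2] = { reg >> 8, reg & 0xff };
	struct i2c_msg msgs[2] = {
		{ .addr = 0x44, .flags = 0,        .len = 2, .buf = wbuf },
		{ .addr = 0x44, .flags = I2C_M_RD, .len = 1, .buf = val  },
	};
	int ret = i2c_transfer(adap, msgs, 2);

	/* i2c_transfer() returns the number of messages handled on success. */
	if (ret < 0)
		return ret;
	return ret == 2 ? 0 : -EIO;
}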
// SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/compat.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/time.h> #include <linux/pm_qos.h> #include <linux/io.h> #include <linux/dma-mapping.h> #include <linux/vmalloc.h> #include <sound/core.h> #include <sound/control.h> #include <sound/info.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include <sound/timer.h> #include <sound/minors.h> #include <linux/uio.h> #include <linux/delay.h> #include "pcm_local.h" #ifdef CONFIG_SND_DEBUG #define CREATE_TRACE_POINTS #include "pcm_param_trace.h" #else #define trace_hw_mask_param_enabled() 0 #define trace_hw_interval_param_enabled() 0 #define trace_hw_mask_param(substream, type, index, prev, curr) #define trace_hw_interval_param(substream, type, index, prev, curr) #endif /* * Compatibility */ struct snd_pcm_hw_params_old { unsigned int flags; unsigned int masks[SNDRV_PCM_HW_PARAM_SUBFORMAT - SNDRV_PCM_HW_PARAM_ACCESS + 1]; struct snd_interval intervals[SNDRV_PCM_HW_PARAM_TICK_TIME - SNDRV_PCM_HW_PARAM_SAMPLE_BITS + 1]; unsigned int rmask; unsigned int cmask; unsigned int info; unsigned int msbits; unsigned int rate_num; unsigned int rate_den; snd_pcm_uframes_t fifo_size; unsigned char reserved[64]; }; #ifdef CONFIG_SND_SUPPORT_OLD_API #define SNDRV_PCM_IOCTL_HW_REFINE_OLD _IOWR('A', 0x10, struct snd_pcm_hw_params_old) #define SNDRV_PCM_IOCTL_HW_PARAMS_OLD _IOWR('A', 0x11, struct snd_pcm_hw_params_old) static int snd_pcm_hw_refine_old_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params_old __user * _oparams); static int
snd_pcm_hw_params_old_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params_old __user * _oparams); #endif static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream); /* * */ static DECLARE_RWSEM(snd_pcm_link_rwsem); void snd_pcm_group_init(struct snd_pcm_group *group) { spin_lock_init(&group->lock); mutex_init(&group->mutex); INIT_LIST_HEAD(&group->substreams); refcount_set(&group->refs, 1); } /* define group lock helpers */ #define DEFINE_PCM_GROUP_LOCK(action, mutex_action) \ static void snd_pcm_group_ ## action(struct snd_pcm_group *group, bool nonatomic) \ { \ if (nonatomic) \ mutex_ ## mutex_action(&group->mutex); \ else \ spin_ ## action(&group->lock); \ } DEFINE_PCM_GROUP_LOCK(lock, lock); DEFINE_PCM_GROUP_LOCK(unlock, unlock); DEFINE_PCM_GROUP_LOCK(lock_irq, lock); DEFINE_PCM_GROUP_LOCK(unlock_irq, unlock); /** * snd_pcm_stream_lock - Lock the PCM stream * @substream: PCM substream * * This locks the PCM stream's spinlock or mutex depending on the nonatomic * flag of the given substream. This also takes the global link rw lock * (or rw sem), too, for avoiding the race with linked streams. */ void snd_pcm_stream_lock(struct snd_pcm_substream *substream) { snd_pcm_group_lock(&substream->self_group, substream->pcm->nonatomic); } EXPORT_SYMBOL_GPL(snd_pcm_stream_lock); /** * snd_pcm_stream_unlock - Unlock the PCM stream * @substream: PCM substream * * This unlocks the PCM stream that has been locked via snd_pcm_stream_lock(). */ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream) { snd_pcm_group_unlock(&substream->self_group, substream->pcm->nonatomic); } EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock); /** * snd_pcm_stream_lock_irq - Lock the PCM stream * @substream: PCM substream * * This locks the PCM stream like snd_pcm_stream_lock() and disables the local * IRQ (only when nonatomic is false). In nonatomic case, this is identical * as snd_pcm_stream_lock(). */ void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) { snd_pcm_group_lock_irq(&substream->self_group, substream->pcm->nonatomic); } EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); static void snd_pcm_stream_lock_nested(struct snd_pcm_substream *substream) { struct snd_pcm_group *group = &substream->self_group; if (substream->pcm->nonatomic) mutex_lock_nested(&group->mutex, SINGLE_DEPTH_NESTING); else spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING); } /** * snd_pcm_stream_unlock_irq - Unlock the PCM stream * @substream: PCM substream * * This is a counter-part of snd_pcm_stream_lock_irq(). 
*/ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream) { snd_pcm_group_unlock_irq(&substream->self_group, substream->pcm->nonatomic); } EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) { unsigned long flags = 0; if (substream->pcm->nonatomic) mutex_lock(&substream->self_group.mutex); else spin_lock_irqsave(&substream->self_group.lock, flags); return flags; } EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave); unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream) { unsigned long flags = 0; if (substream->pcm->nonatomic) mutex_lock_nested(&substream->self_group.mutex, SINGLE_DEPTH_NESTING); else spin_lock_irqsave_nested(&substream->self_group.lock, flags, SINGLE_DEPTH_NESTING); return flags; } EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave_nested); /** * snd_pcm_stream_unlock_irqrestore - Unlock the PCM stream * @substream: PCM substream * @flags: irq flags * * This is a counter-part of snd_pcm_stream_lock_irqsave(). */ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, unsigned long flags) { if (substream->pcm->nonatomic) mutex_unlock(&substream->self_group.mutex); else spin_unlock_irqrestore(&substream->self_group.lock, flags); } EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore); /* Run PCM ioctl ops */ static int snd_pcm_ops_ioctl(struct snd_pcm_substream *substream, unsigned cmd, void *arg) { if (substream->ops->ioctl) return substream->ops->ioctl(substream, cmd, arg); else return snd_pcm_lib_ioctl(substream, cmd, arg); } int snd_pcm_info(struct snd_pcm_substream *substream, struct snd_pcm_info *info) { struct snd_pcm *pcm = substream->pcm; struct snd_pcm_str *pstr = substream->pstr; memset(info, 0, sizeof(*info)); info->card = pcm->card->number; info->device = pcm->device; info->stream = substream->stream; info->subdevice = substream->number; strscpy(info->id, pcm->id, sizeof(info->id)); strscpy(info->name, pcm->name, sizeof(info->name)); info->dev_class = pcm->dev_class; info->dev_subclass = pcm->dev_subclass; info->subdevices_count = pstr->substream_count; info->subdevices_avail = pstr->substream_count - pstr->substream_opened; strscpy(info->subname, substream->name, sizeof(info->subname)); return 0; } int snd_pcm_info_user(struct snd_pcm_substream *substream, struct snd_pcm_info __user * _info) { struct snd_pcm_info *info __free(kfree) = NULL; int err; info = kmalloc(sizeof(*info), GFP_KERNEL); if (! 
info) return -ENOMEM; err = snd_pcm_info(substream, info); if (err >= 0) { if (copy_to_user(_info, info, sizeof(*info))) err = -EFAULT; } return err; } /* macro for simplified cast */ #define PARAM_MASK_BIT(b) (1U << (__force int)(b)) static bool hw_support_mmap(struct snd_pcm_substream *substream) { struct snd_dma_buffer *dmabuf; if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; if (substream->ops->mmap || substream->ops->page) return true; dmabuf = snd_pcm_get_dma_buf(substream); if (!dmabuf) dmabuf = &substream->dma_buffer; switch (dmabuf->dev.type) { case SNDRV_DMA_TYPE_UNKNOWN: /* we can't know the device, so just assume that the driver does * everything right */ return true; case SNDRV_DMA_TYPE_CONTINUOUS: case SNDRV_DMA_TYPE_VMALLOC: return true; default: return dma_can_mmap(dmabuf->dev.dev); } } static int constrain_mask_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_hw_constraints *constrs = &substream->runtime->hw_constraints; struct snd_mask *m; unsigned int k; struct snd_mask old_mask __maybe_unused; int changed; for (k = SNDRV_PCM_HW_PARAM_FIRST_MASK; k <= SNDRV_PCM_HW_PARAM_LAST_MASK; k++) { m = hw_param_mask(params, k); if (snd_mask_empty(m)) return -EINVAL; /* This parameter is not requested to change by a caller. */ if (!(params->rmask & PARAM_MASK_BIT(k))) continue; if (trace_hw_mask_param_enabled()) old_mask = *m; changed = snd_mask_refine(m, constrs_mask(constrs, k)); if (changed < 0) return changed; if (changed == 0) continue; /* Set corresponding flag so that the caller gets it. */ trace_hw_mask_param(substream, k, 0, &old_mask, m); params->cmask |= PARAM_MASK_BIT(k); } return 0; } static int constrain_interval_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_hw_constraints *constrs = &substream->runtime->hw_constraints; struct snd_interval *i; unsigned int k; struct snd_interval old_interval __maybe_unused; int changed; for (k = SNDRV_PCM_HW_PARAM_FIRST_INTERVAL; k <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; k++) { i = hw_param_interval(params, k); if (snd_interval_empty(i)) return -EINVAL; /* This parameter is not requested to change by a caller. */ if (!(params->rmask & PARAM_MASK_BIT(k))) continue; if (trace_hw_interval_param_enabled()) old_interval = *i; changed = snd_interval_refine(i, constrs_interval(constrs, k)); if (changed < 0) return changed; if (changed == 0) continue; /* Set corresponding flag so that the caller gets it. */ trace_hw_interval_param(substream, k, 0, &old_interval, i); params->cmask |= PARAM_MASK_BIT(k); } return 0; } static int constrain_params_by_rules(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_hw_constraints *constrs = &substream->runtime->hw_constraints; unsigned int k; unsigned int *rstamps __free(kfree) = NULL; unsigned int vstamps[SNDRV_PCM_HW_PARAM_LAST_INTERVAL + 1]; unsigned int stamp; struct snd_pcm_hw_rule *r; unsigned int d; struct snd_mask old_mask __maybe_unused; struct snd_interval old_interval __maybe_unused; bool again; int changed, err = 0; /* * Each application of rule has own sequence number. * * Each member of 'rstamps' array represents the sequence number of * recent application of corresponding rule. */ rstamps = kcalloc(constrs->rules_num, sizeof(unsigned int), GFP_KERNEL); if (!rstamps) return -ENOMEM; /* * Each member of 'vstamps' array represents the sequence number of * recent application of rule in which corresponding parameters were * changed. 
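* For example, if rule k was last applied at rstamps[k] == 4 and a later rule then changes one of k's dependent parameters at stamp 6, the dependency check below sees vstamps[dep] (6) > rstamps[k] (4) and rule k is applied again on the next pass.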
* * In initial state, elements corresponding to parameters requested by * a caller is 1. For unrequested parameters, corresponding members * have 0 so that the parameters are never changed anymore. */ for (k = 0; k <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; k++) vstamps[k] = (params->rmask & PARAM_MASK_BIT(k)) ? 1 : 0; /* Due to the above design, actual sequence number starts at 2. */ stamp = 2; retry: /* Apply all rules in order. */ again = false; for (k = 0; k < constrs->rules_num; k++) { r = &constrs->rules[k]; /* * Check condition bits of this rule. When the rule has * some condition bits, parameter without the bits is * never processed. SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP * is an example of the condition bits. */ if (r->cond && !(r->cond & params->flags)) continue; /* * The 'deps' array includes maximum four dependencies * to SNDRV_PCM_HW_PARAM_XXXs for this rule. The fifth * member of this array is a sentinel and should be * negative value. * * This rule should be processed in this time when dependent * parameters were changed at former applications of the other * rules. */ for (d = 0; r->deps[d] >= 0; d++) { if (vstamps[r->deps[d]] > rstamps[k]) break; } if (r->deps[d] < 0) continue; if (trace_hw_mask_param_enabled()) { if (hw_is_mask(r->var)) old_mask = *hw_param_mask(params, r->var); } if (trace_hw_interval_param_enabled()) { if (hw_is_interval(r->var)) old_interval = *hw_param_interval(params, r->var); } changed = r->func(params, r); if (changed < 0) return changed; /* * When the parameter is changed, notify it to the caller * by corresponding returned bit, then preparing for next * iteration. */ if (changed && r->var >= 0) { if (hw_is_mask(r->var)) { trace_hw_mask_param(substream, r->var, k + 1, &old_mask, hw_param_mask(params, r->var)); } if (hw_is_interval(r->var)) { trace_hw_interval_param(substream, r->var, k + 1, &old_interval, hw_param_interval(params, r->var)); } params->cmask |= PARAM_MASK_BIT(r->var); vstamps[r->var] = stamp; again = true; } rstamps[k] = stamp++; } /* Iterate to evaluate all rules till no parameters are changed. 
*/ if (again) goto retry; return err; } static int fixup_unreferenced_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { const struct snd_interval *i; const struct snd_mask *m; struct snd_mask *m_rw; int err; if (!params->msbits) { i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_SAMPLE_BITS); if (snd_interval_single(i)) params->msbits = snd_interval_value(i); m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); if (snd_mask_single(m)) { snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m); params->msbits = snd_pcm_format_width(format); } } if (params->msbits) { m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); if (snd_mask_single(m)) { snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m); if (snd_pcm_format_linear(format) && snd_pcm_format_width(format) != params->msbits) { m_rw = hw_param_mask(params, SNDRV_PCM_HW_PARAM_SUBFORMAT); snd_mask_reset(m_rw, (__force unsigned)SNDRV_PCM_SUBFORMAT_MSBITS_MAX); if (snd_mask_empty(m_rw)) return -EINVAL; } } } if (!params->rate_den) { i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE); if (snd_interval_single(i)) { params->rate_num = snd_interval_value(i); params->rate_den = 1; } } if (!params->fifo_size) { m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_CHANNELS); if (snd_mask_single(m) && snd_interval_single(i)) { err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_FIFO_SIZE, params); if (err < 0) return err; } } if (!params->info) { params->info = substream->runtime->hw.info; params->info &= ~(SNDRV_PCM_INFO_FIFO_IN_FRAMES | SNDRV_PCM_INFO_DRAIN_TRIGGER); if (!hw_support_mmap(substream)) params->info &= ~(SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID); } return 0; } int snd_pcm_hw_refine(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { int err; params->info = 0; params->fifo_size = 0; if (params->rmask & PARAM_MASK_BIT(SNDRV_PCM_HW_PARAM_SAMPLE_BITS)) params->msbits = 0; if (params->rmask & PARAM_MASK_BIT(SNDRV_PCM_HW_PARAM_RATE)) { params->rate_num = 0; params->rate_den = 0; } err = constrain_mask_params(substream, params); if (err < 0) return err; err = constrain_interval_params(substream, params); if (err < 0) return err; err = constrain_params_by_rules(substream, params); if (err < 0) return err; params->rmask = 0; return 0; } EXPORT_SYMBOL(snd_pcm_hw_refine); static int snd_pcm_hw_refine_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params __user * _params) { struct snd_pcm_hw_params *params __free(kfree) = NULL; int err; params = memdup_user(_params, sizeof(*params)); if (IS_ERR(params)) return PTR_ERR(no_free_ptr(params)); err = snd_pcm_hw_refine(substream, params); if (err < 0) return err; err = fixup_unreferenced_params(substream, params); if (err < 0) return err; if (copy_to_user(_params, params, sizeof(*params))) return -EFAULT; return 0; } static int period_to_usecs(struct snd_pcm_runtime *runtime) { int usecs; if (! 
runtime->rate) return -1; /* invalid */ /* take 75% of period time as the deadline */ usecs = (750000 / runtime->rate) * runtime->period_size; usecs += ((750000 % runtime->rate) * runtime->period_size) / runtime->rate; return usecs; } static void snd_pcm_set_state(struct snd_pcm_substream *substream, snd_pcm_state_t state) { guard(pcm_stream_lock_irq)(substream); if (substream->runtime->state != SNDRV_PCM_STATE_DISCONNECTED) __snd_pcm_set_state(substream->runtime, state); } static inline void snd_pcm_timer_notify(struct snd_pcm_substream *substream, int event) { #ifdef CONFIG_SND_PCM_TIMER if (substream->timer) snd_timer_notify(substream->timer, event, &substream->runtime->trigger_tstamp); #endif } void snd_pcm_sync_stop(struct snd_pcm_substream *substream, bool sync_irq) { if (substream->runtime && substream->runtime->stop_operating) { substream->runtime->stop_operating = false; if (substream->ops && substream->ops->sync_stop) substream->ops->sync_stop(substream); else if (sync_irq && substream->pcm->card->sync_irq > 0) synchronize_irq(substream->pcm->card->sync_irq); } } /** * snd_pcm_hw_params_choose - choose a configuration defined by @params * @pcm: PCM instance * @params: the hw_params instance * * Choose one configuration from configuration space defined by @params. * The configuration chosen is that obtained fixing in this order: * first access, first format, first subformat, min channels, * min rate, min period time, max buffer size, min tick time * * Return: Zero if successful, or a negative error code on failure. */ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params) { static const int vars[] = { SNDRV_PCM_HW_PARAM_ACCESS, SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_SUBFORMAT, SNDRV_PCM_HW_PARAM_CHANNELS, SNDRV_PCM_HW_PARAM_RATE, SNDRV_PCM_HW_PARAM_PERIOD_TIME, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_TICK_TIME, -1 }; const int *v; struct snd_mask old_mask __maybe_unused; struct snd_interval old_interval __maybe_unused; int changed; for (v = vars; *v != -1; v++) { /* Keep old parameter to trace. */ if (trace_hw_mask_param_enabled()) { if (hw_is_mask(*v)) old_mask = *hw_param_mask(params, *v); } if (trace_hw_interval_param_enabled()) { if (hw_is_interval(*v)) old_interval = *hw_param_interval(params, *v); } if (*v != SNDRV_PCM_HW_PARAM_BUFFER_SIZE) changed = snd_pcm_hw_param_first(pcm, params, *v, NULL); else changed = snd_pcm_hw_param_last(pcm, params, *v, NULL); if (changed < 0) return changed; if (changed == 0) continue; /* Trace the changed parameter. 
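		 *
		 * A minimal sketch of the selection policy applied above
		 * (hypothetical helper, for illustration only): the buffer
		 * size is pushed to its maximum, every other variable in
		 * vars[] is pushed to its first (minimum) value:
		 *
		 *	static int choose_one(struct snd_pcm_substream *pcm,
		 *			      struct snd_pcm_hw_params *params,
		 *			      int var)
		 *	{
		 *		if (var == SNDRV_PCM_HW_PARAM_BUFFER_SIZE)
		 *			return snd_pcm_hw_param_last(pcm, params,
		 *						     var, NULL);
		 *		return snd_pcm_hw_param_first(pcm, params,
		 *					      var, NULL);
		 *	}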
*/ if (hw_is_mask(*v)) { trace_hw_mask_param(pcm, *v, 0, &old_mask, hw_param_mask(params, *v)); } if (hw_is_interval(*v)) { trace_hw_interval_param(pcm, *v, 0, &old_interval, hw_param_interval(params, *v)); } } return 0; } /* acquire buffer_mutex; if it's in r/w operation, return -EBUSY, otherwise * block the further r/w operations */ static int snd_pcm_buffer_access_lock(struct snd_pcm_runtime *runtime) { if (!atomic_dec_unless_positive(&runtime->buffer_accessing)) return -EBUSY; mutex_lock(&runtime->buffer_mutex); return 0; /* keep buffer_mutex, unlocked by below */ } /* release buffer_mutex and clear r/w access flag */ static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime) { mutex_unlock(&runtime->buffer_mutex); atomic_inc(&runtime->buffer_accessing); } #if IS_ENABLED(CONFIG_SND_PCM_OSS) #define is_oss_stream(substream) ((substream)->oss.oss) #else #define is_oss_stream(substream) false #endif static int snd_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_runtime *runtime; int err, usecs; unsigned int bits; snd_pcm_uframes_t frames; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; err = snd_pcm_buffer_access_lock(runtime); if (err < 0) return err; scoped_guard(pcm_stream_lock_irq, substream) { switch (runtime->state) { case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: if (!is_oss_stream(substream) && atomic_read(&substream->mmap_count)) err = -EBADFD; break; default: err = -EBADFD; break; } } if (err) goto unlock; snd_pcm_sync_stop(substream, true); params->rmask = ~0U; err = snd_pcm_hw_refine(substream, params); if (err < 0) goto _error; err = snd_pcm_hw_params_choose(substream, params); if (err < 0) goto _error; err = fixup_unreferenced_params(substream, params); if (err < 0) goto _error; if (substream->managed_buffer_alloc) { err = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params)); if (err < 0) goto _error; runtime->buffer_changed = err > 0; } if (substream->ops->hw_params != NULL) { err = substream->ops->hw_params(substream, params); if (err < 0) goto _error; } runtime->access = params_access(params); runtime->format = params_format(params); runtime->subformat = params_subformat(params); runtime->channels = params_channels(params); runtime->rate = params_rate(params); runtime->period_size = params_period_size(params); runtime->periods = params_periods(params); runtime->buffer_size = params_buffer_size(params); runtime->info = params->info; runtime->rate_num = params->rate_num; runtime->rate_den = params->rate_den; runtime->no_period_wakeup = (params->info & SNDRV_PCM_INFO_NO_PERIOD_WAKEUP) && (params->flags & SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP); bits = snd_pcm_format_physical_width(runtime->format); runtime->sample_bits = bits; bits *= runtime->channels; runtime->frame_bits = bits; frames = 1; while (bits % 8 != 0) { bits *= 2; frames *= 2; } runtime->byte_align = bits / 8; runtime->min_align = frames; /* Default sw params */ runtime->tstamp_mode = SNDRV_PCM_TSTAMP_NONE; runtime->period_step = 1; runtime->control->avail_min = runtime->period_size; runtime->start_threshold = 1; runtime->stop_threshold = runtime->buffer_size; runtime->silence_threshold = 0; runtime->silence_size = 0; runtime->boundary = runtime->buffer_size; while (runtime->boundary * 2 <= LONG_MAX - runtime->buffer_size) runtime->boundary *= 2; /* clear the buffer for avoiding possible kernel info leaks */ if (runtime->dma_area && !substream->ops->copy) { size_t 
size = runtime->dma_bytes; if (runtime->info & SNDRV_PCM_INFO_MMAP) size = PAGE_ALIGN(size); memset(runtime->dma_area, 0, size); } snd_pcm_timer_resolution_change(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); if (cpu_latency_qos_request_active(&substream->latency_pm_qos_req)) cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); usecs = period_to_usecs(runtime); if (usecs >= 0) cpu_latency_qos_add_request(&substream->latency_pm_qos_req, usecs); err = 0; _error: if (err) { /* hardware might be unusable from this time, * so we force application to retry to set * the correct hardware parameter settings */ snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); if (substream->ops->hw_free != NULL) substream->ops->hw_free(substream); if (substream->managed_buffer_alloc) snd_pcm_lib_free_pages(substream); } unlock: snd_pcm_buffer_access_unlock(runtime); return err; } static int snd_pcm_hw_params_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params __user * _params) { struct snd_pcm_hw_params *params __free(kfree) = NULL; int err; params = memdup_user(_params, sizeof(*params)); if (IS_ERR(params)) return PTR_ERR(no_free_ptr(params)); err = snd_pcm_hw_params(substream, params); if (err < 0) return err; if (copy_to_user(_params, params, sizeof(*params))) return -EFAULT; return err; } static int do_hw_free(struct snd_pcm_substream *substream) { int result = 0; snd_pcm_sync_stop(substream, true); if (substream->ops->hw_free) result = substream->ops->hw_free(substream); if (substream->managed_buffer_alloc) snd_pcm_lib_free_pages(substream); return result; } static int snd_pcm_hw_free(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; int result = 0; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; result = snd_pcm_buffer_access_lock(runtime); if (result < 0) return result; scoped_guard(pcm_stream_lock_irq, substream) { switch (runtime->state) { case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: if (atomic_read(&substream->mmap_count)) result = -EBADFD; break; default: result = -EBADFD; break; } } if (result) goto unlock; result = do_hw_free(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); unlock: snd_pcm_buffer_access_unlock(runtime); return result; } static int snd_pcm_sw_params(struct snd_pcm_substream *substream, struct snd_pcm_sw_params *params) { struct snd_pcm_runtime *runtime; int err; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; scoped_guard(pcm_stream_lock_irq, substream) { if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; } if (params->tstamp_mode < 0 || params->tstamp_mode > SNDRV_PCM_TSTAMP_LAST) return -EINVAL; if (params->proto >= SNDRV_PROTOCOL_VERSION(2, 0, 12) && params->tstamp_type > SNDRV_PCM_TSTAMP_TYPE_LAST) return -EINVAL; if (params->avail_min == 0) return -EINVAL; if (params->silence_size >= runtime->boundary) { if (params->silence_threshold != 0) return -EINVAL; } else { if (params->silence_size > params->silence_threshold) return -EINVAL; if (params->silence_threshold > runtime->buffer_size) return -EINVAL; } err = 0; scoped_guard(pcm_stream_lock_irq, substream) { runtime->tstamp_mode = params->tstamp_mode; if (params->proto >= SNDRV_PROTOCOL_VERSION(2, 0, 12)) runtime->tstamp_type = params->tstamp_type; runtime->period_step = params->period_step; runtime->control->avail_min = params->avail_min; runtime->start_threshold = params->start_threshold; 
runtime->stop_threshold = params->stop_threshold; runtime->silence_threshold = params->silence_threshold; runtime->silence_size = params->silence_size; params->boundary = runtime->boundary; if (snd_pcm_running(substream)) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); err = snd_pcm_update_state(substream, runtime); } } return err; } static int snd_pcm_sw_params_user(struct snd_pcm_substream *substream, struct snd_pcm_sw_params __user * _params) { struct snd_pcm_sw_params params; int err; if (copy_from_user(&params, _params, sizeof(params))) return -EFAULT; err = snd_pcm_sw_params(substream, &params); if (copy_to_user(_params, &params, sizeof(params))) return -EFAULT; return err; } static inline snd_pcm_uframes_t snd_pcm_calc_delay(struct snd_pcm_substream *substream) { snd_pcm_uframes_t delay; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) delay = snd_pcm_playback_hw_avail(substream->runtime); else delay = snd_pcm_capture_avail(substream->runtime); return delay + substream->runtime->delay; } int snd_pcm_status64(struct snd_pcm_substream *substream, struct snd_pcm_status64 *status) { struct snd_pcm_runtime *runtime = substream->runtime; guard(pcm_stream_lock_irq)(substream); snd_pcm_unpack_audio_tstamp_config(status->audio_tstamp_data, &runtime->audio_tstamp_config); /* backwards compatible behavior */ if (runtime->audio_tstamp_config.type_requested == SNDRV_PCM_AUDIO_TSTAMP_TYPE_COMPAT) { if (runtime->hw.info & SNDRV_PCM_INFO_HAS_WALL_CLOCK) runtime->audio_tstamp_config.type_requested = SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK; else runtime->audio_tstamp_config.type_requested = SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT; runtime->audio_tstamp_report.valid = 0; } else runtime->audio_tstamp_report.valid = 1; status->state = runtime->state; status->suspended_state = runtime->suspended_state; if (status->state == SNDRV_PCM_STATE_OPEN) return 0; status->trigger_tstamp_sec = runtime->trigger_tstamp.tv_sec; status->trigger_tstamp_nsec = runtime->trigger_tstamp.tv_nsec; if (snd_pcm_running(substream)) { snd_pcm_update_hw_ptr(substream); if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE) { status->tstamp_sec = runtime->status->tstamp.tv_sec; status->tstamp_nsec = runtime->status->tstamp.tv_nsec; status->driver_tstamp_sec = runtime->driver_tstamp.tv_sec; status->driver_tstamp_nsec = runtime->driver_tstamp.tv_nsec; status->audio_tstamp_sec = runtime->status->audio_tstamp.tv_sec; status->audio_tstamp_nsec = runtime->status->audio_tstamp.tv_nsec; if (runtime->audio_tstamp_report.valid == 1) /* backwards compatibility, no report provided in COMPAT mode */ snd_pcm_pack_audio_tstamp_report(&status->audio_tstamp_data, &status->audio_tstamp_accuracy, &runtime->audio_tstamp_report); goto _tstamp_end; } } else { /* get tstamp only in fallback mode and only if enabled */ if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE) { struct timespec64 tstamp; snd_pcm_gettime(runtime, &tstamp); status->tstamp_sec = tstamp.tv_sec; status->tstamp_nsec = tstamp.tv_nsec; } } _tstamp_end: status->appl_ptr = runtime->control->appl_ptr; status->hw_ptr = runtime->status->hw_ptr; status->avail = snd_pcm_avail(substream); status->delay = snd_pcm_running(substream) ? 
snd_pcm_calc_delay(substream) : 0; status->avail_max = runtime->avail_max; status->overrange = runtime->overrange; runtime->avail_max = 0; runtime->overrange = 0; return 0; } static int snd_pcm_status_user64(struct snd_pcm_substream *substream, struct snd_pcm_status64 __user * _status, bool ext) { struct snd_pcm_status64 status; int res; memset(&status, 0, sizeof(status)); /* * with extension, parameters are read/write, * get audio_tstamp_data from user, * ignore rest of status structure */ if (ext && get_user(status.audio_tstamp_data, (u32 __user *)(&_status->audio_tstamp_data))) return -EFAULT; res = snd_pcm_status64(substream, &status); if (res < 0) return res; if (copy_to_user(_status, &status, sizeof(status))) return -EFAULT; return 0; } static int snd_pcm_status_user32(struct snd_pcm_substream *substream, struct snd_pcm_status32 __user * _status, bool ext) { struct snd_pcm_status64 status64; struct snd_pcm_status32 status32; int res; memset(&status64, 0, sizeof(status64)); memset(&status32, 0, sizeof(status32)); /* * with extension, parameters are read/write, * get audio_tstamp_data from user, * ignore rest of status structure */ if (ext && get_user(status64.audio_tstamp_data, (u32 __user *)(&_status->audio_tstamp_data))) return -EFAULT; res = snd_pcm_status64(substream, &status64); if (res < 0) return res; status32 = (struct snd_pcm_status32) { .state = status64.state, .trigger_tstamp_sec = status64.trigger_tstamp_sec, .trigger_tstamp_nsec = status64.trigger_tstamp_nsec, .tstamp_sec = status64.tstamp_sec, .tstamp_nsec = status64.tstamp_nsec, .appl_ptr = status64.appl_ptr, .hw_ptr = status64.hw_ptr, .delay = status64.delay, .avail = status64.avail, .avail_max = status64.avail_max, .overrange = status64.overrange, .suspended_state = status64.suspended_state, .audio_tstamp_data = status64.audio_tstamp_data, .audio_tstamp_sec = status64.audio_tstamp_sec, .audio_tstamp_nsec = status64.audio_tstamp_nsec, .driver_tstamp_sec = status64.audio_tstamp_sec, .driver_tstamp_nsec = status64.audio_tstamp_nsec, .audio_tstamp_accuracy = status64.audio_tstamp_accuracy, }; if (copy_to_user(_status, &status32, sizeof(status32))) return -EFAULT; return 0; } static int snd_pcm_channel_info(struct snd_pcm_substream *substream, struct snd_pcm_channel_info * info) { struct snd_pcm_runtime *runtime; unsigned int channel; channel = info->channel; runtime = substream->runtime; scoped_guard(pcm_stream_lock_irq, substream) { if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; } if (channel >= runtime->channels) return -EINVAL; memset(info, 0, sizeof(*info)); info->channel = channel; return snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_CHANNEL_INFO, info); } static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream, struct snd_pcm_channel_info __user * _info) { struct snd_pcm_channel_info info; int res; if (copy_from_user(&info, _info, sizeof(info))) return -EFAULT; res = snd_pcm_channel_info(substream, &info); if (res < 0) return res; if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static void snd_pcm_trigger_tstamp(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->trigger_master == NULL) return; if (runtime->trigger_master == substream) { if (!runtime->trigger_tstamp_latched) snd_pcm_gettime(runtime, &runtime->trigger_tstamp); } else { snd_pcm_trigger_tstamp(runtime->trigger_master); runtime->trigger_tstamp = runtime->trigger_master->runtime->trigger_tstamp; } runtime->trigger_master = NULL; } #define 
ACTION_ARG_IGNORE (__force snd_pcm_state_t)0 struct action_ops { int (*pre_action)(struct snd_pcm_substream *substream, snd_pcm_state_t state); int (*do_action)(struct snd_pcm_substream *substream, snd_pcm_state_t state); void (*undo_action)(struct snd_pcm_substream *substream, snd_pcm_state_t state); void (*post_action)(struct snd_pcm_substream *substream, snd_pcm_state_t state); }; /* * this functions is core for handling of linked stream * Note: the stream state might be changed also on failure * Note2: call with calling stream lock + link lock */ static int snd_pcm_action_group(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state, bool stream_lock) { struct snd_pcm_substream *s = NULL; struct snd_pcm_substream *s1; int res = 0, depth = 1; snd_pcm_group_for_each_entry(s, substream) { if (s != substream) { if (!stream_lock) mutex_lock_nested(&s->runtime->buffer_mutex, depth); else if (s->pcm->nonatomic) mutex_lock_nested(&s->self_group.mutex, depth); else spin_lock_nested(&s->self_group.lock, depth); depth++; } res = ops->pre_action(s, state); if (res < 0) goto _unlock; } snd_pcm_group_for_each_entry(s, substream) { res = ops->do_action(s, state); if (res < 0) { if (ops->undo_action) { snd_pcm_group_for_each_entry(s1, substream) { if (s1 == s) /* failed stream */ break; ops->undo_action(s1, state); } } s = NULL; /* unlock all */ goto _unlock; } } snd_pcm_group_for_each_entry(s, substream) { ops->post_action(s, state); } _unlock: /* unlock streams */ snd_pcm_group_for_each_entry(s1, substream) { if (s1 != substream) { if (!stream_lock) mutex_unlock(&s1->runtime->buffer_mutex); else if (s1->pcm->nonatomic) mutex_unlock(&s1->self_group.mutex); else spin_unlock(&s1->self_group.lock); } if (s1 == s) /* end */ break; } return res; } /* * Note: call with stream lock */ static int snd_pcm_action_single(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state) { int res; res = ops->pre_action(substream, state); if (res < 0) return res; res = ops->do_action(substream, state); if (res == 0) ops->post_action(substream, state); else if (ops->undo_action) ops->undo_action(substream, state); return res; } static void snd_pcm_group_assign(struct snd_pcm_substream *substream, struct snd_pcm_group *new_group) { substream->group = new_group; list_move(&substream->link_list, &new_group->substreams); } /* * Unref and unlock the group, but keep the stream lock; * when the group becomes empty and no longer referred, destroy itself */ static void snd_pcm_group_unref(struct snd_pcm_group *group, struct snd_pcm_substream *substream) { bool do_free; if (!group) return; do_free = refcount_dec_and_test(&group->refs); snd_pcm_group_unlock(group, substream->pcm->nonatomic); if (do_free) kfree(group); } /* * Lock the group inside a stream lock and reference it; * return the locked group object, or NULL if not linked */ static struct snd_pcm_group * snd_pcm_stream_group_ref(struct snd_pcm_substream *substream) { bool nonatomic = substream->pcm->nonatomic; struct snd_pcm_group *group; bool trylock; for (;;) { if (!snd_pcm_stream_linked(substream)) return NULL; group = substream->group; /* block freeing the group object */ refcount_inc(&group->refs); trylock = nonatomic ? 
mutex_trylock(&group->mutex) : spin_trylock(&group->lock); if (trylock) break; /* OK */ /* re-lock for avoiding ABBA deadlock */ snd_pcm_stream_unlock(substream); snd_pcm_group_lock(group, nonatomic); snd_pcm_stream_lock(substream); /* check the group again; the above opens a small race window */ if (substream->group == group) break; /* OK */ /* group changed, try again */ snd_pcm_group_unref(group, substream); } return group; } /* * Note: call with stream lock */ static int snd_pcm_action(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_group *group; int res; group = snd_pcm_stream_group_ref(substream); if (group) res = snd_pcm_action_group(ops, substream, state, true); else res = snd_pcm_action_single(ops, substream, state); snd_pcm_group_unref(group, substream); return res; } /* * Note: don't use any locks before */ static int snd_pcm_action_lock_irq(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state) { guard(pcm_stream_lock_irq)(substream); return snd_pcm_action(ops, substream, state); } /* */ static int snd_pcm_action_nonatomic(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state) { int res; /* Guarantee the group members won't change during non-atomic action */ guard(rwsem_read)(&snd_pcm_link_rwsem); res = snd_pcm_buffer_access_lock(substream->runtime); if (res < 0) return res; if (snd_pcm_stream_linked(substream)) res = snd_pcm_action_group(ops, substream, state, false); else res = snd_pcm_action_single(ops, substream, state); snd_pcm_buffer_access_unlock(substream->runtime); return res; } /* * start callbacks */ static int snd_pcm_pre_start(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->state != SNDRV_PCM_STATE_PREPARED) return -EBADFD; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && !snd_pcm_playback_data(substream)) return -EPIPE; runtime->trigger_tstamp_latched = false; runtime->trigger_master = substream; return 0; } static int snd_pcm_do_start(struct snd_pcm_substream *substream, snd_pcm_state_t state) { int err; if (substream->runtime->trigger_master != substream) return 0; err = substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_START); /* XRUN happened during the start */ if (err == -EPIPE) __snd_pcm_set_state(substream->runtime, SNDRV_PCM_STATE_XRUN); return err; } static void snd_pcm_undo_start(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream) { substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_STOP); substream->runtime->stop_operating = true; } } static void snd_pcm_post_start(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); runtime->hw_ptr_jiffies = jiffies; runtime->hw_ptr_buffer_jiffies = (runtime->buffer_size * HZ) / runtime->rate; __snd_pcm_set_state(runtime, state); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MSTART); } static const struct action_ops snd_pcm_action_start = { .pre_action = snd_pcm_pre_start, .do_action = snd_pcm_do_start, .undo_action = snd_pcm_undo_start, .post_action = snd_pcm_post_start }; /** * snd_pcm_start - start all linked streams * @substream: the PCM substream instance * * Return: Zero if successful, or a negative error 
code. * The stream lock must be acquired before calling this function. */ int snd_pcm_start(struct snd_pcm_substream *substream) { return snd_pcm_action(&snd_pcm_action_start, substream, SNDRV_PCM_STATE_RUNNING); } /* take the stream lock and start the streams */ static int snd_pcm_start_lock_irq(struct snd_pcm_substream *substream) { return snd_pcm_action_lock_irq(&snd_pcm_action_start, substream, SNDRV_PCM_STATE_RUNNING); } /* * stop callbacks */ static int snd_pcm_pre_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; runtime->trigger_master = substream; return 0; } static int snd_pcm_do_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream && snd_pcm_running(substream)) { substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_STOP); substream->runtime->stop_operating = true; } return 0; /* unconditionally stop all substreams */ } static void snd_pcm_post_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->state != state) { snd_pcm_trigger_tstamp(substream); __snd_pcm_set_state(runtime, state); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MSTOP); } wake_up(&runtime->sleep); wake_up(&runtime->tsleep); } static const struct action_ops snd_pcm_action_stop = { .pre_action = snd_pcm_pre_stop, .do_action = snd_pcm_do_stop, .post_action = snd_pcm_post_stop }; /** * snd_pcm_stop - try to stop all running streams in the substream group * @substream: the PCM substream instance * @state: PCM state after stopping the stream * * The state of each stream is then changed to the given state unconditionally. * * Return: Zero if successful, or a negative error code. */ int snd_pcm_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { return snd_pcm_action(&snd_pcm_action_stop, substream, state); } EXPORT_SYMBOL(snd_pcm_stop); /** * snd_pcm_drain_done - stop the DMA only when the given stream is playback * @substream: the PCM substream * * After stopping, the state is changed to SETUP. * Unlike snd_pcm_stop(), this affects only the given stream. * * Return: Zero if successful, or a negative error code. */ int snd_pcm_drain_done(struct snd_pcm_substream *substream) { return snd_pcm_action_single(&snd_pcm_action_stop, substream, SNDRV_PCM_STATE_SETUP); } /** * snd_pcm_stop_xrun - stop the running streams as XRUN * @substream: the PCM substream instance * * This stops the given running substream (and all linked substreams) as XRUN. * Unlike snd_pcm_stop(), this function takes the substream lock by itself. * * Return: Zero if successful, or a negative error code. 
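 *
 * A hedged usage sketch (the foo_* names are hypothetical and not part of
 * this file): a driver may report a fatal FIFO error from its interrupt
 * handler as an XRUN, relying on this helper to take the stream lock itself:
 *
 *	static irqreturn_t foo_interrupt(int irq, void *dev_id)
 *	{
 *		struct foo_chip *chip = dev_id;
 *
 *		if (foo_fifo_error(chip))
 *			snd_pcm_stop_xrun(chip->substream);
 *		return IRQ_HANDLED;
 *	}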
*/ int snd_pcm_stop_xrun(struct snd_pcm_substream *substream) { guard(pcm_stream_lock_irqsave)(substream); if (substream->runtime && snd_pcm_running(substream)) __snd_pcm_xrun(substream); return 0; } EXPORT_SYMBOL_GPL(snd_pcm_stop_xrun); /* * pause callbacks: pass boolean (to start pause or resume) as state argument */ #define pause_pushed(state) (__force bool)(state) static int snd_pcm_pre_pause(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (!(runtime->info & SNDRV_PCM_INFO_PAUSE)) return -ENOSYS; if (pause_pushed(state)) { if (runtime->state != SNDRV_PCM_STATE_RUNNING) return -EBADFD; } else if (runtime->state != SNDRV_PCM_STATE_PAUSED) return -EBADFD; runtime->trigger_master = substream; return 0; } static int snd_pcm_do_pause(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master != substream) return 0; /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta between the current jiffies, this gives a large enough * delta, effectively to skip the check once. */ substream->runtime->hw_ptr_jiffies = jiffies - HZ * 1000; return substream->ops->trigger(substream, pause_pushed(state) ? SNDRV_PCM_TRIGGER_PAUSE_PUSH : SNDRV_PCM_TRIGGER_PAUSE_RELEASE); } static void snd_pcm_undo_pause(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream) substream->ops->trigger(substream, pause_pushed(state) ? SNDRV_PCM_TRIGGER_PAUSE_RELEASE : SNDRV_PCM_TRIGGER_PAUSE_PUSH); } static void snd_pcm_post_pause(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); if (pause_pushed(state)) { __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_PAUSED); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MPAUSE); wake_up(&runtime->sleep); wake_up(&runtime->tsleep); } else { __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_RUNNING); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MCONTINUE); } } static const struct action_ops snd_pcm_action_pause = { .pre_action = snd_pcm_pre_pause, .do_action = snd_pcm_do_pause, .undo_action = snd_pcm_undo_pause, .post_action = snd_pcm_post_pause }; /* * Push/release the pause for all linked streams. */ static int snd_pcm_pause(struct snd_pcm_substream *substream, bool push) { return snd_pcm_action(&snd_pcm_action_pause, substream, (__force snd_pcm_state_t)push); } static int snd_pcm_pause_lock_irq(struct snd_pcm_substream *substream, bool push) { return snd_pcm_action_lock_irq(&snd_pcm_action_pause, substream, (__force snd_pcm_state_t)push); } #ifdef CONFIG_PM /* suspend callback: state argument ignored */ static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->state) { case SNDRV_PCM_STATE_SUSPENDED: return -EBUSY; /* unresumable PCM state; return -EBUSY for skipping suspend */ case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_DISCONNECTED: return -EBUSY; } runtime->trigger_master = substream; return 0; } static int snd_pcm_do_suspend(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->trigger_master != substream) return 0; if (! 
snd_pcm_running(substream)) return 0; substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_SUSPEND); runtime->stop_operating = true; return 0; /* suspend unconditionally */ } static void snd_pcm_post_suspend(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); runtime->suspended_state = runtime->state; runtime->status->suspended_state = runtime->suspended_state; __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_SUSPENDED); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MSUSPEND); wake_up(&runtime->sleep); wake_up(&runtime->tsleep); } static const struct action_ops snd_pcm_action_suspend = { .pre_action = snd_pcm_pre_suspend, .do_action = snd_pcm_do_suspend, .post_action = snd_pcm_post_suspend }; /* * snd_pcm_suspend - trigger SUSPEND to all linked streams * @substream: the PCM substream * * After this call, all streams are changed to SUSPENDED state. * * Return: Zero if successful, or a negative error code. */ static int snd_pcm_suspend(struct snd_pcm_substream *substream) { guard(pcm_stream_lock_irqsave)(substream); return snd_pcm_action(&snd_pcm_action_suspend, substream, ACTION_ARG_IGNORE); } /** * snd_pcm_suspend_all - trigger SUSPEND to all substreams in the given pcm * @pcm: the PCM instance * * After this call, all streams are changed to SUSPENDED state. * * Return: Zero if successful (or @pcm is %NULL), or a negative error code. */ int snd_pcm_suspend_all(struct snd_pcm *pcm) { struct snd_pcm_substream *substream; int stream, err = 0; if (! pcm) return 0; for_each_pcm_substream(pcm, stream, substream) { /* FIXME: the open/close code should lock this as well */ if (!substream->runtime) continue; /* * Skip BE dai link PCM's that are internal and may * not have their substream ops set. */ if (!substream->ops) continue; err = snd_pcm_suspend(substream); if (err < 0 && err != -EBUSY) return err; } for_each_pcm_substream(pcm, stream, substream) snd_pcm_sync_stop(substream, false); return 0; } EXPORT_SYMBOL(snd_pcm_suspend_all); /* resume callbacks: state argument ignored */ static int snd_pcm_pre_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (!(runtime->info & SNDRV_PCM_INFO_RESUME)) return -ENOSYS; runtime->trigger_master = substream; return 0; } static int snd_pcm_do_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->trigger_master != substream) return 0; /* DMA not running previously? 
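	 * Put differently (illustration only, mirroring the check below): the
	 * RESUME trigger is issued only when the stream was actually active
	 * at suspend time, i.e.
	 *
	 *	resume_needed =
	 *		runtime->suspended_state == SNDRV_PCM_STATE_RUNNING ||
	 *		(runtime->suspended_state == SNDRV_PCM_STATE_DRAINING &&
	 *		 substream->stream == SNDRV_PCM_STREAM_PLAYBACK);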
*/ if (runtime->suspended_state != SNDRV_PCM_STATE_RUNNING && (runtime->suspended_state != SNDRV_PCM_STATE_DRAINING || substream->stream != SNDRV_PCM_STREAM_PLAYBACK)) return 0; return substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_RESUME); } static void snd_pcm_undo_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream && snd_pcm_running(substream)) substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_SUSPEND); } static void snd_pcm_post_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); __snd_pcm_set_state(runtime, runtime->suspended_state); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MRESUME); } static const struct action_ops snd_pcm_action_resume = { .pre_action = snd_pcm_pre_resume, .do_action = snd_pcm_do_resume, .undo_action = snd_pcm_undo_resume, .post_action = snd_pcm_post_resume }; static int snd_pcm_resume(struct snd_pcm_substream *substream) { return snd_pcm_action_lock_irq(&snd_pcm_action_resume, substream, ACTION_ARG_IGNORE); } #else static int snd_pcm_resume(struct snd_pcm_substream *substream) { return -ENOSYS; } #endif /* CONFIG_PM */ /* * xrun ioctl * * Change the RUNNING stream(s) to XRUN state. */ static int snd_pcm_xrun(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; guard(pcm_stream_lock_irq)(substream); switch (runtime->state) { case SNDRV_PCM_STATE_XRUN: return 0; /* already there */ case SNDRV_PCM_STATE_RUNNING: __snd_pcm_xrun(substream); return 0; default: return -EBADFD; } } /* * reset ioctl */ /* reset callbacks: state argument ignored */ static int snd_pcm_pre_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->state) { case SNDRV_PCM_STATE_RUNNING: case SNDRV_PCM_STATE_PREPARED: case SNDRV_PCM_STATE_PAUSED: case SNDRV_PCM_STATE_SUSPENDED: return 0; default: return -EBADFD; } } static int snd_pcm_do_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; int err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_RESET, NULL); if (err < 0) return err; guard(pcm_stream_lock_irq)(substream); runtime->hw_ptr_base = 0; runtime->hw_ptr_interrupt = runtime->status->hw_ptr - runtime->status->hw_ptr % runtime->period_size; runtime->silence_start = runtime->status->hw_ptr; runtime->silence_filled = 0; return 0; } static void snd_pcm_post_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; guard(pcm_stream_lock_irq)(substream); runtime->control->appl_ptr = runtime->status->hw_ptr; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); } static const struct action_ops snd_pcm_action_reset = { .pre_action = snd_pcm_pre_reset, .do_action = snd_pcm_do_reset, .post_action = snd_pcm_post_reset }; static int snd_pcm_reset(struct snd_pcm_substream *substream) { return snd_pcm_action_nonatomic(&snd_pcm_action_reset, substream, ACTION_ARG_IGNORE); } /* * prepare ioctl */ /* pass f_flags as state argument */ static int snd_pcm_pre_prepare(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; int f_flags = (__force int)state; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == 
SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (snd_pcm_running(substream)) return -EBUSY; substream->f_flags = f_flags; return 0; } static int snd_pcm_do_prepare(struct snd_pcm_substream *substream, snd_pcm_state_t state) { int err; snd_pcm_sync_stop(substream, true); err = substream->ops->prepare(substream); if (err < 0) return err; return snd_pcm_do_reset(substream, state); } static void snd_pcm_post_prepare(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; runtime->control->appl_ptr = runtime->status->hw_ptr; snd_pcm_set_state(substream, SNDRV_PCM_STATE_PREPARED); } static const struct action_ops snd_pcm_action_prepare = { .pre_action = snd_pcm_pre_prepare, .do_action = snd_pcm_do_prepare, .post_action = snd_pcm_post_prepare }; /** * snd_pcm_prepare - prepare the PCM substream to be triggerable * @substream: the PCM substream instance * @file: file to refer f_flags * * Return: Zero if successful, or a negative error code. */ static int snd_pcm_prepare(struct snd_pcm_substream *substream, struct file *file) { int f_flags; if (file) f_flags = file->f_flags; else f_flags = substream->f_flags; scoped_guard(pcm_stream_lock_irq, substream) { switch (substream->runtime->state) { case SNDRV_PCM_STATE_PAUSED: snd_pcm_pause(substream, false); fallthrough; case SNDRV_PCM_STATE_SUSPENDED: snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); break; } } return snd_pcm_action_nonatomic(&snd_pcm_action_prepare, substream, (__force snd_pcm_state_t)f_flags); } /* * drain ioctl */ /* drain init callbacks: state argument ignored */ static int snd_pcm_pre_drain_init(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->state) { case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_DISCONNECTED: case SNDRV_PCM_STATE_SUSPENDED: return -EBADFD; } runtime->trigger_master = substream; return 0; } static int snd_pcm_do_drain_init(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { switch (runtime->state) { case SNDRV_PCM_STATE_PREPARED: /* start playback stream if possible */ if (! snd_pcm_playback_empty(substream)) { snd_pcm_do_start(substream, SNDRV_PCM_STATE_DRAINING); snd_pcm_post_start(substream, SNDRV_PCM_STATE_DRAINING); } else { __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_SETUP); } break; case SNDRV_PCM_STATE_RUNNING: __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_DRAINING); break; case SNDRV_PCM_STATE_XRUN: __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_SETUP); break; default: break; } } else { /* stop running stream */ if (runtime->state == SNDRV_PCM_STATE_RUNNING) { snd_pcm_state_t new_state; new_state = snd_pcm_capture_avail(runtime) > 0 ? SNDRV_PCM_STATE_DRAINING : SNDRV_PCM_STATE_SETUP; snd_pcm_do_stop(substream, new_state); snd_pcm_post_stop(substream, new_state); } } if (runtime->state == SNDRV_PCM_STATE_DRAINING && runtime->trigger_master == substream && (runtime->hw.info & SNDRV_PCM_INFO_DRAIN_TRIGGER)) return substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_DRAIN); return 0; } static void snd_pcm_post_drain_init(struct snd_pcm_substream *substream, snd_pcm_state_t state) { } static const struct action_ops snd_pcm_action_drain_init = { .pre_action = snd_pcm_pre_drain_init, .do_action = snd_pcm_do_drain_init, .post_action = snd_pcm_post_drain_init }; /* * Drain the stream(s). 
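 * For a playback stream this means letting the samples already queued in
 * the ring buffer play out to the end; for a capture stream the device is
 * stopped and whatever was captured so far remains readable.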
* When the substream is linked, sync until the draining of all playback streams * is finished. * After this call, all streams are supposed to be either SETUP or DRAINING * (capture only) state. */ static int snd_pcm_drain(struct snd_pcm_substream *substream, struct file *file) { struct snd_card *card; struct snd_pcm_runtime *runtime; struct snd_pcm_substream *s; struct snd_pcm_group *group; wait_queue_entry_t wait; int result = 0; int nonblock = 0; card = substream->pcm->card; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (file) { if (file->f_flags & O_NONBLOCK) nonblock = 1; } else if (substream->f_flags & O_NONBLOCK) nonblock = 1; snd_pcm_stream_lock_irq(substream); /* resume pause */ if (runtime->state == SNDRV_PCM_STATE_PAUSED) snd_pcm_pause(substream, false); /* pre-start/stop - all running streams are changed to DRAINING state */ result = snd_pcm_action(&snd_pcm_action_drain_init, substream, ACTION_ARG_IGNORE); if (result < 0) goto unlock; /* in non-blocking, we don't wait in ioctl but let caller poll */ if (nonblock) { result = -EAGAIN; goto unlock; } for (;;) { long tout; struct snd_pcm_runtime *to_check; if (signal_pending(current)) { result = -ERESTARTSYS; break; } /* find a substream to drain */ to_check = NULL; group = snd_pcm_stream_group_ref(substream); snd_pcm_group_for_each_entry(s, substream) { if (s->stream != SNDRV_PCM_STREAM_PLAYBACK) continue; runtime = s->runtime; if (runtime->state == SNDRV_PCM_STATE_DRAINING) { to_check = runtime; break; } } snd_pcm_group_unref(group, substream); if (!to_check) break; /* all drained */ init_waitqueue_entry(&wait, current); set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&to_check->sleep, &wait); snd_pcm_stream_unlock_irq(substream); if (runtime->no_period_wakeup) tout = MAX_SCHEDULE_TIMEOUT; else { tout = 100; if (runtime->rate) { long t = runtime->buffer_size * 1100 / runtime->rate; tout = max(t, tout); } tout = msecs_to_jiffies(tout); } tout = schedule_timeout(tout); snd_pcm_stream_lock_irq(substream); group = snd_pcm_stream_group_ref(substream); snd_pcm_group_for_each_entry(s, substream) { if (s->runtime == to_check) { remove_wait_queue(&to_check->sleep, &wait); break; } } snd_pcm_group_unref(group, substream); if (card->shutdown) { result = -ENODEV; break; } if (tout == 0) { if (substream->runtime->state == SNDRV_PCM_STATE_SUSPENDED) result = -ESTRPIPE; else { dev_dbg(substream->pcm->card->dev, "playback drain timeout (DMA or IRQ trouble?)\n"); snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); result = -EIO; } break; } } unlock: snd_pcm_stream_unlock_irq(substream); return result; } /* * drop ioctl * * Immediately put all linked substreams into SETUP state. 
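 *
 * Unlike the drain path above, no waiting is involved: any pause is released
 * first and the whole group is stopped straight into SETUP, so data still
 * queued in the ring buffer is abandoned rather than played out.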
*/ static int snd_pcm_drop(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; int result = 0; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; guard(pcm_stream_lock_irq)(substream); /* resume pause */ if (runtime->state == SNDRV_PCM_STATE_PAUSED) snd_pcm_pause(substream, false); snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); /* runtime->control->appl_ptr = runtime->status->hw_ptr; */ return result; } static bool is_pcm_file(struct file *file) { struct inode *inode = file_inode(file); struct snd_pcm *pcm; unsigned int minor; if (!S_ISCHR(inode->i_mode) || imajor(inode) != snd_major) return false; minor = iminor(inode); pcm = snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_PLAYBACK); if (!pcm) pcm = snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_CAPTURE); if (!pcm) return false; snd_card_unref(pcm->card); return true; } /* * PCM link handling */ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream1; struct snd_pcm_group *group __free(kfree) = NULL; struct snd_pcm_group *target_group; bool nonatomic = substream->pcm->nonatomic; CLASS(fd, f)(fd); if (!f.file) return -EBADFD; if (!is_pcm_file(f.file)) return -EBADFD; pcm_file = f.file->private_data; substream1 = pcm_file->substream; if (substream == substream1) return -EINVAL; group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return -ENOMEM; snd_pcm_group_init(group); guard(rwsem_write)(&snd_pcm_link_rwsem); if (substream->runtime->state == SNDRV_PCM_STATE_OPEN || substream->runtime->state != substream1->runtime->state || substream->pcm->nonatomic != substream1->pcm->nonatomic) return -EBADFD; if (snd_pcm_stream_linked(substream1)) return -EALREADY; scoped_guard(pcm_stream_lock_irq, substream) { if (!snd_pcm_stream_linked(substream)) { snd_pcm_group_assign(substream, group); group = NULL; /* assigned, don't free this one below */ } target_group = substream->group; } snd_pcm_group_lock_irq(target_group, nonatomic); snd_pcm_stream_lock_nested(substream1); snd_pcm_group_assign(substream1, target_group); refcount_inc(&target_group->refs); snd_pcm_stream_unlock(substream1); snd_pcm_group_unlock_irq(target_group, nonatomic); return 0; } static void relink_to_local(struct snd_pcm_substream *substream) { snd_pcm_stream_lock_nested(substream); snd_pcm_group_assign(substream, &substream->self_group); snd_pcm_stream_unlock(substream); } static int snd_pcm_unlink(struct snd_pcm_substream *substream) { struct snd_pcm_group *group; bool nonatomic = substream->pcm->nonatomic; bool do_free = false; guard(rwsem_write)(&snd_pcm_link_rwsem); if (!snd_pcm_stream_linked(substream)) return -EALREADY; group = substream->group; snd_pcm_group_lock_irq(group, nonatomic); relink_to_local(substream); refcount_dec(&group->refs); /* detach the last stream, too */ if (list_is_singular(&group->substreams)) { relink_to_local(list_first_entry(&group->substreams, struct snd_pcm_substream, link_list)); do_free = refcount_dec_and_test(&group->refs); } snd_pcm_group_unlock_irq(group, nonatomic); if (do_free) kfree(group); return 0; } /* * hw configurator */ static int snd_pcm_hw_rule_mul(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_interval_mul(hw_param_interval_c(params, rule->deps[0]), hw_param_interval_c(params, rule->deps[1]), &t); return 
snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_div(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_interval_div(hw_param_interval_c(params, rule->deps[0]), hw_param_interval_c(params, rule->deps[1]), &t); return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_muldivk(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_interval_muldivk(hw_param_interval_c(params, rule->deps[0]), hw_param_interval_c(params, rule->deps[1]), (unsigned long) rule->private, &t); return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_mulkdiv(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_interval_mulkdiv(hw_param_interval_c(params, rule->deps[0]), (unsigned long) rule->private, hw_param_interval_c(params, rule->deps[1]), &t); return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_format(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { snd_pcm_format_t k; const struct snd_interval *i = hw_param_interval_c(params, rule->deps[0]); struct snd_mask m; struct snd_mask *mask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); snd_mask_any(&m); pcm_for_each_format(k) { int bits; if (!snd_mask_test_format(mask, k)) continue; bits = snd_pcm_format_physical_width(k); if (bits <= 0) continue; /* ignore invalid formats */ if ((unsigned)bits < i->min || (unsigned)bits > i->max) snd_mask_reset(&m, (__force unsigned)k); } return snd_mask_refine(mask, &m); } static int snd_pcm_hw_rule_sample_bits(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_pcm_format_t k; t.min = UINT_MAX; t.max = 0; t.openmin = 0; t.openmax = 0; pcm_for_each_format(k) { int bits; if (!snd_mask_test_format(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), k)) continue; bits = snd_pcm_format_physical_width(k); if (bits <= 0) continue; /* ignore invalid formats */ if (t.min > (unsigned)bits) t.min = bits; if (t.max < (unsigned)bits) t.max = bits; } t.integer = 1; return snd_interval_refine(hw_param_interval(params, rule->var), &t); } #if SNDRV_PCM_RATE_5512 != 1 << 0 || SNDRV_PCM_RATE_192000 != 1 << 12 #error "Change this table" #endif static const unsigned int rates[] = { 5512, 8000, 11025, 16000, 22050, 32000, 44100, 48000, 64000, 88200, 96000, 176400, 192000, 352800, 384000 }; const struct snd_pcm_hw_constraint_list snd_pcm_known_rates = { .count = ARRAY_SIZE(rates), .list = rates, }; static int snd_pcm_hw_rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_pcm_hardware *hw = rule->private; return snd_interval_list(hw_param_interval(params, rule->var), snd_pcm_known_rates.count, snd_pcm_known_rates.list, hw->rates); } static int snd_pcm_hw_rule_buffer_bytes_max(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; struct snd_pcm_substream *substream = rule->private; t.min = 0; t.max = substream->buffer_bytes_max; t.openmin = 0; t.openmax = 0; t.integer = 1; return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_subformats(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_mask *sfmask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_SUBFORMAT); struct snd_mask *fmask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); u32 *subformats = rule->private; 
snd_pcm_format_t f; struct snd_mask m; snd_mask_none(&m); /* All PCMs support at least the default STD subformat. */ snd_mask_set(&m, (__force unsigned)SNDRV_PCM_SUBFORMAT_STD); pcm_for_each_format(f) { if (!snd_mask_test(fmask, (__force unsigned)f)) continue; if (f == SNDRV_PCM_FORMAT_S32_LE && *subformats) m.bits[0] |= *subformats; else if (snd_pcm_format_linear(f)) snd_mask_set(&m, (__force unsigned)SNDRV_PCM_SUBFORMAT_MSBITS_MAX); } return snd_mask_refine(sfmask, &m); } static int snd_pcm_hw_constraint_subformats(struct snd_pcm_runtime *runtime, unsigned int cond, u32 *subformats) { return snd_pcm_hw_rule_add(runtime, cond, -1, snd_pcm_hw_rule_subformats, (void *)subformats, SNDRV_PCM_HW_PARAM_SUBFORMAT, SNDRV_PCM_HW_PARAM_FORMAT, -1); } static int snd_pcm_hw_constraints_init(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_constraints *constrs = &runtime->hw_constraints; int k, err; for (k = SNDRV_PCM_HW_PARAM_FIRST_MASK; k <= SNDRV_PCM_HW_PARAM_LAST_MASK; k++) { snd_mask_any(constrs_mask(constrs, k)); } for (k = SNDRV_PCM_HW_PARAM_FIRST_INTERVAL; k <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; k++) { snd_interval_any(constrs_interval(constrs, k)); } snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_CHANNELS)); snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_BUFFER_SIZE)); snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_BUFFER_BYTES)); snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_SAMPLE_BITS)); snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_FRAME_BITS)); err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, snd_pcm_hw_rule_format, NULL, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, snd_pcm_hw_rule_sample_bits, NULL, SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, snd_pcm_hw_rule_div, NULL, SNDRV_PCM_HW_PARAM_FRAME_BITS, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FRAME_BITS, snd_pcm_hw_rule_mul, NULL, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FRAME_BITS, snd_pcm_hw_rule_mulkdiv, (void*) 8, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FRAME_BITS, snd_pcm_hw_rule_mulkdiv, (void*) 8, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, snd_pcm_hw_rule_div, NULL, SNDRV_PCM_HW_PARAM_FRAME_BITS, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, snd_pcm_hw_rule_mulkdiv, (void*) 1000000, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_PERIOD_TIME, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, snd_pcm_hw_rule_mulkdiv, (void*) 1000000, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_BUFFER_TIME, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIODS, snd_pcm_hw_rule_div, NULL, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); if (err < 0) return err; err = 
snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, snd_pcm_hw_rule_div, NULL, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_PERIODS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, snd_pcm_hw_rule_mulkdiv, (void*) 8, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, SNDRV_PCM_HW_PARAM_FRAME_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, snd_pcm_hw_rule_muldivk, (void*) 1000000, SNDRV_PCM_HW_PARAM_PERIOD_TIME, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, snd_pcm_hw_rule_mul, NULL, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_PERIODS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, snd_pcm_hw_rule_mulkdiv, (void*) 8, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, SNDRV_PCM_HW_PARAM_FRAME_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, snd_pcm_hw_rule_muldivk, (void*) 1000000, SNDRV_PCM_HW_PARAM_BUFFER_TIME, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, snd_pcm_hw_rule_muldivk, (void*) 8, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_FRAME_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, snd_pcm_hw_rule_muldivk, (void*) 8, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_FRAME_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_TIME, snd_pcm_hw_rule_mulkdiv, (void*) 1000000, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_TIME, snd_pcm_hw_rule_mulkdiv, (void*) 1000000, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; return 0; } static int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hardware *hw = &runtime->hw; int err; unsigned int mask = 0; if (hw->info & SNDRV_PCM_INFO_INTERLEAVED) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_RW_INTERLEAVED); if (hw->info & SNDRV_PCM_INFO_NONINTERLEAVED) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_RW_NONINTERLEAVED); if (hw_support_mmap(substream)) { if (hw->info & SNDRV_PCM_INFO_INTERLEAVED) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_MMAP_INTERLEAVED); if (hw->info & SNDRV_PCM_INFO_NONINTERLEAVED) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED); if (hw->info & SNDRV_PCM_INFO_COMPLEX) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_MMAP_COMPLEX); } err = snd_pcm_hw_constraint_mask(runtime, SNDRV_PCM_HW_PARAM_ACCESS, mask); if (err < 0) return err; err = snd_pcm_hw_constraint_mask64(runtime, SNDRV_PCM_HW_PARAM_FORMAT, hw->formats); if (err < 0) return err; err = snd_pcm_hw_constraint_subformats(runtime, 0, &hw->subformats); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_CHANNELS, hw->channels_min, hw->channels_max); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_RATE, hw->rate_min, hw->rate_max); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, hw->period_bytes_min, hw->period_bytes_max); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIODS, hw->periods_min, hw->periods_max); if 
(err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, hw->period_bytes_min, hw->buffer_bytes_max); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, snd_pcm_hw_rule_buffer_bytes_max, substream, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, -1); if (err < 0) return err; /* FIXME: remove */ if (runtime->dma_bytes) { err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 0, runtime->dma_bytes); if (err < 0) return err; } if (!(hw->rates & (SNDRV_PCM_RATE_KNOT | SNDRV_PCM_RATE_CONTINUOUS))) { err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, snd_pcm_hw_rule_rate, hw, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; } /* FIXME: this belong to lowlevel */ snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIOD_SIZE); return 0; } static void pcm_release_private(struct snd_pcm_substream *substream) { if (snd_pcm_stream_linked(substream)) snd_pcm_unlink(substream); } void snd_pcm_release_substream(struct snd_pcm_substream *substream) { substream->ref_count--; if (substream->ref_count > 0) return; snd_pcm_drop(substream); if (substream->hw_opened) { if (substream->runtime->state != SNDRV_PCM_STATE_OPEN) do_hw_free(substream); substream->ops->close(substream); substream->hw_opened = 0; } if (cpu_latency_qos_request_active(&substream->latency_pm_qos_req)) cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); if (substream->pcm_release) { substream->pcm_release(substream); substream->pcm_release = NULL; } snd_pcm_detach_substream(substream); } EXPORT_SYMBOL(snd_pcm_release_substream); int snd_pcm_open_substream(struct snd_pcm *pcm, int stream, struct file *file, struct snd_pcm_substream **rsubstream) { struct snd_pcm_substream *substream; int err; err = snd_pcm_attach_substream(pcm, stream, file, &substream); if (err < 0) return err; if (substream->ref_count > 1) { *rsubstream = substream; return 0; } err = snd_pcm_hw_constraints_init(substream); if (err < 0) { pcm_dbg(pcm, "snd_pcm_hw_constraints_init failed\n"); goto error; } err = substream->ops->open(substream); if (err < 0) goto error; substream->hw_opened = 1; err = snd_pcm_hw_constraints_complete(substream); if (err < 0) { pcm_dbg(pcm, "snd_pcm_hw_constraints_complete failed\n"); goto error; } /* automatically set EXPLICIT_SYNC flag in the managed mode whenever * the DMA buffer requires it */ if (substream->managed_buffer_alloc && substream->dma_buffer.dev.need_sync) substream->runtime->hw.info |= SNDRV_PCM_INFO_EXPLICIT_SYNC; *rsubstream = substream; return 0; error: snd_pcm_release_substream(substream); return err; } EXPORT_SYMBOL(snd_pcm_open_substream); static int snd_pcm_open_file(struct file *file, struct snd_pcm *pcm, int stream) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; int err; err = snd_pcm_open_substream(pcm, stream, file, &substream); if (err < 0) return err; pcm_file = kzalloc(sizeof(*pcm_file), GFP_KERNEL); if (pcm_file == NULL) { snd_pcm_release_substream(substream); return -ENOMEM; } pcm_file->substream = substream; if (substream->ref_count == 1) substream->pcm_release = pcm_release_private; file->private_data = pcm_file; return 0; } static int snd_pcm_playback_open(struct inode *inode, struct file *file) { struct snd_pcm *pcm; int err = nonseekable_open(inode, file); if (err < 0) return err; pcm = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_PCM_PLAYBACK); err = snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_PLAYBACK); if (pcm) snd_card_unref(pcm->card); return 
err; } static int snd_pcm_capture_open(struct inode *inode, struct file *file) { struct snd_pcm *pcm; int err = nonseekable_open(inode, file); if (err < 0) return err; pcm = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_PCM_CAPTURE); err = snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_CAPTURE); if (pcm) snd_card_unref(pcm->card); return err; } static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream) { int err; wait_queue_entry_t wait; if (pcm == NULL) { err = -ENODEV; goto __error1; } err = snd_card_file_add(pcm->card, file); if (err < 0) goto __error1; if (!try_module_get(pcm->card->module)) { err = -EFAULT; goto __error2; } init_waitqueue_entry(&wait, current); add_wait_queue(&pcm->open_wait, &wait); mutex_lock(&pcm->open_mutex); while (1) { err = snd_pcm_open_file(file, pcm, stream); if (err >= 0) break; if (err == -EAGAIN) { if (file->f_flags & O_NONBLOCK) { err = -EBUSY; break; } } else break; set_current_state(TASK_INTERRUPTIBLE); mutex_unlock(&pcm->open_mutex); schedule(); mutex_lock(&pcm->open_mutex); if (pcm->card->shutdown) { err = -ENODEV; break; } if (signal_pending(current)) { err = -ERESTARTSYS; break; } } remove_wait_queue(&pcm->open_wait, &wait); mutex_unlock(&pcm->open_mutex); if (err < 0) goto __error; return err; __error: module_put(pcm->card->module); __error2: snd_card_file_remove(pcm->card, file); __error1: return err; } static int snd_pcm_release(struct inode *inode, struct file *file) { struct snd_pcm *pcm; struct snd_pcm_substream *substream; struct snd_pcm_file *pcm_file; pcm_file = file->private_data; substream = pcm_file->substream; if (snd_BUG_ON(!substream)) return -ENXIO; pcm = substream->pcm; /* block until the device gets woken up as it may touch the hardware */ snd_power_wait(pcm->card); scoped_guard(mutex, &pcm->open_mutex) { snd_pcm_release_substream(substream); kfree(pcm_file); } wake_up(&pcm->open_wait); module_put(pcm->card->module); snd_card_file_remove(pcm->card, file); return 0; } /* check and update PCM state; return 0 or a negative error * call this inside PCM lock */ static int do_pcm_hwsync(struct snd_pcm_substream *substream) { switch (substream->runtime->state) { case SNDRV_PCM_STATE_DRAINING: if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) return -EBADFD; fallthrough; case SNDRV_PCM_STATE_RUNNING: return snd_pcm_update_hw_ptr(substream); case SNDRV_PCM_STATE_PREPARED: case SNDRV_PCM_STATE_PAUSED: return 0; case SNDRV_PCM_STATE_SUSPENDED: return -ESTRPIPE; case SNDRV_PCM_STATE_XRUN: return -EPIPE; default: return -EBADFD; } } /* increase the appl_ptr; returns the processed frames or a negative error */ static snd_pcm_sframes_t forward_appl_ptr(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames, snd_pcm_sframes_t avail) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t appl_ptr; int ret; if (avail <= 0) return 0; if (frames > (snd_pcm_uframes_t)avail) frames = avail; appl_ptr = runtime->control->appl_ptr + frames; if (appl_ptr >= (snd_pcm_sframes_t)runtime->boundary) appl_ptr -= runtime->boundary; ret = pcm_lib_apply_appl_ptr(substream, appl_ptr); return ret < 0 ? 
ret : frames; } /* decrease the appl_ptr; returns the processed frames or zero for error */ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames, snd_pcm_sframes_t avail) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t appl_ptr; int ret; if (avail <= 0) return 0; if (frames > (snd_pcm_uframes_t)avail) frames = avail; appl_ptr = runtime->control->appl_ptr - frames; if (appl_ptr < 0) appl_ptr += runtime->boundary; ret = pcm_lib_apply_appl_ptr(substream, appl_ptr); /* NOTE: we return zero for errors because PulseAudio gets depressed * upon receiving an error from rewind ioctl and stops processing * any longer. Returning zero means that no rewind is done, so * it's not absolutely wrong to answer like that. */ return ret < 0 ? 0 : frames; } static snd_pcm_sframes_t snd_pcm_rewind(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames) { snd_pcm_sframes_t ret; if (frames == 0) return 0; scoped_guard(pcm_stream_lock_irq, substream) { ret = do_pcm_hwsync(substream); if (!ret) ret = rewind_appl_ptr(substream, frames, snd_pcm_hw_avail(substream)); } if (ret >= 0) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); return ret; } static snd_pcm_sframes_t snd_pcm_forward(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames) { snd_pcm_sframes_t ret; if (frames == 0) return 0; scoped_guard(pcm_stream_lock_irq, substream) { ret = do_pcm_hwsync(substream); if (!ret) ret = forward_appl_ptr(substream, frames, snd_pcm_avail(substream)); } if (ret >= 0) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); return ret; } static int snd_pcm_delay(struct snd_pcm_substream *substream, snd_pcm_sframes_t *delay) { int err; scoped_guard(pcm_stream_lock_irq, substream) { err = do_pcm_hwsync(substream); if (delay && !err) *delay = snd_pcm_calc_delay(substream); } snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_CPU); return err; } static inline int snd_pcm_hwsync(struct snd_pcm_substream *substream) { return snd_pcm_delay(substream, NULL); } static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream, struct snd_pcm_sync_ptr __user *_sync_ptr) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_sync_ptr sync_ptr; volatile struct snd_pcm_mmap_status *status; volatile struct snd_pcm_mmap_control *control; int err; memset(&sync_ptr, 0, sizeof(sync_ptr)); if (get_user(sync_ptr.flags, (unsigned __user *)&(_sync_ptr->flags))) return -EFAULT; if (copy_from_user(&sync_ptr.c.control, &(_sync_ptr->c.control), sizeof(struct snd_pcm_mmap_control))) return -EFAULT; status = runtime->status; control = runtime->control; if (sync_ptr.flags & SNDRV_PCM_SYNC_PTR_HWSYNC) { err = snd_pcm_hwsync(substream); if (err < 0) return err; } scoped_guard(pcm_stream_lock_irq, substream) { if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_APPL)) { err = pcm_lib_apply_appl_ptr(substream, sync_ptr.c.control.appl_ptr); if (err < 0) return err; } else { sync_ptr.c.control.appl_ptr = control->appl_ptr; } if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) control->avail_min = sync_ptr.c.control.avail_min; else sync_ptr.c.control.avail_min = control->avail_min; sync_ptr.s.status.state = status->state; sync_ptr.s.status.hw_ptr = status->hw_ptr; sync_ptr.s.status.tstamp = status->tstamp; sync_ptr.s.status.suspended_state = status->suspended_state; sync_ptr.s.status.audio_tstamp = status->audio_tstamp; } if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_APPL)) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); if (copy_to_user(_sync_ptr, 
&sync_ptr, sizeof(sync_ptr))) return -EFAULT; return 0; } struct snd_pcm_mmap_status32 { snd_pcm_state_t state; s32 pad1; u32 hw_ptr; s32 tstamp_sec; s32 tstamp_nsec; snd_pcm_state_t suspended_state; s32 audio_tstamp_sec; s32 audio_tstamp_nsec; } __packed; struct snd_pcm_mmap_control32 { u32 appl_ptr; u32 avail_min; }; struct snd_pcm_sync_ptr32 { u32 flags; union { struct snd_pcm_mmap_status32 status; unsigned char reserved[64]; } s; union { struct snd_pcm_mmap_control32 control; unsigned char reserved[64]; } c; } __packed; /* recalcuate the boundary within 32bit */ static snd_pcm_uframes_t recalculate_boundary(struct snd_pcm_runtime *runtime) { snd_pcm_uframes_t boundary; if (! runtime->buffer_size) return 0; boundary = runtime->buffer_size; while (boundary * 2 <= 0x7fffffffUL - runtime->buffer_size) boundary *= 2; return boundary; } static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, struct snd_pcm_sync_ptr32 __user *src) { struct snd_pcm_runtime *runtime = substream->runtime; volatile struct snd_pcm_mmap_status *status; volatile struct snd_pcm_mmap_control *control; u32 sflags; struct snd_pcm_mmap_control scontrol; struct snd_pcm_mmap_status sstatus; snd_pcm_uframes_t boundary; int err; if (snd_BUG_ON(!runtime)) return -EINVAL; if (get_user(sflags, &src->flags) || get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || get_user(scontrol.avail_min, &src->c.control.avail_min)) return -EFAULT; if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { err = snd_pcm_hwsync(substream); if (err < 0) return err; } status = runtime->status; control = runtime->control; boundary = recalculate_boundary(runtime); if (! boundary) boundary = 0x7fffffff; scoped_guard(pcm_stream_lock_irq, substream) { /* FIXME: we should consider the boundary for the sync from app */ if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) { err = pcm_lib_apply_appl_ptr(substream, scontrol.appl_ptr); if (err < 0) return err; } else scontrol.appl_ptr = control->appl_ptr % boundary; if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) control->avail_min = scontrol.avail_min; else scontrol.avail_min = control->avail_min; sstatus.state = status->state; sstatus.hw_ptr = status->hw_ptr % boundary; sstatus.tstamp = status->tstamp; sstatus.suspended_state = status->suspended_state; sstatus.audio_tstamp = status->audio_tstamp; } if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); if (put_user(sstatus.state, &src->s.status.state) || put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) || put_user(sstatus.tstamp.tv_sec, &src->s.status.tstamp_sec) || put_user(sstatus.tstamp.tv_nsec, &src->s.status.tstamp_nsec) || put_user(sstatus.suspended_state, &src->s.status.suspended_state) || put_user(sstatus.audio_tstamp.tv_sec, &src->s.status.audio_tstamp_sec) || put_user(sstatus.audio_tstamp.tv_nsec, &src->s.status.audio_tstamp_nsec) || put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || put_user(scontrol.avail_min, &src->c.control.avail_min)) return -EFAULT; return 0; } #define __SNDRV_PCM_IOCTL_SYNC_PTR32 _IOWR('A', 0x23, struct snd_pcm_sync_ptr32) static int snd_pcm_tstamp(struct snd_pcm_substream *substream, int __user *_arg) { struct snd_pcm_runtime *runtime = substream->runtime; int arg; if (get_user(arg, _arg)) return -EFAULT; if (arg < 0 || arg > SNDRV_PCM_TSTAMP_TYPE_LAST) return -EINVAL; runtime->tstamp_type = arg; return 0; } static int snd_pcm_xferi_frames_ioctl(struct snd_pcm_substream *substream, struct snd_xferi __user *_xferi) { struct snd_xferi xferi; struct snd_pcm_runtime *runtime = 
substream->runtime; snd_pcm_sframes_t result; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (put_user(0, &_xferi->result)) return -EFAULT; if (copy_from_user(&xferi, _xferi, sizeof(xferi))) return -EFAULT; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) result = snd_pcm_lib_write(substream, xferi.buf, xferi.frames); else result = snd_pcm_lib_read(substream, xferi.buf, xferi.frames); if (put_user(result, &_xferi->result)) return -EFAULT; return result < 0 ? result : 0; } static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream, struct snd_xfern __user *_xfern) { struct snd_xfern xfern; struct snd_pcm_runtime *runtime = substream->runtime; void *bufs __free(kfree) = NULL; snd_pcm_sframes_t result; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (runtime->channels > 128) return -EINVAL; if (put_user(0, &_xfern->result)) return -EFAULT; if (copy_from_user(&xfern, _xfern, sizeof(xfern))) return -EFAULT; bufs = memdup_user(xfern.bufs, sizeof(void *) * runtime->channels); if (IS_ERR(bufs)) return PTR_ERR(no_free_ptr(bufs)); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) result = snd_pcm_lib_writev(substream, bufs, xfern.frames); else result = snd_pcm_lib_readv(substream, bufs, xfern.frames); if (put_user(result, &_xfern->result)) return -EFAULT; return result < 0 ? result : 0; } static int snd_pcm_rewind_ioctl(struct snd_pcm_substream *substream, snd_pcm_uframes_t __user *_frames) { snd_pcm_uframes_t frames; snd_pcm_sframes_t result; if (get_user(frames, _frames)) return -EFAULT; if (put_user(0, _frames)) return -EFAULT; result = snd_pcm_rewind(substream, frames); if (put_user(result, _frames)) return -EFAULT; return result < 0 ? result : 0; } static int snd_pcm_forward_ioctl(struct snd_pcm_substream *substream, snd_pcm_uframes_t __user *_frames) { snd_pcm_uframes_t frames; snd_pcm_sframes_t result; if (get_user(frames, _frames)) return -EFAULT; if (put_user(0, _frames)) return -EFAULT; result = snd_pcm_forward(substream, frames); if (put_user(result, _frames)) return -EFAULT; return result < 0 ? result : 0; } static int snd_pcm_common_ioctl(struct file *file, struct snd_pcm_substream *substream, unsigned int cmd, void __user *arg) { struct snd_pcm_file *pcm_file = file->private_data; int res; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; if (substream->runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; res = snd_power_wait(substream->pcm->card); if (res < 0) return res; switch (cmd) { case SNDRV_PCM_IOCTL_PVERSION: return put_user(SNDRV_PCM_VERSION, (int __user *)arg) ? 
-EFAULT : 0; case SNDRV_PCM_IOCTL_INFO: return snd_pcm_info_user(substream, arg); case SNDRV_PCM_IOCTL_TSTAMP: /* just for compatibility */ return 0; case SNDRV_PCM_IOCTL_TTSTAMP: return snd_pcm_tstamp(substream, arg); case SNDRV_PCM_IOCTL_USER_PVERSION: if (get_user(pcm_file->user_pversion, (unsigned int __user *)arg)) return -EFAULT; return 0; case SNDRV_PCM_IOCTL_HW_REFINE: return snd_pcm_hw_refine_user(substream, arg); case SNDRV_PCM_IOCTL_HW_PARAMS: return snd_pcm_hw_params_user(substream, arg); case SNDRV_PCM_IOCTL_HW_FREE: return snd_pcm_hw_free(substream); case SNDRV_PCM_IOCTL_SW_PARAMS: return snd_pcm_sw_params_user(substream, arg); case SNDRV_PCM_IOCTL_STATUS32: return snd_pcm_status_user32(substream, arg, false); case SNDRV_PCM_IOCTL_STATUS_EXT32: return snd_pcm_status_user32(substream, arg, true); case SNDRV_PCM_IOCTL_STATUS64: return snd_pcm_status_user64(substream, arg, false); case SNDRV_PCM_IOCTL_STATUS_EXT64: return snd_pcm_status_user64(substream, arg, true); case SNDRV_PCM_IOCTL_CHANNEL_INFO: return snd_pcm_channel_info_user(substream, arg); case SNDRV_PCM_IOCTL_PREPARE: return snd_pcm_prepare(substream, file); case SNDRV_PCM_IOCTL_RESET: return snd_pcm_reset(substream); case SNDRV_PCM_IOCTL_START: return snd_pcm_start_lock_irq(substream); case SNDRV_PCM_IOCTL_LINK: return snd_pcm_link(substream, (int)(unsigned long) arg); case SNDRV_PCM_IOCTL_UNLINK: return snd_pcm_unlink(substream); case SNDRV_PCM_IOCTL_RESUME: return snd_pcm_resume(substream); case SNDRV_PCM_IOCTL_XRUN: return snd_pcm_xrun(substream); case SNDRV_PCM_IOCTL_HWSYNC: return snd_pcm_hwsync(substream); case SNDRV_PCM_IOCTL_DELAY: { snd_pcm_sframes_t delay = 0; snd_pcm_sframes_t __user *res = arg; int err; err = snd_pcm_delay(substream, &delay); if (err) return err; if (put_user(delay, res)) return -EFAULT; return 0; } case __SNDRV_PCM_IOCTL_SYNC_PTR32: return snd_pcm_ioctl_sync_ptr_compat(substream, arg); case __SNDRV_PCM_IOCTL_SYNC_PTR64: return snd_pcm_sync_ptr(substream, arg); #ifdef CONFIG_SND_SUPPORT_OLD_API case SNDRV_PCM_IOCTL_HW_REFINE_OLD: return snd_pcm_hw_refine_old_user(substream, arg); case SNDRV_PCM_IOCTL_HW_PARAMS_OLD: return snd_pcm_hw_params_old_user(substream, arg); #endif case SNDRV_PCM_IOCTL_DRAIN: return snd_pcm_drain(substream, file); case SNDRV_PCM_IOCTL_DROP: return snd_pcm_drop(substream); case SNDRV_PCM_IOCTL_PAUSE: return snd_pcm_pause_lock_irq(substream, (unsigned long)arg); case SNDRV_PCM_IOCTL_WRITEI_FRAMES: case SNDRV_PCM_IOCTL_READI_FRAMES: return snd_pcm_xferi_frames_ioctl(substream, arg); case SNDRV_PCM_IOCTL_WRITEN_FRAMES: case SNDRV_PCM_IOCTL_READN_FRAMES: return snd_pcm_xfern_frames_ioctl(substream, arg); case SNDRV_PCM_IOCTL_REWIND: return snd_pcm_rewind_ioctl(substream, arg); case SNDRV_PCM_IOCTL_FORWARD: return snd_pcm_forward_ioctl(substream, arg); } pcm_dbg(substream->pcm, "unknown ioctl = 0x%x\n", cmd); return -ENOTTY; } static long snd_pcm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_pcm_file *pcm_file; pcm_file = file->private_data; if (((cmd >> 8) & 0xff) != 'A') return -ENOTTY; return snd_pcm_common_ioctl(file, pcm_file->substream, cmd, (void __user *)arg); } /** * snd_pcm_kernel_ioctl - Execute PCM ioctl in the kernel-space * @substream: PCM substream * @cmd: IOCTL cmd * @arg: IOCTL argument * * The function is provided primarily for OSS layer and USB gadget drivers, * and it allows only the limited set of ioctls (hw_params, sw_params, * prepare, start, drain, drop, forward). 
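 *
 * A minimal caller sketch (for illustration only; the substream pointer is
 * assumed to come from the OSS emulation or USB gadget code):
 *
 *	err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL);
 *
 * Note that @arg is a plain kernel pointer here, not a __user pointer.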
* * Return: zero if successful, or a negative error code */ int snd_pcm_kernel_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg) { snd_pcm_uframes_t *frames = arg; snd_pcm_sframes_t result; if (substream->runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; switch (cmd) { case SNDRV_PCM_IOCTL_FORWARD: { /* provided only for OSS; capture-only and no value returned */ if (substream->stream != SNDRV_PCM_STREAM_CAPTURE) return -EINVAL; result = snd_pcm_forward(substream, *frames); return result < 0 ? result : 0; } case SNDRV_PCM_IOCTL_HW_PARAMS: return snd_pcm_hw_params(substream, arg); case SNDRV_PCM_IOCTL_SW_PARAMS: return snd_pcm_sw_params(substream, arg); case SNDRV_PCM_IOCTL_PREPARE: return snd_pcm_prepare(substream, NULL); case SNDRV_PCM_IOCTL_START: return snd_pcm_start_lock_irq(substream); case SNDRV_PCM_IOCTL_DRAIN: return snd_pcm_drain(substream, NULL); case SNDRV_PCM_IOCTL_DROP: return snd_pcm_drop(substream); case SNDRV_PCM_IOCTL_DELAY: return snd_pcm_delay(substream, frames); default: return -EINVAL; } } EXPORT_SYMBOL(snd_pcm_kernel_ioctl); static ssize_t snd_pcm_read(struct file *file, char __user *buf, size_t count, loff_t * offset) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t result; pcm_file = file->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (!frame_aligned(runtime, count)) return -EINVAL; count = bytes_to_frames(runtime, count); result = snd_pcm_lib_read(substream, buf, count); if (result > 0) result = frames_to_bytes(runtime, result); return result; } static ssize_t snd_pcm_write(struct file *file, const char __user *buf, size_t count, loff_t * offset) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t result; pcm_file = file->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (!frame_aligned(runtime, count)) return -EINVAL; count = bytes_to_frames(runtime, count); result = snd_pcm_lib_write(substream, buf, count); if (result > 0) result = frames_to_bytes(runtime, result); return result; } static ssize_t snd_pcm_readv(struct kiocb *iocb, struct iov_iter *to) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t result; unsigned long i; void __user **bufs __free(kfree) = NULL; snd_pcm_uframes_t frames; const struct iovec *iov = iter_iov(to); pcm_file = iocb->ki_filp->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (!user_backed_iter(to)) return -EINVAL; if (to->nr_segs > 1024 || to->nr_segs != runtime->channels) return -EINVAL; if (!frame_aligned(runtime, iov->iov_len)) return -EINVAL; frames = bytes_to_samples(runtime, iov->iov_len); bufs = kmalloc_array(to->nr_segs, sizeof(void *), GFP_KERNEL); if (bufs == NULL) return -ENOMEM; for (i = 0; i < to->nr_segs; ++i) { bufs[i] = iov->iov_base; iov++; } result = snd_pcm_lib_readv(substream, bufs, frames); if (result > 0) 
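		/*
		 * The PCM core counts in frames; convert back to bytes
		 * for the read_iter return value.
		 */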
result = frames_to_bytes(runtime, result); return result; } static ssize_t snd_pcm_writev(struct kiocb *iocb, struct iov_iter *from) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t result; unsigned long i; void __user **bufs __free(kfree) = NULL; snd_pcm_uframes_t frames; const struct iovec *iov = iter_iov(from); pcm_file = iocb->ki_filp->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (!user_backed_iter(from)) return -EINVAL; if (from->nr_segs > 128 || from->nr_segs != runtime->channels || !frame_aligned(runtime, iov->iov_len)) return -EINVAL; frames = bytes_to_samples(runtime, iov->iov_len); bufs = kmalloc_array(from->nr_segs, sizeof(void *), GFP_KERNEL); if (bufs == NULL) return -ENOMEM; for (i = 0; i < from->nr_segs; ++i) { bufs[i] = iov->iov_base; iov++; } result = snd_pcm_lib_writev(substream, bufs, frames); if (result > 0) result = frames_to_bytes(runtime, result); return result; } static __poll_t snd_pcm_poll(struct file *file, poll_table *wait) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; __poll_t mask, ok; snd_pcm_uframes_t avail; pcm_file = file->private_data; substream = pcm_file->substream; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ok = EPOLLOUT | EPOLLWRNORM; else ok = EPOLLIN | EPOLLRDNORM; if (PCM_RUNTIME_CHECK(substream)) return ok | EPOLLERR; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return ok | EPOLLERR; poll_wait(file, &runtime->sleep, wait); mask = 0; guard(pcm_stream_lock_irq)(substream); avail = snd_pcm_avail(substream); switch (runtime->state) { case SNDRV_PCM_STATE_RUNNING: case SNDRV_PCM_STATE_PREPARED: case SNDRV_PCM_STATE_PAUSED: if (avail >= runtime->control->avail_min) mask = ok; break; case SNDRV_PCM_STATE_DRAINING: if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { mask = ok; if (!avail) mask |= EPOLLERR; } break; default: mask = ok | EPOLLERR; break; } return mask; } /* * mmap support */ /* * Only on coherent architectures, we can mmap the status and the control records * for effcient data transfer. On others, we have to use HWSYNC ioctl... 
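 * (On those architectures pcm_status_mmap_allowed() and
 * pcm_control_mmap_allowed() below are defined to false, so user space
 * exchanges appl_ptr, hw_ptr and the status fields via the SYNC_PTR ioctl
 * handled by snd_pcm_sync_ptr() above.)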
*/ #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_ALPHA) /* * mmap status record */ static vm_fault_t snd_pcm_mmap_status_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; if (substream == NULL) return VM_FAULT_SIGBUS; runtime = substream->runtime; vmf->page = virt_to_page(runtime->status); get_page(vmf->page); return 0; } static const struct vm_operations_struct snd_pcm_vm_ops_status = { .fault = snd_pcm_mmap_status_fault, }; static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { long size; if (!(area->vm_flags & VM_READ)) return -EINVAL; size = area->vm_end - area->vm_start; if (size != PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status))) return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_status; area->vm_private_data = substream; vm_flags_mod(area, VM_DONTEXPAND | VM_DONTDUMP, VM_WRITE | VM_MAYWRITE); return 0; } /* * mmap control record */ static vm_fault_t snd_pcm_mmap_control_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; if (substream == NULL) return VM_FAULT_SIGBUS; runtime = substream->runtime; vmf->page = virt_to_page(runtime->control); get_page(vmf->page); return 0; } static const struct vm_operations_struct snd_pcm_vm_ops_control = { .fault = snd_pcm_mmap_control_fault, }; static int snd_pcm_mmap_control(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { long size; if (!(area->vm_flags & VM_READ)) return -EINVAL; size = area->vm_end - area->vm_start; if (size != PAGE_ALIGN(sizeof(struct snd_pcm_mmap_control))) return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_control; area->vm_private_data = substream; vm_flags_set(area, VM_DONTEXPAND | VM_DONTDUMP); return 0; } static bool pcm_status_mmap_allowed(struct snd_pcm_file *pcm_file) { /* If drivers require the explicit sync (typically for non-coherent * pages), we have to disable the mmap of status and control data * to enforce the control via SYNC_PTR ioctl. */ if (pcm_file->substream->runtime->hw.info & SNDRV_PCM_INFO_EXPLICIT_SYNC) return false; /* See pcm_control_mmap_allowed() below. * Since older alsa-lib requires both status and control mmaps to be * coupled, we have to disable the status mmap for old alsa-lib, too. */ if (pcm_file->user_pversion < SNDRV_PROTOCOL_VERSION(2, 0, 14) && (pcm_file->substream->runtime->hw.info & SNDRV_PCM_INFO_SYNC_APPLPTR)) return false; return true; } static bool pcm_control_mmap_allowed(struct snd_pcm_file *pcm_file) { if (pcm_file->no_compat_mmap) return false; /* see above */ if (pcm_file->substream->runtime->hw.info & SNDRV_PCM_INFO_EXPLICIT_SYNC) return false; /* Disallow the control mmap when SYNC_APPLPTR flag is set; * it enforces the user-space to fall back to snd_pcm_sync_ptr(), * thus it effectively assures the manual update of appl_ptr. */ if (pcm_file->substream->runtime->hw.info & SNDRV_PCM_INFO_SYNC_APPLPTR) return false; return true; } #else /* ! coherent mmap */ /* * don't support mmap for status and control records. 
*/ #define pcm_status_mmap_allowed(pcm_file) false #define pcm_control_mmap_allowed(pcm_file) false static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { return -ENXIO; } static int snd_pcm_mmap_control(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { return -ENXIO; } #endif /* coherent mmap */ /* * fault callback for mmapping a RAM page */ static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; unsigned long offset; struct page * page; size_t dma_bytes; if (substream == NULL) return VM_FAULT_SIGBUS; runtime = substream->runtime; offset = vmf->pgoff << PAGE_SHIFT; dma_bytes = PAGE_ALIGN(runtime->dma_bytes); if (offset > dma_bytes - PAGE_SIZE) return VM_FAULT_SIGBUS; if (substream->ops->page) page = substream->ops->page(substream, offset); else if (!snd_pcm_get_dma_buf(substream)) page = virt_to_page(runtime->dma_area + offset); else page = snd_sgbuf_get_page(snd_pcm_get_dma_buf(substream), offset); if (!page) return VM_FAULT_SIGBUS; get_page(page); vmf->page = page; return 0; } static const struct vm_operations_struct snd_pcm_vm_ops_data = { .open = snd_pcm_mmap_data_open, .close = snd_pcm_mmap_data_close, }; static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = { .open = snd_pcm_mmap_data_open, .close = snd_pcm_mmap_data_close, .fault = snd_pcm_mmap_data_fault, }; /* * mmap the DMA buffer on RAM */ /** * snd_pcm_lib_default_mmap - Default PCM data mmap function * @substream: PCM substream * @area: VMA * * This is the default mmap handler for PCM data. When mmap pcm_ops is NULL, * this function is invoked implicitly. * * Return: zero if successful, or a negative error code */ int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *area) { vm_flags_set(area, VM_DONTEXPAND | VM_DONTDUMP); if (!substream->ops->page && !snd_dma_buffer_mmap(snd_pcm_get_dma_buf(substream), area)) return 0; /* mmap with fault handler */ area->vm_ops = &snd_pcm_vm_ops_data_fault; return 0; } EXPORT_SYMBOL_GPL(snd_pcm_lib_default_mmap); /* * mmap the DMA buffer on I/O memory area */ #if SNDRV_PCM_INFO_MMAP_IOMEM /** * snd_pcm_lib_mmap_iomem - Default PCM data mmap function for I/O mem * @substream: PCM substream * @area: VMA * * When your hardware uses the iomapped pages as the hardware buffer and * wants to mmap it, pass this function as mmap pcm_ops. Note that this * is supposed to work only on limited architectures. 
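 *
 * A driver hook-up sketch (the ops name is illustrative only):
 *
 *	static const struct snd_pcm_ops foo_pcm_ops = {
 *		...
 *		.mmap = snd_pcm_lib_mmap_iomem,
 *	};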
* * Return: zero if successful, or a negative error code */ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_struct *area) { struct snd_pcm_runtime *runtime = substream->runtime; area->vm_page_prot = pgprot_noncached(area->vm_page_prot); return vm_iomap_memory(area, runtime->dma_addr, runtime->dma_bytes); } EXPORT_SYMBOL(snd_pcm_lib_mmap_iomem); #endif /* SNDRV_PCM_INFO_MMAP */ /* * mmap DMA buffer */ int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { struct snd_pcm_runtime *runtime; long size; unsigned long offset; size_t dma_bytes; int err; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { if (!(area->vm_flags & (VM_WRITE|VM_READ))) return -EINVAL; } else { if (!(area->vm_flags & VM_READ)) return -EINVAL; } runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (!(runtime->info & SNDRV_PCM_INFO_MMAP)) return -ENXIO; if (runtime->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED || runtime->access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED) return -EINVAL; size = area->vm_end - area->vm_start; offset = area->vm_pgoff << PAGE_SHIFT; dma_bytes = PAGE_ALIGN(runtime->dma_bytes); if ((size_t)size > dma_bytes) return -EINVAL; if (offset > dma_bytes - size) return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_data; area->vm_private_data = substream; if (substream->ops->mmap) err = substream->ops->mmap(substream, area); else err = snd_pcm_lib_default_mmap(substream, area); if (!err) atomic_inc(&substream->mmap_count); return err; } EXPORT_SYMBOL(snd_pcm_mmap_data); static int snd_pcm_mmap(struct file *file, struct vm_area_struct *area) { struct snd_pcm_file * pcm_file; struct snd_pcm_substream *substream; unsigned long offset; pcm_file = file->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; if (substream->runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; offset = area->vm_pgoff << PAGE_SHIFT; switch (offset) { case SNDRV_PCM_MMAP_OFFSET_STATUS_OLD: if (pcm_file->no_compat_mmap || !IS_ENABLED(CONFIG_64BIT)) return -ENXIO; fallthrough; case SNDRV_PCM_MMAP_OFFSET_STATUS_NEW: if (!pcm_status_mmap_allowed(pcm_file)) return -ENXIO; return snd_pcm_mmap_status(substream, file, area); case SNDRV_PCM_MMAP_OFFSET_CONTROL_OLD: if (pcm_file->no_compat_mmap || !IS_ENABLED(CONFIG_64BIT)) return -ENXIO; fallthrough; case SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW: if (!pcm_control_mmap_allowed(pcm_file)) return -ENXIO; return snd_pcm_mmap_control(substream, file, area); default: return snd_pcm_mmap_data(substream, file, area); } return 0; } static int snd_pcm_fasync(int fd, struct file * file, int on) { struct snd_pcm_file * pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; pcm_file = file->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; return snd_fasync_helper(fd, file, on, &runtime->fasync); } /* * ioctl32 compat */ #ifdef CONFIG_COMPAT #include "pcm_compat.c" #else #define snd_pcm_ioctl_compat NULL #endif /* * To be removed helpers to keep binary compatibility */ #ifdef CONFIG_SND_SUPPORT_OLD_API #define __OLD_TO_NEW_MASK(x) ((x&7)|((x&0x07fffff8)<<5)) #define __NEW_TO_OLD_MASK(x) ((x&7)|((x&0xffffff00)>>5)) static void snd_pcm_hw_convert_from_old_params(struct snd_pcm_hw_params *params, struct snd_pcm_hw_params_old *oparams) { unsigned int i; memset(params, 0, 
sizeof(*params)); params->flags = oparams->flags; for (i = 0; i < ARRAY_SIZE(oparams->masks); i++) params->masks[i].bits[0] = oparams->masks[i]; memcpy(params->intervals, oparams->intervals, sizeof(oparams->intervals)); params->rmask = __OLD_TO_NEW_MASK(oparams->rmask); params->cmask = __OLD_TO_NEW_MASK(oparams->cmask); params->info = oparams->info; params->msbits = oparams->msbits; params->rate_num = oparams->rate_num; params->rate_den = oparams->rate_den; params->fifo_size = oparams->fifo_size; } static void snd_pcm_hw_convert_to_old_params(struct snd_pcm_hw_params_old *oparams, struct snd_pcm_hw_params *params) { unsigned int i; memset(oparams, 0, sizeof(*oparams)); oparams->flags = params->flags; for (i = 0; i < ARRAY_SIZE(oparams->masks); i++) oparams->masks[i] = params->masks[i].bits[0]; memcpy(oparams->intervals, params->intervals, sizeof(oparams->intervals)); oparams->rmask = __NEW_TO_OLD_MASK(params->rmask); oparams->cmask = __NEW_TO_OLD_MASK(params->cmask); oparams->info = params->info; oparams->msbits = params->msbits; oparams->rate_num = params->rate_num; oparams->rate_den = params->rate_den; oparams->fifo_size = params->fifo_size; } static int snd_pcm_hw_refine_old_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params_old __user * _oparams) { struct snd_pcm_hw_params *params __free(kfree) = NULL; struct snd_pcm_hw_params_old *oparams __free(kfree) = NULL; int err; params = kmalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; oparams = memdup_user(_oparams, sizeof(*oparams)); if (IS_ERR(oparams)) return PTR_ERR(no_free_ptr(oparams)); snd_pcm_hw_convert_from_old_params(params, oparams); err = snd_pcm_hw_refine(substream, params); if (err < 0) return err; err = fixup_unreferenced_params(substream, params); if (err < 0) return err; snd_pcm_hw_convert_to_old_params(oparams, params); if (copy_to_user(_oparams, oparams, sizeof(*oparams))) return -EFAULT; return 0; } static int snd_pcm_hw_params_old_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params_old __user * _oparams) { struct snd_pcm_hw_params *params __free(kfree) = NULL; struct snd_pcm_hw_params_old *oparams __free(kfree) = NULL; int err; params = kmalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; oparams = memdup_user(_oparams, sizeof(*oparams)); if (IS_ERR(oparams)) return PTR_ERR(no_free_ptr(oparams)); snd_pcm_hw_convert_from_old_params(params, oparams); err = snd_pcm_hw_params(substream, params); if (err < 0) return err; snd_pcm_hw_convert_to_old_params(oparams, params); if (copy_to_user(_oparams, oparams, sizeof(*oparams))) return -EFAULT; return 0; } #endif /* CONFIG_SND_SUPPORT_OLD_API */ #ifndef CONFIG_MMU static unsigned long snd_pcm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct snd_pcm_file *pcm_file = file->private_data; struct snd_pcm_substream *substream = pcm_file->substream; struct snd_pcm_runtime *runtime = substream->runtime; unsigned long offset = pgoff << PAGE_SHIFT; switch (offset) { case SNDRV_PCM_MMAP_OFFSET_STATUS_NEW: return (unsigned long)runtime->status; case SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW: return (unsigned long)runtime->control; default: return (unsigned long)runtime->dma_area + offset; } } #else # define snd_pcm_get_unmapped_area NULL #endif /* * Register section */ const struct file_operations snd_pcm_f_ops[2] = { { .owner = THIS_MODULE, .write = snd_pcm_write, .write_iter = snd_pcm_writev, .open = snd_pcm_playback_open, .release = snd_pcm_release, .llseek = 
no_llseek, .poll = snd_pcm_poll, .unlocked_ioctl = snd_pcm_ioctl, .compat_ioctl = snd_pcm_ioctl_compat, .mmap = snd_pcm_mmap, .fasync = snd_pcm_fasync, .get_unmapped_area = snd_pcm_get_unmapped_area, }, { .owner = THIS_MODULE, .read = snd_pcm_read, .read_iter = snd_pcm_readv, .open = snd_pcm_capture_open, .release = snd_pcm_release, .llseek = no_llseek, .poll = snd_pcm_poll, .unlocked_ioctl = snd_pcm_ioctl, .compat_ioctl = snd_pcm_ioctl_compat, .mmap = snd_pcm_mmap, .fasync = snd_pcm_fasync, .get_unmapped_area = snd_pcm_get_unmapped_area, } };
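
/*
 * A minimal user-space sketch of how the file operations registered above are
 * reached (illustration only: the device path is an assumption for card 0,
 * device 0, playback, and real applications normally go through alsa-lib
 * rather than raw ioctls).
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sound/asound.h>

int main(void)
{
	int ver;
	int fd = open("/dev/snd/pcmC0D0p", O_RDWR | O_NONBLOCK);

	if (fd < 0)
		return 1;
	/* served by snd_pcm_common_ioctl(), case SNDRV_PCM_IOCTL_PVERSION */
	if (ioctl(fd, SNDRV_PCM_IOCTL_PVERSION, &ver) == 0)
		printf("PCM protocol %d.%d.%d\n",
		       ver >> 16, (ver >> 8) & 0xff, ver & 0xff);
	close(fd);
	return 0;
}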
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H

#include <linux/bitops.h>
#include <linux/wordpart.h>

/*
 * This is largely generic for little-endian machines, but the
 * optimal byte mask counting is probably going to be something
 * that is architecture-specific. If you have a reliably fast
 * bit count instruction, that might be better than the multiply
 * and shift, for example.
 */
struct word_at_a_time {
	const unsigned long one_bits, high_bits;
};

#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }

#ifdef CONFIG_64BIT

/*
 * Jan Achrenius on G+: microoptimized version of
 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
 * that works for the bytemasks without having to
 * mask them first.
 */
static inline long count_masked_bytes(unsigned long mask)
{
	return mask*0x0001020304050608ul >> 56;
}

#else	/* 32-bit case */

/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long a = (0x0ff0001+mask) >> 23;
	/* Fix the 1 for 00 case */
	return a & mask;
}

#endif

/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
	*bits = mask;
	return mask;
}

static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	return bits;
}

static inline unsigned long create_zero_mask(unsigned long bits)
{
	bits = (bits - 1) & ~bits;
	return bits >> 7;
}

/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)

static inline unsigned long find_zero(unsigned long mask)
{
	return count_masked_bytes(mask);
}

/*
 * Load an unaligned word from kernel space.
 *
 * In the (very unlikely) case of the word being a page-crosser
 * and the next page not being mapped, take the exception and
 * return zeroes in the non-existing part.
 */
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
	unsigned long ret;

	asm volatile(
		"1:	mov %[mem], %[ret]\n"
		"2:\n"
		_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD)
		: [ret] "=r" (ret)
		: [mem] "m" (*(unsigned long *)addr));

	return ret;
}

#endif /* _ASM_WORD_AT_A_TIME_H */
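
/*
 * A usage sketch (illustrative only, modelled on in-kernel users such as
 * strscpy()): the canonical loop for finding the first zero byte with the
 * helpers above. find_first_zero_byte() is a made-up name, and the buffer is
 * assumed to be word-aligned and to contain a terminating zero byte.
 */
static inline unsigned long find_first_zero_byte(const unsigned long *p)
{
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	unsigned long offset = 0, data, bits;

	for (;;) {
		data = *p++;
		/* does this word contain a 0x00 byte anywhere? */
		if (has_zero(data, &bits, &constants))
			break;
		offset += sizeof(unsigned long);
	}
	/* narrow the zero-detect bits down to the first zero byte ... */
	bits = prep_zero_mask(data, bits, &constants);
	bits = create_zero_mask(bits);
	/* ... and convert that mask into a byte index within the word */
	return offset + find_zero(bits);
}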
// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/ksm.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#define CREATE_TRACE_POINTS
#include <trace/events/mmap.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
int mmap_rnd_bits_max __ro_after_init = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		struct vm_area_struct *next, unsigned long start,
		unsigned long end, unsigned long tree_end, bool mm_wr_locked);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags.
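 * (If the VMA wants write notifications, e.g. for dirty tracking of a shared
 * file mapping, the protection is computed with VM_SHARED cleared so that the
 * first write to a page faults and can be intercepted.)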
*/ void vma_set_page_prot(struct vm_area_struct *vma) { unsigned long vm_flags = vma->vm_flags; pgprot_t vm_page_prot; vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags); if (vma_wants_writenotify(vma, vm_page_prot)) { vm_flags &= ~VM_SHARED; vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags); } /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ WRITE_ONCE(vma->vm_page_prot, vm_page_prot); } /* * Requires inode->i_mapping->i_mmap_rwsem */ static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct address_space *mapping) { if (vma_is_shared_maywrite(vma)) mapping_unmap_writable(mapping); flush_dcache_mmap_lock(mapping); vma_interval_tree_remove(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } /* * Unlink a file-based vm structure from its interval tree, to hide * vma from rmap and vmtruncate before freeing its page tables. */ void unlink_file_vma(struct vm_area_struct *vma) { struct file *file = vma->vm_file; if (file) { struct address_space *mapping = file->f_mapping; i_mmap_lock_write(mapping); __remove_shared_vm_struct(vma, mapping); i_mmap_unlock_write(mapping); } } /* * Close a vm structure and free it. */ static void remove_vma(struct vm_area_struct *vma, bool unreachable) { might_sleep(); if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); if (unreachable) __vm_area_free(vma); else vm_area_free(vma); } static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi, unsigned long min) { return mas_prev(&vmi->mas, min); } /* * check_brk_limits() - Use platform specific check of range & verify mlock * limits. * @addr: The address to check * @len: The size of increase. * * Return: 0 on success. */ static int check_brk_limits(unsigned long addr, unsigned long len) { unsigned long mapped_addr; mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); if (IS_ERR_VALUE(mapped_addr)) return mapped_addr; return mlock_future_ok(current->mm, current->mm->def_flags, len) ? 0 : -EAGAIN; } static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma, unsigned long addr, unsigned long request, unsigned long flags); SYSCALL_DEFINE1(brk, unsigned long, brk) { unsigned long newbrk, oldbrk, origbrk; struct mm_struct *mm = current->mm; struct vm_area_struct *brkvma, *next = NULL; unsigned long min_brk; bool populate = false; LIST_HEAD(uf); struct vma_iterator vmi; if (mmap_write_lock_killable(mm)) return -EINTR; origbrk = mm->brk; #ifdef CONFIG_COMPAT_BRK /* * CONFIG_COMPAT_BRK can still be overridden by setting * randomize_va_space to 2, which will still cause mm->start_brk * to be arbitrarily shifted */ if (current->brk_randomized) min_brk = mm->start_brk; else min_brk = mm->end_data; #else min_brk = mm->start_brk; #endif if (brk < min_brk) goto out; /* * Check against rlimit here. If this check is done later after the test * of oldbrk with newbrk then it can escape the test and let the data * segment grow beyond its set limit the in case where the limit is * not page aligned -Ram Gupta */ if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk, mm->end_data, mm->start_data)) goto out; newbrk = PAGE_ALIGN(brk); oldbrk = PAGE_ALIGN(mm->brk); if (oldbrk == newbrk) { mm->brk = brk; goto success; } /* Always allow shrinking brk. 
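 *
 * (Illustrative userspace sketch, not part of this file: the shrink path
 *  below is what a simple malloc implementation reaches when it trims the
 *  heap, roughly
 *
 *	uintptr_t cur = (uintptr_t)sbrk(0);	// current program break
 *	brk((void *)(cur - 4096));		// hand one page back
 *
 *  assuming glibc-style brk()/sbrk() wrappers; the kernel only sees the new
 *  break value and unmaps the page-aligned range [newbrk, oldbrk).)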
*/ if (brk <= mm->brk) { /* Search one past newbrk */ vma_iter_init(&vmi, mm, newbrk); brkvma = vma_find(&vmi, oldbrk); if (!brkvma || brkvma->vm_start >= oldbrk) goto out; /* mapping intersects with an existing non-brk vma. */ /* * mm->brk must be protected by write mmap_lock. * do_vma_munmap() will drop the lock on success, so update it * before calling do_vma_munmap(). */ mm->brk = brk; if (do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true)) goto out; goto success_unlocked; } if (check_brk_limits(oldbrk, newbrk - oldbrk)) goto out; /* * Only check if the next VMA is within the stack_guard_gap of the * expansion area */ vma_iter_init(&vmi, mm, oldbrk); next = vma_find(&vmi, newbrk + PAGE_SIZE + stack_guard_gap); if (next && newbrk + PAGE_SIZE > vm_start_gap(next)) goto out; brkvma = vma_prev_limit(&vmi, mm->start_brk); /* Ok, looks good - let it rip. */ if (do_brk_flags(&vmi, brkvma, oldbrk, newbrk - oldbrk, 0) < 0) goto out; mm->brk = brk; if (mm->def_flags & VM_LOCKED) populate = true; success: mmap_write_unlock(mm); success_unlocked: userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(oldbrk, newbrk - oldbrk); return brk; out: mm->brk = origbrk; mmap_write_unlock(mm); return origbrk; } #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) static void validate_mm(struct mm_struct *mm) { int bug = 0; int i = 0; struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); mt_validate(&mm->mm_mt); for_each_vma(vmi, vma) { #ifdef CONFIG_DEBUG_VM_RB struct anon_vma *anon_vma = vma->anon_vma; struct anon_vma_chain *avc; #endif unsigned long vmi_start, vmi_end; bool warn = 0; vmi_start = vma_iter_addr(&vmi); vmi_end = vma_iter_end(&vmi); if (VM_WARN_ON_ONCE_MM(vma->vm_end != vmi_end, mm)) warn = 1; if (VM_WARN_ON_ONCE_MM(vma->vm_start != vmi_start, mm)) warn = 1; if (warn) { pr_emerg("issue in %s\n", current->comm); dump_stack(); dump_vma(vma); pr_emerg("tree range: %px start %lx end %lx\n", vma, vmi_start, vmi_end - 1); vma_iter_dump_tree(&vmi); } #ifdef CONFIG_DEBUG_VM_RB if (anon_vma) { anon_vma_lock_read(anon_vma); list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) anon_vma_interval_tree_verify(avc); anon_vma_unlock_read(anon_vma); } #endif i++; } if (i != mm->map_count) { pr_emerg("map_count %d vma iterator %d\n", mm->map_count, i); bug = 1; } VM_BUG_ON_MM(bug, mm); } #else /* !CONFIG_DEBUG_VM_MAPLE_TREE */ #define validate_mm(mm) do { } while (0) #endif /* CONFIG_DEBUG_VM_MAPLE_TREE */ /* * vma has some anon_vma assigned, and is already inserted on that * anon_vma's interval trees. * * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the * vma must be removed from the anon_vma's interval trees using * anon_vma_interval_tree_pre_update_vma(). * * After the update, the vma will be reinserted using * anon_vma_interval_tree_post_update_vma(). * * The entire update must be protected by exclusive mmap_lock and by * the root anon_vma's mutex. 
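 *
 * A minimal sketch of the expected bracketing, mirroring what the
 * stack-expansion code later in this file does ("new_end" is a placeholder):
 *
 *	anon_vma_lock_write(vma->anon_vma);
 *	anon_vma_interval_tree_pre_update_vma(vma);
 *	vma->vm_end = new_end;		// or vm_start / vm_pgoff
 *	anon_vma_interval_tree_post_update_vma(vma);
 *	anon_vma_unlock_write(vma->anon_vma);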
*/ static inline void anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma) { struct anon_vma_chain *avc; list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root); } static inline void anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma) { struct anon_vma_chain *avc; list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root); } static unsigned long count_vma_pages_range(struct mm_struct *mm, unsigned long addr, unsigned long end) { VMA_ITERATOR(vmi, mm, addr); struct vm_area_struct *vma; unsigned long nr_pages = 0; for_each_vma_range(vmi, vma, end) { unsigned long vm_start = max(addr, vma->vm_start); unsigned long vm_end = min(end, vma->vm_end); nr_pages += PHYS_PFN(vm_end - vm_start); } return nr_pages; } static void __vma_link_file(struct vm_area_struct *vma, struct address_space *mapping) { if (vma_is_shared_maywrite(vma)) mapping_allow_writable(mapping); flush_dcache_mmap_lock(mapping); vma_interval_tree_insert(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } static void vma_link_file(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct address_space *mapping; if (file) { mapping = file->f_mapping; i_mmap_lock_write(mapping); __vma_link_file(vma, mapping); i_mmap_unlock_write(mapping); } } static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma) { VMA_ITERATOR(vmi, mm, 0); vma_iter_config(&vmi, vma->vm_start, vma->vm_end); if (vma_iter_prealloc(&vmi, vma)) return -ENOMEM; vma_start_write(vma); vma_iter_store(&vmi, vma); vma_link_file(vma); mm->map_count++; validate_mm(mm); return 0; } /* * init_multi_vma_prep() - Initializer for struct vma_prepare * @vp: The vma_prepare struct * @vma: The vma that will be altered once locked * @next: The next vma if it is to be adjusted * @remove: The first vma to be removed * @remove2: The second vma to be removed */ static inline void init_multi_vma_prep(struct vma_prepare *vp, struct vm_area_struct *vma, struct vm_area_struct *next, struct vm_area_struct *remove, struct vm_area_struct *remove2) { memset(vp, 0, sizeof(struct vma_prepare)); vp->vma = vma; vp->anon_vma = vma->anon_vma; vp->remove = remove; vp->remove2 = remove2; vp->adj_next = next; if (!vp->anon_vma && next) vp->anon_vma = next->anon_vma; vp->file = vma->vm_file; if (vp->file) vp->mapping = vma->vm_file->f_mapping; } /* * init_vma_prep() - Initializer wrapper for vma_prepare struct * @vp: The vma_prepare struct * @vma: The vma that will be altered once locked */ static inline void init_vma_prep(struct vma_prepare *vp, struct vm_area_struct *vma) { init_multi_vma_prep(vp, vma, NULL, NULL, NULL); } /* * vma_prepare() - Helper function for handling locking VMAs prior to altering * @vp: The initialized vma_prepare struct */ static inline void vma_prepare(struct vma_prepare *vp) { if (vp->file) { uprobe_munmap(vp->vma, vp->vma->vm_start, vp->vma->vm_end); if (vp->adj_next) uprobe_munmap(vp->adj_next, vp->adj_next->vm_start, vp->adj_next->vm_end); i_mmap_lock_write(vp->mapping); if (vp->insert && vp->insert->vm_file) { /* * Put into interval tree now, so instantiated pages * are visible to arm/parisc __flush_dcache_page * throughout; but we cannot insert into address * space until vma start or end is updated. 
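 *
 * (As an aside, callers in this file bracket the actual field updates with
 *  vma_prepare()/vma_complete(), roughly
 *
 *	init_vma_prep(&vp, vma);
 *	vma_prepare(&vp);
 *	// ...update vm_start/vm_end/vm_pgoff and the maple tree...
 *	vma_complete(&vp, vmi, mm);
 *
 *  as vma_shrink() below illustrates; vma_expand() uses the
 *  init_multi_vma_prep() variant.)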
*/ __vma_link_file(vp->insert, vp->insert->vm_file->f_mapping); } } if (vp->anon_vma) { anon_vma_lock_write(vp->anon_vma); anon_vma_interval_tree_pre_update_vma(vp->vma); if (vp->adj_next) anon_vma_interval_tree_pre_update_vma(vp->adj_next); } if (vp->file) { flush_dcache_mmap_lock(vp->mapping); vma_interval_tree_remove(vp->vma, &vp->mapping->i_mmap); if (vp->adj_next) vma_interval_tree_remove(vp->adj_next, &vp->mapping->i_mmap); } } /* * vma_complete- Helper function for handling the unlocking after altering VMAs, * or for inserting a VMA. * * @vp: The vma_prepare struct * @vmi: The vma iterator * @mm: The mm_struct */ static inline void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, struct mm_struct *mm) { if (vp->file) { if (vp->adj_next) vma_interval_tree_insert(vp->adj_next, &vp->mapping->i_mmap); vma_interval_tree_insert(vp->vma, &vp->mapping->i_mmap); flush_dcache_mmap_unlock(vp->mapping); } if (vp->remove && vp->file) { __remove_shared_vm_struct(vp->remove, vp->mapping); if (vp->remove2) __remove_shared_vm_struct(vp->remove2, vp->mapping); } else if (vp->insert) { /* * split_vma has split insert from vma, and needs * us to insert it before dropping the locks * (it may either follow vma or precede it). */ vma_iter_store(vmi, vp->insert); mm->map_count++; } if (vp->anon_vma) { anon_vma_interval_tree_post_update_vma(vp->vma); if (vp->adj_next) anon_vma_interval_tree_post_update_vma(vp->adj_next); anon_vma_unlock_write(vp->anon_vma); } if (vp->file) { i_mmap_unlock_write(vp->mapping); uprobe_mmap(vp->vma); if (vp->adj_next) uprobe_mmap(vp->adj_next); } if (vp->remove) { again: vma_mark_detached(vp->remove, true); if (vp->file) { uprobe_munmap(vp->remove, vp->remove->vm_start, vp->remove->vm_end); fput(vp->file); } if (vp->remove->anon_vma) anon_vma_merge(vp->vma, vp->remove); mm->map_count--; mpol_put(vma_policy(vp->remove)); if (!vp->remove2) WARN_ON_ONCE(vp->vma->vm_end < vp->remove->vm_end); vm_area_free(vp->remove); /* * In mprotect's case 6 (see comments on vma_merge), * we are removing both mid and next vmas */ if (vp->remove2) { vp->remove = vp->remove2; vp->remove2 = NULL; goto again; } } if (vp->insert && vp->file) uprobe_mmap(vp->insert); validate_mm(mm); } /* * dup_anon_vma() - Helper function to duplicate anon_vma * @dst: The destination VMA * @src: The source VMA * @dup: Pointer to the destination VMA when successful. * * Returns: 0 on success. */ static inline int dup_anon_vma(struct vm_area_struct *dst, struct vm_area_struct *src, struct vm_area_struct **dup) { /* * Easily overlooked: when mprotect shifts the boundary, make sure the * expanding vma has anon_vma set if the shrinking vma had, to cover any * anon pages imported. */ if (src->anon_vma && !dst->anon_vma) { int ret; vma_assert_write_locked(dst); dst->anon_vma = src->anon_vma; ret = anon_vma_clone(dst, src); if (ret) return ret; *dup = dst; } return 0; } /* * vma_expand - Expand an existing VMA * * @vmi: The vma iterator * @vma: The vma to expand * @start: The start of the vma * @end: The exclusive end of the vma * @pgoff: The page offset of vma * @next: The current of next vma. * * Expand @vma to @start and @end. Can expand off the start and end. Will * expand over @next if it's different from @vma and @end == @next->vm_end. * Checking if the @vma can expand and merge with @next needs to be handled by * the caller. 
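 *
 * Illustrative call (a sketch, not taken from this file): with merge
 * eligibility already verified and the iterator positioned at @vma's start,
 * a caller can absorb @next with
 *
 *	err = vma_expand(vmi, vma, vma->vm_start, next->vm_end,
 *			 vma->vm_pgoff, next);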
* * Returns: 0 on success */ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *next) { struct vm_area_struct *anon_dup = NULL; bool remove_next = false; struct vma_prepare vp; vma_start_write(vma); if (next && (vma != next) && (end == next->vm_end)) { int ret; remove_next = true; vma_start_write(next); ret = dup_anon_vma(vma, next, &anon_dup); if (ret) return ret; } init_multi_vma_prep(&vp, vma, NULL, remove_next ? next : NULL, NULL); /* Not merging but overwriting any part of next is not handled. */ VM_WARN_ON(next && !vp.remove && next != vma && end > next->vm_start); /* Only handles expanding */ VM_WARN_ON(vma->vm_start < start || vma->vm_end > end); /* Note: vma iterator must be pointing to 'start' */ vma_iter_config(vmi, start, end); if (vma_iter_prealloc(vmi, vma)) goto nomem; vma_prepare(&vp); vma_adjust_trans_huge(vma, start, end, 0); vma_set_range(vma, start, end, pgoff); vma_iter_store(vmi, vma); vma_complete(&vp, vmi, vma->vm_mm); return 0; nomem: if (anon_dup) unlink_anon_vmas(anon_dup); return -ENOMEM; } /* * vma_shrink() - Reduce an existing VMAs memory area * @vmi: The vma iterator * @vma: The VMA to modify * @start: The new start * @end: The new end * * Returns: 0 on success, -ENOMEM otherwise */ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff) { struct vma_prepare vp; WARN_ON((vma->vm_start != start) && (vma->vm_end != end)); if (vma->vm_start < start) vma_iter_config(vmi, vma->vm_start, start); else vma_iter_config(vmi, end, vma->vm_end); if (vma_iter_prealloc(vmi, NULL)) return -ENOMEM; vma_start_write(vma); init_vma_prep(&vp, vma); vma_prepare(&vp); vma_adjust_trans_huge(vma, start, end, 0); vma_iter_clear(vmi); vma_set_range(vma, start, end, pgoff); vma_complete(&vp, vmi, vma->vm_mm); return 0; } /* * If the vma has a ->close operation then the driver probably needs to release * per-vma resources, so we don't attempt to merge those if the caller indicates * the current vma may be removed as part of the merge. */ static inline bool is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, struct anon_vma_name *anon_name, bool may_remove_vma) { /* * VM_SOFTDIRTY should not prevent from VMA merging, if we * match the flags but dirty bit -- the caller should mark * merged VMA as dirty. If dirty bit won't be excluded from * comparison, we increase pressure on the memory system forcing * the kernel to generate new VMAs when old one could be * extended instead. */ if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY) return false; if (vma->vm_file != file) return false; if (may_remove_vma && vma->vm_ops && vma->vm_ops->close) return false; if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx)) return false; if (!anon_vma_name_eq(anon_vma_name(vma), anon_name)) return false; return true; } static inline bool is_mergeable_anon_vma(struct anon_vma *anon_vma1, struct anon_vma *anon_vma2, struct vm_area_struct *vma) { /* * The list_is_singular() test is to avoid merging VMA cloned from * parents. This can improve scalability caused by anon_vma lock. */ if ((!anon_vma1 || !anon_vma2) && (!vma || list_is_singular(&vma->anon_vma_chain))) return true; return anon_vma1 == anon_vma2; } /* * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff) * in front of (at a lower virtual address and file offset than) the vma. 
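 *
 * Worked example: if the new range maps two file pages starting at page
 * offset 4 and ends exactly where @vma begins, the caller passes
 * vm_pgoff = 4 + 2 = 6, so the merge is only allowed when @vma itself maps
 * the file starting at page 6, keeping the combined range file-contiguous.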
* * We cannot merge two vmas if they have differently assigned (non-NULL) * anon_vmas, nor if same anon_vma is assigned but offsets incompatible. * * We don't check here for the merged mmap wrapping around the end of pagecache * indices (16TB on ia32) because do_mmap() does not permit mmap's which * wrap, nor mmaps which cover the final page at index -1UL. * * We assume the vma may be removed as part of the merge. */ static bool can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, struct anon_vma_name *anon_name) { if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name, true) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { if (vma->vm_pgoff == vm_pgoff) return true; } return false; } /* * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff) * beyond (at a higher virtual address and file offset than) the vma. * * We cannot merge two vmas if they have differently assigned (non-NULL) * anon_vmas, nor if same anon_vma is assigned but offsets incompatible. * * We assume that vma is not removed as part of the merge. */ static bool can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, struct anon_vma_name *anon_name) { if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name, false) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { pgoff_t vm_pglen; vm_pglen = vma_pages(vma); if (vma->vm_pgoff + vm_pglen == vm_pgoff) return true; } return false; } /* * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name), * figure out whether that can be merged with its predecessor or its * successor. Or both (it neatly fills a hole). * * In most cases - when called for mmap, brk or mremap - [addr,end) is * certain not to be mapped by the time vma_merge is called; but when * called for mprotect, it is certain to be already mapped (either at * an offset within prev, or at the start of next), and the flags of * this area are about to be changed to vm_flags - and the no-change * case has already been eliminated. * * The following mprotect cases have to be considered, where **** is * the area passed down from mprotect_fixup, never extending beyond one * vma, PPPP is the previous vma, CCCC is a concurrent vma that starts * at the same address as **** and is of the same or larger span, and * NNNN the next vma after ****: * * **** **** **** * PPPPPPNNNNNN PPPPPPNNNNNN PPPPPPCCCCCC * cannot merge might become might become * PPNNNNNNNNNN PPPPPPPPPPCC * mmap, brk or case 4 below case 5 below * mremap move: * **** **** * PPPP NNNN PPPPCCCCNNNN * might become might become * PPPPPPPPPPPP 1 or PPPPPPPPPPPP 6 or * PPPPPPPPNNNN 2 or PPPPPPPPNNNN 7 or * PPPPNNNNNNNN 3 PPPPNNNNNNNN 8 * * It is important for case 8 that the vma CCCC overlapping the * region **** is never going to extended over NNNN. Instead NNNN must * be extended in region **** and CCCC must be removed. This way in * all cases where vma_merge succeeds, the moment vma_merge drops the * rmap_locks, the properties of the merged vma will be already * correct for the whole merged range. Some of those properties like * vm_page_prot/vm_flags may be accessed by rmap_walks and they must * be correct for the whole merged range immediately after the * rmap_locks are released. 
Otherwise if NNNN would be removed and * CCCC would be extended over the NNNN range, remove_migration_ptes * or other rmap walkers (if working on addresses beyond the "end" * parameter) may establish ptes with the wrong permissions of CCCC * instead of the right permissions of NNNN. * * In the code below: * PPPP is represented by *prev * CCCC is represented by *curr or not represented at all (NULL) * NNNN is represented by *next or not represented at all (NULL) * **** is not represented - it will be merged and the vma containing the * area is returned, or the function will return NULL */ static struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *src, unsigned long addr, unsigned long end, unsigned long vm_flags, pgoff_t pgoff, struct mempolicy *policy, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, struct anon_vma_name *anon_name) { struct mm_struct *mm = src->vm_mm; struct anon_vma *anon_vma = src->anon_vma; struct file *file = src->vm_file; struct vm_area_struct *curr, *next, *res; struct vm_area_struct *vma, *adjust, *remove, *remove2; struct vm_area_struct *anon_dup = NULL; struct vma_prepare vp; pgoff_t vma_pgoff; int err = 0; bool merge_prev = false; bool merge_next = false; bool vma_expanded = false; unsigned long vma_start = addr; unsigned long vma_end = end; pgoff_t pglen = (end - addr) >> PAGE_SHIFT; long adj_start = 0; /* * We later require that vma->vm_flags == vm_flags, * so this tests vma->vm_flags & VM_SPECIAL, too. */ if (vm_flags & VM_SPECIAL) return NULL; /* Does the input range span an existing VMA? (cases 5 - 8) */ curr = find_vma_intersection(mm, prev ? prev->vm_end : 0, end); if (!curr || /* cases 1 - 4 */ end == curr->vm_end) /* cases 6 - 8, adjacent VMA */ next = vma_lookup(mm, end); else next = NULL; /* case 5 */ if (prev) { vma_start = prev->vm_start; vma_pgoff = prev->vm_pgoff; /* Can we merge the predecessor? */ if (addr == prev->vm_end && mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, anon_vma, file, pgoff, vm_userfaultfd_ctx, anon_name)) { merge_prev = true; vma_prev(vmi); } } /* Can we merge the successor? */ if (next && mpol_equal(policy, vma_policy(next)) && can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, vm_userfaultfd_ctx, anon_name)) { merge_next = true; } /* Verify some invariant that must be enforced by the caller. */ VM_WARN_ON(prev && addr <= prev->vm_start); VM_WARN_ON(curr && (addr != curr->vm_start || end > curr->vm_end)); VM_WARN_ON(addr >= end); if (!merge_prev && !merge_next) return NULL; /* Not mergeable. */ if (merge_prev) vma_start_write(prev); res = vma = prev; remove = remove2 = adjust = NULL; /* Can we merge both the predecessor and the successor? */ if (merge_prev && merge_next && is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) { vma_start_write(next); remove = next; /* case 1 */ vma_end = next->vm_end; err = dup_anon_vma(prev, next, &anon_dup); if (curr) { /* case 6 */ vma_start_write(curr); remove = curr; remove2 = next; /* * Note that the dup_anon_vma below cannot overwrite err * since the first caller would do nothing unless next * has an anon_vma. 
*/ if (!next->anon_vma) err = dup_anon_vma(prev, curr, &anon_dup); } } else if (merge_prev) { /* case 2 */ if (curr) { vma_start_write(curr); if (end == curr->vm_end) { /* case 7 */ /* * can_vma_merge_after() assumed we would not be * removing prev vma, so it skipped the check * for vm_ops->close, but we are removing curr */ if (curr->vm_ops && curr->vm_ops->close) err = -EINVAL; remove = curr; } else { /* case 5 */ adjust = curr; adj_start = (end - curr->vm_start); } if (!err) err = dup_anon_vma(prev, curr, &anon_dup); } } else { /* merge_next */ vma_start_write(next); res = next; if (prev && addr < prev->vm_end) { /* case 4 */ vma_start_write(prev); vma_end = addr; adjust = next; adj_start = -(prev->vm_end - addr); err = dup_anon_vma(next, prev, &anon_dup); } else { /* * Note that cases 3 and 8 are the ONLY ones where prev * is permitted to be (but is not necessarily) NULL. */ vma = next; /* case 3 */ vma_start = addr; vma_end = next->vm_end; vma_pgoff = next->vm_pgoff - pglen; if (curr) { /* case 8 */ vma_pgoff = curr->vm_pgoff; vma_start_write(curr); remove = curr; err = dup_anon_vma(next, curr, &anon_dup); } } } /* Error in anon_vma clone. */ if (err) goto anon_vma_fail; if (vma_start < vma->vm_start || vma_end > vma->vm_end) vma_expanded = true; if (vma_expanded) { vma_iter_config(vmi, vma_start, vma_end); } else { vma_iter_config(vmi, adjust->vm_start + adj_start, adjust->vm_end); } if (vma_iter_prealloc(vmi, vma)) goto prealloc_fail; init_multi_vma_prep(&vp, vma, adjust, remove, remove2); VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && vp.anon_vma != adjust->anon_vma); vma_prepare(&vp); vma_adjust_trans_huge(vma, vma_start, vma_end, adj_start); vma_set_range(vma, vma_start, vma_end, vma_pgoff); if (vma_expanded) vma_iter_store(vmi, vma); if (adj_start) { adjust->vm_start += adj_start; adjust->vm_pgoff += adj_start >> PAGE_SHIFT; if (adj_start < 0) { WARN_ON(vma_expanded); vma_iter_store(vmi, next); } } vma_complete(&vp, vmi, mm); khugepaged_enter_vma(res, vm_flags); return res; prealloc_fail: if (anon_dup) unlink_anon_vmas(anon_dup); anon_vma_fail: vma_iter_set(vmi, addr); vma_iter_load(vmi); return NULL; } /* * Rough compatibility check to quickly see if it's even worth looking * at sharing an anon_vma. * * They need to have the same vm_file, and the flags can only differ * in things that mprotect may change. * * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that * we can merge the two vma's. For example, we refuse to merge a vma if * there is a vm_ops->close() function, because that indicates that the * driver is doing some kind of reference counting. But that doesn't * really matter for the anon_vma sharing case. */ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b) { return a->vm_end == b->vm_start && mpol_equal(vma_policy(a), vma_policy(b)) && a->vm_file == b->vm_file && !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) && b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); } /* * Do some basic sanity checking to see if we can re-use the anon_vma * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be * the same as 'old', the other will be the new one that is trying * to share the anon_vma. * * NOTE! This runs with mmap_lock held for reading, so it is possible that * the anon_vma of 'old' is concurrently in the process of being set up * by another page fault trying to merge _that_. 
But that's ok: if it * is being set up, that automatically means that it will be a singleton * acceptable for merging, so we can do all of this optimistically. But * we do that READ_ONCE() to make sure that we never re-load the pointer. * * IOW: that the "list_is_singular()" test on the anon_vma_chain only * matters for the 'stable anon_vma' case (ie the thing we want to avoid * is to return an anon_vma that is "complex" due to having gone through * a fork). * * We also make sure that the two vma's are compatible (adjacent, * and with the same memory policies). That's all stable, even with just * a read lock on the mmap_lock. */ static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b) { if (anon_vma_compatible(a, b)) { struct anon_vma *anon_vma = READ_ONCE(old->anon_vma); if (anon_vma && list_is_singular(&old->anon_vma_chain)) return anon_vma; } return NULL; } /* * find_mergeable_anon_vma is used by anon_vma_prepare, to check * neighbouring vmas for a suitable anon_vma, before it goes off * to allocate a new anon_vma. It checks because a repetitive * sequence of mprotects and faults may otherwise lead to distinct * anon_vmas being allocated, preventing vma merge in subsequent * mprotect. */ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma) { MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end); struct anon_vma *anon_vma = NULL; struct vm_area_struct *prev, *next; /* Try next first. */ next = mas_walk(&mas); if (next) { anon_vma = reusable_anon_vma(next, vma, next); if (anon_vma) return anon_vma; } prev = mas_prev(&mas, 0); VM_BUG_ON_VMA(prev != vma, vma); prev = mas_prev(&mas, 0); /* Try prev next. */ if (prev) anon_vma = reusable_anon_vma(prev, prev, vma); /* * We might reach here with anon_vma == NULL if we can't find * any reusable anon_vma. * There's no absolute need to look only at touching neighbours: * we could search further afield for "compatible" anon_vmas. * But it would probably just be a waste of time searching, * or lead to too many vmas hanging off the same anon_vma. * We're trying to allow mprotect remerging later on, * not trying to minimize memory used for anon_vmas. */ return anon_vma; } /* * If a hint addr is less than mmap_min_addr change hint to be as * low as possible but still greater than mmap_min_addr */ static inline unsigned long round_hint_to_min(unsigned long hint) { hint &= PAGE_MASK; if (((void *)hint != NULL) && (hint < mmap_min_addr)) return PAGE_ALIGN(mmap_min_addr); return hint; } bool mlock_future_ok(struct mm_struct *mm, unsigned long flags, unsigned long bytes) { unsigned long locked_pages, limit_pages; if (!(flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) return true; locked_pages = bytes >> PAGE_SHIFT; locked_pages += mm->locked_vm; limit_pages = rlimit(RLIMIT_MEMLOCK); limit_pages >>= PAGE_SHIFT; return locked_pages <= limit_pages; } static inline u64 file_mmap_size_max(struct file *file, struct inode *inode) { if (S_ISREG(inode->i_mode)) return MAX_LFS_FILESIZE; if (S_ISBLK(inode->i_mode)) return MAX_LFS_FILESIZE; if (S_ISSOCK(inode->i_mode)) return MAX_LFS_FILESIZE; /* Special "we do even unsigned file positions" case */ if (file->f_mode & FMODE_UNSIGNED_OFFSET) return 0; /* Yes, random drivers might want more. 
But I'm tired of buggy drivers */ return ULONG_MAX; } static inline bool file_mmap_ok(struct file *file, struct inode *inode, unsigned long pgoff, unsigned long len) { u64 maxsize = file_mmap_size_max(file, inode); if (maxsize && len > maxsize) return false; maxsize -= len; if (pgoff > maxsize >> PAGE_SHIFT) return false; return true; } /* * The caller must write-lock current->mm->mmap_lock. */ unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf) { struct mm_struct *mm = current->mm; int pkey = 0; *populate = 0; if (!len) return -EINVAL; /* * Does the application expect PROT_READ to imply PROT_EXEC? * * (the exception is when the underlying filesystem is noexec * mounted, in which case we don't add PROT_EXEC.) */ if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) if (!(file && path_noexec(&file->f_path))) prot |= PROT_EXEC; /* force arch specific MAP_FIXED handling in get_unmapped_area */ if (flags & MAP_FIXED_NOREPLACE) flags |= MAP_FIXED; if (!(flags & MAP_FIXED)) addr = round_hint_to_min(addr); /* Careful about overflows.. */ len = PAGE_ALIGN(len); if (!len) return -ENOMEM; /* offset overflow? */ if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) return -EOVERFLOW; /* Too many mappings? */ if (mm->map_count > sysctl_max_map_count) return -ENOMEM; /* Obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. */ addr = get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; if (flags & MAP_FIXED_NOREPLACE) { if (find_vma_intersection(mm, addr, addr + len)) return -EEXIST; } if (prot == PROT_EXEC) { pkey = execute_only_pkey(mm); if (pkey < 0) pkey = 0; } /* Do simple checking here so the lower-level routines won't have * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) if (!can_do_mlock()) return -EPERM; if (!mlock_future_ok(mm, vm_flags, len)) return -EAGAIN; if (file) { struct inode *inode = file_inode(file); unsigned long flags_mask; if (!file_mmap_ok(file, inode, pgoff, len)) return -EOVERFLOW; flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; switch (flags & MAP_TYPE) { case MAP_SHARED: /* * Force use of MAP_SHARED_VALIDATE with non-legacy * flags. E.g. MAP_SYNC is dangerous to use with * MAP_SHARED as you don't know which consistency model * you will get. We silently ignore unsupported flags * with MAP_SHARED to preserve backward compatibility. */ flags &= LEGACY_MAP_MASK; fallthrough; case MAP_SHARED_VALIDATE: if (flags & ~flags_mask) return -EOPNOTSUPP; if (prot & PROT_WRITE) { if (!(file->f_mode & FMODE_WRITE)) return -EACCES; if (IS_SWAPFILE(file->f_mapping->host)) return -ETXTBSY; } /* * Make sure we don't allow writing to an append-only * file.. 
*/ if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE)) return -EACCES; vm_flags |= VM_SHARED | VM_MAYSHARE; if (!(file->f_mode & FMODE_WRITE)) vm_flags &= ~(VM_MAYWRITE | VM_SHARED); fallthrough; case MAP_PRIVATE: if (!(file->f_mode & FMODE_READ)) return -EACCES; if (path_noexec(&file->f_path)) { if (vm_flags & VM_EXEC) return -EPERM; vm_flags &= ~VM_MAYEXEC; } if (!file->f_op->mmap) return -ENODEV; if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; break; default: return -EINVAL; } } else { switch (flags & MAP_TYPE) { case MAP_SHARED: if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; /* * Ignore pgoff. */ pgoff = 0; vm_flags |= VM_SHARED | VM_MAYSHARE; break; case MAP_PRIVATE: /* * Set pgoff according to addr for anon_vma. */ pgoff = addr >> PAGE_SHIFT; break; default: return -EINVAL; } } /* * Set 'VM_NORESERVE' if we should not account for the * memory use of this mapping. */ if (flags & MAP_NORESERVE) { /* We honor MAP_NORESERVE if allowed to overcommit */ if (sysctl_overcommit_memory != OVERCOMMIT_NEVER) vm_flags |= VM_NORESERVE; /* hugetlb applies strict overcommit unless MAP_NORESERVE */ if (file && is_file_hugepages(file)) vm_flags |= VM_NORESERVE; } addr = mmap_region(file, addr, len, vm_flags, pgoff, uf); if (!IS_ERR_VALUE(addr) && ((vm_flags & VM_LOCKED) || (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) *populate = len; return addr; } unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { struct file *file = NULL; unsigned long retval; if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); if (!file) return -EBADF; if (is_file_hugepages(file)) { len = ALIGN(len, huge_page_size(hstate_file(file))); } else if (unlikely(flags & MAP_HUGETLB)) { retval = -EINVAL; goto out_fput; } } else if (flags & MAP_HUGETLB) { struct hstate *hs; hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (!hs) return -EINVAL; len = ALIGN(len, huge_page_size(hs)); /* * VM_NORESERVE is used because the reservations will be * taken when vm_ops->mmap() is called */ file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, HUGETLB_ANONHUGE_INODE, (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (IS_ERR(file)) return PTR_ERR(file); } retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); out_fput: if (file) fput(file); return retval; } SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) { return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } #ifdef __ARCH_WANT_SYS_OLD_MMAP struct mmap_arg_struct { unsigned long addr; unsigned long len; unsigned long prot; unsigned long flags; unsigned long fd; unsigned long offset; }; SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) { struct mmap_arg_struct a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; if (offset_in_page(a.offset)) return -EINVAL; return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); } #endif /* __ARCH_WANT_SYS_OLD_MMAP */ static bool vm_ops_needs_writenotify(const struct vm_operations_struct *vm_ops) { return vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite); } static bool vma_is_shared_writable(struct vm_area_struct *vma) { return (vma->vm_flags & (VM_WRITE | VM_SHARED)) == (VM_WRITE | VM_SHARED); } static bool vma_fs_can_writeback(struct vm_area_struct *vma) { /* No managed pages to writeback. 
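 * (A VM_PFNMAP range maps raw page frames with no struct page or page cache
 *  behind them, so there is nothing a filesystem could ever write back for
 *  such a vma.)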
*/ if (vma->vm_flags & VM_PFNMAP) return false; return vma->vm_file && vma->vm_file->f_mapping && mapping_can_writeback(vma->vm_file->f_mapping); } /* * Does this VMA require the underlying folios to have their dirty state * tracked? */ bool vma_needs_dirty_tracking(struct vm_area_struct *vma) { /* Only shared, writable VMAs require dirty tracking. */ if (!vma_is_shared_writable(vma)) return false; /* Does the filesystem need to be notified? */ if (vm_ops_needs_writenotify(vma->vm_ops)) return true; /* * Even if the filesystem doesn't indicate a need for writenotify, if it * can writeback, dirty tracking is still required. */ return vma_fs_can_writeback(vma); } /* * Some shared mappings will want the pages marked read-only * to track write events. If so, we'll downgrade vm_page_prot * to the private version (using protection_map[] without the * VM_SHARED bit). */ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) { /* If it was private or non-writable, the write bit is already clear */ if (!vma_is_shared_writable(vma)) return 0; /* The backer wishes to know when pages are first written to? */ if (vm_ops_needs_writenotify(vma->vm_ops)) return 1; /* The open routine did something to the protections that pgprot_modify * won't preserve? */ if (pgprot_val(vm_page_prot) != pgprot_val(vm_pgprot_modify(vm_page_prot, vma->vm_flags))) return 0; /* * Do we need to track softdirty? hugetlb does not support softdirty * tracking yet. */ if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma)) return 1; /* Do we need write faults for uffd-wp tracking? */ if (userfaultfd_wp(vma)) return 1; /* Can the mapping track the dirty pages? */ return vma_fs_can_writeback(vma); } /* * We account for memory if it's a private writeable mapping, * not hugepages and VM_NORESERVE wasn't set. */ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) { /* * hugetlb has its own accounting separate from the core VM * VM_HUGETLB may not be set yet so we cannot check for that flag. */ if (file && is_file_hugepages(file)) return 0; return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; } /** * unmapped_area() - Find an area between the low_limit and the high_limit with * the correct alignment and offset, all from @info. Note: current->mm is used * for the search. * * @info: The unmapped area information including the range [low_limit - * high_limit), the alignment offset and mask. * * Return: A memory address or -ENOMEM. 
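 *
 * Worked example: with align_mask = 0x1ff000 and align_offset = 0, i.e.
 * asking for 2 MiB alignment with 4 KiB pages, a candidate gap starting at
 * 0x7f1234561000 is rounded up by
 *
 *	gap += (align_offset - gap) & align_mask;
 *
 * to 0x7f1234600000, the next 2 MiB boundary.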
*/ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) { unsigned long length, gap; unsigned long low_limit, high_limit; struct vm_area_struct *tmp; MA_STATE(mas, &current->mm->mm_mt, 0, 0); /* Adjust search length to account for worst case alignment overhead */ length = info->length + info->align_mask; if (length < info->length) return -ENOMEM; low_limit = info->low_limit; if (low_limit < mmap_min_addr) low_limit = mmap_min_addr; high_limit = info->high_limit; retry: if (mas_empty_area(&mas, low_limit, high_limit - 1, length)) return -ENOMEM; gap = mas.index; gap += (info->align_offset - gap) & info->align_mask; tmp = mas_next(&mas, ULONG_MAX); if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */ if (vm_start_gap(tmp) < gap + length - 1) { low_limit = tmp->vm_end; mas_reset(&mas); goto retry; } } else { tmp = mas_prev(&mas, 0); if (tmp && vm_end_gap(tmp) > gap) { low_limit = vm_end_gap(tmp); mas_reset(&mas); goto retry; } } return gap; } /** * unmapped_area_topdown() - Find an area between the low_limit and the * high_limit with the correct alignment and offset at the highest available * address, all from @info. Note: current->mm is used for the search. * * @info: The unmapped area information including the range [low_limit - * high_limit), the alignment offset and mask. * * Return: A memory address or -ENOMEM. */ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) { unsigned long length, gap, gap_end; unsigned long low_limit, high_limit; struct vm_area_struct *tmp; MA_STATE(mas, &current->mm->mm_mt, 0, 0); /* Adjust search length to account for worst case alignment overhead */ length = info->length + info->align_mask; if (length < info->length) return -ENOMEM; low_limit = info->low_limit; if (low_limit < mmap_min_addr) low_limit = mmap_min_addr; high_limit = info->high_limit; retry: if (mas_empty_area_rev(&mas, low_limit, high_limit - 1, length)) return -ENOMEM; gap = mas.last + 1 - info->length; gap -= (gap - info->align_offset) & info->align_mask; gap_end = mas.last; tmp = mas_next(&mas, ULONG_MAX); if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */ if (vm_start_gap(tmp) <= gap_end) { high_limit = vm_start_gap(tmp); mas_reset(&mas); goto retry; } } else { tmp = mas_prev(&mas, 0); if (tmp && vm_end_gap(tmp) > gap) { high_limit = tmp->vm_start; mas_reset(&mas); goto retry; } } return gap; } /* * Search for an unmapped address range. * * We are looking for a range that: * - does not intersect with any VMA; * - is contained within the [low_limit, high_limit) interval; * - is at least the desired size. * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) */ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) { unsigned long addr; if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) addr = unmapped_area_topdown(info); else addr = unmapped_area(info); trace_vm_unmapped_area(addr, info); return addr; } /* Get an address range which is currently unmapped. * For shmat() with addr=0. * * Ugly calling convention alert: * Return value with the low bits set means error value, * ie * if (ret & ~PAGE_MASK) * error = ret; * * This function "knows" that -ENOMEM has the bits set. 
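 *
 * (-ENOMEM is 0xfffffffffffffff4 as an unsigned 64-bit value, so its low
 *  bits really are set; the offset_in_page()/IS_ERR_VALUE() style checks
 *  used by the callers below can therefore never confuse it with a valid,
 *  page-aligned mapping address.)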
*/ unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; struct vm_unmapped_area_info info; const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); if (len > mmap_end - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) return addr; if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma_prev(mm, addr, &prev); if (mmap_end - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vm_start_gap(vma)) && (!prev || addr >= vm_end_gap(prev))) return addr; } info.flags = 0; info.length = len; info.low_limit = mm->mmap_base; info.high_limit = mmap_end; info.align_mask = 0; info.align_offset = 0; return vm_unmapped_area(&info); } #ifndef HAVE_ARCH_UNMAPPED_AREA unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { return generic_get_unmapped_area(filp, addr, len, pgoff, flags); } #endif /* * This mmap-allocator allocates new areas top-down from below the * stack's low limit (the base): */ unsigned long generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct vm_area_struct *vma, *prev; struct mm_struct *mm = current->mm; struct vm_unmapped_area_info info; const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); /* requested length too big for entire address space */ if (len > mmap_end - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) return addr; /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma_prev(mm, addr, &prev); if (mmap_end - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vm_start_gap(vma)) && (!prev || addr >= vm_end_gap(prev))) return addr; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); info.align_mask = 0; info.align_offset = 0; addr = vm_unmapped_area(&info); /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ if (offset_in_page(addr)) { VM_BUG_ON(addr != -ENOMEM); info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = mmap_end; addr = vm_unmapped_area(&info); } return addr; } #ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { return generic_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); } #endif unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); unsigned long error = arch_mmap_check(addr, len, flags); if (error) return error; /* Careful about overflows.. */ if (len > TASK_SIZE) return -ENOMEM; get_area = current->mm->get_unmapped_area; if (file) { if (file->f_op->get_unmapped_area) get_area = file->f_op->get_unmapped_area; } else if (flags & MAP_SHARED) { /* * mmap_region() will call shmem_zero_setup() to create a file, * so use shmem's get_unmapped_area in case it can be huge. 
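 * (That is, an anonymous MAP_SHARED mapping is backed by a shmem/tmpfs
 *  object created via shmem_zero_setup(), so its placement, including any
 *  huge-page alignment, follows shmem's policy rather than the generic
 *  anonymous one.)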
*/ get_area = shmem_get_unmapped_area; } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Ensures that larger anonymous mappings are THP aligned. */ get_area = thp_get_unmapped_area; } /* Always treat pgoff as zero for anonymous memory. */ if (!file) pgoff = 0; addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; if (addr > TASK_SIZE - len) return -ENOMEM; if (offset_in_page(addr)) return -EINVAL; error = security_mmap_addr(addr); return error ? error : addr; } EXPORT_SYMBOL(get_unmapped_area); /** * find_vma_intersection() - Look up the first VMA which intersects the interval * @mm: The process address space. * @start_addr: The inclusive start user address. * @end_addr: The exclusive end user address. * * Returns: The first VMA within the provided range, %NULL otherwise. Assumes * start_addr < end_addr. */ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr) { unsigned long index = start_addr; mmap_assert_locked(mm); return mt_find(&mm->mm_mt, &index, end_addr - 1); } EXPORT_SYMBOL(find_vma_intersection); /** * find_vma() - Find the VMA for a given address, or the next VMA. * @mm: The mm_struct to check * @addr: The address * * Returns: The VMA associated with addr, or the next VMA. * May return %NULL in the case of no VMA at addr or above. */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { unsigned long index = addr; mmap_assert_locked(mm); return mt_find(&mm->mm_mt, &index, ULONG_MAX); } EXPORT_SYMBOL(find_vma); /** * find_vma_prev() - Find the VMA for a given address, or the next vma and * set %pprev to the previous VMA, if any. * @mm: The mm_struct to check * @addr: The address * @pprev: The pointer to set to the previous VMA * * Note that RCU lock is missing here since the external mmap_lock() is used * instead. * * Returns: The VMA associated with @addr, or the next vma. * May return %NULL in the case of no vma at addr or above. */ struct vm_area_struct * find_vma_prev(struct mm_struct *mm, unsigned long addr, struct vm_area_struct **pprev) { struct vm_area_struct *vma; MA_STATE(mas, &mm->mm_mt, addr, addr); vma = mas_walk(&mas); *pprev = mas_prev(&mas, 0); if (!vma) vma = mas_next(&mas, ULONG_MAX); return vma; } /* * Verify that the stack growth is acceptable and * update accounting. This is shared with both the * grow-up and grow-down cases. */ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow) { struct mm_struct *mm = vma->vm_mm; unsigned long new_start; /* address space limit tests */ if (!may_expand_vm(mm, vma->vm_flags, grow)) return -ENOMEM; /* Stack limit test */ if (size > rlimit(RLIMIT_STACK)) return -ENOMEM; /* mlock limit tests */ if (!mlock_future_ok(mm, vma->vm_flags, grow << PAGE_SHIFT)) return -ENOMEM; /* Check to ensure the stack will not grow into a hugetlb-only region */ new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start : vma->vm_end - size; if (is_hugepage_only_range(vma->vm_mm, new_start, size)) return -EFAULT; /* * Overcommit.. This must be the final test, as it will * update security statistics. */ if (security_vm_enough_memory_mm(mm, grow)) return -ENOMEM; return 0; } #if defined(CONFIG_STACK_GROWSUP) /* * PA-RISC uses this for its stack. * vma is the last one with address > vma->vm_end. Have to extend vma. 
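 * (The stack_guard_gap enforced here defaults to 256UL << PAGE_SHIFT,
 *  i.e. 1 MiB with 4 KiB pages, and can be overridden at boot with the
 *  stack_guard_gap= parameter parsed further down in this file.)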
*/ static int expand_upwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next; unsigned long gap_addr; int error = 0; MA_STATE(mas, &mm->mm_mt, vma->vm_start, address); if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; /* Guard against exceeding limits of the address space. */ address &= PAGE_MASK; if (address >= (TASK_SIZE & PAGE_MASK)) return -ENOMEM; address += PAGE_SIZE; /* Enforce stack_guard_gap */ gap_addr = address + stack_guard_gap; /* Guard against overflow */ if (gap_addr < address || gap_addr > TASK_SIZE) gap_addr = TASK_SIZE; next = find_vma_intersection(mm, vma->vm_end, gap_addr); if (next && vma_is_accessible(next)) { if (!(next->vm_flags & VM_GROWSUP)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ } if (next) mas_prev_range(&mas, address); __mas_set_range(&mas, vma->vm_start, address - 1); if (mas_preallocate(&mas, vma, GFP_KERNEL)) return -ENOMEM; /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) { mas_destroy(&mas); return -ENOMEM; } /* Lock the VMA before expanding to prevent concurrent page faults */ vma_start_write(vma); /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_lock in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. */ anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address > vma->vm_end) { unsigned long size, grow; size = address - vma->vm_start; grow = (address - vma->vm_end) >> PAGE_SHIFT; error = -ENOMEM; if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) { error = acct_stack_growth(vma, size, grow); if (!error) { /* * We only hold a shared mmap_lock lock here, so * we need to protect against concurrent vma * expansions. anon_vma_lock_write() doesn't * help here, as we don't guarantee that all * growable vmas in a mm share the same root * anon vma. So, we reuse mm->page_table_lock * to guard against concurrent vma expansions. */ spin_lock(&mm->page_table_lock); if (vma->vm_flags & VM_LOCKED) mm->locked_vm += grow; vm_stat_account(mm, vma->vm_flags, grow); anon_vma_interval_tree_pre_update_vma(vma); vma->vm_end = address; /* Overwrite old entry in mtree. */ mas_store_prealloc(&mas, vma); anon_vma_interval_tree_post_update_vma(vma); spin_unlock(&mm->page_table_lock); perf_event_mmap(vma); } } } anon_vma_unlock_write(vma->anon_vma); mas_destroy(&mas); validate_mm(mm); return error; } #endif /* CONFIG_STACK_GROWSUP */ /* * vma is the first one with address < vma->vm_start. Have to extend vma. * mmap_lock held for writing. */ int expand_downwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start); struct vm_area_struct *prev; int error = 0; if (!(vma->vm_flags & VM_GROWSDOWN)) return -EFAULT; address &= PAGE_MASK; if (address < mmap_min_addr || address < FIRST_USER_ADDRESS) return -EPERM; /* Enforce stack_guard_gap */ prev = mas_prev(&mas, 0); /* Check that both stack segments have the same anon_vma? */ if (prev) { if (!(prev->vm_flags & VM_GROWSDOWN) && vma_is_accessible(prev) && (address - prev->vm_end < stack_guard_gap)) return -ENOMEM; } if (prev) mas_next_range(&mas, vma->vm_start); __mas_set_range(&mas, address, vma->vm_end - 1); if (mas_preallocate(&mas, vma, GFP_KERNEL)) return -ENOMEM; /* We must make sure the anon_vma is allocated. 
*/ if (unlikely(anon_vma_prepare(vma))) { mas_destroy(&mas); return -ENOMEM; } /* Lock the VMA before expanding to prevent concurrent page faults */ vma_start_write(vma); /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_lock in read mode. We need the * anon_vma lock to serialize against concurrent expand_stacks. */ anon_vma_lock_write(vma->anon_vma); /* Somebody else might have raced and expanded it already */ if (address < vma->vm_start) { unsigned long size, grow; size = vma->vm_end - address; grow = (vma->vm_start - address) >> PAGE_SHIFT; error = -ENOMEM; if (grow <= vma->vm_pgoff) { error = acct_stack_growth(vma, size, grow); if (!error) { /* * We only hold a shared mmap_lock lock here, so * we need to protect against concurrent vma * expansions. anon_vma_lock_write() doesn't * help here, as we don't guarantee that all * growable vmas in a mm share the same root * anon vma. So, we reuse mm->page_table_lock * to guard against concurrent vma expansions. */ spin_lock(&mm->page_table_lock); if (vma->vm_flags & VM_LOCKED) mm->locked_vm += grow; vm_stat_account(mm, vma->vm_flags, grow); anon_vma_interval_tree_pre_update_vma(vma); vma->vm_start = address; vma->vm_pgoff -= grow; /* Overwrite old entry in mtree. */ mas_store_prealloc(&mas, vma); anon_vma_interval_tree_post_update_vma(vma); spin_unlock(&mm->page_table_lock); perf_event_mmap(vma); } } } anon_vma_unlock_write(vma->anon_vma); mas_destroy(&mas); validate_mm(mm); return error; } /* enforced gap between the expanding stack and other mappings. */ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT; static int __init cmdline_parse_stack_guard_gap(char *p) { unsigned long val; char *endptr; val = simple_strtoul(p, &endptr, 10); if (!*endptr) stack_guard_gap = val << PAGE_SHIFT; return 1; } __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap); #ifdef CONFIG_STACK_GROWSUP int expand_stack_locked(struct vm_area_struct *vma, unsigned long address) { return expand_upwards(vma, address); } struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma, *prev; addr &= PAGE_MASK; vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; if (!prev) return NULL; if (expand_stack_locked(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); return prev; } #else int expand_stack_locked(struct vm_area_struct *vma, unsigned long address) { return expand_downwards(vma, address); } struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma; unsigned long start; addr &= PAGE_MASK; vma = find_vma(mm, addr); if (!vma) return NULL; if (vma->vm_start <= addr) return vma; start = vma->vm_start; if (expand_stack_locked(vma, addr)) return NULL; if (vma->vm_flags & VM_LOCKED) populate_vma_page_range(vma, addr, start, NULL); return vma; } #endif #if defined(CONFIG_STACK_GROWSUP) #define vma_expand_up(vma,addr) expand_upwards(vma, addr) #define vma_expand_down(vma, addr) (-EFAULT) #else #define vma_expand_up(vma,addr) (-EFAULT) #define vma_expand_down(vma, addr) expand_downwards(vma, addr) #endif /* * expand_stack(): legacy interface for page faulting. Don't use unless * you have to. * * This is called with the mm locked for reading, drops the lock, takes * the lock for writing, tries to look up a vma again, expands it if * necessary, and downgrades the lock to reading again. 
* * If no vma is found or it can't be expanded, it returns NULL and has * dropped the lock. */ struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma, *prev; mmap_read_unlock(mm); if (mmap_write_lock_killable(mm)) return NULL; vma = find_vma_prev(mm, addr, &prev); if (vma && vma->vm_start <= addr) goto success; if (prev && !vma_expand_up(prev, addr)) { vma = prev; goto success; } if (vma && !vma_expand_down(vma, addr)) goto success; mmap_write_unlock(mm); return NULL; success: mmap_write_downgrade(mm); return vma; } /* * Ok - we have the memory areas we should free on a maple tree so release them, * and do the vma updates. * * Called with the mm semaphore held. */ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) { unsigned long nr_accounted = 0; struct vm_area_struct *vma; /* Update high watermark before we lower total_vm */ update_hiwater_vm(mm); mas_for_each(mas, vma, ULONG_MAX) { long nrpages = vma_pages(vma); if (vma->vm_flags & VM_ACCOUNT) nr_accounted += nrpages; vm_stat_account(mm, vma->vm_flags, -nrpages); remove_vma(vma, false); } vm_unacct_memory(nr_accounted); } /* * Get rid of page table information in the indicated region. * * Called with the mm semaphore held. */ static void unmap_region(struct mm_struct *mm, struct ma_state *mas, struct vm_area_struct *vma, struct vm_area_struct *prev, struct vm_area_struct *next, unsigned long start, unsigned long end, unsigned long tree_end, bool mm_wr_locked) { struct mmu_gather tlb; unsigned long mt_start = mas->index; lru_add_drain(); tlb_gather_mmu(&tlb, mm); update_hiwater_rss(mm); unmap_vmas(&tlb, mas, vma, start, end, tree_end, mm_wr_locked); mas_set(mas, mt_start); free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? next->vm_start : USER_PGTABLES_CEILING, mm_wr_locked); tlb_finish_mmu(&tlb); } /* * __split_vma() bypasses sysctl_max_map_count checking. We use this where it * has already been checked or doesn't make sense to fail. * VMA Iterator will point to the end VMA. */ static int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long addr, int new_below) { struct vma_prepare vp; struct vm_area_struct *new; int err; WARN_ON(vma->vm_start >= addr); WARN_ON(vma->vm_end <= addr); if (vma->vm_ops && vma->vm_ops->may_split) { err = vma->vm_ops->may_split(vma, addr); if (err) return err; } new = vm_area_dup(vma); if (!new) return -ENOMEM; if (new_below) { new->vm_end = addr; } else { new->vm_start = addr; new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); } err = -ENOMEM; vma_iter_config(vmi, new->vm_start, new->vm_end); if (vma_iter_prealloc(vmi, new)) goto out_free_vma; err = vma_dup_policy(vma, new); if (err) goto out_free_vmi; err = anon_vma_clone(new, vma); if (err) goto out_free_mpol; if (new->vm_file) get_file(new->vm_file); if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); vma_start_write(vma); vma_start_write(new); init_vma_prep(&vp, vma); vp.insert = new; vma_prepare(&vp); vma_adjust_trans_huge(vma, vma->vm_start, addr, 0); if (new_below) { vma->vm_start = addr; vma->vm_pgoff += (addr - new->vm_start) >> PAGE_SHIFT; } else { vma->vm_end = addr; } /* vma_complete stores the new vma */ vma_complete(&vp, vmi, vma->vm_mm); /* Success. 
*/ if (new_below) vma_next(vmi); return 0; out_free_mpol: mpol_put(vma_policy(new)); out_free_vmi: vma_iter_free(vmi); out_free_vma: vm_area_free(new); return err; } /* * Split a vma into two pieces at address 'addr', a new vma is allocated * either for the first part or the tail. */ static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long addr, int new_below) { if (vma->vm_mm->map_count >= sysctl_max_map_count) return -ENOMEM; return __split_vma(vmi, vma, addr, new_below); } /* * We are about to modify one or multiple of a VMA's flags, policy, userfaultfd * context and anonymous VMA name within the range [start, end). * * As a result, we might be able to merge the newly modified VMA range with an * adjacent VMA with identical properties. * * If no merge is possible and the range does not span the entirety of the VMA, * we then need to split the VMA to accommodate the change. * * The function returns either the merged VMA, the original VMA if a split was * required instead, or an error if the split failed. */ struct vm_area_struct *vma_modify(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long vm_flags, struct mempolicy *policy, struct vm_userfaultfd_ctx uffd_ctx, struct anon_vma_name *anon_name) { pgoff_t pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); struct vm_area_struct *merged; merged = vma_merge(vmi, prev, vma, start, end, vm_flags, pgoff, policy, uffd_ctx, anon_name); if (merged) return merged; if (vma->vm_start < start) { int err = split_vma(vmi, vma, start, 1); if (err) return ERR_PTR(err); } if (vma->vm_end > end) { int err = split_vma(vmi, vma, end, 0); if (err) return ERR_PTR(err); } return vma; } /* * Attempt to merge a newly mapped VMA with those adjacent to it. The caller * must ensure that [start, end) does not overlap any existing VMA. */ static struct vm_area_struct *vma_merge_new_vma(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff) { return vma_merge(vmi, prev, vma, start, end, vma->vm_flags, pgoff, vma_policy(vma), vma->vm_userfaultfd_ctx, anon_vma_name(vma)); } /* * Expand vma by delta bytes, potentially merging with an immediately adjacent * VMA with identical properties. */ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long delta) { pgoff_t pgoff = vma->vm_pgoff + vma_pages(vma); /* vma is specified as prev, so case 1 or 2 will apply. */ return vma_merge(vmi, vma, vma, vma->vm_end, vma->vm_end + delta, vma->vm_flags, pgoff, vma_policy(vma), vma->vm_userfaultfd_ctx, anon_vma_name(vma)); } /* * do_vmi_align_munmap() - munmap the aligned region from @start to @end. * @vmi: The vma iterator * @vma: The starting vm_area_struct * @mm: The mm_struct * @start: The aligned start address to munmap. * @end: The aligned end address to munmap. * @uf: The userfaultfd list_head * @unlock: Set to true to drop the mmap_lock. unlocking only happens on * success. * * Return: 0 on success and drops the lock if so directed, error and leaves the * lock held otherwise. 
*/ static int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, unsigned long end, struct list_head *uf, bool unlock) { struct vm_area_struct *prev, *next = NULL; struct maple_tree mt_detach; int count = 0; int error = -ENOMEM; unsigned long locked_vm = 0; MA_STATE(mas_detach, &mt_detach, 0, 0); mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_on_stack(mt_detach); /* * If we need to split any vma, do it now to save pain later. * * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially * unmapped vm_area_struct will remain in use: so lower split_vma * places tmp vma above, and higher split_vma places tmp vma below. */ /* Does it split the first one? */ if (start > vma->vm_start) { /* * Make sure that map_count on return from munmap() will * not exceed its limit; but let map_count go just above * its limit temporarily, to help free resources as expected. */ if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) goto map_count_exceeded; error = __split_vma(vmi, vma, start, 1); if (error) goto start_split_failed; } /* * Detach a range of VMAs from the mm. Using next as a temp variable as * it is always overwritten. */ next = vma; do { /* Does it split the end? */ if (next->vm_end > end) { error = __split_vma(vmi, next, end, 0); if (error) goto end_split_failed; } vma_start_write(next); mas_set(&mas_detach, count); error = mas_store_gfp(&mas_detach, next, GFP_KERNEL); if (error) goto munmap_gather_failed; vma_mark_detached(next, true); if (next->vm_flags & VM_LOCKED) locked_vm += vma_pages(next); count++; if (unlikely(uf)) { /* * If userfaultfd_unmap_prep returns an error the vmas * will remain split, but userland will get a * highly unexpected error anyway. This is no * different than the case where the first of the two * __split_vma fails, but we don't undo the first * split, despite we could. This is unlikely enough * failure that it's not worth optimizing it for. */ error = userfaultfd_unmap_prep(next, start, end, uf); if (error) goto userfaultfd_error; } #ifdef CONFIG_DEBUG_VM_MAPLE_TREE BUG_ON(next->vm_start < start); BUG_ON(next->vm_start > end); #endif } for_each_vma_range(*vmi, next, end); #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) /* Make sure no VMAs are about to be lost. */ { MA_STATE(test, &mt_detach, 0, 0); struct vm_area_struct *vma_mas, *vma_test; int test_count = 0; vma_iter_set(vmi, start); rcu_read_lock(); vma_test = mas_find(&test, count - 1); for_each_vma_range(*vmi, vma_mas, end) { BUG_ON(vma_mas != vma_test); test_count++; vma_test = mas_next(&test, count - 1); } rcu_read_unlock(); BUG_ON(count != test_count); } #endif while (vma_iter_addr(vmi) > start) vma_iter_prev_range(vmi); error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); if (error) goto clear_tree_failed; /* Point of no return */ mm->locked_vm -= locked_vm; mm->map_count -= count; if (unlock) mmap_write_downgrade(mm); prev = vma_iter_prev_range(vmi); next = vma_next(vmi); if (next) vma_iter_prev_range(vmi); /* * We can free page tables without write-locking mmap_lock because VMAs * were isolated before we downgraded mmap_lock. 
*/ mas_set(&mas_detach, 1); unmap_region(mm, &mas_detach, vma, prev, next, start, end, count, !unlock); /* Statistics and freeing VMAs */ mas_set(&mas_detach, 0); remove_mt(mm, &mas_detach); validate_mm(mm); if (unlock) mmap_read_unlock(mm); __mt_destroy(&mt_detach); return 0; clear_tree_failed: userfaultfd_error: munmap_gather_failed: end_split_failed: mas_set(&mas_detach, 0); mas_for_each(&mas_detach, next, end) vma_mark_detached(next, false); __mt_destroy(&mt_detach); start_split_failed: map_count_exceeded: validate_mm(mm); return error; } /* * do_vmi_munmap() - munmap a given range. * @vmi: The vma iterator * @mm: The mm_struct * @start: The start address to munmap * @len: The length of the range to munmap * @uf: The userfaultfd list_head * @unlock: set to true if the user wants to drop the mmap_lock on success * * This function takes a @mas that is either pointing to the previous VMA or set * to MA_START and sets it up to remove the mapping(s). The @len will be * aligned and any arch_unmap work will be preformed. * * Return: 0 on success and drops the lock if so directed, error and leaves the * lock held otherwise. */ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock) { unsigned long end; struct vm_area_struct *vma; if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start) return -EINVAL; end = start + PAGE_ALIGN(len); if (end == start) return -EINVAL; /* arch_unmap() might do unmaps itself. */ arch_unmap(mm, start, end); /* Find the first overlapping VMA */ vma = vma_find(vmi, end); if (!vma) { if (unlock) mmap_write_unlock(mm); return 0; } return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); } /* do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls. * @mm: The mm_struct * @start: The start address to munmap * @len: The length to be munmapped. * @uf: The userfaultfd list_head * * Return: 0 on success, error otherwise. */ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) { VMA_ITERATOR(vmi, mm, start); return do_vmi_munmap(&vmi, mm, start, len, uf, false); } unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; struct vm_area_struct *next, *prev, *merge; pgoff_t pglen = len >> PAGE_SHIFT; unsigned long charged = 0; unsigned long end = addr + len; unsigned long merge_start = addr, merge_end = end; bool writable_file_mapping = false; pgoff_t vm_pgoff; int error; VMA_ITERATOR(vmi, mm, addr); /* Check against address space limit. */ if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { unsigned long nr_pages; /* * MAP_FIXED may remove pages of mappings that intersects with * requested mapping. Account for the pages it would unmap. 
*/ nr_pages = count_vma_pages_range(mm, addr, end); if (!may_expand_vm(mm, vm_flags, (len >> PAGE_SHIFT) - nr_pages)) return -ENOMEM; } /* Unmap any existing mapping in the area */ if (do_vmi_munmap(&vmi, mm, addr, len, uf, false)) return -ENOMEM; /* * Private writable mapping: check memory availability */ if (accountable_mapping(file, vm_flags)) { charged = len >> PAGE_SHIFT; if (security_vm_enough_memory_mm(mm, charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; } next = vma_next(&vmi); prev = vma_prev(&vmi); if (vm_flags & VM_SPECIAL) { if (prev) vma_iter_next_range(&vmi); goto cannot_expand; } /* Attempt to expand an old mapping */ /* Check next */ if (next && next->vm_start == end && !vma_policy(next) && can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen, NULL_VM_UFFD_CTX, NULL)) { merge_end = next->vm_end; vma = next; vm_pgoff = next->vm_pgoff - pglen; } /* Check prev */ if (prev && prev->vm_end == addr && !vma_policy(prev) && (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, pgoff, vma->vm_userfaultfd_ctx, NULL) : can_vma_merge_after(prev, vm_flags, NULL, file, pgoff, NULL_VM_UFFD_CTX, NULL))) { merge_start = prev->vm_start; vma = prev; vm_pgoff = prev->vm_pgoff; } else if (prev) { vma_iter_next_range(&vmi); } /* Actually expand, if possible */ if (vma && !vma_expand(&vmi, vma, merge_start, merge_end, vm_pgoff, next)) { khugepaged_enter_vma(vma, vm_flags); goto expanded; } if (vma == prev) vma_iter_set(&vmi, addr); cannot_expand: /* * Determine the object being mapped and call the appropriate * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ vma = vm_area_alloc(mm); if (!vma) { error = -ENOMEM; goto unacct_error; } vma_iter_config(&vmi, addr, end); vma_set_range(vma, addr, end, pgoff); vm_flags_init(vma, vm_flags); vma->vm_page_prot = vm_get_page_prot(vm_flags); if (file) { vma->vm_file = get_file(file); error = call_mmap(file, vma); if (error) goto unmap_and_free_vma; if (vma_is_shared_maywrite(vma)) { error = mapping_map_writable(file->f_mapping); if (error) goto close_and_free_vma; writable_file_mapping = true; } /* * Expansion is handled above, merging is handled below. * Drivers should not alter the address of the VMA. */ error = -EINVAL; if (WARN_ON((addr != vma->vm_start))) goto close_and_free_vma; vma_iter_config(&vmi, addr, end); /* * If vm_flags changed after call_mmap(), we should try merge * vma again as we may succeed this time. */ if (unlikely(vm_flags != vma->vm_flags && prev)) { merge = vma_merge_new_vma(&vmi, prev, vma, vma->vm_start, vma->vm_end, vma->vm_pgoff); if (merge) { /* * ->mmap() can change vma->vm_file and fput * the original file. So fput the vma->vm_file * here or we would add an extra fput for file * and cause general protection fault * ultimately. */ fput(vma->vm_file); vm_area_free(vma); vma = merge; /* Update vm_flags to pick up the change. 
*/ vm_flags = vma->vm_flags; goto unmap_writable; } } vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); if (error) goto free_vma; } else { vma_set_anonymous(vma); } if (map_deny_write_exec(vma, vma->vm_flags)) { error = -EACCES; goto close_and_free_vma; } /* Allow architectures to sanity-check the vm_flags */ error = -EINVAL; if (!arch_validate_flags(vma->vm_flags)) goto close_and_free_vma; error = -ENOMEM; if (vma_iter_prealloc(&vmi, vma)) goto close_and_free_vma; /* Lock the VMA since it is modified after insertion into VMA tree */ vma_start_write(vma); vma_iter_store(&vmi, vma); mm->map_count++; vma_link_file(vma); /* * vma_merge() calls khugepaged_enter_vma() either, the below * call covers the non-merge case. */ khugepaged_enter_vma(vma, vma->vm_flags); /* Once vma denies write, undo our temporary denial count */ unmap_writable: if (writable_file_mapping) mapping_unmap_writable(file->f_mapping); file = vma->vm_file; ksm_add_vma(vma); expanded: perf_event_mmap(vma); vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) vm_flags_clear(vma, VM_LOCKED_MASK); else mm->locked_vm += (len >> PAGE_SHIFT); } if (file) uprobe_mmap(vma); /* * New (or expanded) vma always get soft dirty status. * Otherwise user-space soft-dirty page tracker won't * be able to distinguish situation when vma area unmapped, * then new mapped in-place (which must be aimed as * a completely new data area). */ vm_flags_set(vma, VM_SOFTDIRTY); vma_set_page_prot(vma); validate_mm(mm); return addr; close_and_free_vma: if (file && vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (file || vma->vm_file) { unmap_and_free_vma: fput(vma->vm_file); vma->vm_file = NULL; vma_iter_set(&vmi, vma->vm_end); /* Undo any partial mapping done by a device driver. */ unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start, vma->vm_end, vma->vm_end, true); } if (writable_file_mapping) mapping_unmap_writable(file->f_mapping); free_vma: vm_area_free(vma); unacct_error: if (charged) vm_unacct_memory(charged); validate_mm(mm); return error; } static int __vm_munmap(unsigned long start, size_t len, bool unlock) { int ret; struct mm_struct *mm = current->mm; LIST_HEAD(uf); VMA_ITERATOR(vmi, mm, start); if (mmap_write_lock_killable(mm)) return -EINTR; ret = do_vmi_munmap(&vmi, mm, start, len, &uf, unlock); if (ret || !unlock) mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); return ret; } int vm_munmap(unsigned long start, size_t len) { return __vm_munmap(start, len, false); } EXPORT_SYMBOL(vm_munmap); SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) { addr = untagged_addr(addr); return __vm_munmap(addr, len, true); } /* * Emulation of deprecated remap_file_pages() syscall. */ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long populate = 0; unsigned long ret = -EINVAL; struct file *file; pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/mm/remap_file_pages.rst.\n", current->comm, current->pid); if (prot) return ret; start = start & PAGE_MASK; size = size & PAGE_MASK; if (start + size <= start) return ret; /* Does pgoff wrap? 
*/ if (pgoff + (size >> PAGE_SHIFT) < pgoff) return ret; if (mmap_write_lock_killable(mm)) return -EINTR; vma = vma_lookup(mm, start); if (!vma || !(vma->vm_flags & VM_SHARED)) goto out; if (start + size > vma->vm_end) { VMA_ITERATOR(vmi, mm, vma->vm_end); struct vm_area_struct *next, *prev = vma; for_each_vma_range(vmi, next, start + size) { /* hole between vmas ? */ if (next->vm_start != prev->vm_end) goto out; if (next->vm_file != vma->vm_file) goto out; if (next->vm_flags != vma->vm_flags) goto out; if (start + size <= next->vm_end) break; prev = next; } if (!next) goto out; } prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; flags &= MAP_NONBLOCK; flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; if (vma->vm_flags & VM_LOCKED) flags |= MAP_LOCKED; file = get_file(vma->vm_file); ret = do_mmap(vma->vm_file, start, size, prot, flags, 0, pgoff, &populate, NULL); fput(file); out: mmap_write_unlock(mm); if (populate) mm_populate(ret, populate); if (!IS_ERR_VALUE(ret)) ret = 0; return ret; } /* * do_vma_munmap() - Unmap a full or partial vma. * @vmi: The vma iterator pointing at the vma * @vma: The first vma to be munmapped * @start: the start of the address to unmap * @end: The end of the address to unmap * @uf: The userfaultfd list_head * @unlock: Drop the lock on success * * unmaps a VMA mapping when the vma iterator is already in position. * Does not handle alignment. * * Return: 0 on success drops the lock of so directed, error on failure and will * still hold the lock. */ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *uf, bool unlock) { struct mm_struct *mm = vma->vm_mm; arch_unmap(mm, start, end); return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock); } /* * do_brk_flags() - Increase the brk vma if the flags match. * @vmi: The vma iterator * @addr: The start address * @len: The length of the increase * @vma: The vma, * @flags: The VMA Flags * * Extend the brk VMA from addr to addr + len. If the VMA is NULL or the flags * do not match then create a new anonymous VMA. Eventually we may be able to * do some brk-specific accounting here. */ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long addr, unsigned long len, unsigned long flags) { struct mm_struct *mm = current->mm; struct vma_prepare vp; /* * Check against address space limits by the changed size * Note: This happens *after* clearing old mappings in some code paths. */ flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT)) return -ENOMEM; if (mm->map_count > sysctl_max_map_count) return -ENOMEM; if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) return -ENOMEM; /* * Expand the existing vma if possible; Note that singular lists do not * occur after forking, so the expand will only happen on new VMAs. 
*/ if (vma && vma->vm_end == addr && !vma_policy(vma) && can_vma_merge_after(vma, flags, NULL, NULL, addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) { vma_iter_config(vmi, vma->vm_start, addr + len); if (vma_iter_prealloc(vmi, vma)) goto unacct_fail; vma_start_write(vma); init_vma_prep(&vp, vma); vma_prepare(&vp); vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0); vma->vm_end = addr + len; vm_flags_set(vma, VM_SOFTDIRTY); vma_iter_store(vmi, vma); vma_complete(&vp, vmi, mm); khugepaged_enter_vma(vma, flags); goto out; } if (vma) vma_iter_next_range(vmi); /* create a vma struct for an anonymous mapping */ vma = vm_area_alloc(mm); if (!vma) goto unacct_fail; vma_set_anonymous(vma); vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT); vm_flags_init(vma, flags); vma->vm_page_prot = vm_get_page_prot(flags); vma_start_write(vma); if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL)) goto mas_store_fail; mm->map_count++; validate_mm(mm); ksm_add_vma(vma); out: perf_event_mmap(vma); mm->total_vm += len >> PAGE_SHIFT; mm->data_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) mm->locked_vm += (len >> PAGE_SHIFT); vm_flags_set(vma, VM_SOFTDIRTY); return 0; mas_store_fail: vm_area_free(vma); unacct_fail: vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; } int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; unsigned long len; int ret; bool populate; LIST_HEAD(uf); VMA_ITERATOR(vmi, mm, addr); len = PAGE_ALIGN(request); if (len < request) return -ENOMEM; if (!len) return 0; /* Until we need other flags, refuse anything except VM_EXEC. */ if ((flags & (~VM_EXEC)) != 0) return -EINVAL; if (mmap_write_lock_killable(mm)) return -EINTR; ret = check_brk_limits(addr, len); if (ret) goto limits_failed; ret = do_vmi_munmap(&vmi, mm, addr, len, &uf, 0); if (ret) goto munmap_failed; vma = vma_prev(&vmi); ret = do_brk_flags(&vmi, vma, addr, len, flags); populate = ((mm->def_flags & VM_LOCKED) != 0); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); if (populate && !ret) mm_populate(addr, len); return ret; munmap_failed: limits_failed: mmap_write_unlock(mm); return ret; } EXPORT_SYMBOL(vm_brk_flags); /* Release all mmaps. */ void exit_mmap(struct mm_struct *mm) { struct mmu_gather tlb; struct vm_area_struct *vma; unsigned long nr_accounted = 0; MA_STATE(mas, &mm->mm_mt, 0, 0); int count = 0; /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); mmap_read_lock(mm); arch_exit_mmap(mm); vma = mas_find(&mas, ULONG_MAX); if (!vma || unlikely(xa_is_zero(vma))) { /* Can happen if dup_mmap() received an OOM */ mmap_read_unlock(mm); mmap_write_lock(mm); goto destroy; } lru_add_drain(); flush_cache_mm(mm); tlb_gather_mmu_fullmm(&tlb, mm); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false); mmap_read_unlock(mm); /* * Set MMF_OOM_SKIP to hide this task from the oom killer/reaper * because the memory has been already freed. */ set_bit(MMF_OOM_SKIP, &mm->flags); mmap_write_lock(mm); mt_clear_in_rcu(&mm->mm_mt); mas_set(&mas, vma->vm_end); free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING, true); tlb_finish_mmu(&tlb); /* * Walk the list again, actually closing and freeing it, with preemption * enabled, without holding any MM locks besides the unreachable * mmap_write_lock. 
*/ mas_set(&mas, vma->vm_end); do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); remove_vma(vma, true); count++; cond_resched(); vma = mas_find(&mas, ULONG_MAX); } while (vma && likely(!xa_is_zero(vma))); BUG_ON(count != mm->map_count); trace_exit_mmap(mm); destroy: __mt_destroy(&mm->mm_mt); mmap_write_unlock(mm); vm_unacct_memory(nr_accounted); } /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_rwsem is taken here. */ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { unsigned long charged = vma_pages(vma); if (find_vma_intersection(mm, vma->vm_start, vma->vm_end)) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && security_vm_enough_memory_mm(mm, charged)) return -ENOMEM; /* * The vm_pgoff of a purely anonymous vma should be irrelevant * until its first write fault, when page's anon_vma and index * are set. But now set the vm_pgoff it will almost certainly * end up with (unless mremap moves it elsewhere before that * first wfault), so /proc/pid/maps tells a consistent story. * * By setting it to reflect the virtual start address of the * vma, merges and splits can happen in a seamless way, just * using the existing file pgoff checks and manipulations. * Similarly in do_mmap and in do_brk_flags. */ if (vma_is_anonymous(vma)) { BUG_ON(vma->anon_vma); vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; } if (vma_link(mm, vma)) { if (vma->vm_flags & VM_ACCOUNT) vm_unacct_memory(charged); return -ENOMEM; } return 0; } /* * Copy the vma structure to a new location in the same mm, * prior to moving page table entries, to effect an mremap move. */ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks) { struct vm_area_struct *vma = *vmap; unsigned long vma_start = vma->vm_start; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma, *prev; bool faulted_in_anon_vma = true; VMA_ITERATOR(vmi, mm, addr); /* * If anonymous vma has not yet been faulted, update new pgoff * to match new location, to increase its chance of merging. */ if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) { pgoff = addr >> PAGE_SHIFT; faulted_in_anon_vma = false; } new_vma = find_vma_prev(mm, addr, &prev); if (new_vma && new_vma->vm_start < addr + len) return NULL; /* should never get here */ new_vma = vma_merge_new_vma(&vmi, prev, vma, addr, addr + len, pgoff); if (new_vma) { /* * Source vma may have been merged into new_vma */ if (unlikely(vma_start >= new_vma->vm_start && vma_start < new_vma->vm_end)) { /* * The only way we can get a vma_merge with * self during an mremap is if the vma hasn't * been faulted in yet and we were allowed to * reset the dst vma->vm_pgoff to the * destination address of the mremap to allow * the merge to happen. mremap must change the * vm_pgoff linearity between src and dst vmas * (in turn preventing a vma_merge) to be * safe. It is only safe to keep the vm_pgoff * linear if there are no pages mapped yet. 
*/ VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma); *vmap = vma = new_vma; } *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff); } else { new_vma = vm_area_dup(vma); if (!new_vma) goto out; vma_set_range(new_vma, addr, addr + len, pgoff); if (vma_dup_policy(vma, new_vma)) goto out_free_vma; if (anon_vma_clone(new_vma, vma)) goto out_free_mempol; if (new_vma->vm_file) get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); if (vma_link(mm, new_vma)) goto out_vma_link; *need_rmap_locks = false; } return new_vma; out_vma_link: if (new_vma->vm_ops && new_vma->vm_ops->close) new_vma->vm_ops->close(new_vma); if (new_vma->vm_file) fput(new_vma->vm_file); unlink_anon_vmas(new_vma); out_free_mempol: mpol_put(vma_policy(new_vma)); out_free_vma: vm_area_free(new_vma); out: return NULL; } /* * Return true if the calling process may expand its vm space by the passed * number of pages */ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) { if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) return false; if (is_data_mapping(flags) && mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) { /* Workaround for Valgrind */ if (rlimit(RLIMIT_DATA) == 0 && mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT) return true; pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n", current->comm, current->pid, (mm->data_vm + npages) << PAGE_SHIFT, rlimit(RLIMIT_DATA), ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data"); if (!ignore_rlimit_data) return false; } return true; } void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) { WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); if (is_exec_mapping(flags)) mm->exec_vm += npages; else if (is_stack_mapping(flags)) mm->stack_vm += npages; else if (is_data_mapping(flags)) mm->data_vm += npages; } static vm_fault_t special_mapping_fault(struct vm_fault *vmf); /* * Having a close hook prevents vma merging regardless of flags. */ static void special_mapping_close(struct vm_area_struct *vma) { } static const char *special_mapping_name(struct vm_area_struct *vma) { return ((struct vm_special_mapping *)vma->vm_private_data)->name; } static int special_mapping_mremap(struct vm_area_struct *new_vma) { struct vm_special_mapping *sm = new_vma->vm_private_data; if (WARN_ON_ONCE(current->mm != new_vma->vm_mm)) return -EFAULT; if (sm->mremap) return sm->mremap(sm, new_vma); return 0; } static int special_mapping_split(struct vm_area_struct *vma, unsigned long addr) { /* * Forbid splitting special mappings - kernel has expectations over * the number of pages in mapping. Together with VM_DONTEXPAND * the size of vma should stay the same over the special mapping's * lifetime. 
*/ return -EINVAL; } static const struct vm_operations_struct special_mapping_vmops = { .close = special_mapping_close, .fault = special_mapping_fault, .mremap = special_mapping_mremap, .name = special_mapping_name, /* vDSO code relies that VVAR can't be accessed remotely */ .access = NULL, .may_split = special_mapping_split, }; static const struct vm_operations_struct legacy_special_mapping_vmops = { .close = special_mapping_close, .fault = special_mapping_fault, }; static vm_fault_t special_mapping_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; pgoff_t pgoff; struct page **pages; if (vma->vm_ops == &legacy_special_mapping_vmops) { pages = vma->vm_private_data; } else { struct vm_special_mapping *sm = vma->vm_private_data; if (sm->fault) return sm->fault(sm, vmf->vma, vmf); pages = sm->pages; } for (pgoff = vmf->pgoff; pgoff && *pages; ++pages) pgoff--; if (*pages) { struct page *page = *pages; get_page(page); vmf->page = page; return 0; } return VM_FAULT_SIGBUS; } static struct vm_area_struct *__install_special_mapping( struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long vm_flags, void *priv, const struct vm_operations_struct *ops) { int ret; struct vm_area_struct *vma; vma = vm_area_alloc(mm); if (unlikely(vma == NULL)) return ERR_PTR(-ENOMEM); vma_set_range(vma, addr, addr + len, 0); vm_flags_init(vma, (vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = ops; vma->vm_private_data = priv; ret = insert_vm_struct(mm, vma); if (ret) goto out; vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT); perf_event_mmap(vma); return vma; out: vm_area_free(vma); return ERR_PTR(ret); } bool vma_is_special_mapping(const struct vm_area_struct *vma, const struct vm_special_mapping *sm) { return vma->vm_private_data == sm && (vma->vm_ops == &special_mapping_vmops || vma->vm_ops == &legacy_special_mapping_vmops); } /* * Called with mm->mmap_lock held for writing. * Insert a new vma covering the given region, with the given flags. * Its pages are supplied by the given array of struct page *. * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. * The region past the last page supplied will always produce SIGBUS. * The array pointer and the pages it points to are assumed to stay alive * for as long as this mapping might exist. */ struct vm_area_struct *_install_special_mapping( struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long vm_flags, const struct vm_special_mapping *spec) { return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec, &special_mapping_vmops); } int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long vm_flags, struct page **pages) { struct vm_area_struct *vma = __install_special_mapping( mm, addr, len, vm_flags, (void *)pages, &legacy_special_mapping_vmops); return PTR_ERR_OR_ZERO(vma); } static DEFINE_MUTEX(mm_all_locks_mutex); static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) { if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. */ down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock); /* * We can safely modify head.next after taking the * anon_vma->root->rwsem. If some other vma in this mm shares * the same anon_vma we won't take it again. 
* * No need of atomic instructions here, head.next * can't change from under us thanks to the * anon_vma->root->rwsem. */ if (__test_and_set_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); } } static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) { if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { /* * AS_MM_ALL_LOCKS can't change from under us because * we hold the mm_all_locks_mutex. * * Operations on ->flags have to be atomic because * even if AS_MM_ALL_LOCKS is stable thanks to the * mm_all_locks_mutex, there may be other cpus * changing other bitflags in parallel to us. */ if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock); } } /* * This operation locks against the VM for all pte/vma/mm related * operations that could ever happen on a certain mm. This includes * vmtruncate, try_to_unmap, and all page faults. * * The caller must take the mmap_lock in write mode before calling * mm_take_all_locks(). The caller isn't allowed to release the * mmap_lock until mm_drop_all_locks() returns. * * mmap_lock in write mode is required in order to block all operations * that could modify pagetables and free pages without need of * altering the vma layout. It's also needed in write mode to avoid new * anon_vmas to be associated with existing vmas. * * A single task can't take more than one mm_take_all_locks() in a row * or it would deadlock. * * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in * mapping->flags avoid to take the same lock twice, if more than one * vma in this mm is backed by the same anon_vma or address_space. * * We take locks in following order, accordingly to comment at beginning * of mm/rmap.c: * - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for * hugetlb mapping); * - all vmas marked locked * - all i_mmap_rwsem locks; * - all anon_vma->rwseml * * We can take all locks within these types randomly because the VM code * doesn't nest them and we protected from parallel mm_take_all_locks() by * mm_all_locks_mutex. * * mm_take_all_locks() and mm_drop_all_locks are expensive operations * that may have to take thousand of locks. * * mm_take_all_locks() can fail if it's interrupted by signals. */ int mm_take_all_locks(struct mm_struct *mm) { struct vm_area_struct *vma; struct anon_vma_chain *avc; MA_STATE(mas, &mm->mm_mt, 0, 0); mmap_assert_write_locked(mm); mutex_lock(&mm_all_locks_mutex); /* * vma_start_write() does not have a complement in mm_drop_all_locks() * because vma_start_write() is always asymmetrical; it marks a VMA as * being written to until mmap_write_unlock() or mmap_write_downgrade() * is reached. 
*/ mas_for_each(&mas, vma, ULONG_MAX) { if (signal_pending(current)) goto out_unlock; vma_start_write(vma); } mas_set(&mas, 0); mas_for_each(&mas, vma, ULONG_MAX) { if (signal_pending(current)) goto out_unlock; if (vma->vm_file && vma->vm_file->f_mapping && is_vm_hugetlb_page(vma)) vm_lock_mapping(mm, vma->vm_file->f_mapping); } mas_set(&mas, 0); mas_for_each(&mas, vma, ULONG_MAX) { if (signal_pending(current)) goto out_unlock; if (vma->vm_file && vma->vm_file->f_mapping && !is_vm_hugetlb_page(vma)) vm_lock_mapping(mm, vma->vm_file->f_mapping); } mas_set(&mas, 0); mas_for_each(&mas, vma, ULONG_MAX) { if (signal_pending(current)) goto out_unlock; if (vma->anon_vma) list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) vm_lock_anon_vma(mm, avc->anon_vma); } return 0; out_unlock: mm_drop_all_locks(mm); return -EINTR; } static void vm_unlock_anon_vma(struct anon_vma *anon_vma) { if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change to 0 from under * us because we hold the mm_all_locks_mutex. * * We must however clear the bitflag before unlocking * the vma so the users using the anon_vma->rb_root will * never see our bitflag. * * No need of atomic instructions here, head.next * can't change from under us until we release the * anon_vma->root->rwsem. */ if (!__test_and_clear_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); anon_vma_unlock_write(anon_vma); } } static void vm_unlock_mapping(struct address_space *mapping) { if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { /* * AS_MM_ALL_LOCKS can't change to 0 from under us * because we hold the mm_all_locks_mutex. */ i_mmap_unlock_write(mapping); if (!test_and_clear_bit(AS_MM_ALL_LOCKS, &mapping->flags)) BUG(); } } /* * The mmap_lock cannot be released by the caller until * mm_drop_all_locks() returns. */ void mm_drop_all_locks(struct mm_struct *mm) { struct vm_area_struct *vma; struct anon_vma_chain *avc; MA_STATE(mas, &mm->mm_mt, 0, 0); mmap_assert_write_locked(mm); BUG_ON(!mutex_is_locked(&mm_all_locks_mutex)); mas_for_each(&mas, vma, ULONG_MAX) { if (vma->anon_vma) list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) vm_unlock_anon_vma(avc->anon_vma); if (vma->vm_file && vma->vm_file->f_mapping) vm_unlock_mapping(vma->vm_file->f_mapping); } mutex_unlock(&mm_all_locks_mutex); } /* * initialise the percpu counter for VM */ void __init mmap_init(void) { int ret; ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); } /* * Initialise sysctl_user_reserve_kbytes. * * This is intended to prevent a user from starting a single memory hogging * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER * mode. * * The default value is min(3% of free memory, 128MB) * 128MB is enough to recover with sshd/login, bash, and top/kill. */ static int init_user_reserve(void) { unsigned long free_kbytes; free_kbytes = K(global_zone_page_state(NR_FREE_PAGES)); sysctl_user_reserve_kbytes = min(free_kbytes / 32, SZ_128K); return 0; } subsys_initcall(init_user_reserve); /* * Initialise sysctl_admin_reserve_kbytes. * * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin * to log in and kill a memory hogging process. * * Systems with more than 256MB will reserve 8MB, enough to recover * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will * only reserve 3% of free pages by default. 
*/ static int init_admin_reserve(void) { unsigned long free_kbytes; free_kbytes = K(global_zone_page_state(NR_FREE_PAGES)); sysctl_admin_reserve_kbytes = min(free_kbytes / 32, SZ_8K); return 0; } subsys_initcall(init_admin_reserve); /* * Reinititalise user and admin reserves if memory is added or removed. * * The default user reserve max is 128MB, and the default max for the * admin reserve is 8MB. These are usually, but not always, enough to * enable recovery from a memory hogging process using login/sshd, a shell, * and tools like top. It may make sense to increase or even disable the * reserve depending on the existence of swap or variations in the recovery * tools. So, the admin may have changed them. * * If memory is added and the reserves have been eliminated or increased above * the default max, then we'll trust the admin. * * If memory is removed and there isn't enough free memory, then we * need to reset the reserves. * * Otherwise keep the reserve set by the admin. */ static int reserve_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { unsigned long tmp, free_kbytes; switch (action) { case MEM_ONLINE: /* Default max is 128MB. Leave alone if modified by operator. */ tmp = sysctl_user_reserve_kbytes; if (tmp > 0 && tmp < SZ_128K) init_user_reserve(); /* Default max is 8MB. Leave alone if modified by operator. */ tmp = sysctl_admin_reserve_kbytes; if (tmp > 0 && tmp < SZ_8K) init_admin_reserve(); break; case MEM_OFFLINE: free_kbytes = K(global_zone_page_state(NR_FREE_PAGES)); if (sysctl_user_reserve_kbytes > free_kbytes) { init_user_reserve(); pr_info("vm.user_reserve_kbytes reset to %lu\n", sysctl_user_reserve_kbytes); } if (sysctl_admin_reserve_kbytes > free_kbytes) { init_admin_reserve(); pr_info("vm.admin_reserve_kbytes reset to %lu\n", sysctl_admin_reserve_kbytes); } break; default: break; } return NOTIFY_OK; } static int __meminit init_reserve_notifier(void) { if (hotplug_memory_notifier(reserve_mem_notifier, DEFAULT_CALLBACK_PRI)) pr_err("Failed registering memory add/remove notifier for admin reserve\n"); return 0; } subsys_initcall(init_reserve_notifier);
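/*
 * Illustrative userspace sketch (not part of mmap.c above): punching a hole
 * in the middle of an anonymous mapping exercises the munmap path shown
 * earlier -- do_vmi_munmap() finds the overlapping VMA and __split_vma()
 * trims it so only the requested range is torn down, leaving two separate
 * VMAs behind. This only demonstrates the effect observable from userspace;
 * the sizes and the /proc inspection are arbitrary choices for the example.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t page = (size_t)sysconf(_SC_PAGESIZE);

	/* One anonymous, private VMA spanning three pages. */
	char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Unmap only the middle page: the kernel splits the VMA around it. */
	if (munmap(p + page, page)) {
		perror("munmap");
		return 1;
	}

	printf("mapping at %p, hole at %p..%p\n",
	       (void *)p, (void *)(p + page), (void *)(p + 2 * page));

	/* The two remaining one-page VMAs show up as separate lines here. */
	system("cat /proc/self/maps");
	return 0;
}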
// SPDX-License-Identifier: GPL-2.0-only
/*
 * IPv6 library code, needed by static components when full IPv6 support is
 * not configured or static.
 */
#include <linux/export.h>
#include <net/ipv6.h>
#include <net/ipv6_stubs.h>
#include <net/addrconf.h>
#include <net/ip.h>

/* if ipv6 module registers this function is used by xfrm to force all
 * sockets to relookup their nodes - this is fairly expensive, be
 * careful
 */
void (*__fib6_flush_trees)(struct net *);
EXPORT_SYMBOL(__fib6_flush_trees);

#define IPV6_ADDR_SCOPE_TYPE(scope)	((scope) << 16)

static inline unsigned int ipv6_addr_scope2type(unsigned int scope)
{
	switch (scope) {
	case IPV6_ADDR_SCOPE_NODELOCAL:
		return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) |
			IPV6_ADDR_LOOPBACK);
	case IPV6_ADDR_SCOPE_LINKLOCAL:
		return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL) |
			IPV6_ADDR_LINKLOCAL);
	case IPV6_ADDR_SCOPE_SITELOCAL:
		return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL) |
			IPV6_ADDR_SITELOCAL);
	}
	return IPV6_ADDR_SCOPE_TYPE(scope);
}

int __ipv6_addr_type(const struct in6_addr *addr)
{
	__be32 st;

	st = addr->s6_addr32[0];

	/* Consider all addresses with the first three bits different of
	   000 and 111 as unicasts.
*/ if ((st & htonl(0xE0000000)) != htonl(0x00000000) && (st & htonl(0xE0000000)) != htonl(0xE0000000)) return (IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { /* multicast */ /* addr-select 3.1 */ return (IPV6_ADDR_MULTICAST | ipv6_addr_scope2type(IPV6_ADDR_MC_SCOPE(addr))); } if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.1 */ if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL)); /* addr-select 3.1 */ if ((st & htonl(0xFE000000)) == htonl(0xFC000000)) return (IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* RFC 4193 */ if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { if (addr->s6_addr32[2] == 0) { if (addr->s6_addr32[3] == 0) return IPV6_ADDR_ANY; if (addr->s6_addr32[3] == htonl(0x00000001)) return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.4 */ return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } if (addr->s6_addr32[2] == htonl(0x0000ffff)) return (IPV6_ADDR_MAPPED | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } return (IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */ } EXPORT_SYMBOL(__ipv6_addr_type); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain); int register_inet6addr_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&inet6addr_chain, nb); } EXPORT_SYMBOL(register_inet6addr_notifier); int unregister_inet6addr_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&inet6addr_chain, nb); } EXPORT_SYMBOL(unregister_inet6addr_notifier); int inet6addr_notifier_call_chain(unsigned long val, void *v) { return atomic_notifier_call_chain(&inet6addr_chain, val, v); } EXPORT_SYMBOL(inet6addr_notifier_call_chain); int register_inet6addr_validator_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&inet6addr_validator_chain, nb); } EXPORT_SYMBOL(register_inet6addr_validator_notifier); int unregister_inet6addr_validator_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&inet6addr_validator_chain, nb); } EXPORT_SYMBOL(unregister_inet6addr_validator_notifier); int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) { return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v); } EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); static struct dst_entry *eafnosupport_ipv6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { return ERR_PTR(-EAFNOSUPPORT); } static int eafnosupport_ipv6_route_input(struct sk_buff *skb) { return -EAFNOSUPPORT; } static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) { return NULL; } static int eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags) { return -EAFNOSUPPORT; } static int eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags) { return -EAFNOSUPPORT; } static void eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res, struct 
flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { } static u32 eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { return 0; } static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); return -EAFNOSUPPORT; } static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify) { return -EAFNOSUPPORT; } static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { kfree_skb(skb); return -EAFNOSUPPORT; } static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr, struct net_device *dev) { return ERR_PTR(-EAFNOSUPPORT); } const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow, .ipv6_route_input = eafnosupport_ipv6_route_input, .fib6_get_table = eafnosupport_fib6_get_table, .fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_lookup = eafnosupport_fib6_lookup, .fib6_select_path = eafnosupport_fib6_select_path, .ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6, .fib6_nh_init = eafnosupport_fib6_nh_init, .ip6_del_rt = eafnosupport_ip6_del_rt, .ipv6_fragment = eafnosupport_ipv6_fragment, .ipv6_dev_find = eafnosupport_ipv6_dev_find, }; EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) { kfree(idev->stats.icmpv6msgdev); kfree(idev->stats.icmpv6dev); free_percpu(idev->stats.ipv6); } static void in6_dev_finish_destroy_rcu(struct rcu_head *head) { struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); snmp6_free_dev(idev); kfree(idev); } /* Nobody refers to this device, we may destroy it. */ void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; WARN_ON(!list_empty(&idev->addr_list)); WARN_ON(rcu_access_pointer(idev->mc_list)); WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %s\n", __func__, dev ? 
		 dev->name : "NIL");
#endif
	netdev_put(dev, &idev->dev_tracker);

	if (!idev->dead) {
		pr_warn("Freeing alive inet6 device %p\n", idev);
		return;
	}
	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
}
EXPORT_SYMBOL(in6_dev_finish_destroy);
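/*
 * Illustrative userspace sketch (not part of addrconf_core.c above): the
 * standard IN6_IS_ADDR_* macros test the same address-class prefixes that
 * __ipv6_addr_type() inspects (ff00::/8 multicast, fe80::/10 link-local,
 * ::1 loopback, ::ffff:0:0/96 IPv4-mapped). This is only a rough userspace
 * analogue of the kernel classification, not a reimplementation of it;
 * the sample addresses are arbitrary.
 */
#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

static const char *classify(const struct in6_addr *a)
{
	if (IN6_IS_ADDR_MULTICAST(a))
		return "multicast";
	if (IN6_IS_ADDR_LINKLOCAL(a))
		return "link-local unicast";
	if (IN6_IS_ADDR_LOOPBACK(a))
		return "loopback";
	if (IN6_IS_ADDR_V4MAPPED(a))
		return "IPv4-mapped";
	return "global unicast (or other)";
}

int main(void)
{
	const char *samples[] = {
		"ff02::1", "fe80::1", "::1", "::ffff:192.0.2.1", "2001:db8::1",
	};

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		struct in6_addr a;

		if (inet_pton(AF_INET6, samples[i], &a) != 1)
			continue;
		printf("%-20s -> %s\n", samples[i], classify(&a));
	}
	return 0;
}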
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_RCULIST_H
#define _LINUX_RCULIST_H

#ifdef __KERNEL__

/*
 * RCU-protected list version
 */
#include <linux/list.h>
#include <linux/rcupdate.h>

/*
 * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers
 * @list: list to be initialized
 *
 * You should instead use INIT_LIST_HEAD() for normal initialization and
 * cleanup tasks, when readers have no access to
the list being initialized. * However, if the list being initialized is visible to readers, you * need to keep the compiler from being too mischievous. */ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) { WRITE_ONCE(list->next, list); WRITE_ONCE(list->prev, list); } /* * return the ->next pointer of a list_head in an rcu safe * way, we must not access it directly */ #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) /** * list_tail_rcu - returns the prev pointer of the head of the list * @head: the head of the list * * Note: This should only be used with the list header, and even then * only if list_del() and similar primitives are not also used on the * list header. */ #define list_tail_rcu(head) (*((struct list_head __rcu **)(&(head)->prev))) /* * Check during list traversal that we are within an RCU reader */ #define check_arg_count_one(dummy) #ifdef CONFIG_PROVE_RCU_LIST #define __list_check_rcu(dummy, cond, extra...) \ ({ \ check_arg_count_one(extra); \ RCU_LOCKDEP_WARN(!(cond) && !rcu_read_lock_any_held(), \ "RCU-list traversed in non-reader section!"); \ }) #define __list_check_srcu(cond) \ ({ \ RCU_LOCKDEP_WARN(!(cond), \ "RCU-list traversed without holding the required lock!");\ }) #else #define __list_check_rcu(dummy, cond, extra...) \ ({ check_arg_count_one(extra); }) #define __list_check_srcu(cond) ({ }) #endif /* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { if (!__list_add_valid(new, prev, next)) return; new->next = next; new->prev = prev; rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } /** * list_add_rcu - add a new entry to rcu-protected list * @new: new entry to be added * @head: list head to add it after * * Insert a new entry after the specified head. * This is good for implementing stacks. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as list_add_rcu() * or list_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * list_for_each_entry_rcu(). */ static inline void list_add_rcu(struct list_head *new, struct list_head *head) { __list_add_rcu(new, head, head->next); } /** * list_add_tail_rcu - add a new entry to rcu-protected list * @new: new entry to be added * @head: list head to add it before * * Insert a new entry before the specified head. * This is useful for implementing queues. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as list_add_tail_rcu() * or list_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * list_for_each_entry_rcu(). */ static inline void list_add_tail_rcu(struct list_head *new, struct list_head *head) { __list_add_rcu(new, head->prev, head); } /** * list_del_rcu - deletes entry from list without re-initialization * @entry: the element to delete from the list. * * Note: list_empty() on entry does not return true after this, * the entry is in an undefined state. It is useful for RCU based * lockfree traversal. 
* * In particular, it means that we can not poison the forward * pointers that may still be used for walking the list. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as list_del_rcu() * or list_add_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * list_for_each_entry_rcu(). * * Note that the caller is not permitted to immediately free * the newly deleted entry. Instead, either synchronize_rcu() * or call_rcu() must be used to defer freeing until an RCU * grace period has elapsed. */ static inline void list_del_rcu(struct list_head *entry) { __list_del_entry(entry); entry->prev = LIST_POISON2; } /** * hlist_del_init_rcu - deletes entry from hash list with re-initialization * @n: the element to delete from the hash list. * * Note: list_unhashed() on the node return true after this. It is * useful for RCU based read lockfree traversal if the writer side * must know if the list entry is still hashed or already unhashed. * * In particular, it means that we can not poison the forward pointers * that may still be used for walking the hash list and we can only * zero the pprev pointer so list_unhashed() will return true after * this. * * The caller must take whatever precautions are necessary (such as * holding appropriate locks) to avoid racing with another * list-mutation primitive, such as hlist_add_head_rcu() or * hlist_del_rcu(), running on this same list. However, it is * perfectly legal to run concurrently with the _rcu list-traversal * primitives, such as hlist_for_each_entry_rcu(). */ static inline void hlist_del_init_rcu(struct hlist_node *n) { if (!hlist_unhashed(n)) { __hlist_del(n); WRITE_ONCE(n->pprev, NULL); } } /** * list_replace_rcu - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert * * The @old entry will be replaced with the @new entry atomically. * Note: @old should not be empty. */ static inline void list_replace_rcu(struct list_head *old, struct list_head *new) { new->next = old->next; new->prev = old->prev; rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } /** * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice * @prev: points to the last element of the existing list * @next: points to the first element of the existing list * @sync: synchronize_rcu, synchronize_rcu_expedited, ... * * The list pointed to by @prev and @next can be RCU-read traversed * concurrently with this function. * * Note that this function blocks. * * Important note: the caller must take whatever action is necessary to prevent * any other updates to the existing list. In principle, it is possible to * modify the list as soon as sync() begins execution. If this sort of thing * becomes necessary, an alternative version based on call_rcu() could be * created. But only if -really- needed -- there is no shortage of RCU API * members. */ static inline void __list_splice_init_rcu(struct list_head *list, struct list_head *prev, struct list_head *next, void (*sync)(void)) { struct list_head *first = list->next; struct list_head *last = list->prev; /* * "first" and "last" tracking list, so initialize it. RCU readers * have access to this list, so we must use INIT_LIST_HEAD_RCU() * instead of INIT_LIST_HEAD(). 
*/ INIT_LIST_HEAD_RCU(list); /* * At this point, the list body still points to the source list. * Wait for any readers to finish using the list before splicing * the list body into the new list. Any new readers will see * an empty list. */ sync(); ASSERT_EXCLUSIVE_ACCESS(*first); ASSERT_EXCLUSIVE_ACCESS(*last); /* * Readers are finished with the source list, so perform splice. * The order is important if the new list is global and accessible * to concurrent RCU readers. Note that RCU readers are not * permitted to traverse the prev pointers without excluding * this function. */ last->next = next; rcu_assign_pointer(list_next_rcu(prev), first); first->prev = prev; next->prev = last; } /** * list_splice_init_rcu - splice an RCU-protected list into an existing list, * designed for stacks. * @list: the RCU-protected list to splice * @head: the place in the existing list to splice the first list into * @sync: synchronize_rcu, synchronize_rcu_expedited, ... */ static inline void list_splice_init_rcu(struct list_head *list, struct list_head *head, void (*sync)(void)) { if (!list_empty(list)) __list_splice_init_rcu(list, head, head->next, sync); } /** * list_splice_tail_init_rcu - splice an RCU-protected list into an existing * list, designed for queues. * @list: the RCU-protected list to splice * @head: the place in the existing list to splice the first list into * @sync: synchronize_rcu, synchronize_rcu_expedited, ... */ static inline void list_splice_tail_init_rcu(struct list_head *list, struct list_head *head, void (*sync)(void)) { if (!list_empty(list)) __list_splice_init_rcu(list, head->prev, head, sync); } /** * list_entry_rcu - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * This primitive may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ container_of(READ_ONCE(ptr), type, member) /* * Where are list_empty_rcu() and list_first_entry_rcu()? * * They do not exist because they would lead to subtle race conditions: * * if (!list_empty_rcu(mylist)) { * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member); * do_something(bar); * } * * The list might be non-empty when list_empty_rcu() checks it, but it * might have become empty by the time that list_first_entry_rcu() rereads * the ->next pointer, which would result in a SEGV. * * When not using RCU, it is OK for list_first_entry() to re-read that * pointer because both functions should be protected by some lock that * blocks writers. * * When using RCU, list_empty() uses READ_ONCE() to fetch the * RCU-protected ->next pointer and then compares it to the address of the * list head. However, it neither dereferences this pointer nor provides * this pointer to its caller. Thus, READ_ONCE() suffices (that is, * rcu_dereference() is not needed), which means that list_empty() can be * used anywhere you would want to use list_empty_rcu(). Just don't * expect anything useful to happen if you do a subsequent lockless * call to list_first_entry_rcu()!!! * * See list_first_or_null_rcu for an alternative. */ /** * list_first_or_null_rcu - get the first element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note that if the list is empty, it returns NULL. 
* * This primitive may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_first_or_null_rcu(ptr, type, member) \ ({ \ struct list_head *__ptr = (ptr); \ struct list_head *__next = READ_ONCE(__ptr->next); \ likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ }) /** * list_next_or_null_rcu - get the next element from a list * @head: the head for the list. * @ptr: the list head to take the next element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note that if the ptr is at the end of the list, NULL is returned. * * This primitive may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_next_or_null_rcu(head, ptr, type, member) \ ({ \ struct list_head *__head = (head); \ struct list_head *__ptr = (ptr); \ struct list_head *__next = READ_ONCE(__ptr->next); \ likely(__next != __head) ? list_entry_rcu(__next, type, \ member) : NULL; \ }) /** * list_for_each_entry_rcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * @cond: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member, cond...) \ for (__list_check_rcu(dummy, ## cond, 0), \ pos = list_entry_rcu((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** * list_for_each_entry_srcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * @cond: lockdep expression for the lock required to traverse the list. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by srcu_read_lock(). * The lockdep expression srcu_read_lock_held() can be passed as the * cond argument from read side. */ #define list_for_each_entry_srcu(pos, head, member, cond) \ for (__list_check_srcu(cond), \ pos = list_entry_rcu((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** * list_entry_lockless - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * This primitive may safely run concurrently with the _rcu * list-mutation primitives such as list_add_rcu(), but requires some * implicit RCU read-side guarding. One example is running within a special * exception-time environment where preemption is disabled and where lockdep * cannot be invoked. Another example is when items are added to the list, * but never deleted. */ #define list_entry_lockless(ptr, type, member) \ container_of((typeof(ptr))READ_ONCE(ptr), type, member) /** * list_for_each_entry_lockless - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_struct within the struct. 
* * This primitive may safely run concurrently with the _rcu * list-mutation primitives such as list_add_rcu(), but requires some * implicit RCU read-side guarding. One example is running within a special * exception-time environment where preemption is disabled and where lockdep * cannot be invoked. Another example is when items are added to the list, * but never deleted. */ #define list_for_each_entry_lockless(pos, head, member) \ for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry_lockless(pos->member.next, typeof(*pos), member)) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * * Continue to iterate over list of given type, continuing after * the current position which must have been in the list when the RCU read * lock was taken. * This would typically require either that you obtained the node from a * previous walk of the list in the same RCU read-side critical section, or * that you held some sort of non-RCU reference (such as a reference count) * to keep the node alive *and* in the list. * * This iterator is similar to list_for_each_entry_from_rcu() except * this starts after the given position and that one starts at the given * position. */ #define list_for_each_entry_continue_rcu(pos, head, member) \ for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** * list_for_each_entry_from_rcu - iterate over a list from current point * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_node within the struct. * * Iterate over the tail of a list starting from a given position, * which must have been in the list when the RCU read lock was taken. * This would typically require either that you obtained the node from a * previous walk of the list in the same RCU read-side critical section, or * that you held some sort of non-RCU reference (such as a reference count) * to keep the node alive *and* in the list. * * This iterator is similar to list_for_each_entry_continue_rcu() except * this starts from the given position and that one starts from the position * after the given position. */ #define list_for_each_entry_from_rcu(pos, head, member) \ for (; &(pos)->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*(pos)), member)) /** * hlist_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. * * Note: list_unhashed() on entry does not return true after this, * the entry is in an undefined state. It is useful for RCU based * lockfree traversal. * * In particular, it means that we can not poison the forward * pointers that may still be used for walking the hash list. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry(). 
*/ static inline void hlist_del_rcu(struct hlist_node *n) { __hlist_del(n); WRITE_ONCE(n->pprev, LIST_POISON2); } /** * hlist_replace_rcu - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert * * The @old entry will be replaced with the @new entry atomically. */ static inline void hlist_replace_rcu(struct hlist_node *old, struct hlist_node *new) { struct hlist_node *next = old->next; new->next = next; WRITE_ONCE(new->pprev, old->pprev); rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) WRITE_ONCE(new->next->pprev, &new->next); WRITE_ONCE(old->pprev, LIST_POISON2); } /** * hlists_swap_heads_rcu - swap the lists the hlist heads point to * @left: The hlist head on the left * @right: The hlist head on the right * * The lists start out as [@left ][node1 ... ] and * [@right ][node2 ... ] * The lists end up as [@left ][node2 ... ] * [@right ][node1 ... ] */ static inline void hlists_swap_heads_rcu(struct hlist_head *left, struct hlist_head *right) { struct hlist_node *node1 = left->first; struct hlist_node *node2 = right->first; rcu_assign_pointer(left->first, node2); rcu_assign_pointer(right->first, node1); WRITE_ONCE(node2->pprev, &left->first); WRITE_ONCE(node1->pprev, &right->first); } /* * return the first or the next element in an RCU protected hlist */ #define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) #define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) #define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) /** * hlist_add_head_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; n->next = first; WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_first_rcu(h), n); if (first) WRITE_ONCE(first->pprev, &n->next); } /** * hlist_add_tail_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_add_tail_rcu(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *i, *last = NULL; /* Note: write side code, so rcu accessors are not needed. 
*/ for (i = h->first; i; i = i->next) last = i; if (last) { n->next = last->next; WRITE_ONCE(n->pprev, &last->next); rcu_assign_pointer(hlist_next_rcu(last), n); } else { hlist_add_head_rcu(n, h); } } /** * hlist_add_before_rcu * @n: the new element to add to the hash list. * @next: the existing element to add the new element before. * * Description: * Adds the specified element to the specified hlist * before the specified node while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. */ static inline void hlist_add_before_rcu(struct hlist_node *n, struct hlist_node *next) { WRITE_ONCE(n->pprev, next->pprev); n->next = next; rcu_assign_pointer(hlist_pprev_rcu(n), n); WRITE_ONCE(next->pprev, &n->next); } /** * hlist_add_behind_rcu * @n: the new element to add to the hash list. * @prev: the existing element to add the new element after. * * Description: * Adds the specified element to the specified hlist * after the specified node while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. */ static inline void hlist_add_behind_rcu(struct hlist_node *n, struct hlist_node *prev) { n->next = prev->next; WRITE_ONCE(n->pprev, &prev->next); rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) WRITE_ONCE(n->next->pprev, &n->next); } #define __hlist_for_each_rcu(pos, head) \ for (pos = rcu_dereference(hlist_first_rcu(head)); \ pos; \ pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @cond: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ #define hlist_for_each_entry_rcu(pos, head, member, cond...) \ for (__list_check_rcu(dummy, ## cond, 0), \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_srcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @cond: lockdep expression for the lock required to traverse the list. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by srcu_read_lock(). * The lockdep expression srcu_read_lock_held() can be passed as the * cond argument from read side. 
*/ #define hlist_for_each_entry_srcu(pos, head, member, cond) \ for (__list_check_srcu(cond), \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_rcu_notrace - iterate over rcu list of given type (for tracing) * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). * * This is the same as hlist_for_each_entry_rcu() except that it does * not do any RCU debugging or tracing. */ #define hlist_for_each_entry_rcu_notrace(pos, head, member) \ for (pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ #define hlist_for_each_entry_rcu_bh(pos, head, member) \ for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue_rcu(pos, member) \ for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue_rcu_bh(pos, member) \ for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_from_rcu(pos, member) \ for (; pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member)) #endif /* __KERNEL__ */ #endif
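/*
 * Editor's sketch (not part of <linux/rculist.h>): how the primitives above
 * are typically paired. Readers traverse under rcu_read_lock() with
 * list_for_each_entry_rcu(); updaters serialize against each other with a
 * spinlock, publish with list_add_rcu(), unlink with list_del_rcu(), and
 * defer freeing past a grace period with kfree_rcu(). The struct foo,
 * foo_list, foo_lock and foo_*() names are made up for illustration.
 */
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct foo {
	int key;
	struct list_head list;
	struct rcu_head rcu;
};

static LIST_HEAD(foo_list);		/* RCU-protected list */
static DEFINE_SPINLOCK(foo_lock);	/* serializes updaters only */

/* Reader: may run concurrently with foo_add()/foo_del(). */
static bool foo_lookup(int key)
{
	struct foo *p;
	bool found = false;

	rcu_read_lock();
	list_for_each_entry_rcu(p, &foo_list, list) {
		if (p->key == key) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

/* Updater: publish a new entry; the rcu_assign_pointer() inside
 * list_add_rcu() orders the entry's initialization before it becomes
 * visible to readers. */
static void foo_add(struct foo *new)
{
	spin_lock(&foo_lock);
	list_add_rcu(&new->list, &foo_list);
	spin_unlock(&foo_lock);
}

/* Updater: unlink now, free only after all pre-existing readers are done. */
static void foo_del(struct foo *p)
{
	spin_lock(&foo_lock);
	list_del_rcu(&p->list);
	spin_unlock(&foo_lock);
	kfree_rcu(p, rcu);
}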
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_ATOMIC_H #define _ASM_X86_ATOMIC_H #include <linux/compiler.h> #include <linux/types.h> #include <asm/alternative.h> #include <asm/cmpxchg.h> #include <asm/rmwcc.h> #include <asm/barrier.h> /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, * it's non-inlined function that increases binary size and stack usage. */ return __READ_ONCE((v)->counter); } static __always_inline void arch_atomic_set(atomic_t *v, int i) { __WRITE_ONCE(v->counter, i); } static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic_add_return arch_atomic_add_return static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { return arch_atomic_add_return(-i, v); } #define arch_atomic_sub_return arch_atomic_sub_return static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } #define arch_atomic_fetch_add arch_atomic_fetch_add static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { return xadd(&v->counter, -i); } #define arch_atomic_fetch_sub arch_atomic_fetch_sub static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic_cmpxchg arch_atomic_cmpxchg
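/*
 * Editor's sketch, not part of this header: the classic compare-and-swap
 * retry loop that arch_atomic_cmpxchg() above enables. The
 * arch_atomic_fetch_and/or/xor() helpers below use the same pattern via
 * arch_atomic_try_cmpxchg(). The function name is made up for illustration.
 */
static __always_inline bool example_arch_atomic_inc_unless_zero(atomic_t *v)
{
	int old = arch_atomic_read(v);
	int prev;

	while (old) {
		/* Try to move old -> old + 1; on failure learn the new value. */
		prev = arch_atomic_cmpxchg(v, old, old + 1);
		if (prev == old)
			return true;	/* our CAS won the race */
		old = prev;		/* somebody else changed it; retry */
	}
	return false;			/* counter was zero; nothing done */
}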
static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return arch_try_cmpxchg(&v->counter, old, new); } #define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } #define arch_atomic_xchg arch_atomic_xchg static __always_inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i)); return val; } #define arch_atomic_fetch_and arch_atomic_fetch_and static __always_inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); return val; } #define arch_atomic_fetch_or arch_atomic_fetch_or static __always_inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) : "ir" (i) : "memory"); } static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i)); return val; } #define arch_atomic_fetch_xor arch_atomic_fetch_xor #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else # include <asm/atomic64_64.h> #endif #endif /* _ASM_X86_ATOMIC_H */
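/*
 * Editor's sketch, separate from the header above: kernel code normally does
 * not call the arch_atomic_*() primitives directly but uses the generic
 * atomic_*() wrappers from <linux/atomic.h>, which are layered on top of
 * them. A typical use they enable is a simple reference count; struct widget
 * and the widget_*() helpers are made up for illustration (real code would
 * usually prefer refcount_t for this).
 */
#include <linux/atomic.h>
#include <linux/slab.h>

struct widget {
	atomic_t refcnt;
	/* ... payload ... */
};

static struct widget *widget_get(struct widget *w)
{
	atomic_inc(&w->refcnt);		/* LOCK incl on x86, cf. arch_atomic_inc() */
	return w;
}

static void widget_put(struct widget *w)
{
	/* LOCK decl plus ZF test, cf. arch_atomic_dec_and_test() */
	if (atomic_dec_and_test(&w->refcnt))
		kfree(w);
}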
// SPDX-License-Identifier: GPL-2.0-or-later /* * Abilis Systems Single DVB-T Receiver * Copyright (C) 2008 Pierrick Hascoet <pierrick.hascoet@abilis.com> * Copyright (C) 2010 Devin Heitmueller <dheitmueller@kernellabs.com> */ #include <media/dvb_frontend.h> #include "as102_fe.h" struct as102_state { struct dvb_frontend frontend; struct as10x_demod_stats demod_stats; const struct as102_fe_ops *ops; void *priv; uint8_t elna_cfg; /* signal strength */ uint16_t signal_strength; /* bit error rate */ uint32_t ber; }; static uint8_t as102_fe_get_code_rate(enum fe_code_rate arg) { uint8_t c; switch (arg) { case FEC_1_2: c = CODE_RATE_1_2; break; case FEC_2_3: c = CODE_RATE_2_3; break; case FEC_3_4: c = CODE_RATE_3_4; break; case FEC_5_6: c = CODE_RATE_5_6; break; case FEC_7_8: c = CODE_RATE_7_8; break; default: c = CODE_RATE_UNKNOWN; break; } return c; } static int as102_fe_set_frontend(struct dvb_frontend *fe) { struct as102_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct as10x_tune_args tune_args = { 0 }; /* set frequency */ tune_args.freq = c->frequency / 1000; /* fix interleaving_mode */ tune_args.interleaving_mode = INTLV_NATIVE; switch (c->bandwidth_hz) { case 8000000: tune_args.bandwidth = BW_8_MHZ; break; case 7000000: tune_args.bandwidth = BW_7_MHZ; break; case 6000000: tune_args.bandwidth = BW_6_MHZ; break; default: tune_args.bandwidth = BW_8_MHZ; } switch (c->guard_interval) { case GUARD_INTERVAL_1_32: tune_args.guard_interval = GUARD_INT_1_32; break; case GUARD_INTERVAL_1_16: tune_args.guard_interval = GUARD_INT_1_16; break; case GUARD_INTERVAL_1_8: tune_args.guard_interval = GUARD_INT_1_8; break; case GUARD_INTERVAL_1_4: tune_args.guard_interval = GUARD_INT_1_4; break; case
GUARD_INTERVAL_AUTO: default: tune_args.guard_interval = GUARD_UNKNOWN; break; } switch (c->modulation) { case QPSK: tune_args.modulation = CONST_QPSK; break; case QAM_16: tune_args.modulation = CONST_QAM16; break; case QAM_64: tune_args.modulation = CONST_QAM64; break; default: tune_args.modulation = CONST_UNKNOWN; break; } switch (c->transmission_mode) { case TRANSMISSION_MODE_2K: tune_args.transmission_mode = TRANS_MODE_2K; break; case TRANSMISSION_MODE_8K: tune_args.transmission_mode = TRANS_MODE_8K; break; default: tune_args.transmission_mode = TRANS_MODE_UNKNOWN; } switch (c->hierarchy) { case HIERARCHY_NONE: tune_args.hierarchy = HIER_NONE; break; case HIERARCHY_1: tune_args.hierarchy = HIER_ALPHA_1; break; case HIERARCHY_2: tune_args.hierarchy = HIER_ALPHA_2; break; case HIERARCHY_4: tune_args.hierarchy = HIER_ALPHA_4; break; case HIERARCHY_AUTO: tune_args.hierarchy = HIER_UNKNOWN; break; } pr_debug("as102: tuner parameters: freq: %d bw: 0x%02x gi: 0x%02x\n", c->frequency, tune_args.bandwidth, tune_args.guard_interval); /* * Detect a hierarchy selection * if HP/LP are both set to FEC_NONE, HP will be selected. */ if ((tune_args.hierarchy != HIER_NONE) && ((c->code_rate_LP == FEC_NONE) || (c->code_rate_HP == FEC_NONE))) { if (c->code_rate_LP == FEC_NONE) { tune_args.hier_select = HIER_HIGH_PRIORITY; tune_args.code_rate = as102_fe_get_code_rate(c->code_rate_HP); } if (c->code_rate_HP == FEC_NONE) { tune_args.hier_select = HIER_LOW_PRIORITY; tune_args.code_rate = as102_fe_get_code_rate(c->code_rate_LP); } pr_debug("as102: \thierarchy: 0x%02x selected: %s code_rate_%s: 0x%02x\n", tune_args.hierarchy, tune_args.hier_select == HIER_HIGH_PRIORITY ? "HP" : "LP", tune_args.hier_select == HIER_HIGH_PRIORITY ? "HP" : "LP", tune_args.code_rate); } else { tune_args.code_rate = as102_fe_get_code_rate(c->code_rate_HP); } /* Set frontend arguments */ return state->ops->set_tune(state->priv, &tune_args); } static int as102_fe_get_frontend(struct dvb_frontend *fe, struct dtv_frontend_properties *c) { struct as102_state *state = fe->demodulator_priv; int ret = 0; struct as10x_tps tps = { 0 }; /* send abilis command: GET_TPS */ ret = state->ops->get_tps(state->priv, &tps); if (ret < 0) return ret; /* extract constellation */ switch (tps.modulation) { case CONST_QPSK: c->modulation = QPSK; break; case CONST_QAM16: c->modulation = QAM_16; break; case CONST_QAM64: c->modulation = QAM_64; break; } /* extract hierarchy */ switch (tps.hierarchy) { case HIER_NONE: c->hierarchy = HIERARCHY_NONE; break; case HIER_ALPHA_1: c->hierarchy = HIERARCHY_1; break; case HIER_ALPHA_2: c->hierarchy = HIERARCHY_2; break; case HIER_ALPHA_4: c->hierarchy = HIERARCHY_4; break; } /* extract code rate HP */ switch (tps.code_rate_HP) { case CODE_RATE_1_2: c->code_rate_HP = FEC_1_2; break; case CODE_RATE_2_3: c->code_rate_HP = FEC_2_3; break; case CODE_RATE_3_4: c->code_rate_HP = FEC_3_4; break; case CODE_RATE_5_6: c->code_rate_HP = FEC_5_6; break; case CODE_RATE_7_8: c->code_rate_HP = FEC_7_8; break; } /* extract code rate LP */ switch (tps.code_rate_LP) { case CODE_RATE_1_2: c->code_rate_LP = FEC_1_2; break; case CODE_RATE_2_3: c->code_rate_LP = FEC_2_3; break; case CODE_RATE_3_4: c->code_rate_LP = FEC_3_4; break; case CODE_RATE_5_6: c->code_rate_LP = FEC_5_6; break; case CODE_RATE_7_8: c->code_rate_LP = FEC_7_8; break; } /* extract guard interval */ switch (tps.guard_interval) { case GUARD_INT_1_32: c->guard_interval = GUARD_INTERVAL_1_32; break; case GUARD_INT_1_16: c->guard_interval = GUARD_INTERVAL_1_16; break; case 
GUARD_INT_1_8: c->guard_interval = GUARD_INTERVAL_1_8; break; case GUARD_INT_1_4: c->guard_interval = GUARD_INTERVAL_1_4; break; } /* extract transmission mode */ switch (tps.transmission_mode) { case TRANS_MODE_2K: c->transmission_mode = TRANSMISSION_MODE_2K; break; case TRANS_MODE_8K: c->transmission_mode = TRANSMISSION_MODE_8K; break; } return 0; } static int as102_fe_get_tune_settings(struct dvb_frontend *fe, struct dvb_frontend_tune_settings *settings) { settings->min_delay_ms = 1000; return 0; } static int as102_fe_read_status(struct dvb_frontend *fe, enum fe_status *status) { int ret = 0; struct as102_state *state = fe->demodulator_priv; struct as10x_tune_status tstate = { 0 }; /* send abilis command: GET_TUNE_STATUS */ ret = state->ops->get_status(state->priv, &tstate); if (ret < 0) return ret; state->signal_strength = tstate.signal_strength; state->ber = tstate.BER; switch (tstate.tune_state) { case TUNE_STATUS_SIGNAL_DVB_OK: *status = FE_HAS_SIGNAL | FE_HAS_CARRIER; break; case TUNE_STATUS_STREAM_DETECTED: *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_SYNC | FE_HAS_VITERBI; break; case TUNE_STATUS_STREAM_TUNED: *status = FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_SYNC | FE_HAS_LOCK | FE_HAS_VITERBI; break; default: *status = TUNE_STATUS_NOT_TUNED; } pr_debug("as102: tuner status: 0x%02x, strength %d, per: %d, ber: %d\n", tstate.tune_state, tstate.signal_strength, tstate.PER, tstate.BER); if (!(*status & FE_HAS_LOCK)) { memset(&state->demod_stats, 0, sizeof(state->demod_stats)); return 0; } ret = state->ops->get_stats(state->priv, &state->demod_stats); if (ret < 0) memset(&state->demod_stats, 0, sizeof(state->demod_stats)); return ret; } /* * Note: * - in AS102 SNR=MER * - the SNR will be returned in linear terms, i.e. not in dB * - the accuracy equals ±2dB for a SNR range from 4dB to 30dB * - the accuracy is >2dB for SNR values outside this range */ static int as102_fe_read_snr(struct dvb_frontend *fe, u16 *snr) { struct as102_state *state = fe->demodulator_priv; *snr = state->demod_stats.mer; return 0; } static int as102_fe_read_ber(struct dvb_frontend *fe, u32 *ber) { struct as102_state *state = fe->demodulator_priv; *ber = state->ber; return 0; } static int as102_fe_read_signal_strength(struct dvb_frontend *fe, u16 *strength) { struct as102_state *state = fe->demodulator_priv; *strength = (((0xffff * 400) * state->signal_strength + 41000) * 2); return 0; } static int as102_fe_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) { struct as102_state *state = fe->demodulator_priv; if (state->demod_stats.has_started) *ucblocks = state->demod_stats.bad_frame_count; else *ucblocks = 0; return 0; } static int as102_fe_ts_bus_ctrl(struct dvb_frontend *fe, int acquire) { struct as102_state *state = fe->demodulator_priv; return state->ops->stream_ctrl(state->priv, acquire, state->elna_cfg); } static void as102_fe_release(struct dvb_frontend *fe) { struct as102_state *state = fe->demodulator_priv; kfree(state); } static const struct dvb_frontend_ops as102_fe_ops = { .delsys = { SYS_DVBT }, .info = { .name = "Abilis AS102 DVB-T", .frequency_min_hz = 174 * MHz, .frequency_max_hz = 862 * MHz, .frequency_stepsize_hz = 166667, .caps = FE_CAN_INVERSION_AUTO | FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QPSK | FE_CAN_QAM_AUTO | FE_CAN_TRANSMISSION_MODE_AUTO | FE_CAN_GUARD_INTERVAL_AUTO | FE_CAN_HIERARCHY_AUTO | FE_CAN_RECOVER | FE_CAN_MUTE_TS }, .set_frontend = as102_fe_set_frontend, 
.get_frontend = as102_fe_get_frontend, .get_tune_settings = as102_fe_get_tune_settings, .read_status = as102_fe_read_status, .read_snr = as102_fe_read_snr, .read_ber = as102_fe_read_ber, .read_signal_strength = as102_fe_read_signal_strength, .read_ucblocks = as102_fe_read_ucblocks, .ts_bus_ctrl = as102_fe_ts_bus_ctrl, .release = as102_fe_release, }; struct dvb_frontend *as102_attach(const char *name, const struct as102_fe_ops *ops, void *priv, uint8_t elna_cfg) { struct as102_state *state; struct dvb_frontend *fe; state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) return NULL; fe = &state->frontend; fe->demodulator_priv = state; state->ops = ops; state->priv = priv; state->elna_cfg = elna_cfg; /* init frontend callback ops */ memcpy(&fe->ops, &as102_fe_ops, sizeof(struct dvb_frontend_ops)); strscpy(fe->ops.info.name, name, sizeof(fe->ops.info.name)); return fe; } EXPORT_SYMBOL_GPL(as102_attach); MODULE_DESCRIPTION("as102-fe"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pierrick Hascoet <pierrick.hascoet@abilis.com>");
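/*
 * Editor's sketch (not from the kernel tree): how a bus driver consumes this
 * frontend. It supplies an as102_fe_ops table whose callbacks (set_tune,
 * get_tps, get_status, get_stats, stream_ctrl - the operations invoked via
 * state->ops above; exact prototypes live in as102_fe.h) talk to the
 * hardware, then attaches and registers the frontend. The my_bus_* names
 * and struct layout are made up for illustration.
 */
#include <media/dvb_frontend.h>
#include "as102_fe.h"

struct my_bus_dev {
	struct dvb_adapter adapter;	/* registered elsewhere by the bus driver */
	struct dvb_frontend *fe;
	uint8_t elna_cfg;
};

/* Callback table implemented by the bus glue (see as102_fe.h for prototypes). */
extern const struct as102_fe_ops my_bus_fe_ops;

static int my_bus_frontend_init(struct my_bus_dev *dev)
{
	dev->fe = as102_attach("Abilis AS102 DVB-T", &my_bus_fe_ops,
			       dev /* handed back as the priv cookie */,
			       dev->elna_cfg);
	if (!dev->fe)
		return -ENOMEM;

	return dvb_register_frontend(&dev->adapter, dev->fe);
}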
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2002 Richard Henderson * Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM. * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> */ #define INCLUDE_VERMAGIC #include <linux/export.h> #include <linux/extable.h> #include <linux/moduleloader.h> #include <linux/module_signature.h> #include <linux/trace_events.h> #include <linux/init.h> #include <linux/kallsyms.h> #include <linux/buildid.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/kernel_read_file.h> #include <linux/kstrtox.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/elf.h> #include <linux/seq_file.h> #include <linux/syscalls.h> #include <linux/fcntl.h> #include <linux/rcupdate.h> #include <linux/capability.h> #include <linux/cpu.h> #include <linux/moduleparam.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/vermagic.h> #include <linux/notifier.h> #include <linux/sched.h> #include <linux/device.h> #include <linux/string.h> #include <linux/mutex.h> #include <linux/rculist.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> #include <linux/set_memory.h> #include <asm/mmu_context.h> #include <linux/license.h> #include <asm/sections.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> #include <linux/livepatch.h> #include <linux/async.h> #include <linux/percpu.h> #include <linux/kmemleak.h> #include <linux/jump_label.h> #include <linux/pfn.h> #include <linux/bsearch.h> #include <linux/dynamic_debug.h> #include <linux/audit.h> #include <linux/cfi.h> #include <linux/debugfs.h> #include <uapi/linux/module.h> #include "internal.h" #define CREATE_TRACE_POINTS #include <trace/events/module.h> /* * Mutex protects: * 1) List of modules (also safely readable with preempt_disable), * 2) module_use links, * 3) mod_tree.addr_min/mod_tree.addr_max.
* (delete and add uses RCU list operations). */ DEFINE_MUTEX(module_mutex); LIST_HEAD(modules); /* Work queue for freeing init sections in success case */ static void do_free_init(struct work_struct *w); static DECLARE_WORK(init_free_wq, do_free_init); static LLIST_HEAD(init_free_list); struct mod_tree_root mod_tree __cacheline_aligned = { .addr_min = -1UL, }; struct symsearch { const struct kernel_symbol *start, *stop; const s32 *crcs; enum mod_license license; }; /* * Bounds of module memory, for speeding up __module_address. * Protected by module_mutex. */ static void __mod_update_bounds(enum mod_mem_type type __maybe_unused, void *base, unsigned int size, struct mod_tree_root *tree) { unsigned long min = (unsigned long)base; unsigned long max = min + size; #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC if (mod_mem_type_is_core_data(type)) { if (min < tree->data_addr_min) tree->data_addr_min = min; if (max > tree->data_addr_max) tree->data_addr_max = max; return; } #endif if (min < tree->addr_min) tree->addr_min = min; if (max > tree->addr_max) tree->addr_max = max; } static void mod_update_bounds(struct module *mod) { for_each_mod_mem_type(type) { struct module_memory *mod_mem = &mod->mem[type]; if (mod_mem->size) __mod_update_bounds(type, mod_mem->base, mod_mem->size, &mod_tree); } } /* Block module loading/unloading? */ int modules_disabled; core_param(nomodule, modules_disabled, bint, 0); /* Waiting for a module to finish initializing? */ static DECLARE_WAIT_QUEUE_HEAD(module_wq); static BLOCKING_NOTIFIER_HEAD(module_notify_list); int register_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&module_notify_list, nb); } EXPORT_SYMBOL(register_module_notifier); int unregister_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&module_notify_list, nb); } EXPORT_SYMBOL(unregister_module_notifier); /* * We require a truly strong try_module_get(): 0 means success. * Otherwise an error is returned due to ongoing or failed * initialization etc. */ static inline int strong_try_module_get(struct module *mod) { BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED); if (mod && mod->state == MODULE_STATE_COMING) return -EBUSY; if (try_module_get(mod)) return 0; else return -ENOENT; } static inline void add_taint_module(struct module *mod, unsigned flag, enum lockdep_ok lockdep_ok) { add_taint(flag, lockdep_ok); set_bit(flag, &mod->taints); } /* * A thread that wants to hold a reference to a module only while it * is running can call this to safely exit. */ void __noreturn __module_put_and_kthread_exit(struct module *mod, long code) { module_put(mod); kthread_exit(code); } EXPORT_SYMBOL(__module_put_and_kthread_exit); /* Find a module section: 0 means not found. */ static unsigned int find_sec(const struct load_info *info, const char *name) { unsigned int i; for (i = 1; i < info->hdr->e_shnum; i++) { Elf_Shdr *shdr = &info->sechdrs[i]; /* Alloc bit cleared means "ignore it." */ if ((shdr->sh_flags & SHF_ALLOC) && strcmp(info->secstrings + shdr->sh_name, name) == 0) return i; } return 0; } /* Find a module section, or NULL. */ static void *section_addr(const struct load_info *info, const char *name) { /* Section 0 has sh_addr 0. */ return (void *)info->sechdrs[find_sec(info, name)].sh_addr; } /* Find a module section, or NULL. Fill in number of "objects" in section. 
*/ static void *section_objs(const struct load_info *info, const char *name, size_t object_size, unsigned int *num) { unsigned int sec = find_sec(info, name); /* Section 0 has sh_addr 0 and sh_size 0. */ *num = info->sechdrs[sec].sh_size / object_size; return (void *)info->sechdrs[sec].sh_addr; } /* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */ static unsigned int find_any_sec(const struct load_info *info, const char *name) { unsigned int i; for (i = 1; i < info->hdr->e_shnum; i++) { Elf_Shdr *shdr = &info->sechdrs[i]; if (strcmp(info->secstrings + shdr->sh_name, name) == 0) return i; } return 0; } /* * Find a module section, or NULL. Fill in number of "objects" in section. * Ignores SHF_ALLOC flag. */ static __maybe_unused void *any_section_objs(const struct load_info *info, const char *name, size_t object_size, unsigned int *num) { unsigned int sec = find_any_sec(info, name); /* Section 0 has sh_addr 0 and sh_size 0. */ *num = info->sechdrs[sec].sh_size / object_size; return (void *)info->sechdrs[sec].sh_addr; } #ifndef CONFIG_MODVERSIONS #define symversion(base, idx) NULL #else #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) #endif static const char *kernel_symbol_name(const struct kernel_symbol *sym) { #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS return offset_to_ptr(&sym->name_offset); #else return sym->name; #endif } static const char *kernel_symbol_namespace(const struct kernel_symbol *sym) { #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS if (!sym->namespace_offset) return NULL; return offset_to_ptr(&sym->namespace_offset); #else return sym->namespace; #endif } int cmp_name(const void *name, const void *sym) { return strcmp(name, kernel_symbol_name(sym)); } static bool find_exported_symbol_in_section(const struct symsearch *syms, struct module *owner, struct find_symbol_arg *fsa) { struct kernel_symbol *sym; if (!fsa->gplok && syms->license == GPL_ONLY) return false; sym = bsearch(fsa->name, syms->start, syms->stop - syms->start, sizeof(struct kernel_symbol), cmp_name); if (!sym) return false; fsa->owner = owner; fsa->crc = symversion(syms->crcs, sym - syms->start); fsa->sym = sym; fsa->license = syms->license; return true; } /* * Find an exported symbol and return it, along with, (optional) crc and * (optional) module which owns it. Needs preempt disabled or module_mutex. */ bool find_symbol(struct find_symbol_arg *fsa) { static const struct symsearch arr[] = { { __start___ksymtab, __stop___ksymtab, __start___kcrctab, NOT_GPL_ONLY }, { __start___ksymtab_gpl, __stop___ksymtab_gpl, __start___kcrctab_gpl, GPL_ONLY }, }; struct module *mod; unsigned int i; module_assert_mutex_or_preempt(); for (i = 0; i < ARRAY_SIZE(arr); i++) if (find_exported_symbol_in_section(&arr[i], NULL, fsa)) return true; list_for_each_entry_rcu(mod, &modules, list, lockdep_is_held(&module_mutex)) { struct symsearch arr[] = { { mod->syms, mod->syms + mod->num_syms, mod->crcs, NOT_GPL_ONLY }, { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, mod->gpl_crcs, GPL_ONLY }, }; if (mod->state == MODULE_STATE_UNFORMED) continue; for (i = 0; i < ARRAY_SIZE(arr); i++) if (find_exported_symbol_in_section(&arr[i], mod, fsa)) return true; } pr_debug("Failed to find symbol %s\n", fsa->name); return false; } /* * Search for module by name: must hold module_mutex (or preempt disabled * for read-only access). 
*/ struct module *find_module_all(const char *name, size_t len, bool even_unformed) { struct module *mod; module_assert_mutex_or_preempt(); list_for_each_entry_rcu(mod, &modules, list, lockdep_is_held(&module_mutex)) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) continue; if (strlen(mod->name) == len && !memcmp(mod->name, name, len)) return mod; } return NULL; } struct module *find_module(const char *name) { return find_module_all(name, strlen(name), false); } #ifdef CONFIG_SMP static inline void __percpu *mod_percpu(struct module *mod) { return mod->percpu; } static int percpu_modalloc(struct module *mod, struct load_info *info) { Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu]; unsigned long align = pcpusec->sh_addralign; if (!pcpusec->sh_size) return 0; if (align > PAGE_SIZE) { pr_warn("%s: per-cpu alignment %li > %li\n", mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } mod->percpu = __alloc_reserved_percpu(pcpusec->sh_size, align); if (!mod->percpu) { pr_warn("%s: Could not allocate %lu bytes percpu data\n", mod->name, (unsigned long)pcpusec->sh_size); return -ENOMEM; } mod->percpu_size = pcpusec->sh_size; return 0; } static void percpu_modfree(struct module *mod) { free_percpu(mod->percpu); } static unsigned int find_pcpusec(struct load_info *info) { return find_sec(info, ".data..percpu"); } static void percpu_modcopy(struct module *mod, const void *from, unsigned long size) { int cpu; for_each_possible_cpu(cpu) memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) { struct module *mod; unsigned int cpu; preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; if (!mod->percpu_size) continue; for_each_possible_cpu(cpu) { void *start = per_cpu_ptr(mod->percpu, cpu); void *va = (void *)addr; if (va >= start && va < start + mod->percpu_size) { if (can_addr) { *can_addr = (unsigned long) (va - start); *can_addr += (unsigned long) per_cpu_ptr(mod->percpu, get_boot_cpu_id()); } preempt_enable(); return true; } } } preempt_enable(); return false; } /** * is_module_percpu_address() - test whether address is from module static percpu * @addr: address to test * * Test whether @addr belongs to module static percpu area. * * Return: %true if @addr is from module static percpu area */ bool is_module_percpu_address(unsigned long addr) { return __is_module_percpu_address(addr, NULL); } #else /* ... !CONFIG_SMP */ static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } static int percpu_modalloc(struct module *mod, struct load_info *info) { /* UP modules shouldn't have this section: ENOMEM isn't quite right */ if (info->sechdrs[info->index.pcpu].sh_size != 0) return -ENOMEM; return 0; } static inline void percpu_modfree(struct module *mod) { } static unsigned int find_pcpusec(struct load_info *info) { return 0; } static inline void percpu_modcopy(struct module *mod, const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. 
*/ BUG_ON(size != 0); } bool is_module_percpu_address(unsigned long addr) { return false; } bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) { return false; } #endif /* CONFIG_SMP */ #define MODINFO_ATTR(field) \ static void setup_modinfo_##field(struct module *mod, const char *s) \ { \ mod->field = kstrdup(s, GFP_KERNEL); \ } \ static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ struct module_kobject *mk, char *buffer) \ { \ return scnprintf(buffer, PAGE_SIZE, "%s\n", mk->mod->field); \ } \ static int modinfo_##field##_exists(struct module *mod) \ { \ return mod->field != NULL; \ } \ static void free_modinfo_##field(struct module *mod) \ { \ kfree(mod->field); \ mod->field = NULL; \ } \ static struct module_attribute modinfo_##field = { \ .attr = { .name = __stringify(field), .mode = 0444 }, \ .show = show_modinfo_##field, \ .setup = setup_modinfo_##field, \ .test = modinfo_##field##_exists, \ .free = free_modinfo_##field, \ }; MODINFO_ATTR(version); MODINFO_ATTR(srcversion); static struct { char name[MODULE_NAME_LEN + 1]; char taints[MODULE_FLAGS_BUF_SIZE]; } last_unloaded_module; #ifdef CONFIG_MODULE_UNLOAD EXPORT_TRACEPOINT_SYMBOL(module_get); /* MODULE_REF_BASE is the base reference count by kmodule loader. */ #define MODULE_REF_BASE 1 /* Init the unload section of the module. */ static int module_unload_init(struct module *mod) { /* * Initialize reference counter to MODULE_REF_BASE. * refcnt == 0 means module is going. */ atomic_set(&mod->refcnt, MODULE_REF_BASE); INIT_LIST_HEAD(&mod->source_list); INIT_LIST_HEAD(&mod->target_list); /* Hold reference count during initialization. */ atomic_inc(&mod->refcnt); return 0; } /* Does a already use b? */ static int already_uses(struct module *a, struct module *b) { struct module_use *use; list_for_each_entry(use, &b->source_list, source_list) { if (use->source == a) return 1; } pr_debug("%s does not use %s!\n", a->name, b->name); return 0; } /* * Module a uses b * - we add 'a' as a "source", 'b' as a "target" of module use * - the module_use is added to the list of 'b' sources (so * 'b' can walk the list to see who sourced them), and of 'a' * targets (so 'a' can see what modules it targets). */ static int add_module_usage(struct module *a, struct module *b) { struct module_use *use; pr_debug("Allocating new usage for %s.\n", a->name); use = kmalloc(sizeof(*use), GFP_ATOMIC); if (!use) return -ENOMEM; use->source = a; use->target = b; list_add(&use->source_list, &b->source_list); list_add(&use->target_list, &a->target_list); return 0; } /* Module a uses b: caller needs module_mutex() */ static int ref_module(struct module *a, struct module *b) { int err; if (b == NULL || already_uses(a, b)) return 0; /* If module isn't available, we fail. */ err = strong_try_module_get(b); if (err) return err; err = add_module_usage(a, b); if (err) { module_put(b); return err; } return 0; } /* Clear the unload stuff of the module. 
*/ static void module_unload_free(struct module *mod) { struct module_use *use, *tmp; mutex_lock(&module_mutex); list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { struct module *i = use->target; pr_debug("%s unusing %s\n", mod->name, i->name); module_put(i); list_del(&use->source_list); list_del(&use->target_list); kfree(use); } mutex_unlock(&module_mutex); } #ifdef CONFIG_MODULE_FORCE_UNLOAD static inline int try_force_unload(unsigned int flags) { int ret = (flags & O_TRUNC); if (ret) add_taint(TAINT_FORCED_RMMOD, LOCKDEP_NOW_UNRELIABLE); return ret; } #else static inline int try_force_unload(unsigned int flags) { return 0; } #endif /* CONFIG_MODULE_FORCE_UNLOAD */ /* Try to release refcount of module, 0 means success. */ static int try_release_module_ref(struct module *mod) { int ret; /* Try to decrement refcnt which we set at loading */ ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt); BUG_ON(ret < 0); if (ret) /* Someone can put this right now, recover with checking */ ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0); return ret; } static int try_stop_module(struct module *mod, int flags, int *forced) { /* If it's not unused, quit unless we're forcing. */ if (try_release_module_ref(mod) != 0) { *forced = try_force_unload(flags); if (!(*forced)) return -EWOULDBLOCK; } /* Mark it as dying. */ mod->state = MODULE_STATE_GOING; return 0; } /** * module_refcount() - return the refcount or -1 if unloading * @mod: the module we're checking * * Return: * -1 if the module is in the process of unloading * otherwise the number of references in the kernel to the module */ int module_refcount(struct module *mod) { return atomic_read(&mod->refcnt) - MODULE_REF_BASE; } EXPORT_SYMBOL(module_refcount); /* This exists whether we can unload or not */ static void free_module(struct module *mod); SYSCALL_DEFINE2(delete_module, const char __user *, name_user, unsigned int, flags) { struct module *mod; char name[MODULE_NAME_LEN]; char buf[MODULE_FLAGS_BUF_SIZE]; int ret, forced = 0; if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; audit_log_kern_module(name); if (mutex_lock_interruptible(&module_mutex) != 0) return -EINTR; mod = find_module(name); if (!mod) { ret = -ENOENT; goto out; } if (!list_empty(&mod->source_list)) { /* Other modules depend on us: get rid of them first. */ ret = -EWOULDBLOCK; goto out; } /* Doing init or already dying? */ if (mod->state != MODULE_STATE_LIVE) { /* FIXME: if (force), slam module count damn the torpedoes */ pr_debug("%s already dying\n", mod->name); ret = -EBUSY; goto out; } /* If it has an init func, it must have an exit func to unload */ if (mod->init && !mod->exit) { forced = try_force_unload(flags); if (!forced) { /* This module can't be removed */ ret = -EBUSY; goto out; } } ret = try_stop_module(mod, flags, &forced); if (ret != 0) goto out; mutex_unlock(&module_mutex); /* Final destruction now no one is using it. 
*/ if (mod->exit != NULL) mod->exit(); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); klp_module_going(mod); ftrace_release_mod(mod); async_synchronize_full(); /* Store the name and taints of the last unloaded module for diagnostic purposes */ strscpy(last_unloaded_module.name, mod->name, sizeof(last_unloaded_module.name)); strscpy(last_unloaded_module.taints, module_flags(mod, buf, false), sizeof(last_unloaded_module.taints)); free_module(mod); /* someone could wait for the module in add_unformed_module() */ wake_up_all(&module_wq); return 0; out: mutex_unlock(&module_mutex); return ret; } void __symbol_put(const char *symbol) { struct find_symbol_arg fsa = { .name = symbol, .gplok = true, }; preempt_disable(); BUG_ON(!find_symbol(&fsa)); module_put(fsa.owner); preempt_enable(); } EXPORT_SYMBOL(__symbol_put); /* Note this assumes addr is a function, which it currently always is. */ void symbol_put_addr(void *addr) { struct module *modaddr; unsigned long a = (unsigned long)dereference_function_descriptor(addr); if (core_kernel_text(a)) return; /* * Even though we hold a reference on the module; we still need to * disable preemption in order to safely traverse the data structure. */ preempt_disable(); modaddr = __module_text_address(a); BUG_ON(!modaddr); module_put(modaddr); preempt_enable(); } EXPORT_SYMBOL_GPL(symbol_put_addr); static ssize_t show_refcnt(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { return sprintf(buffer, "%i\n", module_refcount(mk->mod)); } static struct module_attribute modinfo_refcnt = __ATTR(refcnt, 0444, show_refcnt, NULL); void __module_get(struct module *module) { if (module) { atomic_inc(&module->refcnt); trace_module_get(module, _RET_IP_); } } EXPORT_SYMBOL(__module_get); bool try_module_get(struct module *module) { bool ret = true; if (module) { /* Note: here, we can fail to get a reference */ if (likely(module_is_live(module) && atomic_inc_not_zero(&module->refcnt) != 0)) trace_module_get(module, _RET_IP_); else ret = false; } return ret; } EXPORT_SYMBOL(try_module_get); void module_put(struct module *module) { int ret; if (module) { ret = atomic_dec_if_positive(&module->refcnt); WARN_ON(ret < 0); /* Failed to put refcount */ trace_module_put(module, _RET_IP_); } } EXPORT_SYMBOL(module_put); #else /* !CONFIG_MODULE_UNLOAD */ static inline void module_unload_free(struct module *mod) { } static int ref_module(struct module *a, struct module *b) { return strong_try_module_get(b); } static inline int module_unload_init(struct module *mod) { return 0; } #endif /* CONFIG_MODULE_UNLOAD */ size_t module_flags_taint(unsigned long taints, char *buf) { size_t l = 0; int i; for (i = 0; i < TAINT_FLAGS_COUNT; i++) { if (taint_flags[i].module && test_bit(i, &taints)) buf[l++] = taint_flags[i].c_true; } return l; } static ssize_t show_initstate(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { const char *state = "unknown"; switch (mk->mod->state) { case MODULE_STATE_LIVE: state = "live"; break; case MODULE_STATE_COMING: state = "coming"; break; case MODULE_STATE_GOING: state = "going"; break; default: BUG(); } return sprintf(buffer, "%s\n", state); } static struct module_attribute modinfo_initstate = __ATTR(initstate, 0444, show_initstate, NULL); static ssize_t store_uevent(struct module_attribute *mattr, struct module_kobject *mk, const char *buffer, size_t count) { int rc; rc = kobject_synth_uevent(&mk->kobj, buffer, count); return rc ? 
rc : count; } struct module_attribute module_uevent = __ATTR(uevent, 0200, NULL, store_uevent); static ssize_t show_coresize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { unsigned int size = mk->mod->mem[MOD_TEXT].size; if (!IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC)) { for_class_mod_mem_type(type, core_data) size += mk->mod->mem[type].size; } return sprintf(buffer, "%u\n", size); } static struct module_attribute modinfo_coresize = __ATTR(coresize, 0444, show_coresize, NULL); #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC static ssize_t show_datasize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { unsigned int size = 0; for_class_mod_mem_type(type, core_data) size += mk->mod->mem[type].size; return sprintf(buffer, "%u\n", size); } static struct module_attribute modinfo_datasize = __ATTR(datasize, 0444, show_datasize, NULL); #endif static ssize_t show_initsize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { unsigned int size = 0; for_class_mod_mem_type(type, init) size += mk->mod->mem[type].size; return sprintf(buffer, "%u\n", size); } static struct module_attribute modinfo_initsize = __ATTR(initsize, 0444, show_initsize, NULL); static ssize_t show_taint(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { size_t l; l = module_flags_taint(mk->mod->taints, buffer); buffer[l++] = '\n'; return l; } static struct module_attribute modinfo_taint = __ATTR(taint, 0444, show_taint, NULL); struct module_attribute *modinfo_attrs[] = { &module_uevent, &modinfo_version, &modinfo_srcversion, &modinfo_initstate, &modinfo_coresize, #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC &modinfo_datasize, #endif &modinfo_initsize, &modinfo_taint, #ifdef CONFIG_MODULE_UNLOAD &modinfo_refcnt, #endif NULL, }; size_t modinfo_attrs_count = ARRAY_SIZE(modinfo_attrs); static const char vermagic[] = VERMAGIC_STRING; int try_to_force_load(struct module *mod, const char *reason) { #ifdef CONFIG_MODULE_FORCE_LOAD if (!test_taint(TAINT_FORCED_MODULE)) pr_warn("%s: %s: kernel tainted.\n", mod->name, reason); add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_NOW_UNRELIABLE); return 0; #else return -ENOEXEC; #endif } /* Parse tag=value strings from .modinfo section */ char *module_next_tag_pair(char *string, unsigned long *secsize) { /* Skip non-zero chars */ while (string[0]) { string++; if ((*secsize)-- <= 1) return NULL; } /* Skip any zero padding. */ while (!string[0]) { string++; if ((*secsize)-- <= 1) return NULL; } return string; } static char *get_next_modinfo(const struct load_info *info, const char *tag, char *prev) { char *p; unsigned int taglen = strlen(tag); Elf_Shdr *infosec = &info->sechdrs[info->index.info]; unsigned long size = infosec->sh_size; /* * get_modinfo() calls made before rewrite_section_headers() * must use sh_offset, as sh_addr isn't set! 
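 *
 * The section itself is just a blob of NUL-terminated "tag=value" strings,
 * e.g. "license=GPL\0author=...\0", which is why module_next_tag_pair()
 * only needs to skip to the next NUL and then past any zero padding.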
*/ char *modinfo = (char *)info->hdr + infosec->sh_offset; if (prev) { size -= prev - modinfo; modinfo = module_next_tag_pair(prev, &size); } for (p = modinfo; p; p = module_next_tag_pair(p, &size)) { if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') return p + taglen + 1; } return NULL; } static char *get_modinfo(const struct load_info *info, const char *tag) { return get_next_modinfo(info, tag, NULL); } static int verify_namespace_is_imported(const struct load_info *info, const struct kernel_symbol *sym, struct module *mod) { const char *namespace; char *imported_namespace; namespace = kernel_symbol_namespace(sym); if (namespace && namespace[0]) { for_each_modinfo_entry(imported_namespace, info, "import_ns") { if (strcmp(namespace, imported_namespace) == 0) return 0; } #ifdef CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS pr_warn( #else pr_err( #endif "%s: module uses symbol (%s) from namespace %s, but does not import it.\n", mod->name, kernel_symbol_name(sym), namespace); #ifndef CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS return -EINVAL; #endif } return 0; } static bool inherit_taint(struct module *mod, struct module *owner, const char *name) { if (!owner || !test_bit(TAINT_PROPRIETARY_MODULE, &owner->taints)) return true; if (mod->using_gplonly_symbols) { pr_err("%s: module using GPL-only symbols uses symbols %s from proprietary module %s.\n", mod->name, name, owner->name); return false; } if (!test_bit(TAINT_PROPRIETARY_MODULE, &mod->taints)) { pr_warn("%s: module uses symbols %s from proprietary module %s, inheriting taint.\n", mod->name, name, owner->name); set_bit(TAINT_PROPRIETARY_MODULE, &mod->taints); } return true; } /* Resolve a symbol for this module. I.e. if we find one, record usage. */ static const struct kernel_symbol *resolve_symbol(struct module *mod, const struct load_info *info, const char *name, char ownername[]) { struct find_symbol_arg fsa = { .name = name, .gplok = !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), .warn = true, }; int err; /* * The module_mutex should not be a heavily contended lock; * if we get the occasional sleep here, we'll go an extra iteration * in the wait_event_interruptible(), which is harmless. */ sched_annotate_sleep(); mutex_lock(&module_mutex); if (!find_symbol(&fsa)) goto unlock; if (fsa.license == GPL_ONLY) mod->using_gplonly_symbols = true; if (!inherit_taint(mod, fsa.owner, name)) { fsa.sym = NULL; goto getname; } if (!check_version(info, name, mod, fsa.crc)) { fsa.sym = ERR_PTR(-EINVAL); goto getname; } err = verify_namespace_is_imported(info, fsa.sym, mod); if (err) { fsa.sym = ERR_PTR(err); goto getname; } err = ref_module(mod, fsa.owner); if (err) { fsa.sym = ERR_PTR(err); goto getname; } getname: /* We must make copy under the lock if we failed to get ref. */ strncpy(ownername, module_name(fsa.owner), MODULE_NAME_LEN); unlock: mutex_unlock(&module_mutex); return fsa.sym; } static const struct kernel_symbol * resolve_symbol_wait(struct module *mod, const struct load_info *info, const char *name) { const struct kernel_symbol *ksym; char owner[MODULE_NAME_LEN]; if (wait_event_interruptible_timeout(module_wq, !IS_ERR(ksym = resolve_symbol(mod, info, name, owner)) || PTR_ERR(ksym) != -EBUSY, 30 * HZ) <= 0) { pr_warn("%s: gave up waiting for init of module %s.\n", mod->name, owner); } return ksym; } void __weak module_memfree(void *module_region) { /* * This memory may be RO, and freeing RO memory in an interrupt is not * supported by vmalloc. 
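 * do_free_init() below relies on this: freeing of init sections is pushed
 * to a workqueue so that module_memfree() always runs in process context.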
*/ WARN_ON(in_interrupt()); vfree(module_region); } void __weak module_arch_cleanup(struct module *mod) { } void __weak module_arch_freeing_init(struct module *mod) { } static bool mod_mem_use_vmalloc(enum mod_mem_type type) { return IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC) && mod_mem_type_is_core_data(type); } static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) { if (mod_mem_use_vmalloc(type)) return vzalloc(size); return module_alloc(size); } static void module_memory_free(void *ptr, enum mod_mem_type type) { if (mod_mem_use_vmalloc(type)) vfree(ptr); else module_memfree(ptr); } static void free_mod_mem(struct module *mod) { for_each_mod_mem_type(type) { struct module_memory *mod_mem = &mod->mem[type]; if (type == MOD_DATA) continue; /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) module_memory_free(mod_mem->base, type); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); } /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { trace_module_free(mod); mod_sysfs_teardown(mod); /* * We leave it in list to prevent duplicate loads, but make sure * that noone uses it while it's being deconstructed. */ mutex_lock(&module_mutex); mod->state = MODULE_STATE_UNFORMED; mutex_unlock(&module_mutex); /* Arch-specific cleanup. */ module_arch_cleanup(mod); /* Module unload stuff */ module_unload_free(mod); /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); if (is_livepatch_module(mod)) free_module_elf(mod); /* Now we can delete it from the lists */ mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); mod_tree_remove(mod); /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ synchronize_rcu(); if (try_add_tainted_module(mod)) pr_err("%s: adding tainted module to the unloaded tainted modules list failed.\n", mod->name); mutex_unlock(&module_mutex); /* This may be empty, but that's OK */ module_arch_freeing_init(mod); kfree(mod->args); percpu_modfree(mod); free_mod_mem(mod); } void *__symbol_get(const char *symbol) { struct find_symbol_arg fsa = { .name = symbol, .gplok = true, .warn = true, }; preempt_disable(); if (!find_symbol(&fsa)) goto fail; if (fsa.license != GPL_ONLY) { pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n", symbol); goto fail; } if (strong_try_module_get(fsa.owner)) goto fail; preempt_enable(); return (void *)kernel_symbol_value(fsa.sym); fail: preempt_enable(); return NULL; } EXPORT_SYMBOL_GPL(__symbol_get); /* * Ensure that an exported symbol [global namespace] does not already exist * in the kernel or in some other module's exported symbol table. * * You must hold the module_mutex. 
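 * complete_formation() calls this under module_mutex before the module is
 * switched to MODULE_STATE_COMING.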
*/ static int verify_exported_symbols(struct module *mod) { unsigned int i; const struct kernel_symbol *s; struct { const struct kernel_symbol *sym; unsigned int num; } arr[] = { { mod->syms, mod->num_syms }, { mod->gpl_syms, mod->num_gpl_syms }, }; for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { struct find_symbol_arg fsa = { .name = kernel_symbol_name(s), .gplok = true, }; if (find_symbol(&fsa)) { pr_err("%s: exports duplicate symbol %s" " (owned by %s)\n", mod->name, kernel_symbol_name(s), module_name(fsa.owner)); return -ENOEXEC; } } } return 0; } static bool ignore_undef_symbol(Elf_Half emachine, const char *name) { /* * On x86, PIC code and Clang non-PIC code may have call foo@PLT. GNU as * before 2.37 produces an unreferenced _GLOBAL_OFFSET_TABLE_ on x86-64. * i386 has a similar problem but may not deserve a fix. * * If we ever have to ignore many symbols, consider refactoring the code to * only warn if referenced by a relocation. */ if (emachine == EM_386 || emachine == EM_X86_64) return !strcmp(name, "_GLOBAL_OFFSET_TABLE_"); return false; } /* Change all symbols so that st_value encodes the pointer directly. */ static int simplify_symbols(struct module *mod, const struct load_info *info) { Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; Elf_Sym *sym = (void *)symsec->sh_addr; unsigned long secbase; unsigned int i; int ret = 0; const struct kernel_symbol *ksym; for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) { const char *name = info->strtab + sym[i].st_name; switch (sym[i].st_shndx) { case SHN_COMMON: /* Ignore common symbols */ if (!strncmp(name, "__gnu_lto", 9)) break; /* * We compiled with -fno-common. These are not * supposed to happen. */ pr_debug("Common symbol: %s\n", name); pr_warn("%s: please compile with -fno-common\n", mod->name); ret = -ENOEXEC; break; case SHN_ABS: /* Don't need to do anything */ pr_debug("Absolute symbol: 0x%08lx %s\n", (long)sym[i].st_value, name); break; case SHN_LIVEPATCH: /* Livepatch symbols are resolved by livepatch */ break; case SHN_UNDEF: ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { sym[i].st_value = kernel_symbol_value(ksym); break; } /* Ok if weak or ignored. */ if (!ksym && (ELF_ST_BIND(sym[i].st_info) == STB_WEAK || ignore_undef_symbol(info->hdr->e_machine, name))) break; ret = PTR_ERR(ksym) ?: -ENOENT; pr_warn("%s: Unknown symbol %s (err %d)\n", mod->name, name, ret); break; default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == info->index.pcpu) secbase = (unsigned long)mod_percpu(mod); else secbase = info->sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; break; } } return ret; } static int apply_relocations(struct module *mod, const struct load_info *info) { unsigned int i; int err = 0; /* Now do relocations. */ for (i = 1; i < info->hdr->e_shnum; i++) { unsigned int infosec = info->sechdrs[i].sh_info; /* Not a valid relocation section? 
*/ if (infosec >= info->hdr->e_shnum) continue; /* Don't bother with non-allocated sections */ if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) continue; if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH) err = klp_apply_section_relocs(mod, info->sechdrs, info->secstrings, info->strtab, info->index.sym, i, NULL); else if (info->sechdrs[i].sh_type == SHT_REL) err = apply_relocate(info->sechdrs, info->strtab, info->index.sym, i, mod); else if (info->sechdrs[i].sh_type == SHT_RELA) err = apply_relocate_add(info->sechdrs, info->strtab, info->index.sym, i, mod); if (err < 0) break; } return err; } /* Additional bytes needed by arch in front of individual sections */ unsigned int __weak arch_mod_section_prepend(struct module *mod, unsigned int section) { /* default implementation just returns zero */ return 0; } long module_get_offset_and_type(struct module *mod, enum mod_mem_type type, Elf_Shdr *sechdr, unsigned int section) { long offset; long mask = ((unsigned long)(type) & SH_ENTSIZE_TYPE_MASK) << SH_ENTSIZE_TYPE_SHIFT; mod->mem[type].size += arch_mod_section_prepend(mod, section); offset = ALIGN(mod->mem[type].size, sechdr->sh_addralign ?: 1); mod->mem[type].size = offset + sechdr->sh_size; WARN_ON_ONCE(offset & mask); return offset | mask; } bool module_init_layout_section(const char *sname) { #ifndef CONFIG_MODULE_UNLOAD if (module_exit_section(sname)) return true; #endif return module_init_section(sname); } static void __layout_sections(struct module *mod, struct load_info *info, bool is_init) { unsigned int m, i; static const unsigned long masks[][2] = { /* * NOTE: all executable code must be the first section * in this array; otherwise modify the text_size * finder in the two loops below */ { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL }, { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL }, { SHF_RO_AFTER_INIT | SHF_ALLOC, ARCH_SHF_SMALL }, { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL }, { ARCH_SHF_SMALL | SHF_ALLOC, 0 } }; static const int core_m_to_mem_type[] = { MOD_TEXT, MOD_RODATA, MOD_RO_AFTER_INIT, MOD_DATA, MOD_DATA, }; static const int init_m_to_mem_type[] = { MOD_INIT_TEXT, MOD_INIT_RODATA, MOD_INVALID, MOD_INIT_DATA, MOD_INIT_DATA, }; for (m = 0; m < ARRAY_SIZE(masks); ++m) { enum mod_mem_type type = is_init ? init_m_to_mem_type[m] : core_m_to_mem_type[m]; for (i = 0; i < info->hdr->e_shnum; ++i) { Elf_Shdr *s = &info->sechdrs[i]; const char *sname = info->secstrings + s->sh_name; if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL || is_init != module_init_layout_section(sname)) continue; if (WARN_ON_ONCE(type == MOD_INVALID)) continue; s->sh_entsize = module_get_offset_and_type(mod, type, s, i); pr_debug("\t%s\n", sname); } } } /* * Lay out the SHF_ALLOC sections in a way not dissimilar to how ld * might -- code, read-only data, read-write data, small data. Tally * sizes, and place the offsets into sh_entsize fields: high bit means it * belongs in init. 
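 *
 * Concretely, module_get_offset_and_type() above packs each section's
 * destination as
 *   sh_entsize = offset | ((type & SH_ENTSIZE_TYPE_MASK) << SH_ENTSIZE_TYPE_SHIFT)
 * and move_module() later splits it back apart using SH_ENTSIZE_TYPE_SHIFT
 * and SH_ENTSIZE_OFFSET_MASK.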
*/ static void layout_sections(struct module *mod, struct load_info *info) { unsigned int i; for (i = 0; i < info->hdr->e_shnum; i++) info->sechdrs[i].sh_entsize = ~0UL; pr_debug("Core section allocation order for %s:\n", mod->name); __layout_sections(mod, info, false); pr_debug("Init section allocation order for %s:\n", mod->name); __layout_sections(mod, info, true); } static void module_license_taint_check(struct module *mod, const char *license) { if (!license) license = "unspecified"; if (!license_is_gpl_compatible(license)) { if (!test_taint(TAINT_PROPRIETARY_MODULE)) pr_warn("%s: module license '%s' taints kernel.\n", mod->name, license); add_taint_module(mod, TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE); } } static void setup_modinfo(struct module *mod, struct load_info *info) { struct module_attribute *attr; int i; for (i = 0; (attr = modinfo_attrs[i]); i++) { if (attr->setup) attr->setup(mod, get_modinfo(info, attr->attr.name)); } } static void free_modinfo(struct module *mod) { struct module_attribute *attr; int i; for (i = 0; (attr = modinfo_attrs[i]); i++) { if (attr->free) attr->free(mod); } } void * __weak module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } bool __weak module_init_section(const char *name) { return strstarts(name, ".init"); } bool __weak module_exit_section(const char *name) { return strstarts(name, ".exit"); } static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) { #if defined(CONFIG_64BIT) unsigned long long secend; #else unsigned long secend; #endif /* * Check for both overflow and offset/size being * too large. */ secend = shdr->sh_offset + shdr->sh_size; if (secend < shdr->sh_offset || secend > info->len) return -ENOEXEC; return 0; } /* * Check userspace passed ELF module against our expectations, and cache * useful variables for further processing as we go. * * This does basic validity checks against section offsets and sizes, the * section name string table, and the indices used for it (sh_name). * * As a last step, since we're already checking the ELF sections we cache * useful variables which will be used later for our convenience: * * o pointers to section headers * o cache the modinfo symbol section * o cache the string symbol section * o cache the module section * * As a last step we set info->mod to the temporary copy of the module in * info->hdr. The final one will be allocated in move_module(). Any * modifications we make to our copy of the module will be carried over * to the final minted module. 
*/ static int elf_validity_cache_copy(struct load_info *info, int flags) { unsigned int i; Elf_Shdr *shdr, *strhdr; int err; unsigned int num_mod_secs = 0, mod_idx; unsigned int num_info_secs = 0, info_idx; unsigned int num_sym_secs = 0, sym_idx; if (info->len < sizeof(*(info->hdr))) { pr_err("Invalid ELF header len %lu\n", info->len); goto no_exec; } if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) { pr_err("Invalid ELF header magic: != %s\n", ELFMAG); goto no_exec; } if (info->hdr->e_type != ET_REL) { pr_err("Invalid ELF header type: %u != %u\n", info->hdr->e_type, ET_REL); goto no_exec; } if (!elf_check_arch(info->hdr)) { pr_err("Invalid architecture in ELF header: %u\n", info->hdr->e_machine); goto no_exec; } if (!module_elf_check_arch(info->hdr)) { pr_err("Invalid module architecture in ELF header: %u\n", info->hdr->e_machine); goto no_exec; } if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { pr_err("Invalid ELF section header size\n"); goto no_exec; } /* * e_shnum is 16 bits, and sizeof(Elf_Shdr) is * known and small. So e_shnum * sizeof(Elf_Shdr) * will not overflow unsigned long on any platform. */ if (info->hdr->e_shoff >= info->len || (info->hdr->e_shnum * sizeof(Elf_Shdr) > info->len - info->hdr->e_shoff)) { pr_err("Invalid ELF section header overflow\n"); goto no_exec; } info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; /* * Verify if the section name table index is valid. */ if (info->hdr->e_shstrndx == SHN_UNDEF || info->hdr->e_shstrndx >= info->hdr->e_shnum) { pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n", info->hdr->e_shstrndx, info->hdr->e_shstrndx, info->hdr->e_shnum); goto no_exec; } strhdr = &info->sechdrs[info->hdr->e_shstrndx]; err = validate_section_offset(info, strhdr); if (err < 0) { pr_err("Invalid ELF section hdr(type %u)\n", strhdr->sh_type); return err; } /* * The section name table must be NUL-terminated, as required * by the spec. This makes strcmp and pr_* calls that access * strings in the section safe. */ info->secstrings = (void *)info->hdr + strhdr->sh_offset; if (strhdr->sh_size == 0) { pr_err("empty section name table\n"); goto no_exec; } if (info->secstrings[strhdr->sh_size - 1] != '\0') { pr_err("ELF Spec violation: section name table isn't null terminated\n"); goto no_exec; } /* * The code assumes that section 0 has a length of zero and * an addr of zero, so check for it. 
*/ if (info->sechdrs[0].sh_type != SHT_NULL || info->sechdrs[0].sh_size != 0 || info->sechdrs[0].sh_addr != 0) { pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", info->sechdrs[0].sh_type); goto no_exec; } for (i = 1; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; switch (shdr->sh_type) { case SHT_NULL: case SHT_NOBITS: continue; case SHT_SYMTAB: if (shdr->sh_link == SHN_UNDEF || shdr->sh_link >= info->hdr->e_shnum) { pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", shdr->sh_link, shdr->sh_link, info->hdr->e_shnum); goto no_exec; } num_sym_secs++; sym_idx = i; fallthrough; default: err = validate_section_offset(info, shdr); if (err < 0) { pr_err("Invalid ELF section in module (section %u type %u)\n", i, shdr->sh_type); return err; } if (strcmp(info->secstrings + shdr->sh_name, ".gnu.linkonce.this_module") == 0) { num_mod_secs++; mod_idx = i; } else if (strcmp(info->secstrings + shdr->sh_name, ".modinfo") == 0) { num_info_secs++; info_idx = i; } if (shdr->sh_flags & SHF_ALLOC) { if (shdr->sh_name >= strhdr->sh_size) { pr_err("Invalid ELF section name in module (section %u type %u)\n", i, shdr->sh_type); return -ENOEXEC; } } break; } } if (num_info_secs > 1) { pr_err("Only one .modinfo section must exist.\n"); goto no_exec; } else if (num_info_secs == 1) { /* Try to find a name early so we can log errors with a module name */ info->index.info = info_idx; info->name = get_modinfo(info, "name"); } if (num_sym_secs != 1) { pr_warn("%s: module has no symbols (stripped?)\n", info->name ?: "(missing .modinfo section or name field)"); goto no_exec; } /* Sets internal symbols and strings. */ info->index.sym = sym_idx; shdr = &info->sechdrs[sym_idx]; info->index.str = shdr->sh_link; info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset; /* * The ".gnu.linkonce.this_module" ELF section is special. It is * what modpost uses to refer to __this_module and let's use rely * on THIS_MODULE to point to &__this_module properly. The kernel's * modpost declares it on each modules's *.mod.c file. If the struct * module of the kernel changes a full kernel rebuild is required. * * We have a few expectaions for this special section, the following * code validates all this for us: * * o Only one section must exist * o We expect the kernel to always have to allocate it: SHF_ALLOC * o The section size must match the kernel's run time's struct module * size */ if (num_mod_secs != 1) { pr_err("module %s: Only one .gnu.linkonce.this_module section must exist.\n", info->name ?: "(missing .modinfo section or name field)"); goto no_exec; } shdr = &info->sechdrs[mod_idx]; /* * This is already implied on the switch above, however let's be * pedantic about it. */ if (shdr->sh_type == SHT_NOBITS) { pr_err("module %s: .gnu.linkonce.this_module section must have a size set\n", info->name ?: "(missing .modinfo section or name field)"); goto no_exec; } if (!(shdr->sh_flags & SHF_ALLOC)) { pr_err("module %s: .gnu.linkonce.this_module must occupy memory during process execution\n", info->name ?: "(missing .modinfo section or name field)"); goto no_exec; } if (shdr->sh_size != sizeof(struct module)) { pr_err("module %s: .gnu.linkonce.this_module section size must match the kernel's built struct module size at run time\n", info->name ?: "(missing .modinfo section or name field)"); goto no_exec; } info->index.mod = mod_idx; /* This is temporary: point mod into copy of data. 
*/ info->mod = (void *)info->hdr + shdr->sh_offset; /* * If we didn't load the .modinfo 'name' field earlier, fall back to * on-disk struct mod 'name' field. */ if (!info->name) info->name = info->mod->name; if (flags & MODULE_INIT_IGNORE_MODVERSIONS) info->index.vers = 0; /* Pretend no __versions section! */ else info->index.vers = find_sec(info, "__versions"); info->index.pcpu = find_pcpusec(info); return 0; no_exec: return -ENOEXEC; } #define COPY_CHUNK_SIZE (16*PAGE_SIZE) static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned long len) { do { unsigned long n = min(len, COPY_CHUNK_SIZE); if (copy_from_user(dst, usrc, n) != 0) return -EFAULT; cond_resched(); dst += n; usrc += n; len -= n; } while (len); return 0; } static int check_modinfo_livepatch(struct module *mod, struct load_info *info) { if (!get_modinfo(info, "livepatch")) /* Nothing more to do */ return 0; if (set_livepatch_module(mod)) return 0; pr_err("%s: module is marked as livepatch module, but livepatch support is disabled", mod->name); return -ENOEXEC; } static void check_modinfo_retpoline(struct module *mod, struct load_info *info) { if (retpoline_module_ok(get_modinfo(info, "retpoline"))) return; pr_warn("%s: loading module not compiled with retpoline compiler.\n", mod->name); } /* Sets info->hdr and info->len. */ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) { int err; info->len = len; if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; err = security_kernel_load_data(LOADING_MODULE, true); if (err) return err; /* Suck in entire file: we'll want most of it. */ info->hdr = __vmalloc(info->len, GFP_KERNEL | __GFP_NOWARN); if (!info->hdr) return -ENOMEM; if (copy_chunked_from_user(info->hdr, umod, info->len) != 0) { err = -EFAULT; goto out; } err = security_kernel_post_load_data((char *)info->hdr, info->len, LOADING_MODULE, "init_module"); out: if (err) vfree(info->hdr); return err; } static void free_copy(struct load_info *info, int flags) { if (flags & MODULE_INIT_COMPRESSED_FILE) module_decompress_cleanup(info); else vfree(info->hdr); } static int rewrite_section_headers(struct load_info *info, int flags) { unsigned int i; /* This should always be true, but let's be sure. */ info->sechdrs[0].sh_addr = 0; for (i = 1; i < info->hdr->e_shnum; i++) { Elf_Shdr *shdr = &info->sechdrs[i]; /* * Mark all sections sh_addr with their address in the * temporary image. */ shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset; } /* Track but don't keep modinfo and version sections. 
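 * Clearing SHF_ALLOC below keeps layout_sections() and move_module() from
 * giving them space in the final module image; they stay readable only in
 * the temporary copy at info->hdr.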
*/ info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; return 0; } /* * These calls taint the kernel depending certain module circumstances */ static void module_augment_kernel_taints(struct module *mod, struct load_info *info) { int prev_taint = test_taint(TAINT_PROPRIETARY_MODULE); if (!get_modinfo(info, "intree")) { if (!test_taint(TAINT_OOT_MODULE)) pr_warn("%s: loading out-of-tree module taints kernel.\n", mod->name); add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); } check_modinfo_retpoline(mod, info); if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); pr_warn("%s: module is from the staging directory, the quality " "is unknown, you have been warned.\n", mod->name); } if (is_livepatch_module(mod)) { add_taint_module(mod, TAINT_LIVEPATCH, LOCKDEP_STILL_OK); pr_notice_once("%s: tainting kernel with TAINT_LIVEPATCH\n", mod->name); } module_license_taint_check(mod, get_modinfo(info, "license")); if (get_modinfo(info, "test")) { if (!test_taint(TAINT_TEST)) pr_warn("%s: loading test module taints kernel.\n", mod->name); add_taint_module(mod, TAINT_TEST, LOCKDEP_STILL_OK); } #ifdef CONFIG_MODULE_SIG mod->sig_ok = info->sig_ok; if (!mod->sig_ok) { pr_notice_once("%s: module verification failed: signature " "and/or required key missing - tainting " "kernel\n", mod->name); add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK); } #endif /* * ndiswrapper is under GPL by itself, but loads proprietary modules. * Don't use add_taint_module(), as it would prevent ndiswrapper from * using GPL-only symbols it needs. */ if (strcmp(mod->name, "ndiswrapper") == 0) add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE); /* driverloader was caught wrongly pretending to be under GPL */ if (strcmp(mod->name, "driverloader") == 0) add_taint_module(mod, TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE); /* lve claims to be GPL but upstream won't provide source */ if (strcmp(mod->name, "lve") == 0) add_taint_module(mod, TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE); if (!prev_taint && test_taint(TAINT_PROPRIETARY_MODULE)) pr_warn("%s: module license taints kernel.\n", mod->name); } static int check_modinfo(struct module *mod, struct load_info *info, int flags) { const char *modmagic = get_modinfo(info, "vermagic"); int err; if (flags & MODULE_INIT_IGNORE_VERMAGIC) modmagic = NULL; /* This is allowed: modprobe --force will invalidate it. 
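 * (vermagic is, roughly, the kernel release plus option flags such as SMP
 * and mod_unload; a missing string is treated like a forced load, while a
 * mismatch is rejected with -ENOEXEC just below.)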
*/ if (!modmagic) { err = try_to_force_load(mod, "bad vermagic"); if (err) return err; } else if (!same_magic(modmagic, vermagic, info->index.vers)) { pr_err("%s: version magic '%s' should be '%s'\n", info->name, modmagic, vermagic); return -ENOEXEC; } err = check_modinfo_livepatch(mod, info); if (err) return err; return 0; } static int find_module_sections(struct module *mod, struct load_info *info) { mod->kp = section_objs(info, "__param", sizeof(*mod->kp), &mod->num_kp); mod->syms = section_objs(info, "__ksymtab", sizeof(*mod->syms), &mod->num_syms); mod->crcs = section_addr(info, "__kcrctab"); mod->gpl_syms = section_objs(info, "__ksymtab_gpl", sizeof(*mod->gpl_syms), &mod->num_gpl_syms); mod->gpl_crcs = section_addr(info, "__kcrctab_gpl"); #ifdef CONFIG_CONSTRUCTORS mod->ctors = section_objs(info, ".ctors", sizeof(*mod->ctors), &mod->num_ctors); if (!mod->ctors) mod->ctors = section_objs(info, ".init_array", sizeof(*mod->ctors), &mod->num_ctors); else if (find_sec(info, ".init_array")) { /* * This shouldn't happen with same compiler and binutils * building all parts of the module. */ pr_warn("%s: has both .ctors and .init_array.\n", mod->name); return -EINVAL; } #endif mod->noinstr_text_start = section_objs(info, ".noinstr.text", 1, &mod->noinstr_text_size); #ifdef CONFIG_TRACEPOINTS mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", sizeof(*mod->tracepoints_ptrs), &mod->num_tracepoints); #endif #ifdef CONFIG_TREE_SRCU mod->srcu_struct_ptrs = section_objs(info, "___srcu_struct_ptrs", sizeof(*mod->srcu_struct_ptrs), &mod->num_srcu_structs); #endif #ifdef CONFIG_BPF_EVENTS mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map", sizeof(*mod->bpf_raw_events), &mod->num_bpf_raw_events); #endif #ifdef CONFIG_DEBUG_INFO_BTF_MODULES mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size); #endif #ifdef CONFIG_JUMP_LABEL mod->jump_entries = section_objs(info, "__jump_table", sizeof(*mod->jump_entries), &mod->num_jump_entries); #endif #ifdef CONFIG_EVENT_TRACING mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), &mod->num_trace_events); mod->trace_evals = section_objs(info, "_ftrace_eval_map", sizeof(*mod->trace_evals), &mod->num_trace_evals); #endif #ifdef CONFIG_TRACING mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", sizeof(*mod->trace_bprintk_fmt_start), &mod->num_trace_bprintk_fmt); #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ mod->ftrace_callsites = section_objs(info, FTRACE_CALLSITE_SECTION, sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif #ifdef CONFIG_FUNCTION_ERROR_INJECTION mod->ei_funcs = section_objs(info, "_error_injection_whitelist", sizeof(*mod->ei_funcs), &mod->num_ei_funcs); #endif #ifdef CONFIG_KPROBES mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1, &mod->kprobes_text_size); mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist", sizeof(unsigned long), &mod->num_kprobe_blacklist); #endif #ifdef CONFIG_PRINTK_INDEX mod->printk_index_start = section_objs(info, ".printk_index", sizeof(*mod->printk_index_start), &mod->printk_index_size); #endif #ifdef CONFIG_HAVE_STATIC_CALL_INLINE mod->static_call_sites = section_objs(info, ".static_call_sites", sizeof(*mod->static_call_sites), &mod->num_static_call_sites); #endif #if IS_ENABLED(CONFIG_KUNIT) mod->kunit_suites = section_objs(info, ".kunit_test_suites", sizeof(*mod->kunit_suites), &mod->num_kunit_suites); mod->kunit_init_suites = section_objs(info, 
".kunit_init_test_suites", sizeof(*mod->kunit_init_suites), &mod->num_kunit_init_suites); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); if (section_addr(info, "__obsparm")) pr_warn("%s: Ignoring obsolete parameters\n", mod->name); #ifdef CONFIG_DYNAMIC_DEBUG_CORE mod->dyndbg_info.descs = section_objs(info, "__dyndbg", sizeof(*mod->dyndbg_info.descs), &mod->dyndbg_info.num_descs); mod->dyndbg_info.classes = section_objs(info, "__dyndbg_classes", sizeof(*mod->dyndbg_info.classes), &mod->dyndbg_info.num_classes); #endif return 0; } static int move_module(struct module *mod, struct load_info *info) { int i; void *ptr; enum mod_mem_type t = 0; int ret = -ENOMEM; for_each_mod_mem_type(type) { if (!mod->mem[type].size) { mod->mem[type].base = NULL; continue; } mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size); ptr = module_memory_alloc(mod->mem[type].size, type); /* * The pointer to these blocks of memory are stored on the module * structure and we keep that around so long as the module is * around. We only free that memory when we unload the module. * Just mark them as not being a leak then. The .init* ELF * sections *do* get freed after boot so we *could* treat them * slightly differently with kmemleak_ignore() and only grey * them out as they work as typical memory allocations which * *do* eventually get freed, but let's just keep things simple * and avoid *any* false positives. */ kmemleak_not_leak(ptr); if (!ptr) { t = type; goto out_enomem; } memset(ptr, 0, mod->mem[type].size); mod->mem[type].base = ptr; } /* Transfer each section which specifies SHF_ALLOC */ pr_debug("Final section addresses for %s:\n", mod->name); for (i = 0; i < info->hdr->e_shnum; i++) { void *dest; Elf_Shdr *shdr = &info->sechdrs[i]; enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; if (!(shdr->sh_flags & SHF_ALLOC)) continue; dest = mod->mem[type].base + (shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK); if (shdr->sh_type != SHT_NOBITS) { /* * Our ELF checker already validated this, but let's * be pedantic and make the goal clearer. We actually * end up copying over all modifications made to the * userspace copy of the entire struct module. */ if (i == info->index.mod && (WARN_ON_ONCE(shdr->sh_size != sizeof(struct module)))) { ret = -ENOEXEC; goto out_enomem; } memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); } /* * Update the userspace copy's ELF section address to point to * our newly allocated memory as a pure convenience so that * users of info can keep taking advantage and using the newly * minted official memory area. */ shdr->sh_addr = (unsigned long)dest; pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr, (long)shdr->sh_size, info->secstrings + shdr->sh_name); } return 0; out_enomem: for (t--; t >= 0; t--) module_memory_free(mod->mem[t].base, t); return ret; } static int check_export_symbol_versions(struct module *mod) { #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs)) { return try_to_force_load(mod, "no versions for exported symbols"); } #endif return 0; } static void flush_module_icache(const struct module *mod) { /* * Flush the instruction cache, since we've played with text. * Do it before processing of module parameters, so the module * can provide parameter accessor functions of its own. 
*/ for_each_mod_mem_type(type) { const struct module_memory *mod_mem = &mod->mem[type]; if (mod_mem->size) { flush_icache_range((unsigned long)mod_mem->base, (unsigned long)mod_mem->base + mod_mem->size); } } } bool __weak module_elf_check_arch(Elf_Ehdr *hdr) { return true; } int __weak module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { return 0; } /* module_blacklist is a comma-separated list of module names */ static char *module_blacklist; static bool blacklisted(const char *module_name) { const char *p; size_t len; if (!module_blacklist) return false; for (p = module_blacklist; *p; p += len) { len = strcspn(p, ","); if (strlen(module_name) == len && !memcmp(module_name, p, len)) return true; if (p[len] == ',') len++; } return false; } core_param(module_blacklist, module_blacklist, charp, 0400); static struct module *layout_and_allocate(struct load_info *info, int flags) { struct module *mod; unsigned int ndx; int err; /* Allow arches to frob section contents and sizes. */ err = module_frob_arch_sections(info->hdr, info->sechdrs, info->secstrings, info->mod); if (err < 0) return ERR_PTR(err); err = module_enforce_rwx_sections(info->hdr, info->sechdrs, info->secstrings, info->mod); if (err < 0) return ERR_PTR(err); /* We will do a special allocation for per-cpu sections later. */ info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; /* * Mark ro_after_init section with SHF_RO_AFTER_INIT so that * layout_sections() can put it in the right place. * Note: ro_after_init sections also have SHF_{WRITE,ALLOC} set. */ ndx = find_sec(info, ".data..ro_after_init"); if (ndx) info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT; /* * Mark the __jump_table section as ro_after_init as well: these data * structures are never modified, with the exception of entries that * refer to code in the __init section, which are annotated as such * at module load time. */ ndx = find_sec(info, "__jump_table"); if (ndx) info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT; /* * Determine total sizes, and put offsets in sh_entsize. For now * this is done generically; there doesn't appear to be any * special cases for the architectures. */ layout_sections(info->mod, info); layout_symtab(info->mod, info); /* Allocate and move to the final place */ err = move_module(info->mod, info); if (err) return ERR_PTR(err); /* Module has been copied to its final place now: return it. */ mod = (void *)info->sechdrs[info->index.mod].sh_addr; kmemleak_load_module(mod, info); return mod; } /* mod is no longer valid after this! */ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); free_mod_mem(mod); } int __weak module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { return 0; } static int post_relocation(struct module *mod, const struct load_info *info) { /* Sort exception table now relocations are done. */ sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Copy relocated percpu area over. */ percpu_modcopy(mod, (void *)info->sechdrs[info->index.pcpu].sh_addr, info->sechdrs[info->index.pcpu].sh_size); /* Setup kallsyms-specific fields. */ add_kallsyms(mod, info); /* Arch-specific module finalizing. */ return module_finalize(info->hdr, info->sechdrs, mod); } /* Call module constructors. 
*/ static void do_mod_ctors(struct module *mod) { #ifdef CONFIG_CONSTRUCTORS unsigned long i; for (i = 0; i < mod->num_ctors; i++) mod->ctors[i](); #endif } /* For freeing module_init on success, in case kallsyms traversing */ struct mod_initfree { struct llist_node node; void *init_text; void *init_data; void *init_rodata; }; static void do_free_init(struct work_struct *w) { struct llist_node *pos, *n, *list; struct mod_initfree *initfree; list = llist_del_all(&init_free_list); synchronize_rcu(); llist_for_each_safe(pos, n, list) { initfree = container_of(pos, struct mod_initfree, node); module_memfree(initfree->init_text); module_memfree(initfree->init_data); module_memfree(initfree->init_rodata); kfree(initfree); } } void flush_module_init_free_work(void) { flush_work(&init_free_wq); } #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "module." /* Default value for module->async_probe_requested */ static bool async_probe; module_param(async_probe, bool, 0644); /* * This is where the real work happens. * * Keep it uninlined to provide a reliable breakpoint target, e.g. for the gdb * helper command 'lx-symbols'. */ static noinline int do_init_module(struct module *mod) { int ret = 0; struct mod_initfree *freeinit; #if defined(CONFIG_MODULE_STATS) unsigned int text_size = 0, total_size = 0; for_each_mod_mem_type(type) { const struct module_memory *mod_mem = &mod->mem[type]; if (mod_mem->size) { total_size += mod_mem->size; if (type == MOD_TEXT || type == MOD_INIT_TEXT) text_size += mod_mem->size; } } #endif freeinit = kmalloc(sizeof(*freeinit), GFP_KERNEL); if (!freeinit) { ret = -ENOMEM; goto fail; } freeinit->init_text = mod->mem[MOD_INIT_TEXT].base; freeinit->init_data = mod->mem[MOD_INIT_DATA].base; freeinit->init_rodata = mod->mem[MOD_INIT_RODATA].base; do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) ret = do_one_initcall(mod->init); if (ret < 0) { goto fail_free_freeinit; } if (ret > 0) { pr_warn("%s: '%s'->init suspiciously returned %d, it should " "follow 0/-E convention\n" "%s: loading module anyway...\n", __func__, mod->name, ret, __func__); dump_stack(); } /* Now it's a first class citizen! */ mod->state = MODULE_STATE_LIVE; blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_LIVE, mod); /* Delay uevent until module has finished its init routine */ kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); /* * We need to finish all async code before the module init sequence * is done. This has potential to deadlock if synchronous module * loading is requested from async (which is not allowed!). * * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous * request_module() from async workers") for more details. */ if (!mod->async_probe_requested) async_synchronize_full(); ftrace_free_mem(mod, mod->mem[MOD_INIT_TEXT].base, mod->mem[MOD_INIT_TEXT].base + mod->mem[MOD_INIT_TEXT].size); mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); trim_init_extable(mod); #ifdef CONFIG_KALLSYMS /* Switch to core kallsyms now init is done: kallsyms may be walking! 
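 * The rcu_assign_pointer() below publishes the core copy so that RCU
 * readers never end up walking symbol data that lives in the soon-to-be
 * freed init memory.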
*/ rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); #endif ret = module_enable_rodata_ro(mod, true); if (ret) goto fail_mutex_unlock; mod_tree_remove_init(mod); module_arch_freeing_init(mod); for_class_mod_mem_type(type, init) { mod->mem[type].base = NULL; mod->mem[type].size = 0; } #ifdef CONFIG_DEBUG_INFO_BTF_MODULES /* .BTF is not SHF_ALLOC and will get removed, so sanitize pointer */ mod->btf_data = NULL; #endif /* * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we * call synchronize_rcu(), but we don't want to slow down the success * path. module_memfree() cannot be called in an interrupt, so do the * work and call synchronize_rcu() in a work queue. * * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work by invoking * flush_module_init_free_work(). */ if (llist_add(&freeinit->node, &init_free_list)) schedule_work(&init_free_wq); mutex_unlock(&module_mutex); wake_up_all(&module_wq); mod_stat_add_long(text_size, &total_text_size); mod_stat_add_long(total_size, &total_mod_size); mod_stat_inc(&modcount); return 0; fail_mutex_unlock: mutex_unlock(&module_mutex); fail_free_freeinit: kfree(freeinit); fail: /* Try to protect us from buggy refcounters. */ mod->state = MODULE_STATE_GOING; synchronize_rcu(); module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); klp_module_going(mod); ftrace_release_mod(mod); free_module(mod); wake_up_all(&module_wq); return ret; } static int may_init_module(void) { if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; return 0; } /* Is this module of this name done loading? No locks held. */ static bool finished_loading(const char *name) { struct module *mod; bool ret; /* * The module_mutex should not be a heavily contended lock; * if we get the occasional sleep here, we'll go an extra iteration * in the wait_event_interruptible(), which is harmless. */ sched_annotate_sleep(); mutex_lock(&module_mutex); mod = find_module_all(name, strlen(name), true); ret = !mod || mod->state == MODULE_STATE_LIVE || mod->state == MODULE_STATE_GOING; mutex_unlock(&module_mutex); return ret; } /* Must be called with module_mutex held */ static int module_patient_check_exists(const char *name, enum fail_dup_mod_reason reason) { struct module *old; int err = 0; old = find_module_all(name, strlen(name), true); if (old == NULL) return 0; if (old->state == MODULE_STATE_COMING || old->state == MODULE_STATE_UNFORMED) { /* Wait in case it fails to load. */ mutex_unlock(&module_mutex); err = wait_event_interruptible(module_wq, finished_loading(name)); mutex_lock(&module_mutex); if (err) return err; /* The module might have gone in the meantime. */ old = find_module_all(name, strlen(name), true); } if (try_add_failed_module(name, reason)) pr_warn("Could not add fail-tracking for module: %s\n", name); /* * We are here only when the same module was being loaded. Do * not try to load it again right now. It prevents long delays * caused by serialized module load failures. It might happen * when more devices of the same type trigger load of * a particular module. */ if (old && old->state == MODULE_STATE_LIVE) return -EEXIST; return -EBUSY; } /* * We try to place it in the list now to make sure it's unique before * we dedicate too many resources. 
In particular, temporary percpu * memory exhaustion. */ static int add_unformed_module(struct module *mod) { int err; mod->state = MODULE_STATE_UNFORMED; mutex_lock(&module_mutex); err = module_patient_check_exists(mod->name, FAIL_DUP_MOD_LOAD); if (err) goto out; mod_update_bounds(mod); list_add_rcu(&mod->list, &modules); mod_tree_insert(mod); err = 0; out: mutex_unlock(&module_mutex); return err; } static int complete_formation(struct module *mod, struct load_info *info) { int err; mutex_lock(&module_mutex); /* Find duplicate symbols (must be called under lock). */ err = verify_exported_symbols(mod); if (err < 0) goto out; /* These rely on module_mutex for list integrity. */ module_bug_finalize(info->hdr, info->sechdrs, mod); module_cfi_finalize(info->hdr, info->sechdrs, mod); err = module_enable_rodata_ro(mod, false); if (err) goto out_strict_rwx; err = module_enable_data_nx(mod); if (err) goto out_strict_rwx; err = module_enable_text_rox(mod); if (err) goto out_strict_rwx; /* * Mark state as coming so strong_try_module_get() ignores us, * but kallsyms etc. can see us. */ mod->state = MODULE_STATE_COMING; mutex_unlock(&module_mutex); return 0; out_strict_rwx: module_bug_cleanup(mod); out: mutex_unlock(&module_mutex); return err; } static int prepare_coming_module(struct module *mod) { int err; ftrace_module_enable(mod); err = klp_module_coming(mod); if (err) return err; err = blocking_notifier_call_chain_robust(&module_notify_list, MODULE_STATE_COMING, MODULE_STATE_GOING, mod); err = notifier_to_errno(err); if (err) klp_module_going(mod); return err; } static int unknown_module_param_cb(char *param, char *val, const char *modname, void *arg) { struct module *mod = arg; int ret; if (strcmp(param, "async_probe") == 0) { if (kstrtobool(val, &mod->async_probe_requested)) mod->async_probe_requested = true; return 0; } /* Check for magic 'dyndbg' arg */ ret = ddebug_dyndbg_module_param_cb(param, val, modname); if (ret != 0) pr_warn("%s: unknown parameter '%s' ignored\n", modname, param); return 0; } /* Module within temporary copy, this doesn't do any allocation */ static int early_mod_check(struct load_info *info, int flags) { int err; /* * Now that we know we have the correct module name, check * if it's blacklisted. */ if (blacklisted(info->name)) { pr_err("Module %s is blacklisted\n", info->name); return -EPERM; } err = rewrite_section_headers(info, flags); if (err) return err; /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(info, info->mod)) return -ENOEXEC; err = check_modinfo(info->mod, info, flags); if (err) return err; mutex_lock(&module_mutex); err = module_patient_check_exists(info->mod->name, FAIL_DUP_MOD_BECOMING); mutex_unlock(&module_mutex); return err; } /* * Allocate and load the module: note that size of section 0 is always * zero, and we rely on this for optional sections. */ static int load_module(struct load_info *info, const char __user *uargs, int flags) { struct module *mod; bool module_allocated = false; long err = 0; char *after_dashes; /* * Do the signature check (if any) first. All that * the signature check needs is info->len, it does * not need any of the section info. That can be * set up later. This will minimize the chances * of a corrupt module causing problems before * we even get to the signature check. * * The check will also adjust info->len by stripping * off the sig length at the end of the module, making * checks against info->len more correct. 
*/ err = module_sig_check(info, flags); if (err) goto free_copy; /* * Do basic sanity checks against the ELF header and * sections. Cache useful sections and set the * info->mod to the userspace passed struct module. */ err = elf_validity_cache_copy(info, flags); if (err) goto free_copy; err = early_mod_check(info, flags); if (err) goto free_copy; /* Figure out module layout, and allocate all the memory. */ mod = layout_and_allocate(info, flags); if (IS_ERR(mod)) { err = PTR_ERR(mod); goto free_copy; } module_allocated = true; audit_log_kern_module(mod->name); /* Reserve our place in the list. */ err = add_unformed_module(mod); if (err) goto free_module; /* * We are tainting your kernel if your module gets into * the modules linked list somehow. */ module_augment_kernel_taints(mod, info); /* To avoid stressing percpu allocator, do this once we're unique. */ err = percpu_modalloc(mod, info); if (err) goto unlink_mod; /* Now module is in final location, initialize linked lists, etc. */ err = module_unload_init(mod); if (err) goto unlink_mod; init_param_lock(mod); /* * Now we've got everything in the final locations, we can * find optional sections. */ err = find_module_sections(mod, info); if (err) goto free_unload; err = check_export_symbol_versions(mod); if (err) goto free_unload; /* Set up MODINFO_ATTR fields */ setup_modinfo(mod, info); /* Fix up syms, so that st_value is a pointer to location. */ err = simplify_symbols(mod, info); if (err < 0) goto free_modinfo; err = apply_relocations(mod, info); if (err < 0) goto free_modinfo; err = post_relocation(mod, info); if (err < 0) goto free_modinfo; flush_module_icache(mod); /* Now copy in args */ mod->args = strndup_user(uargs, ~0UL >> 1); if (IS_ERR(mod->args)) { err = PTR_ERR(mod->args); goto free_arch_cleanup; } init_build_id(mod, info); /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ ftrace_module_init(mod); /* Finally it's fully formed, ready to start executing. */ err = complete_formation(mod, info); if (err) goto ddebug_cleanup; err = prepare_coming_module(mod); if (err) goto bug_cleanup; mod->async_probe_requested = async_probe; /* Module is ready to execute: parsing args may do that. */ after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, -32768, 32767, mod, unknown_module_param_cb); if (IS_ERR(after_dashes)) { err = PTR_ERR(after_dashes); goto coming_cleanup; } else if (after_dashes) { pr_warn("%s: parameters '%s' after `--' ignored\n", mod->name, after_dashes); } /* Link in to sysfs. */ err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); if (err < 0) goto coming_cleanup; if (is_livepatch_module(mod)) { err = copy_module_elf(mod, info); if (err < 0) goto sysfs_cleanup; } /* Get rid of temporary copy. */ free_copy(info, flags); /* Done! */ trace_module_load(mod); return do_init_module(mod); sysfs_cleanup: mod_sysfs_teardown(mod); coming_cleanup: mod->state = MODULE_STATE_GOING; destroy_params(mod->kp, mod->num_kp); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); klp_module_going(mod); bug_cleanup: mod->state = MODULE_STATE_GOING; /* module_bug_cleanup needs module_mutex protection */ mutex_lock(&module_mutex); module_bug_cleanup(mod); mutex_unlock(&module_mutex); ddebug_cleanup: ftrace_release_mod(mod); synchronize_rcu(); kfree(mod->args); free_arch_cleanup: module_arch_cleanup(mod); free_modinfo: free_modinfo(mod); free_unload: module_unload_free(mod); unlink_mod: mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. 
*/ list_del_rcu(&mod->list); mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ synchronize_rcu(); mutex_unlock(&module_mutex); free_module: mod_stat_bump_invalid(info, flags); /* Free lock-classes; relies on the preceding sync_rcu() */ for_class_mod_mem_type(type, core_data) { lockdep_free_key_range(mod->mem[type].base, mod->mem[type].size); } module_deallocate(mod, info); free_copy: /* * The info->len is always set. We distinguish between * failures once the proper module was allocated and * before that. */ if (!module_allocated) mod_stat_bump_becoming(info, flags); free_copy(info, flags); return err; } SYSCALL_DEFINE3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs) { int err; struct load_info info = { }; err = may_init_module(); if (err) return err; pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n", umod, len, uargs); err = copy_module_from_user(umod, len, &info); if (err) { mod_stat_inc(&failed_kreads); mod_stat_add_long(len, &invalid_kread_bytes); return err; } return load_module(&info, uargs, 0); } struct idempotent { const void *cookie; struct hlist_node entry; struct completion complete; int ret; }; #define IDEM_HASH_BITS 8 static struct hlist_head idem_hash[1 << IDEM_HASH_BITS]; static DEFINE_SPINLOCK(idem_lock); static bool idempotent(struct idempotent *u, const void *cookie) { int hash = hash_ptr(cookie, IDEM_HASH_BITS); struct hlist_head *head = idem_hash + hash; struct idempotent *existing; bool first; u->ret = 0; u->cookie = cookie; init_completion(&u->complete); spin_lock(&idem_lock); first = true; hlist_for_each_entry(existing, head, entry) { if (existing->cookie != cookie) continue; first = false; break; } hlist_add_head(&u->entry, idem_hash + hash); spin_unlock(&idem_lock); return !first; } /* * We were the first one with 'cookie' on the list, and we ended * up completing the operation. We now need to walk the list, * remove everybody - which includes ourselves - fill in the return * value, and then complete the operation. */ static int idempotent_complete(struct idempotent *u, int ret) { const void *cookie = u->cookie; int hash = hash_ptr(cookie, IDEM_HASH_BITS); struct hlist_head *head = idem_hash + hash; struct hlist_node *next; struct idempotent *pos; spin_lock(&idem_lock); hlist_for_each_entry_safe(pos, next, head, entry) { if (pos->cookie != cookie) continue; hlist_del(&pos->entry); pos->ret = ret; complete(&pos->complete); } spin_unlock(&idem_lock); return ret; } static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { struct load_info info = { }; void *buf = NULL; int len; len = kernel_read_file(f, 0, &buf, INT_MAX, NULL, READING_MODULE); if (len < 0) { mod_stat_inc(&failed_kreads); return len; } if (flags & MODULE_INIT_COMPRESSED_FILE) { int err = module_decompress(&info, buf, len); vfree(buf); /* compressed data is no longer needed */ if (err) { mod_stat_inc(&failed_decompress); mod_stat_add_long(len, &invalid_decompress_bytes); return err; } } else { info.hdr = buf; info.len = len; } return load_module(&info, uargs, flags); } static int idempotent_init_module(struct file *f, const char __user * uargs, int flags) { struct idempotent idem; if (!f || !(f->f_mode & FMODE_READ)) return -EBADF; /* See if somebody else is doing the operation? 
*/ if (idempotent(&idem, file_inode(f))) { wait_for_completion(&idem.complete); return idem.ret; } /* Otherwise, we'll do it and complete others */ return idempotent_complete(&idem, init_module_from_file(f, uargs, flags)); } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { int err; struct fd f; err = may_init_module(); if (err) return err; pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS |MODULE_INIT_IGNORE_VERMAGIC |MODULE_INIT_COMPRESSED_FILE)) return -EINVAL; f = fdget(fd); err = idempotent_init_module(f.file, uargs, flags); fdput(f); return err; } /* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */ char *module_flags(struct module *mod, char *buf, bool show_state) { int bx = 0; BUG_ON(mod->state == MODULE_STATE_UNFORMED); if (!mod->taints && !show_state) goto out; if (mod->taints || mod->state == MODULE_STATE_GOING || mod->state == MODULE_STATE_COMING) { buf[bx++] = '('; bx += module_flags_taint(mod->taints, buf + bx); /* Show a - for module-is-being-unloaded */ if (mod->state == MODULE_STATE_GOING && show_state) buf[bx++] = '-'; /* Show a + for module-is-being-loaded */ if (mod->state == MODULE_STATE_COMING && show_state) buf[bx++] = '+'; buf[bx++] = ')'; } out: buf[bx] = '\0'; return buf; } /* Given an address, look for it in the module exception tables. */ const struct exception_table_entry *search_module_extables(unsigned long addr) { const struct exception_table_entry *e = NULL; struct module *mod; preempt_disable(); mod = __module_address(addr); if (!mod) goto out; if (!mod->num_exentries) goto out; e = search_extable(mod->extable, mod->num_exentries, addr); out: preempt_enable(); /* * Now, if we found one, we are running inside it now, hence * we cannot unload the module, hence no refcnt needed. */ return e; } /** * is_module_address() - is this address inside a module? * @addr: the address to check. * * See is_module_text_address() if you simply want to see if the address * is code (not data). */ bool is_module_address(unsigned long addr) { bool ret; preempt_disable(); ret = __module_address(addr) != NULL; preempt_enable(); return ret; } /** * __module_address() - get the module which contains an address. * @addr: the address. * * Must be called with preempt disabled or module mutex held so that * module doesn't get freed during this. */ struct module *__module_address(unsigned long addr) { struct module *mod; if (addr >= mod_tree.addr_min && addr <= mod_tree.addr_max) goto lookup; #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC if (addr >= mod_tree.data_addr_min && addr <= mod_tree.data_addr_max) goto lookup; #endif return NULL; lookup: module_assert_mutex_or_preempt(); mod = mod_find(addr, &mod_tree); if (mod) { BUG_ON(!within_module(addr, mod)); if (mod->state == MODULE_STATE_UNFORMED) mod = NULL; } return mod; } /** * is_module_text_address() - is this address inside module code? * @addr: the address to check. * * See is_module_address() if you simply want to see if the address is * anywhere in a module. See kernel_text_address() for testing if an * address corresponds to kernel or module code. */ bool is_module_text_address(unsigned long addr) { bool ret; preempt_disable(); ret = __module_text_address(addr) != NULL; preempt_enable(); return ret; } /** * __module_text_address() - get the module whose code contains an address. * @addr: the address. * * Must be called with preempt disabled or module mutex held so that * module doesn't get freed during this. 
*/ struct module *__module_text_address(unsigned long addr) { struct module *mod = __module_address(addr); if (mod) { /* Make sure it's within the text section. */ if (!within_module_mem_type(addr, mod, MOD_TEXT) && !within_module_mem_type(addr, mod, MOD_INIT_TEXT)) mod = NULL; } return mod; } /* Don't grab lock, we're oopsing. */ void print_modules(void) { struct module *mod; char buf[MODULE_FLAGS_BUF_SIZE]; printk(KERN_DEFAULT "Modules linked in:"); /* Most callers should already have preempt disabled, but make sure */ preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; pr_cont(" %s%s", mod->name, module_flags(mod, buf, true)); } print_unloaded_tainted_modules(); preempt_enable(); if (last_unloaded_module.name[0]) pr_cont(" [last unloaded: %s%s]", last_unloaded_module.name, last_unloaded_module.taints); pr_cont("\n"); } #ifdef CONFIG_MODULE_DEBUGFS struct dentry *mod_debugfs_root; static int module_debugfs_init(void) { mod_debugfs_root = debugfs_create_dir("modules", NULL); return 0; } module_init(module_debugfs_init); #endif
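The loading path above is driven from userspace through the init_module(2)/finit_module(2) system calls handled here. Below is a minimal, hypothetical userspace sketch (not part of this file) that feeds a prebuilt object to finit_module() via syscall(2); the /tmp/example.ko path is an assumption and the error handling is illustrative only.

/* Minimal userspace sketch: load a module with finit_module(2).
 * Assumes /tmp/example.ko exists and the caller has CAP_SYS_MODULE.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	int fd = open("/tmp/example.ko", O_RDONLY | O_CLOEXEC);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Empty parameter string, no MODULE_INIT_* flags: the kernel side
	 * goes through idempotent_init_module() -> load_module() above.
	 */
	if (syscall(SYS_finit_module, fd, "", 0) != 0) {
		fprintf(stderr, "finit_module: %s\n", strerror(errno));
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}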
// SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/audit.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/slab.h> /** * tomoyo_print_bprm - Print "struct linux_binprm" for auditing. * * @bprm: Pointer to "struct linux_binprm". * @dump: Pointer to "struct tomoyo_page_dump". * * Returns the contents of @bprm on success, NULL otherwise. * * This function uses kzalloc(), so caller must kfree() if this function * didn't return NULL. */ static char *tomoyo_print_bprm(struct linux_binprm *bprm, struct tomoyo_page_dump *dump) { static const int tomoyo_buffer_len = 4096 * 2; char *buffer = kzalloc(tomoyo_buffer_len, GFP_NOFS); char *cp; char *last_start; int len; unsigned long pos = bprm->p; int offset = pos % PAGE_SIZE; int argv_count = bprm->argc; int envp_count = bprm->envc; bool truncated = false; if (!buffer) return NULL; len = snprintf(buffer, tomoyo_buffer_len - 1, "argv[]={ "); cp = buffer + len; if (!argv_count) { memmove(cp, "} envp[]={ ", 11); cp += 11; } last_start = cp; while (argv_count || envp_count) { if (!tomoyo_dump_page(bprm, pos, dump)) goto out; pos += PAGE_SIZE - offset; /* Read. */ while (offset < PAGE_SIZE) { const char *kaddr = dump->data; const unsigned char c = kaddr[offset++]; if (cp == last_start) *cp++ = '"'; if (cp >= buffer + tomoyo_buffer_len - 32) { /* Reserve some room for "..." string. 
*/ truncated = true; } else if (c == '\\') { *cp++ = '\\'; *cp++ = '\\'; } else if (c > ' ' && c < 127) { *cp++ = c; } else if (!c) { *cp++ = '"'; *cp++ = ' '; last_start = cp; } else { *cp++ = '\\'; *cp++ = (c >> 6) + '0'; *cp++ = ((c >> 3) & 7) + '0'; *cp++ = (c & 7) + '0'; } if (c) continue; if (argv_count) { if (--argv_count == 0) { if (truncated) { cp = last_start; memmove(cp, "... ", 4); cp += 4; } memmove(cp, "} envp[]={ ", 11); cp += 11; last_start = cp; truncated = false; } } else if (envp_count) { if (--envp_count == 0) { if (truncated) { cp = last_start; memmove(cp, "... ", 4); cp += 4; } } } if (!argv_count && !envp_count) break; } offset = 0; } *cp++ = '}'; *cp = '\0'; return buffer; out: snprintf(buffer, tomoyo_buffer_len - 1, "argv[]={ ... } envp[]= { ... }"); return buffer; } /** * tomoyo_filetype - Get string representation of file type. * * @mode: Mode value for stat(). * * Returns file type string. */ static inline const char *tomoyo_filetype(const umode_t mode) { switch (mode & S_IFMT) { case S_IFREG: case 0: return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FILE]; case S_IFDIR: return tomoyo_condition_keyword[TOMOYO_TYPE_IS_DIRECTORY]; case S_IFLNK: return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SYMLINK]; case S_IFIFO: return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FIFO]; case S_IFSOCK: return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SOCKET]; case S_IFBLK: return tomoyo_condition_keyword[TOMOYO_TYPE_IS_BLOCK_DEV]; case S_IFCHR: return tomoyo_condition_keyword[TOMOYO_TYPE_IS_CHAR_DEV]; } return "unknown"; /* This should not happen. */ } /** * tomoyo_print_header - Get header line of audit log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns string representation. * * This function uses kmalloc(), so caller must kfree() if this function * didn't return NULL. 
*/ static char *tomoyo_print_header(struct tomoyo_request_info *r) { struct tomoyo_time stamp; const pid_t gpid = task_pid_nr(current); struct tomoyo_obj_info *obj = r->obj; static const int tomoyo_buffer_len = 4096; char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS); int pos; u8 i; if (!buffer) return NULL; tomoyo_convert_time(ktime_get_real_seconds(), &stamp); pos = snprintf(buffer, tomoyo_buffer_len - 1, "#%04u/%02u/%02u %02u:%02u:%02u# profile=%u mode=%s granted=%s (global-pid=%u) task={ pid=%u ppid=%u uid=%u gid=%u euid=%u egid=%u suid=%u sgid=%u fsuid=%u fsgid=%u }", stamp.year, stamp.month, stamp.day, stamp.hour, stamp.min, stamp.sec, r->profile, tomoyo_mode[r->mode], str_yes_no(r->granted), gpid, tomoyo_sys_getpid(), tomoyo_sys_getppid(), from_kuid(&init_user_ns, current_uid()), from_kgid(&init_user_ns, current_gid()), from_kuid(&init_user_ns, current_euid()), from_kgid(&init_user_ns, current_egid()), from_kuid(&init_user_ns, current_suid()), from_kgid(&init_user_ns, current_sgid()), from_kuid(&init_user_ns, current_fsuid()), from_kgid(&init_user_ns, current_fsgid())); if (!obj) goto no_obj_info; if (!obj->validate_done) { tomoyo_get_attributes(obj); obj->validate_done = true; } for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) { struct tomoyo_mini_stat *stat; unsigned int dev; umode_t mode; if (!obj->stat_valid[i]) continue; stat = &obj->stat[i]; dev = stat->dev; mode = stat->mode; if (i & 1) { pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos, " path%u.parent={ uid=%u gid=%u ino=%lu perm=0%o }", (i >> 1) + 1, from_kuid(&init_user_ns, stat->uid), from_kgid(&init_user_ns, stat->gid), (unsigned long)stat->ino, stat->mode & S_IALLUGO); continue; } pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos, " path%u={ uid=%u gid=%u ino=%lu major=%u minor=%u perm=0%o type=%s", (i >> 1) + 1, from_kuid(&init_user_ns, stat->uid), from_kgid(&init_user_ns, stat->gid), (unsigned long)stat->ino, MAJOR(dev), MINOR(dev), mode & S_IALLUGO, tomoyo_filetype(mode)); if (S_ISCHR(mode) || S_ISBLK(mode)) { dev = stat->rdev; pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos, " dev_major=%u dev_minor=%u", MAJOR(dev), MINOR(dev)); } pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos, " }"); } no_obj_info: if (pos < tomoyo_buffer_len - 1) return buffer; kfree(buffer); return NULL; } /** * tomoyo_init_log - Allocate buffer for audit logs. * * @r: Pointer to "struct tomoyo_request_info". * @len: Buffer size needed for @fmt and @args. * @fmt: The printf()'s format string. * @args: va_list structure for @fmt. * * Returns pointer to allocated memory. * * This function uses kzalloc(), so caller must kfree() if this function * didn't return NULL. */ char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt, va_list args) { char *buf = NULL; char *bprm_info = NULL; const char *header = NULL; char *realpath = NULL; const char *symlink = NULL; int pos; const char *domainname = r->domain->domainname->name; header = tomoyo_print_header(r); if (!header) return NULL; /* +10 is for '\n' etc. and '\0'. 
*/ len += strlen(domainname) + strlen(header) + 10; if (r->ee) { struct file *file = r->ee->bprm->file; realpath = tomoyo_realpath_from_path(&file->f_path); bprm_info = tomoyo_print_bprm(r->ee->bprm, &r->ee->dump); if (!realpath || !bprm_info) goto out; /* +80 is for " exec={ realpath=\"%s\" argc=%d envc=%d %s }" */ len += strlen(realpath) + 80 + strlen(bprm_info); } else if (r->obj && r->obj->symlink_target) { symlink = r->obj->symlink_target->name; /* +18 is for " symlink.target=\"%s\"" */ len += 18 + strlen(symlink); } len = kmalloc_size_roundup(len); buf = kzalloc(len, GFP_NOFS); if (!buf) goto out; len--; pos = snprintf(buf, len, "%s", header); if (realpath) { struct linux_binprm *bprm = r->ee->bprm; pos += snprintf(buf + pos, len - pos, " exec={ realpath=\"%s\" argc=%d envc=%d %s }", realpath, bprm->argc, bprm->envc, bprm_info); } else if (symlink) pos += snprintf(buf + pos, len - pos, " symlink.target=\"%s\"", symlink); pos += snprintf(buf + pos, len - pos, "\n%s\n", domainname); vsnprintf(buf + pos, len - pos, fmt, args); out: kfree(realpath); kfree(bprm_info); kfree(header); return buf; } /* Wait queue for /sys/kernel/security/tomoyo/audit. */ static DECLARE_WAIT_QUEUE_HEAD(tomoyo_log_wait); /* Structure for audit log. */ struct tomoyo_log { struct list_head list; char *log; int size; }; /* The list for "struct tomoyo_log". */ static LIST_HEAD(tomoyo_log); /* Lock for "struct list_head tomoyo_log". */ static DEFINE_SPINLOCK(tomoyo_log_lock); /* Length of "struct list_head tomoyo_log". */ static unsigned int tomoyo_log_count; /** * tomoyo_get_audit - Get audit mode. * * @ns: Pointer to "struct tomoyo_policy_namespace". * @profile: Profile number. * @index: Index number of functionality. * @matched_acl: Pointer to "struct tomoyo_acl_info". * @is_granted: True if granted log, false otherwise. * * Returns true if this request should be audited, false otherwise. */ static bool tomoyo_get_audit(const struct tomoyo_policy_namespace *ns, const u8 profile, const u8 index, const struct tomoyo_acl_info *matched_acl, const bool is_granted) { u8 mode; const u8 category = tomoyo_index2category[index] + TOMOYO_MAX_MAC_INDEX; struct tomoyo_profile *p; if (!tomoyo_policy_loaded) return false; p = tomoyo_profile(ns, profile); if (tomoyo_log_count >= p->pref[TOMOYO_PREF_MAX_AUDIT_LOG]) return false; if (is_granted && matched_acl && matched_acl->cond && matched_acl->cond->grant_log != TOMOYO_GRANTLOG_AUTO) return matched_acl->cond->grant_log == TOMOYO_GRANTLOG_YES; mode = p->config[index]; if (mode == TOMOYO_CONFIG_USE_DEFAULT) mode = p->config[category]; if (mode == TOMOYO_CONFIG_USE_DEFAULT) mode = p->default_config; if (is_granted) return mode & TOMOYO_CONFIG_WANT_GRANT_LOG; return mode & TOMOYO_CONFIG_WANT_REJECT_LOG; } /** * tomoyo_write_log2 - Write an audit log. * * @r: Pointer to "struct tomoyo_request_info". * @len: Buffer size needed for @fmt and @args. * @fmt: The printf()'s format string. * @args: va_list structure for @fmt. * * Returns nothing. */ void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt, va_list args) { char *buf; struct tomoyo_log *entry; bool quota_exceeded = false; if (!tomoyo_get_audit(r->domain->ns, r->profile, r->type, r->matched_acl, r->granted)) goto out; buf = tomoyo_init_log(r, len, fmt, args); if (!buf) goto out; entry = kzalloc(sizeof(*entry), GFP_NOFS); if (!entry) { kfree(buf); goto out; } entry->log = buf; len = kmalloc_size_roundup(strlen(buf) + 1); /* * The entry->size is used for memory quota checks. 
* Don't go beyond strlen(entry->log). */ entry->size = len + kmalloc_size_roundup(sizeof(*entry)); spin_lock(&tomoyo_log_lock); if (tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT] && tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] + entry->size >= tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT]) { quota_exceeded = true; } else { tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] += entry->size; list_add_tail(&entry->list, &tomoyo_log); tomoyo_log_count++; } spin_unlock(&tomoyo_log_lock); if (quota_exceeded) { kfree(buf); kfree(entry); goto out; } wake_up(&tomoyo_log_wait); out: return; } /** * tomoyo_write_log - Write an audit log. * * @r: Pointer to "struct tomoyo_request_info". * @fmt: The printf()'s format string, followed by parameters. * * Returns nothing. */ void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...) { va_list args; int len; va_start(args, fmt); len = vsnprintf(NULL, 0, fmt, args) + 1; va_end(args); va_start(args, fmt); tomoyo_write_log2(r, len, fmt, args); va_end(args); } /** * tomoyo_read_log - Read an audit log. * * @head: Pointer to "struct tomoyo_io_buffer". * * Returns nothing. */ void tomoyo_read_log(struct tomoyo_io_buffer *head) { struct tomoyo_log *ptr = NULL; if (head->r.w_pos) return; kfree(head->read_buf); head->read_buf = NULL; spin_lock(&tomoyo_log_lock); if (!list_empty(&tomoyo_log)) { ptr = list_entry(tomoyo_log.next, typeof(*ptr), list); list_del(&ptr->list); tomoyo_log_count--; tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] -= ptr->size; } spin_unlock(&tomoyo_log_lock); if (ptr) { head->read_buf = ptr->log; head->r.w[head->r.w_pos++] = head->read_buf; kfree(ptr); } } /** * tomoyo_poll_log - Wait for an audit log. * * @file: Pointer to "struct file". * @wait: Pointer to "poll_table". Maybe NULL. * * Returns EPOLLIN | EPOLLRDNORM when ready to read an audit log. */ __poll_t tomoyo_poll_log(struct file *file, poll_table *wait) { if (tomoyo_log_count) return EPOLLIN | EPOLLRDNORM; poll_wait(file, &tomoyo_log_wait, wait); if (tomoyo_log_count) return EPOLLIN | EPOLLRDNORM; return 0; }
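The argv/envp dump built by tomoyo_print_bprm() escapes each byte before logging it: backslashes are doubled, printable ASCII is copied verbatim, and everything else becomes a three-digit octal escape. The following standalone sketch (hypothetical helper name, simplified buffer sizing, not part of TOMOYO) reproduces just that encoding step.

#include <stdio.h>

/* Escape one argv/envp byte the way tomoyo_print_bprm() does:
 * '\' -> "\\", printable ASCII copied as-is, anything else -> "\ooo".
 * Returns the number of characters written to cp.
 */
static int escape_byte(char *cp, unsigned char c)
{
	if (c == '\\') {
		cp[0] = '\\';
		cp[1] = '\\';
		return 2;
	}
	if (c > ' ' && c < 127) {
		cp[0] = c;
		return 1;
	}
	cp[0] = '\\';
	cp[1] = (c >> 6) + '0';
	cp[2] = ((c >> 3) & 7) + '0';
	cp[3] = (c & 7) + '0';
	return 4;
}

int main(void)
{
	const unsigned char arg[] = "a b\\c\n";
	char out[64];	/* large enough: at most 4 output bytes per input byte */
	int pos = 0;
	size_t i;

	for (i = 0; i < sizeof(arg) - 1; i++)
		pos += escape_byte(out + pos, arg[i]);
	out[pos] = '\0';
	printf("\"%s\"\n", out);	/* prints "a\040b\\c\012" */
	return 0;
}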
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2011 Intel Corporation. All rights reserved. 
*/ #define pr_fmt(fmt) "llcp: %s: " fmt, __func__ #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/nfc.h> #include <net/nfc/nfc.h> #include "nfc.h" #include "llcp.h" static const u8 llcp_tlv_length[LLCP_TLV_MAX] = { 0, 1, /* VERSION */ 2, /* MIUX */ 2, /* WKS */ 1, /* LTO */ 1, /* RW */ 0, /* SN */ 1, /* OPT */ 0, /* SDREQ */ 2, /* SDRES */ }; static u8 llcp_tlv8(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; return tlv[2]; } static u16 llcp_tlv16(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; return be16_to_cpu(*((__be16 *)(tlv + 2))); } static u8 llcp_tlv_version(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_VERSION); } static u16 llcp_tlv_miux(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff; } static u16 llcp_tlv_wks(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_WKS); } static u16 llcp_tlv_lto(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_LTO); } static u8 llcp_tlv_opt(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_OPT); } static u8 llcp_tlv_rw(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf; } u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length) { u8 *tlv, length; pr_debug("type %d\n", type); if (type >= LLCP_TLV_MAX) return NULL; length = llcp_tlv_length[type]; if (length == 0 && value_length == 0) return NULL; else if (length == 0) length = value_length; *tlv_length = 2 + length; tlv = kzalloc(2 + length, GFP_KERNEL); if (tlv == NULL) return tlv; tlv[0] = type; tlv[1] = length; memcpy(tlv + 2, value, length); return tlv; } struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) { struct nfc_llcp_sdp_tlv *sdres; u8 value[2]; sdres = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); if (sdres == NULL) return NULL; value[0] = tid; value[1] = sap; sdres->tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, value, 2, &sdres->tlv_len); if (sdres->tlv == NULL) { kfree(sdres); return NULL; } sdres->tid = tid; sdres->sap = sap; INIT_HLIST_NODE(&sdres->node); return sdres; } struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len) { struct nfc_llcp_sdp_tlv *sdreq; pr_debug("uri: %s, len: %zu\n", uri, uri_len); /* sdreq->tlv_len is u8, takes uri_len, + 3 for header, + 1 for NULL */ if (WARN_ON_ONCE(uri_len > U8_MAX - 4)) return NULL; sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); if (sdreq == NULL) return NULL; sdreq->tlv_len = uri_len + 3; if (uri[uri_len - 1] == 0) sdreq->tlv_len--; sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL); if (sdreq->tlv == NULL) { kfree(sdreq); return NULL; } sdreq->tlv[0] = LLCP_TLV_SDREQ; sdreq->tlv[1] = sdreq->tlv_len - 2; sdreq->tlv[2] = tid; sdreq->tid = tid; sdreq->uri = sdreq->tlv + 3; memcpy(sdreq->uri, uri, uri_len); sdreq->time = jiffies; INIT_HLIST_NODE(&sdreq->node); return sdreq; } void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp) { kfree(sdp->tlv); kfree(sdp); } void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) { struct nfc_llcp_sdp_tlv *sdp; struct hlist_node *n; hlist_for_each_entry_safe(sdp, n, head, node) { hlist_del(&sdp->node); nfc_llcp_free_sdp_tlv(sdp); } } int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, const u8 *tlv_array, u16 tlv_array_len) { const u8 *tlv = tlv_array; u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); if (local == NULL) return -ENODEV; while (offset < tlv_array_len) { type = tlv[0]; length = tlv[1]; pr_debug("type 0x%x length 
%d\n", type, length); switch (type) { case LLCP_TLV_VERSION: local->remote_version = llcp_tlv_version(tlv); break; case LLCP_TLV_MIUX: local->remote_miu = llcp_tlv_miux(tlv) + 128; break; case LLCP_TLV_WKS: local->remote_wks = llcp_tlv_wks(tlv); break; case LLCP_TLV_LTO: local->remote_lto = llcp_tlv_lto(tlv) * 10; break; case LLCP_TLV_OPT: local->remote_opt = llcp_tlv_opt(tlv); break; default: pr_err("Invalid gt tlv value 0x%x\n", type); break; } offset += length + 2; tlv += length + 2; } pr_debug("version 0x%x miu %d lto %d opt 0x%x wks 0x%x\n", local->remote_version, local->remote_miu, local->remote_lto, local->remote_opt, local->remote_wks); return 0; } int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, const u8 *tlv_array, u16 tlv_array_len) { const u8 *tlv = tlv_array; u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); if (sock == NULL) return -ENOTCONN; while (offset < tlv_array_len) { type = tlv[0]; length = tlv[1]; pr_debug("type 0x%x length %d\n", type, length); switch (type) { case LLCP_TLV_MIUX: sock->remote_miu = llcp_tlv_miux(tlv) + 128; break; case LLCP_TLV_RW: sock->remote_rw = llcp_tlv_rw(tlv); break; case LLCP_TLV_SN: break; default: pr_err("Invalid gt tlv value 0x%x\n", type); break; } offset += length + 2; tlv += length + 2; } pr_debug("sock %p rw %d miu %d\n", sock, sock->remote_rw, sock->remote_miu); return 0; } static struct sk_buff *llcp_add_header(struct sk_buff *pdu, u8 dsap, u8 ssap, u8 ptype) { u8 header[2]; pr_debug("ptype 0x%x dsap 0x%x ssap 0x%x\n", ptype, dsap, ssap); header[0] = (u8)((dsap << 2) | (ptype >> 2)); header[1] = (u8)((ptype << 6) | ssap); pr_debug("header 0x%x 0x%x\n", header[0], header[1]); skb_put_data(pdu, header, LLCP_HEADER_SIZE); return pdu; } static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv, u8 tlv_length) { /* XXX Add an skb length check */ if (tlv == NULL) return NULL; skb_put_data(pdu, tlv, tlv_length); return pdu; } static struct sk_buff *llcp_allocate_pdu(struct nfc_llcp_sock *sock, u8 cmd, u16 size) { struct sk_buff *skb; int err; if (sock->ssap == 0) return NULL; skb = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT, size + LLCP_HEADER_SIZE, &err); if (skb == NULL) { pr_err("Could not allocate PDU\n"); return NULL; } skb = llcp_add_header(skb, sock->dsap, sock->ssap, cmd); return skb; } int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) { struct sk_buff *skb; struct nfc_dev *dev; struct nfc_llcp_local *local; local = sock->local; if (local == NULL) return -ENODEV; dev = sock->dev; if (dev == NULL) return -ENODEV; skb = llcp_allocate_pdu(sock, LLCP_PDU_DISC, 0); if (skb == NULL) return -ENOMEM; skb_queue_tail(&local->tx_queue, skb); return 0; } int nfc_llcp_send_symm(struct nfc_dev *dev) { struct sk_buff *skb; struct nfc_llcp_local *local; u16 size = 0; int err; local = nfc_llcp_find_local(dev); if (local == NULL) return -ENODEV; size += LLCP_HEADER_SIZE; size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; skb = alloc_skb(size, GFP_KERNEL); if (skb == NULL) { err = -ENOMEM; goto out; } skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM); __net_timestamp(skb); nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX); err = nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); out: nfc_llcp_local_put(local); return err; } int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; const u8 *service_name_tlv = NULL; const u8 *miux_tlv = NULL; 
const u8 *rw_tlv = NULL; u8 service_name_tlv_length = 0; u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; local = sock->local; if (local == NULL) return -ENODEV; if (sock->service_name != NULL) { service_name_tlv = nfc_llcp_build_tlv(LLCP_TLV_SN, sock->service_name, sock->service_name_len, &service_name_tlv_length); if (!service_name_tlv) { err = -ENOMEM; goto error_tlv; } size += service_name_tlv_length; } /* If the socket parameters are not set, use the local ones */ miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ? local->miux : sock->miux; rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); if (!miux_tlv) { err = -ENOMEM; goto error_tlv; } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); if (!rw_tlv) { err = -ENOMEM; goto error_tlv; } size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); skb = llcp_allocate_pdu(sock, LLCP_PDU_CONNECT, size); if (skb == NULL) { err = -ENOMEM; goto error_tlv; } llcp_add_tlv(skb, service_name_tlv, service_name_tlv_length); llcp_add_tlv(skb, miux_tlv, miux_tlv_length); llcp_add_tlv(skb, rw_tlv, rw_tlv_length); skb_queue_tail(&local->tx_queue, skb); err = 0; error_tlv: if (err) pr_err("error %d\n", err); kfree(service_name_tlv); kfree(miux_tlv); kfree(rw_tlv); return err; } int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; const u8 *miux_tlv = NULL; const u8 *rw_tlv = NULL; u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; local = sock->local; if (local == NULL) return -ENODEV; /* If the socket parameters are not set, use the local ones */ miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ? local->miux : sock->miux; rw = sock->rw > LLCP_MAX_RW ? 
local->rw : sock->rw; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); if (!miux_tlv) { err = -ENOMEM; goto error_tlv; } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); if (!rw_tlv) { err = -ENOMEM; goto error_tlv; } size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); if (skb == NULL) { err = -ENOMEM; goto error_tlv; } llcp_add_tlv(skb, miux_tlv, miux_tlv_length); llcp_add_tlv(skb, rw_tlv, rw_tlv_length); skb_queue_tail(&local->tx_queue, skb); err = 0; error_tlv: if (err) pr_err("error %d\n", err); kfree(miux_tlv); kfree(rw_tlv); return err; } static struct sk_buff *nfc_llcp_allocate_snl(struct nfc_llcp_local *local, size_t tlv_length) { struct sk_buff *skb; struct nfc_dev *dev; u16 size = 0; if (local == NULL) return ERR_PTR(-ENODEV); dev = local->dev; if (dev == NULL) return ERR_PTR(-ENODEV); size += LLCP_HEADER_SIZE; size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; size += tlv_length; skb = alloc_skb(size, GFP_KERNEL); if (skb == NULL) return ERR_PTR(-ENOMEM); skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL); return skb; } int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, struct hlist_head *tlv_list, size_t tlvs_len) { struct nfc_llcp_sdp_tlv *sdp; struct hlist_node *n; struct sk_buff *skb; skb = nfc_llcp_allocate_snl(local, tlvs_len); if (IS_ERR(skb)) return PTR_ERR(skb); hlist_for_each_entry_safe(sdp, n, tlv_list, node) { skb_put_data(skb, sdp->tlv, sdp->tlv_len); hlist_del(&sdp->node); nfc_llcp_free_sdp_tlv(sdp); } skb_queue_tail(&local->tx_queue, skb); return 0; } int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, struct hlist_head *tlv_list, size_t tlvs_len) { struct nfc_llcp_sdp_tlv *sdreq; struct hlist_node *n; struct sk_buff *skb; skb = nfc_llcp_allocate_snl(local, tlvs_len); if (IS_ERR(skb)) return PTR_ERR(skb); mutex_lock(&local->sdreq_lock); if (hlist_empty(&local->pending_sdreqs)) mod_timer(&local->sdreq_timer, jiffies + msecs_to_jiffies(3 * local->remote_lto)); hlist_for_each_entry_safe(sdreq, n, tlv_list, node) { pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri); skb_put_data(skb, sdreq->tlv, sdreq->tlv_len); hlist_del(&sdreq->node); hlist_add_head(&sdreq->node, &local->pending_sdreqs); } mutex_unlock(&local->sdreq_lock); skb_queue_tail(&local->tx_queue, skb); return 0; } int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason) { struct sk_buff *skb; struct nfc_dev *dev; u16 size = 1; /* Reason code */ pr_debug("Sending DM reason 0x%x\n", reason); if (local == NULL) return -ENODEV; dev = local->dev; if (dev == NULL) return -ENODEV; size += LLCP_HEADER_SIZE; size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; skb = alloc_skb(size, GFP_KERNEL); if (skb == NULL) return -ENOMEM; skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); skb = llcp_add_header(skb, dsap, ssap, LLCP_PDU_DM); skb_put_data(skb, &reason, 1); skb_queue_head(&local->tx_queue, skb); return 0; } int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, struct msghdr *msg, size_t len) { struct sk_buff *pdu; struct sock *sk = &sock->sk; struct nfc_llcp_local *local; size_t frag_len = 0, remaining_len; u8 *msg_data, *msg_ptr; u16 remote_miu; pr_debug("Send I frame len %zd\n", len); local = sock->local; if (local == NULL) return -ENODEV; /* Remote is ready but has not acknowledged our frames */ if((sock->remote_ready && skb_queue_len(&sock->tx_pending_queue) >= sock->remote_rw && 
skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) { pr_err("Pending queue is full %d frames\n", skb_queue_len(&sock->tx_pending_queue)); return -ENOBUFS; } /* Remote is not ready and we've been queueing enough frames */ if ((!sock->remote_ready && skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) { pr_err("Tx queue is full %d frames\n", skb_queue_len(&sock->tx_queue)); return -ENOBUFS; } msg_data = kmalloc(len, GFP_USER | __GFP_NOWARN); if (msg_data == NULL) return -ENOMEM; if (memcpy_from_msg(msg_data, msg, len)) { kfree(msg_data); return -EFAULT; } remaining_len = len; msg_ptr = msg_data; do { remote_miu = sock->remote_miu > LLCP_MAX_MIU ? LLCP_DEFAULT_MIU : sock->remote_miu; frag_len = min_t(size_t, remote_miu, remaining_len); pr_debug("Fragment %zd bytes remaining %zd", frag_len, remaining_len); pdu = llcp_allocate_pdu(sock, LLCP_PDU_I, frag_len + LLCP_SEQUENCE_SIZE); if (pdu == NULL) { kfree(msg_data); return -ENOMEM; } skb_put(pdu, LLCP_SEQUENCE_SIZE); if (likely(frag_len > 0)) skb_put_data(pdu, msg_ptr, frag_len); skb_queue_tail(&sock->tx_queue, pdu); lock_sock(sk); nfc_llcp_queue_i_frames(sock); release_sock(sk); remaining_len -= frag_len; msg_ptr += frag_len; } while (remaining_len > 0); kfree(msg_data); return len; } int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap, struct msghdr *msg, size_t len) { struct sk_buff *pdu; struct nfc_llcp_local *local; size_t frag_len = 0, remaining_len; u8 *msg_ptr, *msg_data; u16 remote_miu; int err; pr_debug("Send UI frame len %zd\n", len); local = sock->local; if (local == NULL) return -ENODEV; msg_data = kmalloc(len, GFP_USER | __GFP_NOWARN); if (msg_data == NULL) return -ENOMEM; if (memcpy_from_msg(msg_data, msg, len)) { kfree(msg_data); return -EFAULT; } remaining_len = len; msg_ptr = msg_data; do { remote_miu = sock->remote_miu > LLCP_MAX_MIU ? local->remote_miu : sock->remote_miu; frag_len = min_t(size_t, remote_miu, remaining_len); pr_debug("Fragment %zd bytes remaining %zd", frag_len, remaining_len); pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, 0, frag_len + LLCP_HEADER_SIZE, &err); if (pdu == NULL) { pr_err("Could not allocate PDU (error=%d)\n", err); len -= remaining_len; if (len == 0) len = err; break; } pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI); if (likely(frag_len > 0)) skb_put_data(pdu, msg_ptr, frag_len); /* No need to check for the peer RW for UI frames */ skb_queue_tail(&local->tx_queue, pdu); remaining_len -= frag_len; msg_ptr += frag_len; } while (remaining_len > 0); kfree(msg_data); return len; } int nfc_llcp_send_rr(struct nfc_llcp_sock *sock) { struct sk_buff *skb; struct nfc_llcp_local *local; pr_debug("Send rr nr %d\n", sock->recv_n); local = sock->local; if (local == NULL) return -ENODEV; skb = llcp_allocate_pdu(sock, LLCP_PDU_RR, LLCP_SEQUENCE_SIZE); if (skb == NULL) return -ENOMEM; skb_put(skb, LLCP_SEQUENCE_SIZE); skb->data[2] = sock->recv_n; skb_queue_head(&local->tx_queue, skb); return 0; }
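All of the PDUs built above carry LLCP parameters as TLV entries: one type byte, one length byte, then the value, with the cursor advancing by length + 2, exactly as the parse loops in nfc_llcp_parse_gb_tlv() and nfc_llcp_parse_connection_tlv() do. A small standalone sketch of that walk follows; the helper name and sample bytes are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Walk a type/length/value array the way the LLCP parse loops above do:
 * tlv[0] is the type, tlv[1] the value length, then the value bytes,
 * and the next entry starts at offset + length + 2.
 */
static void walk_tlv(const uint8_t *tlv, uint16_t tlv_len)
{
	uint16_t offset = 0;

	while (offset + 2 <= tlv_len) {
		uint8_t type = tlv[offset];
		uint8_t length = tlv[offset + 1];

		if (offset + 2 + length > tlv_len)
			break;	/* truncated entry */
		printf("type 0x%02x length %u\n", type, length);
		offset += length + 2;
	}
}

int main(void)
{
	/* Sample general-bytes array: VERSION (type 0x01, 1 byte) followed
	 * by MIUX (type 0x02, 2 bytes, big endian) -- values are made up.
	 */
	const uint8_t gb[] = { 0x01, 0x01, 0x11, 0x02, 0x02, 0x04, 0x80 };

	walk_tlv(gb, sizeof(gb));
	return 0;
}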
/* SPDX-License-Identifier: GPL-2.0 */ /* * Released under the GPLv2 only. */ #include <linux/pm.h> #include <linux/acpi.h> struct usb_hub_descriptor; struct usb_dev_state; /* Functions local to drivers/usb/core/ */ extern int usb_create_sysfs_dev_files(struct usb_device *dev); extern void usb_remove_sysfs_dev_files(struct usb_device *dev); extern void usb_create_sysfs_intf_files(struct usb_interface *intf); extern void usb_remove_sysfs_intf_files(struct usb_interface *intf); extern int usb_update_wireless_status_attr(struct usb_interface *intf); extern int usb_create_ep_devs(struct device *parent, struct usb_host_endpoint *endpoint, struct usb_device *udev); extern void usb_remove_ep_devs(struct usb_host_endpoint *endpoint); extern void usb_enable_endpoint(struct usb_device *dev, struct usb_host_endpoint *ep, bool reset_toggle); extern void usb_enable_interface(struct usb_device *dev, struct usb_interface *intf, bool reset_toggles); extern void usb_disable_endpoint(struct usb_device *dev, unsigned int epaddr, bool reset_hardware); extern void usb_disable_interface(struct usb_device *dev, struct usb_interface *intf, bool reset_hardware); extern void usb_release_interface_cache(struct kref *ref); extern void usb_disable_device(struct usb_device *dev, int skip_ep0); extern int usb_deauthorize_device(struct usb_device *); extern int usb_authorize_device(struct usb_device *); extern void usb_deauthorize_interface(struct usb_interface *); extern void usb_authorize_interface(struct usb_interface *); extern void usb_detect_quirks(struct usb_device *udev); extern void usb_detect_interface_quirks(struct usb_device *udev); extern void usb_release_quirk_list(void); extern bool usb_endpoint_is_ignored(struct usb_device *udev, struct usb_host_interface *intf, struct usb_endpoint_descriptor *epd); extern int usb_remove_device(struct usb_device *udev); extern struct usb_device_descriptor *usb_get_device_descriptor( struct usb_device *udev); extern int usb_set_isoch_delay(struct usb_device *dev); extern int usb_get_bos_descriptor(struct usb_device *dev); extern void usb_release_bos_descriptor(struct usb_device *dev); extern int usb_set_configuration(struct usb_device *dev, int configuration); extern int usb_choose_configuration(struct usb_device *udev); extern int usb_generic_driver_probe(struct usb_device *udev); extern void usb_generic_driver_disconnect(struct usb_device *udev); extern int usb_generic_driver_suspend(struct usb_device *udev, pm_message_t msg); extern int usb_generic_driver_resume(struct usb_device *udev, pm_message_t msg); static inline unsigned usb_get_max_power(struct usb_device *udev, struct usb_host_config *c) { /* SuperSpeed power is in 8 mA units; others are in 2 mA units */ unsigned mul = (udev->speed >= USB_SPEED_SUPER 
? 8 : 2); return c->desc.bMaxPower * mul; } extern void usb_kick_hub_wq(struct usb_device *dev); extern int usb_match_one_id_intf(struct usb_device *dev, struct usb_host_interface *intf, const struct usb_device_id *id); extern int usb_match_device(struct usb_device *dev, const struct usb_device_id *id); extern const struct usb_device_id *usb_device_match_id(struct usb_device *udev, const struct usb_device_id *id); extern bool usb_driver_applicable(struct usb_device *udev, struct usb_device_driver *udrv); extern void usb_forced_unbind_intf(struct usb_interface *intf); extern void usb_unbind_and_rebind_marked_interfaces(struct usb_device *udev); extern void usb_hub_release_all_ports(struct usb_device *hdev, struct usb_dev_state *owner); extern bool usb_device_is_owned(struct usb_device *udev); extern int usb_hub_init(void); extern void usb_hub_cleanup(void); extern int usb_major_init(void); extern void usb_major_cleanup(void); extern int usb_device_supports_lpm(struct usb_device *udev); extern int usb_port_disable(struct usb_device *udev); #ifdef CONFIG_PM extern int usb_suspend(struct device *dev, pm_message_t msg); extern int usb_resume(struct device *dev, pm_message_t msg); extern int usb_resume_complete(struct device *dev); extern int usb_port_suspend(struct usb_device *dev, pm_message_t msg); extern int usb_port_resume(struct usb_device *dev, pm_message_t msg); extern void usb_autosuspend_device(struct usb_device *udev); extern int usb_autoresume_device(struct usb_device *udev); extern int usb_remote_wakeup(struct usb_device *dev); extern int usb_runtime_suspend(struct device *dev); extern int usb_runtime_resume(struct device *dev); extern int usb_runtime_idle(struct device *dev); extern int usb_enable_usb2_hardware_lpm(struct usb_device *udev); extern int usb_disable_usb2_hardware_lpm(struct usb_device *udev); extern void usbfs_notify_suspend(struct usb_device *udev); extern void usbfs_notify_resume(struct usb_device *udev); #else static inline int usb_port_suspend(struct usb_device *udev, pm_message_t msg) { return 0; } static inline int usb_port_resume(struct usb_device *udev, pm_message_t msg) { return 0; } #define usb_autosuspend_device(udev) do {} while (0) static inline int usb_autoresume_device(struct usb_device *udev) { return 0; } static inline int usb_enable_usb2_hardware_lpm(struct usb_device *udev) { return 0; } static inline int usb_disable_usb2_hardware_lpm(struct usb_device *udev) { return 0; } #endif extern const struct class usbmisc_class; extern const struct bus_type usb_bus_type; extern struct mutex usb_port_peer_mutex; extern const struct device_type usb_device_type; extern const struct device_type usb_if_device_type; extern const struct device_type usb_ep_device_type; extern const struct device_type usb_port_device_type; extern struct usb_device_driver usb_generic_driver; static inline int is_usb_device(const struct device *dev) { return dev->type == &usb_device_type; } static inline int is_usb_interface(const struct device *dev) { return dev->type == &usb_if_device_type; } static inline int is_usb_endpoint(const struct device *dev) { return dev->type == &usb_ep_device_type; } static inline int is_usb_port(const struct device *dev) { return dev->type == &usb_port_device_type; } static inline int is_root_hub(struct usb_device *udev) { return (udev->parent == NULL); } extern bool is_usb_device_driver(const struct device_driver *drv); /* for labeling diagnostics */ extern const char *usbcore_name; /* sysfs stuff */ extern const struct attribute_group 
*usb_device_groups[]; extern const struct attribute_group *usb_interface_groups[]; /* usbfs stuff */ extern struct usb_driver usbfs_driver; extern const struct file_operations usbfs_devices_fops; extern const struct file_operations usbdev_file_operations; extern int usb_devio_init(void); extern void usb_devio_cleanup(void); /* * Firmware specific cookie identifying a port's location. '0' == no location * data available */ typedef u32 usb_port_location_t; /* internal notify stuff */ extern void usb_notify_add_device(struct usb_device *udev); extern void usb_notify_remove_device(struct usb_device *udev); extern void usb_notify_add_bus(struct usb_bus *ubus); extern void usb_notify_remove_bus(struct usb_bus *ubus); extern void usb_hub_adjust_deviceremovable(struct usb_device *hdev, struct usb_hub_descriptor *desc); #ifdef CONFIG_ACPI extern int usb_acpi_register(void); extern void usb_acpi_unregister(void); extern acpi_handle usb_get_hub_port_acpi_handle(struct usb_device *hdev, int port1); #else static inline int usb_acpi_register(void) { return 0; }; static inline void usb_acpi_unregister(void) { }; #endif
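A standalone sketch (not part of the header above) of the bMaxPower arithmetic that usb_get_max_power() performs: one descriptor unit corresponds to 8 mA at SuperSpeed and above, and to 2 mA otherwise, so a hypothetical bMaxPower of 50 reports 100 mA on a high-speed link and 400 mA on a SuperSpeed one.

#include <stdio.h>

/* Same arithmetic as usb_get_max_power(): unit is 8 mA for SuperSpeed+, else 2 mA */
static unsigned int max_power_mA(int superspeed_or_faster, unsigned int bMaxPower)
{
	return bMaxPower * (superspeed_or_faster ? 8 : 2);
}

int main(void)
{
	printf("high-speed: %u mA\n", max_power_mA(0, 50));	/* 100 mA */
	printf("SuperSpeed: %u mA\n", max_power_mA(1, 50));	/* 400 mA */
	return 0;
}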
// SPDX-License-Identifier: GPL-2.0-only /* * hwmon.c - part of lm_sensors, Linux kernel modules for hardware monitoring * * This file defines the sysfs class "hwmon", for use by sensors drivers. * * Copyright (C) 2005 Mark M. Hoffman <mhoffman@lightlink.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/bitops.h> #include <linux/device.h> #include <linux/err.h> #include <linux/gfp.h> #include <linux/hwmon.h> #include <linux/idr.h> #include <linux/kstrtox.h> #include <linux/list.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/property.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/thermal.h> #define CREATE_TRACE_POINTS #include <trace/events/hwmon.h> #define HWMON_ID_PREFIX "hwmon" #define HWMON_ID_FORMAT HWMON_ID_PREFIX "%d" struct hwmon_device { const char *name; const char *label; struct device dev; const struct hwmon_chip_info *chip; struct list_head tzdata; struct attribute_group group; const struct attribute_group **groups; }; #define to_hwmon_device(d) container_of(d, struct hwmon_device, dev) #define MAX_SYSFS_ATTR_NAME_LENGTH 32 struct hwmon_device_attribute { struct device_attribute dev_attr; const struct hwmon_ops *ops; enum hwmon_sensor_types type; u32 attr; int index; char name[MAX_SYSFS_ATTR_NAME_LENGTH]; }; #define to_hwmon_attr(d) \ container_of(d, struct hwmon_device_attribute, dev_attr) #define to_dev_attr(a) container_of(a, struct device_attribute, attr) /* * Thermal zone information */ struct hwmon_thermal_data { struct list_head node; /* hwmon tzdata list entry */ struct device *dev; /* Reference to hwmon device */ int index; /* sensor index */ struct thermal_zone_device *tzd;/* thermal zone device */ }; static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_hwmon_device(dev)->name); } static DEVICE_ATTR_RO(name); static ssize_t label_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", to_hwmon_device(dev)->label); } static DEVICE_ATTR_RO(label); static struct attribute *hwmon_dev_attrs[] = { &dev_attr_name.attr, &dev_attr_label.attr, NULL }; static umode_t
hwmon_dev_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) { struct device *dev = kobj_to_dev(kobj); struct hwmon_device *hdev = to_hwmon_device(dev); if (attr == &dev_attr_name.attr && hdev->name == NULL) return 0; if (attr == &dev_attr_label.attr && hdev->label == NULL) return 0; return attr->mode; } static const struct attribute_group hwmon_dev_attr_group = { .attrs = hwmon_dev_attrs, .is_visible = hwmon_dev_attr_is_visible, }; static const struct attribute_group *hwmon_dev_attr_groups[] = { &hwmon_dev_attr_group, NULL }; static void hwmon_free_attrs(struct attribute **attrs) { int i; for (i = 0; attrs[i]; i++) { struct device_attribute *dattr = to_dev_attr(attrs[i]); struct hwmon_device_attribute *hattr = to_hwmon_attr(dattr); kfree(hattr); } kfree(attrs); } static void hwmon_dev_release(struct device *dev) { struct hwmon_device *hwdev = to_hwmon_device(dev); if (hwdev->group.attrs) hwmon_free_attrs(hwdev->group.attrs); kfree(hwdev->groups); kfree(hwdev->label); kfree(hwdev); } static struct class hwmon_class = { .name = "hwmon", .dev_groups = hwmon_dev_attr_groups, .dev_release = hwmon_dev_release, }; static DEFINE_IDA(hwmon_ida); /* Thermal zone handling */ /* * The complex conditional is necessary to avoid a cyclic dependency * between hwmon and thermal_sys modules. */ #ifdef CONFIG_THERMAL_OF static int hwmon_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { struct hwmon_thermal_data *tdata = thermal_zone_device_priv(tz); struct hwmon_device *hwdev = to_hwmon_device(tdata->dev); int ret; long t; ret = hwdev->chip->ops->read(tdata->dev, hwmon_temp, hwmon_temp_input, tdata->index, &t); if (ret < 0) return ret; *temp = t; return 0; } static int hwmon_thermal_set_trips(struct thermal_zone_device *tz, int low, int high) { struct hwmon_thermal_data *tdata = thermal_zone_device_priv(tz); struct hwmon_device *hwdev = to_hwmon_device(tdata->dev); const struct hwmon_chip_info *chip = hwdev->chip; const struct hwmon_channel_info * const *info = chip->info; unsigned int i; int err; if (!chip->ops->write) return 0; for (i = 0; info[i] && info[i]->type != hwmon_temp; i++) continue; if (!info[i]) return 0; if (info[i]->config[tdata->index] & HWMON_T_MIN) { err = chip->ops->write(tdata->dev, hwmon_temp, hwmon_temp_min, tdata->index, low); if (err && err != -EOPNOTSUPP) return err; } if (info[i]->config[tdata->index] & HWMON_T_MAX) { err = chip->ops->write(tdata->dev, hwmon_temp, hwmon_temp_max, tdata->index, high); if (err && err != -EOPNOTSUPP) return err; } return 0; } static const struct thermal_zone_device_ops hwmon_thermal_ops = { .get_temp = hwmon_thermal_get_temp, .set_trips = hwmon_thermal_set_trips, }; static void hwmon_thermal_remove_sensor(void *data) { list_del(data); } static int hwmon_thermal_add_sensor(struct device *dev, int index) { struct hwmon_device *hwdev = to_hwmon_device(dev); struct hwmon_thermal_data *tdata; struct thermal_zone_device *tzd; int err; tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); if (!tdata) return -ENOMEM; tdata->dev = dev; tdata->index = index; tzd = devm_thermal_of_zone_register(dev, index, tdata, &hwmon_thermal_ops); if (IS_ERR(tzd)) { if (PTR_ERR(tzd) != -ENODEV) return PTR_ERR(tzd); dev_info(dev, "temp%d_input not attached to any thermal zone\n", index + 1); devm_kfree(dev, tdata); return 0; } err = devm_add_action(dev, hwmon_thermal_remove_sensor, &tdata->node); if (err) return err; tdata->tzd = tzd; list_add(&tdata->node, &hwdev->tzdata); return 0; } static int hwmon_thermal_register_sensors(struct device 
*dev) { struct hwmon_device *hwdev = to_hwmon_device(dev); const struct hwmon_chip_info *chip = hwdev->chip; const struct hwmon_channel_info * const *info = chip->info; void *drvdata = dev_get_drvdata(dev); int i; for (i = 1; info[i]; i++) { int j; if (info[i]->type != hwmon_temp) continue; for (j = 0; info[i]->config[j]; j++) { int err; if (!(info[i]->config[j] & HWMON_T_INPUT) || !chip->ops->is_visible(drvdata, hwmon_temp, hwmon_temp_input, j)) continue; err = hwmon_thermal_add_sensor(dev, j); if (err) return err; } } return 0; } static void hwmon_thermal_notify(struct device *dev, int index) { struct hwmon_device *hwdev = to_hwmon_device(dev); struct hwmon_thermal_data *tzdata; list_for_each_entry(tzdata, &hwdev->tzdata, node) { if (tzdata->index == index) { thermal_zone_device_update(tzdata->tzd, THERMAL_EVENT_UNSPECIFIED); } } } #else static int hwmon_thermal_register_sensors(struct device *dev) { return 0; } static void hwmon_thermal_notify(struct device *dev, int index) { } #endif /* IS_REACHABLE(CONFIG_THERMAL) && ... */ static int hwmon_attr_base(enum hwmon_sensor_types type) { if (type == hwmon_in || type == hwmon_intrusion) return 0; return 1; } /* sysfs attribute management */ static ssize_t hwmon_attr_show(struct device *dev, struct device_attribute *devattr, char *buf) { struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr); long val; int ret; ret = hattr->ops->read(dev, hattr->type, hattr->attr, hattr->index, &val); if (ret < 0) return ret; trace_hwmon_attr_show(hattr->index + hwmon_attr_base(hattr->type), hattr->name, val); return sprintf(buf, "%ld\n", val); } static ssize_t hwmon_attr_show_string(struct device *dev, struct device_attribute *devattr, char *buf) { struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr); enum hwmon_sensor_types type = hattr->type; const char *s; int ret; ret = hattr->ops->read_string(dev, hattr->type, hattr->attr, hattr->index, &s); if (ret < 0) return ret; trace_hwmon_attr_show_string(hattr->index + hwmon_attr_base(type), hattr->name, s); return sprintf(buf, "%s\n", s); } static ssize_t hwmon_attr_store(struct device *dev, struct device_attribute *devattr, const char *buf, size_t count) { struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr); long val; int ret; ret = kstrtol(buf, 10, &val); if (ret < 0) return ret; ret = hattr->ops->write(dev, hattr->type, hattr->attr, hattr->index, val); if (ret < 0) return ret; trace_hwmon_attr_store(hattr->index + hwmon_attr_base(hattr->type), hattr->name, val); return count; } static bool is_string_attr(enum hwmon_sensor_types type, u32 attr) { return (type == hwmon_temp && attr == hwmon_temp_label) || (type == hwmon_in && attr == hwmon_in_label) || (type == hwmon_curr && attr == hwmon_curr_label) || (type == hwmon_power && attr == hwmon_power_label) || (type == hwmon_energy && attr == hwmon_energy_label) || (type == hwmon_humidity && attr == hwmon_humidity_label) || (type == hwmon_fan && attr == hwmon_fan_label); } static struct attribute *hwmon_genattr(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int index, const char *template, const struct hwmon_ops *ops) { struct hwmon_device_attribute *hattr; struct device_attribute *dattr; struct attribute *a; umode_t mode; const char *name; bool is_string = is_string_attr(type, attr); /* The attribute is invisible if there is no template string */ if (!template) return ERR_PTR(-ENOENT); mode = ops->is_visible(drvdata, type, attr, index); if (!mode) return ERR_PTR(-ENOENT); if ((mode & 0444) && ((is_string && 
!ops->read_string) || (!is_string && !ops->read))) return ERR_PTR(-EINVAL); if ((mode & 0222) && !ops->write) return ERR_PTR(-EINVAL); hattr = kzalloc(sizeof(*hattr), GFP_KERNEL); if (!hattr) return ERR_PTR(-ENOMEM); if (type == hwmon_chip) { name = template; } else { scnprintf(hattr->name, sizeof(hattr->name), template, index + hwmon_attr_base(type)); name = hattr->name; } hattr->type = type; hattr->attr = attr; hattr->index = index; hattr->ops = ops; dattr = &hattr->dev_attr; dattr->show = is_string ? hwmon_attr_show_string : hwmon_attr_show; dattr->store = hwmon_attr_store; a = &dattr->attr; sysfs_attr_init(a); a->name = name; a->mode = mode; return a; } /* * Chip attributes are not attribute templates but actual sysfs attributes. * See hwmon_genattr() for special handling. */ static const char * const hwmon_chip_attrs[] = { [hwmon_chip_temp_reset_history] = "temp_reset_history", [hwmon_chip_in_reset_history] = "in_reset_history", [hwmon_chip_curr_reset_history] = "curr_reset_history", [hwmon_chip_power_reset_history] = "power_reset_history", [hwmon_chip_update_interval] = "update_interval", [hwmon_chip_alarms] = "alarms", [hwmon_chip_samples] = "samples", [hwmon_chip_curr_samples] = "curr_samples", [hwmon_chip_in_samples] = "in_samples", [hwmon_chip_power_samples] = "power_samples", [hwmon_chip_temp_samples] = "temp_samples", [hwmon_chip_beep_enable] = "beep_enable", }; static const char * const hwmon_temp_attr_templates[] = { [hwmon_temp_enable] = "temp%d_enable", [hwmon_temp_input] = "temp%d_input", [hwmon_temp_type] = "temp%d_type", [hwmon_temp_lcrit] = "temp%d_lcrit", [hwmon_temp_lcrit_hyst] = "temp%d_lcrit_hyst", [hwmon_temp_min] = "temp%d_min", [hwmon_temp_min_hyst] = "temp%d_min_hyst", [hwmon_temp_max] = "temp%d_max", [hwmon_temp_max_hyst] = "temp%d_max_hyst", [hwmon_temp_crit] = "temp%d_crit", [hwmon_temp_crit_hyst] = "temp%d_crit_hyst", [hwmon_temp_emergency] = "temp%d_emergency", [hwmon_temp_emergency_hyst] = "temp%d_emergency_hyst", [hwmon_temp_alarm] = "temp%d_alarm", [hwmon_temp_lcrit_alarm] = "temp%d_lcrit_alarm", [hwmon_temp_min_alarm] = "temp%d_min_alarm", [hwmon_temp_max_alarm] = "temp%d_max_alarm", [hwmon_temp_crit_alarm] = "temp%d_crit_alarm", [hwmon_temp_emergency_alarm] = "temp%d_emergency_alarm", [hwmon_temp_fault] = "temp%d_fault", [hwmon_temp_offset] = "temp%d_offset", [hwmon_temp_label] = "temp%d_label", [hwmon_temp_lowest] = "temp%d_lowest", [hwmon_temp_highest] = "temp%d_highest", [hwmon_temp_reset_history] = "temp%d_reset_history", [hwmon_temp_rated_min] = "temp%d_rated_min", [hwmon_temp_rated_max] = "temp%d_rated_max", [hwmon_temp_beep] = "temp%d_beep", }; static const char * const hwmon_in_attr_templates[] = { [hwmon_in_enable] = "in%d_enable", [hwmon_in_input] = "in%d_input", [hwmon_in_min] = "in%d_min", [hwmon_in_max] = "in%d_max", [hwmon_in_lcrit] = "in%d_lcrit", [hwmon_in_crit] = "in%d_crit", [hwmon_in_average] = "in%d_average", [hwmon_in_lowest] = "in%d_lowest", [hwmon_in_highest] = "in%d_highest", [hwmon_in_reset_history] = "in%d_reset_history", [hwmon_in_label] = "in%d_label", [hwmon_in_alarm] = "in%d_alarm", [hwmon_in_min_alarm] = "in%d_min_alarm", [hwmon_in_max_alarm] = "in%d_max_alarm", [hwmon_in_lcrit_alarm] = "in%d_lcrit_alarm", [hwmon_in_crit_alarm] = "in%d_crit_alarm", [hwmon_in_rated_min] = "in%d_rated_min", [hwmon_in_rated_max] = "in%d_rated_max", [hwmon_in_beep] = "in%d_beep", [hwmon_in_fault] = "in%d_fault", }; static const char * const hwmon_curr_attr_templates[] = { [hwmon_curr_enable] = "curr%d_enable", [hwmon_curr_input] = 
"curr%d_input", [hwmon_curr_min] = "curr%d_min", [hwmon_curr_max] = "curr%d_max", [hwmon_curr_lcrit] = "curr%d_lcrit", [hwmon_curr_crit] = "curr%d_crit", [hwmon_curr_average] = "curr%d_average", [hwmon_curr_lowest] = "curr%d_lowest", [hwmon_curr_highest] = "curr%d_highest", [hwmon_curr_reset_history] = "curr%d_reset_history", [hwmon_curr_label] = "curr%d_label", [hwmon_curr_alarm] = "curr%d_alarm", [hwmon_curr_min_alarm] = "curr%d_min_alarm", [hwmon_curr_max_alarm] = "curr%d_max_alarm", [hwmon_curr_lcrit_alarm] = "curr%d_lcrit_alarm", [hwmon_curr_crit_alarm] = "curr%d_crit_alarm", [hwmon_curr_rated_min] = "curr%d_rated_min", [hwmon_curr_rated_max] = "curr%d_rated_max", [hwmon_curr_beep] = "curr%d_beep", }; static const char * const hwmon_power_attr_templates[] = { [hwmon_power_enable] = "power%d_enable", [hwmon_power_average] = "power%d_average", [hwmon_power_average_interval] = "power%d_average_interval", [hwmon_power_average_interval_max] = "power%d_interval_max", [hwmon_power_average_interval_min] = "power%d_interval_min", [hwmon_power_average_highest] = "power%d_average_highest", [hwmon_power_average_lowest] = "power%d_average_lowest", [hwmon_power_average_max] = "power%d_average_max", [hwmon_power_average_min] = "power%d_average_min", [hwmon_power_input] = "power%d_input", [hwmon_power_input_highest] = "power%d_input_highest", [hwmon_power_input_lowest] = "power%d_input_lowest", [hwmon_power_reset_history] = "power%d_reset_history", [hwmon_power_accuracy] = "power%d_accuracy", [hwmon_power_cap] = "power%d_cap", [hwmon_power_cap_hyst] = "power%d_cap_hyst", [hwmon_power_cap_max] = "power%d_cap_max", [hwmon_power_cap_min] = "power%d_cap_min", [hwmon_power_min] = "power%d_min", [hwmon_power_max] = "power%d_max", [hwmon_power_lcrit] = "power%d_lcrit", [hwmon_power_crit] = "power%d_crit", [hwmon_power_label] = "power%d_label", [hwmon_power_alarm] = "power%d_alarm", [hwmon_power_cap_alarm] = "power%d_cap_alarm", [hwmon_power_min_alarm] = "power%d_min_alarm", [hwmon_power_max_alarm] = "power%d_max_alarm", [hwmon_power_lcrit_alarm] = "power%d_lcrit_alarm", [hwmon_power_crit_alarm] = "power%d_crit_alarm", [hwmon_power_rated_min] = "power%d_rated_min", [hwmon_power_rated_max] = "power%d_rated_max", }; static const char * const hwmon_energy_attr_templates[] = { [hwmon_energy_enable] = "energy%d_enable", [hwmon_energy_input] = "energy%d_input", [hwmon_energy_label] = "energy%d_label", }; static const char * const hwmon_humidity_attr_templates[] = { [hwmon_humidity_enable] = "humidity%d_enable", [hwmon_humidity_input] = "humidity%d_input", [hwmon_humidity_label] = "humidity%d_label", [hwmon_humidity_min] = "humidity%d_min", [hwmon_humidity_min_hyst] = "humidity%d_min_hyst", [hwmon_humidity_max] = "humidity%d_max", [hwmon_humidity_max_hyst] = "humidity%d_max_hyst", [hwmon_humidity_alarm] = "humidity%d_alarm", [hwmon_humidity_fault] = "humidity%d_fault", [hwmon_humidity_rated_min] = "humidity%d_rated_min", [hwmon_humidity_rated_max] = "humidity%d_rated_max", [hwmon_humidity_min_alarm] = "humidity%d_min_alarm", [hwmon_humidity_max_alarm] = "humidity%d_max_alarm", }; static const char * const hwmon_fan_attr_templates[] = { [hwmon_fan_enable] = "fan%d_enable", [hwmon_fan_input] = "fan%d_input", [hwmon_fan_label] = "fan%d_label", [hwmon_fan_min] = "fan%d_min", [hwmon_fan_max] = "fan%d_max", [hwmon_fan_div] = "fan%d_div", [hwmon_fan_pulses] = "fan%d_pulses", [hwmon_fan_target] = "fan%d_target", [hwmon_fan_alarm] = "fan%d_alarm", [hwmon_fan_min_alarm] = "fan%d_min_alarm", [hwmon_fan_max_alarm] = 
"fan%d_max_alarm", [hwmon_fan_fault] = "fan%d_fault", [hwmon_fan_beep] = "fan%d_beep", }; static const char * const hwmon_pwm_attr_templates[] = { [hwmon_pwm_input] = "pwm%d", [hwmon_pwm_enable] = "pwm%d_enable", [hwmon_pwm_mode] = "pwm%d_mode", [hwmon_pwm_freq] = "pwm%d_freq", [hwmon_pwm_auto_channels_temp] = "pwm%d_auto_channels_temp", }; static const char * const hwmon_intrusion_attr_templates[] = { [hwmon_intrusion_alarm] = "intrusion%d_alarm", [hwmon_intrusion_beep] = "intrusion%d_beep", }; static const char * const *__templates[] = { [hwmon_chip] = hwmon_chip_attrs, [hwmon_temp] = hwmon_temp_attr_templates, [hwmon_in] = hwmon_in_attr_templates, [hwmon_curr] = hwmon_curr_attr_templates, [hwmon_power] = hwmon_power_attr_templates, [hwmon_energy] = hwmon_energy_attr_templates, [hwmon_humidity] = hwmon_humidity_attr_templates, [hwmon_fan] = hwmon_fan_attr_templates, [hwmon_pwm] = hwmon_pwm_attr_templates, [hwmon_intrusion] = hwmon_intrusion_attr_templates, }; static const int __templates_size[] = { [hwmon_chip] = ARRAY_SIZE(hwmon_chip_attrs), [hwmon_temp] = ARRAY_SIZE(hwmon_temp_attr_templates), [hwmon_in] = ARRAY_SIZE(hwmon_in_attr_templates), [hwmon_curr] = ARRAY_SIZE(hwmon_curr_attr_templates), [hwmon_power] = ARRAY_SIZE(hwmon_power_attr_templates), [hwmon_energy] = ARRAY_SIZE(hwmon_energy_attr_templates), [hwmon_humidity] = ARRAY_SIZE(hwmon_humidity_attr_templates), [hwmon_fan] = ARRAY_SIZE(hwmon_fan_attr_templates), [hwmon_pwm] = ARRAY_SIZE(hwmon_pwm_attr_templates), [hwmon_intrusion] = ARRAY_SIZE(hwmon_intrusion_attr_templates), }; int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel) { char event[MAX_SYSFS_ATTR_NAME_LENGTH + 5]; char sattr[MAX_SYSFS_ATTR_NAME_LENGTH]; char *envp[] = { event, NULL }; const char * const *templates; const char *template; int base; if (type >= ARRAY_SIZE(__templates)) return -EINVAL; if (attr >= __templates_size[type]) return -EINVAL; templates = __templates[type]; template = templates[attr]; base = hwmon_attr_base(type); scnprintf(sattr, MAX_SYSFS_ATTR_NAME_LENGTH, template, base + channel); scnprintf(event, sizeof(event), "NAME=%s", sattr); sysfs_notify(&dev->kobj, NULL, sattr); kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); if (type == hwmon_temp) hwmon_thermal_notify(dev, channel); return 0; } EXPORT_SYMBOL_GPL(hwmon_notify_event); static int hwmon_num_channel_attrs(const struct hwmon_channel_info *info) { int i, n; for (i = n = 0; info->config[i]; i++) n += hweight32(info->config[i]); return n; } static int hwmon_genattrs(const void *drvdata, struct attribute **attrs, const struct hwmon_ops *ops, const struct hwmon_channel_info *info) { const char * const *templates; int template_size; int i, aindex = 0; if (info->type >= ARRAY_SIZE(__templates)) return -EINVAL; templates = __templates[info->type]; template_size = __templates_size[info->type]; for (i = 0; info->config[i]; i++) { u32 attr_mask = info->config[i]; u32 attr; while (attr_mask) { struct attribute *a; attr = __ffs(attr_mask); attr_mask &= ~BIT(attr); if (attr >= template_size) return -EINVAL; a = hwmon_genattr(drvdata, info->type, attr, i, templates[attr], ops); if (IS_ERR(a)) { if (PTR_ERR(a) != -ENOENT) return PTR_ERR(a); continue; } attrs[aindex++] = a; } } return aindex; } static struct attribute ** __hwmon_create_attrs(const void *drvdata, const struct hwmon_chip_info *chip) { int ret, i, aindex = 0, nattrs = 0; struct attribute **attrs; for (i = 0; chip->info[i]; i++) nattrs += hwmon_num_channel_attrs(chip->info[i]); if (nattrs == 0) 
return ERR_PTR(-EINVAL); attrs = kcalloc(nattrs + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) return ERR_PTR(-ENOMEM); for (i = 0; chip->info[i]; i++) { ret = hwmon_genattrs(drvdata, &attrs[aindex], chip->ops, chip->info[i]); if (ret < 0) { hwmon_free_attrs(attrs); return ERR_PTR(ret); } aindex += ret; } return attrs; } static struct device * __hwmon_device_register(struct device *dev, const char *name, void *drvdata, const struct hwmon_chip_info *chip, const struct attribute_group **groups) { struct hwmon_device *hwdev; const char *label; struct device *hdev; struct device *tdev = dev; int i, err, id; /* Complain about invalid characters in hwmon name attribute */ if (name && (!strlen(name) || strpbrk(name, "-* \t\n"))) dev_warn(dev, "hwmon: '%s' is not a valid name attribute, please fix\n", name); id = ida_alloc(&hwmon_ida, GFP_KERNEL); if (id < 0) return ERR_PTR(id); hwdev = kzalloc(sizeof(*hwdev), GFP_KERNEL); if (hwdev == NULL) { err = -ENOMEM; goto ida_remove; } hdev = &hwdev->dev; if (chip) { struct attribute **attrs; int ngroups = 2; /* terminating NULL plus &hwdev->groups */ if (groups) for (i = 0; groups[i]; i++) ngroups++; hwdev->groups = kcalloc(ngroups, sizeof(*groups), GFP_KERNEL); if (!hwdev->groups) { err = -ENOMEM; goto free_hwmon; } attrs = __hwmon_create_attrs(drvdata, chip); if (IS_ERR(attrs)) { err = PTR_ERR(attrs); goto free_hwmon; } hwdev->group.attrs = attrs; ngroups = 0; hwdev->groups[ngroups++] = &hwdev->group; if (groups) { for (i = 0; groups[i]; i++) hwdev->groups[ngroups++] = groups[i]; } hdev->groups = hwdev->groups; } else { hdev->groups = groups; } if (dev && device_property_present(dev, "label")) { err = device_property_read_string(dev, "label", &label); if (err < 0) goto free_hwmon; hwdev->label = kstrdup(label, GFP_KERNEL); if (hwdev->label == NULL) { err = -ENOMEM; goto free_hwmon; } } hwdev->name = name; hdev->class = &hwmon_class; hdev->parent = dev; while (tdev && !tdev->of_node) tdev = tdev->parent; hdev->of_node = tdev ? tdev->of_node : NULL; hwdev->chip = chip; dev_set_drvdata(hdev, drvdata); dev_set_name(hdev, HWMON_ID_FORMAT, id); err = device_register(hdev); if (err) { put_device(hdev); goto ida_remove; } INIT_LIST_HEAD(&hwdev->tzdata); if (hdev->of_node && chip && chip->ops->read && chip->info[0]->type == hwmon_chip && (chip->info[0]->config[0] & HWMON_C_REGISTER_TZ)) { err = hwmon_thermal_register_sensors(hdev); if (err) { device_unregister(hdev); /* * Don't worry about hwdev; hwmon_dev_release(), called * from device_unregister(), will free it. */ goto ida_remove; } } return hdev; free_hwmon: hwmon_dev_release(hdev); ida_remove: ida_free(&hwmon_ida, id); return ERR_PTR(err); } /** * hwmon_device_register_with_groups - register w/ hwmon * @dev: the parent device * @name: hwmon name attribute * @drvdata: driver data to attach to created device * @groups: List of attribute groups to create * * hwmon_device_unregister() must be called when the device is no * longer needed. * * Returns the pointer to the new device. 
*/ struct device * hwmon_device_register_with_groups(struct device *dev, const char *name, void *drvdata, const struct attribute_group **groups) { if (!name) return ERR_PTR(-EINVAL); return __hwmon_device_register(dev, name, drvdata, NULL, groups); } EXPORT_SYMBOL_GPL(hwmon_device_register_with_groups); /** * hwmon_device_register_with_info - register w/ hwmon * @dev: the parent device (mandatory) * @name: hwmon name attribute (mandatory) * @drvdata: driver data to attach to created device (optional) * @chip: pointer to hwmon chip information (mandatory) * @extra_groups: pointer to list of additional non-standard attribute groups * (optional) * * hwmon_device_unregister() must be called when the device is no * longer needed. * * Returns the pointer to the new device. */ struct device * hwmon_device_register_with_info(struct device *dev, const char *name, void *drvdata, const struct hwmon_chip_info *chip, const struct attribute_group **extra_groups) { if (!dev || !name || !chip) return ERR_PTR(-EINVAL); if (!chip->ops || !chip->ops->is_visible || !chip->info) return ERR_PTR(-EINVAL); return __hwmon_device_register(dev, name, drvdata, chip, extra_groups); } EXPORT_SYMBOL_GPL(hwmon_device_register_with_info); /** * hwmon_device_register_for_thermal - register hwmon device for thermal subsystem * @dev: the parent device * @name: hwmon name attribute * @drvdata: driver data to attach to created device * * The use of this function is restricted. It is provided for legacy reasons * and must only be called from the thermal subsystem. * * hwmon_device_unregister() must be called when the device is no * longer needed. * * Returns the pointer to the new device. */ struct device * hwmon_device_register_for_thermal(struct device *dev, const char *name, void *drvdata) { if (!name || !dev) return ERR_PTR(-EINVAL); return __hwmon_device_register(dev, name, drvdata, NULL, NULL); } EXPORT_SYMBOL_NS_GPL(hwmon_device_register_for_thermal, HWMON_THERMAL); /** * hwmon_device_register - register w/ hwmon * @dev: the device to register * * hwmon_device_unregister() must be called when the device is no * longer needed. * * Returns the pointer to the new device. */ struct device *hwmon_device_register(struct device *dev) { dev_warn(dev, "hwmon_device_register() is deprecated. Please convert the driver to use hwmon_device_register_with_info().\n"); return __hwmon_device_register(dev, NULL, NULL, NULL, NULL); } EXPORT_SYMBOL_GPL(hwmon_device_register); /** * hwmon_device_unregister - removes the previously registered class device * * @dev: the class device to destroy */ void hwmon_device_unregister(struct device *dev) { int id; if (likely(sscanf(dev_name(dev), HWMON_ID_FORMAT, &id) == 1)) { device_unregister(dev); ida_free(&hwmon_ida, id); } else dev_dbg(dev->parent, "hwmon_device_unregister() failed: bad class ID!\n"); } EXPORT_SYMBOL_GPL(hwmon_device_unregister); static void devm_hwmon_release(struct device *dev, void *res) { struct device *hwdev = *(struct device **)res; hwmon_device_unregister(hwdev); } /** * devm_hwmon_device_register_with_groups - register w/ hwmon * @dev: the parent device * @name: hwmon name attribute * @drvdata: driver data to attach to created device * @groups: List of attribute groups to create * * Returns the pointer to the new device. The new device is automatically * unregistered with the parent device. 
*/ struct device * devm_hwmon_device_register_with_groups(struct device *dev, const char *name, void *drvdata, const struct attribute_group **groups) { struct device **ptr, *hwdev; if (!dev) return ERR_PTR(-EINVAL); ptr = devres_alloc(devm_hwmon_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); hwdev = hwmon_device_register_with_groups(dev, name, drvdata, groups); if (IS_ERR(hwdev)) goto error; *ptr = hwdev; devres_add(dev, ptr); return hwdev; error: devres_free(ptr); return hwdev; } EXPORT_SYMBOL_GPL(devm_hwmon_device_register_with_groups); /** * devm_hwmon_device_register_with_info - register w/ hwmon * @dev: the parent device * @name: hwmon name attribute * @drvdata: driver data to attach to created device * @chip: pointer to hwmon chip information * @extra_groups: pointer to list of driver specific attribute groups * * Returns the pointer to the new device. The new device is automatically * unregistered with the parent device. */ struct device * devm_hwmon_device_register_with_info(struct device *dev, const char *name, void *drvdata, const struct hwmon_chip_info *chip, const struct attribute_group **extra_groups) { struct device **ptr, *hwdev; if (!dev) return ERR_PTR(-EINVAL); ptr = devres_alloc(devm_hwmon_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); hwdev = hwmon_device_register_with_info(dev, name, drvdata, chip, extra_groups); if (IS_ERR(hwdev)) goto error; *ptr = hwdev; devres_add(dev, ptr); return hwdev; error: devres_free(ptr); return hwdev; } EXPORT_SYMBOL_GPL(devm_hwmon_device_register_with_info); static int devm_hwmon_match(struct device *dev, void *res, void *data) { struct device **hwdev = res; return *hwdev == data; } /** * devm_hwmon_device_unregister - removes a previously registered hwmon device * * @dev: the parent device of the device to unregister */ void devm_hwmon_device_unregister(struct device *dev) { WARN_ON(devres_release(dev, devm_hwmon_release, devm_hwmon_match, dev)); } EXPORT_SYMBOL_GPL(devm_hwmon_device_unregister); static char *__hwmon_sanitize_name(struct device *dev, const char *old_name) { char *name, *p; if (dev) name = devm_kstrdup(dev, old_name, GFP_KERNEL); else name = kstrdup(old_name, GFP_KERNEL); if (!name) return ERR_PTR(-ENOMEM); for (p = name; *p; p++) if (hwmon_is_bad_char(*p)) *p = '_'; return name; } /** * hwmon_sanitize_name - Replaces invalid characters in a hwmon name * @name: NUL-terminated name * * Allocates a new string where any invalid characters will be replaced * by an underscore. It is the responsibility of the caller to release * the memory. * * Returns newly allocated name, or ERR_PTR on error. */ char *hwmon_sanitize_name(const char *name) { return __hwmon_sanitize_name(NULL, name); } EXPORT_SYMBOL_GPL(hwmon_sanitize_name); /** * devm_hwmon_sanitize_name - resource managed hwmon_sanitize_name() * @dev: device to allocate memory for * @name: NUL-terminated name * * Allocates a new string where any invalid characters will be replaced * by an underscore. * * Returns newly allocated name, or ERR_PTR on error. 
*/ char *devm_hwmon_sanitize_name(struct device *dev, const char *name) { if (!dev) return ERR_PTR(-EINVAL); return __hwmon_sanitize_name(dev, name); } EXPORT_SYMBOL_GPL(devm_hwmon_sanitize_name); static void __init hwmon_pci_quirks(void) { #if defined CONFIG_X86 && defined CONFIG_PCI struct pci_dev *sb; u16 base; u8 enable; /* Open access to 0x295-0x296 on MSI MS-7031 */ sb = pci_get_device(PCI_VENDOR_ID_ATI, 0x436c, NULL); if (sb) { if (sb->subsystem_vendor == 0x1462 && /* MSI */ sb->subsystem_device == 0x0031) { /* MS-7031 */ pci_read_config_byte(sb, 0x48, &enable); pci_read_config_word(sb, 0x64, &base); if (base == 0 && !(enable & BIT(2))) { dev_info(&sb->dev, "Opening wide generic port at 0x295\n"); pci_write_config_word(sb, 0x64, 0x295); pci_write_config_byte(sb, 0x48, enable | BIT(2)); } } pci_dev_put(sb); } #endif } static int __init hwmon_init(void) { int err; hwmon_pci_quirks(); err = class_register(&hwmon_class); if (err) { pr_err("couldn't register hwmon sysfs class\n"); return err; } return 0; } static void __exit hwmon_exit(void) { class_unregister(&hwmon_class); } subsys_initcall(hwmon_init); module_exit(hwmon_exit); MODULE_AUTHOR("Mark M. Hoffman <mhoffman@lightlink.com>"); MODULE_DESCRIPTION("hardware monitoring sysfs/class support"); MODULE_LICENSE("GPL");
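As a usage illustration for the registration API implemented above, the following sketch registers a hypothetical single-temperature sensor through devm_hwmon_device_register_with_info(); all mychip_* names are invented, the hard-coded reading stands in for real hardware access, and the probe function would be wired into a platform_driver in a real module.

#include <linux/err.h>
#include <linux/hwmon.h>
#include <linux/module.h>
#include <linux/platform_device.h>

/* Every supported attribute is exposed read-only in this sketch. */
static umode_t mychip_is_visible(const void *data, enum hwmon_sensor_types type,
				 u32 attr, int channel)
{
	return 0444;
}

/* A real driver would query the hardware here; 42 degrees C is a stand-in. */
static int mychip_read(struct device *dev, enum hwmon_sensor_types type,
		       u32 attr, int channel, long *val)
{
	*val = 42000;
	return 0;
}

static const struct hwmon_ops mychip_hwmon_ops = {
	.is_visible = mychip_is_visible,
	.read = mychip_read,
};

/* One temperature channel reporting only tempX_input. */
static const struct hwmon_channel_info * const mychip_info[] = {
	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
	NULL
};

static const struct hwmon_chip_info mychip_chip_info = {
	.ops = &mychip_hwmon_ops,
	.info = mychip_info,
};

static int mychip_probe(struct platform_device *pdev)
{
	struct device *hwmon_dev;

	hwmon_dev = devm_hwmon_device_register_with_info(&pdev->dev, "mychip",
							 NULL, &mychip_chip_info,
							 NULL);
	return PTR_ERR_OR_ZERO(hwmon_dev);
}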
// SPDX-License-Identifier: GPL-2.0-or-later /* * Anycast support for IPv6 * Linux INET6 implementation * * Authors: * David L Stevens (dlstevens@us.ibm.com) * * based heavily on net/ipv6/mcast.c */ #include <linux/capability.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/random.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/route.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/checksum.h> #define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) /* anycast address hash table */ static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(acaddr_hash_lock); static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); static u32 inet6_acaddr_hash(struct net *net, const
struct in6_addr *addr) { u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); return hash_32(val, IN6_ADDR_HSIZE_SHIFT); } /* * socket join an anycast group */ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct inet6_dev *idev; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; ASSERT_RTNL(); if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; if (ifindex) dev = __dev_get_by_index(net, ifindex); if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); if (!pac) return -ENOMEM; pac->acl_next = NULL; pac->acl_addr = *addr; if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); } else if (ishost) { err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ dev = __dev_get_by_flags(net, IFF_UP, IFF_UP | IFF_LOOPBACK); } } if (!dev) { err = -ENODEV; goto error; } idev = __in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; else err = -EADDRNOTAVAIL; goto error; } /* reset ishost, now that we have a specific device */ ishost = !idev->cnf.forwarding; pac->acl_ifindex = dev->ifindex; /* XXX * For hosts, allow link-local or matching prefix anycasts. * This obviates the need for propagating anycast routes while * still allowing some non-router anycast participation. */ if (!ipv6_chk_prefix(addr, dev)) { if (ishost) err = -EADDRNOTAVAIL; if (err) goto error; } err = __ipv6_dev_ac_inc(idev, addr); if (!err) { pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; pac = NULL; } error: if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; } /* * socket leave an anycast group */ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev; struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); ASSERT_RTNL(); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && ipv6_addr_equal(&pac->acl_addr, addr)) break; prev_pac = pac; } if (!pac) return -ENOENT; if (prev_pac) prev_pac->acl_next = pac->acl_next; else np->ipv6_ac_list = pac->acl_next; dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); sock_kfree_s(sk, pac, sizeof(*pac)); return 0; } void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); int prev_index; ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; prev_index = 0; while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { dev = __dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } } void ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); if (!np->ipv6_ac_list) return; rtnl_lock(); __ipv6_sock_ac_close(sk); rtnl_unlock(); } static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) { unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); spin_lock(&acaddr_hash_lock); 
hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); spin_unlock(&acaddr_hash_lock); } static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) { spin_lock(&acaddr_hash_lock); hlist_del_init_rcu(&aca->aca_addr_lst); spin_unlock(&acaddr_hash_lock); } static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); } static void aca_free_rcu(struct rcu_head *h) { struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); fib6_info_release(aca->aca_rt); kfree(aca); } static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) { call_rcu(&ac->rcu, aca_free_rcu); } } static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, const struct in6_addr *addr) { struct ifacaddr6 *aca; aca = kzalloc(sizeof(*aca), GFP_ATOMIC); if (!aca) return NULL; aca->aca_addr = *addr; fib6_info_hold(f6i); aca->aca_rt = f6i; INIT_HLIST_NODE(&aca->aca_addr_lst); aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; refcount_set(&aca->aca_refcnt, 1); return aca; } /* * device anycast group inc (add if not found) */ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca; struct fib6_info *f6i; struct net *net; int err; ASSERT_RTNL(); write_lock_bh(&idev->lock); if (idev->dead) { err = -ENODEV; goto out; } for (aca = rtnl_dereference(idev->ac_list); aca; aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; goto out; } } net = dev_net(idev->dev); f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC, NULL); if (IS_ERR(f6i)) { err = PTR_ERR(f6i); goto out; } aca = aca_alloc(f6i, addr); if (!aca) { fib6_info_release(f6i); err = -ENOMEM; goto out; } /* Hold this for addrconf_join_solict() below before we unlock, * it is already exposed via idev->ac_list. 
*/ aca_get(aca); aca->aca_next = idev->ac_list; rcu_assign_pointer(idev->ac_list, aca); write_unlock_bh(&idev->lock); ipv6_add_acaddr_hash(net, aca); ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); aca_put(aca); return 0; out: write_unlock_bh(&idev->lock); return err; } /* * device anycast group decrement */ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca, *prev_aca; ASSERT_RTNL(); write_lock_bh(&idev->lock); prev_aca = NULL; for (aca = rtnl_dereference(idev->ac_list); aca; aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; } if (!aca) { write_unlock_bh(&idev->lock); return -ENOENT; } if (--aca->aca_users > 0) { write_unlock_bh(&idev->lock); return 0; } if (prev_aca) rcu_assign_pointer(prev_aca->aca_next, aca->aca_next); else rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); aca_put(aca); return 0; } /* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev) return -ENODEV; return __ipv6_dev_ac_dec(idev, addr); } void ipv6_ac_destroy_dev(struct inet6_dev *idev) { struct ifacaddr6 *aca; write_lock_bh(&idev->lock); while ((aca = rtnl_dereference(idev->ac_list)) != NULL) { rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); aca_put(aca); write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); } /* * check if the interface has this anycast address * called with rcu_read_lock() */ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev; struct ifacaddr6 *aca; idev = __in6_dev_get(dev); if (idev) { for (aca = rcu_dereference(idev->ac_list); aca; aca = rcu_dereference(aca->aca_next)) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; return aca != NULL; } return false; } /* * check if given interface (or any, if dev==0) has this anycast address */ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { struct net_device *nh_dev; struct ifacaddr6 *aca; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else { unsigned int hash = inet6_acaddr_hash(net, addr); hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], aca_addr_lst) { nh_dev = fib6_info_nh_dev(aca->aca_rt); if (!nh_dev || !net_eq(dev_net(nh_dev), net)) continue; if (ipv6_addr_equal(&aca->aca_addr, addr)) { found = true; break; } } } rcu_read_unlock(); return found; } /* check if this anycast address is link-local on given interface or * is global */ bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr) { return ipv6_chk_acast_addr(net, (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ? 
dev : NULL), addr); } #ifdef CONFIG_PROC_FS struct ac6_iter_state { struct seq_net_private p; struct net_device *dev; }; #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) { struct ac6_iter_state *state = ac6_seq_private(seq); struct net *net = seq_file_net(seq); struct ifacaddr6 *im = NULL; for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; idev = __in6_dev_get(state->dev); if (!idev) continue; im = rcu_dereference(idev->ac_list); if (im) break; } return im; } static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) { struct ac6_iter_state *state = ac6_seq_private(seq); struct inet6_dev *idev; im = rcu_dereference(im->aca_next); while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) break; idev = __in6_dev_get(state->dev); if (!idev) continue; im = rcu_dereference(idev->ac_list); } return im; } static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) { struct ifacaddr6 *im = ac6_get_first(seq); if (im) while (pos && (im = ac6_get_next(seq, im)) != NULL) --pos; return pos ? NULL : im; } static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return ac6_get_idx(seq, *pos); } static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ifacaddr6 *im = ac6_get_next(seq, v); ++*pos; return im; } static void ac6_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ac6_seq_show(struct seq_file *seq, void *v) { struct ifacaddr6 *im = (struct ifacaddr6 *)v; struct ac6_iter_state *state = ac6_seq_private(seq); seq_printf(seq, "%-4d %-15s %pi6 %5d\n", state->dev->ifindex, state->dev->name, &im->aca_addr, im->aca_users); return 0; } static const struct seq_operations ac6_seq_ops = { .start = ac6_seq_start, .next = ac6_seq_next, .stop = ac6_seq_stop, .show = ac6_seq_show, }; int __net_init ac6_proc_init(struct net *net) { if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops, sizeof(struct ac6_iter_state))) return -ENOMEM; return 0; } void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } #endif /* Init / cleanup code */ int __init ipv6_anycast_init(void) { int i; for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); return 0; } void ipv6_anycast_cleanup(void) { int i; spin_lock(&acaddr_hash_lock); for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); spin_unlock(&acaddr_hash_lock); }
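For reference, a hedged userspace sketch of the path that reaches ipv6_sock_ac_join() above: a socket joins an anycast group with the IPV6_JOIN_ANYCAST socket option (value 27 in the kernel UAPI headers), which requires CAP_NET_ADMIN as enforced in the code. The address and interface name below are placeholders.

#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPV6_JOIN_ANYCAST
#define IPV6_JOIN_ANYCAST 27	/* value from include/uapi/linux/in6.h */
#endif

int main(void)
{
	struct ipv6_mreq mreq;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(&mreq, 0, sizeof(mreq));
	/* hypothetical anycast address and interface */
	inet_pton(AF_INET6, "2001:db8::1", &mreq.ipv6mr_multiaddr);
	mreq.ipv6mr_interface = if_nametoindex("eth0");

	/* ends up in ipv6_sock_ac_join(); fails with EPERM without CAP_NET_ADMIN */
	if (setsockopt(fd, IPPROTO_IPV6, IPV6_JOIN_ANYCAST, &mreq, sizeof(mreq)) < 0)
		perror("IPV6_JOIN_ANYCAST");

	close(fd);
	return 0;
}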
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _DELAYED_CALL_H
#define _DELAYED_CALL_H

/*
 * Poor man's closures; I wish we could've done them sanely polymorphic,
 * but...
 */

struct delayed_call {
	void (*fn)(void *);
	void *arg;
};

#define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL}

/* I really wish we had closures with sane typechecking... */
static inline void set_delayed_call(struct delayed_call *call,
		void (*fn)(void *), void *arg)
{
	call->fn = fn;
	call->arg = arg;
}

static inline void do_delayed_call(struct delayed_call *call)
{
	if (call->fn)
		call->fn(call->arg);
}

static inline void clear_delayed_call(struct delayed_call *call)
{
	call->fn = NULL;
}

#endif
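A short, hypothetical sketch of how struct delayed_call is meant to be used: the producer hands the caller both a result and its deferred cleanup via set_delayed_call(), and the caller releases it with do_delayed_call() once the result is no longer needed (a no-op if nothing was registered). The example_* names are invented.

#include <linux/delayed_call.h>
#include <linux/err.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>

static void example_free_link(void *arg)
{
	kfree(arg);
}

/* producer: builds a string and hands its cleanup to the caller */
static const char *example_get_link(struct delayed_call *done)
{
	char *link = kstrdup("/an/example/target", GFP_KERNEL);

	if (!link)
		return ERR_PTR(-ENOMEM);
	set_delayed_call(done, example_free_link, link);
	return link;
}

static void example_caller(void)
{
	DEFINE_DELAYED_CALL(done);
	const char *link = example_get_link(&done);

	if (!IS_ERR(link))
		pr_info("link resolves to %s\n", link);
	do_delayed_call(&done);		/* no-op on the error path, kfree() otherwise */
}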
2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 
2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 
3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 
4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1993 Linus Torvalds
 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 * Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 * Numa awareness, Christoph Lameter, SGI, June 2005
 * Improving global KVA allocator, Uladzislau Rezki, Sony, May 2019
 */

#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/set_memory.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/notifier.h>
#include <linux/rbtree.h>
#include <linux/xarray.h>
#include <linux/io.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/memcontrol.h>
#include <linux/llist.h>
#include <linux/uio.h>
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
#include <linux/overflow.h>
#include <linux/pgtable.h>
#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

#define CREATE_TRACE_POINTS
#include <trace/events/vmalloc.h>

#include "internal.h"
#include "pgalloc-track.h"

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;

static int __init set_nohugeiomap(char *str)
{
	ioremap_max_page_shift = PAGE_SHIFT;
	return 0;
}
early_param("nohugeiomap", set_nohugeiomap);
#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */

#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
static bool __ro_after_init vmap_allow_huge = true;

static int __init set_nohugevmalloc(char *str)
{
	vmap_allow_huge = false;
	return 0;
}
early_param("nohugevmalloc", set_nohugevmalloc);
#else /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
static const bool vmap_allow_huge = false;
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMALLOC */

bool is_vmalloc_addr(const void *x)
{
	unsigned long addr = (unsigned long)kasan_reset_tag(x);

	return addr >= VMALLOC_START && addr < VMALLOC_END;
}
EXPORT_SYMBOL(is_vmalloc_addr);

struct vfree_deferred {
	struct llist_head list;
	struct work_struct wq;
};
static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);

/*** Page table manipulation functions ***/
static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			phys_addr_t phys_addr, pgprot_t prot,
			unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
	pte_t *pte;
	u64 pfn;
	unsigned long size = PAGE_SIZE;

	pfn = phys_addr >> PAGE_SHIFT;
	pte = pte_alloc_kernel_track(pmd, addr, mask);
	if (!pte)
		return -ENOMEM;
	do {
		BUG_ON(!pte_none(ptep_get(pte)));

#ifdef CONFIG_HUGETLB_PAGE
		size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift);
		if (size != PAGE_SIZE) {
			pte_t entry = pfn_pte(pfn, prot);

			entry = arch_make_huge_pte(entry, ilog2(size), 0);
			set_huge_pte_at(&init_mm, addr, pte, entry, size);
			pfn += PFN_DOWN(size);
			continue;
		}
#endif
		set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte += PFN_DOWN(size), addr += size, addr != end);
	*mask |=
PGTBL_PTE_MODIFIED; return 0; } static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift) { if (max_page_shift < PMD_SHIFT) return 0; if (!arch_vmap_pmd_supported(prot)) return 0; if ((end - addr) != PMD_SIZE) return 0; if (!IS_ALIGNED(addr, PMD_SIZE)) return 0; if (!IS_ALIGNED(phys_addr, PMD_SIZE)) return 0; if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr)) return 0; return pmd_set_huge(pmd, phys_addr, prot); } static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift, pgtbl_mod_mask *mask) { pmd_t *pmd; unsigned long next; pmd = pmd_alloc_track(&init_mm, pud, addr, mask); if (!pmd) return -ENOMEM; do { next = pmd_addr_end(addr, end); if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot, max_page_shift)) { *mask |= PGTBL_PMD_MODIFIED; continue; } if (vmap_pte_range(pmd, addr, next, phys_addr, prot, max_page_shift, mask)) return -ENOMEM; } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); return 0; } static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift) { if (max_page_shift < PUD_SHIFT) return 0; if (!arch_vmap_pud_supported(prot)) return 0; if ((end - addr) != PUD_SIZE) return 0; if (!IS_ALIGNED(addr, PUD_SIZE)) return 0; if (!IS_ALIGNED(phys_addr, PUD_SIZE)) return 0; if (pud_present(*pud) && !pud_free_pmd_page(pud, addr)) return 0; return pud_set_huge(pud, phys_addr, prot); } static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift, pgtbl_mod_mask *mask) { pud_t *pud; unsigned long next; pud = pud_alloc_track(&init_mm, p4d, addr, mask); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot, max_page_shift)) { *mask |= PGTBL_PUD_MODIFIED; continue; } if (vmap_pmd_range(pud, addr, next, phys_addr, prot, max_page_shift, mask)) return -ENOMEM; } while (pud++, phys_addr += (next - addr), addr = next, addr != end); return 0; } static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift) { if (max_page_shift < P4D_SHIFT) return 0; if (!arch_vmap_p4d_supported(prot)) return 0; if ((end - addr) != P4D_SIZE) return 0; if (!IS_ALIGNED(addr, P4D_SIZE)) return 0; if (!IS_ALIGNED(phys_addr, P4D_SIZE)) return 0; if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr)) return 0; return p4d_set_huge(p4d, phys_addr, prot); } static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift, pgtbl_mod_mask *mask) { p4d_t *p4d; unsigned long next; p4d = p4d_alloc_track(&init_mm, pgd, addr, mask); if (!p4d) return -ENOMEM; do { next = p4d_addr_end(addr, end); if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot, max_page_shift)) { *mask |= PGTBL_P4D_MODIFIED; continue; } if (vmap_pud_range(p4d, addr, next, phys_addr, prot, max_page_shift, mask)) return -ENOMEM; } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); return 0; } static int vmap_range_noflush(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift) { pgd_t *pgd; unsigned long start; unsigned long next; int err; pgtbl_mod_mask mask = 0; might_sleep(); BUG_ON(addr >= end); start = 
addr; pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, max_page_shift, &mask); if (err) break; } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); return err; } int vmap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { int err; err = vmap_range_noflush(addr, end, phys_addr, pgprot_nx(prot), ioremap_max_page_shift); flush_cache_vmap(addr, end); if (!err) err = kmsan_ioremap_page_range(addr, end, phys_addr, prot, ioremap_max_page_shift); return err; } int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { struct vm_struct *area; area = find_vm_area((void *)addr); if (!area || !(area->flags & VM_IOREMAP)) { WARN_ONCE(1, "vm_area at addr %lx is not marked as VM_IOREMAP\n", addr); return -EINVAL; } if (addr != (unsigned long)area->addr || (void *)end != area->addr + get_vm_area_size(area)) { WARN_ONCE(1, "ioremap request [%lx,%lx) doesn't match vm_area [%lx, %lx)\n", addr, end, (long)area->addr, (long)area->addr + get_vm_area_size(area)); return -ERANGE; } return vmap_page_range(addr, end, phys_addr, prot); } static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pgtbl_mod_mask *mask) { pte_t *pte; pte = pte_offset_kernel(pmd, addr); do { pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); WARN_ON(!pte_none(ptent) && !pte_present(ptent)); } while (pte++, addr += PAGE_SIZE, addr != end); *mask |= PGTBL_PTE_MODIFIED; } static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, pgtbl_mod_mask *mask) { pmd_t *pmd; unsigned long next; int cleared; pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); cleared = pmd_clear_huge(pmd); if (cleared || pmd_bad(*pmd)) *mask |= PGTBL_PMD_MODIFIED; if (cleared) continue; if (pmd_none_or_clear_bad(pmd)) continue; vunmap_pte_range(pmd, addr, next, mask); cond_resched(); } while (pmd++, addr = next, addr != end); } static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, pgtbl_mod_mask *mask) { pud_t *pud; unsigned long next; int cleared; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); cleared = pud_clear_huge(pud); if (cleared || pud_bad(*pud)) *mask |= PGTBL_PUD_MODIFIED; if (cleared) continue; if (pud_none_or_clear_bad(pud)) continue; vunmap_pmd_range(pud, addr, next, mask); } while (pud++, addr = next, addr != end); } static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, pgtbl_mod_mask *mask) { p4d_t *p4d; unsigned long next; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); p4d_clear_huge(p4d); if (p4d_bad(*p4d)) *mask |= PGTBL_P4D_MODIFIED; if (p4d_none_or_clear_bad(p4d)) continue; vunmap_pud_range(p4d, addr, next, mask); } while (p4d++, addr = next, addr != end); } /* * vunmap_range_noflush is similar to vunmap_range, but does not * flush caches or TLBs. * * The caller is responsible for calling flush_cache_vmap() before calling * this function, and flush_tlb_kernel_range after it has returned * successfully (and before the addresses are expected to cause a page fault * or be re-mapped for something else, if TLB flushes are being delayed or * coalesced). * * This is an internal function only. Do not use outside mm/. 
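 *
 * For illustration only (this mirrors what vunmap_range() further below
 * does), a typical caller sequence looks roughly like:
 *
 *	flush_cache_vunmap(start, end);
 *	vunmap_range_noflush(start, end);
 *	flush_tlb_kernel_range(start, end);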
*/ void __vunmap_range_noflush(unsigned long start, unsigned long end) { unsigned long next; pgd_t *pgd; unsigned long addr = start; pgtbl_mod_mask mask = 0; BUG_ON(addr >= end); pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); if (pgd_bad(*pgd)) mask |= PGTBL_PGD_MODIFIED; if (pgd_none_or_clear_bad(pgd)) continue; vunmap_p4d_range(pgd, addr, next, &mask); } while (pgd++, addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); } void vunmap_range_noflush(unsigned long start, unsigned long end) { kmsan_vunmap_range_noflush(start, end); __vunmap_range_noflush(start, end); } /** * vunmap_range - unmap kernel virtual addresses * @addr: start of the VM area to unmap * @end: end of the VM area to unmap (non-inclusive) * * Clears any present PTEs in the virtual address range, flushes TLBs and * caches. Any subsequent access to the address before it has been re-mapped * is a kernel bug. */ void vunmap_range(unsigned long addr, unsigned long end) { flush_cache_vunmap(addr, end); vunmap_range_noflush(addr, end); flush_tlb_kernel_range(addr, end); } static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr, pgtbl_mod_mask *mask) { pte_t *pte; /* * nr is a running index into the array which helps higher level * callers keep track of where we're up to. */ pte = pte_alloc_kernel_track(pmd, addr, mask); if (!pte) return -ENOMEM; do { struct page *page = pages[*nr]; if (WARN_ON(!pte_none(ptep_get(pte)))) return -EBUSY; if (WARN_ON(!page)) return -ENOMEM; if (WARN_ON(!pfn_valid(page_to_pfn(page)))) return -EINVAL; set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); (*nr)++; } while (pte++, addr += PAGE_SIZE, addr != end); *mask |= PGTBL_PTE_MODIFIED; return 0; } static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr, pgtbl_mod_mask *mask) { pmd_t *pmd; unsigned long next; pmd = pmd_alloc_track(&init_mm, pud, addr, mask); if (!pmd) return -ENOMEM; do { next = pmd_addr_end(addr, end); if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask)) return -ENOMEM; } while (pmd++, addr = next, addr != end); return 0; } static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr, pgtbl_mod_mask *mask) { pud_t *pud; unsigned long next; pud = pud_alloc_track(&init_mm, p4d, addr, mask); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask)) return -ENOMEM; } while (pud++, addr = next, addr != end); return 0; } static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr, pgtbl_mod_mask *mask) { p4d_t *p4d; unsigned long next; p4d = p4d_alloc_track(&init_mm, pgd, addr, mask); if (!p4d) return -ENOMEM; do { next = p4d_addr_end(addr, end); if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask)) return -ENOMEM; } while (p4d++, addr = next, addr != end); return 0; } static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages) { unsigned long start = addr; pgd_t *pgd; unsigned long next; int err = 0; int nr = 0; pgtbl_mod_mask mask = 0; BUG_ON(addr >= end); pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); if (pgd_bad(*pgd)) mask |= PGTBL_PGD_MODIFIED; err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask); if (err) return 
err; } while (pgd++, addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); return 0; } /* * vmap_pages_range_noflush is similar to vmap_pages_range, but does not * flush caches. * * The caller is responsible for calling flush_cache_vmap() after this * function returns successfully and before the addresses are accessed. * * This is an internal function only. Do not use outside mm/. */ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { unsigned int i, nr = (end - addr) >> PAGE_SHIFT; WARN_ON(page_shift < PAGE_SHIFT); if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || page_shift == PAGE_SHIFT) return vmap_small_pages_range_noflush(addr, end, prot, pages); for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) { int err; err = vmap_range_noflush(addr, addr + (1UL << page_shift), page_to_phys(pages[i]), prot, page_shift); if (err) return err; addr += 1UL << page_shift; } return 0; } int vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { int ret = kmsan_vmap_pages_range_noflush(addr, end, prot, pages, page_shift); if (ret) return ret; return __vmap_pages_range_noflush(addr, end, prot, pages, page_shift); } /** * vmap_pages_range - map pages to a kernel virtual address * @addr: start of the VM area to map * @end: end of the VM area to map (non-inclusive) * @prot: page protection flags to use * @pages: pages to map (always PAGE_SIZE pages) * @page_shift: maximum shift that the pages may be mapped with, @pages must * be aligned and contiguous up to at least this shift. * * RETURNS: * 0 on success, -errno on failure. */ static int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { int err; err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift); flush_cache_vmap(addr, end); return err; } static int check_sparse_vm_area(struct vm_struct *area, unsigned long start, unsigned long end) { might_sleep(); if (WARN_ON_ONCE(area->flags & VM_FLUSH_RESET_PERMS)) return -EINVAL; if (WARN_ON_ONCE(area->flags & VM_NO_GUARD)) return -EINVAL; if (WARN_ON_ONCE(!(area->flags & VM_SPARSE))) return -EINVAL; if ((end - start) >> PAGE_SHIFT > totalram_pages()) return -E2BIG; if (start < (unsigned long)area->addr || (void *)end > area->addr + get_vm_area_size(area)) return -ERANGE; return 0; } /** * vm_area_map_pages - map pages inside given sparse vm_area * @area: vm_area * @start: start address inside vm_area * @end: end address inside vm_area * @pages: pages to map (always PAGE_SIZE pages) */ int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages) { int err; err = check_sparse_vm_area(area, start, end); if (err) return err; return vmap_pages_range(start, end, PAGE_KERNEL, pages, PAGE_SHIFT); } /** * vm_area_unmap_pages - unmap pages inside given sparse vm_area * @area: vm_area * @start: start address inside vm_area * @end: end address inside vm_area */ void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, unsigned long end) { if (check_sparse_vm_area(area, start, end)) return; vunmap_range(start, end); } int is_vmalloc_or_module_addr(const void *x) { /* * ARM, x86-64 and sparc64 put modules in a special place, * and fall back on vmalloc() if that fails. Others * just put it in the vmalloc space. 
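 *
 * For illustration: on configurations with a dedicated module area, an
 * address in [MODULES_VADDR, MODULES_END) is accepted here even though
 * is_vmalloc_addr() alone would reject it.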
*/ #if defined(CONFIG_MODULES) && defined(MODULES_VADDR) unsigned long addr = (unsigned long)kasan_reset_tag(x); if (addr >= MODULES_VADDR && addr < MODULES_END) return 1; #endif return is_vmalloc_addr(x); } EXPORT_SYMBOL_GPL(is_vmalloc_or_module_addr); /* * Walk a vmap address to the struct page it maps. Huge vmap mappings will * return the tail page that corresponds to the base page address, which * matches small vmap mappings. */ struct page *vmalloc_to_page(const void *vmalloc_addr) { unsigned long addr = (unsigned long) vmalloc_addr; struct page *page = NULL; pgd_t *pgd = pgd_offset_k(addr); p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep, pte; /* * XXX we might need to change this if we add VIRTUAL_BUG_ON for * architectures that do not vmalloc module space */ VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); if (pgd_none(*pgd)) return NULL; if (WARN_ON_ONCE(pgd_leaf(*pgd))) return NULL; /* XXX: no allowance for huge pgd */ if (WARN_ON_ONCE(pgd_bad(*pgd))) return NULL; p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d)) return NULL; if (p4d_leaf(*p4d)) return p4d_page(*p4d) + ((addr & ~P4D_MASK) >> PAGE_SHIFT); if (WARN_ON_ONCE(p4d_bad(*p4d))) return NULL; pud = pud_offset(p4d, addr); if (pud_none(*pud)) return NULL; if (pud_leaf(*pud)) return pud_page(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); if (WARN_ON_ONCE(pud_bad(*pud))) return NULL; pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) return NULL; if (pmd_leaf(*pmd)) return pmd_page(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); if (WARN_ON_ONCE(pmd_bad(*pmd))) return NULL; ptep = pte_offset_kernel(pmd, addr); pte = ptep_get(ptep); if (pte_present(pte)) page = pte_page(pte); return page; } EXPORT_SYMBOL(vmalloc_to_page); /* * Map a vmalloc()-space virtual address to the physical page frame number. */ unsigned long vmalloc_to_pfn(const void *vmalloc_addr) { return page_to_pfn(vmalloc_to_page(vmalloc_addr)); } EXPORT_SYMBOL(vmalloc_to_pfn); /*** Global kva allocator ***/ #define DEBUG_AUGMENT_PROPAGATE_CHECK 0 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0 static DEFINE_SPINLOCK(free_vmap_area_lock); static bool vmap_initialized __read_mostly; /* * This kmem_cache is used for vmap_area objects. Instead of * allocating from slab we reuse an object from this cache to * make things faster. Especially in "no edge" splitting of * free block. */ static struct kmem_cache *vmap_area_cachep; /* * This linked list is used in pair with free_vmap_area_root. * It gives O(1) access to prev/next to perform fast coalescing. */ static LIST_HEAD(free_vmap_area_list); /* * This augment red-black tree represents the free vmap space. * All vmap_area objects in this tree are sorted by va->va_start * address. It is used for allocation and merging when a vmap * object is released. * * Each vmap_area node contains a maximum available free block * of its sub-tree, right or left. Therefore it is possible to * find a lowest match of free area. */ static struct rb_root free_vmap_area_root = RB_ROOT; /* * Preload a CPU with one object for "no edge" split case. The * aim is to get rid of allocations from the atomic context, thus * to use more permissive allocation masks. */ static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node); /* * This structure defines a single, solid model where a list and * rb-tree are part of one entity protected by the lock. Nodes are * sorted in ascending order, thus for O(1) access to left/right * neighbors a list is used as well as for sequential traversal. 
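 *
 * For illustration: the per-node "busy" and "lazy" trackers in
 * struct vmap_node below are instances of this model.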
*/ struct rb_list { struct rb_root root; struct list_head head; spinlock_t lock; }; /* * A fast size storage contains VAs up to 1M size. A pool consists * of linked between each other ready to go VAs of certain sizes. * An index in the pool-array corresponds to number of pages + 1. */ #define MAX_VA_SIZE_PAGES 256 struct vmap_pool { struct list_head head; unsigned long len; }; /* * An effective vmap-node logic. Users make use of nodes instead * of a global heap. It allows to balance an access and mitigate * contention. */ static struct vmap_node { /* Simple size segregated storage. */ struct vmap_pool pool[MAX_VA_SIZE_PAGES]; spinlock_t pool_lock; bool skip_populate; /* Bookkeeping data of this node. */ struct rb_list busy; struct rb_list lazy; /* * Ready-to-free areas. */ struct list_head purge_list; struct work_struct purge_work; unsigned long nr_purged; } single; /* * Initial setup consists of one single node, i.e. a balancing * is fully disabled. Later on, after vmap is initialized these * parameters are updated based on a system capacity. */ static struct vmap_node *vmap_nodes = &single; static __read_mostly unsigned int nr_vmap_nodes = 1; static __read_mostly unsigned int vmap_zone_size = 1; static inline unsigned int addr_to_node_id(unsigned long addr) { return (addr / vmap_zone_size) % nr_vmap_nodes; } static inline struct vmap_node * addr_to_node(unsigned long addr) { return &vmap_nodes[addr_to_node_id(addr)]; } static inline struct vmap_node * id_to_node(unsigned int id) { return &vmap_nodes[id % nr_vmap_nodes]; } /* * We use the value 0 to represent "no node", that is why * an encoded value will be the node-id incremented by 1. * It is always greater then 0. A valid node_id which can * be encoded is [0:nr_vmap_nodes - 1]. If a passed node_id * is not valid 0 is returned. */ static unsigned int encode_vn_id(unsigned int node_id) { /* Can store U8_MAX [0:254] nodes. */ if (node_id < nr_vmap_nodes) return (node_id + 1) << BITS_PER_BYTE; /* Warn and no node encoded. */ WARN_ONCE(1, "Encode wrong node id (%u)\n", node_id); return 0; } /* * Returns an encoded node-id, the valid range is within * [0:nr_vmap_nodes-1] values. Otherwise nr_vmap_nodes is * returned if extracted data is wrong. */ static unsigned int decode_vn_id(unsigned int val) { unsigned int node_id = (val >> BITS_PER_BYTE) - 1; /* Can store U8_MAX [0:254] nodes. */ if (node_id < nr_vmap_nodes) return node_id; /* If it was _not_ zero, warn. */ WARN_ONCE(node_id != UINT_MAX, "Decode wrong node id (%d)\n", node_id); return nr_vmap_nodes; } static bool is_vn_id_valid(unsigned int node_id) { if (node_id < nr_vmap_nodes) return true; return false; } static __always_inline unsigned long va_size(struct vmap_area *va) { return (va->va_end - va->va_start); } static __always_inline unsigned long get_subtree_max_size(struct rb_node *node) { struct vmap_area *va; va = rb_entry_safe(node, struct vmap_area, rb_node); return va ? 
va->subtree_max_size : 0; } RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb, struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size) static void reclaim_and_purge_vmap_areas(void); static BLOCKING_NOTIFIER_HEAD(vmap_notify_list); static void drain_vmap_area_work(struct work_struct *work); static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work); static atomic_long_t nr_vmalloc_pages; unsigned long vmalloc_nr_pages(void) { return atomic_long_read(&nr_vmalloc_pages); } static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root) { struct rb_node *n = root->rb_node; addr = (unsigned long)kasan_reset_tag((void *)addr); while (n) { struct vmap_area *va; va = rb_entry(n, struct vmap_area, rb_node); if (addr < va->va_start) n = n->rb_left; else if (addr >= va->va_end) n = n->rb_right; else return va; } return NULL; } /* Look up the first VA which satisfies addr < va_end, NULL if none. */ static struct vmap_area * __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root) { struct vmap_area *va = NULL; struct rb_node *n = root->rb_node; addr = (unsigned long)kasan_reset_tag((void *)addr); while (n) { struct vmap_area *tmp; tmp = rb_entry(n, struct vmap_area, rb_node); if (tmp->va_end > addr) { va = tmp; if (tmp->va_start <= addr) break; n = n->rb_left; } else n = n->rb_right; } return va; } /* * Returns a node where a first VA, that satisfies addr < va_end, resides. * If success, a node is locked. A user is responsible to unlock it when a * VA is no longer needed to be accessed. * * Returns NULL if nothing found. */ static struct vmap_node * find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va) { unsigned long va_start_lowest; struct vmap_node *vn; int i; repeat: for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; spin_lock(&vn->busy.lock); *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root); if (*va) if (!va_start_lowest || (*va)->va_start < va_start_lowest) va_start_lowest = (*va)->va_start; spin_unlock(&vn->busy.lock); } /* * Check if found VA exists, it might have gone away. In this case we * repeat the search because a VA has been removed concurrently and we * need to proceed to the next one, which is a rare case. */ if (va_start_lowest) { vn = addr_to_node(va_start_lowest); spin_lock(&vn->busy.lock); *va = __find_vmap_area(va_start_lowest, &vn->busy.root); if (*va) return vn; spin_unlock(&vn->busy.lock); goto repeat; } return NULL; } /* * This function returns back addresses of parent node * and its left or right link for further processing. * * Otherwise NULL is returned. In that case all further * steps regarding inserting of conflicting overlap range * have to be declined and actually considered as a bug. */ static __always_inline struct rb_node ** find_va_links(struct vmap_area *va, struct rb_root *root, struct rb_node *from, struct rb_node **parent) { struct vmap_area *tmp_va; struct rb_node **link; if (root) { link = &root->rb_node; if (unlikely(!*link)) { *parent = NULL; return link; } } else { link = &from; } /* * Go to the bottom of the tree. When we hit the last point * we end up with parent rb_node and correct direction, i name * it link, where the new va->rb_node will be attached to. */ do { tmp_va = rb_entry(*link, struct vmap_area, rb_node); /* * During the traversal we also do some sanity check. * Trigger the BUG() if there are sides(left/right) * or full overlaps. 
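 *
 * For illustration: a new range descends to the left of "tmp_va" when
 * va->va_end <= tmp_va->va_start, to the right when
 * va->va_start >= tmp_va->va_end; anything else overlaps an existing
 * area and the insertion is refused.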
*/ if (va->va_end <= tmp_va->va_start) link = &(*link)->rb_left; else if (va->va_start >= tmp_va->va_end) link = &(*link)->rb_right; else { WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n", va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end); return NULL; } } while (*link); *parent = &tmp_va->rb_node; return link; } static __always_inline struct list_head * get_va_next_sibling(struct rb_node *parent, struct rb_node **link) { struct list_head *list; if (unlikely(!parent)) /* * The red-black tree where we try to find VA neighbors * before merging or inserting is empty, i.e. it means * there is no free vmap space. Normally it does not * happen but we handle this case anyway. */ return NULL; list = &rb_entry(parent, struct vmap_area, rb_node)->list; return (&parent->rb_right == link ? list->next : list); } static __always_inline void __link_va(struct vmap_area *va, struct rb_root *root, struct rb_node *parent, struct rb_node **link, struct list_head *head, bool augment) { /* * VA is still not in the list, but we can * identify its future previous list_head node. */ if (likely(parent)) { head = &rb_entry(parent, struct vmap_area, rb_node)->list; if (&parent->rb_right != link) head = head->prev; } /* Insert to the rb-tree */ rb_link_node(&va->rb_node, parent, link); if (augment) { /* * Some explanation here. Just perform simple insertion * to the tree. We do not set va->subtree_max_size to * its current size before calling rb_insert_augmented(). * It is because we populate the tree from the bottom * to parent levels when the node _is_ in the tree. * * Therefore we set subtree_max_size to zero after insertion, * to let __augment_tree_propagate_from() puts everything to * the correct order later on. */ rb_insert_augmented(&va->rb_node, root, &free_vmap_area_rb_augment_cb); va->subtree_max_size = 0; } else { rb_insert_color(&va->rb_node, root); } /* Address-sort this list */ list_add(&va->list, head); } static __always_inline void link_va(struct vmap_area *va, struct rb_root *root, struct rb_node *parent, struct rb_node **link, struct list_head *head) { __link_va(va, root, parent, link, head, false); } static __always_inline void link_va_augment(struct vmap_area *va, struct rb_root *root, struct rb_node *parent, struct rb_node **link, struct list_head *head) { __link_va(va, root, parent, link, head, true); } static __always_inline void __unlink_va(struct vmap_area *va, struct rb_root *root, bool augment) { if (WARN_ON(RB_EMPTY_NODE(&va->rb_node))) return; if (augment) rb_erase_augmented(&va->rb_node, root, &free_vmap_area_rb_augment_cb); else rb_erase(&va->rb_node, root); list_del_init(&va->list); RB_CLEAR_NODE(&va->rb_node); } static __always_inline void unlink_va(struct vmap_area *va, struct rb_root *root) { __unlink_va(va, root, false); } static __always_inline void unlink_va_augment(struct vmap_area *va, struct rb_root *root) { __unlink_va(va, root, true); } #if DEBUG_AUGMENT_PROPAGATE_CHECK /* * Gets called when remove the node and rotate. 
*/ static __always_inline unsigned long compute_subtree_max_size(struct vmap_area *va) { return max3(va_size(va), get_subtree_max_size(va->rb_node.rb_left), get_subtree_max_size(va->rb_node.rb_right)); } static void augment_tree_propagate_check(void) { struct vmap_area *va; unsigned long computed_size; list_for_each_entry(va, &free_vmap_area_list, list) { computed_size = compute_subtree_max_size(va); if (computed_size != va->subtree_max_size) pr_emerg("tree is corrupted: %lu, %lu\n", va_size(va), va->subtree_max_size); } } #endif /* * This function populates subtree_max_size from bottom to upper * levels starting from VA point. The propagation must be done * when VA size is modified by changing its va_start/va_end. Or * in case of newly inserting of VA to the tree. * * It means that __augment_tree_propagate_from() must be called: * - After VA has been inserted to the tree(free path); * - After VA has been shrunk(allocation path); * - After VA has been increased(merging path). * * Please note that, it does not mean that upper parent nodes * and their subtree_max_size are recalculated all the time up * to the root node. * * 4--8 * /\ * / \ * / \ * 2--2 8--8 * * For example if we modify the node 4, shrinking it to 2, then * no any modification is required. If we shrink the node 2 to 1 * its subtree_max_size is updated only, and set to 1. If we shrink * the node 8 to 6, then its subtree_max_size is set to 6 and parent * node becomes 4--6. */ static __always_inline void augment_tree_propagate_from(struct vmap_area *va) { /* * Populate the tree from bottom towards the root until * the calculated maximum available size of checked node * is equal to its current one. */ free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL); #if DEBUG_AUGMENT_PROPAGATE_CHECK augment_tree_propagate_check(); #endif } static void insert_vmap_area(struct vmap_area *va, struct rb_root *root, struct list_head *head) { struct rb_node **link; struct rb_node *parent; link = find_va_links(va, root, NULL, &parent); if (link) link_va(va, root, parent, link, head); } static void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from, struct rb_root *root, struct list_head *head) { struct rb_node **link; struct rb_node *parent; if (from) link = find_va_links(va, NULL, from, &parent); else link = find_va_links(va, root, NULL, &parent); if (link) { link_va_augment(va, root, parent, link, head); augment_tree_propagate_from(va); } } /* * Merge de-allocated chunk of VA memory with previous * and next free blocks. If coalesce is not done a new * free area is inserted. If VA has been merged, it is * freed. * * Please note, it can return NULL in case of overlap * ranges, followed by WARN() report. Despite it is a * buggy behaviour, a system can be alive and keep * ongoing. */ static __always_inline struct vmap_area * __merge_or_add_vmap_area(struct vmap_area *va, struct rb_root *root, struct list_head *head, bool augment) { struct vmap_area *sibling; struct list_head *next; struct rb_node **link; struct rb_node *parent; bool merged = false; /* * Find a place in the tree where VA potentially will be * inserted, unless it is merged with its sibling/siblings. */ link = find_va_links(va, root, NULL, &parent); if (!link) return NULL; /* * Get next node of VA to check if merging can be done. 
*/ next = get_va_next_sibling(parent, link); if (unlikely(next == NULL)) goto insert; /* * start end * | | * |<------VA------>|<-----Next----->| * | | * start end */ if (next != head) { sibling = list_entry(next, struct vmap_area, list); if (sibling->va_start == va->va_end) { sibling->va_start = va->va_start; /* Free vmap_area object. */ kmem_cache_free(vmap_area_cachep, va); /* Point to the new merged area. */ va = sibling; merged = true; } } /* * start end * | | * |<-----Prev----->|<------VA------>| * | | * start end */ if (next->prev != head) { sibling = list_entry(next->prev, struct vmap_area, list); if (sibling->va_end == va->va_start) { /* * If both neighbors are coalesced, it is important * to unlink the "next" node first, followed by merging * with "previous" one. Otherwise the tree might not be * fully populated if a sibling's augmented value is * "normalized" because of rotation operations. */ if (merged) __unlink_va(va, root, augment); sibling->va_end = va->va_end; /* Free vmap_area object. */ kmem_cache_free(vmap_area_cachep, va); /* Point to the new merged area. */ va = sibling; merged = true; } } insert: if (!merged) __link_va(va, root, parent, link, head, augment); return va; } static __always_inline struct vmap_area * merge_or_add_vmap_area(struct vmap_area *va, struct rb_root *root, struct list_head *head) { return __merge_or_add_vmap_area(va, root, head, false); } static __always_inline struct vmap_area * merge_or_add_vmap_area_augment(struct vmap_area *va, struct rb_root *root, struct list_head *head) { va = __merge_or_add_vmap_area(va, root, head, true); if (va) augment_tree_propagate_from(va); return va; } static __always_inline bool is_within_this_va(struct vmap_area *va, unsigned long size, unsigned long align, unsigned long vstart) { unsigned long nva_start_addr; if (va->va_start > vstart) nva_start_addr = ALIGN(va->va_start, align); else nva_start_addr = ALIGN(vstart, align); /* Can be overflowed due to big size or alignment. */ if (nva_start_addr + size < nva_start_addr || nva_start_addr < vstart) return false; return (nva_start_addr + size <= va->va_end); } /* * Find the first free block(lowest start address) in the tree, * that will accomplish the request corresponding to passing * parameters. Please note, with an alignment bigger than PAGE_SIZE, * a search length is adjusted to account for worst case alignment * overhead. */ static __always_inline struct vmap_area * find_vmap_lowest_match(struct rb_root *root, unsigned long size, unsigned long align, unsigned long vstart, bool adjust_search_size) { struct vmap_area *va; struct rb_node *node; unsigned long length; /* Start from the root. */ node = root->rb_node; /* Adjust the search size for alignment overhead. */ length = adjust_search_size ? size + align - 1 : size; while (node) { va = rb_entry(node, struct vmap_area, rb_node); if (get_subtree_max_size(node->rb_left) >= length && vstart < va->va_start) { node = node->rb_left; } else { if (is_within_this_va(va, size, align, vstart)) return va; /* * Does not make sense to go deeper towards the right * sub-tree if it does not have a free block that is * equal or bigger to the requested search length. */ if (get_subtree_max_size(node->rb_right) >= length) { node = node->rb_right; continue; } /* * OK. We roll back and find the first right sub-tree, * that will satisfy the search criteria. It can happen * due to "vstart" restriction or an alignment overhead * that is bigger then PAGE_SIZE. 
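 *
 * For illustration: a left sub-tree can advertise enough space via
 * subtree_max_size while every block in it still fails the "vstart" or
 * alignment constraint, so the walk backtracks through the parents and
 * retries their right sub-trees.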
*/ while ((node = rb_parent(node))) { va = rb_entry(node, struct vmap_area, rb_node); if (is_within_this_va(va, size, align, vstart)) return va; if (get_subtree_max_size(node->rb_right) >= length && vstart <= va->va_start) { /* * Shift the vstart forward. Please note, we update it with * parent's start address adding "1" because we do not want * to enter same sub-tree after it has already been checked * and no suitable free block found there. */ vstart = va->va_start + 1; node = node->rb_right; break; } } } } return NULL; } #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK #include <linux/random.h> static struct vmap_area * find_vmap_lowest_linear_match(struct list_head *head, unsigned long size, unsigned long align, unsigned long vstart) { struct vmap_area *va; list_for_each_entry(va, head, list) { if (!is_within_this_va(va, size, align, vstart)) continue; return va; } return NULL; } static void find_vmap_lowest_match_check(struct rb_root *root, struct list_head *head, unsigned long size, unsigned long align) { struct vmap_area *va_1, *va_2; unsigned long vstart; unsigned int rnd; get_random_bytes(&rnd, sizeof(rnd)); vstart = VMALLOC_START + rnd; va_1 = find_vmap_lowest_match(root, size, align, vstart, false); va_2 = find_vmap_lowest_linear_match(head, size, align, vstart); if (va_1 != va_2) pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n", va_1, va_2, vstart); } #endif enum fit_type { NOTHING_FIT = 0, FL_FIT_TYPE = 1, /* full fit */ LE_FIT_TYPE = 2, /* left edge fit */ RE_FIT_TYPE = 3, /* right edge fit */ NE_FIT_TYPE = 4 /* no edge fit */ }; static __always_inline enum fit_type classify_va_fit_type(struct vmap_area *va, unsigned long nva_start_addr, unsigned long size) { enum fit_type type; /* Check if it is within VA. */ if (nva_start_addr < va->va_start || nva_start_addr + size > va->va_end) return NOTHING_FIT; /* Now classify. */ if (va->va_start == nva_start_addr) { if (va->va_end == nva_start_addr + size) type = FL_FIT_TYPE; else type = LE_FIT_TYPE; } else if (va->va_end == nva_start_addr + size) { type = RE_FIT_TYPE; } else { type = NE_FIT_TYPE; } return type; } static __always_inline int va_clip(struct rb_root *root, struct list_head *head, struct vmap_area *va, unsigned long nva_start_addr, unsigned long size) { struct vmap_area *lva = NULL; enum fit_type type = classify_va_fit_type(va, nva_start_addr, size); if (type == FL_FIT_TYPE) { /* * No need to split VA, it fully fits. * * | | * V NVA V * |---------------| */ unlink_va_augment(va, root); kmem_cache_free(vmap_area_cachep, va); } else if (type == LE_FIT_TYPE) { /* * Split left edge of fit VA. * * | | * V NVA V R * |-------|-------| */ va->va_start += size; } else if (type == RE_FIT_TYPE) { /* * Split right edge of fit VA. * * | | * L V NVA V * |-------|-------| */ va->va_end = nva_start_addr; } else if (type == NE_FIT_TYPE) { /* * Split no edge of fit VA. * * | | * L V NVA V R * |---|-------|---| */ lva = __this_cpu_xchg(ne_fit_preload_node, NULL); if (unlikely(!lva)) { /* * For percpu allocator we do not do any pre-allocation * and leave it as it is. The reason is it most likely * never ends up with NE_FIT_TYPE splitting. In case of * percpu allocations offsets and sizes are aligned to * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE * are its main fitting cases. * * There are a few exceptions though, as an example it is * a first allocation (early boot up) when we have "one" * big free space that has to be split. * * Also we can hit this path in case of regular "vmap" * allocations, if "this" current CPU was not preloaded. 
* See the comment in alloc_vmap_area() why. If so, then * GFP_NOWAIT is used instead to get an extra object for * split purpose. That is rare and most time does not * occur. * * What happens if an allocation gets failed. Basically, * an "overflow" path is triggered to purge lazily freed * areas to free some memory, then, the "retry" path is * triggered to repeat one more time. See more details * in alloc_vmap_area() function. */ lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT); if (!lva) return -1; } /* * Build the remainder. */ lva->va_start = va->va_start; lva->va_end = nva_start_addr; /* * Shrink this VA to remaining size. */ va->va_start = nva_start_addr + size; } else { return -1; } if (type != FL_FIT_TYPE) { augment_tree_propagate_from(va); if (lva) /* type == NE_FIT_TYPE */ insert_vmap_area_augment(lva, &va->rb_node, root, head); } return 0; } static unsigned long va_alloc(struct vmap_area *va, struct rb_root *root, struct list_head *head, unsigned long size, unsigned long align, unsigned long vstart, unsigned long vend) { unsigned long nva_start_addr; int ret; if (va->va_start > vstart) nva_start_addr = ALIGN(va->va_start, align); else nva_start_addr = ALIGN(vstart, align); /* Check the "vend" restriction. */ if (nva_start_addr + size > vend) return vend; /* Update the free vmap_area. */ ret = va_clip(root, head, va, nva_start_addr, size); if (WARN_ON_ONCE(ret)) return vend; return nva_start_addr; } /* * Returns a start address of the newly allocated area, if success. * Otherwise a vend is returned that indicates failure. */ static __always_inline unsigned long __alloc_vmap_area(struct rb_root *root, struct list_head *head, unsigned long size, unsigned long align, unsigned long vstart, unsigned long vend) { bool adjust_search_size = true; unsigned long nva_start_addr; struct vmap_area *va; /* * Do not adjust when: * a) align <= PAGE_SIZE, because it does not make any sense. * All blocks(their start addresses) are at least PAGE_SIZE * aligned anyway; * b) a short range where a requested size corresponds to exactly * specified [vstart:vend] interval and an alignment > PAGE_SIZE. * With adjusted search length an allocation would not succeed. */ if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size)) adjust_search_size = false; va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size); if (unlikely(!va)) return vend; nva_start_addr = va_alloc(va, root, head, size, align, vstart, vend); if (nva_start_addr == vend) return vend; #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK find_vmap_lowest_match_check(root, head, size, align); #endif return nva_start_addr; } /* * Free a region of KVA allocated by alloc_vmap_area */ static void free_vmap_area(struct vmap_area *va) { struct vmap_node *vn = addr_to_node(va->va_start); /* * Remove from the busy tree/list. */ spin_lock(&vn->busy.lock); unlink_va(va, &vn->busy.root); spin_unlock(&vn->busy.lock); /* * Insert/Merge it back to the free tree/list. */ spin_lock(&free_vmap_area_lock); merge_or_add_vmap_area_augment(va, &free_vmap_area_root, &free_vmap_area_list); spin_unlock(&free_vmap_area_lock); } static inline void preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node) { struct vmap_area *va = NULL; /* * Preload this CPU with one extra vmap_area object. It is used * when fit type of free area is NE_FIT_TYPE. It guarantees that * a CPU that does an allocation is preloaded. 
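 *
 * The preloaded object is later consumed by va_clip() via
 * __this_cpu_xchg(ne_fit_preload_node, NULL) when an NE_FIT_TYPE
 * split actually takes place.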
* * We do it in non-atomic context, thus it allows us to use more * permissive allocation masks to be more stable under low memory * condition and high memory pressure. */ if (!this_cpu_read(ne_fit_preload_node)) va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node); spin_lock(lock); if (va && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, va)) kmem_cache_free(vmap_area_cachep, va); } static struct vmap_pool * size_to_va_pool(struct vmap_node *vn, unsigned long size) { unsigned int idx = (size - 1) / PAGE_SIZE; if (idx < MAX_VA_SIZE_PAGES) return &vn->pool[idx]; return NULL; } static bool node_pool_add_va(struct vmap_node *n, struct vmap_area *va) { struct vmap_pool *vp; vp = size_to_va_pool(n, va_size(va)); if (!vp) return false; spin_lock(&n->pool_lock); list_add(&va->list, &vp->head); WRITE_ONCE(vp->len, vp->len + 1); spin_unlock(&n->pool_lock); return true; } static struct vmap_area * node_pool_del_va(struct vmap_node *vn, unsigned long size, unsigned long align, unsigned long vstart, unsigned long vend) { struct vmap_area *va = NULL; struct vmap_pool *vp; int err = 0; vp = size_to_va_pool(vn, size); if (!vp || list_empty(&vp->head)) return NULL; spin_lock(&vn->pool_lock); if (!list_empty(&vp->head)) { va = list_first_entry(&vp->head, struct vmap_area, list); if (IS_ALIGNED(va->va_start, align)) { /* * Do some sanity check and emit a warning * if one of below checks detects an error. */ err |= (va_size(va) != size); err |= (va->va_start < vstart); err |= (va->va_end > vend); if (!WARN_ON_ONCE(err)) { list_del_init(&va->list); WRITE_ONCE(vp->len, vp->len - 1); } else { va = NULL; } } else { list_move_tail(&va->list, &vp->head); va = NULL; } } spin_unlock(&vn->pool_lock); return va; } static struct vmap_area * node_alloc(unsigned long size, unsigned long align, unsigned long vstart, unsigned long vend, unsigned long *addr, unsigned int *vn_id) { struct vmap_area *va; *vn_id = 0; *addr = vend; /* * Fallback to a global heap if not vmalloc or there * is only one node. */ if (vstart != VMALLOC_START || vend != VMALLOC_END || nr_vmap_nodes == 1) return NULL; *vn_id = raw_smp_processor_id() % nr_vmap_nodes; va = node_pool_del_va(id_to_node(*vn_id), size, align, vstart, vend); *vn_id = encode_vn_id(*vn_id); if (va) *addr = va->va_start; return va; } /* * Allocate a region of KVA of the specified size and alignment, within the * vstart and vend. */ static struct vmap_area *alloc_vmap_area(unsigned long size, unsigned long align, unsigned long vstart, unsigned long vend, int node, gfp_t gfp_mask, unsigned long va_flags) { struct vmap_node *vn; struct vmap_area *va; unsigned long freed; unsigned long addr; unsigned int vn_id; int purged = 0; int ret; if (unlikely(!size || offset_in_page(size) || !is_power_of_2(align))) return ERR_PTR(-EINVAL); if (unlikely(!vmap_initialized)) return ERR_PTR(-EBUSY); might_sleep(); /* * If a VA is obtained from a global heap(if it fails here) * it is anyway marked with this "vn_id" so it is returned * to this pool's node later. Such way gives a possibility * to populate pools based on users demand. * * On success a ready to go VA is returned. */ va = node_alloc(size, align, vstart, vend, &addr, &vn_id); if (!va) { gfp_mask = gfp_mask & GFP_RECLAIM_MASK; va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node); if (unlikely(!va)) return ERR_PTR(-ENOMEM); /* * Only scan the relevant parts containing pointers to other objects * to avoid false negatives. 
*/ kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask); } retry: if (addr == vend) { preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node); addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list, size, align, vstart, vend); spin_unlock(&free_vmap_area_lock); } trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend); /* * If an allocation fails, the "vend" address is * returned. Therefore trigger the overflow path. */ if (unlikely(addr == vend)) goto overflow; va->va_start = addr; va->va_end = addr + size; va->vm = NULL; va->flags = (va_flags | vn_id); vn = addr_to_node(va->va_start); spin_lock(&vn->busy.lock); insert_vmap_area(va, &vn->busy.root, &vn->busy.head); spin_unlock(&vn->busy.lock); BUG_ON(!IS_ALIGNED(va->va_start, align)); BUG_ON(va->va_start < vstart); BUG_ON(va->va_end > vend); ret = kasan_populate_vmalloc(addr, size); if (ret) { free_vmap_area(va); return ERR_PTR(ret); } return va; overflow: if (!purged) { reclaim_and_purge_vmap_areas(); purged = 1; goto retry; } freed = 0; blocking_notifier_call_chain(&vmap_notify_list, 0, &freed); if (freed > 0) { purged = 0; goto retry; } if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n", size); kmem_cache_free(vmap_area_cachep, va); return ERR_PTR(-EBUSY); } int register_vmap_purge_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&vmap_notify_list, nb); } EXPORT_SYMBOL_GPL(register_vmap_purge_notifier); int unregister_vmap_purge_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&vmap_notify_list, nb); } EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); /* * lazy_max_pages is the maximum amount of virtual address space we gather up * before attempting to purge with a TLB flush. * * There is a tradeoff here: a larger number will cover more kernel page tables * and take slightly longer to purge, but it will linearly reduce the number of * global TLB flushes that must be performed. It would seem natural to scale * this number up linearly with the number of CPUs (because vmapping activity * could also scale linearly with the number of CPUs), however it is likely * that in practice, workloads might be constrained in other ways that mean * vmap activity will not scale linearly with CPUs. Also, I want to be * conservative and not introduce a big latency on huge systems, so go with * a less aggressive log scale. It will still be an improvement over the old * code, and it will be simple to change the scale factor if we find that it * becomes a problem on bigger systems. */ static unsigned long lazy_max_pages(void) { unsigned int log; log = fls(num_online_cpus()); return log * (32UL * 1024 * 1024 / PAGE_SIZE); } static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0); /* * Serialize vmap purging. There is no actual critical section protected * by this lock, but we want to avoid concurrent calls for performance * reasons and to make the pcpu_get_vm_areas more deterministic. 
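 *
 * An illustrative sketch of the expected pattern(it simply mirrors
 * reclaim_and_purge_vmap_areas() below):
 *
 *	mutex_lock(&vmap_purge_lock);
 *	purge_fragmented_blocks_allcpus();
 *	__purge_vmap_area_lazy(ULONG_MAX, 0, true);
 *	mutex_unlock(&vmap_purge_lock);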
*/ static DEFINE_MUTEX(vmap_purge_lock); /* for per-CPU blocks */ static void purge_fragmented_blocks_allcpus(void); static cpumask_t purge_nodes; static void reclaim_list_global(struct list_head *head) { struct vmap_area *va, *n; if (list_empty(head)) return; spin_lock(&free_vmap_area_lock); list_for_each_entry_safe(va, n, head, list) merge_or_add_vmap_area_augment(va, &free_vmap_area_root, &free_vmap_area_list); spin_unlock(&free_vmap_area_lock); } static void decay_va_pool_node(struct vmap_node *vn, bool full_decay) { struct vmap_area *va, *nva; struct list_head decay_list; struct rb_root decay_root; unsigned long n_decay; int i; decay_root = RB_ROOT; INIT_LIST_HEAD(&decay_list); for (i = 0; i < MAX_VA_SIZE_PAGES; i++) { struct list_head tmp_list; if (list_empty(&vn->pool[i].head)) continue; INIT_LIST_HEAD(&tmp_list); /* Detach the pool, so no-one can access it. */ spin_lock(&vn->pool_lock); list_replace_init(&vn->pool[i].head, &tmp_list); spin_unlock(&vn->pool_lock); if (full_decay) WRITE_ONCE(vn->pool[i].len, 0); /* Decay a pool by ~25% out of left objects. */ n_decay = vn->pool[i].len >> 2; list_for_each_entry_safe(va, nva, &tmp_list, list) { list_del_init(&va->list); merge_or_add_vmap_area(va, &decay_root, &decay_list); if (!full_decay) { WRITE_ONCE(vn->pool[i].len, vn->pool[i].len - 1); if (!--n_decay) break; } } /* * Attach the pool back if it has been partly decayed. * Please note, it is supposed that nobody(other contexts) * can populate the pool therefore a simple list replace * operation takes place here. */ if (!full_decay && !list_empty(&tmp_list)) { spin_lock(&vn->pool_lock); list_replace_init(&tmp_list, &vn->pool[i].head); spin_unlock(&vn->pool_lock); } } reclaim_list_global(&decay_list); } static void purge_vmap_node(struct work_struct *work) { struct vmap_node *vn = container_of(work, struct vmap_node, purge_work); struct vmap_area *va, *n_va; LIST_HEAD(local_list); vn->nr_purged = 0; list_for_each_entry_safe(va, n_va, &vn->purge_list, list) { unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT; unsigned long orig_start = va->va_start; unsigned long orig_end = va->va_end; unsigned int vn_id = decode_vn_id(va->flags); list_del_init(&va->list); if (is_vmalloc_or_module_addr((void *)orig_start)) kasan_release_vmalloc(orig_start, orig_end, va->va_start, va->va_end); atomic_long_sub(nr, &vmap_lazy_nr); vn->nr_purged++; if (is_vn_id_valid(vn_id) && !vn->skip_populate) if (node_pool_add_va(vn, va)) continue; /* Go back to global. */ list_add(&va->list, &local_list); } reclaim_list_global(&local_list); } /* * Purges all lazily-freed vmap areas. */ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end, bool full_pool_decay) { unsigned long nr_purged_areas = 0; unsigned int nr_purge_helpers; unsigned int nr_purge_nodes; struct vmap_node *vn; int i; lockdep_assert_held(&vmap_purge_lock); /* * Use cpumask to mark which node has to be processed. 
*/ purge_nodes = CPU_MASK_NONE; for (i = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; INIT_LIST_HEAD(&vn->purge_list); vn->skip_populate = full_pool_decay; decay_va_pool_node(vn, full_pool_decay); if (RB_EMPTY_ROOT(&vn->lazy.root)) continue; spin_lock(&vn->lazy.lock); WRITE_ONCE(vn->lazy.root.rb_node, NULL); list_replace_init(&vn->lazy.head, &vn->purge_list); spin_unlock(&vn->lazy.lock); start = min(start, list_first_entry(&vn->purge_list, struct vmap_area, list)->va_start); end = max(end, list_last_entry(&vn->purge_list, struct vmap_area, list)->va_end); cpumask_set_cpu(i, &purge_nodes); } nr_purge_nodes = cpumask_weight(&purge_nodes); if (nr_purge_nodes > 0) { flush_tlb_kernel_range(start, end); /* One extra worker is per a lazy_max_pages() full set minus one. */ nr_purge_helpers = atomic_long_read(&vmap_lazy_nr) / lazy_max_pages(); nr_purge_helpers = clamp(nr_purge_helpers, 1U, nr_purge_nodes) - 1; for_each_cpu(i, &purge_nodes) { vn = &vmap_nodes[i]; if (nr_purge_helpers > 0) { INIT_WORK(&vn->purge_work, purge_vmap_node); if (cpumask_test_cpu(i, cpu_online_mask)) schedule_work_on(i, &vn->purge_work); else schedule_work(&vn->purge_work); nr_purge_helpers--; } else { vn->purge_work.func = NULL; purge_vmap_node(&vn->purge_work); nr_purged_areas += vn->nr_purged; } } for_each_cpu(i, &purge_nodes) { vn = &vmap_nodes[i]; if (vn->purge_work.func) { flush_work(&vn->purge_work); nr_purged_areas += vn->nr_purged; } } } trace_purge_vmap_area_lazy(start, end, nr_purged_areas); return nr_purged_areas > 0; } /* * Reclaim vmap areas by purging fragmented blocks and purge_vmap_area_list. */ static void reclaim_and_purge_vmap_areas(void) { mutex_lock(&vmap_purge_lock); purge_fragmented_blocks_allcpus(); __purge_vmap_area_lazy(ULONG_MAX, 0, true); mutex_unlock(&vmap_purge_lock); } static void drain_vmap_area_work(struct work_struct *work) { mutex_lock(&vmap_purge_lock); __purge_vmap_area_lazy(ULONG_MAX, 0, false); mutex_unlock(&vmap_purge_lock); } /* * Free a vmap area, caller ensuring that the area has been unmapped, * unlinked and flush_cache_vunmap had been called for the correct * range previously. */ static void free_vmap_area_noflush(struct vmap_area *va) { unsigned long nr_lazy_max = lazy_max_pages(); unsigned long va_start = va->va_start; unsigned int vn_id = decode_vn_id(va->flags); struct vmap_node *vn; unsigned long nr_lazy; if (WARN_ON_ONCE(!list_empty(&va->list))) return; nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); /* * If it was request by a certain node we would like to * return it to that node, i.e. its pool for later reuse. */ vn = is_vn_id_valid(vn_id) ? id_to_node(vn_id):addr_to_node(va->va_start); spin_lock(&vn->lazy.lock); insert_vmap_area(va, &vn->lazy.root, &vn->lazy.head); spin_unlock(&vn->lazy.lock); trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max); /* After this point, we may free va at any time */ if (unlikely(nr_lazy > nr_lazy_max)) schedule_work(&drain_vmap_work); } /* * Free and unmap a vmap area */ static void free_unmap_vmap_area(struct vmap_area *va) { flush_cache_vunmap(va->va_start, va->va_end); vunmap_range_noflush(va->va_start, va->va_end); if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range(va->va_start, va->va_end); free_vmap_area_noflush(va); } struct vmap_area *find_vmap_area(unsigned long addr) { struct vmap_node *vn; struct vmap_area *va; int i, j; if (unlikely(!vmap_initialized)) return NULL; /* * An addr_to_node_id(addr) converts an address to a node index * where a VA is located. 
If VA spans several zones and passed * addr is not the same as va->va_start, what is not common, we * may need to scan extra nodes. See an example: * * <----va----> * -|-----|-----|-----|-----|- * 1 2 0 1 * * VA resides in node 1 whereas it spans 1, 2 an 0. If passed * addr is within 2 or 0 nodes we should do extra work. */ i = j = addr_to_node_id(addr); do { vn = &vmap_nodes[i]; spin_lock(&vn->busy.lock); va = __find_vmap_area(addr, &vn->busy.root); spin_unlock(&vn->busy.lock); if (va) return va; } while ((i = (i + 1) % nr_vmap_nodes) != j); return NULL; } static struct vmap_area *find_unlink_vmap_area(unsigned long addr) { struct vmap_node *vn; struct vmap_area *va; int i, j; /* * Check the comment in the find_vmap_area() about the loop. */ i = j = addr_to_node_id(addr); do { vn = &vmap_nodes[i]; spin_lock(&vn->busy.lock); va = __find_vmap_area(addr, &vn->busy.root); if (va) unlink_va(va, &vn->busy.root); spin_unlock(&vn->busy.lock); if (va) return va; } while ((i = (i + 1) % nr_vmap_nodes) != j); return NULL; } /*** Per cpu kva allocator ***/ /* * vmap space is limited especially on 32 bit architectures. Ensure there is * room for at least 16 percpu vmap blocks per CPU. */ /* * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess * instead (we just need a rough idea) */ #if BITS_PER_LONG == 32 #define VMALLOC_SPACE (128UL*1024*1024) #else #define VMALLOC_SPACE (128UL*1024*1024*1024) #endif #define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE) #define VMAP_MAX_ALLOC BITS_PER_LONG /* 256K with 4K pages */ #define VMAP_BBMAP_BITS_MAX 1024 /* 4MB with 4K pages */ #define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2) #define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */ #define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */ #define VMAP_BBMAP_BITS \ VMAP_MIN(VMAP_BBMAP_BITS_MAX, \ VMAP_MAX(VMAP_BBMAP_BITS_MIN, \ VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16)) #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) /* * Purge threshold to prevent overeager purging of fragmented blocks for * regular operations: Purge if vb->free is less than 1/4 of the capacity. */ #define VMAP_PURGE_THRESHOLD (VMAP_BBMAP_BITS / 4) #define VMAP_RAM 0x1 /* indicates vm_map_ram area*/ #define VMAP_BLOCK 0x2 /* mark out the vmap_block sub-type*/ #define VMAP_FLAGS_MASK 0x3 struct vmap_block_queue { spinlock_t lock; struct list_head free; /* * An xarray requires an extra memory dynamically to * be allocated. If it is an issue, we can use rb-tree * instead. */ struct xarray vmap_blocks; }; struct vmap_block { spinlock_t lock; struct vmap_area *va; unsigned long free, dirty; DECLARE_BITMAP(used_map, VMAP_BBMAP_BITS); unsigned long dirty_min, dirty_max; /*< dirty range */ struct list_head free_list; struct rcu_head rcu_head; struct list_head purge; }; /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); /* * In order to fast access to any "vmap_block" associated with a * specific address, we use a hash. * * A per-cpu vmap_block_queue is used in both ways, to serialize * an access to free block chains among CPUs(alloc path) and it * also acts as a vmap_block hash(alloc/free paths). It means we * overload it, since we already have the per-cpu array which is * used as a hash table. When used as a hash a 'cpu' passed to * per_cpu() is not actually a CPU but rather a hash index. 
* * A hash function is addr_to_vb_xa() which hashes any address * to a specific index(in a hash) it belongs to. This then uses a * per_cpu() macro to access an array with generated index. * * An example: * * CPU_1 CPU_2 CPU_0 * | | | * V V V * 0 10 20 30 40 50 60 * |------|------|------|------|------|------|...<vmap address space> * CPU0 CPU1 CPU2 CPU0 CPU1 CPU2 * * - CPU_1 invokes vm_unmap_ram(6), 6 belongs to CPU0 zone, thus * it access: CPU0/INDEX0 -> vmap_blocks -> xa_lock; * * - CPU_2 invokes vm_unmap_ram(11), 11 belongs to CPU1 zone, thus * it access: CPU1/INDEX1 -> vmap_blocks -> xa_lock; * * - CPU_0 invokes vm_unmap_ram(20), 20 belongs to CPU2 zone, thus * it access: CPU2/INDEX2 -> vmap_blocks -> xa_lock. * * This technique almost always avoids lock contention on insert/remove, * however xarray spinlocks protect against any contention that remains. */ static struct xarray * addr_to_vb_xa(unsigned long addr) { int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus(); return &per_cpu(vmap_block_queue, index).vmap_blocks; } /* * We should probably have a fallback mechanism to allocate virtual memory * out of partially filled vmap blocks. However vmap block sizing should be * fairly reasonable according to the vmalloc size, so it shouldn't be a * big problem. */ static unsigned long addr_to_vb_idx(unsigned long addr) { addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); addr /= VMAP_BLOCK_SIZE; return addr; } static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off) { unsigned long addr; addr = va_start + (pages_off << PAGE_SHIFT); BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start)); return (void *)addr; } /** * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this * block. Of course pages number can't exceed VMAP_BBMAP_BITS * @order: how many 2^order pages should be occupied in newly allocated block * @gfp_mask: flags for the page level allocator * * Return: virtual address in a newly allocated block or ERR_PTR(-errno) */ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) { struct vmap_block_queue *vbq; struct vmap_block *vb; struct vmap_area *va; struct xarray *xa; unsigned long vb_idx; int node, err; void *vaddr; node = numa_node_id(); vb = kmalloc_node(sizeof(struct vmap_block), gfp_mask & GFP_RECLAIM_MASK, node); if (unlikely(!vb)) return ERR_PTR(-ENOMEM); va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, VMALLOC_START, VMALLOC_END, node, gfp_mask, VMAP_RAM|VMAP_BLOCK); if (IS_ERR(va)) { kfree(vb); return ERR_CAST(va); } vaddr = vmap_block_vaddr(va->va_start, 0); spin_lock_init(&vb->lock); vb->va = va; /* At least something should be left free */ BUG_ON(VMAP_BBMAP_BITS <= (1UL << order)); bitmap_zero(vb->used_map, VMAP_BBMAP_BITS); vb->free = VMAP_BBMAP_BITS - (1UL << order); vb->dirty = 0; vb->dirty_min = VMAP_BBMAP_BITS; vb->dirty_max = 0; bitmap_set(vb->used_map, 0, (1UL << order)); INIT_LIST_HEAD(&vb->free_list); xa = addr_to_vb_xa(va->va_start); vb_idx = addr_to_vb_idx(va->va_start); err = xa_insert(xa, vb_idx, vb, gfp_mask); if (err) { kfree(vb); free_vmap_area(va); return ERR_PTR(err); } vbq = raw_cpu_ptr(&vmap_block_queue); spin_lock(&vbq->lock); list_add_tail_rcu(&vb->free_list, &vbq->free); spin_unlock(&vbq->lock); return vaddr; } static void free_vmap_block(struct vmap_block *vb) { struct vmap_node *vn; struct vmap_block *tmp; struct xarray *xa; xa = addr_to_vb_xa(vb->va->va_start); tmp = xa_erase(xa, addr_to_vb_idx(vb->va->va_start)); BUG_ON(tmp != vb); vn = addr_to_node(vb->va->va_start); 
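	/*
	 * Unlink the area from the "busy" tree of the node it belongs
	 * to, then lazily free it; the block itself was already removed
	 * from the xarray above.
	 */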
spin_lock(&vn->busy.lock); unlink_va(vb->va, &vn->busy.root); spin_unlock(&vn->busy.lock); free_vmap_area_noflush(vb->va); kfree_rcu(vb, rcu_head); } static bool purge_fragmented_block(struct vmap_block *vb, struct vmap_block_queue *vbq, struct list_head *purge_list, bool force_purge) { if (vb->free + vb->dirty != VMAP_BBMAP_BITS || vb->dirty == VMAP_BBMAP_BITS) return false; /* Don't overeagerly purge usable blocks unless requested */ if (!(force_purge || vb->free < VMAP_PURGE_THRESHOLD)) return false; /* prevent further allocs after releasing lock */ WRITE_ONCE(vb->free, 0); /* prevent purging it again */ WRITE_ONCE(vb->dirty, VMAP_BBMAP_BITS); vb->dirty_min = 0; vb->dirty_max = VMAP_BBMAP_BITS; spin_lock(&vbq->lock); list_del_rcu(&vb->free_list); spin_unlock(&vbq->lock); list_add_tail(&vb->purge, purge_list); return true; } static void free_purged_blocks(struct list_head *purge_list) { struct vmap_block *vb, *n_vb; list_for_each_entry_safe(vb, n_vb, purge_list, purge) { list_del(&vb->purge); free_vmap_block(vb); } } static void purge_fragmented_blocks(int cpu) { LIST_HEAD(purge); struct vmap_block *vb; struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); rcu_read_lock(); list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long free = READ_ONCE(vb->free); unsigned long dirty = READ_ONCE(vb->dirty); if (free + dirty != VMAP_BBMAP_BITS || dirty == VMAP_BBMAP_BITS) continue; spin_lock(&vb->lock); purge_fragmented_block(vb, vbq, &purge, true); spin_unlock(&vb->lock); } rcu_read_unlock(); free_purged_blocks(&purge); } static void purge_fragmented_blocks_allcpus(void) { int cpu; for_each_possible_cpu(cpu) purge_fragmented_blocks(cpu); } static void *vb_alloc(unsigned long size, gfp_t gfp_mask) { struct vmap_block_queue *vbq; struct vmap_block *vb; void *vaddr = NULL; unsigned int order; BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); if (WARN_ON(size == 0)) { /* * Allocating 0 bytes isn't what caller wants since * get_order(0) returns funny result. Just warn and terminate * early. 
*/ return NULL; } order = get_order(size); rcu_read_lock(); vbq = raw_cpu_ptr(&vmap_block_queue); list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long pages_off; if (READ_ONCE(vb->free) < (1UL << order)) continue; spin_lock(&vb->lock); if (vb->free < (1UL << order)) { spin_unlock(&vb->lock); continue; } pages_off = VMAP_BBMAP_BITS - vb->free; vaddr = vmap_block_vaddr(vb->va->va_start, pages_off); WRITE_ONCE(vb->free, vb->free - (1UL << order)); bitmap_set(vb->used_map, pages_off, (1UL << order)); if (vb->free == 0) { spin_lock(&vbq->lock); list_del_rcu(&vb->free_list); spin_unlock(&vbq->lock); } spin_unlock(&vb->lock); break; } rcu_read_unlock(); /* Allocate new block if nothing was found */ if (!vaddr) vaddr = new_vmap_block(order, gfp_mask); return vaddr; } static void vb_free(unsigned long addr, unsigned long size) { unsigned long offset; unsigned int order; struct vmap_block *vb; struct xarray *xa; BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); flush_cache_vunmap(addr, addr + size); order = get_order(size); offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT; xa = addr_to_vb_xa(addr); vb = xa_load(xa, addr_to_vb_idx(addr)); spin_lock(&vb->lock); bitmap_clear(vb->used_map, offset, (1UL << order)); spin_unlock(&vb->lock); vunmap_range_noflush(addr, addr + size); if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range(addr, addr + size); spin_lock(&vb->lock); /* Expand the not yet TLB flushed dirty range */ vb->dirty_min = min(vb->dirty_min, offset); vb->dirty_max = max(vb->dirty_max, offset + (1UL << order)); WRITE_ONCE(vb->dirty, vb->dirty + (1UL << order)); if (vb->dirty == VMAP_BBMAP_BITS) { BUG_ON(vb->free); spin_unlock(&vb->lock); free_vmap_block(vb); } else spin_unlock(&vb->lock); } static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) { LIST_HEAD(purge_list); int cpu; if (unlikely(!vmap_initialized)) return; mutex_lock(&vmap_purge_lock); for_each_possible_cpu(cpu) { struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); struct vmap_block *vb; unsigned long idx; rcu_read_lock(); xa_for_each(&vbq->vmap_blocks, idx, vb) { spin_lock(&vb->lock); /* * Try to purge a fragmented block first. If it's * not purgeable, check whether there is dirty * space to be flushed. */ if (!purge_fragmented_block(vb, vbq, &purge_list, false) && vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) { unsigned long va_start = vb->va->va_start; unsigned long s, e; s = va_start + (vb->dirty_min << PAGE_SHIFT); e = va_start + (vb->dirty_max << PAGE_SHIFT); start = min(s, start); end = max(e, end); /* Prevent that this is flushed again */ vb->dirty_min = VMAP_BBMAP_BITS; vb->dirty_max = 0; flush = 1; } spin_unlock(&vb->lock); } rcu_read_unlock(); } free_purged_blocks(&purge_list); if (!__purge_vmap_area_lazy(start, end, false) && flush) flush_tlb_kernel_range(start, end); mutex_unlock(&vmap_purge_lock); } /** * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer * * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily * to amortize TLB flushing overheads. What this means is that any page you * have now, may, in a former life, have been mapped into kernel virtual * address by the vmap layer and so there might be some CPUs with TLB entries * still referencing that page (additional to the regular 1:1 kernel mapping). * * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can * be sure that none of the pages we have control over will have any aliases * from the vmap layer. 
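 *
 * Note(illustrative): this is just the whole-address-space form of the
 * internal helper below, i.e. it boils down to
 *
 *	_vm_unmap_aliases(ULONG_MAX, 0, 0);
 *
 * and the actual flush range is then computed from whatever dirty vmap
 * blocks are found.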
*/ void vm_unmap_aliases(void) { unsigned long start = ULONG_MAX, end = 0; int flush = 0; _vm_unmap_aliases(start, end, flush); } EXPORT_SYMBOL_GPL(vm_unmap_aliases); /** * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram * @mem: the pointer returned by vm_map_ram * @count: the count passed to that vm_map_ram call (cannot unmap partial) */ void vm_unmap_ram(const void *mem, unsigned int count) { unsigned long size = (unsigned long)count << PAGE_SHIFT; unsigned long addr = (unsigned long)kasan_reset_tag(mem); struct vmap_area *va; might_sleep(); BUG_ON(!addr); BUG_ON(addr < VMALLOC_START); BUG_ON(addr > VMALLOC_END); BUG_ON(!PAGE_ALIGNED(addr)); kasan_poison_vmalloc(mem, size); if (likely(count <= VMAP_MAX_ALLOC)) { debug_check_no_locks_freed(mem, size); vb_free(addr, size); return; } va = find_unlink_vmap_area(addr); if (WARN_ON_ONCE(!va)) return; debug_check_no_locks_freed((void *)va->va_start, (va->va_end - va->va_start)); free_unmap_vmap_area(va); } EXPORT_SYMBOL(vm_unmap_ram); /** * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space) * @pages: an array of pointers to the pages to be mapped * @count: number of pages * @node: prefer to allocate data structures on this node * * If you use this function for less than VMAP_MAX_ALLOC pages, it could be * faster than vmap so it's good. But if you mix long-life and short-life * objects with vm_map_ram(), it could consume lots of address space through * fragmentation (especially on a 32bit machine). You could see failures in * the end. Please use this function for short-lived objects. * * Returns: a pointer to the address that has been mapped, or %NULL on failure */ void *vm_map_ram(struct page **pages, unsigned int count, int node) { unsigned long size = (unsigned long)count << PAGE_SHIFT; unsigned long addr; void *mem; if (likely(count <= VMAP_MAX_ALLOC)) { mem = vb_alloc(size, GFP_KERNEL); if (IS_ERR(mem)) return NULL; addr = (unsigned long)mem; } else { struct vmap_area *va; va = alloc_vmap_area(size, PAGE_SIZE, VMALLOC_START, VMALLOC_END, node, GFP_KERNEL, VMAP_RAM); if (IS_ERR(va)) return NULL; addr = va->va_start; mem = (void *)addr; } if (vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, PAGE_SHIFT) < 0) { vm_unmap_ram(mem, count); return NULL; } /* * Mark the pages as accessible, now that they are mapped. * With hardware tag-based KASAN, marking is skipped for * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). */ mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_PROT_NORMAL); return mem; } EXPORT_SYMBOL(vm_map_ram); static struct vm_struct *vmlist __initdata; static inline unsigned int vm_area_page_order(struct vm_struct *vm) { #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC return vm->page_order; #else return 0; #endif } static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order) { #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC vm->page_order = order; #else BUG_ON(order != 0); #endif } /** * vm_area_add_early - add vmap area early during boot * @vm: vm_struct to add * * This function is used to add fixed kernel vm area to vmlist before * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags * should contain proper values and the other fields should be zero. * * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. 
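 *
 * Illustrative sketch(the address, size and flags below are made up,
 * only the fields named above need to hold proper values):
 *
 *	static struct vm_struct early_area;
 *
 *	early_area.addr = (void *)0xf0000000;
 *	early_area.size = SZ_1M;
 *	early_area.flags = VM_IOREMAP;
 *	vm_area_add_early(&early_area);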
*/ void __init vm_area_add_early(struct vm_struct *vm) { struct vm_struct *tmp, **p; BUG_ON(vmap_initialized); for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { if (tmp->addr >= vm->addr) { BUG_ON(tmp->addr < vm->addr + vm->size); break; } else BUG_ON(tmp->addr + tmp->size > vm->addr); } vm->next = *p; *p = vm; } /** * vm_area_register_early - register vmap area early during boot * @vm: vm_struct to register * @align: requested alignment * * This function is used to register kernel vm area before * vmalloc_init() is called. @vm->size and @vm->flags should contain * proper values on entry and other fields should be zero. On return, * vm->addr contains the allocated address. * * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. */ void __init vm_area_register_early(struct vm_struct *vm, size_t align) { unsigned long addr = ALIGN(VMALLOC_START, align); struct vm_struct *cur, **p; BUG_ON(vmap_initialized); for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) { if ((unsigned long)cur->addr - addr >= vm->size) break; addr = ALIGN((unsigned long)cur->addr + cur->size, align); } BUG_ON(addr > VMALLOC_END - vm->size); vm->addr = (void *)addr; vm->next = *p; *p = vm; kasan_populate_early_vm_area_shadow(vm->addr, vm->size); } static inline void setup_vmalloc_vm_locked(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { vm->flags = flags; vm->addr = (void *)va->va_start; vm->size = va->va_end - va->va_start; vm->caller = caller; va->vm = vm; } static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { struct vmap_node *vn = addr_to_node(va->va_start); spin_lock(&vn->busy.lock); setup_vmalloc_vm_locked(vm, va, flags, caller); spin_unlock(&vn->busy.lock); } static void clear_vm_uninitialized_flag(struct vm_struct *vm) { /* * Before removing VM_UNINITIALIZED, * we should make sure that vm has proper values. * Pair with smp_rmb() in show_numa_info(). */ smp_wmb(); vm->flags &= ~VM_UNINITIALIZED; } static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long shift, unsigned long flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, const void *caller) { struct vmap_area *va; struct vm_struct *area; unsigned long requested_size = size; BUG_ON(in_interrupt()); size = ALIGN(size, 1ul << shift); if (unlikely(!size)) return NULL; if (flags & VM_IOREMAP) align = 1ul << clamp_t(int, get_count_order_long(size), PAGE_SHIFT, IOREMAP_MAX_ORDER); area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); if (unlikely(!area)) return NULL; if (!(flags & VM_NO_GUARD)) size += PAGE_SIZE; va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0); if (IS_ERR(va)) { kfree(area); return NULL; } setup_vmalloc_vm(area, va, flags, caller); /* * Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a * best-effort approach, as they can be mapped outside of vmalloc code. * For VM_ALLOC mappings, the pages are marked as accessible after * getting mapped in __vmalloc_node_range(). * With hardware tag-based KASAN, marking is skipped for * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). 
*/ if (!(flags & VM_ALLOC)) area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, KASAN_VMALLOC_PROT_NORMAL); return area; } struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller) { return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end, NUMA_NO_NODE, GFP_KERNEL, caller); } /** * get_vm_area - reserve a contiguous kernel virtual area * @size: size of the area * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC * * Search an area of @size in the kernel virtual mapping area, * and reserved it for out purposes. Returns the area descriptor * on success or %NULL on failure. * * Return: the area descriptor on success or %NULL on failure. */ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) { return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, VMALLOC_START, VMALLOC_END, NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0)); } struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller) { return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, VMALLOC_START, VMALLOC_END, NUMA_NO_NODE, GFP_KERNEL, caller); } /** * find_vm_area - find a continuous kernel virtual area * @addr: base address * * Search for the kernel VM area starting at @addr, and return it. * It is up to the caller to do all required locking to keep the returned * pointer valid. * * Return: the area descriptor on success or %NULL on failure. */ struct vm_struct *find_vm_area(const void *addr) { struct vmap_area *va; va = find_vmap_area((unsigned long)addr); if (!va) return NULL; return va->vm; } /** * remove_vm_area - find and remove a continuous kernel virtual area * @addr: base address * * Search for the kernel VM area starting at @addr, and remove it. * This function returns the found VM area, but using it is NOT safe * on SMP machines, except for its size or flags. * * Return: the area descriptor on success or %NULL on failure. */ struct vm_struct *remove_vm_area(const void *addr) { struct vmap_area *va; struct vm_struct *vm; might_sleep(); if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", addr)) return NULL; va = find_unlink_vmap_area((unsigned long)addr); if (!va || !va->vm) return NULL; vm = va->vm; debug_check_no_locks_freed(vm->addr, get_vm_area_size(vm)); debug_check_no_obj_freed(vm->addr, get_vm_area_size(vm)); kasan_free_module_shadow(vm); kasan_poison_vmalloc(vm->addr, get_vm_area_size(vm)); free_unmap_vmap_area(va); return vm; } static inline void set_area_direct_map(const struct vm_struct *area, int (*set_direct_map)(struct page *page)) { int i; /* HUGE_VMALLOC passes small pages to set_direct_map */ for (i = 0; i < area->nr_pages; i++) if (page_address(area->pages[i])) set_direct_map(area->pages[i]); } /* * Flush the vm mapping and reset the direct map. */ static void vm_reset_perms(struct vm_struct *area) { unsigned long start = ULONG_MAX, end = 0; unsigned int page_order = vm_area_page_order(area); int flush_dmap = 0; int i; /* * Find the start and end range of the direct mappings to make sure that * the vm_unmap_aliases() flush includes the direct map. 
*/ for (i = 0; i < area->nr_pages; i += 1U << page_order) { unsigned long addr = (unsigned long)page_address(area->pages[i]); if (addr) { unsigned long page_size; page_size = PAGE_SIZE << page_order; start = min(addr, start); end = max(addr + page_size, end); flush_dmap = 1; } } /* * Set direct map to something invalid so that it won't be cached if * there are any accesses after the TLB flush, then flush the TLB and * reset the direct map permissions to the default. */ set_area_direct_map(area, set_direct_map_invalid_noflush); _vm_unmap_aliases(start, end, flush_dmap); set_area_direct_map(area, set_direct_map_default_noflush); } static void delayed_vfree_work(struct work_struct *w) { struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq); struct llist_node *t, *llnode; llist_for_each_safe(llnode, t, llist_del_all(&p->list)) vfree(llnode); } /** * vfree_atomic - release memory allocated by vmalloc() * @addr: memory base address * * This one is just like vfree() but can be called in any atomic context * except NMIs. */ void vfree_atomic(const void *addr) { struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred); BUG_ON(in_nmi()); kmemleak_free(addr); /* * Use raw_cpu_ptr() because this can be called from preemptible * context. Preemption is absolutely fine here, because the llist_add() * implementation is lockless, so it works even if we are adding to * another cpu's list. schedule_work() should be fine with this too. */ if (addr && llist_add((struct llist_node *)addr, &p->list)) schedule_work(&p->wq); } /** * vfree - Release memory allocated by vmalloc() * @addr: Memory base address * * Free the virtually continuous memory area starting at @addr, as obtained * from one of the vmalloc() family of APIs. This will usually also free the * physical memory underlying the virtual allocation, but that memory is * reference counted, so it will not be freed until the last user goes away. * * If @addr is NULL, no operation is performed. * * Context: * May sleep if called *not* from interrupt context. * Must not be called in NMI context (strictly speaking, it could be * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling * conventions for vfree() arch-dependent would be a really bad idea). */ void vfree(const void *addr) { struct vm_struct *vm; int i; if (unlikely(in_interrupt())) { vfree_atomic(addr); return; } BUG_ON(in_nmi()); kmemleak_free(addr); might_sleep(); if (!addr) return; vm = remove_vm_area(addr); if (unlikely(!vm)) { WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", addr); return; } if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS)) vm_reset_perms(vm); for (i = 0; i < vm->nr_pages; i++) { struct page *page = vm->pages[i]; BUG_ON(!page); mod_memcg_page_state(page, MEMCG_VMALLOC, -1); /* * High-order allocs for huge vmallocs are split, so * can be freed as an array of order-0 allocations */ __free_page(page); cond_resched(); } atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages); kvfree(vm->pages); kfree(vm); } EXPORT_SYMBOL(vfree); /** * vunmap - release virtual mapping obtained by vmap() * @addr: memory base address * * Free the virtually contiguous memory area starting at @addr, * which was created from the page array passed to vmap(). * * Must not be called in interrupt context. 
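 *
 * An illustrative pairing with vmap()(the pages array is assumed to be
 * supplied by the caller):
 *
 *	void *va = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
 *
 *	if (va) {
 *		... use the mapping ...
 *		vunmap(va);
 *	}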
*/ void vunmap(const void *addr) { struct vm_struct *vm; BUG_ON(in_interrupt()); might_sleep(); if (!addr) return; vm = remove_vm_area(addr); if (unlikely(!vm)) { WARN(1, KERN_ERR "Trying to vunmap() nonexistent vm area (%p)\n", addr); return; } kfree(vm); } EXPORT_SYMBOL(vunmap); /** * vmap - map an array of pages into virtually contiguous space * @pages: array of page pointers * @count: number of pages to map * @flags: vm_area->flags * @prot: page protection for the mapping * * Maps @count pages from @pages into contiguous kernel virtual space. * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array itself * (which must be kmalloc or vmalloc memory) and one reference per pages in it * are transferred from the caller to vmap(), and will be freed / dropped when * vfree() is called on the return value. * * Return: the address of the area or %NULL on failure */ void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) { struct vm_struct *area; unsigned long addr; unsigned long size; /* In bytes */ might_sleep(); if (WARN_ON_ONCE(flags & VM_FLUSH_RESET_PERMS)) return NULL; /* * Your top guard is someone else's bottom guard. Not having a top * guard compromises someone else's mappings too. */ if (WARN_ON_ONCE(flags & VM_NO_GUARD)) flags &= ~VM_NO_GUARD; if (count > totalram_pages()) return NULL; size = (unsigned long)count << PAGE_SHIFT; area = get_vm_area_caller(size, flags, __builtin_return_address(0)); if (!area) return NULL; addr = (unsigned long)area->addr; if (vmap_pages_range(addr, addr + size, pgprot_nx(prot), pages, PAGE_SHIFT) < 0) { vunmap(area->addr); return NULL; } if (flags & VM_MAP_PUT_PAGES) { area->pages = pages; area->nr_pages = count; } return area->addr; } EXPORT_SYMBOL(vmap); #ifdef CONFIG_VMAP_PFN struct vmap_pfn_data { unsigned long *pfns; pgprot_t prot; unsigned int idx; }; static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private) { struct vmap_pfn_data *data = private; unsigned long pfn = data->pfns[data->idx]; pte_t ptent; if (WARN_ON_ONCE(pfn_valid(pfn))) return -EINVAL; ptent = pte_mkspecial(pfn_pte(pfn, data->prot)); set_pte_at(&init_mm, addr, pte, ptent); data->idx++; return 0; } /** * vmap_pfn - map an array of PFNs into virtually contiguous space * @pfns: array of PFNs * @count: number of pages to map * @prot: page protection for the mapping * * Maps @count PFNs from @pfns into contiguous kernel virtual space and returns * the start address of the mapping. */ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot) { struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) }; struct vm_struct *area; area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP, __builtin_return_address(0)); if (!area) return NULL; if (apply_to_page_range(&init_mm, (unsigned long)area->addr, count * PAGE_SIZE, vmap_pfn_apply, &data)) { free_vm_area(area); return NULL; } flush_cache_vmap((unsigned long)area->addr, (unsigned long)area->addr + count * PAGE_SIZE); return area->addr; } EXPORT_SYMBOL_GPL(vmap_pfn); #endif /* CONFIG_VMAP_PFN */ static inline unsigned int vm_area_alloc_pages(gfp_t gfp, int nid, unsigned int order, unsigned int nr_pages, struct page **pages) { unsigned int nr_allocated = 0; gfp_t alloc_gfp = gfp; bool nofail = false; struct page *page; int i; /* * For order-0 pages we make use of bulk allocator, if * the page array is partly or not at all populated due * to fails, fallback to a single page allocator that is * more permissive. 
*/ if (!order) { /* bulk allocator doesn't support nofail req. officially */ gfp_t bulk_gfp = gfp & ~__GFP_NOFAIL; while (nr_allocated < nr_pages) { unsigned int nr, nr_pages_request; /* * A maximum allowed request is hard-coded and is 100 * pages per call. That is done in order to prevent a * long preemption off scenario in the bulk-allocator * so the range is [1:100]. */ nr_pages_request = min(100U, nr_pages - nr_allocated); /* memory allocation should consider mempolicy, we can't * wrongly use nearest node when nid == NUMA_NO_NODE, * otherwise memory may be allocated in only one node, * but mempolicy wants to alloc memory by interleaving. */ if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE) nr = alloc_pages_bulk_array_mempolicy(bulk_gfp, nr_pages_request, pages + nr_allocated); else nr = alloc_pages_bulk_array_node(bulk_gfp, nid, nr_pages_request, pages + nr_allocated); nr_allocated += nr; cond_resched(); /* * If zero or pages were obtained partly, * fallback to a single page allocator. */ if (nr != nr_pages_request) break; } } else if (gfp & __GFP_NOFAIL) { /* * Higher order nofail allocations are really expensive and * potentially dangerous (pre-mature OOM, disruptive reclaim * and compaction etc. */ alloc_gfp &= ~__GFP_NOFAIL; nofail = true; } /* High-order pages or fallback path if "bulk" fails. */ while (nr_allocated < nr_pages) { if (fatal_signal_pending(current)) break; if (nid == NUMA_NO_NODE) page = alloc_pages(alloc_gfp, order); else page = alloc_pages_node(nid, alloc_gfp, order); if (unlikely(!page)) { if (!nofail) break; /* fall back to the zero order allocations */ alloc_gfp |= __GFP_NOFAIL; order = 0; continue; } /* * Higher order allocations must be able to be treated as * indepdenent small pages by callers (as they can with * small-page vmallocs). Some drivers do their own refcounting * on vmalloc_to_page() pages, some use page->mapping, * page->lru, etc. */ if (order) split_page(page, order); /* * Careful, we allocate and map page-order pages, but * tracking is done per PAGE_SIZE page so as to keep the * vm_struct APIs independent of the physical/mapped size. */ for (i = 0; i < (1U << order); i++) pages[nr_allocated + i] = page + i; cond_resched(); nr_allocated += 1U << order; } return nr_allocated; } static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, unsigned int page_shift, int node) { const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; bool nofail = gfp_mask & __GFP_NOFAIL; unsigned long addr = (unsigned long)area->addr; unsigned long size = get_vm_area_size(area); unsigned long array_size; unsigned int nr_small_pages = size >> PAGE_SHIFT; unsigned int page_order; unsigned int flags; int ret; array_size = (unsigned long)nr_small_pages * sizeof(struct page *); if (!(gfp_mask & (GFP_DMA | GFP_DMA32))) gfp_mask |= __GFP_HIGHMEM; /* Please note that the recursion is strictly bounded. 
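 * Each nested call only needs to allocate the "struct page *" array for
 * the previous level, which shrinks the request by a factor of
 * PAGE_SIZE / sizeof(struct page *) per level, so only a tiny nesting
 * depth is ever reached.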
*/ if (array_size > PAGE_SIZE) { area->pages = __vmalloc_node(array_size, 1, nested_gfp, node, area->caller); } else { area->pages = kmalloc_node(array_size, nested_gfp, node); } if (!area->pages) { warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, failed to allocated page array size %lu", nr_small_pages * PAGE_SIZE, array_size); free_vm_area(area); return NULL; } set_vm_area_page_order(area, page_shift - PAGE_SHIFT); page_order = vm_area_page_order(area); area->nr_pages = vm_area_alloc_pages(gfp_mask | __GFP_NOWARN, node, page_order, nr_small_pages, area->pages); atomic_long_add(area->nr_pages, &nr_vmalloc_pages); if (gfp_mask & __GFP_ACCOUNT) { int i; for (i = 0; i < area->nr_pages; i++) mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1); } /* * If not enough pages were obtained to accomplish an * allocation request, free them via vfree() if any. */ if (area->nr_pages != nr_small_pages) { /* * vm_area_alloc_pages() can fail due to insufficient memory but * also:- * * - a pending fatal signal * - insufficient huge page-order pages * * Since we always retry allocations at order-0 in the huge page * case a warning for either is spurious. */ if (!fatal_signal_pending(current) && page_order == 0) warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, failed to allocate pages", area->nr_pages * PAGE_SIZE); goto fail; } /* * page tables allocations ignore external gfp mask, enforce it * by the scope API */ if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) flags = memalloc_nofs_save(); else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) flags = memalloc_noio_save(); do { ret = vmap_pages_range(addr, addr + size, prot, area->pages, page_shift); if (nofail && (ret < 0)) schedule_timeout_uninterruptible(1); } while (nofail && (ret < 0)); if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) memalloc_nofs_restore(flags); else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) memalloc_noio_restore(flags); if (ret < 0) { warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, failed to map pages", area->nr_pages * PAGE_SIZE); goto fail; } return area->addr; fail: vfree(area->addr); return NULL; } /** * __vmalloc_node_range - allocate virtually contiguous memory * @size: allocation size * @align: desired alignment * @start: vm area range start * @end: vm area range end * @gfp_mask: flags for the page level allocator * @prot: protection mask for the allocated pages * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD) * @node: node to use for allocation or NUMA_NO_NODE * @caller: caller's return address * * Allocate enough pages to cover @size from the page level * allocator with @gfp_mask flags. Please note that the full set of gfp * flags are not supported. GFP_KERNEL, GFP_NOFS and GFP_NOIO are all * supported. * Zone modifiers are not supported. From the reclaim modifiers * __GFP_DIRECT_RECLAIM is required (aka GFP_NOWAIT is not supported) * and only __GFP_NOFAIL is supported (i.e. __GFP_NORETRY and * __GFP_RETRY_MAYFAIL are not supported). * * __GFP_NOWARN can be used to suppress failures messages. * * Map them into contiguous kernel virtual space, using a pagetable * protection of @prot. 
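 *
 * An illustrative call(this is effectively what a plain __vmalloc()
 * request ends up passing via __vmalloc_node() further below):
 *
 *	p = __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
 *				 gfp_mask, PAGE_KERNEL, 0, NUMA_NO_NODE,
 *				 __builtin_return_address(0));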
* * Return: the address of the area or %NULL on failure */ void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) { struct vm_struct *area; void *ret; kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_NONE; unsigned long real_size = size; unsigned long real_align = align; unsigned int shift = PAGE_SHIFT; if (WARN_ON_ONCE(!size)) return NULL; if ((size >> PAGE_SHIFT) > totalram_pages()) { warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, exceeds total pages", real_size); return NULL; } if (vmap_allow_huge && (vm_flags & VM_ALLOW_HUGE_VMAP)) { unsigned long size_per_node; /* * Try huge pages. Only try for PAGE_KERNEL allocations, * others like modules don't yet expect huge pages in * their allocations due to apply_to_page_range not * supporting them. */ size_per_node = size; if (node == NUMA_NO_NODE) size_per_node /= num_online_nodes(); if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE) shift = PMD_SHIFT; else shift = arch_vmap_pte_supported_shift(size_per_node); align = max(real_align, 1UL << shift); size = ALIGN(real_size, 1UL << shift); } again: area = __get_vm_area_node(real_size, align, shift, VM_ALLOC | VM_UNINITIALIZED | vm_flags, start, end, node, gfp_mask, caller); if (!area) { bool nofail = gfp_mask & __GFP_NOFAIL; warn_alloc(gfp_mask, NULL, "vmalloc error: size %lu, vm_struct allocation failed%s", real_size, (nofail) ? ". Retrying." : ""); if (nofail) { schedule_timeout_uninterruptible(1); goto again; } goto fail; } /* * Prepare arguments for __vmalloc_area_node() and * kasan_unpoison_vmalloc(). */ if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) { if (kasan_hw_tags_enabled()) { /* * Modify protection bits to allow tagging. * This must be done before mapping. */ prot = arch_vmap_pgprot_tagged(prot); /* * Skip page_alloc poisoning and zeroing for physical * pages backing VM_ALLOC mapping. Memory is instead * poisoned and zeroed by kasan_unpoison_vmalloc(). */ gfp_mask |= __GFP_SKIP_KASAN | __GFP_SKIP_ZERO; } /* Take note that the mapping is PAGE_KERNEL. */ kasan_flags |= KASAN_VMALLOC_PROT_NORMAL; } /* Allocate physical pages and map them into vmalloc space. */ ret = __vmalloc_area_node(area, gfp_mask, prot, shift, node); if (!ret) goto fail; /* * Mark the pages as accessible, now that they are mapped. * The condition for setting KASAN_VMALLOC_INIT should complement the * one in post_alloc_hook() with regards to the __GFP_SKIP_ZERO check * to make sure that memory is initialized under the same conditions. * Tag-based KASAN modes only assign tags to normal non-executable * allocations, see __kasan_unpoison_vmalloc(). */ kasan_flags |= KASAN_VMALLOC_VM_ALLOC; if (!want_init_on_free() && want_init_on_alloc(gfp_mask) && (gfp_mask & __GFP_SKIP_ZERO)) kasan_flags |= KASAN_VMALLOC_INIT; /* KASAN_VMALLOC_PROT_NORMAL already set if required. */ area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED * flag. It means that vm_struct is not fully initialized. * Now, it is fully initialized, so remove this flag here. 
*/ clear_vm_uninitialized_flag(area); size = PAGE_ALIGN(size); if (!(vm_flags & VM_DEFER_KMEMLEAK)) kmemleak_vmalloc(area, size, gfp_mask); return area->addr; fail: if (shift > PAGE_SHIFT) { shift = PAGE_SHIFT; align = real_align; size = real_size; goto again; } return NULL; } /** * __vmalloc_node - allocate virtually contiguous memory * @size: allocation size * @align: desired alignment * @gfp_mask: flags for the page level allocator * @node: node to use for allocation or NUMA_NO_NODE * @caller: caller's return address * * Allocate enough pages to cover @size from the page level allocator with * @gfp_mask flags. Map them into contiguous kernel virtual space. * * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL * and __GFP_NOFAIL are not supported * * Any use of gfp flags outside of GFP_KERNEL should be consulted * with mm people. * * Return: pointer to the allocated memory or %NULL on error */ void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) { return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, gfp_mask, PAGE_KERNEL, 0, node, caller); } /* * This is only for performance analysis of vmalloc and stress purpose. * It is required by vmalloc test module, therefore do not use it other * than that. */ #ifdef CONFIG_TEST_VMALLOC_MODULE EXPORT_SYMBOL_GPL(__vmalloc_node); #endif void *__vmalloc(unsigned long size, gfp_t gfp_mask) { return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(__vmalloc); /** * vmalloc - allocate virtually contiguous memory * @size: allocation size * * Allocate enough pages to cover @size from the page level * allocator and map them into contiguous kernel virtual space. * * For tight control over page level allocator and protection flags * use __vmalloc() instead. * * Return: pointer to the allocated memory or %NULL on error */ void *vmalloc(unsigned long size) { return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc); /** * vmalloc_huge - allocate virtually contiguous memory, allow huge pages * @size: allocation size * @gfp_mask: flags for the page level allocator * * Allocate enough pages to cover @size from the page level * allocator and map them into contiguous kernel virtual space. * If @size is greater than or equal to PMD_SIZE, allow using * huge pages for the memory * * Return: pointer to the allocated memory or %NULL on error */ void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) { return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL_GPL(vmalloc_huge); /** * vzalloc - allocate virtually contiguous memory with zero fill * @size: allocation size * * Allocate enough pages to cover @size from the page level * allocator and map them into contiguous kernel virtual space. * The memory allocated is set to zero. * * For tight control over page level allocator and protection flags * use __vmalloc() instead. * * Return: pointer to the allocated memory or %NULL on error */ void *vzalloc(unsigned long size) { return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(vzalloc); /** * vmalloc_user - allocate zeroed virtually contiguous memory for userspace * @size: allocation size * * The resulting memory area is zeroed so it can be mapped to userspace * without leaking data. 
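 *
 * A typical(illustrative) use is together with remap_vmalloc_range()
 * from a driver's ->mmap() handler(buf_len and vma are the caller's):
 *
 *	buf = vmalloc_user(buf_len);
 *	...
 *	err = remap_vmalloc_range(vma, buf, 0);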
* * Return: pointer to the allocated memory or %NULL on error */ void *vmalloc_user(unsigned long size) { return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_user); /** * vmalloc_node - allocate memory on a specific node * @size: allocation size * @node: numa node * * Allocate enough pages to cover @size from the page level * allocator and map them into contiguous kernel virtual space. * * For tight control over page level allocator and protection flags * use __vmalloc() instead. * * Return: pointer to the allocated memory or %NULL on error */ void *vmalloc_node(unsigned long size, int node) { return __vmalloc_node(size, 1, GFP_KERNEL, node, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_node); /** * vzalloc_node - allocate memory on a specific node with zero fill * @size: allocation size * @node: numa node * * Allocate enough pages to cover @size from the page level * allocator and map them into contiguous kernel virtual space. * The memory allocated is set to zero. * * Return: pointer to the allocated memory or %NULL on error */ void *vzalloc_node(unsigned long size, int node) { return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node, __builtin_return_address(0)); } EXPORT_SYMBOL(vzalloc_node); #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) #define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL) #else /* * 64b systems should always have either DMA or DMA32 zones. For others * GFP_DMA32 should do the right thing and use the normal zone. */ #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) #endif /** * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) * @size: allocation size * * Allocate enough 32bit PA addressable pages to cover @size from the * page level allocator and map them into contiguous kernel virtual space. * * Return: pointer to the allocated memory or %NULL on error */ void *vmalloc_32(unsigned long size) { return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_32); /** * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory * @size: allocation size * * The resulting memory area is 32bit addressable and zeroed so it can be * mapped to userspace without leaking data. * * Return: pointer to the allocated memory or %NULL on error */ void *vmalloc_32_user(unsigned long size) { return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_32_user); /* * Atomically zero bytes in the iterator. * * Returns the number of zeroed bytes. */ static size_t zero_iter(struct iov_iter *iter, size_t count) { size_t remains = count; while (remains > 0) { size_t num, copied; num = min_t(size_t, remains, PAGE_SIZE); copied = copy_page_to_iter_nofault(ZERO_PAGE(0), 0, num, iter); remains -= copied; if (copied < num) break; } return count - remains; } /* * small helper routine, copy contents to iter from addr. * If the page is not present, fill zero. * * Returns the number of copied bytes. 
*/ static size_t aligned_vread_iter(struct iov_iter *iter, const char *addr, size_t count) { size_t remains = count; struct page *page; while (remains > 0) { unsigned long offset, length; size_t copied = 0; offset = offset_in_page(addr); length = PAGE_SIZE - offset; if (length > remains) length = remains; page = vmalloc_to_page(addr); /* * To do safe access to this _mapped_ area, we need lock. But * adding lock here means that we need to add overhead of * vmalloc()/vfree() calls for this _debug_ interface, rarely * used. Instead of that, we'll use an local mapping via * copy_page_to_iter_nofault() and accept a small overhead in * this access function. */ if (page) copied = copy_page_to_iter_nofault(page, offset, length, iter); else copied = zero_iter(iter, length); addr += copied; remains -= copied; if (copied != length) break; } return count - remains; } /* * Read from a vm_map_ram region of memory. * * Returns the number of copied bytes. */ static size_t vmap_ram_vread_iter(struct iov_iter *iter, const char *addr, size_t count, unsigned long flags) { char *start; struct vmap_block *vb; struct xarray *xa; unsigned long offset; unsigned int rs, re; size_t remains, n; /* * If it's area created by vm_map_ram() interface directly, but * not further subdividing and delegating management to vmap_block, * handle it here. */ if (!(flags & VMAP_BLOCK)) return aligned_vread_iter(iter, addr, count); remains = count; /* * Area is split into regions and tracked with vmap_block, read out * each region and zero fill the hole between regions. */ xa = addr_to_vb_xa((unsigned long) addr); vb = xa_load(xa, addr_to_vb_idx((unsigned long)addr)); if (!vb) goto finished_zero; spin_lock(&vb->lock); if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) { spin_unlock(&vb->lock); goto finished_zero; } for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) { size_t copied; if (remains == 0) goto finished; start = vmap_block_vaddr(vb->va->va_start, rs); if (addr < start) { size_t to_zero = min_t(size_t, start - addr, remains); size_t zeroed = zero_iter(iter, to_zero); addr += zeroed; remains -= zeroed; if (remains == 0 || zeroed != to_zero) goto finished; } /*it could start reading from the middle of used region*/ offset = offset_in_page(addr); n = ((re - rs + 1) << PAGE_SHIFT) - offset; if (n > remains) n = remains; copied = aligned_vread_iter(iter, start + offset, n); addr += copied; remains -= copied; if (copied != n) goto finished; } spin_unlock(&vb->lock); finished_zero: /* zero-fill the left dirty or free regions */ return count - remains + zero_iter(iter, remains); finished: /* We couldn't copy/zero everything */ spin_unlock(&vb->lock); return count - remains; } /** * vread_iter() - read vmalloc area in a safe way to an iterator. * @iter: the iterator to which data should be written. * @addr: vm address. * @count: number of bytes to be read. * * This function checks that addr is a valid vmalloc'ed area, and * copy data from that area to a given buffer. If the given memory range * of [addr...addr+count) includes some valid address, data is copied to * proper area of @buf. If there are memory holes, they'll be zero-filled. * IOREMAP area is treated as memory hole and no copy is done. * * If [addr...addr+count) doesn't includes any intersects with alive * vm_struct area, returns 0. @buf should be kernel's buffer. * * Note: In usual ops, vread() is never necessary because the caller * should know vmalloc() area is valid and can use memcpy(). 
* This is for routines which have to access vmalloc area without * any information, as /proc/kcore. * * Return: number of bytes for which addr and buf should be increased * (same number as @count) or %0 if [addr...addr+count) doesn't * include any intersection with valid vmalloc area */ long vread_iter(struct iov_iter *iter, const char *addr, size_t count) { struct vmap_node *vn; struct vmap_area *va; struct vm_struct *vm; char *vaddr; size_t n, size, flags, remains; unsigned long next; addr = kasan_reset_tag(addr); /* Don't allow overflow */ if ((unsigned long) addr + count < count) count = -(unsigned long) addr; remains = count; vn = find_vmap_area_exceed_addr_lock((unsigned long) addr, &va); if (!vn) goto finished_zero; /* no intersects with alive vmap_area */ if ((unsigned long)addr + remains <= va->va_start) goto finished_zero; do { size_t copied; if (remains == 0) goto finished; vm = va->vm; flags = va->flags & VMAP_FLAGS_MASK; /* * VMAP_BLOCK indicates a sub-type of vm_map_ram area, need * be set together with VMAP_RAM. */ WARN_ON(flags == VMAP_BLOCK); if (!vm && !flags) goto next_va; if (vm && (vm->flags & VM_UNINITIALIZED)) goto next_va; /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ smp_rmb(); vaddr = (char *) va->va_start; size = vm ? get_vm_area_size(vm) : va_size(va); if (addr >= vaddr + size) goto next_va; if (addr < vaddr) { size_t to_zero = min_t(size_t, vaddr - addr, remains); size_t zeroed = zero_iter(iter, to_zero); addr += zeroed; remains -= zeroed; if (remains == 0 || zeroed != to_zero) goto finished; } n = vaddr + size - addr; if (n > remains) n = remains; if (flags & VMAP_RAM) copied = vmap_ram_vread_iter(iter, addr, n, flags); else if (!(vm && (vm->flags & (VM_IOREMAP | VM_SPARSE)))) copied = aligned_vread_iter(iter, addr, n); else /* IOREMAP | SPARSE area is treated as memory hole */ copied = zero_iter(iter, n); addr += copied; remains -= copied; if (copied != n) goto finished; next_va: next = va->va_end; spin_unlock(&vn->busy.lock); } while ((vn = find_vmap_area_exceed_addr_lock(next, &va))); finished_zero: if (vn) spin_unlock(&vn->busy.lock); /* zero-fill memory holes */ return count - remains + zero_iter(iter, remains); finished: /* Nothing remains, or We couldn't copy/zero everything. */ if (vn) spin_unlock(&vn->busy.lock); return count - remains; } /** * remap_vmalloc_range_partial - map vmalloc pages to userspace * @vma: vma to cover * @uaddr: target user address to start at * @kaddr: virtual address of vmalloc kernel memory * @pgoff: offset from @kaddr to start at * @size: size of map area * * Returns: 0 for success, -Exxx on failure * * This function checks that @kaddr is a valid vmalloc'ed area, * and that it is big enough to cover the range starting at * @uaddr in @vma. Will return failure if that criteria isn't * met. 
* * Similar to remap_pfn_range() (see mm/memory.c) */ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, unsigned long pgoff, unsigned long size) { struct vm_struct *area; unsigned long off; unsigned long end_index; if (check_shl_overflow(pgoff, PAGE_SHIFT, &off)) return -EINVAL; size = PAGE_ALIGN(size); if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr)) return -EINVAL; area = find_vm_area(kaddr); if (!area) return -EINVAL; if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) return -EINVAL; if (check_add_overflow(size, off, &end_index) || end_index > get_vm_area_size(area)) return -EINVAL; kaddr += off; do { struct page *page = vmalloc_to_page(kaddr); int ret; ret = vm_insert_page(vma, uaddr, page); if (ret) return ret; uaddr += PAGE_SIZE; kaddr += PAGE_SIZE; size -= PAGE_SIZE; } while (size > 0); vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP); return 0; } /** * remap_vmalloc_range - map vmalloc pages to userspace * @vma: vma to cover (map full range of vma) * @addr: vmalloc memory * @pgoff: number of pages into addr before first page to map * * Returns: 0 for success, -Exxx on failure * * This function checks that addr is a valid vmalloc'ed area, and * that it is big enough to cover the vma. Will return failure if * that criteria isn't met. * * Similar to remap_pfn_range() (see mm/memory.c) */ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff) { return remap_vmalloc_range_partial(vma, vma->vm_start, addr, pgoff, vma->vm_end - vma->vm_start); } EXPORT_SYMBOL(remap_vmalloc_range); void free_vm_area(struct vm_struct *area) { struct vm_struct *ret; ret = remove_vm_area(area->addr); BUG_ON(ret != area); kfree(area); } EXPORT_SYMBOL_GPL(free_vm_area); #ifdef CONFIG_SMP static struct vmap_area *node_to_va(struct rb_node *n) { return rb_entry_safe(n, struct vmap_area, rb_node); } /** * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to * @addr: target address * * Returns: vmap_area if it is found. If there is no such area * the first highest(reverse order) vmap_area is returned * i.e. va->va_start < addr && va->va_end < addr or NULL * if there are no any areas before @addr. */ static struct vmap_area * pvm_find_va_enclose_addr(unsigned long addr) { struct vmap_area *va, *tmp; struct rb_node *n; n = free_vmap_area_root.rb_node; va = NULL; while (n) { tmp = rb_entry(n, struct vmap_area, rb_node); if (tmp->va_start <= addr) { va = tmp; if (tmp->va_end >= addr) break; n = n->rb_right; } else { n = n->rb_left; } } return va; } /** * pvm_determine_end_from_reverse - find the highest aligned address * of free block below VMALLOC_END * @va: * in - the VA we start the search(reverse order); * out - the VA with the highest aligned end address. 
* @align: alignment for required highest address * * Returns: determined end address within vmap_area */ static unsigned long pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align) { unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); unsigned long addr; if (likely(*va)) { list_for_each_entry_from_reverse((*va), &free_vmap_area_list, list) { addr = min((*va)->va_end & ~(align - 1), vmalloc_end); if ((*va)->va_start < addr) return addr; } } return 0; } /** * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator * @offsets: array containing offset of each area * @sizes: array containing size of each area * @nr_vms: the number of areas to allocate * @align: alignment, all entries in @offsets and @sizes must be aligned to this * * Returns: kmalloc'd vm_struct pointer array pointing to allocated * vm_structs on success, %NULL on failure * * Percpu allocator wants to use congruent vm areas so that it can * maintain the offsets among percpu areas. This function allocates * congruent vmalloc areas for it with GFP_KERNEL. These areas tend to * be scattered pretty far, distance between two areas easily going up * to gigabytes. To avoid interacting with regular vmallocs, these * areas are allocated from top. * * Despite its complicated look, this allocator is rather simple. It * does everything top-down and scans free blocks from the end looking * for matching base. While scanning, if any of the areas do not fit the * base address is pulled down to fit the area. Scanning is repeated till * all the areas fit and then all necessary data structures are inserted * and the result is returned. */ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align) { const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); struct vmap_area **vas, *va; struct vm_struct **vms; int area, area2, last_area, term_area; unsigned long base, start, size, end, last_end, orig_start, orig_end; bool purged = false; /* verify parameters and allocate data structures */ BUG_ON(offset_in_page(align) || !is_power_of_2(align)); for (last_area = 0, area = 0; area < nr_vms; area++) { start = offsets[area]; end = start + sizes[area]; /* is everything aligned properly? */ BUG_ON(!IS_ALIGNED(offsets[area], align)); BUG_ON(!IS_ALIGNED(sizes[area], align)); /* detect the area with the highest address */ if (start > offsets[last_area]) last_area = area; for (area2 = area + 1; area2 < nr_vms; area2++) { unsigned long start2 = offsets[area2]; unsigned long end2 = start2 + sizes[area2]; BUG_ON(start2 < end && start < end2); } } last_end = offsets[last_area] + sizes[last_area]; if (vmalloc_end - vmalloc_start < last_end) { WARN_ON(true); return NULL; } vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL); vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL); if (!vas || !vms) goto err_free2; for (area = 0; area < nr_vms; area++) { vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL); vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL); if (!vas[area] || !vms[area]) goto err_free; } retry: spin_lock(&free_vmap_area_lock); /* start scanning - we scan from the top, begin with the last area */ area = term_area = last_area; start = offsets[area]; end = start + sizes[area]; va = pvm_find_va_enclose_addr(vmalloc_end); base = pvm_determine_end_from_reverse(&va, align) - end; while (true) { /* * base might have underflowed, add last_end before * comparing. 
*/ if (base + last_end < vmalloc_start + last_end) goto overflow; /* * Fitting base has not been found. */ if (va == NULL) goto overflow; /* * If required width exceeds current VA block, move * base downwards and then recheck. */ if (base + end > va->va_end) { base = pvm_determine_end_from_reverse(&va, align) - end; term_area = area; continue; } /* * If this VA does not fit, move base downwards and recheck. */ if (base + start < va->va_start) { va = node_to_va(rb_prev(&va->rb_node)); base = pvm_determine_end_from_reverse(&va, align) - end; term_area = area; continue; } /* * This area fits, move on to the previous one. If * the previous one is the terminal one, we're done. */ area = (area + nr_vms - 1) % nr_vms; if (area == term_area) break; start = offsets[area]; end = start + sizes[area]; va = pvm_find_va_enclose_addr(base + end); } /* we've found a fitting base, insert all va's */ for (area = 0; area < nr_vms; area++) { int ret; start = base + offsets[area]; size = sizes[area]; va = pvm_find_va_enclose_addr(start); if (WARN_ON_ONCE(va == NULL)) /* It is a BUG(), but trigger recovery instead. */ goto recovery; ret = va_clip(&free_vmap_area_root, &free_vmap_area_list, va, start, size); if (WARN_ON_ONCE(unlikely(ret))) /* It is a BUG(), but trigger recovery instead. */ goto recovery; /* Allocated area. */ va = vas[area]; va->va_start = start; va->va_end = start + size; } spin_unlock(&free_vmap_area_lock); /* populate the kasan shadow space */ for (area = 0; area < nr_vms; area++) { if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) goto err_free_shadow; } /* insert all vm's */ for (area = 0; area < nr_vms; area++) { struct vmap_node *vn = addr_to_node(vas[area]->va_start); spin_lock(&vn->busy.lock); insert_vmap_area(vas[area], &vn->busy.root, &vn->busy.head); setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, pcpu_get_vm_areas); spin_unlock(&vn->busy.lock); } /* * Mark allocated areas as accessible. Do it now as a best-effort * approach, as they can be mapped outside of vmalloc code. * With hardware tag-based KASAN, marking is skipped for * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). */ for (area = 0; area < nr_vms; area++) vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); kfree(vas); return vms; recovery: /* * Remove previously allocated areas. There is no * need in removing these areas from the busy tree, * because they are inserted only on the final step * and when pcpu_get_vm_areas() is success. */ while (area--) { orig_start = vas[area]->va_start; orig_end = vas[area]->va_end; va = merge_or_add_vmap_area_augment(vas[area], &free_vmap_area_root, &free_vmap_area_list); if (va) kasan_release_vmalloc(orig_start, orig_end, va->va_start, va->va_end); vas[area] = NULL; } overflow: spin_unlock(&free_vmap_area_lock); if (!purged) { reclaim_and_purge_vmap_areas(); purged = true; /* Before "retry", check if we recover. */ for (area = 0; area < nr_vms; area++) { if (vas[area]) continue; vas[area] = kmem_cache_zalloc( vmap_area_cachep, GFP_KERNEL); if (!vas[area]) goto err_free; } goto retry; } err_free: for (area = 0; area < nr_vms; area++) { if (vas[area]) kmem_cache_free(vmap_area_cachep, vas[area]); kfree(vms[area]); } err_free2: kfree(vas); kfree(vms); return NULL; err_free_shadow: spin_lock(&free_vmap_area_lock); /* * We release all the vmalloc shadows, even the ones for regions that * hadn't been successfully added. This relies on kasan_release_vmalloc * being able to tolerate this case. 
*/ for (area = 0; area < nr_vms; area++) { orig_start = vas[area]->va_start; orig_end = vas[area]->va_end; va = merge_or_add_vmap_area_augment(vas[area], &free_vmap_area_root, &free_vmap_area_list); if (va) kasan_release_vmalloc(orig_start, orig_end, va->va_start, va->va_end); vas[area] = NULL; kfree(vms[area]); } spin_unlock(&free_vmap_area_lock); kfree(vas); kfree(vms); return NULL; } /** * pcpu_free_vm_areas - free vmalloc areas for percpu allocator * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() * @nr_vms: the number of allocated areas * * Free vm_structs and the array allocated by pcpu_get_vm_areas(). */ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) { int i; for (i = 0; i < nr_vms; i++) free_vm_area(vms[i]); kfree(vms); } #endif /* CONFIG_SMP */ #ifdef CONFIG_PRINTK bool vmalloc_dump_obj(void *object) { const void *caller; struct vm_struct *vm; struct vmap_area *va; struct vmap_node *vn; unsigned long addr; unsigned int nr_pages; addr = PAGE_ALIGN((unsigned long) object); vn = addr_to_node(addr); if (!spin_trylock(&vn->busy.lock)) return false; va = __find_vmap_area(addr, &vn->busy.root); if (!va || !va->vm) { spin_unlock(&vn->busy.lock); return false; } vm = va->vm; addr = (unsigned long) vm->addr; caller = vm->caller; nr_pages = vm->nr_pages; spin_unlock(&vn->busy.lock); pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n", nr_pages, addr, caller); return true; } #endif #ifdef CONFIG_PROC_FS static void show_numa_info(struct seq_file *m, struct vm_struct *v) { if (IS_ENABLED(CONFIG_NUMA)) { unsigned int nr, *counters = m->private; unsigned int step = 1U << vm_area_page_order(v); if (!counters) return; if (v->flags & VM_UNINITIALIZED) return; /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ smp_rmb(); memset(counters, 0, nr_node_ids * sizeof(unsigned int)); for (nr = 0; nr < v->nr_pages; nr += step) counters[page_to_nid(v->pages[nr])] += step; for_each_node_state(nr, N_HIGH_MEMORY) if (counters[nr]) seq_printf(m, " N%u=%u", nr, counters[nr]); } } static void show_purge_info(struct seq_file *m) { struct vmap_node *vn; struct vmap_area *va; int i; for (i = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; spin_lock(&vn->lazy.lock); list_for_each_entry(va, &vn->lazy.head, list) { seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", (void *)va->va_start, (void *)va->va_end, va->va_end - va->va_start); } spin_unlock(&vn->lazy.lock); } } static int vmalloc_info_show(struct seq_file *m, void *p) { struct vmap_node *vn; struct vmap_area *va; struct vm_struct *v; int i; for (i = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; spin_lock(&vn->busy.lock); list_for_each_entry(va, &vn->busy.head, list) { if (!va->vm) { if (va->flags & VMAP_RAM) seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", (void *)va->va_start, (void *)va->va_end, va->va_end - va->va_start); continue; } v = va->vm; seq_printf(m, "0x%pK-0x%pK %7ld", v->addr, v->addr + v->size, v->size); if (v->caller) seq_printf(m, " %pS", v->caller); if (v->nr_pages) seq_printf(m, " pages=%d", v->nr_pages); if (v->phys_addr) seq_printf(m, " phys=%pa", &v->phys_addr); if (v->flags & VM_IOREMAP) seq_puts(m, " ioremap"); if (v->flags & VM_SPARSE) seq_puts(m, " sparse"); if (v->flags & VM_ALLOC) seq_puts(m, " vmalloc"); if (v->flags & VM_MAP) seq_puts(m, " vmap"); if (v->flags & VM_USERMAP) seq_puts(m, " user"); if (v->flags & VM_DMA_COHERENT) seq_puts(m, " dma-coherent"); if (is_vmalloc_addr(v->pages)) seq_puts(m, " vpages"); show_numa_info(m, v); seq_putc(m, '\n'); } 
spin_unlock(&vn->busy.lock); } /* * As a final step, dump "unpurged" areas. */ show_purge_info(m); return 0; } static int __init proc_vmalloc_init(void) { void *priv_data = NULL; if (IS_ENABLED(CONFIG_NUMA)) priv_data = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); proc_create_single_data("vmallocinfo", 0400, NULL, vmalloc_info_show, priv_data); return 0; } module_init(proc_vmalloc_init); #endif static void __init vmap_init_free_space(void) { unsigned long vmap_start = 1; const unsigned long vmap_end = ULONG_MAX; struct vmap_area *free; struct vm_struct *busy; /* * B F B B B F * -|-----|.....|-----|-----|-----|.....|- * | The KVA space | * |<--------------------------------->| */ for (busy = vmlist; busy; busy = busy->next) { if ((unsigned long) busy->addr - vmap_start > 0) { free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); if (!WARN_ON_ONCE(!free)) { free->va_start = vmap_start; free->va_end = (unsigned long) busy->addr; insert_vmap_area_augment(free, NULL, &free_vmap_area_root, &free_vmap_area_list); } } vmap_start = (unsigned long) busy->addr + busy->size; } if (vmap_end - vmap_start > 0) { free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); if (!WARN_ON_ONCE(!free)) { free->va_start = vmap_start; free->va_end = vmap_end; insert_vmap_area_augment(free, NULL, &free_vmap_area_root, &free_vmap_area_list); } } } static void vmap_init_nodes(void) { struct vmap_node *vn; int i, n; #if BITS_PER_LONG == 64 /* * A high threshold of max nodes is fixed and bound to 128, * thus a scale factor is 1 for systems where number of cores * are less or equal to specified threshold. * * As for NUMA-aware notes. For bigger systems, for example * NUMA with multi-sockets, where we can end-up with thousands * of cores in total, a "sub-numa-clustering" should be added. * * In this case a NUMA domain is considered as a single entity * with dedicated sub-nodes in it which describe one group or * set of cores. Therefore a per-domain purging is supposed to * be added as well as a per-domain balancing. */ n = clamp_t(unsigned int, num_possible_cpus(), 1, 128); if (n > 1) { vn = kmalloc_array(n, sizeof(*vn), GFP_NOWAIT | __GFP_NOWARN); if (vn) { /* Node partition is 16 pages. */ vmap_zone_size = (1 << 4) * PAGE_SIZE; nr_vmap_nodes = n; vmap_nodes = vn; } else { pr_err("Failed to allocate an array. Disable a node layer\n"); } } #endif for (n = 0; n < nr_vmap_nodes; n++) { vn = &vmap_nodes[n]; vn->busy.root = RB_ROOT; INIT_LIST_HEAD(&vn->busy.head); spin_lock_init(&vn->busy.lock); vn->lazy.root = RB_ROOT; INIT_LIST_HEAD(&vn->lazy.head); spin_lock_init(&vn->lazy.lock); for (i = 0; i < MAX_VA_SIZE_PAGES; i++) { INIT_LIST_HEAD(&vn->pool[i].head); WRITE_ONCE(vn->pool[i].len, 0); } spin_lock_init(&vn->pool_lock); } } static unsigned long vmap_node_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { unsigned long count; struct vmap_node *vn; int i, j; for (count = 0, i = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; for (j = 0; j < MAX_VA_SIZE_PAGES; j++) count += READ_ONCE(vn->pool[j].len); } return count ? count : SHRINK_EMPTY; } static unsigned long vmap_node_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { int i; for (i = 0; i < nr_vmap_nodes; i++) decay_va_pool_node(&vmap_nodes[i], true); return SHRINK_STOP; } void __init vmalloc_init(void) { struct shrinker *vmap_node_shrinker; struct vmap_area *va; struct vmap_node *vn; struct vm_struct *tmp; int i; /* * Create the cache for vmap_area objects. 
*/ vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC); for_each_possible_cpu(i) { struct vmap_block_queue *vbq; struct vfree_deferred *p; vbq = &per_cpu(vmap_block_queue, i); spin_lock_init(&vbq->lock); INIT_LIST_HEAD(&vbq->free); p = &per_cpu(vfree_deferred, i); init_llist_head(&p->list); INIT_WORK(&p->wq, delayed_vfree_work); xa_init(&vbq->vmap_blocks); } /* * Setup nodes before importing vmlist. */ vmap_init_nodes(); /* Import existing vmlist entries. */ for (tmp = vmlist; tmp; tmp = tmp->next) { va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); if (WARN_ON_ONCE(!va)) continue; va->va_start = (unsigned long)tmp->addr; va->va_end = va->va_start + tmp->size; va->vm = tmp; vn = addr_to_node(va->va_start); insert_vmap_area(va, &vn->busy.root, &vn->busy.head); } /* * Now we can initialize a free vmap space. */ vmap_init_free_space(); vmap_initialized = true; vmap_node_shrinker = shrinker_alloc(0, "vmap-node"); if (!vmap_node_shrinker) { pr_err("Failed to allocate vmap-node shrinker!\n"); return; } vmap_node_shrinker->count_objects = vmap_node_shrink_count; vmap_node_shrinker->scan_objects = vmap_node_shrink_scan; shrinker_register(vmap_node_shrinker); }
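/*
 * Usage sketch (not part of vmalloc.c, illustration only): a minimal,
 * hypothetical driver-side pairing of the allocation and mmap helpers
 * above. The names my_dev_buf, my_dev_alloc(), my_dev_free() and
 * my_dev_mmap() are invented; vmalloc_user(), vfree() and
 * remap_vmalloc_range() are the real APIs defined in this file, and
 * <linux/vmalloc.h> plus <linux/mm.h> are assumed to be included.
 */
#if 0
struct my_dev_buf {
	void *vaddr;	/* vmalloc'ed, virtually contiguous kernel mapping */
	size_t size;
};

static int my_dev_alloc(struct my_dev_buf *buf, size_t size)
{
	/* Zeroed and VM_USERMAP-tagged, so it may later be mmap'ed. */
	buf->vaddr = vmalloc_user(size);
	if (!buf->vaddr)
		return -ENOMEM;
	buf->size = size;
	return 0;
}

static void my_dev_free(struct my_dev_buf *buf)
{
	vfree(buf->vaddr);	/* vfree(NULL) is a no-op */
	buf->vaddr = NULL;
}

static int my_dev_mmap(struct my_dev_buf *buf, struct vm_area_struct *vma)
{
	/* Map the whole area into userspace, starting at page offset 0. */
	return remap_vmalloc_range(vma, buf->vaddr, 0);
}
#endif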
// SPDX-License-Identifier: GPL-2.0 /* sysfs entries for device PM */ #include <linux/device.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> #include <linux/pm_qos.h> #include <linux/pm_runtime.h> #include <linux/pm_wakeup.h> #include
<linux/atomic.h> #include <linux/jiffies.h> #include "power.h" /* * control - Report/change current runtime PM setting of the device * * Runtime power management of a device can be blocked with the help of * this attribute. All devices have one of the following two values for * the power/control file: * * + "auto\n" to allow the device to be power managed at run time; * + "on\n" to prevent the device from being power managed at run time; * * The default for all devices is "auto", which means that devices may be * subject to automatic power management, depending on their drivers. * Changing this attribute to "on" prevents the driver from power managing * the device at run time. Doing that while the device is suspended causes * it to be woken up. * * wakeup - Report/change current wakeup option for device * * Some devices support "wakeup" events, which are hardware signals * used to activate devices from suspended or low power states. Such * devices have one of three values for the sysfs power/wakeup file: * * + "enabled\n" to issue the events; * + "disabled\n" not to do so; or * + "\n" for temporary or permanent inability to issue wakeup. * * (For example, unconfigured USB devices can't issue wakeups.) * * Familiar examples of devices that can issue wakeup events include * keyboards and mice (both PS2 and USB styles), power buttons, modems, * "Wake-On-LAN" Ethernet links, GPIO lines, and more. Some events * will wake the entire system from a suspend state; others may just * wake up the device (if the system as a whole is already active). * Some wakeup events use normal IRQ lines; other use special out * of band signaling. * * It is the responsibility of device drivers to enable (or disable) * wakeup signaling as part of changing device power states, respecting * the policy choices provided through the driver model. * * Devices may not be able to generate wakeup events from all power * states. Also, the events may be ignored in some configurations; * for example, they might need help from other devices that aren't * active, or which may have wakeup disabled. Some drivers rely on * wakeup events internally (unless they are disabled), keeping * their hardware in low power modes whenever they're unused. This * saves runtime power, without requiring system-wide sleep states. * * async - Report/change current async suspend setting for the device * * Asynchronous suspend and resume of the device during system-wide power * state transitions can be enabled by writing "enabled" to this file. * Analogously, if "disabled" is written to this file, the device will be * suspended and resumed synchronously. * * All devices have one of the following two values for power/async: * * + "enabled\n" to permit the asynchronous suspend/resume of the device; * + "disabled\n" to forbid it; * * NOTE: It generally is unsafe to permit the asynchronous suspend/resume * of a device unless it is certain that all of the PM dependencies of the * device are known to the PM core. However, for some devices this * attribute is set to "enabled" by bus type code or device drivers and in * that cases it should be safe to leave the default value. * * autosuspend_delay_ms - Report/change a device's autosuspend_delay value * * Some drivers don't want to carry out a runtime suspend as soon as a * device becomes idle; they want it always to remain idle for some period * of time before suspending it. This period is the autosuspend_delay * value (expressed in milliseconds) and it can be controlled by the user. 
* If the value is negative then the device will never be runtime * suspended. * * NOTE: The autosuspend_delay_ms attribute and the autosuspend_delay * value are used only if the driver calls pm_runtime_use_autosuspend(). * * wakeup_count - Report the number of wakeup events related to the device */ const char power_group_name[] = "power"; EXPORT_SYMBOL_GPL(power_group_name); static const char ctrl_auto[] = "auto"; static const char ctrl_on[] = "on"; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", dev->power.runtime_auto ? ctrl_auto : ctrl_on); } static ssize_t control_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) { device_lock(dev); if (sysfs_streq(buf, ctrl_auto)) pm_runtime_allow(dev); else if (sysfs_streq(buf, ctrl_on)) pm_runtime_forbid(dev); else n = -EINVAL; device_unlock(dev); return n; } static DEVICE_ATTR_RW(control); static ssize_t runtime_active_time_show(struct device *dev, struct device_attribute *attr, char *buf) { u64 tmp = pm_runtime_active_time(dev); do_div(tmp, NSEC_PER_MSEC); return sysfs_emit(buf, "%llu\n", tmp); } static DEVICE_ATTR_RO(runtime_active_time); static ssize_t runtime_suspended_time_show(struct device *dev, struct device_attribute *attr, char *buf) { u64 tmp = pm_runtime_suspended_time(dev); do_div(tmp, NSEC_PER_MSEC); return sysfs_emit(buf, "%llu\n", tmp); } static DEVICE_ATTR_RO(runtime_suspended_time); static ssize_t runtime_status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; if (dev->power.runtime_error) { output = "error"; } else if (dev->power.disable_depth) { output = "unsupported"; } else { switch (dev->power.runtime_status) { case RPM_SUSPENDED: output = "suspended"; break; case RPM_SUSPENDING: output = "suspending"; break; case RPM_RESUMING: output = "resuming"; break; case RPM_ACTIVE: output = "active"; break; default: return -EIO; } } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(runtime_status); static ssize_t autosuspend_delay_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { if (!dev->power.use_autosuspend) return -EIO; return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay); } static ssize_t autosuspend_delay_ms_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { long delay; if (!dev->power.use_autosuspend) return -EIO; if (kstrtol(buf, 10, &delay) != 0 || delay != (int) delay) return -EINVAL; device_lock(dev); pm_runtime_set_autosuspend_delay(dev, delay); device_unlock(dev); return n; } static DEVICE_ATTR_RW(autosuspend_delay_ms); static ssize_t pm_qos_resume_latency_us_show(struct device *dev, struct device_attribute *attr, char *buf) { s32 value = dev_pm_qos_requested_resume_latency(dev); if (value == 0) return sysfs_emit(buf, "n/a\n"); if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) value = 0; return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_resume_latency_us_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { s32 value; int ret; if (!kstrtos32(buf, 0, &value)) { /* * Prevent users from writing negative or "no constraint" values * directly. */ if (value < 0 || value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) return -EINVAL; if (value == 0) value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } else if (sysfs_streq(buf, "n/a")) { value = 0; } else { return -EINVAL; } ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req, value); return ret < 0 ? 
ret : n; } static DEVICE_ATTR_RW(pm_qos_resume_latency_us); static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev, struct device_attribute *attr, char *buf) { s32 value = dev_pm_qos_get_user_latency_tolerance(dev); if (value < 0) return sysfs_emit(buf, "%s\n", "auto"); if (value == PM_QOS_LATENCY_ANY) return sysfs_emit(buf, "%s\n", "any"); return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { s32 value; int ret; if (kstrtos32(buf, 0, &value) == 0) { /* Users can't write negative values directly */ if (value < 0) return -EINVAL; } else { if (sysfs_streq(buf, "auto")) value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; else if (sysfs_streq(buf, "any")) value = PM_QOS_LATENCY_ANY; else return -EINVAL; } ret = dev_pm_qos_update_user_latency_tolerance(dev, value); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_latency_tolerance_us); static ssize_t pm_qos_no_power_off_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) & PM_QOS_FLAG_NO_POWER_OFF)); } static ssize_t pm_qos_no_power_off_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { int ret; if (kstrtoint(buf, 0, &ret)) return -EINVAL; if (ret != 0 && ret != 1) return -EINVAL; ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_NO_POWER_OFF, ret); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_no_power_off); #ifdef CONFIG_PM_SLEEP static const char _enabled[] = "enabled"; static const char _disabled[] = "disabled"; static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", device_can_wakeup(dev) ? (device_may_wakeup(dev) ? 
_enabled : _disabled) : ""); } static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { if (!device_can_wakeup(dev)) return -EINVAL; if (sysfs_streq(buf, _enabled)) device_set_wakeup_enable(dev, 1); else if (sysfs_streq(buf, _disabled)) device_set_wakeup_enable(dev, 0); else return -EINVAL; return n; } static DEVICE_ATTR_RW(wakeup); static ssize_t wakeup_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_count); static ssize_t wakeup_active_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->active_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_active_count); static ssize_t wakeup_abort_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_abort_count); static ssize_t wakeup_expire_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->expire_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_expire_count); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned int active; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { active = dev->power.wakeup->active; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%u\n", active); } static DEVICE_ATTR_RO(wakeup_active); static ssize_t wakeup_total_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->total_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_total_time_ms); static ssize_t wakeup_max_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->max_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_max_time_ms); static ssize_t wakeup_last_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = 
ktime_to_ms(dev->power.wakeup->last_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { if (dev->power.wakeup && dev->power.wakeup->dev) return device_change_owner(dev->power.wakeup->dev, kuid, kgid); return 0; } static DEVICE_ATTR_RO(wakeup_last_time_ms); #ifdef CONFIG_PM_AUTOSLEEP static ssize_t wakeup_prevent_sleep_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); #endif /* CONFIG_PM_AUTOSLEEP */ #else /* CONFIG_PM_SLEEP */ static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { return 0; } #endif #ifdef CONFIG_PM_ADVANCED_DEBUG static ssize_t runtime_usage_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", atomic_read(&dev->power.usage_count)); } static DEVICE_ATTR_RO(runtime_usage); static ssize_t runtime_active_kids_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev->power.ignore_children ? 0 : atomic_read(&dev->power.child_count)); } static DEVICE_ATTR_RO(runtime_active_kids); static ssize_t runtime_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; if (dev->power.disable_depth && !dev->power.runtime_auto) output = "disabled & forbidden"; else if (dev->power.disable_depth) output = "disabled"; else if (!dev->power.runtime_auto) output = "forbidden"; else output = "enabled"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(runtime_enabled); #ifdef CONFIG_PM_SLEEP static ssize_t async_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", device_async_suspend_enabled(dev) ? 
_enabled : _disabled); } static ssize_t async_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { if (sysfs_streq(buf, _enabled)) device_enable_async_suspend(dev); else if (sysfs_streq(buf, _disabled)) device_disable_async_suspend(dev); else return -EINVAL; return n; } static DEVICE_ATTR_RW(async); #endif /* CONFIG_PM_SLEEP */ #endif /* CONFIG_PM_ADVANCED_DEBUG */ static struct attribute *power_attrs[] = { #ifdef CONFIG_PM_ADVANCED_DEBUG #ifdef CONFIG_PM_SLEEP &dev_attr_async.attr, #endif &dev_attr_runtime_status.attr, &dev_attr_runtime_usage.attr, &dev_attr_runtime_active_kids.attr, &dev_attr_runtime_enabled.attr, #endif /* CONFIG_PM_ADVANCED_DEBUG */ NULL, }; static const struct attribute_group pm_attr_group = { .name = power_group_name, .attrs = power_attrs, }; static struct attribute *wakeup_attrs[] = { #ifdef CONFIG_PM_SLEEP &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, &dev_attr_wakeup_abort_count.attr, &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, #ifdef CONFIG_PM_AUTOSLEEP &dev_attr_wakeup_prevent_sleep_time_ms.attr, #endif #endif NULL, }; static const struct attribute_group pm_wakeup_attr_group = { .name = power_group_name, .attrs = wakeup_attrs, }; static struct attribute *runtime_attrs[] = { #ifndef CONFIG_PM_ADVANCED_DEBUG &dev_attr_runtime_status.attr, #endif &dev_attr_control.attr, &dev_attr_runtime_suspended_time.attr, &dev_attr_runtime_active_time.attr, &dev_attr_autosuspend_delay_ms.attr, NULL, }; static const struct attribute_group pm_runtime_attr_group = { .name = power_group_name, .attrs = runtime_attrs, }; static struct attribute *pm_qos_resume_latency_attrs[] = { &dev_attr_pm_qos_resume_latency_us.attr, NULL, }; static const struct attribute_group pm_qos_resume_latency_attr_group = { .name = power_group_name, .attrs = pm_qos_resume_latency_attrs, }; static struct attribute *pm_qos_latency_tolerance_attrs[] = { &dev_attr_pm_qos_latency_tolerance_us.attr, NULL, }; static const struct attribute_group pm_qos_latency_tolerance_attr_group = { .name = power_group_name, .attrs = pm_qos_latency_tolerance_attrs, }; static struct attribute *pm_qos_flags_attrs[] = { &dev_attr_pm_qos_no_power_off.attr, NULL, }; static const struct attribute_group pm_qos_flags_attr_group = { .name = power_group_name, .attrs = pm_qos_flags_attrs, }; int dpm_sysfs_add(struct device *dev) { int rc; /* No need to create PM sysfs if explicitly disabled. 
*/ if (device_pm_not_required(dev)) return 0; rc = sysfs_create_group(&dev->kobj, &pm_attr_group); if (rc) return rc; if (!pm_runtime_has_no_callbacks(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group); if (rc) goto err_out; } if (device_can_wakeup(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); if (rc) goto err_runtime; } if (dev->power.set_latency_tolerance) { rc = sysfs_merge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); if (rc) goto err_wakeup; } rc = pm_wakeup_source_sysfs_add(dev); if (rc) goto err_latency; return 0; err_latency: sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); err_wakeup: sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); err_runtime: sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); err_out: sysfs_remove_group(&dev->kobj, &pm_attr_group); return rc; } int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int rc; if (device_pm_not_required(dev)) return 0; rc = sysfs_group_change_owner(&dev->kobj, &pm_attr_group, kuid, kgid); if (rc) return rc; if (!pm_runtime_has_no_callbacks(dev)) { rc = sysfs_group_change_owner( &dev->kobj, &pm_runtime_attr_group, kuid, kgid); if (rc) return rc; } if (device_can_wakeup(dev)) { rc = sysfs_group_change_owner(&dev->kobj, &pm_wakeup_attr_group, kuid, kgid); if (rc) return rc; rc = dpm_sysfs_wakeup_change_owner(dev, kuid, kgid); if (rc) return rc; } if (dev->power.set_latency_tolerance) { rc = sysfs_group_change_owner( &dev->kobj, &pm_qos_latency_tolerance_attr_group, kuid, kgid); if (rc) return rc; } return 0; } int wakeup_sysfs_add(struct device *dev) { int ret = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); if (!ret) kobject_uevent(&dev->kobj, KOBJ_CHANGE); return ret; } void wakeup_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); kobject_uevent(&dev->kobj, KOBJ_CHANGE); } int pm_qos_sysfs_add_resume_latency(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); } void pm_qos_sysfs_remove_resume_latency(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); } int pm_qos_sysfs_add_flags(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_flags_attr_group); } void pm_qos_sysfs_remove_flags(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group); } int pm_qos_sysfs_add_latency_tolerance(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); } void pm_qos_sysfs_remove_latency_tolerance(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); } void rpm_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); } void dpm_sysfs_remove(struct device *dev) { if (device_pm_not_required(dev)) return; sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); dev_pm_qos_constraints_destroy(dev); rpm_sysfs_remove(dev); sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); sysfs_remove_group(&dev->kobj, &pm_attr_group); }
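/*
 * Usage sketch (not part of this file, illustration only): a hypothetical
 * probe() fragment showing the driver-side calls that give the attributes
 * above something to report. my_probe() is an invented name; the
 * pm_runtime_*(), device_init_wakeup() and device_enable_async_suspend()
 * helpers are the real APIs consulted by the show/store callbacks here,
 * with <linux/pm_runtime.h> and <linux/pm_wakeup.h> assumed included.
 */
#if 0
static int my_probe(struct device *dev)
{
	/*
	 * Let autosuspend_delay_ms read/write a real value (instead of
	 * returning -EIO) and default the delay to two seconds.
	 */
	pm_runtime_set_autosuspend_delay(dev, 2000);
	pm_runtime_use_autosuspend(dev);
	pm_runtime_enable(dev);

	/*
	 * Mark the device wakeup-capable and enabled so the wakeup group
	 * reports "enabled"/"disabled" rather than an empty string.
	 */
	device_init_wakeup(dev, true);

	/* Opt in to asynchronous system suspend/resume ("async" attribute). */
	device_enable_async_suspend(dev);

	return 0;
}
#endif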
// SPDX-License-Identifier: GPL-2.0-or-later /* * Mirics MSi2500 driver * Mirics MSi3101 SDR Dongle driver * * Copyright (C) 2013 Antti Palosaari <crope@iki.fi> * * This driver is somewhat based on the pwc driver: * (C) 1999-2004 Nemosoft Unv. * (C) 2004-2006 Luc Saillard (luc@saillard.org) * (C) 2011 Hans de Goede <hdegoede@redhat.com> */ #include <linux/module.h> #include <linux/slab.h> #include <asm/div64.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> #include <linux/usb.h> #include <media/videobuf2-v4l2.h> #include <media/videobuf2-vmalloc.h> #include <linux/spi/spi.h> static bool msi2500_emulated_fmt; module_param_named(emulated_formats, msi2500_emulated_fmt, bool, 0644); MODULE_PARM_DESC(emulated_formats, "enable emulated formats (disappears in future)"); /* * iConfiguration 0 * bInterfaceNumber 0 * bAlternateSetting 1 * bNumEndpoints 1 * bEndpointAddress 0x81 EP 1 IN * bmAttributes 1 * Transfer Type Isochronous * wMaxPacketSize 0x1400 3x 1024 bytes * bInterval 1 */ #define MAX_ISO_BUFS (8) #define ISO_FRAMES_PER_DESC (8) #define ISO_MAX_FRAME_SIZE (3 * 1024) #define ISO_BUFFER_SIZE (ISO_FRAMES_PER_DESC * ISO_MAX_FRAME_SIZE) #define MAX_ISOC_ERRORS 20 /* * TODO: These formats should be moved to V4L2 API. Formats are currently * disabled from formats[] table, not visible to userspace.
*/ /* signed 12-bit */ #define MSI2500_PIX_FMT_SDR_S12 v4l2_fourcc('D', 'S', '1', '2') /* Mirics MSi2500 format 384 */ #define MSI2500_PIX_FMT_SDR_MSI2500_384 v4l2_fourcc('M', '3', '8', '4') static const struct v4l2_frequency_band bands[] = { { .tuner = 0, .type = V4L2_TUNER_ADC, .index = 0, .capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = 1200000, .rangehigh = 15000000, }, }; /* stream formats */ struct msi2500_format { u32 pixelformat; u32 buffersize; }; /* format descriptions for capture and preview */ static struct msi2500_format formats[] = { { .pixelformat = V4L2_SDR_FMT_CS8, .buffersize = 3 * 1008, #if 0 }, { .pixelformat = MSI2500_PIX_FMT_SDR_MSI2500_384, }, { .pixelformat = MSI2500_PIX_FMT_SDR_S12, #endif }, { .pixelformat = V4L2_SDR_FMT_CS14LE, .buffersize = 3 * 1008, }, { .pixelformat = V4L2_SDR_FMT_CU8, .buffersize = 3 * 1008, }, { .pixelformat = V4L2_SDR_FMT_CU16LE, .buffersize = 3 * 1008, }, }; static const unsigned int NUM_FORMATS = ARRAY_SIZE(formats); /* intermediate buffers with raw data from the USB device */ struct msi2500_frame_buf { /* common v4l buffer stuff -- must be first */ struct vb2_v4l2_buffer vb; struct list_head list; }; struct msi2500_dev { struct device *dev; struct video_device vdev; struct v4l2_device v4l2_dev; struct v4l2_subdev *v4l2_subdev; struct spi_controller *ctlr; /* videobuf2 queue and queued buffers list */ struct vb2_queue vb_queue; struct list_head queued_bufs; spinlock_t queued_bufs_lock; /* Protects queued_bufs */ /* Note if taking both locks v4l2_lock must always be locked first! */ struct mutex v4l2_lock; /* Protects everything else */ struct mutex vb_queue_lock; /* Protects vb_queue and capt_file */ /* Pointer to our usb_device, will be NULL after unplug */ struct usb_device *udev; /* Both mutexes most be hold when setting! 
*/ unsigned int f_adc; u32 pixelformat; u32 buffersize; unsigned int num_formats; unsigned int isoc_errors; /* number of contiguous ISOC errors */ unsigned int vb_full; /* vb is full and packets dropped */ struct urb *urbs[MAX_ISO_BUFS]; /* Controls */ struct v4l2_ctrl_handler hdl; u32 next_sample; /* for track lost packets */ u32 sample; /* for sample rate calc */ unsigned long jiffies_next; }; /* Private functions */ static struct msi2500_frame_buf *msi2500_get_next_fill_buf( struct msi2500_dev *dev) { unsigned long flags; struct msi2500_frame_buf *buf = NULL; spin_lock_irqsave(&dev->queued_bufs_lock, flags); if (list_empty(&dev->queued_bufs)) goto leave; buf = list_entry(dev->queued_bufs.next, struct msi2500_frame_buf, list); list_del(&buf->list); leave: spin_unlock_irqrestore(&dev->queued_bufs_lock, flags); return buf; } /* * +=========================================================================== * | 00-1023 | USB packet type '504' * +=========================================================================== * | 00- 03 | sequence number of first sample in that USB packet * +--------------------------------------------------------------------------- * | 04- 15 | garbage * +--------------------------------------------------------------------------- * | 16-1023 | samples * +--------------------------------------------------------------------------- * signed 8-bit sample * 504 * 2 = 1008 samples * * * +=========================================================================== * | 00-1023 | USB packet type '384' * +=========================================================================== * | 00- 03 | sequence number of first sample in that USB packet * +--------------------------------------------------------------------------- * | 04- 15 | garbage * +--------------------------------------------------------------------------- * | 16- 175 | samples * +--------------------------------------------------------------------------- * | 176- 179 | control bits for previous samples * +--------------------------------------------------------------------------- * | 180- 339 | samples * +--------------------------------------------------------------------------- * | 340- 343 | control bits for previous samples * +--------------------------------------------------------------------------- * | 344- 503 | samples * +--------------------------------------------------------------------------- * | 504- 507 | control bits for previous samples * +--------------------------------------------------------------------------- * | 508- 667 | samples * +--------------------------------------------------------------------------- * | 668- 671 | control bits for previous samples * +--------------------------------------------------------------------------- * | 672- 831 | samples * +--------------------------------------------------------------------------- * | 832- 835 | control bits for previous samples * +--------------------------------------------------------------------------- * | 836- 995 | samples * +--------------------------------------------------------------------------- * | 996- 999 | control bits for previous samples * +--------------------------------------------------------------------------- * | 1000-1023 | garbage * +--------------------------------------------------------------------------- * * Bytes 4 - 7 could have some meaning? * * Control bits for previous samples is 32-bit field, containing 16 x 2-bit * numbers. This results one 2-bit number for 8 samples. 
It is likely used for * bit shifting sample by given bits, increasing actual sampling resolution. * Number 2 (0b10) was never seen. * * 6 * 16 * 2 * 4 = 768 samples. 768 * 4 = 3072 bytes * * * +=========================================================================== * | 00-1023 | USB packet type '336' * +=========================================================================== * | 00- 03 | sequence number of first sample in that USB packet * +--------------------------------------------------------------------------- * | 04- 15 | garbage * +--------------------------------------------------------------------------- * | 16-1023 | samples * +--------------------------------------------------------------------------- * signed 12-bit sample * * * +=========================================================================== * | 00-1023 | USB packet type '252' * +=========================================================================== * | 00- 03 | sequence number of first sample in that USB packet * +--------------------------------------------------------------------------- * | 04- 15 | garbage * +--------------------------------------------------------------------------- * | 16-1023 | samples * +--------------------------------------------------------------------------- * signed 14-bit sample */ static int msi2500_convert_stream(struct msi2500_dev *dev, u8 *dst, u8 *src, unsigned int src_len) { unsigned int i, j, transactions, dst_len = 0; u32 sample[3]; /* There could be 1-3 1024 byte transactions per packet */ transactions = src_len / 1024; for (i = 0; i < transactions; i++) { sample[i] = src[3] << 24 | src[2] << 16 | src[1] << 8 | src[0] << 0; if (i == 0 && dev->next_sample != sample[0]) { dev_dbg_ratelimited(dev->dev, "%d samples lost, %d %08x:%08x\n", sample[0] - dev->next_sample, src_len, dev->next_sample, sample[0]); } /* * Dump all unknown 'garbage' data - maybe we will discover * someday if there is something rational... 
*/ dev_dbg_ratelimited(dev->dev, "%*ph\n", 12, &src[4]); src += 16; /* skip header */ switch (dev->pixelformat) { case V4L2_SDR_FMT_CU8: /* 504 x IQ samples */ { s8 *s8src = (s8 *)src; u8 *u8dst = (u8 *)dst; for (j = 0; j < 1008; j++) *u8dst++ = *s8src++ + 128; src += 1008; dst += 1008; dst_len += 1008; dev->next_sample = sample[i] + 504; break; } case V4L2_SDR_FMT_CU16LE: /* 252 x IQ samples */ { s16 *s16src = (s16 *)src; u16 *u16dst = (u16 *)dst; struct {signed int x:14; } se; /* sign extension */ unsigned int utmp; for (j = 0; j < 1008; j += 2) { /* sign extension from 14-bit to signed int */ se.x = *s16src++; /* from signed int to unsigned int */ utmp = se.x + 8192; /* from 14-bit to 16-bit */ *u16dst++ = utmp << 2 | utmp >> 12; } src += 1008; dst += 1008; dst_len += 1008; dev->next_sample = sample[i] + 252; break; } case MSI2500_PIX_FMT_SDR_MSI2500_384: /* 384 x IQ samples */ /* Dump unknown 'garbage' data */ dev_dbg_ratelimited(dev->dev, "%*ph\n", 24, &src[1000]); memcpy(dst, src, 984); src += 984 + 24; dst += 984; dst_len += 984; dev->next_sample = sample[i] + 384; break; case V4L2_SDR_FMT_CS8: /* 504 x IQ samples */ memcpy(dst, src, 1008); src += 1008; dst += 1008; dst_len += 1008; dev->next_sample = sample[i] + 504; break; case MSI2500_PIX_FMT_SDR_S12: /* 336 x IQ samples */ memcpy(dst, src, 1008); src += 1008; dst += 1008; dst_len += 1008; dev->next_sample = sample[i] + 336; break; case V4L2_SDR_FMT_CS14LE: /* 252 x IQ samples */ memcpy(dst, src, 1008); src += 1008; dst += 1008; dst_len += 1008; dev->next_sample = sample[i] + 252; break; default: break; } } /* calculate sample rate and output it in 10 seconds intervals */ if (unlikely(time_is_before_jiffies(dev->jiffies_next))) { #define MSECS 10000UL unsigned int msecs = jiffies_to_msecs(jiffies - dev->jiffies_next + msecs_to_jiffies(MSECS)); unsigned int samples = dev->next_sample - dev->sample; dev->jiffies_next = jiffies + msecs_to_jiffies(MSECS); dev->sample = dev->next_sample; dev_dbg(dev->dev, "size=%u samples=%u msecs=%u sample rate=%lu\n", src_len, samples, msecs, samples * 1000UL / msecs); } return dst_len; } /* * This gets called for the Isochronous pipe (stream). This is done in interrupt * time, so it has to be fast, not crash, and not stall. Neat. */ static void msi2500_isoc_handler(struct urb *urb) { struct msi2500_dev *dev = (struct msi2500_dev *)urb->context; int i, flen, fstatus; unsigned char *iso_buf = NULL; struct msi2500_frame_buf *fbuf; if (unlikely(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN)) { dev_dbg(dev->dev, "URB (%p) unlinked %ssynchronously\n", urb, urb->status == -ENOENT ? "" : "a"); return; } if (unlikely(urb->status != 0)) { dev_dbg(dev->dev, "called with status %d\n", urb->status); /* Give up after a number of contiguous errors */ if (++dev->isoc_errors > MAX_ISOC_ERRORS) dev_dbg(dev->dev, "Too many ISOC errors, bailing out\n"); goto handler_end; } else { /* Reset ISOC error counter. We did get here, after all. 
*/ dev->isoc_errors = 0; } /* Compact data */ for (i = 0; i < urb->number_of_packets; i++) { void *ptr; /* Check frame error */ fstatus = urb->iso_frame_desc[i].status; if (unlikely(fstatus)) { dev_dbg_ratelimited(dev->dev, "frame=%d/%d has error %d skipping\n", i, urb->number_of_packets, fstatus); continue; } /* Check if that frame contains data */ flen = urb->iso_frame_desc[i].actual_length; if (unlikely(flen == 0)) continue; iso_buf = urb->transfer_buffer + urb->iso_frame_desc[i].offset; /* Get free framebuffer */ fbuf = msi2500_get_next_fill_buf(dev); if (unlikely(fbuf == NULL)) { dev->vb_full++; dev_dbg_ratelimited(dev->dev, "video buffer is full, %d packets dropped\n", dev->vb_full); continue; } /* fill framebuffer */ ptr = vb2_plane_vaddr(&fbuf->vb.vb2_buf, 0); flen = msi2500_convert_stream(dev, ptr, iso_buf, flen); vb2_set_plane_payload(&fbuf->vb.vb2_buf, 0, flen); vb2_buffer_done(&fbuf->vb.vb2_buf, VB2_BUF_STATE_DONE); } handler_end: i = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(i != 0)) dev_dbg(dev->dev, "Error (%d) re-submitting urb\n", i); } static void msi2500_iso_stop(struct msi2500_dev *dev) { int i; dev_dbg(dev->dev, "\n"); /* Unlinking ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { if (dev->urbs[i]) { dev_dbg(dev->dev, "Unlinking URB %p\n", dev->urbs[i]); usb_kill_urb(dev->urbs[i]); } } } static void msi2500_iso_free(struct msi2500_dev *dev) { int i; dev_dbg(dev->dev, "\n"); /* Freeing ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { if (dev->urbs[i]) { dev_dbg(dev->dev, "Freeing URB\n"); if (dev->urbs[i]->transfer_buffer) { usb_free_coherent(dev->udev, dev->urbs[i]->transfer_buffer_length, dev->urbs[i]->transfer_buffer, dev->urbs[i]->transfer_dma); } usb_free_urb(dev->urbs[i]); dev->urbs[i] = NULL; } } } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static void msi2500_isoc_cleanup(struct msi2500_dev *dev) { dev_dbg(dev->dev, "\n"); msi2500_iso_stop(dev); msi2500_iso_free(dev); } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static int msi2500_isoc_init(struct msi2500_dev *dev) { struct urb *urb; int i, j, ret; dev_dbg(dev->dev, "\n"); dev->isoc_errors = 0; ret = usb_set_interface(dev->udev, 0, 1); if (ret) return ret; /* Allocate and init Isochronuous urbs */ for (i = 0; i < MAX_ISO_BUFS; i++) { urb = usb_alloc_urb(ISO_FRAMES_PER_DESC, GFP_KERNEL); if (urb == NULL) { msi2500_isoc_cleanup(dev); return -ENOMEM; } dev->urbs[i] = urb; dev_dbg(dev->dev, "Allocated URB at 0x%p\n", urb); urb->interval = 1; urb->dev = dev->udev; urb->pipe = usb_rcvisocpipe(dev->udev, 0x81); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->transfer_buffer = usb_alloc_coherent(dev->udev, ISO_BUFFER_SIZE, GFP_KERNEL, &urb->transfer_dma); if (urb->transfer_buffer == NULL) { dev_err(dev->dev, "Failed to allocate urb buffer %d\n", i); msi2500_isoc_cleanup(dev); return -ENOMEM; } urb->transfer_buffer_length = ISO_BUFFER_SIZE; urb->complete = msi2500_isoc_handler; urb->context = dev; urb->start_frame = 0; urb->number_of_packets = ISO_FRAMES_PER_DESC; for (j = 0; j < ISO_FRAMES_PER_DESC; j++) { urb->iso_frame_desc[j].offset = j * ISO_MAX_FRAME_SIZE; urb->iso_frame_desc[j].length = ISO_MAX_FRAME_SIZE; } } /* link */ for (i = 0; i < MAX_ISO_BUFS; i++) { ret = usb_submit_urb(dev->urbs[i], GFP_KERNEL); if (ret) { dev_err(dev->dev, "usb_submit_urb %d failed with error %d\n", i, ret); msi2500_isoc_cleanup(dev); return ret; } dev_dbg(dev->dev, "URB 0x%p submitted.\n", dev->urbs[i]); } /* All is done... 
*/ return 0; } /* Must be called with vb_queue_lock hold */ static void msi2500_cleanup_queued_bufs(struct msi2500_dev *dev) { unsigned long flags; dev_dbg(dev->dev, "\n"); spin_lock_irqsave(&dev->queued_bufs_lock, flags); while (!list_empty(&dev->queued_bufs)) { struct msi2500_frame_buf *buf; buf = list_entry(dev->queued_bufs.next, struct msi2500_frame_buf, list); list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); } spin_unlock_irqrestore(&dev->queued_bufs_lock, flags); } /* The user yanked out the cable... */ static void msi2500_disconnect(struct usb_interface *intf) { struct v4l2_device *v = usb_get_intfdata(intf); struct msi2500_dev *dev = container_of(v, struct msi2500_dev, v4l2_dev); dev_dbg(dev->dev, "\n"); mutex_lock(&dev->vb_queue_lock); mutex_lock(&dev->v4l2_lock); /* No need to keep the urbs around after disconnection */ dev->udev = NULL; v4l2_device_disconnect(&dev->v4l2_dev); video_unregister_device(&dev->vdev); spi_unregister_controller(dev->ctlr); mutex_unlock(&dev->v4l2_lock); mutex_unlock(&dev->vb_queue_lock); v4l2_device_put(&dev->v4l2_dev); } static int msi2500_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { struct msi2500_dev *dev = video_drvdata(file); dev_dbg(dev->dev, "\n"); strscpy(cap->driver, KBUILD_MODNAME, sizeof(cap->driver)); strscpy(cap->card, dev->vdev.name, sizeof(cap->card)); usb_make_path(dev->udev, cap->bus_info, sizeof(cap->bus_info)); return 0; } /* Videobuf2 operations */ static int msi2500_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct msi2500_dev *dev = vb2_get_drv_priv(vq); dev_dbg(dev->dev, "nbuffers=%d\n", *nbuffers); /* Absolute min and max number of buffers available for mmap() */ *nbuffers = clamp_t(unsigned int, *nbuffers, 8, 32); *nplanes = 1; sizes[0] = PAGE_ALIGN(dev->buffersize); dev_dbg(dev->dev, "nbuffers=%d sizes[0]=%d\n", *nbuffers, sizes[0]); return 0; } static void msi2500_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct msi2500_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct msi2500_frame_buf *buf = container_of(vbuf, struct msi2500_frame_buf, vb); unsigned long flags; /* Check the device has not disconnected between prep and queuing */ if (unlikely(!dev->udev)) { vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); return; } spin_lock_irqsave(&dev->queued_bufs_lock, flags); list_add_tail(&buf->list, &dev->queued_bufs); spin_unlock_irqrestore(&dev->queued_bufs_lock, flags); } #define CMD_WREG 0x41 #define CMD_START_STREAMING 0x43 #define CMD_STOP_STREAMING 0x45 #define CMD_READ_UNKNOWN 0x48 #define msi2500_dbg_usb_control_msg(_dev, _r, _t, _v, _i, _b, _l) { \ char *_direction; \ if (_t & USB_DIR_IN) \ _direction = "<<<"; \ else \ _direction = ">>>"; \ dev_dbg(_dev, "%02x %02x %02x %02x %02x %02x %02x %02x %s %*ph\n", \ _t, _r, _v & 0xff, _v >> 8, _i & 0xff, _i >> 8, \ _l & 0xff, _l >> 8, _direction, _l, _b); \ } static int msi2500_ctrl_msg(struct msi2500_dev *dev, u8 cmd, u32 data) { int ret; u8 request = cmd; u8 requesttype = USB_DIR_OUT | USB_TYPE_VENDOR; u16 value = (data >> 0) & 0xffff; u16 index = (data >> 16) & 0xffff; msi2500_dbg_usb_control_msg(dev->dev, request, requesttype, value, index, NULL, 0); ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), request, requesttype, value, index, NULL, 0, 2000); if (ret) dev_err(dev->dev, "failed %d, cmd %02x, data %04x\n", ret, cmd, data); return ret; } static int 
msi2500_set_usb_adc(struct msi2500_dev *dev) { int ret; unsigned int f_vco, f_sr, div_n, k, k_cw, div_out; u32 reg3, reg4, reg7; struct v4l2_ctrl *bandwidth_auto; struct v4l2_ctrl *bandwidth; f_sr = dev->f_adc; /* set tuner, subdev, filters according to sampling rate */ bandwidth_auto = v4l2_ctrl_find(&dev->hdl, V4L2_CID_RF_TUNER_BANDWIDTH_AUTO); if (v4l2_ctrl_g_ctrl(bandwidth_auto)) { bandwidth = v4l2_ctrl_find(&dev->hdl, V4L2_CID_RF_TUNER_BANDWIDTH); v4l2_ctrl_s_ctrl(bandwidth, dev->f_adc); } /* select stream format */ switch (dev->pixelformat) { case V4L2_SDR_FMT_CU8: reg7 = 0x000c9407; /* 504 */ break; case V4L2_SDR_FMT_CU16LE: reg7 = 0x00009407; /* 252 */ break; case V4L2_SDR_FMT_CS8: reg7 = 0x000c9407; /* 504 */ break; case MSI2500_PIX_FMT_SDR_MSI2500_384: reg7 = 0x0000a507; /* 384 */ break; case MSI2500_PIX_FMT_SDR_S12: reg7 = 0x00008507; /* 336 */ break; case V4L2_SDR_FMT_CS14LE: reg7 = 0x00009407; /* 252 */ break; default: reg7 = 0x000c9407; /* 504 */ break; } /* * Fractional-N synthesizer * * +----------------------------------------+ * v | * Fref +----+ +-------+ +-----+ +------+ +---+ * ------> | PD | --> | VCO | --> | /2 | ------> | /N.F | <-- | K | * +----+ +-------+ +-----+ +------+ +---+ * | * | * v * +-------+ +-----+ Fout * | /Rout | --> | /12 | ------> * +-------+ +-----+ */ /* * Synthesizer config is just a educated guess... * * [7:0] 0x03, register address * [8] 1, power control * [9] ?, power control * [12:10] output divider * [13] 0 ? * [14] 0 ? * [15] fractional MSB, bit 20 * [16:19] N * [23:20] ? * [24:31] 0x01 * * output divider * val div * 0 - (invalid) * 1 4 * 2 6 * 3 8 * 4 10 * 5 12 * 6 14 * 7 16 * * VCO 202000000 - 720000000++ */ #define F_REF 24000000 #define DIV_PRE_N 2 #define DIV_LO_OUT 12 reg3 = 0x01000303; reg4 = 0x00000004; /* XXX: Filters? AGC? VCO band? */ if (f_sr < 6000000) reg3 |= 0x1 << 20; else if (f_sr < 7000000) reg3 |= 0x5 << 20; else if (f_sr < 8500000) reg3 |= 0x9 << 20; else reg3 |= 0xd << 20; for (div_out = 4; div_out < 16; div_out += 2) { f_vco = f_sr * div_out * DIV_LO_OUT; dev_dbg(dev->dev, "div_out=%u f_vco=%u\n", div_out, f_vco); if (f_vco >= 202000000) break; } /* Calculate PLL integer and fractional control word. 
*/ div_n = div_u64_rem(f_vco, DIV_PRE_N * F_REF, &k); k_cw = div_u64((u64) k * 0x200000, DIV_PRE_N * F_REF); reg3 |= div_n << 16; reg3 |= (div_out / 2 - 1) << 10; reg3 |= ((k_cw >> 20) & 0x000001) << 15; /* [20] */ reg4 |= ((k_cw >> 0) & 0x0fffff) << 8; /* [19:0] */ dev_dbg(dev->dev, "f_sr=%u f_vco=%u div_n=%u k=%u div_out=%u reg3=%08x reg4=%08x\n", f_sr, f_vco, div_n, k, div_out, reg3, reg4); ret = msi2500_ctrl_msg(dev, CMD_WREG, 0x00608008); if (ret) goto err; ret = msi2500_ctrl_msg(dev, CMD_WREG, 0x00000c05); if (ret) goto err; ret = msi2500_ctrl_msg(dev, CMD_WREG, 0x00020000); if (ret) goto err; ret = msi2500_ctrl_msg(dev, CMD_WREG, 0x00480102); if (ret) goto err; ret = msi2500_ctrl_msg(dev, CMD_WREG, 0x00f38008); if (ret) goto err; ret = msi2500_ctrl_msg(dev, CMD_WREG, reg7); if (ret) goto err; ret = msi2500_ctrl_msg(dev, CMD_WREG, reg4); if (ret) goto err; ret = msi2500_ctrl_msg(dev, CMD_WREG, reg3); err: return ret; } static int msi2500_start_streaming(struct vb2_queue *vq, unsigned int count) { struct msi2500_dev *dev = vb2_get_drv_priv(vq); int ret; dev_dbg(dev->dev, "\n"); if (!dev->udev) return -ENODEV; if (mutex_lock_interruptible(&dev->v4l2_lock)) return -ERESTARTSYS; /* wake-up tuner */ v4l2_subdev_call(dev->v4l2_subdev, core, s_power, 1); ret = msi2500_set_usb_adc(dev); ret = msi2500_isoc_init(dev); if (ret) msi2500_cleanup_queued_bufs(dev); ret = msi2500_ctrl_msg(dev, CMD_START_STREAMING, 0); mutex_unlock(&dev->v4l2_lock); return ret; } static void msi2500_stop_streaming(struct vb2_queue *vq) { struct msi2500_dev *dev = vb2_get_drv_priv(vq); dev_dbg(dev->dev, "\n"); mutex_lock(&dev->v4l2_lock); if (dev->udev) msi2500_isoc_cleanup(dev); msi2500_cleanup_queued_bufs(dev); /* according to tests, at least 700us delay is required */ msleep(20); if (dev->udev && !msi2500_ctrl_msg(dev, CMD_STOP_STREAMING, 0)) { /* sleep USB IF / ADC */ msi2500_ctrl_msg(dev, CMD_WREG, 0x01000003); } /* sleep tuner */ v4l2_subdev_call(dev->v4l2_subdev, core, s_power, 0); mutex_unlock(&dev->v4l2_lock); } static const struct vb2_ops msi2500_vb2_ops = { .queue_setup = msi2500_queue_setup, .buf_queue = msi2500_buf_queue, .start_streaming = msi2500_start_streaming, .stop_streaming = msi2500_stop_streaming, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; static int msi2500_enum_fmt_sdr_cap(struct file *file, void *priv, struct v4l2_fmtdesc *f) { struct msi2500_dev *dev = video_drvdata(file); dev_dbg(dev->dev, "index=%d\n", f->index); if (f->index >= dev->num_formats) return -EINVAL; f->pixelformat = formats[f->index].pixelformat; return 0; } static int msi2500_g_fmt_sdr_cap(struct file *file, void *priv, struct v4l2_format *f) { struct msi2500_dev *dev = video_drvdata(file); dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n", (char *)&dev->pixelformat); f->fmt.sdr.pixelformat = dev->pixelformat; f->fmt.sdr.buffersize = dev->buffersize; return 0; } static int msi2500_s_fmt_sdr_cap(struct file *file, void *priv, struct v4l2_format *f) { struct msi2500_dev *dev = video_drvdata(file); struct vb2_queue *q = &dev->vb_queue; int i; dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n", (char *)&f->fmt.sdr.pixelformat); if (vb2_is_busy(q)) return -EBUSY; for (i = 0; i < dev->num_formats; i++) { if (formats[i].pixelformat == f->fmt.sdr.pixelformat) { dev->pixelformat = formats[i].pixelformat; dev->buffersize = formats[i].buffersize; f->fmt.sdr.buffersize = formats[i].buffersize; return 0; } } dev->pixelformat = formats[0].pixelformat; dev->buffersize = formats[0].buffersize; f->fmt.sdr.pixelformat 
= formats[0].pixelformat; f->fmt.sdr.buffersize = formats[0].buffersize; return 0; } static int msi2500_try_fmt_sdr_cap(struct file *file, void *priv, struct v4l2_format *f) { struct msi2500_dev *dev = video_drvdata(file); int i; dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n", (char *)&f->fmt.sdr.pixelformat); for (i = 0; i < dev->num_formats; i++) { if (formats[i].pixelformat == f->fmt.sdr.pixelformat) { f->fmt.sdr.buffersize = formats[i].buffersize; return 0; } } f->fmt.sdr.pixelformat = formats[0].pixelformat; f->fmt.sdr.buffersize = formats[0].buffersize; return 0; } static int msi2500_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { struct msi2500_dev *dev = video_drvdata(file); int ret; dev_dbg(dev->dev, "index=%d\n", v->index); if (v->index == 0) ret = 0; else if (v->index == 1) ret = v4l2_subdev_call(dev->v4l2_subdev, tuner, s_tuner, v); else ret = -EINVAL; return ret; } static int msi2500_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct msi2500_dev *dev = video_drvdata(file); int ret; dev_dbg(dev->dev, "index=%d\n", v->index); if (v->index == 0) { strscpy(v->name, "Mirics MSi2500", sizeof(v->name)); v->type = V4L2_TUNER_ADC; v->capability = V4L2_TUNER_CAP_1HZ | V4L2_TUNER_CAP_FREQ_BANDS; v->rangelow = 1200000; v->rangehigh = 15000000; ret = 0; } else if (v->index == 1) { ret = v4l2_subdev_call(dev->v4l2_subdev, tuner, g_tuner, v); } else { ret = -EINVAL; } return ret; } static int msi2500_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct msi2500_dev *dev = video_drvdata(file); int ret = 0; dev_dbg(dev->dev, "tuner=%d type=%d\n", f->tuner, f->type); if (f->tuner == 0) { f->frequency = dev->f_adc; ret = 0; } else if (f->tuner == 1) { f->type = V4L2_TUNER_RF; ret = v4l2_subdev_call(dev->v4l2_subdev, tuner, g_frequency, f); } else { ret = -EINVAL; } return ret; } static int msi2500_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct msi2500_dev *dev = video_drvdata(file); int ret; dev_dbg(dev->dev, "tuner=%d type=%d frequency=%u\n", f->tuner, f->type, f->frequency); if (f->tuner == 0) { dev->f_adc = clamp_t(unsigned int, f->frequency, bands[0].rangelow, bands[0].rangehigh); dev_dbg(dev->dev, "ADC frequency=%u Hz\n", dev->f_adc); ret = msi2500_set_usb_adc(dev); } else if (f->tuner == 1) { ret = v4l2_subdev_call(dev->v4l2_subdev, tuner, s_frequency, f); } else { ret = -EINVAL; } return ret; } static int msi2500_enum_freq_bands(struct file *file, void *priv, struct v4l2_frequency_band *band) { struct msi2500_dev *dev = video_drvdata(file); int ret; dev_dbg(dev->dev, "tuner=%d type=%d index=%d\n", band->tuner, band->type, band->index); if (band->tuner == 0) { if (band->index >= ARRAY_SIZE(bands)) { ret = -EINVAL; } else { *band = bands[band->index]; ret = 0; } } else if (band->tuner == 1) { ret = v4l2_subdev_call(dev->v4l2_subdev, tuner, enum_freq_bands, band); } else { ret = -EINVAL; } return ret; } static const struct v4l2_ioctl_ops msi2500_ioctl_ops = { .vidioc_querycap = msi2500_querycap, .vidioc_enum_fmt_sdr_cap = msi2500_enum_fmt_sdr_cap, .vidioc_g_fmt_sdr_cap = msi2500_g_fmt_sdr_cap, .vidioc_s_fmt_sdr_cap = msi2500_s_fmt_sdr_cap, .vidioc_try_fmt_sdr_cap = msi2500_try_fmt_sdr_cap, .vidioc_reqbufs = vb2_ioctl_reqbufs, .vidioc_create_bufs = vb2_ioctl_create_bufs, .vidioc_prepare_buf = vb2_ioctl_prepare_buf, .vidioc_querybuf = vb2_ioctl_querybuf, .vidioc_qbuf = vb2_ioctl_qbuf, .vidioc_dqbuf = vb2_ioctl_dqbuf, .vidioc_streamon = vb2_ioctl_streamon, .vidioc_streamoff = 
vb2_ioctl_streamoff, .vidioc_g_tuner = msi2500_g_tuner, .vidioc_s_tuner = msi2500_s_tuner, .vidioc_g_frequency = msi2500_g_frequency, .vidioc_s_frequency = msi2500_s_frequency, .vidioc_enum_freq_bands = msi2500_enum_freq_bands, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, .vidioc_log_status = v4l2_ctrl_log_status, }; static const struct v4l2_file_operations msi2500_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .mmap = vb2_fop_mmap, .unlocked_ioctl = video_ioctl2, }; static const struct video_device msi2500_template = { .name = "Mirics MSi3101 SDR Dongle", .release = video_device_release_empty, .fops = &msi2500_fops, .ioctl_ops = &msi2500_ioctl_ops, }; static void msi2500_video_release(struct v4l2_device *v) { struct msi2500_dev *dev = container_of(v, struct msi2500_dev, v4l2_dev); v4l2_ctrl_handler_free(&dev->hdl); v4l2_device_unregister(&dev->v4l2_dev); kfree(dev); } static int msi2500_transfer_one_message(struct spi_controller *ctlr, struct spi_message *m) { struct msi2500_dev *dev = spi_controller_get_devdata(ctlr); struct spi_transfer *t; int ret = 0; u32 data; list_for_each_entry(t, &m->transfers, transfer_list) { dev_dbg(dev->dev, "msg=%*ph\n", t->len, t->tx_buf); data = 0x09; /* reg 9 is SPI adapter */ data |= ((u8 *)t->tx_buf)[0] << 8; data |= ((u8 *)t->tx_buf)[1] << 16; data |= ((u8 *)t->tx_buf)[2] << 24; ret = msi2500_ctrl_msg(dev, CMD_WREG, data); } m->status = ret; spi_finalize_current_message(ctlr); return ret; } static int msi2500_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct msi2500_dev *dev; struct v4l2_subdev *sd; struct spi_controller *ctlr; int ret; static struct spi_board_info board_info = { .modalias = "msi001", .bus_num = 0, .chip_select = 0, .max_speed_hz = 12000000, }; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; goto err; } mutex_init(&dev->v4l2_lock); mutex_init(&dev->vb_queue_lock); spin_lock_init(&dev->queued_bufs_lock); INIT_LIST_HEAD(&dev->queued_bufs); dev->dev = &intf->dev; dev->udev = interface_to_usbdev(intf); dev->f_adc = bands[0].rangelow; dev->pixelformat = formats[0].pixelformat; dev->buffersize = formats[0].buffersize; dev->num_formats = NUM_FORMATS; if (!msi2500_emulated_fmt) dev->num_formats -= 2; /* Init videobuf2 queue structure */ dev->vb_queue.type = V4L2_BUF_TYPE_SDR_CAPTURE; dev->vb_queue.io_modes = VB2_MMAP | VB2_USERPTR | VB2_READ; dev->vb_queue.drv_priv = dev; dev->vb_queue.buf_struct_size = sizeof(struct msi2500_frame_buf); dev->vb_queue.ops = &msi2500_vb2_ops; dev->vb_queue.mem_ops = &vb2_vmalloc_memops; dev->vb_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; ret = vb2_queue_init(&dev->vb_queue); if (ret) { dev_err(dev->dev, "Could not initialize vb2 queue\n"); goto err_free_mem; } /* Init video_device structure */ dev->vdev = msi2500_template; dev->vdev.queue = &dev->vb_queue; dev->vdev.queue->lock = &dev->vb_queue_lock; video_set_drvdata(&dev->vdev, dev); /* Register the v4l2_device structure */ dev->v4l2_dev.release = msi2500_video_release; ret = v4l2_device_register(&intf->dev, &dev->v4l2_dev); if (ret) { dev_err(dev->dev, "Failed to register v4l2-device (%d)\n", ret); goto err_free_mem; } /* SPI master adapter */ ctlr = spi_alloc_master(dev->dev, 0); if (ctlr == NULL) { ret = -ENOMEM; goto err_unregister_v4l2_dev; } dev->ctlr = ctlr; ctlr->bus_num = -1; ctlr->num_chipselect = 1; ctlr->transfer_one_message = 
msi2500_transfer_one_message; spi_controller_set_devdata(ctlr, dev); ret = spi_register_controller(ctlr); if (ret) { spi_controller_put(ctlr); goto err_unregister_v4l2_dev; } /* load v4l2 subdevice */ sd = v4l2_spi_new_subdev(&dev->v4l2_dev, ctlr, &board_info); dev->v4l2_subdev = sd; if (sd == NULL) { dev_err(dev->dev, "cannot get v4l2 subdevice\n"); ret = -ENODEV; goto err_unregister_controller; } /* Register controls */ v4l2_ctrl_handler_init(&dev->hdl, 0); if (dev->hdl.error) { ret = dev->hdl.error; dev_err(dev->dev, "Could not initialize controls\n"); goto err_free_controls; } /* currently all controls are from subdev */ v4l2_ctrl_add_handler(&dev->hdl, sd->ctrl_handler, NULL, true); dev->v4l2_dev.ctrl_handler = &dev->hdl; dev->vdev.v4l2_dev = &dev->v4l2_dev; dev->vdev.lock = &dev->v4l2_lock; dev->vdev.device_caps = V4L2_CAP_SDR_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_READWRITE | V4L2_CAP_TUNER; ret = video_register_device(&dev->vdev, VFL_TYPE_SDR, -1); if (ret) { dev_err(dev->dev, "Failed to register as video device (%d)\n", ret); goto err_unregister_v4l2_dev; } dev_info(dev->dev, "Registered as %s\n", video_device_node_name(&dev->vdev)); dev_notice(dev->dev, "SDR API is still slightly experimental and functionality changes may follow\n"); return 0; err_free_controls: v4l2_ctrl_handler_free(&dev->hdl); err_unregister_controller: spi_unregister_controller(dev->ctlr); err_unregister_v4l2_dev: v4l2_device_unregister(&dev->v4l2_dev); err_free_mem: kfree(dev); err: return ret; } /* USB device ID list */ static const struct usb_device_id msi2500_id_table[] = { {USB_DEVICE(0x1df7, 0x2500)}, /* Mirics MSi3101 SDR Dongle */ {USB_DEVICE(0x2040, 0xd300)}, /* Hauppauge WinTV 133559 LF */ {} }; MODULE_DEVICE_TABLE(usb, msi2500_id_table); /* USB subsystem interface */ static struct usb_driver msi2500_driver = { .name = KBUILD_MODNAME, .probe = msi2500_probe, .disconnect = msi2500_disconnect, .id_table = msi2500_id_table, }; module_usb_driver(msi2500_driver); MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>"); MODULE_DESCRIPTION("Mirics MSi3101 SDR Dongle"); MODULE_LICENSE("GPL");
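
To make the fractional-N synthesizer arithmetic in msi2500_set_usb_adc() above easier to check by hand, here is a minimal standalone user-space sketch of the same calculation. It is not part of the driver: F_REF, DIV_PRE_N, DIV_LO_OUT, the >= 202 MHz VCO limit and the 0x200000 scaling are taken from the code above, while the 7.5 Msps sample rate is an arbitrary value chosen only for the worked example.

#include <stdio.h>

#define F_REF      24000000u  /* reference clock, same as the driver */
#define DIV_PRE_N  2u         /* pre-divider in front of the N.F divider */
#define DIV_LO_OUT 12u        /* fixed /12 on the output path */

int main(void)
{
	unsigned int f_sr = 7500000;   /* assumed ADC sample rate, illustration only */
	unsigned int div_out, f_vco = 0;
	unsigned long long div_n, k, k_cw;

	/* smallest even output divider that keeps the VCO at or above 202 MHz */
	for (div_out = 4; div_out < 16; div_out += 2) {
		f_vco = f_sr * div_out * DIV_LO_OUT;
		if (f_vco >= 202000000)
			break;
	}

	/* integer part N and remainder k of f_vco / (DIV_PRE_N * F_REF) */
	div_n = f_vco / (DIV_PRE_N * F_REF);
	k     = f_vco % (DIV_PRE_N * F_REF);

	/* 21-bit fractional control word, same 0x200000 scaling as the driver */
	k_cw = (k * 0x200000ULL) / (DIV_PRE_N * F_REF);

	/* expected output: div_out=4 f_vco=360000000 N=7 k_cw=0x100000 */
	printf("div_out=%u f_vco=%u N=%llu k_cw=0x%llx\n", div_out, f_vco, div_n, k_cw);
	return 0;
}

For that assumed rate the sketch prints div_out=4, f_vco=360 MHz, N=7 and a fractional word of 0x100000, i.e. exactly half of the 2^21 range, which matches the 24 MHz remainder against the 48 MHz comparison frequency.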
/**
 * css_get - obtain a reference on the specified css
 * @css: target css
 *
 * The caller must already have a reference.
 */
CGROUP_REF_FN_ATTRS
void css_get(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_get(&css->refcnt);
}
CGROUP_REF_EXPORT(css_get)

/**
 * css_get_many - obtain references on the specified css
 * @css: target css
 * @n: number of references to get
 *
 * The caller must already have a reference.
 */
CGROUP_REF_FN_ATTRS
void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_get_many(&css->refcnt, n);
}
CGROUP_REF_EXPORT(css_get_many)

/**
 * css_tryget - try to obtain a reference on the specified css
 * @css: target css
 *
 * Obtain a reference on @css unless it already has reached zero and is
 * being released. This function doesn't care whether @css is on or
 * offline. The caller naturally needs to ensure that @css is accessible
 * but doesn't have to be holding a reference on it - IOW, RCU protected
 * access is good enough for this function. Returns %true if a reference
 * count was successfully obtained; %false otherwise.
 */
CGROUP_REF_FN_ATTRS
bool css_tryget(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		return percpu_ref_tryget(&css->refcnt);
	return true;
}
CGROUP_REF_EXPORT(css_tryget)

/**
 * css_tryget_online - try to obtain a reference on the specified css if online
 * @css: target css
 *
 * Obtain a reference on @css if it's online. The caller naturally needs
 * to ensure that @css is accessible but doesn't have to be holding a
 * reference on it - IOW, RCU protected access is good enough for this
 * function. Returns %true if a reference count was successfully obtained;
 * %false otherwise.
 */
CGROUP_REF_FN_ATTRS
bool css_tryget_online(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		return percpu_ref_tryget_live(&css->refcnt);
	return true;
}
CGROUP_REF_EXPORT(css_tryget_online)

/**
 * css_put - put a css reference
 * @css: target css
 *
 * Put a reference obtained via css_get() and css_tryget_online().
 */
CGROUP_REF_FN_ATTRS
void css_put(struct cgroup_subsys_state *css)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_put(&css->refcnt);
}
CGROUP_REF_EXPORT(css_put)

/**
 * css_put_many - put css references
 * @css: target css
 * @n: number of references to put
 *
 * Put references obtained via css_get() and css_tryget_online().
 */
CGROUP_REF_FN_ATTRS
void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
{
	if (!(css->flags & CSS_NO_REF))
		percpu_ref_put_many(&css->refcnt, n);
}
CGROUP_REF_EXPORT(css_put_many)
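
Since the css_tryget_online() comment above only promises that RCU-protected access is enough to call it, a short sketch of that caller pattern may help. It is illustrative only, and css_of_interest() is a hypothetical stand-in for whatever RCU-protected lookup the caller already performs.

/*
 * Illustrative caller pattern only -- not part of the file above.
 * css_of_interest() is a hypothetical RCU-protected lookup.
 */
static void css_usage_sketch(void)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();
	css = css_of_interest();
	/* pin the css while still inside the RCU read-side section */
	if (css && !css_tryget_online(css))
		css = NULL;
	rcu_read_unlock();

	if (!css)
		return;

	/* the reference keeps the css from being released even if it goes offline */
	/* ... use css ... */

	css_put(css);	/* drop the reference taken by css_tryget_online() */
}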
5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 
6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 
7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 
8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 
8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __NET_CFG80211_H
#define __NET_CFG80211_H
/*
 * 802.11 device and configuration interface
 *
 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
 * Copyright 2013-2014 Intel Mobile Communications GmbH
 * Copyright 2015-2017 Intel Deutschland GmbH
 * Copyright (C) 2018-2024 Intel Corporation
 */

#include <linux/ethtool.h>
#include <uapi/linux/rfkill.h>
#include <linux/netdevice.h>
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/bug.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#include <linux/nl80211.h>
#include <linux/if_ether.h>
#include <linux/ieee80211.h>
#include <linux/net.h>
#include <linux/rfkill.h>
#include <net/regulatory.h>

/**
 * DOC: Introduction
 *
 * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges
 * userspace and drivers, and offers some utility functionality associated
 * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used
 * by all modern wireless drivers in Linux, so that they offer a consistent
 * API through nl80211. For backward compatibility, cfg80211 also offers
 * wireless extensions to userspace, but hides them from drivers completely.
 *
 * Additionally, cfg80211 contains code to help enforce regulatory spectrum
 * use restrictions.
 */

/**
 * DOC: Device registration
 *
 * In order for a driver to use cfg80211, it must register the hardware device
 * with cfg80211. This happens through a number of hardware capability structs
 * described below.
 *
 * The fundamental structure for each device is the 'wiphy', of which each
 * instance describes a physical wireless device connected to the system. Each
 * such wiphy can have zero, one, or many virtual interfaces associated with
 * it, which need to be identified as such by pointing the network interface's
 * @ieee80211_ptr pointer to a &struct wireless_dev which further describes
 * the wireless part of the interface. Normally this struct is embedded in the
 * network interface's private data area. Drivers can optionally allow creating
 * or destroying virtual interfaces on the fly, but without at least one
 * interface, or the ability to create one, the wireless device isn't useful.
 *
 * Each wiphy structure contains device capability information, and also has
 * a pointer to the various operations the driver offers. The definitions and
 * structures here describe these capabilities in detail.
 */

struct wiphy;

/*
 * wireless hardware capability structures
 */

/**
 * enum ieee80211_channel_flags - channel flags
 *
 * Channel flags set by the regulatory control code.
 *
 * @IEEE80211_CHAN_DISABLED: This channel is disabled.
 * @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes
 *	sending probe requests or beaconing.
 * @IEEE80211_CHAN_PSD: Power spectral density (in dBm) is set for this
 *	channel.
 * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel.
 * @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel
 *	is not permitted.
 * @IEEE80211_CHAN_NO_HT40MINUS: extension channel below this channel
 *	is not permitted.
 * @IEEE80211_CHAN_NO_OFDM: OFDM is not allowed on this channel.
* @IEEE80211_CHAN_NO_80MHZ: If the driver supports 80 MHz on the band, * this flag indicates that an 80 MHz channel cannot use this * channel as the control or any of the secondary channels. * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_NO_160MHZ: If the driver supports 160 MHz on the band, * this flag indicates that an 160 MHz channel cannot use this * channel as the control or any of the secondary channels. * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY * @IEEE80211_CHAN_IR_CONCURRENT: see %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel. * @IEEE80211_CHAN_1MHZ: 1 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_2MHZ: 2 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_4MHZ: 4 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_8MHZ: 8 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted * on this channel. * @IEEE80211_CHAN_NO_320MHZ: If the driver supports 320 MHz on the band, * this flag indicates that a 320 MHz channel cannot use this * channel as the control or any of the secondary channels. * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel. * @IEEE80211_CHAN_DFS_CONCURRENT: See %NL80211_RRF_DFS_CONCURRENT * @IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT: Client connection with VLP AP * not permitted using this channel * @IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT: Client connection with AFC AP * not permitted using this channel * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor * mode even in the presence of other (regulatory) restrictions, * even if it is otherwise disabled. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, IEEE80211_CHAN_NO_IR = 1<<1, IEEE80211_CHAN_PSD = 1<<2, IEEE80211_CHAN_RADAR = 1<<3, IEEE80211_CHAN_NO_HT40PLUS = 1<<4, IEEE80211_CHAN_NO_HT40MINUS = 1<<5, IEEE80211_CHAN_NO_OFDM = 1<<6, IEEE80211_CHAN_NO_80MHZ = 1<<7, IEEE80211_CHAN_NO_160MHZ = 1<<8, IEEE80211_CHAN_INDOOR_ONLY = 1<<9, IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, IEEE80211_CHAN_NO_HE = 1<<13, IEEE80211_CHAN_1MHZ = 1<<14, IEEE80211_CHAN_2MHZ = 1<<15, IEEE80211_CHAN_4MHZ = 1<<16, IEEE80211_CHAN_8MHZ = 1<<17, IEEE80211_CHAN_16MHZ = 1<<18, IEEE80211_CHAN_NO_320MHZ = 1<<19, IEEE80211_CHAN_NO_EHT = 1<<20, IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22, IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23, IEEE80211_CHAN_CAN_MONITOR = 1<<24, }; #define IEEE80211_CHAN_NO_HT40 \ (IEEE80211_CHAN_NO_HT40PLUS | IEEE80211_CHAN_NO_HT40MINUS) #define IEEE80211_DFS_MIN_CAC_TIME_MS 60000 #define IEEE80211_DFS_MIN_NOP_TIME_MS (30 * 60 * 1000) /** * struct ieee80211_channel - channel definition * * This structure describes a single channel for use * with cfg80211. * * @center_freq: center frequency in MHz * @freq_offset: offset from @center_freq, in KHz * @hw_value: hardware-specific value for the channel * @flags: channel flags from &enum ieee80211_channel_flags. 
* @orig_flags: channel flags at registration time, used by regulatory * code to support devices with additional restrictions * @band: band this channel belongs to. * @max_antenna_gain: maximum antenna gain in dBi * @max_power: maximum transmission power (in dBm) * @max_reg_power: maximum regulatory transmission power (in dBm) * @beacon_found: helper to regulatory code to indicate when a beacon * has been found on this channel. Use regulatory_hint_found_beacon() * to enable this, this is useful only on 5 GHz band. * @orig_mag: internal use * @orig_mpwr: internal use * @dfs_state: current state of this channel. Only relevant if radar is required * on this channel. * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. * @psd: power spectral density (in dBm) */ struct ieee80211_channel { enum nl80211_band band; u32 center_freq; u16 freq_offset; u16 hw_value; u32 flags; int max_antenna_gain; int max_power; int max_reg_power; bool beacon_found; u32 orig_flags; int orig_mag, orig_mpwr; enum nl80211_dfs_state dfs_state; unsigned long dfs_state_entered; unsigned int dfs_cac_ms; s8 psd; }; /** * enum ieee80211_rate_flags - rate flags * * Hardware/specification flags for rates. These are structured * in a way that allows using the same bitrate structure for * different bands/PHY modes. * * @IEEE80211_RATE_SHORT_PREAMBLE: Hardware can send with short * preamble on this bitrate; only relevant in 2.4GHz band and * with CCK rates. * @IEEE80211_RATE_MANDATORY_A: This bitrate is a mandatory rate * when used with 802.11a (on the 5 GHz band); filled by the * core code when registering the wiphy. * @IEEE80211_RATE_MANDATORY_B: This bitrate is a mandatory rate * when used with 802.11b (on the 2.4 GHz band); filled by the * core code when registering the wiphy. * @IEEE80211_RATE_MANDATORY_G: This bitrate is a mandatory rate * when used with 802.11g (on the 2.4 GHz band); filled by the * core code when registering the wiphy. * @IEEE80211_RATE_ERP_G: This is an ERP rate in 802.11g mode. * @IEEE80211_RATE_SUPPORTS_5MHZ: Rate can be used in 5 MHz mode * @IEEE80211_RATE_SUPPORTS_10MHZ: Rate can be used in 10 MHz mode */ enum ieee80211_rate_flags { IEEE80211_RATE_SHORT_PREAMBLE = 1<<0, IEEE80211_RATE_MANDATORY_A = 1<<1, IEEE80211_RATE_MANDATORY_B = 1<<2, IEEE80211_RATE_MANDATORY_G = 1<<3, IEEE80211_RATE_ERP_G = 1<<4, IEEE80211_RATE_SUPPORTS_5MHZ = 1<<5, IEEE80211_RATE_SUPPORTS_10MHZ = 1<<6, }; /** * enum ieee80211_bss_type - BSS type filter * * @IEEE80211_BSS_TYPE_ESS: Infrastructure BSS * @IEEE80211_BSS_TYPE_PBSS: Personal BSS * @IEEE80211_BSS_TYPE_IBSS: Independent BSS * @IEEE80211_BSS_TYPE_MBSS: Mesh BSS * @IEEE80211_BSS_TYPE_ANY: Wildcard value for matching any BSS type */ enum ieee80211_bss_type { IEEE80211_BSS_TYPE_ESS, IEEE80211_BSS_TYPE_PBSS, IEEE80211_BSS_TYPE_IBSS, IEEE80211_BSS_TYPE_MBSS, IEEE80211_BSS_TYPE_ANY }; /** * enum ieee80211_privacy - BSS privacy filter * * @IEEE80211_PRIVACY_ON: privacy bit set * @IEEE80211_PRIVACY_OFF: privacy bit clear * @IEEE80211_PRIVACY_ANY: Wildcard value for matching any privacy setting */ enum ieee80211_privacy { IEEE80211_PRIVACY_ON, IEEE80211_PRIVACY_OFF, IEEE80211_PRIVACY_ANY }; #define IEEE80211_PRIVACY(x) \ ((x) ? IEEE80211_PRIVACY_ON : IEEE80211_PRIVACY_OFF) /** * struct ieee80211_rate - bitrate definition * * This structure describes a bitrate that an 802.11 PHY can * operate with. 
The two values @hw_value and @hw_value_short * are only for driver use when pointers to this structure are * passed around. * * @flags: rate-specific flags from &enum ieee80211_rate_flags * @bitrate: bitrate in units of 100 Kbps * @hw_value: driver/hardware value for this rate * @hw_value_short: driver/hardware value for this rate when * short preamble is used */ struct ieee80211_rate { u32 flags; u16 bitrate; u16 hw_value, hw_value_short; }; /** * struct ieee80211_he_obss_pd - AP settings for spatial reuse * * @enable: is the feature enabled. * @sr_ctrl: The SR Control field of SRP element. * @non_srg_max_offset: non-SRG maximum tx power offset * @min_offset: minimal tx power offset an associated station shall use * @max_offset: maximum tx power offset an associated station shall use * @bss_color_bitmap: bitmap that indicates the BSS color values used by * members of the SRG * @partial_bssid_bitmap: bitmap that indicates the partial BSSID values * used by members of the SRG */ struct ieee80211_he_obss_pd { bool enable; u8 sr_ctrl; u8 non_srg_max_offset; u8 min_offset; u8 max_offset; u8 bss_color_bitmap[8]; u8 partial_bssid_bitmap[8]; }; /** * struct cfg80211_he_bss_color - AP settings for BSS coloring * * @color: the current color. * @enabled: HE BSS color is used * @partial: define the AID equation. */ struct cfg80211_he_bss_color { u8 color; bool enabled; bool partial; }; /** * struct ieee80211_sta_ht_cap - STA's HT capabilities * * This structure describes most essential parameters needed * to describe 802.11n HT capabilities for an STA. * * @ht_supported: is HT supported by the STA * @cap: HT capabilities map as described in 802.11n spec * @ampdu_factor: Maximum A-MPDU length factor * @ampdu_density: Minimum A-MPDU spacing * @mcs: Supported MCS rates */ struct ieee80211_sta_ht_cap { u16 cap; /* use IEEE80211_HT_CAP_ */ bool ht_supported; u8 ampdu_factor; u8 ampdu_density; struct ieee80211_mcs_info mcs; }; /** * struct ieee80211_sta_vht_cap - STA's VHT capabilities * * This structure describes most essential parameters needed * to describe 802.11ac VHT capabilities for an STA. * * @vht_supported: is VHT supported by the STA * @cap: VHT capabilities map as described in 802.11ac spec * @vht_mcs: Supported VHT MCS rates */ struct ieee80211_sta_vht_cap { bool vht_supported; u32 cap; /* use IEEE80211_VHT_CAP_ */ struct ieee80211_vht_mcs_info vht_mcs; }; #define IEEE80211_HE_PPE_THRES_MAX_LEN 25 /** * struct ieee80211_sta_he_cap - STA's HE capabilities * * This structure describes most essential parameters needed * to describe 802.11ax HE capabilities for a STA. * * @has_he: true iff HE data is valid. * @he_cap_elem: Fixed portion of the HE capabilities element. * @he_mcs_nss_supp: The supported NSS/MCS combinations. * @ppe_thres: Holds the PPE Thresholds data. */ struct ieee80211_sta_he_cap { bool has_he; struct ieee80211_he_cap_elem he_cap_elem; struct ieee80211_he_mcs_nss_supp he_mcs_nss_supp; u8 ppe_thres[IEEE80211_HE_PPE_THRES_MAX_LEN]; }; /** * struct ieee80211_eht_mcs_nss_supp - EHT max supported NSS per MCS * * See P802.11be_D1.3 Table 9-401k - "Subfields of the Supported EHT-MCS * and NSS Set field" * * @only_20mhz: MCS/NSS support for 20 MHz-only STA. 
* @bw: MCS/NSS support for 80, 160 and 320 MHz * @bw._80: MCS/NSS support for BW <= 80 MHz * @bw._160: MCS/NSS support for BW = 160 MHz * @bw._320: MCS/NSS support for BW = 320 MHz */ struct ieee80211_eht_mcs_nss_supp { union { struct ieee80211_eht_mcs_nss_supp_20mhz_only only_20mhz; struct { struct ieee80211_eht_mcs_nss_supp_bw _80; struct ieee80211_eht_mcs_nss_supp_bw _160; struct ieee80211_eht_mcs_nss_supp_bw _320; } __packed bw; } __packed; } __packed; #define IEEE80211_EHT_PPE_THRES_MAX_LEN 32 /** * struct ieee80211_sta_eht_cap - STA's EHT capabilities * * This structure describes most essential parameters needed * to describe 802.11be EHT capabilities for a STA. * * @has_eht: true iff EHT data is valid. * @eht_cap_elem: Fixed portion of the eht capabilities element. * @eht_mcs_nss_supp: The supported NSS/MCS combinations. * @eht_ppe_thres: Holds the PPE Thresholds data. */ struct ieee80211_sta_eht_cap { bool has_eht; struct ieee80211_eht_cap_elem_fixed eht_cap_elem; struct ieee80211_eht_mcs_nss_supp eht_mcs_nss_supp; u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; }; /* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ /* * This is used to mark the sband->iftype_data pointer which is supposed * to be an array with special access semantics (per iftype), but a lot * of code got it wrong in the past, so with this marking sparse will be * noisy when the pointer is used directly. */ # define __iftd __attribute__((noderef, address_space(__iftype_data))) #else # define __iftd #endif /* __CHECKER__ */ /** * struct ieee80211_sband_iftype_data - sband data per interface type * * This structure encapsulates sband data that is relevant for the * interface types defined in @types_mask. Each type in the * @types_mask must be unique across all instances of iftype_data. * * @types_mask: interface types mask * @he_cap: holds the HE capabilities * @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a * 6 GHz band channel (and 0 may be valid value). 
* @eht_cap: STA's EHT capabilities * @vendor_elems: vendor element(s) to advertise * @vendor_elems.data: vendor element(s) data * @vendor_elems.len: vendor element(s) length */ struct ieee80211_sband_iftype_data { u16 types_mask; struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; struct ieee80211_sta_eht_cap eht_cap; struct { const u8 *data; unsigned int len; } vendor_elems; }; /** * enum ieee80211_edmg_bw_config - allowed channel bandwidth configurations * * @IEEE80211_EDMG_BW_CONFIG_4: 2.16GHz * @IEEE80211_EDMG_BW_CONFIG_5: 2.16GHz and 4.32GHz * @IEEE80211_EDMG_BW_CONFIG_6: 2.16GHz, 4.32GHz and 6.48GHz * @IEEE80211_EDMG_BW_CONFIG_7: 2.16GHz, 4.32GHz, 6.48GHz and 8.64GHz * @IEEE80211_EDMG_BW_CONFIG_8: 2.16GHz and 2.16GHz + 2.16GHz * @IEEE80211_EDMG_BW_CONFIG_9: 2.16GHz, 4.32GHz and 2.16GHz + 2.16GHz * @IEEE80211_EDMG_BW_CONFIG_10: 2.16GHz, 4.32GHz, 6.48GHz and 2.16GHz+2.16GHz * @IEEE80211_EDMG_BW_CONFIG_11: 2.16GHz, 4.32GHz, 6.48GHz, 8.64GHz and * 2.16GHz+2.16GHz * @IEEE80211_EDMG_BW_CONFIG_12: 2.16GHz, 2.16GHz + 2.16GHz and * 4.32GHz + 4.32GHz * @IEEE80211_EDMG_BW_CONFIG_13: 2.16GHz, 4.32GHz, 2.16GHz + 2.16GHz and * 4.32GHz + 4.32GHz * @IEEE80211_EDMG_BW_CONFIG_14: 2.16GHz, 4.32GHz, 6.48GHz, 2.16GHz + 2.16GHz * and 4.32GHz + 4.32GHz * @IEEE80211_EDMG_BW_CONFIG_15: 2.16GHz, 4.32GHz, 6.48GHz, 8.64GHz, * 2.16GHz + 2.16GHz and 4.32GHz + 4.32GHz */ enum ieee80211_edmg_bw_config { IEEE80211_EDMG_BW_CONFIG_4 = 4, IEEE80211_EDMG_BW_CONFIG_5 = 5, IEEE80211_EDMG_BW_CONFIG_6 = 6, IEEE80211_EDMG_BW_CONFIG_7 = 7, IEEE80211_EDMG_BW_CONFIG_8 = 8, IEEE80211_EDMG_BW_CONFIG_9 = 9, IEEE80211_EDMG_BW_CONFIG_10 = 10, IEEE80211_EDMG_BW_CONFIG_11 = 11, IEEE80211_EDMG_BW_CONFIG_12 = 12, IEEE80211_EDMG_BW_CONFIG_13 = 13, IEEE80211_EDMG_BW_CONFIG_14 = 14, IEEE80211_EDMG_BW_CONFIG_15 = 15, }; /** * struct ieee80211_edmg - EDMG configuration * * This structure describes most essential parameters needed * to describe 802.11ay EDMG configuration * * @channels: bitmap that indicates the 2.16 GHz channel(s) * that are allowed to be used for transmissions. * Bit 0 indicates channel 1, bit 1 indicates channel 2, etc. * Set to 0 indicate EDMG not supported. * @bw_config: Channel BW Configuration subfield encodes * the allowed channel bandwidth configurations */ struct ieee80211_edmg { u8 channels; enum ieee80211_edmg_bw_config bw_config; }; /** * struct ieee80211_sta_s1g_cap - STA's S1G capabilities * * This structure describes most essential parameters needed * to describe 802.11ah S1G capabilities for a STA. * * @s1g: is STA an S1G STA * @cap: S1G capabilities information * @nss_mcs: Supported NSS MCS set */ struct ieee80211_sta_s1g_cap { bool s1g; u8 cap[10]; /* use S1G_CAPAB_ */ u8 nss_mcs[5]; }; /** * struct ieee80211_supported_band - frequency band definition * * This structure describes a frequency band a wiphy * is able to operate in. * * @channels: Array of channels the hardware can operate with * in this band. * @band: the band this structure represents * @n_channels: Number of channels in @channels * @bitrates: Array of bitrates the hardware can operate with * in this band. Must be sorted to give a valid "supported * rates" IE, i.e. CCK rates first, then OFDM. 
 * @n_bitrates: Number of bitrates in @bitrates
 * @ht_cap: HT capabilities in this band
 * @vht_cap: VHT capabilities in this band
 * @s1g_cap: S1G capabilities in this band
 * @edmg_cap: EDMG capabilities in this band
 * @n_iftype_data: number of iftype data entries
 * @iftype_data: interface type data entries. Note that the bits in
 *	@types_mask inside this structure cannot overlap (i.e. only
 *	one occurrence of each type is allowed across all instances of
 *	iftype_data).
 */
struct ieee80211_supported_band {
	struct ieee80211_channel *channels;
	struct ieee80211_rate *bitrates;
	enum nl80211_band band;
	int n_channels;
	int n_bitrates;
	struct ieee80211_sta_ht_cap ht_cap;
	struct ieee80211_sta_vht_cap vht_cap;
	struct ieee80211_sta_s1g_cap s1g_cap;
	struct ieee80211_edmg edmg_cap;
	u16 n_iftype_data;
	const struct ieee80211_sband_iftype_data __iftd *iftype_data;
};

/**
 * _ieee80211_set_sband_iftype_data - set sband iftype data array
 * @sband: the sband to initialize
 * @iftd: the iftype data array pointer
 * @n_iftd: the length of the iftype data array
 *
 * Set the sband iftype data array; use this where the length cannot
 * be derived from the ARRAY_SIZE() of the argument, but prefer
 * ieee80211_set_sband_iftype_data() where it can be used.
 */
static inline void
_ieee80211_set_sband_iftype_data(struct ieee80211_supported_band *sband,
				 const struct ieee80211_sband_iftype_data *iftd,
				 u16 n_iftd)
{
	sband->iftype_data = (const void __iftd __force *)iftd;
	sband->n_iftype_data = n_iftd;
}

/**
 * ieee80211_set_sband_iftype_data - set sband iftype data array
 * @sband: the sband to initialize
 * @iftd: the iftype data array
 */
#define ieee80211_set_sband_iftype_data(sband, iftd)	\
	_ieee80211_set_sband_iftype_data(sband, iftd, ARRAY_SIZE(iftd))

/**
 * for_each_sband_iftype_data - iterate sband iftype data entries
 * @sband: the sband whose iftype_data array to iterate
 * @i: iterator counter
 * @iftd: iftype data pointer to set
 */
#define for_each_sband_iftype_data(sband, i, iftd)				\
	for (i = 0, iftd = (const void __force *)&(sband)->iftype_data[i];	\
	     i < (sband)->n_iftype_data;					\
	     i++, iftd = (const void __force *)&(sband)->iftype_data[i])

/**
 * ieee80211_get_sband_iftype_data - return sband data for a given iftype
 * @sband: the sband to search for the STA on
 * @iftype: enum nl80211_iftype
 *
 * Return: pointer to struct ieee80211_sband_iftype_data, or NULL if none found
 */
static inline const struct ieee80211_sband_iftype_data *
ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
				u8 iftype)
{
	const struct ieee80211_sband_iftype_data *data;
	int i;

	if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
		return NULL;

	if (iftype == NL80211_IFTYPE_AP_VLAN)
		iftype = NL80211_IFTYPE_AP;

	for_each_sband_iftype_data(sband, i, data) {
		if (data->types_mask & BIT(iftype))
			return data;
	}

	return NULL;
}

/**
 * ieee80211_get_he_iftype_cap - return HE capabilities for an sband's iftype
 * @sband: the sband to search for the iftype on
 * @iftype: enum nl80211_iftype
 *
 * Return: pointer to the struct ieee80211_sta_he_cap, or NULL if none found
 */
static inline const struct ieee80211_sta_he_cap *
ieee80211_get_he_iftype_cap(const struct ieee80211_supported_band *sband,
			    u8 iftype)
{
	const struct ieee80211_sband_iftype_data *data =
		ieee80211_get_sband_iftype_data(sband, iftype);

	if (data && data->he_cap.has_he)
		return &data->he_cap;

	return NULL;
}

/**
 * ieee80211_get_he_6ghz_capa - return HE 6 GHz capabilities
 * @sband: the sband to search for the STA on
 * @iftype: the iftype to search for
 *
 * Return: the 6 GHz capabilities
 */
static inline __le16
ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband,
			   enum nl80211_iftype iftype)
{
	const struct ieee80211_sband_iftype_data *data =
		ieee80211_get_sband_iftype_data(sband, iftype);

	if (WARN_ON(!data || !data->he_cap.has_he))
		return 0;

	return data->he_6ghz_capa.capa;
}

/**
 * ieee80211_get_eht_iftype_cap - return EHT capabilities for an sband's iftype
 * @sband: the sband to search for the iftype on
 * @iftype: enum nl80211_iftype
 *
 * Return: pointer to the struct ieee80211_sta_eht_cap, or NULL if none found
 */
static inline const struct ieee80211_sta_eht_cap *
ieee80211_get_eht_iftype_cap(const struct ieee80211_supported_band *sband,
			     enum nl80211_iftype iftype)
{
	const struct ieee80211_sband_iftype_data *data =
		ieee80211_get_sband_iftype_data(sband, iftype);

	if (data && data->eht_cap.has_eht)
		return &data->eht_cap;

	return NULL;
}

/**
 * wiphy_read_of_freq_limits - read frequency limits from device tree
 *
 * @wiphy: the wireless device to get extra limits for
 *
 * Some devices may have extra limitations specified in DT. This may be useful
 * for chipsets that normally support more bands but are limited due to board
 * design (e.g. by antennas or external power amplifier).
 *
 * This function reads info from DT and uses it to *modify* channels (disable
 * unavailable ones). It's usually a *bad* idea to use it in drivers with
 * shared channel data, as DT limitations are device specific. You should make
 * sure to call it only if channels in wiphy are copied and can be modified
 * without affecting other devices.
 *
 * As this function accesses the device node, it has to be called after
 * set_wiphy_dev(). It also modifies channels, so they have to be set first.
 * If using this helper, call it before wiphy_register().
 */
#ifdef CONFIG_OF
void wiphy_read_of_freq_limits(struct wiphy *wiphy);
#else /* CONFIG_OF */
static inline void wiphy_read_of_freq_limits(struct wiphy *wiphy)
{
}
#endif /* !CONFIG_OF */

/*
 * Wireless hardware/device configuration structures and methods
 */

/**
 * DOC: Actions and configuration
 *
 * Each wireless device and each virtual interface offer a set of configuration
 * operations and other actions that are invoked by userspace. Each of these
 * actions is described in the operations structure, and the parameters these
 * operations use are described separately.
 *
 * Additionally, some operations are asynchronous and expect to get status
 * information via some functions that drivers need to call.
 *
 * Scanning and BSS list handling with its associated functionality is
 * described in a separate chapter.
 */

#define VHT_MUMIMO_GROUPS_DATA_LEN (WLAN_MEMBERSHIP_LEN +\
				    WLAN_USER_POSITION_LEN)

/**
 * struct vif_params - describes virtual interface parameters
 * @flags: monitor interface flags, unchanged if 0, otherwise
 *	%MONITOR_FLAG_CHANGED will be set
 * @use_4addr: use 4-address frames
 * @macaddr: address to use for this virtual interface.
 *	If this parameter is set to the zero address, the driver may
 *	determine the address as needed.
 *	This feature is only fully supported by drivers that enable the
 *	%NL80211_FEATURE_MAC_ON_CREATE flag. Others may support creating
 *	only p2p devices with specified MAC.
* @vht_mumimo_groups: MU-MIMO groupID, used for monitoring MU-MIMO packets * belonging to that MU-MIMO groupID; %NULL if not changed * @vht_mumimo_follow_addr: MU-MIMO follow address, used for monitoring * MU-MIMO packets going to the specified station; %NULL if not changed */ struct vif_params { u32 flags; int use_4addr; u8 macaddr[ETH_ALEN]; const u8 *vht_mumimo_groups; const u8 *vht_mumimo_follow_addr; }; /** * struct key_params - key information * * Information about a key * * @key: key material * @key_len: length of key material * @cipher: cipher suite selector * @seq: sequence counter (IV/PN) for TKIP and CCMP keys, only used * with the get_key() callback, must be in little endian, * length given by @seq_len. * @seq_len: length of @seq. * @vlan_id: vlan_id for VLAN group key (if nonzero) * @mode: key install mode (RX_TX, NO_TX or SET_TX) */ struct key_params { const u8 *key; const u8 *seq; int key_len; int seq_len; u16 vlan_id; u32 cipher; enum nl80211_key_mode mode; }; /** * struct cfg80211_chan_def - channel definition * @chan: the (control) channel * @width: channel width * @center_freq1: center frequency of first segment * @center_freq2: center frequency of second segment * (only with 80+80 MHz) * @edmg: define the EDMG channels configuration. * If edmg is requested (i.e. the .channels member is non-zero), * chan will define the primary channel and all other * parameters are ignored. * @freq1_offset: offset from @center_freq1, in KHz * @punctured: mask of the punctured 20 MHz subchannels, with * bits turned on being disabled (punctured); numbered * from lower to higher frequency (like in the spec) */ struct cfg80211_chan_def { struct ieee80211_channel *chan; enum nl80211_chan_width width; u32 center_freq1; u32 center_freq2; struct ieee80211_edmg edmg; u16 freq1_offset; u16 punctured; }; /* * cfg80211_bitrate_mask - masks for bitrate control */ struct cfg80211_bitrate_mask { struct { u32 legacy; u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; u16 vht_mcs[NL80211_VHT_NSS_MAX]; u16 he_mcs[NL80211_HE_NSS_MAX]; enum nl80211_txrate_gi gi; enum nl80211_he_gi he_gi; enum nl80211_he_ltf he_ltf; } control[NUM_NL80211_BANDS]; }; /** * struct cfg80211_tid_cfg - TID specific configuration * @config_override: Flag to notify driver to reset TID configuration * of the peer. * @tids: bitmap of TIDs to modify * @mask: bitmap of attributes indicating which parameter changed, * similar to &nl80211_tid_config_supp. 
* @noack: noack configuration value for the TID * @retry_long: retry count value * @retry_short: retry count value * @ampdu: Enable/Disable MPDU aggregation * @rtscts: Enable/Disable RTS/CTS * @amsdu: Enable/Disable MSDU aggregation * @txrate_type: Tx bitrate mask type * @txrate_mask: Tx bitrate to be applied for the TID */ struct cfg80211_tid_cfg { bool config_override; u8 tids; u64 mask; enum nl80211_tid_config noack; u8 retry_long, retry_short; enum nl80211_tid_config ampdu; enum nl80211_tid_config rtscts; enum nl80211_tid_config amsdu; enum nl80211_tx_rate_setting txrate_type; struct cfg80211_bitrate_mask txrate_mask; }; /** * struct cfg80211_tid_config - TID configuration * @peer: Station's MAC address * @n_tid_conf: Number of TID specific configurations to be applied * @tid_conf: Configuration change info */ struct cfg80211_tid_config { const u8 *peer; u32 n_tid_conf; struct cfg80211_tid_cfg tid_conf[] __counted_by(n_tid_conf); }; /** * struct cfg80211_fils_aad - FILS AAD data * @macaddr: STA MAC address * @kek: FILS KEK * @kek_len: FILS KEK length * @snonce: STA Nonce * @anonce: AP Nonce */ struct cfg80211_fils_aad { const u8 *macaddr; const u8 *kek; u8 kek_len; const u8 *snonce; const u8 *anonce; }; /** * struct cfg80211_set_hw_timestamp - enable/disable HW timestamping * @macaddr: peer MAC address. NULL to enable/disable HW timestamping for all * addresses. * @enable: if set, enable HW timestamping for the specified MAC address. * Otherwise disable HW timestamping for the specified MAC address. */ struct cfg80211_set_hw_timestamp { const u8 *macaddr; bool enable; }; /** * cfg80211_get_chandef_type - return old channel type from chandef * @chandef: the channel definition * * Return: The old channel type (NOHT, HT20, HT40+/-) from a given * chandef, which must have a bandwidth allowing this conversion. */ static inline enum nl80211_channel_type cfg80211_get_chandef_type(const struct cfg80211_chan_def *chandef) { switch (chandef->width) { case NL80211_CHAN_WIDTH_20_NOHT: return NL80211_CHAN_NO_HT; case NL80211_CHAN_WIDTH_20: return NL80211_CHAN_HT20; case NL80211_CHAN_WIDTH_40: if (chandef->center_freq1 > chandef->chan->center_freq) return NL80211_CHAN_HT40PLUS; return NL80211_CHAN_HT40MINUS; default: WARN_ON(1); return NL80211_CHAN_NO_HT; } } /** * cfg80211_chandef_create - create channel definition using channel type * @chandef: the channel definition struct to fill * @channel: the control channel * @chantype: the channel type * * Given a channel type, create a channel definition. */ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, struct ieee80211_channel *channel, enum nl80211_channel_type chantype); /** * cfg80211_chandef_identical - check if two channel definitions are identical * @chandef1: first channel definition * @chandef2: second channel definition * * Return: %true if the channels defined by the channel definitions are * identical, %false otherwise. */ static inline bool cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2) { return (chandef1->chan == chandef2->chan && chandef1->width == chandef2->width && chandef1->center_freq1 == chandef2->center_freq1 && chandef1->freq1_offset == chandef2->freq1_offset && chandef1->center_freq2 == chandef2->center_freq2 && chandef1->punctured == chandef2->punctured); } /** * cfg80211_chandef_is_edmg - check if chandef represents an EDMG channel * * @chandef: the channel definition * * Return: %true if EDMG defined, %false otherwise. 
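 *
 * Example (illustrative sketch added in editing, not part of the original
 * kernel-doc; "chan" is assumed to be a valid &struct ieee80211_channel
 * pointer): a chandef built with cfg80211_chandef_create() for a plain
 * HT20 channel leaves its @edmg member all zero and is therefore not
 * reported as EDMG by this helper:
 *
 *	struct cfg80211_chan_def def = {};
 *
 *	cfg80211_chandef_create(&def, chan, NL80211_CHAN_HT20);
 *	if (!cfg80211_chandef_valid(&def))
 *		return -EINVAL;
 *	if (cfg80211_chandef_is_edmg(&def))
 *		return -EINVAL;		// not reached for this definition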
*/ static inline bool cfg80211_chandef_is_edmg(const struct cfg80211_chan_def *chandef) { return chandef->edmg.channels || chandef->edmg.bw_config; } /** * cfg80211_chandef_compatible - check if two channel definitions are compatible * @chandef1: first channel definition * @chandef2: second channel definition * * Return: %NULL if the given channel definitions are incompatible, * chandef1 or chandef2 otherwise. */ const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2); /** * nl80211_chan_width_to_mhz - get the channel width in MHz * @chan_width: the channel width from &enum nl80211_chan_width * * Return: channel width in MHz if the chan_width from &enum nl80211_chan_width * is valid. -1 otherwise. */ int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width); /** * cfg80211_chandef_valid - check if a channel definition is valid * @chandef: the channel definition to check * Return: %true if the channel definition is valid. %false otherwise. */ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef); /** * cfg80211_chandef_usable - check if secondary channels can be used * @wiphy: the wiphy to validate against * @chandef: the channel definition to check * @prohibited_flags: the regulatory channel flags that must not be set * Return: %true if secondary channels are usable. %false otherwise. */ bool cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags); /** * cfg80211_chandef_dfs_required - checks if radar detection is required * @wiphy: the wiphy to validate against * @chandef: the channel definition to check * @iftype: the interface type as specified in &enum nl80211_iftype * Returns: * 1 if radar detection is required, 0 if it is not, < 0 on error */ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype); /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable and we * can/need start CAC on such channel * @wiphy: the wiphy to validate against * @chandef: the channel definition to check * * Return: true if all channels available and at least * one channel requires CAC (NL80211_DFS_USABLE) */ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); /** * cfg80211_chandef_dfs_cac_time - get the DFS CAC time (in ms) for given * channel definition * @wiphy: the wiphy to validate against * @chandef: the channel definition to check * * Returns: DFS CAC time (in ms) which applies for this channel definition */ unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); /** * cfg80211_chandef_primary - calculate primary 40/80/160 MHz freq * @chandef: chandef to calculate for * @primary_chan_width: primary channel width to calculate center for * @punctured: punctured sub-channel bitmap, will be recalculated * according to the new bandwidth, can be %NULL * * Returns: the primary 40/80/160 MHz channel center frequency, or -1 * for errors, updating the punctured bitmap */ int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef, enum nl80211_chan_width primary_chan_width, u16 *punctured); /** * nl80211_send_chandef - sends the channel definition. 
* @msg: the msg to send channel definition * @chandef: the channel definition to check * * Returns: 0 if sent the channel definition to msg, < 0 on error **/ int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef); /** * ieee80211_chanwidth_rate_flags - return rate flags for channel width * @width: the channel width of the channel * * In some channel types, not all rates may be used - for example CCK * rates may not be used in 5/10 MHz channels. * * Returns: rate flags which apply for this channel width */ static inline enum ieee80211_rate_flags ieee80211_chanwidth_rate_flags(enum nl80211_chan_width width) { switch (width) { case NL80211_CHAN_WIDTH_5: return IEEE80211_RATE_SUPPORTS_5MHZ; case NL80211_CHAN_WIDTH_10: return IEEE80211_RATE_SUPPORTS_10MHZ; default: break; } return 0; } /** * ieee80211_chandef_rate_flags - returns rate flags for a channel * @chandef: channel definition for the channel * * See ieee80211_chanwidth_rate_flags(). * * Returns: rate flags which apply for this channel */ static inline enum ieee80211_rate_flags ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef) { return ieee80211_chanwidth_rate_flags(chandef->width); } /** * ieee80211_chandef_max_power - maximum transmission power for the chandef * * In some regulations, the transmit power may depend on the configured channel * bandwidth which may be defined as dBm/MHz. This function returns the actual * max_power for non-standard (20 MHz) channels. * * @chandef: channel definition for the channel * * Returns: maximum allowed transmission power in dBm for the chandef */ static inline int ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef) { switch (chandef->width) { case NL80211_CHAN_WIDTH_5: return min(chandef->chan->max_reg_power - 6, chandef->chan->max_power); case NL80211_CHAN_WIDTH_10: return min(chandef->chan->max_reg_power - 3, chandef->chan->max_power); default: break; } return chandef->chan->max_power; } /** * cfg80211_any_usable_channels - check for usable channels * @wiphy: the wiphy to check for * @band_mask: which bands to check on * @prohibited_flags: which channels to not consider usable, * %IEEE80211_CHAN_DISABLED is always taken into account */ bool cfg80211_any_usable_channels(struct wiphy *wiphy, unsigned long band_mask, u32 prohibited_flags); /** * enum survey_info_flags - survey information flags * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in * @SURVEY_INFO_IN_USE: channel is currently being used * @SURVEY_INFO_TIME: active time (in ms) was filled in * @SURVEY_INFO_TIME_BUSY: busy time was filled in * @SURVEY_INFO_TIME_EXT_BUSY: extension channel busy time was filled in * @SURVEY_INFO_TIME_RX: receive time was filled in * @SURVEY_INFO_TIME_TX: transmit time was filled in * @SURVEY_INFO_TIME_SCAN: scan time was filled in * @SURVEY_INFO_TIME_BSS_RX: local BSS receive time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). 
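 *
 * Example (illustrative sketch added in editing, not part of the original
 * kernel-doc; "info", "chan", "active_ms" and "busy_ms" are hypothetical
 * driver state): a driver's survey callback sets exactly the flags that
 * match the fields it filled in:
 *
 *	info->channel = chan;
 *	info->noise = -92;		// dBm, hypothetical measurement
 *	info->time = active_ms;
 *	info->time_busy = busy_ms;
 *	info->filled = SURVEY_INFO_NOISE_DBM |
 *		       SURVEY_INFO_TIME |
 *		       SURVEY_INFO_TIME_BUSY;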
*/ enum survey_info_flags { SURVEY_INFO_NOISE_DBM = BIT(0), SURVEY_INFO_IN_USE = BIT(1), SURVEY_INFO_TIME = BIT(2), SURVEY_INFO_TIME_BUSY = BIT(3), SURVEY_INFO_TIME_EXT_BUSY = BIT(4), SURVEY_INFO_TIME_RX = BIT(5), SURVEY_INFO_TIME_TX = BIT(6), SURVEY_INFO_TIME_SCAN = BIT(7), SURVEY_INFO_TIME_BSS_RX = BIT(8), }; /** * struct survey_info - channel survey response * * @channel: the channel this survey record reports, may be %NULL for a single * record to report global statistics * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional * @time: amount of time in ms the radio was turn on (on the channel) * @time_busy: amount of time the primary channel was sensed busy * @time_ext_busy: amount of time the extension channel was sensed busy * @time_rx: amount of time the radio spent receiving data * @time_tx: amount of time the radio spent transmitting data * @time_scan: amount of time the radio spent for scanning * @time_bss_rx: amount of time the radio spent receiving data on a local BSS * * Used by dump_survey() to report back per-channel survey information. * * This structure can later be expanded with things like * channel duty cycle etc. */ struct survey_info { struct ieee80211_channel *channel; u64 time; u64 time_busy; u64 time_ext_busy; u64 time_rx; u64 time_tx; u64 time_scan; u64 time_bss_rx; u32 filled; s8 noise; }; #define CFG80211_MAX_NUM_AKM_SUITES 10 /** * struct cfg80211_crypto_settings - Crypto settings * @wpa_versions: indicates which, if any, WPA versions are enabled * (from enum nl80211_wpa_versions) * @cipher_group: group key cipher suite (or 0 if unset) * @n_ciphers_pairwise: number of AP supported unicast ciphers * @ciphers_pairwise: unicast key cipher suites * @n_akm_suites: number of AKM suites * @akm_suites: AKM suites * @control_port: Whether user space controls IEEE 802.1X port, i.e., * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. * @control_port_ethertype: the control port protocol that should be * allowed through even on unauthorized ports * @control_port_no_encrypt: TRUE to prevent encryption of control port * protocol frames. * @control_port_over_nl80211: TRUE if userspace expects to exchange control * port frames over NL80211 instead of the network interface. * @control_port_no_preauth: disables pre-auth rx over the nl80211 control * port for mac80211 * @psk: PSK (for devices supporting 4-way-handshake offload) * @sae_pwd: password for SAE authentication (for devices supporting SAE * offload) * @sae_pwd_len: length of SAE password (for devices supporting SAE offload) * @sae_pwe: The mechanisms allowed for SAE PWE derivation: * * NL80211_SAE_PWE_UNSPECIFIED * Not-specified, used to indicate userspace did not specify any * preference. The driver should follow its internal policy in * such a scenario. 
* * NL80211_SAE_PWE_HUNT_AND_PECK * Allow hunting-and-pecking loop only * * NL80211_SAE_PWE_HASH_TO_ELEMENT * Allow hash-to-element only * * NL80211_SAE_PWE_BOTH * Allow either hunting-and-pecking loop or hash-to-element */ struct cfg80211_crypto_settings { u32 wpa_versions; u32 cipher_group; int n_ciphers_pairwise; u32 ciphers_pairwise[NL80211_MAX_NR_CIPHER_SUITES]; int n_akm_suites; u32 akm_suites[CFG80211_MAX_NUM_AKM_SUITES]; bool control_port; __be16 control_port_ethertype; bool control_port_no_encrypt; bool control_port_over_nl80211; bool control_port_no_preauth; const u8 *psk; const u8 *sae_pwd; u8 sae_pwd_len; enum nl80211_sae_pwe_mechanism sae_pwe; }; /** * struct cfg80211_mbssid_config - AP settings for multi bssid * * @tx_wdev: pointer to the transmitted interface in the MBSSID set * @index: index of this AP in the multi bssid group. * @ema: set to true if the beacons should be sent out in EMA mode. */ struct cfg80211_mbssid_config { struct wireless_dev *tx_wdev; u8 index; bool ema; }; /** * struct cfg80211_mbssid_elems - Multiple BSSID elements * * @cnt: Number of elements in array %elems. * * @elem: Array of multiple BSSID element(s) to be added into Beacon frames. * @elem.data: Data for multiple BSSID elements. * @elem.len: Length of data. */ struct cfg80211_mbssid_elems { u8 cnt; struct { const u8 *data; size_t len; } elem[] __counted_by(cnt); }; /** * struct cfg80211_rnr_elems - Reduced neighbor report (RNR) elements * * @cnt: Number of elements in array %elems. * * @elem: Array of RNR element(s) to be added into Beacon frames. * @elem.data: Data for RNR elements. * @elem.len: Length of data. */ struct cfg80211_rnr_elems { u8 cnt; struct { const u8 *data; size_t len; } elem[] __counted_by(cnt); }; /** * struct cfg80211_beacon_data - beacon data * @link_id: the link ID for the AP MLD link sending this beacon * @head: head portion of beacon (before TIM IE) * or %NULL if not changed * @tail: tail portion of beacon (after TIM IE) * or %NULL if not changed * @head_len: length of @head * @tail_len: length of @tail * @beacon_ies: extra information element(s) to add into Beacon frames or %NULL * @beacon_ies_len: length of beacon_ies in octets * @proberesp_ies: extra information element(s) to add into Probe Response * frames or %NULL * @proberesp_ies_len: length of proberesp_ies in octets * @assocresp_ies: extra information element(s) to add into (Re)Association * Response frames or %NULL * @assocresp_ies_len: length of assocresp_ies in octets * @probe_resp_len: length of probe response template (@probe_resp) * @probe_resp: probe response template (AP mode only) * @mbssid_ies: multiple BSSID elements * @rnr_ies: reduced neighbor report elements * @ftm_responder: enable FTM responder functionality; -1 for no change * (which also implies no change in LCI/civic location data) * @lci: Measurement Report element content, starting with Measurement Token * (measurement type 8) * @civicloc: Measurement Report element content, starting with Measurement * Token (measurement type 11) * @lci_len: LCI data length * @civicloc_len: Civic location data length * @he_bss_color: BSS Color settings * @he_bss_color_valid: indicates whether bss color * attribute is present in beacon data or not. 
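 *
 * As a hedged illustration only (drv_vif and the helpers named below are
 * hypothetical, not part of this API): since unchanged members are left
 * %NULL, a driver typically applies only what is present, e.g.
 *
 *	static void drv_apply_beacon(struct drv_vif *vif,
 *				     const struct cfg80211_beacon_data *params)
 *	{
 *		if (params->head)
 *			drv_set_beacon_head(vif, params->head, params->head_len);
 *		if (params->tail)
 *			drv_set_beacon_tail(vif, params->tail, params->tail_len);
 *		if (params->ftm_responder >= 0)
 *			vif->ftm_responder = params->ftm_responder;
 *	}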
*/ struct cfg80211_beacon_data { unsigned int link_id; const u8 *head, *tail; const u8 *beacon_ies; const u8 *proberesp_ies; const u8 *assocresp_ies; const u8 *probe_resp; const u8 *lci; const u8 *civicloc; struct cfg80211_mbssid_elems *mbssid_ies; struct cfg80211_rnr_elems *rnr_ies; s8 ftm_responder; size_t head_len, tail_len; size_t beacon_ies_len; size_t proberesp_ies_len; size_t assocresp_ies_len; size_t probe_resp_len; size_t lci_len; size_t civicloc_len; struct cfg80211_he_bss_color he_bss_color; bool he_bss_color_valid; }; struct mac_address { u8 addr[ETH_ALEN]; }; /** * struct cfg80211_acl_data - Access control list data * * @acl_policy: ACL policy to be applied on the station's * entry specified by mac_addr * @n_acl_entries: Number of MAC address entries passed * @mac_addrs: List of MAC addresses of stations to be used for ACL */ struct cfg80211_acl_data { enum nl80211_acl_policy acl_policy; int n_acl_entries; /* Keep it last */ struct mac_address mac_addrs[] __counted_by(n_acl_entries); }; /** * struct cfg80211_fils_discovery - FILS discovery parameters from * IEEE Std 802.11ai-2016, Annex C.3 MIB detail. * * @update: Set to true if the feature configuration should be updated. * @min_interval: Minimum packet interval in TUs (0 - 10000) * @max_interval: Maximum packet interval in TUs (0 - 10000) * @tmpl_len: Template length * @tmpl: Template data for FILS discovery frame including the action * frame headers. */ struct cfg80211_fils_discovery { bool update; u32 min_interval; u32 max_interval; size_t tmpl_len; const u8 *tmpl; }; /** * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe * response parameters in 6GHz. * * @update: Set to true if the feature configuration should be updated. * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned * in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive * scanning * @tmpl_len: Template length * @tmpl: Template data for probe response */ struct cfg80211_unsol_bcast_probe_resp { bool update; u32 interval; size_t tmpl_len; const u8 *tmpl; }; /** * struct cfg80211_ap_settings - AP configuration * * Used to configure an AP interface. * * @chandef: defines the channel to use * @beacon: beacon data * @beacon_interval: beacon interval * @dtim_period: DTIM period * @ssid: SSID to be used in the BSS (note: may be %NULL if not provided from * user space) * @ssid_len: length of @ssid * @hidden_ssid: whether to hide the SSID in Beacon/Probe Response frames * @crypto: crypto settings * @privacy: the BSS uses privacy * @auth_type: Authentication type (algorithm) * @smps_mode: SMPS mode * @inactivity_timeout: time in seconds to determine station's inactivity. * @p2p_ctwindow: P2P CT Window * @p2p_opp_ps: P2P opportunistic PS * @acl: ACL configuration used by the drivers which has support for * MAC address based access control * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. 
* @beacon_rate: bitrate to be used for beacons * @ht_cap: HT capabilities (or %NULL if HT isn't enabled) * @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled) * @he_cap: HE capabilities (or %NULL if HE isn't enabled) * @eht_cap: EHT capabilities (or %NULL if EHT isn't enabled) * @eht_oper: EHT operation IE (or %NULL if EHT isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT * @twt_responder: Enable Target Wake Time (TWT) responder support * @he_required: stations must support HE * @sae_h2e_required: stations must support direct H2E technique in SAE * @flags: flags, as defined in &enum nl80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings * @he_oper: HE operation IE (or %NULL if HE isn't enabled) * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters * @mbssid_config: AP settings for multiple bssid */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; struct cfg80211_beacon_data beacon; int beacon_interval, dtim_period; const u8 *ssid; size_t ssid_len; enum nl80211_hidden_ssid hidden_ssid; struct cfg80211_crypto_settings crypto; bool privacy; enum nl80211_auth_type auth_type; enum nl80211_smps_mode smps_mode; int inactivity_timeout; u8 p2p_ctwindow; bool p2p_opp_ps; const struct cfg80211_acl_data *acl; bool pbss; struct cfg80211_bitrate_mask beacon_rate; const struct ieee80211_ht_cap *ht_cap; const struct ieee80211_vht_cap *vht_cap; const struct ieee80211_he_cap_elem *he_cap; const struct ieee80211_he_operation *he_oper; const struct ieee80211_eht_cap_elem *eht_cap; const struct ieee80211_eht_operation *eht_oper; bool ht_required, vht_required, he_required, sae_h2e_required; bool twt_responder; u32 flags; struct ieee80211_he_obss_pd he_obss_pd; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; struct cfg80211_mbssid_config mbssid_config; }; /** * struct cfg80211_ap_update - AP configuration update * * Subset of &struct cfg80211_ap_settings, for updating a running AP. * * @beacon: beacon data * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters */ struct cfg80211_ap_update { struct cfg80211_beacon_data beacon; struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; }; /** * struct cfg80211_csa_settings - channel switch settings * * Used for channel switch * * @chandef: defines the channel to use after the switch * @beacon_csa: beacon data while performing the switch * @counter_offsets_beacon: offsets of the counters within the beacon (tail) * @counter_offsets_presp: offsets of the counters within the probe response * @n_counter_offsets_beacon: number of csa counters in the beacon (tail) * @n_counter_offsets_presp: number of csa counters in the probe response * @beacon_after: beacon data to be used on the new channel * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing * @count: number of beacons until switch * @link_id: defines the link on which channel switch is expected during * MLO. 0 in case of non-MLO.
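 *
 * Illustrative note only (not a requirement of this API): the counter
 * offsets index into the beacon_csa template, so a driver updating the
 * CSA count in its copy of the tail could, roughly, do
 *
 *	for (i = 0; i < params->n_counter_offsets_beacon; i++)
 *		tail[params->counter_offsets_beacon[i]] = count;
 *
 * where "tail" and "count" stand for the driver's copies of
 * beacon_csa.tail and the remaining beacon count.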
*/ struct cfg80211_csa_settings { struct cfg80211_chan_def chandef; struct cfg80211_beacon_data beacon_csa; const u16 *counter_offsets_beacon; const u16 *counter_offsets_presp; unsigned int n_counter_offsets_beacon; unsigned int n_counter_offsets_presp; struct cfg80211_beacon_data beacon_after; bool radar_required; bool block_tx; u8 count; u8 link_id; }; /** * struct cfg80211_color_change_settings - color change settings * * Used for bss color change * * @beacon_color_change: beacon data while performing the color countdown * @counter_offset_beacon: offsets of the counters within the beacon (tail) * @counter_offset_presp: offsets of the counters within the probe response * @beacon_next: beacon data to be used after the color change * @count: number of beacons until the color change * @color: the color used after the change */ struct cfg80211_color_change_settings { struct cfg80211_beacon_data beacon_color_change; u16 counter_offset_beacon; u16 counter_offset_presp; struct cfg80211_beacon_data beacon_next; u8 count; u8 color; }; /** * struct iface_combination_params - input parameters for interface combinations * * Used to pass interface combination parameters * * @num_different_channels: the number of different channels we want * to use for verification * @radar_detect: a bitmap where each bit corresponds to a channel * width where radar detection is needed, as in the definition of * &struct ieee80211_iface_combination.@radar_detect_widths * @iftype_num: array with the number of interfaces of each interface * type. The index is the interface type as specified in &enum * nl80211_iftype. * @new_beacon_int: set this to the beacon interval of a new interface * that's not operating yet, if such is to be checked as part of * the verification */ struct iface_combination_params { int num_different_channels; u8 radar_detect; int iftype_num[NUM_NL80211_IFTYPES]; u32 new_beacon_int; }; /** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) * @STATION_PARAM_APPLY_CAPABILITY: apply new capability * @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state * * Not all station parameters have in-band "no change" signalling; * for those that don't, these flags are used. */ enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), STATION_PARAM_APPLY_CAPABILITY = BIT(1), STATION_PARAM_APPLY_PLINK_STATE = BIT(2), }; /** * struct sta_txpwr - station txpower configuration * * Used to configure the tx power for a station. * * @power: tx power (in dBm) to be used for sending data traffic. If tx power * is not provided, the default per-interface tx power setting applies * instead. The driver should pick the lower of the per-interface and * per-station tx power. * @type: In particular if TPC %type is NL80211_TX_POWER_LIMITED then tx power * will be less than or equal to the value specified from userspace, whereas * if TPC %type is NL80211_TX_POWER_AUTOMATIC then it indicates the default * tx power. NL80211_TX_POWER_FIXED is not a valid configuration option for * per peer TPC. */ struct sta_txpwr { s16 power; enum nl80211_tx_power_setting type; }; /** * struct link_station_parameters - link station parameters * * Used to change and create a new link station.
* * @mld_mac: MAC address of the station * @link_id: the link id (-1 for non-MLD station) * @link_mac: MAC address of the link * @supported_rates: supported rates in IEEE 802.11 format * (or NULL for no change) * @supported_rates_len: number of supported rates * @ht_capa: HT capabilities of station * @vht_capa: VHT capabilities of station * @opmode_notif: operating mode field from Operating Mode Notification * @opmode_notif_used: information if operating mode field is used * @he_capa: HE capabilities of station * @he_capa_len: the length of the HE capabilities * @txpwr: transmit power for an associated station * @txpwr_set: txpwr field is set * @he_6ghz_capa: HE 6 GHz Band capabilities of station * @eht_capa: EHT capabilities of station * @eht_capa_len: the length of the EHT capabilities */ struct link_station_parameters { const u8 *mld_mac; int link_id; const u8 *link_mac; const u8 *supported_rates; u8 supported_rates_len; const struct ieee80211_ht_cap *ht_capa; const struct ieee80211_vht_cap *vht_capa; u8 opmode_notif; bool opmode_notif_used; const struct ieee80211_he_cap_elem *he_capa; u8 he_capa_len; struct sta_txpwr txpwr; bool txpwr_set; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; const struct ieee80211_eht_cap_elem *eht_capa; u8 eht_capa_len; }; /** * struct link_station_del_parameters - link station deletion parameters * * Used to delete a link station entry (or all stations). * * @mld_mac: MAC address of the station * @link_id: the link id */ struct link_station_del_parameters { const u8 *mld_mac; u32 link_id; }; /** * struct cfg80211_ttlm_params: TID to link mapping parameters * * Used for setting a TID to link mapping. * * @dlink: Downlink TID to link mapping, as defined in section 9.4.2.314 * (TID-To-Link Mapping element) in Draft P802.11be_D4.0. * @ulink: Uplink TID to link mapping, as defined in section 9.4.2.314 * (TID-To-Link Mapping element) in Draft P802.11be_D4.0. */ struct cfg80211_ttlm_params { u16 dlink[8]; u16 ulink[8]; }; /** * struct station_parameters - station parameters * * Used to change and create a new station. * * @vlan: vlan interface station should belong to * @sta_flags_mask: station flags that changed * (bitmask of BIT(%NL80211_STA_FLAG_...)) * @sta_flags_set: station flags values * (bitmask of BIT(%NL80211_STA_FLAG_...)) * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change * @vlan_id: VLAN ID for station (if nonzero) * @peer_aid: mesh peer AID or zero for no change * @plink_action: plink action to take * @plink_state: set the peer link state for a station * @uapsd_queues: bitmap of queues configured for uapsd. same format * as the AC bitmap in the QoS info field * @max_sp: max Service Period. 
same format as the MAX_SP in the * QoS info field (but already shifted down) * @sta_modify_mask: bitmap indicating which parameters changed * (for those that don't have a natural "no change" value), * see &enum station_parameters_apply_mask * @local_pm: local link-specific mesh power save mode (no change when set * to unknown) * @capability: station capability * @ext_capab: extended capabilities of the station * @ext_capab_len: number of extended capabilities * @supported_channels: supported channels in IEEE 802.11 format * @supported_channels_len: number of supported channels * @supported_oper_classes: supported oper classes in IEEE 802.11 format * @supported_oper_classes_len: number of supported operating classes * @support_p2p_ps: information if station supports P2P PS mechanism * @airtime_weight: airtime scheduler weight for this station * @link_sta_params: link related params. */ struct station_parameters { struct net_device *vlan; u32 sta_flags_mask, sta_flags_set; u32 sta_modify_mask; int listen_interval; u16 aid; u16 vlan_id; u16 peer_aid; u8 plink_action; u8 plink_state; u8 uapsd_queues; u8 max_sp; enum nl80211_mesh_power_mode local_pm; u16 capability; const u8 *ext_capab; u8 ext_capab_len; const u8 *supported_channels; u8 supported_channels_len; const u8 *supported_oper_classes; u8 supported_oper_classes_len; int support_p2p_ps; u16 airtime_weight; struct link_station_parameters link_sta_params; }; /** * struct station_del_parameters - station deletion parameters * * Used to delete a station entry (or all stations). * * @mac: MAC address of the station to remove or NULL to remove all stations * @subtype: Management frame subtype to use for indicating removal * (10 = Disassociation, 12 = Deauthentication) * @reason_code: Reason code for the Disassociation/Deauthentication frame * @link_id: Link ID indicating a link that stations to be flushed must be * using; valid only for MLO, but can also be -1 for MLO to really * remove all stations. 
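 *
 * For orientation only (the reason code is just an example and peer_addr
 * is assumed to hold the station's MAC address): removing a single
 * station with a Deauthentication indication could be described as
 *
 *	struct station_del_parameters del = {
 *		.mac = peer_addr,
 *		.subtype = 12,
 *		.reason_code = WLAN_REASON_DEAUTH_LEAVING,
 *		.link_id = -1,
 *	};
 *
 * while leaving @mac %NULL requests that all stations be flushed.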
*/ struct station_del_parameters { const u8 *mac; u8 subtype; u16 reason_code; int link_id; }; /** * enum cfg80211_station_type - the type of station being modified * @CFG80211_STA_AP_CLIENT: client of an AP interface * @CFG80211_STA_AP_CLIENT_UNASSOC: client of an AP interface that is still * unassociated (update properties for this type of client is permitted) * @CFG80211_STA_AP_MLME_CLIENT: client of an AP interface that has * the AP MLME in the device * @CFG80211_STA_AP_STA: AP station on managed interface * @CFG80211_STA_IBSS: IBSS station * @CFG80211_STA_TDLS_PEER_SETUP: TDLS peer on managed interface (dummy entry * while TDLS setup is in progress, it moves out of this state when * being marked authorized; use this only if TDLS with external setup is * supported/used) * @CFG80211_STA_TDLS_PEER_ACTIVE: TDLS peer on managed interface (active * entry that is operating, has been marked authorized by userspace) * @CFG80211_STA_MESH_PEER_KERNEL: peer on mesh interface (kernel managed) * @CFG80211_STA_MESH_PEER_USER: peer on mesh interface (user managed) */ enum cfg80211_station_type { CFG80211_STA_AP_CLIENT, CFG80211_STA_AP_CLIENT_UNASSOC, CFG80211_STA_AP_MLME_CLIENT, CFG80211_STA_AP_STA, CFG80211_STA_IBSS, CFG80211_STA_TDLS_PEER_SETUP, CFG80211_STA_TDLS_PEER_ACTIVE, CFG80211_STA_MESH_PEER_KERNEL, CFG80211_STA_MESH_PEER_USER, }; /** * cfg80211_check_station_change - validate parameter changes * @wiphy: the wiphy this operates on * @params: the new parameters for a station * @statype: the type of station being modified * * Utility function for the @change_station driver method. Call this function * with the appropriate station type looking up the station (and checking that * it exists). It will verify whether the station change is acceptable, and if * not will return an error code. Note that it may modify the parameters for * backward compatibility reasons, so don't use them before calling this. */ int cfg80211_check_station_change(struct wiphy *wiphy, struct station_parameters *params, enum cfg80211_station_type statype); /** * enum rate_info_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. * * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval * @RATE_INFO_FLAGS_DMG: 60GHz MCS * @RATE_INFO_FLAGS_HE_MCS: HE MCS information * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS * @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information * @RATE_INFO_FLAGS_S1G_MCS: MCS field filled with S1G MCS */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), RATE_INFO_FLAGS_VHT_MCS = BIT(1), RATE_INFO_FLAGS_SHORT_GI = BIT(2), RATE_INFO_FLAGS_DMG = BIT(3), RATE_INFO_FLAGS_HE_MCS = BIT(4), RATE_INFO_FLAGS_EDMG = BIT(5), RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), RATE_INFO_FLAGS_EHT_MCS = BIT(7), RATE_INFO_FLAGS_S1G_MCS = BIT(8), }; /** * enum rate_info_bw - rate bandwidth information * * Used by the driver to indicate the rate bandwidth. 
* * @RATE_INFO_BW_5: 5 MHz bandwidth * @RATE_INFO_BW_10: 10 MHz bandwidth * @RATE_INFO_BW_20: 20 MHz bandwidth * @RATE_INFO_BW_40: 40 MHz bandwidth * @RATE_INFO_BW_80: 80 MHz bandwidth * @RATE_INFO_BW_160: 160 MHz bandwidth * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation * @RATE_INFO_BW_320: 320 MHz bandwidth * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation * @RATE_INFO_BW_1: 1 MHz bandwidth * @RATE_INFO_BW_2: 2 MHz bandwidth * @RATE_INFO_BW_4: 4 MHz bandwidth * @RATE_INFO_BW_8: 8 MHz bandwidth * @RATE_INFO_BW_16: 16 MHz bandwidth */ enum rate_info_bw { RATE_INFO_BW_20 = 0, RATE_INFO_BW_5, RATE_INFO_BW_10, RATE_INFO_BW_40, RATE_INFO_BW_80, RATE_INFO_BW_160, RATE_INFO_BW_HE_RU, RATE_INFO_BW_320, RATE_INFO_BW_EHT_RU, RATE_INFO_BW_1, RATE_INFO_BW_2, RATE_INFO_BW_4, RATE_INFO_BW_8, RATE_INFO_BW_16, }; /** * struct rate_info - bitrate information * * Information about a receiving or transmitting bitrate * * @flags: bitflag of flags from &enum rate_info_flags * @legacy: bitrate in 100kbit/s for 802.11abg * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G rate * @nss: number of streams (VHT & HE only) * @bw: bandwidth (from &enum rate_info_bw) * @he_gi: HE guard interval (from &enum nl80211_he_gi) * @he_dcm: HE DCM value * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc, * only valid if bw is %RATE_INFO_BW_HE_RU) * @n_bonded_ch: In case of EDMG the number of bonded channels (1-4) * @eht_gi: EHT guard interval (from &enum nl80211_eht_gi) * @eht_ru_alloc: EHT RU allocation (from &enum nl80211_eht_ru_alloc, * only valid if bw is %RATE_INFO_BW_EHT_RU) */ struct rate_info { u16 flags; u16 legacy; u8 mcs; u8 nss; u8 bw; u8 he_gi; u8 he_dcm; u8 he_ru_alloc; u8 n_bonded_ch; u8 eht_gi; u8 eht_ru_alloc; }; /** * enum bss_param_flags - BSS parameter flags * * Used by the driver in &struct sta_bss_parameters to indicate which * options are enabled for the BSS the station is attached to.
* * @BSS_PARAM_FLAGS_CTS_PROT: whether CTS protection is enabled * @BSS_PARAM_FLAGS_SHORT_PREAMBLE: whether short preamble is enabled * @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled */ enum bss_param_flags { BSS_PARAM_FLAGS_CTS_PROT = 1<<0, BSS_PARAM_FLAGS_SHORT_PREAMBLE = 1<<1, BSS_PARAM_FLAGS_SHORT_SLOT_TIME = 1<<2, }; /** * struct sta_bss_parameters - BSS parameters for the attached station * * Information about the currently associated BSS * * @flags: bitflag of flags from &enum bss_param_flags * @dtim_period: DTIM period for the BSS * @beacon_interval: beacon interval */ struct sta_bss_parameters { u8 flags; u8 dtim_period; u16 beacon_interval; }; /** * struct cfg80211_txq_stats - TXQ statistics for this TID * @filled: bitmap of flags using the bits of &enum nl80211_txq_stats to * indicate the relevant values in this struct are filled * @backlog_bytes: total number of bytes currently backlogged * @backlog_packets: total number of packets currently backlogged * @flows: number of new flows seen * @drops: total number of packets dropped * @ecn_marks: total number of packets marked with ECN CE * @overlimit: number of drops due to queue space overflow * @overmemory: number of drops due to memory limit overflow * @collisions: number of hash collisions * @tx_bytes: total number of bytes dequeued * @tx_packets: total number of packets dequeued * @max_flows: maximum number of flows supported */ struct cfg80211_txq_stats { u32 filled; u32 backlog_bytes; u32 backlog_packets; u32 flows; u32 drops; u32 ecn_marks; u32 overlimit; u32 overmemory; u32 collisions; u32 tx_bytes; u32 tx_packets; u32 max_flows; }; /** * struct cfg80211_tid_stats - per-TID statistics * @filled: bitmap of flags using the bits of &enum nl80211_tid_stats to * indicate the relevant values in this struct are filled * @rx_msdu: number of received MSDUs * @tx_msdu: number of (attempted) transmitted MSDUs * @tx_msdu_retries: number of retries (not counting the first) for * transmitted MSDUs * @tx_msdu_failed: number of failed transmitted MSDUs * @txq_stats: TXQ statistics */ struct cfg80211_tid_stats { u32 filled; u64 rx_msdu; u64 tx_msdu; u64 tx_msdu_retries; u64 tx_msdu_failed; struct cfg80211_txq_stats txq_stats; }; #define IEEE80211_MAX_CHAINS 4 /** * struct station_info - station information * * Station information filled by driver for get_station() and dump_station. * * @filled: bitflag of flags using the bits of &enum nl80211_sta_info to * indicate the relevant values in this struct for them * @connected_time: time(in secs) since a station is last connected * @inactive_time: time since last station activity (tx/rx) in milliseconds * @assoc_at: bootime (ns) of the last association * @rx_bytes: bytes (size of MPDUs) received from this station * @tx_bytes: bytes (size of MPDUs) transmitted to this station * @llid: mesh local link id * @plid: mesh peer link id * @plink_state: mesh peer link state * @signal: The signal strength, type depends on the wiphy's signal_type. * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. * @signal_avg: Average signal strength, type depends on the wiphy's signal_type. * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. 
* @chains: bitmask for filled values in @chain_signal, @chain_signal_avg * @chain_signal: per-chain signal strength of last received packet in dBm * @chain_signal_avg: per-chain signal strength average in dBm * @txrate: current unicast bitrate from this station * @rxrate: current unicast bitrate to this station * @rx_packets: packets (MSDUs & MMPDUs) received from this station * @tx_packets: packets (MSDUs & MMPDUs) transmitted to this station * @tx_retries: cumulative retry counts (MPDUs) * @tx_failed: number of failed transmissions (MPDUs) (retries exceeded, no ACK) * @rx_dropped_misc: Dropped for un-specified reason. * @bss_param: current BSS parameters * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that * userspace can tell whether it got a consistent snapshot. * @assoc_req_ies: IEs from (Re)Association Request. * This is used only when in AP mode with drivers that do not use * user space MLME/SME implementation. The information is provided for * the cfg80211_new_sta() calls to notify user space of the IEs. * @assoc_req_ies_len: Length of assoc_req_ies buffer in octets. * @sta_flags: station flags mask & values * @beacon_loss_count: Number of times beacon loss event has triggered. * @t_offset: Time offset of the station relative to this host. * @local_pm: local mesh STA power save mode * @peer_pm: peer mesh STA power save mode * @nonpeer_pm: non-peer mesh STA power save mode * @expected_throughput: expected throughput in kbps (including 802.11 headers) * towards this station. * @rx_beacon: number of beacons received from this peer * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received * from this peer * @connected_to_gate: true if mesh STA has a path to mesh gate * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer * @tx_duration: aggregate PPDU duration(usecs) for all the frames to a peer * @airtime_weight: current airtime scheduling weight * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. * Note that this doesn't use the @filled bit, but is used if non-NULL. * @ack_signal: signal strength (in dBm) of the last ACK frame. * @avg_ack_signal: average rssi value of ack packet for the no of msdu's has * been sent. * @rx_mpdu_count: number of MPDUs received from this station * @fcs_err_count: number of packets (MPDUs) received from this station with * an FCS error. This counter should be incremented only when TA of the * received packet with an FCS error matches the peer MAC address. * @airtime_link_metric: mesh airtime link metric. * @connected_to_as: true if mesh STA has a path to authentication server * @mlo_params_valid: Indicates @assoc_link_id and @mld_addr fields are filled * by driver. Drivers use this only in cfg80211_new_sta() calls when AP * MLD's MLME/SME is offload to driver. Drivers won't fill this * information in cfg80211_del_sta_sinfo(), get_station() and * dump_station() callbacks. * @assoc_link_id: Indicates MLO link ID of the AP, with which the station * completed (re)association. This information filled for both MLO * and non-MLO STA connections when the AP affiliated with an MLD. * @mld_addr: For MLO STA connection, filled with MLD address of the station. * For non-MLO STA connection, filled with all zeros. * @assoc_resp_ies: IEs from (Re)Association Response. 
* This is used only when in AP mode with drivers that do not use user * space MLME/SME implementation. The information is provided only for the * cfg80211_new_sta() calls to notify user space of the IEs. Drivers won't * fill this information in cfg80211_del_sta_sinfo(), get_station() and * dump_station() callbacks. User space needs this information to determine * the accepted and rejected affiliated links of the connected station. * @assoc_resp_ies_len: Length of @assoc_resp_ies buffer in octets. */ struct station_info { u64 filled; u32 connected_time; u32 inactive_time; u64 assoc_at; u64 rx_bytes; u64 tx_bytes; u16 llid; u16 plid; u8 plink_state; s8 signal; s8 signal_avg; u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; s8 chain_signal_avg[IEEE80211_MAX_CHAINS]; struct rate_info txrate; struct rate_info rxrate; u32 rx_packets; u32 tx_packets; u32 tx_retries; u32 tx_failed; u32 rx_dropped_misc; struct sta_bss_parameters bss_param; struct nl80211_sta_flag_update sta_flags; int generation; const u8 *assoc_req_ies; size_t assoc_req_ies_len; u32 beacon_loss_count; s64 t_offset; enum nl80211_mesh_power_mode local_pm; enum nl80211_mesh_power_mode peer_pm; enum nl80211_mesh_power_mode nonpeer_pm; u32 expected_throughput; u64 tx_duration; u64 rx_duration; u64 rx_beacon; u8 rx_beacon_signal_avg; u8 connected_to_gate; struct cfg80211_tid_stats *pertid; s8 ack_signal; s8 avg_ack_signal; u16 airtime_weight; u32 rx_mpdu_count; u32 fcs_err_count; u32 airtime_link_metric; u8 connected_to_as; bool mlo_params_valid; u8 assoc_link_id; u8 mld_addr[ETH_ALEN] __aligned(2); const u8 *assoc_resp_ies; size_t assoc_resp_ies_len; }; /** * struct cfg80211_sar_sub_specs - sub specs limit * @power: power limitation in 0.25dbm * @freq_range_index: index the power limitation applies to */ struct cfg80211_sar_sub_specs { s32 power; u32 freq_range_index; }; /** * struct cfg80211_sar_specs - sar limit specs * @type: it's set with power in 0.25dbm or other types * @num_sub_specs: number of sar sub specs * @sub_specs: memory to hold the sar sub specs */ struct cfg80211_sar_specs { enum nl80211_sar_type type; u32 num_sub_specs; struct cfg80211_sar_sub_specs sub_specs[]; }; /** * struct cfg80211_sar_freq_ranges - sar frequency ranges * @start_freq: start range edge frequency * @end_freq: end range edge frequency */ struct cfg80211_sar_freq_ranges { u32 start_freq; u32 end_freq; }; /** * struct cfg80211_sar_capa - sar limit capability * @type: it's set via power in 0.25dbm or other types * @num_freq_ranges: number of frequency ranges * @freq_ranges: memory to hold the freq ranges. * * Note: WLAN driver may append new ranges or split an existing * range to small ones and then append them. */ struct cfg80211_sar_capa { enum nl80211_sar_type type; u32 num_freq_ranges; const struct cfg80211_sar_freq_ranges *freq_ranges; }; #if IS_ENABLED(CONFIG_CFG80211) /** * cfg80211_get_station - retrieve information about a given station * @dev: the device where the station is supposed to be connected to * @mac_addr: the mac address of the station of interest * @sinfo: pointer to the structure to fill with the information * * Returns 0 on success and sinfo is filled with the available information * otherwise returns a negative error code and the content of sinfo has to be * considered undefined. 
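 *
 * Minimal usage sketch (error handling trimmed; peer_addr is assumed to
 * be a known station address, and the release call simply frees any
 * per-TID statistics the driver may have attached):
 *
 *	struct station_info sinfo = {};
 *
 *	if (!cfg80211_get_station(dev, peer_addr, &sinfo)) {
 *		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))
 *			pr_debug("peer signal: %d dBm\n", sinfo.signal);
 *		cfg80211_sinfo_release_content(&sinfo);
 *	}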
*/ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo); #else static inline int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) { return -ENOENT; } #endif /** * enum monitor_flags - monitor flags * * Monitor interface configuration flags. Note that these must be the bits * according to the nl80211 flags. * * @MONITOR_FLAG_CHANGED: set if the flags were changed * @MONITOR_FLAG_FCSFAIL: pass frames with bad FCS * @MONITOR_FLAG_PLCPFAIL: pass frames with bad PLCP * @MONITOR_FLAG_CONTROL: pass control frames * @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering * @MONITOR_FLAG_COOK_FRAMES: report frames after processing * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address */ enum monitor_flags { MONITOR_FLAG_CHANGED = 1<<__NL80211_MNTR_FLAG_INVALID, MONITOR_FLAG_FCSFAIL = 1<<NL80211_MNTR_FLAG_FCSFAIL, MONITOR_FLAG_PLCPFAIL = 1<<NL80211_MNTR_FLAG_PLCPFAIL, MONITOR_FLAG_CONTROL = 1<<NL80211_MNTR_FLAG_CONTROL, MONITOR_FLAG_OTHER_BSS = 1<<NL80211_MNTR_FLAG_OTHER_BSS, MONITOR_FLAG_COOK_FRAMES = 1<<NL80211_MNTR_FLAG_COOK_FRAMES, MONITOR_FLAG_ACTIVE = 1<<NL80211_MNTR_FLAG_ACTIVE, }; /** * enum mpath_info_flags - mesh path information flags * * Used by the driver to indicate which info in &struct mpath_info it has filled * in during get_mpath() or dump_mpath(). * * @MPATH_INFO_FRAME_QLEN: @frame_qlen filled * @MPATH_INFO_SN: @sn filled * @MPATH_INFO_METRIC: @metric filled * @MPATH_INFO_EXPTIME: @exptime filled * @MPATH_INFO_DISCOVERY_TIMEOUT: @discovery_timeout filled * @MPATH_INFO_DISCOVERY_RETRIES: @discovery_retries filled * @MPATH_INFO_FLAGS: @flags filled * @MPATH_INFO_HOP_COUNT: @hop_count filled * @MPATH_INFO_PATH_CHANGE: @path_change_count filled */ enum mpath_info_flags { MPATH_INFO_FRAME_QLEN = BIT(0), MPATH_INFO_SN = BIT(1), MPATH_INFO_METRIC = BIT(2), MPATH_INFO_EXPTIME = BIT(3), MPATH_INFO_DISCOVERY_TIMEOUT = BIT(4), MPATH_INFO_DISCOVERY_RETRIES = BIT(5), MPATH_INFO_FLAGS = BIT(6), MPATH_INFO_HOP_COUNT = BIT(7), MPATH_INFO_PATH_CHANGE = BIT(8), }; /** * struct mpath_info - mesh path information * * Mesh path information filled by driver for get_mpath() and dump_mpath(). * * @filled: bitfield of flags from &enum mpath_info_flags * @frame_qlen: number of queued frames for this destination * @sn: target sequence number * @metric: metric (cost) of this mesh path * @exptime: expiration time for the mesh path from now, in msecs * @flags: mesh path flags from &enum mesh_path_flags * @discovery_timeout: total mesh path discovery timeout, in msecs * @discovery_retries: mesh path discovery retries * @generation: generation number for nl80211 dumps. * This number should increase every time the list of mesh paths * changes, i.e. when a mesh path is added or removed, so that * userspace can tell whether it got a consistent snapshot. * @hop_count: hops to destination * @path_change_count: total number of path changes to destination */ struct mpath_info { u32 filled; u32 frame_qlen; u32 sn; u32 metric; u32 exptime; u32 discovery_timeout; u8 discovery_retries; u8 flags; u8 hop_count; u32 path_change_count; int generation; }; /** * struct bss_parameters - BSS parameters * * Used to change BSS parameters (mainly for AP mode).
* * @link_id: link_id or -1 for non-MLD * @use_cts_prot: Whether to use CTS protection * (0 = no, 1 = yes, -1 = do not change) * @use_short_preamble: Whether the use of short preambles is allowed * (0 = no, 1 = yes, -1 = do not change) * @use_short_slot_time: Whether the use of short slot time is allowed * (0 = no, 1 = yes, -1 = do not change) * @basic_rates: basic rates in IEEE 802.11 format * (or NULL for no change) * @basic_rates_len: number of basic rates * @ap_isolate: do not forward packets between connected stations * (0 = no, 1 = yes, -1 = do not change) * @ht_opmode: HT Operation mode * (u16 = opmode, -1 = do not change) * @p2p_ctwindow: P2P CT Window (-1 = no change) * @p2p_opp_ps: P2P opportunistic PS (-1 = no change) */ struct bss_parameters { int link_id; int use_cts_prot; int use_short_preamble; int use_short_slot_time; const u8 *basic_rates; u8 basic_rates_len; int ap_isolate; int ht_opmode; s8 p2p_ctwindow, p2p_opp_ps; }; /** * struct mesh_config - 802.11s mesh configuration * * These parameters can be changed while the mesh is active. * * @dot11MeshRetryTimeout: the initial retry timeout in millisecond units used * by the Mesh Peering Open message * @dot11MeshConfirmTimeout: the initial retry timeout in millisecond units * used by the Mesh Peering Confirm message * @dot11MeshHoldingTimeout: the confirm timeout in millisecond units used by * the mesh peering management to close a mesh peering * @dot11MeshMaxPeerLinks: the maximum number of peer links allowed on this * mesh interface * @dot11MeshMaxRetries: the maximum number of peer link open retries that can * be sent to establish a new peer link instance in a mesh * @dot11MeshTTL: the value of TTL field set at a source mesh STA * @element_ttl: the value of TTL field set at a mesh STA for path selection * elements * @auto_open_plinks: whether we should automatically open peer links when we * detect compatible mesh peers * @dot11MeshNbrOffsetMaxNeighbor: the maximum number of neighbors to * synchronize to for 11s default synchronization method * @dot11MeshHWMPmaxPREQretries: the number of action frames containing a PREQ * that an originator mesh STA can send to a particular path target * @path_refresh_time: how frequently to refresh mesh paths in milliseconds * @min_discovery_timeout: the minimum length of time to wait until giving up on * a path discovery in milliseconds * @dot11MeshHWMPactivePathTimeout: the time (in TUs) for which mesh STAs * receiving a PREQ shall consider the forwarding information from the * root to be valid. (TU = time unit) * @dot11MeshHWMPpreqMinInterval: the minimum interval of time (in TUs) during * which a mesh STA can send only one action frame containing a PREQ * element * @dot11MeshHWMPperrMinInterval: the minimum interval of time (in TUs) during * which a mesh STA can send only one Action frame containing a PERR * element * @dot11MeshHWMPnetDiameterTraversalTime: the interval of time (in TUs) that * it takes for an HWMP information element to propagate across the mesh * @dot11MeshHWMPRootMode: the configuration of a mesh STA as root mesh STA * @dot11MeshHWMPRannInterval: the interval of time (in TUs) at which root * announcements are transmitted * @dot11MeshGateAnnouncementProtocol: whether to advertise that this mesh * station has access to a broader network beyond the MBSS. (This is * misnamed in draft 12.0: dot11MeshGateAnnouncementProtocol set to true * only means that the station will announce to others that it is a mesh * gate, but not necessarily using the gate announcement protocol.
Still keeping the * same nomenclature to be in sync with the spec) * @dot11MeshForwarding: whether the Mesh STA is a forwarding or non-forwarding * entity (default is TRUE - forwarding entity) * @rssi_threshold: the threshold for average signal strength of candidate * station to establish a peer link * @ht_opmode: mesh HT protection mode * * @dot11MeshHWMPactivePathToRootTimeout: The time (in TUs) for which mesh STAs * receiving a proactive PREQ shall consider the forwarding information to * the root mesh STA to be valid. * * @dot11MeshHWMProotInterval: The interval of time (in TUs) at which proactive * PREQs are transmitted. * @dot11MeshHWMPconfirmationInterval: The minimum interval of time (in TUs) * during which a mesh STA can send only one Action frame containing * a PREQ element for root path confirmation. * @power_mode: The default mesh power save mode which will be the initial * setting for new peer links. * @dot11MeshAwakeWindowDuration: The duration in TUs the STA will remain awake * after transmitting its beacon. * @plink_timeout: If no tx activity is seen from a STA we've established * peering with for longer than this time (in seconds), then remove it * from the STA's list of peers. Default is 30 minutes. * @dot11MeshConnectedToAuthServer: if set to true then this mesh STA * will advertise that it is connected to an authentication server * in the mesh formation field. * @dot11MeshConnectedToMeshGate: if set to true, advertise that this STA is * connected to a mesh gate in mesh formation info. If false, the * value in mesh formation is determined by the presence of root paths * in the mesh path table * @dot11MeshNolearn: Try to avoid multi-hop path discovery (e.g. PREQ/PREP * for HWMP) if the destination is a direct neighbor. Note that this might * not be the optimal decision as a multi-hop route might be better. So * if using this setting you will likely also want to disable * dot11MeshForwarding and use another mesh routing protocol on top.
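 *
 * Purely as an illustration (the values shown are arbitrary and not
 * defaults mandated by this header), overriding a few members might look
 * like:
 *
 *	struct mesh_config conf = {
 *		.dot11MeshRetryTimeout = 100,
 *		.dot11MeshMaxPeerLinks = 16,
 *		.dot11MeshTTL = 31,
 *		.auto_open_plinks = true,
 *		.plink_timeout = 1800,
 *	};
 *
 * with unspecified members left at their zero values by the designated
 * initializer.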
*/ struct mesh_config { u16 dot11MeshRetryTimeout; u16 dot11MeshConfirmTimeout; u16 dot11MeshHoldingTimeout; u16 dot11MeshMaxPeerLinks; u8 dot11MeshMaxRetries; u8 dot11MeshTTL; u8 element_ttl; bool auto_open_plinks; u32 dot11MeshNbrOffsetMaxNeighbor; u8 dot11MeshHWMPmaxPREQretries; u32 path_refresh_time; u16 min_discovery_timeout; u32 dot11MeshHWMPactivePathTimeout; u16 dot11MeshHWMPpreqMinInterval; u16 dot11MeshHWMPperrMinInterval; u16 dot11MeshHWMPnetDiameterTraversalTime; u8 dot11MeshHWMPRootMode; bool dot11MeshConnectedToMeshGate; bool dot11MeshConnectedToAuthServer; u16 dot11MeshHWMPRannInterval; bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; s32 rssi_threshold; u16 ht_opmode; u32 dot11MeshHWMPactivePathToRootTimeout; u16 dot11MeshHWMProotInterval; u16 dot11MeshHWMPconfirmationInterval; enum nl80211_mesh_power_mode power_mode; u16 dot11MeshAwakeWindowDuration; u32 plink_timeout; bool dot11MeshNolearn; }; /** * struct mesh_setup - 802.11s mesh setup configuration * @chandef: defines the channel to use * @mesh_id: the mesh ID * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes * @sync_method: which synchronization method to use * @path_sel_proto: which path selection protocol to use * @path_metric: which metric to use * @auth_id: which authentication method this mesh is using * @ie: vendor information elements (optional) * @ie_len: length of vendor information elements * @is_authenticated: this mesh requires authentication * @is_secure: this mesh uses security * @user_mpm: userspace handles all MPM functions * @dtim_period: DTIM period to use * @beacon_interval: beacon interval to use * @mcast_rate: multicast rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh * @beacon_rate: bitrate to be used for beacons * @userspace_handles_dfs: whether user space controls DFS operation, i.e. * changes the channel when a radar is detected. This is required * to operate on DFS channels. * @control_port_over_nl80211: TRUE if userspace expects to exchange control * port frames over NL80211 instead of the network interface. * * These parameters are fixed when the mesh is created. */ struct mesh_setup { struct cfg80211_chan_def chandef; const u8 *mesh_id; u8 mesh_id_len; u8 sync_method; u8 path_sel_proto; u8 path_metric; u8 auth_id; const u8 *ie; u8 ie_len; bool is_authenticated; bool is_secure; bool user_mpm; u8 dtim_period; u16 beacon_interval; int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; struct cfg80211_bitrate_mask beacon_rate; bool userspace_handles_dfs; bool control_port_over_nl80211; }; /** * struct ocb_setup - 802.11p OCB mode setup configuration * @chandef: defines the channel to use * * These parameters are fixed when connecting to the network */ struct ocb_setup { struct cfg80211_chan_def chandef; }; /** * struct ieee80211_txq_params - TX queue parameters * @ac: AC identifier * @txop: Maximum burst time in units of 32 usecs, 0 meaning disabled * @cwmin: Minimum contention window [a value of the form 2^n-1 in the range * 1..32767] * @cwmax: Maximum contention window [a value of the form 2^n-1 in the range * 1..32767] * @aifs: Arbitration interframe space [0..255] * @link_id: link_id or -1 for non-MLD */ struct ieee80211_txq_params { enum nl80211_ac ac; u16 txop; u16 cwmin; u16 cwmax; u8 aifs; int link_id; }; /** * DOC: Scanning and BSS list handling * * The scanning process itself is fairly simple, but cfg80211 offers quite * a bit of helper functionality. 
To start a scan, the scan operation will * be invoked with a scan definition. This scan definition contains the * channels to scan, and the SSIDs to send probe requests for (including the * wildcard, if desired). A passive scan is indicated by having no SSIDs to * probe. Additionally, a scan request may contain extra information elements * that should be added to the probe request. The IEs are guaranteed to be * well-formed, and will not exceed the maximum length the driver advertised * in the wiphy structure. * * When scanning finds a BSS, cfg80211 needs to be notified of that, because * it is responsible for maintaining the BSS list; the driver should not * maintain a list itself. For this notification, various functions exist. * * Since drivers do not maintain a BSS list, there are also a number of * functions to search for a BSS and obtain information about it from the * BSS structure cfg80211 maintains. The BSS list is also made available * to userspace. */ /** * struct cfg80211_ssid - SSID description * @ssid: the SSID * @ssid_len: length of the ssid */ struct cfg80211_ssid { u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; }; /** * struct cfg80211_scan_info - information about completed scan * @scan_start_tsf: scan start time in terms of the TSF of the BSS that the * wireless device that requested the scan is connected to. If this * information is not available, this field is left zero. * @tsf_bssid: the BSSID according to which %scan_start_tsf is set. * @aborted: set to true if the scan was aborted for any reason, * userspace will be notified of that */ struct cfg80211_scan_info { u64 scan_start_tsf; u8 tsf_bssid[ETH_ALEN] __aligned(2); bool aborted; }; /** * struct cfg80211_scan_6ghz_params - relevant for 6 GHz only * * @short_ssid: short ssid to scan for * @bssid: bssid to scan for * @channel_idx: idx of the channel in the channel array in the scan request * which the above info is relevant to * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU * @short_ssid_valid: @short_ssid is valid and can be used * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait * 20 TUs before starting to send probe requests. * @psd_20: The AP's 20 MHz PSD value. */ struct cfg80211_scan_6ghz_params { u32 short_ssid; u32 channel_idx; u8 bssid[ETH_ALEN]; bool unsolicited_probe; bool short_ssid_valid; bool psc_no_listen; s8 psd_20; }; /** * struct cfg80211_scan_request - scan request description * * @ssids: SSIDs to scan for (active scan only) * @n_ssids: number of SSIDs * @channels: channels to scan on. * @n_channels: total number of channels to scan * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @duration: how long to listen on each channel, in TUs. If * %duration_mandatory is not set, this is the maximum dwell time and * the actual dwell time may be shorter. * @duration_mandatory: if set, the scan duration must be as specified by the * %duration field. 
* @flags: control flags from &enum nl80211_scan_flags * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for * @info: (internal) information about completed scan * @notified: (internal) scan request was notified as done or aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band * @mac_addr: MAC address used with randomisation * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @scan_6ghz: relevant for split scan request only, * true if this is the second scan request * @n_6ghz_params: number of 6 GHz params * @scan_6ghz_params: 6 GHz params * @bssid: BSSID to scan for (most commonly, the wildcard BSSID) * @tsf_report_link_id: for MLO, indicates the link ID of the BSS that should be * used for TSF reporting. Can be set to -1 to indicate no preference. */ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; const u8 *ie; size_t ie_len; u16 duration; bool duration_mandatory; u32 flags; u32 rates[NUM_NL80211_BANDS]; struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); u8 bssid[ETH_ALEN] __aligned(2); /* internal */ struct wiphy *wiphy; unsigned long scan_start; struct cfg80211_scan_info info; bool notified; bool no_cck; bool scan_6ghz; u32 n_6ghz_params; struct cfg80211_scan_6ghz_params *scan_6ghz_params; s8 tsf_report_link_id; /* keep last */ struct ieee80211_channel *channels[] __counted_by(n_channels); }; static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) { int i; get_random_bytes(buf, ETH_ALEN); for (i = 0; i < ETH_ALEN; i++) { buf[i] &= ~mask[i]; buf[i] |= addr[i] & mask[i]; } } /** * struct cfg80211_match_set - sets of attributes to match * * @ssid: SSID to be matched; may be zero-length in case of BSSID match * or no match (RSSI only) * @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) */ struct cfg80211_match_set { struct cfg80211_ssid ssid; u8 bssid[ETH_ALEN]; s32 rssi_thold; }; /** * struct cfg80211_sched_scan_plan - scan plan for scheduled scan * * @interval: interval between scheduled scan iterations. In seconds. * @iterations: number of scan iterations in this scan plan. Zero means * infinite loop. * The last scan plan will always have this parameter set to zero, * all other scan plans will have a finite number of iterations. */ struct cfg80211_sched_scan_plan { u32 interval; u32 iterations; }; /** * struct cfg80211_bss_select_adjust - BSS selection with RSSI adjustment. * * @band: band of BSS which should match for RSSI level adjustment. * @delta: value of RSSI level adjustment. */ struct cfg80211_bss_select_adjust { enum nl80211_band band; s8 delta; }; /** * struct cfg80211_sched_scan_request - scheduled scan request description * * @reqid: identifies this request. 
* @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) * @n_ssids: number of SSIDs * @n_channels: total number of channels to scan * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @flags: control flags from &enum nl80211_scan_flags * @match_sets: sets of parameters to be matched for a scan result * entry to be considered valid and to be passed to the host * (others are filtered out). * If omitted, all results are passed. * @n_match_sets: number of match sets * @report_results: indicates that results were reported for this request * @wiphy: the wiphy this was for * @dev: the interface * @scan_start: start time of the scheduled scan * @channels: channels to scan * @min_rssi_thold: for drivers only supporting a single threshold, this * contains the minimum over all matchsets * @mac_addr: MAC address used with randomisation * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @scan_plans: scan plans to be executed in this scheduled scan. Lowest * index must be executed first. * @n_scan_plans: number of scan plans, at least 1. * @rcu_head: RCU callback used to free the struct * @owner_nlportid: netlink portid of owner (if this is a request * owned by a particular socket) * @nl_owner_dead: netlink owner socket was closed - this request needs to * be freed * @list: for keeping the list of requests. * @delay: delay in seconds to use before starting the first scan * cycle. The driver may ignore this parameter and start * immediately (or at any other time), if this feature is not * supported. * @relative_rssi_set: Indicates whether @relative_rssi is set or not. * @relative_rssi: Relative RSSI threshold in dB to restrict scan result * reporting in connected state to cases where a matching BSS is determined * to have better or slightly worse RSSI than the current connected BSS. * The relative RSSI threshold values are ignored in disconnected state. * @rssi_adjust: delta dB of RSSI preference to be given to the BSSs that belong * to the specified band while deciding whether a better BSS is reported * using @relative_rssi. If delta is a negative number, the BSSs that * belong to the specified band will be penalized by delta dB in relative * comparisons.
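 *
 * As an illustrative example of scan plans only: "scan every 10 seconds
 * for 3 iterations, then every 60 seconds until stopped" would be
 * expressed as
 *
 *	struct cfg80211_sched_scan_plan plans[] = {
 *		{ .interval = 10, .iterations = 3 },
 *		{ .interval = 60, .iterations = 0 },
 *	};
 *
 * with @scan_plans pointing at plans and @n_scan_plans set to 2; per the
 * description above, the last plan always has zero iterations and runs
 * until the scheduled scan is stopped.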
*/ struct cfg80211_sched_scan_request { u64 reqid; struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; const u8 *ie; size_t ie_len; u32 flags; struct cfg80211_match_set *match_sets; int n_match_sets; s32 min_rssi_thold; u32 delay; struct cfg80211_sched_scan_plan *scan_plans; int n_scan_plans; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); bool relative_rssi_set; s8 relative_rssi; struct cfg80211_bss_select_adjust rssi_adjust; /* internal */ struct wiphy *wiphy; struct net_device *dev; unsigned long scan_start; bool report_results; struct rcu_head rcu_head; u32 owner_nlportid; bool nl_owner_dead; struct list_head list; /* keep last */ struct ieee80211_channel *channels[]; }; /** * enum cfg80211_signal_type - signal type * * @CFG80211_SIGNAL_TYPE_NONE: no signal strength information available * @CFG80211_SIGNAL_TYPE_MBM: signal strength in mBm (100*dBm) * @CFG80211_SIGNAL_TYPE_UNSPEC: signal strength, increasing from 0 through 100 */ enum cfg80211_signal_type { CFG80211_SIGNAL_TYPE_NONE, CFG80211_SIGNAL_TYPE_MBM, CFG80211_SIGNAL_TYPE_UNSPEC, }; /** * struct cfg80211_inform_bss - BSS inform data * @chan: channel the frame was received on * @signal: signal strength value, according to the wiphy's * signal type * @boottime_ns: timestamp (CLOCK_BOOTTIME) when the information was * received; should match the time when the frame was actually * received by the device (not just by the host, in case it was * buffered on the device) and be accurate to about 10ms. * If the frame isn't buffered, just passing the return value of * ktime_get_boottime_ns() is likely appropriate. * @parent_tsf: the time at the start of reception of the first octet of the * timestamp field of the frame. The time is the TSF of the BSS specified * by %parent_bssid. * @parent_bssid: the BSS according to which %parent_tsf is set. This is set to * the BSS that requested the scan in which the beacon/probe was received. * @chains: bitmask for filled values in @chain_signal. * @chain_signal: per-chain signal strength of last received BSS in dBm. * @restrict_use: restrict usage, if not set, assume @use_for is * %NL80211_BSS_USE_FOR_NORMAL. * @use_for: bitmap of possible usage for this BSS, see * &enum nl80211_bss_use_for * @cannot_use_reasons: the reasons (bitmap) for not being able to connect, * if @restrict_use is set and @use_for is zero (empty); may be 0 for * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons * @drv_data: Data to be passed through to @inform_bss */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; s32 signal; u64 boottime_ns; u64 parent_tsf; u8 parent_bssid[ETH_ALEN] __aligned(2); u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 restrict_use:1, use_for:7; u8 cannot_use_reasons; void *drv_data; }; /** * struct cfg80211_bss_ies - BSS entry IE data * @tsf: TSF contained in the frame that carried these IEs * @rcu_head: internal use, for freeing * @len: length of the IEs * @from_beacon: these IEs are known to come from a beacon * @data: IE data */ struct cfg80211_bss_ies { u64 tsf; struct rcu_head rcu_head; int len; bool from_beacon; u8 data[]; }; /** * struct cfg80211_bss - BSS description * * This structure describes a BSS (which may also be a mesh network) * for use in scan results and similar. 
* * @channel: channel this BSS is on * @bssid: BSSID of the BSS * @beacon_interval: the beacon interval as from the frame * @capability: the capability field in host byte order * @ies: the information elements (Note that there is no guarantee that these * are well-formed!); this is a pointer to either the beacon_ies or * proberesp_ies depending on whether Probe Response frame has been * received. It is always non-%NULL. * @beacon_ies: the information elements from the last Beacon frame * (implementation note: if @hidden_beacon_bss is set this struct doesn't * own the beacon_ies, but they're just pointers to the ones from the * @hidden_beacon_bss struct) * @proberesp_ies: the information elements from the last Probe Response frame * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame, * cannot rely on it having valid data * @hidden_beacon_bss: in case this BSS struct represents a probe response from * a BSS that hides the SSID in its beacon, this points to the BSS struct * that holds the beacon data. @beacon_ies is still valid, of course, and * points to the same data as hidden_beacon_bss->beacon_ies in that case. * @transmitted_bss: pointer to the transmitted BSS, if this is a * non-transmitted one (multi-BSSID support) * @nontrans_list: list of non-transmitted BSS, if this is a transmitted one * (multi-BSSID support) * @signal: signal strength value (type depends on the wiphy's signal_type) * @chains: bitmask for filled values in @chain_signal. * @chain_signal: per-chain signal strength of last received BSS in dBm. * @bssid_index: index in the multiple BSS set * @max_bssid_indicator: max number of members in the BSS set * @use_for: bitmap of possible usage for this BSS, see * &enum nl80211_bss_use_for * @cannot_use_reasons: the reasons (bitmap) for not being able to connect, * if @restrict_use is set and @use_for is zero (empty); may be 0 for * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes */ struct cfg80211_bss { struct ieee80211_channel *channel; const struct cfg80211_bss_ies __rcu *ies; const struct cfg80211_bss_ies __rcu *beacon_ies; const struct cfg80211_bss_ies __rcu *proberesp_ies; struct cfg80211_bss *hidden_beacon_bss; struct cfg80211_bss *transmitted_bss; struct list_head nontrans_list; s32 signal; u16 beacon_interval; u16 capability; u8 bssid[ETH_ALEN]; u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 proberesp_ecsa_stuck:1; u8 bssid_index; u8 max_bssid_indicator; u8 use_for; u8 cannot_use_reasons; u8 priv[] __aligned(sizeof(void *)); }; /** * ieee80211_bss_get_elem - find element with given ID * @bss: the bss to search * @id: the element ID * * Note that the return value is an RCU-protected pointer, so * rcu_read_lock() must be held when calling this function. * Return: %NULL if not found. */ const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id); /** * ieee80211_bss_get_ie - find IE with given ID * @bss: the bss to search * @id: the element ID * * Note that the return value is an RCU-protected pointer, so * rcu_read_lock() must be held when calling this function. * Return: %NULL if not found. */ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id) { return (const void *)ieee80211_bss_get_elem(bss, id); } /** * struct cfg80211_auth_request - Authentication request data * * This structure provides information needed to complete IEEE 802.11 * authentication. 
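 *
 * Example (illustrative sketch only, not part of the original kernel-doc):
 * reading an element from the &struct cfg80211_bss documented above, e.g.
 * the SSID of @bss, must be done under RCU; drv_copy_bss_ssid() is
 * hypothetical driver code and @ssid is assumed to hold at least
 * IEEE80211_MAX_SSID_LEN octets::
 *
 *	static int drv_copy_bss_ssid(struct cfg80211_bss *bss, u8 *ssid)
 *	{
 *		const struct element *elem;
 *		int len = 0;
 *
 *		rcu_read_lock();	// elem points into RCU-protected IE data
 *		elem = ieee80211_bss_get_elem(bss, WLAN_EID_SSID);
 *		if (elem && elem->datalen <= IEEE80211_MAX_SSID_LEN) {
 *			len = elem->datalen;
 *			memcpy(ssid, elem->data, len);
 *		}
 *		rcu_read_unlock();
 *
 *		return len;
 *	}
 *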
* * @bss: The BSS to authenticate with, the callee must obtain a reference * to it if it needs to keep it. * @auth_type: Authentication type (algorithm) * @ie: Extra IEs to add to Authentication frame or %NULL * @ie_len: Length of ie buffer in octets * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication * @key: WEP key for shared key authentication * @auth_data: Fields and elements in Authentication frames. This contains * the authentication frame body (non-IE and IE data), excluding the * Authentication algorithm number, i.e., starting at the Authentication * transaction sequence number field. * @auth_data_len: Length of auth_data buffer in octets * @link_id: if >= 0, indicates authentication should be done as an MLD, * the interface address is included as the MLD address and the * necessary link (with the given link_id) will be created (and * given an MLD address) by the driver * @ap_mld_addr: AP MLD address in case of authentication request with * an AP MLD, valid iff @link_id >= 0 */ struct cfg80211_auth_request { struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; enum nl80211_auth_type auth_type; const u8 *key; u8 key_len; s8 key_idx; const u8 *auth_data; size_t auth_data_len; s8 link_id; const u8 *ap_mld_addr; }; /** * struct cfg80211_assoc_link - per-link information for MLO association * @bss: the BSS pointer, see also &struct cfg80211_assoc_request::bss; * if this is %NULL for a link, that link is not requested * @elems: extra elements for the per-STA profile for this link * @elems_len: length of the elements * @disabled: If set this link should be included during association etc. but it * should not be used until enabled by the AP MLD. * @error: per-link error code, must be <= 0. If there is an error, then the * operation as a whole must fail. */ struct cfg80211_assoc_link { struct cfg80211_bss *bss; const u8 *elems; size_t elems_len; bool disabled; int error; }; /** * enum cfg80211_assoc_req_flags - Over-ride default behaviour in association. * * @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n) * @ASSOC_REQ_DISABLE_VHT: Disable VHT * @ASSOC_REQ_USE_RRM: Declare RRM capability in this association * @CONNECT_REQ_EXTERNAL_AUTH_SUPPORT: User space indicates external * authentication capability. Drivers can offload authentication to * userspace if this flag is set. Only applicable for cfg80211_connect() * request (connect callback). * @ASSOC_REQ_DISABLE_HE: Disable HE * @ASSOC_REQ_DISABLE_EHT: Disable EHT * @CONNECT_REQ_MLO_SUPPORT: Userspace indicates support for handling MLD links. * Drivers shall disable MLO features for the current association if this * flag is not set. * @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any) */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), ASSOC_REQ_DISABLE_VHT = BIT(1), ASSOC_REQ_USE_RRM = BIT(2), CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3), ASSOC_REQ_DISABLE_HE = BIT(4), ASSOC_REQ_DISABLE_EHT = BIT(5), CONNECT_REQ_MLO_SUPPORT = BIT(6), ASSOC_REQ_SPP_AMSDU = BIT(7), }; /** * struct cfg80211_assoc_request - (Re)Association request data * * This structure provides information needed to complete IEEE 802.11 * (re)association. * @bss: The BSS to associate with. If the call is successful the driver is * given a reference that it must give back to cfg80211_send_rx_assoc() * or to cfg80211_assoc_timeout(). To ensure proper refcounting, new * association requests while already associating must be rejected. 
* This also applies to the @links.bss parameter, which is used instead * of this one (it is %NULL) for MLO associations. * @ie: Extra IEs to add to (Re)Association Request frame or %NULL * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association * @crypto: crypto settings * @prev_bssid: previous BSSID, if not %NULL use reassociate frame. This is used * to indicate a request to reassociate within the ESS instead of a request * do the initial association with the ESS. When included, this is set to * the BSSID of the current association, i.e., to the value that is * included in the Current AP address field of the Reassociation Request * frame. * @flags: See &enum cfg80211_assoc_req_flags * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT capability override * @vht_capa_mask: VHT capability mask indicating which fields to use * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or * %NULL if FILS is not used. * @fils_kek_len: Length of fils_kek in octets * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association * Request/Response frame or %NULL if FILS is not used. This field starts * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. * @s1g_capa: S1G capability override * @s1g_capa_mask: S1G capability override mask * @links: per-link information for MLO connections * @link_id: >= 0 for MLO connections, where links are given, and indicates * the link on which the association request should be sent * @ap_mld_addr: AP MLD address in case of MLO association request, * valid iff @link_id >= 0 */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; const u8 *ie, *prev_bssid; size_t ie_len; struct cfg80211_crypto_settings crypto; bool use_mfp; u32 flags; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa, vht_capa_mask; const u8 *fils_kek; size_t fils_kek_len; const u8 *fils_nonces; struct ieee80211_s1g_cap s1g_capa, s1g_capa_mask; struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS]; const u8 *ap_mld_addr; s8 link_id; }; /** * struct cfg80211_deauth_request - Deauthentication request data * * This structure provides information needed to complete IEEE 802.11 * deauthentication. * * @bssid: the BSSID or AP MLD address to deauthenticate from * @ie: Extra IEs to add to Deauthentication frame or %NULL * @ie_len: Length of ie buffer in octets * @reason_code: The reason code for the deauthentication * @local_state_change: if set, change local state only and * do not set a deauth frame */ struct cfg80211_deauth_request { const u8 *bssid; const u8 *ie; size_t ie_len; u16 reason_code; bool local_state_change; }; /** * struct cfg80211_disassoc_request - Disassociation request data * * This structure provides information needed to complete IEEE 802.11 * disassociation. * * @ap_addr: the BSSID or AP MLD address to disassociate from * @ie: Extra IEs to add to Disassociation frame or %NULL * @ie_len: Length of ie buffer in octets * @reason_code: The reason code for the disassociation * @local_state_change: This is a request for a local state only, i.e., no * Disassociation frame is to be transmitted. 
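 *
 * Example (illustrative sketch only, not part of the original kernel-doc;
 * the drv_*() helpers are hypothetical driver code)::
 *
 *	static int drv_disassoc(struct wiphy *wiphy, struct net_device *dev,
 *				struct cfg80211_disassoc_request *req)
 *	{
 *		// only transmit a Disassociation frame when this is not a
 *		// local-state-only request
 *		if (!req->local_state_change)
 *			drv_tx_disassoc_frame(dev, req->ap_addr,
 *					      req->reason_code,
 *					      req->ie, req->ie_len);
 *
 *		drv_clear_association_state(dev);
 *		return 0;
 *	}
 *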
*/ struct cfg80211_disassoc_request { const u8 *ap_addr; const u8 *ie; size_t ie_len; u16 reason_code; bool local_state_change; }; /** * struct cfg80211_ibss_params - IBSS parameters * * This structure defines the IBSS parameters for the join_ibss() * method. * * @ssid: The SSID, will always be non-null. * @ssid_len: The length of the SSID, will always be non-zero. * @bssid: Fixed BSSID requested, maybe be %NULL, if set do not * search for IBSSs with a different BSSID. * @chandef: defines the channel to use if no other IBSS to join can be found * @channel_fixed: The channel should be fixed -- do not search for * IBSSs to join on other channels. * @ie: information element(s) to include in the beacon * @ie_len: length of that * @beacon_interval: beacon interval to use * @privacy: this is a protected network, keys will be configured * after joining * @control_port: whether user space controls IEEE 802.1X port, i.e., * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. * @control_port_over_nl80211: TRUE if userspace expects to exchange control * port frames over NL80211 instead of the network interface. * @userspace_handles_dfs: whether user space controls DFS operation, i.e. * changes the channel when a radar is detected. This is required * to operate on DFS channels. * @basic_rates: bitmap of basic rates to use when creating the IBSS * @mcast_rate: per-band multicast rate index + 1 (0: disabled) * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. * @wep_keys: static WEP keys, if not NULL points to an array of * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key */ struct cfg80211_ibss_params { const u8 *ssid; const u8 *bssid; struct cfg80211_chan_def chandef; const u8 *ie; u8 ssid_len, ie_len; u16 beacon_interval; u32 basic_rates; bool channel_fixed; bool privacy; bool control_port; bool control_port_over_nl80211; bool userspace_handles_dfs; int mcast_rate[NUM_NL80211_BANDS]; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct key_params *wep_keys; int wep_tx_key; }; /** * struct cfg80211_bss_selection - connection parameters for BSS selection. * * @behaviour: requested BSS selection behaviour. * @param: parameters for requestion behaviour. * @param.band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF. * @param.adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST. */ struct cfg80211_bss_selection { enum nl80211_bss_select_attr behaviour; union { enum nl80211_band band_pref; struct cfg80211_bss_select_adjust adjust; } param; }; /** * struct cfg80211_connect_params - Connection parameters * * This structure provides information needed to complete IEEE 802.11 * authentication and association. * * @channel: The channel to use or %NULL if not specified (auto-select based * on scan results) * @channel_hint: The channel of the recommended BSS for initial connection or * %NULL if not specified * @bssid: The AP BSSID or %NULL if not specified (auto-select based on scan * results) * @bssid_hint: The recommended AP BSSID for initial connection to the BSS or * %NULL if not specified. Unlike the @bssid parameter, the driver is * allowed to ignore this @bssid_hint if it has knowledge of a better BSS * to use. 
* @ssid: SSID * @ssid_len: Length of ssid in octets * @auth_type: Authentication type (algorithm) * @ie: IEs for association request * @ie_len: Length of assoc_ie in octets * @privacy: indicates whether privacy-enabled APs should be used * @mfp: indicate whether management frame protection is used * @crypto: crypto settings * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication * @key: WEP key for shared key authentication * @flags: See &enum cfg80211_assoc_req_flags * @bg_scan_period: Background scan period in seconds * or -1 to indicate that default value is to be used. * @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask * will be used in ht_capa. Un-supported values will be ignored. * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT Capability overrides * @vht_capa_mask: The bits of vht_capa which are to be used. * @pbss: if set, connect to a PCP instead of AP. Valid for DMG * networks. * @bss_select: criteria to be used for BSS selection. * @prev_bssid: previous BSSID, if not %NULL use reassociate frame. This is used * to indicate a request to reassociate within the ESS instead of a request * do the initial association with the ESS. When included, this is set to * the BSSID of the current association, i.e., to the value that is * included in the Current AP address field of the Reassociation Request * frame. * @fils_erp_username: EAP re-authentication protocol (ERP) username part of the * NAI or %NULL if not specified. This is used to construct FILS wrapped * data IE. * @fils_erp_username_len: Length of @fils_erp_username in octets. * @fils_erp_realm: EAP re-authentication protocol (ERP) realm part of NAI or * %NULL if not specified. This specifies the domain name of ER server and * is used to construct FILS wrapped data IE. * @fils_erp_realm_len: Length of @fils_erp_realm in octets. * @fils_erp_next_seq_num: The next sequence number to use in the FILS ERP * messages. This is also used to construct FILS wrapped data IE. * @fils_erp_rrk: ERP re-authentication Root Key (rRK) used to derive additional * keys in FILS or %NULL if not specified. * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets. * @want_1x: indicates user-space supports and wants to use 802.1X driver * offload of 4-way handshake. * @edmg: define the EDMG channels. * This may specify multiple channels and bonding options for the driver * to choose from, based on BSS configuration. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; struct ieee80211_channel *channel_hint; const u8 *bssid; const u8 *bssid_hint; const u8 *ssid; size_t ssid_len; enum nl80211_auth_type auth_type; const u8 *ie; size_t ie_len; bool privacy; enum nl80211_mfp mfp; struct cfg80211_crypto_settings crypto; const u8 *key; u8 key_len, key_idx; u32 flags; int bg_scan_period; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa; struct ieee80211_vht_cap vht_capa_mask; bool pbss; struct cfg80211_bss_selection bss_select; const u8 *prev_bssid; const u8 *fils_erp_username; size_t fils_erp_username_len; const u8 *fils_erp_realm; size_t fils_erp_realm_len; u16 fils_erp_next_seq_num; const u8 *fils_erp_rrk; size_t fils_erp_rrk_len; bool want_1x; struct ieee80211_edmg edmg; }; /** * enum cfg80211_connect_params_changed - Connection parameters being updated * * This enum provides information of all connect parameters that * have to be updated as part of update_connect_params() call. 
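 *
 * Example (illustrative sketch only, not part of the original kernel-doc;
 * the drv_set_*() helpers are hypothetical driver code)::
 *
 *	static int drv_update_connect_params(struct wiphy *wiphy,
 *					     struct net_device *dev,
 *					     struct cfg80211_connect_params *sme,
 *					     u32 changed)
 *	{
 *		if (changed & UPDATE_ASSOC_IES)
 *			drv_set_assoc_ies(dev, sme->ie, sme->ie_len);
 *		if (changed & UPDATE_AUTH_TYPE)
 *			drv_set_auth_type(dev, sme->auth_type);
 *		if (changed & UPDATE_FILS_ERP_INFO)
 *			drv_set_fils_erp(dev, sme->fils_erp_username,
 *					 sme->fils_erp_username_len,
 *					 sme->fils_erp_rrk,
 *					 sme->fils_erp_rrk_len);
 *
 *		return 0;
 *	}
 *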
* * @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated * @UPDATE_FILS_ERP_INFO: Indicates that FILS connection parameters (realm, * username, erp sequence number and rrk) are updated * @UPDATE_AUTH_TYPE: Indicates that authentication type is updated */ enum cfg80211_connect_params_changed { UPDATE_ASSOC_IES = BIT(0), UPDATE_FILS_ERP_INFO = BIT(1), UPDATE_AUTH_TYPE = BIT(2), }; /** * enum wiphy_params_flags - set_wiphy_params bitfield values * @WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed * @WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed * @WIPHY_PARAM_FRAG_THRESHOLD: wiphy->frag_threshold has changed * @WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed * @WIPHY_PARAM_COVERAGE_CLASS: coverage class changed * @WIPHY_PARAM_DYN_ACK: dynack has been enabled * @WIPHY_PARAM_TXQ_LIMIT: TXQ packet limit has been changed * @WIPHY_PARAM_TXQ_MEMORY_LIMIT: TXQ memory limit has been changed * @WIPHY_PARAM_TXQ_QUANTUM: TXQ scheduler quantum */ enum wiphy_params_flags { WIPHY_PARAM_RETRY_SHORT = 1 << 0, WIPHY_PARAM_RETRY_LONG = 1 << 1, WIPHY_PARAM_FRAG_THRESHOLD = 1 << 2, WIPHY_PARAM_RTS_THRESHOLD = 1 << 3, WIPHY_PARAM_COVERAGE_CLASS = 1 << 4, WIPHY_PARAM_DYN_ACK = 1 << 5, WIPHY_PARAM_TXQ_LIMIT = 1 << 6, WIPHY_PARAM_TXQ_MEMORY_LIMIT = 1 << 7, WIPHY_PARAM_TXQ_QUANTUM = 1 << 8, }; #define IEEE80211_DEFAULT_AIRTIME_WEIGHT 256 /* The per TXQ device queue limit in airtime */ #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000 #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000 /* The per interface airtime threshold to switch to lower queue limit */ #define IEEE80211_AQL_THRESHOLD 24000 /** * struct cfg80211_pmksa - PMK Security Association * * This structure is passed to the set/del_pmksa() method for PMKSA * caching. * * @bssid: The AP's BSSID (may be %NULL). * @pmkid: The identifier to refer a PMKSA. * @pmk: The PMK for the PMKSA identified by @pmkid. This is used for key * derivation by a FILS STA. Otherwise, %NULL. * @pmk_len: Length of the @pmk. The length of @pmk can differ depending on * the hash algorithm used to generate this. * @ssid: SSID to specify the ESS within which a PMKSA is valid when using FILS * cache identifier (may be %NULL). * @ssid_len: Length of the @ssid in octets. * @cache_id: 2-octet cache identifier advertized by a FILS AP identifying the * scope of PMKSA. This is valid only if @ssid_len is non-zero (may be * %NULL). * @pmk_lifetime: Maximum lifetime for PMKSA in seconds * (dot11RSNAConfigPMKLifetime) or 0 if not specified. * The configured PMKSA must not be used for PMKSA caching after * expiration and any keys derived from this PMK become invalid on * expiration, i.e., the current association must be dropped if the PMK * used for it expires. * @pmk_reauth_threshold: Threshold time for reauthentication (percentage of * PMK lifetime, dot11RSNAConfigPMKReauthThreshold) or 0 if not specified. * Drivers are expected to trigger a full authentication instead of using * this PMKSA for caching when reassociating to a new BSS after this * threshold to generate a new PMK before the current one expires. 
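 *
 * Example (illustrative sketch only, not part of the original kernel-doc;
 * the drv_cache_*() helpers are hypothetical driver code)::
 *
 *	static int drv_set_pmksa(struct wiphy *wiphy, struct net_device *dev,
 *				 struct cfg80211_pmksa *pmksa)
 *	{
 *		// regular PMKSA caching is keyed by BSSID ...
 *		if (pmksa->bssid)
 *			return drv_cache_pmkid_by_bssid(dev, pmksa->bssid,
 *							pmksa->pmkid);
 *
 *		// ... while FILS cache entries are keyed by SSID + cache id
 *		if (pmksa->ssid_len && pmksa->cache_id)
 *			return drv_cache_pmkid_fils(dev, pmksa->ssid,
 *						    pmksa->ssid_len,
 *						    pmksa->cache_id,
 *						    pmksa->pmkid);
 *
 *		return -EINVAL;
 *	}
 *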
*/ struct cfg80211_pmksa { const u8 *bssid; const u8 *pmkid; const u8 *pmk; size_t pmk_len; const u8 *ssid; size_t ssid_len; const u8 *cache_id; u32 pmk_lifetime; u8 pmk_reauth_threshold; }; /** * struct cfg80211_pkt_pattern - packet pattern * @mask: bitmask where to match pattern and where to ignore bytes, * one bit per byte, in same format as nl80211 * @pattern: bytes to match where bitmask is 1 * @pattern_len: length of pattern (in bytes) * @pkt_offset: packet offset (in bytes) * * Internal note: @mask and @pattern are allocated in one chunk of * memory, free @mask only! */ struct cfg80211_pkt_pattern { const u8 *mask, *pattern; int pattern_len; int pkt_offset; }; /** * struct cfg80211_wowlan_tcp - TCP connection parameters * * @sock: (internal) socket for source port allocation * @src: source IP address * @dst: destination IP address * @dst_mac: destination MAC address * @src_port: source port * @dst_port: destination port * @payload_len: data payload length * @payload: data payload buffer * @payload_seq: payload sequence stamping configuration * @data_interval: interval at which to send data packets * @wake_len: wakeup payload match length * @wake_data: wakeup payload match data * @wake_mask: wakeup payload match mask * @tokens_size: length of the tokens buffer * @payload_tok: payload token usage configuration */ struct cfg80211_wowlan_tcp { struct socket *sock; __be32 src, dst; u16 src_port, dst_port; u8 dst_mac[ETH_ALEN]; int payload_len; const u8 *payload; struct nl80211_wowlan_tcp_data_seq payload_seq; u32 data_interval; u32 wake_len; const u8 *wake_data, *wake_mask; u32 tokens_size; /* must be last, variable member */ struct nl80211_wowlan_tcp_data_token payload_tok; }; /** * struct cfg80211_wowlan - Wake on Wireless-LAN support info * * This structure defines the enabled WoWLAN triggers for the device. * @any: wake up on any activity -- special trigger if device continues * operating as normal during suspend * @disconnect: wake up if getting disconnected * @magic_pkt: wake up on receiving magic packet * @patterns: wake up on receiving packet matching a pattern * @n_patterns: number of patterns * @gtk_rekey_failure: wake up on GTK rekey failure * @eap_identity_req: wake up on EAP identity request packet * @four_way_handshake: wake up on 4-way handshake * @rfkill_release: wake up when rfkill is released * @tcp: TCP connection establishment/wakeup parameters, see nl80211.h. * NULL if not configured. * @nd_config: configuration for the scan to be used for net detect wake. */ struct cfg80211_wowlan { bool any, disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release; struct cfg80211_pkt_pattern *patterns; struct cfg80211_wowlan_tcp *tcp; int n_patterns; struct cfg80211_sched_scan_request *nd_config; }; /** * struct cfg80211_coalesce_rules - Coalesce rule parameters * * This structure defines coalesce rule for the device. * @delay: maximum coalescing delay in msecs. * @condition: condition for packet coalescence. * see &enum nl80211_coalesce_condition. * @patterns: array of packet patterns * @n_patterns: number of patterns */ struct cfg80211_coalesce_rules { int delay; enum nl80211_coalesce_condition condition; struct cfg80211_pkt_pattern *patterns; int n_patterns; }; /** * struct cfg80211_coalesce - Packet coalescing settings * * This structure defines coalescing settings. 
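 *
 * Example (illustrative sketch only, not part of the original kernel-doc):
 * how a &struct cfg80211_pkt_pattern (see above) is intended to be matched,
 * with one mask bit per pattern byte; pkt_matches_pattern() is hypothetical
 * and such matching is normally done by the device firmware::
 *
 *	static bool pkt_matches_pattern(const u8 *pkt, int pkt_len,
 *					const struct cfg80211_pkt_pattern *p)
 *	{
 *		int i;
 *
 *		if (p->pkt_offset + p->pattern_len > pkt_len)
 *			return false;
 *
 *		for (i = 0; i < p->pattern_len; i++) {
 *			if (!(p->mask[i / 8] & BIT(i % 8)))
 *				continue;	// masked-out byte, don't care
 *			if (pkt[p->pkt_offset + i] != p->pattern[i])
 *				return false;
 *		}
 *
 *		return true;
 *	}
 *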
* @rules: array of coalesce rules * @n_rules: number of rules */ struct cfg80211_coalesce { struct cfg80211_coalesce_rules *rules; int n_rules; }; /** * struct cfg80211_wowlan_nd_match - information about the match * * @ssid: SSID of the match that triggered the wake up * @n_channels: Number of channels where the match occurred. This * value may be zero if the driver can't report the channels. * @channels: center frequencies of the channels where a match * occurred (in MHz) */ struct cfg80211_wowlan_nd_match { struct cfg80211_ssid ssid; int n_channels; u32 channels[]; }; /** * struct cfg80211_wowlan_nd_info - net detect wake up information * * @n_matches: Number of match information instances provided in * @matches. This value may be zero if the driver can't provide * match information. * @matches: Array of pointers to matches containing information about * the matches that triggered the wake up. */ struct cfg80211_wowlan_nd_info { int n_matches; struct cfg80211_wowlan_nd_match *matches[]; }; /** * struct cfg80211_wowlan_wakeup - wakeup report * @disconnect: woke up by getting disconnected * @magic_pkt: woke up by receiving magic packet * @gtk_rekey_failure: woke up by GTK rekey failure * @eap_identity_req: woke up by EAP identity request packet * @four_way_handshake: woke up by 4-way handshake * @rfkill_release: woke up by rfkill being released * @pattern_idx: pattern that caused wakeup, -1 if not due to pattern * @packet_present_len: copied wakeup packet data * @packet_len: original wakeup packet length * @packet: The packet causing the wakeup, if any. * @packet_80211: For pattern match, magic packet and other data * frame triggers an 802.3 frame should be reported, for * disconnect due to deauth 802.11 frame. This indicates which * it is. * @tcp_match: TCP wakeup packet received * @tcp_connlost: TCP connection lost or failed to establish * @tcp_nomoretokens: TCP data ran out of tokens * @net_detect: if not %NULL, woke up because of net detect * @unprot_deauth_disassoc: woke up due to unprotected deauth or * disassoc frame (in MFP). 
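 *
 * Example (illustrative sketch only, not part of the original kernel-doc;
 * drv_report_resume_reason() is hypothetical driver code run at resume
 * time)::
 *
 *	static void drv_report_resume_reason(struct wireless_dev *wdev,
 *					     bool was_magic_pkt)
 *	{
 *		struct cfg80211_wowlan_wakeup wakeup = {
 *			.magic_pkt = was_magic_pkt,
 *			.pattern_idx = -1,	// not a pattern wakeup
 *		};
 *
 *		// a NULL wakeup pointer would report "woke up, reason unknown"
 *		cfg80211_report_wowlan_wakeup(wdev, &wakeup, GFP_KERNEL);
 *	}
 *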
*/ struct cfg80211_wowlan_wakeup { bool disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release, packet_80211, tcp_match, tcp_connlost, tcp_nomoretokens, unprot_deauth_disassoc; s32 pattern_idx; u32 packet_present_len, packet_len; const void *packet; struct cfg80211_wowlan_nd_info *net_detect; }; /** * struct cfg80211_gtk_rekey_data - rekey data * @kek: key encryption key (@kek_len bytes) * @kck: key confirmation key (@kck_len bytes) * @replay_ctr: replay counter (NL80211_REPLAY_CTR_LEN bytes) * @kek_len: length of kek * @kck_len: length of kck * @akm: akm (oui, id) */ struct cfg80211_gtk_rekey_data { const u8 *kek, *kck, *replay_ctr; u32 akm; u8 kek_len, kck_len; }; /** * struct cfg80211_update_ft_ies_params - FT IE Information * * This structure provides information needed to update the fast transition IE * * @md: The Mobility Domain ID, 2 Octet value * @ie: Fast Transition IEs * @ie_len: Length of ft_ie in octets */ struct cfg80211_update_ft_ies_params { u16 md; const u8 *ie; size_t ie_len; }; /** * struct cfg80211_mgmt_tx_params - mgmt tx parameters * * This structure provides information needed to transmit a mgmt frame * * @chan: channel to use * @offchan: indicates whether off channel operation is required * @wait: duration for ROC * @buf: buffer to transmit * @len: buffer length * @no_cck: don't use cck rates for this frame * @dont_wait_for_ack: tells the low level not to wait for an ack * @n_csa_offsets: length of csa_offsets array * @csa_offsets: array of all the csa offsets in the frame * @link_id: for MLO, the link ID to transmit on, -1 if not given; note * that the link ID isn't validated (much), it's in range but the * link might not exist (or be used by the receiver STA) */ struct cfg80211_mgmt_tx_params { struct ieee80211_channel *chan; bool offchan; unsigned int wait; const u8 *buf; size_t len; bool no_cck; bool dont_wait_for_ack; int n_csa_offsets; const u16 *csa_offsets; int link_id; }; /** * struct cfg80211_dscp_exception - DSCP exception * * @dscp: DSCP value that does not adhere to the user priority range definition * @up: user priority value to which the corresponding DSCP value belongs */ struct cfg80211_dscp_exception { u8 dscp; u8 up; }; /** * struct cfg80211_dscp_range - DSCP range definition for user priority * * @low: lowest DSCP value of this user priority range, inclusive * @high: highest DSCP value of this user priority range, inclusive */ struct cfg80211_dscp_range { u8 low; u8 high; }; /* QoS Map Set element length defined in IEEE Std 802.11-2012, 8.4.2.97 */ #define IEEE80211_QOS_MAP_MAX_EX 21 #define IEEE80211_QOS_MAP_LEN_MIN 16 #define IEEE80211_QOS_MAP_LEN_MAX \ (IEEE80211_QOS_MAP_LEN_MIN + 2 * IEEE80211_QOS_MAP_MAX_EX) /** * struct cfg80211_qos_map - QoS Map Information * * This struct defines the Interworking QoS map setting for DSCP values * * @num_des: number of DSCP exceptions (0..21) * @dscp_exception: optionally up to maximum of 21 DSCP exceptions from * the user priority DSCP range definition * @up: DSCP range definition for a particular user priority */ struct cfg80211_qos_map { u8 num_des; struct cfg80211_dscp_exception dscp_exception[IEEE80211_QOS_MAP_MAX_EX]; struct cfg80211_dscp_range up[8]; }; /** * struct cfg80211_nan_conf - NAN configuration * * This struct defines NAN configuration parameters * * @master_pref: master preference (1 - 255) * @bands: operating bands, a bitmap of &enum nl80211_band values. * For instance, for NL80211_BAND_2GHZ, bit 0 would be set * (i.e. BIT(NL80211_BAND_2GHZ)). 
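 *
 * Example (illustrative sketch only, not part of the original kernel-doc;
 * drv_start_nan() and drv_nan_fw_start() are hypothetical driver code)::
 *
 *	static int drv_start_nan(struct wiphy *wiphy, struct wireless_dev *wdev,
 *				 struct cfg80211_nan_conf *conf)
 *	{
 *		// hypothetical hardware limitation: no NAN on 6 GHz
 *		if (conf->bands & BIT(NL80211_BAND_6GHZ))
 *			return -EOPNOTSUPP;
 *
 *		return drv_nan_fw_start(wdev, conf->master_pref, conf->bands);
 *	}
 *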
*/ struct cfg80211_nan_conf { u8 master_pref; u8 bands; }; /** * enum cfg80211_nan_conf_changes - indicates changed fields in NAN * configuration * * @CFG80211_NAN_CONF_CHANGED_PREF: master preference * @CFG80211_NAN_CONF_CHANGED_BANDS: operating bands */ enum cfg80211_nan_conf_changes { CFG80211_NAN_CONF_CHANGED_PREF = BIT(0), CFG80211_NAN_CONF_CHANGED_BANDS = BIT(1), }; /** * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter * * @filter: the content of the filter * @len: the length of the filter */ struct cfg80211_nan_func_filter { const u8 *filter; u8 len; }; /** * struct cfg80211_nan_func - a NAN function * * @type: &enum nl80211_nan_function_type * @service_id: the service ID of the function * @publish_type: &nl80211_nan_publish_type * @close_range: if true, the range should be limited. Threshold is * implementation specific. * @publish_bcast: if true, the solicited publish should be broadcasted * @subscribe_active: if true, the subscribe is active * @followup_id: the instance ID for follow up * @followup_reqid: the requester instance ID for follow up * @followup_dest: MAC address of the recipient of the follow up * @ttl: time to live counter in DW. * @serv_spec_info: Service Specific Info * @serv_spec_info_len: Service Specific Info length * @srf_include: if true, SRF is inclusive * @srf_bf: Bloom Filter * @srf_bf_len: Bloom Filter length * @srf_bf_idx: Bloom Filter index * @srf_macs: SRF MAC addresses * @srf_num_macs: number of MAC addresses in SRF * @rx_filters: rx filters that are matched with corresponding peer's tx_filter * @tx_filters: filters that should be transmitted in the SDF. * @num_rx_filters: length of &rx_filters. * @num_tx_filters: length of &tx_filters. * @instance_id: driver allocated id of the function. * @cookie: unique NAN function identifier. */ struct cfg80211_nan_func { enum nl80211_nan_function_type type; u8 service_id[NL80211_NAN_FUNC_SERVICE_ID_LEN]; u8 publish_type; bool close_range; bool publish_bcast; bool subscribe_active; u8 followup_id; u8 followup_reqid; struct mac_address followup_dest; u32 ttl; const u8 *serv_spec_info; u8 serv_spec_info_len; bool srf_include; const u8 *srf_bf; u8 srf_bf_len; u8 srf_bf_idx; struct mac_address *srf_macs; int srf_num_macs; struct cfg80211_nan_func_filter *rx_filters; struct cfg80211_nan_func_filter *tx_filters; u8 num_tx_filters; u8 num_rx_filters; u8 instance_id; u64 cookie; }; /** * struct cfg80211_pmk_conf - PMK configuration * * @aa: authenticator address * @pmk_len: PMK length in bytes. * @pmk: the PMK material * @pmk_r0_name: PMK-R0 Name. NULL if not applicable (i.e., the PMK * is not PMK-R0). When pmk_r0_name is not NULL, the pmk field * holds PMK-R0. */ struct cfg80211_pmk_conf { const u8 *aa; u8 pmk_len; const u8 *pmk; const u8 *pmk_r0_name; }; /** * struct cfg80211_external_auth_params - Trigger External authentication. * * Commonly used across the external auth request and event interfaces. * * @action: action type / trigger for external authentication. Only significant * for the authentication request event interface (driver to user space). * @bssid: BSSID of the peer with which the authentication has * to happen. Used by both the authentication request event and * authentication response command interface. * @ssid: SSID of the AP. Used by both the authentication request event and * authentication response command interface. * @key_mgmt_suite: AKM suite of the respective authentication. Used by the * authentication request event interface. 
* @status: status code, %WLAN_STATUS_SUCCESS for successful authentication, * use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you * the real status code for failures. Used only for the authentication * response command interface (user space to driver). * @pmkid: The identifier to refer a PMKSA. * @mld_addr: MLD address of the peer. Used by the authentication request event * interface. Driver indicates this to enable MLO during the authentication * offload to user space. Driver shall look at %NL80211_ATTR_MLO_SUPPORT * flag capability in NL80211_CMD_CONNECT to know whether the user space * supports enabling MLO during the authentication offload. * User space should use the address of the interface (on which the * authentication request event reported) as self MLD address. User space * and driver should use MLD addresses in RA, TA and BSSID fields of * authentication frames sent or received via cfg80211. The driver * translates the MLD addresses to/from link addresses based on the link * chosen for the authentication. */ struct cfg80211_external_auth_params { enum nl80211_external_auth_action action; u8 bssid[ETH_ALEN] __aligned(2); struct cfg80211_ssid ssid; unsigned int key_mgmt_suite; u16 status; const u8 *pmkid; u8 mld_addr[ETH_ALEN] __aligned(2); }; /** * struct cfg80211_ftm_responder_stats - FTM responder statistics * * @filled: bitflag of flags using the bits of &enum nl80211_ftm_stats to * indicate the relevant values in this struct for them * @success_num: number of FTM sessions in which all frames were successfully * answered * @partial_num: number of FTM sessions in which part of frames were * successfully answered * @failed_num: number of failed FTM sessions * @asap_num: number of ASAP FTM sessions * @non_asap_num: number of non-ASAP FTM sessions * @total_duration_ms: total sessions durations - gives an indication * of how much time the responder was busy * @unknown_triggers_num: number of unknown FTM triggers - triggers from * initiators that didn't finish successfully the negotiation phase with * the responder * @reschedule_requests_num: number of FTM reschedule requests - initiator asks * for a new scheduling although it already has scheduled FTM slot * @out_of_window_triggers_num: total FTM triggers out of scheduled window */ struct cfg80211_ftm_responder_stats { u32 filled; u32 success_num; u32 partial_num; u32 failed_num; u32 asap_num; u32 non_asap_num; u64 total_duration_ms; u32 unknown_triggers_num; u32 reschedule_requests_num; u32 out_of_window_triggers_num; }; /** * struct cfg80211_pmsr_ftm_result - FTM result * @failure_reason: if this measurement failed (PMSR status is * %NL80211_PMSR_STATUS_FAILURE), this gives a more precise * reason than just "failure" * @burst_index: if reporting partial results, this is the index * in [0 .. 
num_bursts-1] of the burst that's being reported * @num_ftmr_attempts: number of FTM request frames transmitted * @num_ftmr_successes: number of FTM request frames acked * @busy_retry_time: if failure_reason is %NL80211_PMSR_FTM_FAILURE_PEER_BUSY, * fill this to indicate in how many seconds a retry is deemed possible * by the responder * @num_bursts_exp: actual number of bursts exponent negotiated * @burst_duration: actual burst duration negotiated * @ftms_per_burst: actual FTMs per burst negotiated * @lci_len: length of LCI information (if present) * @civicloc_len: length of civic location information (if present) * @lci: LCI data (may be %NULL) * @civicloc: civic location data (may be %NULL) * @rssi_avg: average RSSI over FTM action frames reported * @rssi_spread: spread of the RSSI over FTM action frames reported * @tx_rate: bitrate for transmitted FTM action frame response * @rx_rate: bitrate of received FTM action frame * @rtt_avg: average of RTTs measured (must have either this or @dist_avg) * @rtt_variance: variance of RTTs measured (note that standard deviation is * the square root of the variance) * @rtt_spread: spread of the RTTs measured * @dist_avg: average of distances (mm) measured * (must have either this or @rtt_avg) * @dist_variance: variance of distances measured (see also @rtt_variance) * @dist_spread: spread of distances measured (see also @rtt_spread) * @num_ftmr_attempts_valid: @num_ftmr_attempts is valid * @num_ftmr_successes_valid: @num_ftmr_successes is valid * @rssi_avg_valid: @rssi_avg is valid * @rssi_spread_valid: @rssi_spread is valid * @tx_rate_valid: @tx_rate is valid * @rx_rate_valid: @rx_rate is valid * @rtt_avg_valid: @rtt_avg is valid * @rtt_variance_valid: @rtt_variance is valid * @rtt_spread_valid: @rtt_spread is valid * @dist_avg_valid: @dist_avg is valid * @dist_variance_valid: @dist_variance is valid * @dist_spread_valid: @dist_spread is valid */ struct cfg80211_pmsr_ftm_result { const u8 *lci; const u8 *civicloc; unsigned int lci_len; unsigned int civicloc_len; enum nl80211_peer_measurement_ftm_failure_reasons failure_reason; u32 num_ftmr_attempts, num_ftmr_successes; s16 burst_index; u8 busy_retry_time; u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; s32 rssi_avg; s32 rssi_spread; struct rate_info tx_rate, rx_rate; s64 rtt_avg; s64 rtt_variance; s64 rtt_spread; s64 dist_avg; s64 dist_variance; s64 dist_spread; u16 num_ftmr_attempts_valid:1, num_ftmr_successes_valid:1, rssi_avg_valid:1, rssi_spread_valid:1, tx_rate_valid:1, rx_rate_valid:1, rtt_avg_valid:1, rtt_variance_valid:1, rtt_spread_valid:1, dist_avg_valid:1, dist_variance_valid:1, dist_spread_valid:1; }; /** * struct cfg80211_pmsr_result - peer measurement result * @addr: address of the peer * @host_time: host time (use ktime_get_boottime() adjust to the time when the * measurement was made) * @ap_tsf: AP's TSF at measurement time * @status: status of the measurement * @final: if reporting partial results, mark this as the last one; if not * reporting partial results always set this flag * @ap_tsf_valid: indicates the @ap_tsf value is valid * @type: type of the measurement reported, note that we only support reporting * one type at a time, but you can report multiple results separately and * they're all aggregated for userspace. 
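 *
 * Example (illustrative sketch only, not part of the original kernel-doc):
 * reporting a single, final FTM result (carried in the @ftm member
 * documented below); drv_report_ftm_done() is hypothetical driver code::
 *
 *	static void drv_report_ftm_done(struct wireless_dev *wdev,
 *					struct cfg80211_pmsr_request *req,
 *					const u8 *peer, s64 rtt_avg)
 *	{
 *		struct cfg80211_pmsr_result res = {
 *			.type = NL80211_PMSR_TYPE_FTM,
 *			.status = NL80211_PMSR_STATUS_SUCCESS,
 *			.host_time = ktime_get_boottime_ns(),
 *			.final = 1,
 *			.ftm.rtt_avg = rtt_avg,
 *			.ftm.rtt_avg_valid = 1,
 *		};
 *
 *		memcpy(res.addr, peer, ETH_ALEN);
 *		cfg80211_pmsr_report(wdev, req, &res, GFP_KERNEL);
 *		// all peers are done in this sketch, so complete the request
 *		cfg80211_pmsr_complete(wdev, req, GFP_KERNEL);
 *	}
 *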
* @ftm: FTM result */ struct cfg80211_pmsr_result { u64 host_time, ap_tsf; enum nl80211_peer_measurement_status status; u8 addr[ETH_ALEN]; u8 final:1, ap_tsf_valid:1; enum nl80211_peer_measurement_type type; union { struct cfg80211_pmsr_ftm_result ftm; }; }; /** * struct cfg80211_pmsr_ftm_request_peer - FTM request data * @requested: indicates FTM is requested * @preamble: frame preamble to use * @burst_period: burst period to use * @asap: indicates to use ASAP mode * @num_bursts_exp: number of bursts exponent * @burst_duration: burst duration * @ftms_per_burst: number of FTMs per burst * @ftmr_retries: number of retries for FTM request * @request_lci: request LCI information * @request_civicloc: request civic location information * @trigger_based: use trigger based ranging for the measurement * If neither @trigger_based nor @non_trigger_based is set, * EDCA based ranging will be used. * @non_trigger_based: use non trigger based ranging for the measurement * If neither @trigger_based nor @non_trigger_based is set, * EDCA based ranging will be used. * @lmr_feedback: negotiate for I2R LMR feedback. Only valid if either * @trigger_based or @non_trigger_based is set. * @bss_color: the bss color of the responder. Optional. Set to zero to * indicate the driver should set the BSS color. Only valid if * @non_trigger_based or @trigger_based is set. * * See also nl80211 for the respective attribute documentation. */ struct cfg80211_pmsr_ftm_request_peer { enum nl80211_preamble preamble; u16 burst_period; u8 requested:1, asap:1, request_lci:1, request_civicloc:1, trigger_based:1, non_trigger_based:1, lmr_feedback:1; u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; u8 ftmr_retries; u8 bss_color; }; /** * struct cfg80211_pmsr_request_peer - peer data for a peer measurement request * @addr: MAC address * @chandef: channel to use * @report_ap_tsf: report the associated AP's TSF * @ftm: FTM data, see &struct cfg80211_pmsr_ftm_request_peer */ struct cfg80211_pmsr_request_peer { u8 addr[ETH_ALEN]; struct cfg80211_chan_def chandef; u8 report_ap_tsf:1; struct cfg80211_pmsr_ftm_request_peer ftm; }; /** * struct cfg80211_pmsr_request - peer measurement request * @cookie: cookie, set by cfg80211 * @nl_portid: netlink portid - used by cfg80211 * @drv_data: driver data for this request, if required for aborting, * not otherwise freed or anything by cfg80211 * @mac_addr: MAC address used for (randomised) request * @mac_addr_mask: MAC address mask used for randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @list: used by cfg80211 to hold on to the request * @timeout: timeout (in milliseconds) for the whole operation, if * zero it means there's no timeout * @n_peers: number of peers to do measurements with * @peers: per-peer measurement request data */ struct cfg80211_pmsr_request { u64 cookie; void *drv_data; u32 n_peers; u32 nl_portid; u32 timeout; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); struct list_head list; struct cfg80211_pmsr_request_peer peers[] __counted_by(n_peers); }; /** * struct cfg80211_update_owe_info - OWE Information * * This structure provides information needed for the drivers to offload OWE * (Opportunistic Wireless Encryption) processing to the user space. * * Commonly used across update_owe_info request and event interfaces. * * @peer: MAC address of the peer device for which the OWE processing * has to be done. 
* @status: status code, %WLAN_STATUS_SUCCESS for successful OWE info * processing, use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space * cannot give you the real status code for failures. Used only for * OWE update request command interface (user space to driver). * @ie: IEs obtained from the peer or constructed by the user space. These are * the IEs of the remote peer in the event from the host driver and * the constructed IEs by the user space in the request interface. * @ie_len: Length of IEs in octets. * @assoc_link_id: MLO link ID of the AP, with which (re)association requested * by peer. This will be filled by driver for both MLO and non-MLO station * connections when the AP affiliated with an MLD. For non-MLD AP mode, it * will be -1. Used only with OWE update event (driver to user space). * @peer_mld_addr: For MLO connection, MLD address of the peer. For non-MLO * connection, it will be all zeros. This is applicable only when * @assoc_link_id is not -1, i.e., the AP affiliated with an MLD. Used only * with OWE update event (driver to user space). */ struct cfg80211_update_owe_info { u8 peer[ETH_ALEN] __aligned(2); u16 status; const u8 *ie; size_t ie_len; int assoc_link_id; u8 peer_mld_addr[ETH_ALEN] __aligned(2); }; /** * struct mgmt_frame_regs - management frame registrations data * @global_stypes: bitmap of management frame subtypes registered * for the entire device * @interface_stypes: bitmap of management frame subtypes registered * for the given interface * @global_mcast_stypes: mcast RX is needed globally for these subtypes * @interface_mcast_stypes: mcast RX is needed on this interface * for these subtypes */ struct mgmt_frame_regs { u32 global_stypes, interface_stypes; u32 global_mcast_stypes, interface_mcast_stypes; }; /** * struct cfg80211_ops - backend description for wireless configuration * * This struct is registered by fullmac card drivers and/or wireless stacks * in order to handle configuration requests on their interfaces. * * All callbacks except where otherwise noted should return 0 * on success or a negative error code. * * All operations are invoked with the wiphy mutex held. The RTNL may be * held in addition (due to wireless extensions) but this cannot be relied * upon except in cases where documented below. Note that due to ordering, * the RTNL also cannot be acquired in any handlers. * * @suspend: wiphy device needs to be suspended. The variable @wow will * be %NULL or contain the enabled Wake-on-Wireless triggers that are * configured for the device. * @resume: wiphy device needs to be resumed * @set_wakeup: Called when WoWLAN is enabled/disabled, use this callback * to call device_set_wakeup_enable() to enable/disable wakeup from * the device. * * @add_virtual_intf: create a new virtual interface with the given name, * must set the struct wireless_dev's iftype. Beware: You must create * the new netdev in the wiphy's network namespace! Returns the struct * wireless_dev, or an ERR_PTR. For P2P device wdevs, the driver must * also set the address member in the wdev. * This additionally holds the RTNL to be able to do netdev changes. * * @del_virtual_intf: remove the virtual interface * This additionally holds the RTNL to be able to do netdev changes. * * @change_virtual_intf: change type/configuration of virtual interface, * keep the struct wireless_dev's iftype updated. * This additionally holds the RTNL to be able to do netdev changes. * * @add_intf_link: Add a new MLO link to the given interface. 
Note that * the wdev->link[] data structure has been updated, so the new link * address is available. * @del_intf_link: Remove an MLO link from the given interface. * * @add_key: add a key with the given parameters. @mac_addr will be %NULL * when adding a group key. @link_id will be -1 for non-MLO connection. * For MLO connection, @link_id will be >= 0 for group key and -1 for * pairwise key, @mac_addr will be peer's MLD address for MLO pairwise key. * * @get_key: get information about the key with the given parameters. * @mac_addr will be %NULL when requesting information for a group * key. All pointers given to the @callback function need not be valid * after it returns. This function should return an error if it is * not possible to retrieve the key, -ENOENT if it doesn't exist. * @link_id will be -1 for non-MLO connection. For MLO connection, * @link_id will be >= 0 for group key and -1 for pairwise key, @mac_addr * will be peer's MLD address for MLO pairwise key. * * @del_key: remove a key given the @mac_addr (%NULL for a group key) * and @key_index, return -ENOENT if the key doesn't exist. @link_id will * be -1 for non-MLO connection. For MLO connection, @link_id will be >= 0 * for group key and -1 for pairwise key, @mac_addr will be peer's MLD * address for MLO pairwise key. * * @set_default_key: set the default key on an interface. @link_id will be >= 0 * for MLO connection and -1 for non-MLO connection. * * @set_default_mgmt_key: set the default management frame key on an interface. * @link_id will be >= 0 for MLO connection and -1 for non-MLO connection. * * @set_default_beacon_key: set the default Beacon frame key on an interface. * @link_id will be >= 0 for MLO connection and -1 for non-MLO connection. * * @set_rekey_data: give the data necessary for GTK rekeying to the driver * * @start_ap: Start acting in AP mode defined by the parameters. * @change_beacon: Change the beacon parameters for an access point mode * interface. This should reject the call when AP mode wasn't started. * @stop_ap: Stop being an AP, including stopping beaconing. * * @add_station: Add a new station. * @del_station: Remove a station * @change_station: Modify a given station. Note that flags changes are not much * validated in cfg80211, in particular the auth/assoc/authorized flags * might come to the driver in invalid combinations -- make sure to check * them, also against the existing state! Drivers must call * cfg80211_check_station_change() to validate the information. * @get_station: get station information for the station identified by @mac * @dump_station: dump station callback -- resume dump at index @idx * * @add_mpath: add a fixed mesh path * @del_mpath: delete a given mesh path * @change_mpath: change a given mesh path * @get_mpath: get a mesh path for the given parameters * @dump_mpath: dump mesh path callback -- resume dump at index @idx * @get_mpp: get a mesh proxy path for the given parameters * @dump_mpp: dump mesh proxy path callback -- resume dump at index @idx * @join_mesh: join the mesh network with the specified parameters * (invoked with the wireless_dev mutex held) * @leave_mesh: leave the current mesh network * (invoked with the wireless_dev mutex held) * * @get_mesh_config: Get the current mesh configuration * * @update_mesh_config: Update mesh parameters on a running mesh. * The mask is a bitfield which tells us which parameters to * set, and which to leave alone. * * @change_bss: Modify parameters for a given BSS. 
* * @inform_bss: Called by cfg80211 while being informed about new BSS data * for every BSS found within the reported data or frame. This is called * from within the cfg8011 inform_bss handlers while holding the bss_lock. * The data parameter is passed through from drv_data inside * struct cfg80211_inform_bss. * The new IE data for the BSS is explicitly passed. * * @set_txq_params: Set TX queue parameters * * @libertas_set_mesh_channel: Only for backward compatibility for libertas, * as it doesn't implement join_mesh and needs to set the channel to * join the mesh instead. * * @set_monitor_channel: Set the monitor mode channel for the device. If other * interfaces are active this callback should reject the configuration. * If no interfaces are active or the device is down, the channel should * be stored for when a monitor interface becomes active. * * @scan: Request to do a scan. If returning zero, the scan request is given * the driver, and will be valid until passed to cfg80211_scan_done(). * For scan results, call cfg80211_inform_bss(); you can call this outside * the scan/scan_done bracket too. * @abort_scan: Tell the driver to abort an ongoing scan. The driver shall * indicate the status of the scan through cfg80211_scan_done(). * * @auth: Request to authenticate with the specified peer * (invoked with the wireless_dev mutex held) * @assoc: Request to (re)associate with the specified peer * (invoked with the wireless_dev mutex held) * @deauth: Request to deauthenticate from the specified peer * (invoked with the wireless_dev mutex held) * @disassoc: Request to disassociate from the specified peer * (invoked with the wireless_dev mutex held) * * @connect: Connect to the ESS with the specified parameters. When connected, * call cfg80211_connect_result()/cfg80211_connect_bss() with status code * %WLAN_STATUS_SUCCESS. If the connection fails for some reason, call * cfg80211_connect_result()/cfg80211_connect_bss() with the status code * from the AP or cfg80211_connect_timeout() if no frame with status code * was received. * The driver is allowed to roam to other BSSes within the ESS when the * other BSS matches the connect parameters. When such roaming is initiated * by the driver, the driver is expected to verify that the target matches * the configured security parameters and to use Reassociation Request * frame instead of Association Request frame. * The connect function can also be used to request the driver to perform a * specific roam when connected to an ESS. In that case, the prev_bssid * parameter is set to the BSSID of the currently associated BSS as an * indication of requesting reassociation. * In both the driver-initiated and new connect() call initiated roaming * cases, the result of roaming is indicated with a call to * cfg80211_roamed(). (invoked with the wireless_dev mutex held) * @update_connect_params: Update the connect parameters while connected to a * BSS. The updated parameters can be used by driver/firmware for * subsequent BSS selection (roaming) decisions and to form the * Authentication/(Re)Association Request frames. This call does not * request an immediate disassociation or reassociation with the current * BSS, i.e., this impacts only subsequent (re)associations. The bits in * changed are defined in &enum cfg80211_connect_params_changed. * (invoked with the wireless_dev mutex held) * @disconnect: Disconnect from the BSS/ESS or stop connection attempts if * connection is in progress. 
Once done, call cfg80211_disconnected() in * case connection was already established (invoked with the * wireless_dev mutex held), otherwise call cfg80211_connect_timeout(). * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call * cfg80211_ibss_joined(), also call that function when changing BSSID due * to a merge. * (invoked with the wireless_dev mutex held) * @leave_ibss: Leave the IBSS. * (invoked with the wireless_dev mutex held) * * @set_mcast_rate: Set the specified multicast rate (only if vif is in ADHOC or * MESH mode) * * @set_wiphy_params: Notify that wiphy parameters have changed; * @changed bitfield (see &enum wiphy_params_flags) describes which values * have changed. The actual parameter values are available in * struct wiphy. If returning an error, no value should be changed. * * @set_tx_power: set the transmit power according to the parameters, * the power passed is in mBm, to get dBm use MBM_TO_DBM(). The * wdev may be %NULL if power was set for the wiphy, and will * always be %NULL unless the driver supports per-vif TX power * (as advertised by the nl80211 feature flag.) * @get_tx_power: store the current TX power into the dbm variable; * return 0 if successful * * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting * functions to adjust rfkill hw state * * @dump_survey: get site survey information. * * @remain_on_channel: Request the driver to remain awake on the specified * channel for the specified duration to complete an off-channel * operation (e.g., public action frame exchange). When the driver is * ready on the requested channel, it must indicate this with an event * notification by calling cfg80211_ready_on_channel(). * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. * @mgmt_tx: Transmit a management frame. * @mgmt_tx_cancel_wait: Cancel the wait time from transmitting a management * frame on another channel * * @testmode_cmd: run a test mode command; @wdev may be %NULL * @testmode_dump: Implement a test mode dump. The cb->args[2] and up may be * used by the function, but 0 and 1 must not be touched. Additionally, * return error codes other than -ENOBUFS and -ENOENT will terminate the * dump and return to userspace with an error, so be careful. If any data * was passed in from userspace then the data/len arguments will be present * and point to the data contained in %NL80211_ATTR_TESTDATA. * * @set_bitrate_mask: set the bitrate mask configuration * * @set_pmksa: Cache a PMKID for a BSSID. This is mostly useful for fullmac * devices running firmwares capable of generating the (re) association * RSN IE. It allows for faster roaming between WPA2 BSSIDs. * @del_pmksa: Delete a cached PMKID. * @flush_pmksa: Flush all cached PMKIDs. * @set_power_mgmt: Configure WLAN power management. A timeout value of -1 * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. * After configuration, the driver should (soon) send an event indicating * the current level is above/below the configured threshold; this may * need some care when the configuration is changed (without first being * disabled.) * @set_cqm_rssi_range_config: Configure two RSSI thresholds in the * connection quality monitor. An event is to be sent only when the * signal level is found to be outside the two values. 
The driver should * set %NL80211_EXT_FEATURE_CQM_RSSI_LIST if this method is implemented. * If it is provided then there's no point providing @set_cqm_rssi_config. * @set_cqm_txe_config: Configure connection quality monitor TX error * thresholds. * @sched_scan_start: Tell the driver to start a scheduled scan. * @sched_scan_stop: Tell the driver to stop an ongoing scheduled scan with * given request id. This call must stop the scheduled scan and be ready * for starting a new one before it returns, i.e. @sched_scan_start may be * called immediately after that again and should not fail in that case. * The driver should not call cfg80211_sched_scan_stopped() for a requested * stop (when this method returns 0). * * @update_mgmt_frame_registrations: Notify the driver that management frame * registrations were updated. The callback is allowed to sleep. * * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device. * Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may * reject TX/RX mask combinations they cannot support by returning -EINVAL * (also see nl80211.h @NL80211_ATTR_WIPHY_ANTENNA_TX). * * @get_antenna: Get current antenna configuration from device (tx_ant, rx_ant). * * @tdls_mgmt: Transmit a TDLS management frame. * @tdls_oper: Perform a high-level TDLS operation (e.g. TDLS link setup). * * @probe_client: probe an associated client, must return a cookie that it * later passes to cfg80211_probe_status(). * * @set_noack_map: Set the NoAck Map for the TIDs. * * @get_channel: Get the current operating channel for the virtual interface. * For monitor interfaces, it should return %NULL unless there's a single * current monitoring channel. * * @start_p2p_device: Start the given P2P device. * @stop_p2p_device: Stop the given P2P device. * * @set_mac_acl: Sets MAC address control list in AP and P2P GO mode. * Parameters include ACL policy, an array of MAC address of stations * and the number of MAC addresses. If there is already a list in driver * this new list replaces the existing one. Driver has to clear its ACL * when number of MAC addresses entries is passed as 0. Drivers which * advertise the support for MAC based ACL have to implement this callback. * * @start_radar_detection: Start radar detection in the driver. * * @end_cac: End running CAC, probably because a related CAC * was finished on another phy. * * @update_ft_ies: Provide updated Fast BSS Transition information to the * driver. If the SME is in the driver/firmware, this information can be * used in building Authentication and Reassociation Request frames. * * @crit_proto_start: Indicates a critical protocol needs more link reliability * for a given duration (milliseconds). The protocol is provided so the * driver can take the most appropriate actions. * @crit_proto_stop: Indicates critical protocol no longer needs increased link * reliability. This operation can not fail. * @set_coalesce: Set coalesce parameters. * * @channel_switch: initiate channel-switch procedure (with CSA). Driver is * responsible for veryfing if the switch is possible. Since this is * inherently tricky driver may decide to disconnect an interface later * with cfg80211_stop_iface(). This doesn't mean driver can accept * everything. It should do it's best to verify requests and reject them * as soon as possible. * * @set_qos_map: Set QoS mapping information to the driver * * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the * given interface This is used e.g. 
for dynamic HT 20/40 MHz channel width * changes during the lifetime of the BSS. * * @add_tx_ts: validate (if admitted_time is 0) or add a TX TS to the device * with the given parameters; action frame exchange has been handled by * userspace so this just has to modify the TX path to take the TS into * account. * If the admitted time is 0 just validate the parameters to make sure * the session can be created at all; it is valid to just always return * success for that but that may result in inefficient behaviour (handshake * with the peer followed by immediate teardown when the addition is later * rejected) * @del_tx_ts: remove an existing TX TS * * @join_ocb: join the OCB network with the specified parameters * (invoked with the wireless_dev mutex held) * @leave_ocb: leave the current OCB network * (invoked with the wireless_dev mutex held) * * @tdls_channel_switch: Start channel-switching with a TDLS peer. The driver * is responsible for continually initiating channel-switching operations * and returning to the base channel for communication with the AP. * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both * peers must be on the base channel when the call completes. * @start_nan: Start the NAN interface. * @stop_nan: Stop the NAN interface. * @add_nan_func: Add a NAN function. Returns negative value on failure. * On success @nan_func ownership is transferred to the driver and * it may access it outside of the scope of this function. The driver * should free the @nan_func when no longer needed by calling * cfg80211_free_nan_func(). * On success the driver should assign an instance_id in the * provided @nan_func. * @del_nan_func: Delete a NAN function. * @nan_change_conf: changes NAN configuration. The changed parameters must * be specified in @changes (using &enum cfg80211_nan_conf_changes); * All other parameters must be ignored. * * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS * * @get_txq_stats: Get TXQ stats for interface or phy. If wdev is %NULL, this * function should return phy stats, and interface stats otherwise. * * @set_pmk: configure the PMK to be used for offloaded 802.1X 4-Way handshake. * If not deleted through @del_pmk the PMK remains valid until disconnect * upon which the driver should clear it. * (invoked with the wireless_dev mutex held) * @del_pmk: delete the previously configured PMK for the given authenticator. * (invoked with the wireless_dev mutex held) * * @external_auth: indicates result of offloaded authentication processing from * user space * * @tx_control_port: TX a control port frame (EAPoL). The noencrypt parameter * tells the driver that the frame should not be encrypted. * * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available. * Statistics should be cumulative, currently no way to reset is provided. * @start_pmsr: start peer measurement (e.g. FTM) * @abort_pmsr: abort peer measurement * * @update_owe_info: Provide updated OWE info to driver. Driver implementing SME * but offloading OWE processing to the user space will get the updated * DH IE through this interface. * * @probe_mesh_link: Probe direct Mesh peer's link quality by sending data frame * and overrule HWMP path selection algorithm. * @set_tid_config: TID specific configuration, this can be peer or BSS specific * This callback may sleep. * @reset_tid_config: Reset TID specific configuration for the peer, for the * given TIDs. This callback may sleep. * * @set_sar_specs: Update the SAR (TX power) settings. 
* * @color_change: Initiate a color change. * * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use * those to decrypt (Re)Association Request and encrypt (Re)Association * Response frame. * * @set_radar_background: Configure dedicated offchannel chain available for * radar/CAC detection on some hw. This chain can't be used to transmit * or receive frames and it is bounded to a running wdev. * Background radar/CAC detection allows to avoid the CAC downtime * switching to a different channel during CAC detection on the selected * radar channel. * The caller is expected to set chandef pointer to NULL in order to * disable background CAC/radar detection. * @add_link_station: Add a link to a station. * @mod_link_station: Modify a link of a station. * @del_link_station: Remove a link of a station. * * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. * @set_ttlm: set the TID to link mapping. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); int (*resume)(struct wiphy *wiphy); void (*set_wakeup)(struct wiphy *wiphy, bool enabled); struct wireless_dev * (*add_virtual_intf)(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, struct vif_params *params); int (*del_virtual_intf)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*change_virtual_intf)(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, struct vif_params *params); int (*add_intf_link)(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id); void (*del_intf_link)(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast); int (*set_default_mgmt_key)(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index); int (*set_default_beacon_key)(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index); int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_update *info); int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id); int (*add_station)(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params); int (*del_station)(struct wiphy *wiphy, struct net_device *dev, struct station_del_parameters *params); int (*change_station)(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_parameters *params); int (*get_station)(struct wiphy *wiphy, struct net_device *dev, const u8 *mac, struct station_info *sinfo); int (*dump_station)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo); int (*add_mpath)(struct wiphy *wiphy, struct net_device *dev, const u8 *dst, const u8 *next_hop); int (*del_mpath)(struct wiphy *wiphy, struct net_device *dev, const u8 *dst); int (*change_mpath)(struct wiphy *wiphy, struct 
net_device *dev, const u8 *dst, const u8 *next_hop); int (*get_mpath)(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); int (*dump_mpath)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); int (*get_mpp)(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *mpp, struct mpath_info *pinfo); int (*dump_mpp)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *mpp, struct mpath_info *pinfo); int (*get_mesh_config)(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf); int (*update_mesh_config)(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf); int (*join_mesh)(struct wiphy *wiphy, struct net_device *dev, const struct mesh_config *conf, const struct mesh_setup *setup); int (*leave_mesh)(struct wiphy *wiphy, struct net_device *dev); int (*join_ocb)(struct wiphy *wiphy, struct net_device *dev, struct ocb_setup *setup); int (*leave_ocb)(struct wiphy *wiphy, struct net_device *dev); int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); void (*inform_bss)(struct wiphy *wiphy, struct cfg80211_bss *bss, const struct cfg80211_bss_ies *ies, void *data); int (*set_txq_params)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params); int (*libertas_set_mesh_channel)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan); int (*set_monitor_channel)(struct wiphy *wiphy, struct cfg80211_chan_def *chandef); int (*scan)(struct wiphy *wiphy, struct cfg80211_scan_request *request); void (*abort_scan)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*auth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_auth_request *req); int (*assoc)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_request *req); int (*deauth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_deauth_request *req); int (*disassoc)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req); int (*connect)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); int (*update_connect_params)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme, u32 changed); int (*disconnect)(struct wiphy *wiphy, struct net_device *dev, u16 reason_code); int (*join_ibss)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params); int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); int (*set_mcast_rate)(struct wiphy *wiphy, struct net_device *dev, int rate[NUM_NL80211_BANDS]); int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm); int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, int *dbm); void (*rfkill_poll)(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE int (*testmode_cmd)(struct wiphy *wiphy, struct wireless_dev *wdev, void *data, int len); int (*testmode_dump)(struct wiphy *wiphy, struct sk_buff *skb, struct netlink_callback *cb, void *data, int len); #endif int (*set_bitrate_mask)(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask); int (*dump_survey)(struct wiphy *wiphy, struct net_device *netdev, int idx, struct survey_info *info); int (*set_pmksa)(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa); int 
(*del_pmksa)(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_pmksa *pmksa); int (*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev); int (*remain_on_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, unsigned int duration, u64 *cookie); int (*cancel_remain_on_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); int (*mgmt_tx)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie); int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); int (*set_cqm_rssi_config)(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); int (*set_cqm_rssi_range_config)(struct wiphy *wiphy, struct net_device *dev, s32 rssi_low, s32 rssi_high); int (*set_cqm_txe_config)(struct wiphy *wiphy, struct net_device *dev, u32 rate, u32 pkts, u32 intvl); void (*update_mgmt_frame_registrations)(struct wiphy *wiphy, struct wireless_dev *wdev, struct mgmt_frame_regs *upd); int (*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant); int (*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant); int (*sched_scan_start)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_sched_scan_request *request); int (*sched_scan_stop)(struct wiphy *wiphy, struct net_device *dev, u64 reqid); int (*set_rekey_data)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_gtk_rekey_data *data); int (*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *buf, size_t len); int (*tdls_oper)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper); int (*probe_client)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u64 *cookie); int (*set_noack_map)(struct wiphy *wiphy, struct net_device *dev, u16 noack_map); int (*get_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id, struct cfg80211_chan_def *chandef); int (*start_p2p_device)(struct wiphy *wiphy, struct wireless_dev *wdev); void (*stop_p2p_device)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*set_mac_acl)(struct wiphy *wiphy, struct net_device *dev, const struct cfg80211_acl_data *params); int (*start_radar_detection)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms); void (*end_cac)(struct wiphy *wiphy, struct net_device *dev); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); int (*crit_proto_start)(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_crit_proto_id protocol, u16 duration); void (*crit_proto_stop)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*set_coalesce)(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce); int (*channel_switch)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); int (*set_qos_map)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_qos_map *qos_map); int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id, struct cfg80211_chan_def *chandef); int (*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time); int (*del_tx_ts)(struct wiphy *wiphy, struct net_device *dev, u8 tsid, const u8 *peer); int 
(*tdls_channel_switch)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr, u8 oper_class, struct cfg80211_chan_def *chandef); void (*tdls_cancel_channel_switch)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); int (*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf); void (*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*add_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_func *nan_func); void (*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); int (*nan_change_conf)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes); int (*set_multicast_to_unicast)(struct wiphy *wiphy, struct net_device *dev, const bool enabled); int (*get_txq_stats)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_txq_stats *txqstats); int (*set_pmk)(struct wiphy *wiphy, struct net_device *dev, const struct cfg80211_pmk_conf *conf); int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev, const u8 *aa); int (*external_auth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_external_auth_params *params); int (*tx_control_port)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, const u8 *dest, const __be16 proto, const bool noencrypt, int link_id, u64 *cookie); int (*get_ftm_responder_stats)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ftm_responder_stats *ftm_stats); int (*start_pmsr)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_pmsr_request *request); void (*abort_pmsr)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_pmsr_request *request); int (*update_owe_info)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_owe_info *owe_info); int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); int (*set_tid_config)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_tid_config *tid_conf); int (*reset_tid_config)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 tids); int (*set_sar_specs)(struct wiphy *wiphy, struct cfg80211_sar_specs *sar); int (*color_change)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_color_change_settings *params); int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_fils_aad *fils_aad); int (*set_radar_background)(struct wiphy *wiphy, struct cfg80211_chan_def *chandef); int (*add_link_station)(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params); int (*mod_link_station)(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params); int (*del_link_station)(struct wiphy *wiphy, struct net_device *dev, struct link_station_del_parameters *params); int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts); int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params); }; /* * wireless hardware and networking interfaces structures * and registration/helper functions */ /** * enum wiphy_flags - wiphy capability flags * * @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split * into two, first for legacy bands and second for 6 GHz. 
* @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this * wiphy at all * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled * by default -- this flag will be set depending on the kernel's default * on wiphy_new(), but can be changed by the driver if it has a good * reason to override the default * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station * on a VLAN interface). This flag also serves an extra purpose of * supporting 4ADDR AP mode on devices which do not support AP/VLAN iftype. * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the * control port protocol ethertype. The device also honours the * control_port_no_encrypt flag. * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the * firmware. * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP. * @WIPHY_FLAG_SUPPORTS_TDLS: The device supports TDLS (802.11z) operation. * @WIPHY_FLAG_TDLS_EXTERNAL_SETUP: The device does not handle TDLS (802.11z) * link setup/discovery operations internally. Setup, discovery and * teardown packets should be sent through the @NL80211_CMD_TDLS_MGMT * command. When this flag is not set, @NL80211_CMD_TDLS_OPER should be * used for asking the driver/firmware to perform a TDLS operation. * @WIPHY_FLAG_HAVE_AP_SME: device integrates AP SME * @WIPHY_FLAG_REPORTS_OBSS: the device will report beacons from other BSSes * when there are virtual interfaces in AP mode by calling * cfg80211_report_obss_beacon(). * @WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD: When operating as an AP, the device * responds to probe-requests in hardware. * @WIPHY_FLAG_OFFCHAN_TX: Device supports direct off-channel TX. * @WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL: Device supports remain-on-channel call. * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). * @WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK: The device supports bigger kek and kck keys * @WIPHY_FLAG_SUPPORTS_MLO: This is a temporary flag gating the MLO APIs, * in order to not have them reachable in normal drivers, until we have * complete feature/interface combinations/etc. advertisement. No driver * should set this flag for now. * @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys. * @WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER: The device could handle reg notify for * NL80211_REGDOM_SET_BY_DRIVER. * @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver * set this flag to update channels on beacon hints. * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link * of an NSTR mobile AP MLD. 
* @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), WIPHY_FLAG_SUPPORTS_MLO = BIT(1), WIPHY_FLAG_SPLIT_SCAN_6GHZ = BIT(2), WIPHY_FLAG_NETNS_OK = BIT(3), WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), WIPHY_FLAG_4ADDR_AP = BIT(5), WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_DISABLE_WEXT = BIT(9), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11), WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12), WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13), WIPHY_FLAG_AP_UAPSD = BIT(14), WIPHY_FLAG_SUPPORTS_TDLS = BIT(15), WIPHY_FLAG_TDLS_EXTERNAL_SETUP = BIT(16), WIPHY_FLAG_HAVE_AP_SME = BIT(17), WIPHY_FLAG_REPORTS_OBSS = BIT(18), WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD = BIT(19), WIPHY_FLAG_OFFCHAN_TX = BIT(20), WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL = BIT(21), WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER = BIT(24), WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON = BIT(25), }; /** * struct ieee80211_iface_limit - limit on certain interface types * @max: maximum number of interfaces of these types * @types: interface types (bits) */ struct ieee80211_iface_limit { u16 max; u16 types; }; /** * struct ieee80211_iface_combination - possible interface combination * * With this structure the driver can describe which interface * combinations it supports concurrently. * * Examples: * * 1. Allow #STA <= 1, #AP <= 1, matching BI, channels = 1, 2 total: * * .. code-block:: c * * struct ieee80211_iface_limit limits1[] = { * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, * { .max = 1, .types = BIT(NL80211_IFTYPE_AP), }, * }; * struct ieee80211_iface_combination combination1 = { * .limits = limits1, * .n_limits = ARRAY_SIZE(limits1), * .max_interfaces = 2, * .beacon_int_infra_match = true, * }; * * * 2. Allow #{AP, P2P-GO} <= 8, channels = 1, 8 total: * * .. code-block:: c * * struct ieee80211_iface_limit limits2[] = { * { .max = 8, .types = BIT(NL80211_IFTYPE_AP) | * BIT(NL80211_IFTYPE_P2P_GO), }, * }; * struct ieee80211_iface_combination combination2 = { * .limits = limits2, * .n_limits = ARRAY_SIZE(limits2), * .max_interfaces = 8, * .num_different_channels = 1, * }; * * * 3. Allow #STA <= 1, #{P2P-client,P2P-GO} <= 3 on two channels, 4 total. * * This allows for an infrastructure connection and three P2P connections. * * .. code-block:: c * * struct ieee80211_iface_limit limits3[] = { * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, * { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) | * BIT(NL80211_IFTYPE_P2P_CLIENT), }, * }; * struct ieee80211_iface_combination combination3 = { * .limits = limits3, * .n_limits = ARRAY_SIZE(limits3), * .max_interfaces = 4, * .num_different_channels = 2, * }; * */ struct ieee80211_iface_combination { /** * @limits: * limits for the given interface types */ const struct ieee80211_iface_limit *limits; /** * @num_different_channels: * can use up to this many different channels */ u32 num_different_channels; /** * @max_interfaces: * maximum number of interfaces in total allowed in this group */ u16 max_interfaces; /** * @n_limits: * number of limitations */ u8 n_limits; /** * @beacon_int_infra_match: * In this combination, the beacon intervals between infrastructure * and AP types must match. This is required only in special cases. 
*/ bool beacon_int_infra_match; /** * @radar_detect_widths: * bitmap of channel widths supported for radar detection */ u8 radar_detect_widths; /** * @radar_detect_regions: * bitmap of regions supported for radar detection */ u8 radar_detect_regions; /** * @beacon_int_min_gcd: * This interface combination supports different beacon intervals. * * = 0 * all beacon intervals for different interface must be same. * > 0 * any beacon interval for the interface part of this combination AND * GCD of all beacon intervals from beaconing interfaces of this * combination must be greater or equal to this value. */ u32 beacon_int_min_gcd; }; struct ieee80211_txrx_stypes { u16 tx, rx; }; /** * enum wiphy_wowlan_support_flags - WoWLAN support flags * @WIPHY_WOWLAN_ANY: supports wakeup for the special "any" * trigger that keeps the device operating as-is and * wakes up the host on any activity, for example a * received packet that passed filtering; note that the * packet should be preserved in that case * @WIPHY_WOWLAN_MAGIC_PKT: supports wakeup on magic packet * (see nl80211.h) * @WIPHY_WOWLAN_DISCONNECT: supports wakeup on disconnect * @WIPHY_WOWLAN_SUPPORTS_GTK_REKEY: supports GTK rekeying while asleep * @WIPHY_WOWLAN_GTK_REKEY_FAILURE: supports wakeup on GTK rekey failure * @WIPHY_WOWLAN_EAP_IDENTITY_REQ: supports wakeup on EAP identity request * @WIPHY_WOWLAN_4WAY_HANDSHAKE: supports wakeup on 4-way handshake failure * @WIPHY_WOWLAN_RFKILL_RELEASE: supports wakeup on RF-kill release * @WIPHY_WOWLAN_NET_DETECT: supports wakeup on network detection */ enum wiphy_wowlan_support_flags { WIPHY_WOWLAN_ANY = BIT(0), WIPHY_WOWLAN_MAGIC_PKT = BIT(1), WIPHY_WOWLAN_DISCONNECT = BIT(2), WIPHY_WOWLAN_SUPPORTS_GTK_REKEY = BIT(3), WIPHY_WOWLAN_GTK_REKEY_FAILURE = BIT(4), WIPHY_WOWLAN_EAP_IDENTITY_REQ = BIT(5), WIPHY_WOWLAN_4WAY_HANDSHAKE = BIT(6), WIPHY_WOWLAN_RFKILL_RELEASE = BIT(7), WIPHY_WOWLAN_NET_DETECT = BIT(8), }; struct wiphy_wowlan_tcp_support { const struct nl80211_wowlan_tcp_data_token_feature *tok; u32 data_payload_max; u32 data_interval_max; u32 wake_payload_max; bool seq; }; /** * struct wiphy_wowlan_support - WoWLAN support data * @flags: see &enum wiphy_wowlan_support_flags * @n_patterns: number of supported wakeup patterns * (see nl80211.h for the pattern definition) * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern * @max_pkt_offset: maximum Rx packet offset * @max_nd_match_sets: maximum number of matchsets for net-detect, * similar, but not necessarily identical, to max_match_sets for * scheduled scans. * See &struct cfg80211_sched_scan_request.@match_sets for more * details. 
* @tcp: TCP wakeup support information */ struct wiphy_wowlan_support { u32 flags; int n_patterns; int pattern_max_len; int pattern_min_len; int max_pkt_offset; int max_nd_match_sets; const struct wiphy_wowlan_tcp_support *tcp; }; /** * struct wiphy_coalesce_support - coalesce support data * @n_rules: maximum number of coalesce rules * @max_delay: maximum supported coalescing delay in msecs * @n_patterns: number of supported patterns in a rule * (see nl80211.h for the pattern definition) * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern * @max_pkt_offset: maximum Rx packet offset */ struct wiphy_coalesce_support { int n_rules; int max_delay; int n_patterns; int pattern_max_len; int pattern_min_len; int max_pkt_offset; }; /** * enum wiphy_vendor_command_flags - validation flags for vendor commands * @WIPHY_VENDOR_CMD_NEED_WDEV: vendor command requires wdev * @WIPHY_VENDOR_CMD_NEED_NETDEV: vendor command requires netdev * @WIPHY_VENDOR_CMD_NEED_RUNNING: interface/wdev must be up & running * (must be combined with %_WDEV or %_NETDEV) */ enum wiphy_vendor_command_flags { WIPHY_VENDOR_CMD_NEED_WDEV = BIT(0), WIPHY_VENDOR_CMD_NEED_NETDEV = BIT(1), WIPHY_VENDOR_CMD_NEED_RUNNING = BIT(2), }; /** * enum wiphy_opmode_flag - Station's ht/vht operation mode information flags * * @STA_OPMODE_MAX_BW_CHANGED: Max Bandwidth changed * @STA_OPMODE_SMPS_MODE_CHANGED: SMPS mode changed * @STA_OPMODE_N_SS_CHANGED: max N_SS (number of spatial streams) changed * */ enum wiphy_opmode_flag { STA_OPMODE_MAX_BW_CHANGED = BIT(0), STA_OPMODE_SMPS_MODE_CHANGED = BIT(1), STA_OPMODE_N_SS_CHANGED = BIT(2), }; /** * struct sta_opmode_info - Station's ht/vht operation mode information * @changed: contains value from &enum wiphy_opmode_flag * @smps_mode: New SMPS mode value from &enum nl80211_smps_mode of a station * @bw: new max bandwidth value from &enum nl80211_chan_width of a station * @rx_nss: new rx_nss value of a station */ struct sta_opmode_info { u32 changed; enum nl80211_smps_mode smps_mode; enum nl80211_chan_width bw; u8 rx_nss; }; #define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)(long)(-ENODATA)) /** * struct wiphy_vendor_command - vendor command definition * @info: vendor command identifying information, as used in nl80211 * @flags: flags, see &enum wiphy_vendor_command_flags * @doit: callback for the operation, note that wdev is %NULL if the * flags didn't ask for a wdev and non-%NULL otherwise; the data * pointer may be %NULL if userspace provided no data at all * @dumpit: dump callback, for transferring bigger/multiple items. The * @storage points to cb->args[5], ie. is preserved over the multiple * dumpit calls. * @policy: policy pointer for attributes within %NL80211_ATTR_VENDOR_DATA. * Set this to %VENDOR_CMD_RAW_DATA if no policy can be given and the * attribute is just raw data (e.g. a firmware command). * @maxattr: highest attribute number in policy * It's recommended to not have the same sub command with both @doit and * @dumpit, so that userspace can assume certain ones are get and others * are used with dump requests. 
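 *
 * As a purely illustrative sketch (the OUI, sub-command number and handler
 * below are made-up names, not taken from any real driver), a single
 * raw-data vendor command entry could look like:
 *
 * .. code-block:: c
 *
 *	static int my_vendor_doit(struct wiphy *wiphy,
 *				  struct wireless_dev *wdev,
 *				  const void *data, int data_len)
 *	{
 *		return 0;
 *	}
 *
 *	static const struct wiphy_vendor_command my_vendor_cmds[] = {
 *		{
 *			.info = { .vendor_id = 0x001234, .subcmd = 1 },
 *			.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
 *				 WIPHY_VENDOR_CMD_NEED_RUNNING,
 *			.policy = VENDOR_CMD_RAW_DATA,
 *			.doit = my_vendor_doit,
 *		},
 *	};
 *
 * The array is then assigned to wiphy->vendor_commands (with
 * wiphy->n_vendor_commands set accordingly) before wiphy_register().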
*/ struct wiphy_vendor_command { struct nl80211_vendor_cmd_info info; u32 flags; int (*doit)(struct wiphy *wiphy, struct wireless_dev *wdev, const void *data, int data_len); int (*dumpit)(struct wiphy *wiphy, struct wireless_dev *wdev, struct sk_buff *skb, const void *data, int data_len, unsigned long *storage); const struct nla_policy *policy; unsigned int maxattr; }; /** * struct wiphy_iftype_ext_capab - extended capabilities per interface type * @iftype: interface type * @extended_capabilities: extended capabilities supported by the driver, * additional capabilities might be supported by userspace; these are the * 802.11 extended capabilities ("Extended Capabilities element") and are * in the same format as in the information element. See IEEE Std * 802.11-2012 8.4.2.29 for the defined fields. * @extended_capabilities_mask: mask of the valid values * @extended_capabilities_len: length of the extended capabilities * @eml_capabilities: EML capabilities (for MLO) * @mld_capa_and_ops: MLD capabilities and operations (for MLO) */ struct wiphy_iftype_ext_capab { enum nl80211_iftype iftype; const u8 *extended_capabilities; const u8 *extended_capabilities_mask; u8 extended_capabilities_len; u16 eml_capabilities; u16 mld_capa_and_ops; }; /** * cfg80211_get_iftype_ext_capa - lookup interface type extended capability * @wiphy: the wiphy to look up from * @type: the interface type to look up */ const struct wiphy_iftype_ext_capab * cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type); /** * struct cfg80211_pmsr_capabilities - cfg80211 peer measurement capabilities * @max_peers: maximum number of peers in a single measurement * @report_ap_tsf: can report assoc AP's TSF for radio resource measurement * @randomize_mac_addr: can randomize MAC address for measurement * @ftm: FTM measurement data * @ftm.supported: FTM measurement is supported * @ftm.asap: ASAP-mode is supported * @ftm.non_asap: non-ASAP-mode is supported * @ftm.request_lci: can request LCI data * @ftm.request_civicloc: can request civic location data * @ftm.preambles: bitmap of preambles supported (&enum nl80211_preamble) * @ftm.bandwidths: bitmap of bandwidths supported (&enum nl80211_chan_width) * @ftm.max_bursts_exponent: maximum burst exponent supported * (set to -1 if not limited; note that setting this will necessarily * forbid using the value 15 to let the responder pick) * @ftm.max_ftms_per_burst: maximum FTMs per burst supported (set to 0 if * not limited) * @ftm.trigger_based: trigger based ranging measurement is supported * @ftm.non_trigger_based: non trigger based ranging measurement is supported */ struct cfg80211_pmsr_capabilities { unsigned int max_peers; u8 report_ap_tsf:1, randomize_mac_addr:1; struct { u32 preambles; u32 bandwidths; s8 max_bursts_exponent; u8 max_ftms_per_burst; u8 supported:1, asap:1, non_asap:1, request_lci:1, request_civicloc:1, trigger_based:1, non_trigger_based:1; } ftm; }; /** * struct wiphy_iftype_akm_suites - This structure encapsulates supported akm * suites for interface types defined in @iftypes_mask. Each type in the * @iftypes_mask must be unique across all instances of iftype_akm_suites. 
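 *
 * For example (a hypothetical restriction, values illustrative only), a
 * driver that only supports SAE on AP and P2P-GO interfaces could use:
 *
 * .. code-block:: c
 *
 *	static const u32 ap_akm_suites[] = {
 *		WLAN_AKM_SUITE_SAE,
 *	};
 *
 *	static const struct wiphy_iftype_akm_suites my_iftype_akm_suites[] = {
 *		{
 *			.iftypes_mask = BIT(NL80211_IFTYPE_AP) |
 *					BIT(NL80211_IFTYPE_P2P_GO),
 *			.akm_suites = ap_akm_suites,
 *			.n_akm_suites = ARRAY_SIZE(ap_akm_suites),
 *		},
 *	};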
* * @iftypes_mask: bitmask of interface types * @akm_suites: points to an array of supported akm suites * @n_akm_suites: number of supported AKM suites */ struct wiphy_iftype_akm_suites { u16 iftypes_mask; const u32 *akm_suites; int n_akm_suites; }; #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff /** * struct wiphy - wireless hardware description * @mtx: mutex for the data (structures) of this device * @reg_notifier: the driver's regulatory notification callback, * note that if your driver uses wiphy_apply_custom_regulatory() * the reg_notifier's request can be passed as NULL * @regd: the driver's regulatory domain, if one was requested via * the regulatory_hint() API. This can be used by the driver * on the reg_notifier() if it chooses to ignore future * regulatory domain changes caused by other drivers. * @signal_type: signal type reported in &struct cfg80211_bss. * @cipher_suites: supported cipher suites * @n_cipher_suites: number of supported cipher suites * @akm_suites: supported AKM suites. These are the default AKMs supported if * the supported AKMs are not advertised for a specific interface type in * iftype_akm_suites. * @n_akm_suites: number of supported AKM suites * @iftype_akm_suites: array of supported akm suites info per interface type. * Note that the bits in @iftypes_mask inside this structure cannot * overlap (i.e. only one occurrence of each type is allowed across all * instances of iftype_akm_suites). * @num_iftype_akm_suites: number of interface types for which supported akm * suites are specified separately. * @retry_short: Retry limit for short frames (dot11ShortRetryLimit) * @retry_long: Retry limit for long frames (dot11LongRetryLimit) * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold); * -1 = fragmentation disabled, only odd values >= 256 used * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled * @_net: the network namespace this wiphy currently lives in * @perm_addr: permanent MAC address of this device * @addr_mask: If the device supports multiple MAC addresses by masking, * set this to a mask with variable bits set to 1, e.g. if the last * four bits are variable then set it to 00-00-00-00-00-0f. The actual * variable bits shall be determined by the interfaces added, with * interfaces not matching the mask being rejected when brought up. * @n_addresses: number of addresses in @addresses. * @addresses: If the device has more than one address, set this pointer * to a list of addresses (6 bytes each). The first one will be used * by default for perm_addr. In this case, the mask should be set to * all-zeroes. It is then assumed that the device can handle * the same number of arbitrary MAC addresses. * @registered: protects ->resume and ->suspend sysfs callbacks against * unregistering the hardware * @debugfsdir: debugfs directory used for this wiphy (ieee80211/<wiphyname>). * It will be renamed automatically on wiphy renames * @dev: (virtual) struct device for this wiphy. The item in * /sys/class/ieee80211/ points to this. You need to use set_wiphy_dev() * (see below). * @wext: wireless extension handlers * @priv: driver private data (sized according to wiphy_new() parameter) * @interface_modes: bitmask of interface types valid for this wiphy, * must be set by driver * @iface_combinations: Valid interface combinations array, should not * list single interface types. * @n_iface_combinations: number of entries in @iface_combinations array.
* @software_iftypes: bitmask of software interface types, these are not * subject to any restrictions since they are purely managed in SW. * @flags: wiphy flags, see &enum wiphy_flags * @regulatory_flags: wiphy regulatory flags, see * &enum ieee80211_regulatory_flags * @features: features advertised to nl80211, see &enum nl80211_feature_flags. * @ext_features: extended features advertised to nl80211, see * &enum nl80211_ext_feature_index. * @bss_priv_size: each BSS struct has private data allocated with it, * this variable determines its size * @max_scan_ssids: maximum number of SSIDs the device can scan for in * any given scan * @max_sched_scan_reqs: maximum number of scheduled scan requests that * the device can run concurrently. * @max_sched_scan_ssids: maximum number of SSIDs the device can scan * for in any given scheduled scan * @max_match_sets: maximum number of match sets the device can handle * when performing a scheduled scan, 0 if filtering is not * supported. * @max_scan_ie_len: maximum length of user-controlled IEs device can * add to probe request frames transmitted during a scan, must not * include fixed IEs like supported rates * @max_sched_scan_ie_len: same as max_scan_ie_len, but for scheduled * scans * @max_sched_scan_plans: maximum number of scan plans (scan interval and number * of iterations) for scheduled scan supported by the device. * @max_sched_scan_plan_interval: maximum interval (in seconds) for a * single scan plan supported by the device. * @max_sched_scan_plan_iterations: maximum number of iterations for a single * scan plan supported by the device. * @coverage_class: current coverage class * @fw_version: firmware version for ethtool reporting * @hw_version: hardware version for ethtool reporting * @max_num_pmkids: maximum number of PMKIDs supported by device * @privid: a pointer that drivers can use to identify if an arbitrary * wiphy is theirs, e.g. in global notifiers * @bands: information about bands/channels supported by this device * * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or * transmitted through nl80211, points to an array indexed by interface * type * * @available_antennas_tx: bitmap of antennas which are available to be * configured as TX antennas. Antenna configuration commands will be * rejected unless this or @available_antennas_rx is set. * * @available_antennas_rx: bitmap of antennas which are available to be * configured as RX antennas. Antenna configuration commands will be * rejected unless this or @available_antennas_tx is set. * * @probe_resp_offload: * Bitmap of supported protocols for probe response offloading. * See &enum nl80211_probe_resp_offload_support_attr. Only valid * when the wiphy flag @WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD is set. * * @max_remain_on_channel_duration: Maximum time a remain-on-channel operation * may request, if implemented. * * @wowlan: WoWLAN support information * @wowlan_config: current WoWLAN configuration; this should usually not be * used since access to it is necessarily racy, use the parameter passed * to the suspend() operation instead. * * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features. * @ht_capa_mod_mask: Specify what ht_cap values can be over-ridden. * If null, then none can be over-ridden. * @vht_capa_mod_mask: Specify what VHT capabilities can be over-ridden. * If null, then none can be over-ridden. 
* * @wdev_list: the list of associated (virtual) interfaces; this list must * not be modified by the driver, but can be read with RTNL/RCU protection. * * @max_acl_mac_addrs: Maximum number of MAC addresses that the device * supports for ACL. * * @extended_capabilities: extended capabilities supported by the driver, * additional capabilities might be supported by userspace; these are * the 802.11 extended capabilities ("Extended Capabilities element") * and are in the same format as in the information element. See * 802.11-2012 8.4.2.29 for the defined fields. These are the default * extended capabilities to be used if the capabilities are not specified * for a specific interface type in iftype_ext_capab. * @extended_capabilities_mask: mask of the valid values * @extended_capabilities_len: length of the extended capabilities * @iftype_ext_capab: array of extended capabilities per interface type * @num_iftype_ext_capab: number of interface types for which extended * capabilities are specified separately. * @coalesce: packet coalescing support information * * @vendor_commands: array of vendor commands supported by the hardware * @n_vendor_commands: number of vendor commands * @vendor_events: array of vendor events supported by the hardware * @n_vendor_events: number of vendor events * * @max_ap_assoc_sta: maximum number of associated stations supported in AP mode * (including P2P GO) or 0 to indicate no such limit is advertised. The * driver is allowed to advertise a theoretical limit that it can reach in * some cases, but may not always reach. * * @max_num_csa_counters: Number of supported csa_counters in beacons * and probe responses. This value should be set if the driver * wishes to limit the number of csa counters. Default (0) means * infinite. * @bss_select_support: bitmask indicating the BSS selection criteria supported * by the driver in the .connect() callback. The bit position maps to the * attribute indices defined in &enum nl80211_bss_select_attr. * * @nan_supported_bands: bands supported by the device in NAN mode, a * bitmap of &enum nl80211_band values. For instance, for * NL80211_BAND_2GHZ, bit 0 would be set * (i.e. BIT(NL80211_BAND_2GHZ)). * * @txq_limit: configuration of internal TX queue frame limit * @txq_memory_limit: configuration internal TX queue memory limit * @txq_quantum: configuration of internal TX queue scheduler quantum * * @tx_queue_len: allow setting transmit queue len for drivers not using * wake_tx_queue * * @support_mbssid: can HW support association with nontransmitted AP * @support_only_he_mbssid: don't parse MBSSID elements if it is not * HE AP, in order to avoid compatibility issues. * @support_mbssid must be set for this to have any effect. * * @pmsr_capa: peer measurement capabilities * * @tid_config_support: describes the per-TID config support that the * device has * @tid_config_support.vif: bitmap of attributes (configurations) * supported by the driver for each vif * @tid_config_support.peer: bitmap of attributes (configurations) * supported by the driver for each peer * @tid_config_support.max_retry: maximum supported retry count for * long/short retry configuration * * @max_data_retry_count: maximum supported per TID retry count for * configuration through the %NL80211_TID_CONFIG_ATTR_RETRY_SHORT and * %NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes * @sar_capa: SAR control capabilities * @rfkill: a pointer to the rfkill structure * * @mbssid_max_interfaces: maximum number of interfaces supported by the driver * in a multiple BSSID set. 
This field must be set to a non-zero value * by the driver to advertise MBSSID support. * @ema_max_profile_periodicity: maximum profile periodicity supported by * the driver. Setting this field to a non-zero value indicates that the * driver supports enhanced multi-BSSID advertisements (EMA AP). * @max_num_akm_suites: maximum number of AKM suites allowed for * configuration through %NL80211_CMD_CONNECT, %NL80211_CMD_ASSOCIATE and * %NL80211_CMD_START_AP. Set to NL80211_MAX_NR_AKM_SUITES if not set by * driver. If set by driver minimum allowed value is * NL80211_MAX_NR_AKM_SUITES in order to avoid compatibility issues with * legacy userspace and maximum allowed value is * CFG80211_MAX_NUM_AKM_SUITES. * * @hw_timestamp_max_peers: maximum number of peers that the driver supports * enabling HW timestamping for concurrently. Setting this field to a * non-zero value indicates that the driver supports HW timestamping. * A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver * supports enabling HW timestamping for all peers (i.e. no need to * specify a mac address). */ struct wiphy { struct mutex mtx; /* assign these fields before you register the wiphy */ u8 perm_addr[ETH_ALEN]; u8 addr_mask[ETH_ALEN]; struct mac_address *addresses; const struct ieee80211_txrx_stypes *mgmt_stypes; const struct ieee80211_iface_combination *iface_combinations; int n_iface_combinations; u16 software_iftypes; u16 n_addresses; /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; u16 max_acl_mac_addrs; u32 flags, regulatory_flags, features; u8 ext_features[DIV_ROUND_UP(NUM_NL80211_EXT_FEATURES, 8)]; u32 ap_sme_capa; enum cfg80211_signal_type signal_type; int bss_priv_size; u8 max_scan_ssids; u8 max_sched_scan_reqs; u8 max_sched_scan_ssids; u8 max_match_sets; u16 max_scan_ie_len; u16 max_sched_scan_ie_len; u32 max_sched_scan_plans; u32 max_sched_scan_plan_interval; u32 max_sched_scan_plan_iterations; int n_cipher_suites; const u32 *cipher_suites; int n_akm_suites; const u32 *akm_suites; const struct wiphy_iftype_akm_suites *iftype_akm_suites; unsigned int num_iftype_akm_suites; u8 retry_short; u8 retry_long; u32 frag_threshold; u32 rts_threshold; u8 coverage_class; char fw_version[ETHTOOL_FWVERS_LEN]; u32 hw_version; #ifdef CONFIG_PM const struct wiphy_wowlan_support *wowlan; struct cfg80211_wowlan *wowlan_config; #endif u16 max_remain_on_channel_duration; u8 max_num_pmkids; u32 available_antennas_tx; u32 available_antennas_rx; u32 probe_resp_offload; const u8 *extended_capabilities, *extended_capabilities_mask; u8 extended_capabilities_len; const struct wiphy_iftype_ext_capab *iftype_ext_capab; unsigned int num_iftype_ext_capab; const void *privid; struct ieee80211_supported_band *bands[NUM_NL80211_BANDS]; void (*reg_notifier)(struct wiphy *wiphy, struct regulatory_request *request); /* fields below are read-only, assigned by cfg80211 */ const struct ieee80211_regdomain __rcu *regd; struct device dev; bool registered; struct dentry *debugfsdir; const struct ieee80211_ht_cap *ht_capa_mod_mask; const struct ieee80211_vht_cap *vht_capa_mod_mask; struct list_head wdev_list; possible_net_t _net; #ifdef CONFIG_CFG80211_WEXT const struct iw_handler_def *wext; #endif const struct wiphy_coalesce_support *coalesce; const struct wiphy_vendor_command *vendor_commands; const struct nl80211_vendor_cmd_info *vendor_events; int n_vendor_commands, n_vendor_events; u16 max_ap_assoc_sta; u8 max_num_csa_counters; u32 bss_select_support; u8 nan_supported_bands; u32 txq_limit; u32 
txq_memory_limit; u32 txq_quantum; unsigned long tx_queue_len; u8 support_mbssid:1, support_only_he_mbssid:1; const struct cfg80211_pmsr_capabilities *pmsr_capa; struct { u64 peer, vif; u8 max_retry; } tid_config_support; u8 max_data_retry_count; const struct cfg80211_sar_capa *sar_capa; struct rfkill *rfkill; u8 mbssid_max_interfaces; u8 ema_max_profile_periodicity; u16 max_num_akm_suites; u16 hw_timestamp_max_peers; char priv[] __aligned(NETDEV_ALIGN); }; static inline struct net *wiphy_net(struct wiphy *wiphy) { return read_pnet(&wiphy->_net); } static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net) { write_pnet(&wiphy->_net, net); } /** * wiphy_priv - return priv from wiphy * * @wiphy: the wiphy whose priv pointer to return * Return: The priv of @wiphy. */ static inline void *wiphy_priv(struct wiphy *wiphy) { BUG_ON(!wiphy); return &wiphy->priv; } /** * priv_to_wiphy - return the wiphy containing the priv * * @priv: a pointer previously returned by wiphy_priv * Return: The wiphy of @priv. */ static inline struct wiphy *priv_to_wiphy(void *priv) { BUG_ON(!priv); return container_of(priv, struct wiphy, priv); } /** * set_wiphy_dev - set device pointer for wiphy * * @wiphy: The wiphy whose device to bind * @dev: The device to parent it to */ static inline void set_wiphy_dev(struct wiphy *wiphy, struct device *dev) { wiphy->dev.parent = dev; } /** * wiphy_dev - get wiphy dev pointer * * @wiphy: The wiphy whose device struct to look up * Return: The dev of @wiphy. */ static inline struct device *wiphy_dev(struct wiphy *wiphy) { return wiphy->dev.parent; } /** * wiphy_name - get wiphy name * * @wiphy: The wiphy whose name to return * Return: The name of @wiphy. */ static inline const char *wiphy_name(const struct wiphy *wiphy) { return dev_name(&wiphy->dev); } /** * wiphy_new_nm - create a new wiphy for use with cfg80211 * * @ops: The configuration operations for this device * @sizeof_priv: The size of the private area to allocate * @requested_name: Request a particular name. * NULL is valid value, and means use the default phy%d naming. * * Create a new wiphy and associate the given operations with it. * @sizeof_priv bytes are allocated for private use. * * Return: A pointer to the new wiphy. This pointer must be * assigned to each netdev's ieee80211_ptr for proper operation. */ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, const char *requested_name); /** * wiphy_new - create a new wiphy for use with cfg80211 * * @ops: The configuration operations for this device * @sizeof_priv: The size of the private area to allocate * * Create a new wiphy and associate the given operations with it. * @sizeof_priv bytes are allocated for private use. * * Return: A pointer to the new wiphy. This pointer must be * assigned to each netdev's ieee80211_ptr for proper operation. */ static inline struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) { return wiphy_new_nm(ops, sizeof_priv, NULL); } /** * wiphy_register - register a wiphy with cfg80211 * * @wiphy: The wiphy to register. * * Return: A non-negative wiphy index or a negative error code. 
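 *
 * A rough sketch of the usual allocation and registration sequence;
 * my_cfg80211_ops, my_priv and parent_dev are assumed names, and a real
 * driver fills in many more fields (bands, cipher suites, flags, ...)
 * before registering:
 *
 * .. code-block:: c
 *
 *	struct my_priv {
 *		struct wiphy *wiphy;
 *	};
 *
 *	static int my_register(struct device *parent_dev)
 *	{
 *		struct my_priv *priv;
 *		struct wiphy *wiphy;
 *		int err;
 *
 *		wiphy = wiphy_new(&my_cfg80211_ops, sizeof(*priv));
 *		if (!wiphy)
 *			return -ENOMEM;
 *		set_wiphy_dev(wiphy, parent_dev);
 *		wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
 *		err = wiphy_register(wiphy);
 *		if (err) {
 *			wiphy_free(wiphy);
 *			return err;
 *		}
 *		priv = wiphy_priv(wiphy);
 *		priv->wiphy = wiphy;
 *		return 0;
 *	}
 *
 * On teardown, wiphy_unregister() is followed by wiphy_free().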
*/ int wiphy_register(struct wiphy *wiphy); /* this is a define for better error reporting (file/line) */ #define lockdep_assert_wiphy(wiphy) lockdep_assert_held(&(wiphy)->mtx) /** * rcu_dereference_wiphy - rcu_dereference with debug checking * @wiphy: the wiphy to check the locking on * @p: The pointer to read, prior to dereferencing * * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() * or RTNL. Note: Please prefer wiphy_dereference() or rcu_dereference(). */ #define rcu_dereference_wiphy(wiphy, p) \ rcu_dereference_check(p, lockdep_is_held(&wiphy->mtx)) /** * wiphy_dereference - fetch RCU pointer when updates are prevented by wiphy mtx * @wiphy: the wiphy to check the locking on * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit the * READ_ONCE(), because caller holds the wiphy mutex used for updates. */ #define wiphy_dereference(wiphy, p) \ rcu_dereference_protected(p, lockdep_is_held(&wiphy->mtx)) /** * get_wiphy_regdom - get custom regdomain for the given wiphy * @wiphy: the wiphy to get the regdomain from */ const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); /** * wiphy_unregister - deregister a wiphy from cfg80211 * * @wiphy: The wiphy to unregister. * * After this call, no more requests can be made with this priv * pointer, but the call may sleep to wait for an outstanding * request that is being handled. */ void wiphy_unregister(struct wiphy *wiphy); /** * wiphy_free - free wiphy * * @wiphy: The wiphy to free */ void wiphy_free(struct wiphy *wiphy); /* internal structs */ struct cfg80211_conn; struct cfg80211_internal_bss; struct cfg80211_cached_keys; struct cfg80211_cqm_config; /** * wiphy_lock - lock the wiphy * @wiphy: the wiphy to lock * * This is needed around registering and unregistering netdevs that * aren't created through cfg80211 calls, since that requires locking * in cfg80211 when the notifiers is called, but that cannot * differentiate which way it's called. * * It can also be used by drivers for their own purposes. * * When cfg80211 ops are called, the wiphy is already locked. * * Note that this makes sure that no workers that have been queued * with wiphy_queue_work() are running. */ static inline void wiphy_lock(struct wiphy *wiphy) __acquires(&wiphy->mtx) { mutex_lock(&wiphy->mtx); __acquire(&wiphy->mtx); } /** * wiphy_unlock - unlock the wiphy again * @wiphy: the wiphy to unlock */ static inline void wiphy_unlock(struct wiphy *wiphy) __releases(&wiphy->mtx) { __release(&wiphy->mtx); mutex_unlock(&wiphy->mtx); } struct wiphy_work; typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *); struct wiphy_work { struct list_head entry; wiphy_work_func_t func; }; static inline void wiphy_work_init(struct wiphy_work *work, wiphy_work_func_t func) { INIT_LIST_HEAD(&work->entry); work->func = func; } /** * wiphy_work_queue - queue work for the wiphy * @wiphy: the wiphy to queue for * @work: the work item * * This is useful for work that must be done asynchronously, and work * queued here has the special property that the wiphy mutex will be * held as if wiphy_lock() was called, and that it cannot be running * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can * use just cancel_work() instead of cancel_work_sync(), it requires * being in a section protected by wiphy_lock(). 
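 *
 * A minimal usage sketch; the containing my_priv structure and the handler
 * and helper names are illustrative assumptions. The work item is typically
 * initialized once at setup and queued whenever needed:
 *
 * .. code-block:: c
 *
 *	struct my_priv {
 *		struct wiphy_work update_wk;
 *	};
 *
 *	static void my_update_work(struct wiphy *wiphy, struct wiphy_work *work)
 *	{
 *		lockdep_assert_wiphy(wiphy);
 *	}
 *
 *	static void my_schedule_update(struct wiphy *wiphy, struct my_priv *priv)
 *	{
 *		wiphy_work_init(&priv->update_wk, my_update_work);
 *		wiphy_work_queue(wiphy, &priv->update_wk);
 *	}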
*/ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work); /** * wiphy_work_cancel - cancel previously queued work * @wiphy: the wiphy, for debug purposes * @work: the work to cancel * * Cancel the work *without* waiting for it, this assumes being * called under the wiphy mutex acquired by wiphy_lock(). */ void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work); /** * wiphy_work_flush - flush previously queued work * @wiphy: the wiphy, for debug purposes * @work: the work to flush, this can be %NULL to flush all work * * Flush the work (i.e. run it if pending). This must be called * under the wiphy mutex acquired by wiphy_lock(). */ void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work *work); struct wiphy_delayed_work { struct wiphy_work work; struct wiphy *wiphy; struct timer_list timer; }; void wiphy_delayed_work_timer(struct timer_list *t); static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork, wiphy_work_func_t func) { timer_setup(&dwork->timer, wiphy_delayed_work_timer, 0); wiphy_work_init(&dwork->work, func); } /** * wiphy_delayed_work_queue - queue delayed work for the wiphy * @wiphy: the wiphy to queue for * @dwork: the delayable worker * @delay: number of jiffies to wait before queueing * * This is useful for work that must be done asynchronously, and work * queued here has the special property that the wiphy mutex will be * held as if wiphy_lock() was called, and that it cannot be running * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can * use just cancel_work() instead of cancel_work_sync(), it requires * being in a section protected by wiphy_lock(). */ void wiphy_delayed_work_queue(struct wiphy *wiphy, struct wiphy_delayed_work *dwork, unsigned long delay); /** * wiphy_delayed_work_cancel - cancel previously queued delayed work * @wiphy: the wiphy, for debug purposes * @dwork: the delayed work to cancel * * Cancel the work *without* waiting for it, this assumes being * called under the wiphy mutex acquired by wiphy_lock(). */ void wiphy_delayed_work_cancel(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); /** * wiphy_delayed_work_flush - flush previously queued delayed work * @wiphy: the wiphy, for debug purposes * @dwork: the delayed work to flush * * Flush the work (i.e. run it if pending). This must be called * under the wiphy mutex acquired by wiphy_lock(). */ void wiphy_delayed_work_flush(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); /** * struct wireless_dev - wireless device state * * For netdevs, this structure must be allocated by the driver * that uses the ieee80211_ptr field in struct net_device (this * is intentional so it can be allocated along with the netdev.) * It need not be registered then as netdev registration will * be intercepted by cfg80211 to see the new wireless device, * however, drivers must lock the wiphy before registering or * unregistering netdevs if they pre-create any netdevs (in ops * called from cfg80211, the wiphy is already locked.) * * For non-netdev uses, it must also be allocated by the driver * in response to the cfg80211 callbacks that require it, as * there's no netdev registration in that case it may not be * allocated outside of callback operations that return it. 
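 *
 * As a rough sketch of the netdev case (my_priv and my_add_iface are
 * assumed names; netdev registration itself is omitted, see the locking
 * note above):
 *
 * .. code-block:: c
 *
 *	struct my_priv {
 *		struct wireless_dev wdev;
 *	};
 *
 *	static struct net_device *my_add_iface(struct wiphy *wiphy)
 *	{
 *		struct net_device *dev;
 *		struct my_priv *priv;
 *
 *		dev = alloc_etherdev(sizeof(*priv));
 *		if (!dev)
 *			return NULL;
 *		priv = netdev_priv(dev);
 *		priv->wdev.wiphy = wiphy;
 *		priv->wdev.iftype = NL80211_IFTYPE_STATION;
 *		dev->ieee80211_ptr = &priv->wdev;
 *		SET_NETDEV_DEV(dev, wiphy_dev(wiphy));
 *		return dev;
 *	}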
* * @wiphy: pointer to hardware description * @iftype: interface type * @registered: is this wdev already registered with cfg80211 * @registering: indicates we're doing registration under wiphy lock * for the notifier * @list: (private) Used to collect the interfaces * @netdev: (private) Used to reference back to the netdev, may be %NULL * @identifier: (private) Identifier used in nl80211 to identify this * wireless device if it has no netdev * @u: union containing data specific to @iftype * @connected: indicates if connected or not (STA mode) * @wext: (private) Used by the internal wireless extensions compat code * @wext.ibss: (private) IBSS data part of wext handling * @wext.connect: (private) connection handling data * @wext.keys: (private) (WEP) key data * @wext.ie: (private) extra elements for association * @wext.ie_len: (private) length of extra elements * @wext.bssid: (private) selected network BSSID * @wext.ssid: (private) selected network SSID * @wext.default_key: (private) selected default key index * @wext.default_mgmt_key: (private) selected default management key index * @wext.prev_bssid: (private) previous BSSID for reassociation * @wext.prev_bssid_valid: (private) previous BSSID validity * @use_4addr: indicates 4addr mode is used on this interface, must be * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface * @mgmt_registrations: list of registrations for management frames * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver * @address: The address for this device, valid only if @netdev is %NULL * @is_running: true if this is a non-netdev device that has been started, e.g. * the P2P Device. * @cac_started: true if DFS channel availability check has been started * @cac_start_time: timestamp (jiffies) when the dfs state was entered. 
* @cac_time_ms: CAC time in ms * @ps: powersave mode is enabled * @ps_timeout: dynamic powersave timeout * @ap_unexpected_nlportid: (private) netlink port ID of application * registered for unexpected class 3 frames (AP mode) * @conn: (private) cfg80211 software SME connection state machine data * @connect_keys: (private) keys to set after connection is established * @conn_bss_type: connecting/connected BSS type * @conn_owner_nlportid: (private) connection owner socket port ID * @disconnect_wk: (private) auto-disconnect work * @disconnect_bssid: (private) the BSSID to use for auto-disconnect * @event_list: (private) list for internal event processing * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID * @nl_owner_dead: (private) owner socket went away * @cqm_rssi_work: (private) CQM RSSI reporting work * @cqm_config: (private) nl80211 RSSI monitor state * @pmsr_list: (private) peer measurement requests * @pmsr_lock: (private) peer measurements requests/results lock * @pmsr_free_wk: (private) peer measurements cleanup work * @unprot_beacon_reported: (private) timestamp of last * unprotected beacon report * @links: array of %IEEE80211_MLD_MAX_NUM_LINKS elements containing @addr * @ap and @client for each link * @valid_links: bitmap describing what elements of @links are valid */ struct wireless_dev { struct wiphy *wiphy; enum nl80211_iftype iftype; /* the remainder of this struct should be private to cfg80211 */ struct list_head list; struct net_device *netdev; u32 identifier; struct list_head mgmt_registrations; u8 mgmt_registrations_need_update:1; bool use_4addr, is_running, registered, registering; u8 address[ETH_ALEN] __aligned(sizeof(u16)); /* currently used for IBSS and SME - might be rearranged later */ struct cfg80211_conn *conn; struct cfg80211_cached_keys *connect_keys; enum ieee80211_bss_type conn_bss_type; u32 conn_owner_nlportid; struct work_struct disconnect_wk; u8 disconnect_bssid[ETH_ALEN]; struct list_head event_list; spinlock_t event_lock; u8 connected:1; bool ps; int ps_timeout; u32 ap_unexpected_nlportid; u32 owner_nlportid; bool nl_owner_dead; /* FIXME: need to rework radar detection for MLO */ bool cac_started; unsigned long cac_start_time; unsigned int cac_time_ms; #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { struct cfg80211_ibss_params ibss; struct cfg80211_connect_params connect; struct cfg80211_cached_keys *keys; const u8 *ie; size_t ie_len; u8 bssid[ETH_ALEN]; u8 prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; s8 default_key, default_mgmt_key; bool prev_bssid_valid; } wext; #endif struct wiphy_work cqm_rssi_work; struct cfg80211_cqm_config __rcu *cqm_config; struct list_head pmsr_list; spinlock_t pmsr_lock; struct work_struct pmsr_free_wk; unsigned long unprot_beacon_reported; union { struct { u8 connected_addr[ETH_ALEN] __aligned(2); u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; } client; struct { int beacon_interval; struct cfg80211_chan_def preset_chandef; struct cfg80211_chan_def chandef; u8 id[IEEE80211_MAX_MESH_ID_LEN]; u8 id_len, id_up_len; } mesh; struct { struct cfg80211_chan_def preset_chandef; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; } ap; struct { struct cfg80211_internal_bss *current_bss; struct cfg80211_chan_def chandef; int beacon_interval; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; } ibss; struct { struct cfg80211_chan_def chandef; } ocb; } u; struct { u8 addr[ETH_ALEN] __aligned(2); union { struct { unsigned int beacon_interval; struct cfg80211_chan_def chandef; } ap; struct { struct 
cfg80211_internal_bss *current_bss; } client; }; } links[IEEE80211_MLD_MAX_NUM_LINKS]; u16 valid_links; }; static inline const u8 *wdev_address(struct wireless_dev *wdev) { if (wdev->netdev) return wdev->netdev->dev_addr; return wdev->address; } static inline bool wdev_running(struct wireless_dev *wdev) { if (wdev->netdev) return netif_running(wdev->netdev); return wdev->is_running; } /** * wdev_priv - return wiphy priv from wireless_dev * * @wdev: The wireless device whose wiphy's priv pointer to return * Return: The wiphy priv of @wdev. */ static inline void *wdev_priv(struct wireless_dev *wdev) { BUG_ON(!wdev); return wiphy_priv(wdev->wiphy); } /** * wdev_chandef - return chandef pointer from wireless_dev * @wdev: the wdev * @link_id: the link ID for MLO * * Return: The chandef depending on the mode, or %NULL. */ struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, unsigned int link_id); static inline void WARN_INVALID_LINK_ID(struct wireless_dev *wdev, unsigned int link_id) { WARN_ON(link_id && !wdev->valid_links); WARN_ON(wdev->valid_links && !(wdev->valid_links & BIT(link_id))); } #define for_each_valid_link(link_info, link_id) \ for (link_id = 0; \ link_id < ((link_info)->valid_links ? \ ARRAY_SIZE((link_info)->links) : 1); \ link_id++) \ if (!(link_info)->valid_links || \ ((link_info)->valid_links & BIT(link_id))) /** * DOC: Utility functions * * cfg80211 offers a number of utility functions that can be useful. */ /** * ieee80211_channel_equal - compare two struct ieee80211_channel * * @a: 1st struct ieee80211_channel * @b: 2nd struct ieee80211_channel * Return: true if center frequency of @a == @b */ static inline bool ieee80211_channel_equal(struct ieee80211_channel *a, struct ieee80211_channel *b) { return (a->center_freq == b->center_freq && a->freq_offset == b->freq_offset); } /** * ieee80211_channel_to_khz - convert ieee80211_channel to frequency in KHz * @chan: struct ieee80211_channel to convert * Return: The corresponding frequency (in KHz) */ static inline u32 ieee80211_channel_to_khz(const struct ieee80211_channel *chan) { return MHZ_TO_KHZ(chan->center_freq) + chan->freq_offset; } /** * ieee80211_s1g_channel_width - get allowed channel width from @chan * * Only allowed for band NL80211_BAND_S1GHZ * @chan: channel * Return: The allowed channel width for this center_freq */ enum nl80211_chan_width ieee80211_s1g_channel_width(const struct ieee80211_channel *chan); /** * ieee80211_channel_to_freq_khz - convert channel number to frequency * @chan: channel number * @band: band, necessary due to channel number overlap * Return: The corresponding frequency (in KHz), or 0 if the conversion failed. */ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band); /** * ieee80211_channel_to_frequency - convert channel number to frequency * @chan: channel number * @band: band, necessary due to channel number overlap * Return: The corresponding frequency (in MHz), or 0 if the conversion failed. */ static inline int ieee80211_channel_to_frequency(int chan, enum nl80211_band band) { return KHZ_TO_MHZ(ieee80211_channel_to_freq_khz(chan, band)); } /** * ieee80211_freq_khz_to_channel - convert frequency to channel number * @freq: center frequency in KHz * Return: The corresponding channel, or 0 if the conversion failed. */ int ieee80211_freq_khz_to_channel(u32 freq); /** * ieee80211_frequency_to_channel - convert frequency to channel number * @freq: center frequency in MHz * Return: The corresponding channel, or 0 if the conversion failed. 
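 *
 * As a usage sketch (the values are just the well-known 5 GHz channel 36
 * mapping):
 *
 *	int chan = ieee80211_frequency_to_channel(5180);
 *	int freq = ieee80211_channel_to_frequency(36, NL80211_BAND_5GHZ);
 *
 * Here chan ends up as 36 and freq as 5180; both helpers return 0 if the
 * conversion fails.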
*/ static inline int ieee80211_frequency_to_channel(int freq) { return ieee80211_freq_khz_to_channel(MHZ_TO_KHZ(freq)); } /** * ieee80211_get_channel_khz - get channel struct from wiphy for specified * frequency * @wiphy: the struct wiphy to get the channel for * @freq: the center frequency (in KHz) of the channel * Return: The channel struct from @wiphy at @freq. */ struct ieee80211_channel * ieee80211_get_channel_khz(struct wiphy *wiphy, u32 freq); /** * ieee80211_get_channel - get channel struct from wiphy for specified frequency * * @wiphy: the struct wiphy to get the channel for * @freq: the center frequency (in MHz) of the channel * Return: The channel struct from @wiphy at @freq. */ static inline struct ieee80211_channel * ieee80211_get_channel(struct wiphy *wiphy, int freq) { return ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(freq)); } /** * cfg80211_channel_is_psc - Check if the channel is a 6 GHz PSC * @chan: control channel to check * * The Preferred Scanning Channels (PSC) are defined in * Draft IEEE P802.11ax/D5.0, 26.17.2.3.3 */ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) { if (chan->band != NL80211_BAND_6GHZ) return false; return ieee80211_frequency_to_channel(chan->center_freq) % 16 == 5; } /** * ieee80211_get_response_rate - get basic rate for a given rate * * @sband: the band to look for rates in * @basic_rates: bitmap of basic rates * @bitrate: the bitrate for which to find the basic rate * * Return: The basic rate corresponding to a given bitrate, that * is the next lower bitrate contained in the basic rate map, * which is, for this function, given as a bitmap of indices of * rates in the band's bitrate table. */ const struct ieee80211_rate * ieee80211_get_response_rate(struct ieee80211_supported_band *sband, u32 basic_rates, int bitrate); /** * ieee80211_mandatory_rates - get mandatory rates for a given band * @sband: the band to look for rates in * * This function returns a bitmap of the mandatory rates for the given * band, bits are set according to the rate position in the bitrates array. 
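 *
 * For example, a driver could walk the band's bitrate table and check the
 * returned bitmap (sband here stands for a &struct ieee80211_supported_band
 * the driver already has):
 *
 *	u32 mandatory = ieee80211_mandatory_rates(sband);
 *	int i;
 *
 *	for (i = 0; i < sband->n_bitrates; i++)
 *		if (mandatory & BIT(i))
 *			pr_debug("rate %d kbps is mandatory\n",
 *				 sband->bitrates[i].bitrate * 100);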
*/ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband); /* * Radiotap parsing functions -- for controlled injection support * * Implemented in net/wireless/radiotap.c * Documentation in Documentation/networking/radiotap-headers.rst */ struct radiotap_align_size { uint8_t align:4, size:4; }; struct ieee80211_radiotap_namespace { const struct radiotap_align_size *align_size; int n_bits; uint32_t oui; uint8_t subns; }; struct ieee80211_radiotap_vendor_namespaces { const struct ieee80211_radiotap_namespace *ns; int n_ns; }; /** * struct ieee80211_radiotap_iterator - tracks walk thru present radiotap args * @this_arg_index: index of current arg, valid after each successful call * to ieee80211_radiotap_iterator_next() * @this_arg: pointer to current radiotap arg; it is valid after each * call to ieee80211_radiotap_iterator_next() but also after * ieee80211_radiotap_iterator_init() where it will point to * the beginning of the actual data portion * @this_arg_size: length of the current arg, for convenience * @current_namespace: pointer to the current namespace definition * (or internally %NULL if the current namespace is unknown) * @is_radiotap_ns: indicates whether the current namespace is the default * radiotap namespace or not * * @_rtheader: pointer to the radiotap header we are walking through * @_max_length: length of radiotap header in cpu byte ordering * @_arg_index: next argument index * @_arg: next argument pointer * @_next_bitmap: internal pointer to next present u32 * @_bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present * @_vns: vendor namespace definitions * @_next_ns_data: beginning of the next namespace's data * @_reset_on_ext: internal; reset the arg index to 0 when going to the * next bitmap word * * Describes the radiotap parser state. Fields prefixed with an underscore * must not be used by users of the parser, only by the parser internally. */ struct ieee80211_radiotap_iterator { struct ieee80211_radiotap_header *_rtheader; const struct ieee80211_radiotap_vendor_namespaces *_vns; const struct ieee80211_radiotap_namespace *current_namespace; unsigned char *_arg, *_next_ns_data; __le32 *_next_bitmap; unsigned char *this_arg; int this_arg_index; int this_arg_size; int is_radiotap_ns; int _max_length; int _arg_index; uint32_t _bitmap_shifter; int _reset_on_ext; }; int ieee80211_radiotap_iterator_init(struct ieee80211_radiotap_iterator *iterator, struct ieee80211_radiotap_header *radiotap_header, int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns); int ieee80211_radiotap_iterator_next(struct ieee80211_radiotap_iterator *iterator); extern const unsigned char rfc1042_header[6]; extern const unsigned char bridge_tunnel_header[6]; /** * ieee80211_get_hdrlen_from_skb - get header length from data * * @skb: the frame * * Given an skb with a raw 802.11 header at the data pointer this function * returns the 802.11 header length. * * Return: The 802.11 header length in bytes (not including encryption * headers). Or 0 if the data in the sk_buff is too short to contain a valid * 802.11 header. */ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); /** * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format * Return: The header length in bytes. 
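 *
 * A minimal sketch of skipping past the 802.11 header of a frame, assuming
 * skb->data already points at a complete 802.11 header:
 *
 *	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 *	unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control);
 *	u8 *payload = skb->data + hdrlen;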
*/ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); /** * ieee80211_get_mesh_hdrlen - get mesh extension header length * @meshhdr: the mesh extension header, only the flags field * (first byte) will be accessed * Return: The length of the extension header, which is always at * least 6 bytes and at most 18 if address 5 and 6 are present. */ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); /** * DOC: Data path helpers * * In addition to generic utilities, cfg80211 also offers * functions that help implement the data path for devices * that do not do the 802.11/802.3 conversion on the device. */ /** * ieee80211_data_to_8023_exthdr - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame * @ehdr: pointer to a &struct ethhdr that will get the header, instead * of it being pushed into the SKB * @addr: the device MAC address * @iftype: the virtual interface type * @data_offset: offset of payload after the 802.11 header * @is_amsdu: true if the 802.11 header is A-MSDU * Return: 0 on success. Non-zero on error. */ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, const u8 *addr, enum nl80211_iftype iftype, u8 data_offset, bool is_amsdu); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame * @addr: the device MAC address * @iftype: the virtual interface type * Return: 0 on success. Non-zero on error. */ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype) { return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false); } /** * ieee80211_is_valid_amsdu - check if subframe lengths of an A-MSDU are valid * * This is used to detect non-standard A-MSDU frames, e.g. the ones generated * by ath10k and ath11k, where the subframe length includes the length of the * mesh control field. * * @skb: The input A-MSDU frame without any headers. * @mesh_hdr: the type of mesh header to test * 0: non-mesh A-MSDU length field * 1: big-endian mesh A-MSDU length field * 2: little-endian mesh A-MSDU length field * Returns: true if subframe header lengths are valid for the @mesh_hdr mode */ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr); /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * * Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames. * The @list will be empty if the decode fails. The @skb must be fully * header-less before being passed in here; it is freed in this function. * * @skb: The input A-MSDU frame without any headers. * @list: The output list of 802.3 frames. It must be allocated and * initialized by the caller. * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. * @check_da: DA to check in the inner ethernet header, or NULL * @check_sa: SA to check in the inner ethernet header, or NULL * @mesh_control: see mesh_hdr in ieee80211_is_valid_amsdu */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, const u8 *check_da, const u8 *check_sa, u8 mesh_control); /** * ieee80211_get_8023_tunnel_proto - get RFC1042 or bridge tunnel encap protocol * * Check for RFC1042 or bridge tunnel header and fetch the encapsulated * protocol. 
* * @hdr: pointer to the MSDU payload * @proto: destination pointer to store the protocol * Return: true if encapsulation was found */ bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto); /** * ieee80211_strip_8023_mesh_hdr - strip mesh header from converted 802.3 frames * * Strip the mesh header, which was left in by ieee80211_data_to_8023 as part * of the MSDU data. Also move any source/destination addresses from the mesh * header to the ethernet header (if present). * * @skb: The 802.3 frame with embedded mesh header */ int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame * @skb: the data frame * @qos_map: Interworking QoS mapping or %NULL if not in use * Return: The 802.1p/1d tag. */ unsigned int cfg80211_classify8021d(struct sk_buff *skb, struct cfg80211_qos_map *qos_map); /** * cfg80211_find_elem_match - match information element and byte array in data * * @eid: element ID * @ies: data consisting of IEs * @len: length of data * @match: byte array to match * @match_len: number of bytes in the match array * @match_offset: offset in the IE data where the byte array should match. * Note the difference to cfg80211_find_ie_match() which considers * the offset to start from the element ID byte, but here we take * the data portion instead. * * Return: %NULL if the element ID could not be found or if * the element is invalid (claims to be longer than the given * data) or if the byte array doesn't match; otherwise return the * requested element struct. * * Note: There are no checks on the element length other than * having to fit into the given data and being large enough for the * byte array to match. */ const struct element * cfg80211_find_elem_match(u8 eid, const u8 *ies, unsigned int len, const u8 *match, unsigned int match_len, unsigned int match_offset); /** * cfg80211_find_ie_match - match information element and byte array in data * * @eid: element ID * @ies: data consisting of IEs * @len: length of data * @match: byte array to match * @match_len: number of bytes in the match array * @match_offset: offset in the IE where the byte array should match. * If match_len is zero, this must also be set to zero. * Otherwise this must be set to 2 or more, because the first * byte is the element id, which is already compared to eid, and * the second byte is the IE length. * * Return: %NULL if the element ID could not be found or if * the element is invalid (claims to be longer than the given * data) or if the byte array doesn't match, or a pointer to the first * byte of the requested element, that is the byte containing the * element ID. * * Note: There are no checks on the element length other than * having to fit into the given data and being large enough for the * byte array to match. */ static inline const u8 * cfg80211_find_ie_match(u8 eid, const u8 *ies, unsigned int len, const u8 *match, unsigned int match_len, unsigned int match_offset) { /* match_offset can't be smaller than 2, unless match_len is * zero, in which case match_offset must be zero as well. */ if (WARN_ON((match_len && match_offset < 2) || (!match_len && match_offset))) return NULL; return (const void *)cfg80211_find_elem_match(eid, ies, len, match, match_len, match_offset ? 
match_offset - 2 : 0); } /** * cfg80211_find_elem - find information element in data * * @eid: element ID * @ies: data consisting of IEs * @len: length of data * * Return: %NULL if the element ID could not be found or if * the element is invalid (claims to be longer than the given * data) or if the byte array doesn't match; otherwise return the * requested element struct. * * Note: There are no checks on the element length other than * having to fit into the given data. */ static inline const struct element * cfg80211_find_elem(u8 eid, const u8 *ies, int len) { return cfg80211_find_elem_match(eid, ies, len, NULL, 0, 0); } /** * cfg80211_find_ie - find information element in data * * @eid: element ID * @ies: data consisting of IEs * @len: length of data * * Return: %NULL if the element ID could not be found or if * the element is invalid (claims to be longer than the given * data), or a pointer to the first byte of the requested * element, that is the byte containing the element ID. * * Note: There are no checks on the element length other than * having to fit into the given data. */ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) { return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0); } /** * cfg80211_find_ext_elem - find information element with EID Extension in data * * @ext_eid: element ID Extension * @ies: data consisting of IEs * @len: length of data * * Return: %NULL if the extended element could not be found or if * the element is invalid (claims to be longer than the given * data) or if the byte array doesn't match; otherwise return the * requested element struct. * * Note: There are no checks on the element length other than * having to fit into the given data. */ static inline const struct element * cfg80211_find_ext_elem(u8 ext_eid, const u8 *ies, int len) { return cfg80211_find_elem_match(WLAN_EID_EXTENSION, ies, len, &ext_eid, 1, 0); } /** * cfg80211_find_ext_ie - find information element with EID Extension in data * * @ext_eid: element ID Extension * @ies: data consisting of IEs * @len: length of data * * Return: %NULL if the extended element ID could not be found or if * the element is invalid (claims to be longer than the given * data), or a pointer to the first byte of the requested * element, that is the byte containing the element ID. * * Note: There are no checks on the element length other than * having to fit into the given data. */ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) { return cfg80211_find_ie_match(WLAN_EID_EXTENSION, ies, len, &ext_eid, 1, 2); } /** * cfg80211_find_vendor_elem - find vendor specific information element in data * * @oui: vendor OUI * @oui_type: vendor-specific OUI type (must be < 0xff), negative means any * @ies: data consisting of IEs * @len: length of data * * Return: %NULL if the vendor specific element ID could not be found or if the * element is invalid (claims to be longer than the given data); otherwise * return the element structure for the requested element. * * Note: There are no checks on the element length other than having to fit into * the given data. 
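 *
 * For example, to look up the WPA vendor element (OUI constants from
 * linux/ieee80211.h) in a buffer of IEs, where ies and ies_len stand for
 * the buffer being parsed:
 *
 *	const struct element *elem;
 *
 *	elem = cfg80211_find_vendor_elem(WLAN_OUI_MICROSOFT,
 *					 WLAN_OUI_TYPE_MICROSOFT_WPA,
 *					 ies, ies_len);
 *	if (elem)
 *		pr_debug("WPA element, %d bytes of data\n", elem->datalen);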
*/ const struct element *cfg80211_find_vendor_elem(unsigned int oui, int oui_type, const u8 *ies, unsigned int len); /** * cfg80211_find_vendor_ie - find vendor specific information element in data * * @oui: vendor OUI * @oui_type: vendor-specific OUI type (must be < 0xff), negative means any * @ies: data consisting of IEs * @len: length of data * * Return: %NULL if the vendor specific element ID could not be found or if the * element is invalid (claims to be longer than the given data), or a pointer to * the first byte of the requested element, that is the byte containing the * element ID. * * Note: There are no checks on the element length other than having to fit into * the given data. */ static inline const u8 * cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, unsigned int len) { return (const void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); } /** * enum cfg80211_rnr_iter_ret - reduced neighbor report iteration state * @RNR_ITER_CONTINUE: continue iterating with the next entry * @RNR_ITER_BREAK: break iteration and return success * @RNR_ITER_ERROR: break iteration and return error */ enum cfg80211_rnr_iter_ret { RNR_ITER_CONTINUE, RNR_ITER_BREAK, RNR_ITER_ERROR, }; /** * cfg80211_iter_rnr - iterate reduced neighbor report entries * @elems: the frame elements to iterate RNR elements and then * their entries in * @elems_len: length of the elements * @iter: iteration function, see also &enum cfg80211_rnr_iter_ret * for the return value * @iter_data: additional data passed to the iteration function * Return: %true on success (after successfully iterating all entries * or if the iteration function returned %RNR_ITER_BREAK), * %false on error (iteration function returned %RNR_ITER_ERROR * or elements were malformed.) */ bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len, enum cfg80211_rnr_iter_ret (*iter)(void *data, u8 type, const struct ieee80211_neighbor_ap_info *info, const u8 *tbtt_info, u8 tbtt_info_len), void *iter_data); /** * cfg80211_defragment_element - Defrag the given element data into a buffer * * @elem: the element to defragment * @ies: elements where @elem is contained * @ieslen: length of @ies * @data: buffer to store element data, or %NULL to just determine size * @data_len: length of @data, or 0 * @frag_id: the element ID of fragments * * Return: length of @data, or -EINVAL on error * * Copy out all data from an element that may be fragmented into @data, while * skipping all headers. * * The function uses memmove() internally. It is acceptable to defragment an * element in-place. */ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies, size_t ieslen, u8 *data, size_t data_len, u8 frag_id); /** * cfg80211_send_layer2_update - send layer 2 update frame * * @dev: network device * @addr: STA MAC address * * Wireless drivers can use this function to update forwarding tables in bridge * devices upon STA association. */ void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr); /** * DOC: Regulatory enforcement infrastructure * * TODO */ /** * regulatory_hint - driver hint to the wireless core a regulatory domain * @wiphy: the wireless device giving the hint (used only for reporting * conflicts) * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain * should be in. If @rd is set this should be NULL. Note that if you * set this to NULL you should still set rd->alpha2 to some accepted * alpha2. 
* * Wireless drivers can use this function to hint to the wireless core * what they believe should be the current regulatory domain by * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory * domain should be in or by providing a completely built regulatory domain. * If the driver provides an ISO/IEC 3166 alpha2, userspace will be queried * for a regulatory domain structure for the respective country. * * The wiphy must have been registered to cfg80211 prior to this call. * For cfg80211 drivers this means you must first use wiphy_register(), * for mac80211 drivers you must first use ieee80211_register_hw(). * * Drivers should check the return value, it's possible you can get * an -ENOMEM. * * Return: 0 on success. -ENOMEM. */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); /** * regulatory_set_wiphy_regd - set regdom info for self-managed drivers * @wiphy: the wireless device we want to process the regulatory domain on * @rd: the regulatory domain information to use for this wiphy * * Set the regulatory domain information for self-managed wiphys, only they * may use this function. See %REGULATORY_WIPHY_SELF_MANAGED for more * information. * * Return: 0 on success. -EINVAL, -EPERM */ int regulatory_set_wiphy_regd(struct wiphy *wiphy, struct ieee80211_regdomain *rd); /** * regulatory_set_wiphy_regd_sync - set regdom for self-managed drivers * @wiphy: the wireless device we want to process the regulatory domain on * @rd: the regulatory domain information to use for this wiphy * * This function requires the RTNL and the wiphy mutex to be held and * applies the new regdomain synchronously to this wiphy. For more details * see regulatory_set_wiphy_regd(). * * Return: 0 on success. -EINVAL, -EPERM */ int regulatory_set_wiphy_regd_sync(struct wiphy *wiphy, struct ieee80211_regdomain *rd); /** * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain * @wiphy: the wireless device we want to process the regulatory domain on * @regd: the custom regulatory domain to use for this wiphy * * Drivers can sometimes have custom regulatory domains which do not apply * to a specific country. Drivers can use this to apply such custom regulatory * domains. This routine must be called prior to wiphy registration. The * custom regulatory domain will be trusted completely and as such previous * default channel settings will be disregarded. If no rule is found for a * channel on the regulatory domain the channel will be disabled. * Drivers using this for a wiphy should also set the wiphy flag * REGULATORY_CUSTOM_REG or cfg80211 will set it for the wiphy * that called this helper. */ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, const struct ieee80211_regdomain *regd); /** * freq_reg_info - get regulatory information for the given frequency * @wiphy: the wiphy for which we want to process this rule * @center_freq: Frequency in KHz for which we want regulatory information * * Use this function to get the regulatory rule for a specific frequency on * a given wireless device. If the device has a specific regulatory domain * it wants to follow we respect that unless a country IE has been received * and processed already. * * Return: A valid pointer, or, when an error occurs, for example if no rule * can be found, the return value is encoded using ERR_PTR(). Use IS_ERR() to * check and PTR_ERR() to obtain the numeric return value.
The numeric return * value will be -ERANGE if we determine the given center_freq does not even * have a regulatory rule for a frequency range in the center_freq's band. * See freq_in_rule_band() for our current definition of a band -- this is * purely subjective and right now it's 802.11 specific. */ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy, u32 center_freq); /** * reg_initiator_name - map regulatory request initiator enum to name * @initiator: the regulatory request initiator * * You can use this to map the regulatory request initiator enum to a * proper string representation. */ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); /** * regulatory_pre_cac_allowed - check if pre-CAC allowed in the current regdom * @wiphy: wiphy for which pre-CAC capability is checked. * * Pre-CAC is allowed only in some regdomains (notably ETSI). */ bool regulatory_pre_cac_allowed(struct wiphy *wiphy); /** * DOC: Internal regulatory db functions * */ /** * reg_query_regdb_wmm - Query internal regulatory db for wmm rule * Regulatory self-managed drivers can use it to proactively query wmm rules. * * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried. * @freq: the frequency (in MHz) to be queried. * @rule: pointer to store the wmm rule from the regulatory db. * * Self-managed wireless drivers can use this function to query * the internal regulatory database to check whether the given * ISO/IEC 3166 alpha2 country and freq have wmm rule limitations. * * Drivers should check the return value, it's possible you can get * an -ENODATA. * * Return: 0 on success. -ENODATA. */ int reg_query_regdb_wmm(char *alpha2, int freq, struct ieee80211_reg_rule *rule); /* * callbacks for asynchronous cfg80211 methods, notification * functions and BSS handling helpers */ /** * cfg80211_scan_done - notify that scan finished * * @request: the corresponding scan request * @info: information about the completed scan */ void cfg80211_scan_done(struct cfg80211_scan_request *request, struct cfg80211_scan_info *info); /** * cfg80211_sched_scan_results - notify that new scan results are available * * @wiphy: the wiphy which got scheduled scan results * @reqid: identifier for the related scheduled scan request */ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid); /** * cfg80211_sched_scan_stopped - notify that the scheduled scan has stopped * * @wiphy: the wiphy on which the scheduled scan stopped * @reqid: identifier for the related scheduled scan request * * The driver can call this function to inform cfg80211 that the * scheduled scan had to be stopped, for whatever reason. The driver * is then called back via the sched_scan_stop operation when done. */ void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid); /** * cfg80211_sched_scan_stopped_locked - notify that the scheduled scan has stopped * * @wiphy: the wiphy on which the scheduled scan stopped * @reqid: identifier for the related scheduled scan request * * The driver can call this function to inform cfg80211 that the * scheduled scan had to be stopped, for whatever reason. The driver * is then called back via the sched_scan_stop operation when done. * This function should be called with the wiphy mutex held.
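 *
 * As a usage sketch: from a context that already holds the wiphy mutex
 * (for example another wiphy_work), a driver would report that its
 * firmware ended the scheduled scan with:
 *
 *	cfg80211_sched_scan_stopped_locked(wiphy, reqid);
 *
 * while from a context that does not hold the mutex it would call
 * cfg80211_sched_scan_stopped(wiphy, reqid) instead; reqid here stands
 * for the identifier stored when the scheduled scan was started.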
*/ void cfg80211_sched_scan_stopped_locked(struct wiphy *wiphy, u64 reqid); /** * cfg80211_inform_bss_frame_data - inform cfg80211 of a received BSS frame * @wiphy: the wiphy reporting the BSS * @data: the BSS metadata * @mgmt: the management frame (probe response or beacon) * @len: length of the management frame * @gfp: context flags * * This informs cfg80211 that BSS information was found and * the BSS should be updated/added. * * Return: A referenced struct, must be released with cfg80211_put_bss()! * Or %NULL on error. */ struct cfg80211_bss * __must_check cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct cfg80211_inform_bss *data, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp); static inline struct cfg80211_bss * __must_check cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *rx_channel, struct ieee80211_mgmt *mgmt, size_t len, s32 signal, gfp_t gfp) { struct cfg80211_inform_bss data = { .chan = rx_channel, .signal = signal, }; return cfg80211_inform_bss_frame_data(wiphy, &data, mgmt, len, gfp); } /** * cfg80211_gen_new_bssid - generate a nontransmitted BSSID for multi-BSSID * @bssid: transmitter BSSID * @max_bssid: max BSSID indicator, taken from Multiple BSSID element * @mbssid_index: BSSID index, taken from Multiple BSSID index element * @new_bssid: calculated nontransmitted BSSID */ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, u8 mbssid_index, u8 *new_bssid) { u64 bssid_u64 = ether_addr_to_u64(bssid); u64 mask = GENMASK_ULL(max_bssid - 1, 0); u64 new_bssid_u64; new_bssid_u64 = bssid_u64 & ~mask; new_bssid_u64 |= ((bssid_u64 & mask) + mbssid_index) & mask; u64_to_ether_addr(new_bssid_u64, new_bssid); } /** * cfg80211_is_element_inherited - returns if element ID should be inherited * @element: element to check * @non_inherit_element: non inheritance element */ bool cfg80211_is_element_inherited(const struct element *element, const struct element *non_inherit_element); /** * cfg80211_merge_profile - merges a MBSSID profile if it is split between IEs * @ie: ies * @ielen: length of IEs * @mbssid_elem: current MBSSID element * @sub_elem: current MBSSID subelement (profile) * @merged_ie: location of the merged profile * @max_copy_len: max merged profile length */ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, const struct element *mbssid_elem, const struct element *sub_elem, u8 *merged_ie, size_t max_copy_len); /** * enum cfg80211_bss_frame_type - frame type that the BSS data came from * @CFG80211_BSS_FTYPE_UNKNOWN: driver doesn't know whether the data is * from a beacon or probe response * @CFG80211_BSS_FTYPE_BEACON: data comes from a beacon * @CFG80211_BSS_FTYPE_PRESP: data comes from a probe response * @CFG80211_BSS_FTYPE_S1G_BEACON: data comes from an S1G beacon */ enum cfg80211_bss_frame_type { CFG80211_BSS_FTYPE_UNKNOWN, CFG80211_BSS_FTYPE_BEACON, CFG80211_BSS_FTYPE_PRESP, CFG80211_BSS_FTYPE_S1G_BEACON, }; /** * cfg80211_get_ies_channel_number - returns the channel number from ies * @ie: IEs * @ielen: length of IEs * @band: enum nl80211_band of the channel * * Returns the channel number, or -1 if none could be determined. */ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, enum nl80211_band band); /** * cfg80211_ssid_eq - compare two SSIDs * @a: first SSID * @b: second SSID * * Return: %true if SSIDs are equal, %false otherwise. 
*/ static inline bool cfg80211_ssid_eq(struct cfg80211_ssid *a, struct cfg80211_ssid *b) { if (WARN_ON(!a || !b)) return false; if (a->ssid_len != b->ssid_len) return false; return memcmp(a->ssid, b->ssid, a->ssid_len) ? false : true; } /** * cfg80211_inform_bss_data - inform cfg80211 of a new BSS * * @wiphy: the wiphy reporting the BSS * @data: the BSS metadata * @ftype: frame type (if known) * @bssid: the BSSID of the BSS * @tsf: the TSF sent by the peer in the beacon/probe response (or 0) * @capability: the capability field sent by the peer * @beacon_interval: the beacon interval announced by the peer * @ie: additional IEs sent by the peer * @ielen: length of the additional IEs * @gfp: context flags * * This informs cfg80211 that BSS information was found and * the BSS should be updated/added. * * Return: A referenced struct, must be released with cfg80211_put_bss()! * Or %NULL on error. */ struct cfg80211_bss * __must_check cfg80211_inform_bss_data(struct wiphy *wiphy, struct cfg80211_inform_bss *data, enum cfg80211_bss_frame_type ftype, const u8 *bssid, u64 tsf, u16 capability, u16 beacon_interval, const u8 *ie, size_t ielen, gfp_t gfp); static inline struct cfg80211_bss * __must_check cfg80211_inform_bss(struct wiphy *wiphy, struct ieee80211_channel *rx_channel, enum cfg80211_bss_frame_type ftype, const u8 *bssid, u64 tsf, u16 capability, u16 beacon_interval, const u8 *ie, size_t ielen, s32 signal, gfp_t gfp) { struct cfg80211_inform_bss data = { .chan = rx_channel, .signal = signal, }; return cfg80211_inform_bss_data(wiphy, &data, ftype, bssid, tsf, capability, beacon_interval, ie, ielen, gfp); } /** * __cfg80211_get_bss - get a BSS reference * @wiphy: the wiphy this BSS struct belongs to * @channel: the channel to search on (or %NULL) * @bssid: the desired BSSID (or %NULL) * @ssid: the desired SSID (or %NULL) * @ssid_len: length of the SSID (or 0) * @bss_type: type of BSS, see &enum ieee80211_bss_type * @privacy: privacy filter, see &enum ieee80211_privacy * @use_for: indicates which use is intended */ struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, const u8 *ssid, size_t ssid_len, enum ieee80211_bss_type bss_type, enum ieee80211_privacy privacy, u32 use_for); /** * cfg80211_get_bss - get a BSS reference * @wiphy: the wiphy this BSS struct belongs to * @channel: the channel to search on (or %NULL) * @bssid: the desired BSSID (or %NULL) * @ssid: the desired SSID (or %NULL) * @ssid_len: length of the SSID (or 0) * @bss_type: type of BSS, see &enum ieee80211_bss_type * @privacy: privacy filter, see &enum ieee80211_privacy * * This version implies regular usage, %NL80211_BSS_USE_FOR_NORMAL. */ static inline struct cfg80211_bss * cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, const u8 *ssid, size_t ssid_len, enum ieee80211_bss_type bss_type, enum ieee80211_privacy privacy) { return __cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, bss_type, privacy, NL80211_BSS_USE_FOR_NORMAL); } static inline struct cfg80211_bss * cfg80211_get_ibss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *ssid, size_t ssid_len) { return cfg80211_get_bss(wiphy, channel, NULL, ssid, ssid_len, IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY_ANY); } /** * cfg80211_ref_bss - reference BSS struct * @wiphy: the wiphy this BSS struct belongs to * @bss: the BSS struct to reference * * Increments the refcount of the given BSS struct. 
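 *
 * A sketch of the usual lookup/reference pattern (chan, bssid, ssid and
 * ssid_len stand for values the driver already knows):
 *
 *	struct cfg80211_bss *bss;
 *	int err;
 *
 *	bss = cfg80211_get_bss(wiphy, chan, bssid, ssid, ssid_len,
 *			       IEEE80211_BSS_TYPE_ESS, IEEE80211_PRIVACY_ANY);
 *	if (!bss)
 *		return -ENOENT;
 *	err = my_drv_use_bss(bss);
 *	cfg80211_put_bss(wiphy, bss);
 *	return err;
 *
 * where my_drv_use_bss() is a placeholder for whatever the driver does with
 * the entry; cfg80211_ref_bss() is only needed when an additional,
 * longer-lived reference must be taken.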
*/ void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *bss); /** * cfg80211_put_bss - unref BSS struct * @wiphy: the wiphy this BSS struct belongs to * @bss: the BSS struct * * Decrements the refcount of the given BSS struct. */ void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *bss); /** * cfg80211_unlink_bss - unlink BSS from internal data structures * @wiphy: the wiphy * @bss: the bss to remove * * This function removes the given BSS from the internal data structures * thereby making it no longer show up in scan results etc. Use this * function when you detect a BSS is gone. Normally BSSes will also time * out, so it is not necessary to use this function at all. */ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *bss); /** * cfg80211_bss_iter - iterate all BSS entries * * This function iterates over the BSS entries associated with the given wiphy * and calls the callback for the iterated BSS. The iterator function is not * allowed to call functions that might modify the internal state of the BSS DB. * * @wiphy: the wiphy * @chandef: if given, the iterator function will be called only if the channel * of the currently iterated BSS is a subset of the given channel. * @iter: the iterator function to call * @iter_data: an argument to the iterator function */ void cfg80211_bss_iter(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, void (*iter)(struct wiphy *wiphy, struct cfg80211_bss *bss, void *data), void *iter_data); /** * cfg80211_rx_mlme_mgmt - notification of processed MLME management frame * @dev: network device * @buf: authentication frame (header + body) * @len: length of the frame data * * This function is called whenever an authentication, disassociation or * deauthentication frame has been received and processed in station mode. * After being asked to authenticate via cfg80211_ops::auth() the driver must * call either this function or cfg80211_auth_timeout(). * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). * While connected, the driver must call this for received and processed * disassociation and deauthentication frames. If the frame couldn't be used * because it was unprotected, the driver must call the function * cfg80211_rx_unprot_mlme_mgmt() instead. * * This function may sleep. The caller must hold the corresponding wdev's mutex. */ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len); /** * cfg80211_auth_timeout - notification of timed out authentication * @dev: network device * @addr: The MAC address of the device with which the authentication timed out * * This function may sleep. The caller must hold the corresponding wdev's * mutex. */ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); /** * struct cfg80211_rx_assoc_resp_data - association response data * @buf: (Re)Association Response frame (header + body) * @len: length of the frame data * @uapsd_queues: bitmap of queues configured for uapsd.
Same format * as the AC bitmap in the QoS info field * @req_ies: information elements from the (Re)Association Request frame * @req_ies_len: length of req_ies data * @ap_mld_addr: AP MLD address (in case of MLO) * @links: per-link information indexed by link ID, use links[0] for * non-MLO connections * @links.bss: the BSS that association was requested with, ownership of the * pointer moves to cfg80211 in the call to cfg80211_rx_assoc_resp() * @links.status: Set this (along with a BSS pointer) for links that * were rejected by the AP. */ struct cfg80211_rx_assoc_resp_data { const u8 *buf; size_t len; const u8 *req_ies; size_t req_ies_len; int uapsd_queues; const u8 *ap_mld_addr; struct { u8 addr[ETH_ALEN] __aligned(2); struct cfg80211_bss *bss; u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; /** * cfg80211_rx_assoc_resp - notification of processed association response * @dev: network device * @data: association response data, &struct cfg80211_rx_assoc_resp_data * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). * * This function may sleep. The caller must hold the corresponding wdev's mutex. */ void cfg80211_rx_assoc_resp(struct net_device *dev, const struct cfg80211_rx_assoc_resp_data *data); /** * struct cfg80211_assoc_failure - association failure data * @ap_mld_addr: AP MLD address, or %NULL * @bss: list of BSSes, must use entry 0 for non-MLO connections * (@ap_mld_addr is %NULL) * @timeout: indicates the association failed due to timeout, otherwise * the association was abandoned for a reason reported through some * other API (e.g. deauth RX) */ struct cfg80211_assoc_failure { const u8 *ap_mld_addr; struct cfg80211_bss *bss[IEEE80211_MLD_MAX_NUM_LINKS]; bool timeout; }; /** * cfg80211_assoc_failure - notification of association failure * @dev: network device * @data: data describing the association failure * * This function may sleep. The caller must hold the corresponding wdev's mutex. */ void cfg80211_assoc_failure(struct net_device *dev, struct cfg80211_assoc_failure *data); /** * cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame * @dev: network device * @buf: 802.11 frame (header + body) * @len: length of the frame data * @reconnect: immediate reconnect is desired (include the nl80211 attribute) * * This function is called whenever deauthentication has been processed in * station mode. This includes both received deauthentication frames and * locally generated ones. This function may sleep. The caller must hold the * corresponding wdev's mutex. */ void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len, bool reconnect); /** * cfg80211_rx_unprot_mlme_mgmt - notification of unprotected mlme mgmt frame * @dev: network device * @buf: received management frame (header + body) * @len: length of the frame data * * This function is called whenever a received deauthentication or disassociation * frame has been dropped in station mode because of MFP being used but the * frame was not protected. This is also used to notify reception of a Beacon * frame that was dropped because it did not include a valid MME MIC while * beacon protection was enabled (BIGTK configured in station mode). * * This function may sleep.
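 *
 * A minimal sketch of a driver RX path calling this, where mgmt points at
 * the received frame and dropped_unprotected is a stand-in for however the
 * driver detects the MFP violation:
 *
 *	if (ieee80211_is_deauth(mgmt->frame_control) && dropped_unprotected)
 *		cfg80211_rx_unprot_mlme_mgmt(dev, (const u8 *)mgmt, len);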
*/ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len); /** * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP) * @dev: network device * @addr: The source MAC address of the frame * @key_type: The key type that the received frame used * @key_id: Key identifier (0..3). Can be -1 if missing. * @tsc: The TSC value of the frame that generated the MIC failure (6 octets) * @gfp: allocation flags * * This function is called whenever the local MAC detects a MIC failure in a * received frame. This matches with MLME-MICHAELMICFAILURE.indication() * primitive. */ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, enum nl80211_key_type key_type, int key_id, const u8 *tsc, gfp_t gfp); /** * cfg80211_ibss_joined - notify cfg80211 that device joined an IBSS * * @dev: network device * @bssid: the BSSID of the IBSS joined * @channel: the channel of the IBSS joined * @gfp: allocation flags * * This function notifies cfg80211 that the device joined an IBSS or * switched to a different BSSID. Before this function can be called, * either a beacon has to have been received from the IBSS, or one of * the cfg80211_inform_bss{,_frame} functions must have been called * with the locally generated beacon -- this guarantees that there is * always a scan result for this IBSS. cfg80211 will handle the rest. */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel, gfp_t gfp); /** * cfg80211_notify_new_peer_candidate - notify cfg80211 of a new mesh peer * candidate * * @dev: network device * @macaddr: the MAC address of the new candidate * @ie: information elements advertised by the peer candidate * @ie_len: length of the information elements buffer * @sig_dbm: signal level in dBm * @gfp: allocation flags * * This function notifies cfg80211 that the mesh peer candidate has been * detected, most likely via a beacon or, less likely, via a probe response. * cfg80211 then sends a notification to userspace. */ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *macaddr, const u8 *ie, u8 ie_len, int sig_dbm, gfp_t gfp); /** * DOC: RFkill integration * * RFkill integration in cfg80211 is almost invisible to drivers, * as cfg80211 automatically registers an rfkill instance for each * wireless device it knows about. Soft kill is also translated * into disconnecting and turning all interfaces off. Drivers are * expected to turn off the device when all interfaces are down. * * However, devices may have a hard RFkill line, in which case they * also need to interact with the rfkill subsystem, via cfg80211. * They can do this with a few helper functions documented here. 
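 *
 * As an illustrative sketch (the my_drv_* names are made up), a driver with
 * a hard-RFkill GPIO might report state changes from its interrupt handler:
 *
 *	static irqreturn_t my_drv_rfkill_irq(int irq, void *data)
 *	{
 *		struct my_drv *drv = data;
 *
 *		wiphy_rfkill_set_hw_state(drv->wiphy,
 *					  my_drv_rfkill_gpio_blocked(drv));
 *		return IRQ_HANDLED;
 *	}
 *
 * together with wiphy_rfkill_start_polling() if the line cannot generate
 * interrupts.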
*/ /** * wiphy_rfkill_set_hw_state_reason - notify cfg80211 about hw block state * @wiphy: the wiphy * @blocked: block status * @reason: one of reasons in &enum rfkill_hard_block_reasons */ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked, enum rfkill_hard_block_reasons reason); static inline void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) { wiphy_rfkill_set_hw_state_reason(wiphy, blocked, RFKILL_HARD_BLOCK_SIGNAL); } /** * wiphy_rfkill_start_polling - start polling rfkill * @wiphy: the wiphy */ void wiphy_rfkill_start_polling(struct wiphy *wiphy); /** * wiphy_rfkill_stop_polling - stop polling rfkill * @wiphy: the wiphy */ static inline void wiphy_rfkill_stop_polling(struct wiphy *wiphy) { rfkill_pause_polling(wiphy->rfkill); } /** * DOC: Vendor commands * * Occasionally, there are special protocol or firmware features that * can't be implemented very openly. For this and similar cases, the * vendor command functionality allows implementing the features with * (typically closed-source) userspace and firmware, using nl80211 as * the configuration mechanism. * * A driver supporting vendor commands must register them as an array * in struct wiphy, with handlers for each one. Each command has an * OUI and sub command ID to identify it. * * Note that this feature should not be (ab)used to implement protocol * features that could openly be shared across drivers. In particular, * it must never be required to use vendor commands to implement any * "normal" functionality that higher-level userspace like connection * managers etc. need. */ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, enum nl80211_commands cmd, enum nl80211_attrs attr, int approxlen); struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_commands cmd, enum nl80211_attrs attr, unsigned int portid, int vendor_event_idx, int approxlen, gfp_t gfp); void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp); /** * cfg80211_vendor_cmd_alloc_reply_skb - allocate vendor command reply * @wiphy: the wiphy * @approxlen: an upper bound of the length of the data that will * be put into the skb * * This function allocates and pre-fills an skb for a reply to * a vendor command. Since it is intended for a reply, calling * it outside of a vendor command's doit() operation is invalid. * * The returned skb is pre-filled with some identifying data in * a way that any data that is put into the skb (with skb_put(), * nla_put() or similar) will end up being within the * %NL80211_ATTR_VENDOR_DATA attribute, so all that needs to be done * with the skb is adding data for the corresponding userspace tool * which can then read that data out of the vendor data attribute. * You must not modify the skb in any other way. * * When done, call cfg80211_vendor_cmd_reply() with the skb and return * its error code as the result of the doit() operation. * * Return: An allocated and pre-filled skb. %NULL if any errors happen. */ static inline struct sk_buff * cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen) { return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_VENDOR, NL80211_ATTR_VENDOR_DATA, approxlen); } /** * cfg80211_vendor_cmd_reply - send the reply skb * @skb: The skb, must have been allocated with * cfg80211_vendor_cmd_alloc_reply_skb() * * Since calling this function will usually be the last thing * before returning from the vendor command doit() you should * return the error code. 
Note that this function consumes the * skb regardless of the return value. * * Return: An error code or 0 on success. */ int cfg80211_vendor_cmd_reply(struct sk_buff *skb); /** * cfg80211_vendor_cmd_get_sender - get the current sender netlink ID * @wiphy: the wiphy * * Return the current netlink port ID in a vendor command handler. * Valid to call only there. */ unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy); /** * cfg80211_vendor_event_alloc - allocate vendor-specific event skb * @wiphy: the wiphy * @wdev: the wireless device * @event_idx: index of the vendor event in the wiphy's vendor_events * @approxlen: an upper bound of the length of the data that will * be put into the skb * @gfp: allocation flags * * This function allocates and pre-fills an skb for an event on the * vendor-specific multicast group. * * If wdev != NULL, both the ifindex and identifier of the specified * wireless device are added to the event message before the vendor data * attribute. * * When done filling the skb, call cfg80211_vendor_event() with the * skb to send the event. * * Return: An allocated and pre-filled skb. %NULL if any errors happen. */ static inline struct sk_buff * cfg80211_vendor_event_alloc(struct wiphy *wiphy, struct wireless_dev *wdev, int approxlen, int event_idx, gfp_t gfp) { return __cfg80211_alloc_event_skb(wiphy, wdev, NL80211_CMD_VENDOR, NL80211_ATTR_VENDOR_DATA, 0, event_idx, approxlen, gfp); } /** * cfg80211_vendor_event_alloc_ucast - alloc unicast vendor-specific event skb * @wiphy: the wiphy * @wdev: the wireless device * @event_idx: index of the vendor event in the wiphy's vendor_events * @portid: port ID of the receiver * @approxlen: an upper bound of the length of the data that will * be put into the skb * @gfp: allocation flags * * This function allocates and pre-fills an skb for an event to send to * a specific (userland) socket. This socket would previously have been * obtained by cfg80211_vendor_cmd_get_sender(), and the caller MUST take * care to register a netlink notifier to see when the socket closes. * * If wdev != NULL, both the ifindex and identifier of the specified * wireless device are added to the event message before the vendor data * attribute. * * When done filling the skb, call cfg80211_vendor_event() with the * skb to send the event. * * Return: An allocated and pre-filled skb. %NULL if any errors happen. */ static inline struct sk_buff * cfg80211_vendor_event_alloc_ucast(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int portid, int approxlen, int event_idx, gfp_t gfp) { return __cfg80211_alloc_event_skb(wiphy, wdev, NL80211_CMD_VENDOR, NL80211_ATTR_VENDOR_DATA, portid, event_idx, approxlen, gfp); } /** * cfg80211_vendor_event - send the event * @skb: The skb, must have been allocated with cfg80211_vendor_event_alloc() * @gfp: allocation flags * * This function sends the given @skb, which must have been allocated * by cfg80211_vendor_event_alloc(), as an event. It always consumes it. */ static inline void cfg80211_vendor_event(struct sk_buff *skb, gfp_t gfp) { __cfg80211_send_event_skb(skb, gfp); } #ifdef CONFIG_NL80211_TESTMODE /** * DOC: Test mode * * Test mode is a set of utility functions to allow drivers to * interact with driver-specific tools to aid, for instance, * factory programming. * * This chapter describes how drivers interact with it. For more * information see the nl80211 book's chapter on it. 
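 *
 * As a sketch of the usual reply pattern (MY_DRV_TM_ATTR_VERSION and the
 * my_drv_* names are hypothetical, driver-defined), a testmode command
 * handler could look like:
 *
 *	static int my_drv_testmode_cmd(struct wiphy *wiphy,
 *				       struct wireless_dev *wdev,
 *				       void *data, int len)
 *	{
 *		struct sk_buff *reply;
 *
 *		reply = cfg80211_testmode_alloc_reply_skb(wiphy, 16);
 *		if (!reply)
 *			return -ENOMEM;
 *		if (nla_put_u32(reply, MY_DRV_TM_ATTR_VERSION, 1)) {
 *			kfree_skb(reply);
 *			return -ENOBUFS;
 *		}
 *		return cfg80211_testmode_reply(reply);
 *	}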
*/ /** * cfg80211_testmode_alloc_reply_skb - allocate testmode reply * @wiphy: the wiphy * @approxlen: an upper bound of the length of the data that will * be put into the skb * * This function allocates and pre-fills an skb for a reply to * the testmode command. Since it is intended for a reply, calling * it outside of the @testmode_cmd operation is invalid. * * The returned skb is pre-filled with the wiphy index and set up in * a way that any data that is put into the skb (with skb_put(), * nla_put() or similar) will end up being within the * %NL80211_ATTR_TESTDATA attribute, so all that needs to be done * with the skb is adding data for the corresponding userspace tool * which can then read that data out of the testdata attribute. You * must not modify the skb in any other way. * * When done, call cfg80211_testmode_reply() with the skb and return * its error code as the result of the @testmode_cmd operation. * * Return: An allocated and pre-filled skb. %NULL if any errors happen. */ static inline struct sk_buff * cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, int approxlen) { return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_TESTMODE, NL80211_ATTR_TESTDATA, approxlen); } /** * cfg80211_testmode_reply - send the reply skb * @skb: The skb, must have been allocated with * cfg80211_testmode_alloc_reply_skb() * * Since calling this function will usually be the last thing * before returning from the @testmode_cmd you should return * the error code. Note that this function consumes the skb * regardless of the return value. * * Return: An error code or 0 on success. */ static inline int cfg80211_testmode_reply(struct sk_buff *skb) { return cfg80211_vendor_cmd_reply(skb); } /** * cfg80211_testmode_alloc_event_skb - allocate testmode event * @wiphy: the wiphy * @approxlen: an upper bound of the length of the data that will * be put into the skb * @gfp: allocation flags * * This function allocates and pre-fills an skb for an event on the * testmode multicast group. * * The returned skb is set up in the same way as with * cfg80211_testmode_alloc_reply_skb() but prepared for an event. As * there, you should simply add data to it that will then end up in the * %NL80211_ATTR_TESTDATA attribute. Again, you must not modify the skb * in any other way. * * When done filling the skb, call cfg80211_testmode_event() with the * skb to send the event. * * Return: An allocated and pre-filled skb. %NULL if any errors happen. */ static inline struct sk_buff * cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, int approxlen, gfp_t gfp) { return __cfg80211_alloc_event_skb(wiphy, NULL, NL80211_CMD_TESTMODE, NL80211_ATTR_TESTDATA, 0, -1, approxlen, gfp); } /** * cfg80211_testmode_event - send the event * @skb: The skb, must have been allocated with * cfg80211_testmode_alloc_event_skb() * @gfp: allocation flags * * This function sends the given @skb, which must have been allocated * by cfg80211_testmode_alloc_event_skb(), as an event. It always * consumes it. 
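 *
 * A short sketch of reporting a driver-defined event, where
 * MY_DRV_TM_ATTR_STATE and state are stand-ins for driver-specific data:
 *
 *	struct sk_buff *ev;
 *
 *	ev = cfg80211_testmode_alloc_event_skb(wiphy, 16, GFP_KERNEL);
 *	if (!ev)
 *		return;
 *	if (nla_put_u32(ev, MY_DRV_TM_ATTR_STATE, state)) {
 *		kfree_skb(ev);
 *		return;
 *	}
 *	cfg80211_testmode_event(ev, GFP_KERNEL);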
*/ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) { __cfg80211_send_event_skb(skb, gfp); } #define CFG80211_TESTMODE_CMD(cmd) .testmode_cmd = (cmd), #define CFG80211_TESTMODE_DUMP(cmd) .testmode_dump = (cmd), #else #define CFG80211_TESTMODE_CMD(cmd) #define CFG80211_TESTMODE_DUMP(cmd) #endif /** * struct cfg80211_fils_resp_params - FILS connection response params * @kek: KEK derived from a successful FILS connection (may be %NULL) * @kek_len: Length of @fils_kek in octets * @update_erp_next_seq_num: Boolean value to specify whether the value in * @erp_next_seq_num is valid. * @erp_next_seq_num: The next sequence number to use in ERP message in * FILS Authentication. This value should be specified irrespective of the * status for a FILS connection. * @pmk: A new PMK if derived from a successful FILS connection (may be %NULL). * @pmk_len: Length of @pmk in octets * @pmkid: A new PMKID if derived from a successful FILS connection or the PMKID * used for this FILS connection (may be %NULL). */ struct cfg80211_fils_resp_params { const u8 *kek; size_t kek_len; bool update_erp_next_seq_num; u16 erp_next_seq_num; const u8 *pmk; size_t pmk_len; const u8 *pmkid; }; /** * struct cfg80211_connect_resp_params - Connection response params * @status: Status code, %WLAN_STATUS_SUCCESS for successful connection, use * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you * the real status code for failures. If this call is used to report a * failure due to a timeout (e.g., not receiving an Authentication frame * from the AP) instead of an explicit rejection by the AP, -1 is used to * indicate that this is a failure, but without a status code. * @timeout_reason is used to report the reason for the timeout in that * case. * @req_ie: Association request IEs (may be %NULL) * @req_ie_len: Association request IEs length * @resp_ie: Association response IEs (may be %NULL) * @resp_ie_len: Association response IEs length * @fils: FILS connection response parameters. * @timeout_reason: Reason for connection timeout. This is used when the * connection fails due to a timeout instead of an explicit rejection from * the AP. %NL80211_TIMEOUT_UNSPECIFIED is used when the timeout reason is * not known. This value is used only if @status < 0 to indicate that the * failure is due to a timeout and not due to explicit rejection by the AP. * This value is ignored in other cases (@status >= 0). * @valid_links: For MLO connection, BIT mask of the valid link ids. Otherwise * zero. * @ap_mld_addr: For MLO connection, MLD address of the AP. Otherwise %NULL. * @links : For MLO connection, contains link info for the valid links indicated * using @valid_links. For non-MLO connection, links[0] contains the * connected AP info. * @links.addr: For MLO connection, MAC address of the STA link. Otherwise * %NULL. * @links.bssid: For MLO connection, MAC address of the AP link. For non-MLO * connection, links[0].bssid points to the BSSID of the AP (may be %NULL). * @links.bss: For MLO connection, entry of bss to which STA link is connected. * For non-MLO connection, links[0].bss points to entry of bss to which STA * is connected. It can be obtained through cfg80211_get_bss() (may be * %NULL). It is recommended to store the bss from the connect_request and * hold a reference to it and return through this param to avoid a warning * if the bss is expired during the connection, esp. for those drivers * implementing connect op. Only one parameter among @bssid and @bss needs * to be specified. 
* @links.status: per-link status code, to report a status code that's not * %WLAN_STATUS_SUCCESS for a given link, it must also be in the * @valid_links bitmap and may have a BSS pointer (which is then released) */ struct cfg80211_connect_resp_params { int status; const u8 *req_ie; size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; struct cfg80211_fils_resp_params fils; enum nl80211_timeout_reason timeout_reason; const u8 *ap_mld_addr; u16 valid_links; struct { const u8 *addr; const u8 *bssid; struct cfg80211_bss *bss; u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; /** * cfg80211_connect_done - notify cfg80211 of connection result * * @dev: network device * @params: connection response parameters * @gfp: allocation flags * * It should be called by the underlying driver once execution of the connection * request from connect() has been completed. This is similar to * cfg80211_connect_bss(), but takes a structure pointer for connection response * parameters. Only one of the functions among cfg80211_connect_bss(), * cfg80211_connect_result(), cfg80211_connect_timeout(), * and cfg80211_connect_done() should be called. */ void cfg80211_connect_done(struct net_device *dev, struct cfg80211_connect_resp_params *params, gfp_t gfp); /** * cfg80211_connect_bss - notify cfg80211 of connection result * * @dev: network device * @bssid: the BSSID of the AP * @bss: Entry of bss to which STA got connected to, can be obtained through * cfg80211_get_bss() (may be %NULL). But it is recommended to store the * bss from the connect_request and hold a reference to it and return * through this param to avoid a warning if the bss is expired during the * connection, esp. for those drivers implementing connect op. * Only one parameter among @bssid and @bss needs to be specified. * @req_ie: association request IEs (maybe be %NULL) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length * @status: status code, %WLAN_STATUS_SUCCESS for successful connection, use * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you * the real status code for failures. If this call is used to report a * failure due to a timeout (e.g., not receiving an Authentication frame * from the AP) instead of an explicit rejection by the AP, -1 is used to * indicate that this is a failure, but without a status code. * @timeout_reason is used to report the reason for the timeout in that * case. * @gfp: allocation flags * @timeout_reason: reason for connection timeout. This is used when the * connection fails due to a timeout instead of an explicit rejection from * the AP. %NL80211_TIMEOUT_UNSPECIFIED is used when the timeout reason is * not known. This value is used only if @status < 0 to indicate that the * failure is due to a timeout and not due to explicit rejection by the AP. * This value is ignored in other cases (@status >= 0). * * It should be called by the underlying driver once execution of the connection * request from connect() has been completed. This is similar to * cfg80211_connect_result(), but with the option of identifying the exact bss * entry for the connection. Only one of the functions among * cfg80211_connect_bss(), cfg80211_connect_result(), * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. 
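 *
 * A minimal sketch (hypothetical driver completion path; @bss is assumed to
 * have been looked up and referenced when the connect request was handled,
 * and the IE pointers/lengths to come from the association exchange):
 *
 *	cfg80211_connect_bss(dev, NULL, bss,
 *			     assoc_req_ies, assoc_req_ies_len,
 *			     assoc_resp_ies, assoc_resp_ies_len,
 *			     WLAN_STATUS_SUCCESS, GFP_KERNEL,
 *			     NL80211_TIMEOUT_UNSPECIFIED);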
*/ static inline void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, int status, gfp_t gfp, enum nl80211_timeout_reason timeout_reason) { struct cfg80211_connect_resp_params params; memset(&params, 0, sizeof(params)); params.status = status; params.links[0].bssid = bssid; params.links[0].bss = bss; params.req_ie = req_ie; params.req_ie_len = req_ie_len; params.resp_ie = resp_ie; params.resp_ie_len = resp_ie_len; params.timeout_reason = timeout_reason; cfg80211_connect_done(dev, &params, gfp); } /** * cfg80211_connect_result - notify cfg80211 of connection result * * @dev: network device * @bssid: the BSSID of the AP * @req_ie: association request IEs (may be %NULL) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length * @status: status code, %WLAN_STATUS_SUCCESS for successful connection, use * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you * the real status code for failures. * @gfp: allocation flags * * It should be called by the underlying driver once execution of the connection * request from connect() has been completed. This is similar to * cfg80211_connect_bss() which allows the exact bss entry to be specified. Only * one of the functions among cfg80211_connect_bss(), cfg80211_connect_result(), * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. */ static inline void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, u16 status, gfp_t gfp) { cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, resp_ie, resp_ie_len, status, gfp, NL80211_TIMEOUT_UNSPECIFIED); } /** * cfg80211_connect_timeout - notify cfg80211 of connection timeout * * @dev: network device * @bssid: the BSSID of the AP * @req_ie: association request IEs (may be %NULL) * @req_ie_len: association request IEs length * @gfp: allocation flags * @timeout_reason: reason for connection timeout. * * It should be called by the underlying driver whenever connect() has failed * in a sequence where no explicit authentication/association rejection was * received from the AP. This could happen, e.g., due to not being able to send * out the Authentication or Association Request frame or timing out while * waiting for the response. Only one of the functions among * cfg80211_connect_bss(), cfg80211_connect_result(), * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. */ static inline void cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, gfp_t gfp, enum nl80211_timeout_reason timeout_reason) { cfg80211_connect_bss(dev, bssid, NULL, req_ie, req_ie_len, NULL, 0, -1, gfp, timeout_reason); } /** * struct cfg80211_roam_info - driver initiated roaming information * * @req_ie: association request IEs (may be %NULL) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length * @fils: FILS related roaming information. * @valid_links: For MLO roaming, BIT mask of the new valid links is set. * Otherwise zero. * @ap_mld_addr: For MLO roaming, MLD address of the new AP. Otherwise %NULL. * @links: For MLO roaming, contains new link info for the valid links set in * @valid_links. For non-MLO roaming, links[0] contains the new AP info.
* @links.addr: For MLO roaming, MAC address of the STA link. Otherwise %NULL. * @links.bssid: For MLO roaming, MAC address of the new AP link. For non-MLO * roaming, links[0].bssid points to the BSSID of the new AP. May be * %NULL if %links.bss is set. * @links.channel: the channel of the new AP. * @links.bss: For MLO roaming, entry of new bss to which STA link got * roamed. For non-MLO roaming, links[0].bss points to entry of bss to * which STA got roamed (may be %NULL if %links.bssid is set) */ struct cfg80211_roam_info { const u8 *req_ie; size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; struct cfg80211_fils_resp_params fils; const u8 *ap_mld_addr; u16 valid_links; struct { const u8 *addr; const u8 *bssid; struct ieee80211_channel *channel; struct cfg80211_bss *bss; } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; /** * cfg80211_roamed - notify cfg80211 of roaming * * @dev: network device * @info: information about the new BSS. struct &cfg80211_roam_info. * @gfp: allocation flags * * This function may be called with the driver passing either the BSSID of the * new AP or passing the bss entry to avoid a race in timeout of the bss entry. * It should be called by the underlying driver whenever it roamed from one AP * to another while connected. Drivers which have roaming implemented in * firmware should pass the bss entry to avoid a race in bss entry timeout where * the bss entry of the new AP is seen in the driver, but gets timed out by the * time it is accessed in __cfg80211_roamed() due to delay in scheduling * rdev->event_work. In case of any failures, the reference is released * either in cfg80211_roamed() or in __cfg80211_roamed(). Otherwise, it will be * released while disconnecting from the current bss. */ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, gfp_t gfp); /** * cfg80211_port_authorized - notify cfg80211 of successful security association * * @dev: network device * @peer_addr: BSSID of the AP/P2P GO in case of STA/GC or STA/GC MAC address * in case of AP/P2P GO * @td_bitmap: transition disable policy * @td_bitmap_len: Length of transition disable policy * @gfp: allocation flags * * This function should be called by a driver that supports 4 way handshake * offload after a security association was successfully established (i.e., * the 4 way handshake was completed successfully). The call to this function * should be preceded with a call to cfg80211_connect_result(), * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to * indicate the 802.11 association. * This function can also be called by an AP/P2P GO driver that supports * authentication offload. In this case the @peer_addr passed is that of the * associated STA/GC. */ void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len, gfp_t gfp); /** * cfg80211_disconnected - notify cfg80211 that connection was dropped * * @dev: network device * @ie: information elements of the deauth/disassoc frame (may be %NULL) * @ie_len: length of IEs * @reason: reason code for the disconnection, set it to 0 if unknown * @locally_generated: disconnection was requested locally * @gfp: allocation flags * * After it calls this function, the driver should enter an idle state * and not try to connect to any AP any more.
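 *
 * A minimal sketch (hypothetical deauth handling path in a driver; the reason
 * code and IEs would come from the received Deauthentication/Disassociation
 * frame, or be 0/%NULL for a locally generated disconnect):
 *
 *	cfg80211_disconnected(dev, reason_code, deauth_ies, deauth_ies_len,
 *			      locally_generated, GFP_KERNEL);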
*/ void cfg80211_disconnected(struct net_device *dev, u16 reason, const u8 *ie, size_t ie_len, bool locally_generated, gfp_t gfp); /** * cfg80211_ready_on_channel - notification of remain_on_channel start * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) * @duration: Duration in milliseconds that the driver intents to remain on the * channel * @gfp: allocation flags */ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, unsigned int duration, gfp_t gfp); /** * cfg80211_remain_on_channel_expired - remain_on_channel duration expired * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) * @gfp: allocation flags */ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, gfp_t gfp); /** * cfg80211_tx_mgmt_expired - tx_mgmt duration expired * @wdev: wireless device * @cookie: the requested cookie * @chan: The current channel (from tx_mgmt request) * @gfp: allocation flags */ void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, gfp_t gfp); /** * cfg80211_sinfo_alloc_tid_stats - allocate per-tid statistics. * * @sinfo: the station information * @gfp: allocation flags */ int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp); /** * cfg80211_sinfo_release_content - release contents of station info * @sinfo: the station information * * Releases any potentially allocated sub-information of the station * information, but not the struct itself (since it's typically on * the stack.) */ static inline void cfg80211_sinfo_release_content(struct station_info *sinfo) { kfree(sinfo->pertid); } /** * cfg80211_new_sta - notify userspace about station * * @dev: the netdev * @mac_addr: the station's address * @sinfo: the station information * @gfp: allocation flags */ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** * cfg80211_del_sta_sinfo - notify userspace about deletion of a station * @dev: the netdev * @mac_addr: the station's address. For MLD station, MLD address is used. * @sinfo: the station information/statistics * @gfp: allocation flags */ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** * cfg80211_del_sta - notify userspace about deletion of a station * * @dev: the netdev * @mac_addr: the station's address. For MLD station, MLD address is used. * @gfp: allocation flags */ static inline void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { cfg80211_del_sta_sinfo(dev, mac_addr, NULL, gfp); } /** * cfg80211_conn_failed - connection request failed notification * * @dev: the netdev * @mac_addr: the station's address * @reason: the reason for connection failure * @gfp: allocation flags * * Whenever a station tries to connect to an AP and if the station * could not connect to the AP as the AP has rejected the connection * for some reasons, this function is called. 
* * The reason for connection failure can be any of the value from * nl80211_connect_failed_reason enum */ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, enum nl80211_connect_failed_reason reason, gfp_t gfp); /** * struct cfg80211_rx_info - received management frame info * * @freq: Frequency on which the frame was received in kHz * @sig_dbm: signal strength in dBm, or 0 if unknown * @have_link_id: indicates the frame was received on a link of * an MLD, i.e. the @link_id field is valid * @link_id: the ID of the link the frame was received on * @buf: Management frame (header + body) * @len: length of the frame data * @flags: flags, as defined in &enum nl80211_rxmgmt_flags * @rx_tstamp: Hardware timestamp of frame RX in nanoseconds * @ack_tstamp: Hardware timestamp of ack TX in nanoseconds */ struct cfg80211_rx_info { int freq; int sig_dbm; bool have_link_id; u8 link_id; const u8 *buf; size_t len; u32 flags; u64 rx_tstamp; u64 ack_tstamp; }; /** * cfg80211_rx_mgmt_ext - management frame notification with extended info * @wdev: wireless device receiving the frame * @info: RX info as defined in struct cfg80211_rx_info * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. * * Return: %true if a user space application has registered for this frame. * For action frames, that makes it responsible for rejecting unrecognized * action frames; %false otherwise, in which case for action frames the * driver is responsible for rejecting the frame. */ bool cfg80211_rx_mgmt_ext(struct wireless_dev *wdev, struct cfg80211_rx_info *info); /** * cfg80211_rx_mgmt_khz - notification of received, unprocessed management frame * @wdev: wireless device receiving the frame * @freq: Frequency on which the frame was received in KHz * @sig_dbm: signal strength in dBm, or 0 if unknown * @buf: Management frame (header + body) * @len: length of the frame data * @flags: flags, as defined in enum nl80211_rxmgmt_flags * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. * * Return: %true if a user space application has registered for this frame. * For action frames, that makes it responsible for rejecting unrecognized * action frames; %false otherwise, in which case for action frames the * driver is responsible for rejecting the frame. */ static inline bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, const u8 *buf, size_t len, u32 flags) { struct cfg80211_rx_info info = { .freq = freq, .sig_dbm = sig_dbm, .buf = buf, .len = len, .flags = flags }; return cfg80211_rx_mgmt_ext(wdev, &info); } /** * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @wdev: wireless device receiving the frame * @freq: Frequency on which the frame was received in MHz * @sig_dbm: signal strength in dBm, or 0 if unknown * @buf: Management frame (header + body) * @len: length of the frame data * @flags: flags, as defined in enum nl80211_rxmgmt_flags * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. * * Return: %true if a user space application has registered for this frame. * For action frames, that makes it responsible for rejecting unrecognized * action frames; %false otherwise, in which case for action frames the * driver is responsible for rejecting the frame. 
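 *
 * A minimal sketch (hypothetical RX path of a full-MAC driver; freq_mhz,
 * rssi_dbm, frame and frame_len are assumed to be provided by the device):
 *
 *	if (!cfg80211_rx_mgmt(wdev, freq_mhz, rssi_dbm,
 *			      frame, frame_len, 0)) {
 *		// no userspace listener: for Action frames the driver
 *		// itself must reject the frame as unrecognized
 *	}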
*/ static inline bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm, const u8 *buf, size_t len, u32 flags) { struct cfg80211_rx_info info = { .freq = MHZ_TO_KHZ(freq), .sig_dbm = sig_dbm, .buf = buf, .len = len, .flags = flags }; return cfg80211_rx_mgmt_ext(wdev, &info); } /** * struct cfg80211_tx_status - TX status for management frame information * * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() * @tx_tstamp: hardware TX timestamp in nanoseconds * @ack_tstamp: hardware ack RX timestamp in nanoseconds * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged */ struct cfg80211_tx_status { u64 cookie; u64 tx_tstamp; u64 ack_tstamp; const u8 *buf; size_t len; bool ack; }; /** * cfg80211_mgmt_tx_status_ext - TX status notification with extended info * @wdev: wireless device receiving the frame * @status: TX status data * @gfp: context flags * * This function is called whenever a management frame was requested to be * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt with extended info. */ void cfg80211_mgmt_tx_status_ext(struct wireless_dev *wdev, struct cfg80211_tx_status *status, gfp_t gfp); /** * cfg80211_mgmt_tx_status - notification of TX status for management frame * @wdev: wireless device receiving the frame * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * * This function is called whenever a management frame was requested to be * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ static inline void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp) { struct cfg80211_tx_status status = { .cookie = cookie, .buf = buf, .len = len, .ack = ack }; cfg80211_mgmt_tx_status_ext(wdev, &status, gfp); } /** * cfg80211_control_port_tx_status - notification of TX status for control * port frames * @wdev: wireless device receiving the frame * @cookie: Cookie returned by cfg80211_ops::tx_control_port() * @buf: Data frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * * This function is called whenever a control port frame was requested to be * transmitted with cfg80211_ops::tx_control_port() to report the TX status of * the transmission attempt. */ void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp); /** * cfg80211_rx_control_port - notification about a received control port frame * @dev: The device the frame matched to * @skb: The skbuf with the control port frame. It is assumed that the skbuf * is 802.3 formatted (with 802.3 header). The skb can be non-linear. * This function does not take ownership of the skb, so the caller is * responsible for any cleanup. The caller must also ensure that * skb->protocol is set appropriately. * @unencrypted: Whether the frame was received unencrypted * @link_id: the link the frame was received on, -1 if not applicable or unknown * * This function is used to inform userspace about a received control port * frame. It should only be used if userspace indicated it wants to receive * control port frames over nl80211. * * The frame is the data portion of the 802.3 or 802.11 data frame with all * network layer headers removed (e.g. the raw EAPoL frame). 
* * Return: %true if the frame was passed to userspace */ bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb, bool unencrypted, int link_id); /** * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event * @dev: network device * @rssi_event: the triggered RSSI event * @rssi_level: new RSSI level value or 0 if not available * @gfp: context flags * * This function is called when a configured connection quality monitoring * RSSI threshold is reached. */ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, s32 rssi_level, gfp_t gfp); /** * cfg80211_cqm_pktloss_notify - notify userspace about packet loss to peer * @dev: network device * @peer: peer's MAC address * @num_packets: how many packets were lost -- should be a fixed threshold * but probably no less than maybe 50, or maybe a throughput dependent * threshold (to account for temporary interference) * @gfp: context flags */ void cfg80211_cqm_pktloss_notify(struct net_device *dev, const u8 *peer, u32 num_packets, gfp_t gfp); /** * cfg80211_cqm_txe_notify - TX error rate event * @dev: network device * @peer: peer's MAC address * @num_packets: how many packets were lost * @rate: % of packets which failed transmission * @intvl: interval (in s) over which the TX failure threshold was breached. * @gfp: context flags * * Notify userspace when the configured % of TX failures over the number of * packets in a given interval is exceeded. */ void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, u32 num_packets, u32 rate, u32 intvl, gfp_t gfp); /** * cfg80211_cqm_beacon_loss_notify - beacon loss event * @dev: network device * @gfp: context flags * * Notify userspace about beacon loss from the connected AP. */ void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp); /** * __cfg80211_radar_event - radar detection event * @wiphy: the wiphy * @chandef: chandef for the current channel * @offchan: the radar has been detected on the offchannel chain * @gfp: context flags * * This function is called when a radar is detected on the current channel. */ void __cfg80211_radar_event(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, bool offchan, gfp_t gfp); static inline void cfg80211_radar_event(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, gfp_t gfp) { __cfg80211_radar_event(wiphy, chandef, false, gfp); } static inline void cfg80211_background_radar_event(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, gfp_t gfp) { __cfg80211_radar_event(wiphy, chandef, true, gfp); } /** * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event * @dev: network device * @mac: MAC address of the station whose opmode was modified * @sta_opmode: station's current opmode value * @gfp: context flags * * The driver should call this function when a station's opmode is modified * via an Action frame. */ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac, struct sta_opmode_info *sta_opmode, gfp_t gfp); /** * cfg80211_cac_event - Channel availability check (CAC) event * @netdev: network device * @chandef: chandef for the current channel * @event: type of event * @gfp: context flags * * This function is called when a Channel availability check (CAC) is finished * or aborted. This must be called to notify the completion of a CAC process, * also by full-MAC drivers.
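 *
 * A minimal sketch (hypothetical CAC completion path; @chandef describes the
 * channel the CAC was running on):
 *
 *	cfg80211_cac_event(netdev, chandef, NL80211_RADAR_CAC_FINISHED,
 *			   GFP_KERNEL);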
*/ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp); /** * cfg80211_background_cac_abort - Channel Availability Check offchan abort event * @wiphy: the wiphy * * This function is called by the driver when a Channel Availability Check * (CAC) is aborted by an offchannel dedicated chain. */ void cfg80211_background_cac_abort(struct wiphy *wiphy); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying * @dev: network device * @bssid: BSSID of AP (to avoid races) * @replay_ctr: new replay counter * @gfp: allocation flags */ void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp); /** * cfg80211_pmksa_candidate_notify - notify about PMKSA caching candidate * @dev: network device * @index: candidate index (the smaller the index, the higher the priority) * @bssid: BSSID of AP * @preauth: Whether AP advertises support for RSN pre-authentication * @gfp: allocation flags */ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, const u8 *bssid, bool preauth, gfp_t gfp); /** * cfg80211_rx_spurious_frame - inform userspace about a spurious frame * @dev: The device the frame matched to * @addr: the transmitter address * @gfp: context flags * * This function is used in AP mode (only!) to inform userspace that * a spurious class 3 frame was received, to be able to deauth the * sender. * Return: %true if the frame was passed to userspace (or this failed * for a reason other than not having a subscription.) */ bool cfg80211_rx_spurious_frame(struct net_device *dev, const u8 *addr, gfp_t gfp); /** * cfg80211_rx_unexpected_4addr_frame - inform about unexpected WDS frame * @dev: The device the frame matched to * @addr: the transmitter address * @gfp: context flags * * This function is used in AP mode (only!) to inform userspace that * an associated station sent a 4addr frame but that wasn't expected. * It is allowed and desirable to send this event only once for each * station to avoid event flooding. * Return: %true if the frame was passed to userspace (or this failed * for a reason other than not having a subscription.) */ bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, const u8 *addr, gfp_t gfp); /** * cfg80211_probe_status - notify userspace about probe status * @dev: the device the probe was sent on * @addr: the address of the peer * @cookie: the cookie filled in @probe_client previously * @acked: indicates whether the probe was acked or not * @ack_signal: signal strength (in dBm) of the ACK frame. * @is_valid_ack_signal: indicates whether the ack_signal is valid or not. * @gfp: allocation flags */ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, u64 cookie, bool acked, s32 ack_signal, bool is_valid_ack_signal, gfp_t gfp); /** * cfg80211_report_obss_beacon_khz - report beacon from other APs * @wiphy: The wiphy that received the beacon * @frame: the frame * @len: length of the frame * @freq: frequency the frame was received on in kHz * @sig_dbm: signal strength in dBm, or 0 if unknown * * Use this function to report to userspace when a beacon was * received. It is not useful to call this when there is no * netdev that is in AP/GO mode.
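 *
 * A minimal sketch (hypothetical beacon RX path; the frequency is assumed to
 * be reported by the device in kHz):
 *
 *	cfg80211_report_obss_beacon_khz(wiphy, mgmt_buf, mgmt_len,
 *					freq_khz, sig_dbm);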
*/ void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm); /** * cfg80211_report_obss_beacon - report beacon from other APs * @wiphy: The wiphy that received the beacon * @frame: the frame * @len: length of the frame * @freq: frequency the frame was received on * @sig_dbm: signal strength in dBm, or 0 if unknown * * Use this function to report to userspace when a beacon was * received. It is not useful to call this when there is no * netdev that is in AP/GO mode. */ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm) { cfg80211_report_obss_beacon_khz(wiphy, frame, len, MHZ_TO_KHZ(freq), sig_dbm); } /** * cfg80211_reg_can_beacon - check if beaconing is allowed * @wiphy: the wiphy * @chandef: the channel definition * @iftype: interface type * * Return: %true if there is no secondary channel or the secondary channel(s) * can be used for beaconing (i.e. is not a radar channel etc.) */ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype); /** * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation * @wiphy: the wiphy * @chandef: the channel definition * @iftype: interface type * * Return: %true if there is no secondary channel or the secondary channel(s) * can be used for beaconing (i.e. is not a radar channel etc.). This version * also checks if IR-relaxation conditions apply, to allow beaconing under * more permissive conditions. * * Requires the wiphy mutex to be held. */ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype); /* * cfg80211_ch_switch_notify - update wdev channel and notify userspace * @dev: the device which switched channels * @chandef: the new channel definition * @link_id: the link ID for MLO, must be 0 for non-MLO * * Caller must hold wiphy mutex, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id); /* * cfg80211_ch_switch_started_notify - notify channel switch start * @dev: the device on which the channel switch started * @chandef: the future channel definition * @link_id: the link ID for MLO, must be 0 for non-MLO * @count: the number of TBTTs until the channel switch happens * @quiet: whether or not immediate quiet was requested by the AP * * Inform the userspace about the channel switch that has just * started, so that it can take appropriate actions (eg. starting * channel switch on other vifs), if necessary. */ void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id, u8 count, bool quiet); /** * ieee80211_operating_class_to_band - convert operating class to band * * @operating_class: the operating class to convert * @band: band pointer to fill * * Returns %true if the conversion was successful, %false otherwise. */ bool ieee80211_operating_class_to_band(u8 operating_class, enum nl80211_band *band); /** * ieee80211_operating_class_to_chandef - convert operating class to chandef * * @operating_class: the operating class to convert * @chan: the ieee80211_channel to convert * @chandef: a pointer to the resulting chandef * * Returns %true if the conversion was successful, %false otherwise. 
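 *
 * A minimal sketch (assuming @chan was already looked up, for example with
 * ieee80211_get_channel()):
 *
 *	struct cfg80211_chan_def chandef;
 *
 *	if (!ieee80211_operating_class_to_chandef(op_class, chan, &chandef))
 *		return -EINVAL;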
*/ bool ieee80211_operating_class_to_chandef(u8 operating_class, struct ieee80211_channel *chan, struct cfg80211_chan_def *chandef); /** * ieee80211_chandef_to_operating_class - convert chandef to operating class * * @chandef: the chandef to convert * @op_class: a pointer to the resulting operating class * * Returns %true if the conversion was successful, %false otherwise. */ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, u8 *op_class); /** * ieee80211_chandef_to_khz - convert chandef to frequency in kHz * * @chandef: the chandef to convert * * Returns the center frequency of chandef (1st segment) in kHz. */ static inline u32 ieee80211_chandef_to_khz(const struct cfg80211_chan_def *chandef) { return MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset; } /** * cfg80211_tdls_oper_request - request userspace to perform TDLS operation * @dev: the device on which the operation is requested * @peer: the MAC address of the peer device * @oper: the requested TDLS operation (NL80211_TDLS_SETUP or * NL80211_TDLS_TEARDOWN) * @reason_code: the reason code for teardown request * @gfp: allocation flags * * This function is used to request userspace to perform a TDLS operation that * requires knowledge of keys, i.e., link setup or teardown when the AP * connection uses encryption. This is an optional mechanism for the driver to * use if it can automatically determine when a TDLS link could be useful (e.g., * based on traffic and signal strength for a peer). */ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp); /** * cfg80211_calculate_bitrate - calculate actual bitrate (in 100Kbps units) * @rate: given rate_info to calculate bitrate from * * return 0 if MCS index >= 32 */ u32 cfg80211_calculate_bitrate(struct rate_info *rate); /** * cfg80211_unregister_wdev - remove the given wdev * @wdev: struct wireless_dev to remove * * This function removes the device so it can no longer be used. It is necessary * to call this function even when cfg80211 requests the removal of the device * by calling the del_virtual_intf() callback. The function must also be called * when the driver wishes to unregister the wdev, e.g. when the hardware device * is unbound from the driver. * * Requires the RTNL and wiphy mutex to be held. */ void cfg80211_unregister_wdev(struct wireless_dev *wdev); /** * cfg80211_register_netdevice - register the given netdev * @dev: the netdev to register * * Note: In contexts coming from cfg80211 callbacks, you must call this rather * than register_netdevice(); register_netdev() is impossible as the RTNL is * held. Otherwise, both register_netdevice() and register_netdev() are usable * instead as well. * * Requires the RTNL and wiphy mutex to be held. */ int cfg80211_register_netdevice(struct net_device *dev); /** * cfg80211_unregister_netdevice - unregister the given netdev * @dev: the netdev to unregister * * Note: In contexts coming from cfg80211 callbacks, you must call this rather * than unregister_netdevice(); unregister_netdev() is impossible as the RTNL * is held. Otherwise, both unregister_netdevice() and unregister_netdev() are * usable instead as well. * * Requires the RTNL and wiphy mutex to be held.
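 *
 * A minimal sketch (hypothetical del_virtual_intf() implementation; cfg80211
 * calls it with the RTNL and wiphy mutex already held):
 *
 *	static int drv_del_virtual_intf(struct wiphy *wiphy,
 *					struct wireless_dev *wdev)
 *	{
 *		// driver-private teardown of the interface goes here
 *		cfg80211_unregister_netdevice(wdev->netdev);
 *		return 0;
 *	}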
*/ static inline void cfg80211_unregister_netdevice(struct net_device *dev) { #if IS_ENABLED(CONFIG_CFG80211) cfg80211_unregister_wdev(dev->ieee80211_ptr); #endif } /** * struct cfg80211_ft_event_params - FT Information Elements * @ies: FT IEs * @ies_len: length of the FT IE in bytes * @target_ap: target AP's MAC address * @ric_ies: RIC IE * @ric_ies_len: length of the RIC IE in bytes */ struct cfg80211_ft_event_params { const u8 *ies; size_t ies_len; const u8 *target_ap; const u8 *ric_ies; size_t ric_ies_len; }; /** * cfg80211_ft_event - notify userspace about FT IE and RIC IE * @netdev: network device * @ft_event: IE information */ void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_ft_event_params *ft_event); /** * cfg80211_get_p2p_attr - find and copy a P2P attribute from IE buffer * @ies: the input IE buffer * @len: the input length * @attr: the attribute ID to find * @buf: output buffer, can be %NULL if the data isn't needed, e.g. * if the function is only called to get the needed buffer size * @bufsize: size of the output buffer * * The function finds a given P2P attribute in the (vendor) IEs and * copies its contents to the given buffer. * * Return: A negative error code (-%EILSEQ or -%ENOENT) if the data is * malformed or the attribute can't be found (respectively), or the * length of the found attribute (which can be zero). */ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, enum ieee80211_p2p_attr_id attr, u8 *buf, unsigned int bufsize); /** * ieee80211_ie_split_ric - split an IE buffer according to ordering (with RIC) * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before * the split. A WLAN_EID_EXTENSION value means that the next * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @after_ric: array IE types that come after the RIC element * @n_after_ric: size of the @after_ric array * @offset: offset where to start splitting in the buffer * * This function splits an IE buffer by updating the @offset * variable to point to the location where the buffer should be * split. * * It assumes that the given IE buffer is well-formed, this * has to be guaranteed by the caller! * * It also assumes that the IEs in the buffer are ordered * correctly, if not the result of using this function will not * be ordered correctly either, i.e. it does no reordering. * * The function returns the offset where the next part of the * buffer starts, which may be @ielen if the entire (remainder) * of the buffer should be used. */ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, const u8 *after_ric, int n_after_ric, size_t offset); /** * ieee80211_ie_split - split an IE buffer according to ordering * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before * the split. A WLAN_EID_EXTENSION value means that the next * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @offset: offset where to start splitting in the buffer * * This function splits an IE buffer by updating the @offset * variable to point to the location where the buffer should be * split. * * It assumes that the given IE buffer is well-formed, this * has to be guaranteed by the caller! * * It also assumes that the IEs in the buffer are ordered * correctly, if not the result of using this function will not * be ordered correctly either, i.e. it does no reordering. 
* * The function returns the offset where the next part of the * buffer starts, which may be @ielen if the entire (remainder) * of the buffer should be used. */ static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, size_t offset) { return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); } /** * ieee80211_fragment_element - fragment the last element in skb * @skb: The skbuf that the element was added to * @len_pos: Pointer to length of the element to fragment * @frag_id: The element ID to use for fragments * * This function fragments all data after @len_pos, adding fragmentation * elements with the given ID as appropriate. The SKB will grow in size * accordingly. */ void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); /** * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN * @wdev: the wireless device reporting the wakeup * @wakeup: the wakeup report * @gfp: allocation flags * * This function reports that the given device woke up. If it * caused the wakeup, report the reason(s), otherwise you may * pass %NULL as the @wakeup parameter to advertise that something * else caused the wakeup. */ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_wowlan_wakeup *wakeup, gfp_t gfp); /** * cfg80211_crit_proto_stopped() - indicate critical protocol stopped by driver. * * @wdev: the wireless device for which critical protocol is stopped. * @gfp: allocation flags * * This function can be called by the driver to indicate it has reverted * operation back to normal. One reason could be that the duration given * by .crit_proto_start() has expired. */ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp); /** * ieee80211_get_num_supported_channels - get number of channels device has * @wiphy: the wiphy * * Return: the number of channels supported by the device. */ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy); /** * cfg80211_check_combinations - check interface combinations * * @wiphy: the wiphy * @params: the interface combinations parameter * * This function can be called by the driver to check whether a * combination of interfaces and their types are allowed according to * the interface combinations. */ int cfg80211_check_combinations(struct wiphy *wiphy, struct iface_combination_params *params); /** * cfg80211_iter_combinations - iterate over matching combinations * * @wiphy: the wiphy * @params: the interface combinations parameter * @iter: function to call for each matching combination * @data: pointer to pass to iter function * * This function can be called by the driver to check what possible * combinations it fits in at a given moment, e.g. for channel switching * purposes. */ int cfg80211_iter_combinations(struct wiphy *wiphy, struct iface_combination_params *params, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data); /* * cfg80211_stop_iface - trigger interface disconnection * * @wiphy: the wiphy * @wdev: wireless device * @gfp: context flags * * Trigger interface to be stopped as if AP was stopped, IBSS/mesh left, STA * disconnected. * * Note: This doesn't need any locks and is asynchronous. 
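 *
 * A minimal sketch (hypothetical firmware error handler; GFP_ATOMIC is used
 * since this may run outside of process context):
 *
 *	cfg80211_stop_iface(wiphy, wdev, GFP_ATOMIC);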
*/ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, gfp_t gfp); /** * cfg80211_shutdown_all_interfaces - shut down all interfaces for a wiphy * @wiphy: the wiphy to shut down * * This function shuts down all interfaces belonging to this wiphy by * calling dev_close() (and treating non-netdev interfaces as needed). * It shouldn't really be used unless there are some fatal device errors * that really can't be recovered in any other way. * * Callers must hold the RTNL and be able to deal with callbacks into * the driver while the function is running. */ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy); /** * wiphy_ext_feature_set - set the extended feature flag * * @wiphy: the wiphy to modify. * @ftidx: extended feature bit index. * * The extended features are flagged in multiple bytes (see * &struct wiphy.@ext_features) */ static inline void wiphy_ext_feature_set(struct wiphy *wiphy, enum nl80211_ext_feature_index ftidx) { u8 *ft_byte; ft_byte = &wiphy->ext_features[ftidx / 8]; *ft_byte |= BIT(ftidx % 8); } /** * wiphy_ext_feature_isset - check the extended feature flag * * @wiphy: the wiphy to check. * @ftidx: extended feature bit index. * * The extended features are flagged in multiple bytes (see * &struct wiphy.@ext_features) */ static inline bool wiphy_ext_feature_isset(struct wiphy *wiphy, enum nl80211_ext_feature_index ftidx) { u8 ft_byte; ft_byte = wiphy->ext_features[ftidx / 8]; return (ft_byte & BIT(ftidx % 8)) != 0; } /** * cfg80211_free_nan_func - free NAN function * @f: NAN function that should be freed * * Frees the NAN function and all its allocated members. */ void cfg80211_free_nan_func(struct cfg80211_nan_func *f); /** * struct cfg80211_nan_match_params - NAN match parameters * @type: the type of the function that triggered a match. If it is * %NL80211_NAN_FUNC_SUBSCRIBE it means that we replied to a subscriber. * If it is %NL80211_NAN_FUNC_PUBLISH, it means that we got a discovery * result. * If it is %NL80211_NAN_FUNC_FOLLOW_UP, we received a follow up. * @inst_id: the local instance id * @peer_inst_id: the instance id of the peer's function * @addr: the MAC address of the peer * @info_len: the length of @info * @info: the Service Specific Info from the peer (if any) * @cookie: unique identifier of the corresponding function */ struct cfg80211_nan_match_params { enum nl80211_nan_function_type type; u8 inst_id; u8 peer_inst_id; const u8 *addr; u8 info_len; const u8 *info; u64 cookie; }; /** * cfg80211_nan_match - report a match for a NAN function. * @wdev: the wireless device reporting the match * @match: match notification parameters * @gfp: allocation flags * * This function reports that a NAN function had a match. This * can be a subscribe that had a match or a solicited publish that * was sent. It can also be a follow up that was received. */ void cfg80211_nan_match(struct wireless_dev *wdev, struct cfg80211_nan_match_params *match, gfp_t gfp); /** * cfg80211_nan_func_terminated - notify about NAN function termination. * * @wdev: the wireless device reporting the termination * @inst_id: the local instance id * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*) * @cookie: unique NAN function identifier * @gfp: allocation flags * * This function reports that a NAN function was terminated.
*/ void cfg80211_nan_func_terminated(struct wireless_dev *wdev, u8 inst_id, enum nl80211_nan_func_term_reason reason, u64 cookie, gfp_t gfp); /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); /** * cfg80211_external_auth_request - userspace request for authentication * @netdev: network device * @params: External authentication parameters * @gfp: allocation flags * Returns: 0 on success, < 0 on error */ int cfg80211_external_auth_request(struct net_device *netdev, struct cfg80211_external_auth_params *params, gfp_t gfp); /** * cfg80211_pmsr_report - report peer measurement result data * @wdev: the wireless device reporting the measurement * @req: the original measurement request * @result: the result data * @gfp: allocation flags */ void cfg80211_pmsr_report(struct wireless_dev *wdev, struct cfg80211_pmsr_request *req, struct cfg80211_pmsr_result *result, gfp_t gfp); /** * cfg80211_pmsr_complete - report peer measurement completed * @wdev: the wireless device reporting the measurement * @req: the original measurement request * @gfp: allocation flags * * Report that the entire measurement completed, after this * the request pointer will no longer be valid. */ void cfg80211_pmsr_complete(struct wireless_dev *wdev, struct cfg80211_pmsr_request *req, gfp_t gfp); /** * cfg80211_iftype_allowed - check whether the interface can be allowed * @wiphy: the wiphy * @iftype: interface type * @is_4addr: use_4addr flag, must be '0' when check_swif is '1' * @check_swif: check iftype against software interfaces * * Check whether the interface is allowed to operate; additionally, this API * can be used to check iftype against the software interfaces when * check_swif is '1'. */ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, bool is_4addr, u8 check_swif); /** * cfg80211_assoc_comeback - notification of association that was * temporarily rejected with a comeback * @netdev: network device * @ap_addr: AP (MLD) address that rejected the association * @timeout: timeout interval value TUs. * * this function may sleep. the caller must hold the corresponding wdev's mutex. */ void cfg80211_assoc_comeback(struct net_device *netdev, const u8 *ap_addr, u32 timeout); /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ #define wiphy_printk(level, wiphy, format, args...) \ dev_printk(level, &(wiphy)->dev, format, ##args) #define wiphy_emerg(wiphy, format, args...) \ dev_emerg(&(wiphy)->dev, format, ##args) #define wiphy_alert(wiphy, format, args...) \ dev_alert(&(wiphy)->dev, format, ##args) #define wiphy_crit(wiphy, format, args...) \ dev_crit(&(wiphy)->dev, format, ##args) #define wiphy_err(wiphy, format, args...) \ dev_err(&(wiphy)->dev, format, ##args) #define wiphy_warn(wiphy, format, args...) \ dev_warn(&(wiphy)->dev, format, ##args) #define wiphy_notice(wiphy, format, args...) \ dev_notice(&(wiphy)->dev, format, ##args) #define wiphy_info(wiphy, format, args...) \ dev_info(&(wiphy)->dev, format, ##args) #define wiphy_info_once(wiphy, format, args...) \ dev_info_once(&(wiphy)->dev, format, ##args) #define wiphy_err_ratelimited(wiphy, format, args...) \ dev_err_ratelimited(&(wiphy)->dev, format, ##args) #define wiphy_warn_ratelimited(wiphy, format, args...) \ dev_warn_ratelimited(&(wiphy)->dev, format, ##args) #define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) #define wiphy_dbg(wiphy, format, args...) 
\ dev_dbg(&(wiphy)->dev, format, ##args) #if defined(VERBOSE_DEBUG) #define wiphy_vdbg wiphy_dbg #else #define wiphy_vdbg(wiphy, format, args...) \ ({ \ if (0) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ 0; \ }) #endif /* * wiphy_WARN() acts like wiphy_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the * file/line information and a backtrace. */ #define wiphy_WARN(wiphy, format, args...) \ WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args); /** * cfg80211_update_owe_info_event - Notify the peer's OWE info to user space * @netdev: network device * @owe_info: peer's owe info * @gfp: allocation flags */ void cfg80211_update_owe_info_event(struct net_device *netdev, struct cfg80211_update_owe_info *owe_info, gfp_t gfp); /** * cfg80211_bss_flush - resets all the scan entries * @wiphy: the wiphy */ void cfg80211_bss_flush(struct wiphy *wiphy); /** * cfg80211_bss_color_notify - notify about bss color event * @dev: network device * @cmd: the actual event we want to notify * @count: the number of TBTTs until the color change happens * @color_bitmap: representations of the colors that the local BSS is aware of */ int cfg80211_bss_color_notify(struct net_device *dev, enum nl80211_commands cmd, u8 count, u64 color_bitmap); /** * cfg80211_obss_color_collision_notify - notify about bss color collision * @dev: network device * @color_bitmap: representations of the colors that the local BSS is aware of */ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, u64 color_bitmap) { return cfg80211_bss_color_notify(dev, NL80211_CMD_OBSS_COLOR_COLLISION, 0, color_bitmap); } /** * cfg80211_color_change_started_notify - notify color change start * @dev: the device on which the color is switched * @count: the number of TBTTs until the color change happens * * Inform the userspace about the color change that has started. */ static inline int cfg80211_color_change_started_notify(struct net_device *dev, u8 count) { return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_STARTED, count, 0); } /** * cfg80211_color_change_aborted_notify - notify color change abort * @dev: the device on which the color is switched * * Inform the userspace about the color change that has aborted. */ static inline int cfg80211_color_change_aborted_notify(struct net_device *dev) { return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_ABORTED, 0, 0); } /** * cfg80211_color_change_notify - notify color change completion * @dev: the device on which the color was switched * * Inform the userspace about the color change that has completed. */ static inline int cfg80211_color_change_notify(struct net_device *dev) { return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_COMPLETED, 0, 0); } /** * cfg80211_links_removed - Notify about removed STA MLD setup links. * @dev: network device. * @link_mask: BIT mask of removed STA MLD setup link IDs. * * Inform cfg80211 and the userspace about removed STA MLD setup links due to * AP MLD removing the corresponding affiliated APs with Multi-Link * reconfiguration. Note that it's not valid to remove all links, in this * case disconnect instead. * Also note that the wdev mutex must be held. 
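 *
 * A minimal sketch (hypothetical MLO reconfiguration handler reporting that
 * setup link 1 of the current connection was removed by the AP MLD):
 *
 *	cfg80211_links_removed(dev, BIT(1));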
*/ void cfg80211_links_removed(struct net_device *dev, u16 link_mask); /** * cfg80211_schedule_channels_check - schedule regulatory check if needed * @wdev: the wireless device to check * * In case the device supports NO_IR or DFS relaxations, schedule regulatory * channels check, as previous concurrent operation conditions may not * hold anymore. */ void cfg80211_schedule_channels_check(struct wireless_dev *wdev); #ifdef CONFIG_CFG80211_DEBUGFS /** * wiphy_locked_debugfs_read - do a locked read in debugfs * @wiphy: the wiphy to use * @file: the file being read * @buf: the buffer to fill and then read from * @bufsize: size of the buffer * @userbuf: the user buffer to copy to * @count: read count * @ppos: read position * @handler: the read handler to call (under wiphy lock) * @data: additional data to pass to the read handler */ ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, char __user *userbuf, size_t count, loff_t *ppos, ssize_t (*handler)(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, void *data), void *data); /** * wiphy_locked_debugfs_write - do a locked write in debugfs * @wiphy: the wiphy to use * @file: the file being written to * @buf: the buffer to copy the user data to * @bufsize: size of the buffer * @userbuf: the user buffer to copy from * @count: read count * @handler: the write handler to call (under wiphy lock) * @data: additional data to pass to the write handler */ ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, char *buf, size_t bufsize, const char __user *userbuf, size_t count, ssize_t (*handler)(struct wiphy *wiphy, struct file *file, char *buf, size_t count, void *data), void *data); #endif #endif /* __NET_CFG80211_H */
/* SPDX-License-Identifier: GPL-2.0 */ /* * Events for filesystem locks * * Copyright 2013 Jeff Layton <jlayton@poochiereds.net> */ #undef TRACE_SYSTEM #define TRACE_SYSTEM filelock #if !defined(_TRACE_FILELOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_FILELOCK_H #include <linux/tracepoint.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/kdev_t.h> #define show_fl_flags(val) \ __print_flags(val, "|", \ { FL_POSIX, "FL_POSIX" }, \ { FL_FLOCK, "FL_FLOCK" }, \ { FL_DELEG, "FL_DELEG" }, \ { FL_ACCESS, "FL_ACCESS" }, \ { FL_EXISTS, "FL_EXISTS" }, \ { FL_LEASE, "FL_LEASE" }, \ { FL_CLOSE, "FL_CLOSE" }, \ { FL_SLEEP, "FL_SLEEP" }, \ { FL_DOWNGRADE_PENDING, "FL_DOWNGRADE_PENDING" }, \ { FL_UNLOCK_PENDING, "FL_UNLOCK_PENDING" }, \ { FL_OFDLCK, "FL_OFDLCK" }) #define show_fl_type(val) \ __print_symbolic(val, \ { F_RDLCK, "F_RDLCK" }, \ { F_WRLCK, "F_WRLCK" }, \ { F_UNLCK, "F_UNLCK" }) TRACE_EVENT(locks_get_lock_context, TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx), TP_ARGS(inode, type, ctx), TP_STRUCT__entry( __field(unsigned long, i_ino) __field(dev_t, s_dev) __field(unsigned char, type) __field(struct file_lock_context *, ctx) ), TP_fast_assign( __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; __entry->type = type; __entry->ctx = ctx; ), TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, show_fl_type(__entry->type), __entry->ctx) ); DECLARE_EVENT_CLASS(filelock_lock, TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), TP_ARGS(inode, fl, ret), TP_STRUCT__entry( __field(struct file_lock *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) __field(struct file_lock_core *, blocker) __field(fl_owner_t, owner) __field(unsigned int, pid) __field(unsigned int, flags) __field(unsigned char, type) __field(loff_t, fl_start) __field(loff_t, fl_end) __field(int, ret) ), TP_fast_assign( __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; __entry->blocker = fl ? fl->c.flc_blocker : NULL; __entry->owner = fl ? fl->c.flc_owner : NULL; __entry->pid = fl ? fl->c.flc_pid : 0; __entry->flags = fl ? fl->c.flc_flags : 0; __entry->type = fl ? fl->c.flc_type : 0; __entry->fl_start = fl ? fl->fl_start : 0; __entry->fl_end = fl ?
fl->fl_end : 0; __entry->ret = ret; ), TP_printk("fl=%p dev=0x%x:0x%x ino=0x%lx fl_blocker=%p fl_owner=%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, __entry->blocker, __entry->owner, __entry->pid, show_fl_flags(__entry->flags), show_fl_type(__entry->type), __entry->fl_start, __entry->fl_end, __entry->ret) ); DEFINE_EVENT(filelock_lock, posix_lock_inode, TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), TP_ARGS(inode, fl, ret)); DEFINE_EVENT(filelock_lock, fcntl_setlk, TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), TP_ARGS(inode, fl, ret)); DEFINE_EVENT(filelock_lock, locks_remove_posix, TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), TP_ARGS(inode, fl, ret)); DEFINE_EVENT(filelock_lock, flock_lock_inode, TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), TP_ARGS(inode, fl, ret)); DECLARE_EVENT_CLASS(filelock_lease, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl), TP_STRUCT__entry( __field(struct file_lease *, fl) __field(unsigned long, i_ino) __field(dev_t, s_dev) __field(struct file_lock_core *, blocker) __field(fl_owner_t, owner) __field(unsigned int, flags) __field(unsigned char, type) __field(unsigned long, break_time) __field(unsigned long, downgrade_time) ), TP_fast_assign( __entry->fl = fl ? fl : NULL; __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; __entry->blocker = fl ? fl->c.flc_blocker : NULL; __entry->owner = fl ? fl->c.flc_owner : NULL; __entry->flags = fl ? fl->c.flc_flags : 0; __entry->type = fl ? fl->c.flc_type : 0; __entry->break_time = fl ? fl->fl_break_time : 0; __entry->downgrade_time = fl ? fl->fl_downgrade_time : 0; ), TP_printk("fl=%p dev=0x%x:0x%x ino=0x%lx fl_blocker=%p fl_owner=%p fl_flags=%s fl_type=%s fl_break_time=%lu fl_downgrade_time=%lu", __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, __entry->blocker, __entry->owner, show_fl_flags(__entry->flags), show_fl_type(__entry->type), __entry->break_time, __entry->downgrade_time) ); DEFINE_EVENT(filelock_lease, break_lease_noblock, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); DEFINE_EVENT(filelock_lease, break_lease_block, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); DEFINE_EVENT(filelock_lease, break_lease_unblock, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); DEFINE_EVENT(filelock_lease, generic_delete_lease, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); DEFINE_EVENT(filelock_lease, time_out_leases, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl)); TRACE_EVENT(generic_add_lease, TP_PROTO(struct inode *inode, struct file_lease *fl), TP_ARGS(inode, fl), TP_STRUCT__entry( __field(unsigned long, i_ino) __field(int, wcount) __field(int, rcount) __field(int, icount) __field(dev_t, s_dev) __field(fl_owner_t, owner) __field(unsigned int, flags) __field(unsigned char, type) ), TP_fast_assign( __entry->s_dev = inode->i_sb->s_dev; __entry->i_ino = inode->i_ino; __entry->wcount = atomic_read(&inode->i_writecount); __entry->rcount = atomic_read(&inode->i_readcount); __entry->icount = atomic_read(&inode->i_count); __entry->owner = fl->c.flc_owner; __entry->flags = fl->c.flc_flags; __entry->type = fl->c.flc_type; ), TP_printk("dev=0x%x:0x%x ino=0x%lx wcount=%d rcount=%d icount=%d fl_owner=%p fl_flags=%s fl_type=%s", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), 
__entry->i_ino, __entry->wcount, __entry->rcount, __entry->icount, __entry->owner, show_fl_flags(__entry->flags), show_fl_type(__entry->type)) ); TRACE_EVENT(leases_conflict, TP_PROTO(bool conflict, struct file_lease *lease, struct file_lease *breaker), TP_ARGS(conflict, lease, breaker), TP_STRUCT__entry( __field(void *, lease) __field(void *, breaker) __field(unsigned int, l_fl_flags) __field(unsigned int, b_fl_flags) __field(unsigned char, l_fl_type) __field(unsigned char, b_fl_type) __field(bool, conflict) ), TP_fast_assign( __entry->lease = lease; __entry->l_fl_flags = lease->c.flc_flags; __entry->l_fl_type = lease->c.flc_type; __entry->breaker = breaker; __entry->b_fl_flags = breaker->c.flc_flags; __entry->b_fl_type = breaker->c.flc_type; __entry->conflict = conflict; ), TP_printk("conflict %d: lease=%p fl_flags=%s fl_type=%s; breaker=%p fl_flags=%s fl_type=%s", __entry->conflict, __entry->lease, show_fl_flags(__entry->l_fl_flags), show_fl_type(__entry->l_fl_type), __entry->breaker, show_fl_flags(__entry->b_fl_flags), show_fl_type(__entry->b_fl_type)) ); #endif /* _TRACE_FILELOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
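/*
 * Illustrative sketch, not part of this header: how the events above are
 * consumed.  Exactly one .c file (fs/locks.c for these events) defines
 * CREATE_TRACE_POINTS before including the header, which instantiates the
 * tracepoints; each DEFINE_EVENT() then provides a trace_<name>() call such
 * as trace_posix_lock_inode().  The wrapper function below is hypothetical.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/filelock.h>

static int example_apply_posix_lock(struct inode *inode, struct file_lock *fl)
{
	int error = 0;	/* stand-in for the real conflict/queueing logic */

	/* Emits a "posix_lock_inode" event using the class's TP_printk() format. */
	trace_posix_lock_inode(inode, fl, error);

	return error;
}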
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_UNWIND_H
#define _ASM_X86_UNWIND_H

#include <linux/sched.h>
#include <linux/ftrace.h>
#include <linux/rethook.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>

#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)

struct unwind_state {
	struct stack_info stack_info;
	unsigned long stack_mask;
	struct task_struct *task;
	int graph_idx;
#if defined(CONFIG_RETHOOK)
	struct llist_node *kr_cur;
#endif
	bool error;
#if defined(CONFIG_UNWINDER_ORC)
	bool signal, full_regs;
	unsigned long sp, bp, ip;
	struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
	bool got_irq;
	unsigned long *bp, *orig_sp, ip;
	/*
	 * If non-NULL: The current frame is incomplete and doesn't contain a
	 * valid BP. When looking for the next frame, use this instead of the
	 * non-existent saved BP.
	 */
	unsigned long *next_bp;
	struct pt_regs *regs;
#else
	unsigned long *sp;
#endif
};

void __unwind_start(struct unwind_state *state, struct task_struct *task,
		    struct pt_regs *regs, unsigned long *first_frame);
bool unwind_next_frame(struct unwind_state *state);
unsigned long unwind_get_return_address(struct unwind_state *state);
unsigned long *unwind_get_return_address_ptr(struct unwind_state *state);

static inline bool unwind_done(struct unwind_state *state)
{
	return state->stack_info.type == STACK_TYPE_UNKNOWN;
}

static inline bool unwind_error(struct unwind_state *state)
{
	return state->error;
}

static inline
void unwind_start(struct unwind_state *state, struct task_struct *task,
		  struct pt_regs *regs, unsigned long *first_frame)
{
	first_frame = first_frame ? : get_stack_pointer(task, regs);

	__unwind_start(state, task, regs, first_frame);
}

#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
/*
 * If 'partial' returns true, only the iret frame registers are valid.
 */
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
						    bool *partial)
{
	if (unwind_done(state))
		return NULL;

	if (partial) {
#ifdef CONFIG_UNWINDER_ORC
		*partial = !state->full_regs;
#else
		*partial = false;
#endif
	}

	return state->regs;
}
#else
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
						    bool *partial)
{
	return NULL;
}
#endif

#ifdef CONFIG_UNWINDER_ORC
void unwind_init(void);
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
			void *orc, size_t orc_size);
#else
static inline void unwind_init(void) {}
static inline
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
			void *orc, size_t orc_size) {}
#endif

static inline unsigned long
unwind_recover_rethook(struct unwind_state *state, unsigned long addr,
		       unsigned long *addr_p)
{
#ifdef CONFIG_RETHOOK
	if (is_rethook_trampoline(addr))
		return rethook_find_ret_addr(state->task, (unsigned long)addr_p,
					     &state->kr_cur);
#endif
	return addr;
}

/* Recover the return address modified by rethook and ftrace_graph.
*/ static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) { unsigned long ret; ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, addr_p); return unwind_recover_rethook(state, ret, addr_p); } /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned * the stack in the meantime. */ #define READ_ONCE_TASK_STACK(task, x) \ ({ \ unsigned long val; \ if (task == current) \ val = READ_ONCE(x); \ else \ val = READ_ONCE_NOCHECK(x); \ val; \ }) static inline bool task_on_another_cpu(struct task_struct *task) { #ifdef CONFIG_SMP return task != current && task->on_cpu; #else return false; #endif } #endif /* _ASM_X86_UNWIND_H */
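/*
 * Illustrative sketch, not part of unwind.h: the canonical unwind loop used
 * by the x86 stack walkers - start at the task's current frame and iterate
 * with unwind_next_frame() until unwind_done(), recovering one return
 * address per frame.  "example_dump_task_stack" is a hypothetical helper.
 */
#include <linux/printk.h>
#include <linux/sched.h>
#include <asm/unwind.h>

static void example_dump_task_stack(struct task_struct *task)
{
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr)
			break;
		pr_info("  %pS\n", (void *)addr);
	}

	if (unwind_error(&state))
		pr_info("  <stack trace unreliable>\n");
}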
// SPDX-License-Identifier: GPL-2.0
/*
 * USB Serial Converter Bus specific functions
 *
 * Copyright (C) 2002 Greg Kroah-Hartman (greg@kroah.com)
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/tty.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

static int usb_serial_device_match(struct device *dev,
				   struct device_driver *drv)
{
	const struct usb_serial_port *port = to_usb_serial_port(dev);
	struct usb_serial_driver *driver = to_usb_serial_driver(drv);

	/*
	 * drivers are already assigned to ports in serial_probe so it's
	 * a simple check here.
	 */
	if (driver == port->serial->type)
		return 1;

	return 0;
}

static int usb_serial_device_probe(struct device *dev)
{
	struct usb_serial_port *port = to_usb_serial_port(dev);
	struct usb_serial_driver *driver;
	struct device *tty_dev;
	int retval = 0;
	int minor;

	/* make sure suspend/resume doesn't race against port_probe */
	retval = usb_autopm_get_interface(port->serial->interface);
	if (retval)
		return retval;

	driver = port->serial->type;
	if (driver->port_probe) {
		retval = driver->port_probe(port);
		if (retval)
			goto err_autopm_put;
	}

	minor = port->minor;
	tty_dev = tty_port_register_device(&port->port, usb_serial_tty_driver,
					   minor, dev);
	if (IS_ERR(tty_dev)) {
		retval = PTR_ERR(tty_dev);
		goto err_port_remove;
	}

	usb_autopm_put_interface(port->serial->interface);

	dev_info(&port->serial->dev->dev,
		 "%s converter now attached to ttyUSB%d\n",
		 driver->description, minor);

	return 0;

err_port_remove:
	if (driver->port_remove)
		driver->port_remove(port);
err_autopm_put:
	usb_autopm_put_interface(port->serial->interface);

	return retval;
}

static void usb_serial_device_remove(struct device *dev)
{
	struct usb_serial_port *port = to_usb_serial_port(dev);
	struct usb_serial_driver *driver;
	int minor;
	int autopm_err;

	/*
	 * Make sure suspend/resume doesn't race against port_remove.
	 *
	 * Note that no further runtime PM callbacks will be made if
	 * autopm_get fails.
*/ autopm_err = usb_autopm_get_interface(port->serial->interface); minor = port->minor; tty_unregister_device(usb_serial_tty_driver, minor); driver = port->serial->type; if (driver->port_remove) driver->port_remove(port); dev_info(dev, "%s converter now disconnected from ttyUSB%d\n", driver->description, minor); if (!autopm_err) usb_autopm_put_interface(port->serial->interface); } static ssize_t new_id_store(struct device_driver *driver, const char *buf, size_t count) { struct usb_serial_driver *usb_drv = to_usb_serial_driver(driver); ssize_t retval = usb_store_new_id(&usb_drv->dynids, usb_drv->id_table, driver, buf, count); if (retval >= 0 && usb_drv->usb_driver != NULL) retval = usb_store_new_id(&usb_drv->usb_driver->dynids, usb_drv->usb_driver->id_table, &usb_drv->usb_driver->driver, buf, count); return retval; } static ssize_t new_id_show(struct device_driver *driver, char *buf) { struct usb_serial_driver *usb_drv = to_usb_serial_driver(driver); return usb_show_dynids(&usb_drv->dynids, buf); } static DRIVER_ATTR_RW(new_id); static struct attribute *usb_serial_drv_attrs[] = { &driver_attr_new_id.attr, NULL, }; ATTRIBUTE_GROUPS(usb_serial_drv); static void free_dynids(struct usb_serial_driver *drv) { struct usb_dynid *dynid, *n; spin_lock(&drv->dynids.lock); list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { list_del(&dynid->node); kfree(dynid); } spin_unlock(&drv->dynids.lock); } const struct bus_type usb_serial_bus_type = { .name = "usb-serial", .match = usb_serial_device_match, .probe = usb_serial_device_probe, .remove = usb_serial_device_remove, .drv_groups = usb_serial_drv_groups, }; int usb_serial_bus_register(struct usb_serial_driver *driver) { int retval; driver->driver.bus = &usb_serial_bus_type; spin_lock_init(&driver->dynids.lock); INIT_LIST_HEAD(&driver->dynids.list); retval = driver_register(&driver->driver); return retval; } void usb_serial_bus_deregister(struct usb_serial_driver *driver) { free_dynids(driver); driver_unregister(&driver->driver); }
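/*
 * Illustrative sketch, not part of bus.c: the driver side of this bus.  A
 * usb_serial_driver registered with usb_serial_register_drivers() (here via
 * the module_usb_serial_driver() helper) ends up on usb_serial_bus_type, so
 * the probe/remove callbacks above invoke its port_probe()/port_remove().
 * The "example" names and the VID/PID are hypothetical.
 */
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

static const struct usb_device_id example_id_table[] = {
	{ USB_DEVICE(0x1234, 0x5678) },		/* hypothetical device */
	{ }
};
MODULE_DEVICE_TABLE(usb, example_id_table);

static int example_port_probe(struct usb_serial_port *port)
{
	/* allocate per-port state and stash it with usb_set_serial_port_data() */
	return 0;
}

static void example_port_remove(struct usb_serial_port *port)
{
	/* release whatever example_port_probe() allocated */
}

static struct usb_serial_driver example_device = {
	.driver = {
		.owner	= THIS_MODULE,
		.name	= "example",
	},
	.id_table	= example_id_table,
	.num_ports	= 1,
	.port_probe	= example_port_probe,
	.port_remove	= example_port_remove,
};

static struct usb_serial_driver * const serial_drivers[] = {
	&example_device, NULL
};

module_usb_serial_driver(serial_drivers, example_id_table);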
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Input Multitouch Library
 *
 * Copyright (c) 2008-2010 Henrik Rydberg
 */

#include <linux/input/mt.h>
#include <linux/export.h>
#include <linux/slab.h>
#include "input-core-private.h"

#define TRKID_SGN	((TRKID_MAX + 1) >> 1)

static void copy_abs(struct input_dev *dev, unsigned int dst, unsigned int src)
{
	if (dev->absinfo && test_bit(src, dev->absbit)) {
		dev->absinfo[dst] = dev->absinfo[src];
		dev->absinfo[dst].fuzz = 0;
		__set_bit(dst, dev->absbit);
	}
}

/**
 * input_mt_init_slots() - initialize MT input slots
 * @dev: input device supporting MT events and finger tracking
 * @num_slots: number of slots used by the device
 * @flags: mt tasks to handle in core
 *
 * This function allocates all necessary memory for MT slot handling
 * in the input device, prepares the ABS_MT_SLOT and
 * ABS_MT_TRACKING_ID events for use and sets up appropriate buffers.
 * Depending on the flags set, it also performs pointer emulation and
 * frame synchronization.
 *
 * May be called repeatedly. Returns -EINVAL if attempting to
 * reinitialize with a different number of slots.
 */
int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots,
			unsigned int flags)
{
	struct input_mt *mt = dev->mt;
	int i;

	if (!num_slots)
		return 0;

	if (mt)
		return mt->num_slots != num_slots ?
-EINVAL : 0; mt = kzalloc(struct_size(mt, slots, num_slots), GFP_KERNEL); if (!mt) goto err_mem; mt->num_slots = num_slots; mt->flags = flags; input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0); input_set_abs_params(dev, ABS_MT_TRACKING_ID, 0, TRKID_MAX, 0, 0); if (flags & (INPUT_MT_POINTER | INPUT_MT_DIRECT)) { __set_bit(EV_KEY, dev->evbit); __set_bit(BTN_TOUCH, dev->keybit); copy_abs(dev, ABS_X, ABS_MT_POSITION_X); copy_abs(dev, ABS_Y, ABS_MT_POSITION_Y); copy_abs(dev, ABS_PRESSURE, ABS_MT_PRESSURE); } if (flags & INPUT_MT_POINTER) { __set_bit(BTN_TOOL_FINGER, dev->keybit); __set_bit(BTN_TOOL_DOUBLETAP, dev->keybit); if (num_slots >= 3) __set_bit(BTN_TOOL_TRIPLETAP, dev->keybit); if (num_slots >= 4) __set_bit(BTN_TOOL_QUADTAP, dev->keybit); if (num_slots >= 5) __set_bit(BTN_TOOL_QUINTTAP, dev->keybit); __set_bit(INPUT_PROP_POINTER, dev->propbit); } if (flags & INPUT_MT_DIRECT) __set_bit(INPUT_PROP_DIRECT, dev->propbit); if (flags & INPUT_MT_SEMI_MT) __set_bit(INPUT_PROP_SEMI_MT, dev->propbit); if (flags & INPUT_MT_TRACK) { unsigned int n2 = num_slots * num_slots; mt->red = kcalloc(n2, sizeof(*mt->red), GFP_KERNEL); if (!mt->red) goto err_mem; } /* Mark slots as 'inactive' */ for (i = 0; i < num_slots; i++) input_mt_set_value(&mt->slots[i], ABS_MT_TRACKING_ID, -1); /* Mark slots as 'unused' */ mt->frame = 1; dev->mt = mt; return 0; err_mem: kfree(mt); return -ENOMEM; } EXPORT_SYMBOL(input_mt_init_slots); /** * input_mt_destroy_slots() - frees the MT slots of the input device * @dev: input device with allocated MT slots * * This function is only needed in error path as the input core will * automatically free the MT slots when the device is destroyed. */ void input_mt_destroy_slots(struct input_dev *dev) { if (dev->mt) { kfree(dev->mt->red); kfree(dev->mt); } dev->mt = NULL; } EXPORT_SYMBOL(input_mt_destroy_slots); /** * input_mt_report_slot_state() - report contact state * @dev: input device with allocated MT slots * @tool_type: the tool type to use in this slot * @active: true if contact is active, false otherwise * * Reports a contact via ABS_MT_TRACKING_ID, and optionally * ABS_MT_TOOL_TYPE. If active is true and the slot is currently * inactive, or if the tool type is changed, a new tracking id is * assigned to the slot. The tool type is only reported if the * corresponding absbit field is set. * * Returns true if contact is active. */ bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active) { struct input_mt *mt = dev->mt; struct input_mt_slot *slot; int id; if (!mt) return false; slot = &mt->slots[mt->slot]; slot->frame = mt->frame; if (!active) { input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1); return false; } id = input_mt_get_value(slot, ABS_MT_TRACKING_ID); if (id < 0) id = input_mt_new_trkid(mt); input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, id); input_event(dev, EV_ABS, ABS_MT_TOOL_TYPE, tool_type); return true; } EXPORT_SYMBOL(input_mt_report_slot_state); /** * input_mt_report_finger_count() - report contact count * @dev: input device with allocated MT slots * @count: the number of contacts * * Reports the contact count via BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP, * BTN_TOOL_TRIPLETAP and BTN_TOOL_QUADTAP. * * The input core ensures only the KEY events already setup for * this device will produce output. 
*/ void input_mt_report_finger_count(struct input_dev *dev, int count) { input_event(dev, EV_KEY, BTN_TOOL_FINGER, count == 1); input_event(dev, EV_KEY, BTN_TOOL_DOUBLETAP, count == 2); input_event(dev, EV_KEY, BTN_TOOL_TRIPLETAP, count == 3); input_event(dev, EV_KEY, BTN_TOOL_QUADTAP, count == 4); input_event(dev, EV_KEY, BTN_TOOL_QUINTTAP, count == 5); } EXPORT_SYMBOL(input_mt_report_finger_count); /** * input_mt_report_pointer_emulation() - common pointer emulation * @dev: input device with allocated MT slots * @use_count: report number of active contacts as finger count * * Performs legacy pointer emulation via BTN_TOUCH, ABS_X, ABS_Y and * ABS_PRESSURE. Touchpad finger count is emulated if use_count is true. * * The input core ensures only the KEY and ABS axes already setup for * this device will produce output. */ void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count) { struct input_mt *mt = dev->mt; struct input_mt_slot *oldest; int oldid, count, i; if (!mt) return; oldest = NULL; oldid = mt->trkid; count = 0; for (i = 0; i < mt->num_slots; ++i) { struct input_mt_slot *ps = &mt->slots[i]; int id = input_mt_get_value(ps, ABS_MT_TRACKING_ID); if (id < 0) continue; if ((id - oldid) & TRKID_SGN) { oldest = ps; oldid = id; } count++; } input_event(dev, EV_KEY, BTN_TOUCH, count > 0); if (use_count) { if (count == 0 && !test_bit(ABS_MT_DISTANCE, dev->absbit) && test_bit(ABS_DISTANCE, dev->absbit) && input_abs_get_val(dev, ABS_DISTANCE) != 0) { /* * Force reporting BTN_TOOL_FINGER for devices that * only report general hover (and not per-contact * distance) when contact is in proximity but not * on the surface. */ count = 1; } input_mt_report_finger_count(dev, count); } if (oldest) { int x = input_mt_get_value(oldest, ABS_MT_POSITION_X); int y = input_mt_get_value(oldest, ABS_MT_POSITION_Y); input_event(dev, EV_ABS, ABS_X, x); input_event(dev, EV_ABS, ABS_Y, y); if (test_bit(ABS_MT_PRESSURE, dev->absbit)) { int p = input_mt_get_value(oldest, ABS_MT_PRESSURE); input_event(dev, EV_ABS, ABS_PRESSURE, p); } } else { if (test_bit(ABS_MT_PRESSURE, dev->absbit)) input_event(dev, EV_ABS, ABS_PRESSURE, 0); } } EXPORT_SYMBOL(input_mt_report_pointer_emulation); static void __input_mt_drop_unused(struct input_dev *dev, struct input_mt *mt) { int i; lockdep_assert_held(&dev->event_lock); for (i = 0; i < mt->num_slots; i++) { if (input_mt_is_active(&mt->slots[i]) && !input_mt_is_used(mt, &mt->slots[i])) { input_handle_event(dev, EV_ABS, ABS_MT_SLOT, i); input_handle_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1); } } } /** * input_mt_drop_unused() - Inactivate slots not seen in this frame * @dev: input device with allocated MT slots * * Lift all slots not seen since the last call to this function. */ void input_mt_drop_unused(struct input_dev *dev) { struct input_mt *mt = dev->mt; if (mt) { unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); __input_mt_drop_unused(dev, mt); mt->frame++; spin_unlock_irqrestore(&dev->event_lock, flags); } } EXPORT_SYMBOL(input_mt_drop_unused); /** * input_mt_release_slots() - Deactivate all slots * @dev: input device with allocated MT slots * * Lift all active slots. */ void input_mt_release_slots(struct input_dev *dev) { struct input_mt *mt = dev->mt; lockdep_assert_held(&dev->event_lock); if (mt) { /* This will effectively mark all slots unused. 
*/ mt->frame++; __input_mt_drop_unused(dev, mt); if (test_bit(ABS_PRESSURE, dev->absbit)) input_handle_event(dev, EV_ABS, ABS_PRESSURE, 0); mt->frame++; } } /** * input_mt_sync_frame() - synchronize mt frame * @dev: input device with allocated MT slots * * Close the frame and prepare the internal state for a new one. * Depending on the flags, marks unused slots as inactive and performs * pointer emulation. */ void input_mt_sync_frame(struct input_dev *dev) { struct input_mt *mt = dev->mt; bool use_count = false; if (!mt) return; if (mt->flags & INPUT_MT_DROP_UNUSED) { unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); __input_mt_drop_unused(dev, mt); spin_unlock_irqrestore(&dev->event_lock, flags); } if ((mt->flags & INPUT_MT_POINTER) && !(mt->flags & INPUT_MT_SEMI_MT)) use_count = true; input_mt_report_pointer_emulation(dev, use_count); mt->frame++; } EXPORT_SYMBOL(input_mt_sync_frame); static int adjust_dual(int *begin, int step, int *end, int eq, int mu) { int f, *p, s, c; if (begin == end) return 0; f = *begin; p = begin + step; s = p == end ? f + 1 : *p; for (; p != end; p += step) { if (*p < f) { s = f; f = *p; } else if (*p < s) { s = *p; } } c = (f + s + 1) / 2; if (c == 0 || (c > mu && (!eq || mu > 0))) return 0; /* Improve convergence for positive matrices by penalizing overcovers */ if (s < 0 && mu <= 0) c *= 2; for (p = begin; p != end; p += step) *p -= c; return (c < s && s <= 0) || (f >= 0 && f < c); } static void find_reduced_matrix(int *w, int nr, int nc, int nrc, int mu) { int i, k, sum; for (k = 0; k < nrc; k++) { for (i = 0; i < nr; i++) adjust_dual(w + i, nr, w + i + nrc, nr <= nc, mu); sum = 0; for (i = 0; i < nrc; i += nr) sum += adjust_dual(w + i, 1, w + i + nr, nc <= nr, mu); if (!sum) break; } } static int input_mt_set_matrix(struct input_mt *mt, const struct input_mt_pos *pos, int num_pos, int mu) { const struct input_mt_pos *p; struct input_mt_slot *s; int *w = mt->red; int x, y; for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { if (!input_mt_is_active(s)) continue; x = input_mt_get_value(s, ABS_MT_POSITION_X); y = input_mt_get_value(s, ABS_MT_POSITION_Y); for (p = pos; p != pos + num_pos; p++) { int dx = x - p->x, dy = y - p->y; *w++ = dx * dx + dy * dy - mu; } } return w - mt->red; } static void input_mt_set_slots(struct input_mt *mt, int *slots, int num_pos) { struct input_mt_slot *s; int *w = mt->red, j; for (j = 0; j != num_pos; j++) slots[j] = -1; for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { if (!input_mt_is_active(s)) continue; for (j = 0; j != num_pos; j++) { if (w[j] < 0) { slots[j] = s - mt->slots; break; } } w += num_pos; } for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { if (input_mt_is_active(s)) continue; for (j = 0; j != num_pos; j++) { if (slots[j] < 0) { slots[j] = s - mt->slots; break; } } } } /** * input_mt_assign_slots() - perform a best-match assignment * @dev: input device with allocated MT slots * @slots: the slot assignment to be filled * @pos: the position array to match * @num_pos: number of positions * @dmax: maximum ABS_MT_POSITION displacement (zero for infinite) * * Performs a best match against the current contacts and returns * the slot assignment list. New contacts are assigned to unused * slots. * * The assignments are balanced so that all coordinate displacements are * below the euclidian distance dmax. If no such assignment can be found, * some contacts are assigned to unused slots. * * Returns zero on success, or negative error in case of failure. 
*/ int input_mt_assign_slots(struct input_dev *dev, int *slots, const struct input_mt_pos *pos, int num_pos, int dmax) { struct input_mt *mt = dev->mt; int mu = 2 * dmax * dmax; int nrc; if (!mt || !mt->red) return -ENXIO; if (num_pos > mt->num_slots) return -EINVAL; if (num_pos < 1) return 0; nrc = input_mt_set_matrix(mt, pos, num_pos, mu); find_reduced_matrix(mt->red, num_pos, nrc / num_pos, nrc, mu); input_mt_set_slots(mt, slots, num_pos); return 0; } EXPORT_SYMBOL(input_mt_assign_slots); /** * input_mt_get_slot_by_key() - return slot matching key * @dev: input device with allocated MT slots * @key: the key of the sought slot * * Returns the slot of the given key, if it exists, otherwise * set the key on the first unused slot and return. * * If no available slot can be found, -1 is returned. * Note that for this function to work properly, input_mt_sync_frame() has * to be called at each frame. */ int input_mt_get_slot_by_key(struct input_dev *dev, int key) { struct input_mt *mt = dev->mt; struct input_mt_slot *s; if (!mt) return -1; for (s = mt->slots; s != mt->slots + mt->num_slots; s++) if (input_mt_is_active(s) && s->key == key) return s - mt->slots; for (s = mt->slots; s != mt->slots + mt->num_slots; s++) if (!input_mt_is_active(s) && !input_mt_is_used(mt, s)) { s->key = key; return s - mt->slots; } return -1; } EXPORT_SYMBOL(input_mt_get_slot_by_key);
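/*
 * Illustrative sketch, not part of input-mt.c: a minimal slotted (type B)
 * multi-touch report path built on the helpers above.  Slots are set up
 * once with input_mt_init_slots(); each hardware frame is then reported
 * per-contact and closed with input_mt_sync_frame().  The "example_*"
 * names, the contact structure and the 4095 axis range are hypothetical.
 */
#include <linux/input/mt.h>

#define EXAMPLE_MAX_CONTACTS	5

struct example_contact {
	int id;			/* hardware tracking id */
	int x, y;
};

static int example_setup_mt(struct input_dev *input)
{
	input_set_abs_params(input, ABS_MT_POSITION_X, 0, 4095, 0, 0);
	input_set_abs_params(input, ABS_MT_POSITION_Y, 0, 4095, 0, 0);

	/* Let the core drop untouched slots and set up single-touch emulation. */
	return input_mt_init_slots(input, EXAMPLE_MAX_CONTACTS,
				   INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
}

static void example_report_frame(struct input_dev *input,
				 const struct example_contact *c, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		int slot = input_mt_get_slot_by_key(input, c[i].id);

		if (slot < 0)
			continue;

		input_mt_slot(input, slot);
		input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
		input_report_abs(input, ABS_MT_POSITION_X, c[i].x);
		input_report_abs(input, ABS_MT_POSITION_Y, c[i].y);
	}

	/* Inactivates slots not seen in this frame and emits the frame. */
	input_mt_sync_frame(input);
	input_sync(input);
}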
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sysfs.h - definitions for the device driver filesystem
 *
 * Copyright (c) 2001,2002 Patrick Mochel
 * Copyright (c) 2004 Silicon Graphics, Inc.
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
 *
 * Please see Documentation/filesystems/sysfs.rst for more information.
*/ #ifndef _SYSFS_H_ #define _SYSFS_H_ #include <linux/kernfs.h> #include <linux/compiler.h> #include <linux/errno.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/kobject_ns.h> #include <linux/stat.h> #include <linux/atomic.h> struct kobject; struct module; struct bin_attribute; enum kobj_ns_type; struct attribute { const char *name; umode_t mode; #ifdef CONFIG_DEBUG_LOCK_ALLOC bool ignore_lockdep:1; struct lock_class_key *key; struct lock_class_key skey; #endif }; /** * sysfs_attr_init - initialize a dynamically allocated sysfs attribute * @attr: struct attribute to initialize * * Initialize a dynamically allocated struct attribute so we can * make lockdep happy. This is a new requirement for attributes * and initially this is only needed when lockdep is enabled. * Lockdep gives a nice error when your attribute is added to * sysfs if you don't have this. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC #define sysfs_attr_init(attr) \ do { \ static struct lock_class_key __key; \ \ (attr)->key = &__key; \ } while (0) #else #define sysfs_attr_init(attr) do {} while (0) #endif /** * struct attribute_group - data structure used to declare an attribute group. * @name: Optional: Attribute group name * If specified, the attribute group will be created in a * new subdirectory with this name. Additionally when a * group is named, @is_visible and @is_bin_visible may * return SYSFS_GROUP_INVISIBLE to control visibility of * the directory itself. * @is_visible: Optional: Function to return permissions associated with an * attribute of the group. Will be called repeatedly for * each non-binary attribute in the group. Only read/write * permissions as well as SYSFS_PREALLOC are accepted. Must * return 0 if an attribute is not visible. The returned * value will replace static permissions defined in struct * attribute. Use SYSFS_GROUP_VISIBLE() when assigning this * callback to specify separate _group_visible() and * _attr_visible() handlers. * @is_bin_visible: * Optional: Function to return permissions associated with a * binary attribute of the group. Will be called repeatedly * for each binary attribute in the group. Only read/write * permissions as well as SYSFS_PREALLOC (and the * visibility flags for named groups) are accepted. Must * return 0 if a binary attribute is not visible. The * returned value will replace static permissions defined * in struct bin_attribute. If @is_visible is not set, Use * SYSFS_GROUP_VISIBLE() when assigning this callback to * specify separate _group_visible() and _attr_visible() * handlers. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. */ struct attribute_group { const char *name; umode_t (*is_visible)(struct kobject *, struct attribute *, int); umode_t (*is_bin_visible)(struct kobject *, struct bin_attribute *, int); struct attribute **attrs; struct bin_attribute **bin_attrs; }; #define SYSFS_PREALLOC 010000 #define SYSFS_GROUP_INVISIBLE 020000 /* * DEFINE_SYSFS_GROUP_VISIBLE(name): * A helper macro to pair with the assignment of ".is_visible = * SYSFS_GROUP_VISIBLE(name)", that arranges for the directory * associated with a named attribute_group to optionally be hidden. * This allows for static declaration of attribute_groups, and the * simplification of attribute visibility lifetime that implies, * without polluting sysfs with empty attribute directories. * Ex. 
* * static umode_t example_attr_visible(struct kobject *kobj, * struct attribute *attr, int n) * { * if (example_attr_condition) * return 0; * else if (ro_attr_condition) * return 0444; * return a->mode; * } * * static bool example_group_visible(struct kobject *kobj) * { * if (example_group_condition) * return false; * return true; * } * * DEFINE_SYSFS_GROUP_VISIBLE(example); * * static struct attribute_group example_group = { * .name = "example", * .is_visible = SYSFS_GROUP_VISIBLE(example), * .attrs = &example_attrs, * }; * * Note that it expects <name>_attr_visible and <name>_group_visible to * be defined. For cases where individual attributes do not need * separate visibility consideration, only entire group visibility at * once, see DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(). */ #define DEFINE_SYSFS_GROUP_VISIBLE(name) \ static inline umode_t sysfs_group_visible_##name( \ struct kobject *kobj, struct attribute *attr, int n) \ { \ if (n == 0 && !name##_group_visible(kobj)) \ return SYSFS_GROUP_INVISIBLE; \ return name##_attr_visible(kobj, attr, n); \ } /* * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name): * A helper macro to pair with SYSFS_GROUP_VISIBLE() that like * DEFINE_SYSFS_GROUP_VISIBLE() controls group visibility, but does * not require the implementation of a per-attribute visibility * callback. * Ex. * * static bool example_group_visible(struct kobject *kobj) * { * if (example_group_condition) * return false; * return true; * } * * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(example); * * static struct attribute_group example_group = { * .name = "example", * .is_visible = SYSFS_GROUP_VISIBLE(example), * .attrs = &example_attrs, * }; */ #define DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name) \ static inline umode_t sysfs_group_visible_##name( \ struct kobject *kobj, struct attribute *a, int n) \ { \ if (n == 0 && !name##_group_visible(kobj)) \ return SYSFS_GROUP_INVISIBLE; \ return a->mode; \ } /* * Same as DEFINE_SYSFS_GROUP_VISIBLE, but for groups with only binary * attributes. If an attribute_group defines both text and binary * attributes, the group visibility is determined by the function * specified to is_visible() not is_bin_visible() */ #define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ static inline umode_t sysfs_group_visible_##name( \ struct kobject *kobj, struct bin_attribute *attr, int n) \ { \ if (n == 0 && !name##_group_visible(kobj)) \ return SYSFS_GROUP_INVISIBLE; \ return name##_attr_visible(kobj, attr, n); \ } #define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ static inline umode_t sysfs_group_visible_##name( \ struct kobject *kobj, struct bin_attribute *a, int n) \ { \ if (n == 0 && !name##_group_visible(kobj)) \ return SYSFS_GROUP_INVISIBLE; \ return a->mode; \ } #define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn /* * Use these macros to make defining attributes easier. * See include/linux/device.h for examples.. 
*/ #define __ATTR(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ .show = _show, \ .store = _store, \ } #define __ATTR_PREALLOC(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ .mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode) },\ .show = _show, \ .store = _store, \ } #define __ATTR_RO(_name) { \ .attr = { .name = __stringify(_name), .mode = 0444 }, \ .show = _name##_show, \ } #define __ATTR_RO_MODE(_name, _mode) { \ .attr = { .name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ .show = _name##_show, \ } #define __ATTR_RW_MODE(_name, _mode) { \ .attr = { .name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ .show = _name##_show, \ .store = _name##_store, \ } #define __ATTR_WO(_name) { \ .attr = { .name = __stringify(_name), .mode = 0200 }, \ .store = _name##_store, \ } #define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store) #define __ATTR_NULL { .attr = { .name = NULL } } #ifdef CONFIG_DEBUG_LOCK_ALLOC #define __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), .mode = _mode, \ .ignore_lockdep = true }, \ .show = _show, \ .store = _store, \ } #else #define __ATTR_IGNORE_LOCKDEP __ATTR #endif #define __ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group *_name##_groups[] = { \ &_name##_group, \ NULL, \ } #define ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ .attrs = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) #define BIN_ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ .bin_attrs = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) struct file; struct vm_area_struct; struct address_space; struct bin_attribute { struct attribute attr; size_t size; void *private; struct address_space *(*f_mapping)(void); ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, loff_t, int); int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; /** * sysfs_bin_attr_init - initialize a dynamically allocated bin_attribute * @attr: struct bin_attribute to initialize * * Initialize a dynamically allocated struct bin_attribute so we * can make lockdep happy. This is a new requirement for * attributes and initially this is only needed when lockdep is * enabled. Lockdep gives a nice error when your attribute is * added to sysfs if you don't have this. 
*/ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) /* macros to create static binary attributes easier */ #define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ .read = _read, \ .write = _write, \ .size = _size, \ } #define __BIN_ATTR_RO(_name, _size) { \ .attr = { .name = __stringify(_name), .mode = 0444 }, \ .read = _name##_read, \ .size = _size, \ } #define __BIN_ATTR_WO(_name, _size) { \ .attr = { .name = __stringify(_name), .mode = 0200 }, \ .write = _name##_write, \ .size = _size, \ } #define __BIN_ATTR_RW(_name, _size) \ __BIN_ATTR(_name, 0644, _name##_read, _name##_write, _size) #define __BIN_ATTR_NULL __ATTR_NULL #define BIN_ATTR(_name, _mode, _read, _write, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR(_name, _mode, _read, \ _write, _size) #define BIN_ATTR_RO(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_RO(_name, _size) #define BIN_ATTR_WO(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size) #define BIN_ATTR_RW(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size) #define __BIN_ATTR_ADMIN_RO(_name, _size) { \ .attr = { .name = __stringify(_name), .mode = 0400 }, \ .read = _name##_read, \ .size = _size, \ } #define __BIN_ATTR_ADMIN_RW(_name, _size) \ __BIN_ATTR(_name, 0600, _name##_read, _name##_write, _size) #define BIN_ATTR_ADMIN_RO(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size) #define BIN_ATTR_ADMIN_RW(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size) struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *, char *); ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); }; #ifdef CONFIG_SYSFS int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns); int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, const char *name); void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute * const *attr); int __must_check sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode); struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr); void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr); int __must_check sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr); void sysfs_remove_bin_file(struct kobject *kobj, const struct bin_attribute *attr); int __must_check sysfs_create_link(struct kobject *kobj, struct kobject *target, const char *name); int __must_check sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, const char *name); void sysfs_remove_link(struct kobject *kobj, const char *name); int sysfs_rename_link_ns(struct 
kobject *kobj, struct kobject *target, const char *old_name, const char *new_name, const void *new_ns); void sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name); int __must_check sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp); int __must_check sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups); int __must_check sysfs_update_groups(struct kobject *kobj, const struct attribute_group **groups); int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_groups(struct kobject *kobj, const struct attribute_group **groups); int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group); void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group); int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_unmerge_group(struct kobject *kobj, const struct attribute_group *grp); int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name); void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name); int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name, const char *symlink_name); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); int __must_check sysfs_init(void); static inline void sysfs_enable_ns(struct kernfs_node *kn) { return kernfs_enable_ns(kn); } int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid); int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_groups_change_owner(struct kobject *kobj, const struct attribute_group **groups, kuid_t kuid, kgid_t kgid); int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *groups, kuid_t kuid, kgid_t kgid); __printf(2, 3) int sysfs_emit(char *buf, const char *fmt, ...); __printf(3, 4) int sysfs_emit_at(char *buf, int at, const char *fmt, ...); #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { return 0; } static inline void sysfs_remove_dir(struct kobject *kobj) { } static inline int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns) { return 0; } static inline int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns) { return 0; } static inline int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) { return 0; } static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) { } static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { return 0; } static inline int sysfs_create_files(struct kobject *kobj, const struct attribute * const *attr) { return 0; } static inline int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode) { return 0; } static inline struct kernfs_node * sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr) { return NULL; } static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn) { } static inline void sysfs_remove_file_ns(struct 
kobject *kobj, const struct attribute *attr, const void *ns) { } static inline bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) { return false; } static inline void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr) { } static inline int sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { return 0; } static inline void sysfs_remove_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { } static inline int sysfs_create_link(struct kobject *kobj, struct kobject *target, const char *name) { return 0; } static inline int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, const char *name) { return 0; } static inline void sysfs_remove_link(struct kobject *kobj, const char *name) { } static inline int sysfs_rename_link_ns(struct kobject *k, struct kobject *t, const char *old_name, const char *new_name, const void *ns) { return 0; } static inline void sysfs_delete_link(struct kobject *k, struct kobject *t, const char *name) { } static inline int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp) { return 0; } static inline int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) { return 0; } static inline int sysfs_update_groups(struct kobject *kobj, const struct attribute_group **groups) { return 0; } static inline int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp) { return 0; } static inline void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp) { } static inline void sysfs_remove_groups(struct kobject *kobj, const struct attribute_group **groups) { } static inline int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { return 0; } static inline void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group) { } static inline int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp) { return 0; } static inline void sysfs_unmerge_group(struct kobject *kobj, const struct attribute_group *grp) { } static inline int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name) { return 0; } static inline void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name) { } static inline int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name, const char *symlink_name) { return 0; } static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { } static inline int __must_check sysfs_init(void) { return 0; } static inline void sysfs_enable_ns(struct kernfs_node *kn) { } static inline int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid) { return 0; } static inline int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid) { return 0; } static inline int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid) { return 0; } static inline int sysfs_groups_change_owner(struct kobject *kobj, const struct attribute_group **groups, kuid_t kuid, kgid_t kgid) { return 0; } static inline int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *groups, kuid_t kuid, kgid_t kgid) { return 0; } __printf(2, 3) static inline int sysfs_emit(char *buf, const char *fmt, ...) 
{ return 0; } __printf(3, 4) static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) { return 0; } #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, const struct attribute *attr) { return sysfs_create_file_ns(kobj, attr, NULL); } static inline void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr) { sysfs_remove_file_ns(kobj, attr, NULL); } static inline int sysfs_rename_link(struct kobject *kobj, struct kobject *target, const char *old_name, const char *new_name) { return sysfs_rename_link_ns(kobj, target, old_name, new_name, NULL); } static inline void sysfs_notify_dirent(struct kernfs_node *kn) { kernfs_notify(kn); } static inline struct kernfs_node *sysfs_get_dirent(struct kernfs_node *parent, const char *name) { return kernfs_find_and_get(parent, name); } static inline struct kernfs_node *sysfs_get(struct kernfs_node *kn) { kernfs_get(kn); return kn; } static inline void sysfs_put(struct kernfs_node *kn) { kernfs_put(kn); } #endif /* _SYSFS_H_ */
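/*
 * Illustrative sketch, not part of sysfs.h: a typical read-write attribute
 * built from the helpers above.  __ATTR_RW() wires up example_show() and
 * example_store(), sysfs_emit() formats the read buffer, and
 * ATTRIBUTE_GROUPS() produces example_group/example_groups.  All "example"
 * names and the backing integer are hypothetical.
 */
#include <linux/kobject.h>
#include <linux/kstrtox.h>
#include <linux/sysfs.h>

static int example_value;

static ssize_t example_show(struct kobject *kobj, struct kobj_attribute *attr,
			    char *buf)
{
	return sysfs_emit(buf, "%d\n", example_value);
}

static ssize_t example_store(struct kobject *kobj, struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	int ret = kstrtoint(buf, 0, &example_value);

	return ret ? ret : count;
}

static struct kobj_attribute example_attribute = __ATTR_RW(example);

static struct attribute *example_attrs[] = {
	&example_attribute.attr,
	NULL
};
ATTRIBUTE_GROUPS(example);

/* A caller with a kobject can then expose the group, for example: */
static int example_add_attrs(struct kobject *kobj)
{
	return sysfs_create_group(kobj, &example_group);
}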
2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 
3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 
3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 
4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 
5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 
5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 
6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 
7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 
8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 
8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 
9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 
// SPDX-License-Identifier: ISC
/*
 * Copyright (c) 2005-2011 Atheros Communications Inc.
 * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
 * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
 * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include "mac.h"

#include <net/cfg80211.h>
#include <net/mac80211.h>
#include <linux/etherdevice.h>
#include <linux/acpi.h>
#include <linux/of.h>
#include <linux/bitfield.h>

#include "hif.h"
#include "core.h"
#include "debug.h"
#include "wmi.h"
#include "htt.h"
#include "txrx.h"
#include "testmode.h"
#include "wmi-tlv.h"
#include "wmi-ops.h"
#include "wow.h"

/*********/
/* Rates */
/*********/

static struct ieee80211_rate ath10k_rates[] = {
	{ .bitrate = 10,
	  .hw_value = ATH10K_HW_RATE_CCK_LP_1M },
	{ .bitrate = 20,
	  .hw_value = ATH10K_HW_RATE_CCK_LP_2M,
	  .hw_value_short = ATH10K_HW_RATE_CCK_SP_2M,
	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },
	{ .bitrate = 55,
	  .hw_value = ATH10K_HW_RATE_CCK_LP_5_5M,
	  .hw_value_short = ATH10K_HW_RATE_CCK_SP_5_5M,
	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },
	{ .bitrate = 110,
	  .hw_value = ATH10K_HW_RATE_CCK_LP_11M,
	  .hw_value_short = ATH10K_HW_RATE_CCK_SP_11M,
	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },

	{ .bitrate = 60, .hw_value = ATH10K_HW_RATE_OFDM_6M },
	{ .bitrate = 90, .hw_value = ATH10K_HW_RATE_OFDM_9M },
	{ .bitrate = 120, .hw_value = ATH10K_HW_RATE_OFDM_12M },
	{ .bitrate = 180, .hw_value = ATH10K_HW_RATE_OFDM_18M },
	{ .bitrate = 240, .hw_value = ATH10K_HW_RATE_OFDM_24M },
	{ .bitrate = 360, .hw_value = ATH10K_HW_RATE_OFDM_36M },
	{ .bitrate = 480, .hw_value = ATH10K_HW_RATE_OFDM_48M },
	{ .bitrate = 540, .hw_value = ATH10K_HW_RATE_OFDM_54M },
};

static struct ieee80211_rate ath10k_rates_rev2[] = {
	{ .bitrate = 10,
	  .hw_value = ATH10K_HW_RATE_REV2_CCK_LP_1M },
	{ .bitrate = 20,
	  .hw_value = ATH10K_HW_RATE_REV2_CCK_LP_2M,
	  .hw_value_short = ATH10K_HW_RATE_REV2_CCK_SP_2M,
	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },
	{ .bitrate = 55,
	  .hw_value = ATH10K_HW_RATE_REV2_CCK_LP_5_5M,
	  .hw_value_short = ATH10K_HW_RATE_REV2_CCK_SP_5_5M,
	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },
	{ .bitrate = 110,
	  .hw_value = ATH10K_HW_RATE_REV2_CCK_LP_11M,
	  .hw_value_short = ATH10K_HW_RATE_REV2_CCK_SP_11M,
	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },

	{ .bitrate = 60, .hw_value = ATH10K_HW_RATE_OFDM_6M },
	{ .bitrate = 90, .hw_value = ATH10K_HW_RATE_OFDM_9M },
	{ .bitrate = 120, .hw_value = ATH10K_HW_RATE_OFDM_12M },
	{ .bitrate = 180, .hw_value = ATH10K_HW_RATE_OFDM_18M },
	{ .bitrate = 240, .hw_value = ATH10K_HW_RATE_OFDM_24M },
	{ .bitrate = 360, .hw_value = ATH10K_HW_RATE_OFDM_36M },
	{ .bitrate = 480, .hw_value = ATH10K_HW_RATE_OFDM_48M },
	{ .bitrate = 540, .hw_value = ATH10K_HW_RATE_OFDM_54M },
};

static const struct cfg80211_sar_freq_ranges ath10k_sar_freq_ranges[] = {
	{.start_freq = 2402, .end_freq = 2494 },
	{.start_freq = 5170, .end_freq = 5875 },
};

static const struct cfg80211_sar_capa ath10k_sar_capa = {
	.type = NL80211_SAR_TYPE_POWER,
	.num_freq_ranges = (ARRAY_SIZE(ath10k_sar_freq_ranges)),
	.freq_ranges = &ath10k_sar_freq_ranges[0],
};

#define ATH10K_MAC_FIRST_OFDM_RATE_IDX 4

#define ath10k_a_rates (ath10k_rates + ATH10K_MAC_FIRST_OFDM_RATE_IDX)
#define ath10k_a_rates_size (ARRAY_SIZE(ath10k_rates) - \
			     ATH10K_MAC_FIRST_OFDM_RATE_IDX)
#define ath10k_g_rates (ath10k_rates + 0)
#define ath10k_g_rates_size (ARRAY_SIZE(ath10k_rates))

#define ath10k_g_rates_rev2 (ath10k_rates_rev2 + 0)
#define ath10k_g_rates_rev2_size (ARRAY_SIZE(ath10k_rates_rev2))

#define ath10k_wmi_legacy_rates ath10k_rates

static bool ath10k_mac_bitrate_is_cck(int bitrate)
{
	switch (bitrate) {
	case 10:
	case 20:
	case 55:
	case 110:
		return true;
	}

	return false;
}

static u8 ath10k_mac_bitrate_to_rate(int bitrate)
{
	return DIV_ROUND_UP(bitrate, 5) |
	       (ath10k_mac_bitrate_is_cck(bitrate) ? BIT(7) : 0);
}

u8 ath10k_mac_hw_rate_to_idx(const struct ieee80211_supported_band *sband,
			     u8 hw_rate, bool cck)
{
	const struct ieee80211_rate *rate;
	int i;

	for (i = 0; i < sband->n_bitrates; i++) {
		rate = &sband->bitrates[i];

		if (ath10k_mac_bitrate_is_cck(rate->bitrate) != cck)
			continue;

		if (rate->hw_value == hw_rate)
			return i;
		else if (rate->flags & IEEE80211_RATE_SHORT_PREAMBLE &&
			 rate->hw_value_short == hw_rate)
			return i;
	}

	return 0;
}

u8 ath10k_mac_bitrate_to_idx(const struct ieee80211_supported_band *sband,
			     u32 bitrate)
{
	int i;

	for (i = 0; i < sband->n_bitrates; i++)
		if (sband->bitrates[i].bitrate == bitrate)
			return i;

	return 0;
}

static int ath10k_mac_get_rate_hw_value(int bitrate)
{
	int i;
	u8 hw_value_prefix = 0;

	if (ath10k_mac_bitrate_is_cck(bitrate))
		hw_value_prefix = WMI_RATE_PREAMBLE_CCK << 6;

	for (i = 0; i < ARRAY_SIZE(ath10k_rates); i++) {
		if (ath10k_rates[i].bitrate == bitrate)
			return hw_value_prefix | ath10k_rates[i].hw_value;
	}

	return -EINVAL;
}

static int ath10k_mac_get_max_vht_mcs_map(u16 mcs_map, int nss)
{
	switch ((mcs_map >> (2 * nss)) & 0x3) {
	case IEEE80211_VHT_MCS_SUPPORT_0_7:
		return BIT(8) - 1;
	case IEEE80211_VHT_MCS_SUPPORT_0_8:
		return BIT(9) - 1;
	case IEEE80211_VHT_MCS_SUPPORT_0_9:
		return BIT(10) - 1;
	}
	return 0;
}

static u32
ath10k_mac_max_ht_nss(const u8 ht_mcs_mask[IEEE80211_HT_MCS_MASK_LEN])
{
	int nss;

	for (nss = IEEE80211_HT_MCS_MASK_LEN - 1; nss >= 0; nss--)
		if (ht_mcs_mask[nss])
			return nss + 1;

	return 1;
}

static u32
ath10k_mac_max_vht_nss(const u16 vht_mcs_mask[NL80211_VHT_NSS_MAX])
{
	int nss;

	for (nss = NL80211_VHT_NSS_MAX - 1; nss >= 0; nss--)
		if (vht_mcs_mask[nss])
			return nss + 1;

	return 1;
}

int ath10k_mac_ext_resource_config(struct ath10k *ar, u32 val)
{
	enum wmi_host_platform_type platform_type;
	int ret;

	if (test_bit(WMI_SERVICE_TX_MODE_DYNAMIC, ar->wmi.svc_map))
		platform_type = WMI_HOST_PLATFORM_LOW_PERF;
	else
		platform_type = WMI_HOST_PLATFORM_HIGH_PERF;

	ret = ath10k_wmi_ext_resource_config(ar, platform_type, val);
	if (ret && ret != -EOPNOTSUPP) {
		ath10k_warn(ar, "failed to configure ext resource: %d\n", ret);
		return ret;
	}

	return 0;
}

/**********/
/* Crypto */
/**********/
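/* Translate a mac80211 key configuration into a WMI vdev install key
 * command for the given vdev and peer MAC address. For DISABLE_KEY the
 * cipher is forced to WMI_CIPHER_NONE and no key material is sent.
 */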
static int ath10k_send_key(struct ath10k_vif *arvif,
			   struct ieee80211_key_conf *key,
			   enum set_key_cmd cmd,
			   const u8 *macaddr, u32 flags)
{
	struct ath10k *ar = arvif->ar;
	struct wmi_vdev_install_key_arg arg = {
		.vdev_id = arvif->vdev_id,
		.key_idx = key->keyidx,
		.key_len = key->keylen,
		.key_data = key->key,
		.key_flags = flags,
		.macaddr = macaddr,
	};

	lockdep_assert_held(&arvif->ar->conf_mutex);

	switch (key->cipher) {
	case WLAN_CIPHER_SUITE_CCMP:
		arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_AES_CCM];
		key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT;
		break;
	case WLAN_CIPHER_SUITE_TKIP:
		arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_TKIP];
		arg.key_txmic_len = 8;
		arg.key_rxmic_len = 8;
		break;
	case WLAN_CIPHER_SUITE_WEP40:
	case WLAN_CIPHER_SUITE_WEP104:
		arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_WEP];
		break;
	case WLAN_CIPHER_SUITE_CCMP_256:
		arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_AES_CCM];
		break;
	case WLAN_CIPHER_SUITE_GCMP:
	case WLAN_CIPHER_SUITE_GCMP_256:
		arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_AES_GCM];
		key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT;
		break;
	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
	case WLAN_CIPHER_SUITE_AES_CMAC:
		WARN_ON(1);
		return -EINVAL;
	default:
		ath10k_warn(ar, "cipher %d is not supported\n", key->cipher);
		return -EOPNOTSUPP;
	}

	if (test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags))
		key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV;

	if (cmd == DISABLE_KEY) {
		arg.key_cipher = ar->wmi_key_cipher[WMI_CIPHER_NONE];
		arg.key_data = NULL;
	}

	return ath10k_wmi_vdev_install_key(arvif->ar, &arg);
}

static int ath10k_install_key(struct ath10k_vif *arvif,
			      struct ieee80211_key_conf *key,
			      enum set_key_cmd cmd,
			      const u8 *macaddr, u32 flags)
{
	struct ath10k *ar = arvif->ar;
	int ret;
	unsigned long time_left;

	lockdep_assert_held(&ar->conf_mutex);

	reinit_completion(&ar->install_key_done);

	if (arvif->nohwcrypt)
		return 1;

	ret = ath10k_send_key(arvif, key, cmd, macaddr, flags);
	if (ret)
		return ret;

	time_left = wait_for_completion_timeout(&ar->install_key_done, 3 * HZ);
	if (time_left == 0)
		return -ETIMEDOUT;

	return 0;
}

static int ath10k_install_peer_wep_keys(struct ath10k_vif *arvif,
					const u8 *addr)
{
	struct ath10k *ar = arvif->ar;
	struct ath10k_peer *peer;
	int ret;
	int i;
	u32 flags;

	lockdep_assert_held(&ar->conf_mutex);

	if (WARN_ON(arvif->vif->type != NL80211_IFTYPE_AP &&
		    arvif->vif->type != NL80211_IFTYPE_ADHOC &&
		    arvif->vif->type != NL80211_IFTYPE_MESH_POINT))
		return -EINVAL;

	spin_lock_bh(&ar->data_lock);
	peer = ath10k_peer_find(ar, arvif->vdev_id, addr);
	spin_unlock_bh(&ar->data_lock);

	if (!peer)
		return -ENOENT;

	for (i = 0; i < ARRAY_SIZE(arvif->wep_keys); i++) {
		if (arvif->wep_keys[i] == NULL)
			continue;

		switch (arvif->vif->type) {
		case NL80211_IFTYPE_AP:
			flags = WMI_KEY_PAIRWISE;

			if (arvif->def_wep_key_idx == i)
				flags |= WMI_KEY_TX_USAGE;

			ret = ath10k_install_key(arvif, arvif->wep_keys[i],
						 SET_KEY, addr, flags);
			if (ret < 0)
				return ret;
			break;
		case NL80211_IFTYPE_ADHOC:
			ret = ath10k_install_key(arvif, arvif->wep_keys[i],
						 SET_KEY, addr,
						 WMI_KEY_PAIRWISE);
			if (ret < 0)
				return ret;

			ret = ath10k_install_key(arvif, arvif->wep_keys[i],
						 SET_KEY, addr, WMI_KEY_GROUP);
			if (ret < 0)
				return ret;
			break;
		default:
			WARN_ON(1);
			return -EINVAL;
		}

		spin_lock_bh(&ar->data_lock);
		peer->keys[i] = arvif->wep_keys[i];
		spin_unlock_bh(&ar->data_lock);
	}

	/* In some cases (notably with static WEP IBSS with multiple keys)
	 * multicast Tx becomes broken. Both pairwise and groupwise keys are
	 * installed already. Using WMI_KEY_TX_USAGE in different combinations
	 * didn't seem to help.
	 * Using the def_keyid vdev parameter seems to be effective, so use
	 * that.
	 *
	 * FIXME: Revisit. Perhaps this can be done in a less hacky way.
	 */
	if (arvif->vif->type != NL80211_IFTYPE_ADHOC)
		return 0;

	if (arvif->def_wep_key_idx == -1)
		return 0;

	ret = ath10k_wmi_vdev_set_param(arvif->ar,
					arvif->vdev_id,
					arvif->ar->wmi.vdev_param->def_keyid,
					arvif->def_wep_key_idx);
	if (ret) {
		ath10k_warn(ar, "failed to re-set def wpa key idx on vdev %i: %d\n",
			    arvif->vdev_id, ret);
		return ret;
	}

	return 0;
}

static int ath10k_clear_peer_keys(struct ath10k_vif *arvif,
				  const u8 *addr)
{
	struct ath10k *ar = arvif->ar;
	struct ath10k_peer *peer;
	int first_errno = 0;
	int ret;
	int i;
	u32 flags = 0;

	lockdep_assert_held(&ar->conf_mutex);

	spin_lock_bh(&ar->data_lock);
	peer = ath10k_peer_find(ar, arvif->vdev_id, addr);
	spin_unlock_bh(&ar->data_lock);

	if (!peer)
		return -ENOENT;

	for (i = 0; i < ARRAY_SIZE(peer->keys); i++) {
		if (peer->keys[i] == NULL)
			continue;

		/* key flags are not required to delete the key */
		ret = ath10k_install_key(arvif, peer->keys[i],
					 DISABLE_KEY, addr, flags);
		if (ret < 0 && first_errno == 0)
			first_errno = ret;

		if (ret < 0)
			ath10k_warn(ar, "failed to remove peer wep key %d: %d\n",
				    i, ret);

		spin_lock_bh(&ar->data_lock);
		peer->keys[i] = NULL;
		spin_unlock_bh(&ar->data_lock);
	}

	return first_errno;
}

bool ath10k_mac_is_peer_wep_key_set(struct ath10k *ar, const u8 *addr,
				    u8 keyidx)
{
	struct ath10k_peer *peer;
	int i;

	lockdep_assert_held(&ar->data_lock);

	/* We don't know which vdev this peer belongs to,
	 * since WMI doesn't give us that information.
	 *
	 * FIXME: multi-bss needs to be handled.
	 */
	peer = ath10k_peer_find(ar, 0, addr);
	if (!peer)
		return false;

	for (i = 0; i < ARRAY_SIZE(peer->keys); i++) {
		if (peer->keys[i] && peer->keys[i]->keyidx == keyidx)
			return true;
	}

	return false;
}

static int ath10k_clear_vdev_key(struct ath10k_vif *arvif,
				 struct ieee80211_key_conf *key)
{
	struct ath10k *ar = arvif->ar;
	struct ath10k_peer *peer;
	u8 addr[ETH_ALEN];
	int first_errno = 0;
	int ret;
	int i;
	u32 flags = 0;

	lockdep_assert_held(&ar->conf_mutex);

	for (;;) {
		/* since ath10k_install_key() can sleep we can't hold
		 * data_lock all the time, so we try to remove the keys
		 * incrementally
		 */
		spin_lock_bh(&ar->data_lock);
		i = 0;
		list_for_each_entry(peer, &ar->peers, list) {
			for (i = 0; i < ARRAY_SIZE(peer->keys); i++) {
				if (peer->keys[i] == key) {
					ether_addr_copy(addr, peer->addr);
					peer->keys[i] = NULL;
					break;
				}
			}

			if (i < ARRAY_SIZE(peer->keys))
				break;
		}
		spin_unlock_bh(&ar->data_lock);

		if (i == ARRAY_SIZE(peer->keys))
			break;

		/* key flags are not required to delete the key */
		ret = ath10k_install_key(arvif, key, DISABLE_KEY, addr, flags);
		if (ret < 0 && first_errno == 0)
			first_errno = ret;

		if (ret)
			ath10k_warn(ar, "failed to remove key for %pM: %d\n",
				    addr, ret);
	}

	return first_errno;
}

static int ath10k_mac_vif_update_wep_key(struct ath10k_vif *arvif,
					 struct ieee80211_key_conf *key)
{
	struct ath10k *ar = arvif->ar;
	struct ath10k_peer *peer;
	int ret;

	lockdep_assert_held(&ar->conf_mutex);

	list_for_each_entry(peer, &ar->peers, list) {
		if (ether_addr_equal(peer->addr, arvif->vif->addr))
			continue;

		if (ether_addr_equal(peer->addr, arvif->bssid))
			continue;

		if (peer->keys[key->keyidx] == key)
			continue;

		ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vif vdev %i update key %i needs update\n",
			   arvif->vdev_id, key->keyidx);

		ret = ath10k_install_peer_wep_keys(arvif, peer->addr);
		if (ret) {
			ath10k_warn(ar, "failed to update wep keys on vdev %i for peer %pM: %d\n",
				    arvif->vdev_id, peer->addr, ret);
			return ret;
		}
	}

	return 0;
}

/*********************/
/* General utilities */
/*********************/
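/* Map a cfg80211 channel definition (band and width) onto the firmware's
 * WMI phy mode. Combinations that cannot be represented fall back to
 * MODE_UNKNOWN and trigger a WARN_ON.
 */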
static inline enum wmi_phy_mode
chan_to_phymode(const struct cfg80211_chan_def *chandef)
{
	enum wmi_phy_mode phymode = MODE_UNKNOWN;

	switch (chandef->chan->band) {
	case NL80211_BAND_2GHZ:
		switch (chandef->width) {
		case NL80211_CHAN_WIDTH_20_NOHT:
			if (chandef->chan->flags & IEEE80211_CHAN_NO_OFDM)
				phymode = MODE_11B;
			else
				phymode = MODE_11G;
			break;
		case NL80211_CHAN_WIDTH_20:
			phymode = MODE_11NG_HT20;
			break;
		case NL80211_CHAN_WIDTH_40:
			phymode = MODE_11NG_HT40;
			break;
		default:
			phymode = MODE_UNKNOWN;
			break;
		}
		break;
	case NL80211_BAND_5GHZ:
		switch (chandef->width) {
		case NL80211_CHAN_WIDTH_20_NOHT:
			phymode = MODE_11A;
			break;
		case NL80211_CHAN_WIDTH_20:
			phymode = MODE_11NA_HT20;
			break;
		case NL80211_CHAN_WIDTH_40:
			phymode = MODE_11NA_HT40;
			break;
		case NL80211_CHAN_WIDTH_80:
			phymode = MODE_11AC_VHT80;
			break;
		case NL80211_CHAN_WIDTH_160:
			phymode = MODE_11AC_VHT160;
			break;
		case NL80211_CHAN_WIDTH_80P80:
			phymode = MODE_11AC_VHT80_80;
			break;
		default:
			phymode = MODE_UNKNOWN;
			break;
		}
		break;
	default:
		break;
	}

	WARN_ON(phymode == MODE_UNKNOWN);
	return phymode;
}

static u8 ath10k_parse_mpdudensity(u8 mpdudensity)
{
	/*
	 * 802.11n D2.0 defined values for "Minimum MPDU Start Spacing":
	 *   0 for no restriction
	 *   1 for 1/4 us
	 *   2 for 1/2 us
	 *   3 for 1 us
	 *   4 for 2 us
	 *   5 for 4 us
	 *   6 for 8 us
	 *   7 for 16 us
	 */
	switch (mpdudensity) {
	case 0:
		return 0;
	case 1:
	case 2:
	case 3:
		/* Our lower layer calculations limit our precision to
		 * 1 microsecond
		 */
		return 1;
	case 4:
		return 2;
	case 5:
		return 4;
	case 6:
		return 8;
	case 7:
		return 16;
	default:
		return 0;
	}
}

int ath10k_mac_vif_chan(struct ieee80211_vif *vif,
			struct cfg80211_chan_def *def)
{
	struct ieee80211_chanctx_conf *conf;

	rcu_read_lock();
	conf = rcu_dereference(vif->bss_conf.chanctx_conf);
	if (!conf) {
		rcu_read_unlock();
		return -ENOENT;
	}

	*def = conf->def;
	rcu_read_unlock();

	return 0;
}

static void ath10k_mac_num_chanctxs_iter(struct ieee80211_hw *hw,
					 struct ieee80211_chanctx_conf *conf,
					 void *data)
{
	int *num = data;

	(*num)++;
}

static int ath10k_mac_num_chanctxs(struct ath10k *ar)
{
	int num = 0;

	ieee80211_iter_chan_contexts_atomic(ar->hw,
					    ath10k_mac_num_chanctxs_iter,
					    &num);

	return num;
}

static void
ath10k_mac_get_any_chandef_iter(struct ieee80211_hw *hw,
				struct ieee80211_chanctx_conf *conf,
				void *data)
{
	struct cfg80211_chan_def **def = data;

	*def = &conf->def;
}

static void ath10k_wait_for_peer_delete_done(struct ath10k *ar, u32 vdev_id,
					     const u8 *addr)
{
	unsigned long time_left;
	int ret;

	if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) {
		ret = ath10k_wait_for_peer_deleted(ar, vdev_id, addr);
		if (ret) {
			ath10k_warn(ar, "failed to wait for peer deleted");
			return;
		}

		time_left = wait_for_completion_timeout(&ar->peer_delete_done,
							5 * HZ);
		if (!time_left)
			ath10k_warn(ar, "Timeout in receiving peer delete response\n");
	}
}
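/* Create a firmware peer entry for the given MAC address on a vdev and
 * wait until the new peer shows up in the driver's peer list before
 * returning.
 */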
*/ if (ar->num_peers + list_count_nodes(&ar->arvifs) >= ar->max_num_peers) return -ENOBUFS; ret = ath10k_wmi_peer_create(ar, vdev_id, addr, peer_type); if (ret) { ath10k_warn(ar, "failed to create wmi peer %pM on vdev %i: %i\n", addr, vdev_id, ret); return ret; } ret = ath10k_wait_for_peer_created(ar, vdev_id, addr); if (ret) { ath10k_warn(ar, "failed to wait for created wmi peer %pM on vdev %i: %i\n", addr, vdev_id, ret); return ret; } spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, vdev_id, addr); if (!peer) { spin_unlock_bh(&ar->data_lock); ath10k_warn(ar, "failed to find peer %pM on vdev %i after creation\n", addr, vdev_id); ath10k_wait_for_peer_delete_done(ar, vdev_id, addr); return -ENOENT; } peer->vif = vif; peer->sta = sta; spin_unlock_bh(&ar->data_lock); ar->num_peers++; return 0; } static int ath10k_mac_set_kickout(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 param; int ret; param = ar->wmi.pdev_param->sta_kickout_th; ret = ath10k_wmi_pdev_set_param(ar, param, ATH10K_KICKOUT_THRESHOLD); if (ret) { ath10k_warn(ar, "failed to set kickout threshold on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } param = ar->wmi.vdev_param->ap_keepalive_min_idle_inactive_time_secs; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param, ATH10K_KEEPALIVE_MIN_IDLE); if (ret) { ath10k_warn(ar, "failed to set keepalive minimum idle time on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } param = ar->wmi.vdev_param->ap_keepalive_max_idle_inactive_time_secs; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param, ATH10K_KEEPALIVE_MAX_IDLE); if (ret) { ath10k_warn(ar, "failed to set keepalive maximum idle time on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } param = ar->wmi.vdev_param->ap_keepalive_max_unresponsive_time_secs; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param, ATH10K_KEEPALIVE_MAX_UNRESPONSIVE); if (ret) { ath10k_warn(ar, "failed to set keepalive maximum unresponsive time on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_set_rts(struct ath10k_vif *arvif, u32 value) { struct ath10k *ar = arvif->ar; u32 vdev_param; vdev_param = ar->wmi.vdev_param->rts_threshold; return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, value); } static int ath10k_peer_delete(struct ath10k *ar, u32 vdev_id, const u8 *addr) { int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_peer_delete(ar, vdev_id, addr); if (ret) return ret; ret = ath10k_wait_for_peer_deleted(ar, vdev_id, addr); if (ret) return ret; if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) { unsigned long time_left; time_left = wait_for_completion_timeout (&ar->peer_delete_done, 5 * HZ); if (!time_left) { ath10k_warn(ar, "Timeout in receiving peer delete response\n"); return -ETIMEDOUT; } } ar->num_peers--; return 0; } static void ath10k_peer_map_cleanup(struct ath10k *ar, struct ath10k_peer *peer) { int peer_id, i; lockdep_assert_held(&ar->conf_mutex); for_each_set_bit(peer_id, peer->peer_ids, ATH10K_MAX_NUM_PEER_IDS) { ar->peer_map[peer_id] = NULL; } /* Double check that peer is properly un-referenced from * the peer_map */ for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { if (ar->peer_map[i] == peer) { ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n", peer->addr, peer, i); ar->peer_map[i] = NULL; } } list_del(&peer->list); kfree(peer); ar->num_peers--; } static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id) { struct ath10k_peer *peer, *tmp; lockdep_assert_held(&ar->conf_mutex); 
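	/* Walk the peer list under data_lock and drop every peer still bound
	 * to this vdev; ath10k_peer_map_cleanup() also clears any stale
	 * peer_map references before freeing the entry.
	 */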
spin_lock_bh(&ar->data_lock); list_for_each_entry_safe(peer, tmp, &ar->peers, list) { if (peer->vdev_id != vdev_id) continue; ath10k_warn(ar, "removing stale peer %pM from vdev_id %d\n", peer->addr, vdev_id); ath10k_peer_map_cleanup(ar, peer); } spin_unlock_bh(&ar->data_lock); } static void ath10k_peer_cleanup_all(struct ath10k *ar) { struct ath10k_peer *peer, *tmp; int i; lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); list_for_each_entry_safe(peer, tmp, &ar->peers, list) { list_del(&peer->list); kfree(peer); } for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) ar->peer_map[i] = NULL; spin_unlock_bh(&ar->data_lock); ar->num_peers = 0; ar->num_stations = 0; } static int ath10k_mac_tdls_peer_update(struct ath10k *ar, u32 vdev_id, struct ieee80211_sta *sta, enum wmi_tdls_peer_state state) { int ret; struct wmi_tdls_peer_update_cmd_arg arg = {}; struct wmi_tdls_peer_capab_arg cap = {}; struct wmi_channel_arg chan_arg = {}; lockdep_assert_held(&ar->conf_mutex); arg.vdev_id = vdev_id; arg.peer_state = state; ether_addr_copy(arg.addr, sta->addr); cap.peer_max_sp = sta->max_sp; cap.peer_uapsd_queues = sta->uapsd_queues; if (state == WMI_TDLS_PEER_STATE_CONNECTED && !sta->tdls_initiator) cap.is_peer_responder = 1; ret = ath10k_wmi_tdls_peer_update(ar, &arg, &cap, &chan_arg); if (ret) { ath10k_warn(ar, "failed to update tdls peer %pM on vdev %i: %i\n", arg.addr, vdev_id, ret); return ret; } return 0; } /************************/ /* Interface management */ /************************/ void ath10k_mac_vif_beacon_free(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->data_lock); if (!arvif->beacon) return; if (!arvif->beacon_buf) dma_unmap_single(ar->dev, ATH10K_SKB_CB(arvif->beacon)->paddr, arvif->beacon->len, DMA_TO_DEVICE); if (WARN_ON(arvif->beacon_state != ATH10K_BEACON_SCHEDULED && arvif->beacon_state != ATH10K_BEACON_SENT)) return; dev_kfree_skb_any(arvif->beacon); arvif->beacon = NULL; arvif->beacon_state = ATH10K_BEACON_SCHEDULED; } static void ath10k_mac_vif_beacon_cleanup(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->data_lock); ath10k_mac_vif_beacon_free(arvif); if (arvif->beacon_buf) { if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) kfree(arvif->beacon_buf); else dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, arvif->beacon_buf, arvif->beacon_paddr); arvif->beacon_buf = NULL; } } static inline int ath10k_vdev_setup_sync(struct ath10k *ar) { unsigned long time_left; lockdep_assert_held(&ar->conf_mutex); if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) return -ESHUTDOWN; time_left = wait_for_completion_timeout(&ar->vdev_setup_done, ATH10K_VDEV_SETUP_TIMEOUT_HZ); if (time_left == 0) return -ETIMEDOUT; return ar->last_wmi_vdev_start_status; } static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) { struct cfg80211_chan_def *chandef = NULL; struct ieee80211_channel *channel = NULL; struct wmi_vdev_start_request_arg arg = {}; int ret = 0; lockdep_assert_held(&ar->conf_mutex); ieee80211_iter_chan_contexts_atomic(ar->hw, ath10k_mac_get_any_chandef_iter, &chandef); if (WARN_ON_ONCE(!chandef)) return -ENOENT; channel = chandef->chan; arg.vdev_id = vdev_id; arg.channel.freq = channel->center_freq; arg.channel.band_center_freq1 = chandef->center_freq1; arg.channel.band_center_freq2 = chandef->center_freq2; /* TODO setup this dynamically, what in case we * don't have any vifs? 
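 * For now the monitor vdev simply reuses whatever chanctx mac80211
 * reported through ath10k_mac_get_any_chandef_iter() above.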
*/ arg.channel.mode = chan_to_phymode(chandef); arg.channel.chan_radar = !!(channel->flags & IEEE80211_CHAN_RADAR); arg.channel.min_power = 0; arg.channel.max_power = channel->max_power * 2; arg.channel.max_reg_power = channel->max_reg_power * 2; arg.channel.max_antenna_gain = channel->max_antenna_gain; reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); ret = ath10k_wmi_vdev_start(ar, &arg); if (ret) { ath10k_warn(ar, "failed to request monitor vdev %i start: %d\n", vdev_id, ret); return ret; } ret = ath10k_vdev_setup_sync(ar); if (ret) { ath10k_warn(ar, "failed to synchronize setup for monitor vdev %i start: %d\n", vdev_id, ret); return ret; } ret = ath10k_wmi_vdev_up(ar, vdev_id, 0, ar->mac_addr); if (ret) { ath10k_warn(ar, "failed to put up monitor vdev %i: %d\n", vdev_id, ret); goto vdev_stop; } ar->monitor_vdev_id = vdev_id; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor vdev %i started\n", ar->monitor_vdev_id); return 0; vdev_stop: ret = ath10k_wmi_vdev_stop(ar, ar->monitor_vdev_id); if (ret) ath10k_warn(ar, "failed to stop monitor vdev %i after start failure: %d\n", ar->monitor_vdev_id, ret); return ret; } static int ath10k_monitor_vdev_stop(struct ath10k *ar) { int ret = 0; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_vdev_down(ar, ar->monitor_vdev_id); if (ret) ath10k_warn(ar, "failed to put down monitor vdev %i: %d\n", ar->monitor_vdev_id, ret); reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); ret = ath10k_wmi_vdev_stop(ar, ar->monitor_vdev_id); if (ret) ath10k_warn(ar, "failed to request monitor vdev %i stop: %d\n", ar->monitor_vdev_id, ret); ret = ath10k_vdev_setup_sync(ar); if (ret) ath10k_warn(ar, "failed to synchronize monitor vdev %i stop: %d\n", ar->monitor_vdev_id, ret); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor vdev %i stopped\n", ar->monitor_vdev_id); return ret; } static int ath10k_monitor_vdev_create(struct ath10k *ar) { int bit, ret = 0; lockdep_assert_held(&ar->conf_mutex); if (ar->free_vdev_map == 0) { ath10k_warn(ar, "failed to find free vdev id for monitor vdev\n"); return -ENOMEM; } bit = __ffs64(ar->free_vdev_map); ar->monitor_vdev_id = bit; ret = ath10k_wmi_vdev_create(ar, ar->monitor_vdev_id, WMI_VDEV_TYPE_MONITOR, 0, ar->mac_addr); if (ret) { ath10k_warn(ar, "failed to request monitor vdev %i creation: %d\n", ar->monitor_vdev_id, ret); return ret; } ar->free_vdev_map &= ~(1LL << ar->monitor_vdev_id); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor vdev %d created\n", ar->monitor_vdev_id); return 0; } static int ath10k_monitor_vdev_delete(struct ath10k *ar) { int ret = 0; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_vdev_delete(ar, ar->monitor_vdev_id); if (ret) { ath10k_warn(ar, "failed to request wmi monitor vdev %i removal: %d\n", ar->monitor_vdev_id, ret); return ret; } ar->free_vdev_map |= 1LL << ar->monitor_vdev_id; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor vdev %d deleted\n", ar->monitor_vdev_id); return ret; } static int ath10k_monitor_start(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_monitor_vdev_create(ar); if (ret) { ath10k_warn(ar, "failed to create monitor vdev: %d\n", ret); return ret; } ret = ath10k_monitor_vdev_start(ar, ar->monitor_vdev_id); if (ret) { ath10k_warn(ar, "failed to start monitor vdev: %d\n", ret); ath10k_monitor_vdev_delete(ar); return ret; } ar->monitor_started = true; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor started\n"); return 0; } static int ath10k_monitor_stop(struct ath10k *ar) { int ret; 
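	/* Tear down in the reverse order of ath10k_monitor_start(): stop the
	 * monitor vdev first, then delete it, and only clear monitor_started
	 * once both steps have succeeded.
	 */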
lockdep_assert_held(&ar->conf_mutex); ret = ath10k_monitor_vdev_stop(ar); if (ret) { ath10k_warn(ar, "failed to stop monitor vdev: %d\n", ret); return ret; } ret = ath10k_monitor_vdev_delete(ar); if (ret) { ath10k_warn(ar, "failed to delete monitor vdev: %d\n", ret); return ret; } ar->monitor_started = false; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor stopped\n"); return 0; } static bool ath10k_mac_monitor_vdev_is_needed(struct ath10k *ar) { int num_ctx; /* At least one chanctx is required to derive a channel to start * monitor vdev on. */ num_ctx = ath10k_mac_num_chanctxs(ar); if (num_ctx == 0) return false; /* If there's already an existing special monitor interface then don't * bother creating another monitor vdev. */ if (ar->monitor_arvif) return false; return ar->monitor || (!test_bit(ATH10K_FW_FEATURE_ALLOWS_MESH_BCAST, ar->running_fw->fw_file.fw_features) && (ar->filter_flags & (FIF_OTHER_BSS | FIF_MCAST_ACTION))) || test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); } static bool ath10k_mac_monitor_vdev_is_allowed(struct ath10k *ar) { int num_ctx; num_ctx = ath10k_mac_num_chanctxs(ar); /* FIXME: Current interface combinations and cfg80211/mac80211 code * shouldn't allow this but make sure to prevent handling the following * case anyway since multi-channel DFS hasn't been tested at all. */ if (test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags) && num_ctx > 1) return false; return true; } static int ath10k_monitor_recalc(struct ath10k *ar) { bool needed; bool allowed; int ret; lockdep_assert_held(&ar->conf_mutex); needed = ath10k_mac_monitor_vdev_is_needed(ar); allowed = ath10k_mac_monitor_vdev_is_allowed(ar); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor recalc started? %d needed? %d allowed? %d\n", ar->monitor_started, needed, allowed); if (WARN_ON(needed && !allowed)) { if (ar->monitor_started) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac monitor stopping disallowed monitor\n"); ret = ath10k_monitor_stop(ar); if (ret) ath10k_warn(ar, "failed to stop disallowed monitor: %d\n", ret); /* not serious */ } return -EPERM; } if (needed == ar->monitor_started) return 0; if (needed) return ath10k_monitor_start(ar); else return ath10k_monitor_stop(ar); } static bool ath10k_mac_can_set_cts_prot(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->conf_mutex); if (!arvif->is_started) { ath10k_dbg(ar, ATH10K_DBG_MAC, "defer cts setup, vdev is not ready yet\n"); return false; } return true; } static int ath10k_mac_set_cts_prot(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 vdev_param; lockdep_assert_held(&ar->conf_mutex); vdev_param = ar->wmi.vdev_param->protection_mode; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d cts_protection %d\n", arvif->vdev_id, arvif->use_cts_prot); return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->use_cts_prot ? 
1 : 0); } static int ath10k_recalc_rtscts_prot(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 vdev_param, rts_cts = 0; lockdep_assert_held(&ar->conf_mutex); vdev_param = ar->wmi.vdev_param->enable_rtscts; rts_cts |= SM(WMI_RTSCTS_ENABLED, WMI_RTSCTS_SET); if (arvif->num_legacy_stations > 0) rts_cts |= SM(WMI_RTSCTS_ACROSS_SW_RETRIES, WMI_RTSCTS_PROFILE); else rts_cts |= SM(WMI_RTSCTS_FOR_SECOND_RATESERIES, WMI_RTSCTS_PROFILE); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d recalc rts/cts prot %d\n", arvif->vdev_id, rts_cts); return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, rts_cts); } static int ath10k_start_cac(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); set_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ret = ath10k_monitor_recalc(ar); if (ret) { ath10k_warn(ar, "failed to start monitor (cac): %d\n", ret); clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); return ret; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac cac start monitor vdev %d\n", ar->monitor_vdev_id); return 0; } static int ath10k_stop_cac(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); /* CAC is not running - do nothing */ if (!test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags)) return 0; clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ath10k_monitor_stop(ar); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac cac finished\n"); return 0; } static void ath10k_mac_has_radar_iter(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *conf, void *data) { bool *ret = data; if (!*ret && conf->radar_enabled) *ret = true; } static bool ath10k_mac_has_radar_enabled(struct ath10k *ar) { bool has_radar = false; ieee80211_iter_chan_contexts_atomic(ar->hw, ath10k_mac_has_radar_iter, &has_radar); return has_radar; } static void ath10k_recalc_radar_detection(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); ath10k_stop_cac(ar); if (!ath10k_mac_has_radar_enabled(ar)) return; if (ar->num_started_vdevs > 0) return; ret = ath10k_start_cac(ar); if (ret) { /* * Not possible to start CAC on current channel so starting * radiation is not allowed, make this channel DFS_UNAVAILABLE * by indicating that radar was detected. 
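 * This is intentionally heavy-handed: reporting a (synthetic) radar hit
 * is presumably the only generic way to tell the stack the channel
 * cannot be used right now.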
*/ ath10k_warn(ar, "failed to start CAC: %d\n", ret); ieee80211_radar_detected(ar->hw); } } static int ath10k_vdev_stop(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; int ret; lockdep_assert_held(&ar->conf_mutex); reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); ret = ath10k_wmi_vdev_stop(ar, arvif->vdev_id); if (ret) { ath10k_warn(ar, "failed to stop WMI vdev %i: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_vdev_setup_sync(ar); if (ret) { ath10k_warn(ar, "failed to synchronize setup for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } WARN_ON(ar->num_started_vdevs == 0); if (ar->num_started_vdevs != 0) { ar->num_started_vdevs--; ath10k_recalc_radar_detection(ar); } return ret; } static int ath10k_vdev_start_restart(struct ath10k_vif *arvif, const struct cfg80211_chan_def *chandef, bool restart) { struct ath10k *ar = arvif->ar; struct wmi_vdev_start_request_arg arg = {}; int ret = 0; lockdep_assert_held(&ar->conf_mutex); reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); arg.vdev_id = arvif->vdev_id; arg.dtim_period = arvif->dtim_period; arg.bcn_intval = arvif->beacon_interval; arg.channel.freq = chandef->chan->center_freq; arg.channel.band_center_freq1 = chandef->center_freq1; arg.channel.band_center_freq2 = chandef->center_freq2; arg.channel.mode = chan_to_phymode(chandef); arg.channel.min_power = 0; arg.channel.max_power = chandef->chan->max_power * 2; arg.channel.max_reg_power = chandef->chan->max_reg_power * 2; arg.channel.max_antenna_gain = chandef->chan->max_antenna_gain; if (arvif->vdev_type == WMI_VDEV_TYPE_AP) { arg.ssid = arvif->u.ap.ssid; arg.ssid_len = arvif->u.ap.ssid_len; arg.hidden_ssid = arvif->u.ap.hidden_ssid; /* For now allow DFS for AP mode */ arg.channel.chan_radar = !!(chandef->chan->flags & IEEE80211_CHAN_RADAR); } else if (arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { arg.ssid = arvif->vif->cfg.ssid; arg.ssid_len = arvif->vif->cfg.ssid_len; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d start center_freq %d phymode %s\n", arg.vdev_id, arg.channel.freq, ath10k_wmi_phymode_str(arg.channel.mode)); if (restart) ret = ath10k_wmi_vdev_restart(ar, &arg); else ret = ath10k_wmi_vdev_start(ar, &arg); if (ret) { ath10k_warn(ar, "failed to start WMI vdev %i: %d\n", arg.vdev_id, ret); return ret; } ret = ath10k_vdev_setup_sync(ar); if (ret) { ath10k_warn(ar, "failed to synchronize setup for vdev %i restart %d: %d\n", arg.vdev_id, restart, ret); return ret; } ar->num_started_vdevs++; ath10k_recalc_radar_detection(ar); return ret; } static int ath10k_vdev_start(struct ath10k_vif *arvif, const struct cfg80211_chan_def *def) { return ath10k_vdev_start_restart(arvif, def, false); } static int ath10k_vdev_restart(struct ath10k_vif *arvif, const struct cfg80211_chan_def *def) { return ath10k_vdev_start_restart(arvif, def, true); } static int ath10k_mac_setup_bcn_p2p_ie(struct ath10k_vif *arvif, struct sk_buff *bcn) { struct ath10k *ar = arvif->ar; struct ieee80211_mgmt *mgmt; const u8 *p2p_ie; int ret; if (arvif->vif->type != NL80211_IFTYPE_AP || !arvif->vif->p2p) return 0; mgmt = (void *)bcn->data; p2p_ie = cfg80211_find_vendor_ie(WLAN_OUI_WFA, WLAN_OUI_TYPE_WFA_P2P, mgmt->u.beacon.variable, bcn->len - (mgmt->u.beacon.variable - bcn->data)); if (!p2p_ie) return -ENOENT; ret = ath10k_wmi_p2p_go_bcn_ie(ar, arvif->vdev_id, p2p_ie); if (ret) { ath10k_warn(ar, "failed to submit p2p go bcn ie for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_remove_vendor_ie(struct 
sk_buff *skb, unsigned int oui, u8 oui_type, size_t ie_offset) { size_t len; const u8 *next; const u8 *end; u8 *ie; if (WARN_ON(skb->len < ie_offset)) return -EINVAL; ie = (u8 *)cfg80211_find_vendor_ie(oui, oui_type, skb->data + ie_offset, skb->len - ie_offset); if (!ie) return -ENOENT; len = ie[1] + 2; end = skb->data + skb->len; next = ie + len; if (WARN_ON(next > end)) return -EINVAL; memmove(ie, next, end - next); skb_trim(skb, skb->len - len); return 0; } static int ath10k_mac_setup_bcn_tmpl(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ieee80211_hw *hw = ar->hw; struct ieee80211_vif *vif = arvif->vif; struct ieee80211_mutable_offsets offs = {}; struct sk_buff *bcn; int ret; if (!test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map)) return 0; if (arvif->vdev_type != WMI_VDEV_TYPE_AP && arvif->vdev_type != WMI_VDEV_TYPE_IBSS) return 0; bcn = ieee80211_beacon_get_template(hw, vif, &offs, 0); if (!bcn) { ath10k_warn(ar, "failed to get beacon template from mac80211\n"); return -EPERM; } ret = ath10k_mac_setup_bcn_p2p_ie(arvif, bcn); if (ret) { ath10k_warn(ar, "failed to setup p2p go bcn ie: %d\n", ret); kfree_skb(bcn); return ret; } /* P2P IE is inserted by firmware automatically (as configured above) * so remove it from the base beacon template to avoid duplicate P2P * IEs in beacon frames. */ ath10k_mac_remove_vendor_ie(bcn, WLAN_OUI_WFA, WLAN_OUI_TYPE_WFA_P2P, offsetof(struct ieee80211_mgmt, u.beacon.variable)); ret = ath10k_wmi_bcn_tmpl(ar, arvif->vdev_id, offs.tim_offset, bcn, 0, 0, NULL, 0); kfree_skb(bcn); if (ret) { ath10k_warn(ar, "failed to submit beacon template command: %d\n", ret); return ret; } return 0; } static int ath10k_mac_setup_prb_tmpl(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ieee80211_hw *hw = ar->hw; struct ieee80211_vif *vif = arvif->vif; struct sk_buff *prb; int ret; if (!test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map)) return 0; if (arvif->vdev_type != WMI_VDEV_TYPE_AP) return 0; /* For mesh, probe response and beacon share the same template */ if (ieee80211_vif_is_mesh(vif)) return 0; prb = ieee80211_proberesp_get(hw, vif); if (!prb) { ath10k_warn(ar, "failed to get probe resp template from mac80211\n"); return -EPERM; } ret = ath10k_wmi_prb_tmpl(ar, arvif->vdev_id, prb); kfree_skb(prb); if (ret) { ath10k_warn(ar, "failed to submit probe resp template command: %d\n", ret); return ret; } return 0; } static int ath10k_mac_vif_fix_hidden_ssid(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct cfg80211_chan_def def; int ret; /* When originally vdev is started during assign_vif_chanctx() some * information is missing, notably SSID. Firmware revisions with beacon * offloading require the SSID to be provided during vdev (re)start to * handle hidden SSID properly. * * Vdev restart must be done after vdev has been both started and * upped. Otherwise some firmware revisions (at least 10.2) fail to * deliver vdev restart response event causing timeouts during vdev * syncing in ath10k. * * Note: The vdev down/up and template reinstallation could be skipped * since only wmi-tlv firmware are known to have beacon offload and * wmi-tlv doesn't seem to misbehave like 10.2 wrt vdev restart * response delivery. It's probably more robust to keep it as is. 
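 * The sequence below is therefore: vdev down, reinstall the beacon and
 * probe response templates, vdev restart with the now-known SSID and,
 * finally, vdev up again.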
*/ if (!test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map)) return 0; if (WARN_ON(!arvif->is_started)) return -EINVAL; if (WARN_ON(!arvif->is_up)) return -EINVAL; if (WARN_ON(ath10k_mac_vif_chan(arvif->vif, &def))) return -EINVAL; ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) { ath10k_warn(ar, "failed to bring down ap vdev %i: %d\n", arvif->vdev_id, ret); return ret; } /* Vdev down reset beacon & presp templates. Reinstall them. Otherwise * firmware will crash upon vdev up. */ ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) { ath10k_warn(ar, "failed to update beacon template: %d\n", ret); return ret; } ret = ath10k_mac_setup_prb_tmpl(arvif); if (ret) { ath10k_warn(ar, "failed to update presp template: %d\n", ret); return ret; } ret = ath10k_vdev_restart(arvif, &def); if (ret) { ath10k_warn(ar, "failed to restart ap vdev %i: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid, arvif->bssid); if (ret) { ath10k_warn(ar, "failed to bring up ap vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static void ath10k_control_beaconing(struct ath10k_vif *arvif, struct ieee80211_bss_conf *info) { struct ath10k *ar = arvif->ar; int ret = 0; lockdep_assert_held(&arvif->ar->conf_mutex); if (!info->enable_beacon) { ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) ath10k_warn(ar, "failed to down vdev_id %i: %d\n", arvif->vdev_id, ret); arvif->is_up = false; spin_lock_bh(&arvif->ar->data_lock); ath10k_mac_vif_beacon_free(arvif); spin_unlock_bh(&arvif->ar->data_lock); return; } arvif->tx_seq_no = 0x1000; arvif->aid = 0; ether_addr_copy(arvif->bssid, info->bssid); ret = ath10k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid, arvif->bssid); if (ret) { ath10k_warn(ar, "failed to bring up vdev %d: %i\n", arvif->vdev_id, ret); return; } arvif->is_up = true; ret = ath10k_mac_vif_fix_hidden_ssid(arvif); if (ret) { ath10k_warn(ar, "failed to fix hidden ssid for vdev %i, expect trouble: %d\n", arvif->vdev_id, ret); return; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d up\n", arvif->vdev_id); } static void ath10k_control_ibss(struct ath10k_vif *arvif, struct ieee80211_vif *vif) { struct ath10k *ar = arvif->ar; u32 vdev_param; int ret = 0; lockdep_assert_held(&arvif->ar->conf_mutex); if (!vif->cfg.ibss_joined) { if (is_zero_ether_addr(arvif->bssid)) return; eth_zero_addr(arvif->bssid); return; } vdev_param = arvif->ar->wmi.vdev_param->atim_window; ret = ath10k_wmi_vdev_set_param(arvif->ar, arvif->vdev_id, vdev_param, ATH10K_DEFAULT_ATIM); if (ret) ath10k_warn(ar, "failed to set IBSS ATIM for vdev %d: %d\n", arvif->vdev_id, ret); } static int ath10k_mac_vif_recalc_ps_wake_threshold(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 param; u32 value; int ret; lockdep_assert_held(&arvif->ar->conf_mutex); if (arvif->u.sta.uapsd) value = WMI_STA_PS_TX_WAKE_THRESHOLD_NEVER; else value = WMI_STA_PS_TX_WAKE_THRESHOLD_ALWAYS; param = WMI_STA_PS_PARAM_TX_WAKE_THRESHOLD; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, value); if (ret) { ath10k_warn(ar, "failed to submit ps wake threshold %u on vdev %i: %d\n", value, arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_vif_recalc_ps_poll_count(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; u32 param; u32 value; int ret; lockdep_assert_held(&arvif->ar->conf_mutex); if (arvif->u.sta.uapsd) value = WMI_STA_PS_PSPOLL_COUNT_UAPSD; else value = WMI_STA_PS_PSPOLL_COUNT_NO_MAX; param = WMI_STA_PS_PARAM_PSPOLL_COUNT; ret = 
ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, value); if (ret) { ath10k_warn(ar, "failed to submit ps poll count %u on vdev %i: %d\n", value, arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_num_vifs_started(struct ath10k *ar) { struct ath10k_vif *arvif; int num = 0; lockdep_assert_held(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) if (arvif->is_started) num++; return num; } static int ath10k_mac_vif_setup_ps(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ieee80211_vif *vif = arvif->vif; struct ieee80211_conf *conf = &ar->hw->conf; enum wmi_sta_powersave_param param; enum wmi_sta_ps_mode psmode; int ret; int ps_timeout; bool enable_ps; lockdep_assert_held(&arvif->ar->conf_mutex); if (arvif->vif->type != NL80211_IFTYPE_STATION) return 0; enable_ps = arvif->ps; if (enable_ps && ath10k_mac_num_vifs_started(ar) > 1 && !test_bit(ATH10K_FW_FEATURE_MULTI_VIF_PS_SUPPORT, ar->running_fw->fw_file.fw_features)) { ath10k_warn(ar, "refusing to enable ps on vdev %i: not supported by fw\n", arvif->vdev_id); enable_ps = false; } if (!arvif->is_started) { /* mac80211 can update vif powersave state while disconnected. * Firmware doesn't behave nicely and consumes more power than * necessary if PS is disabled on a non-started vdev. Hence * force-enable PS for non-running vdevs. */ psmode = WMI_STA_PS_MODE_ENABLED; } else if (enable_ps) { psmode = WMI_STA_PS_MODE_ENABLED; param = WMI_STA_PS_PARAM_INACTIVITY_TIME; ps_timeout = conf->dynamic_ps_timeout; if (ps_timeout == 0) { /* Firmware doesn't like 0 */ ps_timeout = ieee80211_tu_to_usec( vif->bss_conf.beacon_int) / 1000; } ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, ps_timeout); if (ret) { ath10k_warn(ar, "failed to set inactivity time for vdev %d: %i\n", arvif->vdev_id, ret); return ret; } } else { psmode = WMI_STA_PS_MODE_DISABLED; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d psmode %s\n", arvif->vdev_id, psmode ? "enable" : "disable"); ret = ath10k_wmi_set_psmode(ar, arvif->vdev_id, psmode); if (ret) { ath10k_warn(ar, "failed to set PS Mode %d for vdev %d: %d\n", psmode, arvif->vdev_id, ret); return ret; } return 0; } static int ath10k_mac_vif_disable_keepalive(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct wmi_sta_keepalive_arg arg = {}; int ret; lockdep_assert_held(&arvif->ar->conf_mutex); if (arvif->vdev_type != WMI_VDEV_TYPE_STA) return 0; if (!test_bit(WMI_SERVICE_STA_KEEP_ALIVE, ar->wmi.svc_map)) return 0; /* Some firmware revisions have a bug and ignore the `enabled` field. * Instead use the interval to disable the keepalive. 
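 * Hence the NULL-frame method and enabled=1 are still filled in below;
 * WMI_STA_KEEPALIVE_INTERVAL_DISABLE is what actually turns keepalive off.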
*/ arg.vdev_id = arvif->vdev_id; arg.enabled = 1; arg.method = WMI_STA_KEEPALIVE_METHOD_NULL_FRAME; arg.interval = WMI_STA_KEEPALIVE_INTERVAL_DISABLE; ret = ath10k_wmi_sta_keepalive(ar, &arg); if (ret) { ath10k_warn(ar, "failed to submit keepalive on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return 0; } static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; struct ieee80211_vif *vif = arvif->vif; int ret; lockdep_assert_held(&arvif->ar->conf_mutex); if (WARN_ON(!test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map))) return; if (arvif->vdev_type != WMI_VDEV_TYPE_AP) return; if (!vif->bss_conf.csa_active) return; if (!arvif->is_up) return; if (!ieee80211_beacon_cntdwn_is_complete(vif, 0)) { ieee80211_beacon_update_cntdwn(vif, 0); ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update bcn tmpl during csa: %d\n", ret); ret = ath10k_mac_setup_prb_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update prb tmpl during csa: %d\n", ret); } else { ieee80211_csa_finish(vif, 0); } } static void ath10k_mac_vif_ap_csa_work(struct work_struct *work) { struct ath10k_vif *arvif = container_of(work, struct ath10k_vif, ap_csa_work); struct ath10k *ar = arvif->ar; mutex_lock(&ar->conf_mutex); ath10k_mac_vif_ap_csa_count_down(arvif); mutex_unlock(&ar->conf_mutex); } static void ath10k_mac_handle_beacon_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct sk_buff *skb = data; struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ath10k_vif *arvif = (void *)vif->drv_priv; if (vif->type != NL80211_IFTYPE_STATION) return; if (!ether_addr_equal(mgmt->bssid, vif->bss_conf.bssid)) return; cancel_delayed_work(&arvif->connection_loss_work); } void ath10k_mac_handle_beacon(struct ath10k *ar, struct sk_buff *skb) { ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_mac_handle_beacon_iter, skb); } static void ath10k_mac_handle_beacon_miss_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { u32 *vdev_id = data; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k *ar = arvif->ar; struct ieee80211_hw *hw = ar->hw; if (arvif->vdev_id != *vdev_id) return; if (!arvif->is_up) return; ieee80211_beacon_loss(vif); /* Firmware doesn't report beacon loss events repeatedly. If AP probe * (done by mac80211) succeeds but beacons do not resume then it * doesn't make sense to continue operation. Queue connection loss work * which can be cancelled when beacon is received. */ ieee80211_queue_delayed_work(hw, &arvif->connection_loss_work, ATH10K_CONNECTION_LOSS_HZ); } void ath10k_mac_handle_beacon_miss(struct ath10k *ar, u32 vdev_id) { ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_mac_handle_beacon_miss_iter, &vdev_id); } static void ath10k_mac_vif_sta_connection_loss_work(struct work_struct *work) { struct ath10k_vif *arvif = container_of(work, struct ath10k_vif, connection_loss_work.work); struct ieee80211_vif *vif = arvif->vif; if (!arvif->is_up) return; ieee80211_connection_loss(vif); } /**********************/ /* Station management */ /**********************/ static u32 ath10k_peer_assoc_h_listen_intval(struct ath10k *ar, struct ieee80211_vif *vif) { /* Some firmware revisions have unstable STA powersave when listen * interval is set too high (e.g. 5). The symptoms are firmware doesn't * generate NullFunc frames properly even if buffered frames have been * indicated in Beacon TIM. Firmware would seldom wake up to pull * buffered frames. 
Often pinging the device from AP would simply fail. * * As a workaround set it to 1. */ if (vif->type == NL80211_IFTYPE_STATION) return 1; return ar->hw->conf.listen_interval; } static void ath10k_peer_assoc_h_basic(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; u32 aid; lockdep_assert_held(&ar->conf_mutex); if (vif->type == NL80211_IFTYPE_STATION) aid = vif->cfg.aid; else aid = sta->aid; ether_addr_copy(arg->addr, sta->addr); arg->vdev_id = arvif->vdev_id; arg->peer_aid = aid; arg->peer_flags |= arvif->ar->wmi.peer_flags->auth; arg->peer_listen_intval = ath10k_peer_assoc_h_listen_intval(ar, vif); arg->peer_num_spatial_streams = 1; arg->peer_caps = vif->bss_conf.assoc_capability; } static void ath10k_peer_assoc_h_crypto(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ieee80211_bss_conf *info = &vif->bss_conf; struct cfg80211_chan_def def; struct cfg80211_bss *bss; const u8 *rsnie = NULL; const u8 *wpaie = NULL; lockdep_assert_held(&ar->conf_mutex); if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; bss = cfg80211_get_bss(ar->hw->wiphy, def.chan, info->bssid, vif->cfg.ssid_len ? vif->cfg.ssid : NULL, vif->cfg.ssid_len, IEEE80211_BSS_TYPE_ANY, IEEE80211_PRIVACY_ANY); if (bss) { const struct cfg80211_bss_ies *ies; rcu_read_lock(); rsnie = ieee80211_bss_get_ie(bss, WLAN_EID_RSN); ies = rcu_dereference(bss->ies); wpaie = cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WPA, ies->data, ies->len); rcu_read_unlock(); cfg80211_put_bss(ar->hw->wiphy, bss); } /* FIXME: base on RSN IE/WPA IE is a correct idea? */ if (rsnie || wpaie) { ath10k_dbg(ar, ATH10K_DBG_WMI, "%s: rsn ie found\n", __func__); arg->peer_flags |= ar->wmi.peer_flags->need_ptk_4_way; } if (wpaie) { ath10k_dbg(ar, ATH10K_DBG_WMI, "%s: wpa ie found\n", __func__); arg->peer_flags |= ar->wmi.peer_flags->need_gtk_2_way; } if (sta->mfp && test_bit(ATH10K_FW_FEATURE_MFP_SUPPORT, ar->running_fw->fw_file.fw_features)) { arg->peer_flags |= ar->wmi.peer_flags->pmf; } } static void ath10k_peer_assoc_h_rates(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_rate_set_arg *rateset = &arg->peer_legacy_rates; struct cfg80211_chan_def def; const struct ieee80211_supported_band *sband; const struct ieee80211_rate *rates; enum nl80211_band band; u32 ratemask; u8 rate; int i; lockdep_assert_held(&ar->conf_mutex); if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; band = def.chan->band; sband = ar->hw->wiphy->bands[band]; ratemask = sta->deflink.supp_rates[band]; ratemask &= arvif->bitrate_mask.control[band].legacy; rates = sband->bitrates; rateset->num_rates = 0; for (i = 0; i < 32; i++, ratemask >>= 1, rates++) { if (!(ratemask & 1)) continue; rate = ath10k_mac_bitrate_to_rate(rates->bitrate); rateset->rates[rateset->num_rates] = rate; rateset->num_rates++; } } static bool ath10k_peer_assoc_h_ht_masked(const u8 ht_mcs_mask[IEEE80211_HT_MCS_MASK_LEN]) { int nss; for (nss = 0; nss < IEEE80211_HT_MCS_MASK_LEN; nss++) if (ht_mcs_mask[nss]) return false; return true; } static bool ath10k_peer_assoc_h_vht_masked(const u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) { int nss; for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) if (vht_mcs_mask[nss]) return false; return true; } static void ath10k_peer_assoc_h_ht(struct ath10k *ar, 
struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { const struct ieee80211_sta_ht_cap *ht_cap = &sta->deflink.ht_cap; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_chan_def def; enum nl80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; int i, n; u8 max_nss; u32 stbc; lockdep_assert_held(&ar->conf_mutex); if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; if (!ht_cap->ht_supported) return; band = def.chan->band; ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; if (ath10k_peer_assoc_h_ht_masked(ht_mcs_mask) && ath10k_peer_assoc_h_vht_masked(vht_mcs_mask)) return; arg->peer_flags |= ar->wmi.peer_flags->ht; arg->peer_max_mpdu = (1 << (IEEE80211_HT_MAX_AMPDU_FACTOR + ht_cap->ampdu_factor)) - 1; arg->peer_mpdu_density = ath10k_parse_mpdudensity(ht_cap->ampdu_density); arg->peer_ht_caps = ht_cap->cap; arg->peer_rate_caps |= WMI_RC_HT_FLAG; if (ht_cap->cap & IEEE80211_HT_CAP_LDPC_CODING) arg->peer_flags |= ar->wmi.peer_flags->ldbc; if (sta->deflink.bandwidth >= IEEE80211_STA_RX_BW_40) { arg->peer_flags |= ar->wmi.peer_flags->bw40; arg->peer_rate_caps |= WMI_RC_CW40_FLAG; } if (arvif->bitrate_mask.control[band].gi != NL80211_TXRATE_FORCE_LGI) { if (ht_cap->cap & IEEE80211_HT_CAP_SGI_20) arg->peer_rate_caps |= WMI_RC_SGI_FLAG; if (ht_cap->cap & IEEE80211_HT_CAP_SGI_40) arg->peer_rate_caps |= WMI_RC_SGI_FLAG; } if (ht_cap->cap & IEEE80211_HT_CAP_TX_STBC) { arg->peer_rate_caps |= WMI_RC_TX_STBC_FLAG; arg->peer_flags |= ar->wmi.peer_flags->stbc; } if (ht_cap->cap & IEEE80211_HT_CAP_RX_STBC) { stbc = ht_cap->cap & IEEE80211_HT_CAP_RX_STBC; stbc = stbc >> IEEE80211_HT_CAP_RX_STBC_SHIFT; stbc = stbc << WMI_RC_RX_STBC_FLAG_S; arg->peer_rate_caps |= stbc; arg->peer_flags |= ar->wmi.peer_flags->stbc; } if (ht_cap->mcs.rx_mask[1] && ht_cap->mcs.rx_mask[2]) arg->peer_rate_caps |= WMI_RC_TS_FLAG; else if (ht_cap->mcs.rx_mask[1]) arg->peer_rate_caps |= WMI_RC_DS_FLAG; for (i = 0, n = 0, max_nss = 0; i < IEEE80211_HT_MCS_MASK_LEN * 8; i++) if ((ht_cap->mcs.rx_mask[i / 8] & BIT(i % 8)) && (ht_mcs_mask[i / 8] & BIT(i % 8))) { max_nss = (i / 8) + 1; arg->peer_ht_rates.rates[n++] = i; } /* * This is a workaround for HT-enabled STAs which break the spec * and have no HT capabilities RX mask (no HT RX MCS map). * * As per spec, in section 20.3.5 Modulation and coding scheme (MCS), * MCS 0 through 7 are mandatory in 20MHz with 800 ns GI at all STAs. * * Firmware asserts if such situation occurs. 
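 * The fallback below therefore advertises the mandatory MCS 0-7 set on
 * behalf of such a peer rather than handing the firmware an empty HT
 * rate list.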
*/ if (n == 0) { arg->peer_ht_rates.num_rates = 8; for (i = 0; i < arg->peer_ht_rates.num_rates; i++) arg->peer_ht_rates.rates[i] = i; } else { arg->peer_ht_rates.num_rates = n; arg->peer_num_spatial_streams = min(sta->deflink.rx_nss, max_nss); } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac ht peer %pM mcs cnt %d nss %d\n", arg->addr, arg->peer_ht_rates.num_rates, arg->peer_num_spatial_streams); } static int ath10k_peer_assoc_qos_ap(struct ath10k *ar, struct ath10k_vif *arvif, struct ieee80211_sta *sta) { u32 uapsd = 0; u32 max_sp = 0; int ret = 0; lockdep_assert_held(&ar->conf_mutex); if (sta->wme && sta->uapsd_queues) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac uapsd_queues 0x%x max_sp %d\n", sta->uapsd_queues, sta->max_sp); if (sta->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) uapsd |= WMI_AP_PS_UAPSD_AC3_DELIVERY_EN | WMI_AP_PS_UAPSD_AC3_TRIGGER_EN; if (sta->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) uapsd |= WMI_AP_PS_UAPSD_AC2_DELIVERY_EN | WMI_AP_PS_UAPSD_AC2_TRIGGER_EN; if (sta->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) uapsd |= WMI_AP_PS_UAPSD_AC1_DELIVERY_EN | WMI_AP_PS_UAPSD_AC1_TRIGGER_EN; if (sta->uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) uapsd |= WMI_AP_PS_UAPSD_AC0_DELIVERY_EN | WMI_AP_PS_UAPSD_AC0_TRIGGER_EN; if (sta->max_sp < MAX_WMI_AP_PS_PEER_PARAM_MAX_SP) max_sp = sta->max_sp; ret = ath10k_wmi_set_ap_ps_param(ar, arvif->vdev_id, sta->addr, WMI_AP_PS_PEER_PARAM_UAPSD, uapsd); if (ret) { ath10k_warn(ar, "failed to set ap ps peer param uapsd for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_wmi_set_ap_ps_param(ar, arvif->vdev_id, sta->addr, WMI_AP_PS_PEER_PARAM_MAX_SP, max_sp); if (ret) { ath10k_warn(ar, "failed to set ap ps peer param max sp for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } /* TODO setup this based on STA listen interval and * beacon interval. Currently we don't know * sta->listen_interval - mac80211 patch required. 
* Currently use 10 seconds */ ret = ath10k_wmi_set_ap_ps_param(ar, arvif->vdev_id, sta->addr, WMI_AP_PS_PEER_PARAM_AGEOUT_TIME, 10); if (ret) { ath10k_warn(ar, "failed to set ap ps peer param ageout time for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } } return 0; } static u16 ath10k_peer_assoc_h_vht_limit(u16 tx_mcs_set, const u16 vht_mcs_limit[NL80211_VHT_NSS_MAX]) { int idx_limit; int nss; u16 mcs_map; u16 mcs; for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { mcs_map = ath10k_mac_get_max_vht_mcs_map(tx_mcs_set, nss) & vht_mcs_limit[nss]; if (mcs_map) idx_limit = fls(mcs_map) - 1; else idx_limit = -1; switch (idx_limit) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: default: /* see ath10k_mac_can_set_bitrate_mask() */ WARN_ON(1); fallthrough; case -1: mcs = IEEE80211_VHT_MCS_NOT_SUPPORTED; break; case 7: mcs = IEEE80211_VHT_MCS_SUPPORT_0_7; break; case 8: mcs = IEEE80211_VHT_MCS_SUPPORT_0_8; break; case 9: mcs = IEEE80211_VHT_MCS_SUPPORT_0_9; break; } tx_mcs_set &= ~(0x3 << (nss * 2)); tx_mcs_set |= mcs << (nss * 2); } return tx_mcs_set; } static u32 get_160mhz_nss_from_maxrate(int rate) { u32 nss; switch (rate) { case 780: nss = 1; break; case 1560: nss = 2; break; case 2106: nss = 3; /* not support MCS9 from spec*/ break; case 3120: nss = 4; break; default: nss = 1; } return nss; } static void ath10k_peer_assoc_h_vht(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { const struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_hw_params *hw = &ar->hw_params; struct cfg80211_chan_def def; enum nl80211_band band; const u16 *vht_mcs_mask; u8 ampdu_factor; u8 max_nss, vht_mcs; int i; if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; if (!vht_cap->vht_supported) return; band = def.chan->band; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; if (ath10k_peer_assoc_h_vht_masked(vht_mcs_mask)) return; arg->peer_flags |= ar->wmi.peer_flags->vht; if (def.chan->band == NL80211_BAND_2GHZ) arg->peer_flags |= ar->wmi.peer_flags->vht_2g; arg->peer_vht_caps = vht_cap->cap; ampdu_factor = (vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK) >> IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; /* Workaround: Some Netgear/Linksys 11ac APs set Rx A-MPDU factor to * zero in VHT IE. Using it would result in degraded throughput. * arg->peer_max_mpdu at this point contains HT max_mpdu so keep * it if VHT max_mpdu is smaller. */ arg->peer_max_mpdu = max(arg->peer_max_mpdu, (1U << (IEEE80211_HT_MAX_AMPDU_FACTOR + ampdu_factor)) - 1); if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) arg->peer_flags |= ar->wmi.peer_flags->bw80; if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) arg->peer_flags |= ar->wmi.peer_flags->bw160; /* Calculate peer NSS capability from VHT capabilities if STA * supports VHT. 
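 * The loop below walks rx_mcs_map two bits per spatial stream and keeps
 * the highest stream count that both the peer and the configured
 * vht_mcs mask allow.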
*/ for (i = 0, max_nss = 0, vht_mcs = 0; i < NL80211_VHT_NSS_MAX; i++) { vht_mcs = __le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map) >> (2 * i) & 3; if ((vht_mcs != IEEE80211_VHT_MCS_NOT_SUPPORTED) && vht_mcs_mask[i]) max_nss = i + 1; } arg->peer_num_spatial_streams = min(sta->deflink.rx_nss, max_nss); arg->peer_vht_rates.rx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.rx_highest); arg->peer_vht_rates.rx_mcs_set = __le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); arg->peer_vht_rates.tx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.tx_highest); arg->peer_vht_rates.tx_mcs_set = ath10k_peer_assoc_h_vht_limit( __le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map), vht_mcs_mask); /* Configure bandwidth-NSS mapping to FW * for the chip's tx chains setting on 160Mhz bw */ if (arg->peer_phymode == MODE_11AC_VHT160 || arg->peer_phymode == MODE_11AC_VHT80_80) { u32 rx_nss; u32 max_rate; max_rate = arg->peer_vht_rates.rx_max_rate; rx_nss = get_160mhz_nss_from_maxrate(max_rate); if (rx_nss == 0) rx_nss = arg->peer_num_spatial_streams; else rx_nss = min(arg->peer_num_spatial_streams, rx_nss); max_rate = hw->vht160_mcs_tx_highest; rx_nss = min(rx_nss, get_160mhz_nss_from_maxrate(max_rate)); arg->peer_bw_rxnss_override = FIELD_PREP(WMI_PEER_NSS_MAP_ENABLE, 1) | FIELD_PREP(WMI_PEER_NSS_160MHZ_MASK, (rx_nss - 1)); if (arg->peer_phymode == MODE_11AC_VHT80_80) { arg->peer_bw_rxnss_override |= FIELD_PREP(WMI_PEER_NSS_80_80MHZ_MASK, (rx_nss - 1)); } } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vht peer %pM max_mpdu %d flags 0x%x peer_rx_nss_override 0x%x\n", sta->addr, arg->peer_max_mpdu, arg->peer_flags, arg->peer_bw_rxnss_override); } static void ath10k_peer_assoc_h_qos(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; switch (arvif->vdev_type) { case WMI_VDEV_TYPE_AP: if (sta->wme) arg->peer_flags |= arvif->ar->wmi.peer_flags->qos; if (sta->wme && sta->uapsd_queues) { arg->peer_flags |= arvif->ar->wmi.peer_flags->apsd; arg->peer_rate_caps |= WMI_RC_UAPSD_FLAG; } break; case WMI_VDEV_TYPE_STA: if (sta->wme) arg->peer_flags |= arvif->ar->wmi.peer_flags->qos; break; case WMI_VDEV_TYPE_IBSS: if (sta->wme) arg->peer_flags |= arvif->ar->wmi.peer_flags->qos; break; default: break; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac peer %pM qos %d\n", sta->addr, !!(arg->peer_flags & arvif->ar->wmi.peer_flags->qos)); } static bool ath10k_mac_sta_has_ofdm_only(struct ieee80211_sta *sta) { return sta->deflink.supp_rates[NL80211_BAND_2GHZ] >> ATH10K_MAC_FIRST_OFDM_RATE_IDX; } static enum wmi_phy_mode ath10k_mac_get_phymode_vht(struct ath10k *ar, struct ieee80211_sta *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) { switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: return MODE_11AC_VHT160; case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: return MODE_11AC_VHT80_80; default: /* not sure if this is a valid case? 
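 * Fall back to VHT160 anyway; that is presumably the safest
 * interpretation of an unexpected SUPP_CHAN_WIDTH value.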
*/ return MODE_11AC_VHT160; } } if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) return MODE_11AC_VHT80; if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) return MODE_11AC_VHT40; if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_20) return MODE_11AC_VHT20; return MODE_UNKNOWN; } static void ath10k_peer_assoc_h_phymode(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_chan_def def; enum nl80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; enum wmi_phy_mode phymode = MODE_UNKNOWN; if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return; band = def.chan->band; ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; switch (band) { case NL80211_BAND_2GHZ: if (sta->deflink.vht_cap.vht_supported && !ath10k_peer_assoc_h_vht_masked(vht_mcs_mask)) { if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11AC_VHT40; else phymode = MODE_11AC_VHT20; } else if (sta->deflink.ht_cap.ht_supported && !ath10k_peer_assoc_h_ht_masked(ht_mcs_mask)) { if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11NG_HT40; else phymode = MODE_11NG_HT20; } else if (ath10k_mac_sta_has_ofdm_only(sta)) { phymode = MODE_11G; } else { phymode = MODE_11B; } break; case NL80211_BAND_5GHZ: /* * Check VHT first. */ if (sta->deflink.vht_cap.vht_supported && !ath10k_peer_assoc_h_vht_masked(vht_mcs_mask)) { phymode = ath10k_mac_get_phymode_vht(ar, sta); } else if (sta->deflink.ht_cap.ht_supported && !ath10k_peer_assoc_h_ht_masked(ht_mcs_mask)) { if (sta->deflink.bandwidth >= IEEE80211_STA_RX_BW_40) phymode = MODE_11NA_HT40; else phymode = MODE_11NA_HT20; } else { phymode = MODE_11A; } break; default: break; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac peer %pM phymode %s\n", sta->addr, ath10k_wmi_phymode_str(phymode)); arg->peer_phymode = phymode; WARN_ON(phymode == MODE_UNKNOWN); } static int ath10k_peer_assoc_prepare(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct wmi_peer_assoc_complete_arg *arg) { lockdep_assert_held(&ar->conf_mutex); memset(arg, 0, sizeof(*arg)); ath10k_peer_assoc_h_basic(ar, vif, sta, arg); ath10k_peer_assoc_h_crypto(ar, vif, sta, arg); ath10k_peer_assoc_h_rates(ar, vif, sta, arg); ath10k_peer_assoc_h_ht(ar, vif, sta, arg); ath10k_peer_assoc_h_phymode(ar, vif, sta, arg); ath10k_peer_assoc_h_vht(ar, vif, sta, arg); ath10k_peer_assoc_h_qos(ar, vif, sta, arg); return 0; } static const u32 ath10k_smps_map[] = { [WLAN_HT_CAP_SM_PS_STATIC] = WMI_PEER_SMPS_STATIC, [WLAN_HT_CAP_SM_PS_DYNAMIC] = WMI_PEER_SMPS_DYNAMIC, [WLAN_HT_CAP_SM_PS_INVALID] = WMI_PEER_SMPS_PS_NONE, [WLAN_HT_CAP_SM_PS_DISABLED] = WMI_PEER_SMPS_PS_NONE, }; static int ath10k_setup_peer_smps(struct ath10k *ar, struct ath10k_vif *arvif, const u8 *addr, const struct ieee80211_sta_ht_cap *ht_cap) { int smps; if (!ht_cap->ht_supported) return 0; smps = ht_cap->cap & IEEE80211_HT_CAP_SM_PS; smps >>= IEEE80211_HT_CAP_SM_PS_SHIFT; if (smps >= ARRAY_SIZE(ath10k_smps_map)) return -EINVAL; return ath10k_wmi_peer_set_param(ar, arvif->vdev_id, addr, ar->wmi.peer_param->smps_state, ath10k_smps_map[smps]); } static int ath10k_mac_vif_recalc_txbf(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta_vht_cap vht_cap) { struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret; u32 param; u32 value; if (ath10k_wmi_get_txbf_conf_scheme(ar) != WMI_TXBF_CONF_AFTER_ASSOC) return 0; if 
(!(ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE))) return 0; param = ar->wmi.vdev_param->txbf; value = 0; if (WARN_ON(param == WMI_VDEV_PARAM_UNSUPPORTED)) return 0; /* The following logic is correct. If a remote STA advertises support * for being a beamformer then we should enable us being a beamformee. */ if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)) { if (vht_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFEE; if (vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_MU_TX_BFEE; } if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)) { if (vht_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFER; if (vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_MU_TX_BFER; } if (value & WMI_VDEV_PARAM_TXBF_MU_TX_BFEE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFEE; if (value & WMI_VDEV_PARAM_TXBF_MU_TX_BFER) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFER; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, param, value); if (ret) { ath10k_warn(ar, "failed to submit vdev param txbf 0x%x: %d\n", value, ret); return ret; } return 0; } static bool ath10k_mac_is_connected(struct ath10k *ar) { struct ath10k_vif *arvif; list_for_each_entry(arvif, &ar->arvifs, list) { if (arvif->is_up && arvif->vdev_type == WMI_VDEV_TYPE_STA) return true; } return false; } static int ath10k_mac_txpower_setup(struct ath10k *ar, int txpower) { int ret; u32 param; int tx_power_2g, tx_power_5g; bool connected; lockdep_assert_held(&ar->conf_mutex); /* ath10k internally uses unit of 0.5 dBm so multiply by 2 */ tx_power_2g = txpower * 2; tx_power_5g = txpower * 2; connected = ath10k_mac_is_connected(ar); if (connected && ar->tx_power_2g_limit) if (tx_power_2g > ar->tx_power_2g_limit) tx_power_2g = ar->tx_power_2g_limit; if (connected && ar->tx_power_5g_limit) if (tx_power_5g > ar->tx_power_5g_limit) tx_power_5g = ar->tx_power_5g_limit; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac txpower 2g: %d, 5g: %d\n", tx_power_2g, tx_power_5g); param = ar->wmi.pdev_param->txpower_limit2g; ret = ath10k_wmi_pdev_set_param(ar, param, tx_power_2g); if (ret) { ath10k_warn(ar, "failed to set 2g txpower %d: %d\n", tx_power_2g, ret); return ret; } param = ar->wmi.pdev_param->txpower_limit5g; ret = ath10k_wmi_pdev_set_param(ar, param, tx_power_5g); if (ret) { ath10k_warn(ar, "failed to set 5g txpower %d: %d\n", tx_power_5g, ret); return ret; } return 0; } static int ath10k_mac_txpower_recalc(struct ath10k *ar) { struct ath10k_vif *arvif; int ret, txpower = -1; lockdep_assert_held(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) { /* txpower not initialized yet? 
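 * INT_MIN is treated as "not set yet", so such vifs are skipped when
 * computing the minimum below.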
*/ if (arvif->txpower == INT_MIN) continue; if (txpower == -1) txpower = arvif->txpower; else txpower = min(txpower, arvif->txpower); } if (txpower == -1) return 0; ret = ath10k_mac_txpower_setup(ar, txpower); if (ret) { ath10k_warn(ar, "failed to setup tx power %d: %d\n", txpower, ret); return ret; } return 0; } static int ath10k_mac_set_sar_power(struct ath10k *ar) { if (!ar->hw_params.dynamic_sar_support) return -EOPNOTSUPP; if (!ath10k_mac_is_connected(ar)) return 0; /* if connected, then arvif->txpower must be valid */ return ath10k_mac_txpower_recalc(ar); } static int ath10k_mac_set_sar_specs(struct ieee80211_hw *hw, const struct cfg80211_sar_specs *sar) { const struct cfg80211_sar_sub_specs *sub_specs; struct ath10k *ar = hw->priv; u32 i; int ret; mutex_lock(&ar->conf_mutex); if (!ar->hw_params.dynamic_sar_support) { ret = -EOPNOTSUPP; goto err; } if (!sar || sar->type != NL80211_SAR_TYPE_POWER || sar->num_sub_specs == 0) { ret = -EINVAL; goto err; } sub_specs = sar->sub_specs; /* 0dbm is not a practical value for ath10k, so use 0 * as no SAR limitation on it. */ ar->tx_power_2g_limit = 0; ar->tx_power_5g_limit = 0; /* note the power is in 0.25dbm unit, while ath10k uses * 0.5dbm unit. */ for (i = 0; i < sar->num_sub_specs; i++) { if (sub_specs->freq_range_index == 0) ar->tx_power_2g_limit = sub_specs->power / 2; else if (sub_specs->freq_range_index == 1) ar->tx_power_5g_limit = sub_specs->power / 2; sub_specs++; } ret = ath10k_mac_set_sar_power(ar); if (ret) { ath10k_warn(ar, "failed to set sar power: %d", ret); goto err; } err: mutex_unlock(&ar->conf_mutex); return ret; } /* can be called only in mac80211 callbacks due to `key_count` usage */ static void ath10k_bss_assoc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *bss_conf) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; struct wmi_peer_assoc_complete_arg peer_arg; struct ieee80211_sta *ap_sta; int ret; lockdep_assert_held(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %i assoc bssid %pM aid %d\n", arvif->vdev_id, arvif->bssid, arvif->aid); rcu_read_lock(); ap_sta = ieee80211_find_sta(vif, bss_conf->bssid); if (!ap_sta) { ath10k_warn(ar, "failed to find station entry for bss %pM vdev %i\n", bss_conf->bssid, arvif->vdev_id); rcu_read_unlock(); return; } /* ap_sta must be accessed only within rcu section which must be left * before calling ath10k_setup_peer_smps() which might sleep. 
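 * Copies of the HT/VHT caps and the assoc arguments are taken below so
 * the RCU read lock can be dropped before the blocking WMI calls.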
*/ ht_cap = ap_sta->deflink.ht_cap; vht_cap = ap_sta->deflink.vht_cap; ret = ath10k_peer_assoc_prepare(ar, vif, ap_sta, &peer_arg); if (ret) { ath10k_warn(ar, "failed to prepare peer assoc for %pM vdev %i: %d\n", bss_conf->bssid, arvif->vdev_id, ret); rcu_read_unlock(); return; } rcu_read_unlock(); ret = ath10k_wmi_peer_assoc(ar, &peer_arg); if (ret) { ath10k_warn(ar, "failed to run peer assoc for %pM vdev %i: %d\n", bss_conf->bssid, arvif->vdev_id, ret); return; } ret = ath10k_setup_peer_smps(ar, arvif, bss_conf->bssid, &ht_cap); if (ret) { ath10k_warn(ar, "failed to setup peer SMPS for vdev %i: %d\n", arvif->vdev_id, ret); return; } ret = ath10k_mac_vif_recalc_txbf(ar, vif, vht_cap); if (ret) { ath10k_warn(ar, "failed to recalc txbf for vdev %i on bss %pM: %d\n", arvif->vdev_id, bss_conf->bssid, ret); return; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d up (associated) bssid %pM aid %d\n", arvif->vdev_id, bss_conf->bssid, vif->cfg.aid); WARN_ON(arvif->is_up); arvif->aid = vif->cfg.aid; ether_addr_copy(arvif->bssid, bss_conf->bssid); ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->peer_stats_info_enable, 1); if (ret) ath10k_warn(ar, "failed to enable peer stats info: %d\n", ret); ret = ath10k_wmi_vdev_up(ar, arvif->vdev_id, arvif->aid, arvif->bssid); if (ret) { ath10k_warn(ar, "failed to set vdev %d up: %d\n", arvif->vdev_id, ret); return; } arvif->is_up = true; ath10k_mac_set_sar_power(ar); /* Workaround: Some firmware revisions (tested with qca6174 * WLAN.RM.2.0-00073) have buggy powersave state machine and must be * poked with peer param command. */ ret = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, arvif->bssid, ar->wmi.peer_param->dummy_var, 1); if (ret) { ath10k_warn(ar, "failed to poke peer %pM param for ps workaround on vdev %i: %d\n", arvif->bssid, arvif->vdev_id, ret); return; } } static void ath10k_bss_disassoc(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ieee80211_sta_vht_cap vht_cap = {}; int ret; lockdep_assert_held(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %i disassoc bssid %pM\n", arvif->vdev_id, arvif->bssid); ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) ath10k_warn(ar, "failed to down vdev %i: %d\n", arvif->vdev_id, ret); arvif->def_wep_key_idx = -1; ret = ath10k_mac_vif_recalc_txbf(ar, vif, vht_cap); if (ret) { ath10k_warn(ar, "failed to recalc txbf for vdev %i: %d\n", arvif->vdev_id, ret); return; } arvif->is_up = false; ath10k_mac_txpower_recalc(ar); cancel_delayed_work_sync(&arvif->connection_loss_work); } static int ath10k_new_peer_tid_config(struct ath10k *ar, struct ieee80211_sta *sta, struct ath10k_vif *arvif) { struct wmi_per_peer_per_tid_cfg_arg arg = {}; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; bool config_apply; int ret, i; for (i = 0; i < ATH10K_TID_MAX; i++) { config_apply = false; if (arvif->retry_long[i] || arvif->ampdu[i] || arvif->rate_ctrl[i] || arvif->rtscts[i]) { config_apply = true; arg.tid = i; arg.vdev_id = arvif->vdev_id; arg.retry_count = arvif->retry_long[i]; arg.aggr_control = arvif->ampdu[i]; arg.rate_ctrl = arvif->rate_ctrl[i]; arg.rcode_flags = arvif->rate_code[i]; if (arvif->rtscts[i]) arg.ext_tid_cfg_bitmap = WMI_EXT_TID_RTS_CTS_CONFIG; else arg.ext_tid_cfg_bitmap = 0; arg.rtscts_ctrl = arvif->rtscts[i]; } if (arvif->noack[i]) { arg.ack_policy = arvif->noack[i]; arg.rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_DEFAULT_LOWEST_RATE; arg.aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_DISABLE; 
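			/* For noack TIDs the rate control and aggregation
			 * settings above are forced rather than taken from the
			 * per-vif configuration.
			 */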
config_apply = true; } /* Assign default value(-1) to newly connected station. * This is to identify station specific tid configuration not * configured for the station. */ arsta->retry_long[i] = -1; arsta->noack[i] = -1; arsta->ampdu[i] = -1; if (!config_apply) continue; ether_addr_copy(arg.peer_macaddr.addr, sta->addr); ret = ath10k_wmi_set_per_peer_per_tid_cfg(ar, &arg); if (ret) { ath10k_warn(ar, "failed to set per tid retry/aggr config for sta %pM: %d\n", sta->addr, ret); return ret; } memset(&arg, 0, sizeof(arg)); } return 0; } static int ath10k_station_assoc(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, bool reassoc) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_peer_assoc_complete_arg peer_arg; int ret = 0; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_peer_assoc_prepare(ar, vif, sta, &peer_arg); if (ret) { ath10k_warn(ar, "failed to prepare WMI peer assoc for %pM vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); return ret; } ret = ath10k_wmi_peer_assoc(ar, &peer_arg); if (ret) { ath10k_warn(ar, "failed to run peer assoc for STA %pM vdev %i: %d\n", sta->addr, arvif->vdev_id, ret); return ret; } /* Re-assoc is run only to update supported rates for given station. It * doesn't make much sense to reconfigure the peer completely. */ if (!reassoc) { ret = ath10k_setup_peer_smps(ar, arvif, sta->addr, &sta->deflink.ht_cap); if (ret) { ath10k_warn(ar, "failed to setup peer SMPS for vdev %d: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_peer_assoc_qos_ap(ar, arvif, sta); if (ret) { ath10k_warn(ar, "failed to set qos params for STA %pM for vdev %i: %d\n", sta->addr, arvif->vdev_id, ret); return ret; } if (!sta->wme) { arvif->num_legacy_stations++; ret = ath10k_recalc_rtscts_prot(arvif); if (ret) { ath10k_warn(ar, "failed to recalculate rts/cts prot for vdev %d: %d\n", arvif->vdev_id, ret); return ret; } } /* Plumb cached keys only for static WEP */ if ((arvif->def_wep_key_idx != -1) && (!sta->tdls)) { ret = ath10k_install_peer_wep_keys(arvif, sta->addr); if (ret) { ath10k_warn(ar, "failed to install peer wep keys for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } } } if (!test_bit(WMI_SERVICE_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) return ret; return ath10k_new_peer_tid_config(ar, sta, arvif); } static int ath10k_station_disassoc(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret = 0; lockdep_assert_held(&ar->conf_mutex); if (!sta->wme) { arvif->num_legacy_stations--; ret = ath10k_recalc_rtscts_prot(arvif); if (ret) { ath10k_warn(ar, "failed to recalculate rts/cts prot for vdev %d: %d\n", arvif->vdev_id, ret); return ret; } } ret = ath10k_clear_peer_keys(arvif, sta->addr); if (ret) { ath10k_warn(ar, "failed to clear all peer wep keys for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } return ret; } /**************/ /* Regulatory */ /**************/ static int ath10k_update_channel_list(struct ath10k *ar) { struct ieee80211_hw *hw = ar->hw; struct ieee80211_supported_band **bands; enum nl80211_band band; struct ieee80211_channel *channel; struct wmi_scan_chan_list_arg arg = {0}; struct wmi_channel_arg *ch; bool passive; int len; int ret; int i; lockdep_assert_held(&ar->conf_mutex); bands = hw->wiphy->bands; for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!bands[band]) continue; for (i = 0; i < bands[band]->n_channels; i++) { if (bands[band]->channels[i].flags & IEEE80211_CHAN_DISABLED) continue; arg.n_channels++; } } len = 
sizeof(struct wmi_channel_arg) * arg.n_channels; arg.channels = kzalloc(len, GFP_KERNEL); if (!arg.channels) return -ENOMEM; ch = arg.channels; for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!bands[band]) continue; for (i = 0; i < bands[band]->n_channels; i++) { channel = &bands[band]->channels[i]; if (channel->flags & IEEE80211_CHAN_DISABLED) continue; ch->allow_ht = true; /* FIXME: when should we really allow VHT? */ ch->allow_vht = true; ch->allow_ibss = !(channel->flags & IEEE80211_CHAN_NO_IR); ch->ht40plus = !(channel->flags & IEEE80211_CHAN_NO_HT40PLUS); ch->chan_radar = !!(channel->flags & IEEE80211_CHAN_RADAR); passive = channel->flags & IEEE80211_CHAN_NO_IR; ch->passive = passive; /* the firmware is ignoring the "radar" flag of the * channel and is scanning actively using Probe Requests * on "Radar detection"/DFS channels which are not * marked as "available" */ ch->passive |= ch->chan_radar; ch->freq = channel->center_freq; ch->band_center_freq1 = channel->center_freq; ch->min_power = 0; ch->max_power = channel->max_power * 2; ch->max_reg_power = channel->max_reg_power * 2; ch->max_antenna_gain = channel->max_antenna_gain; ch->reg_class_id = 0; /* FIXME */ /* FIXME: why use only legacy modes, why not any * HT/VHT modes? Would that even make any * difference? */ if (channel->band == NL80211_BAND_2GHZ) ch->mode = MODE_11G; else ch->mode = MODE_11A; if (WARN_ON_ONCE(ch->mode == MODE_UNKNOWN)) continue; ath10k_dbg(ar, ATH10K_DBG_WMI, "mac channel [%zd/%d] freq %d maxpower %d regpower %d antenna %d mode %d\n", ch - arg.channels, arg.n_channels, ch->freq, ch->max_power, ch->max_reg_power, ch->max_antenna_gain, ch->mode); ch++; } } ret = ath10k_wmi_scan_chan_list(ar, &arg); kfree(arg.channels); return ret; } static enum wmi_dfs_region ath10k_mac_get_dfs_region(enum nl80211_dfs_regions dfs_region) { switch (dfs_region) { case NL80211_DFS_UNSET: return WMI_UNINIT_DFS_DOMAIN; case NL80211_DFS_FCC: return WMI_FCC_DFS_DOMAIN; case NL80211_DFS_ETSI: return WMI_ETSI_DFS_DOMAIN; case NL80211_DFS_JP: return WMI_MKK4_DFS_DOMAIN; } return WMI_UNINIT_DFS_DOMAIN; } static void ath10k_regd_update(struct ath10k *ar) { struct reg_dmn_pair_mapping *regpair; int ret; enum wmi_dfs_region wmi_dfs_reg; enum nl80211_dfs_regions nl_dfs_reg; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_update_channel_list(ar); if (ret) ath10k_warn(ar, "failed to update channel list: %d\n", ret); regpair = ar->ath_common.regulatory.regpair; if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { nl_dfs_reg = ar->dfs_detector->region; wmi_dfs_reg = ath10k_mac_get_dfs_region(nl_dfs_reg); } else { wmi_dfs_reg = WMI_UNINIT_DFS_DOMAIN; } /* Target allows setting up per-band regdomain but ath_common provides * a combined one only */ ret = ath10k_wmi_pdev_set_regdomain(ar, regpair->reg_domain, regpair->reg_domain, /* 2ghz */ regpair->reg_domain, /* 5ghz */ regpair->reg_2ghz_ctl, regpair->reg_5ghz_ctl, wmi_dfs_reg); if (ret) ath10k_warn(ar, "failed to set pdev regdomain: %d\n", ret); } static void ath10k_mac_update_channel_list(struct ath10k *ar, struct ieee80211_supported_band *band) { int i; if (ar->low_5ghz_chan && ar->high_5ghz_chan) { for (i = 0; i < band->n_channels; i++) { if (band->channels[i].center_freq < ar->low_5ghz_chan || band->channels[i].center_freq > ar->high_5ghz_chan) band->channels[i].flags |= IEEE80211_CHAN_DISABLED; } } } static void ath10k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) { struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy); struct ath10k *ar = 
hw->priv; bool result; ath_reg_notifier_apply(wiphy, request, &ar->ath_common.regulatory); if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) { ath10k_dbg(ar, ATH10K_DBG_REGULATORY, "dfs region 0x%x\n", request->dfs_region); result = ar->dfs_detector->set_dfs_domain(ar->dfs_detector, request->dfs_region); if (!result) ath10k_warn(ar, "DFS region 0x%X not supported, will trigger radar for every pulse\n", request->dfs_region); } mutex_lock(&ar->conf_mutex); if (ar->state == ATH10K_STATE_ON) ath10k_regd_update(ar); mutex_unlock(&ar->conf_mutex); if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) ath10k_mac_update_channel_list(ar, ar->hw->wiphy->bands[NL80211_BAND_5GHZ]); } static void ath10k_stop_radar_confirmation(struct ath10k *ar) { spin_lock_bh(&ar->data_lock); ar->radar_conf_state = ATH10K_RADAR_CONFIRMATION_STOPPED; spin_unlock_bh(&ar->data_lock); cancel_work_sync(&ar->radar_confirmation_work); } /***************/ /* TX handlers */ /***************/ enum ath10k_mac_tx_path { ATH10K_MAC_TX_HTT, ATH10K_MAC_TX_HTT_MGMT, ATH10K_MAC_TX_WMI_MGMT, ATH10K_MAC_TX_UNKNOWN, }; void ath10k_mac_tx_lock(struct ath10k *ar, int reason) { lockdep_assert_held(&ar->htt.tx_lock); WARN_ON(reason >= ATH10K_TX_PAUSE_MAX); ar->tx_paused |= BIT(reason); ieee80211_stop_queues(ar->hw); } static void ath10k_mac_tx_unlock_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k *ar = data; struct ath10k_vif *arvif = (void *)vif->drv_priv; if (arvif->tx_paused) return; ieee80211_wake_queue(ar->hw, arvif->vdev_id); } void ath10k_mac_tx_unlock(struct ath10k *ar, int reason) { lockdep_assert_held(&ar->htt.tx_lock); WARN_ON(reason >= ATH10K_TX_PAUSE_MAX); ar->tx_paused &= ~BIT(reason); if (ar->tx_paused) return; ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_RESUME_FLAGS, ath10k_mac_tx_unlock_iter, ar); ieee80211_wake_queue(ar->hw, ar->hw->offchannel_tx_hw_queue); } void ath10k_mac_vif_tx_lock(struct ath10k_vif *arvif, int reason) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->htt.tx_lock); WARN_ON(reason >= BITS_PER_LONG); arvif->tx_paused |= BIT(reason); ieee80211_stop_queue(ar->hw, arvif->vdev_id); } void ath10k_mac_vif_tx_unlock(struct ath10k_vif *arvif, int reason) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->htt.tx_lock); WARN_ON(reason >= BITS_PER_LONG); arvif->tx_paused &= ~BIT(reason); if (ar->tx_paused) return; if (arvif->tx_paused) return; ieee80211_wake_queue(ar->hw, arvif->vdev_id); } static void ath10k_mac_vif_handle_tx_pause(struct ath10k_vif *arvif, enum wmi_tlv_tx_pause_id pause_id, enum wmi_tlv_tx_pause_action action) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->htt.tx_lock); switch (action) { case WMI_TLV_TX_PAUSE_ACTION_STOP: ath10k_mac_vif_tx_lock(arvif, pause_id); break; case WMI_TLV_TX_PAUSE_ACTION_WAKE: ath10k_mac_vif_tx_unlock(arvif, pause_id); break; default: ath10k_dbg(ar, ATH10K_DBG_BOOT, "received unknown tx pause action %d on vdev %i, ignoring\n", action, arvif->vdev_id); break; } } struct ath10k_mac_tx_pause { u32 vdev_id; enum wmi_tlv_tx_pause_id pause_id; enum wmi_tlv_tx_pause_action action; }; static void ath10k_mac_handle_tx_pause_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_mac_tx_pause *arg = data; if (arvif->vdev_id != arg->vdev_id) return; ath10k_mac_vif_handle_tx_pause(arvif, arg->pause_id, arg->action); } void ath10k_mac_handle_tx_pause_vdev(struct ath10k *ar, u32 vdev_id, enum wmi_tlv_tx_pause_id pause_id, enum 
wmi_tlv_tx_pause_action action) { struct ath10k_mac_tx_pause arg = { .vdev_id = vdev_id, .pause_id = pause_id, .action = action, }; spin_lock_bh(&ar->htt.tx_lock); ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_RESUME_FLAGS, ath10k_mac_handle_tx_pause_iter, &arg); spin_unlock_bh(&ar->htt.tx_lock); } static enum ath10k_hw_txrx_mode ath10k_mac_tx_h_get_txmode(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct sk_buff *skb) { const struct ieee80211_hdr *hdr = (void *)skb->data; const struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(skb); __le16 fc = hdr->frame_control; if (IEEE80211_SKB_CB(skb)->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) return ATH10K_HW_TXRX_ETHERNET; if (!vif || vif->type == NL80211_IFTYPE_MONITOR) return ATH10K_HW_TXRX_RAW; if (ieee80211_is_mgmt(fc)) return ATH10K_HW_TXRX_MGMT; /* Workaround: * * NullFunc frames are mostly used to ping if a client or AP are still * reachable and responsive. This implies tx status reports must be * accurate - otherwise either mac80211 or userspace (e.g. hostapd) can * come to a conclusion that the other end disappeared and tear down * BSS connection or it can never disconnect from BSS/client (which is * the case). * * Firmware with HTT older than 3.0 delivers incorrect tx status for * NullFunc frames to driver. However there's a HTT Mgmt Tx command * which seems to deliver correct tx reports for NullFunc frames. The * downside of using it is it ignores client powersave state so it can * end up disconnecting sleeping clients in AP mode. It should fix STA * mode though because AP don't sleep. */ if (ar->htt.target_version_major < 3 && (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && !test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, ar->running_fw->fw_file.fw_features)) return ATH10K_HW_TXRX_MGMT; /* Workaround: * * Some wmi-tlv firmwares for qca6174 have broken Tx key selection for * NativeWifi txmode - it selects AP key instead of peer key. It seems * to work with Ethernet txmode so use it. * * FIXME: Check if raw mode works with TDLS. */ if (ieee80211_is_data_present(fc) && sta && sta->tdls) return ATH10K_HW_TXRX_ETHERNET; if (test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags) || skb_cb->flags & ATH10K_SKB_F_RAW_TX) return ATH10K_HW_TXRX_RAW; return ATH10K_HW_TXRX_NATIVE_WIFI; } static bool ath10k_tx_h_use_hwcrypto(struct ieee80211_vif *vif, struct sk_buff *skb) { const struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); const struct ieee80211_hdr *hdr = (void *)skb->data; const u32 mask = IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_CTL_INJECTED; if (!ieee80211_has_protected(hdr->frame_control)) return false; if ((info->flags & mask) == mask) return false; if (vif) return !((struct ath10k_vif *)vif->drv_priv)->nohwcrypt; return true; } /* HTT Tx uses Native Wifi tx mode which expects 802.11 frames without QoS * Control in the header. */ static void ath10k_tx_h_nwifi(struct ieee80211_hw *hw, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (void *)skb->data; struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb); u8 *qos_ctl; if (!ieee80211_is_data_qos(hdr->frame_control)) return; qos_ctl = ieee80211_get_qos_ctl(hdr); memmove(skb->data + IEEE80211_QOS_CTL_LEN, skb->data, (void *)qos_ctl - (void *)skb->data); skb_pull(skb, IEEE80211_QOS_CTL_LEN); /* Some firmware revisions don't handle sending QoS NullFunc well. * These frames are mainly used for CQM purposes so it doesn't really * matter whether QoS NullFunc or NullFunc are sent. 
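* The QoS control field has already been stripped above, so only the QoS data subtype bit in frame_control needs to be cleared below.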
*/ hdr = (void *)skb->data; if (ieee80211_is_qos_nullfunc(hdr->frame_control)) cb->flags &= ~ATH10K_SKB_F_QOS; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_STYPE_QOS_DATA); } static void ath10k_tx_h_8023(struct sk_buff *skb) { struct ieee80211_hdr *hdr; struct rfc1042_hdr *rfc1042; struct ethhdr *eth; size_t hdrlen; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; __be16 type; hdr = (void *)skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); rfc1042 = (void *)skb->data + hdrlen; ether_addr_copy(da, ieee80211_get_DA(hdr)); ether_addr_copy(sa, ieee80211_get_SA(hdr)); type = rfc1042->snap_type; skb_pull(skb, hdrlen + sizeof(*rfc1042)); skb_push(skb, sizeof(*eth)); eth = (void *)skb->data; ether_addr_copy(eth->h_dest, da); ether_addr_copy(eth->h_source, sa); eth->h_proto = type; } static void ath10k_tx_h_add_p2p_noa_ie(struct ath10k *ar, struct ieee80211_vif *vif, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ath10k_vif *arvif = (void *)vif->drv_priv; /* This is case only for P2P_GO */ if (vif->type != NL80211_IFTYPE_AP || !vif->p2p) return; if (unlikely(ieee80211_is_probe_resp(hdr->frame_control))) { spin_lock_bh(&ar->data_lock); if (arvif->u.ap.noa_data) if (!pskb_expand_head(skb, 0, arvif->u.ap.noa_len, GFP_ATOMIC)) skb_put_data(skb, arvif->u.ap.noa_data, arvif->u.ap.noa_len); spin_unlock_bh(&ar->data_lock); } } static void ath10k_mac_tx_h_fill_cb(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_txq *txq, struct ieee80211_sta *sta, struct sk_buff *skb, u16 airtime) { struct ieee80211_hdr *hdr = (void *)skb->data; struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb); const struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); bool is_data = ieee80211_is_data(hdr->frame_control) || ieee80211_is_data_qos(hdr->frame_control); struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_sta *arsta; u8 tid, *qos_ctl; bool noack = false; cb->flags = 0; if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) { cb->flags |= ATH10K_SKB_F_QOS; /* Assume data frames are QoS */ goto finish_cb_fill; } if (!ath10k_tx_h_use_hwcrypto(vif, skb)) cb->flags |= ATH10K_SKB_F_NO_HWCRYPT; if (ieee80211_is_mgmt(hdr->frame_control)) cb->flags |= ATH10K_SKB_F_MGMT; if (ieee80211_is_data_qos(hdr->frame_control)) { cb->flags |= ATH10K_SKB_F_QOS; qos_ctl = ieee80211_get_qos_ctl(hdr); tid = (*qos_ctl) & IEEE80211_QOS_CTL_TID_MASK; if (arvif->noack[tid] == WMI_PEER_TID_CONFIG_NOACK) noack = true; if (sta) { arsta = (struct ath10k_sta *)sta->drv_priv; if (arsta->noack[tid] == WMI_PEER_TID_CONFIG_NOACK) noack = true; if (arsta->noack[tid] == WMI_PEER_TID_CONFIG_ACK) noack = false; } if (noack) cb->flags |= ATH10K_SKB_F_NOACK_TID; } /* Data frames encrypted in software will be posted to firmware * with tx encap mode set to RAW. Ex: Multicast traffic generated * for a specific VLAN group will always be encrypted in software. */ if (is_data && ieee80211_has_protected(hdr->frame_control) && !info->control.hw_key) { cb->flags |= ATH10K_SKB_F_NO_HWCRYPT; cb->flags |= ATH10K_SKB_F_RAW_TX; } finish_cb_fill: cb->vif = vif; cb->txq = txq; cb->airtime_est = airtime; if (sta) { arsta = (struct ath10k_sta *)sta->drv_priv; spin_lock_bh(&ar->data_lock); cb->ucast_cipher = arsta->ucast_cipher; spin_unlock_bh(&ar->data_lock); } } bool ath10k_mac_tx_frm_has_freq(struct ath10k *ar) { /* FIXME: Not really sure since when the behaviour changed. 
At some * point new firmware stopped requiring creation of peer entries for * offchannel tx (and actually creating them causes issues with wmi-htc * tx credit replenishment and reliability). Assuming it's at least 3.4 * because that's when the `freq` was introduced to TX_FRM HTT command. */ return (ar->htt.target_version_major >= 3 && ar->htt.target_version_minor >= 4 && ar->running_fw->fw_file.htt_op_version == ATH10K_FW_HTT_OP_VERSION_TLV); } static int ath10k_mac_tx_wmi_mgmt(struct ath10k *ar, struct sk_buff *skb) { struct sk_buff_head *q = &ar->wmi_mgmt_tx_queue; if (skb_queue_len_lockless(q) >= ATH10K_MAX_NUM_MGMT_PENDING) { ath10k_warn(ar, "wmi mgmt tx queue is full\n"); return -ENOSPC; } skb_queue_tail(q, skb); ieee80211_queue_work(ar->hw, &ar->wmi_mgmt_tx_work); return 0; } static enum ath10k_mac_tx_path ath10k_mac_tx_h_get_txpath(struct ath10k *ar, struct sk_buff *skb, enum ath10k_hw_txrx_mode txmode) { switch (txmode) { case ATH10K_HW_TXRX_RAW: case ATH10K_HW_TXRX_NATIVE_WIFI: case ATH10K_HW_TXRX_ETHERNET: return ATH10K_MAC_TX_HTT; case ATH10K_HW_TXRX_MGMT: if (test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, ar->running_fw->fw_file.fw_features) || test_bit(WMI_SERVICE_MGMT_TX_WMI, ar->wmi.svc_map)) return ATH10K_MAC_TX_WMI_MGMT; else if (ar->htt.target_version_major >= 3) return ATH10K_MAC_TX_HTT; else return ATH10K_MAC_TX_HTT_MGMT; } return ATH10K_MAC_TX_UNKNOWN; } static int ath10k_mac_tx_submit(struct ath10k *ar, enum ath10k_hw_txrx_mode txmode, enum ath10k_mac_tx_path txpath, struct sk_buff *skb) { struct ath10k_htt *htt = &ar->htt; int ret = -EINVAL; switch (txpath) { case ATH10K_MAC_TX_HTT: ret = ath10k_htt_tx(htt, txmode, skb); break; case ATH10K_MAC_TX_HTT_MGMT: ret = ath10k_htt_mgmt_tx(htt, skb); break; case ATH10K_MAC_TX_WMI_MGMT: ret = ath10k_mac_tx_wmi_mgmt(ar, skb); break; case ATH10K_MAC_TX_UNKNOWN: WARN_ON_ONCE(1); ret = -EINVAL; break; } if (ret) { ath10k_warn(ar, "failed to transmit packet, dropping: %d\n", ret); ieee80211_free_txskb(ar->hw, skb); } return ret; } /* This function consumes the sk_buff regardless of return value as far as * caller is concerned so no freeing is necessary afterwards. */ static int ath10k_mac_tx(struct ath10k *ar, struct ieee80211_vif *vif, enum ath10k_hw_txrx_mode txmode, enum ath10k_mac_tx_path txpath, struct sk_buff *skb, bool noque_offchan) { struct ieee80211_hw *hw = ar->hw; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); const struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(skb); int ret; /* We should disable CCK RATE due to P2P */ if (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE) ath10k_dbg(ar, ATH10K_DBG_MAC, "IEEE80211_TX_CTL_NO_CCK_RATE\n"); switch (txmode) { case ATH10K_HW_TXRX_MGMT: case ATH10K_HW_TXRX_NATIVE_WIFI: ath10k_tx_h_nwifi(hw, skb); ath10k_tx_h_add_p2p_noa_ie(ar, vif, skb); ath10k_tx_h_seq_no(vif, skb); break; case ATH10K_HW_TXRX_ETHERNET: /* Convert 802.11->802.3 header only if the frame was earlier * encapsulated to 802.11 by mac80211. Otherwise pass it as is. 
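* Frames marked with IEEE80211_TX_CTL_HW_80211_ENCAP arrive from mac80211 already in 802.3 form and must not be converted again.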
*/ if (!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) ath10k_tx_h_8023(skb); break; case ATH10K_HW_TXRX_RAW: if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags) && !(skb_cb->flags & ATH10K_SKB_F_RAW_TX)) { WARN_ON_ONCE(1); ieee80211_free_txskb(hw, skb); return -EOPNOTSUPP; } } if (!noque_offchan && info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { if (!ath10k_mac_tx_frm_has_freq(ar)) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac queued offchannel skb %pK len %d\n", skb, skb->len); skb_queue_tail(&ar->offchan_tx_queue, skb); ieee80211_queue_work(hw, &ar->offchan_tx_work); return 0; } } ret = ath10k_mac_tx_submit(ar, txmode, txpath, skb); if (ret) { ath10k_warn(ar, "failed to submit frame: %d\n", ret); return ret; } return 0; } void ath10k_offchan_tx_purge(struct ath10k *ar) { struct sk_buff *skb; for (;;) { skb = skb_dequeue(&ar->offchan_tx_queue); if (!skb) break; ieee80211_free_txskb(ar->hw, skb); } } void ath10k_offchan_tx_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, offchan_tx_work); struct ath10k_peer *peer; struct ath10k_vif *arvif; enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; struct ieee80211_hdr *hdr; struct ieee80211_vif *vif; struct ieee80211_sta *sta; struct sk_buff *skb; const u8 *peer_addr; int vdev_id; int ret; unsigned long time_left; bool tmp_peer_created = false; /* FW requirement: We must create a peer before FW will send out * an offchannel frame. Otherwise the frame will be stuck and * never transmitted. We delete the peer upon tx completion. * It is unlikely that a peer for offchannel tx will already be * present. However it may be in some rare cases so account for that. * Otherwise we might remove a legitimate peer and break stuff. */ for (;;) { skb = skb_dequeue(&ar->offchan_tx_queue); if (!skb) break; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK len %d\n", skb, skb->len); hdr = (struct ieee80211_hdr *)skb->data; peer_addr = ieee80211_get_DA(hdr); spin_lock_bh(&ar->data_lock); vdev_id = ar->scan.vdev_id; peer = ath10k_peer_find(ar, vdev_id, peer_addr); spin_unlock_bh(&ar->data_lock); if (peer) { ath10k_warn(ar, "peer %pM on vdev %d already present\n", peer_addr, vdev_id); } else { ret = ath10k_peer_create(ar, NULL, NULL, vdev_id, peer_addr, WMI_PEER_TYPE_DEFAULT); if (ret) ath10k_warn(ar, "failed to create peer %pM on vdev %d: %d\n", peer_addr, vdev_id, ret); tmp_peer_created = (ret == 0); } spin_lock_bh(&ar->data_lock); reinit_completion(&ar->offchan_tx_completed); ar->offchan_tx_skb = skb; spin_unlock_bh(&ar->data_lock); /* It's safe to access vif and sta - conf_mutex guarantees that * sta_state() and remove_interface() are locked exclusively * out wrt to this offchannel worker. 
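* The frame then goes through the same txmode/txpath selection as regular tx and is handed to ath10k_mac_tx() with offchannel requeueing disabled (noque_offchan = true).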
*/ arvif = ath10k_get_arvif(ar, vdev_id); if (arvif) { vif = arvif->vif; sta = ieee80211_find_sta(vif, peer_addr); } else { vif = NULL; sta = NULL; } txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, true); if (ret) { ath10k_warn(ar, "failed to transmit offchannel frame: %d\n", ret); /* not serious */ } time_left = wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ); if (time_left == 0) ath10k_warn(ar, "timed out waiting for offchannel skb %pK, len: %d\n", skb, skb->len); if (!peer && tmp_peer_created) { ret = ath10k_peer_delete(ar, vdev_id, peer_addr); if (ret) ath10k_warn(ar, "failed to delete peer %pM on vdev %d: %d\n", peer_addr, vdev_id, ret); } mutex_unlock(&ar->conf_mutex); } } void ath10k_mgmt_over_wmi_tx_purge(struct ath10k *ar) { struct sk_buff *skb; for (;;) { skb = skb_dequeue(&ar->wmi_mgmt_tx_queue); if (!skb) break; ieee80211_free_txskb(ar->hw, skb); } } void ath10k_mgmt_over_wmi_tx_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, wmi_mgmt_tx_work); struct sk_buff *skb; dma_addr_t paddr; int ret; for (;;) { skb = skb_dequeue(&ar->wmi_mgmt_tx_queue); if (!skb) break; if (test_bit(ATH10K_FW_FEATURE_MGMT_TX_BY_REF, ar->running_fw->fw_file.fw_features)) { paddr = dma_map_single(ar->dev, skb->data, skb->len, DMA_TO_DEVICE); if (dma_mapping_error(ar->dev, paddr)) { ieee80211_free_txskb(ar->hw, skb); continue; } ret = ath10k_wmi_mgmt_tx_send(ar, skb, paddr); if (ret) { ath10k_warn(ar, "failed to transmit management frame by ref via WMI: %d\n", ret); /* remove this msdu from idr tracking */ ath10k_wmi_cleanup_mgmt_tx_send(ar, skb); dma_unmap_single(ar->dev, paddr, skb->len, DMA_TO_DEVICE); ieee80211_free_txskb(ar->hw, skb); } } else { ret = ath10k_wmi_mgmt_tx(ar, skb); if (ret) { ath10k_warn(ar, "failed to transmit management frame via WMI: %d\n", ret); ieee80211_free_txskb(ar->hw, skb); } } } } static void ath10k_mac_txq_init(struct ieee80211_txq *txq) { struct ath10k_txq *artxq; if (!txq) return; artxq = (void *)txq->drv_priv; INIT_LIST_HEAD(&artxq->list); } static void ath10k_mac_txq_unref(struct ath10k *ar, struct ieee80211_txq *txq) { struct ath10k_skb_cb *cb; struct sk_buff *msdu; int msdu_id; if (!txq) return; spin_lock_bh(&ar->htt.tx_lock); idr_for_each_entry(&ar->htt.pending_tx, msdu, msdu_id) { cb = ATH10K_SKB_CB(msdu); if (cb->txq == txq) cb->txq = NULL; } spin_unlock_bh(&ar->htt.tx_lock); } struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar, u16 peer_id, u8 tid) { struct ath10k_peer *peer; lockdep_assert_held(&ar->data_lock); peer = ar->peer_map[peer_id]; if (!peer) return NULL; if (peer->removed) return NULL; if (peer->sta) return peer->sta->txq[tid]; else if (peer->vif) return peer->vif->txq; else return NULL; } static bool ath10k_mac_tx_can_push(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; struct ath10k_txq *artxq = (void *)txq->drv_priv; /* No need to get locks */ if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) return true; if (ar->htt.num_pending_tx < ar->htt.tx_q_state.num_push_allowed) return true; if (artxq->num_fw_queued < artxq->num_push_allowed) return true; return false; } /* Return estimated airtime in microsecond, which is calculated using last * reported TX rate. This is just a rough estimation because host driver has no * knowledge of the actual transmit rate, retries or aggregation. 
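* As a rough worked example: a 1500 byte MSDU (pktlen 1538 after the assumed 30 byte MAC header + 8 byte SNAP) at a last reported rate of 390 Mbps (3900 in 100kbps units) estimates to roughly 12304 * 10 / 3900 ~= 31 us before the IFS overhead is added.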
If actual * airtime can be reported by firmware, then delta between estimated and actual * airtime can be adjusted from deficit. */ #define IEEE80211_ATF_OVERHEAD 100 /* IFS + some slot time */ #define IEEE80211_ATF_OVERHEAD_IFS 16 /* IFS only */ static u16 ath10k_mac_update_airtime(struct ath10k *ar, struct ieee80211_txq *txq, struct sk_buff *skb) { struct ath10k_sta *arsta; u32 pktlen; u16 airtime = 0; if (!txq || !txq->sta) return airtime; if (test_bit(WMI_SERVICE_REPORT_AIRTIME, ar->wmi.svc_map)) return airtime; spin_lock_bh(&ar->data_lock); arsta = (struct ath10k_sta *)txq->sta->drv_priv; pktlen = skb->len + 38; /* Assume MAC header 30, SNAP 8 for most case */ if (arsta->last_tx_bitrate) { /* airtime in us, last_tx_bitrate in 100kbps */ airtime = (pktlen * 8 * (1000 / 100)) / arsta->last_tx_bitrate; /* overhead for media access time and IFS */ airtime += IEEE80211_ATF_OVERHEAD_IFS; } else { /* This is mostly for throttle excessive BC/MC frames, and the * airtime/rate doesn't need be exact. Airtime of BC/MC frames * in 2G get some discount, which helps prevent very low rate * frames from being blocked for too long. */ airtime = (pktlen * 8 * (1000 / 100)) / 60; /* 6M */ airtime += IEEE80211_ATF_OVERHEAD; } spin_unlock_bh(&ar->data_lock); return airtime; } int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; struct ath10k_htt *htt = &ar->htt; struct ath10k_txq *artxq = (void *)txq->drv_priv; struct ieee80211_vif *vif = txq->vif; struct ieee80211_sta *sta = txq->sta; enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; struct sk_buff *skb; struct ieee80211_hdr *hdr; size_t skb_len; bool is_mgmt, is_presp; int ret; u16 airtime; spin_lock_bh(&ar->htt.tx_lock); ret = ath10k_htt_tx_inc_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); if (ret) return ret; skb = ieee80211_tx_dequeue_ni(hw, txq); if (!skb) { spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return -ENOENT; } airtime = ath10k_mac_update_airtime(ar, txq, skb); ath10k_mac_tx_h_fill_cb(ar, vif, txq, sta, skb, airtime); skb_len = skb->len; txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); is_mgmt = (txpath == ATH10K_MAC_TX_HTT_MGMT); if (is_mgmt) { hdr = (struct ieee80211_hdr *)skb->data; is_presp = ieee80211_is_probe_resp(hdr->frame_control); spin_lock_bh(&ar->htt.tx_lock); ret = ath10k_htt_tx_mgmt_inc_pending(htt, is_mgmt, is_presp); if (ret) { ath10k_htt_tx_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return ret; } spin_unlock_bh(&ar->htt.tx_lock); } ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, false); if (unlikely(ret)) { ath10k_warn(ar, "failed to push frame: %d\n", ret); spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); if (is_mgmt) ath10k_htt_tx_mgmt_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return ret; } spin_lock_bh(&ar->htt.tx_lock); artxq->num_fw_queued++; spin_unlock_bh(&ar->htt.tx_lock); return skb_len; } static int ath10k_mac_schedule_txq(struct ieee80211_hw *hw, u32 ac) { struct ieee80211_txq *txq; int ret = 0; ieee80211_txq_schedule_start(hw, ac); while ((txq = ieee80211_next_txq(hw, ac))) { while (ath10k_mac_tx_can_push(hw, txq)) { ret = ath10k_mac_tx_push_txq(hw, txq); if (ret < 0) break; } ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); if (ret == -EBUSY) break; } ieee80211_txq_schedule_end(hw, ac); return ret; } void ath10k_mac_tx_push_pending(struct ath10k *ar) { struct 
ieee80211_hw *hw = ar->hw; u32 ac; if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH) return; if (ar->htt.num_pending_tx >= (ar->htt.max_num_pending_tx / 2)) return; rcu_read_lock(); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { if (ath10k_mac_schedule_txq(hw, ac) == -EBUSY) break; } rcu_read_unlock(); } EXPORT_SYMBOL(ath10k_mac_tx_push_pending); /************/ /* Scanning */ /************/ void __ath10k_scan_finish(struct ath10k *ar) { lockdep_assert_held(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: break; case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: if (ar->scan.is_roc && ar->scan.roc_notify) ieee80211_remain_on_channel_expired(ar->hw); fallthrough; case ATH10K_SCAN_STARTING: if (!ar->scan.is_roc) { struct cfg80211_scan_info info = { .aborted = ((ar->scan.state == ATH10K_SCAN_ABORTING) || (ar->scan.state == ATH10K_SCAN_STARTING)), }; ieee80211_scan_completed(ar->hw, &info); } ar->scan.state = ATH10K_SCAN_IDLE; ar->scan_channel = NULL; ar->scan.roc_freq = 0; ath10k_offchan_tx_purge(ar); cancel_delayed_work(&ar->scan.timeout); complete(&ar->scan.completed); break; } } void ath10k_scan_finish(struct ath10k *ar) { spin_lock_bh(&ar->data_lock); __ath10k_scan_finish(ar); spin_unlock_bh(&ar->data_lock); } static int ath10k_scan_stop(struct ath10k *ar) { struct wmi_stop_scan_arg arg = { .req_id = 1, /* FIXME */ .req_type = WMI_SCAN_STOP_ONE, .u.scan_id = ATH10K_SCAN_ID, }; int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_stop_scan(ar, &arg); if (ret) { ath10k_warn(ar, "failed to stop wmi scan: %d\n", ret); goto out; } ret = wait_for_completion_timeout(&ar->scan.completed, 3 * HZ); if (ret == 0) { ath10k_warn(ar, "failed to receive scan abortion completion: timed out\n"); ret = -ETIMEDOUT; } else if (ret > 0) { ret = 0; } out: /* Scan state should be updated upon scan completion but in case * firmware fails to deliver the event (for whatever reason) it is * desired to clean up scan state anyway. Firmware may have just * dropped the scan completion event delivery due to transport pipe * being overflown with data and/or it can recover on its own before * next scan request is submitted. */ spin_lock_bh(&ar->data_lock); if (ar->scan.state != ATH10K_SCAN_IDLE) __ath10k_scan_finish(ar); spin_unlock_bh(&ar->data_lock); return ret; } static void ath10k_scan_abort(struct ath10k *ar) { int ret; lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: /* This can happen if timeout worker kicked in and called * abortion while scan completion was being processed. 
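* There is nothing left to abort in that case; the scan state is already idle.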
*/ break; case ATH10K_SCAN_STARTING: case ATH10K_SCAN_ABORTING: ath10k_warn(ar, "refusing scan abortion due to invalid scan state: %s (%d)\n", ath10k_scan_state_str(ar->scan.state), ar->scan.state); break; case ATH10K_SCAN_RUNNING: ar->scan.state = ATH10K_SCAN_ABORTING; spin_unlock_bh(&ar->data_lock); ret = ath10k_scan_stop(ar); if (ret) ath10k_warn(ar, "failed to abort scan: %d\n", ret); spin_lock_bh(&ar->data_lock); break; } spin_unlock_bh(&ar->data_lock); } void ath10k_scan_timeout_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, scan.timeout.work); mutex_lock(&ar->conf_mutex); ath10k_scan_abort(ar); mutex_unlock(&ar->conf_mutex); } static int ath10k_start_scan(struct ath10k *ar, const struct wmi_start_scan_arg *arg) { int ret; lockdep_assert_held(&ar->conf_mutex); ret = ath10k_wmi_start_scan(ar, arg); if (ret) return ret; ret = wait_for_completion_timeout(&ar->scan.started, 1 * HZ); if (ret == 0) { ret = ath10k_scan_stop(ar); if (ret) ath10k_warn(ar, "failed to stop scan: %d\n", ret); return -ETIMEDOUT; } /* If we failed to start the scan, return error code at * this point. This is probably due to some issue in the * firmware, but no need to wedge the driver due to that... */ spin_lock_bh(&ar->data_lock); if (ar->scan.state == ATH10K_SCAN_IDLE) { spin_unlock_bh(&ar->data_lock); return -EINVAL; } spin_unlock_bh(&ar->data_lock); return 0; } /**********************/ /* mac80211 callbacks */ /**********************/ static void ath10k_mac_op_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) { struct ath10k *ar = hw->priv; struct ath10k_htt *htt = &ar->htt; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_vif *vif = info->control.vif; struct ieee80211_sta *sta = control->sta; struct ieee80211_txq *txq = NULL; enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; bool is_htt; bool is_mgmt; int ret; u16 airtime; airtime = ath10k_mac_update_airtime(ar, txq, skb); ath10k_mac_tx_h_fill_cb(ar, vif, txq, sta, skb, airtime); txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); is_htt = (txpath == ATH10K_MAC_TX_HTT || txpath == ATH10K_MAC_TX_HTT_MGMT); is_mgmt = (txpath == ATH10K_MAC_TX_HTT_MGMT); if (is_htt) { bool is_presp = false; spin_lock_bh(&ar->htt.tx_lock); if (!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) { struct ieee80211_hdr *hdr = (void *)skb->data; is_presp = ieee80211_is_probe_resp(hdr->frame_control); } ret = ath10k_htt_tx_inc_pending(htt); if (ret) { ath10k_warn(ar, "failed to increase tx pending count: %d, dropping\n", ret); spin_unlock_bh(&ar->htt.tx_lock); ieee80211_free_txskb(ar->hw, skb); return; } ret = ath10k_htt_tx_mgmt_inc_pending(htt, is_mgmt, is_presp); if (ret) { ath10k_dbg(ar, ATH10K_DBG_MAC, "failed to increase tx mgmt pending count: %d, dropping\n", ret); ath10k_htt_tx_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); ieee80211_free_txskb(ar->hw, skb); return; } spin_unlock_bh(&ar->htt.tx_lock); } ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, false); if (ret) { ath10k_warn(ar, "failed to transmit frame: %d\n", ret); if (is_htt) { spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); if (is_mgmt) ath10k_htt_tx_mgmt_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); } return; } } static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; int ret; u8 ac = txq->ac; ath10k_htt_tx_txq_update(hw, txq); if 
(ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH) return; spin_lock_bh(&ar->queue_lock[ac]); ieee80211_txq_schedule_start(hw, ac); txq = ieee80211_next_txq(hw, ac); if (!txq) goto out; while (ath10k_mac_tx_can_push(hw, txq)) { ret = ath10k_mac_tx_push_txq(hw, txq); if (ret < 0) break; } ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); out: ieee80211_txq_schedule_end(hw, ac); spin_unlock_bh(&ar->queue_lock[ac]); } /* Must not be called with conf_mutex held as workers can use that also. */ void ath10k_drain_tx(struct ath10k *ar) { lockdep_assert_not_held(&ar->conf_mutex); /* make sure rcu-protected mac80211 tx path itself is drained */ synchronize_net(); ath10k_offchan_tx_purge(ar); ath10k_mgmt_over_wmi_tx_purge(ar); cancel_work_sync(&ar->offchan_tx_work); cancel_work_sync(&ar->wmi_mgmt_tx_work); } void ath10k_halt(struct ath10k *ar) { struct ath10k_vif *arvif; lockdep_assert_held(&ar->conf_mutex); clear_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ar->filter_flags = 0; ar->monitor = false; ar->monitor_arvif = NULL; if (ar->monitor_started) ath10k_monitor_stop(ar); ar->monitor_started = false; ar->tx_paused = 0; ath10k_scan_finish(ar); ath10k_peer_cleanup_all(ar); ath10k_stop_radar_confirmation(ar); ath10k_core_stop(ar); ath10k_hif_power_down(ar); spin_lock_bh(&ar->data_lock); list_for_each_entry(arvif, &ar->arvifs, list) ath10k_mac_vif_beacon_cleanup(arvif); spin_unlock_bh(&ar->data_lock); } static int ath10k_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant) { struct ath10k *ar = hw->priv; mutex_lock(&ar->conf_mutex); *tx_ant = ar->cfg_tx_chainmask; *rx_ant = ar->cfg_rx_chainmask; mutex_unlock(&ar->conf_mutex); return 0; } static bool ath10k_check_chain_mask(struct ath10k *ar, u32 cm, const char *dbg) { /* It is not clear that allowing gaps in chainmask * is helpful. Probably it will not do what user * is hoping for, so warn in that case. */ if (cm == 15 || cm == 7 || cm == 3 || cm == 1 || cm == 0) return true; ath10k_warn(ar, "mac %s antenna chainmask is invalid: 0x%x. 
Suggested values: 15, 7, 3, 1 or 0.\n", dbg, cm); return false; } static int ath10k_mac_get_vht_cap_bf_sts(struct ath10k *ar) { int nsts = ar->vht_cap_info; nsts &= IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; nsts >>= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT; /* If firmware does not deliver to host number of space-time * streams supported, assume it support up to 4 BF STS and return * the value for VHT CAP: nsts-1) */ if (nsts == 0) return 3; return nsts; } static int ath10k_mac_get_vht_cap_bf_sound_dim(struct ath10k *ar) { int sound_dim = ar->vht_cap_info; sound_dim &= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK; sound_dim >>= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT; /* If the sounding dimension is not advertised by the firmware, * let's use a default value of 1 */ if (sound_dim == 0) return 1; return sound_dim; } static struct ieee80211_sta_vht_cap ath10k_create_vht_cap(struct ath10k *ar) { struct ieee80211_sta_vht_cap vht_cap = {0}; struct ath10k_hw_params *hw = &ar->hw_params; u16 mcs_map; u32 val; int i; vht_cap.vht_supported = 1; vht_cap.cap = ar->vht_cap_info; if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)) { val = ath10k_mac_get_vht_cap_bf_sts(ar); val <<= IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT; val &= IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; vht_cap.cap |= val; } if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)) { val = ath10k_mac_get_vht_cap_bf_sound_dim(ar); val <<= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT; val &= IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK; vht_cap.cap |= val; } mcs_map = 0; for (i = 0; i < 8; i++) { if ((i < ar->num_rf_chains) && (ar->cfg_tx_chainmask & BIT(i))) mcs_map |= IEEE80211_VHT_MCS_SUPPORT_0_9 << (i * 2); else mcs_map |= IEEE80211_VHT_MCS_NOT_SUPPORTED << (i * 2); } if (ar->cfg_tx_chainmask <= 1) vht_cap.cap &= ~IEEE80211_VHT_CAP_TXSTBC; vht_cap.vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map); vht_cap.vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map); /* If we are supporting 160Mhz or 80+80, then the NIC may be able to do * a restricted NSS for 160 or 80+80 vs what it can do for 80Mhz. Give * user-space a clue if that is the case. 
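* When the vht160 highest rates are left at zero no separate rate limit is advertised and the MCS map alone applies.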
*/ if ((vht_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) && (hw->vht160_mcs_rx_highest != 0 || hw->vht160_mcs_tx_highest != 0)) { vht_cap.vht_mcs.rx_highest = cpu_to_le16(hw->vht160_mcs_rx_highest); vht_cap.vht_mcs.tx_highest = cpu_to_le16(hw->vht160_mcs_tx_highest); } return vht_cap; } static struct ieee80211_sta_ht_cap ath10k_get_ht_cap(struct ath10k *ar) { int i; struct ieee80211_sta_ht_cap ht_cap = {0}; if (!(ar->ht_cap_info & WMI_HT_CAP_ENABLED)) return ht_cap; ht_cap.ht_supported = 1; ht_cap.ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K; ht_cap.ampdu_density = IEEE80211_HT_MPDU_DENSITY_8; ht_cap.cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; ht_cap.cap |= IEEE80211_HT_CAP_DSSSCCK40; ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED << IEEE80211_HT_CAP_SM_PS_SHIFT; if (ar->ht_cap_info & WMI_HT_CAP_HT20_SGI) ht_cap.cap |= IEEE80211_HT_CAP_SGI_20; if (ar->ht_cap_info & WMI_HT_CAP_HT40_SGI) ht_cap.cap |= IEEE80211_HT_CAP_SGI_40; if (ar->ht_cap_info & WMI_HT_CAP_DYNAMIC_SMPS) { u32 smps; smps = WLAN_HT_CAP_SM_PS_DYNAMIC; smps <<= IEEE80211_HT_CAP_SM_PS_SHIFT; ht_cap.cap |= smps; } if (ar->ht_cap_info & WMI_HT_CAP_TX_STBC && (ar->cfg_tx_chainmask > 1)) ht_cap.cap |= IEEE80211_HT_CAP_TX_STBC; if (ar->ht_cap_info & WMI_HT_CAP_RX_STBC) { u32 stbc; stbc = ar->ht_cap_info; stbc &= WMI_HT_CAP_RX_STBC; stbc >>= WMI_HT_CAP_RX_STBC_MASK_SHIFT; stbc <<= IEEE80211_HT_CAP_RX_STBC_SHIFT; stbc &= IEEE80211_HT_CAP_RX_STBC; ht_cap.cap |= stbc; } if (ar->ht_cap_info & WMI_HT_CAP_LDPC || (ar->ht_cap_info & WMI_HT_CAP_RX_LDPC && (ar->ht_cap_info & WMI_HT_CAP_TX_LDPC))) ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING; if (ar->ht_cap_info & WMI_HT_CAP_L_SIG_TXOP_PROT) ht_cap.cap |= IEEE80211_HT_CAP_LSIG_TXOP_PROT; /* max AMSDU is implicitly taken from vht_cap_info */ if (ar->vht_cap_info & WMI_VHT_CAP_MAX_MPDU_LEN_MASK) ht_cap.cap |= IEEE80211_HT_CAP_MAX_AMSDU; for (i = 0; i < ar->num_rf_chains; i++) { if (ar->cfg_rx_chainmask & BIT(i)) ht_cap.mcs.rx_mask[i] = 0xFF; } ht_cap.mcs.tx_params |= IEEE80211_HT_MCS_TX_DEFINED; return ht_cap; } static void ath10k_mac_setup_ht_vht_cap(struct ath10k *ar) { struct ieee80211_supported_band *band; struct ieee80211_sta_vht_cap vht_cap; struct ieee80211_sta_ht_cap ht_cap; ht_cap = ath10k_get_ht_cap(ar); vht_cap = ath10k_create_vht_cap(ar); if (ar->phy_capability & WHAL_WLAN_11G_CAPABILITY) { band = &ar->mac.sbands[NL80211_BAND_2GHZ]; band->ht_cap = ht_cap; } if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) { band = &ar->mac.sbands[NL80211_BAND_5GHZ]; band->ht_cap = ht_cap; band->vht_cap = vht_cap; } } static int __ath10k_set_antenna(struct ath10k *ar, u32 tx_ant, u32 rx_ant) { int ret; bool is_valid_tx_chain_mask, is_valid_rx_chain_mask; lockdep_assert_held(&ar->conf_mutex); is_valid_tx_chain_mask = ath10k_check_chain_mask(ar, tx_ant, "tx"); is_valid_rx_chain_mask = ath10k_check_chain_mask(ar, rx_ant, "rx"); if (!is_valid_tx_chain_mask || !is_valid_rx_chain_mask) return -EINVAL; ar->cfg_tx_chainmask = tx_ant; ar->cfg_rx_chainmask = rx_ant; if ((ar->state != ATH10K_STATE_ON) && (ar->state != ATH10K_STATE_RESTARTED)) return 0; ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->tx_chain_mask, tx_ant); if (ret) { ath10k_warn(ar, "failed to set tx-chainmask: %d, req 0x%x\n", ret, tx_ant); return ret; } ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->rx_chain_mask, rx_ant); if (ret) { ath10k_warn(ar, "failed to set rx-chainmask: %d, req 0x%x\n", ret, rx_ant); return ret; } /* Reload HT/VHT capability */ ath10k_mac_setup_ht_vht_cap(ar); return 0; } static int ath10k_set_antenna(struct 
ieee80211_hw *hw, u32 tx_ant, u32 rx_ant) { struct ath10k *ar = hw->priv; int ret; mutex_lock(&ar->conf_mutex); ret = __ath10k_set_antenna(ar, tx_ant, rx_ant); mutex_unlock(&ar->conf_mutex); return ret; } static int __ath10k_fetch_bb_timing_dt(struct ath10k *ar, struct wmi_bb_timing_cfg_arg *bb_timing) { struct device_node *node; const char *fem_name; int ret; node = ar->dev->of_node; if (!node) return -ENOENT; ret = of_property_read_string_index(node, "ext-fem-name", 0, &fem_name); if (ret) return -ENOENT; /* * If external Front End module used in hardware, then default base band timing * parameter cannot be used since they were fine tuned for reference hardware, * so choosing different value suitable for that external FEM. */ if (!strcmp("microsemi-lx5586", fem_name)) { bb_timing->bb_tx_timing = 0x00; bb_timing->bb_xpa_timing = 0x0101; } else { return -ENOENT; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot bb_tx_timing 0x%x bb_xpa_timing 0x%x\n", bb_timing->bb_tx_timing, bb_timing->bb_xpa_timing); return 0; } static int ath10k_mac_rfkill_config(struct ath10k *ar) { u32 param; int ret; if (ar->hw_values->rfkill_pin == 0) { ath10k_warn(ar, "ath10k does not support hardware rfkill with this device\n"); return -EOPNOTSUPP; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac rfkill_pin %d rfkill_cfg %d rfkill_on_level %d", ar->hw_values->rfkill_pin, ar->hw_values->rfkill_cfg, ar->hw_values->rfkill_on_level); param = FIELD_PREP(WMI_TLV_RFKILL_CFG_RADIO_LEVEL, ar->hw_values->rfkill_on_level) | FIELD_PREP(WMI_TLV_RFKILL_CFG_GPIO_PIN_NUM, ar->hw_values->rfkill_pin) | FIELD_PREP(WMI_TLV_RFKILL_CFG_PIN_AS_GPIO, ar->hw_values->rfkill_cfg); ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->rfkill_config, param); if (ret) { ath10k_warn(ar, "failed to set rfkill config 0x%x: %d\n", param, ret); return ret; } return 0; } int ath10k_mac_rfkill_enable_radio(struct ath10k *ar, bool enable) { enum wmi_tlv_rfkill_enable_radio param; int ret; if (enable) param = WMI_TLV_RFKILL_ENABLE_RADIO_ON; else param = WMI_TLV_RFKILL_ENABLE_RADIO_OFF; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac rfkill enable %d", param); ret = ath10k_wmi_pdev_set_param(ar, ar->wmi.pdev_param->rfkill_enable, param); if (ret) { ath10k_warn(ar, "failed to set rfkill enable param %d: %d\n", param, ret); return ret; } return 0; } static int ath10k_start(struct ieee80211_hw *hw) { struct ath10k *ar = hw->priv; u32 param; int ret = 0; struct wmi_bb_timing_cfg_arg bb_timing = {0}; /* * This makes sense only when restarting hw. It is harmless to call * unconditionally. This is necessary to make sure no HTT/WMI tx * commands will be submitted while restarting. 
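* ath10k_drain_tx() is therefore called before conf_mutex is taken; the tx workers it flushes may take that mutex themselves.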
*/ ath10k_drain_tx(ar); mutex_lock(&ar->conf_mutex); switch (ar->state) { case ATH10K_STATE_OFF: ar->state = ATH10K_STATE_ON; break; case ATH10K_STATE_RESTARTING: ar->state = ATH10K_STATE_RESTARTED; break; case ATH10K_STATE_ON: case ATH10K_STATE_RESTARTED: case ATH10K_STATE_WEDGED: WARN_ON(1); ret = -EINVAL; goto err; case ATH10K_STATE_UTF: ret = -EBUSY; goto err; } spin_lock_bh(&ar->data_lock); if (ar->hw_rfkill_on) { ar->hw_rfkill_on = false; spin_unlock_bh(&ar->data_lock); goto err; } spin_unlock_bh(&ar->data_lock); ret = ath10k_hif_power_up(ar, ATH10K_FIRMWARE_MODE_NORMAL); if (ret) { ath10k_err(ar, "Could not init hif: %d\n", ret); goto err_off; } ret = ath10k_core_start(ar, ATH10K_FIRMWARE_MODE_NORMAL, &ar->normal_mode_fw); if (ret) { ath10k_err(ar, "Could not init core: %d\n", ret); goto err_power_down; } if (ar->sys_cap_info & WMI_TLV_SYS_CAP_INFO_RFKILL) { ret = ath10k_mac_rfkill_config(ar); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to configure rfkill: %d", ret); goto err_core_stop; } } param = ar->wmi.pdev_param->pmf_qos; ret = ath10k_wmi_pdev_set_param(ar, param, 1); if (ret) { ath10k_warn(ar, "failed to enable PMF QOS: %d\n", ret); goto err_core_stop; } param = ar->wmi.pdev_param->dynamic_bw; ret = ath10k_wmi_pdev_set_param(ar, param, 1); if (ret) { ath10k_warn(ar, "failed to enable dynamic BW: %d\n", ret); goto err_core_stop; } if (test_bit(WMI_SERVICE_SPOOF_MAC_SUPPORT, ar->wmi.svc_map)) { ret = ath10k_wmi_scan_prob_req_oui(ar, ar->mac_addr); if (ret) { ath10k_err(ar, "failed to set prob req oui: %i\n", ret); goto err_core_stop; } } if (test_bit(WMI_SERVICE_ADAPTIVE_OCS, ar->wmi.svc_map)) { ret = ath10k_wmi_adaptive_qcs(ar, true); if (ret) { ath10k_warn(ar, "failed to enable adaptive qcs: %d\n", ret); goto err_core_stop; } } if (test_bit(WMI_SERVICE_BURST, ar->wmi.svc_map)) { param = ar->wmi.pdev_param->burst_enable; ret = ath10k_wmi_pdev_set_param(ar, param, 0); if (ret) { ath10k_warn(ar, "failed to disable burst: %d\n", ret); goto err_core_stop; } } param = ar->wmi.pdev_param->idle_ps_config; ret = ath10k_wmi_pdev_set_param(ar, param, 1); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to enable idle_ps_config: %d\n", ret); goto err_core_stop; } __ath10k_set_antenna(ar, ar->cfg_tx_chainmask, ar->cfg_rx_chainmask); /* * By default FW set ARP frames ac to voice (6). In that case ARP * exchange is not working properly for UAPSD enabled AP. ARP requests * which arrives with access category 0 are processed by network stack * and send back with access category 0, but FW changes access category * to 6. Set ARP frames access category to best effort (0) solves * this problem. 
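* (6 and 0 here are 802.1D user priorities: 6 maps to the voice access category, 0 to best effort.)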
*/ param = ar->wmi.pdev_param->arp_ac_override; ret = ath10k_wmi_pdev_set_param(ar, param, 0); if (ret) { ath10k_warn(ar, "failed to set arp ac override parameter: %d\n", ret); goto err_core_stop; } if (test_bit(ATH10K_FW_FEATURE_SUPPORTS_ADAPTIVE_CCA, ar->running_fw->fw_file.fw_features)) { ret = ath10k_wmi_pdev_enable_adaptive_cca(ar, 1, WMI_CCA_DETECT_LEVEL_AUTO, WMI_CCA_DETECT_MARGIN_AUTO); if (ret) { ath10k_warn(ar, "failed to enable adaptive cca: %d\n", ret); goto err_core_stop; } } param = ar->wmi.pdev_param->ani_enable; ret = ath10k_wmi_pdev_set_param(ar, param, 1); if (ret) { ath10k_warn(ar, "failed to enable ani by default: %d\n", ret); goto err_core_stop; } ar->ani_enabled = true; if (ath10k_peer_stats_enabled(ar)) { param = ar->wmi.pdev_param->peer_stats_update_period; ret = ath10k_wmi_pdev_set_param(ar, param, PEER_DEFAULT_STATS_UPDATE_PERIOD); if (ret) { ath10k_warn(ar, "failed to set peer stats period : %d\n", ret); goto err_core_stop; } } param = ar->wmi.pdev_param->enable_btcoex; if (test_bit(WMI_SERVICE_COEX_GPIO, ar->wmi.svc_map) && test_bit(ATH10K_FW_FEATURE_BTCOEX_PARAM, ar->running_fw->fw_file.fw_features) && ar->coex_support) { ret = ath10k_wmi_pdev_set_param(ar, param, 0); if (ret) { ath10k_warn(ar, "failed to set btcoex param: %d\n", ret); goto err_core_stop; } clear_bit(ATH10K_FLAG_BTCOEX, &ar->dev_flags); } if (test_bit(WMI_SERVICE_BB_TIMING_CONFIG_SUPPORT, ar->wmi.svc_map)) { ret = __ath10k_fetch_bb_timing_dt(ar, &bb_timing); if (!ret) { ret = ath10k_wmi_pdev_bb_timing(ar, &bb_timing); if (ret) { ath10k_warn(ar, "failed to set bb timings: %d\n", ret); goto err_core_stop; } } } ar->num_started_vdevs = 0; ath10k_regd_update(ar); ath10k_spectral_start(ar); ath10k_thermal_set_throttling(ar); ar->radar_conf_state = ATH10K_RADAR_CONFIRMATION_IDLE; mutex_unlock(&ar->conf_mutex); return 0; err_core_stop: ath10k_core_stop(ar); err_power_down: ath10k_hif_power_down(ar); err_off: ar->state = ATH10K_STATE_OFF; err: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_stop(struct ieee80211_hw *hw) { struct ath10k *ar = hw->priv; u32 opt; ath10k_drain_tx(ar); mutex_lock(&ar->conf_mutex); if (ar->state != ATH10K_STATE_OFF) { if (!ar->hw_rfkill_on) { /* If the current driver state is RESTARTING but not yet * fully RESTARTED because of incoming suspend event, * then ath10k_halt() is already called via * ath10k_core_restart() and should not be called here. */ if (ar->state != ATH10K_STATE_RESTARTING) { ath10k_halt(ar); } else { /* Suspending here, because when in RESTARTING * state, ath10k_core_stop() skips * ath10k_wait_for_suspend(). 
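* Instead the target is explicitly asked to suspend with interrupts disabled before the state is set to OFF.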
*/ opt = WMI_PDEV_SUSPEND_AND_DISABLE_INTR; ath10k_wait_for_suspend(ar, opt); } } ar->state = ATH10K_STATE_OFF; } mutex_unlock(&ar->conf_mutex); cancel_work_sync(&ar->set_coverage_class_work); cancel_delayed_work_sync(&ar->scan.timeout); cancel_work_sync(&ar->restart_work); } static int ath10k_config_ps(struct ath10k *ar) { struct ath10k_vif *arvif; int ret = 0; lockdep_assert_held(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) { ret = ath10k_mac_vif_setup_ps(arvif); if (ret) { ath10k_warn(ar, "failed to setup powersave: %d\n", ret); break; } } return ret; } static int ath10k_config(struct ieee80211_hw *hw, u32 changed) { struct ath10k *ar = hw->priv; struct ieee80211_conf *conf = &hw->conf; int ret = 0; mutex_lock(&ar->conf_mutex); if (changed & IEEE80211_CONF_CHANGE_PS) ath10k_config_ps(ar); if (changed & IEEE80211_CONF_CHANGE_MONITOR) { ar->monitor = conf->flags & IEEE80211_CONF_MONITOR; ret = ath10k_monitor_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); } mutex_unlock(&ar->conf_mutex); return ret; } static u32 get_nss_from_chainmask(u16 chain_mask) { if ((chain_mask & 0xf) == 0xf) return 4; else if ((chain_mask & 0x7) == 0x7) return 3; else if ((chain_mask & 0x3) == 0x3) return 2; return 1; } static int ath10k_mac_set_txbf_conf(struct ath10k_vif *arvif) { u32 value = 0; struct ath10k *ar = arvif->ar; int nsts; int sound_dim; if (ath10k_wmi_get_txbf_conf_scheme(ar) != WMI_TXBF_CONF_BEFORE_ASSOC) return 0; nsts = ath10k_mac_get_vht_cap_bf_sts(ar); if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)) value |= SM(nsts, WMI_TXBF_STS_CAP_OFFSET); sound_dim = ath10k_mac_get_vht_cap_bf_sound_dim(ar); if (ar->vht_cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)) value |= SM(sound_dim, WMI_BF_SOUND_DIM_OFFSET); if (!value) return 0; if (ar->vht_cap_info & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFER; if (ar->vht_cap_info & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) value |= (WMI_VDEV_PARAM_TXBF_MU_TX_BFER | WMI_VDEV_PARAM_TXBF_SU_TX_BFER); if (ar->vht_cap_info & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) value |= WMI_VDEV_PARAM_TXBF_SU_TX_BFEE; if (ar->vht_cap_info & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) value |= (WMI_VDEV_PARAM_TXBF_MU_TX_BFEE | WMI_VDEV_PARAM_TXBF_SU_TX_BFEE); return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, ar->wmi.vdev_param->txbf, value); } static void ath10k_update_vif_offload(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k *ar = hw->priv; u32 vdev_param; int ret; if (ath10k_frame_mode != ATH10K_HW_TXRX_ETHERNET || ar->wmi.vdev_param->tx_encap_type == WMI_VDEV_PARAM_UNSUPPORTED || (vif->type != NL80211_IFTYPE_STATION && vif->type != NL80211_IFTYPE_AP)) vif->offload_flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; vdev_param = ar->wmi.vdev_param->tx_encap_type; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, ATH10K_HW_TXRX_NATIVE_WIFI); /* 10.X firmware does not support this VDEV parameter. Do not warn */ if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to set vdev %i TX encapsulation: %d\n", arvif->vdev_id, ret); } } /* * TODO: * Figure out how to handle WMI_VDEV_SUBTYPE_P2P_DEVICE, * because we will send mgmt frames without CCK. This requirement * for P2P_FIND/GO_NEG should be handled by checking CCK flag * in the TX packet. 
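* For now the NO_CCK flag is only noted with a debug message in ath10k_mac_tx() and not acted upon.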
*/ static int ath10k_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; enum wmi_sta_powersave_param param; int ret = 0; u32 value; int bit; int i; u32 vdev_param; vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; mutex_lock(&ar->conf_mutex); memset(arvif, 0, sizeof(*arvif)); ath10k_mac_txq_init(vif->txq); arvif->ar = ar; arvif->vif = vif; INIT_LIST_HEAD(&arvif->list); INIT_WORK(&arvif->ap_csa_work, ath10k_mac_vif_ap_csa_work); INIT_DELAYED_WORK(&arvif->connection_loss_work, ath10k_mac_vif_sta_connection_loss_work); for (i = 0; i < ARRAY_SIZE(arvif->bitrate_mask.control); i++) { arvif->bitrate_mask.control[i].legacy = 0xffffffff; memset(arvif->bitrate_mask.control[i].ht_mcs, 0xff, sizeof(arvif->bitrate_mask.control[i].ht_mcs)); memset(arvif->bitrate_mask.control[i].vht_mcs, 0xff, sizeof(arvif->bitrate_mask.control[i].vht_mcs)); } if (ar->num_peers >= ar->max_num_peers) { ath10k_warn(ar, "refusing vdev creation due to insufficient peer entry resources in firmware\n"); ret = -ENOBUFS; goto err; } if (ar->free_vdev_map == 0) { ath10k_warn(ar, "Free vdev map is empty, no more interfaces allowed.\n"); ret = -EBUSY; goto err; } bit = __ffs64(ar->free_vdev_map); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac create vdev %i map %llx\n", bit, ar->free_vdev_map); arvif->vdev_id = bit; arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE); switch (vif->type) { case NL80211_IFTYPE_P2P_DEVICE: arvif->vdev_type = WMI_VDEV_TYPE_STA; arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype (ar, WMI_VDEV_SUBTYPE_P2P_DEVICE); break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_STATION: arvif->vdev_type = WMI_VDEV_TYPE_STA; if (vif->p2p) arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype (ar, WMI_VDEV_SUBTYPE_P2P_CLIENT); break; case NL80211_IFTYPE_ADHOC: arvif->vdev_type = WMI_VDEV_TYPE_IBSS; break; case NL80211_IFTYPE_MESH_POINT: if (test_bit(WMI_SERVICE_MESH_11S, ar->wmi.svc_map)) { arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype (ar, WMI_VDEV_SUBTYPE_MESH_11S); } else if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { ret = -EINVAL; ath10k_warn(ar, "must load driver with rawmode=1 to add mesh interfaces\n"); goto err; } arvif->vdev_type = WMI_VDEV_TYPE_AP; break; case NL80211_IFTYPE_AP: arvif->vdev_type = WMI_VDEV_TYPE_AP; if (vif->p2p) arvif->vdev_subtype = ath10k_wmi_get_vdev_subtype (ar, WMI_VDEV_SUBTYPE_P2P_GO); break; case NL80211_IFTYPE_MONITOR: arvif->vdev_type = WMI_VDEV_TYPE_MONITOR; break; default: WARN_ON(1); break; } /* Using vdev_id as queue number will make it very easy to do per-vif * tx queue locking. This shouldn't wrap due to interface combinations * but do a modulo for correctness sake and prevent using offchannel tx * queues for regular vif tx. */ vif->cab_queue = arvif->vdev_id % (IEEE80211_MAX_QUEUES - 1); for (i = 0; i < ARRAY_SIZE(vif->hw_queue); i++) vif->hw_queue[i] = arvif->vdev_id % (IEEE80211_MAX_QUEUES - 1); /* Some firmware revisions don't wait for beacon tx completion before * sending another SWBA event. This could lead to hardware using old * (freed) beacon data in some cases, e.g. tx credit starvation * combined with missed TBTT. This is very rare. * * On non-IOMMU-enabled hosts this could be a possible security issue * because hw could beacon some random data on the air. On * IOMMU-enabled hosts DMAR faults would occur in most cases and target * device would crash. 
* * Since there are no beacon tx completions (implicit nor explicit) * propagated to host the only workaround for this is to allocate a * DMA-coherent buffer for a lifetime of a vif and use it for all * beacon tx commands. Worst case for this approach is some beacons may * become corrupted, e.g. have garbled IEs or out-of-date TIM bitmap. */ if (vif->type == NL80211_IFTYPE_ADHOC || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_AP) { if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) { arvif->beacon_buf = kmalloc(IEEE80211_MAX_FRAME_LEN, GFP_KERNEL); /* Using a kernel pointer in place of a dma_addr_t * token can lead to undefined behavior if that * makes it into cache management functions. Use a * known-invalid address token instead, which * avoids the warning and makes it easier to catch * bugs if it does end up getting used. */ arvif->beacon_paddr = DMA_MAPPING_ERROR; } else { arvif->beacon_buf = dma_alloc_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, &arvif->beacon_paddr, GFP_ATOMIC); } if (!arvif->beacon_buf) { ret = -ENOMEM; ath10k_warn(ar, "failed to allocate beacon buffer: %d\n", ret); goto err; } } if (test_bit(ATH10K_FLAG_HW_CRYPTO_DISABLED, &ar->dev_flags)) arvif->nohwcrypt = true; if (arvif->nohwcrypt && !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { ret = -EINVAL; ath10k_warn(ar, "cryptmode module param needed for sw crypto\n"); goto err; } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev create %d (add interface) type %d subtype %d bcnmode %s\n", arvif->vdev_id, arvif->vdev_type, arvif->vdev_subtype, arvif->beacon_buf ? "single-buf" : "per-skb"); ret = ath10k_wmi_vdev_create(ar, arvif->vdev_id, arvif->vdev_type, arvif->vdev_subtype, vif->addr); if (ret) { ath10k_warn(ar, "failed to create WMI vdev %i: %d\n", arvif->vdev_id, ret); goto err; } if (test_bit(WMI_SERVICE_VDEV_DISABLE_4_ADDR_SRC_LRN_SUPPORT, ar->wmi.svc_map)) { vdev_param = ar->wmi.vdev_param->disable_4addr_src_lrn; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, WMI_VDEV_DISABLE_4_ADDR_SRC_LRN); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "failed to disable 4addr src lrn vdev %i: %d\n", arvif->vdev_id, ret); } } ar->free_vdev_map &= ~(1LL << arvif->vdev_id); spin_lock_bh(&ar->data_lock); list_add(&arvif->list, &ar->arvifs); spin_unlock_bh(&ar->data_lock); /* It makes no sense to have firmware do keepalives. mac80211 already * takes care of this with idle connection polling. */ ret = ath10k_mac_vif_disable_keepalive(arvif); if (ret) { ath10k_warn(ar, "failed to disable keepalive on vdev %i: %d\n", arvif->vdev_id, ret); goto err_vdev_delete; } arvif->def_wep_key_idx = -1; ath10k_update_vif_offload(hw, vif); /* Configuring number of spatial stream for monitor interface is causing * target assert in qca9888 and qca6174. 
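 * For example, with the helper above get_nss_from_chainmask(0x3) == 2 and get_nss_from_chainmask(0xf) == 4 (illustrative values only; the real mask comes from ar->cfg_tx_chainmask).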
*/ if (ar->cfg_tx_chainmask && (vif->type != NL80211_IFTYPE_MONITOR)) { u16 nss = get_nss_from_chainmask(ar->cfg_tx_chainmask); vdev_param = ar->wmi.vdev_param->nss; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, nss); if (ret) { ath10k_warn(ar, "failed to set vdev %i chainmask 0x%x, nss %i: %d\n", arvif->vdev_id, ar->cfg_tx_chainmask, nss, ret); goto err_vdev_delete; } } if (arvif->vdev_type == WMI_VDEV_TYPE_AP || arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { ret = ath10k_peer_create(ar, vif, NULL, arvif->vdev_id, vif->addr, WMI_PEER_TYPE_DEFAULT); if (ret) { ath10k_warn(ar, "failed to create vdev %i peer for AP/IBSS: %d\n", arvif->vdev_id, ret); goto err_vdev_delete; } spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, vif->addr); if (!peer) { ath10k_warn(ar, "failed to lookup peer %pM on vdev %i\n", vif->addr, arvif->vdev_id); spin_unlock_bh(&ar->data_lock); ret = -ENOENT; goto err_peer_delete; } arvif->peer_id = find_first_bit(peer->peer_ids, ATH10K_MAX_NUM_PEER_IDS); spin_unlock_bh(&ar->data_lock); } else { arvif->peer_id = HTT_INVALID_PEERID; } if (arvif->vdev_type == WMI_VDEV_TYPE_AP) { ret = ath10k_mac_set_kickout(arvif); if (ret) { ath10k_warn(ar, "failed to set vdev %i kickout parameters: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } } if (arvif->vdev_type == WMI_VDEV_TYPE_STA) { param = WMI_STA_PS_PARAM_RX_WAKE_POLICY; value = WMI_STA_PS_RX_WAKE_POLICY_WAKE; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, value); if (ret) { ath10k_warn(ar, "failed to set vdev %i RX wake policy: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } ret = ath10k_mac_vif_recalc_ps_wake_threshold(arvif); if (ret) { ath10k_warn(ar, "failed to recalc ps wake threshold on vdev %i: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } ret = ath10k_mac_vif_recalc_ps_poll_count(arvif); if (ret) { ath10k_warn(ar, "failed to recalc ps poll count on vdev %i: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } } ret = ath10k_mac_set_txbf_conf(arvif); if (ret) { ath10k_warn(ar, "failed to set txbf for vdev %d: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } ret = ath10k_mac_set_rts(arvif, ar->hw->wiphy->rts_threshold); if (ret) { ath10k_warn(ar, "failed to set rts threshold for vdev %d: %d\n", arvif->vdev_id, ret); goto err_peer_delete; } arvif->txpower = vif->bss_conf.txpower; ret = ath10k_mac_txpower_recalc(ar); if (ret) { ath10k_warn(ar, "failed to recalc tx power: %d\n", ret); goto err_peer_delete; } if (test_bit(WMI_SERVICE_RTT_RESPONDER_ROLE, ar->wmi.svc_map)) { vdev_param = ar->wmi.vdev_param->rtt_responder_role; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->ftm_responder); /* It is harmless to not set FTM role. 
Do not warn */ if (ret && ret != -EOPNOTSUPP) ath10k_warn(ar, "failed to set vdev %i FTM Responder: %d\n", arvif->vdev_id, ret); } if (vif->type == NL80211_IFTYPE_MONITOR) { ar->monitor_arvif = arvif; ret = ath10k_monitor_recalc(ar); if (ret) { ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); goto err_peer_delete; } } spin_lock_bh(&ar->htt.tx_lock); if (!ar->tx_paused) ieee80211_wake_queue(ar->hw, arvif->vdev_id); spin_unlock_bh(&ar->htt.tx_lock); mutex_unlock(&ar->conf_mutex); return 0; err_peer_delete: if (arvif->vdev_type == WMI_VDEV_TYPE_AP || arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { ath10k_wmi_peer_delete(ar, arvif->vdev_id, vif->addr); ath10k_wait_for_peer_delete_done(ar, arvif->vdev_id, vif->addr); } err_vdev_delete: ath10k_wmi_vdev_delete(ar, arvif->vdev_id); ar->free_vdev_map |= 1LL << arvif->vdev_id; spin_lock_bh(&ar->data_lock); list_del(&arvif->list); spin_unlock_bh(&ar->data_lock); err: if (arvif->beacon_buf) { if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) kfree(arvif->beacon_buf); else dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, arvif->beacon_buf, arvif->beacon_paddr); arvif->beacon_buf = NULL; } mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_mac_vif_tx_unlock_all(struct ath10k_vif *arvif) { int i; for (i = 0; i < BITS_PER_LONG; i++) ath10k_mac_vif_tx_unlock(arvif, i); } static void ath10k_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; unsigned long time_left; int ret; int i; cancel_work_sync(&arvif->ap_csa_work); cancel_delayed_work_sync(&arvif->connection_loss_work); mutex_lock(&ar->conf_mutex); ret = ath10k_spectral_vif_stop(arvif); if (ret) ath10k_warn(ar, "failed to stop spectral for vdev %i: %d\n", arvif->vdev_id, ret); ar->free_vdev_map |= 1LL << arvif->vdev_id; spin_lock_bh(&ar->data_lock); list_del(&arvif->list); spin_unlock_bh(&ar->data_lock); if (arvif->vdev_type == WMI_VDEV_TYPE_AP || arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { ret = ath10k_wmi_peer_delete(arvif->ar, arvif->vdev_id, vif->addr); if (ret) ath10k_warn(ar, "failed to submit AP/IBSS self-peer removal on vdev %i: %d\n", arvif->vdev_id, ret); ath10k_wait_for_peer_delete_done(ar, arvif->vdev_id, vif->addr); kfree(arvif->u.ap.noa_data); } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %i delete (remove interface)\n", arvif->vdev_id); ret = ath10k_wmi_vdev_delete(ar, arvif->vdev_id); if (ret) ath10k_warn(ar, "failed to delete WMI vdev %i: %d\n", arvif->vdev_id, ret); if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) { time_left = wait_for_completion_timeout(&ar->vdev_delete_done, ATH10K_VDEV_DELETE_TIMEOUT_HZ); if (time_left == 0) { ath10k_warn(ar, "Timeout in receiving vdev delete response\n"); goto out; } } /* Some firmware revisions don't notify host about self-peer removal * until after associated vdev is deleted. 
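 * Hence the explicit ath10k_wait_for_peer_deleted() below even though the vdev itself is already gone at this point.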
*/ if (arvif->vdev_type == WMI_VDEV_TYPE_AP || arvif->vdev_type == WMI_VDEV_TYPE_IBSS) { ret = ath10k_wait_for_peer_deleted(ar, arvif->vdev_id, vif->addr); if (ret) ath10k_warn(ar, "failed to remove AP self-peer on vdev %i: %d\n", arvif->vdev_id, ret); spin_lock_bh(&ar->data_lock); ar->num_peers--; spin_unlock_bh(&ar->data_lock); } spin_lock_bh(&ar->data_lock); for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { peer = ar->peer_map[i]; if (!peer) continue; if (peer->vif == vif) { ath10k_warn(ar, "found vif peer %pM entry on vdev %i after it was supposedly removed\n", vif->addr, arvif->vdev_id); peer->vif = NULL; } } /* Clean this up late, less opportunity for firmware to access * DMA memory we have deleted. */ ath10k_mac_vif_beacon_cleanup(arvif); spin_unlock_bh(&ar->data_lock); ath10k_peer_cleanup(ar, arvif->vdev_id); ath10k_mac_txq_unref(ar, vif->txq); if (vif->type == NL80211_IFTYPE_MONITOR) { ar->monitor_arvif = NULL; ret = ath10k_monitor_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); } ret = ath10k_mac_txpower_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc tx power: %d\n", ret); spin_lock_bh(&ar->htt.tx_lock); ath10k_mac_vif_tx_unlock_all(arvif); spin_unlock_bh(&ar->htt.tx_lock); ath10k_mac_txq_unref(ar, vif->txq); out: mutex_unlock(&ar->conf_mutex); } /* * FIXME: Has to be verified. */ #define SUPPORTED_FILTERS \ (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ FIF_BCN_PRBRESP_PROMISC | \ FIF_PROBE_REQ | \ FIF_FCSFAIL) static void ath10k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, u64 multicast) { struct ath10k *ar = hw->priv; int ret; unsigned int supported = SUPPORTED_FILTERS; mutex_lock(&ar->conf_mutex); if (ar->hw_params.mcast_frame_registration) supported |= FIF_MCAST_ACTION; *total_flags &= supported; ar->filter_flags = *total_flags; ret = ath10k_monitor_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); mutex_unlock(&ar->conf_mutex); } static void ath10k_recalculate_mgmt_rate(struct ath10k *ar, struct ieee80211_vif *vif, struct cfg80211_chan_def *def) { struct ath10k_vif *arvif = (void *)vif->drv_priv; const struct ieee80211_supported_band *sband; u8 basic_rate_idx; int hw_rate_code; u32 vdev_param; u16 bitrate; int ret; lockdep_assert_held(&ar->conf_mutex); sband = ar->hw->wiphy->bands[def->chan->band]; basic_rate_idx = ffs(vif->bss_conf.basic_rates) - 1; bitrate = sband->bitrates[basic_rate_idx].bitrate; hw_rate_code = ath10k_mac_get_rate_hw_value(bitrate); if (hw_rate_code < 0) { ath10k_warn(ar, "bitrate not supported %d\n", bitrate); return; } vdev_param = ar->wmi.vdev_param->mgmt_rate; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, hw_rate_code); if (ret) ath10k_warn(ar, "failed to set mgmt tx rate %d\n", ret); } static void ath10k_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, u64 changed) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_chan_def def; u32 vdev_param, pdev_param, slottime, preamble; u16 bitrate, hw_value; u8 rate, rateidx; int ret = 0, mcast_rate; enum nl80211_band band; mutex_lock(&ar->conf_mutex); if (changed & BSS_CHANGED_IBSS) ath10k_control_ibss(arvif, vif); if (changed & BSS_CHANGED_BEACON_INT) { arvif->beacon_interval = info->beacon_int; vdev_param = ar->wmi.vdev_param->beacon_interval; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->beacon_interval); ath10k_dbg(ar, 
ATH10K_DBG_MAC, "mac vdev %d beacon_interval %d\n", arvif->vdev_id, arvif->beacon_interval); if (ret) ath10k_warn(ar, "failed to set beacon interval for vdev %d: %i\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_BEACON) { ath10k_dbg(ar, ATH10K_DBG_MAC, "vdev %d set beacon tx mode to staggered\n", arvif->vdev_id); pdev_param = ar->wmi.pdev_param->beacon_tx_mode; ret = ath10k_wmi_pdev_set_param(ar, pdev_param, WMI_BEACON_STAGGERED_MODE); if (ret) ath10k_warn(ar, "failed to set beacon mode for vdev %d: %i\n", arvif->vdev_id, ret); ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update beacon template: %d\n", ret); if (ieee80211_vif_is_mesh(vif)) { /* mesh doesn't use SSID but firmware needs it */ arvif->u.ap.ssid_len = 4; memcpy(arvif->u.ap.ssid, "mesh", arvif->u.ap.ssid_len); } } if (changed & BSS_CHANGED_AP_PROBE_RESP) { ret = ath10k_mac_setup_prb_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to setup probe resp template on vdev %i: %d\n", arvif->vdev_id, ret); } if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) { arvif->dtim_period = info->dtim_period; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d dtim_period %d\n", arvif->vdev_id, arvif->dtim_period); vdev_param = ar->wmi.vdev_param->dtim_period; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->dtim_period); if (ret) ath10k_warn(ar, "failed to set dtim period for vdev %d: %i\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_SSID && vif->type == NL80211_IFTYPE_AP) { arvif->u.ap.ssid_len = vif->cfg.ssid_len; if (vif->cfg.ssid_len) memcpy(arvif->u.ap.ssid, vif->cfg.ssid, vif->cfg.ssid_len); arvif->u.ap.hidden_ssid = info->hidden_ssid; } if (changed & BSS_CHANGED_BSSID && !is_zero_ether_addr(info->bssid)) ether_addr_copy(arvif->bssid, info->bssid); if (changed & BSS_CHANGED_FTM_RESPONDER && arvif->ftm_responder != info->ftm_responder && test_bit(WMI_SERVICE_RTT_RESPONDER_ROLE, ar->wmi.svc_map)) { arvif->ftm_responder = info->ftm_responder; vdev_param = ar->wmi.vdev_param->rtt_responder_role; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, arvif->ftm_responder); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d ftm_responder %d:ret %d\n", arvif->vdev_id, arvif->ftm_responder, ret); } if (changed & BSS_CHANGED_BEACON_ENABLED) ath10k_control_beaconing(arvif, info); if (changed & BSS_CHANGED_ERP_CTS_PROT) { arvif->use_cts_prot = info->use_cts_prot; ret = ath10k_recalc_rtscts_prot(arvif); if (ret) ath10k_warn(ar, "failed to recalculate rts/cts prot for vdev %d: %d\n", arvif->vdev_id, ret); if (ath10k_mac_can_set_cts_prot(arvif)) { ret = ath10k_mac_set_cts_prot(arvif); if (ret) ath10k_warn(ar, "failed to set cts protection for vdev %d: %d\n", arvif->vdev_id, ret); } } if (changed & BSS_CHANGED_ERP_SLOT) { if (info->use_short_slot) slottime = WMI_VDEV_SLOT_TIME_SHORT; /* 9us */ else slottime = WMI_VDEV_SLOT_TIME_LONG; /* 20us */ ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d slot_time %d\n", arvif->vdev_id, slottime); vdev_param = ar->wmi.vdev_param->slot_time; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, slottime); if (ret) ath10k_warn(ar, "failed to set erp slot for vdev %d: %i\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_ERP_PREAMBLE) { if (info->use_short_preamble) preamble = WMI_VDEV_PREAMBLE_SHORT; else preamble = WMI_VDEV_PREAMBLE_LONG; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d preamble %d\n", arvif->vdev_id, preamble); vdev_param = ar->wmi.vdev_param->preamble; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, preamble);
if (ret) ath10k_warn(ar, "failed to set preamble for vdev %d: %i\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_ASSOC) { if (vif->cfg.assoc) { /* Workaround: Make sure monitor vdev is not running * when associating to prevent some firmware revisions * (e.g. 10.1 and 10.2) from crashing. */ if (ar->monitor_started) ath10k_monitor_stop(ar); ath10k_bss_assoc(hw, vif, info); ath10k_monitor_recalc(ar); } else { ath10k_bss_disassoc(hw, vif); } } if (changed & BSS_CHANGED_TXPOWER) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev_id %i txpower %d\n", arvif->vdev_id, info->txpower); arvif->txpower = info->txpower; ret = ath10k_mac_txpower_recalc(ar); if (ret) ath10k_warn(ar, "failed to recalc tx power: %d\n", ret); } if (changed & BSS_CHANGED_PS) { arvif->ps = vif->cfg.ps; ret = ath10k_config_ps(ar); if (ret) ath10k_warn(ar, "failed to setup ps on vdev %i: %d\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_MCAST_RATE && !ath10k_mac_vif_chan(arvif->vif, &def)) { band = def.chan->band; mcast_rate = vif->bss_conf.mcast_rate[band]; if (mcast_rate > 0) rateidx = mcast_rate - 1; else rateidx = ffs(vif->bss_conf.basic_rates) - 1; if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) rateidx += ATH10K_MAC_FIRST_OFDM_RATE_IDX; bitrate = ath10k_wmi_legacy_rates[rateidx].bitrate; hw_value = ath10k_wmi_legacy_rates[rateidx].hw_value; if (ath10k_mac_bitrate_is_cck(bitrate)) preamble = WMI_RATE_PREAMBLE_CCK; else preamble = WMI_RATE_PREAMBLE_OFDM; rate = ATH10K_HW_RATECODE(hw_value, 0, preamble); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d mcast_rate %x\n", arvif->vdev_id, rate); vdev_param = ar->wmi.vdev_param->mcast_data_rate; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, rate); if (ret) ath10k_warn(ar, "failed to set mcast rate on vdev %i: %d\n", arvif->vdev_id, ret); vdev_param = ar->wmi.vdev_param->bcast_data_rate; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, rate); if (ret) ath10k_warn(ar, "failed to set bcast rate on vdev %i: %d\n", arvif->vdev_id, ret); } if (changed & BSS_CHANGED_BASIC_RATES && !ath10k_mac_vif_chan(arvif->vif, &def)) ath10k_recalculate_mgmt_rate(ar, vif, &def); mutex_unlock(&ar->conf_mutex); } static void ath10k_mac_op_set_coverage_class(struct ieee80211_hw *hw, s16 value) { struct ath10k *ar = hw->priv; /* This function should never be called if setting the coverage class * is not supported on this hardware. 
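 * (Presumably the registration path only advertises coverage class support to mac80211 when this hw op exists; if that assumption is ever violated the WARN_ON_ONCE() below will flag it.)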
*/ if (!ar->hw_params.hw_ops->set_coverage_class) { WARN_ON_ONCE(1); return; } ar->hw_params.hw_ops->set_coverage_class(ar, value); } struct ath10k_mac_tdls_iter_data { u32 num_tdls_stations; struct ieee80211_vif *curr_vif; }; static void ath10k_mac_tdls_vif_stations_count_iter(void *data, struct ieee80211_sta *sta) { struct ath10k_mac_tdls_iter_data *iter_data = data; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ieee80211_vif *sta_vif = arsta->arvif->vif; if (sta->tdls && sta_vif == iter_data->curr_vif) iter_data->num_tdls_stations++; } static int ath10k_mac_tdls_vif_stations_count(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k_mac_tdls_iter_data data = {}; data.curr_vif = vif; ieee80211_iterate_stations_atomic(hw, ath10k_mac_tdls_vif_stations_count_iter, &data); return data.num_tdls_stations; } static int ath10k_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_scan_request *hw_req) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_scan_request *req = &hw_req->req; struct wmi_start_scan_arg arg; int ret = 0; int i; u32 scan_timeout; mutex_lock(&ar->conf_mutex); if (ath10k_mac_tdls_vif_stations_count(hw, vif) > 0) { ret = -EBUSY; goto exit; } spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: reinit_completion(&ar->scan.started); reinit_completion(&ar->scan.completed); ar->scan.state = ATH10K_SCAN_STARTING; ar->scan.is_roc = false; ar->scan.vdev_id = arvif->vdev_id; ret = 0; break; case ATH10K_SCAN_STARTING: case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: ret = -EBUSY; break; } spin_unlock_bh(&ar->data_lock); if (ret) goto exit; memset(&arg, 0, sizeof(arg)); ath10k_wmi_start_scan_init(ar, &arg); arg.vdev_id = arvif->vdev_id; arg.scan_id = ATH10K_SCAN_ID; if (req->ie_len) { arg.ie_len = req->ie_len; memcpy(arg.ie, req->ie, arg.ie_len); } if (req->n_ssids) { arg.n_ssids = req->n_ssids; for (i = 0; i < arg.n_ssids; i++) { arg.ssids[i].len = req->ssids[i].ssid_len; arg.ssids[i].ssid = req->ssids[i].ssid; } } else { arg.scan_ctrl_flags |= WMI_SCAN_FLAG_PASSIVE; } if (req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { arg.scan_ctrl_flags |= WMI_SCAN_ADD_SPOOFED_MAC_IN_PROBE_REQ; ether_addr_copy(arg.mac_addr.addr, req->mac_addr); ether_addr_copy(arg.mac_mask.addr, req->mac_addr_mask); } if (req->n_channels) { arg.n_channels = req->n_channels; for (i = 0; i < arg.n_channels; i++) arg.channels[i] = req->channels[i]->center_freq; } /* if duration is set, default dwell times will be overwritten */ if (req->duration) { arg.dwell_time_active = req->duration; arg.dwell_time_passive = req->duration; arg.burst_duration_ms = req->duration; scan_timeout = min_t(u32, arg.max_rest_time * (arg.n_channels - 1) + (req->duration + ATH10K_SCAN_CHANNEL_SWITCH_WMI_EVT_OVERHEAD) * arg.n_channels, arg.max_scan_time); } else { scan_timeout = arg.max_scan_time; } /* Add a 200ms margin to account for event/command processing */ scan_timeout += 200; ret = ath10k_start_scan(ar, &arg); if (ret) { ath10k_warn(ar, "failed to start hw scan: %d\n", ret); spin_lock_bh(&ar->data_lock); ar->scan.state = ATH10K_SCAN_IDLE; spin_unlock_bh(&ar->data_lock); } ieee80211_queue_delayed_work(ar->hw, &ar->scan.timeout, msecs_to_jiffies(scan_timeout)); exit: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_cancel_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; mutex_lock(&ar->conf_mutex); ath10k_scan_abort(ar); 
mutex_unlock(&ar->conf_mutex); cancel_delayed_work_sync(&ar->scan.timeout); } static void ath10k_set_key_h_def_keyidx(struct ath10k *ar, struct ath10k_vif *arvif, enum set_key_cmd cmd, struct ieee80211_key_conf *key) { u32 vdev_param = arvif->ar->wmi.vdev_param->def_keyid; int ret; /* 10.1 firmware branch requires default key index to be set to group * key index after installing it. Otherwise FW/HW Txes corrupted * frames with multi-vif APs. This is not required for main firmware * branch (e.g. 636). * * This is also needed for 636 fw for IBSS-RSN to work more reliably. * * FIXME: It remains unknown if this is required for multi-vif STA * interfaces on 10.1. */ if (arvif->vdev_type != WMI_VDEV_TYPE_AP && arvif->vdev_type != WMI_VDEV_TYPE_IBSS) return; if (key->cipher == WLAN_CIPHER_SUITE_WEP40) return; if (key->cipher == WLAN_CIPHER_SUITE_WEP104) return; if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) return; if (cmd != SET_KEY) return; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, key->keyidx); if (ret) ath10k_warn(ar, "failed to set vdev %i group key as default key: %d\n", arvif->vdev_id, ret); } static int ath10k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_sta *arsta; struct ath10k_peer *peer; const u8 *peer_addr; bool is_wep = key->cipher == WLAN_CIPHER_SUITE_WEP40 || key->cipher == WLAN_CIPHER_SUITE_WEP104; int ret = 0; int ret2; u32 flags = 0; u32 flags2; /* this one needs to be done in software */ if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC || key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256 || key->cipher == WLAN_CIPHER_SUITE_BIP_CMAC_256) return 1; if (arvif->nohwcrypt) return 1; if (key->keyidx > WMI_MAX_KEY_INDEX) return -ENOSPC; mutex_lock(&ar->conf_mutex); if (sta) { arsta = (struct ath10k_sta *)sta->drv_priv; peer_addr = sta->addr; spin_lock_bh(&ar->data_lock); arsta->ucast_cipher = key->cipher; spin_unlock_bh(&ar->data_lock); } else if (arvif->vdev_type == WMI_VDEV_TYPE_STA) { peer_addr = vif->bss_conf.bssid; } else { peer_addr = vif->addr; } key->hw_key_idx = key->keyidx; if (is_wep) { if (cmd == SET_KEY) arvif->wep_keys[key->keyidx] = key; else arvif->wep_keys[key->keyidx] = NULL; } /* the peer should not disappear in mid-way (unless FW goes awry) since * we already hold conf_mutex. we just make sure its there now. */ spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, peer_addr); spin_unlock_bh(&ar->data_lock); if (!peer) { if (cmd == SET_KEY) { ath10k_warn(ar, "failed to install key for non-existent peer %pM\n", peer_addr); ret = -EOPNOTSUPP; goto exit; } else { /* if the peer doesn't exist there is no key to disable anymore */ goto exit; } } if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) flags |= WMI_KEY_PAIRWISE; else flags |= WMI_KEY_GROUP; if (is_wep) { if (cmd == DISABLE_KEY) ath10k_clear_vdev_key(arvif, key); /* When WEP keys are uploaded it's possible that there are * stations associated already (e.g. when merging) without any * keys. Static WEP needs an explicit per-peer key upload. */ if (vif->type == NL80211_IFTYPE_ADHOC && cmd == SET_KEY) ath10k_mac_vif_update_wep_key(arvif, key); /* 802.1x never sets the def_wep_key_idx so each set_key() * call changes default tx key. * * Static WEP sets def_wep_key_idx via .set_default_unicast_key * after first set_key(). 
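 * Until a default has been picked (def_wep_key_idx is still -1) the key installed here is therefore also flagged with WMI_KEY_TX_USAGE so that firmware treats it as the tx key right away.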
*/ if (cmd == SET_KEY && arvif->def_wep_key_idx == -1) flags |= WMI_KEY_TX_USAGE; } ret = ath10k_install_key(arvif, key, cmd, peer_addr, flags); if (ret) { WARN_ON(ret > 0); ath10k_warn(ar, "failed to install key for vdev %i peer %pM: %d\n", arvif->vdev_id, peer_addr, ret); goto exit; } /* mac80211 sets static WEP keys as groupwise while firmware requires * them to be installed twice as both pairwise and groupwise. */ if (is_wep && !sta && vif->type == NL80211_IFTYPE_STATION) { flags2 = flags; flags2 &= ~WMI_KEY_GROUP; flags2 |= WMI_KEY_PAIRWISE; ret = ath10k_install_key(arvif, key, cmd, peer_addr, flags2); if (ret) { WARN_ON(ret > 0); ath10k_warn(ar, "failed to install (ucast) key for vdev %i peer %pM: %d\n", arvif->vdev_id, peer_addr, ret); ret2 = ath10k_install_key(arvif, key, DISABLE_KEY, peer_addr, flags); if (ret2) { WARN_ON(ret2 > 0); ath10k_warn(ar, "failed to disable (mcast) key for vdev %i peer %pM: %d\n", arvif->vdev_id, peer_addr, ret2); } goto exit; } } ath10k_set_key_h_def_keyidx(ar, arvif, cmd, key); spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, peer_addr); if (peer && cmd == SET_KEY) peer->keys[key->keyidx] = key; else if (peer && cmd == DISABLE_KEY) peer->keys[key->keyidx] = NULL; else if (peer == NULL) /* impossible unless FW goes crazy */ ath10k_warn(ar, "Peer %pM disappeared!\n", peer_addr); spin_unlock_bh(&ar->data_lock); if (sta && sta->tdls) ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->authorize, 1); else if (sta && cmd == SET_KEY && (key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) ath10k_wmi_peer_set_param(ar, arvif->vdev_id, peer_addr, ar->wmi.peer_param->authorize, 1); exit: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_set_default_unicast_key(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int keyidx) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret; mutex_lock(&arvif->ar->conf_mutex); if (arvif->ar->state != ATH10K_STATE_ON) goto unlock; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d set keyidx %d\n", arvif->vdev_id, keyidx); ret = ath10k_wmi_vdev_set_param(arvif->ar, arvif->vdev_id, arvif->ar->wmi.vdev_param->def_keyid, keyidx); if (ret) { ath10k_warn(ar, "failed to update wep key index for vdev %d: %d\n", arvif->vdev_id, ret); goto unlock; } arvif->def_wep_key_idx = keyidx; unlock: mutex_unlock(&arvif->ar->conf_mutex); } static void ath10k_sta_rc_update_wk(struct work_struct *wk) { struct ath10k *ar; struct ath10k_vif *arvif; struct ath10k_sta *arsta; struct ieee80211_sta *sta; struct cfg80211_chan_def def; enum nl80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; u32 changed, bw, nss, smps; int err; arsta = container_of(wk, struct ath10k_sta, update_wk); sta = container_of((void *)arsta, struct ieee80211_sta, drv_priv); arvif = arsta->arvif; ar = arvif->ar; if (WARN_ON(ath10k_mac_vif_chan(arvif->vif, &def))) return; band = def.chan->band; ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; spin_lock_bh(&ar->data_lock); changed = arsta->changed; arsta->changed = 0; bw = arsta->bw; nss = arsta->nss; smps = arsta->smps; spin_unlock_bh(&ar->data_lock); mutex_lock(&ar->conf_mutex); nss = max_t(u32, 1, nss); nss = min(nss, max(ath10k_mac_max_ht_nss(ht_mcs_mask), ath10k_mac_max_vht_nss(vht_mcs_mask))); if (changed & IEEE80211_RC_BW_CHANGED) { enum wmi_phy_mode mode; mode = chan_to_phymode(&def); ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM peer bw %d phymode %d\n", 
sta->addr, bw, mode); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->phymode, mode); if (err) { ath10k_warn(ar, "failed to update STA %pM peer phymode %d: %d\n", sta->addr, mode, err); goto exit; } err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->chan_width, bw); if (err) ath10k_warn(ar, "failed to update STA %pM peer bw %d: %d\n", sta->addr, bw, err); } if (changed & IEEE80211_RC_NSS_CHANGED) { ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM nss %d\n", sta->addr, nss); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->nss, nss); if (err) ath10k_warn(ar, "failed to update STA %pM nss %d: %d\n", sta->addr, nss, err); } if (changed & IEEE80211_RC_SMPS_CHANGED) { ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM smps %d\n", sta->addr, smps); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->smps_state, smps); if (err) ath10k_warn(ar, "failed to update STA %pM smps %d: %d\n", sta->addr, smps, err); } if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) { ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM supp rates\n", sta->addr); err = ath10k_station_assoc(ar, arvif->vif, sta, true); if (err) ath10k_warn(ar, "failed to reassociate station: %pM\n", sta->addr); } exit: mutex_unlock(&ar->conf_mutex); } static int ath10k_mac_inc_num_stations(struct ath10k_vif *arvif, struct ieee80211_sta *sta) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->conf_mutex); if (arvif->vdev_type == WMI_VDEV_TYPE_STA && !sta->tdls) return 0; if (ar->num_stations >= ar->max_num_stations) return -ENOBUFS; ar->num_stations++; return 0; } static void ath10k_mac_dec_num_stations(struct ath10k_vif *arvif, struct ieee80211_sta *sta) { struct ath10k *ar = arvif->ar; lockdep_assert_held(&ar->conf_mutex); if (arvif->vdev_type == WMI_VDEV_TYPE_STA && !sta->tdls) return; ar->num_stations--; } static int ath10k_sta_set_txpwr(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret = 0; s16 txpwr; if (sta->deflink.txpwr.type == NL80211_TX_POWER_AUTOMATIC) { txpwr = 0; } else { txpwr = sta->deflink.txpwr.power; if (!txpwr) return -EINVAL; } if (txpwr > ATH10K_TX_POWER_MAX_VAL || txpwr < ATH10K_TX_POWER_MIN_VAL) return -EINVAL; mutex_lock(&ar->conf_mutex); ret = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, ar->wmi.peer_param->use_fixed_power, txpwr); if (ret) { ath10k_warn(ar, "failed to set tx power for station ret: %d\n", ret); goto out; } out: mutex_unlock(&ar->conf_mutex); return ret; } struct ath10k_mac_iter_tid_conf_data { struct ieee80211_vif *curr_vif; struct ath10k *ar; bool reset_config; }; static bool ath10k_mac_bitrate_mask_has_single_rate(struct ath10k *ar, enum nl80211_band band, const struct cfg80211_bitrate_mask *mask, int *vht_num_rates) { int num_rates = 0; int i, tmp; num_rates += hweight32(mask->control[band].legacy); for (i = 0; i < ARRAY_SIZE(mask->control[band].ht_mcs); i++) num_rates += hweight8(mask->control[band].ht_mcs[i]); *vht_num_rates = 0; for (i = 0; i < ARRAY_SIZE(mask->control[band].vht_mcs); i++) { tmp = hweight16(mask->control[band].vht_mcs[i]); num_rates += tmp; *vht_num_rates += tmp; } return num_rates == 1; } static int ath10k_mac_bitrate_mask_get_single_rate(struct ath10k *ar, enum nl80211_band band, const struct cfg80211_bitrate_mask *mask, u8 *rate, u8 *nss, bool vht_only) { int rate_idx; int i; u16 bitrate; u8 preamble; u8 
hw_rate; if (vht_only) goto next; if (hweight32(mask->control[band].legacy) == 1) { rate_idx = ffs(mask->control[band].legacy) - 1; if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) rate_idx += ATH10K_MAC_FIRST_OFDM_RATE_IDX; hw_rate = ath10k_wmi_legacy_rates[rate_idx].hw_value; bitrate = ath10k_wmi_legacy_rates[rate_idx].bitrate; if (ath10k_mac_bitrate_is_cck(bitrate)) preamble = WMI_RATE_PREAMBLE_CCK; else preamble = WMI_RATE_PREAMBLE_OFDM; *nss = 1; *rate = preamble << 6 | (*nss - 1) << 4 | hw_rate << 0; return 0; } for (i = 0; i < ARRAY_SIZE(mask->control[band].ht_mcs); i++) { if (hweight8(mask->control[band].ht_mcs[i]) == 1) { *nss = i + 1; *rate = WMI_RATE_PREAMBLE_HT << 6 | (*nss - 1) << 4 | (ffs(mask->control[band].ht_mcs[i]) - 1); return 0; } } next: for (i = 0; i < ARRAY_SIZE(mask->control[band].vht_mcs); i++) { if (hweight16(mask->control[band].vht_mcs[i]) == 1) { *nss = i + 1; *rate = WMI_RATE_PREAMBLE_VHT << 6 | (*nss - 1) << 4 | (ffs(mask->control[band].vht_mcs[i]) - 1); return 0; } } return -EINVAL; } static int ath10k_mac_validate_rate_mask(struct ath10k *ar, struct ieee80211_sta *sta, u32 rate_ctrl_flag, u8 nss) { struct ieee80211_sta_ht_cap *ht_cap = &sta->deflink.ht_cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; if (nss > sta->deflink.rx_nss) { ath10k_warn(ar, "Invalid nss field, configured %u limit %u\n", nss, sta->deflink.rx_nss); return -EINVAL; } if (ATH10K_HW_PREAMBLE(rate_ctrl_flag) == WMI_RATE_PREAMBLE_VHT) { if (!vht_cap->vht_supported) { ath10k_warn(ar, "Invalid VHT rate for sta %pM\n", sta->addr); return -EINVAL; } } else if (ATH10K_HW_PREAMBLE(rate_ctrl_flag) == WMI_RATE_PREAMBLE_HT) { if (!ht_cap->ht_supported || vht_cap->vht_supported) { ath10k_warn(ar, "Invalid HT rate for sta %pM\n", sta->addr); return -EINVAL; } } else { if (ht_cap->ht_supported || vht_cap->vht_supported) return -EINVAL; } return 0; } static int ath10k_mac_tid_bitrate_config(struct ath10k *ar, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u32 *rate_ctrl_flag, u8 *rate_ctrl, enum nl80211_tx_rate_setting txrate_type, const struct cfg80211_bitrate_mask *mask) { struct cfg80211_chan_def def; enum nl80211_band band; u8 nss, rate; int vht_num_rates, ret; if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) return -EINVAL; if (txrate_type == NL80211_TX_RATE_AUTOMATIC) { *rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_AUTO; *rate_ctrl_flag = 0; return 0; } band = def.chan->band; if (!ath10k_mac_bitrate_mask_has_single_rate(ar, band, mask, &vht_num_rates)) { return -EINVAL; } ret = ath10k_mac_bitrate_mask_get_single_rate(ar, band, mask, &rate, &nss, false); if (ret) { ath10k_warn(ar, "failed to get single rate: %d\n", ret); return ret; } *rate_ctrl_flag = rate; if (sta && ath10k_mac_validate_rate_mask(ar, sta, *rate_ctrl_flag, nss)) return -EINVAL; if (txrate_type == NL80211_TX_RATE_FIXED) *rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_FIXED_RATE; else if (txrate_type == NL80211_TX_RATE_LIMITED && (test_bit(WMI_SERVICE_EXT_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map))) *rate_ctrl = WMI_PEER_TID_CONFIG_RATE_UPPER_CAP; else return -EOPNOTSUPP; return 0; } static int ath10k_mac_set_tid_config(struct ath10k *ar, struct ieee80211_sta *sta, struct ieee80211_vif *vif, u32 changed, struct wmi_per_peer_per_tid_cfg_arg *arg) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_sta *arsta; int ret; if (sta) { if (!sta->wme) return -EOPNOTSUPP; arsta = (struct ath10k_sta *)sta->drv_priv; if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { if ((arsta->retry_long[arg->tid] > 0 || 
arsta->rate_code[arg->tid] > 0 || arsta->ampdu[arg->tid] == WMI_TID_CONFIG_AGGR_CONTROL_ENABLE) && arg->ack_policy == WMI_PEER_TID_CONFIG_NOACK) { changed &= ~BIT(NL80211_TID_CONFIG_ATTR_NOACK); arg->ack_policy = 0; arg->aggr_control = 0; arg->rate_ctrl = 0; arg->rcode_flags = 0; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { if (arsta->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) { arg->aggr_control = 0; changed &= ~BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG); } } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { if (arsta->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) { arg->rate_ctrl = 0; arg->rcode_flags = 0; } } ether_addr_copy(arg->peer_macaddr.addr, sta->addr); ret = ath10k_wmi_set_per_peer_per_tid_cfg(ar, arg); if (ret) return ret; /* Store the configured parameters in success case */ if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { arsta->noack[arg->tid] = arg->ack_policy; arg->ack_policy = 0; arg->aggr_control = 0; arg->rate_ctrl = 0; arg->rcode_flags = 0; } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG)) { arsta->retry_long[arg->tid] = arg->retry_count; arg->retry_count = 0; } if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { arsta->ampdu[arg->tid] = arg->aggr_control; arg->aggr_control = 0; } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { arsta->rate_ctrl[arg->tid] = arg->rate_ctrl; arg->rate_ctrl = 0; arg->rcode_flags = 0; } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL)) { arsta->rtscts[arg->tid] = arg->rtscts_ctrl; arg->ext_tid_cfg_bitmap = 0; } } else { if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { if ((arvif->retry_long[arg->tid] || arvif->rate_code[arg->tid] || arvif->ampdu[arg->tid] == WMI_TID_CONFIG_AGGR_CONTROL_ENABLE) && arg->ack_policy == WMI_PEER_TID_CONFIG_NOACK) { changed &= ~BIT(NL80211_TID_CONFIG_ATTR_NOACK); } else { arvif->noack[arg->tid] = arg->ack_policy; arvif->ampdu[arg->tid] = arg->aggr_control; arvif->rate_ctrl[arg->tid] = arg->rate_ctrl; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG)) { if (arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) changed &= ~BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG); else arvif->retry_long[arg->tid] = arg->retry_count; } if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { if (arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) changed &= ~BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL); else arvif->ampdu[arg->tid] = arg->aggr_control; } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { if (arvif->noack[arg->tid] == WMI_PEER_TID_CONFIG_NOACK) { changed &= ~(BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE)); } else { arvif->rate_ctrl[arg->tid] = arg->rate_ctrl; arvif->rate_code[arg->tid] = arg->rcode_flags; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL)) { arvif->rtscts[arg->tid] = arg->rtscts_ctrl; arg->ext_tid_cfg_bitmap = 0; } if (changed) arvif->tid_conf_changed[arg->tid] |= changed; } return 0; } static int ath10k_mac_parse_tid_config(struct ath10k *ar, struct ieee80211_sta *sta, struct ieee80211_vif *vif, struct cfg80211_tid_cfg *tid_conf, struct wmi_per_peer_per_tid_cfg_arg *arg) { u32 changed = tid_conf->mask; int ret = 0, i = 0; if (!changed) return -EINVAL; while (i < ATH10K_TID_MAX) { if (!(tid_conf->tids & BIT(i))) { i++; continue; } arg->tid = i; if (changed & 
BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { if (tid_conf->noack == NL80211_TID_CONFIG_ENABLE) { arg->ack_policy = WMI_PEER_TID_CONFIG_NOACK; arg->rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_DEFAULT_LOWEST_RATE; arg->aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_DISABLE; } else { arg->ack_policy = WMI_PEER_TID_CONFIG_ACK; arg->rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_AUTO; arg->aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_ENABLE; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG)) arg->retry_count = tid_conf->retry_long; if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { if (tid_conf->noack == NL80211_TID_CONFIG_ENABLE) arg->aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_ENABLE; else arg->aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_DISABLE; } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { ret = ath10k_mac_tid_bitrate_config(ar, vif, sta, &arg->rcode_flags, &arg->rate_ctrl, tid_conf->txrate_type, &tid_conf->txrate_mask); if (ret) { ath10k_warn(ar, "failed to configure bitrate mask %d\n", ret); arg->rcode_flags = 0; arg->rate_ctrl = 0; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL)) { if (tid_conf->rtscts) arg->rtscts_ctrl = tid_conf->rtscts; arg->ext_tid_cfg_bitmap = WMI_EXT_TID_RTS_CTS_CONFIG; } ret = ath10k_mac_set_tid_config(ar, sta, vif, changed, arg); if (ret) return ret; i++; } return ret; } static int ath10k_mac_reset_tid_config(struct ath10k *ar, struct ieee80211_sta *sta, struct ath10k_vif *arvif, u8 tids) { struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct wmi_per_peer_per_tid_cfg_arg arg; int ret = 0, i = 0; arg.vdev_id = arvif->vdev_id; while (i < ATH10K_TID_MAX) { if (!(tids & BIT(i))) { i++; continue; } arg.tid = i; arg.ack_policy = WMI_PEER_TID_CONFIG_ACK; arg.retry_count = ATH10K_MAX_RETRY_COUNT; arg.rate_ctrl = WMI_TID_CONFIG_RATE_CONTROL_AUTO; arg.aggr_control = WMI_TID_CONFIG_AGGR_CONTROL_ENABLE; arg.rtscts_ctrl = WMI_TID_CONFIG_RTSCTS_CONTROL_ENABLE; arg.ext_tid_cfg_bitmap = WMI_EXT_TID_RTS_CTS_CONFIG; ether_addr_copy(arg.peer_macaddr.addr, sta->addr); ret = ath10k_wmi_set_per_peer_per_tid_cfg(ar, &arg); if (ret) return ret; if (!arvif->tids_rst) { arsta->retry_long[i] = -1; arsta->noack[i] = -1; arsta->ampdu[i] = -1; arsta->rate_code[i] = -1; arsta->rate_ctrl[i] = 0; arsta->rtscts[i] = -1; } else { arvif->retry_long[i] = 0; arvif->noack[i] = 0; arvif->ampdu[i] = 0; arvif->rate_code[i] = 0; arvif->rate_ctrl[i] = 0; arvif->rtscts[i] = 0; } i++; } return ret; } static void ath10k_sta_tid_cfg_wk(struct work_struct *wk) { struct wmi_per_peer_per_tid_cfg_arg arg = {}; struct ieee80211_sta *sta; struct ath10k_sta *arsta; struct ath10k_vif *arvif; struct ath10k *ar; bool config_apply; int ret, i; u32 changed; u8 nss; arsta = container_of(wk, struct ath10k_sta, tid_config_wk); sta = container_of((void *)arsta, struct ieee80211_sta, drv_priv); arvif = arsta->arvif; ar = arvif->ar; mutex_lock(&ar->conf_mutex); if (arvif->tids_rst) { ret = ath10k_mac_reset_tid_config(ar, sta, arvif, arvif->tids_rst); goto exit; } ether_addr_copy(arg.peer_macaddr.addr, sta->addr); for (i = 0; i < ATH10K_TID_MAX; i++) { config_apply = false; changed = arvif->tid_conf_changed[i]; if (changed & BIT(NL80211_TID_CONFIG_ATTR_NOACK)) { if (arsta->noack[i] != -1) { arg.ack_policy = 0; } else { config_apply = true; arg.ack_policy = arvif->noack[i]; arg.aggr_control = arvif->ampdu[i]; arg.rate_ctrl = arvif->rate_ctrl[i]; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG)) { if (arsta->retry_long[i] != -1 || arsta->noack[i] == 
WMI_PEER_TID_CONFIG_NOACK || arvif->noack[i] == WMI_PEER_TID_CONFIG_NOACK) { arg.retry_count = 0; } else { arg.retry_count = arvif->retry_long[i]; config_apply = true; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL)) { if (arsta->ampdu[i] != -1 || arsta->noack[i] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[i] == WMI_PEER_TID_CONFIG_NOACK) { arg.aggr_control = 0; } else { arg.aggr_control = arvif->ampdu[i]; config_apply = true; } } if (changed & (BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE))) { nss = ATH10K_HW_NSS(arvif->rate_code[i]); ret = ath10k_mac_validate_rate_mask(ar, sta, arvif->rate_code[i], nss); if (ret && arvif->rate_ctrl[i] > WMI_TID_CONFIG_RATE_CONTROL_AUTO) { arg.rate_ctrl = 0; arg.rcode_flags = 0; } if (arsta->rate_ctrl[i] > WMI_TID_CONFIG_RATE_CONTROL_AUTO || arsta->noack[i] == WMI_PEER_TID_CONFIG_NOACK || arvif->noack[i] == WMI_PEER_TID_CONFIG_NOACK) { arg.rate_ctrl = 0; arg.rcode_flags = 0; } else { arg.rate_ctrl = arvif->rate_ctrl[i]; arg.rcode_flags = arvif->rate_code[i]; config_apply = true; } } if (changed & BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL)) { if (arsta->rtscts[i]) { arg.rtscts_ctrl = 0; arg.ext_tid_cfg_bitmap = 0; } else { arg.rtscts_ctrl = arvif->rtscts[i] - 1; arg.ext_tid_cfg_bitmap = WMI_EXT_TID_RTS_CTS_CONFIG; config_apply = true; } } arg.tid = i; if (config_apply) { ret = ath10k_wmi_set_per_peer_per_tid_cfg(ar, &arg); if (ret) ath10k_warn(ar, "failed to set per tid config for sta %pM: %d\n", sta->addr, ret); } arg.ack_policy = 0; arg.retry_count = 0; arg.aggr_control = 0; arg.rate_ctrl = 0; arg.rcode_flags = 0; } exit: mutex_unlock(&ar->conf_mutex); } static void ath10k_mac_vif_stations_tid_conf(void *data, struct ieee80211_sta *sta) { struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_mac_iter_tid_conf_data *iter_data = data; struct ieee80211_vif *sta_vif = arsta->arvif->vif; if (sta_vif != iter_data->curr_vif || !sta->wme) return; ieee80211_queue_work(iter_data->ar->hw, &arsta->tid_config_wk); } static int ath10k_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_peer *peer; int ret = 0; int i; if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) { memset(arsta, 0, sizeof(*arsta)); arsta->arvif = arvif; arsta->peer_ps_state = WMI_PEER_PS_STATE_DISABLED; INIT_WORK(&arsta->update_wk, ath10k_sta_rc_update_wk); INIT_WORK(&arsta->tid_config_wk, ath10k_sta_tid_cfg_wk); for (i = 0; i < ARRAY_SIZE(sta->txq); i++) ath10k_mac_txq_init(sta->txq[i]); } /* cancel must be done outside the mutex to avoid deadlock */ if ((old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST)) { cancel_work_sync(&arsta->update_wk); cancel_work_sync(&arsta->tid_config_wk); } mutex_lock(&ar->conf_mutex); if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) { /* * New station addition. 
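 * Roughly: bump the station accounting, create the firmware peer, cache its peer id and, for TDLS stations, enable the firmware TDLS state and move the peer to the PEERING state.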
*/ enum wmi_peer_type peer_type = WMI_PEER_TYPE_DEFAULT; u32 num_tdls_stations; ath10k_dbg(ar, ATH10K_DBG_STA, "mac vdev %d peer create %pM (new sta) sta %d / %d peer %d / %d\n", arvif->vdev_id, sta->addr, ar->num_stations + 1, ar->max_num_stations, ar->num_peers + 1, ar->max_num_peers); num_tdls_stations = ath10k_mac_tdls_vif_stations_count(hw, vif); if (sta->tdls) { if (num_tdls_stations >= ar->max_num_tdls_vdevs) { ath10k_warn(ar, "vdev %i exceeded maximum number of tdls vdevs %i\n", arvif->vdev_id, ar->max_num_tdls_vdevs); ret = -ELNRNG; goto exit; } peer_type = WMI_PEER_TYPE_TDLS; } ret = ath10k_mac_inc_num_stations(arvif, sta); if (ret) { ath10k_warn(ar, "refusing to associate station: too many connected already (%d)\n", ar->max_num_stations); goto exit; } if (ath10k_debug_is_extd_tx_stats_enabled(ar)) { arsta->tx_stats = kzalloc(sizeof(*arsta->tx_stats), GFP_KERNEL); if (!arsta->tx_stats) { ath10k_mac_dec_num_stations(arvif, sta); ret = -ENOMEM; goto exit; } } ret = ath10k_peer_create(ar, vif, sta, arvif->vdev_id, sta->addr, peer_type); if (ret) { ath10k_warn(ar, "failed to add peer %pM for vdev %d when adding a new sta: %i\n", sta->addr, arvif->vdev_id, ret); ath10k_mac_dec_num_stations(arvif, sta); kfree(arsta->tx_stats); goto exit; } spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, sta->addr); if (!peer) { ath10k_warn(ar, "failed to lookup peer %pM on vdev %i\n", vif->addr, arvif->vdev_id); spin_unlock_bh(&ar->data_lock); ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); ath10k_mac_dec_num_stations(arvif, sta); kfree(arsta->tx_stats); ret = -ENOENT; goto exit; } arsta->peer_id = find_first_bit(peer->peer_ids, ATH10K_MAX_NUM_PEER_IDS); spin_unlock_bh(&ar->data_lock); if (!sta->tdls) goto exit; ret = ath10k_wmi_update_fw_tdls_state(ar, arvif->vdev_id, WMI_TDLS_ENABLE_ACTIVE); if (ret) { ath10k_warn(ar, "failed to update fw tdls state on vdev %i: %i\n", arvif->vdev_id, ret); ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); ath10k_mac_dec_num_stations(arvif, sta); kfree(arsta->tx_stats); goto exit; } ret = ath10k_mac_tdls_peer_update(ar, arvif->vdev_id, sta, WMI_TDLS_PEER_STATE_PEERING); if (ret) { ath10k_warn(ar, "failed to update tdls peer %pM for vdev %d when adding a new sta: %i\n", sta->addr, arvif->vdev_id, ret); ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); ath10k_mac_dec_num_stations(arvif, sta); kfree(arsta->tx_stats); if (num_tdls_stations != 0) goto exit; ath10k_wmi_update_fw_tdls_state(ar, arvif->vdev_id, WMI_TDLS_DISABLE); } } else if ((old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST)) { /* * Existing station deletion. 
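 * Mirror image of the addition path: signal TDLS teardown if needed, delete the firmware peer, drop the station count and scrub any stale peer_map entries still pointing at this sta.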
*/ ath10k_dbg(ar, ATH10K_DBG_STA, "mac vdev %d peer delete %pM sta %pK (sta gone)\n", arvif->vdev_id, sta->addr, sta); if (sta->tdls) { ret = ath10k_mac_tdls_peer_update(ar, arvif->vdev_id, sta, WMI_TDLS_PEER_STATE_TEARDOWN); if (ret) ath10k_warn(ar, "failed to update tdls peer state for %pM state %d: %i\n", sta->addr, WMI_TDLS_PEER_STATE_TEARDOWN, ret); } ret = ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); if (ret) ath10k_warn(ar, "failed to delete peer %pM for vdev %d: %i\n", sta->addr, arvif->vdev_id, ret); ath10k_mac_dec_num_stations(arvif, sta); spin_lock_bh(&ar->data_lock); for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { peer = ar->peer_map[i]; if (!peer) continue; if (peer->sta == sta) { ath10k_warn(ar, "found sta peer %pM (ptr %pK id %d) entry on vdev %i after it was supposedly removed\n", sta->addr, peer, i, arvif->vdev_id); peer->sta = NULL; /* Clean up the peer object as well since we * must have failed to do this above. */ ath10k_peer_map_cleanup(ar, peer); } } spin_unlock_bh(&ar->data_lock); if (ath10k_debug_is_extd_tx_stats_enabled(ar)) { kfree(arsta->tx_stats); arsta->tx_stats = NULL; } for (i = 0; i < ARRAY_SIZE(sta->txq); i++) ath10k_mac_txq_unref(ar, sta->txq[i]); if (!sta->tdls) goto exit; if (ath10k_mac_tdls_vif_stations_count(hw, vif)) goto exit; /* This was the last tdls peer in current vif */ ret = ath10k_wmi_update_fw_tdls_state(ar, arvif->vdev_id, WMI_TDLS_DISABLE); if (ret) { ath10k_warn(ar, "failed to update fw tdls state on vdev %i: %i\n", arvif->vdev_id, ret); } } else if (old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC && (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_ADHOC)) { /* * New association. */ ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta %pM associated\n", sta->addr); ret = ath10k_station_assoc(ar, vif, sta, false); if (ret) ath10k_warn(ar, "failed to associate station %pM for vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED && sta->tdls) { /* * Tdls station authorized. */ ath10k_dbg(ar, ATH10K_DBG_STA, "mac tdls sta %pM authorized\n", sta->addr); ret = ath10k_station_assoc(ar, vif, sta, false); if (ret) { ath10k_warn(ar, "failed to associate tdls station %pM for vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); goto exit; } ret = ath10k_mac_tdls_peer_update(ar, arvif->vdev_id, sta, WMI_TDLS_PEER_STATE_CONNECTED); if (ret) ath10k_warn(ar, "failed to update tdls peer %pM for vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTH && (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_ADHOC)) { /* * Disassociation. 
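 * (Only reached for AP, mesh and IBSS interfaces; station interfaces are handled via BSS_CHANGED_ASSOC instead.)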
*/ ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta %pM disassociated\n", sta->addr); ret = ath10k_station_disassoc(ar, vif, sta); if (ret) ath10k_warn(ar, "failed to disassociate station: %pM vdev %i: %i\n", sta->addr, arvif->vdev_id, ret); } exit: mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_conf_tx_uapsd(struct ath10k *ar, struct ieee80211_vif *vif, u16 ac, bool enable) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_sta_uapsd_auto_trig_arg arg = {}; u32 prio = 0, acc = 0; u32 value = 0; int ret = 0; lockdep_assert_held(&ar->conf_mutex); if (arvif->vdev_type != WMI_VDEV_TYPE_STA) return 0; switch (ac) { case IEEE80211_AC_VO: value = WMI_STA_PS_UAPSD_AC3_DELIVERY_EN | WMI_STA_PS_UAPSD_AC3_TRIGGER_EN; prio = 7; acc = 3; break; case IEEE80211_AC_VI: value = WMI_STA_PS_UAPSD_AC2_DELIVERY_EN | WMI_STA_PS_UAPSD_AC2_TRIGGER_EN; prio = 5; acc = 2; break; case IEEE80211_AC_BE: value = WMI_STA_PS_UAPSD_AC1_DELIVERY_EN | WMI_STA_PS_UAPSD_AC1_TRIGGER_EN; prio = 2; acc = 1; break; case IEEE80211_AC_BK: value = WMI_STA_PS_UAPSD_AC0_DELIVERY_EN | WMI_STA_PS_UAPSD_AC0_TRIGGER_EN; prio = 0; acc = 0; break; } if (enable) arvif->u.sta.uapsd |= value; else arvif->u.sta.uapsd &= ~value; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, WMI_STA_PS_PARAM_UAPSD, arvif->u.sta.uapsd); if (ret) { ath10k_warn(ar, "failed to set uapsd params: %d\n", ret); goto exit; } if (arvif->u.sta.uapsd) value = WMI_STA_PS_RX_WAKE_POLICY_POLL_UAPSD; else value = WMI_STA_PS_RX_WAKE_POLICY_WAKE; ret = ath10k_wmi_set_sta_ps_param(ar, arvif->vdev_id, WMI_STA_PS_PARAM_RX_WAKE_POLICY, value); if (ret) ath10k_warn(ar, "failed to set rx wake param: %d\n", ret); ret = ath10k_mac_vif_recalc_ps_wake_threshold(arvif); if (ret) { ath10k_warn(ar, "failed to recalc ps wake threshold on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } ret = ath10k_mac_vif_recalc_ps_poll_count(arvif); if (ret) { ath10k_warn(ar, "failed to recalc ps poll count on vdev %i: %d\n", arvif->vdev_id, ret); return ret; } if (test_bit(WMI_SERVICE_STA_UAPSD_BASIC_AUTO_TRIG, ar->wmi.svc_map) || test_bit(WMI_SERVICE_STA_UAPSD_VAR_AUTO_TRIG, ar->wmi.svc_map)) { /* Only userspace can make an educated decision when to send * trigger frame. The following effectively disables u-UAPSD * autotrigger in firmware (which is enabled by default * provided the autotrigger service is available). 
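 * Concretely: service_interval stays 0 while suspend_interval and delay_interval are pushed out to WMI_STA_UAPSD_MAX_INTERVAL_MSEC, which in practice keeps firmware from generating trigger frames on its own.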
*/ arg.wmm_ac = acc; arg.user_priority = prio; arg.service_interval = 0; arg.suspend_interval = WMI_STA_UAPSD_MAX_INTERVAL_MSEC; arg.delay_interval = WMI_STA_UAPSD_MAX_INTERVAL_MSEC; ret = ath10k_wmi_vdev_sta_uapsd(ar, arvif->vdev_id, arvif->bssid, &arg, 1); if (ret) { ath10k_warn(ar, "failed to set uapsd auto trigger %d\n", ret); return ret; } } exit: return ret; } static int ath10k_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, unsigned int link_id, u16 ac, const struct ieee80211_tx_queue_params *params) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_wmm_params_arg *p = NULL; int ret; mutex_lock(&ar->conf_mutex); switch (ac) { case IEEE80211_AC_VO: p = &arvif->wmm_params.ac_vo; break; case IEEE80211_AC_VI: p = &arvif->wmm_params.ac_vi; break; case IEEE80211_AC_BE: p = &arvif->wmm_params.ac_be; break; case IEEE80211_AC_BK: p = &arvif->wmm_params.ac_bk; break; } if (WARN_ON(!p)) { ret = -EINVAL; goto exit; } p->cwmin = params->cw_min; p->cwmax = params->cw_max; p->aifs = params->aifs; /* * The channel time duration programmed in the HW is in absolute * microseconds, while mac80211 gives the txop in units of * 32 microseconds. */ p->txop = params->txop * 32; if (ar->wmi.ops->gen_vdev_wmm_conf) { ret = ath10k_wmi_vdev_wmm_conf(ar, arvif->vdev_id, &arvif->wmm_params); if (ret) { ath10k_warn(ar, "failed to set vdev wmm params on vdev %i: %d\n", arvif->vdev_id, ret); goto exit; } } else { /* This won't work well with multi-interface cases but it's * better than nothing. */ ret = ath10k_wmi_pdev_set_wmm_params(ar, &arvif->wmm_params); if (ret) { ath10k_warn(ar, "failed to set wmm params: %d\n", ret); goto exit; } } ret = ath10k_conf_tx_uapsd(ar, vif, ac, params->uapsd); if (ret) ath10k_warn(ar, "failed to set sta uapsd: %d\n", ret); exit: mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, int duration, enum ieee80211_roc_type type) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct wmi_start_scan_arg arg; int ret = 0; u32 scan_time_msec; mutex_lock(&ar->conf_mutex); if (ath10k_mac_tdls_vif_stations_count(hw, vif) > 0) { ret = -EBUSY; goto exit; } spin_lock_bh(&ar->data_lock); switch (ar->scan.state) { case ATH10K_SCAN_IDLE: reinit_completion(&ar->scan.started); reinit_completion(&ar->scan.completed); reinit_completion(&ar->scan.on_channel); ar->scan.state = ATH10K_SCAN_STARTING; ar->scan.is_roc = true; ar->scan.vdev_id = arvif->vdev_id; ar->scan.roc_freq = chan->center_freq; ar->scan.roc_notify = true; ret = 0; break; case ATH10K_SCAN_STARTING: case ATH10K_SCAN_RUNNING: case ATH10K_SCAN_ABORTING: ret = -EBUSY; break; } spin_unlock_bh(&ar->data_lock); if (ret) goto exit; scan_time_msec = ar->hw->wiphy->max_remain_on_channel_duration * 2; memset(&arg, 0, sizeof(arg)); ath10k_wmi_start_scan_init(ar, &arg); arg.vdev_id = arvif->vdev_id; arg.scan_id = ATH10K_SCAN_ID; arg.n_channels = 1; arg.channels[0] = chan->center_freq; arg.dwell_time_active = scan_time_msec; arg.dwell_time_passive = scan_time_msec; arg.max_scan_time = scan_time_msec; arg.scan_ctrl_flags |= WMI_SCAN_FLAG_PASSIVE; arg.scan_ctrl_flags |= WMI_SCAN_FILTER_PROBE_REQ; arg.burst_duration_ms = duration; ret = ath10k_start_scan(ar, &arg); if (ret) { ath10k_warn(ar, "failed to start roc scan: %d\n", ret); spin_lock_bh(&ar->data_lock); ar->scan.state = ATH10K_SCAN_IDLE; spin_unlock_bh(&ar->data_lock); goto exit; } ret = 
wait_for_completion_timeout(&ar->scan.on_channel, 3 * HZ); if (ret == 0) { ath10k_warn(ar, "failed to switch to channel for roc scan\n"); ret = ath10k_scan_stop(ar); if (ret) ath10k_warn(ar, "failed to stop scan: %d\n", ret); ret = -ETIMEDOUT; goto exit; } ieee80211_queue_delayed_work(ar->hw, &ar->scan.timeout, msecs_to_jiffies(duration)); ret = 0; exit: mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_cancel_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath10k *ar = hw->priv; mutex_lock(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); ar->scan.roc_notify = false; spin_unlock_bh(&ar->data_lock); ath10k_scan_abort(ar); mutex_unlock(&ar->conf_mutex); cancel_delayed_work_sync(&ar->scan.timeout); return 0; } /* * Both RTS and Fragmentation threshold are interface-specific * in ath10k, but device-specific in mac80211. */ static int ath10k_set_rts_threshold(struct ieee80211_hw *hw, u32 value) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif; int ret = 0; mutex_lock(&ar->conf_mutex); list_for_each_entry(arvif, &ar->arvifs, list) { ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d rts threshold %d\n", arvif->vdev_id, value); ret = ath10k_mac_set_rts(arvif, value); if (ret) { ath10k_warn(ar, "failed to set rts threshold for vdev %d: %d\n", arvif->vdev_id, ret); break; } } mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_mac_op_set_frag_threshold(struct ieee80211_hw *hw, u32 value) { /* Even though there's a WMI enum for fragmentation threshold no known * firmware actually implements it. Moreover it is not possible to rely * frame fragmentation to mac80211 because firmware clears the "more * fragments" bit in frame control making it impossible for remote * devices to reassemble frames. * * Hence implement a dummy callback just to say fragmentation isn't * supported. This effectively prevents mac80211 from doing frame * fragmentation in software. */ return -EOPNOTSUPP; } void ath10k_mac_wait_tx_complete(struct ath10k *ar) { bool skip; long time_left; /* mac80211 doesn't care if we really xmit queued frames or not * we'll collect those frames either way if we stop/delete vdevs */ if (ar->state == ATH10K_STATE_WEDGED) return; time_left = wait_event_timeout(ar->htt.empty_tx_wq, ({ bool empty; spin_lock_bh(&ar->htt.tx_lock); empty = (ar->htt.num_pending_tx == 0); spin_unlock_bh(&ar->htt.tx_lock); skip = (ar->state == ATH10K_STATE_WEDGED) || test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags); (empty || skip); }), ATH10K_FLUSH_TIMEOUT_HZ); if (time_left == 0 || skip) ath10k_warn(ar, "failed to flush transmit queue (skip %i ar-state %i): %ld\n", skip, ar->state, time_left); } static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif; u32 bitmap; if (drop) { if (vif && vif->type == NL80211_IFTYPE_STATION) { bitmap = ~(1 << WMI_MGMT_TID); list_for_each_entry(arvif, &ar->arvifs, list) { if (arvif->vdev_type == WMI_VDEV_TYPE_STA) ath10k_wmi_peer_flush(ar, arvif->vdev_id, arvif->bssid, bitmap); } ath10k_htt_flush_tx(&ar->htt); } return; } mutex_lock(&ar->conf_mutex); ath10k_mac_wait_tx_complete(ar); mutex_unlock(&ar->conf_mutex); } /* TODO: Implement this function properly * For now it is needed to reply to Probe Requests in IBSS mode. * Probably we need this information from FW. 
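 * Returning 1 below simply claims this device transmitted the last beacon,
 * so mac80211 should always answer IBSS Probe Requests itself.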
*/ static int ath10k_tx_last_beacon(struct ieee80211_hw *hw) { return 1; } static void ath10k_reconfig_complete(struct ieee80211_hw *hw, enum ieee80211_reconfig_type reconfig_type) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif; if (reconfig_type != IEEE80211_RECONFIG_TYPE_RESTART) return; mutex_lock(&ar->conf_mutex); /* If device failed to restart it will be in a different state, e.g. * ATH10K_STATE_WEDGED */ if (ar->state == ATH10K_STATE_RESTARTED) { ath10k_info(ar, "device successfully recovered\n"); ar->state = ATH10K_STATE_ON; ieee80211_wake_queues(ar->hw); clear_bit(ATH10K_FLAG_RESTARTING, &ar->dev_flags); if (ar->hw_params.hw_restart_disconnect) { list_for_each_entry(arvif, &ar->arvifs, list) { if (arvif->is_up && arvif->vdev_type == WMI_VDEV_TYPE_STA) ieee80211_hw_restart_disconnect(arvif->vif); } } } mutex_unlock(&ar->conf_mutex); } static void ath10k_mac_update_bss_chan_survey(struct ath10k *ar, struct ieee80211_channel *channel) { int ret; enum wmi_bss_survey_req_type type = WMI_BSS_SURVEY_REQ_TYPE_READ; lockdep_assert_held(&ar->conf_mutex); if (!test_bit(WMI_SERVICE_BSS_CHANNEL_INFO_64, ar->wmi.svc_map) || (ar->rx_channel != channel)) return; if (ar->scan.state != ATH10K_SCAN_IDLE) { ath10k_dbg(ar, ATH10K_DBG_MAC, "ignoring bss chan info request while scanning..\n"); return; } reinit_completion(&ar->bss_survey_done); ret = ath10k_wmi_pdev_bss_chan_info_request(ar, type); if (ret) { ath10k_warn(ar, "failed to send pdev bss chan info request\n"); return; } ret = wait_for_completion_timeout(&ar->bss_survey_done, 3 * HZ); if (!ret) { ath10k_warn(ar, "bss channel survey timed out\n"); return; } } static int ath10k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) { struct ath10k *ar = hw->priv; struct ieee80211_supported_band *sband; struct survey_info *ar_survey = &ar->survey[idx]; int ret = 0; mutex_lock(&ar->conf_mutex); sband = hw->wiphy->bands[NL80211_BAND_2GHZ]; if (sband && idx >= sband->n_channels) { idx -= sband->n_channels; sband = NULL; } if (!sband) sband = hw->wiphy->bands[NL80211_BAND_5GHZ]; if (!sband || idx >= sband->n_channels) { ret = -ENOENT; goto exit; } ath10k_mac_update_bss_chan_survey(ar, &sband->channels[idx]); spin_lock_bh(&ar->data_lock); memcpy(survey, ar_survey, sizeof(*survey)); spin_unlock_bh(&ar->data_lock); survey->channel = &sband->channels[idx]; if (ar->rx_channel == survey->channel) survey->filled |= SURVEY_INFO_IN_USE; exit: mutex_unlock(&ar->conf_mutex); return ret; } static bool ath10k_mac_bitrate_mask_get_single_nss(struct ath10k *ar, enum nl80211_band band, const struct cfg80211_bitrate_mask *mask, int *nss) { struct ieee80211_supported_band *sband = &ar->mac.sbands[band]; u16 vht_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); u8 ht_nss_mask = 0; u8 vht_nss_mask = 0; int i; if (mask->control[band].legacy) return false; for (i = 0; i < ARRAY_SIZE(mask->control[band].ht_mcs); i++) { if (mask->control[band].ht_mcs[i] == 0) continue; else if (mask->control[band].ht_mcs[i] == sband->ht_cap.mcs.rx_mask[i]) ht_nss_mask |= BIT(i); else return false; } for (i = 0; i < ARRAY_SIZE(mask->control[band].vht_mcs); i++) { if (mask->control[band].vht_mcs[i] == 0) continue; else if (mask->control[band].vht_mcs[i] == ath10k_mac_get_max_vht_mcs_map(vht_mcs_map, i)) vht_nss_mask |= BIT(i); else return false; } if (ht_nss_mask != vht_nss_mask) return false; if (ht_nss_mask == 0) return false; if (BIT(fls(ht_nss_mask)) - 1 != ht_nss_mask) return false; *nss = fls(ht_nss_mask); return true; } static int 
ath10k_mac_set_fixed_rate_params(struct ath10k_vif *arvif, u8 rate, u8 nss, u8 sgi, u8 ldpc) { struct ath10k *ar = arvif->ar; u32 vdev_param; int ret; lockdep_assert_held(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac set fixed rate params vdev %i rate 0x%02x nss %u sgi %u\n", arvif->vdev_id, rate, nss, sgi); vdev_param = ar->wmi.vdev_param->fixed_rate; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, rate); if (ret) { ath10k_warn(ar, "failed to set fixed rate param 0x%02x: %d\n", rate, ret); return ret; } vdev_param = ar->wmi.vdev_param->nss; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, nss); if (ret) { ath10k_warn(ar, "failed to set nss param %d: %d\n", nss, ret); return ret; } vdev_param = ar->wmi.vdev_param->sgi; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, sgi); if (ret) { ath10k_warn(ar, "failed to set sgi param %d: %d\n", sgi, ret); return ret; } vdev_param = ar->wmi.vdev_param->ldpc; ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, ldpc); if (ret) { ath10k_warn(ar, "failed to set ldpc param %d: %d\n", ldpc, ret); return ret; } return 0; } static bool ath10k_mac_can_set_bitrate_mask(struct ath10k *ar, enum nl80211_band band, const struct cfg80211_bitrate_mask *mask, bool allow_pfr) { int i; u16 vht_mcs; /* Due to firmware limitation in WMI_PEER_ASSOC_CMDID it is impossible * to express all VHT MCS rate masks. Effectively only the following * ranges can be used: none, 0-7, 0-8 and 0-9. */ for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { vht_mcs = mask->control[band].vht_mcs[i]; switch (vht_mcs) { case 0: case BIT(8) - 1: case BIT(9) - 1: case BIT(10) - 1: break; default: if (!allow_pfr) ath10k_warn(ar, "refusing bitrate mask with missing 0-7 VHT MCS rates\n"); return false; } } return true; } static bool ath10k_mac_set_vht_bitrate_mask_fixup(struct ath10k *ar, struct ath10k_vif *arvif, struct ieee80211_sta *sta) { int err; u8 rate = arvif->vht_pfr; /* skip non vht and multiple rate peers */ if (!sta->deflink.vht_cap.vht_supported || arvif->vht_num_rates != 1) return false; err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, WMI_PEER_PARAM_FIXED_RATE, rate); if (err) ath10k_warn(ar, "failed to enable STA %pM peer fixed rate: %d\n", sta->addr, err); return true; } static void ath10k_mac_set_bitrate_mask_iter(void *data, struct ieee80211_sta *sta) { struct ath10k_vif *arvif = data; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k *ar = arvif->ar; if (arsta->arvif != arvif) return; if (ath10k_mac_set_vht_bitrate_mask_fixup(ar, arvif, sta)) return; spin_lock_bh(&ar->data_lock); arsta->changed |= IEEE80211_RC_SUPP_RATES_CHANGED; spin_unlock_bh(&ar->data_lock); ieee80211_queue_work(ar->hw, &arsta->update_wk); } static void ath10k_mac_clr_bitrate_mask_iter(void *data, struct ieee80211_sta *sta) { struct ath10k_vif *arvif = data; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k *ar = arvif->ar; int err; /* clear vht peers only */ if (arsta->arvif != arvif || !sta->deflink.vht_cap.vht_supported) return; err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, WMI_PEER_PARAM_FIXED_RATE, WMI_FIXED_RATE_NONE); if (err) ath10k_warn(ar, "failed to clear STA %pM peer fixed rate: %d\n", sta->addr, err); } static int ath10k_mac_op_set_bitrate_mask(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const struct cfg80211_bitrate_mask *mask) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct cfg80211_chan_def def; struct ath10k *ar = arvif->ar; enum 
nl80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; u8 rate; u8 nss; u8 sgi; u8 ldpc; int single_nss; int ret; int vht_num_rates, allow_pfr; u8 vht_pfr; bool update_bitrate_mask = true; if (ath10k_mac_vif_chan(vif, &def)) return -EPERM; band = def.chan->band; ht_mcs_mask = mask->control[band].ht_mcs; vht_mcs_mask = mask->control[band].vht_mcs; ldpc = !!(ar->ht_cap_info & WMI_HT_CAP_LDPC); sgi = mask->control[band].gi; if (sgi == NL80211_TXRATE_FORCE_LGI) return -EINVAL; allow_pfr = test_bit(ATH10K_FW_FEATURE_PEER_FIXED_RATE, ar->normal_mode_fw.fw_file.fw_features); if (allow_pfr) { mutex_lock(&ar->conf_mutex); ieee80211_iterate_stations_atomic(ar->hw, ath10k_mac_clr_bitrate_mask_iter, arvif); mutex_unlock(&ar->conf_mutex); } if (ath10k_mac_bitrate_mask_has_single_rate(ar, band, mask, &vht_num_rates)) { ret = ath10k_mac_bitrate_mask_get_single_rate(ar, band, mask, &rate, &nss, false); if (ret) { ath10k_warn(ar, "failed to get single rate for vdev %i: %d\n", arvif->vdev_id, ret); return ret; } } else if (ath10k_mac_bitrate_mask_get_single_nss(ar, band, mask, &single_nss)) { rate = WMI_FIXED_RATE_NONE; nss = single_nss; } else { rate = WMI_FIXED_RATE_NONE; nss = min(ar->num_rf_chains, max(ath10k_mac_max_ht_nss(ht_mcs_mask), ath10k_mac_max_vht_nss(vht_mcs_mask))); if (!ath10k_mac_can_set_bitrate_mask(ar, band, mask, allow_pfr)) { u8 vht_nss; if (!allow_pfr || vht_num_rates != 1) return -EINVAL; /* Reach here, firmware supports peer fixed rate and has * single vht rate, and don't update vif birate_mask, as * the rate only for specific peer. */ ath10k_mac_bitrate_mask_get_single_rate(ar, band, mask, &vht_pfr, &vht_nss, true); update_bitrate_mask = false; } else { vht_pfr = 0; } mutex_lock(&ar->conf_mutex); if (update_bitrate_mask) arvif->bitrate_mask = *mask; arvif->vht_num_rates = vht_num_rates; arvif->vht_pfr = vht_pfr; ieee80211_iterate_stations_atomic(ar->hw, ath10k_mac_set_bitrate_mask_iter, arvif); mutex_unlock(&ar->conf_mutex); } mutex_lock(&ar->conf_mutex); ret = ath10k_mac_set_fixed_rate_params(arvif, rate, nss, sgi, ldpc); if (ret) { ath10k_warn(ar, "failed to set fixed rate params on vdev %i: %d\n", arvif->vdev_id, ret); goto exit; } exit: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u32 changed) { struct ath10k *ar = hw->priv; struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; u32 bw, smps; spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arvif->vdev_id, sta->addr); if (!peer) { spin_unlock_bh(&ar->data_lock); ath10k_warn(ar, "mac sta rc update failed to find peer %pM on vdev %i\n", sta->addr, arvif->vdev_id); return; } ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n", sta->addr, changed, sta->deflink.bandwidth, sta->deflink.rx_nss, sta->deflink.smps_mode); if (changed & IEEE80211_RC_BW_CHANGED) { bw = WMI_PEER_CHWIDTH_20MHZ; switch (sta->deflink.bandwidth) { case IEEE80211_STA_RX_BW_20: bw = WMI_PEER_CHWIDTH_20MHZ; break; case IEEE80211_STA_RX_BW_40: bw = WMI_PEER_CHWIDTH_40MHZ; break; case IEEE80211_STA_RX_BW_80: bw = WMI_PEER_CHWIDTH_80MHZ; break; case IEEE80211_STA_RX_BW_160: bw = WMI_PEER_CHWIDTH_160MHZ; break; default: ath10k_warn(ar, "Invalid bandwidth %d in rc update for %pM\n", sta->deflink.bandwidth, sta->addr); bw = WMI_PEER_CHWIDTH_20MHZ; break; } arsta->bw = bw; } if (changed & IEEE80211_RC_NSS_CHANGED) 
arsta->nss = sta->deflink.rx_nss; if (changed & IEEE80211_RC_SMPS_CHANGED) { smps = WMI_PEER_SMPS_PS_NONE; switch (sta->deflink.smps_mode) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_OFF: smps = WMI_PEER_SMPS_PS_NONE; break; case IEEE80211_SMPS_STATIC: smps = WMI_PEER_SMPS_STATIC; break; case IEEE80211_SMPS_DYNAMIC: smps = WMI_PEER_SMPS_DYNAMIC; break; case IEEE80211_SMPS_NUM_MODES: ath10k_warn(ar, "Invalid smps %d in sta rc update for %pM\n", sta->deflink.smps_mode, sta->addr); smps = WMI_PEER_SMPS_PS_NONE; break; } arsta->smps = smps; } arsta->changed |= changed; spin_unlock_bh(&ar->data_lock); ieee80211_queue_work(hw, &arsta->update_wk); } static void ath10k_offset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif, s64 tsf_offset) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; u32 offset, vdev_param; int ret; if (tsf_offset < 0) { vdev_param = ar->wmi.vdev_param->dec_tsf; offset = -tsf_offset; } else { vdev_param = ar->wmi.vdev_param->inc_tsf; offset = tsf_offset; } ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, offset); if (ret && ret != -EOPNOTSUPP) ath10k_warn(ar, "failed to set tsf offset %d cmd %d: %d\n", offset, vdev_param, ret); } static int ath10k_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_ampdu_params *params) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ieee80211_sta *sta = params->sta; enum ieee80211_ampdu_mlme_action action = params->action; u16 tid = params->tid; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac ampdu vdev_id %i sta %pM tid %u action %d\n", arvif->vdev_id, sta->addr, tid, action); switch (action) { case IEEE80211_AMPDU_RX_START: case IEEE80211_AMPDU_RX_STOP: /* HTT AddBa/DelBa events trigger mac80211 Rx BA session * creation/removal. Do we need to verify this? */ return 0; case IEEE80211_AMPDU_TX_START: case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: case IEEE80211_AMPDU_TX_OPERATIONAL: /* Firmware offloads Tx aggregation entirely so deny mac80211 * Tx aggregation requests. */ return -EOPNOTSUPP; } return -EINVAL; } static void ath10k_mac_update_rx_channel(struct ath10k *ar, struct ieee80211_chanctx_conf *ctx, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs) { struct cfg80211_chan_def *def = NULL; /* Both locks are required because ar->rx_channel is modified. This * allows readers to hold either lock. */ lockdep_assert_held(&ar->conf_mutex); lockdep_assert_held(&ar->data_lock); WARN_ON(ctx && vifs); WARN_ON(vifs && !n_vifs); /* FIXME: Sort of an optimization and a workaround. Peers and vifs are * on a linked list now. Doing a lookup peer -> vif -> chanctx for each * ppdu on Rx may reduce performance on low-end systems. It should be * possible to make tables/hashmaps to speed the lookup up (be vary of * cpu data cache lines though regarding sizes) but to keep the initial * implementation simple and less intrusive fallback to the slow lookup * only for multi-channel cases. Single-channel cases will remain to * use the old channel derival and thus performance should not be * affected much. 
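	 * In practice: with exactly one channel context ar->rx_channel is
	 * cached here, while multi-channel setups leave it NULL so the Rx
	 * path presumably falls back to the slower per-peer lookup.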
*/ rcu_read_lock(); if (!ctx && ath10k_mac_num_chanctxs(ar) == 1) { ieee80211_iter_chan_contexts_atomic(ar->hw, ath10k_mac_get_any_chandef_iter, &def); if (vifs) def = &vifs[0].new_ctx->def; ar->rx_channel = def->chan; } else if ((ctx && ath10k_mac_num_chanctxs(ar) == 0) || (ctx && (ar->state == ATH10K_STATE_RESTARTED))) { /* During driver restart due to firmware assert, since mac80211 * already has valid channel context for given radio, channel * context iteration return num_chanctx > 0. So fix rx_channel * when restart is in progress. */ ar->rx_channel = ctx->def.chan; } else { ar->rx_channel = NULL; } rcu_read_unlock(); } static void ath10k_mac_update_vif_chan(struct ath10k *ar, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs) { struct ath10k_vif *arvif; int ret; int i; lockdep_assert_held(&ar->conf_mutex); /* First stop monitor interface. Some FW versions crash if there's a * lone monitor interface. */ if (ar->monitor_started) ath10k_monitor_stop(ar); for (i = 0; i < n_vifs; i++) { arvif = (void *)vifs[i].vif->drv_priv; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx switch vdev_id %i freq %u->%u width %d->%d\n", arvif->vdev_id, vifs[i].old_ctx->def.chan->center_freq, vifs[i].new_ctx->def.chan->center_freq, vifs[i].old_ctx->def.width, vifs[i].new_ctx->def.width); if (WARN_ON(!arvif->is_started)) continue; if (WARN_ON(!arvif->is_up)) continue; ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) { ath10k_warn(ar, "failed to down vdev %d: %d\n", arvif->vdev_id, ret); continue; } } /* All relevant vdevs are downed and associated channel resources * should be available for the channel switch now. */ spin_lock_bh(&ar->data_lock); ath10k_mac_update_rx_channel(ar, NULL, vifs, n_vifs); spin_unlock_bh(&ar->data_lock); for (i = 0; i < n_vifs; i++) { arvif = (void *)vifs[i].vif->drv_priv; if (WARN_ON(!arvif->is_started)) continue; if (WARN_ON(!arvif->is_up)) continue; ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update bcn tmpl during csa: %d\n", ret); ret = ath10k_mac_setup_prb_tmpl(arvif); if (ret) ath10k_warn(ar, "failed to update prb tmpl during csa: %d\n", ret); ret = ath10k_vdev_restart(arvif, &vifs[i].new_ctx->def); if (ret) { ath10k_warn(ar, "failed to restart vdev %d: %d\n", arvif->vdev_id, ret); continue; } ret = ath10k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid, arvif->bssid); if (ret) { ath10k_warn(ar, "failed to bring vdev up %d: %d\n", arvif->vdev_id, ret); continue; } } ath10k_monitor_recalc(ar); } static int ath10k_mac_op_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx add freq %u width %d ptr %pK\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); ath10k_mac_update_rx_channel(ar, ctx, NULL, 0); spin_unlock_bh(&ar->data_lock); ath10k_recalc_radar_detection(ar); ath10k_monitor_recalc(ar); mutex_unlock(&ar->conf_mutex); return 0; } static void ath10k_mac_op_remove_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx remove freq %u width %d ptr %pK\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); ath10k_mac_update_rx_channel(ar, NULL, NULL, 0); spin_unlock_bh(&ar->data_lock); ath10k_recalc_radar_detection(ar); ath10k_monitor_recalc(ar); mutex_unlock(&ar->conf_mutex); } struct ath10k_mac_change_chanctx_arg { struct 
ieee80211_chanctx_conf *ctx; struct ieee80211_vif_chanctx_switch *vifs; int n_vifs; int next_vif; }; static void ath10k_mac_change_chanctx_cnt_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_mac_change_chanctx_arg *arg = data; if (rcu_access_pointer(vif->bss_conf.chanctx_conf) != arg->ctx) return; arg->n_vifs++; } static void ath10k_mac_change_chanctx_fill_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_mac_change_chanctx_arg *arg = data; struct ieee80211_chanctx_conf *ctx; ctx = rcu_access_pointer(vif->bss_conf.chanctx_conf); if (ctx != arg->ctx) return; if (WARN_ON(arg->next_vif == arg->n_vifs)) return; arg->vifs[arg->next_vif].vif = vif; arg->vifs[arg->next_vif].old_ctx = ctx; arg->vifs[arg->next_vif].new_ctx = ctx; arg->next_vif++; } static void ath10k_mac_op_change_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx, u32 changed) { struct ath10k *ar = hw->priv; struct ath10k_mac_change_chanctx_arg arg = { .ctx = ctx }; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx change freq %u width %d ptr %pK changed %x\n", ctx->def.chan->center_freq, ctx->def.width, ctx, changed); /* This shouldn't really happen because channel switching should use * switch_vif_chanctx(). */ if (WARN_ON(changed & IEEE80211_CHANCTX_CHANGE_CHANNEL)) goto unlock; if (changed & IEEE80211_CHANCTX_CHANGE_WIDTH) { ieee80211_iterate_active_interfaces_atomic( hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_mac_change_chanctx_cnt_iter, &arg); if (arg.n_vifs == 0) goto radar; arg.vifs = kcalloc(arg.n_vifs, sizeof(arg.vifs[0]), GFP_KERNEL); if (!arg.vifs) goto radar; ieee80211_iterate_active_interfaces_atomic( hw, ATH10K_ITER_NORMAL_FLAGS, ath10k_mac_change_chanctx_fill_iter, &arg); ath10k_mac_update_vif_chan(ar, arg.vifs, arg.n_vifs); kfree(arg.vifs); } radar: ath10k_recalc_radar_detection(ar); /* FIXME: How to configure Rx chains properly? */ /* No other actions are actually necessary. Firmware maintains channel * definitions per vdev internally and there's no host-side channel * context abstraction to configure, e.g. channel width. 
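	 * A width-only change is still propagated above by collecting the
	 * affected vifs and restarting their vdevs via
	 * ath10k_mac_update_vif_chan().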
*/ unlock: mutex_unlock(&ar->conf_mutex); } static int ath10k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx_conf *ctx) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx assign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); if (WARN_ON(arvif->is_started)) { mutex_unlock(&ar->conf_mutex); return -EBUSY; } ret = ath10k_vdev_start(arvif, &ctx->def); if (ret) { ath10k_warn(ar, "failed to start vdev %i addr %pM on freq %d: %d\n", arvif->vdev_id, vif->addr, ctx->def.chan->center_freq, ret); goto err; } arvif->is_started = true; ret = ath10k_mac_vif_setup_ps(arvif); if (ret) { ath10k_warn(ar, "failed to update vdev %i ps: %d\n", arvif->vdev_id, ret); goto err_stop; } if (vif->type == NL80211_IFTYPE_MONITOR) { ret = ath10k_wmi_vdev_up(ar, arvif->vdev_id, 0, vif->addr); if (ret) { ath10k_warn(ar, "failed to up monitor vdev %i: %d\n", arvif->vdev_id, ret); goto err_stop; } arvif->is_up = true; } if (ath10k_mac_can_set_cts_prot(arvif)) { ret = ath10k_mac_set_cts_prot(arvif); if (ret) ath10k_warn(ar, "failed to set cts protection for vdev %d: %d\n", arvif->vdev_id, ret); } if (ath10k_peer_stats_enabled(ar) && ar->hw_params.tx_stats_over_pktlog) { ar->pktlog_filter |= ATH10K_PKTLOG_PEER_STATS; ret = ath10k_wmi_pdev_pktlog_enable(ar, ar->pktlog_filter); if (ret) { ath10k_warn(ar, "failed to enable pktlog %d\n", ret); goto err_stop; } } mutex_unlock(&ar->conf_mutex); return 0; err_stop: ath10k_vdev_stop(arvif); arvif->is_started = false; ath10k_mac_vif_setup_ps(arvif); err: mutex_unlock(&ar->conf_mutex); return ret; } static void ath10k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, struct ieee80211_chanctx_conf *ctx) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; int ret; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx unassign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); WARN_ON(!arvif->is_started); if (vif->type == NL80211_IFTYPE_MONITOR) { WARN_ON(!arvif->is_up); ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) ath10k_warn(ar, "failed to down monitor vdev %i: %d\n", arvif->vdev_id, ret); arvif->is_up = false; } ret = ath10k_vdev_stop(arvif); if (ret) ath10k_warn(ar, "failed to stop vdev %i: %d\n", arvif->vdev_id, ret); arvif->is_started = false; mutex_unlock(&ar->conf_mutex); } static int ath10k_mac_op_switch_vif_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs, enum ieee80211_chanctx_switch_mode mode) { struct ath10k *ar = hw->priv; mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac chanctx switch n_vifs %d mode %d\n", n_vifs, mode); ath10k_mac_update_vif_chan(ar, vifs, n_vifs); mutex_unlock(&ar->conf_mutex); return 0; } static void ath10k_mac_op_sta_pre_rcu_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct ath10k *ar; struct ath10k_peer *peer; ar = hw->priv; list_for_each_entry(peer, &ar->peers, list) if (peer->sta == sta) peer->removed = true; } /* HT MCS parameters with Nss = 1 */ static const struct ath10k_index_ht_data_rate_type supported_ht_mcs_rate_nss1[] = { /* MCS L20 L40 S20 S40 */ {0, { 65, 135, 72, 150} }, {1, { 130, 270, 144, 300} }, {2, { 195, 405, 217, 450} }, {3, { 260, 540, 289, 600} }, {4, { 390, 810, 433, 900} }, {5, { 520, 1080, 578, 1200} }, {6, 
{ 585, 1215, 650, 1350} }, {7, { 650, 1350, 722, 1500} } }; /* HT MCS parameters with Nss = 2 */ static const struct ath10k_index_ht_data_rate_type supported_ht_mcs_rate_nss2[] = { /* MCS L20 L40 S20 S40 */ {0, {130, 270, 144, 300} }, {1, {260, 540, 289, 600} }, {2, {390, 810, 433, 900} }, {3, {520, 1080, 578, 1200} }, {4, {780, 1620, 867, 1800} }, {5, {1040, 2160, 1156, 2400} }, {6, {1170, 2430, 1300, 2700} }, {7, {1300, 2700, 1444, 3000} } }; /* MCS parameters with Nss = 1 */ static const struct ath10k_index_vht_data_rate_type supported_vht_mcs_rate_nss1[] = { /* MCS L80 S80 L40 S40 L20 S20 */ {0, {293, 325}, {135, 150}, {65, 72} }, {1, {585, 650}, {270, 300}, {130, 144} }, {2, {878, 975}, {405, 450}, {195, 217} }, {3, {1170, 1300}, {540, 600}, {260, 289} }, {4, {1755, 1950}, {810, 900}, {390, 433} }, {5, {2340, 2600}, {1080, 1200}, {520, 578} }, {6, {2633, 2925}, {1215, 1350}, {585, 650} }, {7, {2925, 3250}, {1350, 1500}, {650, 722} }, {8, {3510, 3900}, {1620, 1800}, {780, 867} }, {9, {3900, 4333}, {1800, 2000}, {780, 867} } }; /*MCS parameters with Nss = 2 */ static const struct ath10k_index_vht_data_rate_type supported_vht_mcs_rate_nss2[] = { /* MCS L80 S80 L40 S40 L20 S20 */ {0, {585, 650}, {270, 300}, {130, 144} }, {1, {1170, 1300}, {540, 600}, {260, 289} }, {2, {1755, 1950}, {810, 900}, {390, 433} }, {3, {2340, 2600}, {1080, 1200}, {520, 578} }, {4, {3510, 3900}, {1620, 1800}, {780, 867} }, {5, {4680, 5200}, {2160, 2400}, {1040, 1156} }, {6, {5265, 5850}, {2430, 2700}, {1170, 1300} }, {7, {5850, 6500}, {2700, 3000}, {1300, 1444} }, {8, {7020, 7800}, {3240, 3600}, {1560, 1733} }, {9, {7800, 8667}, {3600, 4000}, {1560, 1733} } }; static void ath10k_mac_get_rate_flags_ht(struct ath10k *ar, u32 rate, u8 nss, u8 mcs, u8 *flags, u8 *bw) { struct ath10k_index_ht_data_rate_type *mcs_rate; u8 index; size_t len_nss1 = ARRAY_SIZE(supported_ht_mcs_rate_nss1); size_t len_nss2 = ARRAY_SIZE(supported_ht_mcs_rate_nss2); if (mcs >= (len_nss1 + len_nss2)) { ath10k_warn(ar, "not supported mcs %d in current rate table", mcs); return; } mcs_rate = (struct ath10k_index_ht_data_rate_type *) ((nss == 1) ? &supported_ht_mcs_rate_nss1 : &supported_ht_mcs_rate_nss2); if (mcs >= len_nss1) index = mcs - len_nss1; else index = mcs; if (rate == mcs_rate[index].supported_rate[0]) { *bw = RATE_INFO_BW_20; } else if (rate == mcs_rate[index].supported_rate[1]) { *bw |= RATE_INFO_BW_40; } else if (rate == mcs_rate[index].supported_rate[2]) { *bw |= RATE_INFO_BW_20; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (rate == mcs_rate[index].supported_rate[3]) { *bw |= RATE_INFO_BW_40; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else { ath10k_warn(ar, "invalid ht params rate %d 100kbps nss %d mcs %d", rate, nss, mcs); } } static void ath10k_mac_get_rate_flags_vht(struct ath10k *ar, u32 rate, u8 nss, u8 mcs, u8 *flags, u8 *bw) { struct ath10k_index_vht_data_rate_type *mcs_rate; mcs_rate = (struct ath10k_index_vht_data_rate_type *) ((nss == 1) ? 
&supported_vht_mcs_rate_nss1 : &supported_vht_mcs_rate_nss2); if (rate == mcs_rate[mcs].supported_VHT80_rate[0]) { *bw = RATE_INFO_BW_80; } else if (rate == mcs_rate[mcs].supported_VHT80_rate[1]) { *bw = RATE_INFO_BW_80; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (rate == mcs_rate[mcs].supported_VHT40_rate[0]) { *bw = RATE_INFO_BW_40; } else if (rate == mcs_rate[mcs].supported_VHT40_rate[1]) { *bw = RATE_INFO_BW_40; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (rate == mcs_rate[mcs].supported_VHT20_rate[0]) { *bw = RATE_INFO_BW_20; } else if (rate == mcs_rate[mcs].supported_VHT20_rate[1]) { *bw = RATE_INFO_BW_20; *flags |= RATE_INFO_FLAGS_SHORT_GI; } else { ath10k_warn(ar, "invalid vht params rate %d 100kbps nss %d mcs %d", rate, nss, mcs); } } static void ath10k_mac_get_rate_flags(struct ath10k *ar, u32 rate, enum ath10k_phy_mode mode, u8 nss, u8 mcs, u8 *flags, u8 *bw) { if (mode == ATH10K_PHY_MODE_HT) { *flags = RATE_INFO_FLAGS_MCS; ath10k_mac_get_rate_flags_ht(ar, rate, nss, mcs, flags, bw); } else if (mode == ATH10K_PHY_MODE_VHT) { *flags = RATE_INFO_FLAGS_VHT_MCS; ath10k_mac_get_rate_flags_vht(ar, rate, nss, mcs, flags, bw); } } static void ath10k_mac_parse_bitrate(struct ath10k *ar, u32 rate_code, u32 bitrate_kbps, struct rate_info *rate) { enum ath10k_phy_mode mode = ATH10K_PHY_MODE_LEGACY; enum wmi_rate_preamble preamble = WMI_TLV_GET_HW_RC_PREAM_V1(rate_code); u8 nss = WMI_TLV_GET_HW_RC_NSS_V1(rate_code) + 1; u8 mcs = WMI_TLV_GET_HW_RC_RATE_V1(rate_code); u8 flags = 0, bw = 0; ath10k_dbg(ar, ATH10K_DBG_MAC, "mac parse rate code 0x%x bitrate %d kbps\n", rate_code, bitrate_kbps); if (preamble == WMI_RATE_PREAMBLE_HT) mode = ATH10K_PHY_MODE_HT; else if (preamble == WMI_RATE_PREAMBLE_VHT) mode = ATH10K_PHY_MODE_VHT; ath10k_mac_get_rate_flags(ar, bitrate_kbps / 100, mode, nss, mcs, &flags, &bw); ath10k_dbg(ar, ATH10K_DBG_MAC, "mac parse bitrate preamble %d mode %d nss %d mcs %d flags %x bw %d\n", preamble, mode, nss, mcs, flags, bw); rate->flags = flags; rate->bw = bw; rate->legacy = bitrate_kbps / 100; rate->nss = nss; rate->mcs = mcs; } static void ath10k_mac_sta_get_peer_stats_info(struct ath10k *ar, struct ieee80211_sta *sta, struct station_info *sinfo) { struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k_peer *peer; unsigned long time_left; int ret; if (!(ar->hw_params.supports_peer_stats_info && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA)) return; spin_lock_bh(&ar->data_lock); peer = ath10k_peer_find(ar, arsta->arvif->vdev_id, sta->addr); spin_unlock_bh(&ar->data_lock); if (!peer) return; reinit_completion(&ar->peer_stats_info_complete); ret = ath10k_wmi_request_peer_stats_info(ar, arsta->arvif->vdev_id, WMI_REQUEST_ONE_PEER_STATS_INFO, arsta->arvif->bssid, 0); if (ret && ret != -EOPNOTSUPP) { ath10k_warn(ar, "could not request peer stats info: %d\n", ret); return; } time_left = wait_for_completion_timeout(&ar->peer_stats_info_complete, 3 * HZ); if (time_left == 0) { ath10k_warn(ar, "timed out waiting peer stats info\n"); return; } if (arsta->rx_rate_code != 0 && arsta->rx_bitrate_kbps != 0) { ath10k_mac_parse_bitrate(ar, arsta->rx_rate_code, arsta->rx_bitrate_kbps, &sinfo->rxrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); arsta->rx_rate_code = 0; arsta->rx_bitrate_kbps = 0; } if (arsta->tx_rate_code != 0 && arsta->tx_bitrate_kbps != 0) { ath10k_mac_parse_bitrate(ar, arsta->tx_rate_code, arsta->tx_bitrate_kbps, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); arsta->tx_rate_code = 0; arsta->tx_bitrate_kbps = 0; 
} } static void ath10k_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct station_info *sinfo) { struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv; struct ath10k *ar = arsta->arvif->ar; if (!ath10k_peer_stats_enabled(ar)) return; mutex_lock(&ar->conf_mutex); ath10k_debug_fw_stats_request(ar); mutex_unlock(&ar->conf_mutex); sinfo->rx_duration = arsta->rx_duration; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); if (arsta->txrate.legacy || arsta->txrate.nss) { if (arsta->txrate.legacy) { sinfo->txrate.legacy = arsta->txrate.legacy; } else { sinfo->txrate.mcs = arsta->txrate.mcs; sinfo->txrate.nss = arsta->txrate.nss; sinfo->txrate.bw = arsta->txrate.bw; } sinfo->txrate.flags = arsta->txrate.flags; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); } if (ar->htt.disable_tx_comp) { sinfo->tx_failed = arsta->tx_failed; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } sinfo->tx_retries = arsta->tx_retries; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); ath10k_mac_sta_get_peer_stats_info(ar, sta, sinfo); } static int ath10k_mac_op_set_tid_config(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct cfg80211_tid_config *tid_config) { struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_mac_iter_tid_conf_data data = {}; struct wmi_per_peer_per_tid_cfg_arg arg = {}; int ret, i; mutex_lock(&ar->conf_mutex); arg.vdev_id = arvif->vdev_id; arvif->tids_rst = 0; memset(arvif->tid_conf_changed, 0, sizeof(arvif->tid_conf_changed)); for (i = 0; i < tid_config->n_tid_conf; i++) { ret = ath10k_mac_parse_tid_config(ar, sta, vif, &tid_config->tid_conf[i], &arg); if (ret) goto exit; } ret = 0; if (sta) goto exit; arvif->tids_rst = 0; data.curr_vif = vif; data.ar = ar; ieee80211_iterate_stations_atomic(hw, ath10k_mac_vif_stations_tid_conf, &data); exit: mutex_unlock(&ar->conf_mutex); return ret; } static int ath10k_mac_op_reset_tid_config(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, u8 tids) { struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_mac_iter_tid_conf_data data = {}; struct ath10k *ar = hw->priv; int ret = 0; mutex_lock(&ar->conf_mutex); if (sta) { arvif->tids_rst = 0; ret = ath10k_mac_reset_tid_config(ar, sta, arvif, tids); goto exit; } arvif->tids_rst = tids; data.curr_vif = vif; data.ar = ar; ieee80211_iterate_stations_atomic(hw, ath10k_mac_vif_stations_tid_conf, &data); exit: mutex_unlock(&ar->conf_mutex); return ret; } static const struct ieee80211_ops ath10k_ops = { .tx = ath10k_mac_op_tx, .wake_tx_queue = ath10k_mac_op_wake_tx_queue, .start = ath10k_start, .stop = ath10k_stop, .config = ath10k_config, .add_interface = ath10k_add_interface, .update_vif_offload = ath10k_update_vif_offload, .remove_interface = ath10k_remove_interface, .configure_filter = ath10k_configure_filter, .bss_info_changed = ath10k_bss_info_changed, .set_coverage_class = ath10k_mac_op_set_coverage_class, .hw_scan = ath10k_hw_scan, .cancel_hw_scan = ath10k_cancel_hw_scan, .set_key = ath10k_set_key, .set_default_unicast_key = ath10k_set_default_unicast_key, .sta_state = ath10k_sta_state, .sta_set_txpwr = ath10k_sta_set_txpwr, .conf_tx = ath10k_conf_tx, .remain_on_channel = ath10k_remain_on_channel, .cancel_remain_on_channel = ath10k_cancel_remain_on_channel, .set_rts_threshold = ath10k_set_rts_threshold, .set_frag_threshold = ath10k_mac_op_set_frag_threshold, .flush = ath10k_flush, .tx_last_beacon = ath10k_tx_last_beacon, 
.set_antenna = ath10k_set_antenna, .get_antenna = ath10k_get_antenna, .reconfig_complete = ath10k_reconfig_complete, .get_survey = ath10k_get_survey, .set_bitrate_mask = ath10k_mac_op_set_bitrate_mask, .sta_rc_update = ath10k_sta_rc_update, .offset_tsf = ath10k_offset_tsf, .ampdu_action = ath10k_ampdu_action, .get_et_sset_count = ath10k_debug_get_et_sset_count, .get_et_stats = ath10k_debug_get_et_stats, .get_et_strings = ath10k_debug_get_et_strings, .add_chanctx = ath10k_mac_op_add_chanctx, .remove_chanctx = ath10k_mac_op_remove_chanctx, .change_chanctx = ath10k_mac_op_change_chanctx, .assign_vif_chanctx = ath10k_mac_op_assign_vif_chanctx, .unassign_vif_chanctx = ath10k_mac_op_unassign_vif_chanctx, .switch_vif_chanctx = ath10k_mac_op_switch_vif_chanctx, .sta_pre_rcu_remove = ath10k_mac_op_sta_pre_rcu_remove, .sta_statistics = ath10k_sta_statistics, .set_tid_config = ath10k_mac_op_set_tid_config, .reset_tid_config = ath10k_mac_op_reset_tid_config, CFG80211_TESTMODE_CMD(ath10k_tm_cmd) #ifdef CONFIG_PM .suspend = ath10k_wow_op_suspend, .resume = ath10k_wow_op_resume, .set_wakeup = ath10k_wow_op_set_wakeup, #endif #ifdef CONFIG_MAC80211_DEBUGFS .sta_add_debugfs = ath10k_sta_add_debugfs, #endif .set_sar_specs = ath10k_mac_set_sar_specs, }; #define CHAN2G(_channel, _freq, _flags) { \ .band = NL80211_BAND_2GHZ, \ .hw_value = (_channel), \ .center_freq = (_freq), \ .flags = (_flags), \ .max_antenna_gain = 0, \ .max_power = 30, \ } #define CHAN5G(_channel, _freq, _flags) { \ .band = NL80211_BAND_5GHZ, \ .hw_value = (_channel), \ .center_freq = (_freq), \ .flags = (_flags), \ .max_antenna_gain = 0, \ .max_power = 30, \ } static const struct ieee80211_channel ath10k_2ghz_channels[] = { CHAN2G(1, 2412, 0), CHAN2G(2, 2417, 0), CHAN2G(3, 2422, 0), CHAN2G(4, 2427, 0), CHAN2G(5, 2432, 0), CHAN2G(6, 2437, 0), CHAN2G(7, 2442, 0), CHAN2G(8, 2447, 0), CHAN2G(9, 2452, 0), CHAN2G(10, 2457, 0), CHAN2G(11, 2462, 0), CHAN2G(12, 2467, 0), CHAN2G(13, 2472, 0), CHAN2G(14, 2484, 0), }; static const struct ieee80211_channel ath10k_5ghz_channels[] = { CHAN5G(36, 5180, 0), CHAN5G(40, 5200, 0), CHAN5G(44, 5220, 0), CHAN5G(48, 5240, 0), CHAN5G(52, 5260, 0), CHAN5G(56, 5280, 0), CHAN5G(60, 5300, 0), CHAN5G(64, 5320, 0), CHAN5G(100, 5500, 0), CHAN5G(104, 5520, 0), CHAN5G(108, 5540, 0), CHAN5G(112, 5560, 0), CHAN5G(116, 5580, 0), CHAN5G(120, 5600, 0), CHAN5G(124, 5620, 0), CHAN5G(128, 5640, 0), CHAN5G(132, 5660, 0), CHAN5G(136, 5680, 0), CHAN5G(140, 5700, 0), CHAN5G(144, 5720, 0), CHAN5G(149, 5745, 0), CHAN5G(153, 5765, 0), CHAN5G(157, 5785, 0), CHAN5G(161, 5805, 0), CHAN5G(165, 5825, 0), CHAN5G(169, 5845, 0), CHAN5G(173, 5865, 0), /* If you add more, you may need to change ATH10K_MAX_5G_CHAN */ /* And you will definitely need to change ATH10K_NUM_CHANS in core.h */ }; struct ath10k *ath10k_mac_create(size_t priv_size) { struct ieee80211_hw *hw; struct ieee80211_ops *ops; struct ath10k *ar; ops = kmemdup(&ath10k_ops, sizeof(ath10k_ops), GFP_KERNEL); if (!ops) return NULL; hw = ieee80211_alloc_hw(sizeof(struct ath10k) + priv_size, ops); if (!hw) { kfree(ops); return NULL; } ar = hw->priv; ar->hw = hw; ar->ops = ops; return ar; } void ath10k_mac_destroy(struct ath10k *ar) { struct ieee80211_ops *ops = ar->ops; ieee80211_free_hw(ar->hw); kfree(ops); } static const struct ieee80211_iface_limit ath10k_if_limits[] = { { .max = 8, .types = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_P2P_CLIENT) }, { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) }, { .max = 1, .types = BIT(NL80211_IFTYPE_P2P_DEVICE) }, { .max = 7, .types = 
BIT(NL80211_IFTYPE_AP) #ifdef CONFIG_MAC80211_MESH | BIT(NL80211_IFTYPE_MESH_POINT) #endif }, }; static const struct ieee80211_iface_limit ath10k_10x_if_limits[] = { { .max = 8, .types = BIT(NL80211_IFTYPE_AP) #ifdef CONFIG_MAC80211_MESH | BIT(NL80211_IFTYPE_MESH_POINT) #endif }, { .max = 1, .types = BIT(NL80211_IFTYPE_STATION) }, }; static const struct ieee80211_iface_combination ath10k_if_comb[] = { { .limits = ath10k_if_limits, .n_limits = ARRAY_SIZE(ath10k_if_limits), .max_interfaces = 8, .num_different_channels = 1, .beacon_int_infra_match = true, }, }; static const struct ieee80211_iface_combination ath10k_10x_if_comb[] = { { .limits = ath10k_10x_if_limits, .n_limits = ARRAY_SIZE(ath10k_10x_if_limits), .max_interfaces = 8, .num_different_channels = 1, .beacon_int_infra_match = true, .beacon_int_min_gcd = 1, #ifdef CONFIG_ATH10K_DFS_CERTIFIED .radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80), #endif }, }; static const struct ieee80211_iface_limit ath10k_tlv_if_limit[] = { { .max = 2, .types = BIT(NL80211_IFTYPE_STATION), }, { .max = 2, .types = BIT(NL80211_IFTYPE_AP) | #ifdef CONFIG_MAC80211_MESH BIT(NL80211_IFTYPE_MESH_POINT) | #endif BIT(NL80211_IFTYPE_P2P_CLIENT) | BIT(NL80211_IFTYPE_P2P_GO), }, { .max = 1, .types = BIT(NL80211_IFTYPE_P2P_DEVICE), }, }; static const struct ieee80211_iface_limit ath10k_tlv_qcs_if_limit[] = { { .max = 2, .types = BIT(NL80211_IFTYPE_STATION), }, { .max = 2, .types = BIT(NL80211_IFTYPE_P2P_CLIENT), }, { .max = 1, .types = BIT(NL80211_IFTYPE_AP) | #ifdef CONFIG_MAC80211_MESH BIT(NL80211_IFTYPE_MESH_POINT) | #endif BIT(NL80211_IFTYPE_P2P_GO), }, { .max = 1, .types = BIT(NL80211_IFTYPE_P2P_DEVICE), }, }; static const struct ieee80211_iface_limit ath10k_tlv_if_limit_ibss[] = { { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, { .max = 1, .types = BIT(NL80211_IFTYPE_ADHOC), }, }; /* FIXME: This is not thoroughly tested. These combinations may over- or * underestimate hw/fw capabilities. 
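 * As written they allow e.g. up to two stations plus two AP/mesh/P2P
 * interfaces on one channel (four vifs total); the QCS variant adds a
 * two-channel combination, presumably for multi-channel P2P concurrency.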
*/ static struct ieee80211_iface_combination ath10k_tlv_if_comb[] = { { .limits = ath10k_tlv_if_limit, .num_different_channels = 1, .max_interfaces = 4, .n_limits = ARRAY_SIZE(ath10k_tlv_if_limit), }, { .limits = ath10k_tlv_if_limit_ibss, .num_different_channels = 1, .max_interfaces = 2, .n_limits = ARRAY_SIZE(ath10k_tlv_if_limit_ibss), }, }; static struct ieee80211_iface_combination ath10k_tlv_qcs_if_comb[] = { { .limits = ath10k_tlv_if_limit, .num_different_channels = 1, .max_interfaces = 4, .n_limits = ARRAY_SIZE(ath10k_tlv_if_limit), }, { .limits = ath10k_tlv_qcs_if_limit, .num_different_channels = 2, .max_interfaces = 4, .n_limits = ARRAY_SIZE(ath10k_tlv_qcs_if_limit), }, { .limits = ath10k_tlv_if_limit_ibss, .num_different_channels = 1, .max_interfaces = 2, .n_limits = ARRAY_SIZE(ath10k_tlv_if_limit_ibss), }, }; static const struct ieee80211_iface_limit ath10k_10_4_if_limits[] = { { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, { .max = 16, .types = BIT(NL80211_IFTYPE_AP) #ifdef CONFIG_MAC80211_MESH | BIT(NL80211_IFTYPE_MESH_POINT) #endif }, }; static const struct ieee80211_iface_combination ath10k_10_4_if_comb[] = { { .limits = ath10k_10_4_if_limits, .n_limits = ARRAY_SIZE(ath10k_10_4_if_limits), .max_interfaces = 16, .num_different_channels = 1, .beacon_int_infra_match = true, .beacon_int_min_gcd = 1, #ifdef CONFIG_ATH10K_DFS_CERTIFIED .radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_80P80) | BIT(NL80211_CHAN_WIDTH_160), #endif }, }; static const struct ieee80211_iface_combination ath10k_10_4_bcn_int_if_comb[] = { { .limits = ath10k_10_4_if_limits, .n_limits = ARRAY_SIZE(ath10k_10_4_if_limits), .max_interfaces = 16, .num_different_channels = 1, .beacon_int_infra_match = true, .beacon_int_min_gcd = 100, #ifdef CONFIG_ATH10K_DFS_CERTIFIED .radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_80P80) | BIT(NL80211_CHAN_WIDTH_160), #endif }, }; static void ath10k_get_arvif_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { struct ath10k_vif_iter *arvif_iter = data; struct ath10k_vif *arvif = (void *)vif->drv_priv; if (arvif->vdev_id == arvif_iter->vdev_id) arvif_iter->arvif = arvif; } struct ath10k_vif *ath10k_get_arvif(struct ath10k *ar, u32 vdev_id) { struct ath10k_vif_iter arvif_iter; memset(&arvif_iter, 0, sizeof(struct ath10k_vif_iter)); arvif_iter.vdev_id = vdev_id; ieee80211_iterate_active_interfaces_atomic(ar->hw, ATH10K_ITER_RESUME_FLAGS, ath10k_get_arvif_iter, &arvif_iter); if (!arvif_iter.arvif) { ath10k_warn(ar, "No VIF found for vdev %d\n", vdev_id); return NULL; } return arvif_iter.arvif; } #define WRD_METHOD "WRDD" #define WRDD_WIFI (0x07) static u32 ath10k_mac_wrdd_get_mcc(struct ath10k *ar, union acpi_object *wrdd) { union acpi_object *mcc_pkg; union acpi_object *domain_type; union acpi_object *mcc_value; u32 i; if (wrdd->type != ACPI_TYPE_PACKAGE || wrdd->package.count < 2 || wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || wrdd->package.elements[0].integer.value != 0) { ath10k_warn(ar, "ignoring malformed/unsupported wrdd structure\n"); return 0; } for (i = 1; i < wrdd->package.count; ++i) { mcc_pkg = &wrdd->package.elements[i]; if (mcc_pkg->type != ACPI_TYPE_PACKAGE) continue; if (mcc_pkg->package.count < 2) continue; if (mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || mcc_pkg->package.elements[1].type != 
ACPI_TYPE_INTEGER) continue; domain_type = &mcc_pkg->package.elements[0]; if (domain_type->integer.value != WRDD_WIFI) continue; mcc_value = &mcc_pkg->package.elements[1]; return mcc_value->integer.value; } return 0; } static int ath10k_mac_get_wrdd_regulatory(struct ath10k *ar, u16 *rd) { acpi_handle root_handle; acpi_handle handle; struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; acpi_status status; u32 alpha2_code; char alpha2[3]; root_handle = ACPI_HANDLE(ar->dev); if (!root_handle) return -EOPNOTSUPP; status = acpi_get_handle(root_handle, (acpi_string)WRD_METHOD, &handle); if (ACPI_FAILURE(status)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "failed to get wrd method %d\n", status); return -EIO; } status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); if (ACPI_FAILURE(status)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "failed to call wrdc %d\n", status); return -EIO; } alpha2_code = ath10k_mac_wrdd_get_mcc(ar, wrdd.pointer); kfree(wrdd.pointer); if (!alpha2_code) return -EIO; alpha2[0] = (alpha2_code >> 8) & 0xff; alpha2[1] = (alpha2_code >> 0) & 0xff; alpha2[2] = '\0'; ath10k_dbg(ar, ATH10K_DBG_BOOT, "regulatory hint from WRDD (alpha2-code): %s\n", alpha2); *rd = ath_regd_find_country_by_name(alpha2); if (*rd == 0xffff) return -EIO; *rd |= COUNTRY_ERD_FLAG; return 0; } static int ath10k_mac_init_rd(struct ath10k *ar) { int ret; u16 rd; ret = ath10k_mac_get_wrdd_regulatory(ar, &rd); if (ret) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "fallback to eeprom programmed regulatory settings\n"); rd = ar->hw_eeprom_rd; } ar->ath_common.regulatory.current_rd = rd; return 0; } int ath10k_mac_register(struct ath10k *ar) { static const u32 cipher_suites[] = { WLAN_CIPHER_SUITE_WEP40, WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, /* Do not add hardware supported ciphers before this line. * Allow software encryption for all chips. Don't forget to * update n_cipher_suites below. */ WLAN_CIPHER_SUITE_AES_CMAC, WLAN_CIPHER_SUITE_BIP_CMAC_256, WLAN_CIPHER_SUITE_BIP_GMAC_128, WLAN_CIPHER_SUITE_BIP_GMAC_256, /* Only QCA99x0 and QCA4019 variants support GCMP-128, GCMP-256 * and CCMP-256 in hardware. 
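		 * Other chips still list these entries; the number of suites
		 * actually advertised to mac80211 is trimmed later via
		 * hw_params.n_cipher_suites.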
*/ WLAN_CIPHER_SUITE_GCMP, WLAN_CIPHER_SUITE_GCMP_256, WLAN_CIPHER_SUITE_CCMP_256, }; struct ieee80211_supported_band *band; void *channels; int ret; if (!is_valid_ether_addr(ar->mac_addr)) { ath10k_warn(ar, "invalid MAC address; choosing random\n"); eth_random_addr(ar->mac_addr); } SET_IEEE80211_PERM_ADDR(ar->hw, ar->mac_addr); SET_IEEE80211_DEV(ar->hw, ar->dev); BUILD_BUG_ON((ARRAY_SIZE(ath10k_2ghz_channels) + ARRAY_SIZE(ath10k_5ghz_channels)) != ATH10K_NUM_CHANS); if (ar->phy_capability & WHAL_WLAN_11G_CAPABILITY) { channels = kmemdup(ath10k_2ghz_channels, sizeof(ath10k_2ghz_channels), GFP_KERNEL); if (!channels) { ret = -ENOMEM; goto err_free; } band = &ar->mac.sbands[NL80211_BAND_2GHZ]; band->n_channels = ARRAY_SIZE(ath10k_2ghz_channels); band->channels = channels; if (ar->hw_params.cck_rate_map_rev2) { band->n_bitrates = ath10k_g_rates_rev2_size; band->bitrates = ath10k_g_rates_rev2; } else { band->n_bitrates = ath10k_g_rates_size; band->bitrates = ath10k_g_rates; } ar->hw->wiphy->bands[NL80211_BAND_2GHZ] = band; } if (ar->phy_capability & WHAL_WLAN_11A_CAPABILITY) { channels = kmemdup(ath10k_5ghz_channels, sizeof(ath10k_5ghz_channels), GFP_KERNEL); if (!channels) { ret = -ENOMEM; goto err_free; } band = &ar->mac.sbands[NL80211_BAND_5GHZ]; band->n_channels = ARRAY_SIZE(ath10k_5ghz_channels); band->channels = channels; band->n_bitrates = ath10k_a_rates_size; band->bitrates = ath10k_a_rates; ar->hw->wiphy->bands[NL80211_BAND_5GHZ] = band; } wiphy_read_of_freq_limits(ar->hw->wiphy); ath10k_mac_setup_ht_vht_cap(ar); ar->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_MESH_POINT); ar->hw->wiphy->available_antennas_rx = ar->cfg_rx_chainmask; ar->hw->wiphy->available_antennas_tx = ar->cfg_tx_chainmask; if (!test_bit(ATH10K_FW_FEATURE_NO_P2P, ar->normal_mode_fw.fw_file.fw_features)) ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_P2P_DEVICE) | BIT(NL80211_IFTYPE_P2P_CLIENT) | BIT(NL80211_IFTYPE_P2P_GO); ieee80211_hw_set(ar->hw, SIGNAL_DBM); if (!test_bit(ATH10K_FW_FEATURE_NO_PS, ar->running_fw->fw_file.fw_features)) { ieee80211_hw_set(ar->hw, SUPPORTS_PS); ieee80211_hw_set(ar->hw, SUPPORTS_DYNAMIC_PS); } ieee80211_hw_set(ar->hw, MFP_CAPABLE); ieee80211_hw_set(ar->hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(ar->hw, HAS_RATE_CONTROL); ieee80211_hw_set(ar->hw, AP_LINK_PS); ieee80211_hw_set(ar->hw, SPECTRUM_MGMT); ieee80211_hw_set(ar->hw, SUPPORT_FAST_XMIT); ieee80211_hw_set(ar->hw, CONNECTION_MONITOR); ieee80211_hw_set(ar->hw, SUPPORTS_PER_STA_GTK); ieee80211_hw_set(ar->hw, WANT_MONITOR_VIF); ieee80211_hw_set(ar->hw, CHANCTX_STA_CSA); ieee80211_hw_set(ar->hw, QUEUE_CONTROL); ieee80211_hw_set(ar->hw, SUPPORTS_TX_FRAG); ieee80211_hw_set(ar->hw, REPORTS_LOW_ACK); if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) ieee80211_hw_set(ar->hw, SW_CRYPTO_CONTROL); ar->hw->wiphy->features |= NL80211_FEATURE_STATIC_SMPS; ar->hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; if (ar->ht_cap_info & WMI_HT_CAP_DYNAMIC_SMPS) ar->hw->wiphy->features |= NL80211_FEATURE_DYNAMIC_SMPS; if (ar->ht_cap_info & WMI_HT_CAP_ENABLED) { ieee80211_hw_set(ar->hw, AMPDU_AGGREGATION); ieee80211_hw_set(ar->hw, TX_AMPDU_SETUP_IN_HW); } ar->hw->wiphy->max_scan_ssids = WLAN_SCAN_PARAMS_MAX_SSID; ar->hw->wiphy->max_scan_ie_len = WLAN_SCAN_PARAMS_MAX_IE_LEN; if (test_bit(WMI_SERVICE_NLO, ar->wmi.svc_map)) { ar->hw->wiphy->max_sched_scan_ssids = WMI_PNO_MAX_SUPP_NETWORKS; ar->hw->wiphy->max_match_sets = WMI_PNO_MAX_SUPP_NETWORKS; ar->hw->wiphy->max_sched_scan_ie_len = 
WMI_PNO_MAX_IE_LENGTH; ar->hw->wiphy->max_sched_scan_plans = WMI_PNO_MAX_SCHED_SCAN_PLANS; ar->hw->wiphy->max_sched_scan_plan_interval = WMI_PNO_MAX_SCHED_SCAN_PLAN_INT; ar->hw->wiphy->max_sched_scan_plan_iterations = WMI_PNO_MAX_SCHED_SCAN_PLAN_ITRNS; ar->hw->wiphy->features |= NL80211_FEATURE_ND_RANDOM_MAC_ADDR; } ar->hw->vif_data_size = sizeof(struct ath10k_vif); ar->hw->sta_data_size = sizeof(struct ath10k_sta); ar->hw->txq_data_size = sizeof(struct ath10k_txq); ar->hw->max_listen_interval = ATH10K_MAX_HW_LISTEN_INTERVAL; if (test_bit(WMI_SERVICE_BEACON_OFFLOAD, ar->wmi.svc_map)) { ar->hw->wiphy->flags |= WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD; /* Firmware delivers WPS/P2P Probe Requests frames to driver so * that userspace (e.g. wpa_supplicant/hostapd) can generate * correct Probe Responses. This is more of a hack advert.. */ ar->hw->wiphy->probe_resp_offload |= NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS | NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 | NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P; } if (test_bit(WMI_SERVICE_TDLS, ar->wmi.svc_map) || test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map)) { ar->hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS; if (test_bit(WMI_SERVICE_TDLS_WIDER_BANDWIDTH, ar->wmi.svc_map)) ieee80211_hw_set(ar->hw, TDLS_WIDER_BW); } if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map)) ieee80211_hw_set(ar->hw, SUPPORTS_TDLS_BUFFER_STA); if (ath10k_frame_mode == ATH10K_HW_TXRX_ETHERNET) { if (ar->wmi.vdev_param->tx_encap_type != WMI_VDEV_PARAM_UNSUPPORTED) ieee80211_hw_set(ar->hw, SUPPORTS_TX_ENCAP_OFFLOAD); } ar->hw->wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; ar->hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH; ar->hw->wiphy->max_remain_on_channel_duration = 5000; ar->hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; ar->hw->wiphy->features |= NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE | NL80211_FEATURE_AP_SCAN; ar->hw->wiphy->max_ap_assoc_sta = ar->max_num_stations; ret = ath10k_wow_init(ar); if (ret) { ath10k_warn(ar, "failed to init wow: %d\n", ret); goto err_free; } wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_VHT_IBSS); wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_SET_SCAN_DWELL); wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_AQL); if (ar->hw_params.mcast_frame_registration) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS); if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map) || test_bit(WMI_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS, ar->wmi.svc_map)) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT); if (ath10k_peer_stats_enabled(ar) || test_bit(WMI_SERVICE_REPORT_AIRTIME, ar->wmi.svc_map)) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS); if (test_bit(WMI_SERVICE_RTT_RESPONDER_ROLE, ar->wmi.svc_map)) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER); if (test_bit(WMI_SERVICE_TX_PWR_PER_PEER, ar->wmi.svc_map)) wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_STA_TX_PWR); if (test_bit(WMI_SERVICE_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) { ar->hw->wiphy->tid_config_support.vif |= BIT(NL80211_TID_CONFIG_ATTR_NOACK) | BIT(NL80211_TID_CONFIG_ATTR_RETRY_SHORT) | BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG) | BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE) | BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE); if (test_bit(WMI_SERVICE_EXT_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) { ar->hw->wiphy->tid_config_support.vif |= BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL); } 
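	/* Peers are offered the same per-TID attributes as the vif; without
	 * WMI_SERVICE_PEER_TID_CONFIGS_SUPPORT the set_tid_config op is
	 * cleared in the else branch so the feature is not advertised.
	 */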
ar->hw->wiphy->tid_config_support.peer = ar->hw->wiphy->tid_config_support.vif; ar->hw->wiphy->max_data_retry_count = ATH10K_MAX_RETRY_COUNT; } else { ar->ops->set_tid_config = NULL; } /* * on LL hardware queues are managed entirely by the FW * so we only advertise to mac we can do the queues thing */ ar->hw->queues = IEEE80211_MAX_QUEUES; /* vdev_ids are used as hw queue numbers. Make sure offchan tx queue is * something that vdev_ids can't reach so that we don't stop the queue * accidentally. */ ar->hw->offchannel_tx_hw_queue = IEEE80211_MAX_QUEUES - 1; switch (ar->running_fw->fw_file.wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_MAIN: ar->hw->wiphy->iface_combinations = ath10k_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_if_comb); ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC); break; case ATH10K_FW_WMI_OP_VERSION_TLV: if (test_bit(WMI_SERVICE_ADAPTIVE_OCS, ar->wmi.svc_map)) { ar->hw->wiphy->iface_combinations = ath10k_tlv_qcs_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_tlv_qcs_if_comb); } else { ar->hw->wiphy->iface_combinations = ath10k_tlv_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_tlv_if_comb); } ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC); break; case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: ar->hw->wiphy->iface_combinations = ath10k_10x_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_10x_if_comb); break; case ATH10K_FW_WMI_OP_VERSION_10_4: ar->hw->wiphy->iface_combinations = ath10k_10_4_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_10_4_if_comb); if (test_bit(WMI_SERVICE_VDEV_DIFFERENT_BEACON_INTERVAL_SUPPORT, ar->wmi.svc_map)) { ar->hw->wiphy->iface_combinations = ath10k_10_4_bcn_int_if_comb; ar->hw->wiphy->n_iface_combinations = ARRAY_SIZE(ath10k_10_4_bcn_int_if_comb); } break; case ATH10K_FW_WMI_OP_VERSION_UNSET: case ATH10K_FW_WMI_OP_VERSION_MAX: WARN_ON(1); ret = -EINVAL; goto err_free; } if (ar->hw_params.dynamic_sar_support) ar->hw->wiphy->sar_capa = &ath10k_sar_capa; if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) ar->hw->netdev_features = NETIF_F_HW_CSUM; if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) { /* Init ath dfs pattern detector */ ar->ath_common.debug_mask = ATH_DBG_DFS; ar->dfs_detector = dfs_pattern_detector_init(&ar->ath_common, NL80211_DFS_UNSET); if (!ar->dfs_detector) ath10k_warn(ar, "failed to initialise DFS pattern detector\n"); } ret = ath10k_mac_init_rd(ar); if (ret) { ath10k_err(ar, "failed to derive regdom: %d\n", ret); goto err_dfs_detector_exit; } /* Disable set_coverage_class for chipsets that do not support it. */ if (!ar->hw_params.hw_ops->set_coverage_class) ar->ops->set_coverage_class = NULL; ret = ath_regd_init(&ar->ath_common.regulatory, ar->hw->wiphy, ath10k_reg_notifier); if (ret) { ath10k_err(ar, "failed to initialise regulatory: %i\n", ret); goto err_dfs_detector_exit; } if (test_bit(WMI_SERVICE_SPOOF_MAC_SUPPORT, ar->wmi.svc_map)) { ar->hw->wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; } ar->hw->wiphy->cipher_suites = cipher_suites; /* QCA988x and QCA6174 family chips do not support CCMP-256, GCMP-128 * and GCMP-256 ciphers in hardware. Fetch number of ciphers supported * from chip specific hw_param table. 
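	 * A missing or out-of-range value falls back to 8 below, i.e.
	 * everything up to and including the BIP/GMAC suites but none of
	 * the GCMP/CCMP-256 entries.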
*/ if (!ar->hw_params.n_cipher_suites || ar->hw_params.n_cipher_suites > ARRAY_SIZE(cipher_suites)) { ath10k_err(ar, "invalid hw_params.n_cipher_suites %d\n", ar->hw_params.n_cipher_suites); ar->hw_params.n_cipher_suites = 8; } ar->hw->wiphy->n_cipher_suites = ar->hw_params.n_cipher_suites; wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); ar->hw->weight_multiplier = ATH10K_AIRTIME_WEIGHT_MULTIPLIER; ret = ieee80211_register_hw(ar->hw); if (ret) { ath10k_err(ar, "failed to register ieee80211: %d\n", ret); goto err_dfs_detector_exit; } if (test_bit(WMI_SERVICE_PER_PACKET_SW_ENCRYPT, ar->wmi.svc_map)) { ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); ar->hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN); } if (!ath_is_world_regd(&ar->ath_common.reg_world_copy) && !ath_is_world_regd(&ar->ath_common.regulatory)) { ret = regulatory_hint(ar->hw->wiphy, ar->ath_common.regulatory.alpha2); if (ret) goto err_unregister; } return 0; err_unregister: ieee80211_unregister_hw(ar->hw); err_dfs_detector_exit: if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) ar->dfs_detector->exit(ar->dfs_detector); err_free: kfree(ar->mac.sbands[NL80211_BAND_2GHZ].channels); kfree(ar->mac.sbands[NL80211_BAND_5GHZ].channels); SET_IEEE80211_DEV(ar->hw, NULL); return ret; } void ath10k_mac_unregister(struct ath10k *ar) { ieee80211_unregister_hw(ar->hw); if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED) && ar->dfs_detector) ar->dfs_detector->exit(ar->dfs_detector); kfree(ar->mac.sbands[NL80211_BAND_2GHZ].channels); kfree(ar->mac.sbands[NL80211_BAND_5GHZ].channels); SET_IEEE80211_DEV(ar->hw, NULL); }
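/*
 * Editorial sketch: the register path above follows the usual mac80211 driver
 * convention - wiphy capabilities and feature flags are filled in first,
 * ieee80211_register_hw() runs near the end, and every later failure unwinds
 * through goto labels in reverse order of acquisition (err_unregister ->
 * err_dfs_detector_exit -> err_free), mirroring ath10k_mac_unregister().
 * The stand-alone userspace sketch below illustrates only that unwind idiom;
 * fake_hw, alloc_channels(), init_dfs() and register_hw() are invented names,
 * not ath10k or mac80211 symbols.
 */
#include <stdio.h>
#include <stdlib.h>

struct fake_hw { int *channels; int dfs_ready; int registered; };

static int alloc_channels(struct fake_hw *hw)
{
	hw->channels = calloc(16, sizeof(*hw->channels));
	return hw->channels ? 0 : -1;
}

static int init_dfs(struct fake_hw *hw) { hw->dfs_ready = 1; return 0; }
static int register_hw(struct fake_hw *hw) { hw->registered = 1; return 0; }

static int fake_mac_register(struct fake_hw *hw)
{
	int ret;

	ret = alloc_channels(hw);
	if (ret)
		return ret;		/* nothing acquired yet, plain return */

	ret = init_dfs(hw);
	if (ret)
		goto err_free;		/* undo the allocation only */

	ret = register_hw(hw);
	if (ret)
		goto err_dfs_exit;	/* undo DFS setup, then the allocation */

	return 0;

err_dfs_exit:
	hw->dfs_ready = 0;
err_free:
	free(hw->channels);
	hw->channels = NULL;
	return ret;
}

int main(void)
{
	struct fake_hw hw = { 0 };

	printf("register: %d\n", fake_mac_register(&hw));
	return 0;
}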
// SPDX-License-Identifier: GPL-2.0-or-later /* */ #include <linux/init.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <linux/usb/audio-v2.h> #include <linux/usb/audio-v3.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/control.h> #include <sound/tlv.h> #include "usbaudio.h" #include "card.h" #include "proc.h" #include "quirks.h" #include "endpoint.h" #include "pcm.h" #include "helper.h" #include "format.h" #include "clock.h" #include "stream.h" #include "power.h" #include "media.h" static void audioformat_free(struct audioformat *fp) { list_del(&fp->list); /* unlink for avoiding double-free */ kfree(fp->rate_table); kfree(fp->chmap); kfree(fp); } /* * free a substream */ static void free_substream(struct snd_usb_substream *subs) { struct audioformat *fp, *n; if (!subs->num_formats) return; /* not initialized */ list_for_each_entry_safe(fp, n, &subs->fmt_list, list) audioformat_free(fp); kfree(subs->str_pd); snd_media_stream_delete(subs); } /* * free a usb stream instance */ static void snd_usb_audio_stream_free(struct snd_usb_stream *stream) { free_substream(&stream->substream[0]); free_substream(&stream->substream[1]); list_del(&stream->list); kfree(stream); } static void snd_usb_audio_pcm_free(struct snd_pcm *pcm) { struct snd_usb_stream *stream = pcm->private_data; if (stream) { stream->pcm = NULL; snd_usb_audio_stream_free(stream); } } /* * initialize the substream instance.
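 * It links the given audioformat into the substream fmt_list, updates the
 * format/channel bookkeeping, installs the PCM ops and, when a UAC3 power
 * domain is supplied, parks that domain in the D1 idle state.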
*/ static void snd_usb_init_substream(struct snd_usb_stream *as, int stream, struct audioformat *fp, struct snd_usb_power_domain *pd) { struct snd_usb_substream *subs = &as->substream[stream]; INIT_LIST_HEAD(&subs->fmt_list); spin_lock_init(&subs->lock); subs->stream = as; subs->direction = stream; subs->dev = as->chip->dev; subs->txfr_quirk = !!(as->chip->quirk_flags & QUIRK_FLAG_ALIGN_TRANSFER); subs->tx_length_quirk = !!(as->chip->quirk_flags & QUIRK_FLAG_TX_LENGTH); subs->speed = snd_usb_get_speed(subs->dev); subs->pkt_offset_adj = 0; subs->stream_offset_adj = 0; snd_usb_set_pcm_ops(as->pcm, stream); list_add_tail(&fp->list, &subs->fmt_list); subs->formats |= fp->formats; subs->num_formats++; subs->fmt_type = fp->fmt_type; subs->ep_num = fp->endpoint; if (fp->channels > subs->channels_max) subs->channels_max = fp->channels; if (pd) { subs->str_pd = pd; /* Initialize Power Domain to idle status D1 */ snd_usb_power_domain_set(subs->stream->chip, pd, UAC3_PD_STATE_D1); } snd_usb_preallocate_buffer(subs); } /* kctl callbacks for usb-audio channel maps */ static int usb_chmap_ctl_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); struct snd_usb_substream *subs = info->private_data; uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = subs->channels_max; uinfo->value.integer.min = 0; uinfo->value.integer.max = SNDRV_CHMAP_LAST; return 0; } /* check whether a duplicated entry exists in the audiofmt list */ static bool have_dup_chmap(struct snd_usb_substream *subs, struct audioformat *fp) { struct audioformat *prev = fp; list_for_each_entry_continue_reverse(prev, &subs->fmt_list, list) { if (prev->chmap && !memcmp(prev->chmap, fp->chmap, sizeof(*fp->chmap))) return true; } return false; } static int usb_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *tlv) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); struct snd_usb_substream *subs = info->private_data; struct audioformat *fp; unsigned int __user *dst; int count = 0; if (size < 8) return -ENOMEM; if (put_user(SNDRV_CTL_TLVT_CONTAINER, tlv)) return -EFAULT; size -= 8; dst = tlv + 2; list_for_each_entry(fp, &subs->fmt_list, list) { int i, ch_bytes; if (!fp->chmap) continue; if (have_dup_chmap(subs, fp)) continue; /* copy the entry */ ch_bytes = fp->chmap->channels * 4; if (size < 8 + ch_bytes) return -ENOMEM; if (put_user(SNDRV_CTL_TLVT_CHMAP_FIXED, dst) || put_user(ch_bytes, dst + 1)) return -EFAULT; dst += 2; for (i = 0; i < fp->chmap->channels; i++, dst++) { if (put_user(fp->chmap->map[i], dst)) return -EFAULT; } count += 8 + ch_bytes; size -= 8 + ch_bytes; } if (put_user(count, tlv + 1)) return -EFAULT; return 0; } static int usb_chmap_ctl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_pcm_chmap *info = snd_kcontrol_chip(kcontrol); struct snd_usb_substream *subs = info->private_data; struct snd_pcm_chmap_elem *chmap = NULL; int i = 0; if (subs->cur_audiofmt) chmap = subs->cur_audiofmt->chmap; if (chmap) { for (i = 0; i < chmap->channels; i++) ucontrol->value.integer.value[i] = chmap->map[i]; } for (; i < subs->channels_max; i++) ucontrol->value.integer.value[i] = 0; return 0; } /* create a chmap kctl assigned to the given USB substream */ static int add_chmap(struct snd_pcm *pcm, int stream, struct snd_usb_substream *subs) { struct audioformat *fp; struct snd_pcm_chmap *chmap; struct snd_kcontrol *kctl; int err; list_for_each_entry(fp, &subs->fmt_list, list) if 
(fp->chmap) goto ok; /* no chmap is found */ return 0; ok: err = snd_pcm_add_chmap_ctls(pcm, stream, NULL, 0, 0, &chmap); if (err < 0) return err; /* override handlers */ chmap->private_data = subs; kctl = chmap->kctl; kctl->info = usb_chmap_ctl_info; kctl->get = usb_chmap_ctl_get; kctl->tlv.c = usb_chmap_ctl_tlv; return 0; } /* convert from USB ChannelConfig bits to ALSA chmap element */ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, int protocol) { static const unsigned int uac1_maps[] = { SNDRV_CHMAP_FL, /* left front */ SNDRV_CHMAP_FR, /* right front */ SNDRV_CHMAP_FC, /* center front */ SNDRV_CHMAP_LFE, /* LFE */ SNDRV_CHMAP_SL, /* left surround */ SNDRV_CHMAP_SR, /* right surround */ SNDRV_CHMAP_FLC, /* left of center */ SNDRV_CHMAP_FRC, /* right of center */ SNDRV_CHMAP_RC, /* surround */ SNDRV_CHMAP_SL, /* side left */ SNDRV_CHMAP_SR, /* side right */ SNDRV_CHMAP_TC, /* top */ 0 /* terminator */ }; static const unsigned int uac2_maps[] = { SNDRV_CHMAP_FL, /* front left */ SNDRV_CHMAP_FR, /* front right */ SNDRV_CHMAP_FC, /* front center */ SNDRV_CHMAP_LFE, /* LFE */ SNDRV_CHMAP_RL, /* back left */ SNDRV_CHMAP_RR, /* back right */ SNDRV_CHMAP_FLC, /* front left of center */ SNDRV_CHMAP_FRC, /* front right of center */ SNDRV_CHMAP_RC, /* back center */ SNDRV_CHMAP_SL, /* side left */ SNDRV_CHMAP_SR, /* side right */ SNDRV_CHMAP_TC, /* top center */ SNDRV_CHMAP_TFL, /* top front left */ SNDRV_CHMAP_TFC, /* top front center */ SNDRV_CHMAP_TFR, /* top front right */ SNDRV_CHMAP_TRL, /* top back left */ SNDRV_CHMAP_TRC, /* top back center */ SNDRV_CHMAP_TRR, /* top back right */ SNDRV_CHMAP_TFLC, /* top front left of center */ SNDRV_CHMAP_TFRC, /* top front right of center */ SNDRV_CHMAP_LLFE, /* left LFE */ SNDRV_CHMAP_RLFE, /* right LFE */ SNDRV_CHMAP_TSL, /* top side left */ SNDRV_CHMAP_TSR, /* top side right */ SNDRV_CHMAP_BC, /* bottom center */ SNDRV_CHMAP_RLC, /* back left of center */ SNDRV_CHMAP_RRC, /* back right of center */ 0 /* terminator */ }; struct snd_pcm_chmap_elem *chmap; const unsigned int *maps; int c; if (channels > ARRAY_SIZE(chmap->map)) return NULL; chmap = kzalloc(sizeof(*chmap), GFP_KERNEL); if (!chmap) return NULL; maps = protocol == UAC_VERSION_2 ? 
uac2_maps : uac1_maps; chmap->channels = channels; c = 0; if (bits) { for (; bits && *maps; maps++, bits >>= 1) { if (bits & 1) chmap->map[c++] = *maps; if (c == chmap->channels) break; } } else { /* If we're missing wChannelConfig, then guess something to make sure the channel map is not skipped entirely */ if (channels == 1) chmap->map[c++] = SNDRV_CHMAP_MONO; else for (; c < channels && *maps; maps++) chmap->map[c++] = *maps; } for (; c < channels; c++) chmap->map[c] = SNDRV_CHMAP_UNKNOWN; return chmap; } /* UAC3 device stores channels information in Cluster Descriptors */ static struct snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor *cluster) { unsigned int channels = cluster->bNrChannels; struct snd_pcm_chmap_elem *chmap; void *p = cluster; int len, c; if (channels > ARRAY_SIZE(chmap->map)) return NULL; chmap = kzalloc(sizeof(*chmap), GFP_KERNEL); if (!chmap) return NULL; len = le16_to_cpu(cluster->wLength); c = 0; p += sizeof(struct uac3_cluster_header_descriptor); while (((p - (void *)cluster) < len) && (c < channels)) { struct uac3_cluster_segment_descriptor *cs_desc = p; u16 cs_len; u8 cs_type; cs_len = le16_to_cpu(cs_desc->wLength); cs_type = cs_desc->bSegmentType; if (cs_type == UAC3_CHANNEL_INFORMATION) { struct uac3_cluster_information_segment_descriptor *is = p; unsigned char map; /* * TODO: this conversion is not complete, update it * after adding UAC3 values to asound.h */ switch (is->bChRelationship) { case UAC3_CH_MONO: map = SNDRV_CHMAP_MONO; break; case UAC3_CH_LEFT: case UAC3_CH_FRONT_LEFT: case UAC3_CH_HEADPHONE_LEFT: map = SNDRV_CHMAP_FL; break; case UAC3_CH_RIGHT: case UAC3_CH_FRONT_RIGHT: case UAC3_CH_HEADPHONE_RIGHT: map = SNDRV_CHMAP_FR; break; case UAC3_CH_FRONT_CENTER: map = SNDRV_CHMAP_FC; break; case UAC3_CH_FRONT_LEFT_OF_CENTER: map = SNDRV_CHMAP_FLC; break; case UAC3_CH_FRONT_RIGHT_OF_CENTER: map = SNDRV_CHMAP_FRC; break; case UAC3_CH_SIDE_LEFT: map = SNDRV_CHMAP_SL; break; case UAC3_CH_SIDE_RIGHT: map = SNDRV_CHMAP_SR; break; case UAC3_CH_BACK_LEFT: map = SNDRV_CHMAP_RL; break; case UAC3_CH_BACK_RIGHT: map = SNDRV_CHMAP_RR; break; case UAC3_CH_BACK_CENTER: map = SNDRV_CHMAP_RC; break; case UAC3_CH_BACK_LEFT_OF_CENTER: map = SNDRV_CHMAP_RLC; break; case UAC3_CH_BACK_RIGHT_OF_CENTER: map = SNDRV_CHMAP_RRC; break; case UAC3_CH_TOP_CENTER: map = SNDRV_CHMAP_TC; break; case UAC3_CH_TOP_FRONT_LEFT: map = SNDRV_CHMAP_TFL; break; case UAC3_CH_TOP_FRONT_RIGHT: map = SNDRV_CHMAP_TFR; break; case UAC3_CH_TOP_FRONT_CENTER: map = SNDRV_CHMAP_TFC; break; case UAC3_CH_TOP_FRONT_LOC: map = SNDRV_CHMAP_TFLC; break; case UAC3_CH_TOP_FRONT_ROC: map = SNDRV_CHMAP_TFRC; break; case UAC3_CH_TOP_SIDE_LEFT: map = SNDRV_CHMAP_TSL; break; case UAC3_CH_TOP_SIDE_RIGHT: map = SNDRV_CHMAP_TSR; break; case UAC3_CH_TOP_BACK_LEFT: map = SNDRV_CHMAP_TRL; break; case UAC3_CH_TOP_BACK_RIGHT: map = SNDRV_CHMAP_TRR; break; case UAC3_CH_TOP_BACK_CENTER: map = SNDRV_CHMAP_TRC; break; case UAC3_CH_BOTTOM_CENTER: map = SNDRV_CHMAP_BC; break; case UAC3_CH_LOW_FREQUENCY_EFFECTS: map = SNDRV_CHMAP_LFE; break; case UAC3_CH_LFE_LEFT: map = SNDRV_CHMAP_LLFE; break; case UAC3_CH_LFE_RIGHT: map = SNDRV_CHMAP_RLFE; break; case UAC3_CH_RELATIONSHIP_UNDEFINED: default: map = SNDRV_CHMAP_UNKNOWN; break; } chmap->map[c++] = map; } p += cs_len; } if (channels < c) pr_err("%s: channel number mismatch\n", __func__); chmap->channels = channels; for (; c < channels; c++) chmap->map[c] = SNDRV_CHMAP_UNKNOWN; return chmap; } /* * add this endpoint to the chip instance. 
* if a stream with the same endpoint already exists, append to it. * if not, create a new pcm stream. note, fp is added to the substream * fmt_list and will be freed on the chip instance release. do not free * fp or do remove it from the substream fmt_list to avoid double-free. */ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip, int stream, struct audioformat *fp, struct snd_usb_power_domain *pd) { struct snd_usb_stream *as; struct snd_usb_substream *subs; struct snd_pcm *pcm; int err; list_for_each_entry(as, &chip->pcm_list, list) { if (as->fmt_type != fp->fmt_type) continue; subs = &as->substream[stream]; if (subs->ep_num == fp->endpoint) { list_add_tail(&fp->list, &subs->fmt_list); subs->num_formats++; subs->formats |= fp->formats; return 0; } } if (chip->card->registered) chip->need_delayed_register = true; /* look for an empty stream */ list_for_each_entry(as, &chip->pcm_list, list) { if (as->fmt_type != fp->fmt_type) continue; subs = &as->substream[stream]; if (subs->ep_num) continue; err = snd_pcm_new_stream(as->pcm, stream, 1); if (err < 0) return err; snd_usb_init_substream(as, stream, fp, pd); return add_chmap(as->pcm, stream, subs); } /* create a new pcm */ as = kzalloc(sizeof(*as), GFP_KERNEL); if (!as) return -ENOMEM; as->pcm_index = chip->pcm_devs; as->chip = chip; as->fmt_type = fp->fmt_type; err = snd_pcm_new(chip->card, "USB Audio", chip->pcm_devs, stream == SNDRV_PCM_STREAM_PLAYBACK ? 1 : 0, stream == SNDRV_PCM_STREAM_PLAYBACK ? 0 : 1, &pcm); if (err < 0) { kfree(as); return err; } as->pcm = pcm; pcm->private_data = as; pcm->private_free = snd_usb_audio_pcm_free; pcm->info_flags = 0; if (chip->pcm_devs > 0) sprintf(pcm->name, "USB Audio #%d", chip->pcm_devs); else strcpy(pcm->name, "USB Audio"); snd_usb_init_substream(as, stream, fp, pd); /* * Keep using head insertion for M-Audio Audiophile USB (tm) which has a * fix to swap capture stream order in conf/cards/USB-audio.conf */ if (chip->usb_id == USB_ID(0x0763, 0x2003)) list_add(&as->list, &chip->pcm_list); else list_add_tail(&as->list, &chip->pcm_list); chip->pcm_devs++; snd_usb_proc_pcm_format_add(as); return add_chmap(pcm, stream, &as->substream[stream]); } int snd_usb_add_audio_stream(struct snd_usb_audio *chip, int stream, struct audioformat *fp) { return __snd_usb_add_audio_stream(chip, stream, fp, NULL); } static int snd_usb_add_audio_stream_v3(struct snd_usb_audio *chip, int stream, struct audioformat *fp, struct snd_usb_power_domain *pd) { return __snd_usb_add_audio_stream(chip, stream, fp, pd); } static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip, struct usb_host_interface *alts, int protocol, int iface_no) { /* parsed with a v1 header here. that's ok as we only look at the * header first which is the same for both versions */ struct uac_iso_endpoint_descriptor *csep; struct usb_interface_descriptor *altsd = get_iface_desc(alts); int attributes = 0; csep = snd_usb_find_desc(alts->endpoint[0].extra, alts->endpoint[0].extralen, NULL, USB_DT_CS_ENDPOINT); /* Creamware Noah has this descriptor after the 2nd endpoint */ if (!csep && altsd->bNumEndpoints >= 2) csep = snd_usb_find_desc(alts->endpoint[1].extra, alts->endpoint[1].extralen, NULL, USB_DT_CS_ENDPOINT); /* * If we can't locate the USB_DT_CS_ENDPOINT descriptor in the extra * bytes after the first endpoint, go search the entire interface. * Some devices have it directly *before* the standard endpoint. 
*/ if (!csep) csep = snd_usb_find_desc(alts->extra, alts->extralen, NULL, USB_DT_CS_ENDPOINT); if (!csep || csep->bLength < 7 || csep->bDescriptorSubtype != UAC_EP_GENERAL) goto error; if (protocol == UAC_VERSION_1) { attributes = csep->bmAttributes; } else if (protocol == UAC_VERSION_2) { struct uac2_iso_endpoint_descriptor *csep2 = (struct uac2_iso_endpoint_descriptor *) csep; if (csep2->bLength < sizeof(*csep2)) goto error; attributes = csep->bmAttributes & UAC_EP_CS_ATTR_FILL_MAX; /* emulate the endpoint attributes of a v1 device */ if (csep2->bmControls & UAC2_CONTROL_PITCH) attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL; } else { /* UAC_VERSION_3 */ struct uac3_iso_endpoint_descriptor *csep3 = (struct uac3_iso_endpoint_descriptor *) csep; if (csep3->bLength < sizeof(*csep3)) goto error; /* emulate the endpoint attributes of a v1 device */ if (le32_to_cpu(csep3->bmControls) & UAC2_CONTROL_PITCH) attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL; } return attributes; error: usb_audio_warn(chip, "%u:%d : no or invalid class specific endpoint descriptor\n", iface_no, altsd->bAlternateSetting); return 0; } /* find an input terminal descriptor (either UAC1 or UAC2) with the given * terminal id */ static void * snd_usb_find_input_terminal_descriptor(struct usb_host_interface *ctrl_iface, int terminal_id, int protocol) { struct uac2_input_terminal_descriptor *term = NULL; while ((term = snd_usb_find_csint_desc(ctrl_iface->extra, ctrl_iface->extralen, term, UAC_INPUT_TERMINAL))) { if (!snd_usb_validate_audio_desc(term, protocol)) continue; if (term->bTerminalID == terminal_id) return term; } return NULL; } static void * snd_usb_find_output_terminal_descriptor(struct usb_host_interface *ctrl_iface, int terminal_id, int protocol) { /* OK to use with both UAC2 and UAC3 */ struct uac2_output_terminal_descriptor *term = NULL; while ((term = snd_usb_find_csint_desc(ctrl_iface->extra, ctrl_iface->extralen, term, UAC_OUTPUT_TERMINAL))) { if (!snd_usb_validate_audio_desc(term, protocol)) continue; if (term->bTerminalID == terminal_id) return term; } return NULL; } static struct audioformat * audio_format_alloc_init(struct snd_usb_audio *chip, struct usb_host_interface *alts, int protocol, int iface_no, int altset_idx, int altno, int num_channels, int clock) { struct audioformat *fp; fp = kzalloc(sizeof(*fp), GFP_KERNEL); if (!fp) return NULL; fp->iface = iface_no; fp->altsetting = altno; fp->altset_idx = altset_idx; fp->endpoint = get_endpoint(alts, 0)->bEndpointAddress; fp->ep_attr = get_endpoint(alts, 0)->bmAttributes; fp->datainterval = snd_usb_parse_datainterval(chip, alts); fp->protocol = protocol; fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); fp->channels = num_channels; if (snd_usb_get_speed(chip->dev) == USB_SPEED_HIGH) fp->maxpacksize = (((fp->maxpacksize >> 11) & 3) + 1) * (fp->maxpacksize & 0x7ff); fp->clock = clock; INIT_LIST_HEAD(&fp->list); return fp; } static struct audioformat * snd_usb_get_audioformat_uac12(struct snd_usb_audio *chip, struct usb_host_interface *alts, int protocol, int iface_no, int altset_idx, int altno, int stream, int bm_quirk) { struct usb_device *dev = chip->dev; struct uac_format_type_i_continuous_descriptor *fmt; unsigned int num_channels = 0, chconfig = 0; struct audioformat *fp; int clock = 0; u64 format; /* get audio formats */ if (protocol == UAC_VERSION_1) { struct uac1_as_header_descriptor *as = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); struct uac_input_terminal_descriptor *iterm; if (!as) { dev_err(&dev->dev, 
"%u:%d : UAC_AS_GENERAL descriptor not found\n", iface_no, altno); return NULL; } if (as->bLength < sizeof(*as)) { dev_err(&dev->dev, "%u:%d : invalid UAC_AS_GENERAL desc\n", iface_no, altno); return NULL; } format = le16_to_cpu(as->wFormatTag); /* remember the format value */ iterm = snd_usb_find_input_terminal_descriptor(chip->ctrl_intf, as->bTerminalLink, protocol); if (iterm) { num_channels = iterm->bNrChannels; chconfig = le16_to_cpu(iterm->wChannelConfig); } } else { /* UAC_VERSION_2 */ struct uac2_input_terminal_descriptor *input_term; struct uac2_output_terminal_descriptor *output_term; struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); if (!as) { dev_err(&dev->dev, "%u:%d : UAC_AS_GENERAL descriptor not found\n", iface_no, altno); return NULL; } if (as->bLength < sizeof(*as)) { dev_err(&dev->dev, "%u:%d : invalid UAC_AS_GENERAL desc\n", iface_no, altno); return NULL; } num_channels = as->bNrChannels; format = le32_to_cpu(as->bmFormats); chconfig = le32_to_cpu(as->bmChannelConfig); /* * lookup the terminal associated to this interface * to extract the clock */ input_term = snd_usb_find_input_terminal_descriptor(chip->ctrl_intf, as->bTerminalLink, protocol); if (input_term) { clock = input_term->bCSourceID; if (!chconfig && (num_channels == input_term->bNrChannels)) chconfig = le32_to_cpu(input_term->bmChannelConfig); goto found_clock; } output_term = snd_usb_find_output_terminal_descriptor(chip->ctrl_intf, as->bTerminalLink, protocol); if (output_term) { clock = output_term->bCSourceID; goto found_clock; } dev_err(&dev->dev, "%u:%d : bogus bTerminalLink %d\n", iface_no, altno, as->bTerminalLink); return NULL; } found_clock: /* get format type */ fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_FORMAT_TYPE); if (!fmt) { dev_err(&dev->dev, "%u:%d : no UAC_FORMAT_TYPE desc\n", iface_no, altno); return NULL; } if (((protocol == UAC_VERSION_1) && (fmt->bLength < 8)) || ((protocol == UAC_VERSION_2) && (fmt->bLength < 6))) { dev_err(&dev->dev, "%u:%d : invalid UAC_FORMAT_TYPE desc\n", iface_no, altno); return NULL; } /* * Blue Microphones workaround: The last altsetting is * identical with the previous one, except for a larger * packet size, but is actually a mislabeled two-channel * setting; ignore it. * * Part 2: analyze quirk flag and format */ if (bm_quirk && fmt->bNrChannels == 1 && fmt->bSubframeSize == 2) return NULL; fp = audio_format_alloc_init(chip, alts, protocol, iface_no, altset_idx, altno, num_channels, clock); if (!fp) return ERR_PTR(-ENOMEM); fp->attributes = parse_uac_endpoint_attributes(chip, alts, protocol, iface_no); /* some quirks for attributes here */ snd_usb_audioformat_attributes_quirk(chip, fp, stream); /* ok, let's parse further... 
*/ if (snd_usb_parse_audio_format(chip, fp, format, fmt, stream) < 0) { audioformat_free(fp); return NULL; } /* Create chmap */ if (fp->channels != num_channels) chconfig = 0; fp->chmap = convert_chmap(fp->channels, chconfig, protocol); return fp; } static struct audioformat * snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, struct usb_host_interface *alts, struct snd_usb_power_domain **pd_out, int iface_no, int altset_idx, int altno, int stream) { struct usb_device *dev = chip->dev; struct uac3_input_terminal_descriptor *input_term; struct uac3_output_terminal_descriptor *output_term; struct uac3_cluster_header_descriptor *cluster; struct uac3_as_header_descriptor *as = NULL; struct uac3_hc_descriptor_header hc_header; struct snd_pcm_chmap_elem *chmap; struct snd_usb_power_domain *pd; unsigned char badd_profile; u64 badd_formats = 0; unsigned int num_channels; struct audioformat *fp; u16 cluster_id, wLength; int clock = 0; int err; badd_profile = chip->badd_profile; if (badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) { unsigned int maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); switch (maxpacksize) { default: dev_err(&dev->dev, "%u:%d : incorrect wMaxPacketSize for BADD profile\n", iface_no, altno); return NULL; case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_16: case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_16: badd_formats = SNDRV_PCM_FMTBIT_S16_LE; num_channels = 1; break; case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_24: case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_24: badd_formats = SNDRV_PCM_FMTBIT_S24_3LE; num_channels = 1; break; case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_16: case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_16: badd_formats = SNDRV_PCM_FMTBIT_S16_LE; num_channels = 2; break; case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_24: case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_24: badd_formats = SNDRV_PCM_FMTBIT_S24_3LE; num_channels = 2; break; } chmap = kzalloc(sizeof(*chmap), GFP_KERNEL); if (!chmap) return ERR_PTR(-ENOMEM); if (num_channels == 1) { chmap->map[0] = SNDRV_CHMAP_MONO; } else { chmap->map[0] = SNDRV_CHMAP_FL; chmap->map[1] = SNDRV_CHMAP_FR; } chmap->channels = num_channels; clock = UAC3_BADD_CS_ID9; goto found_clock; } as = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); if (!as) { dev_err(&dev->dev, "%u:%d : UAC_AS_GENERAL descriptor not found\n", iface_no, altno); return NULL; } if (as->bLength < sizeof(*as)) { dev_err(&dev->dev, "%u:%d : invalid UAC_AS_GENERAL desc\n", iface_no, altno); return NULL; } cluster_id = le16_to_cpu(as->wClusterDescrID); if (!cluster_id) { dev_err(&dev->dev, "%u:%d : no cluster descriptor\n", iface_no, altno); return NULL; } /* * Get number of channels and channel map through * High Capability Cluster Descriptor * * First step: get High Capability header and * read size of Cluster Descriptor */ err = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, cluster_id, snd_usb_ctrl_intf(chip), &hc_header, sizeof(hc_header)); if (err < 0) return ERR_PTR(err); else if (err != sizeof(hc_header)) { dev_err(&dev->dev, "%u:%d : can't get High Capability descriptor\n", iface_no, altno); return ERR_PTR(-EIO); } /* * Second step: allocate needed amount of memory * and request Cluster Descriptor */ wLength = le16_to_cpu(hc_header.wLength); cluster = kzalloc(wLength, GFP_KERNEL); if (!cluster) return ERR_PTR(-ENOMEM); err = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR, USB_RECIP_INTERFACE | 
USB_TYPE_CLASS | USB_DIR_IN, cluster_id, snd_usb_ctrl_intf(chip), cluster, wLength); if (err < 0) { kfree(cluster); return ERR_PTR(err); } else if (err != wLength) { dev_err(&dev->dev, "%u:%d : can't get Cluster Descriptor\n", iface_no, altno); kfree(cluster); return ERR_PTR(-EIO); } num_channels = cluster->bNrChannels; chmap = convert_chmap_v3(cluster); kfree(cluster); /* * lookup the terminal associated to this interface * to extract the clock */ input_term = snd_usb_find_input_terminal_descriptor(chip->ctrl_intf, as->bTerminalLink, UAC_VERSION_3); if (input_term) { clock = input_term->bCSourceID; goto found_clock; } output_term = snd_usb_find_output_terminal_descriptor(chip->ctrl_intf, as->bTerminalLink, UAC_VERSION_3); if (output_term) { clock = output_term->bCSourceID; goto found_clock; } dev_err(&dev->dev, "%u:%d : bogus bTerminalLink %d\n", iface_no, altno, as->bTerminalLink); kfree(chmap); return NULL; found_clock: fp = audio_format_alloc_init(chip, alts, UAC_VERSION_3, iface_no, altset_idx, altno, num_channels, clock); if (!fp) { kfree(chmap); return ERR_PTR(-ENOMEM); } fp->chmap = chmap; if (badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) { fp->attributes = 0; /* No attributes */ fp->fmt_type = UAC_FORMAT_TYPE_I; fp->formats = badd_formats; fp->nr_rates = 0; /* SNDRV_PCM_RATE_CONTINUOUS */ fp->rate_min = UAC3_BADD_SAMPLING_RATE; fp->rate_max = UAC3_BADD_SAMPLING_RATE; fp->rates = SNDRV_PCM_RATE_CONTINUOUS; pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) { audioformat_free(fp); return NULL; } pd->pd_id = (stream == SNDRV_PCM_STREAM_PLAYBACK) ? UAC3_BADD_PD_ID10 : UAC3_BADD_PD_ID11; pd->pd_d1d0_rec = UAC3_BADD_PD_RECOVER_D1D0; pd->pd_d2d0_rec = UAC3_BADD_PD_RECOVER_D2D0; } else { fp->attributes = parse_uac_endpoint_attributes(chip, alts, UAC_VERSION_3, iface_no); pd = snd_usb_find_power_domain(chip->ctrl_intf, as->bTerminalLink); /* ok, let's parse further... */ if (snd_usb_parse_audio_format_v3(chip, fp, as, stream) < 0) { kfree(pd); audioformat_free(fp); return NULL; } } if (pd) *pd_out = pd; return fp; } static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no, bool *has_non_pcm, bool non_pcm) { struct usb_device *dev; struct usb_interface *iface; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; int i, altno, err, stream; struct audioformat *fp = NULL; struct snd_usb_power_domain *pd = NULL; bool set_iface_first; int num, protocol; dev = chip->dev; /* parse the interface's altsettings */ iface = usb_ifnum_to_if(dev, iface_no); num = iface->num_altsetting; /* * Dallas DS4201 workaround: It presents 5 altsettings, but the last * one misses syncpipe, and does not produce any sound. */ if (chip->usb_id == USB_ID(0x04fa, 0x4201) && num >= 4) num = 4; for (i = 0; i < num; i++) { alts = &iface->altsetting[i]; altsd = get_iface_desc(alts); protocol = altsd->bInterfaceProtocol; /* skip invalid one */ if (((altsd->bInterfaceClass != USB_CLASS_AUDIO || (altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIOSTREAMING && altsd->bInterfaceSubClass != USB_SUBCLASS_VENDOR_SPEC)) && altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || altsd->bNumEndpoints < 1 || le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize) == 0) continue; /* must be isochronous */ if ((get_endpoint(alts, 0)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) != USB_ENDPOINT_XFER_ISOC) continue; /* check direction */ stream = (get_endpoint(alts, 0)->bEndpointAddress & USB_DIR_IN) ? 
SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; altno = altsd->bAlternateSetting; if (snd_usb_apply_interface_quirk(chip, iface_no, altno)) continue; /* * Roland audio streaming interfaces are marked with protocols * 0/1/2, but are UAC 1 compatible. */ if (USB_ID_VENDOR(chip->usb_id) == 0x0582 && altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC && protocol <= 2) protocol = UAC_VERSION_1; switch (protocol) { default: dev_dbg(&dev->dev, "%u:%d: unknown interface protocol %#02x, assuming v1\n", iface_no, altno, protocol); protocol = UAC_VERSION_1; fallthrough; case UAC_VERSION_1: case UAC_VERSION_2: { int bm_quirk = 0; /* * Blue Microphones workaround: The last altsetting is * identical with the previous one, except for a larger * packet size, but is actually a mislabeled two-channel * setting; ignore it. * * Part 1: prepare quirk flag */ if (altno == 2 && num == 3 && fp && fp->altsetting == 1 && fp->channels == 1 && fp->formats == SNDRV_PCM_FMTBIT_S16_LE && protocol == UAC_VERSION_1 && le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize) == fp->maxpacksize * 2) bm_quirk = 1; fp = snd_usb_get_audioformat_uac12(chip, alts, protocol, iface_no, i, altno, stream, bm_quirk); break; } case UAC_VERSION_3: fp = snd_usb_get_audioformat_uac3(chip, alts, &pd, iface_no, i, altno, stream); break; } if (!fp) continue; else if (IS_ERR(fp)) return PTR_ERR(fp); if (fp->fmt_type != UAC_FORMAT_TYPE_I) *has_non_pcm = true; if ((fp->fmt_type == UAC_FORMAT_TYPE_I) == non_pcm) { audioformat_free(fp); kfree(pd); fp = NULL; pd = NULL; continue; } snd_usb_audioformat_set_sync_ep(chip, fp); dev_dbg(&dev->dev, "%u:%d: add audio endpoint %#x\n", iface_no, altno, fp->endpoint); if (protocol == UAC_VERSION_3) err = snd_usb_add_audio_stream_v3(chip, stream, fp, pd); else err = snd_usb_add_audio_stream(chip, stream, fp); if (err < 0) { audioformat_free(fp); kfree(pd); return err; } /* add endpoints */ err = snd_usb_add_endpoint(chip, fp->endpoint, SND_USB_ENDPOINT_TYPE_DATA); if (err < 0) return err; if (fp->sync_ep) { err = snd_usb_add_endpoint(chip, fp->sync_ep, fp->implicit_fb ? SND_USB_ENDPOINT_TYPE_DATA : SND_USB_ENDPOINT_TYPE_SYNC); if (err < 0) return err; } set_iface_first = false; if (protocol == UAC_VERSION_1 || (chip->quirk_flags & QUIRK_FLAG_SET_IFACE_FIRST)) set_iface_first = true; /* try to set the interface... */ usb_set_interface(chip->dev, iface_no, 0); if (set_iface_first) usb_set_interface(chip->dev, iface_no, altno); snd_usb_init_pitch(chip, fp); snd_usb_init_sample_rate(chip, fp, fp->rate_max); if (!set_iface_first) usb_set_interface(chip->dev, iface_no, altno); } return 0; } int snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no) { int err; bool has_non_pcm = false; /* parse PCM formats */ err = __snd_usb_parse_audio_interface(chip, iface_no, &has_non_pcm, false); if (err < 0) return err; if (has_non_pcm) { /* parse non-PCM formats */ err = __snd_usb_parse_audio_interface(chip, iface_no, &has_non_pcm, true); if (err < 0) return err; } return 0; }
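/*
 * Editorial sketch: convert_chmap() above walks the UAC1/UAC2 channel-config
 * bitmask bit by bit, assigning the next entry of a fixed position table to
 * every set bit and padding any remaining channels with SNDRV_CHMAP_UNKNOWN.
 * The stand-alone userspace sketch below reproduces only that bit-walk; the
 * string table stands in for the SNDRV_CHMAP_* constants and is deliberately
 * truncated.
 */
#include <stdio.h>

static const char *const pos_table[] = {
	"FL", "FR", "FC", "LFE", "RL", "RR", NULL	/* illustrative subset */
};

static void sketch_convert_chmap(unsigned int channels, unsigned int bits)
{
	const char *const *maps = pos_table;
	unsigned int c = 0;

	/* each set bit selects the next position from the table */
	for (; bits && *maps && c < channels; maps++, bits >>= 1)
		if (bits & 1)
			printf("ch%u -> %s\n", c++, *maps);

	/* leftover channels get no defined position */
	for (; c < channels; c++)
		printf("ch%u -> UNKNOWN\n", c);
}

int main(void)
{
	/* e.g. bmChannelConfig = 0x3 -> front left, front right */
	sketch_convert_chmap(2, 0x3);
	return 0;
}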
/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef __SOUND_CORE_H #define __SOUND_CORE_H /* * Main header file for the ALSA driver * Copyright (c) 1994-2001 by Jaroslav Kysela <perex@perex.cz> */ #include <linux/device.h> #include <linux/sched.h> /* wake_up() */ #include <linux/mutex.h> /* struct mutex */ #include <linux/rwsem.h> /* struct rw_semaphore */ #include <linux/pm.h> /* pm_message_t */ #include <linux/stringify.h> #include <linux/printk.h> #include <linux/xarray.h> /* number of supported soundcards */ #ifdef CONFIG_SND_DYNAMIC_MINORS #define SNDRV_CARDS CONFIG_SND_MAX_CARDS #else #define SNDRV_CARDS 8 /* don't change - minor numbers */ #endif #define CONFIG_SND_MAJOR 116 /* standard configuration */ /* forward declarations */ struct pci_dev; struct module; struct completion; /* device allocation stuff */ /* type of the object used in snd_device_*() * this also defines the calling order */ enum snd_device_type { SNDRV_DEV_LOWLEVEL, SNDRV_DEV_INFO, SNDRV_DEV_BUS, SNDRV_DEV_CODEC, SNDRV_DEV_PCM, SNDRV_DEV_COMPRESS, SNDRV_DEV_RAWMIDI, SNDRV_DEV_TIMER, SNDRV_DEV_SEQUENCER, SNDRV_DEV_HWDEP, SNDRV_DEV_JACK, SNDRV_DEV_CONTROL, /* NOTE: this must be the last one */ }; enum snd_device_state { SNDRV_DEV_BUILD, SNDRV_DEV_REGISTERED, SNDRV_DEV_DISCONNECTED, }; struct snd_device; struct snd_device_ops { int (*dev_free)(struct snd_device *dev); int (*dev_register)(struct snd_device *dev); int (*dev_disconnect)(struct snd_device *dev); }; struct snd_device { struct list_head list; /* list of registered devices */ struct snd_card *card; /* card which holds this
device */ enum snd_device_state state; /* state of the device */ enum snd_device_type type; /* device type */ void *device_data; /* device structure */ const struct snd_device_ops *ops; /* operations */ }; #define snd_device(n) list_entry(n, struct snd_device, list) /* main structure for soundcard */ struct snd_card { int number; /* number of soundcard (index to snd_cards) */ char id[16]; /* id string of this card */ char driver[16]; /* driver name */ char shortname[32]; /* short name of this soundcard */ char longname[80]; /* name of this soundcard */ char irq_descr[32]; /* Interrupt description */ char mixername[80]; /* mixer name */ char components[128]; /* card components delimited with space */ struct module *module; /* top-level module */ void *private_data; /* private data for soundcard */ void (*private_free) (struct snd_card *card); /* callback for freeing of private data */ struct list_head devices; /* devices */ struct device *ctl_dev; /* control device */ unsigned int last_numid; /* last used numeric ID */ struct rw_semaphore controls_rwsem; /* controls lock (list and values) */ rwlock_t ctl_files_rwlock; /* ctl_files list lock */ int controls_count; /* count of all controls */ size_t user_ctl_alloc_size; // current memory allocation by user controls. struct list_head controls; /* all controls for this card */ struct list_head ctl_files; /* active control files */ #ifdef CONFIG_SND_CTL_FAST_LOOKUP struct xarray ctl_numids; /* hash table for numids */ struct xarray ctl_hash; /* hash table for ctl id matching */ bool ctl_hash_collision; /* ctl_hash collision seen? */ #endif struct snd_info_entry *proc_root; /* root for soundcard specific files */ struct proc_dir_entry *proc_root_link; /* number link to real id */ struct list_head files_list; /* all files associated to this card */ struct snd_shutdown_f_ops *s_f_ops; /* file operations in the shutdown state */ spinlock_t files_lock; /* lock the files for this card */ int shutdown; /* this card is going down */ struct completion *release_completion; struct device *dev; /* device assigned to this card */ struct device card_dev; /* cardX object for sysfs */ const struct attribute_group *dev_groups[4]; /* assigned sysfs attr */ bool registered; /* card_dev is registered? */ bool managed; /* managed via devres */ bool releasing; /* during card free process */ int sync_irq; /* assigned irq, used for PCM sync */ wait_queue_head_t remove_sleep; size_t total_pcm_alloc_bytes; /* total amount of allocated buffers */ struct mutex memory_mutex; /* protection for the above */ #ifdef CONFIG_SND_DEBUG struct dentry *debugfs_root; /* debugfs root for card */ #endif #ifdef CONFIG_PM unsigned int power_state; /* power state */ atomic_t power_ref; wait_queue_head_t power_sleep; wait_queue_head_t power_ref_sleep; #endif #if IS_ENABLED(CONFIG_SND_MIXER_OSS) struct snd_mixer_oss *mixer_oss; int mixer_oss_change_count; #endif }; #define dev_to_snd_card(p) container_of(p, struct snd_card, card_dev) #ifdef CONFIG_PM static inline unsigned int snd_power_get_state(struct snd_card *card) { return READ_ONCE(card->power_state); } static inline void snd_power_change_state(struct snd_card *card, unsigned int state) { WRITE_ONCE(card->power_state, state); wake_up(&card->power_sleep); } /** * snd_power_ref - Take the reference count for power control * @card: sound card object * * The power_ref reference of the card is used for managing to block * the snd_power_sync_ref() operation. This function increments the reference. 
* The counterpart snd_power_unref() has to be called appropriately later. */ static inline void snd_power_ref(struct snd_card *card) { atomic_inc(&card->power_ref); } /** * snd_power_unref - Release the reference count for power control * @card: sound card object */ static inline void snd_power_unref(struct snd_card *card) { if (atomic_dec_and_test(&card->power_ref)) wake_up(&card->power_ref_sleep); } /** * snd_power_sync_ref - wait until the card power_ref is freed * @card: sound card object * * This function is used to synchronize with the pending power_ref being * released. */ static inline void snd_power_sync_ref(struct snd_card *card) { wait_event(card->power_ref_sleep, !atomic_read(&card->power_ref)); } /* init.c */ int snd_power_wait(struct snd_card *card); int snd_power_ref_and_wait(struct snd_card *card); #else /* ! CONFIG_PM */ static inline int snd_power_wait(struct snd_card *card) { return 0; } static inline void snd_power_ref(struct snd_card *card) {} static inline void snd_power_unref(struct snd_card *card) {} static inline int snd_power_ref_and_wait(struct snd_card *card) { return 0; } static inline void snd_power_sync_ref(struct snd_card *card) {} #define snd_power_get_state(card) ({ (void)(card); SNDRV_CTL_POWER_D0; }) #define snd_power_change_state(card, state) do { (void)(card); } while (0) #endif /* CONFIG_PM */ struct snd_minor { int type; /* SNDRV_DEVICE_TYPE_XXX */ int card; /* card number */ int device; /* device number */ const struct file_operations *f_ops; /* file operations */ void *private_data; /* private data for f_ops->open */ struct device *dev; /* device for sysfs */ struct snd_card *card_ptr; /* assigned card instance */ }; /* return a device pointer linked to each sound device as a parent */ static inline struct device *snd_card_get_device_link(struct snd_card *card) { return card ? 
&card->card_dev : NULL; } /* sound.c */ extern int snd_major; extern int snd_ecards_limit; extern const struct class sound_class; #ifdef CONFIG_SND_DEBUG extern struct dentry *sound_debugfs_root; #endif void snd_request_card(int card); int snd_device_alloc(struct device **dev_p, struct snd_card *card); int snd_register_device(int type, struct snd_card *card, int dev, const struct file_operations *f_ops, void *private_data, struct device *device); int snd_unregister_device(struct device *dev); void *snd_lookup_minor_data(unsigned int minor, int type); #ifdef CONFIG_SND_OSSEMUL int snd_register_oss_device(int type, struct snd_card *card, int dev, const struct file_operations *f_ops, void *private_data); int snd_unregister_oss_device(int type, struct snd_card *card, int dev); void *snd_lookup_oss_minor_data(unsigned int minor, int type); #endif int snd_minor_info_init(void); /* sound_oss.c */ #ifdef CONFIG_SND_OSSEMUL int snd_minor_info_oss_init(void); #else static inline int snd_minor_info_oss_init(void) { return 0; } #endif /* memory.c */ int copy_to_user_fromio(void __user *dst, const volatile void __iomem *src, size_t count); int copy_from_user_toio(volatile void __iomem *dst, const void __user *src, size_t count); /* init.c */ int snd_card_locked(int card); #if IS_ENABLED(CONFIG_SND_MIXER_OSS) #define SND_MIXER_OSS_NOTIFY_REGISTER 0 #define SND_MIXER_OSS_NOTIFY_DISCONNECT 1 #define SND_MIXER_OSS_NOTIFY_FREE 2 extern int (*snd_mixer_oss_notify_callback)(struct snd_card *card, int cmd); #endif int snd_card_new(struct device *parent, int idx, const char *xid, struct module *module, int extra_size, struct snd_card **card_ret); int snd_devm_card_new(struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size, struct snd_card **card_ret); void snd_card_disconnect(struct snd_card *card); void snd_card_disconnect_sync(struct snd_card *card); void snd_card_free(struct snd_card *card); void snd_card_free_when_closed(struct snd_card *card); int snd_card_free_on_error(struct device *dev, int ret); void snd_card_set_id(struct snd_card *card, const char *id); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); int snd_card_add_dev_attr(struct snd_card *card, const struct attribute_group *group); int snd_component_add(struct snd_card *card, const char *component); int snd_card_file_add(struct snd_card *card, struct file *file); int snd_card_file_remove(struct snd_card *card, struct file *file); struct snd_card *snd_card_ref(int card); /** * snd_card_unref - Unreference the card object * @card: the card object to unreference * * Call this function for the card object that was obtained via snd_card_ref() * or snd_lookup_minor_data(). 
*/ static inline void snd_card_unref(struct snd_card *card) { put_device(&card->card_dev); } #define snd_card_set_dev(card, devptr) ((card)->dev = (devptr)) /* device.c */ int snd_device_new(struct snd_card *card, enum snd_device_type type, void *device_data, const struct snd_device_ops *ops); int snd_device_register(struct snd_card *card, void *device_data); int snd_device_register_all(struct snd_card *card); void snd_device_disconnect(struct snd_card *card, void *device_data); void snd_device_disconnect_all(struct snd_card *card); void snd_device_free(struct snd_card *card, void *device_data); void snd_device_free_all(struct snd_card *card); int snd_device_get_state(struct snd_card *card, void *device_data); /* isadma.c */ #ifdef CONFIG_ISA_DMA_API #define DMA_MODE_NO_ENABLE 0x0100 void snd_dma_program(unsigned long dma, unsigned long addr, unsigned int size, unsigned short mode); void snd_dma_disable(unsigned long dma); unsigned int snd_dma_pointer(unsigned long dma, unsigned int size); int snd_devm_request_dma(struct device *dev, int dma, const char *name); #endif /* misc.c */ struct resource; void release_and_free_resource(struct resource *res); /* --- */ /* sound printk debug levels */ enum { SND_PR_ALWAYS, SND_PR_DEBUG, SND_PR_VERBOSE, }; #if defined(CONFIG_SND_DEBUG) || defined(CONFIG_SND_VERBOSE_PRINTK) __printf(4, 5) void __snd_printk(unsigned int level, const char *file, int line, const char *format, ...); #else #define __snd_printk(level, file, line, format, ...) \ printk(format, ##__VA_ARGS__) #endif /** * snd_printk - printk wrapper * @fmt: format string * * Works like printk() but prints the file and the line of the caller * when configured with CONFIG_SND_VERBOSE_PRINTK. */ #define snd_printk(fmt, ...) \ __snd_printk(0, __FILE__, __LINE__, fmt, ##__VA_ARGS__) #ifdef CONFIG_SND_DEBUG /** * snd_printd - debug printk * @fmt: format string * * Works like snd_printk() for debugging purposes. * Ignored when CONFIG_SND_DEBUG is not set. */ #define snd_printd(fmt, ...) \ __snd_printk(1, __FILE__, __LINE__, fmt, ##__VA_ARGS__) #define _snd_printd(level, fmt, ...) \ __snd_printk(level, __FILE__, __LINE__, fmt, ##__VA_ARGS__) /** * snd_BUG - give a BUG warning message and stack trace * * Calls WARN() if CONFIG_SND_DEBUG is set. * Ignored when CONFIG_SND_DEBUG is not set. */ #define snd_BUG() WARN(1, "BUG?\n") /** * snd_printd_ratelimit - Suppress high rates of output when * CONFIG_SND_DEBUG is enabled. */ #define snd_printd_ratelimit() printk_ratelimit() /** * snd_BUG_ON - debugging check macro * @cond: condition to evaluate * * Has the same behavior as WARN_ON when CONFIG_SND_DEBUG is set, * otherwise just evaluates the conditional and returns the value. */ #define snd_BUG_ON(cond) WARN_ON((cond)) #else /* !CONFIG_SND_DEBUG */ __printf(1, 2) static inline void snd_printd(const char *format, ...) {} __printf(2, 3) static inline void _snd_printd(int level, const char *format, ...) {} #define snd_BUG() do { } while (0) #define snd_BUG_ON(condition) ({ \ int __ret_warn_on = !!(condition); \ unlikely(__ret_warn_on); \ }) static inline bool snd_printd_ratelimit(void) { return false; } #endif /* CONFIG_SND_DEBUG */ #ifdef CONFIG_SND_DEBUG_VERBOSE /** * snd_printdd - debug printk * @format: format string * * Works like snd_printk() for debugging purposes. * Ignored when CONFIG_SND_DEBUG_VERBOSE is not set. */ #define snd_printdd(format, ...) \ __snd_printk(2, __FILE__, __LINE__, format, ##__VA_ARGS__) #else __printf(1, 2) static inline void snd_printdd(const char *format, ...) 
{} #endif #define SNDRV_OSS_VERSION ((3<<16)|(8<<8)|(1<<4)|(0)) /* 3.8.1a */ /* for easier backward-porting */ #if IS_ENABLED(CONFIG_GAMEPORT) #define gameport_set_dev_parent(gp,xdev) ((gp)->dev.parent = (xdev)) #define gameport_set_port_data(gp,r) ((gp)->port_data = (r)) #define gameport_get_port_data(gp) (gp)->port_data #endif /* PCI quirk list helper */ struct snd_pci_quirk { unsigned short subvendor; /* PCI subvendor ID */ unsigned short subdevice; /* PCI subdevice ID */ unsigned short subdevice_mask; /* bitmask to match */ int value; /* value */ #ifdef CONFIG_SND_DEBUG_VERBOSE const char *name; /* name of the device (optional) */ #endif }; #define _SND_PCI_QUIRK_ID_MASK(vend, mask, dev) \ .subvendor = (vend), .subdevice = (dev), .subdevice_mask = (mask) #define _SND_PCI_QUIRK_ID(vend, dev) \ _SND_PCI_QUIRK_ID_MASK(vend, 0xffff, dev) #define SND_PCI_QUIRK_ID(vend,dev) {_SND_PCI_QUIRK_ID(vend, dev)} #ifdef CONFIG_SND_DEBUG_VERBOSE #define SND_PCI_QUIRK(vend,dev,xname,val) \ {_SND_PCI_QUIRK_ID(vend, dev), .value = (val), .name = (xname)} #define SND_PCI_QUIRK_VENDOR(vend, xname, val) \ {_SND_PCI_QUIRK_ID_MASK(vend, 0, 0), .value = (val), .name = (xname)} #define SND_PCI_QUIRK_MASK(vend, mask, dev, xname, val) \ {_SND_PCI_QUIRK_ID_MASK(vend, mask, dev), \ .value = (val), .name = (xname)} #define snd_pci_quirk_name(q) ((q)->name) #else #define SND_PCI_QUIRK(vend,dev,xname,val) \ {_SND_PCI_QUIRK_ID(vend, dev), .value = (val)} #define SND_PCI_QUIRK_MASK(vend, mask, dev, xname, val) \ {_SND_PCI_QUIRK_ID_MASK(vend, mask, dev), .value = (val)} #define SND_PCI_QUIRK_VENDOR(vend, xname, val) \ {_SND_PCI_QUIRK_ID_MASK(vend, 0, 0), .value = (val)} #define snd_pci_quirk_name(q) "" #endif #ifdef CONFIG_PCI const struct snd_pci_quirk * snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list); const struct snd_pci_quirk * snd_pci_quirk_lookup_id(u16 vendor, u16 device, const struct snd_pci_quirk *list); #else static inline const struct snd_pci_quirk * snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list) { return NULL; } static inline const struct snd_pci_quirk * snd_pci_quirk_lookup_id(u16 vendor, u16 device, const struct snd_pci_quirk *list) { return NULL; } #endif /* async signal helpers */ struct snd_fasync; int snd_fasync_helper(int fd, struct file *file, int on, struct snd_fasync **fasyncp); void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll); void snd_fasync_free(struct snd_fasync *fasync); #endif /* __SOUND_CORE_H */
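/*
 * Editorial sketch: the snd_card_* API declared in this header is normally
 * used in a fixed order - snd_card_new() (or snd_devm_card_new()) allocates
 * the card plus an optional private area, components and devices are attached
 * while the card is still private, and only snd_card_register() exposes it to
 * user space; snd_card_free() tears everything down again. The hypothetical
 * probe below is a minimal usage sketch: "struct my_chip" and
 * my_driver_probe() are invented names, not part of ALSA.
 */
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <sound/core.h>

struct my_chip {
	struct snd_card *card;
};

static int my_driver_probe(struct device *dev)
{
	struct snd_card *card;
	struct my_chip *chip;
	int err;

	/* allocate the card with room for the private chip structure */
	err = snd_card_new(dev, -1, NULL, THIS_MODULE, sizeof(*chip), &card);
	if (err < 0)
		return err;

	chip = card->private_data;	/* the extra_size area */
	chip->card = card;

	strscpy(card->driver, "my_chip", sizeof(card->driver));
	strscpy(card->shortname, "My Chip", sizeof(card->shortname));
	snprintf(card->longname, sizeof(card->longname),
		 "My Chip at %s", dev_name(dev));

	/* PCM, mixer and other snd_device instances would be created here,
	 * before the card becomes visible to user space.
	 */
	err = snd_card_register(card);
	if (err < 0) {
		snd_card_free(card);
		return err;
	}

	dev_set_drvdata(dev, card);
	return 0;
}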
// SPDX-License-Identifier: GPL-2.0 /****************************************************************************** * os_intfs.c * * Copyright(c) 2007 - 2010 Realtek Corporation. All rights reserved. * Linux device driver for RTL8192SU * * Modifications for inclusion into the Linux staging tree are * Copyright(c) 2010 Larry Finger. All rights reserved. * * Contact information: * WLAN FAE <wlanfae@realtek.com>.
* Larry Finger <Larry.Finger@lwfinger.net> * ******************************************************************************/ #define _OS_INTFS_C_ #include <linux/module.h> #include <linux/kthread.h> #include <linux/firmware.h> #include "osdep_service.h" #include "drv_types.h" #include "xmit_osdep.h" #include "recv_osdep.h" #include "rtl871x_ioctl.h" #include "usb_osintf.h" MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("rtl871x wireless lan driver"); MODULE_AUTHOR("Larry Finger"); static char ifname[IFNAMSIZ] = "wlan%d"; /* module param defaults */ static int chip_version = RTL8712_2ndCUT; static int rfintfs = HWPI; static int lbkmode = RTL8712_AIR_TRX; static int hci = RTL8712_USB; static int ampdu_enable = 1;/* for enabling tx_ampdu */ /* The video_mode variable is for video mode.*/ /* It may be specified when inserting the module with the video_mode=1 parameter.*/ static int video_mode = 1; /* enable video mode*/ /*Ndis802_11Infrastructure; infra, ad-hoc, auto*/ static int network_mode = Ndis802_11IBSS; static int channel = 1;/*ad-hoc support requirement*/ static int wireless_mode = WIRELESS_11BG; static int vrtl_carrier_sense = AUTO_VCS; static int vcs_type = RTS_CTS; static int frag_thresh = 2346; static int preamble = PREAMBLE_LONG;/*long, short, auto*/ static int scan_mode = 1;/*active, passive*/ static int adhoc_tx_pwr = 1; static int soft_ap; static int smart_ps = 1; static int power_mgnt = PS_MODE_ACTIVE; static int radio_enable = 1; static int long_retry_lmt = 7; static int short_retry_lmt = 7; static int busy_thresh = 40; static int ack_policy = NORMAL_ACK; static int mp_mode; static int software_encrypt; static int software_decrypt; static int wmm_enable;/* default is set to disable the wmm.*/ static int uapsd_enable; static int uapsd_max_sp = NO_LIMIT; static int uapsd_acbk_en; static int uapsd_acbe_en; static int uapsd_acvi_en; static int uapsd_acvo_en; static int ht_enable = 1; static int cbw40_enable = 1; static int rf_config = RTL8712_RF_1T2R; /* 1T2R*/ static int low_power; /* mac address to use instead of the one stored in Efuse */ char *r8712_initmac; static char *initmac; /* if wifi_test = 1, the driver will disable the turbo mode and pass it to * firmware private.
*/ static int wifi_test; module_param_string(ifname, ifname, sizeof(ifname), 0644); module_param(wifi_test, int, 0644); module_param(initmac, charp, 0644); module_param(video_mode, int, 0644); module_param(chip_version, int, 0644); module_param(rfintfs, int, 0644); module_param(lbkmode, int, 0644); module_param(hci, int, 0644); module_param(network_mode, int, 0644); module_param(channel, int, 0644); module_param(mp_mode, int, 0644); module_param(wmm_enable, int, 0644); module_param(vrtl_carrier_sense, int, 0644); module_param(vcs_type, int, 0644); module_param(busy_thresh, int, 0644); module_param(ht_enable, int, 0644); module_param(cbw40_enable, int, 0644); module_param(ampdu_enable, int, 0644); module_param(rf_config, int, 0644); module_param(power_mgnt, int, 0644); module_param(low_power, int, 0644); MODULE_PARM_DESC(ifname, " Net interface name, wlan%d=default"); MODULE_PARM_DESC(initmac, "MAC-Address, default: use FUSE"); static int netdev_open(struct net_device *pnetdev); static int netdev_close(struct net_device *pnetdev); static void loadparam(struct _adapter *padapter, struct net_device *pnetdev) { struct registry_priv *registry_par = &padapter->registrypriv; registry_par->chip_version = (u8)chip_version; registry_par->rfintfs = (u8)rfintfs; registry_par->lbkmode = (u8)lbkmode; registry_par->hci = (u8)hci; registry_par->network_mode = (u8)network_mode; memcpy(registry_par->ssid.Ssid, "ANY", 3); registry_par->ssid.SsidLength = 3; registry_par->channel = (u8)channel; registry_par->wireless_mode = (u8)wireless_mode; registry_par->vrtl_carrier_sense = (u8)vrtl_carrier_sense; registry_par->vcs_type = (u8)vcs_type; registry_par->frag_thresh = (u16)frag_thresh; registry_par->preamble = (u8)preamble; registry_par->scan_mode = (u8)scan_mode; registry_par->adhoc_tx_pwr = (u8)adhoc_tx_pwr; registry_par->soft_ap = (u8)soft_ap; registry_par->smart_ps = (u8)smart_ps; registry_par->power_mgnt = (u8)power_mgnt; registry_par->radio_enable = (u8)radio_enable; registry_par->long_retry_lmt = (u8)long_retry_lmt; registry_par->short_retry_lmt = (u8)short_retry_lmt; registry_par->busy_thresh = (u16)busy_thresh; registry_par->ack_policy = (u8)ack_policy; registry_par->mp_mode = (u8)mp_mode; registry_par->software_encrypt = (u8)software_encrypt; registry_par->software_decrypt = (u8)software_decrypt; /*UAPSD*/ registry_par->wmm_enable = (u8)wmm_enable; registry_par->uapsd_enable = (u8)uapsd_enable; registry_par->uapsd_max_sp = (u8)uapsd_max_sp; registry_par->uapsd_acbk_en = (u8)uapsd_acbk_en; registry_par->uapsd_acbe_en = (u8)uapsd_acbe_en; registry_par->uapsd_acvi_en = (u8)uapsd_acvi_en; registry_par->uapsd_acvo_en = (u8)uapsd_acvo_en; registry_par->ht_enable = (u8)ht_enable; registry_par->cbw40_enable = (u8)cbw40_enable; registry_par->ampdu_enable = (u8)ampdu_enable; registry_par->rf_config = (u8)rf_config; registry_par->low_power = (u8)low_power; registry_par->wifi_test = (u8)wifi_test; r8712_initmac = initmac; } static int r871x_net_set_mac_address(struct net_device *pnetdev, void *p) { struct _adapter *padapter = netdev_priv(pnetdev); struct sockaddr *addr = p; if (!padapter->bup) eth_hw_addr_set(pnetdev, addr->sa_data); return 0; } static struct net_device_stats *r871x_net_get_stats(struct net_device *pnetdev) { struct _adapter *padapter = netdev_priv(pnetdev); struct xmit_priv *pxmitpriv = &padapter->xmitpriv; struct recv_priv *precvpriv = &padapter->recvpriv; padapter->stats.tx_packets = pxmitpriv->tx_pkts; padapter->stats.rx_packets = precvpriv->rx_pkts; padapter->stats.tx_dropped = 
pxmitpriv->tx_drop; padapter->stats.rx_dropped = precvpriv->rx_drop; padapter->stats.tx_bytes = pxmitpriv->tx_bytes; padapter->stats.rx_bytes = precvpriv->rx_bytes; return &padapter->stats; } static const struct net_device_ops rtl8712_netdev_ops = { .ndo_open = netdev_open, .ndo_stop = netdev_close, .ndo_start_xmit = r8712_xmit_entry, .ndo_set_mac_address = r871x_net_set_mac_address, .ndo_get_stats = r871x_net_get_stats, .ndo_do_ioctl = r871x_ioctl, }; struct net_device *r8712_init_netdev(void) { struct _adapter *padapter; struct net_device *pnetdev; pnetdev = alloc_etherdev(sizeof(struct _adapter)); if (!pnetdev) return NULL; if (dev_alloc_name(pnetdev, ifname) < 0) { strscpy(ifname, "wlan%d", sizeof(ifname)); dev_alloc_name(pnetdev, ifname); } padapter = netdev_priv(pnetdev); padapter->pnetdev = pnetdev; pr_info("r8712u: register rtl8712_netdev_ops to netdev_ops\n"); pnetdev->netdev_ops = &rtl8712_netdev_ops; pnetdev->watchdog_timeo = HZ; /* 1 second timeout */ pnetdev->wireless_handlers = (struct iw_handler_def *) &r871x_handlers_def; loadparam(padapter, pnetdev); netif_carrier_off(pnetdev); padapter->pid = 0; /* Initial the PID value used for HW PBC.*/ return pnetdev; } static u32 start_drv_threads(struct _adapter *padapter) { padapter->cmd_thread = kthread_run(r8712_cmd_thread, padapter, "%s", padapter->pnetdev->name); if (IS_ERR(padapter->cmd_thread)) return _FAIL; return _SUCCESS; } void r8712_stop_drv_threads(struct _adapter *padapter) { struct completion *completion = &padapter->cmdpriv.terminate_cmdthread_comp; /*Below is to terminate r8712_cmd_thread & event_thread...*/ complete(&padapter->cmdpriv.cmd_queue_comp); if (padapter->cmd_thread) wait_for_completion_interruptible(completion); padapter->cmdpriv.cmd_seq = 1; } static void start_drv_timers(struct _adapter *padapter) { mod_timer(&padapter->mlmepriv.sitesurveyctrl.sitesurvey_ctrl_timer, jiffies + msecs_to_jiffies(5000)); mod_timer(&padapter->mlmepriv.wdg_timer, jiffies + msecs_to_jiffies(2000)); } void r8712_stop_drv_timers(struct _adapter *padapter) { del_timer_sync(&padapter->mlmepriv.assoc_timer); del_timer_sync(&padapter->securitypriv.tkip_timer); del_timer_sync(&padapter->mlmepriv.scan_to_timer); del_timer_sync(&padapter->mlmepriv.dhcp_timer); del_timer_sync(&padapter->mlmepriv.wdg_timer); del_timer_sync(&padapter->mlmepriv.sitesurveyctrl.sitesurvey_ctrl_timer); } static void init_default_value(struct _adapter *padapter) { struct registry_priv *pregistrypriv = &padapter->registrypriv; struct xmit_priv *pxmitpriv = &padapter->xmitpriv; struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &padapter->securitypriv; /*xmit_priv*/ pxmitpriv->vcs_setting = pregistrypriv->vrtl_carrier_sense; pxmitpriv->vcs = pregistrypriv->vcs_type; pxmitpriv->vcs_type = pregistrypriv->vcs_type; pxmitpriv->rts_thresh = pregistrypriv->rts_thresh; pxmitpriv->frag_len = pregistrypriv->frag_thresh; /* mlme_priv */ /* Maybe someday we should rename this variable to "active_mode"(Jeff)*/ pmlmepriv->passive_mode = 1; /* 1: active, 0: passive. 
*/ /*ht_priv*/ { int i; struct ht_priv *phtpriv = &pmlmepriv->htpriv; phtpriv->ampdu_enable = false;/*set to disabled*/ for (i = 0; i < 16; i++) phtpriv->baddbareq_issued[i] = false; } /*security_priv*/ psecuritypriv->sw_encrypt = pregistrypriv->software_encrypt; psecuritypriv->sw_decrypt = pregistrypriv->software_decrypt; psecuritypriv->binstallGrpkey = _FAIL; /*pwrctrl_priv*/ /*registry_priv*/ r8712_init_registrypriv_dev_network(padapter); r8712_update_registrypriv_dev_network(padapter); /*misc.*/ } int r8712_init_drv_sw(struct _adapter *padapter) { int ret; ret = r8712_init_cmd_priv(&padapter->cmdpriv); if (ret) return ret; padapter->cmdpriv.padapter = padapter; ret = r8712_init_evt_priv(&padapter->evtpriv); if (ret) goto free_cmd; ret = r8712_init_mlme_priv(padapter); if (ret) goto free_evt; ret = _r8712_init_xmit_priv(&padapter->xmitpriv, padapter); if (ret) goto free_mlme; ret = _r8712_init_recv_priv(&padapter->recvpriv, padapter); if (ret) goto free_xmit; memset((unsigned char *)&padapter->securitypriv, 0, sizeof(struct security_priv)); timer_setup(&padapter->securitypriv.tkip_timer, r8712_use_tkipkey_handler, 0); ret = _r8712_init_sta_priv(&padapter->stapriv); if (ret) goto free_recv; padapter->stapriv.padapter = padapter; r8712_init_bcmc_stainfo(padapter); r8712_init_pwrctrl_priv(padapter); mp871xinit(padapter); init_default_value(padapter); r8712_InitSwLeds(padapter); mutex_init(&padapter->mutex_start); return 0; free_recv: _r8712_free_recv_priv(&padapter->recvpriv); free_xmit: _free_xmit_priv(&padapter->xmitpriv); free_mlme: r8712_free_mlme_priv(&padapter->mlmepriv); free_evt: r8712_free_evt_priv(&padapter->evtpriv); free_cmd: r8712_free_cmd_priv(&padapter->cmdpriv); return ret; } void r8712_free_drv_sw(struct _adapter *padapter) { r8712_free_cmd_priv(&padapter->cmdpriv); r8712_free_evt_priv(&padapter->evtpriv); r8712_DeInitSwLeds(padapter); r8712_free_mlme_priv(&padapter->mlmepriv); _free_xmit_priv(&padapter->xmitpriv); _r8712_free_sta_priv(&padapter->stapriv); _r8712_free_recv_priv(&padapter->recvpriv); mp871xdeinit(padapter); } static void enable_video_mode(struct _adapter *padapter, int cbw40_value) { /* bit 8: * 1 -> enable video mode to 96B AP * 0 -> disable video mode to 96B AP * bit 9: * 1 -> enable 40MHz mode * 0 -> disable 40MHz mode * bit 10: * 1 -> enable STBC * 0 -> disable STBC */ u32 intcmd = 0xf4000500; /* enable bit8, bit10*/ if (cbw40_value) { /* if the driver supports the 40M bandwidth, * we can enable the bit 9. */ intcmd |= 0x200; } r8712_fw_cmd(padapter, intcmd); } /* * * This function intends to handle the activation of an interface * i.e. when it is brought Up/Active from a Down state. * */ static int netdev_open(struct net_device *pnetdev) { struct _adapter *padapter = netdev_priv(pnetdev); mutex_lock(&padapter->mutex_start); if (!padapter->bup) { padapter->driver_stopped = false; padapter->surprise_removed = false; padapter->bup = true; if (rtl871x_hal_init(padapter) != _SUCCESS) goto netdev_open_error; if (!r8712_initmac) { /* Use the mac address stored in the Efuse */ eth_hw_addr_set(pnetdev, padapter->eeprompriv.mac_addr); } else { /* We have to inform f/w to use user-supplied MAC * address. */ msleep(200); r8712_setMacAddr_cmd(padapter, (const u8 *)pnetdev->dev_addr); /* * The "myid" function will get the wifi mac address * from eeprompriv structure instead of netdev * structure. So, we have to overwrite the mac_addr * stored in the eeprompriv structure. In this case, * the real mac address won't be used anymore. 
So that, * the eeprompriv.mac_addr should store the mac which * users specify. */ memcpy(padapter->eeprompriv.mac_addr, pnetdev->dev_addr, ETH_ALEN); } if (start_drv_threads(padapter) != _SUCCESS) goto netdev_open_error; if (!padapter->dvobjpriv.inirp_init) goto netdev_open_error; else padapter->dvobjpriv.inirp_init(padapter); r8712_set_ps_mode(padapter, padapter->registrypriv.power_mgnt, padapter->registrypriv.smart_ps); } if (!netif_queue_stopped(pnetdev)) netif_start_queue(pnetdev); else netif_wake_queue(pnetdev); if (video_mode) enable_video_mode(padapter, cbw40_enable); /* start driver mlme relation timer */ start_drv_timers(padapter); padapter->ledpriv.LedControlHandler(padapter, LED_CTL_NO_LINK); mutex_unlock(&padapter->mutex_start); return 0; netdev_open_error: padapter->bup = false; netif_carrier_off(pnetdev); netif_stop_queue(pnetdev); mutex_unlock(&padapter->mutex_start); return -1; } /* * * This function intends to handle the shutdown of an interface * i.e. when it is brought Down from an Up/Active state. * */ static int netdev_close(struct net_device *pnetdev) { struct _adapter *padapter = netdev_priv(pnetdev); /* Close LED*/ padapter->ledpriv.LedControlHandler(padapter, LED_CTL_POWER_OFF); msleep(200); /*s1.*/ if (pnetdev) { if (!netif_queue_stopped(pnetdev)) netif_stop_queue(pnetdev); } /*s2.*/ /*s2-1. issue disassoc_cmd to fw*/ r8712_disassoc_cmd(padapter); /*s2-2. indicate disconnect to os*/ r8712_ind_disconnect(padapter); /*s2-3.*/ r8712_free_assoc_resources(padapter); /*s2-4.*/ r8712_free_network_queue(padapter); return 0; } #include "mlme_osdep.h"
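/*
 * Illustrative sketch, not part of the driver: how the firmware command word
 * sent by enable_video_mode() above is composed.  The bit meanings are taken
 * from the comment in that function; the macro and helper names below are
 * invented for this example.
 */
#include <linux/bits.h>
#include <linux/types.h>

#define R8712_FW_CMD_BASE	0xf4000000	/* opcode part of intcmd */
#define R8712_VIDEO_MODE_EN	BIT(8)		/* video mode towards a 96B AP */
#define R8712_BW40_EN		BIT(9)		/* 40 MHz bandwidth */
#define R8712_STBC_EN		BIT(10)		/* STBC */

static inline u32 build_video_mode_cmd(bool cbw40)
{
	/* matches the 0xf4000500 literal used in enable_video_mode() */
	u32 intcmd = R8712_FW_CMD_BASE | R8712_VIDEO_MODE_EN | R8712_STBC_EN;

	if (cbw40)
		intcmd |= R8712_BW40_EN;	/* 0xf4000700 when 40 MHz is supported */
	return intcmd;
}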
// SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/time.h> #include <linux/gcd.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/timer.h> #include "pcm_local.h" /* * Timer functions */ void snd_pcm_timer_resolution_change(struct snd_pcm_substream *substream) { unsigned long rate, mult, fsize, l, post; struct snd_pcm_runtime *runtime = substream->runtime; mult = 1000000000; rate = runtime->rate; if (snd_BUG_ON(!rate)) return; l = gcd(mult, rate); mult /= l; rate /= l; fsize = runtime->period_size; if (snd_BUG_ON(!fsize)) return; l = gcd(rate, fsize); rate /= l; fsize /= l; post = 1; while ((mult * fsize) / fsize != mult) { mult /= 2; post *= 2; } if (rate == 0) { pcm_err(substream->pcm, "pcm timer resolution out of range (rate = %u, period_size = %lu)\n", runtime->rate, runtime->period_size); runtime->timer_resolution = -1; return; } runtime->timer_resolution = (mult * fsize / rate) * post; } static unsigned long snd_pcm_timer_resolution(struct snd_timer * timer) { struct snd_pcm_substream *substream; substream = timer->private_data; return substream->runtime ? substream->runtime->timer_resolution : 0; } static int snd_pcm_timer_start(struct snd_timer * timer) { struct snd_pcm_substream *substream; substream = snd_timer_chip(timer); substream->timer_running = 1; return 0; } static int snd_pcm_timer_stop(struct snd_timer * timer) { struct snd_pcm_substream *substream; substream = snd_timer_chip(timer); substream->timer_running = 0; return 0; } static const struct snd_timer_hardware snd_pcm_timer = { .flags = SNDRV_TIMER_HW_AUTO | SNDRV_TIMER_HW_SLAVE, .resolution = 0, .ticks = 1, .c_resolution = snd_pcm_timer_resolution, .start = snd_pcm_timer_start, .stop = snd_pcm_timer_stop, }; /* * Init functions */ static void snd_pcm_timer_free(struct snd_timer *timer) { struct snd_pcm_substream *substream = timer->private_data; substream->timer = NULL; } void snd_pcm_timer_init(struct snd_pcm_substream *substream) { struct snd_timer_id tid; struct snd_timer *timer; tid.dev_sclass = SNDRV_TIMER_SCLASS_NONE; tid.dev_class = SNDRV_TIMER_CLASS_PCM; tid.card = substream->pcm->card->number; tid.device = substream->pcm->device; tid.subdevice = (substream->number << 1) | (substream->stream & 1); if (snd_timer_new(substream->pcm->card, "PCM", &tid, &timer) < 0) return; sprintf(timer->name, "PCM %s %i-%i-%i", substream->stream == SNDRV_PCM_STREAM_CAPTURE ? "capture" : "playback", tid.card, tid.device, tid.subdevice); timer->hw = snd_pcm_timer; if (snd_device_register(timer->card, timer) < 0) { snd_device_free(timer->card, timer); return; } timer->private_data = substream; timer->private_free = snd_pcm_timer_free; substream->timer = timer; } void snd_pcm_timer_done(struct snd_pcm_substream *substream) { if (substream->timer) { snd_device_free(substream->pcm->card, substream->timer); substream->timer = NULL; } }
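/*
 * Illustrative sketch, not part of the ALSA sources: the arithmetic done by
 * snd_pcm_timer_resolution_change() above, repeated as a stand-alone
 * user-space program.  The resolution is the period length in nanoseconds,
 * period_size * 10^9 / rate; the gcd reduction and the loop that halves mult
 * while scaling post mirror the kernel's unsigned-long overflow precaution.
 */
#include <stdio.h>

static unsigned long gcd_ul(unsigned long a, unsigned long b)
{
	while (b) {
		unsigned long t = a % b;

		a = b;
		b = t;
	}
	return a;
}

static unsigned long pcm_timer_resolution_ns(unsigned long rate,
					     unsigned long fsize)
{
	unsigned long mult = 1000000000UL, l, post = 1;

	if (!rate || !fsize)
		return 0;	/* the kernel version treats this as a bug */

	l = gcd_ul(mult, rate);
	mult /= l;
	rate /= l;
	l = gcd_ul(rate, fsize);
	rate /= l;
	fsize /= l;
	while ((mult * fsize) / fsize != mult) {	/* mult * fsize overflowed */
		mult /= 2;
		post *= 2;
	}
	if (!rate)
		return 0;	/* "resolution out of range" in the kernel */
	return (mult * fsize / rate) * post;
}

int main(void)
{
	/* 48 kHz rate, 1024-frame period -> 21333333 ns (~21.3 ms) */
	printf("%lu ns\n", pcm_timer_resolution_ns(48000, 1024));
	return 0;
}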
// SPDX-License-Identifier: GPL-2.0-only /* * kernel/workqueue.c - generic async execution with shared worker pool * * Copyright (C) 2002 Ingo Molnar * * Derived from the taskqueue/keventd code by: * David Woodhouse <dwmw2@infradead.org> * Andrew Morton * Kai Petzke <wpp@marie.physik.tu-berlin.de> * Theodore Ts'o <tytso@mit.edu> * * Made to use alloc_percpu by Christoph Lameter. * * Copyright (C) 2010 SUSE Linux Products GmbH * Copyright (C) 2010 Tejun Heo <tj@kernel.org> * * This is the generic async execution mechanism. Work items are * executed in process context. The worker pool is shared and * automatically managed. There are two worker pools for each CPU (one for * normal work items and the other for high priority ones) and some extra * pools for workqueues which are not bound to any specific CPU - the * number of these backing pools is dynamic. * * Please read Documentation/core-api/workqueue.rst for details. */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/signal.h> #include <linux/completion.h> #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/kthread.h> #include <linux/hardirq.h> #include <linux/mempolicy.h> #include <linux/freezer.h> #include <linux/debug_locks.h> #include <linux/lockdep.h> #include <linux/idr.h> #include <linux/jhash.h> #include <linux/hashtable.h> #include <linux/rculist.h> #include <linux/nodemask.h> #include <linux/moduleparam.h> #include <linux/uaccess.h> #include <linux/sched/isolation.h> #include <linux/sched/debug.h> #include <linux/nmi.h> #include <linux/kvm_para.h> #include <linux/delay.h> #include <linux/irq_work.h> #include "workqueue_internal.h" enum worker_pool_flags { /* * worker_pool flags * * A bound pool is either associated or disassociated with its CPU. * While associated (!DISASSOCIATED), all workers are bound to the * CPU and none has %WORKER_UNBOUND set and concurrency management * is in effect. * * While DISASSOCIATED, the cpu may be offline and all workers have * %WORKER_UNBOUND set and concurrency management disabled, and may * be executing on any CPU. The pool behaves as an unbound one. * * Note that DISASSOCIATED should be flipped only while holding * wq_pool_attach_mutex to avoid changing binding state while * worker_attach_to_pool() is in progress. * * As there can only be one concurrent BH execution context per CPU, a * BH pool is per-CPU and always DISASSOCIATED.
*/ POOL_BH = 1 << 0, /* is a BH pool */ POOL_MANAGER_ACTIVE = 1 << 1, /* being managed */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ POOL_BH_DRAINING = 1 << 3, /* draining after CPU offline */ }; enum worker_flags { /* worker flags */ WORKER_DIE = 1 << 1, /* die die die */ WORKER_IDLE = 1 << 2, /* is idle */ WORKER_PREP = 1 << 3, /* preparing to run works */ WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ WORKER_UNBOUND = 1 << 7, /* worker is unbound */ WORKER_REBOUND = 1 << 8, /* worker was rebound */ WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE | WORKER_UNBOUND | WORKER_REBOUND, }; enum work_cancel_flags { WORK_CANCEL_DELAYED = 1 << 0, /* canceling a delayed_work */ }; enum wq_internal_consts { NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */ UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */ BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2, /* call for help after 10ms (min two ticks) */ MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ CREATE_COOLDOWN = HZ, /* time to breath after fail */ /* * Rescue workers are used only on emergencies and shared by * all cpus. Give MIN_NICE. */ RESCUER_NICE_LEVEL = MIN_NICE, HIGHPRI_NICE_LEVEL = MIN_NICE, WQ_NAME_LEN = 32, }; /* * We don't want to trap softirq for too long. See MAX_SOFTIRQ_TIME and * MAX_SOFTIRQ_RESTART in kernel/softirq.c. These are macros because * msecs_to_jiffies() can't be an initializer. */ #define BH_WORKER_JIFFIES msecs_to_jiffies(2) #define BH_WORKER_RESTARTS 10 /* * Structure fields follow one of the following exclusion rules. * * I: Modifiable by initialization/destruction paths and read-only for * everyone else. * * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. * * L: pool->lock protected. Access with pool->lock held. * * LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for * reads. * * K: Only modified by worker while holding pool->lock. Can be safely read by * self, while holding pool->lock or from IRQ context if %current is the * kworker. * * S: Only modified by worker self. * * A: wq_pool_attach_mutex protected. * * PL: wq_pool_mutex protected. * * PR: wq_pool_mutex protected for writes. RCU protected for reads. * * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. * * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or * RCU for reads. * * WQ: wq->mutex protected. * * WR: wq->mutex protected for writes. RCU protected for reads. * * WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read * with READ_ONCE() without locking. * * MD: wq_mayday_lock protected. * * WD: Used internally by the watchdog. */ /* struct worker is defined in workqueue_internal.h */ struct worker_pool { raw_spinlock_t lock; /* the pool lock */ int cpu; /* I: the associated cpu */ int node; /* I: the associated node ID */ int id; /* I: pool ID */ unsigned int flags; /* L: flags */ unsigned long watchdog_ts; /* L: watchdog timestamp */ bool cpu_stall; /* WD: stalled cpu bound pool */ /* * The counter is incremented in a process context on the associated CPU * w/ preemption disabled, and decremented or reset in the same context * but w/ pool->lock held. The readers grab pool->lock and are * guaranteed to see if the counter reached zero. 
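 *
 * A minimal sketch of the reader side described above (pool_idle is a
 * hypothetical local variable):
 *
 *	raw_spin_lock_irq(&pool->lock);
 *	pool_idle = !pool->nr_running;	// reliably observes a zero counter
 *	raw_spin_unlock_irq(&pool->lock);
 *
 * Increments happen locklessly, but only on the associated CPU with
 * preemption disabled; decrements and resets take pool->lock, which is
 * what gives the locked reader its guarantee.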
*/ int nr_running; struct list_head worklist; /* L: list of pending works */ int nr_workers; /* L: total number of workers */ int nr_idle; /* L: currently idle workers */ struct list_head idle_list; /* L: list of idle workers */ struct timer_list idle_timer; /* L: worker idle timeout */ struct work_struct idle_cull_work; /* L: worker idle cleanup */ struct timer_list mayday_timer; /* L: SOS timer for workers */ /* a workers is either on busy_hash or idle_list, or the manager */ DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ struct worker *manager; /* L: purely informational */ struct list_head workers; /* A: attached workers */ struct list_head dying_workers; /* A: workers about to die */ struct completion *detach_completion; /* all workers detached */ struct ida worker_ida; /* worker IDs for task name */ struct workqueue_attrs *attrs; /* I: worker attributes */ struct hlist_node hash_node; /* PL: unbound_pool_hash node */ int refcnt; /* PL: refcnt for unbound pools */ /* * Destruction of pool is RCU protected to allow dereferences * from get_work_pool(). */ struct rcu_head rcu; }; /* * Per-pool_workqueue statistics. These can be monitored using * tools/workqueue/wq_monitor.py. */ enum pool_workqueue_stats { PWQ_STAT_STARTED, /* work items started execution */ PWQ_STAT_COMPLETED, /* work items completed execution */ PWQ_STAT_CPU_TIME, /* total CPU time consumed */ PWQ_STAT_CPU_INTENSIVE, /* wq_cpu_intensive_thresh_us violations */ PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */ PWQ_STAT_REPATRIATED, /* unbound workers brought back into scope */ PWQ_STAT_MAYDAY, /* maydays to rescuer */ PWQ_STAT_RESCUED, /* linked work items executed by rescuer */ PWQ_NR_STATS, }; /* * The per-pool workqueue. While queued, bits below WORK_PWQ_SHIFT * of work_struct->data are used for flags and the remaining high bits * point to the pwq; thus, pwqs need to be aligned at two's power of the * number of flag bits. */ struct pool_workqueue { struct worker_pool *pool; /* I: the associated pool */ struct workqueue_struct *wq; /* I: the owning workqueue */ int work_color; /* L: current color */ int flush_color; /* L: flushing color */ int refcnt; /* L: reference count */ int nr_in_flight[WORK_NR_COLORS]; /* L: nr of in_flight works */ bool plugged; /* L: execution suspended */ /* * nr_active management and WORK_STRUCT_INACTIVE: * * When pwq->nr_active >= max_active, new work item is queued to * pwq->inactive_works instead of pool->worklist and marked with * WORK_STRUCT_INACTIVE. * * All work items marked with WORK_STRUCT_INACTIVE do not participate in * nr_active and all work items in pwq->inactive_works are marked with * WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE work items are * in pwq->inactive_works. Some of them are ready to run in * pool->worklist or worker->scheduled. Those work itmes are only struct * wq_barrier which is used for flush_work() and should not participate * in nr_active. For non-barrier work item, it is marked with * WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works. */ int nr_active; /* L: nr of active works */ struct list_head inactive_works; /* L: inactive works */ struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */ struct list_head pwqs_node; /* WR: node on wq->pwqs */ struct list_head mayday_node; /* MD: node on wq->maydays */ u64 stats[PWQ_NR_STATS]; /* * Release of unbound pwq is punted to a kthread_worker. See put_pwq() * and pwq_release_workfn() for details. 
pool_workqueue itself is also * RCU protected so that the first pwq can be determined without * grabbing wq->mutex. */ struct kthread_work release_work; struct rcu_head rcu; } __aligned(1 << WORK_STRUCT_PWQ_SHIFT); /* * Structure used to wait for workqueue flush. */ struct wq_flusher { struct list_head list; /* WQ: list of flushers */ int flush_color; /* WQ: flush color waiting for */ struct completion done; /* flush completion */ }; struct wq_device; /* * Unlike in a per-cpu workqueue where max_active limits its concurrency level * on each CPU, in an unbound workqueue, max_active applies to the whole system. * As sharing a single nr_active across multiple sockets can be very expensive, * the counting and enforcement is per NUMA node. * * The following struct is used to enforce per-node max_active. When a pwq wants * to start executing a work item, it should increment ->nr using * tryinc_node_nr_active(). If acquisition fails due to ->nr already being over * ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish * and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in * round-robin order. */ struct wq_node_nr_active { int max; /* per-node max_active */ atomic_t nr; /* per-node nr_active */ raw_spinlock_t lock; /* nests inside pool locks */ struct list_head pending_pwqs; /* LN: pwqs with inactive works */ }; /* * The externally visible workqueue. It relays the issued work items to * the appropriate worker_pool through its pool_workqueues. */ struct workqueue_struct { struct list_head pwqs; /* WR: all pwqs of this wq */ struct list_head list; /* PR: list of all workqueues */ struct mutex mutex; /* protects this wq */ int work_color; /* WQ: current work color */ int flush_color; /* WQ: current flush color */ atomic_t nr_pwqs_to_flush; /* flush in progress */ struct wq_flusher *first_flusher; /* WQ: first flusher */ struct list_head flusher_queue; /* WQ: flush waiters */ struct list_head flusher_overflow; /* WQ: flush overflow list */ struct list_head maydays; /* MD: pwqs requesting rescue */ struct worker *rescuer; /* MD: rescue worker */ int nr_drainers; /* WQ: drain in progress */ /* See alloc_workqueue() function comment for info on min/max_active */ int max_active; /* WO: max active works */ int min_active; /* WO: min active works */ int saved_max_active; /* WQ: saved max_active */ int saved_min_active; /* WQ: saved min_active */ struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ struct pool_workqueue __rcu *dfl_pwq; /* PW: only for unbound wqs */ #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ #endif #ifdef CONFIG_LOCKDEP char *lock_name; struct lock_class_key key; struct lockdep_map lockdep_map; #endif char name[WQ_NAME_LEN]; /* I: workqueue name */ /* * Destruction of workqueue_struct is RCU protected to allow walking * the workqueues list without grabbing wq_pool_mutex. * This is used to dump all workqueues from sysrq. */ struct rcu_head rcu; /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */ struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */ }; /* * Each pod type describes how CPUs should be grouped for unbound workqueues. * See the comment above workqueue_attrs->affn_scope. 
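 *
 * A worked example under a hypothetical topology: with eight CPUs split
 * across two NUMA nodes (0-3 on node 0, 4-7 on node 1), the WQ_AFFN_NUMA
 * pod type would end up as:
 *
 *	nr_pods  == 2
 *	pod_cpus == { 0-3, 4-7 }
 *	pod_node == { 0, 1 }
 *	cpu_pod  == { 0, 0, 0, 0, 1, 1, 1, 1 }
 *
 * Finer-grained scopes such as WQ_AFFN_CACHE describe the same CPUs with
 * more, smaller pods.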
*/ struct wq_pod_type { int nr_pods; /* number of pods */ cpumask_var_t *pod_cpus; /* pod -> cpus */ int *pod_node; /* pod -> node */ int *cpu_pod; /* cpu -> pod */ }; static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = { [WQ_AFFN_DFL] = "default", [WQ_AFFN_CPU] = "cpu", [WQ_AFFN_SMT] = "smt", [WQ_AFFN_CACHE] = "cache", [WQ_AFFN_NUMA] = "numa", [WQ_AFFN_SYSTEM] = "system", }; /* * Per-cpu work items which run for longer than the following threshold are * automatically considered CPU intensive and excluded from concurrency * management to prevent them from noticeably delaying other per-cpu work items. * ULONG_MAX indicates that the user hasn't overridden it with a boot parameter. * The actual value is initialized in wq_cpu_intensive_thresh_init(). */ static unsigned long wq_cpu_intensive_thresh_us = ULONG_MAX; module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644); #ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT static unsigned int wq_cpu_intensive_warning_thresh = 4; module_param_named(cpu_intensive_warning_thresh, wq_cpu_intensive_warning_thresh, uint, 0644); #endif /* see the comment above the definition of WQ_POWER_EFFICIENT */ static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT); module_param_named(power_efficient, wq_power_efficient, bool, 0444); static bool wq_online; /* can kworkers be created yet? */ static bool wq_topo_initialized __read_mostly = false; static struct kmem_cache *pwq_cache; static struct wq_pod_type wq_pod_types[WQ_AFFN_NR_TYPES]; static enum wq_affn_scope wq_affn_dfl = WQ_AFFN_CACHE; /* buf for wq_update_unbound_pod_attrs(), protected by CPU hotplug exclusion */ static struct workqueue_attrs *wq_update_pod_attrs_buf; static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */ static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ /* wait for manager to go away */ static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait); static LIST_HEAD(workqueues); /* PR: list of all workqueues */ static bool workqueue_freezing; /* PL: have wqs started freezing? */ /* PL&A: allowable cpus for unbound wqs and work items */ static cpumask_var_t wq_unbound_cpumask; /* PL: user requested unbound cpumask via sysfs */ static cpumask_var_t wq_requested_unbound_cpumask; /* PL: isolated cpumask to be excluded from unbound cpumask */ static cpumask_var_t wq_isolated_cpumask; /* for further constrain wq_unbound_cpumask by cmdline parameter*/ static struct cpumask wq_cmdline_cpumask __initdata; /* CPU where unbound work was last round robin scheduled from this CPU */ static DEFINE_PER_CPU(int, wq_rr_cpu_last); /* * Local execution of unbound work items is no longer guaranteed. The * following always forces round-robin CPU selection on unbound work items * to uncover usages which depend on it. 
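 *
 * In other words, callers that truly need a particular CPU should say so
 * explicitly rather than depend on unbound queueing staying local. A
 * minimal sketch (my_work is a hypothetical work item):
 *
 *	queue_work_on(cpu, system_wq, &my_work);	// run on @cpu
 *	queue_work(system_wq, &my_work);		// per-cpu wq, local CPU
 *
 * while unbound workqueues should be treated as "any allowed CPU".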
*/ #ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU static bool wq_debug_force_rr_cpu = true; #else static bool wq_debug_force_rr_cpu = false; #endif module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644); /* to raise softirq for the BH worker pools on other CPUs */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS], bh_pool_irq_works); /* the BH worker pools */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], bh_worker_pools); /* the per-cpu worker pools */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools); static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */ /* PL: hash of all unbound pools keyed by pool->attrs */ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); /* I: attributes used when instantiating standard unbound pools on demand */ static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; /* I: attributes used when instantiating ordered pools on demand */ static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS]; /* * Used to synchronize multiple cancel_sync attempts on the same work item. See * work_grab_pending() and __cancel_work_sync(). */ static DECLARE_WAIT_QUEUE_HEAD(wq_cancel_waitq); /* * I: kthread_worker to release pwq's. pwq release needs to be bounced to a * process context while holding a pool lock. Bounce to a dedicated kthread * worker to avoid A-A deadlocks. */ static struct kthread_worker *pwq_release_worker __ro_after_init; struct workqueue_struct *system_wq __ro_after_init; EXPORT_SYMBOL(system_wq); struct workqueue_struct *system_highpri_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_highpri_wq); struct workqueue_struct *system_long_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_long_wq); struct workqueue_struct *system_unbound_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_unbound_wq); struct workqueue_struct *system_freezable_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_freezable_wq); struct workqueue_struct *system_power_efficient_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_power_efficient_wq); struct workqueue_struct *system_freezable_power_efficient_wq __ro_after_init; EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); struct workqueue_struct *system_bh_wq; EXPORT_SYMBOL_GPL(system_bh_wq); struct workqueue_struct *system_bh_highpri_wq; EXPORT_SYMBOL_GPL(system_bh_highpri_wq); static int worker_thread(void *__worker); static void workqueue_sysfs_unregister(struct workqueue_struct *wq); static void show_pwq(struct pool_workqueue *pwq); static void show_one_worker_pool(struct worker_pool *pool); #define CREATE_TRACE_POINTS #include <trace/events/workqueue.h> #define assert_rcu_or_pool_mutex() \ RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() && \ !lockdep_is_held(&wq_pool_mutex), \ "RCU or wq_pool_mutex should be held") #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() && \ !lockdep_is_held(&wq->mutex) && \ !lockdep_is_held(&wq_pool_mutex), \ "RCU, wq->mutex or wq_pool_mutex should be held") #define for_each_bh_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(bh_worker_pools, cpu)[0]; \ (pool) < &per_cpu(bh_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ (pool)++) #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ (pool)++) /** * for_each_pool - iterate through all worker_pools in the system * @pool: iteration cursor * @pi: integer used for iteration * * 
This must be called either with wq_pool_mutex held or RCU read * locked. If the pool needs to be used beyond the locking in effect, the * caller is responsible for guaranteeing that the pool stays online. * * The if/else clause exists only for the lockdep assertion and can be * ignored. */ #define for_each_pool(pool, pi) \ idr_for_each_entry(&worker_pool_idr, pool, pi) \ if (({ assert_rcu_or_pool_mutex(); false; })) { } \ else /** * for_each_pool_worker - iterate through all workers of a worker_pool * @worker: iteration cursor * @pool: worker_pool to iterate workers of * * This must be called with wq_pool_attach_mutex. * * The if/else clause exists only for the lockdep assertion and can be * ignored. */ #define for_each_pool_worker(worker, pool) \ list_for_each_entry((worker), &(pool)->workers, node) \ if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \ else /** * for_each_pwq - iterate through all pool_workqueues of the specified workqueue * @pwq: iteration cursor * @wq: the target workqueue * * This must be called either with wq->mutex held or RCU read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. * * The if/else clause exists only for the lockdep assertion and can be * ignored. */ #define for_each_pwq(pwq, wq) \ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \ lockdep_is_held(&(wq->mutex))) #ifdef CONFIG_DEBUG_OBJECTS_WORK static const struct debug_obj_descr work_debug_descr; static void *work_debug_hint(void *addr) { return ((struct work_struct *) addr)->func; } static bool work_is_static_object(void *addr) { struct work_struct *work = addr; return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work)); } /* * fixup_init is called when: * - an active object is initialized */ static bool work_fixup_init(void *addr, enum debug_obj_state state) { struct work_struct *work = addr; switch (state) { case ODEBUG_STATE_ACTIVE: cancel_work_sync(work); debug_object_init(work, &work_debug_descr); return true; default: return false; } } /* * fixup_free is called when: * - an active object is freed */ static bool work_fixup_free(void *addr, enum debug_obj_state state) { struct work_struct *work = addr; switch (state) { case ODEBUG_STATE_ACTIVE: cancel_work_sync(work); debug_object_free(work, &work_debug_descr); return true; default: return false; } } static const struct debug_obj_descr work_debug_descr = { .name = "work_struct", .debug_hint = work_debug_hint, .is_static_object = work_is_static_object, .fixup_init = work_fixup_init, .fixup_free = work_fixup_free, }; static inline void debug_work_activate(struct work_struct *work) { debug_object_activate(work, &work_debug_descr); } static inline void debug_work_deactivate(struct work_struct *work) { debug_object_deactivate(work, &work_debug_descr); } void __init_work(struct work_struct *work, int onstack) { if (onstack) debug_object_init_on_stack(work, &work_debug_descr); else debug_object_init(work, &work_debug_descr); } EXPORT_SYMBOL_GPL(__init_work); void destroy_work_on_stack(struct work_struct *work) { debug_object_free(work, &work_debug_descr); } EXPORT_SYMBOL_GPL(destroy_work_on_stack); void destroy_delayed_work_on_stack(struct delayed_work *work) { destroy_timer_on_stack(&work->timer); debug_object_free(&work->work, &work_debug_descr); } EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack); #else static inline void debug_work_activate(struct work_struct *work) { } static inline void debug_work_deactivate(struct work_struct *work) { 
} #endif /** * worker_pool_assign_id - allocate ID and assign it to @pool * @pool: the pool pointer of interest * * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned * successfully, -errno on failure. */ static int worker_pool_assign_id(struct worker_pool *pool) { int ret; lockdep_assert_held(&wq_pool_mutex); ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE, GFP_KERNEL); if (ret >= 0) { pool->id = ret; return 0; } return ret; } static struct pool_workqueue __rcu ** unbound_pwq_slot(struct workqueue_struct *wq, int cpu) { if (cpu >= 0) return per_cpu_ptr(wq->cpu_pwq, cpu); else return &wq->dfl_pwq; } /* @cpu < 0 for dfl_pwq */ static struct pool_workqueue *unbound_pwq(struct workqueue_struct *wq, int cpu) { return rcu_dereference_check(*unbound_pwq_slot(wq, cpu), lockdep_is_held(&wq_pool_mutex) || lockdep_is_held(&wq->mutex)); } /** * unbound_effective_cpumask - effective cpumask of an unbound workqueue * @wq: workqueue of interest * * @wq->unbound_attrs->cpumask contains the cpumask requested by the user which * is masked with wq_unbound_cpumask to determine the effective cpumask. The * default pwq is always mapped to the pool with the current effective cpumask. */ static struct cpumask *unbound_effective_cpumask(struct workqueue_struct *wq) { return unbound_pwq(wq, -1)->pool->attrs->__pod_cpumask; } static unsigned int work_color_to_flags(int color) { return color << WORK_STRUCT_COLOR_SHIFT; } static int get_work_color(unsigned long work_data) { return (work_data >> WORK_STRUCT_COLOR_SHIFT) & ((1 << WORK_STRUCT_COLOR_BITS) - 1); } static int work_next_color(int color) { return (color + 1) % WORK_NR_COLORS; } /* * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data * contain the pointer to the queued pwq. Once execution starts, the flag * is cleared and the high bits contain OFFQ flags and pool ID. * * set_work_pwq(), set_work_pool_and_clear_pending() and mark_work_canceling() * can be used to set the pwq, pool or clear work->data. These functions should * only be called while the work is owned - ie. while the PENDING bit is set. * * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq * corresponding to a work. Pool is available once the work has been * queued anywhere after initialization until it is sync canceled. pwq is * available only while the work item is queued. * * %WORK_OFFQ_CANCELING is used to mark a work item which is being * canceled. While being canceled, a work item may have its PENDING set * but stay off timer and worklist for arbitrarily long and nobody should * try to steal the PENDING bit. */ static inline void set_work_data(struct work_struct *work, unsigned long data) { WARN_ON_ONCE(!work_pending(work)); atomic_long_set(&work->data, data | work_static(work)); } static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq, unsigned long flags) { set_work_data(work, (unsigned long)pwq | WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | flags); } static void set_work_pool_and_keep_pending(struct work_struct *work, int pool_id, unsigned long flags) { set_work_data(work, ((unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT) | WORK_STRUCT_PENDING | flags); } static void set_work_pool_and_clear_pending(struct work_struct *work, int pool_id, unsigned long flags) { /* * The following wmb is paired with the implied mb in * test_and_set_bit(PENDING) and ensures all updates to @work made * here are visible to and precede any updates by the next PENDING * owner. 
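 *
 * For reference, a rough sketch of the two work->data layouts that
 * set_work_pwq() and the set_work_pool_*() helpers above switch between:
 *
 *	queued:		[ pwq pointer (high bits) | WORK_STRUCT_* flags ]
 *	off queue:	[ pool ID << WORK_OFFQ_POOL_SHIFT | OFFQ + WORK_STRUCT_* flags ]
 *
 * get_work_pwq() is only meaningful in the first form (WORK_STRUCT_PWQ
 * set), while get_work_pool() can decode either form.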
*/ smp_wmb(); set_work_data(work, ((unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT) | flags); /* * The following mb guarantees that previous clear of a PENDING bit * will not be reordered with any speculative LOADS or STORES from * work->current_func, which is executed afterwards. This possible * reordering can lead to a missed execution on attempt to queue * the same @work. E.g. consider this case: * * CPU#0 CPU#1 * ---------------------------- -------------------------------- * * 1 STORE event_indicated * 2 queue_work_on() { * 3 test_and_set_bit(PENDING) * 4 } set_..._and_clear_pending() { * 5 set_work_data() # clear bit * 6 smp_mb() * 7 work->current_func() { * 8 LOAD event_indicated * } * * Without an explicit full barrier speculative LOAD on line 8 can * be executed before CPU#0 does STORE on line 1. If that happens, * CPU#0 observes the PENDING bit is still set and new execution of * a @work is not queued in a hope, that CPU#1 will eventually * finish the queued @work. Meanwhile CPU#1 does not see * event_indicated is set, because speculative LOAD was executed * before actual STORE. */ smp_mb(); } static inline struct pool_workqueue *work_struct_pwq(unsigned long data) { return (struct pool_workqueue *)(data & WORK_STRUCT_PWQ_MASK); } static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) return work_struct_pwq(data); else return NULL; } /** * get_work_pool - return the worker_pool a given work was associated with * @work: the work item of interest * * Pools are created and destroyed under wq_pool_mutex, and allows read * access under RCU read lock. As such, this function should be * called under wq_pool_mutex or inside of a rcu_read_lock() region. * * All fields of the returned pool are accessible as long as the above * mentioned locking is in effect. If the returned pool needs to be used * beyond the critical section, the caller is responsible for ensuring the * returned pool is and stays online. * * Return: The worker_pool @work was last associated with. %NULL if none. */ static struct worker_pool *get_work_pool(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); int pool_id; assert_rcu_or_pool_mutex(); if (data & WORK_STRUCT_PWQ) return work_struct_pwq(data)->pool; pool_id = data >> WORK_OFFQ_POOL_SHIFT; if (pool_id == WORK_OFFQ_POOL_NONE) return NULL; return idr_find(&worker_pool_idr, pool_id); } /** * get_work_pool_id - return the worker pool ID a given work is associated with * @work: the work item of interest * * Return: The worker_pool ID @work was last associated with. * %WORK_OFFQ_POOL_NONE if none. */ static int get_work_pool_id(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) return work_struct_pwq(data)->pool->id; return data >> WORK_OFFQ_POOL_SHIFT; } static void mark_work_canceling(struct work_struct *work) { unsigned long pool_id = get_work_pool_id(work); pool_id <<= WORK_OFFQ_POOL_SHIFT; set_work_data(work, pool_id | WORK_STRUCT_PENDING | WORK_OFFQ_CANCELING); } static bool work_is_canceling(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING); } /* * Policy functions. These define the policies on how the global worker * pools are managed. Unless noted otherwise, these functions assume that * they're being called with pool->lock held. */ /* * Need to wake up a worker? 
Called from anything but currently * running workers. * * Note that, because unbound workers never contribute to nr_running, this * function will always return %true for unbound pools as long as the * worklist isn't empty. */ static bool need_more_worker(struct worker_pool *pool) { return !list_empty(&pool->worklist) && !pool->nr_running; } /* Can I start working? Called from busy but !running workers. */ static bool may_start_working(struct worker_pool *pool) { return pool->nr_idle; } /* Do I need to keep working? Called from currently running workers. */ static bool keep_working(struct worker_pool *pool) { return !list_empty(&pool->worklist) && (pool->nr_running <= 1); } /* Do we need a new worker? Called from manager. */ static bool need_to_create_worker(struct worker_pool *pool) { return need_more_worker(pool) && !may_start_working(pool); } /* Do we have too many workers and should some go away? */ static bool too_many_workers(struct worker_pool *pool) { bool managing = pool->flags & POOL_MANAGER_ACTIVE; int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_busy = pool->nr_workers - nr_idle; return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; } /** * worker_set_flags - set worker flags and adjust nr_running accordingly * @worker: self * @flags: flags to set * * Set @flags in @worker->flags and adjust nr_running accordingly. */ static inline void worker_set_flags(struct worker *worker, unsigned int flags) { struct worker_pool *pool = worker->pool; lockdep_assert_held(&pool->lock); /* If transitioning into NOT_RUNNING, adjust nr_running. */ if ((flags & WORKER_NOT_RUNNING) && !(worker->flags & WORKER_NOT_RUNNING)) { pool->nr_running--; } worker->flags |= flags; } /** * worker_clr_flags - clear worker flags and adjust nr_running accordingly * @worker: self * @flags: flags to clear * * Clear @flags in @worker->flags and adjust nr_running accordingly. */ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) { struct worker_pool *pool = worker->pool; unsigned int oflags = worker->flags; lockdep_assert_held(&pool->lock); worker->flags &= ~flags; /* * If transitioning out of NOT_RUNNING, increment nr_running. Note * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is mask * of multiple flags, not a single flag. */ if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) if (!(worker->flags & WORKER_NOT_RUNNING)) pool->nr_running++; } /* Return the first idle worker. Called with pool->lock held. */ static struct worker *first_idle_worker(struct worker_pool *pool) { if (unlikely(list_empty(&pool->idle_list))) return NULL; return list_first_entry(&pool->idle_list, struct worker, entry); } /** * worker_enter_idle - enter idle state * @worker: worker which is entering idle state * * @worker is entering idle state. Update stats and idle timer if * necessary. * * LOCKING: * raw_spin_lock_irq(pool->lock). */ static void worker_enter_idle(struct worker *worker) { struct worker_pool *pool = worker->pool; if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) || WARN_ON_ONCE(!list_empty(&worker->entry) && (worker->hentry.next || worker->hentry.pprev))) return; /* can't use worker_set_flags(), also called from create_worker() */ worker->flags |= WORKER_IDLE; pool->nr_idle++; worker->last_active = jiffies; /* idle_list is LIFO */ list_add(&worker->entry, &pool->idle_list); if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); /* Sanity check nr_running. 
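 *
 * (A worked example of the too_many_workers() test that arms the idle
 * timer above, with MAX_IDLE_WORKERS_RATIO == 4: with 8 busy workers, up
 * to 3 idle workers are tolerated; once a 4th goes idle,
 * (4 - 2) * 4 >= 8 holds and the IDLE_WORKER_TIMEOUT (5 minute) cull
 * timer is started.)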
*/ WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && pool->nr_running); } /** * worker_leave_idle - leave idle state * @worker: worker which is leaving idle state * * @worker is leaving idle state. Update stats. * * LOCKING: * raw_spin_lock_irq(pool->lock). */ static void worker_leave_idle(struct worker *worker) { struct worker_pool *pool = worker->pool; if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE))) return; worker_clr_flags(worker, WORKER_IDLE); pool->nr_idle--; list_del_init(&worker->entry); } /** * find_worker_executing_work - find worker which is executing a work * @pool: pool of interest * @work: work to find worker for * * Find a worker which is executing @work on @pool by searching * @pool->busy_hash which is keyed by the address of @work. For a worker * to match, its current execution should match the address of @work and * its work function. This is to avoid unwanted dependency between * unrelated work executions through a work item being recycled while still * being executed. * * This is a bit tricky. A work item may be freed once its execution * starts and nothing prevents the freed area from being recycled for * another work item. If the same work item address ends up being reused * before the original execution finishes, workqueue will identify the * recycled work item as currently executing and make it wait until the * current execution finishes, introducing an unwanted dependency. * * This function checks the work item address and work function to avoid * false positives. Note that this isn't complete as one may construct a * work function which can introduce dependency onto itself through a * recycled work item. Well, if somebody wants to shoot oneself in the * foot that badly, there's only so much we can do, and if such deadlock * actually occurs, it should be easy to locate the culprit work function. * * CONTEXT: * raw_spin_lock_irq(pool->lock). * * Return: * Pointer to worker which is executing @work if found, %NULL * otherwise. */ static struct worker *find_worker_executing_work(struct worker_pool *pool, struct work_struct *work) { struct worker *worker; hash_for_each_possible(pool->busy_hash, worker, hentry, (unsigned long)work) if (worker->current_work == work && worker->current_func == work->func) return worker; return NULL; } /** * move_linked_works - move linked works to a list * @work: start of series of works to be scheduled * @head: target list to append @work to * @nextp: out parameter for nested worklist walking * * Schedule linked works starting from @work to @head. Work series to be * scheduled starts at @work and includes any consecutive work with * WORK_STRUCT_LINKED set in its predecessor. See assign_work() for details on * @nextp. * * CONTEXT: * raw_spin_lock_irq(pool->lock). */ static void move_linked_works(struct work_struct *work, struct list_head *head, struct work_struct **nextp) { struct work_struct *n; /* * Linked worklist will always end before the end of the list, * use NULL for list head. */ list_for_each_entry_safe_from(work, n, NULL, entry) { list_move_tail(&work->entry, head); if (!(*work_data_bits(work) & WORK_STRUCT_LINKED)) break; } /* * If we're already inside safe list traversal and have moved * multiple works to the scheduled queue, the next position * needs to be updated. 
*/ if (nextp) *nextp = n; } /** * assign_work - assign a work item and its linked work items to a worker * @work: work to assign * @worker: worker to assign to * @nextp: out parameter for nested worklist walking * * Assign @work and its linked work items to @worker. If @work is already being * executed by another worker in the same pool, it'll be punted there. * * If @nextp is not NULL, it's updated to point to the next work of the last * scheduled work. This allows assign_work() to be nested inside * list_for_each_entry_safe(). * * Returns %true if @work was successfully assigned to @worker. %false if @work * was punted to another worker already executing it. */ static bool assign_work(struct work_struct *work, struct worker *worker, struct work_struct **nextp) { struct worker_pool *pool = worker->pool; struct worker *collision; lockdep_assert_held(&pool->lock); /* * A single work shouldn't be executed concurrently by multiple workers. * __queue_work() ensures that @work doesn't jump to a different pool * while still running in the previous pool. Here, we should ensure that * @work is not executed concurrently by multiple workers from the same * pool. Check whether anyone is already processing the work. If so, * defer the work to the currently executing one. */ collision = find_worker_executing_work(pool, work); if (unlikely(collision)) { move_linked_works(work, &collision->scheduled, nextp); return false; } move_linked_works(work, &worker->scheduled, nextp); return true; } static struct irq_work *bh_pool_irq_work(struct worker_pool *pool) { int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? 1 : 0; return &per_cpu(bh_pool_irq_works, pool->cpu)[high]; } static void kick_bh_pool(struct worker_pool *pool) { #ifdef CONFIG_SMP /* see drain_dead_softirq_workfn() for BH_DRAINING */ if (unlikely(pool->cpu != smp_processor_id() && !(pool->flags & POOL_BH_DRAINING))) { irq_work_queue_on(bh_pool_irq_work(pool), pool->cpu); return; } #endif if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) raise_softirq_irqoff(HI_SOFTIRQ); else raise_softirq_irqoff(TASKLET_SOFTIRQ); } /** * kick_pool - wake up an idle worker if necessary * @pool: pool to kick * * @pool may have pending work items. Wake up worker if necessary. Returns * whether a worker was woken up. */ static bool kick_pool(struct worker_pool *pool) { struct worker *worker = first_idle_worker(pool); struct task_struct *p; lockdep_assert_held(&pool->lock); if (!need_more_worker(pool) || !worker) return false; if (pool->flags & POOL_BH) { kick_bh_pool(pool); return true; } p = worker->task; #ifdef CONFIG_SMP /* * Idle @worker is about to execute @work and waking up provides an * opportunity to migrate @worker at a lower cost by setting the task's * wake_cpu field. Let's see if we want to move @worker to improve * execution locality. * * We're waking the worker that went idle the latest and there's some * chance that @worker is marked idle but hasn't gone off CPU yet. If * so, setting the wake_cpu won't do anything. As this is a best-effort * optimization and the race window is narrow, let's leave as-is for * now. If this becomes pronounced, we can skip over workers which are * still on cpu when picking an idle worker. * * If @pool has non-strict affinity, @worker might have ended up outside * its affinity scope. Repatriate. 
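 *
 * A concrete instance under hypothetical numbers: the pool's
 * __pod_cpumask is 0-3 but the chosen idle worker last ran on CPU 7, so
 * p->wake_cpu is currently 7. The code below picks an online CPU in 0-3
 * instead, the wakeup migrates the worker back into its scope, and the
 * move is accounted in PWQ_STAT_REPATRIATED.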
*/ if (!pool->attrs->affn_strict && !cpumask_test_cpu(p->wake_cpu, pool->attrs->__pod_cpumask)) { struct work_struct *work = list_first_entry(&pool->worklist, struct work_struct, entry); int wake_cpu = cpumask_any_and_distribute(pool->attrs->__pod_cpumask, cpu_online_mask); if (wake_cpu < nr_cpu_ids) { p->wake_cpu = wake_cpu; get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++; } } #endif wake_up_process(p); return true; } #ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT /* * Concurrency-managed per-cpu work items that hog CPU for longer than * wq_cpu_intensive_thresh_us trigger the automatic CPU_INTENSIVE mechanism, * which prevents them from stalling other concurrency-managed work items. If a * work function keeps triggering this mechanism, it's likely that the work item * should be using an unbound workqueue instead. * * wq_cpu_intensive_report() tracks work functions which trigger such conditions * and report them so that they can be examined and converted to use unbound * workqueues as appropriate. To avoid flooding the console, each violating work * function is tracked and reported with exponential backoff. */ #define WCI_MAX_ENTS 128 struct wci_ent { work_func_t func; atomic64_t cnt; struct hlist_node hash_node; }; static struct wci_ent wci_ents[WCI_MAX_ENTS]; static int wci_nr_ents; static DEFINE_RAW_SPINLOCK(wci_lock); static DEFINE_HASHTABLE(wci_hash, ilog2(WCI_MAX_ENTS)); static struct wci_ent *wci_find_ent(work_func_t func) { struct wci_ent *ent; hash_for_each_possible_rcu(wci_hash, ent, hash_node, (unsigned long)func) { if (ent->func == func) return ent; } return NULL; } static void wq_cpu_intensive_report(work_func_t func) { struct wci_ent *ent; restart: ent = wci_find_ent(func); if (ent) { u64 cnt; /* * Start reporting from the warning_thresh and back off * exponentially. */ cnt = atomic64_inc_return_relaxed(&ent->cnt); if (wq_cpu_intensive_warning_thresh && cnt >= wq_cpu_intensive_warning_thresh && is_power_of_2(cnt + 1 - wq_cpu_intensive_warning_thresh)) printk_deferred(KERN_WARNING "workqueue: %ps hogged CPU for >%luus %llu times, consider switching to WQ_UNBOUND\n", ent->func, wq_cpu_intensive_thresh_us, atomic64_read(&ent->cnt)); return; } /* * @func is a new violation. Allocate a new entry for it. If wcn_ents[] * is exhausted, something went really wrong and we probably made enough * noise already. */ if (wci_nr_ents >= WCI_MAX_ENTS) return; raw_spin_lock(&wci_lock); if (wci_nr_ents >= WCI_MAX_ENTS) { raw_spin_unlock(&wci_lock); return; } if (wci_find_ent(func)) { raw_spin_unlock(&wci_lock); goto restart; } ent = &wci_ents[wci_nr_ents++]; ent->func = func; atomic64_set(&ent->cnt, 0); hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func); raw_spin_unlock(&wci_lock); goto restart; } #else /* CONFIG_WQ_CPU_INTENSIVE_REPORT */ static void wq_cpu_intensive_report(work_func_t func) {} #endif /* CONFIG_WQ_CPU_INTENSIVE_REPORT */ /** * wq_worker_running - a worker is running again * @task: task waking up * * This function is called when a worker returns from schedule() */ void wq_worker_running(struct task_struct *task) { struct worker *worker = kthread_data(task); if (!READ_ONCE(worker->sleeping)) return; /* * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check * and the nr_running increment below, we may ruin the nr_running reset * and leave with an unexpected pool->nr_running == 1 on the newly unbound * pool. Protect against such race. 
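 *
 * (Worked example for the report throttling in wq_cpu_intensive_report()
 * above: with the default wq_cpu_intensive_warning_thresh == 4, a repeat
 * offender is reported on its 4th, 5th, 7th, 11th, 19th, ... violation,
 * i.e. whenever cnt + 1 - 4 is a power of two, so the log backs off
 * exponentially instead of flooding.)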
*/ preempt_disable(); if (!(worker->flags & WORKER_NOT_RUNNING)) worker->pool->nr_running++; preempt_enable(); /* * CPU intensive auto-detection cares about how long a work item hogged * CPU without sleeping. Reset the starting timestamp on wakeup. */ worker->current_at = worker->task->se.sum_exec_runtime; WRITE_ONCE(worker->sleeping, 0); } /** * wq_worker_sleeping - a worker is going to sleep * @task: task going to sleep * * This function is called from schedule() when a busy worker is * going to sleep. */ void wq_worker_sleeping(struct task_struct *task) { struct worker *worker = kthread_data(task); struct worker_pool *pool; /* * Rescuers, which may not have all the fields set up like normal * workers, also reach here, let's not access anything before * checking NOT_RUNNING. */ if (worker->flags & WORKER_NOT_RUNNING) return; pool = worker->pool; /* Return if preempted before wq_worker_running() was reached */ if (READ_ONCE(worker->sleeping)) return; WRITE_ONCE(worker->sleeping, 1); raw_spin_lock_irq(&pool->lock); /* * Recheck in case unbind_workers() preempted us. We don't * want to decrement nr_running after the worker is unbound * and nr_running has been reset. */ if (worker->flags & WORKER_NOT_RUNNING) { raw_spin_unlock_irq(&pool->lock); return; } pool->nr_running--; if (kick_pool(pool)) worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++; raw_spin_unlock_irq(&pool->lock); } /** * wq_worker_tick - a scheduler tick occurred while a kworker is running * @task: task currently running * * Called from scheduler_tick(). We're in the IRQ context and the current * worker's fields which follow the 'K' locking rule can be accessed safely. */ void wq_worker_tick(struct task_struct *task) { struct worker *worker = kthread_data(task); struct pool_workqueue *pwq = worker->current_pwq; struct worker_pool *pool = worker->pool; if (!pwq) return; pwq->stats[PWQ_STAT_CPU_TIME] += TICK_USEC; if (!wq_cpu_intensive_thresh_us) return; /* * If the current worker is concurrency managed and hogged the CPU for * longer than wq_cpu_intensive_thresh_us, it's automatically marked * CPU_INTENSIVE to avoid stalling other concurrency-managed work items. * * Set @worker->sleeping means that @worker is in the process of * switching out voluntarily and won't be contributing to * @pool->nr_running until it wakes up. As wq_worker_sleeping() also * decrements ->nr_running, setting CPU_INTENSIVE here can lead to * double decrements. The task is releasing the CPU anyway. Let's skip. * We probably want to make this prettier in the future. */ if ((worker->flags & WORKER_NOT_RUNNING) || READ_ONCE(worker->sleeping) || worker->task->se.sum_exec_runtime - worker->current_at < wq_cpu_intensive_thresh_us * NSEC_PER_USEC) return; raw_spin_lock(&pool->lock); worker_set_flags(worker, WORKER_CPU_INTENSIVE); wq_cpu_intensive_report(worker->current_func); pwq->stats[PWQ_STAT_CPU_INTENSIVE]++; if (kick_pool(pool)) pwq->stats[PWQ_STAT_CM_WAKEUP]++; raw_spin_unlock(&pool->lock); } /** * wq_worker_last_func - retrieve worker's last work function * @task: Task to retrieve last work function of. * * Determine the last function a worker executed. This is called from * the scheduler to get a worker's last known identity. * * CONTEXT: * raw_spin_lock_irq(rq->lock) * * This function is called during schedule() when a kworker is going * to sleep. It's used by psi to identify aggregation workers during * dequeuing, to allow periodic aggregation to shut-off when that * worker is the last task in the system or cgroup to go to sleep. 
* * As this function doesn't involve any workqueue-related locking, it * only returns stable values when called from inside the scheduler's * queuing and dequeuing paths, when @task, which must be a kworker, * is guaranteed to not be processing any works. * * Return: * The last work function %current executed as a worker, NULL if it * hasn't executed any work yet. */ work_func_t wq_worker_last_func(struct task_struct *task) { struct worker *worker = kthread_data(task); return worker->last_func; } /** * wq_node_nr_active - Determine wq_node_nr_active to use * @wq: workqueue of interest * @node: NUMA node, can be %NUMA_NO_NODE * * Determine wq_node_nr_active to use for @wq on @node. Returns: * * - %NULL for per-cpu workqueues as they don't need to use shared nr_active. * * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE. * * - Otherwise, node_nr_active[@node]. */ static struct wq_node_nr_active *wq_node_nr_active(struct workqueue_struct *wq, int node) { if (!(wq->flags & WQ_UNBOUND)) return NULL; if (node == NUMA_NO_NODE) node = nr_node_ids; return wq->node_nr_active[node]; } /** * wq_update_node_max_active - Update per-node max_actives to use * @wq: workqueue to update * @off_cpu: CPU that's going down, -1 if a CPU is not going down * * Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is * distributed among nodes according to the proportions of numbers of online * cpus. The result is always between @wq->min_active and max_active. */ static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu) { struct cpumask *effective = unbound_effective_cpumask(wq); int min_active = READ_ONCE(wq->min_active); int max_active = READ_ONCE(wq->max_active); int total_cpus, node; lockdep_assert_held(&wq->mutex); if (!wq_topo_initialized) return; if (off_cpu >= 0 && !cpumask_test_cpu(off_cpu, effective)) off_cpu = -1; total_cpus = cpumask_weight_and(effective, cpu_online_mask); if (off_cpu >= 0) total_cpus--; /* If all CPUs of the wq get offline, use the default values */ if (unlikely(!total_cpus)) { for_each_node(node) wq_node_nr_active(wq, node)->max = min_active; wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; return; } for_each_node(node) { int node_cpus; node_cpus = cpumask_weight_and(effective, cpumask_of_node(node)); if (off_cpu >= 0 && cpu_to_node(off_cpu) == node) node_cpus--; wq_node_nr_active(wq, node)->max = clamp(DIV_ROUND_UP(max_active * node_cpus, total_cpus), min_active, max_active); } wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; } /** * get_pwq - get an extra reference on the specified pool_workqueue * @pwq: pool_workqueue to get * * Obtain an extra reference on @pwq. The caller should guarantee that * @pwq has positive refcnt and be holding the matching pool->lock. */ static void get_pwq(struct pool_workqueue *pwq) { lockdep_assert_held(&pwq->pool->lock); WARN_ON_ONCE(pwq->refcnt <= 0); pwq->refcnt++; } /** * put_pwq - put a pool_workqueue reference * @pwq: pool_workqueue to put * * Drop a reference of @pwq. If its refcnt reaches zero, schedule its * destruction. The caller should be holding the matching pool->lock. */ static void put_pwq(struct pool_workqueue *pwq) { lockdep_assert_held(&pwq->pool->lock); if (likely(--pwq->refcnt)) return; /* * @pwq can't be released under pool->lock, bounce to a dedicated * kthread_worker to avoid A-A deadlocks. 
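 *
 * (Worked example for wq_update_node_max_active() above, with
 * hypothetical numbers: max_active == 16, min_active == 4, and an
 * effective cpumask covering 6 online CPUs on node 0 and 2 on node 1, so
 * total_cpus == 8. Node 0 gets clamp(DIV_ROUND_UP(16 * 6, 8), 4, 16) == 12,
 * node 1 gets clamp(DIV_ROUND_UP(16 * 2, 8), 4, 16) == 4, and the
 * NUMA_NO_NODE slot keeps the full 16.)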
*/ kthread_queue_work(pwq_release_worker, &pwq->release_work); } /** * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock * @pwq: pool_workqueue to put (can be %NULL) * * put_pwq() with locking. This function also allows %NULL @pwq. */ static void put_pwq_unlocked(struct pool_workqueue *pwq) { if (pwq) { /* * As both pwqs and pools are RCU protected, the * following lock operations are safe. */ raw_spin_lock_irq(&pwq->pool->lock); put_pwq(pwq); raw_spin_unlock_irq(&pwq->pool->lock); } } static bool pwq_is_empty(struct pool_workqueue *pwq) { return !pwq->nr_active && list_empty(&pwq->inactive_works); } static void __pwq_activate_work(struct pool_workqueue *pwq, struct work_struct *work) { unsigned long *wdb = work_data_bits(work); WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE)); trace_workqueue_activate_work(work); if (list_empty(&pwq->pool->worklist)) pwq->pool->watchdog_ts = jiffies; move_linked_works(work, &pwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb); } /** * pwq_activate_work - Activate a work item if inactive * @pwq: pool_workqueue @work belongs to * @work: work item to activate * * Returns %true if activated. %false if already active. */ static bool pwq_activate_work(struct pool_workqueue *pwq, struct work_struct *work) { struct worker_pool *pool = pwq->pool; struct wq_node_nr_active *nna; lockdep_assert_held(&pool->lock); if (!(*work_data_bits(work) & WORK_STRUCT_INACTIVE)) return false; nna = wq_node_nr_active(pwq->wq, pool->node); if (nna) atomic_inc(&nna->nr); pwq->nr_active++; __pwq_activate_work(pwq, work); return true; } static bool tryinc_node_nr_active(struct wq_node_nr_active *nna) { int max = READ_ONCE(nna->max); while (true) { int old, tmp; old = atomic_read(&nna->nr); if (old >= max) return false; tmp = atomic_cmpxchg_relaxed(&nna->nr, old, old + 1); if (tmp == old) return true; } } /** * pwq_tryinc_nr_active - Try to increment nr_active for a pwq * @pwq: pool_workqueue of interest * @fill: max_active may have increased, try to increase concurrency level * * Try to increment nr_active for @pwq. Returns %true if an nr_active count is * successfully obtained. %false otherwise. */ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill) { struct workqueue_struct *wq = pwq->wq; struct worker_pool *pool = pwq->pool; struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node); bool obtained = false; lockdep_assert_held(&pool->lock); if (!nna) { /* BH or per-cpu workqueue, pwq->nr_active is sufficient */ obtained = pwq->nr_active < READ_ONCE(wq->max_active); goto out; } if (unlikely(pwq->plugged)) return false; /* * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is * already waiting on $nna, pwq_dec_nr_active() will maintain the * concurrency level. Don't jump the line. * * We need to ignore the pending test after max_active has increased as * pwq_dec_nr_active() can only maintain the concurrency level but not * increase it. This is indicated by @fill. */ if (!list_empty(&pwq->pending_node) && likely(!fill)) goto out; obtained = tryinc_node_nr_active(nna); if (obtained) goto out; /* * Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs * and try again. The smp_mb() is paired with the implied memory barrier * of atomic_dec_return() in pwq_dec_nr_active() to ensure that either * we see the decremented $nna->nr or they see non-empty * $nna->pending_pwqs. 
*/ raw_spin_lock(&nna->lock); if (list_empty(&pwq->pending_node)) list_add_tail(&pwq->pending_node, &nna->pending_pwqs); else if (likely(!fill)) goto out_unlock; smp_mb(); obtained = tryinc_node_nr_active(nna); /* * If @fill, @pwq might have already been pending. Being spuriously * pending in cold paths doesn't affect anything. Let's leave it be. */ if (obtained && likely(!fill)) list_del_init(&pwq->pending_node); out_unlock: raw_spin_unlock(&nna->lock); out: if (obtained) pwq->nr_active++; return obtained; } /** * pwq_activate_first_inactive - Activate the first inactive work item on a pwq * @pwq: pool_workqueue of interest * @fill: max_active may have increased, try to increase concurrency level * * Activate the first inactive work item of @pwq if available and allowed by * max_active limit. * * Returns %true if an inactive work item has been activated. %false if no * inactive work item is found or max_active limit is reached. */ static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill) { struct work_struct *work = list_first_entry_or_null(&pwq->inactive_works, struct work_struct, entry); if (work && pwq_tryinc_nr_active(pwq, fill)) { __pwq_activate_work(pwq, work); return true; } else { return false; } } /** * unplug_oldest_pwq - unplug the oldest pool_workqueue * @wq: workqueue_struct where its oldest pwq is to be unplugged * * This function should only be called for ordered workqueues where only the * oldest pwq is unplugged, the others are plugged to suspend execution to * ensure proper work item ordering:: * * dfl_pwq --------------+ [P] - plugged * | * v * pwqs -> A -> B [P] -> C [P] (newest) * | | | * 1 3 5 * | | | * 2 4 6 * * When the oldest pwq is drained and removed, this function should be called * to unplug the next oldest one to start its work item execution. Note that * pwq's are linked into wq->pwqs with the oldest first, so the first one in * the list is the oldest. */ static void unplug_oldest_pwq(struct workqueue_struct *wq) { struct pool_workqueue *pwq; lockdep_assert_held(&wq->mutex); /* Caller should make sure that pwqs isn't empty before calling */ pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue, pwqs_node); raw_spin_lock_irq(&pwq->pool->lock); if (pwq->plugged) { pwq->plugged = false; if (pwq_activate_first_inactive(pwq, true)) kick_pool(pwq->pool); } raw_spin_unlock_irq(&pwq->pool->lock); } /** * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active * @nna: wq_node_nr_active to activate a pending pwq for * @caller_pool: worker_pool the caller is locking * * Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked. * @caller_pool may be unlocked and relocked to lock other worker_pools. */ static void node_activate_pending_pwq(struct wq_node_nr_active *nna, struct worker_pool *caller_pool) { struct worker_pool *locked_pool = caller_pool; struct pool_workqueue *pwq; struct work_struct *work; lockdep_assert_held(&caller_pool->lock); raw_spin_lock(&nna->lock); retry: pwq = list_first_entry_or_null(&nna->pending_pwqs, struct pool_workqueue, pending_node); if (!pwq) goto out_unlock; /* * If @pwq is for a different pool than @locked_pool, we need to lock * @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock * / lock dance. For that, we also need to release @nna->lock as it's * nested inside pool locks. 
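 *
 * (The plugging handled by unplug_oldest_pwq() above is what backs the
 * user-visible ordering guarantee of ordered workqueues; a minimal sketch
 * with hypothetical wq/w1/w2:
 *
 *	wq = alloc_ordered_workqueue("my_ordered", 0);
 *	queue_work(wq, &w1);
 *	queue_work(wq, &w2);	// w2 cannot start until w1 has finished
 *
 * even when, as in the diagram above, the two items land on different
 * pwqs.)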
*/ if (pwq->pool != locked_pool) { raw_spin_unlock(&locked_pool->lock); locked_pool = pwq->pool; if (!raw_spin_trylock(&locked_pool->lock)) { raw_spin_unlock(&nna->lock); raw_spin_lock(&locked_pool->lock); raw_spin_lock(&nna->lock); goto retry; } } /* * $pwq may not have any inactive work items due to e.g. cancellations. * Drop it from pending_pwqs and see if there's another one. */ work = list_first_entry_or_null(&pwq->inactive_works, struct work_struct, entry); if (!work) { list_del_init(&pwq->pending_node); goto retry; } /* * Acquire an nr_active count and activate the inactive work item. If * $pwq still has inactive work items, rotate it to the end of the * pending_pwqs so that we round-robin through them. This means that * inactive work items are not activated in queueing order which is fine * given that there has never been any ordering across different pwqs. */ if (likely(tryinc_node_nr_active(nna))) { pwq->nr_active++; __pwq_activate_work(pwq, work); if (list_empty(&pwq->inactive_works)) list_del_init(&pwq->pending_node); else list_move_tail(&pwq->pending_node, &nna->pending_pwqs); /* if activating a foreign pool, make sure it's running */ if (pwq->pool != caller_pool) kick_pool(pwq->pool); } out_unlock: raw_spin_unlock(&nna->lock); if (locked_pool != caller_pool) { raw_spin_unlock(&locked_pool->lock); raw_spin_lock(&caller_pool->lock); } } /** * pwq_dec_nr_active - Retire an active count * @pwq: pool_workqueue of interest * * Decrement @pwq's nr_active and try to activate the first inactive work item. * For unbound workqueues, this function may temporarily drop @pwq->pool->lock. */ static void pwq_dec_nr_active(struct pool_workqueue *pwq) { struct worker_pool *pool = pwq->pool; struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node); lockdep_assert_held(&pool->lock); /* * @pwq->nr_active should be decremented for both percpu and unbound * workqueues. */ pwq->nr_active--; /* * For a percpu workqueue, it's simple. Just need to kick the first * inactive work item on @pwq itself. */ if (!nna) { pwq_activate_first_inactive(pwq, false); return; } /* * If @pwq is for an unbound workqueue, it's more complicated because * multiple pwqs and pools may be sharing the nr_active count. When a * pwq needs to wait for an nr_active count, it puts itself on * $nna->pending_pwqs. The following atomic_dec_return()'s implied * memory barrier is paired with smp_mb() in pwq_tryinc_nr_active() to * guarantee that either we see non-empty pending_pwqs or they see * decremented $nna->nr. * * $nna->max may change as CPUs come online/offline and @pwq->wq's * max_active gets updated. However, it is guaranteed to be equal to or * larger than @pwq->wq->min_active which is above zero unless freezing. * This maintains the forward progress guarantee. */ if (atomic_dec_return(&nna->nr) >= READ_ONCE(nna->max)) return; if (!list_empty(&nna->pending_pwqs)) node_activate_pending_pwq(nna, pool); } /** * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight * @pwq: pwq of interest * @work_data: work_data of work which left the queue * * A work either has completed or is removed from pending queue, * decrement nr_in_flight of its pwq and handle workqueue flushing. * * NOTE: * For unbound workqueues, this function may temporarily drop @pwq->pool->lock * and thus should be called after all other state updates for the in-flight * work item is complete. * * CONTEXT: * raw_spin_lock_irq(pool->lock). 
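 *
 * For orientation, this is where workqueue flushing makes progress; the
 * user-visible side is simply (my_work is hypothetical):
 *
 *	queue_work(wq, &my_work);
 *	flush_workqueue(wq);	// returns after my_work and every item
 *				// queued on wq before the call has finished
 *
 * Roughly, each item carries the wq's work color from queueing time, and
 * a flusher waits for nr_in_flight[] of its flush color to drain to zero
 * on every pwq.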
*/ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data) { int color = get_work_color(work_data); if (!(work_data & WORK_STRUCT_INACTIVE)) pwq_dec_nr_active(pwq); pwq->nr_in_flight[color]--; /* is flush in progress and are we at the flushing tip? */ if (likely(pwq->flush_color != color)) goto out_put; /* are there still in-flight works? */ if (pwq->nr_in_flight[color]) goto out_put; /* this pwq is done, clear flush_color */ pwq->flush_color = -1; /* * If this was the last pwq, wake up the first flusher. It * will handle the rest. */ if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) complete(&pwq->wq->first_flusher->done); out_put: put_pwq(pwq); } /** * try_to_grab_pending - steal work item from worklist and disable irq * @work: work item to steal * @cflags: %WORK_CANCEL_ flags * @irq_flags: place to store irq state * * Try to grab PENDING bit of @work. This function can handle @work in any * stable state - idle, on timer or on worklist. * * Return: * * ======== ================================================================ * 1 if @work was pending and we successfully stole PENDING * 0 if @work was idle and we claimed PENDING * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry * -ENOENT if someone else is canceling @work, this state may persist * for arbitrarily long * ======== ================================================================ * * Note: * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting * interrupted while holding PENDING and @work off queue, irq must be * disabled on entry. This, combined with delayed_work->timer being * irqsafe, ensures that we return -EAGAIN for finite short period of time. * * On successful return, >= 0, irq is disabled and the caller is * responsible for releasing it using local_irq_restore(*@irq_flags). * * This function is safe to call from any context including IRQ handler. */ static int try_to_grab_pending(struct work_struct *work, u32 cflags, unsigned long *irq_flags) { struct worker_pool *pool; struct pool_workqueue *pwq; local_irq_save(*irq_flags); /* try to steal the timer if it exists */ if (cflags & WORK_CANCEL_DELAYED) { struct delayed_work *dwork = to_delayed_work(work); /* * dwork->timer is irqsafe. If del_timer() fails, it's * guaranteed that the timer is not queued anywhere and not * running on the local CPU. */ if (likely(del_timer(&dwork->timer))) return 1; } /* try to claim PENDING the normal way */ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) return 0; rcu_read_lock(); /* * The queueing is in progress, or it is already queued. Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. */ pool = get_work_pool(work); if (!pool) goto fail; raw_spin_lock(&pool->lock); /* * work->data is guaranteed to point to pwq only while the work * item is queued on pwq->wq, and both updating work->data to point * to pwq on queueing and to pool on dequeueing are done under * pwq->pool->lock. This in turn guarantees that, if work->data * points to pwq which is associated with a locked pool, the work * item is currently queued on that pool. */ pwq = get_work_pwq(work); if (pwq && pwq->pool == pool) { unsigned long work_data; debug_work_deactivate(work); /* * A cancelable inactive work item must be in the * pwq->inactive_works since a queued barrier can't be * canceled (see the comments in insert_wq_barrier()). 
* * An inactive work item cannot be grabbed directly because * it might have linked barrier work items which, if left * on the inactive_works list, will confuse pwq->nr_active * management later on and cause stall. Make sure the work * item is activated before grabbing. */ pwq_activate_work(pwq, work); list_del_init(&work->entry); /* * work->data points to pwq iff queued. Let's point to pool. As * this destroys work->data needed by the next step, stash it. */ work_data = *work_data_bits(work); set_work_pool_and_keep_pending(work, pool->id, 0); /* must be the last step, see the function comment */ pwq_dec_nr_in_flight(pwq, work_data); raw_spin_unlock(&pool->lock); rcu_read_unlock(); return 1; } raw_spin_unlock(&pool->lock); fail: rcu_read_unlock(); local_irq_restore(*irq_flags); if (work_is_canceling(work)) return -ENOENT; cpu_relax(); return -EAGAIN; } struct cwt_wait { wait_queue_entry_t wait; struct work_struct *work; }; static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait); if (cwait->work != key) return 0; return autoremove_wake_function(wait, mode, sync, key); } /** * work_grab_pending - steal work item from worklist and disable irq * @work: work item to steal * @cflags: %WORK_CANCEL_ flags * @irq_flags: place to store IRQ state * * Grab PENDING bit of @work. @work can be in any stable state - idle, on timer * or on worklist. * * Must be called in process context. IRQ is disabled on return with IRQ state * stored in *@irq_flags. The caller is responsible for re-enabling it using * local_irq_restore(). * * Returns %true if @work was pending. %false if idle. */ static bool work_grab_pending(struct work_struct *work, u32 cflags, unsigned long *irq_flags) { struct cwt_wait cwait; int ret; might_sleep(); repeat: ret = try_to_grab_pending(work, cflags, irq_flags); if (likely(ret >= 0)) return ret; if (ret != -ENOENT) goto repeat; /* * Someone is already canceling. Wait for it to finish. flush_work() * doesn't work for PREEMPT_NONE because we may get woken up between * @work's completion and the other canceling task resuming and clearing * CANCELING - flush_work() will return false immediately as @work is no * longer busy, try_to_grab_pending() will return -ENOENT as @work is * still being canceled and the other canceling task won't be able to * clear CANCELING as we're hogging the CPU. * * Let's wait for completion using a waitqueue. As this may lead to the * thundering herd problem, use a custom wake function which matches * @work along with exclusive wait and wakeup. */ init_wait(&cwait.wait); cwait.wait.func = cwt_wakefn; cwait.work = work; prepare_to_wait_exclusive(&wq_cancel_waitq, &cwait.wait, TASK_UNINTERRUPTIBLE); if (work_is_canceling(work)) schedule(); finish_wait(&wq_cancel_waitq, &cwait.wait); goto repeat; } /** * insert_work - insert a work into a pool * @pwq: pwq @work belongs to * @work: work to insert * @head: insertion point * @extra_flags: extra WORK_STRUCT_* flags to set * * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to * work_struct flags. * * CONTEXT: * raw_spin_lock_irq(pool->lock). 
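 *
 * A minimal sketch of how the queueing path uses this helper (illustrative
 * only; see __queue_work() below for the real sequence)::
 *
 *	work_flags = work_color_to_flags(pwq->work_color);
 *	insert_work(pwq, work, &pool->worklist, work_flags);
 *	kick_pool(pool);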
*/ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, struct list_head *head, unsigned int extra_flags) { debug_work_activate(work); /* record the work call stack in order to print it in KASAN reports */ kasan_record_aux_stack_noalloc(work); /* we own @work, set data and link */ set_work_pwq(work, pwq, extra_flags); list_add_tail(&work->entry, head); get_pwq(pwq); } /* * Test whether @work is being queued from another work executing on the * same workqueue. */ static bool is_chained_work(struct workqueue_struct *wq) { struct worker *worker; worker = current_wq_worker(); /* * Return %true iff I'm a worker executing a work item on @wq. If * I'm @worker, it's safe to dereference it without locking. */ return worker && worker->current_pwq->wq == wq; } /* * When queueing an unbound work item to a wq, prefer local CPU if allowed * by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to * avoid perturbing sensitive tasks. */ static int wq_select_unbound_cpu(int cpu) { int new_cpu; if (likely(!wq_debug_force_rr_cpu)) { if (cpumask_test_cpu(cpu, wq_unbound_cpumask)) return cpu; } else { pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n"); } new_cpu = __this_cpu_read(wq_rr_cpu_last); new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask); if (unlikely(new_cpu >= nr_cpu_ids)) { new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask); if (unlikely(new_cpu >= nr_cpu_ids)) return cpu; } __this_cpu_write(wq_rr_cpu_last, new_cpu); return new_cpu; } static void __queue_work(int cpu, struct workqueue_struct *wq, struct work_struct *work) { struct pool_workqueue *pwq; struct worker_pool *last_pool, *pool; unsigned int work_flags; unsigned int req_cpu = cpu; /* * While a work item is PENDING && off queue, a task trying to * steal the PENDING will busy-loop waiting for it to either get * queued or lose PENDING. Grabbing PENDING and queueing should * happen with IRQ disabled. */ lockdep_assert_irqs_disabled(); /* * For a draining wq, only works from the same workqueue are * allowed. The __WQ_DESTROYING helps to spot the issue that * queues a new work item to a wq after destroy_workqueue(wq). */ if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq)))) return; rcu_read_lock(); retry: /* pwq which will be used unless @work is executing elsewhere */ if (req_cpu == WORK_CPU_UNBOUND) { if (wq->flags & WQ_UNBOUND) cpu = wq_select_unbound_cpu(raw_smp_processor_id()); else cpu = raw_smp_processor_id(); } pwq = rcu_dereference(*per_cpu_ptr(wq->cpu_pwq, cpu)); pool = pwq->pool; /* * If @work was previously on a different pool, it might still be * running there, in which case the work needs to be queued on that * pool to guarantee non-reentrancy. */ last_pool = get_work_pool(work); if (last_pool && last_pool != pool) { struct worker *worker; raw_spin_lock(&last_pool->lock); worker = find_worker_executing_work(last_pool, work); if (worker && worker->current_pwq->wq == wq) { pwq = worker->current_pwq; pool = pwq->pool; WARN_ON_ONCE(pool != last_pool); } else { /* meh... not running there, queue here */ raw_spin_unlock(&last_pool->lock); raw_spin_lock(&pool->lock); } } else { raw_spin_lock(&pool->lock); } /* * pwq is determined and locked. For unbound pools, we could have raced * with pwq release and it could already be dead. If its refcnt is zero, * repeat pwq selection. 
	 * Note that unbound pwqs never die without another pwq replacing it
	 * in cpu_pwq or while work items are executing on it, so the retrying
	 * is guaranteed to make forward-progress.
	 */
	if (unlikely(!pwq->refcnt)) {
		if (wq->flags & WQ_UNBOUND) {
			raw_spin_unlock(&pool->lock);
			cpu_relax();
			goto retry;
		}
		/* oops */
		WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
			  wq->name, cpu);
	}

	/* pwq determined, queue */
	trace_workqueue_queue_work(req_cpu, pwq, work);

	if (WARN_ON(!list_empty(&work->entry)))
		goto out;

	pwq->nr_in_flight[pwq->work_color]++;
	work_flags = work_color_to_flags(pwq->work_color);

	/*
	 * Limit the number of concurrently active work items to max_active.
	 * @work must also queue behind existing inactive work items to maintain
	 * ordering when max_active changes. See wq_adjust_max_active().
	 */
	if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) {
		if (list_empty(&pool->worklist))
			pool->watchdog_ts = jiffies;

		trace_workqueue_activate_work(work);
		insert_work(pwq, work, &pool->worklist, work_flags);
		kick_pool(pool);
	} else {
		work_flags |= WORK_STRUCT_INACTIVE;
		insert_work(pwq, work, &pwq->inactive_works, work_flags);
	}

out:
	raw_spin_unlock(&pool->lock);
	rcu_read_unlock();
}

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a specific CPU; the caller must ensure it
 * can't go away. Callers that fail to ensure that the specified
 * CPU cannot go away will execute on a randomly chosen CPU.
 * But note well that callers specifying a CPU that never has been
 * online will get a splat.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
bool queue_work_on(int cpu, struct workqueue_struct *wq,
		   struct work_struct *work)
{
	bool ret = false;
	unsigned long irq_flags;

	local_irq_save(irq_flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = true;
	}

	local_irq_restore(irq_flags);
	return ret;
}
EXPORT_SYMBOL(queue_work_on);

/**
 * select_numa_node_cpu - Select a CPU based on NUMA node
 * @node: NUMA node ID that we want to select a CPU from
 *
 * This function will attempt to find a "random" cpu available on a given
 * node. If there are no CPUs available on the given node it will return
 * WORK_CPU_UNBOUND indicating that we should just schedule to any
 * available CPU if we need to schedule this work.
 */
static int select_numa_node_cpu(int node)
{
	int cpu;

	/* Delay binding to CPU if node is not valid or online */
	if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
		return WORK_CPU_UNBOUND;

	/* Use local node/cpu if we are already there */
	cpu = raw_smp_processor_id();
	if (node == cpu_to_node(cpu))
		return cpu;

	/* Use "random", otherwise known as "first", online CPU of the node */
	cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);

	/* If CPU is valid return that, otherwise just defer */
	return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
}

/**
 * queue_work_node - queue work on a "random" cpu for a given NUMA node
 * @node: NUMA node that we are targeting the work for
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a "random" CPU within a given NUMA node. The basic
 * idea here is to provide a way to somehow associate work with a given
 * NUMA node.
 *
 * This function will only make a best effort attempt at getting this onto
 * the right NUMA node. If no node is requested or the requested node is
 * offline then we just fall back to standard queue_work behavior.
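 *
 * A minimal usage sketch (illustrative only; my_unbound_wq, my_work and
 * my_work_fn are hypothetical, and my_unbound_wq is assumed to be a
 * WQ_UNBOUND workqueue)::
 *
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	// prefer a CPU on the local NUMA node, falling back as described here
 *	queue_work_node(numa_node_id(), my_unbound_wq, &my_work);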
* * Currently the "random" CPU ends up being the first available CPU in the * intersection of cpu_online_mask and the cpumask of the node, unless we * are running on the node. In that case we just use the current CPU. * * Return: %false if @work was already on a queue, %true otherwise. */ bool queue_work_node(int node, struct workqueue_struct *wq, struct work_struct *work) { unsigned long irq_flags; bool ret = false; /* * This current implementation is specific to unbound workqueues. * Specifically we only return the first available CPU for a given * node instead of cycling through individual CPUs within the node. * * If this is used with a per-cpu workqueue then the logic in * workqueue_select_cpu_near would need to be updated to allow for * some round robin type logic. */ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)); local_irq_save(irq_flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { int cpu = select_numa_node_cpu(node); __queue_work(cpu, wq, work); ret = true; } local_irq_restore(irq_flags); return ret; } EXPORT_SYMBOL_GPL(queue_work_node); void delayed_work_timer_fn(struct timer_list *t) { struct delayed_work *dwork = from_timer(dwork, t, timer); /* should have been called from irqsafe timer with irq already off */ __queue_work(dwork->cpu, dwork->wq, &dwork->work); } EXPORT_SYMBOL(delayed_work_timer_fn); static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); WARN_ON_ONCE(timer->function != delayed_work_timer_fn); WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); /* * If @delay is 0, queue @dwork->work immediately. This is for * both optimization and correctness. The earliest @timer can * expire is on the closest next tick and delayed_work users depend * on that there's no such delay when @delay is 0. */ if (!delay) { __queue_work(cpu, wq, &dwork->work); return; } dwork->wq = wq; dwork->cpu = cpu; timer->expires = jiffies + delay; if (housekeeping_enabled(HK_TYPE_TIMER)) { /* If the current cpu is a housekeeping cpu, use it. */ cpu = smp_processor_id(); if (!housekeeping_test_cpu(cpu, HK_TYPE_TIMER)) cpu = housekeeping_any_cpu(HK_TYPE_TIMER); add_timer_on(timer, cpu); } else { if (likely(cpu == WORK_CPU_UNBOUND)) add_timer_global(timer); else add_timer_on(timer, cpu); } } /** * queue_delayed_work_on - queue work on specific CPU after delay * @cpu: CPU number to execute work on * @wq: workqueue to use * @dwork: work to queue * @delay: number of jiffies to wait before queueing * * Return: %false if @work was already on a queue, %true otherwise. If * @delay is zero and @dwork is idle, it will be scheduled for immediate * execution. 
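 *
 * A minimal usage sketch (illustrative only; my_dwork and my_timeout_fn are
 * hypothetical)::
 *
 *	static DECLARE_DELAYED_WORK(my_dwork, my_timeout_fn);
 *
 *	// run my_timeout_fn() on @cpu roughly 100ms from now
 *	queue_delayed_work_on(cpu, system_wq, &my_dwork, msecs_to_jiffies(100));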
*/ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { struct work_struct *work = &dwork->work; bool ret = false; unsigned long irq_flags; /* read the comment in __queue_work() */ local_irq_save(irq_flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_delayed_work(cpu, wq, dwork, delay); ret = true; } local_irq_restore(irq_flags); return ret; } EXPORT_SYMBOL(queue_delayed_work_on); /** * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU * @cpu: CPU number to execute work on * @wq: workqueue to use * @dwork: work to queue * @delay: number of jiffies to wait before queueing * * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise, * modify @dwork's timer so that it expires after @delay. If @delay is * zero, @work is guaranteed to be scheduled immediately regardless of its * current state. * * Return: %false if @dwork was idle and queued, %true if @dwork was * pending and its timer was modified. * * This function is safe to call from any context including IRQ handler. * See try_to_grab_pending() for details. */ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { unsigned long irq_flags; int ret; do { ret = try_to_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, &irq_flags); } while (unlikely(ret == -EAGAIN)); if (likely(ret >= 0)) { __queue_delayed_work(cpu, wq, dwork, delay); local_irq_restore(irq_flags); } /* -ENOENT from try_to_grab_pending() becomes %true */ return ret; } EXPORT_SYMBOL_GPL(mod_delayed_work_on); static void rcu_work_rcufn(struct rcu_head *rcu) { struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu); /* read the comment in __queue_work() */ local_irq_disable(); __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work); local_irq_enable(); } /** * queue_rcu_work - queue work after a RCU grace period * @wq: workqueue to use * @rwork: work to queue * * Return: %false if @rwork was already pending, %true otherwise. Note * that a full RCU grace period is guaranteed only after a %true return. * While @rwork is guaranteed to be executed after a %false return, the * execution may happen before a full RCU grace period has passed. */ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork) { struct work_struct *work = &rwork->work; if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { rwork->wq = wq; call_rcu_hurry(&rwork->rcu, rcu_work_rcufn); return true; } return false; } EXPORT_SYMBOL(queue_rcu_work); static struct worker *alloc_worker(int node) { struct worker *worker; worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node); if (worker) { INIT_LIST_HEAD(&worker->entry); INIT_LIST_HEAD(&worker->scheduled); INIT_LIST_HEAD(&worker->node); /* on creation a worker is in !idle && prep state */ worker->flags = WORKER_PREP; } return worker; } static cpumask_t *pool_allowed_cpus(struct worker_pool *pool) { if (pool->cpu < 0 && pool->attrs->affn_strict) return pool->attrs->__pod_cpumask; else return pool->attrs->cpumask; } /** * worker_attach_to_pool() - attach a worker to a pool * @worker: worker to be attached * @pool: the target pool * * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and * cpu-binding of @worker are kept coordinated with the pool across * cpu-[un]hotplugs. 
*/ static void worker_attach_to_pool(struct worker *worker, struct worker_pool *pool) { mutex_lock(&wq_pool_attach_mutex); /* * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains stable * across this function. See the comments above the flag definition for * details. BH workers are, while per-CPU, always DISASSOCIATED. */ if (pool->flags & POOL_DISASSOCIATED) { worker->flags |= WORKER_UNBOUND; } else { WARN_ON_ONCE(pool->flags & POOL_BH); kthread_set_per_cpu(worker->task, pool->cpu); } if (worker->rescue_wq) set_cpus_allowed_ptr(worker->task, pool_allowed_cpus(pool)); list_add_tail(&worker->node, &pool->workers); worker->pool = pool; mutex_unlock(&wq_pool_attach_mutex); } /** * worker_detach_from_pool() - detach a worker from its pool * @worker: worker which is attached to its pool * * Undo the attaching which had been done in worker_attach_to_pool(). The * caller worker shouldn't access to the pool after detached except it has * other reference to the pool. */ static void worker_detach_from_pool(struct worker *worker) { struct worker_pool *pool = worker->pool; struct completion *detach_completion = NULL; /* there is one permanent BH worker per CPU which should never detach */ WARN_ON_ONCE(pool->flags & POOL_BH); mutex_lock(&wq_pool_attach_mutex); kthread_set_per_cpu(worker->task, -1); list_del(&worker->node); worker->pool = NULL; if (list_empty(&pool->workers) && list_empty(&pool->dying_workers)) detach_completion = pool->detach_completion; mutex_unlock(&wq_pool_attach_mutex); /* clear leftover flags without pool->lock after it is detached */ worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND); if (detach_completion) complete(detach_completion); } /** * create_worker - create a new workqueue worker * @pool: pool the new worker will belong to * * Create and start a new worker which is attached to @pool. * * CONTEXT: * Might sleep. Does GFP_KERNEL allocations. * * Return: * Pointer to the newly created worker. */ static struct worker *create_worker(struct worker_pool *pool) { struct worker *worker; int id; char id_buf[23]; /* ID is needed to determine kthread name */ id = ida_alloc(&pool->worker_ida, GFP_KERNEL); if (id < 0) { pr_err_once("workqueue: Failed to allocate a worker ID: %pe\n", ERR_PTR(id)); return NULL; } worker = alloc_worker(pool->node); if (!worker) { pr_err_once("workqueue: Failed to allocate a worker\n"); goto fail; } worker->id = id; if (!(pool->flags & POOL_BH)) { if (pool->cpu >= 0) snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id, pool->attrs->nice < 0 ? "H" : ""); else snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id); worker->task = kthread_create_on_node(worker_thread, worker, pool->node, "kworker/%s", id_buf); if (IS_ERR(worker->task)) { if (PTR_ERR(worker->task) == -EINTR) { pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n", id_buf); } else { pr_err_once("workqueue: Failed to create a worker thread: %pe", worker->task); } goto fail; } set_user_nice(worker->task, pool->attrs->nice); kthread_bind_mask(worker->task, pool_allowed_cpus(pool)); } /* successful, attach the worker to the pool */ worker_attach_to_pool(worker, pool); /* start the newly created worker */ raw_spin_lock_irq(&pool->lock); worker->pool->nr_workers++; worker_enter_idle(worker); /* * @worker is waiting on a completion in kthread() and will trigger hung * check if not woken up soon. As kick_pool() is noop if @pool is empty, * wake it up explicitly. 
*/ if (worker->task) wake_up_process(worker->task); raw_spin_unlock_irq(&pool->lock); return worker; fail: ida_free(&pool->worker_ida, id); kfree(worker); return NULL; } static void unbind_worker(struct worker *worker) { lockdep_assert_held(&wq_pool_attach_mutex); kthread_set_per_cpu(worker->task, -1); if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask)) WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0); else WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0); } static void wake_dying_workers(struct list_head *cull_list) { struct worker *worker, *tmp; list_for_each_entry_safe(worker, tmp, cull_list, entry) { list_del_init(&worker->entry); unbind_worker(worker); /* * If the worker was somehow already running, then it had to be * in pool->idle_list when set_worker_dying() happened or we * wouldn't have gotten here. * * Thus, the worker must either have observed the WORKER_DIE * flag, or have set its state to TASK_IDLE. Either way, the * below will be observed by the worker and is safe to do * outside of pool->lock. */ wake_up_process(worker->task); } } /** * set_worker_dying - Tag a worker for destruction * @worker: worker to be destroyed * @list: transfer worker away from its pool->idle_list and into list * * Tag @worker for destruction and adjust @pool stats accordingly. The worker * should be idle. * * CONTEXT: * raw_spin_lock_irq(pool->lock). */ static void set_worker_dying(struct worker *worker, struct list_head *list) { struct worker_pool *pool = worker->pool; lockdep_assert_held(&pool->lock); lockdep_assert_held(&wq_pool_attach_mutex); /* sanity check frenzy */ if (WARN_ON(worker->current_work) || WARN_ON(!list_empty(&worker->scheduled)) || WARN_ON(!(worker->flags & WORKER_IDLE))) return; pool->nr_workers--; pool->nr_idle--; worker->flags |= WORKER_DIE; list_move(&worker->entry, list); list_move(&worker->node, &pool->dying_workers); } /** * idle_worker_timeout - check if some idle workers can now be deleted. * @t: The pool's idle_timer that just expired * * The timer is armed in worker_enter_idle(). Note that it isn't disarmed in * worker_leave_idle(), as a worker flicking between idle and active while its * pool is at the too_many_workers() tipping point would cause too much timer * housekeeping overhead. Since IDLE_WORKER_TIMEOUT is long enough, we just let * it expire and re-evaluate things from there. */ static void idle_worker_timeout(struct timer_list *t) { struct worker_pool *pool = from_timer(pool, t, idle_timer); bool do_cull = false; if (work_pending(&pool->idle_cull_work)) return; raw_spin_lock_irq(&pool->lock); if (too_many_workers(pool)) { struct worker *worker; unsigned long expires; /* idle_list is kept in LIFO order, check the last one */ worker = list_entry(pool->idle_list.prev, struct worker, entry); expires = worker->last_active + IDLE_WORKER_TIMEOUT; do_cull = !time_before(jiffies, expires); if (!do_cull) mod_timer(&pool->idle_timer, expires); } raw_spin_unlock_irq(&pool->lock); if (do_cull) queue_work(system_unbound_wq, &pool->idle_cull_work); } /** * idle_cull_fn - cull workers that have been idle for too long. * @work: the pool's work for handling these idle workers * * This goes through a pool's idle workers and gets rid of those that have been * idle for at least IDLE_WORKER_TIMEOUT seconds. * * We don't want to disturb isolated CPUs because of a pcpu kworker being * culled, so this also resets worker affinity. This requires a sleepable * context, hence the split between timer callback and work item. 
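 *
 * The timer -> work item split described above follows a common pattern; a
 * minimal sketch (illustrative only, the my_* names are hypothetical)::
 *
 *	static void my_cull_fn(struct work_struct *work)
 *	{
 *		// process context: taking mutexes and sleeping is fine here
 *	}
 *	static DECLARE_WORK(my_cull_work, my_cull_fn);
 *
 *	static void my_timer_fn(struct timer_list *t)
 *	{
 *		// timer (atomic) context: can't sleep, defer to the work item
 *		queue_work(system_unbound_wq, &my_cull_work);
 *	}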
 */
static void idle_cull_fn(struct work_struct *work)
{
	struct worker_pool *pool = container_of(work, struct worker_pool, idle_cull_work);
	LIST_HEAD(cull_list);

	/*
	 * Grabbing wq_pool_attach_mutex here ensures an already-running worker
	 * cannot proceed beyond worker_detach_from_pool() in its self-destruct
	 * path. This is required as a previously-preempted worker could run after
	 * set_worker_dying() has happened but before wake_dying_workers() did.
	 */
	mutex_lock(&wq_pool_attach_mutex);
	raw_spin_lock_irq(&pool->lock);

	while (too_many_workers(pool)) {
		struct worker *worker;
		unsigned long expires;

		worker = list_entry(pool->idle_list.prev, struct worker, entry);
		expires = worker->last_active + IDLE_WORKER_TIMEOUT;

		if (time_before(jiffies, expires)) {
			mod_timer(&pool->idle_timer, expires);
			break;
		}

		set_worker_dying(worker, &cull_list);
	}
	raw_spin_unlock_irq(&pool->lock);
	wake_dying_workers(&cull_list);
	mutex_unlock(&wq_pool_attach_mutex);
}

static void send_mayday(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct workqueue_struct *wq = pwq->wq;

	lockdep_assert_held(&wq_mayday_lock);

	if (!wq->rescuer)
		return;

	/* mayday mayday mayday */
	if (list_empty(&pwq->mayday_node)) {
		/*
		 * If @pwq is for an unbound wq, its base ref may be put at
		 * any time due to an attribute change. Pin @pwq until the
		 * rescuer is done with it.
		 */
		get_pwq(pwq);
		list_add_tail(&pwq->mayday_node, &wq->maydays);
		wake_up_process(wq->rescuer->task);
		pwq->stats[PWQ_STAT_MAYDAY]++;
	}
}

static void pool_mayday_timeout(struct timer_list *t)
{
	struct worker_pool *pool = from_timer(pool, t, mayday_timer);
	struct work_struct *work;

	raw_spin_lock_irq(&pool->lock);
	raw_spin_lock(&wq_mayday_lock);		/* for wq->maydays */

	if (need_to_create_worker(pool)) {
		/*
		 * We've been trying to create a new worker but
		 * haven't been successful. We might be hitting an
		 * allocation deadlock. Send distress signals to
		 * rescuers.
		 */
		list_for_each_entry(work, &pool->worklist, entry)
			send_mayday(work);
	}

	raw_spin_unlock(&wq_mayday_lock);
	raw_spin_unlock_irq(&pool->lock);

	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}

/**
 * maybe_create_worker - create a new worker if necessary
 * @pool: pool to create a new worker for
 *
 * Create a new worker for @pool if necessary. @pool is guaranteed to
 * have at least one idle worker on return from this function. If
 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
 * sent to all rescuers with works scheduled on @pool to resolve
 * possible allocation deadlock.
 *
 * On return, need_to_create_worker() is guaranteed to be %false and
 * may_start_working() %true.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times. Does GFP_KERNEL allocations. Called only from
 * manager.
 */
static void maybe_create_worker(struct worker_pool *pool)
__releases(&pool->lock)
__acquires(&pool->lock)
{
restart:
	raw_spin_unlock_irq(&pool->lock);

	/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);

	while (true) {
		if (create_worker(pool) || !need_to_create_worker(pool))
			break;

		schedule_timeout_interruptible(CREATE_COOLDOWN);

		if (!need_to_create_worker(pool))
			break;
	}

	del_timer_sync(&pool->mayday_timer);
	raw_spin_lock_irq(&pool->lock);
	/*
	 * This is necessary even after a new worker was just successfully
	 * created as @pool->lock was dropped and the new worker might have
	 * already become busy.
*/ if (need_to_create_worker(pool)) goto restart; } /** * manage_workers - manage worker pool * @worker: self * * Assume the manager role and manage the worker pool @worker belongs * to. At any given time, there can be only zero or one manager per * pool. The exclusion is handled automatically by this function. * * The caller can safely start processing works on false return. On * true return, it's guaranteed that need_to_create_worker() is false * and may_start_working() is true. * * CONTEXT: * raw_spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. * * Return: * %false if the pool doesn't need management and the caller can safely * start processing works, %true if management function was performed and * the conditions that the caller verified before calling the function may * no longer be true. */ static bool manage_workers(struct worker *worker) { struct worker_pool *pool = worker->pool; if (pool->flags & POOL_MANAGER_ACTIVE) return false; pool->flags |= POOL_MANAGER_ACTIVE; pool->manager = worker; maybe_create_worker(pool); pool->manager = NULL; pool->flags &= ~POOL_MANAGER_ACTIVE; rcuwait_wake_up(&manager_wait); return true; } /** * process_one_work - process single work * @worker: self * @work: work to process * * Process @work. This function contains all the logics necessary to * process a single work including synchronization against and * interaction with other workers on the same cpu, queueing and * flushing. As long as context requirement is met, any worker can * call this function to process a work. * * CONTEXT: * raw_spin_lock_irq(pool->lock) which is released and regrabbed. */ static void process_one_work(struct worker *worker, struct work_struct *work) __releases(&pool->lock) __acquires(&pool->lock) { struct pool_workqueue *pwq = get_work_pwq(work); struct worker_pool *pool = worker->pool; unsigned long work_data; int lockdep_start_depth, rcu_start_depth; bool bh_draining = pool->flags & POOL_BH_DRAINING; #ifdef CONFIG_LOCKDEP /* * It is permissible to free the struct work_struct from * inside the function that is called from it, this we need to * take into account for lockdep too. To avoid bogus "held * lock freed" warnings as well as problems when looking into * work->lockdep_map, make a copy and use that here. */ struct lockdep_map lockdep_map; lockdep_copy_map(&lockdep_map, &work->lockdep_map); #endif /* ensure we're on the correct CPU */ WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && raw_smp_processor_id() != pool->cpu); /* claim and dequeue */ debug_work_deactivate(work); hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work); worker->current_work = work; worker->current_func = work->func; worker->current_pwq = pwq; if (worker->task) worker->current_at = worker->task->se.sum_exec_runtime; work_data = *work_data_bits(work); worker->current_color = get_work_color(work_data); /* * Record wq name for cmdline and debug reporting, may get * overridden through set_worker_desc(). */ strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); list_del_init(&work->entry); /* * CPU intensive works don't participate in concurrency management. * They're the scheduler's responsibility. This takes @worker out * of concurrency management and the next code block will chain * execution of the pending work items. */ if (unlikely(pwq->wq->flags & WQ_CPU_INTENSIVE)) worker_set_flags(worker, WORKER_CPU_INTENSIVE); /* * Kick @pool if necessary. 
It's always noop for per-cpu worker pools * since nr_running would always be >= 1 at this point. This is used to * chain execution of the pending work items for WORKER_NOT_RUNNING * workers such as the UNBOUND and CPU_INTENSIVE ones. */ kick_pool(pool); /* * Record the last pool and clear PENDING which should be the last * update to @work. Also, do this inside @pool->lock so that * PENDING and queued state changes happen together while IRQ is * disabled. */ set_work_pool_and_clear_pending(work, pool->id, 0); pwq->stats[PWQ_STAT_STARTED]++; raw_spin_unlock_irq(&pool->lock); rcu_start_depth = rcu_preempt_depth(); lockdep_start_depth = lockdep_depth(current); /* see drain_dead_softirq_workfn() */ if (!bh_draining) lock_map_acquire(&pwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); /* * Strictly speaking we should mark the invariant state without holding * any locks, that is, before these two lock_map_acquire()'s. * * However, that would result in: * * A(W1) * WFC(C) * A(W1) * C(C) * * Which would create W1->C->W1 dependencies, even though there is no * actual deadlock possible. There are two solutions, using a * read-recursive acquire on the work(queue) 'locks', but this will then * hit the lockdep limitation on recursive locks, or simply discard * these locks. * * AFAICT there is no possible deadlock scenario between the * flush_work() and complete() primitives (except for single-threaded * workqueues), so hiding them isn't a problem. */ lockdep_invariant_state(true); trace_workqueue_execute_start(work); worker->current_func(work); /* * While we must be careful to not use "work" after this, the trace * point will only record its address. */ trace_workqueue_execute_end(work, worker->current_func); pwq->stats[PWQ_STAT_COMPLETED]++; lock_map_release(&lockdep_map); if (!bh_draining) lock_map_release(&pwq->wq->lockdep_map); if (unlikely((worker->task && in_atomic()) || lockdep_depth(current) != lockdep_start_depth || rcu_preempt_depth() != rcu_start_depth)) { pr_err("BUG: workqueue leaked atomic, lock or RCU: %s[%d]\n" " preempt=0x%08x lock=%d->%d RCU=%d->%d workfn=%ps\n", current->comm, task_pid_nr(current), preempt_count(), lockdep_start_depth, lockdep_depth(current), rcu_start_depth, rcu_preempt_depth(), worker->current_func); debug_show_held_locks(current); dump_stack(); } /* * The following prevents a kworker from hogging CPU on !PREEMPTION * kernels, where a requeueing work item waiting for something to * happen could deadlock with stop_machine as such work item could * indefinitely requeue itself while all other CPUs are trapped in * stop_machine. At the same time, report a quiescent RCU state so * the same condition doesn't freeze RCU. */ if (worker->task) cond_resched(); raw_spin_lock_irq(&pool->lock); /* * In addition to %WQ_CPU_INTENSIVE, @worker may also have been marked * CPU intensive by wq_worker_tick() if @work hogged CPU longer than * wq_cpu_intensive_thresh_us. Clear it. */ worker_clr_flags(worker, WORKER_CPU_INTENSIVE); /* tag the worker for identification in schedule() */ worker->last_func = worker->current_func; /* we're done with it, release */ hash_del(&worker->hentry); worker->current_work = NULL; worker->current_func = NULL; worker->current_pwq = NULL; worker->current_color = INT_MAX; /* must be the last step, see the function comment */ pwq_dec_nr_in_flight(pwq, work_data); } /** * process_scheduled_works - process scheduled works * @worker: self * * Process all scheduled works. 
Please note that the scheduled list * may change while processing a work, so this function repeatedly * fetches a work from the top and executes it. * * CONTEXT: * raw_spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. */ static void process_scheduled_works(struct worker *worker) { struct work_struct *work; bool first = true; while ((work = list_first_entry_or_null(&worker->scheduled, struct work_struct, entry))) { if (first) { worker->pool->watchdog_ts = jiffies; first = false; } process_one_work(worker, work); } } static void set_pf_worker(bool val) { mutex_lock(&wq_pool_attach_mutex); if (val) current->flags |= PF_WQ_WORKER; else current->flags &= ~PF_WQ_WORKER; mutex_unlock(&wq_pool_attach_mutex); } /** * worker_thread - the worker thread function * @__worker: self * * The worker thread function. All workers belong to a worker_pool - * either a per-cpu one or dynamic unbound one. These workers process all * work items regardless of their specific target workqueue. The only * exception is work items which belong to workqueues with a rescuer which * will be explained in rescuer_thread(). * * Return: 0 */ static int worker_thread(void *__worker) { struct worker *worker = __worker; struct worker_pool *pool = worker->pool; /* tell the scheduler that this is a workqueue worker */ set_pf_worker(true); woke_up: raw_spin_lock_irq(&pool->lock); /* am I supposed to die? */ if (unlikely(worker->flags & WORKER_DIE)) { raw_spin_unlock_irq(&pool->lock); set_pf_worker(false); set_task_comm(worker->task, "kworker/dying"); ida_free(&pool->worker_ida, worker->id); worker_detach_from_pool(worker); WARN_ON_ONCE(!list_empty(&worker->entry)); kfree(worker); return 0; } worker_leave_idle(worker); recheck: /* no more worker necessary? */ if (!need_more_worker(pool)) goto sleep; /* do we need to manage? */ if (unlikely(!may_start_working(pool)) && manage_workers(worker)) goto recheck; /* * ->scheduled list can only be filled while a worker is * preparing to process a work or actually processing it. * Make sure nobody diddled with it while I was sleeping. */ WARN_ON_ONCE(!list_empty(&worker->scheduled)); /* * Finish PREP stage. We're guaranteed to have at least one idle * worker or that someone else has already assumed the manager * role. This is where @worker starts participating in concurrency * management if applicable and concurrency management is restored * after being rebound. See rebind_workers() for details. */ worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND); do { struct work_struct *work = list_first_entry(&pool->worklist, struct work_struct, entry); if (assign_work(work, worker, NULL)) process_scheduled_works(worker); } while (keep_working(pool)); worker_set_flags(worker, WORKER_PREP); sleep: /* * pool->lock is held and there's no work to process and no need to * manage, sleep. Workers are woken up only while holding * pool->lock or from local cpu, so setting the current state * before releasing pool->lock is enough to prevent losing any * event. */ worker_enter_idle(worker); __set_current_state(TASK_IDLE); raw_spin_unlock_irq(&pool->lock); schedule(); goto woke_up; } /** * rescuer_thread - the rescuer thread function * @__rescuer: self * * Workqueue rescuer thread function. There's one rescuer for each * workqueue which has WQ_MEM_RECLAIM set. 
* * Regular work processing on a pool may block trying to create a new * worker which uses GFP_KERNEL allocation which has slight chance of * developing into deadlock if some works currently on the same queue * need to be processed to satisfy the GFP_KERNEL allocation. This is * the problem rescuer solves. * * When such condition is possible, the pool summons rescuers of all * workqueues which have works queued on the pool and let them process * those works so that forward progress can be guaranteed. * * This should happen rarely. * * Return: 0 */ static int rescuer_thread(void *__rescuer) { struct worker *rescuer = __rescuer; struct workqueue_struct *wq = rescuer->rescue_wq; bool should_stop; set_user_nice(current, RESCUER_NICE_LEVEL); /* * Mark rescuer as worker too. As WORKER_PREP is never cleared, it * doesn't participate in concurrency management. */ set_pf_worker(true); repeat: set_current_state(TASK_IDLE); /* * By the time the rescuer is requested to stop, the workqueue * shouldn't have any work pending, but @wq->maydays may still have * pwq(s) queued. This can happen by non-rescuer workers consuming * all the work items before the rescuer got to them. Go through * @wq->maydays processing before acting on should_stop so that the * list is always empty on exit. */ should_stop = kthread_should_stop(); /* see whether any pwq is asking for help */ raw_spin_lock_irq(&wq_mayday_lock); while (!list_empty(&wq->maydays)) { struct pool_workqueue *pwq = list_first_entry(&wq->maydays, struct pool_workqueue, mayday_node); struct worker_pool *pool = pwq->pool; struct work_struct *work, *n; __set_current_state(TASK_RUNNING); list_del_init(&pwq->mayday_node); raw_spin_unlock_irq(&wq_mayday_lock); worker_attach_to_pool(rescuer, pool); raw_spin_lock_irq(&pool->lock); /* * Slurp in all works issued via this workqueue and * process'em. */ WARN_ON_ONCE(!list_empty(&rescuer->scheduled)); list_for_each_entry_safe(work, n, &pool->worklist, entry) { if (get_work_pwq(work) == pwq && assign_work(work, rescuer, &n)) pwq->stats[PWQ_STAT_RESCUED]++; } if (!list_empty(&rescuer->scheduled)) { process_scheduled_works(rescuer); /* * The above execution of rescued work items could * have created more to rescue through * pwq_activate_first_inactive() or chained * queueing. Let's put @pwq back on mayday list so * that such back-to-back work items, which may be * being used to relieve memory pressure, don't * incur MAYDAY_INTERVAL delay inbetween. */ if (pwq->nr_active && need_to_create_worker(pool)) { raw_spin_lock(&wq_mayday_lock); /* * Queue iff we aren't racing destruction * and somebody else hasn't queued it already. */ if (wq->rescuer && list_empty(&pwq->mayday_node)) { get_pwq(pwq); list_add_tail(&pwq->mayday_node, &wq->maydays); } raw_spin_unlock(&wq_mayday_lock); } } /* * Put the reference grabbed by send_mayday(). @pool won't * go away while we're still attached to it. */ put_pwq(pwq); /* * Leave this pool. Notify regular workers; otherwise, we end up * with 0 concurrency and stalling the execution. 
*/ kick_pool(pool); raw_spin_unlock_irq(&pool->lock); worker_detach_from_pool(rescuer); raw_spin_lock_irq(&wq_mayday_lock); } raw_spin_unlock_irq(&wq_mayday_lock); if (should_stop) { __set_current_state(TASK_RUNNING); set_pf_worker(false); return 0; } /* rescuers should never participate in concurrency management */ WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); schedule(); goto repeat; } static void bh_worker(struct worker *worker) { struct worker_pool *pool = worker->pool; int nr_restarts = BH_WORKER_RESTARTS; unsigned long end = jiffies + BH_WORKER_JIFFIES; raw_spin_lock_irq(&pool->lock); worker_leave_idle(worker); /* * This function follows the structure of worker_thread(). See there for * explanations on each step. */ if (!need_more_worker(pool)) goto done; WARN_ON_ONCE(!list_empty(&worker->scheduled)); worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND); do { struct work_struct *work = list_first_entry(&pool->worklist, struct work_struct, entry); if (assign_work(work, worker, NULL)) process_scheduled_works(worker); } while (keep_working(pool) && --nr_restarts && time_before(jiffies, end)); worker_set_flags(worker, WORKER_PREP); done: worker_enter_idle(worker); kick_pool(pool); raw_spin_unlock_irq(&pool->lock); } /* * TODO: Convert all tasklet users to workqueue and use softirq directly. * * This is currently called from tasklet[_hi]action() and thus is also called * whenever there are tasklets to run. Let's do an early exit if there's nothing * queued. Once conversion from tasklet is complete, the need_more_worker() test * can be dropped. * * After full conversion, we'll add worker->softirq_action, directly use the * softirq action and obtain the worker pointer from the softirq_action pointer. */ void workqueue_softirq_action(bool highpri) { struct worker_pool *pool = &per_cpu(bh_worker_pools, smp_processor_id())[highpri]; if (need_more_worker(pool)) bh_worker(list_first_entry(&pool->workers, struct worker, node)); } struct wq_drain_dead_softirq_work { struct work_struct work; struct worker_pool *pool; struct completion done; }; static void drain_dead_softirq_workfn(struct work_struct *work) { struct wq_drain_dead_softirq_work *dead_work = container_of(work, struct wq_drain_dead_softirq_work, work); struct worker_pool *pool = dead_work->pool; bool repeat; /* * @pool's CPU is dead and we want to execute its still pending work * items from this BH work item which is running on a different CPU. As * its CPU is dead, @pool can't be kicked and, as work execution path * will be nested, a lockdep annotation needs to be suppressed. Mark * @pool with %POOL_BH_DRAINING for the special treatments. */ raw_spin_lock_irq(&pool->lock); pool->flags |= POOL_BH_DRAINING; raw_spin_unlock_irq(&pool->lock); bh_worker(list_first_entry(&pool->workers, struct worker, node)); raw_spin_lock_irq(&pool->lock); pool->flags &= ~POOL_BH_DRAINING; repeat = need_more_worker(pool); raw_spin_unlock_irq(&pool->lock); /* * bh_worker() might hit consecutive execution limit and bail. If there * still are pending work items, reschedule self and return so that we * don't hog this CPU's BH. */ if (repeat) { if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) queue_work(system_bh_highpri_wq, work); else queue_work(system_bh_wq, work); } else { complete(&dead_work->done); } } /* * @cpu is dead. Drain the remaining BH work items on the current CPU. It's * possible to allocate dead_work per CPU and avoid flushing. 
However, then we * have to worry about draining overlapping with CPU coming back online or * nesting (one CPU's dead_work queued on another CPU which is also dead and so * on). Let's keep it simple and drain them synchronously. These are BH work * items which shouldn't be requeued on the same pool. Shouldn't take long. */ void workqueue_softirq_dead(unsigned int cpu) { int i; for (i = 0; i < NR_STD_WORKER_POOLS; i++) { struct worker_pool *pool = &per_cpu(bh_worker_pools, cpu)[i]; struct wq_drain_dead_softirq_work dead_work; if (!need_more_worker(pool)) continue; INIT_WORK(&dead_work.work, drain_dead_softirq_workfn); dead_work.pool = pool; init_completion(&dead_work.done); if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) queue_work(system_bh_highpri_wq, &dead_work.work); else queue_work(system_bh_wq, &dead_work.work); wait_for_completion(&dead_work.done); } } /** * check_flush_dependency - check for flush dependency sanity * @target_wq: workqueue being flushed * @target_work: work item being flushed (NULL for workqueue flushes) * * %current is trying to flush the whole @target_wq or @target_work on it. * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not * reclaiming memory or running on a workqueue which doesn't have * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to * a deadlock. */ static void check_flush_dependency(struct workqueue_struct *target_wq, struct work_struct *target_work) { work_func_t target_func = target_work ? target_work->func : NULL; struct worker *worker; if (target_wq->flags & WQ_MEM_RECLAIM) return; worker = current_wq_worker(); WARN_ONCE(current->flags & PF_MEMALLOC, "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps", current->pid, current->comm, target_wq->name, target_func); WARN_ONCE(worker && ((worker->current_pwq->wq->flags & (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM), "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps", worker->current_pwq->wq->name, worker->current_func, target_wq->name, target_func); } struct wq_barrier { struct work_struct work; struct completion done; struct task_struct *task; /* purely informational */ }; static void wq_barrier_func(struct work_struct *work) { struct wq_barrier *barr = container_of(work, struct wq_barrier, work); complete(&barr->done); } /** * insert_wq_barrier - insert a barrier work * @pwq: pwq to insert barrier into * @barr: wq_barrier to insert * @target: target work to attach @barr to * @worker: worker currently executing @target, NULL if @target is not executing * * @barr is linked to @target such that @barr is completed only after * @target finishes execution. Please note that the ordering * guarantee is observed only with respect to @target and on the local * cpu. * * Currently, a queued barrier can't be canceled. This is because * try_to_grab_pending() can't determine whether the work to be * grabbed is at the head of the queue and thus can't clear LINKED * flag of the previous work while there must be a valid next work * after a work with LINKED flag set. * * Note that when @worker is non-NULL, @target may be modified * underneath us, so we can't reliably determine pwq from @target. * * CONTEXT: * raw_spin_lock_irq(pool->lock). 
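 *
 * A minimal sketch of how a flusher uses the barrier (illustrative only;
 * the real sequence lives in start_flush_work() and __flush_work() below)::
 *
 *	struct wq_barrier barr;
 *
 *	// under pool->lock: link barr behind the target work item
 *	insert_wq_barrier(pwq, &barr, work, worker);
 *	raw_spin_unlock_irq(&pool->lock);
 *	wait_for_completion(&barr.done);
 *	destroy_work_on_stack(&barr.work);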
*/ static void insert_wq_barrier(struct pool_workqueue *pwq, struct wq_barrier *barr, struct work_struct *target, struct worker *worker) { static __maybe_unused struct lock_class_key bh_key, thr_key; unsigned int work_flags = 0; unsigned int work_color; struct list_head *head; /* * debugobject calls are safe here even with pool->lock locked * as we know for sure that this will not trigger any of the * checks and call back into the fixup functions where we * might deadlock. * * BH and threaded workqueues need separate lockdep keys to avoid * spuriously triggering "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} * usage". */ INIT_WORK_ONSTACK_KEY(&barr->work, wq_barrier_func, (pwq->wq->flags & WQ_BH) ? &bh_key : &thr_key); __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); init_completion_map(&barr->done, &target->lockdep_map); barr->task = current; /* The barrier work item does not participate in nr_active. */ work_flags |= WORK_STRUCT_INACTIVE; /* * If @target is currently being executed, schedule the * barrier to the worker; otherwise, put it after @target. */ if (worker) { head = worker->scheduled.next; work_color = worker->current_color; } else { unsigned long *bits = work_data_bits(target); head = target->entry.next; /* there can already be other linked works, inherit and set */ work_flags |= *bits & WORK_STRUCT_LINKED; work_color = get_work_color(*bits); __set_bit(WORK_STRUCT_LINKED_BIT, bits); } pwq->nr_in_flight[work_color]++; work_flags |= work_color_to_flags(work_color); insert_work(pwq, &barr->work, head, work_flags); } /** * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing * @wq: workqueue being flushed * @flush_color: new flush color, < 0 for no-op * @work_color: new work color, < 0 for no-op * * Prepare pwqs for workqueue flushing. * * If @flush_color is non-negative, flush_color on all pwqs should be * -1. If no pwq has in-flight commands at the specified color, all * pwq->flush_color's stay at -1 and %false is returned. If any pwq * has in flight commands, its pwq->flush_color is set to * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq * wakeup logic is armed and %true is returned. * * The caller should have initialized @wq->first_flusher prior to * calling this function with non-negative @flush_color. If * @flush_color is negative, no flush color update is done and %false * is returned. * * If @work_color is non-negative, all pwqs should have the same * work_color which is previous to @work_color and all will be * advanced to @work_color. * * CONTEXT: * mutex_lock(wq->mutex). * * Return: * %true if @flush_color >= 0 and there's something to flush. %false * otherwise. 
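 *
 * Worked example (illustrative only): suppose @wq->work_color is 2 when a
 * flush starts. __flush_workqueue() picks flush_color 2, advances work_color
 * to 3 and then calls this function. Every pwq that still has
 * nr_in_flight[2] work items gets pwq->flush_color = 2 and bumps
 * @wq->nr_pwqs_to_flush; work queued from this point on uses color 3 and is
 * not waited for by that flusher.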
*/ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, int flush_color, int work_color) { bool wait = false; struct pool_workqueue *pwq; if (flush_color >= 0) { WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush)); atomic_set(&wq->nr_pwqs_to_flush, 1); } for_each_pwq(pwq, wq) { struct worker_pool *pool = pwq->pool; raw_spin_lock_irq(&pool->lock); if (flush_color >= 0) { WARN_ON_ONCE(pwq->flush_color != -1); if (pwq->nr_in_flight[flush_color]) { pwq->flush_color = flush_color; atomic_inc(&wq->nr_pwqs_to_flush); wait = true; } } if (work_color >= 0) { WARN_ON_ONCE(work_color != work_next_color(pwq->work_color)); pwq->work_color = work_color; } raw_spin_unlock_irq(&pool->lock); } if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) complete(&wq->first_flusher->done); return wait; } static void touch_wq_lockdep_map(struct workqueue_struct *wq) { #ifdef CONFIG_LOCKDEP if (wq->flags & WQ_BH) local_bh_disable(); lock_map_acquire(&wq->lockdep_map); lock_map_release(&wq->lockdep_map); if (wq->flags & WQ_BH) local_bh_enable(); #endif } static void touch_work_lockdep_map(struct work_struct *work, struct workqueue_struct *wq) { #ifdef CONFIG_LOCKDEP if (wq->flags & WQ_BH) local_bh_disable(); lock_map_acquire(&work->lockdep_map); lock_map_release(&work->lockdep_map); if (wq->flags & WQ_BH) local_bh_enable(); #endif } /** * __flush_workqueue - ensure that any scheduled work has run to completion. * @wq: workqueue to flush * * This function sleeps until all work items which were queued on entry * have finished execution, but it is not livelocked by new incoming ones. */ void __flush_workqueue(struct workqueue_struct *wq) { struct wq_flusher this_flusher = { .list = LIST_HEAD_INIT(this_flusher.list), .flush_color = -1, .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map), }; int next_color; if (WARN_ON(!wq_online)) return; touch_wq_lockdep_map(wq); mutex_lock(&wq->mutex); /* * Start-to-wait phase */ next_color = work_next_color(wq->work_color); if (next_color != wq->flush_color) { /* * Color space is not full. The current work_color * becomes our flush_color and work_color is advanced * by one. */ WARN_ON_ONCE(!list_empty(&wq->flusher_overflow)); this_flusher.flush_color = wq->work_color; wq->work_color = next_color; if (!wq->first_flusher) { /* no flush in progress, become the first flusher */ WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); wq->first_flusher = &this_flusher; if (!flush_workqueue_prep_pwqs(wq, wq->flush_color, wq->work_color)) { /* nothing to flush, done */ wq->flush_color = next_color; wq->first_flusher = NULL; goto out_unlock; } } else { /* wait in queue */ WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color); list_add_tail(&this_flusher.list, &wq->flusher_queue); flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } } else { /* * Oops, color space is full, wait on overflow queue. * The next flush completion will assign us * flush_color and transfer to flusher_queue. */ list_add_tail(&this_flusher.list, &wq->flusher_overflow); } check_flush_dependency(wq, NULL); mutex_unlock(&wq->mutex); wait_for_completion(&this_flusher.done); /* * Wake-up-and-cascade phase * * First flushers are responsible for cascading flushes and * handling overflow. Non-first flushers can simply return. 
*/ if (READ_ONCE(wq->first_flusher) != &this_flusher) return; mutex_lock(&wq->mutex); /* we might have raced, check again with mutex held */ if (wq->first_flusher != &this_flusher) goto out_unlock; WRITE_ONCE(wq->first_flusher, NULL); WARN_ON_ONCE(!list_empty(&this_flusher.list)); WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); while (true) { struct wq_flusher *next, *tmp; /* complete all the flushers sharing the current flush color */ list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) { if (next->flush_color != wq->flush_color) break; list_del_init(&next->list); complete(&next->done); } WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) && wq->flush_color != work_next_color(wq->work_color)); /* this flush_color is finished, advance by one */ wq->flush_color = work_next_color(wq->flush_color); /* one color has been freed, handle overflow queue */ if (!list_empty(&wq->flusher_overflow)) { /* * Assign the same color to all overflowed * flushers, advance work_color and append to * flusher_queue. This is the start-to-wait * phase for these overflowed flushers. */ list_for_each_entry(tmp, &wq->flusher_overflow, list) tmp->flush_color = wq->work_color; wq->work_color = work_next_color(wq->work_color); list_splice_tail_init(&wq->flusher_overflow, &wq->flusher_queue); flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } if (list_empty(&wq->flusher_queue)) { WARN_ON_ONCE(wq->flush_color != wq->work_color); break; } /* * Need to flush more colors. Make the next flusher * the new first flusher and arm pwqs. */ WARN_ON_ONCE(wq->flush_color == wq->work_color); WARN_ON_ONCE(wq->flush_color != next->flush_color); list_del_init(&next->list); wq->first_flusher = next; if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1)) break; /* * Meh... this color is already done, clear first * flusher and repeat cascading. */ wq->first_flusher = NULL; } out_unlock: mutex_unlock(&wq->mutex); } EXPORT_SYMBOL(__flush_workqueue); /** * drain_workqueue - drain a workqueue * @wq: workqueue to drain * * Wait until the workqueue becomes empty. While draining is in progress, * only chain queueing is allowed. IOW, only currently pending or running * work items on @wq can queue further work items on it. @wq is flushed * repeatedly until it becomes empty. The number of flushing is determined * by the depth of chaining and should be relatively short. Whine if it * takes too long. */ void drain_workqueue(struct workqueue_struct *wq) { unsigned int flush_cnt = 0; struct pool_workqueue *pwq; /* * __queue_work() needs to test whether there are drainers, is much * hotter than drain_workqueue() and already looks at @wq->flags. * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers. 
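	 *
	 * While __WQ_DRAINING is set, the only queueing __queue_work() accepts
	 * is chained queueing, i.e. a work item running on this wq queueing
	 * further work on it. A minimal sketch (illustrative only, the my_*
	 * names are hypothetical)::
	 *
	 *	static void my_step_fn(struct work_struct *work)
	 *	{
	 *		if (my_more_steps_pending())	// hypothetical condition
	 *			queue_work(my_wq, work);	// allowed while draining
	 *	}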
*/ mutex_lock(&wq->mutex); if (!wq->nr_drainers++) wq->flags |= __WQ_DRAINING; mutex_unlock(&wq->mutex); reflush: __flush_workqueue(wq); mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) { bool drained; raw_spin_lock_irq(&pwq->pool->lock); drained = pwq_is_empty(pwq); raw_spin_unlock_irq(&pwq->pool->lock); if (drained) continue; if (++flush_cnt == 10 || (flush_cnt % 100 == 0 && flush_cnt <= 1000)) pr_warn("workqueue %s: %s() isn't complete after %u tries\n", wq->name, __func__, flush_cnt); mutex_unlock(&wq->mutex); goto reflush; } if (!--wq->nr_drainers) wq->flags &= ~__WQ_DRAINING; mutex_unlock(&wq->mutex); } EXPORT_SYMBOL_GPL(drain_workqueue); static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, bool from_cancel) { struct worker *worker = NULL; struct worker_pool *pool; struct pool_workqueue *pwq; struct workqueue_struct *wq; might_sleep(); rcu_read_lock(); pool = get_work_pool(work); if (!pool) { rcu_read_unlock(); return false; } raw_spin_lock_irq(&pool->lock); /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { if (unlikely(pwq->pool != pool)) goto already_gone; } else { worker = find_worker_executing_work(pool, work); if (!worker) goto already_gone; pwq = worker->current_pwq; } wq = pwq->wq; check_flush_dependency(wq, work); insert_wq_barrier(pwq, barr, work, worker); raw_spin_unlock_irq(&pool->lock); touch_work_lockdep_map(work, wq); /* * Force a lock recursion deadlock when using flush_work() inside a * single-threaded or rescuer equipped workqueue. * * For single threaded workqueues the deadlock happens when the work * is after the work issuing the flush_work(). For rescuer equipped * workqueues the deadlock happens when the rescuer stalls, blocking * forward progress. */ if (!from_cancel && (wq->saved_max_active == 1 || wq->rescuer)) touch_wq_lockdep_map(wq); rcu_read_unlock(); return true; already_gone: raw_spin_unlock_irq(&pool->lock); rcu_read_unlock(); return false; } static bool __flush_work(struct work_struct *work, bool from_cancel) { struct wq_barrier barr; if (WARN_ON(!wq_online)) return false; if (WARN_ON(!work->func)) return false; if (start_flush_work(work, &barr, from_cancel)) { wait_for_completion(&barr.done); destroy_work_on_stack(&barr.work); return true; } else { return false; } } /** * flush_work - wait for a work to finish executing the last queueing instance * @work: the work to flush * * Wait until @work has finished execution. @work is guaranteed to be idle * on return if it hasn't been requeued since flush started. * * Return: * %true if flush_work() waited for the work to finish execution, * %false if it was already idle. */ bool flush_work(struct work_struct *work) { return __flush_work(work, false); } EXPORT_SYMBOL_GPL(flush_work); /** * flush_delayed_work - wait for a dwork to finish executing the last queueing * @dwork: the delayed work to flush * * Delayed timer is cancelled and the pending work is queued for * immediate execution. Like flush_work(), this function only * considers the last queueing instance of @dwork. * * Return: * %true if flush_work() waited for the work to finish execution, * %false if it was already idle. 
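 *
 * A minimal usage sketch (illustrative only; "struct foo" and its
 * delayed_work member "stats_work" are hypothetical): a reader that wants
 * up-to-date numbers can force the most recently queued instance to run
 * immediately and wait for it:
 *
 *	flush_delayed_work(&foo->stats_work);
 *	pr_info("stats: %lu\n", foo->stats);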
*/ bool flush_delayed_work(struct delayed_work *dwork) { local_irq_disable(); if (del_timer_sync(&dwork->timer)) __queue_work(dwork->cpu, dwork->wq, &dwork->work); local_irq_enable(); return flush_work(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work); /** * flush_rcu_work - wait for a rwork to finish executing the last queueing * @rwork: the rcu work to flush * * Return: * %true if flush_rcu_work() waited for the work to finish execution, * %false if it was already idle. */ bool flush_rcu_work(struct rcu_work *rwork) { if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) { rcu_barrier(); flush_work(&rwork->work); return true; } else { return flush_work(&rwork->work); } } EXPORT_SYMBOL(flush_rcu_work); static bool __cancel_work(struct work_struct *work, u32 cflags) { unsigned long irq_flags; int ret; do { ret = try_to_grab_pending(work, cflags, &irq_flags); } while (unlikely(ret == -EAGAIN)); if (unlikely(ret < 0)) return false; set_work_pool_and_clear_pending(work, get_work_pool_id(work), 0); local_irq_restore(irq_flags); return ret; } static bool __cancel_work_sync(struct work_struct *work, u32 cflags) { unsigned long irq_flags; bool ret; /* claim @work and tell other tasks trying to grab @work to back off */ ret = work_grab_pending(work, cflags, &irq_flags); mark_work_canceling(work); local_irq_restore(irq_flags); /* * Skip __flush_work() during early boot when we know that @work isn't * executing. This allows canceling during early boot. */ if (wq_online) __flush_work(work, true); /* * smp_mb() at the end of set_work_pool_and_clear_pending() is paired * with prepare_to_wait() above so that either waitqueue_active() is * visible here or !work_is_canceling() is visible there. */ set_work_pool_and_clear_pending(work, WORK_OFFQ_POOL_NONE, 0); if (waitqueue_active(&wq_cancel_waitq)) __wake_up(&wq_cancel_waitq, TASK_NORMAL, 1, work); return ret; } /* * See cancel_delayed_work() */ bool cancel_work(struct work_struct *work) { return __cancel_work(work, 0); } EXPORT_SYMBOL(cancel_work); /** * cancel_work_sync - cancel a work and wait for it to finish * @work: the work to cancel * * Cancel @work and wait for its execution to finish. This function * can be used even if the work re-queues itself or migrates to * another workqueue. On return from this function, @work is * guaranteed to be not pending or executing on any CPU. * * cancel_work_sync(&delayed_work->work) must not be used for * delayed_work's. Use cancel_delayed_work_sync() instead. * * The caller must ensure that the workqueue on which @work was last * queued can't be destroyed before this function returns. * * Return: * %true if @work was pending, %false otherwise. */ bool cancel_work_sync(struct work_struct *work) { return __cancel_work_sync(work, 0); } EXPORT_SYMBOL_GPL(cancel_work_sync); /** * cancel_delayed_work - cancel a delayed work * @dwork: delayed_work to cancel * * Kill off a pending delayed_work. * * Return: %true if @dwork was pending and canceled; %false if it wasn't * pending. * * Note: * The work callback function may still be running on return, unless * it returns %true and the work doesn't re-arm itself. Explicitly flush or * use cancel_delayed_work_sync() to wait on it. * * This function is safe to call from any context including IRQ handler. 
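 *
 * A minimal sketch (illustrative only; "dev" and its delayed_work member
 * "timeout" are hypothetical): an interrupt handler that has observed the
 * awaited completion can drop a pending timeout without sleeping,
 *
 *	cancel_delayed_work(&dev->timeout);
 *
 * while teardown code running in process context would instead call
 * cancel_delayed_work_sync(&dev->timeout) so that a callback which is
 * already executing is waited for as well.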
*/ bool cancel_delayed_work(struct delayed_work *dwork) { return __cancel_work(&dwork->work, WORK_CANCEL_DELAYED); } EXPORT_SYMBOL(cancel_delayed_work); /** * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish * @dwork: the delayed work cancel * * This is cancel_work_sync() for delayed works. * * Return: * %true if @dwork was pending, %false otherwise. */ bool cancel_delayed_work_sync(struct delayed_work *dwork) { return __cancel_work_sync(&dwork->work, WORK_CANCEL_DELAYED); } EXPORT_SYMBOL(cancel_delayed_work_sync); /** * schedule_on_each_cpu - execute a function synchronously on each online CPU * @func: the function to call * * schedule_on_each_cpu() executes @func on each online CPU using the * system workqueue and blocks until all CPUs have completed. * schedule_on_each_cpu() is very slow. * * Return: * 0 on success, -errno on failure. */ int schedule_on_each_cpu(work_func_t func) { int cpu; struct work_struct __percpu *works; works = alloc_percpu(struct work_struct); if (!works) return -ENOMEM; cpus_read_lock(); for_each_online_cpu(cpu) { struct work_struct *work = per_cpu_ptr(works, cpu); INIT_WORK(work, func); schedule_work_on(cpu, work); } for_each_online_cpu(cpu) flush_work(per_cpu_ptr(works, cpu)); cpus_read_unlock(); free_percpu(works); return 0; } /** * execute_in_process_context - reliably execute the routine with user context * @fn: the function to execute * @ew: guaranteed storage for the execute work structure (must * be available when the work executes) * * Executes the function immediately if process context is available, * otherwise schedules the function for delayed execution. * * Return: 0 - function was executed * 1 - function was scheduled for execution */ int execute_in_process_context(work_func_t fn, struct execute_work *ew) { if (!in_interrupt()) { fn(&ew->work); return 0; } INIT_WORK(&ew->work, fn); schedule_work(&ew->work); return 1; } EXPORT_SYMBOL_GPL(execute_in_process_context); /** * free_workqueue_attrs - free a workqueue_attrs * @attrs: workqueue_attrs to free * * Undo alloc_workqueue_attrs(). */ void free_workqueue_attrs(struct workqueue_attrs *attrs) { if (attrs) { free_cpumask_var(attrs->cpumask); free_cpumask_var(attrs->__pod_cpumask); kfree(attrs); } } /** * alloc_workqueue_attrs - allocate a workqueue_attrs * * Allocate a new workqueue_attrs, initialize with default settings and * return it. * * Return: The allocated new workqueue_attr on success. %NULL on failure. */ struct workqueue_attrs *alloc_workqueue_attrs(void) { struct workqueue_attrs *attrs; attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (!attrs) goto fail; if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL)) goto fail; if (!alloc_cpumask_var(&attrs->__pod_cpumask, GFP_KERNEL)) goto fail; cpumask_copy(attrs->cpumask, cpu_possible_mask); attrs->affn_scope = WQ_AFFN_DFL; return attrs; fail: free_workqueue_attrs(attrs); return NULL; } static void copy_workqueue_attrs(struct workqueue_attrs *to, const struct workqueue_attrs *from) { to->nice = from->nice; cpumask_copy(to->cpumask, from->cpumask); cpumask_copy(to->__pod_cpumask, from->__pod_cpumask); to->affn_strict = from->affn_strict; /* * Unlike hash and equality test, copying shouldn't ignore wq-only * fields as copying is used for both pool and wq attrs. Instead, * get_unbound_pool() explicitly clears the fields. */ to->affn_scope = from->affn_scope; to->ordered = from->ordered; } /* * Some attrs fields are workqueue-only. Clear them for worker_pool's. See the * comments in 'struct workqueue_attrs' definition. 
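 *
 * Currently that covers ->affn_scope and ->ordered: the hash and equality
 * helpers below already ignore them, and clearing them keeps pool->attrs
 * from carrying stale workqueue-only state.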
*/ static void wqattrs_clear_for_pool(struct workqueue_attrs *attrs) { attrs->affn_scope = WQ_AFFN_NR_TYPES; attrs->ordered = false; } /* hash value of the content of @attr */ static u32 wqattrs_hash(const struct workqueue_attrs *attrs) { u32 hash = 0; hash = jhash_1word(attrs->nice, hash); hash = jhash(cpumask_bits(attrs->cpumask), BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); hash = jhash(cpumask_bits(attrs->__pod_cpumask), BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); hash = jhash_1word(attrs->affn_strict, hash); return hash; } /* content equality test */ static bool wqattrs_equal(const struct workqueue_attrs *a, const struct workqueue_attrs *b) { if (a->nice != b->nice) return false; if (!cpumask_equal(a->cpumask, b->cpumask)) return false; if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask)) return false; if (a->affn_strict != b->affn_strict) return false; return true; } /* Update @attrs with actually available CPUs */ static void wqattrs_actualize_cpumask(struct workqueue_attrs *attrs, const cpumask_t *unbound_cpumask) { /* * Calculate the effective CPU mask of @attrs given @unbound_cpumask. If * @attrs->cpumask doesn't overlap with @unbound_cpumask, we fallback to * @unbound_cpumask. */ cpumask_and(attrs->cpumask, attrs->cpumask, unbound_cpumask); if (unlikely(cpumask_empty(attrs->cpumask))) cpumask_copy(attrs->cpumask, unbound_cpumask); } /* find wq_pod_type to use for @attrs */ static const struct wq_pod_type * wqattrs_pod_type(const struct workqueue_attrs *attrs) { enum wq_affn_scope scope; struct wq_pod_type *pt; /* to synchronize access to wq_affn_dfl */ lockdep_assert_held(&wq_pool_mutex); if (attrs->affn_scope == WQ_AFFN_DFL) scope = wq_affn_dfl; else scope = attrs->affn_scope; pt = &wq_pod_types[scope]; if (!WARN_ON_ONCE(attrs->affn_scope == WQ_AFFN_NR_TYPES) && likely(pt->nr_pods)) return pt; /* * Before workqueue_init_topology(), only SYSTEM is available which is * initialized in workqueue_init_early(). */ pt = &wq_pod_types[WQ_AFFN_SYSTEM]; BUG_ON(!pt->nr_pods); return pt; } /** * init_worker_pool - initialize a newly zalloc'd worker_pool * @pool: worker_pool to initialize * * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs. * * Return: 0 on success, -errno on failure. Even on failure, all fields * inside @pool proper are initialized and put_unbound_pool() can be called * on @pool safely to release it. 
*/ static int init_worker_pool(struct worker_pool *pool) { raw_spin_lock_init(&pool->lock); pool->id = -1; pool->cpu = -1; pool->node = NUMA_NO_NODE; pool->flags |= POOL_DISASSOCIATED; pool->watchdog_ts = jiffies; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); hash_init(pool->busy_hash); timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE); INIT_WORK(&pool->idle_cull_work, idle_cull_fn); timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0); INIT_LIST_HEAD(&pool->workers); INIT_LIST_HEAD(&pool->dying_workers); ida_init(&pool->worker_ida); INIT_HLIST_NODE(&pool->hash_node); pool->refcnt = 1; /* shouldn't fail above this point */ pool->attrs = alloc_workqueue_attrs(); if (!pool->attrs) return -ENOMEM; wqattrs_clear_for_pool(pool->attrs); return 0; } #ifdef CONFIG_LOCKDEP static void wq_init_lockdep(struct workqueue_struct *wq) { char *lock_name; lockdep_register_key(&wq->key); lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name); if (!lock_name) lock_name = wq->name; wq->lock_name = lock_name; lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0); } static void wq_unregister_lockdep(struct workqueue_struct *wq) { lockdep_unregister_key(&wq->key); } static void wq_free_lockdep(struct workqueue_struct *wq) { if (wq->lock_name != wq->name) kfree(wq->lock_name); } #else static void wq_init_lockdep(struct workqueue_struct *wq) { } static void wq_unregister_lockdep(struct workqueue_struct *wq) { } static void wq_free_lockdep(struct workqueue_struct *wq) { } #endif static void free_node_nr_active(struct wq_node_nr_active **nna_ar) { int node; for_each_node(node) { kfree(nna_ar[node]); nna_ar[node] = NULL; } kfree(nna_ar[nr_node_ids]); nna_ar[nr_node_ids] = NULL; } static void init_node_nr_active(struct wq_node_nr_active *nna) { nna->max = WQ_DFL_MIN_ACTIVE; atomic_set(&nna->nr, 0); raw_spin_lock_init(&nna->lock); INIT_LIST_HEAD(&nna->pending_pwqs); } /* * Each node's nr_active counter will be accessed mostly from its own node and * should be allocated in the node. */ static int alloc_node_nr_active(struct wq_node_nr_active **nna_ar) { struct wq_node_nr_active *nna; int node; for_each_node(node) { nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, node); if (!nna) goto err_free; init_node_nr_active(nna); nna_ar[node] = nna; } /* [nr_node_ids] is used as the fallback */ nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, NUMA_NO_NODE); if (!nna) goto err_free; init_node_nr_active(nna); nna_ar[nr_node_ids] = nna; return 0; err_free: free_node_nr_active(nna_ar); return -ENOMEM; } static void rcu_free_wq(struct rcu_head *rcu) { struct workqueue_struct *wq = container_of(rcu, struct workqueue_struct, rcu); if (wq->flags & WQ_UNBOUND) free_node_nr_active(wq->node_nr_active); wq_free_lockdep(wq); free_percpu(wq->cpu_pwq); free_workqueue_attrs(wq->unbound_attrs); kfree(wq); } static void rcu_free_pool(struct rcu_head *rcu) { struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); ida_destroy(&pool->worker_ida); free_workqueue_attrs(pool->attrs); kfree(pool); } /** * put_unbound_pool - put a worker_pool * @pool: worker_pool to put * * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU * safe manner. get_unbound_pool() calls this function on its failure path * and this function should be able to release pools which went through, * successfully or not, init_worker_pool(). * * Should be called with wq_pool_mutex held. 
*/ static void put_unbound_pool(struct worker_pool *pool) { DECLARE_COMPLETION_ONSTACK(detach_completion); struct worker *worker; LIST_HEAD(cull_list); lockdep_assert_held(&wq_pool_mutex); if (--pool->refcnt) return; /* sanity checks */ if (WARN_ON(!(pool->cpu < 0)) || WARN_ON(!list_empty(&pool->worklist))) return; /* release id and unhash */ if (pool->id >= 0) idr_remove(&worker_pool_idr, pool->id); hash_del(&pool->hash_node); /* * Become the manager and destroy all workers. This prevents * @pool's workers from blocking on attach_mutex. We're the last * manager and @pool gets freed with the flag set. * * Having a concurrent manager is quite unlikely to happen as we can * only get here with * pwq->refcnt == pool->refcnt == 0 * which implies no work queued to the pool, which implies no worker can * become the manager. However a worker could have taken the role of * manager before the refcnts dropped to 0, since maybe_create_worker() * drops pool->lock */ while (true) { rcuwait_wait_event(&manager_wait, !(pool->flags & POOL_MANAGER_ACTIVE), TASK_UNINTERRUPTIBLE); mutex_lock(&wq_pool_attach_mutex); raw_spin_lock_irq(&pool->lock); if (!(pool->flags & POOL_MANAGER_ACTIVE)) { pool->flags |= POOL_MANAGER_ACTIVE; break; } raw_spin_unlock_irq(&pool->lock); mutex_unlock(&wq_pool_attach_mutex); } while ((worker = first_idle_worker(pool))) set_worker_dying(worker, &cull_list); WARN_ON(pool->nr_workers || pool->nr_idle); raw_spin_unlock_irq(&pool->lock); wake_dying_workers(&cull_list); if (!list_empty(&pool->workers) || !list_empty(&pool->dying_workers)) pool->detach_completion = &detach_completion; mutex_unlock(&wq_pool_attach_mutex); if (pool->detach_completion) wait_for_completion(pool->detach_completion); /* shut down the timers */ del_timer_sync(&pool->idle_timer); cancel_work_sync(&pool->idle_cull_work); del_timer_sync(&pool->mayday_timer); /* RCU protected to allow dereferences from get_work_pool() */ call_rcu(&pool->rcu, rcu_free_pool); } /** * get_unbound_pool - get a worker_pool with the specified attributes * @attrs: the attributes of the worker_pool to get * * Obtain a worker_pool which has the same attributes as @attrs, bump the * reference count and return it. If there already is a matching * worker_pool, it will be used; otherwise, this function attempts to * create a new one. * * Should be called with wq_pool_mutex held. * * Return: On success, a worker_pool with the same attributes as @attrs. * On failure, %NULL. */ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) { struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_NUMA]; u32 hash = wqattrs_hash(attrs); struct worker_pool *pool; int pod, node = NUMA_NO_NODE; lockdep_assert_held(&wq_pool_mutex); /* do we already have a matching pool? 
*/ hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { if (wqattrs_equal(pool->attrs, attrs)) { pool->refcnt++; return pool; } } /* If __pod_cpumask is contained inside a NUMA pod, that's our node */ for (pod = 0; pod < pt->nr_pods; pod++) { if (cpumask_subset(attrs->__pod_cpumask, pt->pod_cpus[pod])) { node = pt->pod_node[pod]; break; } } /* nope, create a new one */ pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, node); if (!pool || init_worker_pool(pool) < 0) goto fail; pool->node = node; copy_workqueue_attrs(pool->attrs, attrs); wqattrs_clear_for_pool(pool->attrs); if (worker_pool_assign_id(pool) < 0) goto fail; /* create and start the initial worker */ if (wq_online && !create_worker(pool)) goto fail; /* install */ hash_add(unbound_pool_hash, &pool->hash_node, hash); return pool; fail: if (pool) put_unbound_pool(pool); return NULL; } static void rcu_free_pwq(struct rcu_head *rcu) { kmem_cache_free(pwq_cache, container_of(rcu, struct pool_workqueue, rcu)); } /* * Scheduled on pwq_release_worker by put_pwq() when an unbound pwq hits zero * refcnt and needs to be destroyed. */ static void pwq_release_workfn(struct kthread_work *work) { struct pool_workqueue *pwq = container_of(work, struct pool_workqueue, release_work); struct workqueue_struct *wq = pwq->wq; struct worker_pool *pool = pwq->pool; bool is_last = false; /* * When @pwq is not linked, it doesn't hold any reference to the * @wq, and @wq is invalid to access. */ if (!list_empty(&pwq->pwqs_node)) { mutex_lock(&wq->mutex); list_del_rcu(&pwq->pwqs_node); is_last = list_empty(&wq->pwqs); /* * For ordered workqueue with a plugged dfl_pwq, restart it now. */ if (!is_last && (wq->flags & __WQ_ORDERED)) unplug_oldest_pwq(wq); mutex_unlock(&wq->mutex); } if (wq->flags & WQ_UNBOUND) { mutex_lock(&wq_pool_mutex); put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); } if (!list_empty(&pwq->pending_node)) { struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pwq->pool->node); raw_spin_lock_irq(&nna->lock); list_del_init(&pwq->pending_node); raw_spin_unlock_irq(&nna->lock); } call_rcu(&pwq->rcu, rcu_free_pwq); /* * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Schedule RCU free. 
*/ if (is_last) { wq_unregister_lockdep(wq); call_rcu(&wq->rcu, rcu_free_wq); } } /* initialize newly allocated @pwq which is associated with @wq and @pool */ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, struct worker_pool *pool) { BUG_ON((unsigned long)pwq & ~WORK_STRUCT_PWQ_MASK); memset(pwq, 0, sizeof(*pwq)); pwq->pool = pool; pwq->wq = wq; pwq->flush_color = -1; pwq->refcnt = 1; INIT_LIST_HEAD(&pwq->inactive_works); INIT_LIST_HEAD(&pwq->pending_node); INIT_LIST_HEAD(&pwq->pwqs_node); INIT_LIST_HEAD(&pwq->mayday_node); kthread_init_work(&pwq->release_work, pwq_release_workfn); } /* sync @pwq with the current state of its associated wq and link it */ static void link_pwq(struct pool_workqueue *pwq) { struct workqueue_struct *wq = pwq->wq; lockdep_assert_held(&wq->mutex); /* may be called multiple times, ignore if already linked */ if (!list_empty(&pwq->pwqs_node)) return; /* set the matching work_color */ pwq->work_color = wq->work_color; /* link in @pwq */ list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); } /* obtain a pool matching @attr and create a pwq associating the pool and @wq */ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { struct worker_pool *pool; struct pool_workqueue *pwq; lockdep_assert_held(&wq_pool_mutex); pool = get_unbound_pool(attrs); if (!pool) return NULL; pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); if (!pwq) { put_unbound_pool(pool); return NULL; } init_pwq(pwq, wq, pool); return pwq; } /** * wq_calc_pod_cpumask - calculate a wq_attrs' cpumask for a pod * @attrs: the wq_attrs of the default pwq of the target workqueue * @cpu: the target CPU * @cpu_going_down: if >= 0, the CPU to consider as offline * * Calculate the cpumask a workqueue with @attrs should use on @pod. If * @cpu_going_down is >= 0, that cpu is considered offline during calculation. * The result is stored in @attrs->__pod_cpumask. * * If pod affinity is not enabled, @attrs->cpumask is always used. If enabled * and @pod has online CPUs requested by @attrs, the returned cpumask is the * intersection of the possible CPUs of @pod and @attrs->cpumask. * * The caller is responsible for ensuring that the cpumask of @pod stays stable. */ static void wq_calc_pod_cpumask(struct workqueue_attrs *attrs, int cpu, int cpu_going_down) { const struct wq_pod_type *pt = wqattrs_pod_type(attrs); int pod = pt->cpu_pod[cpu]; /* does @pod have any online CPUs @attrs wants? 
*/ cpumask_and(attrs->__pod_cpumask, pt->pod_cpus[pod], attrs->cpumask); cpumask_and(attrs->__pod_cpumask, attrs->__pod_cpumask, cpu_online_mask); if (cpu_going_down >= 0) cpumask_clear_cpu(cpu_going_down, attrs->__pod_cpumask); if (cpumask_empty(attrs->__pod_cpumask)) { cpumask_copy(attrs->__pod_cpumask, attrs->cpumask); return; } /* yeap, return possible CPUs in @pod that @attrs wants */ cpumask_and(attrs->__pod_cpumask, attrs->cpumask, pt->pod_cpus[pod]); if (cpumask_empty(attrs->__pod_cpumask)) pr_warn_once("WARNING: workqueue cpumask: online intersect > " "possible intersect\n"); } /* install @pwq into @wq and return the old pwq, @cpu < 0 for dfl_pwq */ static struct pool_workqueue *install_unbound_pwq(struct workqueue_struct *wq, int cpu, struct pool_workqueue *pwq) { struct pool_workqueue __rcu **slot = unbound_pwq_slot(wq, cpu); struct pool_workqueue *old_pwq; lockdep_assert_held(&wq_pool_mutex); lockdep_assert_held(&wq->mutex); /* link_pwq() can handle duplicate calls */ link_pwq(pwq); old_pwq = rcu_access_pointer(*slot); rcu_assign_pointer(*slot, pwq); return old_pwq; } /* context to store the prepared attrs & pwqs before applying */ struct apply_wqattrs_ctx { struct workqueue_struct *wq; /* target workqueue */ struct workqueue_attrs *attrs; /* attrs to apply */ struct list_head list; /* queued for batching commit */ struct pool_workqueue *dfl_pwq; struct pool_workqueue *pwq_tbl[]; }; /* free the resources after success or abort */ static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx) { if (ctx) { int cpu; for_each_possible_cpu(cpu) put_pwq_unlocked(ctx->pwq_tbl[cpu]); put_pwq_unlocked(ctx->dfl_pwq); free_workqueue_attrs(ctx->attrs); kfree(ctx); } } /* allocate the attrs and pwqs for later installation */ static struct apply_wqattrs_ctx * apply_wqattrs_prepare(struct workqueue_struct *wq, const struct workqueue_attrs *attrs, const cpumask_var_t unbound_cpumask) { struct apply_wqattrs_ctx *ctx; struct workqueue_attrs *new_attrs; int cpu; lockdep_assert_held(&wq_pool_mutex); if (WARN_ON(attrs->affn_scope < 0 || attrs->affn_scope >= WQ_AFFN_NR_TYPES)) return ERR_PTR(-EINVAL); ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_cpu_ids), GFP_KERNEL); new_attrs = alloc_workqueue_attrs(); if (!ctx || !new_attrs) goto out_free; /* * If something goes wrong during CPU up/down, we'll fall back to * the default pwq covering whole @attrs->cpumask. Always create * it even if we don't use it immediately. */ copy_workqueue_attrs(new_attrs, attrs); wqattrs_actualize_cpumask(new_attrs, unbound_cpumask); cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask); ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs); if (!ctx->dfl_pwq) goto out_free; for_each_possible_cpu(cpu) { if (new_attrs->ordered) { ctx->dfl_pwq->refcnt++; ctx->pwq_tbl[cpu] = ctx->dfl_pwq; } else { wq_calc_pod_cpumask(new_attrs, cpu, -1); ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, new_attrs); if (!ctx->pwq_tbl[cpu]) goto out_free; } } /* save the user configured attrs and sanitize it. */ copy_workqueue_attrs(new_attrs, attrs); cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask); ctx->attrs = new_attrs; /* * For initialized ordered workqueues, there should only be one pwq * (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution * of newly queued work items until execution of older work items in * the old pwq's have completed. 
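 *
 * The plug is released from pwq_release_workfn(): once the old pwq of an
 * ordered workqueue is unlinked and released, unplug_oldest_pwq() restarts
 * the new dfl_pwq, preserving the ordering guarantee across attribute
 * changes.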
*/ if ((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)) ctx->dfl_pwq->plugged = true; ctx->wq = wq; return ctx; out_free: free_workqueue_attrs(new_attrs); apply_wqattrs_cleanup(ctx); return ERR_PTR(-ENOMEM); } /* set attrs and install prepared pwqs, @ctx points to old pwqs on return */ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx) { int cpu; /* all pwqs have been created successfully, let's install'em */ mutex_lock(&ctx->wq->mutex); copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs); /* save the previous pwqs and install the new ones */ for_each_possible_cpu(cpu) ctx->pwq_tbl[cpu] = install_unbound_pwq(ctx->wq, cpu, ctx->pwq_tbl[cpu]); ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq); /* update node_nr_active->max */ wq_update_node_max_active(ctx->wq, -1); /* rescuer needs to respect wq cpumask changes */ if (ctx->wq->rescuer) set_cpus_allowed_ptr(ctx->wq->rescuer->task, unbound_effective_cpumask(ctx->wq)); mutex_unlock(&ctx->wq->mutex); } static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { struct apply_wqattrs_ctx *ctx; /* only unbound workqueues can change attributes */ if (WARN_ON(!(wq->flags & WQ_UNBOUND))) return -EINVAL; ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_cpumask); if (IS_ERR(ctx)) return PTR_ERR(ctx); /* the ctx has been prepared successfully, let's commit it */ apply_wqattrs_commit(ctx); apply_wqattrs_cleanup(ctx); return 0; } /** * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue * @wq: the target workqueue * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() * * Apply @attrs to an unbound workqueue @wq. Unless disabled, this function maps * a separate pwq to each CPU pod with possibles CPUs in @attrs->cpumask so that * work items are affine to the pod it was issued on. Older pwqs are released as * in-flight work items finish. Note that a work item which repeatedly requeues * itself back-to-back will stay on its current pwq. * * Performs GFP_KERNEL allocations. * * Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock(). * * Return: 0 on success and -errno on failure. */ int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { int ret; lockdep_assert_cpus_held(); mutex_lock(&wq_pool_mutex); ret = apply_workqueue_attrs_locked(wq, attrs); mutex_unlock(&wq_pool_mutex); return ret; } /** * wq_update_pod - update pod affinity of a wq for CPU hot[un]plug * @wq: the target workqueue * @cpu: the CPU to update pool association for * @hotplug_cpu: the CPU coming up or going down * @online: whether @cpu is coming up or going down * * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and * %CPU_DOWN_FAILED. @cpu is being hot[un]plugged, update pod affinity of * @wq accordingly. * * * If pod affinity can't be adjusted due to memory allocation failure, it falls * back to @wq->dfl_pwq which may not be optimal but is always correct. * * Note that when the last allowed CPU of a pod goes offline for a workqueue * with a cpumask spanning multiple pods, the workers which were already * executing the work items for the workqueue will lose their CPU affinity and * may execute on any CPU. This is similar to how per-cpu workqueues behave on * CPU_DOWN. If a workqueue user wants strict affinity, it's the user's * responsibility to flush the work item from CPU_DOWN_PREPARE. 
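 *
 * Both workqueue_online_cpu() and workqueue_offline_cpu() invoke this for
 * every CPU in the pod containing the hotplugged CPU, so that each per-CPU
 * pwq slot of every unbound workqueue is re-evaluated against the updated
 * online mask.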
*/ static void wq_update_pod(struct workqueue_struct *wq, int cpu, int hotplug_cpu, bool online) { int off_cpu = online ? -1 : hotplug_cpu; struct pool_workqueue *old_pwq = NULL, *pwq; struct workqueue_attrs *target_attrs; lockdep_assert_held(&wq_pool_mutex); if (!(wq->flags & WQ_UNBOUND) || wq->unbound_attrs->ordered) return; /* * We don't wanna alloc/free wq_attrs for each wq for each CPU. * Let's use a preallocated one. The following buf is protected by * CPU hotplug exclusion. */ target_attrs = wq_update_pod_attrs_buf; copy_workqueue_attrs(target_attrs, wq->unbound_attrs); wqattrs_actualize_cpumask(target_attrs, wq_unbound_cpumask); /* nothing to do if the target cpumask matches the current pwq */ wq_calc_pod_cpumask(target_attrs, cpu, off_cpu); if (wqattrs_equal(target_attrs, unbound_pwq(wq, cpu)->pool->attrs)) return; /* create a new pwq */ pwq = alloc_unbound_pwq(wq, target_attrs); if (!pwq) { pr_warn("workqueue: allocation failed while updating CPU pod affinity of \"%s\"\n", wq->name); goto use_dfl_pwq; } /* Install the new pwq. */ mutex_lock(&wq->mutex); old_pwq = install_unbound_pwq(wq, cpu, pwq); goto out_unlock; use_dfl_pwq: mutex_lock(&wq->mutex); pwq = unbound_pwq(wq, -1); raw_spin_lock_irq(&pwq->pool->lock); get_pwq(pwq); raw_spin_unlock_irq(&pwq->pool->lock); old_pwq = install_unbound_pwq(wq, cpu, pwq); out_unlock: mutex_unlock(&wq->mutex); put_pwq_unlocked(old_pwq); } static int alloc_and_link_pwqs(struct workqueue_struct *wq) { bool highpri = wq->flags & WQ_HIGHPRI; int cpu, ret; wq->cpu_pwq = alloc_percpu(struct pool_workqueue *); if (!wq->cpu_pwq) goto enomem; if (!(wq->flags & WQ_UNBOUND)) { for_each_possible_cpu(cpu) { struct pool_workqueue **pwq_p; struct worker_pool __percpu *pools; struct worker_pool *pool; if (wq->flags & WQ_BH) pools = bh_worker_pools; else pools = cpu_worker_pools; pool = &(per_cpu_ptr(pools, cpu)[highpri]); pwq_p = per_cpu_ptr(wq->cpu_pwq, cpu); *pwq_p = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); if (!*pwq_p) goto enomem; init_pwq(*pwq_p, wq, pool); mutex_lock(&wq->mutex); link_pwq(*pwq_p); mutex_unlock(&wq->mutex); } return 0; } cpus_read_lock(); if (wq->flags & __WQ_ORDERED) { struct pool_workqueue *dfl_pwq; ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]); /* there should only be single pwq for ordering guarantee */ dfl_pwq = rcu_access_pointer(wq->dfl_pwq); WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node || wq->pwqs.prev != &dfl_pwq->pwqs_node), "ordering guarantee broken for workqueue %s\n", wq->name); } else { ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); } cpus_read_unlock(); /* for unbound pwq, flush the pwq_release_worker ensures that the * pwq_release_workfn() completes before calling kfree(wq). */ if (ret) kthread_flush_worker(pwq_release_worker); return ret; enomem: if (wq->cpu_pwq) { for_each_possible_cpu(cpu) { struct pool_workqueue *pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); if (pwq) kmem_cache_free(pwq_cache, pwq); } free_percpu(wq->cpu_pwq); wq->cpu_pwq = NULL; } return -ENOMEM; } static int wq_clamp_max_active(int max_active, unsigned int flags, const char *name) { if (max_active < 1 || max_active > WQ_MAX_ACTIVE) pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n", max_active, name, 1, WQ_MAX_ACTIVE); return clamp_val(max_active, 1, WQ_MAX_ACTIVE); } /* * Workqueues which may be used during memory reclaim should have a rescuer * to guarantee forward progress. 
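 *
 * A minimal sketch (illustrative only; the workqueue name "foo_io" is
 * hypothetical): a driver whose work items sit in the memory-reclaim or
 * writeback path would typically request a rescuer by passing
 * WQ_MEM_RECLAIM at creation time,
 *
 *	wq = alloc_workqueue("foo_io", WQ_MEM_RECLAIM, 0);
 *
 * which makes alloc_workqueue() call init_rescuer() below and attach a
 * dedicated "kworker/R-foo_io" rescuer thread to the workqueue.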
*/ static int init_rescuer(struct workqueue_struct *wq) { struct worker *rescuer; int ret; if (!(wq->flags & WQ_MEM_RECLAIM)) return 0; rescuer = alloc_worker(NUMA_NO_NODE); if (!rescuer) { pr_err("workqueue: Failed to allocate a rescuer for wq \"%s\"\n", wq->name); return -ENOMEM; } rescuer->rescue_wq = wq; rescuer->task = kthread_create(rescuer_thread, rescuer, "kworker/R-%s", wq->name); if (IS_ERR(rescuer->task)) { ret = PTR_ERR(rescuer->task); pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe", wq->name, ERR_PTR(ret)); kfree(rescuer); return ret; } wq->rescuer = rescuer; if (wq->flags & WQ_UNBOUND) kthread_bind_mask(rescuer->task, wq_unbound_cpumask); else kthread_bind_mask(rescuer->task, cpu_possible_mask); wake_up_process(rescuer->task); return 0; } /** * wq_adjust_max_active - update a wq's max_active to the current setting * @wq: target workqueue * * If @wq isn't freezing, set @wq->max_active to the saved_max_active and * activate inactive work items accordingly. If @wq is freezing, clear * @wq->max_active to zero. */ static void wq_adjust_max_active(struct workqueue_struct *wq) { bool activated; int new_max, new_min; lockdep_assert_held(&wq->mutex); if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) { new_max = 0; new_min = 0; } else { new_max = wq->saved_max_active; new_min = wq->saved_min_active; } if (wq->max_active == new_max && wq->min_active == new_min) return; /* * Update @wq->max/min_active and then kick inactive work items if more * active work items are allowed. This doesn't break work item ordering * because new work items are always queued behind existing inactive * work items if there are any. */ WRITE_ONCE(wq->max_active, new_max); WRITE_ONCE(wq->min_active, new_min); if (wq->flags & WQ_UNBOUND) wq_update_node_max_active(wq, -1); if (new_max == 0) return; /* * Round-robin through pwq's activating the first inactive work item * until max_active is filled. */ do { struct pool_workqueue *pwq; activated = false; for_each_pwq(pwq, wq) { unsigned long irq_flags; /* can be called during early boot w/ irq disabled */ raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); if (pwq_activate_first_inactive(pwq, true)) { activated = true; kick_pool(pwq->pool); } raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); } } while (activated); } __printf(1, 4) struct workqueue_struct *alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...) { va_list args; struct workqueue_struct *wq; size_t wq_size; int name_len; if (flags & WQ_BH) { if (WARN_ON_ONCE(flags & ~__WQ_BH_ALLOWS)) return NULL; if (WARN_ON_ONCE(max_active)) return NULL; } /* see the comment above the definition of WQ_POWER_EFFICIENT */ if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient) flags |= WQ_UNBOUND; /* allocate wq and format name */ if (flags & WQ_UNBOUND) wq_size = struct_size(wq, node_nr_active, nr_node_ids + 1); else wq_size = sizeof(*wq); wq = kzalloc(wq_size, GFP_KERNEL); if (!wq) return NULL; if (flags & WQ_UNBOUND) { wq->unbound_attrs = alloc_workqueue_attrs(); if (!wq->unbound_attrs) goto err_free_wq; } va_start(args, max_active); name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args); va_end(args); if (name_len >= WQ_NAME_LEN) pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", wq->name); if (flags & WQ_BH) { /* * BH workqueues always share a single execution context per CPU * and don't impose any max_active limit. 
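 *
 * This is also why WQ_BH callers are required to pass 0 for @max_active
 * above; the value is overridden with INT_MAX here so the limit is
 * effectively disabled.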
*/ max_active = INT_MAX; } else { max_active = max_active ?: WQ_DFL_ACTIVE; max_active = wq_clamp_max_active(max_active, flags, wq->name); } /* init wq */ wq->flags = flags; wq->max_active = max_active; wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE); wq->saved_max_active = wq->max_active; wq->saved_min_active = wq->min_active; mutex_init(&wq->mutex); atomic_set(&wq->nr_pwqs_to_flush, 0); INIT_LIST_HEAD(&wq->pwqs); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); INIT_LIST_HEAD(&wq->maydays); wq_init_lockdep(wq); INIT_LIST_HEAD(&wq->list); if (flags & WQ_UNBOUND) { if (alloc_node_nr_active(wq->node_nr_active) < 0) goto err_unreg_lockdep; } if (alloc_and_link_pwqs(wq) < 0) goto err_free_node_nr_active; if (wq_online && init_rescuer(wq) < 0) goto err_destroy; if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq)) goto err_destroy; /* * wq_pool_mutex protects global freeze state and workqueues list. * Grab it, adjust max_active and add the new @wq to workqueues * list. */ mutex_lock(&wq_pool_mutex); mutex_lock(&wq->mutex); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); list_add_tail_rcu(&wq->list, &workqueues); mutex_unlock(&wq_pool_mutex); return wq; err_free_node_nr_active: if (wq->flags & WQ_UNBOUND) free_node_nr_active(wq->node_nr_active); err_unreg_lockdep: wq_unregister_lockdep(wq); wq_free_lockdep(wq); err_free_wq: free_workqueue_attrs(wq->unbound_attrs); kfree(wq); return NULL; err_destroy: destroy_workqueue(wq); return NULL; } EXPORT_SYMBOL_GPL(alloc_workqueue); static bool pwq_busy(struct pool_workqueue *pwq) { int i; for (i = 0; i < WORK_NR_COLORS; i++) if (pwq->nr_in_flight[i]) return true; if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > 1)) return true; if (!pwq_is_empty(pwq)) return true; return false; } /** * destroy_workqueue - safely terminate a workqueue * @wq: target workqueue * * Safely destroy a workqueue. All work currently pending will be done first. */ void destroy_workqueue(struct workqueue_struct *wq) { struct pool_workqueue *pwq; int cpu; /* * Remove it from sysfs first so that sanity check failure doesn't * lead to sysfs name conflicts. */ workqueue_sysfs_unregister(wq); /* mark the workqueue destruction is in progress */ mutex_lock(&wq->mutex); wq->flags |= __WQ_DESTROYING; mutex_unlock(&wq->mutex); /* drain it before proceeding with destruction */ drain_workqueue(wq); /* kill rescuer, if sanity checks fail, leave it w/o rescuer */ if (wq->rescuer) { struct worker *rescuer = wq->rescuer; /* this prevents new queueing */ raw_spin_lock_irq(&wq_mayday_lock); wq->rescuer = NULL; raw_spin_unlock_irq(&wq_mayday_lock); /* rescuer will empty maydays list before exiting */ kthread_stop(rescuer->task); kfree(rescuer); } /* * Sanity checks - grab all the locks so that we wait for all * in-flight operations which may do put_pwq(). */ mutex_lock(&wq_pool_mutex); mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) { raw_spin_lock_irq(&pwq->pool->lock); if (WARN_ON(pwq_busy(pwq))) { pr_warn("%s: %s has the following busy pwq\n", __func__, wq->name); show_pwq(pwq); raw_spin_unlock_irq(&pwq->pool->lock); mutex_unlock(&wq->mutex); mutex_unlock(&wq_pool_mutex); show_one_workqueue(wq); return; } raw_spin_unlock_irq(&pwq->pool->lock); } mutex_unlock(&wq->mutex); /* * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. */ list_del_rcu(&wq->list); mutex_unlock(&wq_pool_mutex); /* * We're the sole accessor of @wq. Directly access cpu_pwq and dfl_pwq * to put the base refs. 
@wq will be auto-destroyed from the last * pwq_put. RCU read lock prevents @wq from going away from under us. */ rcu_read_lock(); for_each_possible_cpu(cpu) { put_pwq_unlocked(unbound_pwq(wq, cpu)); RCU_INIT_POINTER(*unbound_pwq_slot(wq, cpu), NULL); } put_pwq_unlocked(unbound_pwq(wq, -1)); RCU_INIT_POINTER(*unbound_pwq_slot(wq, -1), NULL); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(destroy_workqueue); /** * workqueue_set_max_active - adjust max_active of a workqueue * @wq: target workqueue * @max_active: new max_active value. * * Set max_active of @wq to @max_active. See the alloc_workqueue() function * comment. * * CONTEXT: * Don't call from IRQ context. */ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) { /* max_active doesn't mean anything for BH workqueues */ if (WARN_ON(wq->flags & WQ_BH)) return; /* disallow meddling with max_active for ordered workqueues */ if (WARN_ON(wq->flags & __WQ_ORDERED)) return; max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); mutex_lock(&wq->mutex); wq->saved_max_active = max_active; if (wq->flags & WQ_UNBOUND) wq->saved_min_active = min(wq->saved_min_active, max_active); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); } EXPORT_SYMBOL_GPL(workqueue_set_max_active); /** * workqueue_set_min_active - adjust min_active of an unbound workqueue * @wq: target unbound workqueue * @min_active: new min_active value * * Set min_active of an unbound workqueue. Unlike other types of workqueues, an * unbound workqueue is not guaranteed to be able to process max_active * interdependent work items. Instead, an unbound workqueue is guaranteed to be * able to process min_active number of interdependent work items which is * %WQ_DFL_MIN_ACTIVE by default. * * Use this function to adjust the min_active value between 0 and the current * max_active. */ void workqueue_set_min_active(struct workqueue_struct *wq, int min_active) { /* min_active is only meaningful for non-ordered unbound workqueues */ if (WARN_ON((wq->flags & (WQ_BH | WQ_UNBOUND | __WQ_ORDERED)) != WQ_UNBOUND)) return; mutex_lock(&wq->mutex); wq->saved_min_active = clamp(min_active, 0, wq->saved_max_active); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); } /** * current_work - retrieve %current task's work struct * * Determine if %current task is a workqueue worker and what it's working on. * Useful to find out the context that the %current task is running in. * * Return: work struct if %current task is a workqueue worker, %NULL otherwise. */ struct work_struct *current_work(void) { struct worker *worker = current_wq_worker(); return worker ? worker->current_work : NULL; } EXPORT_SYMBOL(current_work); /** * current_is_workqueue_rescuer - is %current workqueue rescuer? * * Determine whether %current is a workqueue rescuer. Can be used from * work functions to determine whether it's being run off the rescuer task. * * Return: %true if %current is a workqueue rescuer. %false otherwise. */ bool current_is_workqueue_rescuer(void) { struct worker *worker = current_wq_worker(); return worker && worker->rescue_wq; } /** * workqueue_congested - test whether a workqueue is congested * @cpu: CPU in question * @wq: target workqueue * * Test whether @wq's cpu workqueue for @cpu is congested. There is * no synchronization around this function and the test result is * unreliable and only useful as advisory hints or for debugging. * * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU. 
* * With the exception of ordered workqueues, all workqueues have per-cpu * pool_workqueues, each with its own congested state. A workqueue being * congested on one CPU doesn't mean that the workqueue is contested on any * other CPUs. * * Return: * %true if congested, %false otherwise. */ bool workqueue_congested(int cpu, struct workqueue_struct *wq) { struct pool_workqueue *pwq; bool ret; rcu_read_lock(); preempt_disable(); if (cpu == WORK_CPU_UNBOUND) cpu = smp_processor_id(); pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); ret = !list_empty(&pwq->inactive_works); preempt_enable(); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(workqueue_congested); /** * work_busy - test whether a work is currently pending or running * @work: the work to be tested * * Test whether @work is currently pending or running. There is no * synchronization around this function and the test result is * unreliable and only useful as advisory hints or for debugging. * * Return: * OR'd bitmask of WORK_BUSY_* bits. */ unsigned int work_busy(struct work_struct *work) { struct worker_pool *pool; unsigned long irq_flags; unsigned int ret = 0; if (work_pending(work)) ret |= WORK_BUSY_PENDING; rcu_read_lock(); pool = get_work_pool(work); if (pool) { raw_spin_lock_irqsave(&pool->lock, irq_flags); if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; raw_spin_unlock_irqrestore(&pool->lock, irq_flags); } rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(work_busy); /** * set_worker_desc - set description for the current work item * @fmt: printf-style format string * @...: arguments for the format string * * This function can be called by a running work function to describe what * the work item is about. If the worker task gets dumped, this * information will be printed out together to help debugging. The * description can be at most WORKER_DESC_LEN including the trailing '\0'. */ void set_worker_desc(const char *fmt, ...) { struct worker *worker = current_wq_worker(); va_list args; if (worker) { va_start(args, fmt); vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); va_end(args); } } EXPORT_SYMBOL_GPL(set_worker_desc); /** * print_worker_info - print out worker information and description * @log_lvl: the log level to use when printing * @task: target task * * If @task is a worker and currently executing a work item, print out the * name of the workqueue being serviced and worker description set with * set_worker_desc() by the currently executing work item. * * This function can be safely called on any task as long as the * task_struct itself is accessible. While safe, this function isn't * synchronized and may print out mixups or garbages of limited length. */ void print_worker_info(const char *log_lvl, struct task_struct *task) { work_func_t *fn = NULL; char name[WQ_NAME_LEN] = { }; char desc[WORKER_DESC_LEN] = { }; struct pool_workqueue *pwq = NULL; struct workqueue_struct *wq = NULL; struct worker *worker; if (!(task->flags & PF_WQ_WORKER)) return; /* * This function is called without any synchronization and @task * could be in any state. Be careful with dereferences. */ worker = kthread_probe_data(task); /* * Carefully copy the associated workqueue's workfn, name and desc. * Keep the original last '\0' in case the original is garbage. 
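 *
 * Every pointer in the chain (worker -> current_pwq -> wq -> name) may be
 * stale or garbage, which is why each step below uses
 * copy_from_kernel_nofault() rather than a plain dereference.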
*/ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { printk("%sWorkqueue: %s %ps", log_lvl, name, fn); if (strcmp(name, desc)) pr_cont(" (%s)", desc); pr_cont("\n"); } } static void pr_cont_pool_info(struct worker_pool *pool) { pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask); if (pool->node != NUMA_NO_NODE) pr_cont(" node=%d", pool->node); pr_cont(" flags=0x%x", pool->flags); if (pool->flags & POOL_BH) pr_cont(" bh%s", pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); else pr_cont(" nice=%d", pool->attrs->nice); } static void pr_cont_worker_id(struct worker *worker) { struct worker_pool *pool = worker->pool; if (pool->flags & WQ_BH) pr_cont("bh%s", pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); else pr_cont("%d%s", task_pid_nr(worker->task), worker->rescue_wq ? "(RESCUER)" : ""); } struct pr_cont_work_struct { bool comma; work_func_t func; long ctr; }; static void pr_cont_work_flush(bool comma, work_func_t func, struct pr_cont_work_struct *pcwsp) { if (!pcwsp->ctr) goto out_record; if (func == pcwsp->func) { pcwsp->ctr++; return; } if (pcwsp->ctr == 1) pr_cont("%s %ps", pcwsp->comma ? "," : "", pcwsp->func); else pr_cont("%s %ld*%ps", pcwsp->comma ? "," : "", pcwsp->ctr, pcwsp->func); pcwsp->ctr = 0; out_record: if ((long)func == -1L) return; pcwsp->comma = comma; pcwsp->func = func; pcwsp->ctr = 1; } static void pr_cont_work(bool comma, struct work_struct *work, struct pr_cont_work_struct *pcwsp) { if (work->func == wq_barrier_func) { struct wq_barrier *barr; barr = container_of(work, struct wq_barrier, work); pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); pr_cont("%s BAR(%d)", comma ? "," : "", task_pid_nr(barr->task)); } else { if (!comma) pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); pr_cont_work_flush(comma, work->func, pcwsp); } } static void show_pwq(struct pool_workqueue *pwq) { struct pr_cont_work_struct pcws = { .ctr = 0, }; struct worker_pool *pool = pwq->pool; struct work_struct *work; struct worker *worker; bool has_in_flight = false, has_pending = false; int bkt; pr_info(" pwq %d:", pool->id); pr_cont_pool_info(pool); pr_cont(" active=%d refcnt=%d%s\n", pwq->nr_active, pwq->refcnt, !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); hash_for_each(pool->busy_hash, bkt, worker, hentry) { if (worker->current_pwq == pwq) { has_in_flight = true; break; } } if (has_in_flight) { bool comma = false; pr_info(" in-flight:"); hash_for_each(pool->busy_hash, bkt, worker, hentry) { if (worker->current_pwq != pwq) continue; pr_cont(" %s", comma ? 
"," : ""); pr_cont_worker_id(worker); pr_cont(":%ps", worker->current_func); list_for_each_entry(work, &worker->scheduled, entry) pr_cont_work(false, work, &pcws); pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); comma = true; } pr_cont("\n"); } list_for_each_entry(work, &pool->worklist, entry) { if (get_work_pwq(work) == pwq) { has_pending = true; break; } } if (has_pending) { bool comma = false; pr_info(" pending:"); list_for_each_entry(work, &pool->worklist, entry) { if (get_work_pwq(work) != pwq) continue; pr_cont_work(comma, work, &pcws); comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); } pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); pr_cont("\n"); } if (!list_empty(&pwq->inactive_works)) { bool comma = false; pr_info(" inactive:"); list_for_each_entry(work, &pwq->inactive_works, entry) { pr_cont_work(comma, work, &pcws); comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); } pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); pr_cont("\n"); } } /** * show_one_workqueue - dump state of specified workqueue * @wq: workqueue whose state will be printed */ void show_one_workqueue(struct workqueue_struct *wq) { struct pool_workqueue *pwq; bool idle = true; unsigned long irq_flags; for_each_pwq(pwq, wq) { if (!pwq_is_empty(pwq)) { idle = false; break; } } if (idle) /* Nothing to print for idle workqueue */ return; pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); for_each_pwq(pwq, wq) { raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); if (!pwq_is_empty(pwq)) { /* * Defer printing to avoid deadlocks in console * drivers that queue work while holding locks * also taken in their write paths. */ printk_deferred_enter(); show_pwq(pwq); printk_deferred_exit(); } raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); /* * We could be printing a lot from atomic context, e.g. * sysrq-t -> show_all_workqueues(). Avoid triggering * hard lockup. */ touch_nmi_watchdog(); } } /** * show_one_worker_pool - dump state of specified worker pool * @pool: worker pool whose state will be printed */ static void show_one_worker_pool(struct worker_pool *pool) { struct worker *worker; bool first = true; unsigned long irq_flags; unsigned long hung = 0; raw_spin_lock_irqsave(&pool->lock, irq_flags); if (pool->nr_workers == pool->nr_idle) goto next_pool; /* How long the first pending work is waiting for a worker. */ if (!list_empty(&pool->worklist)) hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; /* * Defer printing to avoid deadlocks in console drivers that * queue work while holding locks also taken in their write * paths. */ printk_deferred_enter(); pr_info("pool %d:", pool->id); pr_cont_pool_info(pool); pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers); if (pool->manager) pr_cont(" manager: %d", task_pid_nr(pool->manager->task)); list_for_each_entry(worker, &pool->idle_list, entry) { pr_cont(" %s", first ? "idle: " : ""); pr_cont_worker_id(worker); first = false; } pr_cont("\n"); printk_deferred_exit(); next_pool: raw_spin_unlock_irqrestore(&pool->lock, irq_flags); /* * We could be printing a lot from atomic context, e.g. * sysrq-t -> show_all_workqueues(). Avoid triggering * hard lockup. */ touch_nmi_watchdog(); } /** * show_all_workqueues - dump workqueue state * * Called from a sysrq handler and prints out all busy workqueues and pools. 
*/ void show_all_workqueues(void) { struct workqueue_struct *wq; struct worker_pool *pool; int pi; rcu_read_lock(); pr_info("Showing busy workqueues and worker pools:\n"); list_for_each_entry_rcu(wq, &workqueues, list) show_one_workqueue(wq); for_each_pool(pool, pi) show_one_worker_pool(pool); rcu_read_unlock(); } /** * show_freezable_workqueues - dump freezable workqueue state * * Called from try_to_freeze_tasks() and prints out all freezable workqueues * still busy. */ void show_freezable_workqueues(void) { struct workqueue_struct *wq; rcu_read_lock(); pr_info("Showing freezable workqueues that are still busy:\n"); list_for_each_entry_rcu(wq, &workqueues, list) { if (!(wq->flags & WQ_FREEZABLE)) continue; show_one_workqueue(wq); } rcu_read_unlock(); } /* used to show worker information through /proc/PID/{comm,stat,status} */ void wq_worker_comm(char *buf, size_t size, struct task_struct *task) { int off; /* always show the actual comm */ off = strscpy(buf, task->comm, size); if (off < 0) return; /* stabilize PF_WQ_WORKER and worker pool association */ mutex_lock(&wq_pool_attach_mutex); if (task->flags & PF_WQ_WORKER) { struct worker *worker = kthread_data(task); struct worker_pool *pool = worker->pool; if (pool) { raw_spin_lock_irq(&pool->lock); /* * ->desc tracks information (wq name or * set_worker_desc()) for the latest execution. If * current, prepend '+', otherwise '-'. */ if (worker->desc[0] != '\0') { if (worker->current_work) scnprintf(buf + off, size - off, "+%s", worker->desc); else scnprintf(buf + off, size - off, "-%s", worker->desc); } raw_spin_unlock_irq(&pool->lock); } } mutex_unlock(&wq_pool_attach_mutex); } #ifdef CONFIG_SMP /* * CPU hotplug. * * There are two challenges in supporting CPU hotplug. Firstly, there * are a lot of assumptions on strong associations among work, pwq and * pool which make migrating pending and scheduled works very * difficult to implement without impacting hot paths. Secondly, * worker pools serve mix of short, long and very long running works making * blocked draining impractical. * * This is solved by allowing the pools to be disassociated from the CPU * running as an unbound one and allowing it to be reattached later if the * cpu comes back online. */ static void unbind_workers(int cpu) { struct worker_pool *pool; struct worker *worker; for_each_cpu_worker_pool(pool, cpu) { mutex_lock(&wq_pool_attach_mutex); raw_spin_lock_irq(&pool->lock); /* * We've blocked all attach/detach operations. Make all workers * unbound and set DISASSOCIATED. Before this, all workers * must be on the cpu. After this, they may become diasporas. * And the preemption disabled section in their sched callbacks * are guaranteed to see WORKER_UNBOUND since the code here * is on the same cpu. */ for_each_pool_worker(worker, pool) worker->flags |= WORKER_UNBOUND; pool->flags |= POOL_DISASSOCIATED; /* * The handling of nr_running in sched callbacks are disabled * now. Zap nr_running. After this, nr_running stays zero and * need_more_worker() and keep_working() are always true as * long as the worklist is not empty. This pool now behaves as * an unbound (in terms of concurrency management) pool which * are served by workers tied to the pool. */ pool->nr_running = 0; /* * With concurrency management just turned off, a busy * worker blocking could lead to lengthy stalls. Kick off * unbound chain execution of currently pending work items. 
*/ kick_pool(pool); raw_spin_unlock_irq(&pool->lock); for_each_pool_worker(worker, pool) unbind_worker(worker); mutex_unlock(&wq_pool_attach_mutex); } } /** * rebind_workers - rebind all workers of a pool to the associated CPU * @pool: pool of interest * * @pool->cpu is coming online. Rebind all workers to the CPU. */ static void rebind_workers(struct worker_pool *pool) { struct worker *worker; lockdep_assert_held(&wq_pool_attach_mutex); /* * Restore CPU affinity of all workers. As all idle workers should * be on the run-queue of the associated CPU before any local * wake-ups for concurrency management happen, restore CPU affinity * of all workers first and then clear UNBOUND. As we're called * from CPU_ONLINE, the following shouldn't fail. */ for_each_pool_worker(worker, pool) { kthread_set_per_cpu(worker->task, pool->cpu); WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, pool_allowed_cpus(pool)) < 0); } raw_spin_lock_irq(&pool->lock); pool->flags &= ~POOL_DISASSOCIATED; for_each_pool_worker(worker, pool) { unsigned int worker_flags = worker->flags; /* * We want to clear UNBOUND but can't directly call * worker_clr_flags() or adjust nr_running. Atomically * replace UNBOUND with another NOT_RUNNING flag REBOUND. * @worker will clear REBOUND using worker_clr_flags() when * it initiates the next execution cycle thus restoring * concurrency management. Note that when or whether * @worker clears REBOUND doesn't affect correctness. * * WRITE_ONCE() is necessary because @worker->flags may be * tested without holding any lock in * wq_worker_running(). Without it, NOT_RUNNING test may * fail incorrectly leading to premature concurrency * management operations. */ WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND)); worker_flags |= WORKER_REBOUND; worker_flags &= ~WORKER_UNBOUND; WRITE_ONCE(worker->flags, worker_flags); } raw_spin_unlock_irq(&pool->lock); } /** * restore_unbound_workers_cpumask - restore cpumask of unbound workers * @pool: unbound pool of interest * @cpu: the CPU which is coming up * * An unbound pool may end up with a cpumask which doesn't have any online * CPUs. When a worker of such pool get scheduled, the scheduler resets * its cpus_allowed. If @cpu is in @pool's cpumask which didn't have any * online CPU before, cpus_allowed of all its workers should be restored. */ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) { static cpumask_t cpumask; struct worker *worker; lockdep_assert_held(&wq_pool_attach_mutex); /* is @cpu allowed for @pool? 
*/ if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) return; cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask); /* as we're called from CPU_ONLINE, the following shouldn't fail */ for_each_pool_worker(worker, pool) WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0); } int workqueue_prepare_cpu(unsigned int cpu) { struct worker_pool *pool; for_each_cpu_worker_pool(pool, cpu) { if (pool->nr_workers) continue; if (!create_worker(pool)) return -ENOMEM; } return 0; } int workqueue_online_cpu(unsigned int cpu) { struct worker_pool *pool; struct workqueue_struct *wq; int pi; mutex_lock(&wq_pool_mutex); for_each_pool(pool, pi) { /* BH pools aren't affected by hotplug */ if (pool->flags & POOL_BH) continue; mutex_lock(&wq_pool_attach_mutex); if (pool->cpu == cpu) rebind_workers(pool); else if (pool->cpu < 0) restore_unbound_workers_cpumask(pool, cpu); mutex_unlock(&wq_pool_attach_mutex); } /* update pod affinity of unbound workqueues */ list_for_each_entry(wq, &workqueues, list) { struct workqueue_attrs *attrs = wq->unbound_attrs; if (attrs) { const struct wq_pod_type *pt = wqattrs_pod_type(attrs); int tcpu; for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) wq_update_pod(wq, tcpu, cpu, true); mutex_lock(&wq->mutex); wq_update_node_max_active(wq, -1); mutex_unlock(&wq->mutex); } } mutex_unlock(&wq_pool_mutex); return 0; } int workqueue_offline_cpu(unsigned int cpu) { struct workqueue_struct *wq; /* unbinding per-cpu workers should happen on the local CPU */ if (WARN_ON(cpu != smp_processor_id())) return -1; unbind_workers(cpu); /* update pod affinity of unbound workqueues */ mutex_lock(&wq_pool_mutex); list_for_each_entry(wq, &workqueues, list) { struct workqueue_attrs *attrs = wq->unbound_attrs; if (attrs) { const struct wq_pod_type *pt = wqattrs_pod_type(attrs); int tcpu; for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) wq_update_pod(wq, tcpu, cpu, false); mutex_lock(&wq->mutex); wq_update_node_max_active(wq, cpu); mutex_unlock(&wq->mutex); } } mutex_unlock(&wq_pool_mutex); return 0; } struct work_for_cpu { struct work_struct work; long (*fn)(void *); void *arg; long ret; }; static void work_for_cpu_fn(struct work_struct *work) { struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); wfc->ret = wfc->fn(wfc->arg); } /** * work_on_cpu_key - run a function in thread context on a particular cpu * @cpu: the cpu to run on * @fn: the function to run * @arg: the function arg * @key: The lock class key for lock debugging purposes * * It is up to the caller to ensure that the cpu doesn't go offline. * The caller must not hold any locks which would prevent @fn from completing. * * Return: The value @fn returns. */ long work_on_cpu_key(int cpu, long (*fn)(void *), void *arg, struct lock_class_key *key) { struct work_for_cpu wfc = { .fn = fn, .arg = arg }; INIT_WORK_ONSTACK_KEY(&wfc.work, work_for_cpu_fn, key); schedule_work_on(cpu, &wfc.work); flush_work(&wfc.work); destroy_work_on_stack(&wfc.work); return wfc.ret; } EXPORT_SYMBOL_GPL(work_on_cpu_key); /** * work_on_cpu_safe_key - run a function in thread context on a particular cpu * @cpu: the cpu to run on * @fn: the function to run * @arg: the function argument * @key: The lock class key for lock debugging purposes * * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold * any locks which would prevent @fn from completing. * * Return: The value @fn returns. 
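 *
 * A minimal caller-side sketch (hypothetical names my_probe_fn/my_dev,
 * assuming CONFIG_SMP), using the work_on_cpu_safe() wrapper around this
 * function:
 *
 *	long ret = work_on_cpu_safe(cpu, my_probe_fn, my_dev);
 *
 * my_probe_fn() has the signature long (*)(void *) and runs in a kworker
 * bound to @cpu; -ENODEV is returned if @cpu is offline.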
*/ long work_on_cpu_safe_key(int cpu, long (*fn)(void *), void *arg, struct lock_class_key *key) { long ret = -ENODEV; cpus_read_lock(); if (cpu_online(cpu)) ret = work_on_cpu_key(cpu, fn, arg, key); cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(work_on_cpu_safe_key); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER /** * freeze_workqueues_begin - begin freezing workqueues * * Start freezing workqueues. After this function returns, all freezable * workqueues will queue new works to their inactive_works list instead of * pool->worklist. * * CONTEXT: * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. */ void freeze_workqueues_begin(void) { struct workqueue_struct *wq; mutex_lock(&wq_pool_mutex); WARN_ON_ONCE(workqueue_freezing); workqueue_freezing = true; list_for_each_entry(wq, &workqueues, list) { mutex_lock(&wq->mutex); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); } mutex_unlock(&wq_pool_mutex); } /** * freeze_workqueues_busy - are freezable workqueues still busy? * * Check whether freezing is complete. This function must be called * between freeze_workqueues_begin() and thaw_workqueues(). * * CONTEXT: * Grabs and releases wq_pool_mutex. * * Return: * %true if some freezable workqueues are still busy. %false if freezing * is complete. */ bool freeze_workqueues_busy(void) { bool busy = false; struct workqueue_struct *wq; struct pool_workqueue *pwq; mutex_lock(&wq_pool_mutex); WARN_ON_ONCE(!workqueue_freezing); list_for_each_entry(wq, &workqueues, list) { if (!(wq->flags & WQ_FREEZABLE)) continue; /* * nr_active is monotonically decreasing. It's safe * to peek without lock. */ rcu_read_lock(); for_each_pwq(pwq, wq) { WARN_ON_ONCE(pwq->nr_active < 0); if (pwq->nr_active) { busy = true; rcu_read_unlock(); goto out_unlock; } } rcu_read_unlock(); } out_unlock: mutex_unlock(&wq_pool_mutex); return busy; } /** * thaw_workqueues - thaw workqueues * * Thaw workqueues. Normal queueing is restored and all collected * frozen works are transferred to their respective pool worklists. * * CONTEXT: * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. */ void thaw_workqueues(void) { struct workqueue_struct *wq; mutex_lock(&wq_pool_mutex); if (!workqueue_freezing) goto out_unlock; workqueue_freezing = false; /* restore max_active and repopulate worklist */ list_for_each_entry(wq, &workqueues, list) { mutex_lock(&wq->mutex); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); } out_unlock: mutex_unlock(&wq_pool_mutex); } #endif /* CONFIG_FREEZER */ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) { LIST_HEAD(ctxs); int ret = 0; struct workqueue_struct *wq; struct apply_wqattrs_ctx *ctx, *n; lockdep_assert_held(&wq_pool_mutex); list_for_each_entry(wq, &workqueues, list) { if (!(wq->flags & WQ_UNBOUND) || (wq->flags & __WQ_DESTROYING)) continue; ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); break; } list_add_tail(&ctx->list, &ctxs); } list_for_each_entry_safe(ctx, n, &ctxs, list) { if (!ret) apply_wqattrs_commit(ctx); apply_wqattrs_cleanup(ctx); } if (!ret) { mutex_lock(&wq_pool_attach_mutex); cpumask_copy(wq_unbound_cpumask, unbound_cpumask); mutex_unlock(&wq_pool_attach_mutex); } return ret; } /** * workqueue_unbound_exclude_cpumask - Exclude given CPUs from unbound cpumask * @exclude_cpumask: the cpumask to be excluded from wq_unbound_cpumask * * This function can be called from cpuset code to provide a set of isolated * CPUs that should be excluded from wq_unbound_cpumask. 
The caller must hold * either cpus_read_lock or cpus_write_lock. */ int workqueue_unbound_exclude_cpumask(cpumask_var_t exclude_cpumask) { cpumask_var_t cpumask; int ret = 0; if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) return -ENOMEM; lockdep_assert_cpus_held(); mutex_lock(&wq_pool_mutex); /* Save the current isolated cpumask & export it via sysfs */ cpumask_copy(wq_isolated_cpumask, exclude_cpumask); /* * If the operation fails, it will fall back to * wq_requested_unbound_cpumask which is initially set to * (HK_TYPE_WQ ∩ HK_TYPE_DOMAIN) house keeping mask and rewritten * by any subsequent write to workqueue/cpumask sysfs file. */ if (!cpumask_andnot(cpumask, wq_requested_unbound_cpumask, exclude_cpumask)) cpumask_copy(cpumask, wq_requested_unbound_cpumask); if (!cpumask_equal(cpumask, wq_unbound_cpumask)) ret = workqueue_apply_unbound_cpumask(cpumask); mutex_unlock(&wq_pool_mutex); free_cpumask_var(cpumask); return ret; } static int parse_affn_scope(const char *val) { int i; for (i = 0; i < ARRAY_SIZE(wq_affn_names); i++) { if (!strncasecmp(val, wq_affn_names[i], strlen(wq_affn_names[i]))) return i; } return -EINVAL; } static int wq_affn_dfl_set(const char *val, const struct kernel_param *kp) { struct workqueue_struct *wq; int affn, cpu; affn = parse_affn_scope(val); if (affn < 0) return affn; if (affn == WQ_AFFN_DFL) return -EINVAL; cpus_read_lock(); mutex_lock(&wq_pool_mutex); wq_affn_dfl = affn; list_for_each_entry(wq, &workqueues, list) { for_each_online_cpu(cpu) { wq_update_pod(wq, cpu, cpu, true); } } mutex_unlock(&wq_pool_mutex); cpus_read_unlock(); return 0; } static int wq_affn_dfl_get(char *buffer, const struct kernel_param *kp) { return scnprintf(buffer, PAGE_SIZE, "%s\n", wq_affn_names[wq_affn_dfl]); } static const struct kernel_param_ops wq_affn_dfl_ops = { .set = wq_affn_dfl_set, .get = wq_affn_dfl_get, }; module_param_cb(default_affinity_scope, &wq_affn_dfl_ops, NULL, 0644); #ifdef CONFIG_SYSFS /* * Workqueues with WQ_SYSFS flag set is visible to userland via * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the * following attributes. * * per_cpu RO bool : whether the workqueue is per-cpu or unbound * max_active RW int : maximum number of in-flight work items * * Unbound workqueues have the following extra attributes. 
* * nice RW int : nice value of the workers * cpumask RW mask : bitmask of allowed CPUs for the workers * affinity_scope RW str : worker CPU affinity scope (cache, numa, none) * affinity_strict RW bool : worker CPU affinity is strict */ struct wq_device { struct workqueue_struct *wq; struct device dev; }; static struct workqueue_struct *dev_to_wq(struct device *dev) { struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); return wq_dev->wq; } static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); } static DEVICE_ATTR_RO(per_cpu); static ssize_t max_active_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); } static ssize_t max_active_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); int val; if (sscanf(buf, "%d", &val) != 1 || val <= 0) return -EINVAL; workqueue_set_max_active(wq, val); return count; } static DEVICE_ATTR_RW(max_active); static struct attribute *wq_sysfs_attrs[] = { &dev_attr_per_cpu.attr, &dev_attr_max_active.attr, NULL, }; ATTRIBUTE_GROUPS(wq_sysfs); static void apply_wqattrs_lock(void) { /* CPUs should stay stable across pwq creations and installations */ cpus_read_lock(); mutex_lock(&wq_pool_mutex); } static void apply_wqattrs_unlock(void) { mutex_unlock(&wq_pool_mutex); cpus_read_unlock(); } static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); int written; mutex_lock(&wq->mutex); written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); mutex_unlock(&wq->mutex); return written; } /* prepare workqueue_attrs for sysfs store operations */ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) { struct workqueue_attrs *attrs; lockdep_assert_held(&wq_pool_mutex); attrs = alloc_workqueue_attrs(); if (!attrs) return NULL; copy_workqueue_attrs(attrs, wq->unbound_attrs); return attrs; } static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; int ret = -ENOMEM; apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (!attrs) goto out_unlock; if (sscanf(buf, "%d", &attrs->nice) == 1 && attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) ret = apply_workqueue_attrs_locked(wq, attrs); else ret = -EINVAL; out_unlock: apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } static ssize_t wq_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); int written; mutex_lock(&wq->mutex); written = scnprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(wq->unbound_attrs->cpumask)); mutex_unlock(&wq->mutex); return written; } static ssize_t wq_cpumask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; int ret = -ENOMEM; apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (!attrs) goto out_unlock; ret = cpumask_parse(buf, attrs->cpumask); if (!ret) ret = apply_workqueue_attrs_locked(wq, attrs); out_unlock: apply_wqattrs_unlock(); free_workqueue_attrs(attrs); 
return ret ?: count; } static ssize_t wq_affn_scope_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); int written; mutex_lock(&wq->mutex); if (wq->unbound_attrs->affn_scope == WQ_AFFN_DFL) written = scnprintf(buf, PAGE_SIZE, "%s (%s)\n", wq_affn_names[WQ_AFFN_DFL], wq_affn_names[wq_affn_dfl]); else written = scnprintf(buf, PAGE_SIZE, "%s\n", wq_affn_names[wq->unbound_attrs->affn_scope]); mutex_unlock(&wq->mutex); return written; } static ssize_t wq_affn_scope_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; int affn, ret = -ENOMEM; affn = parse_affn_scope(buf); if (affn < 0) return affn; apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (attrs) { attrs->affn_scope = affn; ret = apply_workqueue_attrs_locked(wq, attrs); } apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } static ssize_t wq_affinity_strict_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); return scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->affn_strict); } static ssize_t wq_affinity_strict_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; int v, ret = -ENOMEM; if (sscanf(buf, "%d", &v) != 1) return -EINVAL; apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (attrs) { attrs->affn_strict = (bool)v; ret = apply_workqueue_attrs_locked(wq, attrs); } apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } static struct device_attribute wq_sysfs_unbound_attrs[] = { __ATTR(nice, 0644, wq_nice_show, wq_nice_store), __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), __ATTR(affinity_scope, 0644, wq_affn_scope_show, wq_affn_scope_store), __ATTR(affinity_strict, 0644, wq_affinity_strict_show, wq_affinity_strict_store), __ATTR_NULL, }; static const struct bus_type wq_subsys = { .name = "workqueue", .dev_groups = wq_sysfs_groups, }; /** * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask * @cpumask: the cpumask to set * * The low-level workqueues cpumask is a global cpumask that limits * the affinity of all unbound workqueues. This function check the @cpumask * and apply it to all unbound workqueues and updates all pwqs of them. * * Return: 0 - Success * -EINVAL - Invalid @cpumask * -ENOMEM - Failed to allocate memory for attrs or pwqs. */ static int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) { int ret = -EINVAL; /* * Not excluding isolated cpus on purpose. * If the user wishes to include them, we allow that. 
*/ cpumask_and(cpumask, cpumask, cpu_possible_mask); if (!cpumask_empty(cpumask)) { apply_wqattrs_lock(); cpumask_copy(wq_requested_unbound_cpumask, cpumask); if (cpumask_equal(cpumask, wq_unbound_cpumask)) { ret = 0; goto out_unlock; } ret = workqueue_apply_unbound_cpumask(cpumask); out_unlock: apply_wqattrs_unlock(); } return ret; } static ssize_t __wq_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf, cpumask_var_t mask) { int written; mutex_lock(&wq_pool_mutex); written = scnprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); mutex_unlock(&wq_pool_mutex); return written; } static ssize_t wq_unbound_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { return __wq_cpumask_show(dev, attr, buf, wq_unbound_cpumask); } static ssize_t wq_requested_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { return __wq_cpumask_show(dev, attr, buf, wq_requested_unbound_cpumask); } static ssize_t wq_isolated_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { return __wq_cpumask_show(dev, attr, buf, wq_isolated_cpumask); } static ssize_t wq_unbound_cpumask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { cpumask_var_t cpumask; int ret; if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) return -ENOMEM; ret = cpumask_parse(buf, cpumask); if (!ret) ret = workqueue_set_unbound_cpumask(cpumask); free_cpumask_var(cpumask); return ret ? ret : count; } static struct device_attribute wq_sysfs_cpumask_attrs[] = { __ATTR(cpumask, 0644, wq_unbound_cpumask_show, wq_unbound_cpumask_store), __ATTR(cpumask_requested, 0444, wq_requested_cpumask_show, NULL), __ATTR(cpumask_isolated, 0444, wq_isolated_cpumask_show, NULL), __ATTR_NULL, }; static int __init wq_sysfs_init(void) { struct device *dev_root; int err; err = subsys_virtual_register(&wq_subsys, NULL); if (err) return err; dev_root = bus_get_dev_root(&wq_subsys); if (dev_root) { struct device_attribute *attr; for (attr = wq_sysfs_cpumask_attrs; attr->attr.name; attr++) { err = device_create_file(dev_root, attr); if (err) break; } put_device(dev_root); } return err; } core_initcall(wq_sysfs_init); static void wq_device_release(struct device *dev) { struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); kfree(wq_dev); } /** * workqueue_sysfs_register - make a workqueue visible in sysfs * @wq: the workqueue to register * * Expose @wq in sysfs under /sys/bus/workqueue/devices. * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set * which is the preferred method. * * Workqueue user should use this function directly iff it wants to apply * workqueue_attrs before making the workqueue visible in sysfs; otherwise, * apply_workqueue_attrs() may race against userland updating the * attributes. * * Return: 0 on success, -errno on failure. */ int workqueue_sysfs_register(struct workqueue_struct *wq) { struct wq_device *wq_dev; int ret; /* * Adjusting max_active breaks ordering guarantee. Disallow exposing * ordered workqueues. */ if (WARN_ON(wq->flags & __WQ_ORDERED)) return -EINVAL; wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); if (!wq_dev) return -ENOMEM; wq_dev->wq = wq; wq_dev->dev.bus = &wq_subsys; wq_dev->dev.release = wq_device_release; dev_set_name(&wq_dev->dev, "%s", wq->name); /* * unbound_attrs are created separately. Suppress uevent until * everything is ready. 
*/ dev_set_uevent_suppress(&wq_dev->dev, true); ret = device_register(&wq_dev->dev); if (ret) { put_device(&wq_dev->dev); wq->wq_dev = NULL; return ret; } if (wq->flags & WQ_UNBOUND) { struct device_attribute *attr; for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { ret = device_create_file(&wq_dev->dev, attr); if (ret) { device_unregister(&wq_dev->dev); wq->wq_dev = NULL; return ret; } } } dev_set_uevent_suppress(&wq_dev->dev, false); kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); return 0; } /** * workqueue_sysfs_unregister - undo workqueue_sysfs_register() * @wq: the workqueue to unregister * * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister. */ static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { struct wq_device *wq_dev = wq->wq_dev; if (!wq->wq_dev) return; wq->wq_dev = NULL; device_unregister(&wq_dev->dev); } #else /* CONFIG_SYSFS */ static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { } #endif /* CONFIG_SYSFS */ /* * Workqueue watchdog. * * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal * flush dependency, a concurrency managed work item which stays RUNNING * indefinitely. Workqueue stalls can be very difficult to debug as the * usual warning mechanisms don't trigger and internal workqueue state is * largely opaque. * * Workqueue watchdog monitors all worker pools periodically and dumps * state if some pools failed to make forward progress for a while where * forward progress is defined as the first item on ->worklist changing. * * This mechanism is controlled through the kernel parameter * "workqueue.watchdog_thresh" which can be updated at runtime through the * corresponding sysfs parameter file. */ #ifdef CONFIG_WQ_WATCHDOG static unsigned long wq_watchdog_thresh = 30; static struct timer_list wq_watchdog_timer; static unsigned long wq_watchdog_touched = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES; /* * Show workers that might prevent the processing of pending work items. * The only candidates are CPU-bound workers in the running state. * Pending work items should be handled by another idle worker * in all other situations. */ static void show_cpu_pool_hog(struct worker_pool *pool) { struct worker *worker; unsigned long irq_flags; int bkt; raw_spin_lock_irqsave(&pool->lock, irq_flags); hash_for_each(pool->busy_hash, bkt, worker, hentry) { if (task_is_running(worker->task)) { /* * Defer printing to avoid deadlocks in console * drivers that queue work while holding locks * also taken in their write paths. 
*/ printk_deferred_enter(); pr_info("pool %d:\n", pool->id); sched_show_task(worker->task); printk_deferred_exit(); } } raw_spin_unlock_irqrestore(&pool->lock, irq_flags); } static void show_cpu_pools_hogs(void) { struct worker_pool *pool; int pi; pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); rcu_read_lock(); for_each_pool(pool, pi) { if (pool->cpu_stall) show_cpu_pool_hog(pool); } rcu_read_unlock(); } static void wq_watchdog_reset_touched(void) { int cpu; wq_watchdog_touched = jiffies; for_each_possible_cpu(cpu) per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; } static void wq_watchdog_timer_fn(struct timer_list *unused) { unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; bool lockup_detected = false; bool cpu_pool_stall = false; unsigned long now = jiffies; struct worker_pool *pool; int pi; if (!thresh) return; rcu_read_lock(); for_each_pool(pool, pi) { unsigned long pool_ts, touched, ts; pool->cpu_stall = false; if (list_empty(&pool->worklist)) continue; /* * If a virtual machine is stopped by the host it can look to * the watchdog like a stall. */ kvm_check_and_clear_guest_paused(); /* get the latest of pool and touched timestamps */ if (pool->cpu >= 0) touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); else touched = READ_ONCE(wq_watchdog_touched); pool_ts = READ_ONCE(pool->watchdog_ts); if (time_after(pool_ts, touched)) ts = pool_ts; else ts = touched; /* did we stall? */ if (time_after(now, ts + thresh)) { lockup_detected = true; if (pool->cpu >= 0 && !(pool->flags & POOL_BH)) { pool->cpu_stall = true; cpu_pool_stall = true; } pr_emerg("BUG: workqueue lockup - pool"); pr_cont_pool_info(pool); pr_cont(" stuck for %us!\n", jiffies_to_msecs(now - pool_ts) / 1000); } } rcu_read_unlock(); if (lockup_detected) show_all_workqueues(); if (cpu_pool_stall) show_cpu_pools_hogs(); wq_watchdog_reset_touched(); mod_timer(&wq_watchdog_timer, jiffies + thresh); } notrace void wq_watchdog_touch(int cpu) { if (cpu >= 0) per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; wq_watchdog_touched = jiffies; } static void wq_watchdog_set_thresh(unsigned long thresh) { wq_watchdog_thresh = 0; del_timer_sync(&wq_watchdog_timer); if (thresh) { wq_watchdog_thresh = thresh; wq_watchdog_reset_touched(); mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ); } } static int wq_watchdog_param_set_thresh(const char *val, const struct kernel_param *kp) { unsigned long thresh; int ret; ret = kstrtoul(val, 0, &thresh); if (ret) return ret; if (system_wq) wq_watchdog_set_thresh(thresh); else wq_watchdog_thresh = thresh; return 0; } static const struct kernel_param_ops wq_watchdog_thresh_ops = { .set = wq_watchdog_param_set_thresh, .get = param_get_ulong, }; module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh, 0644); static void wq_watchdog_init(void) { timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE); wq_watchdog_set_thresh(wq_watchdog_thresh); } #else /* CONFIG_WQ_WATCHDOG */ static inline void wq_watchdog_init(void) { } #endif /* CONFIG_WQ_WATCHDOG */ static void bh_pool_kick_normal(struct irq_work *irq_work) { raise_softirq_irqoff(TASKLET_SOFTIRQ); } static void bh_pool_kick_highpri(struct irq_work *irq_work) { raise_softirq_irqoff(HI_SOFTIRQ); } static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask) { if (!cpumask_intersects(wq_unbound_cpumask, mask)) { pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n", 
cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask)); return; } cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask); } static void __init init_cpu_worker_pool(struct worker_pool *pool, int cpu, int nice) { BUG_ON(init_worker_pool(pool)); pool->cpu = cpu; cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); cpumask_copy(pool->attrs->__pod_cpumask, cpumask_of(cpu)); pool->attrs->nice = nice; pool->attrs->affn_strict = true; pool->node = cpu_to_node(cpu); /* alloc pool ID */ mutex_lock(&wq_pool_mutex); BUG_ON(worker_pool_assign_id(pool)); mutex_unlock(&wq_pool_mutex); } /** * workqueue_init_early - early init for workqueue subsystem * * This is the first step of three-staged workqueue subsystem initialization and * invoked as soon as the bare basics - memory allocation, cpumasks and idr are * up. It sets up all the data structures and system workqueues and allows early * boot code to create workqueues and queue/cancel work items. Actual work item * execution starts only after kthreads can be created and scheduled right * before early initcalls. */ void __init workqueue_init_early(void) { struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_SYSTEM]; int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; void (*irq_work_fns[2])(struct irq_work *) = { bh_pool_kick_normal, bh_pool_kick_highpri }; int i, cpu; BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&wq_requested_unbound_cpumask, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&wq_isolated_cpumask, GFP_KERNEL)); cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ)); restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN)); if (!cpumask_empty(&wq_cmdline_cpumask)) restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask); cpumask_copy(wq_requested_unbound_cpumask, wq_unbound_cpumask); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); wq_update_pod_attrs_buf = alloc_workqueue_attrs(); BUG_ON(!wq_update_pod_attrs_buf); /* * If nohz_full is enabled, set power efficient workqueue as unbound. * This allows workqueue items to be moved to HK CPUs. */ if (housekeeping_enabled(HK_TYPE_TICK)) wq_power_efficient = true; /* initialize WQ_AFFN_SYSTEM pods */ pt->pod_cpus = kcalloc(1, sizeof(pt->pod_cpus[0]), GFP_KERNEL); pt->pod_node = kcalloc(1, sizeof(pt->pod_node[0]), GFP_KERNEL); pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); BUG_ON(!pt->pod_cpus || !pt->pod_node || !pt->cpu_pod); BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[0], GFP_KERNEL, NUMA_NO_NODE)); pt->nr_pods = 1; cpumask_copy(pt->pod_cpus[0], cpu_possible_mask); pt->pod_node[0] = NUMA_NO_NODE; pt->cpu_pod[0] = 0; /* initialize BH and CPU pools */ for_each_possible_cpu(cpu) { struct worker_pool *pool; i = 0; for_each_bh_worker_pool(pool, cpu) { init_cpu_worker_pool(pool, cpu, std_nice[i]); pool->flags |= POOL_BH; init_irq_work(bh_pool_irq_work(pool), irq_work_fns[i]); i++; } i = 0; for_each_cpu_worker_pool(pool, cpu) init_cpu_worker_pool(pool, cpu, std_nice[i++]); } /* create default unbound and ordered wq attrs */ for (i = 0; i < NR_STD_WORKER_POOLS; i++) { struct workqueue_attrs *attrs; BUG_ON(!(attrs = alloc_workqueue_attrs())); attrs->nice = std_nice[i]; unbound_std_wq_attrs[i] = attrs; /* * An ordered wq should have only one pwq as ordering is * guaranteed by max_active which is enforced by pwqs. 
*/ BUG_ON(!(attrs = alloc_workqueue_attrs())); attrs->nice = std_nice[i]; attrs->ordered = true; ordered_wq_attrs[i] = attrs; } system_wq = alloc_workqueue("events", 0, 0); system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0); system_long_wq = alloc_workqueue("events_long", 0, 0); system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE); system_freezable_wq = alloc_workqueue("events_freezable", WQ_FREEZABLE, 0); system_power_efficient_wq = alloc_workqueue("events_power_efficient", WQ_POWER_EFFICIENT, 0); system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient", WQ_FREEZABLE | WQ_POWER_EFFICIENT, 0); system_bh_wq = alloc_workqueue("events_bh", WQ_BH, 0); system_bh_highpri_wq = alloc_workqueue("events_bh_highpri", WQ_BH | WQ_HIGHPRI, 0); BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq || !system_unbound_wq || !system_freezable_wq || !system_power_efficient_wq || !system_freezable_power_efficient_wq || !system_bh_wq || !system_bh_highpri_wq); } static void __init wq_cpu_intensive_thresh_init(void) { unsigned long thresh; unsigned long bogo; pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release"); BUG_ON(IS_ERR(pwq_release_worker)); /* if the user set it to a specific value, keep it */ if (wq_cpu_intensive_thresh_us != ULONG_MAX) return; /* * The default of 10ms is derived from the fact that most modern (as of * 2023) processors can do a lot in 10ms and that it's just below what * most consider human-perceivable. However, the kernel also runs on a * lot slower CPUs including microcontrollers where the threshold is way * too low. * * Let's scale up the threshold upto 1 second if BogoMips is below 4000. * This is by no means accurate but it doesn't have to be. The mechanism * is still useful even when the threshold is fully scaled up. Also, as * the reports would usually be applicable to everyone, some machines * operating on longer thresholds won't significantly diminish their * usefulness. */ thresh = 10 * USEC_PER_MSEC; /* see init/calibrate.c for lpj -> BogoMIPS calculation */ bogo = max_t(unsigned long, loops_per_jiffy / 500000 * HZ, 1); if (bogo < 4000) thresh = min_t(unsigned long, thresh * 4000 / bogo, USEC_PER_SEC); pr_debug("wq_cpu_intensive_thresh: lpj=%lu BogoMIPS=%lu thresh_us=%lu\n", loops_per_jiffy, bogo, thresh); wq_cpu_intensive_thresh_us = thresh; } /** * workqueue_init - bring workqueue subsystem fully online * * This is the second step of three-staged workqueue subsystem initialization * and invoked as soon as kthreads can be created and scheduled. Workqueues have * been created and work items queued on them, but there are no kworkers * executing the work items yet. Populate the worker pools with the initial * workers and enable future kworker creations. */ void __init workqueue_init(void) { struct workqueue_struct *wq; struct worker_pool *pool; int cpu, bkt; wq_cpu_intensive_thresh_init(); mutex_lock(&wq_pool_mutex); /* * Per-cpu pools created earlier could be missing node hint. Fix them * up. Also, create a rescuer for workqueues that requested it. */ for_each_possible_cpu(cpu) { for_each_bh_worker_pool(pool, cpu) pool->node = cpu_to_node(cpu); for_each_cpu_worker_pool(pool, cpu) pool->node = cpu_to_node(cpu); } list_for_each_entry(wq, &workqueues, list) { WARN(init_rescuer(wq), "workqueue: failed to create early rescuer for %s", wq->name); } mutex_unlock(&wq_pool_mutex); /* * Create the initial workers. 
A BH pool has one pseudo worker that * represents the shared BH execution context and thus doesn't get * affected by hotplug events. Create the BH pseudo workers for all * possible CPUs here. */ for_each_possible_cpu(cpu) for_each_bh_worker_pool(pool, cpu) BUG_ON(!create_worker(pool)); for_each_online_cpu(cpu) { for_each_cpu_worker_pool(pool, cpu) { pool->flags &= ~POOL_DISASSOCIATED; BUG_ON(!create_worker(pool)); } } hash_for_each(unbound_pool_hash, bkt, pool, hash_node) BUG_ON(!create_worker(pool)); wq_online = true; wq_watchdog_init(); } /* * Initialize @pt by first initializing @pt->cpu_pod[] with pod IDs according to * @cpu_shares_pod(). Each subset of CPUs that share a pod is assigned a unique * and consecutive pod ID. The rest of @pt is initialized accordingly. */ static void __init init_pod_type(struct wq_pod_type *pt, bool (*cpus_share_pod)(int, int)) { int cur, pre, cpu, pod; pt->nr_pods = 0; /* init @pt->cpu_pod[] according to @cpus_share_pod() */ pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); BUG_ON(!pt->cpu_pod); for_each_possible_cpu(cur) { for_each_possible_cpu(pre) { if (pre >= cur) { pt->cpu_pod[cur] = pt->nr_pods++; break; } if (cpus_share_pod(cur, pre)) { pt->cpu_pod[cur] = pt->cpu_pod[pre]; break; } } } /* init the rest to match @pt->cpu_pod[] */ pt->pod_cpus = kcalloc(pt->nr_pods, sizeof(pt->pod_cpus[0]), GFP_KERNEL); pt->pod_node = kcalloc(pt->nr_pods, sizeof(pt->pod_node[0]), GFP_KERNEL); BUG_ON(!pt->pod_cpus || !pt->pod_node); for (pod = 0; pod < pt->nr_pods; pod++) BUG_ON(!zalloc_cpumask_var(&pt->pod_cpus[pod], GFP_KERNEL)); for_each_possible_cpu(cpu) { cpumask_set_cpu(cpu, pt->pod_cpus[pt->cpu_pod[cpu]]); pt->pod_node[pt->cpu_pod[cpu]] = cpu_to_node(cpu); } } static bool __init cpus_dont_share(int cpu0, int cpu1) { return false; } static bool __init cpus_share_smt(int cpu0, int cpu1) { #ifdef CONFIG_SCHED_SMT return cpumask_test_cpu(cpu0, cpu_smt_mask(cpu1)); #else return false; #endif } static bool __init cpus_share_numa(int cpu0, int cpu1) { return cpu_to_node(cpu0) == cpu_to_node(cpu1); } /** * workqueue_init_topology - initialize CPU pods for unbound workqueues * * This is the third step of three-staged workqueue subsystem initialization and * invoked after SMP and topology information are fully initialized. It * initializes the unbound CPU pods accordingly. */ void __init workqueue_init_topology(void) { struct workqueue_struct *wq; int cpu; init_pod_type(&wq_pod_types[WQ_AFFN_CPU], cpus_dont_share); init_pod_type(&wq_pod_types[WQ_AFFN_SMT], cpus_share_smt); init_pod_type(&wq_pod_types[WQ_AFFN_CACHE], cpus_share_cache); init_pod_type(&wq_pod_types[WQ_AFFN_NUMA], cpus_share_numa); wq_topo_initialized = true; mutex_lock(&wq_pool_mutex); /* * Workqueues allocated earlier would have all CPUs sharing the default * worker pool. Explicitly call wq_update_pod() on all workqueue and CPU * combinations to apply per-pod sharing. 
	 */
	list_for_each_entry(wq, &workqueues, list) {
		for_each_online_cpu(cpu)
			wq_update_pod(wq, cpu, cpu, true);
		if (wq->flags & WQ_UNBOUND) {
			mutex_lock(&wq->mutex);
			wq_update_node_max_active(wq, -1);
			mutex_unlock(&wq->mutex);
		}
	}

	mutex_unlock(&wq_pool_mutex);
}

void __warn_flushing_systemwide_wq(void)
{
	pr_warn("WARNING: Flushing system-wide workqueues will be prohibited in near future.\n");
	dump_stack();
}
EXPORT_SYMBOL(__warn_flushing_systemwide_wq);

static int __init workqueue_unbound_cpus_setup(char *str)
{
	if (cpulist_parse(str, &wq_cmdline_cpumask) < 0) {
		cpumask_clear(&wq_cmdline_cpumask);
		pr_warn("workqueue.unbound_cpus: incorrect CPU range, using default\n");
	}

	return 1;
}
__setup("workqueue.unbound_cpus=", workqueue_unbound_cpus_setup);
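/*
 * Illustrative sketch (not part of workqueue.c): one way a driver might
 * combine the APIs covered above - an unbound, freezable, sysfs-visible
 * workqueue plus a work item queued onto it. Everything prefixed with
 * "example_" is hypothetical; the workqueue calls themselves
 * (alloc_workqueue(), INIT_WORK(), queue_work(), flush_workqueue(),
 * destroy_workqueue()) are the standard API.
 */
#include <linux/workqueue.h>
#include <linux/printk.h>
#include <linux/errno.h>

static struct workqueue_struct *example_wq;
static struct work_struct example_work;

static void example_work_fn(struct work_struct *work)
{
	/* Runs in process context on an unbound kworker. */
	pr_info("example work executed\n");
}

static int example_setup(void)
{
	/*
	 * WQ_UNBOUND: execution is not tied to the queueing CPU and honours
	 * the /sys/bus/workqueue cpumask/affinity_scope attributes exposed
	 * because of WQ_SYSFS. WQ_FREEZABLE: quiesced by
	 * freeze_workqueues_begin() during suspend.
	 */
	example_wq = alloc_workqueue("example_wq",
				     WQ_UNBOUND | WQ_FREEZABLE | WQ_SYSFS, 0);
	if (!example_wq)
		return -ENOMEM;

	INIT_WORK(&example_work, example_work_fn);
	queue_work(example_wq, &example_work);
	return 0;
}

static void example_teardown(void)
{
	/* Wait for pending work, then release the workqueue. */
	flush_workqueue(example_wq);
	destroy_workqueue(example_wq);
}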
// SPDX-License-Identifier: GPL-2.0+
/*
 * USB FTDI SIO driver
 *
 * Copyright (C) 2009 - 2013
 *	Johan Hovold (jhovold@gmail.com)
 * Copyright (C) 1999 - 2001
 *	Greg Kroah-Hartman (greg@kroah.com)
 *	Bill Ryder (bryder@sgi.com)
 * Copyright (C) 2002
 *	Kuba Ober (kuba@mareimbrium.org)
 *
 * See Documentation/usb/usb-serial.rst for more information on using this
 * driver
 *
 * See http://ftdi-usb-sio.sourceforge.net for up to date testing info
 * and extra documentation
 *
 * Change entries from 2004 and earlier can be found in versions of this
 * file in kernel versions prior to the 2.6.24 release.
* */ /* Bill Ryder - bryder@sgi.com - wrote the FTDI_SIO implementation */ /* Thanx to FTDI for so kindly providing details of the protocol required */ /* to talk to the device */ /* Thanx to gkh and the rest of the usb dev group for all code I have assimilated :-) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/serial.h> #include <linux/gpio/driver.h> #include <linux/usb/serial.h> #include "ftdi_sio.h" #include "ftdi_sio_ids.h" #define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Bill Ryder <bryder@sgi.com>, Kuba Ober <kuba@mareimbrium.org>, Andreas Mohr, Johan Hovold <jhovold@gmail.com>" #define DRIVER_DESC "USB FTDI Serial Converters Driver" enum ftdi_chip_type { SIO, FT232A, FT232B, FT2232C, FT232R, FT232H, FT2232H, FT4232H, FT4232HA, FT232HP, FT233HP, FT2232HP, FT2233HP, FT4232HP, FT4233HP, FTX, }; struct ftdi_private { enum ftdi_chip_type chip_type; int baud_base; /* baud base clock for divisor setting */ int custom_divisor; /* custom_divisor kludge, this is for baud_base (different from what goes to the chip!) */ u16 last_set_data_value; /* the last data state set - needed for doing * a break */ int flags; /* some ASYNC_xxxx flags are supported */ unsigned long last_dtr_rts; /* saved modem control outputs */ char prev_status; /* Used for TIOCMIWAIT */ char transmit_empty; /* If transmitter is empty or not */ u16 channel; /* channel index, or 0 for legacy types */ speed_t force_baud; /* if non-zero, force the baud rate to this value */ int force_rtscts; /* if non-zero, force RTS-CTS to always be enabled */ unsigned int latency; /* latency setting in use */ unsigned short max_packet_size; struct mutex cfg_lock; /* Avoid mess by parallel calls of config ioctl() and change_speed() */ #ifdef CONFIG_GPIOLIB struct gpio_chip gc; struct mutex gpio_lock; /* protects GPIO state */ bool gpio_registered; /* is the gpiochip in kernel registered */ bool gpio_used; /* true if the user requested a gpio */ u8 gpio_altfunc; /* which pins are in gpio mode */ u8 gpio_output; /* pin directions cache */ u8 gpio_value; /* pin value for outputs */ #endif }; struct ftdi_quirk { int (*probe)(struct usb_serial *); /* Special settings for probed ports. 
*/ void (*port_probe)(struct ftdi_private *); }; static int ftdi_jtag_probe(struct usb_serial *serial); static int ftdi_NDI_device_setup(struct usb_serial *serial); static int ftdi_stmclite_probe(struct usb_serial *serial); static int ftdi_8u2232c_probe(struct usb_serial *serial); static void ftdi_USB_UIRT_setup(struct ftdi_private *priv); static void ftdi_HE_TIRA1_setup(struct ftdi_private *priv); static const struct ftdi_quirk ftdi_jtag_quirk = { .probe = ftdi_jtag_probe, }; static const struct ftdi_quirk ftdi_NDI_device_quirk = { .probe = ftdi_NDI_device_setup, }; static const struct ftdi_quirk ftdi_USB_UIRT_quirk = { .port_probe = ftdi_USB_UIRT_setup, }; static const struct ftdi_quirk ftdi_HE_TIRA1_quirk = { .port_probe = ftdi_HE_TIRA1_setup, }; static const struct ftdi_quirk ftdi_stmclite_quirk = { .probe = ftdi_stmclite_probe, }; static const struct ftdi_quirk ftdi_8u2232c_quirk = { .probe = ftdi_8u2232c_probe, }; /* * The 8U232AM has the same API as the sio except for: * - it can support MUCH higher baudrates; up to: * o 921600 for RS232 and 2000000 for RS422/485 at 48MHz * o 230400 at 12MHz * so .. 8U232AM's baudrate setting codes are different * - it has a two byte status code. * - it returns characters every 16ms (the FTDI does it every 40ms) * * the bcdDevice value is used to differentiate FT232BM and FT245BM from * the earlier FT8U232AM and FT8U232BM. For now, include all known VID/PID * combinations in both tables. * FIXME: perhaps bcdDevice can also identify 12MHz FT8U232AM devices, * but I don't know if those ever went into mass production. [Ian Abbott] */ /* * Device ID not listed? Test it using * /sys/bus/usb-serial/drivers/ftdi_sio/new_id and send a patch or report. */ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_BRICK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ZEITCONTROL_TAGTRACE_MIFARE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_BM_ATOM_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NXTCAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_EV3CON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_5_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_7_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_CAT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_WKEY_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_RS232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACTZWAVE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IRTRANS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IPLUS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IPLUS2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DMX4ALL) }, { USB_DEVICE(FTDI_VID, FTDI_SIO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U232AM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U232AM_ALT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_232RL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) , .driver_info = (kernel_ulong_t)&ftdi_8u2232c_quirk }, { USB_DEVICE(FTDI_VID, FTDI_4232H_PID) }, { USB_DEVICE(FTDI_VID, FTDI_232H_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FTX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT2233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT2232HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4232HP_PID) 
}, { USB_DEVICE(FTDI_VID, FTDI_FT233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT232HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4232HA_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MICRO_CHAMELEON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RELAIS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_SNIFFER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_THROTTLE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GATEWAY_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GBM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GBM_BOOST_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_AGILIS_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_CONEX_CC_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_CONEX_AGP_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_IOBOARD_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_MINI_IOBOARD_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SPROG_II) }, { USB_DEVICE(FTDI_VID, FTDI_TAGSYS_LP101_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TAGSYS_P200X_PID) }, { USB_DEVICE(FTDI_VID, FTDI_LENZ_LIUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_632_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_634_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_547_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_633_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_631_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_635_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_640_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_642_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DSS20_PID) }, { USB_DEVICE(FTDI_VID, FTDI_URBAN_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_URBAN_1_PID) }, { USB_DEVICE(FTDI_NF_RIC_VID, FTDI_NF_RIC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_VNHCPCUSB_D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_5_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_R2000KU_TRUE_RNG) }, { USB_DEVICE(FTDI_VID, FTDI_VARDAAN_PID) }, { USB_DEVICE(FTDI_VID, FTDI_AUTO_M3_OP_COM_V2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0100_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0101_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0102_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0103_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0104_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0105_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0106_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0107_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0108_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0109_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0110_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0111_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0112_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0113_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0114_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0115_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0116_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0117_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0118_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0119_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011A_PID) }, 
{ USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0120_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0121_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0122_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0123_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0124_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0125_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0126_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0127_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0128_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0129_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0130_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0131_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0132_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0133_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0134_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0135_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0136_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0137_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0138_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0139_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0140_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0141_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0142_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0143_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0144_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0145_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0146_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0147_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0148_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0149_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0150_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0151_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0152_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0153_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0154_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0155_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0156_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0157_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0158_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0159_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_015A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0160_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0161_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0162_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0163_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0164_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0165_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0166_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0167_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0168_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0169_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0170_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0171_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0172_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0173_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0174_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0175_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0176_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0177_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0178_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0179_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0180_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0181_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0182_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0183_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0184_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0185_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0186_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0187_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0188_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0189_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0190_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0191_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0192_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0193_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0194_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0195_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0196_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0197_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0198_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_0199_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D7_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_01D8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01ED_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_4701_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9300_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9301_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9302_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9303_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9304_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9305_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9306_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9307_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9308_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9309_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9310_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9311_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9312_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9313_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9314_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9315_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_9316_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9317_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9318_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9319_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PERLE_ULTRAPORT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PIEGROUP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TNC_X_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USBX_707_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2101_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2102_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2103_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2104_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2106_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2201_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2201_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2202_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2202_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2203_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2203_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_8_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_8_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_8_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_4_PID) }, { USB_DEVICE(IDTECH_VID, IDTECH_IDT1221U_PID) }, { USB_DEVICE(OCT_VID, OCT_US101_PID) }, { USB_DEVICE(OCT_VID, OCT_DK201_PID) }, { USB_DEVICE(FTDI_VID, FTDI_HE_TIRA1_PID), .driver_info = (kernel_ulong_t)&ftdi_HE_TIRA1_quirk }, { 
USB_DEVICE(FTDI_VID, FTDI_USB_UIRT_PID), .driver_info = (kernel_ulong_t)&ftdi_USB_UIRT_quirk }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_1) }, { USB_DEVICE(FTDI_VID, PROTEGO_R2X0) }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_3) }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_4) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E808_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E809_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80A_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80B_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80E_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E888_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E889_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88A_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88B_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88E_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UO100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UM100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UR100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_ALC8500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PYRAMID_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FHZ1000PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_US485_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PICPRO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PCMCIA_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PK1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_RS232MON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_APP70_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PEDO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PROD_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TAVIR_STK500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TIAO_UMPA_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONMX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2WI_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX3_PID) }, /* * ELV devices: */ { USB_DEVICE(FTDI_ELV_VID, FTDI_ELV_WS300_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_USR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_MSM1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_KL100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS550_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EC3000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS888_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TWS550_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FEM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_CLI7000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PPS7330_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TFM100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UDF77_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UIO88_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UAD8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UDA7_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_USI2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_T1100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PCD200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_ULA200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_CSI8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EM1000DL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PCK100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_RFP500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FS20SIG_PID) }, { 
USB_DEVICE(FTDI_VID, FTDI_ELV_UTP8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS300PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS444PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FHZ1300PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EM1010PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_HS485_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UMS100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) }, { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) }, { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_1_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU20_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU40_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSMACHX_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSLOAD_N_GO_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU64_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSPRIME8_5_PID) }, { USB_DEVICE(FTDI_VID, INSIDE_ACCESSO) }, { USB_DEVICE(INTREPID_VID, INTREPID_VALUECAN_PID) }, { USB_DEVICE(INTREPID_VID, INTREPID_NEOVI_PID) }, { USB_DEVICE(FALCOM_VID, FALCOM_TWIST_PID) }, { USB_DEVICE(FALCOM_VID, FALCOM_SAMBA_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SUUNTO_SPORTS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OCEANIC_PID) }, { USB_DEVICE(TTI_VID, TTI_QL355P_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RM_CANVIEW_PID) }, { USB_DEVICE(ACTON_VID, ACTON_SPECTRAPRO_PID) }, { USB_DEVICE(CONTEC_VID, CONTEC_COM1USBH_PID) }, { USB_DEVICE(MITSUBISHI_VID, MITSUBISHI_FXUSB_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USPTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2DR_2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2DR_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4DR2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4DR_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USB9F_2W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USB9F_4W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_232USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USBTB_2W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USBTB_4W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_TTL5USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_TTL3USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_ZZ_PROG1_USB_PID) }, { USB_DEVICE(FTDI_VID, EVER_ECO_PRO_CDS) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_3_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_0_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_1_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_2_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_3_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_4_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_5_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_6_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_7_PID) }, { USB_DEVICE(XSENS_VID, XSENS_AWINDA_DONGLE_PID) }, { USB_DEVICE(XSENS_VID, XSENS_AWINDA_STATION_PID) }, { USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTDEVBOARD_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTIUSBCONVERTER_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTW_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OMNI1509) }, { USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACTIVE_ROBOTS_PID) }, { 
USB_DEVICE(FTDI_VID, FTDI_MHAM_KW_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_YS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_IC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_DB9_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_RS232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y9_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TERATRONIK_VCP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TERATRONIK_D2XX_PID) }, { USB_DEVICE(EVOLUTION_VID, EVOLUTION_ER1_PID) }, { USB_DEVICE(EVOLUTION_VID, EVO_HYBRID_PID) }, { USB_DEVICE(EVOLUTION_VID, EVO_RCM4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ARTEMIS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16HR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16HRC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16IC_PID) }, { USB_DEVICE(KOBIL_VID, KOBIL_CONV_B1_PID) }, { USB_DEVICE(KOBIL_VID, KOBIL_CONV_KAAN_PID) }, { USB_DEVICE(POSIFLEX_VID, POSIFLEX_PP7000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TTUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ECLO_COM_1WIRE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_WESTREX_MODEL_777_PID) }, { USB_DEVICE(FTDI_VID, FTDI_WESTREX_MODEL_8900F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PCDJ_DAC2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RRCIRKITS_LOCOBUFFER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ASK_RDR400_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NZR_SEM_USB_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_1_PID) }, { USB_DEVICE(ICOM_VID, ICOM_OPC_U_UC_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2C1_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2C2_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2D_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2VT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2VR_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP4KVT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP4KVR_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2KVT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2KVR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACG_HFDUAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_YEI_SERVOCENTER31_PID) }, { USB_DEVICE(FTDI_VID, FTDI_THORLABS_PID) }, { USB_DEVICE(TESTO_VID, TESTO_1_PID) }, { USB_DEVICE(TESTO_VID, TESTO_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GAMMA_SCOUT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13M_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13S_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13U_PID) }, { USB_DEVICE(ELEKTOR_VID, ELEKTOR_FT323R_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NDI_HUC_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_SPECTRA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_FUTURE_2_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_FUTURE_3_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) }, { USB_DEVICE(NOVITUS_VID, NOVITUS_BONO_E_PID) }, { USB_DEVICE(FTDI_VID, RTSYSTEMS_USB_VX8_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S03_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_59_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57A_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29A_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29F_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, 
RTSYSTEMS_USB_S01_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29C_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_81B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_82B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5D_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K4Y_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5G_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S05_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_60_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_61_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_64_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_65_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92D_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_W5R_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_A5R_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_PW1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MAXSTREAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PHI_FISCO_PID) }, { USB_DEVICE(TML_VID, TML_USB_SERIAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELSTER_UNICOM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_JTAGCABLEII_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) }, { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_H_PID, 1) }, { USB_DEVICE(FIC_VID, FIC_NEO1973_DEBUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, LMI_LM3S_DEVEL_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, LMI_LM3S_EVAL_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, LMI_LM3S_ICDI_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) }, { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) }, /* Papouch devices based on FTDI chip */ { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AP485_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB422_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AP485_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB422_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485S_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485C_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_LEC_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB232_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_TMU_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_IRAMP_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_DRAK5_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO8x8_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO4x4_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO2x2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO10x1_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO30x3_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO60x3_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO2x16_PID) }, { USB_DEVICE(PAPOUCH_VID, 
PAPOUCH_QUIDO3x32_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_DRAK6_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_UPSUSB_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_MU_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SIMUKEY_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AD4USB_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_GMUX_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_GMSR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DGQG_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DUSB_PID) }, { USB_DEVICE(ALTI2_VID, ALTI2_N3_PID) }, { USB_DEVICE(FTDI_VID, DIEBOLD_BCS_SE923_PID) }, { USB_DEVICE(ATMEL_VID, STK541_PID) }, { USB_DEVICE(DE_VID, STB_PID) }, { USB_DEVICE(DE_VID, WHT_PID) }, { USB_DEVICE(ADI_VID, ADI_GNICE_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ADI_VID, ADI_GNICEPLUS_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE_AND_INTERFACE_INFO(MICROCHIP_VID, MICROCHIP_USB_BOARD_PID, USB_CLASS_VENDOR_SPEC, USB_SUBCLASS_VENDOR_SPEC, 0x00) }, { USB_DEVICE_INTERFACE_NUMBER(ACTEL_VID, MICROSEMI_ARROW_SF2PLUS_BOARD_PID, 2) }, { USB_DEVICE(JETI_VID, JETI_SPC1201_PID) }, { USB_DEVICE(MARVELL_VID, MARVELL_SHEEVAPLUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(LARSENBRUSGAARD_VID, LB_ALTITRACK_PID) }, { USB_DEVICE(GN_OTOMETRICS_VID, AURICAL_USB_PID) }, { USB_DEVICE(FTDI_VID, PI_C865_PID) }, { USB_DEVICE(FTDI_VID, PI_C857_PID) }, { USB_DEVICE(PI_VID, PI_C866_PID) }, { USB_DEVICE(PI_VID, PI_C663_PID) }, { USB_DEVICE(PI_VID, PI_C725_PID) }, { USB_DEVICE(PI_VID, PI_E517_PID) }, { USB_DEVICE(PI_VID, PI_C863_PID) }, { USB_DEVICE(PI_VID, PI_E861_PID) }, { USB_DEVICE(PI_VID, PI_C867_PID) }, { USB_DEVICE(PI_VID, PI_E609_PID) }, { USB_DEVICE(PI_VID, PI_E709_PID) }, { USB_DEVICE(PI_VID, PI_100F_PID) }, { USB_DEVICE(PI_VID, PI_1011_PID) }, { USB_DEVICE(PI_VID, PI_1012_PID) }, { USB_DEVICE(PI_VID, PI_1013_PID) }, { USB_DEVICE(PI_VID, PI_1014_PID) }, { USB_DEVICE(PI_VID, PI_1015_PID) }, { USB_DEVICE(PI_VID, PI_1016_PID) }, { USB_DEVICE(KONDO_VID, KONDO_USB_SERIAL_PID) }, { USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) }, { USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, TI_XDS100V2_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, HAMEG_HO820_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO720_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO730_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO870_PID) }, { USB_DEVICE(FTDI_VID, MJSG_GENERIC_PID) }, { USB_DEVICE(FTDI_VID, MJSG_SR_RADIO_PID) }, { USB_DEVICE(FTDI_VID, MJSG_HD_RADIO_PID) }, { USB_DEVICE(FTDI_VID, MJSG_XM_RADIO_PID) }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_ST_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SLITE_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SH2_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SH4_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, SEGWAY_RMP200_PID) }, { USB_DEVICE(FTDI_VID, ACCESIO_COM4SM_PID) }, { USB_DEVICE(IONICS_VID, IONICS_PLUGCOMPUTER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_24_MASTER_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_PC_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_USB_DMX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MIDI_TIMECODE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MINI_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MAXI_WING_PID) }, { 
USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MEDIA_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_LOGBOOKML_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_LS_LOGBOOK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_HS_LOGBOOK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CINTERION_MC55I_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FHE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOTEC_PID) }, { USB_DEVICE(QIHARDWARE_VID, MILKYMISTONE_JTAGSERIAL_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ST_VID, ST_STMCLT_2232_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ST_VID, ST_STMCLT_4232_PID), .driver_info = (kernel_ulong_t)&ftdi_stmclite_quirk }, { USB_DEVICE(FTDI_VID, FTDI_RF_R106) }, { USB_DEVICE(FTDI_VID, FTDI_DISTORTEC_JTAG_LOCK_PICK_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_LUMEL_PD12_PID) }, /* Crucible Devices */ { USB_DEVICE(FTDI_VID, FTDI_CT_COMET_PID) }, { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) }, /* Cressi Devices */ { USB_DEVICE(FTDI_VID, FTDI_CRESSI_PID) }, /* Brainboxes Devices */ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_001_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_012_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_159_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_4_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_5_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_235_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_4_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_320_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_357_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_701_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_701_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_4_PID) }, /* ekey Devices */ { USB_DEVICE(FTDI_VID, FTDI_EKEY_CONV_USB_PID) }, /* Infineon Devices */ { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC1798_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC2X7_PID, 1) }, /* GE Healthcare devices */ { USB_DEVICE(GE_HEALTHCARE_VID, GE_HEALTHCARE_NEMO_TRACKER_PID) }, /* Active Research (Actisense) devices */ { USB_DEVICE(FTDI_VID, ACTISENSE_NDC_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_USG_PID) 
}, { USB_DEVICE(FTDI_VID, ACTISENSE_NGT_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGW_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_UID_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_USA_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGX_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_D9AF_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEAGAUGE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASWITCH_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_NMEA2000_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ETHERNET_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_WIFI_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, /* Belimo Automation devices */ { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) }, { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) }, /* ICP DAS I-756xU devices */ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) }, { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, /* EZPrototypes devices */ { USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) }, { USB_DEVICE_INTERFACE_NUMBER(UNJO_VID, UNJO_ISODEBUG_V1_PID, 1) }, /* Sienna devices */ { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) }, { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) }, /* IDS GmbH devices */ { USB_DEVICE(IDS_VID, IDS_SI31A_PID) }, { USB_DEVICE(IDS_VID, IDS_CM31A_PID) }, /* Omron devices */ { USB_DEVICE(OMRON_VID, OMRON_CS1W_CIF31_PID) }, /* U-Blox devices */ { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) }, { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) }, /* FreeCalypso USB adapters */ { USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_BUF_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_UNBUF_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, /* GMC devices */ { USB_DEVICE(GMC_VID, GMC_Z216C_PID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table_combined); static const char *ftdi_chip_name[] = { [SIO] = "SIO", /* the serial part of FT8U100AX */ [FT232A] = "FT232A", [FT232B] = "FT232B", [FT2232C] = "FT2232C/D", [FT232R] = "FT232R", [FT232H] = "FT232H", [FT2232H] = "FT2232H", [FT4232H] = "FT4232H", [FT4232HA] = "FT4232HA", [FT232HP] = "FT232HP", [FT233HP] = "FT233HP", [FT2232HP] = "FT2232HP", [FT2233HP] = "FT2233HP", [FT4232HP] = "FT4232HP", [FT4233HP] = "FT4233HP", [FTX] = "FT-X", }; /* Used for TIOCMIWAIT */ #define FTDI_STATUS_B0_MASK (FTDI_RS0_CTS | FTDI_RS0_DSR | FTDI_RS0_RI | FTDI_RS0_RLSD) #define FTDI_STATUS_B1_MASK (FTDI_RS_BI) /* End TIOCMIWAIT */ static void ftdi_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int ftdi_get_modem_status(struct usb_serial_port *port, unsigned char status[2]); #define WDR_TIMEOUT 5000 /* default urb timeout */ #define WDR_SHORT_TIMEOUT 1000 /* shorter urb timeout */ /* * *************************************************************************** * Utility functions * *************************************************************************** */ static unsigned short int ftdi_232am_baud_base_to_divisor(int baud, int base) { unsigned short int divisor; /* divisor shifted 3 bits to the left */ int divisor3 = DIV_ROUND_CLOSEST(base, 2 * baud); if ((divisor3 & 0x7) == 7) 
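	/*
	 * Note (added for clarity): the AM-series divisor can only encode
	 * fractional parts of 0, 1/8, 1/4 and 1/2 (see the bit patterns
	 * applied below), so a remainder of 7/8 is rounded up to the next
	 * whole divisor.  Worked example, using the 48 MHz base passed in by
	 * ftdi_232am_baud_to_divisor(): 115200 baud gives
	 * divisor3 = DIV_ROUND_CLOSEST(48000000, 2 * 115200) = 208,
	 * i.e. a whole divisor of 26 with no fractional part.
	 */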
divisor3++; /* round x.7/8 up to x+1 */ divisor = divisor3 >> 3; divisor3 &= 0x7; if (divisor3 == 1) divisor |= 0xc000; /* +0.125 */ else if (divisor3 >= 4) divisor |= 0x4000; /* +0.5 */ else if (divisor3 != 0) divisor |= 0x8000; /* +0.25 */ else if (divisor == 1) divisor = 0; /* special case for maximum baud rate */ return divisor; } static unsigned short int ftdi_232am_baud_to_divisor(int baud) { return ftdi_232am_baud_base_to_divisor(baud, 48000000); } static u32 ftdi_232bm_baud_base_to_divisor(int baud, int base) { static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 }; u32 divisor; /* divisor shifted 3 bits to the left */ int divisor3 = DIV_ROUND_CLOSEST(base, 2 * baud); divisor = divisor3 >> 3; divisor |= (u32)divfrac[divisor3 & 0x7] << 14; /* Deal with special cases for highest baud rates. */ if (divisor == 1) /* 1.0 */ divisor = 0; else if (divisor == 0x4001) /* 1.5 */ divisor = 1; return divisor; } static u32 ftdi_232bm_baud_to_divisor(int baud) { return ftdi_232bm_baud_base_to_divisor(baud, 48000000); } static u32 ftdi_2232h_baud_base_to_divisor(int baud, int base) { static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 }; u32 divisor; int divisor3; /* hi-speed baud rate is 10-bit sampling instead of 16-bit */ divisor3 = DIV_ROUND_CLOSEST(8 * base, 10 * baud); divisor = divisor3 >> 3; divisor |= (u32)divfrac[divisor3 & 0x7] << 14; /* Deal with special cases for highest baud rates. */ if (divisor == 1) /* 1.0 */ divisor = 0; else if (divisor == 0x4001) /* 1.5 */ divisor = 1; /* * Set this bit to turn off a divide by 2.5 on baud rate generator * This enables baud rates up to 12Mbaud but cannot reach below 1200 * baud with this bit set */ divisor |= 0x00020000; return divisor; } static u32 ftdi_2232h_baud_to_divisor(int baud) { return ftdi_2232h_baud_base_to_divisor(baud, 120000000); } #define set_mctrl(port, set) update_mctrl((port), (set), 0) #define clear_mctrl(port, clear) update_mctrl((port), 0, (clear)) static int update_mctrl(struct usb_serial_port *port, unsigned int set, unsigned int clear) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct device *dev = &port->dev; unsigned value; int rv; if (((set | clear) & (TIOCM_DTR | TIOCM_RTS)) == 0) { dev_dbg(dev, "%s - DTR|RTS not being set|cleared\n", __func__); return 0; /* no change */ } clear &= ~set; /* 'set' takes precedence over 'clear' */ value = 0; if (clear & TIOCM_DTR) value |= FTDI_SIO_SET_DTR_LOW; if (clear & TIOCM_RTS) value |= FTDI_SIO_SET_RTS_LOW; if (set & TIOCM_DTR) value |= FTDI_SIO_SET_DTR_HIGH; if (set & TIOCM_RTS) value |= FTDI_SIO_SET_RTS_HIGH; rv = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_MODEM_CTRL_REQUEST, FTDI_SIO_SET_MODEM_CTRL_REQUEST_TYPE, value, priv->channel, NULL, 0, WDR_TIMEOUT); if (rv < 0) { dev_dbg(dev, "%s Error from MODEM_CTRL urb: DTR %s, RTS %s\n", __func__, (set & TIOCM_DTR) ? "HIGH" : (clear & TIOCM_DTR) ? "LOW" : "unchanged", (set & TIOCM_RTS) ? "HIGH" : (clear & TIOCM_RTS) ? "LOW" : "unchanged"); rv = usb_translate_errors(rv); } else { dev_dbg(dev, "%s - DTR %s, RTS %s\n", __func__, (set & TIOCM_DTR) ? "HIGH" : (clear & TIOCM_DTR) ? "LOW" : "unchanged", (set & TIOCM_RTS) ? "HIGH" : (clear & TIOCM_RTS) ? 
"LOW" : "unchanged"); /* FIXME: locking on last_dtr_rts */ priv->last_dtr_rts = (priv->last_dtr_rts & ~clear) | set; } return rv; } static u32 get_ftdi_divisor(struct tty_struct *tty, struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct device *dev = &port->dev; u32 div_value = 0; int div_okay = 1; int baud; baud = tty_get_baud_rate(tty); dev_dbg(dev, "%s - tty_get_baud_rate reports speed %d\n", __func__, baud); /* * Observe deprecated async-compatible custom_divisor hack, update * baudrate if needed. */ if (baud == 38400 && ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST) && (priv->custom_divisor)) { baud = priv->baud_base / priv->custom_divisor; dev_dbg(dev, "%s - custom divisor %d sets baud rate to %d\n", __func__, priv->custom_divisor, baud); } if (!baud) baud = 9600; switch (priv->chip_type) { case SIO: switch (baud) { case 300: div_value = ftdi_sio_b300; break; case 600: div_value = ftdi_sio_b600; break; case 1200: div_value = ftdi_sio_b1200; break; case 2400: div_value = ftdi_sio_b2400; break; case 4800: div_value = ftdi_sio_b4800; break; case 9600: div_value = ftdi_sio_b9600; break; case 19200: div_value = ftdi_sio_b19200; break; case 38400: div_value = ftdi_sio_b38400; break; case 57600: div_value = ftdi_sio_b57600; break; case 115200: div_value = ftdi_sio_b115200; break; default: dev_dbg(dev, "%s - Baudrate (%d) requested is not supported\n", __func__, baud); div_value = ftdi_sio_b9600; baud = 9600; div_okay = 0; } break; case FT232A: if (baud <= 3000000) { div_value = ftdi_232am_baud_to_divisor(baud); } else { dev_dbg(dev, "%s - Baud rate too high!\n", __func__); baud = 9600; div_value = ftdi_232am_baud_to_divisor(9600); div_okay = 0; } break; case FT232B: case FT2232C: case FT232R: case FTX: if (baud <= 3000000) { u16 product_id = le16_to_cpu( port->serial->dev->descriptor.idProduct); if (((product_id == FTDI_NDI_HUC_PID) || (product_id == FTDI_NDI_SPECTRA_SCU_PID) || (product_id == FTDI_NDI_FUTURE_2_PID) || (product_id == FTDI_NDI_FUTURE_3_PID) || (product_id == FTDI_NDI_AURORA_SCU_PID)) && (baud == 19200)) { baud = 1200000; } div_value = ftdi_232bm_baud_to_divisor(baud); } else { dev_dbg(dev, "%s - Baud rate too high!\n", __func__); div_value = ftdi_232bm_baud_to_divisor(9600); div_okay = 0; baud = 9600; } break; default: if ((baud <= 12000000) && (baud >= 1200)) { div_value = ftdi_2232h_baud_to_divisor(baud); } else if (baud < 1200) { div_value = ftdi_232bm_baud_to_divisor(baud); } else { dev_dbg(dev, "%s - Baud rate too high!\n", __func__); div_value = ftdi_232bm_baud_to_divisor(9600); div_okay = 0; baud = 9600; } break; } if (div_okay) { dev_dbg(dev, "%s - Baud rate set to %d (divisor 0x%lX) on chip %s\n", __func__, baud, (unsigned long)div_value, ftdi_chip_name[priv->chip_type]); } tty_encode_baud_rate(tty, baud, baud); return div_value; } static int change_speed(struct tty_struct *tty, struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); u16 value; u16 index; u32 index_value; int rv; index_value = get_ftdi_divisor(tty, port); value = (u16)index_value; index = (u16)(index_value >> 16); if (priv->channel) index = (u16)((index << 8) | priv->channel); rv = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_BAUDRATE_REQUEST, FTDI_SIO_SET_BAUDRATE_REQUEST_TYPE, value, index, NULL, 0, WDR_SHORT_TIMEOUT); return rv; } static int write_latency_timer(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device 
*udev = port->serial->dev; int rv; int l = priv->latency; if (priv->chip_type == SIO || priv->chip_type == FT232A) return -EINVAL; if (priv->flags & ASYNC_LOW_LATENCY) l = 1; dev_dbg(&port->dev, "%s: setting latency timer = %i\n", __func__, l); rv = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), FTDI_SIO_SET_LATENCY_TIMER_REQUEST, FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, l, priv->channel, NULL, 0, WDR_TIMEOUT); if (rv < 0) dev_err(&port->dev, "Unable to write latency timer: %i\n", rv); return rv; } static int _read_latency_timer(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; u8 buf; int rv; rv = usb_control_msg_recv(udev, 0, FTDI_SIO_GET_LATENCY_TIMER_REQUEST, FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE, 0, priv->channel, &buf, 1, WDR_TIMEOUT, GFP_KERNEL); if (rv == 0) rv = buf; return rv; } static int read_latency_timer(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); int rv; if (priv->chip_type == SIO || priv->chip_type == FT232A) return -EINVAL; rv = _read_latency_timer(port); if (rv < 0) { dev_err(&port->dev, "Unable to read latency timer: %i\n", rv); return rv; } priv->latency = rv; return 0; } static void get_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); ss->flags = priv->flags; ss->baud_base = priv->baud_base; ss->custom_divisor = priv->custom_divisor; } static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); int old_flags, old_divisor; mutex_lock(&priv->cfg_lock); if (!capable(CAP_SYS_ADMIN)) { if ((ss->flags ^ priv->flags) & ~ASYNC_USR_MASK) { mutex_unlock(&priv->cfg_lock); return -EPERM; } } old_flags = priv->flags; old_divisor = priv->custom_divisor; priv->flags = ss->flags & ASYNC_FLAGS; priv->custom_divisor = ss->custom_divisor; write_latency_timer(port); if ((priv->flags ^ old_flags) & ASYNC_SPD_MASK || ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST && priv->custom_divisor != old_divisor)) { /* warn about deprecation unless clearing */ if (priv->flags & ASYNC_SPD_MASK) dev_warn_ratelimited(&port->dev, "use of SPD flags is deprecated\n"); change_speed(tty, port); } mutex_unlock(&priv->cfg_lock); return 0; } static int get_lsr_info(struct usb_serial_port *port, unsigned int __user *retinfo) { struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned int result = 0; if (priv->transmit_empty) result = TIOCSER_TEMT; if (copy_to_user(retinfo, &result, sizeof(unsigned int))) return -EFAULT; return 0; } static int ftdi_determine_type(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; struct usb_device *udev = serial->dev; unsigned int version, ifnum; version = le16_to_cpu(udev->descriptor.bcdDevice); ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber; /* Assume Hi-Speed type */ priv->baud_base = 120000000 / 2; priv->channel = CHANNEL_A + ifnum; switch (version) { case 0x200: priv->chip_type = FT232A; priv->baud_base = 48000000 / 2; priv->channel = 0; /* * FT232B devices have a bug where bcdDevice gets set to 0x200 * when iSerialNumber is 0. Assume it is an FT232B in case the * latency timer is readable. 
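		 * The FT232A (like the original SIO) has no latency-timer
		 * register, so a successful read here indicates the part is
		 * really an FT232B.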
*/ if (udev->descriptor.iSerialNumber == 0 && _read_latency_timer(port) >= 0) { priv->chip_type = FT232B; } break; case 0x400: priv->chip_type = FT232B; priv->baud_base = 48000000 / 2; priv->channel = 0; break; case 0x500: priv->chip_type = FT2232C; priv->baud_base = 48000000 / 2; break; case 0x600: priv->chip_type = FT232R; priv->baud_base = 48000000 / 2; priv->channel = 0; break; case 0x700: priv->chip_type = FT2232H; break; case 0x800: priv->chip_type = FT4232H; break; case 0x900: priv->chip_type = FT232H; break; case 0x1000: priv->chip_type = FTX; priv->baud_base = 48000000 / 2; break; case 0x2800: priv->chip_type = FT2233HP; break; case 0x2900: priv->chip_type = FT4233HP; break; case 0x3000: priv->chip_type = FT2232HP; break; case 0x3100: priv->chip_type = FT4232HP; break; case 0x3200: priv->chip_type = FT233HP; break; case 0x3300: priv->chip_type = FT232HP; break; case 0x3600: priv->chip_type = FT4232HA; break; default: if (version < 0x200) { priv->chip_type = SIO; priv->baud_base = 12000000 / 16; priv->channel = 0; } else { dev_err(&port->dev, "unknown device type: 0x%02x\n", version); return -ENODEV; } } dev_info(&udev->dev, "Detected %s\n", ftdi_chip_name[priv->chip_type]); return 0; } /* * Determine the maximum packet size for the device. This depends on the chip * type and the USB host capabilities. The value should be obtained from the * device descriptor as the chip will use the appropriate values for the host. */ static void ftdi_set_max_packet_size(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_interface *interface = port->serial->interface; struct usb_endpoint_descriptor *ep_desc; unsigned num_endpoints; unsigned i; num_endpoints = interface->cur_altsetting->desc.bNumEndpoints; if (!num_endpoints) return; /* * NOTE: Some customers have programmed FT232R/FT245R devices * with an endpoint size of 0 - not good. In this case, we * want to override the endpoint descriptor setting and use a * value of 64 for wMaxPacketSize. */ for (i = 0; i < num_endpoints; i++) { ep_desc = &interface->cur_altsetting->endpoint[i].desc; if (!ep_desc->wMaxPacketSize) { ep_desc->wMaxPacketSize = cpu_to_le16(0x40); dev_warn(&port->dev, "Overriding wMaxPacketSize on endpoint %d\n", usb_endpoint_num(ep_desc)); } } /* Set max packet size based on last descriptor. */ priv->max_packet_size = usb_endpoint_maxp(ep_desc); } /* * *************************************************************************** * Sysfs Attribute * *************************************************************************** */ static ssize_t latency_timer_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); if (priv->flags & ASYNC_LOW_LATENCY) return sprintf(buf, "1\n"); else return sprintf(buf, "%u\n", priv->latency); } /* Write a new value of the latency timer, in units of milliseconds. */ static ssize_t latency_timer_store(struct device *dev, struct device_attribute *attr, const char *valbuf, size_t count) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); u8 v; int rv; if (kstrtou8(valbuf, 10, &v)) return -EINVAL; priv->latency = v; rv = write_latency_timer(port); if (rv < 0) return -EIO; return count; } static DEVICE_ATTR_RW(latency_timer); /* Write an event character directly to the FTDI register. The ASCII value is in the low 8 bits, with the enable bit in the 9th bit. 
*/ static ssize_t event_char_store(struct device *dev, struct device_attribute *attr, const char *valbuf, size_t count) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; unsigned int v; int rv; if (kstrtouint(valbuf, 0, &v) || v >= 0x200) return -EINVAL; dev_dbg(&port->dev, "%s: setting event char = 0x%03x\n", __func__, v); rv = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), FTDI_SIO_SET_EVENT_CHAR_REQUEST, FTDI_SIO_SET_EVENT_CHAR_REQUEST_TYPE, v, priv->channel, NULL, 0, WDR_TIMEOUT); if (rv < 0) { dev_dbg(&port->dev, "Unable to write event character: %i\n", rv); return -EIO; } return count; } static DEVICE_ATTR_WO(event_char); static struct attribute *ftdi_attrs[] = { &dev_attr_event_char.attr, &dev_attr_latency_timer.attr, NULL }; static umode_t ftdi_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct device *dev = kobj_to_dev(kobj); struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); enum ftdi_chip_type type = priv->chip_type; if (attr == &dev_attr_event_char.attr) { if (type == SIO) return 0; } if (attr == &dev_attr_latency_timer.attr) { if (type == SIO || type == FT232A) return 0; } return attr->mode; } static const struct attribute_group ftdi_group = { .attrs = ftdi_attrs, .is_visible = ftdi_is_visible, }; static const struct attribute_group *ftdi_groups[] = { &ftdi_group, NULL }; #ifdef CONFIG_GPIOLIB static int ftdi_set_bitmode(struct usb_serial_port *port, u8 mode) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; int result; u16 val; result = usb_autopm_get_interface(serial->interface); if (result) return result; val = (mode << 8) | (priv->gpio_output << 4) | priv->gpio_value; result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), FTDI_SIO_SET_BITMODE_REQUEST, FTDI_SIO_SET_BITMODE_REQUEST_TYPE, val, priv->channel, NULL, 0, WDR_TIMEOUT); if (result < 0) { dev_err(&serial->interface->dev, "bitmode request failed for value 0x%04x: %d\n", val, result); } usb_autopm_put_interface(serial->interface); return result; } static int ftdi_set_cbus_pins(struct usb_serial_port *port) { return ftdi_set_bitmode(port, FTDI_SIO_BITMODE_CBUS); } static int ftdi_exit_cbus_mode(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); priv->gpio_output = 0; priv->gpio_value = 0; return ftdi_set_bitmode(port, FTDI_SIO_BITMODE_RESET); } static int ftdi_gpio_request(struct gpio_chip *gc, unsigned int offset) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); int result; mutex_lock(&priv->gpio_lock); if (!priv->gpio_used) { /* Set default pin states, as we cannot get them from device */ priv->gpio_output = 0x00; priv->gpio_value = 0x00; result = ftdi_set_cbus_pins(port); if (result) { mutex_unlock(&priv->gpio_lock); return result; } priv->gpio_used = true; } mutex_unlock(&priv->gpio_lock); return 0; } static int ftdi_read_cbus_pins(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; u8 buf; int result; result = usb_autopm_get_interface(serial->interface); if (result) return result; result = usb_control_msg_recv(serial->dev, 0, FTDI_SIO_READ_PINS_REQUEST, FTDI_SIO_READ_PINS_REQUEST_TYPE, 0, priv->channel, &buf, 1, WDR_TIMEOUT, GFP_KERNEL); if (result == 0) 
result = buf; usb_autopm_put_interface(serial->interface); return result; } static int ftdi_gpio_get(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial_port *port = gpiochip_get_data(gc); int result; result = ftdi_read_cbus_pins(port); if (result < 0) return result; return !!(result & BIT(gpio)); } static void ftdi_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->gpio_lock); if (value) priv->gpio_value |= BIT(gpio); else priv->gpio_value &= ~BIT(gpio); ftdi_set_cbus_pins(port); mutex_unlock(&priv->gpio_lock); } static int ftdi_gpio_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { struct usb_serial_port *port = gpiochip_get_data(gc); int result; result = ftdi_read_cbus_pins(port); if (result < 0) return result; *bits = result & *mask; return 0; } static void ftdi_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->gpio_lock); priv->gpio_value &= ~(*mask); priv->gpio_value |= *bits & *mask; ftdi_set_cbus_pins(port); mutex_unlock(&priv->gpio_lock); } static int ftdi_gpio_direction_get(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); return !(priv->gpio_output & BIT(gpio)); } static int ftdi_gpio_direction_input(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); int result; mutex_lock(&priv->gpio_lock); priv->gpio_output &= ~BIT(gpio); result = ftdi_set_cbus_pins(port); mutex_unlock(&priv->gpio_lock); return result; } static int ftdi_gpio_direction_output(struct gpio_chip *gc, unsigned int gpio, int value) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); int result; mutex_lock(&priv->gpio_lock); priv->gpio_output |= BIT(gpio); if (value) priv->gpio_value |= BIT(gpio); else priv->gpio_value &= ~BIT(gpio); result = ftdi_set_cbus_pins(port); mutex_unlock(&priv->gpio_lock); return result; } static int ftdi_gpio_init_valid_mask(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios) { struct usb_serial_port *port = gpiochip_get_data(gc); struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned long map = priv->gpio_altfunc; bitmap_complement(valid_mask, &map, ngpios); if (bitmap_empty(valid_mask, ngpios)) dev_dbg(&port->dev, "no CBUS pin configured for GPIO\n"); else dev_dbg(&port->dev, "CBUS%*pbl configured for GPIO\n", ngpios, valid_mask); return 0; } static int ftdi_read_eeprom(struct usb_serial *serial, void *dst, u16 addr, u16 nbytes) { int read = 0; if (addr % 2 != 0) return -EINVAL; if (nbytes % 2 != 0) return -EINVAL; /* Read EEPROM two bytes at a time */ while (read < nbytes) { int rv; rv = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), FTDI_SIO_READ_EEPROM_REQUEST, FTDI_SIO_READ_EEPROM_REQUEST_TYPE, 0, (addr + read) / 2, dst + read, 2, WDR_TIMEOUT); if (rv < 2) { if (rv >= 0) return -EIO; else return rv; } read += rv; } return 0; } static int ftdi_gpio_init_ft232h(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); u16 cbus_config; u8 *buf; int ret; int i; buf = kmalloc(4, GFP_KERNEL); if 
(!buf) return -ENOMEM; ret = ftdi_read_eeprom(port->serial, buf, 0x1a, 4); if (ret < 0) goto out_free; /* * FT232H CBUS Memory Map * * 0x1a: X- (upper nibble -> AC5) * 0x1b: -X (lower nibble -> AC6) * 0x1c: XX (upper nibble -> AC9 | lower nibble -> AC8) */ cbus_config = buf[2] << 8 | (buf[1] & 0xf) << 4 | (buf[0] & 0xf0) >> 4; priv->gc.ngpio = 4; priv->gpio_altfunc = 0xff; for (i = 0; i < priv->gc.ngpio; ++i) { if ((cbus_config & 0xf) == FTDI_FTX_CBUS_MUX_GPIO) priv->gpio_altfunc &= ~BIT(i); cbus_config >>= 4; } out_free: kfree(buf); return ret; } static int ftdi_gpio_init_ft232r(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); u16 cbus_config; u8 *buf; int ret; int i; buf = kmalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; ret = ftdi_read_eeprom(port->serial, buf, 0x14, 2); if (ret < 0) goto out_free; cbus_config = le16_to_cpup((__le16 *)buf); dev_dbg(&port->dev, "cbus_config = 0x%04x\n", cbus_config); priv->gc.ngpio = 4; priv->gpio_altfunc = 0xff; for (i = 0; i < priv->gc.ngpio; ++i) { if ((cbus_config & 0xf) == FTDI_FT232R_CBUS_MUX_GPIO) priv->gpio_altfunc &= ~BIT(i); cbus_config >>= 4; } out_free: kfree(buf); return ret; } static int ftdi_gpio_init_ftx(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; const u16 cbus_cfg_addr = 0x1a; const u16 cbus_cfg_size = 4; u8 *cbus_cfg_buf; int result; u8 i; cbus_cfg_buf = kmalloc(cbus_cfg_size, GFP_KERNEL); if (!cbus_cfg_buf) return -ENOMEM; result = ftdi_read_eeprom(serial, cbus_cfg_buf, cbus_cfg_addr, cbus_cfg_size); if (result < 0) goto out_free; /* FIXME: FT234XD alone has 1 GPIO, but how to recognize this IC? */ priv->gc.ngpio = 4; /* Determine which pins are configured for CBUS bitbanging */ priv->gpio_altfunc = 0xff; for (i = 0; i < priv->gc.ngpio; ++i) { if (cbus_cfg_buf[i] == FTDI_FTX_CBUS_MUX_GPIO) priv->gpio_altfunc &= ~BIT(i); } out_free: kfree(cbus_cfg_buf); return result; } static int ftdi_gpio_init(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_serial *serial = port->serial; int result; switch (priv->chip_type) { case FT232H: result = ftdi_gpio_init_ft232h(port); break; case FT232R: result = ftdi_gpio_init_ft232r(port); break; case FTX: result = ftdi_gpio_init_ftx(port); break; default: return 0; } if (result < 0) return result; mutex_init(&priv->gpio_lock); priv->gc.label = "ftdi-cbus"; priv->gc.request = ftdi_gpio_request; priv->gc.get_direction = ftdi_gpio_direction_get; priv->gc.direction_input = ftdi_gpio_direction_input; priv->gc.direction_output = ftdi_gpio_direction_output; priv->gc.init_valid_mask = ftdi_gpio_init_valid_mask; priv->gc.get = ftdi_gpio_get; priv->gc.set = ftdi_gpio_set; priv->gc.get_multiple = ftdi_gpio_get_multiple; priv->gc.set_multiple = ftdi_gpio_set_multiple; priv->gc.owner = THIS_MODULE; priv->gc.parent = &serial->interface->dev; priv->gc.base = -1; priv->gc.can_sleep = true; result = gpiochip_add_data(&priv->gc, port); if (!result) priv->gpio_registered = true; return result; } static void ftdi_gpio_remove(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); if (priv->gpio_registered) { gpiochip_remove(&priv->gc); priv->gpio_registered = false; } if (priv->gpio_used) { /* Exiting CBUS-mode does not reset pin states. 
*/ ftdi_exit_cbus_mode(port); priv->gpio_used = false; } } #else static int ftdi_gpio_init(struct usb_serial_port *port) { return 0; } static void ftdi_gpio_remove(struct usb_serial_port *port) { } #endif /* CONFIG_GPIOLIB */ /* * *************************************************************************** * FTDI driver specific functions * *************************************************************************** */ static int ftdi_probe(struct usb_serial *serial, const struct usb_device_id *id) { const struct ftdi_quirk *quirk = (struct ftdi_quirk *)id->driver_info; if (quirk && quirk->probe) { int ret = quirk->probe(serial); if (ret != 0) return ret; } usb_set_serial_data(serial, (void *)id->driver_info); return 0; } static int ftdi_port_probe(struct usb_serial_port *port) { const struct ftdi_quirk *quirk = usb_get_serial_data(port->serial); struct ftdi_private *priv; int result; priv = kzalloc(sizeof(struct ftdi_private), GFP_KERNEL); if (!priv) return -ENOMEM; mutex_init(&priv->cfg_lock); if (quirk && quirk->port_probe) quirk->port_probe(priv); usb_set_serial_port_data(port, priv); result = ftdi_determine_type(port); if (result) goto err_free; ftdi_set_max_packet_size(port); if (read_latency_timer(port) < 0) priv->latency = 16; write_latency_timer(port); result = ftdi_gpio_init(port); if (result < 0) { dev_err(&port->serial->interface->dev, "GPIO initialisation failed: %d\n", result); } return 0; err_free: kfree(priv); return result; } /* Setup for the USB-UIRT device, which requires hardwired * baudrate (38400 gets mapped to 312500) */ /* Called from usbserial:serial_probe */ static void ftdi_USB_UIRT_setup(struct ftdi_private *priv) { priv->flags |= ASYNC_SPD_CUST; priv->custom_divisor = 77; priv->force_baud = 38400; } /* Setup for the HE-TIRA1 device, which requires hardwired * baudrate (38400 gets mapped to 100000) and RTS-CTS enabled. */ static void ftdi_HE_TIRA1_setup(struct ftdi_private *priv) { priv->flags |= ASYNC_SPD_CUST; priv->custom_divisor = 240; priv->force_baud = 38400; priv->force_rtscts = 1; } /* * Module parameter to control latency timer for NDI FTDI-based USB devices. * If this value is not set in /etc/modprobe.d/ its value will be set * to 1ms. */ static int ndi_latency_timer = 1; /* Setup for the NDI FTDI-based USB devices, which requires hardwired * baudrate (19200 gets mapped to 1200000). * * Called from usbserial:serial_probe. */ static int ftdi_NDI_device_setup(struct usb_serial *serial) { struct usb_device *udev = serial->dev; int latency = ndi_latency_timer; if (latency == 0) latency = 1; if (latency > 99) latency = 99; dev_dbg(&udev->dev, "%s setting NDI device latency to %d\n", __func__, latency); dev_info(&udev->dev, "NDI device with a latency value of %d\n", latency); /* FIXME: errors are not returned */ usb_control_msg(udev, usb_sndctrlpipe(udev, 0), FTDI_SIO_SET_LATENCY_TIMER_REQUEST, FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, latency, 0, NULL, 0, WDR_TIMEOUT); return 0; } /* * First port on JTAG adaptors such as Olimex arm-usb-ocd or the FIC/OpenMoko * Neo1973 Debug Board is reserved for JTAG interface and can be accessed from * userspace using openocd. 
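 * The probe below therefore refuses interface 0 with -ENODEV so that only
 * the remaining UART interface(s) are bound by this driver.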
*/ static int ftdi_jtag_probe(struct usb_serial *serial) { struct usb_interface *intf = serial->interface; int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum == 0) { dev_info(&intf->dev, "Ignoring interface reserved for JTAG\n"); return -ENODEV; } return 0; } static int ftdi_8u2232c_probe(struct usb_serial *serial) { struct usb_device *udev = serial->dev; if (udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) return ftdi_jtag_probe(serial); if (udev->product && (!strcmp(udev->product, "Arrow USB Blaster") || !strcmp(udev->product, "BeagleBone/XDS100V2") || !strcmp(udev->product, "SNAP Connect E10"))) return ftdi_jtag_probe(serial); return 0; } /* * First two ports on JTAG adaptors using an FT4232 such as STMicroelectronics's * ST Micro Connect Lite are reserved for JTAG or other non-UART interfaces and * can be accessed from userspace. * The next two ports are enabled as UARTs by default, where port 2 is * a conventional RS-232 UART. */ static int ftdi_stmclite_probe(struct usb_serial *serial) { struct usb_interface *intf = serial->interface; int ifnum = intf->cur_altsetting->desc.bInterfaceNumber; if (ifnum < 2) { dev_info(&intf->dev, "Ignoring interface reserved for JTAG\n"); return -ENODEV; } return 0; } static void ftdi_port_remove(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); ftdi_gpio_remove(port); kfree(priv); } static int ftdi_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_device *dev = port->serial->dev; struct ftdi_private *priv = usb_get_serial_port_data(port); /* No error checking for this (will get errors later anyway) */ /* See ftdi_sio.h for description of what is reset */ usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_RESET_REQUEST, FTDI_SIO_RESET_REQUEST_TYPE, FTDI_SIO_RESET_SIO, priv->channel, NULL, 0, WDR_TIMEOUT); /* Termios defaults are set by usb_serial_init. We don't change port->tty->termios - this would lose speed settings, etc. 
This is same behaviour as serial.c/rs_open() - Kuba */ /* ftdi_set_termios will send usb control messages */ if (tty) ftdi_set_termios(tty, port, NULL); return usb_serial_generic_open(tty, port); } static void ftdi_dtr_rts(struct usb_serial_port *port, int on) { struct ftdi_private *priv = usb_get_serial_port_data(port); /* Disable flow control */ if (!on) { if (usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, 0, priv->channel, NULL, 0, WDR_TIMEOUT) < 0) { dev_err(&port->dev, "error from flowcontrol urb\n"); } } /* drop RTS and DTR */ if (on) set_mctrl(port, TIOCM_DTR | TIOCM_RTS); else clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); } /* The SIO requires the first byte to have: * B0 1 * B1 0 * B2..7 length of message excluding byte 0 * * The new devices do not require this byte */ static int ftdi_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size) { struct ftdi_private *priv; int count; unsigned long flags; priv = usb_get_serial_port_data(port); if (priv->chip_type == SIO) { unsigned char *buffer = dest; int i, len, c; count = 0; spin_lock_irqsave(&port->lock, flags); for (i = 0; i < size - 1; i += priv->max_packet_size) { len = min_t(int, size - i, priv->max_packet_size) - 1; c = kfifo_out(&port->write_fifo, &buffer[i + 1], len); if (!c) break; port->icount.tx += c; buffer[i] = (c << 2) + 1; count += c + 1; } spin_unlock_irqrestore(&port->lock, flags); } else { count = kfifo_out_locked(&port->write_fifo, dest, size, &port->lock); port->icount.tx += count; } return count; } #define FTDI_RS_ERR_MASK (FTDI_RS_BI | FTDI_RS_PE | FTDI_RS_FE | FTDI_RS_OE) static int ftdi_process_packet(struct usb_serial_port *port, struct ftdi_private *priv, unsigned char *buf, int len) { unsigned char status; bool brkint = false; int i; char flag; if (len < 2) { dev_dbg(&port->dev, "malformed packet\n"); return 0; } /* Compare new line status to the old one, signal if different/ N.B. packet may be processed more than once, but differences are only processed once. */ status = buf[0] & FTDI_STATUS_B0_MASK; if (status != priv->prev_status) { char diff_status = status ^ priv->prev_status; if (diff_status & FTDI_RS0_CTS) port->icount.cts++; if (diff_status & FTDI_RS0_DSR) port->icount.dsr++; if (diff_status & FTDI_RS0_RI) port->icount.rng++; if (diff_status & FTDI_RS0_RLSD) { struct tty_struct *tty; port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) usb_serial_handle_dcd_change(port, tty, status & FTDI_RS0_RLSD); tty_kref_put(tty); } wake_up_interruptible(&port->port.delta_msr_wait); priv->prev_status = status; } /* save if the transmitter is empty or not */ if (buf[1] & FTDI_RS_TEMT) priv->transmit_empty = 1; else priv->transmit_empty = 0; if (len == 2) return 0; /* status only */ /* * Break and error status must only be processed for packets with * data payload to avoid over-reporting. */ flag = TTY_NORMAL; if (buf[1] & FTDI_RS_ERR_MASK) { /* * Break takes precedence over parity, which takes precedence * over framing errors. Note that break is only associated * with the last character in the buffer and only when it's a * NUL. 
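 * The brkint flag set below defers the TTY_BREAK report until the final
 * character of the packet is pushed in the loop that follows.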
*/ if (buf[1] & FTDI_RS_BI && buf[len - 1] == '\0') { port->icount.brk++; brkint = true; } if (buf[1] & FTDI_RS_PE) { flag = TTY_PARITY; port->icount.parity++; } else if (buf[1] & FTDI_RS_FE) { flag = TTY_FRAME; port->icount.frame++; } /* Overrun is special, not associated with a char */ if (buf[1] & FTDI_RS_OE) { port->icount.overrun++; tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } } port->icount.rx += len - 2; if (brkint || port->sysrq) { for (i = 2; i < len; i++) { if (brkint && i == len - 1) { if (usb_serial_handle_break(port)) return len - 3; flag = TTY_BREAK; } if (usb_serial_handle_sysrq_char(port, buf[i])) continue; tty_insert_flip_char(&port->port, buf[i], flag); } } else { tty_insert_flip_string_fixed_flag(&port->port, buf + 2, flag, len - 2); } return len - 2; } static void ftdi_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; struct ftdi_private *priv = usb_get_serial_port_data(port); char *data = urb->transfer_buffer; int i; int len; int count = 0; for (i = 0; i < urb->actual_length; i += priv->max_packet_size) { len = min_t(int, urb->actual_length - i, priv->max_packet_size); count += ftdi_process_packet(port, priv, &data[i], len); } if (count) tty_flip_buffer_push(&port->port); } static int ftdi_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); u16 value; int ret; /* break_state = -1 to turn on break, and 0 to turn off break */ /* see drivers/char/tty_io.c to see it used */ /* last_set_data_value NEVER has the break bit set in it */ if (break_state) value = priv->last_set_data_value | FTDI_SIO_SET_BREAK; else value = priv->last_set_data_value; ret = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_DATA_REQUEST, FTDI_SIO_SET_DATA_REQUEST_TYPE, value, priv->channel, NULL, 0, WDR_TIMEOUT); if (ret < 0) { dev_err(&port->dev, "%s FAILED to enable/disable break state (state was %d)\n", __func__, break_state); return ret; } dev_dbg(&port->dev, "%s break state is %d - urb is %d\n", __func__, break_state, value); return 0; } static bool ftdi_tx_empty(struct usb_serial_port *port) { unsigned char buf[2]; int ret; ret = ftdi_get_modem_status(port, buf); if (ret == 2) { if (!(buf[1] & FTDI_RS_TEMT)) return false; } return true; } /* old_termios contains the original termios settings and tty->termios contains * the new setting to be used * WARNING: set_termios calls this with old_termios in kernel space */ static void ftdi_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_device *dev = port->serial->dev; struct device *ddev = &port->dev; struct ftdi_private *priv = usb_get_serial_port_data(port); struct ktermios *termios = &tty->termios; unsigned int cflag; u16 value, index; int ret; /* Force baud rate if this device requires it, unless it is set to B0. */ if (priv->force_baud && ((termios->c_cflag & CBAUD) != B0)) { dev_dbg(ddev, "%s: forcing baud rate for this device\n", __func__); tty_encode_baud_rate(tty, priv->force_baud, priv->force_baud); } /* Force RTS-CTS if this device requires it. */ if (priv->force_rtscts) { dev_dbg(ddev, "%s: forcing rtscts for this device\n", __func__); termios->c_cflag |= CRTSCTS; } /* * All FTDI UART chips are limited to CS7/8. We shouldn't pretend to * support CS5/6 and revert the CSIZE setting instead. 
* * CS5 however is used to control some smartcard readers which abuse * this limitation to switch modes. Original FTDI chips fall back to * eight data bits. * * TODO: Implement a quirk to only allow this with mentioned * readers. One I know of (Argolis Smartreader V1) * returns "USB smartcard server" as iInterface string. * The vendor didn't bother with a custom VID/PID of * course. */ if (C_CSIZE(tty) == CS6) { dev_warn(ddev, "requested CSIZE setting not supported\n"); termios->c_cflag &= ~CSIZE; if (old_termios) termios->c_cflag |= old_termios->c_cflag & CSIZE; else termios->c_cflag |= CS8; } cflag = termios->c_cflag; if (!old_termios) goto no_skip; if (old_termios->c_cflag == termios->c_cflag && old_termios->c_ispeed == termios->c_ispeed && old_termios->c_ospeed == termios->c_ospeed) goto no_c_cflag_changes; /* NOTE These routines can get interrupted by ftdi_sio_read_bulk_callback - need to examine what this means - don't see any problems yet */ if ((old_termios->c_cflag & (CSIZE|PARODD|PARENB|CMSPAR|CSTOPB)) == (termios->c_cflag & (CSIZE|PARODD|PARENB|CMSPAR|CSTOPB))) goto no_data_parity_stop_changes; no_skip: /* Set number of data bits, parity, stop bits */ value = 0; value |= (cflag & CSTOPB ? FTDI_SIO_SET_DATA_STOP_BITS_2 : FTDI_SIO_SET_DATA_STOP_BITS_1); if (cflag & PARENB) { if (cflag & CMSPAR) value |= cflag & PARODD ? FTDI_SIO_SET_DATA_PARITY_MARK : FTDI_SIO_SET_DATA_PARITY_SPACE; else value |= cflag & PARODD ? FTDI_SIO_SET_DATA_PARITY_ODD : FTDI_SIO_SET_DATA_PARITY_EVEN; } else { value |= FTDI_SIO_SET_DATA_PARITY_NONE; } switch (cflag & CSIZE) { case CS5: dev_dbg(ddev, "Setting CS5 quirk\n"); break; case CS7: value |= 7; dev_dbg(ddev, "Setting CS7\n"); break; default: case CS8: value |= 8; dev_dbg(ddev, "Setting CS8\n"); break; } /* This is needed by the break command since it uses the same command - but is or'ed with this value */ priv->last_set_data_value = value; if (usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_SET_DATA_REQUEST, FTDI_SIO_SET_DATA_REQUEST_TYPE, value, priv->channel, NULL, 0, WDR_SHORT_TIMEOUT) < 0) { dev_err(ddev, "%s FAILED to set databits/stopbits/parity\n", __func__); } /* Now do the baudrate */ no_data_parity_stop_changes: if ((cflag & CBAUD) == B0) { /* Disable flow control */ if (usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, 0, priv->channel, NULL, 0, WDR_TIMEOUT) < 0) { dev_err(ddev, "%s error from disable flowcontrol urb\n", __func__); } /* Drop RTS and DTR */ clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); } else { /* set the baudrate determined before */ mutex_lock(&priv->cfg_lock); if (change_speed(tty, port)) dev_err(ddev, "%s urb failed to set baudrate\n", __func__); mutex_unlock(&priv->cfg_lock); /* Ensure RTS and DTR are raised when baudrate changed from 0 */ if (old_termios && (old_termios->c_cflag & CBAUD) == B0) set_mctrl(port, TIOCM_DTR | TIOCM_RTS); } no_c_cflag_changes: /* Set hardware-assisted flow control */ value = 0; if (C_CRTSCTS(tty)) { dev_dbg(&port->dev, "enabling rts/cts flow control\n"); index = FTDI_SIO_RTS_CTS_HS; } else if (I_IXON(tty)) { dev_dbg(&port->dev, "enabling xon/xoff flow control\n"); index = FTDI_SIO_XON_XOFF_HS; value = STOP_CHAR(tty) << 8 | START_CHAR(tty); } else { dev_dbg(&port->dev, "disabling flow control\n"); index = FTDI_SIO_DISABLE_FLOW_CTRL; } index |= priv->channel; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, value, index, NULL, 0, WDR_TIMEOUT); if (ret < 
0) dev_err(&port->dev, "failed to set flow control: %d\n", ret); } /* * Get modem-control status. * * Returns the number of status bytes retrieved (device dependant), or * negative error code. */ static int ftdi_get_modem_status(struct usb_serial_port *port, unsigned char status[2]) { struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned char *buf; int len; int ret; buf = kmalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; /* * The device returns a two byte value (the SIO a 1 byte value) in the * same format as the data returned from the IN endpoint. */ if (priv->chip_type == SIO) len = 1; else len = 2; ret = usb_control_msg(port->serial->dev, usb_rcvctrlpipe(port->serial->dev, 0), FTDI_SIO_GET_MODEM_STATUS_REQUEST, FTDI_SIO_GET_MODEM_STATUS_REQUEST_TYPE, 0, priv->channel, buf, len, WDR_TIMEOUT); /* NOTE: We allow short responses and handle that below. */ if (ret < 1) { dev_err(&port->dev, "failed to get modem status: %d\n", ret); if (ret >= 0) ret = -EIO; ret = usb_translate_errors(ret); goto out; } status[0] = buf[0]; if (ret > 1) status[1] = buf[1]; else status[1] = 0; dev_dbg(&port->dev, "%s - 0x%02x%02x\n", __func__, status[0], status[1]); out: kfree(buf); return ret; } static int ftdi_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned char buf[2]; int ret; ret = ftdi_get_modem_status(port, buf); if (ret < 0) return ret; ret = (buf[0] & FTDI_SIO_DSR_MASK ? TIOCM_DSR : 0) | (buf[0] & FTDI_SIO_CTS_MASK ? TIOCM_CTS : 0) | (buf[0] & FTDI_SIO_RI_MASK ? TIOCM_RI : 0) | (buf[0] & FTDI_SIO_RLSD_MASK ? TIOCM_CD : 0) | priv->last_dtr_rts; return ret; } static int ftdi_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; return update_mctrl(port, set, clear); } static int ftdi_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; void __user *argp = (void __user *)arg; switch (cmd) { case TIOCSERGETLSR: return get_lsr_info(port, argp); default: break; } return -ENOIOCTLCMD; } static struct usb_serial_driver ftdi_device = { .driver = { .owner = THIS_MODULE, .name = "ftdi_sio", .dev_groups = ftdi_groups, }, .description = "FTDI USB Serial Device", .id_table = id_table_combined, .num_ports = 1, .bulk_in_size = 512, .bulk_out_size = 256, .probe = ftdi_probe, .port_probe = ftdi_port_probe, .port_remove = ftdi_port_remove, .open = ftdi_open, .dtr_rts = ftdi_dtr_rts, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .process_read_urb = ftdi_process_read_urb, .prepare_write_buffer = ftdi_prepare_write_buffer, .tiocmget = ftdi_tiocmget, .tiocmset = ftdi_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .ioctl = ftdi_ioctl, .get_serial = get_serial_info, .set_serial = set_serial_info, .set_termios = ftdi_set_termios, .break_ctl = ftdi_break_ctl, .tx_empty = ftdi_tx_empty, }; static struct usb_serial_driver * const serial_drivers[] = { &ftdi_device, NULL }; module_usb_serial_driver(serial_drivers, id_table_combined); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); module_param(ndi_latency_timer, int, 0644); MODULE_PARM_DESC(ndi_latency_timer, "NDI device latency timer override");
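/*
 * Illustrative only (not part of the driver): a minimal userspace sketch of
 * the two ioctl paths implemented above -- TIOCSSERIAL with ASYNC_LOW_LATENCY,
 * which set_serial_info() and write_latency_timer() turn into a 1 ms latency
 * timer, and TIOCSERGETLSR, which get_lsr_info() answers with TIOCSER_TEMT
 * when the last status packet reported an empty transmitter. The device node
 * name is an assumption; error handling is kept to a minimum.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/serial.h>
 *
 *	int main(void)
 *	{
 *		struct serial_struct ss;
 *		unsigned int lsr;
 *		int fd = open("/dev/ttyUSB0", O_RDWR | O_NOCTTY);
 *
 *		if (fd < 0)
 *			return 1;
 *
 *		// Request the low-latency (1 ms) timer via the standard flag.
 *		if (ioctl(fd, TIOCGSERIAL, &ss) == 0) {
 *			ss.flags |= ASYNC_LOW_LATENCY;
 *			ioctl(fd, TIOCSSERIAL, &ss);
 *		}
 *
 *		// Check whether the transmitter was empty in the last status report.
 *		if (ioctl(fd, TIOCSERGETLSR, &lsr) == 0)
 *			printf("transmitter %sempty\n",
 *			       (lsr & TIOCSER_TEMT) ? "" : "not ");
 *
 *		close(fd);
 *		return 0;
 *	}
 */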
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PAGE_H
#define _ASM_X86_PAGE_H

#include <linux/types.h>

#ifdef __KERNEL__

#include <asm/page_types.h>

#ifdef CONFIG_X86_64
#include <asm/page_64.h>
#else
#include <asm/page_32.h>
#endif	/* CONFIG_X86_64 */

#ifndef __ASSEMBLY__

struct page;

#include <linux/range.h>
extern struct range pfn_mapped[];
extern int nr_pfn_mapped;

static inline void clear_user_page(void *page, unsigned long vaddr,
				   struct page *pg)
{
	clear_page(page);
}

static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
				  struct page *topage)
{
	copy_page(to, from);
}

#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
	vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)

#ifndef __pa
#define __pa(x)		__phys_addr((unsigned long)(x))
#endif

#define __pa_nodebug(x)	__phys_addr_nodebug((unsigned long)(x))

/* __pa_symbol should be used for C visible symbols.
   This seems to be the official gcc blessed way to do such arithmetic. */
/*
 * We need __phys_reloc_hide() here because gcc may assume that there is no
 * overflow during __pa() calculation and can optimize it unexpectedly.
 * Newer versions of gcc provide -fno-strict-overflow switch to handle this
 * case properly. Once all supported versions of gcc understand it, we can
 * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated)
 */
#define __pa_symbol(x) \
	__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))

#ifndef __va
#define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
#endif

#define __boot_va(x)		__va(x)
#define __boot_pa(x)		__pa(x)

/*
 * virt_to_page(kaddr) returns a valid pointer if and only if
 * virt_addr_valid(kaddr) returns true.
 */
#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
extern bool __virt_addr_valid(unsigned long kaddr);
#define virt_addr_valid(kaddr)	__virt_addr_valid((unsigned long) (kaddr))

static __always_inline void *pfn_to_kaddr(unsigned long pfn)
{
	return __va(pfn << PAGE_SHIFT);
}

static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits)
{
	return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
}

static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits)
{
	return __canonical_address(vaddr, vaddr_bits) == vaddr;
}

#endif	/* __ASSEMBLY__ */

#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>

#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA

#endif	/* __KERNEL__ */

#endif /* _ASM_X86_PAGE_H */
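/*
 * A short worked example of the helpers above: with 48-bit virtual
 * addresses, __canonical_address() sign-extends bit 47, so
 *
 *	__canonical_address(0x0000800000000000ULL, 48) == 0xffff800000000000ULL
 *	__is_canonical_address(0x0000800000000000ULL, 48) == 0
 *	__is_canonical_address(0xffff800000000000ULL, 48) == 1
 */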
// SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter driver * * Copyright (C) 2009 - 2013 Johan Hovold (jhovold@gmail.com) * Copyright (C) 1999 - 2012 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2000 Peter Berger (pberger@brimson.com) * Copyright (C) 2000 Al Borchers (borchers@steinerpoint.com) * * This driver was originally
based on the ACM driver by Armin Fuerst (which was * based on a driver by Brad Keryan) * * See Documentation/usb/usb-serial.rst for more information on using this * driver */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/uaccess.h> #include <linux/serial.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/kfifo.h> #include <linux/idr.h> #define DRIVER_AUTHOR "Greg Kroah-Hartman <gregkh@linuxfoundation.org>" #define DRIVER_DESC "USB Serial Driver core" #define USB_SERIAL_TTY_MAJOR 188 #define USB_SERIAL_TTY_MINORS 512 /* should be enough for a while */ /* There is no MODULE_DEVICE_TABLE for usbserial.c. Instead the MODULE_DEVICE_TABLE declarations in each serial driver cause the "hotplug" program to pull in whatever module is necessary via modprobe, and modprobe will load usbserial because the serial drivers depend on it. */ static DEFINE_IDR(serial_minors); static DEFINE_MUTEX(table_lock); static LIST_HEAD(usb_serial_driver_list); /* * Look up the serial port structure. If it is found and it hasn't been * disconnected, return with the parent usb_serial structure's disc_mutex held * and its refcount incremented. Otherwise return NULL. */ struct usb_serial_port *usb_serial_port_get_by_minor(unsigned minor) { struct usb_serial *serial; struct usb_serial_port *port; mutex_lock(&table_lock); port = idr_find(&serial_minors, minor); if (!port) goto exit; serial = port->serial; mutex_lock(&serial->disc_mutex); if (serial->disconnected) { mutex_unlock(&serial->disc_mutex); port = NULL; } else { kref_get(&serial->kref); } exit: mutex_unlock(&table_lock); return port; } static int allocate_minors(struct usb_serial *serial, int num_ports) { struct usb_serial_port *port; unsigned int i, j; int minor; dev_dbg(&serial->interface->dev, "%s %d\n", __func__, num_ports); mutex_lock(&table_lock); for (i = 0; i < num_ports; ++i) { port = serial->port[i]; minor = idr_alloc(&serial_minors, port, 0, USB_SERIAL_TTY_MINORS, GFP_KERNEL); if (minor < 0) goto error; port->minor = minor; port->port_number = i; } serial->minors_reserved = 1; mutex_unlock(&table_lock); return 0; error: /* unwind the already allocated minors */ for (j = 0; j < i; ++j) idr_remove(&serial_minors, serial->port[j]->minor); mutex_unlock(&table_lock); return minor; } static void release_minors(struct usb_serial *serial) { int i; mutex_lock(&table_lock); for (i = 0; i < serial->num_ports; ++i) idr_remove(&serial_minors, serial->port[i]->minor); mutex_unlock(&table_lock); serial->minors_reserved = 0; } int usb_serial_claim_interface(struct usb_serial *serial, struct usb_interface *intf) { struct usb_driver *driver = serial->type->usb_driver; int ret; if (serial->sibling) return -EBUSY; ret = usb_driver_claim_interface(driver, intf, serial); if (ret) { dev_err(&serial->interface->dev, "failed to claim sibling interface: %d\n", ret); return ret; } serial->sibling = intf; return 0; } EXPORT_SYMBOL_GPL(usb_serial_claim_interface); static void release_sibling(struct usb_serial *serial, struct usb_interface *intf) { struct usb_driver *driver = serial->type->usb_driver; struct usb_interface *sibling; if (!serial->sibling) return; if (intf == serial->sibling) sibling = 
serial->interface; else sibling = serial->sibling; usb_set_intfdata(sibling, NULL); usb_driver_release_interface(driver, sibling); } static void destroy_serial(struct kref *kref) { struct usb_serial *serial; struct usb_serial_port *port; int i; serial = to_usb_serial(kref); /* return the minor range that this device had */ if (serial->minors_reserved) release_minors(serial); if (serial->attached && serial->type->release) serial->type->release(serial); /* Now that nothing is using the ports, they can be freed */ for (i = 0; i < serial->num_port_pointers; ++i) { port = serial->port[i]; if (port) { port->serial = NULL; put_device(&port->dev); } } usb_put_intf(serial->interface); usb_put_dev(serial->dev); kfree(serial); } void usb_serial_put(struct usb_serial *serial) { kref_put(&serial->kref, destroy_serial); } /***************************************************************************** * Driver tty interface functions *****************************************************************************/ /** * serial_install - install tty * @driver: the driver (USB in our case) * @tty: the tty being created * * Initialise the termios structure for this tty. We use the default * USB serial settings but permit them to be overridden by * serial->type->init_termios on first open. * * This is the first place a new tty gets used. Hence this is where we * acquire references to the usb_serial structure and the driver module, * where we store a pointer to the port. All these actions are reversed * in serial_cleanup(). */ static int serial_install(struct tty_driver *driver, struct tty_struct *tty) { int idx = tty->index; struct usb_serial *serial; struct usb_serial_port *port; bool init_termios; int retval = -ENODEV; port = usb_serial_port_get_by_minor(idx); if (!port) return retval; serial = port->serial; if (!try_module_get(serial->type->driver.owner)) goto err_put_serial; init_termios = (driver->termios[idx] == NULL); retval = tty_standard_install(driver, tty); if (retval) goto err_put_module; mutex_unlock(&serial->disc_mutex); /* allow the driver to update the initial settings */ if (init_termios && serial->type->init_termios) serial->type->init_termios(tty); tty->driver_data = port; return retval; err_put_module: module_put(serial->type->driver.owner); err_put_serial: usb_serial_put(serial); mutex_unlock(&serial->disc_mutex); return retval; } static int serial_port_activate(struct tty_port *tport, struct tty_struct *tty) { struct usb_serial_port *port = container_of(tport, struct usb_serial_port, port); struct usb_serial *serial = port->serial; int retval; mutex_lock(&serial->disc_mutex); if (serial->disconnected) { retval = -ENODEV; goto out_unlock; } retval = usb_autopm_get_interface(serial->interface); if (retval) goto out_unlock; retval = port->serial->type->open(tty, port); if (retval) usb_autopm_put_interface(serial->interface); out_unlock: mutex_unlock(&serial->disc_mutex); if (retval < 0) retval = usb_translate_errors(retval); return retval; } static int serial_open(struct tty_struct *tty, struct file *filp) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); return tty_port_open(&port->port, tty, filp); } /** * serial_port_shutdown - shut down hardware * @tport: tty port to shut down * * Shut down a USB serial port. Serialized against activate by the * tport mutex and kept to matching open/close pairs * of calls by the tty-port initialized flag. * * Not called if tty is console. 
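 * (serial_cleanup() below likewise leaves console ports untouched.)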
*/ static void serial_port_shutdown(struct tty_port *tport) { struct usb_serial_port *port = container_of(tport, struct usb_serial_port, port); struct usb_serial_driver *drv = port->serial->type; if (drv->close) drv->close(port); usb_autopm_put_interface(port->serial->interface); } static void serial_hangup(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); tty_port_hangup(&port->port); } static void serial_close(struct tty_struct *tty, struct file *filp) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); tty_port_close(&port->port, tty, filp); } /** * serial_cleanup - free resources post close/hangup * @tty: tty to clean up * * Do the resource freeing and refcount dropping for the port. * Avoid freeing the console. * * Called asynchronously after the last tty kref is dropped. */ static void serial_cleanup(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial; struct module *owner; dev_dbg(&port->dev, "%s\n", __func__); /* The console is magical. Do not hang up the console hardware * or there will be tears. */ if (port->port.console) return; tty->driver_data = NULL; serial = port->serial; owner = serial->type->driver.owner; usb_serial_put(serial); module_put(owner); } static ssize_t serial_write(struct tty_struct *tty, const u8 *buf, size_t count) { struct usb_serial_port *port = tty->driver_data; int retval = -ENODEV; if (port->serial->dev->state == USB_STATE_NOTATTACHED) goto exit; dev_dbg(&port->dev, "%s - %zu byte(s)\n", __func__, count); retval = port->serial->type->write(tty, port, buf, count); if (retval < 0) retval = usb_translate_errors(retval); exit: return retval; } static unsigned int serial_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); return port->serial->type->write_room(tty); } static unsigned int serial_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s\n", __func__); if (serial->disconnected) return 0; return serial->type->chars_in_buffer(tty); } static void serial_wait_until_sent(struct tty_struct *tty, int timeout) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s\n", __func__); if (!port->serial->type->wait_until_sent) return; mutex_lock(&serial->disc_mutex); if (!serial->disconnected) port->serial->type->wait_until_sent(tty, timeout); mutex_unlock(&serial->disc_mutex); } static void serial_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->throttle) port->serial->type->throttle(tty); } static void serial_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->unthrottle) port->serial->type->unthrottle(tty); } static int serial_get_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct tty_port *tport = &port->port; unsigned int close_delay, closing_wait; mutex_lock(&tport->mutex); close_delay = jiffies_to_msecs(tport->close_delay) / 10; closing_wait = tport->closing_wait; if (closing_wait != ASYNC_CLOSING_WAIT_NONE) closing_wait = jiffies_to_msecs(closing_wait) / 10; ss->line = port->minor; ss->close_delay = close_delay; 
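	/* serial_struct reports close_delay and closing_wait in hundredths of
	 * a second, hence the jiffies_to_msecs() / 10 conversion above. */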
ss->closing_wait = closing_wait; if (port->serial->type->get_serial) port->serial->type->get_serial(tty, ss); mutex_unlock(&tport->mutex); return 0; } static int serial_set_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct tty_port *tport = &port->port; unsigned int close_delay, closing_wait; int ret = 0; close_delay = msecs_to_jiffies(ss->close_delay * 10); closing_wait = ss->closing_wait; if (closing_wait != ASYNC_CLOSING_WAIT_NONE) closing_wait = msecs_to_jiffies(closing_wait * 10); mutex_lock(&tport->mutex); if (!capable(CAP_SYS_ADMIN)) { if (close_delay != tport->close_delay || closing_wait != tport->closing_wait) { ret = -EPERM; goto out_unlock; } } if (port->serial->type->set_serial) { ret = port->serial->type->set_serial(tty, ss); if (ret) goto out_unlock; } tport->close_delay = close_delay; tport->closing_wait = closing_wait; out_unlock: mutex_unlock(&tport->mutex); return ret; } static int serial_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; int retval = -ENOIOCTLCMD; dev_dbg(&port->dev, "%s - cmd 0x%04x\n", __func__, cmd); switch (cmd) { case TIOCMIWAIT: if (port->serial->type->tiocmiwait) retval = port->serial->type->tiocmiwait(tty, arg); break; default: if (port->serial->type->ioctl) retval = port->serial->type->ioctl(tty, cmd, arg); } return retval; } static void serial_set_termios(struct tty_struct *tty, const struct ktermios *old) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->set_termios) port->serial->type->set_termios(tty, port, old); else tty_termios_copy_hw(&tty->termios, old); } static int serial_break(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->break_ctl) return port->serial->type->break_ctl(tty, break_state); return -ENOTTY; } static int serial_proc_show(struct seq_file *m, void *v) { struct usb_serial *serial; struct usb_serial_port *port; int i; char tmp[40]; seq_puts(m, "usbserinfo:1.0 driver:2.0\n"); for (i = 0; i < USB_SERIAL_TTY_MINORS; ++i) { port = usb_serial_port_get_by_minor(i); if (port == NULL) continue; serial = port->serial; seq_printf(m, "%d:", i); if (serial->type->driver.owner) seq_printf(m, " module:%s", module_name(serial->type->driver.owner)); seq_printf(m, " name:\"%s\"", serial->type->description); seq_printf(m, " vendor:%04x product:%04x", le16_to_cpu(serial->dev->descriptor.idVendor), le16_to_cpu(serial->dev->descriptor.idProduct)); seq_printf(m, " num_ports:%d", serial->num_ports); seq_printf(m, " port:%d", port->port_number); usb_make_path(serial->dev, tmp, sizeof(tmp)); seq_printf(m, " path:%s", tmp); seq_putc(m, '\n'); usb_serial_put(serial); mutex_unlock(&serial->disc_mutex); } return 0; } static int serial_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->tiocmget) return port->serial->type->tiocmget(tty); return -ENOTTY; } static int serial_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->tiocmset) return port->serial->type->tiocmset(tty, set, clear); return -ENOTTY; } static int serial_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { struct usb_serial_port *port = 
tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->get_icount) return port->serial->type->get_icount(tty, icount); return -ENOTTY; } /* * We would be calling tty_wakeup here, but unfortunately some line * disciplines have an annoying habit of calling tty->write from * the write wakeup callback (e.g. n_hdlc.c). */ void usb_serial_port_softint(struct usb_serial_port *port) { schedule_work(&port->work); } EXPORT_SYMBOL_GPL(usb_serial_port_softint); static void usb_serial_port_work(struct work_struct *work) { struct usb_serial_port *port = container_of(work, struct usb_serial_port, work); tty_port_tty_wakeup(&port->port); } static void usb_serial_port_poison_urbs(struct usb_serial_port *port) { int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_poison_urb(port->read_urbs[i]); for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) usb_poison_urb(port->write_urbs[i]); usb_poison_urb(port->interrupt_in_urb); usb_poison_urb(port->interrupt_out_urb); } static void usb_serial_port_unpoison_urbs(struct usb_serial_port *port) { int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_unpoison_urb(port->read_urbs[i]); for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) usb_unpoison_urb(port->write_urbs[i]); usb_unpoison_urb(port->interrupt_in_urb); usb_unpoison_urb(port->interrupt_out_urb); } static void usb_serial_port_release(struct device *dev) { struct usb_serial_port *port = to_usb_serial_port(dev); int i; dev_dbg(dev, "%s\n", __func__); usb_free_urb(port->interrupt_in_urb); usb_free_urb(port->interrupt_out_urb); for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { usb_free_urb(port->read_urbs[i]); kfree(port->bulk_in_buffers[i]); } for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { usb_free_urb(port->write_urbs[i]); kfree(port->bulk_out_buffers[i]); } kfifo_free(&port->write_fifo); kfree(port->interrupt_in_buffer); kfree(port->interrupt_out_buffer); tty_port_destroy(&port->port); kfree(port); } static struct usb_serial *create_serial(struct usb_device *dev, struct usb_interface *interface, struct usb_serial_driver *driver) { struct usb_serial *serial; serial = kzalloc(sizeof(*serial), GFP_KERNEL); if (!serial) return NULL; serial->dev = usb_get_dev(dev); serial->type = driver; serial->interface = usb_get_intf(interface); kref_init(&serial->kref); mutex_init(&serial->disc_mutex); serial->minors_reserved = 0; return serial; } static const struct usb_device_id *match_dynamic_id(struct usb_interface *intf, struct usb_serial_driver *drv) { struct usb_dynid *dynid; spin_lock(&drv->dynids.lock); list_for_each_entry(dynid, &drv->dynids.list, node) { if (usb_match_one_id(intf, &dynid->id)) { spin_unlock(&drv->dynids.lock); return &dynid->id; } } spin_unlock(&drv->dynids.lock); return NULL; } static const struct usb_device_id *get_iface_id(struct usb_serial_driver *drv, struct usb_interface *intf) { const struct usb_device_id *id; id = usb_match_id(intf, drv->id_table); if (id) { dev_dbg(&intf->dev, "static descriptor matches\n"); goto exit; } id = match_dynamic_id(intf, drv); if (id) dev_dbg(&intf->dev, "dynamic descriptor matches\n"); exit: return id; } /* Caller must hold table_lock */ static struct usb_serial_driver *search_serial_device( struct usb_interface *iface) { const struct usb_device_id *id = NULL; struct usb_serial_driver *drv; struct usb_driver *driver = to_usb_driver(iface->dev.driver); /* Check if the usb id matches a known device */ list_for_each_entry(drv, &usb_serial_driver_list, driver_list) { if (drv->usb_driver == driver) id = get_iface_id(drv, 
iface); if (id) return drv; } return NULL; } static bool serial_port_carrier_raised(struct tty_port *port) { struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); struct usb_serial_driver *drv = p->serial->type; if (drv->carrier_raised) return drv->carrier_raised(p); /* No carrier control - don't block */ return true; } static void serial_port_dtr_rts(struct tty_port *port, bool on) { struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); struct usb_serial_driver *drv = p->serial->type; if (drv->dtr_rts) drv->dtr_rts(p, on); } static ssize_t port_number_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_serial_port *port = to_usb_serial_port(dev); return sprintf(buf, "%u\n", port->port_number); } static DEVICE_ATTR_RO(port_number); static struct attribute *usb_serial_port_attrs[] = { &dev_attr_port_number.attr, NULL }; ATTRIBUTE_GROUPS(usb_serial_port); static const struct tty_port_operations serial_port_ops = { .carrier_raised = serial_port_carrier_raised, .dtr_rts = serial_port_dtr_rts, .activate = serial_port_activate, .shutdown = serial_port_shutdown, }; static void store_endpoint(struct usb_serial *serial, struct usb_serial_endpoints *epds, struct usb_endpoint_descriptor *epd) { struct device *dev = &serial->interface->dev; u8 addr = epd->bEndpointAddress; if (usb_endpoint_is_bulk_in(epd)) { if (epds->num_bulk_in == ARRAY_SIZE(epds->bulk_in)) return; dev_dbg(dev, "found bulk in endpoint %02x\n", addr); epds->bulk_in[epds->num_bulk_in++] = epd; } else if (usb_endpoint_is_bulk_out(epd)) { if (epds->num_bulk_out == ARRAY_SIZE(epds->bulk_out)) return; dev_dbg(dev, "found bulk out endpoint %02x\n", addr); epds->bulk_out[epds->num_bulk_out++] = epd; } else if (usb_endpoint_is_int_in(epd)) { if (epds->num_interrupt_in == ARRAY_SIZE(epds->interrupt_in)) return; dev_dbg(dev, "found interrupt in endpoint %02x\n", addr); epds->interrupt_in[epds->num_interrupt_in++] = epd; } else if (usb_endpoint_is_int_out(epd)) { if (epds->num_interrupt_out == ARRAY_SIZE(epds->interrupt_out)) return; dev_dbg(dev, "found interrupt out endpoint %02x\n", addr); epds->interrupt_out[epds->num_interrupt_out++] = epd; } } static void find_endpoints(struct usb_serial *serial, struct usb_serial_endpoints *epds, struct usb_interface *intf) { struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *epd; unsigned int i; iface_desc = intf->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { epd = &iface_desc->endpoint[i].desc; store_endpoint(serial, epds, epd); } } static int setup_port_bulk_in(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; int i; buffer_size = max_t(int, type->bulk_in_size, usb_endpoint_maxp(epd)); port->bulk_in_size = buffer_size; port->bulk_in_endpointAddress = epd->bEndpointAddress; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { set_bit(i, &port->read_urbs_free); port->read_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!port->read_urbs[i]) return -ENOMEM; port->bulk_in_buffers[i] = kmalloc(buffer_size, GFP_KERNEL); if (!port->bulk_in_buffers[i]) return -ENOMEM; usb_fill_bulk_urb(port->read_urbs[i], udev, usb_rcvbulkpipe(udev, epd->bEndpointAddress), port->bulk_in_buffers[i], buffer_size, type->read_bulk_callback, port); } port->read_urb = port->read_urbs[0]; port->bulk_in_buffer = port->bulk_in_buffers[0]; return 0; } static int setup_port_bulk_out(struct 
usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; int i; if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL)) return -ENOMEM; if (type->bulk_out_size) buffer_size = type->bulk_out_size; else buffer_size = usb_endpoint_maxp(epd); port->bulk_out_size = buffer_size; port->bulk_out_endpointAddress = epd->bEndpointAddress; for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { set_bit(i, &port->write_urbs_free); port->write_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!port->write_urbs[i]) return -ENOMEM; port->bulk_out_buffers[i] = kmalloc(buffer_size, GFP_KERNEL); if (!port->bulk_out_buffers[i]) return -ENOMEM; usb_fill_bulk_urb(port->write_urbs[i], udev, usb_sndbulkpipe(udev, epd->bEndpointAddress), port->bulk_out_buffers[i], buffer_size, type->write_bulk_callback, port); } port->write_urb = port->write_urbs[0]; port->bulk_out_buffer = port->bulk_out_buffers[0]; return 0; } static int setup_port_interrupt_in(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; port->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!port->interrupt_in_urb) return -ENOMEM; buffer_size = usb_endpoint_maxp(epd); port->interrupt_in_endpointAddress = epd->bEndpointAddress; port->interrupt_in_buffer = kmalloc(buffer_size, GFP_KERNEL); if (!port->interrupt_in_buffer) return -ENOMEM; usb_fill_int_urb(port->interrupt_in_urb, udev, usb_rcvintpipe(udev, epd->bEndpointAddress), port->interrupt_in_buffer, buffer_size, type->read_int_callback, port, epd->bInterval); return 0; } static int setup_port_interrupt_out(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; port->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!port->interrupt_out_urb) return -ENOMEM; buffer_size = usb_endpoint_maxp(epd); port->interrupt_out_size = buffer_size; port->interrupt_out_endpointAddress = epd->bEndpointAddress; port->interrupt_out_buffer = kmalloc(buffer_size, GFP_KERNEL); if (!port->interrupt_out_buffer) return -ENOMEM; usb_fill_int_urb(port->interrupt_out_urb, udev, usb_sndintpipe(udev, epd->bEndpointAddress), port->interrupt_out_buffer, buffer_size, type->write_int_callback, port, epd->bInterval); return 0; } static int usb_serial_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct device *ddev = &interface->dev; struct usb_device *dev = interface_to_usbdev(interface); struct usb_serial *serial = NULL; struct usb_serial_port *port; struct usb_serial_endpoints *epds; struct usb_serial_driver *type = NULL; int retval; int i; int num_ports = 0; unsigned char max_endpoints; mutex_lock(&table_lock); type = search_serial_device(interface); if (!type) { mutex_unlock(&table_lock); dev_dbg(ddev, "none matched\n"); return -ENODEV; } if (!try_module_get(type->driver.owner)) { mutex_unlock(&table_lock); dev_err(ddev, "module get failed, exiting\n"); return -EIO; } mutex_unlock(&table_lock); serial = create_serial(dev, interface, type); if (!serial) { retval = -ENOMEM; goto err_put_module; } /* if this device type has a probe function, call it */ if (type->probe) { const struct usb_device_id *id; id = get_iface_id(type, interface); retval = type->probe(serial, id); if (retval) { dev_dbg(ddev, "sub driver rejected device\n"); 
goto err_release_sibling; } } /* descriptor matches, let's find the endpoints needed */ epds = kzalloc(sizeof(*epds), GFP_KERNEL); if (!epds) { retval = -ENOMEM; goto err_release_sibling; } find_endpoints(serial, epds, interface); if (serial->sibling) find_endpoints(serial, epds, serial->sibling); if (epds->num_bulk_in < type->num_bulk_in || epds->num_bulk_out < type->num_bulk_out || epds->num_interrupt_in < type->num_interrupt_in || epds->num_interrupt_out < type->num_interrupt_out) { dev_err(ddev, "required endpoints missing\n"); retval = -ENODEV; goto err_free_epds; } if (type->calc_num_ports) { retval = type->calc_num_ports(serial, epds); if (retval < 0) goto err_free_epds; num_ports = retval; } if (!num_ports) num_ports = type->num_ports; if (num_ports > MAX_NUM_PORTS) { dev_warn(ddev, "too many ports requested: %d\n", num_ports); num_ports = MAX_NUM_PORTS; } serial->num_ports = (unsigned char)num_ports; serial->num_bulk_in = epds->num_bulk_in; serial->num_bulk_out = epds->num_bulk_out; serial->num_interrupt_in = epds->num_interrupt_in; serial->num_interrupt_out = epds->num_interrupt_out; /* found all that we need */ dev_info(ddev, "%s converter detected\n", type->description); /* create our ports, we need as many as the max endpoints */ /* we don't use num_ports here because some devices have more endpoint pairs than ports */ max_endpoints = max(epds->num_bulk_in, epds->num_bulk_out); max_endpoints = max(max_endpoints, epds->num_interrupt_in); max_endpoints = max(max_endpoints, epds->num_interrupt_out); max_endpoints = max(max_endpoints, serial->num_ports); serial->num_port_pointers = max_endpoints; dev_dbg(ddev, "setting up %d port structure(s)\n", max_endpoints); for (i = 0; i < max_endpoints; ++i) { port = kzalloc(sizeof(struct usb_serial_port), GFP_KERNEL); if (!port) { retval = -ENOMEM; goto err_free_epds; } tty_port_init(&port->port); port->port.ops = &serial_port_ops; port->serial = serial; spin_lock_init(&port->lock); /* Keep this for private driver use for the moment but should probably go away */ INIT_WORK(&port->work, usb_serial_port_work); serial->port[i] = port; port->dev.parent = &interface->dev; port->dev.driver = NULL; port->dev.bus = &usb_serial_bus_type; port->dev.release = &usb_serial_port_release; port->dev.groups = usb_serial_port_groups; device_initialize(&port->dev); } /* set up the endpoint information */ for (i = 0; i < epds->num_bulk_in; ++i) { retval = setup_port_bulk_in(serial->port[i], epds->bulk_in[i]); if (retval) goto err_free_epds; } for (i = 0; i < epds->num_bulk_out; ++i) { retval = setup_port_bulk_out(serial->port[i], epds->bulk_out[i]); if (retval) goto err_free_epds; } if (serial->type->read_int_callback) { for (i = 0; i < epds->num_interrupt_in; ++i) { retval = setup_port_interrupt_in(serial->port[i], epds->interrupt_in[i]); if (retval) goto err_free_epds; } } else if (epds->num_interrupt_in) { dev_dbg(ddev, "The device claims to support interrupt in transfers, but read_int_callback is not defined\n"); } if (serial->type->write_int_callback) { for (i = 0; i < epds->num_interrupt_out; ++i) { retval = setup_port_interrupt_out(serial->port[i], epds->interrupt_out[i]); if (retval) goto err_free_epds; } } else if (epds->num_interrupt_out) { dev_dbg(ddev, "The device claims to support interrupt out transfers, but write_int_callback is not defined\n"); } usb_set_intfdata(interface, serial); /* if this device type has an attach function, call it */ if (type->attach) { retval = type->attach(serial); if (retval < 0) goto err_free_epds; serial->attached = 
1; if (retval > 0) { /* quietly accept this device, but don't bind to a serial port as it's about to disappear */ serial->num_ports = 0; goto exit; } } else { serial->attached = 1; } retval = allocate_minors(serial, num_ports); if (retval) { dev_err(ddev, "No more free serial minor numbers\n"); goto err_free_epds; } /* register all of the individual ports with the driver core */ for (i = 0; i < num_ports; ++i) { port = serial->port[i]; dev_set_name(&port->dev, "ttyUSB%d", port->minor); dev_dbg(ddev, "registering %s\n", dev_name(&port->dev)); device_enable_async_suspend(&port->dev); retval = device_add(&port->dev); if (retval) dev_err(ddev, "Error registering port device, continuing\n"); } if (num_ports > 0) usb_serial_console_init(serial->port[0]->minor); exit: kfree(epds); module_put(type->driver.owner); return 0; err_free_epds: kfree(epds); err_release_sibling: release_sibling(serial, interface); usb_serial_put(serial); err_put_module: module_put(type->driver.owner); return retval; } static void usb_serial_disconnect(struct usb_interface *interface) { int i; struct usb_serial *serial = usb_get_intfdata(interface); struct device *dev = &interface->dev; struct usb_serial_port *port; struct tty_struct *tty; /* sibling interface is cleaning up */ if (!serial) return; usb_serial_console_disconnect(serial); mutex_lock(&serial->disc_mutex); /* must set a flag, to signal subdrivers */ serial->disconnected = 1; mutex_unlock(&serial->disc_mutex); for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; tty = tty_port_tty_get(&port->port); if (tty) { tty_vhangup(tty); tty_kref_put(tty); } usb_serial_port_poison_urbs(port); wake_up_interruptible(&port->port.delta_msr_wait); cancel_work_sync(&port->work); if (device_is_registered(&port->dev)) device_del(&port->dev); } if (serial->type->disconnect) serial->type->disconnect(serial); release_sibling(serial, interface); /* let the last holder of this object cause it to be cleaned up */ usb_serial_put(serial); dev_info(dev, "device disconnected\n"); } int usb_serial_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_serial *serial = usb_get_intfdata(intf); int i, r; /* suspend when called for first sibling interface */ if (serial->suspend_count++) return 0; /* * serial->type->suspend() MUST return 0 in system sleep context, * otherwise, the resume callback has to recover device from * previous suspend failure. 
*/ if (serial->type->suspend) { r = serial->type->suspend(serial, message); if (r < 0) { serial->suspend_count--; return r; } } for (i = 0; i < serial->num_ports; ++i) usb_serial_port_poison_urbs(serial->port[i]); return 0; } EXPORT_SYMBOL(usb_serial_suspend); static void usb_serial_unpoison_port_urbs(struct usb_serial *serial) { int i; for (i = 0; i < serial->num_ports; ++i) usb_serial_port_unpoison_urbs(serial->port[i]); } int usb_serial_resume(struct usb_interface *intf) { struct usb_serial *serial = usb_get_intfdata(intf); int rv; /* resume when called for last sibling interface */ if (--serial->suspend_count) return 0; usb_serial_unpoison_port_urbs(serial); if (serial->type->resume) rv = serial->type->resume(serial); else rv = usb_serial_generic_resume(serial); return rv; } EXPORT_SYMBOL(usb_serial_resume); static int usb_serial_reset_resume(struct usb_interface *intf) { struct usb_serial *serial = usb_get_intfdata(intf); int rv; /* resume when called for last sibling interface */ if (--serial->suspend_count) return 0; usb_serial_unpoison_port_urbs(serial); if (serial->type->reset_resume) { rv = serial->type->reset_resume(serial); } else { rv = -EOPNOTSUPP; intf->needs_binding = 1; } return rv; } static const struct tty_operations serial_ops = { .open = serial_open, .close = serial_close, .write = serial_write, .hangup = serial_hangup, .write_room = serial_write_room, .ioctl = serial_ioctl, .set_termios = serial_set_termios, .throttle = serial_throttle, .unthrottle = serial_unthrottle, .break_ctl = serial_break, .chars_in_buffer = serial_chars_in_buffer, .wait_until_sent = serial_wait_until_sent, .tiocmget = serial_tiocmget, .tiocmset = serial_tiocmset, .get_icount = serial_get_icount, .set_serial = serial_set_serial, .get_serial = serial_get_serial, .cleanup = serial_cleanup, .install = serial_install, .proc_show = serial_proc_show, }; struct tty_driver *usb_serial_tty_driver; static int __init usb_serial_init(void) { int result; usb_serial_tty_driver = tty_alloc_driver(USB_SERIAL_TTY_MINORS, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV); if (IS_ERR(usb_serial_tty_driver)) return PTR_ERR(usb_serial_tty_driver); /* Initialize our global data */ result = bus_register(&usb_serial_bus_type); if (result) { pr_err("%s - registering bus driver failed\n", __func__); goto err_put_driver; } usb_serial_tty_driver->driver_name = "usbserial"; usb_serial_tty_driver->name = "ttyUSB"; usb_serial_tty_driver->major = USB_SERIAL_TTY_MAJOR; usb_serial_tty_driver->minor_start = 0; usb_serial_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; usb_serial_tty_driver->subtype = SERIAL_TYPE_NORMAL; usb_serial_tty_driver->init_termios = tty_std_termios; usb_serial_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; usb_serial_tty_driver->init_termios.c_ispeed = 9600; usb_serial_tty_driver->init_termios.c_ospeed = 9600; tty_set_operations(usb_serial_tty_driver, &serial_ops); result = tty_register_driver(usb_serial_tty_driver); if (result) { pr_err("%s - tty_register_driver failed\n", __func__); goto err_unregister_bus; } /* register the generic driver, if we should */ result = usb_serial_generic_register(); if (result < 0) { pr_err("%s - registering generic driver failed\n", __func__); goto err_unregister_driver; } return result; err_unregister_driver: tty_unregister_driver(usb_serial_tty_driver); err_unregister_bus: bus_unregister(&usb_serial_bus_type); err_put_driver: pr_err("%s - returning with error %d\n", __func__, result); tty_driver_kref_put(usb_serial_tty_driver); return result; } static 
void __exit usb_serial_exit(void) { usb_serial_console_exit(); usb_serial_generic_deregister(); tty_unregister_driver(usb_serial_tty_driver); tty_driver_kref_put(usb_serial_tty_driver); bus_unregister(&usb_serial_bus_type); idr_destroy(&serial_minors); } module_init(usb_serial_init); module_exit(usb_serial_exit); #define set_to_generic_if_null(type, function) \ do { \ if (!type->function) { \ type->function = usb_serial_generic_##function; \ pr_debug("%s: using generic " #function "\n", \ type->driver.name); \ } \ } while (0) static void usb_serial_operations_init(struct usb_serial_driver *device) { set_to_generic_if_null(device, open); set_to_generic_if_null(device, write); set_to_generic_if_null(device, close); set_to_generic_if_null(device, write_room); set_to_generic_if_null(device, chars_in_buffer); if (device->tx_empty) set_to_generic_if_null(device, wait_until_sent); set_to_generic_if_null(device, read_bulk_callback); set_to_generic_if_null(device, write_bulk_callback); set_to_generic_if_null(device, process_read_urb); set_to_generic_if_null(device, prepare_write_buffer); } static int usb_serial_register(struct usb_serial_driver *driver) { int retval; if (usb_disabled()) return -ENODEV; if (!driver->description) driver->description = driver->driver.name; if (!driver->usb_driver) { WARN(1, "Serial driver %s has no usb_driver\n", driver->description); return -EINVAL; } /* Prevent individual ports from being unbound. */ driver->driver.suppress_bind_attrs = true; usb_serial_operations_init(driver); /* Add this device to our list of devices */ mutex_lock(&table_lock); list_add(&driver->driver_list, &usb_serial_driver_list); retval = usb_serial_bus_register(driver); if (retval) { pr_err("problem %d when registering driver %s\n", retval, driver->description); list_del(&driver->driver_list); } else { pr_info("USB Serial support registered for %s\n", driver->description); } mutex_unlock(&table_lock); return retval; } static void usb_serial_deregister(struct usb_serial_driver *device) { pr_info("USB Serial deregistering driver %s\n", device->description); mutex_lock(&table_lock); list_del(&device->driver_list); mutex_unlock(&table_lock); usb_serial_bus_deregister(device); } /** * usb_serial_register_drivers - register drivers for a usb-serial module * @serial_drivers: NULL-terminated array of pointers to drivers to be registered * @name: name of the usb_driver for this set of @serial_drivers * @id_table: list of all devices this @serial_drivers set binds to * * Registers all the drivers in the @serial_drivers array, and dynamically * creates a struct usb_driver with the name @name and id_table of @id_table. */ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], const char *name, const struct usb_device_id *id_table) { int rc; struct usb_driver *udriver; struct usb_serial_driver * const *sd; /* * udriver must be registered before any of the serial drivers, * because the store_new_id() routine for the serial drivers (in * bus.c) probes udriver. * * Performance hack: We don't want udriver to be probed until * the serial drivers are registered, because the probe would * simply fail for lack of a matching serial driver. * So we leave udriver's id_table set to NULL until we are all set. * * Suspend/resume support is implemented in the usb-serial core, * so fill in the PM-related fields in udriver. 
*/ udriver = kzalloc(sizeof(*udriver), GFP_KERNEL); if (!udriver) return -ENOMEM; udriver->name = name; udriver->no_dynamic_id = 1; udriver->supports_autosuspend = 1; udriver->suspend = usb_serial_suspend; udriver->resume = usb_serial_resume; udriver->probe = usb_serial_probe; udriver->disconnect = usb_serial_disconnect; /* we only set the reset_resume field if the serial_driver has one */ for (sd = serial_drivers; *sd; ++sd) { if ((*sd)->reset_resume) { udriver->reset_resume = usb_serial_reset_resume; break; } } rc = usb_register(udriver); if (rc) goto err_free_driver; for (sd = serial_drivers; *sd; ++sd) { (*sd)->usb_driver = udriver; rc = usb_serial_register(*sd); if (rc) goto err_deregister_drivers; } /* Now set udriver's id_table and look for matches */ udriver->id_table = id_table; rc = driver_attach(&udriver->driver); return 0; err_deregister_drivers: while (sd-- > serial_drivers) usb_serial_deregister(*sd); usb_deregister(udriver); err_free_driver: kfree(udriver); return rc; } EXPORT_SYMBOL_GPL(usb_serial_register_drivers); /** * usb_serial_deregister_drivers - deregister drivers for a usb-serial module * @serial_drivers: NULL-terminated array of pointers to drivers to be deregistered * * Deregisters all the drivers in the @serial_drivers array and deregisters and * frees the struct usb_driver that was created by the call to * usb_serial_register_drivers(). */ void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]) { struct usb_driver *udriver = (*serial_drivers)->usb_driver; for (; *serial_drivers; ++serial_drivers) usb_serial_deregister(*serial_drivers); usb_deregister(udriver); kfree(udriver); } EXPORT_SYMBOL_GPL(usb_serial_deregister_drivers); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2");
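/*
 * Minimal usage sketch (illustrative only, not taken from the original
 * sources): how a usb-serial subdriver plugs into the core registered
 * above via usb_serial_register_drivers().  All example_* identifiers
 * and the 0x1234/0x5678 vendor/product IDs are hypothetical placeholders;
 * a real subdriver would typically also provide callbacks (.probe,
 * .attach, .calc_num_ports, ...) where the generic implementations
 * filled in by usb_serial_operations_init() are not sufficient.
 */
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

static const struct usb_device_id example_id_table[] = {
	{ USB_DEVICE(0x1234, 0x5678) },		/* hypothetical VID/PID */
	{ }					/* terminating entry */
};
MODULE_DEVICE_TABLE(usb, example_id_table);

static struct usb_serial_driver example_device = {
	.driver = {
		.owner	= THIS_MODULE,
		.name	= "example_serial",
	},
	.id_table	= example_id_table,
	.num_ports	= 1,			/* one ttyUSB node per interface */
};

static struct usb_serial_driver * const example_serial_drivers[] = {
	&example_device, NULL
};

static int __init example_serial_init(void)
{
	/*
	 * usb_serial_register_drivers() allocates and registers the
	 * underlying struct usb_driver on the subdriver's behalf and
	 * registers each entry of the NULL-terminated array.
	 */
	return usb_serial_register_drivers(example_serial_drivers,
					   KBUILD_MODNAME, example_id_table);
}
module_init(example_serial_init);

static void __exit example_serial_exit(void)
{
	usb_serial_deregister_drivers(example_serial_drivers);
}
module_exit(example_serial_exit);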
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/file.c * * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes * * Manage the dynamic fd arrays in the process files_struct. 
*/ #include <linux/syscalls.h> #include <linux/export.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/bitops.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/close_range.h> #include <net/sock.h> #include "internal.h" unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; /* our min() is unusable in constant expressions ;-/ */ #define __const_min(x, y) ((x) < (y) ? (x) : (y)) unsigned int sysctl_nr_open_max = __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG; static void __free_fdtable(struct fdtable *fdt) { kvfree(fdt->fd); kvfree(fdt->open_fds); kfree(fdt); } static void free_fdtable_rcu(struct rcu_head *rcu) { __free_fdtable(container_of(rcu, struct fdtable, rcu)); } #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) /* * Copy 'count' fd bits from the old table to the new table and clear the extra * space if any. This does not copy the file pointers. Called with the files * spinlock held for write. */ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, unsigned int count) { unsigned int cpy, set; cpy = count / BITS_PER_BYTE; set = (nfdt->max_fds - count) / BITS_PER_BYTE; memcpy(nfdt->open_fds, ofdt->open_fds, cpy); memset((char *)nfdt->open_fds + cpy, 0, set); memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); memset((char *)nfdt->close_on_exec + cpy, 0, set); cpy = BITBIT_SIZE(count); set = BITBIT_SIZE(nfdt->max_fds) - cpy; memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); memset((char *)nfdt->full_fds_bits + cpy, 0, set); } /* * Copy all file descriptors from the old table to the new, expanded table and * clear the extra space. Called with the files spinlock held for write. */ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) { size_t cpy, set; BUG_ON(nfdt->max_fds < ofdt->max_fds); cpy = ofdt->max_fds * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); memcpy(nfdt->fd, ofdt->fd, cpy); memset((char *)nfdt->fd + cpy, 0, set); copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); } /* * Note how the fdtable bitmap allocations very much have to be a multiple of * BITS_PER_LONG. This is not only because we walk those things in chunks of * 'unsigned long' in some places, but simply because that is how the Linux * kernel bitmaps are defined to work: they are not "bits in an array of bytes", * they are very much "bits in an array of unsigned long". * * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied * by that "1024/sizeof(ptr)" before, we already know there are sufficient * clear low bits. Clang seems to realize that, gcc ends up being confused. * * On a 128-bit machine, the ALIGN() would actually matter. In the meantime, * let's consider it documentation (and maybe a test-case for gcc to improve * its code generation ;) */ static struct fdtable * alloc_fdtable(unsigned int nr) { struct fdtable *fdt; void *data; /* * Figure out how many fds we actually want to support in this fdtable. * Allocation steps are keyed to the size of the fdarray, since it * grows far faster than any of the other dynamic data. We try to fit * the fdarray into comfortable page-tuned chunks: starting at 1024B * and growing in powers of two from there on. 
*/ nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); nr = ALIGN(nr, BITS_PER_LONG); /* * Note that this can drive nr *below* what we had passed if sysctl_nr_open * had been set lower between the check in expand_files() and here. Deal * with that in caller, it's cheaper that way. * * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise * bitmaps handling below becomes unpleasant, to put it mildly... */ if (unlikely(nr > sysctl_nr_open)) nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT); if (!fdt) goto out; fdt->max_fds = nr; data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT); if (!data) goto out_fdt; fdt->fd = data; data = kvmalloc(max_t(size_t, 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES), GFP_KERNEL_ACCOUNT); if (!data) goto out_arr; fdt->open_fds = data; data += nr / BITS_PER_BYTE; fdt->close_on_exec = data; data += nr / BITS_PER_BYTE; fdt->full_fds_bits = data; return fdt; out_arr: kvfree(fdt->fd); out_fdt: kfree(fdt); out: return NULL; } /* * Expand the file descriptor table. * This function will allocate a new fdtable and both fd array and fdset, of * the given size. * Return <0 error code on error; 1 on successful completion. * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_fdtable(struct files_struct *files, unsigned int nr) __releases(files->file_lock) __acquires(files->file_lock) { struct fdtable *new_fdt, *cur_fdt; spin_unlock(&files->file_lock); new_fdt = alloc_fdtable(nr); /* make sure all fd_install() have seen resize_in_progress * or have finished their rcu_read_lock_sched() section. */ if (atomic_read(&files->count) > 1) synchronize_rcu(); spin_lock(&files->file_lock); if (!new_fdt) return -ENOMEM; /* * extremely unlikely race - sysctl_nr_open decreased between the check in * caller and alloc_fdtable(). Cheaper to catch it here... */ if (unlikely(new_fdt->max_fds <= nr)) { __free_fdtable(new_fdt); return -EMFILE; } cur_fdt = files_fdtable(files); BUG_ON(nr < cur_fdt->max_fds); copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); if (cur_fdt != &files->fdtab) call_rcu(&cur_fdt->rcu, free_fdtable_rcu); /* coupled with smp_rmb() in fd_install() */ smp_wmb(); return 1; } /* * Expand files. * This function will expand the file structures, if the requested size exceeds * the current capacity and there is room for expansion. * Return <0 error code on error; 0 when nothing done; 1 when files were * expanded and execution may have blocked. * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_files(struct files_struct *files, unsigned int nr) __releases(files->file_lock) __acquires(files->file_lock) { struct fdtable *fdt; int expanded = 0; repeat: fdt = files_fdtable(files); /* Do we need to expand? */ if (nr < fdt->max_fds) return expanded; /* Can we expand? 
*/ if (nr >= sysctl_nr_open) return -EMFILE; if (unlikely(files->resize_in_progress)) { spin_unlock(&files->file_lock); expanded = 1; wait_event(files->resize_wait, !files->resize_in_progress); spin_lock(&files->file_lock); goto repeat; } /* All good, so we try */ files->resize_in_progress = true; expanded = expand_fdtable(files, nr); files->resize_in_progress = false; wake_up_all(&files->resize_wait); return expanded; } static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt) { __set_bit(fd, fdt->close_on_exec); } static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt) { if (test_bit(fd, fdt->close_on_exec)) __clear_bit(fd, fdt->close_on_exec); } static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt) { __set_bit(fd, fdt->open_fds); fd /= BITS_PER_LONG; if (!~fdt->open_fds[fd]) __set_bit(fd, fdt->full_fds_bits); } static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) { __clear_bit(fd, fdt->open_fds); __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits); } static unsigned int count_open_files(struct fdtable *fdt) { unsigned int size = fdt->max_fds; unsigned int i; /* Find the last open fd */ for (i = size / BITS_PER_LONG; i > 0; ) { if (fdt->open_fds[--i]) break; } i = (i + 1) * BITS_PER_LONG; return i; } /* * Note that a sane fdtable size always has to be a multiple of * BITS_PER_LONG, since we have bitmaps that are sized by this. * * 'max_fds' will normally already be properly aligned, but it * turns out that in the close_range() -> __close_range() -> * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end * up having a 'max_fds' value that isn't already aligned. * * Rather than make close_range() have to worry about this, * just make that BITS_PER_LONG alignment be part of a sane * fdtable size. Becuase that's really what it is. */ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) { unsigned int count; count = count_open_files(fdt); if (max_fds < NR_OPEN_DEFAULT) max_fds = NR_OPEN_DEFAULT; return ALIGN(min(count, max_fds), BITS_PER_LONG); } /* * Allocate a new files structure and copy contents from the * passed in files structure. * errorp will be valid only when the returned files_struct is NULL. */ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp) { struct files_struct *newf; struct file **old_fds, **new_fds; unsigned int open_files, i; struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) goto out; atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); newf->resize_in_progress = false; init_waitqueue_head(&newf->resize_wait); newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; new_fdt->close_on_exec = newf->close_on_exec_init; new_fdt->open_fds = newf->open_fds_init; new_fdt->full_fds_bits = newf->full_fds_bits_init; new_fdt->fd = &newf->fd_array[0]; spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); open_files = sane_fdtable_size(old_fdt, max_fds); /* * Check whether we need to allocate a larger fd array and fd set. 
*/ while (unlikely(open_files > new_fdt->max_fds)) { spin_unlock(&oldf->file_lock); if (new_fdt != &newf->fdtab) __free_fdtable(new_fdt); new_fdt = alloc_fdtable(open_files - 1); if (!new_fdt) { *errorp = -ENOMEM; goto out_release; } /* beyond sysctl_nr_open; nothing to do */ if (unlikely(new_fdt->max_fds < open_files)) { __free_fdtable(new_fdt); *errorp = -EMFILE; goto out_release; } /* * Reacquire the oldf lock and a pointer to its fd table * who knows it may have a new bigger fd table. We need * the latest pointer. */ spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); open_files = sane_fdtable_size(old_fdt, max_fds); } copy_fd_bitmaps(new_fdt, old_fdt, open_files); old_fds = old_fdt->fd; new_fds = new_fdt->fd; for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; if (f) { get_file(f); } else { /* * The fd may be claimed in the fd bitmap but not yet * instantiated in the files array if a sibling thread * is partway through open(). So make sure that this * fd is available to the new process. */ __clear_open_fd(open_files - i, new_fdt); } rcu_assign_pointer(*new_fds++, f); } spin_unlock(&oldf->file_lock); /* clear the remainder */ memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *)); rcu_assign_pointer(newf->fdt, new_fdt); return newf; out_release: kmem_cache_free(files_cachep, newf); out: return NULL; } static struct fdtable *close_files(struct files_struct * files) { /* * It is safe to dereference the fd table without RCU or * ->file_lock because this is the last reference to the * files structure. */ struct fdtable *fdt = rcu_dereference_raw(files->fdt); unsigned int i, j = 0; for (;;) { unsigned long set; i = j * BITS_PER_LONG; if (i >= fdt->max_fds) break; set = fdt->open_fds[j++]; while (set) { if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); if (file) { filp_close(file, files); cond_resched(); } } i++; set >>= 1; } } return fdt; } void put_files_struct(struct files_struct *files) { if (atomic_dec_and_test(&files->count)) { struct fdtable *fdt = close_files(files); /* free the arrays if they are not embedded */ if (fdt != &files->fdtab) __free_fdtable(fdt); kmem_cache_free(files_cachep, files); } } void exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; if (files) { task_lock(tsk); tsk->files = NULL; task_unlock(tsk); put_files_struct(files); } } struct files_struct init_files = { .count = ATOMIC_INIT(1), .fdt = &init_files.fdtab, .fdtab = { .max_fds = NR_OPEN_DEFAULT, .fd = &init_files.fd_array[0], .close_on_exec = init_files.close_on_exec_init, .open_fds = init_files.open_fds_init, .full_fds_bits = init_files.full_fds_bits_init, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), .resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait), }; static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) { unsigned int maxfd = fdt->max_fds; unsigned int maxbit = maxfd / BITS_PER_LONG; unsigned int bitbit = start / BITS_PER_LONG; bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG; if (bitbit > maxfd) return maxfd; if (bitbit > start) start = bitbit; return find_next_zero_bit(fdt->open_fds, maxfd, start); } /* * allocate a file descriptor, mark it busy. 
*/ static int alloc_fd(unsigned start, unsigned end, unsigned flags) { struct files_struct *files = current->files; unsigned int fd; int error; struct fdtable *fdt; spin_lock(&files->file_lock); repeat: fdt = files_fdtable(files); fd = start; if (fd < files->next_fd) fd = files->next_fd; if (fd < fdt->max_fds) fd = find_next_fd(fdt, fd); /* * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. */ error = -EMFILE; if (fd >= end) goto out; error = expand_files(files, fd); if (error < 0) goto out; /* * If we needed to expand the fs array we * might have blocked - try again. */ if (error) goto repeat; if (start <= files->next_fd) files->next_fd = fd + 1; __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) __set_close_on_exec(fd, fdt); else __clear_close_on_exec(fd, fdt); error = fd; #if 1 /* Sanity check */ if (rcu_access_pointer(fdt->fd[fd]) != NULL) { printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); rcu_assign_pointer(fdt->fd[fd], NULL); } #endif out: spin_unlock(&files->file_lock); return error; } int __get_unused_fd_flags(unsigned flags, unsigned long nofile) { return alloc_fd(0, nofile, flags); } int get_unused_fd_flags(unsigned flags) { return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE)); } EXPORT_SYMBOL(get_unused_fd_flags); static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); __clear_open_fd(fd, fdt); if (fd < files->next_fd) files->next_fd = fd; } void put_unused_fd(unsigned int fd) { struct files_struct *files = current->files; spin_lock(&files->file_lock); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); } EXPORT_SYMBOL(put_unused_fd); /* * Install a file pointer in the fd array. * * The VFS is full of places where we drop the files lock between * setting the open_fds bitmap and installing the file in the file * array. At any such point, we are vulnerable to a dup2() race * installing a file in the array before us. We need to detect this and * fput() the struct file we are about to overwrite in this case. * * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. * * This consumes the "file" refcount, so callers should treat it * as if they had called fput(file). */ void fd_install(unsigned int fd, struct file *file) { struct files_struct *files = current->files; struct fdtable *fdt; if (WARN_ON_ONCE(unlikely(file->f_mode & FMODE_BACKING))) return; rcu_read_lock_sched(); if (unlikely(files->resize_in_progress)) { rcu_read_unlock_sched(); spin_lock(&files->file_lock); fdt = files_fdtable(files); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); return; } /* coupled with smp_wmb() in expand_fdtable() */ smp_rmb(); fdt = rcu_dereference_sched(files->fdt); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); rcu_read_unlock_sched(); } EXPORT_SYMBOL(fd_install); /** * file_close_fd_locked - return file associated with fd * @files: file struct to retrieve file from * @fd: file descriptor to retrieve file for * * Doesn't take a separate reference count. * * Context: files_lock must be held. 
* * Returns: The file associated with @fd (NULL if @fd is not open) */ struct file *file_close_fd_locked(struct files_struct *files, unsigned fd) { struct fdtable *fdt = files_fdtable(files); struct file *file; lockdep_assert_held(&files->file_lock); if (fd >= fdt->max_fds) return NULL; fd = array_index_nospec(fd, fdt->max_fds); file = fdt->fd[fd]; if (file) { rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); } return file; } int close_fd(unsigned fd) { struct files_struct *files = current->files; struct file *file; spin_lock(&files->file_lock); file = file_close_fd_locked(files, fd); spin_unlock(&files->file_lock); if (!file) return -EBADF; return filp_close(file, files); } EXPORT_SYMBOL(close_fd); /* for ksys_close() */ /** * last_fd - return last valid index into fd table * @fdt: File descriptor table. * * Context: Either rcu read lock or files_lock must be held. * * Returns: Last valid index into fdtable. */ static inline unsigned last_fd(struct fdtable *fdt) { return fdt->max_fds - 1; } static inline void __range_cloexec(struct files_struct *cur_fds, unsigned int fd, unsigned int max_fd) { struct fdtable *fdt; /* make sure we're using the correct maximum value */ spin_lock(&cur_fds->file_lock); fdt = files_fdtable(cur_fds); max_fd = min(last_fd(fdt), max_fd); if (fd <= max_fd) bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1); spin_unlock(&cur_fds->file_lock); } static inline void __range_close(struct files_struct *files, unsigned int fd, unsigned int max_fd) { struct file *file; unsigned n; spin_lock(&files->file_lock); n = last_fd(files_fdtable(files)); max_fd = min(max_fd, n); for (; fd <= max_fd; fd++) { file = file_close_fd_locked(files, fd); if (file) { spin_unlock(&files->file_lock); filp_close(file, files); cond_resched(); spin_lock(&files->file_lock); } else if (need_resched()) { spin_unlock(&files->file_lock); cond_resched(); spin_lock(&files->file_lock); } } spin_unlock(&files->file_lock); } /** * __close_range() - Close all file descriptors in a given range. * * @fd: starting file descriptor to close * @max_fd: last file descriptor to close * @flags: CLOSE_RANGE flags. * * This closes a range of file descriptors. All file descriptors * from @fd up to and including @max_fd are closed. */ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) { struct task_struct *me = current; struct files_struct *cur_fds = me->files, *fds = NULL; if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC)) return -EINVAL; if (fd > max_fd) return -EINVAL; if (flags & CLOSE_RANGE_UNSHARE) { int ret; unsigned int max_unshare_fds = NR_OPEN_MAX; /* * If the caller requested all fds to be made cloexec we always * copy all of the file descriptors since they still want to * use them. */ if (!(flags & CLOSE_RANGE_CLOEXEC)) { /* * If the requested range is greater than the current * maximum, we're closing everything so only copy all * file descriptors beneath the lowest file descriptor. */ rcu_read_lock(); if (max_fd >= last_fd(files_fdtable(cur_fds))) max_unshare_fds = fd; rcu_read_unlock(); } ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); if (ret) return ret; /* * We used to share our file descriptor table, and have now * created a private one, make sure we're using it below. */ if (fds) swap(cur_fds, fds); } if (flags & CLOSE_RANGE_CLOEXEC) __range_cloexec(cur_fds, fd, max_fd); else __range_close(cur_fds, fd, max_fd); if (fds) { /* * We're done closing the files we were supposed to. Time to install * the new file descriptor table and drop the old one. 
*/ task_lock(me); me->files = cur_fds; task_unlock(me); put_files_struct(fds); } return 0; } /** * file_close_fd - return file associated with fd * @fd: file descriptor to retrieve file for * * Doesn't take a separate reference count. * * Returns: The file associated with @fd (NULL if @fd is not open) */ struct file *file_close_fd(unsigned int fd) { struct files_struct *files = current->files; struct file *file; spin_lock(&files->file_lock); file = file_close_fd_locked(files, fd); spin_unlock(&files->file_lock); return file; } void do_close_on_exec(struct files_struct *files) { unsigned i; struct fdtable *fdt; /* exec unshares first */ spin_lock(&files->file_lock); for (i = 0; ; i++) { unsigned long set; unsigned fd = i * BITS_PER_LONG; fdt = files_fdtable(files); if (fd >= fdt->max_fds) break; set = fdt->close_on_exec[i]; if (!set) continue; fdt->close_on_exec[i] = 0; for ( ; set ; fd++, set >>= 1) { struct file *file; if (!(set & 1)) continue; file = fdt->fd[fd]; if (!file) continue; rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); filp_close(file, files); cond_resched(); spin_lock(&files->file_lock); } } spin_unlock(&files->file_lock); } static struct file *__get_file_rcu(struct file __rcu **f) { struct file __rcu *file; struct file __rcu *file_reloaded; struct file __rcu *file_reloaded_cmp; file = rcu_dereference_raw(*f); if (!file) return NULL; if (unlikely(!atomic_long_inc_not_zero(&file->f_count))) return ERR_PTR(-EAGAIN); file_reloaded = rcu_dereference_raw(*f); /* * Ensure that all accesses have a dependency on the load from * rcu_dereference_raw() above so we get correct ordering * between reuse/allocation and the pointer check below. */ file_reloaded_cmp = file_reloaded; OPTIMIZER_HIDE_VAR(file_reloaded_cmp); /* * atomic_long_inc_not_zero() above provided a full memory * barrier when we acquired a reference. * * This is paired with the write barrier from assigning to the * __rcu protected file pointer so that if that pointer still * matches the current file, we know we have successfully * acquired a reference to the right file. * * If the pointers don't match the file has been reallocated by * SLAB_TYPESAFE_BY_RCU. */ if (file == file_reloaded_cmp) return file_reloaded; fput(file); return ERR_PTR(-EAGAIN); } /** * get_file_rcu - try go get a reference to a file under rcu * @f: the file to get a reference on * * This function tries to get a reference on @f carefully verifying that * @f hasn't been reused. * * This function should rarely have to be used and only by users who * understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it. * * Return: Returns @f with the reference count increased or NULL. */ struct file *get_file_rcu(struct file __rcu **f) { for (;;) { struct file __rcu *file; file = __get_file_rcu(f); if (unlikely(!file)) return NULL; if (unlikely(IS_ERR(file))) continue; return file; } } EXPORT_SYMBOL_GPL(get_file_rcu); /** * get_file_active - try go get a reference to a file * @f: the file to get a reference on * * In contast to get_file_rcu() the pointer itself isn't part of the * reference counting. * * This function should rarely have to be used and only by users who * understand the implications of SLAB_TYPESAFE_BY_RCU. Try to avoid it. * * Return: Returns @f with the reference count increased or NULL. 
*/ struct file *get_file_active(struct file **f) { struct file __rcu *file; rcu_read_lock(); file = __get_file_rcu(f); rcu_read_unlock(); if (IS_ERR(file)) file = NULL; return file; } EXPORT_SYMBOL_GPL(get_file_active); static inline struct file *__fget_files_rcu(struct files_struct *files, unsigned int fd, fmode_t mask) { for (;;) { struct file *file; struct fdtable *fdt = rcu_dereference_raw(files->fdt); struct file __rcu **fdentry; unsigned long nospec_mask; /* Mask is a 0 for invalid fd's, ~0 for valid ones */ nospec_mask = array_index_mask_nospec(fd, fdt->max_fds); /* * fdentry points to the 'fd' offset, or fdt->fd[0]. * Loading from fdt->fd[0] is always safe, because the * array always exists. */ fdentry = fdt->fd + (fd & nospec_mask); /* Do the load, then mask any invalid result */ file = rcu_dereference_raw(*fdentry); file = (void *)(nospec_mask & (unsigned long)file); if (unlikely(!file)) return NULL; /* * Ok, we have a file pointer that was valid at * some point, but it might have become stale since. * * We need to confirm it by incrementing the refcount * and then check the lookup again. * * atomic_long_inc_not_zero() gives us a full memory * barrier. We only really need an 'acquire' one to * protect the loads below, but we don't have that. */ if (unlikely(!atomic_long_inc_not_zero(&file->f_count))) continue; /* * Such a race can take two forms: * * (a) the file ref already went down to zero and the * file hasn't been reused yet or the file count * isn't zero but the file has already been reused. * * (b) the file table entry has changed under us. * Note that we don't need to re-check the 'fdt->fd' * pointer having changed, because it always goes * hand-in-hand with 'fdt'. * * If so, we need to put our ref and try again. */ if (unlikely(file != rcu_dereference_raw(*fdentry)) || unlikely(rcu_dereference_raw(files->fdt) != fdt)) { fput(file); continue; } /* * This isn't the file we're looking for or we're not * allowed to get a reference to it. */ if (unlikely(file->f_mode & mask)) { fput(file); return NULL; } /* * Ok, we have a ref to the file, and checked that it * still exists. 
*/ return file; } } static struct file *__fget_files(struct files_struct *files, unsigned int fd, fmode_t mask) { struct file *file; rcu_read_lock(); file = __fget_files_rcu(files, fd, mask); rcu_read_unlock(); return file; } static inline struct file *__fget(unsigned int fd, fmode_t mask) { return __fget_files(current->files, fd, mask); } struct file *fget(unsigned int fd) { return __fget(fd, FMODE_PATH); } EXPORT_SYMBOL(fget); struct file *fget_raw(unsigned int fd) { return __fget(fd, 0); } EXPORT_SYMBOL(fget_raw); struct file *fget_task(struct task_struct *task, unsigned int fd) { struct file *file = NULL; task_lock(task); if (task->files) file = __fget_files(task->files, fd, 0); task_unlock(task); return file; } struct file *lookup_fdget_rcu(unsigned int fd) { return __fget_files_rcu(current->files, fd, 0); } EXPORT_SYMBOL_GPL(lookup_fdget_rcu); struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd) { /* Must be called with rcu_read_lock held */ struct files_struct *files; struct file *file = NULL; task_lock(task); files = task->files; if (files) file = __fget_files_rcu(files, fd, 0); task_unlock(task); return file; } struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *ret_fd) { /* Must be called with rcu_read_lock held */ struct files_struct *files; unsigned int fd = *ret_fd; struct file *file = NULL; task_lock(task); files = task->files; if (files) { for (; fd < files_fdtable(files)->max_fds; fd++) { file = __fget_files_rcu(files, fd, 0); if (file) break; } } task_unlock(task); *ret_fd = fd; return file; } EXPORT_SYMBOL(task_lookup_next_fdget_rcu); /* * Lightweight file lookup - no refcnt increment if fd table isn't shared. * * You can use this instead of fget if you satisfy all of the following * conditions: * 1) You must call fput_light before exiting the syscall and returning control * to userspace (i.e. you cannot remember the returned struct file * after * returning to userspace). * 2) You must not call filp_close on the returned struct file * in between * calls to fget_light and fput_light. * 3) You must not clone the current task in between the calls to fget_light * and fput_light. * * The fput_needed flag returned by fget_light should be passed to the * corresponding fput_light. */ static unsigned long __fget_light(unsigned int fd, fmode_t mask) { struct files_struct *files = current->files; struct file *file; /* * If another thread is concurrently calling close_fd() followed * by put_files_struct(), we must not observe the old table * entry combined with the new refcount - otherwise we could * return a file that is concurrently being freed. * * atomic_read_acquire() pairs with atomic_dec_and_test() in * put_files_struct(). */ if (likely(atomic_read_acquire(&files->count) == 1)) { file = files_lookup_fd_raw(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; return (unsigned long)file; } else { file = __fget_files(files, fd, mask); if (!file) return 0; return FDPUT_FPUT | (unsigned long)file; } } unsigned long __fdget(unsigned int fd) { return __fget_light(fd, FMODE_PATH); } EXPORT_SYMBOL(__fdget); unsigned long __fdget_raw(unsigned int fd) { return __fget_light(fd, 0); } /* * Try to avoid f_pos locking. We only need it if the * file is marked for FMODE_ATOMIC_POS, and it can be * accessed multiple ways. * * Always do it for directories, because pidfd_getfd() * can make a file accessible even if it otherwise would * not be, and for directories this is a correctness * issue, not a "POSIX requirement". 
*/ static inline bool file_needs_f_pos_lock(struct file *file) { return (file->f_mode & FMODE_ATOMIC_POS) && (file_count(file) > 1 || file->f_op->iterate_shared); } unsigned long __fdget_pos(unsigned int fd) { unsigned long v = __fdget(fd); struct file *file = (struct file *)(v & ~3); if (file && file_needs_f_pos_lock(file)) { v |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); } return v; } void __f_unlock_pos(struct file *f) { mutex_unlock(&f->f_pos_lock); } /* * We only lock f_pos if we have threads or if the file might be * shared with another process. In both cases we'll have an elevated * file count (done either by fdget() or by fork()). */ void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); if (flag) __set_close_on_exec(fd, fdt); else __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); } bool get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; struct fdtable *fdt; bool res; rcu_read_lock(); fdt = files_fdtable(files); res = close_on_exec(fd, fdt); rcu_read_unlock(); return res; } static int do_dup2(struct files_struct *files, struct file *file, unsigned fd, unsigned flags) __releases(&files->file_lock) { struct file *tofree; struct fdtable *fdt; /* * We need to detect attempts to do dup2() over allocated but still * not finished descriptor. NB: OpenBSD avoids that at the price of * extra work in their equivalent of fget() - they insert struct * file immediately after grabbing descriptor, mark it larval if * more work (e.g. actual opening) is needed and make sure that * fget() treats larval files as absent. Potentially interesting, * but while extra work in fget() is trivial, locking implications * and amount of surgery on open()-related paths in VFS are not. * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" * deadlocks in rather amusing ways, AFAICS. All of that is out of * scope of POSIX or SUS, since neither considers shared descriptor * tables and this condition does not arise without those. */ fdt = files_fdtable(files); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; get_file(file); rcu_assign_pointer(fdt->fd[fd], file); __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) __set_close_on_exec(fd, fdt); else __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); if (tofree) filp_close(tofree, files); return fd; Ebusy: spin_unlock(&files->file_lock); return -EBUSY; } int replace_fd(unsigned fd, struct file *file, unsigned flags) { int err; struct files_struct *files = current->files; if (!file) return close_fd(fd); if (fd >= rlimit(RLIMIT_NOFILE)) return -EBADF; spin_lock(&files->file_lock); err = expand_files(files, fd); if (unlikely(err < 0)) goto out_unlock; return do_dup2(files, file, fd, flags); out_unlock: spin_unlock(&files->file_lock); return err; } /** * receive_fd() - Install received file into file descriptor table * @file: struct file that was received from another process * @ufd: __user pointer to write new fd number to * @o_flags: the O_* flags to apply to the new fd entry * * Installs a received file into the file descriptor table, with appropriate * checks and count updates. Optionally writes the fd number to userspace, if * @ufd is non-NULL. * * This helper handles its own reference counting of the incoming * struct file. * * Returns newly install fd or -ve on error. 
*/ int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags) { int new_fd; int error; error = security_file_receive(file); if (error) return error; new_fd = get_unused_fd_flags(o_flags); if (new_fd < 0) return new_fd; if (ufd) { error = put_user(new_fd, ufd); if (error) { put_unused_fd(new_fd); return error; } } fd_install(new_fd, get_file(file)); __receive_sock(file); return new_fd; } EXPORT_SYMBOL_GPL(receive_fd); int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags) { int error; error = security_file_receive(file); if (error) return error; error = replace_fd(new_fd, file, o_flags); if (error) return error; __receive_sock(file); return new_fd; } static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; struct file *file; struct files_struct *files = current->files; if ((flags & ~O_CLOEXEC) != 0) return -EINVAL; if (unlikely(oldfd == newfd)) return -EINVAL; if (newfd >= rlimit(RLIMIT_NOFILE)) return -EBADF; spin_lock(&files->file_lock); err = expand_files(files, newfd); file = files_lookup_fd_locked(files, oldfd); if (unlikely(!file)) goto Ebadf; if (unlikely(err < 0)) { if (err == -EMFILE) goto Ebadf; goto out_unlock; } return do_dup2(files, file, newfd, flags); Ebadf: err = -EBADF; out_unlock: spin_unlock(&files->file_lock); return err; } SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) { return ksys_dup3(oldfd, newfd, flags); } SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { if (unlikely(newfd == oldfd)) { /* corner case */ struct files_struct *files = current->files; struct file *f; int retval = oldfd; rcu_read_lock(); f = __fget_files_rcu(files, oldfd, 0); if (!f) retval = -EBADF; rcu_read_unlock(); if (f) fput(f); return retval; } return ksys_dup3(oldfd, newfd, 0); } SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; struct file *file = fget_raw(fildes); if (file) { ret = get_unused_fd_flags(0); if (ret >= 0) fd_install(ret, file); else fput(file); } return ret; } int f_dupfd(unsigned int from, struct file *file, unsigned flags) { unsigned long nofile = rlimit(RLIMIT_NOFILE); int err; if (from >= nofile) return -EINVAL; err = alloc_fd(from, nofile, flags); if (err >= 0) { get_file(file); fd_install(err, file); } return err; } int iterate_fd(struct files_struct *files, unsigned n, int (*f)(const void *, struct file *, unsigned), const void *p) { struct fdtable *fdt; int res = 0; if (!files) return 0; spin_lock(&files->file_lock); for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { struct file *file; file = rcu_dereference_check_fdtable(files, fdt->fd[n]); if (!file) continue; res = f(p, file, n); if (res) break; } spin_unlock(&files->file_lock); return res; } EXPORT_SYMBOL(iterate_fd);
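The iterate_fd() helper above walks a descriptor table under files->file_lock, starting at index n, and stops as soon as the callback returns a non-zero value, which it then propagates to the caller. A minimal sketch of that contract follows (illustration only; the match_file() callback and find_fd_of_file() wrapper are hypothetical names, not part of the code above):

/* Hypothetical callback: a non-zero return stops the walk and is returned. */
static int match_file(const void *p, struct file *file, unsigned int fd)
{
	return file == p ? fd + 1 : 0;	/* +1 so that fd 0 stays distinguishable */
}

/* Hypothetical wrapper: find the first fd referring to @target, or -ENOENT. */
static int find_fd_of_file(struct files_struct *files, struct file *target)
{
	int res = iterate_fd(files, 0, match_file, target);

	return res ? res - 1 : -ENOENT;
}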
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TIMERQUEUE_H
#define _LINUX_TIMERQUEUE_H

#include <linux/rbtree.h>
#include <linux/timerqueue_types.h>

extern bool timerqueue_add(struct timerqueue_head *head,
			   struct timerqueue_node *node);
extern bool timerqueue_del(struct timerqueue_head *head,
			   struct timerqueue_node *node);
extern struct timerqueue_node *timerqueue_iterate_next(
						struct timerqueue_node *node);

/**
 * timerqueue_getnext - Returns the timer with the earliest expiration time
 *
 * @head: head of timerqueue
 *
 * Returns a pointer to the timer node that has the earliest expiration time.
 */
static inline
struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
{
	struct rb_node *leftmost = rb_first_cached(&head->rb_root);

	return rb_entry_safe(leftmost, struct timerqueue_node, node);
}

static inline void timerqueue_init(struct timerqueue_node *node)
{
	RB_CLEAR_NODE(&node->node);
}

static inline bool timerqueue_node_queued(struct timerqueue_node *node)
{
	return !RB_EMPTY_NODE(&node->node);
}

static inline bool timerqueue_node_expires(struct timerqueue_node *node)
{
	return node->expires;
}

static inline void timerqueue_init_head(struct timerqueue_head *head)
{
	head->rb_root = RB_ROOT_CACHED;
}
#endif /* _LINUX_TIMERQUEUE_H */
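A brief usage sketch of the interface above (illustration only; it assumes that struct timerqueue_node.expires is a ktime_t as defined in linux/timerqueue_types.h, which is not shown here):

#include <linux/ktime.h>
#include <linux/timerqueue.h>

static void timerqueue_example(void)
{
	struct timerqueue_head head;
	struct timerqueue_node a, b;
	struct timerqueue_node *next;

	timerqueue_init_head(&head);
	timerqueue_init(&a);
	timerqueue_init(&b);

	a.expires = ktime_set(0, 1000);		/* expires at 1000 ns */
	b.expires = ktime_set(0, 500);		/* expires earlier, at 500 ns */

	timerqueue_add(&head, &a);
	timerqueue_add(&head, &b);

	next = timerqueue_getnext(&head);	/* yields &b, the earliest node */
	timerqueue_del(&head, next);
}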
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2023 Isovalent */
#ifndef __NET_TCX_H
#define __NET_TCX_H

#include <linux/bpf.h>
#include <linux/bpf_mprog.h>

#include <net/sch_generic.h>

struct mini_Qdisc;

struct tcx_entry {
	struct mini_Qdisc __rcu *miniq;
	struct bpf_mprog_bundle bundle;
	bool miniq_active;
	struct rcu_head rcu;
};

struct tcx_link {
	struct bpf_link link;
	struct net_device *dev;
	u32 location;
};

static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress)
{
#ifdef CONFIG_NET_XGRESS
	skb->tc_at_ingress = ingress;
#endif
}

#ifdef CONFIG_NET_XGRESS
static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry)
{
	struct bpf_mprog_bundle *bundle = entry->parent;

	return container_of(bundle, struct tcx_entry, bundle);
}

static inline struct tcx_link *tcx_link(const struct bpf_link *link)
{
	return container_of(link, struct tcx_link, link);
}

void tcx_inc(void);
void tcx_dec(void);

static inline void tcx_entry_sync(void)
{
	/* bpf_mprog_entry got a/b swapped, therefore ensure that
	 * there are no inflight users on the old one anymore.
*/ synchronize_rcu(); } static inline void tcx_entry_update(struct net_device *dev, struct bpf_mprog_entry *entry, bool ingress) { ASSERT_RTNL(); if (ingress) rcu_assign_pointer(dev->tcx_ingress, entry); else rcu_assign_pointer(dev->tcx_egress, entry); } static inline struct bpf_mprog_entry * tcx_entry_fetch(struct net_device *dev, bool ingress) { ASSERT_RTNL(); if (ingress) return rcu_dereference_rtnl(dev->tcx_ingress); else return rcu_dereference_rtnl(dev->tcx_egress); } static inline struct bpf_mprog_entry *tcx_entry_create(void) { struct tcx_entry *tcx = kzalloc(sizeof(*tcx), GFP_KERNEL); if (tcx) { bpf_mprog_bundle_init(&tcx->bundle); return &tcx->bundle.a; } return NULL; } static inline void tcx_entry_free(struct bpf_mprog_entry *entry) { kfree_rcu(tcx_entry(entry), rcu); } static inline struct bpf_mprog_entry * tcx_entry_fetch_or_create(struct net_device *dev, bool ingress, bool *created) { struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress); *created = false; if (!entry) { entry = tcx_entry_create(); if (!entry) return NULL; *created = true; } return entry; } static inline void tcx_skeys_inc(bool ingress) { tcx_inc(); if (ingress) net_inc_ingress_queue(); else net_inc_egress_queue(); } static inline void tcx_skeys_dec(bool ingress) { if (ingress) net_dec_ingress_queue(); else net_dec_egress_queue(); tcx_dec(); } static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry, const bool active) { ASSERT_RTNL(); tcx_entry(entry)->miniq_active = active; } static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); return bpf_mprog_total(entry) || tcx_entry(entry)->miniq_active; } static inline enum tcx_action_base tcx_action_code(struct sk_buff *skb, int code) { switch (code) { case TCX_PASS: skb->tc_index = qdisc_skb_cb(skb)->tc_classid; fallthrough; case TCX_DROP: case TCX_REDIRECT: return code; case TCX_NEXT: default: return TCX_NEXT; } } #endif /* CONFIG_NET_XGRESS */ #if defined(CONFIG_NET_XGRESS) && defined(CONFIG_BPF_SYSCALL) int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); void tcx_uninstall(struct net_device *dev, bool ingress); int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); static inline void dev_tcx_uninstall(struct net_device *dev) { ASSERT_RTNL(); tcx_uninstall(dev, true); tcx_uninstall(dev, false); } #else static inline int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog) { return -EINVAL; } static inline int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { return -EINVAL; } static inline void dev_tcx_uninstall(struct net_device *dev) { } #endif /* CONFIG_NET_XGRESS && CONFIG_BPF_SYSCALL */ #endif /* __NET_TCX_H */
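A rough sketch of how the helpers above combine on the attach side (an assumption for illustration only; the real flow lives in kernel/bpf/tcx.c and also involves the bpf_mprog attach API, which is not shown):

static int tcx_example_enable(struct net_device *dev, bool ingress)
{
	struct bpf_mprog_entry *entry;
	bool created;

	ASSERT_RTNL();

	entry = tcx_entry_fetch_or_create(dev, ingress, &created);
	if (!entry)
		return -ENOMEM;

	/* ... attach a program to @entry via the bpf_mprog API ... */

	/* Publish the entry; readers observe it via rcu_dereference(). */
	tcx_entry_update(dev, entry, ingress);
	if (created)
		tcx_skeys_inc(ingress);

	/*
	 * Had an existing entry been a/b-swapped instead of freshly
	 * created, tcx_entry_sync() would be required before the old
	 * half of the bundle could be reused or torn down.
	 */
	return 0;
}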
/*
 * Created: Fri Jan 19 10:48:35 2001 by faith@acm.org
 *
 * Copyright 2001 VA Linux Systems, Inc., Sunnyvale, California.
 * All Rights Reserved.
 *
 * Author Rickard E. (Rik) Faith <faith@valinux.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/slab.h>
#include <linux/srcu.h>

#include <drm/drm_accel.h>
#include <drm/drm_cache.h>
#include <drm/drm_client.h>
#include <drm/drm_color_mgmt.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_managed.h>
#include <drm/drm_mode_object.h>
#include <drm/drm_print.h>
#include <drm/drm_privacy_screen_machine.h>

#include "drm_crtc_internal.h"
#include "drm_internal.h"

MODULE_AUTHOR("Gareth Hughes, Leif Delgass, José Fonseca, Jon Smirl");
MODULE_DESCRIPTION("DRM shared core routines");
MODULE_LICENSE("GPL and additional rights");

static DEFINE_SPINLOCK(drm_minor_lock);
static struct idr drm_minors_idr;

/*
 * If the drm core fails to init for whatever reason,
 * we should prevent any drivers from registering with it.
 * It's best to check this at drm_dev_init(), as some drivers
 * prefer to embed struct drm_device into their own device
 * structure and call drm_dev_init() themselves.
 */
static bool drm_core_init_complete;

static struct dentry *drm_debugfs_root;

DEFINE_STATIC_SRCU(drm_unplug_srcu);

/*
 * DRM Minors
 * A DRM device can provide several char-dev interfaces on the DRM-Major.
Each * of them is represented by a drm_minor object. Depending on the capabilities * of the device-driver, different interfaces are registered. * * Minors can be accessed via dev->$minor_name. This pointer is either * NULL or a valid drm_minor pointer and stays valid as long as the device is * valid. This means, DRM minors have the same life-time as the underlying * device. However, this doesn't mean that the minor is active. Minors are * registered and unregistered dynamically according to device-state. */ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, enum drm_minor_type type) { switch (type) { case DRM_MINOR_PRIMARY: return &dev->primary; case DRM_MINOR_RENDER: return &dev->render; case DRM_MINOR_ACCEL: return &dev->accel; default: BUG(); } } static void drm_minor_alloc_release(struct drm_device *dev, void *data) { struct drm_minor *minor = data; unsigned long flags; WARN_ON(dev != minor->dev); put_device(minor->kdev); if (minor->type == DRM_MINOR_ACCEL) { accel_minor_remove(minor->index); } else { spin_lock_irqsave(&drm_minor_lock, flags); idr_remove(&drm_minors_idr, minor->index); spin_unlock_irqrestore(&drm_minor_lock, flags); } } static int drm_minor_alloc(struct drm_device *dev, enum drm_minor_type type) { struct drm_minor *minor; unsigned long flags; int r; minor = drmm_kzalloc(dev, sizeof(*minor), GFP_KERNEL); if (!minor) return -ENOMEM; minor->type = type; minor->dev = dev; idr_preload(GFP_KERNEL); if (type == DRM_MINOR_ACCEL) { r = accel_minor_alloc(); } else { spin_lock_irqsave(&drm_minor_lock, flags); r = idr_alloc(&drm_minors_idr, NULL, 64 * type, 64 * (type + 1), GFP_NOWAIT); spin_unlock_irqrestore(&drm_minor_lock, flags); } idr_preload_end(); if (r < 0) return r; minor->index = r; r = drmm_add_action_or_reset(dev, drm_minor_alloc_release, minor); if (r) return r; minor->kdev = drm_sysfs_minor_alloc(minor); if (IS_ERR(minor->kdev)) return PTR_ERR(minor->kdev); *drm_minor_get_slot(dev, type) = minor; return 0; } static int drm_minor_register(struct drm_device *dev, enum drm_minor_type type) { struct drm_minor *minor; unsigned long flags; int ret; DRM_DEBUG("\n"); minor = *drm_minor_get_slot(dev, type); if (!minor) return 0; if (minor->type != DRM_MINOR_ACCEL) { ret = drm_debugfs_register(minor, minor->index, drm_debugfs_root); if (ret) { DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n"); goto err_debugfs; } } ret = device_add(minor->kdev); if (ret) goto err_debugfs; /* replace NULL with @minor so lookups will succeed from now on */ if (minor->type == DRM_MINOR_ACCEL) { accel_minor_replace(minor, minor->index); } else { spin_lock_irqsave(&drm_minor_lock, flags); idr_replace(&drm_minors_idr, minor, minor->index); spin_unlock_irqrestore(&drm_minor_lock, flags); } DRM_DEBUG("new minor registered %d\n", minor->index); return 0; err_debugfs: drm_debugfs_unregister(minor); return ret; } static void drm_minor_unregister(struct drm_device *dev, enum drm_minor_type type) { struct drm_minor *minor; unsigned long flags; minor = *drm_minor_get_slot(dev, type); if (!minor || !device_is_registered(minor->kdev)) return; /* replace @minor with NULL so lookups will fail from now on */ if (minor->type == DRM_MINOR_ACCEL) { accel_minor_replace(NULL, minor->index); } else { spin_lock_irqsave(&drm_minor_lock, flags); idr_replace(&drm_minors_idr, NULL, minor->index); spin_unlock_irqrestore(&drm_minor_lock, flags); } device_del(minor->kdev); dev_set_drvdata(minor->kdev, NULL); /* safety belt */ drm_debugfs_unregister(minor); } /* * Looks up the given minor-ID and 
returns the respective DRM-minor object. The * refence-count of the underlying device is increased so you must release this * object with drm_minor_release(). * * As long as you hold this minor, it is guaranteed that the object and the * minor->dev pointer will stay valid! However, the device may get unplugged and * unregistered while you hold the minor. */ struct drm_minor *drm_minor_acquire(unsigned int minor_id) { struct drm_minor *minor; unsigned long flags; spin_lock_irqsave(&drm_minor_lock, flags); minor = idr_find(&drm_minors_idr, minor_id); if (minor) drm_dev_get(minor->dev); spin_unlock_irqrestore(&drm_minor_lock, flags); if (!minor) { return ERR_PTR(-ENODEV); } else if (drm_dev_is_unplugged(minor->dev)) { drm_dev_put(minor->dev); return ERR_PTR(-ENODEV); } return minor; } void drm_minor_release(struct drm_minor *minor) { drm_dev_put(minor->dev); } /** * DOC: driver instance overview * * A device instance for a drm driver is represented by &struct drm_device. This * is allocated and initialized with devm_drm_dev_alloc(), usually from * bus-specific ->probe() callbacks implemented by the driver. The driver then * needs to initialize all the various subsystems for the drm device like memory * management, vblank handling, modesetting support and initial output * configuration plus obviously initialize all the corresponding hardware bits. * Finally when everything is up and running and ready for userspace the device * instance can be published using drm_dev_register(). * * There is also deprecated support for initializing device instances using * bus-specific helpers and the &drm_driver.load callback. But due to * backwards-compatibility needs the device instance have to be published too * early, which requires unpretty global locking to make safe and is therefore * only support for existing drivers not yet converted to the new scheme. * * When cleaning up a device instance everything needs to be done in reverse: * First unpublish the device instance with drm_dev_unregister(). Then clean up * any other resources allocated at device initialization and drop the driver's * reference to &drm_device using drm_dev_put(). * * Note that any allocation or resource which is visible to userspace must be * released only when the final drm_dev_put() is called, and not when the * driver is unbound from the underlying physical struct &device. Best to use * &drm_device managed resources with drmm_add_action(), drmm_kmalloc() and * related functions. * * devres managed resources like devm_kmalloc() can only be used for resources * directly related to the underlying hardware device, and only used in code * paths fully protected by drm_dev_enter() and drm_dev_exit(). * * Display driver example * ~~~~~~~~~~~~~~~~~~~~~~ * * The following example shows a typical structure of a DRM display driver. * The example focus on the probe() function and the other functions that is * almost always present and serves as a demonstration of devm_drm_dev_alloc(). * * .. code-block:: c * * struct driver_device { * struct drm_device drm; * void *userspace_facing; * struct clk *pclk; * }; * * static const struct drm_driver driver_drm_driver = { * [...] 
* }; * * static int driver_probe(struct platform_device *pdev) * { * struct driver_device *priv; * struct drm_device *drm; * int ret; * * priv = devm_drm_dev_alloc(&pdev->dev, &driver_drm_driver, * struct driver_device, drm); * if (IS_ERR(priv)) * return PTR_ERR(priv); * drm = &priv->drm; * * ret = drmm_mode_config_init(drm); * if (ret) * return ret; * * priv->userspace_facing = drmm_kzalloc(..., GFP_KERNEL); * if (!priv->userspace_facing) * return -ENOMEM; * * priv->pclk = devm_clk_get(dev, "PCLK"); * if (IS_ERR(priv->pclk)) * return PTR_ERR(priv->pclk); * * // Further setup, display pipeline etc * * platform_set_drvdata(pdev, drm); * * drm_mode_config_reset(drm); * * ret = drm_dev_register(drm); * if (ret) * return ret; * * drm_fbdev_generic_setup(drm, 32); * * return 0; * } * * // This function is called before the devm_ resources are released * static int driver_remove(struct platform_device *pdev) * { * struct drm_device *drm = platform_get_drvdata(pdev); * * drm_dev_unregister(drm); * drm_atomic_helper_shutdown(drm) * * return 0; * } * * // This function is called on kernel restart and shutdown * static void driver_shutdown(struct platform_device *pdev) * { * drm_atomic_helper_shutdown(platform_get_drvdata(pdev)); * } * * static int __maybe_unused driver_pm_suspend(struct device *dev) * { * return drm_mode_config_helper_suspend(dev_get_drvdata(dev)); * } * * static int __maybe_unused driver_pm_resume(struct device *dev) * { * drm_mode_config_helper_resume(dev_get_drvdata(dev)); * * return 0; * } * * static const struct dev_pm_ops driver_pm_ops = { * SET_SYSTEM_SLEEP_PM_OPS(driver_pm_suspend, driver_pm_resume) * }; * * static struct platform_driver driver_driver = { * .driver = { * [...] * .pm = &driver_pm_ops, * }, * .probe = driver_probe, * .remove = driver_remove, * .shutdown = driver_shutdown, * }; * module_platform_driver(driver_driver); * * Drivers that want to support device unplugging (USB, DT overlay unload) should * use drm_dev_unplug() instead of drm_dev_unregister(). The driver must protect * regions that is accessing device resources to prevent use after they're * released. This is done using drm_dev_enter() and drm_dev_exit(). There is one * shortcoming however, drm_dev_unplug() marks the drm_device as unplugged before * drm_atomic_helper_shutdown() is called. This means that if the disable code * paths are protected, they will not run on regular driver module unload, * possibly leaving the hardware enabled. */ /** * drm_put_dev - Unregister and release a DRM device * @dev: DRM device * * Called at module unload time or when a PCI device is unplugged. * * Cleans up all DRM device, calling drm_lastclose(). * * Note: Use of this function is deprecated. It will eventually go away * completely. Please use drm_dev_unregister() and drm_dev_put() explicitly * instead to make sure that the device isn't userspace accessible any more * while teardown is in progress, ensuring that userspace can't access an * inconsistent state. */ void drm_put_dev(struct drm_device *dev) { DRM_DEBUG("\n"); if (!dev) { DRM_ERROR("cleanup called no dev\n"); return; } drm_dev_unregister(dev); drm_dev_put(dev); } EXPORT_SYMBOL(drm_put_dev); /** * drm_dev_enter - Enter device critical section * @dev: DRM device * @idx: Pointer to index that will be passed to the matching drm_dev_exit() * * This function marks and protects the beginning of a section that should not * be entered after the device has been unplugged. The section end is marked * with drm_dev_exit(). Calls to this function can be nested. 
* * Returns: * True if it is OK to enter the section, false otherwise. */ bool drm_dev_enter(struct drm_device *dev, int *idx) { *idx = srcu_read_lock(&drm_unplug_srcu); if (dev->unplugged) { srcu_read_unlock(&drm_unplug_srcu, *idx); return false; } return true; } EXPORT_SYMBOL(drm_dev_enter); /** * drm_dev_exit - Exit device critical section * @idx: index returned from drm_dev_enter() * * This function marks the end of a section that should not be entered after * the device has been unplugged. */ void drm_dev_exit(int idx) { srcu_read_unlock(&drm_unplug_srcu, idx); } EXPORT_SYMBOL(drm_dev_exit); /** * drm_dev_unplug - unplug a DRM device * @dev: DRM device * * This unplugs a hotpluggable DRM device, which makes it inaccessible to * userspace operations. Entry-points can use drm_dev_enter() and * drm_dev_exit() to protect device resources in a race free manner. This * essentially unregisters the device like drm_dev_unregister(), but can be * called while there are still open users of @dev. */ void drm_dev_unplug(struct drm_device *dev) { /* * After synchronizing any critical read section is guaranteed to see * the new value of ->unplugged, and any critical section which might * still have seen the old value of ->unplugged is guaranteed to have * finished. */ dev->unplugged = true; synchronize_srcu(&drm_unplug_srcu); drm_dev_unregister(dev); /* Clear all CPU mappings pointing to this device */ unmap_mapping_range(dev->anon_inode->i_mapping, 0, 0, 1); } EXPORT_SYMBOL(drm_dev_unplug); /* * DRM internal mount * We want to be able to allocate our own "struct address_space" to control * memory-mappings in VRAM (or stolen RAM, ...). However, core MM does not allow * stand-alone address_space objects, so we need an underlying inode. As there * is no way to allocate an independent inode easily, we need a fake internal * VFS mount-point. * * The drm_fs_inode_new() function allocates a new inode, drm_fs_inode_free() * frees it again. You are allowed to use iget() and iput() to get references to * the inode. But each drm_fs_inode_new() call must be paired with exactly one * drm_fs_inode_free() call (which does not have to be the last iput()). * We use drm_fs_inode_*() to manage our internal VFS mount-point and share it * between multiple inode-users. You could, technically, call * iget() + drm_fs_inode_free() directly after alloc and sometime later do an * iput(), but this way you'd end up with a new vfsmount for each inode. */ static int drm_fs_cnt; static struct vfsmount *drm_fs_mnt; static int drm_fs_init_fs_context(struct fs_context *fc) { return init_pseudo(fc, 0x010203ff) ? 0 : -ENOMEM; } static struct file_system_type drm_fs_type = { .name = "drm", .owner = THIS_MODULE, .init_fs_context = drm_fs_init_fs_context, .kill_sb = kill_anon_super, }; static struct inode *drm_fs_inode_new(void) { struct inode *inode; int r; r = simple_pin_fs(&drm_fs_type, &drm_fs_mnt, &drm_fs_cnt); if (r < 0) { DRM_ERROR("Cannot mount pseudo fs: %d\n", r); return ERR_PTR(r); } inode = alloc_anon_inode(drm_fs_mnt->mnt_sb); if (IS_ERR(inode)) simple_release_fs(&drm_fs_mnt, &drm_fs_cnt); return inode; } static void drm_fs_inode_free(struct inode *inode) { if (inode) { iput(inode); simple_release_fs(&drm_fs_mnt, &drm_fs_cnt); } } /** * DOC: component helper usage recommendations * * DRM drivers that drive hardware where a logical device consists of a pile of * independent hardware blocks are recommended to use the :ref:`component helper * library<component>`. 
For consistency and better options for code reuse the * following guidelines apply: * * - The entire device initialization procedure should be run from the * &component_master_ops.master_bind callback, starting with * devm_drm_dev_alloc(), then binding all components with * component_bind_all() and finishing with drm_dev_register(). * * - The opaque pointer passed to all components through component_bind_all() * should point at &struct drm_device of the device instance, not some driver * specific private structure. * * - The component helper fills the niche where further standardization of * interfaces is not practical. When there already is, or will be, a * standardized interface like &drm_bridge or &drm_panel, providing its own * functions to find such components at driver load time, like * drm_of_find_panel_or_bridge(), then the component helper should not be * used. */ static void drm_dev_init_release(struct drm_device *dev, void *res) { drm_fs_inode_free(dev->anon_inode); put_device(dev->dev); /* Prevent use-after-free in drm_managed_release when debugging is * enabled. Slightly awkward, but can't really be helped. */ dev->dev = NULL; mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); mutex_destroy(&dev->struct_mutex); } static int drm_dev_init(struct drm_device *dev, const struct drm_driver *driver, struct device *parent) { struct inode *inode; int ret; if (!drm_core_init_complete) { DRM_ERROR("DRM core is not initialized\n"); return -ENODEV; } if (WARN_ON(!parent)) return -EINVAL; kref_init(&dev->ref); dev->dev = get_device(parent); dev->driver = driver; INIT_LIST_HEAD(&dev->managed.resources); spin_lock_init(&dev->managed.lock); /* no per-device feature limits by default */ dev->driver_features = ~0u; if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL) && (drm_core_check_feature(dev, DRIVER_RENDER) || drm_core_check_feature(dev, DRIVER_MODESET))) { DRM_ERROR("DRM driver can't be both a compute acceleration and graphics driver\n"); return -EINVAL; } INIT_LIST_HEAD(&dev->filelist); INIT_LIST_HEAD(&dev->filelist_internal); INIT_LIST_HEAD(&dev->clientlist); INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->event_lock); mutex_init(&dev->struct_mutex); mutex_init(&dev->filelist_mutex); mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL); if (ret) return ret; inode = drm_fs_inode_new(); if (IS_ERR(inode)) { ret = PTR_ERR(inode); DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret); goto err; } dev->anon_inode = inode; if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) { ret = drm_minor_alloc(dev, DRM_MINOR_ACCEL); if (ret) goto err; } else { if (drm_core_check_feature(dev, DRIVER_RENDER)) { ret = drm_minor_alloc(dev, DRM_MINOR_RENDER); if (ret) goto err; } ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY); if (ret) goto err; } if (drm_core_check_feature(dev, DRIVER_GEM)) { ret = drm_gem_init(dev); if (ret) { DRM_ERROR("Cannot initialize graphics execution manager (GEM)\n"); goto err; } } dev->unique = drmm_kstrdup(dev, dev_name(parent), GFP_KERNEL); if (!dev->unique) { ret = -ENOMEM; goto err; } if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) accel_debugfs_init(dev); else drm_debugfs_dev_init(dev, drm_debugfs_root); return 0; err: drm_managed_release(dev); return ret; } static void devm_drm_dev_init_release(void *data) { drm_dev_put(data); } static int devm_drm_dev_init(struct device *parent, struct drm_device *dev, const 
struct drm_driver *driver) { int ret; ret = drm_dev_init(dev, driver, parent); if (ret) return ret; return devm_add_action_or_reset(parent, devm_drm_dev_init_release, dev); } void *__devm_drm_dev_alloc(struct device *parent, const struct drm_driver *driver, size_t size, size_t offset) { void *container; struct drm_device *drm; int ret; container = kzalloc(size, GFP_KERNEL); if (!container) return ERR_PTR(-ENOMEM); drm = container + offset; ret = devm_drm_dev_init(parent, drm, driver); if (ret) { kfree(container); return ERR_PTR(ret); } drmm_add_final_kfree(drm, container); return container; } EXPORT_SYMBOL(__devm_drm_dev_alloc); /** * drm_dev_alloc - Allocate new DRM device * @driver: DRM driver to allocate device for * @parent: Parent device object * * This is the deprecated version of devm_drm_dev_alloc(), which does not support * subclassing through embedding the struct &drm_device in a driver private * structure, and which does not support automatic cleanup through devres. * * RETURNS: * Pointer to new DRM device, or ERR_PTR on failure. */ struct drm_device *drm_dev_alloc(const struct drm_driver *driver, struct device *parent) { struct drm_device *dev; int ret; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); ret = drm_dev_init(dev, driver, parent); if (ret) { kfree(dev); return ERR_PTR(ret); } drmm_add_final_kfree(dev, dev); return dev; } EXPORT_SYMBOL(drm_dev_alloc); static void drm_dev_release(struct kref *ref) { struct drm_device *dev = container_of(ref, struct drm_device, ref); /* Just in case register/unregister was never called */ drm_debugfs_dev_fini(dev); if (dev->driver->release) dev->driver->release(dev); drm_managed_release(dev); kfree(dev->managed.final_kfree); } /** * drm_dev_get - Take reference of a DRM device * @dev: device to take reference of or NULL * * This increases the ref-count of @dev by one. You *must* already own a * reference when calling this. Use drm_dev_put() to drop this reference * again. * * This function never fails. However, this function does not provide *any* * guarantee whether the device is alive or running. It only provides a * reference to the object and the memory associated with it. */ void drm_dev_get(struct drm_device *dev) { if (dev) kref_get(&dev->ref); } EXPORT_SYMBOL(drm_dev_get); /** * drm_dev_put - Drop reference of a DRM device * @dev: device to drop reference of or NULL * * This decreases the ref-count of @dev by one. The device is destroyed if the * ref-count drops to zero. */ void drm_dev_put(struct drm_device *dev) { if (dev) kref_put(&dev->ref, drm_dev_release); } EXPORT_SYMBOL(drm_dev_put); static int create_compat_control_link(struct drm_device *dev) { struct drm_minor *minor; char *name; int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return 0; minor = *drm_minor_get_slot(dev, DRM_MINOR_PRIMARY); if (!minor) return 0; /* * Some existing userspace out there uses the existing of the controlD* * sysfs files to figure out whether it's a modeset driver. It only does * readdir, hence a symlink is sufficient (and the least confusing * option). Otherwise controlD* is entirely unused. * * Old controlD chardev have been allocated in the range * 64-127. 
*/ name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64); if (!name) return -ENOMEM; ret = sysfs_create_link(minor->kdev->kobj.parent, &minor->kdev->kobj, name); kfree(name); return ret; } static void remove_compat_control_link(struct drm_device *dev) { struct drm_minor *minor; char *name; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; minor = *drm_minor_get_slot(dev, DRM_MINOR_PRIMARY); if (!minor) return; name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64); if (!name) return; sysfs_remove_link(minor->kdev->kobj.parent, name); kfree(name); } /** * drm_dev_register - Register DRM device * @dev: Device to register * @flags: Flags passed to the driver's .load() function * * Register the DRM device @dev with the system, advertise device to user-space * and start normal device operation. @dev must be initialized via drm_dev_init() * previously. * * Never call this twice on any device! * * NOTE: To ensure backward compatibility with existing drivers method this * function calls the &drm_driver.load method after registering the device * nodes, creating race conditions. Usage of the &drm_driver.load methods is * therefore deprecated, drivers must perform all initialization before calling * drm_dev_register(). * * RETURNS: * 0 on success, negative error code on failure. */ int drm_dev_register(struct drm_device *dev, unsigned long flags) { const struct drm_driver *driver = dev->driver; int ret; if (!driver->load) drm_mode_config_validate(dev); WARN_ON(!dev->managed.final_kfree); if (drm_dev_needs_global_mutex(dev)) mutex_lock(&drm_global_mutex); if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) accel_debugfs_register(dev); else drm_debugfs_dev_register(dev); ret = drm_minor_register(dev, DRM_MINOR_RENDER); if (ret) goto err_minors; ret = drm_minor_register(dev, DRM_MINOR_PRIMARY); if (ret) goto err_minors; ret = drm_minor_register(dev, DRM_MINOR_ACCEL); if (ret) goto err_minors; ret = create_compat_control_link(dev); if (ret) goto err_minors; dev->registered = true; if (driver->load) { ret = driver->load(dev, flags); if (ret) goto err_minors; } if (drm_core_check_feature(dev, DRIVER_MODESET)) { ret = drm_modeset_register_all(dev); if (ret) goto err_unload; } DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", driver->name, driver->major, driver->minor, driver->patchlevel, driver->date, dev->dev ? dev_name(dev->dev) : "virtual device", dev->primary ? dev->primary->index : dev->accel->index); goto out_unlock; err_unload: if (dev->driver->unload) dev->driver->unload(dev); err_minors: remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_ACCEL); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); out_unlock: if (drm_dev_needs_global_mutex(dev)) mutex_unlock(&drm_global_mutex); return ret; } EXPORT_SYMBOL(drm_dev_register); /** * drm_dev_unregister - Unregister DRM device * @dev: Device to unregister * * Unregister the DRM device from the system. This does the reverse of * drm_dev_register() but does not deallocate the device. The caller must call * drm_dev_put() to drop their final reference, unless it is managed with devres * (as devices allocated with devm_drm_dev_alloc() are), in which case there is * already an unwind action registered. * * A special form of unregistering for hotpluggable devices is drm_dev_unplug(), * which can be called while there are still open users of @dev. 
* * This should be called first in the device teardown code to make sure * userspace can't access the device instance any more. */ void drm_dev_unregister(struct drm_device *dev) { dev->registered = false; drm_client_dev_unregister(dev); if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_modeset_unregister_all(dev); if (dev->driver->unload) dev->driver->unload(dev); remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_ACCEL); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); drm_debugfs_dev_fini(dev); } EXPORT_SYMBOL(drm_dev_unregister); /* * DRM Core * The DRM core module initializes all global DRM objects and makes them * available to drivers. Once setup, drivers can probe their respective * devices. * Currently, core management includes: * - The "DRM-Global" key/value database * - Global ID management for connectors * - DRM major number allocation * - DRM minor management * - DRM sysfs class * - DRM debugfs root * * Furthermore, the DRM core provides dynamic char-dev lookups. For each * interface registered on a DRM device, you can request minor numbers from DRM * core. DRM core takes care of major-number management and char-dev * registration. A stub ->open() callback forwards any open() requests to the * registered minor. */ static int drm_stub_open(struct inode *inode, struct file *filp) { const struct file_operations *new_fops; struct drm_minor *minor; int err; DRM_DEBUG("\n"); minor = drm_minor_acquire(iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); new_fops = fops_get(minor->dev->driver->fops); if (!new_fops) { err = -ENODEV; goto out; } replace_fops(filp, new_fops); if (filp->f_op->open) err = filp->f_op->open(inode, filp); else err = 0; out: drm_minor_release(minor); return err; } static const struct file_operations drm_stub_fops = { .owner = THIS_MODULE, .open = drm_stub_open, .llseek = noop_llseek, }; static void drm_core_exit(void) { drm_privacy_screen_lookup_exit(); accel_core_exit(); unregister_chrdev(DRM_MAJOR, "drm"); debugfs_remove(drm_debugfs_root); drm_sysfs_destroy(); idr_destroy(&drm_minors_idr); drm_connector_ida_destroy(); } static int __init drm_core_init(void) { int ret; drm_connector_ida_init(); idr_init(&drm_minors_idr); drm_memcpy_init_early(); ret = drm_sysfs_init(); if (ret < 0) { DRM_ERROR("Cannot create DRM class: %d\n", ret); goto error; } drm_debugfs_root = debugfs_create_dir("dri", NULL); ret = register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops); if (ret < 0) goto error; ret = accel_core_init(); if (ret < 0) goto error; drm_privacy_screen_lookup_init(); drm_core_init_complete = true; DRM_DEBUG("Initialized\n"); return 0; error: drm_core_exit(); return ret; } module_init(drm_core_init); module_exit(drm_core_exit);
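The drm_dev_enter()/drm_dev_exit() pattern that the kerneldoc above prescribes for hotpluggable devices, as a short sketch (the register write and the example_write_reg() name are assumptions for illustration, not part of drm_drv.c):

static void example_write_reg(struct drm_device *drm, void __iomem *mmio,
			      u32 reg, u32 val)
{
	int idx;

	if (!drm_dev_enter(drm, &idx))
		return;		/* device was unplugged; skip the hardware access */

	writel(val, mmio + reg);	/* hypothetical MMIO register write */

	drm_dev_exit(idx);
}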
// SPDX-License-Identifier: GPL-2.0-only
/*
 * HID driver for the apple ir device
 *
 * Original driver written by James McKenzie
 * Ported to recent 2.6 kernel versions by Greg Kroah-Hartman <gregkh@suse.de>
 * Updated to support newer remotes by Bastien Nocera <hadess@hadess.net>
 * Ported to HID subsystem by Benjamin Tissoires <benjamin.tissoires@gmail.com>
 *
 * Copyright (C) 2006 James McKenzie
 * Copyright (C) 2008 Greg Kroah-Hartman <greg@kroah.com>
 * Copyright (C) 2008 Novell Inc.
 * Copyright (C) 2010, 2012 Bastien Nocera <hadess@hadess.net>
 * Copyright (C) 2013 Benjamin Tissoires <benjamin.tissoires@gmail.com>
 * Copyright (C) 2013 Red Hat Inc.
All Rights Reserved */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" MODULE_AUTHOR("James McKenzie"); MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@redhat.com>"); MODULE_DESCRIPTION("HID Apple IR remote controls"); MODULE_LICENSE("GPL"); #define KEY_MASK 0x0F #define TWO_PACKETS_MASK 0x40 /* * James McKenzie has two devices both of which report the following * 25 87 ee 83 0a + * 25 87 ee 83 0c - * 25 87 ee 83 09 << * 25 87 ee 83 06 >> * 25 87 ee 83 05 >" * 25 87 ee 83 03 menu * 26 00 00 00 00 for key repeat */ /* * Thomas Glanzmann reports the following responses * 25 87 ee ca 0b + * 25 87 ee ca 0d - * 25 87 ee ca 08 << * 25 87 ee ca 07 >> * 25 87 ee ca 04 >" * 25 87 ee ca 02 menu * 26 00 00 00 00 for key repeat * * He also observes the following event sometimes * sent after a key is release, which I interpret * as a flat battery message * 25 87 e0 ca 06 flat battery */ /* * Alexandre Karpenko reports the following responses for Device ID 0x8242 * 25 87 ee 47 0b + * 25 87 ee 47 0d - * 25 87 ee 47 08 << * 25 87 ee 47 07 >> * 25 87 ee 47 04 >" * 25 87 ee 47 02 menu * 26 87 ee 47 ** for key repeat (** is the code of the key being held) */ /* * Bastien Nocera's remote * 25 87 ee 91 5f followed by * 25 87 ee 91 05 gives you >" * * 25 87 ee 91 5c followed by * 25 87 ee 91 05 gives you the middle button */ /* * Fabien Andre's remote * 25 87 ee a3 5e followed by * 25 87 ee a3 04 gives you >" * * 25 87 ee a3 5d followed by * 25 87 ee a3 04 gives you the middle button */ static const unsigned short appleir_key_table[] = { KEY_RESERVED, KEY_MENU, KEY_PLAYPAUSE, KEY_FORWARD, KEY_BACK, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_ENTER, KEY_PLAYPAUSE, KEY_RESERVED, }; struct appleir { struct input_dev *input_dev; struct hid_device *hid; unsigned short keymap[ARRAY_SIZE(appleir_key_table)]; struct timer_list key_up_timer; /* timer for key up */ spinlock_t lock; /* protects .current_key */ int current_key; /* the currently pressed key */ int prev_key_idx; /* key index in a 2 packets message */ }; static int get_key(int data) { /* * The key is coded accross bits 2..9: * * 0x00 or 0x01 ( ) key: 0 -> KEY_RESERVED * 0x02 or 0x03 ( menu ) key: 1 -> KEY_MENU * 0x04 or 0x05 ( >" ) key: 2 -> KEY_PLAYPAUSE * 0x06 or 0x07 ( >> ) key: 3 -> KEY_FORWARD * 0x08 or 0x09 ( << ) key: 4 -> KEY_BACK * 0x0a or 0x0b ( + ) key: 5 -> KEY_VOLUMEUP * 0x0c or 0x0d ( - ) key: 6 -> KEY_VOLUMEDOWN * 0x0e or 0x0f ( ) key: 7 -> KEY_RESERVED * 0x50 or 0x51 ( ) key: 8 -> KEY_RESERVED * 0x52 or 0x53 ( ) key: 9 -> KEY_RESERVED * 0x54 or 0x55 ( ) key: 10 -> KEY_RESERVED * 0x56 or 0x57 ( ) key: 11 -> KEY_RESERVED * 0x58 or 0x59 ( ) key: 12 -> KEY_RESERVED * 0x5a or 0x5b ( ) key: 13 -> KEY_RESERVED * 0x5c or 0x5d ( middle ) key: 14 -> KEY_ENTER * 0x5e or 0x5f ( >" ) key: 15 -> KEY_PLAYPAUSE * * Packets starting with 0x5 are part of a two-packets message, * we notify the caller by sending a negative value. 
*/ int key = (data >> 1) & KEY_MASK; if ((data & TWO_PACKETS_MASK)) /* Part of a 2 packets-command */ key = -key; return key; } static void key_up(struct hid_device *hid, struct appleir *appleir, int key) { input_report_key(appleir->input_dev, key, 0); input_sync(appleir->input_dev); } static void key_down(struct hid_device *hid, struct appleir *appleir, int key) { input_report_key(appleir->input_dev, key, 1); input_sync(appleir->input_dev); } static void battery_flat(struct appleir *appleir) { dev_err(&appleir->input_dev->dev, "possible flat battery?\n"); } static void key_up_tick(struct timer_list *t) { struct appleir *appleir = from_timer(appleir, t, key_up_timer); struct hid_device *hid = appleir->hid; unsigned long flags; spin_lock_irqsave(&appleir->lock, flags); if (appleir->current_key) { key_up(hid, appleir, appleir->current_key); appleir->current_key = 0; } spin_unlock_irqrestore(&appleir->lock, flags); } static int appleir_raw_event(struct hid_device *hid, struct hid_report *report, u8 *data, int len) { struct appleir *appleir = hid_get_drvdata(hid); static const u8 keydown[] = { 0x25, 0x87, 0xee }; static const u8 keyrepeat[] = { 0x26, }; static const u8 flatbattery[] = { 0x25, 0x87, 0xe0 }; unsigned long flags; if (len != 5) goto out; if (!memcmp(data, keydown, sizeof(keydown))) { int index; spin_lock_irqsave(&appleir->lock, flags); /* * If we already have a key down, take it up before marking * this one down */ if (appleir->current_key) key_up(hid, appleir, appleir->current_key); /* Handle dual packet commands */ if (appleir->prev_key_idx > 0) index = appleir->prev_key_idx; else index = get_key(data[4]); if (index >= 0) { appleir->current_key = appleir->keymap[index]; key_down(hid, appleir, appleir->current_key); /* * Remote doesn't do key up, either pull them up, in * the test above, or here set a timer which pulls * them up after 1/8 s */ mod_timer(&appleir->key_up_timer, jiffies + HZ / 8); appleir->prev_key_idx = 0; } else /* Remember key for next packet */ appleir->prev_key_idx = -index; spin_unlock_irqrestore(&appleir->lock, flags); goto out; } appleir->prev_key_idx = 0; if (!memcmp(data, keyrepeat, sizeof(keyrepeat))) { key_down(hid, appleir, appleir->current_key); /* * Remote doesn't do key up, either pull them up, in the test * above, or here set a timer which pulls them up after 1/8 s */ mod_timer(&appleir->key_up_timer, jiffies + HZ / 8); goto out; } if (!memcmp(data, flatbattery, sizeof(flatbattery))) { battery_flat(appleir); /* Fall through */ } out: /* let hidraw and hiddev handle the report */ return 0; } static int appleir_input_configured(struct hid_device *hid, struct hid_input *hidinput) { struct input_dev *input_dev = hidinput->input; struct appleir *appleir = hid_get_drvdata(hid); int i; appleir->input_dev = input_dev; input_dev->keycode = appleir->keymap; input_dev->keycodesize = sizeof(unsigned short); input_dev->keycodemax = ARRAY_SIZE(appleir->keymap); input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); memcpy(appleir->keymap, appleir_key_table, sizeof(appleir->keymap)); for (i = 0; i < ARRAY_SIZE(appleir_key_table); i++) set_bit(appleir->keymap[i], input_dev->keybit); clear_bit(KEY_RESERVED, input_dev->keybit); return 0; } static int appleir_input_mapping(struct hid_device *hid, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { return -1; } static int appleir_probe(struct hid_device *hid, const struct hid_device_id *id) { int ret; struct appleir *appleir; appleir = devm_kzalloc(&hid->dev, 
sizeof(struct appleir), GFP_KERNEL); if (!appleir) return -ENOMEM; appleir->hid = hid; /* force input as some remotes bypass the input registration */ hid->quirks |= HID_QUIRK_HIDINPUT_FORCE; spin_lock_init(&appleir->lock); timer_setup(&appleir->key_up_timer, key_up_tick, 0); hid_set_drvdata(hid, appleir); ret = hid_parse(hid); if (ret) { hid_err(hid, "parse failed\n"); goto fail; } ret = hid_hw_start(hid, HID_CONNECT_DEFAULT | HID_CONNECT_HIDDEV_FORCE); if (ret) { hid_err(hid, "hw start failed\n"); goto fail; } return 0; fail: devm_kfree(&hid->dev, appleir); return ret; } static void appleir_remove(struct hid_device *hid) { struct appleir *appleir = hid_get_drvdata(hid); hid_hw_stop(hid); del_timer_sync(&appleir->key_up_timer); } static const struct hid_device_id appleir_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) }, { } }; MODULE_DEVICE_TABLE(hid, appleir_devices); static struct hid_driver appleir_driver = { .name = "appleir", .id_table = appleir_devices, .raw_event = appleir_raw_event, .input_configured = appleir_input_configured, .probe = appleir_probe, .remove = appleir_remove, .input_mapping = appleir_input_mapping, }; module_hid_driver(appleir_driver);
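As a worked example of the decoding described in the get_key() comment above: for the report 25 87 ee 83 0a, data[4] is 0x0a, so (0x0a >> 1) & KEY_MASK yields index 5, which appleir_key_table maps to KEY_VOLUMEUP. A byte with TWO_PACKETS_MASK (0x40) set, such as 0x5c, decodes to index 14 (KEY_ENTER) but is returned negated, signalling that the following packet completes the two-packet sequence.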
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/lockref.h>

#if USE_CMPXCHG_LOCKREF

/*
 * Note that the "cmpxchg()" reloads the "old" value for the
 * failure case.
 */
#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
	int retry = 100;							\
	struct lockref old;							\
	BUILD_BUG_ON(sizeof(old) != 8);						\
	old.lock_count = READ_ONCE(lockref->lock_count);			\
	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
		struct lockref new = old;					\
		CODE								\
		if (likely(try_cmpxchg64_relaxed(&lockref->lock_count,		\
						 &old.lock_count,		\
						 new.lock_count))) {		\
			SUCCESS;						\
		}								\
		if (!--retry)							\
			break;							\
	}									\
} while (0)

#else

#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)

#endif

/**
 * lockref_get - Increments reference count unconditionally
 * @lockref: pointer to lockref structure
 *
 * This operation is only valid if you already hold a reference
 * to the object, so you know the count cannot be zero.
 */
void lockref_get(struct lockref *lockref)
{
	CMPXCHG_LOOP(
		new.count++;
	,
		return;
	);

	spin_lock(&lockref->lock);
	lockref->count++;
	spin_unlock(&lockref->lock);
}
EXPORT_SYMBOL(lockref_get);

/**
 * lockref_get_not_zero - Increments count unless the count is 0 or dead
 * @lockref: pointer to lockref structure
 * Return: 1 if count updated successfully or 0 if count was zero
 */
int lockref_get_not_zero(struct lockref *lockref)
{
	int retval;

	CMPXCHG_LOOP(
		new.count++;
		if (old.count <= 0)
			return 0;
	,
		return 1;
	);

	spin_lock(&lockref->lock);
	retval = 0;
	if (lockref->count > 0) {
		lockref->count++;
		retval = 1;
	}
	spin_unlock(&lockref->lock);
	return retval;
}
EXPORT_SYMBOL(lockref_get_not_zero);

/**
 * lockref_put_not_zero - Decrements count unless count <= 1 before decrement
 * @lockref: pointer to lockref structure
 * Return: 1 if count updated successfully or 0 if count would become zero
 */
int lockref_put_not_zero(struct lockref *lockref)
{
	int retval;

	CMPXCHG_LOOP(
		new.count--;
		if (old.count <= 1)
			return 0;
	,
		return 1;
	);

	spin_lock(&lockref->lock);
	retval = 0;
	if (lockref->count > 1) {
		lockref->count--;
		retval = 1;
	}
	spin_unlock(&lockref->lock);
	return retval;
}
EXPORT_SYMBOL(lockref_put_not_zero);

/**
 * lockref_put_return - Decrement reference count if possible
 * @lockref: pointer to lockref structure
 *
 * Decrement the reference count and return the new value.
 * If the lockref was dead or locked, return an error.
*/ int lockref_put_return(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 0) return -1; , return new.count; ); return -1; } EXPORT_SYMBOL(lockref_put_return); /** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken */ int lockref_put_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 1) break; , return 1; ); spin_lock(&lockref->lock); if (lockref->count <= 1) return 0; lockref->count--; spin_unlock(&lockref->lock); return 1; } EXPORT_SYMBOL(lockref_put_or_lock); /** * lockref_mark_dead - mark lockref dead * @lockref: pointer to lockref structure */ void lockref_mark_dead(struct lockref *lockref) { assert_spin_locked(&lockref->lock); lockref->count = -128; } EXPORT_SYMBOL(lockref_mark_dead); /** * lockref_get_not_dead - Increments count unless the ref is dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if lockref was dead */ int lockref_get_not_dead(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count++; if (old.count < 0) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count >= 0) { lockref->count++; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_get_not_dead);
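A short sketch of the usual way an embedding object consumes this API, loosely modelled on how the dcache uses lockref (struct my_obj, my_obj_free() and both helpers are assumptions for illustration, not part of lib/lockref.c):

#include <linux/lockref.h>
#include <linux/slab.h>

struct my_obj {
	struct lockref ref;
	/* ... payload ... */
};

static void my_obj_free(struct my_obj *obj)
{
	kfree(obj);
}

static struct my_obj *my_obj_tryget(struct my_obj *obj)
{
	/* Fast path is a lockless cmpxchg; it fails only once the ref is dead. */
	return lockref_get_not_dead(&obj->ref) ? obj : NULL;
}

static void my_obj_put(struct my_obj *obj)
{
	/* Drops a reference; falls back to the spinlock only near zero. */
	if (lockref_put_or_lock(&obj->ref))
		return;

	/* Count was <= 1 and the lock is now held: tear the object down. */
	lockref_mark_dead(&obj->ref);
	spin_unlock(&obj->ref.lock);
	my_obj_free(obj);
}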
// SPDX-License-Identifier: ISC /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. */ #include <linux/etherdevice.h> #include "htt.h" #include "mac.h" #include "hif.h" #include "txrx.h" #include "debug.h" static u8 ath10k_htt_tx_txq_calc_size(size_t count) { int exp; int factor; exp = 0; factor = count >> 7; while (factor >= 64 && exp < 4) { factor >>= 3; exp++; } if (exp == 4) return 0xff; if (count > 0) factor = max(1, factor); return SM(exp, HTT_TX_Q_STATE_ENTRY_EXP) | SM(factor, HTT_TX_Q_STATE_ENTRY_FACTOR); } static void __ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; struct ath10k_sta *arsta; struct ath10k_vif *arvif = (void *)txq->vif->drv_priv; unsigned long byte_cnt; int idx; u32 bit; u16 peer_id; u8 tid; u8 count; lockdep_assert_held(&ar->htt.tx_lock); if (!ar->htt.tx_q_state.enabled) return; if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL) return; if (txq->sta) { arsta = (void *)txq->sta->drv_priv; peer_id = arsta->peer_id; } else { peer_id = arvif->peer_id; } tid = txq->tid; bit = BIT(peer_id % 32); idx = peer_id / 32; ieee80211_txq_get_depth(txq, NULL, &byte_cnt); count = ath10k_htt_tx_txq_calc_size(byte_cnt); if (unlikely(peer_id >= ar->htt.tx_q_state.num_peers) || unlikely(tid >= ar->htt.tx_q_state.num_tids)) { ath10k_warn(ar, "refusing to update txq for peer_id %u tid %u due to out of bounds\n", peer_id, tid); return; } ar->htt.tx_q_state.vaddr->count[tid][peer_id] = count; ar->htt.tx_q_state.vaddr->map[tid][idx] &= ~bit; ar->htt.tx_q_state.vaddr->map[tid][idx] |= count ?
bit : 0; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx txq state update peer_id %u tid %u count %u\n", peer_id, tid, count); } static void __ath10k_htt_tx_txq_sync(struct ath10k *ar) { u32 seq; size_t size; lockdep_assert_held(&ar->htt.tx_lock); if (!ar->htt.tx_q_state.enabled) return; if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL) return; seq = le32_to_cpu(ar->htt.tx_q_state.vaddr->seq); seq++; ar->htt.tx_q_state.vaddr->seq = cpu_to_le32(seq); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx txq state update commit seq %u\n", seq); size = sizeof(*ar->htt.tx_q_state.vaddr); dma_sync_single_for_device(ar->dev, ar->htt.tx_q_state.paddr, size, DMA_TO_DEVICE); } void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; spin_lock_bh(&ar->htt.tx_lock); __ath10k_htt_tx_txq_recalc(hw, txq); spin_unlock_bh(&ar->htt.tx_lock); } void ath10k_htt_tx_txq_sync(struct ath10k *ar) { spin_lock_bh(&ar->htt.tx_lock); __ath10k_htt_tx_txq_sync(ar); spin_unlock_bh(&ar->htt.tx_lock); } void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { struct ath10k *ar = hw->priv; spin_lock_bh(&ar->htt.tx_lock); __ath10k_htt_tx_txq_recalc(hw, txq); __ath10k_htt_tx_txq_sync(ar); spin_unlock_bh(&ar->htt.tx_lock); } void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt) { lockdep_assert_held(&htt->tx_lock); htt->num_pending_tx--; if (htt->num_pending_tx == htt->max_num_pending_tx - 1) ath10k_mac_tx_unlock(htt->ar, ATH10K_TX_PAUSE_Q_FULL); if (htt->num_pending_tx == 0) wake_up(&htt->empty_tx_wq); } int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt) { lockdep_assert_held(&htt->tx_lock); if (htt->num_pending_tx >= htt->max_num_pending_tx) return -EBUSY; htt->num_pending_tx++; if (htt->num_pending_tx == htt->max_num_pending_tx) ath10k_mac_tx_lock(htt->ar, ATH10K_TX_PAUSE_Q_FULL); return 0; } int ath10k_htt_tx_mgmt_inc_pending(struct ath10k_htt *htt, bool is_mgmt, bool is_presp) { struct ath10k *ar = htt->ar; lockdep_assert_held(&htt->tx_lock); if (!is_mgmt || !ar->hw_params.max_probe_resp_desc_thres) return 0; if (is_presp && ar->hw_params.max_probe_resp_desc_thres < htt->num_pending_mgmt_tx) return -EBUSY; htt->num_pending_mgmt_tx++; return 0; } void ath10k_htt_tx_mgmt_dec_pending(struct ath10k_htt *htt) { lockdep_assert_held(&htt->tx_lock); if (!htt->ar->hw_params.max_probe_resp_desc_thres) return; htt->num_pending_mgmt_tx--; } int ath10k_htt_tx_alloc_msdu_id(struct ath10k_htt *htt, struct sk_buff *skb) { struct ath10k *ar = htt->ar; int ret; spin_lock_bh(&htt->tx_lock); ret = idr_alloc(&htt->pending_tx, skb, 0, htt->max_num_pending_tx, GFP_ATOMIC); spin_unlock_bh(&htt->tx_lock); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx alloc msdu_id %d\n", ret); return ret; } void ath10k_htt_tx_free_msdu_id(struct ath10k_htt *htt, u16 msdu_id) { struct ath10k *ar = htt->ar; lockdep_assert_held(&htt->tx_lock); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx free msdu_id %u\n", msdu_id); idr_remove(&htt->pending_tx, msdu_id); } static void ath10k_htt_tx_free_cont_txbuf_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!htt->txbuf.vaddr_txbuff_32) return; size = htt->txbuf.size; dma_free_coherent(ar->dev, size, htt->txbuf.vaddr_txbuff_32, htt->txbuf.paddr); htt->txbuf.vaddr_txbuff_32 = NULL; } static int ath10k_htt_tx_alloc_cont_txbuf_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; size = htt->max_num_pending_tx * sizeof(struct ath10k_htt_txbuf_32); htt->txbuf.vaddr_txbuff_32 = dma_alloc_coherent(ar->dev, size, 
&htt->txbuf.paddr, GFP_KERNEL); if (!htt->txbuf.vaddr_txbuff_32) return -ENOMEM; htt->txbuf.size = size; return 0; } static void ath10k_htt_tx_free_cont_txbuf_64(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!htt->txbuf.vaddr_txbuff_64) return; size = htt->txbuf.size; dma_free_coherent(ar->dev, size, htt->txbuf.vaddr_txbuff_64, htt->txbuf.paddr); htt->txbuf.vaddr_txbuff_64 = NULL; } static int ath10k_htt_tx_alloc_cont_txbuf_64(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; size = htt->max_num_pending_tx * sizeof(struct ath10k_htt_txbuf_64); htt->txbuf.vaddr_txbuff_64 = dma_alloc_coherent(ar->dev, size, &htt->txbuf.paddr, GFP_KERNEL); if (!htt->txbuf.vaddr_txbuff_64) return -ENOMEM; htt->txbuf.size = size; return 0; } static void ath10k_htt_tx_free_cont_frag_desc_32(struct ath10k_htt *htt) { size_t size; if (!htt->frag_desc.vaddr_desc_32) return; size = htt->max_num_pending_tx * sizeof(struct htt_msdu_ext_desc); dma_free_coherent(htt->ar->dev, size, htt->frag_desc.vaddr_desc_32, htt->frag_desc.paddr); htt->frag_desc.vaddr_desc_32 = NULL; } static int ath10k_htt_tx_alloc_cont_frag_desc_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!ar->hw_params.continuous_frag_desc) return 0; size = htt->max_num_pending_tx * sizeof(struct htt_msdu_ext_desc); htt->frag_desc.vaddr_desc_32 = dma_alloc_coherent(ar->dev, size, &htt->frag_desc.paddr, GFP_KERNEL); if (!htt->frag_desc.vaddr_desc_32) { ath10k_err(ar, "failed to alloc fragment desc memory\n"); return -ENOMEM; } htt->frag_desc.size = size; return 0; } static void ath10k_htt_tx_free_cont_frag_desc_64(struct ath10k_htt *htt) { size_t size; if (!htt->frag_desc.vaddr_desc_64) return; size = htt->max_num_pending_tx * sizeof(struct htt_msdu_ext_desc_64); dma_free_coherent(htt->ar->dev, size, htt->frag_desc.vaddr_desc_64, htt->frag_desc.paddr); htt->frag_desc.vaddr_desc_64 = NULL; } static int ath10k_htt_tx_alloc_cont_frag_desc_64(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!ar->hw_params.continuous_frag_desc) return 0; size = htt->max_num_pending_tx * sizeof(struct htt_msdu_ext_desc_64); htt->frag_desc.vaddr_desc_64 = dma_alloc_coherent(ar->dev, size, &htt->frag_desc.paddr, GFP_KERNEL); if (!htt->frag_desc.vaddr_desc_64) { ath10k_err(ar, "failed to alloc fragment desc memory\n"); return -ENOMEM; } htt->frag_desc.size = size; return 0; } static void ath10k_htt_tx_free_txq(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; if (!test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) return; size = sizeof(*htt->tx_q_state.vaddr); dma_unmap_single(ar->dev, htt->tx_q_state.paddr, size, DMA_TO_DEVICE); kfree(htt->tx_q_state.vaddr); } static int ath10k_htt_tx_alloc_txq(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; size_t size; int ret; if (!test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) return 0; htt->tx_q_state.num_peers = HTT_TX_Q_STATE_NUM_PEERS; htt->tx_q_state.num_tids = HTT_TX_Q_STATE_NUM_TIDS; htt->tx_q_state.type = HTT_Q_DEPTH_TYPE_BYTES; size = sizeof(*htt->tx_q_state.vaddr); htt->tx_q_state.vaddr = kzalloc(size, GFP_KERNEL); if (!htt->tx_q_state.vaddr) return -ENOMEM; htt->tx_q_state.paddr = dma_map_single(ar->dev, htt->tx_q_state.vaddr, size, DMA_TO_DEVICE); ret = dma_mapping_error(ar->dev, htt->tx_q_state.paddr); if (ret) { ath10k_warn(ar, "failed to dma map tx_q_state: %d\n", ret); kfree(htt->tx_q_state.vaddr); return -EIO; } return 0; } static void 
ath10k_htt_tx_free_txdone_fifo(struct ath10k_htt *htt) { WARN_ON(!kfifo_is_empty(&htt->txdone_fifo)); kfifo_free(&htt->txdone_fifo); } static int ath10k_htt_tx_alloc_txdone_fifo(struct ath10k_htt *htt) { int ret; size_t size; size = roundup_pow_of_two(htt->max_num_pending_tx); ret = kfifo_alloc(&htt->txdone_fifo, size, GFP_KERNEL); return ret; } static int ath10k_htt_tx_alloc_buf(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; int ret; ret = ath10k_htt_alloc_txbuff(htt); if (ret) { ath10k_err(ar, "failed to alloc cont tx buffer: %d\n", ret); return ret; } ret = ath10k_htt_alloc_frag_desc(htt); if (ret) { ath10k_err(ar, "failed to alloc cont frag desc: %d\n", ret); goto free_txbuf; } ret = ath10k_htt_tx_alloc_txq(htt); if (ret) { ath10k_err(ar, "failed to alloc txq: %d\n", ret); goto free_frag_desc; } ret = ath10k_htt_tx_alloc_txdone_fifo(htt); if (ret) { ath10k_err(ar, "failed to alloc txdone fifo: %d\n", ret); goto free_txq; } return 0; free_txq: ath10k_htt_tx_free_txq(htt); free_frag_desc: ath10k_htt_free_frag_desc(htt); free_txbuf: ath10k_htt_free_txbuff(htt); return ret; } int ath10k_htt_tx_start(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; int ret; ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt tx max num pending tx %d\n", htt->max_num_pending_tx); spin_lock_init(&htt->tx_lock); idr_init(&htt->pending_tx); if (htt->tx_mem_allocated) return 0; if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) return 0; ret = ath10k_htt_tx_alloc_buf(htt); if (ret) goto free_idr_pending_tx; htt->tx_mem_allocated = true; return 0; free_idr_pending_tx: idr_destroy(&htt->pending_tx); return ret; } static int ath10k_htt_tx_clean_up_pending(int msdu_id, void *skb, void *ctx) { struct ath10k *ar = ctx; struct ath10k_htt *htt = &ar->htt; struct htt_tx_done tx_done = {0}; ath10k_dbg(ar, ATH10K_DBG_HTT, "force cleanup msdu_id %u\n", msdu_id); tx_done.msdu_id = msdu_id; tx_done.status = HTT_TX_COMPL_STATE_DISCARD; ath10k_txrx_tx_unref(htt, &tx_done); return 0; } void ath10k_htt_tx_destroy(struct ath10k_htt *htt) { if (!htt->tx_mem_allocated) return; ath10k_htt_free_txbuff(htt); ath10k_htt_tx_free_txq(htt); ath10k_htt_free_frag_desc(htt); ath10k_htt_tx_free_txdone_fifo(htt); htt->tx_mem_allocated = false; } static void ath10k_htt_flush_tx_queue(struct ath10k_htt *htt) { ath10k_htc_stop_hl(htt->ar); idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar); } void ath10k_htt_tx_stop(struct ath10k_htt *htt) { ath10k_htt_flush_tx_queue(htt); idr_destroy(&htt->pending_tx); } void ath10k_htt_tx_free(struct ath10k_htt *htt) { ath10k_htt_tx_stop(htt); ath10k_htt_tx_destroy(htt); } void ath10k_htt_op_ep_tx_credits(struct ath10k *ar) { queue_work(ar->workqueue, &ar->bundle_tx_work); } void ath10k_htt_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb) { struct ath10k_htt *htt = &ar->htt; struct htt_tx_done tx_done = {0}; struct htt_cmd_hdr *htt_hdr; struct htt_data_tx_desc *desc_hdr = NULL; u16 flags1 = 0; u8 msg_type = 0; if (htt->disable_tx_comp) { htt_hdr = (struct htt_cmd_hdr *)skb->data; msg_type = htt_hdr->msg_type; if (msg_type == HTT_H2T_MSG_TYPE_TX_FRM) { desc_hdr = (struct htt_data_tx_desc *) (skb->data + sizeof(*htt_hdr)); flags1 = __le16_to_cpu(desc_hdr->flags1); skb_pull(skb, sizeof(struct htt_cmd_hdr)); skb_pull(skb, sizeof(struct htt_data_tx_desc)); } } dev_kfree_skb_any(skb); if ((!htt->disable_tx_comp) || (msg_type != HTT_H2T_MSG_TYPE_TX_FRM)) return; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx complete msdu id:%u ,flags1:%x\n", __le16_to_cpu(desc_hdr->id), flags1); if (flags1 & 
HTT_DATA_TX_DESC_FLAGS1_TX_COMPLETE) return; tx_done.status = HTT_TX_COMPL_STATE_ACK; tx_done.msdu_id = __le16_to_cpu(desc_hdr->id); ath10k_txrx_tx_unref(&ar->htt, &tx_done); } void ath10k_htt_hif_tx_complete(struct ath10k *ar, struct sk_buff *skb) { dev_kfree_skb_any(skb); } EXPORT_SYMBOL(ath10k_htt_hif_tx_complete); int ath10k_htt_h2t_ver_req_msg(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct sk_buff *skb; struct htt_cmd *cmd; int len = 0; int ret; len += sizeof(cmd->hdr); len += sizeof(cmd->ver_req); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_VERSION_REQ; ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } int ath10k_htt_h2t_stats_req(struct ath10k_htt *htt, u32 mask, u32 reset_mask, u64 cookie) { struct ath10k *ar = htt->ar; struct htt_stats_req *req; struct sk_buff *skb; struct htt_cmd *cmd; int len = 0, ret; len += sizeof(cmd->hdr); len += sizeof(cmd->stats_req); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_STATS_REQ; req = &cmd->stats_req; memset(req, 0, sizeof(*req)); /* currently we support only max 24 bit masks so no need to worry * about endian support */ memcpy(req->upload_types, &mask, 3); memcpy(req->reset_types, &reset_mask, 3); req->stat_type = HTT_STATS_REQ_CFG_STAT_TYPE_INVALID; req->cookie_lsb = cpu_to_le32(cookie & 0xffffffff); req->cookie_msb = cpu_to_le32((cookie & 0xffffffff00000000ULL) >> 32); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { ath10k_warn(ar, "failed to send htt type stats request: %d", ret); dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_send_frag_desc_bank_cfg_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_frag_desc_bank_cfg32 *cfg; int ret, size; u8 info; if (!ar->hw_params.continuous_frag_desc) return 0; if (!htt->frag_desc.paddr) { ath10k_warn(ar, "invalid frag desc memory\n"); return -EINVAL; } size = sizeof(cmd->hdr) + sizeof(cmd->frag_desc_bank_cfg32); skb = ath10k_htc_alloc_skb(ar, size); if (!skb) return -ENOMEM; skb_put(skb, size); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_FRAG_DESC_BANK_CFG; info = 0; info |= SM(htt->tx_q_state.type, HTT_FRAG_DESC_BANK_CFG_INFO_Q_STATE_DEPTH_TYPE); if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) info |= HTT_FRAG_DESC_BANK_CFG_INFO_Q_STATE_VALID; cfg = &cmd->frag_desc_bank_cfg32; cfg->info = info; cfg->num_banks = 1; cfg->desc_size = sizeof(struct htt_msdu_ext_desc); cfg->bank_base_addrs[0] = __cpu_to_le32(htt->frag_desc.paddr); cfg->bank_id[0].bank_min_id = 0; cfg->bank_id[0].bank_max_id = __cpu_to_le16(htt->max_num_pending_tx - 1); cfg->q_state.paddr = cpu_to_le32(htt->tx_q_state.paddr); cfg->q_state.num_peers = cpu_to_le16(htt->tx_q_state.num_peers); cfg->q_state.num_tids = cpu_to_le16(htt->tx_q_state.num_tids); cfg->q_state.record_size = HTT_TX_Q_STATE_ENTRY_SIZE; cfg->q_state.record_multiplier = HTT_TX_Q_STATE_ENTRY_MULTIPLIER; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt frag desc bank cmd\n"); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { ath10k_warn(ar, "failed to send frag desc bank cfg request: %d\n", ret); dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_send_frag_desc_bank_cfg_64(struct ath10k_htt 
*htt) { struct ath10k *ar = htt->ar; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_frag_desc_bank_cfg64 *cfg; int ret, size; u8 info; if (!ar->hw_params.continuous_frag_desc) return 0; if (!htt->frag_desc.paddr) { ath10k_warn(ar, "invalid frag desc memory\n"); return -EINVAL; } size = sizeof(cmd->hdr) + sizeof(cmd->frag_desc_bank_cfg64); skb = ath10k_htc_alloc_skb(ar, size); if (!skb) return -ENOMEM; skb_put(skb, size); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_FRAG_DESC_BANK_CFG; info = 0; info |= SM(htt->tx_q_state.type, HTT_FRAG_DESC_BANK_CFG_INFO_Q_STATE_DEPTH_TYPE); if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, ar->running_fw->fw_file.fw_features)) info |= HTT_FRAG_DESC_BANK_CFG_INFO_Q_STATE_VALID; cfg = &cmd->frag_desc_bank_cfg64; cfg->info = info; cfg->num_banks = 1; cfg->desc_size = sizeof(struct htt_msdu_ext_desc_64); cfg->bank_base_addrs[0] = __cpu_to_le64(htt->frag_desc.paddr); cfg->bank_id[0].bank_min_id = 0; cfg->bank_id[0].bank_max_id = __cpu_to_le16(htt->max_num_pending_tx - 1); cfg->q_state.paddr = cpu_to_le32(htt->tx_q_state.paddr); cfg->q_state.num_peers = cpu_to_le16(htt->tx_q_state.num_peers); cfg->q_state.num_tids = cpu_to_le16(htt->tx_q_state.num_tids); cfg->q_state.record_size = HTT_TX_Q_STATE_ENTRY_SIZE; cfg->q_state.record_multiplier = HTT_TX_Q_STATE_ENTRY_MULTIPLIER; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt frag desc bank cmd\n"); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { ath10k_warn(ar, "failed to send frag desc bank cfg request: %d\n", ret); dev_kfree_skb_any(skb); return ret; } return 0; } static void ath10k_htt_fill_rx_desc_offset_32(struct ath10k_hw_params *hw, struct htt_rx_ring_setup_ring32 *rx_ring) { ath10k_htt_rx_desc_get_offsets(hw, &rx_ring->offsets); } static void ath10k_htt_fill_rx_desc_offset_64(struct ath10k_hw_params *hw, struct htt_rx_ring_setup_ring64 *rx_ring) { ath10k_htt_rx_desc_get_offsets(hw, &rx_ring->offsets); } static int ath10k_htt_send_rx_ring_cfg_32(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct ath10k_hw_params *hw = &ar->hw_params; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_rx_ring_setup_ring32 *ring; const int num_rx_ring = 1; u16 flags; u32 fw_idx; int len; int ret; /* * the HW expects the buffer to be an integral number of 4-byte * "words" */ BUILD_BUG_ON(!IS_ALIGNED(HTT_RX_BUF_SIZE, 4)); BUILD_BUG_ON((HTT_RX_BUF_SIZE & HTT_MAX_CACHE_LINE_SIZE_MASK) != 0); len = sizeof(cmd->hdr) + sizeof(cmd->rx_setup_32.hdr) + (sizeof(*ring) * num_rx_ring); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; ring = &cmd->rx_setup_32.rings[0]; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_RX_RING_CFG; cmd->rx_setup_32.hdr.num_rings = 1; /* FIXME: do we need all of this? 
*/ flags = 0; flags |= HTT_RX_RING_FLAGS_MAC80211_HDR; flags |= HTT_RX_RING_FLAGS_MSDU_PAYLOAD; flags |= HTT_RX_RING_FLAGS_PPDU_START; flags |= HTT_RX_RING_FLAGS_PPDU_END; flags |= HTT_RX_RING_FLAGS_MPDU_START; flags |= HTT_RX_RING_FLAGS_MPDU_END; flags |= HTT_RX_RING_FLAGS_MSDU_START; flags |= HTT_RX_RING_FLAGS_MSDU_END; flags |= HTT_RX_RING_FLAGS_RX_ATTENTION; flags |= HTT_RX_RING_FLAGS_FRAG_INFO; flags |= HTT_RX_RING_FLAGS_UNICAST_RX; flags |= HTT_RX_RING_FLAGS_MULTICAST_RX; flags |= HTT_RX_RING_FLAGS_CTRL_RX; flags |= HTT_RX_RING_FLAGS_MGMT_RX; flags |= HTT_RX_RING_FLAGS_NULL_RX; flags |= HTT_RX_RING_FLAGS_PHY_DATA_RX; fw_idx = __le32_to_cpu(*htt->rx_ring.alloc_idx.vaddr); ring->fw_idx_shadow_reg_paddr = __cpu_to_le32(htt->rx_ring.alloc_idx.paddr); ring->rx_ring_base_paddr = __cpu_to_le32(htt->rx_ring.base_paddr); ring->rx_ring_len = __cpu_to_le16(htt->rx_ring.size); ring->rx_ring_bufsize = __cpu_to_le16(HTT_RX_BUF_SIZE); ring->flags = __cpu_to_le16(flags); ring->fw_idx_init_val = __cpu_to_le16(fw_idx); ath10k_htt_fill_rx_desc_offset_32(hw, ring); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_send_rx_ring_cfg_64(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct ath10k_hw_params *hw = &ar->hw_params; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_rx_ring_setup_ring64 *ring; const int num_rx_ring = 1; u16 flags; u32 fw_idx; int len; int ret; /* HW expects the buffer to be an integral number of 4-byte * "words" */ BUILD_BUG_ON(!IS_ALIGNED(HTT_RX_BUF_SIZE, 4)); BUILD_BUG_ON((HTT_RX_BUF_SIZE & HTT_MAX_CACHE_LINE_SIZE_MASK) != 0); len = sizeof(cmd->hdr) + sizeof(cmd->rx_setup_64.hdr) + (sizeof(*ring) * num_rx_ring); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; ring = &cmd->rx_setup_64.rings[0]; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_RX_RING_CFG; cmd->rx_setup_64.hdr.num_rings = 1; flags = 0; flags |= HTT_RX_RING_FLAGS_MAC80211_HDR; flags |= HTT_RX_RING_FLAGS_MSDU_PAYLOAD; flags |= HTT_RX_RING_FLAGS_PPDU_START; flags |= HTT_RX_RING_FLAGS_PPDU_END; flags |= HTT_RX_RING_FLAGS_MPDU_START; flags |= HTT_RX_RING_FLAGS_MPDU_END; flags |= HTT_RX_RING_FLAGS_MSDU_START; flags |= HTT_RX_RING_FLAGS_MSDU_END; flags |= HTT_RX_RING_FLAGS_RX_ATTENTION; flags |= HTT_RX_RING_FLAGS_FRAG_INFO; flags |= HTT_RX_RING_FLAGS_UNICAST_RX; flags |= HTT_RX_RING_FLAGS_MULTICAST_RX; flags |= HTT_RX_RING_FLAGS_CTRL_RX; flags |= HTT_RX_RING_FLAGS_MGMT_RX; flags |= HTT_RX_RING_FLAGS_NULL_RX; flags |= HTT_RX_RING_FLAGS_PHY_DATA_RX; fw_idx = __le32_to_cpu(*htt->rx_ring.alloc_idx.vaddr); ring->fw_idx_shadow_reg_paddr = __cpu_to_le64(htt->rx_ring.alloc_idx.paddr); ring->rx_ring_base_paddr = __cpu_to_le64(htt->rx_ring.base_paddr); ring->rx_ring_len = __cpu_to_le16(htt->rx_ring.size); ring->rx_ring_bufsize = __cpu_to_le16(HTT_RX_BUF_SIZE); ring->flags = __cpu_to_le16(flags); ring->fw_idx_init_val = __cpu_to_le16(fw_idx); ath10k_htt_fill_rx_desc_offset_64(hw, ring); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_send_rx_ring_cfg_hl(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; struct sk_buff *skb; struct htt_cmd *cmd; struct htt_rx_ring_setup_ring32 *ring; const int num_rx_ring = 1; u16 flags; int len; int ret; /* * the HW expects the buffer to be an integral number of 4-byte * "words" */ BUILD_BUG_ON(!IS_ALIGNED(HTT_RX_BUF_SIZE, 4)); 
BUILD_BUG_ON((HTT_RX_BUF_SIZE & HTT_MAX_CACHE_LINE_SIZE_MASK) != 0); len = sizeof(cmd->hdr) + sizeof(cmd->rx_setup_32.hdr) + (sizeof(*ring) * num_rx_ring); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; ring = &cmd->rx_setup_32.rings[0]; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_RX_RING_CFG; cmd->rx_setup_32.hdr.num_rings = 1; flags = 0; flags |= HTT_RX_RING_FLAGS_MSDU_PAYLOAD; flags |= HTT_RX_RING_FLAGS_UNICAST_RX; flags |= HTT_RX_RING_FLAGS_MULTICAST_RX; memset(ring, 0, sizeof(*ring)); ring->rx_ring_len = __cpu_to_le16(HTT_RX_RING_SIZE_MIN); ring->rx_ring_bufsize = __cpu_to_le16(HTT_RX_BUF_SIZE); ring->flags = __cpu_to_le16(flags); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_h2t_aggr_cfg_msg_32(struct ath10k_htt *htt, u8 max_subfrms_ampdu, u8 max_subfrms_amsdu) { struct ath10k *ar = htt->ar; struct htt_aggr_conf *aggr_conf; struct sk_buff *skb; struct htt_cmd *cmd; int len; int ret; /* Firmware defaults are: amsdu = 3 and ampdu = 64 */ if (max_subfrms_ampdu == 0 || max_subfrms_ampdu > 64) return -EINVAL; if (max_subfrms_amsdu == 0 || max_subfrms_amsdu > 31) return -EINVAL; len = sizeof(cmd->hdr); len += sizeof(cmd->aggr_conf); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_AGGR_CFG; aggr_conf = &cmd->aggr_conf; aggr_conf->max_num_ampdu_subframes = max_subfrms_ampdu; aggr_conf->max_num_amsdu_subframes = max_subfrms_amsdu; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt h2t aggr cfg msg amsdu %d ampdu %d", aggr_conf->max_num_amsdu_subframes, aggr_conf->max_num_ampdu_subframes); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } static int ath10k_htt_h2t_aggr_cfg_msg_v2(struct ath10k_htt *htt, u8 max_subfrms_ampdu, u8 max_subfrms_amsdu) { struct ath10k *ar = htt->ar; struct htt_aggr_conf_v2 *aggr_conf; struct sk_buff *skb; struct htt_cmd *cmd; int len; int ret; /* Firmware defaults are: amsdu = 3 and ampdu = 64 */ if (max_subfrms_ampdu == 0 || max_subfrms_ampdu > 64) return -EINVAL; if (max_subfrms_amsdu == 0 || max_subfrms_amsdu > 31) return -EINVAL; len = sizeof(cmd->hdr); len += sizeof(cmd->aggr_conf_v2); skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_AGGR_CFG; aggr_conf = &cmd->aggr_conf_v2; aggr_conf->max_num_ampdu_subframes = max_subfrms_ampdu; aggr_conf->max_num_amsdu_subframes = max_subfrms_amsdu; ath10k_dbg(ar, ATH10K_DBG_HTT, "htt h2t aggr cfg msg amsdu %d ampdu %d", aggr_conf->max_num_amsdu_subframes, aggr_conf->max_num_ampdu_subframes); ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); if (ret) { dev_kfree_skb_any(skb); return ret; } return 0; } int ath10k_htt_tx_fetch_resp(struct ath10k *ar, __le32 token, __le16 fetch_seq_num, struct htt_tx_fetch_record *records, size_t num_records) { struct sk_buff *skb; struct htt_cmd *cmd; const u16 resp_id = 0; int len = 0; int ret; /* Response IDs are echo-ed back only for host driver convenience * purposes. They aren't used for anything in the driver yet so use 0. 
*/ len += sizeof(cmd->hdr); len += sizeof(cmd->tx_fetch_resp); len += sizeof(cmd->tx_fetch_resp.records[0]) * num_records; skb = ath10k_htc_alloc_skb(ar, len); if (!skb) return -ENOMEM; skb_put(skb, len); cmd = (struct htt_cmd *)skb->data; cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FETCH_RESP; cmd->tx_fetch_resp.resp_id = cpu_to_le16(resp_id); cmd->tx_fetch_resp.fetch_seq_num = fetch_seq_num; cmd->tx_fetch_resp.num_records = cpu_to_le16(num_records); cmd->tx_fetch_resp.token = token; memcpy(cmd->tx_fetch_resp.records, records, sizeof(records[0]) * num_records); ret = ath10k_htc_send(&ar->htc, ar->htt.eid, skb); if (ret) { ath10k_warn(ar, "failed to submit htc command: %d\n", ret); goto err_free_skb; } return 0; err_free_skb: dev_kfree_skb_any(skb); return ret; } static u8 ath10k_htt_tx_get_vdev_id(struct ath10k *ar, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb); struct ath10k_vif *arvif; if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { return ar->scan.vdev_id; } else if (cb->vif) { arvif = (void *)cb->vif->drv_priv; return arvif->vdev_id; } else if (ar->monitor_started) { return ar->monitor_vdev_id; } else { return 0; } } static u8 ath10k_htt_tx_get_tid(struct sk_buff *skb, bool is_eth) { struct ieee80211_hdr *hdr = (void *)skb->data; struct ath10k_skb_cb *cb = ATH10K_SKB_CB(skb); if (!is_eth && ieee80211_is_mgmt(hdr->frame_control)) return HTT_DATA_TX_EXT_TID_MGMT; else if (cb->flags & ATH10K_SKB_F_QOS) return skb->priority & IEEE80211_QOS_CTL_TID_MASK; else return HTT_DATA_TX_EXT_TID_NON_QOS_MCAST_BCAST; } int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu) { struct ath10k *ar = htt->ar; struct device *dev = ar->dev; struct sk_buff *txdesc = NULL; struct htt_cmd *cmd; struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu); u8 vdev_id = ath10k_htt_tx_get_vdev_id(ar, msdu); int len = 0; int msdu_id = -1; int res; const u8 *peer_addr; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data; len += sizeof(cmd->hdr); len += sizeof(cmd->mgmt_tx); res = ath10k_htt_tx_alloc_msdu_id(htt, msdu); if (res < 0) goto err; msdu_id = res; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { peer_addr = hdr->addr1; if (is_multicast_ether_addr(peer_addr)) { skb_put(msdu, sizeof(struct ieee80211_mmie_16)); } else { if (skb_cb->ucast_cipher == WLAN_CIPHER_SUITE_GCMP || skb_cb->ucast_cipher == WLAN_CIPHER_SUITE_GCMP_256) skb_put(msdu, IEEE80211_GCMP_MIC_LEN); else skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } } txdesc = ath10k_htc_alloc_skb(ar, len); if (!txdesc) { res = -ENOMEM; goto err_free_msdu_id; } skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); if (res) { res = -EIO; goto err_free_txdesc; } skb_put(txdesc, len); cmd = (struct htt_cmd *)txdesc->data; memset(cmd, 0, len); cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_MGMT_TX; cmd->mgmt_tx.msdu_paddr = __cpu_to_le32(ATH10K_SKB_CB(msdu)->paddr); cmd->mgmt_tx.len = __cpu_to_le32(msdu->len); cmd->mgmt_tx.desc_id = __cpu_to_le32(msdu_id); cmd->mgmt_tx.vdev_id = __cpu_to_le32(vdev_id); memcpy(cmd->mgmt_tx.hdr, msdu->data, min_t(int, msdu->len, HTT_MGMT_FRM_HDR_DOWNLOAD_LEN)); res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc); if (res) goto err_unmap_msdu; return 0; err_unmap_msdu: if (ar->bus_param.dev_type != ATH10K_DEV_TYPE_HL) dma_unmap_single(dev, skb_cb->paddr, 
msdu->len, DMA_TO_DEVICE); err_free_txdesc: dev_kfree_skb_any(txdesc); err_free_msdu_id: spin_lock_bh(&htt->tx_lock); ath10k_htt_tx_free_msdu_id(htt, msdu_id); spin_unlock_bh(&htt->tx_lock); err: return res; } #define HTT_TX_HL_NEEDED_HEADROOM \ (unsigned int)(sizeof(struct htt_cmd_hdr) + \ sizeof(struct htt_data_tx_desc) + \ sizeof(struct ath10k_htc_hdr)) static int ath10k_htt_tx_hl(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txmode, struct sk_buff *msdu) { struct ath10k *ar = htt->ar; int res, data_len; struct htt_cmd_hdr *cmd_hdr; struct htt_data_tx_desc *tx_desc; struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu); struct sk_buff *tmp_skb; bool is_eth = (txmode == ATH10K_HW_TXRX_ETHERNET); u8 vdev_id = ath10k_htt_tx_get_vdev_id(ar, msdu); u8 tid = ath10k_htt_tx_get_tid(msdu, is_eth); u8 flags0 = 0; u16 flags1 = 0; u16 msdu_id = 0; if (!is_eth) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } } data_len = msdu->len; switch (txmode) { case ATH10K_HW_TXRX_RAW: case ATH10K_HW_TXRX_NATIVE_WIFI: flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; fallthrough; case ATH10K_HW_TXRX_ETHERNET: flags0 |= SM(txmode, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); break; case ATH10K_HW_TXRX_MGMT: flags0 |= SM(ATH10K_HW_TXRX_MGMT, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; if (htt->disable_tx_comp) flags1 |= HTT_DATA_TX_DESC_FLAGS1_TX_COMPLETE; break; } if (skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT; flags1 |= SM((u16)vdev_id, HTT_DATA_TX_DESC_FLAGS1_VDEV_ID); flags1 |= SM((u16)tid, HTT_DATA_TX_DESC_FLAGS1_EXT_TID); if (msdu->ip_summed == CHECKSUM_PARTIAL && !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD; flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD; } /* Prepend the HTT header and TX desc struct to the data message * and realloc the skb if it does not have enough headroom. */ if (skb_headroom(msdu) < HTT_TX_HL_NEEDED_HEADROOM) { tmp_skb = msdu; ath10k_dbg(htt->ar, ATH10K_DBG_HTT, "Not enough headroom in skb. Current headroom: %u, needed: %u. Reallocating...\n", skb_headroom(msdu), HTT_TX_HL_NEEDED_HEADROOM); msdu = skb_realloc_headroom(msdu, HTT_TX_HL_NEEDED_HEADROOM); kfree_skb(tmp_skb); if (!msdu) { ath10k_warn(htt->ar, "htt hl tx: Unable to realloc skb!\n"); res = -ENOMEM; goto out; } } if (ar->bus_param.hl_msdu_ids) { flags1 |= HTT_DATA_TX_DESC_FLAGS1_POSTPONED; res = ath10k_htt_tx_alloc_msdu_id(htt, msdu); if (res < 0) { ath10k_err(ar, "msdu_id allocation failed %d\n", res); goto out; } msdu_id = res; } /* As msdu is freed by mac80211 (in ieee80211_tx_status()) and by * ath10k (in ath10k_htt_htc_tx_complete()) we have to increase * reference by one to avoid a use-after-free case and a double * free. 
*/ skb_get(msdu); skb_push(msdu, sizeof(*cmd_hdr)); skb_push(msdu, sizeof(*tx_desc)); cmd_hdr = (struct htt_cmd_hdr *)msdu->data; tx_desc = (struct htt_data_tx_desc *)(msdu->data + sizeof(*cmd_hdr)); cmd_hdr->msg_type = HTT_H2T_MSG_TYPE_TX_FRM; tx_desc->flags0 = flags0; tx_desc->flags1 = __cpu_to_le16(flags1); tx_desc->len = __cpu_to_le16(data_len); tx_desc->id = __cpu_to_le16(msdu_id); tx_desc->frags_paddr = 0; /* always zero */ /* Initialize peer_id to INVALID_PEER because this is NOT * Reinjection path */ tx_desc->peerid = __cpu_to_le32(HTT_INVALID_PEERID); res = ath10k_htc_send_hl(&htt->ar->htc, htt->eid, msdu); out: return res; } static int ath10k_htt_tx_32(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txmode, struct sk_buff *msdu) { struct ath10k *ar = htt->ar; struct device *dev = ar->dev; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(msdu); struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu); struct ath10k_hif_sg_item sg_items[2]; struct ath10k_htt_txbuf_32 *txbuf; struct htt_data_tx_desc_frag *frags; bool is_eth = (txmode == ATH10K_HW_TXRX_ETHERNET); u8 vdev_id = ath10k_htt_tx_get_vdev_id(ar, msdu); u8 tid = ath10k_htt_tx_get_tid(msdu, is_eth); int prefetch_len; int res; u8 flags0 = 0; u16 msdu_id, flags1 = 0; u16 freq = 0; u32 frags_paddr = 0; u32 txbuf_paddr; struct htt_msdu_ext_desc *ext_desc = NULL; struct htt_msdu_ext_desc *ext_desc_t = NULL; res = ath10k_htt_tx_alloc_msdu_id(htt, msdu); if (res < 0) goto err; msdu_id = res; prefetch_len = min(htt->prefetch_len, msdu->len); prefetch_len = roundup(prefetch_len, 4); txbuf = htt->txbuf.vaddr_txbuff_32 + msdu_id; txbuf_paddr = htt->txbuf.paddr + (sizeof(struct ath10k_htt_txbuf_32) * msdu_id); if (!is_eth) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } else if (!(skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) && txmode == ATH10K_HW_TXRX_RAW && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } } skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); if (res) { res = -EIO; goto err_free_msdu_id; } if (unlikely(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)) freq = ar->scan.roc_freq; switch (txmode) { case ATH10K_HW_TXRX_RAW: case ATH10K_HW_TXRX_NATIVE_WIFI: flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; fallthrough; case ATH10K_HW_TXRX_ETHERNET: if (ar->hw_params.continuous_frag_desc) { ext_desc_t = htt->frag_desc.vaddr_desc_32; memset(&ext_desc_t[msdu_id], 0, sizeof(struct htt_msdu_ext_desc)); frags = (struct htt_data_tx_desc_frag *) &ext_desc_t[msdu_id].frags; ext_desc = &ext_desc_t[msdu_id]; frags[0].tword_addr.paddr_lo = __cpu_to_le32(skb_cb->paddr); frags[0].tword_addr.paddr_hi = 0; frags[0].tword_addr.len_16 = __cpu_to_le16(msdu->len); frags_paddr = htt->frag_desc.paddr + (sizeof(struct htt_msdu_ext_desc) * msdu_id); } else { frags = txbuf->frags; frags[0].dword_addr.paddr = __cpu_to_le32(skb_cb->paddr); frags[0].dword_addr.len = __cpu_to_le32(msdu->len); frags[1].dword_addr.paddr = 0; frags[1].dword_addr.len = 0; frags_paddr = txbuf_paddr; } flags0 |= SM(txmode, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); break; case ATH10K_HW_TXRX_MGMT: flags0 |= SM(ATH10K_HW_TXRX_MGMT, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; frags_paddr = skb_cb->paddr; break; } 
/* Normally all commands go through HTC which manages tx credits for * each endpoint and notifies when tx is completed. * * HTT endpoint is creditless so there's no need to care about HTC * flags. In that case it is trivial to fill the HTC header here. * * MSDU transmission is considered completed upon HTT event. This * implies no relevant resources can be freed until after the event is * received. That's why HTC tx completion handler itself is ignored by * setting NULL to transfer_context for all sg items. * * There is simply no point in pushing HTT TX_FRM through HTC tx path * as it's a waste of resources. By bypassing HTC it is possible to * avoid extra memory allocations, compress data structures and thus * improve performance. */ txbuf->htc_hdr.eid = htt->eid; txbuf->htc_hdr.len = __cpu_to_le16(sizeof(txbuf->cmd_hdr) + sizeof(txbuf->cmd_tx) + prefetch_len); txbuf->htc_hdr.flags = 0; if (skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT; flags1 |= SM((u16)vdev_id, HTT_DATA_TX_DESC_FLAGS1_VDEV_ID); flags1 |= SM((u16)tid, HTT_DATA_TX_DESC_FLAGS1_EXT_TID); if (msdu->ip_summed == CHECKSUM_PARTIAL && !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD; flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD; if (ar->hw_params.continuous_frag_desc) ext_desc->flags |= HTT_MSDU_CHECKSUM_ENABLE; } /* Prevent firmware from sending up tx inspection requests. There's * nothing ath10k can do with frames requested for inspection so force * it to simply rely a regular tx completion with discard status. */ flags1 |= HTT_DATA_TX_DESC_FLAGS1_POSTPONED; txbuf->cmd_hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM; txbuf->cmd_tx.flags0 = flags0; txbuf->cmd_tx.flags1 = __cpu_to_le16(flags1); txbuf->cmd_tx.len = __cpu_to_le16(msdu->len); txbuf->cmd_tx.id = __cpu_to_le16(msdu_id); txbuf->cmd_tx.frags_paddr = __cpu_to_le32(frags_paddr); if (ath10k_mac_tx_frm_has_freq(ar)) { txbuf->cmd_tx.offchan_tx.peerid = __cpu_to_le16(HTT_INVALID_PEERID); txbuf->cmd_tx.offchan_tx.freq = __cpu_to_le16(freq); } else { txbuf->cmd_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID); } trace_ath10k_htt_tx(ar, msdu_id, msdu->len, vdev_id, tid); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx flags0 %u flags1 %u len %d id %u frags_paddr %pad, msdu_paddr %pad vdev %u tid %u freq %u\n", flags0, flags1, msdu->len, msdu_id, &frags_paddr, &skb_cb->paddr, vdev_id, tid, freq); ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt tx msdu: ", msdu->data, msdu->len); trace_ath10k_tx_hdr(ar, msdu->data, msdu->len); trace_ath10k_tx_payload(ar, msdu->data, msdu->len); sg_items[0].transfer_id = 0; sg_items[0].transfer_context = NULL; sg_items[0].vaddr = &txbuf->htc_hdr; sg_items[0].paddr = txbuf_paddr + sizeof(txbuf->frags); sg_items[0].len = sizeof(txbuf->htc_hdr) + sizeof(txbuf->cmd_hdr) + sizeof(txbuf->cmd_tx); sg_items[1].transfer_id = 0; sg_items[1].transfer_context = NULL; sg_items[1].vaddr = msdu->data; sg_items[1].paddr = skb_cb->paddr; sg_items[1].len = prefetch_len; res = ath10k_hif_tx_sg(htt->ar, htt->ar->htc.endpoint[htt->eid].ul_pipe_id, sg_items, ARRAY_SIZE(sg_items)); if (res) goto err_unmap_msdu; return 0; err_unmap_msdu: dma_unmap_single(dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); err_free_msdu_id: spin_lock_bh(&htt->tx_lock); ath10k_htt_tx_free_msdu_id(htt, msdu_id); spin_unlock_bh(&htt->tx_lock); err: return res; } static int ath10k_htt_tx_64(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txmode, struct sk_buff *msdu) { struct ath10k *ar = htt->ar; struct device *dev 
= ar->dev; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(msdu); struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu); struct ath10k_hif_sg_item sg_items[2]; struct ath10k_htt_txbuf_64 *txbuf; struct htt_data_tx_desc_frag *frags; bool is_eth = (txmode == ATH10K_HW_TXRX_ETHERNET); u8 vdev_id = ath10k_htt_tx_get_vdev_id(ar, msdu); u8 tid = ath10k_htt_tx_get_tid(msdu, is_eth); int prefetch_len; int res; u8 flags0 = 0; u16 msdu_id, flags1 = 0; u16 freq = 0; dma_addr_t frags_paddr = 0; dma_addr_t txbuf_paddr; struct htt_msdu_ext_desc_64 *ext_desc = NULL; struct htt_msdu_ext_desc_64 *ext_desc_t = NULL; res = ath10k_htt_tx_alloc_msdu_id(htt, msdu); if (res < 0) goto err; msdu_id = res; prefetch_len = min(htt->prefetch_len, msdu->len); prefetch_len = roundup(prefetch_len, 4); txbuf = htt->txbuf.vaddr_txbuff_64 + msdu_id; txbuf_paddr = htt->txbuf.paddr + (sizeof(struct ath10k_htt_txbuf_64) * msdu_id); if (!is_eth) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data; if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } else if (!(skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) && txmode == ATH10K_HW_TXRX_RAW && ieee80211_has_protected(hdr->frame_control)) { skb_put(msdu, IEEE80211_CCMP_MIC_LEN); } } skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len, DMA_TO_DEVICE); res = dma_mapping_error(dev, skb_cb->paddr); if (res) { res = -EIO; goto err_free_msdu_id; } if (unlikely(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN)) freq = ar->scan.roc_freq; switch (txmode) { case ATH10K_HW_TXRX_RAW: case ATH10K_HW_TXRX_NATIVE_WIFI: flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; fallthrough; case ATH10K_HW_TXRX_ETHERNET: if (ar->hw_params.continuous_frag_desc) { ext_desc_t = htt->frag_desc.vaddr_desc_64; memset(&ext_desc_t[msdu_id], 0, sizeof(struct htt_msdu_ext_desc_64)); frags = (struct htt_data_tx_desc_frag *) &ext_desc_t[msdu_id].frags; ext_desc = &ext_desc_t[msdu_id]; frags[0].tword_addr.paddr_lo = __cpu_to_le32(skb_cb->paddr); frags[0].tword_addr.paddr_hi = __cpu_to_le16(upper_32_bits(skb_cb->paddr)); frags[0].tword_addr.len_16 = __cpu_to_le16(msdu->len); frags_paddr = htt->frag_desc.paddr + (sizeof(struct htt_msdu_ext_desc_64) * msdu_id); } else { frags = txbuf->frags; frags[0].tword_addr.paddr_lo = __cpu_to_le32(skb_cb->paddr); frags[0].tword_addr.paddr_hi = __cpu_to_le16(upper_32_bits(skb_cb->paddr)); frags[0].tword_addr.len_16 = __cpu_to_le16(msdu->len); frags[1].tword_addr.paddr_lo = 0; frags[1].tword_addr.paddr_hi = 0; frags[1].tword_addr.len_16 = 0; } flags0 |= SM(txmode, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); break; case ATH10K_HW_TXRX_MGMT: flags0 |= SM(ATH10K_HW_TXRX_MGMT, HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE); flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT; frags_paddr = skb_cb->paddr; break; } /* Normally all commands go through HTC which manages tx credits for * each endpoint and notifies when tx is completed. * * HTT endpoint is creditless so there's no need to care about HTC * flags. In that case it is trivial to fill the HTC header here. * * MSDU transmission is considered completed upon HTT event. This * implies no relevant resources can be freed until after the event is * received. That's why HTC tx completion handler itself is ignored by * setting NULL to transfer_context for all sg items. * * There is simply no point in pushing HTT TX_FRM through HTC tx path * as it's a waste of resources. 
By bypassing HTC it is possible to * avoid extra memory allocations, compress data structures and thus * improve performance. */ txbuf->htc_hdr.eid = htt->eid; txbuf->htc_hdr.len = __cpu_to_le16(sizeof(txbuf->cmd_hdr) + sizeof(txbuf->cmd_tx) + prefetch_len); txbuf->htc_hdr.flags = 0; if (skb_cb->flags & ATH10K_SKB_F_NO_HWCRYPT) flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT; flags1 |= SM((u16)vdev_id, HTT_DATA_TX_DESC_FLAGS1_VDEV_ID); flags1 |= SM((u16)tid, HTT_DATA_TX_DESC_FLAGS1_EXT_TID); if (msdu->ip_summed == CHECKSUM_PARTIAL && !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD; flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD; if (ar->hw_params.continuous_frag_desc) { memset(ext_desc->tso_flag, 0, sizeof(ext_desc->tso_flag)); ext_desc->tso_flag[3] |= __cpu_to_le32(HTT_MSDU_CHECKSUM_ENABLE_64); } } /* Prevent firmware from sending up tx inspection requests. There's * nothing ath10k can do with frames requested for inspection so force * it to simply rely a regular tx completion with discard status. */ flags1 |= HTT_DATA_TX_DESC_FLAGS1_POSTPONED; txbuf->cmd_hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM; txbuf->cmd_tx.flags0 = flags0; txbuf->cmd_tx.flags1 = __cpu_to_le16(flags1); txbuf->cmd_tx.len = __cpu_to_le16(msdu->len); txbuf->cmd_tx.id = __cpu_to_le16(msdu_id); /* fill fragment descriptor */ txbuf->cmd_tx.frags_paddr = __cpu_to_le64(frags_paddr); if (ath10k_mac_tx_frm_has_freq(ar)) { txbuf->cmd_tx.offchan_tx.peerid = __cpu_to_le16(HTT_INVALID_PEERID); txbuf->cmd_tx.offchan_tx.freq = __cpu_to_le16(freq); } else { txbuf->cmd_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID); } trace_ath10k_htt_tx(ar, msdu_id, msdu->len, vdev_id, tid); ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx flags0 %u flags1 %u len %d id %u frags_paddr %pad, msdu_paddr %pad vdev %u tid %u freq %u\n", flags0, flags1, msdu->len, msdu_id, &frags_paddr, &skb_cb->paddr, vdev_id, tid, freq); ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt tx msdu: ", msdu->data, msdu->len); trace_ath10k_tx_hdr(ar, msdu->data, msdu->len); trace_ath10k_tx_payload(ar, msdu->data, msdu->len); sg_items[0].transfer_id = 0; sg_items[0].transfer_context = NULL; sg_items[0].vaddr = &txbuf->htc_hdr; sg_items[0].paddr = txbuf_paddr + sizeof(txbuf->frags); sg_items[0].len = sizeof(txbuf->htc_hdr) + sizeof(txbuf->cmd_hdr) + sizeof(txbuf->cmd_tx); sg_items[1].transfer_id = 0; sg_items[1].transfer_context = NULL; sg_items[1].vaddr = msdu->data; sg_items[1].paddr = skb_cb->paddr; sg_items[1].len = prefetch_len; res = ath10k_hif_tx_sg(htt->ar, htt->ar->htc.endpoint[htt->eid].ul_pipe_id, sg_items, ARRAY_SIZE(sg_items)); if (res) goto err_unmap_msdu; return 0; err_unmap_msdu: dma_unmap_single(dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); err_free_msdu_id: spin_lock_bh(&htt->tx_lock); ath10k_htt_tx_free_msdu_id(htt, msdu_id); spin_unlock_bh(&htt->tx_lock); err: return res; } static const struct ath10k_htt_tx_ops htt_tx_ops_32 = { .htt_send_rx_ring_cfg = ath10k_htt_send_rx_ring_cfg_32, .htt_send_frag_desc_bank_cfg = ath10k_htt_send_frag_desc_bank_cfg_32, .htt_alloc_frag_desc = ath10k_htt_tx_alloc_cont_frag_desc_32, .htt_free_frag_desc = ath10k_htt_tx_free_cont_frag_desc_32, .htt_tx = ath10k_htt_tx_32, .htt_alloc_txbuff = ath10k_htt_tx_alloc_cont_txbuf_32, .htt_free_txbuff = ath10k_htt_tx_free_cont_txbuf_32, .htt_h2t_aggr_cfg_msg = ath10k_htt_h2t_aggr_cfg_msg_32, }; static const struct ath10k_htt_tx_ops htt_tx_ops_64 = { .htt_send_rx_ring_cfg = ath10k_htt_send_rx_ring_cfg_64, .htt_send_frag_desc_bank_cfg = 
ath10k_htt_send_frag_desc_bank_cfg_64, .htt_alloc_frag_desc = ath10k_htt_tx_alloc_cont_frag_desc_64, .htt_free_frag_desc = ath10k_htt_tx_free_cont_frag_desc_64, .htt_tx = ath10k_htt_tx_64, .htt_alloc_txbuff = ath10k_htt_tx_alloc_cont_txbuf_64, .htt_free_txbuff = ath10k_htt_tx_free_cont_txbuf_64, .htt_h2t_aggr_cfg_msg = ath10k_htt_h2t_aggr_cfg_msg_v2, }; static const struct ath10k_htt_tx_ops htt_tx_ops_hl = { .htt_send_rx_ring_cfg = ath10k_htt_send_rx_ring_cfg_hl, .htt_send_frag_desc_bank_cfg = ath10k_htt_send_frag_desc_bank_cfg_32, .htt_tx = ath10k_htt_tx_hl, .htt_h2t_aggr_cfg_msg = ath10k_htt_h2t_aggr_cfg_msg_32, .htt_flush_tx = ath10k_htt_flush_tx_queue, }; void ath10k_htt_set_tx_ops(struct ath10k_htt *htt) { struct ath10k *ar = htt->ar; if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) htt->tx_ops = &htt_tx_ops_hl; else if (ar->hw_params.target_64bit) htt->tx_ops = &htt_tx_ops_64; else htt->tx_ops = &htt_tx_ops_32; }
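For context, a small illustrative wrapper showing how callers consume the ops table installed by ath10k_htt_set_tx_ops() above: they dispatch through htt->tx_ops rather than calling a 32-bit, 64-bit or high-latency variant directly. The example_* name below is hypothetical; it is modeled loosely on, not copied from, the inline helpers the driver keeps in htt.h.

/* Illustrative only; example_* is not a real driver symbol. */
#include <linux/errno.h>

static inline int example_htt_send_rx_ring_cfg(struct ath10k_htt *htt)
{
	/* Not every target provides every op, so probe before dispatching. */
	if (!htt->tx_ops->htt_send_rx_ring_cfg)
		return -EOPNOTSUPP;

	return htt->tx_ops->htt_send_rx_ring_cfg(htt);
}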
// SPDX-License-Identifier: GPL-2.0-or-later /* bit search implementation * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * Copyright (C) 2008 IBM Corporation * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au> * (Inspired by David Howell's find_next_bit implementation) * * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease * size and improve performance, 2015. */ #include <linux/bitops.h> #include <linux/bitmap.h> #include <linux/export.h> #include <linux/math.h> #include <linux/minmax.h> #include <linux/swab.h> /* * Common helper for find_bit() function family * @FETCH: The expression that fetches and pre-processes each word of bitmap(s) * @MUNGE: The expression that post-processes a word containing found bit (may be empty) * @size: The bitmap size in bits */ #define FIND_FIRST_BIT(FETCH, MUNGE, size) \ ({ \ unsigned long idx, val, sz = (size); \ \ for (idx = 0; idx * BITS_PER_LONG < sz; idx++) { \ val = (FETCH); \ if (val) { \ sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(val)), sz); \ break; \ } \ } \ \ sz; \ }) /* * Common helper for find_next_bit() function family * @FETCH: The expression that fetches and pre-processes each word of bitmap(s) * @MUNGE: The expression that post-processes a word containing found bit (may be empty) * @size: The bitmap size in bits * @start: The bitnumber to start searching at */ #define FIND_NEXT_BIT(FETCH, MUNGE, size, start) \ ({ \ unsigned long mask, idx, tmp, sz = (size), __start = (start); \ \ if (unlikely(__start >= sz)) \ goto out; \ \ mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start)); \ idx = __start / BITS_PER_LONG; \ \ for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) { \ if ((idx + 1) * BITS_PER_LONG >= sz) \ goto out; \ idx++; \ } \ \ sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz); \ out: \ sz; \ }) #define FIND_NTH_BIT(FETCH, size, num) \ ({ \ unsigned long sz = (size), nr = (num), idx, w, tmp; \ \ for (idx = 0; (idx + 1) * BITS_PER_LONG <= sz; idx++) { \ if (idx * BITS_PER_LONG + nr >= sz) \ goto out; \ \ tmp = (FETCH); \ w = hweight_long(tmp); \ if (w > nr) \ goto found; \ \ nr -= w; \ } \ \ if (sz % BITS_PER_LONG) \ tmp = (FETCH) & BITMAP_LAST_WORD_MASK(sz); \ found: \ sz = min(idx * BITS_PER_LONG + fns(tmp, nr), sz); \ out: \ sz; \ }) #ifndef find_first_bit /* * Find the first set bit in a memory region.
 */
unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
{
	return FIND_FIRST_BIT(addr[idx], /* nop */, size);
}
EXPORT_SYMBOL(_find_first_bit);
#endif

#ifndef find_first_and_bit
/*
 * Find the first set bit in two memory regions.
 */
unsigned long _find_first_and_bit(const unsigned long *addr1,
				  const unsigned long *addr2,
				  unsigned long size)
{
	return FIND_FIRST_BIT(addr1[idx] & addr2[idx], /* nop */, size);
}
EXPORT_SYMBOL(_find_first_and_bit);
#endif

#ifndef find_first_zero_bit
/*
 * Find the first cleared bit in a memory region.
 */
unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
	return FIND_FIRST_BIT(~addr[idx], /* nop */, size);
}
EXPORT_SYMBOL(_find_first_zero_bit);
#endif

#ifndef find_next_bit
unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits,
			     unsigned long start)
{
	return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start);
}
EXPORT_SYMBOL(_find_next_bit);
#endif

unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size,
			     unsigned long n)
{
	return FIND_NTH_BIT(addr[idx], size, n);
}
EXPORT_SYMBOL(__find_nth_bit);

unsigned long __find_nth_and_bit(const unsigned long *addr1,
				 const unsigned long *addr2,
				 unsigned long size, unsigned long n)
{
	return FIND_NTH_BIT(addr1[idx] & addr2[idx], size, n);
}
EXPORT_SYMBOL(__find_nth_and_bit);

unsigned long __find_nth_andnot_bit(const unsigned long *addr1,
				    const unsigned long *addr2,
				    unsigned long size, unsigned long n)
{
	return FIND_NTH_BIT(addr1[idx] & ~addr2[idx], size, n);
}
EXPORT_SYMBOL(__find_nth_andnot_bit);

unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1,
					const unsigned long *addr2,
					const unsigned long *addr3,
					unsigned long size, unsigned long n)
{
	return FIND_NTH_BIT(addr1[idx] & addr2[idx] & ~addr3[idx], size, n);
}
EXPORT_SYMBOL(__find_nth_and_andnot_bit);

#ifndef find_next_and_bit
unsigned long _find_next_and_bit(const unsigned long *addr1,
				 const unsigned long *addr2,
				 unsigned long nbits, unsigned long start)
{
	return FIND_NEXT_BIT(addr1[idx] & addr2[idx], /* nop */, nbits, start);
}
EXPORT_SYMBOL(_find_next_and_bit);
#endif

#ifndef find_next_andnot_bit
unsigned long _find_next_andnot_bit(const unsigned long *addr1,
				    const unsigned long *addr2,
				    unsigned long nbits, unsigned long start)
{
	return FIND_NEXT_BIT(addr1[idx] & ~addr2[idx], /* nop */, nbits, start);
}
EXPORT_SYMBOL(_find_next_andnot_bit);
#endif

#ifndef find_next_or_bit
unsigned long _find_next_or_bit(const unsigned long *addr1,
				const unsigned long *addr2,
				unsigned long nbits, unsigned long start)
{
	return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start);
}
EXPORT_SYMBOL(_find_next_or_bit);
#endif

#ifndef find_next_zero_bit
unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
				  unsigned long start)
{
	return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start);
}
EXPORT_SYMBOL(_find_next_zero_bit);
#endif

#ifndef find_last_bit
unsigned long _find_last_bit(const unsigned long *addr, unsigned long size)
{
	if (size) {
		unsigned long val = BITMAP_LAST_WORD_MASK(size);
		unsigned long idx = (size-1) / BITS_PER_LONG;

		do {
			val &= addr[idx];
			if (val)
				return idx * BITS_PER_LONG + __fls(val);

			val = ~0ul;
		} while (idx--);
	}
	return size;
}
EXPORT_SYMBOL(_find_last_bit);
#endif

unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
			       unsigned long size, unsigned long offset)
{
	offset = find_next_bit(addr, size, offset);
	if (offset == size)
		return size;

	offset = round_down(offset, 8);

	*clump = bitmap_get_value8(addr,
				   offset);

	return offset;
}
EXPORT_SYMBOL(find_next_clump8);

#ifdef __BIG_ENDIAN

#ifndef find_first_zero_bit_le
/*
 * Find the first cleared bit in an LE memory region.
 */
unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size)
{
	return FIND_FIRST_BIT(~addr[idx], swab, size);
}
EXPORT_SYMBOL(_find_first_zero_bit_le);
#endif

#ifndef find_next_zero_bit_le
unsigned long _find_next_zero_bit_le(const unsigned long *addr,
				     unsigned long size, unsigned long offset)
{
	return FIND_NEXT_BIT(~addr[idx], swab, size, offset);
}
EXPORT_SYMBOL(_find_next_zero_bit_le);
#endif

#ifndef find_next_bit_le
unsigned long _find_next_bit_le(const unsigned long *addr,
				unsigned long size, unsigned long offset)
{
	return FIND_NEXT_BIT(addr[idx], swab, size, offset);
}
EXPORT_SYMBOL(_find_next_bit_le);
#endif

#endif /* __BIG_ENDIAN */
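/*
 * Minimal usage sketch, not part of the file above: the exported _find_*()
 * helpers normally sit behind the find_first_bit()/find_next_bit() wrappers
 * and the for_each_set_bit() iterator from <linux/find.h>. The function below
 * is a hypothetical example of how a caller walks the set bits of a bitmap;
 * the helper name is ours.
 */
#include <linux/find.h>
#include <linux/printk.h>

static void example_dump_set_bits(const unsigned long *map, unsigned int nbits)
{
	unsigned int bit;

	/* Expands to a find_first_bit()/find_next_bit() loop over @map. */
	for_each_set_bit(bit, map, nbits)
		pr_info("bit %u is set\n", bit);
}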
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux INET6 implementation
 * FIB front-end.
 *
 * Authors:
 * Pedro Roque <roque@di.fc.ul.pt>
 */

/* Changes:
 *
 * YOSHIFUJI Hideaki @USAGI
 * reworked default router selection.
 * - respect outgoing interface
 * - select from (probably) reachable routers (i.e.
 *   routers in REACHABLE, STALE, DELAY or PROBE states).
 * - always select the same router if it is (probably)
 *   reachable. otherwise, round-robin the list.
 * Ville Nuorvala
 * Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/siphash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/rtnh.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/ip.h>
#include <linux/uaccess.h>
#include <linux/btf_ids.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

static int ip6_rt_type_to_error(u8 fib6_type);

#define CREATE_TRACE_POINTS
#include <trace/events/fib6.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
#undef CREATE_TRACE_POINTS

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

INDIRECT_CALLABLE_SCOPE
struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev);
static void ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int
ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, int strict); static size_t rt6_nlmsg_size(struct fib6_info *f6i); static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev); #endif struct uncached_list { spinlock_t lock; struct list_head head; struct list_head quarantine; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); void rt6_uncached_list_add(struct rt6_info *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); rt->dst.rt_uncached_list = ul; spin_lock_bh(&ul->lock); list_add_tail(&rt->dst.rt_uncached, &ul->head); spin_unlock_bh(&ul->lock); } void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->dst.rt_uncached)) { struct uncached_list *ul = rt->dst.rt_uncached_list; spin_lock_bh(&ul->lock); list_del_init(&rt->dst.rt_uncached); spin_unlock_bh(&ul->lock); } } static void rt6_uncached_list_flush_dev(struct net_device *dev) { int cpu; for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); struct rt6_info *rt, *safe; if (list_empty(&ul->head)) continue; spin_lock_bh(&ul->lock); list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; bool handled = false; if (rt_idev->dev == dev) { rt->rt6i_idev = in6_dev_get(blackhole_netdev); in6_dev_put(rt_idev); handled = true; } if (rt_dev == dev) { rt->dst.dev = blackhole_netdev; netdev_ref_replace(rt_dev, blackhole_netdev, &rt->dst.dev_tracker, GFP_ATOMIC); handled = true; } if (handled) list_move(&rt->dst.rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); } } static inline const void *choose_neigh_daddr(const struct in6_addr *p, struct sk_buff *skb, const void *daddr) { if (!ipv6_addr_any(p)) return (const void *) p; else if (skb) return &ipv6_hdr(skb)->daddr; return daddr; } struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, struct net_device *dev, struct sk_buff *skb, const void *daddr) { struct neighbour *n; daddr = choose_neigh_daddr(gw, skb, daddr); n = __ipv6_neigh_lookup(dev, daddr); if (n) return n; n = neigh_create(&nd_tbl, daddr, dev); return IS_ERR(n) ? 
NULL : n; } static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), dst->dev, skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) { struct net_device *dev = dst->dev; struct rt6_info *rt = (struct rt6_info *)dst; daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); if (!daddr) return; if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) return; if (ipv6_addr_is_multicast((const struct in6_addr *)daddr)) return; __ipv6_confirm_neigh(dev, daddr); } static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .mtu = ip6_mtu, .cow_metrics = dst_cow_metrics_generic, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .redirect = rt6_do_redirect, .local_out = __ip6_local_out, .neigh_lookup = ip6_dst_neigh_lookup, .confirm_neigh = ip6_confirm_neigh, }; static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, .default_advmss = ip6_default_advmss, .neigh_lookup = ip6_dst_neigh_lookup, .check = ip6_dst_check, .destroy = ip6_dst_destroy, .cow_metrics = dst_cow_metrics_generic, .update_pmtu = dst_blackhole_update_pmtu, .redirect = dst_blackhole_redirect, .mtu = dst_blackhole_mtu, }; static const u32 ip6_template_metrics[RTAX_MAX] = { [RTAX_HOPLIMIT - 1] = 0, }; static const struct fib6_info fib6_null_entry_template = { .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP), .fib6_protocol = RTPROT_KERNEL, .fib6_metric = ~(u32)0, .fib6_ref = REFCOUNT_INIT(1), .fib6_type = RTN_UNREACHABLE, .fib6_metrics = (struct dst_metrics *)&dst_default_metrics, }; static const struct rt6_info ip6_null_entry_template = { .dst = { .__rcuref = RCUREF_INIT(1), .__use = 1, .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -ENETUNREACH, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), }; #ifdef CONFIG_IPV6_MULTIPLE_TABLES static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__rcuref = RCUREF_INIT(1), .__use = 1, .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EACCES, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), }; static const struct rt6_info ip6_blk_hole_entry_template = { .dst = { .__rcuref = RCUREF_INIT(1), .__use = 1, .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, .output = dst_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), }; #endif static void rt6_info_init(struct rt6_info *rt) { memset_after(rt, 0, dst); } /* allocate dst with ip6_dst_ops */ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, flags); if (rt) { rt6_info_init(rt); atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); } return rt; } EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct fib6_info *from; struct inet6_dev *idev; ip_dst_metrics_put(dst); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; if (idev) { rt->rt6i_idev = NULL; in6_dev_put(idev); } from = xchg((__force struct fib6_info **)&rt->from, NULL); fib6_info_release(from); } static void 
ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; if (idev && idev->dev != blackhole_netdev) { struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev); if (blackhole_idev) { rt->rt6i_idev = blackhole_idev; in6_dev_put(idev); } } } static bool __rt6_check_expired(const struct rt6_info *rt) { if (rt->rt6i_flags & RTF_EXPIRES) return time_after(jiffies, rt->dst.expires); else return false; } static bool rt6_check_expired(const struct rt6_info *rt) { struct fib6_info *from; from = rcu_dereference(rt->from); if (rt->rt6i_flags & RTF_EXPIRES) { if (time_after(jiffies, rt->dst.expires)) return true; } else if (from) { return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || fib6_check_expired(from); } return false; } void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; if (match->nh && have_oif_match && res->nh) return; if (skb) IP6CB(skb)->flags |= IP6SKB_MULTIPATH; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. */ if (!fl6->mp_hash && (!match->nh || nexthop_is_multipath(match->nh))) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); if (unlikely(match->nh)) { nexthop_path_fib6_result(res, fl6->mp_hash); return; } if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) goto out; list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); if (fl6->mp_hash > nh_upper_bound) continue; if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) break; match = sibling; break; } out: res->f6i = match; res->nh = match->fib6_nh; } /* * Route lookup. rcu_read_lock() should be held. 
*/ static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh, const struct in6_addr *saddr, int oif, int flags) { const struct net_device *dev; if (nh->fib_nh_flags & RTNH_F_DEAD) return false; dev = nh->fib_nh_dev; if (oif) { if (dev->ifindex == oif) return true; } else { if (ipv6_chk_addr(net, saddr, dev, flags & RT6_LOOKUP_F_IFACE)) return true; } return false; } struct fib6_nh_dm_arg { struct net *net; const struct in6_addr *saddr; int oif; int flags; struct fib6_nh *nh; }; static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg) { struct fib6_nh_dm_arg *arg = _arg; arg->nh = nh; return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif, arg->flags); } /* returns fib6_nh from nexthop or NULL */ static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh, struct fib6_result *res, const struct in6_addr *saddr, int oif, int flags) { struct fib6_nh_dm_arg arg = { .net = net, .saddr = saddr, .oif = oif, .flags = flags, }; if (nexthop_is_blackhole(nh)) return NULL; if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg)) return arg.nh; return NULL; } static void rt6_device_match(struct net *net, struct fib6_result *res, const struct in6_addr *saddr, int oif, int flags) { struct fib6_info *f6i = res->f6i; struct fib6_info *spf6i; struct fib6_nh *nh; if (!oif && ipv6_addr_any(saddr)) { if (unlikely(f6i->nh)) { nh = nexthop_fib6_nh(f6i->nh); if (nexthop_is_blackhole(f6i->nh)) goto out_blackhole; } else { nh = f6i->fib6_nh; } if (!(nh->fib_nh_flags & RTNH_F_DEAD)) goto out; } for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) { bool matched = false; if (unlikely(spf6i->nh)) { nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr, oif, flags); if (nh) matched = true; } else { nh = spf6i->fib6_nh; if (__rt6_device_match(net, nh, saddr, oif, flags)) matched = true; } if (matched) { res->f6i = spf6i; goto out; } } if (oif && flags & RT6_LOOKUP_F_IFACE) { res->f6i = net->ipv6.fib6_null_entry; nh = res->f6i->fib6_nh; goto out; } if (unlikely(f6i->nh)) { nh = nexthop_fib6_nh(f6i->nh); if (nexthop_is_blackhole(f6i->nh)) goto out_blackhole; } else { nh = f6i->fib6_nh; } if (nh->fib_nh_flags & RTNH_F_DEAD) { res->f6i = net->ipv6.fib6_null_entry; nh = res->f6i->fib6_nh; } out: res->nh = nh; res->fib6_type = res->f6i->fib6_type; res->fib6_flags = res->f6i->fib6_flags; return; out_blackhole: res->fib6_flags |= RTF_REJECT; res->fib6_type = RTN_BLACKHOLE; res->nh = nh; } #ifdef CONFIG_IPV6_ROUTER_PREF struct __rt6_probe_work { struct work_struct work; struct in6_addr target; struct net_device *dev; netdevice_tracker dev_tracker; }; static void rt6_probe_deferred(struct work_struct *w) { struct in6_addr mcaddr; struct __rt6_probe_work *work = container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0); netdev_put(work->dev, &work->dev_tracker); kfree(work); } static void rt6_probe(struct fib6_nh *fib6_nh) { struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; unsigned long last_probe; struct neighbour *neigh; struct net_device *dev; struct inet6_dev *idev; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it * is really so; aka Router Reachability Probing. * * Router Reachability Probe MUST be rate-limited * to no more than one per minute. 
*/ if (!fib6_nh->fib_nh_gw_family) return; nh_gw = &fib6_nh->fib_nh_gw6; dev = fib6_nh->fib_nh_dev; rcu_read_lock(); last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { if (READ_ONCE(neigh->nud_state) & NUD_VALID) goto out; write_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + READ_ONCE(idev->cnf.rtr_probe_interval))) { work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) __neigh_set_probe_once(neigh); } write_unlock_bh(&neigh->lock); } else if (time_after(jiffies, last_probe + READ_ONCE(idev->cnf.rtr_probe_interval))) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } if (!work || cmpxchg(&fib6_nh->last_probe, last_probe, jiffies) != last_probe) { kfree(work); } else { INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC); work->dev = dev; schedule_work(&work->work); } out: rcu_read_unlock(); } #else static inline void rt6_probe(struct fib6_nh *fib6_nh) { } #endif /* * Default Router Selection (RFC 2461 6.3.6) */ static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) { enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; struct neighbour *neigh; rcu_read_lock(); neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, &fib6_nh->fib_nh_gw6); if (neigh) { u8 nud_state = READ_ONCE(neigh->nud_state); if (nud_state & NUD_VALID) ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; else ret = RT6_NUD_FAIL_PROBE; #endif } else { ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; } rcu_read_unlock(); return ret; } static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, int strict) { int m = 0; if (!oif || nh->fib_nh_dev->ifindex == oif) m = 2; if (!m && (strict & RT6_LOOKUP_F_IFACE)) return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2; #endif if ((strict & RT6_LOOKUP_F_REACHABLE) && !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) { int n = rt6_check_neigh(nh); if (n < 0) return n; } return m; } static bool find_match(struct fib6_nh *nh, u32 fib6_flags, int oif, int strict, int *mpri, bool *do_rr) { bool match_do_rr = false; bool rc = false; int m; if (nh->fib_nh_flags & RTNH_F_DEAD) goto out; if (ip6_ignore_linkdown(nh->fib_nh_dev) && nh->fib_nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; m = rt6_score_route(nh, fib6_flags, oif, strict); if (m == RT6_NUD_FAIL_DO_RR) { match_do_rr = true; m = 0; /* lowest valid score */ } else if (m == RT6_NUD_FAIL_HARD) { goto out; } if (strict & RT6_LOOKUP_F_REACHABLE) rt6_probe(nh); /* note that m can be RT6_NUD_FAIL_PROBE at this point */ if (m > *mpri) { *do_rr = match_do_rr; *mpri = m; rc = true; } out: return rc; } struct fib6_nh_frl_arg { u32 flags; int oif; int strict; int *mpri; bool *do_rr; struct fib6_nh *nh; }; static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg) { struct fib6_nh_frl_arg *arg = _arg; arg->nh = nh; return find_match(nh, arg->flags, arg->oif, arg->strict, arg->mpri, arg->do_rr); } static void __find_rr_leaf(struct fib6_info *f6i_start, struct fib6_info *nomatch, u32 metric, struct fib6_result *res, struct fib6_info **cont, int oif, int strict, bool *do_rr, int *mpri) { struct fib6_info *f6i; for (f6i = f6i_start; f6i && f6i != nomatch; f6i = rcu_dereference(f6i->fib6_next)) { bool matched = false; struct fib6_nh *nh; if 
(cont && f6i->fib6_metric != metric) { *cont = f6i; return; } if (fib6_check_expired(f6i)) continue; if (unlikely(f6i->nh)) { struct fib6_nh_frl_arg arg = { .flags = f6i->fib6_flags, .oif = oif, .strict = strict, .mpri = mpri, .do_rr = do_rr }; if (nexthop_is_blackhole(f6i->nh)) { res->fib6_flags = RTF_REJECT; res->fib6_type = RTN_BLACKHOLE; res->f6i = f6i; res->nh = nexthop_fib6_nh(f6i->nh); return; } if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match, &arg)) { matched = true; nh = arg.nh; } } else { nh = f6i->fib6_nh; if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) matched = true; } if (matched) { res->f6i = f6i; res->nh = nh; res->fib6_flags = f6i->fib6_flags; res->fib6_type = f6i->fib6_type; } } } static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf, struct fib6_info *rr_head, int oif, int strict, bool *do_rr, struct fib6_result *res) { u32 metric = rr_head->fib6_metric; struct fib6_info *cont = NULL; int mpri = -1; __find_rr_leaf(rr_head, NULL, metric, res, &cont, oif, strict, do_rr, &mpri); __find_rr_leaf(leaf, rr_head, metric, res, &cont, oif, strict, do_rr, &mpri); if (res->f6i || !cont) return; __find_rr_leaf(cont, NULL, metric, res, NULL, oif, strict, do_rr, &mpri); } static void rt6_select(struct net *net, struct fib6_node *fn, int oif, struct fib6_result *res, int strict) { struct fib6_info *leaf = rcu_dereference(fn->leaf); struct fib6_info *rt0; bool do_rr = false; int key_plen; /* make sure this function or its helpers sets f6i */ res->f6i = NULL; if (!leaf || leaf == net->ipv6.fib6_null_entry) goto out; rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) rt0 = leaf; /* Double check to make sure fn is not an intermediate node * and fn->leaf does not points to its child's leaf * (This might happen if all routes under fn are deleted from * the tree and fib6_repair_tree() is called on the node.) 
*/ key_plen = rt0->fib6_dst.plen; #ifdef CONFIG_IPV6_SUBTREES if (rt0->fib6_src.plen) key_plen = rt0->fib6_src.plen; #endif if (fn->fn_bit != key_plen) goto out; find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res); if (do_rr) { struct fib6_info *next = rcu_dereference(rt0->fib6_next); /* no entries matched; do round-robin */ if (!next || next->fib6_metric != rt0->fib6_metric) next = leaf; if (next != rt0) { spin_lock_bh(&leaf->fib6_table->tb6_lock); /* make sure next is not being deleted from the tree */ if (next->fib6_node) rcu_assign_pointer(fn->rr_ptr, next); spin_unlock_bh(&leaf->fib6_table->tb6_lock); } } out: if (!res->f6i) { res->f6i = net->ipv6.fib6_null_entry; res->nh = res->f6i->fib6_nh; res->fib6_flags = res->f6i->fib6_flags; res->fib6_type = res->f6i->fib6_type; } } static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res) { return (res->f6i->fib6_flags & RTF_NONEXTHOP) || res->nh->fib_nh_gw_family; } #ifdef CONFIG_IPV6_ROUTE_INFO int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr) { struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; struct fib6_table *table; unsigned int pref; unsigned long lifetime; struct fib6_info *rt; if (len < sizeof(struct route_info)) { return -EINVAL; } /* Sanity check for prefix_len and length */ if (rinfo->length > 3) { return -EINVAL; } else if (rinfo->prefix_len > 128) { return -EINVAL; } else if (rinfo->prefix_len > 64) { if (rinfo->length < 2) { return -EINVAL; } } else if (rinfo->prefix_len > 0) { if (rinfo->length < 1) { return -EINVAL; } } pref = rinfo->route_pref; if (pref == ICMPV6_ROUTER_PREF_INVALID) return -EINVAL; lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); if (rinfo->length == 3) prefix = (struct in6_addr *)rinfo->prefix; else { /* this function is safe */ ipv6_addr_prefix(&prefix_buf, (struct in6_addr *)rinfo->prefix, rinfo->prefix_len); prefix = &prefix_buf; } if (rinfo->prefix_len == 0) rt = rt6_get_dflt_router(net, gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev); if (rt && !lifetime) { ip6_del_rt(net, rt, false); rt = NULL; } if (!rt && lifetime) rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev, pref); else if (rt) rt->fib6_flags = RTF_ROUTEINFO | (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); fib6_remove_gc_list(rt); } else { fib6_set_expires(rt, jiffies + HZ * lifetime); fib6_add_gc_list(rt); } spin_unlock_bh(&table->tb6_lock); fib6_info_release(rt); } return 0; } #endif /* * Misc support functions */ /* called with rcu_lock held */ static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res) { struct net_device *dev = res->nh->fib_nh_dev; if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { /* for copies of local routes, dst->dev needs to be the * device if it is a master device, the master device if * device is enslaved, and the loopback as the default */ if (netif_is_l3_slave(dev) && !rt6_need_strict(&res->f6i->fib6_dst.addr)) dev = l3mdev_master_dev_rcu(dev); else if (!netif_is_l3_master(dev)) dev = dev_net(dev)->loopback_dev; /* last case is netif_is_l3_master(dev) is true in which * case we want dev returned to be dev */ } return dev; } static const int fib6_prop[RTN_MAX + 1] = { [RTN_UNSPEC] = 0, [RTN_UNICAST] = 0, [RTN_LOCAL] = 0, [RTN_BROADCAST] = 0, [RTN_ANYCAST] = 0, 
[RTN_MULTICAST] = 0, [RTN_BLACKHOLE] = -EINVAL, [RTN_UNREACHABLE] = -EHOSTUNREACH, [RTN_PROHIBIT] = -EACCES, [RTN_THROW] = -EAGAIN, [RTN_NAT] = -EINVAL, [RTN_XRESOLVE] = -EINVAL, }; static int ip6_rt_type_to_error(u8 fib6_type) { return fib6_prop[fib6_type]; } static unsigned short fib6_info_dst_flags(struct fib6_info *rt) { unsigned short flags = 0; if (rt->dst_nocount) flags |= DST_NOCOUNT; if (rt->dst_nopolicy) flags |= DST_NOPOLICY; return flags; } static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type) { rt->dst.error = ip6_rt_type_to_error(fib6_type); switch (fib6_type) { case RTN_BLACKHOLE: rt->dst.output = dst_discard_out; rt->dst.input = dst_discard; break; case RTN_PROHIBIT: rt->dst.output = ip6_pkt_prohibit_out; rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: case RTN_UNREACHABLE: default: rt->dst.output = ip6_pkt_discard_out; rt->dst.input = ip6_pkt_discard; break; } } static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res) { struct fib6_info *f6i = res->f6i; if (res->fib6_flags & RTF_REJECT) { ip6_rt_init_dst_reject(rt, res->fib6_type); return; } rt->dst.error = 0; rt->dst.output = ip6_output; if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) { rt->dst.input = ip6_input; } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; } else { rt->dst.input = ip6_forward; } if (res->nh->fib_nh_lws) { rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws); lwtunnel_set_redirect(&rt->dst); } rt->dst.lastuse = jiffies; } /* Caller must already hold reference to @from */ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) { rt->rt6i_flags &= ~RTF_EXPIRES; rcu_assign_pointer(rt->from, from); ip_dst_init_metrics(&rt->dst, from->fib6_metrics); } /* Caller must already hold reference to f6i in result */ static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res) { const struct fib6_nh *nh = res->nh; const struct net_device *dev = nh->fib_nh_dev; struct fib6_info *f6i = res->f6i; ip6_rt_init_dst(rt, res); rt->rt6i_dst = f6i->fib6_dst; rt->rt6i_idev = dev ? 
in6_dev_get(dev) : NULL; rt->rt6i_flags = res->fib6_flags; if (nh->fib_nh_gw_family) { rt->rt6i_gateway = nh->fib_nh_gw6; rt->rt6i_flags |= RTF_GATEWAY; } rt6_set_from(rt, f6i); #ifdef CONFIG_IPV6_SUBTREES rt->rt6i_src = f6i->fib6_src; #endif } static struct fib6_node* fib6_backtrack(struct fib6_node *fn, struct in6_addr *saddr) { struct fib6_node *pn, *sn; while (1) { if (fn->fn_flags & RTN_TL_ROOT) return NULL; pn = rcu_dereference(fn->parent); sn = FIB6_SUBTREE(pn); if (sn && sn != fn) fn = fib6_node_lookup(sn, NULL, saddr); else fn = pn; if (fn->fn_flags & RTN_RTINFO) return fn; } } static bool ip6_hold_safe(struct net *net, struct rt6_info **prt) { struct rt6_info *rt = *prt; if (dst_hold_safe(&rt->dst)) return true; if (net) { rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); } else { rt = NULL; } *prt = rt; return false; } /* called with rcu_lock held */ static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res) { struct net_device *dev = res->nh->fib_nh_dev; struct fib6_info *f6i = res->f6i; unsigned short flags; struct rt6_info *nrt; if (!fib6_info_hold_safe(f6i)) goto fallback; flags = fib6_info_dst_flags(f6i); nrt = ip6_dst_alloc(dev_net(dev), dev, flags); if (!nrt) { fib6_info_release(f6i); goto fallback; } ip6_rt_copy_init(nrt, res); return nrt; fallback: nrt = dev_net(dev)->ipv6.ip6_null_entry; dst_hold(&nrt->dst); return nrt; } INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { struct fib6_result res = {}; struct fib6_node *fn; struct rt6_info *rt; rcu_read_lock(); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: res.f6i = rcu_dereference(fn->leaf); if (!res.f6i) res.f6i = net->ipv6.fib6_null_entry; else rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif, flags); if (res.f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); goto out; } else if (res.fib6_flags & RTF_REJECT) { goto do_create; } fib6_select_path(net, &res, fl6, fl6->flowi6_oif, fl6->flowi6_oif != 0, skb, flags); /* Search through exception table */ rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); if (rt) { if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies); } else { do_create: rt = ip6_create_rt_rcu(&res); } out: trace_fib6_table_lookup(net, &res, table, fl6); rcu_read_unlock(); return rt; } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup); } EXPORT_SYMBOL_GPL(ip6_route_lookup); struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, const struct sk_buff *skb, int strict) { struct flowi6 fl6 = { .flowi6_oif = oif, .daddr = *daddr, }; struct dst_entry *dst; int flags = strict ? RT6_LOOKUP_F_IFACE : 0; if (saddr) { memcpy(&fl6.saddr, saddr, sizeof(*saddr)); flags |= RT6_LOOKUP_F_HAS_SADDR; } dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; dst_release(dst); return NULL; } EXPORT_SYMBOL(rt6_lookup); /* ip6_ins_rt is called with FREE table->tb6_lock. * It takes new route entry, the addition fails by any reason the * route is released. * Caller must hold dst before calling it. 
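 * (__ip6_ins_rt() below only takes tb6_lock around the fib6_add() call
 * itself, which is why the lock must not already be held by the caller.)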
*/ static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { int err; struct fib6_table *table; table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); err = fib6_add(&table->tb6_root, rt, info, extack); spin_unlock_bh(&table->tb6_lock); return err; } int ip6_ins_rt(struct net *net, struct fib6_info *rt) { struct nl_info info = { .nl_net = net, }; return __ip6_ins_rt(rt, &info, NULL); } static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct fib6_info *f6i = res->f6i; struct net_device *dev; struct rt6_info *rt; /* * Clone the route. */ if (!fib6_info_hold_safe(f6i)) return NULL; dev = ip6_rt_get_dev_rcu(res); rt = ip6_dst_alloc(dev_net(dev), dev, 0); if (!rt) { fib6_info_release(f6i); return NULL; } ip6_rt_copy_init(rt, res); rt->rt6i_flags |= RTF_CACHE; rt->rt6i_dst.addr = *daddr; rt->rt6i_dst.plen = 128; if (!rt6_is_gw_or_nonexthop(res)) { if (f6i->fib6_dst.plen != 128 && ipv6_addr_equal(&f6i->fib6_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { rt->rt6i_src.addr = *saddr; rt->rt6i_src.plen = 128; } #endif } return rt; } static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) { struct fib6_info *f6i = res->f6i; unsigned short flags = fib6_info_dst_flags(f6i); struct net_device *dev; struct rt6_info *pcpu_rt; if (!fib6_info_hold_safe(f6i)) return NULL; rcu_read_lock(); dev = ip6_rt_get_dev_rcu(res); pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT); rcu_read_unlock(); if (!pcpu_rt) { fib6_info_release(f6i); return NULL; } ip6_rt_copy_init(pcpu_rt, res); pcpu_rt->rt6i_flags |= RTF_PCPU; if (f6i->nh) pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev)); return pcpu_rt; } static bool rt6_is_valid(const struct rt6_info *rt6) { return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev)); } /* It should be called with rcu_read_lock() acquired */ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) { struct rt6_info *pcpu_rt; pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) { struct rt6_info *prev, **p; p = this_cpu_ptr(res->nh->rt6i_pcpu); prev = xchg(p, NULL); if (prev) { dst_dev_put(&prev->dst); dst_release(&prev->dst); } pcpu_rt = NULL; } return pcpu_rt; } static struct rt6_info *rt6_make_pcpu_route(struct net *net, const struct fib6_result *res) { struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(res); if (!pcpu_rt) return NULL; p = this_cpu_ptr(res->nh->rt6i_pcpu); prev = cmpxchg(p, NULL, pcpu_rt); BUG_ON(prev); if (res->f6i->fib6_destroying) { struct fib6_info *from; from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); fib6_info_release(from); } return pcpu_rt; } /* exception hash table implementation */ static DEFINE_SPINLOCK(rt6_exception_lock); /* Remove rt6_ex from hash table and free the memory * Caller must hold rt6_exception_lock */ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, struct rt6_exception *rt6_ex) { struct fib6_info *from; struct net *net; if (!bucket || !rt6_ex) return; net = dev_net(rt6_ex->rt6i->dst.dev); net->ipv6.rt6_stats->fib_rt_cache--; /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); hlist_del_rcu(&rt6_ex->hlist); 
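	/* The entry is now unlinked from its hash chain: drop the dst
	 * reference and let kfree_rcu() free the rt6_exception only after a
	 * grace period, so lockless RCU readers still walking the chain
	 * remain safe.
	 */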
dst_release(&rt6_ex->rt6i->dst); kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; } /* Remove oldest rt6_ex in bucket and free the memory * Caller must hold rt6_exception_lock */ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) { struct rt6_exception *rt6_ex, *oldest = NULL; if (!bucket) return; hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { if (!oldest || time_before(rt6_ex->stamp, oldest->stamp)) oldest = rt6_ex; } rt6_remove_exception(bucket, oldest); } static u32 rt6_exception_hash(const struct in6_addr *dst, const struct in6_addr *src) { static siphash_aligned_key_t rt6_exception_key; struct { struct in6_addr dst; struct in6_addr src; } __aligned(SIPHASH_ALIGNMENT) combined = { .dst = *dst, }; u64 val; net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key)); #ifdef CONFIG_IPV6_SUBTREES if (src) combined.src = *src; #endif val = siphash(&combined, sizeof(combined), &rt6_exception_key); return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); } /* Helper function to find the cached rt in the hash table * and update bucket pointer to point to the bucket for this * (daddr, saddr) pair * Caller must hold rt6_exception_lock */ static struct rt6_exception * __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct rt6_exception *rt6_ex; u32 hval; if (!(*bucket) || !daddr) return NULL; hval = rt6_exception_hash(daddr, saddr); *bucket += hval; hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) { struct rt6_info *rt6 = rt6_ex->rt6i; bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); #ifdef CONFIG_IPV6_SUBTREES if (matched && saddr) matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); #endif if (matched) return rt6_ex; } return NULL; } /* Helper function to find the cached rt in the hash table * and update bucket pointer to point to the bucket for this * (daddr, saddr) pair * Caller must hold rcu_read_lock() */ static struct rt6_exception * __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct rt6_exception *rt6_ex; u32 hval; WARN_ON_ONCE(!rcu_read_lock_held()); if (!(*bucket) || !daddr) return NULL; hval = rt6_exception_hash(daddr, saddr); *bucket += hval; hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) { struct rt6_info *rt6 = rt6_ex->rt6i; bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); #ifdef CONFIG_IPV6_SUBTREES if (matched && saddr) matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); #endif if (matched) return rt6_ex; } return NULL; } static unsigned int fib6_mtu(const struct fib6_result *res) { const struct fib6_nh *nh = res->nh; unsigned int mtu; if (res->f6i->fib6_pmtu) { mtu = res->f6i->fib6_pmtu; } else { struct net_device *dev = nh->fib_nh_dev; struct inet6_dev *idev; rcu_read_lock(); idev = __in6_dev_get(dev); mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); } #define FIB6_EXCEPTION_BUCKET_FLUSHED 0x1UL /* used when the flushed bit is not relevant, only access to the bucket * (ie., all bucket users except rt6_insert_exception); * * called under rcu lock; sometimes called with rt6_exception_lock held */ static struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh, spinlock_t *lock) { struct rt6_exception_bucket *bucket; if (lock) bucket = rcu_dereference_protected(nh->rt6i_exception_bucket, 
lockdep_is_held(lock)); else bucket = rcu_dereference(nh->rt6i_exception_bucket); /* remove bucket flushed bit if set */ if (bucket) { unsigned long p = (unsigned long)bucket; p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED; bucket = (struct rt6_exception_bucket *)p; } return bucket; } static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket) { unsigned long p = (unsigned long)bucket; return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED); } /* called with rt6_exception_lock held */ static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh, spinlock_t *lock) { struct rt6_exception_bucket *bucket; unsigned long p; bucket = rcu_dereference_protected(nh->rt6i_exception_bucket, lockdep_is_held(lock)); p = (unsigned long)bucket; p |= FIB6_EXCEPTION_BUCKET_FLUSHED; bucket = (struct rt6_exception_bucket *)p; rcu_assign_pointer(nh->rt6i_exception_bucket, bucket); } static int rt6_insert_exception(struct rt6_info *nrt, const struct fib6_result *res) { struct net *net = dev_net(nrt->dst.dev); struct rt6_exception_bucket *bucket; struct fib6_info *f6i = res->f6i; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; struct fib6_nh *nh = res->nh; int max_depth; int err = 0; spin_lock_bh(&rt6_exception_lock); bucket = rcu_dereference_protected(nh->rt6i_exception_bucket, lockdep_is_held(&rt6_exception_lock)); if (!bucket) { bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), GFP_ATOMIC); if (!bucket) { err = -ENOMEM; goto out; } rcu_assign_pointer(nh->rt6i_exception_bucket, bucket); } else if (fib6_nh_excptn_bucket_flushed(bucket)) { err = -EINVAL; goto out; } #ifdef CONFIG_IPV6_SUBTREES /* fib6_src.plen != 0 indicates f6i is in subtree * and exception table is indexed by a hash of * both fib6_dst and fib6_src. * Otherwise, the exception table is indexed by * a hash of only fib6_dst. */ if (f6i->fib6_src.plen) src_key = &nrt->rt6i_src.addr; #endif /* rt6_mtu_change() might lower mtu on f6i. * Only insert this exception route if its mtu * is less than f6i's mtu value. */ if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) { err = -EINVAL; goto out; } rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr, src_key); if (rt6_ex) rt6_remove_exception(bucket, rt6_ex); rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC); if (!rt6_ex) { err = -ENOMEM; goto out; } rt6_ex->rt6i = nrt; rt6_ex->stamp = jiffies; hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; /* Randomize max depth to avoid some side channels attacks. 
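 * The threshold below is FIB6_MAX_DEPTH plus a random extra strictly below
 * FIB6_MAX_DEPTH, so an observer probing hash-bucket collisions cannot
 * predict exactly when the oldest entries start being evicted.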
*/ max_depth = FIB6_MAX_DEPTH + get_random_u32_below(FIB6_MAX_DEPTH); while (bucket->depth > max_depth) rt6_exception_remove_oldest(bucket); out: spin_unlock_bh(&rt6_exception_lock); /* Update fn->fn_sernum to invalidate all cached dst */ if (!err) { spin_lock_bh(&f6i->fib6_table->tb6_lock); fib6_update_sernum(net, f6i); spin_unlock_bh(&f6i->fib6_table->tb6_lock); fib6_force_start_gc(net); } return err; } static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; struct hlist_node *tmp; int i; spin_lock_bh(&rt6_exception_lock); bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); if (!bucket) goto out; /* Prevent rt6_insert_exception() to recreate the bucket list */ if (!from) fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock); for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) { if (!from || rcu_access_pointer(rt6_ex->rt6i->from) == from) rt6_remove_exception(bucket, rt6_ex); } WARN_ON_ONCE(!from && bucket->depth); bucket++; } out: spin_unlock_bh(&rt6_exception_lock); } static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg) { struct fib6_info *f6i = arg; fib6_nh_flush_exceptions(nh, f6i); return 0; } void rt6_flush_exceptions(struct fib6_info *f6i) { if (f6i->nh) nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, f6i); else fib6_nh_flush_exceptions(f6i->fib6_nh, f6i); } /* Find cached rt in the hash table inside passed in rt * Caller has to hold rcu_read_lock() */ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { const struct in6_addr *src_key = NULL; struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; struct rt6_info *ret = NULL; #ifdef CONFIG_IPV6_SUBTREES /* fib6i_src.plen != 0 indicates f6i is in subtree * and exception table is indexed by a hash of * both fib6_dst and fib6_src. * However, the src addr used to create the hash * might not be exactly the passed in saddr which * is a /128 addr from the flow. * So we need to use f6i->fib6_src to redo lookup * if the passed in saddr does not find anything. * (See the logic in ip6_rt_cache_alloc() on how * rt->rt6i_src is updated.) */ if (res->f6i->fib6_src.plen) src_key = saddr; find_ex: #endif bucket = fib6_nh_get_excptn_bucket(res->nh, NULL); rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) ret = rt6_ex->rt6i; #ifdef CONFIG_IPV6_SUBTREES /* Use fib6_src as src_key and redo lookup */ if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) { src_key = &res->f6i->fib6_src.addr; goto find_ex; } #endif return ret; } /* Remove the passed in cached rt from the hash table that contains it */ static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen, const struct rt6_info *rt) { const struct in6_addr *src_key = NULL; struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; int err; if (!rcu_access_pointer(nh->rt6i_exception_bucket)) return -ENOENT; spin_lock_bh(&rt6_exception_lock); bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); #ifdef CONFIG_IPV6_SUBTREES /* rt6i_src.plen != 0 indicates 'from' is in subtree * and exception table is indexed by a hash of * both rt6i_dst and rt6i_src. * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. 
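 * The same src_key choice is made at insertion time (rt6_insert_exception()),
 * so this lookup hashes into the bucket the entry was originally stored in.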
*/ if (plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_spinlock(&bucket, &rt->rt6i_dst.addr, src_key); if (rt6_ex) { rt6_remove_exception(bucket, rt6_ex); err = 0; } else { err = -ENOENT; } spin_unlock_bh(&rt6_exception_lock); return err; } struct fib6_nh_excptn_arg { struct rt6_info *rt; int plen; }; static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg) { struct fib6_nh_excptn_arg *arg = _arg; int err; err = fib6_nh_remove_exception(nh, arg->plen, arg->rt); if (err == 0) return 1; return 0; } static int rt6_remove_exception_rt(struct rt6_info *rt) { struct fib6_info *from; from = rcu_dereference(rt->from); if (!from || !(rt->rt6i_flags & RTF_CACHE)) return -EINVAL; if (from->nh) { struct fib6_nh_excptn_arg arg = { .rt = rt, .plen = from->fib6_src.plen }; int rc; /* rc = 1 means an entry was found */ rc = nexthop_for_each_fib6_nh(from->nh, rt6_nh_remove_exception_rt, &arg); return rc ? 0 : -ENOENT; } return fib6_nh_remove_exception(from->fib6_nh, from->fib6_src.plen, rt); } /* Find rt6_ex which contains the passed in rt cache and * refresh its stamp */ static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen, const struct rt6_info *rt) { const struct in6_addr *src_key = NULL; struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; bucket = fib6_nh_get_excptn_bucket(nh, NULL); #ifdef CONFIG_IPV6_SUBTREES /* rt6i_src.plen != 0 indicates 'from' is in subtree * and exception table is indexed by a hash of * both rt6i_dst and rt6i_src. * Otherwise, the exception table is indexed by * a hash of only rt6i_dst. */ if (plen) src_key = &rt->rt6i_src.addr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key); if (rt6_ex) rt6_ex->stamp = jiffies; } struct fib6_nh_match_arg { const struct net_device *dev; const struct in6_addr *gw; struct fib6_nh *match; }; /* determine if fib6_nh has given device and gateway */ static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg) { struct fib6_nh_match_arg *arg = _arg; if (arg->dev != nh->fib_nh_dev || (arg->gw && !nh->fib_nh_gw_family) || (!arg->gw && nh->fib_nh_gw_family) || (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6))) return 0; arg->match = nh; /* found a match, break the loop */ return 1; } static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct fib6_info *from; struct fib6_nh *fib6_nh; rcu_read_lock(); from = rcu_dereference(rt->from); if (!from || !(rt->rt6i_flags & RTF_CACHE)) goto unlock; if (from->nh) { struct fib6_nh_match_arg arg = { .dev = rt->dst.dev, .gw = &rt->rt6i_gateway, }; nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg); if (!arg.match) goto unlock; fib6_nh = arg.match; } else { fib6_nh = from->fib6_nh; } fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt); unlock: rcu_read_unlock(); } static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, struct rt6_info *rt, int mtu) { /* If the new MTU is lower than the route PMTU, this new MTU will be the * lowest MTU in the path: always allow updating the route PMTU to * reflect PMTU decreases. * * If the new MTU is higher, and the route PMTU is equal to the local * MTU, this means the old MTU is the lowest in the path, so allow * updating it: if other nodes now have lower MTUs, PMTU discovery will * handle this. 
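 * Concretely, the code below allows the update when dst_mtu(rt) >= mtu
 * (a decrease) or when dst_mtu(rt) equals idev->cnf.mtu6 (the route PMTU
 * still tracks the link MTU); every other case is rejected.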
*/ if (dst_mtu(&rt->dst) >= mtu) return true; if (dst_mtu(&rt->dst) == idev->cnf.mtu6) return true; return false; } static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, const struct fib6_nh *nh, int mtu) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; int i; bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); if (!bucket) return; for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { struct rt6_info *entry = rt6_ex->rt6i; /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected * route), the metrics of its rt->from have already * been updated. */ if (dst_metric_raw(&entry->dst, RTAX_MTU) && rt6_mtu_change_route_allowed(idev, entry, mtu)) dst_metric_set(&entry->dst, RTAX_MTU, mtu); } bucket++; } } #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh, const struct in6_addr *gateway) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; struct hlist_node *tmp; int i; if (!rcu_access_pointer(nh->rt6i_exception_bucket)) return; spin_lock_bh(&rt6_exception_lock); bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); if (bucket) { for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) { struct rt6_info *entry = rt6_ex->rt6i; if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY && ipv6_addr_equal(gateway, &entry->rt6i_gateway)) { rt6_remove_exception(bucket, rt6_ex); } } bucket++; } } spin_unlock_bh(&rt6_exception_lock); } static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, struct rt6_exception *rt6_ex, struct fib6_gc_args *gc_args, unsigned long now) { struct rt6_info *rt = rt6_ex->rt6i; /* we are pruning and obsoleting aged-out and non gateway exceptions * even if others have still references to them, so that on next * dst_check() such references can be dropped. * EXPIRES exceptions - e.g. 
pmtu-generated ones are pruned when * expired, independently from their aging, as per RFC 8201 section 4 */ if (!(rt->rt6i_flags & RTF_EXPIRES)) { if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { pr_debug("aging clone %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; } } else if (time_after(jiffies, rt->dst.expires)) { pr_debug("purging expired route %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; } if (rt->rt6i_flags & RTF_GATEWAY) { struct neighbour *neigh; neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); if (!(neigh && (neigh->flags & NTF_ROUTER))) { pr_debug("purging route %p via non-router but gateway\n", rt); rt6_remove_exception(bucket, rt6_ex); return; } } gc_args->more++; } static void fib6_nh_age_exceptions(const struct fib6_nh *nh, struct fib6_gc_args *gc_args, unsigned long now) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; struct hlist_node *tmp; int i; if (!rcu_access_pointer(nh->rt6i_exception_bucket)) return; rcu_read_lock_bh(); spin_lock(&rt6_exception_lock); bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); if (bucket) { for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) { rt6_age_examine_exception(bucket, rt6_ex, gc_args, now); } bucket++; } } spin_unlock(&rt6_exception_lock); rcu_read_unlock_bh(); } struct fib6_nh_age_excptn_arg { struct fib6_gc_args *gc_args; unsigned long now; }; static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg) { struct fib6_nh_age_excptn_arg *arg = _arg; fib6_nh_age_exceptions(nh, arg->gc_args, arg->now); return 0; } void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now) { if (f6i->nh) { struct fib6_nh_age_excptn_arg arg = { .gc_args = gc_args, .now = now }; nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions, &arg); } else { fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now); } } /* must be called with rcu lock held */ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, struct fib6_result *res, int strict) { struct fib6_node *fn, *saved_fn; fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; redo_rt6_select: rt6_select(net, fn, oif, res, strict); if (res->f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; else if (strict & RT6_LOOKUP_F_REACHABLE) { /* also consider unreachable route */ strict &= ~RT6_LOOKUP_F_REACHABLE; fn = saved_fn; goto redo_rt6_select; } } trace_fib6_table_lookup(net, res, table, fl6); return 0; } struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { struct fib6_result res = {}; struct rt6_info *rt = NULL; int strict = 0; WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) && !rcu_read_lock_held()); strict |= flags & RT6_LOOKUP_F_IFACE; strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) strict |= RT6_LOOKUP_F_REACHABLE; rcu_read_lock(); fib6_table_lookup(net, table, oif, fl6, &res, strict); if (res.f6i == net->ipv6.fib6_null_entry) goto out; fib6_select_path(net, &res, fl6, oif, false, skb, strict); /*Search through exception table */ rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); if (rt) { goto out; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !res.nh->fib_nh_gw_family)) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 
tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different * from the fl6->daddr used to look-up route here. */ rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL); if (rt) { /* 1 refcnt is taken during ip6_rt_cache_alloc(). * As rt6_uncached_list_add() does not consume refcnt, * this refcnt is always returned to the caller even * if caller sets RT6_LOOKUP_F_DST_NOREF flag. */ rt6_uncached_list_add(rt); rcu_read_unlock(); return rt; } } else { /* Get a percpu copy */ local_bh_disable(); rt = rt6_get_pcpu_route(&res); if (!rt) rt = rt6_make_pcpu_route(net, &res); local_bh_enable(); } out: if (!rt) rt = net->ipv6.ip6_null_entry; if (!(flags & RT6_LOOKUP_F_DST_NOREF)) ip6_hold_safe(net, &rt); rcu_read_unlock(); return rt; } EXPORT_SYMBOL_GPL(ip6_pol_route); INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags); } struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input); } EXPORT_SYMBOL_GPL(ip6_route_input_lookup); static void ip6_multipath_l3_keys(const struct sk_buff *skb, struct flow_keys *keys, struct flow_keys *flkeys) { const struct ipv6hdr *outer_iph = ipv6_hdr(skb); const struct ipv6hdr *key_iph = outer_iph; struct flow_keys *_flkeys = flkeys; const struct ipv6hdr *inner_iph; const struct icmp6hdr *icmph; struct ipv6hdr _inner_iph; struct icmp6hdr _icmph; if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6)) goto out; icmph = skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_icmph), &_icmph); if (!icmph) goto out; if (!icmpv6_is_err(icmph->icmp6_type)) goto out; inner_iph = skb_header_pointer(skb, skb_transport_offset(skb) + sizeof(*icmph), sizeof(_inner_iph), &_inner_iph); if (!inner_iph) goto out; key_iph = inner_iph; _flkeys = NULL; out: if (_flkeys) { keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src; keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst; keys->tags.flow_label = _flkeys->tags.flow_label; keys->basic.ip_proto = _flkeys->basic.ip_proto; } else { keys->addrs.v6addrs.src = key_iph->saddr; keys->addrs.v6addrs.dst = key_iph->daddr; keys->tags.flow_label = ip6_flowlabel(key_iph); keys->basic.ip_proto = key_iph->nexthdr; } } static u32 rt6_multipath_custom_hash_outer(const struct net *net, const struct sk_buff *skb, bool *p_has_inner) { u32 hash_fields = ip6_multipath_hash_fields(net); struct flow_keys keys, hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = keys.basic.ip_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) hash_keys.tags.flow_label = keys.tags.flow_label; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) hash_keys.ports.src = keys.ports.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) 
hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); return flow_hash_from_keys(&hash_keys); } static u32 rt6_multipath_custom_hash_inner(const struct net *net, const struct sk_buff *skb, bool has_inner) { u32 hash_fields = ip6_multipath_hash_fields(net); struct flow_keys keys, hash_keys; /* We assume the packet carries an encapsulation, but if none was * encountered during dissection of the outer flow, then there is no * point in calling the flow dissector again. */ if (!has_inner) return 0; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, 0); if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) return 0; if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) hash_keys.tags.flow_label = keys.tags.flow_label; } if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) hash_keys.basic.ip_proto = keys.basic.ip_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) hash_keys.ports.src = keys.ports.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; return flow_hash_from_keys(&hash_keys); } static u32 rt6_multipath_custom_hash_skb(const struct net *net, const struct sk_buff *skb) { u32 mhash, mhash_inner; bool has_inner = true; mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner); mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner); return jhash_2words(mhash, mhash_inner, 0); } static u32 rt6_multipath_custom_hash_fl6(const struct net *net, const struct flowi6 *fl6) { u32 hash_fields = ip6_multipath_hash_fields(net); struct flow_keys hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) hash_keys.addrs.v6addrs.src = fl6->saddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) hash_keys.addrs.v6addrs.dst = fl6->daddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = fl6->flowi6_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) hash_keys.ports.src = fl6->fl6_sport; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl6->fl6_dport; return flow_hash_from_keys(&hash_keys); } /* if skb is set it will be used and fl6 can be NULL */ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *flkeys) { struct flow_keys hash_keys; u32 mhash = 0; switch (ip6_multipath_hash_policy(net)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = 
FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (skb) { ip6_multipath_l3_keys(skb, &hash_keys, flkeys); } else { hash_keys.addrs.v6addrs.src = fl6->saddr; hash_keys.addrs.v6addrs.dst = fl6->daddr; hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } mhash = flow_hash_from_keys(&hash_keys); break; case 1: if (skb) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; struct flow_keys keys; /* short-circuit if we already have L4 hash present */ if (skb->l4_hash) return skb_get_hash_raw(skb) >> 1; memset(&hash_keys, 0, sizeof(hash_keys)); if (!flkeys) { skb_flow_dissect_flow_keys(skb, &keys, flag); flkeys = &keys; } hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src; hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst; hash_keys.ports.src = flkeys->ports.src; hash_keys.ports.dst = flkeys->ports.dst; hash_keys.basic.ip_proto = flkeys->basic.ip_proto; } else { memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; hash_keys.addrs.v6addrs.src = fl6->saddr; hash_keys.addrs.v6addrs.dst = fl6->daddr; hash_keys.ports.src = fl6->fl6_sport; hash_keys.ports.dst = fl6->fl6_dport; hash_keys.basic.ip_proto = fl6->flowi6_proto; } mhash = flow_hash_from_keys(&hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (skb) { struct flow_keys keys; if (!flkeys) { skb_flow_dissect_flow_keys(skb, &keys, 0); flkeys = &keys; } /* Inner can be v4 or v6 */ if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; } else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src; hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst; hash_keys.tags.flow_label = flkeys->tags.flow_label; hash_keys.basic.ip_proto = flkeys->basic.ip_proto; } else { /* Same as case 0 */ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; ip6_multipath_l3_keys(skb, &hash_keys, flkeys); } } else { /* Same as case 0 */ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; hash_keys.addrs.v6addrs.src = fl6->saddr; hash_keys.addrs.v6addrs.dst = fl6->daddr; hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } mhash = flow_hash_from_keys(&hash_keys); break; case 3: if (skb) mhash = rt6_multipath_custom_hash_skb(net, skb); else mhash = rt6_multipath_custom_hash_fl6(net, fl6); break; } return mhash >> 1; } /* Called with rcu held */ void ip6_route_input(struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, }; struct flow_keys *flkeys = NULL, _flkeys; tun_info = skb_tunnel_info(skb); if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys)) flkeys = &_flkeys; if (unlikely(fl6.flowi6_proto == 
IPPROTO_ICMPV6)) fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys); skb_dst_drop(skb); skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); } INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); } static struct dst_entry *ip6_route_output_flags_noref(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags) { bool any_src; if (ipv6_addr_type(&fl6->daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) { struct dst_entry *dst; /* This function does not take refcnt on the dst */ dst = l3mdev_link_scope_lookup(net, fl6); if (dst) return dst; } fl6->flowi6_iif = LOOPBACK_IFINDEX; flags |= RT6_LOOKUP_F_DST_NOREF; any_src = ipv6_addr_any(&fl6->saddr); if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || (fl6->flowi6_oif && any_src)) flags |= RT6_LOOKUP_F_IFACE; if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs)); return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags) { struct dst_entry *dst; struct rt6_info *rt6; rcu_read_lock(); dst = ip6_route_output_flags_noref(net, sk, fl6, flags); rt6 = (struct rt6_info *)dst; /* For dst cached in uncached_list, refcnt is already taken. */ if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { dst = &net->ipv6.ip6_null_entry->dst; dst_hold(dst); } rcu_read_unlock(); return dst; } EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); new = &rt->dst; new->__use = 1; new->input = dst_discard; new->output = dst_discard_out; dst_copy_metrics(new, &ort->dst); rt->rt6i_idev = in6_dev_get(loopback_dev); rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); #ifdef CONFIG_IPV6_SUBTREES memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); #endif } dst_release(dst_orig); return new ? 
new : ERR_PTR(-ENOMEM); } /* * Destination cache support functions */ static bool fib6_check(struct fib6_info *f6i, u32 cookie) { u32 rt_cookie = 0; if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie) return false; if (fib6_check_expired(f6i)) return false; return true; } static struct dst_entry *rt6_check(struct rt6_info *rt, struct fib6_info *from, u32 cookie) { u32 rt_cookie = 0; if (!from || !fib6_get_cookie_safe(from, &rt_cookie) || rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) return NULL; return &rt->dst; } static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, struct fib6_info *from, u32 cookie) { if (!__rt6_check_expired(rt) && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && fib6_check(from, cookie)) return &rt->dst; else return NULL; } INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { struct dst_entry *dst_ret; struct fib6_info *from; struct rt6_info *rt; rt = container_of(dst, struct rt6_info, dst); if (rt->sernum) return rt6_is_valid(rt) ? dst : NULL; rcu_read_lock(); /* All IPV6 dsts are created with ->obsolete set to the value * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. */ from = rcu_dereference(rt->from); if (from && (rt->rt6i_flags & RTF_PCPU || unlikely(!list_empty(&rt->dst.rt_uncached)))) dst_ret = rt6_dst_from_check(rt, from, cookie); else dst_ret = rt6_check(rt, from, cookie); rcu_read_unlock(); return dst_ret; } EXPORT_INDIRECT_CALLABLE(ip6_dst_check); static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; if (rt) { if (rt->rt6i_flags & RTF_CACHE) { rcu_read_lock(); if (rt6_check_expired(rt)) { rt6_remove_exception_rt(rt); dst = NULL; } rcu_read_unlock(); } else { dst_release(dst); dst = NULL; } } return dst; } static void ip6_link_failure(struct sk_buff *skb) { struct rt6_info *rt; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); rt = (struct rt6_info *) skb_dst(skb); if (rt) { rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { rt6_remove_exception_rt(rt); } else { struct fib6_info *from; struct fib6_node *fn; from = rcu_dereference(rt->from); if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); } } static void rt6_update_expires(struct rt6_info *rt0, int timeout) { if (!(rt0->rt6i_flags & RTF_EXPIRES)) { struct fib6_info *from; rcu_read_lock(); from = rcu_dereference(rt0->from); if (from) rt0->dst.expires = from->expires; rcu_read_unlock(); } dst_set_expires(&rt0->dst, timeout); rt0->rt6i_flags |= RTF_EXPIRES; } static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) { struct net *net = dev_net(rt->dst.dev); dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt->rt6i_flags |= RTF_MODIFIED; rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); } static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { return !(rt->rt6i_flags & RTF_CACHE) && (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, const struct ipv6hdr *iph, u32 mtu, bool confirm_neigh) { const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it. 
* [see also comment in rt6_mtu_change_route()] */ if (iph) { daddr = &iph->daddr; saddr = &iph->saddr; } else if (sk) { daddr = &sk->sk_v6_daddr; saddr = &inet6_sk(sk)->saddr; } else { daddr = NULL; saddr = NULL; } if (confirm_neigh) dst_confirm_neigh(dst, daddr); if (mtu < IPV6_MIN_MTU) return; if (mtu >= dst_mtu(dst)) return; if (!rt6_cache_allowed_for_pmtu(rt6)) { rt6_do_update_pmtu(rt6, mtu); /* update rt6_ex->stamp for cache */ if (rt6->rt6i_flags & RTF_CACHE) rt6_update_exception_stamp_rt(rt6); } else if (daddr) { struct fib6_result res = {}; struct rt6_info *nrt6; rcu_read_lock(); res.f6i = rcu_dereference(rt6->from); if (!res.f6i) goto out_unlock; res.fib6_flags = res.f6i->fib6_flags; res.fib6_type = res.f6i->fib6_type; if (res.f6i->nh) { struct fib6_nh_match_arg arg = { .dev = dst->dev, .gw = &rt6->rt6i_gateway, }; nexthop_for_each_fib6_nh(res.f6i->nh, fib6_nh_find_match, &arg); /* fib6_info uses a nexthop that does not have fib6_nh * using the dst->dev + gw. Should be impossible. */ if (!arg.match) goto out_unlock; res.nh = arg.match; } else { res.nh = res.f6i->fib6_nh; } nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); if (rt6_insert_exception(nrt6, &res)) dst_release_immediate(&nrt6->dst); } out_unlock: rcu_read_unlock(); } } static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, confirm_neigh); } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_oif = oif, .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark), .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), .flowi6_uid = uid, }; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { int oif = sk->sk_bound_dev_if; struct dst_entry *dst; if (!oif && skb->dev) oif = l3mdev_master_ifindex(skb->dev); ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark), sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) return; bh_lock_sock(sk); if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ip6_datagram_dst_update(sk, false); bh_unlock_sock(sk); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, const struct flowi6 *fl6) { #ifdef CONFIG_IPV6_SUBTREES struct ipv6_pinfo *np = inet6_sk(sk); #endif ip6_dst_store(sk, dst, ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ? &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6->saddr, &np->saddr) ? &np->saddr : #endif NULL); } static bool ip6_redirect_nh_match(const struct fib6_result *res, struct flowi6 *fl6, const struct in6_addr *gw, struct rt6_info **ret) { const struct fib6_nh *nh = res->nh; if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family || fl6->flowi6_oif != nh->fib_nh_dev->ifindex) return false; /* rt_cache's gateway might be different from its 'parent' * in the case of an ip redirect. * So we keep searching in the exception table if the gateway * is different. 
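 * That is: when @gw differs from the nexthop's configured gateway, the
 * cached clone for this flow is looked up and its rt6i_gateway is matched
 * against @gw instead.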
*/ if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) { struct rt6_info *rt_cache; rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr); if (rt_cache && ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) { *ret = rt_cache; return true; } return false; } return true; } struct fib6_nh_rd_arg { struct fib6_result *res; struct flowi6 *fl6; const struct in6_addr *gw; struct rt6_info **ret; }; static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg) { struct fib6_nh_rd_arg *arg = _arg; arg->res->nh = nh; return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret); } /* Handle redirects */ struct ip6rd_flowi { struct flowi6 fl6; struct in6_addr gateway; }; INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; struct rt6_info *ret = NULL; struct fib6_result res = {}; struct fib6_nh_rd_arg arg = { .res = &res, .fl6 = fl6, .gw = &rdfl->gateway, .ret = &ret }; struct fib6_info *rt; struct fib6_node *fn; /* Get the "current" route for this destination and * check if the redirect has come from appropriate router. * * RFC 4861 specifies that redirects should only be * accepted if they come from the nexthop to the target. * Due to the way the routes are chosen, this notion * is a bit fuzzy and one might need to check all possible * routes. */ rcu_read_lock(); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { res.f6i = rt; if (fib6_check_expired(rt)) continue; if (rt->fib6_flags & RTF_REJECT) break; if (unlikely(rt->nh)) { if (nexthop_is_blackhole(rt->nh)) continue; /* on match, res->nh is filled in and potentially ret */ if (nexthop_for_each_fib6_nh(rt->nh, fib6_nh_redirect_match, &arg)) goto out; } else { res.nh = rt->fib6_nh; if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret)) goto out; } } if (!rt) rt = net->ipv6.fib6_null_entry; else if (rt->fib6_flags & RTF_REJECT) { ret = net->ipv6.ip6_null_entry; goto out; } if (rt == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } res.f6i = rt; res.nh = rt->fib6_nh; out: if (ret) { ip6_hold_safe(net, &ret); } else { res.fib6_flags = res.f6i->fib6_flags; res.fib6_type = res.f6i->fib6_type; ret = ip6_create_rt_rcu(&res); } rcu_read_unlock(); trace_fib6_table_lookup(net, &res, table, fl6); return ret; }; static struct dst_entry *ip6_route_redirect(struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, const struct in6_addr *gateway) { int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip6rd_flowi rdfl; rdfl.fl6 = *fl6; rdfl.gateway = *gateway; return fib6_rule_lookup(net, &rdfl.fl6, skb, flags, __ip6_route_redirect); } void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_oif = oif, .flowi6_mark = mark, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), .flowi6_uid = uid, }; dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_redirect); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, 
.flowi6_oif = oif, .daddr = msg->dest, .saddr = iph->daddr, .flowi6_uid = sock_net_uid(net, NULL), }; dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr); rt6_do_redirect(dst, NULL, skb); dst_release(dst); } void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; unsigned int mtu = dst_mtu(dst); struct net *net = dev_net(dev); mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) mtu = net->ipv6.sysctl.ip6_rt_min_advmss; /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. * IPV6_MAXPLEN is also valid and means: "any MSS, * rely only on pmtu discovery" */ if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) mtu = IPV6_MAXPLEN; return mtu; } INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst) { return ip6_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ip6_mtu); /* MTU selection: * 1. mtu on route is locked - use it * 2. mtu from nexthop exception * 3. mtu from egress device * * based on ip6_dst_mtu_forward and exception logic of * rt6_find_cached_rt; called with rcu_read_lock */ u32 ip6_mtu_from_fib6(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { const struct fib6_nh *nh = res->nh; struct fib6_info *f6i = res->f6i; struct inet6_dev *idev; struct rt6_info *rt; u32 mtu = 0; if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { mtu = f6i->fib6_pmtu; if (mtu) goto out; } rt = rt6_find_cached_rt(res, daddr, saddr); if (unlikely(rt)) { mtu = dst_metric_raw(&rt->dst, RTAX_MTU); } else { struct net_device *dev = nh->fib_nh_dev; mtu = IPV6_MIN_MTU; idev = __in6_dev_get(dev); if (idev) mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6)); } mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); out: return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); } struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6) { struct dst_entry *dst; struct rt6_info *rt; struct inet6_dev *idev = in6_dev_get(dev); struct net *net = dev_net(dev); if (unlikely(!idev)) return ERR_PTR(-ENODEV); rt = ip6_dst_alloc(net, dev, 0); if (unlikely(!rt)) { in6_dev_put(idev); dst = ERR_PTR(-ENOMEM); goto out; } rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); /* Add this dst into uncached_list so that rt6_disable_ip() can * do proper release of the net_device */ rt6_uncached_list_add(rt); dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); out: return dst; } static void ip6_dst_gc(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; unsigned int val; int entries; if (time_after(rt_last_gc + rt_min_interval, jiffies)) goto out; fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true); entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1); out: val = atomic_read(&net->ipv6.ip6_rt_gc_expire); 
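	/* Decay the GC back-off: each pass subtracts val >> rt_elasticity
	 * from ip6_rt_gc_expire (e.g. roughly val/512 if ip6_rt_gc_elasticity
	 * were 9), so aggressive expiry eases off once table pressure is gone.
	 */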
atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity)); } static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg, const struct in6_addr *gw_addr, u32 tbid, int flags, struct fib6_result *res) { struct flowi6 fl6 = { .flowi6_oif = cfg->fc_ifindex, .daddr = *gw_addr, .saddr = cfg->fc_prefsrc, }; struct fib6_table *table; int err; table = fib6_get_table(net, tbid); if (!table) return -EINVAL; if (!ipv6_addr_any(&cfg->fc_prefsrc)) flags |= RT6_LOOKUP_F_HAS_SADDR; flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags); if (!err && res->f6i != net->ipv6.fib6_null_entry) fib6_select_path(net, res, &fl6, cfg->fc_ifindex, cfg->fc_ifindex != 0, NULL, flags); return err; } static int ip6_route_check_nh_onlink(struct net *net, struct fib6_config *cfg, const struct net_device *dev, struct netlink_ext_ack *extack) { u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; const struct in6_addr *gw_addr = &cfg->fc_gateway; struct fib6_result res = {}; int err; err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res); if (!err && !(res.fib6_flags & RTF_REJECT) && /* ignore match if it is the default route */ !ipv6_addr_any(&res.f6i->fib6_dst.addr) && (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway or device mismatch"); err = -EINVAL; } return err; } static int ip6_route_check_nh(struct net *net, struct fib6_config *cfg, struct net_device **_dev, netdevice_tracker *dev_tracker, struct inet6_dev **idev) { const struct in6_addr *gw_addr = &cfg->fc_gateway; struct net_device *dev = _dev ? *_dev : NULL; int flags = RT6_LOOKUP_F_IFACE; struct fib6_result res = {}; int err = -EHOSTUNREACH; if (cfg->fc_table) { err = ip6_nh_lookup_table(net, cfg, gw_addr, cfg->fc_table, flags, &res); /* gw_addr can not require a gateway or resolve to a reject * route. If a device is given, it must match the result. */ if (err || res.fib6_flags & RTF_REJECT || res.nh->fib_nh_gw_family || (dev && dev != res.nh->fib_nh_dev)) err = -EHOSTUNREACH; } if (err < 0) { struct flowi6 fl6 = { .flowi6_oif = cfg->fc_ifindex, .daddr = *gw_addr, }; err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags); if (err || res.fib6_flags & RTF_REJECT || res.nh->fib_nh_gw_family) err = -EHOSTUNREACH; if (err) return err; fib6_select_path(net, &res, &fl6, cfg->fc_ifindex, cfg->fc_ifindex != 0, NULL, flags); } err = 0; if (dev) { if (dev != res.nh->fib_nh_dev) err = -EHOSTUNREACH; } else { *_dev = dev = res.nh->fib_nh_dev; netdev_hold(dev, dev_tracker, GFP_ATOMIC); *idev = in6_dev_get(dev); } return err; } static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, struct net_device **_dev, netdevice_tracker *dev_tracker, struct inet6_dev **idev, struct netlink_ext_ack *extack) { const struct in6_addr *gw_addr = &cfg->fc_gateway; int gwa_type = ipv6_addr_type(gw_addr); bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true; const struct net_device *dev = *_dev; bool need_addr_check = !dev; int err = -EINVAL; /* if gw_addr is local we will fail to detect this in case * address is still TENTATIVE (DAD in progress). rt6_lookup() * will return already-added prefix route via interface that * prefix route was assigned to, which might be non-loopback. 
*/ if (dev && ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); goto out; } if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) { /* IPv6 strictly inhibits using not link-local * addresses as nexthop address. * Otherwise, router will not able to send redirects. * It is very good, but in some (rare!) circumstances * (SIT, PtP, NBMA NOARP links) it is handy to allow * some exceptions. --ANK * We allow IPv4-mapped nexthops to support RFC4798-type * addressing */ if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) { NL_SET_ERR_MSG(extack, "Invalid gateway address"); goto out; } rcu_read_lock(); if (cfg->fc_flags & RTNH_F_ONLINK) err = ip6_route_check_nh_onlink(net, cfg, dev, extack); else err = ip6_route_check_nh(net, cfg, _dev, dev_tracker, idev); rcu_read_unlock(); if (err) goto out; } /* reload in case device was changed */ dev = *_dev; err = -EINVAL; if (!dev) { NL_SET_ERR_MSG(extack, "Egress device not specified"); goto out; } else if (dev->flags & IFF_LOOPBACK) { NL_SET_ERR_MSG(extack, "Egress device can not be loopback device for this route"); goto out; } /* if we did not check gw_addr above, do so now that the * egress device has been resolved. */ if (need_addr_check && ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); goto out; } err = 0; out: return err; } static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) { if ((flags & RTF_REJECT) || (dev && (dev->flags & IFF_LOOPBACK) && !(addr_type & IPV6_ADDR_LOOPBACK) && !(flags & (RTF_ANYCAST | RTF_LOCAL)))) return true; return false; } int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; int addr_type; int err; fib6_nh->fib_nh_family = AF_INET6; #ifdef CONFIG_IPV6_ROUTER_PREF fib6_nh->last_probe = jiffies; #endif if (cfg->fc_is_fdb) { fib6_nh->fib_nh_gw6 = cfg->fc_gateway; fib6_nh->fib_nh_gw_family = AF_INET6; return 0; } err = -ENODEV; if (cfg->fc_ifindex) { dev = netdev_get_by_index(net, cfg->fc_ifindex, dev_tracker, gfp_flags); if (!dev) goto out; idev = in6_dev_get(dev); if (!idev) goto out; } if (cfg->fc_flags & RTNH_F_ONLINK) { if (!dev) { NL_SET_ERR_MSG(extack, "Nexthop device required for onlink"); goto out; } if (!(dev->flags & IFF_UP)) { NL_SET_ERR_MSG(extack, "Nexthop device is not up"); err = -ENETDOWN; goto out; } fib6_nh->fib_nh_flags |= RTNH_F_ONLINK; } fib6_nh->fib_nh_weight = 1; /* We cannot add true routes via loopback here, * they would result in kernel looping; promote them to reject routes */ addr_type = ipv6_addr_type(&cfg->fc_dst); if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) { /* hold loopback dev/idev if we haven't done so. 
*/ if (dev != net->loopback_dev) { if (dev) { netdev_put(dev, dev_tracker); in6_dev_put(idev); } dev = net->loopback_dev; netdev_hold(dev, dev_tracker, gfp_flags); idev = in6_dev_get(dev); if (!idev) { err = -ENODEV; goto out; } } goto pcpu_alloc; } if (cfg->fc_flags & RTF_GATEWAY) { err = ip6_validate_gw(net, cfg, &dev, dev_tracker, &idev, extack); if (err) goto out; fib6_nh->fib_nh_gw6 = cfg->fc_gateway; fib6_nh->fib_nh_gw_family = AF_INET6; } err = -ENODEV; if (!dev) goto out; if (idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); err = -EACCES; goto out; } if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) { NL_SET_ERR_MSG(extack, "Nexthop device is not up"); err = -ENETDOWN; goto out; } if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) && !netif_carrier_ok(dev)) fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, gfp_flags, extack); if (err) goto out; pcpu_alloc: fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); if (!fib6_nh->rt6i_pcpu) { err = -ENOMEM; goto out; } fib6_nh->fib_nh_dev = dev; fib6_nh->fib_nh_oif = dev->ifindex; err = 0; out: if (idev) in6_dev_put(idev); if (err) { lwtstate_put(fib6_nh->fib_nh_lws); fib6_nh->fib_nh_lws = NULL; netdev_put(dev, dev_tracker); } return err; } void fib6_nh_release(struct fib6_nh *fib6_nh) { struct rt6_exception_bucket *bucket; rcu_read_lock(); fib6_nh_flush_exceptions(fib6_nh, NULL); bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL); if (bucket) { rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL); kfree(bucket); } rcu_read_unlock(); fib6_nh_release_dsts(fib6_nh); free_percpu(fib6_nh->rt6i_pcpu); fib_nh_common_release(&fib6_nh->nh_common); } void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) { int cpu; if (!fib6_nh->rt6i_pcpu) return; for_each_possible_cpu(cpu) { struct rt6_info *pcpu_rt, **ppcpu_rt; ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); pcpu_rt = xchg(ppcpu_rt, NULL); if (pcpu_rt) { dst_dev_put(&pcpu_rt->dst); dst_release(&pcpu_rt->dst); } } } static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; struct fib6_info *rt = NULL; struct nexthop *nh = NULL; struct fib6_table *table; struct fib6_nh *fib6_nh; int err = -EINVAL; int addr_type; /* RTF_PCPU is an internal flag; can not be set by userspace */ if (cfg->fc_flags & RTF_PCPU) { NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); goto out; } /* RTF_CACHE is an internal flag; can not be set by userspace */ if (cfg->fc_flags & RTF_CACHE) { NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); goto out; } if (cfg->fc_type > RTN_MAX) { NL_SET_ERR_MSG(extack, "Invalid route type"); goto out; } if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; } if (cfg->fc_src_len > 128) { NL_SET_ERR_MSG(extack, "Invalid source address length"); goto out; } #ifndef CONFIG_IPV6_SUBTREES if (cfg->fc_src_len) { NL_SET_ERR_MSG(extack, "Specifying source address requires IPV6_SUBTREES to be enabled"); goto out; } #endif if (cfg->fc_nh_id) { nh = nexthop_find_by_id(net, cfg->fc_nh_id); if (!nh) { NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); goto out; } err = fib6_check_nexthop(nh, cfg, extack); if (err) goto out; } err = -ENOBUFS; if (cfg->fc_nlinfo.nlh && !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { table = fib6_get_table(net, cfg->fc_table); if (!table) { pr_warn("NLM_F_CREATE should be 
specified when creating new route\n"); table = fib6_new_table(net, cfg->fc_table); } } else { table = fib6_new_table(net, cfg->fc_table); } if (!table) goto out; err = -ENOMEM; rt = fib6_info_alloc(gfp_flags, !nh); if (!rt) goto out; rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(rt->fib6_metrics)) { err = PTR_ERR(rt->fib6_metrics); /* Do not leave garbage there. */ rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; goto out_free; } if (cfg->fc_flags & RTF_ADDRCONF) rt->dst_nocount = true; if (cfg->fc_flags & RTF_EXPIRES) fib6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; rt->fib6_protocol = cfg->fc_protocol; rt->fib6_table = table; rt->fib6_metric = cfg->fc_metric; rt->fib6_type = cfg->fc_type ? : RTN_UNICAST; rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY; ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->fib6_dst.plen = cfg->fc_dst_len; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->fib6_src.plen = cfg->fc_src_len; #endif if (nh) { if (rt->fib6_src.plen) { NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); goto out_free; } if (!nexthop_get(nh)) { NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); goto out_free; } rt->nh = nh; fib6_nh = nexthop_fib6_nh(rt->nh); } else { err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); if (err) goto out; fib6_nh = rt->fib6_nh; /* We cannot add true routes via loopback here, they would * result in kernel looping; promote them to reject routes */ addr_type = ipv6_addr_type(&cfg->fc_dst); if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type)) rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; } if (!ipv6_addr_any(&cfg->fc_prefsrc)) { struct net_device *dev = fib6_nh->fib_nh_dev; if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); err = -EINVAL; goto out; } rt->fib6_prefsrc.addr = cfg->fc_prefsrc; rt->fib6_prefsrc.plen = 128; } else rt->fib6_prefsrc.plen = 0; return rt; out: fib6_info_release(rt); return ERR_PTR(err); out_free: ip_fib_metrics_put(rt->fib6_metrics); kfree(rt); return ERR_PTR(err); } int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct fib6_info *rt; int err; rt = ip6_route_info_create(cfg, gfp_flags, extack); if (IS_ERR(rt)) return PTR_ERR(rt); err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); fib6_info_release(rt); return err; } static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info) { struct net *net = info->nl_net; struct fib6_table *table; int err; if (rt == net->ipv6.fib6_null_entry) { err = -ENOENT; goto out; } table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); spin_unlock_bh(&table->tb6_lock); out: fib6_info_release(rt); return err; } int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify) { struct nl_info info = { .nl_net = net, .skip_notify = skip_notify }; return __ip6_del_rt(rt, &info); } static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) { struct nl_info *info = &cfg->fc_nlinfo; struct net *net = info->nl_net; struct sk_buff *skb = NULL; struct fib6_table *table; int err = -ENOENT; if (rt == net->ipv6.fib6_null_entry) goto out_put; table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { struct fib6_info *sibling, *next_sibling; struct 
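/* cfg->fc_delete_all_nh is set by inet6_rtm_delroute() when the request
 * carries no RTA_MULTIPATH, so deleting an ECMP prefix without naming a
 * specific nexthop tears down every sibling below in one pass under the
 * table lock.
 */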
fib6_node *fn; /* prefer to send a single notification with all hops */ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (skb) { u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; if (rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, RTM_DELROUTE, info->portid, seq, 0) < 0) { kfree_skb(skb); skb = NULL; } else info->skip_notify = 1; } /* 'rt' points to the first sibling route. If it is not the * leaf, then we do not need to send a notification. Otherwise, * we need to check if the last sibling has a next route or not * and emit a replace or delete notification, respectively. */ info->skip_notify_kernel = 1; fn = rcu_dereference_protected(rt->fib6_node, lockdep_is_held(&table->tb6_lock)); if (rcu_access_pointer(fn->leaf) == rt) { struct fib6_info *last_sibling, *replace_rt; last_sibling = list_last_entry(&rt->fib6_siblings, struct fib6_info, fib6_siblings); replace_rt = rcu_dereference_protected( last_sibling->fib6_next, lockdep_is_held(&table->tb6_lock)); if (replace_rt) call_fib6_entry_notifiers_replace(net, replace_rt); else call_fib6_multipath_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, rt->fib6_nsiblings, NULL); } list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { err = fib6_del(sibling, info); if (err) goto out_unlock; } } err = fib6_del(rt, info); out_unlock: spin_unlock_bh(&table->tb6_lock); out_put: fib6_info_release(rt); if (skb) { rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); } return err; } static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) { int rc = -ESRCH; if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) goto out; if (cfg->fc_flags & RTF_GATEWAY && !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) goto out; rc = rt6_remove_exception_rt(rt); out: return rc; } static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt, struct fib6_nh *nh) { struct fib6_result res = { .f6i = rt, .nh = nh, }; struct rt6_info *rt_cache; rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src); if (rt_cache) return __ip6_del_cached_rt(rt_cache, cfg); return 0; } struct fib6_nh_del_cached_rt_arg { struct fib6_config *cfg; struct fib6_info *f6i; }; static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg) { struct fib6_nh_del_cached_rt_arg *arg = _arg; int rc; rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh); return rc != -ESRCH ? 
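/* -ESRCH only means this fib6_nh had no matching cached route; report
 * success so nexthop_for_each_fib6_nh() keeps walking the remaining
 * nexthops instead of treating it as a hard failure.
 */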
rc : 0; } static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i) { struct fib6_nh_del_cached_rt_arg arg = { .cfg = cfg, .f6i = f6i }; return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg); } static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct fib6_table *table; struct fib6_info *rt; struct fib6_node *fn; int err = -ESRCH; table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); if (!table) { NL_SET_ERR_MSG(extack, "FIB table does not exist"); return err; } rcu_read_lock(); fn = fib6_locate(&table->tb6_root, &cfg->fc_dst, cfg->fc_dst_len, &cfg->fc_src, cfg->fc_src_len, !(cfg->fc_flags & RTF_CACHE)); if (fn) { for_each_fib6_node_rt_rcu(fn) { struct fib6_nh *nh; if (rt->nh && cfg->fc_nh_id && rt->nh->id != cfg->fc_nh_id) continue; if (cfg->fc_flags & RTF_CACHE) { int rc = 0; if (rt->nh) { rc = ip6_del_cached_rt_nh(cfg, rt); } else if (cfg->fc_nh_id) { continue; } else { nh = rt->fib6_nh; rc = ip6_del_cached_rt(cfg, rt, nh); } if (rc != -ESRCH) { rcu_read_unlock(); return rc; } continue; } if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) continue; if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol) continue; if (rt->nh) { if (!fib6_info_hold_safe(rt)) continue; rcu_read_unlock(); return __ip6_del_rt(rt, &cfg->fc_nlinfo); } if (cfg->fc_nh_id) continue; nh = rt->fib6_nh; if (cfg->fc_ifindex && (!nh->fib_nh_dev || nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6)) continue; if (!fib6_info_hold_safe(rt)) continue; rcu_read_unlock(); /* if gateway was specified only delete the one hop */ if (cfg->fc_flags & RTF_GATEWAY) return __ip6_del_rt(rt, &cfg->fc_nlinfo); return __ip6_del_rt_siblings(rt, cfg); } } rcu_read_unlock(); return err; } static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; struct fib6_result res = {}; struct ndisc_options ndopts; struct inet6_dev *in6_dev; struct neighbour *neigh; struct rd_msg *msg; int optlen, on_link; u8 *lladdr; optlen = skb_tail_pointer(skb) - skb_transport_header(skb); optlen -= sizeof(*msg); if (optlen < 0) { net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); return; } msg = (struct rd_msg *)icmp6_hdr(skb); if (ipv6_addr_is_multicast(&msg->dest)) { net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); return; } on_link = 0; if (ipv6_addr_equal(&msg->dest, &msg->target)) { on_link = 1; } else if (ipv6_addr_type(&msg->target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); return; } in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; if (READ_ONCE(in6_dev->cnf.forwarding) || !READ_ONCE(in6_dev->cnf.accept_redirects)) return; /* RFC2461 8.1: * The IP source address of the Redirect MUST be the same as the current * first-hop router for the specified ICMP Destination Address. 
*/ if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) { net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); return; } lladdr = NULL; if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, skb->dev); if (!lladdr) { net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); return; } } rt = (struct rt6_info *) dst; if (rt->rt6i_flags & RTF_REJECT) { net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); return; } /* Redirect received -> path was valid. * Look, redirects are sent only in response to data packets, * so that this nexthop apparently is reachable. --ANK */ dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr); neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) return; /* * We have finally decided to accept it. */ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| NEIGH_UPDATE_F_ISROUTER)), NDISC_REDIRECT, &ndopts); rcu_read_lock(); res.f6i = rcu_dereference(rt->from); if (!res.f6i) goto out; if (res.f6i->nh) { struct fib6_nh_match_arg arg = { .dev = dst->dev, .gw = &rt->rt6i_gateway, }; nexthop_for_each_fib6_nh(res.f6i->nh, fib6_nh_find_match, &arg); /* fib6_info uses a nexthop that does not have fib6_nh * using the dst->dev. Should be impossible */ if (!arg.match) goto out; res.nh = arg.match; } else { res.nh = res.f6i->fib6_nh; } res.fib6_flags = res.f6i->fib6_flags; res.fib6_type = res.f6i->fib6_type; nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL); if (!nrt) goto out; nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; /* rt6_insert_exception() will take care of duplicated exceptions */ if (rt6_insert_exception(nrt, &res)) { dst_release_immediate(&nrt->dst); goto out; } netevent.old = &rt->dst; netevent.new = &nrt->dst; netevent.daddr = &msg->dest; netevent.neigh = neigh; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: rcu_read_unlock(); neigh_release(neigh); } #ifdef CONFIG_IPV6_ROUTE_INFO static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev) { u32 tb_id = l3mdev_fib_table(dev) ? 
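/* Route Information options learnt on an l3mdev (VRF) slave are stored
 * in the master device's table; otherwise they go to RT6_TABLE_INFO.
 */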
: RT6_TABLE_INFO; int ifindex = dev->ifindex; struct fib6_node *fn; struct fib6_info *rt = NULL; struct fib6_table *table; table = fib6_get_table(net, tb_id); if (!table) return NULL; rcu_read_lock(); fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true); if (!fn) goto out; for_each_fib6_node_rt_rcu(fn) { /* these routes do not use nexthops */ if (rt->nh) continue; if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex) continue; if (!(rt->fib6_flags & RTF_ROUTEINFO) || !rt->fib6_nh->fib_nh_gw_family) continue; if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr)) continue; if (!fib6_info_hold_safe(rt)) continue; break; } out: rcu_read_unlock(); return rt; } static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { struct fib6_config cfg = { .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_protocol = RTPROT_RA, .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, }; cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; /* We should treat it as a default route if prefix length is 0. */ if (!prefixlen) cfg.fc_flags |= RTF_DEFAULT; ip6_route_add(&cfg, GFP_ATOMIC, NULL); return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif struct fib6_info *rt6_get_dflt_router(struct net *net, const struct in6_addr *addr, struct net_device *dev) { u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; struct fib6_info *rt; struct fib6_table *table; table = fib6_get_table(net, tb_id); if (!table) return NULL; rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { struct fib6_nh *nh; /* RA routes do not use nexthops */ if (rt->nh) continue; nh = rt->fib6_nh; if (dev == nh->fib_nh_dev && ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&nh->fib_nh_gw6, addr)) break; } if (rt && !fib6_info_hold_safe(rt)) rt = NULL; rcu_read_unlock(); return rt; } struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref, u32 defrtr_usr_metric, int lifetime) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, .fc_metric = defrtr_usr_metric, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), .fc_protocol = RTPROT_RA, .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, .fc_expires = jiffies_to_clock_t(lifetime * HZ), }; cfg.fc_gateway = *gwaddr; if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) { struct fib6_table *table; table = fib6_get_table(dev_net(dev), cfg.fc_table); if (table) table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; } return rt6_get_dflt_router(net, gwaddr, dev); } static void __rt6_purge_dflt_routers(struct net *net, struct fib6_table *table) { struct fib6_info *rt; restart: rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { struct net_device *dev = fib6_info_nh_dev(rt); struct inet6_dev *idev = dev ? 
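/* RA-learnt default routers are flushed below unless the interface runs
 * with accept_ra == 2 ("accept router advertisements even when
 * forwarding is enabled"), in which case its default routes are kept.
 */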
__in6_dev_get(dev) : NULL; if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) && (!idev || idev->cnf.accept_ra != 2) && fib6_info_hold_safe(rt)) { rcu_read_unlock(); ip6_del_rt(net, rt, false); goto restart; } } rcu_read_unlock(); table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; } void rt6_purge_dflt_routers(struct net *net) { struct fib6_table *table; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) __rt6_purge_dflt_routers(net, table); } } rcu_read_unlock(); } static void rtmsg_to_fib6_config(struct net *net, struct in6_rtmsg *rtmsg, struct fib6_config *cfg) { *cfg = (struct fib6_config){ .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? : RT6_TABLE_MAIN, .fc_ifindex = rtmsg->rtmsg_ifindex, .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER, .fc_expires = rtmsg->rtmsg_info, .fc_dst_len = rtmsg->rtmsg_dst_len, .fc_src_len = rtmsg->rtmsg_src_len, .fc_flags = rtmsg->rtmsg_flags, .fc_type = rtmsg->rtmsg_type, .fc_nlinfo.nl_net = net, .fc_dst = rtmsg->rtmsg_dst, .fc_src = rtmsg->rtmsg_src, .fc_gateway = rtmsg->rtmsg_gateway, }; } int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) { struct fib6_config cfg; int err; if (cmd != SIOCADDRT && cmd != SIOCDELRT) return -EINVAL; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; rtmsg_to_fib6_config(net, rtmsg, &cfg); rtnl_lock(); switch (cmd) { case SIOCADDRT: err = ip6_route_add(&cfg, GFP_KERNEL, NULL); break; case SIOCDELRT: err = ip6_route_del(&cfg, NULL); break; } rtnl_unlock(); return err; } /* * Drop the packet on the floor */ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(dst->dev); struct inet6_dev *idev; SKB_DR(reason); int type; if (netif_is_l3_master(skb->dev) || dst->dev == net->loopback_dev) idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); else idev = ip6_dst_idev(dst); switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { SKB_DR_SET(reason, IP_INADDRERRORS); IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } SKB_DR_SET(reason, IP_INNOROUTES); fallthrough; case IPSTATS_MIB_OUTNOROUTES: SKB_DR_OR(reason, IP_OUTNOROUTES); IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; } /* Start over by dropping the dst for l3mdev case */ if (netif_is_l3_master(skb->dev)) skb_dst_drop(skb); icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); kfree_skb_reason(skb, reason); return 0; } static int ip6_pkt_discard(struct sk_buff *skb) { return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } static int ip6_pkt_prohibit(struct sk_buff *skb) { return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } /* * Allocate a dst for local (unicast / anycast) address. 
*/ struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev, const struct in6_addr *addr, bool anycast, gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL, .fc_ifindex = idev->dev->ifindex, .fc_flags = RTF_UP | RTF_NONEXTHOP, .fc_dst = *addr, .fc_dst_len = 128, .fc_protocol = RTPROT_KERNEL, .fc_nlinfo.nl_net = net, .fc_ignore_dev_down = true, }; struct fib6_info *f6i; if (anycast) { cfg.fc_type = RTN_ANYCAST; cfg.fc_flags |= RTF_ANYCAST; } else { cfg.fc_type = RTN_LOCAL; cfg.fc_flags |= RTF_LOCAL; } f6i = ip6_route_info_create(&cfg, gfp_flags, extack); if (!IS_ERR(f6i)) { f6i->dst_nocount = true; if (!anycast && (READ_ONCE(net->ipv6.devconf_all->disable_policy) || READ_ONCE(idev->cnf.disable_policy))) f6i->dst_nopolicy = true; } return f6i; } /* remove deleted ip from prefsrc entries */ struct arg_dev_net_ip { struct net *net; struct in6_addr *addr; }; static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) { struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; if (!rt->nh && rt != net->ipv6.fib6_null_entry && ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) && !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) { spin_lock_bh(&rt6_exception_lock); /* remove prefsrc entry */ rt->fib6_prefsrc.plen = 0; spin_unlock_bh(&rt6_exception_lock); } return 0; } void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); struct arg_dev_net_ip adni = { .net = net, .addr = &ifp->addr, }; fib6_clean_all(net, fib6_remove_prefsrc, &adni); } #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT) /* Remove routers and update dst entries when gateway turn into host. */ static int fib6_clean_tohost(struct fib6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; struct fib6_nh *nh; /* RA routes do not use nexthops */ if (rt->nh) return 0; nh = rt->fib6_nh; if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6)) return -1; /* Further clean up cached routes in exception table. * This is needed because cached route may have a different * gateway than its 'parent' in the case of an ip redirect. 
*/ fib6_nh_exceptions_clean_tohost(nh, gateway); return 0; } void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) { fib6_clean_all(net, fib6_clean_tohost, gateway); } struct arg_netdev_event { const struct net_device *dev; union { unsigned char nh_flags; unsigned long event; }; }; static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) { struct fib6_info *iter; struct fib6_node *fn; fn = rcu_dereference_protected(rt->fib6_node, lockdep_is_held(&rt->fib6_table->tb6_lock)); iter = rcu_dereference_protected(fn->leaf, lockdep_is_held(&rt->fib6_table->tb6_lock)); while (iter) { if (iter->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(iter)) return iter; iter = rcu_dereference_protected(iter->fib6_next, lockdep_is_held(&rt->fib6_table->tb6_lock)); } return NULL; } /* only called for fib entries with builtin fib6_nh */ static bool rt6_is_dead(const struct fib6_info *rt) { if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD || (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN && ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev))) return true; return false; } static int rt6_multipath_total_weight(const struct fib6_info *rt) { struct fib6_info *iter; int total = 0; if (!rt6_is_dead(rt)) total += rt->fib6_nh->fib_nh_weight; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (!rt6_is_dead(iter)) total += iter->fib6_nh->fib_nh_weight; } return total; } static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) { int upper_bound = -1; if (!rt6_is_dead(rt)) { *weight += rt->fib6_nh->fib_nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound); } static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) { struct fib6_info *iter; int weight = 0; rt6_upper_bound_set(rt, &weight, total); list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) rt6_upper_bound_set(iter, &weight, total); } void rt6_multipath_rebalance(struct fib6_info *rt) { struct fib6_info *first; int total; /* In case the entire multipath route was marked for flushing, * then there is no need to rebalance upon the removal of every * sibling route. */ if (!rt->fib6_nsiblings || rt->should_flush) return; /* During lookup routes are evaluated in order, so we need to * make sure upper bounds are assigned from the first sibling * onwards. 
*/ first = rt6_multipath_first_sibling(rt); if (WARN_ON_ONCE(!first)) return; total = rt6_multipath_total_weight(first); rt6_multipath_upper_bound_set(first, total); } static int fib6_ifup(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; struct net *net = dev_net(arg->dev); if (rt != net->ipv6.fib6_null_entry && !rt->nh && rt->fib6_nh->fib_nh_dev == arg->dev) { rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags; fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } return 0; } void rt6_sync_up(struct net_device *dev, unsigned char nh_flags) { struct arg_netdev_event arg = { .dev = dev, { .nh_flags = nh_flags, }, }; if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev)) arg.nh_flags |= RTNH_F_LINKDOWN; fib6_clean_all(dev_net(dev), fib6_ifup, &arg); } /* only called for fib entries with inline fib6_nh */ static bool rt6_multipath_uses_dev(const struct fib6_info *rt, const struct net_device *dev) { struct fib6_info *iter; if (rt->fib6_nh->fib_nh_dev == dev) return true; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) if (iter->fib6_nh->fib_nh_dev == dev) return true; return false; } static void rt6_multipath_flush(struct fib6_info *rt) { struct fib6_info *iter; rt->should_flush = 1; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) iter->should_flush = 1; } static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, const struct net_device *down_dev) { struct fib6_info *iter; unsigned int dead = 0; if (rt->fib6_nh->fib_nh_dev == down_dev || rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD) dead++; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) if (iter->fib6_nh->fib_nh_dev == down_dev || iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD) dead++; return dead; } static void rt6_multipath_nh_flags_set(struct fib6_info *rt, const struct net_device *dev, unsigned char nh_flags) { struct fib6_info *iter; if (rt->fib6_nh->fib_nh_dev == dev) rt->fib6_nh->fib_nh_flags |= nh_flags; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) if (iter->fib6_nh->fib_nh_dev == dev) iter->fib6_nh->fib_nh_flags |= nh_flags; } /* called with write lock held for table with rt */ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; const struct net_device *dev = arg->dev; struct net *net = dev_net(dev); if (rt == net->ipv6.fib6_null_entry || rt->nh) return 0; switch (arg->event) { case NETDEV_UNREGISTER: return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; if (!rt->fib6_nsiblings) return rt->fib6_nh->fib_nh_dev == dev ? 
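/* For the fib6_clean walker, returning -1 requests deletion of the
 * entry and 0 leaves it alone, so a single-nexthop route is removed
 * only if it actually egresses through the device going down.
 */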
-1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count; count = rt6_multipath_dead_count(rt, dev); if (rt->fib6_nsiblings + 1 == count) { rt6_multipath_flush(rt); return -1; } rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); } return -2; case NETDEV_CHANGE: if (rt->fib6_nh->fib_nh_dev != dev || rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } return 0; } void rt6_sync_down_dev(struct net_device *dev, unsigned long event) { struct arg_netdev_event arg = { .dev = dev, { .event = event, }, }; struct net *net = dev_net(dev); if (net->ipv6.sysctl.skip_notify_on_dev_down) fib6_clean_all_skip_notify(net, fib6_ifdown, &arg); else fib6_clean_all(net, fib6_ifdown, &arg); } void rt6_disable_ip(struct net_device *dev, unsigned long event) { rt6_sync_down_dev(dev, event); rt6_uncached_list_flush_dev(dev); neigh_ifdown(&nd_tbl, dev); } struct rt6_mtu_change_arg { struct net_device *dev; unsigned int mtu; struct fib6_info *f6i; }; static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg; struct fib6_info *f6i = arg->f6i; /* For administrative MTU increase, there is no way to discover * IPv6 PMTU increase, so PMTU increase should be updated here. * Since RFC 1981 doesn't include administrative MTU increase * update PMTU increase is a MUST. (i.e. jumbo frame) */ if (nh->fib_nh_dev == arg->dev) { struct inet6_dev *idev = __in6_dev_get(arg->dev); u32 mtu = f6i->fib6_pmtu; if (mtu >= arg->mtu || (mtu < arg->mtu && mtu == idev->cnf.mtu6)) fib6_metric_set(f6i, RTAX_MTU, arg->mtu); spin_lock_bh(&rt6_exception_lock); rt6_exceptions_update_pmtu(idev, nh, arg->mtu); spin_unlock_bh(&rt6_exception_lock); } return 0; } static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. We still use this lock to block changes caused by addrconf/ndisc. 
*/ idev = __in6_dev_get(arg->dev); if (!idev) return 0; if (fib6_metric_locked(f6i, RTAX_MTU)) return 0; arg->f6i = f6i; if (f6i->nh) { /* fib6_nh_mtu_change only returns 0, so this is safe */ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change, arg); } return fib6_nh_mtu_change(f6i->fib6_nh, arg); } void rt6_mtu_change(struct net_device *dev, unsigned int mtu) { struct rt6_mtu_change_arg arg = { .dev = dev, .mtu = mtu, }; fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 }, [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_PREF] = { .type = NLA_U8 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, [RTA_TABLE] = { .type = NLA_U32 }, [RTA_IP_PROTO] = { .type = NLA_U8 }, [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, [RTA_NH_ID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; unsigned int pref; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, extack); if (err < 0) goto errout; err = -EINVAL; rtm = nlmsg_data(nlh); if (rtm->rtm_tos) { NL_SET_ERR_MSG(extack, "Invalid dsfield (tos): option not available for IPv6"); goto errout; } *cfg = (struct fib6_config){ .fc_table = rtm->rtm_table, .fc_dst_len = rtm->rtm_dst_len, .fc_src_len = rtm->rtm_src_len, .fc_flags = RTF_UP, .fc_protocol = rtm->rtm_protocol, .fc_type = rtm->rtm_type, .fc_nlinfo.portid = NETLINK_CB(skb).portid, .fc_nlinfo.nlh = nlh, .fc_nlinfo.nl_net = sock_net(skb->sk), }; if (rtm->rtm_type == RTN_UNREACHABLE || rtm->rtm_type == RTN_BLACKHOLE || rtm->rtm_type == RTN_PROHIBIT || rtm->rtm_type == RTN_THROW) cfg->fc_flags |= RTF_REJECT; if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; if (rtm->rtm_flags & RTM_F_CLONED) cfg->fc_flags |= RTF_CACHE; cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); if (tb[RTA_NH_ID]) { if (tb[RTA_GATEWAY] || tb[RTA_OIF] || tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) { NL_SET_ERR_MSG(extack, "Nexthop specification and nexthop id are mutually exclusive"); goto errout; } cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]); } if (tb[RTA_GATEWAY]) { cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; } if (tb[RTA_VIA]) { NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute"); goto errout; } if (tb[RTA_DST]) { int plen = (rtm->rtm_dst_len + 7) >> 3; if (nla_len(tb[RTA_DST]) < plen) goto errout; nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); } if (tb[RTA_SRC]) { int plen = (rtm->rtm_src_len + 7) >> 3; if (nla_len(tb[RTA_SRC]) < plen) goto errout; nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); } if (tb[RTA_PREFSRC]) cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); if (tb[RTA_OIF]) cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); if (tb[RTA_PRIORITY]) cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); if (tb[RTA_METRICS]) { cfg->fc_mx = nla_data(tb[RTA_METRICS]); cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); } if (tb[RTA_TABLE]) cfg->fc_table = 
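/* RTA_TABLE overrides the 8-bit rtm_table field from the header; it is
 * what lets userspace address table IDs above 255.
 */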
nla_get_u32(tb[RTA_TABLE]); if (tb[RTA_MULTIPATH]) { cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len, extack); if (err < 0) goto errout; } if (tb[RTA_PREF]) { pref = nla_get_u8(tb[RTA_PREF]); if (pref != ICMPV6_ROUTER_PREF_LOW && pref != ICMPV6_ROUTER_PREF_HIGH) pref = ICMPV6_ROUTER_PREF_MEDIUM; cfg->fc_flags |= RTF_PREF(pref); } if (tb[RTA_ENCAP]) cfg->fc_encap = tb[RTA_ENCAP]; if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); if (err < 0) goto errout; } if (tb[RTA_EXPIRES]) { unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); if (addrconf_finite_timeout(timeout)) { cfg->fc_expires = jiffies_to_clock_t(timeout * HZ); cfg->fc_flags |= RTF_EXPIRES; } } err = 0; errout: return err; } struct rt6_nh { struct fib6_info *fib6_info; struct fib6_config r_cfg; struct list_head next; }; static int ip6_route_info_append(struct net *net, struct list_head *rt6_nh_list, struct fib6_info *rt, struct fib6_config *r_cfg) { struct rt6_nh *nh; int err = -EEXIST; list_for_each_entry(nh, rt6_nh_list, next) { /* check if fib6_info already exists */ if (rt6_duplicate_nexthop(nh->fib6_info, rt)) return err; } nh = kzalloc(sizeof(*nh), GFP_KERNEL); if (!nh) return -ENOMEM; nh->fib6_info = rt; memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); list_add_tail(&nh->next, rt6_nh_list); return 0; } static void ip6_route_mpath_notify(struct fib6_info *rt, struct fib6_info *rt_last, struct nl_info *info, __u16 nlflags) { /* if this is an APPEND route, then rt points to the first route * inserted and rt_last points to last route inserted. Userspace * wants a consistent dump of the route which starts at the first * nexthop. Since sibling routes are always added at the end of * the list, find the first sibling of the last route appended */ if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { rt = list_first_entry(&rt_last->fib6_siblings, struct fib6_info, fib6_siblings); } if (rt) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); } static bool ip6_route_mpath_should_notify(const struct fib6_info *rt) { bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); bool should_notify = false; struct fib6_info *leaf; struct fib6_node *fn; rcu_read_lock(); fn = rcu_dereference(rt->fib6_node); if (!fn) goto out; leaf = rcu_dereference(fn->leaf); if (!leaf) goto out; if (rt == leaf || (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric && rt6_qualify_for_ecmp(leaf))) should_notify = true; out: rcu_read_unlock(); return should_notify; } static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla, struct netlink_ext_ack *extack) { if (nla_len(nla) < sizeof(*gw)) { NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY"); return -EINVAL; } *gw = nla_get_in6_addr(nla); return 0; } static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; struct fib6_config r_cfg; struct rtnexthop *rtnh; struct fib6_info *rt; struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; __u16 nlflags; int remaining; int attrlen; int err = 1; int nhn = 0; int replace = (cfg->fc_nlinfo.nlh && (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); LIST_HEAD(rt6_nh_list); nlflags = replace ? 
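/* Illustrative only: this is the path taken for a request such as
 *
 *	ip -6 route add 2001:db8::/64 \
 *		nexthop via fe80::1 dev eth0 \
 *		nexthop via fe80::2 dev eth1
 *
 * where each rtnexthop inside RTA_MULTIPATH is turned into its own
 * fib6_info by the loop below.
 */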
NLM_F_REPLACE : NLM_F_CREATE; if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) nlflags |= NLM_F_APPEND; remaining = cfg->fc_mp_len; rtnh = (struct rtnexthop *)cfg->fc_mp; /* Parse a Multipath Entry and build a list (rt6_nh_list) of * fib6_info structs per nexthop */ while (rtnh_ok(rtnh, remaining)) { memcpy(&r_cfg, cfg, sizeof(*cfg)); if (rtnh->rtnh_ifindex) r_cfg.fc_ifindex = rtnh->rtnh_ifindex; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, extack); if (err) goto cleanup; r_cfg.fc_flags |= RTF_GATEWAY; } r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); /* RTA_ENCAP_TYPE length checked in * lwtunnel_valid_encap_type_attr */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) r_cfg.fc_encap_type = nla_get_u16(nla); } r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto cleanup; } if (!rt6_qualify_for_ecmp(rt)) { err = -EINVAL; NL_SET_ERR_MSG(extack, "Device only routes can not be added for IPv6 using the multipath API."); fib6_info_release(rt); goto cleanup; } rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1; err = ip6_route_info_append(info->nl_net, &rt6_nh_list, rt, &r_cfg); if (err) { fib6_info_release(rt); goto cleanup; } rtnh = rtnh_next(rtnh, &remaining); } if (list_empty(&rt6_nh_list)) { NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - no valid nexthops"); return -EINVAL; } /* for add and replace send one notification with all nexthops. * Skip the notification in fib6_add_rt2node and send one with * the full route when done */ info->skip_notify = 1; /* For add and replace, send one notification with all nexthops. For * append, send one notification with all appended nexthops. */ info->skip_notify_kernel = 1; err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); if (err) { if (replace && nhn) NL_SET_ERR_MSG_MOD(extack, "multipath route replace failed (check consistency of installed routes)"); err_nh = nh; goto add_errout; } /* save reference to last route successfully inserted */ rt_last = nh->fib6_info; /* save reference to first route for notification */ if (!rt_notif) rt_notif = nh->fib6_info; /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, * we have already failed to add the first nexthop: * fib6_add_rt2node() has rejected it; when replacing, old * nexthops have been replaced by first new, the rest should * be added to it. */ if (cfg->fc_nlinfo.nlh) { cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; } nhn++; } /* An in-kernel notification should only be sent in case the new * multipath route is added as the first route in the node, or if * it was appended to it. We pass 'rt_notif' since it is the first * sibling and might allow us to skip some checks in the replace case. */ if (ip6_route_mpath_should_notify(rt_notif)) { enum fib_event_type fib_event; if (rt_notif->fib6_nsiblings != nhn - 1) fib_event = FIB_EVENT_ENTRY_APPEND; else fib_event = FIB_EVENT_ENTRY_REPLACE; err = call_fib6_multipath_entry_notifiers(info->nl_net, fib_event, rt_notif, nhn - 1, extack); if (err) { /* Delete all the siblings that were just added */ err_nh = NULL; goto add_errout; } } /* success ... 
tell user about new route */ ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); goto cleanup; add_errout: /* send notification for routes that were added so that * the delete notifications sent by ip6_route_del are * coherent */ if (rt_notif) ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); /* Delete routes that were already added */ list_for_each_entry(nh, &rt6_nh_list, next) { if (err_nh == nh) break; ip6_route_del(&nh->r_cfg, extack); } cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } return err; } static int ip6_route_multipath_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct fib6_config r_cfg; struct rtnexthop *rtnh; int last_err = 0; int remaining; int attrlen; int err; remaining = cfg->fc_mp_len; rtnh = (struct rtnexthop *)cfg->fc_mp; /* Parse a Multipath Entry */ while (rtnh_ok(rtnh, remaining)) { memcpy(&r_cfg, cfg, sizeof(*cfg)); if (rtnh->rtnh_ifindex) r_cfg.fc_ifindex = rtnh->rtnh_ifindex; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, extack); if (err) { last_err = err; goto next_rtnh; } r_cfg.fc_flags |= RTF_GATEWAY; } } err = ip6_route_del(&r_cfg, extack); if (err) last_err = err; next_rtnh: rtnh = rtnh_next(rtnh, &remaining); } return last_err; } static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct fib6_config cfg; int err; err = rtm_to_fib6_config(skb, nlh, &cfg, extack); if (err < 0) return err; if (cfg.fc_nh_id && !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) { NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); return -EINVAL; } if (cfg.fc_mp) return ip6_route_multipath_del(&cfg, extack); else { cfg.fc_delete_all_nh = 1; return ip6_route_del(&cfg, extack); } } static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct fib6_config cfg; int err; err = rtm_to_fib6_config(skb, nlh, &cfg, extack); if (err < 0) return err; if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; if (cfg.fc_mp) return ip6_route_multipath_add(&cfg, extack); else return ip6_route_add(&cfg, GFP_KERNEL, extack); } /* add the overhead of this fib6_nh to nexthop_len */ static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg) { int *nexthop_len = arg; *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16); /* RTA_GATEWAY */ if (nh->fib_nh_lws) { /* RTA_ENCAP_TYPE */ *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); /* RTA_ENCAP */ *nexthop_len += nla_total_size(2); } return 0; } static size_t rt6_nlmsg_size(struct fib6_info *f6i) { int nexthop_len; if (f6i->nh) { nexthop_len = nla_total_size(4); /* RTA_NH_ID */ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, &nexthop_len); } else { struct fib6_info *sibling, *next_sibling; struct fib6_nh *nh = f6i->fib6_nh; nexthop_len = 0; if (f6i->fib6_nsiblings) { rt6_nh_nlmsg_size(nh, &nexthop_len); list_for_each_entry_safe(sibling, next_sibling, &f6i->fib6_siblings, fib6_siblings) { rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); } } nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } return NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(16) /* RTA_SRC */ + nla_total_size(16) /* RTA_DST */ + nla_total_size(16) /* RTA_GATEWAY */ + nla_total_size(16) /* RTA_PREFSRC */ + 
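/* The remaining attribute sizes are reserved unconditionally: this is a
 * worst-case estimate used to size the notification skb, so room for
 * attributes that end up not being emitted is simply left unused.
 */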
nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_IIF */ + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ + nexthop_len; } static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, unsigned char *flags) { if (nexthop_is_multipath(nh)) { struct nlattr *mp; mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; if (nexthop_mpath_fill_node(skb, nh, AF_INET6)) goto nla_put_failure; nla_nest_end(skb, mp); } else { struct fib6_nh *fib6_nh; fib6_nh = nexthop_fib6_nh(nh); if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6, flags, false) < 0) goto nla_put_failure; } return 0; nla_put_failure: return -EMSGSIZE; } static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags) { struct rt6_info *rt6 = (struct rt6_info *)dst; struct rt6key *rt6_dst, *rt6_src; u32 *pmetrics, table, rt6_flags; unsigned char nh_flags = 0; struct nlmsghdr *nlh; struct rtmsg *rtm; long expires = 0; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; if (rt6) { rt6_dst = &rt6->rt6i_dst; rt6_src = &rt6->rt6i_src; rt6_flags = rt6->rt6i_flags; } else { rt6_dst = &rt->fib6_dst; rt6_src = &rt->fib6_src; rt6_flags = rt->fib6_flags; } rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; rtm->rtm_dst_len = rt6_dst->plen; rtm->rtm_src_len = rt6_src->plen; rtm->rtm_tos = 0; if (rt->fib6_table) table = rt->fib6_table->tb6_id; else table = RT6_TABLE_UNSPEC; rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; rtm->rtm_type = rt->fib6_type; rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->fib6_protocol; if (rt6_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; if (dest) { if (nla_put_in6_addr(skb, RTA_DST, dest)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { if (nla_put_in6_addr(skb, RTA_SRC, src)) goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr)) goto nla_put_failure; #endif if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt6_dst->addr)) { int err = ip6mr_get_route(net, skb, rtm, portid); if (err == 0) return 0; if (err < 0) goto nla_put_failure; } else #endif if (nla_put_u32(skb, RTA_IIF, iif)) goto nla_put_failure; } else if (dest) { struct in6_addr saddr_buf; if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } if (rt->fib6_prefsrc.plen) { struct in6_addr saddr_buf; saddr_buf = rt->fib6_prefsrc.addr; if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics; if (rtnetlink_put_metrics(skb, pmetrics) < 0) goto nla_put_failure; if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric)) goto nla_put_failure; /* For multipath routes, walk the siblings list and add * each as a nexthop within RTA_MULTIPATH. 
*/ if (rt6) { if (rt6_flags & RTF_GATEWAY && nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway)) goto nla_put_failure; if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex)) goto nla_put_failure; if (dst->lwtstate && lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; } else if (rt->fib6_nsiblings) { struct fib6_info *sibling, *next_sibling; struct nlattr *mp; mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, rt->fib6_nh->fib_nh_weight, AF_INET6, 0) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, sibling->fib6_nh->fib_nh_weight, AF_INET6, 0) < 0) goto nla_put_failure; } nla_nest_end(skb, mp); } else if (rt->nh) { if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) goto nla_put_failure; if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure; rtm->rtm_flags |= nh_flags; } else { if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6, &nh_flags, false) < 0) goto nla_put_failure; rtm->rtm_flags |= nh_flags; } if (rt6_flags & RTF_EXPIRES) { expires = dst ? dst->expires : rt->expires; expires -= jiffies; } if (!dst) { if (READ_ONCE(rt->offload)) rtm->rtm_flags |= RTM_F_OFFLOAD; if (READ_ONCE(rt->trap)) rtm->rtm_flags |= RTM_F_TRAP; if (READ_ONCE(rt->offload_failed)) rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED; } if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) goto nla_put_failure; if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags))) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg) { const struct net_device *dev = arg; if (nh->fib_nh_dev == dev) return 1; return 0; } static bool fib6_info_uses_dev(const struct fib6_info *f6i, const struct net_device *dev) { if (f6i->nh) { struct net_device *_dev = (struct net_device *)dev; return !!nexthop_for_each_fib6_nh(f6i->nh, fib6_info_nh_uses_dev, _dev); } if (f6i->fib6_nh->fib_nh_dev == dev) return true; if (f6i->fib6_nsiblings) { struct fib6_info *sibling, *next_sibling; list_for_each_entry_safe(sibling, next_sibling, &f6i->fib6_siblings, fib6_siblings) { if (sibling->fib6_nh->fib_nh_dev == dev) return true; } } return false; } struct fib6_nh_exception_dump_walker { struct rt6_rtnl_dump_arg *dump; struct fib6_info *rt; unsigned int flags; unsigned int skip; unsigned int count; }; static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg) { struct fib6_nh_exception_dump_walker *w = arg; struct rt6_rtnl_dump_arg *dump = w->dump; struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; int i, err; bucket = fib6_nh_get_excptn_bucket(nh, NULL); if (!bucket) return 0; for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { if (w->skip) { w->skip--; continue; } /* Expiration of entries doesn't bump sernum, insertion * does. Removal is triggered by insertion, so we can * rely on the fact that if entries change between two * partial dumps, this node is scanned again completely, * see rt6_insert_exception() and fib6_dump_table(). * * Count expired entries we go through as handled * entries that we'll skip next time, in case of partial * node dump. 
Otherwise, if entries expire meanwhile, * we'll skip the wrong amount. */ if (rt6_check_expired(rt6_ex->rt6i)) { w->count++; continue; } err = rt6_fill_node(dump->net, dump->skb, w->rt, &rt6_ex->rt6i->dst, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(dump->cb->skb).portid, dump->cb->nlh->nlmsg_seq, w->flags); if (err) return err; w->count++; } bucket++; } return 0; } /* Return -1 if done with node, number of handled routes on partial dump */ int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; struct fib_dump_filter *filter = &arg->filter; unsigned int flags = NLM_F_MULTI; struct net *net = arg->net; int count = 0; if (rt == net->ipv6.fib6_null_entry) return -1; if ((filter->flags & RTM_F_PREFIX) && !(rt->fib6_flags & RTF_PREFIX_RT)) { /* success since this is not a prefix route */ return -1; } if (filter->filter_set && ((filter->rt_type && rt->fib6_type != filter->rt_type) || (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) || (filter->protocol && rt->fib6_protocol != filter->protocol))) { return -1; } if (filter->filter_set || !filter->dump_routes || !filter->dump_exceptions) { flags |= NLM_F_DUMP_FILTERED; } if (filter->dump_routes) { if (skip) { skip--; } else { if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, flags)) { return 0; } count++; } } if (filter->dump_exceptions) { struct fib6_nh_exception_dump_walker w = { .dump = arg, .rt = rt, .flags = flags, .skip = skip, .count = 0 }; int err; rcu_read_lock(); if (rt->nh) { err = nexthop_for_each_fib6_nh(rt->nh, rt6_nh_dump_exceptions, &w); } else { err = rt6_nh_dump_exceptions(rt->fib6_nh, &w); } rcu_read_unlock(); if (err) return count + w.count; } return -1; } static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct rtmsg *rtm; int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for get route request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); return -EINVAL; } if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { NL_SET_ERR_MSG_MOD(extack, "Invalid flags for get route request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, extack); if (err) return err; if ((tb[RTA_SRC] && !rtm->rtm_src_len) || (tb[RTA_DST] && !rtm->rtm_dst_len)) { NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); return -EINVAL; } for (i = 0; i <= RTA_MAX; i++) { if (!tb[i]) continue; switch (i) { case RTA_SRC: case RTA_DST: case RTA_IIF: case RTA_OIF: case RTA_MARK: case RTA_UID: case RTA_SPORT: case RTA_DPORT: case RTA_IP_PROTO: break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); return -EINVAL; } } return 0; } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; int err, iif = 0, oif = 0; struct fib6_info *from; struct dst_entry *dst; struct 
rt6_info *rt; struct sk_buff *skb; struct rtmsg *rtm; struct flowi6 fl6 = {}; bool fibmatch; err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; err = -EINVAL; rtm = nlmsg_data(nlh); fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) goto errout; fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); } if (tb[RTA_DST]) { if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) goto errout; fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); } if (tb[RTA_IIF]) iif = nla_get_u32(tb[RTA_IIF]); if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); if (tb[RTA_UID]) fl6.flowi6_uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); else fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); if (tb[RTA_SPORT]) fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]); if (tb[RTA_DPORT]) fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]); if (tb[RTA_IP_PROTO]) { err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], &fl6.flowi6_proto, AF_INET6, extack); if (err) goto errout; } if (iif) { struct net_device *dev; int flags = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(net, iif); if (!dev) { rcu_read_unlock(); err = -ENODEV; goto errout; } fl6.flowi6_iif = iif; if (!ipv6_addr_any(&fl6.saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags); rcu_read_unlock(); } else { fl6.flowi6_oif = oif; dst = ip6_route_output(net, NULL, &fl6); } rt = container_of(dst, struct rt6_info, dst); if (rt->dst.error) { err = rt->dst.error; ip6_rt_put(rt); goto errout; } if (rt == net->ipv6.ip6_null_entry) { err = rt->dst.error; ip6_rt_put(rt); goto errout; } skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); err = -ENOBUFS; goto errout; } skb_dst_set(skb, &rt->dst); rcu_read_lock(); from = rcu_dereference(rt->from); if (from) { if (fibmatch) err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0); else err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0); } else { err = -ENETUNREACH; } rcu_read_unlock(); if (err < 0) { kfree_skb(skb); goto errout; } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; } void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int nlm_flags) { struct sk_buff *skb; struct net *net = info->nl_net; u32 seq; int err; err = -ENOBUFS; seq = info->nlh ? info->nlh->nlmsg_seq : 0; skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (!skb) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, event, info->portid, seq, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); } void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info) { u32 seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (!skb) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); } void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, bool offload, bool trap, bool offload_failed) { struct sk_buff *skb; int err; if (READ_ONCE(f6i->offload) == offload && READ_ONCE(f6i->trap) == trap && READ_ONCE(f6i->offload_failed) == offload_failed) return; WRITE_ONCE(f6i->offload, offload); WRITE_ONCE(f6i->trap, trap); /* 2 means send notifications only if offload_failed was changed. */ if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 && READ_ONCE(f6i->offload_failed) == offload_failed) return; WRITE_ONCE(f6i->offload_failed, offload_failed); if (!rcu_access_pointer(f6i->fib6_node)) /* The route was removed from the tree, do not send * notification. */ return; if (!net->ipv6.sysctl.fib_notify_on_flag_change) return; skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL); if (!skb) { err = -ENOBUFS; goto errout; } err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); } EXPORT_SYMBOL(fib6_info_hw_flags_set); static int ip6_route_dev_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (!(dev->flags & IFF_LOOPBACK)) return NOTIFY_OK; if (event == NETDEV_REGISTER) { net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev; net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry->dst.dev = dev; net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif } else if (event == NETDEV_UNREGISTER && dev->reg_state != NETREG_UNREGISTERED) { /* NETDEV_UNREGISTER could be fired for multiple times by * netdev_wait_allrefs(). Make sure we only call this once. 
*/ in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev); in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev); #endif } return NOTIFY_OK; } /* * /proc */ #ifdef CONFIG_PROC_FS static int rt6_stats_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", net->ipv6.rt6_stats->fib_nodes, net->ipv6.rt6_stats->fib_route_nodes, atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc), net->ipv6.rt6_stats->fib_rt_entries, net->ipv6.rt6_stats->fib_rt_cache, dst_entries_get_slow(&net->ipv6.ip6_dst_ops), net->ipv6.rt6_stats->fib_discarded_routes); return 0; } #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net; int delay; int ret; if (!write) return -EINVAL; net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (ret) return ret; fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } static struct ctl_table ipv6_route_table_template[] = { { .procname = "max_size", .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "gc_thresh", .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "flush", .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, .proc_handler = ipv6_sysctl_rtcache_flush }, { .procname = "gc_min_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "gc_timeout", .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "gc_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "gc_elasticity", .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "mtu_expires", .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "min_adv_mss", .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "gc_min_interval_ms", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, { .procname = "skip_notify_on_dev_down", .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { } }; struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) { struct ctl_table *table; table = kmemdup(ipv6_route_table_template, sizeof(ipv6_route_table_template), GFP_KERNEL); if (table) { table[0].data = &net->ipv6.sysctl.ip6_rt_max_size; table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; table[2].data = &net->ipv6.sysctl.flush_delay; table[2].extra1 = net; table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 
table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) table[1].procname = NULL; } return table; } size_t ipv6_route_sysctl_table_size(struct net *net) { /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) return 1; return ARRAY_SIZE(ipv6_route_table_template); } #endif static int __net_init ip6_route_net_init(struct net *net) { int ret = -ENOMEM; memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, sizeof(net->ipv6.ip6_dst_ops)); if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) goto out_ip6_dst_ops; net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true); if (!net->ipv6.fib6_null_entry) goto out_ip6_dst_entries; memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template, sizeof(*net->ipv6.fib6_null_entry)); net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), GFP_KERNEL); if (!net->ipv6.ip6_null_entry) goto out_fib6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_null_entry->dst, ip6_template_metrics, true); INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_has_custom_rules = false; net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, sizeof(*net->ipv6.ip6_prohibit_entry), GFP_KERNEL); if (!net->ipv6.ip6_prohibit_entry) goto out_ip6_null_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, ip6_template_metrics, true); INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), GFP_KERNEL); if (!net->ipv6.ip6_blk_hole_entry) goto out_ip6_prohibit_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, ip6_template_metrics, true); INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached); #ifdef CONFIG_IPV6_SUBTREES net->ipv6.fib6_routes_require_src = 0; #endif #endif net->ipv6.sysctl.flush_delay = 0; net->ipv6.sysctl.ip6_rt_max_size = INT_MAX; net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; net->ipv6.sysctl.skip_notify_on_dev_down = 0; atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ); ret = 0; out: return ret; #ifdef CONFIG_IPV6_MULTIPLE_TABLES out_ip6_prohibit_entry: kfree(net->ipv6.ip6_prohibit_entry); out_ip6_null_entry: kfree(net->ipv6.ip6_null_entry); #endif out_fib6_null_entry: kfree(net->ipv6.fib6_null_entry); out_ip6_dst_entries: dst_entries_destroy(&net->ipv6.ip6_dst_ops); out_ip6_dst_ops: goto out; } static void __net_exit ip6_route_net_exit(struct net *net) { kfree(net->ipv6.fib6_null_entry); kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.ip6_prohibit_entry); kfree(net->ipv6.ip6_blk_hole_entry); #endif dst_entries_destroy(&net->ipv6.ip6_dst_ops); } static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS if 
(!proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops, sizeof(struct ipv6_route_iter))) return -ENOMEM; if (!proc_create_net_single("rt6_stats", 0444, net->proc_net, rt6_stats_seq_show, NULL)) { remove_proc_entry("ipv6_route", net->proc_net); return -ENOMEM; } #endif return 0; } static void __net_exit ip6_route_net_exit_late(struct net *net) { #ifdef CONFIG_PROC_FS remove_proc_entry("ipv6_route", net->proc_net); remove_proc_entry("rt6_stats", net->proc_net); #endif } static struct pernet_operations ip6_route_net_ops = { .init = ip6_route_net_init, .exit = ip6_route_net_exit, }; static int __net_init ipv6_inetpeer_init(struct net *net) { struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); if (!bp) return -ENOMEM; inet_peer_base_init(bp); net->ipv6.peers = bp; return 0; } static void __net_exit ipv6_inetpeer_exit(struct net *net) { struct inet_peer_base *bp = net->ipv6.peers; net->ipv6.peers = NULL; inetpeer_invalidate_tree(bp); kfree(bp); } static struct pernet_operations ipv6_inetpeer_ops = { .init = ipv6_inetpeer_init, .exit = ipv6_inetpeer_exit, }; static struct pernet_operations ip6_route_net_late_ops = { .init = ip6_route_net_init_late, .exit = ip6_route_net_exit_late, }; static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, .priority = ADDRCONF_NOTIFY_PRIORITY - 10, }; void __init ip6_route_init_special_entries(void) { /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif } #if IS_BUILTIN(CONFIG_IPV6) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) BTF_ID_LIST(btf_fib6_info_id) BTF_ID(struct, fib6_info) static const struct bpf_iter_seq_info ipv6_route_seq_info = { .seq_ops = &ipv6_route_seq_ops, .init_seq_private = bpf_iter_init_seq_net, .fini_seq_private = bpf_iter_fini_seq_net, .seq_priv_size = sizeof(struct ipv6_route_iter), }; static struct bpf_iter_reg ipv6_route_reg_info = { .target = "ipv6_route", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__ipv6_route, rt), PTR_TO_BTF_ID_OR_NULL }, }, .seq_info = &ipv6_route_seq_info, }; static int __init bpf_iter_register(void) { ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id; return bpf_iter_reg_target(&ipv6_route_reg_info); } static void bpf_iter_unregister(void) { bpf_iter_unreg_target(&ipv6_route_reg_info); } #endif #endif int __init ip6_route_init(void) { int ret; int cpu; ret = -ENOMEM; ip6_dst_ops_template.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!ip6_dst_ops_template.kmem_cachep) goto out; ret = dst_entries_init(&ip6_dst_blackhole_ops); if (ret) goto out_kmem_cache; ret = register_pernet_subsys(&ipv6_inetpeer_ops); if (ret) goto out_dst_entries; ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) goto 
out_register_inetpeer; ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; ret = fib6_init(); if (ret) goto out_register_subsys; ret = xfrm6_init(); if (ret) goto out_fib6_init; ret = fib6_rules_init(); if (ret) goto xfrm6_init; ret = register_pernet_subsys(&ip6_route_net_late_ops); if (ret) goto fib6_rules_init; ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0); if (ret < 0) goto out_register_late_subsys; ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0); if (ret < 0) goto out_register_late_subsys; ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, RTNL_FLAG_DOIT_UNLOCKED); if (ret < 0) goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); if (ret) goto out_register_late_subsys; #if IS_BUILTIN(CONFIG_IPV6) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) ret = bpf_iter_register(); if (ret) goto out_register_late_subsys; #endif #endif for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); INIT_LIST_HEAD(&ul->quarantine); spin_lock_init(&ul->lock); } out: return ret; out_register_late_subsys: rtnl_unregister_all(PF_INET6); unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); out_fib6_init: fib6_gc_cleanup(); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); out_register_inetpeer: unregister_pernet_subsys(&ipv6_inetpeer_ops); out_dst_entries: dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); goto out; } void ip6_route_cleanup(void) { #if IS_BUILTIN(CONFIG_IPV6) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) bpf_iter_unregister(); #endif #endif unregister_netdevice_notifier(&ip6_route_dev_notifier); unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_cleanup(); xfrm6_fini(); fib6_gc_cleanup(); unregister_pernet_subsys(&ipv6_inetpeer_ops); unregister_pernet_subsys(&ip6_route_net_ops); dst_entries_destroy(&ip6_dst_blackhole_ops); kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); }
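For context on the RTM_GETROUTE path handled by inet6_rtm_getroute() above, here is a minimal userspace sketch (not part of the kernel sources; the destination 2001:db8::1 and the lack of full error handling are illustrative assumptions). It sends one route query over an rtnetlink socket; the kernel answers with a single RTM_NEWROUTE message built by rt6_fill_node().

/* rtm_getroute_example.c - hypothetical standalone example, not kernel code */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
		char attrs[64];
	} req;
	struct rtattr *rta;
	struct in6_addr dst;
	char reply[8192];
	int fd;

	if (inet_pton(AF_INET6, "2001:db8::1", &dst) != 1)
		return 1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.rtm.rtm_family = AF_INET6;
	/* strict validation (inet6_rtm_valid_getroute_req) expects 128 here */
	req.rtm.rtm_dst_len = 128;

	/* append an RTA_DST attribute carrying the queried address */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(sizeof(dst));
	memcpy(RTA_DATA(rta), &dst, sizeof(dst));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + rta->rta_len;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return 1;
	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0)
		return 1;
	/* reply: one RTM_NEWROUTE message filled in by rt6_fill_node() */
	if (recv(fd, reply, sizeof(reply), 0) < 0)
		return 1;
	close(fd);
	return 0;
}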
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TASK_WORK_H
#define _LINUX_TASK_WORK_H

#include <linux/list.h>
#include <linux/sched.h>

typedef void (*task_work_func_t)(struct callback_head *);

static inline void
init_task_work(struct callback_head *twork, task_work_func_t func)
{
	twork->func = func;
}

enum task_work_notify_mode {
	TWA_NONE,
	TWA_RESUME,
	TWA_SIGNAL,
	TWA_SIGNAL_NO_IPI,
};

static inline bool task_work_pending(struct task_struct *task)
{
	return READ_ONCE(task->task_works);
}

int task_work_add(struct task_struct *task, struct callback_head *twork,
		  enum task_work_notify_mode mode);

struct callback_head *task_work_cancel_match(struct task_struct *task,
	bool (*match)(struct callback_head *, void *data), void *data);
struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
void task_work_run(void);

static inline void exit_task_work(struct task_struct *task)
{
	task_work_run();
}

#endif	/* _LINUX_TASK_WORK_H */
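A minimal sketch of how this interface is consumed, assuming hypothetical my_ctx / my_cleanup_cb / queue_cleanup names: the callback is queued on a target task and runs in that task's context, here on its next return to user space (TWA_RESUME).

#include <linux/task_work.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/printk.h>

struct my_ctx {
	struct callback_head twork;
	int value;
};

static void my_cleanup_cb(struct callback_head *head)
{
	struct my_ctx *ctx = container_of(head, struct my_ctx, twork);

	/* runs in the context of the task the work was queued on */
	pr_info("task_work ran, value=%d\n", ctx->value);
	kfree(ctx);
}

static int queue_cleanup(struct task_struct *task)
{
	struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return -ENOMEM;

	ctx->value = 42;
	init_task_work(&ctx->twork, my_cleanup_cb);

	/* TWA_RESUME: run on the task's next return to user space */
	if (task_work_add(task, &ctx->twork, TWA_RESUME)) {
		/* the task is already exiting; the callback will not run */
		kfree(ctx);
		return -ESRCH;
	}
	return 0;
}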
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2005,2006,2007,2008 IBM Corporation * * Authors: * Reiner Sailer <sailer@watson.ibm.com> * Mimi Zohar <zohar@us.ibm.com> * * File: ima.h * internal Integrity Measurement Architecture (IMA) definitions */ #ifndef __LINUX_IMA_H #define __LINUX_IMA_H #include <linux/types.h> #include <linux/crypto.h> #include <linux/fs.h> #include <linux/security.h> #include <linux/hash.h> #include <linux/tpm.h> #include <linux/audit.h> #include <crypto/hash_info.h> #include "../integrity.h" enum ima_show_type { IMA_SHOW_BINARY, IMA_SHOW_BINARY_NO_FIELD_LEN, IMA_SHOW_BINARY_OLD_STRING_FMT, IMA_SHOW_ASCII }; enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8, TPM_PCR10 = 10 }; /* digest size for IMA, fits SHA1 or MD5 */ #define IMA_DIGEST_SIZE SHA1_DIGEST_SIZE #define IMA_EVENT_NAME_LEN_MAX 255 #define IMA_HASH_BITS 10 #define IMA_MEASURE_HTABLE_SIZE (1 << IMA_HASH_BITS) #define IMA_TEMPLATE_FIELD_ID_MAX_LEN 16 #define IMA_TEMPLATE_NUM_FIELDS_MAX 15 #define IMA_TEMPLATE_IMA_NAME "ima" #define IMA_TEMPLATE_IMA_FMT "d|n" #define NR_BANKS(chip) ((chip != NULL) ? 
chip->nr_allocated_banks : 0) /* current content of the policy */ extern int ima_policy_flag; /* bitset of digests algorithms allowed in the setxattr hook */ extern atomic_t ima_setxattr_allowed_hash_algorithms; /* set during initialization */ extern int ima_hash_algo __ro_after_init; extern int ima_sha1_idx __ro_after_init; extern int ima_hash_algo_idx __ro_after_init; extern int ima_extra_slots __ro_after_init; extern int ima_appraise; extern struct tpm_chip *ima_tpm_chip; extern const char boot_aggregate_name[]; /* IMA event related data */ struct ima_event_data { struct ima_iint_cache *iint; struct file *file; const unsigned char *filename; struct evm_ima_xattr_data *xattr_value; int xattr_len; const struct modsig *modsig; const char *violation; const void *buf; int buf_len; }; /* IMA template field data definition */ struct ima_field_data { u8 *data; u32 len; }; /* IMA template field definition */ struct ima_template_field { const char field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN]; int (*field_init)(struct ima_event_data *event_data, struct ima_field_data *field_data); void (*field_show)(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data); }; /* IMA template descriptor definition */ struct ima_template_desc { struct list_head list; char *name; char *fmt; int num_fields; const struct ima_template_field **fields; }; struct ima_template_entry { int pcr; struct tpm_digest *digests; struct ima_template_desc *template_desc; /* template descriptor */ u32 template_data_len; struct ima_field_data template_data[]; /* template related data */ }; struct ima_queue_entry { struct hlist_node hnext; /* place in hash collision list */ struct list_head later; /* place in ima_measurements list */ struct ima_template_entry *entry; }; extern struct list_head ima_measurements; /* list of all measurements */ /* Some details preceding the binary serialized measurement list */ struct ima_kexec_hdr { u16 version; u16 _reserved0; u32 _reserved1; u64 buffer_size; u64 count; }; /* IMA iint action cache flags */ #define IMA_MEASURE 0x00000001 #define IMA_MEASURED 0x00000002 #define IMA_APPRAISE 0x00000004 #define IMA_APPRAISED 0x00000008 /*#define IMA_COLLECT 0x00000010 do not use this flag */ #define IMA_COLLECTED 0x00000020 #define IMA_AUDIT 0x00000040 #define IMA_AUDITED 0x00000080 #define IMA_HASH 0x00000100 #define IMA_HASHED 0x00000200 /* IMA iint policy rule cache flags */ #define IMA_NONACTION_FLAGS 0xff000000 #define IMA_DIGSIG_REQUIRED 0x01000000 #define IMA_PERMIT_DIRECTIO 0x02000000 #define IMA_NEW_FILE 0x04000000 #define IMA_FAIL_UNVERIFIABLE_SIGS 0x10000000 #define IMA_MODSIG_ALLOWED 0x20000000 #define IMA_CHECK_BLACKLIST 0x40000000 #define IMA_VERITY_REQUIRED 0x80000000 #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_HASH | IMA_APPRAISE_SUBMASK) #define IMA_DONE_MASK (IMA_MEASURED | IMA_APPRAISED | IMA_AUDITED | \ IMA_HASHED | IMA_COLLECTED | \ IMA_APPRAISED_SUBMASK) /* IMA iint subaction appraise cache flags */ #define IMA_FILE_APPRAISE 0x00001000 #define IMA_FILE_APPRAISED 0x00002000 #define IMA_MMAP_APPRAISE 0x00004000 #define IMA_MMAP_APPRAISED 0x00008000 #define IMA_BPRM_APPRAISE 0x00010000 #define IMA_BPRM_APPRAISED 0x00020000 #define IMA_READ_APPRAISE 0x00040000 #define IMA_READ_APPRAISED 0x00080000 #define IMA_CREDS_APPRAISE 0x00100000 #define IMA_CREDS_APPRAISED 0x00200000 #define IMA_APPRAISE_SUBMASK (IMA_FILE_APPRAISE | IMA_MMAP_APPRAISE | \ IMA_BPRM_APPRAISE | IMA_READ_APPRAISE | \ IMA_CREDS_APPRAISE) #define IMA_APPRAISED_SUBMASK 
(IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \ IMA_BPRM_APPRAISED | IMA_READ_APPRAISED | \ IMA_CREDS_APPRAISED) /* IMA iint cache atomic_flags */ #define IMA_CHANGE_XATTR 0 #define IMA_UPDATE_XATTR 1 #define IMA_CHANGE_ATTR 2 #define IMA_DIGSIG 3 #define IMA_MUST_MEASURE 4 /* IMA integrity metadata associated with an inode */ struct ima_iint_cache { struct mutex mutex; /* protects: version, flags, digest */ u64 version; /* track inode changes */ unsigned long flags; unsigned long measured_pcrs; unsigned long atomic_flags; unsigned long real_ino; dev_t real_dev; enum integrity_status ima_file_status:4; enum integrity_status ima_mmap_status:4; enum integrity_status ima_bprm_status:4; enum integrity_status ima_read_status:4; enum integrity_status ima_creds_status:4; struct ima_digest_data *ima_hash; }; extern struct lsm_blob_sizes ima_blob_sizes; static inline struct ima_iint_cache * ima_inode_get_iint(const struct inode *inode) { struct ima_iint_cache **iint_sec; if (unlikely(!inode->i_security)) return NULL; iint_sec = inode->i_security + ima_blob_sizes.lbs_inode; return *iint_sec; } static inline void ima_inode_set_iint(const struct inode *inode, struct ima_iint_cache *iint) { struct ima_iint_cache **iint_sec; if (unlikely(!inode->i_security)) return; iint_sec = inode->i_security + ima_blob_sizes.lbs_inode; *iint_sec = iint; } struct ima_iint_cache *ima_iint_find(struct inode *inode); struct ima_iint_cache *ima_inode_get(struct inode *inode); void ima_inode_free(struct inode *inode); void __init ima_iintcache_init(void); extern const int read_idmap[]; #ifdef CONFIG_HAVE_IMA_KEXEC void ima_load_kexec_buffer(void); #else static inline void ima_load_kexec_buffer(void) {} #endif /* CONFIG_HAVE_IMA_KEXEC */ #ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS void ima_post_key_create_or_update(struct key *keyring, struct key *key, const void *payload, size_t plen, unsigned long flags, bool create); #endif /* * The default binary_runtime_measurements list format is defined as the * platform native format. The canonical format is defined as little-endian. 
*/ extern bool ima_canonical_fmt; /* Internal IMA function definitions */ int ima_init(void); int ima_fs_init(void); int ima_add_template_entry(struct ima_template_entry *entry, int violation, const char *op, struct inode *inode, const unsigned char *filename); int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash); int ima_calc_buffer_hash(const void *buf, loff_t len, struct ima_digest_data *hash); int ima_calc_field_array_hash(struct ima_field_data *field_data, struct ima_template_entry *entry); int ima_calc_boot_aggregate(struct ima_digest_data *hash); void ima_add_violation(struct file *file, const unsigned char *filename, struct ima_iint_cache *iint, const char *op, const char *cause); int ima_init_crypto(void); void ima_putc(struct seq_file *m, void *data, int datalen); void ima_print_digest(struct seq_file *m, u8 *digest, u32 size); int template_desc_init_fields(const char *template_fmt, const struct ima_template_field ***fields, int *num_fields); struct ima_template_desc *ima_template_desc_current(void); struct ima_template_desc *ima_template_desc_buf(void); struct ima_template_desc *lookup_template_desc(const char *name); bool ima_template_has_modsig(const struct ima_template_desc *ima_template); int ima_restore_measurement_entry(struct ima_template_entry *entry); int ima_restore_measurement_list(loff_t bufsize, void *buf); int ima_measurements_show(struct seq_file *m, void *v); unsigned long ima_get_binary_runtime_size(void); int ima_init_template(void); void ima_init_template_list(void); int __init ima_init_digests(void); int ima_lsm_policy_change(struct notifier_block *nb, unsigned long event, void *lsm_data); /* * used to protect h_table and sha_table */ extern spinlock_t ima_queue_lock; struct ima_h_table { atomic_long_t len; /* number of stored measurements in the list */ atomic_long_t violations; struct hlist_head queue[IMA_MEASURE_HTABLE_SIZE]; }; extern struct ima_h_table ima_htable; static inline unsigned int ima_hash_key(u8 *digest) { /* there is no point in taking a hash of part of a digest */ return (digest[0] | digest[1] << 8) % IMA_MEASURE_HTABLE_SIZE; } #define __ima_hooks(hook) \ hook(NONE, none) \ hook(FILE_CHECK, file) \ hook(MMAP_CHECK, mmap) \ hook(MMAP_CHECK_REQPROT, mmap_reqprot) \ hook(BPRM_CHECK, bprm) \ hook(CREDS_CHECK, creds) \ hook(POST_SETATTR, post_setattr) \ hook(MODULE_CHECK, module) \ hook(FIRMWARE_CHECK, firmware) \ hook(KEXEC_KERNEL_CHECK, kexec_kernel) \ hook(KEXEC_INITRAMFS_CHECK, kexec_initramfs) \ hook(POLICY_CHECK, policy) \ hook(KEXEC_CMDLINE, kexec_cmdline) \ hook(KEY_CHECK, key) \ hook(CRITICAL_DATA, critical_data) \ hook(SETXATTR_CHECK, setxattr_check) \ hook(MAX_CHECK, none) #define __ima_hook_enumify(ENUM, str) ENUM, #define __ima_stringify(arg) (#arg) #define __ima_hook_measuring_stringify(ENUM, str) \ (__ima_stringify(measuring_ ##str)), enum ima_hooks { __ima_hooks(__ima_hook_enumify) }; static const char * const ima_hooks_measure_str[] = { __ima_hooks(__ima_hook_measuring_stringify) }; static inline const char *func_measure_str(enum ima_hooks func) { if (func >= MAX_CHECK) return ima_hooks_measure_str[NONE]; return ima_hooks_measure_str[func]; } extern const char *const func_tokens[]; struct modsig; #ifdef CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS /* * To track keys that need to be measured. 
*/ struct ima_key_entry { struct list_head list; void *payload; size_t payload_len; char *keyring_name; }; void ima_init_key_queue(void); bool ima_should_queue_key(void); bool ima_queue_key(struct key *keyring, const void *payload, size_t payload_len); void ima_process_queued_keys(void); #else static inline void ima_init_key_queue(void) {} static inline bool ima_should_queue_key(void) { return false; } static inline bool ima_queue_key(struct key *keyring, const void *payload, size_t payload_len) { return false; } static inline void ima_process_queued_keys(void) {} #endif /* CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS */ /* LIM API function definitions */ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode, const struct cred *cred, u32 secid, int mask, enum ima_hooks func, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos); int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func); int ima_collect_measurement(struct ima_iint_cache *iint, struct file *file, void *buf, loff_t size, enum hash_algo algo, struct modsig *modsig); void ima_store_measurement(struct ima_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig, int pcr, struct ima_template_desc *template_desc); int process_buffer_measurement(struct mnt_idmap *idmap, struct inode *inode, const void *buf, int size, const char *eventname, enum ima_hooks func, int pcr, const char *func_data, bool buf_hash, u8 *digest, size_t digest_len); void ima_audit_measurement(struct ima_iint_cache *iint, const unsigned char *filename); int ima_alloc_init_template(struct ima_event_data *event_data, struct ima_template_entry **entry, struct ima_template_desc *template_desc); int ima_store_template(struct ima_template_entry *entry, int violation, struct inode *inode, const unsigned char *filename, int pcr); void ima_free_template_entry(struct ima_template_entry *entry); const char *ima_d_path(const struct path *path, char **pathbuf, char *filename); /* IMA policy related functions */ int ima_match_policy(struct mnt_idmap *idmap, struct inode *inode, const struct cred *cred, u32 secid, enum ima_hooks func, int mask, int flags, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos); void ima_init_policy(void); void ima_update_policy(void); void ima_update_policy_flags(void); ssize_t ima_parse_add_rule(char *); void ima_delete_rules(void); int ima_check_policy(void); void *ima_policy_start(struct seq_file *m, loff_t *pos); void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos); void ima_policy_stop(struct seq_file *m, void *v); int ima_policy_show(struct seq_file *m, void *v); /* Appraise integrity measurements */ #define IMA_APPRAISE_ENFORCE 0x01 #define IMA_APPRAISE_FIX 0x02 #define IMA_APPRAISE_LOG 0x04 #define IMA_APPRAISE_MODULES 0x08 #define IMA_APPRAISE_FIRMWARE 0x10 #define IMA_APPRAISE_POLICY 0x20 #define IMA_APPRAISE_KEXEC 0x40 #ifdef CONFIG_IMA_APPRAISE int ima_check_blacklist(struct ima_iint_cache *iint, const struct modsig *modsig, int pcr); int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig); int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct ima_iint_cache *iint, struct file *file); enum 
integrity_status ima_get_cache_status(struct ima_iint_cache *iint, enum ima_hooks func); enum hash_algo ima_get_hash_algo(const struct evm_ima_xattr_data *xattr_value, int xattr_len); int ima_read_xattr(struct dentry *dentry, struct evm_ima_xattr_data **xattr_value, int xattr_len); void __init init_ima_appraise_lsm(const struct lsm_id *lsmid); #else static inline int ima_check_blacklist(struct ima_iint_cache *iint, const struct modsig *modsig, int pcr) { return 0; } static inline int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig) { return INTEGRITY_UNKNOWN; } static inline int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode, int mask, enum ima_hooks func) { return 0; } static inline void ima_update_xattr(struct ima_iint_cache *iint, struct file *file) { } static inline enum integrity_status ima_get_cache_status(struct ima_iint_cache *iint, enum ima_hooks func) { return INTEGRITY_UNKNOWN; } static inline enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len) { return ima_hash_algo; } static inline int ima_read_xattr(struct dentry *dentry, struct evm_ima_xattr_data **xattr_value, int xattr_len) { return 0; } static inline void __init init_ima_appraise_lsm(const struct lsm_id *lsmid) { } #endif /* CONFIG_IMA_APPRAISE */ #ifdef CONFIG_IMA_APPRAISE_MODSIG int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len, struct modsig **modsig); void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size); int ima_get_modsig_digest(const struct modsig *modsig, enum hash_algo *algo, const u8 **digest, u32 *digest_size); int ima_get_raw_modsig(const struct modsig *modsig, const void **data, u32 *data_len); void ima_free_modsig(struct modsig *modsig); #else static inline int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len, struct modsig **modsig) { return -EOPNOTSUPP; } static inline void ima_collect_modsig(struct modsig *modsig, const void *buf, loff_t size) { } static inline int ima_get_modsig_digest(const struct modsig *modsig, enum hash_algo *algo, const u8 **digest, u32 *digest_size) { return -EOPNOTSUPP; } static inline int ima_get_raw_modsig(const struct modsig *modsig, const void **data, u32 *data_len) { return -EOPNOTSUPP; } static inline void ima_free_modsig(struct modsig *modsig) { } #endif /* CONFIG_IMA_APPRAISE_MODSIG */ /* LSM based policy rules require audit */ #ifdef CONFIG_IMA_LSM_RULES #define ima_filter_rule_init security_audit_rule_init #define ima_filter_rule_free security_audit_rule_free #define ima_filter_rule_match security_audit_rule_match #else static inline int ima_filter_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule) { return -EINVAL; } static inline void ima_filter_rule_free(void *lsmrule) { } static inline int ima_filter_rule_match(u32 secid, u32 field, u32 op, void *lsmrule) { return -EINVAL; } #endif /* CONFIG_IMA_LSM_RULES */ #ifdef CONFIG_IMA_READ_POLICY #define POLICY_FILE_FLAGS (S_IWUSR | S_IRUSR) #else #define POLICY_FILE_FLAGS S_IWUSR #endif /* CONFIG_IMA_READ_POLICY */ #endif /* __LINUX_IMA_H */
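The __ima_hooks() table above is an X-macro: a single list expanded twice, once into enum ima_hooks values and once into the ima_hooks_measure_str[] string table, so the two can never drift apart. A standalone sketch of the same pattern, using a hypothetical COLOR_LIST instead of the IMA hooks:

#include <stdio.h>

#define COLOR_LIST(x)		\
	x(RED, "red")		\
	x(GREEN, "green")	\
	x(BLUE, "blue")

#define COLOR_ENUMIFY(ENUM, str)	ENUM,
#define COLOR_STRINGIFY(ENUM, str)	str,

enum color { COLOR_LIST(COLOR_ENUMIFY) COLOR_MAX };

static const char * const color_names[] = {
	COLOR_LIST(COLOR_STRINGIFY)
};

int main(void)
{
	int i;

	/* the enum and the string table stay in sync automatically */
	for (i = 0; i < COLOR_MAX; i++)
		printf("%d -> %s\n", i, color_names[i]);
	return 0;
}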
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Network event notifiers
 *
 *	Authors:
 *	Tom Tucker <tom@opengridcomputing.com>
 *	Steve Wise <swise@opengridcomputing.com>
 *
 *	Fixes:
 */

#include <linux/rtnetlink.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <net/netevent.h>

static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain);

/**
 * register_netevent_notifier - register a netevent notifier block
 * @nb: notifier
 *
 * Register a notifier to be called when a netevent occurs.
 * The notifier passed is linked into the kernel structures and must
 * not be reused until it has been unregistered. A negative errno code
 * is returned on a failure.
 */
int register_netevent_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&netevent_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(register_netevent_notifier);

/**
 * unregister_netevent_notifier - unregister a netevent notifier block
 * @nb: notifier
 *
 * Unregister a notifier previously registered by
 * register_netevent_notifier(). The notifier is unlinked from the
 * kernel structures and may then be reused. A negative errno code
 * is returned on a failure.
 */
int unregister_netevent_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_netevent_notifier);

/**
 * call_netevent_notifiers - call all netevent notifier blocks
 * @val: value passed unmodified to notifier function
 * @v: pointer passed unmodified to notifier function
 *
 * Call all netevent notifier blocks. Parameters and return value
 * are as for notifier_call_chain().
 */
int call_netevent_notifiers(unsigned long val, void *v)
{
	return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
}
EXPORT_SYMBOL_GPL(call_netevent_notifiers);
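A minimal sketch of a consumer of this notifier chain, assuming hypothetical my_netevent_cb / my_netevent_nb names; NETEVENT_NEIGH_UPDATE is one of the events defined in <net/netevent.h> and carries a struct neighbour pointer.

#include <linux/module.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/neighbour.h>

static int my_netevent_cb(struct notifier_block *nb,
			  unsigned long event, void *ptr)
{
	switch (event) {
	case NETEVENT_NEIGH_UPDATE: {
		struct neighbour *n = ptr;

		pr_debug("neighbour update on %s\n", n->dev->name);
		break;
	}
	default:
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_netevent_nb = {
	.notifier_call = my_netevent_cb,
};

static int __init my_netevent_init(void)
{
	return register_netevent_notifier(&my_netevent_nb);
}

static void __exit my_netevent_exit(void)
{
	unregister_netevent_notifier(&my_netevent_nb);
}

module_init(my_netevent_init);
module_exit(my_netevent_exit);
MODULE_LICENSE("GPL");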
261 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H #include <asm/processor.h> #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #include <asm/asm.h> #include <linux/bitops.h> #include <asm/alternative.h> enum cpuid_leafs { CPUID_1_EDX = 0, CPUID_8000_0001_EDX, CPUID_8086_0001_EDX, CPUID_LNX_1, CPUID_1_ECX, CPUID_C000_0001_EDX, CPUID_8000_0001_ECX, CPUID_LNX_2, CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, CPUID_LNX_4, CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, CPUID_LNX_5, NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" #define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31) extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; #define X86_CAP_FMT "%s" #define x86_cap_flag(flag) x86_cap_flags[flag] /* * In order to save room, we index into this array by doing * X86_BUG_<name> - NCAPINTS*32. */ extern const char * const x86_bug_flags[NBUGINTS*32]; #define test_cpu_cap(c, bit) \ arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) /* * There are 32 bits/features in each mask word. The high bits * (selected with (bit>>5) give us the word number and the low 5 * bits give us the bit/feature number inside the word. * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can * see if it is set in the mask word. */ #define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) /* * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all * header macros which use NCAPINTS need to be changed. The duplicated macro * use causes the compiler to issue errors for all headers so that all usage * sites can be corrected. 
*/ #define REQUIRED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) #define this_cpu_has(bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ x86_this_cpu_test_bit(bit, \ (unsigned long __percpu *)&cpu_info.x86_capability)) /* * This macro is for detection of features which need kernel * infrastructure to be used. It may *not* directly test the CPU * itself. Use the cpu_has() family if you want true runtime * testing of CPU features, like in hypervisor code where you are * supporting a possible guest feature where host support for it * is not relevant. 
*/ #define cpu_feature_enabled(bit) \ (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) /* * Static testing of CPU features. Used the same as boot_cpu_has(). It * statically patches the target code for additional performance. Use * static_cpu_has() only in fast paths, where every cycle counts. Which * means that the boot_cpu_has() variant is already fast enough for the * majority of cases and you should stick to using it as it is generally * only two instructions: a RIP-relative MOV and a TEST. * * Do not use an "m" constraint for [cap_byte] here: gcc doesn't know * that this is only used on a fallback path and will sometimes cause * it to manifest the address of boot_cpu_data in a register, fouling * the mainline (post-initialization) code. */ static __always_inline bool _static_cpu_has(u16 bit) { asm goto( ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" " jnz %l[t_yes]\n" " jmp %l[t_no]\n" ".popsection\n" : : [feature] "i" (bit), [bitnum] "i" (1 << (bit & 7)), [cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3]) : : t_yes, t_no); t_yes: return true; t_no: return false; } #define static_cpu_has(bit) \ ( \ __builtin_constant_p(boot_cpu_has(bit)) ? \ boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) #define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)) #define static_cpu_has_bug(bit) static_cpu_has((bit)) #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) #define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit)) #define MAX_CPU_FEATURES (NCAPINTS * 32) #define cpu_have_feature boot_cpu_has #define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" #define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ boot_cpu_data.x86_model #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ #endif /* _ASM_X86_CPUFEATURE_H */
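A short sketch of how the helpers above are typically used (the report_cpu_features() function is hypothetical): boot_cpu_has() is a plain runtime bit test against boot_cpu_data, while cpu_feature_enabled() also folds in DISABLED_MASK at compile time and is patched via static_cpu_has() on the fast path.

#include <linux/printk.h>
#include <asm/cpufeature.h>
#include <asm/cpufeatures.h>

static void report_cpu_features(void)
{
	/* runtime test against boot_cpu_data */
	if (boot_cpu_has(X86_FEATURE_AES))
		pr_info("AES-NI supported\n");

	/* compile-time DISABLED_MASK check plus alternatives-patched test */
	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
		pr_info("XSAVE enabled\n");

	/* per-CPU variant, useful where CPUs may differ */
	if (this_cpu_has(X86_FEATURE_MWAIT))
		pr_info("MWAIT available on this CPU\n");
}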
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PACKET_INTERNAL_H__ #define __PACKET_INTERNAL_H__ #include <linux/refcount.h> struct packet_mclist { struct packet_mclist *next; int ifindex; int count; unsigned short type; unsigned short alen; unsigned char addr[MAX_ADDR_LEN]; }; /* kbdq - kernel block descriptor queue */ struct tpacket_kbdq_core { struct pgv *pkbdq; unsigned int feature_req_word; unsigned int hdrlen; unsigned char reset_pending_on_curr_blk; unsigned char delete_blk_timer; unsigned short kactive_blk_num; unsigned short blk_sizeof_priv; /* last_kactive_blk_num: * trick to see if user-space has caught up * in order to avoid refreshing timer when every single pkt arrives. */ unsigned short last_kactive_blk_num; char *pkblk_start; char *pkblk_end; int kblk_size; unsigned int max_frame_len; unsigned int knum_blocks; uint64_t knxt_seq_num; char *prev; char *nxt_offset; struct sk_buff *skb; rwlock_t blk_fill_in_prog_lock; /* Default is set to 8ms */ #define DEFAULT_PRB_RETIRE_TOV (8) unsigned short retire_blk_tov; unsigned short version; unsigned long tov_in_jiffies; /* timer to retire an outstanding block */ struct timer_list retire_blk_timer; }; struct pgv { char *buffer; }; struct packet_ring_buffer { struct pgv *pg_vec; unsigned int head; unsigned int frames_per_block; unsigned int frame_size; unsigned int frame_max; unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; unsigned int __percpu *pending_refcnt; union { unsigned long *rx_owner_map; struct tpacket_kbdq_core prb_bdqc; }; }; extern struct mutex fanout_mutex; #define PACKET_FANOUT_MAX (1 << 16) struct packet_fanout { possible_net_t net; unsigned int num_members; u32 max_num_members; u16 id; u8 type; u8 flags; union { atomic_t rr_cur; struct bpf_prog __rcu *bpf_prog; }; struct list_head list; spinlock_t lock; refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; struct sock __rcu *arr[] __counted_by(max_num_members); }; struct packet_rollover { int sock; atomic_long_t num; atomic_long_t num_huge; atomic_long_t num_failed; #define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32)) u32 history[ROLLOVER_HLEN] ____cacheline_aligned; } ____cacheline_aligned_in_smp; struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; struct packet_fanout *fanout; union tpacket_stats_u stats; struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned long flags; int ifindex; /* bound device */ u8 vnet_hdr_sz; __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; atomic_long_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_tstamp; struct completion skb_completion; struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; atomic_t tp_drops ____cacheline_aligned_in_smp; }; #define pkt_sk(ptr) container_of_const(ptr, 
						  struct packet_sock, sk)

enum packet_sock_flags {
	PACKET_SOCK_ORIGDEV,
	PACKET_SOCK_AUXDATA,
	PACKET_SOCK_TX_HAS_OFF,
	PACKET_SOCK_TP_LOSS,
	PACKET_SOCK_RUNNING,
	PACKET_SOCK_PRESSURE,
	PACKET_SOCK_QDISC_BYPASS,
};

static inline void packet_sock_flag_set(struct packet_sock *po,
					enum packet_sock_flags flag,
					bool val)
{
	if (val)
		set_bit(flag, &po->flags);
	else
		clear_bit(flag, &po->flags);
}

static inline bool packet_sock_flag(const struct packet_sock *po,
				    enum packet_sock_flags flag)
{
	return test_bit(flag, &po->flags);
}

#endif
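A minimal sketch (the example_set_auxdata() and example_wants_auxdata() helpers are hypothetical) of how af_packet.c-style code uses the accessors above; set_bit()/clear_bit()/test_bit() keep concurrent updates of po->flags atomic without a dedicated lock.

static void example_set_auxdata(struct packet_sock *po, bool on)
{
	/* atomically record the socket option in po->flags */
	packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, on);
}

static bool example_wants_auxdata(const struct packet_sock *po)
{
	/* atomically read it back, e.g. on the packet receive path */
	return packet_sock_flag(po, PACKET_SOCK_AUXDATA);
}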
// SPDX-License-Identifier: GPL-2.0 /* * USB 7 Segment Driver * * Copyright (C) 2008 Harrison Metzger <harrisonmetz@gmail.com> * Based on usbled.c by Greg Kroah-Hartman (greg@kroah.com) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/string.h> #include <linux/usb.h> #define DRIVER_AUTHOR "Harrison Metzger <harrisonmetz@gmail.com>" #define DRIVER_DESC "USB 7 Segment Driver" #define VENDOR_ID 0x0fc5 #define PRODUCT_ID 0x1227 #define MAXLEN 8 /* table of devices that work with this driver */ static const struct usb_device_id id_table[] = { { USB_DEVICE(VENDOR_ID, PRODUCT_ID) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); /* the different text display modes the device is capable of */ static const char *display_textmodes[] = {"raw", "hex", "ascii"}; struct usb_sevsegdev { struct usb_device *udev; struct usb_interface *intf; u8 powered; u8 mode_msb; u8 mode_lsb; u8 decimals[MAXLEN]; u8 textmode; u8 text[MAXLEN]; u16 textlength; u8 shadow_power; /* for PM */ u8 has_interface_pm; }; /* sysfs_streq() can't fully replace this: in hex mode the user may * legitimately write a 0 byte, which string helpers would treat as the * end of the string, so length-based (mem) handling is used instead. */ static inline size_t my_memlen(const char *buf, size_t count) { if (count > 0 && buf[count-1] == '\n') return count - 1; else return count; } static void update_display_powered(struct usb_sevsegdev *mydev) { int rc; if (mydev->powered && !mydev->has_interface_pm) { rc = usb_autopm_get_interface(mydev->intf); if (rc < 0) return; mydev->has_interface_pm = 1; } if (mydev->shadow_power != 1) return; rc = usb_control_msg_send(mydev->udev, 0, 0x12, 0x48, (80 * 0x100) + 10, /* (power mode) */ (0x00 * 0x100) + (mydev->powered ?
1 : 0), NULL, 0, 2000, GFP_KERNEL); if (rc < 0) dev_dbg(&mydev->udev->dev, "power retval = %d\n", rc); if (!mydev->powered && mydev->has_interface_pm) { usb_autopm_put_interface(mydev->intf); mydev->has_interface_pm = 0; } } static void update_display_mode(struct usb_sevsegdev *mydev) { int rc; if(mydev->shadow_power != 1) return; rc = usb_control_msg_send(mydev->udev, 0, 0x12, 0x48, (82 * 0x100) + 10, /* (set mode) */ (mydev->mode_msb * 0x100) + mydev->mode_lsb, NULL, 0, 2000, GFP_NOIO); if (rc < 0) dev_dbg(&mydev->udev->dev, "mode retval = %d\n", rc); } static void update_display_visual(struct usb_sevsegdev *mydev, gfp_t mf) { int rc; int i; unsigned char buffer[MAXLEN] = {0}; u8 decimals = 0; if(mydev->shadow_power != 1) return; /* The device is right to left, where as you write left to right */ for (i = 0; i < mydev->textlength; i++) buffer[i] = mydev->text[mydev->textlength-1-i]; rc = usb_control_msg_send(mydev->udev, 0, 0x12, 0x48, (85 * 0x100) + 10, /* (write text) */ (0 * 0x100) + mydev->textmode, /* mode */ &buffer, mydev->textlength, 2000, mf); if (rc < 0) dev_dbg(&mydev->udev->dev, "write retval = %d\n", rc); /* The device is right to left, where as you write left to right */ for (i = 0; i < sizeof(mydev->decimals); i++) decimals |= mydev->decimals[i] << i; rc = usb_control_msg_send(mydev->udev, 0, 0x12, 0x48, (86 * 0x100) + 10, /* (set decimal) */ (0 * 0x100) + decimals, /* decimals */ NULL, 0, 2000, mf); if (rc < 0) dev_dbg(&mydev->udev->dev, "decimal retval = %d\n", rc); } #define MYDEV_ATTR_SIMPLE_UNSIGNED(name, update_fcn) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ struct usb_sevsegdev *mydev = usb_get_intfdata(intf); \ \ return sprintf(buf, "%u\n", mydev->name); \ } \ \ static ssize_t name##_store(struct device *dev, \ struct device_attribute *attr, const char *buf, size_t count) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ struct usb_sevsegdev *mydev = usb_get_intfdata(intf); \ \ mydev->name = simple_strtoul(buf, NULL, 10); \ update_fcn(mydev); \ \ return count; \ } \ static DEVICE_ATTR_RW(name); static ssize_t text_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); return sysfs_emit(buf, "%s\n", mydev->text); } static ssize_t text_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); size_t end = my_memlen(buf, count); if (end > sizeof(mydev->text)) return -EINVAL; memset(mydev->text, 0, sizeof(mydev->text)); mydev->textlength = end; if (end > 0) memcpy(mydev->text, buf, end); update_display_visual(mydev, GFP_KERNEL); return count; } static DEVICE_ATTR_RW(text); static ssize_t decimals_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); int i; int pos; for (i = 0; i < sizeof(mydev->decimals); i++) { pos = sizeof(mydev->decimals) - 1 - i; if (mydev->decimals[i] == 0) buf[pos] = '0'; else if (mydev->decimals[i] == 1) buf[pos] = '1'; else buf[pos] = 'x'; } buf[sizeof(mydev->decimals)] = '\n'; return sizeof(mydev->decimals) + 1; } static ssize_t decimals_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = 
to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); size_t end = my_memlen(buf, count); int i; if (end > sizeof(mydev->decimals)) return -EINVAL; for (i = 0; i < end; i++) if (buf[i] != '0' && buf[i] != '1') return -EINVAL; memset(mydev->decimals, 0, sizeof(mydev->decimals)); for (i = 0; i < end; i++) if (buf[i] == '1') mydev->decimals[end-1-i] = 1; update_display_visual(mydev, GFP_KERNEL); return count; } static DEVICE_ATTR_RW(decimals); static ssize_t textmode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); int i; buf[0] = 0; for (i = 0; i < ARRAY_SIZE(display_textmodes); i++) { if (mydev->textmode == i) { strcat(buf, " ["); strcat(buf, display_textmodes[i]); strcat(buf, "] "); } else { strcat(buf, " "); strcat(buf, display_textmodes[i]); strcat(buf, " "); } } strcat(buf, "\n"); return strlen(buf); } static ssize_t textmode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); int i; i = sysfs_match_string(display_textmodes, buf); if (i < 0) return i; mydev->textmode = i; update_display_visual(mydev, GFP_KERNEL); return count; } static DEVICE_ATTR_RW(textmode); MYDEV_ATTR_SIMPLE_UNSIGNED(powered, update_display_powered); MYDEV_ATTR_SIMPLE_UNSIGNED(mode_msb, update_display_mode); MYDEV_ATTR_SIMPLE_UNSIGNED(mode_lsb, update_display_mode); static struct attribute *sevseg_attrs[] = { &dev_attr_powered.attr, &dev_attr_text.attr, &dev_attr_textmode.attr, &dev_attr_decimals.attr, &dev_attr_mode_msb.attr, &dev_attr_mode_lsb.attr, NULL }; ATTRIBUTE_GROUPS(sevseg); static int sevseg_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_sevsegdev *mydev; int rc = -ENOMEM; mydev = kzalloc(sizeof(struct usb_sevsegdev), GFP_KERNEL); if (!mydev) goto error_mem; mydev->udev = usb_get_dev(udev); mydev->intf = interface; usb_set_intfdata(interface, mydev); /* PM */ mydev->shadow_power = 1; /* currently active */ mydev->has_interface_pm = 0; /* have not issued autopm_get */ /*set defaults */ mydev->textmode = 0x02; /* ascii mode */ mydev->mode_msb = 0x06; /* 6 characters */ mydev->mode_lsb = 0x3f; /* scanmode for 6 chars */ dev_info(&interface->dev, "USB 7 Segment device now attached\n"); return 0; error_mem: return rc; } static void sevseg_disconnect(struct usb_interface *interface) { struct usb_sevsegdev *mydev; mydev = usb_get_intfdata(interface); usb_set_intfdata(interface, NULL); usb_put_dev(mydev->udev); kfree(mydev); dev_info(&interface->dev, "USB 7 Segment now disconnected\n"); } static int sevseg_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_sevsegdev *mydev; mydev = usb_get_intfdata(intf); mydev->shadow_power = 0; return 0; } static int sevseg_resume(struct usb_interface *intf) { struct usb_sevsegdev *mydev; mydev = usb_get_intfdata(intf); mydev->shadow_power = 1; update_display_mode(mydev); update_display_visual(mydev, GFP_NOIO); return 0; } static int sevseg_reset_resume(struct usb_interface *intf) { struct usb_sevsegdev *mydev; mydev = usb_get_intfdata(intf); mydev->shadow_power = 1; update_display_mode(mydev); update_display_visual(mydev, GFP_NOIO); return 0; } static struct usb_driver sevseg_driver = { .name = "usbsevseg", .probe = sevseg_probe, .disconnect = sevseg_disconnect, .suspend = 
sevseg_suspend, .resume = sevseg_resume, .reset_resume = sevseg_reset_resume, .id_table = id_table, .dev_groups = sevseg_groups, .supports_autosuspend = 1, }; module_usb_driver(sevseg_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
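/*
 * Usage sketch (not part of the driver above): the sysfs attributes created
 * by the driver (powered, text, textmode, decimals, mode_msb, mode_lsb)
 * appear in the bound USB interface's sysfs directory. The directory name
 * used below ("2-1:1.0") is a hypothetical example; the real path depends on
 * where the device enumerates. This is a standalone userspace illustration.
 */
#include <stdio.h>

static int write_attr(const char *dir, const char *attr, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", dir, attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	/* hypothetical interface directory under the usbsevseg driver */
	const char *dir = "/sys/bus/usb/drivers/usbsevseg/2-1:1.0";

	write_attr(dir, "powered", "1");	/* turn the display on */
	write_attr(dir, "textmode", "ascii");	/* one of display_textmodes[] */
	write_attr(dir, "text", "HELLO");	/* at most MAXLEN (8) characters */
	return 0;
}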
// SPDX-License-Identifier: GPL-2.0-or-later /* * Abilis Systems Single DVB-T Receiver * Copyright (C) 2008 Pierrick Hascoet <pierrick.hascoet@abilis.com> * Copyright (C) 2010 Devin Heitmueller <dheitmueller@kernellabs.com> */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/usb.h> #include "as102_drv.h" #include "as102_usb_drv.h" #include "as102_fw.h" static void as102_usb_disconnect(struct usb_interface *interface); static int as102_usb_probe(struct usb_interface *interface, const struct usb_device_id *id); static int as102_usb_start_stream(struct as102_dev_t *dev); static void as102_usb_stop_stream(struct as102_dev_t *dev); static int as102_open(struct inode *inode, struct file *file); static int as102_release(struct inode *inode, struct file *file); static const struct usb_device_id as102_usb_id_table[] = { { USB_DEVICE(AS102_USB_DEVICE_VENDOR_ID, AS102_USB_DEVICE_PID_0001) }, { USB_DEVICE(PCTV_74E_USB_VID, PCTV_74E_USB_PID) }, { USB_DEVICE(ELGATO_EYETV_DTT_USB_VID, ELGATO_EYETV_DTT_USB_PID) }, { USB_DEVICE(NBOX_DVBT_DONGLE_USB_VID, NBOX_DVBT_DONGLE_USB_PID) }, { USB_DEVICE(SKY_IT_DIGITAL_KEY_USB_VID, SKY_IT_DIGITAL_KEY_USB_PID) }, { } /* Terminating entry */ }; /* Note that this table must always have the same number of entries as the as102_usb_id_table struct */ static const char * const as102_device_names[] = { AS102_REFERENCE_DESIGN, AS102_PCTV_74E, AS102_ELGATO_EYETV_DTT_NAME, AS102_NBOX_DVBT_DONGLE_NAME, AS102_SKY_IT_DIGITAL_KEY_NAME, NULL /* Terminating entry */ }; /* eLNA configuration: devices built on the reference design work best with 0xA0, while custom designs seem to require 0xC0 */ static uint8_t const as102_elna_cfg[] = { 0xA0, 0xC0, 0xC0, 0xA0, 0xA0, 0x00 /* Terminating entry */ }; struct
usb_driver as102_usb_driver = { .name = DRIVER_FULL_NAME, .probe = as102_usb_probe, .disconnect = as102_usb_disconnect, .id_table = as102_usb_id_table }; static const struct file_operations as102_dev_fops = { .owner = THIS_MODULE, .open = as102_open, .release = as102_release, }; static struct usb_class_driver as102_usb_class_driver = { .name = "aton2-%d", .fops = &as102_dev_fops, .minor_base = AS102_DEVICE_MAJOR, }; static int as102_usb_xfer_cmd(struct as10x_bus_adapter_t *bus_adap, unsigned char *send_buf, int send_buf_len, unsigned char *recv_buf, int recv_buf_len) { int ret = 0; if (send_buf != NULL) { ret = usb_control_msg(bus_adap->usb_dev, usb_sndctrlpipe(bus_adap->usb_dev, 0), AS102_USB_DEVICE_TX_CTRL_CMD, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, bus_adap->cmd_xid, /* value */ 0, /* index */ send_buf, send_buf_len, USB_CTRL_SET_TIMEOUT /* 200 */); if (ret < 0) { dev_dbg(&bus_adap->usb_dev->dev, "usb_control_msg(send) failed, err %i\n", ret); return ret; } if (ret != send_buf_len) { dev_dbg(&bus_adap->usb_dev->dev, "only wrote %d of %d bytes\n", ret, send_buf_len); return -1; } } if (recv_buf != NULL) { #ifdef TRACE dev_dbg(bus_adap->usb_dev->dev, "want to read: %d bytes\n", recv_buf_len); #endif ret = usb_control_msg(bus_adap->usb_dev, usb_rcvctrlpipe(bus_adap->usb_dev, 0), AS102_USB_DEVICE_RX_CTRL_CMD, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, bus_adap->cmd_xid, /* value */ 0, /* index */ recv_buf, recv_buf_len, USB_CTRL_GET_TIMEOUT /* 200 */); if (ret < 0) { dev_dbg(&bus_adap->usb_dev->dev, "usb_control_msg(recv) failed, err %i\n", ret); return ret; } #ifdef TRACE dev_dbg(bus_adap->usb_dev->dev, "read %d bytes\n", recv_buf_len); #endif } return ret; } static int as102_send_ep1(struct as10x_bus_adapter_t *bus_adap, unsigned char *send_buf, int send_buf_len, int swap32) { int ret, actual_len; ret = usb_bulk_msg(bus_adap->usb_dev, usb_sndbulkpipe(bus_adap->usb_dev, 1), send_buf, send_buf_len, &actual_len, 200); if (ret) { dev_dbg(&bus_adap->usb_dev->dev, "usb_bulk_msg(send) failed, err %i\n", ret); return ret; } if (actual_len != send_buf_len) { dev_dbg(&bus_adap->usb_dev->dev, "only wrote %d of %d bytes\n", actual_len, send_buf_len); return -1; } return actual_len; } static int as102_read_ep2(struct as10x_bus_adapter_t *bus_adap, unsigned char *recv_buf, int recv_buf_len) { int ret, actual_len; if (recv_buf == NULL) return -EINVAL; ret = usb_bulk_msg(bus_adap->usb_dev, usb_rcvbulkpipe(bus_adap->usb_dev, 2), recv_buf, recv_buf_len, &actual_len, 200); if (ret) { dev_dbg(&bus_adap->usb_dev->dev, "usb_bulk_msg(recv) failed, err %i\n", ret); return ret; } if (actual_len != recv_buf_len) { dev_dbg(&bus_adap->usb_dev->dev, "only read %d of %d bytes\n", actual_len, recv_buf_len); return -1; } return actual_len; } static const struct as102_priv_ops_t as102_priv_ops = { .upload_fw_pkt = as102_send_ep1, .xfer_cmd = as102_usb_xfer_cmd, .as102_read_ep2 = as102_read_ep2, .start_stream = as102_usb_start_stream, .stop_stream = as102_usb_stop_stream, }; static int as102_submit_urb_stream(struct as102_dev_t *dev, struct urb *urb) { int err; usb_fill_bulk_urb(urb, dev->bus_adap.usb_dev, usb_rcvbulkpipe(dev->bus_adap.usb_dev, 0x2), urb->transfer_buffer, AS102_USB_BUF_SIZE, as102_urb_stream_irq, dev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) dev_dbg(&urb->dev->dev, "%s: usb_submit_urb failed\n", __func__); return err; } void as102_urb_stream_irq(struct urb *urb) { struct as102_dev_t *as102_dev = urb->context; if (urb->actual_length > 0) { dvb_dmx_swfilter(&as102_dev->dvb_dmx, 
urb->transfer_buffer, urb->actual_length); } else { if (urb->actual_length == 0) memset(urb->transfer_buffer, 0, AS102_USB_BUF_SIZE); } /* is not stopped, re-submit urb */ if (as102_dev->streaming) as102_submit_urb_stream(as102_dev, urb); } static void as102_free_usb_stream_buffer(struct as102_dev_t *dev) { int i; for (i = 0; i < MAX_STREAM_URB; i++) usb_free_urb(dev->stream_urb[i]); usb_free_coherent(dev->bus_adap.usb_dev, MAX_STREAM_URB * AS102_USB_BUF_SIZE, dev->stream, dev->dma_addr); } static int as102_alloc_usb_stream_buffer(struct as102_dev_t *dev) { int i; dev->stream = usb_alloc_coherent(dev->bus_adap.usb_dev, MAX_STREAM_URB * AS102_USB_BUF_SIZE, GFP_KERNEL, &dev->dma_addr); if (!dev->stream) { dev_dbg(&dev->bus_adap.usb_dev->dev, "%s: usb_buffer_alloc failed\n", __func__); return -ENOMEM; } memset(dev->stream, 0, MAX_STREAM_URB * AS102_USB_BUF_SIZE); /* init urb buffers */ for (i = 0; i < MAX_STREAM_URB; i++) { struct urb *urb; urb = usb_alloc_urb(0, GFP_ATOMIC); if (urb == NULL) { as102_free_usb_stream_buffer(dev); return -ENOMEM; } urb->transfer_buffer = dev->stream + (i * AS102_USB_BUF_SIZE); urb->transfer_dma = dev->dma_addr + (i * AS102_USB_BUF_SIZE); urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; urb->transfer_buffer_length = AS102_USB_BUF_SIZE; dev->stream_urb[i] = urb; } return 0; } static void as102_usb_stop_stream(struct as102_dev_t *dev) { int i; for (i = 0; i < MAX_STREAM_URB; i++) usb_kill_urb(dev->stream_urb[i]); } static int as102_usb_start_stream(struct as102_dev_t *dev) { int i, ret = 0; for (i = 0; i < MAX_STREAM_URB; i++) { ret = as102_submit_urb_stream(dev, dev->stream_urb[i]); if (ret) { as102_usb_stop_stream(dev); return ret; } } return 0; } static void as102_usb_release(struct kref *kref) { struct as102_dev_t *as102_dev; as102_dev = container_of(kref, struct as102_dev_t, kref); usb_put_dev(as102_dev->bus_adap.usb_dev); kfree(as102_dev); } static void as102_usb_disconnect(struct usb_interface *intf) { struct as102_dev_t *as102_dev; /* extract as102_dev_t from usb_device private data */ as102_dev = usb_get_intfdata(intf); /* unregister dvb layer */ as102_dvb_unregister(as102_dev); /* free usb buffers */ as102_free_usb_stream_buffer(as102_dev); usb_set_intfdata(intf, NULL); /* usb unregister device */ usb_deregister_dev(intf, &as102_usb_class_driver); /* decrement usage counter */ kref_put(&as102_dev->kref, as102_usb_release); pr_info("%s: device has been disconnected\n", DRIVER_NAME); } static int as102_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { int ret; struct as102_dev_t *as102_dev; int i; /* This should never actually happen */ if (ARRAY_SIZE(as102_usb_id_table) != (sizeof(as102_device_names) / sizeof(const char *))) { pr_err("Device names table invalid size"); return -EINVAL; } as102_dev = kzalloc(sizeof(struct as102_dev_t), GFP_KERNEL); if (as102_dev == NULL) return -ENOMEM; /* Assign the user-friendly device name */ for (i = 0; i < ARRAY_SIZE(as102_usb_id_table); i++) { if (id == &as102_usb_id_table[i]) { as102_dev->name = as102_device_names[i]; as102_dev->elna_cfg = as102_elna_cfg[i]; } } if (as102_dev->name == NULL) as102_dev->name = "Unknown AS102 device"; /* set private callback functions */ as102_dev->bus_adap.ops = &as102_priv_ops; /* init cmd token for usb bus */ as102_dev->bus_adap.cmd = &as102_dev->bus_adap.token.usb.c; as102_dev->bus_adap.rsp = &as102_dev->bus_adap.token.usb.r; /* init kernel device reference */ kref_init(&as102_dev->kref); /* store as102 device to usb_device private data */ usb_set_intfdata(intf, 
(void *) as102_dev); /* store in as102 device the usb_device pointer */ as102_dev->bus_adap.usb_dev = usb_get_dev(interface_to_usbdev(intf)); /* we can register the device now, as it is ready */ ret = usb_register_dev(intf, &as102_usb_class_driver); if (ret < 0) { /* something prevented us from registering this driver */ dev_err(&intf->dev, "%s: usb_register_dev() failed (errno = %d)\n", __func__, ret); goto failed; } pr_info("%s: device has been detected\n", DRIVER_NAME); /* request buffer allocation for streaming */ ret = as102_alloc_usb_stream_buffer(as102_dev); if (ret != 0) goto failed_stream; /* register dvb layer */ ret = as102_dvb_register(as102_dev); if (ret != 0) goto failed_dvb; return ret; failed_dvb: as102_free_usb_stream_buffer(as102_dev); failed_stream: usb_deregister_dev(intf, &as102_usb_class_driver); failed: usb_put_dev(as102_dev->bus_adap.usb_dev); usb_set_intfdata(intf, NULL); kfree(as102_dev); return ret; } static int as102_open(struct inode *inode, struct file *file) { int ret = 0, minor = 0; struct usb_interface *intf = NULL; struct as102_dev_t *dev = NULL; /* read minor from inode */ minor = iminor(inode); /* fetch device from usb interface */ intf = usb_find_interface(&as102_usb_driver, minor); if (intf == NULL) { pr_err("%s: can't find device for minor %d\n", __func__, minor); ret = -ENODEV; goto exit; } /* get our device */ dev = usb_get_intfdata(intf); if (dev == NULL) { ret = -EFAULT; goto exit; } /* save our device object in the file's private structure */ file->private_data = dev; /* increment our usage count for the device */ kref_get(&dev->kref); exit: return ret; } static int as102_release(struct inode *inode, struct file *file) { struct as102_dev_t *dev = NULL; dev = file->private_data; if (dev != NULL) { /* decrement the count on our device */ kref_put(&dev->kref, as102_usb_release); } return 0; } MODULE_DEVICE_TABLE(usb, as102_usb_id_table);
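/*
 * Usage sketch (not part of the driver above): the rest of the as102 code
 * reaches the device through the ops table installed in as102_usb_probe()
 * (.xfer_cmd points at as102_usb_xfer_cmd()). A command exchange therefore
 * looks roughly like the hypothetical helper below; the request/response
 * buffers and lengths are placeholders, not a real AS10x command.
 */
static int example_send_command(struct as102_dev_t *dev,
				unsigned char *req, int req_len,
				unsigned char *rsp, int rsp_len)
{
	struct as10x_bus_adapter_t *bus = &dev->bus_adap;

	/* dispatch through the bus adapter ops, as the higher layers do */
	return bus->ops->xfer_cmd(bus, req, req_len, rsp, rsp_len);
}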
// SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2021 Intel Corporation */ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include "hci_codec.h" static int hci_codec_list_add(struct list_head *list, struct hci_op_read_local_codec_caps *sent, struct hci_rp_read_local_codec_caps *rp, void *caps, __u32 len) { struct codec_list *entry; entry = kzalloc(sizeof(*entry) + len, GFP_KERNEL); if (!entry) return -ENOMEM; entry->id = sent->id; if (sent->id == 0xFF) { entry->cid = __le16_to_cpu(sent->cid); entry->vid = __le16_to_cpu(sent->vid); } entry->transport = sent->transport; entry->len = len; entry->num_caps = 0; if (rp) { entry->num_caps = rp->num_caps; memcpy(entry->caps, caps, len); } list_add(&entry->list, list); return 0; } void hci_codec_list_clear(struct list_head *codec_list) { struct codec_list *c, *n; list_for_each_entry_safe(c, n, codec_list, list) { list_del(&c->list); kfree(c); } } static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport, struct hci_op_read_local_codec_caps *cmd) { __u8 i; for (i = 0; i < TRANSPORT_TYPE_MAX; i++) { if (transport & BIT(i)) { struct hci_rp_read_local_codec_caps *rp; struct hci_codec_caps *caps; struct sk_buff *skb; __u8 j; __u32 len; cmd->transport = i; /* If Read_Codec_Capabilities command is not supported * then just add codec to the list without caps */ if (!(hdev->commands[45] & 0x08)) { hci_dev_lock(hdev); hci_codec_list_add(&hdev->local_codecs, cmd, NULL, NULL, 0); hci_dev_unlock(hdev); continue; } skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS, sizeof(*cmd), cmd, 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read codec capabilities (%ld)", PTR_ERR(skb)); continue; } if (skb->len < sizeof(*rp)) goto error; rp = (void *)skb->data; if (rp->status) goto error; if (!rp->num_caps) { len = 0; /* this codec doesn't have capabilities */ goto skip_caps_parse; } skb_pull(skb, sizeof(*rp)); for (j = 0, len = 0; j < rp->num_caps; j++) { caps = (void *)skb->data; if (skb->len < sizeof(*caps)) goto error; if (skb->len < caps->len) goto error; len += sizeof(caps->len) + caps->len; skb_pull(skb, sizeof(caps->len) + caps->len); } skip_caps_parse: hci_dev_lock(hdev); hci_codec_list_add(&hdev->local_codecs, cmd, rp, (__u8 *)rp + sizeof(*rp), len); hci_dev_unlock(hdev); error: kfree_skb(skb); } } } void hci_read_supported_codecs(struct hci_dev *hdev) { struct sk_buff *skb; struct hci_rp_read_local_supported_codecs *rp; struct hci_std_codecs *std_codecs; struct hci_vnd_codecs *vnd_codecs; struct hci_op_read_local_codec_caps caps; __u8 i; skb = __hci_cmd_sync_sk(hdev,
HCI_OP_READ_LOCAL_CODECS, 0, NULL, 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", PTR_ERR(skb)); return; } if (skb->len < sizeof(*rp)) goto error; rp = (void *)skb->data; if (rp->status) goto error; skb_pull(skb, sizeof(rp->status)); std_codecs = (void *)skb->data; /* validate codecs length before accessing */ if (skb->len < flex_array_size(std_codecs, codec, std_codecs->num) + sizeof(std_codecs->num)) goto error; /* enumerate codec capabilities of standard codecs */ memset(&caps, 0, sizeof(caps)); for (i = 0; i < std_codecs->num; i++) { caps.id = std_codecs->codec[i]; caps.direction = 0x00; hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps); } skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num) + sizeof(std_codecs->num)); vnd_codecs = (void *)skb->data; /* validate vendor codecs length before accessing */ if (skb->len < flex_array_size(vnd_codecs, codec, vnd_codecs->num) + sizeof(vnd_codecs->num)) goto error; /* enumerate vendor codec capabilities */ for (i = 0; i < vnd_codecs->num; i++) { caps.id = 0xFF; caps.cid = vnd_codecs->codec[i].cid; caps.vid = vnd_codecs->codec[i].vid; caps.direction = 0x00; hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps); } error: kfree_skb(skb); } void hci_read_supported_codecs_v2(struct hci_dev *hdev) { struct sk_buff *skb; struct hci_rp_read_local_supported_codecs_v2 *rp; struct hci_std_codecs_v2 *std_codecs; struct hci_vnd_codecs_v2 *vnd_codecs; struct hci_op_read_local_codec_caps caps; __u8 i; skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL, 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", PTR_ERR(skb)); return; } if (skb->len < sizeof(*rp)) goto error; rp = (void *)skb->data; if (rp->status) goto error; skb_pull(skb, sizeof(rp->status)); std_codecs = (void *)skb->data; /* check for payload data length before accessing */ if (skb->len < flex_array_size(std_codecs, codec, std_codecs->num) + sizeof(std_codecs->num)) goto error; memset(&caps, 0, sizeof(caps)); for (i = 0; i < std_codecs->num; i++) { caps.id = std_codecs->codec[i].id; hci_read_codec_capabilities(hdev, std_codecs->codec[i].transport, &caps); } skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num) + sizeof(std_codecs->num)); vnd_codecs = (void *)skb->data; /* check for payload data length before accessing */ if (skb->len < flex_array_size(vnd_codecs, codec, vnd_codecs->num) + sizeof(vnd_codecs->num)) goto error; for (i = 0; i < vnd_codecs->num; i++) { caps.id = 0xFF; caps.cid = vnd_codecs->codec[i].cid; caps.vid = vnd_codecs->codec[i].vid; hci_read_codec_capabilities(hdev, vnd_codecs->codec[i].transport, &caps); } error: kfree_skb(skb); }
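/*
 * Usage sketch (not part of this file): once hci_read_supported_codecs() or
 * hci_read_supported_codecs_v2() has run, hdev->local_codecs holds one
 * codec_list entry per codec (struct codec_list lives in hci_codec.h; the
 * fields used here are the ones filled in by hci_codec_list_add() above).
 * A hypothetical consumer would walk the list under hci_dev_lock():
 */
static void example_dump_local_codecs(struct hci_dev *hdev)
{
	struct codec_list *c;

	hci_dev_lock(hdev);
	list_for_each_entry(c, &hdev->local_codecs, list) {
		if (c->id == 0xFF)	/* vendor-specific codec */
			bt_dev_info(hdev, "vendor codec cid 0x%4.4x vid 0x%4.4x, %u caps",
				    c->cid, c->vid, c->num_caps);
		else
			bt_dev_info(hdev, "standard codec 0x%2.2x, %u caps",
				    c->id, c->num_caps);
	}
	hci_dev_unlock(hdev);
}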
// SPDX-License-Identifier: GPL-2.0-or-later /* * Roccat Savu driver for Linux * * Copyright (c) 2012 Stefan Achatz <erazor_de@users.sourceforge.net> */ /* */ /* Roccat Savu is a gamer mouse with macro keys that can be configured in * 5 profiles. */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" #include "hid-roccat-savu.h" ROCCAT_COMMON2_BIN_ATTRIBUTE_W(control, 0x4, 0x03); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(profile, 0x5, 0x03); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(general, 0x6, 0x10); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(buttons, 0x7, 0x2f); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(macro, 0x8, 0x0823); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(info, 0x9, 0x08); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(sensor, 0xc, 0x04); static struct bin_attribute *savu_bin_attrs[] = { &bin_attr_control, &bin_attr_profile, &bin_attr_general, &bin_attr_buttons, &bin_attr_macro, &bin_attr_info, &bin_attr_sensor, NULL, }; static const struct attribute_group savu_group = { .bin_attrs = savu_bin_attrs, }; static const struct attribute_group *savu_groups[] = { &savu_group, NULL, }; static const struct class savu_class = { .name = "savu", .dev_groups = savu_groups, }; static int savu_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct roccat_common2_device *savu; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) { hid_set_drvdata(hdev, NULL); return 0; } savu = kzalloc(sizeof(*savu), GFP_KERNEL); if (!savu) { hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, savu); retval = roccat_common2_device_init_struct(usb_dev, savu); if (retval) { hid_err(hdev, "couldn't init Savu device\n"); goto exit_free; } retval = roccat_connect(&savu_class, hdev, sizeof(struct savu_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { savu->chrdev_minor = retval; savu->roccat_claimed = 1; } return 0; exit_free: kfree(savu); return retval; } static void savu_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct roccat_common2_device *savu; if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) return; savu = hid_get_drvdata(hdev); if (savu->roccat_claimed) roccat_disconnect(savu->chrdev_minor); kfree(savu); } static int savu_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev,
"parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = savu_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void savu_remove(struct hid_device *hdev) { savu_remove_specials(hdev); hid_hw_stop(hdev); } static void savu_report_to_chrdev(struct roccat_common2_device const *savu, u8 const *data) { struct savu_roccat_report roccat_report; struct savu_mouse_report_special const *special_report; if (data[0] != SAVU_MOUSE_REPORT_NUMBER_SPECIAL) return; special_report = (struct savu_mouse_report_special const *)data; roccat_report.type = special_report->type; roccat_report.data[0] = special_report->data[0]; roccat_report.data[1] = special_report->data[1]; roccat_report_event(savu->chrdev_minor, (uint8_t const *)&roccat_report); } static int savu_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct roccat_common2_device *savu = hid_get_drvdata(hdev); if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) return 0; if (savu == NULL) return 0; if (savu->roccat_claimed) savu_report_to_chrdev(savu, data); return 0; } static const struct hid_device_id savu_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_SAVU) }, { } }; MODULE_DEVICE_TABLE(hid, savu_devices); static struct hid_driver savu_driver = { .name = "savu", .id_table = savu_devices, .probe = savu_probe, .remove = savu_remove, .raw_event = savu_raw_event }; static int __init savu_init(void) { int retval; retval = class_register(&savu_class); if (retval) return retval; retval = hid_register_driver(&savu_driver); if (retval) class_unregister(&savu_class); return retval; } static void __exit savu_exit(void) { hid_unregister_driver(&savu_driver); class_unregister(&savu_class); } module_init(savu_init); module_exit(savu_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Savu driver"); MODULE_LICENSE("GPL v2");
// SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/filemap.c * * Copyright (C) 1994-1999 Linus Torvalds */ /* * This file handles the generic file mmap semantics used by * most "normal" filesystems (but you don't /have/ to use this: * the NFS filesystem used to do this differently, for example) */ #include <linux/export.h> #include <linux/compiler.h> #include <linux/dax.h> #include <linux/fs.h> #include <linux/sched/signal.h> #include <linux/uaccess.h> #include <linux/capability.h> #include <linux/kernel_stat.h> #include <linux/gfp.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/swapops.h>
#include <linux/syscalls.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/file.h> #include <linux/uio.h> #include <linux/error-injection.h> #include <linux/hash.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> #include <linux/security.h> #include <linux/cpuset.h> #include <linux/hugetlb.h> #include <linux/memcontrol.h> #include <linux/shmem_fs.h> #include <linux/rmap.h> #include <linux/delayacct.h> #include <linux/psi.h> #include <linux/ramfs.h> #include <linux/page_idle.h> #include <linux/migrate.h> #include <linux/pipe_fs_i.h> #include <linux/splice.h> #include <linux/rcupdate_wait.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include "internal.h" #define CREATE_TRACE_POINTS #include <trace/events/filemap.h> /* * FIXME: remove all knowledge of the buffer layer from the core VM */ #include <linux/buffer_head.h> /* for try_to_free_buffers */ #include <asm/mman.h> #include "swap.h" /* * Shared mappings implemented 30.11.1994. It's not fully working yet, * though. * * Shared mappings now work. 15.8.1995 Bruno. * * finished 'unifying' the page and buffer cache and SMP-threaded the * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com> * * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de> */ /* * Lock ordering: * * ->i_mmap_rwsem (truncate_pagecache) * ->private_lock (__free_pte->block_dirty_folio) * ->swap_lock (exclusive_swap_page, others) * ->i_pages lock * * ->i_rwsem * ->invalidate_lock (acquired by fs in truncate path) * ->i_mmap_rwsem (truncate->unmap_mapping_range) * * ->mmap_lock * ->i_mmap_rwsem * ->page_table_lock or pte_lock (various, mainly in memory.c) * ->i_pages lock (arch-dependent flush_dcache_mmap_lock) * * ->mmap_lock * ->invalidate_lock (filemap_fault) * ->lock_page (filemap_fault, access_process_vm) * * ->i_rwsem (generic_perform_write) * ->mmap_lock (fault_in_readable->do_page_fault) * * bdi->wb.list_lock * sb_lock (fs/fs-writeback.c) * ->i_pages lock (__sync_single_inode) * * ->i_mmap_rwsem * ->anon_vma.lock (vma_merge) * * ->anon_vma.lock * ->page_table_lock or pte_lock (anon_vma_prepare and various) * * ->page_table_lock or pte_lock * ->swap_lock (try_to_unmap_one) * ->private_lock (try_to_unmap_one) * ->i_pages lock (try_to_unmap_one) * ->lruvec->lru_lock (follow_page->mark_page_accessed) * ->lruvec->lru_lock (check_pte_range->isolate_lru_page) * ->private_lock (folio_remove_rmap_pte->set_page_dirty) * ->i_pages lock (folio_remove_rmap_pte->set_page_dirty) * bdi.wb->list_lock (folio_remove_rmap_pte->set_page_dirty) * ->inode->i_lock (folio_remove_rmap_pte->set_page_dirty) * ->memcg->move_lock (folio_remove_rmap_pte->folio_memcg_lock) * bdi.wb->list_lock (zap_pte_range->set_page_dirty) * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->block_dirty_folio) */ static void mapping_set_update(struct xa_state *xas, struct address_space *mapping) { if (dax_mapping(mapping) || shmem_mapping(mapping)) return; xas_set_update(xas, workingset_update_node); xas_set_lru(xas, &shadow_nodes); } static void page_cache_delete(struct address_space *mapping, struct folio *folio, void *shadow) { XA_STATE(xas, &mapping->i_pages, folio->index); long nr = 1; mapping_set_update(&xas, mapping); xas_set_order(&xas, folio->index, folio_order(folio)); nr = folio_nr_pages(folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); xas_store(&xas, shadow); xas_init_marks(&xas); folio->mapping = NULL; /* Leave page->index set: truncation lookup relies upon it */ mapping->nrpages -= 
nr; } static void filemap_unaccount_folio(struct address_space *mapping, struct folio *folio) { long nr; VM_BUG_ON_FOLIO(folio_mapped(folio), folio); if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) { pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n", current->comm, folio_pfn(folio)); dump_page(&folio->page, "still mapped when deleted"); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); if (mapping_exiting(mapping) && !folio_test_large(folio)) { int mapcount = page_mapcount(&folio->page); if (folio_ref_count(folio) >= mapcount + 2) { /* * All vmas have already been torn down, so it's * a good bet that actually the page is unmapped * and we'd rather not leak it: if we're wrong, * another bad page check should catch it later. */ page_mapcount_reset(&folio->page); folio_ref_sub(folio, mapcount); } } } /* hugetlb folios do not participate in page cache accounting. */ if (folio_test_hugetlb(folio)) return; nr = folio_nr_pages(folio); __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); if (folio_test_swapbacked(folio)) { __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); if (folio_test_pmd_mappable(folio)) __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); } else if (folio_test_pmd_mappable(folio)) { __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); filemap_nr_thps_dec(mapping); } /* * At this point folio must be either written or cleaned by * truncate. Dirty folio here signals a bug and loss of * unwritten data - on ordinary filesystems. * * But it's harmless on in-memory filesystems like tmpfs; and can * occur when a driver which did get_user_pages() sets page dirty * before putting it, while the inode is being finally evicted. * * Below fixes dirty accounting after removing the folio entirely * but leaves the dirty flag set: it has no effect for truncated * folio and anyway will be cleared before returning folio to * buddy allocator. */ if (WARN_ON_ONCE(folio_test_dirty(folio) && mapping_can_writeback(mapping))) folio_account_cleaned(folio, inode_to_wb(mapping->host)); } /* * Delete a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold the i_pages lock. */ void __filemap_remove_folio(struct folio *folio, void *shadow) { struct address_space *mapping = folio->mapping; trace_mm_filemap_delete_from_page_cache(folio); filemap_unaccount_folio(mapping, folio); page_cache_delete(mapping, folio, shadow); } void filemap_free_folio(struct address_space *mapping, struct folio *folio) { void (*free_folio)(struct folio *); int refs = 1; free_folio = mapping->a_ops->free_folio; if (free_folio) free_folio(folio); if (folio_test_large(folio)) refs = folio_nr_pages(folio); folio_put_refs(folio, refs); } /** * filemap_remove_folio - Remove folio from page cache. * @folio: The folio. * * This must be called only on folios that are locked and have been * verified to be in the page cache. It will never put the folio into * the free list because the caller has a reference on the page. 
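 *
 * As a hedged illustration of the expected calling pattern (not copied
 * from any in-tree caller; the lookup of the folio and its mapping is
 * assumed to have happened already, and the caller still holds its own
 * reference):
 *
 *	folio_lock(folio);
 *	if (folio->mapping == mapping)
 *		filemap_remove_folio(folio);
 *	folio_unlock(folio);
 *	folio_put(folio);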
*/ void filemap_remove_folio(struct folio *folio) { struct address_space *mapping = folio->mapping; BUG_ON(!folio_test_locked(folio)); spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); __filemap_remove_folio(folio, NULL); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); filemap_free_folio(mapping, folio); } /* * page_cache_delete_batch - delete several folios from page cache * @mapping: the mapping to which folios belong * @fbatch: batch of folios to delete * * The function walks over mapping->i_pages and removes folios passed in * @fbatch from the mapping. The function expects @fbatch to be sorted * by page index and is optimised for it to be dense. * It tolerates holes in @fbatch (mapping entries at those indices are not * modified). * * The function expects the i_pages lock to be held. */ static void page_cache_delete_batch(struct address_space *mapping, struct folio_batch *fbatch) { XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index); long total_pages = 0; int i = 0; struct folio *folio; mapping_set_update(&xas, mapping); xas_for_each(&xas, folio, ULONG_MAX) { if (i >= folio_batch_count(fbatch)) break; /* A swap/dax/shadow entry got inserted? Skip it. */ if (xa_is_value(folio)) continue; /* * A page got inserted in our range? Skip it. We have our * pages locked so they are protected from being removed. * If we see a page whose index is higher than ours, it * means our page has been removed, which shouldn't be * possible because we're holding the PageLock. */ if (folio != fbatch->folios[i]) { VM_BUG_ON_FOLIO(folio->index > fbatch->folios[i]->index, folio); continue; } WARN_ON_ONCE(!folio_test_locked(folio)); folio->mapping = NULL; /* Leave folio->index set: truncation lookup relies on it */ i++; xas_store(&xas, NULL); total_pages += folio_nr_pages(folio); } mapping->nrpages -= total_pages; } void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch) { int i; if (!folio_batch_count(fbatch)) return; spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); for (i = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; trace_mm_filemap_delete_from_page_cache(folio); filemap_unaccount_folio(mapping, folio); } page_cache_delete_batch(mapping, fbatch); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); for (i = 0; i < folio_batch_count(fbatch); i++) filemap_free_folio(mapping, fbatch->folios[i]); } int filemap_check_errors(struct address_space *mapping) { int ret = 0; /* Check for outstanding write errors */ if (test_bit(AS_ENOSPC, &mapping->flags) && test_and_clear_bit(AS_ENOSPC, &mapping->flags)) ret = -ENOSPC; if (test_bit(AS_EIO, &mapping->flags) && test_and_clear_bit(AS_EIO, &mapping->flags)) ret = -EIO; return ret; } EXPORT_SYMBOL(filemap_check_errors); static int filemap_check_and_keep_errors(struct address_space *mapping) { /* Check for outstanding write errors */ if (test_bit(AS_EIO, &mapping->flags)) return -EIO; if (test_bit(AS_ENOSPC, &mapping->flags)) return -ENOSPC; return 0; } /** * filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range * @mapping: address space structure to write * @wbc: the writeback_control controlling the writeout * * Call writepages on the mapping using the provided wbc to control the * writeout. * * Return: %0 on success, negative error code otherwise. 
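 *
 * A minimal sketch of a caller (the values below simply mirror what
 * __filemap_fdatawrite_range() further down builds for a full-file,
 * data-integrity writeout):
 *
 *	struct writeback_control wbc = {
 *		.sync_mode	= WB_SYNC_ALL,
 *		.nr_to_write	= LONG_MAX,
 *		.range_start	= 0,
 *		.range_end	= LLONG_MAX,
 *	};
 *
 *	err = filemap_fdatawrite_wbc(mapping, &wbc);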
*/ int filemap_fdatawrite_wbc(struct address_space *mapping, struct writeback_control *wbc) { int ret; if (!mapping_can_writeback(mapping) || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) return 0; wbc_attach_fdatawrite_inode(wbc, mapping->host); ret = do_writepages(mapping, wbc); wbc_detach_inode(wbc); return ret; } EXPORT_SYMBOL(filemap_fdatawrite_wbc); /** * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range * @mapping: address space structure to write * @start: offset in bytes where the range starts * @end: offset in bytes where the range ends (inclusive) * @sync_mode: enable synchronous operation * * Start writeback against all of a mapping's dirty pages that lie * within the byte offsets <start, end> inclusive. * * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as * opposed to a regular memory cleansing writeback. The difference between * these two operations is that if a dirty page/buffer is encountered, it must * be waited upon, and not just skipped over. * * Return: %0 on success, negative error code otherwise. */ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode) { struct writeback_control wbc = { .sync_mode = sync_mode, .nr_to_write = LONG_MAX, .range_start = start, .range_end = end, }; return filemap_fdatawrite_wbc(mapping, &wbc); } static inline int __filemap_fdatawrite(struct address_space *mapping, int sync_mode) { return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode); } int filemap_fdatawrite(struct address_space *mapping) { return __filemap_fdatawrite(mapping, WB_SYNC_ALL); } EXPORT_SYMBOL(filemap_fdatawrite); int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end) { return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL); } EXPORT_SYMBOL(filemap_fdatawrite_range); /** * filemap_flush - mostly a non-blocking flush * @mapping: target address_space * * This is a mostly non-blocking flush. Not suitable for data-integrity * purposes - I/O may not be started against all dirty pages. * * Return: %0 on success, negative error code otherwise. */ int filemap_flush(struct address_space *mapping) { return __filemap_fdatawrite(mapping, WB_SYNC_NONE); } EXPORT_SYMBOL(filemap_flush); /** * filemap_range_has_page - check if a page exists in range. * @mapping: address space within which to check * @start_byte: offset in bytes where the range starts * @end_byte: offset in bytes where the range ends (inclusive) * * Find at least one page in the range supplied, usually used to check if * direct writing in this range will trigger a writeback. * * Return: %true if at least one page exists in the specified range, * %false otherwise. */ bool filemap_range_has_page(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { struct folio *folio; XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT); pgoff_t max = end_byte >> PAGE_SHIFT; if (end_byte < start_byte) return false; rcu_read_lock(); for (;;) { folio = xas_find(&xas, max); if (xas_retry(&xas, folio)) continue; /* Shadow entries don't count */ if (xa_is_value(folio)) continue; /* * We don't need to try to pin this page; we're about to * release the RCU lock anyway. It is enough to know that * there was a page here recently. 
*/ break; } rcu_read_unlock(); return folio != NULL; } EXPORT_SYMBOL(filemap_range_has_page); static void __filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { pgoff_t index = start_byte >> PAGE_SHIFT; pgoff_t end = end_byte >> PAGE_SHIFT; struct folio_batch fbatch; unsigned nr_folios; folio_batch_init(&fbatch); while (index <= end) { unsigned i; nr_folios = filemap_get_folios_tag(mapping, &index, end, PAGECACHE_TAG_WRITEBACK, &fbatch); if (!nr_folios) break; for (i = 0; i < nr_folios; i++) { struct folio *folio = fbatch.folios[i]; folio_wait_writeback(folio); folio_clear_error(folio); } folio_batch_release(&fbatch); cond_resched(); } } /** * filemap_fdatawait_range - wait for writeback to complete * @mapping: address space structure to wait for * @start_byte: offset in bytes where the range starts * @end_byte: offset in bytes where the range ends (inclusive) * * Walk the list of under-writeback pages of the given address space * in the given range and wait for all of them. Check error status of * the address space and return it. * * Since the error status of the address space is cleared by this function, * callers are responsible for checking the return value and handling and/or * reporting the error. * * Return: error status of the address space. */ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { __filemap_fdatawait_range(mapping, start_byte, end_byte); return filemap_check_errors(mapping); } EXPORT_SYMBOL(filemap_fdatawait_range); /** * filemap_fdatawait_range_keep_errors - wait for writeback to complete * @mapping: address space structure to wait for * @start_byte: offset in bytes where the range starts * @end_byte: offset in bytes where the range ends (inclusive) * * Walk the list of under-writeback pages of the given address space in the * given range and wait for all of them. Unlike filemap_fdatawait_range(), * this function does not clear error status of the address space. * * Use this function if callers don't handle errors themselves. Expected * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2), * fsfreeze(8) */ int filemap_fdatawait_range_keep_errors(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { __filemap_fdatawait_range(mapping, start_byte, end_byte); return filemap_check_and_keep_errors(mapping); } EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors); /** * file_fdatawait_range - wait for writeback to complete * @file: file pointing to address space structure to wait for * @start_byte: offset in bytes where the range starts * @end_byte: offset in bytes where the range ends (inclusive) * * Walk the list of under-writeback pages of the address space that file * refers to, in the given range and wait for all of them. Check error * status of the address space vs. the file->f_wb_err cursor and return it. * * Since the error status of the file is advanced by this function, * callers are responsible for checking the return value and handling and/or * reporting the error. * * Return: error status of the address space vs. the file->f_wb_err cursor. 
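 *
 * Illustrative sketch only (a trimmed fsync-style sequence with the byte
 * range left hypothetical; file_write_and_wait_range() further down is
 * the full-featured form with more careful error handling):
 *
 *	err = filemap_fdatawrite_range(file->f_mapping, start, end);
 *	if (err)
 *		return err;
 *	return file_fdatawait_range(file, start, end);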
*/ int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte) { struct address_space *mapping = file->f_mapping; __filemap_fdatawait_range(mapping, start_byte, end_byte); return file_check_and_advance_wb_err(file); } EXPORT_SYMBOL(file_fdatawait_range); /** * filemap_fdatawait_keep_errors - wait for writeback without clearing errors * @mapping: address space structure to wait for * * Walk the list of under-writeback pages of the given address space * and wait for all of them. Unlike filemap_fdatawait(), this function * does not clear error status of the address space. * * Use this function if callers don't handle errors themselves. Expected * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2), * fsfreeze(8) * * Return: error status of the address space. */ int filemap_fdatawait_keep_errors(struct address_space *mapping) { __filemap_fdatawait_range(mapping, 0, LLONG_MAX); return filemap_check_and_keep_errors(mapping); } EXPORT_SYMBOL(filemap_fdatawait_keep_errors); /* Returns true if writeback might be needed or already in progress. */ static bool mapping_needs_writeback(struct address_space *mapping) { return mapping->nrpages; } bool filemap_range_has_writeback(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT); pgoff_t max = end_byte >> PAGE_SHIFT; struct folio *folio; if (end_byte < start_byte) return false; rcu_read_lock(); xas_for_each(&xas, folio, max) { if (xas_retry(&xas, folio)) continue; if (xa_is_value(folio)) continue; if (folio_test_dirty(folio) || folio_test_locked(folio) || folio_test_writeback(folio)) break; } rcu_read_unlock(); return folio != NULL; } EXPORT_SYMBOL_GPL(filemap_range_has_writeback); /** * filemap_write_and_wait_range - write out & wait on a file range * @mapping: the address_space for the pages * @lstart: offset in bytes where the range starts * @lend: offset in bytes where the range ends (inclusive) * * Write out and wait upon file offsets lstart->lend, inclusive. * * Note that @lend is inclusive (describes the last byte to be written) so * that this function can be used to write to the very end-of-file (end = -1). * * Return: error status of the address space. */ int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend) { int err = 0, err2; if (lend < lstart) return 0; if (mapping_needs_writeback(mapping)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); /* * Even if the above returned error, the pages may be * written partially (e.g. -ENOSPC), so we wait for it. * But the -EIO is special case, it may indicate the worst * thing (e.g. bug) happened, so we avoid waiting for it. */ if (err != -EIO) __filemap_fdatawait_range(mapping, lstart, lend); } err2 = filemap_check_errors(mapping); if (!err) err = err2; return err; } EXPORT_SYMBOL(filemap_write_and_wait_range); void __filemap_set_wb_err(struct address_space *mapping, int err) { errseq_t eseq = errseq_set(&mapping->wb_err, err); trace_filemap_set_wb_err(mapping, eseq); } EXPORT_SYMBOL(__filemap_set_wb_err); /** * file_check_and_advance_wb_err - report wb error (if any) that was previously * and advance wb_err to current one * @file: struct file on which the error is being reported * * When userland calls fsync (or something like nfsd does the equivalent), we * want to report any writeback errors that occurred since the last fsync (or * since the file was opened if there haven't been any). * * Grab the wb_err from the mapping. 
If it matches what we have in the file, * then just quickly return 0. The file is all caught up. * * If it doesn't match, then take the mapping value, set the "seen" flag in * it and try to swap it into place. If it works, or another task beat us * to it with the new value, then update the f_wb_err and return the error * portion. The error at this point must be reported via proper channels * (a'la fsync, or NFS COMMIT operation, etc.). * * While we handle mapping->wb_err with atomic operations, the f_wb_err * value is protected by the f_lock since we must ensure that it reflects * the latest value swapped in for this file descriptor. * * Return: %0 on success, negative error code otherwise. */ int file_check_and_advance_wb_err(struct file *file) { int err = 0; errseq_t old = READ_ONCE(file->f_wb_err); struct address_space *mapping = file->f_mapping; /* Locklessly handle the common case where nothing has changed */ if (errseq_check(&mapping->wb_err, old)) { /* Something changed, must use slow path */ spin_lock(&file->f_lock); old = file->f_wb_err; err = errseq_check_and_advance(&mapping->wb_err, &file->f_wb_err); trace_file_check_and_advance_wb_err(file, old); spin_unlock(&file->f_lock); } /* * We're mostly using this function as a drop in replacement for * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect * that the legacy code would have had on these flags. */ clear_bit(AS_EIO, &mapping->flags); clear_bit(AS_ENOSPC, &mapping->flags); return err; } EXPORT_SYMBOL(file_check_and_advance_wb_err); /** * file_write_and_wait_range - write out & wait on a file range * @file: file pointing to address_space with pages * @lstart: offset in bytes where the range starts * @lend: offset in bytes where the range ends (inclusive) * * Write out and wait upon file offsets lstart->lend, inclusive. * * Note that @lend is inclusive (describes the last byte to be written) so * that this function can be used to write to the very end-of-file (end = -1). * * After writing out and waiting on the data, we check and advance the * f_wb_err cursor to the latest value, and return any errors detected there. * * Return: %0 on success, negative error code otherwise. */ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend) { int err = 0, err2; struct address_space *mapping = file->f_mapping; if (lend < lstart) return 0; if (mapping_needs_writeback(mapping)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); /* See comment of filemap_write_and_wait() */ if (err != -EIO) __filemap_fdatawait_range(mapping, lstart, lend); } err2 = file_check_and_advance_wb_err(file); if (!err) err = err2; return err; } EXPORT_SYMBOL(file_write_and_wait_range); /** * replace_page_cache_folio - replace a pagecache folio with a new one * @old: folio to be replaced * @new: folio to replace with * * This function replaces a folio in the pagecache with a new one. On * success it acquires the pagecache reference for the new folio and * drops it for the old folio. Both the old and new folios must be * locked. This function does not add the new folio to the LRU, the * caller must do that. * * The remove + add is atomic. This function cannot fail. 
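 *
 * A hedged sketch of the calling convention only (names are illustrative
 * and no particular caller is copied here): both folios are locked, the
 * replacement is made, and the caller then publishes the new folio on
 * the LRU itself:
 *
 *	folio_lock(old);
 *	folio_lock(new);
 *	replace_page_cache_folio(old, new);
 *	folio_add_lru(new);
 *	folio_unlock(new);
 *	folio_unlock(old);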
*/ void replace_page_cache_folio(struct folio *old, struct folio *new) { struct address_space *mapping = old->mapping; void (*free_folio)(struct folio *) = mapping->a_ops->free_folio; pgoff_t offset = old->index; XA_STATE(xas, &mapping->i_pages, offset); VM_BUG_ON_FOLIO(!folio_test_locked(old), old); VM_BUG_ON_FOLIO(!folio_test_locked(new), new); VM_BUG_ON_FOLIO(new->mapping, new); folio_get(new); new->mapping = mapping; new->index = offset; mem_cgroup_replace_folio(old, new); xas_lock_irq(&xas); xas_store(&xas, new); old->mapping = NULL; /* hugetlb pages do not participate in page cache accounting. */ if (!folio_test_hugetlb(old)) __lruvec_stat_sub_folio(old, NR_FILE_PAGES); if (!folio_test_hugetlb(new)) __lruvec_stat_add_folio(new, NR_FILE_PAGES); if (folio_test_swapbacked(old)) __lruvec_stat_sub_folio(old, NR_SHMEM); if (folio_test_swapbacked(new)) __lruvec_stat_add_folio(new, NR_SHMEM); xas_unlock_irq(&xas); if (free_folio) free_folio(old); folio_put(old); } EXPORT_SYMBOL_GPL(replace_page_cache_folio); noinline int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp) { XA_STATE(xas, &mapping->i_pages, index); bool huge = folio_test_hugetlb(folio); bool charged = false; long nr = 1; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio); mapping_set_update(&xas, mapping); if (!huge) { int error = mem_cgroup_charge(folio, NULL, gfp); if (error) return error; charged = true; } VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio); xas_set_order(&xas, index, folio_order(folio)); nr = folio_nr_pages(folio); gfp &= GFP_RECLAIM_MASK; folio_ref_add(folio, nr); folio->mapping = mapping; folio->index = xas.xa_index; do { unsigned int order = xa_get_order(xas.xa, xas.xa_index); void *entry, *old = NULL; if (order > folio_order(folio)) xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), order, gfp); xas_lock_irq(&xas); xas_for_each_conflict(&xas, entry) { old = entry; if (!xa_is_value(entry)) { xas_set_err(&xas, -EEXIST); goto unlock; } } if (old) { if (shadowp) *shadowp = old; /* entry may have been split before we acquired lock */ order = xa_get_order(xas.xa, xas.xa_index); if (order > folio_order(folio)) { /* How to handle large swap entries? */ BUG_ON(shmem_mapping(mapping)); xas_split(&xas, old, order); xas_reset(&xas); } } xas_store(&xas, folio); if (xas_error(&xas)) goto unlock; mapping->nrpages += nr; /* hugetlb pages do not participate in page cache accounting */ if (!huge) { __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr); if (folio_test_pmd_mappable(folio)) __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr); } unlock: xas_unlock_irq(&xas); } while (xas_nomem(&xas, gfp)); if (xas_error(&xas)) goto error; trace_mm_filemap_add_to_page_cache(folio); return 0; error: if (charged) mem_cgroup_uncharge(folio); folio->mapping = NULL; /* Leave page->index set: truncation relies upon it */ folio_put_refs(folio, nr); return xas_error(&xas); } ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO); int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp) { void *shadow = NULL; int ret; __folio_set_locked(folio); ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow); if (unlikely(ret)) __folio_clear_locked(folio); else { /* * The folio might have been evicted from cache only * recently, in which case it should be activated like * any other repeatedly accessed folio. 
* The exception is folios getting rewritten; evicting other * data from the working set, only to cache data that will * get overwritten with something else, is a waste of memory. */ WARN_ON_ONCE(folio_test_active(folio)); if (!(gfp & __GFP_WRITE) && shadow) workingset_refault(folio, shadow); folio_add_lru(folio); } return ret; } EXPORT_SYMBOL_GPL(filemap_add_folio); #ifdef CONFIG_NUMA struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { int n; struct folio *folio; if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); folio = __folio_alloc_node(gfp, order, n); } while (!folio && read_mems_allowed_retry(cpuset_mems_cookie)); return folio; } return folio_alloc(gfp, order); } EXPORT_SYMBOL(filemap_alloc_folio); #endif /* * filemap_invalidate_lock_two - lock invalidate_lock for two mappings * * Lock exclusively invalidate_lock of any passed mapping that is not NULL. * * @mapping1: the first mapping to lock * @mapping2: the second mapping to lock */ void filemap_invalidate_lock_two(struct address_space *mapping1, struct address_space *mapping2) { if (mapping1 > mapping2) swap(mapping1, mapping2); if (mapping1) down_write(&mapping1->invalidate_lock); if (mapping2 && mapping1 != mapping2) down_write_nested(&mapping2->invalidate_lock, 1); } EXPORT_SYMBOL(filemap_invalidate_lock_two); /* * filemap_invalidate_unlock_two - unlock invalidate_lock for two mappings * * Unlock exclusive invalidate_lock of any passed mapping that is not NULL. * * @mapping1: the first mapping to unlock * @mapping2: the second mapping to unlock */ void filemap_invalidate_unlock_two(struct address_space *mapping1, struct address_space *mapping2) { if (mapping1) up_write(&mapping1->invalidate_lock); if (mapping2 && mapping1 != mapping2) up_write(&mapping2->invalidate_lock); } EXPORT_SYMBOL(filemap_invalidate_unlock_two); /* * In order to wait for pages to become available there must be * waitqueues associated with pages. By using a hash table of * waitqueues where the bucket discipline is to maintain all * waiters on the same queue and wake all when any of the pages * become available, and for the woken contexts to check to be * sure the appropriate page became available, this saves space * at a cost of "thundering herd" phenomena during rare hash * collisions. */ #define PAGE_WAIT_TABLE_BITS 8 #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; static wait_queue_head_t *folio_waitqueue(struct folio *folio) { return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)]; } void __init pagecache_init(void) { int i; for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) init_waitqueue_head(&folio_wait_table[i]); page_writeback_init(); } /* * The page wait code treats the "wait->flags" somewhat unusually, because * we have multiple different kinds of waits, not just the usual "exclusive" * one. * * We have: * * (a) no special bits set: * * We're just waiting for the bit to be released, and when a waker * calls the wakeup function, we set WQ_FLAG_WOKEN and wake it up, * and remove it from the wait queue. * * Simple and straightforward. * * (b) WQ_FLAG_EXCLUSIVE: * * The waiter is waiting to get the lock, and only one waiter should * be woken up to avoid any thundering herd behavior. We'll set the * WQ_FLAG_WOKEN bit, wake it up, and remove it from the wait queue. * * This is the traditional exclusive wait. 
* * (c) WQ_FLAG_EXCLUSIVE | WQ_FLAG_CUSTOM: * * The waiter is waiting to get the bit, and additionally wants the * lock to be transferred to it for fair lock behavior. If the lock * cannot be taken, we stop walking the wait queue without waking * the waiter. * * This is the "fair lock handoff" case, and in addition to setting * WQ_FLAG_WOKEN, we set WQ_FLAG_DONE to let the waiter easily see * that it now has the lock. */ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) { unsigned int flags; struct wait_page_key *key = arg; struct wait_page_queue *wait_page = container_of(wait, struct wait_page_queue, wait); if (!wake_page_match(wait_page, key)) return 0; /* * If it's a lock handoff wait, we get the bit for it, and * stop walking (and do not wake it up) if we can't. */ flags = wait->flags; if (flags & WQ_FLAG_EXCLUSIVE) { if (test_bit(key->bit_nr, &key->folio->flags)) return -1; if (flags & WQ_FLAG_CUSTOM) { if (test_and_set_bit(key->bit_nr, &key->folio->flags)) return -1; flags |= WQ_FLAG_DONE; } } /* * We are holding the wait-queue lock, but the waiter that * is waiting for this will be checking the flags without * any locking. * * So update the flags atomically, and wake up the waiter * afterwards to avoid any races. This store-release pairs * with the load-acquire in folio_wait_bit_common(). */ smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN); wake_up_state(wait->private, mode); /* * Ok, we have successfully done what we're waiting for, * and we can unconditionally remove the wait entry. * * Note that this pairs with the "finish_wait()" in the * waiter, and has to be the absolute last thing we do. * After this list_del_init(&wait->entry) the wait entry * might be de-allocated and the process might even have * exited. */ list_del_init_careful(&wait->entry); return (flags & WQ_FLAG_EXCLUSIVE) != 0; } static void folio_wake_bit(struct folio *folio, int bit_nr) { wait_queue_head_t *q = folio_waitqueue(folio); struct wait_page_key key; unsigned long flags; key.folio = folio; key.bit_nr = bit_nr; key.page_match = 0; spin_lock_irqsave(&q->lock, flags); __wake_up_locked_key(q, TASK_NORMAL, &key); /* * It's possible to miss clearing waiters here, when we woke our page * waiters, but the hashed waitqueue has waiters for other pages on it. * That's okay, it's a rare case. The next waker will clear it. * * Note that, depending on the page pool (buddy, hugetlb, ZONE_DEVICE, * other), the flag may be cleared in the course of freeing the page; * but that is not required for correctness. */ if (!waitqueue_active(q) || !key.page_match) folio_clear_waiters(folio); spin_unlock_irqrestore(&q->lock, flags); } /* * A choice of three behaviors for folio_wait_bit_common(): */ enum behavior { EXCLUSIVE, /* Hold ref to page and take the bit when woken, like * __folio_lock() waiting on then setting PG_locked. */ SHARED, /* Hold ref to page and check the bit when woken, like * folio_wait_writeback() waiting on PG_writeback. */ DROP, /* Drop ref to page before wait, no check when woken, * like folio_put_wait_locked() on PG_locked. */ }; /* * Attempt to check (or get) the folio flag, and mark us done * if successful. 
*/ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr, struct wait_queue_entry *wait) { if (wait->flags & WQ_FLAG_EXCLUSIVE) { if (test_and_set_bit(bit_nr, &folio->flags)) return false; } else if (test_bit(bit_nr, &folio->flags)) return false; wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE; return true; } /* How many times do we accept lock stealing from under a waiter? */ int sysctl_page_lock_unfairness = 5; static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, int state, enum behavior behavior) { wait_queue_head_t *q = folio_waitqueue(folio); int unfairness = sysctl_page_lock_unfairness; struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; bool thrashing = false; unsigned long pflags; bool in_thrashing; if (bit_nr == PG_locked && !folio_test_uptodate(folio) && folio_test_workingset(folio)) { delayacct_thrashing_start(&in_thrashing); psi_memstall_enter(&pflags); thrashing = true; } init_wait(wait); wait->func = wake_page_function; wait_page.folio = folio; wait_page.bit_nr = bit_nr; repeat: wait->flags = 0; if (behavior == EXCLUSIVE) { wait->flags = WQ_FLAG_EXCLUSIVE; if (--unfairness < 0) wait->flags |= WQ_FLAG_CUSTOM; } /* * Do one last check whether we can get the * page bit synchronously. * * Do the folio_set_waiters() marking before that * to let any waker we _just_ missed know they * need to wake us up (otherwise they'll never * even go to the slow case that looks at the * page queue), and add ourselves to the wait * queue if we need to sleep. * * This part needs to be done under the queue * lock to avoid races. */ spin_lock_irq(&q->lock); folio_set_waiters(folio); if (!folio_trylock_flag(folio, bit_nr, wait)) __add_wait_queue_entry_tail(q, wait); spin_unlock_irq(&q->lock); /* * From now on, all the logic will be based on * the WQ_FLAG_WOKEN and WQ_FLAG_DONE flag, to * see whether the page bit testing has already * been done by the wake function. * * We can drop our reference to the folio. */ if (behavior == DROP) folio_put(folio); /* * Note that until the "finish_wait()", or until * we see the WQ_FLAG_WOKEN flag, we need to * be very careful with the 'wait->flags', because * we may race with a waker that sets them. */ for (;;) { unsigned int flags; set_current_state(state); /* Loop until we've been woken or interrupted */ flags = smp_load_acquire(&wait->flags); if (!(flags & WQ_FLAG_WOKEN)) { if (signal_pending_state(state, current)) break; io_schedule(); continue; } /* If we were non-exclusive, we're done */ if (behavior != EXCLUSIVE) break; /* If the waker got the lock for us, we're done */ if (flags & WQ_FLAG_DONE) break; /* * Otherwise, if we're getting the lock, we need to * try to get it ourselves. * * And if that fails, we'll have to retry this all. */ if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0)))) goto repeat; wait->flags |= WQ_FLAG_DONE; break; } /* * If a signal happened, this 'finish_wait()' may remove the last * waiter from the wait-queues, but the folio waiters bit will remain * set. That's ok. The next wakeup will take care of it, and trying * to do it here would be difficult and prone to races. */ finish_wait(q, wait); if (thrashing) { delayacct_thrashing_end(&in_thrashing); psi_memstall_leave(&pflags); } /* * NOTE! The wait->flags weren't stable until we've done the * 'finish_wait()', and we could have exited the loop above due * to a signal, and had a wakeup event happen after the signal * test but before the 'finish_wait()'. 
* * So only after the finish_wait() can we reliably determine * if we got woken up or not, so we can now figure out the final * return value based on that state without races. * * Also note that WQ_FLAG_WOKEN is sufficient for a non-exclusive * waiter, but an exclusive one requires WQ_FLAG_DONE. */ if (behavior == EXCLUSIVE) return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR; return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR; } #ifdef CONFIG_MIGRATION /** * migration_entry_wait_on_locked - Wait for a migration entry to be removed * @entry: migration swap entry. * @ptl: already locked ptl. This function will drop the lock. * * Wait for a migration entry referencing the given page to be removed. This is * equivalent to put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE) except * this can be called without taking a reference on the page. Instead this * should be called while holding the ptl for the migration entry referencing * the page. * * Returns after unlocking the ptl. * * This follows the same logic as folio_wait_bit_common() so see the comments * there. */ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) __releases(ptl) { struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; bool thrashing = false; unsigned long pflags; bool in_thrashing; wait_queue_head_t *q; struct folio *folio = pfn_swap_entry_folio(entry); q = folio_waitqueue(folio); if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) { delayacct_thrashing_start(&in_thrashing); psi_memstall_enter(&pflags); thrashing = true; } init_wait(wait); wait->func = wake_page_function; wait_page.folio = folio; wait_page.bit_nr = PG_locked; wait->flags = 0; spin_lock_irq(&q->lock); folio_set_waiters(folio); if (!folio_trylock_flag(folio, PG_locked, wait)) __add_wait_queue_entry_tail(q, wait); spin_unlock_irq(&q->lock); /* * If a migration entry exists for the page the migration path must hold * a valid reference to the page, and it must take the ptl to remove the * migration entry. So the page is valid until the ptl is dropped. */ spin_unlock(ptl); for (;;) { unsigned int flags; set_current_state(TASK_UNINTERRUPTIBLE); /* Loop until we've been woken or interrupted */ flags = smp_load_acquire(&wait->flags); if (!(flags & WQ_FLAG_WOKEN)) { if (signal_pending_state(TASK_UNINTERRUPTIBLE, current)) break; io_schedule(); continue; } break; } finish_wait(q, wait); if (thrashing) { delayacct_thrashing_end(&in_thrashing); psi_memstall_leave(&pflags); } } #endif void folio_wait_bit(struct folio *folio, int bit_nr) { folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); } EXPORT_SYMBOL(folio_wait_bit); int folio_wait_bit_killable(struct folio *folio, int bit_nr) { return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED); } EXPORT_SYMBOL(folio_wait_bit_killable); /** * folio_put_wait_locked - Drop a reference and wait for it to be unlocked * @folio: The folio to wait for. * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc). * * The caller should hold a reference on @folio. They expect the page to * become unlocked relatively soon, but do not wish to hold up migration * (for example) by holding the reference while waiting for the folio to * come unlocked. After this function returns, the caller should not * dereference @folio. * * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal. 
*/ static int folio_put_wait_locked(struct folio *folio, int state) { return folio_wait_bit_common(folio, PG_locked, state, DROP); } /** * folio_add_wait_queue - Add an arbitrary waiter to a folio's wait queue * @folio: Folio defining the wait queue of interest * @waiter: Waiter to add to the queue * * Add an arbitrary @waiter to the wait queue for the nominated @folio. */ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter) { wait_queue_head_t *q = folio_waitqueue(folio); unsigned long flags; spin_lock_irqsave(&q->lock, flags); __add_wait_queue_entry_tail(q, waiter); folio_set_waiters(folio); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL_GPL(folio_add_wait_queue); /** * folio_unlock - Unlock a locked folio. * @folio: The folio. * * Unlocks the folio and wakes up any thread sleeping on the page lock. * * Context: May be called from interrupt or process context. May not be * called from NMI context. */ void folio_unlock(struct folio *folio) { /* Bit 7 allows x86 to check the byte's sign bit */ BUILD_BUG_ON(PG_waiters != 7); BUILD_BUG_ON(PG_locked > 7); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (folio_xor_flags_has_waiters(folio, 1 << PG_locked)) folio_wake_bit(folio, PG_locked); } EXPORT_SYMBOL(folio_unlock); /** * folio_end_read - End read on a folio. * @folio: The folio. * @success: True if all reads completed successfully. * * When all reads against a folio have completed, filesystems should * call this function to let the pagecache know that no more reads * are outstanding. This will unlock the folio and wake up any thread * sleeping on the lock. The folio will also be marked uptodate if all * reads succeeded. * * Context: May be called from interrupt or process context. May not be * called from NMI context. */ void folio_end_read(struct folio *folio, bool success) { unsigned long mask = 1 << PG_locked; /* Must be in bottom byte for x86 to work */ BUILD_BUG_ON(PG_uptodate > 7); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio); if (likely(success)) mask |= 1 << PG_uptodate; if (folio_xor_flags_has_waiters(folio, mask)) folio_wake_bit(folio, PG_locked); } EXPORT_SYMBOL(folio_end_read); /** * folio_end_private_2 - Clear PG_private_2 and wake any waiters. * @folio: The folio. * * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for * it. The folio reference held for PG_private_2 being set is released. * * This is, for example, used when a netfs folio is being written to a local * disk cache, thereby allowing writes to the cache for the same folio to be * serialised. */ void folio_end_private_2(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio); clear_bit_unlock(PG_private_2, folio_flags(folio, 0)); folio_wake_bit(folio, PG_private_2); folio_put(folio); } EXPORT_SYMBOL(folio_end_private_2); /** * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio. * @folio: The folio to wait on. * * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio. */ void folio_wait_private_2(struct folio *folio) { while (folio_test_private_2(folio)) folio_wait_bit(folio, PG_private_2); } EXPORT_SYMBOL(folio_wait_private_2); /** * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio. * @folio: The folio to wait on. * * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a * fatal signal is received by the calling task. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. 
*/ int folio_wait_private_2_killable(struct folio *folio) { int ret = 0; while (folio_test_private_2(folio)) { ret = folio_wait_bit_killable(folio, PG_private_2); if (ret < 0) break; } return ret; } EXPORT_SYMBOL(folio_wait_private_2_killable); /** * folio_end_writeback - End writeback against a folio. * @folio: The folio. * * The folio must actually be under writeback. * * Context: May be called from process or interrupt context. */ void folio_end_writeback(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio); /* * folio_test_clear_reclaim() could be used here but it is an * atomic operation and overkill in this particular case. Failing * to shuffle a folio marked for immediate reclaim is too mild * a gain to justify taking an atomic operation penalty at the * end of every folio writeback. */ if (folio_test_reclaim(folio)) { folio_clear_reclaim(folio); folio_rotate_reclaimable(folio); } /* * Writeback does not hold a folio reference of its own, relying * on truncation to wait for the clearing of PG_writeback. * But here we must make sure that the folio is not freed and * reused before the folio_wake_bit(). */ folio_get(folio); if (__folio_end_writeback(folio)) folio_wake_bit(folio, PG_writeback); acct_reclaim_writeback(folio); folio_put(folio); } EXPORT_SYMBOL(folio_end_writeback); /** * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it. * @folio: The folio to lock */ void __folio_lock(struct folio *folio) { folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); } EXPORT_SYMBOL(__folio_lock); int __folio_lock_killable(struct folio *folio) { return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE, EXCLUSIVE); } EXPORT_SYMBOL_GPL(__folio_lock_killable); static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) { struct wait_queue_head *q = folio_waitqueue(folio); int ret; wait->folio = folio; wait->bit_nr = PG_locked; spin_lock_irq(&q->lock); __add_wait_queue_entry_tail(q, &wait->wait); folio_set_waiters(folio); ret = !folio_trylock(folio); /* * If we were successful now, we know we're still on the * waitqueue as we're still under the lock. This means it's * safe to remove and return success, we know the callback * isn't going to trigger. */ if (!ret) __remove_wait_queue(q, &wait->wait); else ret = -EIOCBQUEUED; spin_unlock_irq(&q->lock); return ret; } /* * Return values: * 0 - folio is locked. * non-zero - folio is not locked. * mmap_lock or per-VMA lock has been released (mmap_read_unlock() or * vma_end_read()), unless flags had both FAULT_FLAG_ALLOW_RETRY and * FAULT_FLAG_RETRY_NOWAIT set, in which case the lock is still held. * * If neither ALLOW_RETRY nor KILLABLE are set, will always return 0 * with the folio locked and the mmap_lock/per-VMA lock is left unperturbed. */ vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf) { unsigned int flags = vmf->flags; if (fault_flag_allow_retry_first(flags)) { /* * CAUTION! In this case, mmap_lock/per-VMA lock is not * released even though returning VM_FAULT_RETRY. */ if (flags & FAULT_FLAG_RETRY_NOWAIT) return VM_FAULT_RETRY; release_fault_lock(vmf); if (flags & FAULT_FLAG_KILLABLE) folio_wait_locked_killable(folio); else folio_wait_locked(folio); return VM_FAULT_RETRY; } if (flags & FAULT_FLAG_KILLABLE) { bool ret; ret = __folio_lock_killable(folio); if (ret) { release_fault_lock(vmf); return VM_FAULT_RETRY; } } else { __folio_lock(folio); } return 0; } /** * page_cache_next_miss() - Find the next gap in the page cache. 
* @mapping: Mapping. * @index: Index. * @max_scan: Maximum range to search. * * Search the range [index, min(index + max_scan - 1, ULONG_MAX)] for the * gap with the lowest index. * * This function may be called under the rcu_read_lock. However, this will * not atomically search a snapshot of the cache at a single point in time. * For example, if a gap is created at index 5, then subsequently a gap is * created at index 10, page_cache_next_miss covering both indices may * return 10 if called under the rcu_read_lock. * * Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'return - index >= max_scan' will be true). * In the rare case of index wrap-around, 0 will be returned. */ pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) { XA_STATE(xas, &mapping->i_pages, index); while (max_scan--) { void *entry = xas_next(&xas); if (!entry || xa_is_value(entry)) break; if (xas.xa_index == 0) break; } return xas.xa_index; } EXPORT_SYMBOL(page_cache_next_miss); /** * page_cache_prev_miss() - Find the previous gap in the page cache. * @mapping: Mapping. * @index: Index. * @max_scan: Maximum range to search. * * Search the range [max(index - max_scan + 1, 0), index] for the * gap with the highest index. * * This function may be called under the rcu_read_lock. However, this will * not atomically search a snapshot of the cache at a single point in time. * For example, if a gap is created at index 10, then subsequently a gap is * created at index 5, page_cache_prev_miss() covering both indices may * return 5 if called under the rcu_read_lock. * * Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'index - return >= max_scan' will be true). * In the rare case of wrap-around, ULONG_MAX will be returned. */ pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) { XA_STATE(xas, &mapping->i_pages, index); while (max_scan--) { void *entry = xas_prev(&xas); if (!entry || xa_is_value(entry)) break; if (xas.xa_index == ULONG_MAX) break; } return xas.xa_index; } EXPORT_SYMBOL(page_cache_prev_miss); /* * Lockless page cache protocol: * On the lookup side: * 1. Load the folio from i_pages * 2. Increment the refcount if it's not zero * 3. If the folio is not found by xas_reload(), put the refcount and retry * * On the removal side: * A. Freeze the page (by zeroing the refcount if nobody else has a reference) * B. Remove the page from i_pages * C. Return the page to the page allocator * * This means that any page may have its reference count temporarily * increased by a speculative page cache (or fast GUP) lookup as it can * be allocated by another user before the RCU grace period expires. * Because the refcount temporarily acquired here may end up being the * last refcount on the page, any page allocation must be freeable by * folio_put(). */ /* * filemap_get_entry - Get a page cache entry. * @mapping: the address_space to search * @index: The page cache index. * * Looks up the page cache entry at @mapping & @index. If it is a folio, * it is returned with an increased refcount. If it is a shadow entry * of a previously evicted folio, or a swap entry from shmem/tmpfs, * it is returned without further action. * * Return: The folio, swap or shadow entry, %NULL if nothing is found. 
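 *
 * A minimal sketch of how a caller tells the three cases apart (what to
 * do with a shadow or swap entry is left to the caller and elided here;
 * note the folio_put() pairing with the reference taken by this
 * function):
 *
 *	entry = filemap_get_entry(mapping, index);
 *	if (entry && !xa_is_value(entry)) {
 *		struct folio *folio = entry;
 *
 *		uptodate = folio_test_uptodate(folio);
 *		folio_put(folio);
 *	}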
*/ void *filemap_get_entry(struct address_space *mapping, pgoff_t index) { XA_STATE(xas, &mapping->i_pages, index); struct folio *folio; rcu_read_lock(); repeat: xas_reset(&xas); folio = xas_load(&xas); if (xas_retry(&xas, folio)) goto repeat; /* * A shadow entry of a recently evicted page, or a swap entry from * shmem/tmpfs. Return it without attempting to raise page count. */ if (!folio || xa_is_value(folio)) goto out; if (!folio_try_get_rcu(folio)) goto repeat; if (unlikely(folio != xas_reload(&xas))) { folio_put(folio); goto repeat; } out: rcu_read_unlock(); return folio; } /** * __filemap_get_folio - Find and get a reference to a folio. * @mapping: The address_space to search. * @index: The page index. * @fgp_flags: %FGP flags modify how the folio is returned. * @gfp: Memory allocation flags to use if %FGP_CREAT is specified. * * Looks up the page cache entry at @mapping & @index. * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. * * If this function returns a folio, it is returned with an increased refcount. * * Return: The found folio or an ERR_PTR() otherwise. */ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp) { struct folio *folio; repeat: folio = filemap_get_entry(mapping, index); if (xa_is_value(folio)) folio = NULL; if (!folio) goto no_page; if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { if (!folio_trylock(folio)) { folio_put(folio); return ERR_PTR(-EAGAIN); } } else { folio_lock(folio); } /* Has the page been truncated? */ if (unlikely(folio->mapping != mapping)) { folio_unlock(folio); folio_put(folio); goto repeat; } VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); } if (fgp_flags & FGP_ACCESSED) folio_mark_accessed(folio); else if (fgp_flags & FGP_WRITE) { /* Clear idle flag for buffer write */ if (folio_test_idle(folio)) folio_clear_idle(folio); } if (fgp_flags & FGP_STABLE) folio_wait_stable(folio); no_page: if (!folio && (fgp_flags & FGP_CREAT)) { unsigned order = FGF_GET_ORDER(fgp_flags); int err; if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) gfp |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) gfp &= ~__GFP_FS; if (fgp_flags & FGP_NOWAIT) { gfp &= ~GFP_KERNEL; gfp |= GFP_NOWAIT | __GFP_NOWARN; } if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP)))) fgp_flags |= FGP_LOCK; if (!mapping_large_folio_support(mapping)) order = 0; if (order > MAX_PAGECACHE_ORDER) order = MAX_PAGECACHE_ORDER; /* If we're not aligned, allocate a smaller folio */ if (index & ((1UL << order) - 1)) order = __ffs(index); do { gfp_t alloc_gfp = gfp; err = -ENOMEM; if (order > 0) alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN; folio = filemap_alloc_folio(alloc_gfp, order); if (!folio) continue; /* Init accessed so avoid atomic mark_page_accessed later */ if (fgp_flags & FGP_ACCESSED) __folio_set_referenced(folio); err = filemap_add_folio(mapping, folio, index, gfp); if (!err) break; folio_put(folio); folio = NULL; } while (order-- > 0); if (err == -EEXIST) goto repeat; if (err) return ERR_PTR(err); /* * filemap_add_folio locks the page, and for mmap * we expect an unlocked page. 
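 *
 * (As a hedged aside on the caller-visible locking, not a prescription:
 * a typical write-begin style caller asks for a locked folio and later
 * drops both the lock and the reference itself, along the lines of
 *
 *	folio = __filemap_get_folio(mapping, index,
 *			FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE,
 *			mapping_gfp_mask(mapping));
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *
 * whereas FGP_FOR_MMAP callers get the folio back unlocked.)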
*/ if (folio && (fgp_flags & FGP_FOR_MMAP)) folio_unlock(folio); } if (!folio) return ERR_PTR(-ENOENT); return folio; } EXPORT_SYMBOL(__filemap_get_folio); static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max, xa_mark_t mark) { struct folio *folio; retry: if (mark == XA_PRESENT) folio = xas_find(xas, max); else folio = xas_find_marked(xas, max, mark); if (xas_retry(xas, folio)) goto retry; /* * A shadow entry of a recently evicted page, a swap * entry from shmem/tmpfs or a DAX entry. Return it * without attempting to raise page count. */ if (!folio || xa_is_value(folio)) return folio; if (!folio_try_get_rcu(folio)) goto reset; if (unlikely(folio != xas_reload(xas))) { folio_put(folio); goto reset; } return folio; reset: xas_reset(xas); goto retry; } /** * find_get_entries - gang pagecache lookup * @mapping: The address_space to search * @start: The starting page cache index * @end: The final page index (inclusive). * @fbatch: Where the resulting entries are placed. * @indices: The cache indices corresponding to the entries in @entries * * find_get_entries() will search for and return a batch of entries in * the mapping. The entries are placed in @fbatch. find_get_entries() * takes a reference on any actual folios it returns. * * The entries have ascending indexes. The indices may not be consecutive * due to not-present entries or large folios. * * Any shadow entries of evicted folios, or swap entries from * shmem/tmpfs, are included in the returned array. * * Return: The number of entries which were found. */ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) { XA_STATE(xas, &mapping->i_pages, *start); struct folio *folio; rcu_read_lock(); while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) { indices[fbatch->nr] = xas.xa_index; if (!folio_batch_add(fbatch, folio)) break; } rcu_read_unlock(); if (folio_batch_count(fbatch)) { unsigned long nr = 1; int idx = folio_batch_count(fbatch) - 1; folio = fbatch->folios[idx]; if (!xa_is_value(folio)) nr = folio_nr_pages(folio); *start = indices[idx] + nr; } return folio_batch_count(fbatch); } /** * find_lock_entries - Find a batch of pagecache entries. * @mapping: The address_space to search. * @start: The starting page cache index. * @end: The final page index (inclusive). * @fbatch: Where the resulting entries are placed. * @indices: The cache indices of the entries in @fbatch. * * find_lock_entries() will return a batch of entries from @mapping. * Swap, shadow and DAX entries are included. Folios are returned * locked and with an incremented refcount. Folios which are locked * by somebody else or under writeback are skipped. Folios which are * partially outside the range are not returned. * * The entries have ascending indexes. The indices may not be consecutive * due to not-present entries, large folios, folios which could not be * locked or folios under writeback. * * Return: The number of entries which were found. 
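 *
 * A hedged sketch of how a caller typically drains the batch (a
 * simplified shape, not a copy of the truncation loops; value entries
 * carry no lock or reference, so they are simply skipped here):
 *
 *	folio_batch_init(&fbatch);
 *	while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
 *		for (i = 0; i < folio_batch_count(&fbatch); i++) {
 *			struct folio *folio = fbatch.folios[i];
 *
 *			if (xa_is_value(folio))
 *				continue;
 *			folio_unlock(folio);
 *			folio_put(folio);
 *		}
 *		folio_batch_reinit(&fbatch);
 *		cond_resched();
 *	}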
*/ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) { XA_STATE(xas, &mapping->i_pages, *start); struct folio *folio; rcu_read_lock(); while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { if (!xa_is_value(folio)) { if (folio->index < *start) goto put; if (folio_next_index(folio) - 1 > end) goto put; if (!folio_trylock(folio)) goto put; if (folio->mapping != mapping || folio_test_writeback(folio)) goto unlock; VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index), folio); } indices[fbatch->nr] = xas.xa_index; if (!folio_batch_add(fbatch, folio)) break; continue; unlock: folio_unlock(folio); put: folio_put(folio); } rcu_read_unlock(); if (folio_batch_count(fbatch)) { unsigned long nr = 1; int idx = folio_batch_count(fbatch) - 1; folio = fbatch->folios[idx]; if (!xa_is_value(folio)) nr = folio_nr_pages(folio); *start = indices[idx] + nr; } return folio_batch_count(fbatch); } /** * filemap_get_folios - Get a batch of folios * @mapping: The address_space to search * @start: The starting page index * @end: The final page index (inclusive) * @fbatch: The batch to fill. * * Search for and return a batch of folios in the mapping starting at * index @start and up to index @end (inclusive). The folios are returned * in @fbatch with an elevated reference count. * * Return: The number of folios which were found. * We also update @start to index the next folio for the traversal. */ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch) { return filemap_get_folios_tag(mapping, start, end, XA_PRESENT, fbatch); } EXPORT_SYMBOL(filemap_get_folios); /** * filemap_get_folios_contig - Get a batch of contiguous folios * @mapping: The address_space to search * @start: The starting page index * @end: The final page index (inclusive) * @fbatch: The batch to fill * * filemap_get_folios_contig() works exactly like filemap_get_folios(), * except the returned folios are guaranteed to be contiguous. This may * not return all contiguous folios if the batch gets filled up. * * Return: The number of folios found. * Also update @start to be positioned for traversal of the next folio. */ unsigned filemap_get_folios_contig(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch) { XA_STATE(xas, &mapping->i_pages, *start); unsigned long nr; struct folio *folio; rcu_read_lock(); for (folio = xas_load(&xas); folio && xas.xa_index <= end; folio = xas_next(&xas)) { if (xas_retry(&xas, folio)) continue; /* * If the entry has been swapped out, we can stop looking. * No current caller is looking for DAX entries. */ if (xa_is_value(folio)) goto update_start; if (!folio_try_get_rcu(folio)) goto retry; if (unlikely(folio != xas_reload(&xas))) goto put_folio; if (!folio_batch_add(fbatch, folio)) { nr = folio_nr_pages(folio); *start = folio->index + nr; goto out; } continue; put_folio: folio_put(folio); retry: xas_reset(&xas); } update_start: nr = folio_batch_count(fbatch); if (nr) { folio = fbatch->folios[nr - 1]; *start = folio_next_index(folio); } out: rcu_read_unlock(); return folio_batch_count(fbatch); } EXPORT_SYMBOL(filemap_get_folios_contig); /** * filemap_get_folios_tag - Get a batch of folios matching @tag * @mapping: The address_space to search * @start: The starting page index * @end: The final page index (inclusive) * @tag: The tag index * @fbatch: The batch to fill * * The first folio may start before @start; if it does, it will contain * @start. 
The final folio may extend beyond @end; if it does, it will * contain @end. The folios have ascending indices. There may be gaps * between the folios if there are indices which have no folio in the * page cache. If folios are added to or removed from the page cache * while this is running, they may or may not be found by this call. * Only returns folios that are tagged with @tag. * * Return: The number of folios found. * Also update @start to index the next folio for traversal. */ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch) { XA_STATE(xas, &mapping->i_pages, *start); struct folio *folio; rcu_read_lock(); while ((folio = find_get_entry(&xas, end, tag)) != NULL) { /* * Shadow entries should never be tagged, but this iteration * is lockless so there is a window for page reclaim to evict * a page we saw tagged. Skip over it. */ if (xa_is_value(folio)) continue; if (!folio_batch_add(fbatch, folio)) { unsigned long nr = folio_nr_pages(folio); *start = folio->index + nr; goto out; } } /* * We come here when there is no page beyond @end. We take care to not * overflow the index @start as it confuses some of the callers. This * breaks the iteration when there is a page at index -1 but that is * already broke anyway. */ if (end == (pgoff_t)-1) *start = (pgoff_t)-1; else *start = end + 1; out: rcu_read_unlock(); return folio_batch_count(fbatch); } EXPORT_SYMBOL(filemap_get_folios_tag); /* * CD/DVDs are error prone. When a medium error occurs, the driver may fail * a _large_ part of the i/o request. Imagine the worst scenario: * * ---R__________________________________________B__________ * ^ reading here ^ bad block(assume 4k) * * read(R) => miss => readahead(R...B) => media error => frustrating retries * => failing the whole request => read(R) => read(R+1) => * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) => * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) => * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ...... * * It is going insane. Fix it by quickly scaling down the readahead size. */ static void shrink_readahead_size_eio(struct file_ra_state *ra) { ra->ra_pages /= 4; } /* * filemap_get_read_batch - Get a batch of folios for read * * Get a batch of folios which represent a contiguous range of bytes in * the file. No exceptional entries will be returned. If @index is in * the middle of a folio, the entire folio will be returned. The last * folio in the batch may have the readahead flag set or the uptodate flag * clear so that the caller can take the appropriate action. 
*/ static void filemap_get_read_batch(struct address_space *mapping, pgoff_t index, pgoff_t max, struct folio_batch *fbatch) { XA_STATE(xas, &mapping->i_pages, index); struct folio *folio; rcu_read_lock(); for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { if (xas_retry(&xas, folio)) continue; if (xas.xa_index > max || xa_is_value(folio)) break; if (xa_is_sibling(folio)) break; if (!folio_try_get_rcu(folio)) goto retry; if (unlikely(folio != xas_reload(&xas))) goto put_folio; if (!folio_batch_add(fbatch, folio)) break; if (!folio_test_uptodate(folio)) break; if (folio_test_readahead(folio)) break; xas_advance(&xas, folio_next_index(folio) - 1); continue; put_folio: folio_put(folio); retry: xas_reset(&xas); } rcu_read_unlock(); } static int filemap_read_folio(struct file *file, filler_t filler, struct folio *folio) { bool workingset = folio_test_workingset(folio); unsigned long pflags; int error; /* * A previous I/O error may have been due to temporary failures, * eg. multipath errors. PG_error will be set again if read_folio * fails. */ folio_clear_error(folio); /* Start the actual read. The read will unlock the page. */ if (unlikely(workingset)) psi_memstall_enter(&pflags); error = filler(file, folio); if (unlikely(workingset)) psi_memstall_leave(&pflags); if (error) return error; error = folio_wait_locked_killable(folio); if (error) return error; if (folio_test_uptodate(folio)) return 0; if (file) shrink_readahead_size_eio(&file->f_ra); return -EIO; } static bool filemap_range_uptodate(struct address_space *mapping, loff_t pos, size_t count, struct folio *folio, bool need_uptodate) { if (folio_test_uptodate(folio)) return true; /* pipes can't handle partially uptodate pages */ if (need_uptodate) return false; if (!mapping->a_ops->is_partially_uptodate) return false; if (mapping->host->i_blkbits >= folio_shift(folio)) return false; if (folio_pos(folio) > pos) { count -= folio_pos(folio) - pos; pos = 0; } else { pos -= folio_pos(folio); } return mapping->a_ops->is_partially_uptodate(folio, pos, count); } static int filemap_update_page(struct kiocb *iocb, struct address_space *mapping, size_t count, struct folio *folio, bool need_uptodate) { int error; if (iocb->ki_flags & IOCB_NOWAIT) { if (!filemap_invalidate_trylock_shared(mapping)) return -EAGAIN; } else { filemap_invalidate_lock_shared(mapping); } if (!folio_trylock(folio)) { error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) goto unlock_mapping; if (!(iocb->ki_flags & IOCB_WAITQ)) { filemap_invalidate_unlock_shared(mapping); /* * This is where we usually end up waiting for a * previously submitted readahead to finish. 
*/ folio_put_wait_locked(folio, TASK_KILLABLE); return AOP_TRUNCATED_PAGE; } error = __folio_lock_async(folio, iocb->ki_waitq); if (error) goto unlock_mapping; } error = AOP_TRUNCATED_PAGE; if (!folio->mapping) goto unlock; error = 0; if (filemap_range_uptodate(mapping, iocb->ki_pos, count, folio, need_uptodate)) goto unlock; error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) goto unlock; error = filemap_read_folio(iocb->ki_filp, mapping->a_ops->read_folio, folio); goto unlock_mapping; unlock: folio_unlock(folio); unlock_mapping: filemap_invalidate_unlock_shared(mapping); if (error == AOP_TRUNCATED_PAGE) folio_put(folio); return error; } static int filemap_create_folio(struct file *file, struct address_space *mapping, pgoff_t index, struct folio_batch *fbatch) { struct folio *folio; int error; folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0); if (!folio) return -ENOMEM; /* * Protect against truncate / hole punch. Grabbing invalidate_lock * here assures we cannot instantiate and bring uptodate new * pagecache folios after evicting page cache during truncate * and before actually freeing blocks. Note that we could * release invalidate_lock after inserting the folio into * the page cache as the locked folio would then be enough to * synchronize with hole punching. But there are code paths * such as filemap_update_page() filling in partially uptodate * pages or ->readahead() that need to hold invalidate_lock * while mapping blocks for IO so let's hold the lock here as * well to keep locking rules simple. */ filemap_invalidate_lock_shared(mapping); error = filemap_add_folio(mapping, folio, index, mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error == -EEXIST) error = AOP_TRUNCATED_PAGE; if (error) goto error; error = filemap_read_folio(file, mapping->a_ops->read_folio, folio); if (error) goto error; filemap_invalidate_unlock_shared(mapping); folio_batch_add(fbatch, folio); return 0; error: filemap_invalidate_unlock_shared(mapping); folio_put(folio); return error; } static int filemap_readahead(struct kiocb *iocb, struct file *file, struct address_space *mapping, struct folio *folio, pgoff_t last_index) { DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index); if (iocb->ki_flags & IOCB_NOIO) return -EAGAIN; page_cache_async_ra(&ractl, folio, last_index - folio->index); return 0; } static int filemap_get_pages(struct kiocb *iocb, size_t count, struct folio_batch *fbatch, bool need_uptodate) { struct file *filp = iocb->ki_filp; struct address_space *mapping = filp->f_mapping; struct file_ra_state *ra = &filp->f_ra; pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; pgoff_t last_index; struct folio *folio; int err = 0; /* "last_index" is the index of the page beyond the end of the read */ last_index = DIV_ROUND_UP(iocb->ki_pos + count, PAGE_SIZE); retry: if (fatal_signal_pending(current)) return -EINTR; filemap_get_read_batch(mapping, index, last_index - 1, fbatch); if (!folio_batch_count(fbatch)) { if (iocb->ki_flags & IOCB_NOIO) return -EAGAIN; page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); filemap_get_read_batch(mapping, index, last_index - 1, fbatch); } if (!folio_batch_count(fbatch)) { if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ)) return -EAGAIN; err = filemap_create_folio(filp, mapping, iocb->ki_pos >> PAGE_SHIFT, fbatch); if (err == AOP_TRUNCATED_PAGE) goto retry; return err; } folio = fbatch->folios[folio_batch_count(fbatch) - 1]; if (folio_test_readahead(folio)) { err = filemap_readahead(iocb, filp, mapping, folio, 
last_index); if (err) goto err; } if (!folio_test_uptodate(folio)) { if ((iocb->ki_flags & IOCB_WAITQ) && folio_batch_count(fbatch) > 1) iocb->ki_flags |= IOCB_NOWAIT; err = filemap_update_page(iocb, mapping, count, folio, need_uptodate); if (err) goto err; } return 0; err: if (err < 0) folio_put(folio); if (likely(--fbatch->nr)) return 0; if (err == AOP_TRUNCATED_PAGE) goto retry; return err; } static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio) { unsigned int shift = folio_shift(folio); return (pos1 >> shift == pos2 >> shift); } /** * filemap_read - Read data from the page cache. * @iocb: The iocb to read. * @iter: Destination for the data. * @already_read: Number of bytes already read by the caller. * * Copies data from the page cache. If the data is not currently present, * uses the readahead and read_folio address_space operations to fetch it. * * Return: Total number of bytes copied, including those already read by * the caller. If an error happens before any bytes are copied, returns * a negative error number. */ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, ssize_t already_read) { struct file *filp = iocb->ki_filp; struct file_ra_state *ra = &filp->f_ra; struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; struct folio_batch fbatch; int i, error = 0; bool writably_mapped; loff_t isize, end_offset; loff_t last_pos = ra->prev_pos; if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes)) return 0; if (unlikely(!iov_iter_count(iter))) return 0; iov_iter_truncate(iter, inode->i_sb->s_maxbytes); folio_batch_init(&fbatch); do { cond_resched(); /* * If we've already successfully copied some data, then we * can no longer safely return -EIOCBQUEUED. Hence mark * an async read NOWAIT at that point. */ if ((iocb->ki_flags & IOCB_WAITQ) && already_read) iocb->ki_flags |= IOCB_NOWAIT; if (unlikely(iocb->ki_pos >= i_size_read(inode))) break; error = filemap_get_pages(iocb, iter->count, &fbatch, false); if (error < 0) break; /* * i_size must be checked after we know the pages are Uptodate. * * Checking i_size after the check allows us to calculate * the correct value for "nr", which means the zero-filled * part of the page is not copied back to userspace (unless * another truncate extends the file - this is desired though). */ isize = i_size_read(inode); if (unlikely(iocb->ki_pos >= isize)) goto put_folios; end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); /* * Once we start copying data, we don't want to be touching any * cachelines that might be contended: */ writably_mapped = mapping_writably_mapped(mapping); /* * When a read accesses the same folio several times, only * mark it as accessed the first time. */ if (!pos_same_folio(iocb->ki_pos, last_pos - 1, fbatch.folios[0])) folio_mark_accessed(fbatch.folios[0]); for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; size_t fsize = folio_size(folio); size_t offset = iocb->ki_pos & (fsize - 1); size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset); size_t copied; if (end_offset < folio_pos(folio)) break; if (i > 0) folio_mark_accessed(folio); /* * If users can be writing to this folio using arbitrary * virtual addresses, take care of potential aliasing * before reading the folio on the kernel side. 
*/ if (writably_mapped) flush_dcache_folio(folio); copied = copy_folio_to_iter(folio, offset, bytes, iter); already_read += copied; iocb->ki_pos += copied; last_pos = iocb->ki_pos; if (copied < bytes) { error = -EFAULT; break; } } put_folios: for (i = 0; i < folio_batch_count(&fbatch); i++) folio_put(fbatch.folios[i]); folio_batch_init(&fbatch); } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error); file_accessed(filp); ra->prev_pos = last_pos; return already_read ? already_read : error; } EXPORT_SYMBOL_GPL(filemap_read); int kiocb_write_and_wait(struct kiocb *iocb, size_t count) { struct address_space *mapping = iocb->ki_filp->f_mapping; loff_t pos = iocb->ki_pos; loff_t end = pos + count - 1; if (iocb->ki_flags & IOCB_NOWAIT) { if (filemap_range_needs_writeback(mapping, pos, end)) return -EAGAIN; return 0; } return filemap_write_and_wait_range(mapping, pos, end); } EXPORT_SYMBOL_GPL(kiocb_write_and_wait); int kiocb_invalidate_pages(struct kiocb *iocb, size_t count) { struct address_space *mapping = iocb->ki_filp->f_mapping; loff_t pos = iocb->ki_pos; loff_t end = pos + count - 1; int ret; if (iocb->ki_flags & IOCB_NOWAIT) { /* we could block if there are any pages in the range */ if (filemap_range_has_page(mapping, pos, end)) return -EAGAIN; } else { ret = filemap_write_and_wait_range(mapping, pos, end); if (ret) return ret; } /* * After a write we want buffered reads to be sure to go to disk to get * the new data. We invalidate clean cached page from the region we're * about to write. We do this *before* the write so that we can return * without clobbering -EIOCBQUEUED from ->direct_IO(). */ return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT); } EXPORT_SYMBOL_GPL(kiocb_invalidate_pages); /** * generic_file_read_iter - generic filesystem read routine * @iocb: kernel I/O control block * @iter: destination for the data read * * This is the "read_iter()" routine for all filesystems * that can use the page cache directly. * * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall * be returned when no data can be read without waiting for I/O requests * to complete; it doesn't prevent readahead. * * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O * requests shall be made for the read or for readahead. When no data * can be read, -EAGAIN shall be returned. When readahead would be * triggered, a partial, possibly empty read shall be returned. * * Return: * * number of bytes copied, even for partial reads * * negative error code (or 0 if IOCB_NOIO) if nothing was read */ ssize_t generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { size_t count = iov_iter_count(iter); ssize_t retval = 0; if (!count) return 0; /* skip atime */ if (iocb->ki_flags & IOCB_DIRECT) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; retval = kiocb_write_and_wait(iocb, count); if (retval < 0) return retval; file_accessed(file); retval = mapping->a_ops->direct_IO(iocb, iter); if (retval >= 0) { iocb->ki_pos += retval; count -= retval; } if (retval != -EIOCBQUEUED) iov_iter_revert(iter, count - iov_iter_count(iter)); /* * Btrfs can have a short DIO read if we encounter * compressed extents, so if there was an error, or if * we've already read everything we wanted to, or if * there was a short read because we hit EOF, go ahead * and return. Otherwise fallthrough to buffered io for * the rest of the read. 
Buffered reads will not work for * DAX files, so don't bother trying. */ if (retval < 0 || !count || IS_DAX(inode)) return retval; if (iocb->ki_pos >= i_size_read(inode)) return retval; } return filemap_read(iocb, iter, retval); } EXPORT_SYMBOL(generic_file_read_iter); /* * Splice subpages from a folio into a pipe. */ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, struct folio *folio, loff_t fpos, size_t size) { struct page *page; size_t spliced = 0, offset = offset_in_folio(folio, fpos); page = folio_page(folio, offset / PAGE_SIZE); size = min(size, folio_size(folio) - offset); offset %= PAGE_SIZE; while (spliced < size && !pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { struct pipe_buffer *buf = pipe_head_buf(pipe); size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced); *buf = (struct pipe_buffer) { .ops = &page_cache_pipe_buf_ops, .page = page, .offset = offset, .len = part, }; folio_get(folio); pipe->head++; page++; spliced += part; offset = 0; } return spliced; } /** * filemap_splice_read - Splice data from a file's pagecache into a pipe * @in: The file to read from * @ppos: Pointer to the file position to read from * @pipe: The pipe to splice into * @len: The amount to splice * @flags: The SPLICE_F_* flags * * This function gets folios from a file's pagecache and splices them into the * pipe. Readahead will be called as necessary to fill more folios. This may * be used for blockdevs also. * * Return: On success, the number of bytes read will be returned and *@ppos * will be updated if appropriate; 0 will be returned if there is no more data * to be read; -EAGAIN will be returned if the pipe had no space, and some * other negative error code will be returned on error. A short read may occur * if the pipe has insufficient space, we reach the end of the data or we hit a * hole. */ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct folio_batch fbatch; struct kiocb iocb; size_t total_spliced = 0, used, npages; loff_t isize, end_offset; bool writably_mapped; int i, error = 0; if (unlikely(*ppos >= in->f_mapping->host->i_sb->s_maxbytes)) return 0; init_sync_kiocb(&iocb, in); iocb.ki_pos = *ppos; /* Work out how much data we can actually add into the pipe */ used = pipe_occupancy(pipe->head, pipe->tail); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); folio_batch_init(&fbatch); do { cond_resched(); if (*ppos >= i_size_read(in->f_mapping->host)) break; iocb.ki_pos = *ppos; error = filemap_get_pages(&iocb, len, &fbatch, true); if (error < 0) break; /* * i_size must be checked after we know the pages are Uptodate. * * Checking i_size after the check allows us to calculate * the correct value for "nr", which means the zero-filled * part of the page is not copied back to userspace (unless * another truncate extends the file - this is desired though). 
*/ isize = i_size_read(in->f_mapping->host); if (unlikely(*ppos >= isize)) break; end_offset = min_t(loff_t, isize, *ppos + len); /* * Once we start copying data, we don't want to be touching any * cachelines that might be contended: */ writably_mapped = mapping_writably_mapped(in->f_mapping); for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; size_t n; if (folio_pos(folio) >= end_offset) goto out; folio_mark_accessed(folio); /* * If users can be writing to this folio using arbitrary * virtual addresses, take care of potential aliasing * before reading the folio on the kernel side. */ if (writably_mapped) flush_dcache_folio(folio); n = min_t(loff_t, len, isize - *ppos); n = splice_folio_into_pipe(pipe, folio, *ppos, n); if (!n) goto out; len -= n; total_spliced += n; *ppos += n; in->f_ra.prev_pos = *ppos; if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) goto out; } folio_batch_release(&fbatch); } while (len); out: folio_batch_release(&fbatch); file_accessed(in); return total_spliced ? total_spliced : error; } EXPORT_SYMBOL(filemap_splice_read); static inline loff_t folio_seek_hole_data(struct xa_state *xas, struct address_space *mapping, struct folio *folio, loff_t start, loff_t end, bool seek_data) { const struct address_space_operations *ops = mapping->a_ops; size_t offset, bsz = i_blocksize(mapping->host); if (xa_is_value(folio) || folio_test_uptodate(folio)) return seek_data ? start : end; if (!ops->is_partially_uptodate) return seek_data ? end : start; xas_pause(xas); rcu_read_unlock(); folio_lock(folio); if (unlikely(folio->mapping != mapping)) goto unlock; offset = offset_in_folio(folio, start) & ~(bsz - 1); do { if (ops->is_partially_uptodate(folio, offset, bsz) == seek_data) break; start = (start + bsz) & ~(bsz - 1); offset += bsz; } while (offset < folio_size(folio)); unlock: folio_unlock(folio); rcu_read_lock(); return start; } static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio) { if (xa_is_value(folio)) return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index); return folio_size(folio); } /** * mapping_seek_hole_data - Seek for SEEK_DATA / SEEK_HOLE in the page cache. * @mapping: Address space to search. * @start: First byte to consider. * @end: Limit of search (exclusive). * @whence: Either SEEK_HOLE or SEEK_DATA. * * If the page cache knows which blocks contain holes and which blocks * contain data, your filesystem can use this function to implement * SEEK_HOLE and SEEK_DATA. This is useful for filesystems which are * entirely memory-based such as tmpfs, and filesystems which support * unwritten extents. * * Return: The requested offset on success, or -ENXIO if @whence specifies * SEEK_DATA and there is no data after @start. There is an implicit hole * after @end - 1, so SEEK_HOLE returns @end if all the bytes between @start * and @end contain data. 
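 *
 * A minimal, hypothetical ->llseek() built on this helper (locking and
 * bounds handling are filesystem specific and omitted here):
 *
 *	static loff_t foo_llseek(struct file *file, loff_t offset, int whence)
 *	{
 *		struct inode *inode = file_inode(file);
 *
 *		if (whence != SEEK_DATA && whence != SEEK_HOLE)
 *			return generic_file_llseek(file, offset, whence);
 *		offset = mapping_seek_hole_data(file->f_mapping, offset,
 *						i_size_read(inode), whence);
 *		if (offset < 0)
 *			return offset;
 *		return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 *	}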
*/ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, loff_t end, int whence) { XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT); pgoff_t max = (end - 1) >> PAGE_SHIFT; bool seek_data = (whence == SEEK_DATA); struct folio *folio; if (end <= start) return -ENXIO; rcu_read_lock(); while ((folio = find_get_entry(&xas, max, XA_PRESENT))) { loff_t pos = (u64)xas.xa_index << PAGE_SHIFT; size_t seek_size; if (start < pos) { if (!seek_data) goto unlock; start = pos; } seek_size = seek_folio_size(&xas, folio); pos = round_up((u64)pos + 1, seek_size); start = folio_seek_hole_data(&xas, mapping, folio, start, pos, seek_data); if (start < pos) goto unlock; if (start >= end) break; if (seek_size > PAGE_SIZE) xas_set(&xas, pos >> PAGE_SHIFT); if (!xa_is_value(folio)) folio_put(folio); } if (seek_data) start = -ENXIO; unlock: rcu_read_unlock(); if (folio && !xa_is_value(folio)) folio_put(folio); if (start > end) return end; return start; } #ifdef CONFIG_MMU #define MMAP_LOTSAMISS (100) /* * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock * @vmf - the vm_fault for this fault. * @folio - the folio to lock. * @fpin - the pointer to the file we may pin (or is already pinned). * * This works similar to lock_folio_or_retry in that it can drop the * mmap_lock. It differs in that it actually returns the folio locked * if it returns 1 and 0 if it couldn't lock the folio. If we did have * to drop the mmap_lock then fpin will point to the pinned file and * needs to be fput()'ed at a later point. */ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio, struct file **fpin) { if (folio_trylock(folio)) return 1; /* * NOTE! This will make us return with VM_FAULT_RETRY, but with * the fault lock still held. That's how FAULT_FLAG_RETRY_NOWAIT * is supposed to work. We have way too many special cases.. */ if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) return 0; *fpin = maybe_unlock_mmap_for_io(vmf, *fpin); if (vmf->flags & FAULT_FLAG_KILLABLE) { if (__folio_lock_killable(folio)) { /* * We didn't have the right flags to drop the * fault lock, but all fault_handlers only check * for fatal signals if we return VM_FAULT_RETRY, * so we need to drop the fault lock here and * return 0 if we don't have a fpin. */ if (*fpin == NULL) release_fault_lock(vmf); return 0; } } else __folio_lock(folio); return 1; } /* * Synchronous readahead happens when we don't even find a page in the page * cache at all. We don't want to perform IO under the mmap sem, so if we have * to drop the mmap sem we return the file that was pinned in order for us to do * that. If we didn't pin a file then we return NULL. The file that is * returned needs to be fput()'ed when we're done with it. */ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff); struct file *fpin = NULL; unsigned long vm_flags = vmf->vma->vm_flags; unsigned int mmap_miss; #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ if (vm_flags & VM_HUGEPAGE) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); ra->size = HPAGE_PMD_NR; /* * Fetch two PMD folios, so we get the chance to actually * readahead, unless we've been told not to. 
*/ if (!(vm_flags & VM_RAND_READ)) ra->size *= 2; ra->async_size = HPAGE_PMD_NR; page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER); return fpin; } #endif /* If we don't want any read-ahead, don't bother */ if (vm_flags & VM_RAND_READ) return fpin; if (!ra->ra_pages) return fpin; if (vm_flags & VM_SEQ_READ) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_sync_ra(&ractl, ra->ra_pages); return fpin; } /* Avoid banging the cache line if not needed */ mmap_miss = READ_ONCE(ra->mmap_miss); if (mmap_miss < MMAP_LOTSAMISS * 10) WRITE_ONCE(ra->mmap_miss, ++mmap_miss); /* * Do we miss much more than hit in this file? If so, * stop bothering with read-ahead. It will only hurt. */ if (mmap_miss > MMAP_LOTSAMISS) return fpin; /* * mmap read-around */ fpin = maybe_unlock_mmap_for_io(vmf, fpin); ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2); ra->size = ra->ra_pages; ra->async_size = ra->ra_pages / 4; ractl._index = ra->start; page_cache_ra_order(&ractl, ra, 0); return fpin; } /* * Asynchronous readahead happens when we find the page and PG_readahead, * so we want to possibly extend the readahead further. We return the file that * was pinned if we have to drop the mmap_lock in order to do IO. */ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, struct folio *folio) { struct file *file = vmf->vma->vm_file; struct file_ra_state *ra = &file->f_ra; DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff); struct file *fpin = NULL; unsigned int mmap_miss; /* If we don't want any read-ahead, don't bother */ if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; mmap_miss = READ_ONCE(ra->mmap_miss); if (mmap_miss) WRITE_ONCE(ra->mmap_miss, --mmap_miss); if (folio_test_readahead(folio)) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); page_cache_async_ra(&ractl, folio, ra->ra_pages); } return fpin; } static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret = 0; pte_t *ptep; /* * We might have COW'ed a pagecache folio and might now have an mlocked * anon folio mapped. The original pagecache folio is not mlocked and * might have been evicted. During a read+clear/modify/write update of * the PTE, such as done in do_numa_page()/change_pte_range(), we * temporarily clear the PTE under PT lock and might detect it here as * "none" when not holding the PT lock. * * Not rechecking the PTE under PT lock could result in an unexpected * major fault in an mlock'ed region. Recheck only for this special * scenario while holding the PT lock, to not degrade non-mlocked * scenarios. Recheck the PTE without PT lock firstly, thereby reducing * the number of times we hold PT lock. */ if (!(vma->vm_flags & VM_LOCKED)) return 0; if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) return 0; ptep = pte_offset_map(vmf->pmd, vmf->address); if (unlikely(!ptep)) return VM_FAULT_NOPAGE; if (unlikely(!pte_none(ptep_get_lockless(ptep)))) { ret = VM_FAULT_NOPAGE; } else { spin_lock(vmf->ptl); if (unlikely(!pte_none(ptep_get(ptep)))) ret = VM_FAULT_NOPAGE; spin_unlock(vmf->ptl); } pte_unmap(ptep); return ret; } /** * filemap_fault - read in file data for page fault handling * @vmf: struct vm_fault containing details of the fault * * filemap_fault() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. 
* * The goto's are kind of ugly, but this streamlines the normal case of having * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. * * vma->vm_mm->mmap_lock must be held on entry. * * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock * may be dropped before doing I/O or by lock_folio_maybe_drop_mmap(). * * If our return value does not have VM_FAULT_RETRY set, the mmap_lock * has not been released. * * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. * * Return: bitwise-OR of %VM_FAULT_ codes. */ vm_fault_t filemap_fault(struct vm_fault *vmf) { int error; struct file *file = vmf->vma->vm_file; struct file *fpin = NULL; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; pgoff_t max_idx, index = vmf->pgoff; struct folio *folio; vm_fault_t ret = 0; bool mapping_locked = false; max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(index >= max_idx)) return VM_FAULT_SIGBUS; /* * Do we have something in the page cache already? */ folio = filemap_get_folio(mapping, index); if (likely(!IS_ERR(folio))) { /* * We found the page, so try async readahead before waiting for * the lock. */ if (!(vmf->flags & FAULT_FLAG_TRIED)) fpin = do_async_mmap_readahead(vmf, folio); if (unlikely(!folio_test_uptodate(folio))) { filemap_invalidate_lock_shared(mapping); mapping_locked = true; } } else { ret = filemap_fault_recheck_pte_none(vmf); if (unlikely(ret)) return ret; /* No page in the page cache at all */ count_vm_event(PGMAJFAULT); count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); ret = VM_FAULT_MAJOR; fpin = do_sync_mmap_readahead(vmf); retry_find: /* * See comment in filemap_create_folio() why we need * invalidate_lock */ if (!mapping_locked) { filemap_invalidate_lock_shared(mapping); mapping_locked = true; } folio = __filemap_get_folio(mapping, index, FGP_CREAT|FGP_FOR_MMAP, vmf->gfp_mask); if (IS_ERR(folio)) { if (fpin) goto out_retry; filemap_invalidate_unlock_shared(mapping); return VM_FAULT_OOM; } } if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin)) goto out_retry; /* Did it get truncated? */ if (unlikely(folio->mapping != mapping)) { folio_unlock(folio); folio_put(folio); goto retry_find; } VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); /* * We have a locked folio in the page cache, now we need to check * that it's up-to-date. If not, it is going to be due to an error, * or because readahead was otherwise unable to retrieve it. */ if (unlikely(!folio_test_uptodate(folio))) { /* * If the invalidate lock is not held, the folio was in cache * and uptodate and now it is not. Strange but possible since we * didn't hold the page lock all the time. Let's drop * everything, get the invalidate lock and try again. */ if (!mapping_locked) { folio_unlock(folio); folio_put(folio); goto retry_find; } /* * OK, the folio is really not uptodate. This can be because the * VMA has the VM_RAND_READ flag set, or because an error * arose. Let's read it in directly. */ goto page_not_uptodate; } /* * We've made it this far and we had to drop our mmap_lock, now is the * time to return to the upper layer and have it re-find the vma and * redo the fault. */ if (fpin) { folio_unlock(folio); goto out_retry; } if (mapping_locked) filemap_invalidate_unlock_shared(mapping); /* * Found the page and have a reference on it. * We must recheck i_size under page lock. 
*/ max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(index >= max_idx)) { folio_unlock(folio); folio_put(folio); return VM_FAULT_SIGBUS; } vmf->page = folio_file_page(folio, index); return ret | VM_FAULT_LOCKED; page_not_uptodate: /* * Umm, take care of errors if the page isn't up-to-date. * Try to re-read it _once_. We do this synchronously, * because there really aren't any performance issues here * and we need to check for errors. */ fpin = maybe_unlock_mmap_for_io(vmf, fpin); error = filemap_read_folio(file, mapping->a_ops->read_folio, folio); if (fpin) goto out_retry; folio_put(folio); if (!error || error == AOP_TRUNCATED_PAGE) goto retry_find; filemap_invalidate_unlock_shared(mapping); return VM_FAULT_SIGBUS; out_retry: /* * We dropped the mmap_lock, we need to return to the fault handler to * re-find the vma and come back and find our hopefully still populated * page. */ if (!IS_ERR(folio)) folio_put(folio); if (mapping_locked) filemap_invalidate_unlock_shared(mapping); if (fpin) fput(fpin); return ret | VM_FAULT_RETRY; } EXPORT_SYMBOL(filemap_fault); static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio, pgoff_t start) { struct mm_struct *mm = vmf->vma->vm_mm; /* Huge page is mapped? No need to proceed. */ if (pmd_trans_huge(*vmf->pmd)) { folio_unlock(folio); folio_put(folio); return true; } if (pmd_none(*vmf->pmd) && folio_test_pmd_mappable(folio)) { struct page *page = folio_file_page(folio, start); vm_fault_t ret = do_set_pmd(vmf, page); if (!ret) { /* The page is mapped successfully, reference consumed. */ folio_unlock(folio); return true; } } if (pmd_none(*vmf->pmd) && vmf->prealloc_pte) pmd_install(mm, vmf->pmd, &vmf->prealloc_pte); return false; } static struct folio *next_uptodate_folio(struct xa_state *xas, struct address_space *mapping, pgoff_t end_pgoff) { struct folio *folio = xas_next_entry(xas, end_pgoff); unsigned long max_idx; do { if (!folio) return NULL; if (xas_retry(xas, folio)) continue; if (xa_is_value(folio)) continue; if (folio_test_locked(folio)) continue; if (!folio_try_get_rcu(folio)) continue; /* Has the page moved or been split? */ if (unlikely(folio != xas_reload(xas))) goto skip; if (!folio_test_uptodate(folio) || folio_test_readahead(folio)) goto skip; if (!folio_trylock(folio)) goto skip; if (folio->mapping != mapping) goto unlock; if (!folio_test_uptodate(folio)) goto unlock; max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); if (xas->xa_index >= max_idx) goto unlock; return folio; unlock: folio_unlock(folio); skip: folio_put(folio); } while ((folio = xas_next_entry(xas, end_pgoff)) != NULL); return NULL; } /* * Map page range [start_page, start_page + nr_pages) of folio. * start_page is gotten from start by folio_page(folio, start) */ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, struct folio *folio, unsigned long start, unsigned long addr, unsigned int nr_pages, unsigned int *mmap_miss) { vm_fault_t ret = 0; struct page *page = folio_page(folio, start); unsigned int count = 0; pte_t *old_ptep = vmf->pte; do { if (PageHWPoison(page + count)) goto skip; (*mmap_miss)++; /* * NOTE: If there're PTE markers, we'll leave them to be * handled in the specific fault path, and it'll prohibit the * fault-around logic. 
*/ if (!pte_none(ptep_get(&vmf->pte[count]))) goto skip; count++; continue; skip: if (count) { set_pte_range(vmf, folio, page, count, addr); folio_ref_add(folio, count); if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; } count++; page += count; vmf->pte += count; addr += count * PAGE_SIZE; count = 0; } while (--nr_pages > 0); if (count) { set_pte_range(vmf, folio, page, count, addr); folio_ref_add(folio, count); if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; } vmf->pte = old_ptep; return ret; } static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf, struct folio *folio, unsigned long addr, unsigned int *mmap_miss) { vm_fault_t ret = 0; struct page *page = &folio->page; if (PageHWPoison(page)) return ret; (*mmap_miss)++; /* * NOTE: If there're PTE markers, we'll leave them to be * handled in the specific fault path, and it'll prohibit * the fault-around logic. */ if (!pte_none(ptep_get(vmf->pte))) return ret; if (vmf->address == addr) ret = VM_FAULT_NOPAGE; set_pte_range(vmf, folio, page, 1, addr); folio_ref_inc(folio); return ret; } vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff) { struct vm_area_struct *vma = vmf->vma; struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; pgoff_t last_pgoff = start_pgoff; unsigned long addr; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct folio *folio; vm_fault_t ret = 0; unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved; rcu_read_lock(); folio = next_uptodate_folio(&xas, mapping, end_pgoff); if (!folio) goto out; if (filemap_map_pmd(vmf, folio, start_pgoff)) { ret = VM_FAULT_NOPAGE; goto out; } addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); if (!vmf->pte) { folio_unlock(folio); folio_put(folio); goto out; } do { unsigned long end; addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT; vmf->pte += xas.xa_index - last_pgoff; last_pgoff = xas.xa_index; end = folio_next_index(folio) - 1; nr_pages = min(end, end_pgoff) - xas.xa_index + 1; if (!folio_test_large(folio)) ret |= filemap_map_order0_folio(vmf, folio, addr, &mmap_miss); else ret |= filemap_map_folio_range(vmf, folio, xas.xa_index - folio->index, addr, nr_pages, &mmap_miss); folio_unlock(folio); folio_put(folio); } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL); pte_unmap_unlock(vmf->pte, vmf->ptl); out: rcu_read_unlock(); mmap_miss_saved = READ_ONCE(file->f_ra.mmap_miss); if (mmap_miss >= mmap_miss_saved) WRITE_ONCE(file->f_ra.mmap_miss, 0); else WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss_saved - mmap_miss); return ret; } EXPORT_SYMBOL(filemap_map_pages); vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct folio *folio = page_folio(vmf->page); vm_fault_t ret = VM_FAULT_LOCKED; sb_start_pagefault(mapping->host->i_sb); file_update_time(vmf->vma->vm_file); folio_lock(folio); if (folio->mapping != mapping) { folio_unlock(folio); ret = VM_FAULT_NOPAGE; goto out; } /* * We mark the folio dirty already here so that when freeze is in * progress, we are guaranteed that writeback during freezing will * see the dirty folio and writeprotect it again. 
*/ folio_mark_dirty(folio); folio_wait_stable(folio); out: sb_end_pagefault(mapping->host->i_sb); return ret; } const struct vm_operations_struct generic_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = filemap_page_mkwrite, }; /* This is used for a general mmap of a disk file */ int generic_file_mmap(struct file *file, struct vm_area_struct *vma) { struct address_space *mapping = file->f_mapping; if (!mapping->a_ops->read_folio) return -ENOEXEC; file_accessed(file); vma->vm_ops = &generic_file_vm_ops; return 0; } /* * This is for filesystems which do not implement ->writepage. */ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma) { if (vma_is_shared_maywrite(vma)) return -EINVAL; return generic_file_mmap(file, vma); } #else vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) { return VM_FAULT_SIGBUS; } int generic_file_mmap(struct file *file, struct vm_area_struct *vma) { return -ENOSYS; } int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma) { return -ENOSYS; } #endif /* CONFIG_MMU */ EXPORT_SYMBOL(filemap_page_mkwrite); EXPORT_SYMBOL(generic_file_mmap); EXPORT_SYMBOL(generic_file_readonly_mmap); static struct folio *do_read_cache_folio(struct address_space *mapping, pgoff_t index, filler_t filler, struct file *file, gfp_t gfp) { struct folio *folio; int err; if (!filler) filler = mapping->a_ops->read_folio; repeat: folio = filemap_get_folio(mapping, index); if (IS_ERR(folio)) { folio = filemap_alloc_folio(gfp, 0); if (!folio) return ERR_PTR(-ENOMEM); err = filemap_add_folio(mapping, folio, index, gfp); if (unlikely(err)) { folio_put(folio); if (err == -EEXIST) goto repeat; /* Presumably ENOMEM for xarray node */ return ERR_PTR(err); } goto filler; } if (folio_test_uptodate(folio)) goto out; if (!folio_trylock(folio)) { folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); goto repeat; } /* Folio was truncated from mapping */ if (!folio->mapping) { folio_unlock(folio); folio_put(folio); goto repeat; } /* Someone else locked and filled the page in a very small window */ if (folio_test_uptodate(folio)) { folio_unlock(folio); goto out; } filler: err = filemap_read_folio(file, filler, folio); if (err) { folio_put(folio); if (err == AOP_TRUNCATED_PAGE) goto repeat; return ERR_PTR(err); } out: folio_mark_accessed(folio); return folio; } /** * read_cache_folio - Read into page cache, fill it if needed. * @mapping: The address_space to read from. * @index: The index to read. * @filler: Function to perform the read, or NULL to use aops->read_folio(). * @file: Passed to filler function, may be NULL if not required. * * Read one page into the page cache. If it succeeds, the folio returned * will contain @index, but it may not be the first page of the folio. * * If the filler function returns an error, it will be returned to the * caller. * * Context: May sleep. Expects mapping->invalidate_lock to be held. * Return: An uptodate folio on success, ERR_PTR() on failure. */ struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index, filler_t filler, struct file *file) { return do_read_cache_folio(mapping, index, filler, file, mapping_gfp_mask(mapping)); } EXPORT_SYMBOL(read_cache_folio); /** * mapping_read_folio_gfp - Read into page cache, using specified allocation flags. * @mapping: The address_space for the folio. * @index: The index that the allocated folio will contain. * @gfp: The page allocator flags to use if allocating. 
* * This is the same as "read_cache_folio(mapping, index, NULL, NULL)", but with * any new memory allocations done using the specified allocation flags. * * The most likely error from this function is EIO, but ENOMEM is * possible and so is EINTR. If ->read_folio returns another error, * that will be returned to the caller. * * The function expects mapping->invalidate_lock to be already held. * * Return: Uptodate folio on success, ERR_PTR() on failure. */ struct folio *mapping_read_folio_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp) { return do_read_cache_folio(mapping, index, NULL, NULL, gfp); } EXPORT_SYMBOL(mapping_read_folio_gfp); static struct page *do_read_cache_page(struct address_space *mapping, pgoff_t index, filler_t *filler, struct file *file, gfp_t gfp) { struct folio *folio; folio = do_read_cache_folio(mapping, index, filler, file, gfp); if (IS_ERR(folio)) return &folio->page; return folio_file_page(folio, index); } struct page *read_cache_page(struct address_space *mapping, pgoff_t index, filler_t *filler, struct file *file) { return do_read_cache_page(mapping, index, filler, file, mapping_gfp_mask(mapping)); } EXPORT_SYMBOL(read_cache_page); /** * read_cache_page_gfp - read into page cache, using specified page allocation flags. * @mapping: the page's address_space * @index: the page index * @gfp: the page allocator flags to use if allocating * * This is the same as "read_mapping_page(mapping, index, NULL)", but with * any new page allocations done using the specified allocation flags. * * If the page does not get brought uptodate, return -EIO. * * The function expects mapping->invalidate_lock to be already held. * * Return: up to date page on success, ERR_PTR() on failure. */ struct page *read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp) { return do_read_cache_page(mapping, index, NULL, NULL, gfp); } EXPORT_SYMBOL(read_cache_page_gfp); /* * Warn about a page cache invalidation failure during a direct I/O write. */ static void dio_warn_stale_pagecache(struct file *filp) { static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); char pathname[128]; char *path; errseq_set(&filp->f_mapping->wb_err, -EIO); if (__ratelimit(&_rs)) { path = file_path(filp, pathname, sizeof(pathname)); if (IS_ERR(path)) path = "(unknown)"; pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n"); pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid, current->comm); } } void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count) { struct address_space *mapping = iocb->ki_filp->f_mapping; if (mapping->nrpages && invalidate_inode_pages2_range(mapping, iocb->ki_pos >> PAGE_SHIFT, (iocb->ki_pos + count - 1) >> PAGE_SHIFT)) dio_warn_stale_pagecache(iocb->ki_filp); } ssize_t generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct address_space *mapping = iocb->ki_filp->f_mapping; size_t write_len = iov_iter_count(from); ssize_t written; /* * If a page can not be invalidated, return 0 to fall back * to buffered write. */ written = kiocb_invalidate_pages(iocb, write_len); if (written) { if (written == -EBUSY) return 0; return written; } written = mapping->a_ops->direct_IO(iocb, from); /* * Finally, try again to invalidate clean pages which might have been * cached by non-direct readahead, or faulted in by get_user_pages() * if the source of the write was an mmap'ed region of the file * we're writing. 
Either one is a pretty crazy thing to do, * so we don't support it 100%. If this invalidation * fails, tough, the write still worked... * * Most of the time we do not need this since dio_complete() will do * the invalidation for us. However there are some file systems that * do not end up with dio_complete() being called, so let's not break * them by removing it completely. * * Noticeable example is a blkdev_direct_IO(). * * Skip invalidation for async writes or if mapping has no pages. */ if (written > 0) { struct inode *inode = mapping->host; loff_t pos = iocb->ki_pos; kiocb_invalidate_post_direct_write(iocb, written); pos += written; write_len -= written; if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) { i_size_write(inode, pos); mark_inode_dirty(inode); } iocb->ki_pos = pos; } if (written != -EIOCBQUEUED) iov_iter_revert(from, write_len - iov_iter_count(from)); return written; } EXPORT_SYMBOL(generic_file_direct_write); ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i) { struct file *file = iocb->ki_filp; loff_t pos = iocb->ki_pos; struct address_space *mapping = file->f_mapping; const struct address_space_operations *a_ops = mapping->a_ops; long status = 0; ssize_t written = 0; do { struct page *page; unsigned long offset; /* Offset into pagecache page */ unsigned long bytes; /* Bytes to write to page */ size_t copied; /* Bytes copied from user */ void *fsdata = NULL; offset = (pos & (PAGE_SIZE - 1)); bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_count(i)); again: /* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. */ if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) { status = -EFAULT; break; } if (fatal_signal_pending(current)) { status = -EINTR; break; } status = a_ops->write_begin(file, mapping, pos, bytes, &page, &fsdata); if (unlikely(status < 0)) break; if (mapping_writably_mapped(mapping)) flush_dcache_page(page); copied = copy_page_from_iter_atomic(page, offset, bytes, i); flush_dcache_page(page); status = a_ops->write_end(file, mapping, pos, bytes, copied, page, fsdata); if (unlikely(status != copied)) { iov_iter_revert(i, copied - max(status, 0L)); if (unlikely(status < 0)) break; } cond_resched(); if (unlikely(status == 0)) { /* * A short copy made ->write_end() reject the * thing entirely. Might be memory poisoning * halfway through, might be a race with munmap, * might be severe memory pressure. */ if (copied) bytes = copied; goto again; } pos += status; written += status; balance_dirty_pages_ratelimited(mapping); } while (iov_iter_count(i)); if (!written) return status; iocb->ki_pos += written; return written; } EXPORT_SYMBOL(generic_perform_write); /** * __generic_file_write_iter - write data to a file * @iocb: IO state structure (file, offset, etc.) * @from: iov_iter with data to write * * This function does all the work needed for actually writing data to a * file. It does all basic checks, removes SUID from the file, updates * modification times and calls proper subroutines depending on whether we * do direct IO or a standard buffered write. * * It expects i_rwsem to be grabbed unless we work on a block device or similar * object which does not need locking at all. * * This function does *not* take care of syncing data in case of O_SYNC write. * A caller has to handle it. This is mainly due to the fact that we want to * avoid syncing under i_rwsem. 
* * Return: * * number of bytes written, even for truncated writes * * negative error code if no data has been written at all */ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; ret = file_remove_privs(file); if (ret) return ret; ret = file_update_time(file); if (ret) return ret; if (iocb->ki_flags & IOCB_DIRECT) { ret = generic_file_direct_write(iocb, from); /* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to * holes, for example. For DAX files, a buffered write will * not succeed (even if it did, DAX does not handle dirty * page-cache pages correctly). */ if (ret < 0 || !iov_iter_count(from) || IS_DAX(inode)) return ret; return direct_write_fallback(iocb, from, ret, generic_perform_write(iocb, from)); } return generic_perform_write(iocb, from); } EXPORT_SYMBOL(__generic_file_write_iter); /** * generic_file_write_iter - write data to a file * @iocb: IO state structure * @from: iov_iter with data to write * * This is a wrapper around __generic_file_write_iter() to be used by most * filesystems. It takes care of syncing the file in case of O_SYNC file * and acquires i_rwsem as needed. * Return: * * negative error code if no data has been written at all of * vfs_fsync_range() failed for a synchronous write * * number of bytes written, even for truncated writes */ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) ret = __generic_file_write_iter(iocb, from); inode_unlock(inode); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; } EXPORT_SYMBOL(generic_file_write_iter); /** * filemap_release_folio() - Release fs-specific metadata on a folio. * @folio: The folio which the kernel is trying to free. * @gfp: Memory allocation flags (and I/O mode). * * The address_space is trying to release any data attached to a folio * (presumably at folio->private). * * This will also be called if the private_2 flag is set on a page, * indicating that the folio has other metadata associated with it. * * The @gfp argument specifies whether I/O may be performed to release * this page (__GFP_IO), and whether the call may block * (__GFP_RECLAIM & __GFP_FS). * * Return: %true if the release was successful, otherwise %false. */ bool filemap_release_folio(struct folio *folio, gfp_t gfp) { struct address_space * const mapping = folio->mapping; BUG_ON(!folio_test_locked(folio)); if (!folio_needs_release(folio)) return true; if (folio_test_writeback(folio)) return false; if (mapping && mapping->a_ops->release_folio) return mapping->a_ops->release_folio(folio, gfp); return try_to_free_buffers(folio); } EXPORT_SYMBOL(filemap_release_folio); #ifdef CONFIG_CACHESTAT_SYSCALL /** * filemap_cachestat() - compute the page cache statistics of a mapping * @mapping: The mapping to compute the statistics for. * @first_index: The starting page cache index. * @last_index: The final page index (inclusive). * @cs: the cachestat struct to write the result to. * * This will query the page cache statistics of a mapping in the * page range of [first_index, last_index] (inclusive). 
The statistics * queried include: number of dirty pages, number of pages marked for * writeback, and the number of (recently) evicted pages. */ static void filemap_cachestat(struct address_space *mapping, pgoff_t first_index, pgoff_t last_index, struct cachestat *cs) { XA_STATE(xas, &mapping->i_pages, first_index); struct folio *folio; rcu_read_lock(); xas_for_each(&xas, folio, last_index) { int order; unsigned long nr_pages; pgoff_t folio_first_index, folio_last_index; /* * Don't deref the folio. It is not pinned, and might * get freed (and reused) underneath us. * * We *could* pin it, but that would be expensive for * what should be a fast and lightweight syscall. * * Instead, derive all information of interest from * the rcu-protected xarray. */ if (xas_retry(&xas, folio)) continue; order = xa_get_order(xas.xa, xas.xa_index); nr_pages = 1 << order; folio_first_index = round_down(xas.xa_index, 1 << order); folio_last_index = folio_first_index + nr_pages - 1; /* Folios might straddle the range boundaries, only count covered pages */ if (folio_first_index < first_index) nr_pages -= first_index - folio_first_index; if (folio_last_index > last_index) nr_pages -= folio_last_index - last_index; if (xa_is_value(folio)) { /* page is evicted */ void *shadow = (void *)folio; bool workingset; /* not used */ cs->nr_evicted += nr_pages; #ifdef CONFIG_SWAP /* implies CONFIG_MMU */ if (shmem_mapping(mapping)) { /* shmem file - in swap cache */ swp_entry_t swp = radix_to_swp_entry(folio); /* swapin error results in poisoned entry */ if (non_swap_entry(swp)) goto resched; /* * Getting a swap entry from the shmem * inode means we beat * shmem_unuse(). rcu_read_lock() * ensures swapoff waits for us before * freeing the swapper space. However, * we can race with swapping and * invalidation, so there might not be * a shadow in the swapcache (yet). */ shadow = get_shadow_from_swap_cache(swp); if (!shadow) goto resched; } #endif if (workingset_test_recent(shadow, true, &workingset)) cs->nr_recently_evicted += nr_pages; goto resched; } /* page is in cache */ cs->nr_cache += nr_pages; if (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY)) cs->nr_dirty += nr_pages; if (xas_get_mark(&xas, PAGECACHE_TAG_WRITEBACK)) cs->nr_writeback += nr_pages; resched: if (need_resched()) { xas_pause(&xas); cond_resched_rcu(); } } rcu_read_unlock(); } /* * The cachestat(2) system call. * * cachestat() returns the page cache statistics of a file in the * bytes range specified by `off` and `len`: number of cached pages, * number of dirty pages, number of pages marked for writeback, * number of evicted pages, and number of recently evicted pages. * * An evicted page is a page that is previously in the page cache * but has been evicted since. A page is recently evicted if its last * eviction was recent enough that its reentry to the cache would * indicate that it is actively being used by the system, and that * there is memory pressure on the system. * * `off` and `len` must be non-negative integers. If `len` > 0, * the queried range is [`off`, `off` + `len`]. If `len` == 0, * we will query in the range from `off` to the end of the file. * * The `flags` argument is unused for now, but is included for future * extensibility. User should pass 0 (i.e no flag specified). * * Currently, hugetlbfs is not supported. * * Because the status of a page can change after cachestat() checks it * but before it returns to the application, the returned values may * contain stale information. 
* * return values: * zero - success * -EFAULT - cstat or cstat_range points to an illegal address * -EINVAL - invalid flags * -EBADF - invalid file descriptor * -EOPNOTSUPP - file descriptor is of a hugetlbfs file */ SYSCALL_DEFINE4(cachestat, unsigned int, fd, struct cachestat_range __user *, cstat_range, struct cachestat __user *, cstat, unsigned int, flags) { struct fd f = fdget(fd); struct address_space *mapping; struct cachestat_range csr; struct cachestat cs; pgoff_t first_index, last_index; if (!f.file) return -EBADF; if (copy_from_user(&csr, cstat_range, sizeof(struct cachestat_range))) { fdput(f); return -EFAULT; } /* hugetlbfs is not supported */ if (is_file_hugepages(f.file)) { fdput(f); return -EOPNOTSUPP; } if (flags != 0) { fdput(f); return -EINVAL; } first_index = csr.off >> PAGE_SHIFT; last_index = csr.len == 0 ? ULONG_MAX : (csr.off + csr.len - 1) >> PAGE_SHIFT; memset(&cs, 0, sizeof(struct cachestat)); mapping = f.file->f_mapping; filemap_cachestat(mapping, first_index, last_index, &cs); fdput(f); if (copy_to_user(cstat, &cs, sizeof(struct cachestat))) return -EFAULT; return 0; } #endif /* CONFIG_CACHESTAT_SYSCALL */
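/*
 * Illustrative userspace sketch (not part of the kernel sources above):
 * calling cachestat(2) through syscall(2) on a file descriptor. It assumes
 * the uapi definitions of struct cachestat_range, struct cachestat and
 * __NR_cachestat are available from the installed kernel headers.
 */
#if 0	/* example only; builds against userspace headers, not this file */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mman.h>		/* struct cachestat_range, struct cachestat */

int main(int argc, char **argv)
{
	struct cachestat_range csr = { .off = 0, .len = 0 };	/* whole file */
	struct cachestat cs;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || syscall(__NR_cachestat, fd, &csr, &cs, 0))
		return 1;
	printf("cached=%llu dirty=%llu writeback=%llu evicted=%llu recently_evicted=%llu\n",
	       (unsigned long long)cs.nr_cache, (unsigned long long)cs.nr_dirty,
	       (unsigned long long)cs.nr_writeback, (unsigned long long)cs.nr_evicted,
	       (unsigned long long)cs.nr_recently_evicted);
	close(fd);
	return 0;
}
#endif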
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM signal

#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SIGNAL_H

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/tracepoint.h>

#define TP_STORE_SIGINFO(__entry, info)				\
	do {							\
		if (info == SEND_SIG_NOINFO) {			\
			__entry->errno = 0;			\
			__entry->code = SI_USER;		\
		} else if (info == SEND_SIG_PRIV) {		\
			__entry->errno = 0;			\
			__entry->code = SI_KERNEL;		\
		} else {					\
			__entry->errno = info->si_errno;	\
			__entry->code = info->si_code;		\
		}						\
	} while (0)

#ifndef TRACE_HEADER_MULTI_READ
enum {
	TRACE_SIGNAL_DELIVERED,
	TRACE_SIGNAL_IGNORED,
	TRACE_SIGNAL_ALREADY_PENDING,
	TRACE_SIGNAL_OVERFLOW_FAIL,
	TRACE_SIGNAL_LOSE_INFO,
};
#endif

/**
 * signal_generate - called when a signal is generated
 * @sig: signal number
 * @info: pointer to struct siginfo
 * @task: pointer to struct task_struct
 * @group: shared or private
 * @result: TRACE_SIGNAL_*
 *
 * Current process sends a 'sig' signal to 'task' process with
 * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
 * 'info' is not a pointer and you can't access its field. Instead,
 * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV
 * means that si_code is SI_KERNEL.
 */
TRACE_EVENT(signal_generate,

	TP_PROTO(int sig, struct kernel_siginfo *info, struct task_struct *task,
			int group, int result),

	TP_ARGS(sig, info, task, group, result),

	TP_STRUCT__entry(
		__field(	int,	sig	)
		__field(	int,	errno	)
		__field(	int,	code	)
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid	)
		__field(	int,	group	)
		__field(	int,	result	)
	),

	TP_fast_assign(
		__entry->sig = sig;
		TP_STORE_SIGINFO(__entry, info);
		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
		__entry->pid = task->pid;
		__entry->group = group;
		__entry->result = result;
	),

	TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d",
		  __entry->sig, __entry->errno, __entry->code,
		  __entry->comm, __entry->pid, __entry->group,
		  __entry->result)
);

/**
 * signal_deliver - called when a signal is delivered
 * @sig: signal number
 * @info: pointer to struct siginfo
 * @ka: pointer to struct k_sigaction
 *
 * A 'sig' signal is delivered to current process with 'info' siginfo,
 * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or
 * SIG_DFL.
 * Note that some signals reported by signal_generate tracepoint can be
 * lost, ignored or modified (by debugger) before hitting this tracepoint.
 * This means, this can show which signals are actually delivered, but
 * matching generated signals and delivered signals may not be correct.
 */
TRACE_EVENT(signal_deliver,

	TP_PROTO(int sig, struct kernel_siginfo *info, struct k_sigaction *ka),

	TP_ARGS(sig, info, ka),

	TP_STRUCT__entry(
		__field(	int,		sig		)
		__field(	int,		errno		)
		__field(	int,		code		)
		__field(	unsigned long,	sa_handler	)
		__field(	unsigned long,	sa_flags	)
	),

	TP_fast_assign(
		__entry->sig = sig;
		TP_STORE_SIGINFO(__entry, info);
		__entry->sa_handler = (unsigned long)ka->sa.sa_handler;
		__entry->sa_flags = ka->sa.sa_flags;
	),

	TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx",
		  __entry->sig, __entry->errno, __entry->code,
		  __entry->sa_handler, __entry->sa_flags)
);

#endif /* _TRACE_SIGNAL_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
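/*
 * Example (illustrative sketch, not part of this header): how an in-kernel
 * probe might attach to the signal_generate tracepoint defined above. It
 * assumes the usual per-event helpers generated for TRACE_EVENT
 * (register_trace_signal_generate()/unregister_trace_signal_generate()) are
 * visible to the caller; the module shown here is hypothetical.
 */
#if 0 /* illustrative only */
#include <linux/module.h>
#include <trace/events/signal.h>

/* Probe signature: void *data first, then the TP_PROTO arguments. */
static void probe_signal_generate(void *data, int sig,
				  struct kernel_siginfo *info,
				  struct task_struct *task,
				  int group, int result)
{
	/* Runs for every generated signal; keep the work minimal. */
	pr_debug("signal %d for %s (pid %d), result %d\n",
		 sig, task->comm, task->pid, result);
}

static int __init signal_probe_init(void)
{
	return register_trace_signal_generate(probe_signal_generate, NULL);
}

static void __exit signal_probe_exit(void)
{
	unregister_trace_signal_generate(probe_signal_generate, NULL);
	tracepoint_synchronize_unregister();
}

module_init(signal_probe_init);
module_exit(signal_probe_exit);
MODULE_LICENSE("GPL");
#endif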
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Access kernel or user memory without faulting.
 */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <asm/tlb.h>

bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
		size_t size)
{
	return true;
}

#define copy_from_kernel_nofault_loop(dst, src, len, type, err_label)	\
	while (len >= sizeof(type)) {					\
		__get_kernel_nofault(dst, src, type, err_label);	\
		dst += sizeof(type);					\
		src += sizeof(type);					\
		len -= sizeof(type);					\
	}

long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
{
	unsigned long align = 0;

	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		align = (unsigned long)dst | (unsigned long)src;

	if (!copy_from_kernel_nofault_allowed(src, size))
		return -ERANGE;

	pagefault_disable();
	if (!(align & 7))
		copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
	if (!(align & 3))
		copy_from_kernel_nofault_loop(dst, src, size, u32, Efault);
	if (!(align & 1))
		copy_from_kernel_nofault_loop(dst, src, size, u16, Efault);
	copy_from_kernel_nofault_loop(dst, src, size, u8, Efault);
	pagefault_enable();
	return 0;
Efault:
	pagefault_enable();
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);

#define copy_to_kernel_nofault_loop(dst, src, len, type, err_label)	\
	while (len >= sizeof(type)) {					\
		__put_kernel_nofault(dst, src, type, err_label);	\
		dst += sizeof(type);					\
		src += sizeof(type);					\
		len -= sizeof(type);					\
	}

long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
{
	unsigned long align = 0;

	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		align = (unsigned long)dst | (unsigned long)src;

	pagefault_disable();
	if (!(align & 7))
		copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
	if (!(align & 3))
		copy_to_kernel_nofault_loop(dst, src, size, u32, Efault);
	if (!(align & 1))
		copy_to_kernel_nofault_loop(dst, src, size, u16, Efault);
	copy_to_kernel_nofault_loop(dst, src, size, u8, Efault);
	pagefault_enable();
	return 0;
Efault:
	pagefault_enable();
	return -EFAULT;
}

long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count)
{
	const void *src = unsafe_addr;

	if (unlikely(count <= 0))
		return 0;
	if (!copy_from_kernel_nofault_allowed(unsafe_addr, count))
		return -ERANGE;

	pagefault_disable();
	do {
		__get_kernel_nofault(dst, src, u8, Efault);
		dst++;
		src++;
	} while (dst[-1] && src - unsafe_addr < count);
	pagefault_enable();

	dst[-1] = '\0';
	return src - unsafe_addr;

Efault:
	pagefault_enable();
	dst[0] = '\0';
	return -EFAULT;
}

/**
 * copy_from_user_nofault(): safely attempt to read from a user-space location
 * @dst: pointer to the buffer that shall take the data
 * @src: address to read from. This must be a user address.
 * @size: size of the data chunk
 *
 * Safely read from user address @src to the buffer at @dst. If a kernel fault
 * happens, handle that and return -EFAULT.
 */
long copy_from_user_nofault(void *dst, const void __user *src, size_t size)
{
	long ret = -EFAULT;

	if (!__access_ok(src, size))
		return ret;

	if (!nmi_uaccess_okay())
		return ret;

	pagefault_disable();
	ret = __copy_from_user_inatomic(dst, src, size);
	pagefault_enable();

	if (ret)
		return -EFAULT;
	return 0;
}
EXPORT_SYMBOL_GPL(copy_from_user_nofault);

/**
 * copy_to_user_nofault(): safely attempt to write to a user-space location
 * @dst: address to write to
 * @src: pointer to the data that shall be written
 * @size: size of the data chunk
 *
 * Safely write to address @dst from the buffer at @src. If a kernel fault
 * happens, handle that and return -EFAULT.
 */
long copy_to_user_nofault(void __user *dst, const void *src, size_t size)
{
	long ret = -EFAULT;

	if (access_ok(dst, size)) {
		pagefault_disable();
		ret = __copy_to_user_inatomic(dst, src, size);
		pagefault_enable();
	}

	if (ret)
		return -EFAULT;
	return 0;
}
EXPORT_SYMBOL_GPL(copy_to_user_nofault);

/**
 * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user
 *				address.
 * @dst: Destination address, in kernel space. This buffer must be at
 *       least @count bytes long.
 * @unsafe_addr: Unsafe user address.
 * @count: Maximum number of bytes to copy, including the trailing NUL.
 *
 * Copies a NUL-terminated string from unsafe user address to kernel buffer.
 *
 * On success, returns the length of the string INCLUDING the trailing NUL.
 *
 * If access fails, returns -EFAULT (some data may have been copied
 * and the trailing NUL added).
 *
 * If @count is smaller than the length of the string, copies @count-1 bytes,
 * sets the last byte of @dst buffer to NUL and returns @count.
 */
long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr,
			       long count)
{
	long ret;

	if (unlikely(count <= 0))
		return 0;

	pagefault_disable();
	ret = strncpy_from_user(dst, unsafe_addr, count);
	pagefault_enable();

	if (ret >= count) {
		ret = count;
		dst[ret - 1] = '\0';
	} else if (ret > 0) {
		ret++;
	}

	return ret;
}

/**
 * strnlen_user_nofault: - Get the size of a user string INCLUDING final NUL.
 * @unsafe_addr: The string to measure.
 * @count: Maximum count (including NUL)
 *
 * Get the size of a NUL-terminated string in user space without pagefault.
 *
 * Returns the size of the string INCLUDING the terminating NUL.
 *
 * If the string is too long, returns a number larger than @count. User
 * has to check the return value against "> count".
 * On exception (or invalid count), returns 0.
 *
 * Unlike strnlen_user, this can be used from IRQ handler etc. because
 * it disables pagefaults.
 */
long strnlen_user_nofault(const void __user *unsafe_addr, long count)
{
	int ret;

	pagefault_disable();
	ret = strnlen_user(unsafe_addr, count);
	pagefault_enable();

	return ret;
}

void __copy_overflow(int size, unsigned long count)
{
	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
}
EXPORT_SYMBOL(__copy_overflow);
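/*
 * Example (illustrative sketch, not part of this file): a typical use of the
 * _nofault helpers from dump or debugging code, where the pointer being
 * inspected may be bogus and taking a page fault is not acceptable. The
 * helper name dump_possible_list_head() is hypothetical.
 */
#if 0 /* illustrative only */
static void dump_possible_list_head(const void *addr)
{
	struct list_head tmp;

	/* Returns -EFAULT (or -ERANGE) instead of faulting on a bad pointer. */
	if (copy_from_kernel_nofault(&tmp, addr, sizeof(tmp))) {
		pr_info("%px: not readable\n", addr);
		return;
	}
	pr_info("%px: next=%px prev=%px\n", addr, tmp.next, tmp.prev);
}
#endif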
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AppArmor security module
 *
 * This file contains AppArmor file mediation function definitions.
 *
 * Copyright (C) 1998-2008 Novell/SUSE
 * Copyright 2009-2010 Canonical Ltd.
 */

#ifndef __AA_FILE_H
#define __AA_FILE_H

#include <linux/spinlock.h>

#include "domain.h"
#include "match.h"
#include "perms.h"

struct aa_policydb;
struct aa_profile;
struct path;

#define mask_mode_t(X) (X & (MAY_EXEC | MAY_WRITE | MAY_READ | MAY_APPEND))

#define AA_AUDIT_FILE_MASK	(MAY_READ | MAY_WRITE | MAY_EXEC | MAY_APPEND |\
				 AA_MAY_CREATE | AA_MAY_DELETE |	\
				 AA_MAY_GETATTR | AA_MAY_SETATTR | \
				 AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_LOCK | \
				 AA_EXEC_MMAP | AA_MAY_LINK)

static inline struct aa_file_ctx *file_ctx(struct file *file)
{
	return file->f_security + apparmor_blob_sizes.lbs_file;
}

/* struct aa_file_ctx - the AppArmor context the file was opened in
 * @lock: lock to update the ctx
 * @label: label currently cached on the ctx
 * @allow: the permission the file was opened with
 */
struct aa_file_ctx {
	spinlock_t lock;
	struct aa_label __rcu *label;
	u32 allow;
};

/*
 * The xindex is broken into 3 parts
 * - index - an index into either the exec name table or the variable table
 * - exec type - which determines how the executable name and index are used
 * - flags - which modify how the destination name is applied
 */
#define AA_X_INDEX_MASK		AA_INDEX_MASK

#define AA_X_TYPE_MASK		0x0c000000
#define AA_X_NONE		AA_INDEX_NONE
#define AA_X_NAME		0x04000000 /* use executable name px */
#define AA_X_TABLE		0x08000000 /* use a specified name ->n# */

#define AA_X_UNSAFE		0x10000000
#define AA_X_CHILD		0x20000000
#define AA_X_INHERIT		0x40000000
#define AA_X_UNCONFINED		0x80000000

/* need to make conditional which ones are being set */
struct path_cond {
	kuid_t uid;
	umode_t mode;
};

#define COMBINED_PERM_MASK(X) ((X).allow | (X).audit | (X).quiet | (X).kill)

int aa_audit_file(const struct cred *cred,
		  struct aa_profile *profile, struct aa_perms *perms,
		  const char *op, u32 request, const char *name,
		  const char *target, struct aa_label *tlabel, kuid_t ouid,
		  const char *info, int error);

struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
				  aa_state_t state, struct path_cond *cond);
aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start,
			const char *name, struct path_cond *cond,
			struct aa_perms *perms);

int aa_path_perm(const char *op, const struct cred *subj_cred,
		 struct aa_label *label, const struct path *path,
		 int flags, u32 request, struct path_cond *cond);

int aa_path_link(const struct cred *subj_cred, struct aa_label *label,
		 struct dentry *old_dentry, const struct path *new_dir,
		 struct dentry *new_dentry);

int aa_file_perm(const char *op, const struct cred *subj_cred,
		 struct aa_label *label, struct file *file, u32 request,
		 bool in_atomic);

void aa_inherit_files(const struct cred *cred, struct files_struct *files);

/**
 * aa_map_file_to_perms - map file flags to AppArmor permissions
 * @file: open file to map flags to AppArmor permissions
 *
 * Returns: apparmor permission set for the file
 */
static inline u32 aa_map_file_to_perms(struct file *file)
{
	int flags =
		file->f_flags;
	u32 perms = 0;

	if (file->f_mode & FMODE_WRITE)
		perms |= MAY_WRITE;
	if (file->f_mode & FMODE_READ)
		perms |= MAY_READ;

	if ((flags & O_APPEND) && (perms & MAY_WRITE))
		perms = (perms & ~MAY_WRITE) | MAY_APPEND;
	/* trunc implies write permission */
	if (flags & O_TRUNC)
		perms |= MAY_WRITE;
	if (flags & O_CREAT)
		perms |= AA_MAY_CREATE;

	return perms;
}

#endif /* __AA_FILE_H */
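/*
 * Example (illustrative, not part of this header): the request masks that
 * aa_map_file_to_perms() above produces for a few f_mode/f_flags
 * combinations, derived directly from the mapping logic shown:
 *
 *   FMODE_READ only                    -> MAY_READ
 *   FMODE_WRITE with O_APPEND          -> MAY_APPEND     (write folded into append)
 *   FMODE_READ|FMODE_WRITE with O_TRUNC -> MAY_READ | MAY_WRITE
 *   FMODE_WRITE with O_CREAT           -> MAY_WRITE | AA_MAY_CREATE
 */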
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * descriptor table internals; you almost certainly want file.h instead.
 */
#ifndef __LINUX_FDTABLE_H
#define __LINUX_FDTABLE_H

#include <linux/posix_types.h>
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/nospec.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/fs.h>

#include <linux/atomic.h>

/*
 * The default fd array needs to be at least BITS_PER_LONG,
 * as this is the granularity returned by copy_fdset().
 */
#define NR_OPEN_DEFAULT BITS_PER_LONG
#define NR_OPEN_MAX ~0U

struct fdtable {
	unsigned int max_fds;
	struct file __rcu **fd;      /* current fd array */
	unsigned long *close_on_exec;
	unsigned long *open_fds;
	unsigned long *full_fds_bits;
	struct rcu_head rcu;
};

static inline bool close_on_exec(unsigned int fd, const struct fdtable *fdt)
{
	return test_bit(fd, fdt->close_on_exec);
}

static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
{
	return test_bit(fd, fdt->open_fds);
}

/*
 * Open file table structure
 */
struct files_struct {
	/*
	 * read mostly part
	 */
	atomic_t count;
	bool resize_in_progress;
	wait_queue_head_t resize_wait;

	struct fdtable __rcu *fdt;
	struct fdtable fdtab;
	/*
	 * written part on a separate cache line in SMP
	 */
	spinlock_t file_lock ____cacheline_aligned_in_smp;
	unsigned int next_fd;
	unsigned long close_on_exec_init[1];
	unsigned long open_fds_init[1];
	unsigned long full_fds_bits_init[1];
	struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};

struct file_operations;
struct vfsmount;
struct dentry;

#define rcu_dereference_check_fdtable(files, fdtfd) \
	rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))

#define files_fdtable(files) \
	rcu_dereference_check_fdtable((files), (files)->fdt)

/*
 * The caller must ensure that fd table isn't shared or hold rcu or file lock
 */
static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
{
	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
	unsigned long mask = array_index_mask_nospec(fd, fdt->max_fds);
	struct file *needs_masking;

	/*
	 * 'mask' is zero for an out-of-bounds fd, all ones for ok.
	 * 'fd&mask' is 'fd' for ok, or 0 for out of bounds.
	 *
	 * Accessing fdt->fd[0] is ok, but needs masking of the result.
	 */
	needs_masking = rcu_dereference_raw(fdt->fd[fd&mask]);

	return (struct file *)(mask & (unsigned long)needs_masking);
}

static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
{
	RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock),
			   "suspicious rcu_dereference_check() usage");
	return files_lookup_fd_raw(files, fd);
}

struct file *lookup_fdget_rcu(unsigned int fd);
struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd);
struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd);

struct task_struct;

void put_files_struct(struct files_struct *fs);
int unshare_files(void);
struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
void do_close_on_exec(struct files_struct *);
int iterate_fd(struct files_struct *, unsigned,
		int (*)(const void *, struct file *, unsigned),
		const void *);

extern int close_fd(unsigned int fd);
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
extern struct file *file_close_fd(unsigned int fd);
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
		      struct files_struct **new_fdp);

extern struct kmem_cache *files_cachep;

#endif /* __LINUX_FDTABLE_H */
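/*
 * Example (illustrative sketch, not part of this header): walking another
 * task's descriptor table with the RCU lookup helpers declared above, in the
 * style used by procfs. It assumes the lookup takes a reference on the
 * returned file, which the caller drops with fput(); print_task_fds() is a
 * hypothetical helper.
 */
#if 0 /* illustrative only */
static void print_task_fds(struct task_struct *task)
{
	unsigned int fd = 0;
	struct file *file;

	rcu_read_lock();
	while ((file = task_lookup_next_fdget_rcu(task, &fd))) {
		rcu_read_unlock();

		pr_info("fd %u -> %pD\n", fd, file);
		fput(file);	/* drop the reference taken by the lookup */
		fd++;		/* continue after the fd just reported */

		rcu_read_lock();
	}
	rcu_read_unlock();
}
#endif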
/* SPDX-License-Identifier: GPL-2.0+ */
#ifndef _LINUX_MAPLE_TREE_H
#define _LINUX_MAPLE_TREE_H
/*
 * Maple Tree - An RCU-safe adaptive tree for storing ranges
 * Copyright (c) 2018-2022 Oracle
 *
Authors: Liam R. Howlett <Liam.Howlett@Oracle.com> * Matthew Wilcox <willy@infradead.org> */ #include <linux/kernel.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> /* #define CONFIG_MAPLE_RCU_DISABLED */ /* * Allocated nodes are mutable until they have been inserted into the tree, * at which time they cannot change their type until they have been removed * from the tree and an RCU grace period has passed. * * Removed nodes have their ->parent set to point to themselves. RCU readers * check ->parent before relying on the value that they loaded from the * slots array. This lets us reuse the slots array for the RCU head. * * Nodes in the tree point to their parent unless bit 0 is set. */ #if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) /* 64bit sizes */ #define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1) #else /* 32bit sizes */ #define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */ #define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */ #define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */ #define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2) #endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */ #define MAPLE_NODE_MASK 255UL /* * The node->parent of the root node has bit 0 set and the rest of the pointer * is a pointer to the tree itself. No more bits are available in this pointer * (on m68k, the data structure may only be 2-byte aligned). * * Internal non-root nodes can only have maple_range_* nodes as parents. The * parent pointer is 256B aligned like all other tree nodes. When storing a 32 * or 64 bit values, the offset can fit into 4 bits. The 16 bit values need an * extra bit to store the offset. This extra bit comes from a reuse of the last * bit in the node type. This is possible by using bit 1 to indicate if bit 2 * is part of the type or the slot. * * Once the type is decided, the decision of an allocation range type or a range * type is done by examining the immutable tree flag for the MAPLE_ALLOC_RANGE * flag. * * Node types: * 0x??1 = Root * 0x?00 = 16 bit nodes * 0x010 = 32 bit nodes * 0x110 = 64 bit nodes * * Slot size and location in the parent pointer: * type : slot location * 0x??1 : Root * 0x?00 : 16 bit values, type in 0-1, slot in 2-6 * 0x010 : 32 bit values, type in 0-2, slot in 3-6 * 0x110 : 64 bit values, type in 0-2, slot in 3-6 */ /* * This metadata is used to optimize the gap updating code and in reverse * searching for gaps or any other code that needs to find the end of the data. */ struct maple_metadata { unsigned char end; unsigned char gap; }; /* * Leaf nodes do not store pointers to nodes, they store user data. Users may * store almost any bit pattern. As noted above, the optimisation of storing an * entry at 0 in the root pointer cannot be done for data which have the bottom * two bits set to '10'. We also reserve values with the bottom two bits set to * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use. Some APIs * return errnos as a negative errno shifted right by two bits and the bottom * two bits set to '10', and while choosing to store these values in the array * is not an error, it may lead to confusion if you're testing for an error with * mas_is_err(). * * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits * 3-6), bit 2 is reserved. That leaves bits 0-1 unused for now. * * In regular B-Tree terms, pivots are called keys. 
The term pivot is used to * indicate that the tree is specifying ranges, Pivots may appear in the * subtree with an entry attached to the value whereas keys are unique to a * specific position of a B-tree. Pivot values are inclusive of the slot with * the same index. */ struct maple_range_64 { struct maple_pnode *parent; unsigned long pivot[MAPLE_RANGE64_SLOTS - 1]; union { void __rcu *slot[MAPLE_RANGE64_SLOTS]; struct { void __rcu *pad[MAPLE_RANGE64_SLOTS - 1]; struct maple_metadata meta; }; }; }; /* * At tree creation time, the user can specify that they're willing to trade off * storing fewer entries in a tree in return for storing more information in * each node. * * The maple tree supports recording the largest range of NULL entries available * in this node, also called gaps. This optimises the tree for allocating a * range. */ struct maple_arange_64 { struct maple_pnode *parent; unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1]; void __rcu *slot[MAPLE_ARANGE64_SLOTS]; unsigned long gap[MAPLE_ARANGE64_SLOTS]; struct maple_metadata meta; }; struct maple_alloc { unsigned long total; unsigned char node_count; unsigned int request_count; struct maple_alloc *slot[MAPLE_ALLOC_SLOTS]; }; struct maple_topiary { struct maple_pnode *parent; struct maple_enode *next; /* Overlaps the pivot */ }; enum maple_type { maple_dense, maple_leaf_64, maple_range_64, maple_arange_64, }; /** * DOC: Maple tree flags * * * MT_FLAGS_ALLOC_RANGE - Track gaps in this tree * * MT_FLAGS_USE_RCU - Operate in RCU mode * * MT_FLAGS_HEIGHT_OFFSET - The position of the tree height in the flags * * MT_FLAGS_HEIGHT_MASK - The mask for the maple tree height value * * MT_FLAGS_LOCK_MASK - How the mt_lock is used * * MT_FLAGS_LOCK_IRQ - Acquired irq-safe * * MT_FLAGS_LOCK_BH - Acquired bh-safe * * MT_FLAGS_LOCK_EXTERN - mt_lock is not used * * MAPLE_HEIGHT_MAX The largest height that can be stored */ #define MT_FLAGS_ALLOC_RANGE 0x01 #define MT_FLAGS_USE_RCU 0x02 #define MT_FLAGS_HEIGHT_OFFSET 0x02 #define MT_FLAGS_HEIGHT_MASK 0x7C #define MT_FLAGS_LOCK_MASK 0x300 #define MT_FLAGS_LOCK_IRQ 0x100 #define MT_FLAGS_LOCK_BH 0x200 #define MT_FLAGS_LOCK_EXTERN 0x300 #define MT_FLAGS_ALLOC_WRAPPED 0x0800 #define MAPLE_HEIGHT_MAX 31 #define MAPLE_NODE_TYPE_MASK 0x0F #define MAPLE_NODE_TYPE_SHIFT 0x03 #define MAPLE_RESERVED_RANGE 4096 #ifdef CONFIG_LOCKDEP typedef struct lockdep_map *lockdep_map_p; #define mt_lock_is_held(mt) \ (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock)) #define mt_write_lock_is_held(mt) \ (!(mt)->ma_external_lock || \ lock_is_held_type((mt)->ma_external_lock, 0)) #define mt_set_external_lock(mt, lock) \ (mt)->ma_external_lock = &(lock)->dep_map #define mt_on_stack(mt) (mt).ma_external_lock = NULL #else typedef struct { /* nothing */ } lockdep_map_p; #define mt_lock_is_held(mt) 1 #define mt_write_lock_is_held(mt) 1 #define mt_set_external_lock(mt, lock) do { } while (0) #define mt_on_stack(mt) do { } while (0) #endif /* * If the tree contains a single entry at index 0, it is usually stored in * tree->ma_root. To optimise for the page cache, an entry which ends in '00', * '01' or '11' is stored in the root, but an entry which ends in '10' will be * stored in a node. Bits 3-6 are used to store enum maple_type. * * The flags are used both to store some immutable information about this tree * (set at tree creation time) and dynamic information set under the spinlock. * * Another use of flags are to indicate global states of the tree. 
This is the * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in * RCU mode. This mode was added to allow the tree to reuse nodes instead of * re-allocating and RCU freeing nodes when there is a single user. */ struct maple_tree { union { spinlock_t ma_lock; lockdep_map_p ma_external_lock; }; unsigned int ma_flags; void __rcu *ma_root; }; /** * MTREE_INIT() - Initialize a maple tree * @name: The maple tree name * @__flags: The maple tree flags * */ #define MTREE_INIT(name, __flags) { \ .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \ .ma_flags = __flags, \ .ma_root = NULL, \ } /** * MTREE_INIT_EXT() - Initialize a maple tree with an external lock. * @name: The tree name * @__flags: The maple tree flags * @__lock: The external lock */ #ifdef CONFIG_LOCKDEP #define MTREE_INIT_EXT(name, __flags, __lock) { \ .ma_external_lock = &(__lock).dep_map, \ .ma_flags = (__flags), \ .ma_root = NULL, \ } #else #define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags) #endif #define DEFINE_MTREE(name) \ struct maple_tree name = MTREE_INIT(name, 0) #define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) #define mtree_lock_nested(mas, subclass) \ spin_lock_nested((&(mt)->ma_lock), subclass) #define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) /* * The Maple Tree squeezes various bits in at various points which aren't * necessarily obvious. Usually, this is done by observing that pointers are * N-byte aligned and thus the bottom log_2(N) bits are available for use. We * don't use the high bits of pointers to store additional information because * we don't know what bits are unused on any given architecture. * * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8 * low bits for our own purposes. Nodes are currently of 4 types: * 1. Single pointer (Range is 0-0) * 2. Non-leaf Allocation Range nodes * 3. Non-leaf Range nodes * 4. Leaf Range nodes All nodes consist of a number of node slots, * pivots, and a parent pointer. */ struct maple_node { union { struct { struct maple_pnode *parent; void __rcu *slot[MAPLE_NODE_SLOTS]; }; struct { void *pad; struct rcu_head rcu; struct maple_enode *piv_parent; unsigned char parent_slot; enum maple_type type; unsigned char slot_len; unsigned int ma_flags; }; struct maple_range_64 mr64; struct maple_arange_64 ma64; struct maple_alloc alloc; }; }; /* * More complicated stores can cause two nodes to become one or three and * potentially alter the height of the tree. Either half of the tree may need * to be rebalanced against the other. The ma_topiary struct is used to track * which nodes have been 'cut' from the tree so that the change can be done * safely at a later date. This is done to support RCU. 
*/ struct ma_topiary { struct maple_enode *head; struct maple_enode *tail; struct maple_tree *mtree; }; void *mtree_load(struct maple_tree *mt, unsigned long index); int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); int mtree_insert_range(struct maple_tree *mt, unsigned long first, unsigned long last, void *entry, gfp_t gfp); int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); int mtree_store_range(struct maple_tree *mt, unsigned long first, unsigned long last, void *entry, gfp_t gfp); int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp); void *mtree_erase(struct maple_tree *mt, unsigned long index); int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp); void mtree_destroy(struct maple_tree *mt); void __mt_destroy(struct maple_tree *mt); /** * mtree_empty() - Determine if a tree has any present entries. * @mt: Maple Tree. * * Context: Any context. * Return: %true if the tree contains only NULL pointers. */ static inline bool mtree_empty(const struct maple_tree *mt) { return mt->ma_root == NULL; } /* Advanced API */ /* * Maple State Status * ma_active means the maple state is pointing to a node and offset and can * continue operating on the tree. * ma_start means we have not searched the tree. * ma_root means we have searched the tree and the entry we found lives in * the root of the tree (ie it has index 0, length 1 and is the only entry in * the tree). * ma_none means we have searched the tree and there is no node in the * tree for this entry. For example, we searched for index 1 in an empty * tree. Or we have a tree which points to a full leaf node and we * searched for an entry which is larger than can be contained in that * leaf node. * ma_pause means the data within the maple state may be stale, restart the * operation * ma_overflow means the search has reached the upper limit of the search * ma_underflow means the search has reached the lower limit of the search * ma_error means there was an error, check the node for the error number. */ enum maple_status { ma_active, ma_start, ma_root, ma_none, ma_pause, ma_overflow, ma_underflow, ma_error, }; /* * The maple state is defined in the struct ma_state and is used to keep track * of information during operations, and even between operations when using the * advanced API. * * If state->node has bit 0 set then it references a tree location which is not * a node (eg the root). If bit 1 is set, the rest of the bits are a negative * errno. Bit 2 (the 'unallocated slots' bit) is clear. Bits 3-6 indicate the * node type. * * state->alloc either has a request number of nodes or an allocated node. If * stat->alloc has a requested number of nodes, the first bit will be set (0x1) * and the remaining bits are the value. If state->alloc is a node, then the * node will be of type maple_alloc. maple_alloc has MAPLE_NODE_SLOTS - 1 for * storing more allocated nodes, a total number of nodes allocated, and the * node_count in this node. node_count is the number of allocated nodes in this * node. 
The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further * nodes into state->alloc->slot[0]'s node. Nodes are taken from state->alloc * by removing a node from the state->alloc node until state->alloc->node_count * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted * to state->alloc. Nodes are pushed onto state->alloc by putting the current * state->alloc into the pushed node's slot[0]. * * The state also contains the implied min/max of the state->node, the depth of * this search, and the offset. The implied min/max are either from the parent * node or are 0-oo for the root node. The depth is incremented or decremented * every time a node is walked down or up. The offset is the slot/pivot of * interest in the node - either for reading or writing. * * When returning a value the maple state index and last respectively contain * the start and end of the range for the entry. Ranges are inclusive in the * Maple Tree. * * The status of the state is used to determine how the next action should treat * the state. For instance, if the status is ma_start then the next action * should start at the root of the tree and walk down. If the status is * ma_pause then the node may be stale data and should be discarded. If the * status is ma_overflow, then the last action hit the upper limit. * */ struct ma_state { struct maple_tree *tree; /* The tree we're operating in */ unsigned long index; /* The index we're operating on - range start */ unsigned long last; /* The last index we're operating on - range end */ struct maple_enode *node; /* The node containing this entry */ unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ struct maple_alloc *alloc; /* Allocated nodes for this operation */ enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; unsigned char mas_flags; unsigned char end; /* The end of the node */ }; struct ma_wr_state { struct ma_state *mas; struct maple_node *node; /* Decoded mas->node */ unsigned long r_min; /* range min */ unsigned long r_max; /* range max */ enum maple_type type; /* mas->node type */ unsigned char offset_end; /* The offset where the write ends */ unsigned long *pivots; /* mas->node->pivots pointer */ unsigned long end_piv; /* The pivot at the offset end */ void __rcu **slots; /* mas->node->slots pointer */ void *entry; /* The entry to write */ void *content; /* The existing entry that is being overwritten */ }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) #define mas_lock_nested(mas, subclass) \ spin_lock_nested(&((mas)->tree->ma_lock), subclass) #define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) /* * Special values for ma_state.node. * MA_ERROR represents an errno. After dropping the lock and attempting * to resolve the error, the walk would have to be restarted from the * top of the tree as the tree may have been modified. 
*/ #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) #define MA_STATE(name, mt, first, end) \ struct ma_state name = { \ .tree = mt, \ .index = first, \ .last = end, \ .node = NULL, \ .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ .alloc = NULL, \ .mas_flags = 0, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ struct ma_wr_state name = { \ .mas = ma_state, \ .content = NULL, \ .entry = wr_entry, \ } #define MA_TOPIARY(name, tree) \ struct ma_topiary name = { \ .head = NULL, \ .tail = NULL, \ .mtree = tree, \ } void *mas_walk(struct ma_state *mas); void *mas_store(struct ma_state *mas, void *entry); void *mas_erase(struct ma_state *mas); int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); void mas_store_prealloc(struct ma_state *mas, void *entry); void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); void maple_tree_init(void); void mas_destroy(struct ma_state *mas); int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries); void *mas_prev(struct ma_state *mas, unsigned long min); void *mas_prev_range(struct ma_state *mas, unsigned long max); void *mas_next(struct ma_state *mas, unsigned long max); void *mas_next_range(struct ma_state *mas, unsigned long max); int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); /* * This finds an empty area from the highest address to the lowest. * AKA "Topdown" version, */ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, unsigned long addr) { memset(mas, 0, sizeof(struct ma_state)); mas->tree = tree; mas->index = mas->last = addr; mas->max = ULONG_MAX; mas->status = ma_start; mas->node = NULL; } static inline bool mas_is_active(struct ma_state *mas) { return mas->status == ma_active; } static inline bool mas_is_err(struct ma_state *mas) { return mas->status == ma_error; } /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. * * Resets the error or walk state of the @mas so future walks of the * array will start from the root. Use this if you have dropped the * lock and want to reuse the ma_state. * * Context: Any context. */ static __always_inline void mas_reset(struct ma_state *mas) { mas->status = ma_start; mas->node = NULL; } /** * mas_for_each() - Iterate over a range of the maple tree. * @__mas: Maple Tree operation state (maple_state) * @__entry: Entry retrieved from the tree * @__max: maximum index to retrieve from the tree * * When returned, mas->index and mas->last will hold the entire range for the * entry. * * Note: may return the zero entry. 
*/ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) #ifdef CONFIG_DEBUG_MAPLE_TREE enum mt_dump_format { mt_dump_dec, mt_dump_hex, }; extern atomic_t maple_tree_tests_run; extern atomic_t maple_tree_tests_passed; void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); void mas_dump(const struct ma_state *mas); void mas_wr_dump(const struct ma_wr_state *wr_mas); void mt_validate(struct maple_tree *mt); void mt_cache_shrink(void); #define MT_BUG_ON(__tree, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mt_dump(__tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MAS_BUG_ON(__mas, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_dump(__mas); \ mt_dump((__mas)->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MAS_WR_BUG_ON(__wrmas, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ pr_info("BUG at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_wr_dump(__wrmas); \ mas_dump((__wrmas)->mas); \ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ } while (0) #define MT_WARN_ON(__tree, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mt_dump(__tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #define MAS_WARN_ON(__mas, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_dump(__mas); \ mt_dump((__mas)->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #define MAS_WR_WARN_ON(__wrmas, __x) ({ \ int ret = !!(__x); \ atomic_inc(&maple_tree_tests_run); \ if (ret) { \ pr_info("WARN at %s:%d (%u)\n", \ __func__, __LINE__, __x); \ mas_wr_dump(__wrmas); \ mas_dump((__wrmas)->mas); \ mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ pr_info("Pass: %u Run:%u\n", \ atomic_read(&maple_tree_tests_passed), \ atomic_read(&maple_tree_tests_run)); \ dump_stack(); \ } else { \ atomic_inc(&maple_tree_tests_passed); \ } \ unlikely(ret); \ }) #else #define MT_BUG_ON(__tree, __x) BUG_ON(__x) #define MAS_BUG_ON(__mas, __x) BUG_ON(__x) #define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) #define MT_WARN_ON(__tree, __x) WARN_ON(__x) #define MAS_WARN_ON(__mas, __x) WARN_ON(__x) #define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) #endif /* CONFIG_DEBUG_MAPLE_TREE */ /** * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the * current location. * @mas: Maple Tree operation state. * @start: New start of range in the Maple Tree. 
* @last: New end of range in the Maple Tree. * * set the internal maple state values to a sub-range. * Please use mas_set_range() if you do not know where you are in the tree. */ static inline void __mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { /* Ensure the range starts within the current slot */ MAS_WARN_ON(mas, mas_is_active(mas) && (mas->index > start || mas->last < start)); mas->index = start; mas->last = last; } /** * mas_set_range() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. * @start: New start of range in the Maple Tree. * @last: New end of range in the Maple Tree. * * Move the operation state to refer to a different range. This will * have the effect of starting a walk from the top; see mas_next() * to move to an adjacent index. */ static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { mas_reset(mas); __mas_set_range(mas, start, last); } /** * mas_set() - Set up Maple Tree operation state for a different index. * @mas: Maple Tree operation state. * @index: New index into the Maple Tree. * * Move the operation state to refer to a different index. This will * have the effect of starting a walk from the top; see mas_next() * to move to an adjacent index. */ static inline void mas_set(struct ma_state *mas, unsigned long index) { mas_set_range(mas, index, index); } static inline bool mt_external_lock(const struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN; } /** * mt_init_flags() - Initialise an empty maple tree with flags. * @mt: Maple Tree * @flags: maple tree flags. * * If you need to initialise a Maple Tree with special flags (eg, an * allocation tree), use this function. * * Context: Any context. */ static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags) { mt->ma_flags = flags; if (!mt_external_lock(mt)) spin_lock_init(&mt->ma_lock); rcu_assign_pointer(mt->ma_root, NULL); } /** * mt_init() - Initialise an empty maple tree. * @mt: Maple Tree * * An empty Maple Tree. * * Context: Any context. */ static inline void mt_init(struct maple_tree *mt) { mt_init_flags(mt, 0); } static inline bool mt_in_rcu(struct maple_tree *mt) { #ifdef CONFIG_MAPLE_RCU_DISABLED return false; #endif return mt->ma_flags & MT_FLAGS_USE_RCU; } /** * mt_clear_in_rcu() - Switch the tree to non-RCU mode. * @mt: The Maple Tree */ static inline void mt_clear_in_rcu(struct maple_tree *mt) { if (!mt_in_rcu(mt)) return; if (mt_external_lock(mt)) { WARN_ON(!mt_lock_is_held(mt)); mt->ma_flags &= ~MT_FLAGS_USE_RCU; } else { mtree_lock(mt); mt->ma_flags &= ~MT_FLAGS_USE_RCU; mtree_unlock(mt); } } /** * mt_set_in_rcu() - Switch the tree to RCU safe mode. 
* @mt: The Maple Tree */ static inline void mt_set_in_rcu(struct maple_tree *mt) { if (mt_in_rcu(mt)) return; if (mt_external_lock(mt)) { WARN_ON(!mt_lock_is_held(mt)); mt->ma_flags |= MT_FLAGS_USE_RCU; } else { mtree_lock(mt); mt->ma_flags |= MT_FLAGS_USE_RCU; mtree_unlock(mt); } } static inline unsigned int mt_height(const struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; } void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_find_after(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min); void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); /** * mt_for_each - Iterate over each entry starting at index until max. * @__tree: The Maple Tree * @__entry: The current entry * @__index: The index to start the search from. Subsequently used as iterator. * @__max: The maximum limit for @index * * This iterator skips all entries, which resolve to a NULL pointer, * e.g. entries which has been reserved with XA_ZERO_ENTRY. */ #define mt_for_each(__tree, __entry, __index, __max) \ for (__entry = mt_find(__tree, &(__index), __max); \ __entry; __entry = mt_find_after(__tree, &(__index), __max)) #endif /*_LINUX_MAPLE_TREE_H */
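/*
 * Example (illustrative sketch, not part of this header): basic use of the
 * simple maple tree API declared above, which handles locking internally.
 * The tree name and payload are hypothetical.
 */
#if 0 /* illustrative only */
static DEFINE_MTREE(example_tree);

static int example_use(void)
{
	static int payload;
	unsigned long index = 0;
	void *entry;
	int ret;

	/* Associate the inclusive index range 10-19 with a single entry. */
	ret = mtree_store_range(&example_tree, 10, 19, &payload, GFP_KERNEL);
	if (ret)
		return ret;

	/* A load anywhere inside the range returns the same entry. */
	entry = mtree_load(&example_tree, 15);

	/* Iterate all non-NULL entries; @index is advanced past each entry. */
	mt_for_each(&example_tree, entry, index, ULONG_MAX)
		pr_info("found entry %p (next search index %lu)\n", entry, index);

	mtree_destroy(&example_tree);
	return 0;
}
#endif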
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * irq_domain - IRQ translation domains
 *
 * Translation infrastructure between hw and linux irq numbers. This is
 * helpful for interrupt controllers to implement mapping between hardware
 * irq numbers and the Linux irq number space.
 *
 * irq_domains also have hooks for translating device tree or other
 * firmware interrupt representations into a hardware irq number that
 * can be mapped back to a Linux irq number without any extra platform
 * support code.
 *
 * Interrupt controller "domain" data structure. This could be defined as an
 * irq domain controller. That is, it handles the mapping between hardware
 * and virtual interrupt numbers for a given interrupt domain. The domain
 * structure is generally created by the PIC code for a given PIC instance
 * (though a domain can cover more than one PIC if they have a flat number
 * model). It's the domain callbacks that are responsible for setting the
 * irq_chip on a given irq_desc after it's been mapped.
* * The host code and data structures use a fwnode_handle pointer to * identify the domain. In some cases, and in order to preserve source * code compatibility, this fwnode pointer is "upgraded" to a DT * device_node. For those firmware infrastructures that do not provide * a unique identifier for an interrupt controller, the irq_domain * code offers a fwnode allocator. */ #ifndef _LINUX_IRQDOMAIN_H #define _LINUX_IRQDOMAIN_H #include <linux/types.h> #include <linux/irqdomain_defs.h> #include <linux/irqhandler.h> #include <linux/of.h> #include <linux/mutex.h> #include <linux/radix-tree.h> struct device_node; struct fwnode_handle; struct irq_domain; struct irq_chip; struct irq_data; struct irq_desc; struct cpumask; struct seq_file; struct irq_affinity_desc; struct msi_parent_ops; #define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16 /** * struct irq_fwspec - generic IRQ specifier structure * * @fwnode: Pointer to a firmware-specific descriptor * @param_count: Number of device-specific parameters * @param: Device-specific parameters * * This structure, directly modeled after of_phandle_args, is used to * pass a device-specific description of an interrupt. */ struct irq_fwspec { struct fwnode_handle *fwnode; int param_count; u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; }; /* Conversion function from of_phandle_args fields to fwspec */ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, unsigned int count, struct irq_fwspec *fwspec); /** * struct irq_domain_ops - Methods for irq_domain objects * @match: Match an interrupt controller device node to a host, returns * 1 on a match * @map: Create or update a mapping between a virtual irq number and a hw * irq number. This is called only once for a given mapping. * @unmap: Dispose of such a mapping * @xlate: Given a device tree node and interrupt specifier, decode * the hardware irq number and linux irq type value. * * Functions below are provided by the driver and called whenever a new mapping * is created or an old mapping is disposed. The driver can then proceed to * whatever internal data structures management is required. It also needs * to setup the irq_desc when returning from map(). */ struct irq_domain_ops { int (*match)(struct irq_domain *d, struct device_node *node, enum irq_domain_bus_token bus_token); int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); void (*unmap)(struct irq_domain *d, unsigned int virq); int (*xlate)(struct irq_domain *d, struct device_node *node, const u32 *intspec, unsigned int intsize, unsigned long *out_hwirq, unsigned int *out_type); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /* extended V2 interfaces to support hierarchy irq_domains */ int (*alloc)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS void (*debug_show)(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind); #endif }; extern const struct irq_domain_ops irq_generic_chip_ops; struct irq_domain_chip_generic; /** * struct irq_domain - Hardware interrupt number translation object * @link: Element in global irq_domain list. 
* @name: Name of interrupt domain * @ops: Pointer to irq_domain methods * @host_data: Private data pointer for use by owner. Not touched by irq_domain * core code. * @flags: Per irq_domain flags * @mapcount: The number of mapped interrupts * @mutex: Domain lock, hierarchical domains use root domain's lock * @root: Pointer to root domain, or containing structure if non-hierarchical * * Optional elements: * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy * to swap it for the of_node via the irq_domain_get_of_node accessor * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. * @dev: Pointer to the device which instantiated the irqdomain * With per device irq domains this is not necessarily the same * as @pm_dev. * @pm_dev: Pointer to a device that can be utilized for power management * purposes related to the irq domain. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init * * Revmap data, used internally by the irq domain code: * @revmap_size: Size of the linear map table @revmap[] * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map * @revmap: Linear table of irq_data pointers */ struct irq_domain { struct list_head link; const char *name; const struct irq_domain_ops *ops; void *host_data; unsigned int flags; unsigned int mapcount; struct mutex mutex; struct irq_domain *root; /* Optional data */ struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; struct device *dev; struct device *pm_dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif #ifdef CONFIG_GENERIC_MSI_IRQ const struct msi_parent_ops *msi_parent_ops; #endif /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; unsigned int revmap_size; struct radix_tree_root revmap_tree; struct irq_data __rcu *revmap[] __counted_by(revmap_size); }; /* Irq domain flags */ enum { /* Irq domain is hierarchical */ IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0), /* Irq domain name was allocated in __irq_domain_add() */ IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1), /* Irq domain is an IPI domain with virq per cpu */ IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), /* Irq domain is an IPI domain with single virq */ IRQ_DOMAIN_FLAG_IPI_SINGLE = (1 << 3), /* Irq domain implements MSIs */ IRQ_DOMAIN_FLAG_MSI = (1 << 4), /* * Irq domain implements isolated MSI, see msi_device_has_isolated_msi() */ IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5), /* Irq domain doesn't translate anything */ IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), /* Irq domain is a MSI parent domain */ IRQ_DOMAIN_FLAG_MSI_PARENT = (1 << 8), /* Irq domain is a MSI device domain */ IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9), /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the * core code. 
*/ IRQ_DOMAIN_FLAG_NONCORE = (1 << 16), }; static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) { return to_of_node(d->fwnode); } static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev) { if (d) d->pm_dev = dev; } #ifdef CONFIG_IRQ_DOMAIN struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, const char *name, phys_addr_t *pa); enum { IRQCHIP_FWNODE_REAL, IRQCHIP_FWNODE_NAMED, IRQCHIP_FWNODE_NAMED_ID, }; static inline struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL); } static inline struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, NULL); } static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, pa); } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, unsigned int size, unsigned int first_irq, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); extern struct irq_domain *irq_get_default_host(void); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity); static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) { return node ? &node->fwnode : NULL; } extern const struct fwnode_operations irqchip_fwnode_ops; static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode) { return fwnode && fwnode->ops == &irqchip_fwnode_ops; } extern void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token); static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) { struct irq_fwspec fwspec = { .fwnode = fwnode, }; return irq_find_matching_fwspec(&fwspec, bus_token); } static inline struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token) { return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token); } static inline struct irq_domain *irq_find_host(struct device_node *node) { struct irq_domain *d; d = irq_find_matching_host(node, DOMAIN_BUS_WIRED); if (!d) d = irq_find_matching_host(node, DOMAIN_BUS_ANY); return d; } static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node, unsigned int size, unsigned int first_irq, const struct irq_domain_ops *ops, void *host_data) { return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data); } /** * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. 
* @of_node: pointer to interrupt controller's device tree node. * @size: Number of interrupts in the domain. * @ops: map/unmap domain callbacks * @host_data: Controller private data pointer */ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, unsigned int size, const struct irq_domain_ops *ops, void *host_data) { return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data); } #ifdef CONFIG_IRQ_DOMAIN_NOMAP static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data); } extern unsigned int irq_create_direct_mapping(struct irq_domain *host); #endif static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, const struct irq_domain_ops *ops, void *host_data) { return __irq_domain_add(of_node_to_fwnode(of_node), 0, ~0, 0, ops, host_data); } static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, unsigned int size, const struct irq_domain_ops *ops, void *host_data) { return __irq_domain_add(fwnode, size, size, 0, ops, host_data); } static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { return __irq_domain_add(fwnode, 0, ~0, 0, ops, host_data); } extern void irq_domain_remove(struct irq_domain *host); extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq); extern void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); extern unsigned int irq_create_mapping_affinity(struct irq_domain *host, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity); extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); extern void irq_dispose_mapping(unsigned int virq); static inline unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq) { return irq_create_mapping_affinity(host, hwirq, NULL); } extern struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, unsigned int *irq); static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { return __irq_resolve_mapping(domain, hwirq, NULL); } /** * irq_find_mapping() - Find a linux irq from a hw irq number. 
* @domain: domain owning this hardware interrupt * @hwirq: hardware irq number in that domain space */ static inline unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { unsigned int irq; if (__irq_resolve_mapping(domain, hwirq, &irq)) return irq; return 0; } static inline unsigned int irq_linear_revmap(struct irq_domain *domain, irq_hw_number_t hwirq) { return irq_find_mapping(domain, hwirq); } extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); /* IPI functions */ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); /* V2 interfaces to support hierarchy IRQ domains. */ extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); extern void irq_domain_reset_irq_data(struct irq_data *irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY extern struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data); static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, struct device_node *node, const struct irq_domain_ops *ops, void *host_data) { return irq_domain_create_hierarchy(parent, flags, size, of_node_to_fwnode(node), ops, host_data); } extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity); extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early); extern void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, NULL); } extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg); extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data); extern void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); extern void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); extern int 
irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); extern int irq_domain_pop_irq(struct irq_domain *domain, int virq); extern int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg); extern void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs); extern int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq); static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY; } static inline bool irq_domain_is_ipi(struct irq_domain *domain) { return domain->flags & (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); } static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_IPI_PER_CPU; } static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE; } static inline bool irq_domain_is_msi(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_MSI; } static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; } static inline bool irq_domain_is_msi_device(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE; } #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { return -1; } static inline void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { } static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_ipi(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_msi(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_msi_device(struct irq_domain *domain) { return false; } #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_MSI_IRQ int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type); void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); #else static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type) { WARN_ON_ONCE(1); return -EINVAL; } static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) { WARN_ON_ONCE(1); } #endif #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } static inline struct irq_domain *irq_find_matching_fwnode( struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) { return NULL; } #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */
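/*
 * Illustrative sketch (not part of the header above): how a hypothetical
 * interrupt-controller driver might use the linear-revmap API declared in
 * irqdomain.h. The names foo_irq_chip, foo_irq_map and foo_intc_probe are
 * invented for the example; irq_set_chip_and_handler(), irq_set_chip_data()
 * and handle_level_irq come from <linux/irq.h>.
 */
#include <linux/errno.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>

static struct irq_chip foo_irq_chip = {
	.name = "foo",			/* mask/unmask callbacks omitted in this sketch */
};

/* Called once per hwirq -> virq mapping to set up the irq_desc. */
static int foo_irq_map(struct irq_domain *d, unsigned int virq,
		       irq_hw_number_t hwirq)
{
	irq_set_chip_and_handler(virq, &foo_irq_chip, handle_level_irq);
	irq_set_chip_data(virq, d->host_data);
	return 0;
}

static const struct irq_domain_ops foo_irq_domain_ops = {
	.map	= foo_irq_map,
	.xlate	= irq_domain_xlate_onecell,	/* one-cell DT interrupt specifier */
};

static int foo_intc_probe(struct device_node *np, void *priv)
{
	struct irq_domain *domain;
	unsigned int virq;

	/* Linear revmap covering 32 hardware interrupt lines. */
	domain = irq_domain_add_linear(np, 32, &foo_irq_domain_ops, priv);
	if (!domain)
		return -ENOMEM;

	/* Allocate (or look up) the Linux irq number for hwirq 5. */
	virq = irq_create_mapping(domain, 5);

	return virq ? 0 : -EINVAL;
}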
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_BIT_SPINLOCK_H
#define __LINUX_BIT_SPINLOCK_H

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/atomic.h>
#include <linux/bug.h>

/*
 * bit-based spin_lock()
 *
 * Don't use this unless you really need to: spin_lock() and spin_unlock()
 * are significantly faster.
 */
static inline void bit_spin_lock(int bitnum, unsigned long *addr)
{
	/*
	 * Assuming the lock is uncontended, this never enters
	 * the body of the outer loop. If it is contended, then
	 * within the inner loop a non-atomic test is used to
	 * busywait with less bus contention for a good time to
	 * attempt to acquire the lock bit.
	 */
	preempt_disable();
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
	while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
		preempt_enable();
		do {
			cpu_relax();
		} while (test_bit(bitnum, addr));
		preempt_disable();
	}
#endif
	__acquire(bitlock);
}

/*
 * Return true if it was acquired
 */
static inline int bit_spin_trylock(int bitnum, unsigned long *addr)
{
	preempt_disable();
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
	if (unlikely(test_and_set_bit_lock(bitnum, addr))) {
		preempt_enable();
		return 0;
	}
#endif
	__acquire(bitlock);
	return 1;
}

/*
 * bit-based spin_unlock()
 */
static inline void bit_spin_unlock(int bitnum, unsigned long *addr)
{
#ifdef CONFIG_DEBUG_SPINLOCK
	BUG_ON(!test_bit(bitnum, addr));
#endif
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
	clear_bit_unlock(bitnum, addr);
#endif
	preempt_enable();
	__release(bitlock);
}

/*
 * bit-based spin_unlock()
 * non-atomic version, which can be used eg. if the bit lock itself is
 * protecting the rest of the flags in the word.
 */
static inline void __bit_spin_unlock(int bitnum, unsigned long *addr)
{
#ifdef CONFIG_DEBUG_SPINLOCK
	BUG_ON(!test_bit(bitnum, addr));
#endif
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
	__clear_bit_unlock(bitnum, addr);
#endif
	preempt_enable();
	__release(bitlock);
}

/*
 * Return true if the lock is held.
 */
static inline int bit_spin_is_locked(int bitnum, unsigned long *addr)
{
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
	return test_bit(bitnum, addr);
#elif defined CONFIG_PREEMPT_COUNT
	return preempt_count();
#else
	return 1;
#endif
}

#endif /* __LINUX_BIT_SPINLOCK_H */
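/*
 * Illustrative sketch (not part of the header above): protecting a small
 * per-object field with one bit of an existing flags word, as the comments
 * in bit_spinlock.h describe. FOO_LOCK_BIT and struct foo are invented for
 * the example.
 */
#include <linux/bit_spinlock.h>

#define FOO_LOCK_BIT	0

struct foo {
	unsigned long flags;	/* bit 0 is the lock, remaining bits are state */
	int counter;
};

static void foo_inc(struct foo *f)
{
	/* Spins (with preemption disabled) until the lock bit is acquired. */
	bit_spin_lock(FOO_LOCK_BIT, &f->flags);
	f->counter++;
	bit_spin_unlock(FOO_LOCK_BIT, &f->flags);
}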
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _SCSI_SCSI_HOST_H #define _SCSI_SCSI_HOST_H #include <linux/device.h> #include <linux/list.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <linux/blk-mq.h> #include <scsi/scsi.h> struct block_device; struct completion; struct module; struct scsi_cmnd; struct scsi_device; struct scsi_target; struct Scsi_Host; struct scsi_transport_template; #define SG_ALL SG_CHUNK_SIZE #define MODE_UNKNOWN 0x00 #define MODE_INITIATOR 0x01 #define MODE_TARGET 0x02 /** * enum scsi_timeout_action - How to handle a command that timed out. * @SCSI_EH_DONE: The command has already been completed. * @SCSI_EH_RESET_TIMER: Reset the timer and continue waiting for completion. * @SCSI_EH_NOT_HANDLED: The command has not yet finished. Abort the command. */ enum scsi_timeout_action { SCSI_EH_DONE, SCSI_EH_RESET_TIMER, SCSI_EH_NOT_HANDLED, }; struct scsi_host_template { /* * Put fields referenced in IO submission path together in * same cacheline */ /* * Additional per-command data allocated for the driver. */ unsigned int cmd_size; /* * The queuecommand function is used to queue up a scsi * command block to the LLDD. When the driver finished * processing the command the done callback is invoked. * * If queuecommand returns 0, then the driver has accepted the * command. It must also push it to the HBA if the scsi_cmnd * flag SCMD_LAST is set, or if the driver does not implement * commit_rqs. The done() function must be called on the command * when the driver has finished with it. (you may call done on the * command before queuecommand returns, but in this case you * *must* return 0 from queuecommand). * * Queuecommand may also reject the command, in which case it may * not touch the command and must not call done() for it. * * There are two possible rejection returns: * * SCSI_MLQUEUE_DEVICE_BUSY: Block this device temporarily, but * allow commands to other devices serviced by this host. * * SCSI_MLQUEUE_HOST_BUSY: Block all devices served by this * host temporarily. * * For compatibility, any other non-zero return is treated the * same as SCSI_MLQUEUE_HOST_BUSY. * * NOTE: "temporarily" means either until the next command for# * this device/host completes, or a period of time determined by * I/O pressure in the system if there are no other outstanding * commands. * * STATUS: REQUIRED */ int (* queuecommand)(struct Scsi_Host *, struct scsi_cmnd *); /* * The commit_rqs function is used to trigger a hardware * doorbell after some requests have been queued with * queuecommand, when an error is encountered before sending * the request with SCMD_LAST set. * * STATUS: OPTIONAL */ void (*commit_rqs)(struct Scsi_Host *, u16); struct module *module; const char *name; /* * The info function will return whatever useful information the * developer sees fit. If not provided, then the name field will * be used instead. * * Status: OPTIONAL */ const char *(*info)(struct Scsi_Host *); /* * Ioctl interface * * Status: OPTIONAL */ int (*ioctl)(struct scsi_device *dev, unsigned int cmd, void __user *arg); #ifdef CONFIG_COMPAT /* * Compat handler. Handle 32bit ABI. * When unknown ioctl is passed return -ENOIOCTLCMD. * * Status: OPTIONAL */ int (*compat_ioctl)(struct scsi_device *dev, unsigned int cmd, void __user *arg); #endif int (*init_cmd_priv)(struct Scsi_Host *shost, struct scsi_cmnd *cmd); int (*exit_cmd_priv)(struct Scsi_Host *shost, struct scsi_cmnd *cmd); /* * This is an error handling strategy routine. 
You don't need to * define one of these if you don't want to - there is a default * routine that is present that should work in most cases. For those * driver authors that have the inclination and ability to write their * own strategy routine, this is where it is specified. Note - the * strategy routine is *ALWAYS* run in the context of the kernel eh * thread. Thus you are guaranteed to *NOT* be in an interrupt * handler when you execute this, and you are also guaranteed to * *NOT* have any other commands being queued while you are in the * strategy routine. When you return from this function, operations * return to normal. * * See scsi_error.c scsi_unjam_host for additional comments about * what this function should and should not be attempting to do. * * Status: REQUIRED (at least one of them) */ int (* eh_abort_handler)(struct scsi_cmnd *); int (* eh_device_reset_handler)(struct scsi_cmnd *); int (* eh_target_reset_handler)(struct scsi_cmnd *); int (* eh_bus_reset_handler)(struct scsi_cmnd *); int (* eh_host_reset_handler)(struct scsi_cmnd *); /* * Before the mid layer attempts to scan for a new device where none * currently exists, it will call this entry in your driver. Should * your driver need to allocate any structs or perform any other init * items in order to send commands to a currently unused target/lun * combo, then this is where you can perform those allocations. This * is specifically so that drivers won't have to perform any kind of * "is this a new device" checks in their queuecommand routine, * thereby making the hot path a bit quicker. * * Return values: 0 on success, non-0 on failure * * Deallocation: If we didn't find any devices at this ID, you will * get an immediate call to slave_destroy(). If we find something * here then you will get a call to slave_configure(), then the * device will be used for however long it is kept around, then when * the device is removed from the system (or * possibly at reboot * time), you will then get a call to slave_destroy(). This is * assuming you implement slave_configure and slave_destroy. * However, if you allocate memory and hang it off the device struct, * then you must implement the slave_destroy() routine at a minimum * in order to avoid leaking memory * each time a device is tore down. * * Status: OPTIONAL */ int (* slave_alloc)(struct scsi_device *); /* * Once the device has responded to an INQUIRY and we know the * device is online, we call into the low level driver with the * struct scsi_device *. If the low level device driver implements * this function, it *must* perform the task of setting the queue * depth on the device. All other tasks are optional and depend * on what the driver supports and various implementation details. * * Things currently recommended to be handled at this time include: * * 1. Setting the device queue depth. Proper setting of this is * described in the comments for scsi_change_queue_depth. * 2. Determining if the device supports the various synchronous * negotiation protocols. The device struct will already have * responded to INQUIRY and the results of the standard items * will have been shoved into the various device flag bits, eg. * device->sdtr will be true if the device supports SDTR messages. * 3. Allocating command structs that the device will need. * 4. Setting the default timeout on this device (if needed). * 5. Anything else the low level driver might want to do on a device * specific setup basis... * 6. Return 0 on success, non-0 on error. 
The device will be marked * as offline on error so that no access will occur. If you return * non-0, your slave_destroy routine will never get called for this * device, so don't leave any loose memory hanging around, clean * up after yourself before returning non-0 * * Status: OPTIONAL */ int (* slave_configure)(struct scsi_device *); /* * Immediately prior to deallocating the device and after all activity * has ceased the mid layer calls this point so that the low level * driver may completely detach itself from the scsi device and vice * versa. The low level driver is responsible for freeing any memory * it allocated in the slave_alloc or slave_configure calls. * * Status: OPTIONAL */ void (* slave_destroy)(struct scsi_device *); /* * Before the mid layer attempts to scan for a new device attached * to a target where no target currently exists, it will call this * entry in your driver. Should your driver need to allocate any * structs or perform any other init items in order to send commands * to a currently unused target, then this is where you can perform * those allocations. * * Return values: 0 on success, non-0 on failure * * Status: OPTIONAL */ int (* target_alloc)(struct scsi_target *); /* * Immediately prior to deallocating the target structure, and * after all activity to attached scsi devices has ceased, the * midlayer calls this point so that the driver may deallocate * and terminate any references to the target. * * Note: This callback is called with the host lock held and hence * must not sleep. * * Status: OPTIONAL */ void (* target_destroy)(struct scsi_target *); /* * If a host has the ability to discover targets on its own instead * of scanning the entire bus, it can fill in this function and * call scsi_scan_host(). This function will be called periodically * until it returns 1 with the scsi_host and the elapsed time of * the scan in jiffies. * * Status: OPTIONAL */ int (* scan_finished)(struct Scsi_Host *, unsigned long); /* * If the host wants to be called before the scan starts, but * after the midlayer has set up ready for the scan, it can fill * in this function. * * Status: OPTIONAL */ void (* scan_start)(struct Scsi_Host *); /* * Fill in this function to allow the queue depth of this host * to be changeable (on a per device basis). Returns either * the current queue depth setting (may be different from what * was passed in) or an error. An error should only be * returned if the requested depth is legal but the driver was * unable to set it. If the requested depth is illegal, the * driver should set and return the closest legal queue depth. * * Status: OPTIONAL */ int (* change_queue_depth)(struct scsi_device *, int); /* * This functions lets the driver expose the queue mapping * to the block layer. * * Status: OPTIONAL */ void (* map_queues)(struct Scsi_Host *shost); /* * SCSI interface of blk_poll - poll for IO completions. * Only applicable if SCSI LLD exposes multiple h/w queues. * * Return value: Number of completed entries found. * * Status: OPTIONAL */ int (* mq_poll)(struct Scsi_Host *shost, unsigned int queue_num); /* * Check if scatterlists need to be padded for DMA draining. * * Status: OPTIONAL */ bool (* dma_need_drain)(struct request *rq); /* * This function determines the BIOS parameters for a given * harddisk. These tend to be numbers that are made up by * the host adapter. 
Parameters: * size, device, list (heads, sectors, cylinders) * * Status: OPTIONAL */ int (* bios_param)(struct scsi_device *, struct block_device *, sector_t, int []); /* * This function is called when one or more partitions on the * device reach beyond the end of the device. * * Status: OPTIONAL */ void (*unlock_native_capacity)(struct scsi_device *); /* * Can be used to export driver statistics and other infos to the * world outside the kernel ie. userspace and it also provides an * interface to feed the driver with information. * * Status: OBSOLETE */ int (*show_info)(struct seq_file *, struct Scsi_Host *); int (*write_info)(struct Scsi_Host *, char *, int); /* * This is an optional routine that allows the transport to become * involved when a scsi io timer fires. The return value tells the * timer routine how to finish the io timeout handling. * * Status: OPTIONAL */ enum scsi_timeout_action (*eh_timed_out)(struct scsi_cmnd *); /* * Optional routine that allows the transport to decide if a cmd * is retryable. Return true if the transport is in a state the * cmd should be retried on. */ bool (*eh_should_retry_cmd)(struct scsi_cmnd *scmd); /* This is an optional routine that allows transport to initiate * LLD adapter or firmware reset using sysfs attribute. * * Return values: 0 on success, -ve value on failure. * * Status: OPTIONAL */ int (*host_reset)(struct Scsi_Host *shost, int reset_type); #define SCSI_ADAPTER_RESET 1 #define SCSI_FIRMWARE_RESET 2 /* * Name of proc directory */ const char *proc_name; /* * This determines if we will use a non-interrupt driven * or an interrupt driven scheme. It is set to the maximum number * of simultaneous commands a single hw queue in HBA will accept. */ int can_queue; /* * In many instances, especially where disconnect / reconnect are * supported, our host also has an ID on the SCSI bus. If this is * the case, then it must be reserved. Please set this_id to -1 if * your setup is in single initiator mode, and the host lacks an * ID. */ int this_id; /* * This determines the degree to which the host adapter is capable * of scatter-gather. */ unsigned short sg_tablesize; unsigned short sg_prot_tablesize; /* * Set this if the host adapter has limitations beside segment count. */ unsigned int max_sectors; /* * Maximum size in bytes of a single segment. */ unsigned int max_segment_size; /* * DMA scatter gather segment boundary limit. A segment crossing this * boundary will be split in two. */ unsigned long dma_boundary; unsigned long virt_boundary_mask; /* * This specifies "machine infinity" for host templates which don't * limit the transfer size. Note this limit represents an absolute * maximum, and may be over the transfer limits allowed for * individual devices (e.g. 256 for SCSI-1). */ #define SCSI_DEFAULT_MAX_SECTORS 1024 /* * True if this host adapter can make good use of linked commands. * This will allow more than one command to be queued to a given * unit on a given host. Set this to the maximum number of command * blocks to be provided for each device. Set this to 1 for one * command block per lun, 2 for two, etc. Do not set this to 0. * You should make sure that the host adapter will do the right thing * before you try setting this above 1. */ short cmd_per_lun; /* If use block layer to manage tags, this is tag allocation policy */ int tag_alloc_policy; /* * Track QUEUE_FULL events and reduce queue depth on demand. */ unsigned track_queue_depth:1; /* * This specifies the mode that a LLD supports. 
*/ unsigned supported_mode:2; /* * True for emulated SCSI host adapters (e.g. ATAPI). */ unsigned emulated:1; /* * True if the low-level driver performs its own reset-settle delays. */ unsigned skip_settle_delay:1; /* True if the controller does not support WRITE SAME */ unsigned no_write_same:1; /* True if the host uses host-wide tagspace */ unsigned host_tagset:1; /* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */ unsigned queuecommand_may_block:1; /* * Countdown for host blocking with no commands outstanding. */ unsigned int max_host_blocked; /* * Default value for the blocking. If the queue is empty, * host_blocked counts down in the request_fn until it restarts * host operations as zero is reached. * * FIXME: This should probably be a value in the template */ #define SCSI_DEFAULT_HOST_BLOCKED 7 /* * Pointer to the SCSI host sysfs attribute groups, NULL terminated. */ const struct attribute_group **shost_groups; /* * Pointer to the SCSI device attribute groups for this host, * NULL terminated. */ const struct attribute_group **sdev_groups; /* * Vendor Identifier associated with the host * * Note: When specifying vendor_id, be sure to read the * Vendor Type and ID formatting requirements specified in * scsi_netlink.h */ u64 vendor_id; }; /* * Temporary #define for host lock push down. Can be removed when all * drivers have been updated to take advantage of unlocked * queuecommand. * */ #define DEF_SCSI_QCMD(func_name) \ int func_name(struct Scsi_Host *shost, struct scsi_cmnd *cmd) \ { \ unsigned long irq_flags; \ int rc; \ spin_lock_irqsave(shost->host_lock, irq_flags); \ rc = func_name##_lck(cmd); \ spin_unlock_irqrestore(shost->host_lock, irq_flags); \ return rc; \ } /* * shost state: If you alter this, you also need to alter scsi_sysfs.c * (for the ascii descriptions) and the state model enforcer: * scsi_host_set_state() */ enum scsi_host_state { SHOST_CREATED = 1, SHOST_RUNNING, SHOST_CANCEL, SHOST_DEL, SHOST_RECOVERY, SHOST_CANCEL_RECOVERY, SHOST_DEL_RECOVERY, }; struct Scsi_Host { /* * __devices is protected by the host_lock, but you should * usually use scsi_device_lookup / shost_for_each_device * to access it and don't care about locking yourself. * In the rare case of being in irq context you can use * their __ prefixed variants with the lock held. NEVER * access this list directly from a driver. */ struct list_head __devices; struct list_head __targets; struct list_head starved_list; spinlock_t default_lock; spinlock_t *host_lock; struct mutex scan_mutex;/* serialize scanning activity */ struct list_head eh_abort_list; struct list_head eh_cmd_q; struct task_struct * ehandler; /* Error recovery thread. */ struct completion * eh_action; /* Wait for specific actions on the host. */ wait_queue_head_t host_wait; const struct scsi_host_template *hostt; struct scsi_transport_template *transportt; struct kref tagset_refcnt; struct completion tagset_freed; /* Area to keep a shared tag map */ struct blk_mq_tag_set tag_set; atomic_t host_blocked; unsigned int host_failed; /* commands that failed. protected by host_lock */ unsigned int host_eh_scheduled; /* EH scheduled without command */ unsigned int host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */ /* next two fields are used to bound the time spent in error handling */ int eh_deadline; unsigned long last_reset; /* * These three parameters can be used to allow for wide scsi, * and for host adapters that support multiple busses * The last two should be set to 1 more than the actual max id * or lun (e.g. 
8 for SCSI parallel systems). */ unsigned int max_channel; unsigned int max_id; u64 max_lun; /* * This is a unique identifier that must be assigned so that we * have some way of identifying each detected host adapter properly * and uniquely. For hosts that do not support more than one card * in the system at one time, this does not need to be set. It is * initialized to 0 in scsi_register. */ unsigned int unique_id; /* * The maximum length of SCSI commands that this host can accept. * Probably 12 for most host adapters, but could be 16 for others. * or 260 if the driver supports variable length cdbs. * For drivers that don't set this field, a value of 12 is * assumed. */ unsigned short max_cmd_len; int this_id; int can_queue; short cmd_per_lun; short unsigned int sg_tablesize; short unsigned int sg_prot_tablesize; unsigned int max_sectors; unsigned int opt_sectors; unsigned int max_segment_size; unsigned long dma_boundary; unsigned long virt_boundary_mask; /* * In scsi-mq mode, the number of hardware queues supported by the LLD. * * Note: it is assumed that each hardware queue has a queue depth of * can_queue. In other words, the total queue depth per host * is nr_hw_queues * can_queue. However, for when host_tagset is set, * the total queue depth is can_queue. */ unsigned nr_hw_queues; unsigned nr_maps; unsigned active_mode:2; /* * Host has requested that no further requests come through for the * time being. */ unsigned host_self_blocked:1; /* * Host uses correct SCSI ordering not PC ordering. The bit is * set for the minority of drivers whose authors actually read * the spec ;). */ unsigned reverse_ordering:1; /* Task mgmt function in progress */ unsigned tmf_in_progress:1; /* Asynchronous scan in progress */ unsigned async_scan:1; /* Don't resume host in EH */ unsigned eh_noresume:1; /* The controller does not support WRITE SAME */ unsigned no_write_same:1; /* True if the host uses host-wide tagspace */ unsigned host_tagset:1; /* The queuecommand callback may block. See also BLK_MQ_F_BLOCKING. */ unsigned queuecommand_may_block:1; /* Host responded with short (<36 bytes) INQUIRY result */ unsigned short_inquiry:1; /* The transport requires the LUN bits NOT to be stored in CDB[1] */ unsigned no_scsi2_lun_in_cdb:1; /* * Optional work queue to be utilized by the transport */ char work_q_name[20]; struct workqueue_struct *work_q; /* * Task management function work queue */ struct workqueue_struct *tmf_work_q; /* * Value host_blocked counts down from */ unsigned int max_host_blocked; /* Protection Information */ unsigned int prot_capabilities; unsigned char prot_guard_type; /* legacy crap */ unsigned long base; unsigned long io_port; unsigned char n_io_port; unsigned char dma_channel; unsigned int irq; enum scsi_host_state shost_state; /* ldm bits */ struct device shost_gendev, shost_dev; /* * Points to the transport data (if any) which is allocated * separately */ void *shost_data; /* * Points to the physical bus device we'd use to do DMA * Needed just in case we have virtual hosts. */ struct device *dma_dev; /* Delay for runtime autosuspend */ int rpm_autosuspend_delay; /* * We should ensure that this is aligned, both for better performance * and also because some compilers (m68k) don't automatically force * alignment to a long boundary. */ unsigned long hostdata[] /* Used for storage of host specific stuff */ __attribute__ ((aligned (sizeof(unsigned long)))); }; #define class_to_shost(d) \ container_of(d, struct Scsi_Host, shost_dev) #define shost_printk(prefix, shost, fmt, a...) 
\ dev_printk(prefix, &(shost)->shost_gendev, fmt, ##a) static inline void *shost_priv(struct Scsi_Host *shost) { return (void *)shost->hostdata; } int scsi_is_host_device(const struct device *); static inline struct Scsi_Host *dev_to_shost(struct device *dev) { while (!scsi_is_host_device(dev)) { if (!dev->parent) return NULL; dev = dev->parent; } return container_of(dev, struct Scsi_Host, shost_gendev); } static inline int scsi_host_in_recovery(struct Scsi_Host *shost) { return shost->shost_state == SHOST_RECOVERY || shost->shost_state == SHOST_CANCEL_RECOVERY || shost->shost_state == SHOST_DEL_RECOVERY || shost->tmf_in_progress; } extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); extern void scsi_flush_work(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *, int); extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, struct device *, struct device *); #if defined(CONFIG_SCSI_PROC_FS) struct proc_dir_entry * scsi_template_proc_dir(const struct scsi_host_template *sht); #else #define scsi_template_proc_dir(sht) NULL #endif extern void scsi_scan_host(struct Scsi_Host *); extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); extern int scsi_host_busy(struct Scsi_Host *shost); extern void scsi_host_put(struct Scsi_Host *t); extern struct Scsi_Host *scsi_host_lookup(unsigned int hostnum); extern const char *scsi_host_state_name(enum scsi_host_state); extern void scsi_host_complete_all_commands(struct Scsi_Host *shost, enum scsi_host_status status); static inline int __must_check scsi_add_host(struct Scsi_Host *host, struct device *dev) { return scsi_add_host_with_dma(host, dev, dev); } static inline struct device *scsi_get_device(struct Scsi_Host *shost) { return shost->shost_gendev.parent; } /** * scsi_host_scan_allowed - Is scanning of this host allowed * @shost: Pointer to Scsi_Host. **/ static inline int scsi_host_scan_allowed(struct Scsi_Host *shost) { return shost->shost_state == SHOST_RUNNING || shost->shost_state == SHOST_RECOVERY; } extern void scsi_unblock_requests(struct Scsi_Host *); extern void scsi_block_requests(struct Scsi_Host *); extern int scsi_host_block(struct Scsi_Host *shost); extern int scsi_host_unblock(struct Scsi_Host *shost, int new_state); void scsi_host_busy_iter(struct Scsi_Host *, bool (*fn)(struct scsi_cmnd *, void *), void *priv); struct class_container; /* * DIF defines the exchange of protection information between * initiator and SBC block device. * * DIX defines the exchange of protection information between OS and * initiator. */ enum scsi_host_prot_capabilities { SHOST_DIF_TYPE1_PROTECTION = 1 << 0, /* T10 DIF Type 1 */ SHOST_DIF_TYPE2_PROTECTION = 1 << 1, /* T10 DIF Type 2 */ SHOST_DIF_TYPE3_PROTECTION = 1 << 2, /* T10 DIF Type 3 */ SHOST_DIX_TYPE0_PROTECTION = 1 << 3, /* DIX between OS and HBA only */ SHOST_DIX_TYPE1_PROTECTION = 1 << 4, /* DIX with DIF Type 1 */ SHOST_DIX_TYPE2_PROTECTION = 1 << 5, /* DIX with DIF Type 2 */ SHOST_DIX_TYPE3_PROTECTION = 1 << 6, /* DIX with DIF Type 3 */ }; /* * SCSI hosts which support the Data Integrity Extensions must * indicate their capabilities by setting the prot_capabilities using * this call. 
*/ static inline void scsi_host_set_prot(struct Scsi_Host *shost, unsigned int mask) { shost->prot_capabilities = mask; } static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost) { return shost->prot_capabilities; } static inline int scsi_host_prot_dma(struct Scsi_Host *shost) { return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION; } static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type) { static unsigned char cap[] = { 0, SHOST_DIF_TYPE1_PROTECTION, SHOST_DIF_TYPE2_PROTECTION, SHOST_DIF_TYPE3_PROTECTION }; if (target_type >= ARRAY_SIZE(cap)) return 0; return shost->prot_capabilities & cap[target_type] ? target_type : 0; } static inline unsigned int scsi_host_dix_capable(struct Scsi_Host *shost, unsigned int target_type) { #if defined(CONFIG_BLK_DEV_INTEGRITY) static unsigned char cap[] = { SHOST_DIX_TYPE0_PROTECTION, SHOST_DIX_TYPE1_PROTECTION, SHOST_DIX_TYPE2_PROTECTION, SHOST_DIX_TYPE3_PROTECTION }; if (target_type >= ARRAY_SIZE(cap)) return 0; return shost->prot_capabilities & cap[target_type]; #endif return 0; } /* * All DIX-capable initiators must support the T10-mandated CRC * checksum. Controllers can optionally implement the IP checksum * scheme which has much lower impact on system performance. Note * that the main rationale for the checksum is to match integrity * metadata with data. Detecting bit errors are a job for ECC memory * and buses. */ enum scsi_host_guard_type { SHOST_DIX_GUARD_CRC = 1 << 0, SHOST_DIX_GUARD_IP = 1 << 1, }; static inline void scsi_host_set_guard(struct Scsi_Host *shost, unsigned char type) { shost->prot_guard_type = type; } static inline unsigned char scsi_host_get_guard(struct Scsi_Host *shost) { return shost->prot_guard_type; } extern int scsi_host_set_state(struct Scsi_Host *, enum scsi_host_state); #endif /* _SCSI_SCSI_HOST_H */
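/*
 * Illustrative sketch (not part of the header above): the minimal template a
 * hypothetical LLD might fill in, and the registration sequence built from
 * scsi_host_alloc()/scsi_add_host()/scsi_scan_host() declared above. The
 * "foo" names are invented for the example; scsi_done() and DID_NO_CONNECT
 * come from <scsi/scsi_cmnd.h> and <scsi/scsi.h>.
 */
#include <linux/module.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>

struct foo_hw { void __iomem *regs; };		/* per-host driver data */

static int foo_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
{
	/* A real driver would build a HW request here and ring the doorbell. */
	cmd->result = DID_NO_CONNECT << 16;
	scsi_done(cmd);			/* done may be called before returning 0 */
	return 0;
}

static const struct scsi_host_template foo_sht = {
	.module		= THIS_MODULE,
	.name		= "foo",
	.proc_name	= "foo",
	.queuecommand	= foo_queuecommand,
	.can_queue	= 32,
	.this_id	= -1,
	.sg_tablesize	= SG_ALL,
	.cmd_per_lun	= 1,
};

static int foo_probe(struct device *dev)
{
	struct Scsi_Host *shost;
	int err;

	/* Allocate the Scsi_Host plus sizeof(struct foo_hw) bytes of hostdata. */
	shost = scsi_host_alloc(&foo_sht, sizeof(struct foo_hw));
	if (!shost)
		return -ENOMEM;

	err = scsi_add_host(shost, dev);
	if (err) {
		scsi_host_put(shost);
		return err;
	}

	scsi_scan_host(shost);
	return 0;
}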
// SPDX-License-Identifier: GPL-2.0-only /* * HID raw devices, giving access to raw HID events. * * In comparison to hiddev, this device does not process the * hid events at all (no parsing, no lookups). This lets applications * work on raw hid events as they want to, and avoids the need to * use transport-specific userspace libhid/libusb libraries.
* * Copyright (c) 2007-2014 Jiri Kosina */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/fs.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/cdev.h> #include <linux/poll.h> #include <linux/device.h> #include <linux/major.h> #include <linux/slab.h> #include <linux/hid.h> #include <linux/mutex.h> #include <linux/sched/signal.h> #include <linux/string.h> #include <linux/hidraw.h> static int hidraw_major; static struct cdev hidraw_cdev; static const struct class hidraw_class = { .name = "hidraw", }; static struct hidraw *hidraw_table[HIDRAW_MAX_DEVICES]; static DECLARE_RWSEM(minors_rwsem); static ssize_t hidraw_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hidraw_list *list = file->private_data; int ret = 0, len; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); while (ret == 0) { if (list->head == list->tail) { add_wait_queue(&list->hidraw->wait, &wait); set_current_state(TASK_INTERRUPTIBLE); while (list->head == list->tail) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (!list->hidraw->exist) { ret = -EIO; break; } if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } /* allow O_NONBLOCK to work well from other threads */ mutex_unlock(&list->read_mutex); schedule(); mutex_lock(&list->read_mutex); set_current_state(TASK_INTERRUPTIBLE); } set_current_state(TASK_RUNNING); remove_wait_queue(&list->hidraw->wait, &wait); } if (ret) goto out; len = list->buffer[list->tail].len > count ? count : list->buffer[list->tail].len; if (list->buffer[list->tail].value) { if (copy_to_user(buffer, list->buffer[list->tail].value, len)) { ret = -EFAULT; goto out; } ret = len; } kfree(list->buffer[list->tail].value); list->buffer[list->tail].value = NULL; list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); } out: mutex_unlock(&list->read_mutex); return ret; } /* * The first byte of the report buffer is expected to be a report number. */ static ssize_t hidraw_send_report(struct file *file, const char __user *buffer, size_t count, unsigned char report_type) { unsigned int minor = iminor(file_inode(file)); struct hid_device *dev; __u8 *buf; int ret = 0; lockdep_assert_held(&minors_rwsem); if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { ret = -ENODEV; goto out; } dev = hidraw_table[minor]->hid; if (count > HID_MAX_BUFFER_SIZE) { hid_warn(dev, "pid %d passed too large report\n", task_pid_nr(current)); ret = -EINVAL; goto out; } if (count < 2) { hid_warn(dev, "pid %d passed too short report\n", task_pid_nr(current)); ret = -EINVAL; goto out; } buf = memdup_user(buffer, count); if (IS_ERR(buf)) { ret = PTR_ERR(buf); goto out; } if ((report_type == HID_OUTPUT_REPORT) && !(dev->quirks & HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP)) { ret = hid_hw_output_report(dev, buf, count); /* * compatibility with old implementation of USB-HID and I2C-HID: * if the device does not support receiving output reports, * on an interrupt endpoint, fallback to SET_REPORT HID command. 
*/ if (ret != -ENOSYS) goto out_free; } ret = hid_hw_raw_request(dev, buf[0], buf, count, report_type, HID_REQ_SET_REPORT); out_free: kfree(buf); out: return ret; } static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { ssize_t ret; down_read(&minors_rwsem); ret = hidraw_send_report(file, buffer, count, HID_OUTPUT_REPORT); up_read(&minors_rwsem); return ret; } /* * This function performs a Get_Report transfer over the control endpoint * per section 7.2.1 of the HID specification, version 1.1. The first byte * of buffer is the report number to request, or 0x0 if the device does not * use numbered reports. The report_type parameter can be HID_FEATURE_REPORT * or HID_INPUT_REPORT. */ static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t count, unsigned char report_type) { unsigned int minor = iminor(file_inode(file)); struct hid_device *dev; __u8 *buf; int ret = 0, len; unsigned char report_number; lockdep_assert_held(&minors_rwsem); if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { ret = -ENODEV; goto out; } dev = hidraw_table[minor]->hid; if (!dev->ll_driver->raw_request) { ret = -ENODEV; goto out; } if (count > HID_MAX_BUFFER_SIZE) { hid_warn(dev, "pid %d passed too large report\n", task_pid_nr(current)); ret = -EINVAL; goto out; } if (count < 2) { hid_warn(dev, "pid %d passed too short report\n", task_pid_nr(current)); ret = -EINVAL; goto out; } buf = kmalloc(count, GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto out; } /* * Read the first byte from the user. This is the report number, * which is passed to hid_hw_raw_request(). */ if (copy_from_user(&report_number, buffer, 1)) { ret = -EFAULT; goto out_free; } ret = hid_hw_raw_request(dev, report_number, buf, count, report_type, HID_REQ_GET_REPORT); if (ret < 0) goto out_free; len = (ret < count) ? ret : count; if (copy_to_user(buffer, buf, len)) { ret = -EFAULT; goto out_free; } ret = len; out_free: kfree(buf); out: return ret; } static __poll_t hidraw_poll(struct file *file, poll_table *wait) { struct hidraw_list *list = file->private_data; __poll_t mask = EPOLLOUT | EPOLLWRNORM; /* hidraw is always writable */ poll_wait(file, &list->hidraw->wait, wait); if (list->head != list->tail) mask |= EPOLLIN | EPOLLRDNORM; if (!list->hidraw->exist) mask |= EPOLLERR | EPOLLHUP; return mask; } static int hidraw_open(struct inode *inode, struct file *file) { unsigned int minor = iminor(inode); struct hidraw *dev; struct hidraw_list *list; unsigned long flags; int err = 0; if (!(list = kzalloc(sizeof(struct hidraw_list), GFP_KERNEL))) { err = -ENOMEM; goto out; } /* * Technically not writing to the hidraw_table but a write lock is * required to protect the device refcount. This is symmetrical to * hidraw_release(). 
*/ down_write(&minors_rwsem); if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { err = -ENODEV; goto out_unlock; } dev = hidraw_table[minor]; if (!dev->open++) { err = hid_hw_power(dev->hid, PM_HINT_FULLON); if (err < 0) { dev->open--; goto out_unlock; } err = hid_hw_open(dev->hid); if (err < 0) { hid_hw_power(dev->hid, PM_HINT_NORMAL); dev->open--; goto out_unlock; } } list->hidraw = hidraw_table[minor]; mutex_init(&list->read_mutex); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); list_add_tail(&list->node, &hidraw_table[minor]->list); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); file->private_data = list; out_unlock: up_write(&minors_rwsem); out: if (err < 0) kfree(list); return err; } static int hidraw_fasync(int fd, struct file *file, int on) { struct hidraw_list *list = file->private_data; return fasync_helper(fd, file, on, &list->fasync); } static void drop_ref(struct hidraw *hidraw, int exists_bit) { if (exists_bit) { hidraw->exist = 0; if (hidraw->open) { hid_hw_close(hidraw->hid); wake_up_interruptible(&hidraw->wait); } device_destroy(&hidraw_class, MKDEV(hidraw_major, hidraw->minor)); } else { --hidraw->open; } if (!hidraw->open) { if (!hidraw->exist) { hidraw_table[hidraw->minor] = NULL; kfree(hidraw); } else { /* close device for last reader */ hid_hw_close(hidraw->hid); hid_hw_power(hidraw->hid, PM_HINT_NORMAL); } } } static int hidraw_release(struct inode * inode, struct file * file) { unsigned int minor = iminor(inode); struct hidraw_list *list = file->private_data; unsigned long flags; down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); while (list->tail != list->head) { kfree(list->buffer[list->tail].value); list->buffer[list->tail].value = NULL; list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); } list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); drop_ref(hidraw_table[minor], 0); up_write(&minors_rwsem); return 0; } static long hidraw_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); unsigned int minor = iminor(inode); long ret = 0; struct hidraw *dev; void __user *user_arg = (void __user*) arg; down_read(&minors_rwsem); dev = hidraw_table[minor]; if (!dev || !dev->exist) { ret = -ENODEV; goto out; } switch (cmd) { case HIDIOCGRDESCSIZE: if (put_user(dev->hid->rsize, (int __user *)arg)) ret = -EFAULT; break; case HIDIOCGRDESC: { __u32 len; if (get_user(len, (int __user *)arg)) ret = -EFAULT; else if (len > HID_MAX_DESCRIPTOR_SIZE - 1) ret = -EINVAL; else if (copy_to_user(user_arg + offsetof( struct hidraw_report_descriptor, value[0]), dev->hid->rdesc, min(dev->hid->rsize, len))) ret = -EFAULT; break; } case HIDIOCGRAWINFO: { struct hidraw_devinfo dinfo; dinfo.bustype = dev->hid->bus; dinfo.vendor = dev->hid->vendor; dinfo.product = dev->hid->product; if (copy_to_user(user_arg, &dinfo, sizeof(dinfo))) ret = -EFAULT; break; } default: { struct hid_device *hid = dev->hid; if (_IOC_TYPE(cmd) != 'H') { ret = -EINVAL; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCSFEATURE(0))) { int len = _IOC_SIZE(cmd); ret = hidraw_send_report(file, user_arg, len, HID_FEATURE_REPORT); break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGFEATURE(0))) { int len = _IOC_SIZE(cmd); ret = hidraw_get_report(file, user_arg, len, HID_FEATURE_REPORT); break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCSINPUT(0))) { int len = _IOC_SIZE(cmd); ret = hidraw_send_report(file, user_arg, len, HID_INPUT_REPORT); break; } if (_IOC_NR(cmd) == 
_IOC_NR(HIDIOCGINPUT(0))) { int len = _IOC_SIZE(cmd); ret = hidraw_get_report(file, user_arg, len, HID_INPUT_REPORT); break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCSOUTPUT(0))) { int len = _IOC_SIZE(cmd); ret = hidraw_send_report(file, user_arg, len, HID_OUTPUT_REPORT); break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGOUTPUT(0))) { int len = _IOC_SIZE(cmd); ret = hidraw_get_report(file, user_arg, len, HID_OUTPUT_REPORT); break; } /* Begin Read-only ioctls. */ if (_IOC_DIR(cmd) != _IOC_READ) { ret = -EINVAL; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWNAME(0))) { int len = strlen(hid->name) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); ret = copy_to_user(user_arg, hid->name, len) ? -EFAULT : len; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWPHYS(0))) { int len = strlen(hid->phys) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); ret = copy_to_user(user_arg, hid->phys, len) ? -EFAULT : len; break; } if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWUNIQ(0))) { int len = strlen(hid->uniq) + 1; if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); ret = copy_to_user(user_arg, hid->uniq, len) ? -EFAULT : len; break; } } ret = -ENOTTY; } out: up_read(&minors_rwsem); return ret; } static const struct file_operations hidraw_ops = { .owner = THIS_MODULE, .read = hidraw_read, .write = hidraw_write, .poll = hidraw_poll, .open = hidraw_open, .release = hidraw_release, .unlocked_ioctl = hidraw_ioctl, .fasync = hidraw_fasync, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; int hidraw_report_event(struct hid_device *hid, u8 *data, int len) { struct hidraw *dev = hid->hidraw; struct hidraw_list *list; int ret = 0; unsigned long flags; spin_lock_irqsave(&dev->list_lock, flags); list_for_each_entry(list, &dev->list, node) { int new_head = (list->head + 1) & (HIDRAW_BUFFER_SIZE - 1); if (new_head == list->tail) continue; if (!(list->buffer[list->head].value = kmemdup(data, len, GFP_ATOMIC))) { ret = -ENOMEM; break; } list->buffer[list->head].len = len; list->head = new_head; kill_fasync(&list->fasync, SIGIO, POLL_IN); } spin_unlock_irqrestore(&dev->list_lock, flags); wake_up_interruptible(&dev->wait); return ret; } EXPORT_SYMBOL_GPL(hidraw_report_event); int hidraw_connect(struct hid_device *hid) { int minor, result; struct hidraw *dev; /* we accept any HID device, all applications */ dev = kzalloc(sizeof(struct hidraw), GFP_KERNEL); if (!dev) return -ENOMEM; result = -EINVAL; down_write(&minors_rwsem); for (minor = 0; minor < HIDRAW_MAX_DEVICES; minor++) { if (hidraw_table[minor]) continue; hidraw_table[minor] = dev; result = 0; break; } if (result) { up_write(&minors_rwsem); kfree(dev); goto out; } dev->dev = device_create(&hidraw_class, &hid->dev, MKDEV(hidraw_major, minor), NULL, "%s%d", "hidraw", minor); if (IS_ERR(dev->dev)) { hidraw_table[minor] = NULL; up_write(&minors_rwsem); result = PTR_ERR(dev->dev); kfree(dev); goto out; } init_waitqueue_head(&dev->wait); spin_lock_init(&dev->list_lock); INIT_LIST_HEAD(&dev->list); dev->hid = hid; dev->minor = minor; dev->exist = 1; hid->hidraw = dev; up_write(&minors_rwsem); out: return result; } EXPORT_SYMBOL_GPL(hidraw_connect); void hidraw_disconnect(struct hid_device *hid) { struct hidraw *hidraw = hid->hidraw; down_write(&minors_rwsem); drop_ref(hidraw, 1); up_write(&minors_rwsem); } EXPORT_SYMBOL_GPL(hidraw_disconnect); int __init hidraw_init(void) { int result; dev_t dev_id; result = alloc_chrdev_region(&dev_id, HIDRAW_FIRST_MINOR, HIDRAW_MAX_DEVICES, "hidraw"); if (result < 0) { pr_warn("can't get major number\n"); goto out; } hidraw_major = 
MAJOR(dev_id);

	result = class_register(&hidraw_class);
	if (result)
		goto error_cdev;

	cdev_init(&hidraw_cdev, &hidraw_ops);
	result = cdev_add(&hidraw_cdev, dev_id, HIDRAW_MAX_DEVICES);
	if (result < 0)
		goto error_class;

	pr_info("raw HID events driver (C) Jiri Kosina\n");
out:
	return result;

error_class:
	class_unregister(&hidraw_class);
error_cdev:
	unregister_chrdev_region(dev_id, HIDRAW_MAX_DEVICES);
	goto out;
}

void hidraw_exit(void)
{
	dev_t dev_id = MKDEV(hidraw_major, 0);

	cdev_del(&hidraw_cdev);
	class_unregister(&hidraw_class);
	unregister_chrdev_region(dev_id, HIDRAW_MAX_DEVICES);
}
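/*
 * Illustrative userspace sketch (editor's addition, not part of hidraw.c):
 * a minimal example of exercising the read/write and ioctl paths implemented
 * above. It assumes a device node such as /dev/hidraw0 exists and that the
 * device uses unnumbered reports (report number 0x0); both are assumptions,
 * adjust them for real hardware.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hidraw.h>

int main(void)
{
	struct hidraw_devinfo info;
	char name[256] = "";
	unsigned char buf[64];
	int fd = open("/dev/hidraw0", O_RDWR);	/* hypothetical device node */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* HIDIOCGRAWINFO and HIDIOCGRAWNAME map to the ioctl cases above. */
	if (ioctl(fd, HIDIOCGRAWINFO, &info) == 0)
		printf("bus 0x%x vendor 0x%04hx product 0x%04hx\n",
		       info.bustype, info.vendor, info.product);
	if (ioctl(fd, HIDIOCGRAWNAME(sizeof(name)), name) > 0)
		printf("name: %s\n", name);

	/*
	 * HIDIOCGFEATURE lands in hidraw_get_report(): byte 0 selects the
	 * report number (0x0 here for unnumbered reports) and the same
	 * buffer receives the report data on success.
	 */
	memset(buf, 0, sizeof(buf));
	buf[0] = 0x0;
	if (ioctl(fd, HIDIOCGFEATURE(sizeof(buf)), buf) < 0)
		perror("HIDIOCGFEATURE");

	/*
	 * Output reports would go through write(), which hidraw_write()
	 * forwards with buf[0] as the report number, just like above.
	 */
	close(fd);
	return 0;
}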
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_STRINGHASH_H
#define __LINUX_STRINGHASH_H

#include <linux/compiler.h>	/* For __pure */
#include <linux/types.h>	/* For u32, u64 */
#include <linux/hash.h>

/*
 * Routines for hashing strings of bytes to a 32-bit hash value.
 *
 * These hash functions are NOT GUARANTEED STABLE between kernel
 * versions, architectures, or even repeated boots of the same kernel.
 * (E.g. they may depend on boot-time hardware detection or be
 * deliberately randomized.)
 *
 * They are also not intended to be secure against collisions caused by
 * malicious inputs; much slower hash functions are required for that.
 *
 * They are optimized for pathname components, meaning short strings.
 * Even if a majority of files have longer names, the dynamic profile of
 * pathname components skews short due to short directory names.
 * (E.g. /usr/lib/libsesquipedalianism.so.3.141.)
 */

/*
 * Version 1: one byte at a time.  Example of use:
 *
 * unsigned long hash = init_name_hash(salt);
 * while (*p)
 *	hash = partial_name_hash(tolower(*p++), hash);
 * hash = end_name_hash(hash);
 *
 * Although this is designed for bytes, fs/hfsplus/unicode.c
 * abuses it to hash 16-bit values.
 */

/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
#define init_name_hash(salt)	(unsigned long)(salt)

/* partial hash update function. Assume roughly 4 bits per character */
static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
	return (prevhash + (c << 4) + (c >> 4)) * 11;
}

/*
 * Finally: cut down the number of bits to an int value (and try to avoid
 * losing bits).  This also has the property (wanted by the dcache)
 * that the msbits make a good hash table index.
 */
static inline unsigned int end_name_hash(unsigned long hash)
{
	return hash_long(hash, 32);
}

/*
 * Version 2: One word (32 or 64 bits) at a time.
 * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h>
 * exists, which describes major Linux platforms like x86 and ARM), then
 * this computes a different hash function much faster.
 *
 * If not set, this falls back to a wrapper around the preceding.
 */
extern unsigned int __pure full_name_hash(const void *salt, const char *, unsigned int);

/*
 * A hash_len is a u64 with the hash of a string in the low
 * half and the length in the high half.
 */
#define hashlen_hash(hashlen) ((u32)(hashlen))
#define hashlen_len(hashlen)  ((u32)((hashlen) >> 32))
#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash))

/* Return the "hash_len" (hash and length) of a null-terminated string */
extern u64 __pure hashlen_string(const void *salt, const char *name);

#endif	/* __LINUX_STRINGHASH_H */
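/*
 * Illustrative sketch (editor's addition, not part of stringhash.h): how the
 * "version 1" byte-at-a-time API above is typically strung together, and how
 * the hashlen_* macros pack and unpack a hash plus a length.
 * example_hash_name() is a hypothetical helper, not a kernel function.
 */
#include <linux/stringhash.h>

static u64 example_hash_name(const void *salt, const char *name)
{
	unsigned long hash = init_name_hash(salt);
	unsigned int len = 0;

	while (name[len]) {
		hash = partial_name_hash((unsigned char)name[len], hash);
		len++;
	}
	/* Fold to 32 bits and pack hash + length into one u64. */
	return hashlen_create(end_name_hash(hash), len);
}

/*
 * A caller could then split the packed value again:
 *
 *	u64 hl   = example_hash_name(NULL, "libc.so.6");
 *	u32 hash = hashlen_hash(hl);
 *	u32 len  = hashlen_len(hl);	(9 for "libc.so.6")
 *
 * full_name_hash()/hashlen_string() produce this kind of result a word at a
 * time when CONFIG_DCACHE_WORD_ACCESS is available, using a different (and
 * faster) hash function.
 */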
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
 */

/*
 * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
 *
 * Removed a lot of unnecessary code and simplified things now that
 * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
 *
 * Speed up hash, lru, and free list operations.  Use gfp() for allocating
 * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
 *
 * Added 32k buffer block sizes - these are required for older ARM systems. - RMK
 *
 * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
 */

#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <linux/sched/mm.h>
#include <trace/events/block.h>
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
#include <linux/sched/isolation.h>

#include "internal.h"

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
			  enum rw_hint hint, struct writeback_control *wbc);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	folio_mark_accessed(bh->b_folio);
}
EXPORT_SYMBOL(touch_buffer);

void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	smp_mb__after_atomic();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);

/*
 * Returns whether the folio has dirty or writeback buffers. If all the
 * buffers are unlocked and clean then the folio_test_dirty information is
 * stale. If any of the buffers are locked, it is assumed they are locked
 * for IO.
 */
void buffer_check_dirty_writeback(struct folio *folio,
				  bool *dirty, bool *writeback)
{
	struct buffer_head *head, *bh;
	*dirty = false;
	*writeback = false;

	BUG_ON(!folio_test_locked(folio));

	head = folio_buffers(folio);
	if (!head)
		return;

	if (folio_test_writeback(folio))
		*writeback = true;

	bh = head;
	do {
		if (buffer_locked(bh))
			*writeback = true;

		if (buffer_dirty(bh))
			*dirty = true;

		bh = bh->b_this_page;
	} while (bh != head);
}

/*
 * Block until a buffer comes unlocked.  This doesn't stop it
 * from becoming locked again - you have to lock it yourself
 * if you want to preserve its state.
*/ void __wait_on_buffer(struct buffer_head * bh) { wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__wait_on_buffer); static void buffer_io_error(struct buffer_head *bh, char *msg) { if (!test_bit(BH_Quiet, &bh->b_state)) printk_ratelimited(KERN_ERR "Buffer I/O error on dev %pg, logical block %llu%s\n", bh->b_bdev, (unsigned long long)bh->b_blocknr, msg); } /* * End-of-IO handler helper function which does not touch the bh after * unlocking it. * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but * a race there is benign: unlock_buffer() only use the bh's address for * hashing after unlocking the buffer, so it doesn't actually touch the bh * itself. */ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); } else { /* This happens, due to failed read-ahead attempts. */ clear_buffer_uptodate(bh); } unlock_buffer(bh); } /* * Default synchronous end-of-IO handler.. Just mark it up-to-date and * unlock the buffer. */ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) { __end_buffer_read_notouch(bh, uptodate); put_bh(bh); } EXPORT_SYMBOL(end_buffer_read_sync); void end_buffer_write_sync(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); } else { buffer_io_error(bh, ", lost sync page write"); mark_buffer_write_io_error(bh); clear_buffer_uptodate(bh); } unlock_buffer(bh); put_bh(bh); } EXPORT_SYMBOL(end_buffer_write_sync); /* * Various filesystems appear to want __find_get_block to be non-blocking. * But it's the page lock which protects the buffers. To get around this, * we get exclusion from try_to_free_buffers with the blockdev mapping's * i_private_lock. * * Hack idea: for the blockdev mapping, i_private_lock contention * may be quite high. This code could TryLock the page, and if that * succeeds, there is no need to take i_private_lock. */ static struct buffer_head * __find_get_block_slow(struct block_device *bdev, sector_t block) { struct inode *bd_inode = bdev->bd_inode; struct address_space *bd_mapping = bd_inode->i_mapping; struct buffer_head *ret = NULL; pgoff_t index; struct buffer_head *bh; struct buffer_head *head; struct folio *folio; int all_mapped = 1; static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE; folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0); if (IS_ERR(folio)) goto out; spin_lock(&bd_mapping->i_private_lock); head = folio_buffers(folio); if (!head) goto out_unlock; bh = head; do { if (!buffer_mapped(bh)) all_mapped = 0; else if (bh->b_blocknr == block) { ret = bh; get_bh(bh); goto out_unlock; } bh = bh->b_this_page; } while (bh != head); /* we might be here because some of the buffers on this page are * not mapped. This is due to various races between * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); if (all_mapped && __ratelimit(&last_warned)) { printk("__find_get_block_slow() failed. 
block=%llu, " "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " "device %pg blocksize: %d\n", (unsigned long long)block, (unsigned long long)bh->b_blocknr, bh->b_state, bh->b_size, bdev, 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->i_private_lock); folio_put(folio); out: return ret; } static void end_buffer_async_read(struct buffer_head *bh, int uptodate) { unsigned long flags; struct buffer_head *first; struct buffer_head *tmp; struct folio *folio; int folio_uptodate = 1; BUG_ON(!buffer_async_read(bh)); folio = bh->b_folio; if (uptodate) { set_buffer_uptodate(bh); } else { clear_buffer_uptodate(bh); buffer_io_error(bh, ", async page read"); folio_set_error(folio); } /* * Be _very_ careful from here on. Bad things can happen if * two buffer heads end IO at almost the same time and both * decide that the page is now completely done. */ first = folio_buffers(folio); spin_lock_irqsave(&first->b_uptodate_lock, flags); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; do { if (!buffer_uptodate(tmp)) folio_uptodate = 0; if (buffer_async_read(tmp)) { BUG_ON(!buffer_locked(tmp)); goto still_busy; } tmp = tmp->b_this_page; } while (tmp != bh); spin_unlock_irqrestore(&first->b_uptodate_lock, flags); folio_end_read(folio, folio_uptodate); return; still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); return; } struct postprocess_bh_ctx { struct work_struct work; struct buffer_head *bh; }; static void verify_bh(struct work_struct *work) { struct postprocess_bh_ctx *ctx = container_of(work, struct postprocess_bh_ctx, work); struct buffer_head *bh = ctx->bh; bool valid; valid = fsverity_verify_blocks(bh->b_folio, bh->b_size, bh_offset(bh)); end_buffer_async_read(bh, valid); kfree(ctx); } static bool need_fsverity(struct buffer_head *bh) { struct folio *folio = bh->b_folio; struct inode *inode = folio->mapping->host; return fsverity_active(inode) && /* needed by ext4 */ folio->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); } static void decrypt_bh(struct work_struct *work) { struct postprocess_bh_ctx *ctx = container_of(work, struct postprocess_bh_ctx, work); struct buffer_head *bh = ctx->bh; int err; err = fscrypt_decrypt_pagecache_blocks(bh->b_folio, bh->b_size, bh_offset(bh)); if (err == 0 && need_fsverity(bh)) { /* * We use different work queues for decryption and for verity * because verity may require reading metadata pages that need * decryption, and we shouldn't recurse to the same workqueue. */ INIT_WORK(&ctx->work, verify_bh); fsverity_enqueue_verify_work(&ctx->work); return; } end_buffer_async_read(bh, err == 0); kfree(ctx); } /* * I/O completion handler for block_read_full_folio() - pages * which come unlocked at the end of I/O. */ static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate) { struct inode *inode = bh->b_folio->mapping->host; bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode); bool verify = need_fsverity(bh); /* Decrypt (with fscrypt) and/or verify (with fsverity) if needed. */ if (uptodate && (decrypt || verify)) { struct postprocess_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); if (ctx) { ctx->bh = bh; if (decrypt) { INIT_WORK(&ctx->work, decrypt_bh); fscrypt_enqueue_decrypt_work(&ctx->work); } else { INIT_WORK(&ctx->work, verify_bh); fsverity_enqueue_verify_work(&ctx->work); } return; } uptodate = 0; } end_buffer_async_read(bh, uptodate); } /* * Completion handler for block_write_full_folio() - folios which are unlocked * during I/O, and which have the writeback flag cleared upon I/O completion. 
*/ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) { unsigned long flags; struct buffer_head *first; struct buffer_head *tmp; struct folio *folio; BUG_ON(!buffer_async_write(bh)); folio = bh->b_folio; if (uptodate) { set_buffer_uptodate(bh); } else { buffer_io_error(bh, ", lost async page write"); mark_buffer_write_io_error(bh); clear_buffer_uptodate(bh); folio_set_error(folio); } first = folio_buffers(folio); spin_lock_irqsave(&first->b_uptodate_lock, flags); clear_buffer_async_write(bh); unlock_buffer(bh); tmp = bh->b_this_page; while (tmp != bh) { if (buffer_async_write(tmp)) { BUG_ON(!buffer_locked(tmp)); goto still_busy; } tmp = tmp->b_this_page; } spin_unlock_irqrestore(&first->b_uptodate_lock, flags); folio_end_writeback(folio); return; still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); return; } /* * If a page's buffers are under async readin (end_buffer_async_read * completion) then there is a possibility that another thread of * control could lock one of the buffers after it has completed * but while some of the other buffers have not completed. This * locked buffer would confuse end_buffer_async_read() into not unlocking * the page. So the absence of BH_Async_Read tells end_buffer_async_read() * that this buffer is not under async I/O. * * The page comes unlocked when it has no locked buffer_async buffers * left. * * PageLocked prevents anyone starting new async I/O reads any of * the buffers. * * PageWriteback is used to prevent simultaneous writeout of the same * page. * * PageLocked prevents anyone from starting writeback of a page which is * under read I/O (PageWriteback is only ever set against a locked page). */ static void mark_buffer_async_read(struct buffer_head *bh) { bh->b_end_io = end_buffer_async_read_io; set_buffer_async_read(bh); } static void mark_buffer_async_write_endio(struct buffer_head *bh, bh_end_io_t *handler) { bh->b_end_io = handler; set_buffer_async_write(bh); } void mark_buffer_async_write(struct buffer_head *bh) { mark_buffer_async_write_endio(bh, end_buffer_async_write); } EXPORT_SYMBOL(mark_buffer_async_write); /* * fs/buffer.c contains helper functions for buffer-backed address space's * fsync functions. A common requirement for buffer-based filesystems is * that certain data from the backing blockdev needs to be written out for * a successful fsync(). For example, ext2 indirect blocks need to be * written back and waited upon before fsync() returns. * * The functions mark_buffer_dirty_inode(), fsync_inode_buffers(), * inode_has_buffers() and invalidate_inode_buffers() are provided for the * management of a list of dependent buffers at ->i_mapping->i_private_list. * * Locking is a little subtle: try_to_free_buffers() will remove buffers * from their controlling inode's queue when they are being freed. But * try_to_free_buffers() will be operating against the *blockdev* mapping * at the time, not against the S_ISREG file which depends on those buffers. * So the locking for i_private_list is via the i_private_lock in the address_space * which backs the buffers. Which is different from the address_space * against which the buffers are listed. So for a particular address_space, * mapping->i_private_lock does *not* protect mapping->i_private_list! In fact, * mapping->i_private_list will always be protected by the backing blockdev's * ->i_private_lock. * * Which introduces a requirement: all buffers on an address_space's * ->i_private_list must be from the same address_space: the blockdev's. 
* * address_spaces which do not place buffers at ->i_private_list via these * utility functions are free to use i_private_lock and i_private_list for * whatever they want. The only requirement is that list_empty(i_private_list) * be true at clear_inode() time. * * FIXME: clear_inode should not call invalidate_inode_buffers(). The * filesystems should do that. invalidate_inode_buffers() should just go * BUG_ON(!list_empty). * * FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should * take an address_space, not an inode. And it should be called * mark_buffer_dirty_fsync() to clearly define why those buffers are being * queued up. * * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the * list if it is already on a list. Because if the buffer is on a list, * it *must* already be on the right one. If not, the filesystem is being * silly. This will save a ton of locking. But first we have to ensure * that buffers are taken *off* the old inode's list when they are freed * (presumably in truncate). That requires careful auditing of all * filesystems (do it inside bforget()). It could also be done by bringing * b_inode back. */ /* * The buffer's backing address_space's i_private_lock must be held */ static void __remove_assoc_queue(struct buffer_head *bh) { list_del_init(&bh->b_assoc_buffers); WARN_ON(!bh->b_assoc_map); bh->b_assoc_map = NULL; } int inode_has_buffers(struct inode *inode) { return !list_empty(&inode->i_data.i_private_list); } /* * osync is designed to support O_SYNC io. It waits synchronously for * all already-submitted IO to complete, but does not queue any new * writes to the disk. * * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer * as you dirty the buffers, and then use osync_inode_buffers to wait for * completion. Any other dirty buffers which are not yet queued for * write will not be flushed to disk by the osync. */ static int osync_buffers_list(spinlock_t *lock, struct list_head *list) { struct buffer_head *bh; struct list_head *p; int err = 0; spin_lock(lock); repeat: list_for_each_prev(p, list) { bh = BH_ENTRY(p); if (buffer_locked(bh)) { get_bh(bh); spin_unlock(lock); wait_on_buffer(bh); if (!buffer_uptodate(bh)) err = -EIO; brelse(bh); spin_lock(lock); goto repeat; } } spin_unlock(lock); return err; } /** * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers * @mapping: the mapping which wants those buffers written * * Starts I/O against the buffers at mapping->i_private_list, and waits upon * that I/O. * * Basically, this is a convenience function for fsync(). * @mapping is a file or directory which needs those buffers to be written for * a successful fsync(). */ int sync_mapping_buffers(struct address_space *mapping) { struct address_space *buffer_mapping = mapping->i_private_data; if (buffer_mapping == NULL || list_empty(&mapping->i_private_list)) return 0; return fsync_buffers_list(&buffer_mapping->i_private_lock, &mapping->i_private_list); } EXPORT_SYMBOL(sync_mapping_buffers); /** * generic_buffers_fsync_noflush - generic buffer fsync implementation * for simple filesystems with no inode lock * * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * * This is a generic implementation of the fsync method for simple * filesystems which track all non-inode metadata in the buffers list * hanging off the address_space structure. 
*/ int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, bool datasync) { struct inode *inode = file->f_mapping->host; int err; int ret; err = file_write_and_wait_range(file, start, end); if (err) return err; ret = sync_mapping_buffers(inode->i_mapping); if (!(inode->i_state & I_DIRTY_ALL)) goto out; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) goto out; err = sync_inode_metadata(inode, 1); if (ret == 0) ret = err; out: /* check and advance again to catch errors after syncing out buffers */ err = file_check_and_advance_wb_err(file); if (ret == 0) ret = err; return ret; } EXPORT_SYMBOL(generic_buffers_fsync_noflush); /** * generic_buffers_fsync - generic buffer fsync implementation * for simple filesystems with no inode lock * * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * * This is a generic implementation of the fsync method for simple * filesystems which track all non-inode metadata in the buffers list * hanging off the address_space structure. This also makes sure that * a device cache flush operation is called at the end. */ int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, bool datasync) { struct inode *inode = file->f_mapping->host; int ret; ret = generic_buffers_fsync_noflush(file, start, end, datasync); if (!ret) ret = blkdev_issue_flush(inode->i_sb->s_bdev); return ret; } EXPORT_SYMBOL(generic_buffers_fsync); /* * Called when we've recently written block `bblock', and it is known that * `bblock' was for a buffer_boundary() buffer. This means that the block at * `bblock + 1' is probably a dirty indirect block. Hunt it down and, if it's * dirty, schedule it for IO. So that indirects merge nicely with their data. */ void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize) { struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) write_dirty_buffer(bh, 0); put_bh(bh); } } void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) { struct address_space *mapping = inode->i_mapping; struct address_space *buffer_mapping = bh->b_folio->mapping; mark_buffer_dirty(bh); if (!mapping->i_private_data) { mapping->i_private_data = buffer_mapping; } else { BUG_ON(mapping->i_private_data != buffer_mapping); } if (!bh->b_assoc_map) { spin_lock(&buffer_mapping->i_private_lock); list_move_tail(&bh->b_assoc_buffers, &mapping->i_private_list); bh->b_assoc_map = mapping; spin_unlock(&buffer_mapping->i_private_lock); } } EXPORT_SYMBOL(mark_buffer_dirty_inode); /* * Add a page to the dirty page list. * * It is a sad fact of life that this function is called from several places * deeply under spinlocking. It may not sleep. * * If the page has buffers, the uptodate buffers are set dirty, to preserve * dirty-state coherency between the page and the buffers. It the page does * not have buffers then when they are later attached they will all be set * dirty. * * The buffers are dirtied before the page is dirtied. There's a small race * window in which a writepage caller may see the page cleanness but not the * buffer dirtiness. That's fine. If this code were to set the page dirty * before the buffers, a concurrent writepage caller could clear the page dirty * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean * page on the dirty page list. 
* * We use i_private_lock to lock against try_to_free_buffers while using the * page's buffer list. Also use this to protect against clean buffers being * added to the page after it was set dirty. * * FIXME: may need to call ->reservepage here as well. That's rather up to the * address_space though. */ bool block_dirty_folio(struct address_space *mapping, struct folio *folio) { struct buffer_head *head; bool newly_dirty; spin_lock(&mapping->i_private_lock); head = folio_buffers(folio); if (head) { struct buffer_head *bh = head; do { set_buffer_dirty(bh); bh = bh->b_this_page; } while (bh != head); } /* * Lock out page's memcg migration to keep PageDirty * synchronized with per-memcg dirty page counters. */ folio_memcg_lock(folio); newly_dirty = !folio_test_set_dirty(folio); spin_unlock(&mapping->i_private_lock); if (newly_dirty) __folio_mark_dirty(folio, mapping, 1); folio_memcg_unlock(folio); if (newly_dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return newly_dirty; } EXPORT_SYMBOL(block_dirty_folio); /* * Write out and wait upon a list of buffers. * * We have conflicting pressures: we want to make sure that all * initially dirty buffers get waited on, but that any subsequently * dirtied buffers don't. After all, we don't want fsync to last * forever if somebody is actively writing to the file. * * Do this in two main stages: first we copy dirty buffers to a * temporary inode list, queueing the writes as we go. Then we clean * up, waiting for those writes to complete. * * During this second stage, any subsequent updates to the file may end * up refiling the buffer on the original inode's dirty list again, so * there is a chance we will end up with a buffer queued for write but * not yet completed on that list. So, as a final cleanup we go through * the osync code to catch these locked, dirty buffers without requeuing * any newly dirty buffers for write. */ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) { struct buffer_head *bh; struct list_head tmp; struct address_space *mapping; int err = 0, err2; struct blk_plug plug; INIT_LIST_HEAD(&tmp); blk_start_plug(&plug); spin_lock(lock); while (!list_empty(list)) { bh = BH_ENTRY(list->next); mapping = bh->b_assoc_map; __remove_assoc_queue(bh); /* Avoid race with mark_buffer_dirty_inode() which does * a lockless check and we rely on seeing the dirty bit */ smp_mb(); if (buffer_dirty(bh) || buffer_locked(bh)) { list_add(&bh->b_assoc_buffers, &tmp); bh->b_assoc_map = mapping; if (buffer_dirty(bh)) { get_bh(bh); spin_unlock(lock); /* * Ensure any pending I/O completes so that * write_dirty_buffer() actually writes the * current contents - it is a noop if I/O is * still in flight on potentially older * contents. */ write_dirty_buffer(bh, REQ_SYNC); /* * Kick off IO for the previous mapping. Note * that we will not run the very last mapping, * wait_on_buffer() will do that for us * through sync_buffer(). 
*/ brelse(bh); spin_lock(lock); } } } spin_unlock(lock); blk_finish_plug(&plug); spin_lock(lock); while (!list_empty(&tmp)) { bh = BH_ENTRY(tmp.prev); get_bh(bh); mapping = bh->b_assoc_map; __remove_assoc_queue(bh); /* Avoid race with mark_buffer_dirty_inode() which does * a lockless check and we rely on seeing the dirty bit */ smp_mb(); if (buffer_dirty(bh)) { list_add(&bh->b_assoc_buffers, &mapping->i_private_list); bh->b_assoc_map = mapping; } spin_unlock(lock); wait_on_buffer(bh); if (!buffer_uptodate(bh)) err = -EIO; brelse(bh); spin_lock(lock); } spin_unlock(lock); err2 = osync_buffers_list(lock, list); if (err) return err; else return err2; } /* * Invalidate any and all dirty buffers on a given inode. We are * probably unmounting the fs, but that doesn't mean we have already * done a sync(). Just drop the buffers from the inode list. * * NOTE: we take the inode's blockdev's mapping's i_private_lock. Which * assumes that all the buffers are against the blockdev. Not true * for reiserfs. */ void invalidate_inode_buffers(struct inode *inode) { if (inode_has_buffers(inode)) { struct address_space *mapping = &inode->i_data; struct list_head *list = &mapping->i_private_list; struct address_space *buffer_mapping = mapping->i_private_data; spin_lock(&buffer_mapping->i_private_lock); while (!list_empty(list)) __remove_assoc_queue(BH_ENTRY(list->next)); spin_unlock(&buffer_mapping->i_private_lock); } } EXPORT_SYMBOL(invalidate_inode_buffers); /* * Remove any clean buffers from the inode's buffer list. This is called * when we're trying to free the inode itself. Those buffers can pin it. * * Returns true if all buffers were removed. */ int remove_inode_buffers(struct inode *inode) { int ret = 1; if (inode_has_buffers(inode)) { struct address_space *mapping = &inode->i_data; struct list_head *list = &mapping->i_private_list; struct address_space *buffer_mapping = mapping->i_private_data; spin_lock(&buffer_mapping->i_private_lock); while (!list_empty(list)) { struct buffer_head *bh = BH_ENTRY(list->next); if (buffer_dirty(bh)) { ret = 0; break; } __remove_assoc_queue(bh); } spin_unlock(&buffer_mapping->i_private_lock); } return ret; } /* * Create the appropriate buffers when given a folio for data area and * the size of each buffer.. Use the bh->b_this_page linked list to * follow the buffers created. Return NULL if unable to create more * buffers. * * The retry flag is used to differentiate async IO (paging, swapping) * which may not fail from ordinary buffer allocations. */ struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, gfp_t gfp) { struct buffer_head *bh, *head; long offset; struct mem_cgroup *memcg, *old_memcg; /* The folio lock pins the memcg */ memcg = folio_memcg(folio); old_memcg = set_active_memcg(memcg); head = NULL; offset = folio_size(folio); while ((offset -= size) >= 0) { bh = alloc_buffer_head(gfp); if (!bh) goto no_grow; bh->b_this_page = head; bh->b_blocknr = -1; head = bh; bh->b_size = size; /* Link the buffer to its folio */ folio_set_bh(bh, folio, offset); } out: set_active_memcg(old_memcg); return head; /* * In case anything failed, we just free everything we got. 
*/ no_grow: if (head) { do { bh = head; head = head->b_this_page; free_buffer_head(bh); } while (head); } goto out; } EXPORT_SYMBOL_GPL(folio_alloc_buffers); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry) { gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; if (retry) gfp |= __GFP_NOFAIL; return folio_alloc_buffers(page_folio(page), size, gfp); } EXPORT_SYMBOL_GPL(alloc_page_buffers); static inline void link_dev_buffers(struct folio *folio, struct buffer_head *head) { struct buffer_head *bh, *tail; bh = head; do { tail = bh; bh = bh->b_this_page; } while (bh); tail->b_this_page = head; folio_attach_private(folio, head); } static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) { sector_t retval = ~((sector_t)0); loff_t sz = bdev_nr_bytes(bdev); if (sz) { unsigned int sizebits = blksize_bits(size); retval = (sz >> sizebits); } return retval; } /* * Initialise the state of a blockdev folio's buffers. */ static sector_t folio_init_buffers(struct folio *folio, struct block_device *bdev, unsigned size) { struct buffer_head *head = folio_buffers(folio); struct buffer_head *bh = head; bool uptodate = folio_test_uptodate(folio); sector_t block = div_u64(folio_pos(folio), size); sector_t end_block = blkdev_max_block(bdev, size); do { if (!buffer_mapped(bh)) { bh->b_end_io = NULL; bh->b_private = NULL; bh->b_bdev = bdev; bh->b_blocknr = block; if (uptodate) set_buffer_uptodate(bh); if (block < end_block) set_buffer_mapped(bh); } block++; bh = bh->b_this_page; } while (bh != head); /* * Caller needs to validate requested block against end of device. */ return end_block; } /* * Create the page-cache folio that contains the requested block. * * This is used purely for blockdev mappings. * * Returns false if we have a failure which cannot be cured by retrying * without sleeping. Returns true if we succeeded, or the caller should retry. */ static bool grow_dev_folio(struct block_device *bdev, sector_t block, pgoff_t index, unsigned size, gfp_t gfp) { struct inode *inode = bdev->bd_inode; struct folio *folio; struct buffer_head *bh; sector_t end_block = 0; folio = __filemap_get_folio(inode->i_mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (IS_ERR(folio)) return false; bh = folio_buffers(folio); if (bh) { if (bh->b_size == size) { end_block = folio_init_buffers(folio, bdev, size); goto unlock; } /* * Retrying may succeed; for example the folio may finish * writeback, or buffers may be cleaned. This should not * happen very often; maybe we have old buffers attached to * this blockdev's page cache and we're trying to change * the block size? */ if (!try_to_free_buffers(folio)) { end_block = ~0ULL; goto unlock; } } bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT); if (!bh) goto unlock; /* * Link the folio to the buffers and initialise them. Take the * lock to be atomic wrt __find_get_block(), which does not * run under the folio lock. */ spin_lock(&inode->i_mapping->i_private_lock); link_dev_buffers(folio, bh); end_block = folio_init_buffers(folio, bdev, size); spin_unlock(&inode->i_mapping->i_private_lock); unlock: folio_unlock(folio); folio_put(folio); return block < end_block; } /* * Create buffers for the specified block device block's folio. If * that folio was dirty, the buffers are set dirty also. Returns false * if we've hit a permanent error. 
*/ static bool grow_buffers(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { loff_t pos; /* * Check for a block which lies outside our maximum possible * pagecache index. */ if (check_mul_overflow(block, (sector_t)size, &pos) || pos > MAX_LFS_FILESIZE) { printk(KERN_ERR "%s: requested out-of-range block %llu for device %pg\n", __func__, (unsigned long long)block, bdev); return false; } /* Create a folio with the proper size buffers */ return grow_dev_folio(bdev, block, pos / PAGE_SIZE, size, gfp); } static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { printk(KERN_ERR "getblk(): invalid block size %d requested\n", size); printk(KERN_ERR "logical block size: %d\n", bdev_logical_block_size(bdev)); dump_stack(); return NULL; } for (;;) { struct buffer_head *bh; bh = __find_get_block(bdev, block, size); if (bh) return bh; if (!grow_buffers(bdev, block, size, gfp)) return NULL; } } /* * The relationship between dirty buffers and dirty pages: * * Whenever a page has any dirty buffers, the page's dirty bit is set, and * the page is tagged dirty in the page cache. * * At all times, the dirtiness of the buffers represents the dirtiness of * subsections of the page. If the page has buffers, the page dirty bit is * merely a hint about the true dirty state. * * When a page is set dirty in its entirety, all its buffers are marked dirty * (if the page has buffers). * * When a buffer is marked dirty, its page is dirtied, but the page's other * buffers are not. * * Also. When blockdev buffers are explicitly read with bread(), they * individually become uptodate. But their backing page remains not * uptodate - even if all of its buffers are uptodate. A subsequent * block_read_full_folio() against that folio will discover all the uptodate * buffers, will set the folio uptodate and will perform no I/O. */ /** * mark_buffer_dirty - mark a buffer_head as needing writeout * @bh: the buffer_head to mark dirty * * mark_buffer_dirty() will set the dirty bit against the buffer, then set * its backing page dirty, then tag the page as dirty in the page cache * and then attach the address_space's inode to its superblock's dirty * inode list. * * mark_buffer_dirty() is atomic. It takes bh->b_folio->mapping->i_private_lock, * i_pages lock and mapping->host->i_lock. */ void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); trace_block_dirty_buffer(bh); /* * Very *carefully* optimize the it-is-already-dirty case. * * Don't let the final "is it dirty" escape to before we * perhaps modified the buffer. */ if (buffer_dirty(bh)) { smp_mb(); if (buffer_dirty(bh)) return; } if (!test_set_buffer_dirty(bh)) { struct folio *folio = bh->b_folio; struct address_space *mapping = NULL; folio_memcg_lock(folio); if (!folio_test_set_dirty(folio)) { mapping = folio->mapping; if (mapping) __folio_mark_dirty(folio, mapping, 0); } folio_memcg_unlock(folio); if (mapping) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } } EXPORT_SYMBOL(mark_buffer_dirty); void mark_buffer_write_io_error(struct buffer_head *bh) { set_buffer_write_io_error(bh); /* FIXME: do we need to set this in both places? 
*/ if (bh->b_folio && bh->b_folio->mapping) mapping_set_error(bh->b_folio->mapping, -EIO); if (bh->b_assoc_map) { mapping_set_error(bh->b_assoc_map, -EIO); errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO); } } EXPORT_SYMBOL(mark_buffer_write_io_error); /* * Decrement a buffer_head's reference count. If all buffers against a page * have zero reference count, are clean and unlocked, and if the page is clean * and unlocked then try_to_free_buffers() may strip the buffers from the page * in preparation for freeing it (sometimes, rarely, buffers are removed from * a page but it ends up not being freed, and buffers may later be reattached). */ void __brelse(struct buffer_head * buf) { if (atomic_read(&buf->b_count)) { put_bh(buf); return; } WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); } EXPORT_SYMBOL(__brelse); /* * bforget() is like brelse(), except it discards any * potentially dirty data. */ void __bforget(struct buffer_head *bh) { clear_buffer_dirty(bh); if (bh->b_assoc_map) { struct address_space *buffer_mapping = bh->b_folio->mapping; spin_lock(&buffer_mapping->i_private_lock); list_del_init(&bh->b_assoc_buffers); bh->b_assoc_map = NULL; spin_unlock(&buffer_mapping->i_private_lock); } __brelse(bh); } EXPORT_SYMBOL(__bforget); static struct buffer_head *__bread_slow(struct buffer_head *bh) { lock_buffer(bh); if (buffer_uptodate(bh)) { unlock_buffer(bh); return bh; } else { get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; } brelse(bh); return NULL; } /* * Per-cpu buffer LRU implementation. To reduce the cost of __find_get_block(). * The bhs[] array is sorted - newest buffer is at bhs[0]. Buffers have their * refcount elevated by one when they're in an LRU. A buffer can only appear * once in a particular CPU's LRU. A single buffer can be present in multiple * CPU's LRUs at the same time. * * This is a transparent caching front-end to sb_bread(), sb_getblk() and * sb_find_get_block(). * * The LRUs themselves only need locking against invalidate_bh_lrus. We use * a local interrupt disable for that. */ #define BH_LRU_SIZE 16 struct bh_lru { struct buffer_head *bhs[BH_LRU_SIZE]; }; static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }}; #ifdef CONFIG_SMP #define bh_lru_lock() local_irq_disable() #define bh_lru_unlock() local_irq_enable() #else #define bh_lru_lock() preempt_disable() #define bh_lru_unlock() preempt_enable() #endif static inline void check_irqs_on(void) { #ifdef irqs_disabled BUG_ON(irqs_disabled()); #endif } /* * Install a buffer_head into this cpu's LRU. If not already in the LRU, it is * inserted at the front, and the buffer_head at the back if any is evicted. * Or, if already in the LRU it is moved to the front. */ static void bh_lru_install(struct buffer_head *bh) { struct buffer_head *evictee = bh; struct bh_lru *b; int i; check_irqs_on(); bh_lru_lock(); /* * the refcount of buffer_head in bh_lru prevents dropping the * attached page(i.e., try_to_free_buffers) so it could cause * failing page migration. * Skip putting upcoming bh into bh_lru until migration is done. */ if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) { bh_lru_unlock(); return; } b = this_cpu_ptr(&bh_lrus); for (i = 0; i < BH_LRU_SIZE; i++) { swap(evictee, b->bhs[i]); if (evictee == bh) { bh_lru_unlock(); return; } } get_bh(bh); bh_lru_unlock(); brelse(evictee); } /* * Look up the bh in this cpu's LRU. If it's there, move it to the head. 
*/ static struct buffer_head * lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *ret = NULL; unsigned int i; check_irqs_on(); bh_lru_lock(); if (cpu_is_isolated(smp_processor_id())) { bh_lru_unlock(); return NULL; } for (i = 0; i < BH_LRU_SIZE; i++) { struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]); if (bh && bh->b_blocknr == block && bh->b_bdev == bdev && bh->b_size == size) { if (i) { while (i) { __this_cpu_write(bh_lrus.bhs[i], __this_cpu_read(bh_lrus.bhs[i - 1])); i--; } __this_cpu_write(bh_lrus.bhs[0], bh); } get_bh(bh); ret = bh; break; } } bh_lru_unlock(); return ret; } /* * Perform a pagecache lookup for the matching buffer. If it's there, refresh * it in the LRU and mark it as accessed. If it is not present then return * NULL */ struct buffer_head * __find_get_block(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { /* __find_get_block_slow will mark the page accessed */ bh = __find_get_block_slow(bdev, block); if (bh) bh_lru_install(bh); } else touch_buffer(bh); return bh; } EXPORT_SYMBOL(__find_get_block); /** * bdev_getblk - Get a buffer_head in a block device's buffer cache. * @bdev: The block device. * @block: The block number. * @size: The size of buffer_heads for this @bdev. * @gfp: The memory allocation flags to use. * * Return: The buffer head, or NULL if memory could not be allocated. */ struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { struct buffer_head *bh = __find_get_block(bdev, block, size); might_alloc(gfp); if (bh) return bh; return __getblk_slow(bdev, block, size, gfp); } EXPORT_SYMBOL(bdev_getblk); /* * Do async read-ahead on a buffer.. */ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = bdev_getblk(bdev, block, size, GFP_NOWAIT | __GFP_MOVABLE); if (likely(bh)) { bh_readahead(bh, REQ_RAHEAD); brelse(bh); } } EXPORT_SYMBOL(__breadahead); /** * __bread_gfp() - reads a specified block and returns the bh * @bdev: the block_device to read from * @block: number of block * @size: size (in bytes) to read * @gfp: page allocation flag * * Reads a specified block, and returns buffer head that contains it. * The page cache can be allocated from non-movable area * not to prevent page migration if you set gfp to zero. * It returns NULL if the block was unreadable. */ struct buffer_head * __bread_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { struct buffer_head *bh; gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); /* * Prefer looping in the allocator rather than here, at least that * code knows what it's doing. */ gfp |= __GFP_NOFAIL; bh = bdev_getblk(bdev, block, size, gfp); if (likely(bh) && !buffer_uptodate(bh)) bh = __bread_slow(bh); return bh; } EXPORT_SYMBOL(__bread_gfp); static void __invalidate_bh_lrus(struct bh_lru *b) { int i; for (i = 0; i < BH_LRU_SIZE; i++) { brelse(b->bhs[i]); b->bhs[i] = NULL; } } /* * invalidate_bh_lrus() is called rarely - but not only at unmount. * This doesn't race because it runs in each cpu either in irq * or with preempt disabled. 
*/ static void invalidate_bh_lru(void *arg) { struct bh_lru *b = &get_cpu_var(bh_lrus); __invalidate_bh_lrus(b); put_cpu_var(bh_lrus); } bool has_bh_in_lru(int cpu, void *dummy) { struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu); int i; for (i = 0; i < BH_LRU_SIZE; i++) { if (b->bhs[i]) return true; } return false; } void invalidate_bh_lrus(void) { on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1); } EXPORT_SYMBOL_GPL(invalidate_bh_lrus); /* * It's called from workqueue context so we need a bh_lru_lock to close * the race with preemption/irq. */ void invalidate_bh_lrus_cpu(void) { struct bh_lru *b; bh_lru_lock(); b = this_cpu_ptr(&bh_lrus); __invalidate_bh_lrus(b); bh_lru_unlock(); } void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset) { bh->b_folio = folio; BUG_ON(offset >= folio_size(folio)); if (folio_test_highmem(folio)) /* * This catches illegal uses and preserves the offset: */ bh->b_data = (char *)(0 + offset); else bh->b_data = folio_address(folio) + offset; } EXPORT_SYMBOL(folio_set_bh); /* * Called when truncating a buffer on a page completely. */ /* Bits that are cleared during an invalidate */ #define BUFFER_FLAGS_DISCARD \ (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \ 1 << BH_Delay | 1 << BH_Unwritten) static void discard_buffer(struct buffer_head * bh) { unsigned long b_state; lock_buffer(bh); clear_buffer_dirty(bh); bh->b_bdev = NULL; b_state = READ_ONCE(bh->b_state); do { } while (!try_cmpxchg(&bh->b_state, &b_state, b_state & ~BUFFER_FLAGS_DISCARD)); unlock_buffer(bh); } /** * block_invalidate_folio - Invalidate part or all of a buffer-backed folio. * @folio: The folio which is affected. * @offset: start of the range to invalidate * @length: length of the range to invalidate * * block_invalidate_folio() is called when all or part of the folio has been * invalidated by a truncate operation. * * block_invalidate_folio() does not have to release all buffers, but it must * ensure that no dirty buffer is left outside @offset and that no I/O * is underway against any of the blocks which are outside the truncation * point. Because the caller is about to free (and possibly reuse) those * blocks on-disk. */ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length) { struct buffer_head *head, *bh, *next; size_t curr_off = 0; size_t stop = length + offset; BUG_ON(!folio_test_locked(folio)); /* * Check for overflow */ BUG_ON(stop > folio_size(folio) || stop < length); head = folio_buffers(folio); if (!head) return; bh = head; do { size_t next_off = curr_off + bh->b_size; next = bh->b_this_page; /* * Are we still fully in range ? */ if (next_off > stop) goto out; /* * is this block fully invalidated? */ if (offset <= curr_off) discard_buffer(bh); curr_off = next_off; bh = next; } while (bh != head); /* * We release buffers only if the entire folio is being invalidated. * The get_block cached value has been unconditionally invalidated, * so real IO is not possible anymore. */ if (length == folio_size(folio)) filemap_release_folio(folio, 0); out: return; } EXPORT_SYMBOL(block_invalidate_folio); /* * We attach and possibly dirty the buffers atomically wrt * block_dirty_folio() via i_private_lock. try_to_free_buffers * is already excluded via the folio lock. 
*/ struct buffer_head *create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state) { struct buffer_head *bh, *head, *tail; gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT | __GFP_NOFAIL; head = folio_alloc_buffers(folio, blocksize, gfp); bh = head; do { bh->b_state |= b_state; tail = bh; bh = bh->b_this_page; } while (bh); tail->b_this_page = head; spin_lock(&folio->mapping->i_private_lock); if (folio_test_uptodate(folio) || folio_test_dirty(folio)) { bh = head; do { if (folio_test_dirty(folio)) set_buffer_dirty(bh); if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); bh = bh->b_this_page; } while (bh != head); } folio_attach_private(folio, head); spin_unlock(&folio->mapping->i_private_lock); return head; } EXPORT_SYMBOL(create_empty_buffers); /** * clean_bdev_aliases: clean a range of buffers in block device * @bdev: Block device to clean buffers in * @block: Start of a range of blocks to clean * @len: Number of blocks to clean * * We are taking a range of blocks for data and we don't want writeback of any * buffer-cache aliases starting from return from this function and until the * moment when something will explicitly mark the buffer dirty (hopefully that * will not happen until we will free that block ;-) We don't even need to mark * it not-uptodate - nobody can expect anything from a newly allocated buffer * anyway. We used to use unmap_buffer() for such invalidation, but that was * wrong. We definitely don't want to mark the alias unmapped, for example - it * would confuse anyone who might pick it with bread() afterwards... * * Also.. Note that bforget() doesn't lock the buffer. So there can be * writeout I/O going on against recently-freed buffers. We don't wait on that * I/O in bforget() - it's more efficient to wait on the I/O only if we really * need to. That happens here. */ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) { struct inode *bd_inode = bdev->bd_inode; struct address_space *bd_mapping = bd_inode->i_mapping; struct folio_batch fbatch; pgoff_t index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE; pgoff_t end; int i, count; struct buffer_head *bh; struct buffer_head *head; end = ((loff_t)(block + len - 1) << bd_inode->i_blkbits) / PAGE_SIZE; folio_batch_init(&fbatch); while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) { count = folio_batch_count(&fbatch); for (i = 0; i < count; i++) { struct folio *folio = fbatch.folios[i]; if (!folio_buffers(folio)) continue; /* * We use folio lock instead of bd_mapping->i_private_lock * to pin buffers here since we can afford to sleep and * it scales better than a global spinlock lock. */ folio_lock(folio); /* Recheck when the folio is locked which pins bhs */ head = folio_buffers(folio); if (!head) goto unlock_page; bh = head; do { if (!buffer_mapped(bh) || (bh->b_blocknr < block)) goto next; if (bh->b_blocknr >= block + len) break; clear_buffer_dirty(bh); wait_on_buffer(bh); clear_buffer_req(bh); next: bh = bh->b_this_page; } while (bh != head); unlock_page: folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); /* End of range already reached? */ if (index > end || !index) break; } } EXPORT_SYMBOL(clean_bdev_aliases); static struct buffer_head *folio_create_buffers(struct folio *folio, struct inode *inode, unsigned int b_state) { struct buffer_head *bh; BUG_ON(!folio_test_locked(folio)); bh = folio_buffers(folio); if (!bh) bh = create_empty_buffers(folio, 1 << READ_ONCE(inode->i_blkbits), b_state); return bh; } /* * NOTE! 
All mapped/uptodate combinations are valid: * * Mapped Uptodate Meaning * * No No "unknown" - must do get_block() * No Yes "hole" - zero-filled * Yes No "allocated" - allocated on disk, not read in * Yes Yes "valid" - allocated and up-to-date in memory. * * "Dirty" is valid only with the last case (mapped+uptodate). */ /* * While block_write_full_folio is writing back the dirty buffers under * the page lock, whoever dirtied the buffers may decide to clean them * again at any time. We handle that by only looking at the buffer * state inside lock_buffer(). * * If block_write_full_folio() is called for regular writeback * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a * locked buffer. This only can happen if someone has written the buffer * directly, with submit_bh(). At the address_space level PageWriteback * prevents this contention from occurring. * * If block_write_full_folio() is called with wbc->sync_mode == * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this * causes the writes to be flagged as synchronous writes. */ int __block_write_full_folio(struct inode *inode, struct folio *folio, get_block_t *get_block, struct writeback_control *wbc) { int err; sector_t block; sector_t last_block; struct buffer_head *bh, *head; size_t blocksize; int nr_underway = 0; blk_opf_t write_flags = wbc_to_write_flags(wbc); head = folio_create_buffers(folio, inode, (1 << BH_Dirty) | (1 << BH_Uptodate)); /* * Be very careful. We have no exclusion from block_dirty_folio * here, and the (potentially unmapped) buffers may become dirty at * any time. If a buffer becomes dirty here after we've inspected it * then we just miss that fact, and the folio stays dirty. * * Buffers outside i_size may be dirtied by block_dirty_folio; * handle that here by just cleaning them. */ bh = head; blocksize = bh->b_size; block = div_u64(folio_pos(folio), blocksize); last_block = div_u64(i_size_read(inode) - 1, blocksize); /* * Get all the dirty buffers mapped to disk addresses and * handle any aliases from the underlying blockdev's mapping. */ do { if (block > last_block) { /* * mapped buffers outside i_size will occur, because * this folio can be outside i_size when there is a * truncate in progress. */ /* * The buffer was zeroed by block_write_full_folio() */ clear_buffer_dirty(bh); set_buffer_uptodate(bh); } else if ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, block, bh, 1); if (err) goto recover; clear_buffer_delay(bh); if (buffer_new(bh)) { /* blockdev mappings never come here */ clear_buffer_new(bh); clean_bdev_bh_alias(bh); } } bh = bh->b_this_page; block++; } while (bh != head); do { if (!buffer_mapped(bh)) continue; /* * If it's a fully non-blocking write attempt and we cannot * lock the buffer then redirty the folio. Note that this can * potentially cause a busy-wait loop from writeback threads * and kswapd activity, but those code paths have their own * higher-level throttling. */ if (wbc->sync_mode != WB_SYNC_NONE) { lock_buffer(bh); } else if (!trylock_buffer(bh)) { folio_redirty_for_writepage(wbc, folio); continue; } if (test_clear_buffer_dirty(bh)) { mark_buffer_async_write_endio(bh, end_buffer_async_write); } else { unlock_buffer(bh); } } while ((bh = bh->b_this_page) != head); /* * The folio and its buffers are protected by the writeback flag, * so we can drop the bh refcounts early. 
*/ BUG_ON(folio_test_writeback(folio)); folio_start_writeback(folio); do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, inode->i_write_hint, wbc); nr_underway++; } bh = next; } while (bh != head); folio_unlock(folio); err = 0; done: if (nr_underway == 0) { /* * The folio was marked dirty, but the buffers were * clean. Someone wrote them back by hand with * write_dirty_buffer/submit_bh. A rare case. */ folio_end_writeback(folio); /* * The folio and buffer_heads can be released at any time from * here on. */ } return err; recover: /* * ENOSPC, or some other error. We may already have added some * blocks to the file, so we need to write these out to avoid * exposing stale data. * The folio is currently locked and not marked for writeback */ bh = head; /* Recovery: lock and submit the mapped buffers */ do { if (buffer_mapped(bh) && buffer_dirty(bh) && !buffer_delay(bh)) { lock_buffer(bh); mark_buffer_async_write_endio(bh, end_buffer_async_write); } else { /* * The buffer may have been set dirty during * attachment to a dirty folio. */ clear_buffer_dirty(bh); } } while ((bh = bh->b_this_page) != head); folio_set_error(folio); BUG_ON(folio_test_writeback(folio)); mapping_set_error(folio->mapping, err); folio_start_writeback(folio); do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, inode->i_write_hint, wbc); nr_underway++; } bh = next; } while (bh != head); folio_unlock(folio); goto done; } EXPORT_SYMBOL(__block_write_full_folio); /* * If a folio has any new buffers, zero them out here, and mark them uptodate * and dirty so they'll be written out (in order to prevent uninitialised * block data from leaking). And clear the new bit. */ void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to) { size_t block_start, block_end; struct buffer_head *head, *bh; BUG_ON(!folio_test_locked(folio)); head = folio_buffers(folio); if (!head) return; bh = head; block_start = 0; do { block_end = block_start + bh->b_size; if (buffer_new(bh)) { if (block_end > from && block_start < to) { if (!folio_test_uptodate(folio)) { size_t start, xend; start = max(from, block_start); xend = min(to, block_end); folio_zero_segment(folio, start, xend); set_buffer_uptodate(bh); } clear_buffer_new(bh); mark_buffer_dirty(bh); } } block_start = block_end; bh = bh->b_this_page; } while (bh != head); } EXPORT_SYMBOL(folio_zero_new_buffers); static int iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, const struct iomap *iomap) { loff_t offset = (loff_t)block << inode->i_blkbits; bh->b_bdev = iomap->bdev; /* * Block points to offset in file we need to map, iomap contains * the offset at which the map starts. If the map ends before the * current block, then do not map the buffer and let the caller * handle it. */ if (offset >= iomap->offset + iomap->length) return -EIO; switch (iomap->type) { case IOMAP_HOLE: /* * If the buffer is not up to date or beyond the current EOF, * we need to mark it as new to ensure sub-block zeroing is * executed if necessary. 
*/ if (!buffer_uptodate(bh) || (offset >= i_size_read(inode))) set_buffer_new(bh); return 0; case IOMAP_DELALLOC: if (!buffer_uptodate(bh) || (offset >= i_size_read(inode))) set_buffer_new(bh); set_buffer_uptodate(bh); set_buffer_mapped(bh); set_buffer_delay(bh); return 0; case IOMAP_UNWRITTEN: /* * For unwritten regions, we always need to ensure that regions * in the block we are not writing to are zeroed. Mark the * buffer as new to ensure this. */ set_buffer_new(bh); set_buffer_unwritten(bh); fallthrough; case IOMAP_MAPPED: if ((iomap->flags & IOMAP_F_NEW) || offset >= i_size_read(inode)) { /* * This can happen if truncating the block device races * with the check in the caller as i_size updates on * block devices aren't synchronized by i_rwsem for * block devices. */ if (S_ISBLK(inode->i_mode)) return -EIO; set_buffer_new(bh); } bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> inode->i_blkbits; set_buffer_mapped(bh); return 0; default: WARN_ON_ONCE(1); return -EIO; } } int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block, const struct iomap *iomap) { size_t from = offset_in_folio(folio, pos); size_t to = from + len; struct inode *inode = folio->mapping->host; size_t block_start, block_end; sector_t block; int err = 0; size_t blocksize; struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; BUG_ON(!folio_test_locked(folio)); BUG_ON(to > folio_size(folio)); BUG_ON(from > to); head = folio_create_buffers(folio, inode, 0); blocksize = head->b_size; block = div_u64(folio_pos(folio), blocksize); for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start=block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (folio_test_uptodate(folio)) { if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } continue; } if (buffer_new(bh)) clear_buffer_new(bh); if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); if (get_block) err = get_block(inode, block, bh, 1); else err = iomap_to_bh(inode, block, bh, iomap); if (err) break; if (buffer_new(bh)) { clean_bdev_bh_alias(bh); if (folio_test_uptodate(folio)) { clear_buffer_new(bh); set_buffer_uptodate(bh); mark_buffer_dirty(bh); continue; } if (block_end > to || block_start < from) folio_zero_segments(folio, to, block_end, block_start, from); continue; } } if (folio_test_uptodate(folio)) { if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); continue; } if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh) && (block_start < from || block_end > to)) { bh_read_nowait(bh, 0); *wait_bh++=bh; } } /* * If we issued read requests - let them complete. 
*/ while(wait_bh > wait) { wait_on_buffer(*--wait_bh); if (!buffer_uptodate(*wait_bh)) err = -EIO; } if (unlikely(err)) folio_zero_new_buffers(folio, from, to); return err; } int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { return __block_write_begin_int(page_folio(page), pos, len, get_block, NULL); } EXPORT_SYMBOL(__block_write_begin); static void __block_commit_write(struct folio *folio, size_t from, size_t to) { size_t block_start, block_end; bool partial = false; unsigned blocksize; struct buffer_head *bh, *head; bh = head = folio_buffers(folio); blocksize = bh->b_size; block_start = 0; do { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (!buffer_uptodate(bh)) partial = true; } else { set_buffer_uptodate(bh); mark_buffer_dirty(bh); } if (buffer_new(bh)) clear_buffer_new(bh); block_start = block_end; bh = bh->b_this_page; } while (bh != head); /* * If this is a partial write which happened to make all buffers * uptodate then we can optimize away a bogus read_folio() for * the next read(). Here we 'discover' whether the folio went * uptodate as a result of this (potentially partial) write. */ if (!partial) folio_mark_uptodate(folio); } /* * block_write_begin takes care of the basic task of block allocation and * bringing partial write blocks uptodate first. * * The filesystem needs to handle block truncation upon failure. */ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, get_block_t *get_block) { pgoff_t index = pos >> PAGE_SHIFT; struct page *page; int status; page = grab_cache_page_write_begin(mapping, index); if (!page) return -ENOMEM; status = __block_write_begin(page, pos, len, get_block); if (unlikely(status)) { unlock_page(page); put_page(page); page = NULL; } *pagep = page; return status; } EXPORT_SYMBOL(block_write_begin); int block_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct folio *folio = page_folio(page); size_t start = pos - folio_pos(folio); if (unlikely(copied < len)) { /* * The buffers that were written will now be uptodate, so * we don't have to worry about a read_folio reading them * and overwriting a partial write. However if we have * encountered a short write and only partially written * into a buffer, it will not be marked uptodate, so a * read_folio might come in and destroy our partial write. * * Do the simplest thing, and just treat any short write to a * non uptodate folio as a zero-length write, and force the * caller to redo the whole thing. */ if (!folio_test_uptodate(folio)) copied = 0; folio_zero_new_buffers(folio, start+copied, start+len); } flush_dcache_folio(folio); /* This could be a short (even 0-length) commit */ __block_commit_write(folio, start, start + copied); return copied; } EXPORT_SYMBOL(block_write_end); int generic_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct inode *inode = mapping->host; loff_t old_size = inode->i_size; bool i_size_changed = false; copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); /* * No need to use i_size_read() here, the i_size cannot change under us * because we hold i_rwsem. * * But it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. 
*/ if (pos + copied > inode->i_size) { i_size_write(inode, pos + copied); i_size_changed = true; } unlock_page(page); put_page(page); if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); /* * Don't mark the inode dirty under page lock. First, it unnecessarily * makes the holding time of page lock longer. Second, it forces lock * ordering of page lock and transaction start for journaling * filesystems. */ if (i_size_changed) mark_inode_dirty(inode); return copied; } EXPORT_SYMBOL(generic_write_end); /* * block_is_partially_uptodate checks whether buffers within a folio are * uptodate or not. * * Returns true if all buffers which correspond to the specified part * of the folio are uptodate. */ bool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count) { unsigned block_start, block_end, blocksize; unsigned to; struct buffer_head *bh, *head; bool ret = true; head = folio_buffers(folio); if (!head) return false; blocksize = head->b_size; to = min_t(unsigned, folio_size(folio) - from, count); to = from + to; if (from < blocksize && to > folio_size(folio) - blocksize) return false; bh = head; block_start = 0; do { block_end = block_start + blocksize; if (block_end > from && block_start < to) { if (!buffer_uptodate(bh)) { ret = false; break; } if (block_end >= to) break; } block_start = block_end; bh = bh->b_this_page; } while (bh != head); return ret; } EXPORT_SYMBOL(block_is_partially_uptodate); /* * Generic "read_folio" function for block devices that have the normal * get_block functionality. This is most of the block device filesystems. * Reads the folio asynchronously --- the unlock_buffer() and * set/clear_buffer_uptodate() functions propagate buffer state into the * folio once IO has completed. */ int block_read_full_folio(struct folio *folio, get_block_t *get_block) { struct inode *inode = folio->mapping->host; sector_t iblock, lblock; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; size_t blocksize; int nr, i; int fully_mapped = 1; bool page_error = false; loff_t limit = i_size_read(inode); /* This is needed for ext4. */ if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) limit = inode->i_sb->s_maxbytes; VM_BUG_ON_FOLIO(folio_test_large(folio), folio); head = folio_create_buffers(folio, inode, 0); blocksize = head->b_size; iblock = div_u64(folio_pos(folio), blocksize); lblock = div_u64(limit + blocksize - 1, blocksize); bh = head; nr = 0; i = 0; do { if (buffer_uptodate(bh)) continue; if (!buffer_mapped(bh)) { int err = 0; fully_mapped = 0; if (iblock < lblock) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); if (err) { folio_set_error(folio); page_error = true; } } if (!buffer_mapped(bh)) { folio_zero_range(folio, i * blocksize, blocksize); if (!err) set_buffer_uptodate(bh); continue; } /* * get_block() might have updated the buffer * synchronously */ if (buffer_uptodate(bh)) continue; } arr[nr++] = bh; } while (i++, iblock++, (bh = bh->b_this_page) != head); if (fully_mapped) folio_set_mappedtodisk(folio); if (!nr) { /* * All buffers are uptodate or get_block() returned an * error when trying to map them - we can finish the read. */ folio_end_read(folio, !page_error); return 0; } /* Stage two: lock the buffers */ for (i = 0; i < nr; i++) { bh = arr[i]; lock_buffer(bh); mark_buffer_async_read(bh); } /* * Stage 3: start the IO. Check for uptodateness * inside the buffer lock in case another process reading * the underlying blockdev brought it uptodate (the sct fix). 
*/ for (i = 0; i < nr; i++) { bh = arr[i]; if (buffer_uptodate(bh)) end_buffer_async_read(bh, 1); else submit_bh(REQ_OP_READ, bh); } return 0; } EXPORT_SYMBOL(block_read_full_folio); /* utility function for filesystems that need to do work on expanding * truncates. Uses filesystem pagecache writes to allow the filesystem to * deal with the hole. */ int generic_cont_expand_simple(struct inode *inode, loff_t size) { struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; struct page *page; void *fsdata = NULL; int err; err = inode_newsize_ok(inode, size); if (err) goto out; err = aops->write_begin(NULL, mapping, size, 0, &page, &fsdata); if (err) goto out; err = aops->write_end(NULL, mapping, size, 0, 0, page, fsdata); BUG_ON(err > 0); out: return err; } EXPORT_SYMBOL(generic_cont_expand_simple); static int cont_expand_zero(struct file *file, struct address_space *mapping, loff_t pos, loff_t *bytes) { struct inode *inode = mapping->host; const struct address_space_operations *aops = mapping->a_ops; unsigned int blocksize = i_blocksize(inode); struct page *page; void *fsdata = NULL; pgoff_t index, curidx; loff_t curpos; unsigned zerofrom, offset, len; int err = 0; index = pos >> PAGE_SHIFT; offset = pos & ~PAGE_MASK; while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) { zerofrom = curpos & ~PAGE_MASK; if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } len = PAGE_SIZE - zerofrom; err = aops->write_begin(file, mapping, curpos, len, &page, &fsdata); if (err) goto out; zero_user(page, zerofrom, len); err = aops->write_end(file, mapping, curpos, len, len, page, fsdata); if (err < 0) goto out; BUG_ON(err != len); err = 0; balance_dirty_pages_ratelimited(mapping); if (fatal_signal_pending(current)) { err = -EINTR; goto out; } } /* page covers the boundary, find the boundary offset */ if (index == curidx) { zerofrom = curpos & ~PAGE_MASK; /* if we will expand the thing last block will be filled */ if (offset <= zerofrom) { goto out; } if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } len = offset - zerofrom; err = aops->write_begin(file, mapping, curpos, len, &page, &fsdata); if (err) goto out; zero_user(page, zerofrom, len); err = aops->write_end(file, mapping, curpos, len, len, page, fsdata); if (err < 0) goto out; BUG_ON(err != len); err = 0; } out: return err; } /* * For moronic filesystems that do not allow holes in file. * We may have to extend the file. */ int cont_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata, get_block_t *get_block, loff_t *bytes) { struct inode *inode = mapping->host; unsigned int blocksize = i_blocksize(inode); unsigned int zerofrom; int err; err = cont_expand_zero(file, mapping, pos, bytes); if (err) return err; zerofrom = *bytes & ~PAGE_MASK; if (pos+len > *bytes && zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } return block_write_begin(mapping, pos, len, pagep, get_block); } EXPORT_SYMBOL(cont_write_begin); void block_commit_write(struct page *page, unsigned from, unsigned to) { struct folio *folio = page_folio(page); __block_commit_write(folio, from, to); } EXPORT_SYMBOL(block_commit_write); /* * block_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. Hence we must * be careful to check for EOF conditions here. 
We set the page up correctly * for a written page which means we get ENOSPC checking when writing into * holes and correct delalloc and unwritten extent mapping on filesystems that * support these features. * * We are not allowed to take the i_mutex here so we have to play games to * protect against truncate races as the page could now be beyond EOF. Because * truncate writes the inode size before removing pages, once we have the * page lock we can determine safely if the page is beyond EOF. If it is not * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. * * Direct callers of this function should protect against filesystem freezing * using sb_start_pagefault() - sb_end_pagefault() functions. */ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vma->vm_file); unsigned long end; loff_t size; int ret; folio_lock(folio); size = i_size_read(inode); if ((folio->mapping != inode->i_mapping) || (folio_pos(folio) >= size)) { /* We overload EFAULT to mean page got truncated */ ret = -EFAULT; goto out_unlock; } end = folio_size(folio); /* folio is wholly or partially inside EOF */ if (folio_pos(folio) + end > size) end = size - folio_pos(folio); ret = __block_write_begin_int(folio, 0, end, get_block, NULL); if (unlikely(ret)) goto out_unlock; __block_commit_write(folio, 0, end); folio_mark_dirty(folio); folio_wait_stable(folio); return 0; out_unlock: folio_unlock(folio); return ret; } EXPORT_SYMBOL(block_page_mkwrite); int block_truncate_page(struct address_space *mapping, loff_t from, get_block_t *get_block) { pgoff_t index = from >> PAGE_SHIFT; unsigned blocksize; sector_t iblock; size_t offset, length, pos; struct inode *inode = mapping->host; struct folio *folio; struct buffer_head *bh; int err = 0; blocksize = i_blocksize(inode); length = from & (blocksize - 1); /* Block boundary? Nothing to do */ if (!length) return 0; length = blocksize - length; iblock = ((loff_t)index * PAGE_SIZE) >> inode->i_blkbits; folio = filemap_grab_folio(mapping, index); if (IS_ERR(folio)) return PTR_ERR(folio); bh = folio_buffers(folio); if (!bh) bh = create_empty_buffers(folio, blocksize, 0); /* Find the buffer that contains "offset" */ offset = offset_in_folio(folio, from); pos = blocksize; while (offset >= pos) { bh = bh->b_this_page; iblock++; pos += blocksize; } if (!buffer_mapped(bh)) { WARN_ON(bh->b_size != blocksize); err = get_block(inode, iblock, bh, 0); if (err) goto unlock; /* unmapped? It's a hole - nothing to do */ if (!buffer_mapped(bh)) goto unlock; } /* Ok, it's mapped. Make sure it's up-to-date */ if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) { err = bh_read(bh, 0); /* Uhhuh. Read error. Complain and punt. */ if (err < 0) goto unlock; } folio_zero_range(folio, offset, length); mark_buffer_dirty(bh); unlock: folio_unlock(folio); folio_put(folio); return err; } EXPORT_SYMBOL(block_truncate_page); /* * The generic ->writepage function for buffer-backed address_spaces */ int block_write_full_folio(struct folio *folio, struct writeback_control *wbc, void *get_block) { struct inode * const inode = folio->mapping->host; loff_t i_size = i_size_read(inode); /* Is the folio fully inside i_size? */ if (folio_pos(folio) + folio_size(folio) <= i_size) return __block_write_full_folio(inode, folio, get_block, wbc); /* Is the folio fully outside i_size? 
(truncate in progress) */ if (folio_pos(folio) >= i_size) { folio_unlock(folio); return 0; /* don't care */ } /* * The folio straddles i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ folio_zero_segment(folio, offset_in_folio(folio, i_size), folio_size(folio)); return __block_write_full_folio(inode, folio, get_block, wbc); } sector_t generic_block_bmap(struct address_space *mapping, sector_t block, get_block_t *get_block) { struct inode *inode = mapping->host; struct buffer_head tmp = { .b_size = i_blocksize(inode), }; get_block(inode, block, &tmp, 0); return tmp.b_blocknr; } EXPORT_SYMBOL(generic_block_bmap); static void end_bio_bh_io_sync(struct bio *bio) { struct buffer_head *bh = bio->bi_private; if (unlikely(bio_flagged(bio, BIO_QUIET))) set_bit(BH_Quiet, &bh->b_state); bh->b_end_io(bh, !bio->bi_status); bio_put(bio); } static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, enum rw_hint write_hint, struct writeback_control *wbc) { const enum req_op op = opf & REQ_OP_MASK; struct bio *bio; BUG_ON(!buffer_locked(bh)); BUG_ON(!buffer_mapped(bh)); BUG_ON(!bh->b_end_io); BUG_ON(buffer_delay(bh)); BUG_ON(buffer_unwritten(bh)); /* * Only clear out a write error when rewriting */ if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE)) clear_buffer_write_io_error(bh); if (buffer_meta(bh)) opf |= REQ_META; if (buffer_prio(bh)) opf |= REQ_PRIO; bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_write_hint = write_hint; __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; /* Take care of bh's that straddle the end of the device */ guard_bio_eod(bio); if (wbc) { wbc_init_bio(wbc, bio); wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size); } submit_bio(bio); } void submit_bh(blk_opf_t opf, struct buffer_head *bh) { submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL); } EXPORT_SYMBOL(submit_bh); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) { lock_buffer(bh); if (!test_clear_buffer_dirty(bh)) { unlock_buffer(bh); return; } bh->b_end_io = end_buffer_write_sync; get_bh(bh); submit_bh(REQ_OP_WRITE | op_flags, bh); } EXPORT_SYMBOL(write_dirty_buffer); /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) { WARN_ON(atomic_read(&bh->b_count) < 1); lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { /* * The bh should be mapped, but it might not be if the * device was hot-removed. Not much we can do but fail the I/O. */ if (!buffer_mapped(bh)) { unlock_buffer(bh); return -EIO; } get_bh(bh); bh->b_end_io = end_buffer_write_sync; submit_bh(REQ_OP_WRITE | op_flags, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) return -EIO; } else { unlock_buffer(bh); } return 0; } EXPORT_SYMBOL(__sync_dirty_buffer); int sync_dirty_buffer(struct buffer_head *bh) { return __sync_dirty_buffer(bh, REQ_SYNC); } EXPORT_SYMBOL(sync_dirty_buffer); /* * try_to_free_buffers() checks if all the buffers on this particular folio * are unused, and releases them if so. 
* * Exclusion against try_to_free_buffers may be obtained by either * locking the folio or by holding its mapping's i_private_lock. * * If the folio is dirty but all the buffers are clean then we need to * be sure to mark the folio clean as well. This is because the folio * may be against a block device, and a later reattachment of buffers * to a dirty folio will set *all* buffers dirty. Which would corrupt * filesystem data on the same device. * * The same applies to regular filesystem folios: if all the buffers are * clean then we set the folio clean and proceed. To do that, we require * total exclusion from block_dirty_folio(). That is obtained with * i_private_lock. * * try_to_free_buffers() is non-blocking. */ static inline int buffer_busy(struct buffer_head *bh) { return atomic_read(&bh->b_count) | (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock))); } static bool drop_buffers(struct folio *folio, struct buffer_head **buffers_to_free) { struct buffer_head *head = folio_buffers(folio); struct buffer_head *bh; bh = head; do { if (buffer_busy(bh)) goto failed; bh = bh->b_this_page; } while (bh != head); do { struct buffer_head *next = bh->b_this_page; if (bh->b_assoc_map) __remove_assoc_queue(bh); bh = next; } while (bh != head); *buffers_to_free = head; folio_detach_private(folio); return true; failed: return false; } bool try_to_free_buffers(struct folio *folio) { struct address_space * const mapping = folio->mapping; struct buffer_head *buffers_to_free = NULL; bool ret = 0; BUG_ON(!folio_test_locked(folio)); if (folio_test_writeback(folio)) return false; if (mapping == NULL) { /* can this still happen? */ ret = drop_buffers(folio, &buffers_to_free); goto out; } spin_lock(&mapping->i_private_lock); ret = drop_buffers(folio, &buffers_to_free); /* * If the filesystem writes its buffers by hand (eg ext3) * then we can have clean buffers against a dirty folio. We * clean the folio here; otherwise the VM will never notice * that the filesystem did any IO at all. * * Also, during truncate, discard_buffer will have marked all * the folio's buffers clean. We discover that here and clean * the folio also. * * i_private_lock must be held over this entire operation in order * to synchronise against block_dirty_folio and prevent the * dirty bit from being lost. */ if (ret) folio_cancel_dirty(folio); spin_unlock(&mapping->i_private_lock); out: if (buffers_to_free) { struct buffer_head *bh = buffers_to_free; do { struct buffer_head *next = bh->b_this_page; free_buffer_head(bh); bh = next; } while (bh != buffers_to_free); } return ret; } EXPORT_SYMBOL(try_to_free_buffers); /* * Buffer-head allocation */ static struct kmem_cache *bh_cachep __ro_after_init; /* * Once the number of bh's in the machine exceeds this level, we start * stripping them in writeback. 
*/ static unsigned long max_buffer_heads __ro_after_init; int buffer_heads_over_limit; struct bh_accounting { int nr; /* Number of live bh's */ int ratelimit; /* Limit cacheline bouncing */ }; static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0}; static void recalc_bh_state(void) { int i; int tot = 0; if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096) return; __this_cpu_write(bh_accounting.ratelimit, 0); for_each_online_cpu(i) tot += per_cpu(bh_accounting, i).nr; buffer_heads_over_limit = (tot > max_buffer_heads); } struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); spin_lock_init(&ret->b_uptodate_lock); preempt_disable(); __this_cpu_inc(bh_accounting.nr); recalc_bh_state(); preempt_enable(); } return ret; } EXPORT_SYMBOL(alloc_buffer_head); void free_buffer_head(struct buffer_head *bh) { BUG_ON(!list_empty(&bh->b_assoc_buffers)); kmem_cache_free(bh_cachep, bh); preempt_disable(); __this_cpu_dec(bh_accounting.nr); recalc_bh_state(); preempt_enable(); } EXPORT_SYMBOL(free_buffer_head); static int buffer_exit_cpu_dead(unsigned int cpu) { int i; struct bh_lru *b = &per_cpu(bh_lrus, cpu); for (i = 0; i < BH_LRU_SIZE; i++) { brelse(b->bhs[i]); b->bhs[i] = NULL; } this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr); per_cpu(bh_accounting, cpu).nr = 0; return 0; } /** * bh_uptodate_or_lock - Test whether the buffer is uptodate * @bh: struct buffer_head * * Return true if the buffer is up-to-date and false, * with the buffer locked, if not. */ int bh_uptodate_or_lock(struct buffer_head *bh) { if (!buffer_uptodate(bh)) { lock_buffer(bh); if (!buffer_uptodate(bh)) return 0; unlock_buffer(bh); } return 1; } EXPORT_SYMBOL(bh_uptodate_or_lock); /** * __bh_read - Submit read for a locked buffer * @bh: struct buffer_head * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ * @wait: wait until reading finish * * Returns zero on success or don't wait, and -EIO on error. */ int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait) { int ret = 0; BUG_ON(!buffer_locked(bh)); get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ | op_flags, bh); if (wait) { wait_on_buffer(bh); if (!buffer_uptodate(bh)) ret = -EIO; } return ret; } EXPORT_SYMBOL(__bh_read); /** * __bh_read_batch - Submit read for a batch of unlocked buffers * @nr: entry number of the buffer batch * @bhs: a batch of struct buffer_head * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ * @force_lock: force to get a lock on the buffer if set, otherwise drops any * buffer that cannot lock. * * Returns zero on success or don't wait, and -EIO on error. 
*/ void __bh_read_batch(int nr, struct buffer_head *bhs[], blk_opf_t op_flags, bool force_lock) { int i; for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; if (buffer_uptodate(bh)) continue; if (force_lock) lock_buffer(bh); else if (!trylock_buffer(bh)) continue; if (buffer_uptodate(bh)) { unlock_buffer(bh); continue; } bh->b_end_io = end_buffer_read_sync; get_bh(bh); submit_bh(REQ_OP_READ | op_flags, bh); } } EXPORT_SYMBOL(__bh_read_batch); void __init buffer_init(void) { unsigned long nrpages; int ret; bh_cachep = KMEM_CACHE(buffer_head, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC); /* * Limit the bh occupancy to 10% of ZONE_NORMAL */ nrpages = (nr_free_buffer_pages() * 10) / 100; max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead", NULL, buffer_exit_cpu_dead); WARN_ON(ret < 0); }
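/*
 * Illustrative sketch only (not part of fs/buffer.c): a minimal
 * read-modify-write cycle against the buffer cache using the helpers
 * defined above.  The function name and the modify() callback are
 * hypothetical; real filesystems wrap this pattern in their own locking,
 * journaling and error handling.
 */
static int example_rmw_block(struct block_device *bdev, sector_t block,
			     unsigned size, void (*modify)(void *data))
{
	struct buffer_head *bh;
	int err;

	/*
	 * Read the block through the buffer cache; this may sleep and may
	 * wait for I/O.  Returns NULL if the block could not be read.
	 */
	bh = __bread_gfp(bdev, block, size, __GFP_MOVABLE);
	if (!bh)
		return -EIO;

	/*
	 * Modify the in-memory copy under the buffer lock, then flag the
	 * buffer (and its folio) dirty for writeback.
	 */
	lock_buffer(bh);
	modify(bh->b_data);
	unlock_buffer(bh);
	mark_buffer_dirty(bh);

	/* Data-integrity writeout: submit the dirty buffer and wait on it. */
	err = sync_dirty_buffer(bh);

	/* Drop the reference taken by __bread_gfp(). */
	brelse(bh);
	return err;
}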
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This is an implementation of the BLAKE2s hash and PRF functions.
 *
 * Information: https://blake2.net/
 *
 */

#include <crypto/internal/blake2s.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bug.h>

static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
	state->f[0] = -1;
}

void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
{
	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;

	if (unlikely(!inlen))
		return;
	if (inlen > fill) {
		memcpy(state->buf + state->buflen, in, fill);
		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
		state->buflen = 0;
		in += fill;
		inlen -= fill;
	}
	if (inlen > BLAKE2S_BLOCK_SIZE) {
		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
	}
	memcpy(state->buf + state->buflen, in, inlen);
	state->buflen += inlen;
}
EXPORT_SYMBOL(blake2s_update);

void blake2s_final(struct blake2s_state *state, u8 *out)
{
	WARN_ON(IS_ENABLED(DEBUG) && !out);
	blake2s_set_lastblock(state);
	memset(state->buf + state->buflen, 0,
	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
	blake2s_compress(state, state->buf, 1, state->buflen);
	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
	memcpy(out, state->h, state->outlen);
	memzero_explicit(state, sizeof(*state));
}
EXPORT_SYMBOL(blake2s_final);

static int __init blake2s_mod_init(void)
{
	if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
	    WARN_ON(!blake2s_selftest()))
		return -ENODEV;

	return 0;
}

module_init(blake2s_mod_init);
MODULE_DESCRIPTION("BLAKE2s hash function");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
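/*
 * Illustrative sketch only (not part of this file): computing an unkeyed
 * 32-byte BLAKE2s digest with the incremental API above.  blake2s_init()
 * is provided by <crypto/blake2s.h>; the function name here is made up
 * for the example.
 */
static void example_blake2s_digest(const u8 *data, size_t len,
				   u8 out[BLAKE2S_HASH_SIZE])
{
	struct blake2s_state state;

	blake2s_init(&state, BLAKE2S_HASH_SIZE);	/* unkeyed, full-length output */
	blake2s_update(&state, data, len);		/* may be called on arbitrary chunks */
	blake2s_final(&state, out);			/* writes the digest and wipes the state */
}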
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * kernfs.h - pseudo filesystem decoupled from vfs locking
 */

#ifndef __LINUX_KERNFS_H
#define __LINUX_KERNFS_H

#include <linux/err.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/lockdep.h>
#include <linux/rbtree.h>
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/uidgid.h>
#include <linux/wait.h>
#include <linux/rwsem.h>
#include <linux/cache.h>

struct file;
struct dentry;
struct iattr;
struct seq_file;
struct vm_area_struct;
struct vm_operations_struct;
struct super_block;
struct file_system_type;
struct poll_table_struct;
struct fs_context;

struct kernfs_fs_context;
struct kernfs_open_node;
struct kernfs_iattrs;

/*
 * NR_KERNFS_LOCK_BITS determines size (NR_KERNFS_LOCKS) of hash
 * table of locks.
 * Having a small hash table would impact scalability, since
 * more and more kernfs_node objects will end up using same lock
 * and having a very large hash table would waste memory.
* * At the moment size of hash table of locks is being set based on * the number of CPUs as follows: * * NR_CPU NR_KERNFS_LOCK_BITS NR_KERNFS_LOCKS * 1 1 2 * 2-3 2 4 * 4-7 4 16 * 8-15 6 64 * 16-31 8 256 * 32 and more 10 1024 * * The above relation between NR_CPU and number of locks is based * on some internal experimentation which involved booting qemu * with different values of smp, performing some sysfs operations * on all CPUs and observing how increase in number of locks impacts * completion time of these sysfs operations on each CPU. */ #ifdef CONFIG_SMP #define NR_KERNFS_LOCK_BITS (2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32))) #else #define NR_KERNFS_LOCK_BITS 1 #endif #define NR_KERNFS_LOCKS (1 << NR_KERNFS_LOCK_BITS) /* * There's one kernfs_open_file for each open file and one kernfs_open_node * for each kernfs_node with one or more open files. * * filp->private_data points to seq_file whose ->private points to * kernfs_open_file. * * kernfs_open_files are chained at kernfs_open_node->files, which is * protected by kernfs_global_locks.open_file_mutex[i]. * * To reduce possible contention in sysfs access, arising due to single * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node * object address as hash keys to get the index of these locks. * * Hashed mutexes are safe to use here because operations using these don't * rely on global exclusion. * * In future we intend to replace other global locks with hashed ones as well. * kernfs_global_locks acts as a holder for all such hash tables. */ struct kernfs_global_locks { struct mutex open_file_mutex[NR_KERNFS_LOCKS]; }; enum kernfs_node_type { KERNFS_DIR = 0x0001, KERNFS_FILE = 0x0002, KERNFS_LINK = 0x0004, }; #define KERNFS_TYPE_MASK 0x000f #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK #define KERNFS_MAX_USER_XATTRS 128 #define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, KERNFS_NS = 0x0020, KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, KERNFS_HIDDEN = 0x0200, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, KERNFS_HAS_RELEASE = 0x2000, KERNFS_REMOVING = 0x4000, }; /* @flags for kernfs_create_root() */ enum kernfs_root_flag { /* * kernfs_nodes are created in the deactivated state and invisible. * They require explicit kernfs_activate() to become visible. This * can be used to make related nodes become visible atomically * after all nodes are created successfully. */ KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, /* * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2) * succeeds regardless of the RW permissions. sysfs had an extra * layer of enforcement where open(2) fails with -EACCES regardless * of CAP_DAC_OVERRIDE if the permission doesn't have the * respective read or write access at all (none of S_IRUGO or * S_IWUGO) or the respective operation isn't implemented. The * following flag enables that behavior. */ KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK = 0x0002, /* * The filesystem supports exportfs operation, so userspace can use * fhandle to access nodes of the fs. */ KERNFS_ROOT_SUPPORT_EXPORTOP = 0x0004, /* * Support user xattrs to be written to nodes rooted at this root. */ KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, }; /* type-specific structures for kernfs_node union members */ struct kernfs_elem_dir { unsigned long subdirs; /* children rbtree starts here and goes through kn->rb */ struct rb_root children; /* * The kernfs hierarchy this directory belongs to. 
This fits * better directly in kernfs_node but is here to save space. */ struct kernfs_root *root; /* * Monotonic revision counter, used to identify if a directory * node has changed during negative dentry revalidation. */ unsigned long rev; }; struct kernfs_elem_symlink { struct kernfs_node *target_kn; }; struct kernfs_elem_attr { const struct kernfs_ops *ops; struct kernfs_open_node __rcu *open; loff_t size; struct kernfs_node *notify_next; /* for kernfs_notify() */ }; /* * kernfs_node - the building block of kernfs hierarchy. Each and every * kernfs node is represented by single kernfs_node. Most fields are * private to kernfs and shouldn't be accessed directly by kernfs users. * * As long as count reference is held, the kernfs_node itself is * accessible. Dereferencing elem or any other outer entity requires * active reference. */ struct kernfs_node { atomic_t count; atomic_t active; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* * Use kernfs_get_parent() and kernfs_name/path() instead of * accessing the following two fields directly. If the node is * never moved to a different parent, it is safe to access the * parent directly. */ struct kernfs_node *parent; const char *name; struct rb_node rb; const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ unsigned short flags; umode_t mode; union { struct kernfs_elem_dir dir; struct kernfs_elem_symlink symlink; struct kernfs_elem_attr attr; }; /* * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit, * the low 32bits are ino and upper generation. */ u64 id; void *priv; struct kernfs_iattrs *iattr; struct rcu_head rcu; }; /* * kernfs_syscall_ops may be specified on kernfs_create_root() to support * syscalls. These optional callbacks are invoked on the matching syscalls * and can perform any kernfs operations which don't necessarily have to be * the exact operation requested. An active reference is held for each * kernfs_node parameter. */ struct kernfs_syscall_ops { int (*show_options)(struct seq_file *sf, struct kernfs_root *root); int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); int (*rmdir)(struct kernfs_node *kn); int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name); int (*show_path)(struct seq_file *sf, struct kernfs_node *kn, struct kernfs_root *root); }; struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root); struct kernfs_open_file { /* published fields */ struct kernfs_node *kn; struct file *file; struct seq_file *seq_file; void *priv; /* private fields, do not use outside kernfs proper */ struct mutex mutex; struct mutex prealloc_mutex; int event; struct list_head list; char *prealloc_buf; size_t atomic_write_len; bool mmapped:1; bool released:1; const struct vm_operations_struct *vm_ops; }; struct kernfs_ops { /* * Optional open/release methods. Both are called with * @of->seq_file populated. */ int (*open)(struct kernfs_open_file *of); void (*release)(struct kernfs_open_file *of); /* * Read is handled by either seq_file or raw_read(). * * If seq_show() is present, seq_file path is active. Other seq * operations are optional and if not implemented, the behavior is * equivalent to single_open(). @sf->private points to the * associated kernfs_open_file. * * read() is bounced through kernel buffer and a read larger than * PAGE_SIZE results in partial operation of PAGE_SIZE. 
*/ int (*seq_show)(struct seq_file *sf, void *v); void *(*seq_start)(struct seq_file *sf, loff_t *ppos); void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); void (*seq_stop)(struct seq_file *sf, void *v); ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); /* * write() is bounced through kernel buffer. If atomic_write_len * is not set, a write larger than PAGE_SIZE results in partial * operations of PAGE_SIZE chunks. If atomic_write_len is set, * writes upto the specified size are executed atomically but * larger ones are rejected with -E2BIG. */ size_t atomic_write_len; /* * "prealloc" causes a buffer to be allocated at open for * all read/write requests. As ->seq_show uses seq_read() * which does its own allocation, it is incompatible with * ->prealloc. Provide ->read and ->write with ->prealloc. */ bool prealloc; ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); __poll_t (*poll)(struct kernfs_open_file *of, struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence); }; /* * The kernfs superblock creation/mount parameter context. */ struct kernfs_fs_context { struct kernfs_root *root; /* Root of the hierarchy being mounted */ void *ns_tag; /* Namespace tag of the mount (or NULL) */ unsigned long magic; /* File system specific magic number */ /* The following are set/used by kernfs_mount() */ bool new_sb_created; /* Set to T if we allocated a new sb */ }; #ifdef CONFIG_KERNFS static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return kn->flags & KERNFS_TYPE_MASK; } static inline ino_t kernfs_id_ino(u64 id) { /* id is ino if ino_t is 64bit; otherwise, low 32bits */ if (sizeof(ino_t) >= sizeof(u64)) return id; else return (u32)id; } static inline u32 kernfs_id_gen(u64 id) { /* gen is fixed at 1 if ino_t is 64bit; otherwise, high 32bits */ if (sizeof(ino_t) >= sizeof(u64)) return 1; else return id >> 32; } static inline ino_t kernfs_ino(struct kernfs_node *kn) { return kernfs_id_ino(kn->id); } static inline ino_t kernfs_gen(struct kernfs_node *kn) { return kernfs_id_gen(kn->id); } /** * kernfs_enable_ns - enable namespace under a directory * @kn: directory of interest, should be empty * * This is to be called right after @kn is created to enable namespace * under it. All children of @kn must have non-NULL namespace tags and * only the ones which match the super_block's tag will be visible. */ static inline void kernfs_enable_ns(struct kernfs_node *kn) { WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR); WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children)); kn->flags |= KERNFS_NS; } /** * kernfs_ns_enabled - test whether namespace is enabled * @kn: the node to test * * Test whether namespace filtering is enabled for the children of @ns. 
*/ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return kn->flags & KERNFS_NS; } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); void kernfs_activate(struct kernfs_node *kn); void kernfs_show(struct kernfs_node *kn, bool show); void kernfs_remove(struct kernfs_node *kn); void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns); int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt); void kernfs_notify(struct kernfs_node *kn); int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size); int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags); const void *kernfs_super_ns(struct super_block *sb); int kernfs_get_tree(struct fs_context *fc); void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id); #else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return 0; } /* whatever */ static inline void kernfs_enable_ns(struct kernfs_node *kn) { } static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return false; } static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } static inline int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } static inline void pr_cont_kernfs_name(struct 
kernfs_node *kn) { } static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { return NULL; } static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { return NULL; } static inline struct kernfs_node * kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) { return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) { return NULL; } static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { return NULL; } static inline struct inode * kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn) { return NULL; } static inline struct kernfs_root * kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target) { return ERR_PTR(-ENOSYS); } static inline void kernfs_activate(struct kernfs_node *kn) { } static inline void kernfs_remove(struct kernfs_node *kn) { } static inline bool kernfs_remove_self(struct kernfs_node *kn) { return false; } static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, const char *name, const void *ns) { return -ENOSYS; } static inline int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { return -ENOSYS; } static inline int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { return -ENOSYS; } static inline __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt) { return -ENOSYS; } static inline void kernfs_notify(struct kernfs_node *kn) { } static inline int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size) { return -ENOSYS; } static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { return -ENOSYS; } static inline const void *kernfs_super_ns(struct super_block *sb) { return NULL; } static inline int kernfs_get_tree(struct fs_context *fc) { return -ENOSYS; } static inline void kernfs_free_fs_context(struct fs_context *fc) { } static inline void kernfs_kill_sb(struct super_block *sb) { } static inline void kernfs_init(void) { } #endif /* CONFIG_KERNFS */ /** * kernfs_path - build full path of a given node * @kn: kernfs_node of interest * @buf: buffer to copy @kn's name into * @buflen: size of @buf * * If @kn is NULL result will be "(null)". * * Returns the length of the full path. If the full length is equal to or * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. 
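 *
 * A hedged usage sketch (the buffer size and pr_info() call are
 * illustrative, not a recommendation):
 *
 *	char buf[256];
 *	int len = kernfs_path(kn, buf, sizeof(buf));
 *
 *	if (len >= 0 && len < sizeof(buf))
 *		pr_info("node path: %s\n", buf);
 *	else if (len >= 0)
 *		pr_info("truncated path: %s\n", buf);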
*/ static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) { return kernfs_path_from_node(kn, NULL, buf, buflen); } static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) { return kernfs_find_and_get_ns(kn, name, NULL); } static inline struct kernfs_node * kernfs_walk_and_get(struct kernfs_node *kn, const char *path) { return kernfs_walk_and_get_ns(kn, path, NULL); } static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) { return kernfs_create_dir_ns(parent, name, mode, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, priv, NULL); } static inline int kernfs_remove_by_name(struct kernfs_node *parent, const char *name) { return kernfs_remove_by_name_ns(parent, name, NULL); } static inline int kernfs_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name) { return kernfs_rename_ns(kn, new_parent, new_name, NULL); } #endif /* __LINUX_KERNFS_H */
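/*
 * A hedged usage sketch (not part of the header above): how a kernfs user
 * might build a tiny hierarchy with the API declared in this file.  The
 * example_* names, the modes and the printed string are illustrative
 * assumptions, not kernel code.
 */
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/err.h>

static int example_seq_show(struct seq_file *sf, void *v)
{
	/* with ->seq_show set, reads go through the seq_file path */
	seq_puts(sf, "hello from kernfs\n");
	return 0;
}

static const struct kernfs_ops example_ops = {
	.seq_show	= example_seq_show,
};

static int example_build_hierarchy(void)
{
	struct kernfs_root *root;
	struct kernfs_node *dir, *file;

	/* no syscall ops, no root flags, no private data */
	root = kernfs_create_root(NULL, 0, NULL);
	if (IS_ERR(root))
		return PTR_ERR(root);

	dir = kernfs_create_dir(kernfs_root_to_node(root), "example",
				0755, NULL);
	if (IS_ERR(dir)) {
		kernfs_destroy_root(root);
		return PTR_ERR(dir);
	}

	file = __kernfs_create_file(dir, "greeting", 0444, GLOBAL_ROOT_UID,
				    GLOBAL_ROOT_GID, 0, &example_ops, NULL,
				    NULL, NULL);
	if (IS_ERR(file)) {
		kernfs_destroy_root(root);
		return PTR_ERR(file);
	}

	/*
	 * Make the new subtree visible; this matters when the root was
	 * created with KERNFS_ROOT_CREATE_DEACTIVATED and is harmless
	 * otherwise.
	 */
	kernfs_activate(kernfs_root_to_node(root));
	return 0;
}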
// SPDX-License-Identifier: GPL-2.0
/*
 * Block multiqueue core code
 *
 * Copyright (C) 2013-2014 Jens Axboe
 * Copyright (C) 2013-2014 Christoph Hellwig
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/kmemleak.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/llist.h>
#include <linux/cpu.h>
#include <linux/cache.h>
#include <linux/sched/topology.h>
#include <linux/sched/signal.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/prefetch.h>
#include <linux/blk-crypto.h>
#include <linux/part_stat.h>

#include <trace/events/block.h>

#include <linux/t10-pi.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-pm.h"
#include "blk-stat.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"

static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);

static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
static void blk_mq_request_bypass_insert(struct request *rq,
		blk_insert_t flags);
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
		struct list_head *list);
static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			 struct io_comp_batch *iob, unsigned int flags);

/*
 * Check if any
of the ctx, dispatch list or elevator * have pending work in this hardware queue. */ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) { return !list_empty_careful(&hctx->dispatch) || sbitmap_any_bit_set(&hctx->ctx_map) || blk_mq_sched_has_work(hctx); } /* * Mark this ctx as having pending work in this hardware queue */ static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { const int bit = ctx->index_hw[hctx->type]; if (!sbitmap_test_bit(&hctx->ctx_map, bit)) sbitmap_set_bit(&hctx->ctx_map, bit); } static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { const int bit = ctx->index_hw[hctx->type]; sbitmap_clear_bit(&hctx->ctx_map, bit); } struct mq_inflight { struct block_device *part; unsigned int inflight[2]; }; static bool blk_mq_check_inflight(struct request *rq, void *priv) { struct mq_inflight *mi = priv; if (rq->part && blk_do_io_stat(rq) && (!mi->part->bd_partno || rq->part == mi->part) && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; return true; } unsigned int blk_mq_in_flight(struct request_queue *q, struct block_device *part) { struct mq_inflight mi = { .part = part }; blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); return mi.inflight[0] + mi.inflight[1]; } void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, unsigned int inflight[2]) { struct mq_inflight mi = { .part = part }; blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); inflight[0] = mi.inflight[0]; inflight[1] = mi.inflight[1]; } void blk_freeze_queue_start(struct request_queue *q) { mutex_lock(&q->mq_freeze_lock); if (++q->mq_freeze_depth == 1) { percpu_ref_kill(&q->q_usage_counter); mutex_unlock(&q->mq_freeze_lock); if (queue_is_mq(q)) blk_mq_run_hw_queues(q, false); } else { mutex_unlock(&q->mq_freeze_lock); } } EXPORT_SYMBOL_GPL(blk_freeze_queue_start); void blk_mq_freeze_queue_wait(struct request_queue *q) { wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout) { return wait_event_timeout(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter), timeout); } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); /* * Guarantee no request is in use, so we can change any data structure of * the queue afterward. */ void blk_freeze_queue(struct request_queue *q) { /* * In the !blk_mq case we are only calling this to kill the * q_usage_counter, otherwise this increases the freeze depth * and waits for it to return to zero. For this reason there is * no blk_unfreeze_queue(), and blk_freeze_queue() is not * exported to drivers as the only user for unfreeze is blk_mq. 
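 *
 * A hedged driver-side sketch (the "update" step is illustrative): callers
 * that must not see any request in flight typically bracket the update with
 * the exported freeze/unfreeze pair:
 *
 *	blk_mq_freeze_queue(q);
 *	... update queue limits or swap per-queue data structures ...
 *	blk_mq_unfreeze_queue(q);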
*/
	blk_freeze_queue_start(q);
	blk_mq_freeze_queue_wait(q);
}

void blk_mq_freeze_queue(struct request_queue *q)
{
	/*
	 * ...just an alias to keep freeze and unfreeze actions balanced
	 * in the blk_mq_* namespace
	 */
	blk_freeze_queue(q);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
{
	mutex_lock(&q->mq_freeze_lock);
	if (force_atomic)
		q->q_usage_counter.data->force_atomic = true;
	q->mq_freeze_depth--;
	WARN_ON_ONCE(q->mq_freeze_depth < 0);
	if (!q->mq_freeze_depth) {
		percpu_ref_resurrect(&q->q_usage_counter);
		wake_up_all(&q->mq_freeze_wq);
	}
	mutex_unlock(&q->mq_freeze_lock);
}

void blk_mq_unfreeze_queue(struct request_queue *q)
{
	__blk_mq_unfreeze_queue(q, false);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);

/*
 * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
 * mpt3sas driver such that this function can be removed.
 */
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(&q->queue_lock, flags);
	if (!q->quiesce_depth++)
		blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
	spin_unlock_irqrestore(&q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);

/**
 * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
 * @set: tag_set to wait on
 *
 * Note: it is the driver's responsibility to make sure that quiesce has
 * been started on one or more of the request_queues of the tag_set.  This
 * function only waits for the quiesce on those request_queues that had
 * the quiesce flag set using blk_mq_quiesce_queue_nowait.
 */
void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set)
{
	if (set->flags & BLK_MQ_F_BLOCKING)
		synchronize_srcu(set->srcu);
	else
		synchronize_rcu();
}
EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);

/**
 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
 * @q: request queue.
 *
 * Note: this function does not prevent the struct request end_io()
 * callback from being invoked.  Once this function returns, no dispatch
 * can happen until the queue is unquiesced via blk_mq_unquiesce_queue().
 */
void blk_mq_quiesce_queue(struct request_queue *q)
{
	blk_mq_quiesce_queue_nowait(q);
	/* nothing to wait for non-mq queues */
	if (queue_is_mq(q))
		blk_mq_wait_quiesce_done(q->tag_set);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);

/*
 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
 * @q: request queue.
 *
 * This function restores the queue to the state it was in before
 * blk_mq_quiesce_queue() was called.
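 *
 * A hedged usage sketch: unlike a freeze, a quiesce only fences the
 * dispatch path, so drivers typically use the pair to reconfigure what
 * ->queue_rq() may observe:
 *
 *	blk_mq_quiesce_queue(q);
 *	... update state read by the dispatch path ...
 *	blk_mq_unquiesce_queue(q);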
*/ void blk_mq_unquiesce_queue(struct request_queue *q) { unsigned long flags; bool run_queue = false; spin_lock_irqsave(&q->queue_lock, flags); if (WARN_ON_ONCE(q->quiesce_depth <= 0)) { ; } else if (!--q->quiesce_depth) { blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q); run_queue = true; } spin_unlock_irqrestore(&q->queue_lock, flags); /* dispatch requests which are inserted during quiescing */ if (run_queue) blk_mq_run_hw_queues(q, true); } EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue); void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set) { struct request_queue *q; mutex_lock(&set->tag_list_lock); list_for_each_entry(q, &set->tag_list, tag_set_list) { if (!blk_queue_skip_tagset_quiesce(q)) blk_mq_quiesce_queue_nowait(q); } blk_mq_wait_quiesce_done(set); mutex_unlock(&set->tag_list_lock); } EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset); void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set) { struct request_queue *q; mutex_lock(&set->tag_list_lock); list_for_each_entry(q, &set->tag_list, tag_set_list) { if (!blk_queue_skip_tagset_quiesce(q)) blk_mq_unquiesce_queue(q); } mutex_unlock(&set->tag_list_lock); } EXPORT_SYMBOL_GPL(blk_mq_unquiesce_tagset); void blk_mq_wake_waiters(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_wakeup_all(hctx->tags, true); } void blk_rq_init(struct request_queue *q, struct request *rq) { memset(rq, 0, sizeof(*rq)); INIT_LIST_HEAD(&rq->queuelist); rq->q = q; rq->__sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->tag = BLK_MQ_NO_TAG; rq->internal_tag = BLK_MQ_NO_TAG; rq->start_time_ns = blk_time_get_ns(); rq->part = NULL; blk_crypto_rq_set_defaults(rq); } EXPORT_SYMBOL(blk_rq_init); /* Set start and alloc time when the allocated request is actually used */ static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns) { if (blk_mq_need_time_stamp(rq)) rq->start_time_ns = blk_time_get_ns(); else rq->start_time_ns = 0; #ifdef CONFIG_BLK_RQ_ALLOC_TIME if (blk_queue_rq_alloc_time(rq->q)) rq->alloc_time_ns = alloc_time_ns ?: rq->start_time_ns; else rq->alloc_time_ns = 0; #endif } static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, struct blk_mq_tags *tags, unsigned int tag) { struct blk_mq_ctx *ctx = data->ctx; struct blk_mq_hw_ctx *hctx = data->hctx; struct request_queue *q = data->q; struct request *rq = tags->static_rqs[tag]; rq->q = q; rq->mq_ctx = ctx; rq->mq_hctx = hctx; rq->cmd_flags = data->cmd_flags; if (data->flags & BLK_MQ_REQ_PM) data->rq_flags |= RQF_PM; if (blk_queue_io_stat(q)) data->rq_flags |= RQF_IO_STAT; rq->rq_flags = data->rq_flags; if (data->rq_flags & RQF_SCHED_TAGS) { rq->tag = BLK_MQ_NO_TAG; rq->internal_tag = tag; } else { rq->tag = tag; rq->internal_tag = BLK_MQ_NO_TAG; } rq->timeout = 0; rq->part = NULL; rq->io_start_time_ns = 0; rq->stats_sectors = 0; rq->nr_phys_segments = 0; #if defined(CONFIG_BLK_DEV_INTEGRITY) rq->nr_integrity_segments = 0; #endif rq->end_io = NULL; rq->end_io_data = NULL; blk_crypto_rq_set_defaults(rq); INIT_LIST_HEAD(&rq->queuelist); /* tag was already set */ WRITE_ONCE(rq->deadline, 0); req_ref_set(rq, 1); if (rq->rq_flags & RQF_USE_SCHED) { struct elevator_queue *e = data->q->elevator; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); if (e->type->ops.prepare_request) e->type->ops.prepare_request(rq); } return rq; } static inline struct request * __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data) { unsigned int tag, tag_offset; 
struct blk_mq_tags *tags; struct request *rq; unsigned long tag_mask; int i, nr = 0; tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); if (unlikely(!tag_mask)) return NULL; tags = blk_mq_tags_from_data(data); for (i = 0; tag_mask; i++) { if (!(tag_mask & (1UL << i))) continue; tag = tag_offset + i; prefetch(tags->static_rqs[tag]); tag_mask &= ~(1UL << i); rq = blk_mq_rq_ctx_init(data, tags, tag); rq_list_add(data->cached_rq, rq); nr++; } if (!(data->rq_flags & RQF_SCHED_TAGS)) blk_mq_add_active_requests(data->hctx, nr); /* caller already holds a reference, add for remainder */ percpu_ref_get_many(&data->q->q_usage_counter, nr - 1); data->nr_tags -= nr; return rq_list_pop(data->cached_rq); } static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) { struct request_queue *q = data->q; u64 alloc_time_ns = 0; struct request *rq; unsigned int tag; /* alloc_time includes depth and tag waits */ if (blk_queue_rq_alloc_time(q)) alloc_time_ns = blk_time_get_ns(); if (data->cmd_flags & REQ_NOWAIT) data->flags |= BLK_MQ_REQ_NOWAIT; if (q->elevator) { /* * All requests use scheduler tags when an I/O scheduler is * enabled for the queue. */ data->rq_flags |= RQF_SCHED_TAGS; /* * Flush/passthrough requests are special and go directly to the * dispatch list. */ if ((data->cmd_flags & REQ_OP_MASK) != REQ_OP_FLUSH && !blk_op_is_passthrough(data->cmd_flags)) { struct elevator_mq_ops *ops = &q->elevator->type->ops; WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED); data->rq_flags |= RQF_USE_SCHED; if (ops->limit_depth) ops->limit_depth(data->cmd_flags, data); } } retry: data->ctx = blk_mq_get_ctx(q); data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx); if (!(data->rq_flags & RQF_SCHED_TAGS)) blk_mq_tag_busy(data->hctx); if (data->flags & BLK_MQ_REQ_RESERVED) data->rq_flags |= RQF_RESV; /* * Try batched alloc if we want more than 1 tag. */ if (data->nr_tags > 1) { rq = __blk_mq_alloc_requests_batch(data); if (rq) { blk_mq_rq_time_init(rq, alloc_time_ns); return rq; } data->nr_tags = 1; } /* * Waiting allocations only fail because of an inactive hctx. In that * case just retry the hctx assignment and tag allocation as CPU hotplug * should have migrated us to an online CPU by now. */ tag = blk_mq_get_tag(data); if (tag == BLK_MQ_NO_TAG) { if (data->flags & BLK_MQ_REQ_NOWAIT) return NULL; /* * Give up the CPU and sleep for a random short time to * ensure that thread using a realtime scheduling class * are migrated off the CPU, and thus off the hctx that * is going away. 
*/ msleep(3); goto retry; } if (!(data->rq_flags & RQF_SCHED_TAGS)) blk_mq_inc_active_requests(data->hctx); rq = blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag); blk_mq_rq_time_init(rq, alloc_time_ns); return rq; } static struct request *blk_mq_rq_cache_fill(struct request_queue *q, struct blk_plug *plug, blk_opf_t opf, blk_mq_req_flags_t flags) { struct blk_mq_alloc_data data = { .q = q, .flags = flags, .cmd_flags = opf, .nr_tags = plug->nr_ios, .cached_rq = &plug->cached_rq, }; struct request *rq; if (blk_queue_enter(q, flags)) return NULL; plug->nr_ios = 1; rq = __blk_mq_alloc_requests(&data); if (unlikely(!rq)) blk_queue_exit(q); return rq; } static struct request *blk_mq_alloc_cached_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags) { struct blk_plug *plug = current->plug; struct request *rq; if (!plug) return NULL; if (rq_list_empty(plug->cached_rq)) { if (plug->nr_ios == 1) return NULL; rq = blk_mq_rq_cache_fill(q, plug, opf, flags); if (!rq) return NULL; } else { rq = rq_list_peek(&plug->cached_rq); if (!rq || rq->q != q) return NULL; if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) return NULL; if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) return NULL; plug->cached_rq = rq_list_next(rq); blk_mq_rq_time_init(rq, 0); } rq->cmd_flags = opf; INIT_LIST_HEAD(&rq->queuelist); return rq; } struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags) { struct request *rq; rq = blk_mq_alloc_cached_request(q, opf, flags); if (!rq) { struct blk_mq_alloc_data data = { .q = q, .flags = flags, .cmd_flags = opf, .nr_tags = 1, }; int ret; ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); rq = __blk_mq_alloc_requests(&data); if (!rq) goto out_queue_exit; } rq->__data_len = 0; rq->__sector = (sector_t) -1; rq->bio = rq->biotail = NULL; return rq; out_queue_exit: blk_queue_exit(q); return ERR_PTR(-EWOULDBLOCK); } EXPORT_SYMBOL(blk_mq_alloc_request); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx) { struct blk_mq_alloc_data data = { .q = q, .flags = flags, .cmd_flags = opf, .nr_tags = 1, }; u64 alloc_time_ns = 0; struct request *rq; unsigned int cpu; unsigned int tag; int ret; /* alloc_time includes depth and tag waits */ if (blk_queue_rq_alloc_time(q)) alloc_time_ns = blk_time_get_ns(); /* * If the tag allocator sleeps we could get an allocation for a * different hardware context. No need to complicate the low level * allocator for this for the rare use case of a command tied to * a specific queue. */ if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)) || WARN_ON_ONCE(!(flags & BLK_MQ_REQ_RESERVED))) return ERR_PTR(-EINVAL); if (hctx_idx >= q->nr_hw_queues) return ERR_PTR(-EIO); ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); /* * Check if the hardware context is actually mapped to anything. * If not tell the caller that it should skip this queue. 
*/ ret = -EXDEV; data.hctx = xa_load(&q->hctx_table, hctx_idx); if (!blk_mq_hw_queue_mapped(data.hctx)) goto out_queue_exit; cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask); if (cpu >= nr_cpu_ids) goto out_queue_exit; data.ctx = __blk_mq_get_ctx(q, cpu); if (q->elevator) data.rq_flags |= RQF_SCHED_TAGS; else blk_mq_tag_busy(data.hctx); if (flags & BLK_MQ_REQ_RESERVED) data.rq_flags |= RQF_RESV; ret = -EWOULDBLOCK; tag = blk_mq_get_tag(&data); if (tag == BLK_MQ_NO_TAG) goto out_queue_exit; if (!(data.rq_flags & RQF_SCHED_TAGS)) blk_mq_inc_active_requests(data.hctx); rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag); blk_mq_rq_time_init(rq, alloc_time_ns); rq->__data_len = 0; rq->__sector = (sector_t) -1; rq->bio = rq->biotail = NULL; return rq; out_queue_exit: blk_queue_exit(q); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); static void blk_mq_finish_request(struct request *rq) { struct request_queue *q = rq->q; if (rq->rq_flags & RQF_USE_SCHED) { q->elevator->type->ops.finish_request(rq); /* * For postflush request that may need to be * completed twice, we should clear this flag * to avoid double finish_request() on the rq. */ rq->rq_flags &= ~RQF_USE_SCHED; } } static void __blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; const int sched_tag = rq->internal_tag; blk_crypto_free_request(rq); blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; if (rq->tag != BLK_MQ_NO_TAG) { blk_mq_dec_active_requests(hctx); blk_mq_put_tag(hctx->tags, ctx, rq->tag); } if (sched_tag != BLK_MQ_NO_TAG) blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); blk_mq_sched_restart(hctx); blk_queue_exit(q); } void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; blk_mq_finish_request(rq); if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->disk->bdi); rq_qos_done(q, rq); WRITE_ONCE(rq->state, MQ_RQ_IDLE); if (req_ref_put_and_test(rq)) __blk_mq_free_request(rq); } EXPORT_SYMBOL_GPL(blk_mq_free_request); void blk_mq_free_plug_rqs(struct blk_plug *plug) { struct request *rq; while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) blk_mq_free_request(rq); } void blk_dump_rq_flags(struct request *rq, char *msg) { printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg, rq->q->disk ? rq->q->disk->disk_name : "?", (__force unsigned long long) rq->cmd_flags); printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); printk(KERN_INFO " bio %p, biotail %p, len %u\n", rq->bio, rq->biotail, blk_rq_bytes(rq)); } EXPORT_SYMBOL(blk_dump_rq_flags); static void req_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, blk_status_t error) { if (unlikely(error)) { bio->bi_status = error; } else if (req_op(rq) == REQ_OP_ZONE_APPEND) { /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. 
*/ if (bio->bi_iter.bi_size != nbytes) bio->bi_status = BLK_STS_IOERR; else bio->bi_iter.bi_sector = rq->__sector; } bio_advance(bio, nbytes); if (unlikely(rq->rq_flags & RQF_QUIET)) bio_set_flag(bio, BIO_QUIET); /* don't actually finish bio if it's part of flush sequence */ if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) bio_endio(bio); } static void blk_account_io_completion(struct request *req, unsigned int bytes) { if (req->part && blk_do_io_stat(req)) { const int sgrp = op_stat_group(req_op(req)); part_stat_lock(); part_stat_add(req->part, sectors[sgrp], bytes >> 9); part_stat_unlock(); } } static void blk_print_req_error(struct request *req, blk_status_t status) { printk_ratelimited(KERN_ERR "%s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " "phys_seg %u prio class %u\n", blk_status_to_str(status), req->q->disk ? req->q->disk->disk_name : "?", blk_rq_pos(req), (__force u32)req_op(req), blk_op_str(req_op(req)), (__force u32)(req->cmd_flags & ~REQ_OP_MASK), req->nr_phys_segments, IOPRIO_PRIO_CLASS(req->ioprio)); } /* * Fully end IO on a request. Does not support partial completions, or * errors. */ static void blk_complete_request(struct request *req) { const bool is_flush = (req->rq_flags & RQF_FLUSH_SEQ) != 0; int total_bytes = blk_rq_bytes(req); struct bio *bio = req->bio; trace_block_rq_complete(req, BLK_STS_OK, total_bytes); if (!bio) return; #ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ) req->q->integrity.profile->complete_fn(req, total_bytes); #endif /* * Upper layers may call blk_crypto_evict_key() anytime after the last * bio_endio(). Therefore, the keyslot must be released before that. */ blk_crypto_rq_put_keyslot(req); blk_account_io_completion(req, total_bytes); do { struct bio *next = bio->bi_next; /* Completion has already been traced */ bio_clear_flag(bio, BIO_TRACE_COMPLETION); if (req_op(req) == REQ_OP_ZONE_APPEND) bio->bi_iter.bi_sector = req->__sector; if (!is_flush) bio_endio(bio); bio = next; } while (bio); /* * Reset counters so that the request stacking driver * can find how many bytes remain in the request * later. */ if (!req->end_io) { req->bio = NULL; req->__data_len = 0; } } /** * blk_update_request - Complete multiple bytes without completing the request * @req: the request being processed * @error: block status code * @nr_bytes: number of bytes to complete for @req * * Description: * Ends I/O on a number of bytes attached to @req, but doesn't complete * the request structure even if @req doesn't have leftover. * If @req has leftover, sets it up for the next range of segments. * * Passing the result of blk_rq_bytes() as @nr_bytes guarantees * %false return from this function. * * Note: * The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function * except in the consistency check at the end of this function. * * Return: * %false - this request doesn't have any more data * %true - this request has more data **/ bool blk_update_request(struct request *req, blk_status_t error, unsigned int nr_bytes) { int total_bytes; trace_block_rq_complete(req, error, nr_bytes); if (!req->bio) return false; #ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && error == BLK_STS_OK) req->q->integrity.profile->complete_fn(req, nr_bytes); #endif /* * Upper layers may call blk_crypto_evict_key() anytime after the last * bio_endio(). Therefore, the keyslot must be released before that. 
*/ if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req)) __blk_crypto_rq_put_keyslot(req); if (unlikely(error && !blk_rq_is_passthrough(req) && !(req->rq_flags & RQF_QUIET)) && !test_bit(GD_DEAD, &req->q->disk->state)) { blk_print_req_error(req, error); trace_block_rq_error(req, error, nr_bytes); } blk_account_io_completion(req, nr_bytes); total_bytes = 0; while (req->bio) { struct bio *bio = req->bio; unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); if (bio_bytes == bio->bi_iter.bi_size) req->bio = bio->bi_next; /* Completion has already been traced */ bio_clear_flag(bio, BIO_TRACE_COMPLETION); req_bio_endio(req, bio, bio_bytes, error); total_bytes += bio_bytes; nr_bytes -= bio_bytes; if (!nr_bytes) break; } /* * completely done */ if (!req->bio) { /* * Reset counters so that the request stacking driver * can find how many bytes remain in the request * later. */ req->__data_len = 0; return false; } req->__data_len -= total_bytes; /* update sector only for requests with clear definition of sector */ if (!blk_rq_is_passthrough(req)) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ if (req->rq_flags & RQF_MIXED_MERGE) { req->cmd_flags &= ~REQ_FAILFAST_MASK; req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; } if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { /* * If total number of sectors is less than the first segment * size, something has gone terribly wrong. */ if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { blk_dump_rq_flags(req, "request botched"); req->__data_len = blk_rq_cur_bytes(req); } /* recalculate the number of segments */ req->nr_phys_segments = blk_recalc_rq_segments(req); } return true; } EXPORT_SYMBOL_GPL(blk_update_request); static inline void blk_account_io_done(struct request *req, u64 now) { trace_block_io_done(req); /* * Account IO completion. flush_rq isn't accounted as a * normal IO on queueing nor completion. Accounting the * containing request is enough. */ if (blk_do_io_stat(req) && req->part && !(req->rq_flags & RQF_FLUSH_SEQ)) { const int sgrp = op_stat_group(req_op(req)); part_stat_lock(); update_io_ticks(req->part, jiffies, true); part_stat_inc(req->part, ios[sgrp]); part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); part_stat_unlock(); } } static inline void blk_account_io_start(struct request *req) { trace_block_io_start(req); if (blk_do_io_stat(req)) { /* * All non-passthrough requests are created from a bio with one * exception: when a flush command that is part of a flush sequence * generated by the state machine in blk-flush.c is cloned onto the * lower device by dm-multipath we can get here without a bio. 
*/ if (req->bio) req->part = req->bio->bi_bdev; else req->part = req->q->disk->part0; part_stat_lock(); update_io_ticks(req->part, jiffies, false); part_stat_unlock(); } } static inline void __blk_mq_end_request_acct(struct request *rq, u64 now) { if (rq->rq_flags & RQF_STATS) blk_stat_add(rq, now); blk_mq_sched_completed_request(rq, now); blk_account_io_done(rq, now); } inline void __blk_mq_end_request(struct request *rq, blk_status_t error) { if (blk_mq_need_time_stamp(rq)) __blk_mq_end_request_acct(rq, blk_time_get_ns()); blk_mq_finish_request(rq); if (rq->end_io) { rq_qos_done(rq->q, rq); if (rq->end_io(rq, error) == RQ_END_IO_FREE) blk_mq_free_request(rq); } else { blk_mq_free_request(rq); } } EXPORT_SYMBOL(__blk_mq_end_request); void blk_mq_end_request(struct request *rq, blk_status_t error) { if (blk_update_request(rq, error, blk_rq_bytes(rq))) BUG(); __blk_mq_end_request(rq, error); } EXPORT_SYMBOL(blk_mq_end_request); #define TAG_COMP_BATCH 32 static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, int *tag_array, int nr_tags) { struct request_queue *q = hctx->queue; blk_mq_sub_active_requests(hctx, nr_tags); blk_mq_put_tags(hctx->tags, tag_array, nr_tags); percpu_ref_put_many(&q->q_usage_counter, nr_tags); } void blk_mq_end_request_batch(struct io_comp_batch *iob) { int tags[TAG_COMP_BATCH], nr_tags = 0; struct blk_mq_hw_ctx *cur_hctx = NULL; struct request *rq; u64 now = 0; if (iob->need_ts) now = blk_time_get_ns(); while ((rq = rq_list_pop(&iob->req_list)) != NULL) { prefetch(rq->bio); prefetch(rq->rq_next); blk_complete_request(rq); if (iob->need_ts) __blk_mq_end_request_acct(rq, now); blk_mq_finish_request(rq); rq_qos_done(rq->q, rq); /* * If end_io handler returns NONE, then it still has * ownership of the request. */ if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) continue; WRITE_ONCE(rq->state, MQ_RQ_IDLE); if (!req_ref_put_and_test(rq)) continue; blk_crypto_free_request(rq); blk_pm_mark_last_busy(rq); if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { if (cur_hctx) blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags); nr_tags = 0; cur_hctx = rq->mq_hctx; } tags[nr_tags++] = rq->tag; } if (nr_tags) blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags); } EXPORT_SYMBOL_GPL(blk_mq_end_request_batch); static void blk_complete_reqs(struct llist_head *list) { struct llist_node *entry = llist_reverse_order(llist_del_all(list)); struct request *rq, *next; llist_for_each_entry_safe(rq, next, entry, ipi_list) rq->q->mq_ops->complete(rq); } static __latent_entropy void blk_done_softirq(struct softirq_action *h) { blk_complete_reqs(this_cpu_ptr(&blk_cpu_done)); } static int blk_softirq_cpu_dead(unsigned int cpu) { blk_complete_reqs(&per_cpu(blk_cpu_done, cpu)); return 0; } static void __blk_mq_complete_request_remote(void *data) { __raise_softirq_irqoff(BLOCK_SOFTIRQ); } static inline bool blk_mq_complete_need_ipi(struct request *rq) { int cpu = raw_smp_processor_id(); if (!IS_ENABLED(CONFIG_SMP) || !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) return false; /* * With force threaded interrupts enabled, raising softirq from an SMP * function call will always result in waking the ksoftirqd thread. * This is probably worse than completing the request on a different * cache domain. */ if (force_irqthreads()) return false; /* same CPU or cache domain and capacity? 
Complete locally */ if (cpu == rq->mq_ctx->cpu || (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) && cpus_share_cache(cpu, rq->mq_ctx->cpu) && cpus_equal_capacity(cpu, rq->mq_ctx->cpu))) return false; /* don't try to IPI to an offline CPU */ return cpu_online(rq->mq_ctx->cpu); } static void blk_mq_complete_send_ipi(struct request *rq) { unsigned int cpu; cpu = rq->mq_ctx->cpu; if (llist_add(&rq->ipi_list, &per_cpu(blk_cpu_done, cpu))) smp_call_function_single_async(cpu, &per_cpu(blk_cpu_csd, cpu)); } static void blk_mq_raise_softirq(struct request *rq) { struct llist_head *list; preempt_disable(); list = this_cpu_ptr(&blk_cpu_done); if (llist_add(&rq->ipi_list, list)) raise_softirq(BLOCK_SOFTIRQ); preempt_enable(); } bool blk_mq_complete_request_remote(struct request *rq) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); /* * For request which hctx has only one ctx mapping, * or a polled request, always complete locally, * it's pointless to redirect the completion. */ if ((rq->mq_hctx->nr_ctx == 1 && rq->mq_ctx->cpu == raw_smp_processor_id()) || rq->cmd_flags & REQ_POLLED) return false; if (blk_mq_complete_need_ipi(rq)) { blk_mq_complete_send_ipi(rq); return true; } if (rq->q->nr_hw_queues == 1) { blk_mq_raise_softirq(rq); return true; } return false; } EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote); /** * blk_mq_complete_request - end I/O on a request * @rq: the request being processed * * Description: * Complete a request by scheduling the ->complete_rq operation. **/ void blk_mq_complete_request(struct request *rq) { if (!blk_mq_complete_request_remote(rq)) rq->q->mq_ops->complete(rq); } EXPORT_SYMBOL(blk_mq_complete_request); /** * blk_mq_start_request - Start processing a request * @rq: Pointer to request to be started * * Function used by device drivers to notify the block layer that a request * is going to be processed now, so blk layer can do proper initializations * such as starting the timeout timer. */ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; trace_block_rq_issue(rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) && !blk_rq_is_passthrough(rq)) { rq->io_start_time_ns = blk_time_get_ns(); rq->stats_sectors = blk_rq_sectors(rq); rq->rq_flags |= RQF_STATS; rq_qos_issue(q, rq); } WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); blk_add_timer(rq); WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); rq->mq_hctx->tags->rqs[rq->tag] = rq; #ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) q->integrity.profile->prepare_fn(rq); #endif if (rq->bio && rq->bio->bi_opf & REQ_POLLED) WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); } EXPORT_SYMBOL(blk_mq_start_request); /* * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple * queues. This is important for md arrays to benefit from merging * requests. 
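 * A plug that has only ever seen one request queue is flushed once it holds
 * BLK_MAX_REQUEST_COUNT requests; as soon as a second queue shows up (for
 * example writes striped across the member disks of an md array), the limit
 * below is doubled so that each member still accumulates a reasonable run of
 * mergeable requests before the plug is flushed.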
*/ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) { if (plug->multiple_queues) return BLK_MAX_REQUEST_COUNT * 2; return BLK_MAX_REQUEST_COUNT; } static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) { struct request *last = rq_list_peek(&plug->mq_list); if (!plug->rq_count) { trace_block_plug(rq->q); } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || (!blk_queue_nomerges(rq->q) && blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { blk_mq_flush_plug_list(plug, false); last = NULL; trace_block_plug(rq->q); } if (!plug->multiple_queues && last && last->q != rq->q) plug->multiple_queues = true; /* * Any request allocated from sched tags can't be issued to * ->queue_rqs() directly */ if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS)) plug->has_elevator = true; rq->rq_next = NULL; rq_list_add(&plug->mq_list, rq); plug->rq_count++; } /** * blk_execute_rq_nowait - insert a request to I/O scheduler for execution * @rq: request to insert * @at_head: insert request at head or tail of queue * * Description: * Insert a fully prepared request at the back of the I/O scheduler queue * for execution. Don't wait for completion. * * Note: * This function will invoke @done directly if the queue is dead. */ void blk_execute_rq_nowait(struct request *rq, bool at_head) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; WARN_ON(irqs_disabled()); WARN_ON(!blk_rq_is_passthrough(rq)); blk_account_io_start(rq); /* * As plugging can be enabled for passthrough requests on a zoned * device, directly accessing the plug instead of using blk_mq_plug() * should not have any consequences. */ if (current->plug && !at_head) { blk_add_rq_to_plug(current->plug, rq); return; } blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0); blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); struct blk_rq_wait { struct completion done; blk_status_t ret; }; static enum rq_end_io_ret blk_end_sync_rq(struct request *rq, blk_status_t ret) { struct blk_rq_wait *wait = rq->end_io_data; wait->ret = ret; complete(&wait->done); return RQ_END_IO_NONE; } bool blk_rq_is_poll(struct request *rq) { if (!rq->mq_hctx) return false; if (rq->mq_hctx->type != HCTX_TYPE_POLL) return false; return true; } EXPORT_SYMBOL_GPL(blk_rq_is_poll); static void blk_rq_poll_completion(struct request *rq, struct completion *wait) { do { blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0); cond_resched(); } while (!completion_done(wait)); } /** * blk_execute_rq - insert a request into queue for execution * @rq: request to insert * @at_head: insert request at head or tail of queue * * Description: * Insert a fully prepared request at the back of the I/O scheduler queue * for execution and wait for completion. * Return: The blk_status_t result provided to blk_mq_end_request(). */ blk_status_t blk_execute_rq(struct request *rq, bool at_head) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct blk_rq_wait wait = { .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), }; WARN_ON(irqs_disabled()); WARN_ON(!blk_rq_is_passthrough(rq)); rq->end_io_data = &wait; rq->end_io = blk_end_sync_rq; blk_account_io_start(rq); blk_mq_insert_request(rq, at_head ? 
BLK_MQ_INSERT_AT_HEAD : 0); blk_mq_run_hw_queue(hctx, false); if (blk_rq_is_poll(rq)) blk_rq_poll_completion(rq, &wait.done); else blk_wait_io(&wait.done); return wait.ret; } EXPORT_SYMBOL(blk_execute_rq); static void __blk_mq_requeue_request(struct request *rq) { struct request_queue *q = rq->q; blk_mq_put_driver_tag(rq); trace_block_rq_requeue(rq); rq_qos_requeue(q, rq); if (blk_mq_request_started(rq)) { WRITE_ONCE(rq->state, MQ_RQ_IDLE); rq->rq_flags &= ~RQF_TIMED_OUT; } } void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) { struct request_queue *q = rq->q; unsigned long flags; __blk_mq_requeue_request(rq); /* this request will be re-inserted to io scheduler queue */ blk_mq_sched_requeue_request(rq); spin_lock_irqsave(&q->requeue_lock, flags); list_add_tail(&rq->queuelist, &q->requeue_list); spin_unlock_irqrestore(&q->requeue_lock, flags); if (kick_requeue_list) blk_mq_kick_requeue_list(q); } EXPORT_SYMBOL(blk_mq_requeue_request); static void blk_mq_requeue_work(struct work_struct *work) { struct request_queue *q = container_of(work, struct request_queue, requeue_work.work); LIST_HEAD(rq_list); LIST_HEAD(flush_list); struct request *rq; spin_lock_irq(&q->requeue_lock); list_splice_init(&q->requeue_list, &rq_list); list_splice_init(&q->flush_list, &flush_list); spin_unlock_irq(&q->requeue_lock); while (!list_empty(&rq_list)) { rq = list_entry(rq_list.next, struct request, queuelist); /* * If RQF_DONTPREP is set, the request has been started by the * driver already and might have driver-specific data allocated * already. Insert it into the hctx dispatch list to avoid * block layer merges for the request. */ if (rq->rq_flags & RQF_DONTPREP) { list_del_init(&rq->queuelist); blk_mq_request_bypass_insert(rq, 0); } else { list_del_init(&rq->queuelist); blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD); } } while (!list_empty(&flush_list)) { rq = list_entry(flush_list.next, struct request, queuelist); list_del_init(&rq->queuelist); blk_mq_insert_request(rq, 0); } blk_mq_run_hw_queues(q, false); } void blk_mq_kick_requeue_list(struct request_queue *q) { kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0); } EXPORT_SYMBOL(blk_mq_kick_requeue_list); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs) { kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, msecs_to_jiffies(msecs)); } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); static bool blk_is_flush_data_rq(struct request *rq) { return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq); } static bool blk_mq_rq_inflight(struct request *rq, void *priv) { /* * If we find a request that isn't idle we know the queue is busy * as it's checked in the iter. * Return false to stop the iteration.
* * In case of queue quiesce, if one flush data request is completed, * don't count it as inflight given the flush sequence is suspended, * and the original flush data request is invisible to driver, just * like other pending requests because of quiesce */ if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) && blk_is_flush_data_rq(rq) && blk_mq_request_completed(rq))) { bool *busy = priv; *busy = true; return false; } return true; } bool blk_mq_queue_inflight(struct request_queue *q) { bool busy = false; blk_mq_queue_tag_busy_iter(q, blk_mq_rq_inflight, &busy); return busy; } EXPORT_SYMBOL_GPL(blk_mq_queue_inflight); static void blk_mq_rq_timed_out(struct request *req) { req->rq_flags |= RQF_TIMED_OUT; if (req->q->mq_ops->timeout) { enum blk_eh_timer_return ret; ret = req->q->mq_ops->timeout(req); if (ret == BLK_EH_DONE) return; WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); } blk_add_timer(req); } struct blk_expired_data { bool has_timedout_rq; unsigned long next; unsigned long timeout_start; }; static bool blk_mq_req_expired(struct request *rq, struct blk_expired_data *expired) { unsigned long deadline; if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT) return false; if (rq->rq_flags & RQF_TIMED_OUT) return false; deadline = READ_ONCE(rq->deadline); if (time_after_eq(expired->timeout_start, deadline)) return true; if (expired->next == 0) expired->next = deadline; else if (time_after(expired->next, deadline)) expired->next = deadline; return false; } void blk_mq_put_rq_ref(struct request *rq) { if (is_flush_rq(rq)) { if (rq->end_io(rq, 0) == RQ_END_IO_FREE) blk_mq_free_request(rq); } else if (req_ref_put_and_test(rq)) { __blk_mq_free_request(rq); } } static bool blk_mq_check_expired(struct request *rq, void *priv) { struct blk_expired_data *expired = priv; /* * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot * be reallocated underneath the timeout handler's processing, then * the expire check is reliable. If the request is not expired, then * it was completed and reallocated as a new request after returning * from blk_mq_check_expired(). */ if (blk_mq_req_expired(rq, expired)) { expired->has_timedout_rq = true; return false; } return true; } static bool blk_mq_handle_expired(struct request *rq, void *priv) { struct blk_expired_data *expired = priv; if (blk_mq_req_expired(rq, expired)) blk_mq_rq_timed_out(rq); return true; } static void blk_mq_timeout_work(struct work_struct *work) { struct request_queue *q = container_of(work, struct request_queue, timeout_work); struct blk_expired_data expired = { .timeout_start = jiffies, }; struct blk_mq_hw_ctx *hctx; unsigned long i; /* A deadlock might occur if a request is stuck requiring a * timeout at the same time a queue freeze is waiting * completion, since the timeout code would not be able to * acquire the queue reference here. * * That's why we don't use blk_queue_enter here; instead, we use * percpu_ref_tryget directly, because we need to be able to * obtain a reference even in the short window between the queue * starting to freeze, by dropping the first reference in * blk_freeze_queue_start, and the moment the last request is * consumed, marked by the instant q_usage_counter reaches * zero. */ if (!percpu_ref_tryget(&q->q_usage_counter)) return; /* check if there is any timed-out request */ blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &expired); if (expired.has_timedout_rq) { /* * Before walking tags, we must ensure any submit started * before the current time has finished. 
Since the submit * uses srcu or rcu, wait for a synchronization point to * ensure all running submits have finished */ blk_mq_wait_quiesce_done(q->tag_set); expired.next = 0; blk_mq_queue_tag_busy_iter(q, blk_mq_handle_expired, &expired); } if (expired.next != 0) { mod_timer(&q->timeout, expired.next); } else { /* * Request timeouts are handled as a forward rolling timer. If * we end up here it means that no requests are pending and * also that no request has been pending for a while. Mark * each hctx as idle. */ queue_for_each_hw_ctx(q, hctx, i) { /* the hctx may be unmapped, so check it here */ if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_idle(hctx); } } blk_queue_exit(q); } struct flush_busy_ctx_data { struct blk_mq_hw_ctx *hctx; struct list_head *list; }; static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data) { struct flush_busy_ctx_data *flush_data = data; struct blk_mq_hw_ctx *hctx = flush_data->hctx; struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; enum hctx_type type = hctx->type; spin_lock(&ctx->lock); list_splice_tail_init(&ctx->rq_lists[type], flush_data->list); sbitmap_clear_bit(sb, bitnr); spin_unlock(&ctx->lock); return true; } /* * Process software queues that have been marked busy, splicing them * to the for-dispatch */ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) { struct flush_busy_ctx_data data = { .hctx = hctx, .list = list, }; sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); } EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs); struct dispatch_rq_data { struct blk_mq_hw_ctx *hctx; struct request *rq; }; static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, void *data) { struct dispatch_rq_data *dispatch_data = data; struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; enum hctx_type type = hctx->type; spin_lock(&ctx->lock); if (!list_empty(&ctx->rq_lists[type])) { dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next); list_del_init(&dispatch_data->rq->queuelist); if (list_empty(&ctx->rq_lists[type])) sbitmap_clear_bit(sb, bitnr); } spin_unlock(&ctx->lock); return !dispatch_data->rq; } struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *start) { unsigned off = start ? start->index_hw[hctx->type] : 0; struct dispatch_rq_data data = { .hctx = hctx, .rq = NULL, }; __sbitmap_for_each_set(&hctx->ctx_map, off, dispatch_rq_from_ctx, &data); return data.rq; } bool __blk_mq_alloc_driver_tag(struct request *rq) { struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; int tag; blk_mq_tag_busy(rq->mq_hctx); if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { bt = &rq->mq_hctx->tags->breserved_tags; tag_offset = 0; } else { if (!hctx_may_queue(rq->mq_hctx, bt)) return false; } tag = __sbitmap_queue_get(bt); if (tag == BLK_MQ_NO_TAG) return false; rq->tag = tag + tag_offset; blk_mq_inc_active_requests(rq->mq_hctx); return true; } static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags, void *key) { struct blk_mq_hw_ctx *hctx; hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); spin_lock(&hctx->dispatch_wait_lock); if (!list_empty(&wait->entry)) { struct sbitmap_queue *sbq; list_del_init(&wait->entry); sbq = &hctx->tags->bitmap_tags; atomic_dec(&sbq->ws_active); } spin_unlock(&hctx->dispatch_wait_lock); blk_mq_run_hw_queue(hctx, true); return 1; } /* * Mark us waiting for a tag. 
For shared tags, this involves hooking us into * the tag wakeups. For non-shared tags, we can simply mark us needing a * restart. For both cases, take care to check the condition again after * marking us as waiting. */ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, struct request *rq) { struct sbitmap_queue *sbq; struct wait_queue_head *wq; wait_queue_entry_t *wait; bool ret; if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && !(blk_mq_is_shared_tags(hctx->flags))) { blk_mq_sched_mark_restart_hctx(hctx); /* * It's possible that a tag was freed in the window between the * allocation failure and adding the hardware queue to the wait * queue. * * Don't clear RESTART here, someone else could have set it. * At most this will cost an extra queue run. */ return blk_mq_get_driver_tag(rq); } wait = &hctx->dispatch_wait; if (!list_empty_careful(&wait->entry)) return false; if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) sbq = &hctx->tags->breserved_tags; else sbq = &hctx->tags->bitmap_tags; wq = &bt_wait_ptr(sbq, hctx)->wait; spin_lock_irq(&wq->lock); spin_lock(&hctx->dispatch_wait_lock); if (!list_empty(&wait->entry)) { spin_unlock(&hctx->dispatch_wait_lock); spin_unlock_irq(&wq->lock); return false; } atomic_inc(&sbq->ws_active); wait->flags &= ~WQ_FLAG_EXCLUSIVE; __add_wait_queue(wq, wait); /* * Add one explicit barrier since blk_mq_get_driver_tag() may * not imply barrier in case of failure. * * Order adding us to wait queue and allocating driver tag. * * The pair is the one implied in sbitmap_queue_wake_up() which * orders clearing sbitmap tag bits and waitqueue_active() in * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless * * Otherwise, re-order of adding wait queue and getting driver tag * may cause __sbitmap_queue_wake_up() to wake up nothing because * the waitqueue_active() may not observe us in wait queue. */ smp_mb(); /* * It's possible that a tag was freed in the window between the * allocation failure and adding the hardware queue to the wait * queue. */ ret = blk_mq_get_driver_tag(rq); if (!ret) { spin_unlock(&hctx->dispatch_wait_lock); spin_unlock_irq(&wq->lock); return false; } /* * We got a tag, remove ourselves from the wait queue to ensure * someone else gets the wakeup. 
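 * Also drop the ws_active reference taken above: the sbitmap wake-up path
 * skips its work entirely while ws_active is zero, so the count must not be
 * left inflated by a waiter that no longer exists.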
*/ list_del_init(&wait->entry); atomic_dec(&sbq->ws_active); spin_unlock(&hctx->dispatch_wait_lock); spin_unlock_irq(&wq->lock); return true; } #define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT 8 #define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR 4 /* * Update dispatch busy with the Exponential Weighted Moving Average(EWMA): * - EWMA is one simple way to compute running average value * - weight(7/8 and 1/8) is applied so that it can decrease exponentially * - take 4 as factor for avoiding to get too small(0) result, and this * factor doesn't matter because EWMA decreases exponentially */ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy) { unsigned int ewma; ewma = hctx->dispatch_busy; if (!ewma && !busy) return; ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1; if (busy) ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR; ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT; hctx->dispatch_busy = ewma; } #define BLK_MQ_RESOURCE_DELAY 3 /* ms units */ static void blk_mq_handle_dev_resource(struct request *rq, struct list_head *list) { list_add(&rq->queuelist, list); __blk_mq_requeue_request(rq); } static void blk_mq_handle_zone_resource(struct request *rq, struct list_head *zone_list) { /* * If we end up here it is because we cannot dispatch a request to a * specific zone due to LLD level zone-write locking or other zone * related resource not being available. In this case, set the request * aside in zone_list for retrying it later. */ list_add(&rq->queuelist, zone_list); __blk_mq_requeue_request(rq); } enum prep_dispatch { PREP_DISPATCH_OK, PREP_DISPATCH_NO_TAG, PREP_DISPATCH_NO_BUDGET, }; static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq, bool need_budget) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; int budget_token = -1; if (need_budget) { budget_token = blk_mq_get_dispatch_budget(rq->q); if (budget_token < 0) { blk_mq_put_driver_tag(rq); return PREP_DISPATCH_NO_BUDGET; } blk_mq_set_rq_budget_token(rq, budget_token); } if (!blk_mq_get_driver_tag(rq)) { /* * The initial allocation attempt failed, so we need to * rerun the hardware queue when a tag is freed. The * waitqueue takes care of that. If the queue is run * before we add this entry back on the dispatch list, * we'll re-run it below. */ if (!blk_mq_mark_tag_wait(hctx, rq)) { /* * All budgets not got from this function will be put * together during handling partial dispatch */ if (need_budget) blk_mq_put_dispatch_budget(rq->q, budget_token); return PREP_DISPATCH_NO_TAG; } } return PREP_DISPATCH_OK; } /* release all allocated budgets before calling to blk_mq_dispatch_rq_list */ static void blk_mq_release_budgets(struct request_queue *q, struct list_head *list) { struct request *rq; list_for_each_entry(rq, list, queuelist) { int budget_token = blk_mq_get_rq_budget_token(rq); if (budget_token >= 0) blk_mq_put_dispatch_budget(q, budget_token); } } /* * blk_mq_commit_rqs will notify driver using bd->last that there is no * more requests. (See comment in struct blk_mq_ops for commit_rqs for * details) * Attention, we should explicitly call this in unusual cases: * 1) did not queue everything initially scheduled to queue * 2) the last attempt to queue a request failed */ static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued, bool from_schedule) { if (hctx->queue->mq_ops->commit_rqs && queued) { trace_block_unplug(hctx->queue, queued, !from_schedule); hctx->queue->mq_ops->commit_rqs(hctx); } } /* * Returns true if we did some work AND can potentially do more. 
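 * Callers such as the scheduler dispatch loop use this return value to
 * decide whether it is worth fetching and dispatching another batch of
 * requests.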
*/ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, unsigned int nr_budgets) { enum prep_dispatch prep; struct request_queue *q = hctx->queue; struct request *rq; int queued; blk_status_t ret = BLK_STS_OK; LIST_HEAD(zone_list); bool needs_resource = false; if (list_empty(list)) return false; /* * Now process all the entries, sending them to the driver. */ queued = 0; do { struct blk_mq_queue_data bd; rq = list_first_entry(list, struct request, queuelist); WARN_ON_ONCE(hctx != rq->mq_hctx); prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets); if (prep != PREP_DISPATCH_OK) break; list_del_init(&rq->queuelist); bd.rq = rq; bd.last = list_empty(list); /* * once the request is queued to lld, no need to cover the * budget any more */ if (nr_budgets) nr_budgets--; ret = q->mq_ops->queue_rq(hctx, &bd); switch (ret) { case BLK_STS_OK: queued++; break; case BLK_STS_RESOURCE: needs_resource = true; fallthrough; case BLK_STS_DEV_RESOURCE: blk_mq_handle_dev_resource(rq, list); goto out; case BLK_STS_ZONE_RESOURCE: /* * Move the request to zone_list and keep going through * the dispatch list to find more requests the drive can * accept. */ blk_mq_handle_zone_resource(rq, &zone_list); needs_resource = true; break; default: blk_mq_end_request(rq, ret); } } while (!list_empty(list)); out: if (!list_empty(&zone_list)) list_splice_tail_init(&zone_list, list); /* If we didn't flush the entire list, we could have told the driver * there was more coming, but that turned out to be a lie. */ if (!list_empty(list) || ret != BLK_STS_OK) blk_mq_commit_rqs(hctx, queued, false); /* * Any items that need requeuing? Stuff them into hctx->dispatch, * that is where we will continue on next queue run. */ if (!list_empty(list)) { bool needs_restart; /* For non-shared tags, the RESTART check will suffice */ bool no_tag = prep == PREP_DISPATCH_NO_TAG && ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) || blk_mq_is_shared_tags(hctx->flags)); if (nr_budgets) blk_mq_release_budgets(q, list); spin_lock(&hctx->lock); list_splice_tail_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); /* * Order adding requests to hctx->dispatch and checking * SCHED_RESTART flag. The pair of this smp_mb() is the one * in blk_mq_sched_restart(). Avoid restart code path to * miss the new added requests to hctx->dispatch, meantime * SCHED_RESTART is observed here. */ smp_mb(); /* * If SCHED_RESTART was set by the caller of this function and * it is no longer set that means that it was cleared by another * thread and hence that a queue rerun is needed. * * If 'no_tag' is set, that means that we failed getting * a driver tag with an I/O scheduler attached. If our dispatch * waitqueue is no longer active, ensure that we run the queue * AFTER adding our entries back to the list. * * If no I/O scheduler has been configured it is possible that * the hardware queue got stopped and restarted before requests * were pushed back onto the dispatch list. Rerun the queue to * avoid starvation. Notes: * - blk_mq_run_hw_queue() checks whether or not a queue has * been stopped before rerunning a queue. * - Some but not all block drivers stop a queue before * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq * and dm-rq. * * If driver returns BLK_STS_RESOURCE and SCHED_RESTART * bit is set, run queue after a delay to avoid IO stalls * that could otherwise occur if the queue is idle. We'll do * similar if we couldn't get budget or couldn't lock a zone * and SCHED_RESTART is set. 
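 * The delay used for that case is BLK_MQ_RESOURCE_DELAY (3 ms).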
*/ needs_restart = blk_mq_sched_needs_restart(hctx); if (prep == PREP_DISPATCH_NO_BUDGET) needs_resource = true; if (!needs_restart || (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) blk_mq_run_hw_queue(hctx, true); else if (needs_resource) blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); blk_mq_update_dispatch_busy(hctx, true); return false; } blk_mq_update_dispatch_busy(hctx, false); return true; } static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx) { int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); if (cpu >= nr_cpu_ids) cpu = cpumask_first(hctx->cpumask); return cpu; } /* * It'd be great if the workqueue API had a way to pass * in a mask and had some smarts for more clever placement. * For now we just round-robin here, switching for every * BLK_MQ_CPU_WORK_BATCH queued items. */ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) { bool tried = false; int next_cpu = hctx->next_cpu; if (hctx->queue->nr_hw_queues == 1) return WORK_CPU_UNBOUND; if (--hctx->next_cpu_batch <= 0) { select_cpu: next_cpu = cpumask_next_and(next_cpu, hctx->cpumask, cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = blk_mq_first_mapped_cpu(hctx); hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; } /* * Do unbound schedule if we can't find an online CPU for this hctx, * and it should only happen in the path of handling CPU DEAD. */ if (!cpu_online(next_cpu)) { if (!tried) { tried = true; goto select_cpu; } /* * Make sure to re-select CPU next time once after CPUs * in hctx->cpumask become online again. */ hctx->next_cpu = next_cpu; hctx->next_cpu_batch = 1; return WORK_CPU_UNBOUND; } hctx->next_cpu = next_cpu; return next_cpu; } /** * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. * @hctx: Pointer to the hardware queue to run. * @msecs: Milliseconds of delay to wait before running the queue. * * Run a hardware queue asynchronously with a delay of @msecs. */ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) { if (unlikely(blk_mq_hctx_stopped(hctx))) return; kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, msecs_to_jiffies(msecs)); } EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); /** * blk_mq_run_hw_queue - Start to run a hardware queue. * @hctx: Pointer to the hardware queue to run. * @async: If we want to run the queue asynchronously. * * Check if the request queue is not in a quiesced state and if there are * pending requests to be sent. If this is true, run the queue to send requests * to hardware. */ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { bool need_run; /* * We can't run the queue inline with interrupts disabled. */ WARN_ON_ONCE(!async && in_interrupt()); might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING); /* * When queue is quiesced, we may be switching io scheduler, or * updating nr_hw_queues, or other things, and we can't run queue * any more, even __blk_mq_hctx_has_pending() can't be called safely. * * And queue will be rerun in blk_mq_unquiesce_queue() if it is * quiesced. */ __blk_mq_run_dispatch_ops(hctx->queue, false, need_run = !blk_queue_quiesced(hctx->queue) && blk_mq_hctx_has_pending(hctx)); if (!need_run) return; if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { blk_mq_delay_run_hw_queue(hctx, 0); return; } blk_mq_run_dispatch_ops(hctx->queue, blk_mq_sched_dispatch_requests(hctx)); } EXPORT_SYMBOL(blk_mq_run_hw_queue); /* * Return preferred queue to dispatch from (if any) for non-mq aware IO * scheduler.
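 * Such schedulers advertise themselves with QUEUE_FLAG_SQ_SCHED, which the
 * callers below test via blk_queue_sq_sched(); BFQ is one example.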
*/ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) { struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); /* * If the IO scheduler does not respect hardware queues when * dispatching, we just don't bother with multiple HW queues and * dispatch from hctx for the current CPU since running multiple queues * just causes lock contention inside the scheduler and pointless cache * bouncing. */ struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT]; if (!blk_mq_hctx_stopped(hctx)) return hctx; return NULL; } /** * blk_mq_run_hw_queues - Run all hardware queues in a request queue. * @q: Pointer to the request queue to run. * @async: If we want to run the queue asynchronously. */ void blk_mq_run_hw_queues(struct request_queue *q, bool async) { struct blk_mq_hw_ctx *hctx, *sq_hctx; unsigned long i; sq_hctx = NULL; if (blk_queue_sq_sched(q)) sq_hctx = blk_mq_get_sq_hctx(q); queue_for_each_hw_ctx(q, hctx, i) { if (blk_mq_hctx_stopped(hctx)) continue; /* * Dispatch from this hctx either if there's no hctx preferred * by IO scheduler or if it has requests that bypass the * scheduler. */ if (!sq_hctx || sq_hctx == hctx || !list_empty_careful(&hctx->dispatch)) blk_mq_run_hw_queue(hctx, async); } } EXPORT_SYMBOL(blk_mq_run_hw_queues); /** * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously. * @q: Pointer to the request queue to run. * @msecs: Milliseconds of delay to wait before running the queues. */ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) { struct blk_mq_hw_ctx *hctx, *sq_hctx; unsigned long i; sq_hctx = NULL; if (blk_queue_sq_sched(q)) sq_hctx = blk_mq_get_sq_hctx(q); queue_for_each_hw_ctx(q, hctx, i) { if (blk_mq_hctx_stopped(hctx)) continue; /* * If there is already a run_work pending, leave the * pending delay untouched. Otherwise, a hctx can stall * if another hctx is re-delaying the other's work * before the work executes. */ if (delayed_work_pending(&hctx->run_work)) continue; /* * Dispatch from this hctx either if there's no hctx preferred * by IO scheduler or if it has requests that bypass the * scheduler. */ if (!sq_hctx || sq_hctx == hctx || !list_empty_careful(&hctx->dispatch)) blk_mq_delay_run_hw_queue(hctx, msecs); } } EXPORT_SYMBOL(blk_mq_delay_run_hw_queues); /* * This function is often used for pausing .queue_rq() by driver when * there isn't enough resource or some conditions aren't satisfied, and * BLK_STS_RESOURCE is usually returned. * * We do not guarantee that dispatch can be drained or blocked * after blk_mq_stop_hw_queue() returns. Please use * blk_mq_quiesce_queue() for that requirement. */ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) { cancel_delayed_work(&hctx->run_work); set_bit(BLK_MQ_S_STOPPED, &hctx->state); } EXPORT_SYMBOL(blk_mq_stop_hw_queue); /* * This function is often used for pausing .queue_rq() by driver when * there isn't enough resource or some conditions aren't satisfied, and * BLK_STS_RESOURCE is usually returned. * * We do not guarantee that dispatch can be drained or blocked * after blk_mq_stop_hw_queues() returns. Please use * blk_mq_quiesce_queue() for that requirement. 
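 *
 * A minimal usage sketch (illustrative only; "struct my_dev" and its helpers
 * are hypothetical, not taken from an in-tree driver): a driver whose
 * ->queue_rq() runs out of device resources can stop the queue and restart
 * it from its completion path once resources are available again:
 *
 *     static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
 *                                     const struct blk_mq_queue_data *bd)
 *     {
 *             struct my_dev *dev = hctx->queue->queuedata;
 *
 *             if (!my_dev_has_free_slots(dev)) {
 *                     blk_mq_stop_hw_queue(hctx);
 *                     return BLK_STS_DEV_RESOURCE;
 *             }
 *             return my_dev_issue(dev, bd->rq);
 *     }
 *
 *     static void my_dev_complete_irq(struct my_dev *dev)
 *     {
 *             ...
 *             blk_mq_start_stopped_hw_queues(dev->queue, true);
 *     }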
*/ void blk_mq_stop_hw_queues(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) blk_mq_stop_hw_queue(hctx); } EXPORT_SYMBOL(blk_mq_stop_hw_queues); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) { clear_bit(BLK_MQ_S_STOPPED, &hctx->state); blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING); } EXPORT_SYMBOL(blk_mq_start_hw_queue); void blk_mq_start_hw_queues(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) blk_mq_start_hw_queue(hctx); } EXPORT_SYMBOL(blk_mq_start_hw_queues); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { if (!blk_mq_hctx_stopped(hctx)) return; clear_bit(BLK_MQ_S_STOPPED, &hctx->state); blk_mq_run_hw_queue(hctx, async); } EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) { struct blk_mq_hw_ctx *hctx; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) blk_mq_start_stopped_hw_queue(hctx, async || (hctx->flags & BLK_MQ_F_BLOCKING)); } EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); static void blk_mq_run_work_fn(struct work_struct *work) { struct blk_mq_hw_ctx *hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work); blk_mq_run_dispatch_ops(hctx->queue, blk_mq_sched_dispatch_requests(hctx)); } /** * blk_mq_request_bypass_insert - Insert a request at dispatch list. * @rq: Pointer to request to be inserted. * @flags: BLK_MQ_INSERT_* * * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ static void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; spin_lock(&hctx->lock); if (flags & BLK_MQ_INSERT_AT_HEAD) list_add(&rq->queuelist, &hctx->dispatch); else list_add_tail(&rq->queuelist, &hctx->dispatch); spin_unlock(&hctx->lock); } static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list, bool run_queue_async) { struct request *rq; enum hctx_type type = hctx->type; /* * Try to issue requests directly if the hw queue isn't busy to save an * extra enqueue & dequeue to the sw queue. */ if (!hctx->dispatch_busy && !run_queue_async) { blk_mq_run_dispatch_ops(hctx->queue, blk_mq_try_issue_list_directly(hctx, list)); if (list_empty(list)) goto out; } /* * preemption doesn't flush plug list, so it's possible ctx->cpu is * offline now */ list_for_each_entry(rq, list, queuelist) { BUG_ON(rq->mq_ctx != ctx); trace_block_rq_insert(rq); if (rq->cmd_flags & REQ_NOWAIT) run_queue_async = true; } spin_lock(&ctx->lock); list_splice_tail_init(list, &ctx->rq_lists[type]); blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); out: blk_mq_run_hw_queue(hctx, run_queue_async); } static void blk_mq_insert_request(struct request *rq, blk_insert_t flags) { struct request_queue *q = rq->q; struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; if (blk_rq_is_passthrough(rq)) { /* * Passthrough request have to be added to hctx->dispatch * directly. The device may be in a situation where it can't * handle FS request, and always returns BLK_STS_RESOURCE for * them, which gets them added to hctx->dispatch. * * If a passthrough request is required to unblock the queues, * and it is added to the scheduler queue, there is no chance to * dispatch it given we prioritize requests in hctx->dispatch. 
*/ blk_mq_request_bypass_insert(rq, flags); } else if (req_op(rq) == REQ_OP_FLUSH) { /* * Firstly normal IO request is inserted to scheduler queue or * sw queue, meantime we add flush request to dispatch queue( * hctx->dispatch) directly and there is at most one in-flight * flush request for each hw queue, so it doesn't matter to add * flush request to tail or front of the dispatch queue. * * Secondly in case of NCQ, flush request belongs to non-NCQ * command, and queueing it will fail when there is any * in-flight normal IO request(NCQ command). When adding flush * rq to the front of hctx->dispatch, it is easier to introduce * extra time to flush rq's latency because of S_SCHED_RESTART * compared with adding to the tail of dispatch queue, then * chance of flush merge is increased, and less flush requests * will be issued to controller. It is observed that ~10% time * is saved in blktests block/004 on disk attached to AHCI/NCQ * drive when adding flush rq to the front of hctx->dispatch. * * Simply queue flush rq to the front of hctx->dispatch so that * intensive flush workloads can benefit in case of NCQ HW. */ blk_mq_request_bypass_insert(rq, BLK_MQ_INSERT_AT_HEAD); } else if (q->elevator) { LIST_HEAD(list); WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); list_add(&rq->queuelist, &list); q->elevator->type->ops.insert_requests(hctx, &list, flags); } else { trace_block_rq_insert(rq); spin_lock(&ctx->lock); if (flags & BLK_MQ_INSERT_AT_HEAD) list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]); else list_add_tail(&rq->queuelist, &ctx->rq_lists[hctx->type]); blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); } } static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, unsigned int nr_segs) { int err; if (bio->bi_opf & REQ_RAHEAD) rq->cmd_flags |= REQ_FAILFAST_MASK; rq->__sector = bio->bi_iter.bi_sector; rq->write_hint = bio->bi_write_hint; blk_rq_bio_prep(rq, bio, nr_segs); /* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */ err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO); WARN_ON_ONCE(err); blk_account_io_start(rq); } static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, bool last) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { .rq = rq, .last = last, }; blk_status_t ret; /* * For OK queue, we are done. For error, caller may kill it. * Any other error (busy), just add it to our list as we * previously would have done. */ ret = q->mq_ops->queue_rq(hctx, &bd); switch (ret) { case BLK_STS_OK: blk_mq_update_dispatch_busy(hctx, false); break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: blk_mq_update_dispatch_busy(hctx, true); __blk_mq_requeue_request(rq); break; default: blk_mq_update_dispatch_busy(hctx, false); break; } return ret; } static bool blk_mq_get_budget_and_tag(struct request *rq) { int budget_token; budget_token = blk_mq_get_dispatch_budget(rq->q); if (budget_token < 0) return false; blk_mq_set_rq_budget_token(rq, budget_token); if (!blk_mq_get_driver_tag(rq)) { blk_mq_put_dispatch_budget(rq->q, budget_token); return false; } return true; } /** * blk_mq_try_issue_directly - Try to send a request directly to device driver. * @hctx: Pointer of the associated hardware queue. * @rq: Pointer to request to be sent. * * If the device has enough resources to accept a new request now, send the * request directly to device driver. Else, insert at hctx->dispatch queue, so * we can try send it another time in the future. Requests inserted at this * queue have higher priority. 
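 * ("Higher priority" because entries on hctx->dispatch are issued ahead of
 * anything still sitting in the scheduler or software queues on the next
 * queue run.)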
*/ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq) { blk_status_t ret; if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { blk_mq_insert_request(rq, 0); return; } if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) { blk_mq_insert_request(rq, 0); blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT); return; } ret = __blk_mq_issue_directly(hctx, rq, true); switch (ret) { case BLK_STS_OK: break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: blk_mq_request_bypass_insert(rq, 0); blk_mq_run_hw_queue(hctx, false); break; default: blk_mq_end_request(rq, ret); break; } } static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { blk_mq_insert_request(rq, 0); return BLK_STS_OK; } if (!blk_mq_get_budget_and_tag(rq)) return BLK_STS_RESOURCE; return __blk_mq_issue_directly(hctx, rq, last); } static void blk_mq_plug_issue_direct(struct blk_plug *plug) { struct blk_mq_hw_ctx *hctx = NULL; struct request *rq; int queued = 0; blk_status_t ret = BLK_STS_OK; while ((rq = rq_list_pop(&plug->mq_list))) { bool last = rq_list_empty(plug->mq_list); if (hctx != rq->mq_hctx) { if (hctx) { blk_mq_commit_rqs(hctx, queued, false); queued = 0; } hctx = rq->mq_hctx; } ret = blk_mq_request_issue_directly(rq, last); switch (ret) { case BLK_STS_OK: queued++; break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: blk_mq_request_bypass_insert(rq, 0); blk_mq_run_hw_queue(hctx, false); goto out; default: blk_mq_end_request(rq, ret); break; } } out: if (ret != BLK_STS_OK) blk_mq_commit_rqs(hctx, queued, false); } static void __blk_mq_flush_plug_list(struct request_queue *q, struct blk_plug *plug) { if (blk_queue_quiesced(q)) return; q->mq_ops->queue_rqs(&plug->mq_list); } static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) { struct blk_mq_hw_ctx *this_hctx = NULL; struct blk_mq_ctx *this_ctx = NULL; struct request *requeue_list = NULL; struct request **requeue_lastp = &requeue_list; unsigned int depth = 0; bool is_passthrough = false; LIST_HEAD(list); do { struct request *rq = rq_list_pop(&plug->mq_list); if (!this_hctx) { this_hctx = rq->mq_hctx; this_ctx = rq->mq_ctx; is_passthrough = blk_rq_is_passthrough(rq); } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx || is_passthrough != blk_rq_is_passthrough(rq)) { rq_list_add_tail(&requeue_lastp, rq); continue; } list_add(&rq->queuelist, &list); depth++; } while (!rq_list_empty(plug->mq_list)); plug->mq_list = requeue_list; trace_block_unplug(this_hctx->queue, depth, !from_sched); percpu_ref_get(&this_hctx->queue->q_usage_counter); /* passthrough requests should never be issued to the I/O scheduler */ if (is_passthrough) { spin_lock(&this_hctx->lock); list_splice_tail_init(&list, &this_hctx->dispatch); spin_unlock(&this_hctx->lock); blk_mq_run_hw_queue(this_hctx, from_sched); } else if (this_hctx->queue->elevator) { this_hctx->queue->elevator->type->ops.insert_requests(this_hctx, &list, 0); blk_mq_run_hw_queue(this_hctx, from_sched); } else { blk_mq_insert_requests(this_hctx, this_ctx, &list, from_sched); } percpu_ref_put(&this_hctx->queue->q_usage_counter); } void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request *rq; /* * We may have been called recursively midway through handling * plug->mq_list via a schedule() in the driver's queue_rq() callback. 
* To avoid mq_list changing under our feet, clear rq_count early and * bail out specifically if rq_count is 0 rather than checking * whether the mq_list is empty. */ if (plug->rq_count == 0) return; plug->rq_count = 0; if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { struct request_queue *q; rq = rq_list_peek(&plug->mq_list); q = rq->q; /* * Peek first request and see if we have a ->queue_rqs() hook. * If we do, we can dispatch the whole plug list in one go. We * already know at this point that all requests belong to the * same queue, caller must ensure that's the case. */ if (q->mq_ops->queue_rqs) { blk_mq_run_dispatch_ops(q, __blk_mq_flush_plug_list(q, plug)); if (rq_list_empty(plug->mq_list)) return; } blk_mq_run_dispatch_ops(q, blk_mq_plug_issue_direct(plug)); if (rq_list_empty(plug->mq_list)) return; } do { blk_mq_dispatch_plug_list(plug, from_schedule); } while (!rq_list_empty(plug->mq_list)); } static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list) { int queued = 0; blk_status_t ret = BLK_STS_OK; while (!list_empty(list)) { struct request *rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); ret = blk_mq_request_issue_directly(rq, list_empty(list)); switch (ret) { case BLK_STS_OK: queued++; break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: blk_mq_request_bypass_insert(rq, 0); if (list_empty(list)) blk_mq_run_hw_queue(hctx, false); goto out; default: blk_mq_end_request(rq, ret); break; } } out: if (ret != BLK_STS_OK) blk_mq_commit_rqs(hctx, queued, false); } static bool blk_mq_attempt_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { if (!blk_queue_nomerges(q) && bio_mergeable(bio)) { if (blk_attempt_plug_merge(q, bio, nr_segs)) return true; if (blk_mq_sched_bio_merge(q, bio, nr_segs)) return true; } return false; } static struct request *blk_mq_get_new_requests(struct request_queue *q, struct blk_plug *plug, struct bio *bio, unsigned int nsegs) { struct blk_mq_alloc_data data = { .q = q, .nr_tags = 1, .cmd_flags = bio->bi_opf, }; struct request *rq; rq_qos_throttle(q, bio); if (plug) { data.nr_tags = plug->nr_ios; plug->nr_ios = 1; data.cached_rq = &plug->cached_rq; } rq = __blk_mq_alloc_requests(&data); if (rq) return rq; rq_qos_cleanup(q, bio); if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); return NULL; } /* * Check if there is a suitable cached request and return it. */ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug, struct request_queue *q, blk_opf_t opf) { enum hctx_type type = blk_mq_get_hctx_type(opf); struct request *rq; if (!plug) return NULL; rq = rq_list_peek(&plug->cached_rq); if (!rq || rq->q != q) return NULL; if (type != rq->mq_hctx->type && (type != HCTX_TYPE_READ || rq->mq_hctx->type != HCTX_TYPE_DEFAULT)) return NULL; if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) return NULL; return rq; } static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug, struct bio *bio) { WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq); /* * If any qos ->throttle() end up blocking, we will have flushed the * plug and hence killed the cached_rq list as well. Pop this entry * before we throttle. */ plug->cached_rq = rq_list_next(rq); rq_qos_throttle(rq->q, bio); blk_mq_rq_time_init(rq, 0); rq->cmd_flags = bio->bi_opf; INIT_LIST_HEAD(&rq->queuelist); } /** * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. 
* * Builds up a request structure from @q and @bio and send to the device. The * request may not be queued directly to hardware if: * * This request can be merged with another one * * We want to place request at plug queue for possible future merging * * There is an IO scheduler active at this queue * * It will not queue the request if there is an error with the bio, or at the * request creation. */ void blk_mq_submit_bio(struct bio *bio) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); struct blk_plug *plug = blk_mq_plug(bio); const int is_sync = op_is_sync(bio->bi_opf); struct blk_mq_hw_ctx *hctx; unsigned int nr_segs = 1; struct request *rq; blk_status_t ret; bio = blk_queue_bounce(bio, q); /* * If the plug has a cached request for this queue, try use it. * * The cached request already holds a q_usage_counter reference and we * don't have to acquire a new one if we use it. */ rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf); if (!rq) { if (unlikely(bio_queue_enter(bio))) return; } if (unlikely(bio_may_exceed_limits(bio, &q->limits))) { bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); if (!bio) goto queue_exit; } if (!bio_integrity_prep(bio)) goto queue_exit; if (blk_mq_attempt_bio_merge(q, bio, nr_segs)) goto queue_exit; if (!rq) { rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); if (unlikely(!rq)) goto queue_exit; } else { blk_mq_use_cached_rq(rq, plug, bio); } trace_block_getrq(bio); rq_qos_track(q, rq, bio); blk_mq_bio_to_request(rq, bio, nr_segs); ret = blk_crypto_rq_get_keyslot(rq); if (ret != BLK_STS_OK) { bio->bi_status = ret; bio_endio(bio); blk_mq_free_request(rq); return; } if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq)) return; if (plug) { blk_add_rq_to_plug(plug, rq); return; } hctx = rq->mq_hctx; if ((rq->rq_flags & RQF_USE_SCHED) || (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { blk_mq_insert_request(rq, 0); blk_mq_run_hw_queue(hctx, true); } else { blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq)); } return; queue_exit: /* * Don't drop the queue reference if we were trying to use a cached * request and thus didn't acquire one. */ if (!rq) blk_queue_exit(q); } #ifdef CONFIG_BLK_MQ_STACKING /** * blk_insert_cloned_request - Helper for stacking drivers to submit a request * @rq: the request being queued */ blk_status_t blk_insert_cloned_request(struct request *rq) { struct request_queue *q = rq->q; unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq)); unsigned int max_segments = blk_rq_get_max_segments(rq); blk_status_t ret; if (blk_rq_sectors(rq) > max_sectors) { /* * SCSI device does not have a good way to return if * Write Same/Zero is actually supported. If a device rejects * a non-read/write command (discard, write same,etc.) the * low-level device driver will set the relevant queue limit to * 0 to prevent blk-lib from issuing more of the offending * operations. Commands queued prior to the queue limit being * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O * errors being propagated to upper layers. */ if (max_sectors == 0) return BLK_STS_NOTSUPP; printk(KERN_ERR "%s: over max size limit. (%u > %u)\n", __func__, blk_rq_sectors(rq), max_sectors); return BLK_STS_IOERR; } /* * The queue settings related to segment counting may differ from the * original queue. */ rq->nr_phys_segments = blk_recalc_rq_segments(rq); if (rq->nr_phys_segments > max_segments) { printk(KERN_ERR "%s: over max segments limit. 
(%u > %u)\n", __func__, rq->nr_phys_segments, max_segments); return BLK_STS_IOERR; } if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; ret = blk_crypto_rq_get_keyslot(rq); if (ret != BLK_STS_OK) return ret; blk_account_io_start(rq); /* * Since we have a scheduler attached on the top device, * bypass a potential scheduler on the bottom device for * insert. */ blk_mq_run_dispatch_ops(q, ret = blk_mq_request_issue_directly(rq, true)); if (ret) blk_account_io_done(rq, blk_time_get_ns()); return ret; } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); /** * blk_rq_unprep_clone - Helper function to free all bios in a cloned request * @rq: the clone request to be cleaned up * * Description: * Free all bios in @rq for a cloned request. */ void blk_rq_unprep_clone(struct request *rq) { struct bio *bio; while ((bio = rq->bio) != NULL) { rq->bio = bio->bi_next; bio_put(bio); } } EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); /** * blk_rq_prep_clone - Helper function to setup clone request * @rq: the request to be setup * @rq_src: original request to be cloned * @bs: bio_set that bios for clone are allocated from * @gfp_mask: memory allocation mask for bio * @bio_ctr: setup function to be called for each clone bio. * Returns %0 for success, non %0 for failure. * @data: private data to be passed to @bio_ctr * * Description: * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. * Also, pages which the original bios are pointing to are not copied * and the cloned bios just point same pages. * So cloned bios must be completed before original bios, which means * the caller must complete @rq before @rq_src. */ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data) { struct bio *bio, *bio_src; if (!bs) bs = &fs_bio_set; __rq_for_each_bio(bio_src, rq_src) { bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask, bs); if (!bio) goto free_and_out; if (bio_ctr && bio_ctr(bio, bio_src, data)) goto free_and_out; if (rq->bio) { rq->biotail->bi_next = bio; rq->biotail = bio; } else { rq->bio = rq->biotail = bio; } bio = NULL; } /* Copy attributes of the original request to the clone request. */ rq->__sector = blk_rq_pos(rq_src); rq->__data_len = blk_rq_bytes(rq_src); if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { rq->rq_flags |= RQF_SPECIAL_PAYLOAD; rq->special_vec = rq_src->special_vec; } rq->nr_phys_segments = rq_src->nr_phys_segments; rq->ioprio = rq_src->ioprio; rq->write_hint = rq_src->write_hint; if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) goto free_and_out; return 0; free_and_out: if (bio) bio_put(bio); blk_rq_unprep_clone(rq); return -ENOMEM; } EXPORT_SYMBOL_GPL(blk_rq_prep_clone); #endif /* CONFIG_BLK_MQ_STACKING */ /* * Steal bios from a request and add them to a bio list. * The request must not have been partially completed before. 
*/ void blk_steal_bios(struct bio_list *list, struct request *rq) { if (rq->bio) { if (list->tail) list->tail->bi_next = rq->bio; else list->head = rq->bio; list->tail = rq->biotail; rq->bio = NULL; rq->biotail = NULL; } rq->__data_len = 0; } EXPORT_SYMBOL_GPL(blk_steal_bios); static size_t order_to_size(unsigned int order) { return (size_t)PAGE_SIZE << order; } /* called before freeing request pool in @tags */ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags, struct blk_mq_tags *tags) { struct page *page; unsigned long flags; /* * There is no need to clear mapping if driver tags is not initialized * or the mapping belongs to the driver tags. */ if (!drv_tags || drv_tags == tags) return; list_for_each_entry(page, &tags->page_list, lru) { unsigned long start = (unsigned long)page_address(page); unsigned long end = start + order_to_size(page->private); int i; for (i = 0; i < drv_tags->nr_tags; i++) { struct request *rq = drv_tags->rqs[i]; unsigned long rq_addr = (unsigned long)rq; if (rq_addr >= start && rq_addr < end) { WARN_ON_ONCE(req_ref_read(rq) != 0); cmpxchg(&drv_tags->rqs[i], rq, NULL); } } } /* * Wait until all pending iteration is done. * * Request reference is cleared and it is guaranteed to be observed * after the ->lock is released. */ spin_lock_irqsave(&drv_tags->lock, flags); spin_unlock_irqrestore(&drv_tags->lock, flags); } void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx) { struct blk_mq_tags *drv_tags; struct page *page; if (list_empty(&tags->page_list)) return; if (blk_mq_is_shared_tags(set->flags)) drv_tags = set->shared_tags; else drv_tags = set->tags[hctx_idx]; if (tags->static_rqs && set->ops->exit_request) { int i; for (i = 0; i < tags->nr_tags; i++) { struct request *rq = tags->static_rqs[i]; if (!rq) continue; set->ops->exit_request(set, rq, hctx_idx); tags->static_rqs[i] = NULL; } } blk_mq_clear_rq_mapping(drv_tags, tags); while (!list_empty(&tags->page_list)) { page = list_first_entry(&tags->page_list, struct page, lru); list_del_init(&page->lru); /* * Remove kmemleak object previously allocated in * blk_mq_alloc_rqs(). 
*/ kmemleak_free(page_address(page)); __free_pages(page, page->private); } } void blk_mq_free_rq_map(struct blk_mq_tags *tags) { kfree(tags->rqs); tags->rqs = NULL; kfree(tags->static_rqs); tags->static_rqs = NULL; blk_mq_free_tags(tags); } static enum hctx_type hctx_idx_to_type(struct blk_mq_tag_set *set, unsigned int hctx_idx) { int i; for (i = 0; i < set->nr_maps; i++) { unsigned int start = set->map[i].queue_offset; unsigned int end = start + set->map[i].nr_queues; if (hctx_idx >= start && hctx_idx < end) break; } if (i >= set->nr_maps) i = HCTX_TYPE_DEFAULT; return i; } static int blk_mq_get_hctx_node(struct blk_mq_tag_set *set, unsigned int hctx_idx) { enum hctx_type type = hctx_idx_to_type(set, hctx_idx); return blk_mq_hw_queue_to_node(&set->map[type], hctx_idx); } static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, unsigned int hctx_idx, unsigned int nr_tags, unsigned int reserved_tags) { int node = blk_mq_get_hctx_node(set, hctx_idx); struct blk_mq_tags *tags; if (node == NUMA_NO_NODE) node = set->numa_node; tags = blk_mq_init_tags(nr_tags, reserved_tags, node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); if (!tags->rqs) goto err_free_tags; tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); if (!tags->static_rqs) goto err_free_rqs; return tags; err_free_rqs: kfree(tags->rqs); err_free_tags: blk_mq_free_tags(tags); return NULL; } static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, int node) { int ret; if (set->ops->init_request) { ret = set->ops->init_request(set, rq, hctx_idx, node); if (ret) return ret; } WRITE_ONCE(rq->state, MQ_RQ_IDLE); return 0; } static int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx, unsigned int depth) { unsigned int i, j, entries_per_page, max_order = 4; int node = blk_mq_get_hctx_node(set, hctx_idx); size_t rq_size, left; if (node == NUMA_NO_NODE) node = set->numa_node; INIT_LIST_HEAD(&tags->page_list); /* * rq_size is the size of the request plus driver payload, rounded * to the cacheline size */ rq_size = round_up(sizeof(struct request) + set->cmd_size, cache_line_size()); left = rq_size * depth; for (i = 0; i < depth; ) { int this_order = max_order; struct page *page; int to_do; void *p; while (this_order && left < order_to_size(this_order - 1)) this_order--; do { page = alloc_pages_node(node, GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, this_order); if (page) break; if (!this_order--) break; if (order_to_size(this_order) < rq_size) break; } while (1); if (!page) goto fail; page->private = this_order; list_add_tail(&page->lru, &tags->page_list); p = page_address(page); /* * Allow kmemleak to scan these pages as they contain pointers * to additional allocations like via ops->init_request(). 
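 * Without this annotation the per-request driver data allocated by
 * ops->init_request() would only be referenced from inside these raw pages,
 * which kmemleak does not track by default, and would therefore show up as
 * false-positive leaks.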
*/ kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); entries_per_page = order_to_size(this_order) / rq_size; to_do = min(entries_per_page, depth - i); left -= to_do * rq_size; for (j = 0; j < to_do; j++) { struct request *rq = p; tags->static_rqs[i] = rq; if (blk_mq_init_request(set, rq, hctx_idx, node)) { tags->static_rqs[i] = NULL; goto fail; } p += rq_size; i++; } } return 0; fail: blk_mq_free_rqs(set, tags, hctx_idx); return -ENOMEM; } struct rq_iter_data { struct blk_mq_hw_ctx *hctx; bool has_rq; }; static bool blk_mq_has_request(struct request *rq, void *data) { struct rq_iter_data *iter_data = data; if (rq->mq_hctx != iter_data->hctx) return true; iter_data->has_rq = true; return false; } static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx) { struct blk_mq_tags *tags = hctx->sched_tags ? hctx->sched_tags : hctx->tags; struct rq_iter_data data = { .hctx = hctx, }; blk_mq_all_tag_iter(tags, blk_mq_has_request, &data); return data.has_rq; } static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu, struct blk_mq_hw_ctx *hctx) { if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu) return false; if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids) return false; return true; } static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node) { struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_online); if (!cpumask_test_cpu(cpu, hctx->cpumask) || !blk_mq_last_cpu_in_hctx(cpu, hctx)) return 0; /* * Prevent new request from being allocated on the current hctx. * * The smp_mb__after_atomic() Pairs with the implied barrier in * test_and_set_bit_lock in sbitmap_get(). Ensures the inactive flag is * seen once we return from the tag allocator. */ set_bit(BLK_MQ_S_INACTIVE, &hctx->state); smp_mb__after_atomic(); /* * Try to grab a reference to the queue and wait for any outstanding * requests. If we could not grab a reference the queue has been * frozen and there are no requests. */ if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) { while (blk_mq_hctx_has_requests(hctx)) msleep(5); percpu_ref_put(&hctx->queue->q_usage_counter); } return 0; } static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node) { struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_online); if (cpumask_test_cpu(cpu, hctx->cpumask)) clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); return 0; } /* * 'cpu' is going away. splice any existing rq_list entries from this * software queue to the hw queue dispatch list, and ensure that it * gets run. 
*/ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node) { struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; LIST_HEAD(tmp); enum hctx_type type; hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead); if (!cpumask_test_cpu(cpu, hctx->cpumask)) return 0; ctx = __blk_mq_get_ctx(hctx->queue, cpu); type = hctx->type; spin_lock(&ctx->lock); if (!list_empty(&ctx->rq_lists[type])) { list_splice_init(&ctx->rq_lists[type], &tmp); blk_mq_hctx_clear_pending(hctx, ctx); } spin_unlock(&ctx->lock); if (list_empty(&tmp)) return 0; spin_lock(&hctx->lock); list_splice_tail_init(&tmp, &hctx->dispatch); spin_unlock(&hctx->lock); blk_mq_run_hw_queue(hctx, true); return 0; } static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) { if (!(hctx->flags & BLK_MQ_F_STACKING)) cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, &hctx->cpuhp_online); cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); } /* * Before freeing hw queue, clearing the flush request reference in * tags->rqs[] for avoiding potential UAF. */ static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags, unsigned int queue_depth, struct request *flush_rq) { int i; unsigned long flags; /* The hw queue may not be mapped yet */ if (!tags) return; WARN_ON_ONCE(req_ref_read(flush_rq) != 0); for (i = 0; i < queue_depth; i++) cmpxchg(&tags->rqs[i], flush_rq, NULL); /* * Wait until all pending iteration is done. * * Request reference is cleared and it is guaranteed to be observed * after the ->lock is released. */ spin_lock_irqsave(&tags->lock, flags); spin_unlock_irqrestore(&tags->lock, flags); } /* hctx->ctxs will be freed in queue's release handler */ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { struct request *flush_rq = hctx->fq->flush_rq; if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_idle(hctx); if (blk_queue_init_done(q)) blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], set->queue_depth, flush_rq); if (set->ops->exit_request) set->ops->exit_request(set, flush_rq, hctx_idx); if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); blk_mq_remove_cpuhp(hctx); xa_erase(&q->hctx_table, hctx_idx); spin_lock(&q->unused_hctx_lock); list_add(&hctx->hctx_list, &q->unused_hctx_list); spin_unlock(&q->unused_hctx_lock); } static void blk_mq_exit_hw_queues(struct request_queue *q, struct blk_mq_tag_set *set, int nr_queue) { struct blk_mq_hw_ctx *hctx; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; blk_mq_exit_hctx(q, set, hctx, i); } } static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) { hctx->queue_num = hctx_idx; if (!(hctx->flags & BLK_MQ_F_STACKING)) cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, &hctx->cpuhp_online); cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); hctx->tags = set->tags[hctx_idx]; if (set->ops->init_hctx && set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto unregister_cpu_notifier; if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, hctx->numa_node)) goto exit_hctx; if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) goto exit_flush_rq; return 0; exit_flush_rq: if (set->ops->exit_request) set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); unregister_cpu_notifier: blk_mq_remove_cpuhp(hctx); return -1; } static struct 
blk_mq_hw_ctx * blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, int node) { struct blk_mq_hw_ctx *hctx; gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY; hctx = kzalloc_node(sizeof(struct blk_mq_hw_ctx), gfp, node); if (!hctx) goto fail_alloc_hctx; if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node)) goto free_hctx; atomic_set(&hctx->nr_active, 0); if (node == NUMA_NO_NODE) node = set->numa_node; hctx->numa_node = node; INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; INIT_LIST_HEAD(&hctx->hctx_list); /* * Allocate space for all possible cpus to avoid allocation at * runtime */ hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), gfp, node); if (!hctx->ctxs) goto free_cpumask; if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), gfp, node, false, false)) goto free_ctxs; hctx->nr_ctx = 0; spin_lock_init(&hctx->dispatch_wait_lock); init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); INIT_LIST_HEAD(&hctx->dispatch_wait.entry); hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); if (!hctx->fq) goto free_bitmap; blk_mq_hctx_kobj_init(hctx); return hctx; free_bitmap: sbitmap_free(&hctx->ctx_map); free_ctxs: kfree(hctx->ctxs); free_cpumask: free_cpumask_var(hctx->cpumask); free_hctx: kfree(hctx); fail_alloc_hctx: return NULL; } static void blk_mq_init_cpu_queues(struct request_queue *q, unsigned int nr_hw_queues) { struct blk_mq_tag_set *set = q->tag_set; unsigned int i, j; for_each_possible_cpu(i) { struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; int k; __ctx->cpu = i; spin_lock_init(&__ctx->lock); for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++) INIT_LIST_HEAD(&__ctx->rq_lists[k]); __ctx->queue = q; /* * Set local node, IFF we have more than one hw queue. 
If * not, we remain on the home node of the device */ for (j = 0; j < set->nr_maps; j++) { hctx = blk_mq_map_queue_type(q, j, i); if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) hctx->numa_node = cpu_to_node(i); } } } struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set, unsigned int hctx_idx, unsigned int depth) { struct blk_mq_tags *tags; int ret; tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags); if (!tags) return NULL; ret = blk_mq_alloc_rqs(set, tags, hctx_idx, depth); if (ret) { blk_mq_free_rq_map(tags); return NULL; } return tags; } static bool __blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set, int hctx_idx) { if (blk_mq_is_shared_tags(set->flags)) { set->tags[hctx_idx] = set->shared_tags; return true; } set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx, set->queue_depth); return set->tags[hctx_idx]; } void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx) { if (tags) { blk_mq_free_rqs(set, tags, hctx_idx); blk_mq_free_rq_map(tags); } } static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, unsigned int hctx_idx) { if (!blk_mq_is_shared_tags(set->flags)) blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx); set->tags[hctx_idx] = NULL; } static void blk_mq_map_swqueue(struct request_queue *q) { unsigned int j, hctx_idx; unsigned long i; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; queue_for_each_hw_ctx(q, hctx, i) { cpumask_clear(hctx->cpumask); hctx->nr_ctx = 0; hctx->dispatch_from = NULL; } /* * Map software to hardware queues. * * If the cpu isn't present, the cpu is mapped to first hctx. */ for_each_possible_cpu(i) { ctx = per_cpu_ptr(q->queue_ctx, i); for (j = 0; j < set->nr_maps; j++) { if (!set->map[j].nr_queues) { ctx->hctxs[j] = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT, i); continue; } hctx_idx = set->map[j].mq_map[i]; /* unmapped hw queue can be remapped after CPU topo changed */ if (!set->tags[hctx_idx] && !__blk_mq_alloc_map_and_rqs(set, hctx_idx)) { /* * If tags initialization fail for some hctx, * that hctx won't be brought online. In this * case, remap the current ctx to hctx[0] which * is guaranteed to always have tags allocated */ set->map[j].mq_map[i] = 0; } hctx = blk_mq_map_queue_type(q, j, i); ctx->hctxs[j] = hctx; /* * If the CPU is already set in the mask, then we've * mapped this one already. This can happen if * devices share queues across queue maps. */ if (cpumask_test_cpu(i, hctx->cpumask)) continue; cpumask_set_cpu(i, hctx->cpumask); hctx->type = j; ctx->index_hw[hctx->type] = hctx->nr_ctx; hctx->ctxs[hctx->nr_ctx++] = ctx; /* * If the nr_ctx type overflows, we have exceeded the * amount of sw queues we can support. */ BUG_ON(!hctx->nr_ctx); } for (; j < HCTX_MAX_TYPES; j++) ctx->hctxs[j] = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT, i); } queue_for_each_hw_ctx(q, hctx, i) { /* * If no software queues are mapped to this hardware queue, * disable it and free the request entries. */ if (!hctx->nr_ctx) { /* Never unmap queue 0. We need it as a * fallback in case of a new remap fails * allocation */ if (i) __blk_mq_free_map_and_rqs(set, i); hctx->tags = NULL; continue; } hctx->tags = set->tags[i]; WARN_ON(!hctx->tags); /* * Set the map size to the number of mapped software queues. * This is more accurate and more efficient than looping * over all possibly mapped software queues. 
*/ sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx); /* * Initialize batch roundrobin counts */ hctx->next_cpu = blk_mq_first_mapped_cpu(hctx); hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; } } /* * Caller needs to ensure that we're either frozen/quiesced, or that * the queue isn't live yet. */ static void queue_set_hctx_shared(struct request_queue *q, bool shared) { struct blk_mq_hw_ctx *hctx; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) { if (shared) { hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; } else { blk_mq_tag_idle(hctx); hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; } } } static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, bool shared) { struct request_queue *q; lockdep_assert_held(&set->tag_list_lock); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_freeze_queue(q); queue_set_hctx_shared(q, shared); blk_mq_unfreeze_queue(q); } } static void blk_mq_del_queue_tag_set(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; mutex_lock(&set->tag_list_lock); list_del(&q->tag_set_list); if (list_is_singular(&set->tag_list)) { /* just transitioned to unshared */ set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_shared(set, false); } mutex_unlock(&set->tag_list_lock); INIT_LIST_HEAD(&q->tag_set_list); } static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, struct request_queue *q) { mutex_lock(&set->tag_list_lock); /* * Check to see if we're transitioning to shared (from 1 to 2 queues). */ if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_shared(set, true); } if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); list_add_tail(&q->tag_set_list, &set->tag_list); mutex_unlock(&set->tag_list_lock); } /* All allocations will be freed in release handler of q->mq_kobj */ static int blk_mq_alloc_ctxs(struct request_queue *q) { struct blk_mq_ctxs *ctxs; int cpu; ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL); if (!ctxs) return -ENOMEM; ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx); if (!ctxs->queue_ctx) goto fail; for_each_possible_cpu(cpu) { struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu); ctx->ctxs = ctxs; } q->mq_kobj = &ctxs->kobj; q->queue_ctx = ctxs->queue_ctx; return 0; fail: kfree(ctxs); return -ENOMEM; } /* * It is the actual release handler for mq, but we do it from * request queue's release handler for avoiding use-after-free * and headache because q->mq_kobj shouldn't have been introduced, * but we can't group ctx/kctx kobj without it. */ void blk_mq_release(struct request_queue *q) { struct blk_mq_hw_ctx *hctx, *next; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); /* all hctx are in .unused_hctx_list now */ list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { list_del_init(&hctx->hctx_list); kobject_put(&hctx->kobj); } xa_destroy(&q->hctx_table); /* * release .mq_kobj and sw queue's kobject now because * both share lifetime with request queue. */ blk_mq_sysfs_deinit(q); } struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata) { struct queue_limits default_lim = { }; struct request_queue *q; int ret; q = blk_alloc_queue(lim ? 
lim : &default_lim, set->numa_node); if (IS_ERR(q)) return q; q->queuedata = queuedata; ret = blk_mq_init_allocated_queue(set, q); if (ret) { blk_put_queue(q); return ERR_PTR(ret); } return q; } EXPORT_SYMBOL(blk_mq_alloc_queue); /** * blk_mq_destroy_queue - shutdown a request queue * @q: request queue to shutdown * * This shuts down a request queue allocated by blk_mq_alloc_queue(). All future * requests will be failed with -ENODEV. The caller is responsible for dropping * the reference from blk_mq_alloc_queue() by calling blk_put_queue(). * * Context: can sleep */ void blk_mq_destroy_queue(struct request_queue *q) { WARN_ON_ONCE(!queue_is_mq(q)); WARN_ON_ONCE(blk_queue_registered(q)); might_sleep(); blk_queue_flag_set(QUEUE_FLAG_DYING, q); blk_queue_start_drain(q); blk_mq_freeze_queue_wait(q); blk_sync_queue(q); blk_mq_cancel_work_sync(q); blk_mq_exit_queue(q); } EXPORT_SYMBOL(blk_mq_destroy_queue); struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass) { struct request_queue *q; struct gendisk *disk; q = blk_mq_alloc_queue(set, lim, queuedata); if (IS_ERR(q)) return ERR_CAST(q); disk = __alloc_disk_node(q, set->numa_node, lkclass); if (!disk) { blk_mq_destroy_queue(q); blk_put_queue(q); return ERR_PTR(-ENOMEM); } set_bit(GD_OWNS_QUEUE, &disk->state); return disk; } EXPORT_SYMBOL(__blk_mq_alloc_disk); struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass) { struct gendisk *disk; if (!blk_get_queue(q)) return NULL; disk = __alloc_disk_node(q, NUMA_NO_NODE, lkclass); if (!disk) blk_put_queue(q); return disk; } EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( struct blk_mq_tag_set *set, struct request_queue *q, int hctx_idx, int node) { struct blk_mq_hw_ctx *hctx = NULL, *tmp; /* reuse dead hctx first */ spin_lock(&q->unused_hctx_lock); list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { if (tmp->numa_node == node) { hctx = tmp; break; } } if (hctx) list_del_init(&hctx->hctx_list); spin_unlock(&q->unused_hctx_lock); if (!hctx) hctx = blk_mq_alloc_hctx(q, set, node); if (!hctx) goto fail; if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) goto free_hctx; return hctx; free_hctx: kobject_put(&hctx->kobj); fail: return NULL; } static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i, j; /* protect against switching io scheduler */ mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { int old_node; int node = blk_mq_get_hctx_node(set, i); struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i); if (old_hctx) { old_node = old_hctx->numa_node; blk_mq_exit_hctx(q, set, old_hctx, i); } if (!blk_mq_alloc_and_init_hctx(set, q, i, node)) { if (!old_hctx) break; pr_warn("Allocate new hctx on node %d fails, fallback to previous one on node %d\n", node, old_node); hctx = blk_mq_alloc_and_init_hctx(set, q, i, old_node); WARN_ON_ONCE(!hctx); } } /* * Increasing nr_hw_queues fails. Free the newly allocated * hctxs and keep the previous q->nr_hw_queues. 
*/ if (i != set->nr_hw_queues) { j = q->nr_hw_queues; } else { j = i; q->nr_hw_queues = set->nr_hw_queues; } xa_for_each_start(&q->hctx_table, j, hctx, j) blk_mq_exit_hctx(q, set, hctx, j); mutex_unlock(&q->sysfs_lock); } static void blk_mq_update_poll_flag(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; if (set->nr_maps > HCTX_TYPE_POLL && set->map[HCTX_TYPE_POLL].nr_queues) blk_queue_flag_set(QUEUE_FLAG_POLL, q); else blk_queue_flag_clear(QUEUE_FLAG_POLL, q); } int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q) { /* mark the queue as mq asap */ q->mq_ops = set->ops; if (blk_mq_alloc_ctxs(q)) goto err_exit; /* init q->mq_kobj and sw queues' kobjects */ blk_mq_sysfs_init(q); INIT_LIST_HEAD(&q->unused_hctx_list); spin_lock_init(&q->unused_hctx_lock); xa_init(&q->hctx_table); blk_mq_realloc_hw_ctxs(set, q); if (!q->nr_hw_queues) goto err_hctxs; INIT_WORK(&q->timeout_work, blk_mq_timeout_work); blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); q->tag_set = set; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; blk_mq_update_poll_flag(q); INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); INIT_LIST_HEAD(&q->flush_list); INIT_LIST_HEAD(&q->requeue_list); spin_lock_init(&q->requeue_lock); q->nr_requests = set->queue_depth; blk_mq_init_cpu_queues(q, set->nr_hw_queues); blk_mq_add_queue_tag_set(set, q); blk_mq_map_swqueue(q); return 0; err_hctxs: blk_mq_release(q); err_exit: q->mq_ops = NULL; return -ENOMEM; } EXPORT_SYMBOL(blk_mq_init_allocated_queue); /* tags can _not_ be used after returning from blk_mq_exit_queue */ void blk_mq_exit_queue(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */ blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */ blk_mq_del_queue_tag_set(q); } static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) { int i; if (blk_mq_is_shared_tags(set->flags)) { set->shared_tags = blk_mq_alloc_map_and_rqs(set, BLK_MQ_NO_HCTX_IDX, set->queue_depth); if (!set->shared_tags) return -ENOMEM; } for (i = 0; i < set->nr_hw_queues; i++) { if (!__blk_mq_alloc_map_and_rqs(set, i)) goto out_unwind; cond_resched(); } return 0; out_unwind: while (--i >= 0) __blk_mq_free_map_and_rqs(set, i); if (blk_mq_is_shared_tags(set->flags)) { blk_mq_free_map_and_rqs(set, set->shared_tags, BLK_MQ_NO_HCTX_IDX); } return -ENOMEM; } /* * Allocate the request maps associated with this tag_set. Note that this * may reduce the depth asked for, if memory is tight. set->queue_depth * will be updated to reflect the allocated depth. */ static int blk_mq_alloc_set_map_and_rqs(struct blk_mq_tag_set *set) { unsigned int depth; int err; depth = set->queue_depth; do { err = __blk_mq_alloc_rq_maps(set); if (!err) break; set->queue_depth >>= 1; if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { err = -ENOMEM; break; } } while (set->queue_depth); if (!set->queue_depth || err) { pr_err("blk-mq: failed to allocate request map\n"); return -ENOMEM; } if (depth != set->queue_depth) pr_info("blk-mq: reduced tag depth (%u -> %u)\n", depth, set->queue_depth); return 0; } static void blk_mq_update_queue_map(struct blk_mq_tag_set *set) { /* * blk_mq_map_queues() and multiple .map_queues() implementations * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the * number of hardware queues. 
*/ if (set->nr_maps == 1) set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; if (set->ops->map_queues) { int i; /* * transport .map_queues is usually done in the following * way: * * for (queue = 0; queue < set->nr_hw_queues; queue++) { * mask = get_cpu_mask(queue) * for_each_cpu(cpu, mask) * set->map[x].mq_map[cpu] = queue; * } * * When we need to remap, the table has to be cleared for * killing stale mapping since one CPU may not be mapped * to any hw queue. */ for (i = 0; i < set->nr_maps; i++) blk_mq_clear_mq_map(&set->map[i]); set->ops->map_queues(set); } else { BUG_ON(set->nr_maps > 1); blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); } } static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, int new_nr_hw_queues) { struct blk_mq_tags **new_tags; int i; if (set->nr_hw_queues >= new_nr_hw_queues) goto done; new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!new_tags) return -ENOMEM; if (set->tags) memcpy(new_tags, set->tags, set->nr_hw_queues * sizeof(*set->tags)); kfree(set->tags); set->tags = new_tags; for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { if (!__blk_mq_alloc_map_and_rqs(set, i)) { while (--i >= set->nr_hw_queues) __blk_mq_free_map_and_rqs(set, i); return -ENOMEM; } cond_resched(); } done: set->nr_hw_queues = new_nr_hw_queues; return 0; } /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the * requested depth down, if it's too large. In that case, the set * value will be stored in set->queue_depth. */ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) { int i, ret; BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS); if (!set->nr_hw_queues) return -EINVAL; if (!set->queue_depth) return -EINVAL; if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) return -EINVAL; if (!set->ops->queue_rq) return -EINVAL; if (!set->ops->get_budget ^ !set->ops->put_budget) return -EINVAL; if (set->queue_depth > BLK_MQ_MAX_DEPTH) { pr_info("blk-mq: reduced tag depth to %u\n", BLK_MQ_MAX_DEPTH); set->queue_depth = BLK_MQ_MAX_DEPTH; } if (!set->nr_maps) set->nr_maps = 1; else if (set->nr_maps > HCTX_MAX_TYPES) return -EINVAL; /* * If a crashdump is active, then we are potentially in a very * memory constrained environment. Limit us to 64 tags to prevent * using too much memory. 
*/ if (is_kdump_kernel()) set->queue_depth = min(64U, set->queue_depth); /* * There is no use for more h/w queues than cpus if we just have * a single map */ if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) set->nr_hw_queues = nr_cpu_ids; if (set->flags & BLK_MQ_F_BLOCKING) { set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL); if (!set->srcu) return -ENOMEM; ret = init_srcu_struct(set->srcu); if (ret) goto out_free_srcu; } ret = -ENOMEM; set->tags = kcalloc_node(set->nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!set->tags) goto out_cleanup_srcu; for (i = 0; i < set->nr_maps; i++) { set->map[i].mq_map = kcalloc_node(nr_cpu_ids, sizeof(set->map[i].mq_map[0]), GFP_KERNEL, set->numa_node); if (!set->map[i].mq_map) goto out_free_mq_map; set->map[i].nr_queues = set->nr_hw_queues; } blk_mq_update_queue_map(set); ret = blk_mq_alloc_set_map_and_rqs(set); if (ret) goto out_free_mq_map; mutex_init(&set->tag_list_lock); INIT_LIST_HEAD(&set->tag_list); return 0; out_free_mq_map: for (i = 0; i < set->nr_maps; i++) { kfree(set->map[i].mq_map); set->map[i].mq_map = NULL; } kfree(set->tags); set->tags = NULL; out_cleanup_srcu: if (set->flags & BLK_MQ_F_BLOCKING) cleanup_srcu_struct(set->srcu); out_free_srcu: if (set->flags & BLK_MQ_F_BLOCKING) kfree(set->srcu); return ret; } EXPORT_SYMBOL(blk_mq_alloc_tag_set); /* allocate and initialize a tagset for a simple single-queue device */ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int queue_depth, unsigned int set_flags) { memset(set, 0, sizeof(*set)); set->ops = ops; set->nr_hw_queues = 1; set->nr_maps = 1; set->queue_depth = queue_depth; set->numa_node = NUMA_NO_NODE; set->flags = set_flags; return blk_mq_alloc_tag_set(set); } EXPORT_SYMBOL_GPL(blk_mq_alloc_sq_tag_set); void blk_mq_free_tag_set(struct blk_mq_tag_set *set) { int i, j; for (i = 0; i < set->nr_hw_queues; i++) __blk_mq_free_map_and_rqs(set, i); if (blk_mq_is_shared_tags(set->flags)) { blk_mq_free_map_and_rqs(set, set->shared_tags, BLK_MQ_NO_HCTX_IDX); } for (j = 0; j < set->nr_maps; j++) { kfree(set->map[j].mq_map); set->map[j].mq_map = NULL; } kfree(set->tags); set->tags = NULL; if (set->flags & BLK_MQ_F_BLOCKING) { cleanup_srcu_struct(set->srcu); kfree(set->srcu); } } EXPORT_SYMBOL(blk_mq_free_tag_set); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) { struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; int ret; unsigned long i; if (!set) return -EINVAL; if (q->nr_requests == nr) return 0; blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); ret = 0; queue_for_each_hw_ctx(q, hctx, i) { if (!hctx->tags) continue; /* * If we're using an MQ scheduler, just update the scheduler * queue depth. This is similar to what the old code would do. */ if (hctx->sched_tags) { ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, nr, true); } else { ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false); } if (ret) break; if (q->elevator && q->elevator->type->ops.depth_updated) q->elevator->type->ops.depth_updated(hctx); } if (!ret) { q->nr_requests = nr; if (blk_mq_is_shared_tags(set->flags)) { if (q->elevator) blk_mq_tag_update_sched_shared_tags(q); else blk_mq_tag_resize_shared_tags(set, nr); } } blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); return ret; } /* * request_queue and elevator_type pair. * It is just used by __blk_mq_update_nr_hw_queues to cache * the elevator_type associated with a request_queue. 
*/ struct blk_mq_qe_pair { struct list_head node; struct request_queue *q; struct elevator_type *type; }; /* * Cache the elevator_type in qe pair list and switch the * io scheduler to 'none' */ static bool blk_mq_elv_switch_none(struct list_head *head, struct request_queue *q) { struct blk_mq_qe_pair *qe; qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (!qe) return false; /* q->elevator needs protection from ->sysfs_lock */ mutex_lock(&q->sysfs_lock); /* the check has to be done with holding sysfs_lock */ if (!q->elevator) { kfree(qe); goto unlock; } INIT_LIST_HEAD(&qe->node); qe->q = q; qe->type = q->elevator->type; /* keep a reference to the elevator module as we'll switch back */ __elevator_get(qe->type); list_add(&qe->node, head); elevator_disable(q); unlock: mutex_unlock(&q->sysfs_lock); return true; } static struct blk_mq_qe_pair *blk_lookup_qe_pair(struct list_head *head, struct request_queue *q) { struct blk_mq_qe_pair *qe; list_for_each_entry(qe, head, node) if (qe->q == q) return qe; return NULL; } static void blk_mq_elv_switch_back(struct list_head *head, struct request_queue *q) { struct blk_mq_qe_pair *qe; struct elevator_type *t; qe = blk_lookup_qe_pair(head, q); if (!qe) return; t = qe->type; list_del(&qe->node); kfree(qe); mutex_lock(&q->sysfs_lock); elevator_switch(q, t); /* drop the reference acquired in blk_mq_elv_switch_none */ elevator_put(t); mutex_unlock(&q->sysfs_lock); } static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) { struct request_queue *q; LIST_HEAD(head); int prev_nr_hw_queues = set->nr_hw_queues; int i; lockdep_assert_held(&set->tag_list_lock); if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids) nr_hw_queues = nr_cpu_ids; if (nr_hw_queues < 1) return; if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_freeze_queue(q); /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done * updating the new sw to hw queue mappings. */ list_for_each_entry(q, &set->tag_list, tag_set_list) if (!blk_mq_elv_switch_none(&head, q)) goto switch_back; list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_debugfs_unregister_hctxs(q); blk_mq_sysfs_unregister_hctxs(q); } if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) goto reregister; fallback: blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); blk_mq_update_poll_flag(q); if (q->nr_hw_queues != set->nr_hw_queues) { int i = prev_nr_hw_queues; pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n", nr_hw_queues, prev_nr_hw_queues); for (; i < set->nr_hw_queues; i++) __blk_mq_free_map_and_rqs(set, i); set->nr_hw_queues = prev_nr_hw_queues; goto fallback; } blk_mq_map_swqueue(q); } reregister: list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register_hctxs(q); blk_mq_debugfs_register_hctxs(q); } switch_back: list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_elv_switch_back(&head, q); list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); /* Free the excess tags when nr_hw_queues shrink. 
*/ for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) __blk_mq_free_map_and_rqs(set, i); } void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) { mutex_lock(&set->tag_list_lock); __blk_mq_update_nr_hw_queues(set, nr_hw_queues); mutex_unlock(&set->tag_list_lock); } EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob, unsigned int flags) { long state = get_current_state(); int ret; do { ret = q->mq_ops->poll(hctx, iob); if (ret > 0) { __set_current_state(TASK_RUNNING); return ret; } if (signal_pending_state(state, current)) __set_current_state(TASK_RUNNING); if (task_is_running(current)) return 1; if (ret < 0 || (flags & BLK_POLL_ONESHOT)) break; cpu_relax(); } while (!need_resched()); __set_current_state(TASK_RUNNING); return 0; } int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, unsigned int flags) { struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie); return blk_hctx_poll(q, hctx, iob, flags); } int blk_rq_poll(struct request *rq, struct io_comp_batch *iob, unsigned int poll_flags) { struct request_queue *q = rq->q; int ret; if (!blk_rq_is_poll(rq)) return 0; if (!percpu_ref_tryget(&q->q_usage_counter)) return 0; ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags); blk_queue_exit(q); return ret; } EXPORT_SYMBOL_GPL(blk_rq_poll); unsigned int blk_mq_rq_cpu(struct request *rq) { return rq->mq_ctx->cpu; } EXPORT_SYMBOL(blk_mq_rq_cpu); void blk_mq_cancel_work_sync(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; cancel_delayed_work_sync(&q->requeue_work); queue_for_each_hw_ctx(q, hctx, i) cancel_delayed_work_sync(&hctx->run_work); } static int __init blk_mq_init(void) { int i; for_each_possible_cpu(i) init_llist_head(&per_cpu(blk_cpu_done, i)); for_each_possible_cpu(i) INIT_CSD(&per_cpu(blk_cpu_csd, i), __blk_mq_complete_request_remote, NULL); open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, "block/softirq:dead", NULL, blk_softirq_cpu_dead); cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, blk_mq_hctx_notify_dead); cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online", blk_mq_hctx_notify_online, blk_mq_hctx_notify_offline); return 0; } subsys_initcall(blk_mq_init);
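/*
 * Hypothetical usage sketch (not part of blk-mq.c): a minimal consumer of the
 * tag-set / request-queue API shown above. All example_* names are made up
 * for illustration, error handling is trimmed to the essentials, and the
 * queue_rq handler simply completes every request successfully.
 */
#include <linux/blk-mq.h>

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);		/* mark the request in flight */
	blk_mq_end_request(rq, BLK_STS_OK);	/* complete it immediately */
	return BLK_STS_OK;
}

static const struct blk_mq_ops example_mq_ops = {
	.queue_rq	= example_queue_rq,
};

static int example_create_queue(struct blk_mq_tag_set *set,
				struct request_queue **qp)
{
	struct request_queue *q;
	int ret;

	/* Single hw queue, depth 64, no special flags. */
	ret = blk_mq_alloc_sq_tag_set(set, &example_mq_ops, 64, 0);
	if (ret)
		return ret;

	q = blk_mq_alloc_queue(set, NULL, NULL);
	if (IS_ERR(q)) {
		blk_mq_free_tag_set(set);
		return PTR_ERR(q);
	}

	*qp = q;
	return 0;
}

static void example_destroy_queue(struct blk_mq_tag_set *set,
				  struct request_queue *q)
{
	blk_mq_destroy_queue(q);
	blk_put_queue(q);	/* drop the reference from blk_mq_alloc_queue() */
	blk_mq_free_tag_set(set);
}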
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 * x86-64 work by Andi Kleen 2002
 */
#ifndef _ASM_X86_FPU_API_H
#define _ASM_X86_FPU_API_H

#include <linux/bottom_half.h>

#include <asm/fpu/types.h>

/*
 * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
 * disables preemption so be careful if you intend to use it for long periods
 * of time.
 * If you intend to use the FPU in irq/softirq you need to check first with
 * irq_fpu_usable() if it is possible.
 */

/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */
#define KFPU_387	_BITUL(0)	/* 387 state will be initialized */
#define KFPU_MXCSR	_BITUL(1)	/* MXCSR will be initialized */

extern void kernel_fpu_begin_mask(unsigned int kfpu_mask);
extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
extern void fpregs_mark_activate(void);

/* Code that is unaware of kernel_fpu_begin_mask() can use this */
static inline void kernel_fpu_begin(void)
{
#ifdef CONFIG_X86_64
	/*
	 * Any 64-bit code that uses 387 instructions must explicitly request
	 * KFPU_387.
	 */
	kernel_fpu_begin_mask(KFPU_MXCSR);
#else
	/*
	 * 32-bit kernel code may use 387 operations as well as SSE2, etc,
	 * as long as it checks that the CPU has the required capability.
	 */
	kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR);
#endif
}

/*
 * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate.
 * A context switch will (and softirq might) save CPU's FPU registers to
 * fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in
 * a random state.
 *
 * local_bh_disable() protects against both preemption and soft interrupts
 * on !RT kernels.
 *
 * On RT kernels local_bh_disable() is not sufficient because it only
 * serializes soft interrupt related sections via a local lock, but stays
 * preemptible. Disabling preemption is the right choice here as bottom
 * half processing is always in thread context on RT kernels so it
 * implicitly prevents bottom half processing as well.
 *
 * Disabling preemption also serializes against kernel_fpu_begin().
 */
static inline void fpregs_lock(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_bh_disable();
	else
		preempt_disable();
}

static inline void fpregs_unlock(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_bh_enable();
	else
		preempt_enable();
}

/*
 * FPU state gets lazily restored before returning to userspace. So when in the
 * kernel, the valid FPU state may be kept in the buffer. This function will force
 * restore all the fpu state to the registers early if needed, and lock them from
 * being automatically saved/restored. Then FPU state can be modified safely in the
 * registers, before unlocking with fpregs_unlock().
 */
void fpregs_lock_and_load(void);

#ifdef CONFIG_X86_DEBUG_FPU
extern void fpregs_assert_state_consistent(void);
#else
static inline void fpregs_assert_state_consistent(void) { }
#endif

/*
 * Load the task FPU state before returning to userspace.
 */
extern void switch_fpu_return(void);

/*
 * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
 *
 * If 'feature_name' is set then put a human-readable description of
 * the feature there as well - this can be used to print error (or success)
 * messages.
 */
extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);

/* Trap handling */
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern void fpu_sync_fpstate(struct fpu *fpu);
extern void fpu_reset_from_exception_fixup(void);

/* Boot, hotplug and resume */
extern void fpu__init_cpu(void);
extern void fpu__init_system(void);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);

#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct swregs_state *soft);
#else
static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif

/* State tracking */
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/* Process cleanup */
#ifdef CONFIG_X86_64
extern void fpstate_free(struct fpu *fpu);
#else
static inline void fpstate_free(struct fpu *fpu) { }
#endif

/* fpstate-related functions which are exported to KVM */
extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);

extern u64 xstate_get_guest_group_perm(void);

/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures);

#ifdef CONFIG_X86_64
extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd);
extern void fpu_sync_guest_vmexit_xfd_state(void);
#else
static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { }
static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif

extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
					   unsigned int size, u64 xfeatures, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
					  u64 xcr0, u32 *vpkru);

static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
{
	gfpu->fpstate->is_confidential = true;
}

static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
{
	return gfpu->fpstate->is_confidential;
}

/* prctl */
extern long fpu_xstate_prctl(int option, unsigned long arg2);

extern void fpu_idle_fpregs(void);

#endif /* _ASM_X86_FPU_API_H */
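/*
 * Hypothetical usage sketch (not part of this header): how kernel code might
 * use the API above, following the documented rules that irq_fpu_usable()
 * must be checked when the code can run in irq/softirq context and that the
 * begin/end section should stay short because preemption is disabled.
 * example_simd_fill() is a made-up name for illustration only.
 */
#include <linux/types.h>
#include <asm/fpu/api.h>

static bool example_simd_fill(void *dst, size_t len)
{
	if (!irq_fpu_usable())
		return false;		/* caller should take a scalar fallback path */

	kernel_fpu_begin();		/* preemption is disabled from here on */
	/* ... a short burst of SSE/AVX work on dst/len would go here ... */
	kernel_fpu_end();

	return true;
}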
// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <linux/indirect_call_wrapper.h>

#if defined(CONFIG_IPV6) && defined(CONFIG_IPV6_MULTIPLE_TABLES)
#ifdef CONFIG_IP_MULTIPLE_TABLES
#define INDIRECT_CALL_MT(f, f2, f1, ...) \
	INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__)
#else
#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
#endif
#elif defined(CONFIG_IP_MULTIPLE_TABLES)
#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
#else
#define INDIRECT_CALL_MT(f, f2, f1, ...)
f(__VA_ARGS__) #endif static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(0), KUIDT_INIT(~0), }; bool fib_rule_matchall(const struct fib_rule *rule) { if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || rule->flags) return false; if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) return false; if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) || !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end)) return false; if (fib_rule_port_range_set(&rule->sport_range)) return false; if (fib_rule_port_range_set(&rule->dport_range)) return false; return true; } EXPORT_SYMBOL_GPL(fib_rule_matchall); int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table) { struct fib_rule *r; r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT); if (r == NULL) return -ENOMEM; refcount_set(&r->refcnt, 1); r->action = FR_ACT_TO_TBL; r->pref = pref; r->table = table; r->proto = RTPROT_KERNEL; r->fr_net = ops->fro_net; r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; /* The lock is not required here, the list in unreacheable * at the moment this function is called */ list_add_tail(&r->list, &ops->rules_list); return 0; } EXPORT_SYMBOL(fib_default_rule_add); static u32 fib_default_rule_pref(struct fib_rules_ops *ops) { struct list_head *pos; struct fib_rule *rule; if (!list_empty(&ops->rules_list)) { pos = ops->rules_list.next; if (pos->next != &ops->rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; } } return 0; } static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family) { struct fib_rules_ops *ops; rcu_read_lock(); list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (ops->family == family) { if (!try_module_get(ops->owner)) ops = NULL; rcu_read_unlock(); return ops; } } rcu_read_unlock(); return NULL; } static void rules_ops_put(struct fib_rules_ops *ops) { if (ops) module_put(ops->owner); } static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) ops->flush_cache(ops); } static int __fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; struct net *net; net = ops->fro_net; if (ops->rule_size < sizeof(struct fib_rule)) return -EINVAL; if (ops->match == NULL || ops->configure == NULL || ops->compare == NULL || ops->fill == NULL || ops->action == NULL) return -EINVAL; spin_lock(&net->rules_mod_lock); list_for_each_entry(o, &net->rules_ops, list) if (ops->family == o->family) goto errout; list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: spin_unlock(&net->rules_mod_lock); return err; } struct fib_rules_ops * fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) { struct fib_rules_ops *ops; int err; ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ops->rules_list); ops->fro_net = net; err = __fib_rules_register(ops); if (err) { kfree(ops); ops = ERR_PTR(err); } return ops; } EXPORT_SYMBOL_GPL(fib_rules_register); static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) { struct fib_rule *rule, *tmp; list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) { list_del_rcu(&rule->list); if (ops->delete) ops->delete(rule); fib_rule_put(rule); } } void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; 
spin_lock(&net->rules_mod_lock); list_del_rcu(&ops->list); spin_unlock(&net->rules_mod_lock); fib_rules_cleanup_ops(ops); kfree_rcu(ops, rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); static int uid_range_set(struct fib_kuid_range *range) { return uid_valid(range->start) && uid_valid(range->end); } static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) { struct fib_rule_uid_range *in; struct fib_kuid_range out; in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); out.start = make_kuid(current_user_ns(), in->start); out.end = make_kuid(current_user_ns(), in->end); return out; } static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) { struct fib_rule_uid_range out = { from_kuid_munged(current_user_ns(), range->start), from_kuid_munged(current_user_ns(), range->end) }; return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); } static int nla_get_port_range(struct nlattr *pattr, struct fib_rule_port_range *port_range) { const struct fib_rule_port_range *pr = nla_data(pattr); if (!fib_rule_port_range_valid(pr)) return -EINVAL; port_range->start = pr->start; port_range->end = pr->end; return 0; } static int nla_put_port_range(struct sk_buff *skb, int attrtype, struct fib_rule_port_range *range) { return nla_put(skb, attrtype, sizeof(*range), range); } static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { int ret = 0; if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id)) goto out; if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; if (uid_lt(fl->flowi_uid, rule->uid_range.start) || uid_gt(fl->flowi_uid, rule->uid_range.end)) goto out; ret = INDIRECT_CALL_MT(ops->match, fib6_rule_match, fib4_rule_match, rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? 
!ret : ret; } int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { struct fib_rule *rule; int err; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { jumped: if (!fib_rule_match(rule, ops, fl, flags, arg)) continue; if (rule->action == FR_ACT_GOTO) { struct fib_rule *target; target = rcu_dereference(rule->ctarget); if (target == NULL) { continue; } else { rule = target; goto jumped; } } else if (rule->action == FR_ACT_NOP) continue; else err = INDIRECT_CALL_MT(ops->action, fib6_rule_action, fib4_rule_action, rule, fl, flags, arg); if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress, fib6_rule_suppress, fib4_rule_suppress, rule, flags, arg)) continue; if (err != -EAGAIN) { if ((arg->flags & FIB_LOOKUP_NOREF) || likely(refcount_inc_not_zero(&rule->refcnt))) { arg->rule = rule; goto out; } break; } } err = -ESRCH; out: rcu_read_unlock(); return err; } EXPORT_SYMBOL_GPL(fib_rules_lookup); static int call_fib_rule_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_rule *rule, int family, struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = family, .info.extack = extack, .rule = rule, }; return call_fib_notifier(nb, event_type, &info.info); } static int call_fib_rule_notifiers(struct net *net, enum fib_event_type event_type, struct fib_rule *rule, struct fib_rules_ops *ops, struct netlink_ext_ack *extack) { struct fib_rule_notifier_info info = { .info.family = ops->family, .info.extack = extack, .rule = rule, }; ops->fib_rules_seq++; return call_fib_notifiers(net, event_type, &info.info); } /* Called with rcu_read_lock() */ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack) { struct fib_rules_ops *ops; struct fib_rule *rule; int err = 0; ops = lookup_rules_ops(net, family); if (!ops) return -EAFNOSUPPORT; list_for_each_entry_rcu(rule, &ops->rules_list, list) { err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD, rule, family, extack); if (err) break; } rules_ops_put(ops); return err; } EXPORT_SYMBOL_GPL(fib_rules_dump); unsigned int fib_rules_seq_read(struct net *net, int family) { unsigned int fib_rules_seq; struct fib_rules_ops *ops; ASSERT_RTNL(); ops = lookup_rules_ops(net, family); if (!ops) return 0; fib_rules_seq = ops->fib_rules_seq; rules_ops_put(ops); return fib_rules_seq; } EXPORT_SYMBOL_GPL(fib_rules_seq_read); static struct fib_rule *rule_find(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rule *rule, bool user_priority) { struct fib_rule *r; list_for_each_entry(r, &ops->rules_list, list) { if (rule->action && r->action != rule->action) continue; if (rule->table && r->table != rule->table) continue; if (user_priority && r->pref != rule->pref) continue; if (rule->iifname[0] && memcmp(r->iifname, rule->iifname, IFNAMSIZ)) continue; if (rule->oifname[0] && memcmp(r->oifname, rule->oifname, IFNAMSIZ)) continue; if (rule->mark && r->mark != rule->mark) continue; if (rule->suppress_ifgroup != -1 && r->suppress_ifgroup != rule->suppress_ifgroup) continue; if (rule->suppress_prefixlen != -1 && r->suppress_prefixlen != rule->suppress_prefixlen) continue; if (rule->mark_mask && r->mark_mask != rule->mark_mask) continue; if (rule->tun_id && r->tun_id != rule->tun_id) continue; if (r->fr_net != rule->fr_net) continue; if (rule->l3mdev && r->l3mdev != rule->l3mdev) continue; if (uid_range_set(&rule->uid_range) && (!uid_eq(r->uid_range.start, 
rule->uid_range.start) || !uid_eq(r->uid_range.end, rule->uid_range.end))) continue; if (rule->ip_proto && r->ip_proto != rule->ip_proto) continue; if (rule->proto && r->proto != rule->proto) continue; if (fib_rule_port_range_set(&rule->sport_range) && !fib_rule_port_range_compare(&r->sport_range, &rule->sport_range)) continue; if (fib_rule_port_range_set(&rule->dport_range) && !fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; if (!ops->compare(r, frh, tb)) continue; return r; } return NULL; } #ifdef CONFIG_NET_L3_MASTER_DEV static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, struct netlink_ext_ack *extack) { nlrule->l3mdev = nla_get_u8(nla); if (nlrule->l3mdev != 1) { NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute"); return -1; } return 0; } #else static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel"); return -1; } #endif static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct fib_rules_ops *ops, struct nlattr *tb[], struct fib_rule **rule, bool *user_priority) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rule *nlrule = NULL; int err = -EINVAL; if (frh->src_len) if (!tb[FRA_SRC] || frh->src_len > (ops->addr_size * 8) || nla_len(tb[FRA_SRC]) != ops->addr_size) { NL_SET_ERR_MSG(extack, "Invalid source address"); goto errout; } if (frh->dst_len) if (!tb[FRA_DST] || frh->dst_len > (ops->addr_size * 8) || nla_len(tb[FRA_DST]) != ops->addr_size) { NL_SET_ERR_MSG(extack, "Invalid dst address"); goto errout; } nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT); if (!nlrule) { err = -ENOMEM; goto errout; } refcount_set(&nlrule->refcnt, 1); nlrule->fr_net = net; if (tb[FRA_PRIORITY]) { nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]); *user_priority = true; } else { nlrule->pref = fib_default_rule_pref(ops); } nlrule->proto = tb[FRA_PROTOCOL] ? nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC; if (tb[FRA_IIFNAME]) { struct net_device *dev; nlrule->iifindex = -1; nla_strscpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); dev = __dev_get_by_name(net, nlrule->iifname); if (dev) nlrule->iifindex = dev->ifindex; } if (tb[FRA_OIFNAME]) { struct net_device *dev; nlrule->oifindex = -1; nla_strscpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); dev = __dev_get_by_name(net, nlrule->oifname); if (dev) nlrule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { nlrule->mark = nla_get_u32(tb[FRA_FWMARK]); if (nlrule->mark) /* compatibility: if the mark value is non-zero all bits * are compared unless a mask is explicitly specified. 
*/ nlrule->mark_mask = 0xFFFFFFFF; } if (tb[FRA_FWMASK]) nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); if (tb[FRA_TUN_ID]) nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); if (tb[FRA_L3MDEV] && fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0) goto errout_free; nlrule->action = frh->action; nlrule->flags = frh->flags; nlrule->table = frh_get_table(frh, tb); if (tb[FRA_SUPPRESS_PREFIXLEN]) nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]); else nlrule->suppress_prefixlen = -1; if (tb[FRA_SUPPRESS_IFGROUP]) nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]); else nlrule->suppress_ifgroup = -1; if (tb[FRA_GOTO]) { if (nlrule->action != FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Unexpected goto"); goto errout_free; } nlrule->target = nla_get_u32(tb[FRA_GOTO]); /* Backward jumps are prohibited to avoid endless loops */ if (nlrule->target <= nlrule->pref) { NL_SET_ERR_MSG(extack, "Backward goto not supported"); goto errout_free; } } else if (nlrule->action == FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Missing goto target for action goto"); goto errout_free; } if (nlrule->l3mdev && nlrule->table) { NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive"); goto errout_free; } if (tb[FRA_UID_RANGE]) { if (current_user_ns() != net->user_ns) { err = -EPERM; NL_SET_ERR_MSG(extack, "No permission to set uid"); goto errout_free; } nlrule->uid_range = nla_get_kuid_range(tb); if (!uid_range_set(&nlrule->uid_range) || !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) { NL_SET_ERR_MSG(extack, "Invalid uid range"); goto errout_free; } } else { nlrule->uid_range = fib_kuid_range_unset; } if (tb[FRA_IP_PROTO]) nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]); if (tb[FRA_SPORT_RANGE]) { err = nla_get_port_range(tb[FRA_SPORT_RANGE], &nlrule->sport_range); if (err) { NL_SET_ERR_MSG(extack, "Invalid sport range"); goto errout_free; } } if (tb[FRA_DPORT_RANGE]) { err = nla_get_port_range(tb[FRA_DPORT_RANGE], &nlrule->dport_range); if (err) { NL_SET_ERR_MSG(extack, "Invalid dport range"); goto errout_free; } } *rule = nlrule; return 0; errout_free: kfree(nlrule); errout: return err; } static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rule *rule) { struct fib_rule *r; list_for_each_entry(r, &ops->rules_list, list) { if (r->action != rule->action) continue; if (r->table != rule->table) continue; if (r->pref != rule->pref) continue; if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) continue; if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) continue; if (r->mark != rule->mark) continue; if (r->suppress_ifgroup != rule->suppress_ifgroup) continue; if (r->suppress_prefixlen != rule->suppress_prefixlen) continue; if (r->mark_mask != rule->mark_mask) continue; if (r->tun_id != rule->tun_id) continue; if (r->fr_net != rule->fr_net) continue; if (r->l3mdev != rule->l3mdev) continue; if (!uid_eq(r->uid_range.start, rule->uid_range.start) || !uid_eq(r->uid_range.end, rule->uid_range.end)) continue; if (r->ip_proto != rule->ip_proto) continue; if (r->proto != rule->proto) continue; if (!fib_rule_port_range_compare(&r->sport_range, &rule->sport_range)) continue; if (!fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; if (!ops->compare(r, frh, tb)) continue; return 1; } return 0; } static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, [FRA_OIFNAME] = { .type = NLA_STRING, 
.len = IFNAMSIZ - 1 }, [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_FWMARK] = { .type = NLA_U32 }, [FRA_FLOW] = { .type = NLA_U32 }, [FRA_TUN_ID] = { .type = NLA_U64 }, [FRA_FWMASK] = { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, [FRA_GOTO] = { .type = NLA_U32 }, [FRA_L3MDEV] = { .type = NLA_U8 }, [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, [FRA_PROTOCOL] = { .type = NLA_U8 }, [FRA_IP_PROTO] = { .type = NLA_U8 }, [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } }; int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule = NULL, *r, *last = NULL; struct nlattr *tb[FRA_MAX + 1]; int err = -EINVAL, unresolved = 0; bool user_priority = false; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid msg length"); goto errout; } ops = lookup_rules_ops(net, frh->family); if (!ops) { err = -EAFNOSUPPORT; NL_SET_ERR_MSG(extack, "Rule family not supported"); goto errout; } err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, fib_rule_policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; } err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority); if (err) goto errout; if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; goto errout_free; } err = ops->configure(rule, skb, frh, tb, extack); if (err < 0) goto errout_free; err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack); if (err < 0) goto errout_free; list_for_each_entry(r, &ops->rules_list, list) { if (r->pref == rule->target) { RCU_INIT_POINTER(rule->ctarget, r); break; } } if (rcu_dereference_protected(rule->ctarget, 1) == NULL) unresolved = 1; list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; last = r; } if (last) list_add_rcu(&rule->list, &last->list); else list_add_rcu(&rule->list, &ops->rules_list); if (ops->unresolved_rules) { /* * There are unresolved goto rules in the list, check if * any of them are pointing to this new rule. 
*/ list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && r->target == rule->pref && rtnl_dereference(r->ctarget) == NULL) { rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; } } } if (rule->action == FR_ACT_GOTO) ops->nr_goto_rules++; if (unresolved) ops->unresolved_rules++; if (rule->tun_id) ip_tunnel_need_metadata(); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); return 0; errout_free: kfree(rule); errout: rules_ops_put(ops); return err; } EXPORT_SYMBOL_GPL(fib_nl_newrule); int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule = NULL, *r, *nlrule = NULL; struct nlattr *tb[FRA_MAX+1]; int err = -EINVAL; bool user_priority = false; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid msg length"); goto errout; } ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = -EAFNOSUPPORT; NL_SET_ERR_MSG(extack, "Rule family not supported"); goto errout; } err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, fib_rule_policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; } err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority); if (err) goto errout; rule = rule_find(ops, frh, tb, nlrule, user_priority); if (!rule) { err = -ENOENT; goto errout; } if (rule->flags & FIB_RULE_PERMANENT) { err = -EPERM; goto errout; } if (ops->delete) { err = ops->delete(rule); if (err) goto errout; } if (rule->tun_id) ip_tunnel_unneed_metadata(); list_del_rcu(&rule->list); if (rule->action == FR_ACT_GOTO) { ops->nr_goto_rules--; if (rtnl_dereference(rule->ctarget) == NULL) ops->unresolved_rules--; } /* * Check if this rule is a target to any of them. If so, * adjust to the next one with the same preference or * disable them. As this operation is eventually very * expensive, it is only performed if goto rules, except * current if it is goto rule, have actually been added. 
*/ if (ops->nr_goto_rules > 0) { struct fib_rule *n; n = list_next_entry(rule, list); if (&n->list == &ops->rules_list || n->pref != rule->pref) n = NULL; list_for_each_entry(r, &ops->rules_list, list) { if (rtnl_dereference(r->ctarget) != rule) continue; rcu_assign_pointer(r->ctarget, n); if (!n) ops->unresolved_rules++; } } call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, NULL); notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); kfree(nlrule); return 0; errout: kfree(nlrule); rules_ops_put(ops); return err; } EXPORT_SYMBOL_GPL(fib_nl_delrule); static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) { size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */ + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */ + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */ + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ + nla_total_size_64bit(8) /* FRA_TUN_ID */ + nla_total_size(sizeof(struct fib_kuid_range)) + nla_total_size(1) /* FRA_PROTOCOL */ + nla_total_size(1) /* FRA_IP_PROTO */ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); return payload; } static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, u32 pid, u32 seq, int type, int flags, struct fib_rules_ops *ops) { struct nlmsghdr *nlh; struct fib_rule_hdr *frh; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); if (nlh == NULL) return -EMSGSIZE; frh = nlmsg_data(nlh); frh->family = ops->family; frh->table = rule->table < 256 ? 
rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) goto nla_put_failure; frh->res1 = 0; frh->res2 = 0; frh->action = rule->action; frh->flags = rule->flags; if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto)) goto nla_put_failure; if (rule->action == FR_ACT_GOTO && rcu_access_pointer(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { if (nla_put_string(skb, FRA_IIFNAME, rule->iifname)) goto nla_put_failure; if (rule->iifindex == -1) frh->flags |= FIB_RULE_IIF_DETACHED; } if (rule->oifname[0]) { if (nla_put_string(skb, FRA_OIFNAME, rule->oifname)) goto nla_put_failure; if (rule->oifindex == -1) frh->flags |= FIB_RULE_OIF_DETACHED; } if ((rule->pref && nla_put_u32(skb, FRA_PRIORITY, rule->pref)) || (rule->mark && nla_put_u32(skb, FRA_FWMARK, rule->mark)) || ((rule->mark_mask || rule->mark) && nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) || (rule->target && nla_put_u32(skb, FRA_GOTO, rule->target)) || (rule->tun_id && nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || (rule->l3mdev && nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || (uid_range_set(&rule->uid_range) && nla_put_uid_range(skb, &rule->uid_range)) || (fib_rule_port_range_set(&rule->sport_range) && nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) || (fib_rule_port_range_set(&rule->dport_range) && nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) || (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup)) goto nla_put_failure; } if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, struct fib_rules_ops *ops) { int idx = 0; struct fib_rule *rule; int err = 0; rcu_read_lock(); list_for_each_entry_rcu(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI, ops); if (err) break; skip: idx++; } rcu_read_unlock(); cb->args[1] = idx; rules_ops_put(ops); return err; } static int fib_valid_dumprule_req(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct fib_rule_hdr *frh; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request"); return -EINVAL; } frh = nlmsg_data(nlh); if (frh->dst_len || frh->src_len || frh->tos || frh->table || frh->res1 || frh->res2 || frh->action || frh->flags) { NL_SET_ERR_MSG(extack, "Invalid values in header for fib rule dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid data after header in fib rule dump request"); return -EINVAL; } return 0; } static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct fib_rules_ops *ops; int idx = 0, family; if (cb->strict_check) { int err = fib_valid_dumprule_req(nlh, cb->extack); if (err < 0) return err; } family = rtnl_msg_family(nlh); if (family != AF_UNSPEC) { /* Protocol specific dump request */ ops = lookup_rules_ops(net, family); if (ops == NULL) return -EAFNOSUPPORT; dump_rules(skb, cb, ops); return skb->len; } rcu_read_lock(); 
list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (idx < cb->args[0] || !try_module_get(ops->owner)) goto skip; if (dump_rules(skb, cb, ops) < 0) break; cb->args[1] = 0; skip: idx++; } rcu_read_unlock(); cb->args[0] = idx; return skb->len; } static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid) { struct net *net; struct sk_buff *skb; int err = -ENOMEM; net = ops->fro_net; skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL); if (skb == NULL) goto errout; err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); if (err < 0) { /* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); return; errout: if (err < 0) rtnl_set_sk_err(net, ops->nlgroup, err); } static void attach_rules(struct list_head *rules, struct net_device *dev) { struct fib_rule *rule; list_for_each_entry(rule, rules, list) { if (rule->iifindex == -1 && strcmp(dev->name, rule->iifname) == 0) rule->iifindex = dev->ifindex; if (rule->oifindex == -1 && strcmp(dev->name, rule->oifname) == 0) rule->oifindex = dev->ifindex; } } static void detach_rules(struct list_head *rules, struct net_device *dev) { struct fib_rule *rule; list_for_each_entry(rule, rules, list) { if (rule->iifindex == dev->ifindex) rule->iifindex = -1; if (rule->oifindex == dev->ifindex) rule->oifindex = -1; } } static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct fib_rules_ops *ops; ASSERT_RTNL(); switch (event) { case NETDEV_REGISTER: list_for_each_entry(ops, &net->rules_ops, list) attach_rules(&ops->rules_list, dev); break; case NETDEV_CHANGENAME: list_for_each_entry(ops, &net->rules_ops, list) { detach_rules(&ops->rules_list, dev); attach_rules(&ops->rules_list, dev); } break; case NETDEV_UNREGISTER: list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); break; } return NOTIFY_DONE; } static struct notifier_block fib_rules_notifier = { .notifier_call = fib_rules_event, }; static int __net_init fib_rules_net_init(struct net *net) { INIT_LIST_HEAD(&net->rules_ops); spin_lock_init(&net->rules_mod_lock); return 0; } static void __net_exit fib_rules_net_exit(struct net *net) { WARN_ON_ONCE(!list_empty(&net->rules_ops)); } static struct pernet_operations fib_rules_net_ops = { .init = fib_rules_net_init, .exit = fib_rules_net_exit, }; static int __init fib_rules_init(void) { int err; rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) goto fail; err = register_netdevice_notifier(&fib_rules_notifier); if (err < 0) goto fail_unregister; return 0; fail_unregister: unregister_pernet_subsys(&fib_rules_net_ops); fail: rtnl_unregister(PF_UNSPEC, RTM_NEWRULE); rtnl_unregister(PF_UNSPEC, RTM_DELRULE); rtnl_unregister(PF_UNSPEC, RTM_GETRULE); return err; } subsys_initcall(fib_rules_init);
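The netlink handlers above (fib_nl_newrule()/fib_nl_delrule()) consume RTM_NEWRULE/RTM_DELRULE messages made of a struct fib_rule_hdr followed by FRA_* attributes, as validated by fib_rule_policy[] and fib_nl2rule(). As a rough illustration only, the following userspace sketch (not part of fib_rules.c; the put_u32() helper and the chosen priority/fwmark/table values are made up) builds the kind of request fib_nl2rule() parses, roughly what "ip rule add fwmark 1 priority 100 table 100" would send. It requires CAP_NET_ADMIN and omits reading back the netlink ACK.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/fib_rules.h>

/* hypothetical helper: append a u32 rtattr right after the current payload */
static void put_u32(struct nlmsghdr *nlh, unsigned short type, __u32 value)
{
	struct rtattr *rta = (struct rtattr *)((char *)nlh + NLMSG_ALIGN(nlh->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(sizeof(value));
	memcpy(RTA_DATA(rta), &value, sizeof(value));
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct fib_rule_hdr frh;
		char attrs[64];		/* room for the FRA_* attributes below */
	} req = { 0 };
	int fd;

	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.frh));
	req.nlh.nlmsg_type = RTM_NEWRULE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;

	req.frh.family = AF_INET;
	req.frh.action = FR_ACT_TO_TBL;	/* "look up a routing table" */

	/* parsed in fib_nl2rule() as tb[FRA_PRIORITY], tb[FRA_FWMARK] and
	 * frh_get_table(); a non-zero fwmark gets an all-ones mark_mask. */
	put_u32(&req.nlh, FRA_PRIORITY, 100);
	put_u32(&req.nlh, FRA_FWMARK, 1);
	put_u32(&req.nlh, FRA_TABLE, 100);

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0 || send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
		perror("rtnetlink");
		return 1;
	}
	/* a real client would recv() and check the netlink ACK here */
	close(fd);
	return 0;
}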
// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/file.c - kernfs file implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/fsnotify.h> #include <linux/uio.h> #include "kernfs-internal.h" struct kernfs_open_node { struct rcu_head rcu_head; atomic_t event; wait_queue_head_t poll; struct list_head files; /* goes through kernfs_open_file.list */ unsigned int nr_mmapped; unsigned int nr_to_release; }; /* * kernfs_notify() may be called from any context and bounces notifications * through a work item. To minimize space overhead in kernfs_node, the * pending queue is implemented as a singly linked list of kernfs_nodes. * The list is terminated with the self pointer so that whether a * kernfs_node is on the list or not can be determined by testing the next * pointer for %NULL. */ #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) static DEFINE_SPINLOCK(kernfs_notify_lock); static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn) { int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS); return &kernfs_locks->open_file_mutex[idx]; } static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) { struct mutex *lock; lock = kernfs_open_file_mutex_ptr(kn); mutex_lock(lock); return lock; } /** * of_on - Get the kernfs_open_node of the specified kernfs_open_file * @of: target kernfs_open_file * * Return: the kernfs_open_node of the kernfs_open_file */ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) { return rcu_dereference_protected(of->kn->attr.open, !list_empty(&of->list)); } /** * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn * * @kn: target kernfs_node. * * Fetch and return ->attr.open of @kn when caller holds the * kernfs_open_file_mutex_ptr(kn). * * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when * the caller guarantees that this mutex is being held, other updaters can't * change ->attr.open and this means that we can safely deref ->attr.open * outside RCU read-side critical section. * * The caller needs to make sure that kernfs_open_file_mutex is held. * * Return: @kn->attr.open when kernfs_open_file_mutex is held.
*/ static struct kernfs_open_node * kernfs_deref_open_node_locked(struct kernfs_node *kn) { return rcu_dereference_protected(kn->attr.open, lockdep_is_held(kernfs_open_file_mutex_ptr(kn))); } static struct kernfs_open_file *kernfs_of(struct file *file) { return ((struct seq_file *)file->private_data)->private; } /* * Determine the kernfs_ops for the given kernfs_node. This function must * be called while holding an active reference. */ static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) { if (kn->flags & KERNFS_LOCKDEP) lockdep_assert_held(kn); return kn->attr.ops; } /* * As kernfs_seq_stop() is also called after kernfs_seq_start() or * kernfs_seq_next() failure, it needs to distinguish whether it's stopping * a seq_file iteration which is fully initialized with an active reference * or an aborted kernfs_seq_start() due to get_active failure. The * position pointer is the only context for each seq_file iteration and * thus the stop condition should be encoded in it. As the return value is * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable * choice to indicate get_active failure. * * Unfortunately, this is complicated due to the optional custom seq_file * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or * custom seq_file operations and thus can't decide whether put_active * should be performed or not only on ERR_PTR(-ENODEV). * * This is worked around by factoring out the custom seq_stop() and * put_active part into kernfs_seq_stop_active(), skipping it from * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures * that kernfs_seq_stop_active() is skipped only after get_active failure. */ static void kernfs_seq_stop_active(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops = kernfs_ops(of->kn); if (ops->seq_stop) ops->seq_stop(sf, v); kernfs_put_active(of->kn); } static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops; /* * @of->mutex nests outside active ref and is primarily to ensure that * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) return ERR_PTR(-ENODEV); ops = kernfs_ops(of->kn); if (ops->seq_start) { void *next = ops->seq_start(sf, ppos); /* see the comment above kernfs_seq_stop_active() */ if (next == ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, next); return next; } return single_start(sf, ppos); } static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops = kernfs_ops(of->kn); if (ops->seq_next) { void *next = ops->seq_next(sf, v, ppos); /* see the comment above kernfs_seq_stop_active() */ if (next == ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, next); return next; } else { /* * The same behavior and code as single_open(), always * terminate after the initial read. 
*/ ++*ppos; return NULL; } } static void kernfs_seq_stop(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; if (v != ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, v); mutex_unlock(&of->mutex); } static int kernfs_seq_show(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; of->event = atomic_read(&of_on(of)->event); return of->kn->attr.ops->seq_show(sf, v); } static const struct seq_operations kernfs_seq_ops = { .start = kernfs_seq_start, .next = kernfs_seq_next, .stop = kernfs_seq_stop, .show = kernfs_seq_show, }; /* * As reading a bin file can have side-effects, the exact offset and bytes * specified in read(2) call should be passed to the read callback making * it difficult to use seq_file. Implement simplistic custom buffering for * bin files. */ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); const struct kernfs_ops *ops; char *buf; buf = of->prealloc_buf; if (buf) mutex_lock(&of->prealloc_mutex); else buf = kmalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; /* * @of->mutex nests outside active ref and is used both to ensure that * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { len = -ENODEV; mutex_unlock(&of->mutex); goto out_free; } of->event = atomic_read(&of_on(of)->event); ops = kernfs_ops(of->kn); if (ops->read) len = ops->read(of, buf, len, iocb->ki_pos); else len = -EINVAL; kernfs_put_active(of->kn); mutex_unlock(&of->mutex); if (len < 0) goto out_free; if (copy_to_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) mutex_unlock(&of->prealloc_mutex); else kfree(buf); return len; } static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) { if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) return seq_read_iter(iocb, iter); return kernfs_file_read_iter(iocb, iter); } /* * Copy data in from userland and pass it to the matching kernfs write * operation. * * There is no easy way for us to know if userspace is only doing a partial * write, so we don't support them. We expect the entire buffer to come on * the first write. Hint: if you're writing a value, first read the file, * modify only the value you're changing, then write entire buffer * back. */ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); ssize_t len = iov_iter_count(iter); const struct kernfs_ops *ops; char *buf; if (of->atomic_write_len) { if (len > of->atomic_write_len) return -E2BIG; } else { len = min_t(size_t, len, PAGE_SIZE); } buf = of->prealloc_buf; if (buf) mutex_lock(&of->prealloc_mutex); else buf = kmalloc(len + 1, GFP_KERNEL); if (!buf) return -ENOMEM; if (copy_from_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } buf[len] = '\0'; /* guarantee string termination */ /* * @of->mutex nests outside active ref and is used both to ensure that * the ops aren't called concurrently for the same open file. 
*/ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { mutex_unlock(&of->mutex); len = -ENODEV; goto out_free; } ops = kernfs_ops(of->kn); if (ops->write) len = ops->write(of, buf, len, iocb->ki_pos); else len = -EINVAL; kernfs_put_active(of->kn); mutex_unlock(&of->mutex); if (len > 0) iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) mutex_unlock(&of->prealloc_mutex); else kfree(buf); return len; } static void kernfs_vma_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); if (!of->vm_ops) return; if (!kernfs_get_active(of->kn)) return; if (of->vm_ops->open) of->vm_ops->open(vma); kernfs_put_active(of->kn); } static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); vm_fault_t ret; if (!of->vm_ops) return VM_FAULT_SIGBUS; if (!kernfs_get_active(of->kn)) return VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS; if (of->vm_ops->fault) ret = of->vm_ops->fault(vmf); kernfs_put_active(of->kn); return ret; } static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); vm_fault_t ret; if (!of->vm_ops) return VM_FAULT_SIGBUS; if (!kernfs_get_active(of->kn)) return VM_FAULT_SIGBUS; ret = 0; if (of->vm_ops->page_mkwrite) ret = of->vm_ops->page_mkwrite(vmf); else file_update_time(file); kernfs_put_active(of->kn); return ret; } static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { struct file *file = vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); int ret; if (!of->vm_ops) return -EINVAL; if (!kernfs_get_active(of->kn)) return -EINVAL; ret = -EINVAL; if (of->vm_ops->access) ret = of->vm_ops->access(vma, addr, buf, len, write); kernfs_put_active(of->kn); return ret; } static const struct vm_operations_struct kernfs_vm_ops = { .open = kernfs_vma_open, .fault = kernfs_vma_fault, .page_mkwrite = kernfs_vma_page_mkwrite, .access = kernfs_vma_access, }; static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; int rc; /* * mmap path and of->mutex are prone to triggering spurious lockdep * warnings and we don't want to add spurious locking dependency * between the two. Check whether mmap is actually implemented * without grabbing @of->mutex by testing HAS_MMAP flag. See the * comment in kernfs_fop_open() for more details. */ if (!(of->kn->flags & KERNFS_HAS_MMAP)) return -ENODEV; mutex_lock(&of->mutex); rc = -ENODEV; if (!kernfs_get_active(of->kn)) goto out_unlock; ops = kernfs_ops(of->kn); rc = ops->mmap(of, vma); if (rc) goto out_put; /* * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() * to satisfy versions of X which crash if the mmap fails: that * substitutes a new vm_file, and we don't then want bin_vm_ops. */ if (vma->vm_file != file) goto out_put; rc = -EINVAL; if (of->mmapped && of->vm_ops != vma->vm_ops) goto out_put; /* * It is not possible to successfully wrap close. * So error if someone is trying to use close. 
*/ if (vma->vm_ops && vma->vm_ops->close) goto out_put; rc = 0; if (!of->mmapped) { of->mmapped = true; of_on(of)->nr_mmapped++; of->vm_ops = vma->vm_ops; } vma->vm_ops = &kernfs_vm_ops; out_put: kernfs_put_active(of->kn); out_unlock: mutex_unlock(&of->mutex); return rc; } /** * kernfs_get_open_node - get or create kernfs_open_node * @kn: target kernfs_node * @of: kernfs_open_file for this instance of open * * If @kn->attr.open exists, increment its reference count; otherwise, * create one. @of is chained to the files list. * * Locking: * Kernel thread context (may sleep). * * Return: * %0 on success, -errno on failure. */ static int kernfs_get_open_node(struct kernfs_node *kn, struct kernfs_open_file *of) { struct kernfs_open_node *on; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { /* not there, initialize a new one */ on = kzalloc(sizeof(*on), GFP_KERNEL); if (!on) { mutex_unlock(mutex); return -ENOMEM; } atomic_set(&on->event, 1); init_waitqueue_head(&on->poll); INIT_LIST_HEAD(&on->files); rcu_assign_pointer(kn->attr.open, on); } list_add_tail(&of->list, &on->files); if (kn->flags & KERNFS_HAS_RELEASE) on->nr_to_release++; mutex_unlock(mutex); return 0; } /** * kernfs_unlink_open_file - Unlink @of from @kn. * * @kn: target kernfs_node * @of: associated kernfs_open_file * @open_failed: ->open() failed, cancel ->release() * * Unlink @of from list of @kn's associated open files. If list of * associated open files becomes empty, disassociate and free * kernfs_open_node. * * LOCKING: * None. */ static void kernfs_unlink_open_file(struct kernfs_node *kn, struct kernfs_open_file *of, bool open_failed) { struct kernfs_open_node *on; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { mutex_unlock(mutex); return; } if (of) { if (kn->flags & KERNFS_HAS_RELEASE) { WARN_ON_ONCE(of->released == open_failed); if (open_failed) on->nr_to_release--; } if (of->mmapped) on->nr_mmapped--; list_del(&of->list); } if (list_empty(&on->files)) { rcu_assign_pointer(kn->attr.open, NULL); kfree_rcu(on, rcu_head); } mutex_unlock(mutex); } static int kernfs_fop_open(struct inode *inode, struct file *file) { struct kernfs_node *kn = inode->i_private; struct kernfs_root *root = kernfs_root(kn); const struct kernfs_ops *ops; struct kernfs_open_file *of; bool has_read, has_write, has_mmap; int error = -EACCES; if (!kernfs_get_active(kn)) return -ENODEV; ops = kernfs_ops(kn); has_read = ops->seq_show || ops->read || ops->mmap; has_write = ops->write || ops->mmap; has_mmap = ops->mmap; /* see the flag definition for details */ if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { if ((file->f_mode & FMODE_WRITE) && (!(inode->i_mode & S_IWUGO) || !has_write)) goto err_out; if ((file->f_mode & FMODE_READ) && (!(inode->i_mode & S_IRUGO) || !has_read)) goto err_out; } /* allocate a kernfs_open_file for the file */ error = -ENOMEM; of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); if (!of) goto err_out; /* * The following is done to give a different lockdep key to * @of->mutex for files which implement mmap. This is a rather * crude way to avoid false positive lockdep warning around * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under * which mm->mmap_lock nests, while holding @of->mutex. As each * open file has a separate mutex, it's okay as long as those don't * happen on the same file. 
At this point, we can't easily give * each file a separate locking class. Let's differentiate on * whether the file has mmap or not for now. * * For similar reasons, writable and readonly files are given different * lockdep key, because the writable file /sys/power/resume may call vfs * lookup helpers for arbitrary paths and readonly files can be read by * overlayfs from vfs helpers when sysfs is a lower layer of overalyfs. * * All three cases look the same. They're supposed to * look that way and give @of->mutex different static lockdep keys. */ if (has_mmap) mutex_init(&of->mutex); else if (file->f_mode & FMODE_WRITE) mutex_init(&of->mutex); else mutex_init(&of->mutex); of->kn = kn; of->file = file; /* * Write path needs to atomic_write_len outside active reference. * Cache it in open_file. See kernfs_fop_write_iter() for details. */ of->atomic_write_len = ops->atomic_write_len; error = -EINVAL; /* * ->seq_show is incompatible with ->prealloc, * as seq_read does its own allocation. * ->read must be used instead. */ if (ops->prealloc && ops->seq_show) goto err_free; if (ops->prealloc) { int len = of->atomic_write_len ?: PAGE_SIZE; of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); error = -ENOMEM; if (!of->prealloc_buf) goto err_free; mutex_init(&of->prealloc_mutex); } /* * Always instantiate seq_file even if read access doesn't use * seq_file or is not requested. This unifies private data access * and readable regular files are the vast majority anyway. */ if (ops->seq_show) error = seq_open(file, &kernfs_seq_ops); else error = seq_open(file, NULL); if (error) goto err_free; of->seq_file = file->private_data; of->seq_file->private = of; /* seq_file clears PWRITE unconditionally, restore it if WRITE */ if (file->f_mode & FMODE_WRITE) file->f_mode |= FMODE_PWRITE; /* make sure we have open node struct */ error = kernfs_get_open_node(kn, of); if (error) goto err_seq_release; if (ops->open) { /* nobody has access to @of yet, skip @of->mutex */ error = ops->open(of); if (error) goto err_put_node; } /* open succeeded, put active references */ kernfs_put_active(kn); return 0; err_put_node: kernfs_unlink_open_file(kn, of, true); err_seq_release: seq_release(inode, file); err_free: kfree(of->prealloc_buf); kfree(of); err_out: kernfs_put_active(kn); return error; } /* used from release/drain to ensure that ->release() is called exactly once */ static void kernfs_release_file(struct kernfs_node *kn, struct kernfs_open_file *of) { /* * @of is guaranteed to have no other file operations in flight and * we just want to synchronize release and drain paths. * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used * here because drain path may be called from places which can * cause circular dependency. */ lockdep_assert_held(kernfs_open_file_mutex_ptr(kn)); if (!of->released) { /* * A file is never detached without being released and we * need to be able to release files which are deactivated * and being drained. Don't use kernfs_ops(). 
*/ kn->attr.ops->release(of); of->released = true; of_on(of)->nr_to_release--; } } static int kernfs_fop_release(struct inode *inode, struct file *filp) { struct kernfs_node *kn = inode->i_private; struct kernfs_open_file *of = kernfs_of(filp); if (kn->flags & KERNFS_HAS_RELEASE) { struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); kernfs_release_file(kn, of); mutex_unlock(mutex); } kernfs_unlink_open_file(kn, of, false); seq_release(inode, filp); kfree(of->prealloc_buf); kfree(of); return 0; } bool kernfs_should_drain_open_files(struct kernfs_node *kn) { struct kernfs_open_node *on; bool ret; /* * @kn being deactivated guarantees that @kn->attr.open can't change * beneath us making the lockless test below safe. */ WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); rcu_read_lock(); on = rcu_dereference(kn->attr.open); ret = on && (on->nr_mmapped || on->nr_to_release); rcu_read_unlock(); return ret; } void kernfs_drain_open_files(struct kernfs_node *kn) { struct kernfs_open_node *on; struct kernfs_open_file *of; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { mutex_unlock(mutex); return; } list_for_each_entry(of, &on->files, list) { struct inode *inode = file_inode(of->file); if (of->mmapped) { unmap_mapping_range(inode->i_mapping, 0, 0, 1); of->mmapped = false; on->nr_mmapped--; } if (kn->flags & KERNFS_HAS_RELEASE) kernfs_release_file(kn, of); } WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release); mutex_unlock(mutex); } /* * Kernfs attribute files are pollable. The idea is that you read * the content and then you use 'poll' or 'select' to wait for * the content to change. When the content changes (assuming the * manager for the kobject supports notification), poll will * return EPOLLERR|EPOLLPRI, and select will return the fd whether * it is waiting for read, write, or exceptions. * Once poll/select indicates that the value has changed, you * need to close and re-open the file, or seek to 0 and read again. * Reminder: this only works for attributes which actively support * it, and it is not possible to test an attribute from userspace * to see if it supports poll (Neither 'poll' nor 'select' return * an appropriate error code). When in doubt, set a suitable timeout value. */ __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) { struct kernfs_open_node *on = of_on(of); poll_wait(of->file, &on->poll, wait); if (of->event != atomic_read(&on->event)) return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; return DEFAULT_POLLMASK; } static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) { struct kernfs_open_file *of = kernfs_of(filp); struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); __poll_t ret; if (!kernfs_get_active(kn)) return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; if (kn->attr.ops->poll) ret = kn->attr.ops->poll(of, wait); else ret = kernfs_generic_poll(of, wait); kernfs_put_active(kn); return ret; } static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence) { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; loff_t ret; /* * @of->mutex nests outside active ref and is primarily to ensure that * the ops aren't called concurrently for the same open file. 
*/ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { mutex_unlock(&of->mutex); return -ENODEV; } ops = kernfs_ops(of->kn); if (ops->llseek) ret = ops->llseek(of, offset, whence); else ret = generic_file_llseek(file, offset, whence); kernfs_put_active(of->kn); mutex_unlock(&of->mutex); return ret; } static void kernfs_notify_workfn(struct work_struct *work) { struct kernfs_node *kn; struct kernfs_super_info *info; struct kernfs_root *root; repeat: /* pop one off the notify_list */ spin_lock_irq(&kernfs_notify_lock); kn = kernfs_notify_list; if (kn == KERNFS_NOTIFY_EOL) { spin_unlock_irq(&kernfs_notify_lock); return; } kernfs_notify_list = kn->attr.notify_next; kn->attr.notify_next = NULL; spin_unlock_irq(&kernfs_notify_lock); root = kernfs_root(kn); /* kick fsnotify */ down_read(&root->kernfs_supers_rwsem); list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct kernfs_node *parent; struct inode *p_inode = NULL; struct inode *inode; struct qstr name; /* * We want fsnotify_modify() on @kn but as the * modifications aren't originating from userland don't * have the matching @file available. Look up the inodes * and generate the events manually. */ inode = ilookup(info->sb, kernfs_ino(kn)); if (!inode) continue; name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name)); parent = kernfs_get_parent(kn); if (parent) { p_inode = ilookup(info->sb, kernfs_ino(parent)); if (p_inode) { fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD, inode, FSNOTIFY_EVENT_INODE, p_inode, &name, inode, 0); iput(p_inode); } kernfs_put(parent); } if (!p_inode) fsnotify_inode(inode, FS_MODIFY); iput(inode); } up_read(&root->kernfs_supers_rwsem); kernfs_put(kn); goto repeat; } /** * kernfs_notify - notify a kernfs file * @kn: file to notify * * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any * context. */ void kernfs_notify(struct kernfs_node *kn) { static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); unsigned long flags; struct kernfs_open_node *on; if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) return; /* kick poll immediately */ rcu_read_lock(); on = rcu_dereference(kn->attr.open); if (on) { atomic_inc(&on->event); wake_up_interruptible(&on->poll); } rcu_read_unlock(); /* schedule work to kick fsnotify */ spin_lock_irqsave(&kernfs_notify_lock, flags); if (!kn->attr.notify_next) { kernfs_get(kn); kn->attr.notify_next = kernfs_notify_list; kernfs_notify_list = kn; schedule_work(&kernfs_notify_work); } spin_unlock_irqrestore(&kernfs_notify_lock, flags); } EXPORT_SYMBOL_GPL(kernfs_notify); const struct file_operations kernfs_file_fops = { .read_iter = kernfs_fop_read_iter, .write_iter = kernfs_fop_write_iter, .llseek = kernfs_fop_llseek, .mmap = kernfs_fop_mmap, .open = kernfs_fop_open, .release = kernfs_fop_release, .poll = kernfs_fop_poll, .fsync = noop_fsync, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; /** * __kernfs_create_file - kernfs internal function to create a file * @parent: directory to create the file in * @name: name of the file * @mode: mode of the file * @uid: uid of the file * @gid: gid of the file * @size: size of the file * @ops: kernfs operations for the file * @priv: private data for the file * @ns: optional namespace tag of the file * @key: lockdep key for the file's active_ref, %NULL to disable lockdep * * Return: the created node on success, ERR_PTR() value on error. 
*/ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { struct kernfs_node *kn; unsigned flags; int rc; flags = KERNFS_FILE; kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, uid, gid, flags); if (!kn) return ERR_PTR(-ENOMEM); kn->attr.ops = ops; kn->attr.size = size; kn->ns = ns; kn->priv = priv; #ifdef CONFIG_DEBUG_LOCK_ALLOC if (key) { lockdep_init_map(&kn->dep_map, "kn->active", key, 0); kn->flags |= KERNFS_LOCKDEP; } #endif /* * kn->attr.ops is accessible only while holding active ref. We * need to know whether some ops are implemented outside active * ref. Cache their existence in flags. */ if (ops->seq_show) kn->flags |= KERNFS_HAS_SEQ_SHOW; if (ops->mmap) kn->flags |= KERNFS_HAS_MMAP; if (ops->release) kn->flags |= KERNFS_HAS_RELEASE; rc = kernfs_add_one(kn); if (rc) { kernfs_put(kn); return ERR_PTR(rc); } return kn; }
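As a companion to the comment above kernfs_generic_poll(), here is a hypothetical userspace sketch (not part of fs/kernfs/file.c) of the read, poll, seek-to-0, re-read protocol it describes. The attribute path is taken from the command line and must belong to a file whose owner actually calls kernfs_notify()/sysfs_notify(); otherwise poll() simply never wakes up.

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct pollfd pfd = { .events = POLLPRI };
	char buf[256];
	ssize_t len;

	if (argc != 2) {
		fprintf(stderr, "usage: %s /sys/.../attribute\n", argv[0]);
		return 1;
	}
	pfd.fd = open(argv[1], O_RDONLY);
	if (pfd.fd < 0) {
		perror("open");
		return 1;
	}

	/* initial read latches of->event so the next change is seen as new */
	len = read(pfd.fd, buf, sizeof(buf) - 1);

	for (;;) {
		/* kernfs_generic_poll() adds EPOLLERR|EPOLLPRI on a change */
		if (poll(&pfd, 1, -1) < 0)
			break;
		/* per the comment above: seek back to 0 and read again */
		if (lseek(pfd.fd, 0, SEEK_SET) < 0)
			break;
		len = read(pfd.fd, buf, sizeof(buf) - 1);
		if (len < 0)
			break;
		buf[len] = '\0';
		printf("attribute changed: %s", buf);
	}
	close(pfd.fd);
	return 0;
}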
3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 
3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 
4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Security plug functions
 *
 * Copyright (C) 2001 WireX Communications, Inc <chris@wirex.com>
 * Copyright (C) 2001-2002 Greg Kroah-Hartman <greg@kroah.com>
 * Copyright (C) 2001 Networks Associates Technology, Inc <ssmalley@nai.com>
 * Copyright (C) 2016 Mellanox Technologies
 * Copyright (C) 2023 Microsoft Corporation <paul@paul-moore.com>
 */

#define pr_fmt(fmt) "LSM: " fmt

#include <linux/bpf.h>
#include <linux/capability.h>
#include <linux/dcache.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kernel_read_file.h>
#include <linux/lsm_hooks.h>
#include <linux/fsnotify.h>
#include <linux/mman.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/xattr.h>
#include <linux/msg.h>
#include <linux/overflow.h>
#include <net/flow.h>
/* How many LSMs were built into the kernel? */
#define LSM_COUNT (__end_lsm_info - __start_lsm_info)

/*
 * How many LSMs are built into the kernel as determined at
 * build time. Used to determine fixed array sizes.
 * The capability module is accounted for by CONFIG_SECURITY
 */
#define LSM_CONFIG_COUNT ( \
	(IS_ENABLED(CONFIG_SECURITY) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_SELINUX) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_SMACK) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_TOMOYO) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_APPARMOR) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_YAMA) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_LOADPIN) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_SAFESETID) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_BPF_LSM) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_SECURITY_LANDLOCK) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_IMA) ? 1 : 0) + \
	(IS_ENABLED(CONFIG_EVM) ? 1 : 0))

/*
 * These are descriptions of the reasons that can be passed to the
 * security_locked_down() LSM hook. Placing this array here allows
 * all security modules to use the same descriptions for auditing
 * purposes.
 */
const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = {
	[LOCKDOWN_NONE] = "none",
	[LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
	[LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
	[LOCKDOWN_EFI_TEST] = "/dev/efi_test access",
	[LOCKDOWN_KEXEC] = "kexec of unsigned images",
	[LOCKDOWN_HIBERNATION] = "hibernation",
	[LOCKDOWN_PCI_ACCESS] = "direct PCI access",
	[LOCKDOWN_IOPORT] = "raw io port access",
	[LOCKDOWN_MSR] = "raw MSR access",
	[LOCKDOWN_ACPI_TABLES] = "modifying ACPI tables",
	[LOCKDOWN_DEVICE_TREE] = "modifying device tree contents",
	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
	[LOCKDOWN_DEBUGFS] = "debugfs access",
	[LOCKDOWN_XMON_WR] = "xmon write access",
	[LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
	[LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
	[LOCKDOWN_RTAS_ERROR_INJECTION] = "RTAS error injection",
	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
	[LOCKDOWN_KCORE] = "/proc/kcore access",
	[LOCKDOWN_KPROBES] = "use of kprobes",
	[LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM",
	[LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
	[LOCKDOWN_PERF] = "unsafe use of perf",
	[LOCKDOWN_TRACEFS] = "use of tracefs",
	[LOCKDOWN_XMON_RW] = "xmon read and write access",
	[LOCKDOWN_XFRM_SECRET] = "xfrm SA secret",
	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
};

struct security_hook_heads security_hook_heads __ro_after_init;
static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);

static struct kmem_cache *lsm_file_cache;
static struct kmem_cache *lsm_inode_cache;

char *lsm_names;
static struct lsm_blob_sizes blob_sizes __ro_after_init;

/* Boot-time LSM user choice */
static __initdata const char *chosen_lsm_order;
static __initdata const char *chosen_major_lsm;

static __initconst const char *const builtin_lsm_order = CONFIG_LSM;

/* Ordered list of LSMs to initialize. */
static __initdata struct lsm_info **ordered_lsms;
static __initdata struct lsm_info *exclusive;

static __initdata bool debug;
#define init_debug(...) \
	do { \
		if (debug) \
			pr_info(__VA_ARGS__); \
	} while (0)

static bool __init is_enabled(struct lsm_info *lsm)
{
	if (!lsm->enabled)
		return false;
	return *lsm->enabled;
}
/* Mark an LSM's enabled flag. */
static int lsm_enabled_true __initdata = 1;
static int lsm_enabled_false __initdata = 0;
static void __init set_enabled(struct lsm_info *lsm, bool enabled)
{
	/*
	 * When an LSM hasn't configured an enable variable, we can use
	 * a hard-coded location for storing the default enabled state.
	 */
	if (!lsm->enabled) {
		if (enabled)
			lsm->enabled = &lsm_enabled_true;
		else
			lsm->enabled = &lsm_enabled_false;
	} else if (lsm->enabled == &lsm_enabled_true) {
		if (!enabled)
			lsm->enabled = &lsm_enabled_false;
	} else if (lsm->enabled == &lsm_enabled_false) {
		if (enabled)
			lsm->enabled = &lsm_enabled_true;
	} else {
		*lsm->enabled = enabled;
	}
}

/* Is an LSM already listed in the ordered LSMs list? */
static bool __init exists_ordered_lsm(struct lsm_info *lsm)
{
	struct lsm_info **check;

	for (check = ordered_lsms; *check; check++)
		if (*check == lsm)
			return true;

	return false;
}

/* Append an LSM to the list of ordered LSMs to initialize. */
static int last_lsm __initdata;
static void __init append_ordered_lsm(struct lsm_info *lsm, const char *from)
{
	/* Ignore duplicate selections. */
	if (exists_ordered_lsm(lsm))
		return;

	if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from))
		return;

	/* Enable this LSM, if it is not already set. */
	if (!lsm->enabled)
		lsm->enabled = &lsm_enabled_true;
	ordered_lsms[last_lsm++] = lsm;

	init_debug("%s ordered: %s (%s)\n", from, lsm->name,
		   is_enabled(lsm) ? "enabled" : "disabled");
}

/* Is an LSM allowed to be initialized? */
static bool __init lsm_allowed(struct lsm_info *lsm)
{
	/* Skip if the LSM is disabled. */
	if (!is_enabled(lsm))
		return false;

	/* Not allowed if another exclusive LSM already initialized. */
	if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && exclusive) {
		init_debug("exclusive disabled: %s\n", lsm->name);
		return false;
	}

	return true;
}

static void __init lsm_set_blob_size(int *need, int *lbs)
{
	int offset;

	if (*need <= 0)
		return;

	offset = ALIGN(*lbs, sizeof(void *));
	*lbs = offset + *need;
	*need = offset;
}

static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
{
	if (!needed)
		return;

	lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
	lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
	/*
	 * The inode blob gets an rcu_head in addition to
	 * what the modules might need.
	 */
	if (needed->lbs_inode && blob_sizes.lbs_inode == 0)
		blob_sizes.lbs_inode = sizeof(struct rcu_head);
	lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode);
	lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc);
	lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg);
	lsm_set_blob_size(&needed->lbs_superblock, &blob_sizes.lbs_superblock);
	lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task);
	lsm_set_blob_size(&needed->lbs_xattr_count,
			  &blob_sizes.lbs_xattr_count);
}

/* Prepare LSM for initialization. */
static void __init prepare_lsm(struct lsm_info *lsm)
{
	int enabled = lsm_allowed(lsm);

	/* Record enablement (to handle any following exclusive LSMs). */
	set_enabled(lsm, enabled);

	/* If enabled, do pre-initialization work. */
	if (enabled) {
		if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && !exclusive) {
			exclusive = lsm;
			init_debug("exclusive chosen: %s\n", lsm->name);
		}

		lsm_set_blob_sizes(lsm->blobs);
	}
}

/* Initialize a given LSM, if it is enabled. */
static void __init initialize_lsm(struct lsm_info *lsm)
{
	if (is_enabled(lsm)) {
		int ret;

		init_debug("initializing %s\n", lsm->name);
		ret = lsm->init();
		WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret);
	}
}
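/*
 * Illustrative example: how lsm_set_blob_size() above lays out the shared
 * blob space, assuming a 64-bit build (sizeof(void *) == 8) and two LSMs
 * that request 4 and 8 bytes of cred blob:
 *
 *   LSM A: offset = ALIGN(0, 8) = 0, blob_sizes.lbs_cred becomes 4,
 *          A's lbs_cred is rewritten to offset 0
 *   LSM B: offset = ALIGN(4, 8) = 8, blob_sizes.lbs_cred becomes 16,
 *          B's lbs_cred is rewritten to offset 8
 *
 * Each module then addresses cred->security at its recorded offset, so the
 * single allocation made later in lsm_cred_alloc() serves every LSM.
 */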
/*
 * Current index to use while initializing the lsm id list.
 */
u32 lsm_active_cnt __ro_after_init;
const struct lsm_id *lsm_idlist[LSM_CONFIG_COUNT];

/* Populate ordered LSMs list from comma-separated LSM name list. */
static void __init ordered_lsm_parse(const char *order, const char *origin)
{
	struct lsm_info *lsm;
	char *sep, *name, *next;

	/* LSM_ORDER_FIRST is always first. */
	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
		if (lsm->order == LSM_ORDER_FIRST)
			append_ordered_lsm(lsm, " first");
	}

	/* Process "security=", if given. */
	if (chosen_major_lsm) {
		struct lsm_info *major;

		/*
		 * To match the original "security=" behavior, this
		 * explicitly does NOT fallback to another Legacy Major
		 * if the selected one was separately disabled: disable
		 * all non-matching Legacy Major LSMs.
		 */
		for (major = __start_lsm_info; major < __end_lsm_info;
		     major++) {
			if ((major->flags & LSM_FLAG_LEGACY_MAJOR) &&
			    strcmp(major->name, chosen_major_lsm) != 0) {
				set_enabled(major, false);
				init_debug("security=%s disabled: %s (only one legacy major LSM)\n",
					   chosen_major_lsm, major->name);
			}
		}
	}

	sep = kstrdup(order, GFP_KERNEL);
	next = sep;
	/* Walk the list, looking for matching LSMs. */
	while ((name = strsep(&next, ",")) != NULL) {
		bool found = false;

		for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
			if (strcmp(lsm->name, name) == 0) {
				if (lsm->order == LSM_ORDER_MUTABLE)
					append_ordered_lsm(lsm, origin);
				found = true;
			}
		}

		if (!found)
			init_debug("%s ignored: %s (not built into kernel)\n",
				   origin, name);
	}

	/* Process "security=", if given. */
	if (chosen_major_lsm) {
		for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
			if (exists_ordered_lsm(lsm))
				continue;
			if (strcmp(lsm->name, chosen_major_lsm) == 0)
				append_ordered_lsm(lsm, "security=");
		}
	}

	/* LSM_ORDER_LAST is always last. */
	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
		if (lsm->order == LSM_ORDER_LAST)
			append_ordered_lsm(lsm, " last");
	}

	/* Disable all LSMs not in the ordered list. */
	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
		if (exists_ordered_lsm(lsm))
			continue;
		set_enabled(lsm, false);
		init_debug("%s skipped: %s (not in requested order)\n",
			   origin, lsm->name);
	}

	kfree(sep);
}

static void __init lsm_early_cred(struct cred *cred);
static void __init lsm_early_task(struct task_struct *task);

static int lsm_append(const char *new, char **result);

static void __init report_lsm_order(void)
{
	struct lsm_info **lsm, *early;
	int first = 0;

	pr_info("initializing lsm=");

	/* Report each enabled LSM name, comma separated. */
	for (early = __start_early_lsm_info;
	     early < __end_early_lsm_info; early++)
		if (is_enabled(early))
			pr_cont("%s%s", first++ == 0 ? "" : ",", early->name);
	for (lsm = ordered_lsms; *lsm; lsm++)
		if (is_enabled(*lsm))
			pr_cont("%s%s", first++ == 0 ?
"" : ",", (*lsm)->name); pr_cont("\n"); } static void __init ordered_lsm_init(void) { struct lsm_info **lsm; ordered_lsms = kcalloc(LSM_COUNT + 1, sizeof(*ordered_lsms), GFP_KERNEL); if (chosen_lsm_order) { if (chosen_major_lsm) { pr_warn("security=%s is ignored because it is superseded by lsm=%s\n", chosen_major_lsm, chosen_lsm_order); chosen_major_lsm = NULL; } ordered_lsm_parse(chosen_lsm_order, "cmdline"); } else ordered_lsm_parse(builtin_lsm_order, "builtin"); for (lsm = ordered_lsms; *lsm; lsm++) prepare_lsm(*lsm); report_lsm_order(); init_debug("cred blob size = %d\n", blob_sizes.lbs_cred); init_debug("file blob size = %d\n", blob_sizes.lbs_file); init_debug("inode blob size = %d\n", blob_sizes.lbs_inode); init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc); init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg); init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock); init_debug("task blob size = %d\n", blob_sizes.lbs_task); init_debug("xattr slots = %d\n", blob_sizes.lbs_xattr_count); /* * Create any kmem_caches needed for blobs */ if (blob_sizes.lbs_file) lsm_file_cache = kmem_cache_create("lsm_file_cache", blob_sizes.lbs_file, 0, SLAB_PANIC, NULL); if (blob_sizes.lbs_inode) lsm_inode_cache = kmem_cache_create("lsm_inode_cache", blob_sizes.lbs_inode, 0, SLAB_PANIC, NULL); lsm_early_cred((struct cred *) current->cred); lsm_early_task(current); for (lsm = ordered_lsms; *lsm; lsm++) initialize_lsm(*lsm); kfree(ordered_lsms); } int __init early_security_init(void) { struct lsm_info *lsm; #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ INIT_HLIST_HEAD(&security_hook_heads.NAME); #include "linux/lsm_hook_defs.h" #undef LSM_HOOK for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { if (!lsm->enabled) lsm->enabled = &lsm_enabled_true; prepare_lsm(lsm); initialize_lsm(lsm); } return 0; } /** * security_init - initializes the security framework * * This should be called early in the kernel initialization sequence. */ int __init security_init(void) { struct lsm_info *lsm; init_debug("legacy security=%s\n", chosen_major_lsm ? : " *unspecified*"); init_debug(" CONFIG_LSM=%s\n", builtin_lsm_order); init_debug("boot arg lsm=%s\n", chosen_lsm_order ? : " *unspecified*"); /* * Append the names of the early LSM modules now that kmalloc() is * available */ for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { init_debug(" early started: %s (%s)\n", lsm->name, is_enabled(lsm) ? "enabled" : "disabled"); if (lsm->enabled) lsm_append(lsm->name, &lsm_names); } /* Load LSMs in specified order. */ ordered_lsm_init(); return 0; } /* Save user chosen LSM */ static int __init choose_major_lsm(char *str) { chosen_major_lsm = str; return 1; } __setup("security=", choose_major_lsm); /* Explicitly choose LSM initialization order. */ static int __init choose_lsm_order(char *str) { chosen_lsm_order = str; return 1; } __setup("lsm=", choose_lsm_order); /* Enable LSM order debugging. 
/* Enable LSM order debugging. */
static int __init enable_debug(char *str)
{
	debug = true;
	return 1;
}
__setup("lsm.debug", enable_debug);

static bool match_last_lsm(const char *list, const char *lsm)
{
	const char *last;

	if (WARN_ON(!list || !lsm))
		return false;
	last = strrchr(list, ',');
	if (last)
		/* Pass the comma, strcmp() will check for '\0' */
		last++;
	else
		last = list;
	return !strcmp(last, lsm);
}

static int lsm_append(const char *new, char **result)
{
	char *cp;

	if (*result == NULL) {
		*result = kstrdup(new, GFP_KERNEL);
		if (*result == NULL)
			return -ENOMEM;
	} else {
		/* Check if it is the last registered name */
		if (match_last_lsm(*result, new))
			return 0;
		cp = kasprintf(GFP_KERNEL, "%s,%s", *result, new);
		if (cp == NULL)
			return -ENOMEM;
		kfree(*result);
		*result = cp;
	}
	return 0;
}

/**
 * security_add_hooks - Add a modules hooks to the hook lists.
 * @hooks: the hooks to add
 * @count: the number of hooks to add
 * @lsmid: the identification information for the security module
 *
 * Each LSM has to register its hooks with the infrastructure.
 */
void __init security_add_hooks(struct security_hook_list *hooks, int count,
			       const struct lsm_id *lsmid)
{
	int i;

	/*
	 * A security module may call security_add_hooks() more
	 * than once during initialization, and LSM initialization
	 * is serialized. Landlock is one such case.
	 * Look at the previous entry, if there is one, for duplication.
	 */
	if (lsm_active_cnt == 0 || lsm_idlist[lsm_active_cnt - 1] != lsmid) {
		if (lsm_active_cnt >= LSM_CONFIG_COUNT)
			panic("%s Too many LSMs registered.\n", __func__);
		lsm_idlist[lsm_active_cnt++] = lsmid;
	}

	for (i = 0; i < count; i++) {
		hooks[i].lsmid = lsmid;
		hlist_add_tail_rcu(&hooks[i].list, hooks[i].head);
	}

	/*
	 * Don't try to append during early_security_init(), we'll come back
	 * and fix this up afterwards.
	 */
	if (slab_is_available()) {
		if (lsm_append(lsmid->name, &lsm_names) < 0)
			panic("%s - Cannot get early memory.\n", __func__);
	}
}

int call_blocking_lsm_notifier(enum lsm_event event, void *data)
{
	return blocking_notifier_call_chain(&blocking_lsm_notifier_chain,
					    event, data);
}
EXPORT_SYMBOL(call_blocking_lsm_notifier);

int register_blocking_lsm_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&blocking_lsm_notifier_chain,
						nb);
}
EXPORT_SYMBOL(register_blocking_lsm_notifier);

int unregister_blocking_lsm_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&blocking_lsm_notifier_chain,
						  nb);
}
EXPORT_SYMBOL(unregister_blocking_lsm_notifier);

/**
 * lsm_cred_alloc - allocate a composite cred blob
 * @cred: the cred that needs a blob
 * @gfp: allocation type
 *
 * Allocate the cred blob for all the modules
 *
 * Returns 0, or -ENOMEM if memory can't be allocated.
 */
static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
{
	if (blob_sizes.lbs_cred == 0) {
		cred->security = NULL;
		return 0;
	}

	cred->security = kzalloc(blob_sizes.lbs_cred, gfp);
	if (cred->security == NULL)
		return -ENOMEM;
	return 0;
}

/**
 * lsm_early_cred - during initialization allocate a composite cred blob
 * @cred: the cred that needs a blob
 *
 * Allocate the cred blob for all the modules
 */
static void __init lsm_early_cred(struct cred *cred)
{
	int rc = lsm_cred_alloc(cred, GFP_KERNEL);

	if (rc)
		panic("%s: Early cred alloc failed.\n", __func__);
}
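/*
 * Illustrative note: lsm_append() above is what builds the comma-separated
 * lsm_names string (for example "capability,landlock,lockdown,yama,..." on
 * a typical build), and match_last_lsm() keeps a module that registers its
 * hooks more than once, as Landlock does, from being listed twice in a row.
 */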
/**
 * lsm_file_alloc - allocate a composite file blob
 * @file: the file that needs a blob
 *
 * Allocate the file blob for all the modules
 *
 * Returns 0, or -ENOMEM if memory can't be allocated.
 */
static int lsm_file_alloc(struct file *file)
{
	if (!lsm_file_cache) {
		file->f_security = NULL;
		return 0;
	}

	file->f_security = kmem_cache_zalloc(lsm_file_cache, GFP_KERNEL);
	if (file->f_security == NULL)
		return -ENOMEM;
	return 0;
}

/**
 * lsm_inode_alloc - allocate a composite inode blob
 * @inode: the inode that needs a blob
 *
 * Allocate the inode blob for all the modules
 *
 * Returns 0, or -ENOMEM if memory can't be allocated.
 */
int lsm_inode_alloc(struct inode *inode)
{
	if (!lsm_inode_cache) {
		inode->i_security = NULL;
		return 0;
	}

	inode->i_security = kmem_cache_zalloc(lsm_inode_cache, GFP_NOFS);
	if (inode->i_security == NULL)
		return -ENOMEM;
	return 0;
}

/**
 * lsm_task_alloc - allocate a composite task blob
 * @task: the task that needs a blob
 *
 * Allocate the task blob for all the modules
 *
 * Returns 0, or -ENOMEM if memory can't be allocated.
 */
static int lsm_task_alloc(struct task_struct *task)
{
	if (blob_sizes.lbs_task == 0) {
		task->security = NULL;
		return 0;
	}

	task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL);
	if (task->security == NULL)
		return -ENOMEM;
	return 0;
}

/**
 * lsm_ipc_alloc - allocate a composite ipc blob
 * @kip: the ipc that needs a blob
 *
 * Allocate the ipc blob for all the modules
 *
 * Returns 0, or -ENOMEM if memory can't be allocated.
 */
static int lsm_ipc_alloc(struct kern_ipc_perm *kip)
{
	if (blob_sizes.lbs_ipc == 0) {
		kip->security = NULL;
		return 0;
	}

	kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL);
	if (kip->security == NULL)
		return -ENOMEM;
	return 0;
}

/**
 * lsm_msg_msg_alloc - allocate a composite msg_msg blob
 * @mp: the msg_msg that needs a blob
 *
 * Allocate the msg_msg blob for all the modules
 *
 * Returns 0, or -ENOMEM if memory can't be allocated.
 */
static int lsm_msg_msg_alloc(struct msg_msg *mp)
{
	if (blob_sizes.lbs_msg_msg == 0) {
		mp->security = NULL;
		return 0;
	}

	mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL);
	if (mp->security == NULL)
		return -ENOMEM;
	return 0;
}

/**
 * lsm_early_task - during initialization allocate a composite task blob
 * @task: the task that needs a blob
 *
 * Allocate the task blob for all the modules
 */
static void __init lsm_early_task(struct task_struct *task)
{
	int rc = lsm_task_alloc(task);

	if (rc)
		panic("%s: Early task alloc failed.\n", __func__);
}

/**
 * lsm_superblock_alloc - allocate a composite superblock blob
 * @sb: the superblock that needs a blob
 *
 * Allocate the superblock blob for all the modules
 *
 * Returns 0, or -ENOMEM if memory can't be allocated.
 */
static int lsm_superblock_alloc(struct super_block *sb)
{
	if (blob_sizes.lbs_superblock == 0) {
		sb->s_security = NULL;
		return 0;
	}

	sb->s_security = kzalloc(blob_sizes.lbs_superblock, GFP_KERNEL);
	if (sb->s_security == NULL)
		return -ENOMEM;
	return 0;
}
/**
 * lsm_fill_user_ctx - Fill a user space lsm_ctx structure
 * @uctx: a userspace LSM context to be filled
 * @uctx_len: available uctx size (input), used uctx size (output)
 * @val: the new LSM context value
 * @val_len: the size of the new LSM context value
 * @id: LSM id
 * @flags: LSM defined flags
 *
 * Fill all of the fields in a userspace lsm_ctx structure. If @uctx is NULL
 * simply calculate the required size to output via @uctx_len and return
 * success.
 *
 * Returns 0 on success, -E2BIG if userspace buffer is not large enough,
 * -EFAULT on a copyout error, -ENOMEM if memory can't be allocated.
 */
int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len,
		      void *val, size_t val_len,
		      u64 id, u64 flags)
{
	struct lsm_ctx *nctx = NULL;
	size_t nctx_len;
	int rc = 0;

	nctx_len = ALIGN(struct_size(nctx, ctx, val_len), sizeof(void *));
	if (nctx_len > *uctx_len) {
		rc = -E2BIG;
		goto out;
	}

	/* no buffer - return success/0 and set @uctx_len to the req size */
	if (!uctx)
		goto out;

	nctx = kzalloc(nctx_len, GFP_KERNEL);
	if (nctx == NULL) {
		rc = -ENOMEM;
		goto out;
	}
	nctx->id = id;
	nctx->flags = flags;
	nctx->len = nctx_len;
	nctx->ctx_len = val_len;
	memcpy(nctx->ctx, val, val_len);

	if (copy_to_user(uctx, nctx, nctx_len))
		rc = -EFAULT;

out:
	kfree(nctx);
	*uctx_len = nctx_len;
	return rc;
}

/*
 * The default value of the LSM hook is defined in linux/lsm_hook_defs.h and
 * can be accessed with:
 *
 *	LSM_RET_DEFAULT(<hook_name>)
 *
 * The macros below define static constants for the default value of each
 * LSM hook.
 */
#define LSM_RET_DEFAULT(NAME) (NAME##_default)
#define DECLARE_LSM_RET_DEFAULT_void(DEFAULT, NAME)
#define DECLARE_LSM_RET_DEFAULT_int(DEFAULT, NAME) \
	static const int __maybe_unused LSM_RET_DEFAULT(NAME) = (DEFAULT);
#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
	DECLARE_LSM_RET_DEFAULT_##RET(DEFAULT, NAME)

#include <linux/lsm_hook_defs.h>
#undef LSM_HOOK

/*
 * Hook list operation macros.
 *
 * call_void_hook:
 *	This is a hook that does not return a value.
 *
 * call_int_hook:
 *	This is a hook that returns a value.
 */

#define call_void_hook(FUNC, ...) \
	do { \
		struct security_hook_list *P; \
		\
		hlist_for_each_entry(P, &security_hook_heads.FUNC, list) \
			P->hook.FUNC(__VA_ARGS__); \
	} while (0)

#define call_int_hook(FUNC, ...) ({ \
	int RC = LSM_RET_DEFAULT(FUNC); \
	do { \
		struct security_hook_list *P; \
		\
		hlist_for_each_entry(P, &security_hook_heads.FUNC, list) { \
			RC = P->hook.FUNC(__VA_ARGS__); \
			if (RC != LSM_RET_DEFAULT(FUNC)) \
				break; \
		} \
	} while (0); \
	RC; \
})

/* Security operations */

/**
 * security_binder_set_context_mgr() - Check if becoming binder ctx mgr is ok
 * @mgr: task credentials of current binder process
 *
 * Check whether @mgr is allowed to be the binder context manager.
 *
 * Return: Return 0 if permission is granted.
 */
int security_binder_set_context_mgr(const struct cred *mgr)
{
	return call_int_hook(binder_set_context_mgr, mgr);
}

/**
 * security_binder_transaction() - Check if a binder transaction is allowed
 * @from: sending process
 * @to: receiving process
 *
 * Check whether @from is allowed to invoke a binder transaction call to @to.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_binder_transaction(const struct cred *from,
				const struct cred *to)
{
	return call_int_hook(binder_transaction, from, to);
}

/**
 * security_binder_transfer_binder() - Check if a binder transfer is allowed
 * @from: sending process
 * @to: receiving process
 *
 * Check whether @from is allowed to transfer a binder reference to @to.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_binder_transfer_binder(const struct cred *from,
				    const struct cred *to)
{
	return call_int_hook(binder_transfer_binder, from, to);
}
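/*
 * Illustrative note: the thin wrappers in the remainder of this file expand
 * through call_int_hook(), e.g. security_binder_transfer_binder() above
 * walks security_hook_heads.binder_transfer_binder and returns the first
 * value that differs from the hook's default (0 here), so the first module
 * that denies the operation short-circuits the rest.
 */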
/**
 * security_binder_transfer_file() - Check if a binder file xfer is allowed
 * @from: sending process
 * @to: receiving process
 * @file: file being transferred
 *
 * Check whether @from is allowed to transfer @file to @to.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_binder_transfer_file(const struct cred *from,
				  const struct cred *to, const struct file *file)
{
	return call_int_hook(binder_transfer_file, from, to, file);
}

/**
 * security_ptrace_access_check() - Check if tracing is allowed
 * @child: target process
 * @mode: PTRACE_MODE flags
 *
 * Check permission before allowing the current process to trace the @child
 * process. Security modules may also want to perform a process tracing check
 * during an execve in the bprm_set_creds hook of binprm_security_ops if the
 * process is being traced and its security attributes would be changed by the
 * execve.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_ptrace_access_check(struct task_struct *child, unsigned int mode)
{
	return call_int_hook(ptrace_access_check, child, mode);
}

/**
 * security_ptrace_traceme() - Check if tracing is allowed
 * @parent: tracing process
 *
 * Check that the @parent process has sufficient permission to trace the
 * current process before allowing the current process to present itself to
 * the @parent process for tracing.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_ptrace_traceme(struct task_struct *parent)
{
	return call_int_hook(ptrace_traceme, parent);
}

/**
 * security_capget() - Get the capability sets for a process
 * @target: target process
 * @effective: effective capability set
 * @inheritable: inheritable capability set
 * @permitted: permitted capability set
 *
 * Get the @effective, @inheritable, and @permitted capability sets for the
 * @target process. The hook may also perform permission checking to
 * determine if the current process is allowed to see the capability sets of
 * the @target process.
 *
 * Return: Returns 0 if the capability sets were successfully obtained.
 */
int security_capget(const struct task_struct *target,
		    kernel_cap_t *effective,
		    kernel_cap_t *inheritable,
		    kernel_cap_t *permitted)
{
	return call_int_hook(capget, target, effective, inheritable,
			     permitted);
}

/**
 * security_capset() - Set the capability sets for a process
 * @new: new credentials for the target process
 * @old: current credentials of the target process
 * @effective: effective capability set
 * @inheritable: inheritable capability set
 * @permitted: permitted capability set
 *
 * Set the @effective, @inheritable, and @permitted capability sets for the
 * current process.
 *
 * Return: Returns 0 and update @new if permission is granted.
 */
int security_capset(struct cred *new, const struct cred *old,
		    const kernel_cap_t *effective,
		    const kernel_cap_t *inheritable,
		    const kernel_cap_t *permitted)
{
	return call_int_hook(capset, new, old, effective, inheritable,
			     permitted);
}
/**
 * security_capable() - Check if a process has the necessary capability
 * @cred: credentials to examine
 * @ns: user namespace
 * @cap: capability requested
 * @opts: capability check options
 *
 * Check whether the @cred credentials include the @cap capability in the
 * @ns user namespace. @cap contains the capability
 * <include/linux/capability.h>. @opts contains options for the capable
 * check <include/linux/security.h>.
 *
 * Return: Returns 0 if the capability is granted.
 */
int security_capable(const struct cred *cred,
		     struct user_namespace *ns,
		     int cap,
		     unsigned int opts)
{
	return call_int_hook(capable, cred, ns, cap, opts);
}

/**
 * security_quotactl() - Check if a quotactl() syscall is allowed for this fs
 * @cmds: commands
 * @type: type
 * @id: id
 * @sb: filesystem
 *
 * Check whether the quotactl syscall is allowed for this @sb.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_quotactl(int cmds, int type, int id, const struct super_block *sb)
{
	return call_int_hook(quotactl, cmds, type, id, sb);
}

/**
 * security_quota_on() - Check if QUOTAON is allowed for a dentry
 * @dentry: dentry
 *
 * Check whether QUOTAON is allowed for @dentry.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_quota_on(struct dentry *dentry)
{
	return call_int_hook(quota_on, dentry);
}

/**
 * security_syslog() - Check if accessing the kernel message ring is allowed
 * @type: SYSLOG_ACTION_* type
 *
 * Check permission before accessing the kernel message ring or changing
 * logging to the console. See the syslog(2) manual page for an explanation
 * of the @type values.
 *
 * Return: Return 0 if permission is granted.
 */
int security_syslog(int type)
{
	return call_int_hook(syslog, type);
}

/**
 * security_settime64() - Check if changing the system time is allowed
 * @ts: new time
 * @tz: timezone
 *
 * Check permission to change the system time, struct timespec64 is defined
 * in <include/linux/time64.h> and timezone is defined in
 * <include/linux/time.h>.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_settime64(const struct timespec64 *ts, const struct timezone *tz)
{
	return call_int_hook(settime, ts, tz);
}

/**
 * security_vm_enough_memory_mm() - Check if allocating a new mem map is allowed
 * @mm: mm struct
 * @pages: number of pages
 *
 * Check permissions for allocating a new virtual mapping. If all LSMs return
 * a positive value, __vm_enough_memory() will be called with cap_sys_admin
 * set. If at least one LSM returns 0 or negative, __vm_enough_memory() will
 * be called with cap_sys_admin cleared.
 *
 * Return: Returns 0 if permission is granted by the LSM infrastructure to
 *         the caller.
 */
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
	struct security_hook_list *hp;
	int cap_sys_admin = 1;
	int rc;

	/*
	 * The module will respond with a positive value if
	 * it thinks the __vm_enough_memory() call should be
	 * made with the cap_sys_admin set. If all of the modules
	 * agree that it should be set it will. If any module
	 * thinks it should not be set it won't.
	 */
	hlist_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) {
		rc = hp->hook.vm_enough_memory(mm, pages);
		if (rc <= 0) {
			cap_sys_admin = 0;
			break;
		}
	}
	return __vm_enough_memory(mm, pages, cap_sys_admin);
}
/**
 * security_bprm_creds_for_exec() - Prepare the credentials for exec()
 * @bprm: binary program information
 *
 * If the setup in prepare_exec_creds did not setup @bprm->cred->security
 * properly for executing @bprm->file, update the LSM's portion of
 * @bprm->cred->security to be what commit_creds needs to install for the
 * new program. This hook may also optionally check permissions (e.g. for
 * transitions between security domains). The hook must set
 * @bprm->secureexec to 1 if AT_SECURE should be set to request libc enable
 * secure mode. @bprm contains the linux_binprm structure.
 *
 * Return: Returns 0 if the hook is successful and permission is granted.
 */
int security_bprm_creds_for_exec(struct linux_binprm *bprm)
{
	return call_int_hook(bprm_creds_for_exec, bprm);
}

/**
 * security_bprm_creds_from_file() - Update linux_binprm creds based on file
 * @bprm: binary program information
 * @file: associated file
 *
 * If @file is setpcap, suid, sgid or otherwise marked to change privilege
 * upon exec, update @bprm->cred to reflect that change. This is called
 * after finding the binary that will be executed without an interpreter.
 * This ensures that the credentials will not be derived from a script that
 * the binary will need to reopen, which when reopened may end up being a
 * completely different file. This hook may also optionally check
 * permissions (e.g. for transitions between security domains). The hook
 * must set @bprm->secureexec to 1 if AT_SECURE should be set to request
 * libc enable secure mode. The hook must add to @bprm->per_clear any
 * personality flags that should be cleared from current->personality.
 * @bprm contains the linux_binprm structure.
 *
 * Return: Returns 0 if the hook is successful and permission is granted.
 */
int security_bprm_creds_from_file(struct linux_binprm *bprm,
				  const struct file *file)
{
	return call_int_hook(bprm_creds_from_file, bprm, file);
}

/**
 * security_bprm_check() - Mediate binary handler search
 * @bprm: binary program information
 *
 * This hook mediates the point when a search for a binary handler will
 * begin. It allows a check against the @bprm->cred->security value which
 * was set in the preceding creds_for_exec call. The argv list and envp list
 * are reliably available in @bprm. This hook may be called multiple times
 * during a single execve. @bprm contains the linux_binprm structure.
 *
 * Return: Returns 0 if the hook is successful and permission is granted.
 */
int security_bprm_check(struct linux_binprm *bprm)
{
	return call_int_hook(bprm_check_security, bprm);
}

/**
 * security_bprm_committing_creds() - Install creds for a process during exec()
 * @bprm: binary program information
 *
 * Prepare to install the new security attributes of a process being
 * transformed by an execve operation, based on the old credentials pointed
 * to by @current->cred and the information set in @bprm->cred by the
 * bprm_creds_for_exec hook. @bprm points to the linux_binprm structure.
 * This hook is a good place to perform state changes on the process such as
 * closing open file descriptors to which access will no longer be granted
 * when the attributes are changed. This is called immediately before
 * commit_creds().
 */
void security_bprm_committing_creds(const struct linux_binprm *bprm)
{
	call_void_hook(bprm_committing_creds, bprm);
}

/**
 * security_bprm_committed_creds() - Tidy up after cred install during exec()
 * @bprm: binary program information
 *
 * Tidy up after the installation of the new security attributes of a
 * process being transformed by an execve operation. The new credentials
 * have, by this point, been set to @current->cred. @bprm points to the
 * linux_binprm structure. This hook is a good place to perform state
 * changes on the process such as clearing out non-inheritable signal state.
 * This is called immediately after commit_creds().
 */
void security_bprm_committed_creds(const struct linux_binprm *bprm)
{
	call_void_hook(bprm_committed_creds, bprm);
}
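/*
 * Illustrative note: of the bprm hooks above, bprm_creds_for_exec and
 * bprm_creds_from_file prepare @bprm->cred, bprm_check_security may run
 * more than once while binary handlers are searched, and the committing/
 * committed pair brackets commit_creds() as described in their kernel-doc.
 */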
/**
 * security_fs_context_submount() - Initialise fc->security
 * @fc: new filesystem context
 * @reference: dentry reference for submount/remount
 *
 * Fill out the ->security field for a new fs_context.
 *
 * Return: Returns 0 on success or negative error code on failure.
 */
int security_fs_context_submount(struct fs_context *fc,
				 struct super_block *reference)
{
	return call_int_hook(fs_context_submount, fc, reference);
}

/**
 * security_fs_context_dup() - Duplicate a fs_context LSM blob
 * @fc: destination filesystem context
 * @src_fc: source filesystem context
 *
 * Allocate and attach a security structure to fc->security. This pointer is
 * initialised to NULL by the caller. @fc indicates the new filesystem
 * context. @src_fc indicates the original filesystem context.
 *
 * Return: Returns 0 on success or a negative error code on failure.
 */
int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
{
	return call_int_hook(fs_context_dup, fc, src_fc);
}

/**
 * security_fs_context_parse_param() - Configure a filesystem context
 * @fc: filesystem context
 * @param: filesystem parameter
 *
 * Userspace provided a parameter to configure a superblock. The LSM can
 * consume the parameter or return it to the caller for use elsewhere.
 *
 * Return: If the parameter is used by the LSM it should return 0, if it is
 *         returned to the caller -ENOPARAM is returned, otherwise a negative
 *         error code is returned.
 */
int security_fs_context_parse_param(struct fs_context *fc,
				    struct fs_parameter *param)
{
	struct security_hook_list *hp;
	int trc;
	int rc = -ENOPARAM;

	hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param,
			     list) {
		trc = hp->hook.fs_context_parse_param(fc, param);
		if (trc == 0)
			rc = 0;
		else if (trc != -ENOPARAM)
			return trc;
	}
	return rc;
}

/**
 * security_sb_alloc() - Allocate a super_block LSM blob
 * @sb: filesystem superblock
 *
 * Allocate and attach a security structure to the sb->s_security field. The
 * s_security field is initialized to NULL when the structure is allocated.
 * @sb contains the super_block structure to be modified.
 *
 * Return: Returns 0 if operation was successful.
 */
int security_sb_alloc(struct super_block *sb)
{
	int rc = lsm_superblock_alloc(sb);

	if (unlikely(rc))
		return rc;
	rc = call_int_hook(sb_alloc_security, sb);
	if (unlikely(rc))
		security_sb_free(sb);
	return rc;
}

/**
 * security_sb_delete() - Release super_block LSM associated objects
 * @sb: filesystem superblock
 *
 * Release objects tied to a superblock (e.g. inodes). @sb contains the
 * super_block structure being released.
 */
void security_sb_delete(struct super_block *sb)
{
	call_void_hook(sb_delete, sb);
}

/**
 * security_sb_free() - Free a super_block LSM blob
 * @sb: filesystem superblock
 *
 * Deallocate and clear the sb->s_security field. @sb contains the
 * super_block structure to be modified.
 */
void security_sb_free(struct super_block *sb)
{
	call_void_hook(sb_free_security, sb);
	kfree(sb->s_security);
	sb->s_security = NULL;
}

/**
 * security_free_mnt_opts() - Free memory associated with mount options
 * @mnt_opts: LSM processed mount options
 *
 * Free memory associated with @mnt_ops.
 */
void security_free_mnt_opts(void **mnt_opts)
{
	if (!*mnt_opts)
		return;
	call_void_hook(sb_free_mnt_opts, *mnt_opts);
	*mnt_opts = NULL;
}
EXPORT_SYMBOL(security_free_mnt_opts);
/**
 * security_sb_eat_lsm_opts() - Consume LSM mount options
 * @options: mount options
 * @mnt_opts: LSM processed mount options
 *
 * Eat (scan @options) and save them in @mnt_opts.
 *
 * Return: Returns 0 on success, negative values on failure.
 */
int security_sb_eat_lsm_opts(char *options, void **mnt_opts)
{
	return call_int_hook(sb_eat_lsm_opts, options, mnt_opts);
}
EXPORT_SYMBOL(security_sb_eat_lsm_opts);

/**
 * security_sb_mnt_opts_compat() - Check if new mount options are allowed
 * @sb: filesystem superblock
 * @mnt_opts: new mount options
 *
 * Determine if the new mount options in @mnt_opts are allowed given the
 * existing mounted filesystem at @sb. @sb superblock being compared.
 *
 * Return: Returns 0 if options are compatible.
 */
int security_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts)
{
	return call_int_hook(sb_mnt_opts_compat, sb, mnt_opts);
}
EXPORT_SYMBOL(security_sb_mnt_opts_compat);

/**
 * security_sb_remount() - Verify no incompatible mount changes during remount
 * @sb: filesystem superblock
 * @mnt_opts: (re)mount options
 *
 * Extracts security system specific mount options and verifies no changes
 * are being made to those options.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_sb_remount(struct super_block *sb, void *mnt_opts)
{
	return call_int_hook(sb_remount, sb, mnt_opts);
}
EXPORT_SYMBOL(security_sb_remount);

/**
 * security_sb_kern_mount() - Check if a kernel mount is allowed
 * @sb: filesystem superblock
 *
 * Mount this @sb if allowed by permissions.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_sb_kern_mount(const struct super_block *sb)
{
	return call_int_hook(sb_kern_mount, sb);
}

/**
 * security_sb_show_options() - Output the mount options for a superblock
 * @m: output file
 * @sb: filesystem superblock
 *
 * Show (print on @m) mount options for this @sb.
 *
 * Return: Returns 0 on success, negative values on failure.
 */
int security_sb_show_options(struct seq_file *m, struct super_block *sb)
{
	return call_int_hook(sb_show_options, m, sb);
}

/**
 * security_sb_statfs() - Check if accessing fs stats is allowed
 * @dentry: superblock handle
 *
 * Check permission before obtaining filesystem statistics for the
 * mountpoint. @dentry is a handle on the superblock for the filesystem.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_sb_statfs(struct dentry *dentry)
{
	return call_int_hook(sb_statfs, dentry);
}

/**
 * security_sb_mount() - Check permission for mounting a filesystem
 * @dev_name: filesystem backing device
 * @path: mount point
 * @type: filesystem type
 * @flags: mount flags
 * @data: filesystem specific data
 *
 * Check permission before an object specified by @dev_name is mounted on
 * the mount point named by @path. For an ordinary mount, @dev_name
 * identifies a device if the file system type requires a device. For a
 * remount (@flags & MS_REMOUNT), @dev_name is irrelevant. For a
 * loopback/bind mount (@flags & MS_BIND), @dev_name identifies the pathname
 * of the object being mounted.
 *
 * Return: Returns 0 if permission is granted.
 */
int security_sb_mount(const char *dev_name, const struct path *path,
		      const char *type, unsigned long flags, void *data)
{
	return call_int_hook(sb_mount, dev_name, path, type, flags, data);
}
*/ int security_sb_umount(struct vfsmount *mnt, int flags) { return call_int_hook(sb_umount, mnt, flags); } /** * security_sb_pivotroot() - Check permissions for pivoting the rootfs * @old_path: new location for current rootfs * @new_path: location of the new rootfs * * Check permission before pivoting the root filesystem. * * Return: Returns 0 if permission is granted. */ int security_sb_pivotroot(const struct path *old_path, const struct path *new_path) { return call_int_hook(sb_pivotroot, old_path, new_path); } /** * security_sb_set_mnt_opts() - Set the mount options for a filesystem * @sb: filesystem superblock * @mnt_opts: binary mount options * @kern_flags: kernel flags (in) * @set_kern_flags: kernel flags (out) * * Set the security relevant mount options used for a superblock. * * Return: Returns 0 on success, error on failure. */ int security_sb_set_mnt_opts(struct super_block *sb, void *mnt_opts, unsigned long kern_flags, unsigned long *set_kern_flags) { struct security_hook_list *hp; int rc = mnt_opts ? -EOPNOTSUPP : LSM_RET_DEFAULT(sb_set_mnt_opts); hlist_for_each_entry(hp, &security_hook_heads.sb_set_mnt_opts, list) { rc = hp->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags, set_kern_flags); if (rc != LSM_RET_DEFAULT(sb_set_mnt_opts)) break; } return rc; } EXPORT_SYMBOL(security_sb_set_mnt_opts); /** * security_sb_clone_mnt_opts() - Duplicate superblock mount options * @oldsb: source superblock * @newsb: destination superblock * @kern_flags: kernel flags (in) * @set_kern_flags: kernel flags (out) * * Copy all security options from a given superblock to another. * * Return: Returns 0 on success, error on failure. */ int security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb, unsigned long kern_flags, unsigned long *set_kern_flags) { return call_int_hook(sb_clone_mnt_opts, oldsb, newsb, kern_flags, set_kern_flags); } EXPORT_SYMBOL(security_sb_clone_mnt_opts); /** * security_move_mount() - Check permissions for moving a mount * @from_path: source mount point * @to_path: destination mount point * * Check permission before a mount is moved. * * Return: Returns 0 if permission is granted. */ int security_move_mount(const struct path *from_path, const struct path *to_path) { return call_int_hook(move_mount, from_path, to_path); } /** * security_path_notify() - Check if setting a watch is allowed * @path: file path * @mask: event mask * @obj_type: file path type * * Check permissions before setting a watch on events as defined by @mask, on * an object at @path, whose type is defined by @obj_type. * * Return: Returns 0 if permission is granted. */ int security_path_notify(const struct path *path, u64 mask, unsigned int obj_type) { return call_int_hook(path_notify, path, mask, obj_type); } /** * security_inode_alloc() - Allocate an inode LSM blob * @inode: the inode * * Allocate and attach a security structure to @inode->i_security. The * i_security field is initialized to NULL when the inode structure is * allocated. * * Return: Return 0 if operation was successful. 
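 *
 * Illustrative sketch of the blob convention only (the mylsm_* names are
 * hypothetical): a module reserves space through its struct lsm_blob_sizes
 * and finds its slice of i_security at a fixed offset:
 *
 *	struct mylsm_inode_sec {
 *		u32 sid;	// hypothetical per-inode state
 *	};
 *
 *	static inline struct mylsm_inode_sec *mylsm_inode(const struct inode *inode)
 *	{
 *		return inode->i_security + mylsm_blob_sizes.lbs_inode;
 *	}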
*/ int security_inode_alloc(struct inode *inode) { int rc = lsm_inode_alloc(inode); if (unlikely(rc)) return rc; rc = call_int_hook(inode_alloc_security, inode); if (unlikely(rc)) security_inode_free(inode); return rc; } static void inode_free_by_rcu(struct rcu_head *head) { /* * The rcu head is at the start of the inode blob */ kmem_cache_free(lsm_inode_cache, head); } /** * security_inode_free() - Free an inode's LSM blob * @inode: the inode * * Deallocate the inode security structure and set @inode->i_security to NULL. */ void security_inode_free(struct inode *inode) { call_void_hook(inode_free_security, inode); /* * The inode may still be referenced in a path walk and * a call to security_inode_permission() can be made * after inode_free_security() is called. Ideally, the VFS * wouldn't do this, but fixing that is a much harder * job. For now, simply free the i_security via RCU, and * leave the current inode->i_security pointer intact. * The inode will be freed after the RCU grace period too. */ if (inode->i_security) call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu); } /** * security_dentry_init_security() - Perform dentry initialization * @dentry: the dentry to initialize * @mode: mode used to determine resource type * @name: name of the last path component * @xattr_name: name of the security/LSM xattr * @ctx: pointer to the resulting LSM context * @ctxlen: length of @ctx * * Compute a context for a dentry as the inode is not yet available since NFSv4 * has no label backed by an EA anyway. It is important to note that * @xattr_name does not need to be free'd by the caller, it is a static string. * * Return: Returns 0 on success, negative values on failure. */ int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, void **ctx, u32 *ctxlen) { return call_int_hook(dentry_init_security, dentry, mode, name, xattr_name, ctx, ctxlen); } EXPORT_SYMBOL(security_dentry_init_security); /** * security_dentry_create_files_as() - Perform dentry initialization * @dentry: the dentry to initialize * @mode: mode used to determine resource type * @name: name of the last path component * @old: creds to use for LSM context calculations * @new: creds to modify * * Compute a context for a dentry as the inode is not yet available and set * that context in passed in creds so that new files are created using that * context. Context is calculated using the passed in creds and not the creds * of the caller. * * Return: Returns 0 on success, error on failure. */ int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) { return call_int_hook(dentry_create_files_as, dentry, mode, name, old, new); } EXPORT_SYMBOL(security_dentry_create_files_as); /** * security_inode_init_security() - Initialize an inode's LSM context * @inode: the inode * @dir: parent directory * @qstr: last component of the pathname * @initxattrs: callback function to write xattrs * @fs_data: filesystem specific data * * Obtain the security attribute name suffix and value to set on a newly * created inode and set up the incore security field for the new inode. This * hook is called by the fs code as part of the inode creation transaction and * provides for atomic labeling of the inode, unlike the post_create/mkdir/... * hooks called by the VFS. 
* * The hook function is expected to populate the xattrs array, by calling * lsm_get_xattr_slot() to retrieve the slots reserved by the security module * with the lbs_xattr_count field of the lsm_blob_sizes structure. For each * slot, the hook function should set ->name to the attribute name suffix * (e.g. selinux), to allocate ->value (will be freed by the caller) and set it * to the attribute value, to set ->value_len to the length of the value. If * the security module does not use security attributes or does not wish to put * a security attribute on this particular inode, then it should return * -EOPNOTSUPP to skip this processing. * * Return: Returns 0 if the LSM successfully initialized all of the inode * security attributes that are required, negative values otherwise. */ int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, const initxattrs initxattrs, void *fs_data) { struct security_hook_list *hp; struct xattr *new_xattrs = NULL; int ret = -EOPNOTSUPP, xattr_count = 0; if (unlikely(IS_PRIVATE(inode))) return 0; if (!blob_sizes.lbs_xattr_count) return 0; if (initxattrs) { /* Allocate +1 as terminator. */ new_xattrs = kcalloc(blob_sizes.lbs_xattr_count + 1, sizeof(*new_xattrs), GFP_NOFS); if (!new_xattrs) return -ENOMEM; } hlist_for_each_entry(hp, &security_hook_heads.inode_init_security, list) { ret = hp->hook.inode_init_security(inode, dir, qstr, new_xattrs, &xattr_count); if (ret && ret != -EOPNOTSUPP) goto out; /* * As documented in lsm_hooks.h, -EOPNOTSUPP in this context * means that the LSM is not willing to provide an xattr, not * that it wants to signal an error. Thus, continue to invoke * the remaining LSMs. */ } /* If initxattrs() is NULL, xattr_count is zero, skip the call. */ if (!xattr_count) goto out; ret = initxattrs(inode, new_xattrs, fs_data); out: for (; xattr_count > 0; xattr_count--) kfree(new_xattrs[xattr_count - 1].value); kfree(new_xattrs); return (ret == -EOPNOTSUPP) ? 0 : ret; } EXPORT_SYMBOL(security_inode_init_security); /** * security_inode_init_security_anon() - Initialize an anonymous inode * @inode: the inode * @name: the anonymous inode class * @context_inode: an optional related inode * * Set up the incore security field for the new anonymous inode and return * whether the inode creation is permitted by the security module or not. * * Return: Returns 0 on success, -EACCES if the security module denies the * creation of this inode, or another -errno upon other errors. */ int security_inode_init_security_anon(struct inode *inode, const struct qstr *name, const struct inode *context_inode) { return call_int_hook(inode_init_security_anon, inode, name, context_inode); } #ifdef CONFIG_SECURITY_PATH /** * security_path_mknod() - Check if creating a special file is allowed * @dir: parent directory * @dentry: new file * @mode: new file mode * @dev: device number * * Check permissions when creating a file. Note that this hook is called even * if mknod operation is being done for a regular file. * * Return: Returns 0 if permission is granted. */ int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_mknod, dir, dentry, mode, dev); } EXPORT_SYMBOL(security_path_mknod); /** * security_path_post_mknod() - Update inode security after reg file creation * @idmap: idmap of the mount * @dentry: new file * * Update inode security field after a regular file has been created. 
*/ void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return; call_void_hook(path_post_mknod, idmap, dentry); } /** * security_path_mkdir() - Check if creating a new directory is allowed * @dir: parent directory * @dentry: new directory * @mode: new directory mode * * Check permissions to create a new directory in the existing directory. * * Return: Returns 0 if permission is granted. */ int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_mkdir, dir, dentry, mode); } EXPORT_SYMBOL(security_path_mkdir); /** * security_path_rmdir() - Check if removing a directory is allowed * @dir: parent directory * @dentry: directory to remove * * Check the permission to remove a directory. * * Return: Returns 0 if permission is granted. */ int security_path_rmdir(const struct path *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_rmdir, dir, dentry); } /** * security_path_unlink() - Check if removing a hard link is allowed * @dir: parent directory * @dentry: file * * Check the permission to remove a hard link to a file. * * Return: Returns 0 if permission is granted. */ int security_path_unlink(const struct path *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_unlink, dir, dentry); } EXPORT_SYMBOL(security_path_unlink); /** * security_path_symlink() - Check if creating a symbolic link is allowed * @dir: parent directory * @dentry: symbolic link * @old_name: file pathname * * Check the permission to create a symbolic link to a file. * * Return: Returns 0 if permission is granted. */ int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; return call_int_hook(path_symlink, dir, dentry, old_name); } /** * security_path_link - Check if creating a hard link is allowed * @old_dentry: existing file * @new_dir: new parent directory * @new_dentry: new link * * Check permission before creating a new hard link to a file. * * Return: Returns 0 if permission is granted. */ int security_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)))) return 0; return call_int_hook(path_link, old_dentry, new_dir, new_dentry); } /** * security_path_rename() - Check if renaming a file is allowed * @old_dir: parent directory of the old file * @old_dentry: the old file * @new_dir: parent directory of the new file * @new_dentry: the new file * @flags: flags * * Check for permission to rename a file or directory. * * Return: Returns 0 if permission is granted. */ int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry, unsigned int flags) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) || (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry))))) return 0; return call_int_hook(path_rename, old_dir, old_dentry, new_dir, new_dentry, flags); } EXPORT_SYMBOL(security_path_rename); /** * security_path_truncate() - Check if truncating a file is allowed * @path: file * * Check permission before truncating the file indicated by path. 
Note that * truncation permissions may also be checked based on already opened files, * using the security_file_truncate() hook. * * Return: Returns 0 if permission is granted. */ int security_path_truncate(const struct path *path) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; return call_int_hook(path_truncate, path); } /** * security_path_chmod() - Check if changing the file's mode is allowed * @path: file * @mode: new mode * * Check for permission to change a mode of the file @path. The new mode is * specified in @mode which is a bitmask of constants from * <include/uapi/linux/stat.h>. * * Return: Returns 0 if permission is granted. */ int security_path_chmod(const struct path *path, umode_t mode) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; return call_int_hook(path_chmod, path, mode); } /** * security_path_chown() - Check if changing the file's owner/group is allowed * @path: file * @uid: file owner * @gid: file group * * Check for permission to change owner/group of a file or directory. * * Return: Returns 0 if permission is granted. */ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; return call_int_hook(path_chown, path, uid, gid); } /** * security_path_chroot() - Check if changing the root directory is allowed * @path: directory * * Check for permission to change root directory. * * Return: Returns 0 if permission is granted. */ int security_path_chroot(const struct path *path) { return call_int_hook(path_chroot, path); } #endif /* CONFIG_SECURITY_PATH */ /** * security_inode_create() - Check if creating a file is allowed * @dir: the parent directory * @dentry: the file being created * @mode: requested file mode * * Check permission to create a regular file. * * Return: Returns 0 if permission is granted. */ int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(dir))) return 0; return call_int_hook(inode_create, dir, dentry, mode); } EXPORT_SYMBOL_GPL(security_inode_create); /** * security_inode_post_create_tmpfile() - Update inode security of new tmpfile * @idmap: idmap of the mount * @inode: inode of the new tmpfile * * Update inode security data after a tmpfile has been created. */ void security_inode_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) { if (unlikely(IS_PRIVATE(inode))) return; call_void_hook(inode_post_create_tmpfile, idmap, inode); } /** * security_inode_link() - Check if creating a hard link is allowed * @old_dentry: existing file * @dir: new parent directory * @new_dentry: new link * * Check permission before creating a new hard link to a file. * * Return: Returns 0 if permission is granted. */ int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)))) return 0; return call_int_hook(inode_link, old_dentry, dir, new_dentry); } /** * security_inode_unlink() - Check if removing a hard link is allowed * @dir: parent directory * @dentry: file * * Check the permission to remove a hard link to a file. * * Return: Returns 0 if permission is granted. 
*/ int security_inode_unlink(struct inode *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_unlink, dir, dentry); } /** * security_inode_symlink() - Check if creating a symbolic link is allowed * @dir: parent directory * @dentry: symbolic link * @old_name: existing filename * * Check the permission to create a symbolic link to a file. * * Return: Returns 0 if permission is granted. */ int security_inode_symlink(struct inode *dir, struct dentry *dentry, const char *old_name) { if (unlikely(IS_PRIVATE(dir))) return 0; return call_int_hook(inode_symlink, dir, dentry, old_name); } /** * security_inode_mkdir() - Check if creation a new director is allowed * @dir: parent directory * @dentry: new directory * @mode: new directory mode * * Check permissions to create a new directory in the existing directory * associated with inode structure @dir. * * Return: Returns 0 if permission is granted. */ int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(dir))) return 0; return call_int_hook(inode_mkdir, dir, dentry, mode); } EXPORT_SYMBOL_GPL(security_inode_mkdir); /** * security_inode_rmdir() - Check if removing a directory is allowed * @dir: parent directory * @dentry: directory to be removed * * Check the permission to remove a directory. * * Return: Returns 0 if permission is granted. */ int security_inode_rmdir(struct inode *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_rmdir, dir, dentry); } /** * security_inode_mknod() - Check if creating a special file is allowed * @dir: parent directory * @dentry: new file * @mode: new file mode * @dev: device number * * Check permissions when creating a special file (or a socket or a fifo file * created via the mknod system call). Note that if mknod operation is being * done for a regular file, then the create hook will be called and not this * hook. * * Return: Returns 0 if permission is granted. */ int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { if (unlikely(IS_PRIVATE(dir))) return 0; return call_int_hook(inode_mknod, dir, dentry, mode, dev); } /** * security_inode_rename() - Check if renaming a file is allowed * @old_dir: parent directory of the old file * @old_dentry: the old file * @new_dir: parent directory of the new file * @new_dentry: the new file * @flags: flags * * Check for permission to rename a file or directory. * * Return: Returns 0 if permission is granted. */ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) || (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry))))) return 0; if (flags & RENAME_EXCHANGE) { int err = call_int_hook(inode_rename, new_dir, new_dentry, old_dir, old_dentry); if (err) return err; } return call_int_hook(inode_rename, old_dir, old_dentry, new_dir, new_dentry); } /** * security_inode_readlink() - Check if reading a symbolic link is allowed * @dentry: link * * Check the permission to read the symbolic link. * * Return: Returns 0 if permission is granted. 
*/ int security_inode_readlink(struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_readlink, dentry); } /** * security_inode_follow_link() - Check if following a symbolic link is allowed * @dentry: link dentry * @inode: link inode * @rcu: true if in RCU-walk mode * * Check permission to follow a symbolic link when looking up a pathname. If * @rcu is true, @inode is not stable. * * Return: Returns 0 if permission is granted. */ int security_inode_follow_link(struct dentry *dentry, struct inode *inode, bool rcu) { if (unlikely(IS_PRIVATE(inode))) return 0; return call_int_hook(inode_follow_link, dentry, inode, rcu); } /** * security_inode_permission() - Check if accessing an inode is allowed * @inode: inode * @mask: access mask * * Check permission before accessing an inode. This hook is called by the * existing Linux permission function, so a security module can use it to * provide additional checking for existing Linux permission checks. Notice * that this hook is called when a file is opened (as well as many other * operations), whereas the file_security_ops permission hook is called when * the actual read/write operations are performed. * * Return: Returns 0 if permission is granted. */ int security_inode_permission(struct inode *inode, int mask) { if (unlikely(IS_PRIVATE(inode))) return 0; return call_int_hook(inode_permission, inode, mask); } /** * security_inode_setattr() - Check if setting file attributes is allowed * @idmap: idmap of the mount * @dentry: file * @attr: new attributes * * Check permission before setting file attributes. Note that the kernel call * to notify_change is performed from several locations, whenever file * attributes change (such as when a file is truncated, chown/chmod operations, * transferring disk quotas, etc). * * Return: Returns 0 if permission is granted. */ int security_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_setattr, idmap, dentry, attr); } EXPORT_SYMBOL_GPL(security_inode_setattr); /** * security_inode_post_setattr() - Update the inode after a setattr operation * @idmap: idmap of the mount * @dentry: file * @ia_valid: file attributes set * * Update inode security field after successful setting file attributes. */ void security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int ia_valid) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return; call_void_hook(inode_post_setattr, idmap, dentry, ia_valid); } /** * security_inode_getattr() - Check if getting file attributes is allowed * @path: file * * Check permission before obtaining file attributes. * * Return: Returns 0 if permission is granted. */ int security_inode_getattr(const struct path *path) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; return call_int_hook(inode_getattr, path); } /** * security_inode_setxattr() - Check if setting file xattrs is allowed * @idmap: idmap of the mount * @dentry: file * @name: xattr name * @value: xattr value * @size: size of xattr value * @flags: flags * * Check permission before setting the extended attributes. * * Return: Returns 0 if permission is granted. 
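 *
 * Modules that supply this hook are expected to integrate the capability
 * check themselves (see the cap_inode_setxattr() fallback below). A purely
 * hypothetical implementation (mylsm_* names and the attribute are made up)
 * might look like:
 *
 *	static int mylsm_inode_setxattr(struct mnt_idmap *idmap,
 *					struct dentry *dentry, const char *name,
 *					const void *value, size_t size, int flags)
 *	{
 *		// hypothetical: refuse direct writes to this module's attribute
 *		if (!strcmp(name, "security.mylsm"))
 *			return -EPERM;
 *		return cap_inode_setxattr(dentry, name, value, size, flags);
 *	}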
*/ int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { int ret; if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; /* * SELinux and Smack integrate the cap call, * so assume that all LSMs supplying this call do so. */ ret = call_int_hook(inode_setxattr, idmap, dentry, name, value, size, flags); if (ret == 1) ret = cap_inode_setxattr(dentry, name, value, size, flags); return ret; } /** * security_inode_set_acl() - Check if setting posix acls is allowed * @idmap: idmap of the mount * @dentry: file * @acl_name: acl name * @kacl: acl struct * * Check permission before setting posix acls, the posix acls in @kacl are * identified by @acl_name. * * Return: Returns 0 if permission is granted. */ int security_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_set_acl, idmap, dentry, acl_name, kacl); } /** * security_inode_post_set_acl() - Update inode security from posix acls set * @dentry: file * @acl_name: acl name * @kacl: acl struct * * Update inode security data after successfully setting posix acls on @dentry. * The posix acls in @kacl are identified by @acl_name. */ void security_inode_post_set_acl(struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return; call_void_hook(inode_post_set_acl, dentry, acl_name, kacl); } /** * security_inode_get_acl() - Check if reading posix acls is allowed * @idmap: idmap of the mount * @dentry: file * @acl_name: acl name * * Check permission before getting osix acls, the posix acls are identified by * @acl_name. * * Return: Returns 0 if permission is granted. */ int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_get_acl, idmap, dentry, acl_name); } /** * security_inode_remove_acl() - Check if removing a posix acl is allowed * @idmap: idmap of the mount * @dentry: file * @acl_name: acl name * * Check permission before removing posix acls, the posix acls are identified * by @acl_name. * * Return: Returns 0 if permission is granted. */ int security_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_remove_acl, idmap, dentry, acl_name); } /** * security_inode_post_remove_acl() - Update inode security after rm posix acls * @idmap: idmap of the mount * @dentry: file * @acl_name: acl name * * Update inode security data after successfully removing posix acls on * @dentry in @idmap. The posix acls are identified by @acl_name. */ void security_inode_post_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return; call_void_hook(inode_post_remove_acl, idmap, dentry, acl_name); } /** * security_inode_post_setxattr() - Update the inode after a setxattr operation * @dentry: file * @name: xattr name * @value: xattr value * @size: xattr value size * @flags: flags * * Update inode security field after successful setxattr operation. 
*/ void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return; call_void_hook(inode_post_setxattr, dentry, name, value, size, flags); } /** * security_inode_getxattr() - Check if xattr access is allowed * @dentry: file * @name: xattr name * * Check permission before obtaining the extended attributes identified by * @name for @dentry. * * Return: Returns 0 if permission is granted. */ int security_inode_getxattr(struct dentry *dentry, const char *name) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_getxattr, dentry, name); } /** * security_inode_listxattr() - Check if listing xattrs is allowed * @dentry: file * * Check permission before obtaining the list of extended attribute names for * @dentry. * * Return: Returns 0 if permission is granted. */ int security_inode_listxattr(struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; return call_int_hook(inode_listxattr, dentry); } /** * security_inode_removexattr() - Check if removing an xattr is allowed * @idmap: idmap of the mount * @dentry: file * @name: xattr name * * Check permission before removing the extended attribute identified by @name * for @dentry. * * Return: Returns 0 if permission is granted. */ int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { int ret; if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; /* * SELinux and Smack integrate the cap call, * so assume that all LSMs supplying this call do so. */ ret = call_int_hook(inode_removexattr, idmap, dentry, name); if (ret == 1) ret = cap_inode_removexattr(idmap, dentry, name); return ret; } /** * security_inode_post_removexattr() - Update the inode after a removexattr op * @dentry: file * @name: xattr name * * Update the inode after a successful removexattr operation. */ void security_inode_post_removexattr(struct dentry *dentry, const char *name) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return; call_void_hook(inode_post_removexattr, dentry, name); } /** * security_inode_need_killpriv() - Check if security_inode_killpriv() required * @dentry: associated dentry * * Called when an inode has been changed to determine if * security_inode_killpriv() should be called. * * Return: Return <0 on error to abort the inode change operation, return 0 if * security_inode_killpriv() does not need to be called, return >0 if * security_inode_killpriv() does need to be called. */ int security_inode_need_killpriv(struct dentry *dentry) { return call_int_hook(inode_need_killpriv, dentry); } /** * security_inode_killpriv() - The setuid bit is removed, update LSM state * @idmap: idmap of the mount * @dentry: associated dentry * * The @dentry's setuid bit is being removed. Remove similar security labels. * Called with the dentry->d_inode->i_mutex held. * * Return: Return 0 on success. If error is returned, then the operation * causing setuid bit removal is failed. */ int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry) { return call_int_hook(inode_killpriv, idmap, dentry); } /** * security_inode_getsecurity() - Get the xattr security label of an inode * @idmap: idmap of the mount * @inode: inode * @name: xattr name * @buffer: security label buffer * @alloc: allocation flag * * Retrieve a copy of the extended attribute representation of the security * label associated with @name for @inode via @buffer. 
Note that @name is the * remainder of the attribute name after the security prefix has been removed. * @alloc is used to specify if the call should return a value via the buffer * or just the value length. * * Return: Returns size of buffer on success. */ int security_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void **buffer, bool alloc) { if (unlikely(IS_PRIVATE(inode))) return LSM_RET_DEFAULT(inode_getsecurity); return call_int_hook(inode_getsecurity, idmap, inode, name, buffer, alloc); } /** * security_inode_setsecurity() - Set the xattr security label of an inode * @inode: inode * @name: xattr name * @value: security label * @size: length of security label * @flags: flags * * Set the security label associated with @name for @inode from the extended * attribute value @value. @size indicates the size of the @value in bytes. * @flags may be XATTR_CREATE, XATTR_REPLACE, or 0. Note that @name is the * remainder of the attribute name after the security. prefix has been removed. * * Return: Returns 0 on success. */ int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (unlikely(IS_PRIVATE(inode))) return LSM_RET_DEFAULT(inode_setsecurity); return call_int_hook(inode_setsecurity, inode, name, value, size, flags); } /** * security_inode_listsecurity() - List the xattr security label names * @inode: inode * @buffer: buffer * @buffer_size: size of buffer * * Copy the extended attribute names for the security labels associated with * @inode into @buffer. The maximum size of @buffer is specified by * @buffer_size. @buffer may be NULL to request the size of the buffer * required. * * Return: Returns number of bytes used/required on success. */ int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size) { if (unlikely(IS_PRIVATE(inode))) return 0; return call_int_hook(inode_listsecurity, inode, buffer, buffer_size); } EXPORT_SYMBOL(security_inode_listsecurity); /** * security_inode_getsecid() - Get an inode's secid * @inode: inode * @secid: secid to return * * Get the secid associated with the node. In case of failure, @secid will be * set to zero. */ void security_inode_getsecid(struct inode *inode, u32 *secid) { call_void_hook(inode_getsecid, inode, secid); } /** * security_inode_copy_up() - Create new creds for an overlayfs copy-up op * @src: union dentry of copy-up file * @new: newly created creds * * A file is about to be copied up from lower layer to upper layer of overlay * filesystem. Security module can prepare a set of new creds and modify as * need be and return new creds. Caller will switch to new creds temporarily to * create new file and release newly allocated creds. * * Return: Returns 0 on success or a negative error code on error. */ int security_inode_copy_up(struct dentry *src, struct cred **new) { return call_int_hook(inode_copy_up, src, new); } EXPORT_SYMBOL(security_inode_copy_up); /** * security_inode_copy_up_xattr() - Filter xattrs in an overlayfs copy-up op * @name: xattr name * * Filter the xattrs being copied up when a unioned file is copied up from a * lower layer to the union/overlay layer. The caller is responsible for * reading and writing the xattrs, this hook is merely a filter. * * Return: Returns 0 to accept the xattr, 1 to discard the xattr, -EOPNOTSUPP * if the security module does not know about attribute, or a negative * error code to abort the copy up. 
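 *
 * A purely hypothetical example of the return convention described above
 * (the attribute name is invented): a module that wants copied-up inodes to
 * be labelled afresh rather than inherit its xattr could use:
 *
 *	static int mylsm_inode_copy_up_xattr(const char *name)
 *	{
 *		if (!strcmp(name, "security.mylsm"))
 *			return 1;		// discard this xattr
 *		return -EOPNOTSUPP;		// not ours, no opinion
 *	}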
*/ int security_inode_copy_up_xattr(const char *name) { int rc; /* * The implementation can return 0 (accept the xattr), 1 (discard the * xattr), -EOPNOTSUPP if it does not know anything about the xattr or * any other error code in case of an error. */ rc = call_int_hook(inode_copy_up_xattr, name); if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr)) return rc; return LSM_RET_DEFAULT(inode_copy_up_xattr); } EXPORT_SYMBOL(security_inode_copy_up_xattr); /** * security_kernfs_init_security() - Init LSM context for a kernfs node * @kn_dir: parent kernfs node * @kn: the kernfs node to initialize * * Initialize the security context of a newly created kernfs node based on its * own and its parent's attributes. * * Return: Returns 0 if permission is granted. */ int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn) { return call_int_hook(kernfs_init_security, kn_dir, kn); } /** * security_file_permission() - Check file permissions * @file: file * @mask: requested permissions * * Check file permissions before accessing an open file. This hook is called * by various operations that read or write files. A security module can use * this hook to perform additional checking on these operations, e.g. to * revalidate permissions on use to support privilege bracketing or policy * changes. Notice that this hook is used when the actual read/write * operations are performed, whereas the inode_security_ops hook is called when * a file is opened (as well as many other operations). Although this hook can * be used to revalidate permissions for various system call operations that * read or write files, it does not address the revalidation of permissions for * memory-mapped files. Security modules must handle this separately if they * need such revalidation. * * Return: Returns 0 if permission is granted. */ int security_file_permission(struct file *file, int mask) { return call_int_hook(file_permission, file, mask); } /** * security_file_alloc() - Allocate and init a file's LSM blob * @file: the file * * Allocate and attach a security structure to the file->f_security field. The * security field is initialized to NULL when the structure is first created. * * Return: Return 0 if the hook is successful and permission is granted. */ int security_file_alloc(struct file *file) { int rc = lsm_file_alloc(file); if (rc) return rc; rc = call_int_hook(file_alloc_security, file); if (unlikely(rc)) security_file_free(file); return rc; } /** * security_file_release() - Perform actions before releasing the file ref * @file: the file * * Perform actions before releasing the last reference to a file. */ void security_file_release(struct file *file) { call_void_hook(file_release, file); } /** * security_file_free() - Free a file's LSM blob * @file: the file * * Deallocate and free any security structures stored in file->f_security. */ void security_file_free(struct file *file) { void *blob; call_void_hook(file_free_security, file); blob = file->f_security; if (blob) { file->f_security = NULL; kmem_cache_free(lsm_file_cache, blob); } } /** * security_file_ioctl() - Check if an ioctl is allowed * @file: associated file * @cmd: ioctl cmd * @arg: ioctl arguments * * Check permission for an ioctl operation on @file. Note that @arg sometimes * represents a user space pointer; in other cases, it may be a simple integer * value. When @arg represents a user space pointer, it should never be used * by the security module. * * Return: Returns 0 if permission is granted. 
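 *
 * Illustrative sketch only (the policy shown is hypothetical): hook
 * implementations normally key off @cmd and leave @arg alone:
 *
 *	static int mylsm_file_ioctl(struct file *file, unsigned int cmd,
 *				    unsigned long arg)
 *	{
 *		switch (cmd) {
 *		case FIBMAP:
 *			return -EPERM;	// hypothetical: forbid block-map queries
 *		default:
 *			return 0;
 *		}
 *	}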
*/ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return call_int_hook(file_ioctl, file, cmd, arg); } EXPORT_SYMBOL_GPL(security_file_ioctl); /** * security_file_ioctl_compat() - Check if an ioctl is allowed in compat mode * @file: associated file * @cmd: ioctl cmd * @arg: ioctl arguments * * Compat version of security_file_ioctl() that correctly handles 32-bit * processes running on 64-bit kernels. * * Return: Returns 0 if permission is granted. */ int security_file_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { return call_int_hook(file_ioctl_compat, file, cmd, arg); } EXPORT_SYMBOL_GPL(security_file_ioctl_compat); static inline unsigned long mmap_prot(struct file *file, unsigned long prot) { /* * Does we have PROT_READ and does the application expect * it to imply PROT_EXEC? If not, nothing to talk about... */ if ((prot & (PROT_READ | PROT_EXEC)) != PROT_READ) return prot; if (!(current->personality & READ_IMPLIES_EXEC)) return prot; /* * if that's an anonymous mapping, let it. */ if (!file) return prot | PROT_EXEC; /* * ditto if it's not on noexec mount, except that on !MMU we need * NOMMU_MAP_EXEC (== VM_MAYEXEC) in this case */ if (!path_noexec(&file->f_path)) { #ifndef CONFIG_MMU if (file->f_op->mmap_capabilities) { unsigned caps = file->f_op->mmap_capabilities(file); if (!(caps & NOMMU_MAP_EXEC)) return prot; } #endif return prot | PROT_EXEC; } /* anything on noexec mount won't get PROT_EXEC */ return prot; } /** * security_mmap_file() - Check if mmap'ing a file is allowed * @file: file * @prot: protection applied by the kernel * @flags: flags * * Check permissions for a mmap operation. The @file may be NULL, e.g. if * mapping anonymous memory. * * Return: Returns 0 if permission is granted. */ int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { return call_int_hook(mmap_file, file, prot, mmap_prot(file, prot), flags); } /** * security_mmap_addr() - Check if mmap'ing an address is allowed * @addr: address * * Check permissions for a mmap operation at @addr. * * Return: Returns 0 if permission is granted. */ int security_mmap_addr(unsigned long addr) { return call_int_hook(mmap_addr, addr); } /** * security_file_mprotect() - Check if changing memory protections is allowed * @vma: memory region * @reqprot: application requested protection * @prot: protection applied by the kernel * * Check permissions before changing memory access permissions. * * Return: Returns 0 if permission is granted. */ int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { return call_int_hook(file_mprotect, vma, reqprot, prot); } /** * security_file_lock() - Check if a file lock is allowed * @file: file * @cmd: lock operation (e.g. F_RDLCK, F_WRLCK) * * Check permission before performing file locking operations. Note the hook * mediates both flock and fcntl style locks. * * Return: Returns 0 if permission is granted. */ int security_file_lock(struct file *file, unsigned int cmd) { return call_int_hook(file_lock, file, cmd); } /** * security_file_fcntl() - Check if fcntl() op is allowed * @file: file * @cmd: fcntl command * @arg: command argument * * Check permission before allowing the file operation specified by @cmd from * being performed on the file @file. Note that @arg sometimes represents a * user space pointer; in other cases, it may be a simple integer value. When * @arg represents a user space pointer, it should never be used by the * security module. 
* * Return: Returns 0 if permission is granted. */ int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { return call_int_hook(file_fcntl, file, cmd, arg); } /** * security_file_set_fowner() - Set the file owner info in the LSM blob * @file: the file * * Save owner security information (typically from current->security) in * file->f_security for later use by the send_sigiotask hook. * * Return: Returns 0 on success. */ void security_file_set_fowner(struct file *file) { call_void_hook(file_set_fowner, file); } /** * security_file_send_sigiotask() - Check if sending SIGIO/SIGURG is allowed * @tsk: target task * @fown: signal sender * @sig: signal to be sent, SIGIO is sent if 0 * * Check permission for the file owner @fown to send SIGIO or SIGURG to the * process @tsk. Note that this hook is sometimes called from interrupt. Note * that the fown_struct, @fown, is never outside the context of a struct file, * so the file structure (and associated security information) can always be * obtained: container_of(fown, struct file, f_owner). * * Return: Returns 0 if permission is granted. */ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig) { return call_int_hook(file_send_sigiotask, tsk, fown, sig); } /** * security_file_receive() - Check if receiving a file via IPC is allowed * @file: file being received * * This hook allows security modules to control the ability of a process to * receive an open file descriptor via socket IPC. * * Return: Returns 0 if permission is granted. */ int security_file_receive(struct file *file) { return call_int_hook(file_receive, file); } /** * security_file_open() - Save open() time state for late use by the LSM * @file: * * Save open-time permission checking state for later use upon file_permission, * and recheck access if anything has changed since inode_permission. * * Return: Returns 0 if permission is granted. */ int security_file_open(struct file *file) { int ret; ret = call_int_hook(file_open, file); if (ret) return ret; return fsnotify_open_perm(file); } /** * security_file_post_open() - Evaluate a file after it has been opened * @file: the file * @mask: access mask * * Evaluate an opened file and the access mask requested with open(). The hook * is useful for LSMs that require the file content to be available in order to * make decisions. * * Return: Returns 0 if permission is granted. */ int security_file_post_open(struct file *file, int mask) { return call_int_hook(file_post_open, file, mask); } EXPORT_SYMBOL_GPL(security_file_post_open); /** * security_file_truncate() - Check if truncating a file is allowed * @file: file * * Check permission before truncating a file, i.e. using ftruncate. Note that * truncation permission may also be checked based on the path, using the * @path_truncate hook. * * Return: Returns 0 if permission is granted. */ int security_file_truncate(struct file *file) { return call_int_hook(file_truncate, file); } /** * security_task_alloc() - Allocate a task's LSM blob * @task: the task * @clone_flags: flags indicating what is being shared * * Handle allocation of task-related resources. * * Return: Returns a zero on success, negative values on failure. 
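 *
 * Sketch of a hypothetical hook (the mylsm_* names and the one-word blob are
 * invented) initialising its share of the task blob set up by
 * lsm_task_alloc() below:
 *
 *	struct mylsm_task_sec {
 *		u32 flags;	// hypothetical per-task state
 *	};
 *
 *	static int mylsm_task_alloc(struct task_struct *task,
 *				    unsigned long clone_flags)
 *	{
 *		struct mylsm_task_sec *sec;
 *
 *		sec = task->security + mylsm_blob_sizes.lbs_task;
 *		sec->flags = (clone_flags & CLONE_THREAD) ? 1 : 0;
 *		return 0;
 *	}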
*/ int security_task_alloc(struct task_struct *task, unsigned long clone_flags) { int rc = lsm_task_alloc(task); if (rc) return rc; rc = call_int_hook(task_alloc, task, clone_flags); if (unlikely(rc)) security_task_free(task); return rc; } /** * security_task_free() - Free a task's LSM blob and related resources * @task: task * * Handle release of task-related resources. Note that this can be called from * interrupt context. */ void security_task_free(struct task_struct *task) { call_void_hook(task_free, task); kfree(task->security); task->security = NULL; } /** * security_cred_alloc_blank() - Allocate the min memory to allow cred_transfer * @cred: credentials * @gfp: gfp flags * * Only allocate sufficient memory and attach to @cred such that * cred_transfer() will not get ENOMEM. * * Return: Returns 0 on success, negative values on failure. */ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) { int rc = lsm_cred_alloc(cred, gfp); if (rc) return rc; rc = call_int_hook(cred_alloc_blank, cred, gfp); if (unlikely(rc)) security_cred_free(cred); return rc; } /** * security_cred_free() - Free the cred's LSM blob and associated resources * @cred: credentials * * Deallocate and clear the cred->security field in a set of credentials. */ void security_cred_free(struct cred *cred) { /* * There is a failure case in prepare_creds() that * may result in a call here with ->security being NULL. */ if (unlikely(cred->security == NULL)) return; call_void_hook(cred_free, cred); kfree(cred->security); cred->security = NULL; } /** * security_prepare_creds() - Prepare a new set of credentials * @new: new credentials * @old: original credentials * @gfp: gfp flags * * Prepare a new set of credentials by copying the data from the old set. * * Return: Returns 0 on success, negative values on failure. */ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) { int rc = lsm_cred_alloc(new, gfp); if (rc) return rc; rc = call_int_hook(cred_prepare, new, old, gfp); if (unlikely(rc)) security_cred_free(new); return rc; } /** * security_transfer_creds() - Transfer creds * @new: target credentials * @old: original credentials * * Transfer data from original creds to new creds. */ void security_transfer_creds(struct cred *new, const struct cred *old) { call_void_hook(cred_transfer, new, old); } /** * security_cred_getsecid() - Get the secid from a set of credentials * @c: credentials * @secid: secid value * * Retrieve the security identifier of the cred structure @c. In case of * failure, @secid will be set to zero. */ void security_cred_getsecid(const struct cred *c, u32 *secid) { *secid = 0; call_void_hook(cred_getsecid, c, secid); } EXPORT_SYMBOL(security_cred_getsecid); /** * security_kernel_act_as() - Set the kernel credentials to act as secid * @new: credentials * @secid: secid * * Set the credentials for a kernel service to act as (subjective context). * The current task must be the one that nominated @secid. * * Return: Returns 0 if successful. */ int security_kernel_act_as(struct cred *new, u32 secid) { return call_int_hook(kernel_act_as, new, secid); } /** * security_kernel_create_files_as() - Set file creation context using an inode * @new: target credentials * @inode: reference inode * * Set the file creation context in a set of credentials to be the same as the * objective context of the specified inode. The current task must be the one * that nominated @inode. * * Return: Returns 0 if successful. 
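 *
 * Illustrative caller pattern (a sketch, not taken from any specific user):
 *
 *	struct cred *new = prepare_creds();
 *	int err;
 *
 *	if (!new)
 *		return -ENOMEM;
 *	err = security_kernel_create_files_as(new, inode);
 *	if (err) {
 *		abort_creds(new);
 *		return err;
 *	}
 *	commit_creds(new);	// or override_creds()/revert_creds()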
*/ int security_kernel_create_files_as(struct cred *new, struct inode *inode) { return call_int_hook(kernel_create_files_as, new, inode); } /** * security_kernel_module_request() - Check if loading a module is allowed * @kmod_name: module name * * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. * * Return: Returns 0 if successful. */ int security_kernel_module_request(char *kmod_name) { return call_int_hook(kernel_module_request, kmod_name); } /** * security_kernel_read_file() - Read a file specified by userspace * @file: file * @id: file identifier * @contents: trust if security_kernel_post_read_file() will be called * * Read a file specified by userspace. * * Return: Returns 0 if permission is granted. */ int security_kernel_read_file(struct file *file, enum kernel_read_file_id id, bool contents) { return call_int_hook(kernel_read_file, file, id, contents); } EXPORT_SYMBOL_GPL(security_kernel_read_file); /** * security_kernel_post_read_file() - Read a file specified by userspace * @file: file * @buf: file contents * @size: size of file contents * @id: file identifier * * Read a file specified by userspace. This must be paired with a prior call * to security_kernel_read_file() call that indicated this hook would also be * called, see security_kernel_read_file() for more information. * * Return: Returns 0 if permission is granted. */ int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id) { return call_int_hook(kernel_post_read_file, file, buf, size, id); } EXPORT_SYMBOL_GPL(security_kernel_post_read_file); /** * security_kernel_load_data() - Load data provided by userspace * @id: data identifier * @contents: true if security_kernel_post_load_data() will be called * * Load data provided by userspace. * * Return: Returns 0 if permission is granted. */ int security_kernel_load_data(enum kernel_load_data_id id, bool contents) { return call_int_hook(kernel_load_data, id, contents); } EXPORT_SYMBOL_GPL(security_kernel_load_data); /** * security_kernel_post_load_data() - Load userspace data from a non-file source * @buf: data * @size: size of data * @id: data identifier * @description: text description of data, specific to the id value * * Load data provided by a non-file source (usually userspace buffer). This * must be paired with a prior security_kernel_load_data() call that indicated * this hook would also be called, see security_kernel_load_data() for more * information. * * Return: Returns 0 if permission is granted. */ int security_kernel_post_load_data(char *buf, loff_t size, enum kernel_load_data_id id, char *description) { return call_int_hook(kernel_post_load_data, buf, size, id, description); } EXPORT_SYMBOL_GPL(security_kernel_post_load_data); /** * security_task_fix_setuid() - Update LSM with new user id attributes * @new: updated credentials * @old: credentials being replaced * @flags: LSM_SETID_* flag values * * Update the module's state after setting one or more of the user identity * attributes of the current process. The @flags parameter indicates which of * the set*uid system calls invoked this hook. If @new is the set of * credentials that will be installed. Modifications should be made to this * rather than to @current->cred. * * Return: Returns 0 on success. 
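 *
 * A minimal hypothetical implementation (the mylsm_* names and cached state
 * are invented) showing how module state in @new is typically refreshed:
 *
 *	struct mylsm_cred_sec {
 *		kuid_t cached_euid;	// hypothetical per-cred state
 *	};
 *
 *	static int mylsm_task_fix_setuid(struct cred *new,
 *					 const struct cred *old, int flags)
 *	{
 *		struct mylsm_cred_sec *sec;
 *
 *		sec = new->security + mylsm_blob_sizes.lbs_cred;
 *		sec->cached_euid = new->euid;
 *		return 0;
 *	}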
*/ int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { return call_int_hook(task_fix_setuid, new, old, flags); } /** * security_task_fix_setgid() - Update LSM with new group id attributes * @new: updated credentials * @old: credentials being replaced * @flags: LSM_SETID_* flag value * * Update the module's state after setting one or more of the group identity * attributes of the current process. The @flags parameter indicates which of * the set*gid system calls invoked this hook. @new is the set of credentials * that will be installed. Modifications should be made to this rather than to * @current->cred. * * Return: Returns 0 on success. */ int security_task_fix_setgid(struct cred *new, const struct cred *old, int flags) { return call_int_hook(task_fix_setgid, new, old, flags); } /** * security_task_fix_setgroups() - Update LSM with new supplementary groups * @new: updated credentials * @old: credentials being replaced * * Update the module's state after setting the supplementary group identity * attributes of the current process. @new is the set of credentials that will * be installed. Modifications should be made to this rather than to * @current->cred. * * Return: Returns 0 on success. */ int security_task_fix_setgroups(struct cred *new, const struct cred *old) { return call_int_hook(task_fix_setgroups, new, old); } /** * security_task_setpgid() - Check if setting the pgid is allowed * @p: task being modified * @pgid: new pgid * * Check permission before setting the process group identifier of the process * @p to @pgid. * * Return: Returns 0 if permission is granted. */ int security_task_setpgid(struct task_struct *p, pid_t pgid) { return call_int_hook(task_setpgid, p, pgid); } /** * security_task_getpgid() - Check if getting the pgid is allowed * @p: task * * Check permission before getting the process group identifier of the process * @p. * * Return: Returns 0 if permission is granted. */ int security_task_getpgid(struct task_struct *p) { return call_int_hook(task_getpgid, p); } /** * security_task_getsid() - Check if getting the session id is allowed * @p: task * * Check permission before getting the session identifier of the process @p. * * Return: Returns 0 if permission is granted. */ int security_task_getsid(struct task_struct *p) { return call_int_hook(task_getsid, p); } /** * security_current_getsecid_subj() - Get the current task's subjective secid * @secid: secid value * * Retrieve the subjective security identifier of the current task and return * it in @secid. In case of failure, @secid will be set to zero. */ void security_current_getsecid_subj(u32 *secid) { *secid = 0; call_void_hook(current_getsecid_subj, secid); } EXPORT_SYMBOL(security_current_getsecid_subj); /** * security_task_getsecid_obj() - Get a task's objective secid * @p: target task * @secid: secid value * * Retrieve the objective security identifier of the task_struct in @p and * return it in @secid. In case of failure, @secid will be set to zero. */ void security_task_getsecid_obj(struct task_struct *p, u32 *secid) { *secid = 0; call_void_hook(task_getsecid_obj, p, secid); } EXPORT_SYMBOL(security_task_getsecid_obj); /** * security_task_setnice() - Check if setting a task's nice value is allowed * @p: target task * @nice: nice value * * Check permission before setting the nice value of @p to @nice. * * Return: Returns 0 if permission is granted. 
*/ int security_task_setnice(struct task_struct *p, int nice) { return call_int_hook(task_setnice, p, nice); } /** * security_task_setioprio() - Check if setting a task's ioprio is allowed * @p: target task * @ioprio: ioprio value * * Check permission before setting the ioprio value of @p to @ioprio. * * Return: Returns 0 if permission is granted. */ int security_task_setioprio(struct task_struct *p, int ioprio) { return call_int_hook(task_setioprio, p, ioprio); } /** * security_task_getioprio() - Check if getting a task's ioprio is allowed * @p: task * * Check permission before getting the ioprio value of @p. * * Return: Returns 0 if permission is granted. */ int security_task_getioprio(struct task_struct *p) { return call_int_hook(task_getioprio, p); } /** * security_task_prlimit() - Check if get/setting resources limits is allowed * @cred: current task credentials * @tcred: target task credentials * @flags: LSM_PRLIMIT_* flag bits indicating a get/set/both * * Check permission before getting and/or setting the resource limits of * another task. * * Return: Returns 0 if permission is granted. */ int security_task_prlimit(const struct cred *cred, const struct cred *tcred, unsigned int flags) { return call_int_hook(task_prlimit, cred, tcred, flags); } /** * security_task_setrlimit() - Check if setting a new rlimit value is allowed * @p: target task's group leader * @resource: resource whose limit is being set * @new_rlim: new resource limit * * Check permission before setting the resource limits of process @p for * @resource to @new_rlim. The old resource limit values can be examined by * dereferencing (p->signal->rlim + resource). * * Return: Returns 0 if permission is granted. */ int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim) { return call_int_hook(task_setrlimit, p, resource, new_rlim); } /** * security_task_setscheduler() - Check if setting sched policy/param is allowed * @p: target task * * Check permission before setting scheduling policy and/or parameters of * process @p. * * Return: Returns 0 if permission is granted. */ int security_task_setscheduler(struct task_struct *p) { return call_int_hook(task_setscheduler, p); } /** * security_task_getscheduler() - Check if getting scheduling info is allowed * @p: target task * * Check permission before obtaining scheduling information for process @p. * * Return: Returns 0 if permission is granted. */ int security_task_getscheduler(struct task_struct *p) { return call_int_hook(task_getscheduler, p); } /** * security_task_movememory() - Check if moving memory is allowed * @p: task * * Check permission before moving memory owned by process @p. * * Return: Returns 0 if permission is granted. */ int security_task_movememory(struct task_struct *p) { return call_int_hook(task_movememory, p); } /** * security_task_kill() - Check if sending a signal is allowed * @p: target process * @info: signal information * @sig: signal value * @cred: credentials of the signal sender, NULL if @current * * Check permission before sending signal @sig to @p. @info can be NULL, the * constant 1, or a pointer to a kernel_siginfo structure. If @info is 1 or * SI_FROMKERNEL(info) is true, then the signal should be viewed as coming from * the kernel and should typically be permitted. SIGIO signals are handled * separately by the send_sigiotask hook in file_security_ops. * * Return: Returns 0 if permission is granted. 
*/ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info, int sig, const struct cred *cred) { return call_int_hook(task_kill, p, info, sig, cred); } /** * security_task_prctl() - Check if a prctl op is allowed * @option: operation * @arg2: argument * @arg3: argument * @arg4: argument * @arg5: argument * * Check permission before performing a process control operation on the * current process. * * Return: Return -ENOSYS if no-one wanted to handle this op, any other value * to cause prctl() to return immediately with that value. */ int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { int thisrc; int rc = LSM_RET_DEFAULT(task_prctl); struct security_hook_list *hp; hlist_for_each_entry(hp, &security_hook_heads.task_prctl, list) { thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5); if (thisrc != LSM_RET_DEFAULT(task_prctl)) { rc = thisrc; if (thisrc != 0) break; } } return rc; } /** * security_task_to_inode() - Set the security attributes of a task's inode * @p: task * @inode: inode * * Set the security attributes for an inode based on an associated task's * security attributes, e.g. for /proc/pid inodes. */ void security_task_to_inode(struct task_struct *p, struct inode *inode) { call_void_hook(task_to_inode, p, inode); } /** * security_create_user_ns() - Check if creating a new userns is allowed * @cred: prepared creds * * Check permission prior to creating a new user namespace. * * Return: Returns 0 if successful, otherwise < 0 error code. */ int security_create_user_ns(const struct cred *cred) { return call_int_hook(userns_create, cred); } /** * security_ipc_permission() - Check if sysv ipc access is allowed * @ipcp: ipc permission structure * @flag: requested permissions * * Check permissions for access to IPC. * * Return: Returns 0 if permission is granted. */ int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) { return call_int_hook(ipc_permission, ipcp, flag); } /** * security_ipc_getsecid() - Get the sysv ipc object's secid * @ipcp: ipc permission structure * @secid: secid pointer * * Get the secid associated with the ipc object. In case of failure, @secid * will be set to zero. */ void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) { *secid = 0; call_void_hook(ipc_getsecid, ipcp, secid); } /** * security_msg_msg_alloc() - Allocate a sysv ipc message LSM blob * @msg: message structure * * Allocate and attach a security structure to the msg->security field. The * security field is initialized to NULL when the structure is first created. * * Return: Return 0 if operation was successful and permission is granted. */ int security_msg_msg_alloc(struct msg_msg *msg) { int rc = lsm_msg_msg_alloc(msg); if (unlikely(rc)) return rc; rc = call_int_hook(msg_msg_alloc_security, msg); if (unlikely(rc)) security_msg_msg_free(msg); return rc; } /** * security_msg_msg_free() - Free a sysv ipc message LSM blob * @msg: message structure * * Deallocate the security structure for this message. */ void security_msg_msg_free(struct msg_msg *msg) { call_void_hook(msg_msg_free_security, msg); kfree(msg->security); msg->security = NULL; } /** * security_msg_queue_alloc() - Allocate a sysv ipc msg queue LSM blob * @msq: sysv ipc permission structure * * Allocate and attach a security structure to @msg. The security field is * initialized to NULL when the structure is first created. * * Return: Returns 0 if operation was successful and permission is granted. 
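 *
 * For illustration only (the "example" names are hypothetical), an LSM that
 * reserved per-IPC blob space via struct lsm_blob_sizes::lbs_ipc could use
 * this hook to initialize the blob that lsm_ipc_alloc() just attached:
 *
 *    struct example_ipc_blob {
 *            kuid_t creator;
 *    };
 *
 *    static inline struct example_ipc_blob *
 *    example_ipc(const struct kern_ipc_perm *perm)
 *    {
 *            // blob offset handed out by the LSM framework at init time
 *            return perm->security + example_blob_sizes.lbs_ipc;
 *    }
 *
 *    static int example_msg_queue_alloc(struct kern_ipc_perm *msq)
 *    {
 *            example_ipc(msq)->creator = current_uid();
 *            return 0;
 *    }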
*/ int security_msg_queue_alloc(struct kern_ipc_perm *msq) { int rc = lsm_ipc_alloc(msq); if (unlikely(rc)) return rc; rc = call_int_hook(msg_queue_alloc_security, msq); if (unlikely(rc)) security_msg_queue_free(msq); return rc; } /** * security_msg_queue_free() - Free a sysv ipc msg queue LSM blob * @msq: sysv ipc permission structure * * Deallocate security field @perm->security for the message queue. */ void security_msg_queue_free(struct kern_ipc_perm *msq) { call_void_hook(msg_queue_free_security, msq); kfree(msq->security); msq->security = NULL; } /** * security_msg_queue_associate() - Check if a msg queue operation is allowed * @msq: sysv ipc permission structure * @msqflg: operation flags * * Check permission when a message queue is requested through the msgget system * call. This hook is only called when returning the message queue identifier * for an existing message queue, not when a new message queue is created. * * Return: Return 0 if permission is granted. */ int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg) { return call_int_hook(msg_queue_associate, msq, msqflg); } /** * security_msg_queue_msgctl() - Check if a msg queue operation is allowed * @msq: sysv ipc permission structure * @cmd: operation * * Check permission when a message control operation specified by @cmd is to be * performed on the message queue with permissions. * * Return: Returns 0 if permission is granted. */ int security_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd) { return call_int_hook(msg_queue_msgctl, msq, cmd); } /** * security_msg_queue_msgsnd() - Check if sending a sysv ipc message is allowed * @msq: sysv ipc permission structure * @msg: message * @msqflg: operation flags * * Check permission before a message, @msg, is enqueued on the message queue * with permissions specified in @msq. * * Return: Returns 0 if permission is granted. */ int security_msg_queue_msgsnd(struct kern_ipc_perm *msq, struct msg_msg *msg, int msqflg) { return call_int_hook(msg_queue_msgsnd, msq, msg, msqflg); } /** * security_msg_queue_msgrcv() - Check if receiving a sysv ipc msg is allowed * @msq: sysv ipc permission structure * @msg: message * @target: target task * @type: type of message requested * @mode: operation flags * * Check permission before a message, @msg, is removed from the message queue. * The @target task structure contains a pointer to the process that will be * receiving the message (not equal to the current process when inline receives * are being performed). * * Return: Returns 0 if permission is granted. */ int security_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode) { return call_int_hook(msg_queue_msgrcv, msq, msg, target, type, mode); } /** * security_shm_alloc() - Allocate a sysv shm LSM blob * @shp: sysv ipc permission structure * * Allocate and attach a security structure to the @shp security field. The * security field is initialized to NULL when the structure is first created. * * Return: Returns 0 if operation was successful and permission is granted. */ int security_shm_alloc(struct kern_ipc_perm *shp) { int rc = lsm_ipc_alloc(shp); if (unlikely(rc)) return rc; rc = call_int_hook(shm_alloc_security, shp); if (unlikely(rc)) security_shm_free(shp); return rc; } /** * security_shm_free() - Free a sysv shm LSM blob * @shp: sysv ipc permission structure * * Deallocate the security structure @perm->security for the memory segment. 
*/ void security_shm_free(struct kern_ipc_perm *shp) { call_void_hook(shm_free_security, shp); kfree(shp->security); shp->security = NULL; } /** * security_shm_associate() - Check if a sysv shm operation is allowed * @shp: sysv ipc permission structure * @shmflg: operation flags * * Check permission when a shared memory region is requested through the shmget * system call. This hook is only called when returning the shared memory * region identifier for an existing region, not when a new shared memory * region is created. * * Return: Returns 0 if permission is granted. */ int security_shm_associate(struct kern_ipc_perm *shp, int shmflg) { return call_int_hook(shm_associate, shp, shmflg); } /** * security_shm_shmctl() - Check if a sysv shm operation is allowed * @shp: sysv ipc permission structure * @cmd: operation * * Check permission when a shared memory control operation specified by @cmd is * to be performed on the shared memory region with permissions in @shp. * * Return: Return 0 if permission is granted. */ int security_shm_shmctl(struct kern_ipc_perm *shp, int cmd) { return call_int_hook(shm_shmctl, shp, cmd); } /** * security_shm_shmat() - Check if a sysv shm attach operation is allowed * @shp: sysv ipc permission structure * @shmaddr: address of memory region to attach * @shmflg: operation flags * * Check permissions prior to allowing the shmat system call to attach the * shared memory segment with permissions @shp to the data segment of the * calling process. The attaching address is specified by @shmaddr. * * Return: Returns 0 if permission is granted. */ int security_shm_shmat(struct kern_ipc_perm *shp, char __user *shmaddr, int shmflg) { return call_int_hook(shm_shmat, shp, shmaddr, shmflg); } /** * security_sem_alloc() - Allocate a sysv semaphore LSM blob * @sma: sysv ipc permission structure * * Allocate and attach a security structure to the @sma security field. The * security field is initialized to NULL when the structure is first created. * * Return: Returns 0 if operation was successful and permission is granted. */ int security_sem_alloc(struct kern_ipc_perm *sma) { int rc = lsm_ipc_alloc(sma); if (unlikely(rc)) return rc; rc = call_int_hook(sem_alloc_security, sma); if (unlikely(rc)) security_sem_free(sma); return rc; } /** * security_sem_free() - Free a sysv semaphore LSM blob * @sma: sysv ipc permission structure * * Deallocate security structure @sma->security for the semaphore. */ void security_sem_free(struct kern_ipc_perm *sma) { call_void_hook(sem_free_security, sma); kfree(sma->security); sma->security = NULL; } /** * security_sem_associate() - Check if a sysv semaphore operation is allowed * @sma: sysv ipc permission structure * @semflg: operation flags * * Check permission when a semaphore is requested through the semget system * call. This hook is only called when returning the semaphore identifier for * an existing semaphore, not when a new one must be created. * * Return: Returns 0 if permission is granted. */ int security_sem_associate(struct kern_ipc_perm *sma, int semflg) { return call_int_hook(sem_associate, sma, semflg); } /** * security_sem_semctl() - Check if a sysv semaphore operation is allowed * @sma: sysv ipc permission structure * @cmd: operation * * Check permission when a semaphore operation specified by @cmd is to be * performed on the semaphore. * * Return: Returns 0 if permission is granted. 
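 *
 * A hypothetical hook sketch (illustrative only) showing the kind of check
 * that fits here:
 *
 *    // only the creator of the semaphore set may destroy it
 *    static int example_sem_semctl(struct kern_ipc_perm *sma, int cmd)
 *    {
 *            if (cmd == IPC_RMID && !uid_eq(current_uid(), sma->cuid))
 *                    return -EPERM;
 *            return 0;
 *    }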
*/ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd) { return call_int_hook(sem_semctl, sma, cmd); } /** * security_sem_semop() - Check if a sysv semaphore operation is allowed * @sma: sysv ipc permission structure * @sops: operations to perform * @nsops: number of operations * @alter: flag indicating changes will be made * * Check permissions before performing operations on members of the semaphore * set. If the @alter flag is nonzero, the semaphore set may be modified. * * Return: Returns 0 if permission is granted. */ int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter) { return call_int_hook(sem_semop, sma, sops, nsops, alter); } /** * security_d_instantiate() - Populate an inode's LSM state based on a dentry * @dentry: dentry * @inode: inode * * Fill in @inode security information for a @dentry if allowed. */ void security_d_instantiate(struct dentry *dentry, struct inode *inode) { if (unlikely(inode && IS_PRIVATE(inode))) return; call_void_hook(d_instantiate, dentry, inode); } EXPORT_SYMBOL(security_d_instantiate); /* * Please keep this in sync with it's counterpart in security/lsm_syscalls.c */ /** * security_getselfattr - Read an LSM attribute of the current process. * @attr: which attribute to return * @uctx: the user-space destination for the information, or NULL * @size: pointer to the size of space available to receive the data * @flags: special handling options. LSM_FLAG_SINGLE indicates that only * attributes associated with the LSM identified in the passed @ctx be * reported. * * A NULL value for @uctx can be used to get both the number of attributes * and the size of the data. * * Returns the number of attributes found on success, negative value * on error. @size is reset to the total size of the data. * If @size is insufficient to contain the data -E2BIG is returned. */ int security_getselfattr(unsigned int attr, struct lsm_ctx __user *uctx, u32 __user *size, u32 flags) { struct security_hook_list *hp; struct lsm_ctx lctx = { .id = LSM_ID_UNDEF, }; u8 __user *base = (u8 __user *)uctx; u32 entrysize; u32 total = 0; u32 left; bool toobig = false; bool single = false; int count = 0; int rc; if (attr == LSM_ATTR_UNDEF) return -EINVAL; if (size == NULL) return -EINVAL; if (get_user(left, size)) return -EFAULT; if (flags) { /* * Only flag supported is LSM_FLAG_SINGLE */ if (flags != LSM_FLAG_SINGLE || !uctx) return -EINVAL; if (copy_from_user(&lctx, uctx, sizeof(lctx))) return -EFAULT; /* * If the LSM ID isn't specified it is an error. */ if (lctx.id == LSM_ID_UNDEF) return -EINVAL; single = true; } /* * In the usual case gather all the data from the LSMs. * In the single case only get the data from the LSM specified. */ hlist_for_each_entry(hp, &security_hook_heads.getselfattr, list) { if (single && lctx.id != hp->lsmid->id) continue; entrysize = left; if (base) uctx = (struct lsm_ctx __user *)(base + total); rc = hp->hook.getselfattr(attr, uctx, &entrysize, flags); if (rc == -EOPNOTSUPP) { rc = 0; continue; } if (rc == -E2BIG) { rc = 0; left = 0; toobig = true; } else if (rc < 0) return rc; else left -= entrysize; total += entrysize; count += rc; if (single) break; } if (put_user(total, size)) return -EFAULT; if (toobig) return -E2BIG; if (count == 0) return LSM_RET_DEFAULT(getselfattr); return count; } /* * Please keep this in sync with it's counterpart in security/lsm_syscalls.c */ /** * security_setselfattr - Set an LSM attribute on the current process. 
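 *
 * From userspace this is reached through the lsm_set_self_attr() syscall
 * wrapper in security/lsm_syscalls.c. As a purely illustrative sketch (the
 * LSM id and label value are examples, not a recommendation), a caller
 * builds a struct lsm_ctx from <linux/lsm.h> and passes its total length:
 *
 *    const char *label = "unconfined";
 *    size_t ctx_len = strlen(label) + 1;
 *    size_t len = sizeof(struct lsm_ctx) + ctx_len;
 *    struct lsm_ctx *ctx = calloc(1, len);
 *
 *    ctx->id = LSM_ID_APPARMOR;
 *    ctx->len = len;
 *    ctx->ctx_len = ctx_len;
 *    memcpy(ctx->ctx, label, ctx_len);
 *    syscall(__NR_lsm_set_self_attr, LSM_ATTR_CURRENT, ctx, len, 0);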
* @attr: which attribute to set * @uctx: the user-space source for the information * @size: the size of the data * @flags: reserved for future use, must be 0 * * Set an LSM attribute for the current process. The LSM, attribute * and new value are included in @uctx. * * Returns 0 on success, -EINVAL if the input is inconsistent, -EFAULT * if the user buffer is inaccessible, E2BIG if size is too big, or an * LSM specific failure. */ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx, u32 size, u32 flags) { struct security_hook_list *hp; struct lsm_ctx *lctx; int rc = LSM_RET_DEFAULT(setselfattr); u64 required_len; if (flags) return -EINVAL; if (size < sizeof(*lctx)) return -EINVAL; if (size > PAGE_SIZE) return -E2BIG; lctx = memdup_user(uctx, size); if (IS_ERR(lctx)) return PTR_ERR(lctx); if (size < lctx->len || check_add_overflow(sizeof(*lctx), lctx->ctx_len, &required_len) || lctx->len < required_len) { rc = -EINVAL; goto free_out; } hlist_for_each_entry(hp, &security_hook_heads.setselfattr, list) if ((hp->lsmid->id) == lctx->id) { rc = hp->hook.setselfattr(attr, lctx, size, flags); break; } free_out: kfree(lctx); return rc; } /** * security_getprocattr() - Read an attribute for a task * @p: the task * @lsmid: LSM identification * @name: attribute name * @value: attribute value * * Read attribute @name for task @p and store it into @value if allowed. * * Return: Returns the length of @value on success, a negative value otherwise. */ int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value) { struct security_hook_list *hp; hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) { if (lsmid != 0 && lsmid != hp->lsmid->id) continue; return hp->hook.getprocattr(p, name, value); } return LSM_RET_DEFAULT(getprocattr); } /** * security_setprocattr() - Set an attribute for a task * @lsmid: LSM identification * @name: attribute name * @value: attribute value * @size: attribute value size * * Write (set) the current task's attribute @name to @value, size @size if * allowed. * * Return: Returns bytes written on success, a negative value otherwise. */ int security_setprocattr(int lsmid, const char *name, void *value, size_t size) { struct security_hook_list *hp; hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) { if (lsmid != 0 && lsmid != hp->lsmid->id) continue; return hp->hook.setprocattr(name, value, size); } return LSM_RET_DEFAULT(setprocattr); } /** * security_netlink_send() - Save info and check if netlink sending is allowed * @sk: sending socket * @skb: netlink message * * Save security information for a netlink message so that permission checking * can be performed when the message is processed. The security information * can be saved using the eff_cap field of the netlink_skb_parms structure. * Also may be used to provide fine grained control over message transmission. * * Return: Returns 0 if the information was successfully saved and message is * allowed to be transmitted. */ int security_netlink_send(struct sock *sk, struct sk_buff *skb) { return call_int_hook(netlink_send, sk, skb); } /** * security_ismaclabel() - Check if the named attribute is a MAC label * @name: full extended attribute name * * Check if the extended attribute specified by @name represents a MAC label. * * Return: Returns 1 if name is a MAC attribute otherwise returns 0. 
*/ int security_ismaclabel(const char *name) { return call_int_hook(ismaclabel, name); } EXPORT_SYMBOL(security_ismaclabel); /** * security_secid_to_secctx() - Convert a secid to a secctx * @secid: secid * @secdata: secctx * @seclen: secctx length * * Convert secid to security context. If @secdata is NULL the length of the * result will be returned in @seclen, but no @secdata will be returned. This * does mean that the length could change between calls to check the length and * the next call which actually allocates and returns the @secdata. * * Return: Return 0 on success, error on failure. */ int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { return call_int_hook(secid_to_secctx, secid, secdata, seclen); } EXPORT_SYMBOL(security_secid_to_secctx); /** * security_secctx_to_secid() - Convert a secctx to a secid * @secdata: secctx * @seclen: length of secctx * @secid: secid * * Convert security context to secid. * * Return: Returns 0 on success, error on failure. */ int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { *secid = 0; return call_int_hook(secctx_to_secid, secdata, seclen, secid); } EXPORT_SYMBOL(security_secctx_to_secid); /** * security_release_secctx() - Free a secctx buffer * @secdata: secctx * @seclen: length of secctx * * Release the security context. */ void security_release_secctx(char *secdata, u32 seclen) { call_void_hook(release_secctx, secdata, seclen); } EXPORT_SYMBOL(security_release_secctx); /** * security_inode_invalidate_secctx() - Invalidate an inode's security label * @inode: inode * * Notify the security module that it must revalidate the security context of * an inode. */ void security_inode_invalidate_secctx(struct inode *inode) { call_void_hook(inode_invalidate_secctx, inode); } EXPORT_SYMBOL(security_inode_invalidate_secctx); /** * security_inode_notifysecctx() - Notify the LSM of an inode's security label * @inode: inode * @ctx: secctx * @ctxlen: length of secctx * * Notify the security module of what the security context of an inode should * be. Initializes the incore security context managed by the security module * for this inode. Example usage: NFS client invokes this hook to initialize * the security context in its incore inode to the value provided by the server * for the file when the server returned the file's attributes to the client. * Must be called with inode->i_mutex locked. * * Return: Returns 0 on success, error on failure. */ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) { return call_int_hook(inode_notifysecctx, inode, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_notifysecctx); /** * security_inode_setsecctx() - Change the security label of an inode * @dentry: inode * @ctx: secctx * @ctxlen: length of secctx * * Change the security context of an inode. Updates the incore security * context managed by the security module and invokes the fs code as needed * (via __vfs_setxattr_noperm) to update any backing xattrs that represent the * context. Example usage: NFS server invokes this hook to change the security * context in its incore inode and on the backing filesystem to a value * provided by the client on a SETATTR operation. Must be called with * inode->i_mutex locked. * * Return: Returns 0 on success, error on failure. 
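 *
 * As an aside on the secid/secctx helpers defined above, in-kernel users
 * normally pair security_secid_to_secctx() with security_release_secctx();
 * a minimal illustrative sketch, assuming @secid was obtained earlier (for
 * example from security_task_getsecid_obj()):
 *
 *    char *ctx;
 *    u32 len;
 *
 *    if (!security_secid_to_secctx(secid, &ctx, &len)) {
 *            pr_info("subject label: %.*s\n", (int)len, ctx);
 *            security_release_secctx(ctx, len);
 *    }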
*/ int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) { return call_int_hook(inode_setsecctx, dentry, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_setsecctx); /** * security_inode_getsecctx() - Get the security label of an inode * @inode: inode * @ctx: secctx * @ctxlen: length of secctx * * On success, returns 0 and fills out @ctx and @ctxlen with the security * context for the given @inode. * * Return: Returns 0 on success, error on failure. */ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { return call_int_hook(inode_getsecctx, inode, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_getsecctx); #ifdef CONFIG_WATCH_QUEUE /** * security_post_notification() - Check if a watch notification can be posted * @w_cred: credentials of the task that set the watch * @cred: credentials of the task which triggered the watch * @n: the notification * * Check to see if a watch notification can be posted to a particular queue. * * Return: Returns 0 if permission is granted. */ int security_post_notification(const struct cred *w_cred, const struct cred *cred, struct watch_notification *n) { return call_int_hook(post_notification, w_cred, cred, n); } #endif /* CONFIG_WATCH_QUEUE */ #ifdef CONFIG_KEY_NOTIFICATIONS /** * security_watch_key() - Check if a task is allowed to watch for key events * @key: the key to watch * * Check to see if a process is allowed to watch for event notifications from * a key or keyring. * * Return: Returns 0 if permission is granted. */ int security_watch_key(struct key *key) { return call_int_hook(watch_key, key); } #endif /* CONFIG_KEY_NOTIFICATIONS */ #ifdef CONFIG_SECURITY_NETWORK /** * security_unix_stream_connect() - Check if a AF_UNIX stream is allowed * @sock: originating sock * @other: peer sock * @newsk: new sock * * Check permissions before establishing a Unix domain stream connection * between @sock and @other. * * The @unix_stream_connect and @unix_may_send hooks were necessary because * Linux provides an alternative to the conventional file name space for Unix * domain sockets. Whereas binding and connecting to sockets in the file name * space is mediated by the typical file permissions (and caught by the mknod * and permission hooks in inode_security_ops), binding and connecting to * sockets in the abstract name space is completely unmediated. Sufficient * control of Unix domain sockets in the abstract name space isn't possible * using only the socket layer hooks, since we need to know the actual target * socket, which is not looked up until we are inside the af_unix code. * * Return: Returns 0 if permission is granted. */ int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk) { return call_int_hook(unix_stream_connect, sock, other, newsk); } EXPORT_SYMBOL(security_unix_stream_connect); /** * security_unix_may_send() - Check if AF_UNIX socket can send datagrams * @sock: originating sock * @other: peer sock * * Check permissions before connecting or sending datagrams from @sock to * @other. * * The @unix_stream_connect and @unix_may_send hooks were necessary because * Linux provides an alternative to the conventional file name space for Unix * domain sockets. Whereas binding and connecting to sockets in the file name * space is mediated by the typical file permissions (and caught by the mknod * and permission hooks in inode_security_ops), binding and connecting to * sockets in the abstract name space is completely unmediated. 
Sufficient * control of Unix domain sockets in the abstract name space isn't possible * using only the socket layer hooks, since we need to know the actual target * socket, which is not looked up until we are inside the af_unix code. * * Return: Returns 0 if permission is granted. */ int security_unix_may_send(struct socket *sock, struct socket *other) { return call_int_hook(unix_may_send, sock, other); } EXPORT_SYMBOL(security_unix_may_send); /** * security_socket_create() - Check if creating a new socket is allowed * @family: protocol family * @type: communications type * @protocol: requested protocol * @kern: set to 1 if a kernel socket is requested * * Check permissions prior to creating a new socket. * * Return: Returns 0 if permission is granted. */ int security_socket_create(int family, int type, int protocol, int kern) { return call_int_hook(socket_create, family, type, protocol, kern); } /** * security_socket_post_create() - Initialize a newly created socket * @sock: socket * @family: protocol family * @type: communications type * @protocol: requested protocol * @kern: set to 1 if a kernel socket is requested * * This hook allows a module to update or allocate a per-socket security * structure. Note that the security field was not added directly to the socket * structure, but rather, the socket security information is stored in the * associated inode. Typically, the inode alloc_security hook will allocate * and attach security information to SOCK_INODE(sock)->i_security. This hook * may be used to update the SOCK_INODE(sock)->i_security field with additional * information that wasn't available when the inode was allocated. * * Return: Returns 0 if permission is granted. */ int security_socket_post_create(struct socket *sock, int family, int type, int protocol, int kern) { return call_int_hook(socket_post_create, sock, family, type, protocol, kern); } /** * security_socket_socketpair() - Check if creating a socketpair is allowed * @socka: first socket * @sockb: second socket * * Check permissions before creating a fresh pair of sockets. * * Return: Returns 0 if permission is granted and the connection was * established. */ int security_socket_socketpair(struct socket *socka, struct socket *sockb) { return call_int_hook(socket_socketpair, socka, sockb); } EXPORT_SYMBOL(security_socket_socketpair); /** * security_socket_bind() - Check if a socket bind operation is allowed * @sock: socket * @address: requested bind address * @addrlen: length of address * * Check permission before socket protocol layer bind operation is performed * and the socket @sock is bound to the address specified in the @address * parameter. * * Return: Returns 0 if permission is granted. */ int security_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { return call_int_hook(socket_bind, sock, address, addrlen); } /** * security_socket_connect() - Check if a socket connect operation is allowed * @sock: socket * @address: address of remote connection point * @addrlen: length of address * * Check permission before socket protocol layer connect operation attempts to * connect socket @sock to a remote address, @address. * * Return: Returns 0 if permission is granted. 
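 *
 * A hypothetical hook sketch (illustrative only) of the sort of check an
 * LSM might perform here:
 *
 *    // refuse outbound IPv4 connections to port 80
 *    static int example_socket_connect(struct socket *sock,
 *                                      struct sockaddr *address, int addrlen)
 *    {
 *            struct sockaddr_in *sin = (struct sockaddr_in *)address;
 *
 *            if (address->sa_family == AF_INET &&
 *                addrlen >= (int)sizeof(*sin) && ntohs(sin->sin_port) == 80)
 *                    return -EACCES;
 *            return 0;
 *    }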
*/ int security_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) { return call_int_hook(socket_connect, sock, address, addrlen); } /** * security_socket_listen() - Check if a socket is allowed to listen * @sock: socket * @backlog: connection queue size * * Check permission before socket protocol layer listen operation. * * Return: Returns 0 if permission is granted. */ int security_socket_listen(struct socket *sock, int backlog) { return call_int_hook(socket_listen, sock, backlog); } /** * security_socket_accept() - Check if a socket is allowed to accept connections * @sock: listening socket * @newsock: newly creation connection socket * * Check permission before accepting a new connection. Note that the new * socket, @newsock, has been created and some information copied to it, but * the accept operation has not actually been performed. * * Return: Returns 0 if permission is granted. */ int security_socket_accept(struct socket *sock, struct socket *newsock) { return call_int_hook(socket_accept, sock, newsock); } /** * security_socket_sendmsg() - Check if sending a message is allowed * @sock: sending socket * @msg: message to send * @size: size of message * * Check permission before transmitting a message to another socket. * * Return: Returns 0 if permission is granted. */ int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { return call_int_hook(socket_sendmsg, sock, msg, size); } /** * security_socket_recvmsg() - Check if receiving a message is allowed * @sock: receiving socket * @msg: message to receive * @size: size of message * @flags: operational flags * * Check permission before receiving a message from a socket. * * Return: Returns 0 if permission is granted. */ int security_socket_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags) { return call_int_hook(socket_recvmsg, sock, msg, size, flags); } /** * security_socket_getsockname() - Check if reading the socket addr is allowed * @sock: socket * * Check permission before reading the local address (name) of the socket * object. * * Return: Returns 0 if permission is granted. */ int security_socket_getsockname(struct socket *sock) { return call_int_hook(socket_getsockname, sock); } /** * security_socket_getpeername() - Check if reading the peer's addr is allowed * @sock: socket * * Check permission before the remote address (name) of a socket object. * * Return: Returns 0 if permission is granted. */ int security_socket_getpeername(struct socket *sock) { return call_int_hook(socket_getpeername, sock); } /** * security_socket_getsockopt() - Check if reading a socket option is allowed * @sock: socket * @level: option's protocol level * @optname: option name * * Check permissions before retrieving the options associated with socket * @sock. * * Return: Returns 0 if permission is granted. */ int security_socket_getsockopt(struct socket *sock, int level, int optname) { return call_int_hook(socket_getsockopt, sock, level, optname); } /** * security_socket_setsockopt() - Check if setting a socket option is allowed * @sock: socket * @level: option's protocol level * @optname: option name * * Check permissions before setting the options associated with socket @sock. * * Return: Returns 0 if permission is granted. 
*/ int security_socket_setsockopt(struct socket *sock, int level, int optname) { return call_int_hook(socket_setsockopt, sock, level, optname); } /** * security_socket_shutdown() - Checks if shutting down the socket is allowed * @sock: socket * @how: flag indicating how sends and receives are handled * * Checks permission before all or part of a connection on the socket @sock is * shut down. * * Return: Returns 0 if permission is granted. */ int security_socket_shutdown(struct socket *sock, int how) { return call_int_hook(socket_shutdown, sock, how); } /** * security_sock_rcv_skb() - Check if an incoming network packet is allowed * @sk: destination sock * @skb: incoming packet * * Check permissions on incoming network packets. This hook is distinct from * Netfilter's IP input hooks since it is the first time that the incoming * sk_buff @skb has been associated with a particular socket, @sk. Must not * sleep inside this hook because some callers hold spinlocks. * * Return: Returns 0 if permission is granted. */ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { return call_int_hook(socket_sock_rcv_skb, sk, skb); } EXPORT_SYMBOL(security_sock_rcv_skb); /** * security_socket_getpeersec_stream() - Get the remote peer label * @sock: socket * @optval: destination buffer * @optlen: size of peer label copied into the buffer * @len: maximum size of the destination buffer * * This hook allows the security module to provide peer socket security state * for unix or connected tcp sockets to userspace via getsockopt SO_GETPEERSEC. * For tcp sockets this can be meaningful if the socket is associated with an * ipsec SA. * * Return: Returns 0 if all is well, otherwise, typical getsockopt return * values. */ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) { return call_int_hook(socket_getpeersec_stream, sock, optval, optlen, len); } /** * security_socket_getpeersec_dgram() - Get the remote peer label * @sock: socket * @skb: datagram packet * @secid: remote peer label secid * * This hook allows the security module to provide peer socket security state * for udp sockets on a per-packet basis to userspace via getsockopt * SO_GETPEERSEC. The application must first have indicated the IP_PASSSEC * option via getsockopt. It can then retrieve the security state returned by * this hook for a packet via the SCM_SECURITY ancillary message type. * * Return: Returns 0 on success, error on failure. */ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { return call_int_hook(socket_getpeersec_dgram, sock, skb, secid); } EXPORT_SYMBOL(security_socket_getpeersec_dgram); /** * security_sk_alloc() - Allocate and initialize a sock's LSM blob * @sk: sock * @family: protocol family * @priority: gfp flags * * Allocate and attach a security structure to the sk->sk_security field, which * is used to copy security attributes between local stream sockets. * * Return: Returns 0 on success, error on failure. */ int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { return call_int_hook(sk_alloc_security, sk, family, priority); } /** * security_sk_free() - Free the sock's LSM blob * @sk: sock * * Deallocate security structure. */ void security_sk_free(struct sock *sk) { call_void_hook(sk_free_security, sk); } /** * security_sk_clone() - Clone a sock's LSM state * @sk: original sock * @newsk: target sock * * Clone/copy security structure. 
*/ void security_sk_clone(const struct sock *sk, struct sock *newsk) { call_void_hook(sk_clone_security, sk, newsk); } EXPORT_SYMBOL(security_sk_clone); /** * security_sk_classify_flow() - Set a flow's secid based on socket * @sk: original socket * @flic: target flow * * Set the target flow's secid to socket's secid. */ void security_sk_classify_flow(const struct sock *sk, struct flowi_common *flic) { call_void_hook(sk_getsecid, sk, &flic->flowic_secid); } EXPORT_SYMBOL(security_sk_classify_flow); /** * security_req_classify_flow() - Set a flow's secid based on request_sock * @req: request_sock * @flic: target flow * * Sets @flic's secid to @req's secid. */ void security_req_classify_flow(const struct request_sock *req, struct flowi_common *flic) { call_void_hook(req_classify_flow, req, flic); } EXPORT_SYMBOL(security_req_classify_flow); /** * security_sock_graft() - Reconcile LSM state when grafting a sock on a socket * @sk: sock being grafted * @parent: target parent socket * * Sets @parent's inode secid to @sk's secid and update @sk with any necessary * LSM state from @parent. */ void security_sock_graft(struct sock *sk, struct socket *parent) { call_void_hook(sock_graft, sk, parent); } EXPORT_SYMBOL(security_sock_graft); /** * security_inet_conn_request() - Set request_sock state using incoming connect * @sk: parent listening sock * @skb: incoming connection * @req: new request_sock * * Initialize the @req LSM state based on @sk and the incoming connect in @skb. * * Return: Returns 0 if permission is granted. */ int security_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { return call_int_hook(inet_conn_request, sk, skb, req); } EXPORT_SYMBOL(security_inet_conn_request); /** * security_inet_csk_clone() - Set new sock LSM state based on request_sock * @newsk: new sock * @req: connection request_sock * * Set that LSM state of @sock using the LSM state from @req. */ void security_inet_csk_clone(struct sock *newsk, const struct request_sock *req) { call_void_hook(inet_csk_clone, newsk, req); } /** * security_inet_conn_established() - Update sock's LSM state with connection * @sk: sock * @skb: connection packet * * Update @sock's LSM state to represent a new connection from @skb. */ void security_inet_conn_established(struct sock *sk, struct sk_buff *skb) { call_void_hook(inet_conn_established, sk, skb); } EXPORT_SYMBOL(security_inet_conn_established); /** * security_secmark_relabel_packet() - Check if setting a secmark is allowed * @secid: new secmark value * * Check if the process should be allowed to relabel packets to @secid. * * Return: Returns 0 if permission is granted. */ int security_secmark_relabel_packet(u32 secid) { return call_int_hook(secmark_relabel_packet, secid); } EXPORT_SYMBOL(security_secmark_relabel_packet); /** * security_secmark_refcount_inc() - Increment the secmark labeling rule count * * Tells the LSM to increment the number of secmark labeling rules loaded. */ void security_secmark_refcount_inc(void) { call_void_hook(secmark_refcount_inc); } EXPORT_SYMBOL(security_secmark_refcount_inc); /** * security_secmark_refcount_dec() - Decrement the secmark labeling rule count * * Tells the LSM to decrement the number of secmark labeling rules loaded. 
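 *
 * For illustration (hypothetical helper names), a SECMARK-style packet
 * labeling rule validates its label once and keeps the rule count balanced
 * over the rule's lifetime:
 *
 *    static int example_rule_add(u32 secid)
 *    {
 *            int err = security_secmark_relabel_packet(secid);
 *
 *            if (err)
 *                    return err;
 *            security_secmark_refcount_inc();
 *            return 0;
 *    }
 *
 *    static void example_rule_del(void)
 *    {
 *            security_secmark_refcount_dec();
 *    }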
*/ void security_secmark_refcount_dec(void) { call_void_hook(secmark_refcount_dec); } EXPORT_SYMBOL(security_secmark_refcount_dec); /** * security_tun_dev_alloc_security() - Allocate a LSM blob for a TUN device * @security: pointer to the LSM blob * * This hook allows a module to allocate a security structure for a TUN device, * returning the pointer in @security. * * Return: Returns a zero on success, negative values on failure. */ int security_tun_dev_alloc_security(void **security) { return call_int_hook(tun_dev_alloc_security, security); } EXPORT_SYMBOL(security_tun_dev_alloc_security); /** * security_tun_dev_free_security() - Free a TUN device LSM blob * @security: LSM blob * * This hook allows a module to free the security structure for a TUN device. */ void security_tun_dev_free_security(void *security) { call_void_hook(tun_dev_free_security, security); } EXPORT_SYMBOL(security_tun_dev_free_security); /** * security_tun_dev_create() - Check if creating a TUN device is allowed * * Check permissions prior to creating a new TUN device. * * Return: Returns 0 if permission is granted. */ int security_tun_dev_create(void) { return call_int_hook(tun_dev_create); } EXPORT_SYMBOL(security_tun_dev_create); /** * security_tun_dev_attach_queue() - Check if attaching a TUN queue is allowed * @security: TUN device LSM blob * * Check permissions prior to attaching to a TUN device queue. * * Return: Returns 0 if permission is granted. */ int security_tun_dev_attach_queue(void *security) { return call_int_hook(tun_dev_attach_queue, security); } EXPORT_SYMBOL(security_tun_dev_attach_queue); /** * security_tun_dev_attach() - Update TUN device LSM state on attach * @sk: associated sock * @security: TUN device LSM blob * * This hook can be used by the module to update any security state associated * with the TUN device's sock structure. * * Return: Returns 0 if permission is granted. */ int security_tun_dev_attach(struct sock *sk, void *security) { return call_int_hook(tun_dev_attach, sk, security); } EXPORT_SYMBOL(security_tun_dev_attach); /** * security_tun_dev_open() - Update TUN device LSM state on open * @security: TUN device LSM blob * * This hook can be used by the module to update any security state associated * with the TUN device's security structure. * * Return: Returns 0 if permission is granted. */ int security_tun_dev_open(void *security) { return call_int_hook(tun_dev_open, security); } EXPORT_SYMBOL(security_tun_dev_open); /** * security_sctp_assoc_request() - Update the LSM on a SCTP association req * @asoc: SCTP association * @skb: packet requesting the association * * Passes the @asoc and @chunk->skb of the association INIT packet to the LSM. * * Return: Returns 0 on success, error on failure. */ int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb) { return call_int_hook(sctp_assoc_request, asoc, skb); } EXPORT_SYMBOL(security_sctp_assoc_request); /** * security_sctp_bind_connect() - Validate a list of addrs for a SCTP option * @sk: socket * @optname: SCTP option to validate * @address: list of IP addresses to validate * @addrlen: length of the address list * * Validiate permissions required for each address associated with sock @sk. * Depending on @optname, the addresses will be treated as either a connect or * bind service. The @addrlen is calculated on each IPv4 and IPv6 address using * sizeof(struct sockaddr_in) or sizeof(struct sockaddr_in6). * * Return: Returns 0 on success, error on failure. 
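 *
 * An illustrative sketch (hypothetical names) of how a hook walks the packed
 * address list using the per-family sizes described above:
 *
 *    static int example_sctp_bind_connect(struct sock *sk, int optname,
 *                                         struct sockaddr *address,
 *                                         int addrlen)
 *    {
 *            int walked = 0;
 *
 *            while (walked < addrlen) {
 *                    struct sockaddr *addr = (void *)address + walked;
 *
 *                    switch (addr->sa_family) {
 *                    case AF_INET:
 *                            walked += sizeof(struct sockaddr_in);
 *                            break;
 *                    case AF_INET6:
 *                            walked += sizeof(struct sockaddr_in6);
 *                            break;
 *                    default:
 *                            return -EINVAL;
 *                    }
 *                    // per-address bind/connect checks would go here
 *            }
 *            return 0;
 *    }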
*/ int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen) { return call_int_hook(sctp_bind_connect, sk, optname, address, addrlen); } EXPORT_SYMBOL(security_sctp_bind_connect); /** * security_sctp_sk_clone() - Clone a SCTP sock's LSM state * @asoc: SCTP association * @sk: original sock * @newsk: target sock * * Called whenever a new socket is created by accept(2) (i.e. a TCP style * socket) or when a socket is 'peeled off' e.g userspace calls * sctp_peeloff(3). */ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk) { call_void_hook(sctp_sk_clone, asoc, sk, newsk); } EXPORT_SYMBOL(security_sctp_sk_clone); /** * security_sctp_assoc_established() - Update LSM state when assoc established * @asoc: SCTP association * @skb: packet establishing the association * * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet to the * security module. * * Return: Returns 0 if permission is granted. */ int security_sctp_assoc_established(struct sctp_association *asoc, struct sk_buff *skb) { return call_int_hook(sctp_assoc_established, asoc, skb); } EXPORT_SYMBOL(security_sctp_assoc_established); /** * security_mptcp_add_subflow() - Inherit the LSM label from the MPTCP socket * @sk: the owning MPTCP socket * @ssk: the new subflow * * Update the labeling for the given MPTCP subflow, to match the one of the * owning MPTCP socket. This hook has to be called after the socket creation and * initialization via the security_socket_create() and * security_socket_post_create() LSM hooks. * * Return: Returns 0 on success or a negative error code on failure. */ int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk) { return call_int_hook(mptcp_add_subflow, sk, ssk); } #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND /** * security_ib_pkey_access() - Check if access to an IB pkey is allowed * @sec: LSM blob * @subnet_prefix: subnet prefix of the port * @pkey: IB pkey * * Check permission to access a pkey when modifying a QP. * * Return: Returns 0 if permission is granted. */ int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey) { return call_int_hook(ib_pkey_access, sec, subnet_prefix, pkey); } EXPORT_SYMBOL(security_ib_pkey_access); /** * security_ib_endport_manage_subnet() - Check if SMPs traffic is allowed * @sec: LSM blob * @dev_name: IB device name * @port_num: port number * * Check permissions to send and receive SMPs on a end port. * * Return: Returns 0 if permission is granted. */ int security_ib_endport_manage_subnet(void *sec, const char *dev_name, u8 port_num) { return call_int_hook(ib_endport_manage_subnet, sec, dev_name, port_num); } EXPORT_SYMBOL(security_ib_endport_manage_subnet); /** * security_ib_alloc_security() - Allocate an Infiniband LSM blob * @sec: LSM blob * * Allocate a security structure for Infiniband objects. * * Return: Returns 0 on success, non-zero on failure. */ int security_ib_alloc_security(void **sec) { return call_int_hook(ib_alloc_security, sec); } EXPORT_SYMBOL(security_ib_alloc_security); /** * security_ib_free_security() - Free an Infiniband LSM blob * @sec: LSM blob * * Deallocate an Infiniband security structure. 
*/ void security_ib_free_security(void *sec) { call_void_hook(ib_free_security, sec); } EXPORT_SYMBOL(security_ib_free_security); #endif /* CONFIG_SECURITY_INFINIBAND */ #ifdef CONFIG_SECURITY_NETWORK_XFRM /** * security_xfrm_policy_alloc() - Allocate a xfrm policy LSM blob * @ctxp: xfrm security context being added to the SPD * @sec_ctx: security label provided by userspace * @gfp: gfp flags * * Allocate a security structure to the xp->security field; the security field * is initialized to NULL when the xfrm_policy is allocated. * * Return: Return 0 if operation was successful. */ int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp) { return call_int_hook(xfrm_policy_alloc_security, ctxp, sec_ctx, gfp); } EXPORT_SYMBOL(security_xfrm_policy_alloc); /** * security_xfrm_policy_clone() - Clone xfrm policy LSM state * @old_ctx: xfrm security context * @new_ctxp: target xfrm security context * * Allocate a security structure in new_ctxp that contains the information from * the old_ctx structure. * * Return: Return 0 if operation was successful. */ int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp) { return call_int_hook(xfrm_policy_clone_security, old_ctx, new_ctxp); } /** * security_xfrm_policy_free() - Free a xfrm security context * @ctx: xfrm security context * * Free LSM resources associated with @ctx. */ void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx) { call_void_hook(xfrm_policy_free_security, ctx); } EXPORT_SYMBOL(security_xfrm_policy_free); /** * security_xfrm_policy_delete() - Check if deleting a xfrm policy is allowed * @ctx: xfrm security context * * Authorize deletion of a SPD entry. * * Return: Returns 0 if permission is granted. */ int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { return call_int_hook(xfrm_policy_delete_security, ctx); } /** * security_xfrm_state_alloc() - Allocate a xfrm state LSM blob * @x: xfrm state being added to the SAD * @sec_ctx: security label provided by userspace * * Allocate a security structure to the @x->security field; the security field * is initialized to NULL when the xfrm_state is allocated. Set the context to * correspond to @sec_ctx. * * Return: Return 0 if operation was successful. */ int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { return call_int_hook(xfrm_state_alloc, x, sec_ctx); } EXPORT_SYMBOL(security_xfrm_state_alloc); /** * security_xfrm_state_alloc_acquire() - Allocate a xfrm state LSM blob * @x: xfrm state being added to the SAD * @polsec: associated policy's security context * @secid: secid from the flow * * Allocate a security structure to the x->security field; the security field * is initialized to NULL when the xfrm_state is allocated. Set the context to * correspond to secid. * * Return: Returns 0 if operation was successful. */ int security_xfrm_state_alloc_acquire(struct xfrm_state *x, struct xfrm_sec_ctx *polsec, u32 secid) { return call_int_hook(xfrm_state_alloc_acquire, x, polsec, secid); } /** * security_xfrm_state_delete() - Check if deleting a xfrm state is allowed * @x: xfrm state * * Authorize deletion of x->security. * * Return: Returns 0 if permission is granted. */ int security_xfrm_state_delete(struct xfrm_state *x) { return call_int_hook(xfrm_state_delete_security, x); } EXPORT_SYMBOL(security_xfrm_state_delete); /** * security_xfrm_state_free() - Free a xfrm state * @x: xfrm state * * Deallocate x->security. 
*/ void security_xfrm_state_free(struct xfrm_state *x) { call_void_hook(xfrm_state_free_security, x); } /** * security_xfrm_policy_lookup() - Check if using a xfrm policy is allowed * @ctx: target xfrm security context * @fl_secid: flow secid used to authorize access * * Check permission when a flow selects a xfrm_policy for processing XFRMs on a * packet. The hook is called when selecting either a per-socket policy or a * generic xfrm policy. * * Return: Return 0 if permission is granted, -ESRCH otherwise, or -errno on * other errors. */ int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid) { return call_int_hook(xfrm_policy_lookup, ctx, fl_secid); } /** * security_xfrm_state_pol_flow_match() - Check for a xfrm match * @x: xfrm state to match * @xp: xfrm policy to check for a match * @flic: flow to check for a match. * * Check @xp and @flic for a match with @x. * * Return: Returns 1 if there is a match. */ int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, const struct flowi_common *flic) { struct security_hook_list *hp; int rc = LSM_RET_DEFAULT(xfrm_state_pol_flow_match); /* * Since this function is expected to return 0 or 1, the judgment * becomes difficult if multiple LSMs supply this call. Fortunately, * we can use the first LSM's judgment because currently only SELinux * supplies this call. * * For speed optimization, we explicitly break the loop rather than * using the macro */ hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match, list) { rc = hp->hook.xfrm_state_pol_flow_match(x, xp, flic); break; } return rc; } /** * security_xfrm_decode_session() - Determine the xfrm secid for a packet * @skb: xfrm packet * @secid: secid * * Decode the packet in @skb and return the security label in @secid. * * Return: Return 0 if all xfrms used have the same secid. */ int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { return call_int_hook(xfrm_decode_session, skb, secid, 1); } void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic) { int rc = call_int_hook(xfrm_decode_session, skb, &flic->flowic_secid, 0); BUG_ON(rc); } EXPORT_SYMBOL(security_skb_classify_flow); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS /** * security_key_alloc() - Allocate and initialize a kernel key LSM blob * @key: key * @cred: credentials * @flags: allocation flags * * Permit allocation of a key and assign security data. Note that key does not * have a serial number assigned at this point. * * Return: Return 0 if permission is granted, -ve error otherwise. */ int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { return call_int_hook(key_alloc, key, cred, flags); } /** * security_key_free() - Free a kernel key LSM blob * @key: key * * Notification of destruction; free security data. */ void security_key_free(struct key *key) { call_void_hook(key_free, key); } /** * security_key_permission() - Check if a kernel key operation is allowed * @key_ref: key reference * @cred: credentials of actor requesting access * @need_perm: requested permissions * * See whether a specific operational right is granted to a process on a key. * * Return: Return 0 if permission is granted, -ve error otherwise. 
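 *
 * A hypothetical hook sketch (illustrative only):
 *
 *    // restrict keyring searches to keys owned by the requesting
 *    // credentials
 *    static int example_key_permission(key_ref_t key_ref,
 *                                      const struct cred *cred,
 *                                      enum key_need_perm need_perm)
 *    {
 *            struct key *key = key_ref_to_ptr(key_ref);
 *
 *            if (need_perm == KEY_NEED_SEARCH &&
 *                !uid_eq(key->uid, cred->fsuid))
 *                    return -EPERM;
 *            return 0;
 *    }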
*/ int security_key_permission(key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) { return call_int_hook(key_permission, key_ref, cred, need_perm); } /** * security_key_getsecurity() - Get the key's security label * @key: key * @buffer: security label buffer * * Get a textual representation of the security context attached to a key for * the purposes of honouring KEYCTL_GETSECURITY. This function allocates the * storage for the NUL-terminated string and the caller should free it. * * Return: Returns the length of @buffer (including terminating NUL) or -ve if * an error occurs. May also return 0 (and a NULL buffer pointer) if * there is no security label assigned to the key. */ int security_key_getsecurity(struct key *key, char **buffer) { *buffer = NULL; return call_int_hook(key_getsecurity, key, buffer); } /** * security_key_post_create_or_update() - Notification of key create or update * @keyring: keyring to which the key is linked to * @key: created or updated key * @payload: data used to instantiate or update the key * @payload_len: length of payload * @flags: key flags * @create: flag indicating whether the key was created or updated * * Notify the caller of a key creation or update. */ void security_key_post_create_or_update(struct key *keyring, struct key *key, const void *payload, size_t payload_len, unsigned long flags, bool create) { call_void_hook(key_post_create_or_update, keyring, key, payload, payload_len, flags, create); } #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT /** * security_audit_rule_init() - Allocate and init an LSM audit rule struct * @field: audit action * @op: rule operator * @rulestr: rule context * @lsmrule: receive buffer for audit rule struct * * Allocate and initialize an LSM audit rule structure. * * Return: Return 0 if @lsmrule has been successfully set, -EINVAL in case of * an invalid rule. */ int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule) { return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule); } /** * security_audit_rule_known() - Check if an audit rule contains LSM fields * @krule: audit rule * * Specifies whether given @krule contains any fields related to the current * LSM. * * Return: Returns 1 in case of relation found, 0 otherwise. */ int security_audit_rule_known(struct audit_krule *krule) { return call_int_hook(audit_rule_known, krule); } /** * security_audit_rule_free() - Free an LSM audit rule struct * @lsmrule: audit rule struct * * Deallocate the LSM audit rule structure previously allocated by * audit_rule_init(). */ void security_audit_rule_free(void *lsmrule) { call_void_hook(audit_rule_free, lsmrule); } /** * security_audit_rule_match() - Check if a label matches an audit rule * @secid: security label * @field: LSM audit field * @op: matching operator * @lsmrule: audit rule * * Determine if given @secid matches a rule previously approved by * security_audit_rule_known(). * * Return: Returns 1 if secid matches the rule, 0 if it does not, -ERRNO on * failure. */ int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule) { return call_int_hook(audit_rule_match, secid, field, op, lsmrule); } #endif /* CONFIG_AUDIT */ #ifdef CONFIG_BPF_SYSCALL /** * security_bpf() - Check if the bpf syscall operation is allowed * @cmd: command * @attr: bpf attribute * @size: size * * Do a initial check for all bpf syscalls after the attribute is copied into * the kernel. The actual security module can implement their own rules to * check the specific cmd they need. 
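 *
 * A hypothetical hook sketch (illustrative only) of a coarse command-level
 * policy:
 *
 *    // gate map creation on CAP_BPF, let every other command through
 *    static int example_bpf(int cmd, union bpf_attr *attr, unsigned int size)
 *    {
 *            if (cmd == BPF_MAP_CREATE && !capable(CAP_BPF))
 *                    return -EPERM;
 *            return 0;
 *    }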
* * Return: Returns 0 if permission is granted. */ int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) { return call_int_hook(bpf, cmd, attr, size); } /** * security_bpf_map() - Check if access to a bpf map is allowed * @map: bpf map * @fmode: mode * * Do a check when the kernel generates and returns a file descriptor for eBPF * maps. * * Return: Returns 0 if permission is granted. */ int security_bpf_map(struct bpf_map *map, fmode_t fmode) { return call_int_hook(bpf_map, map, fmode); } /** * security_bpf_prog() - Check if access to a bpf program is allowed * @prog: bpf program * * Do a check when the kernel generates and returns a file descriptor for eBPF * programs. * * Return: Returns 0 if permission is granted. */ int security_bpf_prog(struct bpf_prog *prog) { return call_int_hook(bpf_prog, prog); } /** * security_bpf_map_create() - Check if BPF map creation is allowed * @map: BPF map object * @attr: BPF syscall attributes used to create BPF map * @token: BPF token used to grant user access * * Do a check when the kernel creates a new BPF map. This is also the * point where LSM blob is allocated for LSMs that need them. * * Return: Returns 0 on success, error on failure. */ int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, struct bpf_token *token) { return call_int_hook(bpf_map_create, map, attr, token); } /** * security_bpf_prog_load() - Check if loading of BPF program is allowed * @prog: BPF program object * @attr: BPF syscall attributes used to create BPF program * @token: BPF token used to grant user access to BPF subsystem * * Perform an access control check when the kernel loads a BPF program and * allocates associated BPF program object. This hook is also responsible for * allocating any required LSM state for the BPF program. * * Return: Returns 0 on success, error on failure. */ int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token) { return call_int_hook(bpf_prog_load, prog, attr, token); } /** * security_bpf_token_create() - Check if creating of BPF token is allowed * @token: BPF token object * @attr: BPF syscall attributes used to create BPF token * @path: path pointing to BPF FS mount point from which BPF token is created * * Do a check when the kernel instantiates a new BPF token object from BPF FS * instance. This is also the point where LSM blob can be allocated for LSMs. * * Return: Returns 0 on success, error on failure. */ int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, struct path *path) { return call_int_hook(bpf_token_create, token, attr, path); } /** * security_bpf_token_cmd() - Check if BPF token is allowed to delegate * requested BPF syscall command * @token: BPF token object * @cmd: BPF syscall command requested to be delegated by BPF token * * Do a check when the kernel decides whether provided BPF token should allow * delegation of requested BPF syscall command. * * Return: Returns 0 on success, error on failure. */ int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) { return call_int_hook(bpf_token_cmd, token, cmd); } /** * security_bpf_token_capable() - Check if BPF token is allowed to delegate * requested BPF-related capability * @token: BPF token object * @cap: capabilities requested to be delegated by BPF token * * Do a check when the kernel decides whether provided BPF token should allow * delegation of requested BPF-related capabilities. * * Return: Returns 0 on success, error on failure. 
*/ int security_bpf_token_capable(const struct bpf_token *token, int cap) { return call_int_hook(bpf_token_capable, token, cap); } /** * security_bpf_map_free() - Free a bpf map's LSM blob * @map: bpf map * * Clean up the security information stored inside bpf map. */ void security_bpf_map_free(struct bpf_map *map) { call_void_hook(bpf_map_free, map); } /** * security_bpf_prog_free() - Free a BPF program's LSM blob * @prog: BPF program struct * * Clean up the security information stored inside BPF program. */ void security_bpf_prog_free(struct bpf_prog *prog) { call_void_hook(bpf_prog_free, prog); } /** * security_bpf_token_free() - Free a BPF token's LSM blob * @token: BPF token struct * * Clean up the security information stored inside BPF token. */ void security_bpf_token_free(struct bpf_token *token) { call_void_hook(bpf_token_free, token); } #endif /* CONFIG_BPF_SYSCALL */ /** * security_locked_down() - Check if a kernel feature is allowed * @what: requested kernel feature * * Determine whether a kernel feature that potentially enables arbitrary code * execution in kernel space should be permitted. * * Return: Returns 0 if permission is granted. */ int security_locked_down(enum lockdown_reason what) { return call_int_hook(locked_down, what); } EXPORT_SYMBOL(security_locked_down); #ifdef CONFIG_PERF_EVENTS /** * security_perf_event_open() - Check if a perf event open is allowed * @attr: perf event attribute * @type: type of event * * Check whether the @type of perf_event_open syscall is allowed. * * Return: Returns 0 if permission is granted. */ int security_perf_event_open(struct perf_event_attr *attr, int type) { return call_int_hook(perf_event_open, attr, type); } /** * security_perf_event_alloc() - Allocate a perf event LSM blob * @event: perf event * * Allocate and save perf_event security info. * * Return: Returns 0 on success, error on failure. */ int security_perf_event_alloc(struct perf_event *event) { return call_int_hook(perf_event_alloc, event); } /** * security_perf_event_free() - Free a perf event LSM blob * @event: perf event * * Release (free) perf_event security info. */ void security_perf_event_free(struct perf_event *event) { call_void_hook(perf_event_free, event); } /** * security_perf_event_read() - Check if reading a perf event label is allowed * @event: perf event * * Read perf_event security info if allowed. * * Return: Returns 0 if permission is granted. */ int security_perf_event_read(struct perf_event *event) { return call_int_hook(perf_event_read, event); } /** * security_perf_event_write() - Check if writing a perf event label is allowed * @event: perf event * * Write perf_event security info if allowed. * * Return: Returns 0 if permission is granted. */ int security_perf_event_write(struct perf_event *event) { return call_int_hook(perf_event_write, event); } #endif /* CONFIG_PERF_EVENTS */ #ifdef CONFIG_IO_URING /** * security_uring_override_creds() - Check if overriding creds is allowed * @new: new credentials * * Check if the current task, executing an io_uring operation, is allowed to * override it's credentials with @new. * * Return: Returns 0 if permission is granted. */ int security_uring_override_creds(const struct cred *new) { return call_int_hook(uring_override_creds, new); } /** * security_uring_sqpoll() - Check if IORING_SETUP_SQPOLL is allowed * * Check whether the current task is allowed to spawn a io_uring polling thread * (IORING_SETUP_SQPOLL). * * Return: Returns 0 if permission is granted. 
*/ int security_uring_sqpoll(void) { return call_int_hook(uring_sqpoll); } /** * security_uring_cmd() - Check if a io_uring passthrough command is allowed * @ioucmd: command * * Check whether the file_operations uring_cmd is allowed to run. * * Return: Returns 0 if permission is granted. */ int security_uring_cmd(struct io_uring_cmd *ioucmd) { return call_int_hook(uring_cmd, ioucmd); } #endif /* CONFIG_IO_URING */
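/*
 * Illustrative sketch (not part of security.c): how a kernel subsystem consults
 * one of the hooks documented above. Only security_locked_down() and the
 * LOCKDOWN_DEBUGFS reason are existing kernel interfaces; example_debugfs_write()
 * and its body are hypothetical.
 */
static int example_debugfs_write(void)
{
	int err;

	/* Ask the active LSMs whether this lockdown-sensitive action is allowed. */
	err = security_locked_down(LOCKDOWN_DEBUGFS);
	if (err)
		return err;	/* typically -EPERM when the kernel is locked down */

	/* ... perform the debugfs write here ... */
	return 0;
}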
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for Corsair devices * * Supported devices: * - Vengeance K70 Keyboard * - K70 RAPIDFIRE Keyboard * - Vengeance K90 Keyboard * - Scimitar PRO RGB Gaming Mouse * * Copyright (c) 2015 Clement Vuchener * Copyright (c) 2017 Oscar Campos * Copyright (c) 2017 Aaron Bottegal */ /* */ #include <linux/hid.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/leds.h> #include "hid-ids.h" #define CORSAIR_USE_K90_MACRO (1<<0) #define CORSAIR_USE_K90_BACKLIGHT (1<<1) struct k90_led { struct led_classdev cdev; int brightness; struct work_struct work; bool
removed; }; struct k90_drvdata { struct k90_led record_led; }; struct corsair_drvdata { unsigned long quirks; struct k90_drvdata *k90; struct k90_led *backlight; }; #define K90_GKEY_COUNT 18 static int corsair_usage_to_gkey(unsigned int usage) { /* G1 (0xd0) to G16 (0xdf) */ if (usage >= 0xd0 && usage <= 0xdf) return usage - 0xd0 + 1; /* G17 (0xe8) to G18 (0xe9) */ if (usage >= 0xe8 && usage <= 0xe9) return usage - 0xe8 + 17; return 0; } static unsigned short corsair_gkey_map[K90_GKEY_COUNT] = { BTN_TRIGGER_HAPPY1, BTN_TRIGGER_HAPPY2, BTN_TRIGGER_HAPPY3, BTN_TRIGGER_HAPPY4, BTN_TRIGGER_HAPPY5, BTN_TRIGGER_HAPPY6, BTN_TRIGGER_HAPPY7, BTN_TRIGGER_HAPPY8, BTN_TRIGGER_HAPPY9, BTN_TRIGGER_HAPPY10, BTN_TRIGGER_HAPPY11, BTN_TRIGGER_HAPPY12, BTN_TRIGGER_HAPPY13, BTN_TRIGGER_HAPPY14, BTN_TRIGGER_HAPPY15, BTN_TRIGGER_HAPPY16, BTN_TRIGGER_HAPPY17, BTN_TRIGGER_HAPPY18, }; module_param_array_named(gkey_codes, corsair_gkey_map, ushort, NULL, S_IRUGO); MODULE_PARM_DESC(gkey_codes, "Key codes for the G-keys"); static unsigned short corsair_record_keycodes[2] = { BTN_TRIGGER_HAPPY19, BTN_TRIGGER_HAPPY20 }; module_param_array_named(recordkey_codes, corsair_record_keycodes, ushort, NULL, S_IRUGO); MODULE_PARM_DESC(recordkey_codes, "Key codes for the MR (start and stop record) button"); static unsigned short corsair_profile_keycodes[3] = { BTN_TRIGGER_HAPPY21, BTN_TRIGGER_HAPPY22, BTN_TRIGGER_HAPPY23 }; module_param_array_named(profilekey_codes, corsair_profile_keycodes, ushort, NULL, S_IRUGO); MODULE_PARM_DESC(profilekey_codes, "Key codes for the profile buttons"); #define CORSAIR_USAGE_SPECIAL_MIN 0xf0 #define CORSAIR_USAGE_SPECIAL_MAX 0xff #define CORSAIR_USAGE_MACRO_RECORD_START 0xf6 #define CORSAIR_USAGE_MACRO_RECORD_STOP 0xf7 #define CORSAIR_USAGE_PROFILE 0xf1 #define CORSAIR_USAGE_M1 0xf1 #define CORSAIR_USAGE_M2 0xf2 #define CORSAIR_USAGE_M3 0xf3 #define CORSAIR_USAGE_PROFILE_MAX 0xf3 #define CORSAIR_USAGE_META_OFF 0xf4 #define CORSAIR_USAGE_META_ON 0xf5 #define CORSAIR_USAGE_LIGHT 0xfa #define CORSAIR_USAGE_LIGHT_OFF 0xfa #define CORSAIR_USAGE_LIGHT_DIM 0xfb #define CORSAIR_USAGE_LIGHT_MEDIUM 0xfc #define CORSAIR_USAGE_LIGHT_BRIGHT 0xfd #define CORSAIR_USAGE_LIGHT_MAX 0xfd /* USB control protocol */ #define K90_REQUEST_BRIGHTNESS 49 #define K90_REQUEST_MACRO_MODE 2 #define K90_REQUEST_STATUS 4 #define K90_REQUEST_GET_MODE 5 #define K90_REQUEST_PROFILE 20 #define K90_MACRO_MODE_SW 0x0030 #define K90_MACRO_MODE_HW 0x0001 #define K90_MACRO_LED_ON 0x0020 #define K90_MACRO_LED_OFF 0x0040 /* * LED class devices */ #define K90_BACKLIGHT_LED_SUFFIX "::backlight" #define K90_RECORD_LED_SUFFIX "::record" static enum led_brightness k90_backlight_get(struct led_classdev *led_cdev) { int ret; struct k90_led *led = container_of(led_cdev, struct k90_led, cdev); struct device *dev = led->cdev.dev->parent; struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); int brightness; char *data; data = kmalloc(8, GFP_KERNEL); if (!data) return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 8, USB_CTRL_SET_TIMEOUT); if (ret < 5) { dev_warn(dev, "Failed to get K90 initial state (error %d).\n", ret); ret = -EIO; goto out; } brightness = data[4]; if (brightness < 0 || brightness > 3) { dev_warn(dev, "Read invalid backlight brightness: %02hhx.\n", data[4]); ret = -EIO; goto out; } ret = brightness; out: kfree(data); return ret; } static enum led_brightness 
k90_record_led_get(struct led_classdev *led_cdev) { struct k90_led *led = container_of(led_cdev, struct k90_led, cdev); return led->brightness; } static void k90_brightness_set(struct led_classdev *led_cdev, enum led_brightness brightness) { struct k90_led *led = container_of(led_cdev, struct k90_led, cdev); led->brightness = brightness; schedule_work(&led->work); } static void k90_backlight_work(struct work_struct *work) { int ret; struct k90_led *led = container_of(work, struct k90_led, work); struct device *dev; struct usb_interface *usbif; struct usb_device *usbdev; if (led->removed) return; dev = led->cdev.dev->parent; usbif = to_usb_interface(dev->parent); usbdev = interface_to_usbdev(usbif); ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), K90_REQUEST_BRIGHTNESS, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, led->brightness, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret != 0) dev_warn(dev, "Failed to set backlight brightness (error: %d).\n", ret); } static void k90_record_led_work(struct work_struct *work) { int ret; struct k90_led *led = container_of(work, struct k90_led, work); struct device *dev; struct usb_interface *usbif; struct usb_device *usbdev; int value; if (led->removed) return; dev = led->cdev.dev->parent; usbif = to_usb_interface(dev->parent); usbdev = interface_to_usbdev(usbif); if (led->brightness > 0) value = K90_MACRO_LED_ON; else value = K90_MACRO_LED_OFF; ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), K90_REQUEST_MACRO_MODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret != 0) dev_warn(dev, "Failed to set record LED state (error: %d).\n", ret); } /* * Keyboard attributes */ static ssize_t k90_show_macro_mode(struct device *dev, struct device_attribute *attr, char *buf) { int ret; struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); const char *macro_mode; char *data; data = kmalloc(2, GFP_KERNEL); if (!data) return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_GET_MODE, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 2, USB_CTRL_SET_TIMEOUT); if (ret < 1) { dev_warn(dev, "Failed to get K90 initial mode (error %d).\n", ret); ret = -EIO; goto out; } switch (data[0]) { case K90_MACRO_MODE_HW: macro_mode = "HW"; break; case K90_MACRO_MODE_SW: macro_mode = "SW"; break; default: dev_warn(dev, "K90 in unknown mode: %02hhx.\n", data[0]); ret = -EIO; goto out; } ret = snprintf(buf, PAGE_SIZE, "%s\n", macro_mode); out: kfree(data); return ret; } static ssize_t k90_store_macro_mode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ret; struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); __u16 value; if (strncmp(buf, "SW", 2) == 0) value = K90_MACRO_MODE_SW; else if (strncmp(buf, "HW", 2) == 0) value = K90_MACRO_MODE_HW; else return -EINVAL; ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), K90_REQUEST_MACRO_MODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret != 0) { dev_warn(dev, "Failed to set macro mode.\n"); return ret; } return count; } static ssize_t k90_show_current_profile(struct device *dev, struct device_attribute *attr, char *buf) { int ret; struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); int current_profile; char *data; data = kmalloc(8, GFP_KERNEL); if 
(!data) return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 8, USB_CTRL_SET_TIMEOUT); if (ret < 8) { dev_warn(dev, "Failed to get K90 initial state (error %d).\n", ret); ret = -EIO; goto out; } current_profile = data[7]; if (current_profile < 1 || current_profile > 3) { dev_warn(dev, "Read invalid current profile: %02hhx.\n", data[7]); ret = -EIO; goto out; } ret = snprintf(buf, PAGE_SIZE, "%d\n", current_profile); out: kfree(data); return ret; } static ssize_t k90_store_current_profile(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ret; struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); int profile; if (kstrtoint(buf, 10, &profile)) return -EINVAL; if (profile < 1 || profile > 3) return -EINVAL; ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), K90_REQUEST_PROFILE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, profile, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret != 0) { dev_warn(dev, "Failed to change current profile (error %d).\n", ret); return ret; } return count; } static DEVICE_ATTR(macro_mode, 0644, k90_show_macro_mode, k90_store_macro_mode); static DEVICE_ATTR(current_profile, 0644, k90_show_current_profile, k90_store_current_profile); static struct attribute *k90_attrs[] = { &dev_attr_macro_mode.attr, &dev_attr_current_profile.attr, NULL }; static const struct attribute_group k90_attr_group = { .attrs = k90_attrs, }; /* * Driver functions */ static int k90_init_backlight(struct hid_device *dev) { int ret; struct corsair_drvdata *drvdata = hid_get_drvdata(dev); size_t name_sz; char *name; drvdata->backlight = kzalloc(sizeof(struct k90_led), GFP_KERNEL); if (!drvdata->backlight) { ret = -ENOMEM; goto fail_backlight_alloc; } name_sz = strlen(dev_name(&dev->dev)) + sizeof(K90_BACKLIGHT_LED_SUFFIX); name = kzalloc(name_sz, GFP_KERNEL); if (!name) { ret = -ENOMEM; goto fail_name_alloc; } snprintf(name, name_sz, "%s" K90_BACKLIGHT_LED_SUFFIX, dev_name(&dev->dev)); drvdata->backlight->removed = false; drvdata->backlight->cdev.name = name; drvdata->backlight->cdev.max_brightness = 3; drvdata->backlight->cdev.brightness_set = k90_brightness_set; drvdata->backlight->cdev.brightness_get = k90_backlight_get; INIT_WORK(&drvdata->backlight->work, k90_backlight_work); ret = led_classdev_register(&dev->dev, &drvdata->backlight->cdev); if (ret != 0) goto fail_register_cdev; return 0; fail_register_cdev: kfree(drvdata->backlight->cdev.name); fail_name_alloc: kfree(drvdata->backlight); drvdata->backlight = NULL; fail_backlight_alloc: return ret; } static int k90_init_macro_functions(struct hid_device *dev) { int ret; struct corsair_drvdata *drvdata = hid_get_drvdata(dev); struct k90_drvdata *k90; size_t name_sz; char *name; k90 = kzalloc(sizeof(struct k90_drvdata), GFP_KERNEL); if (!k90) { ret = -ENOMEM; goto fail_drvdata; } drvdata->k90 = k90; /* Init LED device for record LED */ name_sz = strlen(dev_name(&dev->dev)) + sizeof(K90_RECORD_LED_SUFFIX); name = kzalloc(name_sz, GFP_KERNEL); if (!name) { ret = -ENOMEM; goto fail_record_led_alloc; } snprintf(name, name_sz, "%s" K90_RECORD_LED_SUFFIX, dev_name(&dev->dev)); k90->record_led.removed = false; k90->record_led.cdev.name = name; k90->record_led.cdev.max_brightness = 1; k90->record_led.cdev.brightness_set = k90_brightness_set; k90->record_led.cdev.brightness_get = k90_record_led_get; INIT_WORK(&k90->record_led.work, 
k90_record_led_work); k90->record_led.brightness = 0; ret = led_classdev_register(&dev->dev, &k90->record_led.cdev); if (ret != 0) goto fail_record_led; /* Init attributes */ ret = sysfs_create_group(&dev->dev.kobj, &k90_attr_group); if (ret != 0) goto fail_sysfs; return 0; fail_sysfs: k90->record_led.removed = true; led_classdev_unregister(&k90->record_led.cdev); cancel_work_sync(&k90->record_led.work); fail_record_led: kfree(k90->record_led.cdev.name); fail_record_led_alloc: kfree(k90); fail_drvdata: drvdata->k90 = NULL; return ret; } static void k90_cleanup_backlight(struct hid_device *dev) { struct corsair_drvdata *drvdata = hid_get_drvdata(dev); if (drvdata->backlight) { drvdata->backlight->removed = true; led_classdev_unregister(&drvdata->backlight->cdev); cancel_work_sync(&drvdata->backlight->work); kfree(drvdata->backlight->cdev.name); kfree(drvdata->backlight); } } static void k90_cleanup_macro_functions(struct hid_device *dev) { struct corsair_drvdata *drvdata = hid_get_drvdata(dev); struct k90_drvdata *k90 = drvdata->k90; if (k90) { sysfs_remove_group(&dev->dev.kobj, &k90_attr_group); k90->record_led.removed = true; led_classdev_unregister(&k90->record_led.cdev); cancel_work_sync(&k90->record_led.work); kfree(k90->record_led.cdev.name); kfree(k90); } } static int corsair_probe(struct hid_device *dev, const struct hid_device_id *id) { int ret; unsigned long quirks = id->driver_data; struct corsair_drvdata *drvdata; struct usb_interface *usbif; if (!hid_is_usb(dev)) return -EINVAL; usbif = to_usb_interface(dev->dev.parent); drvdata = devm_kzalloc(&dev->dev, sizeof(struct corsair_drvdata), GFP_KERNEL); if (drvdata == NULL) return -ENOMEM; drvdata->quirks = quirks; hid_set_drvdata(dev, drvdata); ret = hid_parse(dev); if (ret != 0) { hid_err(dev, "parse failed\n"); return ret; } ret = hid_hw_start(dev, HID_CONNECT_DEFAULT); if (ret != 0) { hid_err(dev, "hw start failed\n"); return ret; } if (usbif->cur_altsetting->desc.bInterfaceNumber == 0) { if (quirks & CORSAIR_USE_K90_MACRO) { ret = k90_init_macro_functions(dev); if (ret != 0) hid_warn(dev, "Failed to initialize K90 macro functions.\n"); } if (quirks & CORSAIR_USE_K90_BACKLIGHT) { ret = k90_init_backlight(dev); if (ret != 0) hid_warn(dev, "Failed to initialize K90 backlight.\n"); } } return 0; } static void corsair_remove(struct hid_device *dev) { k90_cleanup_macro_functions(dev); k90_cleanup_backlight(dev); hid_hw_stop(dev); } static int corsair_event(struct hid_device *dev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct corsair_drvdata *drvdata = hid_get_drvdata(dev); if (!drvdata->k90) return 0; switch (usage->hid & HID_USAGE) { case CORSAIR_USAGE_MACRO_RECORD_START: drvdata->k90->record_led.brightness = 1; break; case CORSAIR_USAGE_MACRO_RECORD_STOP: drvdata->k90->record_led.brightness = 0; break; default: break; } return 0; } static int corsair_input_mapping(struct hid_device *dev, struct hid_input *input, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { int gkey; if ((usage->hid & HID_USAGE_PAGE) != HID_UP_KEYBOARD) return 0; gkey = corsair_usage_to_gkey(usage->hid & HID_USAGE); if (gkey != 0) { hid_map_usage_clear(input, usage, bit, max, EV_KEY, corsair_gkey_map[gkey - 1]); return 1; } if ((usage->hid & HID_USAGE) >= CORSAIR_USAGE_SPECIAL_MIN && (usage->hid & HID_USAGE) <= CORSAIR_USAGE_SPECIAL_MAX) { switch (usage->hid & HID_USAGE) { case CORSAIR_USAGE_MACRO_RECORD_START: hid_map_usage_clear(input, usage, bit, max, EV_KEY, corsair_record_keycodes[0]); return 1; 
case CORSAIR_USAGE_MACRO_RECORD_STOP: hid_map_usage_clear(input, usage, bit, max, EV_KEY, corsair_record_keycodes[1]); return 1; case CORSAIR_USAGE_M1: hid_map_usage_clear(input, usage, bit, max, EV_KEY, corsair_profile_keycodes[0]); return 1; case CORSAIR_USAGE_M2: hid_map_usage_clear(input, usage, bit, max, EV_KEY, corsair_profile_keycodes[1]); return 1; case CORSAIR_USAGE_M3: hid_map_usage_clear(input, usage, bit, max, EV_KEY, corsair_profile_keycodes[2]); return 1; default: return -1; } } return 0; } /* * The report descriptor of some of the Corsair gaming mice is * not parseable as they define two consecutive Logical Minimum items for * the Usage Page (Consumer) in rdesc bytes 75 and 77: byte 77 is 0x16 * when it should obviously be 0x26, the Logical Maximum of 16 bits. This * prevents proper parsing of the report descriptor because the Logical * Minimum ends up larger than the Logical Maximum. * * This driver fixes the report descriptor for: * - USB ID 1b1c:1b34, sold as GLAIVE RGB Gaming mouse * - USB ID 1b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse */ static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { /* * The Corsair GLAIVE RGB and Scimitar RGB Pro report descriptor is * broken and defines two different Logical Minimum items for the * Consumer Application. Byte 77 should be a 0x26 defining * a 16-bit integer for the Logical Maximum, but it is a 0x16 * (Logical Minimum) instead. */ switch (hdev->product) { case USB_DEVICE_ID_CORSAIR_GLAIVE_RGB: case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB: if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16 && rdesc[78] == 0xff && rdesc[79] == 0x0f) { hid_info(hdev, "Fixing up report descriptor\n"); rdesc[77] = 0x26; } break; } } return rdesc; } static const struct hid_device_id corsair_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90), .driver_data = CORSAIR_USE_K90_MACRO | CORSAIR_USE_K90_BACKLIGHT }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_GLAIVE_RGB) }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, /* * Vengeance K70 and K70 RAPIDFIRE share product IDs. */ { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70R) }, {} }; MODULE_DEVICE_TABLE(hid, corsair_devices); static struct hid_driver corsair_driver = { .name = "corsair", .id_table = corsair_devices, .probe = corsair_probe, .event = corsair_event, .remove = corsair_remove, .input_mapping = corsair_input_mapping, .report_fixup = corsair_mouse_report_fixup, }; module_hid_driver(corsair_driver); MODULE_LICENSE("GPL"); /* Original K90 driver author */ MODULE_AUTHOR("Clement Vuchener"); /* Scimitar PRO RGB driver author */ MODULE_AUTHOR("Oscar Campos"); MODULE_DESCRIPTION("HID driver for Corsair devices");
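/*
 * Illustrative sketch (not part of the driver): why flipping byte 77 from 0x16
 * to 0x26 in corsair_mouse_report_fixup() is sufficient. In a HID report
 * descriptor the short-item prefix byte encodes bTag (bits 4-7), bType
 * (bits 2-3) and bSize (bits 0-1), so 0x16 is "Logical Minimum, Global,
 * 2 bytes of data" and 0x26 is "Logical Maximum, Global, 2 bytes of data".
 * The macro and helper below are hypothetical; the encoding follows the
 * HID 1.11 specification.
 */
#define EXAMPLE_HID_ITEM(tag, type, size)	(((tag) << 4) | ((type) << 2) | (size))

static void example_check_item_prefixes(void)
{
	/* Logical Minimum: tag 1, Global type 1, 2-byte payload -> 0x16 */
	BUILD_BUG_ON(EXAMPLE_HID_ITEM(1, 1, 2) != 0x16);
	/* Logical Maximum: tag 2, Global type 1, 2-byte payload -> 0x26 */
	BUILD_BUG_ON(EXAMPLE_HID_ITEM(2, 1, 2) != 0x26);
}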
// SPDX-License-Identifier: GPL-2.0 #include <linux/memblock.h> #include <linux/mmdebug.h> #include <linux/export.h> #include <linux/mm.h> #include <asm/page.h> #include <linux/vmalloc.h> #include "physaddr.h" #ifdef CONFIG_X86_64 #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { unsigned long y = x - __START_KERNEL_map; /* use the carry flag to determine if x was < __START_KERNEL_map */ if (unlikely(x > y)) { x = y + phys_base; VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); } else { x = y + (__START_KERNEL_map - PAGE_OFFSET); /* carry flag will be set if starting x was >= PAGE_OFFSET */ VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); } return x; } EXPORT_SYMBOL(__phys_addr); unsigned long __phys_addr_symbol(unsigned long x) { unsigned long y = x - __START_KERNEL_map; /* only check upper bounds since lower bounds will trigger carry */ VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); return y + phys_base; } EXPORT_SYMBOL(__phys_addr_symbol); #endif bool __virt_addr_valid(unsigned long x) { unsigned long y = x - __START_KERNEL_map; /* use the carry flag to determine if x was < __START_KERNEL_map */ if (unlikely(x > y)) { x = y + phys_base; if (y >= KERNEL_IMAGE_SIZE) return false; } else { x = y + (__START_KERNEL_map - PAGE_OFFSET); /* carry flag will be set if starting x was >= PAGE_OFFSET */ if ((x > y) || !phys_addr_valid(x)) return false; } return pfn_valid(x >> PAGE_SHIFT); } EXPORT_SYMBOL(__virt_addr_valid); #else #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { unsigned long phys_addr = x - PAGE_OFFSET; /* VMALLOC_* aren't constants */ VIRTUAL_BUG_ON(x < PAGE_OFFSET); VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); /* max_low_pfn is set early, but not _that_ early */ if (max_low_pfn) { VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn); BUG_ON(slow_virt_to_phys((void *)x) != phys_addr); } return phys_addr; } EXPORT_SYMBOL(__phys_addr); #endif bool __virt_addr_valid(unsigned long x) { if (x < PAGE_OFFSET) return false; if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) return false; if (x >= FIXADDR_START) return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } EXPORT_SYMBOL(__virt_addr_valid); #endif /* CONFIG_X86_64 */
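/*
 * Illustrative sketch (not part of physaddr.c): the helpers above normally sit
 * behind the virt_to_phys()/virt_addr_valid() wrappers. kmalloc(),
 * virt_addr_valid() and virt_to_phys() are existing interfaces (a real build
 * would need <linux/slab.h> and <asm/io.h>); example_lookup_phys() is
 * hypothetical and only sketches typical usage for directly mapped memory.
 */
static phys_addr_t example_lookup_phys(void)
{
	void *buf = kmalloc(64, GFP_KERNEL);
	phys_addr_t phys;

	if (!buf)
		return 0;

	/* Directly mapped (lowmem) kernel memory passes the validity check... */
	WARN_ON(!virt_addr_valid(buf));

	/* ...so the linear virtual-to-physical translation is well defined. */
	phys = virt_to_phys(buf);

	kfree(buf);
	return phys;
}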
/* SPDX-License-Identifier: GPL-2.0 */ #include <linux/syscalls.h> #include <linux/export.h> #include <linux/uaccess.h> #include <linux/fs_struct.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/prefetch.h> #include "mount.h" #include "internal.h" struct prepend_buffer { char *buf; int len; }; #define DECLARE_BUFFER(__name, __buf, __len) \ struct prepend_buffer __name = {.buf = __buf + __len, .len = __len} static char *extract_string(struct prepend_buffer *p) { if (likely(p->len >= 0)) return p->buf; return ERR_PTR(-ENAMETOOLONG); } static bool prepend_char(struct prepend_buffer *p, unsigned char c) { if (likely(p->len > 0)) { p->len--; *--p->buf = c; return true; } p->len = -1; return false; } /* * The source of the prepend data can be an optimistic load * of a dentry name and length. And because we don't hold any * locks, the length and the pointer to the name may not be * in sync if a concurrent rename happens, and the kernel * copy might fault as a result. * * The end result will correct itself when we check the * rename sequence count, but we need to be able to handle * the fault gracefully. */ static bool prepend_copy(void *dst, const void *src, int len) { if (unlikely(copy_from_kernel_nofault(dst, src, len))) { memset(dst, 'x', len); return false; } return true; } static bool prepend(struct prepend_buffer *p, const char *str, int namelen) { // Already overflowed? if (p->len < 0) return false; // Will overflow?
if (p->len < namelen) { // Fill as much as possible from the end of the name str += namelen - p->len; p->buf -= p->len; prepend_copy(p->buf, str, p->len); p->len = -1; return false; } // Fits fully p->len -= namelen; p->buf -= namelen; return prepend_copy(p->buf, str, namelen); } /** * prepend_name - prepend a pathname in front of current buffer pointer * @p: prepend buffer which contains buffer pointer and allocated length * @name: name string and length qstr structure * * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to * make sure that either the old or the new name pointer and length are * fetched. However, there may be mismatch between length and pointer. * But since the length cannot be trusted, we need to copy the name very * carefully when doing the prepend_copy(). It also prepends "/" at * the beginning of the name. The sequence number check at the caller will * retry it again when a d_move() does happen. So any garbage in the buffer * due to mismatched pointer and length will be discarded. * * Load acquire is needed to make sure that we see the new name data even * if we might get the length wrong. */ static bool prepend_name(struct prepend_buffer *p, const struct qstr *name) { const char *dname = smp_load_acquire(&name->name); /* ^^^ */ u32 dlen = READ_ONCE(name->len); return prepend(p, dname, dlen) && prepend_char(p, '/'); } static int __prepend_path(const struct dentry *dentry, const struct mount *mnt, const struct path *root, struct prepend_buffer *p) { while (dentry != root->dentry || &mnt->mnt != root->mnt) { const struct dentry *parent = READ_ONCE(dentry->d_parent); if (dentry == mnt->mnt.mnt_root) { struct mount *m = READ_ONCE(mnt->mnt_parent); struct mnt_namespace *mnt_ns; if (likely(mnt != m)) { dentry = READ_ONCE(mnt->mnt_mountpoint); mnt = m; continue; } /* Global root */ mnt_ns = READ_ONCE(mnt->mnt_ns); /* open-coded is_mounted() to use local mnt_ns */ if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns)) return 1; // absolute root else return 2; // detached or not attached yet } if (unlikely(dentry == parent)) /* Escaped? */ return 3; prefetch(parent); if (!prepend_name(p, &dentry->d_name)) break; dentry = parent; } return 0; } /** * prepend_path - Prepend path string to a buffer * @path: the dentry/vfsmount to report * @root: root vfsmnt/dentry * @p: prepend buffer which contains buffer pointer and allocated length * * The function will first try to write out the pathname without taking any * lock other than the RCU read lock to make sure that dentries won't go away. * It only checks the sequence number of the global rename_lock as any change * in the dentry's d_seq will be preceded by changes in the rename_lock * sequence number. If the sequence number had been changed, it will restart * the whole pathname back-tracing sequence again by taking the rename_lock. * In this case, there is no need to take the RCU read lock as the recursive * parent pointer references will keep the dentry chain alive as long as no * rename operation is performed. 
*/ static int prepend_path(const struct path *path, const struct path *root, struct prepend_buffer *p) { unsigned seq, m_seq = 0; struct prepend_buffer b; int error; rcu_read_lock(); restart_mnt: read_seqbegin_or_lock(&mount_lock, &m_seq); seq = 0; rcu_read_lock(); restart: b = *p; read_seqbegin_or_lock(&rename_lock, &seq); error = __prepend_path(path->dentry, real_mount(path->mnt), root, &b); if (!(seq & 1)) rcu_read_unlock(); if (need_seqretry(&rename_lock, seq)) { seq = 1; goto restart; } done_seqretry(&rename_lock, seq); if (!(m_seq & 1)) rcu_read_unlock(); if (need_seqretry(&mount_lock, m_seq)) { m_seq = 1; goto restart_mnt; } done_seqretry(&mount_lock, m_seq); if (unlikely(error == 3)) b = *p; if (b.len == p->len) prepend_char(&b, '/'); *p = b; return error; } /** * __d_path - return the path of a dentry * @path: the dentry/vfsmount to report * @root: root vfsmnt/dentry * @buf: buffer to return value in * @buflen: buffer length * * Convert a dentry into an ASCII path name. * * Returns a pointer into the buffer or an error code if the * path was too long. * * "buflen" should be positive. * * If the path is not reachable from the supplied root, return %NULL. */ char *__d_path(const struct path *path, const struct path *root, char *buf, int buflen) { DECLARE_BUFFER(b, buf, buflen); prepend_char(&b, 0); if (unlikely(prepend_path(path, root, &b) > 0)) return NULL; return extract_string(&b); } char *d_absolute_path(const struct path *path, char *buf, int buflen) { struct path root = {}; DECLARE_BUFFER(b, buf, buflen); prepend_char(&b, 0); if (unlikely(prepend_path(path, &root, &b) > 1)) return ERR_PTR(-EINVAL); return extract_string(&b); } static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) { unsigned seq; do { seq = read_seqcount_begin(&fs->seq); *root = fs->root; } while (read_seqcount_retry(&fs->seq, seq)); } /** * d_path - return the path of a dentry * @path: path to report * @buf: buffer to return value in * @buflen: buffer length * * Convert a dentry into an ASCII path name. If the entry has been deleted * the string " (deleted)" is appended. Note that this is ambiguous. * * Returns a pointer into the buffer or an error code if the path was * too long. Note: Callers should use the returned pointer, not the passed * in buffer, to use the name! The implementation often starts at an offset * into the buffer, and may leave 0 bytes at the start. * * "buflen" should be positive. */ char *d_path(const struct path *path, char *buf, int buflen) { DECLARE_BUFFER(b, buf, buflen); struct path root; /* * We have various synthetic filesystems that never get mounted. On * these filesystems dentries are never used for lookup purposes, and * thus don't need to be hashed. They also don't need a name until a * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: * * Some pseudo inodes are mountable. When they are mounted * path->dentry == path->mnt->mnt_root. In that case don't call d_dname * and instead have d_path return the mounted path. 
*/ if (path->dentry->d_op && path->dentry->d_op->d_dname && (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); rcu_read_lock(); get_fs_root_rcu(current->fs, &root); if (unlikely(d_unlinked(path->dentry))) prepend(&b, " (deleted)", 11); else prepend_char(&b, 0); prepend_path(path, &root, &b); rcu_read_unlock(); return extract_string(&b); } EXPORT_SYMBOL(d_path); /* * Helper function for dentry_operations.d_dname() members */ char *dynamic_dname(char *buffer, int buflen, const char *fmt, ...) { va_list args; char temp[64]; int sz; va_start(args, fmt); sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; va_end(args); if (sz > sizeof(temp) || sz > buflen) return ERR_PTR(-ENAMETOOLONG); buffer += buflen - sz; return memcpy(buffer, temp, sz); } char *simple_dname(struct dentry *dentry, char *buffer, int buflen) { DECLARE_BUFFER(b, buffer, buflen); /* these dentries are never renamed, so d_lock is not needed */ prepend(&b, " (deleted)", 11); prepend(&b, dentry->d_name.name, dentry->d_name.len); prepend_char(&b, '/'); return extract_string(&b); } /* * Write full pathname from the root of the filesystem into the buffer. */ static char *__dentry_path(const struct dentry *d, struct prepend_buffer *p) { const struct dentry *dentry; struct prepend_buffer b; int seq = 0; rcu_read_lock(); restart: dentry = d; b = *p; read_seqbegin_or_lock(&rename_lock, &seq); while (!IS_ROOT(dentry)) { const struct dentry *parent = dentry->d_parent; prefetch(parent); if (!prepend_name(&b, &dentry->d_name)) break; dentry = parent; } if (!(seq & 1)) rcu_read_unlock(); if (need_seqretry(&rename_lock, seq)) { seq = 1; goto restart; } done_seqretry(&rename_lock, seq); if (b.len == p->len) prepend_char(&b, '/'); return extract_string(&b); } char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen) { DECLARE_BUFFER(b, buf, buflen); prepend_char(&b, 0); return __dentry_path(dentry, &b); } EXPORT_SYMBOL(dentry_path_raw); char *dentry_path(const struct dentry *dentry, char *buf, int buflen) { DECLARE_BUFFER(b, buf, buflen); if (unlikely(d_unlinked(dentry))) prepend(&b, "//deleted", 10); else prepend_char(&b, 0); return __dentry_path(dentry, &b); } static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, struct path *pwd) { unsigned seq; do { seq = read_seqcount_begin(&fs->seq); *root = fs->root; *pwd = fs->pwd; } while (read_seqcount_retry(&fs->seq, seq)); } /* * NOTE! The user-level library version returns a * character pointer. The kernel system call just * returns the length of the buffer filled (which * includes the ending '\0' character), or a negative * error value. 
So libc would do something like * * char *getcwd(char * buf, size_t size) * { * int retval; * * retval = sys_getcwd(buf, size); * if (retval >= 0) * return buf; * errno = -retval; * return NULL; * } */ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) { int error; struct path pwd, root; char *page = __getname(); if (!page) return -ENOMEM; rcu_read_lock(); get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); if (unlikely(d_unlinked(pwd.dentry))) { rcu_read_unlock(); error = -ENOENT; } else { unsigned len; DECLARE_BUFFER(b, page, PATH_MAX); prepend_char(&b, 0); if (unlikely(prepend_path(&pwd, &root, &b) > 0)) prepend(&b, "(unreachable)", 13); rcu_read_unlock(); len = PATH_MAX - b.len; if (unlikely(len > PATH_MAX)) error = -ENAMETOOLONG; else if (unlikely(len > size)) error = -ERANGE; else if (copy_to_user(buf, b.buf, len)) error = -EFAULT; else error = len; } __putname(page); return error; }
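/*
 * Illustrative sketch (not part of d_path.c): the usual calling convention for
 * d_path(). Because the name is assembled from the end of the buffer
 * backwards, callers must use the returned pointer rather than the start of
 * the buffer. example_print_path() is hypothetical; __getname()/__putname()
 * and d_path() are the real interfaces.
 */
static void example_print_path(const struct path *path)
{
	char *buf = __getname();	/* PATH_MAX-sized name buffer */
	char *name;

	if (!buf)
		return;

	name = d_path(path, buf, PATH_MAX);
	if (!IS_ERR(name))
		pr_info("path: %s\n", name);

	__putname(buf);
}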
// SPDX-License-Identifier: GPL-2.0+ /* * A wrapper for multiple PHYs which passes all phy_* function calls to * multiple (actual) PHY devices. This comes in handy when initializing * all PHYs on an HCD and keeping them all in the same state. * * Copyright (C) 2018 Martin Blumenstingl <martin.blumenstingl@googlemail.com> */ #include <linux/device.h> #include <linux/list.h> #include <linux/phy/phy.h> #include <linux/of.h> #include "phy.h" struct usb_phy_roothub { struct phy *phy; struct list_head list; }; /* Allocate a roothub_entry for the PHY with the given name */ static int usb_phy_roothub_add_phy_by_name(struct device *dev, const char *name, struct list_head *list) { struct usb_phy_roothub *roothub_entry; struct phy *phy; phy = devm_of_phy_get(dev, dev->of_node, name); if (IS_ERR(phy)) return PTR_ERR(phy); roothub_entry = devm_kzalloc(dev, sizeof(*roothub_entry), GFP_KERNEL); if (!roothub_entry) return -ENOMEM; INIT_LIST_HEAD(&roothub_entry->list); roothub_entry->phy = phy; list_add_tail(&roothub_entry->list, list); return 0; } static int usb_phy_roothub_add_phy(struct device *dev, int index, struct list_head *list) { struct usb_phy_roothub *roothub_entry; struct phy *phy; phy = devm_of_phy_get_by_index(dev, dev->of_node, index); if (IS_ERR(phy)) { if (PTR_ERR(phy) == -ENODEV) return 0; else return PTR_ERR(phy); } roothub_entry = devm_kzalloc(dev, sizeof(*roothub_entry), GFP_KERNEL); if (!roothub_entry) return -ENOMEM; INIT_LIST_HEAD(&roothub_entry->list); roothub_entry->phy = phy; list_add_tail(&roothub_entry->list, list); return 0; } struct usb_phy_roothub *usb_phy_roothub_alloc(struct device *dev) { struct usb_phy_roothub *phy_roothub; int i, num_phys, err; if (!IS_ENABLED(CONFIG_GENERIC_PHY)) return NULL; num_phys = of_count_phandle_with_args(dev->of_node, "phys", "#phy-cells"); if (num_phys <= 0) return NULL; phy_roothub = devm_kzalloc(dev, sizeof(*phy_roothub), GFP_KERNEL); if (!phy_roothub) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&phy_roothub->list); if (!usb_phy_roothub_add_phy_by_name(dev, "usb2-phy", &phy_roothub->list)) return phy_roothub; for (i = 0; i < num_phys; i++) { err = usb_phy_roothub_add_phy(dev, i, &phy_roothub->list);
if (err) return ERR_PTR(err); } return phy_roothub; } EXPORT_SYMBOL_GPL(usb_phy_roothub_alloc); /** * usb_phy_roothub_alloc_usb3_phy - alloc the roothub * @dev: the device of the host controller * * Allocate the usb phy roothub if the host use a generic usb3-phy. * * Return: On success, a pointer to the usb_phy_roothub. Otherwise, * %NULL if no use usb3 phy or %-ENOMEM if out of memory. */ struct usb_phy_roothub *usb_phy_roothub_alloc_usb3_phy(struct device *dev) { struct usb_phy_roothub *phy_roothub; int num_phys; if (!IS_ENABLED(CONFIG_GENERIC_PHY)) return NULL; num_phys = of_count_phandle_with_args(dev->of_node, "phys", "#phy-cells"); if (num_phys <= 0) return NULL; phy_roothub = devm_kzalloc(dev, sizeof(*phy_roothub), GFP_KERNEL); if (!phy_roothub) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&phy_roothub->list); if (!usb_phy_roothub_add_phy_by_name(dev, "usb3-phy", &phy_roothub->list)) return phy_roothub; return NULL; } EXPORT_SYMBOL_GPL(usb_phy_roothub_alloc_usb3_phy); int usb_phy_roothub_init(struct usb_phy_roothub *phy_roothub) { struct usb_phy_roothub *roothub_entry; struct list_head *head; int err; if (!phy_roothub) return 0; head = &phy_roothub->list; list_for_each_entry(roothub_entry, head, list) { err = phy_init(roothub_entry->phy); if (err) goto err_exit_phys; } return 0; err_exit_phys: list_for_each_entry_continue_reverse(roothub_entry, head, list) phy_exit(roothub_entry->phy); return err; } EXPORT_SYMBOL_GPL(usb_phy_roothub_init); int usb_phy_roothub_exit(struct usb_phy_roothub *phy_roothub) { struct usb_phy_roothub *roothub_entry; struct list_head *head; int err, ret = 0; if (!phy_roothub) return 0; head = &phy_roothub->list; list_for_each_entry(roothub_entry, head, list) { err = phy_exit(roothub_entry->phy); if (err) ret = err; } return ret; } EXPORT_SYMBOL_GPL(usb_phy_roothub_exit); int usb_phy_roothub_set_mode(struct usb_phy_roothub *phy_roothub, enum phy_mode mode) { struct usb_phy_roothub *roothub_entry; struct list_head *head; int err; if (!phy_roothub) return 0; head = &phy_roothub->list; list_for_each_entry(roothub_entry, head, list) { err = phy_set_mode(roothub_entry->phy, mode); if (err) goto err_out; } return 0; err_out: list_for_each_entry_continue_reverse(roothub_entry, head, list) phy_power_off(roothub_entry->phy); return err; } EXPORT_SYMBOL_GPL(usb_phy_roothub_set_mode); int usb_phy_roothub_calibrate(struct usb_phy_roothub *phy_roothub) { struct usb_phy_roothub *roothub_entry; struct list_head *head; int err; if (!phy_roothub) return 0; head = &phy_roothub->list; list_for_each_entry(roothub_entry, head, list) { err = phy_calibrate(roothub_entry->phy); if (err) return err; } return 0; } EXPORT_SYMBOL_GPL(usb_phy_roothub_calibrate); /** * usb_phy_roothub_notify_connect() - connect notification * @phy_roothub: the phy of roothub, if the host use a generic phy. * @port: the port index for connect * * If the phy needs to get connection status, the callback can be used. * Returns: %0 if successful, a negative error code otherwise */ int usb_phy_roothub_notify_connect(struct usb_phy_roothub *phy_roothub, int port) { struct usb_phy_roothub *roothub_entry; struct list_head *head; int err; if (!phy_roothub) return 0; head = &phy_roothub->list; list_for_each_entry(roothub_entry, head, list) { err = phy_notify_connect(roothub_entry->phy, port); if (err) return err; } return 0; } EXPORT_SYMBOL_GPL(usb_phy_roothub_notify_connect); /** * usb_phy_roothub_notify_disconnect() - disconnect notification * @phy_roothub: the phy of roothub, if the host use a generic phy. 
* @port: the port index for disconnect * * If the phy needs to get connection status, the callback can be used. * Returns: %0 if successful, a negative error code otherwise */ int usb_phy_roothub_notify_disconnect(struct usb_phy_roothub *phy_roothub, int port) { struct usb_phy_roothub *roothub_entry; struct list_head *head; int err; if (!phy_roothub) return 0; head = &phy_roothub->list; list_for_each_entry(roothub_entry, head, list) { err = phy_notify_disconnect(roothub_entry->phy, port); if (err) return err; } return 0; } EXPORT_SYMBOL_GPL(usb_phy_roothub_notify_disconnect); int usb_phy_roothub_power_on(struct usb_phy_roothub *phy_roothub) { struct usb_phy_roothub *roothub_entry; struct list_head *head; int err; if (!phy_roothub) return 0; head = &phy_roothub->list; list_for_each_entry(roothub_entry, head, list) { err = phy_power_on(roothub_entry->phy); if (err) goto err_out; } return 0; err_out: list_for_each_entry_continue_reverse(roothub_entry, head, list) phy_power_off(roothub_entry->phy); return err; } EXPORT_SYMBOL_GPL(usb_phy_roothub_power_on); void usb_phy_roothub_power_off(struct usb_phy_roothub *phy_roothub) { struct usb_phy_roothub *roothub_entry; if (!phy_roothub) return; list_for_each_entry_reverse(roothub_entry, &phy_roothub->list, list) phy_power_off(roothub_entry->phy); } EXPORT_SYMBOL_GPL(usb_phy_roothub_power_off); int usb_phy_roothub_suspend(struct device *controller_dev, struct usb_phy_roothub *phy_roothub) { usb_phy_roothub_power_off(phy_roothub); /* keep the PHYs initialized so the device can wake up the system */ if (device_may_wakeup(controller_dev)) return 0; return usb_phy_roothub_exit(phy_roothub); } EXPORT_SYMBOL_GPL(usb_phy_roothub_suspend); int usb_phy_roothub_resume(struct device *controller_dev, struct usb_phy_roothub *phy_roothub) { int err; /* if the device can't wake up the system _exit was called */ if (!device_may_wakeup(controller_dev)) { err = usb_phy_roothub_init(phy_roothub); if (err) return err; } err = usb_phy_roothub_power_on(phy_roothub); /* undo _init if _power_on failed */ if (err && !device_may_wakeup(controller_dev)) usb_phy_roothub_exit(phy_roothub); return err; } EXPORT_SYMBOL_GPL(usb_phy_roothub_resume);
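/*
 * Illustrative sketch (not part of phy.c): the order in which a host controller
 * driver typically drives the wrapper above during setup. The
 * usb_phy_roothub_*() calls are the real API; example_hcd_phy_setup() is
 * hypothetical. Note that a NULL phy_roothub (no PHYs described in DT) is
 * accepted by all of the helpers and simply turns them into no-ops.
 */
static int example_hcd_phy_setup(struct device *dev,
				 struct usb_phy_roothub **out)
{
	struct usb_phy_roothub *phys;
	int ret;

	phys = usb_phy_roothub_alloc(dev);
	if (IS_ERR(phys))
		return PTR_ERR(phys);

	ret = usb_phy_roothub_init(phys);
	if (ret)
		return ret;

	ret = usb_phy_roothub_power_on(phys);
	if (ret) {
		usb_phy_roothub_exit(phys);
		return ret;
	}

	*out = phys;
	return 0;
}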
#undef TRACE_SYSTEM #define TRACE_SYSTEM qdisc #if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_QDISC_H #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/tracepoint.h> #include <linux/ftrace.h> #include <linux/pkt_sched.h> #include <net/sch_generic.h> TRACE_EVENT(qdisc_dequeue, TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, int packets, struct sk_buff *skb), TP_ARGS(qdisc, txq, packets, skb), TP_STRUCT__entry( __field( struct Qdisc *, qdisc ) __field(const struct netdev_queue *, txq ) __field( int, packets ) __field( void *, skbaddr ) __field( int, ifindex ) __field( u32, handle ) __field( u32, parent ) __field( unsigned long, txq_state) ), /* skb==NULL indicates packets dequeued was 0, even when packets==1 */ TP_fast_assign( __entry->qdisc = qdisc; __entry->txq = txq; __entry->packets = skb ? packets : 0; __entry->skbaddr = skb; __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; __entry->handle = qdisc->handle; __entry->parent = qdisc->parent; __entry->txq_state = txq->state; ), TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", __entry->ifindex, __entry->handle, __entry->parent, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); TRACE_EVENT(qdisc_enqueue, TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), TP_ARGS(qdisc, txq, skb), TP_STRUCT__entry( __field(struct Qdisc *, qdisc) __field(const struct netdev_queue *, txq) __field(void *, skbaddr) __field(int, ifindex) __field(u32, handle) __field(u32, parent) ), TP_fast_assign( __entry->qdisc = qdisc; __entry->txq = txq; __entry->skbaddr = skb; __entry->ifindex = txq->dev ?
txq->dev->ifindex : 0; __entry->handle = qdisc->handle; __entry->parent = qdisc->parent; ), TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%p", __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) ); TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), TP_ARGS(q), TP_STRUCT__entry( __string( dev, qdisc_dev(q)->name ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) ), TP_fast_assign( __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; ), TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent), TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle)) ); TRACE_EVENT(qdisc_destroy, TP_PROTO(struct Qdisc *q), TP_ARGS(q), TP_STRUCT__entry( __string( dev, qdisc_dev(q)->name ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) ), TP_fast_assign( __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; ), TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent), TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle)) ); TRACE_EVENT(qdisc_create, TP_PROTO(const struct Qdisc_ops *ops, struct net_device *dev, u32 parent), TP_ARGS(ops, dev, parent), TP_STRUCT__entry( __string( dev, dev->name ) __string( kind, ops->id ) __field( u32, parent ) ), TP_fast_assign( __assign_str(dev, dev->name); __assign_str(kind, ops->id); __entry->parent = parent; ), TP_printk("dev=%s kind=%s parent=%x:%x", __get_str(dev), __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent)) ); #endif /* _TRACE_QDISC_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
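/*
 * Illustrative sketch (not part of this header): TRACE_EVENT(qdisc_dequeue)
 * above generates a trace_qdisc_dequeue() inline that the scheduler core calls
 * on its dequeue path, roughly as sketched here. Only trace_qdisc_dequeue()
 * and the Qdisc/netdev_queue types are real; example_dequeue() is hypothetical.
 */
static struct sk_buff *example_dequeue(struct Qdisc *q,
				       struct netdev_queue *txq, int packets)
{
	struct sk_buff *skb = q->dequeue(q);

	/* skb may be NULL; the event then records packets as 0. */
	trace_qdisc_dequeue(q, txq, packets, skb);
	return skb;
}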
// SPDX-License-Identifier: GPL-2.0-only /* * lib/bitmap.c * Helper functions for bitmap.h.
*/ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/ctype.h> #include <linux/device.h> #include <linux/export.h> #include <linux/slab.h> /** * DOC: bitmap introduction * * bitmaps provide an array of bits, implemented using an * array of unsigned longs. The number of valid bits in a * given bitmap does _not_ need to be an exact multiple of * BITS_PER_LONG. * * The possible unused bits in the last, partially used word * of a bitmap are 'don't care'. The implementation makes * no particular effort to keep them zero. It ensures that * their value will not affect the results of any operation. * The bitmap operations that return Boolean (bitmap_empty, * for example) or scalar (bitmap_weight, for example) results * carefully filter out these unused bits from impacting their * results. * * The byte ordering of bitmaps is more natural on little * endian architectures. See the big-endian headers * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h * for the best explanations of this ordering. */ bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] != bitmap2[k]) return false; if (bits % BITS_PER_LONG) if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) return false; return true; } EXPORT_SYMBOL(__bitmap_equal); bool __bitmap_or_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, const unsigned long *bitmap3, unsigned int bits) { unsigned int k, lim = bits / BITS_PER_LONG; unsigned long tmp; for (k = 0; k < lim; ++k) { if ((bitmap1[k] | bitmap2[k]) != bitmap3[k]) return false; } if (!(bits % BITS_PER_LONG)) return true; tmp = (bitmap1[k] | bitmap2[k]) ^ bitmap3[k]; return (tmp & BITMAP_LAST_WORD_MASK(bits)) == 0; } void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) { unsigned int k, lim = BITS_TO_LONGS(bits); for (k = 0; k < lim; ++k) dst[k] = ~src[k]; } EXPORT_SYMBOL(__bitmap_complement); /** * __bitmap_shift_right - logical right shift of the bits in a bitmap * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits * @nbits : bitmap size, in bits * * Shifting right (dividing) means moving bits in the MS -> LS bit * direction. Zeros are fed into the vacated MS positions and the * LS bits shifted off the bottom are lost. */ void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned shift, unsigned nbits) { unsigned k, lim = BITS_TO_LONGS(nbits); unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); for (k = 0; off + k < lim; ++k) { unsigned long upper, lower; /* * If shift is not word aligned, take lower rem bits of * word above and make them the top rem bits of result. */ if (!rem || off + k + 1 >= lim) upper = 0; else { upper = src[off + k + 1]; if (off + k + 1 == lim - 1) upper &= mask; upper <<= (BITS_PER_LONG - rem); } lower = src[off + k]; if (off + k == lim - 1) lower &= mask; lower >>= rem; dst[k] = lower | upper; } if (off) memset(&dst[lim - off], 0, off*sizeof(unsigned long)); } EXPORT_SYMBOL(__bitmap_shift_right); /** * __bitmap_shift_left - logical left shift of the bits in a bitmap * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits * @nbits : bitmap size, in bits * * Shifting left (multiplying) means moving bits in the LS -> MS * direction. 
Zeros are fed into the vacated LS bit positions * and those MS bits shifted off the top are lost. */ void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { int k; unsigned int lim = BITS_TO_LONGS(nbits); unsigned int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; for (k = lim - off - 1; k >= 0; --k) { unsigned long upper, lower; /* * If shift is not word aligned, take upper rem bits of * word below and make them the bottom rem bits of result. */ if (rem && k > 0) lower = src[k - 1] >> (BITS_PER_LONG - rem); else lower = 0; upper = src[k] << rem; dst[k + off] = lower | upper; } if (off) memset(dst, 0, off*sizeof(unsigned long)); } EXPORT_SYMBOL(__bitmap_shift_left); /** * bitmap_cut() - remove bit region from bitmap and right shift remaining bits * @dst: destination bitmap, might overlap with src * @src: source bitmap * @first: start bit of region to be removed * @cut: number of bits to remove * @nbits: bitmap size, in bits * * Set the n-th bit of @dst iff the n-th bit of @src is set and * n is less than @first, or the m-th bit of @src is set for any * m such that @first <= n < nbits, and m = n + @cut. * * In pictures, example for a big-endian 32-bit architecture: * * The @src bitmap is:: * * 31 63 * | | * 10000000 11000001 11110010 00010101 10000000 11000001 01110010 00010101 * | | | | * 16 14 0 32 * * if @cut is 3, and @first is 14, bits 14-16 in @src are cut and @dst is:: * * 31 63 * | | * 10110000 00011000 00110010 00010101 00010000 00011000 00101110 01000010 * | | | * 14 (bit 17 0 32 * from @src) * * Note that @dst and @src might overlap partially or entirely. * * This is implemented in the obvious way, with a shift and carry * step for each moved bit. Optimisation is left as an exercise * for the compiler. 
*/ void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned int first, unsigned int cut, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits); unsigned long keep = 0, carry; int i; if (first % BITS_PER_LONG) { keep = src[first / BITS_PER_LONG] & (~0UL >> (BITS_PER_LONG - first % BITS_PER_LONG)); } memmove(dst, src, len * sizeof(*dst)); while (cut--) { for (i = first / BITS_PER_LONG; i < len; i++) { if (i < len - 1) carry = dst[i + 1] & 1UL; else carry = 0; dst[i] = (dst[i] >> 1) | (carry << (BITS_PER_LONG - 1)); } } dst[first / BITS_PER_LONG] &= ~0UL << (first % BITS_PER_LONG); dst[first / BITS_PER_LONG] |= keep; } EXPORT_SYMBOL(bitmap_cut); bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; unsigned int lim = bits/BITS_PER_LONG; unsigned long result = 0; for (k = 0; k < lim; k++) result |= (dst[k] = bitmap1[k] & bitmap2[k]); if (bits % BITS_PER_LONG) result |= (dst[k] = bitmap1[k] & bitmap2[k] & BITMAP_LAST_WORD_MASK(bits)); return result != 0; } EXPORT_SYMBOL(__bitmap_and); void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; unsigned int nr = BITS_TO_LONGS(bits); for (k = 0; k < nr; k++) dst[k] = bitmap1[k] | bitmap2[k]; } EXPORT_SYMBOL(__bitmap_or); void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; unsigned int nr = BITS_TO_LONGS(bits); for (k = 0; k < nr; k++) dst[k] = bitmap1[k] ^ bitmap2[k]; } EXPORT_SYMBOL(__bitmap_xor); bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; unsigned int lim = bits/BITS_PER_LONG; unsigned long result = 0; for (k = 0; k < lim; k++) result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); if (bits % BITS_PER_LONG) result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & BITMAP_LAST_WORD_MASK(bits)); return result != 0; } EXPORT_SYMBOL(__bitmap_andnot); void __bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits) { unsigned int k; unsigned int nr = BITS_TO_LONGS(nbits); for (k = 0; k < nr; k++) dst[k] = (old[k] & ~mask[k]) | (new[k] & mask[k]); } EXPORT_SYMBOL(__bitmap_replace); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) return true; if (bits % BITS_PER_LONG) if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) return true; return false; } EXPORT_SYMBOL(__bitmap_intersects); bool __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & ~bitmap2[k]) return false; if (bits % BITS_PER_LONG) if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) return false; return true; } EXPORT_SYMBOL(__bitmap_subset); #define BITMAP_WEIGHT(FETCH, bits) \ ({ \ unsigned int __bits = (bits), idx, w = 0; \ \ for (idx = 0; idx < __bits / BITS_PER_LONG; idx++) \ w += hweight_long(FETCH); \ \ if (__bits % BITS_PER_LONG) \ w += hweight_long((FETCH) & BITMAP_LAST_WORD_MASK(__bits)); \ \ w; \ }) unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) { return BITMAP_WEIGHT(bitmap[idx], bits); } EXPORT_SYMBOL(__bitmap_weight); unsigned int 
__bitmap_weight_and(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { return BITMAP_WEIGHT(bitmap1[idx] & bitmap2[idx], bits); } EXPORT_SYMBOL(__bitmap_weight_and); unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { return BITMAP_WEIGHT(bitmap1[idx] & ~bitmap2[idx], bits); } EXPORT_SYMBOL(__bitmap_weight_andnot); void __bitmap_set(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); const unsigned int size = start + len; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); while (len - bits_to_set >= 0) { *p |= mask_to_set; len -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = ~0UL; p++; } if (len) { mask_to_set &= BITMAP_LAST_WORD_MASK(size); *p |= mask_to_set; } } EXPORT_SYMBOL(__bitmap_set); void __bitmap_clear(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); const unsigned int size = start + len; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); while (len - bits_to_clear >= 0) { *p &= ~mask_to_clear; len -= bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = ~0UL; p++; } if (len) { mask_to_clear &= BITMAP_LAST_WORD_MASK(size); *p &= ~mask_to_clear; } } EXPORT_SYMBOL(__bitmap_clear); /** * bitmap_find_next_zero_area_off - find a contiguous aligned zero area * @map: The address to base the search on * @size: The bitmap size in bits * @start: The bitnumber to start searching at * @nr: The number of zeroed bits we're looking for * @align_mask: Alignment mask for zero area * @align_offset: Alignment offset for zero area. * * The @align_mask should be one less than a power of 2; the effect is that * the bit offset of all zero areas this function finds plus @align_offset * is multiple of that power of 2. */ unsigned long bitmap_find_next_zero_area_off(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask, unsigned long align_offset) { unsigned long index, end, i; again: index = find_next_zero_bit(map, size, start); /* Align allocation */ index = __ALIGN_MASK(index + align_offset, align_mask) - align_offset; end = index + nr; if (end > size) return end; i = find_next_bit(map, end, index); if (i < end) { start = i + 1; goto again; } return index; } EXPORT_SYMBOL(bitmap_find_next_zero_area_off); /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap * @pos: a bit position in @buf (0 <= @pos < @nbits) * @nbits: number of valid bit positions in @buf * * Map the bit at position @pos in @buf (of length @nbits) to the * ordinal of which set bit it is. If it is not set or if @pos * is not a valid bit position, map to -1. * * If for example, just bits 4 through 7 are set in @buf, then @pos * values 4 through 7 will get mapped to 0 through 3, respectively, * and other @pos values will get mapped to -1. When @pos value 7 * gets mapped to (returns) @ord value 3 in this example, that means * that bit 7 is the 3rd (starting with 0th) set bit in @buf. * * The bit positions 0 through @bits are valid positions in @buf. 
*/ static int bitmap_pos_to_ord(const unsigned long *buf, unsigned int pos, unsigned int nbits) { if (pos >= nbits || !test_bit(pos, buf)) return -1; return bitmap_weight(buf, pos); } /** * bitmap_remap - Apply map defined by a pair of bitmaps to another bitmap * @dst: remapped result * @src: subset to be remapped * @old: defines domain of map * @new: defines range of map * @nbits: number of bits in each of these bitmaps * * Let @old and @new define a mapping of bit positions, such that * whatever position is held by the n-th set bit in @old is mapped * to the n-th set bit in @new. In the more general case, allowing * for the possibility that the weight 'w' of @new is less than the * weight of @old, map the position of the n-th set bit in @old to * the position of the m-th set bit in @new, where m == n % w. * * If either of the @old and @new bitmaps are empty, or if @src and * @dst point to the same location, then this routine copies @src * to @dst. * * The positions of unset bits in @old are mapped to themselves * (the identity map). * * Apply the above specified mapping to @src, placing the result in * @dst, clearing any bits previously set in @dst. * * For example, lets say that @old has bits 4 through 7 set, and * @new has bits 12 through 15 set. This defines the mapping of bit * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other * bit positions unchanged. So if say @src comes into this routine * with bits 1, 5 and 7 set, then @dst should leave with bits 1, * 13 and 15 set. */ void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits) { unsigned int oldbit, w; if (dst == src) /* following doesn't handle inplace remaps */ return; bitmap_zero(dst, nbits); w = bitmap_weight(new, nbits); for_each_set_bit(oldbit, src, nbits) { int n = bitmap_pos_to_ord(old, oldbit, nbits); if (n < 0 || w == 0) set_bit(oldbit, dst); /* identity map */ else set_bit(find_nth_bit(new, nbits, n % w), dst); } } EXPORT_SYMBOL(bitmap_remap); /** * bitmap_bitremap - Apply map defined by a pair of bitmaps to a single bit * @oldbit: bit position to be mapped * @old: defines domain of map * @new: defines range of map * @bits: number of bits in each of these bitmaps * * Let @old and @new define a mapping of bit positions, such that * whatever position is held by the n-th set bit in @old is mapped * to the n-th set bit in @new. In the more general case, allowing * for the possibility that the weight 'w' of @new is less than the * weight of @old, map the position of the n-th set bit in @old to * the position of the m-th set bit in @new, where m == n % w. * * The positions of unset bits in @old are mapped to themselves * (the identity map). * * Apply the above specified mapping to bit position @oldbit, returning * the new bit position. * * For example, lets say that @old has bits 4 through 7 set, and * @new has bits 12 through 15 set. This defines the mapping of bit * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other * bit positions unchanged. So if say @oldbit is 5, then this routine * returns 13. 
*/ int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits) { int w = bitmap_weight(new, bits); int n = bitmap_pos_to_ord(old, oldbit, bits); if (n < 0 || w == 0) return oldbit; else return find_nth_bit(new, bits, n % w); } EXPORT_SYMBOL(bitmap_bitremap); #ifdef CONFIG_NUMA /** * bitmap_onto - translate one bitmap relative to another * @dst: resulting translated bitmap * @orig: original untranslated bitmap * @relmap: bitmap relative to which translated * @bits: number of bits in each of these bitmaps * * Set the n-th bit of @dst iff there exists some m such that the * n-th bit of @relmap is set, the m-th bit of @orig is set, and * the n-th bit of @relmap is also the m-th _set_ bit of @relmap. * (If you understood the previous sentence the first time your * read it, you're overqualified for your current job.) * * In other words, @orig is mapped onto (surjectively) @dst, * using the map { <n, m> | the n-th bit of @relmap is the * m-th set bit of @relmap }. * * Any set bits in @orig above bit number W, where W is the * weight of (number of set bits in) @relmap are mapped nowhere. * In particular, if for all bits m set in @orig, m >= W, then * @dst will end up empty. In situations where the possibility * of such an empty result is not desired, one way to avoid it is * to use the bitmap_fold() operator, below, to first fold the * @orig bitmap over itself so that all its set bits x are in the * range 0 <= x < W. The bitmap_fold() operator does this by * setting the bit (m % W) in @dst, for each bit (m) set in @orig. * * Example [1] for bitmap_onto(): * Let's say @relmap has bits 30-39 set, and @orig has bits * 1, 3, 5, 7, 9 and 11 set. Then on return from this routine, * @dst will have bits 31, 33, 35, 37 and 39 set. * * When bit 0 is set in @orig, it means turn on the bit in * @dst corresponding to whatever is the first bit (if any) * that is turned on in @relmap. Since bit 0 was off in the * above example, we leave off that bit (bit 30) in @dst. * * When bit 1 is set in @orig (as in the above example), it * means turn on the bit in @dst corresponding to whatever * is the second bit that is turned on in @relmap. The second * bit in @relmap that was turned on in the above example was * bit 31, so we turned on bit 31 in @dst. * * Similarly, we turned on bits 33, 35, 37 and 39 in @dst, * because they were the 4th, 6th, 8th and 10th set bits * set in @relmap, and the 4th, 6th, 8th and 10th bits of * @orig (i.e. bits 3, 5, 7 and 9) were also set. * * When bit 11 is set in @orig, it means turn on the bit in * @dst corresponding to whatever is the twelfth bit that is * turned on in @relmap. In the above example, there were * only ten bits turned on in @relmap (30..39), so that bit * 11 was set in @orig had no affect on @dst. * * Example [2] for bitmap_fold() + bitmap_onto(): * Let's say @relmap has these ten bits set:: * * 40 41 42 43 45 48 53 61 74 95 * * (for the curious, that's 40 plus the first ten terms of the * Fibonacci sequence.) * * Further lets say we use the following code, invoking * bitmap_fold() then bitmap_onto, as suggested above to * avoid the possibility of an empty @dst result:: * * unsigned long *tmp; // a temporary bitmap's bits * * bitmap_fold(tmp, orig, bitmap_weight(relmap, bits), bits); * bitmap_onto(dst, tmp, relmap, bits); * * Then this table shows what various values of @dst would be, for * various @orig's. I list the zero-based positions of each set bit. 
* The tmp column shows the intermediate result, as computed by * using bitmap_fold() to fold the @orig bitmap modulo ten * (the weight of @relmap): * * =============== ============== ================= * @orig tmp @dst * 0 0 40 * 1 1 41 * 9 9 95 * 10 0 40 [#f1]_ * 1 3 5 7 1 3 5 7 41 43 48 61 * 0 1 2 3 4 0 1 2 3 4 40 41 42 43 45 * 0 9 18 27 0 9 8 7 40 61 74 95 * 0 10 20 30 0 40 * 0 11 22 33 0 1 2 3 40 41 42 43 * 0 12 24 36 0 2 4 6 40 42 45 53 * 78 102 211 1 2 8 41 42 74 [#f1]_ * =============== ============== ================= * * .. [#f1] * * For these marked lines, if we hadn't first done bitmap_fold() * into tmp, then the @dst result would have been empty. * * If either of @orig or @relmap is empty (no set bits), then @dst * will be returned empty. * * If (as explained above) the only set bits in @orig are in positions * m where m >= W, (where W is the weight of @relmap) then @dst will * once again be returned empty. * * All bits in @dst not set by the above rule are cleared. */ void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits) { unsigned int n, m; /* same meaning as in above comment */ if (dst == orig) /* following doesn't handle inplace mappings */ return; bitmap_zero(dst, bits); /* * The following code is a more efficient, but less * obvious, equivalent to the loop: * for (m = 0; m < bitmap_weight(relmap, bits); m++) { * n = find_nth_bit(orig, bits, m); * if (test_bit(m, orig)) * set_bit(n, dst); * } */ m = 0; for_each_set_bit(n, relmap, bits) { /* m == bitmap_pos_to_ord(relmap, n, bits) */ if (test_bit(m, orig)) set_bit(n, dst); m++; } } /** * bitmap_fold - fold larger bitmap into smaller, modulo specified size * @dst: resulting smaller bitmap * @orig: original larger bitmap * @sz: specified size * @nbits: number of bits in each of these bitmaps * * For each bit oldbit in @orig, set bit oldbit mod @sz in @dst. * Clear all other bits in @dst. See further the comment and * Example [2] for bitmap_onto() for why and how to use this. 
*/ void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits) { unsigned int oldbit; if (dst == orig) /* following doesn't handle inplace mappings */ return; bitmap_zero(dst, nbits); for_each_set_bit(oldbit, orig, nbits) set_bit(oldbit % sz, dst); } #endif /* CONFIG_NUMA */ unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags) { return kmalloc_array(BITS_TO_LONGS(nbits), sizeof(unsigned long), flags); } EXPORT_SYMBOL(bitmap_alloc); unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags) { return bitmap_alloc(nbits, flags | __GFP_ZERO); } EXPORT_SYMBOL(bitmap_zalloc); unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node) { return kmalloc_array_node(BITS_TO_LONGS(nbits), sizeof(unsigned long), flags, node); } EXPORT_SYMBOL(bitmap_alloc_node); unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node) { return bitmap_alloc_node(nbits, flags | __GFP_ZERO, node); } EXPORT_SYMBOL(bitmap_zalloc_node); void bitmap_free(const unsigned long *bitmap) { kfree(bitmap); } EXPORT_SYMBOL(bitmap_free); static void devm_bitmap_free(void *data) { unsigned long *bitmap = data; bitmap_free(bitmap); } unsigned long *devm_bitmap_alloc(struct device *dev, unsigned int nbits, gfp_t flags) { unsigned long *bitmap; int ret; bitmap = bitmap_alloc(nbits, flags); if (!bitmap) return NULL; ret = devm_add_action_or_reset(dev, devm_bitmap_free, bitmap); if (ret) return NULL; return bitmap; } EXPORT_SYMBOL_GPL(devm_bitmap_alloc); unsigned long *devm_bitmap_zalloc(struct device *dev, unsigned int nbits, gfp_t flags) { return devm_bitmap_alloc(dev, nbits, flags | __GFP_ZERO); } EXPORT_SYMBOL_GPL(devm_bitmap_zalloc); #if BITS_PER_LONG == 64 /** * bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap * @bitmap: array of unsigned longs, the destination bitmap * @buf: array of u32 (in host byte order), the source bitmap * @nbits: number of bits in @bitmap */ void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits) { unsigned int i, halfwords; halfwords = DIV_ROUND_UP(nbits, 32); for (i = 0; i < halfwords; i++) { bitmap[i/2] = (unsigned long) buf[i]; if (++i < halfwords) bitmap[i/2] |= ((unsigned long) buf[i]) << 32; } /* Clear tail bits in last word beyond nbits. */ if (nbits % BITS_PER_LONG) bitmap[(halfwords - 1) / 2] &= BITMAP_LAST_WORD_MASK(nbits); } EXPORT_SYMBOL(bitmap_from_arr32); /** * bitmap_to_arr32 - copy the contents of bitmap to a u32 array of bits * @buf: array of u32 (in host byte order), the dest bitmap * @bitmap: array of unsigned longs, the source bitmap * @nbits: number of bits in @bitmap */ void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits) { unsigned int i, halfwords; halfwords = DIV_ROUND_UP(nbits, 32); for (i = 0; i < halfwords; i++) { buf[i] = (u32) (bitmap[i/2] & UINT_MAX); if (++i < halfwords) buf[i] = (u32) (bitmap[i/2] >> 32); } /* Clear tail bits in last element of array beyond nbits. 
*/ if (nbits % BITS_PER_LONG) buf[halfwords - 1] &= (u32) (UINT_MAX >> ((-nbits) & 31)); } EXPORT_SYMBOL(bitmap_to_arr32); #endif #if BITS_PER_LONG == 32 /** * bitmap_from_arr64 - copy the contents of u64 array of bits to bitmap * @bitmap: array of unsigned longs, the destination bitmap * @buf: array of u64 (in host byte order), the source bitmap * @nbits: number of bits in @bitmap */ void bitmap_from_arr64(unsigned long *bitmap, const u64 *buf, unsigned int nbits) { int n; for (n = nbits; n > 0; n -= 64) { u64 val = *buf++; *bitmap++ = val; if (n > 32) *bitmap++ = val >> 32; } /* * Clear tail bits in the last word beyond nbits. * * Negative index is OK because here we point to the word next * to the last word of the bitmap, except for nbits == 0, which * is tested implicitly. */ if (nbits % BITS_PER_LONG) bitmap[-1] &= BITMAP_LAST_WORD_MASK(nbits); } EXPORT_SYMBOL(bitmap_from_arr64); /** * bitmap_to_arr64 - copy the contents of bitmap to a u64 array of bits * @buf: array of u64 (in host byte order), the dest bitmap * @bitmap: array of unsigned longs, the source bitmap * @nbits: number of bits in @bitmap */ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits) { const unsigned long *end = bitmap + BITS_TO_LONGS(nbits); while (bitmap < end) { *buf = *bitmap++; if (bitmap < end) *buf |= (u64)(*bitmap++) << 32; buf++; } /* Clear tail bits in the last element of array beyond nbits. */ if (nbits % 64) buf[-1] &= GENMASK_ULL((nbits - 1) % 64, 0); } EXPORT_SYMBOL(bitmap_to_arr64); #endif
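/*
 * Illustrative sketch, not part of lib/bitmap.c: typical use of the
 * allocation and manipulation helpers above. bitmap_usage_example() and
 * the 128-bit size are invented for illustration only.
 */
static int bitmap_usage_example(void)
{
	unsigned long *map;
	unsigned int weight;

	map = bitmap_zalloc(128, GFP_KERNEL);	/* all 128 bits cleared */
	if (!map)
		return -ENOMEM;

	bitmap_set(map, 10, 4);			/* set bits 10..13 */
	weight = bitmap_weight(map, 128);	/* expected weight: 4 */

	bitmap_free(map);
	return weight == 4 ? 0 : -EINVAL;
}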
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TIMEKEEPING_H #define _LINUX_TIMEKEEPING_H #include <linux/errno.h> #include <linux/clocksource_ids.h> #include <linux/ktime.h> /* Included from linux/ktime.h */ void timekeeping_init(void); extern int timekeeping_suspended; /* Architecture timer tick functions: */ extern void legacy_timer_tick(unsigned long ticks); /* * Get and set timeofday */ extern int do_settimeofday64(const struct timespec64 *ts); extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* * ktime_get() family - read the current time in a multitude of ways. * * The default time reference is CLOCK_MONOTONIC, starting at * boot time but not counting the time spent in suspend. * For other references, use the functions with "real", "clocktai", * "boottime" and "raw" suffixes. * * To get the time in a different format, use the ones with * "ns", "ts64" and "seconds" suffix. * * See Documentation/core-api/timekeeping.rst for more details. 
*/ /* * timespec64 based interfaces */ extern void ktime_get_raw_ts64(struct timespec64 *ts); extern void ktime_get_ts64(struct timespec64 *ts); extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); void getboottime64(struct timespec64 *ts); /* * time64_t base interfaces */ extern time64_t ktime_get_seconds(void); extern time64_t __ktime_get_real_seconds(void); extern time64_t ktime_get_real_seconds(void); /* * ktime_t based interfaces */ enum tk_offsets { TK_OFFS_REAL, TK_OFFS_BOOT, TK_OFFS_TAI, TK_OFFS_MAX, }; extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs); extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); extern ktime_t ktime_get_raw(void); extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format * * Returns: real (wall) time in ktime_t format */ static inline ktime_t ktime_get_real(void) { return ktime_get_with_offset(TK_OFFS_REAL); } static inline ktime_t ktime_get_coarse_real(void) { return ktime_get_coarse_with_offset(TK_OFFS_REAL); } /** * ktime_get_boottime - Get monotonic time since boot in ktime_t format * * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the * time spent in suspend. * * Returns: monotonic time since boot in ktime_t format */ static inline ktime_t ktime_get_boottime(void) { return ktime_get_with_offset(TK_OFFS_BOOT); } static inline ktime_t ktime_get_coarse_boottime(void) { return ktime_get_coarse_with_offset(TK_OFFS_BOOT); } /** * ktime_get_clocktai - Get the TAI time of day in ktime_t format * * Returns: the TAI time of day in ktime_t format */ static inline ktime_t ktime_get_clocktai(void) { return ktime_get_with_offset(TK_OFFS_TAI); } static inline ktime_t ktime_get_coarse_clocktai(void) { return ktime_get_coarse_with_offset(TK_OFFS_TAI); } static inline ktime_t ktime_get_coarse(void) { struct timespec64 ts; ktime_get_coarse_ts64(&ts); return timespec64_to_ktime(ts); } static inline u64 ktime_get_coarse_ns(void) { return ktime_to_ns(ktime_get_coarse()); } static inline u64 ktime_get_coarse_real_ns(void) { return ktime_to_ns(ktime_get_coarse_real()); } static inline u64 ktime_get_coarse_boottime_ns(void) { return ktime_to_ns(ktime_get_coarse_boottime()); } static inline u64 ktime_get_coarse_clocktai_ns(void) { return ktime_to_ns(ktime_get_coarse_clocktai()); } /** * ktime_mono_to_real - Convert monotonic time to clock realtime * @mono: monotonic time to convert * * Returns: time converted to realtime clock */ static inline ktime_t ktime_mono_to_real(ktime_t mono) { return ktime_mono_to_any(mono, TK_OFFS_REAL); } /** * ktime_get_ns - Get the current time in nanoseconds * * Returns: current time converted to nanoseconds */ static inline u64 ktime_get_ns(void) { return ktime_to_ns(ktime_get()); } /** * ktime_get_real_ns - Get the current real/wall time in nanoseconds * * Returns: current real time converted to nanoseconds */ static inline u64 ktime_get_real_ns(void) { return ktime_to_ns(ktime_get_real()); } /** * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds * * Returns: current boottime converted to nanoseconds */ static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } /** * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds * * Returns: current TAI time converted 
to nanoseconds */ static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } /** * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds * * Returns: current raw monotonic time converted to nanoseconds */ static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); } extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); extern u64 ktime_get_boot_fast_ns(void); extern u64 ktime_get_tai_fast_ns(void); extern u64 ktime_get_real_fast_ns(void); /* * timespec64/time64_t interfaces utilizing the ktime based ones * for API completeness, these could be implemented more efficiently * if needed. */ static inline void ktime_get_boottime_ts64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_boottime()); } static inline void ktime_get_coarse_boottime_ts64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_coarse_boottime()); } static inline time64_t ktime_get_boottime_seconds(void) { return ktime_divns(ktime_get_coarse_boottime(), NSEC_PER_SEC); } static inline void ktime_get_clocktai_ts64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_clocktai()); } static inline void ktime_get_coarse_clocktai_ts64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_coarse_clocktai()); } static inline time64_t ktime_get_clocktai_seconds(void) { return ktime_divns(ktime_get_coarse_clocktai(), NSEC_PER_SEC); } /* * RTC specific */ extern bool timekeeping_rtc_skipsuspend(void); extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); /** * struct ktime_timestamps - Simultaneous mono/boot/real timestamps * @mono: Monotonic timestamp * @boot: Boottime timestamp * @real: Realtime timestamp */ struct ktime_timestamps { u64 mono; u64 boot; u64 real; }; /** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time * @cs_id: Clocksource ID * @clock_was_set_seq: The sequence number of clock-was-set events * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { u64 cycles; ktime_t real; ktime_t raw; enum clocksource_ids cs_id; unsigned int clock_was_set_seq; u8 cs_was_changed_seq; }; /** * struct system_device_crosststamp - system/device cross-timestamp * (synchronized capture) * @device: Device time * @sys_realtime: Realtime simultaneous with device time * @sys_monoraw: Monotonic raw simultaneous with device time */ struct system_device_crosststamp { ktime_t device; ktime_t sys_realtime; ktime_t sys_monoraw; }; /** * struct system_counterval_t - system counter value with the ID of the * corresponding clocksource * @cycles: System counter value * @cs_id: Clocksource ID corresponding to system counter value. Used by * timekeeping code to verify comparability of two cycle values. * The default ID, CSID_GENERIC, does not identify a specific * clocksource. 
*/ struct system_counterval_t { u64 cycles; enum clocksource_ids cs_id; }; /* * Get cross timestamp between system clock and device clock */ extern int get_device_system_crosststamp( int (*get_time_fn)(ktime_t *device_time, struct system_counterval_t *system_counterval, void *ctx), void *ctx, struct system_time_snapshot *history, struct system_device_crosststamp *xtstamp); /* * Simultaneously snapshot realtime and monotonic raw clocks */ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); /* NMI safe mono/boot/realtime timestamps */ extern void ktime_get_fast_timestamps(struct ktime_timestamps *snap); /* * Persistent clock related interfaces */ extern int persistent_clock_is_local; extern void read_persistent_clock64(struct timespec64 *ts); void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, struct timespec64 *boot_offset); #ifdef CONFIG_GENERIC_CMOS_UPDATE extern int update_persistent_clock64(struct timespec64 now); #endif #endif
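/*
 * Illustrative sketch, not part of this header: measuring an elapsed
 * interval with the CLOCK_MONOTONIC based ktime_get() family documented
 * above. ktime_measure_example() is an invented name; ktime_sub() and
 * ktime_to_ns() come from linux/ktime.h.
 */
static u64 ktime_measure_example(void (*fn)(void))
{
	ktime_t start = ktime_get();

	fn();

	/* nanoseconds elapsed; monotonic, so unaffected by settimeofday */
	return ktime_to_ns(ktime_sub(ktime_get(), start));
}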
// SPDX-License-Identifier: GPL-2.0 /* * Common Block IO controller cgroup interface * * Based on ideas and code from CFQ, CFS and BFQ: * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> * * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> * Paolo Valente <paolo.valente@unimore.it> * * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> * Nauman Rafique <nauman@google.com> * * For policy-specific per-blkcg data: * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it> * Arianna Avanzini <avanzini.arianna@gmail.com> */ #include <linux/ioprio.h> #include <linux/kdev_t.h> #include <linux/module.h> #include <linux/sched/signal.h> #include <linux/err.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/slab.h> #include <linux/delay.h> #include <linux/atomic.h> #include <linux/ctype.h> #include <linux/resume_user_mode.h> #include <linux/psi.h> #include 
<linux/part_stat.h> #include "blk.h" #include "blk-cgroup.h" #include "blk-ioprio.h" #include "blk-throttle.h" static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu); /* * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. * blkcg_pol_register_mutex nests outside of it and synchronizes entire * policy [un]register operations including cgroup file additions / * removals. Putting cgroup file registration outside blkcg_pol_mutex * allows grabbing it from cgroup callbacks. */ static DEFINE_MUTEX(blkcg_pol_register_mutex); static DEFINE_MUTEX(blkcg_pol_mutex); struct blkcg blkcg_root; EXPORT_SYMBOL_GPL(blkcg_root); struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css; EXPORT_SYMBOL_GPL(blkcg_root_css); static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ bool blkcg_debug_stats = false; static DEFINE_RAW_SPINLOCK(blkg_stat_lock); #define BLKG_DESTROY_BATCH_SIZE 64 /* * Lockless lists for tracking IO stats update * * New IO stats are stored in the percpu iostat_cpu within blkcg_gq (blkg). * There are multiple blkg's (one for each block device) attached to each * blkcg. The rstat code keeps track of which cpu has IO stats updated, * but it doesn't know which blkg has the updated stats. If there are many * block devices in a system, the cost of iterating all the blkg's to flush * out the IO stats can be high. To reduce such overhead, a set of percpu * lockless lists (lhead) per blkcg are used to track the set of recently * updated iostat_cpu's since the last flush. An iostat_cpu will be put * onto the lockless list on the update side [blk_cgroup_bio_start()] if * not there yet and then removed when being flushed [blkcg_rstat_flush()]. * References to blkg are gotten and then put back in the process to * protect against blkg removal. * * Return: 0 if successful or -ENOMEM if allocation fails. */ static int init_blkcg_llists(struct blkcg *blkcg) { int cpu; blkcg->lhead = alloc_percpu_gfp(struct llist_head, GFP_KERNEL); if (!blkcg->lhead) return -ENOMEM; for_each_possible_cpu(cpu) init_llist_head(per_cpu_ptr(blkcg->lhead, cpu)); return 0; } /** * blkcg_css - find the current css * * Find the css associated with either the kthread or the current task. * This may return a dying css, so it is up to the caller to use tryget logic * to confirm it is alive and well. */ static struct cgroup_subsys_state *blkcg_css(void) { struct cgroup_subsys_state *css; css = kthread_blkcg(); if (css) return css; return task_css(current, io_cgrp_id); } static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { return pol && test_bit(pol->plid, q->blkcg_pols); } static void blkg_free_workfn(struct work_struct *work) { struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, free_work); struct request_queue *q = blkg->q; int i; /* * pd_free_fn() can also be called from blkcg_deactivate_policy(), * in order to make sure pd_free_fn() is called in order, the deletion * of the list blkg->q_node is delayed to here from blkg_destroy(), and * blkcg_mutex is used to synchronize blkg_free_workfn() and * blkcg_deactivate_policy(). 
*/ mutex_lock(&q->blkcg_mutex); for (i = 0; i < BLKCG_MAX_POLS; i++) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->parent) blkg_put(blkg->parent); spin_lock_irq(&q->queue_lock); list_del_init(&blkg->q_node); spin_unlock_irq(&q->queue_lock); mutex_unlock(&q->blkcg_mutex); blk_put_queue(q); free_percpu(blkg->iostat_cpu); percpu_ref_exit(&blkg->refcnt); kfree(blkg); } /** * blkg_free - free a blkg * @blkg: blkg to free * * Free @blkg which may be partially allocated. */ static void blkg_free(struct blkcg_gq *blkg) { if (!blkg) return; /* * Both ->pd_free_fn() and request queue's release handler may * sleep, so free us by scheduling one work func */ INIT_WORK(&blkg->free_work, blkg_free_workfn); schedule_work(&blkg->free_work); } static void __blkg_release(struct rcu_head *rcu) { struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); struct blkcg *blkcg = blkg->blkcg; int cpu; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO WARN_ON(!bio_list_empty(&blkg->async_bios)); #endif /* * Flush all the non-empty percpu lockless lists before releasing * us, given these stat belongs to us. * * blkg_stat_lock is for serializing blkg stat update */ for_each_possible_cpu(cpu) __blkcg_rstat_flush(blkcg, cpu); /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); blkg_free(blkg); } /* * A group is RCU protected, but having an rcu lock does not mean that one * can access all the fields of blkg and assume these are valid. For * example, don't try to follow throtl_data and request queue links. * * Having a reference to blkg under an rcu allows accesses to only values * local to groups like group stats and group rate limits. */ static void blkg_release(struct percpu_ref *ref) { struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); call_rcu(&blkg->rcu_head, __blkg_release); } #ifdef CONFIG_BLK_CGROUP_PUNT_BIO static struct workqueue_struct *blkcg_punt_bio_wq; static void blkg_async_bio_workfn(struct work_struct *work) { struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, async_bio_work); struct bio_list bios = BIO_EMPTY_LIST; struct bio *bio; struct blk_plug plug; bool need_plug = false; /* as long as there are pending bios, @blkg can't go away */ spin_lock(&blkg->async_bio_lock); bio_list_merge(&bios, &blkg->async_bios); bio_list_init(&blkg->async_bios); spin_unlock(&blkg->async_bio_lock); /* start plug only when bio_list contains at least 2 bios */ if (bios.head && bios.head->bi_next) { need_plug = true; blk_start_plug(&plug); } while ((bio = bio_list_pop(&bios))) submit_bio(bio); if (need_plug) blk_finish_plug(&plug); } /* * When a shared kthread issues a bio for a cgroup, doing so synchronously can * lead to priority inversions as the kthread can be trapped waiting for that * cgroup. Use this helper instead of submit_bio to punt the actual issuing to * a dedicated per-blkcg work item to avoid such priority inversions. 
*/ void blkcg_punt_bio_submit(struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; if (blkg->parent) { spin_lock(&blkg->async_bio_lock); bio_list_add(&blkg->async_bios, bio); spin_unlock(&blkg->async_bio_lock); queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work); } else { /* never bounce for the root cgroup */ submit_bio(bio); } } EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit); static int __init blkcg_punt_bio_init(void) { blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND | WQ_SYSFS, 0); if (!blkcg_punt_bio_wq) return -ENOMEM; return 0; } subsys_initcall(blkcg_punt_bio_init); #endif /* CONFIG_BLK_CGROUP_PUNT_BIO */ /** * bio_blkcg_css - return the blkcg CSS associated with a bio * @bio: target bio * * This returns the CSS for the blkcg associated with a bio, or %NULL if not * associated. Callers are expected to either handle %NULL or know association * has been done prior to calling this. */ struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio) { if (!bio || !bio->bi_blkg) return NULL; return &bio->bi_blkg->blkcg->css; } EXPORT_SYMBOL_GPL(bio_blkcg_css); /** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest * * Return the parent blkcg of @blkcg. Can be called anytime. */ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) { return css_to_blkcg(blkcg->css.parent); } /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with * @disk: gendisk the new blkg is associated with * @gfp_mask: allocation mask to use * * Allocate a new blkg associating @blkcg and @disk. */ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, gfp_t gfp_mask) { struct blkcg_gq *blkg; int i, cpu; /* alloc and init base part */ blkg = kzalloc_node(sizeof(*blkg), gfp_mask, disk->queue->node); if (!blkg) return NULL; if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask)) goto out_free_blkg; blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask); if (!blkg->iostat_cpu) goto out_exit_refcnt; if (!blk_get_queue(disk->queue)) goto out_free_iostat; blkg->q = disk->queue; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO spin_lock_init(&blkg->async_bio_lock); bio_list_init(&blkg->async_bios); INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); #endif u64_stats_init(&blkg->iostat.sync); for_each_possible_cpu(cpu) { u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync); per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg; } for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd; if (!blkcg_policy_enabled(disk->queue, pol)) continue; /* alloc per-policy data and attach it to blkg */ pd = pol->pd_alloc_fn(disk, blkcg, gfp_mask); if (!pd) goto out_free_pds; blkg->pd[i] = pd; pd->blkg = blkg; pd->plid = i; pd->online = false; } return blkg; out_free_pds: while (--i >= 0) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); blk_put_queue(disk->queue); out_free_iostat: free_percpu(blkg->iostat_cpu); out_exit_refcnt: percpu_ref_exit(&blkg->refcnt); out_free_blkg: kfree(blkg); return NULL; } /* * If @new_blkg is %NULL, this function tries to allocate a new one as * necessary using %GFP_NOWAIT. @new_blkg is always consumed on return. 
*/ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk, struct blkcg_gq *new_blkg) { struct blkcg_gq *blkg; int i, ret; lockdep_assert_held(&disk->queue->queue_lock); /* request_queue is dying, do not create/recreate a blkg */ if (blk_queue_dying(disk->queue)) { ret = -ENODEV; goto err_free_blkg; } /* blkg holds a reference to blkcg */ if (!css_tryget_online(&blkcg->css)) { ret = -ENODEV; goto err_free_blkg; } /* allocate */ if (!new_blkg) { new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT | __GFP_NOWARN); if (unlikely(!new_blkg)) { ret = -ENOMEM; goto err_put_css; } } blkg = new_blkg; /* link parent */ if (blkcg_parent(blkcg)) { blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk->queue); if (WARN_ON_ONCE(!blkg->parent)) { ret = -ENODEV; goto err_put_css; } blkg_get(blkg->parent); } /* invoke per-policy init */ for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; if (blkg->pd[i] && pol->pd_init_fn) pol->pd_init_fn(blkg->pd[i]); } /* insert */ spin_lock(&blkcg->lock); ret = radix_tree_insert(&blkcg->blkg_tree, disk->queue->id, blkg); if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &disk->queue->blkg_list); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; if (blkg->pd[i]) { if (pol->pd_online_fn) pol->pd_online_fn(blkg->pd[i]); blkg->pd[i]->online = true; } } } blkg->online = true; spin_unlock(&blkcg->lock); if (!ret) return blkg; /* @blkg failed fully initialized, use the usual release path */ blkg_put(blkg); return ERR_PTR(ret); err_put_css: css_put(&blkcg->css); err_free_blkg: if (new_blkg) blkg_free(new_blkg); return ERR_PTR(ret); } /** * blkg_lookup_create - lookup blkg, try to create one if not there * @blkcg: blkcg of interest * @disk: gendisk of interest * * Lookup blkg for the @blkcg - @disk pair. If it doesn't exist, try to * create one. blkg creation is performed recursively from blkcg_root such * that all non-root blkg's have access to the parent blkg. This function * should be called under RCU read lock and takes @disk->queue->queue_lock. * * Returns the blkg or the closest blkg if blkg_create() fails as it walks * down from root. */ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct gendisk *disk) { struct request_queue *q = disk->queue; struct blkcg_gq *blkg; unsigned long flags; WARN_ON_ONCE(!rcu_read_lock_held()); blkg = blkg_lookup(blkcg, q); if (blkg) return blkg; spin_lock_irqsave(&q->queue_lock, flags); blkg = blkg_lookup(blkcg, q); if (blkg) { if (blkcg != &blkcg_root && blkg != rcu_dereference(blkcg->blkg_hint)) rcu_assign_pointer(blkcg->blkg_hint, blkg); goto found; } /* * Create blkgs walking down from blkcg_root to @blkcg, so that all * non-root blkgs have access to their parents. Returns the closest * blkg to the intended blkg should blkg_create() fail. 
*/ while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); struct blkcg_gq *ret_blkg = q->root_blkg; while (parent) { blkg = blkg_lookup(parent, q); if (blkg) { /* remember closest blkg */ ret_blkg = blkg; break; } pos = parent; parent = blkcg_parent(parent); } blkg = blkg_create(pos, disk, NULL); if (IS_ERR(blkg)) { blkg = ret_blkg; break; } if (pos == blkcg) break; } found: spin_unlock_irqrestore(&q->queue_lock, flags); return blkg; } static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; int i; lockdep_assert_held(&blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); /* * blkg stays on the queue list until blkg_free_workfn(), see details in * blkg_free_workfn(), hence this function can be called from * blkcg_destroy_blkgs() first and again from blkg_destroy_all() before * blkg_free_workfn(). */ if (hlist_unhashed(&blkg->blkcg_node)) return; for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; if (blkg->pd[i] && blkg->pd[i]->online) { blkg->pd[i]->online = false; if (pol->pd_offline_fn) pol->pd_offline_fn(blkg->pd[i]); } } blkg->online = false; radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); hlist_del_init_rcu(&blkg->blkcg_node); /* * Both setting lookup hint to and clearing it from @blkg are done * under queue_lock. If it's not pointing to @blkg now, it never * will. Hint assignment itself can race safely. */ if (rcu_access_pointer(blkcg->blkg_hint) == blkg) rcu_assign_pointer(blkcg->blkg_hint, NULL); /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ percpu_ref_kill(&blkg->refcnt); } static void blkg_destroy_all(struct gendisk *disk) { struct request_queue *q = disk->queue; struct blkcg_gq *blkg; int count = BLKG_DESTROY_BATCH_SIZE; int i; restart: spin_lock_irq(&q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; if (hlist_unhashed(&blkg->blkcg_node)) continue; spin_lock(&blkcg->lock); blkg_destroy(blkg); spin_unlock(&blkcg->lock); /* * in order to avoid holding the spin lock for too long, release * it when a batch of blkgs are destroyed. */ if (!(--count)) { count = BLKG_DESTROY_BATCH_SIZE; spin_unlock_irq(&q->queue_lock); cond_resched(); goto restart; } } /* * Mark policy deactivated since policy offline has been done, and * the free is scheduled, so future blkcg_deactivate_policy() can * be bypassed */ for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; if (pol) __clear_bit(pol->plid, q->blkcg_pols); } q->root_blkg = NULL; spin_unlock_irq(&q->queue_lock); } static int blkcg_reset_stats(struct cgroup_subsys_state *css, struct cftype *cftype, u64 val) { struct blkcg *blkcg = css_to_blkcg(css); struct blkcg_gq *blkg; int i, cpu; mutex_lock(&blkcg_pol_mutex); spin_lock_irq(&blkcg->lock); /* * Note that stat reset is racy - it doesn't synchronize against * stat updates. This is a debug feature which shouldn't exist * anyway. If you get hit by a race, retry. 
*/ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { for_each_possible_cpu(cpu) { struct blkg_iostat_set *bis = per_cpu_ptr(blkg->iostat_cpu, cpu); memset(bis, 0, sizeof(*bis)); /* Re-initialize the cleared blkg_iostat_set */ u64_stats_init(&bis->sync); bis->blkg = blkg; } memset(&blkg->iostat, 0, sizeof(blkg->iostat)); u64_stats_init(&blkg->iostat.sync); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; if (blkg->pd[i] && pol->pd_reset_stats_fn) pol->pd_reset_stats_fn(blkg->pd[i]); } } spin_unlock_irq(&blkcg->lock); mutex_unlock(&blkcg_pol_mutex); return 0; } const char *blkg_dev_name(struct blkcg_gq *blkg) { if (!blkg->q->disk) return NULL; return bdi_dev_name(blkg->q->disk->bdi); } /** * blkcg_print_blkgs - helper for printing per-blkg data * @sf: seq_file to print to * @blkcg: blkcg of interest * @prfill: fill function to print out a blkg * @pol: policy in question * @data: data to be passed to @prfill * @show_total: to print out sum of prfill return values or not * * This function invokes @prfill on each blkg of @blkcg if pd for the * policy specified by @pol exists. @prfill is invoked with @sf, the * policy data and @data and the matching queue lock held. If @show_total * is %true, the sum of the return values from @prfill is printed with * "Total" label at the end. * * This is to be used to construct print functions for * cftype->read_seq_string method. */ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int), const struct blkcg_policy *pol, int data, bool show_total) { struct blkcg_gq *blkg; u64 total = 0; rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { spin_lock_irq(&blkg->q->queue_lock); if (blkcg_policy_enabled(blkg->q, pol)) total += prfill(sf, blkg->pd[pol->plid], data); spin_unlock_irq(&blkg->q->queue_lock); } rcu_read_unlock(); if (show_total) seq_printf(sf, "Total %llu\n", (unsigned long long)total); } EXPORT_SYMBOL_GPL(blkcg_print_blkgs); /** * __blkg_prfill_u64 - prfill helper for a single u64 value * @sf: seq_file to print to * @pd: policy private data of interest * @v: value to print * * Print @v to @sf for the device associated with @pd. */ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) { const char *dname = blkg_dev_name(pd->blkg); if (!dname) return 0; seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v); return v; } EXPORT_SYMBOL_GPL(__blkg_prfill_u64); /** * blkg_conf_init - initialize a blkg_conf_ctx * @ctx: blkg_conf_ctx to initialize * @input: input string * * Initialize @ctx which can be used to parse blkg config input string @input. * Once initialized, @ctx can be used with blkg_conf_open_bdev() and * blkg_conf_prep(), and must be cleaned up with blkg_conf_exit(). */ void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input) { *ctx = (struct blkg_conf_ctx){ .input = input }; } EXPORT_SYMBOL_GPL(blkg_conf_init); /** * blkg_conf_open_bdev - parse and open bdev for per-blkg config update * @ctx: blkg_conf_ctx initialized with blkg_conf_init() * * Parse the device node prefix part, MAJ:MIN, of per-blkg config update from * @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is * set to point past the device node prefix. * * This function may be called multiple times on @ctx and the extra calls become * NOOPs. blkg_conf_prep() implicitly calls this function. 
Use this function * explicitly if bdev access is needed without resolving the blkcg / policy part * of @ctx->input. Returns -errno on error. */ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) { char *input = ctx->input; unsigned int major, minor; struct block_device *bdev; int key_len; if (ctx->bdev) return 0; if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2) return -EINVAL; input += key_len; if (!isspace(*input)) return -EINVAL; input = skip_spaces(input); bdev = blkdev_get_no_open(MKDEV(major, minor)); if (!bdev) return -ENODEV; if (bdev_is_partition(bdev)) { blkdev_put_no_open(bdev); return -ENODEV; } mutex_lock(&bdev->bd_queue->rq_qos_mutex); if (!disk_live(bdev->bd_disk)) { blkdev_put_no_open(bdev); mutex_unlock(&bdev->bd_queue->rq_qos_mutex); return -ENODEV; } ctx->body = input; ctx->bdev = bdev; return 0; } /** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup * @pol: target policy * @ctx: blkg_conf_ctx initialized with blkg_conf_init() * * Parse per-blkg config update from @ctx->input and initialize @ctx * accordingly. On success, @ctx->body points to the part of @ctx->input * following MAJ:MIN, @ctx->bdev points to the target block device and * @ctx->blkg to the blkg being configured. * * blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this * function returns with queue lock held and must be followed by * blkg_conf_exit(). */ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkg_conf_ctx *ctx) __acquires(&bdev->bd_queue->queue_lock) { struct gendisk *disk; struct request_queue *q; struct blkcg_gq *blkg; int ret; ret = blkg_conf_open_bdev(ctx); if (ret) return ret; disk = ctx->bdev->bd_disk; q = disk->queue; /* * blkcg_deactivate_policy() requires queue to be frozen, we can grab * q_usage_counter to prevent concurrent with blkcg_deactivate_policy(). */ ret = blk_queue_enter(q, 0); if (ret) goto fail; spin_lock_irq(&q->queue_lock); if (!blkcg_policy_enabled(q, pol)) { ret = -EOPNOTSUPP; goto fail_unlock; } blkg = blkg_lookup(blkcg, q); if (blkg) goto success; /* * Create blkgs walking down from blkcg_root to @blkcg, so that all * non-root blkgs have access to their parents. */ while (true) { struct blkcg *pos = blkcg; struct blkcg *parent; struct blkcg_gq *new_blkg; parent = blkcg_parent(blkcg); while (parent && !blkg_lookup(parent, q)) { pos = parent; parent = blkcg_parent(parent); } /* Drop locks to do new blkg allocation with GFP_KERNEL. */ spin_unlock_irq(&q->queue_lock); new_blkg = blkg_alloc(pos, disk, GFP_KERNEL); if (unlikely(!new_blkg)) { ret = -ENOMEM; goto fail_exit_queue; } if (radix_tree_preload(GFP_KERNEL)) { blkg_free(new_blkg); ret = -ENOMEM; goto fail_exit_queue; } spin_lock_irq(&q->queue_lock); if (!blkcg_policy_enabled(q, pol)) { blkg_free(new_blkg); ret = -EOPNOTSUPP; goto fail_preloaded; } blkg = blkg_lookup(pos, q); if (blkg) { blkg_free(new_blkg); } else { blkg = blkg_create(pos, disk, new_blkg); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); goto fail_preloaded; } } radix_tree_preload_end(); if (pos == blkcg) goto success; } success: blk_queue_exit(q); ctx->blkg = blkg; return 0; fail_preloaded: radix_tree_preload_end(); fail_unlock: spin_unlock_irq(&q->queue_lock); fail_exit_queue: blk_queue_exit(q); fail: /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue * can be bypassing for some time and it's always nice to * avoid busy looping. 
*/ if (ret == -EBUSY) { msleep(10); ret = restart_syscall(); } return ret; } EXPORT_SYMBOL_GPL(blkg_conf_prep); /** * blkg_conf_exit - clean up per-blkg config update * @ctx: blkg_conf_ctx initialized with blkg_conf_init() * * Clean up after per-blkg config update. This function must be called on all * blkg_conf_ctx's initialized with blkg_conf_init(). */ void blkg_conf_exit(struct blkg_conf_ctx *ctx) __releases(&ctx->bdev->bd_queue->queue_lock) __releases(&ctx->bdev->bd_queue->rq_qos_mutex) { if (ctx->blkg) { spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock); ctx->blkg = NULL; } if (ctx->bdev) { mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex); blkdev_put_no_open(ctx->bdev); ctx->body = NULL; ctx->bdev = NULL; } } EXPORT_SYMBOL_GPL(blkg_conf_exit); static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src) { int i; for (i = 0; i < BLKG_IOSTAT_NR; i++) { dst->bytes[i] = src->bytes[i]; dst->ios[i] = src->ios[i]; } } static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src) { int i; for (i = 0; i < BLKG_IOSTAT_NR; i++) { dst->bytes[i] += src->bytes[i]; dst->ios[i] += src->ios[i]; } } static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src) { int i; for (i = 0; i < BLKG_IOSTAT_NR; i++) { dst->bytes[i] -= src->bytes[i]; dst->ios[i] -= src->ios[i]; } } static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, struct blkg_iostat *last) { struct blkg_iostat delta; unsigned long flags; /* propagate percpu delta to global */ flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); blkg_iostat_set(&delta, cur); blkg_iostat_sub(&delta, last); blkg_iostat_add(&blkg->iostat.cur, &delta); blkg_iostat_add(last, &delta); u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) { struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); struct llist_node *lnode; struct blkg_iostat_set *bisc, *next_bisc; unsigned long flags; rcu_read_lock(); lnode = llist_del_all(lhead); if (!lnode) goto out; /* * For covering concurrent parent blkg update from blkg_release(). * * When flushing from cgroup, cgroup_rstat_lock is always held, so * this lock won't cause contention most of time. */ raw_spin_lock_irqsave(&blkg_stat_lock, flags); /* * Iterate only the iostat_cpu's queued in the lockless list. */ llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) { struct blkcg_gq *blkg = bisc->blkg; struct blkcg_gq *parent = blkg->parent; struct blkg_iostat cur; unsigned int seq; WRITE_ONCE(bisc->lqueued, false); /* fetch the current per-cpu values */ do { seq = u64_stats_fetch_begin(&bisc->sync); blkg_iostat_set(&cur, &bisc->cur); } while (u64_stats_fetch_retry(&bisc->sync, seq)); blkcg_iostat_update(blkg, &cur, &bisc->last); /* propagate global delta to parent (unless that's root) */ if (parent && parent->parent) blkcg_iostat_update(parent, &blkg->iostat.cur, &blkg->iostat.last); } raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); out: rcu_read_unlock(); } static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) { /* Root-level stats are sourced from system-wide IO stats */ if (cgroup_parent(css->cgroup)) __blkcg_rstat_flush(css_to_blkcg(css), cpu); } /* * We source root cgroup stats from the system-wide stats to avoid * tracking the same information twice and incurring overhead when no * cgroups are defined. For that reason, cgroup_rstat_flush in * blkcg_print_stat does not actually fill out the iostat in the root * cgroup's blkcg_gq. 
* * However, we would like to re-use the printing code between the root and * non-root cgroups to the extent possible. For that reason, we simulate * flushing the root cgroup's stats by explicitly filling in the iostat * with disk level statistics. */ static void blkcg_fill_root_iostats(void) { struct class_dev_iter iter; struct device *dev; class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct block_device *bdev = dev_to_bdev(dev); struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg; struct blkg_iostat tmp; int cpu; unsigned long flags; memset(&tmp, 0, sizeof(tmp)); for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += cpu_dkstats->ios[STAT_READ]; tmp.ios[BLKG_IOSTAT_WRITE] += cpu_dkstats->ios[STAT_WRITE]; tmp.ios[BLKG_IOSTAT_DISCARD] += cpu_dkstats->ios[STAT_DISCARD]; // convert sectors to bytes tmp.bytes[BLKG_IOSTAT_READ] += cpu_dkstats->sectors[STAT_READ] << 9; tmp.bytes[BLKG_IOSTAT_WRITE] += cpu_dkstats->sectors[STAT_WRITE] << 9; tmp.bytes[BLKG_IOSTAT_DISCARD] += cpu_dkstats->sectors[STAT_DISCARD] << 9; } flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); blkg_iostat_set(&blkg->iostat.cur, &tmp); u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } } static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) { struct blkg_iostat_set *bis = &blkg->iostat; u64 rbytes, wbytes, rios, wios, dbytes, dios; const char *dname; unsigned seq; int i; if (!blkg->online) return; dname = blkg_dev_name(blkg); if (!dname) return; seq_printf(s, "%s ", dname); do { seq = u64_stats_fetch_begin(&bis->sync); rbytes = bis->cur.bytes[BLKG_IOSTAT_READ]; wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE]; dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD]; rios = bis->cur.ios[BLKG_IOSTAT_READ]; wios = bis->cur.ios[BLKG_IOSTAT_WRITE]; dios = bis->cur.ios[BLKG_IOSTAT_DISCARD]; } while (u64_stats_fetch_retry(&bis->sync, seq)); if (rbytes || wbytes || rios || wios) { seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu", rbytes, wbytes, rios, wios, dbytes, dios); } if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) { seq_printf(s, " use_delay=%d delay_nsec=%llu", atomic_read(&blkg->use_delay), atomic64_read(&blkg->delay_nsec)); } for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; if (!blkg->pd[i] || !pol->pd_stat_fn) continue; pol->pd_stat_fn(blkg->pd[i], s); } seq_puts(s, "\n"); } static int blkcg_print_stat(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct blkcg_gq *blkg; if (!seq_css(sf)->parent) blkcg_fill_root_iostats(); else cgroup_rstat_flush(blkcg->css.cgroup); rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { spin_lock_irq(&blkg->q->queue_lock); blkcg_print_one_stat(blkg, sf); spin_unlock_irq(&blkg->q->queue_lock); } rcu_read_unlock(); return 0; } static struct cftype blkcg_files[] = { { .name = "stat", .seq_show = blkcg_print_stat, }, { } /* terminate */ }; static struct cftype blkcg_legacy_files[] = { { .name = "reset_stats", .write_u64 = blkcg_reset_stats, }, { } /* terminate */ }; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css) { return &css_to_blkcg(css)->cgwb_list; } #endif /* * blkcg destruction is a three-stage process. * * 1. Destruction starts. The blkcg_css_offline() callback is invoked * which offlines writeback. 
Here we tie the next stage of blkg destruction * to the completion of writeback associated with the blkcg. This lets us * avoid punting potentially large amounts of outstanding writeback to root * while maintaining any ongoing policies. The next stage is triggered when * the nr_cgwbs count goes to zero. * * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called * and handles the destruction of blkgs. Here the css reference held by * the blkg is put back eventually allowing blkcg_css_free() to be called. * This work may occur in cgwb_release_workfn() on the cgwb_release * workqueue. Any submitted ios that fail to get the blkg ref will be * punted to the root_blkg. * * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called. * This finally frees the blkcg. */ /** * blkcg_destroy_blkgs - responsible for shooting down blkgs * @blkcg: blkcg of interest * * blkgs should be removed while holding both q and blkcg locks. As blkcg lock * is nested inside q lock, this function performs reverse double lock dancing. * Destroying the blkgs releases the reference held on the blkcg's css allowing * blkcg_css_free to eventually be called. * * This is the blkcg counterpart of ioc_release_fn(). */ static void blkcg_destroy_blkgs(struct blkcg *blkcg) { might_sleep(); spin_lock_irq(&blkcg->lock); while (!hlist_empty(&blkcg->blkg_list)) { struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, struct blkcg_gq, blkcg_node); struct request_queue *q = blkg->q; if (need_resched() || !spin_trylock(&q->queue_lock)) { /* * Given that the system can accumulate a huge number * of blkgs in pathological cases, check to see if we * need to rescheduling to avoid softlockup. */ spin_unlock_irq(&blkcg->lock); cond_resched(); spin_lock_irq(&blkcg->lock); continue; } blkg_destroy(blkg); spin_unlock(&q->queue_lock); } spin_unlock_irq(&blkcg->lock); } /** * blkcg_pin_online - pin online state * @blkcg_css: blkcg of interest * * While pinned, a blkcg is kept online. This is primarily used to * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline * while an associated cgwb is still active. */ void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css) { refcount_inc(&css_to_blkcg(blkcg_css)->online_pin); } /** * blkcg_unpin_online - unpin online state * @blkcg_css: blkcg of interest * * This is primarily used to impedance-match blkg and cgwb lifetimes so * that blkg doesn't go offline while an associated cgwb is still active. * When this count goes to zero, all active cgwbs have finished so the * blkcg can continue destruction by calling blkcg_destroy_blkgs(). */ void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css) { struct blkcg *blkcg = css_to_blkcg(blkcg_css); do { if (!refcount_dec_and_test(&blkcg->online_pin)) break; blkcg_destroy_blkgs(blkcg); blkcg = blkcg_parent(blkcg); } while (blkcg); } /** * blkcg_css_offline - cgroup css_offline callback * @css: css of interest * * This function is called when @css is about to go away. Here the cgwbs are * offlined first and only once writeback associated with the blkcg has * finished do we start step 2 (see above). 
*/ static void blkcg_css_offline(struct cgroup_subsys_state *css) { /* this prevents anyone from attaching or migrating to this blkcg */ wb_blkcg_offline(css); /* put the base online pin allowing step 2 to be triggered */ blkcg_unpin_online(css); } static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); int i; mutex_lock(&blkcg_pol_mutex); list_del(&blkcg->all_blkcgs_node); for (i = 0; i < BLKCG_MAX_POLS; i++) if (blkcg->cpd[i]) blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]); mutex_unlock(&blkcg_pol_mutex); free_percpu(blkcg->lhead); kfree(blkcg); } static struct cgroup_subsys_state * blkcg_css_alloc(struct cgroup_subsys_state *parent_css) { struct blkcg *blkcg; int i; mutex_lock(&blkcg_pol_mutex); if (!parent_css) { blkcg = &blkcg_root; } else { blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); if (!blkcg) goto unlock; } if (init_blkcg_llists(blkcg)) goto free_blkcg; for (i = 0; i < BLKCG_MAX_POLS ; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy_data *cpd; /* * If the policy hasn't been attached yet, wait for it * to be attached before doing anything else. Otherwise, * check if the policy requires any specific per-cgroup * data: if it does, allocate and initialize it. */ if (!pol || !pol->cpd_alloc_fn) continue; cpd = pol->cpd_alloc_fn(GFP_KERNEL); if (!cpd) goto free_pd_blkcg; blkcg->cpd[i] = cpd; cpd->blkcg = blkcg; cpd->plid = i; } spin_lock_init(&blkcg->lock); refcount_set(&blkcg->online_pin, 1); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN); INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); #endif list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); mutex_unlock(&blkcg_pol_mutex); return &blkcg->css; free_pd_blkcg: for (i--; i >= 0; i--) if (blkcg->cpd[i]) blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]); free_percpu(blkcg->lhead); free_blkcg: if (blkcg != &blkcg_root) kfree(blkcg); unlock: mutex_unlock(&blkcg_pol_mutex); return ERR_PTR(-ENOMEM); } static int blkcg_css_online(struct cgroup_subsys_state *css) { struct blkcg *parent = blkcg_parent(css_to_blkcg(css)); /* * blkcg_pin_online() is used to delay blkcg offline so that blkgs * don't go offline while cgwbs are still active on them. Pin the * parent so that offline always happens towards the root. */ if (parent) blkcg_pin_online(&parent->css); return 0; } void blkg_init_queue(struct request_queue *q) { INIT_LIST_HEAD(&q->blkg_list); mutex_init(&q->blkcg_mutex); } int blkcg_init_disk(struct gendisk *disk) { struct request_queue *q = disk->queue; struct blkcg_gq *new_blkg, *blkg; bool preloaded; int ret; new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL); if (!new_blkg) return -ENOMEM; preloaded = !radix_tree_preload(GFP_KERNEL); /* Make sure the root blkg exists. */ /* spin_lock_irq can serve as RCU read-side critical section. 
*/ spin_lock_irq(&q->queue_lock); blkg = blkg_create(&blkcg_root, disk, new_blkg); if (IS_ERR(blkg)) goto err_unlock; q->root_blkg = blkg; spin_unlock_irq(&q->queue_lock); if (preloaded) radix_tree_preload_end(); ret = blk_ioprio_init(disk); if (ret) goto err_destroy_all; ret = blk_throtl_init(disk); if (ret) goto err_ioprio_exit; return 0; err_ioprio_exit: blk_ioprio_exit(disk); err_destroy_all: blkg_destroy_all(disk); return ret; err_unlock: spin_unlock_irq(&q->queue_lock); if (preloaded) radix_tree_preload_end(); return PTR_ERR(blkg); } void blkcg_exit_disk(struct gendisk *disk) { blkg_destroy_all(disk); blk_throtl_exit(disk); } static void blkcg_exit(struct task_struct *tsk) { if (tsk->throttle_disk) put_disk(tsk->throttle_disk); tsk->throttle_disk = NULL; } struct cgroup_subsys io_cgrp_subsys = { .css_alloc = blkcg_css_alloc, .css_online = blkcg_css_online, .css_offline = blkcg_css_offline, .css_free = blkcg_css_free, .css_rstat_flush = blkcg_rstat_flush, .dfl_cftypes = blkcg_files, .legacy_cftypes = blkcg_legacy_files, .legacy_name = "blkio", .exit = blkcg_exit, #ifdef CONFIG_MEMCG /* * This ensures that, if available, memcg is automatically enabled * together on the default hierarchy so that the owner cgroup can * be retrieved from writeback pages. */ .depends_on = 1 << memory_cgrp_id, #endif }; EXPORT_SYMBOL_GPL(io_cgrp_subsys); /** * blkcg_activate_policy - activate a blkcg policy on a gendisk * @disk: gendisk of interest * @pol: blkcg policy to activate * * Activate @pol on @disk. Requires %GFP_KERNEL context. @disk goes through * bypass mode to populate its blkgs with policy_data for @pol. * * Activation happens with @disk bypassed, so nobody would be accessing blkgs * from IO path. Update of each blkg is protected by both queue and blkcg * locks so that holding either lock and testing blkcg_policy_enabled() is * always enough for dereferencing policy data. * * The caller is responsible for synchronizing [de]activations and policy * [un]registerations. Returns 0 on success, -errno on failure. */ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { struct request_queue *q = disk->queue; struct blkg_policy_data *pd_prealloc = NULL; struct blkcg_gq *blkg, *pinned_blkg = NULL; int ret; if (blkcg_policy_enabled(q, pol)) return 0; if (queue_is_mq(q)) blk_mq_freeze_queue(q); retry: spin_lock_irq(&q->queue_lock); /* blkg_list is pushed at the head, reverse walk to initialize parents first */ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { struct blkg_policy_data *pd; if (blkg->pd[pol->plid]) continue; /* If prealloc matches, use it; otherwise try GFP_NOWAIT */ if (blkg == pinned_blkg) { pd = pd_prealloc; pd_prealloc = NULL; } else { pd = pol->pd_alloc_fn(disk, blkg->blkcg, GFP_NOWAIT | __GFP_NOWARN); } if (!pd) { /* * GFP_NOWAIT failed. Free the existing one and * prealloc for @blkg w/ GFP_KERNEL. 
*/ if (pinned_blkg) blkg_put(pinned_blkg); blkg_get(blkg); pinned_blkg = blkg; spin_unlock_irq(&q->queue_lock); if (pd_prealloc) pol->pd_free_fn(pd_prealloc); pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg, GFP_KERNEL); if (pd_prealloc) goto retry; else goto enomem; } spin_lock(&blkg->blkcg->lock); pd->blkg = blkg; pd->plid = pol->plid; blkg->pd[pol->plid] = pd; if (pol->pd_init_fn) pol->pd_init_fn(pd); if (pol->pd_online_fn) pol->pd_online_fn(pd); pd->online = true; spin_unlock(&blkg->blkcg->lock); } __set_bit(pol->plid, q->blkcg_pols); ret = 0; spin_unlock_irq(&q->queue_lock); out: if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); if (pinned_blkg) blkg_put(pinned_blkg); if (pd_prealloc) pol->pd_free_fn(pd_prealloc); return ret; enomem: /* alloc failed, take down everything */ spin_lock_irq(&q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; struct blkg_policy_data *pd; spin_lock(&blkcg->lock); pd = blkg->pd[pol->plid]; if (pd) { if (pd->online && pol->pd_offline_fn) pol->pd_offline_fn(pd); pd->online = false; pol->pd_free_fn(pd); blkg->pd[pol->plid] = NULL; } spin_unlock(&blkcg->lock); } spin_unlock_irq(&q->queue_lock); ret = -ENOMEM; goto out; } EXPORT_SYMBOL_GPL(blkcg_activate_policy); /** * blkcg_deactivate_policy - deactivate a blkcg policy on a gendisk * @disk: gendisk of interest * @pol: blkcg policy to deactivate * * Deactivate @pol on @disk. Follows the same synchronization rules as * blkcg_activate_policy(). */ void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { struct request_queue *q = disk->queue; struct blkcg_gq *blkg; if (!blkcg_policy_enabled(q, pol)) return; if (queue_is_mq(q)) blk_mq_freeze_queue(q); mutex_lock(&q->blkcg_mutex); spin_lock_irq(&q->queue_lock); __clear_bit(pol->plid, q->blkcg_pols); list_for_each_entry(blkg, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); if (blkg->pd[pol->plid]) { if (blkg->pd[pol->plid]->online && pol->pd_offline_fn) pol->pd_offline_fn(blkg->pd[pol->plid]); pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } spin_unlock(&blkcg->lock); } spin_unlock_irq(&q->queue_lock); mutex_unlock(&q->blkcg_mutex); if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); static void blkcg_free_all_cpd(struct blkcg_policy *pol) { struct blkcg *blkcg; list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { if (blkcg->cpd[pol->plid]) { pol->cpd_free_fn(blkcg->cpd[pol->plid]); blkcg->cpd[pol->plid] = NULL; } } } /** * blkcg_policy_register - register a blkcg policy * @pol: blkcg policy to register * * Register @pol with blkcg core. Might sleep and @pol may be modified on * successful registration. Returns 0 on success and -errno on failure. 
*/ int blkcg_policy_register(struct blkcg_policy *pol) { struct blkcg *blkcg; int i, ret; mutex_lock(&blkcg_pol_register_mutex); mutex_lock(&blkcg_pol_mutex); /* find an empty slot */ ret = -ENOSPC; for (i = 0; i < BLKCG_MAX_POLS; i++) if (!blkcg_policy[i]) break; if (i >= BLKCG_MAX_POLS) { pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small\n"); goto err_unlock; } /* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs */ if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) || (!pol->pd_alloc_fn ^ !pol->pd_free_fn)) goto err_unlock; /* register @pol */ pol->plid = i; blkcg_policy[pol->plid] = pol; /* allocate and install cpd's */ if (pol->cpd_alloc_fn) { list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { struct blkcg_policy_data *cpd; cpd = pol->cpd_alloc_fn(GFP_KERNEL); if (!cpd) goto err_free_cpds; blkcg->cpd[pol->plid] = cpd; cpd->blkcg = blkcg; cpd->plid = pol->plid; } } mutex_unlock(&blkcg_pol_mutex); /* everything is in place, add intf files for the new policy */ if (pol->dfl_cftypes) WARN_ON(cgroup_add_dfl_cftypes(&io_cgrp_subsys, pol->dfl_cftypes)); if (pol->legacy_cftypes) WARN_ON(cgroup_add_legacy_cftypes(&io_cgrp_subsys, pol->legacy_cftypes)); mutex_unlock(&blkcg_pol_register_mutex); return 0; err_free_cpds: if (pol->cpd_free_fn) blkcg_free_all_cpd(pol); blkcg_policy[pol->plid] = NULL; err_unlock: mutex_unlock(&blkcg_pol_mutex); mutex_unlock(&blkcg_pol_register_mutex); return ret; } EXPORT_SYMBOL_GPL(blkcg_policy_register); /** * blkcg_policy_unregister - unregister a blkcg policy * @pol: blkcg policy to unregister * * Undo blkcg_policy_register(@pol). Might sleep. */ void blkcg_policy_unregister(struct blkcg_policy *pol) { mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) goto out_unlock; /* kill the intf files first */ if (pol->dfl_cftypes) cgroup_rm_cftypes(pol->dfl_cftypes); if (pol->legacy_cftypes) cgroup_rm_cftypes(pol->legacy_cftypes); /* remove cpds and unregister */ mutex_lock(&blkcg_pol_mutex); if (pol->cpd_free_fn) blkcg_free_all_cpd(pol); blkcg_policy[pol->plid] = NULL; mutex_unlock(&blkcg_pol_mutex); out_unlock: mutex_unlock(&blkcg_pol_register_mutex); } EXPORT_SYMBOL_GPL(blkcg_policy_unregister); /* * Scale the accumulated delay based on how long it has been since we updated * the delay. We only call this when we are adding delay, in case it's been a * while since we added delay, and when we are checking to see if we need to * delay a task, to account for any delays that may have occurred. */ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now) { u64 old = atomic64_read(&blkg->delay_start); /* negative use_delay means no scaling, see blkcg_set_delay() */ if (atomic_read(&blkg->use_delay) < 0) return; /* * We only want to scale down every second. The idea here is that we * want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain * time window. We only want to throttle tasks for recent delay that * has occurred, in 1 second time windows since that's the maximum * things can be throttled. We save the current delay window in * blkg->last_delay so we know what amount is still left to be charged * to the blkg from this point onward. blkg->last_use keeps track of * the use_delay counter. The idea is if we're unthrottling the blkg we * are ok with whatever is happening now, and we can take away more of * the accumulated delay as we've already throttled enough that * everybody is happy with their IO latencies. 
*/ if (time_before64(old + NSEC_PER_SEC, now) && atomic64_try_cmpxchg(&blkg->delay_start, &old, now)) { u64 cur = atomic64_read(&blkg->delay_nsec); u64 sub = min_t(u64, blkg->last_delay, now - old); int cur_use = atomic_read(&blkg->use_delay); /* * We've been unthrottled, subtract a larger chunk of our * accumulated delay. */ if (cur_use < blkg->last_use) sub = max_t(u64, sub, blkg->last_delay >> 1); /* * This shouldn't happen, but handle it anyway. Our delay_nsec * should only ever be growing except here where we subtract out * min(last_delay, 1 second), but lord knows bugs happen and I'd * rather not end up with negative numbers. */ if (unlikely(cur < sub)) { atomic64_set(&blkg->delay_nsec, 0); blkg->last_delay = 0; } else { atomic64_sub(sub, &blkg->delay_nsec); blkg->last_delay = cur - sub; } blkg->last_use = cur_use; } } /* * This is called when we want to actually walk up the hierarchy and check to * see if we need to throttle, and then actually throttle if there is some * accumulated delay. This should only be called upon return to user space so * we're not holding some lock that would induce a priority inversion. */ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay) { unsigned long pflags; bool clamp; u64 now = blk_time_get_ns(); u64 exp; u64 delay_nsec = 0; int tok; while (blkg->parent) { int use_delay = atomic_read(&blkg->use_delay); if (use_delay) { u64 this_delay; blkcg_scale_delay(blkg, now); this_delay = atomic64_read(&blkg->delay_nsec); if (this_delay > delay_nsec) { delay_nsec = this_delay; clamp = use_delay > 0; } } blkg = blkg->parent; } if (!delay_nsec) return; /* * Let's not sleep for all eternity if we've amassed a huge delay. * Swapping or metadata IO can accumulate 10's of seconds worth of * delay, and we want userspace to be able to do _something_ so cap the * delays at 0.25s. If there's 10's of seconds worth of delay then the * tasks will be delayed for 0.25 second for every syscall. If * blkcg_set_delay() was used as indicated by negative use_delay, the * caller is responsible for regulating the range. */ if (clamp) delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC); if (use_memdelay) psi_memstall_enter(&pflags); exp = ktime_add_ns(now, delay_nsec); tok = io_schedule_prepare(); do { __set_current_state(TASK_KILLABLE); if (!schedule_hrtimeout(&exp, HRTIMER_MODE_ABS)) break; } while (!fatal_signal_pending(current)); io_schedule_finish(tok); if (use_memdelay) psi_memstall_leave(&pflags); } /** * blkcg_maybe_throttle_current - throttle the current task if it has been marked * * This is only called if we've been marked with set_notify_resume(). Obviously * we can be set_notify_resume() for reasons other than blkcg throttling, so we * check to see if current->throttle_disk is set and if not this doesn't do * anything. This should only ever be called by the resume code, it's not meant * to be called by people willy-nilly as it will actually do the work to * throttle the task if it is setup for throttling. 
*/ void blkcg_maybe_throttle_current(void) { struct gendisk *disk = current->throttle_disk; struct blkcg *blkcg; struct blkcg_gq *blkg; bool use_memdelay = current->use_memdelay; if (!disk) return; current->throttle_disk = NULL; current->use_memdelay = false; rcu_read_lock(); blkcg = css_to_blkcg(blkcg_css()); if (!blkcg) goto out; blkg = blkg_lookup(blkcg, disk->queue); if (!blkg) goto out; if (!blkg_tryget(blkg)) goto out; rcu_read_unlock(); blkcg_maybe_throttle_blkg(blkg, use_memdelay); blkg_put(blkg); put_disk(disk); return; out: rcu_read_unlock(); } /** * blkcg_schedule_throttle - this task needs to check for throttling * @disk: disk to throttle * @use_memdelay: do we charge this to memory delay for PSI * * This is called by the IO controller when we know there's delay accumulated * for the blkg for this task. We do not pass the blkg because there are places * we call this that may not have that information, the swapping code for * instance will only have a block_device at that point. This set's the * notify_resume for the task to check and see if it requires throttling before * returning to user space. * * We will only schedule once per syscall. You can call this over and over * again and it will only do the check once upon return to user space, and only * throttle once. If the task needs to be throttled again it'll need to be * re-set at the next time we see the task. */ void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay) { if (unlikely(current->flags & PF_KTHREAD)) return; if (current->throttle_disk != disk) { if (test_bit(GD_DEAD, &disk->state)) return; get_device(disk_to_dev(disk)); if (current->throttle_disk) put_disk(current->throttle_disk); current->throttle_disk = disk; } if (use_memdelay) current->use_memdelay = use_memdelay; set_notify_resume(current); } /** * blkcg_add_delay - add delay to this blkg * @blkg: blkg of interest * @now: the current time in nanoseconds * @delta: how many nanoseconds of delay to add * * Charge @delta to the blkg's current delay accumulation. This is used to * throttle tasks if an IO controller thinks we need more throttling. */ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta) { if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) return; blkcg_scale_delay(blkg, now); atomic64_add(delta, &blkg->delay_nsec); } /** * blkg_tryget_closest - try and get a blkg ref on the closet blkg * @bio: target bio * @css: target css * * As the failure mode here is to walk up the blkg tree, this ensure that the * blkg->parent pointers are always valid. This returns the blkg that it ended * up taking a reference on or %NULL if no reference was taken. */ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio, struct cgroup_subsys_state *css) { struct blkcg_gq *blkg, *ret_blkg = NULL; rcu_read_lock(); blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_bdev->bd_disk); while (blkg) { if (blkg_tryget(blkg)) { ret_blkg = blkg; break; } blkg = blkg->parent; } rcu_read_unlock(); return ret_blkg; } /** * bio_associate_blkg_from_css - associate a bio with a specified css * @bio: target bio * @css: target css * * Associate @bio with the blkg found by combining the css's blkg and the * request_queue of the @bio. An association failure is handled by walking up * the blkg tree. Therefore, the blkg associated can be anything between @blkg * and q->root_blkg. This situation only happens when a cgroup is dying and * then the remaining bios will spill to the closest alive blkg. 
* * A reference will be taken on the blkg and will be released when @bio is * freed. */ void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { if (bio->bi_blkg) blkg_put(bio->bi_blkg); if (css && css->parent) { bio->bi_blkg = blkg_tryget_closest(bio, css); } else { blkg_get(bdev_get_queue(bio->bi_bdev)->root_blkg); bio->bi_blkg = bdev_get_queue(bio->bi_bdev)->root_blkg; } } EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); /** * bio_associate_blkg - associate a bio with a blkg * @bio: target bio * * Associate @bio with the blkg found from the bio's css and request_queue. * If one is not found, bio_lookup_blkg() creates the blkg. If a blkg is * already associated, the css is reused and association redone as the * request_queue may have changed. */ void bio_associate_blkg(struct bio *bio) { struct cgroup_subsys_state *css; if (blk_op_is_passthrough(bio->bi_opf)) return; rcu_read_lock(); if (bio->bi_blkg) css = bio_blkcg_css(bio); else css = blkcg_css(); bio_associate_blkg_from_css(bio, css); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(bio_associate_blkg); /** * bio_clone_blkg_association - clone blkg association from src to dst bio * @dst: destination bio * @src: source bio */ void bio_clone_blkg_association(struct bio *dst, struct bio *src) { if (src->bi_blkg) bio_associate_blkg_from_css(dst, bio_blkcg_css(src)); } EXPORT_SYMBOL_GPL(bio_clone_blkg_association); static int blk_cgroup_io_type(struct bio *bio) { if (op_is_discard(bio->bi_opf)) return BLKG_IOSTAT_DISCARD; if (op_is_write(bio->bi_opf)) return BLKG_IOSTAT_WRITE; return BLKG_IOSTAT_READ; } void blk_cgroup_bio_start(struct bio *bio) { struct blkcg *blkcg = bio->bi_blkg->blkcg; int rwd = blk_cgroup_io_type(bio), cpu; struct blkg_iostat_set *bis; unsigned long flags; if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) return; /* Root-level stats are sourced from system-wide IO stats */ if (!cgroup_parent(blkcg->css.cgroup)) return; cpu = get_cpu(); bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); flags = u64_stats_update_begin_irqsave(&bis->sync); /* * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split * bio and we would have already accounted for the size of the bio. */ if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { bio_set_flag(bio, BIO_CGROUP_ACCT); bis->cur.bytes[rwd] += bio->bi_iter.bi_size; } bis->cur.ios[rwd]++; /* * If the iostat_cpu isn't in a lockless list, put it into the * list to indicate that a stat update is pending. */ if (!READ_ONCE(bis->lqueued)) { struct llist_head *lhead = this_cpu_ptr(blkcg->lhead); llist_add(&bis->lnode, lhead); WRITE_ONCE(bis->lqueued, true); } u64_stats_update_end_irqrestore(&bis->sync, flags); cgroup_rstat_updated(blkcg->css.cgroup, cpu); put_cpu(); } bool blk_cgroup_congested(void) { struct cgroup_subsys_state *css; bool ret = false; rcu_read_lock(); for (css = blkcg_css(); css; css = css->parent) { if (atomic_read(&css->cgroup->congestion_count)) { ret = true; break; } } rcu_read_unlock(); return ret; } module_param(blkcg_debug_stats, bool, 0644); MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
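/*
 * Editor's illustration, not part of the original file: a minimal sketch of
 * what a blkcg policy has to provide so that blkcg_policy_register() above
 * accepts it. The "myprot" names and the per-group weight field are
 * hypothetical; the pd_alloc_fn/pd_free_fn pairing and the embedding of
 * struct blkg_policy_data inside a larger per-group structure follow the
 * conventions used by the code above (see the pairing check in
 * blkcg_policy_register() and the pd handling in blkg_alloc()). Assumes the
 * declarations from block/blk-cgroup.h.
 */
struct myprot_grp {
	struct blkg_policy_data pd;	/* must be embedded for container_of() */
	u64 weight;			/* hypothetical per-group state */
};

static inline struct myprot_grp *pd_to_myprot(struct blkg_policy_data *pd)
{
	return container_of(pd, struct myprot_grp, pd);
}

static struct blkg_policy_data *myprot_pd_alloc(struct gendisk *disk,
						struct blkcg *blkcg, gfp_t gfp)
{
	struct myprot_grp *mg;

	/* allocated on the disk's node, mirroring blkg_alloc() above */
	mg = kzalloc_node(sizeof(*mg), gfp, disk->queue->node);
	if (!mg)
		return NULL;
	return &mg->pd;
}

static void myprot_pd_free(struct blkg_policy_data *pd)
{
	kfree(pd_to_myprot(pd));
}

static struct blkcg_policy blkcg_policy_myprot = {
	.pd_alloc_fn	= myprot_pd_alloc,
	.pd_free_fn	= myprot_pd_free,
};

static int __init myprot_init(void)
{
	/*
	 * Grabs a slot in blkcg_policy[]; per-device enablement would
	 * additionally go through blkcg_activate_policy().
	 */
	return blkcg_policy_register(&blkcg_policy_myprot);
}

static void __exit myprot_exit(void)
{
	blkcg_policy_unregister(&blkcg_policy_myprot);
}

module_init(myprot_init);
module_exit(myprot_exit);
MODULE_LICENSE("GPL");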
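/*
 * Editor's illustration, not part of the original file: how a policy's
 * cgroup interface file would typically consume the blkg_conf_* helpers
 * defined above. The "MAJ:MIN <weight>" input format, the myprot_* names
 * and the cftype wiring are hypothetical; blk-iocost and blk-iolatency are
 * the in-tree users of this pattern. Builds on the blkcg_policy_myprot
 * sketch above.
 */
static ssize_t myprot_weight_write(struct kernfs_open_file *of, char *buf,
				   size_t nbytes, loff_t off)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkg_conf_ctx ctx;
	unsigned long long weight;
	int ret;

	blkg_conf_init(&ctx, buf);

	/*
	 * Parses the "MAJ:MIN" prefix, opens the bdev and, on success,
	 * returns with the queue_lock held and ctx.blkg pointing at the
	 * blkg for this blkcg / device pair.
	 */
	ret = blkg_conf_prep(blkcg, &blkcg_policy_myprot, &ctx);
	if (ret)
		goto out;

	ret = -EINVAL;
	if (sscanf(ctx.body, "%llu", &weight) != 1)
		goto out;

	/* update per-device state under the queue_lock held by prep */
	pd_to_myprot(ctx.blkg->pd[blkcg_policy_myprot.plid])->weight = weight;
	ret = 0;
out:
	/* drops the locks taken by blkg_conf_prep() and puts the bdev */
	blkg_conf_exit(&ctx);
	return ret ?: nbytes;
}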
// SPDX-License-Identifier: GPL-2.0-only
/* Common code for 32 and 64-bit NUMA */
#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/of.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/ctype.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
#include <linux/topology.h>
#include <linux/sort.h>

#include <asm/e820/api.h>
#include <asm/proto.h>
#include <asm/dma.h>
#include <asm/amd_nb.h>

#include "numa_internal.h"

int numa_off;
nodemask_t numa_nodes_parsed __initdata;

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);

static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;

static int numa_distance_cnt;
static u8 *numa_distance;

static __init int numa_setup(char *opt)
{
	if (!opt)
		return -EINVAL;
	if (!strncmp(opt, "off", 3))
		numa_off = 1;
	if (!strncmp(opt, "fake=", 5))
		return numa_emu_cmdline(opt + 5);
	if (!strncmp(opt, "noacpi", 6))
		disable_srat();
	if (!strncmp(opt, "nohmat", 6))
		disable_hmat();

	return 0;
}
early_param("numa", numa_setup);

/*
 * apicid, cpu, node mappings
 */
s16 __apicid_to_node[MAX_LOCAL_APIC] = {
	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};

int numa_cpu_node(int cpu)
{
	u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu);

	if (apicid != BAD_APICID)
		return __apicid_to_node[apicid];
	return NUMA_NO_NODE;
}

cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);

/*
 * Map cpu index to node index
 */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

void numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	/* early setting, no percpu area yet */
	if (cpu_to_node_map) {
		cpu_to_node_map[cpu] = node;
		return;
	}

#ifdef CONFIG_DEBUG_PER_CPU_MAPS
	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
		dump_stack();
		return;
	}
#endif
	per_cpu(x86_cpu_to_node_map, cpu) = node;

	set_cpu_numa_node(cpu, node);
}

void numa_clear_node(int cpu)
{
	numa_set_node(cpu, NUMA_NO_NODE);
}

/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: cpumask_of_node() is not valid until after this is done.
 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
*/ void __init setup_node_to_cpumask_map(void) { unsigned int node; /* setup nr_node_ids if not done yet */ if (nr_node_ids == MAX_NUMNODES) setup_nr_node_ids(); /* allocate the map */ for (node = 0; node < nr_node_ids; node++) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids); } static int __init numa_add_memblk_to(int nid, u64 start, u64 end, struct numa_meminfo *mi) { /* ignore zero length blks */ if (start == end) return 0; /* whine about and ignore invalid blks */ if (start > end || nid < 0 || nid >= MAX_NUMNODES) { pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", nid, start, end - 1); return 0; } if (mi->nr_blks >= NR_NODE_MEMBLKS) { pr_err("too many memblk ranges\n"); return -EINVAL; } mi->blk[mi->nr_blks].start = start; mi->blk[mi->nr_blks].end = end; mi->blk[mi->nr_blks].nid = nid; mi->nr_blks++; return 0; } /** * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo * @idx: Index of memblk to remove * @mi: numa_meminfo to remove memblk from * * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and * decrementing @mi->nr_blks. */ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) { mi->nr_blks--; memmove(&mi->blk[idx], &mi->blk[idx + 1], (mi->nr_blks - idx) * sizeof(mi->blk[0])); } /** * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another * @dst: numa_meminfo to append block to * @idx: Index of memblk to remove * @src: numa_meminfo to remove memblk from */ static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx, struct numa_meminfo *src) { dst->blk[dst->nr_blks++] = src->blk[idx]; numa_remove_memblk_from(idx, src); } /** * numa_add_memblk - Add one numa_memblk to numa_meminfo * @nid: NUMA node ID of the new memblk * @start: Start address of the new memblk * @end: End address of the new memblk * * Add a new memblk to the default numa_meminfo. * * RETURNS: * 0 on success, -errno on failure. */ int __init numa_add_memblk(int nid, u64 start, u64 end) { return numa_add_memblk_to(nid, start, end, &numa_meminfo); } /* Allocate NODE_DATA for a node on the local memory */ static void __init alloc_node_data(int nid) { const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); u64 nd_pa; void *nd; int tnid; /* * Allocate node data. Try node-local memory and then any node. * Never allocate in DMA zone. */ nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); if (!nd_pa) { pr_err("Cannot find %zu bytes in any node (initial node: %d)\n", nd_size, nid); return; } nd = __va(nd_pa); /* report and initialize */ printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid, nd_pa, nd_pa + nd_size - 1); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (tnid != nid) printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); node_data[nid] = nd; memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); node_set_online(nid); } /** * numa_cleanup_meminfo - Cleanup a numa_meminfo * @mi: numa_meminfo to clean up * * Sanitize @mi by merging and removing unnecessary memblks. Also check for * conflicts and clear unused memblks. * * RETURNS: * 0 on success, -errno on failure. 
*/ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) { const u64 low = 0; const u64 high = PFN_PHYS(max_pfn); int i, j, k; /* first, trim all entries */ for (i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; /* move / save reserved memory ranges */ if (!memblock_overlaps_region(&memblock.memory, bi->start, bi->end - bi->start)) { numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi); continue; } /* make sure all non-reserved blocks are inside the limits */ bi->start = max(bi->start, low); /* preserve info for non-RAM areas above 'max_pfn': */ if (bi->end > high) { numa_add_memblk_to(bi->nid, high, bi->end, &numa_reserved_meminfo); bi->end = high; } /* and there's no empty block */ if (bi->start >= bi->end) numa_remove_memblk_from(i--, mi); } /* merge neighboring / overlapping entries */ for (i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; for (j = i + 1; j < mi->nr_blks; j++) { struct numa_memblk *bj = &mi->blk[j]; u64 start, end; /* * See whether there are overlapping blocks. Whine * about but allow overlaps of the same nid. They * will be merged below. */ if (bi->end > bj->start && bi->start < bj->end) { if (bi->nid != bj->nid) { pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n", bi->nid, bi->start, bi->end - 1, bj->nid, bj->start, bj->end - 1); return -EINVAL; } pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n", bi->nid, bi->start, bi->end - 1, bj->start, bj->end - 1); } /* * Join together blocks on the same node, holes * between which don't overlap with memory on other * nodes. */ if (bi->nid != bj->nid) continue; start = min(bi->start, bj->start); end = max(bi->end, bj->end); for (k = 0; k < mi->nr_blks; k++) { struct numa_memblk *bk = &mi->blk[k]; if (bi->nid == bk->nid) continue; if (start < bk->end && end > bk->start) break; } if (k < mi->nr_blks) continue; printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n", bi->nid, bi->start, bi->end - 1, bj->start, bj->end - 1, start, end - 1); bi->start = start; bi->end = end; numa_remove_memblk_from(j--, mi); } } /* clear unused ones */ for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { mi->blk[i].start = mi->blk[i].end = 0; mi->blk[i].nid = NUMA_NO_NODE; } return 0; } /* * Set nodes, which have memory in @mi, in *@nodemask. */ static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, const struct numa_meminfo *mi) { int i; for (i = 0; i < ARRAY_SIZE(mi->blk); i++) if (mi->blk[i].start != mi->blk[i].end && mi->blk[i].nid != NUMA_NO_NODE) node_set(mi->blk[i].nid, *nodemask); } /** * numa_reset_distance - Reset NUMA distance table * * The current table is freed. The next numa_set_distance() call will * create a new one. 
*/ void __init numa_reset_distance(void) { size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); /* numa_distance could be 1LU marking allocation failure, test cnt */ if (numa_distance_cnt) memblock_free(numa_distance, size); numa_distance_cnt = 0; numa_distance = NULL; /* enable table creation */ } static int __init numa_alloc_distance(void) { nodemask_t nodes_parsed; size_t size; int i, j, cnt = 0; u64 phys; /* size the new table and allocate it */ nodes_parsed = numa_nodes_parsed; numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); for_each_node_mask(i, nodes_parsed) cnt = i; cnt++; size = cnt * cnt * sizeof(numa_distance[0]); phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0, PFN_PHYS(max_pfn_mapped)); if (!phys) { pr_warn("Warning: can't allocate distance table!\n"); /* don't retry until explicitly reset */ numa_distance = (void *)1LU; return -ENOMEM; } numa_distance = __va(phys); numa_distance_cnt = cnt; /* fill with the default distances */ for (i = 0; i < cnt; i++) for (j = 0; j < cnt; j++) numa_distance[i * cnt + j] = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); return 0; } /** * numa_set_distance - Set NUMA distance from one NUMA to another * @from: the 'from' node to set distance * @to: the 'to' node to set distance * @distance: NUMA distance * * Set the distance from node @from to @to to @distance. If distance table * doesn't exist, one which is large enough to accommodate all the currently * known nodes will be created. * * If such table cannot be allocated, a warning is printed and further * calls are ignored until the distance table is reset with * numa_reset_distance(). * * If @from or @to is higher than the highest known node or lower than zero * at the time of table creation or @distance doesn't make sense, the call * is ignored. * This is to allow simplification of specific NUMA config implementations. */ void __init numa_set_distance(int from, int to, int distance) { if (!numa_distance && numa_alloc_distance() < 0) return; if (from >= numa_distance_cnt || to >= numa_distance_cnt || from < 0 || to < 0) { pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n", from, to, distance); return; } if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", from, to, distance); return; } numa_distance[from * numa_distance_cnt + to] = distance; } int __node_distance(int from, int to) { if (from >= numa_distance_cnt || to >= numa_distance_cnt) return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; return numa_distance[from * numa_distance_cnt + to]; } EXPORT_SYMBOL(__node_distance); /* * Mark all currently memblock-reserved physical memory (which covers the * kernel's own memory ranges) as hot-unswappable. */ static void __init numa_clear_kernel_node_hotplug(void) { nodemask_t reserved_nodemask = NODE_MASK_NONE; struct memblock_region *mb_region; int i; /* * We have to do some preprocessing of memblock regions, to * make them suitable for reservation. * * At this time, all memory regions reserved by memblock are * used by the kernel, but those regions are not split up * along node boundaries yet, and don't necessarily have their * node ID set yet either. * * So iterate over all memory known to the x86 architecture, * and use those ranges to set the nid in memblock.reserved. 
* This will split up the memblock regions along node * boundaries and will set the node IDs as well. */ for (i = 0; i < numa_meminfo.nr_blks; i++) { struct numa_memblk *mb = numa_meminfo.blk + i; int ret; ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid); WARN_ON_ONCE(ret); } /* * Now go over all reserved memblock regions, to construct a * node mask of all kernel reserved memory areas. * * [ Note, when booting with mem=nn[kMG] or in a kdump kernel, * numa_meminfo might not include all memblock.reserved * memory ranges, because quirks such as trim_snb_memory() * reserve specific pages for Sandy Bridge graphics. ] */ for_each_reserved_mem_region(mb_region) { int nid = memblock_get_region_node(mb_region); if (nid != MAX_NUMNODES) node_set(nid, reserved_nodemask); } /* * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory * belonging to the reserved node mask. * * Note that this will include memory regions that reside * on nodes that contain kernel memory - entire nodes * become hot-unpluggable: */ for (i = 0; i < numa_meminfo.nr_blks; i++) { struct numa_memblk *mb = numa_meminfo.blk + i; if (!node_isset(mb->nid, reserved_nodemask)) continue; memblock_clear_hotplug(mb->start, mb->end - mb->start); } } static int __init numa_register_memblks(struct numa_meminfo *mi) { int i, nid; /* Account for nodes with cpus and no memory */ node_possible_map = numa_nodes_parsed; numa_nodemask_from_meminfo(&node_possible_map, mi); if (WARN_ON(nodes_empty(node_possible_map))) return -EINVAL; for (i = 0; i < mi->nr_blks; i++) { struct numa_memblk *mb = &mi->blk[i]; memblock_set_node(mb->start, mb->end - mb->start, &memblock.memory, mb->nid); } /* * At very early time, the kernel have to use some memory such as * loading the kernel image. We cannot prevent this anyway. So any * node the kernel resides in should be un-hotpluggable. * * And when we come here, alloc node data won't fail. */ numa_clear_kernel_node_hotplug(); /* * If sections array is gonna be used for pfn -> nid mapping, check * whether its granularity is fine enough. */ if (IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS)) { unsigned long pfn_align = node_map_pfn_alignment(); if (pfn_align && pfn_align < PAGES_PER_SECTION) { pr_warn("Node alignment %LuMB < min %LuMB, rejecting NUMA config\n", PFN_PHYS(pfn_align) >> 20, PFN_PHYS(PAGES_PER_SECTION) >> 20); return -EINVAL; } } if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; /* Finally register nodes. */ for_each_node_mask(nid, node_possible_map) { u64 start = PFN_PHYS(max_pfn); u64 end = 0; for (i = 0; i < mi->nr_blks; i++) { if (nid != mi->blk[i].nid) continue; start = min(mi->blk[i].start, start); end = max(mi->blk[i].end, end); } if (start >= end) continue; alloc_node_data(nid); } /* Dump memblock with node info and return. */ memblock_dump_all(); return 0; } /* * There are unfortunately some poorly designed mainboards around that * only connect memory to a single CPU. This breaks the 1:1 cpu->node * mapping. To avoid this fill in the mapping for all possible CPUs, * as the number of CPUs is not known yet. We round robin the existing * nodes. 
*/ static void __init numa_init_array(void) { int rr, i; rr = first_node(node_online_map); for (i = 0; i < nr_cpu_ids; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); rr = next_node_in(rr, node_online_map); } } static int __init numa_init(int (*init_func)(void)) { int i; int ret; for (i = 0; i < MAX_LOCAL_APIC; i++) set_apicid_to_node(i, NUMA_NO_NODE); nodes_clear(numa_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); memset(&numa_meminfo, 0, sizeof(numa_meminfo)); WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory, MAX_NUMNODES)); WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved, MAX_NUMNODES)); /* In case that parsing SRAT failed. */ WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX)); numa_reset_distance(); ret = init_func(); if (ret < 0) return ret; /* * We reset memblock back to the top-down direction * here because if we configured ACPI_NUMA, we have * parsed SRAT in init_func(). It is ok to have the * reset here even if we did't configure ACPI_NUMA * or acpi numa init fails and fallbacks to dummy * numa init. */ memblock_set_bottom_up(false); ret = numa_cleanup_meminfo(&numa_meminfo); if (ret < 0) return ret; numa_emulation(&numa_meminfo, numa_distance_cnt); ret = numa_register_memblks(&numa_meminfo); if (ret < 0) return ret; for (i = 0; i < nr_cpu_ids; i++) { int nid = early_cpu_to_node(i); if (nid == NUMA_NO_NODE) continue; if (!node_online(nid)) numa_clear_node(i); } numa_init_array(); return 0; } /** * dummy_numa_init - Fallback dummy NUMA init * * Used if there's no underlying NUMA architecture, NUMA initialization * fails, or NUMA is disabled on the command line. * * Must online at least one node and add memory blocks that cover all * allowed memory. This function must not fail. */ static int __init dummy_numa_init(void) { printk(KERN_INFO "%s\n", numa_off ? "NUMA turned off" : "No NUMA configuration found"); printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n", 0LLU, PFN_PHYS(max_pfn) - 1); node_set(0, numa_nodes_parsed); numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); return 0; } /** * x86_numa_init - Initialize NUMA * * Try each configured NUMA initialization method until one succeeds. The * last fallback is dummy single node config encompassing whole memory and * never fails. */ void __init x86_numa_init(void) { if (!numa_off) { #ifdef CONFIG_ACPI_NUMA if (!numa_init(x86_acpi_numa_init)) return; #endif #ifdef CONFIG_AMD_NUMA if (!numa_init(amd_numa_init)) return; #endif if (acpi_disabled && !numa_init(of_numa_init)) return; } numa_init(dummy_numa_init); } /* * A node may exist which has one or more Generic Initiators but no CPUs and no * memory. * * This function must be called after init_cpu_to_node(), to ensure that any * memoryless CPU nodes have already been brought online, and before the * node_data[nid] is needed for zone list setup in build_all_zonelists(). * * When this function is called, any nodes containing either memory and/or CPUs * will already be online and there is no need to do anything extra, even if * they also contain one or more Generic Initiators. */ void __init init_gi_nodes(void) { int nid; /* * Exclude this node from * bringup_nonboot_cpus * cpu_up * __try_online_node * register_one_node * because node_subsys is not initialized yet. * TODO remove dependency on node_online */ for_each_node_state(nid, N_GENERIC_INITIATOR) if (!node_online(nid)) node_set_online(nid); } /* * Setup early cpu_to_node. 
* * Populate cpu_to_node[] only if x86_cpu_to_apicid[], * and apicid_to_node[] tables have valid entries for a CPU. * This means we skip cpu_to_node[] initialisation for NUMA * emulation and faking node case (when running a kernel compiled * for NUMA on a non NUMA box), which is OK as cpu_to_node[] * is already initialized in a round robin manner at numa_init_array, * prior to this call, and this initialization is good enough * for the fake NUMA cases. * * Called before the per_cpu areas are setup. */ void __init init_cpu_to_node(void) { int cpu; u32 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); BUG_ON(cpu_to_apicid == NULL); for_each_possible_cpu(cpu) { int node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) continue; /* * Exclude this node from * bringup_nonboot_cpus * cpu_up * __try_online_node * register_one_node * because node_subsys is not initialized yet. * TODO remove dependency on node_online */ if (!node_online(node)) node_set_online(node); numa_set_node(cpu, node); } } #ifndef CONFIG_DEBUG_PER_CPU_MAPS # ifndef CONFIG_NUMA_EMU void numa_add_cpu(int cpu) { cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } void numa_remove_cpu(int cpu) { cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } # endif /* !CONFIG_NUMA_EMU */ #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ int __cpu_to_node(int cpu) { if (early_per_cpu_ptr(x86_cpu_to_node_map)) { printk(KERN_WARNING "cpu_to_node(%d): usage too early!\n", cpu); dump_stack(); return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; } return per_cpu(x86_cpu_to_node_map, cpu); } EXPORT_SYMBOL(__cpu_to_node); /* * Same function as cpu_to_node() but used if called before the * per_cpu areas are setup. */ int early_cpu_to_node(int cpu) { if (early_per_cpu_ptr(x86_cpu_to_node_map)) return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; if (!cpu_possible(cpu)) { printk(KERN_WARNING "early_cpu_to_node(%d): no per_cpu area!\n", cpu); dump_stack(); return NUMA_NO_NODE; } return per_cpu(x86_cpu_to_node_map, cpu); } void debug_cpumask_set_cpu(int cpu, int node, bool enable) { struct cpumask *mask; if (node == NUMA_NO_NODE) { /* early_cpu_to_node() already emits a warning and trace */ return; } mask = node_to_cpumask_map[node]; if (!cpumask_available(mask)) { pr_err("node_to_cpumask_map[%i] NULL\n", node); dump_stack(); return; } if (enable) cpumask_set_cpu(cpu, mask); else cpumask_clear_cpu(cpu, mask); printk(KERN_DEBUG "%s cpu %d node %d: mask now %*pbl\n", enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, cpumask_pr_args(mask)); return; } # ifndef CONFIG_NUMA_EMU static void numa_set_cpumask(int cpu, bool enable) { debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); } void numa_add_cpu(int cpu) { numa_set_cpumask(cpu, true); } void numa_remove_cpu(int cpu) { numa_set_cpumask(cpu, false); } # endif /* !CONFIG_NUMA_EMU */ /* * Returns a pointer to the bitmask of CPUs on Node 'node'. 
*/ const struct cpumask *cpumask_of_node(int node) { if ((unsigned)node >= nr_node_ids) { printk(KERN_WARNING "cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n", node, nr_node_ids); dump_stack(); return cpu_none_mask; } if (!cpumask_available(node_to_cpumask_map[node])) { printk(KERN_WARNING "cpumask_of_node(%d): no node_to_cpumask_map!\n", node); dump_stack(); return cpu_online_mask; } return node_to_cpumask_map[node]; } EXPORT_SYMBOL(cpumask_of_node); #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ #ifdef CONFIG_NUMA_KEEP_MEMINFO static int meminfo_to_nid(struct numa_meminfo *mi, u64 start) { int i; for (i = 0; i < mi->nr_blks; i++) if (mi->blk[i].start <= start && mi->blk[i].end > start) return mi->blk[i].nid; return NUMA_NO_NODE; } int phys_to_target_node(phys_addr_t start) { int nid = meminfo_to_nid(&numa_meminfo, start); /* * Prefer online nodes, but if reserved memory might be * hot-added continue the search with reserved ranges. */ if (nid != NUMA_NO_NODE) return nid; return meminfo_to_nid(&numa_reserved_meminfo, start); } EXPORT_SYMBOL_GPL(phys_to_target_node); int memory_add_physaddr_to_nid(u64 start) { int nid = meminfo_to_nid(&numa_meminfo, start); if (nid == NUMA_NO_NODE) nid = numa_meminfo.blk[0].nid; return nid; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); static int __init cmp_memblk(const void *a, const void *b) { const struct numa_memblk *ma = *(const struct numa_memblk **)a; const struct numa_memblk *mb = *(const struct numa_memblk **)b; return (ma->start > mb->start) - (ma->start < mb->start); } static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; /** * numa_fill_memblks - Fill gaps in numa_meminfo memblks * @start: address to begin fill * @end: address to end fill * * Find and extend numa_meminfo memblks to cover the physical * address range @start-@end * * RETURNS: * 0 : Success * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) { struct numa_memblk **blk = &numa_memblk_list[0]; struct numa_meminfo *mi = &numa_meminfo; int count = 0; u64 prev_end; /* * Create a list of pointers to numa_meminfo memblks that * overlap start, end. The list is used to make in-place * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; if (memblock_addrs_overlap(start, end - start, bi->start, bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } } if (!count) return NUMA_NO_MEMBLK; /* Sort the list of pointers in memblk->start order */ sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL); /* Make sure the first/last memblks include start/end */ blk[0]->start = min(blk[0]->start, start); blk[count - 1]->end = max(blk[count - 1]->end, end); /* * Fill any gaps by tracking the previous memblks * end address and backfilling to it if needed. */ prev_end = blk[0]->end; for (int i = 1; i < count; i++) { struct numa_memblk *curr = blk[i]; if (prev_end >= curr->start) { if (prev_end < curr->end) prev_end = curr->end; } else { curr->start = prev_end; prev_end = curr->end; } } return 0; } #endif
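/*
 * Illustrative userspace sketch, not part of the kernel sources above: the
 * NUMA distance code keeps a flat cnt*cnt table indexed row-major as
 * distance[from * cnt + to], defaulting the diagonal to LOCAL_DISTANCE and
 * everything else to REMOTE_DISTANCE.  The sketch_* names are invented for
 * this example, and the 10/20 constants are assumed to match the usual
 * LOCAL_DISTANCE/REMOTE_DISTANCE defaults.
 */
#include <stdio.h>
#include <stdlib.h>

#define SKETCH_LOCAL_DISTANCE   10      /* assumed default for "same node" */
#define SKETCH_REMOTE_DISTANCE  20      /* assumed default for "other node" */

static unsigned char *sketch_distance;
static int sketch_cnt;

/* Roughly mirrors numa_alloc_distance(): size the table, fill in defaults. */
static int sketch_alloc_distance(int cnt)
{
        sketch_distance = malloc(cnt * cnt * sizeof(*sketch_distance));
        if (!sketch_distance)
                return -1;
        sketch_cnt = cnt;
        for (int i = 0; i < cnt; i++)
                for (int j = 0; j < cnt; j++)
                        sketch_distance[i * cnt + j] = i == j ?
                                SKETCH_LOCAL_DISTANCE : SKETCH_REMOTE_DISTANCE;
        return 0;
}

/* Roughly mirrors the indexing in numa_set_distance()/__node_distance(). */
static void sketch_set_distance(int from, int to, int distance)
{
        if (from < 0 || to < 0 || from >= sketch_cnt || to >= sketch_cnt)
                return;
        sketch_distance[from * sketch_cnt + to] = distance;
}

static int sketch_node_distance(int from, int to)
{
        if (from >= sketch_cnt || to >= sketch_cnt)
                return from == to ? SKETCH_LOCAL_DISTANCE : SKETCH_REMOTE_DISTANCE;
        return sketch_distance[from * sketch_cnt + to];
}

int main(void)
{
        if (sketch_alloc_distance(4))
                return 1;
        sketch_set_distance(0, 3, 31);          /* e.g. one SLIT-style entry */
        printf("0->3: %d, 3->0: %d, 1->1: %d\n",
               sketch_node_distance(0, 3),      /* 31 */
               sketch_node_distance(3, 0),      /* 20, still the default */
               sketch_node_distance(1, 1));     /* 10 */
        free(sketch_distance);
        return 0;
}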
// SPDX-License-Identifier: GPL-2.0-only
/***************************************************************************
 *   Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org>  *
 *                                                                         *
 *   Based on Logitech G13 driver (v0.4)                                   *
 *     Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu>   *
 *                                                                         *
 ***************************************************************************/

#include <linux/hid.h>
#include <linux/fb.h>
#include <linux/lcd.h>

#include "hid-picolcd.h"

/*
 * lcd class device
 */
static int picolcd_get_contrast(struct lcd_device *ldev)
{
        struct picolcd_data *data = lcd_get_data(ldev);
        return data->lcd_contrast;
}

static int picolcd_set_contrast(struct lcd_device *ldev, int contrast)
{
        struct picolcd_data *data = lcd_get_data(ldev);
        struct hid_report *report = picolcd_out_report(REPORT_CONTRAST, data->hdev);
        unsigned long flags;

        if (!report || report->maxfield != 1 || report->field[0]->report_count != 1)
                return -ENODEV;

        data->lcd_contrast = contrast & 0x0ff;
        spin_lock_irqsave(&data->lock, flags);
        hid_set_field(report->field[0], 0, data->lcd_contrast);
        if (!(data->status & PICOLCD_FAILED))
                hid_hw_request(data->hdev, report, HID_REQ_SET_REPORT);
        spin_unlock_irqrestore(&data->lock, flags);
        return 0;
}

static int picolcd_check_lcd_fb(struct lcd_device *ldev, struct fb_info *fb)
{
        return fb && fb == picolcd_fbinfo((struct picolcd_data *)lcd_get_data(ldev));
}

static struct lcd_ops picolcd_lcdops = {
        .get_contrast = picolcd_get_contrast,
        .set_contrast = picolcd_set_contrast,
        .check_fb     = picolcd_check_lcd_fb,
};

int picolcd_init_lcd(struct picolcd_data *data, struct hid_report *report)
{
        struct device *dev = &data->hdev->dev;
        struct lcd_device *ldev;

        if (!report)
                return -ENODEV;
        if (report->maxfield != 1 || report->field[0]->report_count != 1 ||
                        report->field[0]->report_size != 8) {
                dev_err(dev, "unsupported CONTRAST report");
                return -EINVAL;
        }

        ldev = lcd_device_register(dev_name(dev), dev, data, &picolcd_lcdops);
        if (IS_ERR(ldev)) {
                dev_err(dev, "failed to register LCD\n");
                return PTR_ERR(ldev);
        }
        ldev->props.max_contrast = 0x0ff;
        data->lcd_contrast = 0xe5;
        data->lcd = ldev;
        picolcd_set_contrast(ldev, 0xe5);
        return 0;
}

void picolcd_exit_lcd(struct picolcd_data *data)
{
        struct lcd_device *ldev = data->lcd;

        data->lcd = NULL;
        lcd_device_unregister(ldev);
}

int picolcd_resume_lcd(struct picolcd_data *data)
{
        if (!data->lcd)
                return 0;
        return picolcd_set_contrast(data->lcd, data->lcd_contrast);
}
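/*
 * Illustrative userspace sketch, not part of the driver above: the picolcd
 * LCD code caches the last contrast value masked to 8 bits
 * (contrast & 0x0ff) so that picolcd_resume_lcd() can simply replay it after
 * a suspend.  The sketch_* names and the 0x1e5 input value are invented for
 * this example only.
 */
#include <stdio.h>

struct sketch_lcd {
        unsigned int contrast;  /* cached 8-bit contrast, as in data->lcd_contrast */
};

/* Mirrors picolcd_set_contrast(): mask to 8 bits, remember, then "send". */
static int sketch_set_contrast(struct sketch_lcd *lcd, int contrast)
{
        lcd->contrast = contrast & 0x0ff;
        printf("send CONTRAST report: 0x%02x\n", lcd->contrast);
        return 0;
}

/* Mirrors picolcd_resume_lcd(): replay the cached value. */
static int sketch_resume(struct sketch_lcd *lcd)
{
        return sketch_set_contrast(lcd, lcd->contrast);
}

int main(void)
{
        struct sketch_lcd lcd = { 0 };

        sketch_set_contrast(&lcd, 0x1e5);       /* masked down to 0xe5 */
        sketch_resume(&lcd);                    /* re-sends 0xe5 */
        return 0;
}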
// SPDX-License-Identifier: GPL-2.0 /* * message.c - synchronous message handling * * Released under the GPLv2 only. */ #include <linux/acpi.h> #include <linux/pci.h> /* for scatterlist macros */ #include <linux/usb.h> #include <linux/module.h> #include <linux/of.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/timer.h> #include <linux/ctype.h> #include <linux/nls.h> #include <linux/device.h> #include <linux/scatterlist.h> #include <linux/usb/cdc.h> #include <linux/usb/quirks.h> #include <linux/usb/hcd.h> /* for usbcore internals */ #include <linux/usb/of.h> #include <asm/byteorder.h> #include "usb.h" static void cancel_async_set_config(struct usb_device *udev); struct api_context { struct completion done; int status; }; static void usb_api_blocking_completion(struct urb *urb) { struct api_context *ctx = urb->context; ctx->status = urb->status; complete(&ctx->done); } /* * Starts urb and waits for completion or timeout. Note that this call * is NOT interruptible. Many device driver i/o requests should be * interruptible and therefore these drivers should implement their * own interruptible routines. */ static int usb_start_wait_urb(struct urb *urb, int timeout, int *actual_length) { struct api_context ctx; unsigned long expire; int retval; init_completion(&ctx.done); urb->context = &ctx; urb->actual_length = 0; retval = usb_submit_urb(urb, GFP_NOIO); if (unlikely(retval)) goto out; expire = timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT; if (!wait_for_completion_timeout(&ctx.done, expire)) { usb_kill_urb(urb); retval = (ctx.status == -ENOENT ? -ETIMEDOUT : ctx.status); dev_dbg(&urb->dev->dev, "%s timed out on ep%d%s len=%u/%u\n", current->comm, usb_endpoint_num(&urb->ep->desc), usb_urb_dir_in(urb) ?
"in" : "out", urb->actual_length, urb->transfer_buffer_length); } else retval = ctx.status; out: if (actual_length) *actual_length = urb->actual_length; usb_free_urb(urb); return retval; } /*-------------------------------------------------------------------*/ /* returns status (negative) or length (positive) */ static int usb_internal_control_msg(struct usb_device *usb_dev, unsigned int pipe, struct usb_ctrlrequest *cmd, void *data, int len, int timeout) { struct urb *urb; int retv; int length; urb = usb_alloc_urb(0, GFP_NOIO); if (!urb) return -ENOMEM; usb_fill_control_urb(urb, usb_dev, pipe, (unsigned char *)cmd, data, len, usb_api_blocking_completion, NULL); retv = usb_start_wait_urb(urb, timeout, &length); if (retv < 0) return retv; else return length; } /** * usb_control_msg - Builds a control urb, sends it off and waits for completion * @dev: pointer to the usb device to send the message to * @pipe: endpoint "pipe" to send the message to * @request: USB message request value * @requesttype: USB message request type value * @value: USB message value * @index: USB message index value * @data: pointer to the data to send * @size: length in bytes of the data to send * @timeout: time in msecs to wait for the message to complete before timing * out (if 0 the wait is forever) * * Context: task context, might sleep. * * This function sends a simple control message to a specified endpoint and * waits for the message to complete, or timeout. * * Don't use this function from within an interrupt context. If you need * an asynchronous message, or need to send a message from within interrupt * context, use usb_submit_urb(). If a thread in your driver uses this call, * make sure your disconnect() method can wait for it to complete. Since you * don't have a handle on the URB used, you can't cancel the request. * * Return: If successful, the number of bytes transferred. Otherwise, a negative * error number. */ int usb_control_msg(struct usb_device *dev, unsigned int pipe, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *data, __u16 size, int timeout) { struct usb_ctrlrequest *dr; int ret; dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_NOIO); if (!dr) return -ENOMEM; dr->bRequestType = requesttype; dr->bRequest = request; dr->wValue = cpu_to_le16(value); dr->wIndex = cpu_to_le16(index); dr->wLength = cpu_to_le16(size); ret = usb_internal_control_msg(dev, pipe, dr, data, size, timeout); /* Linger a bit, prior to the next control message. */ if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) msleep(200); kfree(dr); return ret; } EXPORT_SYMBOL_GPL(usb_control_msg); /** * usb_control_msg_send - Builds a control "send" message, sends it off and waits for completion * @dev: pointer to the usb device to send the message to * @endpoint: endpoint to send the message to * @request: USB message request value * @requesttype: USB message request type value * @value: USB message value * @index: USB message index value * @driver_data: pointer to the data to send * @size: length in bytes of the data to send * @timeout: time in msecs to wait for the message to complete before timing * out (if 0 the wait is forever) * @memflags: the flags for memory allocation for buffers * * Context: !in_interrupt () * * This function sends a control message to a specified endpoint that is not * expected to fill in a response (i.e. a "send message") and waits for the * message to complete, or timeout. * * Do not use this function from within an interrupt context. 
If you need * an asynchronous message, or need to send a message from within interrupt * context, use usb_submit_urb(). If a thread in your driver uses this call, * make sure your disconnect() method can wait for it to complete. Since you * don't have a handle on the URB used, you can't cancel the request. * * The data pointer can be made to a reference on the stack, or anywhere else, * as it will not be modified at all. This does not have the restriction that * usb_control_msg() has where the data pointer must be to dynamically allocated * memory (i.e. memory that can be successfully DMAed to a device). * * Return: If successful, 0 is returned, Otherwise, a negative error number. */ int usb_control_msg_send(struct usb_device *dev, __u8 endpoint, __u8 request, __u8 requesttype, __u16 value, __u16 index, const void *driver_data, __u16 size, int timeout, gfp_t memflags) { unsigned int pipe = usb_sndctrlpipe(dev, endpoint); int ret; u8 *data = NULL; if (size) { data = kmemdup(driver_data, size, memflags); if (!data) return -ENOMEM; } ret = usb_control_msg(dev, pipe, request, requesttype, value, index, data, size, timeout); kfree(data); if (ret < 0) return ret; return 0; } EXPORT_SYMBOL_GPL(usb_control_msg_send); /** * usb_control_msg_recv - Builds a control "receive" message, sends it off and waits for completion * @dev: pointer to the usb device to send the message to * @endpoint: endpoint to send the message to * @request: USB message request value * @requesttype: USB message request type value * @value: USB message value * @index: USB message index value * @driver_data: pointer to the data to be filled in by the message * @size: length in bytes of the data to be received * @timeout: time in msecs to wait for the message to complete before timing * out (if 0 the wait is forever) * @memflags: the flags for memory allocation for buffers * * Context: !in_interrupt () * * This function sends a control message to a specified endpoint that is * expected to fill in a response (i.e. a "receive message") and waits for the * message to complete, or timeout. * * Do not use this function from within an interrupt context. If you need * an asynchronous message, or need to send a message from within interrupt * context, use usb_submit_urb(). If a thread in your driver uses this call, * make sure your disconnect() method can wait for it to complete. Since you * don't have a handle on the URB used, you can't cancel the request. * * The data pointer can be made to a reference on the stack, or anywhere else * that can be successfully written to. This function does not have the * restriction that usb_control_msg() has where the data pointer must be to * dynamically allocated memory (i.e. memory that can be successfully DMAed to a * device). * * The "whole" message must be properly received from the device in order for * this function to be successful. If a device returns less than the expected * amount of data, then the function will fail. Do not use this for messages * where a variable amount of data might be returned. * * Return: If successful, 0 is returned, Otherwise, a negative error number. 
*/ int usb_control_msg_recv(struct usb_device *dev, __u8 endpoint, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *driver_data, __u16 size, int timeout, gfp_t memflags) { unsigned int pipe = usb_rcvctrlpipe(dev, endpoint); int ret; u8 *data; if (!size || !driver_data) return -EINVAL; data = kmalloc(size, memflags); if (!data) return -ENOMEM; ret = usb_control_msg(dev, pipe, request, requesttype, value, index, data, size, timeout); if (ret < 0) goto exit; if (ret == size) { memcpy(driver_data, data, size); ret = 0; } else { ret = -EREMOTEIO; } exit: kfree(data); return ret; } EXPORT_SYMBOL_GPL(usb_control_msg_recv); /** * usb_interrupt_msg - Builds an interrupt urb, sends it off and waits for completion * @usb_dev: pointer to the usb device to send the message to * @pipe: endpoint "pipe" to send the message to * @data: pointer to the data to send * @len: length in bytes of the data to send * @actual_length: pointer to a location to put the actual length transferred * in bytes * @timeout: time in msecs to wait for the message to complete before * timing out (if 0 the wait is forever) * * Context: task context, might sleep. * * This function sends a simple interrupt message to a specified endpoint and * waits for the message to complete, or timeout. * * Don't use this function from within an interrupt context. If you need * an asynchronous message, or need to send a message from within interrupt * context, use usb_submit_urb() If a thread in your driver uses this call, * make sure your disconnect() method can wait for it to complete. Since you * don't have a handle on the URB used, you can't cancel the request. * * Return: * If successful, 0. Otherwise a negative error number. The number of actual * bytes transferred will be stored in the @actual_length parameter. */ int usb_interrupt_msg(struct usb_device *usb_dev, unsigned int pipe, void *data, int len, int *actual_length, int timeout) { return usb_bulk_msg(usb_dev, pipe, data, len, actual_length, timeout); } EXPORT_SYMBOL_GPL(usb_interrupt_msg); /** * usb_bulk_msg - Builds a bulk urb, sends it off and waits for completion * @usb_dev: pointer to the usb device to send the message to * @pipe: endpoint "pipe" to send the message to * @data: pointer to the data to send * @len: length in bytes of the data to send * @actual_length: pointer to a location to put the actual length transferred * in bytes * @timeout: time in msecs to wait for the message to complete before * timing out (if 0 the wait is forever) * * Context: task context, might sleep. * * This function sends a simple bulk message to a specified endpoint * and waits for the message to complete, or timeout. * * Don't use this function from within an interrupt context. If you need * an asynchronous message, or need to send a message from within interrupt * context, use usb_submit_urb() If a thread in your driver uses this call, * make sure your disconnect() method can wait for it to complete. Since you * don't have a handle on the URB used, you can't cancel the request. * * Because there is no usb_interrupt_msg() and no USBDEVFS_INTERRUPT ioctl, * users are forced to abuse this routine by using it to submit URBs for * interrupt endpoints. We will take the liberty of creating an interrupt URB * (with the default interval) if the target is an interrupt endpoint. * * Return: * If successful, 0. Otherwise a negative error number. The number of actual * bytes transferred will be stored in the @actual_length parameter. 
* */ int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, void *data, int len, int *actual_length, int timeout) { struct urb *urb; struct usb_host_endpoint *ep; ep = usb_pipe_endpoint(usb_dev, pipe); if (!ep || len < 0) return -EINVAL; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return -ENOMEM; if ((ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT) { pipe = (pipe & ~(3 << 30)) | (PIPE_INTERRUPT << 30); usb_fill_int_urb(urb, usb_dev, pipe, data, len, usb_api_blocking_completion, NULL, ep->desc.bInterval); } else usb_fill_bulk_urb(urb, usb_dev, pipe, data, len, usb_api_blocking_completion, NULL); return usb_start_wait_urb(urb, timeout, actual_length); } EXPORT_SYMBOL_GPL(usb_bulk_msg); /*-------------------------------------------------------------------*/ static void sg_clean(struct usb_sg_request *io) { if (io->urbs) { while (io->entries--) usb_free_urb(io->urbs[io->entries]); kfree(io->urbs); io->urbs = NULL; } io->dev = NULL; } static void sg_complete(struct urb *urb) { unsigned long flags; struct usb_sg_request *io = urb->context; int status = urb->status; spin_lock_irqsave(&io->lock, flags); /* In 2.5 we require hcds' endpoint queues not to progress after fault * reports, until the completion callback (this!) returns. That lets * device driver code (like this routine) unlink queued urbs first, * if it needs to, since the HC won't work on them at all. So it's * not possible for page N+1 to overwrite page N, and so on. * * That's only for "hard" faults; "soft" faults (unlinks) sometimes * complete before the HCD can get requests away from hardware, * though never during cleanup after a hard fault. */ if (io->status && (io->status != -ECONNRESET || status != -ECONNRESET) && urb->actual_length) { dev_err(io->dev->bus->controller, "dev %s ep%d%s scatterlist error %d/%d\n", io->dev->devpath, usb_endpoint_num(&urb->ep->desc), usb_urb_dir_in(urb) ? "in" : "out", status, io->status); /* BUG (); */ } if (io->status == 0 && status && status != -ECONNRESET) { int i, found, retval; io->status = status; /* the previous urbs, and this one, completed already. * unlink pending urbs so they won't rx/tx bad data. * careful: unlink can sometimes be synchronous... */ spin_unlock_irqrestore(&io->lock, flags); for (i = 0, found = 0; i < io->entries; i++) { if (!io->urbs[i]) continue; if (found) { usb_block_urb(io->urbs[i]); retval = usb_unlink_urb(io->urbs[i]); if (retval != -EINPROGRESS && retval != -ENODEV && retval != -EBUSY && retval != -EIDRM) dev_err(&io->dev->dev, "%s, unlink --> %d\n", __func__, retval); } else if (urb == io->urbs[i]) found = 1; } spin_lock_irqsave(&io->lock, flags); } /* on the last completion, signal usb_sg_wait() */ io->bytes += urb->actual_length; io->count--; if (!io->count) complete(&io->complete); spin_unlock_irqrestore(&io->lock, flags); } /** * usb_sg_init - initializes scatterlist-based bulk/interrupt I/O request * @io: request block being initialized. until usb_sg_wait() returns, * treat this as a pointer to an opaque block of memory, * @dev: the usb device that will send or receive the data * @pipe: endpoint "pipe" used to transfer the data * @period: polling rate for interrupt endpoints, in frames or * (for high speed endpoints) microframes; ignored for bulk * @sg: scatterlist entries * @nents: how many entries in the scatterlist * @length: how many bytes to send from the scatterlist, or zero to * send every byte identified in the list. 
* @mem_flags: SLAB_* flags affecting memory allocations in this call * * This initializes a scatter/gather request, allocating resources such as * I/O mappings and urb memory (except maybe memory used by USB controller * drivers). * * The request must be issued using usb_sg_wait(), which waits for the I/O to * complete (or to be canceled) and then cleans up all resources allocated by * usb_sg_init(). * * The request may be canceled with usb_sg_cancel(), either before or after * usb_sg_wait() is called. * * Return: Zero for success, else a negative errno value. */ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev, unsigned pipe, unsigned period, struct scatterlist *sg, int nents, size_t length, gfp_t mem_flags) { int i; int urb_flags; int use_sg; if (!io || !dev || !sg || usb_pipecontrol(pipe) || usb_pipeisoc(pipe) || nents <= 0) return -EINVAL; spin_lock_init(&io->lock); io->dev = dev; io->pipe = pipe; if (dev->bus->sg_tablesize > 0) { use_sg = true; io->entries = 1; } else { use_sg = false; io->entries = nents; } /* initialize all the urbs we'll use */ io->urbs = kmalloc_array(io->entries, sizeof(*io->urbs), mem_flags); if (!io->urbs) goto nomem; urb_flags = URB_NO_INTERRUPT; if (usb_pipein(pipe)) urb_flags |= URB_SHORT_NOT_OK; for_each_sg(sg, sg, io->entries, i) { struct urb *urb; unsigned len; urb = usb_alloc_urb(0, mem_flags); if (!urb) { io->entries = i; goto nomem; } io->urbs[i] = urb; urb->dev = NULL; urb->pipe = pipe; urb->interval = period; urb->transfer_flags = urb_flags; urb->complete = sg_complete; urb->context = io; urb->sg = sg; if (use_sg) { /* There is no single transfer buffer */ urb->transfer_buffer = NULL; urb->num_sgs = nents; /* A length of zero means transfer the whole sg list */ len = length; if (len == 0) { struct scatterlist *sg2; int j; for_each_sg(sg, sg2, nents, j) len += sg2->length; } } else { /* * Some systems can't use DMA; they use PIO instead. * For their sakes, transfer_buffer is set whenever * possible. */ if (!PageHighMem(sg_page(sg))) urb->transfer_buffer = sg_virt(sg); else urb->transfer_buffer = NULL; len = sg->length; if (length) { len = min_t(size_t, len, length); length -= len; if (length == 0) io->entries = i + 1; } } urb->transfer_buffer_length = len; } io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT; /* transaction state */ io->count = io->entries; io->status = 0; io->bytes = 0; init_completion(&io->complete); return 0; nomem: sg_clean(io); return -ENOMEM; } EXPORT_SYMBOL_GPL(usb_sg_init); /** * usb_sg_wait - synchronously execute scatter/gather request * @io: request block handle, as initialized with usb_sg_init(). * some fields become accessible when this call returns. * * Context: task context, might sleep. * * This function blocks until the specified I/O operation completes. It * leverages the grouping of the related I/O requests to get good transfer * rates, by queueing the requests. At higher speeds, such queuing can * significantly improve USB throughput. * * There are three kinds of completion for this function. * * (1) success, where io->status is zero. The number of io->bytes * transferred is as requested. * (2) error, where io->status is a negative errno value. The number * of io->bytes transferred before the error is usually less * than requested, and can be nonzero. * (3) cancellation, a type of error with status -ECONNRESET that * is initiated by usb_sg_cancel(). * * When this function returns, all memory allocated through usb_sg_init() or * this call will have been freed. 
The request block parameter may still be * passed to usb_sg_cancel(), or it may be freed. It could also be * reinitialized and then reused. * * Data Transfer Rates: * * Bulk transfers are valid for full or high speed endpoints. * The best full speed data rate is 19 packets of 64 bytes each * per frame, or 1216 bytes per millisecond. * The best high speed data rate is 13 packets of 512 bytes each * per microframe, or 52 KBytes per millisecond. * * The reason to use interrupt transfers through this API would most likely * be to reserve high speed bandwidth, where up to 24 KBytes per millisecond * could be transferred. That capability is less useful for low or full * speed interrupt endpoints, which allow at most one packet per millisecond, * of at most 8 or 64 bytes (respectively). * * It is not necessary to call this function to reserve bandwidth for devices * under an xHCI host controller, as the bandwidth is reserved when the * configuration or interface alt setting is selected. */ void usb_sg_wait(struct usb_sg_request *io) { int i; int entries = io->entries; /* queue the urbs. */ spin_lock_irq(&io->lock); i = 0; while (i < entries && !io->status) { int retval; io->urbs[i]->dev = io->dev; spin_unlock_irq(&io->lock); retval = usb_submit_urb(io->urbs[i], GFP_NOIO); switch (retval) { /* maybe we retrying will recover */ case -ENXIO: /* hc didn't queue this one */ case -EAGAIN: case -ENOMEM: retval = 0; yield(); break; /* no error? continue immediately. * * NOTE: to work better with UHCI (4K I/O buffer may * need 3K of TDs) it may be good to limit how many * URBs are queued at once; N milliseconds? */ case 0: ++i; cpu_relax(); break; /* fail any uncompleted urbs */ default: io->urbs[i]->status = retval; dev_dbg(&io->dev->dev, "%s, submit --> %d\n", __func__, retval); usb_sg_cancel(io); } spin_lock_irq(&io->lock); if (retval && (io->status == 0 || io->status == -ECONNRESET)) io->status = retval; } io->count -= entries - i; if (io->count == 0) complete(&io->complete); spin_unlock_irq(&io->lock); /* OK, yes, this could be packaged as non-blocking. * So could the submit loop above ... but it's easier to * solve neither problem than to solve both! */ wait_for_completion(&io->complete); sg_clean(io); } EXPORT_SYMBOL_GPL(usb_sg_wait); /** * usb_sg_cancel - stop scatter/gather i/o issued by usb_sg_wait() * @io: request block, initialized with usb_sg_init() * * This stops a request after it has been started by usb_sg_wait(). * It can also prevents one initialized by usb_sg_init() from starting, * so that call just frees resources allocated to the request. 
*/ void usb_sg_cancel(struct usb_sg_request *io) { unsigned long flags; int i, retval; spin_lock_irqsave(&io->lock, flags); if (io->status || io->count == 0) { spin_unlock_irqrestore(&io->lock, flags); return; } /* shut everything down */ io->status = -ECONNRESET; io->count++; /* Keep the request alive until we're done */ spin_unlock_irqrestore(&io->lock, flags); for (i = io->entries - 1; i >= 0; --i) { usb_block_urb(io->urbs[i]); retval = usb_unlink_urb(io->urbs[i]); if (retval != -EINPROGRESS && retval != -ENODEV && retval != -EBUSY && retval != -EIDRM) dev_warn(&io->dev->dev, "%s, unlink --> %d\n", __func__, retval); } spin_lock_irqsave(&io->lock, flags); io->count--; if (!io->count) complete(&io->complete); spin_unlock_irqrestore(&io->lock, flags); } EXPORT_SYMBOL_GPL(usb_sg_cancel); /*-------------------------------------------------------------------*/ /** * usb_get_descriptor - issues a generic GET_DESCRIPTOR request * @dev: the device whose descriptor is being retrieved * @type: the descriptor type (USB_DT_*) * @index: the number of the descriptor * @buf: where to put the descriptor * @size: how big is "buf"? * * Context: task context, might sleep. * * Gets a USB descriptor. Convenience functions exist to simplify * getting some types of descriptors. Use * usb_get_string() or usb_string() for USB_DT_STRING. * Device (USB_DT_DEVICE) and configuration descriptors (USB_DT_CONFIG) * are part of the device structure. * In addition to a number of USB-standard descriptors, some * devices also use class-specific or vendor-specific descriptors. * * This call is synchronous, and may not be used in an interrupt context. * * Return: The number of bytes received on success, or else the status code * returned by the underlying usb_control_msg() call. */ int usb_get_descriptor(struct usb_device *dev, unsigned char type, unsigned char index, void *buf, int size) { int i; int result; if (size <= 0) /* No point in asking for no data */ return -EINVAL; memset(buf, 0, size); /* Make sure we parse really received data */ for (i = 0; i < 3; ++i) { /* retry on length 0 or error; some devices are flakey */ result = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, (type << 8) + index, 0, buf, size, USB_CTRL_GET_TIMEOUT); if (result <= 0 && result != -ETIMEDOUT) continue; if (result > 1 && ((u8 *)buf)[1] != type) { result = -ENODATA; continue; } break; } return result; } EXPORT_SYMBOL_GPL(usb_get_descriptor); /** * usb_get_string - gets a string descriptor * @dev: the device whose string descriptor is being retrieved * @langid: code for language chosen (from string descriptor zero) * @index: the number of the descriptor * @buf: where to put the string * @size: how big is "buf"? * * Context: task context, might sleep. * * Retrieves a string, encoded using UTF-16LE (Unicode, 16 bits per character, * in little-endian byte order). * The usb_string() function will often be a convenient way to turn * these strings into kernel-printable form. * * Strings may be referenced in device, configuration, interface, or other * descriptors, and could also be used in vendor-specific ways. * * This call is synchronous, and may not be used in an interrupt context. * * Return: The number of bytes received on success, or else the status code * returned by the underlying usb_control_msg() call. 
*/ static int usb_get_string(struct usb_device *dev, unsigned short langid, unsigned char index, void *buf, int size) { int i; int result; if (size <= 0) /* No point in asking for no data */ return -EINVAL; for (i = 0; i < 3; ++i) { /* retry on length 0 or stall; some devices are flakey */ result = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, (USB_DT_STRING << 8) + index, langid, buf, size, USB_CTRL_GET_TIMEOUT); if (result == 0 || result == -EPIPE) continue; if (result > 1 && ((u8 *) buf)[1] != USB_DT_STRING) { result = -ENODATA; continue; } break; } return result; } static void usb_try_string_workarounds(unsigned char *buf, int *length) { int newlength, oldlength = *length; for (newlength = 2; newlength + 1 < oldlength; newlength += 2) if (!isprint(buf[newlength]) || buf[newlength + 1]) break; if (newlength > 2) { buf[0] = newlength; *length = newlength; } } static int usb_string_sub(struct usb_device *dev, unsigned int langid, unsigned int index, unsigned char *buf) { int rc; /* Try to read the string descriptor by asking for the maximum * possible number of bytes */ if (dev->quirks & USB_QUIRK_STRING_FETCH_255) rc = -EIO; else rc = usb_get_string(dev, langid, index, buf, 255); /* If that failed try to read the descriptor length, then * ask for just that many bytes */ if (rc < 2) { rc = usb_get_string(dev, langid, index, buf, 2); if (rc == 2) rc = usb_get_string(dev, langid, index, buf, buf[0]); } if (rc >= 2) { if (!buf[0] && !buf[1]) usb_try_string_workarounds(buf, &rc); /* There might be extra junk at the end of the descriptor */ if (buf[0] < rc) rc = buf[0]; rc = rc - (rc & 1); /* force a multiple of two */ } if (rc < 2) rc = (rc < 0 ? rc : -EINVAL); return rc; } static int usb_get_langid(struct usb_device *dev, unsigned char *tbuf) { int err; if (dev->have_langid) return 0; if (dev->string_langid < 0) return -EPIPE; err = usb_string_sub(dev, 0, 0, tbuf); /* If the string was reported but is malformed, default to english * (0x0409) */ if (err == -ENODATA || (err > 0 && err < 4)) { dev->string_langid = 0x0409; dev->have_langid = 1; dev_err(&dev->dev, "language id specifier not provided by device, defaulting to English\n"); return 0; } /* In case of all other errors, we assume the device is not able to * deal with strings at all. Set string_langid to -1 in order to * prevent any string to be retrieved from the device */ if (err < 0) { dev_info(&dev->dev, "string descriptor 0 read error: %d\n", err); dev->string_langid = -1; return -EPIPE; } /* always use the first langid listed */ dev->string_langid = tbuf[2] | (tbuf[3] << 8); dev->have_langid = 1; dev_dbg(&dev->dev, "default language 0x%04x\n", dev->string_langid); return 0; } /** * usb_string - returns UTF-8 version of a string descriptor * @dev: the device whose string descriptor is being retrieved * @index: the number of the descriptor * @buf: where to put the string * @size: how big is "buf"? * * Context: task context, might sleep. * * This converts the UTF-16LE encoded strings returned by devices, from * usb_get_string_descriptor(), to null-terminated UTF-8 encoded ones * that are more usable in most kernel contexts. Note that this function * chooses strings in the first language supported by the device. * * This call is synchronous, and may not be used in an interrupt context. * * Return: length of the string (>= 0) or usb_control_msg status (< 0). 
*/ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) { unsigned char *tbuf; int err; if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; if (size <= 0 || !buf) return -EINVAL; buf[0] = 0; if (index <= 0 || index >= 256) return -EINVAL; tbuf = kmalloc(256, GFP_NOIO); if (!tbuf) return -ENOMEM; err = usb_get_langid(dev, tbuf); if (err < 0) goto errout; err = usb_string_sub(dev, dev->string_langid, index, tbuf); if (err < 0) goto errout; size--; /* leave room for trailing NULL char in output buffer */ err = utf16s_to_utf8s((wchar_t *) &tbuf[2], (err - 2) / 2, UTF16_LITTLE_ENDIAN, buf, size); buf[err] = 0; if (tbuf[1] != USB_DT_STRING) dev_dbg(&dev->dev, "wrong descriptor type %02x for string %d (\"%s\")\n", tbuf[1], index, buf); errout: kfree(tbuf); return err; } EXPORT_SYMBOL_GPL(usb_string); /* one UTF-8-encoded 16-bit character has at most three bytes */ #define MAX_USB_STRING_SIZE (127 * 3 + 1) /** * usb_cache_string - read a string descriptor and cache it for later use * @udev: the device whose string descriptor is being read * @index: the descriptor index * * Return: A pointer to a kmalloc'ed buffer containing the descriptor string, * or %NULL if the index is 0 or the string could not be read. */ char *usb_cache_string(struct usb_device *udev, int index) { char *buf; char *smallbuf = NULL; int len; if (index <= 0) return NULL; buf = kmalloc(MAX_USB_STRING_SIZE, GFP_NOIO); if (buf) { len = usb_string(udev, index, buf, MAX_USB_STRING_SIZE); if (len > 0) { smallbuf = kmalloc(++len, GFP_NOIO); if (!smallbuf) return buf; memcpy(smallbuf, buf, len); } kfree(buf); } return smallbuf; } EXPORT_SYMBOL_GPL(usb_cache_string); /* * usb_get_device_descriptor - read the device descriptor * @udev: the device whose device descriptor should be read * * Context: task context, might sleep. * * Not exported, only for use by the core. If drivers really want to read * the device descriptor directly, they can call usb_get_descriptor() with * type = USB_DT_DEVICE and index = 0. * * Returns: a pointer to a dynamically allocated usb_device_descriptor * structure (which the caller must deallocate), or an ERR_PTR value. */ struct usb_device_descriptor *usb_get_device_descriptor(struct usb_device *udev) { struct usb_device_descriptor *desc; int ret; desc = kmalloc(sizeof(*desc), GFP_NOIO); if (!desc) return ERR_PTR(-ENOMEM); ret = usb_get_descriptor(udev, USB_DT_DEVICE, 0, desc, sizeof(*desc)); if (ret == sizeof(*desc)) return desc; if (ret >= 0) ret = -EMSGSIZE; kfree(desc); return ERR_PTR(ret); } /* * usb_set_isoch_delay - informs the device of the packet transmit delay * @dev: the device whose delay is to be informed * Context: task context, might sleep * * Since this is an optional request, we don't bother if it fails. */ int usb_set_isoch_delay(struct usb_device *dev) { /* skip hub devices */ if (dev->descriptor.bDeviceClass == USB_CLASS_HUB) return 0; /* skip non-SS/non-SSP devices */ if (dev->speed < USB_SPEED_SUPER) return 0; return usb_control_msg_send(dev, 0, USB_REQ_SET_ISOCH_DELAY, USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, dev->hub_delay, 0, NULL, 0, USB_CTRL_SET_TIMEOUT, GFP_NOIO); } /** * usb_get_status - issues a GET_STATUS call * @dev: the device whose status is being checked * @recip: USB_RECIP_*; for device, interface, or endpoint * @type: USB_STATUS_TYPE_*; for standard or PTM status types * @target: zero (for device), else interface or endpoint number * @data: pointer to two bytes of bitmap data * * Context: task context, might sleep. 
* * Returns device, interface, or endpoint status. Normally only of * interest to see if the device is self powered, or has enabled the * remote wakeup facility; or whether a bulk or interrupt endpoint * is halted ("stalled"). * * Bits in these status bitmaps are set using the SET_FEATURE request, * and cleared using the CLEAR_FEATURE request. The usb_clear_halt() * function should be used to clear halt ("stall") status. * * This call is synchronous, and may not be used in an interrupt context. * * Returns 0 and the status value in *@data (in host byte order) on success, * or else the status code from the underlying usb_control_msg() call. */ int usb_get_status(struct usb_device *dev, int recip, int type, int target, void *data) { int ret; void *status; int length; switch (type) { case USB_STATUS_TYPE_STANDARD: length = 2; break; case USB_STATUS_TYPE_PTM: if (recip != USB_RECIP_DEVICE) return -EINVAL; length = 4; break; default: return -EINVAL; } status = kmalloc(length, GFP_KERNEL); if (!status) return -ENOMEM; ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | recip, USB_STATUS_TYPE_STANDARD, target, status, length, USB_CTRL_GET_TIMEOUT); switch (ret) { case 4: if (type != USB_STATUS_TYPE_PTM) { ret = -EIO; break; } *(u32 *) data = le32_to_cpu(*(__le32 *) status); ret = 0; break; case 2: if (type != USB_STATUS_TYPE_STANDARD) { ret = -EIO; break; } *(u16 *) data = le16_to_cpu(*(__le16 *) status); ret = 0; break; default: ret = -EIO; } kfree(status); return ret; } EXPORT_SYMBOL_GPL(usb_get_status); /** * usb_clear_halt - tells device to clear endpoint halt/stall condition * @dev: device whose endpoint is halted * @pipe: endpoint "pipe" being cleared * * Context: task context, might sleep. * * This is used to clear halt conditions for bulk and interrupt endpoints, * as reported by URB completion status. Endpoints that are halted are * sometimes referred to as being "stalled". Such endpoints are unable * to transmit or receive data until the halt status is cleared. Any URBs * queued for such an endpoint should normally be unlinked by the driver * before clearing the halt condition, as described in sections 5.7.5 * and 5.8.5 of the USB 2.0 spec. * * Note that control and isochronous endpoints don't halt, although control * endpoints report "protocol stall" (for unsupported requests) using the * same status code used to report a true stall. * * This call is synchronous, and may not be used in an interrupt context. * If a thread in your driver uses this call, make sure your disconnect() * method can wait for it to complete. * * Return: Zero on success, or else the status code returned by the * underlying usb_control_msg() call. */ int usb_clear_halt(struct usb_device *dev, int pipe) { int result; int endp = usb_pipeendpoint(pipe); if (usb_pipein(pipe)) endp |= USB_DIR_IN; /* we don't care if it wasn't halted first. in fact some devices * (like some ibmcam model 1 units) seem to expect hosts to make * this request for iso endpoints, which can't halt! */ result = usb_control_msg_send(dev, 0, USB_REQ_CLEAR_FEATURE, USB_RECIP_ENDPOINT, USB_ENDPOINT_HALT, endp, NULL, 0, USB_CTRL_SET_TIMEOUT, GFP_NOIO); /* don't un-halt or force to DATA0 except on success */ if (result) return result; /* NOTE: seems like Microsoft and Apple don't bother verifying * the clear "took", so some devices could lock up if you check... * such as the Hagiwara FlashGate DUAL. So we won't bother. 
* * NOTE: make sure the logic here doesn't diverge much from * the copy in usb-storage, for as long as we need two copies. */ usb_reset_endpoint(dev, endp); return 0; } EXPORT_SYMBOL_GPL(usb_clear_halt); static int create_intf_ep_devs(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_host_interface *alt = intf->cur_altsetting; int i; if (intf->ep_devs_created || intf->unregistering) return 0; for (i = 0; i < alt->desc.bNumEndpoints; ++i) (void) usb_create_ep_devs(&intf->dev, &alt->endpoint[i], udev); intf->ep_devs_created = 1; return 0; } static void remove_intf_ep_devs(struct usb_interface *intf) { struct usb_host_interface *alt = intf->cur_altsetting; int i; if (!intf->ep_devs_created) return; for (i = 0; i < alt->desc.bNumEndpoints; ++i) usb_remove_ep_devs(&alt->endpoint[i]); intf->ep_devs_created = 0; } /** * usb_disable_endpoint -- Disable an endpoint by address * @dev: the device whose endpoint is being disabled * @epaddr: the endpoint's address. Endpoint number for output, * endpoint number + USB_DIR_IN for input * @reset_hardware: flag to erase any endpoint state stored in the * controller hardware * * Disables the endpoint for URB submission and nukes all pending URBs. * If @reset_hardware is set then also deallocates hcd/hardware state * for the endpoint. */ void usb_disable_endpoint(struct usb_device *dev, unsigned int epaddr, bool reset_hardware) { unsigned int epnum = epaddr & USB_ENDPOINT_NUMBER_MASK; struct usb_host_endpoint *ep; if (!dev) return; if (usb_endpoint_out(epaddr)) { ep = dev->ep_out[epnum]; if (reset_hardware && epnum != 0) dev->ep_out[epnum] = NULL; } else { ep = dev->ep_in[epnum]; if (reset_hardware && epnum != 0) dev->ep_in[epnum] = NULL; } if (ep) { ep->enabled = 0; usb_hcd_flush_endpoint(dev, ep); if (reset_hardware) usb_hcd_disable_endpoint(dev, ep); } } /** * usb_reset_endpoint - Reset an endpoint's state. * @dev: the device whose endpoint is to be reset * @epaddr: the endpoint's address. Endpoint number for output, * endpoint number + USB_DIR_IN for input * * Resets any host-side endpoint state such as the toggle bit, * sequence number or current window. */ void usb_reset_endpoint(struct usb_device *dev, unsigned int epaddr) { unsigned int epnum = epaddr & USB_ENDPOINT_NUMBER_MASK; struct usb_host_endpoint *ep; if (usb_endpoint_out(epaddr)) ep = dev->ep_out[epnum]; else ep = dev->ep_in[epnum]; if (ep) usb_hcd_reset_endpoint(dev, ep); } EXPORT_SYMBOL_GPL(usb_reset_endpoint); /** * usb_disable_interface -- Disable all endpoints for an interface * @dev: the device whose interface is being disabled * @intf: pointer to the interface descriptor * @reset_hardware: flag to erase any endpoint state stored in the * controller hardware * * Disables all the endpoints for the interface's current altsetting. */ void usb_disable_interface(struct usb_device *dev, struct usb_interface *intf, bool reset_hardware) { struct usb_host_interface *alt = intf->cur_altsetting; int i; for (i = 0; i < alt->desc.bNumEndpoints; ++i) { usb_disable_endpoint(dev, alt->endpoint[i].desc.bEndpointAddress, reset_hardware); } } /* * usb_disable_device_endpoints -- Disable all endpoints for a device * @dev: the device whose endpoints are being disabled * @skip_ep0: 0 to disable endpoint 0, 1 to skip it. */ static void usb_disable_device_endpoints(struct usb_device *dev, int skip_ep0) { struct usb_hcd *hcd = bus_to_hcd(dev->bus); int i; if (hcd->driver->check_bandwidth) { /* First pass: Cancel URBs, leave endpoint pointers intact. 
*/ for (i = skip_ep0; i < 16; ++i) { usb_disable_endpoint(dev, i, false); usb_disable_endpoint(dev, i + USB_DIR_IN, false); } /* Remove endpoints from the host controller internal state */ mutex_lock(hcd->bandwidth_mutex); usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL); mutex_unlock(hcd->bandwidth_mutex); } /* Second pass: remove endpoint pointers */ for (i = skip_ep0; i < 16; ++i) { usb_disable_endpoint(dev, i, true); usb_disable_endpoint(dev, i + USB_DIR_IN, true); } } /** * usb_disable_device - Disable all the endpoints for a USB device * @dev: the device whose endpoints are being disabled * @skip_ep0: 0 to disable endpoint 0, 1 to skip it. * * Disables all the device's endpoints, potentially including endpoint 0. * Deallocates hcd/hardware state for the endpoints (nuking all or most * pending urbs) and usbcore state for the interfaces, so that usbcore * must usb_set_configuration() before any interfaces could be used. */ void usb_disable_device(struct usb_device *dev, int skip_ep0) { int i; /* getting rid of interfaces will disconnect * any drivers bound to them (a key side effect) */ if (dev->actconfig) { /* * FIXME: In order to avoid self-deadlock involving the * bandwidth_mutex, we have to mark all the interfaces * before unregistering any of them. */ for (i = 0; i < dev->actconfig->desc.bNumInterfaces; i++) dev->actconfig->interface[i]->unregistering = 1; for (i = 0; i < dev->actconfig->desc.bNumInterfaces; i++) { struct usb_interface *interface; /* remove this interface if it has been registered */ interface = dev->actconfig->interface[i]; if (!device_is_registered(&interface->dev)) continue; dev_dbg(&dev->dev, "unregistering interface %s\n", dev_name(&interface->dev)); remove_intf_ep_devs(interface); device_del(&interface->dev); } /* Now that the interfaces are unbound, nobody should * try to access them. */ for (i = 0; i < dev->actconfig->desc.bNumInterfaces; i++) { put_device(&dev->actconfig->interface[i]->dev); dev->actconfig->interface[i] = NULL; } usb_disable_usb2_hardware_lpm(dev); usb_unlocked_disable_lpm(dev); usb_disable_ltm(dev); dev->actconfig = NULL; if (dev->state == USB_STATE_CONFIGURED) usb_set_device_state(dev, USB_STATE_ADDRESS); } dev_dbg(&dev->dev, "%s nuking %s URBs\n", __func__, skip_ep0 ? "non-ep0" : "all"); usb_disable_device_endpoints(dev, skip_ep0); } /** * usb_enable_endpoint - Enable an endpoint for USB communications * @dev: the device whose interface is being enabled * @ep: the endpoint * @reset_ep: flag to reset the endpoint state * * Resets the endpoint state if asked, and sets dev->ep_{in,out} pointers. * For control endpoints, both the input and output sides are handled. */ void usb_enable_endpoint(struct usb_device *dev, struct usb_host_endpoint *ep, bool reset_ep) { int epnum = usb_endpoint_num(&ep->desc); int is_out = usb_endpoint_dir_out(&ep->desc); int is_control = usb_endpoint_xfer_control(&ep->desc); if (reset_ep) usb_hcd_reset_endpoint(dev, ep); if (is_out || is_control) dev->ep_out[epnum] = ep; if (!is_out || is_control) dev->ep_in[epnum] = ep; ep->enabled = 1; } /** * usb_enable_interface - Enable all the endpoints for an interface * @dev: the device whose interface is being enabled * @intf: pointer to the interface descriptor * @reset_eps: flag to reset the endpoints' state * * Enables all the endpoints for the interface's current altsetting. 
*/ void usb_enable_interface(struct usb_device *dev, struct usb_interface *intf, bool reset_eps) { struct usb_host_interface *alt = intf->cur_altsetting; int i; for (i = 0; i < alt->desc.bNumEndpoints; ++i) usb_enable_endpoint(dev, &alt->endpoint[i], reset_eps); } /** * usb_set_interface - Makes a particular alternate setting be current * @dev: the device whose interface is being updated * @interface: the interface being updated * @alternate: the setting being chosen. * * Context: task context, might sleep. * * This is used to enable data transfers on interfaces that may not * be enabled by default. Not all devices support such configurability. * Only the driver bound to an interface may change its setting. * * Within any given configuration, each interface may have several * alternative settings. These are often used to control levels of * bandwidth consumption. For example, the default setting for a high * speed interrupt endpoint may not send more than 64 bytes per microframe, * while interrupt transfers of up to 3KBytes per microframe are legal. * Also, isochronous endpoints may never be part of an * interface's default setting. To access such bandwidth, alternate * interface settings must be made current. * * Note that in the Linux USB subsystem, bandwidth associated with * an endpoint in a given alternate setting is not reserved until an URB * is submitted that needs that bandwidth. Some other operating systems * allocate bandwidth early, when a configuration is chosen. * * xHCI reserves bandwidth and configures the alternate setting in * usb_hcd_alloc_bandwidth(). If it fails the original interface altsetting * may be disabled. Drivers cannot rely on any particular alternate * setting being in effect after a failure. * * This call is synchronous, and may not be used in an interrupt context. * Also, drivers must not change altsettings while urbs are scheduled for * endpoints in that interface; all such urbs must first be completed * (perhaps forced by unlinking). If a thread in your driver uses this call, * make sure your disconnect() method can wait for it to complete. * * Return: Zero on success, or else the status code returned by the * underlying usb_control_msg() call. */ int usb_set_interface(struct usb_device *dev, int interface, int alternate) { struct usb_interface *iface; struct usb_host_interface *alt; struct usb_hcd *hcd = bus_to_hcd(dev->bus); int i, ret, manual = 0; unsigned int epaddr; unsigned int pipe; if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; iface = usb_ifnum_to_if(dev, interface); if (!iface) { dev_dbg(&dev->dev, "selecting invalid interface %d\n", interface); return -EINVAL; } if (iface->unregistering) return -ENODEV; alt = usb_altnum_to_altsetting(iface, alternate); if (!alt) { dev_warn(&dev->dev, "selecting invalid altsetting %d\n", alternate); return -EINVAL; } /* * usb3 hosts configure the interface in usb_hcd_alloc_bandwidth, * including freeing dropped endpoint ring buffers. * Make sure the interface endpoints are flushed before that */ usb_disable_interface(dev, iface, false); /* Make sure we have enough bandwidth for this alternate interface. * Remove the current alt setting and add the new alt setting. */ mutex_lock(hcd->bandwidth_mutex); /* Disable LPM, and re-enable it once the new alt setting is installed, * so that the xHCI driver can recalculate the U1/U2 timeouts. 
*/ if (usb_disable_lpm(dev)) { dev_err(&iface->dev, "%s Failed to disable LPM\n", __func__); mutex_unlock(hcd->bandwidth_mutex); return -ENOMEM; } /* Changing alt-setting also frees any allocated streams */ for (i = 0; i < iface->cur_altsetting->desc.bNumEndpoints; i++) iface->cur_altsetting->endpoint[i].streams = 0; ret = usb_hcd_alloc_bandwidth(dev, NULL, iface->cur_altsetting, alt); if (ret < 0) { dev_info(&dev->dev, "Not enough bandwidth for altsetting %d\n", alternate); usb_enable_lpm(dev); mutex_unlock(hcd->bandwidth_mutex); return ret; } if (dev->quirks & USB_QUIRK_NO_SET_INTF) ret = -EPIPE; else ret = usb_control_msg_send(dev, 0, USB_REQ_SET_INTERFACE, USB_RECIP_INTERFACE, alternate, interface, NULL, 0, 5000, GFP_NOIO); /* 9.4.10 says devices don't need this and are free to STALL the * request if the interface only has one alternate setting. */ if (ret == -EPIPE && iface->num_altsetting == 1) { dev_dbg(&dev->dev, "manual set_interface for iface %d, alt %d\n", interface, alternate); manual = 1; } else if (ret) { /* Re-instate the old alt setting */ usb_hcd_alloc_bandwidth(dev, NULL, alt, iface->cur_altsetting); usb_enable_lpm(dev); mutex_unlock(hcd->bandwidth_mutex); return ret; } mutex_unlock(hcd->bandwidth_mutex); /* FIXME drivers shouldn't need to replicate/bugfix the logic here * when they implement async or easily-killable versions of this or * other "should-be-internal" functions (like clear_halt). * should hcd+usbcore postprocess control requests? */ /* prevent submissions using previous endpoint settings */ if (iface->cur_altsetting != alt) { remove_intf_ep_devs(iface); usb_remove_sysfs_intf_files(iface); } usb_disable_interface(dev, iface, true); iface->cur_altsetting = alt; /* Now that the interface is installed, re-enable LPM. */ usb_unlocked_enable_lpm(dev); /* If the interface only has one altsetting and the device didn't * accept the request, we attempt to carry out the equivalent action * by manually clearing the HALT feature for each endpoint in the * new altsetting. */ if (manual) { for (i = 0; i < alt->desc.bNumEndpoints; i++) { epaddr = alt->endpoint[i].desc.bEndpointAddress; pipe = __create_pipe(dev, USB_ENDPOINT_NUMBER_MASK & epaddr) | (usb_endpoint_out(epaddr) ? USB_DIR_OUT : USB_DIR_IN); usb_clear_halt(dev, pipe); } } /* 9.1.1.5: reset toggles for all endpoints in the new altsetting * * Note: * Despite EP0 is always present in all interfaces/AS, the list of * endpoints from the descriptor does not contain EP0. Due to its * omnipresence one might expect EP0 being considered "affected" by * any SetInterface request and hence assume toggles need to be reset. * However, EP0 toggles are re-synced for every individual transfer * during the SETUP stage - hence EP0 toggles are "don't care" here. * (Likewise, EP0 never "halts" on well designed devices.) */ usb_enable_interface(dev, iface, true); if (device_is_registered(&iface->dev)) { usb_create_sysfs_intf_files(iface); create_intf_ep_devs(iface); } return 0; } EXPORT_SYMBOL_GPL(usb_set_interface); /** * usb_reset_configuration - lightweight device reset * @dev: the device whose configuration is being reset * * This issues a standard SET_CONFIGURATION request to the device using * the current configuration. The effect is to reset most USB-related * state in the device, including interface altsettings (reset to zero), * endpoint halts (cleared), and endpoint state (only for bulk and interrupt * endpoints). Other usbcore state is unchanged, including bindings of * usb device drivers to interfaces. 
* * Because this affects multiple interfaces, avoid using this with composite * (multi-interface) devices. Instead, the driver for each interface may * use usb_set_interface() on the interfaces it claims. Be careful though; * some devices don't support the SET_INTERFACE request, and others won't * reset all the interface state (notably endpoint state). Resetting the whole * configuration would affect other drivers' interfaces. * * The caller must own the device lock. * * Return: Zero on success, else a negative error code. * * If this routine fails the device will probably be in an unusable state * with endpoints disabled, and interfaces only partially enabled. */ int usb_reset_configuration(struct usb_device *dev) { int i, retval; struct usb_host_config *config; struct usb_hcd *hcd = bus_to_hcd(dev->bus); if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; /* caller must have locked the device and must own * the usb bus readlock (so driver bindings are stable); * calls during probe() are fine */ usb_disable_device_endpoints(dev, 1); /* skip ep0*/ config = dev->actconfig; retval = 0; mutex_lock(hcd->bandwidth_mutex); /* Disable LPM, and re-enable it once the configuration is reset, so * that the xHCI driver can recalculate the U1/U2 timeouts. */ if (usb_disable_lpm(dev)) { dev_err(&dev->dev, "%s Failed to disable LPM\n", __func__); mutex_unlock(hcd->bandwidth_mutex); return -ENOMEM; } /* xHCI adds all endpoints in usb_hcd_alloc_bandwidth */ retval = usb_hcd_alloc_bandwidth(dev, config, NULL, NULL); if (retval < 0) { usb_enable_lpm(dev); mutex_unlock(hcd->bandwidth_mutex); return retval; } retval = usb_control_msg_send(dev, 0, USB_REQ_SET_CONFIGURATION, 0, config->desc.bConfigurationValue, 0, NULL, 0, USB_CTRL_SET_TIMEOUT, GFP_NOIO); if (retval) { usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL); usb_enable_lpm(dev); mutex_unlock(hcd->bandwidth_mutex); return retval; } mutex_unlock(hcd->bandwidth_mutex); /* re-init hc/hcd interface/endpoint state */ for (i = 0; i < config->desc.bNumInterfaces; i++) { struct usb_interface *intf = config->interface[i]; struct usb_host_interface *alt; alt = usb_altnum_to_altsetting(intf, 0); /* No altsetting 0? We'll assume the first altsetting. * We could use a GetInterface call, but if a device is * so non-compliant that it doesn't have altsetting 0 * then I wouldn't trust its reply anyway. */ if (!alt) alt = &intf->altsetting[0]; if (alt != intf->cur_altsetting) { remove_intf_ep_devs(intf); usb_remove_sysfs_intf_files(intf); } intf->cur_altsetting = alt; usb_enable_interface(dev, intf, true); if (device_is_registered(&intf->dev)) { usb_create_sysfs_intf_files(intf); create_intf_ep_devs(intf); } } /* Now that the interfaces are installed, re-enable LPM. 
*/ usb_unlocked_enable_lpm(dev); return 0; } EXPORT_SYMBOL_GPL(usb_reset_configuration); static void usb_release_interface(struct device *dev) { struct usb_interface *intf = to_usb_interface(dev); struct usb_interface_cache *intfc = altsetting_to_usb_interface_cache(intf->altsetting); kref_put(&intfc->ref, usb_release_interface_cache); usb_put_dev(interface_to_usbdev(intf)); of_node_put(dev->of_node); kfree(intf); } /* * usb_deauthorize_interface - deauthorize an USB interface * * @intf: USB interface structure */ void usb_deauthorize_interface(struct usb_interface *intf) { struct device *dev = &intf->dev; device_lock(dev->parent); if (intf->authorized) { device_lock(dev); intf->authorized = 0; device_unlock(dev); usb_forced_unbind_intf(intf); } device_unlock(dev->parent); } /* * usb_authorize_interface - authorize an USB interface * * @intf: USB interface structure */ void usb_authorize_interface(struct usb_interface *intf) { struct device *dev = &intf->dev; if (!intf->authorized) { device_lock(dev); intf->authorized = 1; /* authorize interface */ device_unlock(dev); } } static int usb_if_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct usb_device *usb_dev; const struct usb_interface *intf; const struct usb_host_interface *alt; intf = to_usb_interface(dev); usb_dev = interface_to_usbdev(intf); alt = intf->cur_altsetting; if (add_uevent_var(env, "INTERFACE=%d/%d/%d", alt->desc.bInterfaceClass, alt->desc.bInterfaceSubClass, alt->desc.bInterfaceProtocol)) return -ENOMEM; if (add_uevent_var(env, "MODALIAS=usb:" "v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02Xic%02Xisc%02Xip%02Xin%02X", le16_to_cpu(usb_dev->descriptor.idVendor), le16_to_cpu(usb_dev->descriptor.idProduct), le16_to_cpu(usb_dev->descriptor.bcdDevice), usb_dev->descriptor.bDeviceClass, usb_dev->descriptor.bDeviceSubClass, usb_dev->descriptor.bDeviceProtocol, alt->desc.bInterfaceClass, alt->desc.bInterfaceSubClass, alt->desc.bInterfaceProtocol, alt->desc.bInterfaceNumber)) return -ENOMEM; return 0; } const struct device_type usb_if_device_type = { .name = "usb_interface", .release = usb_release_interface, .uevent = usb_if_uevent, }; static struct usb_interface_assoc_descriptor *find_iad(struct usb_device *dev, struct usb_host_config *config, u8 inum) { struct usb_interface_assoc_descriptor *retval = NULL; struct usb_interface_assoc_descriptor *intf_assoc; int first_intf; int last_intf; int i; for (i = 0; (i < USB_MAXIADS && config->intf_assoc[i]); i++) { intf_assoc = config->intf_assoc[i]; if (intf_assoc->bInterfaceCount == 0) continue; first_intf = intf_assoc->bFirstInterface; last_intf = first_intf + (intf_assoc->bInterfaceCount - 1); if (inum >= first_intf && inum <= last_intf) { if (!retval) retval = intf_assoc; else dev_err(&dev->dev, "Interface #%d referenced" " by multiple IADs\n", inum); } } return retval; } /* * Internal function to queue a device reset * See usb_queue_reset_device() for more details */ static void __usb_queue_reset_device(struct work_struct *ws) { int rc; struct usb_interface *iface = container_of(ws, struct usb_interface, reset_ws); struct usb_device *udev = interface_to_usbdev(iface); rc = usb_lock_device_for_reset(udev, iface); if (rc >= 0) { usb_reset_device(udev); usb_unlock_device(udev); } usb_put_intf(iface); /* Undo _get_ in usb_queue_reset_device() */ } /* * Internal function to set the wireless_status sysfs attribute * See usb_set_wireless_status() for more details */ static void __usb_wireless_status_intf(struct work_struct *ws) { struct usb_interface *iface = container_of(ws, 
struct usb_interface, wireless_status_work); device_lock(iface->dev.parent); if (iface->sysfs_files_created) usb_update_wireless_status_attr(iface); device_unlock(iface->dev.parent); usb_put_intf(iface); /* Undo _get_ in usb_set_wireless_status() */ } /** * usb_set_wireless_status - sets the wireless_status struct member * @iface: the interface to modify * @status: the new wireless status * * Set the wireless_status struct member to the new value, and emit * sysfs changes as necessary. * * Returns: 0 on success, -EALREADY if already set. */ int usb_set_wireless_status(struct usb_interface *iface, enum usb_wireless_status status) { if (iface->wireless_status == status) return -EALREADY; usb_get_intf(iface); iface->wireless_status = status; schedule_work(&iface->wireless_status_work); return 0; } EXPORT_SYMBOL_GPL(usb_set_wireless_status); /* * usb_set_configuration - Makes a particular device setting be current * @dev: the device whose configuration is being updated * @configuration: the configuration being chosen. * * Context: task context, might sleep. Caller holds device lock. * * This is used to enable non-default device modes. Not all devices * use this kind of configurability; many devices only have one * configuration. * * @configuration is the value of the configuration to be installed. * According to the USB spec (e.g. section 9.1.1.5), configuration values * must be non-zero; a value of zero indicates that the device in * unconfigured. However some devices erroneously use 0 as one of their * configuration values. To help manage such devices, this routine will * accept @configuration = -1 as indicating the device should be put in * an unconfigured state. * * USB device configurations may affect Linux interoperability, * power consumption and the functionality available. For example, * the default configuration is limited to using 100mA of bus power, * so that when certain device functionality requires more power, * and the device is bus powered, that functionality should be in some * non-default device configuration. Other device modes may also be * reflected as configuration options, such as whether two ISDN * channels are available independently; and choosing between open * standard device protocols (like CDC) or proprietary ones. * * Note that a non-authorized device (dev->authorized == 0) will only * be put in unconfigured mode. * * Note that USB has an additional level of device configurability, * associated with interfaces. That configurability is accessed using * usb_set_interface(). * * This call is synchronous. The calling context must be able to sleep, * must own the device lock, and must not hold the driver model's USB * bus mutex; usb interface driver probe() methods cannot use this routine. * * Returns zero on success, or else the status code returned by the * underlying call that failed. On successful completion, each interface * in the original device configuration has been destroyed, and each one * in the new configuration has been probed by all relevant usb device * drivers currently known to the kernel. 
*/ int usb_set_configuration(struct usb_device *dev, int configuration) { int i, ret; struct usb_host_config *cp = NULL; struct usb_interface **new_interfaces = NULL; struct usb_hcd *hcd = bus_to_hcd(dev->bus); int n, nintf; if (dev->authorized == 0 || configuration == -1) configuration = 0; else { for (i = 0; i < dev->descriptor.bNumConfigurations; i++) { if (dev->config[i].desc.bConfigurationValue == configuration) { cp = &dev->config[i]; break; } } } if ((!cp && configuration != 0)) return -EINVAL; /* The USB spec says configuration 0 means unconfigured. * But if a device includes a configuration numbered 0, * we will accept it as a correctly configured state. * Use -1 if you really want to unconfigure the device. */ if (cp && configuration == 0) dev_warn(&dev->dev, "config 0 descriptor??\n"); /* Allocate memory for new interfaces before doing anything else, * so that if we run out then nothing will have changed. */ n = nintf = 0; if (cp) { nintf = cp->desc.bNumInterfaces; new_interfaces = kmalloc_array(nintf, sizeof(*new_interfaces), GFP_NOIO); if (!new_interfaces) return -ENOMEM; for (; n < nintf; ++n) { new_interfaces[n] = kzalloc( sizeof(struct usb_interface), GFP_NOIO); if (!new_interfaces[n]) { ret = -ENOMEM; free_interfaces: while (--n >= 0) kfree(new_interfaces[n]); kfree(new_interfaces); return ret; } } i = dev->bus_mA - usb_get_max_power(dev, cp); if (i < 0) dev_warn(&dev->dev, "new config #%d exceeds power " "limit by %dmA\n", configuration, -i); } /* Wake up the device so we can send it the Set-Config request */ ret = usb_autoresume_device(dev); if (ret) goto free_interfaces; /* if it's already configured, clear out old state first. * getting rid of old interfaces means unbinding their drivers. */ if (dev->state != USB_STATE_ADDRESS) usb_disable_device(dev, 1); /* Skip ep0 */ /* Get rid of pending async Set-Config requests for this device */ cancel_async_set_config(dev); /* Make sure we have bandwidth (and available HCD resources) for this * configuration. Remove endpoints from the schedule if we're dropping * this configuration to set configuration 0. After this point, the * host controller will not allow submissions to dropped endpoints. If * this call fails, the device state is unchanged. */ mutex_lock(hcd->bandwidth_mutex); /* Disable LPM, and re-enable it once the new configuration is * installed, so that the xHCI driver can recalculate the U1/U2 * timeouts. */ if (dev->actconfig && usb_disable_lpm(dev)) { dev_err(&dev->dev, "%s Failed to disable LPM\n", __func__); mutex_unlock(hcd->bandwidth_mutex); ret = -ENOMEM; goto free_interfaces; } ret = usb_hcd_alloc_bandwidth(dev, cp, NULL, NULL); if (ret < 0) { if (dev->actconfig) usb_enable_lpm(dev); mutex_unlock(hcd->bandwidth_mutex); usb_autosuspend_device(dev); goto free_interfaces; } /* * Initialize the new interface structures and the * hc/hcd/usbcore interface/endpoint state. */ for (i = 0; i < nintf; ++i) { struct usb_interface_cache *intfc; struct usb_interface *intf; struct usb_host_interface *alt; u8 ifnum; cp->interface[i] = intf = new_interfaces[i]; intfc = cp->intf_cache[i]; intf->altsetting = intfc->altsetting; intf->num_altsetting = intfc->num_altsetting; intf->authorized = !!HCD_INTF_AUTHORIZED(hcd); kref_get(&intfc->ref); alt = usb_altnum_to_altsetting(intf, 0); /* No altsetting 0? We'll assume the first altsetting. * We could use a GetInterface call, but if a device is * so non-compliant that it doesn't have altsetting 0 * then I wouldn't trust its reply anyway. 
*/ if (!alt) alt = &intf->altsetting[0]; ifnum = alt->desc.bInterfaceNumber; intf->intf_assoc = find_iad(dev, cp, ifnum); intf->cur_altsetting = alt; usb_enable_interface(dev, intf, true); intf->dev.parent = &dev->dev; if (usb_of_has_combined_node(dev)) { device_set_of_node_from_dev(&intf->dev, &dev->dev); } else { intf->dev.of_node = usb_of_get_interface_node(dev, configuration, ifnum); } ACPI_COMPANION_SET(&intf->dev, ACPI_COMPANION(&dev->dev)); intf->dev.driver = NULL; intf->dev.bus = &usb_bus_type; intf->dev.type = &usb_if_device_type; intf->dev.groups = usb_interface_groups; INIT_WORK(&intf->reset_ws, __usb_queue_reset_device); INIT_WORK(&intf->wireless_status_work, __usb_wireless_status_intf); intf->minor = -1; device_initialize(&intf->dev); pm_runtime_no_callbacks(&intf->dev); dev_set_name(&intf->dev, "%d-%s:%d.%d", dev->bus->busnum, dev->devpath, configuration, ifnum); usb_get_dev(dev); } kfree(new_interfaces); ret = usb_control_msg_send(dev, 0, USB_REQ_SET_CONFIGURATION, 0, configuration, 0, NULL, 0, USB_CTRL_SET_TIMEOUT, GFP_NOIO); if (ret && cp) { /* * All the old state is gone, so what else can we do? * The device is probably useless now anyway. */ usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL); for (i = 0; i < nintf; ++i) { usb_disable_interface(dev, cp->interface[i], true); put_device(&cp->interface[i]->dev); cp->interface[i] = NULL; } cp = NULL; } dev->actconfig = cp; mutex_unlock(hcd->bandwidth_mutex); if (!cp) { usb_set_device_state(dev, USB_STATE_ADDRESS); /* Leave LPM disabled while the device is unconfigured. */ usb_autosuspend_device(dev); return ret; } usb_set_device_state(dev, USB_STATE_CONFIGURED); if (cp->string == NULL && !(dev->quirks & USB_QUIRK_CONFIG_INTF_STRINGS)) cp->string = usb_cache_string(dev, cp->desc.iConfiguration); /* Now that the interfaces are installed, re-enable LPM. */ usb_unlocked_enable_lpm(dev); /* Enable LTM if it was turned off by usb_disable_device. */ usb_enable_ltm(dev); /* Now that all the interfaces are set up, register them * to trigger binding of drivers to interfaces. probe() * routines may install different altsettings and may * claim() any interfaces not yet bound. Many class drivers * need that: CDC, audio, video, etc. */ for (i = 0; i < nintf; ++i) { struct usb_interface *intf = cp->interface[i]; if (intf->dev.of_node && !of_device_is_available(intf->dev.of_node)) { dev_info(&dev->dev, "skipping disabled interface %d\n", intf->cur_altsetting->desc.bInterfaceNumber); continue; } dev_dbg(&dev->dev, "adding %s (config #%d, interface %d)\n", dev_name(&intf->dev), configuration, intf->cur_altsetting->desc.bInterfaceNumber); device_enable_async_suspend(&intf->dev); ret = device_add(&intf->dev); if (ret != 0) { dev_err(&dev->dev, "device_add(%s) --> %d\n", dev_name(&intf->dev), ret); continue; } create_intf_ep_devs(intf); } usb_autosuspend_device(dev); return 0; } EXPORT_SYMBOL_GPL(usb_set_configuration); static LIST_HEAD(set_config_list); static DEFINE_SPINLOCK(set_config_lock); struct set_config_request { struct usb_device *udev; int config; struct work_struct work; struct list_head node; }; /* Worker routine for usb_driver_set_configuration() */ static void driver_set_config_work(struct work_struct *work) { struct set_config_request *req = container_of(work, struct set_config_request, work); struct usb_device *udev = req->udev; usb_lock_device(udev); spin_lock(&set_config_lock); list_del(&req->node); spin_unlock(&set_config_lock); if (req->config >= -1) /* Is req still valid? 
*/ usb_set_configuration(udev, req->config); usb_unlock_device(udev); usb_put_dev(udev); kfree(req); } /* Cancel pending Set-Config requests for a device whose configuration * was just changed */ static void cancel_async_set_config(struct usb_device *udev) { struct set_config_request *req; spin_lock(&set_config_lock); list_for_each_entry(req, &set_config_list, node) { if (req->udev == udev) req->config = -999; /* Mark as cancelled */ } spin_unlock(&set_config_lock); } /** * usb_driver_set_configuration - Provide a way for drivers to change device configurations * @udev: the device whose configuration is being updated * @config: the configuration being chosen. * Context: In process context, must be able to sleep * * Device interface drivers are not allowed to change device configurations. * This is because changing configurations will destroy the interface the * driver is bound to and create new ones; it would be like a floppy-disk * driver telling the computer to replace the floppy-disk drive with a * tape drive! * * Still, in certain specialized circumstances the need may arise. This * routine gets around the normal restrictions by using a work thread to * submit the change-config request. * * Return: 0 if the request was successfully queued, error code otherwise. * The caller has no way to know whether the queued request will eventually * succeed. */ int usb_driver_set_configuration(struct usb_device *udev, int config) { struct set_config_request *req; req = kmalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; req->udev = udev; req->config = config; INIT_WORK(&req->work, driver_set_config_work); spin_lock(&set_config_lock); list_add(&req->node, &set_config_list); spin_unlock(&set_config_lock); usb_get_dev(udev); schedule_work(&req->work); return 0; } EXPORT_SYMBOL_GPL(usb_driver_set_configuration); /** * cdc_parse_cdc_header - parse the extra headers present in CDC devices * @hdr: the place to put the results of the parsing * @intf: the interface for which parsing is requested * @buffer: pointer to the extra headers to be parsed * @buflen: length of the extra headers * * This evaluates the extra headers present in CDC devices which * bind the interfaces for data and control and provide details * about the capabilities of the device. 
* * Return: number of descriptors parsed or -EINVAL * if the header is contradictory beyond salvage */ int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, struct usb_interface *intf, u8 *buffer, int buflen) { /* duplicates are ignored */ struct usb_cdc_union_desc *union_header = NULL; /* duplicates are not tolerated */ struct usb_cdc_header_desc *header = NULL; struct usb_cdc_ether_desc *ether = NULL; struct usb_cdc_mdlm_detail_desc *detail = NULL; struct usb_cdc_mdlm_desc *desc = NULL; unsigned int elength; int cnt = 0; memset(hdr, 0x00, sizeof(struct usb_cdc_parsed_header)); hdr->phonet_magic_present = false; while (buflen > 0) { elength = buffer[0]; if (!elength) { dev_err(&intf->dev, "skipping garbage byte\n"); elength = 1; goto next_desc; } if ((buflen < elength) || (elength < 3)) { dev_err(&intf->dev, "invalid descriptor buffer length\n"); break; } if (buffer[1] != USB_DT_CS_INTERFACE) { dev_err(&intf->dev, "skipping garbage\n"); goto next_desc; } switch (buffer[2]) { case USB_CDC_UNION_TYPE: /* we've found it */ if (elength < sizeof(struct usb_cdc_union_desc)) goto next_desc; if (union_header) { dev_err(&intf->dev, "More than one union descriptor, skipping ...\n"); goto next_desc; } union_header = (struct usb_cdc_union_desc *)buffer; break; case USB_CDC_COUNTRY_TYPE: if (elength < sizeof(struct usb_cdc_country_functional_desc)) goto next_desc; hdr->usb_cdc_country_functional_desc = (struct usb_cdc_country_functional_desc *)buffer; break; case USB_CDC_HEADER_TYPE: if (elength != sizeof(struct usb_cdc_header_desc)) goto next_desc; if (header) return -EINVAL; header = (struct usb_cdc_header_desc *)buffer; break; case USB_CDC_ACM_TYPE: if (elength < sizeof(struct usb_cdc_acm_descriptor)) goto next_desc; hdr->usb_cdc_acm_descriptor = (struct usb_cdc_acm_descriptor *)buffer; break; case USB_CDC_ETHERNET_TYPE: if (elength != sizeof(struct usb_cdc_ether_desc)) goto next_desc; if (ether) return -EINVAL; ether = (struct usb_cdc_ether_desc *)buffer; break; case USB_CDC_CALL_MANAGEMENT_TYPE: if (elength < sizeof(struct usb_cdc_call_mgmt_descriptor)) goto next_desc; hdr->usb_cdc_call_mgmt_descriptor = (struct usb_cdc_call_mgmt_descriptor *)buffer; break; case USB_CDC_DMM_TYPE: if (elength < sizeof(struct usb_cdc_dmm_desc)) goto next_desc; hdr->usb_cdc_dmm_desc = (struct usb_cdc_dmm_desc *)buffer; break; case USB_CDC_MDLM_TYPE: if (elength < sizeof(struct usb_cdc_mdlm_desc)) goto next_desc; if (desc) return -EINVAL; desc = (struct usb_cdc_mdlm_desc *)buffer; break; case USB_CDC_MDLM_DETAIL_TYPE: if (elength < sizeof(struct usb_cdc_mdlm_detail_desc)) goto next_desc; if (detail) return -EINVAL; detail = (struct usb_cdc_mdlm_detail_desc *)buffer; break; case USB_CDC_NCM_TYPE: if (elength < sizeof(struct usb_cdc_ncm_desc)) goto next_desc; hdr->usb_cdc_ncm_desc = (struct usb_cdc_ncm_desc *)buffer; break; case USB_CDC_MBIM_TYPE: if (elength < sizeof(struct usb_cdc_mbim_desc)) goto next_desc; hdr->usb_cdc_mbim_desc = (struct usb_cdc_mbim_desc *)buffer; break; case USB_CDC_MBIM_EXTENDED_TYPE: if (elength < sizeof(struct usb_cdc_mbim_extended_desc)) break; hdr->usb_cdc_mbim_extended_desc = (struct usb_cdc_mbim_extended_desc *)buffer; break; case CDC_PHONET_MAGIC_NUMBER: hdr->phonet_magic_present = true; break; default: /* * there are LOTS more CDC descriptors that * could legitimately be found here. 
*/ dev_dbg(&intf->dev, "Ignoring descriptor: type %02x, length %u\n", buffer[2], elength); goto next_desc; } cnt++; next_desc: buflen -= elength; buffer += elength; } hdr->usb_cdc_union_desc = union_header; hdr->usb_cdc_header_desc = header; hdr->usb_cdc_mdlm_detail_desc = detail; hdr->usb_cdc_mdlm_desc = desc; hdr->usb_cdc_ether_desc = ether; return cnt; } EXPORT_SYMBOL(cdc_parse_cdc_header);
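/*
 * Illustrative sketch, not part of the file above: how a hypothetical
 * CDC-style interface driver might call cdc_parse_cdc_header() and
 * usb_string() from its probe() routine.  The function name
 * example_cdc_probe and the local variable prod are invented for this
 * example; it also assumes the class-specific functional descriptors
 * follow the interface descriptor in the "extra" bytes, which is the
 * common case but not guaranteed for every device.
 */
#include <linux/usb.h>
#include <linux/usb/cdc.h>

static int example_cdc_probe(struct usb_interface *intf,
			     const struct usb_device_id *id)
{
	struct usb_device *udev = interface_to_usbdev(intf);
	struct usb_cdc_parsed_header hdr;
	char prod[64];
	int ret;

	/* Parse the class-specific descriptors trailing the interface
	 * descriptor; a negative return means they are contradictory
	 * beyond salvage.
	 */
	ret = cdc_parse_cdc_header(&hdr, intf,
				   intf->altsetting->extra,
				   intf->altsetting->extralen);
	if (ret < 0)
		return ret;

	if (!hdr.usb_cdc_union_desc)
		dev_dbg(&intf->dev, "no union descriptor found\n");

	/* Fetch the product string as UTF-8, in the device's first
	 * supported language.
	 */
	if (udev->descriptor.iProduct &&
	    usb_string(udev, udev->descriptor.iProduct,
		       prod, sizeof(prod)) > 0)
		dev_info(&intf->dev, "bound to \"%s\"\n", prod);

	return 0;
}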
/* SPDX-License-Identifier: GPL-2.0 */ /* * Prevent the compiler from merging or refetching reads or writes. The * compiler is also forbidden from reordering successive instances of * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some * particular ordering. One way to make the compiler aware of ordering is to * put the two invocations of READ_ONCE or WRITE_ONCE in different C * statements. * * These two macros will also work on aggregate data types like structs or * unions. * * Their two major use cases are: (1) Mediating communication between * process-level code and irq/NMI handlers, all running on the same CPU, * and (2) Ensuring that the compiler does not fold, spindle, or otherwise * mutilate accesses that either do not require ordering or that interact * with an explicit memory barrier or atomic instruction that provides the * required ordering. */ #ifndef __ASM_GENERIC_RWONCE_H #define __ASM_GENERIC_RWONCE_H #ifndef __ASSEMBLY__ #include <linux/compiler_types.h> #include <linux/kasan-checks.h> #include <linux/kcsan-checks.h> /* * Yes, this permits 64-bit accesses on 32-bit architectures. These will * actually be atomic in some cases (namely Armv7 + LPAE), but for others we * rely on the access being split into 2x32-bit accesses for a 32-bit quantity * (e.g. a virtual address) and a strong prevailing wind. */ #define compiletime_assert_rwonce_type(t) \ compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ "Unsupported access size for {READ,WRITE}_ONCE().") /* * Use __READ_ONCE() instead of READ_ONCE() if you do not require any * atomicity. Note that this may result in tears! */ #ifndef __READ_ONCE #define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x)) #endif #define READ_ONCE(x) \ ({ \ compiletime_assert_rwonce_type(x); \ __READ_ONCE(x); \ }) #define __WRITE_ONCE(x, val) \ do { \ *(volatile typeof(x) *)&(x) = (val); \ } while (0) #define WRITE_ONCE(x, val) \ do { \ compiletime_assert_rwonce_type(x); \ __WRITE_ONCE(x, val); \ } while (0) static __no_sanitize_or_inline unsigned long __read_once_word_nocheck(const void *addr) { return __READ_ONCE(*(unsigned long *)addr); } /* * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a * word from memory atomically but without telling KASAN/KCSAN. This is * usually used by unwinding code when walking the stack of a running process. */ #define READ_ONCE_NOCHECK(x) \ ({ \ compiletime_assert(sizeof(x) == sizeof(unsigned long), \ "Unsupported access size for READ_ONCE_NOCHECK()."); \ (typeof(x))__read_once_word_nocheck(&(x)); \ }) static __no_kasan_or_inline unsigned long read_word_at_a_time(const void *addr) { kasan_check_read(addr, 1); return *(unsigned long *)addr; } #endif /* __ASSEMBLY__ */ #endif /* __ASM_GENERIC_RWONCE_H */
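/*
 * Illustrative sketch, not part of the header above: the "use case (1)"
 * described in its opening comment -- a flag shared between process-level
 * code and an interrupt handler running on the same CPU.  The names
 * example_done, example_irq_handler and example_wait are invented for
 * this example; in real code READ_ONCE()/WRITE_ONCE() are normally picked
 * up via <linux/compiler.h> rather than by including the header directly.
 */
#include <linux/compiler.h>
#include <linux/interrupt.h>
#include <asm/processor.h>

static int example_done;

static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	/* Publish completion; WRITE_ONCE() prevents the compiler from
	 * tearing or fusing this store with other marked accesses.
	 */
	WRITE_ONCE(example_done, 1);
	return IRQ_HANDLED;
}

static void example_wait(void)
{
	/* READ_ONCE() forces a fresh load on every iteration, so the
	 * compiler cannot hoist the read out of the loop.
	 */
	while (!READ_ONCE(example_done))
		cpu_relax();
}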
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Linux NET3:	Internet Group Management Protocol  [IGMP]
 *
 *	This code implements the IGMP protocol as defined in RFC1112. There has
 *	been a further revision of this protocol since which is now supported.
 *
 *	If you have trouble with this module be careful what gcc you have used,
 *	the older version didn't come out right using gcc 2.5.8, the newer one
 *	seems to fall out with gcc 2.6.2.
 *
 *	Authors:
 *		Alan Cox <alan@lxorguk.ukuu.org.uk>
 *
 *	Fixes:
 *
 *		Alan Cox	:	Added lots of __inline__ to optimise
 *					the memory usage of all the tiny little
 *					functions.
 *		Alan Cox	:	Dumped the header building experiment.
 *		Alan Cox	:	Minor tweaks ready for multicast routing
 *					and extended IGMP protocol.
 *		Alan Cox	:	Removed a load of inline directives. Gcc 2.5.8
 *					writes utterly bogus code otherwise (sigh)
 *					fixed IGMP loopback to behave in the manner
 *					desired by mrouted, fixed the fact it has been
 *					broken since 1.3.6 and cleaned up a few minor
 *					points.
 *
 *		Chih-Jen Chang	:	Tried to revise IGMP to Version 2
 *		Tsu-Sheng Tsao		E-mail: chihjenc@scf.usc.edu and tsusheng@scf.usc.edu
 *					The enhancements are mainly based on Steve Deering's
 *					ipmulti-3.5 source code.
 *		Chih-Jen Chang	:	Added the igmp_get_mrouter_info and
 *		Tsu-Sheng Tsao		igmp_set_mrouter_info to keep track of
 *					the mrouted version on that device.
 *		Chih-Jen Chang	:	Added the max_resp_time parameter to
 *		Tsu-Sheng Tsao		igmp_heard_query(). Using this parameter
 *					to identify the multicast router version
 *					and do what the IGMP version 2 specified.
 *		Chih-Jen Chang	:	Added a timer to revert to IGMP V2 router
 *		Tsu-Sheng Tsao		if the specified time expired.
 *		Alan Cox	:	Stop IGMP from 0.0.0.0 being accepted.
 *		Alan Cox	:	Use GFP_ATOMIC in the right places.
 *		Christian Daudt :	igmp timer wasn't set for local group
 *					memberships but was being deleted,
 *					which caused a "del_timer() called
 *					from %p with timer not initialized\n"
 *					message (960131).
 *		Christian Daudt :	removed del_timer from
 *					igmp_timer_expire function (960205).
 *		Christian Daudt :	igmp_heard_report now only calls
 *					igmp_timer_expire if tm->running is
 *					true (960216).
 *		Malcolm Beattie :	ttl comparison wrong in igmp_rcv made
 *					igmp_heard_query never trigger. Expiry
 *					miscalculation fixed in igmp_heard_query
 *					and random() made to return unsigned to
 *					prevent negative expiry times.
 *		Alexey Kuznetsov:	Wrong group leaving behaviour, backport
 *					fix from pending 2.1.x patches.
 *		Alan Cox:		Forget to enable FDDI support earlier.
 *		Alexey Kuznetsov:	Fixed leaving groups on device down.
 *		Alexey Kuznetsov:	Accordance to igmp-v2-06 draft.
 *		David L Stevens:	IGMPv3 support, with help from
 *					Vinay Kulkarni
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/times.h>
#include <linux/pkt_sched.h>
#include <linux/byteorder/generic.h>

#include <net/net_namespace.h>
#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <net/inet_common.h>
#include <linux/netfilter_ipv4.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif

#ifdef CONFIG_IP_MULTICAST

/* Parameter names and values are taken from igmp-v2-06 draft */

#define IGMP_QUERY_INTERVAL			(125*HZ)
#define IGMP_QUERY_RESPONSE_INTERVAL		(10*HZ)

#define IGMP_INITIAL_REPORT_DELAY		(1)

/* IGMP_INITIAL_REPORT_DELAY is not from IGMP specs!
 * IGMP specs require to report membership immediately after
 * joining a group, but we delay the first report by a
 * small interval. It seems more natural and still does not
 * contradict to specs provided this delay is small enough.
 */

#define IGMP_V1_SEEN(in_dev) \
	(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
	 ((in_dev)->mr_v1_seen && \
	  time_before(jiffies, (in_dev)->mr_v1_seen)))

#define IGMP_V2_SEEN(in_dev) \
	(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
	 ((in_dev)->mr_v2_seen && \
	  time_before(jiffies, (in_dev)->mr_v2_seen)))

static int unsolicited_report_interval(struct in_device *in_dev)
{
	int interval_ms, interval_jiffies;

	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
		interval_ms = IN_DEV_CONF_GET(
			in_dev,
			IGMPV2_UNSOLICITED_REPORT_INTERVAL);
	else /* v3 */
		interval_ms = IN_DEV_CONF_GET(
			in_dev,
			IGMPV3_UNSOLICITED_REPORT_INTERVAL);

	interval_jiffies = msecs_to_jiffies(interval_ms);

	/* _timer functions can't handle a delay of 0 jiffies so ensure
	 * we always return a positive value.
*/ if (interval_jiffies <= 0) interval_jiffies = 1; return interval_jiffies; } static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, gfp_t gfp); static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); #endif static void ip_mc_clear_src(struct ip_mc_list *pmc); static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta); static void ip_ma_put(struct ip_mc_list *im) { if (refcount_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); kfree_rcu(im, rcu); } } #define for_each_pmc_rcu(in_dev, pmc) \ for (pmc = rcu_dereference(in_dev->mc_list); \ pmc != NULL; \ pmc = rcu_dereference(pmc->next_rcu)) #define for_each_pmc_rtnl(in_dev, pmc) \ for (pmc = rtnl_dereference(in_dev->mc_list); \ pmc != NULL; \ pmc = rtnl_dereference(pmc->next_rcu)) static void ip_sf_list_clear_all(struct ip_sf_list *psf) { struct ip_sf_list *next; while (psf) { next = psf->sf_next; kfree(psf); psf = next; } } #ifdef CONFIG_IP_MULTICAST /* * Timer management */ static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) refcount_dec(&im->refcnt); im->tm_running = 0; im->reporter = 0; im->unsolicit_count = 0; spin_unlock_bh(&im->lock); } /* It must be called with locked im->lock */ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) { int tv = get_random_u32_below(max_delay); im->tm_running = 1; if (refcount_inc_not_zero(&im->refcnt)) { if (mod_timer(&im->timer, jiffies + tv + 2)) ip_ma_put(im); } } static void igmp_gq_start_timer(struct in_device *in_dev) { int tv = get_random_u32_below(in_dev->mr_maxdelay); unsigned long exp = jiffies + tv + 2; if (in_dev->mr_gq_running && time_after_eq(exp, (in_dev->mr_gq_timer).expires)) return; in_dev->mr_gq_running = 1; if (!mod_timer(&in_dev->mr_gq_timer, exp)) in_dev_hold(in_dev); } static void igmp_ifc_start_timer(struct in_device *in_dev, int delay) { int tv = get_random_u32_below(delay); if (!mod_timer(&in_dev->mr_ifc_timer, jiffies+tv+2)) in_dev_hold(in_dev); } static void igmp_mod_timer(struct ip_mc_list *im, int max_delay) { spin_lock_bh(&im->lock); im->unsolicit_count = 0; if (del_timer(&im->timer)) { if ((long)(im->timer.expires-jiffies) < max_delay) { add_timer(&im->timer); im->tm_running = 1; spin_unlock_bh(&im->lock); return; } refcount_dec(&im->refcnt); } igmp_start_timer(im, max_delay); spin_unlock_bh(&im->lock); } /* * Send an IGMP report. 
*/ #define IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+4) static int is_in(struct ip_mc_list *pmc, struct ip_sf_list *psf, int type, int gdeleted, int sdeleted) { switch (type) { case IGMPV3_MODE_IS_INCLUDE: case IGMPV3_MODE_IS_EXCLUDE: if (gdeleted || sdeleted) return 0; if (!(pmc->gsquery && !psf->sf_gsresp)) { if (pmc->sfmode == MCAST_INCLUDE) return 1; /* don't include if this source is excluded * in all filters */ if (psf->sf_count[MCAST_INCLUDE]) return type == IGMPV3_MODE_IS_INCLUDE; return pmc->sfcount[MCAST_EXCLUDE] == psf->sf_count[MCAST_EXCLUDE]; } return 0; case IGMPV3_CHANGE_TO_INCLUDE: if (gdeleted || sdeleted) return 0; return psf->sf_count[MCAST_INCLUDE] != 0; case IGMPV3_CHANGE_TO_EXCLUDE: if (gdeleted || sdeleted) return 0; if (pmc->sfcount[MCAST_EXCLUDE] == 0 || psf->sf_count[MCAST_INCLUDE]) return 0; return pmc->sfcount[MCAST_EXCLUDE] == psf->sf_count[MCAST_EXCLUDE]; case IGMPV3_ALLOW_NEW_SOURCES: if (gdeleted || !psf->sf_crcount) return 0; return (pmc->sfmode == MCAST_INCLUDE) ^ sdeleted; case IGMPV3_BLOCK_OLD_SOURCES: if (pmc->sfmode == MCAST_INCLUDE) return gdeleted || (psf->sf_crcount && sdeleted); return psf->sf_crcount && !gdeleted && !sdeleted; } return 0; } static int igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) { struct ip_sf_list *psf; int scount = 0; for (psf = pmc->sources; psf; psf = psf->sf_next) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; } return scount; } /* source address selection per RFC 3376 section 4.2.13 */ static __be32 igmpv3_get_srcaddr(struct net_device *dev, const struct flowi4 *fl4) { struct in_device *in_dev = __in_dev_get_rcu(dev); const struct in_ifaddr *ifa; if (!in_dev) return htonl(INADDR_ANY); in_dev_for_each_ifa_rcu(ifa, in_dev) { if (fl4->saddr == ifa->ifa_local) return fl4->saddr; } return htonl(INADDR_ANY); } static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; struct rtable *rt; struct iphdr *pip; struct igmpv3_report *pig; struct net *net = dev_net(dev); struct flowi4 fl4; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; unsigned int size; size = min(mtu, IP_MAX_MTU); while (1) { skb = alloc_skb(size + hlen + tlen, GFP_ATOMIC | __GFP_NOWARN); if (skb) break; size >>= 1; if (size < 256) return NULL; } skb->priority = TC_PRIO_CONTROL; rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, 0, 0, IPPROTO_IGMP, 0, dev->ifindex); if (IS_ERR(rt)) { kfree_skb(skb); return NULL; } skb_dst_set(skb, &rt->dst); skb->dev = dev; skb_reserve(skb, hlen); skb_tailroom_reserve(skb, mtu, tlen); skb_reset_network_header(skb); pip = ip_hdr(skb); skb_put(skb, sizeof(struct iphdr) + 4); pip->version = 4; pip->ihl = (sizeof(struct iphdr)+4)>>2; pip->tos = 0xc0; pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; rcu_read_lock(); pip->saddr = igmpv3_get_srcaddr(dev, &fl4); rcu_read_unlock(); pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; ((u8 *)&pip[1])[3] = 0; skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4; skb_put(skb, sizeof(*pig)); pig = igmpv3_report_hdr(skb); pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT; pig->resv1 = 0; pig->csum = 0; pig->resv2 = 0; pig->ngrec = 0; return skb; } static int igmpv3_sendpack(struct sk_buff *skb) { struct igmphdr *pig = igmp_hdr(skb); const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb); 
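	/* The IGMP checksum covers the whole IGMPv3 report (header plus all
	 * group records appended so far), so it is computed only here, once
	 * the packet is complete and about to be handed to the IP layer.
	 */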
pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); } static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) { return sizeof(struct igmpv3_grec) + 4*igmp_scount(pmc, type, gdel, sdel); } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, int type, struct igmpv3_grec **ppgr, unsigned int mtu) { struct net_device *dev = pmc->interface->dev; struct igmpv3_report *pih; struct igmpv3_grec *pgr; if (!skb) { skb = igmpv3_newpack(dev, mtu); if (!skb) return NULL; } pgr = skb_put(skb, sizeof(struct igmpv3_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; pgr->grec_nsrcs = 0; pgr->grec_mca = pmc->multiaddr; pih = igmpv3_report_hdr(skb); pih->ngrec = htons(ntohs(pih->ngrec)+1); *ppgr = pgr; return skb; } #define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) { struct net_device *dev = pmc->interface->dev; struct net *net = dev_net(dev); struct igmpv3_report *pih; struct igmpv3_grec *pgr = NULL; struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; int scount, stotal, first, isquery, truncate; unsigned int mtu; if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; if (ipv4_is_local_multicast(pmc->multiaddr) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return skb; mtu = READ_ONCE(dev->mtu); if (mtu < IPV4_MIN_MTU) return skb; isquery = type == IGMPV3_MODE_IS_INCLUDE || type == IGMPV3_MODE_IS_EXCLUDE; truncate = type == IGMPV3_MODE_IS_EXCLUDE || type == IGMPV3_CHANGE_TO_EXCLUDE; stotal = scount = 0; psf_list = sdeleted ? &pmc->tomb : &pmc->sources; if (!*psf_list) goto empty_source; pih = skb ? igmpv3_report_hdr(skb) : NULL; /* EX and TO_EX get a fresh packet, if needed */ if (truncate) { if (pih && pih->ngrec && AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) igmpv3_sendpack(skb); skb = igmpv3_newpack(dev, mtu); } } first = 1; psf_prev = NULL; for (psf = *psf_list; psf; psf = psf_next) { __be32 *psrc; psf_next = psf->sf_next; if (!is_in(pmc, psf, type, gdeleted, sdeleted)) { psf_prev = psf; continue; } /* Based on RFC3376 5.1. Should not send source-list change * records when there is a filter mode change. 
*/ if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) || (!gdeleted && pmc->crcount)) && (type == IGMPV3_ALLOW_NEW_SOURCES || type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) goto decrease_sf_crcount; /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; if (AVAILABLE(skb) < sizeof(__be32) + first*sizeof(struct igmpv3_grec)) { if (truncate && !first) break; /* truncate these */ if (pgr) pgr->grec_nsrcs = htons(scount); if (skb) igmpv3_sendpack(skb); skb = igmpv3_newpack(dev, mtu); first = 1; scount = 0; } if (first) { skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) return NULL; psrc = skb_put(skb, sizeof(__be32)); *psrc = psf->sf_inaddr; scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) { decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) psf_prev->sf_next = psf->sf_next; else *psf_list = psf->sf_next; kfree(psf); continue; } } psf_prev = psf; } empty_source: if (!stotal) { if (type == IGMPV3_ALLOW_NEW_SOURCES || type == IGMPV3_BLOCK_OLD_SOURCES) return skb; if (pmc->crcount || isquery) { /* make sure we have room for group header */ if (skb && AVAILABLE(skb) < sizeof(struct igmpv3_grec)) { igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) pgr->grec_nsrcs = htons(scount); if (isquery) pmc->gsquery = 0; /* clear query state on report */ return skb; } static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) { struct sk_buff *skb = NULL; struct net *net = dev_net(in_dev->dev); int type; if (!pmc) { rcu_read_lock(); for_each_pmc_rcu(in_dev, pmc) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) type = IGMPV3_MODE_IS_EXCLUDE; else type = IGMPV3_MODE_IS_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } rcu_read_unlock(); } else { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) type = IGMPV3_MODE_IS_EXCLUDE; else type = IGMPV3_MODE_IS_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } if (!skb) return 0; return igmpv3_sendpack(skb); } /* * remove zero-count source records from a source filter list */ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf) { struct ip_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; for (psf = *ppsf; psf; psf = psf_next) { psf_next = psf->sf_next; if (psf->sf_crcount == 0) { if (psf_prev) psf_prev->sf_next = psf->sf_next; else *ppsf = psf->sf_next; kfree(psf); } else psf_prev = psf; } } static void kfree_pmc(struct ip_mc_list *pmc) { ip_sf_list_clear_all(pmc->sources); ip_sf_list_clear_all(pmc->tomb); kfree(pmc); } static void igmpv3_send_cr(struct in_device *in_dev) { struct ip_mc_list *pmc, *pmc_prev, *pmc_next; struct sk_buff *skb = NULL; int type, dtype; rcu_read_lock(); spin_lock_bh(&in_dev->mc_tomb_lock); /* deleted MCA's */ pmc_prev = NULL; for (pmc = in_dev->mc_tomb; pmc; pmc = pmc_next) { pmc_next = pmc->next; if (pmc->sfmode == MCAST_INCLUDE) { type = IGMPV3_BLOCK_OLD_SOURCES; dtype = IGMPV3_BLOCK_OLD_SOURCES; skb = add_grec(skb, pmc, type, 1, 0); skb = add_grec(skb, pmc, dtype, 1, 1); } if (pmc->crcount) { if (pmc->sfmode == MCAST_EXCLUDE) { type = IGMPV3_CHANGE_TO_INCLUDE; skb = add_grec(skb, pmc, type, 1, 0); } pmc->crcount--; if (pmc->crcount == 0) { 
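			/* Last scheduled retransmission for this deleted group:
			 * drop the source records whose retransmit counts have
			 * reached zero so the tomb entry can be unlinked and
			 * freed below once no sources remain.
			 */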
igmpv3_clear_zeros(&pmc->tomb); igmpv3_clear_zeros(&pmc->sources); } } if (pmc->crcount == 0 && !pmc->tomb && !pmc->sources) { if (pmc_prev) pmc_prev->next = pmc_next; else in_dev->mc_tomb = pmc_next; in_dev_put(pmc->interface); kfree_pmc(pmc); } else pmc_prev = pmc; } spin_unlock_bh(&in_dev->mc_tomb_lock); /* change recs */ for_each_pmc_rcu(in_dev, pmc) { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) { type = IGMPV3_BLOCK_OLD_SOURCES; dtype = IGMPV3_ALLOW_NEW_SOURCES; } else { type = IGMPV3_ALLOW_NEW_SOURCES; dtype = IGMPV3_BLOCK_OLD_SOURCES; } skb = add_grec(skb, pmc, type, 0, 0); skb = add_grec(skb, pmc, dtype, 0, 1); /* deleted sources */ /* filter mode changes */ if (pmc->crcount) { if (pmc->sfmode == MCAST_EXCLUDE) type = IGMPV3_CHANGE_TO_EXCLUDE; else type = IGMPV3_CHANGE_TO_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0); pmc->crcount--; } spin_unlock_bh(&pmc->lock); } rcu_read_unlock(); if (!skb) return; (void) igmpv3_sendpack(skb); } static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, int type) { struct sk_buff *skb; struct iphdr *iph; struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; struct net *net = dev_net(dev); __be32 group = pmc ? pmc->multiaddr : 0; struct flowi4 fl4; __be32 dst; int hlen, tlen; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); if (ipv4_is_local_multicast(group) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) dst = IGMP_ALL_ROUTER; else dst = group; rt = ip_route_output_ports(net, &fl4, NULL, dst, 0, 0, 0, IPPROTO_IGMP, 0, dev->ifindex); if (IS_ERR(rt)) return -1; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC); if (!skb) { ip_rt_put(rt); return -1; } skb->priority = TC_PRIO_CONTROL; skb_dst_set(skb, &rt->dst); skb_reserve(skb, hlen); skb_reset_network_header(skb); iph = ip_hdr(skb); skb_put(skb, sizeof(struct iphdr) + 4); iph->version = 4; iph->ihl = (sizeof(struct iphdr)+4)>>2; iph->tos = 0xc0; iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; ip_select_ident(net, skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; ((u8 *)&iph[1])[3] = 0; ih = skb_put(skb, sizeof(struct igmphdr)); ih->type = type; ih->code = 0; ih->csum = 0; ih->group = group; ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); return ip_local_out(net, skb->sk, skb); } static void igmp_gq_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_gq_timer); in_dev->mr_gq_running = 0; igmpv3_send_report(in_dev, NULL); in_dev_put(in_dev); } static void igmp_ifc_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); u32 mr_ifc_count; igmpv3_send_cr(in_dev); restart: mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count); if (mr_ifc_count) { if (cmpxchg(&in_dev->mr_ifc_count, mr_ifc_count, mr_ifc_count - 1) != mr_ifc_count) goto restart; igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } in_dev_put(in_dev); } static void igmp_ifc_event(struct in_device *in_dev) { struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); igmp_ifc_start_timer(in_dev, 1); } static void igmp_timer_expire(struct timer_list *t) { struct ip_mc_list *im = from_timer(im, t, timer); 
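	/* A group membership timer has fired: send a (possibly unsolicited)
	 * report for this group using whichever IGMP version is currently in
	 * use on the link, then drop the reference taken when the timer was
	 * armed.
	 */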
struct in_device *in_dev = im->interface; spin_lock(&im->lock); im->tm_running = 0; if (im->unsolicit_count && --im->unsolicit_count) igmp_start_timer(im, unsolicited_report_interval(in_dev)); im->reporter = 1; spin_unlock(&im->lock); if (IGMP_V1_SEEN(in_dev)) igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT); else if (IGMP_V2_SEEN(in_dev)) igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT); else igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT); ip_ma_put(im); } /* mark EXCLUDE-mode sources */ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; scount = 0; for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; for (i = 0; i < nsrcs; i++) { /* skip inactive filters */ if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) break; if (srcs[i] == psf->sf_inaddr) { scount++; break; } } } pmc->gsquery = 0; if (scount == nsrcs) /* all sources excluded */ return 0; return 1; } static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; if (pmc->sfmode == MCAST_EXCLUDE) return igmp_xmarksources(pmc, nsrcs, srcs); /* mark INCLUDE-mode sources */ scount = 0; for (psf = pmc->sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; for (i = 0; i < nsrcs; i++) if (srcs[i] == psf->sf_inaddr) { psf->sf_gsresp = 1; scount++; break; } } if (!scount) { pmc->gsquery = 0; return 0; } pmc->gsquery = 1; return 1; } /* return true if packet was dropped */ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; struct net *net = dev_net(in_dev->dev); /* Timers are only set for non-local groups */ if (group == IGMP_ALL_HOSTS) return false; if (ipv4_is_local_multicast(group) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return false; rcu_read_lock(); for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == group) { igmp_stop_timer(im); break; } } rcu_read_unlock(); return false; } /* return true if packet was dropped */ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int len) { struct igmphdr *ih = igmp_hdr(skb); struct igmpv3_query *ih3 = igmpv3_query_hdr(skb); struct ip_mc_list *im; __be32 group = ih->group; int max_delay; int mark = 0; struct net *net = dev_net(in_dev->dev); if (len == 8) { if (ih->code == 0) { /* Alas, old v1 router presents here. */ max_delay = IGMP_QUERY_RESPONSE_INTERVAL; in_dev->mr_v1_seen = jiffies + (in_dev->mr_qrv * in_dev->mr_qi) + in_dev->mr_qri; group = 0; } else { /* v2 router present */ max_delay = ih->code*(HZ/IGMP_TIMER_SCALE); in_dev->mr_v2_seen = jiffies + (in_dev->mr_qrv * in_dev->mr_qi) + in_dev->mr_qri; } /* cancel the interface change timer */ WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); /* clear deleted report items */ igmpv3_clear_delrec(in_dev); } else if (len < 12) { return true; /* ignore bogus packet; freed by caller */ } else if (IGMP_V1_SEEN(in_dev)) { /* This is a v3 query with v1 queriers present */ max_delay = IGMP_QUERY_RESPONSE_INTERVAL; group = 0; } else if (IGMP_V2_SEEN(in_dev)) { /* this is a v3 query with v2 queriers present; * Interpretation of the max_delay code is problematic here. * A real v2 host would use ih_code directly, while v3 has a * different encoding. We use the v3 encoding as more likely * to be intended in a v3 query. 
*/ max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); if (!max_delay) max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return true; ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) { if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) + ntohs(ih3->nsrcs)*sizeof(__be32))) return true; ih3 = igmpv3_query_hdr(skb); } max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); if (!max_delay) max_delay = 1; /* can't mod w/ 0 */ in_dev->mr_maxdelay = max_delay; /* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently * received value was zero, use the default or statically * configured value. */ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; /* RFC3376, 8.3. Query Response Interval: * The number of seconds represented by the [Query Response * Interval] must be less than the [Query Interval]. */ if (in_dev->mr_qri >= in_dev->mr_qi) in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ; if (!group) { /* general query */ if (ih3->nsrcs) return true; /* no sources allowed */ igmp_gq_start_timer(in_dev); return false; } /* mark sources to include, if group & source-specific */ mark = ih3->nsrcs != 0; } /* * - Start the timers in all of our membership records * that the query applies to for the interface on * which the query arrived excl. those that belong * to a "local" group (224.0.0.X) * - For timers already running check if they need to * be reset. * - Use the igmp->igmp_code field as the maximum * delay possible */ rcu_read_lock(); for_each_pmc_rcu(in_dev, im) { int changed; if (group && group != im->multiaddr) continue; if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&im->lock); if (im->tm_running) im->gsquery = im->gsquery && mark; else im->gsquery = mark; changed = !im->gsquery || igmp_marksources(im, ntohs(ih3->nsrcs), ih3->srcs); spin_unlock_bh(&im->lock); if (changed) igmp_mod_timer(im, max_delay); } rcu_read_unlock(); return false; } /* called in rcu_read_lock() section */ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; struct net_device *dev = skb->dev; struct in_device *in_dev; int len = skb->len; bool dropped = true; if (netif_is_l3_master(dev)) { dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif); if (!dev) goto drop; } in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) goto drop; if (skb_checksum_simple_validate(skb)) goto drop; ih = igmp_hdr(skb); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_QUERY: dropped = igmp_heard_query(in_dev, skb, len); break; case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? 
*/ if (rt_is_output_route(skb_rtable(skb))) break; /* don't rely on MC router hearing unicast reports */ if (skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST) dropped = igmp_heard_report(in_dev, ih->group); break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 return pim_rcv_v1(skb); #endif case IGMPV3_HOST_MEMBERSHIP_REPORT: case IGMP_DVMRP: case IGMP_TRACE: case IGMP_HOST_LEAVE_MESSAGE: case IGMP_MTRACE: case IGMP_MTRACE_RESP: break; default: break; } drop: if (dropped) kfree_skb(skb); else consume_skb(skb); return 0; } #endif /* * Add a filter to a device */ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG. We will get multicast token leakage, when IFF_MULTICAST is changed. This check should be done in ndo_set_rx_mode routine. Something sort of: if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; } --ANK */ if (arp_mc_map(addr, buf, dev, 0) == 0) dev_mc_add(dev, buf); } /* * Remove a filter from a device */ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; if (arp_mc_map(addr, buf, dev, 0) == 0) dev_mc_del(dev, buf); } #ifdef CONFIG_IP_MULTICAST /* * deleted ip_mc_list manipulation */ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, gfp_t gfp) { struct ip_mc_list *pmc; struct net *net = dev_net(in_dev->dev); /* this is an "ip_mc_list" for convenience; only the fields below * are actually used. In particular, the refcnt and users are not * used for management of the delete list. Using the same structure * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. 
*/ pmc = kzalloc(sizeof(*pmc), gfp); if (!pmc) return; spin_lock_init(&pmc->lock); spin_lock_bh(&im->lock); pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; pmc->tomb = im->tomb; pmc->sources = im->sources; im->tomb = im->sources = NULL; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = pmc->crcount; } spin_unlock_bh(&im->lock); spin_lock_bh(&in_dev->mc_tomb_lock); pmc->next = in_dev->mc_tomb; in_dev->mc_tomb = pmc; spin_unlock_bh(&in_dev->mc_tomb_lock); } /* * restore ip_mc_list deleted records */ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) { struct ip_mc_list *pmc, *pmc_prev; struct ip_sf_list *psf; struct net *net = dev_net(in_dev->dev); __be32 multiaddr = im->multiaddr; spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; for (pmc = in_dev->mc_tomb; pmc; pmc = pmc->next) { if (pmc->multiaddr == multiaddr) break; pmc_prev = pmc; } if (pmc) { if (pmc_prev) pmc_prev->next = pmc->next; else in_dev->mc_tomb = pmc->next; } spin_unlock_bh(&in_dev->mc_tomb_lock); spin_lock_bh(&im->lock); if (pmc) { im->interface = pmc->interface; if (im->sfmode == MCAST_INCLUDE) { swap(im->tomb, pmc->tomb); swap(im->sources, pmc->sources); for (psf = im->sources; psf; psf = psf->sf_next) psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); } else { im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); } in_dev_put(pmc->interface); kfree_pmc(pmc); } spin_unlock_bh(&im->lock); } /* * flush ip_mc_list deleted records */ static void igmpv3_clear_delrec(struct in_device *in_dev) { struct ip_mc_list *pmc, *nextpmc; spin_lock_bh(&in_dev->mc_tomb_lock); pmc = in_dev->mc_tomb; in_dev->mc_tomb = NULL; spin_unlock_bh(&in_dev->mc_tomb_lock); for (; pmc; pmc = nextpmc) { nextpmc = pmc->next; ip_mc_clear_src(pmc); in_dev_put(pmc->interface); kfree_pmc(pmc); } /* clear dead sources, too */ rcu_read_lock(); for_each_pmc_rcu(in_dev, pmc) { struct ip_sf_list *psf; spin_lock_bh(&pmc->lock); psf = pmc->tomb; pmc->tomb = NULL; spin_unlock_bh(&pmc->lock); ip_sf_list_clear_all(psf); } rcu_read_unlock(); } #endif static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) { struct in_device *in_dev = im->interface; #ifdef CONFIG_IP_MULTICAST struct net *net = dev_net(in_dev->dev); int reporter; #endif if (im->loaded) { im->loaded = 0; ip_mc_filter_del(in_dev, im->multiaddr); } #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; if (ipv4_is_local_multicast(im->multiaddr) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; reporter = im->reporter; igmp_stop_timer(im); if (!in_dev->dead) { if (IGMP_V1_SEEN(in_dev)) return; if (IGMP_V2_SEEN(in_dev)) { if (reporter) igmp_send_report(in_dev, im, IGMP_HOST_LEAVE_MESSAGE); return; } /* IGMPv3 */ igmpv3_add_delrec(in_dev, im, gfp); igmp_ifc_event(in_dev); } #endif } static void igmp_group_dropped(struct ip_mc_list *im) { __igmp_group_dropped(im, GFP_KERNEL); } static void igmp_group_added(struct ip_mc_list *im) { struct in_device *in_dev = im->interface; #ifdef CONFIG_IP_MULTICAST struct net *net = dev_net(in_dev->dev); #endif if (im->loaded == 0) { im->loaded = 1; ip_mc_filter_add(in_dev, im->multiaddr); } #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; if (ipv4_is_local_multicast(im->multiaddr) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; if 
(in_dev->dead) return; im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { spin_lock_bh(&im->lock); igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); spin_unlock_bh(&im->lock); return; } /* else, v3 */ /* Based on RFC3376 5.1, for newly added INCLUDE SSM, we should * not send filter-mode change record as the mode should be from * IN() to IN(A). */ if (im->sfmode == MCAST_EXCLUDE) im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); igmp_ifc_event(in_dev); #endif } /* * Multicast list managers */ static u32 ip_mc_hash(const struct ip_mc_list *im) { return hash_32((__force u32)im->multiaddr, MC_HASH_SZ_LOG); } static void ip_mc_hash_add(struct in_device *in_dev, struct ip_mc_list *im) { struct ip_mc_list __rcu **mc_hash; u32 hash; mc_hash = rtnl_dereference(in_dev->mc_hash); if (mc_hash) { hash = ip_mc_hash(im); im->next_hash = mc_hash[hash]; rcu_assign_pointer(mc_hash[hash], im); return; } /* do not use a hash table for small number of items */ if (in_dev->mc_count < 4) return; mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, GFP_KERNEL); if (!mc_hash) return; for_each_pmc_rtnl(in_dev, im) { hash = ip_mc_hash(im); im->next_hash = mc_hash[hash]; RCU_INIT_POINTER(mc_hash[hash], im); } rcu_assign_pointer(in_dev->mc_hash, mc_hash); } static void ip_mc_hash_remove(struct in_device *in_dev, struct ip_mc_list *im) { struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash); struct ip_mc_list *aux; if (!mc_hash) return; mc_hash += ip_mc_hash(im); while ((aux = rtnl_dereference(*mc_hash)) != im) mc_hash = &aux->next_hash; *mc_hash = im->next_hash; } /* * A socket has joined a multicast group on device dev. */ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode, gfp_t gfp) { struct ip_mc_list *im; ASSERT_RTNL(); for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == addr) { im->users++; ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0); goto out; } } im = kzalloc(sizeof(*im), gfp); if (!im) goto out; im->users = 1; im->interface = in_dev; in_dev_hold(in_dev); im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = mode; im->sfcount[mode] = 1; refcount_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST timer_setup(&im->timer, igmp_timer_expire, 0); #endif im->next_rcu = in_dev->mc_list; in_dev->mc_count++; rcu_assign_pointer(in_dev->mc_list, im); ip_mc_hash_add(in_dev, im); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); if (!in_dev->dead) ip_rt_multicast_event(in_dev); out: return; } void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) { ____ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE, gfp); } EXPORT_SYMBOL(__ip_mc_inc_group); void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { __ip_mc_inc_group(in_dev, addr, GFP_KERNEL); } EXPORT_SYMBOL(ip_mc_inc_group); static int ip_mc_check_iphdr(struct sk_buff *skb) { const struct iphdr *iph; unsigned int len; unsigned int offset = skb_network_offset(skb) + sizeof(*iph); if (!pskb_may_pull(skb, offset)) return -EINVAL; iph = ip_hdr(skb); if (iph->version != 4 || ip_hdrlen(skb) < sizeof(*iph)) return -EINVAL; offset += ip_hdrlen(skb) - sizeof(*iph); if (!pskb_may_pull(skb, offset)) return -EINVAL; iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) return -EINVAL; len = skb_network_offset(skb) + ntohs(iph->tot_len); if (skb->len < len || len < offset) return -EINVAL; skb_set_transport_header(skb, offset); 
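	/* The IP header length, checksum and total length are all sane; the
	 * transport header now points at the start of the IGMP message.
	 */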
return 0; } static int ip_mc_check_igmp_reportv3(struct sk_buff *skb) { unsigned int len = skb_transport_offset(skb); len += sizeof(struct igmpv3_report); return ip_mc_may_pull(skb, len) ? 0 : -EINVAL; } static int ip_mc_check_igmp_query(struct sk_buff *skb) { unsigned int transport_len = ip_transport_len(skb); unsigned int len; /* IGMPv{1,2}? */ if (transport_len != sizeof(struct igmphdr)) { /* or IGMPv3? */ if (transport_len < sizeof(struct igmpv3_query)) return -EINVAL; len = skb_transport_offset(skb) + sizeof(struct igmpv3_query); if (!ip_mc_may_pull(skb, len)) return -EINVAL; } /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer * all-systems destination addresses (224.0.0.1) for general queries */ if (!igmp_hdr(skb)->group && ip_hdr(skb)->daddr != htonl(INADDR_ALLHOSTS_GROUP)) return -EINVAL; return 0; } static int ip_mc_check_igmp_msg(struct sk_buff *skb) { switch (igmp_hdr(skb)->type) { case IGMP_HOST_LEAVE_MESSAGE: case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: return 0; case IGMPV3_HOST_MEMBERSHIP_REPORT: return ip_mc_check_igmp_reportv3(skb); case IGMP_HOST_MEMBERSHIP_QUERY: return ip_mc_check_igmp_query(skb); default: return -ENOMSG; } } static __sum16 ip_mc_validate_checksum(struct sk_buff *skb) { return skb_checksum_simple_validate(skb); } static int ip_mc_check_igmp_csum(struct sk_buff *skb) { unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr); unsigned int transport_len = ip_transport_len(skb); struct sk_buff *skb_chk; if (!ip_mc_may_pull(skb, len)) return -EINVAL; skb_chk = skb_checksum_trimmed(skb, transport_len, ip_mc_validate_checksum); if (!skb_chk) return -EINVAL; if (skb_chk != skb) kfree_skb(skb_chk); return 0; } /** * ip_mc_check_igmp - checks whether this is a sane IGMP packet * @skb: the skb to validate * * Checks whether an IPv4 packet is a valid IGMP packet. If so sets * skb transport header accordingly and returns zero. * * -EINVAL: A broken packet was detected, i.e. it violates some internet * standard * -ENOMSG: IP header validation succeeded but it is not an IGMP packet. * -ENOMEM: A memory allocation failure happened. * * Caller needs to set the skb network header and free any returned skb if it * differs from the provided skb. */ int ip_mc_check_igmp(struct sk_buff *skb) { int ret = ip_mc_check_iphdr(skb); if (ret < 0) return ret; if (ip_hdr(skb)->protocol != IPPROTO_IGMP) return -ENOMSG; ret = ip_mc_check_igmp_csum(skb); if (ret < 0) return ret; return ip_mc_check_igmp_msg(skb); } EXPORT_SYMBOL(ip_mc_check_igmp); /* * Resend IGMP JOIN report; used by netdev notifier. 
*/ static void ip_mc_rejoin_groups(struct in_device *in_dev) { #ifdef CONFIG_IP_MULTICAST struct ip_mc_list *im; int type; struct net *net = dev_net(in_dev->dev); ASSERT_RTNL(); for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; /* a failover is happening and switches * must be notified immediately */ if (IGMP_V1_SEEN(in_dev)) type = IGMP_HOST_MEMBERSHIP_REPORT; else if (IGMP_V2_SEEN(in_dev)) type = IGMPV2_HOST_MEMBERSHIP_REPORT; else type = IGMPV3_HOST_MEMBERSHIP_REPORT; igmp_send_report(in_dev, im, type); } #endif } /* * A socket has left a multicast group on device dev */ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) { struct ip_mc_list *i; struct ip_mc_list __rcu **ip; ASSERT_RTNL(); for (ip = &in_dev->mc_list; (i = rtnl_dereference(*ip)) != NULL; ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { ip_mc_hash_remove(in_dev, i); *ip = i->next_rcu; in_dev->mc_count--; __igmp_group_dropped(i, gfp); ip_mc_clear_src(i); if (!in_dev->dead) ip_rt_multicast_event(in_dev); ip_ma_put(i); return; } break; } } } EXPORT_SYMBOL(__ip_mc_dec_group); /* Device changing type */ void ip_mc_unmap(struct in_device *in_dev) { struct ip_mc_list *pmc; ASSERT_RTNL(); for_each_pmc_rtnl(in_dev, pmc) igmp_group_dropped(pmc); } void ip_mc_remap(struct in_device *in_dev) { struct ip_mc_list *pmc; ASSERT_RTNL(); for_each_pmc_rtnl(in_dev, pmc) { #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, pmc); #endif igmp_group_added(pmc); } } /* Device going down */ void ip_mc_down(struct in_device *in_dev) { struct ip_mc_list *pmc; ASSERT_RTNL(); for_each_pmc_rtnl(in_dev, pmc) igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); in_dev->mr_gq_running = 0; if (del_timer(&in_dev->mr_gq_timer)) __in_dev_put(in_dev); #endif ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS); } #ifdef CONFIG_IP_MULTICAST static void ip_mc_reset(struct in_device *in_dev) { struct net *net = dev_net(in_dev->dev); in_dev->mr_qi = IGMP_QUERY_INTERVAL; in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); } #else static void ip_mc_reset(struct in_device *in_dev) { } #endif void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0); timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0); #endif ip_mc_reset(in_dev); spin_lock_init(&in_dev->mc_tomb_lock); } /* Device going up */ void ip_mc_up(struct in_device *in_dev) { struct ip_mc_list *pmc; ASSERT_RTNL(); ip_mc_reset(in_dev); ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for_each_pmc_rtnl(in_dev, pmc) { #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, pmc); #endif igmp_group_added(pmc); } } /* * Device is about to be destroyed: clean up. 
*/ void ip_mc_destroy_dev(struct in_device *in_dev) { struct ip_mc_list *i; ASSERT_RTNL(); /* Deactivate timers */ ip_mc_down(in_dev); #ifdef CONFIG_IP_MULTICAST igmpv3_clear_delrec(in_dev); #endif while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { in_dev->mc_list = i->next_rcu; in_dev->mc_count--; ip_mc_clear_src(i); ip_ma_put(i); } } /* RTNL is locked */ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { struct net_device *dev = NULL; struct in_device *idev = NULL; if (imr->imr_ifindex) { idev = inetdev_by_index(net, imr->imr_ifindex); return idev; } if (imr->imr_address.s_addr) { dev = __ip_dev_find(net, imr->imr_address.s_addr, false); if (!dev) return NULL; } if (!dev) { struct rtable *rt = ip_route_output(net, imr->imr_multiaddr.s_addr, 0, 0, 0); if (!IS_ERR(rt)) { dev = rt->dst.dev; ip_rt_put(rt); } } if (dev) { imr->imr_ifindex = dev->ifindex; idev = __in_dev_get_rtnl(dev); } return idev; } /* * Join a socket to a group */ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; int rv = 0; psf_prev = NULL; for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; } if (!psf || psf->sf_count[sfmode] == 0) { /* source filter not found, or count wrong => bug */ return -ESRCH; } psf->sf_count[sfmode]--; if (psf->sf_count[sfmode] == 0) { ip_rt_multicast_event(pmc->interface); } if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) { #ifdef CONFIG_IP_MULTICAST struct in_device *in_dev = pmc->interface; struct net *net = dev_net(in_dev->dev); #endif /* no more filters for this source */ if (psf_prev) psf_prev->sf_next = psf->sf_next; else pmc->sources = psf->sf_next; #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; } else #endif kfree(psf); } return rv; } #ifndef CONFIG_IP_MULTICAST #define igmp_ifc_event(x) do { } while (0) #endif static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int changerec = 0; int i, err; if (!in_dev) return -ENODEV; rcu_read_lock(); for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? 
bug */ rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif if (!delta) { err = -EINVAL; if (!pmc->sfcount[sfmode]) goto out_unlock; pmc->sfcount[sfmode]--; } err = 0; for (i = 0; i < sfcount; i++) { int rv = ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); changerec |= rv > 0; if (!err && rv < 0) err = rv; } if (pmc->sfmode == MCAST_EXCLUDE && pmc->sfcount[MCAST_EXCLUDE] == 0 && pmc->sfcount[MCAST_INCLUDE]) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; struct net *net = dev_net(in_dev->dev); #endif /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); } else if (sf_setstate(pmc) || changerec) { igmp_ifc_event(pmc->interface); #endif } out_unlock: spin_unlock_bh(&pmc->lock); return err; } /* * Add multicast single-source filter to the interface list */ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; psf_prev = NULL; for (psf = pmc->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == *psfsrc) break; psf_prev = psf; } if (!psf) { psf = kzalloc(sizeof(*psf), GFP_ATOMIC); if (!psf) return -ENOBUFS; psf->sf_inaddr = *psfsrc; if (psf_prev) { psf_prev->sf_next = psf; } else pmc->sources = psf; } psf->sf_count[sfmode]++; if (psf->sf_count[sfmode] == 1) { ip_rt_multicast_event(pmc->interface); } return 0; } #ifdef CONFIG_IP_MULTICAST static void sf_markstate(struct ip_mc_list *pmc) { struct ip_sf_list *psf; int mca_xcount = pmc->sfcount[MCAST_EXCLUDE]; for (psf = pmc->sources; psf; psf = psf->sf_next) if (pmc->sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; } else psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0; } static int sf_setstate(struct ip_mc_list *pmc) { struct ip_sf_list *psf, *dpsf; int mca_xcount = pmc->sfcount[MCAST_EXCLUDE]; int qrv = pmc->interface->mr_qrv; int new_in, rv; rv = 0; for (psf = pmc->sources; psf; psf = psf->sf_next) { if (pmc->sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; } else new_in = psf->sf_count[MCAST_INCLUDE] != 0; if (new_in) { if (!psf->sf_oldin) { struct ip_sf_list *prev = NULL; for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) { if (dpsf->sf_inaddr == psf->sf_inaddr) break; prev = dpsf; } if (dpsf) { if (prev) prev->sf_next = dpsf->sf_next; else pmc->tomb = dpsf->sf_next; kfree(dpsf); } psf->sf_crcount = qrv; rv++; } } else if (psf->sf_oldin) { psf->sf_crcount = 0; /* * add or update "delete" records if an active filter * is now inactive */ for (dpsf = pmc->tomb; dpsf; dpsf = dpsf->sf_next) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; /* pmc->lock held by callers */ dpsf->sf_next = pmc->tomb; pmc->tomb = dpsf; } dpsf->sf_crcount = qrv; rv++; } } return rv; } #endif /* * Add multicast source filter list to the interface list */ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int isexclude; int i, err; if (!in_dev) return -ENODEV; rcu_read_lock(); for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? 
bug */ rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif isexclude = pmc->sfmode == MCAST_EXCLUDE; if (!delta) pmc->sfcount[sfmode]++; err = 0; for (i = 0; i < sfcount; i++) { err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; } if (err) { int j; if (!delta) pmc->sfcount[sfmode]--; for (j = 0; j < i; j++) (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; struct net *net = dev_net(pmc->interface->dev); in_dev = pmc->interface; #endif /* filter mode change */ if (pmc->sfcount[MCAST_EXCLUDE]) pmc->sfmode = MCAST_EXCLUDE; else if (pmc->sfcount[MCAST_INCLUDE]) pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); } else if (sf_setstate(pmc)) { igmp_ifc_event(in_dev); #endif } spin_unlock_bh(&pmc->lock); return err; } static void ip_mc_clear_src(struct ip_mc_list *pmc) { struct ip_sf_list *tomb, *sources; spin_lock_bh(&pmc->lock); tomb = pmc->tomb; pmc->tomb = NULL; sources = pmc->sources; pmc->sources = NULL; pmc->sfmode = MCAST_EXCLUDE; pmc->sfcount[MCAST_INCLUDE] = 0; pmc->sfcount[MCAST_EXCLUDE] = 1; spin_unlock_bh(&pmc->lock); ip_sf_list_clear_all(tomb); ip_sf_list_clear_all(sources); } /* Join a multicast group */ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, unsigned int mode) { __be32 addr = imr->imr_multiaddr.s_addr; struct ip_mc_socklist *iml, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); int ifindex; int count = 0; int err; ASSERT_RTNL(); if (!ipv4_is_multicast(addr)) return -EINVAL; in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { err = -ENODEV; goto done; } err = -EADDRINUSE; ifindex = imr->imr_ifindex; for_each_pmc_rtnl(inet, i) { if (i->multi.imr_multiaddr.s_addr == addr && i->multi.imr_ifindex == ifindex) goto done; count++; } err = -ENOBUFS; if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships)) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = mode; rcu_assign_pointer(inet->mc_list, iml); ____ip_mc_inc_group(in_dev, addr, mode, GFP_KERNEL); err = 0; done: return err; } /* Join ASM (Any-Source Multicast) group */ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) { return __ip_mc_join_group(sk, imr, MCAST_EXCLUDE); } EXPORT_SYMBOL(ip_mc_join_group); /* Join SSM (Source-Specific Multicast) group */ int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr, unsigned int mode) { return __ip_mc_join_group(sk, imr, mode); } static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; if (!psf) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, psf->sl_count, psf->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc); 
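	/* The socket's source list itself is freed only after an RCU grace
	 * period, since lockless readers may still be walking it.
	 */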
kfree_rcu(psf, rcu); return err; } int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; struct ip_mc_socklist __rcu **imlp; struct in_device *in_dev; struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; int ret = -EADDRNOTAVAIL; ASSERT_RTNL(); in_dev = ip_mc_find_dev(net, imr); if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) { ret = -ENODEV; goto out; } ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = rtnl_dereference(*imlp)) != NULL; imlp = &iml->next_rcu) { if (iml->multi.imr_multiaddr.s_addr != group) continue; if (ifindex) { if (iml->multi.imr_ifindex != ifindex) continue; } else if (imr->imr_address.s_addr && imr->imr_address.s_addr != iml->multi.imr_address.s_addr) continue; (void) ip_mc_leave_src(sk, iml, in_dev); *imlp = iml->next_rcu; if (in_dev) ip_mc_dec_group(in_dev, group); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); kfree_rcu(iml, rcu); return 0; } out: return ret; } EXPORT_SYMBOL(ip_mc_leave_group); int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mreq_source *mreqs, int ifindex) { int err; struct ip_mreqn imr; __be32 addr = mreqs->imr_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; struct net *net = sock_net(sk); int leavegroup = 0; int i, j, rv; if (!ipv4_is_multicast(addr)) return -EINVAL; ASSERT_RTNL(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; imr.imr_ifindex = ifindex; in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; goto done; } err = -EADDRNOTAVAIL; for_each_pmc_rtnl(inet, pmc) { if ((pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr) && (pmc->multi.imr_ifindex == imr.imr_ifindex)) break; } if (!pmc) { /* must have a prior join */ err = -EINVAL; goto done; } /* if a source filter was set, must be the same mode as before */ if (pmc->sflist) { if (pmc->sfmode != omode) { err = -EINVAL; goto done; } } else if (pmc->sfmode != omode) { /* allow mode switches for empty-set filters */ ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 0, NULL, 0); ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, pmc->sfmode, 0, NULL, 0); pmc->sfmode = omode; } psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ rv = !0; for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, sizeof(__be32)); if (rv == 0) break; } if (rv) /* source not found */ goto done; /* err = -EADDRNOTAVAIL */ /* special case - (INCLUDE, empty) == LEAVE_GROUP */ if (psl->sl_count == 1 && omode == MCAST_INCLUDE) { leavegroup = 1; goto done; } /* update the interface filter */ ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); for (j = i+1; j < psl->sl_count; j++) psl->sl_addr[j-1] = psl->sl_addr[j]; psl->sl_count--; err = 0; goto done; } /* else, add a new source to the filter */ if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { err = -ENOBUFS; goto done; } if (!psl || psl->sl_count == psl->sl_max) { struct ip_sf_socklist *newpsl; int count = IP_SFBLOCK; if (psl) count += psl->sl_max; newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; } newpsl->sl_max = count; newpsl->sl_count = count - IP_SFBLOCK; if (psl) { for (i = 0; i < psl->sl_count; i++) 
newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); } rcu_assign_pointer(pmc->sflist, newpsl); if (psl) kfree_rcu(psl, rcu); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i = 0; i < psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, sizeof(__be32)); if (rv == 0) break; } if (rv == 0) /* address already there is an error */ goto done; for (j = psl->sl_count-1; j >= i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = mreqs->imr_sourceaddr; psl->sl_count++; err = 0; /* update the interface list */ ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); done: if (leavegroup) err = ip_mc_leave_group(sk, &imr); return err; } int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) { int err = 0; struct ip_mreqn imr; __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *newpsl, *psl; struct net *net = sock_net(sk); int leavegroup = 0; if (!ipv4_is_multicast(addr)) return -EINVAL; if (msf->imsf_fmode != MCAST_INCLUDE && msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; ASSERT_RTNL(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = ifindex; in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; goto done; } /* special case - (INCLUDE, empty) == LEAVE_GROUP */ if (msf->imsf_fmode == MCAST_INCLUDE && msf->imsf_numsrc == 0) { leavegroup = 1; goto done; } for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; } if (!pmc) { /* must have a prior join */ err = -EINVAL; goto done; } if (msf->imsf_numsrc) { newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, msf->imsf_numsrc), GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; } newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc; memcpy(newpsl->sl_addr, msf->imsf_slist_flex, flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc)); err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, newpsl->sl_max)); goto done; } } else { newpsl = NULL; (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, 0, NULL, 0); } psl = rtnl_dereference(pmc->sflist); if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); /* decrease mem now to avoid the memleak warning */ atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); } else { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); } rcu_assign_pointer(pmc->sflist, newpsl); if (psl) kfree_rcu(psl, rcu); pmc->sfmode = msf->imsf_fmode; err = 0; done: if (leavegroup) err = ip_mc_leave_group(sk, &imr); return err; } int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, sockptr_t optval, sockptr_t optlen) { int err, len, count, copycount, msf_size; struct ip_mreqn imr; __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; struct net *net = sock_net(sk); ASSERT_RTNL(); if (!ipv4_is_multicast(addr)) return -EINVAL; imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = 0; 
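	/* No interface index is supplied for MSF lookups; ip_mc_find_dev()
	 * resolves the device from imsf_interface, or by routing to the
	 * multicast group when that address is unset.
	 */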
in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; goto done; } err = -EADDRNOTAVAIL; for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; } if (!pmc) /* must have a prior join */ goto done; msf->imsf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); if (!psl) { count = 0; } else { count = psl->sl_count; } copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc; len = flex_array_size(psl, sl_addr, copycount); msf->imsf_numsrc = count; msf_size = IP_MSFILTER_SIZE(copycount); if (copy_to_sockptr(optlen, &msf_size, sizeof(int)) || copy_to_sockptr(optval, msf, IP_MSFILTER_SIZE(0))) { return -EFAULT; } if (len && copy_to_sockptr_offset(optval, offsetof(struct ip_msfilter, imsf_slist_flex), psl->sl_addr, len)) return -EFAULT; return 0; done: return err; } int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, sockptr_t optval, size_t ss_offset) { int i, count, copycount; struct sockaddr_in *psin; __be32 addr; struct ip_mc_socklist *pmc; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; ASSERT_RTNL(); psin = (struct sockaddr_in *)&gsf->gf_group; if (psin->sin_family != AF_INET) return -EINVAL; addr = psin->sin_addr.s_addr; if (!ipv4_is_multicast(addr)) return -EINVAL; for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; } if (!pmc) /* must have a prior join */ return -EADDRNOTAVAIL; gsf->gf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; for (i = 0; i < copycount; i++) { struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; memset(&ss, 0, sizeof(ss)); psin->sin_family = AF_INET; psin->sin_addr.s_addr = psl->sl_addr[i]; if (copy_to_sockptr_offset(optval, ss_offset, &ss, sizeof(ss))) return -EFAULT; ss_offset += sizeof(ss); } return 0; } /* * check if a multicast source filter allows delivery for a given <src,dst,intf> */ int ip_mc_sf_allow(const struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif, int sdif) { const struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; int ret; ret = 1; if (!ipv4_is_multicast(loc_addr)) goto out; rcu_read_lock(); for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && (pmc->multi.imr_ifindex == dif || (sdif && pmc->multi.imr_ifindex == sdif))) break; } ret = inet_test_bit(MC_ALL, sk); if (!pmc) goto unlock; psl = rcu_dereference(pmc->sflist); ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) goto unlock; for (i = 0; i < psl->sl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } ret = 0; if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) goto unlock; if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) goto unlock; ret = 1; unlock: rcu_read_unlock(); out: return ret; } /* * A socket is closing. 
*/ void ip_mc_drop_socket(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; struct net *net = sock_net(sk); if (!inet->mc_list) return; rtnl_lock(); while ((iml = rtnl_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; inet->mc_list = iml->next_rcu; in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); kfree_rcu(iml, rcu); } rtnl_unlock(); } /* called with rcu_read_lock() */ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u8 proto) { struct ip_mc_list *im; struct ip_mc_list __rcu **mc_hash; struct ip_sf_list *psf; int rv = 0; mc_hash = rcu_dereference(in_dev->mc_hash); if (mc_hash) { u32 hash = hash_32((__force u32)mc_addr, MC_HASH_SZ_LOG); for (im = rcu_dereference(mc_hash[hash]); im != NULL; im = rcu_dereference(im->next_hash)) { if (im->multiaddr == mc_addr) break; } } else { for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; } } if (im && proto == IPPROTO_IGMP) { rv = 1; } else if (im) { if (src_addr) { spin_lock_bh(&im->lock); for (psf = im->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == src_addr) break; } if (psf) rv = psf->sf_count[MCAST_INCLUDE] || psf->sf_count[MCAST_EXCLUDE] != im->sfcount[MCAST_EXCLUDE]; else rv = im->sfcount[MCAST_EXCLUDE] != 0; spin_unlock_bh(&im->lock); } else rv = 1; /* unspecified source; tentatively allow */ } return rv; } #if defined(CONFIG_PROC_FS) struct igmp_mc_iter_state { struct seq_net_private p; struct net_device *dev; struct in_device *in_dev; }; #define igmp_mc_seq_private(seq) ((struct igmp_mc_iter_state *)(seq)->private) static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) { struct net *net = seq_file_net(seq); struct ip_mc_list *im = NULL; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; for_each_netdev_rcu(net, state->dev) { struct in_device *in_dev; in_dev = __in_dev_get_rcu(state->dev); if (!in_dev) continue; im = rcu_dereference(in_dev->mc_list); if (im) { state->in_dev = in_dev; break; } } return im; } static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); im = rcu_dereference(im->next_rcu); while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->in_dev = NULL; break; } state->in_dev = __in_dev_get_rcu(state->dev); if (!state->in_dev) continue; im = rcu_dereference(state->in_dev->mc_list); } return im; } static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos) { struct ip_mc_list *im = igmp_mc_get_first(seq); if (im) while (pos && (im = igmp_mc_get_next(seq, im)) != NULL) --pos; return pos ? NULL : im; } static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) __acquires(rcu) { rcu_read_lock(); return *pos ? 
igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_mc_list *im; if (v == SEQ_START_TOKEN) im = igmp_mc_get_first(seq); else im = igmp_mc_get_next(seq, v); ++*pos; return im; } static void igmp_mc_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; state->dev = NULL; rcu_read_unlock(); } static int igmp_mc_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n"); else { struct ip_mc_list *im = v; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; long delta; #ifdef CONFIG_IP_MULTICAST querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : IGMP_V2_SEEN(state->in_dev) ? "V2" : "V3"; #else querier = "NONE"; #endif if (rcu_access_pointer(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } delta = im->timer.expires - jiffies; seq_printf(seq, "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", im->multiaddr, im->users, im->tm_running, im->tm_running ? jiffies_delta_to_clock_t(delta) : 0, im->reporter); } return 0; } static const struct seq_operations igmp_mc_seq_ops = { .start = igmp_mc_seq_start, .next = igmp_mc_seq_next, .stop = igmp_mc_seq_stop, .show = igmp_mc_seq_show, }; struct igmp_mcf_iter_state { struct seq_net_private p; struct net_device *dev; struct in_device *idev; struct ip_mc_list *im; }; #define igmp_mcf_seq_private(seq) ((struct igmp_mcf_iter_state *)(seq)->private) static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) { struct net *net = seq_file_net(seq); struct ip_sf_list *psf = NULL; struct ip_mc_list *im = NULL; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); state->idev = NULL; state->im = NULL; for_each_netdev_rcu(net, state->dev) { struct in_device *idev; idev = __in_dev_get_rcu(state->dev); if (unlikely(!idev)) continue; im = rcu_dereference(idev->mc_list); if (likely(im)) { spin_lock_bh(&im->lock); psf = im->sources; if (likely(psf)) { state->im = im; state->idev = idev; break; } spin_unlock_bh(&im->lock); } } return psf; } static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_list *psf) { struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); psf = psf->sf_next; while (!psf) { spin_unlock_bh(&state->im->lock); state->im = state->im->next; while (!state->im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; goto out; } state->idev = __in_dev_get_rcu(state->dev); if (!state->idev) continue; state->im = rcu_dereference(state->idev->mc_list); } spin_lock_bh(&state->im->lock); psf = state->im->sources; } out: return psf; } static struct ip_sf_list *igmp_mcf_get_idx(struct seq_file *seq, loff_t pos) { struct ip_sf_list *psf = igmp_mcf_get_first(seq); if (psf) while (pos && (psf = igmp_mcf_get_next(seq, psf)) != NULL) --pos; return pos ? NULL : psf; } static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos) __acquires(rcu) { rcu_read_lock(); return *pos ? 
igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *igmp_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_sf_list *psf; if (v == SEQ_START_TOKEN) psf = igmp_mcf_get_first(seq); else psf = igmp_mcf_get_next(seq, v); ++*pos; return psf; } static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); if (likely(state->im)) { spin_unlock_bh(&state->im->lock); state->im = NULL; } state->idev = NULL; state->dev = NULL; rcu_read_unlock(); } static int igmp_mcf_seq_show(struct seq_file *seq, void *v) { struct ip_sf_list *psf = v; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); if (v == SEQ_START_TOKEN) { seq_puts(seq, "Idx Device MCA SRC INC EXC\n"); } else { seq_printf(seq, "%3d %6.6s 0x%08x " "0x%08x %6lu %6lu\n", state->dev->ifindex, state->dev->name, ntohl(state->im->multiaddr), ntohl(psf->sf_inaddr), psf->sf_count[MCAST_INCLUDE], psf->sf_count[MCAST_EXCLUDE]); } return 0; } static const struct seq_operations igmp_mcf_seq_ops = { .start = igmp_mcf_seq_start, .next = igmp_mcf_seq_next, .stop = igmp_mcf_seq_stop, .show = igmp_mcf_seq_show, }; static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; int err; pde = proc_create_net("igmp", 0444, net->proc_net, &igmp_mc_seq_ops, sizeof(struct igmp_mc_iter_state)); if (!pde) goto out_igmp; pde = proc_create_net("mcfilter", 0444, net->proc_net, &igmp_mcf_seq_ops, sizeof(struct igmp_mcf_iter_state)); if (!pde) goto out_mcfilter; err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET, SOCK_DGRAM, 0, net); if (err < 0) { pr_err("Failed to initialize the IGMP autojoin socket (err %d)\n", err); goto out_sock; } return 0; out_sock: remove_proc_entry("mcfilter", net->proc_net); out_mcfilter: remove_proc_entry("igmp", net->proc_net); out_igmp: return -ENOMEM; } static void __net_exit igmp_net_exit(struct net *net) { remove_proc_entry("mcfilter", net->proc_net); remove_proc_entry("igmp", net->proc_net); inet_ctl_sock_destroy(net->ipv4.mc_autojoin_sk); } static struct pernet_operations igmp_net_ops = { .init = igmp_net_init, .exit = igmp_net_exit, }; #endif static int igmp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; switch (event) { case NETDEV_RESEND_IGMP: in_dev = __in_dev_get_rtnl(dev); if (in_dev) ip_mc_rejoin_groups(in_dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block igmp_notifier = { .notifier_call = igmp_netdev_event, }; int __init igmp_mc_init(void) { #if defined(CONFIG_PROC_FS) int err; err = register_pernet_subsys(&igmp_net_ops); if (err) return err; err = register_netdevice_notifier(&igmp_notifier); if (err) goto reg_notif_fail; return 0; reg_notif_fail: unregister_pernet_subsys(&igmp_net_ops); return err; #else return register_netdevice_notifier(&igmp_notifier); #endif }
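/*
 * Illustrative userspace sketch (not part of the kernel source above): it
 * exercises the source-filtered join path serviced by ip_mc_source() via
 * setsockopt(IP_ADD_SOURCE_MEMBERSHIP), and thereby the per-packet
 * ip_mc_sf_allow() delivery check. The group (232.1.1.1), source
 * (192.0.2.10), UDP port and INADDR_ANY interface are arbitrary example
 * values, not taken from the code above.
 */
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct ip_mreq_source mreqs;
	struct sockaddr_in local;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Bind to the UDP port the multicast traffic arrives on. */
	memset(&local, 0, sizeof(local));
	local.sin_family = AF_INET;
	local.sin_addr.s_addr = htonl(INADDR_ANY);
	local.sin_port = htons(5000);
	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
		perror("bind");
		close(fd);
		return 1;
	}

	/* Join 232.1.1.1, accepting traffic only from source 192.0.2.10. */
	memset(&mreqs, 0, sizeof(mreqs));
	inet_pton(AF_INET, "232.1.1.1", &mreqs.imr_multiaddr);
	inet_pton(AF_INET, "192.0.2.10", &mreqs.imr_sourceaddr);
	mreqs.imr_interface.s_addr = htonl(INADDR_ANY);

	if (setsockopt(fd, IPPROTO_IP, IP_ADD_SOURCE_MEMBERSHIP,
		       &mreqs, sizeof(mreqs)) < 0) {
		perror("IP_ADD_SOURCE_MEMBERSHIP");
		close(fd);
		return 1;
	}

	/* ... receive datagrams; only the listed source is delivered ... */

	close(fd);
	return 0;
}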
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_PGTABLE_INVERT_H
#define _ASM_PGTABLE_INVERT_H 1

#ifndef __ASSEMBLY__

/*
 * A clear pte value is special, and doesn't get inverted.
 *
 * Note that even users that only pass a pgprot_t (rather
 * than a full pte) won't trigger the special zero case,
 * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED
 * set. So the all zero case really is limited to just the
 * cleared page table entry case.
 */
static inline bool __pte_needs_invert(u64 val)
{
	return val && !(val & _PAGE_PRESENT);
}

/* Get a mask to xor with the page table entry to get the correct pfn. */
static inline u64 protnone_mask(u64 val)
{
	return __pte_needs_invert(val) ? ~0ull : 0;
}

static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
{
	/*
	 * When a PTE transitions from NONE to !NONE or vice-versa
	 * invert the PFN part to stop speculation.
	 * pte_pfn undoes this when needed.
	 */
	if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
		val = (val & ~mask) | (~val & mask);
	return val;
}

#endif /* __ASSEMBLY__ */
#endif
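/*
 * Minimal userspace sketch (not kernel code) of the PFN-inversion round
 * trip implemented by the header above: flip_protnone_guard() XORs the PFN
 * bits whenever the present bit changes state ("to stop speculation", per
 * its comment), and protnone_mask() lets readers undo it. The
 * SKETCH_PAGE_PRESENT bit and SKETCH_PFN_MASK below are stand-in constants,
 * not the real x86 definitions; only the XOR logic mirrors the kernel code.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_PRESENT (1ULL << 0)
#define SKETCH_PFN_MASK     0x000ffffffffff000ULL	/* bits 12..51 */

static int pte_needs_invert(uint64_t val)
{
	return val && !(val & SKETCH_PAGE_PRESENT);
}

/* Mirror of flip_protnone_guard(): flip PFN bits when presence changes. */
static uint64_t flip_guard(uint64_t oldval, uint64_t val, uint64_t mask)
{
	if (pte_needs_invert(oldval) != pte_needs_invert(val))
		val = (val & ~mask) | (~val & mask);
	return val;
}

/* Mirror of protnone_mask(): readers XOR with this to recover the PFN. */
static uint64_t protnone_mask_sketch(uint64_t val)
{
	return pte_needs_invert(val) ? ~0ULL : 0;
}

int main(void)
{
	uint64_t present = (0x1234ULL << 12) | SKETCH_PAGE_PRESENT;
	uint64_t protnone, readback;

	/* Clearing the present bit (think mprotect(PROT_NONE)) inverts the PFN. */
	protnone = flip_guard(present, present & ~SKETCH_PAGE_PRESENT,
			      SKETCH_PFN_MASK);
	assert((protnone & SKETCH_PFN_MASK) != (present & SKETCH_PFN_MASK));

	/* A reader undoes the inversion before using the PFN. */
	readback = (protnone ^ protnone_mask_sketch(protnone)) & SKETCH_PFN_MASK;
	assert(readback == (present & SKETCH_PFN_MASK));

	printf("pfn 0x%llx survives the invert/recover round trip\n",
	       (unsigned long long)(readback >> 12));
	return 0;
}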
// SPDX-License-Identifier: GPL-2.0
/*
 * consolemap.c
 *
 * Mapping from internal code (such as Latin-1 or Unicode or IBM PC code)
 * to font positions.
 *
 * aeb, 950210
 *
 * Support for multiple unimaps by Jakub Jelinek <jj@ultra.linux.cz>, July 1998
 *
 * Fix bug in inverse translation. Stanislav Voronyi <stas@cnti.uanet.kharkov.ua>, Dec 1998
 *
 * In order to prevent the following circular lock dependency:
 * &mm->mmap_lock --> cpu_hotplug.lock --> console_lock --> &mm->mmap_lock
 *
 * We cannot allow page fault to happen while holding the console_lock.
 * Therefore, all the userspace copy operations have to be done outside
 * the console_lock critical sections.
 *
 * As all the affected functions are all called directly from vt_ioctl(), we
 * can allocate some small buffers directly on stack without worrying about
 * stack overflow.
 */

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/module.h>
#include <linux/kd.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/uaccess.h>
#include <linux/console.h>
#include <linux/consolemap.h>
#include <linux/vt_kern.h>
#include <linux/string.h>

static unsigned short translations[][E_TABSZ] = {
	/* 8-bit Latin-1 mapped to Unicode -- trivial mapping */
	[LAT1_MAP] = {
		0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
		0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
		0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
		0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
		0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
		0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
		0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
		0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
		0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
		0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
		0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
		0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
		0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
		0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
		0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
		0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
		0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
		0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
		0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
		0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
		0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
		0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
		0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
		0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
		0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
		0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
		0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
		0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
		0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
		0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
		0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
		0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
	},
	/* VT100 graphics mapped to Unicode */
	[GRAF_MAP] = {
		0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
		0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x2192, 0x2190, 0x2191, 0x2193, 0x002f, 0x2588, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x00a0, 0x25c6, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1, 0x2591, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba, 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c, 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff }, /* IBM Codepage 437 mapped to Unicode */ [IBMPC_MAP] = { 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c, 0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8, 0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x2302, 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 
0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0 }, /* User mapping -- default to codes for direct font mapping */ [USER_MAP] = { 0xf000, 0xf001, 0xf002, 0xf003, 0xf004, 0xf005, 0xf006, 0xf007, 0xf008, 0xf009, 0xf00a, 0xf00b, 0xf00c, 0xf00d, 0xf00e, 0xf00f, 0xf010, 0xf011, 0xf012, 0xf013, 0xf014, 0xf015, 0xf016, 0xf017, 0xf018, 0xf019, 0xf01a, 0xf01b, 0xf01c, 0xf01d, 0xf01e, 0xf01f, 0xf020, 0xf021, 0xf022, 0xf023, 0xf024, 0xf025, 0xf026, 0xf027, 0xf028, 0xf029, 0xf02a, 0xf02b, 0xf02c, 0xf02d, 0xf02e, 0xf02f, 0xf030, 0xf031, 0xf032, 0xf033, 0xf034, 0xf035, 0xf036, 0xf037, 0xf038, 0xf039, 0xf03a, 0xf03b, 0xf03c, 0xf03d, 0xf03e, 0xf03f, 0xf040, 0xf041, 0xf042, 0xf043, 0xf044, 0xf045, 0xf046, 0xf047, 0xf048, 0xf049, 0xf04a, 0xf04b, 0xf04c, 0xf04d, 0xf04e, 0xf04f, 0xf050, 0xf051, 0xf052, 0xf053, 0xf054, 0xf055, 0xf056, 0xf057, 0xf058, 0xf059, 0xf05a, 0xf05b, 0xf05c, 0xf05d, 0xf05e, 0xf05f, 0xf060, 0xf061, 0xf062, 0xf063, 0xf064, 0xf065, 0xf066, 0xf067, 0xf068, 0xf069, 0xf06a, 0xf06b, 0xf06c, 0xf06d, 0xf06e, 0xf06f, 0xf070, 0xf071, 0xf072, 0xf073, 0xf074, 0xf075, 0xf076, 0xf077, 0xf078, 0xf079, 0xf07a, 0xf07b, 0xf07c, 0xf07d, 0xf07e, 0xf07f, 0xf080, 0xf081, 0xf082, 0xf083, 0xf084, 0xf085, 0xf086, 0xf087, 0xf088, 0xf089, 0xf08a, 0xf08b, 0xf08c, 0xf08d, 0xf08e, 0xf08f, 0xf090, 0xf091, 0xf092, 0xf093, 0xf094, 0xf095, 0xf096, 0xf097, 0xf098, 0xf099, 0xf09a, 0xf09b, 0xf09c, 0xf09d, 0xf09e, 0xf09f, 0xf0a0, 0xf0a1, 0xf0a2, 0xf0a3, 0xf0a4, 0xf0a5, 0xf0a6, 0xf0a7, 0xf0a8, 0xf0a9, 0xf0aa, 0xf0ab, 0xf0ac, 0xf0ad, 0xf0ae, 0xf0af, 0xf0b0, 0xf0b1, 0xf0b2, 0xf0b3, 0xf0b4, 0xf0b5, 0xf0b6, 0xf0b7, 0xf0b8, 0xf0b9, 0xf0ba, 0xf0bb, 0xf0bc, 0xf0bd, 0xf0be, 0xf0bf, 0xf0c0, 0xf0c1, 0xf0c2, 0xf0c3, 0xf0c4, 0xf0c5, 0xf0c6, 0xf0c7, 0xf0c8, 0xf0c9, 0xf0ca, 0xf0cb, 0xf0cc, 0xf0cd, 0xf0ce, 0xf0cf, 0xf0d0, 0xf0d1, 0xf0d2, 0xf0d3, 0xf0d4, 0xf0d5, 0xf0d6, 0xf0d7, 0xf0d8, 0xf0d9, 0xf0da, 0xf0db, 0xf0dc, 0xf0dd, 0xf0de, 0xf0df, 0xf0e0, 0xf0e1, 0xf0e2, 0xf0e3, 0xf0e4, 0xf0e5, 0xf0e6, 0xf0e7, 0xf0e8, 0xf0e9, 0xf0ea, 0xf0eb, 0xf0ec, 0xf0ed, 0xf0ee, 0xf0ef, 0xf0f0, 0xf0f1, 0xf0f2, 0xf0f3, 0xf0f4, 0xf0f5, 0xf0f6, 0xf0f7, 0xf0f8, 0xf0f9, 0xf0fa, 0xf0fb, 0xf0fc, 0xf0fd, 0xf0fe, 0xf0ff } }; /* The standard kernel character-to-font mappings are not invertible -- this is just a best effort. 
*/ #define MAX_GLYPH 512 /* Max possible glyph value */ static enum translation_map inv_translate[MAX_NR_CONSOLES]; #define UNI_DIRS 32U #define UNI_DIR_ROWS 32U #define UNI_ROW_GLYPHS 64U #define UNI_DIR_BITS GENMASK(15, 11) #define UNI_ROW_BITS GENMASK(10, 6) #define UNI_GLYPH_BITS GENMASK( 5, 0) #define UNI_DIR(uni) FIELD_GET(UNI_DIR_BITS, (uni)) #define UNI_ROW(uni) FIELD_GET(UNI_ROW_BITS, (uni)) #define UNI_GLYPH(uni) FIELD_GET(UNI_GLYPH_BITS, (uni)) #define UNI(dir, row, glyph) (FIELD_PREP(UNI_DIR_BITS, (dir)) | \ FIELD_PREP(UNI_ROW_BITS, (row)) | \ FIELD_PREP(UNI_GLYPH_BITS, (glyph))) /** * struct uni_pagedict - unicode directory * * @uni_pgdir: 32*32*64 table with glyphs * @refcount: reference count of this structure * @sum: checksum * @inverse_translations: best-effort inverse mapping * @inverse_trans_unicode: best-effort inverse mapping to unicode */ struct uni_pagedict { u16 **uni_pgdir[UNI_DIRS]; unsigned long refcount; unsigned long sum; unsigned char *inverse_translations[LAST_MAP + 1]; u16 *inverse_trans_unicode; }; static struct uni_pagedict *dflt; static void set_inverse_transl(struct vc_data *conp, struct uni_pagedict *dict, enum translation_map m) { unsigned short *t = translations[m]; unsigned char *inv; if (!dict) return; inv = dict->inverse_translations[m]; if (!inv) { inv = dict->inverse_translations[m] = kmalloc(MAX_GLYPH, GFP_KERNEL); if (!inv) return; } memset(inv, 0, MAX_GLYPH); for (unsigned int ch = 0; ch < ARRAY_SIZE(translations[m]); ch++) { int glyph = conv_uni_to_pc(conp, t[ch]); if (glyph >= 0 && glyph < MAX_GLYPH && inv[glyph] < 32) { /* prefer '-' above SHY etc. */ inv[glyph] = ch; } } } static void set_inverse_trans_unicode(struct uni_pagedict *dict) { unsigned int d, r, g; u16 *inv; if (!dict) return; inv = dict->inverse_trans_unicode; if (!inv) { inv = dict->inverse_trans_unicode = kmalloc_array(MAX_GLYPH, sizeof(*inv), GFP_KERNEL); if (!inv) return; } memset(inv, 0, MAX_GLYPH * sizeof(*inv)); for (d = 0; d < UNI_DIRS; d++) { u16 **dir = dict->uni_pgdir[d]; if (!dir) continue; for (r = 0; r < UNI_DIR_ROWS; r++) { u16 *row = dir[r]; if (!row) continue; for (g = 0; g < UNI_ROW_GLYPHS; g++) { u16 glyph = row[g]; if (glyph < MAX_GLYPH && inv[glyph] < 32) inv[glyph] = UNI(d, r, g); } } } } unsigned short *set_translate(enum translation_map m, struct vc_data *vc) { inv_translate[vc->vc_num] = m; return translations[m]; } /* * Inverse translation is impossible for several reasons: * 1. The font<->character maps are not 1-1. * 2. The text may have been written while a different translation map * was active. * Still, it is now possible to a certain extent to cut and paste non-ASCII. */ u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) { struct uni_pagedict *p; enum translation_map m; if (glyph >= MAX_GLYPH) return 0; p = *conp->uni_pagedict_loc; if (!p) return glyph; if (use_unicode) { if (!p->inverse_trans_unicode) return glyph; return p->inverse_trans_unicode[glyph]; } m = inv_translate[conp->vc_num]; if (!p->inverse_translations[m]) return glyph; return p->inverse_translations[m][glyph]; } EXPORT_SYMBOL_GPL(inverse_translate); static void update_user_maps(void) { int i; struct uni_pagedict *p, *q = NULL; for (i = 0; i < MAX_NR_CONSOLES; i++) { if (!vc_cons_allocated(i)) continue; p = *vc_cons[i].d->uni_pagedict_loc; if (p && p != q) { set_inverse_transl(vc_cons[i].d, p, USER_MAP); set_inverse_trans_unicode(p); q = p; } } } /* * Load customizable translation table * arg points to a 256 byte translation table. 
* * The "old" variants are for translation directly to font (using the * 0xf000-0xf0ff "transparent" Unicodes) whereas the "new" variants set * Unicodes explicitly. */ int con_set_trans_old(unsigned char __user * arg) { unsigned short inbuf[E_TABSZ]; unsigned int i; unsigned char ch; for (i = 0; i < ARRAY_SIZE(inbuf); i++) { if (get_user(ch, &arg[i])) return -EFAULT; inbuf[i] = UNI_DIRECT_BASE | ch; } console_lock(); memcpy(translations[USER_MAP], inbuf, sizeof(inbuf)); update_user_maps(); console_unlock(); return 0; } int con_get_trans_old(unsigned char __user * arg) { int i, ch; unsigned short *p = translations[USER_MAP]; unsigned char outbuf[E_TABSZ]; console_lock(); for (i = 0; i < ARRAY_SIZE(outbuf); i++) { ch = conv_uni_to_pc(vc_cons[fg_console].d, p[i]); outbuf[i] = (ch & ~0xff) ? 0 : ch; } console_unlock(); return copy_to_user(arg, outbuf, sizeof(outbuf)) ? -EFAULT : 0; } int con_set_trans_new(ushort __user * arg) { unsigned short inbuf[E_TABSZ]; if (copy_from_user(inbuf, arg, sizeof(inbuf))) return -EFAULT; console_lock(); memcpy(translations[USER_MAP], inbuf, sizeof(inbuf)); update_user_maps(); console_unlock(); return 0; } int con_get_trans_new(ushort __user * arg) { unsigned short outbuf[E_TABSZ]; console_lock(); memcpy(outbuf, translations[USER_MAP], sizeof(outbuf)); console_unlock(); return copy_to_user(arg, outbuf, sizeof(outbuf)) ? -EFAULT : 0; } /* * Unicode -> current font conversion * * A font has at most 512 chars, usually 256. * But one font position may represent several Unicode chars. * A hashtable is somewhat of a pain to deal with, so use a * "paged table" instead. Simulation has shown the memory cost of * this 3-level paged table scheme to be comparable to a hash table. */ extern u8 dfont_unicount[]; /* Defined in console_defmap.c */ extern u16 dfont_unitable[]; static void con_release_unimap(struct uni_pagedict *dict) { unsigned int d, r; if (dict == dflt) dflt = NULL; for (d = 0; d < UNI_DIRS; d++) { u16 **dir = dict->uni_pgdir[d]; if (dir != NULL) { for (r = 0; r < UNI_DIR_ROWS; r++) kfree(dir[r]); kfree(dir); } dict->uni_pgdir[d] = NULL; } for (r = 0; r < ARRAY_SIZE(dict->inverse_translations); r++) { kfree(dict->inverse_translations[r]); dict->inverse_translations[r] = NULL; } kfree(dict->inverse_trans_unicode); dict->inverse_trans_unicode = NULL; } /* Caller must hold the console lock */ void con_free_unimap(struct vc_data *vc) { struct uni_pagedict *p; p = *vc->uni_pagedict_loc; if (!p) return; *vc->uni_pagedict_loc = NULL; if (--p->refcount) return; con_release_unimap(p); kfree(p); } static int con_unify_unimap(struct vc_data *conp, struct uni_pagedict *dict1) { struct uni_pagedict *dict2; unsigned int cons, d, r; for (cons = 0; cons < MAX_NR_CONSOLES; cons++) { if (!vc_cons_allocated(cons)) continue; dict2 = *vc_cons[cons].d->uni_pagedict_loc; if (!dict2 || dict2 == dict1 || dict2->sum != dict1->sum) continue; for (d = 0; d < UNI_DIRS; d++) { u16 **dir1 = dict1->uni_pgdir[d]; u16 **dir2 = dict2->uni_pgdir[d]; if (!dir1 && !dir2) continue; if (!dir1 || !dir2) break; for (r = 0; r < UNI_DIR_ROWS; r++) { if (!dir1[r] && !dir2[r]) continue; if (!dir1[r] || !dir2[r]) break; if (memcmp(dir1[r], dir2[r], UNI_ROW_GLYPHS * sizeof(*dir1[r]))) break; } if (r < UNI_DIR_ROWS) break; } if (d == UNI_DIRS) { dict2->refcount++; *conp->uni_pagedict_loc = dict2; con_release_unimap(dict1); kfree(dict1); return 1; } } return 0; } static int con_insert_unipair(struct uni_pagedict *p, u_short unicode, u_short fontpos) { u16 **dir, *row; unsigned int n; n = UNI_DIR(unicode); dir 
= p->uni_pgdir[n]; if (!dir) { dir = p->uni_pgdir[n] = kcalloc(UNI_DIR_ROWS, sizeof(*dir), GFP_KERNEL); if (!dir) return -ENOMEM; } n = UNI_ROW(unicode); row = dir[n]; if (!row) { row = dir[n] = kmalloc_array(UNI_ROW_GLYPHS, sizeof(*row), GFP_KERNEL); if (!row) return -ENOMEM; /* No glyphs for the characters (yet) */ memset(row, 0xff, UNI_ROW_GLYPHS * sizeof(*row)); } row[UNI_GLYPH(unicode)] = fontpos; p->sum += (fontpos << 20U) + unicode; return 0; } static int con_allocate_new(struct vc_data *vc) { struct uni_pagedict *new, *old = *vc->uni_pagedict_loc; new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return -ENOMEM; new->refcount = 1; *vc->uni_pagedict_loc = new; if (old) old->refcount--; return 0; } /* Caller must hold the lock */ static int con_do_clear_unimap(struct vc_data *vc) { struct uni_pagedict *old = *vc->uni_pagedict_loc; if (!old || old->refcount > 1) return con_allocate_new(vc); old->sum = 0; con_release_unimap(old); return 0; } int con_clear_unimap(struct vc_data *vc) { int ret; console_lock(); ret = con_do_clear_unimap(vc); console_unlock(); return ret; } static struct uni_pagedict *con_unshare_unimap(struct vc_data *vc, struct uni_pagedict *old) { struct uni_pagedict *new; unsigned int d, r, g; int ret; u16 uni = 0; ret = con_allocate_new(vc); if (ret) return ERR_PTR(ret); new = *vc->uni_pagedict_loc; /* * uni_pgdir is a 32*32*64 table with rows allocated when its first * entry is added. The unicode value must still be incremented for * empty rows. We are copying entries from "old" to "new". */ for (d = 0; d < UNI_DIRS; d++) { u16 **dir = old->uni_pgdir[d]; if (!dir) { /* Account for empty table */ uni += UNI_DIR_ROWS * UNI_ROW_GLYPHS; continue; } for (r = 0; r < UNI_DIR_ROWS; r++) { u16 *row = dir[r]; if (!row) { /* Account for row of 64 empty entries */ uni += UNI_ROW_GLYPHS; continue; } for (g = 0; g < UNI_ROW_GLYPHS; g++, uni++) { if (row[g] == 0xffff) continue; /* * Found one, copy entry for unicode uni with * fontpos value row[g]. */ ret = con_insert_unipair(new, uni, row[g]); if (ret) { old->refcount++; *vc->uni_pagedict_loc = old; con_release_unimap(new); kfree(new); return ERR_PTR(ret); } } } } return new; } int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list) { int err = 0, err1; struct uni_pagedict *dict; struct unipair *unilist, *plist; if (!ct) return 0; unilist = vmemdup_array_user(list, ct, sizeof(*unilist)); if (IS_ERR(unilist)) return PTR_ERR(unilist); console_lock(); /* Save original vc_unipagdir_loc in case we allocate a new one */ dict = *vc->uni_pagedict_loc; if (!dict) { err = -EINVAL; goto out_unlock; } if (dict->refcount > 1) { dict = con_unshare_unimap(vc, dict); if (IS_ERR(dict)) { err = PTR_ERR(dict); goto out_unlock; } } else if (dict == dflt) { dflt = NULL; } /* * Insert user specified unicode pairs into new table. */ for (plist = unilist; ct; ct--, plist++) { err1 = con_insert_unipair(dict, plist->unicode, plist->fontpos); if (err1) err = err1; } /* * Merge with fontmaps of any other virtual consoles. */ if (con_unify_unimap(vc, dict)) goto out_unlock; for (enum translation_map m = FIRST_MAP; m <= LAST_MAP; m++) set_inverse_transl(vc, dict, m); set_inverse_trans_unicode(dict); out_unlock: console_unlock(); kvfree(unilist); return err; } /** * con_set_default_unimap - set default unicode map * @vc: the console we are updating * * Loads the unimap for the hardware font, as defined in uni_hash.tbl. * The representation used was the most compact I could come up * with. 
This routine is executed at video setup, and when the * PIO_FONTRESET ioctl is called. * * The caller must hold the console lock */ int con_set_default_unimap(struct vc_data *vc) { struct uni_pagedict *dict; unsigned int fontpos, count; int err = 0, err1; u16 *dfont; if (dflt) { dict = *vc->uni_pagedict_loc; if (dict == dflt) return 0; dflt->refcount++; *vc->uni_pagedict_loc = dflt; if (dict && !--dict->refcount) { con_release_unimap(dict); kfree(dict); } return 0; } /* The default font is always 256 characters */ err = con_do_clear_unimap(vc); if (err) return err; dict = *vc->uni_pagedict_loc; dfont = dfont_unitable; for (fontpos = 0; fontpos < 256U; fontpos++) for (count = dfont_unicount[fontpos]; count; count--) { err1 = con_insert_unipair(dict, *(dfont++), fontpos); if (err1) err = err1; } if (con_unify_unimap(vc, dict)) { dflt = *vc->uni_pagedict_loc; return err; } for (enum translation_map m = FIRST_MAP; m <= LAST_MAP; m++) set_inverse_transl(vc, dict, m); set_inverse_trans_unicode(dict); dflt = dict; return err; } EXPORT_SYMBOL(con_set_default_unimap); /** * con_copy_unimap - copy unimap between two vts * @dst_vc: target * @src_vc: source * * The caller must hold the console lock when invoking this method */ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc) { struct uni_pagedict *src; if (!*src_vc->uni_pagedict_loc) return -EINVAL; if (*dst_vc->uni_pagedict_loc == *src_vc->uni_pagedict_loc) return 0; con_free_unimap(dst_vc); src = *src_vc->uni_pagedict_loc; src->refcount++; *dst_vc->uni_pagedict_loc = src; return 0; } EXPORT_SYMBOL(con_copy_unimap); /* * con_get_unimap - get the unicode map * * Read the console unicode data for this console. Called from the ioctl * handlers. */ int con_get_unimap(struct vc_data *vc, ushort ct, ushort __user *uct, struct unipair __user *list) { ushort ect; struct uni_pagedict *dict; struct unipair *unilist; unsigned int d, r, g; int ret = 0; unilist = kvmalloc_array(ct, sizeof(*unilist), GFP_KERNEL); if (!unilist) return -ENOMEM; console_lock(); ect = 0; dict = *vc->uni_pagedict_loc; if (!dict) goto unlock; for (d = 0; d < UNI_DIRS; d++) { u16 **dir = dict->uni_pgdir[d]; if (!dir) continue; for (r = 0; r < UNI_DIR_ROWS; r++) { u16 *row = dir[r]; if (!row) continue; for (g = 0; g < UNI_ROW_GLYPHS; g++, row++) { if (*row >= MAX_GLYPH) continue; if (ect < ct) { unilist[ect].unicode = UNI(d, r, g); unilist[ect].fontpos = *row; } ect++; } } } unlock: console_unlock(); if (copy_to_user(list, unilist, min(ect, ct) * sizeof(*unilist))) ret = -EFAULT; if (put_user(ect, uct)) ret = -EFAULT; kvfree(unilist); return ret ? ret : (ect <= ct) ? 0 : -ENOMEM; } /* * Always use USER_MAP. These functions are used by the keyboard, * which shouldn't be affected by G0/G1 switching, etc. * If the user map still contains default values, i.e. the * direct-to-font mapping, then assume user is using Latin1. * * FIXME: at some point we need to decide if we want to lock the table * update element itself via the keyboard_event_lock for consistency with the * keyboard driver as well as the consoles */ /* may be called during an interrupt */ u32 conv_8bit_to_uni(unsigned char c) { unsigned short uni = translations[USER_MAP][c]; return uni == (0xf000 | c) ? 
c : uni; } int conv_uni_to_8bit(u32 uni) { int c; for (c = 0; c < ARRAY_SIZE(translations[USER_MAP]); c++) if (translations[USER_MAP][c] == uni || (translations[USER_MAP][c] == (c | 0xf000) && uni == c)) return c; return -1; } int conv_uni_to_pc(struct vc_data *conp, long ucs) { struct uni_pagedict *dict; u16 **dir, *row, glyph; /* Only 16-bit codes supported at this time */ if (ucs > 0xffff) return -4; /* Not found */ else if (ucs < 0x20) return -1; /* Not a printable character */ else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) return -2; /* Zero-width space */ /* * UNI_DIRECT_BASE indicates the start of the region in the User Zone * which always has a 1:1 mapping to the currently loaded font. The * UNI_DIRECT_MASK indicates the bit span of the region. */ else if ((ucs & ~UNI_DIRECT_MASK) == UNI_DIRECT_BASE) return ucs & UNI_DIRECT_MASK; dict = *conp->uni_pagedict_loc; if (!dict) return -3; dir = dict->uni_pgdir[UNI_DIR(ucs)]; if (!dir) return -4; row = dir[UNI_ROW(ucs)]; if (!row) return -4; glyph = row[UNI_GLYPH(ucs)]; if (glyph >= MAX_GLYPH) return -4; return glyph; } /* * This is called at sys_setup time, after memory and the console are * initialized. It must be possible to call kmalloc(..., GFP_KERNEL) * from this function, hence the call from sys_setup. */ void __init console_map_init(void) { int i; for (i = 0; i < MAX_NR_CONSOLES; i++) if (vc_cons_allocated(i) && !*vc_cons[i].d->uni_pagedict_loc) con_set_default_unimap(vc_cons[i].d); }
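/*
 * Standalone sketch (not kernel code) of the three-level "paged table"
 * lookup that conv_uni_to_pc() performs above: a 16-bit code point splits
 * into a 5-bit directory index, a 5-bit row index and a 6-bit glyph index
 * (UNI_DIR/UNI_ROW/UNI_GLYPH). The sketch_* names and the toy dictionary
 * are illustrative only; the 0xffff / MAX_GLYPH sentinel handling mirrors
 * con_insert_unipair() and conv_uni_to_pc().
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SKETCH_DIRS        32
#define SKETCH_DIR_ROWS    32
#define SKETCH_ROW_GLYPHS  64
#define SKETCH_MAX_GLYPH   512	/* 0xffff entries read back as "no glyph" */

struct sketch_pagedict {
	uint16_t **dir[SKETCH_DIRS];
};

static int sketch_insert(struct sketch_pagedict *p, uint16_t uni, uint16_t fontpos)
{
	unsigned int d = uni >> 11, r = (uni >> 6) & 0x1f, g = uni & 0x3f;

	if (!p->dir[d]) {
		p->dir[d] = calloc(SKETCH_DIR_ROWS, sizeof(*p->dir[d]));
		if (!p->dir[d])
			return -1;
	}
	if (!p->dir[d][r]) {
		p->dir[d][r] = malloc(SKETCH_ROW_GLYPHS * sizeof(uint16_t));
		if (!p->dir[d][r])
			return -1;
		/* 0xffff marks "no glyph", as in con_insert_unipair(). */
		for (unsigned int i = 0; i < SKETCH_ROW_GLYPHS; i++)
			p->dir[d][r][i] = 0xffff;
	}
	p->dir[d][r][g] = fontpos;
	return 0;
}

static int sketch_lookup(const struct sketch_pagedict *p, uint16_t uni)
{
	unsigned int d = uni >> 11, r = (uni >> 6) & 0x1f, g = uni & 0x3f;
	uint16_t glyph;

	if (!p->dir[d] || !p->dir[d][r])
		return -4;			/* not found */
	glyph = p->dir[d][r][g];
	return glyph >= SKETCH_MAX_GLYPH ? -4 : (int)glyph;
}

int main(void)
{
	struct sketch_pagedict dict = { { NULL } };

	/* Map U+2500 (BOX DRAWINGS LIGHT HORIZONTAL) to CP437 position 196. */
	sketch_insert(&dict, 0x2500, 196);

	printf("U+2500 -> %d\n", sketch_lookup(&dict, 0x2500));	/* 196 */
	printf("U+2501 -> %d\n", sketch_lookup(&dict, 0x2501));	/* -4  */
	return 0;
}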
// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 *  Copyright (C) 2005 Mike Isely <isely@pobox.com>
 */

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/firmware.h>
#include <linux/videodev2.h>
#include <media/v4l2-common.h>
#include <media/tuner.h>
#include "pvrusb2.h"
#include "pvrusb2-std.h"
#include "pvrusb2-util.h"
#include "pvrusb2-hdw.h"
#include "pvrusb2-i2c-core.h"
#include "pvrusb2-eeprom.h"
#include "pvrusb2-hdw-internal.h"
#include "pvrusb2-encoder.h"
#include "pvrusb2-debug.h"
#include "pvrusb2-fx2-cmd.h"
#include "pvrusb2-wm8775.h"
#include "pvrusb2-video-v4l.h"
#include "pvrusb2-cx2584x-v4l.h"
#include "pvrusb2-cs53l32a.h"
#include "pvrusb2-audio.h"

#define TV_MIN_FREQ     55250000L
#define TV_MAX_FREQ    850000000L

/* This defines a minimum interval that the decoder must remain quiet
   before we are allowed to start it running. */
#define TIME_MSEC_DECODER_WAIT 50

/* This defines a minimum interval that the decoder must be allowed to run
   before we can safely begin using its streaming output. */
#define TIME_MSEC_DECODER_STABILIZATION_WAIT 300

/* This defines a minimum interval that the encoder must remain quiet
   before we are allowed to configure it. */
#define TIME_MSEC_ENCODER_WAIT 50

/* This defines the minimum interval that the encoder must successfully run
   before we consider that the encoder has run at least once since its
   firmware has been loaded.  This measurement is important for cases where
   we can't do something until we know that the encoder has been run at
   least once. */
#define TIME_MSEC_ENCODER_OK 250

static struct pvr2_hdw *unit_pointers[PVR_NUM] = {[ 0 ... PVR_NUM-1 ] = NULL};
static DEFINE_MUTEX(pvr2_unit_mtx);

static int ctlchg;
static int procreload;
static int tuner[PVR_NUM] = { [0 ... PVR_NUM-1] = -1 };
static int tolerance[PVR_NUM] = { [0 ... PVR_NUM-1] = 0 };
static int video_std[PVR_NUM] = { [0 ... PVR_NUM-1] = 0 };
static int init_pause_msec;

module_param(ctlchg, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(ctlchg, "0=optimize ctl change 1=always accept new ctl value");
module_param(init_pause_msec, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(init_pause_msec, "hardware initialization settling delay");
module_param(procreload, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(procreload,
		 "Attempt init failure recovery with firmware reload");
module_param_array(tuner, int, NULL, 0444);
MODULE_PARM_DESC(tuner, "specify installed tuner type");
module_param_array(video_std, int, NULL, 0444);
MODULE_PARM_DESC(video_std, "specify initial video standard");
module_param_array(tolerance, int, NULL, 0444);
MODULE_PARM_DESC(tolerance, "specify stream error tolerance");

/* US Broadcast channel 3 (61.25 MHz), to help with testing */
static int default_tv_freq    = 61250000L;
/* 104.3 MHz, a usable FM station for my area */
static int default_radio_freq = 104300000L;

module_param_named(tv_freq, default_tv_freq, int, 0444);
MODULE_PARM_DESC(tv_freq, "specify initial television frequency");
module_param_named(radio_freq, default_radio_freq, int, 0444);
MODULE_PARM_DESC(radio_freq, "specify initial radio frequency");

#define PVR2_CTL_WRITE_ENDPOINT  0x01
#define PVR2_CTL_READ_ENDPOINT   0x81

#define PVR2_GPIO_IN 0x9008
#define PVR2_GPIO_OUT 0x900c
#define PVR2_GPIO_DIR 0x9020

#define trace_firmware(...) \
	pvr2_trace(PVR2_TRACE_FIRMWARE,__VA_ARGS__)

#define PVR2_FIRMWARE_ENDPOINT   0x02
/* size of a firmware chunk */
#define FIRMWARE_CHUNK_SIZE 0x2000

typedef void (*pvr2_subdev_update_func)(struct pvr2_hdw *,
					struct v4l2_subdev *);
static const pvr2_subdev_update_func pvr2_module_update_functions[] = {
	[PVR2_CLIENT_ID_WM8775] = pvr2_wm8775_subdev_update,
	[PVR2_CLIENT_ID_SAA7115] = pvr2_saa7115_subdev_update,
	[PVR2_CLIENT_ID_MSP3400] = pvr2_msp3400_subdev_update,
	[PVR2_CLIENT_ID_CX25840] = pvr2_cx25840_subdev_update,
	[PVR2_CLIENT_ID_CS53L32A] = pvr2_cs53l32a_subdev_update,
};

static const char *module_names[] = {
	[PVR2_CLIENT_ID_MSP3400] = "msp3400",
	[PVR2_CLIENT_ID_CX25840] = "cx25840",
	[PVR2_CLIENT_ID_SAA7115] = "saa7115",
	[PVR2_CLIENT_ID_TUNER] = "tuner",
	[PVR2_CLIENT_ID_DEMOD] = "tuner",
	[PVR2_CLIENT_ID_CS53L32A] = "cs53l32a",
	[PVR2_CLIENT_ID_WM8775] = "wm8775",
};

static const unsigned char *module_i2c_addresses[] = {
	[PVR2_CLIENT_ID_TUNER] = "\x60\x61\x62\x63",
	[PVR2_CLIENT_ID_DEMOD] = "\x43",
	[PVR2_CLIENT_ID_MSP3400] = "\x40",
	[PVR2_CLIENT_ID_SAA7115] = "\x21",
	[PVR2_CLIENT_ID_WM8775] = "\x1b",
	[PVR2_CLIENT_ID_CX25840] = "\x44",
	[PVR2_CLIENT_ID_CS53L32A] = "\x11",
};

static const char *ir_scheme_names[] = {
	[PVR2_IR_SCHEME_NONE] = "none",
	[PVR2_IR_SCHEME_29XXX] = "29xxx",
	[PVR2_IR_SCHEME_24XXX] = "24xxx (29xxx emulation)",
	[PVR2_IR_SCHEME_24XXX_MCE] = "24xxx (MCE device)",
	[PVR2_IR_SCHEME_ZILOG] = "Zilog",
};

/* Define the list of additional controls we'll dynamically construct based
   on query of the cx2341x module. */
struct pvr2_mpeg_ids {
	const char *strid;
	int id;
};
static const struct pvr2_mpeg_ids mpeg_ids[] = {
	{ .strid = "audio_layer", .id = V4L2_CID_MPEG_AUDIO_ENCODING, },
	{ .strid = "audio_bitrate", .id = V4L2_CID_MPEG_AUDIO_L2_BITRATE, },
	/* Already using audio_mode elsewhere :-( */
	{ .strid = "mpeg_audio_mode", .id = V4L2_CID_MPEG_AUDIO_MODE, },
	{ .strid = "mpeg_audio_mode_extension",
	  .id = V4L2_CID_MPEG_AUDIO_MODE_EXTENSION, },
	{ .strid = "audio_emphasis", .id = V4L2_CID_MPEG_AUDIO_EMPHASIS, },
	{ .strid = "audio_crc", .id = V4L2_CID_MPEG_AUDIO_CRC, },
	{ .strid = "video_aspect", .id = V4L2_CID_MPEG_VIDEO_ASPECT, },
	{ .strid = "video_b_frames", .id = V4L2_CID_MPEG_VIDEO_B_FRAMES, },
	{ .strid = "video_gop_size", .id = V4L2_CID_MPEG_VIDEO_GOP_SIZE, },
	{ .strid = "video_gop_closure",
	  .id = V4L2_CID_MPEG_VIDEO_GOP_CLOSURE, },
	{ .strid = "video_bitrate_mode",
	  .id = V4L2_CID_MPEG_VIDEO_BITRATE_MODE, },
	{ .strid = "video_bitrate", .id = V4L2_CID_MPEG_VIDEO_BITRATE, },
	{ .strid = "video_bitrate_peak",
	  .id = V4L2_CID_MPEG_VIDEO_BITRATE_PEAK, },
	{ .strid = "video_temporal_decimation",
	  .id = V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION, },
	{ .strid = "stream_type", .id = V4L2_CID_MPEG_STREAM_TYPE, },
	{ .strid = "video_spatial_filter_mode",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE, },
	{ .strid = "video_spatial_filter",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER, },
	{ .strid = "video_luma_spatial_filter_type",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE, },
	{ .strid = "video_chroma_spatial_filter_type",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE, },
	{ .strid = "video_temporal_filter_mode",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE, },
	{ .strid = "video_temporal_filter",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER, },
	{ .strid = "video_median_filter_type",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE, },
	{ .strid = "video_luma_median_filter_top",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_TOP, },
	{ .strid = "video_luma_median_filter_bottom",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_BOTTOM, },
	{ .strid = "video_chroma_median_filter_top",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_TOP, },
	{ .strid = "video_chroma_median_filter_bottom",
	  .id = V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_BOTTOM, }
};
#define MPEGDEF_COUNT ARRAY_SIZE(mpeg_ids)

static const char *control_values_srate[] = {
	[V4L2_MPEG_AUDIO_SAMPLING_FREQ_44100] = "44.1 kHz",
	[V4L2_MPEG_AUDIO_SAMPLING_FREQ_48000] = "48 kHz",
	[V4L2_MPEG_AUDIO_SAMPLING_FREQ_32000] = "32 kHz",
};

static const char *control_values_input[] = {
	[PVR2_CVAL_INPUT_TV]        = "television",  /*xawtv needs this name*/
	[PVR2_CVAL_INPUT_DTV]       = "dtv",
	[PVR2_CVAL_INPUT_RADIO]     = "radio",
	[PVR2_CVAL_INPUT_SVIDEO]    = "s-video",
	[PVR2_CVAL_INPUT_COMPOSITE] = "composite",
};

static const char *control_values_audiomode[] = {
	[V4L2_TUNER_MODE_MONO]   = "Mono",
	[V4L2_TUNER_MODE_STEREO] = "Stereo",
	[V4L2_TUNER_MODE_LANG1]  = "Lang1",
	[V4L2_TUNER_MODE_LANG2]  = "Lang2",
	[V4L2_TUNER_MODE_LANG1_LANG2] = "Lang1+Lang2",
};

static const char *control_values_hsm[] = {
	[PVR2_CVAL_HSM_FAIL] = "Fail",
	[PVR2_CVAL_HSM_HIGH] = "High",
	[PVR2_CVAL_HSM_FULL] = "Full",
};

static const char *pvr2_state_names[] = {
	[PVR2_STATE_NONE]  = "none",
	[PVR2_STATE_DEAD]  = "dead",
	[PVR2_STATE_COLD]  = "cold",
	[PVR2_STATE_WARM]  = "warm",
	[PVR2_STATE_ERROR] = "error",
	[PVR2_STATE_READY] = "ready",
	[PVR2_STATE_RUN]   = "run",
};

struct pvr2_fx2cmd_descdef {
	unsigned char id;
	unsigned char *desc;
};

static const struct pvr2_fx2cmd_descdef pvr2_fx2cmd_desc[] = {
	{FX2CMD_MEM_WRITE_DWORD, "write encoder dword"},
	{FX2CMD_MEM_READ_DWORD, "read encoder dword"},
	{FX2CMD_HCW_ZILOG_RESET, "zilog IR reset control"},
	{FX2CMD_MEM_READ_64BYTES, "read encoder 64bytes"},
	{FX2CMD_REG_WRITE, "write encoder register"},
	{FX2CMD_REG_READ, "read encoder register"},
	{FX2CMD_MEMSEL, "encoder memsel"},
	{FX2CMD_I2C_WRITE, "i2c write"},
	{FX2CMD_I2C_READ, "i2c read"},
	{FX2CMD_GET_USB_SPEED, "get USB speed"},
	{FX2CMD_STREAMING_ON, "stream on"},
	{FX2CMD_STREAMING_OFF, "stream off"},
	{FX2CMD_FWPOST1, "fwpost1"},
	{FX2CMD_POWER_OFF, "power off"},
	{FX2CMD_POWER_ON, "power on"},
	{FX2CMD_DEEP_RESET, "deep reset"},
	{FX2CMD_GET_EEPROM_ADDR, "get rom addr"},
	{FX2CMD_GET_IR_CODE, "get IR code"},
	{FX2CMD_HCW_DEMOD_RESETIN, "hcw demod resetin"},
	{FX2CMD_HCW_DTV_STREAMING_ON, "hcw dtv stream on"},
	{FX2CMD_HCW_DTV_STREAMING_OFF, "hcw dtv stream off"},
	{FX2CMD_ONAIR_DTV_STREAMING_ON, "onair dtv stream on"},
	{FX2CMD_ONAIR_DTV_STREAMING_OFF, "onair dtv stream off"},
	{FX2CMD_ONAIR_DTV_POWER_ON, "onair dtv power on"},
	{FX2CMD_ONAIR_DTV_POWER_OFF, "onair dtv power off"},
	{FX2CMD_HCW_DEMOD_RESET_PIN, "hcw demod reset pin"},
	{FX2CMD_HCW_MAKO_SLEEP_PIN, "hcw mako sleep pin"},
};

static int pvr2_hdw_set_input(struct pvr2_hdw *hdw,int v);
static void pvr2_hdw_state_sched(struct pvr2_hdw *);
static int pvr2_hdw_state_eval(struct pvr2_hdw *);
static void pvr2_hdw_set_cur_freq(struct pvr2_hdw *,unsigned long);
static void pvr2_hdw_worker_poll(struct work_struct *work);
static int pvr2_hdw_wait(struct pvr2_hdw *,int state);
static int pvr2_hdw_untrip_unlocked(struct pvr2_hdw *);
static void pvr2_hdw_state_log_state(struct pvr2_hdw *);
static int pvr2_hdw_cmd_usbstream(struct pvr2_hdw *hdw,int runFl);
static int pvr2_hdw_commit_setup(struct pvr2_hdw *hdw);
static int pvr2_hdw_get_eeprom_addr(struct pvr2_hdw *hdw);
static void pvr2_hdw_quiescent_timeout(struct timer_list *);
static void pvr2_hdw_decoder_stabilization_timeout(struct timer_list *);
static
void pvr2_hdw_encoder_wait_timeout(struct timer_list *); static void pvr2_hdw_encoder_run_timeout(struct timer_list *); static int pvr2_issue_simple_cmd(struct pvr2_hdw *,u32); static int pvr2_send_request_ex(struct pvr2_hdw *hdw, unsigned int timeout,int probe_fl, void *write_data,unsigned int write_len, void *read_data,unsigned int read_len); static int pvr2_hdw_check_cropcap(struct pvr2_hdw *hdw); static v4l2_std_id pvr2_hdw_get_detected_std(struct pvr2_hdw *hdw); static void trace_stbit(const char *name,int val) { pvr2_trace(PVR2_TRACE_STBITS, "State bit %s <-- %s", name,(val ? "true" : "false")); } static int ctrl_channelfreq_get(struct pvr2_ctrl *cptr,int *vp) { struct pvr2_hdw *hdw = cptr->hdw; if ((hdw->freqProgSlot > 0) && (hdw->freqProgSlot <= FREQTABLE_SIZE)) { *vp = hdw->freqTable[hdw->freqProgSlot-1]; } else { *vp = 0; } return 0; } static int ctrl_channelfreq_set(struct pvr2_ctrl *cptr,int m,int v) { struct pvr2_hdw *hdw = cptr->hdw; unsigned int slotId = hdw->freqProgSlot; if ((slotId > 0) && (slotId <= FREQTABLE_SIZE)) { hdw->freqTable[slotId-1] = v; /* Handle side effects correctly - if we're tuned to this slot, then forgot the slot id relation since the stored frequency has been changed. */ if (hdw->freqSelector) { if (hdw->freqSlotRadio == slotId) { hdw->freqSlotRadio = 0; } } else { if (hdw->freqSlotTelevision == slotId) { hdw->freqSlotTelevision = 0; } } } return 0; } static int ctrl_channelprog_get(struct pvr2_ctrl *cptr,int *vp) { *vp = cptr->hdw->freqProgSlot; return 0; } static int ctrl_channelprog_set(struct pvr2_ctrl *cptr,int m,int v) { struct pvr2_hdw *hdw = cptr->hdw; if ((v >= 0) && (v <= FREQTABLE_SIZE)) { hdw->freqProgSlot = v; } return 0; } static int ctrl_channel_get(struct pvr2_ctrl *cptr,int *vp) { struct pvr2_hdw *hdw = cptr->hdw; *vp = hdw->freqSelector ? 
hdw->freqSlotRadio : hdw->freqSlotTelevision; return 0; } static int ctrl_channel_set(struct pvr2_ctrl *cptr,int m,int slotId) { unsigned freq = 0; struct pvr2_hdw *hdw = cptr->hdw; if ((slotId < 0) || (slotId > FREQTABLE_SIZE)) return 0; if (slotId > 0) { freq = hdw->freqTable[slotId-1]; if (!freq) return 0; pvr2_hdw_set_cur_freq(hdw,freq); } if (hdw->freqSelector) { hdw->freqSlotRadio = slotId; } else { hdw->freqSlotTelevision = slotId; } return 0; } static int ctrl_freq_get(struct pvr2_ctrl *cptr,int *vp) { *vp = pvr2_hdw_get_cur_freq(cptr->hdw); return 0; } static int ctrl_freq_is_dirty(struct pvr2_ctrl *cptr) { return cptr->hdw->freqDirty != 0; } static void ctrl_freq_clear_dirty(struct pvr2_ctrl *cptr) { cptr->hdw->freqDirty = 0; } static int ctrl_freq_set(struct pvr2_ctrl *cptr,int m,int v) { pvr2_hdw_set_cur_freq(cptr->hdw,v); return 0; } static int ctrl_cropl_min_get(struct pvr2_ctrl *cptr, int *left) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *left = cap->bounds.left; return 0; } static int ctrl_cropl_max_get(struct pvr2_ctrl *cptr, int *left) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *left = cap->bounds.left; if (cap->bounds.width > cptr->hdw->cropw_val) { *left += cap->bounds.width - cptr->hdw->cropw_val; } return 0; } static int ctrl_cropt_min_get(struct pvr2_ctrl *cptr, int *top) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *top = cap->bounds.top; return 0; } static int ctrl_cropt_max_get(struct pvr2_ctrl *cptr, int *top) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *top = cap->bounds.top; if (cap->bounds.height > cptr->hdw->croph_val) { *top += cap->bounds.height - cptr->hdw->croph_val; } return 0; } static int ctrl_cropw_max_get(struct pvr2_ctrl *cptr, int *width) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat, bleftend, cleft; stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } bleftend = cap->bounds.left+cap->bounds.width; cleft = cptr->hdw->cropl_val; *width = cleft < bleftend ? bleftend-cleft : 0; return 0; } static int ctrl_croph_max_get(struct pvr2_ctrl *cptr, int *height) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat, btopend, ctop; stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } btopend = cap->bounds.top+cap->bounds.height; ctop = cptr->hdw->cropt_val; *height = ctop < btopend ? 
btopend-ctop : 0; return 0; } static int ctrl_get_cropcapbl(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->bounds.left; return 0; } static int ctrl_get_cropcapbt(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->bounds.top; return 0; } static int ctrl_get_cropcapbw(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->bounds.width; return 0; } static int ctrl_get_cropcapbh(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->bounds.height; return 0; } static int ctrl_get_cropcapdl(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->defrect.left; return 0; } static int ctrl_get_cropcapdt(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->defrect.top; return 0; } static int ctrl_get_cropcapdw(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->defrect.width; return 0; } static int ctrl_get_cropcapdh(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->defrect.height; return 0; } static int ctrl_get_cropcappan(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->pixelaspect.numerator; return 0; } static int ctrl_get_cropcappad(struct pvr2_ctrl *cptr, int *val) { struct v4l2_cropcap *cap = &cptr->hdw->cropcap_info; int stat = pvr2_hdw_check_cropcap(cptr->hdw); if (stat != 0) { return stat; } *val = cap->pixelaspect.denominator; return 0; } static int ctrl_vres_max_get(struct pvr2_ctrl *cptr,int *vp) { /* Actual maximum depends on the video standard in effect. */ if (cptr->hdw->std_mask_cur & V4L2_STD_525_60) { *vp = 480; } else { *vp = 576; } return 0; } static int ctrl_vres_min_get(struct pvr2_ctrl *cptr,int *vp) { /* Actual minimum depends on device digitizer type. 
*/ if (cptr->hdw->hdw_desc->flag_has_cx25840) { *vp = 75; } else { *vp = 17; } return 0; } static int ctrl_get_input(struct pvr2_ctrl *cptr,int *vp) { *vp = cptr->hdw->input_val; return 0; } static int ctrl_check_input(struct pvr2_ctrl *cptr,int v) { if (v < 0 || v > PVR2_CVAL_INPUT_MAX) return 0; return ((1UL << v) & cptr->hdw->input_allowed_mask) != 0; } static int ctrl_set_input(struct pvr2_ctrl *cptr,int m,int v) { return pvr2_hdw_set_input(cptr->hdw,v); } static int ctrl_isdirty_input(struct pvr2_ctrl *cptr) { return cptr->hdw->input_dirty != 0; } static void ctrl_cleardirty_input(struct pvr2_ctrl *cptr) { cptr->hdw->input_dirty = 0; } static int ctrl_freq_max_get(struct pvr2_ctrl *cptr, int *vp) { unsigned long fv; struct pvr2_hdw *hdw = cptr->hdw; if (hdw->tuner_signal_stale) { pvr2_hdw_status_poll(hdw); } fv = hdw->tuner_signal_info.rangehigh; if (!fv) { /* Safety fallback */ *vp = TV_MAX_FREQ; return 0; } if (hdw->tuner_signal_info.capability & V4L2_TUNER_CAP_LOW) { fv = (fv * 125) / 2; } else { fv = fv * 62500; } *vp = fv; return 0; } static int ctrl_freq_min_get(struct pvr2_ctrl *cptr, int *vp) { unsigned long fv; struct pvr2_hdw *hdw = cptr->hdw; if (hdw->tuner_signal_stale) { pvr2_hdw_status_poll(hdw); } fv = hdw->tuner_signal_info.rangelow; if (!fv) { /* Safety fallback */ *vp = TV_MIN_FREQ; return 0; } if (hdw->tuner_signal_info.capability & V4L2_TUNER_CAP_LOW) { fv = (fv * 125) / 2; } else { fv = fv * 62500; } *vp = fv; return 0; } static int ctrl_cx2341x_is_dirty(struct pvr2_ctrl *cptr) { return cptr->hdw->enc_stale != 0; } static void ctrl_cx2341x_clear_dirty(struct pvr2_ctrl *cptr) { cptr->hdw->enc_stale = 0; cptr->hdw->enc_unsafe_stale = 0; } static int ctrl_cx2341x_get(struct pvr2_ctrl *cptr,int *vp) { int ret; struct v4l2_ext_controls cs; struct v4l2_ext_control c1; memset(&cs,0,sizeof(cs)); memset(&c1,0,sizeof(c1)); cs.controls = &c1; cs.count = 1; c1.id = cptr->info->v4l_id; ret = cx2341x_ext_ctrls(&cptr->hdw->enc_ctl_state, 0, &cs, VIDIOC_G_EXT_CTRLS); if (ret) return ret; *vp = c1.value; return 0; } static int ctrl_cx2341x_set(struct pvr2_ctrl *cptr,int m,int v) { int ret; struct pvr2_hdw *hdw = cptr->hdw; struct v4l2_ext_controls cs; struct v4l2_ext_control c1; memset(&cs,0,sizeof(cs)); memset(&c1,0,sizeof(c1)); cs.controls = &c1; cs.count = 1; c1.id = cptr->info->v4l_id; c1.value = v; ret = cx2341x_ext_ctrls(&hdw->enc_ctl_state, hdw->state_encoder_run, &cs, VIDIOC_S_EXT_CTRLS); if (ret == -EBUSY) { /* Oops. cx2341x is telling us it's not safe to change this control while we're capturing. Make a note of this fact so that the pipeline will be stopped the next time controls are committed. Then go on ahead and store this change anyway. */ ret = cx2341x_ext_ctrls(&hdw->enc_ctl_state, 0, &cs, VIDIOC_S_EXT_CTRLS); if (!ret) hdw->enc_unsafe_stale = !0; } if (ret) return ret; hdw->enc_stale = !0; return 0; } static unsigned int ctrl_cx2341x_getv4lflags(struct pvr2_ctrl *cptr) { struct v4l2_queryctrl qctrl = {}; struct pvr2_ctl_info *info; qctrl.id = cptr->info->v4l_id; cx2341x_ctrl_query(&cptr->hdw->enc_ctl_state,&qctrl); /* Strip out the const so we can adjust a function pointer. It's OK to do this here because we know this is a dynamically created control, so the underlying storage for the info pointer is (a) private to us, and (b) not in read-only storage. Either we do this or we significantly complicate the underlying control implementation. 
*/ info = (struct pvr2_ctl_info *)(cptr->info); if (qctrl.flags & V4L2_CTRL_FLAG_READ_ONLY) { if (info->set_value) { info->set_value = NULL; } } else { if (!(info->set_value)) { info->set_value = ctrl_cx2341x_set; } } return qctrl.flags; } static int ctrl_streamingenabled_get(struct pvr2_ctrl *cptr,int *vp) { *vp = cptr->hdw->state_pipeline_req; return 0; } static int ctrl_masterstate_get(struct pvr2_ctrl *cptr,int *vp) { *vp = cptr->hdw->master_state; return 0; } static int ctrl_hsm_get(struct pvr2_ctrl *cptr,int *vp) { int result = pvr2_hdw_is_hsm(cptr->hdw); *vp = PVR2_CVAL_HSM_FULL; if (result < 0) *vp = PVR2_CVAL_HSM_FAIL; if (result) *vp = PVR2_CVAL_HSM_HIGH; return 0; } static int ctrl_stddetect_get(struct pvr2_ctrl *cptr, int *vp) { *vp = pvr2_hdw_get_detected_std(cptr->hdw); return 0; } static int ctrl_stdavail_get(struct pvr2_ctrl *cptr,int *vp) { *vp = cptr->hdw->std_mask_avail; return 0; } static int ctrl_stdavail_set(struct pvr2_ctrl *cptr,int m,int v) { struct pvr2_hdw *hdw = cptr->hdw; v4l2_std_id ns; ns = hdw->std_mask_avail; ns = (ns & ~m) | (v & m); if (ns == hdw->std_mask_avail) return 0; hdw->std_mask_avail = ns; hdw->std_info_cur.def.type_bitmask.valid_bits = hdw->std_mask_avail; return 0; } static int ctrl_std_val_to_sym(struct pvr2_ctrl *cptr,int msk,int val, char *bufPtr,unsigned int bufSize, unsigned int *len) { *len = pvr2_std_id_to_str(bufPtr,bufSize,msk & val); return 0; } static int ctrl_std_sym_to_val(struct pvr2_ctrl *cptr, const char *bufPtr,unsigned int bufSize, int *mskp,int *valp) { v4l2_std_id id; if (!pvr2_std_str_to_id(&id, bufPtr, bufSize)) return -EINVAL; if (mskp) *mskp = id; if (valp) *valp = id; return 0; } static int ctrl_stdcur_get(struct pvr2_ctrl *cptr,int *vp) { *vp = cptr->hdw->std_mask_cur; return 0; } static int ctrl_stdcur_set(struct pvr2_ctrl *cptr,int m,int v) { struct pvr2_hdw *hdw = cptr->hdw; v4l2_std_id ns; ns = hdw->std_mask_cur; ns = (ns & ~m) | (v & m); if (ns == hdw->std_mask_cur) return 0; hdw->std_mask_cur = ns; hdw->std_dirty = !0; return 0; } static int ctrl_stdcur_is_dirty(struct pvr2_ctrl *cptr) { return cptr->hdw->std_dirty != 0; } static void ctrl_stdcur_clear_dirty(struct pvr2_ctrl *cptr) { cptr->hdw->std_dirty = 0; } static int ctrl_signal_get(struct pvr2_ctrl *cptr,int *vp) { struct pvr2_hdw *hdw = cptr->hdw; pvr2_hdw_status_poll(hdw); *vp = hdw->tuner_signal_info.signal; return 0; } static int ctrl_audio_modes_present_get(struct pvr2_ctrl *cptr,int *vp) { int val = 0; unsigned int subchan; struct pvr2_hdw *hdw = cptr->hdw; pvr2_hdw_status_poll(hdw); subchan = hdw->tuner_signal_info.rxsubchans; if (subchan & V4L2_TUNER_SUB_MONO) { val |= (1 << V4L2_TUNER_MODE_MONO); } if (subchan & V4L2_TUNER_SUB_STEREO) { val |= (1 << V4L2_TUNER_MODE_STEREO); } if (subchan & V4L2_TUNER_SUB_LANG1) { val |= (1 << V4L2_TUNER_MODE_LANG1); } if (subchan & V4L2_TUNER_SUB_LANG2) { val |= (1 << V4L2_TUNER_MODE_LANG2); } *vp = val; return 0; } #define DEFINT(vmin,vmax) \ .type = pvr2_ctl_int, \ .def.type_int.min_value = vmin, \ .def.type_int.max_value = vmax #define DEFENUM(tab) \ .type = pvr2_ctl_enum, \ .def.type_enum.count = ARRAY_SIZE(tab), \ .def.type_enum.value_names = tab #define DEFBOOL \ .type = pvr2_ctl_bool #define DEFMASK(msk,tab) \ .type = pvr2_ctl_bitmask, \ .def.type_bitmask.valid_bits = msk, \ .def.type_bitmask.bit_names = tab #define DEFREF(vname) \ .set_value = ctrl_set_##vname, \ .get_value = ctrl_get_##vname, \ .is_dirty = ctrl_isdirty_##vname, \ .clear_dirty = ctrl_cleardirty_##vname #define VCREATE_FUNCS(vname) \ static 
int ctrl_get_##vname(struct pvr2_ctrl *cptr,int *vp) \ {*vp = cptr->hdw->vname##_val; return 0;} \ static int ctrl_set_##vname(struct pvr2_ctrl *cptr,int m,int v) \ {cptr->hdw->vname##_val = v; cptr->hdw->vname##_dirty = !0; return 0;} \ static int ctrl_isdirty_##vname(struct pvr2_ctrl *cptr) \ {return cptr->hdw->vname##_dirty != 0;} \ static void ctrl_cleardirty_##vname(struct pvr2_ctrl *cptr) \ {cptr->hdw->vname##_dirty = 0;} VCREATE_FUNCS(brightness) VCREATE_FUNCS(contrast) VCREATE_FUNCS(saturation) VCREATE_FUNCS(hue) VCREATE_FUNCS(volume) VCREATE_FUNCS(balance) VCREATE_FUNCS(bass) VCREATE_FUNCS(treble) VCREATE_FUNCS(mute) VCREATE_FUNCS(cropl) VCREATE_FUNCS(cropt) VCREATE_FUNCS(cropw) VCREATE_FUNCS(croph) VCREATE_FUNCS(audiomode) VCREATE_FUNCS(res_hor) VCREATE_FUNCS(res_ver) VCREATE_FUNCS(srate) /* Table definition of all controls which can be manipulated */ static const struct pvr2_ctl_info control_defs[] = { { .v4l_id = V4L2_CID_BRIGHTNESS, .desc = "Brightness", .name = "brightness", .default_value = 128, DEFREF(brightness), DEFINT(0,255), },{ .v4l_id = V4L2_CID_CONTRAST, .desc = "Contrast", .name = "contrast", .default_value = 68, DEFREF(contrast), DEFINT(0,127), },{ .v4l_id = V4L2_CID_SATURATION, .desc = "Saturation", .name = "saturation", .default_value = 64, DEFREF(saturation), DEFINT(0,127), },{ .v4l_id = V4L2_CID_HUE, .desc = "Hue", .name = "hue", .default_value = 0, DEFREF(hue), DEFINT(-128,127), },{ .v4l_id = V4L2_CID_AUDIO_VOLUME, .desc = "Volume", .name = "volume", .default_value = 62000, DEFREF(volume), DEFINT(0,65535), },{ .v4l_id = V4L2_CID_AUDIO_BALANCE, .desc = "Balance", .name = "balance", .default_value = 0, DEFREF(balance), DEFINT(-32768,32767), },{ .v4l_id = V4L2_CID_AUDIO_BASS, .desc = "Bass", .name = "bass", .default_value = 0, DEFREF(bass), DEFINT(-32768,32767), },{ .v4l_id = V4L2_CID_AUDIO_TREBLE, .desc = "Treble", .name = "treble", .default_value = 0, DEFREF(treble), DEFINT(-32768,32767), },{ .v4l_id = V4L2_CID_AUDIO_MUTE, .desc = "Mute", .name = "mute", .default_value = 0, DEFREF(mute), DEFBOOL, }, { .desc = "Capture crop left margin", .name = "crop_left", .internal_id = PVR2_CID_CROPL, .default_value = 0, DEFREF(cropl), DEFINT(-129, 340), .get_min_value = ctrl_cropl_min_get, .get_max_value = ctrl_cropl_max_get, .get_def_value = ctrl_get_cropcapdl, }, { .desc = "Capture crop top margin", .name = "crop_top", .internal_id = PVR2_CID_CROPT, .default_value = 0, DEFREF(cropt), DEFINT(-35, 544), .get_min_value = ctrl_cropt_min_get, .get_max_value = ctrl_cropt_max_get, .get_def_value = ctrl_get_cropcapdt, }, { .desc = "Capture crop width", .name = "crop_width", .internal_id = PVR2_CID_CROPW, .default_value = 720, DEFREF(cropw), DEFINT(0, 864), .get_max_value = ctrl_cropw_max_get, .get_def_value = ctrl_get_cropcapdw, }, { .desc = "Capture crop height", .name = "crop_height", .internal_id = PVR2_CID_CROPH, .default_value = 480, DEFREF(croph), DEFINT(0, 576), .get_max_value = ctrl_croph_max_get, .get_def_value = ctrl_get_cropcapdh, }, { .desc = "Capture capability pixel aspect numerator", .name = "cropcap_pixel_numerator", .internal_id = PVR2_CID_CROPCAPPAN, .get_value = ctrl_get_cropcappan, }, { .desc = "Capture capability pixel aspect denominator", .name = "cropcap_pixel_denominator", .internal_id = PVR2_CID_CROPCAPPAD, .get_value = ctrl_get_cropcappad, }, { .desc = "Capture capability bounds top", .name = "cropcap_bounds_top", .internal_id = PVR2_CID_CROPCAPBT, .get_value = ctrl_get_cropcapbt, }, { .desc = "Capture capability bounds left", .name = 
"cropcap_bounds_left", .internal_id = PVR2_CID_CROPCAPBL, .get_value = ctrl_get_cropcapbl, }, { .desc = "Capture capability bounds width", .name = "cropcap_bounds_width", .internal_id = PVR2_CID_CROPCAPBW, .get_value = ctrl_get_cropcapbw, }, { .desc = "Capture capability bounds height", .name = "cropcap_bounds_height", .internal_id = PVR2_CID_CROPCAPBH, .get_value = ctrl_get_cropcapbh, },{ .desc = "Video Source", .name = "input", .internal_id = PVR2_CID_INPUT, .default_value = PVR2_CVAL_INPUT_TV, .check_value = ctrl_check_input, DEFREF(input), DEFENUM(control_values_input), },{ .desc = "Audio Mode", .name = "audio_mode", .internal_id = PVR2_CID_AUDIOMODE, .default_value = V4L2_TUNER_MODE_STEREO, DEFREF(audiomode), DEFENUM(control_values_audiomode), },{ .desc = "Horizontal capture resolution", .name = "resolution_hor", .internal_id = PVR2_CID_HRES, .default_value = 720, DEFREF(res_hor), DEFINT(19,720), },{ .desc = "Vertical capture resolution", .name = "resolution_ver", .internal_id = PVR2_CID_VRES, .default_value = 480, DEFREF(res_ver), DEFINT(17,576), /* Hook in check for video standard and adjust maximum depending on the standard. */ .get_max_value = ctrl_vres_max_get, .get_min_value = ctrl_vres_min_get, },{ .v4l_id = V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ, .default_value = V4L2_MPEG_AUDIO_SAMPLING_FREQ_48000, .desc = "Audio Sampling Frequency", .name = "srate", DEFREF(srate), DEFENUM(control_values_srate), },{ .desc = "Tuner Frequency (Hz)", .name = "frequency", .internal_id = PVR2_CID_FREQUENCY, .default_value = 0, .set_value = ctrl_freq_set, .get_value = ctrl_freq_get, .is_dirty = ctrl_freq_is_dirty, .clear_dirty = ctrl_freq_clear_dirty, DEFINT(0,0), /* Hook in check for input value (tv/radio) and adjust max/min values accordingly */ .get_max_value = ctrl_freq_max_get, .get_min_value = ctrl_freq_min_get, },{ .desc = "Channel", .name = "channel", .set_value = ctrl_channel_set, .get_value = ctrl_channel_get, DEFINT(0,FREQTABLE_SIZE), },{ .desc = "Channel Program Frequency", .name = "freq_table_value", .set_value = ctrl_channelfreq_set, .get_value = ctrl_channelfreq_get, DEFINT(0,0), /* Hook in check for input value (tv/radio) and adjust max/min values accordingly */ .get_max_value = ctrl_freq_max_get, .get_min_value = ctrl_freq_min_get, },{ .desc = "Channel Program ID", .name = "freq_table_channel", .set_value = ctrl_channelprog_set, .get_value = ctrl_channelprog_get, DEFINT(0,FREQTABLE_SIZE), },{ .desc = "Streaming Enabled", .name = "streaming_enabled", .get_value = ctrl_streamingenabled_get, DEFBOOL, },{ .desc = "USB Speed", .name = "usb_speed", .get_value = ctrl_hsm_get, DEFENUM(control_values_hsm), },{ .desc = "Master State", .name = "master_state", .get_value = ctrl_masterstate_get, DEFENUM(pvr2_state_names), },{ .desc = "Signal Present", .name = "signal_present", .get_value = ctrl_signal_get, DEFINT(0,65535), },{ .desc = "Audio Modes Present", .name = "audio_modes_present", .get_value = ctrl_audio_modes_present_get, /* For this type we "borrow" the V4L2_TUNER_MODE enum from v4l. Nothing outside of this module cares about this, but I reuse it in order to also reuse the control_values_audiomode string table. 
*/ DEFMASK(((1 << V4L2_TUNER_MODE_MONO)| (1 << V4L2_TUNER_MODE_STEREO)| (1 << V4L2_TUNER_MODE_LANG1)| (1 << V4L2_TUNER_MODE_LANG2)), control_values_audiomode), },{ .desc = "Video Standards Available Mask", .name = "video_standard_mask_available", .internal_id = PVR2_CID_STDAVAIL, .skip_init = !0, .get_value = ctrl_stdavail_get, .set_value = ctrl_stdavail_set, .val_to_sym = ctrl_std_val_to_sym, .sym_to_val = ctrl_std_sym_to_val, .type = pvr2_ctl_bitmask, },{ .desc = "Video Standards In Use Mask", .name = "video_standard_mask_active", .internal_id = PVR2_CID_STDCUR, .skip_init = !0, .get_value = ctrl_stdcur_get, .set_value = ctrl_stdcur_set, .is_dirty = ctrl_stdcur_is_dirty, .clear_dirty = ctrl_stdcur_clear_dirty, .val_to_sym = ctrl_std_val_to_sym, .sym_to_val = ctrl_std_sym_to_val, .type = pvr2_ctl_bitmask, },{ .desc = "Video Standards Detected Mask", .name = "video_standard_mask_detected", .internal_id = PVR2_CID_STDDETECT, .skip_init = !0, .get_value = ctrl_stddetect_get, .val_to_sym = ctrl_std_val_to_sym, .sym_to_val = ctrl_std_sym_to_val, .type = pvr2_ctl_bitmask, } }; #define CTRLDEF_COUNT ARRAY_SIZE(control_defs) const char *pvr2_config_get_name(enum pvr2_config cfg) { switch (cfg) { case pvr2_config_empty: return "empty"; case pvr2_config_mpeg: return "mpeg"; case pvr2_config_vbi: return "vbi"; case pvr2_config_pcm: return "pcm"; case pvr2_config_rawvideo: return "raw video"; } return "<unknown>"; } struct usb_device *pvr2_hdw_get_dev(struct pvr2_hdw *hdw) { return hdw->usb_dev; } unsigned long pvr2_hdw_get_sn(struct pvr2_hdw *hdw) { return hdw->serial_number; } const char *pvr2_hdw_get_bus_info(struct pvr2_hdw *hdw) { return hdw->bus_info; } const char *pvr2_hdw_get_device_identifier(struct pvr2_hdw *hdw) { return hdw->identifier; } unsigned long pvr2_hdw_get_cur_freq(struct pvr2_hdw *hdw) { return hdw->freqSelector ? hdw->freqValTelevision : hdw->freqValRadio; } /* Set the currently tuned frequency and account for all possible driver-core side effects of this action. */ static void pvr2_hdw_set_cur_freq(struct pvr2_hdw *hdw,unsigned long val) { if (hdw->input_val == PVR2_CVAL_INPUT_RADIO) { if (hdw->freqSelector) { /* Swing over to radio frequency selection */ hdw->freqSelector = 0; hdw->freqDirty = !0; } if (hdw->freqValRadio != val) { hdw->freqValRadio = val; hdw->freqSlotRadio = 0; hdw->freqDirty = !0; } } else { if (!(hdw->freqSelector)) { /* Swing over to television frequency selection */ hdw->freqSelector = 1; hdw->freqDirty = !0; } if (hdw->freqValTelevision != val) { hdw->freqValTelevision = val; hdw->freqSlotTelevision = 0; hdw->freqDirty = !0; } } } int pvr2_hdw_get_unit_number(struct pvr2_hdw *hdw) { return hdw->unit_number; } /* Attempt to locate one of the given set of files. Messages are logged appropriate to what has been found. The return value will be 0 or greater on success (it will be the index of the file name found) and fw_entry will be filled in. Otherwise a negative error is returned on failure. If the return value is -ENOENT then no viable firmware file could be located. 
*/ static int pvr2_locate_firmware(struct pvr2_hdw *hdw, const struct firmware **fw_entry, const char *fwtypename, unsigned int fwcount, const char *fwnames[]) { unsigned int idx; int ret = -EINVAL; for (idx = 0; idx < fwcount; idx++) { ret = request_firmware(fw_entry, fwnames[idx], &hdw->usb_dev->dev); if (!ret) { trace_firmware("Located %s firmware: %s; uploading...", fwtypename, fwnames[idx]); return idx; } if (ret == -ENOENT) continue; pvr2_trace(PVR2_TRACE_ERROR_LEGS, "request_firmware fatal error with code=%d",ret); return ret; } pvr2_trace(PVR2_TRACE_ERROR_LEGS, "***WARNING*** Device %s firmware seems to be missing.", fwtypename); pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Did you install the pvrusb2 firmware files in their proper location?"); if (fwcount == 1) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "request_firmware unable to locate %s file %s", fwtypename,fwnames[0]); } else { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "request_firmware unable to locate one of the following %s files:", fwtypename); for (idx = 0; idx < fwcount; idx++) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "request_firmware: Failed to find %s", fwnames[idx]); } } return ret; } /* * pvr2_upload_firmware1(). * * Send the 8051 firmware to the device. After the upload, arrange for * device to re-enumerate. * * NOTE : the pointer to the firmware data given by request_firmware() * is not suitable for an usb transaction. * */ static int pvr2_upload_firmware1(struct pvr2_hdw *hdw) { const struct firmware *fw_entry = NULL; void *fw_ptr; unsigned int pipe; unsigned int fwsize; int ret; u16 address; if (!hdw->hdw_desc->fx2_firmware.cnt) { hdw->fw1_state = FW1_STATE_OK; pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Connected device type defines no firmware to upload; ignoring firmware"); return -ENOTTY; } hdw->fw1_state = FW1_STATE_FAILED; // default result trace_firmware("pvr2_upload_firmware1"); ret = pvr2_locate_firmware(hdw,&fw_entry,"fx2 controller", hdw->hdw_desc->fx2_firmware.cnt, hdw->hdw_desc->fx2_firmware.lst); if (ret < 0) { if (ret == -ENOENT) hdw->fw1_state = FW1_STATE_MISSING; return ret; } usb_clear_halt(hdw->usb_dev, usb_sndbulkpipe(hdw->usb_dev, 0 & 0x7f)); pipe = usb_sndctrlpipe(hdw->usb_dev, 0); fwsize = fw_entry->size; if ((fwsize != 0x2000) && (!(hdw->hdw_desc->flag_fx2_16kb && (fwsize == 0x4000)))) { if (hdw->hdw_desc->flag_fx2_16kb) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Wrong fx2 firmware size (expected 8192 or 16384, got %u)", fwsize); } else { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Wrong fx2 firmware size (expected 8192, got %u)", fwsize); } release_firmware(fw_entry); return -ENOMEM; } fw_ptr = kmalloc(0x800, GFP_KERNEL); if (fw_ptr == NULL){ release_firmware(fw_entry); return -ENOMEM; } /* We have to hold the CPU during firmware upload. */ pvr2_hdw_cpureset_assert(hdw,1); /* upload the firmware to address 0000-1fff in 2048 (=0x800) bytes chunk. */ ret = 0; for (address = 0; address < fwsize; address += 0x800) { memcpy(fw_ptr, fw_entry->data + address, 0x800); ret += usb_control_msg(hdw->usb_dev, pipe, 0xa0, 0x40, address, 0, fw_ptr, 0x800, 1000); } trace_firmware("Upload done, releasing device's CPU"); /* Now release the CPU. It will disconnect and reconnect later. */ pvr2_hdw_cpureset_assert(hdw,0); kfree(fw_ptr); release_firmware(fw_entry); trace_firmware("Upload done (%d bytes sent)",ret); /* We should have written fwsize bytes */ if (ret == fwsize) { hdw->fw1_state = FW1_STATE_RELOAD; return 0; } return -EIO; } /* * pvr2_upload_firmware2() * * This uploads encoder firmware on endpoint 2. 
* */ int pvr2_upload_firmware2(struct pvr2_hdw *hdw) { const struct firmware *fw_entry = NULL; void *fw_ptr; unsigned int pipe, fw_len, fw_done, bcnt, icnt; int actual_length; int ret = 0; int fwidx; static const char *fw_files[] = { CX2341X_FIRM_ENC_FILENAME, }; if (hdw->hdw_desc->flag_skip_cx23416_firmware) { return 0; } trace_firmware("pvr2_upload_firmware2"); ret = pvr2_locate_firmware(hdw,&fw_entry,"encoder", ARRAY_SIZE(fw_files), fw_files); if (ret < 0) return ret; fwidx = ret; ret = 0; /* Since we're about to completely reinitialize the encoder, invalidate our cached copy of its configuration state. Next time we configure the encoder, then we'll fully configure it. */ hdw->enc_cur_valid = 0; /* Encoder is about to be reset so note that as far as we're concerned now, the encoder has never been run. */ del_timer_sync(&hdw->encoder_run_timer); if (hdw->state_encoder_runok) { hdw->state_encoder_runok = 0; trace_stbit("state_encoder_runok",hdw->state_encoder_runok); } /* First prepare firmware loading */ ret |= pvr2_write_register(hdw, 0x0048, 0xffffffff); /*interrupt mask*/ ret |= pvr2_hdw_gpio_chg_dir(hdw,0xffffffff,0x00000088); /*gpio dir*/ ret |= pvr2_hdw_gpio_chg_out(hdw,0xffffffff,0x00000008); /*gpio output state*/ ret |= pvr2_hdw_cmd_deep_reset(hdw); ret |= pvr2_write_register(hdw, 0xa064, 0x00000000); /*APU command*/ ret |= pvr2_hdw_gpio_chg_dir(hdw,0xffffffff,0x00000408); /*gpio dir*/ ret |= pvr2_hdw_gpio_chg_out(hdw,0xffffffff,0x00000008); /*gpio output state*/ ret |= pvr2_write_register(hdw, 0x9058, 0xffffffed); /*VPU ctrl*/ ret |= pvr2_write_register(hdw, 0x9054, 0xfffffffd); /*reset hw blocks*/ ret |= pvr2_write_register(hdw, 0x07f8, 0x80000800); /*encoder SDRAM refresh*/ ret |= pvr2_write_register(hdw, 0x07fc, 0x0000001a); /*encoder SDRAM pre-charge*/ ret |= pvr2_write_register(hdw, 0x0700, 0x00000000); /*I2C clock*/ ret |= pvr2_write_register(hdw, 0xaa00, 0x00000000); /*unknown*/ ret |= pvr2_write_register(hdw, 0xaa04, 0x00057810); /*unknown*/ ret |= pvr2_write_register(hdw, 0xaa10, 0x00148500); /*unknown*/ ret |= pvr2_write_register(hdw, 0xaa18, 0x00840000); /*unknown*/ ret |= pvr2_issue_simple_cmd(hdw,FX2CMD_FWPOST1); ret |= pvr2_issue_simple_cmd(hdw,FX2CMD_MEMSEL | (1 << 8) | (0 << 16)); if (ret) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "firmware2 upload prep failed, ret=%d",ret); release_firmware(fw_entry); goto done; } /* Now send firmware */ fw_len = fw_entry->size; if (fw_len % sizeof(u32)) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "size of %s firmware must be a multiple of %zu bytes", fw_files[fwidx],sizeof(u32)); release_firmware(fw_entry); ret = -EINVAL; goto done; } fw_ptr = kmalloc(FIRMWARE_CHUNK_SIZE, GFP_KERNEL); if (fw_ptr == NULL){ release_firmware(fw_entry); pvr2_trace(PVR2_TRACE_ERROR_LEGS, "failed to allocate memory for firmware2 upload"); ret = -ENOMEM; goto done; } pipe = usb_sndbulkpipe(hdw->usb_dev, PVR2_FIRMWARE_ENDPOINT); fw_done = 0; for (fw_done = 0; fw_done < fw_len;) { bcnt = fw_len - fw_done; if (bcnt > FIRMWARE_CHUNK_SIZE) bcnt = FIRMWARE_CHUNK_SIZE; memcpy(fw_ptr, fw_entry->data + fw_done, bcnt); /* Usbsnoop log shows that we must swap bytes... */ /* Some background info: The data being swapped here is a firmware image destined for the mpeg encoder chip that lives at the other end of a USB endpoint. The encoder chip always talks in 32 bit chunks and its storage is organized into 32 bit words. However from the file system to the encoder chip everything is purely a byte stream. 
The firmware file's contents are always 32 bit swapped from what the encoder expects. Thus the need always exists to swap the bytes regardless of the endian type of the host processor and therefore swab32() makes the most sense. */ for (icnt = 0; icnt < bcnt/4 ; icnt++) ((u32 *)fw_ptr)[icnt] = swab32(((u32 *)fw_ptr)[icnt]); ret |= usb_bulk_msg(hdw->usb_dev, pipe, fw_ptr,bcnt, &actual_length, 1000); ret |= (actual_length != bcnt); if (ret) break; fw_done += bcnt; } trace_firmware("upload of %s : %i / %i ", fw_files[fwidx],fw_done,fw_len); kfree(fw_ptr); release_firmware(fw_entry); if (ret) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "firmware2 upload transfer failure"); goto done; } /* Finish upload */ ret |= pvr2_write_register(hdw, 0x9054, 0xffffffff); /*reset hw blocks*/ ret |= pvr2_write_register(hdw, 0x9058, 0xffffffe8); /*VPU ctrl*/ ret |= pvr2_issue_simple_cmd(hdw,FX2CMD_MEMSEL | (1 << 8) | (0 << 16)); if (ret) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "firmware2 upload post-proc failure"); } done: if (hdw->hdw_desc->signal_routing_scheme == PVR2_ROUTING_SCHEME_GOTVIEW) { /* Ensure that GPIO 11 is set to output for GOTVIEW hardware. */ pvr2_hdw_gpio_chg_dir(hdw,(1 << 11),~0); } return ret; } static const char *pvr2_get_state_name(unsigned int st) { if (st < ARRAY_SIZE(pvr2_state_names)) { return pvr2_state_names[st]; } return "???"; } static int pvr2_decoder_enable(struct pvr2_hdw *hdw,int enablefl) { /* Even though we really only care about the video decoder chip at this point, we'll broadcast stream on/off to all sub-devices anyway, just in case somebody else wants to hear the command... */ pvr2_trace(PVR2_TRACE_CHIPS, "subdev v4l2 stream=%s", (enablefl ? "on" : "off")); v4l2_device_call_all(&hdw->v4l2_dev, 0, video, s_stream, enablefl); v4l2_device_call_all(&hdw->v4l2_dev, 0, audio, s_stream, enablefl); if (hdw->decoder_client_id) { /* We get here if the encoder has been noticed. Otherwise we'll issue a warning to the user (which should normally never happen). */ return 0; } if (!hdw->flag_decoder_missed) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "***WARNING*** No decoder present"); hdw->flag_decoder_missed = !0; trace_stbit("flag_decoder_missed", hdw->flag_decoder_missed); } return -EIO; } int pvr2_hdw_get_state(struct pvr2_hdw *hdw) { return hdw->master_state; } static int pvr2_hdw_untrip_unlocked(struct pvr2_hdw *hdw) { if (!hdw->flag_tripped) return 0; hdw->flag_tripped = 0; pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Clearing driver error status"); return !0; } int pvr2_hdw_untrip(struct pvr2_hdw *hdw) { int fl; LOCK_TAKE(hdw->big_lock); do { fl = pvr2_hdw_untrip_unlocked(hdw); } while (0); LOCK_GIVE(hdw->big_lock); if (fl) pvr2_hdw_state_sched(hdw); return 0; } int pvr2_hdw_get_streaming(struct pvr2_hdw *hdw) { return hdw->state_pipeline_req != 0; } int pvr2_hdw_set_streaming(struct pvr2_hdw *hdw,int enable_flag) { int ret,st; LOCK_TAKE(hdw->big_lock); pvr2_hdw_untrip_unlocked(hdw); if (!enable_flag != !hdw->state_pipeline_req) { hdw->state_pipeline_req = enable_flag != 0; pvr2_trace(PVR2_TRACE_START_STOP, "/*--TRACE_STREAM--*/ %s", enable_flag ? 
"enable" : "disable"); } pvr2_hdw_state_sched(hdw); LOCK_GIVE(hdw->big_lock); if ((ret = pvr2_hdw_wait(hdw,0)) < 0) return ret; if (enable_flag) { while ((st = hdw->master_state) != PVR2_STATE_RUN) { if (st != PVR2_STATE_READY) return -EIO; if ((ret = pvr2_hdw_wait(hdw,st)) < 0) return ret; } } return 0; } int pvr2_hdw_set_stream_type(struct pvr2_hdw *hdw,enum pvr2_config config) { int fl; LOCK_TAKE(hdw->big_lock); if ((fl = (hdw->desired_stream_type != config)) != 0) { hdw->desired_stream_type = config; hdw->state_pipeline_config = 0; trace_stbit("state_pipeline_config", hdw->state_pipeline_config); pvr2_hdw_state_sched(hdw); } LOCK_GIVE(hdw->big_lock); if (fl) return 0; return pvr2_hdw_wait(hdw,0); } static int get_default_tuner_type(struct pvr2_hdw *hdw) { int unit_number = hdw->unit_number; int tp = -1; if ((unit_number >= 0) && (unit_number < PVR_NUM)) { tp = tuner[unit_number]; } if (tp < 0) return -EINVAL; hdw->tuner_type = tp; hdw->tuner_updated = !0; return 0; } static v4l2_std_id get_default_standard(struct pvr2_hdw *hdw) { int unit_number = hdw->unit_number; int tp = 0; if ((unit_number >= 0) && (unit_number < PVR_NUM)) { tp = video_std[unit_number]; if (tp) return tp; } return 0; } static unsigned int get_default_error_tolerance(struct pvr2_hdw *hdw) { int unit_number = hdw->unit_number; int tp = 0; if ((unit_number >= 0) && (unit_number < PVR_NUM)) { tp = tolerance[unit_number]; } return tp; } static int pvr2_hdw_check_firmware(struct pvr2_hdw *hdw) { /* Try a harmless request to fetch the eeprom's address over endpoint 1. See what happens. Only the full FX2 image can respond to this. If this probe fails then likely the FX2 firmware needs be loaded. */ int result; LOCK_TAKE(hdw->ctl_lock); do { hdw->cmd_buffer[0] = FX2CMD_GET_EEPROM_ADDR; result = pvr2_send_request_ex(hdw,HZ*1,!0, hdw->cmd_buffer,1, hdw->cmd_buffer,1); if (result < 0) break; } while(0); LOCK_GIVE(hdw->ctl_lock); if (result) { pvr2_trace(PVR2_TRACE_INIT, "Probe of device endpoint 1 result status %d", result); } else { pvr2_trace(PVR2_TRACE_INIT, "Probe of device endpoint 1 succeeded"); } return result == 0; } struct pvr2_std_hack { v4l2_std_id pat; /* Pattern to match */ v4l2_std_id msk; /* Which bits we care about */ v4l2_std_id std; /* What additional standards or default to set */ }; /* This data structure labels specific combinations of standards from tveeprom that we'll try to recognize. If we recognize one, then assume a specified default standard to use. This is here because tveeprom only tells us about available standards not the intended default standard (if any) for the device in question. We guess the default based on what has been reported as available. Note that this is only for guessing a default - which can always be overridden explicitly - and if the user has otherwise named a default then that default will always be used in place of this table. 
*/ static const struct pvr2_std_hack std_eeprom_maps[] = { { /* PAL(B/G) */ .pat = V4L2_STD_B|V4L2_STD_GH, .std = V4L2_STD_PAL_B|V4L2_STD_PAL_B1|V4L2_STD_PAL_G, }, { /* NTSC(M) */ .pat = V4L2_STD_MN, .std = V4L2_STD_NTSC_M, }, { /* PAL(I) */ .pat = V4L2_STD_PAL_I, .std = V4L2_STD_PAL_I, }, { /* SECAM(L/L') */ .pat = V4L2_STD_SECAM_L|V4L2_STD_SECAM_LC, .std = V4L2_STD_SECAM_L|V4L2_STD_SECAM_LC, }, { /* PAL(D/D1/K) */ .pat = V4L2_STD_DK, .std = V4L2_STD_PAL_D|V4L2_STD_PAL_D1|V4L2_STD_PAL_K, }, }; static void pvr2_hdw_setup_std(struct pvr2_hdw *hdw) { char buf[40]; unsigned int bcnt; v4l2_std_id std1,std2,std3; std1 = get_default_standard(hdw); std3 = std1 ? 0 : hdw->hdw_desc->default_std_mask; bcnt = pvr2_std_id_to_str(buf,sizeof(buf),hdw->std_mask_eeprom); pvr2_trace(PVR2_TRACE_STD, "Supported video standard(s) reported available in hardware: %.*s", bcnt,buf); hdw->std_mask_avail = hdw->std_mask_eeprom; std2 = (std1|std3) & ~hdw->std_mask_avail; if (std2) { bcnt = pvr2_std_id_to_str(buf,sizeof(buf),std2); pvr2_trace(PVR2_TRACE_STD, "Expanding supported video standards to include: %.*s", bcnt,buf); hdw->std_mask_avail |= std2; } hdw->std_info_cur.def.type_bitmask.valid_bits = hdw->std_mask_avail; if (std1) { bcnt = pvr2_std_id_to_str(buf,sizeof(buf),std1); pvr2_trace(PVR2_TRACE_STD, "Initial video standard forced to %.*s", bcnt,buf); hdw->std_mask_cur = std1; hdw->std_dirty = !0; return; } if (std3) { bcnt = pvr2_std_id_to_str(buf,sizeof(buf),std3); pvr2_trace(PVR2_TRACE_STD, "Initial video standard (determined by device type): %.*s", bcnt, buf); hdw->std_mask_cur = std3; hdw->std_dirty = !0; return; } { unsigned int idx; for (idx = 0; idx < ARRAY_SIZE(std_eeprom_maps); idx++) { if (std_eeprom_maps[idx].msk ? ((std_eeprom_maps[idx].pat ^ hdw->std_mask_eeprom) & std_eeprom_maps[idx].msk) : (std_eeprom_maps[idx].pat != hdw->std_mask_eeprom)) continue; bcnt = pvr2_std_id_to_str(buf,sizeof(buf), std_eeprom_maps[idx].std); pvr2_trace(PVR2_TRACE_STD, "Initial video standard guessed as %.*s", bcnt,buf); hdw->std_mask_cur = std_eeprom_maps[idx].std; hdw->std_dirty = !0; return; } } } static unsigned int pvr2_copy_i2c_addr_list( unsigned short *dst, const unsigned char *src, unsigned int dst_max) { unsigned int cnt = 0; if (!src) return 0; while (src[cnt] && (cnt + 1) < dst_max) { dst[cnt] = src[cnt]; cnt++; } dst[cnt] = I2C_CLIENT_END; return cnt; } static void pvr2_hdw_cx25840_vbi_hack(struct pvr2_hdw *hdw) { /* Mike Isely <isely@pobox.com> 19-Nov-2006 - This bit of nuttiness for cx25840 causes that module to correctly set up its video scaling. This is really a problem in the cx25840 module itself, but we work around it here. The problem has not been seen in ivtv because there VBI is supported and set up. We don't do VBI here (at least not yet) and thus we never attempted to even set it up. 
*/ struct v4l2_format fmt; if (hdw->decoder_client_id != PVR2_CLIENT_ID_CX25840) { /* We're not using a cx25840 so don't enable the hack */ return; } pvr2_trace(PVR2_TRACE_INIT, "Module ID %u: Executing cx25840 VBI hack", hdw->decoder_client_id); memset(&fmt, 0, sizeof(fmt)); fmt.type = V4L2_BUF_TYPE_SLICED_VBI_CAPTURE; fmt.fmt.sliced.service_lines[0][21] = V4L2_SLICED_CAPTION_525; fmt.fmt.sliced.service_lines[1][21] = V4L2_SLICED_CAPTION_525; v4l2_device_call_all(&hdw->v4l2_dev, hdw->decoder_client_id, vbi, s_sliced_fmt, &fmt.fmt.sliced); } static int pvr2_hdw_load_subdev(struct pvr2_hdw *hdw, const struct pvr2_device_client_desc *cd) { const char *fname; unsigned char mid; struct v4l2_subdev *sd; unsigned int i2ccnt; const unsigned char *p; /* Arbitrary count - max # i2c addresses we will probe */ unsigned short i2caddr[25]; mid = cd->module_id; fname = (mid < ARRAY_SIZE(module_names)) ? module_names[mid] : NULL; if (!fname) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Module ID %u for device %s has no name? The driver might have a configuration problem.", mid, hdw->hdw_desc->description); return -EINVAL; } pvr2_trace(PVR2_TRACE_INIT, "Module ID %u (%s) for device %s being loaded...", mid, fname, hdw->hdw_desc->description); i2ccnt = pvr2_copy_i2c_addr_list(i2caddr, cd->i2c_address_list, ARRAY_SIZE(i2caddr)); if (!i2ccnt && ((p = (mid < ARRAY_SIZE(module_i2c_addresses)) ? module_i2c_addresses[mid] : NULL) != NULL)) { /* Second chance: Try default i2c address list */ i2ccnt = pvr2_copy_i2c_addr_list(i2caddr, p, ARRAY_SIZE(i2caddr)); if (i2ccnt) { pvr2_trace(PVR2_TRACE_INIT, "Module ID %u: Using default i2c address list", mid); } } if (!i2ccnt) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Module ID %u (%s) for device %s: No i2c addresses. The driver might have a configuration problem.", mid, fname, hdw->hdw_desc->description); return -EINVAL; } if (i2ccnt == 1) { pvr2_trace(PVR2_TRACE_INIT, "Module ID %u: Setting up with specified i2c address 0x%x", mid, i2caddr[0]); sd = v4l2_i2c_new_subdev(&hdw->v4l2_dev, &hdw->i2c_adap, fname, i2caddr[0], NULL); } else { pvr2_trace(PVR2_TRACE_INIT, "Module ID %u: Setting up with address probe list", mid); sd = v4l2_i2c_new_subdev(&hdw->v4l2_dev, &hdw->i2c_adap, fname, 0, i2caddr); } if (!sd) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Module ID %u (%s) for device %s failed to load. Possible missing sub-device kernel module or initialization failure within module.", mid, fname, hdw->hdw_desc->description); return -EIO; } /* Tag this sub-device instance with the module ID we know about. In other places we'll use that tag to determine if the instance requires special handling. */ sd->grp_id = mid; pvr2_trace(PVR2_TRACE_INFO, "Attached sub-driver %s", fname); /* client-specific setup... 
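   Right now this just means remembering which sub-device acts as the
   video decoder (cx25840 or saa7115), so that later code such as the VBI
   hack above and pvr2_hdw_cmd_decoder_reset() can address it by its
   client ID.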
*/ switch (mid) { case PVR2_CLIENT_ID_CX25840: case PVR2_CLIENT_ID_SAA7115: hdw->decoder_client_id = mid; break; default: break; } return 0; } static void pvr2_hdw_load_modules(struct pvr2_hdw *hdw) { unsigned int idx; const struct pvr2_string_table *cm; const struct pvr2_device_client_table *ct; int okFl = !0; cm = &hdw->hdw_desc->client_modules; for (idx = 0; idx < cm->cnt; idx++) { request_module(cm->lst[idx]); } ct = &hdw->hdw_desc->client_table; for (idx = 0; idx < ct->cnt; idx++) { if (pvr2_hdw_load_subdev(hdw, &ct->lst[idx]) < 0) okFl = 0; } if (!okFl) { hdw->flag_modulefail = !0; pvr2_hdw_render_useless(hdw); } } static void pvr2_hdw_setup_low(struct pvr2_hdw *hdw) { int ret; unsigned int idx; struct pvr2_ctrl *cptr; int reloadFl = 0; if (hdw->hdw_desc->fx2_firmware.cnt) { if (!reloadFl) { reloadFl = (hdw->usb_intf->cur_altsetting->desc.bNumEndpoints == 0); if (reloadFl) { pvr2_trace(PVR2_TRACE_INIT, "USB endpoint config looks strange; possibly firmware needs to be loaded"); } } if (!reloadFl) { reloadFl = !pvr2_hdw_check_firmware(hdw); if (reloadFl) { pvr2_trace(PVR2_TRACE_INIT, "Check for FX2 firmware failed; possibly firmware needs to be loaded"); } } if (reloadFl) { if (pvr2_upload_firmware1(hdw) != 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Failure uploading firmware1"); } return; } } hdw->fw1_state = FW1_STATE_OK; if (!pvr2_hdw_dev_ok(hdw)) return; hdw->force_dirty = !0; if (!hdw->hdw_desc->flag_no_powerup) { pvr2_hdw_cmd_powerup(hdw); if (!pvr2_hdw_dev_ok(hdw)) return; } /* Take the IR chip out of reset, if appropriate */ if (hdw->ir_scheme_active == PVR2_IR_SCHEME_ZILOG) { pvr2_issue_simple_cmd(hdw, FX2CMD_HCW_ZILOG_RESET | (1 << 8) | ((0) << 16)); } /* This step MUST happen after the earlier powerup step */ pvr2_i2c_core_init(hdw); if (!pvr2_hdw_dev_ok(hdw)) return; /* Reset demod only on Hauppauge 160xxx platform */ if (le16_to_cpu(hdw->usb_dev->descriptor.idVendor) == 0x2040 && (le16_to_cpu(hdw->usb_dev->descriptor.idProduct) == 0x7502 || le16_to_cpu(hdw->usb_dev->descriptor.idProduct) == 0x7510)) { pr_info("%s(): resetting 160xxx demod\n", __func__); /* TODO: not sure this is proper place to reset once only */ pvr2_issue_simple_cmd(hdw, FX2CMD_HCW_DEMOD_RESET_PIN | (1 << 8) | ((0) << 16)); usleep_range(10000, 10500); pvr2_issue_simple_cmd(hdw, FX2CMD_HCW_DEMOD_RESET_PIN | (1 << 8) | ((1) << 16)); usleep_range(10000, 10500); } pvr2_hdw_load_modules(hdw); if (!pvr2_hdw_dev_ok(hdw)) return; v4l2_device_call_all(&hdw->v4l2_dev, 0, core, load_fw); for (idx = 0; idx < CTRLDEF_COUNT; idx++) { cptr = hdw->controls + idx; if (cptr->info->skip_init) continue; if (!cptr->info->set_value) continue; cptr->info->set_value(cptr,~0,cptr->info->default_value); } pvr2_hdw_cx25840_vbi_hack(hdw); /* Set up special default values for the television and radio frequencies here. It's not really important what these defaults are, but I set them to something usable in the Chicago area just to make driver testing a little easier. */ hdw->freqValTelevision = default_tv_freq; hdw->freqValRadio = default_radio_freq; // Do not use pvr2_reset_ctl_endpoints() here. It is not // thread-safe against the normal pvr2_send_request() mechanism. // (We should make it thread safe). 
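	/* Devices with a Hauppauge ROM get their tuner type, serial number
	   and supported standard mask from the eeprom (pvr2_eeprom_analyze());
	   everything else falls back to the defaults named in the device
	   description table. */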
if (hdw->hdw_desc->flag_has_hauppauge_rom) { ret = pvr2_hdw_get_eeprom_addr(hdw); if (!pvr2_hdw_dev_ok(hdw)) return; if (ret < 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Unable to determine location of eeprom, skipping"); } else { hdw->eeprom_addr = ret; pvr2_eeprom_analyze(hdw); if (!pvr2_hdw_dev_ok(hdw)) return; } } else { hdw->tuner_type = hdw->hdw_desc->default_tuner_type; hdw->tuner_updated = !0; hdw->std_mask_eeprom = V4L2_STD_ALL; } if (hdw->serial_number) { idx = scnprintf(hdw->identifier, sizeof(hdw->identifier) - 1, "sn-%lu", hdw->serial_number); } else if (hdw->unit_number >= 0) { idx = scnprintf(hdw->identifier, sizeof(hdw->identifier) - 1, "unit-%c", hdw->unit_number + 'a'); } else { idx = scnprintf(hdw->identifier, sizeof(hdw->identifier) - 1, "unit-??"); } hdw->identifier[idx] = 0; pvr2_hdw_setup_std(hdw); if (!get_default_tuner_type(hdw)) { pvr2_trace(PVR2_TRACE_INIT, "pvr2_hdw_setup: Tuner type overridden to %d", hdw->tuner_type); } if (!pvr2_hdw_dev_ok(hdw)) return; if (hdw->hdw_desc->signal_routing_scheme == PVR2_ROUTING_SCHEME_GOTVIEW) { /* Ensure that GPIO 11 is set to output for GOTVIEW hardware. */ pvr2_hdw_gpio_chg_dir(hdw,(1 << 11),~0); } pvr2_hdw_commit_setup(hdw); hdw->vid_stream = pvr2_stream_create(); if (!pvr2_hdw_dev_ok(hdw)) return; pvr2_trace(PVR2_TRACE_INIT, "pvr2_hdw_setup: video stream is %p",hdw->vid_stream); if (hdw->vid_stream) { idx = get_default_error_tolerance(hdw); if (idx) { pvr2_trace(PVR2_TRACE_INIT, "pvr2_hdw_setup: video stream %p setting tolerance %u", hdw->vid_stream,idx); } pvr2_stream_setup(hdw->vid_stream,hdw->usb_dev, PVR2_VID_ENDPOINT,idx); } if (!pvr2_hdw_dev_ok(hdw)) return; hdw->flag_init_ok = !0; pvr2_hdw_state_sched(hdw); } /* Set up the structure and attempt to put the device into a usable state. This can be a time-consuming operation, which is why it is not done internally as part of the create() step. */ static void pvr2_hdw_setup(struct pvr2_hdw *hdw) { pvr2_trace(PVR2_TRACE_INIT,"pvr2_hdw_setup(hdw=%p) begin",hdw); do { pvr2_hdw_setup_low(hdw); pvr2_trace(PVR2_TRACE_INIT, "pvr2_hdw_setup(hdw=%p) done, ok=%d init_ok=%d", hdw,pvr2_hdw_dev_ok(hdw),hdw->flag_init_ok); if (pvr2_hdw_dev_ok(hdw)) { if (hdw->flag_init_ok) { pvr2_trace( PVR2_TRACE_INFO, "Device initialization completed successfully."); break; } if (hdw->fw1_state == FW1_STATE_RELOAD) { pvr2_trace( PVR2_TRACE_INFO, "Device microcontroller firmware (re)loaded; it should now reset and reconnect."); break; } pvr2_trace( PVR2_TRACE_ERROR_LEGS, "Device initialization was not successful."); if (hdw->fw1_state == FW1_STATE_MISSING) { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "Giving up since device microcontroller firmware appears to be missing."); break; } } if (hdw->flag_modulefail) { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "***WARNING*** pvrusb2 driver initialization failed due to the failure of one or more sub-device kernel modules."); pvr2_trace( PVR2_TRACE_ERROR_LEGS, "You need to resolve the failing condition before this driver can function. 
There should be some earlier messages giving more information about the problem."); break; } if (procreload) { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "Attempting pvrusb2 recovery by reloading primary firmware."); pvr2_trace( PVR2_TRACE_ERROR_LEGS, "If this works, device should disconnect and reconnect in a sane state."); hdw->fw1_state = FW1_STATE_UNKNOWN; pvr2_upload_firmware1(hdw); } else { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "***WARNING*** pvrusb2 device hardware appears to be jammed and I can't clear it."); pvr2_trace( PVR2_TRACE_ERROR_LEGS, "You might need to power cycle the pvrusb2 device in order to recover."); } } while (0); pvr2_trace(PVR2_TRACE_INIT,"pvr2_hdw_setup(hdw=%p) end",hdw); } /* Perform second stage initialization. Set callback pointer first so that we can avoid a possible initialization race (if the kernel thread runs before the callback has been set). */ int pvr2_hdw_initialize(struct pvr2_hdw *hdw, void (*callback_func)(void *), void *callback_data) { LOCK_TAKE(hdw->big_lock); do { if (hdw->flag_disconnected) { /* Handle a race here: If we're already disconnected by this point, then give up. If we get past this then we'll remain connected for the duration of initialization since the entire initialization sequence is now protected by the big_lock. */ break; } hdw->state_data = callback_data; hdw->state_func = callback_func; pvr2_hdw_setup(hdw); } while (0); LOCK_GIVE(hdw->big_lock); return hdw->flag_init_ok; } /* Create, set up, and return a structure for interacting with the underlying hardware. */ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, const struct usb_device_id *devid) { unsigned int idx,cnt1,cnt2,m; struct pvr2_hdw *hdw = NULL; int valid_std_mask; struct pvr2_ctrl *cptr; struct usb_device *usb_dev; const struct pvr2_device_desc *hdw_desc; __u8 ifnum; struct v4l2_queryctrl qctrl; struct pvr2_ctl_info *ciptr; usb_dev = interface_to_usbdev(intf); hdw_desc = (const struct pvr2_device_desc *)(devid->driver_info); if (hdw_desc == NULL) { pvr2_trace(PVR2_TRACE_INIT, "pvr2_hdw_create: No device description pointer, unable to continue."); pvr2_trace(PVR2_TRACE_INIT, "If you have a new device type, please contact Mike Isely <isely@pobox.com> to get it included in the driver"); goto fail; } hdw = kzalloc(sizeof(*hdw),GFP_KERNEL); pvr2_trace(PVR2_TRACE_INIT,"pvr2_hdw_create: hdw=%p, type \"%s\"", hdw,hdw_desc->description); pvr2_trace(PVR2_TRACE_INFO, "Hardware description: %s", hdw_desc->description); if (hdw_desc->flag_is_experimental) { pvr2_trace(PVR2_TRACE_INFO, "**********"); pvr2_trace(PVR2_TRACE_INFO, "***WARNING*** Support for this device (%s) is experimental.", hdw_desc->description); pvr2_trace(PVR2_TRACE_INFO, "Important functionality might not be entirely working."); pvr2_trace(PVR2_TRACE_INFO, "Please consider contacting the driver author to help with further stabilization of the driver."); pvr2_trace(PVR2_TRACE_INFO, "**********"); } if (!hdw) goto fail; timer_setup(&hdw->quiescent_timer, pvr2_hdw_quiescent_timeout, 0); timer_setup(&hdw->decoder_stabilization_timer, pvr2_hdw_decoder_stabilization_timeout, 0); timer_setup(&hdw->encoder_wait_timer, pvr2_hdw_encoder_wait_timeout, 0); timer_setup(&hdw->encoder_run_timer, pvr2_hdw_encoder_run_timeout, 0); hdw->master_state = PVR2_STATE_DEAD; init_waitqueue_head(&hdw->state_wait_data); hdw->tuner_signal_stale = !0; cx2341x_fill_defaults(&hdw->enc_ctl_state); /* Calculate which inputs are OK */ m = 0; if (hdw_desc->flag_has_analogtuner) m |= 1 << PVR2_CVAL_INPUT_TV; if 
(hdw_desc->digital_control_scheme != PVR2_DIGITAL_SCHEME_NONE) { m |= 1 << PVR2_CVAL_INPUT_DTV; } if (hdw_desc->flag_has_svideo) m |= 1 << PVR2_CVAL_INPUT_SVIDEO; if (hdw_desc->flag_has_composite) m |= 1 << PVR2_CVAL_INPUT_COMPOSITE; if (hdw_desc->flag_has_fmradio) m |= 1 << PVR2_CVAL_INPUT_RADIO; hdw->input_avail_mask = m; hdw->input_allowed_mask = hdw->input_avail_mask; /* If not a hybrid device, pathway_state never changes. So initialize it here to what it should forever be. */ if (!(hdw->input_avail_mask & (1 << PVR2_CVAL_INPUT_DTV))) { hdw->pathway_state = PVR2_PATHWAY_ANALOG; } else if (!(hdw->input_avail_mask & (1 << PVR2_CVAL_INPUT_TV))) { hdw->pathway_state = PVR2_PATHWAY_DIGITAL; } hdw->control_cnt = CTRLDEF_COUNT; hdw->control_cnt += MPEGDEF_COUNT; hdw->controls = kcalloc(hdw->control_cnt, sizeof(struct pvr2_ctrl), GFP_KERNEL); if (!hdw->controls) goto fail; hdw->hdw_desc = hdw_desc; hdw->ir_scheme_active = hdw->hdw_desc->ir_scheme; for (idx = 0; idx < hdw->control_cnt; idx++) { cptr = hdw->controls + idx; cptr->hdw = hdw; } for (idx = 0; idx < 32; idx++) { hdw->std_mask_ptrs[idx] = hdw->std_mask_names[idx]; } for (idx = 0; idx < CTRLDEF_COUNT; idx++) { cptr = hdw->controls + idx; cptr->info = control_defs+idx; } /* Ensure that default input choice is a valid one. */ m = hdw->input_avail_mask; if (m) for (idx = 0; idx < (sizeof(m) << 3); idx++) { if (!((1UL << idx) & m)) continue; hdw->input_val = idx; break; } /* Define and configure additional controls from cx2341x module. */ hdw->mpeg_ctrl_info = kcalloc(MPEGDEF_COUNT, sizeof(*(hdw->mpeg_ctrl_info)), GFP_KERNEL); if (!hdw->mpeg_ctrl_info) goto fail; for (idx = 0; idx < MPEGDEF_COUNT; idx++) { cptr = hdw->controls + idx + CTRLDEF_COUNT; ciptr = &(hdw->mpeg_ctrl_info[idx].info); ciptr->desc = hdw->mpeg_ctrl_info[idx].desc; ciptr->name = mpeg_ids[idx].strid; ciptr->v4l_id = mpeg_ids[idx].id; ciptr->skip_init = !0; ciptr->get_value = ctrl_cx2341x_get; ciptr->get_v4lflags = ctrl_cx2341x_getv4lflags; ciptr->is_dirty = ctrl_cx2341x_is_dirty; if (!idx) ciptr->clear_dirty = ctrl_cx2341x_clear_dirty; qctrl.id = ciptr->v4l_id; cx2341x_ctrl_query(&hdw->enc_ctl_state,&qctrl); if (!(qctrl.flags & V4L2_CTRL_FLAG_READ_ONLY)) { ciptr->set_value = ctrl_cx2341x_set; } strscpy(hdw->mpeg_ctrl_info[idx].desc, qctrl.name, sizeof(hdw->mpeg_ctrl_info[idx].desc)); ciptr->default_value = qctrl.default_value; switch (qctrl.type) { default: case V4L2_CTRL_TYPE_INTEGER: ciptr->type = pvr2_ctl_int; ciptr->def.type_int.min_value = qctrl.minimum; ciptr->def.type_int.max_value = qctrl.maximum; break; case V4L2_CTRL_TYPE_BOOLEAN: ciptr->type = pvr2_ctl_bool; break; case V4L2_CTRL_TYPE_MENU: ciptr->type = pvr2_ctl_enum; ciptr->def.type_enum.value_names = cx2341x_ctrl_get_menu(&hdw->enc_ctl_state, ciptr->v4l_id); for (cnt1 = 0; ciptr->def.type_enum.value_names[cnt1] != NULL; cnt1++) { } ciptr->def.type_enum.count = cnt1; break; } cptr->info = ciptr; } // Initialize control data regarding video standard masks valid_std_mask = pvr2_std_get_usable(); for (idx = 0; idx < 32; idx++) { if (!(valid_std_mask & (1UL << idx))) continue; cnt1 = pvr2_std_id_to_str( hdw->std_mask_names[idx], sizeof(hdw->std_mask_names[idx])-1, 1UL << idx); hdw->std_mask_names[idx][cnt1] = 0; } cptr = pvr2_hdw_get_ctrl_by_id(hdw,PVR2_CID_STDAVAIL); if (cptr) { memcpy(&hdw->std_info_avail,cptr->info, sizeof(hdw->std_info_avail)); cptr->info = &hdw->std_info_avail; hdw->std_info_avail.def.type_bitmask.bit_names = hdw->std_mask_ptrs; hdw->std_info_avail.def.type_bitmask.valid_bits = 
valid_std_mask; } cptr = pvr2_hdw_get_ctrl_by_id(hdw,PVR2_CID_STDCUR); if (cptr) { memcpy(&hdw->std_info_cur,cptr->info, sizeof(hdw->std_info_cur)); cptr->info = &hdw->std_info_cur; hdw->std_info_cur.def.type_bitmask.bit_names = hdw->std_mask_ptrs; hdw->std_info_cur.def.type_bitmask.valid_bits = valid_std_mask; } cptr = pvr2_hdw_get_ctrl_by_id(hdw,PVR2_CID_STDDETECT); if (cptr) { memcpy(&hdw->std_info_detect,cptr->info, sizeof(hdw->std_info_detect)); cptr->info = &hdw->std_info_detect; hdw->std_info_detect.def.type_bitmask.bit_names = hdw->std_mask_ptrs; hdw->std_info_detect.def.type_bitmask.valid_bits = valid_std_mask; } hdw->cropcap_stale = !0; hdw->eeprom_addr = -1; hdw->unit_number = -1; hdw->v4l_minor_number_video = -1; hdw->v4l_minor_number_vbi = -1; hdw->v4l_minor_number_radio = -1; hdw->ctl_write_buffer = kmalloc(PVR2_CTL_BUFFSIZE,GFP_KERNEL); if (!hdw->ctl_write_buffer) goto fail; hdw->ctl_read_buffer = kmalloc(PVR2_CTL_BUFFSIZE,GFP_KERNEL); if (!hdw->ctl_read_buffer) goto fail; hdw->ctl_write_urb = usb_alloc_urb(0,GFP_KERNEL); if (!hdw->ctl_write_urb) goto fail; hdw->ctl_read_urb = usb_alloc_urb(0,GFP_KERNEL); if (!hdw->ctl_read_urb) goto fail; if (v4l2_device_register(&intf->dev, &hdw->v4l2_dev) != 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Error registering with v4l core, giving up"); goto fail; } mutex_lock(&pvr2_unit_mtx); do { for (idx = 0; idx < PVR_NUM; idx++) { if (unit_pointers[idx]) continue; hdw->unit_number = idx; unit_pointers[idx] = hdw; break; } } while (0); mutex_unlock(&pvr2_unit_mtx); INIT_WORK(&hdw->workpoll, pvr2_hdw_worker_poll); if (hdw->unit_number == -1) goto fail; cnt1 = 0; cnt2 = scnprintf(hdw->name+cnt1,sizeof(hdw->name)-cnt1,"pvrusb2"); cnt1 += cnt2; if (hdw->unit_number >= 0) { cnt2 = scnprintf(hdw->name+cnt1,sizeof(hdw->name)-cnt1,"_%c", ('a' + hdw->unit_number)); cnt1 += cnt2; } if (cnt1 >= sizeof(hdw->name)) cnt1 = sizeof(hdw->name)-1; hdw->name[cnt1] = 0; pvr2_trace(PVR2_TRACE_INIT,"Driver unit number is %d, name is %s", hdw->unit_number,hdw->name); hdw->tuner_type = -1; hdw->flag_ok = !0; hdw->usb_intf = intf; hdw->usb_dev = usb_dev; usb_make_path(hdw->usb_dev, hdw->bus_info, sizeof(hdw->bus_info)); ifnum = hdw->usb_intf->cur_altsetting->desc.bInterfaceNumber; usb_set_interface(hdw->usb_dev,ifnum,0); mutex_init(&hdw->ctl_lock_mutex); mutex_init(&hdw->big_lock_mutex); return hdw; fail: if (hdw) { timer_shutdown_sync(&hdw->quiescent_timer); timer_shutdown_sync(&hdw->decoder_stabilization_timer); timer_shutdown_sync(&hdw->encoder_run_timer); timer_shutdown_sync(&hdw->encoder_wait_timer); flush_work(&hdw->workpoll); v4l2_device_unregister(&hdw->v4l2_dev); usb_free_urb(hdw->ctl_read_urb); usb_free_urb(hdw->ctl_write_urb); kfree(hdw->ctl_read_buffer); kfree(hdw->ctl_write_buffer); kfree(hdw->controls); kfree(hdw->mpeg_ctrl_info); kfree(hdw); } return NULL; } /* Remove _all_ associations between this driver and the underlying USB layer. 
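   In concrete terms: kill and free both control URBs, free the control
   transfer buffers, mark the interface as disconnected, detach from the
   V4L2 core so no dangling struct device reference remains, and render
   the hardware state useless so nothing tries to touch USB afterwards.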
*/ static void pvr2_hdw_remove_usb_stuff(struct pvr2_hdw *hdw) { if (hdw->flag_disconnected) return; pvr2_trace(PVR2_TRACE_INIT,"pvr2_hdw_remove_usb_stuff: hdw=%p",hdw); if (hdw->ctl_read_urb) { usb_kill_urb(hdw->ctl_read_urb); usb_free_urb(hdw->ctl_read_urb); hdw->ctl_read_urb = NULL; } if (hdw->ctl_write_urb) { usb_kill_urb(hdw->ctl_write_urb); usb_free_urb(hdw->ctl_write_urb); hdw->ctl_write_urb = NULL; } if (hdw->ctl_read_buffer) { kfree(hdw->ctl_read_buffer); hdw->ctl_read_buffer = NULL; } if (hdw->ctl_write_buffer) { kfree(hdw->ctl_write_buffer); hdw->ctl_write_buffer = NULL; } hdw->flag_disconnected = !0; /* If we don't do this, then there will be a dangling struct device reference to our disappearing device persisting inside the V4L core... */ v4l2_device_disconnect(&hdw->v4l2_dev); hdw->usb_dev = NULL; hdw->usb_intf = NULL; pvr2_hdw_render_useless(hdw); } void pvr2_hdw_set_v4l2_dev(struct pvr2_hdw *hdw, struct video_device *vdev) { vdev->v4l2_dev = &hdw->v4l2_dev; } /* Destroy hardware interaction structure */ void pvr2_hdw_destroy(struct pvr2_hdw *hdw) { if (!hdw) return; pvr2_trace(PVR2_TRACE_INIT,"pvr2_hdw_destroy: hdw=%p",hdw); flush_work(&hdw->workpoll); timer_shutdown_sync(&hdw->quiescent_timer); timer_shutdown_sync(&hdw->decoder_stabilization_timer); timer_shutdown_sync(&hdw->encoder_run_timer); timer_shutdown_sync(&hdw->encoder_wait_timer); if (hdw->fw_buffer) { kfree(hdw->fw_buffer); hdw->fw_buffer = NULL; } if (hdw->vid_stream) { pvr2_stream_destroy(hdw->vid_stream); hdw->vid_stream = NULL; } v4l2_device_unregister(&hdw->v4l2_dev); pvr2_hdw_disconnect(hdw); mutex_lock(&pvr2_unit_mtx); do { if ((hdw->unit_number >= 0) && (hdw->unit_number < PVR_NUM) && (unit_pointers[hdw->unit_number] == hdw)) { unit_pointers[hdw->unit_number] = NULL; } } while (0); mutex_unlock(&pvr2_unit_mtx); kfree(hdw->controls); kfree(hdw->mpeg_ctrl_info); kfree(hdw); } int pvr2_hdw_dev_ok(struct pvr2_hdw *hdw) { return (hdw && hdw->flag_ok); } /* Called when hardware has been unplugged */ void pvr2_hdw_disconnect(struct pvr2_hdw *hdw) { pvr2_trace(PVR2_TRACE_INIT,"pvr2_hdw_disconnect(hdw=%p)",hdw); LOCK_TAKE(hdw->big_lock); pvr2_i2c_core_done(hdw); LOCK_TAKE(hdw->ctl_lock); pvr2_hdw_remove_usb_stuff(hdw); LOCK_GIVE(hdw->ctl_lock); LOCK_GIVE(hdw->big_lock); } /* Get the number of defined controls */ unsigned int pvr2_hdw_get_ctrl_count(struct pvr2_hdw *hdw) { return hdw->control_cnt; } /* Retrieve a control handle given its index (0..count-1) */ struct pvr2_ctrl *pvr2_hdw_get_ctrl_by_index(struct pvr2_hdw *hdw, unsigned int idx) { if (idx >= hdw->control_cnt) return NULL; return hdw->controls + idx; } /* Retrieve a control handle given its index (0..count-1) */ struct pvr2_ctrl *pvr2_hdw_get_ctrl_by_id(struct pvr2_hdw *hdw, unsigned int ctl_id) { struct pvr2_ctrl *cptr; unsigned int idx; int i; /* This could be made a lot more efficient, but for now... */ for (idx = 0; idx < hdw->control_cnt; idx++) { cptr = hdw->controls + idx; i = cptr->info->internal_id; if (i && (i == ctl_id)) return cptr; } return NULL; } /* Given a V4L ID, retrieve the control structure associated with it. */ struct pvr2_ctrl *pvr2_hdw_get_ctrl_v4l(struct pvr2_hdw *hdw,unsigned int ctl_id) { struct pvr2_ctrl *cptr; unsigned int idx; int i; /* This could be made a lot more efficient, but for now... 
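   a linear scan is fine, since control_cnt is only CTRLDEF_COUNT plus
   MPEGDEF_COUNT entries.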
*/ for (idx = 0; idx < hdw->control_cnt; idx++) { cptr = hdw->controls + idx; i = cptr->info->v4l_id; if (i && (i == ctl_id)) return cptr; } return NULL; } /* Given a V4L ID for its immediate predecessor, retrieve the control structure associated with it. */ struct pvr2_ctrl *pvr2_hdw_get_ctrl_nextv4l(struct pvr2_hdw *hdw, unsigned int ctl_id) { struct pvr2_ctrl *cptr,*cp2; unsigned int idx; int i; /* This could be made a lot more efficient, but for now... */ cp2 = NULL; for (idx = 0; idx < hdw->control_cnt; idx++) { cptr = hdw->controls + idx; i = cptr->info->v4l_id; if (!i) continue; if (i <= ctl_id) continue; if (cp2 && (cp2->info->v4l_id < i)) continue; cp2 = cptr; } return cp2; return NULL; } static const char *get_ctrl_typename(enum pvr2_ctl_type tp) { switch (tp) { case pvr2_ctl_int: return "integer"; case pvr2_ctl_enum: return "enum"; case pvr2_ctl_bool: return "boolean"; case pvr2_ctl_bitmask: return "bitmask"; } return ""; } static void pvr2_subdev_set_control(struct pvr2_hdw *hdw, int id, const char *name, int val) { struct v4l2_control ctrl; struct v4l2_subdev *sd; pvr2_trace(PVR2_TRACE_CHIPS, "subdev v4l2 %s=%d", name, val); memset(&ctrl, 0, sizeof(ctrl)); ctrl.id = id; ctrl.value = val; v4l2_device_for_each_subdev(sd, &hdw->v4l2_dev) v4l2_s_ctrl(NULL, sd->ctrl_handler, &ctrl); } #define PVR2_SUBDEV_SET_CONTROL(hdw, id, lab) \ if ((hdw)->lab##_dirty || (hdw)->force_dirty) { \ pvr2_subdev_set_control(hdw, id, #lab, (hdw)->lab##_val); \ } static v4l2_std_id pvr2_hdw_get_detected_std(struct pvr2_hdw *hdw) { v4l2_std_id std; std = (v4l2_std_id)hdw->std_mask_avail; v4l2_device_call_all(&hdw->v4l2_dev, 0, video, querystd, &std); return std; } /* Execute whatever commands are required to update the state of all the sub-devices so that they match our current control values. */ static void pvr2_subdev_update(struct pvr2_hdw *hdw) { struct v4l2_subdev *sd; unsigned int id; pvr2_subdev_update_func fp; pvr2_trace(PVR2_TRACE_CHIPS, "subdev update..."); if (hdw->tuner_updated || hdw->force_dirty) { struct tuner_setup setup; pvr2_trace(PVR2_TRACE_CHIPS, "subdev tuner set_type(%d)", hdw->tuner_type); if (((int)(hdw->tuner_type)) >= 0) { memset(&setup, 0, sizeof(setup)); setup.addr = ADDR_UNSET; setup.type = hdw->tuner_type; setup.mode_mask = T_RADIO | T_ANALOG_TV; v4l2_device_call_all(&hdw->v4l2_dev, 0, tuner, s_type_addr, &setup); } } if (hdw->input_dirty || hdw->std_dirty || hdw->force_dirty) { pvr2_trace(PVR2_TRACE_CHIPS, "subdev v4l2 set_standard"); if (hdw->input_val == PVR2_CVAL_INPUT_RADIO) { v4l2_device_call_all(&hdw->v4l2_dev, 0, tuner, s_radio); } else { v4l2_std_id vs; vs = hdw->std_mask_cur; v4l2_device_call_all(&hdw->v4l2_dev, 0, video, s_std, vs); pvr2_hdw_cx25840_vbi_hack(hdw); } hdw->tuner_signal_stale = !0; hdw->cropcap_stale = !0; } PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_BRIGHTNESS, brightness); PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_CONTRAST, contrast); PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_SATURATION, saturation); PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_HUE, hue); PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_AUDIO_MUTE, mute); PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_AUDIO_VOLUME, volume); PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_AUDIO_BALANCE, balance); PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_AUDIO_BASS, bass); PVR2_SUBDEV_SET_CONTROL(hdw, V4L2_CID_AUDIO_TREBLE, treble); if (hdw->input_dirty || hdw->audiomode_dirty || hdw->force_dirty) { struct v4l2_tuner vt; memset(&vt, 0, sizeof(vt)); vt.type = (hdw->input_val == PVR2_CVAL_INPUT_RADIO) ? 
V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV; vt.audmode = hdw->audiomode_val; v4l2_device_call_all(&hdw->v4l2_dev, 0, tuner, s_tuner, &vt); } if (hdw->freqDirty || hdw->force_dirty) { unsigned long fv; struct v4l2_frequency freq; fv = pvr2_hdw_get_cur_freq(hdw); pvr2_trace(PVR2_TRACE_CHIPS, "subdev v4l2 set_freq(%lu)", fv); if (hdw->tuner_signal_stale) pvr2_hdw_status_poll(hdw); memset(&freq, 0, sizeof(freq)); if (hdw->tuner_signal_info.capability & V4L2_TUNER_CAP_LOW) { /* ((fv * 1000) / 62500) */ freq.frequency = (fv * 2) / 125; } else { freq.frequency = fv / 62500; } /* tuner-core currently doesn't seem to care about this, but let's set it anyway for completeness. */ if (hdw->input_val == PVR2_CVAL_INPUT_RADIO) { freq.type = V4L2_TUNER_RADIO; } else { freq.type = V4L2_TUNER_ANALOG_TV; } freq.tuner = 0; v4l2_device_call_all(&hdw->v4l2_dev, 0, tuner, s_frequency, &freq); } if (hdw->res_hor_dirty || hdw->res_ver_dirty || hdw->force_dirty) { struct v4l2_subdev_format format = { .which = V4L2_SUBDEV_FORMAT_ACTIVE, }; format.format.width = hdw->res_hor_val; format.format.height = hdw->res_ver_val; format.format.code = MEDIA_BUS_FMT_FIXED; pvr2_trace(PVR2_TRACE_CHIPS, "subdev v4l2 set_size(%dx%d)", format.format.width, format.format.height); v4l2_device_call_all(&hdw->v4l2_dev, 0, pad, set_fmt, NULL, &format); } if (hdw->srate_dirty || hdw->force_dirty) { u32 val; pvr2_trace(PVR2_TRACE_CHIPS, "subdev v4l2 set_audio %d", hdw->srate_val); switch (hdw->srate_val) { default: case V4L2_MPEG_AUDIO_SAMPLING_FREQ_48000: val = 48000; break; case V4L2_MPEG_AUDIO_SAMPLING_FREQ_44100: val = 44100; break; case V4L2_MPEG_AUDIO_SAMPLING_FREQ_32000: val = 32000; break; } v4l2_device_call_all(&hdw->v4l2_dev, 0, audio, s_clock_freq, val); } /* Unable to set crop parameters; there is apparently no equivalent for VIDIOC_S_CROP */ v4l2_device_for_each_subdev(sd, &hdw->v4l2_dev) { id = sd->grp_id; if (id >= ARRAY_SIZE(pvr2_module_update_functions)) continue; fp = pvr2_module_update_functions[id]; if (!fp) continue; (*fp)(hdw, sd); } if (hdw->tuner_signal_stale || hdw->cropcap_stale) { pvr2_hdw_status_poll(hdw); } } /* Figure out if we need to commit control changes. If so, mark internal state flags to indicate this fact and return true. Otherwise do nothing else and return false. */ static int pvr2_hdw_commit_setup(struct pvr2_hdw *hdw) { unsigned int idx; struct pvr2_ctrl *cptr; int value; int commit_flag = hdw->force_dirty; char buf[100]; unsigned int bcnt,ccnt; for (idx = 0; idx < hdw->control_cnt; idx++) { cptr = hdw->controls + idx; if (!cptr->info->is_dirty) continue; if (!cptr->info->is_dirty(cptr)) continue; commit_flag = !0; if (!(pvrusb2_debug & PVR2_TRACE_CTL)) continue; bcnt = scnprintf(buf,sizeof(buf),"\"%s\" <-- ", cptr->info->name); value = 0; cptr->info->get_value(cptr,&value); pvr2_ctrl_value_to_sym_internal(cptr,~0,value, buf+bcnt, sizeof(buf)-bcnt,&ccnt); bcnt += ccnt; bcnt += scnprintf(buf+bcnt,sizeof(buf)-bcnt," <%s>", get_ctrl_typename(cptr->info->type)); pvr2_trace(PVR2_TRACE_CTL, "/*--TRACE_COMMIT--*/ %.*s", bcnt,buf); } if (!commit_flag) { /* Nothing has changed */ return 0; } hdw->state_pipeline_config = 0; trace_stbit("state_pipeline_config",hdw->state_pipeline_config); pvr2_hdw_state_sched(hdw); return !0; } /* Perform all operations needed to commit all control changes. This must be performed in synchronization with the pipeline state and is thus expected to be called as part of the driver's worker thread. 
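   (The worker runs with big_lock held - see pvr2_hdw_worker_poll() - so
   no additional locking should be needed here.)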
Return true if commit successful, otherwise return false to indicate that commit isn't possible at this time. */ static int pvr2_hdw_commit_execute(struct pvr2_hdw *hdw) { unsigned int idx; struct pvr2_ctrl *cptr; int disruptive_change; if (hdw->input_dirty && hdw->state_pathway_ok && (((hdw->input_val == PVR2_CVAL_INPUT_DTV) ? PVR2_PATHWAY_DIGITAL : PVR2_PATHWAY_ANALOG) != hdw->pathway_state)) { /* Change of mode being asked for... */ hdw->state_pathway_ok = 0; trace_stbit("state_pathway_ok", hdw->state_pathway_ok); } if (!hdw->state_pathway_ok) { /* Can't commit anything until pathway is ok. */ return 0; } /* Handle some required side effects when the video standard is changed.... */ if (hdw->std_dirty) { int nvres; int gop_size; if (hdw->std_mask_cur & V4L2_STD_525_60) { nvres = 480; gop_size = 15; } else { nvres = 576; gop_size = 12; } /* Rewrite the vertical resolution to be appropriate to the video standard that has been selected. */ if (nvres != hdw->res_ver_val) { hdw->res_ver_val = nvres; hdw->res_ver_dirty = !0; } /* Rewrite the GOP size to be appropriate to the video standard that has been selected. */ if (gop_size != hdw->enc_ctl_state.video_gop_size) { struct v4l2_ext_controls cs; struct v4l2_ext_control c1; memset(&cs, 0, sizeof(cs)); memset(&c1, 0, sizeof(c1)); cs.controls = &c1; cs.count = 1; c1.id = V4L2_CID_MPEG_VIDEO_GOP_SIZE; c1.value = gop_size; cx2341x_ext_ctrls(&hdw->enc_ctl_state, 0, &cs, VIDIOC_S_EXT_CTRLS); } } /* The broadcast decoder can only scale down, so if * res_*_dirty && crop window < output format ==> enlarge crop. * * The mpeg encoder receives fields of res_hor_val dots and * res_ver_val halflines. Limits: hor<=720, ver<=576. */ if (hdw->res_hor_dirty && hdw->cropw_val < hdw->res_hor_val) { hdw->cropw_val = hdw->res_hor_val; hdw->cropw_dirty = !0; } else if (hdw->cropw_dirty) { hdw->res_hor_dirty = !0; /* must rescale */ hdw->res_hor_val = min(720, hdw->cropw_val); } if (hdw->res_ver_dirty && hdw->croph_val < hdw->res_ver_val) { hdw->croph_val = hdw->res_ver_val; hdw->croph_dirty = !0; } else if (hdw->croph_dirty) { int nvres = hdw->std_mask_cur & V4L2_STD_525_60 ? 480 : 576; hdw->res_ver_dirty = !0; hdw->res_ver_val = min(nvres, hdw->croph_val); } /* If any of the below has changed, then we can't do the update while the pipeline is running. Pipeline must be paused first and decoder -> encoder connection be made quiescent before we can proceed. */ disruptive_change = (hdw->std_dirty || hdw->enc_unsafe_stale || hdw->srate_dirty || hdw->res_ver_dirty || hdw->res_hor_dirty || hdw->cropw_dirty || hdw->croph_dirty || hdw->input_dirty || (hdw->active_stream_type != hdw->desired_stream_type)); if (disruptive_change && !hdw->state_pipeline_idle) { /* Pipeline is not idle; we can't proceed. Arrange to cause pipeline to stop so that we can try this again later.... */ hdw->state_pipeline_pause = !0; return 0; } if (hdw->srate_dirty) { /* Write new sample rate into control structure since * the master copy is stale. We must track srate * separate from the mpeg control structure because * other logic also uses this value. 
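		 * The new rate is pushed into the cx2341x encoder state
		 * immediately below via a VIDIOC_S_EXT_CTRLS call on
		 * V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ.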
*/ struct v4l2_ext_controls cs; struct v4l2_ext_control c1; memset(&cs,0,sizeof(cs)); memset(&c1,0,sizeof(c1)); cs.controls = &c1; cs.count = 1; c1.id = V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ; c1.value = hdw->srate_val; cx2341x_ext_ctrls(&hdw->enc_ctl_state, 0, &cs,VIDIOC_S_EXT_CTRLS); } if (hdw->active_stream_type != hdw->desired_stream_type) { /* Handle any side effects of stream config here */ hdw->active_stream_type = hdw->desired_stream_type; } if (hdw->hdw_desc->signal_routing_scheme == PVR2_ROUTING_SCHEME_GOTVIEW) { u32 b; /* Handle GOTVIEW audio switching */ pvr2_hdw_gpio_get_out(hdw,&b); if (hdw->input_val == PVR2_CVAL_INPUT_RADIO) { /* Set GPIO 11 */ pvr2_hdw_gpio_chg_out(hdw,(1 << 11),~0); } else { /* Clear GPIO 11 */ pvr2_hdw_gpio_chg_out(hdw,(1 << 11),0); } } /* Check and update state for all sub-devices. */ pvr2_subdev_update(hdw); hdw->tuner_updated = 0; hdw->force_dirty = 0; for (idx = 0; idx < hdw->control_cnt; idx++) { cptr = hdw->controls + idx; if (!cptr->info->clear_dirty) continue; cptr->info->clear_dirty(cptr); } if ((hdw->pathway_state == PVR2_PATHWAY_ANALOG) && hdw->state_encoder_run) { /* If encoder isn't running or it can't be touched, then this will get worked out later when we start the encoder. */ if (pvr2_encoder_adjust(hdw) < 0) return !0; } hdw->state_pipeline_config = !0; /* Hardware state may have changed in a way to cause the cropping capabilities to have changed. So mark it stale, which will cause a later re-fetch. */ trace_stbit("state_pipeline_config",hdw->state_pipeline_config); return !0; } int pvr2_hdw_commit_ctl(struct pvr2_hdw *hdw) { int fl; LOCK_TAKE(hdw->big_lock); fl = pvr2_hdw_commit_setup(hdw); LOCK_GIVE(hdw->big_lock); if (!fl) return 0; return pvr2_hdw_wait(hdw,0); } static void pvr2_hdw_worker_poll(struct work_struct *work) { int fl = 0; struct pvr2_hdw *hdw = container_of(work,struct pvr2_hdw,workpoll); LOCK_TAKE(hdw->big_lock); do { fl = pvr2_hdw_state_eval(hdw); } while (0); LOCK_GIVE(hdw->big_lock); if (fl && hdw->state_func) { hdw->state_func(hdw->state_data); } } static int pvr2_hdw_wait(struct pvr2_hdw *hdw,int state) { return wait_event_interruptible( hdw->state_wait_data, (hdw->state_stale == 0) && (!state || (hdw->master_state != state))); } /* Return name for this driver instance */ const char *pvr2_hdw_get_driver_name(struct pvr2_hdw *hdw) { return hdw->name; } const char *pvr2_hdw_get_desc(struct pvr2_hdw *hdw) { return hdw->hdw_desc->description; } const char *pvr2_hdw_get_type(struct pvr2_hdw *hdw) { return hdw->hdw_desc->shortname; } int pvr2_hdw_is_hsm(struct pvr2_hdw *hdw) { int result; LOCK_TAKE(hdw->ctl_lock); do { hdw->cmd_buffer[0] = FX2CMD_GET_USB_SPEED; result = pvr2_send_request(hdw, hdw->cmd_buffer,1, hdw->cmd_buffer,1); if (result < 0) break; result = (hdw->cmd_buffer[0] != 0); } while(0); LOCK_GIVE(hdw->ctl_lock); return result; } /* Execute poll of tuner status */ void pvr2_hdw_execute_tuner_poll(struct pvr2_hdw *hdw) { LOCK_TAKE(hdw->big_lock); do { pvr2_hdw_status_poll(hdw); } while (0); LOCK_GIVE(hdw->big_lock); } static int pvr2_hdw_check_cropcap(struct pvr2_hdw *hdw) { if (!hdw->cropcap_stale) { return 0; } pvr2_hdw_status_poll(hdw); if (hdw->cropcap_stale) { return -EIO; } return 0; } /* Return information about cropping capabilities */ int pvr2_hdw_get_cropcap(struct pvr2_hdw *hdw, struct v4l2_cropcap *pp) { int stat = 0; LOCK_TAKE(hdw->big_lock); stat = pvr2_hdw_check_cropcap(hdw); if (!stat) { memcpy(pp, &hdw->cropcap_info, sizeof(hdw->cropcap_info)); } LOCK_GIVE(hdw->big_lock); return stat; } /* Return 
information about the tuner */ int pvr2_hdw_get_tuner_status(struct pvr2_hdw *hdw,struct v4l2_tuner *vtp) { LOCK_TAKE(hdw->big_lock); do { if (hdw->tuner_signal_stale) { pvr2_hdw_status_poll(hdw); } memcpy(vtp,&hdw->tuner_signal_info,sizeof(struct v4l2_tuner)); } while (0); LOCK_GIVE(hdw->big_lock); return 0; } /* Get handle to video output stream */ struct pvr2_stream *pvr2_hdw_get_video_stream(struct pvr2_hdw *hp) { return hp->vid_stream; } void pvr2_hdw_trigger_module_log(struct pvr2_hdw *hdw) { int nr = pvr2_hdw_get_unit_number(hdw); LOCK_TAKE(hdw->big_lock); do { pr_info("pvrusb2: ================= START STATUS CARD #%d =================\n", nr); v4l2_device_call_all(&hdw->v4l2_dev, 0, core, log_status); pvr2_trace(PVR2_TRACE_INFO,"cx2341x config:"); cx2341x_log_status(&hdw->enc_ctl_state, "pvrusb2"); pvr2_hdw_state_log_state(hdw); pr_info("pvrusb2: ================== END STATUS CARD #%d ==================\n", nr); } while (0); LOCK_GIVE(hdw->big_lock); } /* Grab EEPROM contents, needed for direct method. */ #define EEPROM_SIZE 8192 #define trace_eeprom(...) pvr2_trace(PVR2_TRACE_EEPROM,__VA_ARGS__) static u8 *pvr2_full_eeprom_fetch(struct pvr2_hdw *hdw) { struct i2c_msg msg[2]; u8 *eeprom; u8 iadd[2]; u8 addr; u16 eepromSize; unsigned int offs; int ret; int mode16 = 0; unsigned pcnt,tcnt; eeprom = kzalloc(EEPROM_SIZE, GFP_KERNEL); if (!eeprom) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Failed to allocate memory required to read eeprom"); return NULL; } trace_eeprom("Value for eeprom addr from controller was 0x%x", hdw->eeprom_addr); addr = hdw->eeprom_addr; /* Seems that if the high bit is set, then the *real* eeprom address is shifted right now bit position (noticed this in newer PVR USB2 hardware) */ if (addr & 0x80) addr >>= 1; /* FX2 documentation states that a 16bit-addressed eeprom is expected if the I2C address is an odd number (yeah, this is strange but it's what they do) */ mode16 = (addr & 1); eepromSize = (mode16 ? EEPROM_SIZE : 256); trace_eeprom("Examining %d byte eeprom at location 0x%x using %d bit addressing", eepromSize, addr, mode16 ? 16 : 8); msg[0].addr = addr; msg[0].flags = 0; msg[0].len = mode16 ? 2 : 1; msg[0].buf = iadd; msg[1].addr = addr; msg[1].flags = I2C_M_RD; /* We have to do the actual eeprom data fetch ourselves, because (1) we're only fetching part of the eeprom, and (2) if we were getting the whole thing our I2C driver can't grab it in one pass - which is what tveeprom is otherwise going to attempt */ for (tcnt = 0; tcnt < EEPROM_SIZE; tcnt += pcnt) { pcnt = 16; if (pcnt + tcnt > EEPROM_SIZE) pcnt = EEPROM_SIZE-tcnt; offs = tcnt + (eepromSize - EEPROM_SIZE); if (mode16) { iadd[0] = offs >> 8; iadd[1] = offs; } else { iadd[0] = offs; } msg[1].len = pcnt; msg[1].buf = eeprom+tcnt; if ((ret = i2c_transfer(&hdw->i2c_adap, msg,ARRAY_SIZE(msg))) != 2) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "eeprom fetch set offs err=%d",ret); kfree(eeprom); return NULL; } } return eeprom; } void pvr2_hdw_cpufw_set_enabled(struct pvr2_hdw *hdw, int mode, int enable_flag) { int ret; u16 address; unsigned int pipe; LOCK_TAKE(hdw->big_lock); do { if ((hdw->fw_buffer == NULL) == !enable_flag) break; if (!enable_flag) { pvr2_trace(PVR2_TRACE_FIRMWARE, "Cleaning up after CPU firmware fetch"); kfree(hdw->fw_buffer); hdw->fw_buffer = NULL; hdw->fw_size = 0; if (hdw->fw_cpu_flag) { /* Now release the CPU. It will disconnect and reconnect later. */ pvr2_hdw_cpureset_assert(hdw,0); } break; } hdw->fw_cpu_flag = (mode != 2); if (hdw->fw_cpu_flag) { hdw->fw_size = (mode == 1) ? 
0x4000 : 0x2000; pvr2_trace(PVR2_TRACE_FIRMWARE, "Preparing to suck out CPU firmware (size=%u)", hdw->fw_size); hdw->fw_buffer = kzalloc(hdw->fw_size,GFP_KERNEL); if (!hdw->fw_buffer) { hdw->fw_size = 0; break; } /* We have to hold the CPU during firmware upload. */ pvr2_hdw_cpureset_assert(hdw,1); /* download the firmware from address 0000-1fff in 2048 (=0x800) bytes chunk. */ pvr2_trace(PVR2_TRACE_FIRMWARE, "Grabbing CPU firmware"); pipe = usb_rcvctrlpipe(hdw->usb_dev, 0); for(address = 0; address < hdw->fw_size; address += 0x800) { ret = usb_control_msg(hdw->usb_dev,pipe, 0xa0,0xc0, address,0, hdw->fw_buffer+address, 0x800,1000); if (ret < 0) break; } pvr2_trace(PVR2_TRACE_FIRMWARE, "Done grabbing CPU firmware"); } else { pvr2_trace(PVR2_TRACE_FIRMWARE, "Sucking down EEPROM contents"); hdw->fw_buffer = pvr2_full_eeprom_fetch(hdw); if (!hdw->fw_buffer) { pvr2_trace(PVR2_TRACE_FIRMWARE, "EEPROM content suck failed."); break; } hdw->fw_size = EEPROM_SIZE; pvr2_trace(PVR2_TRACE_FIRMWARE, "Done sucking down EEPROM contents"); } } while (0); LOCK_GIVE(hdw->big_lock); } /* Return true if we're in a mode for retrieval CPU firmware */ int pvr2_hdw_cpufw_get_enabled(struct pvr2_hdw *hdw) { return hdw->fw_buffer != NULL; } int pvr2_hdw_cpufw_get(struct pvr2_hdw *hdw,unsigned int offs, char *buf,unsigned int cnt) { int ret = -EINVAL; LOCK_TAKE(hdw->big_lock); do { if (!buf) break; if (!cnt) break; if (!hdw->fw_buffer) { ret = -EIO; break; } if (offs >= hdw->fw_size) { pvr2_trace(PVR2_TRACE_FIRMWARE, "Read firmware data offs=%d EOF", offs); ret = 0; break; } if (offs + cnt > hdw->fw_size) cnt = hdw->fw_size - offs; memcpy(buf,hdw->fw_buffer+offs,cnt); pvr2_trace(PVR2_TRACE_FIRMWARE, "Read firmware data offs=%d cnt=%d", offs,cnt); ret = cnt; } while (0); LOCK_GIVE(hdw->big_lock); return ret; } int pvr2_hdw_v4l_get_minor_number(struct pvr2_hdw *hdw, enum pvr2_v4l_type index) { switch (index) { case pvr2_v4l_type_video: return hdw->v4l_minor_number_video; case pvr2_v4l_type_vbi: return hdw->v4l_minor_number_vbi; case pvr2_v4l_type_radio: return hdw->v4l_minor_number_radio; default: return -1; } } /* Store a v4l minor device number */ void pvr2_hdw_v4l_store_minor_number(struct pvr2_hdw *hdw, enum pvr2_v4l_type index,int v) { switch (index) { case pvr2_v4l_type_video: hdw->v4l_minor_number_video = v;break; case pvr2_v4l_type_vbi: hdw->v4l_minor_number_vbi = v;break; case pvr2_v4l_type_radio: hdw->v4l_minor_number_radio = v;break; default: break; } } static void pvr2_ctl_write_complete(struct urb *urb) { struct pvr2_hdw *hdw = urb->context; hdw->ctl_write_pend_flag = 0; if (hdw->ctl_read_pend_flag) return; complete(&hdw->ctl_done); } static void pvr2_ctl_read_complete(struct urb *urb) { struct pvr2_hdw *hdw = urb->context; hdw->ctl_read_pend_flag = 0; if (hdw->ctl_write_pend_flag) return; complete(&hdw->ctl_done); } struct hdw_timer { struct timer_list timer; struct pvr2_hdw *hdw; }; static void pvr2_ctl_timeout(struct timer_list *t) { struct hdw_timer *timer = from_timer(timer, t, timer); struct pvr2_hdw *hdw = timer->hdw; if (hdw->ctl_write_pend_flag || hdw->ctl_read_pend_flag) { hdw->ctl_timeout_flag = !0; if (hdw->ctl_write_pend_flag) usb_unlink_urb(hdw->ctl_write_urb); if (hdw->ctl_read_pend_flag) usb_unlink_urb(hdw->ctl_read_urb); } } /* Issue a command and get a response from the device. This extended version includes a probe flag (which if set means that device errors should not be logged or treated as fatal) and a timeout in jiffies. 
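   (The ordinary pvr2_send_request() wrapper is just this routine called
   with a 4 second timeout and the probe flag clear.)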
This can be used to non-lethally probe the health of endpoint 1. */ static int pvr2_send_request_ex(struct pvr2_hdw *hdw, unsigned int timeout,int probe_fl, void *write_data,unsigned int write_len, void *read_data,unsigned int read_len) { unsigned int idx; int status = 0; struct hdw_timer timer = { .hdw = hdw, }; if (!hdw->ctl_lock_held) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Attempted to execute control transfer without lock!!"); return -EDEADLK; } if (!hdw->flag_ok && !probe_fl) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Attempted to execute control transfer when device not ok"); return -EIO; } if (!(hdw->ctl_read_urb && hdw->ctl_write_urb)) { if (!probe_fl) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Attempted to execute control transfer when USB is disconnected"); } return -ENOTTY; } /* Ensure that we have sane parameters */ if (!write_data) write_len = 0; if (!read_data) read_len = 0; if (write_len > PVR2_CTL_BUFFSIZE) { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "Attempted to execute %d byte control-write transfer (limit=%d)", write_len,PVR2_CTL_BUFFSIZE); return -EINVAL; } if (read_len > PVR2_CTL_BUFFSIZE) { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "Attempted to execute %d byte control-read transfer (limit=%d)", write_len,PVR2_CTL_BUFFSIZE); return -EINVAL; } if ((!write_len) && (!read_len)) { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "Attempted to execute null control transfer?"); return -EINVAL; } hdw->cmd_debug_state = 1; if (write_len && write_data) hdw->cmd_debug_code = ((unsigned char *)write_data)[0]; else hdw->cmd_debug_code = 0; hdw->cmd_debug_write_len = write_len; hdw->cmd_debug_read_len = read_len; /* Initialize common stuff */ init_completion(&hdw->ctl_done); hdw->ctl_timeout_flag = 0; hdw->ctl_write_pend_flag = 0; hdw->ctl_read_pend_flag = 0; timer_setup_on_stack(&timer.timer, pvr2_ctl_timeout, 0); timer.timer.expires = jiffies + timeout; if (write_len && write_data) { hdw->cmd_debug_state = 2; /* Transfer write data to internal buffer */ for (idx = 0; idx < write_len; idx++) { hdw->ctl_write_buffer[idx] = ((unsigned char *)write_data)[idx]; } /* Initiate a write request */ usb_fill_bulk_urb(hdw->ctl_write_urb, hdw->usb_dev, usb_sndbulkpipe(hdw->usb_dev, PVR2_CTL_WRITE_ENDPOINT), hdw->ctl_write_buffer, write_len, pvr2_ctl_write_complete, hdw); hdw->ctl_write_urb->actual_length = 0; hdw->ctl_write_pend_flag = !0; if (usb_urb_ep_type_check(hdw->ctl_write_urb)) { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "Invalid write control endpoint"); return -EINVAL; } status = usb_submit_urb(hdw->ctl_write_urb,GFP_KERNEL); if (status < 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Failed to submit write-control URB status=%d", status); hdw->ctl_write_pend_flag = 0; goto done; } } if (read_len) { hdw->cmd_debug_state = 3; memset(hdw->ctl_read_buffer,0x43,read_len); /* Initiate a read request */ usb_fill_bulk_urb(hdw->ctl_read_urb, hdw->usb_dev, usb_rcvbulkpipe(hdw->usb_dev, PVR2_CTL_READ_ENDPOINT), hdw->ctl_read_buffer, read_len, pvr2_ctl_read_complete, hdw); hdw->ctl_read_urb->actual_length = 0; hdw->ctl_read_pend_flag = !0; if (usb_urb_ep_type_check(hdw->ctl_read_urb)) { pvr2_trace( PVR2_TRACE_ERROR_LEGS, "Invalid read control endpoint"); return -EINVAL; } status = usb_submit_urb(hdw->ctl_read_urb,GFP_KERNEL); if (status < 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Failed to submit read-control URB status=%d", status); hdw->ctl_read_pend_flag = 0; goto done; } } /* Start timer */ add_timer(&timer.timer); /* Now wait for all I/O to complete */ hdw->cmd_debug_state = 4; while (hdw->ctl_write_pend_flag || hdw->ctl_read_pend_flag) { 
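		/* Each completion callback signals ctl_done only after it
		   sees that the other direction has also finished (or was
		   never started), so this normally wakes exactly once;
		   re-checking both flags is just paranoia against ordering
		   races. */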
wait_for_completion(&hdw->ctl_done); } hdw->cmd_debug_state = 5; /* Stop timer */ del_timer_sync(&timer.timer); hdw->cmd_debug_state = 6; status = 0; if (hdw->ctl_timeout_flag) { status = -ETIMEDOUT; if (!probe_fl) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Timed out control-write"); } goto done; } if (write_len) { /* Validate results of write request */ if ((hdw->ctl_write_urb->status != 0) && (hdw->ctl_write_urb->status != -ENOENT) && (hdw->ctl_write_urb->status != -ESHUTDOWN) && (hdw->ctl_write_urb->status != -ECONNRESET)) { /* USB subsystem is reporting some kind of failure on the write */ status = hdw->ctl_write_urb->status; if (!probe_fl) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "control-write URB failure, status=%d", status); } goto done; } if (hdw->ctl_write_urb->actual_length < write_len) { /* Failed to write enough data */ status = -EIO; if (!probe_fl) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "control-write URB short, expected=%d got=%d", write_len, hdw->ctl_write_urb->actual_length); } goto done; } } if (read_len && read_data) { /* Validate results of read request */ if ((hdw->ctl_read_urb->status != 0) && (hdw->ctl_read_urb->status != -ENOENT) && (hdw->ctl_read_urb->status != -ESHUTDOWN) && (hdw->ctl_read_urb->status != -ECONNRESET)) { /* USB subsystem is reporting some kind of failure on the read */ status = hdw->ctl_read_urb->status; if (!probe_fl) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "control-read URB failure, status=%d", status); } goto done; } if (hdw->ctl_read_urb->actual_length < read_len) { /* Failed to read enough data */ status = -EIO; if (!probe_fl) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "control-read URB short, expected=%d got=%d", read_len, hdw->ctl_read_urb->actual_length); } goto done; } /* Transfer retrieved data out from internal buffer */ for (idx = 0; idx < read_len; idx++) { ((unsigned char *)read_data)[idx] = hdw->ctl_read_buffer[idx]; } } done: hdw->cmd_debug_state = 0; if ((status < 0) && (!probe_fl)) { pvr2_hdw_render_useless(hdw); } destroy_timer_on_stack(&timer.timer); return status; } int pvr2_send_request(struct pvr2_hdw *hdw, void *write_data,unsigned int write_len, void *read_data,unsigned int read_len) { return pvr2_send_request_ex(hdw,HZ*4,0, write_data,write_len, read_data,read_len); } static int pvr2_issue_simple_cmd(struct pvr2_hdw *hdw,u32 cmdcode) { int ret; unsigned int cnt = 1; unsigned int args = 0; LOCK_TAKE(hdw->ctl_lock); hdw->cmd_buffer[0] = cmdcode & 0xffu; args = (cmdcode >> 8) & 0xffu; args = (args > 2) ? 
2 : args; if (args) { cnt += args; hdw->cmd_buffer[1] = (cmdcode >> 16) & 0xffu; if (args > 1) { hdw->cmd_buffer[2] = (cmdcode >> 24) & 0xffu; } } if (pvrusb2_debug & PVR2_TRACE_INIT) { unsigned int idx; unsigned int ccnt,bcnt; char tbuf[50]; cmdcode &= 0xffu; bcnt = 0; ccnt = scnprintf(tbuf+bcnt, sizeof(tbuf)-bcnt, "Sending FX2 command 0x%x",cmdcode); bcnt += ccnt; for (idx = 0; idx < ARRAY_SIZE(pvr2_fx2cmd_desc); idx++) { if (pvr2_fx2cmd_desc[idx].id == cmdcode) { ccnt = scnprintf(tbuf+bcnt, sizeof(tbuf)-bcnt, " \"%s\"", pvr2_fx2cmd_desc[idx].desc); bcnt += ccnt; break; } } if (args) { ccnt = scnprintf(tbuf+bcnt, sizeof(tbuf)-bcnt, " (%u",hdw->cmd_buffer[1]); bcnt += ccnt; if (args > 1) { ccnt = scnprintf(tbuf+bcnt, sizeof(tbuf)-bcnt, ",%u",hdw->cmd_buffer[2]); bcnt += ccnt; } ccnt = scnprintf(tbuf+bcnt, sizeof(tbuf)-bcnt, ")"); bcnt += ccnt; } pvr2_trace(PVR2_TRACE_INIT,"%.*s",bcnt,tbuf); } ret = pvr2_send_request(hdw,hdw->cmd_buffer,cnt,NULL,0); LOCK_GIVE(hdw->ctl_lock); return ret; } int pvr2_write_register(struct pvr2_hdw *hdw, u16 reg, u32 data) { int ret; LOCK_TAKE(hdw->ctl_lock); hdw->cmd_buffer[0] = FX2CMD_REG_WRITE; /* write register prefix */ PVR2_DECOMPOSE_LE(hdw->cmd_buffer,1,data); hdw->cmd_buffer[5] = 0; hdw->cmd_buffer[6] = (reg >> 8) & 0xff; hdw->cmd_buffer[7] = reg & 0xff; ret = pvr2_send_request(hdw, hdw->cmd_buffer, 8, hdw->cmd_buffer, 0); LOCK_GIVE(hdw->ctl_lock); return ret; } static int pvr2_read_register(struct pvr2_hdw *hdw, u16 reg, u32 *data) { int ret = 0; LOCK_TAKE(hdw->ctl_lock); hdw->cmd_buffer[0] = FX2CMD_REG_READ; /* read register prefix */ hdw->cmd_buffer[1] = 0; hdw->cmd_buffer[2] = 0; hdw->cmd_buffer[3] = 0; hdw->cmd_buffer[4] = 0; hdw->cmd_buffer[5] = 0; hdw->cmd_buffer[6] = (reg >> 8) & 0xff; hdw->cmd_buffer[7] = reg & 0xff; ret |= pvr2_send_request(hdw, hdw->cmd_buffer, 8, hdw->cmd_buffer, 4); *data = PVR2_COMPOSE_LE(hdw->cmd_buffer,0); LOCK_GIVE(hdw->ctl_lock); return ret; } void pvr2_hdw_render_useless(struct pvr2_hdw *hdw) { if (!hdw->flag_ok) return; pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Device being rendered inoperable"); if (hdw->vid_stream) { pvr2_stream_setup(hdw->vid_stream,NULL,0,0); } hdw->flag_ok = 0; trace_stbit("flag_ok",hdw->flag_ok); pvr2_hdw_state_sched(hdw); } void pvr2_hdw_device_reset(struct pvr2_hdw *hdw) { int ret; pvr2_trace(PVR2_TRACE_INIT,"Performing a device reset..."); ret = usb_lock_device_for_reset(hdw->usb_dev,NULL); if (ret == 0) { ret = usb_reset_device(hdw->usb_dev); usb_unlock_device(hdw->usb_dev); } else { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Failed to lock USB device ret=%d",ret); } if (init_pause_msec) { pvr2_trace(PVR2_TRACE_INFO, "Waiting %u msec for hardware to settle", init_pause_msec); msleep(init_pause_msec); } } void pvr2_hdw_cpureset_assert(struct pvr2_hdw *hdw,int val) { char *da; unsigned int pipe; int ret; if (!hdw->usb_dev) return; da = kmalloc(16, GFP_KERNEL); if (da == NULL) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Unable to allocate memory to control CPU reset"); return; } pvr2_trace(PVR2_TRACE_INIT,"cpureset_assert(%d)",val); da[0] = val ? 0x01 : 0x00; /* Write the CPUCS register on the 8051. The lsb of the register is the reset bit; a 1 asserts reset while a 0 clears it. 
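   CPUCS sits at 0xe600 in the FX2's internal RAM and is written below
   with the standard Cypress firmware-load vendor request (bRequest 0xa0,
   requesttype 0x40), the same request used to upload code to the chip.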
*/ pipe = usb_sndctrlpipe(hdw->usb_dev, 0); ret = usb_control_msg(hdw->usb_dev,pipe,0xa0,0x40,0xe600,0,da,1,1000); if (ret < 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "cpureset_assert(%d) error=%d",val,ret); pvr2_hdw_render_useless(hdw); } kfree(da); } int pvr2_hdw_cmd_deep_reset(struct pvr2_hdw *hdw) { return pvr2_issue_simple_cmd(hdw,FX2CMD_DEEP_RESET); } int pvr2_hdw_cmd_powerup(struct pvr2_hdw *hdw) { return pvr2_issue_simple_cmd(hdw,FX2CMD_POWER_ON); } int pvr2_hdw_cmd_decoder_reset(struct pvr2_hdw *hdw) { pvr2_trace(PVR2_TRACE_INIT, "Requesting decoder reset"); if (hdw->decoder_client_id) { v4l2_device_call_all(&hdw->v4l2_dev, hdw->decoder_client_id, core, reset, 0); pvr2_hdw_cx25840_vbi_hack(hdw); return 0; } pvr2_trace(PVR2_TRACE_INIT, "Unable to reset decoder: nothing attached"); return -ENOTTY; } static int pvr2_hdw_cmd_hcw_demod_reset(struct pvr2_hdw *hdw, int onoff) { hdw->flag_ok = !0; /* Use this for Hauppauge 160xxx only */ if (le16_to_cpu(hdw->usb_dev->descriptor.idVendor) == 0x2040 && (le16_to_cpu(hdw->usb_dev->descriptor.idProduct) == 0x7502 || le16_to_cpu(hdw->usb_dev->descriptor.idProduct) == 0x7510)) { pr_debug("%s(): resetting demod on Hauppauge 160xxx platform skipped\n", __func__); /* Can't reset 160xxx or it will trash Demod tristate */ return pvr2_issue_simple_cmd(hdw, FX2CMD_HCW_MAKO_SLEEP_PIN | (1 << 8) | ((onoff ? 1 : 0) << 16)); } return pvr2_issue_simple_cmd(hdw, FX2CMD_HCW_DEMOD_RESETIN | (1 << 8) | ((onoff ? 1 : 0) << 16)); } static int pvr2_hdw_cmd_onair_fe_power_ctrl(struct pvr2_hdw *hdw, int onoff) { hdw->flag_ok = !0; return pvr2_issue_simple_cmd(hdw,(onoff ? FX2CMD_ONAIR_DTV_POWER_ON : FX2CMD_ONAIR_DTV_POWER_OFF)); } static int pvr2_hdw_cmd_onair_digital_path_ctrl(struct pvr2_hdw *hdw, int onoff) { return pvr2_issue_simple_cmd(hdw,(onoff ? FX2CMD_ONAIR_DTV_STREAMING_ON : FX2CMD_ONAIR_DTV_STREAMING_OFF)); } static void pvr2_hdw_cmd_modeswitch(struct pvr2_hdw *hdw,int digitalFl) { int cmode; /* Compare digital/analog desired setting with current setting. If they don't match, fix it... */ cmode = (digitalFl ? PVR2_PATHWAY_DIGITAL : PVR2_PATHWAY_ANALOG); if (cmode == hdw->pathway_state) { /* They match; nothing to do */ return; } switch (hdw->hdw_desc->digital_control_scheme) { case PVR2_DIGITAL_SCHEME_HAUPPAUGE: pvr2_hdw_cmd_hcw_demod_reset(hdw,digitalFl); if (cmode == PVR2_PATHWAY_ANALOG) { /* If moving to analog mode, also force the decoder to reset. If no decoder is attached, then it's ok to ignore this because if/when the decoder attaches, it will reset itself at that time. */ pvr2_hdw_cmd_decoder_reset(hdw); } break; case PVR2_DIGITAL_SCHEME_ONAIR: /* Supposedly we should always have the power on whether in digital or analog mode. But for now do what appears to work... */ pvr2_hdw_cmd_onair_fe_power_ctrl(hdw,digitalFl); break; default: break; } pvr2_hdw_untrip_unlocked(hdw); hdw->pathway_state = cmode; } static void pvr2_led_ctrl_hauppauge(struct pvr2_hdw *hdw, int onoff) { /* change some GPIO data * * note: bit d7 of dir appears to control the LED, * so we shut it off here. 
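 * (The two direction masks written below differ only in bit 7 - 0x481
 * vs 0x401 - which is what actually switches the LED; the GPIO output
 * bits themselves are driven to zero in both cases.)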
* */ if (onoff) { pvr2_hdw_gpio_chg_dir(hdw, 0xffffffff, 0x00000481); } else { pvr2_hdw_gpio_chg_dir(hdw, 0xffffffff, 0x00000401); } pvr2_hdw_gpio_chg_out(hdw, 0xffffffff, 0x00000000); } typedef void (*led_method_func)(struct pvr2_hdw *,int); static led_method_func led_methods[] = { [PVR2_LED_SCHEME_HAUPPAUGE] = pvr2_led_ctrl_hauppauge, }; /* Toggle LED */ static void pvr2_led_ctrl(struct pvr2_hdw *hdw,int onoff) { unsigned int scheme_id; led_method_func fp; if ((!onoff) == (!hdw->led_on)) return; hdw->led_on = onoff != 0; scheme_id = hdw->hdw_desc->led_scheme; if (scheme_id < ARRAY_SIZE(led_methods)) { fp = led_methods[scheme_id]; } else { fp = NULL; } if (fp) (*fp)(hdw,onoff); } /* Stop / start video stream transport */ static int pvr2_hdw_cmd_usbstream(struct pvr2_hdw *hdw,int runFl) { int ret; /* If we're in analog mode, then just issue the usual analog command. */ if (hdw->pathway_state == PVR2_PATHWAY_ANALOG) { return pvr2_issue_simple_cmd(hdw, (runFl ? FX2CMD_STREAMING_ON : FX2CMD_STREAMING_OFF)); /*Note: Not reached */ } if (hdw->pathway_state != PVR2_PATHWAY_DIGITAL) { /* Whoops, we don't know what mode we're in... */ return -EINVAL; } /* To get here we have to be in digital mode. The mechanism here is unfortunately different for different vendors. So we switch on the device's digital scheme attribute in order to figure out what to do. */ switch (hdw->hdw_desc->digital_control_scheme) { case PVR2_DIGITAL_SCHEME_HAUPPAUGE: return pvr2_issue_simple_cmd(hdw, (runFl ? FX2CMD_HCW_DTV_STREAMING_ON : FX2CMD_HCW_DTV_STREAMING_OFF)); case PVR2_DIGITAL_SCHEME_ONAIR: ret = pvr2_issue_simple_cmd(hdw, (runFl ? FX2CMD_STREAMING_ON : FX2CMD_STREAMING_OFF)); if (ret) return ret; return pvr2_hdw_cmd_onair_digital_path_ctrl(hdw,runFl); default: return -EINVAL; } } /* Evaluate whether or not state_pathway_ok can change */ static int state_eval_pathway_ok(struct pvr2_hdw *hdw) { if (hdw->state_pathway_ok) { /* Nothing to do if pathway is already ok */ return 0; } if (!hdw->state_pipeline_idle) { /* Not allowed to change anything if pipeline is not idle */ return 0; } pvr2_hdw_cmd_modeswitch(hdw,hdw->input_val == PVR2_CVAL_INPUT_DTV); hdw->state_pathway_ok = !0; trace_stbit("state_pathway_ok",hdw->state_pathway_ok); return !0; } /* Evaluate whether or not state_encoder_ok can change */ static int state_eval_encoder_ok(struct pvr2_hdw *hdw) { if (hdw->state_encoder_ok) return 0; if (hdw->flag_tripped) return 0; if (hdw->state_encoder_run) return 0; if (hdw->state_encoder_config) return 0; if (hdw->state_decoder_run) return 0; if (hdw->state_usbstream_run) return 0; if (hdw->pathway_state == PVR2_PATHWAY_DIGITAL) { if (!hdw->hdw_desc->flag_digital_requires_cx23416) return 0; } else if (hdw->pathway_state != PVR2_PATHWAY_ANALOG) { return 0; } if (pvr2_upload_firmware2(hdw) < 0) { hdw->flag_tripped = !0; trace_stbit("flag_tripped",hdw->flag_tripped); return !0; } hdw->state_encoder_ok = !0; trace_stbit("state_encoder_ok",hdw->state_encoder_ok); return !0; } /* Evaluate whether or not state_encoder_config can change */ static int state_eval_encoder_config(struct pvr2_hdw *hdw) { if (hdw->state_encoder_config) { if (hdw->state_encoder_ok) { if (hdw->state_pipeline_req && !hdw->state_pipeline_pause) return 0; } hdw->state_encoder_config = 0; hdw->state_encoder_waitok = 0; trace_stbit("state_encoder_waitok",hdw->state_encoder_waitok); /* paranoia - solve race if timer just completed */ del_timer_sync(&hdw->encoder_wait_timer); } else { if (!hdw->state_pathway_ok || (hdw->pathway_state != PVR2_PATHWAY_ANALOG) 
|| !hdw->state_encoder_ok || !hdw->state_pipeline_idle || hdw->state_pipeline_pause || !hdw->state_pipeline_req || !hdw->state_pipeline_config) { /* We must reset the enforced wait interval if anything has happened that might have disturbed the encoder. This should be a rare case. */ if (timer_pending(&hdw->encoder_wait_timer)) { del_timer_sync(&hdw->encoder_wait_timer); } if (hdw->state_encoder_waitok) { /* Must clear the state - therefore we did something to a state bit and must also return true. */ hdw->state_encoder_waitok = 0; trace_stbit("state_encoder_waitok", hdw->state_encoder_waitok); return !0; } return 0; } if (!hdw->state_encoder_waitok) { if (!timer_pending(&hdw->encoder_wait_timer)) { /* waitok flag wasn't set and timer isn't running. Check flag once more to avoid a race then start the timer. This is the point when we measure out a minimal quiet interval before doing something to the encoder. */ if (!hdw->state_encoder_waitok) { hdw->encoder_wait_timer.expires = jiffies + msecs_to_jiffies( TIME_MSEC_ENCODER_WAIT); add_timer(&hdw->encoder_wait_timer); } } /* We can't continue until we know we have been quiet for the interval measured by this timer. */ return 0; } pvr2_encoder_configure(hdw); if (hdw->state_encoder_ok) hdw->state_encoder_config = !0; } trace_stbit("state_encoder_config",hdw->state_encoder_config); return !0; } /* Return true if the encoder should not be running. */ static int state_check_disable_encoder_run(struct pvr2_hdw *hdw) { if (!hdw->state_encoder_ok) { /* Encoder isn't healthy at the moment, so stop it. */ return !0; } if (!hdw->state_pathway_ok) { /* Mode is not understood at the moment (i.e. it wants to change), so encoder must be stopped. */ return !0; } switch (hdw->pathway_state) { case PVR2_PATHWAY_ANALOG: if (!hdw->state_decoder_run) { /* We're in analog mode and the decoder is not running; thus the encoder should be stopped as well. */ return !0; } break; case PVR2_PATHWAY_DIGITAL: if (hdw->state_encoder_runok) { /* This is a funny case. We're in digital mode so really the encoder should be stopped. However if it really is running, only kill it after runok has been set. This gives a chance for the onair quirk to function (encoder must run briefly first, at least once, before onair digital streaming can work). */ return !0; } break; default: /* Unknown mode; so encoder should be stopped. */ return !0; } /* If we get here, we haven't found a reason to stop the encoder. */ return 0; } /* Return true if the encoder should be running. */ static int state_check_enable_encoder_run(struct pvr2_hdw *hdw) { if (!hdw->state_encoder_ok) { /* Don't run the encoder if it isn't healthy... */ return 0; } if (!hdw->state_pathway_ok) { /* Don't run the encoder if we don't (yet) know what mode we need to be in... */ return 0; } switch (hdw->pathway_state) { case PVR2_PATHWAY_ANALOG: if (hdw->state_decoder_run && hdw->state_decoder_ready) { /* In analog mode, if the decoder is running, then run the encoder. */ return !0; } break; case PVR2_PATHWAY_DIGITAL: if ((hdw->hdw_desc->digital_control_scheme == PVR2_DIGITAL_SCHEME_ONAIR) && !hdw->state_encoder_runok) { /* This is a quirk. OnAir hardware won't stream digital until the encoder has been run at least once, for a minimal period of time (empiricially measured to be 1/4 second). So if we're on OnAir hardware and the encoder has never been run at all, then start the encoder. Normal state machine logic in the driver will automatically handle the remaining bits. 
*/ return !0; } break; default: /* For completeness (unknown mode; encoder won't run ever) */ break; } /* If we get here, then we haven't found any reason to run the encoder, so don't run it. */ return 0; } /* Evaluate whether or not state_encoder_run can change */ static int state_eval_encoder_run(struct pvr2_hdw *hdw) { if (hdw->state_encoder_run) { if (!state_check_disable_encoder_run(hdw)) return 0; if (hdw->state_encoder_ok) { del_timer_sync(&hdw->encoder_run_timer); if (pvr2_encoder_stop(hdw) < 0) return !0; } hdw->state_encoder_run = 0; } else { if (!state_check_enable_encoder_run(hdw)) return 0; if (pvr2_encoder_start(hdw) < 0) return !0; hdw->state_encoder_run = !0; if (!hdw->state_encoder_runok) { hdw->encoder_run_timer.expires = jiffies + msecs_to_jiffies(TIME_MSEC_ENCODER_OK); add_timer(&hdw->encoder_run_timer); } } trace_stbit("state_encoder_run",hdw->state_encoder_run); return !0; } /* Timeout function for quiescent timer. */ static void pvr2_hdw_quiescent_timeout(struct timer_list *t) { struct pvr2_hdw *hdw = from_timer(hdw, t, quiescent_timer); hdw->state_decoder_quiescent = !0; trace_stbit("state_decoder_quiescent",hdw->state_decoder_quiescent); hdw->state_stale = !0; schedule_work(&hdw->workpoll); } /* Timeout function for decoder stabilization timer. */ static void pvr2_hdw_decoder_stabilization_timeout(struct timer_list *t) { struct pvr2_hdw *hdw = from_timer(hdw, t, decoder_stabilization_timer); hdw->state_decoder_ready = !0; trace_stbit("state_decoder_ready", hdw->state_decoder_ready); hdw->state_stale = !0; schedule_work(&hdw->workpoll); } /* Timeout function for encoder wait timer. */ static void pvr2_hdw_encoder_wait_timeout(struct timer_list *t) { struct pvr2_hdw *hdw = from_timer(hdw, t, encoder_wait_timer); hdw->state_encoder_waitok = !0; trace_stbit("state_encoder_waitok",hdw->state_encoder_waitok); hdw->state_stale = !0; schedule_work(&hdw->workpoll); } /* Timeout function for encoder run timer. */ static void pvr2_hdw_encoder_run_timeout(struct timer_list *t) { struct pvr2_hdw *hdw = from_timer(hdw, t, encoder_run_timer); if (!hdw->state_encoder_runok) { hdw->state_encoder_runok = !0; trace_stbit("state_encoder_runok",hdw->state_encoder_runok); hdw->state_stale = !0; schedule_work(&hdw->workpoll); } } /* Evaluate whether or not state_decoder_run can change */ static int state_eval_decoder_run(struct pvr2_hdw *hdw) { if (hdw->state_decoder_run) { if (hdw->state_encoder_ok) { if (hdw->state_pipeline_req && !hdw->state_pipeline_pause && hdw->state_pathway_ok) return 0; } if (!hdw->flag_decoder_missed) { pvr2_decoder_enable(hdw,0); } hdw->state_decoder_quiescent = 0; hdw->state_decoder_run = 0; /* paranoia - solve race if timer(s) just completed */ del_timer_sync(&hdw->quiescent_timer); /* Kill the stabilization timer, in case we're killing the encoder before the previous stabilization interval has been properly timed. */ del_timer_sync(&hdw->decoder_stabilization_timer); hdw->state_decoder_ready = 0; } else { if (!hdw->state_decoder_quiescent) { if (!timer_pending(&hdw->quiescent_timer)) { /* We don't do something about the quiescent timer until right here because we also want to catch cases where the decoder was already not running (like after initialization) as opposed to knowing that we had just stopped it. The second flag check is here to cover a race - the timer could have run and set this flag just after the previous check but before we did the pending check. 
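 *
 * In outline, the arming path below is (a sketch of the code that
 * follows, not additional logic):
 *
 *	if (!state_decoder_quiescent)
 *		if (!timer_pending(&quiescent_timer))
 *			if (!state_decoder_quiescent)	(re-check: the race)
 *				arm quiescent_timer for TIME_MSEC_DECODER_WAIT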
*/ if (!hdw->state_decoder_quiescent) { hdw->quiescent_timer.expires = jiffies + msecs_to_jiffies( TIME_MSEC_DECODER_WAIT); add_timer(&hdw->quiescent_timer); } } /* Don't allow decoder to start again until it has been quiesced first. This little detail should hopefully further stabilize the encoder. */ return 0; } if (!hdw->state_pathway_ok || (hdw->pathway_state != PVR2_PATHWAY_ANALOG) || !hdw->state_pipeline_req || hdw->state_pipeline_pause || !hdw->state_pipeline_config || !hdw->state_encoder_config || !hdw->state_encoder_ok) return 0; del_timer_sync(&hdw->quiescent_timer); if (hdw->flag_decoder_missed) return 0; if (pvr2_decoder_enable(hdw,!0) < 0) return 0; hdw->state_decoder_quiescent = 0; hdw->state_decoder_ready = 0; hdw->state_decoder_run = !0; if (hdw->decoder_client_id == PVR2_CLIENT_ID_SAA7115) { hdw->decoder_stabilization_timer.expires = jiffies + msecs_to_jiffies( TIME_MSEC_DECODER_STABILIZATION_WAIT); add_timer(&hdw->decoder_stabilization_timer); } else { hdw->state_decoder_ready = !0; } } trace_stbit("state_decoder_quiescent",hdw->state_decoder_quiescent); trace_stbit("state_decoder_run",hdw->state_decoder_run); trace_stbit("state_decoder_ready", hdw->state_decoder_ready); return !0; } /* Evaluate whether or not state_usbstream_run can change */ static int state_eval_usbstream_run(struct pvr2_hdw *hdw) { if (hdw->state_usbstream_run) { int fl = !0; if (hdw->pathway_state == PVR2_PATHWAY_ANALOG) { fl = (hdw->state_encoder_ok && hdw->state_encoder_run); } else if ((hdw->pathway_state == PVR2_PATHWAY_DIGITAL) && (hdw->hdw_desc->flag_digital_requires_cx23416)) { fl = hdw->state_encoder_ok; } if (fl && hdw->state_pipeline_req && !hdw->state_pipeline_pause && hdw->state_pathway_ok) { return 0; } pvr2_hdw_cmd_usbstream(hdw,0); hdw->state_usbstream_run = 0; } else { if (!hdw->state_pipeline_req || hdw->state_pipeline_pause || !hdw->state_pathway_ok) return 0; if (hdw->pathway_state == PVR2_PATHWAY_ANALOG) { if (!hdw->state_encoder_ok || !hdw->state_encoder_run) return 0; } else if ((hdw->pathway_state == PVR2_PATHWAY_DIGITAL) && (hdw->hdw_desc->flag_digital_requires_cx23416)) { if (!hdw->state_encoder_ok) return 0; if (hdw->state_encoder_run) return 0; if (hdw->hdw_desc->digital_control_scheme == PVR2_DIGITAL_SCHEME_ONAIR) { /* OnAir digital receivers won't stream unless the analog encoder has run first. Why? I have no idea. But don't even try until we know the analog side is known to have run. */ if (!hdw->state_encoder_runok) return 0; } } if (pvr2_hdw_cmd_usbstream(hdw,!0) < 0) return 0; hdw->state_usbstream_run = !0; } trace_stbit("state_usbstream_run",hdw->state_usbstream_run); return !0; } /* Attempt to configure pipeline, if needed */ static int state_eval_pipeline_config(struct pvr2_hdw *hdw) { if (hdw->state_pipeline_config || hdw->state_pipeline_pause) return 0; pvr2_hdw_commit_execute(hdw); return !0; } /* Update pipeline idle and pipeline pause tracking states based on other inputs. This must be called whenever the other relevant inputs have changed. 
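 *
 * Concretely, the code below treats the pipeline as idle only when
 *
 *	!state_encoder_run && !state_decoder_run &&
 *	!state_usbstream_run && state_decoder_quiescent
 *
 * all hold at once, and it clears a pending pause request as soon as
 * that idle condition has been reached.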
*/ static int state_update_pipeline_state(struct pvr2_hdw *hdw) { unsigned int st; int updatedFl = 0; /* Update pipeline state */ st = !(hdw->state_encoder_run || hdw->state_decoder_run || hdw->state_usbstream_run || (!hdw->state_decoder_quiescent)); if (!st != !hdw->state_pipeline_idle) { hdw->state_pipeline_idle = st; updatedFl = !0; } if (hdw->state_pipeline_idle && hdw->state_pipeline_pause) { hdw->state_pipeline_pause = 0; updatedFl = !0; } return updatedFl; } typedef int (*state_eval_func)(struct pvr2_hdw *); /* Set of functions to be run to evaluate various states in the driver. */ static const state_eval_func eval_funcs[] = { state_eval_pathway_ok, state_eval_pipeline_config, state_eval_encoder_ok, state_eval_encoder_config, state_eval_decoder_run, state_eval_encoder_run, state_eval_usbstream_run, }; /* Process various states and return true if we did anything interesting. */ static int pvr2_hdw_state_update(struct pvr2_hdw *hdw) { unsigned int i; int state_updated = 0; int check_flag; if (!hdw->state_stale) return 0; if ((hdw->fw1_state != FW1_STATE_OK) || !hdw->flag_ok) { hdw->state_stale = 0; return !0; } /* This loop is the heart of the entire driver. It keeps trying to evaluate various bits of driver state until nothing changes for one full iteration. Each "bit of state" tracks some global aspect of the driver, e.g. whether decoder should run, if pipeline is configured, usb streaming is on, etc. We separately evaluate each of those questions based on other driver state to arrive at the correct running configuration. */ do { check_flag = 0; state_update_pipeline_state(hdw); /* Iterate over each bit of state */ for (i = 0; (i<ARRAY_SIZE(eval_funcs)) && hdw->flag_ok; i++) { if ((*eval_funcs[i])(hdw)) { check_flag = !0; state_updated = !0; state_update_pipeline_state(hdw); } } } while (check_flag && hdw->flag_ok); hdw->state_stale = 0; trace_stbit("state_stale",hdw->state_stale); return state_updated; } static unsigned int print_input_mask(unsigned int msk, char *buf,unsigned int acnt) { unsigned int idx,ccnt; unsigned int tcnt = 0; for (idx = 0; idx < ARRAY_SIZE(control_values_input); idx++) { if (!((1UL << idx) & msk)) continue; ccnt = scnprintf(buf+tcnt, acnt-tcnt, "%s%s", (tcnt ? ", " : ""), control_values_input[idx]); tcnt += ccnt; } return tcnt; } static const char *pvr2_pathway_state_name(int id) { switch (id) { case PVR2_PATHWAY_ANALOG: return "analog"; case PVR2_PATHWAY_DIGITAL: return "digital"; default: return "unknown"; } } static unsigned int pvr2_hdw_report_unlocked(struct pvr2_hdw *hdw,int which, char *buf,unsigned int acnt) { switch (which) { case 0: return scnprintf( buf,acnt, "driver:%s%s%s%s%s <mode=%s>", (hdw->flag_ok ? " <ok>" : " <fail>"), (hdw->flag_init_ok ? " <init>" : " <uninitialized>"), (hdw->flag_disconnected ? " <disconnected>" : " <connected>"), (hdw->flag_tripped ? " <tripped>" : ""), (hdw->flag_decoder_missed ? " <no decoder>" : ""), pvr2_pathway_state_name(hdw->pathway_state)); case 1: return scnprintf( buf,acnt, "pipeline:%s%s%s%s", (hdw->state_pipeline_idle ? " <idle>" : ""), (hdw->state_pipeline_config ? " <configok>" : " <stale>"), (hdw->state_pipeline_req ? " <req>" : ""), (hdw->state_pipeline_pause ? " <pause>" : "")); case 2: return scnprintf( buf,acnt, "worker:%s%s%s%s%s%s%s", (hdw->state_decoder_run ? (hdw->state_decoder_ready ? "<decode:run>" : " <decode:start>") : (hdw->state_decoder_quiescent ? "" : " <decode:stop>")), (hdw->state_decoder_quiescent ? " <decode:quiescent>" : ""), (hdw->state_encoder_ok ? 
"" : " <encode:init>"), (hdw->state_encoder_run ? (hdw->state_encoder_runok ? " <encode:run>" : " <encode:firstrun>") : (hdw->state_encoder_runok ? " <encode:stop>" : " <encode:virgin>")), (hdw->state_encoder_config ? " <encode:configok>" : (hdw->state_encoder_waitok ? "" : " <encode:waitok>")), (hdw->state_usbstream_run ? " <usb:run>" : " <usb:stop>"), (hdw->state_pathway_ok ? " <pathway:ok>" : "")); case 3: return scnprintf( buf,acnt, "state: %s", pvr2_get_state_name(hdw->master_state)); case 4: { unsigned int tcnt = 0; unsigned int ccnt; ccnt = scnprintf(buf, acnt, "Hardware supported inputs: "); tcnt += ccnt; tcnt += print_input_mask(hdw->input_avail_mask, buf+tcnt, acnt-tcnt); if (hdw->input_avail_mask != hdw->input_allowed_mask) { ccnt = scnprintf(buf+tcnt, acnt-tcnt, "; allowed inputs: "); tcnt += ccnt; tcnt += print_input_mask(hdw->input_allowed_mask, buf+tcnt, acnt-tcnt); } return tcnt; } case 5: { struct pvr2_stream_stats stats; if (!hdw->vid_stream) break; pvr2_stream_get_stats(hdw->vid_stream, &stats, 0); return scnprintf( buf,acnt, "Bytes streamed=%u URBs: queued=%u idle=%u ready=%u processed=%u failed=%u", stats.bytes_processed, stats.buffers_in_queue, stats.buffers_in_idle, stats.buffers_in_ready, stats.buffers_processed, stats.buffers_failed); } case 6: { unsigned int id = hdw->ir_scheme_active; return scnprintf(buf, acnt, "ir scheme: id=%d %s", id, (id >= ARRAY_SIZE(ir_scheme_names) ? "?" : ir_scheme_names[id])); } default: break; } return 0; } /* Generate report containing info about attached sub-devices and attached i2c clients, including an indication of which attached i2c clients are actually sub-devices. */ static unsigned int pvr2_hdw_report_clients(struct pvr2_hdw *hdw, char *buf, unsigned int acnt) { struct v4l2_subdev *sd; unsigned int tcnt = 0; unsigned int ccnt; struct i2c_client *client; const char *p; unsigned int id; ccnt = scnprintf(buf, acnt, "Associated v4l2-subdev drivers and I2C clients:\n"); tcnt += ccnt; v4l2_device_for_each_subdev(sd, &hdw->v4l2_dev) { id = sd->grp_id; p = NULL; if (id < ARRAY_SIZE(module_names)) p = module_names[id]; if (p) { ccnt = scnprintf(buf + tcnt, acnt - tcnt, " %s:", p); tcnt += ccnt; } else { ccnt = scnprintf(buf + tcnt, acnt - tcnt, " (unknown id=%u):", id); tcnt += ccnt; } client = v4l2_get_subdevdata(sd); if (client) { ccnt = scnprintf(buf + tcnt, acnt - tcnt, " %s @ %02x\n", client->name, client->addr); tcnt += ccnt; } else { ccnt = scnprintf(buf + tcnt, acnt - tcnt, " no i2c client\n"); tcnt += ccnt; } } return tcnt; } unsigned int pvr2_hdw_state_report(struct pvr2_hdw *hdw, char *buf,unsigned int acnt) { unsigned int bcnt,ccnt,idx; bcnt = 0; LOCK_TAKE(hdw->big_lock); for (idx = 0; ; idx++) { ccnt = pvr2_hdw_report_unlocked(hdw,idx,buf,acnt); if (!ccnt) break; bcnt += ccnt; acnt -= ccnt; buf += ccnt; if (!acnt) break; buf[0] = '\n'; ccnt = 1; bcnt += ccnt; acnt -= ccnt; buf += ccnt; } ccnt = pvr2_hdw_report_clients(hdw, buf, acnt); bcnt += ccnt; acnt -= ccnt; buf += ccnt; LOCK_GIVE(hdw->big_lock); return bcnt; } static void pvr2_hdw_state_log_state(struct pvr2_hdw *hdw) { char buf[256]; unsigned int idx, ccnt; unsigned int lcnt, ucnt; for (idx = 0; ; idx++) { ccnt = pvr2_hdw_report_unlocked(hdw,idx,buf,sizeof(buf)); if (!ccnt) break; pr_info("%s %.*s\n", hdw->name, ccnt, buf); } ccnt = pvr2_hdw_report_clients(hdw, buf, sizeof(buf)); if (ccnt >= sizeof(buf)) ccnt = sizeof(buf); ucnt = 0; while (ucnt < ccnt) { lcnt = 0; while ((lcnt + ucnt < ccnt) && (buf[lcnt + ucnt] != '\n')) { lcnt++; } pr_info("%s %.*s\n", hdw->name, 
lcnt, buf + ucnt); ucnt += lcnt + 1; } } /* Evaluate and update the driver's current state, taking various actions as appropriate for the update. */ static int pvr2_hdw_state_eval(struct pvr2_hdw *hdw) { unsigned int st; int state_updated = 0; int callback_flag = 0; int analog_mode; pvr2_trace(PVR2_TRACE_STBITS, "Drive state check START"); if (pvrusb2_debug & PVR2_TRACE_STBITS) { pvr2_hdw_state_log_state(hdw); } /* Process all state and get back over disposition */ state_updated = pvr2_hdw_state_update(hdw); analog_mode = (hdw->pathway_state != PVR2_PATHWAY_DIGITAL); /* Update master state based upon all other states. */ if (!hdw->flag_ok) { st = PVR2_STATE_DEAD; } else if (hdw->fw1_state != FW1_STATE_OK) { st = PVR2_STATE_COLD; } else if ((analog_mode || hdw->hdw_desc->flag_digital_requires_cx23416) && !hdw->state_encoder_ok) { st = PVR2_STATE_WARM; } else if (hdw->flag_tripped || (analog_mode && hdw->flag_decoder_missed)) { st = PVR2_STATE_ERROR; } else if (hdw->state_usbstream_run && (!analog_mode || (hdw->state_encoder_run && hdw->state_decoder_run))) { st = PVR2_STATE_RUN; } else { st = PVR2_STATE_READY; } if (hdw->master_state != st) { pvr2_trace(PVR2_TRACE_STATE, "Device state change from %s to %s", pvr2_get_state_name(hdw->master_state), pvr2_get_state_name(st)); pvr2_led_ctrl(hdw,st == PVR2_STATE_RUN); hdw->master_state = st; state_updated = !0; callback_flag = !0; } if (state_updated) { /* Trigger anyone waiting on any state changes here. */ wake_up(&hdw->state_wait_data); } if (pvrusb2_debug & PVR2_TRACE_STBITS) { pvr2_hdw_state_log_state(hdw); } pvr2_trace(PVR2_TRACE_STBITS, "Drive state check DONE callback=%d",callback_flag); return callback_flag; } /* Cause kernel thread to check / update driver state */ static void pvr2_hdw_state_sched(struct pvr2_hdw *hdw) { if (hdw->state_stale) return; hdw->state_stale = !0; trace_stbit("state_stale",hdw->state_stale); schedule_work(&hdw->workpoll); } int pvr2_hdw_gpio_get_dir(struct pvr2_hdw *hdw,u32 *dp) { return pvr2_read_register(hdw,PVR2_GPIO_DIR,dp); } int pvr2_hdw_gpio_get_out(struct pvr2_hdw *hdw,u32 *dp) { return pvr2_read_register(hdw,PVR2_GPIO_OUT,dp); } int pvr2_hdw_gpio_get_in(struct pvr2_hdw *hdw,u32 *dp) { return pvr2_read_register(hdw,PVR2_GPIO_IN,dp); } int pvr2_hdw_gpio_chg_dir(struct pvr2_hdw *hdw,u32 msk,u32 val) { u32 cval,nval; int ret; if (~msk) { ret = pvr2_read_register(hdw,PVR2_GPIO_DIR,&cval); if (ret) return ret; nval = (cval & ~msk) | (val & msk); pvr2_trace(PVR2_TRACE_GPIO, "GPIO direction changing 0x%x:0x%x from 0x%x to 0x%x", msk,val,cval,nval); } else { nval = val; pvr2_trace(PVR2_TRACE_GPIO, "GPIO direction changing to 0x%x",nval); } return pvr2_write_register(hdw,PVR2_GPIO_DIR,nval); } int pvr2_hdw_gpio_chg_out(struct pvr2_hdw *hdw,u32 msk,u32 val) { u32 cval,nval; int ret; if (~msk) { ret = pvr2_read_register(hdw,PVR2_GPIO_OUT,&cval); if (ret) return ret; nval = (cval & ~msk) | (val & msk); pvr2_trace(PVR2_TRACE_GPIO, "GPIO output changing 0x%x:0x%x from 0x%x to 0x%x", msk,val,cval,nval); } else { nval = val; pvr2_trace(PVR2_TRACE_GPIO, "GPIO output changing to 0x%x",nval); } return pvr2_write_register(hdw,PVR2_GPIO_OUT,nval); } void pvr2_hdw_status_poll(struct pvr2_hdw *hdw) { struct v4l2_tuner *vtp = &hdw->tuner_signal_info; memset(vtp, 0, sizeof(*vtp)); vtp->type = (hdw->input_val == PVR2_CVAL_INPUT_RADIO) ? 
V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV; hdw->tuner_signal_stale = 0; /* Note: There apparently is no replacement for VIDIOC_CROPCAP using v4l2-subdev - therefore we can't support that AT ALL right now. (Of course, no sub-drivers seem to implement it either. But now it's a chicken and egg problem...) */ v4l2_device_call_all(&hdw->v4l2_dev, 0, tuner, g_tuner, vtp); pvr2_trace(PVR2_TRACE_CHIPS, "subdev status poll type=%u strength=%u audio=0x%x cap=0x%x low=%u hi=%u", vtp->type, vtp->signal, vtp->rxsubchans, vtp->capability, vtp->rangelow, vtp->rangehigh); /* We have to do this to avoid getting into constant polling if there's nobody to answer a poll of cropcap info. */ hdw->cropcap_stale = 0; } unsigned int pvr2_hdw_get_input_available(struct pvr2_hdw *hdw) { return hdw->input_avail_mask; } unsigned int pvr2_hdw_get_input_allowed(struct pvr2_hdw *hdw) { return hdw->input_allowed_mask; } static int pvr2_hdw_set_input(struct pvr2_hdw *hdw,int v) { if (hdw->input_val != v) { hdw->input_val = v; hdw->input_dirty = !0; } /* Handle side effects - if we switch to a mode that needs the RF tuner, then select the right frequency choice as well and mark it dirty. */ if (hdw->input_val == PVR2_CVAL_INPUT_RADIO) { hdw->freqSelector = 0; hdw->freqDirty = !0; } else if ((hdw->input_val == PVR2_CVAL_INPUT_TV) || (hdw->input_val == PVR2_CVAL_INPUT_DTV)) { hdw->freqSelector = 1; hdw->freqDirty = !0; } return 0; } int pvr2_hdw_set_input_allowed(struct pvr2_hdw *hdw, unsigned int change_mask, unsigned int change_val) { int ret = 0; unsigned int nv,m,idx; LOCK_TAKE(hdw->big_lock); do { nv = hdw->input_allowed_mask & ~change_mask; nv |= (change_val & change_mask); nv &= hdw->input_avail_mask; if (!nv) { /* No legal modes left; return error instead. */ ret = -EPERM; break; } hdw->input_allowed_mask = nv; if ((1UL << hdw->input_val) & hdw->input_allowed_mask) { /* Current mode is still in the allowed mask, so we're done. */ break; } /* Select and switch to a mode that is still in the allowed mask */ if (!hdw->input_allowed_mask) { /* Nothing legal; give up */ break; } m = hdw->input_allowed_mask; for (idx = 0; idx < (sizeof(m) << 3); idx++) { if (!((1UL << idx) & m)) continue; pvr2_hdw_set_input(hdw,idx); break; } } while (0); LOCK_GIVE(hdw->big_lock); return ret; } /* Find I2C address of eeprom */ static int pvr2_hdw_get_eeprom_addr(struct pvr2_hdw *hdw) { int result; LOCK_TAKE(hdw->ctl_lock); do { hdw->cmd_buffer[0] = FX2CMD_GET_EEPROM_ADDR; result = pvr2_send_request(hdw, hdw->cmd_buffer,1, hdw->cmd_buffer,1); if (result < 0) break; result = hdw->cmd_buffer[0]; } while(0); LOCK_GIVE(hdw->ctl_lock); return result; }
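/*
 * Illustrative sketch (not part of the driver): the FX2 command helpers
 * above share one idiom - hold ctl_lock, place the command byte in
 * cmd_buffer[0], exchange it with pvr2_send_request(), and wrap the body
 * in do { } while (0) so error paths can bail out with "break" before the
 * lock is released.  A hypothetical one-byte query built on that idiom
 * might look like this; "cmd" stands for any FX2CMD_* code that returns a
 * single status byte.
 */
static int pvr2_example_query_byte(struct pvr2_hdw *hdw, u8 cmd)
{
	int result;
	LOCK_TAKE(hdw->ctl_lock); do {
		hdw->cmd_buffer[0] = cmd;
		result = pvr2_send_request(hdw, hdw->cmd_buffer, 1,
					   hdw->cmd_buffer, 1);
		if (result < 0) break;
		result = hdw->cmd_buffer[0];
	} while (0); LOCK_GIVE(hdw->ctl_lock);
	return result;
}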
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_FPU_XCR_H
#define _ASM_X86_FPU_XCR_H

#define XCR_XFEATURE_ENABLED_MASK	0x00000000
#define XCR_XFEATURE_IN_USE_MASK	0x00000001

static __always_inline u64 xgetbv(u32 index)
{
	u32 eax, edx;

	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
	return eax + ((u64)edx << 32);
}

static inline void xsetbv(u32 index, u64 value)
{
	u32 eax = value;
	u32 edx = value >> 32;

	asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}

/*
 * Return a mask of xfeatures which are currently being tracked
 * by the processor as being in the initial configuration.
 *
 * Callers should check X86_FEATURE_XGETBV1.
 */
static __always_inline u64 xfeatures_in_use(void)
{
	return xgetbv(XCR_XFEATURE_IN_USE_MASK);
}

#endif /* _ASM_X86_FPU_XCR_H */
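/*
 * Illustrative sketch (not part of the header above): reading XCR0 via
 * xgetbv(XCR_XFEATURE_ENABLED_MASK) yields the mask of xfeatures the OS
 * has enabled, so a caller could check a whole set of feature bits at
 * once as below.  This assumes XGETBV is usable at all, i.e. the CPU
 * supports XSAVE and the kernel has set CR4.OSXSAVE; the helper name is
 * made up for illustration.
 */
static inline bool example_xfeatures_enabled(u64 mask)
{
	return (xgetbv(XCR_XFEATURE_ENABLED_MASK) & mask) == mask;
}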
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
* Copyright (C) 2005-2006, Thomas Gleixner, Russell King * * This file contains the core interrupt handling code, for irq-chip based * architectures. Detailed information is available in * Documentation/core-api/genericirq.rst */ #include <linux/irq.h> #include <linux/msi.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/irqdomain.h> #include <trace/events/irq.h> #include "internals.h" static irqreturn_t bad_chained_irq(int irq, void *dev_id) { WARN_ONCE(1, "Chained irq %d should not call an action\n", irq); return IRQ_NONE; } /* * Chained handlers should never call action on their IRQ. This default * action will emit warning if such thing happens. */ struct irqaction chained_action = { .handler = bad_chained_irq, }; /** * irq_set_chip - set the irq chip for an irq * @irq: irq number * @chip: pointer to irq chip description structure */ int irq_set_chip(unsigned int irq, const struct irq_chip *chip) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); if (!desc) return -EINVAL; desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip); irq_put_desc_unlock(desc, flags); /* * For !CONFIG_SPARSE_IRQ make the irq show up in * allocated_irqs. */ irq_mark_irq(irq); return 0; } EXPORT_SYMBOL(irq_set_chip); /** * irq_set_irq_type - set the irq trigger type for an irq * @irq: irq number * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h */ int irq_set_irq_type(unsigned int irq, unsigned int type) { unsigned long flags; struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); int ret = 0; if (!desc) return -EINVAL; ret = __irq_set_trigger(desc, type); irq_put_desc_busunlock(desc, flags); return ret; } EXPORT_SYMBOL(irq_set_irq_type); /** * irq_set_handler_data - set irq handler data for an irq * @irq: Interrupt number * @data: Pointer to interrupt specific data * * Set the hardware irq controller data for an irq */ int irq_set_handler_data(unsigned int irq, void *data) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); if (!desc) return -EINVAL; desc->irq_common_data.handler_data = data; irq_put_desc_unlock(desc, flags); return 0; } EXPORT_SYMBOL(irq_set_handler_data); /** * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset * @irq_base: Interrupt number base * @irq_offset: Interrupt number offset * @entry: Pointer to MSI descriptor data * * Set the MSI descriptor entry for an irq at offset */ int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, struct msi_desc *entry) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL); if (!desc) return -EINVAL; desc->irq_common_data.msi_desc = entry; if (entry && !irq_offset) entry->irq = irq_base; irq_put_desc_unlock(desc, flags); return 0; } /** * irq_set_msi_desc - set MSI descriptor data for an irq * @irq: Interrupt number * @entry: Pointer to MSI descriptor data * * Set the MSI descriptor entry for an irq */ int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) { return irq_set_msi_desc_off(irq, 0, entry); } /** * irq_set_chip_data - set irq chip data for an irq * @irq: Interrupt number * @data: Pointer to chip specific data * * Set the hardware irq chip data for an irq */ int irq_set_chip_data(unsigned int irq, void *data) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); if (!desc) return -EINVAL; desc->irq_data.chip_data = data; 
irq_put_desc_unlock(desc, flags); return 0; } EXPORT_SYMBOL(irq_set_chip_data); struct irq_data *irq_get_irq_data(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); return desc ? &desc->irq_data : NULL; } EXPORT_SYMBOL_GPL(irq_get_irq_data); static void irq_state_clr_disabled(struct irq_desc *desc) { irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); } static void irq_state_clr_masked(struct irq_desc *desc) { irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED); } static void irq_state_clr_started(struct irq_desc *desc) { irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED); } static void irq_state_set_started(struct irq_desc *desc) { irqd_set(&desc->irq_data, IRQD_IRQ_STARTED); } enum { IRQ_STARTUP_NORMAL, IRQ_STARTUP_MANAGED, IRQ_STARTUP_ABORT, }; #ifdef CONFIG_SMP static int __irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, bool force) { struct irq_data *d = irq_desc_get_irq_data(desc); if (!irqd_affinity_is_managed(d)) return IRQ_STARTUP_NORMAL; irqd_clr_managed_shutdown(d); if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) { /* * Catch code which fiddles with enable_irq() on a managed * and potentially shutdown IRQ. Chained interrupt * installment or irq auto probing should not happen on * managed irqs either. */ if (WARN_ON_ONCE(force)) return IRQ_STARTUP_ABORT; /* * The interrupt was requested, but there is no online CPU * in it's affinity mask. Put it into managed shutdown * state and let the cpu hotplug mechanism start it up once * a CPU in the mask becomes available. */ return IRQ_STARTUP_ABORT; } /* * Managed interrupts have reserved resources, so this should not * happen. */ if (WARN_ON(irq_domain_activate_irq(d, false))) return IRQ_STARTUP_ABORT; return IRQ_STARTUP_MANAGED; } #else static __always_inline int __irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, bool force) { return IRQ_STARTUP_NORMAL; } #endif static int __irq_startup(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); int ret = 0; /* Warn if this interrupt is not activated but try nevertheless */ WARN_ON_ONCE(!irqd_is_activated(d)); if (d->chip->irq_startup) { ret = d->chip->irq_startup(d); irq_state_clr_disabled(desc); irq_state_clr_masked(desc); } else { irq_enable(desc); } irq_state_set_started(desc); return ret; } int irq_startup(struct irq_desc *desc, bool resend, bool force) { struct irq_data *d = irq_desc_get_irq_data(desc); const struct cpumask *aff = irq_data_get_affinity_mask(d); int ret = 0; desc->depth = 0; if (irqd_is_started(d)) { irq_enable(desc); } else { switch (__irq_startup_managed(desc, aff, force)) { case IRQ_STARTUP_NORMAL: if (d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP) irq_setup_affinity(desc); ret = __irq_startup(desc); if (!(d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP)) irq_setup_affinity(desc); break; case IRQ_STARTUP_MANAGED: irq_do_set_affinity(d, aff, false); ret = __irq_startup(desc); break; case IRQ_STARTUP_ABORT: irqd_set_managed_shutdown(d); return 0; } } if (resend) check_irq_resend(desc, false); return ret; } int irq_activate(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); if (!irqd_affinity_is_managed(d)) return irq_domain_activate_irq(d, false); return 0; } int irq_activate_and_startup(struct irq_desc *desc, bool resend) { if (WARN_ON(irq_activate(desc))) return 0; return irq_startup(desc, resend, IRQ_START_FORCE); } static void __irq_disable(struct irq_desc *desc, bool mask); void irq_shutdown(struct irq_desc *desc) { if (irqd_is_started(&desc->irq_data)) { 
clear_irq_resend(desc); desc->depth = 1; if (desc->irq_data.chip->irq_shutdown) { desc->irq_data.chip->irq_shutdown(&desc->irq_data); irq_state_set_disabled(desc); irq_state_set_masked(desc); } else { __irq_disable(desc, true); } irq_state_clr_started(desc); } } void irq_shutdown_and_deactivate(struct irq_desc *desc) { irq_shutdown(desc); /* * This must be called even if the interrupt was never started up, * because the activation can happen before the interrupt is * available for request/startup. It has it's own state tracking so * it's safe to call it unconditionally. */ irq_domain_deactivate_irq(&desc->irq_data); } void irq_enable(struct irq_desc *desc) { if (!irqd_irq_disabled(&desc->irq_data)) { unmask_irq(desc); } else { irq_state_clr_disabled(desc); if (desc->irq_data.chip->irq_enable) { desc->irq_data.chip->irq_enable(&desc->irq_data); irq_state_clr_masked(desc); } else { unmask_irq(desc); } } } static void __irq_disable(struct irq_desc *desc, bool mask) { if (irqd_irq_disabled(&desc->irq_data)) { if (mask) mask_irq(desc); } else { irq_state_set_disabled(desc); if (desc->irq_data.chip->irq_disable) { desc->irq_data.chip->irq_disable(&desc->irq_data); irq_state_set_masked(desc); } else if (mask) { mask_irq(desc); } } } /** * irq_disable - Mark interrupt disabled * @desc: irq descriptor which should be disabled * * If the chip does not implement the irq_disable callback, we * use a lazy disable approach. That means we mark the interrupt * disabled, but leave the hardware unmasked. That's an * optimization because we avoid the hardware access for the * common case where no interrupt happens after we marked it * disabled. If an interrupt happens, then the interrupt flow * handler masks the line at the hardware level and marks it * pending. * * If the interrupt chip does not implement the irq_disable callback, * a driver can disable the lazy approach for a particular irq line by * calling 'irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY)'. This can * be used for devices which cannot disable the interrupt at the * device level under certain circumstances and have to use * disable_irq[_nosync] instead. 
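 *
 * A minimal usage sketch (hypothetical driver code, not part of this
 * file) of opting out of the lazy approach for one line:
 *
 *	irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY);
 *	...
 *	disable_irq(irq);	// with the flag set this masks the line
 *				// at the hardware level immediately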
*/ void irq_disable(struct irq_desc *desc) { __irq_disable(desc, irq_settings_disable_unlazy(desc)); } void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) { if (desc->irq_data.chip->irq_enable) desc->irq_data.chip->irq_enable(&desc->irq_data); else desc->irq_data.chip->irq_unmask(&desc->irq_data); cpumask_set_cpu(cpu, desc->percpu_enabled); } void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu) { if (desc->irq_data.chip->irq_disable) desc->irq_data.chip->irq_disable(&desc->irq_data); else desc->irq_data.chip->irq_mask(&desc->irq_data); cpumask_clear_cpu(cpu, desc->percpu_enabled); } static inline void mask_ack_irq(struct irq_desc *desc) { if (desc->irq_data.chip->irq_mask_ack) { desc->irq_data.chip->irq_mask_ack(&desc->irq_data); irq_state_set_masked(desc); } else { mask_irq(desc); if (desc->irq_data.chip->irq_ack) desc->irq_data.chip->irq_ack(&desc->irq_data); } } void mask_irq(struct irq_desc *desc) { if (irqd_irq_masked(&desc->irq_data)) return; if (desc->irq_data.chip->irq_mask) { desc->irq_data.chip->irq_mask(&desc->irq_data); irq_state_set_masked(desc); } } void unmask_irq(struct irq_desc *desc) { if (!irqd_irq_masked(&desc->irq_data)) return; if (desc->irq_data.chip->irq_unmask) { desc->irq_data.chip->irq_unmask(&desc->irq_data); irq_state_clr_masked(desc); } } void unmask_threaded_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; if (chip->flags & IRQCHIP_EOI_THREADED) chip->irq_eoi(&desc->irq_data); unmask_irq(desc); } /* * handle_nested_irq - Handle a nested irq from a irq thread * @irq: the interrupt number * * Handle interrupts which are nested into a threaded interrupt * handler. The handler function is called inside the calling * threads context. */ void handle_nested_irq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action; irqreturn_t action_ret; might_sleep(); raw_spin_lock_irq(&desc->lock); desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); action = desc->action; if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; raw_spin_unlock_irq(&desc->lock); return; } kstat_incr_irqs_this_cpu(desc); atomic_inc(&desc->threads_active); raw_spin_unlock_irq(&desc->lock); action_ret = IRQ_NONE; for_each_action_of_desc(desc, action) action_ret |= action->thread_fn(action->irq, action->dev_id); if (!irq_settings_no_debug(desc)) note_interrupt(desc, action_ret); wake_threads_waitq(desc); } EXPORT_SYMBOL_GPL(handle_nested_irq); static bool irq_check_poll(struct irq_desc *desc) { if (!(desc->istate & IRQS_POLL_INPROGRESS)) return false; return irq_wait_for_poll(desc); } static bool irq_may_run(struct irq_desc *desc) { unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; /* * If the interrupt is not in progress and is not an armed * wakeup interrupt, proceed. */ if (!irqd_has_set(&desc->irq_data, mask)) return true; /* * If the interrupt is an armed wakeup source, mark it pending * and suspended, disable it and notify the pm core about the * event. */ if (irq_pm_check_wakeup(desc)) return false; /* * Handle a potential concurrent poll on a different core. */ return irq_check_poll(desc); } /** * handle_simple_irq - Simple and software-decoded IRQs. * @desc: the interrupt description structure for this irq * * Simple interrupts are either sent from a demultiplexing interrupt * handler or come from hardware, where no interrupt hardware control * is necessary. * * Note: The caller is expected to handle the ack, clear, mask and * unmask issues if necessary. 
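 *
 * A minimal setup sketch (hypothetical demultiplexing driver, not part
 * of this file): children handled by this flow handler are typically
 * registered and then kicked from the parent's handler, e.g.
 *
 *	irq_set_chip_and_handler(child_irq, &dummy_irq_chip,
 *				 handle_simple_irq);
 *	...
 *	// in the demux handler, for each pending child interrupt:
 *	generic_handle_irq(child_irq);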
*/ void handle_simple_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); if (!irq_may_run(desc)) goto out_unlock; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; goto out_unlock; } kstat_incr_irqs_this_cpu(desc); handle_irq_event(desc); out_unlock: raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_simple_irq); /** * handle_untracked_irq - Simple and software-decoded IRQs. * @desc: the interrupt description structure for this irq * * Untracked interrupts are sent from a demultiplexing interrupt * handler when the demultiplexer does not know which device it its * multiplexed irq domain generated the interrupt. IRQ's handled * through here are not subjected to stats tracking, randomness, or * spurious interrupt detection. * * Note: Like handle_simple_irq, the caller is expected to handle * the ack, clear, mask and unmask issues if necessary. */ void handle_untracked_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); if (!irq_may_run(desc)) goto out_unlock; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; goto out_unlock; } desc->istate &= ~IRQS_PENDING; irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); raw_spin_unlock(&desc->lock); __handle_irq_event_percpu(desc); raw_spin_lock(&desc->lock); irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); out_unlock: raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_untracked_irq); /* * Called unconditionally from handle_level_irq() and only for oneshot * interrupts from handle_fasteoi_irq() */ static void cond_unmask_irq(struct irq_desc *desc) { /* * We need to unmask in the following cases: * - Standard level irq (IRQF_ONESHOT is not set) * - Oneshot irq which did not wake the thread (caused by a * spurious interrupt or a primary handler handling it * completely). */ if (!irqd_irq_disabled(&desc->irq_data) && irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) unmask_irq(desc); } /** * handle_level_irq - Level type irq handler * @desc: the interrupt description structure for this irq * * Level type interrupts are active as long as the hardware line has * the active level. This may require to mask the interrupt and unmask * it after the associated handler has acknowledged the device, so the * interrupt line is back to inactive. */ void handle_level_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); mask_ack_irq(desc); if (!irq_may_run(desc)) goto out_unlock; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); /* * If its disabled or no action available * keep it masked and get out of here */ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; goto out_unlock; } kstat_incr_irqs_this_cpu(desc); handle_irq_event(desc); cond_unmask_irq(desc); out_unlock: raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_level_irq); static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) { if (!(desc->istate & IRQS_ONESHOT)) { chip->irq_eoi(&desc->irq_data); return; } /* * We need to unmask in the following cases: * - Oneshot irq which did not wake the thread (caused by a * spurious interrupt or a primary handler handling it * completely). 
*/ if (!irqd_irq_disabled(&desc->irq_data) && irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) { chip->irq_eoi(&desc->irq_data); unmask_irq(desc); } else if (!(chip->flags & IRQCHIP_EOI_THREADED)) { chip->irq_eoi(&desc->irq_data); } } /** * handle_fasteoi_irq - irq handler for transparent controllers * @desc: the interrupt description structure for this irq * * Only a single callback will be issued to the chip: an ->eoi() * call when the interrupt has been serviced. This enables support * for modern forms of interrupt handlers, which handle the flow * details in hardware, transparently. */ void handle_fasteoi_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; raw_spin_lock(&desc->lock); /* * When an affinity change races with IRQ handling, the next interrupt * can arrive on the new CPU before the original CPU has completed * handling the previous one - it may need to be resent. */ if (!irq_may_run(desc)) { if (irqd_needs_resend_when_in_progress(&desc->irq_data)) desc->istate |= IRQS_PENDING; goto out; } desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); /* * If its disabled or no action available * then mask it and get out of here: */ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; mask_irq(desc); goto out; } kstat_incr_irqs_this_cpu(desc); if (desc->istate & IRQS_ONESHOT) mask_irq(desc); handle_irq_event(desc); cond_unmask_eoi_irq(desc, chip); /* * When the race described above happens this will resend the interrupt. */ if (unlikely(desc->istate & IRQS_PENDING)) check_irq_resend(desc, false); raw_spin_unlock(&desc->lock); return; out: if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) chip->irq_eoi(&desc->irq_data); raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_fasteoi_irq); /** * handle_fasteoi_nmi - irq handler for NMI interrupt lines * @desc: the interrupt description structure for this irq * * A simple NMI-safe handler, considering the restrictions * from request_nmi. * * Only a single callback will be issued to the chip: an ->eoi() * call when the interrupt has been serviced. This enables support * for modern forms of interrupt handlers, which handle the flow * details in hardware, transparently. */ void handle_fasteoi_nmi(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irqaction *action = desc->action; unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; __kstat_incr_irqs_this_cpu(desc); trace_irq_handler_entry(irq, action); /* * NMIs cannot be shared, there is only one action. */ res = action->handler(irq, action->dev_id); trace_irq_handler_exit(irq, action, res); if (chip->irq_eoi) chip->irq_eoi(&desc->irq_data); } EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); /** * handle_edge_irq - edge type IRQ handler * @desc: the interrupt description structure for this irq * * Interrupt occurs on the falling and/or rising edge of a hardware * signal. The occurrence is latched into the irq controller hardware * and must be acked in order to be reenabled. After the ack another * interrupt can happen on the same source even before the first one * is handled by the associated event handler. If this happens it * might be necessary to disable (mask) the interrupt depending on the * controller hardware. This requires to reenable the interrupt inside * of the loop which handles the interrupts which have arrived while * the handler was running. If all pending interrupts are handled, the * loop is left. 
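 *
 * In outline the handler below therefore does (a sketch of the code that
 * follows, not additional behaviour):
 *
 *	chip->irq_ack();
 *	do {
 *		if (no action left)
 *			mask and stop;
 *		if (IRQS_PENDING && masked && !disabled)
 *			unmask;
 *		handle_irq_event();
 *	} while (IRQS_PENDING && !disabled);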
*/ void handle_edge_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); if (!irq_may_run(desc)) { desc->istate |= IRQS_PENDING; mask_ack_irq(desc); goto out_unlock; } /* * If its disabled or no action available then mask it and get * out of here. */ if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { desc->istate |= IRQS_PENDING; mask_ack_irq(desc); goto out_unlock; } kstat_incr_irqs_this_cpu(desc); /* Start handling the irq */ desc->irq_data.chip->irq_ack(&desc->irq_data); do { if (unlikely(!desc->action)) { mask_irq(desc); goto out_unlock; } /* * When another irq arrived while we were handling * one, we could have masked the irq. * Reenable it, if it was not disabled in meantime. */ if (unlikely(desc->istate & IRQS_PENDING)) { if (!irqd_irq_disabled(&desc->irq_data) && irqd_irq_masked(&desc->irq_data)) unmask_irq(desc); } handle_irq_event(desc); } while ((desc->istate & IRQS_PENDING) && !irqd_irq_disabled(&desc->irq_data)); out_unlock: raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL(handle_edge_irq); #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER /** * handle_edge_eoi_irq - edge eoi type IRQ handler * @desc: the interrupt description structure for this irq * * Similar as the above handle_edge_irq, but using eoi and w/o the * mask/unmask logic. */ void handle_edge_eoi_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); raw_spin_lock(&desc->lock); desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); if (!irq_may_run(desc)) { desc->istate |= IRQS_PENDING; goto out_eoi; } /* * If its disabled or no action available then mask it and get * out of here. */ if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { desc->istate |= IRQS_PENDING; goto out_eoi; } kstat_incr_irqs_this_cpu(desc); do { if (unlikely(!desc->action)) goto out_eoi; handle_irq_event(desc); } while ((desc->istate & IRQS_PENDING) && !irqd_irq_disabled(&desc->irq_data)); out_eoi: chip->irq_eoi(&desc->irq_data); raw_spin_unlock(&desc->lock); } #endif /** * handle_percpu_irq - Per CPU local irq handler * @desc: the interrupt description structure for this irq * * Per CPU interrupts on SMP machines without locking requirements */ void handle_percpu_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); /* * PER CPU interrupts are not serialized. Do not touch * desc->tot_count. */ __kstat_incr_irqs_this_cpu(desc); if (chip->irq_ack) chip->irq_ack(&desc->irq_data); handle_irq_event_percpu(desc); if (chip->irq_eoi) chip->irq_eoi(&desc->irq_data); } /** * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids * @desc: the interrupt description structure for this irq * * Per CPU interrupts on SMP machines without locking requirements. Same as * handle_percpu_irq() above but with the following extras: * * action->percpu_dev_id is a pointer to percpu variables which * contain the real device id for the cpu on which this handler is * called */ void handle_percpu_devid_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irqaction *action = desc->action; unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; /* * PER CPU interrupts are not serialized. Do not touch * desc->tot_count. 
*/ __kstat_incr_irqs_this_cpu(desc); if (chip->irq_ack) chip->irq_ack(&desc->irq_data); if (likely(action)) { trace_irq_handler_entry(irq, action); res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); trace_irq_handler_exit(irq, action, res); } else { unsigned int cpu = smp_processor_id(); bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); if (enabled) irq_percpu_disable(desc, cpu); pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n", enabled ? " and unmasked" : "", irq, cpu); } if (chip->irq_eoi) chip->irq_eoi(&desc->irq_data); } /** * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu * dev ids * @desc: the interrupt description structure for this irq * * Similar to handle_fasteoi_nmi, but handling the dev_id cookie * as a percpu pointer. */ void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irqaction *action = desc->action; unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; __kstat_incr_irqs_this_cpu(desc); trace_irq_handler_entry(irq, action); res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); trace_irq_handler_exit(irq, action, res); if (chip->irq_eoi) chip->irq_eoi(&desc->irq_data); } static void __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained, const char *name) { if (!handle) { handle = handle_bad_irq; } else { struct irq_data *irq_data = &desc->irq_data; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /* * With hierarchical domains we might run into a * situation where the outermost chip is not yet set * up, but the inner chips are there. Instead of * bailing we install the handler, but obviously we * cannot enable/startup the interrupt at this point. */ while (irq_data) { if (irq_data->chip != &no_irq_chip) break; /* * Bail out if the outer chip is not set up * and the interrupt supposed to be started * right away. */ if (WARN_ON(is_chained)) return; /* Try the parent */ irq_data = irq_data->parent_data; } #endif if (WARN_ON(!irq_data || irq_data->chip == &no_irq_chip)) return; } /* Uninstall? */ if (handle == handle_bad_irq) { if (desc->irq_data.chip != &no_irq_chip) mask_ack_irq(desc); irq_state_set_disabled(desc); if (is_chained) { desc->action = NULL; WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc))); } desc->depth = 1; } desc->handle_irq = handle; desc->name = name; if (handle != handle_bad_irq && is_chained) { unsigned int type = irqd_get_trigger_type(&desc->irq_data); /* * We're about to start this interrupt immediately, * hence the need to set the trigger configuration. * But the .set_type callback may have overridden the * flow handler, ignoring that we're dealing with a * chained interrupt. Reset it immediately because we * do know better. 
*/ if (type != IRQ_TYPE_NONE) { __irq_set_trigger(desc, type); desc->handle_irq = handle; } irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); desc->action = &chained_action; WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc))); irq_activate_and_startup(desc, IRQ_RESEND); } } void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, const char *name) { unsigned long flags; struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); if (!desc) return; __irq_do_set_handler(desc, handle, is_chained, name); irq_put_desc_busunlock(desc, flags); } EXPORT_SYMBOL_GPL(__irq_set_handler); void irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, void *data) { unsigned long flags; struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); if (!desc) return; desc->irq_common_data.handler_data = data; __irq_do_set_handler(desc, handle, 1, NULL); irq_put_desc_busunlock(desc, flags); } EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); void irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name) { irq_set_chip(irq, chip); __irq_set_handler(irq, handle, 0, name); } EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) { unsigned long flags, trigger, tmp; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); if (!desc) return; /* * Warn when a driver sets the no autoenable flag on an already * active interrupt. */ WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN)); irq_settings_clr_and_set(desc, clr, set); trigger = irqd_get_trigger_type(&desc->irq_data); irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); if (irq_settings_has_no_balance_set(desc)) irqd_set(&desc->irq_data, IRQD_NO_BALANCING); if (irq_settings_is_per_cpu(desc)) irqd_set(&desc->irq_data, IRQD_PER_CPU); if (irq_settings_can_move_pcntxt(desc)) irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); if (irq_settings_is_level(desc)) irqd_set(&desc->irq_data, IRQD_LEVEL); tmp = irq_settings_get_trigger_mask(desc); if (tmp != IRQ_TYPE_NONE) trigger = tmp; irqd_set(&desc->irq_data, trigger); irq_put_desc_unlock(desc, flags); } EXPORT_SYMBOL_GPL(irq_modify_status); #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE /** * irq_cpu_online - Invoke all irq_cpu_online functions. * * Iterate through all irqs and invoke the chip.irq_cpu_online() * for each. */ void irq_cpu_online(void) { struct irq_desc *desc; struct irq_chip *chip; unsigned long flags; unsigned int irq; for_each_active_irq(irq) { desc = irq_to_desc(irq); if (!desc) continue; raw_spin_lock_irqsave(&desc->lock, flags); chip = irq_data_get_irq_chip(&desc->irq_data); if (chip && chip->irq_cpu_online && (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || !irqd_irq_disabled(&desc->irq_data))) chip->irq_cpu_online(&desc->irq_data); raw_spin_unlock_irqrestore(&desc->lock, flags); } } /** * irq_cpu_offline - Invoke all irq_cpu_offline functions. * * Iterate through all irqs and invoke the chip.irq_cpu_offline() * for each. 
*/ void irq_cpu_offline(void) { struct irq_desc *desc; struct irq_chip *chip; unsigned long flags; unsigned int irq; for_each_active_irq(irq) { desc = irq_to_desc(irq); if (!desc) continue; raw_spin_lock_irqsave(&desc->lock, flags); chip = irq_data_get_irq_chip(&desc->irq_data); if (chip && chip->irq_cpu_offline && (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || !irqd_irq_disabled(&desc->irq_data))) chip->irq_cpu_offline(&desc->irq_data); raw_spin_unlock_irqrestore(&desc->lock, flags); } } #endif #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY #ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS /** * handle_fasteoi_ack_irq - irq handler for edge hierarchy * stacked on transparent controllers * * @desc: the interrupt description structure for this irq * * Like handle_fasteoi_irq(), but for use with hierarchy where * the irq_chip also needs to have its ->irq_ack() function * called. */ void handle_fasteoi_ack_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; raw_spin_lock(&desc->lock); if (!irq_may_run(desc)) goto out; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); /* * If its disabled or no action available * then mask it and get out of here: */ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; mask_irq(desc); goto out; } kstat_incr_irqs_this_cpu(desc); if (desc->istate & IRQS_ONESHOT) mask_irq(desc); /* Start handling the irq */ desc->irq_data.chip->irq_ack(&desc->irq_data); handle_irq_event(desc); cond_unmask_eoi_irq(desc, chip); raw_spin_unlock(&desc->lock); return; out: if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) chip->irq_eoi(&desc->irq_data); raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_fasteoi_ack_irq); /** * handle_fasteoi_mask_irq - irq handler for level hierarchy * stacked on transparent controllers * * @desc: the interrupt description structure for this irq * * Like handle_fasteoi_irq(), but for use with hierarchy where * the irq_chip also needs to have its ->irq_mask_ack() function * called. */ void handle_fasteoi_mask_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; raw_spin_lock(&desc->lock); mask_ack_irq(desc); if (!irq_may_run(desc)) goto out; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); /* * If its disabled or no action available * then mask it and get out of here: */ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; mask_irq(desc); goto out; } kstat_incr_irqs_this_cpu(desc); if (desc->istate & IRQS_ONESHOT) mask_irq(desc); handle_irq_event(desc); cond_unmask_eoi_irq(desc, chip); raw_spin_unlock(&desc->lock); return; out: if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) chip->irq_eoi(&desc->irq_data); raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq); #endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */ /** * irq_chip_set_parent_state - set the state of a parent interrupt. * * @data: Pointer to interrupt specific data * @which: State to be restored (one of IRQCHIP_STATE_*) * @val: Value corresponding to @which * * Conditional success, if the underlying irqchip does not implement it. */ int irq_chip_set_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool val) { data = data->parent_data; if (!data || !data->chip->irq_set_irqchip_state) return 0; return data->chip->irq_set_irqchip_state(data, which, val); } EXPORT_SYMBOL_GPL(irq_chip_set_parent_state); /** * irq_chip_get_parent_state - get the state of a parent interrupt. 
* * @data: Pointer to interrupt specific data * @which: one of IRQCHIP_STATE_* the caller wants to know * @state: a pointer to a boolean where the state is to be stored * * Conditional success, if the underlying irqchip does not implement it. */ int irq_chip_get_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool *state) { data = data->parent_data; if (!data || !data->chip->irq_get_irqchip_state) return 0; return data->chip->irq_get_irqchip_state(data, which, state); } EXPORT_SYMBOL_GPL(irq_chip_get_parent_state); /** * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if * NULL) * @data: Pointer to interrupt specific data */ void irq_chip_enable_parent(struct irq_data *data) { data = data->parent_data; if (data->chip->irq_enable) data->chip->irq_enable(data); else data->chip->irq_unmask(data); } EXPORT_SYMBOL_GPL(irq_chip_enable_parent); /** * irq_chip_disable_parent - Disable the parent interrupt (defaults to mask if * NULL) * @data: Pointer to interrupt specific data */ void irq_chip_disable_parent(struct irq_data *data) { data = data->parent_data; if (data->chip->irq_disable) data->chip->irq_disable(data); else data->chip->irq_mask(data); } EXPORT_SYMBOL_GPL(irq_chip_disable_parent); /** * irq_chip_ack_parent - Acknowledge the parent interrupt * @data: Pointer to interrupt specific data */ void irq_chip_ack_parent(struct irq_data *data) { data = data->parent_data; data->chip->irq_ack(data); } EXPORT_SYMBOL_GPL(irq_chip_ack_parent); /** * irq_chip_mask_parent - Mask the parent interrupt * @data: Pointer to interrupt specific data */ void irq_chip_mask_parent(struct irq_data *data) { data = data->parent_data; data->chip->irq_mask(data); } EXPORT_SYMBOL_GPL(irq_chip_mask_parent); /** * irq_chip_mask_ack_parent - Mask and acknowledge the parent interrupt * @data: Pointer to interrupt specific data */ void irq_chip_mask_ack_parent(struct irq_data *data) { data = data->parent_data; data->chip->irq_mask_ack(data); } EXPORT_SYMBOL_GPL(irq_chip_mask_ack_parent); /** * irq_chip_unmask_parent - Unmask the parent interrupt * @data: Pointer to interrupt specific data */ void irq_chip_unmask_parent(struct irq_data *data) { data = data->parent_data; data->chip->irq_unmask(data); } EXPORT_SYMBOL_GPL(irq_chip_unmask_parent); /** * irq_chip_eoi_parent - Invoke EOI on the parent interrupt * @data: Pointer to interrupt specific data */ void irq_chip_eoi_parent(struct irq_data *data) { data = data->parent_data; data->chip->irq_eoi(data); } EXPORT_SYMBOL_GPL(irq_chip_eoi_parent); /** * irq_chip_set_affinity_parent - Set affinity on the parent interrupt * @data: Pointer to interrupt specific data * @dest: The affinity mask to set * @force: Flag to enforce setting (disable online checks) * * Conditional, as the underlying parent chip might not implement it. */ int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force) { data = data->parent_data; if (data->chip->irq_set_affinity) return data->chip->irq_set_affinity(data, dest, force); return -ENOSYS; } EXPORT_SYMBOL_GPL(irq_chip_set_affinity_parent); /** * irq_chip_set_type_parent - Set IRQ type on the parent interrupt * @data: Pointer to interrupt specific data * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h * * Conditional, as the underlying parent chip might not implement it. 
*/ int irq_chip_set_type_parent(struct irq_data *data, unsigned int type) { data = data->parent_data; if (data->chip->irq_set_type) return data->chip->irq_set_type(data, type); return -ENOSYS; } EXPORT_SYMBOL_GPL(irq_chip_set_type_parent); /** * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware * @data: Pointer to interrupt specific data * * Iterate through the domain hierarchy of the interrupt and check * whether a hw retrigger function exists. If yes, invoke it. */ int irq_chip_retrigger_hierarchy(struct irq_data *data) { for (data = data->parent_data; data; data = data->parent_data) if (data->chip && data->chip->irq_retrigger) return data->chip->irq_retrigger(data); return 0; } EXPORT_SYMBOL_GPL(irq_chip_retrigger_hierarchy); /** * irq_chip_set_vcpu_affinity_parent - Set vcpu affinity on the parent interrupt * @data: Pointer to interrupt specific data * @vcpu_info: The vcpu affinity information */ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) { data = data->parent_data; if (data->chip->irq_set_vcpu_affinity) return data->chip->irq_set_vcpu_affinity(data, vcpu_info); return -ENOSYS; } EXPORT_SYMBOL_GPL(irq_chip_set_vcpu_affinity_parent); /** * irq_chip_set_wake_parent - Set/reset wake-up on the parent interrupt * @data: Pointer to interrupt specific data * @on: Whether to set or reset the wake-up capability of this irq * * Conditional, as the underlying parent chip might not implement it. */ int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) { data = data->parent_data; if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE) return 0; if (data->chip->irq_set_wake) return data->chip->irq_set_wake(data, on); return -ENOSYS; } EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent); /** * irq_chip_request_resources_parent - Request resources on the parent interrupt * @data: Pointer to interrupt specific data */ int irq_chip_request_resources_parent(struct irq_data *data) { data = data->parent_data; if (data->chip->irq_request_resources) return data->chip->irq_request_resources(data); /* no error on missing optional irq_chip::irq_request_resources */ return 0; } EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); /** * irq_chip_release_resources_parent - Release resources on the parent interrupt * @data: Pointer to interrupt specific data */ void irq_chip_release_resources_parent(struct irq_data *data) { data = data->parent_data; if (data->chip->irq_release_resources) data->chip->irq_release_resources(data); } EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); #endif /** * irq_chip_compose_msi_msg - Compose msi message for a irq chip * @data: Pointer to interrupt specific data * @msg: Pointer to the MSI message * * For hierarchical domains we find the first chip in the hierarchy * which implements the irq_compose_msi_msg callback. For non * hierarchical we use the top level chip. */ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { struct irq_data *pos; for (pos = NULL; !pos && data; data = irqd_get_parent_data(data)) { if (data->chip && data->chip->irq_compose_msi_msg) pos = data; } if (!pos) return -ENOSYS; pos->chip->irq_compose_msi_msg(pos, msg); return 0; } static struct device *irq_get_pm_device(struct irq_data *data) { if (data->domain) return data->domain->pm_dev; return NULL; } /** * irq_chip_pm_get - Enable power for an IRQ chip * @data: Pointer to interrupt specific data * * Enable the power to the IRQ chip referenced by the interrupt data * structure. 
*/ int irq_chip_pm_get(struct irq_data *data) { struct device *dev = irq_get_pm_device(data); int retval = 0; if (IS_ENABLED(CONFIG_PM) && dev) retval = pm_runtime_resume_and_get(dev); return retval; } /** * irq_chip_pm_put - Disable power for an IRQ chip * @data: Pointer to interrupt specific data * * Disable the power to the IRQ chip referenced by the interrupt data * structure. Note that power will only be disabled once this * function has been called for all IRQs that have called irq_chip_pm_get(). */ int irq_chip_pm_put(struct irq_data *data) { struct device *dev = irq_get_pm_device(data); int retval = 0; if (IS_ENABLED(CONFIG_PM) && dev) retval = pm_runtime_put(dev); return (retval < 0) ? retval : 0; }
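/*
 * Example sketch (hypothetical "foo" interrupt controller; all foo_* names
 * are made up, not part of the code above): how a driver typically uses the
 * helpers defined in this file.  A normal child interrupt gets a chip and a
 * generic flow handler via irq_set_chip_and_handler_name(), while the parent
 * line from the upstream controller is installed as a chained handler via
 * irq_set_chained_handler_and_data(), which starts it immediately and keeps
 * it out of request_irq().
 */
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/irqchip/chained_irq.h>

struct foo_priv {
	void __iomem *regs;			/* hypothetical register window */
};

static void foo_mask_irq(struct irq_data *d)   { /* write a hypothetical mask register */ }
static void foo_unmask_irq(struct irq_data *d) { /* write a hypothetical unmask register */ }

static struct irq_chip foo_irq_chip = {
	.name		= "foo",
	.irq_mask	= foo_mask_irq,
	.irq_unmask	= foo_unmask_irq,
};

/* Flow handler for the chained parent interrupt. */
static void foo_demux_handler(struct irq_desc *desc)
{
	struct foo_priv *priv = irq_desc_get_handler_data(desc);
	struct irq_chip *chip = irq_desc_get_chip(desc);

	chained_irq_enter(chip, desc);
	/*
	 * Here the driver would scan priv->regs for pending children and
	 * invoke generic_handle_domain_irq() for each one.
	 */
	chained_irq_exit(chip, desc);
}

static void foo_setup_irqs(struct foo_priv *priv, unsigned int child_irq,
			   unsigned int parent_irq)
{
	/* Normal child interrupt: chip + generic level flow handler + name. */
	irq_set_chip_and_handler_name(child_irq, &foo_irq_chip,
				      handle_level_irq, "foo-child");

	/* Parent line is chained: not requestable, started right away. */
	irq_set_chained_handler_and_data(parent_irq, foo_demux_handler, priv);
}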
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_IVERSION_H #define _LINUX_IVERSION_H #include <linux/fs.h> /* * The inode->i_version field: * --------------------------- * The change attribute (i_version) is mandated by NFSv4 and is mostly for * knfsd, but is also used for other purposes (e.g. IMA). The i_version must * appear larger to observers if there was an explicit change to the inode's * data or metadata since it was last queried. * * An explicit change is one that would ordinarily result in a change to the * inode status change time (aka ctime). i_version must appear to change, even * if the ctime does not (since the whole point is to avoid missing updates due * to timestamp granularity). If POSIX or other relevant spec mandates that the * ctime must change due to an operation, then the i_version counter must be * incremented as well. * * Making the i_version update completely atomic with the operation itself would * be prohibitively expensive. Traditionally the kernel has updated the times on * directories after an operation that changes its contents. For regular files, * the ctime is usually updated before the data is copied into the cache for a * write. This means that there is a window of time when an observer can * associate a new timestamp with old file contents. Since the purpose of the * i_version is to allow for better cache coherency, the i_version must always * be updated after the results of the operation are visible. Updating it before * and after a change is also permitted. (Note that no filesystems currently do * this. Fixing that is a work-in-progress). * * Observers see the i_version as a 64-bit number that never decreases. If it * remains the same since it was last checked, then nothing has changed in the * inode. If it's different then something has changed. Observers cannot infer * anything about the nature or magnitude of the changes from the value, only * that the inode has changed in some fashion. * * Not all filesystems properly implement the i_version counter. Subsystems that * want to use i_version field on an inode should first check whether the * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro). * * Those that set SB_I_VERSION will automatically have their i_version counter * incremented on writes to normal files. If the SB_I_VERSION is not set, then * the VFS will not touch it on writes, and the filesystem can use it how it * wishes.
Note that the filesystem is always responsible for updating the * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.). * We consider these sorts of filesystems to have a kernel-managed i_version. * * It may be impractical for filesystems to keep i_version updates atomic with * respect to the changes that cause them. They should, however, guarantee * that i_version updates are never visible before the changes that caused * them. Also, i_version updates should never be delayed longer than it takes * the original change to reach disk. * * This implementation uses the low bit in the i_version field as a flag to * track when the value has been queried. If it has not been queried since it * was last incremented, we can skip the increment in most cases. * * In the event that we're updating the ctime, we will usually go ahead and * bump the i_version anyway. Since that has to go to stable storage in some * fashion, we might as well increment it as well. * * With this implementation, the value should always appear to observers to * increase over time if the file has changed. It's recommended to use * inode_eq_iversion() helper to compare values. * * Note that some filesystems (e.g. NFS and AFS) just use the field to store * a server-provided value (for the most part). For that reason, those * filesystems do not set SB_I_VERSION. These filesystems are considered to * have a self-managed i_version. * * Persistently storing the i_version * ---------------------------------- * Queries of the i_version field are not gated on them hitting the backing * store. It's always possible that the host could crash after allowing * a query of the value but before it has made it to disk. * * To mitigate this problem, filesystems should always use * inode_set_iversion_queried when loading an existing inode from disk. This * ensures that the next attempted inode increment will result in the value * changing. * * Storing the value to disk therefore does not count as a query, so those * filesystems should use inode_peek_iversion to grab the value to be stored. * There is no need to flag the value as having been queried in that case. */ /* * We borrow the lowest bit in the i_version to use as a flag to tell whether * it has been queried since we last incremented it. If it has, then we must * increment it on the next change. After that, we can clear the flag and * avoid incrementing it again until it has again been queried. */ #define I_VERSION_QUERIED_SHIFT (1) #define I_VERSION_QUERIED (1ULL << (I_VERSION_QUERIED_SHIFT - 1)) #define I_VERSION_INCREMENT (1ULL << I_VERSION_QUERIED_SHIFT) /** * inode_set_iversion_raw - set i_version to the specified raw value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for use by * filesystems that self-manage the i_version. * * For example, the NFS client stores its NFSv4 change attribute in this way, * and the AFS client stores the data_version from the server here. */ static inline void inode_set_iversion_raw(struct inode *inode, u64 val) { atomic64_set(&inode->i_version, val); } /** * inode_peek_iversion_raw - grab a "raw" iversion value * @inode: inode from which i_version should be read * * Grab a "raw" inode->i_version value and return it. The i_version is not * flagged or converted in any way. This is mostly used to access a self-managed * i_version. * * With those filesystems, we want to treat the i_version as an entirely * opaque value. 
*/ static inline u64 inode_peek_iversion_raw(const struct inode *inode) { return atomic64_read(&inode->i_version); } /** * inode_set_max_iversion_raw - update i_version new value is larger * @inode: inode to set * @val: new i_version to set * * Some self-managed filesystems (e.g Ceph) will only update the i_version * value if the new value is larger than the one we already have. */ static inline void inode_set_max_iversion_raw(struct inode *inode, u64 val) { u64 cur = inode_peek_iversion_raw(inode); do { if (cur > val) break; } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, val)); } /** * inode_set_iversion - set i_version to a particular value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for filesystems with * a kernel-managed i_version, for initializing a newly-created inode from * scratch. * * In this case, we do not set the QUERIED flag since we know that this value * has never been queried. */ static inline void inode_set_iversion(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT); } /** * inode_set_iversion_queried - set i_version to a particular value as quereied * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val, and flag it for increment on the next * change. * * Filesystems that persistently store the i_version on disk should use this * when loading an existing inode from disk. * * When loading in an i_version value from a backing store, we can't be certain * that it wasn't previously viewed before being stored. Thus, we must assume * that it was, to ensure that we don't end up handing out the same value for * different versions of the same inode. */ static inline void inode_set_iversion_queried(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) | I_VERSION_QUERIED); } bool inode_maybe_inc_iversion(struct inode *inode, bool force); /** * inode_inc_iversion - forcibly increment i_version * @inode: inode that needs to be updated * * Forcbily increment the i_version field. This always results in a change to * the observable value. */ static inline void inode_inc_iversion(struct inode *inode) { inode_maybe_inc_iversion(inode, true); } /** * inode_iversion_need_inc - is the i_version in need of being incremented? * @inode: inode to check * * Returns whether the inode->i_version counter needs incrementing on the next * change. Just fetch the value and check the QUERIED flag. */ static inline bool inode_iversion_need_inc(struct inode *inode) { return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED; } /** * inode_inc_iversion_raw - forcibly increment raw i_version * @inode: inode that needs to be updated * * Forcbily increment the raw i_version field. This always results in a change * to the raw value. * * NFS will use the i_version field to store the value from the server. It * mostly treats it as opaque, but in the case where it holds a write * delegation, it must increment the value itself. This function does that. */ static inline void inode_inc_iversion_raw(struct inode *inode) { atomic64_inc(&inode->i_version); } /** * inode_peek_iversion - read i_version without flagging it to be incremented * @inode: inode from which i_version should be read * * Read the inode i_version counter for an inode without registering it as a * query. * * This is typically used by local filesystems that need to store an i_version * on disk. 
In that situation, it's not necessary to flag it as having been * viewed, as the result won't be used to gauge changes from that point. */ static inline u64 inode_peek_iversion(const struct inode *inode) { return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; } /* * For filesystems without any sort of change attribute, the best we can * do is fake one up from the ctime: */ static inline u64 time_to_chattr(const struct timespec64 *t) { u64 chattr = t->tv_sec; chattr <<= 32; chattr += t->tv_nsec; return chattr; } u64 inode_query_iversion(struct inode *inode); /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare the current raw i_version counter with a previous one. Returns true * if they are the same or false if they are different. */ static inline bool inode_eq_iversion_raw(const struct inode *inode, u64 old) { return inode_peek_iversion_raw(inode) == old; } /** * inode_eq_iversion - check whether the i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare an i_version counter with a previous one. Returns true if they are * the same, and false if they are different. * * Note that we don't need to set the QUERIED flag in this case, as the value * in the inode is not being recorded for later use. */ static inline bool inode_eq_iversion(const struct inode *inode, u64 old) { return inode_peek_iversion(inode) == old; } #endif
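/*
 * Example sketch (hypothetical "foo" filesystem; foo_* names and the on-disk
 * layout are made up): how a filesystem with a kernel-managed i_version would
 * use the helpers above.  On load the value is flagged as queried, so the
 * next change is guaranteed to bump it; on writeback the value is only
 * peeked at, which does not count as a query.
 */
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/iversion.h>

struct foo_disk_inode {				/* hypothetical on-disk layout */
	__le64	di_version;
	/* ... */
};

static void foo_read_inode(struct inode *inode,
			   const struct foo_disk_inode *raw)
{
	/* Assume the value may have been seen before the last unmount/crash. */
	inode_set_iversion_queried(inode, le64_to_cpu(raw->di_version));
}

static void foo_write_inode(const struct inode *inode,
			    struct foo_disk_inode *raw)
{
	/* Storing the value is not a query, so just peek at it. */
	raw->di_version = cpu_to_le64(inode_peek_iversion(inode));
}

static void foo_change_inode(struct inode *inode)
{
	/* ... modify data or metadata and update the ctime as usual ... */

	/* Increment only if the counter was queried since the last bump. */
	inode_maybe_inc_iversion(inode, false);
	mark_inode_dirty(inode);
}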
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_PAGE_EXT_H #define __LINUX_PAGE_EXT_H #include <linux/types.h> #include <linux/stacktrace.h> #include <linux/stackdepot.h> struct pglist_data; #ifdef CONFIG_PAGE_EXTENSION /** * struct page_ext_operations - per page_ext client operations * @offset: Offset to the client's data within page_ext. Offset is returned to * the client by page_ext_init. * @size: The size of the client data within page_ext. * @need: Function that returns true if client requires page_ext. * @init: (optional) Called to initialize client once page_exts are allocated. * @need_shared_flags: True when client is using shared page_ext->flags * field. * * Each Page Extension client must define page_ext_operations in * page_ext_ops array. */ struct page_ext_operations { size_t offset; size_t size; bool (*need)(void); void (*init)(void); bool need_shared_flags; }; /* * The page_ext_flags users must set need_shared_flags to true. */ enum page_ext_flags { PAGE_EXT_OWNER, PAGE_EXT_OWNER_ALLOCATED, #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) PAGE_EXT_YOUNG, PAGE_EXT_IDLE, #endif }; /* * Page Extension can be considered as an extended mem_map. * A page_ext page is associated with every page descriptor. The * page_ext helps us add more information about the page. * All page_ext are allocated at boot or memory hotplug event, * then the page_ext for pfn always exists. */ struct page_ext { unsigned long flags; }; extern bool early_page_ext; extern unsigned long page_ext_size; extern void pgdat_page_ext_init(struct pglist_data *pgdat); static inline bool early_page_ext_enabled(void) { return early_page_ext; } #ifdef CONFIG_SPARSEMEM static inline void page_ext_init_flatmem(void) { } extern void page_ext_init(void); static inline void page_ext_init_flatmem_late(void) { } #else extern void page_ext_init_flatmem(void); extern void page_ext_init_flatmem_late(void); static inline void page_ext_init(void) { } #endif extern struct page_ext *page_ext_get(struct page *page); extern void page_ext_put(struct page_ext *page_ext); static inline void *page_ext_data(struct page_ext *page_ext, struct page_ext_operations *ops) { return (void *)(page_ext) + ops->offset; } static inline struct page_ext *page_ext_next(struct page_ext *curr) { void *next = curr; next += page_ext_size; return next; } #else /* !CONFIG_PAGE_EXTENSION */ struct page_ext; static inline bool early_page_ext_enabled(void) { return false; } static inline void pgdat_page_ext_init(struct pglist_data *pgdat) { } static inline void page_ext_init(void) { } static inline void page_ext_init_flatmem_late(void) { } static inline void page_ext_init_flatmem(void) { } static inline struct page_ext *page_ext_get(struct page *page) { return NULL; } static inline void page_ext_put(struct page_ext *page_ext) { } #endif /* CONFIG_PAGE_EXTENSION */ #endif /* __LINUX_PAGE_EXT_H */
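/*
 * Example sketch (hypothetical page_ext client, not one of the in-tree
 * users; "foo_tracker" and its fields are made up): a client declares a
 * struct page_ext_operations entry describing how much private data it
 * needs (and must also be listed in the page_ext_ops array, as noted in the
 * header above), then reaches its per-page data through page_ext_get(),
 * page_ext_data() and page_ext_put().
 */
#include <linux/page_ext.h>
#include <linux/mm_types.h>
#include <linux/sched.h>

struct foo_tracker_data {			/* hypothetical per-page payload */
	unsigned long last_pid;
};

static bool need_foo_tracker(void)
{
	/* Typically gated on a boot parameter or Kconfig option. */
	return true;
}

static void init_foo_tracker(void)
{
	/* One-time setup once the page_ext areas have been allocated. */
}

struct page_ext_operations foo_tracker_ops = {
	.size			= sizeof(struct foo_tracker_data),
	.need			= need_foo_tracker,
	.init			= init_foo_tracker,
	.need_shared_flags	= false,	/* no bits needed in page_ext->flags */
};

static void foo_tracker_note_alloc(struct page *page)
{
	struct page_ext *page_ext = page_ext_get(page);
	struct foo_tracker_data *data;

	if (unlikely(!page_ext))		/* page_ext may be unavailable */
		return;

	data = page_ext_data(page_ext, &foo_tracker_ops);
	data->last_pid = current->pid;
	page_ext_put(page_ext);
}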
/* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions for diskquota-operations. When diskquota is configured these * macros expand to the right source-code. * * Author: Marco van Wieringen <mvw@planets.elm.net> */ #ifndef _LINUX_QUOTAOPS_ #define _LINUX_QUOTAOPS_ #include <linux/fs.h> #define DQUOT_SPACE_WARN 0x1 #define DQUOT_SPACE_RESERVE 0x2 #define DQUOT_SPACE_NOFAIL 0x4 static inline struct quota_info *sb_dqopt(struct super_block *sb) { return &sb->s_dquot; } /* i_mutex must be held */ static inline bool is_quota_modification(struct mnt_idmap *idmap, struct inode *inode, struct iattr *ia) { return ((ia->ia_valid & ATTR_SIZE) || i_uid_needs_update(idmap, ia, inode) || i_gid_needs_update(idmap, ia, inode)); } #if defined(CONFIG_QUOTA) #define quota_error(sb, fmt, args...) \ __quota_error((sb), __func__, fmt , ## args) extern __printf(3, 4) void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...); /* * declaration of quota_function calls in kernel.
*/ int dquot_initialize(struct inode *inode); bool dquot_initialize_needed(struct inode *inode); void dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, struct kqid qid); static inline struct dquot *dqgrab(struct dquot *dquot) { /* Make sure someone else has active reference to dquot */ WARN_ON_ONCE(!atomic_read(&dquot->dq_count)); WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)); atomic_inc(&dquot->dq_count); return dquot; } static inline bool dquot_is_busy(struct dquot *dquot) { if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return true; if (atomic_read(&dquot->dq_count) > 0) return true; return false; } void dqput(struct dquot *dquot); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), unsigned long priv); struct dquot *dquot_alloc(struct super_block *sb, int type); void dquot_destroy(struct dquot *dquot); int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags); void __dquot_free_space(struct inode *inode, qsize_t number, int flags); int dquot_alloc_inode(struct inode *inode); void dquot_claim_space_nodirty(struct inode *inode, qsize_t number); void dquot_free_inode(struct inode *inode); void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number); int dquot_disable(struct super_block *sb, int type, unsigned int flags); /* Suspend quotas on remount RO */ static inline int dquot_suspend(struct super_block *sb, int type) { return dquot_disable(sb, type, DQUOT_SUSPENDED); } int dquot_resume(struct super_block *sb, int type); int dquot_commit(struct dquot *dquot); int dquot_acquire(struct dquot *dquot); int dquot_release(struct dquot *dquot); int dquot_commit_info(struct super_block *sb, int type); int dquot_get_next_id(struct super_block *sb, struct kqid *qid); int dquot_mark_dquot_dirty(struct dquot *dquot); int dquot_file_open(struct inode *inode, struct file *file); int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags); int dquot_load_quota_inode(struct inode *inode, int type, int format_id, unsigned int flags); int dquot_quota_on(struct super_block *sb, int type, int format_id, const struct path *path); int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); int dquot_quota_off(struct super_block *sb, int type); int dquot_writeback_dquots(struct super_block *sb, int type); int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_state(struct super_block *sb, struct qc_state *state); int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); int dquot_get_next_dqblk(struct super_block *sb, struct kqid *id, struct qc_dqblk *di); int dquot_set_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr); static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) { return sb_dqopt(sb)->info + type; } /* * Functions for checking status of quota */ static inline bool sb_has_quota_usage_enabled(struct super_block *sb, int type) { return sb_dqopt(sb)->flags & dquot_state_flag(DQUOT_USAGE_ENABLED, type); } static inline bool sb_has_quota_limits_enabled(struct super_block *sb, int type) { return sb_dqopt(sb)->flags & dquot_state_flag(DQUOT_LIMITS_ENABLED, type); } static inline bool sb_has_quota_suspended(struct super_block *sb, int type) 
{ return sb_dqopt(sb)->flags & dquot_state_flag(DQUOT_SUSPENDED, type); } static inline unsigned sb_any_quota_suspended(struct super_block *sb) { return dquot_state_types(sb_dqopt(sb)->flags, DQUOT_SUSPENDED); } /* Does kernel know about any quota information for given sb + type? */ static inline bool sb_has_quota_loaded(struct super_block *sb, int type) { /* Currently if anything is on, then quota usage is on as well */ return sb_has_quota_usage_enabled(sb, type); } static inline unsigned sb_any_quota_loaded(struct super_block *sb) { return dquot_state_types(sb_dqopt(sb)->flags, DQUOT_USAGE_ENABLED); } static inline bool sb_has_quota_active(struct super_block *sb, int type) { return sb_has_quota_loaded(sb, type) && !sb_has_quota_suspended(sb, type); } /* * Operations supported for diskquotas. */ extern const struct dquot_operations dquot_operations; extern const struct quotactl_ops dquot_quotactl_sysfile_ops; #else static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type) { return 0; } static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type) { return 0; } static inline int sb_has_quota_suspended(struct super_block *sb, int type) { return 0; } static inline int sb_any_quota_suspended(struct super_block *sb) { return 0; } /* Does kernel know about any quota information for given sb + type? */ static inline int sb_has_quota_loaded(struct super_block *sb, int type) { return 0; } static inline int sb_any_quota_loaded(struct super_block *sb) { return 0; } static inline int sb_has_quota_active(struct super_block *sb, int type) { return 0; } static inline int dquot_initialize(struct inode *inode) { return 0; } static inline bool dquot_initialize_needed(struct inode *inode) { return false; } static inline void dquot_drop(struct inode *inode) { } static inline int dquot_alloc_inode(struct inode *inode) { return 0; } static inline void dquot_free_inode(struct inode *inode) { } static inline int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr) { return 0; } static inline int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { if (!(flags & DQUOT_SPACE_RESERVE)) inode_add_bytes(inode, number); return 0; } static inline void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { if (!(flags & DQUOT_SPACE_RESERVE)) inode_sub_bytes(inode, number); } static inline void dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { inode_add_bytes(inode, number); } static inline int dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { inode_sub_bytes(inode, number); return 0; } static inline int dquot_disable(struct super_block *sb, int type, unsigned int flags) { return 0; } static inline int dquot_suspend(struct super_block *sb, int type) { return 0; } static inline int dquot_resume(struct super_block *sb, int type) { return 0; } #define dquot_file_open generic_file_open static inline int dquot_writeback_dquots(struct super_block *sb, int type) { return 0; } #endif /* CONFIG_QUOTA */ static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr) { return __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN); } static inline void dquot_alloc_space_nofail(struct inode *inode, qsize_t nr) { __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN|DQUOT_SPACE_NOFAIL); mark_inode_dirty_sync(inode); } static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) { int ret; ret = dquot_alloc_space_nodirty(inode, nr); if (!ret) { /* * Mark inode fully dirty. 
Since we are allocating blocks, inode * would become fully dirty soon anyway and it reportedly * reduces lock contention. */ mark_inode_dirty(inode); } return ret; } static inline int dquot_alloc_block_nodirty(struct inode *inode, qsize_t nr) { return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits); } static inline void dquot_alloc_block_nofail(struct inode *inode, qsize_t nr) { dquot_alloc_space_nofail(inode, nr << inode->i_blkbits); } static inline int dquot_alloc_block(struct inode *inode, qsize_t nr) { return dquot_alloc_space(inode, nr << inode->i_blkbits); } static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr) { return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0); } static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr) { int ret; ret = dquot_prealloc_block_nodirty(inode, nr); if (!ret) mark_inode_dirty_sync(inode); return ret; } static inline int dquot_reserve_block(struct inode *inode, qsize_t nr) { return __dquot_alloc_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE); } static inline void dquot_claim_block(struct inode *inode, qsize_t nr) { dquot_claim_space_nodirty(inode, nr << inode->i_blkbits); mark_inode_dirty_sync(inode); } static inline void dquot_reclaim_block(struct inode *inode, qsize_t nr) { dquot_reclaim_space_nodirty(inode, nr << inode->i_blkbits); mark_inode_dirty_sync(inode); } static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr) { __dquot_free_space(inode, nr, 0); } static inline void dquot_free_space(struct inode *inode, qsize_t nr) { dquot_free_space_nodirty(inode, nr); mark_inode_dirty_sync(inode); } static inline void dquot_free_block_nodirty(struct inode *inode, qsize_t nr) { dquot_free_space_nodirty(inode, nr << inode->i_blkbits); } static inline void dquot_free_block(struct inode *inode, qsize_t nr) { dquot_free_space(inode, nr << inode->i_blkbits); } static inline void dquot_release_reservation_block(struct inode *inode, qsize_t nr) { __dquot_free_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_RESERVE); } unsigned int qtype_enforce_flag(int type); #endif /* _LINUX_QUOTAOPS_ */
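/*
 * Example sketch (hypothetical "foo" filesystem; foo_* helpers are made up):
 * typical use of the quota helpers above in a block allocation path.  Quota
 * tracking is attached with dquot_initialize(), the blocks are charged with
 * dquot_alloc_block() before the allocation is committed, and the charge is
 * rolled back with dquot_free_block() if the allocation fails.
 */
#include <linux/fs.h>
#include <linux/quotaops.h>

/* Stand-in for the filesystem's real low-level allocator (hypothetical). */
static int foo_allocate_extent(struct inode *inode, qsize_t nr_blocks)
{
	return 0;
}

static int foo_alloc_blocks(struct inode *inode, qsize_t nr_blocks)
{
	int err;

	/* Attach dquots to the inode (a no-op when quotas are not enabled). */
	err = dquot_initialize(inode);
	if (err)
		return err;

	/* Charge the blocks against the owner's quota (may return -EDQUOT). */
	err = dquot_alloc_block(inode, nr_blocks);
	if (err)
		return err;

	err = foo_allocate_extent(inode, nr_blocks);
	if (err) {
		/* Roll back the quota charge on failure. */
		dquot_free_block(inode, nr_blocks);
		return err;
	}

	return 0;
}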
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_TASK_H #define _LINUX_SCHED_TASK_H /* * Interface between the scheduler and various task lifetime (fork()/exit()) * functionality: */ #include <linux/rcupdate.h> #include <linux/refcount.h> #include <linux/sched.h> #include <linux/uaccess.h> struct task_struct; struct rusage; union thread_union; struct css_set; /* All the bits taken by the old clone syscall. */ #define CLONE_LEGACY_FLAGS 0xffffffffULL struct kernel_clone_args { u64 flags; int __user *pidfd; int __user *child_tid; int __user *parent_tid; const char *name; int exit_signal; u32 kthread:1; u32 io_thread:1; u32 user_worker:1; u32 no_files:1; unsigned long stack; unsigned long stack_size; unsigned long tls; pid_t *set_tid; /* Number of elements in *set_tid */ size_t set_tid_size; int cgroup; int idle; int (*fn)(void *); void *fn_arg; struct cgroup *cgrp; struct css_set *cset; }; /* * This serializes "schedule()" and also protects * the run-queue from deletions/modifications (but * _adding_ to the beginning of the run-queue has * a separate lock).
*/ extern rwlock_t tasklist_lock; extern spinlock_t mmlist_lock; extern union thread_union init_thread_union; extern struct task_struct init_task; extern int lockdep_tasklist_lock_is_held(void); extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); void __noreturn make_task_dead(int signr); extern void mm_cache_init(void); extern void proc_caches_init(void); extern void fork_init(void); extern void release_task(struct task_struct * p); extern int copy_thread(struct task_struct *, const struct kernel_clone_args *); extern void flush_thread(void); #ifdef CONFIG_HAVE_EXIT_THREAD extern void exit_thread(struct task_struct *tsk); #else static inline void exit_thread(struct task_struct *tsk) { } #endif extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *copy_process(struct pid *pid, int trace, int node, struct kernel_clone_args *args); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name, unsigned long flags); extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); int kernel_wait(pid_t pid, int *stat); extern void free_task(struct task_struct *tsk); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP extern void sched_exec(void); #else #define sched_exec() {} #endif static inline struct task_struct *get_task_struct(struct task_struct *t) { refcount_inc(&t->usage); return t; } extern void __put_task_struct(struct task_struct *t); extern void __put_task_struct_rcu_cb(struct rcu_head *rhp); static inline void put_task_struct(struct task_struct *t) { if (!refcount_dec_and_test(&t->usage)) return; /* * In !RT, it is always safe to call __put_task_struct(). * Under RT, we can only call it in preemptible context. */ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) { static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP); lock_map_acquire_try(&put_task_map); __put_task_struct(t); lock_map_release(&put_task_map); return; } /* * under PREEMPT_RT, we can't call put_task_struct * in atomic context because it will indirectly * acquire sleeping locks. * * call_rcu() will schedule delayed_put_task_struct_rcu() * to be called in process context. * * __put_task_struct() is called when * refcount_dec_and_test(&t->usage) succeeds. * * This means that it can't "conflict" with * put_task_struct_rcu_user() which abuses ->rcu the same * way; rcu_users has a reference so task->usage can't be * zero after rcu_users 1 -> 0 transition. * * delayed_free_task() also uses ->rcu, but it is only called * when it fails to fork a process. Therefore, there is no * way it can conflict with put_task_struct(). 
*/ call_rcu(&t->rcu, __put_task_struct_rcu_cb); } DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T)) static inline void put_task_struct_many(struct task_struct *t, int nr) { if (refcount_sub_and_test(nr, &t->usage)) __put_task_struct(t); } void put_task_struct_rcu_user(struct task_struct *task); /* Free all architecture-specific resources held by a thread. */ void release_thread(struct task_struct *dead_task); #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; #else # define arch_task_struct_size (sizeof(struct task_struct)) #endif #ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST /* * If an architecture has not declared a thread_struct whitelist we * must assume something there may need to be copied to userspace. */ static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { *offset = 0; /* Handle dynamically sized thread_struct. */ *size = arch_task_struct_size - offsetof(struct task_struct, thread); } #endif #ifdef CONFIG_VMAP_STACK static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) { return t->stack_vm_area; } #else static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) { return NULL; } #endif /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. */ static inline void task_lock(struct task_struct *p) { spin_lock(&p->alloc_lock); } static inline void task_unlock(struct task_struct *p) { spin_unlock(&p->alloc_lock); } DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T)) #endif /* _LINUX_SCHED_TASK_H */
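/*
 * Example sketch (illustrative only; foo_remember_task() is made up): taking
 * and dropping a task reference with the helpers above.  get_task_struct()
 * pins the task so it cannot be freed while it is being used, and
 * put_task_struct() drops the reference, deferring the final free via
 * call_rcu() under PREEMPT_RT in atomic context as described in the comment
 * above.  The DEFINE_FREE(put_task, ...) declaration lets the __free(put_task)
 * cleanup attribute drop the reference automatically on scope exit.
 */
#include <linux/cleanup.h>
#include <linux/sched/task.h>

static pid_t foo_remember_task(struct task_struct *t)
{
	/* Reference is dropped automatically when 'task' goes out of scope. */
	struct task_struct *task __free(put_task) = get_task_struct(t);

	/* ... use the pinned task here ... */
	return task->pid;
}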
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the AF_INET socket handler. * * Version: @(#)sock.h 1.0.4 05/13/93 * * Authors: Ross Biro * Fred N.
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Florian La Roche <flla@stud.uni-sb.de> * * Fixes: * Alan Cox : Volatiles in skbuff pointers. See * skbuff comments. May be overdone, * better to prove they can be removed * than the reverse. * Alan Cox : Added a zapped field for tcp to note * a socket is reset and must stay shut up * Alan Cox : New fields for options * Pauline Middelink : identd support * Alan Cox : Eliminate low level recv/recvfrom * David S. Miller : New socket lookup architecture. * Steve Whitehouse: Default routines for sock_ops * Arnaldo C. Melo : removed net_pinfo, tp_pinfo and made * protinfo be just a void pointer, as the * protocol specific parts were moved to * respective headers and ipv4/v6, etc now * use private slabcaches for its socks * Pedro Hortas : New flags field for socket options */ #ifndef _SOCK_H #define _SOCK_H #include <linux/hardirq.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/list_nulls.h> #include <linux/timer.h> #include <linux/cache.h> #include <linux/bitops.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/skbuff.h> /* struct sk_buff */ #include <linux/mm.h> #include <linux/security.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/page_counter.h> #include <linux/memcontrol.h> #include <linux/static_key.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/cgroup-defs.h> #include <linux/rbtree.h> #include <linux/rculist_nulls.h> #include <linux/poll.h> #include <linux/sockptr.h> #include <linux/indirect_call_wrapper.h> #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/llist.h> #include <net/dst.h> #include <net/checksum.h> #include <net/tcp_states.h> #include <linux/net_tstamp.h> #include <net/l3mdev.h> #include <uapi/linux/socket.h> /* * This structure really needs to be cleaned up. * Most of it is for TCP, and not used by any of * the other protocols. */ /* This is the per-socket lock. The spinlock provides a synchronization * between user contexts and software interrupt processing, whereas the * mini-semaphore synchronizes multiple users amongst themselves. 
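 *
 * (An editorial usage sketch follows; it is not part of the original
 *  header.)
 */

/*
 * Minimal sketch, with hypothetical function names: how the two levels of
 * socket_lock_t are commonly used.  Process context takes the sleeping
 * "owner" lock via lock_sock()/release_sock(); softirq context takes only
 * the spinlock via bh_lock_sock() and, if the owner is active, defers the
 * packet to the backlog.  All helpers used here are declared later in this
 * header, and the backlog limit passed below is purely illustrative.
 */
static inline void example_process_context(struct sock *sk)
{
	lock_sock(sk);		/* may sleep; marks the socket as owned */
	/* ... update socket state ... */
	release_sock(sk);	/* also processes the accumulated backlog */
}

static inline int example_softirq_context(struct sock *sk, struct sk_buff *skb)
{
	int rc = 0;

	bh_lock_sock(sk);			/* spinlock only, BH context */
	if (!sock_owned_by_user(sk))
		rc = sk_backlog_rcv(sk, skb);	/* owner idle: process now */
	else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
		rc = -ENOBUFS;			/* backlog full: caller drops skb */
	bh_unlock_sock(sk);
	return rc;
}

/* The per-socket lock type itself: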
*/ typedef struct { spinlock_t slock; int owned; wait_queue_head_t wq; /* * We express the mutex-alike socket_lock semantics * to the lock validator by explicitly managing * the slock as a lock variant (in addition to * the slock itself): */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif } socket_lock_t; struct sock; struct proto; struct net; typedef __u32 __bitwise __portpair; typedef __u64 __bitwise __addrpair; /** * struct sock_common - minimal network layer representation of sockets * @skc_daddr: Foreign IPv4 addr * @skc_rcv_saddr: Bound local IPv4 addr * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr * @skc_hash: hash value used with various protocol lookup tables * @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_dport: placeholder for inet_dport/tw_dport * @skc_num: placeholder for inet_num/tw_num * @skc_portpair: __u32 union of @skc_dport & @skc_num * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting * @skc_reuseport: %SO_REUSEPORT setting * @skc_ipv6only: socket is IPV6 only * @skc_net_refcnt: socket is using net ref counting * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @skc_v6_daddr: IPV6 destination address * @skc_v6_rcv_saddr: IPV6 source address * @skc_cookie: socket's cookie value * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_tx_queue_mapping: tx queue number for this connection * @skc_rx_queue_mapping: rx queue number for this connection * @skc_flags: place holder for sk_flags * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings * @skc_listener: connection request listener socket (aka rsk_listener) * [union with @skc_flags] * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row * [union with @skc_flags] * @skc_incoming_cpu: record/match cpu processing incoming packets * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled) * [union with @skc_incoming_cpu] * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number * [union with @skc_incoming_cpu] * @skc_refcnt: reference count * * This is the minimal network layer representation of sockets, the header * for struct sock and struct inet_timewait_sock. */ struct sock_common { union { __addrpair skc_addrpair; struct { __be32 skc_daddr; __be32 skc_rcv_saddr; }; }; union { unsigned int skc_hash; __u16 skc_u16hashes[2]; }; /* skc_dport && skc_num must be grouped as well */ union { __portpair skc_portpair; struct { __be16 skc_dport; __u16 skc_num; }; }; unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse:4; unsigned char skc_reuseport:1; unsigned char skc_ipv6only:1; unsigned char skc_net_refcnt:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; struct hlist_node skc_portaddr_node; }; struct proto *skc_prot; possible_net_t skc_net; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr skc_v6_daddr; struct in6_addr skc_v6_rcv_saddr; #endif atomic64_t skc_cookie; /* following fields are padding to force * offset(struct sock, sk_refcnt) == 128 on 64bit arches * assuming IPV6 is enabled. 
We use this padding differently * for different kind of 'sockets' */ union { unsigned long skc_flags; struct sock *skc_listener; /* request_sock */ struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */ }; /* * fields between dontcopy_begin/dontcopy_end * are not copied in sock_copy() */ /* private: */ int skc_dontcopy_begin[0]; /* public: */ union { struct hlist_node skc_node; struct hlist_nulls_node skc_nulls_node; }; unsigned short skc_tx_queue_mapping; #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING unsigned short skc_rx_queue_mapping; #endif union { int skc_incoming_cpu; u32 skc_rcv_wnd; u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */ }; refcount_t skc_refcnt; /* private: */ int skc_dontcopy_end[0]; union { u32 skc_rxhash; u32 skc_window_clamp; u32 skc_tw_snd_nxt; /* struct tcp_timewait_sock */ }; /* public: */ }; struct bpf_local_storage; struct sk_filter; /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer * @sk_kern_sock: True if sock is using kernel lock classes * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst * @sk_rx_dst_cookie: cookie for @sk_rx_dst * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_tsq_flags: TCP Small Queues flags * @sk_write_queue: Packet sending queue * @sk_omem_alloc: "o" is "option" or "other" * @sk_wmem_queued: persistent queue size * @sk_forward_alloc: space allocated forward * @sk_reserved_mem: space reserved and non-reclaimable for the socket * @sk_napi_id: id of the last napi context to receive data for sk * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) * @sk_pacing_status: Pacing status (requested, handled by sch_fq) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_gso_disabled: if set, NETIF_F_GSO_MASK is forbidden. * @sk_gso_type: GSO type (e.g. 
%SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, * IPV6_ADDRFORM for instance) * @sk_err: last error * @sk_err_soft: errors that don't cause failure but are the cause of a * persistent failure not just 'timed out' * @sk_drops: raw/udp drops counter * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner * @sk_prefer_busy_poll: prefer busypolling over softirq processing * @sk_busy_poll_budget: napi processing budget when busypolling * @sk_priority: %SO_PRIORITY setting * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family * @sk_peer_lock: lock protecting @sk_peer_pid and @sk_peer_cred * @sk_peer_pid: &struct pid for this socket's peer * @sk_peer_cred: %SO_PEERCRED setting * @sk_rcvlowat: %SO_RCVLOWAT setting * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting * @sk_txhash: computed flow hash for use on transmit * @sk_txrehash: enable TX hash rethink * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING flags * @sk_use_task_frag: allow sk_page_frag() to use current->task_frag. * Sockets that can be used under memory reclaim should * set this to false. * @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock * for timestamping * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head] * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start * @sk_disconnects: number of disconnect operations performed on this sock * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) * @sk_backlog_rcv: callback to process the backlog * @sk_validate_xmit_skb: ptr to an optional validate function * @sk_destruct: called at sock freeing time, i.e. 
when all refcnt == 0 * @sk_reuseport_cb: reuseport group container * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage * @sk_rcu: used during RCU grace period * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference */ struct sock { /* * Now struct inet_timewait_sock also uses sock_common, so please just * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; #define sk_node __sk_common.skc_node #define sk_nulls_node __sk_common.skc_nulls_node #define sk_refcnt __sk_common.skc_refcnt #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING #define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping #endif #define sk_dontcopy_begin __sk_common.skc_dontcopy_begin #define sk_dontcopy_end __sk_common.skc_dontcopy_end #define sk_hash __sk_common.skc_hash #define sk_portpair __sk_common.skc_portpair #define sk_num __sk_common.skc_num #define sk_dport __sk_common.skc_dport #define sk_addrpair __sk_common.skc_addrpair #define sk_daddr __sk_common.skc_daddr #define sk_rcv_saddr __sk_common.skc_rcv_saddr #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse #define sk_reuseport __sk_common.skc_reuseport #define sk_ipv6only __sk_common.skc_ipv6only #define sk_net_refcnt __sk_common.skc_net_refcnt #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net #define sk_v6_daddr __sk_common.skc_v6_daddr #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_cookie __sk_common.skc_cookie #define sk_incoming_cpu __sk_common.skc_incoming_cpu #define sk_flags __sk_common.skc_flags #define sk_rxhash __sk_common.skc_rxhash __cacheline_group_begin(sock_write_rx); atomic_t sk_drops; __s32 sk_peek_off; struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with * the per-socket spinlock held and requires low latency * access. Therefore we special case it's implementation. * Note : rmem_alloc is in this structure to fill a hole * on 64bit arches, not because its logically part of * backlog. 
*/ struct { atomic_t rmem_alloc; int len; struct sk_buff *head; struct sk_buff *tail; } sk_backlog; #define sk_rmem_alloc sk_backlog.rmem_alloc __cacheline_group_end(sock_write_rx); __cacheline_group_begin(sock_read_rx); /* early demux fields */ struct dst_entry __rcu *sk_rx_dst; int sk_rx_dst_ifindex; u32 sk_rx_dst_cookie; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_ll_usec; unsigned int sk_napi_id; u16 sk_busy_poll_budget; u8 sk_prefer_busy_poll; #endif u8 sk_userlocks; int sk_rcvbuf; struct sk_filter __rcu *sk_filter; union { struct socket_wq __rcu *sk_wq; /* private: */ struct socket_wq *sk_wq_raw; /* public: */ }; void (*sk_data_ready)(struct sock *sk); long sk_rcvtimeo; int sk_rcvlowat; __cacheline_group_end(sock_read_rx); __cacheline_group_begin(sock_read_rxtx); int sk_err; struct socket *sk_socket; struct mem_cgroup *sk_memcg; #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif __cacheline_group_end(sock_read_rxtx); __cacheline_group_begin(sock_write_rxtx); socket_lock_t sk_lock; u32 sk_reserved_mem; int sk_forward_alloc; u32 sk_tsflags; __cacheline_group_end(sock_write_rxtx); __cacheline_group_begin(sock_write_tx); int sk_write_pending; atomic_t sk_omem_alloc; int sk_sndbuf; int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; union { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; struct sk_buff_head sk_write_queue; u32 sk_dst_pending_confirm; u32 sk_pacing_status; /* see enum sk_pacing */ struct page_frag sk_frag; struct timer_list sk_timer; unsigned long sk_pacing_rate; /* bytes per second */ atomic_t sk_zckey; atomic_t sk_tskey; __cacheline_group_end(sock_write_tx); __cacheline_group_begin(sock_read_tx); unsigned long sk_max_pacing_rate; long sk_sndtimeo; u32 sk_priority; u32 sk_mark; struct dst_entry __rcu *sk_dst_cache; netdev_features_t sk_route_caps; #ifdef CONFIG_SOCK_VALIDATE_XMIT struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, struct net_device *dev, struct sk_buff *skb); #endif u16 sk_gso_type; u16 sk_gso_max_segs; unsigned int sk_gso_max_size; gfp_t sk_allocation; u32 sk_txhash; u8 sk_pacing_shift; bool sk_use_task_frag; __cacheline_group_end(sock_read_tx); /* * Because of non atomicity rules, all * changes are protected by socket lock. 
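 *
 * Editorial example (not part of the original header): a hypothetical
 * helper flipping one of the bitfields below must therefore run under the
 * socket lock, e.g.
 *
 *	lock_sock(sk);
 *	sk->sk_no_check_tx = 1;
 *	release_sock(sk);
 *
 * because these bits share a byte and a plain read-modify-write on them is
 * not atomic.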
*/ u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1; u8 sk_shutdown; u16 sk_type; u16 sk_protocol; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; ktime_t sk_stamp; #if BITS_PER_LONG==32 seqlock_t sk_stamp_seq; #endif int sk_disconnects; u8 sk_txrehash; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, sk_txtime_report_errors : 1, sk_txtime_unused : 6; void *sk_user_data; #ifdef CONFIG_SECURITY void *sk_security; #endif struct sock_cgroup_data sk_cgrp_data; void (*sk_state_change)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; #ifdef CONFIG_BPF_SYSCALL struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; }; enum sk_pacing { SK_PACING_NONE = 0, SK_PACING_NEEDED = 1, SK_PACING_FQ = 2, }; /* flag bits in sk_user_data * * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might * not be suitable for copying when cloning the socket. For instance, * it can point to a reference counted object. sk_user_data bottom * bit is set if pointer must not be copied. * * - SK_USER_DATA_BPF: Mark whether sk_user_data field is * managed/owned by a BPF reuseport array. This bit should be set * when sk_user_data's sk is added to the bpf's reuseport_array. * * - SK_USER_DATA_PSOCK: Mark whether pointer stored in * sk_user_data points to psock type. This bit should be set * when sk_user_data is assigned to a psock object. */ #define SK_USER_DATA_NOCOPY 1UL #define SK_USER_DATA_BPF 2UL #define SK_USER_DATA_PSOCK 4UL #define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\ SK_USER_DATA_PSOCK) /** * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied * @sk: socket */ static inline bool sk_user_data_is_nocopy(const struct sock *sk) { return ((uintptr_t)sk->sk_user_data & SK_USER_DATA_NOCOPY); } #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) /** * __locked_read_sk_user_data_with_flags - return the pointer * only if argument flags all has been set in sk_user_data. Otherwise * return NULL * * @sk: socket * @flags: flag bits * * The caller must be holding sk->sk_callback_lock. */ static inline void * __locked_read_sk_user_data_with_flags(const struct sock *sk, uintptr_t flags) { uintptr_t sk_user_data = (uintptr_t)rcu_dereference_check(__sk_user_data(sk), lockdep_is_held(&sk->sk_callback_lock)); WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); if ((sk_user_data & flags) == flags) return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); return NULL; } /** * __rcu_dereference_sk_user_data_with_flags - return the pointer * only if argument flags all has been set in sk_user_data. 
Otherwise * return NULL * * @sk: socket * @flags: flag bits */ static inline void * __rcu_dereference_sk_user_data_with_flags(const struct sock *sk, uintptr_t flags) { uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk)); WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); if ((sk_user_data & flags) == flags) return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); return NULL; } #define rcu_dereference_sk_user_data(sk) \ __rcu_dereference_sk_user_data_with_flags(sk, 0) #define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \ ({ \ uintptr_t __tmp1 = (uintptr_t)(ptr), \ __tmp2 = (uintptr_t)(flags); \ WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \ WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \ rcu_assign_pointer(__sk_user_data((sk)), \ __tmp1 | __tmp2); \ }) #define rcu_assign_sk_user_data(sk, ptr) \ __rcu_assign_sk_user_data_with_flags(sk, ptr, 0) static inline struct net *sock_net(const struct sock *sk) { return read_pnet(&sk->sk_net); } static inline void sock_net_set(struct sock *sk, struct net *net) { write_pnet(&sk->sk_net, net); } /* * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK * or not whether his port will be reused by someone else. SK_FORCE_REUSE * on a socket means that the socket will reuse everybody else's port * without looking at the other's sk_reuse value. */ #define SK_NO_REUSE 0 #define SK_CAN_REUSE 1 #define SK_FORCE_REUSE 2 int sk_set_peek_off(struct sock *sk, int val); static inline int sk_peek_offset(const struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { return READ_ONCE(sk->sk_peek_off); } return 0; } static inline void sk_peek_offset_bwd(struct sock *sk, int val) { s32 off = READ_ONCE(sk->sk_peek_off); if (unlikely(off >= 0)) { off = max_t(s32, off - val, 0); WRITE_ONCE(sk->sk_peek_off, off); } } static inline void sk_peek_offset_fwd(struct sock *sk, int val) { sk_peek_offset_bwd(sk, -val); } /* * Hashed lists helper routines */ static inline struct sock *sk_entry(const struct hlist_node *node) { return hlist_entry(node, struct sock, sk_node); } static inline struct sock *__sk_head(const struct hlist_head *head) { return hlist_entry(head->first, struct sock, sk_node); } static inline struct sock *sk_head(const struct hlist_head *head) { return hlist_empty(head) ? NULL : __sk_head(head); } static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head) { return hlist_nulls_entry(head->first, struct sock, sk_nulls_node); } static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head) { return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head); } static inline struct sock *sk_next(const struct sock *sk) { return hlist_entry_safe(sk->sk_node.next, struct sock, sk_node); } static inline struct sock *sk_nulls_next(const struct sock *sk) { return (!is_a_nulls(sk->sk_nulls_node.next)) ? hlist_nulls_entry(sk->sk_nulls_node.next, struct sock, sk_nulls_node) : NULL; } static inline bool sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); } static inline bool sk_hashed(const struct sock *sk) { return !sk_unhashed(sk); } static inline void sk_node_init(struct hlist_node *node) { node->pprev = NULL; } static inline void __sk_del_node(struct sock *sk) { __hlist_del(&sk->sk_node); } /* NB: equivalent to hlist_del_init_rcu */ static inline bool __sk_del_node_init(struct sock *sk) { if (sk_hashed(sk)) { __sk_del_node(sk); sk_node_init(&sk->sk_node); return true; } return false; } /* Grab socket reference count. This operation is valid only when sk is ALREADY grabbed f.e. 
it is found in hash table or a list and the lookup is made under lock preventing hash table modifications. */ static __always_inline void sock_hold(struct sock *sk) { refcount_inc(&sk->sk_refcnt); } /* Ungrab socket in the context, which assumes that socket refcnt cannot hit zero, f.e. it is true in context of any socketcall. */ static __always_inline void __sock_put(struct sock *sk) { refcount_dec(&sk->sk_refcnt); } static inline bool sk_del_node_init(struct sock *sk) { bool rc = __sk_del_node_init(sk); if (rc) { /* paranoid for a while -acme */ WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; } #define sk_del_node_init_rcu(sk) sk_del_node_init(sk) static inline bool __sk_nulls_del_node_init_rcu(struct sock *sk) { if (sk_hashed(sk)) { hlist_nulls_del_init_rcu(&sk->sk_nulls_node); return true; } return false; } static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) { bool rc = __sk_nulls_del_node_init_rcu(sk); if (rc) { /* paranoid for a while -acme */ WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; } static inline void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); } static inline void sk_add_node(struct sock *sk, struct hlist_head *list) { sock_hold(sk); __sk_add_node(sk, list); } static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) { sock_hold(sk); if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) hlist_add_tail_rcu(&sk->sk_node, list); else hlist_add_head_rcu(&sk->sk_node, list); } static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list) { sock_hold(sk); hlist_add_tail_rcu(&sk->sk_node, list); } static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { sock_hold(sk); __sk_nulls_add_node_rcu(sk, list); } static inline void __sk_del_bind_node(struct sock *sk) { __hlist_del(&sk->sk_bind_node); } static inline void sk_add_bind_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_bind_node, list); } #define sk_for_each(__sk, list) \ hlist_for_each_entry(__sk, list, sk_node) #define sk_for_each_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, sk_node) #define sk_nulls_for_each(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) #define sk_nulls_for_each_rcu(__sk, node, list) \ hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) #define sk_for_each_from(__sk) \ hlist_for_each_entry_from(__sk, sk_node) #define sk_nulls_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) #define sk_for_each_safe(__sk, tmp, list) \ hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @offset: offset of hlist_node within the struct. 
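 *
 * (Before this iterator, an editorial usage sketch for the plain
 *  sk_for_each() helper defined above; it is not part of the original
 *  header.)
 */

/*
 * Minimal sketch, with a hypothetical function name: walking a hash chain
 * with sk_for_each() and taking a reference on a match via sock_hold().
 * Real lookups run under the bucket lock or RCU, and the match condition
 * is protocol specific; the caller must sock_put() the returned socket.
 */
static inline struct sock *example_lookup_by_dport(struct hlist_head *head,
						   __be16 dport)
{
	struct sock *sk;

	sk_for_each(sk, head) {
		if (sk->sk_dport == dport) {
			sock_hold(sk);
			return sk;
		}
	}
	return NULL;
}

/* Offset-based RCU list iterator: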
* */ #define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ for (pos = rcu_dereference(hlist_first_rcu(head)); \ pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ pos = rcu_dereference(hlist_next_rcu(pos))) static inline struct user_namespace *sk_user_ns(const struct sock *sk) { /* Careful only use this in a context where these parameters * can not change and must all be valid, such as recvmsg from * userspace. */ return sk->sk_socket->file->f_cred->user_ns; } /* Sock flags */ enum sock_flags { SOCK_DEAD, SOCK_DONE, SOCK_URGINLINE, SOCK_KEEPOPEN, SOCK_LINGER, SOCK_DESTROY, SOCK_BROADCAST, SOCK_TIMESTAMP, SOCK_ZAPPED, SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */ SOCK_DBG, /* %SO_DEBUG setting */ SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_MEMALLOC, /* VM depends on this socket for swapping */ SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */ SOCK_FASYNC, /* fasync() active */ SOCK_RXQ_OVFL, SOCK_ZEROCOPY, /* buffers from userspace */ SOCK_WIFI_STATUS, /* push wifi status to userspace */ SOCK_NOFCS, /* Tell NIC not to do the Ethernet FCS. * Will use last 4 bytes of packet sent from * user-space instead. */ SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ SOCK_TXTIME, SOCK_XDP, /* XDP is attached */ SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) { nsk->sk_flags = osk->sk_flags; } static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) { __set_bit(flag, &sk->sk_flags); } static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag) { __clear_bit(flag, &sk->sk_flags); } static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit, int valbool) { if (valbool) sock_set_flag(sk, bit); else sock_reset_flag(sk, bit); } static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) { return test_bit(flag, &sk->sk_flags); } #ifdef CONFIG_NET DECLARE_STATIC_KEY_FALSE(memalloc_socks_key); static inline int sk_memalloc_socks(void) { return static_branch_unlikely(&memalloc_socks_key); } void __receive_sock(struct file *file); #else static inline int sk_memalloc_socks(void) { return 0; } static inline void __receive_sock(struct file *file) { } #endif static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) { return gfp_mask | (sk->sk_allocation & __GFP_MEMALLOC); } static inline void sk_acceptq_removed(struct sock *sk) { WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1); } static inline void sk_acceptq_added(struct sock *sk) { WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1); } /* Note: If you think the test should be: * return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog); * Then please take a look at commit 64a146513f8f ("[NET]: Revert incorrect accept queue backlog changes.") */ static inline bool sk_acceptq_is_full(const struct sock *sk) { return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog); } /* * Compute minimal free write space needed to queue new packets. 
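 * sk_stream_min_wspace() below returns this value.
 *
 * (First, an editorial usage sketch for the flag and accept-queue helpers
 *  defined above; it is not part of the original header.)
 */

/*
 * Minimal sketch, with a hypothetical function name: the common
 * listener-side test built from sock_flag() and sk_acceptq_is_full().
 * Real code also checks protocol specific state before queueing a new
 * child socket on the accept queue.
 */
static inline bool example_can_queue_child(const struct sock *listener)
{
	return !sock_flag(listener, SOCK_DEAD) &&
	       !sk_acceptq_is_full(listener);
}

/* Stream write-space helpers: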
*/ static inline int sk_stream_min_wspace(const struct sock *sk) { return READ_ONCE(sk->sk_wmem_queued) >> 1; } static inline int sk_stream_wspace(const struct sock *sk) { return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued); } static inline void sk_wmem_queued_add(struct sock *sk, int val) { WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); } static inline void sk_forward_alloc_add(struct sock *sk, int val) { /* Paired with lockless reads of sk->sk_forward_alloc */ WRITE_ONCE(sk->sk_forward_alloc, sk->sk_forward_alloc + val); } void sk_stream_write_space(struct sock *sk); /* OOB backlog add */ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { /* dont let skb dst not refcounted, we are going to leave rcu lock */ skb_dst_force(skb); if (!sk->sk_backlog.tail) WRITE_ONCE(sk->sk_backlog.head, skb); else sk->sk_backlog.tail->next = skb; WRITE_ONCE(sk->sk_backlog.tail, skb); skb->next = NULL; } /* * Take into account size of receive queue and backlog queue * Do not take into account this skb truesize, * to allow even a single big packet to come. */ static inline bool sk_rcvqueues_full(const struct sock *sk, unsigned int limit) { unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc); return qsize > limit; } /* The per-socket spinlock must be held here. */ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb, unsigned int limit) { if (sk_rcvqueues_full(sk, limit)) return -ENOBUFS; /* * If the skb was allocated from pfmemalloc reserves, only * allow SOCK_MEMALLOC sockets to use it as this socket is * helping free memory */ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) return -ENOMEM; __sk_add_backlog(sk, skb); sk->sk_backlog.len += skb->truesize; return 0; } int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)); static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { if (sk_memalloc_socks() && skb_pfmemalloc(skb)) return __sk_backlog_rcv(sk, skb); return INDIRECT_CALL_INET(sk->sk_backlog_rcv, tcp_v6_do_rcv, tcp_v4_do_rcv, sk, skb); } static inline void sk_incoming_cpu_update(struct sock *sk) { int cpu = raw_smp_processor_id(); if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu)) WRITE_ONCE(sk->sk_incoming_cpu, cpu); } static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS /* The following WRITE_ONCE() is paired with the READ_ONCE() * here, and another one in sock_rps_record_flow(). */ if (unlikely(READ_ONCE(sk->sk_rxhash) != skb->hash)) WRITE_ONCE(sk->sk_rxhash, skb->hash); #endif } static inline void sock_rps_reset_rxhash(struct sock *sk) { #ifdef CONFIG_RPS /* Paired with READ_ONCE() in sock_rps_record_flow() */ WRITE_ONCE(sk->sk_rxhash, 0); #endif } #define sk_wait_event(__sk, __timeo, __condition, __wait) \ ({ int __rc, __dis = __sk->sk_disconnects; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ *(__timeo) = wait_woken(__wait, \ TASK_INTERRUPTIBLE, \ *(__timeo)); \ } \ sched_annotate_sleep(); \ lock_sock(__sk); \ __rc = __dis == __sk->sk_disconnects ? 
__condition : -EPIPE; \ __rc; \ }) int sk_stream_wait_connect(struct sock *sk, long *timeo_p); int sk_stream_wait_memory(struct sock *sk, long *timeo_p); void sk_stream_wait_close(struct sock *sk, long timeo_p); int sk_stream_error(struct sock *sk, int flags, int err); void sk_stream_kill_queues(struct sock *sk); void sk_set_memalloc(struct sock *sk); void sk_clear_memalloc(struct sock *sk); void __sk_flush_backlog(struct sock *sk); static inline bool sk_flush_backlog(struct sock *sk) { if (unlikely(READ_ONCE(sk->sk_backlog.tail))) { __sk_flush_backlog(sk); return true; } return false; } int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb); struct request_sock_ops; struct timewait_sock_ops; struct inet_hashinfo; struct raw_hashinfo; struct smc_hashinfo; struct module; struct sk_psock; /* * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes * un-modified. Special care is taken when initializing object to zero. */ static inline void sk_prot_clear_nulls(struct sock *sk, int size) { if (offsetof(struct sock, sk_node.next) != 0) memset(sk, 0, offsetof(struct sock, sk_node.next)); memset(&sk->sk_node.pprev, 0, size - offsetof(struct sock, sk_node.pprev)); } /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface */ struct proto { void (*close)(struct sock *sk, long timeout); int (*pre_connect)(struct sock *sk, struct sockaddr *uaddr, int addr_len); int (*connect)(struct sock *sk, struct sockaddr *uaddr, int addr_len); int (*disconnect)(struct sock *sk, int flags); struct sock * (*accept)(struct sock *sk, int flags, int *err, bool kern); int (*ioctl)(struct sock *sk, int cmd, int *karg); int (*init)(struct sock *sk); void (*destroy)(struct sock *sk); void (*shutdown)(struct sock *sk, int how); int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *option); void (*keepalive)(struct sock *sk, int valbool); #ifdef CONFIG_COMPAT int (*compat_ioctl)(struct sock *sk, unsigned int cmd, unsigned long arg); #endif int (*sendmsg)(struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); void (*splice_eof)(struct socket *sock); int (*bind)(struct sock *sk, struct sockaddr *addr, int addr_len); int (*bind_add)(struct sock *sk, struct sockaddr *addr, int addr_len); int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); bool (*bpf_bypass_getsockopt)(int level, int optname); void (*release_cb)(struct sock *sk); /* Keeping track of sk's, looking them up, and port selection methods. */ int (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); void (*put_port)(struct sock *sk); #ifdef CONFIG_BPF_SYSCALL int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, bool restore); #endif /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS unsigned int inuse_idx; #endif #if IS_ENABLED(CONFIG_MPTCP) int (*forward_alloc_get)(const struct sock *sk); #endif bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. 
*/ int __percpu *per_cpu_fw_alloc; struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes. * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ unsigned long *memory_pressure; long *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; u32 sysctl_wmem_offset; u32 sysctl_rmem_offset; int max_header; bool no_autobind; struct kmem_cache *slab; unsigned int obj_size; unsigned int ipv6_pinfo_offset; slab_flags_t slab_flags; unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ unsigned int __percpu *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; union { struct inet_hashinfo *hashinfo; struct udp_table *udp_table; struct raw_hashinfo *raw_hash; struct smc_hashinfo *smc_hash; } h; struct module *owner; char name[32]; struct list_head node; int (*diag_destroy)(struct sock *sk, int err); } __randomize_layout; int proto_register(struct proto *prot, int alloc_slab); void proto_unregister(struct proto *prot); int sock_load_diag_module(int family, int protocol); INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake)); static inline int sk_forward_alloc_get(const struct sock *sk) { #if IS_ENABLED(CONFIG_MPTCP) if (sk->sk_prot->forward_alloc_get) return sk->sk_prot->forward_alloc_get(sk); #endif return READ_ONCE(sk->sk_forward_alloc); } static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; return sk->sk_prot->stream_memory_free ? 
INDIRECT_CALL_INET_1(sk->sk_prot->stream_memory_free, tcp_stream_memory_free, sk, wake) : true; } static inline bool sk_stream_memory_free(const struct sock *sk) { return __sk_stream_memory_free(sk, 0); } static inline bool __sk_stream_is_writeable(const struct sock *sk, int wake) { return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && __sk_stream_memory_free(sk, wake); } static inline bool sk_stream_is_writeable(const struct sock *sk) { return __sk_stream_is_writeable(sk, 0); } static inline int sk_under_cgroup_hierarchy(struct sock *sk, struct cgroup *ancestor) { #ifdef CONFIG_SOCK_CGROUP_DATA return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), ancestor); #else return -ENOTSUPP; #endif } static inline bool sk_has_memory_pressure(const struct sock *sk) { return sk->sk_prot->memory_pressure != NULL; } static inline bool sk_under_global_memory_pressure(const struct sock *sk) { return sk->sk_prot->memory_pressure && !!READ_ONCE(*sk->sk_prot->memory_pressure); } static inline bool sk_under_memory_pressure(const struct sock *sk) { if (!sk->sk_prot->memory_pressure) return false; if (mem_cgroup_sockets_enabled && sk->sk_memcg && mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; return !!READ_ONCE(*sk->sk_prot->memory_pressure); } static inline long proto_memory_allocated(const struct proto *prot) { return max(0L, atomic_long_read(prot->memory_allocated)); } static inline long sk_memory_allocated(const struct sock *sk) { return proto_memory_allocated(sk->sk_prot); } /* 1 MB per cpu, in page units */ #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) extern int sysctl_mem_pcpu_rsv; static inline void proto_memory_pcpu_drain(struct proto *proto) { int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0); if (val) atomic_long_add(val, proto->memory_allocated); } static inline void sk_memory_allocated_add(const struct sock *sk, int val) { struct proto *proto = sk->sk_prot; val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val); if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv))) proto_memory_pcpu_drain(proto); } static inline void sk_memory_allocated_sub(const struct sock *sk, int val) { struct proto *proto = sk->sk_prot; val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val); if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv))) proto_memory_pcpu_drain(proto); } #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 static inline void sk_sockets_allocated_dec(struct sock *sk) { percpu_counter_add_batch(sk->sk_prot->sockets_allocated, -1, SK_ALLOC_PERCPU_COUNTER_BATCH); } static inline void sk_sockets_allocated_inc(struct sock *sk) { percpu_counter_add_batch(sk->sk_prot->sockets_allocated, 1, SK_ALLOC_PERCPU_COUNTER_BATCH); } static inline u64 sk_sockets_allocated_read_positive(struct sock *sk) { return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); } static inline int proto_sockets_allocated_sum_positive(struct proto *prot) { return percpu_counter_sum_positive(prot->sockets_allocated); } static inline bool proto_memory_pressure(struct proto *prot) { if (!prot->memory_pressure) return false; return !!READ_ONCE(*prot->memory_pressure); } #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse { int all; int val[PROTO_INUSE_NR]; }; static inline void sock_prot_inuse_add(const struct net *net, const struct proto *prot, int val) { this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); } static inline void sock_inuse_add(const struct net *net, int val) { this_cpu_add(net->core.prot_inuse->all, val); } int 
sock_prot_inuse_get(struct net *net, struct proto *proto); int sock_inuse_get(struct net *net); #else static inline void sock_prot_inuse_add(const struct net *net, const struct proto *prot, int val) { } static inline void sock_inuse_add(const struct net *net, int val) { } #endif /* With per-bucket locks this operation is not-atomic, so that * this version is not worse. */ static inline int __sk_prot_rehash(struct sock *sk) { sk->sk_prot->unhash(sk); return sk->sk_prot->hash(sk); } /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) /* Sockets 0-1023 can't be bound to unless you are superuser */ #define PROT_SOCK 1024 #define SHUTDOWN_MASK 3 #define RCV_SHUTDOWN 1 #define SEND_SHUTDOWN 2 #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 struct socket_alloc { struct socket socket; struct inode vfs_inode; }; static inline struct socket *SOCKET_I(struct inode *inode) { return &container_of(inode, struct socket_alloc, vfs_inode)->socket; } static inline struct inode *SOCK_INODE(struct socket *socket) { return &container_of(socket, struct socket_alloc, socket)->vfs_inode; } /* * Functions for memory accounting */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind); int __sk_mem_schedule(struct sock *sk, int size, int kind); void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); #define SK_MEM_SEND 0 #define SK_MEM_RECV 1 /* sysctl_mem values are in pages */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { return READ_ONCE(sk->sk_prot->sysctl_mem[index]); } static inline int sk_mem_pages(int amt) { return (amt + PAGE_SIZE - 1) >> PAGE_SHIFT; } static inline bool sk_has_account(struct sock *sk) { /* return true if protocol supports memory accounting */ return !!sk->sk_prot->memory_allocated; } static inline bool sk_wmem_schedule(struct sock *sk, int size) { int delta; if (!sk_has_account(sk)) return true; delta = size - sk->sk_forward_alloc; return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND); } static inline bool sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) { int delta; if (!sk_has_account(sk)) return true; delta = size - sk->sk_forward_alloc; return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) || skb_pfmemalloc(skb); } static inline int sk_unused_reserved_mem(const struct sock *sk) { int unused_mem; if (likely(!sk->sk_reserved_mem)) return 0; unused_mem = sk->sk_reserved_mem - sk->sk_wmem_queued - atomic_read(&sk->sk_rmem_alloc); return unused_mem > 0 ? unused_mem : 0; } static inline void sk_mem_reclaim(struct sock *sk) { int reclaimable; if (!sk_has_account(sk)) return; reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); if (reclaimable >= (int)PAGE_SIZE) __sk_mem_reclaim(sk, reclaimable); } static inline void sk_mem_reclaim_final(struct sock *sk) { sk->sk_reserved_mem = 0; sk_mem_reclaim(sk); } static inline void sk_mem_charge(struct sock *sk, int size) { if (!sk_has_account(sk)) return; sk_forward_alloc_add(sk, -size); } static inline void sk_mem_uncharge(struct sock *sk, int size) { if (!sk_has_account(sk)) return; sk_forward_alloc_add(sk, size); sk_mem_reclaim(sk); } /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. * * Mark both the sk_lock and the sk_lock.slock as a * per-address-family lock class. 
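 *
 * (Before the macro, an editorial sketch of the memory-accounting helpers
 *  defined above; it is not part of the original header.)
 */

/*
 * Minimal sketch, with a hypothetical function name: charging send memory
 * for an skb.  sk_wmem_schedule() checks sk_forward_alloc and falls back
 * to __sk_mem_schedule() when more is needed; sk_mem_charge() then debits
 * sk_forward_alloc.  Real senders also update sk_wmem_queued and queue the
 * skb under the socket lock.
 */
static inline bool example_charge_tx_skb(struct sock *sk, struct sk_buff *skb)
{
	if (!sk_wmem_schedule(sk, skb->truesize))
		return false;	/* over the protocol's memory limits */
	sk_mem_charge(sk, skb->truesize);
	return true;
}

/* Lock-class initialisation macro described above: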
*/ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ sizeof((sk)->sk_lock)); \ lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ (skey), (sname)); \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) static inline bool lockdep_sock_is_held(const struct sock *sk) { return lockdep_is_held(&sk->sk_lock) || lockdep_is_held(&sk->sk_lock.slock); } void lock_sock_nested(struct sock *sk, int subclass); static inline void lock_sock(struct sock *sk) { lock_sock_nested(sk, 0); } void __lock_sock(struct sock *sk); void __release_sock(struct sock *sk); void release_sock(struct sock *sk); /* BH context may only use the following locking interface. */ #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) #define bh_lock_sock_nested(__sk) \ spin_lock_nested(&((__sk)->sk_lock.slock), \ SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); /** * lock_sock_fast - fast version of lock_sock * @sk: socket * * This version should be used for very small section, where process wont block * return false if fast path is taken: * * sk_lock.slock locked, owned = 0, BH disabled * * return true if slow path is taken: * * sk_lock.slock unlocked, owned = 1, BH enabled */ static inline bool lock_sock_fast(struct sock *sk) { /* The sk_lock has mutex_lock() semantics here. */ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); return __lock_sock_fast(sk); } /* fast socket lock variant for caller already holding a [different] socket lock */ static inline bool lock_sock_fast_nested(struct sock *sk) { mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); return __lock_sock_fast(sk); } /** * unlock_sock_fast - complement of lock_sock_fast * @sk: socket * @slow: slow mode * * fast unlock socket for user context. * If slow mode is on, we call regular release_sock() */ static inline void unlock_sock_fast(struct sock *sk, bool slow) __releases(&sk->sk_lock.slock) { if (slow) { release_sock(sk); __release(&sk->sk_lock.slock); } else { mutex_release(&sk->sk_lock.dep_map, _RET_IP_); spin_unlock_bh(&sk->sk_lock.slock); } } void sockopt_lock_sock(struct sock *sk); void sockopt_release_sock(struct sock *sk); bool sockopt_ns_capable(struct user_namespace *ns, int cap); bool sockopt_capable(int cap); /* Used by processes to "lock" a socket state, so that * interrupts and bottom half handlers won't change it * from under us. It essentially blocks any incoming * packets, so that we won't get any new data or any * packets that change the state of the socket. * * While locked, BH processing will add new packets to * the backlog queue. This queue is processed by the * owner of the socket lock right before it is released. * * Since ~2.3.5 it is also exclusive sleep lock serializing * accesses from user process context. 
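 *
 * (An editorial sketch of the fast-lock helpers defined above follows; it
 *  is not part of the original header.)
 */

/*
 * Minimal sketch, with a hypothetical function name: the canonical
 * lock_sock_fast()/unlock_sock_fast() pairing for short sections that
 * usually do not need the full owner lock.  The boolean tells
 * unlock_sock_fast() whether the slow path (a real release_sock()) must
 * be taken.
 */
static inline void example_short_locked_section(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);

	/* ... short, non-sleeping work on the socket ... */

	unlock_sock_fast(sk, slow);
}

/* Ownership / lockdep assertions: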
*/ static inline void sock_owned_by_me(const struct sock *sk) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(!lockdep_sock_is_held(sk) && debug_locks); #endif } static inline void sock_not_owned_by_me(const struct sock *sk) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); #endif } static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); return sk->sk_lock.owned; } static inline bool sock_owned_by_user_nocheck(const struct sock *sk) { return sk->sk_lock.owned; } static inline void sock_release_ownership(struct sock *sk) { DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk)); sk->sk_lock.owned = 0; /* The sk_lock has mutex_unlock() semantics: */ mutex_release(&sk->sk_lock.dep_map, _RET_IP_); } /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { struct sock *sk = (struct sock *)csk; return !sock_owned_by_user_nocheck(sk) && !spin_is_locked(&sk->sk_lock.slock); } struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); void sk_free_unlock_clone(struct sock *sk); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); void __sock_wfree(struct sk_buff *skb); void sock_wfree(struct sk_buff *skb); struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, gfp_t priority); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); void sock_efree(struct sk_buff *skb); #ifdef CONFIG_INET void sock_edemux(struct sk_buff *skb); void sock_pfree(struct sk_buff *skb); #else #define sock_edemux sock_efree #endif int sk_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int sock_setsockopt(struct socket *sock, int level, int op, sockptr_t optval, unsigned int optlen); int do_sock_setsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, int optlen); int do_sock_getsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, sockptr_t optlen); int sk_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, unsigned long data_len, int noblock, int *errcode, int max_page_order); static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode) { return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); } void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); void sock_kfree_s(struct sock *sk, void *mem, int size); void sock_kzfree_s(struct sock *sk, void *mem, int size); void sk_send_sigurg(struct sock *sk); static inline void sock_replace_proto(struct sock *sk, struct proto *proto) { if (sk->sk_socket) clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); WRITE_ONCE(sk->sk_prot, proto); } struct sockcm_cookie { u64 transmit_time; u32 mark; u32 tsflags; }; static inline void sockcm_init(struct sockcm_cookie *sockc, const struct sock *sk) { *sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags) }; } int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, struct sockcm_cookie *sockc); int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc); /* * 
Functions to fill in entries in struct proto_ops when a protocol * does not implement a particular function. */ int sock_no_bind(struct socket *, struct sockaddr *, int); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); int sock_no_listen(struct socket *, int); int sock_no_shutdown(struct socket *, int); int sock_no_sendmsg(struct socket *, struct msghdr *, size_t); int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len); int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); /* * Functions to fill in entries in struct proto_ops when a protocol * uses the inet style. */ int sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int sock_common_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen); void sk_common_release(struct sock *sk); /* * Default socket callbacks and setup code */ /* Initialise core socket variables using an explicit uid. */ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid); /* Initialise core socket variables. * Assumes struct socket *sock is embedded in a struct socket_alloc. */ void sock_init_data(struct socket *sock, struct sock *sk); /* * Socket reference counting postulates. * * * Each user of socket SHOULD hold a reference count. * * Each access point to socket (an hash table bucket, reference from a list, * running timer, skb in flight MUST hold a reference count. * * When reference count hits 0, it means it will never increase back. * * When reference count hits 0, it means that no references from * outside exist to this socket and current process on current CPU * is last user and may/should destroy this socket. * * sk_free is called from any context: process, BH, IRQ. When * it is called, socket has no references from outside -> sk_free * may release descendant resources allocated by the socket, but * to the time when it is called, socket is NOT referenced by any * hash tables, lists etc. * * Packets, delivered from outside (from network or from another process) * and enqueued on receive/error queues SHOULD NOT grab reference count, * when they sit in queue. Otherwise, packets will leak to hole, when * socket is looked up by one cpu and unhasing is made by another CPU. * It is true for udp/raw, netlink (leak to receive and error queues), tcp * (leak to backlog). Packet socket does all the processing inside * BR_NETPROTO_LOCK, so that it has not this race condition. UNIX sockets * use separate SMP lock, so that they are prone too. */ /* Ungrab socket and destroy it, if it was the last reference. */ static inline void sock_put(struct sock *sk) { if (refcount_dec_and_test(&sk->sk_refcnt)) sk_free(sk); } /* Generic version of sock_put(), dealing with all sockets * (TCP_TIMEWAIT, TCP_NEW_SYN_RECV, ESTABLISHED...) 
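 *
 * (An editorial sketch of these reference-counting rules follows; it is
 *  not part of the original header.)
 */

/*
 * Minimal sketch, with a hypothetical function name: taking a temporary
 * extra reference on a socket the caller already legitimately references
 * (e.g. just looked up under a lock), per the postulates above.
 * sock_hold() is only valid while sk_refcnt is known to be non-zero;
 * sock_put() frees the socket when the last reference is dropped.
 */
static inline void example_borrow_sock(struct sock *sk)
{
	sock_hold(sk);
	/* ... pass sk to deferred work, or keep using it after
	 * dropping the lookup lock ... */
	sock_put(sk);
}

/* Generic put, also valid for TCP_TIMEWAIT and TCP_NEW_SYN_RECV sockets: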
*/ void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) { /* sk_tx_queue_mapping accept only upto a 16-bit value */ if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX)) return; /* Paired with READ_ONCE() in sk_tx_queue_get() and * other WRITE_ONCE() because socket lock might be not held. */ WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); } #define NO_QUEUE_MAPPING USHRT_MAX static inline void sk_tx_queue_clear(struct sock *sk) { /* Paired with READ_ONCE() in sk_tx_queue_get() and * other WRITE_ONCE() because socket lock might be not held. */ WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING); } static inline int sk_tx_queue_get(const struct sock *sk) { if (sk) { /* Paired with WRITE_ONCE() in sk_tx_queue_clear() * and sk_tx_queue_set(). */ int val = READ_ONCE(sk->sk_tx_queue_mapping); if (val != NO_QUEUE_MAPPING) return val; } return -1; } static inline void __sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb, bool force_set) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING if (skb_rx_queue_recorded(skb)) { u16 rx_queue = skb_get_rx_queue(skb); if (force_set || unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue)) WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue); } #endif } static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) { __sk_rx_queue_set(sk, skb, true); } static inline void sk_rx_queue_update(struct sock *sk, const struct sk_buff *skb) { __sk_rx_queue_set(sk, skb, false); } static inline void sk_rx_queue_clear(struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING WRITE_ONCE(sk->sk_rx_queue_mapping, NO_QUEUE_MAPPING); #endif } static inline int sk_rx_queue_get(const struct sock *sk) { #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING if (sk) { int res = READ_ONCE(sk->sk_rx_queue_mapping); if (res != NO_QUEUE_MAPPING) return res; } #endif return -1; } static inline void sk_set_socket(struct sock *sk, struct socket *sock) { sk->sk_socket = sock; } static inline wait_queue_head_t *sk_sleep(struct sock *sk) { BUILD_BUG_ON(offsetof(struct socket_wq, wait) != 0); return &rcu_dereference_raw(sk->sk_wq)->wait; } /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. * Note that parent inode held reference count on this struct sock, * we do not release it in this function, because protocol * probably wants some additional cleanups or even continuing * to work with this socket (TCP). */ static inline void sock_orphan(struct sock *sk) { write_lock_bh(&sk->sk_callback_lock); sock_set_flag(sk, SOCK_DEAD); sk_set_socket(sk, NULL); sk->sk_wq = NULL; write_unlock_bh(&sk->sk_callback_lock); } static inline void sock_graft(struct sock *sk, struct socket *parent) { WARN_ON(parent->sk); write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); parent->sk = sk; sk_set_socket(sk, parent); sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } kuid_t sock_i_uid(struct sock *sk); unsigned long __sock_i_ino(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) { return sk ? 
sk->sk_uid : make_kuid(net->user_ns, 0); } static inline u32 net_tx_rndhash(void) { u32 v = get_random_u32(); return v ?: 1; } static inline void sk_set_txhash(struct sock *sk) { /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); } static inline bool sk_rethink_txhash(struct sock *sk) { if (sk->sk_txhash && sk->sk_txrehash == SOCK_TXREHASH_ENABLED) { sk_set_txhash(sk); return true; } return false; } static inline struct dst_entry * __sk_dst_get(const struct sock *sk) { return rcu_dereference_check(sk->sk_dst_cache, lockdep_sock_is_held(sk)); } static inline struct dst_entry * sk_dst_get(const struct sock *sk) { struct dst_entry *dst; rcu_read_lock(); dst = rcu_dereference(sk->sk_dst_cache); if (dst && !rcuref_get(&dst->__rcuref)) dst = NULL; rcu_read_unlock(); return dst; } static inline void __dst_negative_advice(struct sock *sk) { struct dst_entry *ndst, *dst = __sk_dst_get(sk); if (dst && dst->ops->negative_advice) { ndst = dst->ops->negative_advice(dst); if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } } } static inline void dst_negative_advice(struct sock *sk) { sk_rethink_txhash(sk); __dst_negative_advice(sk); } static inline void __sk_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old_dst; sk_tx_queue_clear(sk); WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = rcu_dereference_protected(sk->sk_dst_cache, lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_dst_cache, dst); dst_release(old_dst); } static inline void sk_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old_dst; sk_tx_queue_clear(sk); WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } static inline void __sk_dst_reset(struct sock *sk) { __sk_dst_set(sk, NULL); } static inline void sk_dst_reset(struct sock *sk) { sk_dst_set(sk, NULL); } struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); static inline void sk_dst_confirm(struct sock *sk) { if (!READ_ONCE(sk->sk_dst_pending_confirm)) WRITE_ONCE(sk->sk_dst_pending_confirm, 1); } static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) { if (skb_get_dst_pending_confirm(skb)) { struct sock *sk = skb->sk; if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) WRITE_ONCE(sk->sk_dst_pending_confirm, 0); neigh_confirm(n); } } bool sk_mc_loop(const struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) { return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); } void sk_setup_caps(struct sock *sk, struct dst_entry *dst); static inline void sk_gso_disable(struct sock *sk) { sk->sk_gso_disabled = 1; sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, char *to, int copy, int offset) { if (skb->ip_summed == CHECKSUM_NONE) { __wsum csum = 0; if (!csum_and_copy_from_iter_full(to, copy, &csum, from)) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, offset); } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { if (!copy_from_iter_full_nocache(to, copy, from)) return -EFAULT; } else if (!copy_from_iter_full(to, copy, from)) return -EFAULT; return 0; } static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, int copy) { int err, offset = skb->len; err = skb_do_copy_data_nocache(sk, 
skb, from, skb_put(skb, copy), copy, offset); if (err) __skb_trim(skb, offset); return err; } static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from, struct sk_buff *skb, struct page *page, int off, int copy) { int err; err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + off, copy, skb->len); if (err) return err; skb_len_add(skb, copy); sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; } /** * sk_wmem_alloc_get - returns write allocations * @sk: socket * * Return: sk_wmem_alloc minus initial offset of one */ static inline int sk_wmem_alloc_get(const struct sock *sk) { return refcount_read(&sk->sk_wmem_alloc) - 1; } /** * sk_rmem_alloc_get - returns read allocations * @sk: socket * * Return: sk_rmem_alloc */ static inline int sk_rmem_alloc_get(const struct sock *sk) { return atomic_read(&sk->sk_rmem_alloc); } /** * sk_has_allocations - check if allocations are outstanding * @sk: socket * * Return: true if socket has write or read allocations */ static inline bool sk_has_allocations(const struct sock *sk) { return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); } /** * skwq_has_sleeper - check if there are any waiting processes * @wq: struct socket_wq * * Return: true if socket_wq has waiting processes * * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. * * Consider following tcp code paths:: * * CPU1 CPU2 * sys_select receive packet * ... ... * __add_wait_queue update tp->rcv_nxt * ... ... * tp->rcv_nxt check sock_def_readable * ... { * schedule rcu_read_lock(); * wq = rcu_dereference(sk->sk_wq); * if (wq && waitqueue_active(&wq->wait)) * wake_up_interruptible(&wq->wait) * ... * } * * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 * could then endup calling schedule and sleep forever if there are no more * data on the socket. * */ static inline bool skwq_has_sleeper(struct socket_wq *wq) { return wq && wq_has_sleeper(&wq->wait); } /** * sock_poll_wait - place memory barrier behind the poll_wait call. * @filp: file * @sock: socket to wait on * @p: poll_table * * See the comments in the wq_has_sleeper function. */ static inline void sock_poll_wait(struct file *filp, struct socket *sock, poll_table *p) { if (!poll_does_not_wait(p)) { poll_wait(filp, &sock->wq.wait, p); /* We need to be sure we are in sync with the * socket flags modification. * * This memory barrier is paired in the wq_has_sleeper. */ smp_mb(); } } static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) { /* This pairs with WRITE_ONCE() in sk_set_txhash() */ u32 txhash = READ_ONCE(sk->sk_txhash); if (txhash) { skb->l4_hash = 1; skb->hash = txhash; } } void skb_set_owner_w(struct sk_buff *skb, struct sock *sk); /* * Queue a received datagram if it will fit. Stream and sequenced * protocols can't normally use this as they need to fit buffers in * and play with them. * * Inlined as it's very short and called for pretty much every * packet ever received. 
*/ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) { skb_orphan(skb); skb->sk = sk; skb->destructor = sock_rfree; atomic_add(skb->truesize, &sk->sk_rmem_alloc); sk_mem_charge(sk, skb->truesize); } static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk) { if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { skb_orphan(skb); skb->destructor = sock_efree; skb->sk = sk; return true; } return false; } static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk) { skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (skb) { if (sk_rmem_schedule(sk, skb, skb->truesize)) { skb_set_owner_r(skb, sk); return skb; } __kfree_skb(skb); } return NULL; } static inline void skb_prepare_for_gro(struct sk_buff *skb) { if (skb->destructor != sock_wfree) { skb_orphan(skb); return; } skb->slow_gro = 1; } void sk_reset_timer(struct sock *sk, struct timer_list *timer, unsigned long expires); void sk_stop_timer(struct sock *sk, struct timer_list *timer); void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer); int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { return sock_queue_rcv_skb_reason(sk, skb, NULL); } int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); struct sk_buff *sock_dequeue_err_skb(struct sock *sk); /* * Recover an error report and clear atomically */ static inline int sock_error(struct sock *sk) { int err; /* Avoid an atomic operation for the common case. * This is racy since another cpu/thread can change sk_err under us. */ if (likely(data_race(!sk->sk_err))) return 0; err = xchg(&sk->sk_err, 0); return -err; } void sk_error_report(struct sock *sk); static inline unsigned long sock_wspace(struct sock *sk) { int amt = 0; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc); if (amt < 0) amt = 0; } return amt; } /* Note: * We use sk->sk_wq_raw, from contexts knowing this * pointer is not NULL and cannot disappear/change. */ static inline void sk_set_bit(int nr, struct sock *sk) { if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) && !sock_flag(sk, SOCK_FASYNC)) return; set_bit(nr, &sk->sk_wq_raw->flags); } static inline void sk_clear_bit(int nr, struct sock *sk) { if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) && !sock_flag(sk, SOCK_FASYNC)) return; clear_bit(nr, &sk->sk_wq_raw->flags); } static inline void sk_wake_async(const struct sock *sk, int how, int band) { if (sock_flag(sk, SOCK_FASYNC)) { rcu_read_lock(); sock_wake_async(rcu_dereference(sk->sk_wq), how, band); rcu_read_unlock(); } } /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might * need sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak. * Note: for send buffers, TCP works better if we can build two skbs at * minimum. 
*/ #define TCP_SKB_MIN_TRUESIZE (2048 + SKB_DATA_ALIGN(sizeof(struct sk_buff))) #define SOCK_MIN_SNDBUF (TCP_SKB_MIN_TRUESIZE * 2) #define SOCK_MIN_RCVBUF TCP_SKB_MIN_TRUESIZE static inline void sk_stream_moderate_sndbuf(struct sock *sk) { u32 val; if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) return; val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); val = max_t(u32, val, sk_unused_reserved_mem(sk)); WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF)); } /** * sk_page_frag - return an appropriate page_frag * @sk: socket * * Use the per task page_frag instead of the per socket one for * optimization when we know that we're in process context and own * everything that's associated with %current. * * Both direct reclaim and page faults can nest inside other * socket operations and end up recursing into sk_page_frag() * while it's already in use: explicitly avoid task page_frag * when users disable sk_use_task_frag. * * Return: a per task page_frag if context allows that, * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { if (sk->sk_use_task_frag) return &current->task_frag; return &sk->sk_frag; } bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); /* * Default write policy as shown to user space via poll/select/SIGIO */ static inline bool sock_writeable(const struct sock *sk) { return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); } static inline gfp_t gfp_any(void) { return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } static inline gfp_t gfp_memcg_charge(void) { return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : sk->sk_rcvtimeo; } static inline long sock_sndtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : sk->sk_sndtimeo; } static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len) { int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len); return v ?: 1; } /* Alas, with timeout socket operations are not restartable. * Compare this to poll(). */ static inline int sock_intr_errno(long timeo) { return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR; } struct sock_skb_cb { u32 dropcount; }; /* Store sock_skb_cb at the end of skb->cb[] so protocol families * using skb->cb[] would keep using it directly and utilize its * alignement guarantee. */ #define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \ sizeof(struct sock_skb_cb))) #define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \ SOCK_SKB_CB_OFFSET)) #define sock_skb_cb_check_size(size) \ BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET) static inline void sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) { SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ? 
atomic_read(&sk->sk_drops) : 0; } static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) { int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs); atomic_add(segs, &sk->sk_drops); } static inline ktime_t sock_read_timestamp(struct sock *sk) { #if BITS_PER_LONG==32 unsigned int seq; ktime_t kt; do { seq = read_seqbegin(&sk->sk_stamp_seq); kt = sk->sk_stamp; } while (read_seqretry(&sk->sk_stamp_seq, seq)); return kt; #else return READ_ONCE(sk->sk_stamp); #endif } static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) { #if BITS_PER_LONG==32 write_seqlock(&sk->sk_stamp_seq); sk->sk_stamp = kt; write_sequnlock(&sk->sk_stamp_seq); #else WRITE_ONCE(sk->sk_stamp, kt); #endif } void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); static inline void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); u32 tsflags = READ_ONCE(sk->sk_tsflags); ktime_t kt = skb->tstamp; /* * generate control messages if * - receive time stamping in software requested * - software time stamp available and wanted * - hardware time stamps available and wanted */ if (sock_flag(sk, SOCK_RCVTSTAMP) || (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) || (kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) || (hwtstamps->hwtstamp && (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else sock_write_timestamp(sk, kt); if (sock_flag(sk, SOCK_WIFI_STATUS) && skb_wifi_acked_valid(skb)) __sock_recv_wifi_status(msg, sk, skb); } void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); #define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC) static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ (1UL << SOCK_RCVMARK)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) if (sk->sk_flags & FLAGS_RECV_CMSGS || READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY) __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sock_read_timestamp(sk) == SK_DEFAULT_STAMP)) sock_write_timestamp(sk, 0); } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); /** * _sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping * @tskey: filled in with next sk_tskey (not for TCP, which uses seqno) * * Note: callers should take care of initial ``*tx_flags`` value (usually 0) */ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, __u8 *tx_flags, __u32 *tskey) { if (unlikely(tsflags)) { __sock_tx_timestamp(tsflags, tx_flags); if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) *tskey = atomic_inc_return(&sk->sk_tskey) - 1; } if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags, __u8 *tx_flags) { _sock_tx_timestamp(sk, tsflags, tx_flags, NULL); } static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) { _sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags, &skb_shinfo(skb)->tskey); } static inline bool sk_is_inet(const struct sock *sk) { int 
family = READ_ONCE(sk->sk_family); return family == AF_INET || family == AF_INET6; } static inline bool sk_is_tcp(const struct sock *sk) { return sk_is_inet(sk) && sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; } static inline bool sk_is_udp(const struct sock *sk) { return sk_is_inet(sk) && sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP; } static inline bool sk_is_stream_unix(const struct sock *sk) { return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM; } /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from * @skb: socket buffer to eat * * This routine must be called with interrupts disabled or with the socket * locked so that the sk_buff queue operation is ok. */ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } static inline bool skb_sk_is_prefetched(struct sk_buff *skb) { #ifdef CONFIG_INET return skb->destructor == sock_pfree; #else return false; #endif /* CONFIG_INET */ } /* This helper checks if a socket is a full socket, * ie _not_ a timewait or request socket. */ static inline bool sk_fullsock(const struct sock *sk) { return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV); } static inline bool sk_is_refcounted(struct sock *sk) { /* Only full sockets have sk->sk_flags. */ return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE); } /* Checks if this SKB belongs to an HW offloaded socket * and whether any SW fallbacks are required based on dev. * Check decrypted mark in case skb_orphan() cleared socket. */ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) { #ifdef CONFIG_SOCK_VALIDATE_XMIT struct sock *sk = skb->sk; if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) { skb = sk->sk_validate_xmit_skb(sk, dev, skb); #ifdef CONFIG_TLS_DEVICE } else if (unlikely(skb->decrypted)) { pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n"); kfree_skb(skb); skb = NULL; #endif } #endif return skb; } /* This helper checks if a socket is a LISTEN or NEW_SYN_RECV * SYNACK messages can be attached to either ones (depending on SYNCOOKIE) */ static inline bool sk_listener(const struct sock *sk) { return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); bool sk_ns_capable(const struct sock *sk, struct user_namespace *user_ns, int cap); bool sk_capable(const struct sock *sk, int cap); bool sk_net_capable(const struct sock *sk, int cap); void sk_get_meminfo(const struct sock *sk, u32 *meminfo); /* Take into consideration the size of the struct sk_buff overhead in the * determination of these values, since that is non-constant across * platforms. This makes socket queueing behavior and performance * not depend upon such differences. 
*/ #define _SK_MEM_PACKETS 256 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; extern int sysctl_tstamp_allow_data; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; #define SKB_FRAG_PAGE_ORDER get_order(32768) DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_wmem ? */ if (proto->sysctl_wmem_offset) return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset)); return READ_ONCE(*proto->sysctl_wmem); } static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_rmem ? */ if (proto->sysctl_rmem_offset) return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset)); return READ_ONCE(*proto->sysctl_rmem); } /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) * Some wifi drivers need to tweak it to get more chunks. * They can use this helper from their ndo_start_xmit() */ static inline void sk_pacing_shift_update(struct sock *sk, int val) { if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val) return; WRITE_ONCE(sk->sk_pacing_shift, val); } /* if a socket is bound to a device, check that the given device * index is either the same or that the socket is bound to an L3 * master device and the given device index is also enslaved to * that L3 master */ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) { int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); int mdif; if (!bound_dev_if || bound_dev_if == dif) return true; mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); if (mdif && mdif == bound_dev_if) return true; return false; } void sock_def_readable(struct sock *sk); int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); void sock_set_timestamp(struct sock *sk, int optname, bool valbool); int sock_set_timestamping(struct sock *sk, int optname, struct so_timestamping timestamping); void sock_enable_timestamps(struct sock *sk); void sock_no_linger(struct sock *sk); void sock_set_keepalive(struct sock *sk); void sock_set_priority(struct sock *sk, u32 priority); void sock_set_rcvbuf(struct sock *sk, int val); void sock_set_mark(struct sock *sk, u32 val); void sock_set_reuseaddr(struct sock *sk); void sock_set_reuseport(struct sock *sk); void sock_set_sndtimeo(struct sock *sk, s64 secs); int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); int sock_get_timeout(long timeo, void *optval, bool old_timeval); int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, sockptr_t optval, int optlen, bool old_timeval); int sock_ioctl_inout(struct sock *sk, unsigned int cmd, void __user *arg, void *karg, size_t size); int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { if (sk->sk_prot->sock_is_readable) return sk->sk_prot->sock_is_readable(sk); return false; } #endif /* _SOCK_H */
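/*
 * Usage sketch (illustrative, not part of the kernel sources): how a
 * hypothetical protocol might combine the helpers declared above in its
 * receive and poll paths. foo_rcv() and foo_poll() are made-up names;
 * the pattern follows the refcounting postulates and the
 * skwq_has_sleeper() barrier comment earlier in this header, and assumes
 * the usual net/sock.h include context.
 */
static int foo_rcv(struct sock *sk, struct sk_buff *skb)
{
	int err;

	/* Per the refcounting postulates, the queued skb does not take its
	 * own reference on sk; the caller's lookup reference keeps the
	 * socket alive across the enqueue.
	 */
	err = sock_queue_rcv_skb(sk, skb);
	if (err < 0)
		kfree_skb(skb);
	return err;
}

static __poll_t foo_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;

	/* sock_poll_wait() provides the memory barrier that pairs with the
	 * wakeup side described in the skwq_has_sleeper() comment.
	 */
	sock_poll_wait(file, sock, wait);

	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM;
	if (READ_ONCE(sk->sk_err))
		mask |= EPOLLERR;
	return mask;
}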
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CPUSET_H #define _LINUX_CPUSET_H /* * cpuset interface * * Copyright (C) 2003 BULL SA * Copyright (C) 2004-2006 Silicon Graphics, Inc. * */ #include <linux/sched.h> #include <linux/sched/topology.h> #include <linux/sched/task.h> #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/mm.h> #include <linux/mmu_context.h> #include <linux/jump_label.h> #ifdef CONFIG_CPUSETS /* * Static branch rewrites can happen in an arbitrary order for a given * key. In code paths where we need to loop with read_mems_allowed_begin() and * read_mems_allowed_retry() to get a consistent view of mems_allowed, we need * to ensure that begin() always gets rewritten before retry() in the * disabled -> enabled transition. If not, then if local irqs are disabled * around the loop, we can deadlock since retry() would always be * comparing the latest value of the mems_allowed seqcount against 0 as * begin() still would see cpusets_enabled() as false. The enabled -> disabled * transition should happen in reverse order for the same reasons (want to stop * looking at real value of mems_allowed.sequence in retry() first). */ extern struct static_key_false cpusets_pre_enable_key; extern struct static_key_false cpusets_enabled_key; extern struct static_key_false cpusets_insane_config_key; static inline bool cpusets_enabled(void) { return static_branch_unlikely(&cpusets_enabled_key); } static inline void cpuset_inc(void) { static_branch_inc_cpuslocked(&cpusets_pre_enable_key); static_branch_inc_cpuslocked(&cpusets_enabled_key); } static inline void cpuset_dec(void) { static_branch_dec_cpuslocked(&cpusets_enabled_key); static_branch_dec_cpuslocked(&cpusets_pre_enable_key); } /* * This will get enabled whenever a cpuset configuration is considered * unsupportable in general. E.g. movable only node which cannot satisfy * any non movable allocations (see update_nodemask). Page allocator * needs to make additional checks for those configurations and this * check is meant to guard those checks without any overhead for sane * configurations.
*/ static inline bool cpusets_insane_config(void) { return static_branch_unlikely(&cpusets_insane_config_key); } extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); extern void cpuset_wait_for_hotplug(void); extern void inc_dl_tasks_cs(struct task_struct *task); extern void dec_dl_tasks_cs(struct task_struct *task); extern void cpuset_lock(void); extern void cpuset_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); extern bool cpuset_cpu_is_isolated(int cpu); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); extern bool cpuset_node_allowed(int node, gfp_t gfp_mask); static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return cpuset_node_allowed(zone_to_nid(z), gfp_mask); } static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { if (cpusets_enabled()) return __cpuset_zone_allowed(z, gfp_mask); return true; } extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2); #define cpuset_memory_pressure_bump() \ do { \ if (cpuset_memory_pressure_enabled) \ __cpuset_memory_pressure_bump(); \ } while (0) extern int cpuset_memory_pressure_enabled; extern void __cpuset_memory_pressure_bump(void); extern void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task); extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); extern int cpuset_mem_spread_node(void); extern int cpuset_slab_spread_node(void); static inline int cpuset_do_page_mem_spread(void) { return task_spread_page(current); } extern bool current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); extern void cpuset_print_current_mems_allowed(void); /* * read_mems_allowed_begin is required when making decisions involving * mems_allowed such as during page allocation. mems_allowed can be updated in * parallel and depending on the new value an operation can fail potentially * causing process failure. A retry loop with read_mems_allowed_begin and * read_mems_allowed_retry prevents these artificial failures. */ static inline unsigned int read_mems_allowed_begin(void) { if (!static_branch_unlikely(&cpusets_pre_enable_key)) return 0; return read_seqcount_begin(&current->mems_allowed_seq); } /* * If this returns true, the operation that took place after * read_mems_allowed_begin may have failed artificially due to a concurrent * update of mems_allowed. It is up to the caller to retry the operation if * appropriate. 
*/ static inline bool read_mems_allowed_retry(unsigned int seq) { if (!static_branch_unlikely(&cpusets_enabled_key)) return false; return read_seqcount_retry(&current->mems_allowed_seq, seq); } static inline void set_mems_allowed(nodemask_t nodemask) { unsigned long flags; task_lock(current); local_irq_save(flags); write_seqcount_begin(&current->mems_allowed_seq); current->mems_allowed = nodemask; write_seqcount_end(&current->mems_allowed_seq); local_irq_restore(flags); task_unlock(current); } #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } static inline bool cpusets_insane_config(void) { return false; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} static inline void cpuset_force_rebuild(void) { } static inline void cpuset_update_active_cpus(void) { partition_sched_domains(1, NULL, NULL); } static inline void cpuset_wait_for_hotplug(void) { } static inline void inc_dl_tasks_cs(struct task_struct *task) { } static inline void dec_dl_tasks_cs(struct task_struct *task) { } static inline void cpuset_lock(void) { } static inline void cpuset_unlock(void) { } static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) { cpumask_copy(mask, task_cpu_possible_mask(p)); } static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p) { return false; } static inline bool cpuset_cpu_is_isolated(int cpu) { return false; } static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { return node_possible_map; } #define cpuset_current_mems_allowed (node_states[N_MEMORY]) static inline void cpuset_init_current_mems_allowed(void) {} static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) { return 1; } static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return true; } static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return true; } static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2) { return 1; } static inline void cpuset_memory_pressure_bump(void) {} static inline void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) { } static inline int cpuset_mem_spread_node(void) { return 0; } static inline int cpuset_slab_spread_node(void) { return 0; } static inline int cpuset_do_page_mem_spread(void) { return 0; } static inline bool current_cpuset_is_being_rebound(void) { return false; } static inline void rebuild_sched_domains(void) { partition_sched_domains(1, NULL, NULL); } static inline void cpuset_print_current_mems_allowed(void) { } static inline void set_mems_allowed(nodemask_t nodemask) { } static inline unsigned int read_mems_allowed_begin(void) { return 0; } static inline bool read_mems_allowed_retry(unsigned int seq) { return false; } #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */
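/*
 * Usage sketch (illustrative, not part of the kernel sources): the retry
 * loop that the read_mems_allowed_begin()/read_mems_allowed_retry()
 * comments above describe, wrapped around a plain page allocation.
 * foo_alloc_page() is a made-up name; alloc_pages() is assumed to come
 * from <linux/gfp.h>.
 */
static struct page *foo_alloc_page(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;
	unsigned int cpuset_mems_cookie;

	do {
		/* Snapshot the mems_allowed sequence count first ... */
		cpuset_mems_cookie = read_mems_allowed_begin();
		/* ... then allocate against the current nodemask. */
		page = alloc_pages(gfp_mask, order);
		/*
		 * A failure that raced with a mems_allowed update may be
		 * artificial; retry once the update is visible.
		 */
	} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));

	return page;
}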
// SPDX-License-Identifier: GPL-2.0 /* * linux/kernel/capability.c * * Copyright (C) 1997 Andrew Main <zefram@fysh.org> * * Integrated into 2.1.97+, Andrew G. Morgan <morgan@kernel.org> * 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/audit.h> #include <linux/capability.h> #include <linux/mm.h> #include <linux/export.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/uaccess.h> int file_caps_enabled = 1; static int __init file_caps_disable(char *str) { file_caps_enabled = 0; return 1; } __setup("no_file_caps", file_caps_disable); #ifdef CONFIG_MULTIUSER /* * More recent versions of libcap are available from: * * http://www.kernel.org/pub/linux/libs/security/linux-privs/ */ static void warn_legacy_capability_use(void) { char name[sizeof(current->comm)]; pr_info_once("warning: `%s' uses 32-bit capabilities (legacy support in use)\n", get_task_comm(name, current)); } /* * Version 2 capabilities worked fine, but the linux/capability.h file * that accompanied their introduction encouraged their use without * the necessary user-space source code changes. As such, we have * created a version 3 with equivalent functionality to version 2, but * with a header change to protect legacy source code from using * version 2 when it wanted to use version 1.
If your system has code * that trips the following warning, it is using version 2 specific * capabilities and may be doing so insecurely. * * The remedy is to either upgrade your version of libcap (to 2.10+, * if the application is linked against it), or recompile your * application with modern kernel headers and this warning will go * away. */ static void warn_deprecated_v2(void) { char name[sizeof(current->comm)]; pr_info_once("warning: `%s' uses deprecated v2 capabilities in a way that may be insecure\n", get_task_comm(name, current)); } /* * Version check. Return the number of u32s in each capability flag * array, or a negative value on error. */ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) { __u32 version; if (get_user(version, &header->version)) return -EFAULT; switch (version) { case _LINUX_CAPABILITY_VERSION_1: warn_legacy_capability_use(); *tocopy = _LINUX_CAPABILITY_U32S_1; break; case _LINUX_CAPABILITY_VERSION_2: warn_deprecated_v2(); fallthrough; /* v3 is otherwise equivalent to v2 */ case _LINUX_CAPABILITY_VERSION_3: *tocopy = _LINUX_CAPABILITY_U32S_3; break; default: if (put_user((u32)_KERNEL_CAPABILITY_VERSION, &header->version)) return -EFAULT; return -EINVAL; } return 0; } /* * The only thing that can change the capabilities of the current * process is the current process. As such, we can't be in this code * at the same time as we are in the process of setting capabilities * in this process. The net result is that we can limit our use of * locks to when we are reading the caps of another process. */ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, kernel_cap_t *pIp, kernel_cap_t *pPp) { int ret; if (pid && (pid != task_pid_vnr(current))) { const struct task_struct *target; rcu_read_lock(); target = find_task_by_vpid(pid); if (!target) ret = -ESRCH; else ret = security_capget(target, pEp, pIp, pPp); rcu_read_unlock(); } else ret = security_capget(current, pEp, pIp, pPp); return ret; } /** * sys_capget - get the capabilities of a given process. * @header: pointer to struct that contains capability version and * target pid data * @dataptr: pointer to struct that contains the effective, permitted, * and inheritable capabilities that are returned * * Returns 0 on success and < 0 on error. */ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) { int ret = 0; pid_t pid; unsigned tocopy; kernel_cap_t pE, pI, pP; struct __user_cap_data_struct kdata[2]; ret = cap_validate_magic(header, &tocopy); if ((dataptr == NULL) || (ret != 0)) return ((dataptr == NULL) && (ret == -EINVAL)) ? 0 : ret; if (get_user(pid, &header->pid)) return -EFAULT; if (pid < 0) return -EINVAL; ret = cap_get_target_pid(pid, &pE, &pI, &pP); if (ret) return ret; /* * Annoying legacy format with 64-bit capabilities exposed * as two sets of 32-bit fields, so we need to split the * capability values up. */ kdata[0].effective = pE.val; kdata[1].effective = pE.val >> 32; kdata[0].permitted = pP.val; kdata[1].permitted = pP.val >> 32; kdata[0].inheritable = pI.val; kdata[1].inheritable = pI.val >> 32; /* * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S, * we silently drop the upper capabilities here. This * has the effect of making older libcap * implementations implicitly drop upper capability * bits when they perform a: capget/modify/capset * sequence. * * This behavior is considered fail-safe * behavior. Upgrading the application to a newer * version of libcap will enable access to the newer * capabilities. 
* * An alternative would be to return an error here * (-ERANGE), but that causes legacy applications to * unexpectedly fail; the capget/modify/capset aborts * before modification is attempted and the application * fails. */ if (copy_to_user(dataptr, kdata, tocopy * sizeof(kdata[0]))) return -EFAULT; return 0; } static kernel_cap_t mk_kernel_cap(u32 low, u32 high) { return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK }; } /** * sys_capset - set capabilities for a process or (*) a group of processes * @header: pointer to struct that contains capability version and * target pid data * @data: pointer to struct that contains the effective, permitted, * and inheritable capabilities * * Set capabilities for the current process only. The ability to any other * process(es) has been deprecated and removed. * * The restrictions on setting capabilities are specified as: * * I: any raised capabilities must be a subset of the old permitted * P: any raised capabilities must be a subset of the old permitted * E: must be set to a subset of new permitted * * Returns 0 on success and < 0 on error. */ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) { struct __user_cap_data_struct kdata[2] = { { 0, }, }; unsigned tocopy, copybytes; kernel_cap_t inheritable, permitted, effective; struct cred *new; int ret; pid_t pid; ret = cap_validate_magic(header, &tocopy); if (ret != 0) return ret; if (get_user(pid, &header->pid)) return -EFAULT; /* may only affect current now */ if (pid != 0 && pid != task_pid_vnr(current)) return -EPERM; copybytes = tocopy * sizeof(struct __user_cap_data_struct); if (copybytes > sizeof(kdata)) return -EFAULT; if (copy_from_user(&kdata, data, copybytes)) return -EFAULT; effective = mk_kernel_cap(kdata[0].effective, kdata[1].effective); permitted = mk_kernel_cap(kdata[0].permitted, kdata[1].permitted); inheritable = mk_kernel_cap(kdata[0].inheritable, kdata[1].inheritable); new = prepare_creds(); if (!new) return -ENOMEM; ret = security_capset(new, current_cred(), &effective, &inheritable, &permitted); if (ret < 0) goto error; audit_log_capset(new, current_cred()); return commit_creds(new); error: abort_creds(new); return ret; } /** * has_ns_capability - Does a task have a capability in a specific user ns * @t: The task in question * @ns: target user namespace * @cap: The capability to be tested for * * Return true if the specified task has the given superior capability * currently in effect to the specified user namespace, false if not. * * Note that this does not set PF_SUPERPRIV on the task. */ bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap) { int ret; rcu_read_lock(); ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NONE); rcu_read_unlock(); return (ret == 0); } /** * has_capability - Does a task have a capability in init_user_ns * @t: The task in question * @cap: The capability to be tested for * * Return true if the specified task has the given superior capability * currently in effect to the initial user namespace, false if not. * * Note that this does not set PF_SUPERPRIV on the task. */ bool has_capability(struct task_struct *t, int cap) { return has_ns_capability(t, &init_user_ns, cap); } EXPORT_SYMBOL(has_capability); /** * has_ns_capability_noaudit - Does a task have a capability (unaudited) * in a specific user ns. 
* @t: The task in question * @ns: target user namespace * @cap: The capability to be tested for * * Return true if the specified task has the given superior capability * currently in effect to the specified user namespace, false if not. * Do not write an audit message for the check. * * Note that this does not set PF_SUPERPRIV on the task. */ bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap) { int ret; rcu_read_lock(); ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NOAUDIT); rcu_read_unlock(); return (ret == 0); } /** * has_capability_noaudit - Does a task have a capability (unaudited) in the * initial user ns * @t: The task in question * @cap: The capability to be tested for * * Return true if the specified task has the given superior capability * currently in effect to init_user_ns, false if not. Don't write an * audit message for the check. * * Note that this does not set PF_SUPERPRIV on the task. */ bool has_capability_noaudit(struct task_struct *t, int cap) { return has_ns_capability_noaudit(t, &init_user_ns, cap); } EXPORT_SYMBOL(has_capability_noaudit); static bool ns_capable_common(struct user_namespace *ns, int cap, unsigned int opts) { int capable; if (unlikely(!cap_valid(cap))) { pr_crit("capable() called with invalid cap=%u\n", cap); BUG(); } capable = security_capable(current_cred(), ns, cap, opts); if (capable == 0) { current->flags |= PF_SUPERPRIV; return true; } return false; } /** * ns_capable - Determine if the current task has a superior capability in effect * @ns: The usernamespace we want the capability in * @cap: The capability to be tested for * * Return true if the current task has the given superior capability currently * available for use, false if not. * * This sets PF_SUPERPRIV on the task if the capability is available on the * assumption that it's about to be used. */ bool ns_capable(struct user_namespace *ns, int cap) { return ns_capable_common(ns, cap, CAP_OPT_NONE); } EXPORT_SYMBOL(ns_capable); /** * ns_capable_noaudit - Determine if the current task has a superior capability * (unaudited) in effect * @ns: The usernamespace we want the capability in * @cap: The capability to be tested for * * Return true if the current task has the given superior capability currently * available for use, false if not. * * This sets PF_SUPERPRIV on the task if the capability is available on the * assumption that it's about to be used. */ bool ns_capable_noaudit(struct user_namespace *ns, int cap) { return ns_capable_common(ns, cap, CAP_OPT_NOAUDIT); } EXPORT_SYMBOL(ns_capable_noaudit); /** * ns_capable_setid - Determine if the current task has a superior capability * in effect, while signalling that this check is being done from within a * setid or setgroups syscall. * @ns: The usernamespace we want the capability in * @cap: The capability to be tested for * * Return true if the current task has the given superior capability currently * available for use, false if not. * * This sets PF_SUPERPRIV on the task if the capability is available on the * assumption that it's about to be used. */ bool ns_capable_setid(struct user_namespace *ns, int cap) { return ns_capable_common(ns, cap, CAP_OPT_INSETID); } EXPORT_SYMBOL(ns_capable_setid); /** * capable - Determine if the current task has a superior capability in effect * @cap: The capability to be tested for * * Return true if the current task has the given superior capability currently * available for use, false if not. 
* * This sets PF_SUPERPRIV on the task if the capability is available on the * assumption that it's about to be used. */ bool capable(int cap) { return ns_capable(&init_user_ns, cap); } EXPORT_SYMBOL(capable); #endif /* CONFIG_MULTIUSER */ /** * file_ns_capable - Determine if the file's opener had a capability in effect * @file: The file we want to check * @ns: The usernamespace we want the capability in * @cap: The capability to be tested for * * Return true if task that opened the file had a capability in effect * when the file was opened. * * This does not set PF_SUPERPRIV because the caller may not * actually be privileged. */ bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap) { if (WARN_ON_ONCE(!cap_valid(cap))) return false; if (security_capable(file->f_cred, ns, cap, CAP_OPT_NONE) == 0) return true; return false; } EXPORT_SYMBOL(file_ns_capable); /** * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode? * @ns: The user namespace in question * @idmap: idmap of the mount @inode was found from * @inode: The inode in question * * Return true if the inode uid and gid are within the namespace. */ bool privileged_wrt_inode_uidgid(struct user_namespace *ns, struct mnt_idmap *idmap, const struct inode *inode) { return vfsuid_has_mapping(ns, i_uid_into_vfsuid(idmap, inode)) && vfsgid_has_mapping(ns, i_gid_into_vfsgid(idmap, inode)); } /** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @idmap: idmap of the mount @inode was found from * @inode: The inode in question * @cap: The capability in question * * Return true if the current task has the given capability targeted at * its own user namespace and that the given inode's uid and gid are * mapped into the current user namespace. */ bool capable_wrt_inode_uidgid(struct mnt_idmap *idmap, const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, idmap, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); /** * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace * @tsk: The task that may be ptraced * @ns: The user namespace to search for CAP_SYS_PTRACE in * * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE * in the specified user namespace. */ bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) { int ret = 0; /* An absent tracer adds no restrictions */ const struct cred *cred; rcu_read_lock(); cred = rcu_dereference(tsk->ptracer_cred); if (cred) ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT); rcu_read_unlock(); return (ret == 0); }
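/*
 * Userspace sketch (illustrative, not part of the kernel sources): querying
 * the calling task's capability sets through the raw capget(2) interface
 * implemented by sys_capget() above, using the version 3 header discussed
 * in the comments. Build and run as an ordinary userspace program.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/capability.h>

int main(void)
{
	struct __user_cap_header_struct hdr = {
		.version = _LINUX_CAPABILITY_VERSION_3,
		.pid = 0,	/* 0 selects the calling task */
	};
	struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];

	if (syscall(SYS_capget, &hdr, data) != 0) {
		perror("capget");
		return 1;
	}

	/* The two 32-bit slots mirror kdata[0]/kdata[1] in sys_capget(). */
	unsigned long long effective =
		((unsigned long long)data[1].effective << 32) | data[0].effective;

	printf("effective capability mask: 0x%llx\n", effective);
	return 0;
}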
// SPDX-License-Identifier: GPL-2.0 /* * Driver for Meywa-Denki & KAYAC YUREX * * Copyright (C) 2010 Tomoki Sekiyama (tomoki.sekiyama@gmail.com) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/kref.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/hid.h> #define DRIVER_AUTHOR "Tomoki Sekiyama" #define DRIVER_DESC "Driver for Meywa-Denki & KAYAC YUREX" #define YUREX_VENDOR_ID 0x0c45 #define YUREX_PRODUCT_ID 0x1010 #define CMD_ACK '!'
#define CMD_ANIMATE 'A' #define CMD_COUNT 'C' #define CMD_LED 'L' #define CMD_READ 'R' #define CMD_SET 'S' #define CMD_VERSION 'V' #define CMD_EOF 0x0d #define CMD_PADDING 0xff #define YUREX_BUF_SIZE 8 #define YUREX_WRITE_TIMEOUT (HZ*2) #define MAX_S64_STRLEN 20 /* {-}922337203685477580{7,8} */ /* table of devices that work with this driver */ static struct usb_device_id yurex_table[] = { { USB_DEVICE(YUREX_VENDOR_ID, YUREX_PRODUCT_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, yurex_table); #ifdef CONFIG_USB_DYNAMIC_MINORS #define YUREX_MINOR_BASE 0 #else #define YUREX_MINOR_BASE 192 #endif /* Structure to hold all of our device specific stuff */ struct usb_yurex { struct usb_device *udev; struct usb_interface *interface; __u8 int_in_endpointAddr; struct urb *urb; /* URB for interrupt in */ unsigned char *int_buffer; /* buffer for intterupt in */ struct urb *cntl_urb; /* URB for control msg */ struct usb_ctrlrequest *cntl_req; /* req for control msg */ unsigned char *cntl_buffer; /* buffer for control msg */ struct kref kref; struct mutex io_mutex; unsigned long disconnected:1; struct fasync_struct *async_queue; wait_queue_head_t waitq; spinlock_t lock; __s64 bbu; /* BBU from device */ }; #define to_yurex_dev(d) container_of(d, struct usb_yurex, kref) static struct usb_driver yurex_driver; static const struct file_operations yurex_fops; static void yurex_control_callback(struct urb *urb) { struct usb_yurex *dev = urb->context; int status = urb->status; if (status) { dev_err(&urb->dev->dev, "%s - control failed: %d\n", __func__, status); wake_up_interruptible(&dev->waitq); return; } /* on success, sender woken up by CMD_ACK int in, or timeout */ } static void yurex_delete(struct kref *kref) { struct usb_yurex *dev = to_yurex_dev(kref); dev_dbg(&dev->interface->dev, "%s\n", __func__); if (dev->cntl_urb) { usb_kill_urb(dev->cntl_urb); kfree(dev->cntl_req); usb_free_coherent(dev->udev, YUREX_BUF_SIZE, dev->cntl_buffer, dev->cntl_urb->transfer_dma); usb_free_urb(dev->cntl_urb); } if (dev->urb) { usb_kill_urb(dev->urb); usb_free_coherent(dev->udev, YUREX_BUF_SIZE, dev->int_buffer, dev->urb->transfer_dma); usb_free_urb(dev->urb); } usb_put_intf(dev->interface); usb_put_dev(dev->udev); kfree(dev); } /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with the driver core */ static struct usb_class_driver yurex_class = { .name = "yurex%d", .fops = &yurex_fops, .minor_base = YUREX_MINOR_BASE, }; static void yurex_interrupt(struct urb *urb) { struct usb_yurex *dev = urb->context; unsigned char *buf = dev->int_buffer; int status = urb->status; unsigned long flags; int retval, i; switch (status) { case 0: /*success*/ break; /* The device is terminated or messed up, give up */ case -EOVERFLOW: dev_err(&dev->interface->dev, "%s - overflow with length %d, actual length is %d\n", __func__, YUREX_BUF_SIZE, dev->urb->actual_length); return; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EILSEQ: case -EPROTO: case -ETIME: return; default: dev_err(&dev->interface->dev, "%s - unknown status received: %d\n", __func__, status); return; } /* handle received message */ switch (buf[0]) { case CMD_COUNT: case CMD_READ: if (buf[6] == CMD_EOF) { spin_lock_irqsave(&dev->lock, flags); dev->bbu = 0; for (i = 1; i < 6; i++) { dev->bbu += buf[i]; if (i != 5) dev->bbu <<= 8; } dev_dbg(&dev->interface->dev, "%s count: %lld\n", __func__, dev->bbu); spin_unlock_irqrestore(&dev->lock, flags); kill_fasync(&dev->async_queue, SIGIO, POLL_IN); } 
else dev_dbg(&dev->interface->dev, "data format error - no EOF\n"); break; case CMD_ACK: dev_dbg(&dev->interface->dev, "%s ack: %c\n", __func__, buf[1]); wake_up_interruptible(&dev->waitq); break; } retval = usb_submit_urb(dev->urb, GFP_ATOMIC); if (retval) { dev_err(&dev->interface->dev, "%s - usb_submit_urb failed: %d\n", __func__, retval); } } static int yurex_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_yurex *dev; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int retval = -ENOMEM; DEFINE_WAIT(wait); int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto error; kref_init(&dev->kref); mutex_init(&dev->io_mutex); spin_lock_init(&dev->lock); init_waitqueue_head(&dev->waitq); dev->udev = usb_get_dev(interface_to_usbdev(interface)); dev->interface = usb_get_intf(interface); /* set up the endpoint information */ iface_desc = interface->cur_altsetting; res = usb_find_int_in_endpoint(iface_desc, &endpoint); if (res) { dev_err(&interface->dev, "Could not find endpoints\n"); retval = res; goto error; } dev->int_in_endpointAddr = endpoint->bEndpointAddress; /* allocate control URB */ dev->cntl_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->cntl_urb) goto error; /* allocate buffer for control req */ dev->cntl_req = kmalloc(YUREX_BUF_SIZE, GFP_KERNEL); if (!dev->cntl_req) goto error; /* allocate buffer for control msg */ dev->cntl_buffer = usb_alloc_coherent(dev->udev, YUREX_BUF_SIZE, GFP_KERNEL, &dev->cntl_urb->transfer_dma); if (!dev->cntl_buffer) { dev_err(&interface->dev, "Could not allocate cntl_buffer\n"); goto error; } /* configure control URB */ dev->cntl_req->bRequestType = USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE; dev->cntl_req->bRequest = HID_REQ_SET_REPORT; dev->cntl_req->wValue = cpu_to_le16((HID_OUTPUT_REPORT + 1) << 8); dev->cntl_req->wIndex = cpu_to_le16(iface_desc->desc.bInterfaceNumber); dev->cntl_req->wLength = cpu_to_le16(YUREX_BUF_SIZE); usb_fill_control_urb(dev->cntl_urb, dev->udev, usb_sndctrlpipe(dev->udev, 0), (void *)dev->cntl_req, dev->cntl_buffer, YUREX_BUF_SIZE, yurex_control_callback, dev); dev->cntl_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* allocate interrupt URB */ dev->urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb) goto error; /* allocate buffer for interrupt in */ dev->int_buffer = usb_alloc_coherent(dev->udev, YUREX_BUF_SIZE, GFP_KERNEL, &dev->urb->transfer_dma); if (!dev->int_buffer) { dev_err(&interface->dev, "Could not allocate int_buffer\n"); goto error; } /* configure interrupt URB */ usb_fill_int_urb(dev->urb, dev->udev, usb_rcvintpipe(dev->udev, dev->int_in_endpointAddr), dev->int_buffer, YUREX_BUF_SIZE, yurex_interrupt, dev, 1); dev->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (usb_submit_urb(dev->urb, GFP_KERNEL)) { retval = -EIO; dev_err(&interface->dev, "Could not submitting URB\n"); goto error; } /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); dev->bbu = -1; /* we can register the device now, as it is ready */ retval = usb_register_dev(interface, &yurex_class); if (retval) { dev_err(&interface->dev, "Not able to get a minor for this device.\n"); usb_set_intfdata(interface, NULL); goto error; } dev_info(&interface->dev, "USB YUREX device now attached to Yurex #%d\n", interface->minor); return 0; error: if (dev) /* this frees allocated memory */ kref_put(&dev->kref, yurex_delete); return retval; } static void yurex_disconnect(struct 
usb_interface *interface) { struct usb_yurex *dev; int minor = interface->minor; dev = usb_get_intfdata(interface); usb_set_intfdata(interface, NULL); /* give back our minor */ usb_deregister_dev(interface, &yurex_class); /* prevent more I/O from starting */ usb_poison_urb(dev->urb); usb_poison_urb(dev->cntl_urb); mutex_lock(&dev->io_mutex); dev->disconnected = 1; mutex_unlock(&dev->io_mutex); /* wakeup waiters */ kill_fasync(&dev->async_queue, SIGIO, POLL_IN); wake_up_interruptible(&dev->waitq); /* decrement our usage count */ kref_put(&dev->kref, yurex_delete); dev_info(&interface->dev, "USB YUREX #%d now disconnected\n", minor); } static struct usb_driver yurex_driver = { .name = "yurex", .probe = yurex_probe, .disconnect = yurex_disconnect, .id_table = yurex_table, }; static int yurex_fasync(int fd, struct file *file, int on) { struct usb_yurex *dev; dev = file->private_data; return fasync_helper(fd, file, on, &dev->async_queue); } static int yurex_open(struct inode *inode, struct file *file) { struct usb_yurex *dev; struct usb_interface *interface; int subminor; int retval = 0; subminor = iminor(inode); interface = usb_find_interface(&yurex_driver, subminor); if (!interface) { printk(KERN_ERR "%s - error, can't find device for minor %d", __func__, subminor); retval = -ENODEV; goto exit; } dev = usb_get_intfdata(interface); if (!dev) { retval = -ENODEV; goto exit; } /* increment our usage count for the device */ kref_get(&dev->kref); /* save our object in the file's private structure */ mutex_lock(&dev->io_mutex); file->private_data = dev; mutex_unlock(&dev->io_mutex); exit: return retval; } static int yurex_release(struct inode *inode, struct file *file) { struct usb_yurex *dev; dev = file->private_data; if (dev == NULL) return -ENODEV; /* decrement the count on our device */ kref_put(&dev->kref, yurex_delete); return 0; } static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct usb_yurex *dev; int len = 0; char in_buffer[MAX_S64_STRLEN]; unsigned long flags; dev = file->private_data; mutex_lock(&dev->io_mutex); if (dev->disconnected) { /* already disconnected */ mutex_unlock(&dev->io_mutex); return -ENODEV; } if (WARN_ON_ONCE(dev->bbu > S64_MAX || dev->bbu < S64_MIN)) { mutex_unlock(&dev->io_mutex); return -EIO; } spin_lock_irqsave(&dev->lock, flags); len = scnprintf(in_buffer, MAX_S64_STRLEN, "%lld\n", dev->bbu); spin_unlock_irqrestore(&dev->lock, flags); mutex_unlock(&dev->io_mutex); return simple_read_from_buffer(buffer, count, ppos, in_buffer, len); } static ssize_t yurex_write(struct file *file, const char __user *user_buffer, size_t count, loff_t *ppos) { struct usb_yurex *dev; int i, set = 0, retval = 0; char buffer[16 + 1]; char *data = buffer; unsigned long long c, c2 = 0; signed long timeout = 0; DEFINE_WAIT(wait); count = min(sizeof(buffer) - 1, count); dev = file->private_data; /* verify that we actually have some data to write */ if (count == 0) goto error; mutex_lock(&dev->io_mutex); if (dev->disconnected) { /* already disconnected */ mutex_unlock(&dev->io_mutex); retval = -ENODEV; goto error; } if (copy_from_user(buffer, user_buffer, count)) { mutex_unlock(&dev->io_mutex); retval = -EFAULT; goto error; } buffer[count] = 0; memset(dev->cntl_buffer, CMD_PADDING, YUREX_BUF_SIZE); switch (buffer[0]) { case CMD_ANIMATE: case CMD_LED: dev->cntl_buffer[0] = buffer[0]; dev->cntl_buffer[1] = buffer[1]; dev->cntl_buffer[2] = CMD_EOF; break; case CMD_READ: case CMD_VERSION: dev->cntl_buffer[0] = buffer[0]; dev->cntl_buffer[1] = 0x00;
dev->cntl_buffer[2] = CMD_EOF; break; case CMD_SET: data++; fallthrough; case '0' ... '9': set = 1; c = c2 = simple_strtoull(data, NULL, 0); dev->cntl_buffer[0] = CMD_SET; for (i = 1; i < 6; i++) { dev->cntl_buffer[i] = (c>>32) & 0xff; c <<= 8; } buffer[6] = CMD_EOF; break; default: mutex_unlock(&dev->io_mutex); return -EINVAL; } /* send the data as the control msg */ prepare_to_wait(&dev->waitq, &wait, TASK_INTERRUPTIBLE); dev_dbg(&dev->interface->dev, "%s - submit %c\n", __func__, dev->cntl_buffer[0]); retval = usb_submit_urb(dev->cntl_urb, GFP_ATOMIC); if (retval >= 0) timeout = schedule_timeout(YUREX_WRITE_TIMEOUT); finish_wait(&dev->waitq, &wait); /* make sure URB is idle after timeout or (spurious) CMD_ACK */ usb_kill_urb(dev->cntl_urb); mutex_unlock(&dev->io_mutex); if (retval < 0) { dev_err(&dev->interface->dev, "%s - failed to send bulk msg, error %d\n", __func__, retval); goto error; } if (set && timeout) dev->bbu = c2; return timeout ? count : -EIO; error: return retval; } static const struct file_operations yurex_fops = { .owner = THIS_MODULE, .read = yurex_read, .write = yurex_write, .open = yurex_open, .release = yurex_release, .fasync = yurex_fasync, .llseek = default_llseek, }; module_usb_driver(yurex_driver); MODULE_LICENSE("GPL");
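A minimal user-space sketch of the character-device protocol implemented above (not part of the driver source): writing CMD_SET ('S') followed by a decimal value programs the BBU counter, and a subsequent read returns the count as formatted by yurex_read(). The path /dev/yurex0 is an assumption derived from the "yurex%d" class name; the actual node name depends on udev.

/* Hypothetical user-space test; /dev/yurex0 is assumed, not guaranteed. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[32];
	ssize_t n;
	int fd = open("/dev/yurex0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* CMD_SET ('S') plus a decimal argument; blocks until ACK or timeout. */
	if (write(fd, "S100", 4) < 0)
		perror("write");

	/* yurex_read() formats the current BBU count as "%lld\n". */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("BBU count: %s", buf);
	}

	close(fd);
	return 0;
}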
// SPDX-License-Identifier: GPL-2.0-only /* * LED Triggers Core * * Copyright 2005-2007 Openedhand Ltd.
* * Author: Richard Purdie <rpurdie@openedhand.com> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/device.h> #include <linux/timer.h> #include <linux/rwsem.h> #include <linux/leds.h> #include <linux/slab.h> #include <linux/mm.h> #include "leds.h" /* * Nests outside led_cdev->trigger_lock */ static DECLARE_RWSEM(triggers_list_lock); static LIST_HEAD(trigger_list); /* Used by LED Class */ static inline bool trigger_relevant(struct led_classdev *led_cdev, struct led_trigger *trig) { return !trig->trigger_type || trig->trigger_type == led_cdev->trigger_type; } ssize_t led_trigger_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); struct led_classdev *led_cdev = dev_get_drvdata(dev); struct led_trigger *trig; int ret = count; mutex_lock(&led_cdev->led_access); if (led_sysfs_is_disabled(led_cdev)) { ret = -EBUSY; goto unlock; } if (sysfs_streq(buf, "none")) { led_trigger_remove(led_cdev); goto unlock; } down_read(&triggers_list_lock); list_for_each_entry(trig, &trigger_list, next_trig) { if (sysfs_streq(buf, trig->name) && trigger_relevant(led_cdev, trig)) { down_write(&led_cdev->trigger_lock); led_trigger_set(led_cdev, trig); up_write(&led_cdev->trigger_lock); up_read(&triggers_list_lock); goto unlock; } } /* we come here only if buf matches no trigger */ ret = -EINVAL; up_read(&triggers_list_lock); unlock: mutex_unlock(&led_cdev->led_access); return ret; } EXPORT_SYMBOL_GPL(led_trigger_write); __printf(3, 4) static int led_trigger_snprintf(char *buf, ssize_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); if (size <= 0) i = vsnprintf(NULL, 0, fmt, args); else i = vscnprintf(buf, size, fmt, args); va_end(args); return i; } static int led_trigger_format(char *buf, size_t size, struct led_classdev *led_cdev) { struct led_trigger *trig; int len = led_trigger_snprintf(buf, size, "%s", led_cdev->trigger ? "none" : "[none]"); list_for_each_entry(trig, &trigger_list, next_trig) { bool hit; if (!trigger_relevant(led_cdev, trig)) continue; hit = led_cdev->trigger && !strcmp(led_cdev->trigger->name, trig->name); len += led_trigger_snprintf(buf + len, size - len, " %s%s%s", hit ? "[" : "", trig->name, hit ? "]" : ""); } len += led_trigger_snprintf(buf + len, size - len, "\n"); return len; } /* * It was stupid to create 10000 cpu triggers, but we are stuck with it now. * Don't make that mistake again. We work around it here by creating binary * attribute, which is not limited by length. This is _not_ good design, do not * copy it. 
*/ ssize_t led_trigger_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); struct led_classdev *led_cdev = dev_get_drvdata(dev); void *data; int len; down_read(&triggers_list_lock); down_read(&led_cdev->trigger_lock); len = led_trigger_format(NULL, 0, led_cdev); data = kvmalloc(len + 1, GFP_KERNEL); if (!data) { up_read(&led_cdev->trigger_lock); up_read(&triggers_list_lock); return -ENOMEM; } len = led_trigger_format(data, len + 1, led_cdev); up_read(&led_cdev->trigger_lock); up_read(&triggers_list_lock); len = memory_read_from_buffer(buf, count, &pos, data, len); kvfree(data); return len; } EXPORT_SYMBOL_GPL(led_trigger_read); /* Caller must ensure led_cdev->trigger_lock held */ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) { char *event = NULL; char *envp[2]; const char *name; int ret; if (!led_cdev->trigger && !trig) return 0; name = trig ? trig->name : "none"; event = kasprintf(GFP_KERNEL, "TRIGGER=%s", name); /* Remove any existing trigger */ if (led_cdev->trigger) { spin_lock(&led_cdev->trigger->leddev_list_lock); list_del_rcu(&led_cdev->trig_list); spin_unlock(&led_cdev->trigger->leddev_list_lock); /* ensure it's no longer visible on the led_cdevs list */ synchronize_rcu(); cancel_work_sync(&led_cdev->set_brightness_work); led_stop_software_blink(led_cdev); if (led_cdev->trigger->deactivate) led_cdev->trigger->deactivate(led_cdev); device_remove_groups(led_cdev->dev, led_cdev->trigger->groups); led_cdev->trigger = NULL; led_cdev->trigger_data = NULL; led_cdev->activated = false; led_cdev->flags &= ~LED_INIT_DEFAULT_TRIGGER; led_set_brightness(led_cdev, LED_OFF); } if (trig) { spin_lock(&trig->leddev_list_lock); list_add_tail_rcu(&led_cdev->trig_list, &trig->led_cdevs); spin_unlock(&trig->leddev_list_lock); led_cdev->trigger = trig; if (trig->activate) ret = trig->activate(led_cdev); else ret = 0; if (ret) goto err_activate; ret = device_add_groups(led_cdev->dev, trig->groups); if (ret) { dev_err(led_cdev->dev, "Failed to add trigger attributes\n"); goto err_add_groups; } } if (event) { envp[0] = event; envp[1] = NULL; if (kobject_uevent_env(&led_cdev->dev->kobj, KOBJ_CHANGE, envp)) dev_err(led_cdev->dev, "%s: Error sending uevent\n", __func__); kfree(event); } return 0; err_add_groups: if (trig->deactivate) trig->deactivate(led_cdev); err_activate: spin_lock(&led_cdev->trigger->leddev_list_lock); list_del_rcu(&led_cdev->trig_list); spin_unlock(&led_cdev->trigger->leddev_list_lock); synchronize_rcu(); led_cdev->trigger = NULL; led_cdev->trigger_data = NULL; led_set_brightness(led_cdev, LED_OFF); kfree(event); return ret; } EXPORT_SYMBOL_GPL(led_trigger_set); void led_trigger_remove(struct led_classdev *led_cdev) { down_write(&led_cdev->trigger_lock); led_trigger_set(led_cdev, NULL); up_write(&led_cdev->trigger_lock); } EXPORT_SYMBOL_GPL(led_trigger_remove); static bool led_match_default_trigger(struct led_classdev *led_cdev, struct led_trigger *trig) { if (!strcmp(led_cdev->default_trigger, trig->name) && trigger_relevant(led_cdev, trig)) { led_cdev->flags |= LED_INIT_DEFAULT_TRIGGER; led_trigger_set(led_cdev, trig); return true; } return false; } void led_trigger_set_default(struct led_classdev *led_cdev) { struct led_trigger *trig; bool found = false; if (!led_cdev->default_trigger) return; down_read(&triggers_list_lock); down_write(&led_cdev->trigger_lock); list_for_each_entry(trig, &trigger_list, next_trig) { found = 
led_match_default_trigger(led_cdev, trig); if (found) break; } up_write(&led_cdev->trigger_lock); up_read(&triggers_list_lock); /* * If default trigger wasn't found, maybe trigger module isn't loaded yet. * Once loaded it will re-probe with all led_cdev's. */ if (!found) request_module_nowait("ledtrig:%s", led_cdev->default_trigger); } EXPORT_SYMBOL_GPL(led_trigger_set_default); /* LED Trigger Interface */ int led_trigger_register(struct led_trigger *trig) { struct led_classdev *led_cdev; struct led_trigger *_trig; spin_lock_init(&trig->leddev_list_lock); INIT_LIST_HEAD(&trig->led_cdevs); down_write(&triggers_list_lock); /* Make sure the trigger's name isn't already in use */ list_for_each_entry(_trig, &trigger_list, next_trig) { if (!strcmp(_trig->name, trig->name) && (trig->trigger_type == _trig->trigger_type || !trig->trigger_type || !_trig->trigger_type)) { up_write(&triggers_list_lock); return -EEXIST; } } /* Add to the list of led triggers */ list_add_tail(&trig->next_trig, &trigger_list); up_write(&triggers_list_lock); /* Register with any LEDs that have this as a default trigger */ down_read(&leds_list_lock); list_for_each_entry(led_cdev, &leds_list, node) { down_write(&led_cdev->trigger_lock); if (!led_cdev->trigger && led_cdev->default_trigger) led_match_default_trigger(led_cdev, trig); up_write(&led_cdev->trigger_lock); } up_read(&leds_list_lock); return 0; } EXPORT_SYMBOL_GPL(led_trigger_register); void led_trigger_unregister(struct led_trigger *trig) { struct led_classdev *led_cdev; if (list_empty_careful(&trig->next_trig)) return; /* Remove from the list of led triggers */ down_write(&triggers_list_lock); list_del_init(&trig->next_trig); up_write(&triggers_list_lock); /* Remove anyone actively using this trigger */ down_read(&leds_list_lock); list_for_each_entry(led_cdev, &leds_list, node) { down_write(&led_cdev->trigger_lock); if (led_cdev->trigger == trig) led_trigger_set(led_cdev, NULL); up_write(&led_cdev->trigger_lock); } up_read(&leds_list_lock); } EXPORT_SYMBOL_GPL(led_trigger_unregister); static void devm_led_trigger_release(struct device *dev, void *res) { led_trigger_unregister(*(struct led_trigger **)res); } int devm_led_trigger_register(struct device *dev, struct led_trigger *trig) { struct led_trigger **dr; int rc; dr = devres_alloc(devm_led_trigger_release, sizeof(*dr), GFP_KERNEL); if (!dr) return -ENOMEM; *dr = trig; rc = led_trigger_register(trig); if (rc) devres_free(dr); else devres_add(dev, dr); return rc; } EXPORT_SYMBOL_GPL(devm_led_trigger_register); /* Simple LED Trigger Interface */ void led_trigger_event(struct led_trigger *trig, enum led_brightness brightness) { struct led_classdev *led_cdev; if (!trig) return; rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) led_set_brightness(led_cdev, brightness); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(led_trigger_event); static void led_trigger_blink_setup(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off, int oneshot, int invert) { struct led_classdev *led_cdev; if (!trig) return; rcu_read_lock(); list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) { if (oneshot) led_blink_set_oneshot(led_cdev, &delay_on, &delay_off, invert); else led_blink_set_nosleep(led_cdev, delay_on, delay_off); } rcu_read_unlock(); } void led_trigger_blink(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off) { led_trigger_blink_setup(trig, delay_on, delay_off, 0, 0); } EXPORT_SYMBOL_GPL(led_trigger_blink); void led_trigger_blink_oneshot(struct 
led_trigger *trig, unsigned long delay_on, unsigned long delay_off, int invert) { led_trigger_blink_setup(trig, delay_on, delay_off, 1, invert); } EXPORT_SYMBOL_GPL(led_trigger_blink_oneshot); void led_trigger_register_simple(const char *name, struct led_trigger **tp) { struct led_trigger *trig; int err; trig = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); if (trig) { trig->name = name; err = led_trigger_register(trig); if (err < 0) { kfree(trig); trig = NULL; pr_warn("LED trigger %s failed to register (%d)\n", name, err); } } else { pr_warn("LED trigger %s failed to register (no memory)\n", name); } *tp = trig; } EXPORT_SYMBOL_GPL(led_trigger_register_simple); void led_trigger_unregister_simple(struct led_trigger *trig) { if (trig) led_trigger_unregister(trig); kfree(trig); } EXPORT_SYMBOL_GPL(led_trigger_unregister_simple);
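The simple trigger interface at the end of the file (led_trigger_register_simple(), led_trigger_event(), led_trigger_unregister_simple()) is what most in-kernel users call. Below is a hedged sketch of such a user; the trigger name "example-activity" and the module itself are illustrative, not taken from the tree.

#include <linux/leds.h>
#include <linux/module.h>

static struct led_trigger *example_trig;	/* illustrative only */

static int __init example_trig_init(void)
{
	/* Registers a trigger selectable as "example-activity" via sysfs. */
	led_trigger_register_simple("example-activity", &example_trig);

	/* Every LED bound to the trigger follows the brightness passed here. */
	led_trigger_event(example_trig, LED_FULL);
	return 0;
}

static void __exit example_trig_exit(void)
{
	led_trigger_unregister_simple(example_trig);
}

module_init(example_trig_init);
module_exit(example_trig_exit);
MODULE_LICENSE("GPL");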
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BSEARCH_H #define _LINUX_BSEARCH_H #include <linux/types.h> static __always_inline void *__inline_bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp) { const char *pivot; int result; while (num > 0) { pivot = base + (num >> 1) * size; result = cmp(key, pivot); if (result == 0) return (void *)pivot; if (result > 0) { base = pivot + size; num--; } num >>= 1; } return NULL; } extern void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp); #endif /* _LINUX_BSEARCH_H */
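A short usage sketch for the header above (not part of the header itself): the caller supplies an ascending-sorted array and a cmp_func_t comparison callback. The table and helper names below are illustrative.

#include <linux/bsearch.h>
#include <linux/kernel.h>

/* Must be sorted in ascending order for the binary search to be valid. */
static const u32 sorted_ids[] = { 3, 17, 42, 101, 4096 };

static int cmp_u32(const void *key, const void *elt)
{
	u32 a = *(const u32 *)key;
	u32 b = *(const u32 *)elt;

	if (a < b)
		return -1;
	return a > b;
}

static bool id_is_known(u32 id)
{
	/* __inline_bsearch() returns the matching element or NULL. */
	return __inline_bsearch(&id, sorted_ids, ARRAY_SIZE(sorted_ids),
				sizeof(sorted_ids[0]), cmp_u32) != NULL;
}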
/* SPDX-License-Identifier: GPL-2.0-only */ /* * net busy poll support * Copyright(c) 2013 Intel Corporation. * * Author: Eliezer Tamir * * Contact Information: * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> */ #ifndef _LINUX_NET_BUSY_POLL_H #define _LINUX_NET_BUSY_POLL_H #include <linux/netdevice.h> #include <linux/sched/clock.h> #include <linux/sched/signal.h> #include <net/ip.h> #include <net/xdp.h> /* 0 - Reserved to indicate value not set * 1..NR_CPUS - Reserved for sender_cpu * NR_CPUS+1..~0 - Region available for NAPI IDs */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) #define BUSY_POLL_BUDGET 8 #ifdef CONFIG_NET_RX_BUSY_POLL struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; extern unsigned int sysctl_net_busy_poll __read_mostly; static inline bool net_busy_loop_on(void) { return READ_ONCE(sysctl_net_busy_poll); } static inline bool sk_can_busy_loop(const struct sock *sk) { return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current); } bool sk_busy_loop_end(void *p, unsigned long start_time); void napi_busy_loop(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), void *loop_end_arg, bool prefer_busy_poll, u16 budget); void napi_busy_loop_rcu(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), void *loop_end_arg, bool prefer_busy_poll, u16 budget); #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { return 0; } static inline bool sk_can_busy_loop(struct sock *sk) { return false; } #endif /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long busy_loop_current_time(void) { #ifdef CONFIG_NET_RX_BUSY_POLL return (unsigned long)(local_clock() >> 10); #else return 0; #endif } /* in poll/select we use the global sysctl_net_ll_poll value */ static inline bool busy_loop_timeout(unsigned long start_time) { #ifdef CONFIG_NET_RX_BUSY_POLL unsigned long bp_usec = READ_ONCE(sysctl_net_busy_poll); if (bp_usec) { unsigned long end_time = start_time + bp_usec; unsigned long now = busy_loop_current_time(); return time_after(now, end_time); } #endif return true; } static inline bool sk_busy_loop_timeout(struct sock *sk, unsigned long start_time) { #ifdef CONFIG_NET_RX_BUSY_POLL unsigned long bp_usec = READ_ONCE(sk->sk_ll_usec); if (bp_usec) { unsigned long end_time = start_time + bp_usec; unsigned long now = busy_loop_current_time(); return time_after(now, end_time); } #endif return true; } static inline void sk_busy_loop(struct sock *sk, int nonblock) { #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int napi_id = READ_ONCE(sk->sk_napi_id); if (napi_id >= MIN_NAPI_ID) napi_busy_loop(napi_id, nonblock ?
NULL : sk_busy_loop_end, sk, READ_ONCE(sk->sk_prefer_busy_poll), READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET); #endif } /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, struct napi_struct *napi) { #ifdef CONFIG_NET_RX_BUSY_POLL /* If the skb was already marked with a valid NAPI ID, avoid overwriting * it. */ if (skb->napi_id < MIN_NAPI_ID) skb->napi_id = napi->napi_id; #endif } /* used in the protocol handler to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id)) WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_update(sk, skb); } /* Variant of sk_mark_napi_id() for passive flow setup, * as sk->sk_napi_id and sk->sk_rx_queue_mapping content * needs to be set. */ static inline void sk_mark_napi_id_set(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif sk_rx_queue_set(sk, skb); } static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id) { #ifdef CONFIG_NET_RX_BUSY_POLL if (!READ_ONCE(sk->sk_napi_id)) WRITE_ONCE(sk->sk_napi_id, napi_id); #endif } /* variant used for unconnected sockets */ static inline void sk_mark_napi_id_once(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL __sk_mark_napi_id_once(sk, skb->napi_id); #endif } static inline void sk_mark_napi_id_once_xdp(struct sock *sk, const struct xdp_buff *xdp) { #ifdef CONFIG_NET_RX_BUSY_POLL __sk_mark_napi_id_once(sk, xdp->rxq->napi_id); #endif } #endif /* _LINUX_NET_BUSY_POLL_H */
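A hedged sketch of the call site the "used in the NIC receive handler" comment refers to (not taken from any real driver): a NAPI poll function tags each received skb so busy polling can later locate the matching NAPI context. example_poll() and example_build_skb() are invented names.

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/busy_poll.h>

/* Hypothetical helper that pulls the next completed frame off the RX ring. */
static struct sk_buff *example_build_skb(struct napi_struct *napi);

static int example_poll(struct napi_struct *napi, int budget)
{
	int done = 0;

	while (done < budget) {
		struct sk_buff *skb = example_build_skb(napi);

		if (!skb)
			break;

		/* Record which NAPI instance delivered this packet. */
		skb_mark_napi_id(skb, napi);
		napi_gro_receive(napi, skb);
		done++;
	}

	if (done < budget)
		napi_complete_done(napi, done);

	return done;
}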
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat * * based in parts on udlfb.c: * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> */ #include <drm/drm.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include "udl_drv.h" /* -BULK_SIZE as per usb-skeleton. Can we get full page and avoid overhead?
*/ #define BULK_SIZE 512 #define NR_USB_REQUEST_CHANNEL 0x12 #define MAX_TRANSFER (PAGE_SIZE*16 - BULK_SIZE) #define WRITES_IN_FLIGHT (20) #define MAX_VENDOR_DESCRIPTOR_SIZE 256 static struct urb *udl_get_urb_locked(struct udl_device *udl, long timeout); static int udl_parse_vendor_descriptor(struct udl_device *udl) { struct usb_device *udev = udl_to_usb_device(udl); char *desc; char *buf; char *desc_end; u8 total_len = 0; buf = kzalloc(MAX_VENDOR_DESCRIPTOR_SIZE, GFP_KERNEL); if (!buf) return false; desc = buf; total_len = usb_get_descriptor(udev, 0x5f, /* vendor specific */ 0, desc, MAX_VENDOR_DESCRIPTOR_SIZE); if (total_len > 5) { DRM_INFO("vendor descriptor length:%x data:%11ph\n", total_len, desc); if ((desc[0] != total_len) || /* descriptor length */ (desc[1] != 0x5f) || /* vendor descriptor type */ (desc[2] != 0x01) || /* version (2 bytes) */ (desc[3] != 0x00) || (desc[4] != total_len - 2)) /* length after type */ goto unrecognized; desc_end = desc + total_len; desc += 5; /* the fixed header we've already parsed */ while (desc < desc_end) { u8 length; u16 key; key = le16_to_cpu(*((u16 *) desc)); desc += sizeof(u16); length = *desc; desc++; switch (key) { case 0x0200: { /* max_area */ u32 max_area; max_area = le32_to_cpu(*((u32 *)desc)); DRM_DEBUG("DL chip limited to %d pixel modes\n", max_area); udl->sku_pixel_limit = max_area; break; } default: break; } desc += length; } } goto success; unrecognized: /* allow udlfb to load for now even if firmware unrecognized */ DRM_ERROR("Unrecognized vendor firmware descriptor\n"); success: kfree(buf); return true; } /* * Need to ensure a channel is selected before submitting URBs */ int udl_select_std_channel(struct udl_device *udl) { static const u8 set_def_chn[] = {0x57, 0xCD, 0xDC, 0xA7, 0x1C, 0x88, 0x5E, 0x15, 0x60, 0xFE, 0xC6, 0x97, 0x16, 0x3D, 0x47, 0xF2}; void *sendbuf; int ret; struct usb_device *udev = udl_to_usb_device(udl); sendbuf = kmemdup(set_def_chn, sizeof(set_def_chn), GFP_KERNEL); if (!sendbuf) return -ENOMEM; ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), NR_USB_REQUEST_CHANNEL, (USB_DIR_OUT | USB_TYPE_VENDOR), 0, 0, sendbuf, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT); kfree(sendbuf); return ret < 0 ? 
ret : 0; } void udl_urb_completion(struct urb *urb) { struct urb_node *unode = urb->context; struct udl_device *udl = unode->dev; unsigned long flags; /* sync/async unlink faults aren't errors */ if (urb->status) { if (!(urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -EPROTO || urb->status == -ESHUTDOWN)) { DRM_ERROR("%s - nonzero write bulk status received: %d\n", __func__, urb->status); } } urb->transfer_buffer_length = udl->urbs.size; /* reset to actual */ spin_lock_irqsave(&udl->urbs.lock, flags); list_add_tail(&unode->entry, &udl->urbs.list); udl->urbs.available++; spin_unlock_irqrestore(&udl->urbs.lock, flags); wake_up(&udl->urbs.sleep); } static void udl_free_urb_list(struct drm_device *dev) { struct udl_device *udl = to_udl(dev); struct urb_node *unode; struct urb *urb; DRM_DEBUG("Waiting for completes and freeing all render urbs\n"); /* keep waiting and freeing, until we've got 'em all */ while (udl->urbs.count) { spin_lock_irq(&udl->urbs.lock); urb = udl_get_urb_locked(udl, MAX_SCHEDULE_TIMEOUT); udl->urbs.count--; spin_unlock_irq(&udl->urbs.lock); if (WARN_ON(!urb)) break; unode = urb->context; /* Free each separately allocated piece */ usb_free_coherent(urb->dev, udl->urbs.size, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); kfree(unode); } wake_up_all(&udl->urbs.sleep); } static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size) { struct udl_device *udl = to_udl(dev); struct urb *urb; struct urb_node *unode; char *buf; size_t wanted_size = count * size; struct usb_device *udev = udl_to_usb_device(udl); spin_lock_init(&udl->urbs.lock); INIT_LIST_HEAD(&udl->urbs.list); init_waitqueue_head(&udl->urbs.sleep); udl->urbs.count = 0; udl->urbs.available = 0; retry: udl->urbs.size = size; while (udl->urbs.count * size < wanted_size) { unode = kzalloc(sizeof(struct urb_node), GFP_KERNEL); if (!unode) break; unode->dev = udl; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { kfree(unode); break; } unode->urb = urb; buf = usb_alloc_coherent(udev, size, GFP_KERNEL, &urb->transfer_dma); if (!buf) { kfree(unode); usb_free_urb(urb); if (size > PAGE_SIZE) { size /= 2; udl_free_urb_list(dev); goto retry; } break; } /* urb->transfer_buffer_length set to actual before submit */ usb_fill_bulk_urb(urb, udev, usb_sndbulkpipe(udev, 1), buf, size, udl_urb_completion, unode); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; list_add_tail(&unode->entry, &udl->urbs.list); udl->urbs.count++; udl->urbs.available++; } DRM_DEBUG("allocated %d %d byte urbs\n", udl->urbs.count, (int) size); return udl->urbs.count; } static struct urb *udl_get_urb_locked(struct udl_device *udl, long timeout) { struct urb_node *unode; assert_spin_locked(&udl->urbs.lock); /* Wait for an in-flight buffer to complete and get re-queued */ if (!wait_event_lock_irq_timeout(udl->urbs.sleep, !udl->urbs.count || !list_empty(&udl->urbs.list), udl->urbs.lock, timeout)) { DRM_INFO("wait for urb interrupted: available: %d\n", udl->urbs.available); return NULL; } if (!udl->urbs.count) return NULL; unode = list_first_entry(&udl->urbs.list, struct urb_node, entry); list_del_init(&unode->entry); udl->urbs.available--; return unode->urb; } #define GET_URB_TIMEOUT HZ struct urb *udl_get_urb(struct drm_device *dev) { struct udl_device *udl = to_udl(dev); struct urb *urb; spin_lock_irq(&udl->urbs.lock); urb = udl_get_urb_locked(udl, GET_URB_TIMEOUT); spin_unlock_irq(&udl->urbs.lock); return urb; } int udl_submit_urb(struct drm_device *dev, struct urb *urb, size_t len) { struct udl_device *udl = 
to_udl(dev); int ret; if (WARN_ON(len > udl->urbs.size)) { ret = -EINVAL; goto error; } urb->transfer_buffer_length = len; /* set to actual payload len */ ret = usb_submit_urb(urb, GFP_ATOMIC); error: if (ret) { udl_urb_completion(urb); /* because no one else will */ DRM_ERROR("usb_submit_urb error %x\n", ret); } return ret; } /* wait until all pending URBs have been processed */ void udl_sync_pending_urbs(struct drm_device *dev) { struct udl_device *udl = to_udl(dev); spin_lock_irq(&udl->urbs.lock); /* 2 seconds as a sane timeout */ if (!wait_event_lock_irq_timeout(udl->urbs.sleep, udl->urbs.available == udl->urbs.count, udl->urbs.lock, msecs_to_jiffies(2000))) drm_err(dev, "Timeout for syncing pending URBs\n"); spin_unlock_irq(&udl->urbs.lock); } int udl_init(struct udl_device *udl) { struct drm_device *dev = &udl->drm; int ret = -ENOMEM; DRM_DEBUG("\n"); udl->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); if (!udl->dmadev) drm_warn(dev, "buffer sharing not supported"); /* not an error */ mutex_init(&udl->gem_lock); if (!udl_parse_vendor_descriptor(udl)) { ret = -ENODEV; DRM_ERROR("firmware not recognized. Assume incompatible device\n"); goto err; } if (udl_select_std_channel(udl)) DRM_ERROR("Selecting channel failed\n"); if (!udl_alloc_urb_list(dev, WRITES_IN_FLIGHT, MAX_TRANSFER)) { DRM_ERROR("udl_alloc_urb_list failed\n"); goto err; } DRM_DEBUG("\n"); ret = udl_modeset_init(dev); if (ret) goto err; drm_kms_helper_poll_init(dev); return 0; err: if (udl->urbs.count) udl_free_urb_list(dev); put_device(udl->dmadev); DRM_ERROR("%d\n", ret); return ret; } int udl_drop_usb(struct drm_device *dev) { struct udl_device *udl = to_udl(dev); udl_free_urb_list(dev); put_device(udl->dmadev); udl->dmadev = NULL; return 0; }
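A hedged sketch of how the helpers above are meant to be combined by the rest of the driver (the function below is illustrative and assumes the declarations provided by udl_drv.h): take a pre-allocated URB with udl_get_urb(), copy the payload into its coherent buffer, then hand it to udl_submit_urb(), which returns the URB to the pool itself on failure.

#include <linux/errno.h>
#include <linux/string.h>

#include "udl_drv.h"

static int example_send_bytes(struct drm_device *dev, const void *data,
			      size_t len)
{
	struct udl_device *udl = to_udl(dev);
	struct urb *urb;

	if (len > udl->urbs.size)
		return -EINVAL;

	/* Waits up to GET_URB_TIMEOUT for a free URB from the pool. */
	urb = udl_get_urb(dev);
	if (!urb)
		return -ETIMEDOUT;

	memcpy(urb->transfer_buffer, data, len);

	/* On error udl_submit_urb() completes the URB, re-queueing it. */
	return udl_submit_urb(dev, urb, len);
}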
// SPDX-License-Identifier: GPL-2.0 /* * Silicon Laboratories CP210x USB to RS232 serial adaptor driver * * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk) * Copyright (C) 2010-2021 Johan Hovold (johan@kernel.org) * * Support to set flow control line levels using TIOCMGET and TIOCMSET * thanks to Karl Hiramoto karl@hiramoto.org.
RTSCTS hardware flow * control thanks to Munir Nassar nassarmu@real-time.com * */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/gpio/driver.h> #include <linux/bitops.h> #include <linux/mutex.h> #define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver" /* * Function Prototypes */ static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *); static void cp210x_close(struct usb_serial_port *); static void cp210x_change_speed(struct tty_struct *, struct usb_serial_port *, const struct ktermios *); static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *, const struct ktermios *); static bool cp210x_tx_empty(struct usb_serial_port *port); static int cp210x_tiocmget(struct tty_struct *); static int cp210x_tiocmset(struct tty_struct *, unsigned int, unsigned int); static int cp210x_tiocmset_port(struct usb_serial_port *port, unsigned int, unsigned int); static int cp210x_break_ctl(struct tty_struct *, int); static int cp210x_attach(struct usb_serial *); static void cp210x_disconnect(struct usb_serial *); static void cp210x_release(struct usb_serial *); static int cp210x_port_probe(struct usb_serial_port *); static void cp210x_port_remove(struct usb_serial_port *); static void cp210x_dtr_rts(struct usb_serial_port *port, int on); static void cp210x_process_read_urb(struct urb *urb); static void cp210x_enable_event_mode(struct usb_serial_port *port); static void cp210x_disable_event_mode(struct usb_serial_port *port); static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0404, 0x034C) }, /* NCR Retail IO Box */ { USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */ { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x04BF, 0x1301) }, /* TDK Corporation NC0110013M - Network Controller */ { USB_DEVICE(0x04BF, 0x1303) }, /* TDK Corporation MM0110113M - i3 Micro Module */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0908, 0x0070) }, /* Siemens SCALANCE LPE-9000 USB Serial Console */ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ { USB_DEVICE(0x0988, 0x0578) }, /* Teraoka AD2000 */ { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ { USB_DEVICE(0x0BED, 0x1101) }, /* MEI series 2000 Combo Acceptor */ { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */ { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */ { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */ { USB_DEVICE(0x0FDE, 0xCA05) }, /* OWL Wireless Electricity Monitor CM-160 */ { USB_DEVICE(0x106F, 0x0003) }, /* CPI / Money Controls Bulk Coin Recycler */ { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */ { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */ { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */ { USB_DEVICE(0x10C4, 0x0F91) }, /* Vstabi */ { 
USB_DEVICE(0x10C4, 0x1101) }, /* Arkham Technology DS101 Bus Monitor */ { USB_DEVICE(0x10C4, 0x1601) }, /* Arkham Technology DS101 Adapter */ { USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */ { USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */ { USB_DEVICE(0x10C4, 0x8044) }, /* Cygnal Debug Adapter */ { USB_DEVICE(0x10C4, 0x804E) }, /* Software Bisque Paramount ME build-in converter */ { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */ { USB_DEVICE(0x10C4, 0x8054) }, /* Enfora GSM2228 */ { USB_DEVICE(0x10C4, 0x8056) }, /* Lorenz Messtechnik devices */ { USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */ { USB_DEVICE(0x10C4, 0x806F) }, /* IMS USB to RS422 Converter Cable */ { USB_DEVICE(0x10C4, 0x807A) }, /* Crumb128 board */ { USB_DEVICE(0x10C4, 0x80C4) }, /* Cygnal Integrated Products, Inc., Optris infrared thermometer */ { USB_DEVICE(0x10C4, 0x80CA) }, /* Degree Controls Inc */ { USB_DEVICE(0x10C4, 0x80DD) }, /* Tracient RFID */ { USB_DEVICE(0x10C4, 0x80F6) }, /* Suunto sports instrument */ { USB_DEVICE(0x10C4, 0x8115) }, /* Arygon NFC/Mifare Reader */ { USB_DEVICE(0x10C4, 0x813D) }, /* Burnside Telecom Deskmobile */ { USB_DEVICE(0x10C4, 0x813F) }, /* Tams Master Easy Control */ { USB_DEVICE(0x10C4, 0x814A) }, /* West Mountain Radio RIGblaster P&P */ { USB_DEVICE(0x10C4, 0x814B) }, /* West Mountain Radio RIGtalk */ { USB_DEVICE(0x2405, 0x0003) }, /* West Mountain Radio RIGblaster Advantage */ { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */ { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */ { USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */ { USB_DEVICE(0x10C4, 0x817C) }, /* CESINEL MEDCAL N Power Quality Monitor */ { USB_DEVICE(0x10C4, 0x817D) }, /* CESINEL MEDCAL NT Power Quality Monitor */ { USB_DEVICE(0x10C4, 0x817E) }, /* CESINEL MEDCAL S Power Quality Monitor */ { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */ { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */ { USB_DEVICE(0x10C4, 0x81A6) }, /* ThinkOptics WavIt */ { USB_DEVICE(0x10C4, 0x81A9) }, /* Multiplex RC Interface */ { USB_DEVICE(0x10C4, 0x81AC) }, /* MSD Dash Hawk */ { USB_DEVICE(0x10C4, 0x81AD) }, /* INSYS USB Modem */ { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */ { USB_DEVICE(0x10C4, 0x81D7) }, /* IAI Corp. 
RCB-CV-USB USB to RS485 Adaptor */ { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */ { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */ { USB_DEVICE(0x10C4, 0x81E8) }, /* Zephyr Bioharness */ { USB_DEVICE(0x10C4, 0x81F2) }, /* C1007 HF band RFID controller */ { USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */ { USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */ { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */ { USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */ { USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */ { USB_DEVICE(0x10C4, 0x82AA) }, /* Silicon Labs IFS-USB-DATACABLE used with Quint UPS */ { USB_DEVICE(0x10C4, 0x82EF) }, /* CESINEL FALCO 6105 AC Power Supply */ { USB_DEVICE(0x10C4, 0x82F1) }, /* CESINEL MEDCAL EFD Earth Fault Detector */ { USB_DEVICE(0x10C4, 0x82F2) }, /* CESINEL MEDCAL ST Network Analyzer */ { USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */ { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */ { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ { USB_DEVICE(0x10C4, 0x8382) }, /* Cygnal Integrated Products, Inc. */ { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */ { USB_DEVICE(0x10C4, 0x83AA) }, /* Mark-10 Digital Force Gauge */ { USB_DEVICE(0x10C4, 0x83D8) }, /* DekTec DTA Plus VHF/UHF Booster/Attenuator */ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */ { USB_DEVICE(0x10C4, 0x8414) }, /* Decagon USB Cable Adapter */ { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ { USB_DEVICE(0x10C4, 0x851E) }, /* CESINEL MEDCAL PT Network Analyzer */ { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */ { USB_DEVICE(0x10C4, 0x85B8) }, /* CESINEL ReCon T Energy Logger */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x863C) }, /* MGP Instruments PDS100 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */ { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88D8) }, /* Acuity Brands nLight Air Adapter */ { USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer */ { USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ { USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */ { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */ { USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ { USB_DEVICE(0x10C4, 0x8A5B) }, /* CEL EM3588 
ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA63) }, /* Silicon Labs Windows Update (CP2101-4/CP2102N) */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */ { USB_DEVICE(0x10C4, 0xEA7A) }, /* Silicon Labs Windows Update (CP2105) */ { USB_DEVICE(0x10C4, 0xEA7B) }, /* Silicon Labs Windows Update (CP2108) */ { USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */ { USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */ { USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */ { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ { USB_DEVICE(0x11CA, 0x0212) }, /* Verifone USB to Printer (UART, CP2102) */ { USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */ { USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */ { USB_DEVICE(0x155A, 0x1006) }, /* ELDAT Easywave RX09 */ { USB_DEVICE(0x166A, 0x0201) }, /* Clipsal 5500PACA C-Bus Pascal Automation Controller */ { USB_DEVICE(0x166A, 0x0301) }, /* Clipsal 5800PC C-Bus Wireless PC Interface */ { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */ { USB_DEVICE(0x166A, 0x0304) }, /* Clipsal 5000CT2 C-Bus Black and White Touchscreen */ { USB_DEVICE(0x166A, 0x0305) }, /* Clipsal C-5000CT2 C-Bus Spectrum Colour Touchscreen */ { USB_DEVICE(0x166A, 0x0401) }, /* Clipsal L51xx C-Bus Architectural Dimmer */ { USB_DEVICE(0x166A, 0x0101) }, /* Clipsal 5560884 C-Bus Multi-room Audio Matrix Switcher */ { USB_DEVICE(0x16C0, 0x09B0) }, /* Lunatico Seletek */ { USB_DEVICE(0x16C0, 0x09B1) }, /* Lunatico Seletek */ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */ { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */ { USB_DEVICE(0x16DC, 0x0012) }, /* W-IE-NE-R Plein & Baus GmbH MPOD Multi Channel Power Supply */ { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */ { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */ { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */ { USB_DEVICE(0x17A8, 0x0011) }, /* Kamstrup 444 MHz RF sniffer */ { USB_DEVICE(0x17A8, 0x0013) }, /* Kamstrup 870 MHz RF sniffer */ { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */ { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */ { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder 
interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */ { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */ { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 M.2 Key E serial interface */ { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 Display serial interface */ { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */ { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ { USB_DEVICE(0x1D6F, 0x0010) }, /* Seluxit ApS RF Dongle */ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ { USB_DEVICE(0x1FB9, 0x0100) }, /* Lake Shore Model 121 Current Source */ { USB_DEVICE(0x1FB9, 0x0200) }, /* Lake Shore Model 218A Temperature Monitor */ { USB_DEVICE(0x1FB9, 0x0201) }, /* Lake Shore Model 219 Temperature Monitor */ { USB_DEVICE(0x1FB9, 0x0202) }, /* Lake Shore Model 233 Temperature Transmitter */ { USB_DEVICE(0x1FB9, 0x0203) }, /* Lake Shore Model 235 Temperature Transmitter */ { USB_DEVICE(0x1FB9, 0x0300) }, /* Lake Shore Model 335 Temperature Controller */ { USB_DEVICE(0x1FB9, 0x0301) }, /* Lake Shore Model 336 Temperature Controller */ { USB_DEVICE(0x1FB9, 0x0302) }, /* Lake Shore Model 350 Temperature Controller */ { USB_DEVICE(0x1FB9, 0x0303) }, /* Lake Shore Model 371 AC Bridge */ { USB_DEVICE(0x1FB9, 0x0400) }, /* Lake Shore Model 411 Handheld Gaussmeter */ { USB_DEVICE(0x1FB9, 0x0401) }, /* Lake Shore Model 425 Gaussmeter */ { USB_DEVICE(0x1FB9, 0x0402) }, /* Lake Shore Model 455A Gaussmeter */ { USB_DEVICE(0x1FB9, 0x0403) }, /* Lake Shore Model 475A Gaussmeter */ { USB_DEVICE(0x1FB9, 0x0404) }, /* Lake Shore Model 465 Three Axis Gaussmeter */ { USB_DEVICE(0x1FB9, 0x0600) }, /* Lake Shore Model 625A Superconducting MPS */ { USB_DEVICE(0x1FB9, 0x0601) }, /* Lake Shore Model 642A Magnet Power Supply */ { USB_DEVICE(0x1FB9, 0x0602) }, /* Lake Shore Model 648 Magnet Power Supply */ { USB_DEVICE(0x1FB9, 0x0700) }, /* Lake Shore Model 737 VSM Controller */ { USB_DEVICE(0x1FB9, 0x0701) }, /* Lake Shore Model 776 Hall Matrix */ { USB_DEVICE(0x2184, 0x0030) }, /* GW Instek GDM-834x Digital Multimeter */ { USB_DEVICE(0x2626, 0xEA60) }, /* Aruba Networks 7xxx USB Serial Console */ { USB_DEVICE(0x3195, 0xF190) }, /* Link Instruments MSO-19 */ { USB_DEVICE(0x3195, 0xF280) }, /* Link Instruments MSO-28 */ { USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */ { USB_DEVICE(0x3923, 0x7A0B) }, /* National Instruments USB Serial Console */ { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */ { } /* Terminating Entry */ }; MODULE_DEVICE_TABLE(usb, id_table); struct cp210x_serial_private { #ifdef CONFIG_GPIOLIB struct gpio_chip gc; bool gpio_registered; u16 gpio_pushpull; u16 gpio_altfunc; u16 gpio_input; #endif u8 partnum; u32 fw_version; speed_t min_speed; speed_t max_speed; bool use_actual_rate; bool no_flow_control; bool no_event_mode; }; enum cp210x_event_state { ES_DATA, ES_ESCAPE, ES_LSR, ES_LSR_DATA_0, ES_LSR_DATA_1, ES_MSR }; struct cp210x_port_private { u8 bInterfaceNumber; bool event_mode; enum cp210x_event_state event_state; u8 lsr; struct mutex mutex; bool crtscts; 
bool dtr; bool rts; }; static struct usb_serial_driver cp210x_device = { .driver = { .owner = THIS_MODULE, .name = "cp210x", }, .id_table = id_table, .num_ports = 1, .bulk_in_size = 256, .bulk_out_size = 256, .open = cp210x_open, .close = cp210x_close, .break_ctl = cp210x_break_ctl, .set_termios = cp210x_set_termios, .tx_empty = cp210x_tx_empty, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .tiocmget = cp210x_tiocmget, .tiocmset = cp210x_tiocmset, .get_icount = usb_serial_generic_get_icount, .attach = cp210x_attach, .disconnect = cp210x_disconnect, .release = cp210x_release, .port_probe = cp210x_port_probe, .port_remove = cp210x_port_remove, .dtr_rts = cp210x_dtr_rts, .process_read_urb = cp210x_process_read_urb, }; static struct usb_serial_driver * const serial_drivers[] = { &cp210x_device, NULL }; /* Config request types */ #define REQTYPE_HOST_TO_INTERFACE 0x41 #define REQTYPE_INTERFACE_TO_HOST 0xc1 #define REQTYPE_HOST_TO_DEVICE 0x40 #define REQTYPE_DEVICE_TO_HOST 0xc0 /* Config request codes */ #define CP210X_IFC_ENABLE 0x00 #define CP210X_SET_BAUDDIV 0x01 #define CP210X_GET_BAUDDIV 0x02 #define CP210X_SET_LINE_CTL 0x03 #define CP210X_GET_LINE_CTL 0x04 #define CP210X_SET_BREAK 0x05 #define CP210X_IMM_CHAR 0x06 #define CP210X_SET_MHS 0x07 #define CP210X_GET_MDMSTS 0x08 #define CP210X_SET_XON 0x09 #define CP210X_SET_XOFF 0x0A #define CP210X_SET_EVENTMASK 0x0B #define CP210X_GET_EVENTMASK 0x0C #define CP210X_SET_CHAR 0x0D #define CP210X_GET_CHARS 0x0E #define CP210X_GET_PROPS 0x0F #define CP210X_GET_COMM_STATUS 0x10 #define CP210X_RESET 0x11 #define CP210X_PURGE 0x12 #define CP210X_SET_FLOW 0x13 #define CP210X_GET_FLOW 0x14 #define CP210X_EMBED_EVENTS 0x15 #define CP210X_GET_EVENTSTATE 0x16 #define CP210X_SET_CHARS 0x19 #define CP210X_GET_BAUDRATE 0x1D #define CP210X_SET_BAUDRATE 0x1E #define CP210X_VENDOR_SPECIFIC 0xFF /* CP210X_IFC_ENABLE */ #define UART_ENABLE 0x0001 #define UART_DISABLE 0x0000 /* CP210X_(SET|GET)_BAUDDIV */ #define BAUD_RATE_GEN_FREQ 0x384000 /* CP210X_(SET|GET)_LINE_CTL */ #define BITS_DATA_MASK 0X0f00 #define BITS_DATA_5 0X0500 #define BITS_DATA_6 0X0600 #define BITS_DATA_7 0X0700 #define BITS_DATA_8 0X0800 #define BITS_DATA_9 0X0900 #define BITS_PARITY_MASK 0x00f0 #define BITS_PARITY_NONE 0x0000 #define BITS_PARITY_ODD 0x0010 #define BITS_PARITY_EVEN 0x0020 #define BITS_PARITY_MARK 0x0030 #define BITS_PARITY_SPACE 0x0040 #define BITS_STOP_MASK 0x000f #define BITS_STOP_1 0x0000 #define BITS_STOP_1_5 0x0001 #define BITS_STOP_2 0x0002 /* CP210X_SET_BREAK */ #define BREAK_ON 0x0001 #define BREAK_OFF 0x0000 /* CP210X_(SET_MHS|GET_MDMSTS) */ #define CONTROL_DTR 0x0001 #define CONTROL_RTS 0x0002 #define CONTROL_CTS 0x0010 #define CONTROL_DSR 0x0020 #define CONTROL_RING 0x0040 #define CONTROL_DCD 0x0080 #define CONTROL_WRITE_DTR 0x0100 #define CONTROL_WRITE_RTS 0x0200 /* CP210X_(GET|SET)_CHARS */ struct cp210x_special_chars { u8 bEofChar; u8 bErrorChar; u8 bBreakChar; u8 bEventChar; u8 bXonChar; u8 bXoffChar; }; /* CP210X_VENDOR_SPECIFIC values */ #define CP210X_GET_FW_VER 0x000E #define CP210X_READ_2NCONFIG 0x000E #define CP210X_GET_FW_VER_2N 0x0010 #define CP210X_READ_LATCH 0x00C2 #define CP210X_GET_PARTNUM 0x370B #define CP210X_GET_PORTCONFIG 0x370C #define CP210X_GET_DEVICEMODE 0x3711 #define CP210X_WRITE_LATCH 0x37E1 /* Part number definitions */ #define CP210X_PARTNUM_CP2101 0x01 #define CP210X_PARTNUM_CP2102 0x02 #define CP210X_PARTNUM_CP2103 0x03 #define CP210X_PARTNUM_CP2104 0x04 #define CP210X_PARTNUM_CP2105 0x05 #define 
CP210X_PARTNUM_CP2108 0x08 #define CP210X_PARTNUM_CP2102N_QFN28 0x20 #define CP210X_PARTNUM_CP2102N_QFN24 0x21 #define CP210X_PARTNUM_CP2102N_QFN20 0x22 #define CP210X_PARTNUM_UNKNOWN 0xFF /* CP210X_GET_COMM_STATUS returns these 0x13 bytes */ struct cp210x_comm_status { __le32 ulErrors; __le32 ulHoldReasons; __le32 ulAmountInInQueue; __le32 ulAmountInOutQueue; u8 bEofReceived; u8 bWaitForImmediate; u8 bReserved; } __packed; /* * CP210X_PURGE - 16 bits passed in wValue of USB request. * SiLabs app note AN571 gives a strange description of the 4 bits: * bit 0 or bit 2 clears the transmit queue and 1 or 3 receive. * writing 1 to all, however, purges cp2108 well enough to avoid the hang. */ #define PURGE_ALL 0x000f /* CP210X_EMBED_EVENTS */ #define CP210X_ESCCHAR 0xec #define CP210X_LSR_OVERRUN BIT(1) #define CP210X_LSR_PARITY BIT(2) #define CP210X_LSR_FRAME BIT(3) #define CP210X_LSR_BREAK BIT(4) /* CP210X_GET_FLOW/CP210X_SET_FLOW read/write these 0x10 bytes */ struct cp210x_flow_ctl { __le32 ulControlHandshake; __le32 ulFlowReplace; __le32 ulXonLimit; __le32 ulXoffLimit; }; /* cp210x_flow_ctl::ulControlHandshake */ #define CP210X_SERIAL_DTR_MASK GENMASK(1, 0) #define CP210X_SERIAL_DTR_INACTIVE (0 << 0) #define CP210X_SERIAL_DTR_ACTIVE (1 << 0) #define CP210X_SERIAL_DTR_FLOW_CTL (2 << 0) #define CP210X_SERIAL_CTS_HANDSHAKE BIT(3) #define CP210X_SERIAL_DSR_HANDSHAKE BIT(4) #define CP210X_SERIAL_DCD_HANDSHAKE BIT(5) #define CP210X_SERIAL_DSR_SENSITIVITY BIT(6) /* cp210x_flow_ctl::ulFlowReplace */ #define CP210X_SERIAL_AUTO_TRANSMIT BIT(0) #define CP210X_SERIAL_AUTO_RECEIVE BIT(1) #define CP210X_SERIAL_ERROR_CHAR BIT(2) #define CP210X_SERIAL_NULL_STRIPPING BIT(3) #define CP210X_SERIAL_BREAK_CHAR BIT(4) #define CP210X_SERIAL_RTS_MASK GENMASK(7, 6) #define CP210X_SERIAL_RTS_INACTIVE (0 << 6) #define CP210X_SERIAL_RTS_ACTIVE (1 << 6) #define CP210X_SERIAL_RTS_FLOW_CTL (2 << 6) #define CP210X_SERIAL_XOFF_CONTINUE BIT(31) /* CP210X_VENDOR_SPECIFIC, CP210X_GET_DEVICEMODE call reads these 0x2 bytes. */ struct cp210x_pin_mode { u8 eci; u8 sci; }; #define CP210X_PIN_MODE_MODEM 0 #define CP210X_PIN_MODE_GPIO BIT(0) /* * CP210X_VENDOR_SPECIFIC, CP210X_GET_PORTCONFIG call reads these 0xf bytes * on a CP2105 chip. Structure needs padding due to unused/unspecified bytes. */ struct cp210x_dual_port_config { __le16 gpio_mode; u8 __pad0[2]; __le16 reset_state; u8 __pad1[4]; __le16 suspend_state; u8 sci_cfg; u8 eci_cfg; u8 device_cfg; } __packed; /* * CP210X_VENDOR_SPECIFIC, CP210X_GET_PORTCONFIG call reads these 0xd bytes * on a CP2104 chip. Structure needs padding due to unused/unspecified bytes. 
*/ struct cp210x_single_port_config { __le16 gpio_mode; u8 __pad0[2]; __le16 reset_state; u8 __pad1[4]; __le16 suspend_state; u8 device_cfg; } __packed; /* GPIO modes */ #define CP210X_SCI_GPIO_MODE_OFFSET 9 #define CP210X_SCI_GPIO_MODE_MASK GENMASK(11, 9) #define CP210X_ECI_GPIO_MODE_OFFSET 2 #define CP210X_ECI_GPIO_MODE_MASK GENMASK(3, 2) #define CP210X_GPIO_MODE_OFFSET 8 #define CP210X_GPIO_MODE_MASK GENMASK(11, 8) /* CP2105 port configuration values */ #define CP2105_GPIO0_TXLED_MODE BIT(0) #define CP2105_GPIO1_RXLED_MODE BIT(1) #define CP2105_GPIO1_RS485_MODE BIT(2) /* CP2104 port configuration values */ #define CP2104_GPIO0_TXLED_MODE BIT(0) #define CP2104_GPIO1_RXLED_MODE BIT(1) #define CP2104_GPIO2_RS485_MODE BIT(2) struct cp210x_quad_port_state { __le16 gpio_mode_pb0; __le16 gpio_mode_pb1; __le16 gpio_mode_pb2; __le16 gpio_mode_pb3; __le16 gpio_mode_pb4; __le16 gpio_lowpower_pb0; __le16 gpio_lowpower_pb1; __le16 gpio_lowpower_pb2; __le16 gpio_lowpower_pb3; __le16 gpio_lowpower_pb4; __le16 gpio_latch_pb0; __le16 gpio_latch_pb1; __le16 gpio_latch_pb2; __le16 gpio_latch_pb3; __le16 gpio_latch_pb4; }; /* * CP210X_VENDOR_SPECIFIC, CP210X_GET_PORTCONFIG call reads these 0x49 bytes * on a CP2108 chip. * * See https://www.silabs.com/documents/public/application-notes/an978-cp210x-usb-to-uart-api-specification.pdf */ struct cp210x_quad_port_config { struct cp210x_quad_port_state reset_state; struct cp210x_quad_port_state suspend_state; u8 ipdelay_ifc[4]; u8 enhancedfxn_ifc[4]; u8 enhancedfxn_device; u8 extclkfreq[4]; } __packed; #define CP2108_EF_IFC_GPIO_TXLED 0x01 #define CP2108_EF_IFC_GPIO_RXLED 0x02 #define CP2108_EF_IFC_GPIO_RS485 0x04 #define CP2108_EF_IFC_GPIO_RS485_LOGIC 0x08 #define CP2108_EF_IFC_GPIO_CLOCK 0x10 #define CP2108_EF_IFC_DYNAMIC_SUSPEND 0x40 /* CP2102N configuration array indices */ #define CP210X_2NCONFIG_CONFIG_VERSION_IDX 2 #define CP210X_2NCONFIG_GPIO_MODE_IDX 581 #define CP210X_2NCONFIG_GPIO_RSTLATCH_IDX 587 #define CP210X_2NCONFIG_GPIO_CONTROL_IDX 600 /* CP2102N QFN20 port configuration values */ #define CP2102N_QFN20_GPIO2_TXLED_MODE BIT(2) #define CP2102N_QFN20_GPIO3_RXLED_MODE BIT(3) #define CP2102N_QFN20_GPIO1_RS485_MODE BIT(4) #define CP2102N_QFN20_GPIO0_CLK_MODE BIT(6) /* * CP210X_VENDOR_SPECIFIC, CP210X_WRITE_LATCH call writes these 0x02 bytes * for CP2102N, CP2103, CP2104 and CP2105. */ struct cp210x_gpio_write { u8 mask; u8 state; }; /* * CP210X_VENDOR_SPECIFIC, CP210X_WRITE_LATCH call writes these 0x04 bytes * for CP2108. */ struct cp210x_gpio_write16 { __le16 mask; __le16 state; }; /* * Helper to get interface number when we only have struct usb_serial. */ static u8 cp210x_interface_num(struct usb_serial *serial) { struct usb_host_interface *cur_altsetting; cur_altsetting = serial->interface->cur_altsetting; return cur_altsetting->desc.bInterfaceNumber; } /* * Reads a variable-sized block of CP210X_ registers, identified by req. * Returns data into buf in native USB byte order. */ static int cp210x_read_reg_block(struct usb_serial_port *port, u8 req, void *buf, int bufsize) { struct usb_serial *serial = port->serial; struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); int result; result = usb_control_msg_recv(serial->dev, 0, req, REQTYPE_INTERFACE_TO_HOST, 0, port_priv->bInterfaceNumber, buf, bufsize, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (result) { dev_err(&port->dev, "failed get req 0x%x size %d status: %d\n", req, bufsize, result); return result; } return 0; } /* * Reads any 8-bit CP210X_ register identified by req. 
*/ static int cp210x_read_u8_reg(struct usb_serial_port *port, u8 req, u8 *val) { return cp210x_read_reg_block(port, req, val, sizeof(*val)); } /* * Reads a variable-sized vendor block of CP210X_ registers, identified by val. * Returns data into buf in native USB byte order. */ static int cp210x_read_vendor_block(struct usb_serial *serial, u8 type, u16 val, void *buf, int bufsize) { int result; result = usb_control_msg_recv(serial->dev, 0, CP210X_VENDOR_SPECIFIC, type, val, cp210x_interface_num(serial), buf, bufsize, USB_CTRL_GET_TIMEOUT, GFP_KERNEL); if (result) { dev_err(&serial->interface->dev, "failed to get vendor val 0x%04x size %d: %d\n", val, bufsize, result); return result; } return 0; } /* * Writes any 16-bit CP210X_ register (req) whose value is passed * entirely in the wValue field of the USB request. */ static int cp210x_write_u16_reg(struct usb_serial_port *port, u8 req, u16 val) { struct usb_serial *serial = port->serial; struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); int result; result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), req, REQTYPE_HOST_TO_INTERFACE, val, port_priv->bInterfaceNumber, NULL, 0, USB_CTRL_SET_TIMEOUT); if (result < 0) { dev_err(&port->dev, "failed set request 0x%x status: %d\n", req, result); } return result; } /* * Writes a variable-sized block of CP210X_ registers, identified by req. * Data in buf must be in native USB byte order. */ static int cp210x_write_reg_block(struct usb_serial_port *port, u8 req, void *buf, int bufsize) { struct usb_serial *serial = port->serial; struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); int result; result = usb_control_msg_send(serial->dev, 0, req, REQTYPE_HOST_TO_INTERFACE, 0, port_priv->bInterfaceNumber, buf, bufsize, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (result) { dev_err(&port->dev, "failed set req 0x%x size %d status: %d\n", req, bufsize, result); return result; } return 0; } /* * Writes any 32-bit CP210X_ register identified by req. */ static int cp210x_write_u32_reg(struct usb_serial_port *port, u8 req, u32 val) { __le32 le32_val; le32_val = cpu_to_le32(val); return cp210x_write_reg_block(port, req, &le32_val, sizeof(le32_val)); } #ifdef CONFIG_GPIOLIB /* * Writes a variable-sized vendor block of CP210X_ registers, identified by val. * Data in buf must be in native USB byte order. 
*/ static int cp210x_write_vendor_block(struct usb_serial *serial, u8 type, u16 val, void *buf, int bufsize) { int result; result = usb_control_msg_send(serial->dev, 0, CP210X_VENDOR_SPECIFIC, type, val, cp210x_interface_num(serial), buf, bufsize, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (result) { dev_err(&serial->interface->dev, "failed to set vendor val 0x%04x size %d: %d\n", val, bufsize, result); return result; } return 0; } #endif static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port) { struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); int result; result = cp210x_write_u16_reg(port, CP210X_IFC_ENABLE, UART_ENABLE); if (result) { dev_err(&port->dev, "%s - Unable to enable UART\n", __func__); return result; } if (tty) cp210x_set_termios(tty, port, NULL); result = usb_serial_generic_open(tty, port); if (result) goto err_disable; return 0; err_disable: cp210x_write_u16_reg(port, CP210X_IFC_ENABLE, UART_DISABLE); port_priv->event_mode = false; return result; } static void cp210x_close(struct usb_serial_port *port) { struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); usb_serial_generic_close(port); /* Clear both queues; cp2108 needs this to avoid an occasional hang */ cp210x_write_u16_reg(port, CP210X_PURGE, PURGE_ALL); cp210x_write_u16_reg(port, CP210X_IFC_ENABLE, UART_DISABLE); /* Disabling the interface disables event-insertion mode. */ port_priv->event_mode = false; } static void cp210x_process_lsr(struct usb_serial_port *port, unsigned char lsr, char *flag) { if (lsr & CP210X_LSR_BREAK) { port->icount.brk++; *flag = TTY_BREAK; } else if (lsr & CP210X_LSR_PARITY) { port->icount.parity++; *flag = TTY_PARITY; } else if (lsr & CP210X_LSR_FRAME) { port->icount.frame++; *flag = TTY_FRAME; } if (lsr & CP210X_LSR_OVERRUN) { port->icount.overrun++; tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } } static bool cp210x_process_char(struct usb_serial_port *port, unsigned char *ch, char *flag) { struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); switch (port_priv->event_state) { case ES_DATA: if (*ch == CP210X_ESCCHAR) { port_priv->event_state = ES_ESCAPE; break; } return false; case ES_ESCAPE: switch (*ch) { case 0: dev_dbg(&port->dev, "%s - escape char\n", __func__); *ch = CP210X_ESCCHAR; port_priv->event_state = ES_DATA; return false; case 1: port_priv->event_state = ES_LSR_DATA_0; break; case 2: port_priv->event_state = ES_LSR; break; case 3: port_priv->event_state = ES_MSR; break; default: dev_err(&port->dev, "malformed event 0x%02x\n", *ch); port_priv->event_state = ES_DATA; break; } break; case ES_LSR_DATA_0: port_priv->lsr = *ch; port_priv->event_state = ES_LSR_DATA_1; break; case ES_LSR_DATA_1: dev_dbg(&port->dev, "%s - lsr = 0x%02x, data = 0x%02x\n", __func__, port_priv->lsr, *ch); cp210x_process_lsr(port, port_priv->lsr, flag); port_priv->event_state = ES_DATA; return false; case ES_LSR: dev_dbg(&port->dev, "%s - lsr = 0x%02x\n", __func__, *ch); port_priv->lsr = *ch; cp210x_process_lsr(port, port_priv->lsr, flag); port_priv->event_state = ES_DATA; break; case ES_MSR: dev_dbg(&port->dev, "%s - msr = 0x%02x\n", __func__, *ch); /* unimplemented */ port_priv->event_state = ES_DATA; break; } return true; } static void cp210x_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); unsigned char *ch = urb->transfer_buffer; char flag; int i; if (!urb->actual_length) return; if (port_priv->event_mode) { for (i = 0; 
i < urb->actual_length; i++, ch++) { flag = TTY_NORMAL; if (cp210x_process_char(port, ch, &flag)) continue; tty_insert_flip_char(&port->port, *ch, flag); } } else { tty_insert_flip_string(&port->port, ch, urb->actual_length); } tty_flip_buffer_push(&port->port); } /* * Read how many bytes are waiting in the TX queue. */ static int cp210x_get_tx_queue_byte_count(struct usb_serial_port *port, u32 *count) { struct usb_serial *serial = port->serial; struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); struct cp210x_comm_status sts; int result; result = usb_control_msg_recv(serial->dev, 0, CP210X_GET_COMM_STATUS, REQTYPE_INTERFACE_TO_HOST, 0, port_priv->bInterfaceNumber, &sts, sizeof(sts), USB_CTRL_GET_TIMEOUT, GFP_KERNEL); if (result) { dev_err(&port->dev, "failed to get comm status: %d\n", result); return result; } *count = le32_to_cpu(sts.ulAmountInOutQueue); return 0; } static bool cp210x_tx_empty(struct usb_serial_port *port) { int err; u32 count; err = cp210x_get_tx_queue_byte_count(port, &count); if (err) return true; return !count; } struct cp210x_rate { speed_t rate; speed_t high; }; static const struct cp210x_rate cp210x_an205_table1[] = { { 300, 300 }, { 600, 600 }, { 1200, 1200 }, { 1800, 1800 }, { 2400, 2400 }, { 4000, 4000 }, { 4800, 4803 }, { 7200, 7207 }, { 9600, 9612 }, { 14400, 14428 }, { 16000, 16062 }, { 19200, 19250 }, { 28800, 28912 }, { 38400, 38601 }, { 51200, 51558 }, { 56000, 56280 }, { 57600, 58053 }, { 64000, 64111 }, { 76800, 77608 }, { 115200, 117028 }, { 128000, 129347 }, { 153600, 156868 }, { 230400, 237832 }, { 250000, 254234 }, { 256000, 273066 }, { 460800, 491520 }, { 500000, 567138 }, { 576000, 670254 }, { 921600, UINT_MAX } }; /* * Quantises the baud rate as per AN205 Table 1 */ static speed_t cp210x_get_an205_rate(speed_t baud) { int i; for (i = 0; i < ARRAY_SIZE(cp210x_an205_table1); ++i) { if (baud <= cp210x_an205_table1[i].high) break; } return cp210x_an205_table1[i].rate; } static speed_t cp210x_get_actual_rate(speed_t baud) { unsigned int prescale = 1; unsigned int div; if (baud <= 365) prescale = 4; div = DIV_ROUND_CLOSEST(48000000, 2 * prescale * baud); baud = 48000000 / (2 * prescale * div); return baud; } /* * CP2101 supports the following baud rates: * * 300, 600, 1200, 1800, 2400, 4800, 7200, 9600, 14400, 19200, 28800, * 38400, 56000, 57600, 115200, 128000, 230400, 460800, 921600 * * CP2102 and CP2103 support the following additional rates: * * 4000, 16000, 51200, 64000, 76800, 153600, 250000, 256000, 500000, * 576000 * * The device will map a requested rate to a supported one, but the result * of requests for rates greater than 1053257 is undefined (see AN205). * * CP2104, CP2105 and CP2110 support most rates up to 2M, 921k and 1M baud, * respectively, with an error less than 1%. The actual rates are determined * by * * div = round(freq / (2 x prescale x request)) * actual = freq / (2 x prescale x div) * * For CP2104 and CP2105 freq is 48Mhz and prescale is 4 for request <= 365bps * or 1 otherwise. * For CP2110 freq is 24Mhz and prescale is 4 for request <= 300bps or 1 * otherwise. */ static void cp210x_change_speed(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_serial *serial = port->serial; struct cp210x_serial_private *priv = usb_get_serial_data(serial); u32 baud; if (tty->termios.c_ospeed == 0) return; /* * This maps the requested rate to the actual rate, a valid rate on * cp2102 or cp2103, or to an arbitrary rate in [1M, max_speed]. 
*/ baud = clamp(tty->termios.c_ospeed, priv->min_speed, priv->max_speed); if (priv->use_actual_rate) baud = cp210x_get_actual_rate(baud); else if (baud < 1000000) baud = cp210x_get_an205_rate(baud); dev_dbg(&port->dev, "%s - setting baud rate to %u\n", __func__, baud); if (cp210x_write_u32_reg(port, CP210X_SET_BAUDRATE, baud)) { dev_warn(&port->dev, "failed to set baud rate to %u\n", baud); if (old_termios) baud = old_termios->c_ospeed; else baud = 9600; } tty_encode_baud_rate(tty, baud, baud); } static void cp210x_enable_event_mode(struct usb_serial_port *port) { struct cp210x_serial_private *priv = usb_get_serial_data(port->serial); struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); int ret; if (port_priv->event_mode) return; if (priv->no_event_mode) return; port_priv->event_state = ES_DATA; port_priv->event_mode = true; ret = cp210x_write_u16_reg(port, CP210X_EMBED_EVENTS, CP210X_ESCCHAR); if (ret) { dev_err(&port->dev, "failed to enable events: %d\n", ret); port_priv->event_mode = false; } } static void cp210x_disable_event_mode(struct usb_serial_port *port) { struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); int ret; if (!port_priv->event_mode) return; ret = cp210x_write_u16_reg(port, CP210X_EMBED_EVENTS, 0); if (ret) { dev_err(&port->dev, "failed to disable events: %d\n", ret); return; } port_priv->event_mode = false; } static bool cp210x_termios_change(const struct ktermios *a, const struct ktermios *b) { bool iflag_change, cc_change; iflag_change = ((a->c_iflag ^ b->c_iflag) & (INPCK | IXON | IXOFF)); cc_change = a->c_cc[VSTART] != b->c_cc[VSTART] || a->c_cc[VSTOP] != b->c_cc[VSTOP]; return tty_termios_hw_change(a, b) || iflag_change || cc_change; } static void cp210x_set_flow_control(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct cp210x_serial_private *priv = usb_get_serial_data(port->serial); struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); struct cp210x_special_chars chars; struct cp210x_flow_ctl flow_ctl; u32 flow_repl; u32 ctl_hs; bool crtscts; int ret; /* * Some CP2102N interpret ulXonLimit as ulFlowReplace (erratum * CP2102N_E104). Report back that flow control is not supported. 
*/ if (priv->no_flow_control) { tty->termios.c_cflag &= ~CRTSCTS; tty->termios.c_iflag &= ~(IXON | IXOFF); } if (tty->termios.c_ospeed != 0 && old_termios && old_termios->c_ospeed != 0 && C_CRTSCTS(tty) == (old_termios->c_cflag & CRTSCTS) && I_IXON(tty) == (old_termios->c_iflag & IXON) && I_IXOFF(tty) == (old_termios->c_iflag & IXOFF) && START_CHAR(tty) == old_termios->c_cc[VSTART] && STOP_CHAR(tty) == old_termios->c_cc[VSTOP]) { return; } if (I_IXON(tty) || I_IXOFF(tty)) { memset(&chars, 0, sizeof(chars)); chars.bXonChar = START_CHAR(tty); chars.bXoffChar = STOP_CHAR(tty); ret = cp210x_write_reg_block(port, CP210X_SET_CHARS, &chars, sizeof(chars)); if (ret) { dev_err(&port->dev, "failed to set special chars: %d\n", ret); } } mutex_lock(&port_priv->mutex); if (tty->termios.c_ospeed == 0) { port_priv->dtr = false; port_priv->rts = false; } else if (old_termios && old_termios->c_ospeed == 0) { port_priv->dtr = true; port_priv->rts = true; } ret = cp210x_read_reg_block(port, CP210X_GET_FLOW, &flow_ctl, sizeof(flow_ctl)); if (ret) goto out_unlock; ctl_hs = le32_to_cpu(flow_ctl.ulControlHandshake); flow_repl = le32_to_cpu(flow_ctl.ulFlowReplace); ctl_hs &= ~CP210X_SERIAL_DSR_HANDSHAKE; ctl_hs &= ~CP210X_SERIAL_DCD_HANDSHAKE; ctl_hs &= ~CP210X_SERIAL_DSR_SENSITIVITY; ctl_hs &= ~CP210X_SERIAL_DTR_MASK; if (port_priv->dtr) ctl_hs |= CP210X_SERIAL_DTR_ACTIVE; else ctl_hs |= CP210X_SERIAL_DTR_INACTIVE; flow_repl &= ~CP210X_SERIAL_RTS_MASK; if (C_CRTSCTS(tty)) { ctl_hs |= CP210X_SERIAL_CTS_HANDSHAKE; if (port_priv->rts) flow_repl |= CP210X_SERIAL_RTS_FLOW_CTL; else flow_repl |= CP210X_SERIAL_RTS_INACTIVE; crtscts = true; } else { ctl_hs &= ~CP210X_SERIAL_CTS_HANDSHAKE; if (port_priv->rts) flow_repl |= CP210X_SERIAL_RTS_ACTIVE; else flow_repl |= CP210X_SERIAL_RTS_INACTIVE; crtscts = false; } if (I_IXOFF(tty)) { flow_repl |= CP210X_SERIAL_AUTO_RECEIVE; flow_ctl.ulXonLimit = cpu_to_le32(128); flow_ctl.ulXoffLimit = cpu_to_le32(128); } else { flow_repl &= ~CP210X_SERIAL_AUTO_RECEIVE; } if (I_IXON(tty)) flow_repl |= CP210X_SERIAL_AUTO_TRANSMIT; else flow_repl &= ~CP210X_SERIAL_AUTO_TRANSMIT; dev_dbg(&port->dev, "%s - ctrl = 0x%02x, flow = 0x%02x\n", __func__, ctl_hs, flow_repl); flow_ctl.ulControlHandshake = cpu_to_le32(ctl_hs); flow_ctl.ulFlowReplace = cpu_to_le32(flow_repl); ret = cp210x_write_reg_block(port, CP210X_SET_FLOW, &flow_ctl, sizeof(flow_ctl)); if (ret) goto out_unlock; port_priv->crtscts = crtscts; out_unlock: mutex_unlock(&port_priv->mutex); } static void cp210x_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct cp210x_serial_private *priv = usb_get_serial_data(port->serial); u16 bits; int ret; if (old_termios && !cp210x_termios_change(&tty->termios, old_termios) && tty->termios.c_ospeed != 0) return; if (!old_termios || tty->termios.c_ospeed != old_termios->c_ospeed) cp210x_change_speed(tty, port, old_termios); /* CP2101 only supports CS8, 1 stop bit and non-stick parity. 
*/ if (priv->partnum == CP210X_PARTNUM_CP2101) { tty->termios.c_cflag &= ~(CSIZE | CSTOPB | CMSPAR); tty->termios.c_cflag |= CS8; } bits = 0; switch (C_CSIZE(tty)) { case CS5: bits |= BITS_DATA_5; break; case CS6: bits |= BITS_DATA_6; break; case CS7: bits |= BITS_DATA_7; break; case CS8: default: bits |= BITS_DATA_8; break; } if (C_PARENB(tty)) { if (C_CMSPAR(tty)) { if (C_PARODD(tty)) bits |= BITS_PARITY_MARK; else bits |= BITS_PARITY_SPACE; } else { if (C_PARODD(tty)) bits |= BITS_PARITY_ODD; else bits |= BITS_PARITY_EVEN; } } if (C_CSTOPB(tty)) bits |= BITS_STOP_2; else bits |= BITS_STOP_1; ret = cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits); if (ret) dev_err(&port->dev, "failed to set line control: %d\n", ret); cp210x_set_flow_control(tty, port, old_termios); /* * Enable event-insertion mode only if input parity checking is * enabled for now. */ if (I_INPCK(tty)) cp210x_enable_event_mode(port); else cp210x_disable_event_mode(port); } static int cp210x_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; return cp210x_tiocmset_port(port, set, clear); } static int cp210x_tiocmset_port(struct usb_serial_port *port, unsigned int set, unsigned int clear) { struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); struct cp210x_flow_ctl flow_ctl; u32 ctl_hs, flow_repl; u16 control = 0; int ret; mutex_lock(&port_priv->mutex); if (set & TIOCM_RTS) { port_priv->rts = true; control |= CONTROL_RTS; control |= CONTROL_WRITE_RTS; } if (set & TIOCM_DTR) { port_priv->dtr = true; control |= CONTROL_DTR; control |= CONTROL_WRITE_DTR; } if (clear & TIOCM_RTS) { port_priv->rts = false; control &= ~CONTROL_RTS; control |= CONTROL_WRITE_RTS; } if (clear & TIOCM_DTR) { port_priv->dtr = false; control &= ~CONTROL_DTR; control |= CONTROL_WRITE_DTR; } /* * Use SET_FLOW to set DTR and enable/disable auto-RTS when hardware * flow control is enabled. */ if (port_priv->crtscts && control & CONTROL_WRITE_RTS) { ret = cp210x_read_reg_block(port, CP210X_GET_FLOW, &flow_ctl, sizeof(flow_ctl)); if (ret) goto out_unlock; ctl_hs = le32_to_cpu(flow_ctl.ulControlHandshake); flow_repl = le32_to_cpu(flow_ctl.ulFlowReplace); ctl_hs &= ~CP210X_SERIAL_DTR_MASK; if (port_priv->dtr) ctl_hs |= CP210X_SERIAL_DTR_ACTIVE; else ctl_hs |= CP210X_SERIAL_DTR_INACTIVE; flow_repl &= ~CP210X_SERIAL_RTS_MASK; if (port_priv->rts) flow_repl |= CP210X_SERIAL_RTS_FLOW_CTL; else flow_repl |= CP210X_SERIAL_RTS_INACTIVE; flow_ctl.ulControlHandshake = cpu_to_le32(ctl_hs); flow_ctl.ulFlowReplace = cpu_to_le32(flow_repl); dev_dbg(&port->dev, "%s - ctrl = 0x%02x, flow = 0x%02x\n", __func__, ctl_hs, flow_repl); ret = cp210x_write_reg_block(port, CP210X_SET_FLOW, &flow_ctl, sizeof(flow_ctl)); } else { dev_dbg(&port->dev, "%s - control = 0x%04x\n", __func__, control); ret = cp210x_write_u16_reg(port, CP210X_SET_MHS, control); } out_unlock: mutex_unlock(&port_priv->mutex); return ret; } static void cp210x_dtr_rts(struct usb_serial_port *port, int on) { if (on) cp210x_tiocmset_port(port, TIOCM_DTR | TIOCM_RTS, 0); else cp210x_tiocmset_port(port, 0, TIOCM_DTR | TIOCM_RTS); } static int cp210x_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; u8 control; int result; result = cp210x_read_u8_reg(port, CP210X_GET_MDMSTS, &control); if (result) return result; result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0) |((control & CONTROL_RTS) ? TIOCM_RTS : 0) |((control & CONTROL_CTS) ? TIOCM_CTS : 0) |((control & CONTROL_DSR) ? 
TIOCM_DSR : 0) |((control & CONTROL_RING)? TIOCM_RI : 0) |((control & CONTROL_DCD) ? TIOCM_CD : 0); dev_dbg(&port->dev, "%s - control = 0x%02x\n", __func__, control); return result; } static int cp210x_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct cp210x_serial_private *priv = usb_get_serial_data(port->serial); u16 state; if (priv->partnum == CP210X_PARTNUM_CP2105) { if (cp210x_interface_num(port->serial) == 1) return -ENOTTY; } if (break_state == 0) state = BREAK_OFF; else state = BREAK_ON; dev_dbg(&port->dev, "%s - turning break %s\n", __func__, state == BREAK_OFF ? "off" : "on"); return cp210x_write_u16_reg(port, CP210X_SET_BREAK, state); } #ifdef CONFIG_GPIOLIB static int cp210x_gpio_get(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial *serial = gpiochip_get_data(gc); struct cp210x_serial_private *priv = usb_get_serial_data(serial); u8 req_type; u16 mask; int result; int len; result = usb_autopm_get_interface(serial->interface); if (result) return result; switch (priv->partnum) { case CP210X_PARTNUM_CP2105: req_type = REQTYPE_INTERFACE_TO_HOST; len = 1; break; case CP210X_PARTNUM_CP2108: req_type = REQTYPE_INTERFACE_TO_HOST; len = 2; break; default: req_type = REQTYPE_DEVICE_TO_HOST; len = 1; break; } mask = 0; result = cp210x_read_vendor_block(serial, req_type, CP210X_READ_LATCH, &mask, len); usb_autopm_put_interface(serial->interface); if (result < 0) return result; le16_to_cpus(&mask); return !!(mask & BIT(gpio)); } static void cp210x_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) { struct usb_serial *serial = gpiochip_get_data(gc); struct cp210x_serial_private *priv = usb_get_serial_data(serial); struct cp210x_gpio_write16 buf16; struct cp210x_gpio_write buf; u16 mask, state; u16 wIndex; int result; if (value == 1) state = BIT(gpio); else state = 0; mask = BIT(gpio); result = usb_autopm_get_interface(serial->interface); if (result) goto out; switch (priv->partnum) { case CP210X_PARTNUM_CP2105: buf.mask = (u8)mask; buf.state = (u8)state; result = cp210x_write_vendor_block(serial, REQTYPE_HOST_TO_INTERFACE, CP210X_WRITE_LATCH, &buf, sizeof(buf)); break; case CP210X_PARTNUM_CP2108: buf16.mask = cpu_to_le16(mask); buf16.state = cpu_to_le16(state); result = cp210x_write_vendor_block(serial, REQTYPE_HOST_TO_INTERFACE, CP210X_WRITE_LATCH, &buf16, sizeof(buf16)); break; default: wIndex = state << 8 | mask; result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), CP210X_VENDOR_SPECIFIC, REQTYPE_HOST_TO_DEVICE, CP210X_WRITE_LATCH, wIndex, NULL, 0, USB_CTRL_SET_TIMEOUT); break; } usb_autopm_put_interface(serial->interface); out: if (result < 0) { dev_err(&serial->interface->dev, "failed to set GPIO value: %d\n", result); } } static int cp210x_gpio_direction_get(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial *serial = gpiochip_get_data(gc); struct cp210x_serial_private *priv = usb_get_serial_data(serial); return priv->gpio_input & BIT(gpio); } static int cp210x_gpio_direction_input(struct gpio_chip *gc, unsigned int gpio) { struct usb_serial *serial = gpiochip_get_data(gc); struct cp210x_serial_private *priv = usb_get_serial_data(serial); if (priv->partnum == CP210X_PARTNUM_CP2105) { /* hardware does not support an input mode */ return -ENOTSUPP; } /* push-pull pins cannot be changed to be inputs */ if (priv->gpio_pushpull & BIT(gpio)) return -EINVAL; /* make sure to release pin if it is being driven low */ cp210x_gpio_set(gc, gpio, 1); priv->gpio_input |= BIT(gpio); return 0; } 
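/*
 * Input emulation note (editorial, based on the comments elsewhere in this
 * file): the CP2102N, CP2104 and CP2108 have no dedicated GPIO input mode,
 * only open-drain and push-pull output drivers configured at the factory.
 * Latching an open-drain pin high releases the line, so the level read back
 * via CP210X_READ_LATCH in cp210x_gpio_get() then follows whatever drives
 * the pin externally. That is why cp210x_gpio_direction_input() above first
 * writes 1 to the pin and only then records it in gpio_input.
 */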
static int cp210x_gpio_direction_output(struct gpio_chip *gc, unsigned int gpio, int value) { struct usb_serial *serial = gpiochip_get_data(gc); struct cp210x_serial_private *priv = usb_get_serial_data(serial); priv->gpio_input &= ~BIT(gpio); cp210x_gpio_set(gc, gpio, value); return 0; } static int cp210x_gpio_set_config(struct gpio_chip *gc, unsigned int gpio, unsigned long config) { struct usb_serial *serial = gpiochip_get_data(gc); struct cp210x_serial_private *priv = usb_get_serial_data(serial); enum pin_config_param param = pinconf_to_config_param(config); /* Succeed only if in correct mode (this can't be set at runtime) */ if ((param == PIN_CONFIG_DRIVE_PUSH_PULL) && (priv->gpio_pushpull & BIT(gpio))) return 0; if ((param == PIN_CONFIG_DRIVE_OPEN_DRAIN) && !(priv->gpio_pushpull & BIT(gpio))) return 0; return -ENOTSUPP; } static int cp210x_gpio_init_valid_mask(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios) { struct usb_serial *serial = gpiochip_get_data(gc); struct cp210x_serial_private *priv = usb_get_serial_data(serial); struct device *dev = &serial->interface->dev; unsigned long altfunc_mask = priv->gpio_altfunc; bitmap_complement(valid_mask, &altfunc_mask, ngpios); if (bitmap_empty(valid_mask, ngpios)) dev_dbg(dev, "no pin configured for GPIO\n"); else dev_dbg(dev, "GPIO.%*pbl configured for GPIO\n", ngpios, valid_mask); return 0; } /* * This function is for configuring GPIO using shared pins, where other signals * are made unavailable by configuring the use of GPIO. This is believed to be * only applicable to the cp2105 at this point, the other devices supported by * this driver that provide GPIO do so in a way that does not impact other * signals and are thus expected to have very different initialisation. */ static int cp2105_gpioconf_init(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); struct cp210x_pin_mode mode; struct cp210x_dual_port_config config; u8 intf_num = cp210x_interface_num(serial); u8 iface_config; int result; result = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, CP210X_GET_DEVICEMODE, &mode, sizeof(mode)); if (result < 0) return result; result = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, CP210X_GET_PORTCONFIG, &config, sizeof(config)); if (result < 0) return result; /* 2 banks of GPIO - One for the pins taken from each serial port */ if (intf_num == 0) { priv->gc.ngpio = 2; if (mode.eci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; return 0; } iface_config = config.eci_cfg; priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_ECI_GPIO_MODE_MASK) >> CP210X_ECI_GPIO_MODE_OFFSET); } else if (intf_num == 1) { priv->gc.ngpio = 3; if (mode.sci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; return 0; } iface_config = config.sci_cfg; priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_SCI_GPIO_MODE_MASK) >> CP210X_SCI_GPIO_MODE_OFFSET); } else { return -ENODEV; } /* mark all pins which are not in GPIO mode */ if (iface_config & CP2105_GPIO0_TXLED_MODE) /* GPIO 0 */ priv->gpio_altfunc |= BIT(0); if (iface_config & (CP2105_GPIO1_RXLED_MODE | /* GPIO 1 */ CP2105_GPIO1_RS485_MODE)) priv->gpio_altfunc |= BIT(1); /* driver implementation for CP2105 only supports outputs */ priv->gpio_input = 0; return 0; } static int cp2104_gpioconf_init(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); 
struct cp210x_single_port_config config; u8 iface_config; u8 gpio_latch; int result; u8 i; result = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, CP210X_GET_PORTCONFIG, &config, sizeof(config)); if (result < 0) return result; priv->gc.ngpio = 4; iface_config = config.device_cfg; priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_GPIO_MODE_MASK) >> CP210X_GPIO_MODE_OFFSET); gpio_latch = (u8)((le16_to_cpu(config.reset_state) & CP210X_GPIO_MODE_MASK) >> CP210X_GPIO_MODE_OFFSET); /* mark all pins which are not in GPIO mode */ if (iface_config & CP2104_GPIO0_TXLED_MODE) /* GPIO 0 */ priv->gpio_altfunc |= BIT(0); if (iface_config & CP2104_GPIO1_RXLED_MODE) /* GPIO 1 */ priv->gpio_altfunc |= BIT(1); if (iface_config & CP2104_GPIO2_RS485_MODE) /* GPIO 2 */ priv->gpio_altfunc |= BIT(2); /* * Like CP2102N, CP2104 has also no strict input and output pin * modes. * Do the same input mode emulation as CP2102N. */ for (i = 0; i < priv->gc.ngpio; ++i) { /* * Set direction to "input" iff pin is open-drain and reset * value is 1. */ if (!(priv->gpio_pushpull & BIT(i)) && (gpio_latch & BIT(i))) priv->gpio_input |= BIT(i); } return 0; } static int cp2108_gpio_init(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); struct cp210x_quad_port_config config; u16 gpio_latch; int result; u8 i; result = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, CP210X_GET_PORTCONFIG, &config, sizeof(config)); if (result < 0) return result; priv->gc.ngpio = 16; priv->gpio_pushpull = le16_to_cpu(config.reset_state.gpio_mode_pb1); gpio_latch = le16_to_cpu(config.reset_state.gpio_latch_pb1); /* * Mark all pins which are not in GPIO mode. * * Refer to table 9.1 "GPIO Mode alternate Functions" in the datasheet: * https://www.silabs.com/documents/public/data-sheets/cp2108-datasheet.pdf * * Alternate functions of GPIO0 to GPIO3 are determine by enhancedfxn_ifc[0] * and the similarly for the other pins; enhancedfxn_ifc[1]: GPIO4 to GPIO7, * enhancedfxn_ifc[2]: GPIO8 to GPIO11, enhancedfxn_ifc[3]: GPIO12 to GPIO15. */ for (i = 0; i < 4; i++) { if (config.enhancedfxn_ifc[i] & CP2108_EF_IFC_GPIO_TXLED) priv->gpio_altfunc |= BIT(i * 4); if (config.enhancedfxn_ifc[i] & CP2108_EF_IFC_GPIO_RXLED) priv->gpio_altfunc |= BIT((i * 4) + 1); if (config.enhancedfxn_ifc[i] & CP2108_EF_IFC_GPIO_RS485) priv->gpio_altfunc |= BIT((i * 4) + 2); if (config.enhancedfxn_ifc[i] & CP2108_EF_IFC_GPIO_CLOCK) priv->gpio_altfunc |= BIT((i * 4) + 3); } /* * Like CP2102N, CP2108 has also no strict input and output pin * modes. Do the same input mode emulation as CP2102N. */ for (i = 0; i < priv->gc.ngpio; ++i) { /* * Set direction to "input" iff pin is open-drain and reset * value is 1. */ if (!(priv->gpio_pushpull & BIT(i)) && (gpio_latch & BIT(i))) priv->gpio_input |= BIT(i); } return 0; } static int cp2102n_gpioconf_init(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); const u16 config_size = 0x02a6; u8 gpio_rst_latch; u8 config_version; u8 gpio_pushpull; u8 *config_buf; u8 gpio_latch; u8 gpio_ctrl; int result; u8 i; /* * Retrieve device configuration from the device. * The array received contains all customization settings done at the * factory/manufacturer. 
Format of the array is documented at the * time of writing at: * https://www.silabs.com/community/interface/knowledge-base.entry.html/2017/03/31/cp2102n_setconfig-xsfa */ config_buf = kmalloc(config_size, GFP_KERNEL); if (!config_buf) return -ENOMEM; result = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, CP210X_READ_2NCONFIG, config_buf, config_size); if (result < 0) { kfree(config_buf); return result; } config_version = config_buf[CP210X_2NCONFIG_CONFIG_VERSION_IDX]; gpio_pushpull = config_buf[CP210X_2NCONFIG_GPIO_MODE_IDX]; gpio_ctrl = config_buf[CP210X_2NCONFIG_GPIO_CONTROL_IDX]; gpio_rst_latch = config_buf[CP210X_2NCONFIG_GPIO_RSTLATCH_IDX]; kfree(config_buf); /* Make sure this is a config format we understand. */ if (config_version != 0x01) return -ENOTSUPP; priv->gc.ngpio = 4; /* * Get default pin states after reset. Needed so we can determine * the direction of an open-drain pin. */ gpio_latch = (gpio_rst_latch >> 3) & 0x0f; /* 0 indicates open-drain mode, 1 is push-pull */ priv->gpio_pushpull = (gpio_pushpull >> 3) & 0x0f; /* 0 indicates GPIO mode, 1 is alternate function */ if (priv->partnum == CP210X_PARTNUM_CP2102N_QFN20) { /* QFN20 is special... */ if (gpio_ctrl & CP2102N_QFN20_GPIO0_CLK_MODE) /* GPIO 0 */ priv->gpio_altfunc |= BIT(0); if (gpio_ctrl & CP2102N_QFN20_GPIO1_RS485_MODE) /* GPIO 1 */ priv->gpio_altfunc |= BIT(1); if (gpio_ctrl & CP2102N_QFN20_GPIO2_TXLED_MODE) /* GPIO 2 */ priv->gpio_altfunc |= BIT(2); if (gpio_ctrl & CP2102N_QFN20_GPIO3_RXLED_MODE) /* GPIO 3 */ priv->gpio_altfunc |= BIT(3); } else { priv->gpio_altfunc = (gpio_ctrl >> 2) & 0x0f; } if (priv->partnum == CP210X_PARTNUM_CP2102N_QFN28) { /* * For the QFN28 package, GPIO4-6 are controlled by * the low three bits of the mode/latch fields. * Contrary to the document linked above, the bits for * the SUSPEND pins are elsewhere. No alternate * function is available for these pins. */ priv->gc.ngpio = 7; gpio_latch |= (gpio_rst_latch & 7) << 4; priv->gpio_pushpull |= (gpio_pushpull & 7) << 4; } /* * The CP2102N does not strictly has input and output pin modes, * it only knows open-drain and push-pull modes which is set at * factory. An open-drain pin can function both as an * input or an output. We emulate input mode for open-drain pins * by making sure they are not driven low, and we do not allow * push-pull pins to be set as an input. */ for (i = 0; i < priv->gc.ngpio; ++i) { /* * Set direction to "input" iff pin is open-drain and reset * value is 1. */ if (!(priv->gpio_pushpull & BIT(i)) && (gpio_latch & BIT(i))) priv->gpio_input |= BIT(i); } return 0; } static int cp210x_gpio_init(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); int result; switch (priv->partnum) { case CP210X_PARTNUM_CP2104: result = cp2104_gpioconf_init(serial); break; case CP210X_PARTNUM_CP2105: result = cp2105_gpioconf_init(serial); break; case CP210X_PARTNUM_CP2108: /* * The GPIOs are not tied to any specific port so only register * once for interface 0. 
*/ if (cp210x_interface_num(serial) != 0) return 0; result = cp2108_gpio_init(serial); break; case CP210X_PARTNUM_CP2102N_QFN28: case CP210X_PARTNUM_CP2102N_QFN24: case CP210X_PARTNUM_CP2102N_QFN20: result = cp2102n_gpioconf_init(serial); break; default: return 0; } if (result < 0) return result; priv->gc.label = "cp210x"; priv->gc.get_direction = cp210x_gpio_direction_get; priv->gc.direction_input = cp210x_gpio_direction_input; priv->gc.direction_output = cp210x_gpio_direction_output; priv->gc.get = cp210x_gpio_get; priv->gc.set = cp210x_gpio_set; priv->gc.set_config = cp210x_gpio_set_config; priv->gc.init_valid_mask = cp210x_gpio_init_valid_mask; priv->gc.owner = THIS_MODULE; priv->gc.parent = &serial->interface->dev; priv->gc.base = -1; priv->gc.can_sleep = true; result = gpiochip_add_data(&priv->gc, serial); if (!result) priv->gpio_registered = true; return result; } static void cp210x_gpio_remove(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); if (priv->gpio_registered) { gpiochip_remove(&priv->gc); priv->gpio_registered = false; } } #else static int cp210x_gpio_init(struct usb_serial *serial) { return 0; } static void cp210x_gpio_remove(struct usb_serial *serial) { /* Nothing to do */ } #endif static int cp210x_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct cp210x_port_private *port_priv; port_priv = kzalloc(sizeof(*port_priv), GFP_KERNEL); if (!port_priv) return -ENOMEM; port_priv->bInterfaceNumber = cp210x_interface_num(serial); mutex_init(&port_priv->mutex); usb_set_serial_port_data(port, port_priv); return 0; } static void cp210x_port_remove(struct usb_serial_port *port) { struct cp210x_port_private *port_priv; port_priv = usb_get_serial_port_data(port); kfree(port_priv); } static void cp210x_init_max_speed(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); bool use_actual_rate = false; speed_t min = 300; speed_t max; switch (priv->partnum) { case CP210X_PARTNUM_CP2101: max = 921600; break; case CP210X_PARTNUM_CP2102: case CP210X_PARTNUM_CP2103: max = 1000000; break; case CP210X_PARTNUM_CP2104: use_actual_rate = true; max = 2000000; break; case CP210X_PARTNUM_CP2108: max = 2000000; break; case CP210X_PARTNUM_CP2105: if (cp210x_interface_num(serial) == 0) { use_actual_rate = true; max = 2000000; /* ECI */ } else { min = 2400; max = 921600; /* SCI */ } break; case CP210X_PARTNUM_CP2102N_QFN28: case CP210X_PARTNUM_CP2102N_QFN24: case CP210X_PARTNUM_CP2102N_QFN20: use_actual_rate = true; max = 3000000; break; default: max = 2000000; break; } priv->min_speed = min; priv->max_speed = max; priv->use_actual_rate = use_actual_rate; } static void cp2102_determine_quirks(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); u8 *buf; int ret; buf = kmalloc(2, GFP_KERNEL); if (!buf) return; /* * Some (possibly counterfeit) CP2102 do not support event-insertion * mode and respond differently to malformed vendor requests. * Specifically, they return one instead of two bytes when sent a * two-byte part-number request. 
*/ ret = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), CP210X_VENDOR_SPECIFIC, REQTYPE_DEVICE_TO_HOST, CP210X_GET_PARTNUM, 0, buf, 2, USB_CTRL_GET_TIMEOUT); if (ret == 1) { dev_dbg(&serial->interface->dev, "device does not support event-insertion mode\n"); priv->no_event_mode = true; } kfree(buf); } static int cp210x_get_fw_version(struct usb_serial *serial, u16 value) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); u8 ver[3]; int ret; ret = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, value, ver, sizeof(ver)); if (ret) return ret; dev_dbg(&serial->interface->dev, "%s - %d.%d.%d\n", __func__, ver[0], ver[1], ver[2]); priv->fw_version = ver[0] << 16 | ver[1] << 8 | ver[2]; return 0; } static void cp210x_determine_type(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); int ret; ret = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, CP210X_GET_PARTNUM, &priv->partnum, sizeof(priv->partnum)); if (ret < 0) { dev_warn(&serial->interface->dev, "querying part number failed\n"); priv->partnum = CP210X_PARTNUM_UNKNOWN; return; } dev_dbg(&serial->interface->dev, "partnum = 0x%02x\n", priv->partnum); switch (priv->partnum) { case CP210X_PARTNUM_CP2102: cp2102_determine_quirks(serial); break; case CP210X_PARTNUM_CP2105: case CP210X_PARTNUM_CP2108: cp210x_get_fw_version(serial, CP210X_GET_FW_VER); break; case CP210X_PARTNUM_CP2102N_QFN28: case CP210X_PARTNUM_CP2102N_QFN24: case CP210X_PARTNUM_CP2102N_QFN20: ret = cp210x_get_fw_version(serial, CP210X_GET_FW_VER_2N); if (ret) break; if (priv->fw_version <= 0x10004) priv->no_flow_control = true; break; default: break; } } static int cp210x_attach(struct usb_serial *serial) { int result; struct cp210x_serial_private *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; usb_set_serial_data(serial, priv); cp210x_determine_type(serial); cp210x_init_max_speed(serial); result = cp210x_gpio_init(serial); if (result < 0) { dev_err(&serial->interface->dev, "GPIO initialisation failed: %d\n", result); } return 0; } static void cp210x_disconnect(struct usb_serial *serial) { cp210x_gpio_remove(serial); } static void cp210x_release(struct usb_serial *serial) { struct cp210x_serial_private *priv = usb_get_serial_data(serial); cp210x_gpio_remove(serial); kfree(priv); } module_usb_serial_driver(serial_drivers, id_table); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2");
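The driver above is exercised from userspace through the ordinary POSIX termios interface; nothing cp210x-specific is exposed beyond the tty device node. The following standalone sketch is illustrative only and not part of the kernel sources; it assumes the adapter has enumerated as /dev/ttyUSB0. It requests raw 8N1 at 115200 baud with RTS/CTS flow control, and the tcsetattr() call is what ultimately reaches cp210x_set_termios() and cp210x_set_flow_control() via the usb-serial core.

#include <fcntl.h>
#include <stdio.h>
#include <termios.h>
#include <unistd.h>

int main(void)
{
	struct termios tio;
	int fd;

	fd = open("/dev/ttyUSB0", O_RDWR | O_NOCTTY);	/* assumed device node */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (tcgetattr(fd, &tio) < 0) {
		perror("tcgetattr");
		close(fd);
		return 1;
	}
	cfmakeraw(&tio);		/* raw mode: 8 data bits, no parity, no echo */
	cfsetispeed(&tio, B115200);	/* requested rate; cp210x_change_speed()      */
	cfsetospeed(&tio, B115200);	/* maps it to a rate the part supports        */
	tio.c_cflag |= CRTSCTS;		/* hardware flow control -> CP210X_SET_FLOW   */
	if (tcsetattr(fd, TCSANOW, &tio) < 0)
		perror("tcsetattr");

	close(fd);
	return 0;
}

With dynamic debug enabled for the module (echo 'module cp210x +p' > /sys/kernel/debug/dynamic_debug/control), the dev_dbg() output from cp210x_change_speed() makes the resulting rate mapping visible.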
// SPDX-License-Identifier: GPL-2.0 /* * This file contains functions which manage clock event devices. 
* * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ #include <linux/clockchips.h> #include <linux/hrtimer.h> #include <linux/init.h> #include <linux/module.h> #include <linux/smp.h> #include <linux/device.h> #include "tick-internal.h" /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevents_released); /* Protection for the above */ static DEFINE_RAW_SPINLOCK(clockevents_lock); /* Protection for unbind operations */ static DEFINE_MUTEX(clockevents_mutex); struct ce_unbind { struct clock_event_device *ce; int res; }; static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, bool ismax) { u64 clc = (u64) latch << evt->shift; u64 rnd; if (WARN_ON(!evt->mult)) evt->mult = 1; rnd = (u64) evt->mult - 1; /* * Upper bound sanity check. If the backwards conversion is * not equal latch, we know that the above shift overflowed. */ if ((clc >> evt->shift) != (u64)latch) clc = ~0ULL; /* * Scaled math oddities: * * For mult <= (1 << shift) we can safely add mult - 1 to * prevent integer rounding loss. So the backwards conversion * from nsec to device ticks will be correct. * * For mult > (1 << shift), i.e. device frequency is > 1GHz we * need to be careful. Adding mult - 1 will result in a value * which when converted back to device ticks can be larger * than latch by up to (mult - 1) >> shift. For the min_delta * calculation we still want to apply this in order to stay * above the minimum device ticks limit. For the upper limit * we would end up with a latch value larger than the upper * limit of the device, so we omit the add to stay below the * device upper boundary. * * Also omit the add if it would overflow the u64 boundary. */ if ((~0ULL - clc > rnd) && (!ismax || evt->mult <= (1ULL << evt->shift))) clc += rnd; do_div(clc, evt->mult); /* Deltas less than 1usec are pointless noise */ return clc > 1000 ? clc : 1000; } /** * clockevent_delta2ns - Convert a latch value (device ticks) to nanoseconds * @latch: value to convert * @evt: pointer to clock event device descriptor * * Math helper, returns latch value converted to nanoseconds (bound checked) */ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) { return cev_delta2ns(latch, evt, false); } EXPORT_SYMBOL_GPL(clockevent_delta2ns); static int __clockevents_switch_state(struct clock_event_device *dev, enum clock_event_state state) { if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; /* Transition with new state-specific callbacks */ switch (state) { case CLOCK_EVT_STATE_DETACHED: /* The clockevent device is getting replaced. Shut it down. 
*/ case CLOCK_EVT_STATE_SHUTDOWN: if (dev->set_state_shutdown) return dev->set_state_shutdown(dev); return 0; case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; if (dev->set_state_periodic) return dev->set_state_periodic(dev); return 0; case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; if (dev->set_state_oneshot) return dev->set_state_oneshot(dev); return 0; case CLOCK_EVT_STATE_ONESHOT_STOPPED: /* Core internal bug */ if (WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n", clockevent_get_state(dev))) return -EINVAL; if (dev->set_state_oneshot_stopped) return dev->set_state_oneshot_stopped(dev); else return -ENOSYS; default: return -ENOSYS; } } /** * clockevents_switch_state - set the operating state of a clock event device * @dev: device to modify * @state: new state * * Must be called with interrupts disabled ! */ void clockevents_switch_state(struct clock_event_device *dev, enum clock_event_state state) { if (clockevent_get_state(dev) != state) { if (__clockevents_switch_state(dev, state)) return; clockevent_set_state(dev, state); /* * A nsec2cyc multiplicator of 0 is invalid and we'd crash * on it, so fix it up and emit a warning: */ if (clockevent_state_oneshot(dev)) { if (WARN_ON(!dev->mult)) dev->mult = 1; } } } /** * clockevents_shutdown - shutdown the device and clear next_event * @dev: device to shutdown */ void clockevents_shutdown(struct clock_event_device *dev) { clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); dev->next_event = KTIME_MAX; } /** * clockevents_tick_resume - Resume the tick device before using it again * @dev: device to resume */ int clockevents_tick_resume(struct clock_event_device *dev) { int ret = 0; if (dev->tick_resume) ret = dev->tick_resume(dev); return ret; } #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST /* Limit min_delta to a jiffie */ #define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) /** * clockevents_increase_min_delta - raise minimum delta of a clock event device * @dev: device to increase the minimum delta * * Returns 0 on success, -ETIME when the minimum delta reached the limit. */ static int clockevents_increase_min_delta(struct clock_event_device *dev) { /* Nothing to do if we already reached the limit */ if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { printk_deferred(KERN_WARNING "CE: Reprogramming failure. Giving up\n"); dev->next_event = KTIME_MAX; return -ETIME; } if (dev->min_delta_ns < 5000) dev->min_delta_ns = 5000; else dev->min_delta_ns += dev->min_delta_ns >> 1; if (dev->min_delta_ns > MIN_DELTA_LIMIT) dev->min_delta_ns = MIN_DELTA_LIMIT; printk_deferred(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", dev->name ? dev->name : "?", (unsigned long long) dev->min_delta_ns); return 0; } /** * clockevents_program_min_delta - Set clock event device to the minimum delay. * @dev: device to program * * Returns 0 on success, -ETIME when the retry loop failed. */ static int clockevents_program_min_delta(struct clock_event_device *dev) { unsigned long long clc; int64_t delta; int i; for (i = 0;;) { delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); if (clockevent_state_shutdown(dev)) return 0; dev->retries++; clc = ((unsigned long long) delta * dev->mult) >> dev->shift; if (dev->set_next_event((unsigned long) clc, dev) == 0) return 0; if (++i > 2) { /* * We tried 3 times to program the device with the * given min_delta_ns. 
Try to increase the minimum * delta, if that fails as well get out of here. */ if (clockevents_increase_min_delta(dev)) return -ETIME; i = 0; } } } #else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ /** * clockevents_program_min_delta - Set clock event device to the minimum delay. * @dev: device to program * * Returns 0 on success, -ETIME when the retry loop failed. */ static int clockevents_program_min_delta(struct clock_event_device *dev) { unsigned long long clc; int64_t delta = 0; int i; for (i = 0; i < 10; i++) { delta += dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); if (clockevent_state_shutdown(dev)) return 0; dev->retries++; clc = ((unsigned long long) delta * dev->mult) >> dev->shift; if (dev->set_next_event((unsigned long) clc, dev) == 0) return 0; } return -ETIME; } #endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ /** * clockevents_program_event - Reprogram the clock event device. * @dev: device to program * @expires: absolute expiry time (monotonic clock) * @force: program minimum delay if expires can not be set * * Returns 0 on success, -ETIME when the event is in the past. */ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force) { unsigned long long clc; int64_t delta; int rc; if (WARN_ON_ONCE(expires < 0)) return -ETIME; dev->next_event = expires; if (clockevent_state_shutdown(dev)) return 0; /* We must be in ONESHOT state here */ WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n", clockevent_get_state(dev)); /* Shortcut for clockevent devices that can deal with ktime. */ if (dev->features & CLOCK_EVT_FEAT_KTIME) return dev->set_next_ktime(expires, dev); delta = ktime_to_ns(ktime_sub(expires, ktime_get())); if (delta <= 0) return force ? clockevents_program_min_delta(dev) : -ETIME; delta = min(delta, (int64_t) dev->max_delta_ns); delta = max(delta, (int64_t) dev->min_delta_ns); clc = ((unsigned long long) delta * dev->mult) >> dev->shift; rc = dev->set_next_event((unsigned long) clc, dev); return (rc && force) ? clockevents_program_min_delta(dev) : rc; } /* * Called after a notify add to make devices available which were * released from the notifier call. */ static void clockevents_notify_released(void) { struct clock_event_device *dev; while (!list_empty(&clockevents_released)) { dev = list_entry(clockevents_released.next, struct clock_event_device, list); list_move(&dev->list, &clockevent_devices); tick_check_new_device(dev); } } /* * Try to install a replacement clock event device */ static int clockevents_replace(struct clock_event_device *ced) { struct clock_event_device *dev, *newdev = NULL; list_for_each_entry(dev, &clockevent_devices, list) { if (dev == ced || !clockevent_state_detached(dev)) continue; if (!tick_check_replacement(newdev, dev)) continue; if (!try_module_get(dev->owner)) continue; if (newdev) module_put(newdev->owner); newdev = dev; } if (newdev) { tick_install_replacement(newdev); list_del_init(&ced->list); } return newdev ? 0 : -EBUSY; } /* * Called with clockevents_mutex and clockevents_lock held */ static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) { /* Fast track. Device is unused */ if (clockevent_state_detached(ced)) { list_del_init(&ced->list); return 0; } return ced == per_cpu(tick_cpu_device, cpu).evtdev ? 
-EAGAIN : -EBUSY; } /* * SMP function call to unbind a device */ static void __clockevents_unbind(void *arg) { struct ce_unbind *cu = arg; int res; raw_spin_lock(&clockevents_lock); res = __clockevents_try_unbind(cu->ce, smp_processor_id()); if (res == -EAGAIN) res = clockevents_replace(cu->ce); cu->res = res; raw_spin_unlock(&clockevents_lock); } /* * Issues smp function call to unbind a per cpu device. Called with * clockevents_mutex held. */ static int clockevents_unbind(struct clock_event_device *ced, int cpu) { struct ce_unbind cu = { .ce = ced, .res = -ENODEV }; smp_call_function_single(cpu, __clockevents_unbind, &cu, 1); return cu.res; } /* * Unbind a clockevents device. */ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) { int ret; mutex_lock(&clockevents_mutex); ret = clockevents_unbind(ced, cpu); mutex_unlock(&clockevents_mutex); return ret; } EXPORT_SYMBOL_GPL(clockevents_unbind_device); /** * clockevents_register_device - register a clock event device * @dev: device to register */ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; /* Initialize state to DETACHED */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); } if (dev->cpumask == cpu_all_mask) { WARN(1, "%s cpumask == cpu_all_mask, using cpu_possible_mask instead\n", dev->name); dev->cpumask = cpu_possible_mask; } raw_spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices); tick_check_new_device(dev); clockevents_notify_released(); raw_spin_unlock_irqrestore(&clockevents_lock, flags); } EXPORT_SYMBOL_GPL(clockevents_register_device); static void clockevents_config(struct clock_event_device *dev, u32 freq) { u64 sec; if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return; /* * Calculate the maximum number of seconds we can sleep. Limit * to 10 minutes for hardware which can program more than * 32bit ticks so we still get reasonable conversion values. */ sec = dev->max_delta_ticks; do_div(sec, freq); if (!sec) sec = 1; else if (sec > 600 && dev->max_delta_ticks > UINT_MAX) sec = 600; clockevents_calc_mult_shift(dev, freq, sec); dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false); dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true); } /** * clockevents_config_and_register - Configure and register a clock event device * @dev: device to register * @freq: The clock frequency * @min_delta: The minimum clock ticks to program in oneshot mode * @max_delta: The maximum clock ticks to program in oneshot mode * * min/max_delta can be 0 for devices which do not support oneshot mode. */ void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta) { dev->min_delta_ticks = min_delta; dev->max_delta_ticks = max_delta; clockevents_config(dev, freq); clockevents_register_device(dev); } EXPORT_SYMBOL_GPL(clockevents_config_and_register); int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) { clockevents_config(dev, freq); if (clockevent_state_oneshot(dev)) return clockevents_program_event(dev, dev->next_event, false); if (clockevent_state_periodic(dev)) return __clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC); return 0; } /** * clockevents_update_freq - Update frequency and reprogram a clock event device. * @dev: device to modify * @freq: new device frequency * * Reconfigure and reprogram a clock event device in oneshot * mode. 
Must be called on the cpu for which the device delivers per * cpu timer events. If called for the broadcast device the core takes * care of serialization. * * Returns 0 on success, -ETIME when the event is in the past. */ int clockevents_update_freq(struct clock_event_device *dev, u32 freq) { unsigned long flags; int ret; local_irq_save(flags); ret = tick_broadcast_update_freq(dev, freq); if (ret == -ENODEV) ret = __clockevents_update_freq(dev, freq); local_irq_restore(flags); return ret; } /* * Noop handler when we shut down an event device */ void clockevents_handle_noop(struct clock_event_device *dev) { } /** * clockevents_exchange_device - release and request clock devices * @old: device to release (can be NULL) * @new: device to request (can be NULL) * * Called from various tick functions with clockevents_lock held and * interrupts disabled. */ void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new) { /* * Caller releases a clock event device. We queue it into the * released list and do a notify add later. */ if (old) { module_put(old->owner); clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED); list_move(&old->list, &clockevents_released); } if (new) { BUG_ON(!clockevent_state_detached(new)); clockevents_shutdown(new); } } /** * clockevents_suspend - suspend clock devices */ void clockevents_suspend(void) { struct clock_event_device *dev; list_for_each_entry_reverse(dev, &clockevent_devices, list) if (dev->suspend && !clockevent_state_detached(dev)) dev->suspend(dev); } /** * clockevents_resume - resume clock devices */ void clockevents_resume(void) { struct clock_event_device *dev; list_for_each_entry(dev, &clockevent_devices, list) if (dev->resume && !clockevent_state_detached(dev)) dev->resume(dev); } #ifdef CONFIG_HOTPLUG_CPU # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST /** * tick_offline_cpu - Take CPU out of the broadcast mechanism * @cpu: The outgoing CPU * * Called on the outgoing CPU after it took itself offline. */ void tick_offline_cpu(unsigned int cpu) { raw_spin_lock(&clockevents_lock); tick_broadcast_offline(cpu); raw_spin_unlock(&clockevents_lock); } # endif /** * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu * @cpu: The dead CPU */ void tick_cleanup_dead_cpu(int cpu) { struct clock_event_device *dev, *tmp; unsigned long flags; raw_spin_lock_irqsave(&clockevents_lock, flags); tick_shutdown(cpu); /* * Unregister the clock event devices which were * released from the users in the notify chain. 
*/ list_for_each_entry_safe(dev, tmp, &clockevents_released, list) list_del(&dev->list); /* * Now check whether the CPU has left unused per cpu devices */ list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { if (cpumask_test_cpu(cpu, dev->cpumask) && cpumask_weight(dev->cpumask) == 1 && !tick_is_broadcast_device(dev)) { BUG_ON(!clockevent_state_detached(dev)); list_del(&dev->list); } } raw_spin_unlock_irqrestore(&clockevents_lock, flags); } #endif #ifdef CONFIG_SYSFS static const struct bus_type clockevents_subsys = { .name = "clockevents", .dev_name = "clockevent", }; static DEFINE_PER_CPU(struct device, tick_percpu_dev); static struct tick_device *tick_get_tick_dev(struct device *dev); static ssize_t current_device_show(struct device *dev, struct device_attribute *attr, char *buf) { struct tick_device *td; ssize_t count = 0; raw_spin_lock_irq(&clockevents_lock); td = tick_get_tick_dev(dev); if (td && td->evtdev) count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name); raw_spin_unlock_irq(&clockevents_lock); return count; } static DEVICE_ATTR_RO(current_device); /* We don't support the abomination of removable broadcast devices */ static ssize_t unbind_device_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { char name[CS_NAME_LEN]; ssize_t ret = sysfs_get_uname(buf, name, count); struct clock_event_device *ce = NULL, *iter; if (ret < 0) return ret; ret = -ENODEV; mutex_lock(&clockevents_mutex); raw_spin_lock_irq(&clockevents_lock); list_for_each_entry(iter, &clockevent_devices, list) { if (!strcmp(iter->name, name)) { ret = __clockevents_try_unbind(iter, dev->id); ce = iter; break; } } raw_spin_unlock_irq(&clockevents_lock); /* * We hold clockevents_mutex, so ce can't go away */ if (ret == -EAGAIN) ret = clockevents_unbind(ce, dev->id); mutex_unlock(&clockevents_mutex); return ret ? ret : count; } static DEVICE_ATTR_WO(unbind_device); #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static struct device tick_bc_dev = { .init_name = "broadcast", .id = 0, .bus = &clockevents_subsys, }; static struct tick_device *tick_get_tick_dev(struct device *dev) { return dev == &tick_bc_dev ? tick_get_broadcast_device() : &per_cpu(tick_cpu_device, dev->id); } static __init int tick_broadcast_init_sysfs(void) { int err = device_register(&tick_bc_dev); if (!err) err = device_create_file(&tick_bc_dev, &dev_attr_current_device); return err; } #else static struct tick_device *tick_get_tick_dev(struct device *dev) { return &per_cpu(tick_cpu_device, dev->id); } static inline int tick_broadcast_init_sysfs(void) { return 0; } #endif static int __init tick_init_sysfs(void) { int cpu; for_each_possible_cpu(cpu) { struct device *dev = &per_cpu(tick_percpu_dev, cpu); int err; dev->id = cpu; dev->bus = &clockevents_subsys; err = device_register(dev); if (!err) err = device_create_file(dev, &dev_attr_current_device); if (!err) err = device_create_file(dev, &dev_attr_unbind_device); if (err) return err; } return tick_broadcast_init_sysfs(); } static int __init clockevents_init_sysfs(void) { int err = subsys_system_register(&clockevents_subsys, NULL); if (!err) err = tick_init_sysfs(); return err; } device_initcall(clockevents_init_sysfs); #endif /* SYSFS */
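/*
 * Editor's illustration, not part of clockevents.c: a minimal sketch of how a
 * hypothetical per-CPU timer driver might register with the framework above.
 * clockevents_config_and_register(), the struct clock_event_device fields and
 * CLOCK_EVT_FEAT_ONESHOT are the real API (see <linux/clockchips.h>); the
 * "hw-timer" name, the tick limits and the hw_timer_* stubs are invented
 * placeholders.
 */
#include <linux/clockchips.h>

static int hw_timer_set_next_event(unsigned long ticks,
                                   struct clock_event_device *ced)
{
        /* Program the (imaginary) hardware comparator 'ticks' ahead. */
        return 0;
}

static int hw_timer_shutdown(struct clock_event_device *ced)
{
        /* Stop the (imaginary) hardware timer. */
        return 0;
}

static struct clock_event_device hw_timer_clockevent = {
        .name                   = "hw-timer",
        .features               = CLOCK_EVT_FEAT_ONESHOT,
        .rating                 = 300,
        .set_next_event         = hw_timer_set_next_event,
        .set_state_shutdown     = hw_timer_shutdown,
};

static void hw_timer_clockevent_init(u32 freq_hz, int cpu)
{
        hw_timer_clockevent.cpumask = cpumask_of(cpu);
        /*
         * clockevents_config() converts the min/max tick limits (0xf and
         * 0x7fffffff here) into min/max_delta_ns using the mult/shift pair
         * it computes for freq_hz; the device is then registered and the
         * tick core may pick it up via tick_check_new_device().
         */
        clockevents_config_and_register(&hw_timer_clockevent, freq_hz,
                                        0xf, 0x7fffffff);
}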
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2002 Petko Manolov (petkan@users.sourceforge.net) */ #include <linux/signal.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/mii.h> #include <linux/ethtool.h> #include <linux/usb.h> #include <linux/uaccess.h> /* Version Information */ #define DRIVER_VERSION "v0.6.2 (2004/08/27)" #define DRIVER_AUTHOR "Petko Manolov <petkan@users.sourceforge.net>" #define DRIVER_DESC "rtl8150 based usb-ethernet driver" #define IDR 0x0120 #define MAR 0x0126 #define CR 0x012e #define TCR 0x012f #define RCR 0x0130 #define TSR 0x0132 #define RSR 0x0133 #define CON0 0x0135 #define CON1 0x0136 #define MSR 0x0137 #define PHYADD 0x0138 #define PHYDAT 0x0139 #define PHYCNT 0x013b #define GPPC 0x013d #define BMCR 0x0140 #define BMSR 0x0142 #define ANAR 0x0144 #define ANLP 0x0146 #define AER 0x0148 #define CSCR 0x014C /* This one has the link status */ #define CSCR_LINK_STATUS (1 << 3) #define IDR_EEPROM 0x1202 #define PHY_READ 0 #define PHY_WRITE 0x20 #define PHY_GO 0x40 #define MII_TIMEOUT 10 #define INTBUFSIZE 8 #define RTL8150_REQT_READ 0xc0 #define RTL8150_REQT_WRITE 0x40 #define RTL8150_REQ_GET_REGS 0x05 #define RTL8150_REQ_SET_REGS 0x05 /* Transmit status register errors */ #define TSR_ECOL (1<<5) #define TSR_LCOL (1<<4) #define TSR_LOSS_CRS (1<<3) #define TSR_JBR (1<<2) #define TSR_ERRORS (TSR_ECOL | TSR_LCOL | TSR_LOSS_CRS | TSR_JBR) /* Receive status register errors */ #define RSR_CRC (1<<2) #define RSR_FAE (1<<1) #define RSR_ERRORS (RSR_CRC | RSR_FAE) /* Media status register definitions */ #define MSR_DUPLEX (1<<4) #define MSR_SPEED (1<<3) #define MSR_LINK (1<<2) /* Interrupt pipe data */ #define INT_TSR 0x00 #define INT_RSR 0x01 #define INT_MSR 0x02 #define INT_WAKSR 0x03 #define INT_TXOK_CNT 0x04 #define INT_RXLOST_CNT 0x05 #define INT_CRERR_CNT 0x06 #define INT_COL_CNT 0x07 #define RTL8150_MTU 1540 #define RTL8150_TX_TIMEOUT (HZ) #define RX_SKB_POOL_SIZE 4 /* rtl8150 flags */ #define RTL8150_HW_CRC 0 #define RX_REG_SET 1 #define RTL8150_UNPLUG 2 #define RX_URB_FAIL 3 /* Define these values to match your device */ #define VENDOR_ID_REALTEK 0x0bda #define VENDOR_ID_MELCO 0x0411 #define VENDOR_ID_MICRONET 0x3980 #define VENDOR_ID_LONGSHINE 0x07b8 #define VENDOR_ID_OQO 0x1557 #define VENDOR_ID_ZYXEL 0x0586 #define PRODUCT_ID_RTL8150 0x8150 #define PRODUCT_ID_LUAKTX 0x0012 #define PRODUCT_ID_LCS8138TX 0x401a #define PRODUCT_ID_SP128AR 0x0003 #define PRODUCT_ID_PRESTIGE 0x401a #undef EEPROM_WRITE /* table of devices that work with this driver */ static const struct usb_device_id rtl8150_table[] = { {USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8150)}, {USB_DEVICE(VENDOR_ID_MELCO, PRODUCT_ID_LUAKTX)}, {USB_DEVICE(VENDOR_ID_MICRONET, PRODUCT_ID_SP128AR)}, {USB_DEVICE(VENDOR_ID_LONGSHINE, PRODUCT_ID_LCS8138TX)}, {USB_DEVICE(VENDOR_ID_OQO, PRODUCT_ID_RTL8150)}, {USB_DEVICE(VENDOR_ID_ZYXEL, PRODUCT_ID_PRESTIGE)}, {} }; MODULE_DEVICE_TABLE(usb, rtl8150_table); struct rtl8150 { unsigned long flags; struct usb_device *udev; struct tasklet_struct tl; struct net_device *netdev; struct urb *rx_urb, *tx_urb, *intr_urb; struct sk_buff *tx_skb, *rx_skb; struct sk_buff *rx_skb_pool[RX_SKB_POOL_SIZE]; spinlock_t rx_pool_lock; struct usb_ctrlrequest dr; int intr_interval; u8 *intr_buff; u8 phy; }; typedef struct rtl8150
rtl8150_t; struct async_req { struct usb_ctrlrequest dr; u16 rx_creg; }; static const char driver_name [] = "rtl8150"; /* ** ** device related part of the code ** */ static int get_registers(rtl8150_t * dev, u16 indx, u16 size, void *data) { return usb_control_msg_recv(dev->udev, 0, RTL8150_REQ_GET_REGS, RTL8150_REQT_READ, indx, 0, data, size, 1000, GFP_NOIO); } static int set_registers(rtl8150_t * dev, u16 indx, u16 size, const void *data) { return usb_control_msg_send(dev->udev, 0, RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE, indx, 0, data, size, 1000, GFP_NOIO); } static void async_set_reg_cb(struct urb *urb) { struct async_req *req = (struct async_req *)urb->context; int status = urb->status; if (status < 0) dev_dbg(&urb->dev->dev, "%s failed with %d", __func__, status); kfree(req); usb_free_urb(urb); } static int async_set_registers(rtl8150_t *dev, u16 indx, u16 size, u16 reg) { int res = -ENOMEM; struct urb *async_urb; struct async_req *req; req = kmalloc(sizeof(struct async_req), GFP_ATOMIC); if (req == NULL) return res; async_urb = usb_alloc_urb(0, GFP_ATOMIC); if (async_urb == NULL) { kfree(req); return res; } req->rx_creg = cpu_to_le16(reg); req->dr.bRequestType = RTL8150_REQT_WRITE; req->dr.bRequest = RTL8150_REQ_SET_REGS; req->dr.wIndex = 0; req->dr.wValue = cpu_to_le16(indx); req->dr.wLength = cpu_to_le16(size); usb_fill_control_urb(async_urb, dev->udev, usb_sndctrlpipe(dev->udev, 0), (void *)&req->dr, &req->rx_creg, size, async_set_reg_cb, req); res = usb_submit_urb(async_urb, GFP_ATOMIC); if (res) { if (res == -ENODEV) netif_device_detach(dev->netdev); dev_err(&dev->udev->dev, "%s failed with %d\n", __func__, res); } return res; } static int read_mii_word(rtl8150_t * dev, u8 phy, __u8 indx, u16 * reg) { int i; u8 data[3], tmp; data[0] = phy; data[1] = data[2] = 0; tmp = indx | PHY_READ | PHY_GO; i = 0; set_registers(dev, PHYADD, sizeof(data), data); set_registers(dev, PHYCNT, 1, &tmp); do { get_registers(dev, PHYCNT, 1, data); } while ((data[0] & PHY_GO) && (i++ < MII_TIMEOUT)); if (i <= MII_TIMEOUT) { get_registers(dev, PHYDAT, 2, data); *reg = data[0] | (data[1] << 8); return 0; } else return 1; } static int write_mii_word(rtl8150_t * dev, u8 phy, __u8 indx, u16 reg) { int i; u8 data[3], tmp; data[0] = phy; data[1] = reg & 0xff; data[2] = (reg >> 8) & 0xff; tmp = indx | PHY_WRITE | PHY_GO; i = 0; set_registers(dev, PHYADD, sizeof(data), data); set_registers(dev, PHYCNT, 1, &tmp); do { get_registers(dev, PHYCNT, 1, data); } while ((data[0] & PHY_GO) && (i++ < MII_TIMEOUT)); if (i <= MII_TIMEOUT) return 0; else return 1; } static void set_ethernet_addr(rtl8150_t *dev) { u8 node_id[ETH_ALEN]; int ret; ret = get_registers(dev, IDR, sizeof(node_id), node_id); if (!ret) { eth_hw_addr_set(dev->netdev, node_id); } else { eth_hw_addr_random(dev->netdev); netdev_notice(dev->netdev, "Assigned a random MAC address: %pM\n", dev->netdev->dev_addr); } } static int rtl8150_set_mac_address(struct net_device *netdev, void *p) { struct sockaddr *addr = p; rtl8150_t *dev = netdev_priv(netdev); if (netif_running(netdev)) return -EBUSY; eth_hw_addr_set(netdev, addr->sa_data); netdev_dbg(netdev, "Setting MAC address to %pM\n", netdev->dev_addr); /* Set the IDR registers. */ set_registers(dev, IDR, netdev->addr_len, netdev->dev_addr); #ifdef EEPROM_WRITE { int i; u8 cr; /* Get the CR contents. */ get_registers(dev, CR, 1, &cr); /* Set the WEPROM bit (eeprom write enable). */ cr |= 0x20; set_registers(dev, CR, 1, &cr); /* Write the MAC address into eeprom. 
Eeprom writes must be word-sized, so we need to split them up. */ for (i = 0; i * 2 < netdev->addr_len; i++) { set_registers(dev, IDR_EEPROM + (i * 2), 2, netdev->dev_addr + (i * 2)); } /* Clear the WEPROM bit (preventing accidental eeprom writes). */ cr &= 0xdf; set_registers(dev, CR, 1, &cr); } #endif return 0; } static int rtl8150_reset(rtl8150_t * dev) { u8 data = 0x10; int i = HZ; set_registers(dev, CR, 1, &data); do { get_registers(dev, CR, 1, &data); } while ((data & 0x10) && --i); return (i > 0) ? 1 : 0; } static int alloc_all_urbs(rtl8150_t * dev) { dev->rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->rx_urb) return 0; dev->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->tx_urb) { usb_free_urb(dev->rx_urb); return 0; } dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->intr_urb) { usb_free_urb(dev->rx_urb); usb_free_urb(dev->tx_urb); return 0; } return 1; } static void free_all_urbs(rtl8150_t * dev) { usb_free_urb(dev->rx_urb); usb_free_urb(dev->tx_urb); usb_free_urb(dev->intr_urb); } static void unlink_all_urbs(rtl8150_t * dev) { usb_kill_urb(dev->rx_urb); usb_kill_urb(dev->tx_urb); usb_kill_urb(dev->intr_urb); } static inline struct sk_buff *pull_skb(rtl8150_t *dev) { struct sk_buff *skb; int i; for (i = 0; i < RX_SKB_POOL_SIZE; i++) { if (dev->rx_skb_pool[i]) { skb = dev->rx_skb_pool[i]; dev->rx_skb_pool[i] = NULL; return skb; } } return NULL; } static void read_bulk_callback(struct urb *urb) { rtl8150_t *dev; unsigned pkt_len, res; struct sk_buff *skb; struct net_device *netdev; int status = urb->status; int result; unsigned long flags; dev = urb->context; if (!dev) return; if (test_bit(RTL8150_UNPLUG, &dev->flags)) return; netdev = dev->netdev; if (!netif_device_present(netdev)) return; switch (status) { case 0: break; case -ENOENT: return; /* the urb is in unlink state */ case -ETIME: if (printk_ratelimit()) dev_warn(&urb->dev->dev, "may be reset is needed?..\n"); goto goon; default: if (printk_ratelimit()) dev_warn(&urb->dev->dev, "Rx status %d\n", status); goto goon; } if (!dev->rx_skb) goto resched; /* protect against short packets (tell me why we got some?!?) 
*/ if (urb->actual_length < 4) goto goon; res = urb->actual_length; pkt_len = res - 4; skb_put(dev->rx_skb, pkt_len); dev->rx_skb->protocol = eth_type_trans(dev->rx_skb, netdev); netif_rx(dev->rx_skb); netdev->stats.rx_packets++; netdev->stats.rx_bytes += pkt_len; spin_lock_irqsave(&dev->rx_pool_lock, flags); skb = pull_skb(dev); spin_unlock_irqrestore(&dev->rx_pool_lock, flags); if (!skb) goto resched; dev->rx_skb = skb; goon: usb_fill_bulk_urb(dev->rx_urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), dev->rx_skb->data, RTL8150_MTU, read_bulk_callback, dev); result = usb_submit_urb(dev->rx_urb, GFP_ATOMIC); if (result == -ENODEV) netif_device_detach(dev->netdev); else if (result) { set_bit(RX_URB_FAIL, &dev->flags); goto resched; } else { clear_bit(RX_URB_FAIL, &dev->flags); } return; resched: tasklet_schedule(&dev->tl); } static void write_bulk_callback(struct urb *urb) { rtl8150_t *dev; int status = urb->status; dev = urb->context; if (!dev) return; dev_kfree_skb_irq(dev->tx_skb); if (!netif_device_present(dev->netdev)) return; if (status) dev_info(&urb->dev->dev, "%s: Tx status %d\n", dev->netdev->name, status); netif_trans_update(dev->netdev); netif_wake_queue(dev->netdev); } static void intr_callback(struct urb *urb) { rtl8150_t *dev; __u8 *d; int status = urb->status; int res; dev = urb->context; if (!dev) return; switch (status) { case 0: /* success */ break; case -ECONNRESET: /* unlink */ case -ENOENT: case -ESHUTDOWN: return; /* -EPIPE: should clear the halt */ default: dev_info(&urb->dev->dev, "%s: intr status %d\n", dev->netdev->name, status); goto resubmit; } d = urb->transfer_buffer; if (d[0] & TSR_ERRORS) { dev->netdev->stats.tx_errors++; if (d[INT_TSR] & (TSR_ECOL | TSR_JBR)) dev->netdev->stats.tx_aborted_errors++; if (d[INT_TSR] & TSR_LCOL) dev->netdev->stats.tx_window_errors++; if (d[INT_TSR] & TSR_LOSS_CRS) dev->netdev->stats.tx_carrier_errors++; } /* Report link status changes to the network stack */ if ((d[INT_MSR] & MSR_LINK) == 0) { if (netif_carrier_ok(dev->netdev)) { netif_carrier_off(dev->netdev); netdev_dbg(dev->netdev, "%s: LINK LOST\n", __func__); } } else { if (!netif_carrier_ok(dev->netdev)) { netif_carrier_on(dev->netdev); netdev_dbg(dev->netdev, "%s: LINK CAME BACK\n", __func__); } } resubmit: res = usb_submit_urb (urb, GFP_ATOMIC); if (res == -ENODEV) netif_device_detach(dev->netdev); else if (res) dev_err(&dev->udev->dev, "can't resubmit intr, %s-%s/input0, status %d\n", dev->udev->bus->bus_name, dev->udev->devpath, res); } static int rtl8150_suspend(struct usb_interface *intf, pm_message_t message) { rtl8150_t *dev = usb_get_intfdata(intf); netif_device_detach(dev->netdev); if (netif_running(dev->netdev)) { usb_kill_urb(dev->rx_urb); usb_kill_urb(dev->intr_urb); } return 0; } static int rtl8150_resume(struct usb_interface *intf) { rtl8150_t *dev = usb_get_intfdata(intf); netif_device_attach(dev->netdev); if (netif_running(dev->netdev)) { dev->rx_urb->status = 0; dev->rx_urb->actual_length = 0; read_bulk_callback(dev->rx_urb); dev->intr_urb->status = 0; dev->intr_urb->actual_length = 0; intr_callback(dev->intr_urb); } return 0; } /* ** ** network related part of the code ** */ static void fill_skb_pool(rtl8150_t *dev) { struct sk_buff *skb; int i; for (i = 0; i < RX_SKB_POOL_SIZE; i++) { if (dev->rx_skb_pool[i]) continue; skb = dev_alloc_skb(RTL8150_MTU + 2); if (!skb) { return; } skb_reserve(skb, 2); dev->rx_skb_pool[i] = skb; } } static void free_skb_pool(rtl8150_t *dev) { int i; for (i = 0; i < RX_SKB_POOL_SIZE; i++) dev_kfree_skb(dev->rx_skb_pool[i]); } 
static void rx_fixup(struct tasklet_struct *t) { struct rtl8150 *dev = from_tasklet(dev, t, tl); struct sk_buff *skb; int status; spin_lock_irq(&dev->rx_pool_lock); fill_skb_pool(dev); spin_unlock_irq(&dev->rx_pool_lock); if (test_bit(RX_URB_FAIL, &dev->flags)) if (dev->rx_skb) goto try_again; spin_lock_irq(&dev->rx_pool_lock); skb = pull_skb(dev); spin_unlock_irq(&dev->rx_pool_lock); if (skb == NULL) goto tlsched; dev->rx_skb = skb; usb_fill_bulk_urb(dev->rx_urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), dev->rx_skb->data, RTL8150_MTU, read_bulk_callback, dev); try_again: status = usb_submit_urb(dev->rx_urb, GFP_ATOMIC); if (status == -ENODEV) { netif_device_detach(dev->netdev); } else if (status) { set_bit(RX_URB_FAIL, &dev->flags); goto tlsched; } else { clear_bit(RX_URB_FAIL, &dev->flags); } return; tlsched: tasklet_schedule(&dev->tl); } static int enable_net_traffic(rtl8150_t * dev) { u8 cr, tcr, rcr, msr; if (!rtl8150_reset(dev)) { dev_warn(&dev->udev->dev, "device reset failed\n"); } /* RCR bit7=1 attach Rx info at the end; =0 HW CRC (which is broken) */ rcr = 0x9e; tcr = 0xd8; cr = 0x0c; if (!(rcr & 0x80)) set_bit(RTL8150_HW_CRC, &dev->flags); set_registers(dev, RCR, 1, &rcr); set_registers(dev, TCR, 1, &tcr); set_registers(dev, CR, 1, &cr); get_registers(dev, MSR, 1, &msr); return 0; } static void disable_net_traffic(rtl8150_t * dev) { u8 cr; get_registers(dev, CR, 1, &cr); cr &= 0xf3; set_registers(dev, CR, 1, &cr); } static void rtl8150_tx_timeout(struct net_device *netdev, unsigned int txqueue) { rtl8150_t *dev = netdev_priv(netdev); dev_warn(&netdev->dev, "Tx timeout.\n"); usb_unlink_urb(dev->tx_urb); netdev->stats.tx_errors++; } static void rtl8150_set_multicast(struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); u16 rx_creg = 0x9e; netif_stop_queue(netdev); if (netdev->flags & IFF_PROMISC) { rx_creg |= 0x0001; dev_info(&netdev->dev, "%s: promiscuous mode\n", netdev->name); } else if (!netdev_mc_empty(netdev) || (netdev->flags & IFF_ALLMULTI)) { rx_creg &= 0xfffe; rx_creg |= 0x0002; dev_dbg(&netdev->dev, "%s: allmulti set\n", netdev->name); } else { /* ~RX_MULTICAST, ~RX_PROMISCUOUS */ rx_creg &= 0x00fc; } async_set_registers(dev, RCR, sizeof(rx_creg), rx_creg); netif_wake_queue(netdev); } static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); int count, res; netif_stop_queue(netdev); count = (skb->len < 60) ? 60 : skb->len; count = (count & 0x3f) ? count : count + 1; dev->tx_skb = skb; usb_fill_bulk_urb(dev->tx_urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), skb->data, count, write_bulk_callback, dev); if ((res = usb_submit_urb(dev->tx_urb, GFP_ATOMIC))) { /* Can we get/handle EPIPE here? 
*/ if (res == -ENODEV) netif_device_detach(dev->netdev); else { dev_warn(&netdev->dev, "failed tx_urb %d\n", res); netdev->stats.tx_errors++; netif_start_queue(netdev); } } else { netdev->stats.tx_packets++; netdev->stats.tx_bytes += skb->len; netif_trans_update(netdev); } return NETDEV_TX_OK; } static void set_carrier(struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); short tmp; get_registers(dev, CSCR, 2, &tmp); if (tmp & CSCR_LINK_STATUS) netif_carrier_on(netdev); else netif_carrier_off(netdev); } static int rtl8150_open(struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); int res; if (dev->rx_skb == NULL) dev->rx_skb = pull_skb(dev); if (!dev->rx_skb) return -ENOMEM; set_registers(dev, IDR, 6, netdev->dev_addr); usb_fill_bulk_urb(dev->rx_urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), dev->rx_skb->data, RTL8150_MTU, read_bulk_callback, dev); if ((res = usb_submit_urb(dev->rx_urb, GFP_KERNEL))) { if (res == -ENODEV) netif_device_detach(dev->netdev); dev_warn(&netdev->dev, "rx_urb submit failed: %d\n", res); return res; } usb_fill_int_urb(dev->intr_urb, dev->udev, usb_rcvintpipe(dev->udev, 3), dev->intr_buff, INTBUFSIZE, intr_callback, dev, dev->intr_interval); if ((res = usb_submit_urb(dev->intr_urb, GFP_KERNEL))) { if (res == -ENODEV) netif_device_detach(dev->netdev); dev_warn(&netdev->dev, "intr_urb submit failed: %d\n", res); usb_kill_urb(dev->rx_urb); return res; } enable_net_traffic(dev); set_carrier(netdev); netif_start_queue(netdev); return res; } static int rtl8150_close(struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); netif_stop_queue(netdev); if (!test_bit(RTL8150_UNPLUG, &dev->flags)) disable_net_traffic(dev); unlink_all_urbs(dev); return 0; } static void rtl8150_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { rtl8150_t *dev = netdev_priv(netdev); strscpy(info->driver, driver_name, sizeof(info->driver)); strscpy(info->version, DRIVER_VERSION, sizeof(info->version)); usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info)); } static int rtl8150_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ecmd) { rtl8150_t *dev = netdev_priv(netdev); short lpa, bmcr; u32 supported; supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); ecmd->base.port = PORT_TP; ecmd->base.phy_address = dev->phy; get_registers(dev, BMCR, 2, &bmcr); get_registers(dev, ANLP, 2, &lpa); if (bmcr & BMCR_ANENABLE) { u32 speed = ((lpa & (LPA_100HALF | LPA_100FULL)) ? SPEED_100 : SPEED_10); ecmd->base.speed = speed; ecmd->base.autoneg = AUTONEG_ENABLE; if (speed == SPEED_100) ecmd->base.duplex = (lpa & LPA_100FULL) ? DUPLEX_FULL : DUPLEX_HALF; else ecmd->base.duplex = (lpa & LPA_10FULL) ? DUPLEX_FULL : DUPLEX_HALF; } else { ecmd->base.autoneg = AUTONEG_DISABLE; ecmd->base.speed = ((bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10); ecmd->base.duplex = (bmcr & BMCR_FULLDPLX) ? 
DUPLEX_FULL : DUPLEX_HALF; } ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.supported, supported); return 0; } static const struct ethtool_ops ops = { .get_drvinfo = rtl8150_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ksettings = rtl8150_get_link_ksettings, }; static int rtl8150_siocdevprivate(struct net_device *netdev, struct ifreq *rq, void __user *udata, int cmd) { rtl8150_t *dev = netdev_priv(netdev); u16 *data = (u16 *) & rq->ifr_ifru; int res = 0; switch (cmd) { case SIOCDEVPRIVATE: data[0] = dev->phy; fallthrough; case SIOCDEVPRIVATE + 1: read_mii_word(dev, dev->phy, (data[1] & 0x1f), &data[3]); break; case SIOCDEVPRIVATE + 2: if (!capable(CAP_NET_ADMIN)) return -EPERM; write_mii_word(dev, dev->phy, (data[1] & 0x1f), data[2]); break; default: res = -EOPNOTSUPP; } return res; } static const struct net_device_ops rtl8150_netdev_ops = { .ndo_open = rtl8150_open, .ndo_stop = rtl8150_close, .ndo_siocdevprivate = rtl8150_siocdevprivate, .ndo_start_xmit = rtl8150_start_xmit, .ndo_tx_timeout = rtl8150_tx_timeout, .ndo_set_rx_mode = rtl8150_set_multicast, .ndo_set_mac_address = rtl8150_set_mac_address, .ndo_validate_addr = eth_validate_addr, }; static int rtl8150_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); rtl8150_t *dev; struct net_device *netdev; netdev = alloc_etherdev(sizeof(rtl8150_t)); if (!netdev) return -ENOMEM; dev = netdev_priv(netdev); dev->intr_buff = kmalloc(INTBUFSIZE, GFP_KERNEL); if (!dev->intr_buff) { free_netdev(netdev); return -ENOMEM; } tasklet_setup(&dev->tl, rx_fixup); spin_lock_init(&dev->rx_pool_lock); dev->udev = udev; dev->netdev = netdev; netdev->netdev_ops = &rtl8150_netdev_ops; netdev->watchdog_timeo = RTL8150_TX_TIMEOUT; netdev->ethtool_ops = &ops; dev->intr_interval = 100; /* 100ms */ if (!alloc_all_urbs(dev)) { dev_err(&intf->dev, "out of memory\n"); goto out; } if (!rtl8150_reset(dev)) { dev_err(&intf->dev, "couldn't reset the device\n"); goto out1; } fill_skb_pool(dev); set_ethernet_addr(dev); usb_set_intfdata(intf, dev); SET_NETDEV_DEV(netdev, &intf->dev); if (register_netdev(netdev) != 0) { dev_err(&intf->dev, "couldn't register the device\n"); goto out2; } dev_info(&intf->dev, "%s: rtl8150 is detected\n", netdev->name); return 0; out2: usb_set_intfdata(intf, NULL); free_skb_pool(dev); out1: free_all_urbs(dev); out: kfree(dev->intr_buff); free_netdev(netdev); return -EIO; } static void rtl8150_disconnect(struct usb_interface *intf) { rtl8150_t *dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); if (dev) { set_bit(RTL8150_UNPLUG, &dev->flags); tasklet_kill(&dev->tl); unregister_netdev(dev->netdev); unlink_all_urbs(dev); free_all_urbs(dev); free_skb_pool(dev); dev_kfree_skb(dev->rx_skb); kfree(dev->intr_buff); free_netdev(dev->netdev); } } static struct usb_driver rtl8150_driver = { .name = driver_name, .probe = rtl8150_probe, .disconnect = rtl8150_disconnect, .id_table = rtl8150_table, .suspend = rtl8150_suspend, .resume = rtl8150_resume, .disable_hub_initiated_lpm = 1, }; module_usb_driver(rtl8150_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ #include <linux/sched/debug.h> #include <linux/kallsyms.h> #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kdebug.h> #include <linux/export.h> #include <linux/ptrace.h> #include <linux/kexec.h> #include <linux/sysfs.h> #include <linux/bug.h> #include <linux/nmi.h> #include <asm/cpu_entry_area.h> #include <asm/stacktrace.h> static const char * const exception_stack_names[] = { [ ESTACK_DF ] = "#DF", [ ESTACK_NMI ] = "NMI", [ ESTACK_DB ] = "#DB", [ ESTACK_MCE ] = "#MC", [ ESTACK_VC ] = "#VC", [ ESTACK_VC2 ] = "#VC2", }; const char *stack_type_name(enum stack_type type) { BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); if (type == STACK_TYPE_TASK) return "TASK"; if (type == STACK_TYPE_IRQ) return "IRQ"; if (type == STACK_TYPE_SOFTIRQ) return "SOFTIRQ"; if (type == STACK_TYPE_ENTRY) { /* * On 64-bit, we have a generic entry stack that we * use for all the kernel entry points, including * SYSENTER. */ return "ENTRY_TRAMPOLINE"; } if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) return exception_stack_names[type - STACK_TYPE_EXCEPTION]; return NULL; } /** * struct estack_pages - Page descriptor for exception stacks * @offs: Offset from the start of the exception stack area * @size: Size of the exception stack * @type: Type to store in the stack_info struct */ struct estack_pages { u32 offs; u16 size; u16 type; }; #define EPAGERANGE(st) \ [PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \ PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \ .offs = CEA_ESTACK_OFFS(st), \ .size = CEA_ESTACK_SIZE(st), \ .type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, } /* * Array of exception stack page descriptors. If the stack is larger than * PAGE_SIZE, all pages covering a particular stack will have the same * info. The guard pages including the not mapped DB2 stack are zeroed * out. */ static const struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = { EPAGERANGE(DF), EPAGERANGE(NMI), EPAGERANGE(DB), EPAGERANGE(MCE), EPAGERANGE(VC), EPAGERANGE(VC2), }; static __always_inline bool in_exception_stack(unsigned long *stack, struct stack_info *info) { unsigned long begin, end, stk = (unsigned long)stack; const struct estack_pages *ep; struct pt_regs *regs; unsigned int k; BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); begin = (unsigned long)__this_cpu_read(cea_exception_stacks); /* * Handle the case where stack trace is collected _before_ * cea_exception_stacks had been initialized. */ if (!begin) return false; end = begin + sizeof(struct cea_exception_stacks); /* Bail if @stack is outside the exception stack area. 
*/ if (stk < begin || stk >= end) return false; /* Calc page offset from start of exception stacks */ k = (stk - begin) >> PAGE_SHIFT; /* Lookup the page descriptor */ ep = &estack_pages[k]; /* Guard page? */ if (!ep->size) return false; begin += (unsigned long)ep->offs; end = begin + (unsigned long)ep->size; regs = (struct pt_regs *)end - 1; info->type = ep->type; info->begin = (unsigned long *)begin; info->end = (unsigned long *)end; info->next_sp = (unsigned long *)regs->sp; return true; } static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info) { unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); unsigned long *begin; /* * @end points directly to the top most stack entry to avoid a -8 * adjustment in the stack switch hotpath. Adjust it back before * calculating @begin. */ end++; begin = end - (IRQ_STACK_SIZE / sizeof(long)); /* * Due to the switching logic RSP can never be == @end because the * final operation is 'popq %rsp' which means after that RSP points * to the original stack and not to @end. */ if (stack < begin || stack >= end) return false; info->type = STACK_TYPE_IRQ; info->begin = begin; info->end = end; /* * The next stack pointer is stored at the top of the irq stack * before switching to the irq stack. Actual stack entries are all * below that. */ info->next_sp = (unsigned long *)*(end - 1); return true; } bool noinstr get_stack_info_noinstr(unsigned long *stack, struct task_struct *task, struct stack_info *info) { if (in_task_stack(stack, task, info)) return true; if (task != current) return false; if (in_exception_stack(stack, info)) return true; if (in_irq_stack(stack, info)) return true; if (in_entry_stack(stack, info)) return true; return false; } int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask) { task = task ? : current; if (!stack) goto unknown; if (!get_stack_info_noinstr(stack, task, info)) goto unknown; /* * Make sure we don't iterate through any given stack more than once. * If it comes up a second time then there's something wrong going on: * just break out and report an unknown stack type. */ if (visit_mask) { if (*visit_mask & (1UL << info->type)) { if (task == current) printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type); goto unknown; } *visit_mask |= 1UL << info->type; } return 0; unknown: info->type = STACK_TYPE_UNKNOWN; return -EINVAL; }
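/*
 * Editor's illustration, not part of dumpstack_64.c: a rough sketch of how a
 * stack dumper consumes get_stack_info() and struct stack_info (type, begin,
 * end, next_sp) to hop from the current stack to the one it was entered
 * from. dump_stack_words() is an invented name and the loop is simplified
 * (the real unwinder also decodes pt_regs frames); get_stack_info(),
 * stack_type_name() and the visit_mask recursion guard are the real
 * interface shown above.
 */
#include <linux/printk.h>
#include <linux/sched.h>
#include <asm/stacktrace.h>

static void dump_stack_words(struct task_struct *task, unsigned long *sp)
{
        struct stack_info info;
        unsigned long visit_mask = 0;

        /* get_stack_info() returns 0 while @sp lies on a known stack. */
        while (sp && !get_stack_info(sp, task, &info, &visit_mask)) {
                unsigned long *p;

                for (p = sp; p < info.end; p++)
                        printk(KERN_DEFAULT "  [%s] %016lx\n",
                               stack_type_name(info.type), *p);

                /* Continue on the stack we were entered from, if any. */
                sp = info.next_sp;
        }
}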
/* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions related to Power Management Quality of Service (PM QoS). * * Copyright (C) 2020 Intel Corporation * * Authors: * Mark Gross <mgross@linux.intel.com> * Rafael J. Wysocki <rafael.j.wysocki@intel.com> */ #ifndef _LINUX_PM_QOS_H #define _LINUX_PM_QOS_H #include <linux/plist.h> #include <linux/notifier.h> #include <linux/device.h> enum pm_qos_flags_status { PM_QOS_FLAGS_UNDEFINED = -1, PM_QOS_FLAGS_NONE, PM_QOS_FLAGS_SOME, PM_QOS_FLAGS_ALL, }; #define PM_QOS_DEFAULT_VALUE (-1) #define PM_QOS_LATENCY_ANY S32_MAX #define PM_QOS_LATENCY_ANY_NS ((s64)PM_QOS_LATENCY_ANY * NSEC_PER_USEC) #define PM_QOS_CPU_LATENCY_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE 0 #define PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE FREQ_QOS_MAX_DEFAULT_VALUE #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) enum pm_qos_type { PM_QOS_UNITIALIZED, PM_QOS_MAX, /* return the largest value */ PM_QOS_MIN, /* return the smallest value */ }; /* * Note: The lockless read path depends on the CPU accessing target_value * or effective_flags atomically. 
Atomic access is only guaranteed on all CPU * types linux supports for 32 bit quantites */ struct pm_qos_constraints { struct plist_head list; s32 target_value; /* Do not change to 64 bit */ s32 default_value; s32 no_constraint_value; enum pm_qos_type type; struct blocking_notifier_head *notifiers; }; struct pm_qos_request { struct plist_node node; struct pm_qos_constraints *qos; }; struct pm_qos_flags_request { struct list_head node; s32 flags; /* Do not change to 64 bit */ }; struct pm_qos_flags { struct list_head list; s32 effective_flags; /* Do not change to 64 bit */ }; #define FREQ_QOS_MIN_DEFAULT_VALUE 0 #define FREQ_QOS_MAX_DEFAULT_VALUE S32_MAX enum freq_qos_req_type { FREQ_QOS_MIN = 1, FREQ_QOS_MAX, }; struct freq_constraints { struct pm_qos_constraints min_freq; struct blocking_notifier_head min_freq_notifiers; struct pm_qos_constraints max_freq; struct blocking_notifier_head max_freq_notifiers; }; struct freq_qos_request { enum freq_qos_req_type type; struct plist_node pnode; struct freq_constraints *qos; }; enum dev_pm_qos_req_type { DEV_PM_QOS_RESUME_LATENCY = 1, DEV_PM_QOS_LATENCY_TOLERANCE, DEV_PM_QOS_MIN_FREQUENCY, DEV_PM_QOS_MAX_FREQUENCY, DEV_PM_QOS_FLAGS, }; struct dev_pm_qos_request { enum dev_pm_qos_req_type type; union { struct plist_node pnode; struct pm_qos_flags_request flr; struct freq_qos_request freq; } data; struct device *dev; }; struct dev_pm_qos { struct pm_qos_constraints resume_latency; struct pm_qos_constraints latency_tolerance; struct freq_constraints freq; struct pm_qos_flags flags; struct dev_pm_qos_request *resume_latency_req; struct dev_pm_qos_request *latency_tolerance_req; struct dev_pm_qos_request *flags_req; }; /* Action requested to pm_qos_update_target */ enum pm_qos_req_action { PM_QOS_ADD_REQ, /* Add a new request */ PM_QOS_UPDATE_REQ, /* Update an existing request */ PM_QOS_REMOVE_REQ /* Remove an existing request */ }; static inline int dev_pm_qos_request_active(struct dev_pm_qos_request *req) { return req->dev != NULL; } s32 pm_qos_read_value(struct pm_qos_constraints *c); int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value); bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val); #ifdef CONFIG_CPU_IDLE s32 cpu_latency_qos_limit(void); bool cpu_latency_qos_request_active(struct pm_qos_request *req); void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value); void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value); void cpu_latency_qos_remove_request(struct pm_qos_request *req); #else static inline s32 cpu_latency_qos_limit(void) { return INT_MAX; } static inline bool cpu_latency_qos_request_active(struct pm_qos_request *req) { return false; } static inline void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value) {} static inline void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value) {} static inline void cpu_latency_qos_remove_request(struct pm_qos_request *req) {} #endif #ifdef CONFIG_PM enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask); s32 __dev_pm_qos_resume_latency(struct device *dev); s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value); int dev_pm_qos_update_request(struct 
dev_pm_qos_request *req, s32 new_value); int dev_pm_qos_remove_request(struct dev_pm_qos_request *req); int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type); int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value); int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value); void dev_pm_qos_hide_latency_limit(struct device *dev); int dev_pm_qos_expose_flags(struct device *dev, s32 value); void dev_pm_qos_hide_flags(struct device *dev); int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set); s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev); int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val); int dev_pm_qos_expose_latency_tolerance(struct device *dev); void dev_pm_qos_hide_latency_tolerance(struct device *dev); static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return dev->power.qos->resume_latency_req->data.pnode.prio; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return dev->power.qos->flags_req->data.flr.flags; } static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) { return IS_ERR_OR_NULL(dev->power.qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : pm_qos_read_value(&dev->power.qos->resume_latency); } #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) { return PM_QOS_FLAGS_UNDEFINED; } static inline enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) { return PM_QOS_FLAGS_UNDEFINED; } static inline s32 __dev_pm_qos_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } static inline s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type) { switch (type) { case DEV_PM_QOS_RESUME_LATENCY: return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; case DEV_PM_QOS_MIN_FREQUENCY: return PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; case DEV_PM_QOS_MAX_FREQUENCY: return PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; default: WARN_ON(1); return 0; } } static inline int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { return 0; } static inline int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { return 0; } static inline int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { return 0; } static inline int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type) { return 0; } static inline int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type) { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { dev->power.power_state = PMSG_ON; } static inline void dev_pm_qos_constraints_destroy(struct device *dev) { dev->power.power_state = PMSG_INVALID; } static inline int dev_pm_qos_add_ancestor_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { return 0; } static inline int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) { return 0; } static inline void dev_pm_qos_hide_latency_limit(struct device *dev) {} static inline int dev_pm_qos_expose_flags(struct device 
*dev, s32 value) { return 0; } static inline void dev_pm_qos_hide_flags(struct device *dev) {} static inline int dev_pm_qos_update_flags(struct device *dev, s32 m, bool set) { return 0; } static inline s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev) { return PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; } static inline int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val) { return 0; } static inline int dev_pm_qos_expose_latency_tolerance(struct device *dev) { return 0; } static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } #endif static inline int freq_qos_request_active(struct freq_qos_request *req) { return !IS_ERR_OR_NULL(req->qos); } void freq_constraints_init(struct freq_constraints *qos); s32 freq_qos_read_value(struct freq_constraints *qos, enum freq_qos_req_type type); int freq_qos_add_request(struct freq_constraints *qos, struct freq_qos_request *req, enum freq_qos_req_type type, s32 value); int freq_qos_update_request(struct freq_qos_request *req, s32 new_value); int freq_qos_remove_request(struct freq_qos_request *req); int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value); int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier); int freq_qos_remove_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier); #endif
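/*
 * Editor's illustration, not part of pm_qos.h: the usual pattern for the CPU
 * latency QoS interface declared above. cpu_latency_qos_add_request(),
 * cpu_latency_qos_remove_request() and struct pm_qos_request are the real
 * API; the foo_* names and the 20 usec budget are invented. The value is in
 * microseconds and bounds the wakeup latency cpuidle may accept while the
 * request is active.
 */
#include <linux/pm_qos.h>

static struct pm_qos_request foo_latency_req;

static void foo_enter_low_latency(void)
{
        /* Ask PM QoS to keep CPU wakeup latency at or below 20 usec. */
        cpu_latency_qos_add_request(&foo_latency_req, 20);
}

static void foo_exit_low_latency(void)
{
        /* Drop the constraint; the aggregate target is recomputed. */
        cpu_latency_qos_remove_request(&foo_latency_req);
}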
/* SPDX-License-Identifier: GPL-2.0 */ /* * Statically sized hash table implementation * (C) 2012 Sasha Levin <levinsasha928@gmail.com> */ #ifndef _LINUX_HASHTABLE_H #define _LINUX_HASHTABLE_H #include <linux/list.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/hash.h> #include <linux/rculist.h> #define DEFINE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DEFINE_READ_MOSTLY_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] __read_mostly = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } #define DECLARE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] #define HASH_SIZE(name) (ARRAY_SIZE(name)) #define HASH_BITS(name) ilog2(HASH_SIZE(name)) /* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ #define hash_min(val, bits) \ (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) static inline void __hash_init(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) INIT_HLIST_HEAD(&ht[i]); } /** * hash_init - initialize a hash table * @hashtable: hashtable to be initialized * * Calculates the size of the hashtable from the given parameter, otherwise * the same as __hash_init(). * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. */ #define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) /** * hash_add - add an object to a hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add(hashtable, node, key) \ hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_add_rcu - add an object to a rcu enabled hashtable * @hashtable: hashtable to add to * @node: the &struct hlist_node of the object to be added * @key: the key of the object to be added */ #define hash_add_rcu(hashtable, node, key) \ hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) /** * hash_hashed - check whether an object is in any hashtable * @node: the &struct hlist_node of the object to be checked */ static inline bool hash_hashed(struct hlist_node *node) { return !hlist_unhashed(node); } static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) { unsigned int i; for (i = 0; i < sz; i++) if (!hlist_empty(&ht[i])) return false; return true; } /** * hash_empty - check whether a hashtable is empty * @hashtable: hashtable to check * * This has to be a macro since HASH_BITS() will not work on pointers since * it calculates the size during preprocessing. 
*/ #define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) /** * hash_del - remove an object from a hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del(struct hlist_node *node) { hlist_del_init(node); } /** * hash_del_rcu - remove an object from a rcu enabled hashtable * @node: &struct hlist_node of the object to remove */ static inline void hash_del_rcu(struct hlist_node *node) { hlist_del_init_rcu(node); } /** * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_rcu(name, bkt, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor * @tmp: a &struct hlist_node used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ #define hash_for_each_safe(name, bkt, tmp, obj, member) \ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ (bkt)++)\ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible(name, obj, member, key) \ hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the * same bucket in an rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ #define hash_for_each_possible_rcu(name, obj, member, key, cond...) \ hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ member, ## cond) /** * hash_for_each_possible_rcu_notrace - iterate over all possible objects hashing * to the same bucket in an rcu enabled hashtable in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over * * This is the same as hash_for_each_possible_rcu() except that it does * not do any RCU debugging or tracing. 
 */
#define hash_for_each_possible_rcu_notrace(name, obj, member, key)	\
	hlist_for_each_entry_rcu_notrace(obj,				\
		&name[hash_min(key, HASH_BITS(name))], member)

/**
 * hash_for_each_possible_safe - iterate over all possible objects hashing to the
 * same bucket safe against removals
 * @name: hashtable to iterate
 * @obj: the type * to use as a loop cursor for each entry
 * @tmp: a &struct hlist_node used for temporary storage
 * @member: the name of the hlist_node within the struct
 * @key: the key of the objects to iterate over
 */
#define hash_for_each_possible_safe(name, obj, tmp, member, key)	\
	hlist_for_each_entry_safe(obj, tmp,				\
		&name[hash_min(key, HASH_BITS(name))], member)

#endif
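The macros above are the whole API of this fixed-size hashtable: a power-of-two array of hlist buckets, integer-keyed through hash_min(), with plain and RCU add/delete/iterate variants. As a rough illustration only — the struct, table name, and key below are invented for this sketch, and it assumes the usual kernel headers such as <linux/slab.h> — typical usage looks like this:

/* Sketch only: a 16-bucket (2^4) table of entries keyed by an integer id. */
struct example_entry {
	int id;				/* lookup key */
	struct hlist_node node;		/* links the entry into its bucket */
};

static DEFINE_HASHTABLE(example_table, 4);

static void example_usage(void)
{
	struct example_entry *e, *found = NULL;
	struct hlist_node *tmp;
	int bkt;

	/* insert */
	e = kmalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return;
	e->id = 42;
	hash_add(example_table, &e->node, e->id);

	/* lookup: only the bucket that key 42 hashes to is walked */
	hash_for_each_possible(example_table, e, node, 42)
		if (e->id == 42)
			found = e;
	if (found)
		pr_info("found entry %d\n", found->id);

	/* teardown: full walk, safe against hash_del() inside the loop */
	hash_for_each_safe(example_table, bkt, tmp, e, node) {
		hash_del(&e->node);
		kfree(e);
	}
}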
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * USB Network driver infrastructure
 * Copyright (C) 2000-2005 by David Brownell
 * Copyright (C) 2003-2005 David Hollis <dhollis@davehollis.com>
 */

/*
 * This is a generic "USB networking" framework that works with several
 * kinds of full and high speed networking devices: host-to-host cables,
 * smart usb peripherals, and actual Ethernet adapters.
 *
 * These devices usually differ in terms of control protocols (if they
 * even have one!)
and sometimes they define new framing to wrap or batch * Ethernet packets. Otherwise, they talk to USB pretty much the same, * so interface (un)binding, endpoint I/O queues, fault handling, and other * issues can usefully be addressed by this framework. */ #include <linux/module.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ctype.h> #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/usb/usbnet.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/pm_runtime.h> /*-------------------------------------------------------------------------*/ /* * Nineteen USB 1.1 max size bulk transactions per frame (ms), max. * Several dozen bytes of IPv4 data can fit in two such transactions. * One maximum size Ethernet packet takes twenty four of them. * For high speed, each frame comfortably fits almost 36 max size * Ethernet packets (so queues should be bigger). * * The goal is to let the USB host controller be busy for 5msec or * more before an irq is required, under load. Jumbograms change * the equation. */ #define MAX_QUEUE_MEMORY (60 * 1518) #define RX_QLEN(dev) ((dev)->rx_qlen) #define TX_QLEN(dev) ((dev)->tx_qlen) // reawaken network queue this soon after stopping; else watchdog barks #define TX_TIMEOUT_JIFFIES (5*HZ) /* throttle rx/tx briefly after some faults, so hub_wq might disconnect() * us (it polls at HZ/4 usually) before we report too many false errors. */ #define THROTTLE_JIFFIES (HZ/8) // between wakeups #define UNLINK_TIMEOUT_MS 3 /*-------------------------------------------------------------------------*/ // randomly generated ethernet address static u8 node_id [ETH_ALEN]; /* use ethtool to change the level for any given device */ static int msg_level = -1; module_param (msg_level, int, 0); MODULE_PARM_DESC (msg_level, "Override default message level"); /*-------------------------------------------------------------------------*/ static const char * const usbnet_event_names[] = { [EVENT_TX_HALT] = "EVENT_TX_HALT", [EVENT_RX_HALT] = "EVENT_RX_HALT", [EVENT_RX_MEMORY] = "EVENT_RX_MEMORY", [EVENT_STS_SPLIT] = "EVENT_STS_SPLIT", [EVENT_LINK_RESET] = "EVENT_LINK_RESET", [EVENT_RX_PAUSED] = "EVENT_RX_PAUSED", [EVENT_DEV_ASLEEP] = "EVENT_DEV_ASLEEP", [EVENT_DEV_OPEN] = "EVENT_DEV_OPEN", [EVENT_DEVICE_REPORT_IDLE] = "EVENT_DEVICE_REPORT_IDLE", [EVENT_NO_RUNTIME_PM] = "EVENT_NO_RUNTIME_PM", [EVENT_RX_KILL] = "EVENT_RX_KILL", [EVENT_LINK_CHANGE] = "EVENT_LINK_CHANGE", [EVENT_SET_RX_MODE] = "EVENT_SET_RX_MODE", [EVENT_NO_IP_ALIGN] = "EVENT_NO_IP_ALIGN", }; /* handles CDC Ethernet and many other network "bulk data" interfaces */ int usbnet_get_endpoints(struct usbnet *dev, struct usb_interface *intf) { int tmp; struct usb_host_interface *alt = NULL; struct usb_host_endpoint *in = NULL, *out = NULL; struct usb_host_endpoint *status = NULL; for (tmp = 0; tmp < intf->num_altsetting; tmp++) { unsigned ep; in = out = status = NULL; alt = intf->altsetting + tmp; /* take the first altsetting with in-bulk + out-bulk; * remember any status endpoint, just in case; * ignore other endpoints and altsettings. 
*/ for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { struct usb_host_endpoint *e; int intr = 0; e = alt->endpoint + ep; /* ignore endpoints which cannot transfer data */ if (!usb_endpoint_maxp(&e->desc)) continue; switch (e->desc.bmAttributes) { case USB_ENDPOINT_XFER_INT: if (!usb_endpoint_dir_in(&e->desc)) continue; intr = 1; fallthrough; case USB_ENDPOINT_XFER_BULK: break; default: continue; } if (usb_endpoint_dir_in(&e->desc)) { if (!intr && !in) in = e; else if (intr && !status) status = e; } else { if (!out) out = e; } } if (in && out) break; } if (!alt || !in || !out) return -EINVAL; if (alt->desc.bAlternateSetting != 0 || !(dev->driver_info->flags & FLAG_NO_SETINT)) { tmp = usb_set_interface (dev->udev, alt->desc.bInterfaceNumber, alt->desc.bAlternateSetting); if (tmp < 0) return tmp; } dev->in = usb_rcvbulkpipe (dev->udev, in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); dev->out = usb_sndbulkpipe (dev->udev, out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); dev->status = status; return 0; } EXPORT_SYMBOL_GPL(usbnet_get_endpoints); int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress) { u8 addr[ETH_ALEN]; int tmp = -1, ret; unsigned char buf [13]; ret = usb_string(dev->udev, iMACAddress, buf, sizeof buf); if (ret == 12) tmp = hex2bin(addr, buf, 6); if (tmp < 0) { dev_dbg(&dev->udev->dev, "bad MAC string %d fetch, %d\n", iMACAddress, tmp); if (ret >= 0) ret = -EINVAL; return ret; } eth_hw_addr_set(dev->net, addr); return 0; } EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr); static void intr_complete (struct urb *urb) { struct usbnet *dev = urb->context; int status = urb->status; switch (status) { /* success */ case 0: dev->driver_info->status(dev, urb); break; /* software-driven interface shutdown */ case -ENOENT: /* urb killed */ case -ESHUTDOWN: /* hardware gone */ netif_dbg(dev, ifdown, dev->net, "intr shutdown, code %d\n", status); return; /* NOTE: not throttling like RX/TX, since this endpoint * already polls infrequently */ default: netdev_dbg(dev->net, "intr status %d\n", status); break; } status = usb_submit_urb (urb, GFP_ATOMIC); if (status != 0) netif_err(dev, timer, dev->net, "intr resubmit --> %d\n", status); } static int init_status (struct usbnet *dev, struct usb_interface *intf) { char *buf = NULL; unsigned pipe = 0; unsigned maxp; unsigned period; if (!dev->driver_info->status) return 0; pipe = usb_rcvintpipe (dev->udev, dev->status->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); maxp = usb_maxpacket(dev->udev, pipe); /* avoid 1 msec chatter: min 8 msec poll rate */ period = max ((int) dev->status->desc.bInterval, (dev->udev->speed == USB_SPEED_HIGH) ? 
7 : 3); buf = kmalloc (maxp, GFP_KERNEL); if (buf) { dev->interrupt = usb_alloc_urb (0, GFP_KERNEL); if (!dev->interrupt) { kfree (buf); return -ENOMEM; } else { usb_fill_int_urb(dev->interrupt, dev->udev, pipe, buf, maxp, intr_complete, dev, period); dev->interrupt->transfer_flags |= URB_FREE_BUFFER; dev_dbg(&intf->dev, "status ep%din, %d bytes period %d\n", usb_pipeendpoint(pipe), maxp, period); } } return 0; } /* Submit the interrupt URB if not previously submitted, increasing refcount */ int usbnet_status_start(struct usbnet *dev, gfp_t mem_flags) { int ret = 0; WARN_ON_ONCE(dev->interrupt == NULL); if (dev->interrupt) { mutex_lock(&dev->interrupt_mutex); if (++dev->interrupt_count == 1) ret = usb_submit_urb(dev->interrupt, mem_flags); dev_dbg(&dev->udev->dev, "incremented interrupt URB count to %d\n", dev->interrupt_count); mutex_unlock(&dev->interrupt_mutex); } return ret; } EXPORT_SYMBOL_GPL(usbnet_status_start); /* For resume; submit interrupt URB if previously submitted */ static int __usbnet_status_start_force(struct usbnet *dev, gfp_t mem_flags) { int ret = 0; mutex_lock(&dev->interrupt_mutex); if (dev->interrupt_count) { ret = usb_submit_urb(dev->interrupt, mem_flags); dev_dbg(&dev->udev->dev, "submitted interrupt URB for resume\n"); } mutex_unlock(&dev->interrupt_mutex); return ret; } /* Kill the interrupt URB if all submitters want it killed */ void usbnet_status_stop(struct usbnet *dev) { if (dev->interrupt) { mutex_lock(&dev->interrupt_mutex); WARN_ON(dev->interrupt_count == 0); if (dev->interrupt_count && --dev->interrupt_count == 0) usb_kill_urb(dev->interrupt); dev_dbg(&dev->udev->dev, "decremented interrupt URB count to %d\n", dev->interrupt_count); mutex_unlock(&dev->interrupt_mutex); } } EXPORT_SYMBOL_GPL(usbnet_status_stop); /* For suspend; always kill interrupt URB */ static void __usbnet_status_stop_force(struct usbnet *dev) { if (dev->interrupt) { mutex_lock(&dev->interrupt_mutex); usb_kill_urb(dev->interrupt); dev_dbg(&dev->udev->dev, "killed interrupt URB for suspend\n"); mutex_unlock(&dev->interrupt_mutex); } } /* Passes this packet up the stack, updating its accounting. * Some link protocols batch packets, so their rx_fixup paths * can return clones as well as just modify the original skb. 
*/ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) { struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->net->tstats); unsigned long flags; int status; if (test_bit(EVENT_RX_PAUSED, &dev->flags)) { skb_queue_tail(&dev->rxq_pause, skb); return; } /* only update if unset to allow minidriver rx_fixup override */ if (skb->protocol == 0) skb->protocol = eth_type_trans (skb, dev->net); flags = u64_stats_update_begin_irqsave(&stats64->syncp); u64_stats_inc(&stats64->rx_packets); u64_stats_add(&stats64->rx_bytes, skb->len); u64_stats_update_end_irqrestore(&stats64->syncp, flags); netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n", skb->len + sizeof (struct ethhdr), skb->protocol); memset (skb->cb, 0, sizeof (struct skb_data)); if (skb_defer_rx_timestamp(skb)) return; status = netif_rx (skb); if (status != NET_RX_SUCCESS) netif_dbg(dev, rx_err, dev->net, "netif_rx status %d\n", status); } EXPORT_SYMBOL_GPL(usbnet_skb_return); /* must be called if hard_mtu or rx_urb_size changed */ void usbnet_update_max_qlen(struct usbnet *dev) { enum usb_device_speed speed = dev->udev->speed; if (!dev->rx_urb_size || !dev->hard_mtu) goto insanity; switch (speed) { case USB_SPEED_HIGH: dev->rx_qlen = MAX_QUEUE_MEMORY / dev->rx_urb_size; dev->tx_qlen = MAX_QUEUE_MEMORY / dev->hard_mtu; break; case USB_SPEED_SUPER: case USB_SPEED_SUPER_PLUS: /* * Not take default 5ms qlen for super speed HC to * save memory, and iperf tests show 2.5ms qlen can * work well */ dev->rx_qlen = 5 * MAX_QUEUE_MEMORY / dev->rx_urb_size; dev->tx_qlen = 5 * MAX_QUEUE_MEMORY / dev->hard_mtu; break; default: insanity: dev->rx_qlen = dev->tx_qlen = 4; } } EXPORT_SYMBOL_GPL(usbnet_update_max_qlen); /*------------------------------------------------------------------------- * * Network Device Driver (peer link to "Host Device", from USB host) * *-------------------------------------------------------------------------*/ int usbnet_change_mtu (struct net_device *net, int new_mtu) { struct usbnet *dev = netdev_priv(net); int ll_mtu = new_mtu + net->hard_header_len; int old_hard_mtu = dev->hard_mtu; int old_rx_urb_size = dev->rx_urb_size; // no second zero-length packet read wanted after mtu-sized packets if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; net->mtu = new_mtu; dev->hard_mtu = net->mtu + net->hard_header_len; if (dev->rx_urb_size == old_hard_mtu) { dev->rx_urb_size = dev->hard_mtu; if (dev->rx_urb_size > old_rx_urb_size) { usbnet_pause_rx(dev); usbnet_unlink_rx_urbs(dev); usbnet_resume_rx(dev); } } /* max qlen depend on hard_mtu and rx_urb_size */ usbnet_update_max_qlen(dev); return 0; } EXPORT_SYMBOL_GPL(usbnet_change_mtu); /* The caller must hold list->lock */ static void __usbnet_queue_skb(struct sk_buff_head *list, struct sk_buff *newsk, enum skb_state state) { struct skb_data *entry = (struct skb_data *) newsk->cb; __skb_queue_tail(list, newsk); entry->state = state; } /*-------------------------------------------------------------------------*/ /* some LK 2.4 HCDs oopsed if we freed or resubmitted urbs from * completion callbacks. 2.5 should have fixed those bugs... */ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb, struct sk_buff_head *list, enum skb_state state) { unsigned long flags; enum skb_state old_state; struct skb_data *entry = (struct skb_data *) skb->cb; spin_lock_irqsave(&list->lock, flags); old_state = entry->state; entry->state = state; __skb_unlink(skb, list); /* defer_bh() is never called with list == &dev->done. 
* spin_lock_nested() tells lockdep that it is OK to take * dev->done.lock here with list->lock held. */ spin_lock_nested(&dev->done.lock, SINGLE_DEPTH_NESTING); __skb_queue_tail(&dev->done, skb); if (dev->done.qlen == 1) tasklet_schedule(&dev->bh); spin_unlock(&dev->done.lock); spin_unlock_irqrestore(&list->lock, flags); return old_state; } /* some work can't be done in tasklets, so we use keventd * * NOTE: annoying asymmetry: if it's active, schedule_work() fails, * but tasklet_schedule() doesn't. hope the failure is rare. */ void usbnet_defer_kevent (struct usbnet *dev, int work) { set_bit (work, &dev->flags); if (!schedule_work (&dev->kevent)) netdev_dbg(dev->net, "kevent %s may have been dropped\n", usbnet_event_names[work]); else netdev_dbg(dev->net, "kevent %s scheduled\n", usbnet_event_names[work]); } EXPORT_SYMBOL_GPL(usbnet_defer_kevent); /*-------------------------------------------------------------------------*/ static void rx_complete (struct urb *urb); static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) { struct sk_buff *skb; struct skb_data *entry; int retval = 0; unsigned long lockflags; size_t size = dev->rx_urb_size; /* prevent rx skb allocation when error ratio is high */ if (test_bit(EVENT_RX_KILL, &dev->flags)) { usb_free_urb(urb); return -ENOLINK; } if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags)) skb = __netdev_alloc_skb(dev->net, size, flags); else skb = __netdev_alloc_skb_ip_align(dev->net, size, flags); if (!skb) { netif_dbg(dev, rx_err, dev->net, "no rx skb\n"); usbnet_defer_kevent (dev, EVENT_RX_MEMORY); usb_free_urb (urb); return -ENOMEM; } entry = (struct skb_data *) skb->cb; entry->urb = urb; entry->dev = dev; entry->length = 0; usb_fill_bulk_urb (urb, dev->udev, dev->in, skb->data, size, rx_complete, skb); spin_lock_irqsave (&dev->rxq.lock, lockflags); if (netif_running (dev->net) && netif_device_present (dev->net) && test_bit(EVENT_DEV_OPEN, &dev->flags) && !test_bit (EVENT_RX_HALT, &dev->flags) && !test_bit (EVENT_DEV_ASLEEP, &dev->flags)) { switch (retval = usb_submit_urb (urb, GFP_ATOMIC)) { case -EPIPE: usbnet_defer_kevent (dev, EVENT_RX_HALT); break; case -ENOMEM: usbnet_defer_kevent (dev, EVENT_RX_MEMORY); break; case -ENODEV: netif_dbg(dev, ifdown, dev->net, "device gone\n"); netif_device_detach (dev->net); break; case -EHOSTUNREACH: retval = -ENOLINK; break; default: netif_dbg(dev, rx_err, dev->net, "rx submit, %d\n", retval); tasklet_schedule (&dev->bh); break; case 0: __usbnet_queue_skb(&dev->rxq, skb, rx_start); } } else { netif_dbg(dev, ifdown, dev->net, "rx: stopped\n"); retval = -ENOLINK; } spin_unlock_irqrestore (&dev->rxq.lock, lockflags); if (retval) { dev_kfree_skb_any (skb); usb_free_urb (urb); } return retval; } /*-------------------------------------------------------------------------*/ static inline int rx_process(struct usbnet *dev, struct sk_buff *skb) { if (dev->driver_info->rx_fixup && !dev->driver_info->rx_fixup (dev, skb)) { /* With RX_ASSEMBLE, rx_fixup() must update counters */ if (!(dev->driver_info->flags & FLAG_RX_ASSEMBLE)) dev->net->stats.rx_errors++; return -EPROTO; } // else network stack removes extra byte if we forced a short packet /* all data was already cloned from skb inside the driver */ if (dev->driver_info->flags & FLAG_MULTI_PACKET) return -EALREADY; if (skb->len < ETH_HLEN) { dev->net->stats.rx_errors++; dev->net->stats.rx_length_errors++; netif_dbg(dev, rx_err, dev->net, "rx length %d\n", skb->len); return -EPROTO; } usbnet_skb_return(dev, skb); return 0; } 
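/* For orientation (not part of the original file): rx_process() above is
 * where each completed receive skb is handed to the minidriver's rx_fixup
 * hook before the generic length check and usbnet_skb_return().  A minimal,
 * purely hypothetical rx_fixup might look like the sketch below; the 2-byte
 * big-endian length prefix is an invented framing, and real minidrivers
 * (asix, smsc95xx, ...) do considerably more work here.
 */
#if 0	/* illustrative sketch only */
static int example_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
	u16 payload_len;

	/* need at least the invented 2-byte header plus an Ethernet header */
	if (skb->len < 2 + ETH_HLEN)
		return 0;	/* 0 makes rx_process() count an error and drop */

	/* big-endian length prefix in front of the Ethernet frame */
	payload_len = (skb->data[0] << 8) | skb->data[1];
	skb_pull(skb, 2);			/* strip the framing header */

	if (payload_len > skb->len)
		return 0;
	skb_trim(skb, payload_len);		/* drop trailing padding, if any */

	return 1;	/* nonzero: hand the skb on to usbnet_skb_return() */
}
#endif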
/*-------------------------------------------------------------------------*/ static void rx_complete (struct urb *urb) { struct sk_buff *skb = (struct sk_buff *) urb->context; struct skb_data *entry = (struct skb_data *) skb->cb; struct usbnet *dev = entry->dev; int urb_status = urb->status; enum skb_state state; skb_put (skb, urb->actual_length); state = rx_done; entry->urb = NULL; switch (urb_status) { /* success */ case 0: break; /* stalls need manual reset. this is rare ... except that * when going through USB 2.0 TTs, unplug appears this way. * we avoid the highspeed version of the ETIMEDOUT/EILSEQ * storm, recovering as needed. */ case -EPIPE: dev->net->stats.rx_errors++; usbnet_defer_kevent (dev, EVENT_RX_HALT); fallthrough; /* software-driven interface shutdown */ case -ECONNRESET: /* async unlink */ case -ESHUTDOWN: /* hardware gone */ netif_dbg(dev, ifdown, dev->net, "rx shutdown, code %d\n", urb_status); goto block; /* we get controller i/o faults during hub_wq disconnect() delays. * throttle down resubmits, to avoid log floods; just temporarily, * so we still recover when the fault isn't a hub_wq delay. */ case -EPROTO: case -ETIME: case -EILSEQ: dev->net->stats.rx_errors++; if (!timer_pending (&dev->delay)) { mod_timer (&dev->delay, jiffies + THROTTLE_JIFFIES); netif_dbg(dev, link, dev->net, "rx throttle %d\n", urb_status); } block: state = rx_cleanup; entry->urb = urb; urb = NULL; break; /* data overrun ... flush fifo? */ case -EOVERFLOW: dev->net->stats.rx_over_errors++; fallthrough; default: state = rx_cleanup; dev->net->stats.rx_errors++; netif_dbg(dev, rx_err, dev->net, "rx status %d\n", urb_status); break; } /* stop rx if packet error rate is high */ if (++dev->pkt_cnt > 30) { dev->pkt_cnt = 0; dev->pkt_err = 0; } else { if (state == rx_cleanup) dev->pkt_err++; if (dev->pkt_err > 20) set_bit(EVENT_RX_KILL, &dev->flags); } state = defer_bh(dev, skb, &dev->rxq, state); if (urb) { if (netif_running (dev->net) && !test_bit (EVENT_RX_HALT, &dev->flags) && state != unlink_start) { rx_submit (dev, urb, GFP_ATOMIC); usb_mark_last_busy(dev->udev); return; } usb_free_urb (urb); } netif_dbg(dev, rx_err, dev->net, "no read resubmitted\n"); } /*-------------------------------------------------------------------------*/ void usbnet_pause_rx(struct usbnet *dev) { set_bit(EVENT_RX_PAUSED, &dev->flags); netif_dbg(dev, rx_status, dev->net, "paused rx queue enabled\n"); } EXPORT_SYMBOL_GPL(usbnet_pause_rx); void usbnet_resume_rx(struct usbnet *dev) { struct sk_buff *skb; int num = 0; clear_bit(EVENT_RX_PAUSED, &dev->flags); while ((skb = skb_dequeue(&dev->rxq_pause)) != NULL) { usbnet_skb_return(dev, skb); num++; } tasklet_schedule(&dev->bh); netif_dbg(dev, rx_status, dev->net, "paused rx queue disabled, %d skbs requeued\n", num); } EXPORT_SYMBOL_GPL(usbnet_resume_rx); void usbnet_purge_paused_rxq(struct usbnet *dev) { skb_queue_purge(&dev->rxq_pause); } EXPORT_SYMBOL_GPL(usbnet_purge_paused_rxq); /*-------------------------------------------------------------------------*/ // unlink pending rx/tx; completion handlers do all other cleanup static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q) { unsigned long flags; struct sk_buff *skb; int count = 0; spin_lock_irqsave (&q->lock, flags); while (!skb_queue_empty(q)) { struct skb_data *entry; struct urb *urb; int retval; skb_queue_walk(q, skb) { entry = (struct skb_data *) skb->cb; if (entry->state != unlink_start) goto found; } break; found: entry->state = unlink_start; urb = entry->urb; /* * Get reference count of the URB to 
avoid it to be * freed during usb_unlink_urb, which may trigger * use-after-free problem inside usb_unlink_urb since * usb_unlink_urb is always racing with .complete * handler(include defer_bh). */ usb_get_urb(urb); spin_unlock_irqrestore(&q->lock, flags); // during some PM-driven resume scenarios, // these (async) unlinks complete immediately retval = usb_unlink_urb (urb); if (retval != -EINPROGRESS && retval != 0) netdev_dbg(dev->net, "unlink urb err, %d\n", retval); else count++; usb_put_urb(urb); spin_lock_irqsave(&q->lock, flags); } spin_unlock_irqrestore (&q->lock, flags); return count; } // Flush all pending rx urbs // minidrivers may need to do this when the MTU changes void usbnet_unlink_rx_urbs(struct usbnet *dev) { if (netif_running(dev->net)) { (void) unlink_urbs (dev, &dev->rxq); tasklet_schedule(&dev->bh); } } EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs); /*-------------------------------------------------------------------------*/ static void wait_skb_queue_empty(struct sk_buff_head *q) { unsigned long flags; spin_lock_irqsave(&q->lock, flags); while (!skb_queue_empty(q)) { spin_unlock_irqrestore(&q->lock, flags); schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); set_current_state(TASK_UNINTERRUPTIBLE); spin_lock_irqsave(&q->lock, flags); } spin_unlock_irqrestore(&q->lock, flags); } // precondition: never called in_interrupt static void usbnet_terminate_urbs(struct usbnet *dev) { DECLARE_WAITQUEUE(wait, current); int temp; /* ensure there are no more active urbs */ add_wait_queue(&dev->wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); temp = unlink_urbs(dev, &dev->txq) + unlink_urbs(dev, &dev->rxq); /* maybe wait for deletions to finish. */ wait_skb_queue_empty(&dev->rxq); wait_skb_queue_empty(&dev->txq); wait_skb_queue_empty(&dev->done); netif_dbg(dev, ifdown, dev->net, "waited for %d urb completions\n", temp); set_current_state(TASK_RUNNING); remove_wait_queue(&dev->wait, &wait); } int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); const struct driver_info *info = dev->driver_info; int retval, pm, mpn; clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); netif_info(dev, ifdown, dev->net, "stop stats: rx/tx %lu/%lu, errs %lu/%lu\n", net->stats.rx_packets, net->stats.tx_packets, net->stats.rx_errors, net->stats.tx_errors); /* to not race resume */ pm = usb_autopm_get_interface(dev->intf); /* allow minidriver to stop correctly (wireless devices to turn off * radio etc) */ if (info->stop) { retval = info->stop(dev); if (retval < 0) netif_info(dev, ifdown, dev->net, "stop fail (%d) usbnet usb-%s-%s, %s\n", retval, dev->udev->bus->bus_name, dev->udev->devpath, info->description); } if (!(info->flags & FLAG_AVOID_UNLINK_URBS)) usbnet_terminate_urbs(dev); usbnet_status_stop(dev); usbnet_purge_paused_rxq(dev); mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags); /* deferred work (timer, softirq, task) must also stop */ dev->flags = 0; del_timer_sync (&dev->delay); tasklet_kill (&dev->bh); cancel_work_sync(&dev->kevent); if (!pm) usb_autopm_put_interface(dev->intf); if (info->manage_power && mpn) info->manage_power(dev, 0); else usb_autopm_put_interface(dev->intf); return 0; } EXPORT_SYMBOL_GPL(usbnet_stop); /*-------------------------------------------------------------------------*/ // posts reads, and enables write queuing // precondition: never called in_interrupt int usbnet_open (struct net_device *net) { struct usbnet *dev = netdev_priv(net); int retval; const struct driver_info *info = dev->driver_info; if ((retval = 
usb_autopm_get_interface(dev->intf)) < 0) { netif_info(dev, ifup, dev->net, "resumption fail (%d) usbnet usb-%s-%s, %s\n", retval, dev->udev->bus->bus_name, dev->udev->devpath, info->description); goto done_nopm; } // put into "known safe" state if (info->reset && (retval = info->reset (dev)) < 0) { netif_info(dev, ifup, dev->net, "open reset fail (%d) usbnet usb-%s-%s, %s\n", retval, dev->udev->bus->bus_name, dev->udev->devpath, info->description); goto done; } /* hard_mtu or rx_urb_size may change in reset() */ usbnet_update_max_qlen(dev); // insist peer be connected if (info->check_connect && (retval = info->check_connect (dev)) < 0) { netif_err(dev, ifup, dev->net, "can't open; %d\n", retval); goto done; } /* start any status interrupt transfer */ if (dev->interrupt) { retval = usbnet_status_start(dev, GFP_KERNEL); if (retval < 0) { netif_err(dev, ifup, dev->net, "intr submit %d\n", retval); goto done; } } set_bit(EVENT_DEV_OPEN, &dev->flags); netif_start_queue (net); netif_info(dev, ifup, dev->net, "open: enable queueing (rx %d, tx %d) mtu %d %s framing\n", (int)RX_QLEN(dev), (int)TX_QLEN(dev), dev->net->mtu, (dev->driver_info->flags & FLAG_FRAMING_NC) ? "NetChip" : (dev->driver_info->flags & FLAG_FRAMING_GL) ? "GeneSys" : (dev->driver_info->flags & FLAG_FRAMING_Z) ? "Zaurus" : (dev->driver_info->flags & FLAG_FRAMING_RN) ? "RNDIS" : (dev->driver_info->flags & FLAG_FRAMING_AX) ? "ASIX" : "simple"); /* reset rx error state */ dev->pkt_cnt = 0; dev->pkt_err = 0; clear_bit(EVENT_RX_KILL, &dev->flags); // delay posting reads until we're fully open tasklet_schedule (&dev->bh); if (info->manage_power) { retval = info->manage_power(dev, 1); if (retval < 0) { retval = 0; set_bit(EVENT_NO_RUNTIME_PM, &dev->flags); } else { usb_autopm_put_interface(dev->intf); } } return retval; done: usb_autopm_put_interface(dev->intf); done_nopm: return retval; } EXPORT_SYMBOL_GPL(usbnet_open); /*-------------------------------------------------------------------------*/ /* ethtool methods; minidrivers may need to add some more, but * they'll probably want to use this base set. */ /* These methods are written on the assumption that the device * uses MII */ int usbnet_get_link_ksettings_mii(struct net_device *net, struct ethtool_link_ksettings *cmd) { struct usbnet *dev = netdev_priv(net); if (!dev->mii.mdio_read) return -EOPNOTSUPP; mii_ethtool_get_link_ksettings(&dev->mii, cmd); return 0; } EXPORT_SYMBOL_GPL(usbnet_get_link_ksettings_mii); int usbnet_get_link_ksettings_internal(struct net_device *net, struct ethtool_link_ksettings *cmd) { struct usbnet *dev = netdev_priv(net); /* the assumption that speed is equal on tx and rx * is deeply engrained into the networking layer. * For wireless stuff it is not true. * We assume that rx_speed matters more. 
*/ if (dev->rx_speed != SPEED_UNSET) cmd->base.speed = dev->rx_speed / 1000000; else if (dev->tx_speed != SPEED_UNSET) cmd->base.speed = dev->tx_speed / 1000000; else cmd->base.speed = SPEED_UNKNOWN; return 0; } EXPORT_SYMBOL_GPL(usbnet_get_link_ksettings_internal); int usbnet_set_link_ksettings_mii(struct net_device *net, const struct ethtool_link_ksettings *cmd) { struct usbnet *dev = netdev_priv(net); int retval; if (!dev->mii.mdio_write) return -EOPNOTSUPP; retval = mii_ethtool_set_link_ksettings(&dev->mii, cmd); /* link speed/duplex might have changed */ if (dev->driver_info->link_reset) dev->driver_info->link_reset(dev); /* hard_mtu or rx_urb_size may change in link_reset() */ usbnet_update_max_qlen(dev); return retval; } EXPORT_SYMBOL_GPL(usbnet_set_link_ksettings_mii); u32 usbnet_get_link (struct net_device *net) { struct usbnet *dev = netdev_priv(net); /* If a check_connect is defined, return its result */ if (dev->driver_info->check_connect) return dev->driver_info->check_connect (dev) == 0; /* if the device has mii operations, use those */ if (dev->mii.mdio_read) return mii_link_ok(&dev->mii); /* Otherwise, dtrt for drivers calling netif_carrier_{on,off} */ return ethtool_op_get_link(net); } EXPORT_SYMBOL_GPL(usbnet_get_link); int usbnet_nway_reset(struct net_device *net) { struct usbnet *dev = netdev_priv(net); if (!dev->mii.mdio_write) return -EOPNOTSUPP; return mii_nway_restart(&dev->mii); } EXPORT_SYMBOL_GPL(usbnet_nway_reset); void usbnet_get_drvinfo (struct net_device *net, struct ethtool_drvinfo *info) { struct usbnet *dev = netdev_priv(net); strscpy(info->driver, dev->driver_name, sizeof(info->driver)); strscpy(info->fw_version, dev->driver_info->description, sizeof(info->fw_version)); usb_make_path (dev->udev, info->bus_info, sizeof info->bus_info); } EXPORT_SYMBOL_GPL(usbnet_get_drvinfo); u32 usbnet_get_msglevel (struct net_device *net) { struct usbnet *dev = netdev_priv(net); return dev->msg_enable; } EXPORT_SYMBOL_GPL(usbnet_get_msglevel); void usbnet_set_msglevel (struct net_device *net, u32 level) { struct usbnet *dev = netdev_priv(net); dev->msg_enable = level; } EXPORT_SYMBOL_GPL(usbnet_set_msglevel); /* drivers may override default ethtool_ops in their bind() routine */ static const struct ethtool_ops usbnet_ethtool_ops = { .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = usbnet_get_link_ksettings_mii, .set_link_ksettings = usbnet_set_link_ksettings_mii, }; /*-------------------------------------------------------------------------*/ static void __handle_link_change(struct usbnet *dev) { if (!test_bit(EVENT_DEV_OPEN, &dev->flags)) return; if (!netif_carrier_ok(dev->net)) { /* kill URBs for reading packets to save bus bandwidth */ unlink_urbs(dev, &dev->rxq); /* * tx_timeout will unlink URBs for sending packets and * tx queue is stopped by netcore after link becomes off */ } else { /* submitting URBs for reading packets */ tasklet_schedule(&dev->bh); } /* hard_mtu or rx_urb_size may change during link change */ usbnet_update_max_qlen(dev); clear_bit(EVENT_LINK_CHANGE, &dev->flags); } void usbnet_set_rx_mode(struct net_device *net) { struct usbnet *dev = netdev_priv(net); usbnet_defer_kevent(dev, EVENT_SET_RX_MODE); } EXPORT_SYMBOL_GPL(usbnet_set_rx_mode); static void __handle_set_rx_mode(struct usbnet *dev) { if (dev->driver_info->set_rx_mode) (dev->driver_info->set_rx_mode)(dev); 
clear_bit(EVENT_SET_RX_MODE, &dev->flags); } /* work that cannot be done in interrupt context uses keventd. * * NOTE: with 2.5 we could do more of this using completion callbacks, * especially now that control transfers can be queued. */ static void usbnet_deferred_kevent (struct work_struct *work) { struct usbnet *dev = container_of(work, struct usbnet, kevent); int status; /* usb_clear_halt() needs a thread context */ if (test_bit (EVENT_TX_HALT, &dev->flags)) { unlink_urbs (dev, &dev->txq); status = usb_autopm_get_interface(dev->intf); if (status < 0) goto fail_pipe; status = usb_clear_halt (dev->udev, dev->out); usb_autopm_put_interface(dev->intf); if (status < 0 && status != -EPIPE && status != -ESHUTDOWN) { if (netif_msg_tx_err (dev)) fail_pipe: netdev_err(dev->net, "can't clear tx halt, status %d\n", status); } else { clear_bit (EVENT_TX_HALT, &dev->flags); if (status != -ESHUTDOWN) netif_wake_queue (dev->net); } } if (test_bit (EVENT_RX_HALT, &dev->flags)) { unlink_urbs (dev, &dev->rxq); status = usb_autopm_get_interface(dev->intf); if (status < 0) goto fail_halt; status = usb_clear_halt (dev->udev, dev->in); usb_autopm_put_interface(dev->intf); if (status < 0 && status != -EPIPE && status != -ESHUTDOWN) { if (netif_msg_rx_err (dev)) fail_halt: netdev_err(dev->net, "can't clear rx halt, status %d\n", status); } else { clear_bit (EVENT_RX_HALT, &dev->flags); tasklet_schedule (&dev->bh); } } /* tasklet could resubmit itself forever if memory is tight */ if (test_bit (EVENT_RX_MEMORY, &dev->flags)) { struct urb *urb = NULL; int resched = 1; if (netif_running (dev->net)) urb = usb_alloc_urb (0, GFP_KERNEL); else clear_bit (EVENT_RX_MEMORY, &dev->flags); if (urb != NULL) { clear_bit (EVENT_RX_MEMORY, &dev->flags); status = usb_autopm_get_interface(dev->intf); if (status < 0) { usb_free_urb(urb); goto fail_lowmem; } if (rx_submit (dev, urb, GFP_KERNEL) == -ENOLINK) resched = 0; usb_autopm_put_interface(dev->intf); fail_lowmem: if (resched) tasklet_schedule (&dev->bh); } } if (test_bit (EVENT_LINK_RESET, &dev->flags)) { const struct driver_info *info = dev->driver_info; int retval = 0; clear_bit (EVENT_LINK_RESET, &dev->flags); status = usb_autopm_get_interface(dev->intf); if (status < 0) goto skip_reset; if(info->link_reset && (retval = info->link_reset(dev)) < 0) { usb_autopm_put_interface(dev->intf); skip_reset: netdev_info(dev->net, "link reset failed (%d) usbnet usb-%s-%s, %s\n", retval, dev->udev->bus->bus_name, dev->udev->devpath, info->description); } else { usb_autopm_put_interface(dev->intf); } /* handle link change from link resetting */ __handle_link_change(dev); } if (test_bit (EVENT_LINK_CHANGE, &dev->flags)) __handle_link_change(dev); if (test_bit (EVENT_SET_RX_MODE, &dev->flags)) __handle_set_rx_mode(dev); if (dev->flags) netdev_dbg(dev->net, "kevent done, flags = 0x%lx\n", dev->flags); } /*-------------------------------------------------------------------------*/ static void tx_complete (struct urb *urb) { struct sk_buff *skb = (struct sk_buff *) urb->context; struct skb_data *entry = (struct skb_data *) skb->cb; struct usbnet *dev = entry->dev; if (urb->status == 0) { struct pcpu_sw_netstats *stats64 = this_cpu_ptr(dev->net->tstats); unsigned long flags; flags = u64_stats_update_begin_irqsave(&stats64->syncp); u64_stats_add(&stats64->tx_packets, entry->packets); u64_stats_add(&stats64->tx_bytes, entry->length); u64_stats_update_end_irqrestore(&stats64->syncp, flags); } else { dev->net->stats.tx_errors++; switch (urb->status) { case -EPIPE: usbnet_defer_kevent (dev, 
EVENT_TX_HALT); break; /* software-driven interface shutdown */ case -ECONNRESET: // async unlink case -ESHUTDOWN: // hardware gone break; /* like rx, tx gets controller i/o faults during hub_wq * delays and so it uses the same throttling mechanism. */ case -EPROTO: case -ETIME: case -EILSEQ: usb_mark_last_busy(dev->udev); if (!timer_pending (&dev->delay)) { mod_timer (&dev->delay, jiffies + THROTTLE_JIFFIES); netif_dbg(dev, link, dev->net, "tx throttle %d\n", urb->status); } netif_stop_queue (dev->net); break; default: netif_dbg(dev, tx_err, dev->net, "tx err %d\n", entry->urb->status); break; } } usb_autopm_put_interface_async(dev->intf); (void) defer_bh(dev, skb, &dev->txq, tx_done); } /*-------------------------------------------------------------------------*/ void usbnet_tx_timeout (struct net_device *net, unsigned int txqueue) { struct usbnet *dev = netdev_priv(net); unlink_urbs (dev, &dev->txq); tasklet_schedule (&dev->bh); /* this needs to be handled individually because the generic layer * doesn't know what is sufficient and could not restore private * information if a remedy of an unconditional reset were used. */ if (dev->driver_info->recover) (dev->driver_info->recover)(dev); } EXPORT_SYMBOL_GPL(usbnet_tx_timeout); /*-------------------------------------------------------------------------*/ static int build_dma_sg(const struct sk_buff *skb, struct urb *urb) { unsigned num_sgs, total_len = 0; int i, s = 0; num_sgs = skb_shinfo(skb)->nr_frags + 1; if (num_sgs == 1) return 0; /* reserve one for zero packet */ urb->sg = kmalloc_array(num_sgs + 1, sizeof(struct scatterlist), GFP_ATOMIC); if (!urb->sg) return -ENOMEM; urb->num_sgs = num_sgs; sg_init_table(urb->sg, urb->num_sgs + 1); sg_set_buf(&urb->sg[s++], skb->data, skb_headlen(skb)); total_len += skb_headlen(skb); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *f = &skb_shinfo(skb)->frags[i]; total_len += skb_frag_size(f); sg_set_page(&urb->sg[i + s], skb_frag_page(f), skb_frag_size(f), skb_frag_off(f)); } urb->transfer_buffer_length = total_len; return 1; } netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) { struct usbnet *dev = netdev_priv(net); unsigned int length; struct urb *urb = NULL; struct skb_data *entry; const struct driver_info *info = dev->driver_info; unsigned long flags; int retval; if (skb) skb_tx_timestamp(skb); // some devices want funky USB-level framing, for // win32 driver (usually) and/or hardware quirks if (info->tx_fixup) { skb = info->tx_fixup (dev, skb, GFP_ATOMIC); if (!skb) { /* packet collected; minidriver waiting for more */ if (info->flags & FLAG_MULTI_PACKET) goto not_drop; netif_dbg(dev, tx_err, dev->net, "can't tx_fixup skb\n"); goto drop; } } if (!(urb = usb_alloc_urb (0, GFP_ATOMIC))) { netif_dbg(dev, tx_err, dev->net, "no urb\n"); goto drop; } entry = (struct skb_data *) skb->cb; entry->urb = urb; entry->dev = dev; usb_fill_bulk_urb (urb, dev->udev, dev->out, skb->data, skb->len, tx_complete, skb); if (dev->can_dma_sg) { if (build_dma_sg(skb, urb) < 0) goto drop; } length = urb->transfer_buffer_length; /* don't assume the hardware handles USB_ZERO_PACKET * NOTE: strictly conforming cdc-ether devices should expect * the ZLP here, but ignore the one-byte packet. * NOTE2: CDC NCM specification is different from CDC ECM when * handling ZLP/short packets, so cdc_ncm driver will make short * packet itself if needed. 
*/ if (length % dev->maxpacket == 0) { if (!(info->flags & FLAG_SEND_ZLP)) { if (!(info->flags & FLAG_MULTI_PACKET)) { length++; if (skb_tailroom(skb) && !urb->num_sgs) { skb->data[skb->len] = 0; __skb_put(skb, 1); } else if (urb->num_sgs) sg_set_buf(&urb->sg[urb->num_sgs++], dev->padding_pkt, 1); } } else urb->transfer_flags |= URB_ZERO_PACKET; } urb->transfer_buffer_length = length; if (info->flags & FLAG_MULTI_PACKET) { /* Driver has set number of packets and a length delta. * Calculate the complete length and ensure that it's * positive. */ entry->length += length; if (WARN_ON_ONCE(entry->length <= 0)) entry->length = length; } else { usbnet_set_skb_tx_stats(skb, 1, length); } spin_lock_irqsave(&dev->txq.lock, flags); retval = usb_autopm_get_interface_async(dev->intf); if (retval < 0) { spin_unlock_irqrestore(&dev->txq.lock, flags); goto drop; } if (netif_queue_stopped(net)) { usb_autopm_put_interface_async(dev->intf); spin_unlock_irqrestore(&dev->txq.lock, flags); goto drop; } #ifdef CONFIG_PM /* if this triggers the device is still a sleep */ if (test_bit(EVENT_DEV_ASLEEP, &dev->flags)) { /* transmission will be done in resume */ usb_anchor_urb(urb, &dev->deferred); /* no use to process more packets */ netif_stop_queue(net); usb_put_urb(urb); spin_unlock_irqrestore(&dev->txq.lock, flags); netdev_dbg(dev->net, "Delaying transmission for resumption\n"); goto deferred; } #endif switch ((retval = usb_submit_urb (urb, GFP_ATOMIC))) { case -EPIPE: netif_stop_queue (net); usbnet_defer_kevent (dev, EVENT_TX_HALT); usb_autopm_put_interface_async(dev->intf); break; default: usb_autopm_put_interface_async(dev->intf); netif_dbg(dev, tx_err, dev->net, "tx: submit urb err %d\n", retval); break; case 0: netif_trans_update(net); __usbnet_queue_skb(&dev->txq, skb, tx_start); if (dev->txq.qlen >= TX_QLEN (dev)) netif_stop_queue (net); } spin_unlock_irqrestore (&dev->txq.lock, flags); if (retval) { netif_dbg(dev, tx_err, dev->net, "drop, code %d\n", retval); drop: dev->net->stats.tx_dropped++; not_drop: if (skb) dev_kfree_skb_any (skb); if (urb) { kfree(urb->sg); usb_free_urb(urb); } } else netif_dbg(dev, tx_queued, dev->net, "> tx, len %u, type 0x%x\n", length, skb->protocol); #ifdef CONFIG_PM deferred: #endif return NETDEV_TX_OK; } EXPORT_SYMBOL_GPL(usbnet_start_xmit); static int rx_alloc_submit(struct usbnet *dev, gfp_t flags) { struct urb *urb; int i; int ret = 0; /* don't refill the queue all at once */ for (i = 0; i < 10 && dev->rxq.qlen < RX_QLEN(dev); i++) { urb = usb_alloc_urb(0, flags); if (urb != NULL) { ret = rx_submit(dev, urb, flags); if (ret) goto err; } else { ret = -ENOMEM; goto err; } } err: return ret; } static inline void usb_free_skb(struct sk_buff *skb) { struct skb_data *entry = (struct skb_data *)skb->cb; usb_free_urb(entry->urb); dev_kfree_skb(skb); } /*-------------------------------------------------------------------------*/ // tasklet (work deferred from completions, in_irq) or timer static void usbnet_bh (struct timer_list *t) { struct usbnet *dev = from_timer(dev, t, delay); struct sk_buff *skb; struct skb_data *entry; while ((skb = skb_dequeue (&dev->done))) { entry = (struct skb_data *) skb->cb; switch (entry->state) { case rx_done: if (rx_process(dev, skb)) usb_free_skb(skb); continue; case tx_done: kfree(entry->urb->sg); fallthrough; case rx_cleanup: usb_free_skb(skb); continue; default: netdev_dbg(dev->net, "bogus skb state %d\n", entry->state); } } /* restart RX again after disabling due to high error rate */ clear_bit(EVENT_RX_KILL, &dev->flags); /* waiting for all 
pending urbs to complete? * only then can we forgo submitting anew */ if (waitqueue_active(&dev->wait)) { if (dev->txq.qlen + dev->rxq.qlen + dev->done.qlen == 0) wake_up_all(&dev->wait); // or are we maybe short a few urbs? } else if (netif_running (dev->net) && netif_device_present (dev->net) && netif_carrier_ok(dev->net) && !timer_pending(&dev->delay) && !test_bit(EVENT_RX_PAUSED, &dev->flags) && !test_bit(EVENT_RX_HALT, &dev->flags)) { int temp = dev->rxq.qlen; if (temp < RX_QLEN(dev)) { if (rx_alloc_submit(dev, GFP_ATOMIC) == -ENOLINK) return; if (temp != dev->rxq.qlen) netif_dbg(dev, link, dev->net, "rxqlen %d --> %d\n", temp, dev->rxq.qlen); if (dev->rxq.qlen < RX_QLEN(dev)) tasklet_schedule (&dev->bh); } if (dev->txq.qlen < TX_QLEN (dev)) netif_wake_queue (dev->net); } } static void usbnet_bh_tasklet(struct tasklet_struct *t) { struct usbnet *dev = from_tasklet(dev, t, bh); usbnet_bh(&dev->delay); } /*------------------------------------------------------------------------- * * USB Device Driver support * *-------------------------------------------------------------------------*/ // precondition: never called in_interrupt void usbnet_disconnect (struct usb_interface *intf) { struct usbnet *dev; struct usb_device *xdev; struct net_device *net; struct urb *urb; dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); if (!dev) return; xdev = interface_to_usbdev (intf); netif_info(dev, probe, dev->net, "unregister '%s' usb-%s-%s, %s\n", intf->dev.driver->name, xdev->bus->bus_name, xdev->devpath, dev->driver_info->description); net = dev->net; unregister_netdev (net); while ((urb = usb_get_from_anchor(&dev->deferred))) { dev_kfree_skb(urb->context); kfree(urb->sg); usb_free_urb(urb); } if (dev->driver_info->unbind) dev->driver_info->unbind(dev, intf); usb_kill_urb(dev->interrupt); usb_free_urb(dev->interrupt); kfree(dev->padding_pkt); free_netdev(net); } EXPORT_SYMBOL_GPL(usbnet_disconnect); static const struct net_device_ops usbnet_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_set_rx_mode = usbnet_set_rx_mode, .ndo_change_mtu = usbnet_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; /*-------------------------------------------------------------------------*/ // precondition: never called in_interrupt static const struct device_type wlan_type = { .name = "wlan", }; static const struct device_type wwan_type = { .name = "wwan", }; int usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) { struct usbnet *dev; struct net_device *net; struct usb_host_interface *interface; const struct driver_info *info; struct usb_device *xdev; int status; const char *name; struct usb_driver *driver = to_usb_driver(udev->dev.driver); /* usbnet already took usb runtime pm, so have to enable the feature * for usb interface, otherwise usb_autopm_get_interface may return * failure if RUNTIME_PM is enabled. 
*/ if (!driver->supports_autosuspend) { driver->supports_autosuspend = 1; pm_runtime_enable(&udev->dev); } name = udev->dev.driver->name; info = (const struct driver_info *) prod->driver_info; if (!info) { dev_dbg (&udev->dev, "blacklisted by %s\n", name); return -ENODEV; } xdev = interface_to_usbdev (udev); interface = udev->cur_altsetting; status = -ENOMEM; // set up our own records net = alloc_etherdev(sizeof(*dev)); if (!net) goto out; /* netdev_printk() needs this so do it as early as possible */ SET_NETDEV_DEV(net, &udev->dev); dev = netdev_priv(net); dev->udev = xdev; dev->intf = udev; dev->driver_info = info; dev->driver_name = name; dev->rx_speed = SPEED_UNSET; dev->tx_speed = SPEED_UNSET; dev->msg_enable = netif_msg_init (msg_level, NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK); init_waitqueue_head(&dev->wait); skb_queue_head_init (&dev->rxq); skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); skb_queue_head_init(&dev->rxq_pause); tasklet_setup(&dev->bh, usbnet_bh_tasklet); INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); timer_setup(&dev->delay, usbnet_bh, 0); mutex_init (&dev->phy_mutex); mutex_init(&dev->interrupt_mutex); dev->interrupt_count = 0; dev->net = net; strscpy(net->name, "usb%d", sizeof(net->name)); eth_hw_addr_set(net, node_id); /* rx and tx sides can use different message sizes; * bind() should set rx_urb_size in that case. */ dev->hard_mtu = net->mtu + net->hard_header_len; net->min_mtu = 0; net->max_mtu = ETH_MAX_MTU; net->netdev_ops = &usbnet_netdev_ops; net->watchdog_timeo = TX_TIMEOUT_JIFFIES; net->ethtool_ops = &usbnet_ethtool_ops; net->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; // allow device-specific bind/init procedures // NOTE net->name still not usable ... if (info->bind) { status = info->bind (dev, udev); if (status < 0) goto out1; // heuristic: "usb%d" for links we know are two-host, // else "eth%d" when there's reasonable doubt. userspace // can rename the link if it knows better. 
if ((dev->driver_info->flags & FLAG_ETHER) != 0 && ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 || (net->dev_addr [0] & 0x02) == 0)) strscpy(net->name, "eth%d", sizeof(net->name)); /* WLAN devices should always be named "wlan%d" */ if ((dev->driver_info->flags & FLAG_WLAN) != 0) strscpy(net->name, "wlan%d", sizeof(net->name)); /* WWAN devices should always be named "wwan%d" */ if ((dev->driver_info->flags & FLAG_WWAN) != 0) strscpy(net->name, "wwan%d", sizeof(net->name)); /* devices that cannot do ARP */ if ((dev->driver_info->flags & FLAG_NOARP) != 0) net->flags |= IFF_NOARP; /* maybe the remote can't receive an Ethernet MTU */ if (net->mtu > (dev->hard_mtu - net->hard_header_len)) net->mtu = dev->hard_mtu - net->hard_header_len; } else if (!info->in || !info->out) status = usbnet_get_endpoints (dev, udev); else { u8 ep_addrs[3] = { info->in + USB_DIR_IN, info->out + USB_DIR_OUT, 0 }; dev->in = usb_rcvbulkpipe (xdev, info->in); dev->out = usb_sndbulkpipe (xdev, info->out); if (!(info->flags & FLAG_NO_SETINT)) status = usb_set_interface (xdev, interface->desc.bInterfaceNumber, interface->desc.bAlternateSetting); else status = 0; if (status == 0 && !usb_check_bulk_endpoints(udev, ep_addrs)) status = -EINVAL; } if (status >= 0 && dev->status) status = init_status (dev, udev); if (status < 0) goto out3; if (!dev->rx_urb_size) dev->rx_urb_size = dev->hard_mtu; dev->maxpacket = usb_maxpacket(dev->udev, dev->out); if (dev->maxpacket == 0) { /* that is a broken device */ status = -ENODEV; goto out4; } /* let userspace know we have a random address */ if (ether_addr_equal(net->dev_addr, node_id)) net->addr_assign_type = NET_ADDR_RANDOM; if ((dev->driver_info->flags & FLAG_WLAN) != 0) SET_NETDEV_DEVTYPE(net, &wlan_type); if ((dev->driver_info->flags & FLAG_WWAN) != 0) SET_NETDEV_DEVTYPE(net, &wwan_type); /* initialize max rx_qlen and tx_qlen */ usbnet_update_max_qlen(dev); if (dev->can_dma_sg && !(info->flags & FLAG_SEND_ZLP) && !(info->flags & FLAG_MULTI_PACKET)) { dev->padding_pkt = kzalloc(1, GFP_KERNEL); if (!dev->padding_pkt) { status = -ENOMEM; goto out4; } } status = register_netdev (net); if (status) goto out5; netif_info(dev, probe, dev->net, "register '%s' at usb-%s-%s, %s, %pM\n", udev->dev.driver->name, xdev->bus->bus_name, xdev->devpath, dev->driver_info->description, net->dev_addr); // ok, it's ready to go. usb_set_intfdata (udev, dev); netif_device_attach (net); if (dev->driver_info->flags & FLAG_LINK_INTR) usbnet_link_change(dev, 0, 0); return 0; out5: kfree(dev->padding_pkt); out4: usb_free_urb(dev->interrupt); out3: if (info->unbind) info->unbind (dev, udev); out1: /* subdrivers must undo all they did in bind() if they * fail it, but we may fail later and a deferred kevent * may trigger an error resubmitting itself and, worse, * schedule a timer. So we kill it all just in case. 
*/ cancel_work_sync(&dev->kevent); del_timer_sync(&dev->delay); free_netdev(net); out: return status; } EXPORT_SYMBOL_GPL(usbnet_probe); /*-------------------------------------------------------------------------*/ /* * suspend the whole driver as soon as the first interface is suspended * resume only when the last interface is resumed */ int usbnet_suspend (struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); if (!dev->suspend_count++) { spin_lock_irq(&dev->txq.lock); /* don't autosuspend while transmitting */ if (dev->txq.qlen && PMSG_IS_AUTO(message)) { dev->suspend_count--; spin_unlock_irq(&dev->txq.lock); return -EBUSY; } else { set_bit(EVENT_DEV_ASLEEP, &dev->flags); spin_unlock_irq(&dev->txq.lock); } /* * accelerate emptying of the rx and queues, to avoid * having everything error out. */ netif_device_detach (dev->net); usbnet_terminate_urbs(dev); __usbnet_status_stop_force(dev); /* * reattach so runtime management can use and * wake the device */ netif_device_attach (dev->net); } return 0; } EXPORT_SYMBOL_GPL(usbnet_suspend); int usbnet_resume (struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); struct sk_buff *skb; struct urb *res; int retval; if (!--dev->suspend_count) { /* resume interrupt URB if it was previously submitted */ __usbnet_status_start_force(dev, GFP_NOIO); spin_lock_irq(&dev->txq.lock); while ((res = usb_get_from_anchor(&dev->deferred))) { skb = (struct sk_buff *)res->context; retval = usb_submit_urb(res, GFP_ATOMIC); if (retval < 0) { dev_kfree_skb_any(skb); kfree(res->sg); usb_free_urb(res); usb_autopm_put_interface_async(dev->intf); } else { netif_trans_update(dev->net); __skb_queue_tail(&dev->txq, skb); } } smp_mb(); clear_bit(EVENT_DEV_ASLEEP, &dev->flags); spin_unlock_irq(&dev->txq.lock); if (test_bit(EVENT_DEV_OPEN, &dev->flags)) { /* handle remote wakeup ASAP * we cannot race against stop */ if (netif_device_present(dev->net) && !timer_pending(&dev->delay) && !test_bit(EVENT_RX_HALT, &dev->flags)) rx_alloc_submit(dev, GFP_NOIO); if (!(dev->txq.qlen >= TX_QLEN(dev))) netif_tx_wake_all_queues(dev->net); tasklet_schedule (&dev->bh); } } if (test_and_clear_bit(EVENT_DEVICE_REPORT_IDLE, &dev->flags)) usb_autopm_get_interface_no_resume(intf); return 0; } EXPORT_SYMBOL_GPL(usbnet_resume); /* * Either a subdriver implements manage_power, then it is assumed to always * be ready to be suspended or it reports the readiness to be suspended * explicitly */ void usbnet_device_suggests_idle(struct usbnet *dev) { if (!test_and_set_bit(EVENT_DEVICE_REPORT_IDLE, &dev->flags)) { dev->intf->needs_remote_wakeup = 1; usb_autopm_put_interface_async(dev->intf); } } EXPORT_SYMBOL(usbnet_device_suggests_idle); /* * For devices that can do without special commands */ int usbnet_manage_power(struct usbnet *dev, int on) { dev->intf->needs_remote_wakeup = on; return 0; } EXPORT_SYMBOL(usbnet_manage_power); void usbnet_link_change(struct usbnet *dev, bool link, bool need_reset) { /* update link after link is reseted */ if (link && !need_reset) netif_carrier_on(dev->net); else netif_carrier_off(dev->net); if (need_reset && link) usbnet_defer_kevent(dev, EVENT_LINK_RESET); else usbnet_defer_kevent(dev, EVENT_LINK_CHANGE); } EXPORT_SYMBOL(usbnet_link_change); /*-------------------------------------------------------------------------*/ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, void *data, u16 size) { void *buf = NULL; int err = -ENOMEM; netdev_dbg(dev->net, "usbnet_read_cmd 
cmd=0x%02x reqtype=%02x" " value=0x%04x index=0x%04x size=%d\n", cmd, reqtype, value, index, size); if (size) { buf = kmalloc(size, GFP_NOIO); if (!buf) goto out; } err = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), cmd, reqtype, value, index, buf, size, USB_CTRL_GET_TIMEOUT); if (err > 0 && err <= size) { if (data) memcpy(data, buf, err); else netdev_dbg(dev->net, "Huh? Data requested but thrown away.\n"); } kfree(buf); out: return err; } static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, const void *data, u16 size) { void *buf = NULL; int err = -ENOMEM; netdev_dbg(dev->net, "usbnet_write_cmd cmd=0x%02x reqtype=%02x" " value=0x%04x index=0x%04x size=%d\n", cmd, reqtype, value, index, size); if (data) { buf = kmemdup(data, size, GFP_NOIO); if (!buf) goto out; } else { if (size) { WARN_ON_ONCE(1); err = -EINVAL; goto out; } } err = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), cmd, reqtype, value, index, buf, size, USB_CTRL_SET_TIMEOUT); kfree(buf); out: return err; } /* * The function can't be called inside suspend/resume callback, * otherwise deadlock will be caused. */ int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, void *data, u16 size) { int ret; if (usb_autopm_get_interface(dev->intf) < 0) return -ENODEV; ret = __usbnet_read_cmd(dev, cmd, reqtype, value, index, data, size); usb_autopm_put_interface(dev->intf); return ret; } EXPORT_SYMBOL_GPL(usbnet_read_cmd); /* * The function can't be called inside suspend/resume callback, * otherwise deadlock will be caused. */ int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, const void *data, u16 size) { int ret; if (usb_autopm_get_interface(dev->intf) < 0) return -ENODEV; ret = __usbnet_write_cmd(dev, cmd, reqtype, value, index, data, size); usb_autopm_put_interface(dev->intf); return ret; } EXPORT_SYMBOL_GPL(usbnet_write_cmd); /* * The function can be called inside suspend/resume callback safely * and should only be called by suspend/resume callback generally. */ int usbnet_read_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, void *data, u16 size) { return __usbnet_read_cmd(dev, cmd, reqtype, value, index, data, size); } EXPORT_SYMBOL_GPL(usbnet_read_cmd_nopm); /* * The function can be called inside suspend/resume callback safely * and should only be called by suspend/resume callback generally. */ int usbnet_write_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, const void *data, u16 size) { return __usbnet_write_cmd(dev, cmd, reqtype, value, index, data, size); } EXPORT_SYMBOL_GPL(usbnet_write_cmd_nopm); static void usbnet_async_cmd_cb(struct urb *urb) { struct usb_ctrlrequest *req = (struct usb_ctrlrequest *)urb->context; int status = urb->status; if (status < 0) dev_dbg(&urb->dev->dev, "%s failed with %d", __func__, status); kfree(req); usb_free_urb(urb); } /* * The caller must make sure that device can't be put into suspend * state until the control URB completes. 
*/ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, const void *data, u16 size) { struct usb_ctrlrequest *req; struct urb *urb; int err = -ENOMEM; void *buf = NULL; netdev_dbg(dev->net, "usbnet_write_cmd cmd=0x%02x reqtype=%02x" " value=0x%04x index=0x%04x size=%d\n", cmd, reqtype, value, index, size); urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) goto fail; if (data) { buf = kmemdup(data, size, GFP_ATOMIC); if (!buf) { netdev_err(dev->net, "Error allocating buffer" " in %s!\n", __func__); goto fail_free_urb; } } req = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC); if (!req) goto fail_free_buf; req->bRequestType = reqtype; req->bRequest = cmd; req->wValue = cpu_to_le16(value); req->wIndex = cpu_to_le16(index); req->wLength = cpu_to_le16(size); usb_fill_control_urb(urb, dev->udev, usb_sndctrlpipe(dev->udev, 0), (void *)req, buf, size, usbnet_async_cmd_cb, req); urb->transfer_flags |= URB_FREE_BUFFER; err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { netdev_err(dev->net, "Error submitting the control" " message: status=%d\n", err); goto fail_free_all; } return 0; fail_free_all: kfree(req); fail_free_buf: kfree(buf); /* * avoid a double free * needed because the flag can be set only * after filling the URB */ urb->transfer_flags = 0; fail_free_urb: usb_free_urb(urb); fail: return err; } EXPORT_SYMBOL_GPL(usbnet_write_cmd_async); /*-------------------------------------------------------------------------*/ static int __init usbnet_init(void) { /* Compiler should optimize this out. */ BUILD_BUG_ON( sizeof_field(struct sk_buff, cb) < sizeof(struct skb_data)); eth_random_addr(node_id); return 0; } module_init(usbnet_init); static void __exit usbnet_exit(void) { } module_exit(usbnet_exit); MODULE_AUTHOR("David Brownell"); MODULE_DESCRIPTION("USB network driver framework"); MODULE_LICENSE("GPL");
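/*
 * Illustrative sketch (not part of usbnet.c): how a minidriver typically
 * hooks into the framework above, wiring usbnet_probe()/usbnet_disconnect()
 * and the suspend/resume helpers into a struct usb_driver. The vendor and
 * product IDs and all example_* names below are hypothetical placeholders.
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/usb.h>
#include <linux/usb/usbnet.h>

static int example_bind(struct usbnet *dev, struct usb_interface *intf)
{
	/* probe the interface for bulk in/out (and optional status) endpoints */
	return usbnet_get_endpoints(dev, intf);
}

static const struct driver_info example_info = {
	.description	= "Example USB Ethernet adapter",
	.flags		= FLAG_ETHER,
	.bind		= example_bind,
};

static const struct usb_device_id example_ids[] = {
	/* 0x1234:0x5678 is a made-up VID:PID pair */
	{ USB_DEVICE(0x1234, 0x5678),
	  .driver_info = (unsigned long)&example_info },
	{ }
};
MODULE_DEVICE_TABLE(usb, example_ids);

static struct usb_driver example_usbnet_driver = {
	.name		= "example_usbnet",
	.id_table	= example_ids,
	.probe		= usbnet_probe,
	.disconnect	= usbnet_disconnect,
	.suspend	= usbnet_suspend,
	.resume		= usbnet_resume,
	.supports_autosuspend = 1,
};
module_usb_driver(example_usbnet_driver);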
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * i2c.h - definitions for the Linux i2c bus interface * Copyright (C) 1995-2000 Simon G. Vogl * Copyright (C) 2013-2019 Wolfram Sang <wsa@kernel.org> * * With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi> and * Frodo Looijaard <frodol@dds.nl> */ #ifndef _LINUX_I2C_H #define _LINUX_I2C_H #include <linux/acpi.h> /* for acpi_handle */ #include <linux/bits.h> #include <linux/mod_devicetable.h> #include <linux/device.h> /* for struct device */ #include <linux/sched.h> /* for completion */ #include <linux/mutex.h> #include <linux/regulator/consumer.h> #include <linux/rtmutex.h> #include <linux/irqdomain.h> /* for Host Notify IRQ */ #include <linux/of.h> /* for struct device_node */ #include <linux/swab.h> /* for swab16 */ #include <uapi/linux/i2c.h> extern const struct bus_type i2c_bus_type; extern const struct device_type i2c_adapter_type; extern const struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ struct i2c_msg; struct i2c_algorithm; struct i2c_adapter; struct i2c_client; struct i2c_driver; struct i2c_device_identity; union i2c_smbus_data; struct i2c_board_info; enum i2c_slave_event; typedef int (*i2c_slave_cb_t)(struct i2c_client *client, enum i2c_slave_event event, u8 *val); /* I2C Frequency Modes */ #define I2C_MAX_STANDARD_MODE_FREQ 100000 #define I2C_MAX_FAST_MODE_FREQ 400000 #define I2C_MAX_FAST_MODE_PLUS_FREQ 1000000 #define I2C_MAX_TURBO_MODE_FREQ 1400000 #define I2C_MAX_HIGH_SPEED_MODE_FREQ 3400000 #define I2C_MAX_ULTRA_FAST_MODE_FREQ 5000000 struct module; struct property_entry; #if IS_ENABLED(CONFIG_I2C) /* Return the Frequency mode string based on the bus frequency */ const char *i2c_freq_mode_string(u32 bus_freq_hz); /* * The master routines are the ones normally used to transmit data to devices * on a bus (or read from them). Apart from two basic transfer functions to * transmit one message at a time, a more complex version can be used to * transmit an arbitrary number of messages without interruption. * @count must be less than 64k since msg.len is u16. */ int i2c_transfer_buffer_flags(const struct i2c_client *client, char *buf, int count, u16 flags); /** * i2c_master_recv - issue a single I2C message in master receive mode * @client: Handle to slave device * @buf: Where to store data read from slave * @count: How many bytes to read, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes read.
*/ static inline int i2c_master_recv(const struct i2c_client *client, char *buf, int count) { return i2c_transfer_buffer_flags(client, buf, count, I2C_M_RD); }; /** * i2c_master_recv_dmasafe - issue a single I2C message in master receive mode * using a DMA safe buffer * @client: Handle to slave device * @buf: Where to store data read from slave, must be safe to use with DMA * @count: How many bytes to read, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes read. */ static inline int i2c_master_recv_dmasafe(const struct i2c_client *client, char *buf, int count) { return i2c_transfer_buffer_flags(client, buf, count, I2C_M_RD | I2C_M_DMA_SAFE); }; /** * i2c_master_send - issue a single I2C message in master transmit mode * @client: Handle to slave device * @buf: Data that will be written to the slave * @count: How many bytes to write, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes written. */ static inline int i2c_master_send(const struct i2c_client *client, const char *buf, int count) { return i2c_transfer_buffer_flags(client, (char *)buf, count, 0); }; /** * i2c_master_send_dmasafe - issue a single I2C message in master transmit mode * using a DMA safe buffer * @client: Handle to slave device * @buf: Data that will be written to the slave, must be safe to use with DMA * @count: How many bytes to write, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes written. */ static inline int i2c_master_send_dmasafe(const struct i2c_client *client, const char *buf, int count) { return i2c_transfer_buffer_flags(client, (char *)buf, count, I2C_M_DMA_SAFE); }; /* Transfer num messages. */ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); /* Unlocked flavor */ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); /* This is the very generalized SMBus access routine. You probably do not want to use this, though; one of the functions below may be much easier, and probably just as fast. Note that we use i2c_adapter here, because you do not need a specific smbus adapter to call this function. */ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, union i2c_smbus_data *data); /* Unlocked flavor */ s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, union i2c_smbus_data *data); /* Now follow the 'nice' access routines. These also document the calling conventions of i2c_smbus_xfer. */ u8 i2c_smbus_pec(u8 crc, u8 *p, size_t count); s32 i2c_smbus_read_byte(const struct i2c_client *client); s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value); s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command); s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command, u8 value); s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command); s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command, u16 value); static inline s32 i2c_smbus_read_word_swapped(const struct i2c_client *client, u8 command) { s32 value = i2c_smbus_read_word_data(client, command); return (value < 0) ? 
value : swab16(value); } static inline s32 i2c_smbus_write_word_swapped(const struct i2c_client *client, u8 command, u16 value) { return i2c_smbus_write_word_data(client, command, swab16(value)); } /* Returns the number of read bytes */ s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command, u8 *values); s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values); /* Returns the number of read bytes */ s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, u8 *values); s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values); s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, u8 command, u8 length, u8 *values); int i2c_get_device_id(const struct i2c_client *client, struct i2c_device_identity *id); const struct i2c_device_id *i2c_client_get_device_id(const struct i2c_client *client); #endif /* I2C */ /** * struct i2c_device_identity - i2c client device identification * @manufacturer_id: 0 - 4095, database maintained by NXP * @part_id: 0 - 511, according to manufacturer * @die_revision: 0 - 7, according to manufacturer */ struct i2c_device_identity { u16 manufacturer_id; #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS 0 #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_1 1 #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_2 2 #define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_3 3 #define I2C_DEVICE_ID_RAMTRON_INTERNATIONAL 4 #define I2C_DEVICE_ID_ANALOG_DEVICES 5 #define I2C_DEVICE_ID_STMICROELECTRONICS 6 #define I2C_DEVICE_ID_ON_SEMICONDUCTOR 7 #define I2C_DEVICE_ID_SPRINTEK_CORPORATION 8 #define I2C_DEVICE_ID_ESPROS_PHOTONICS_AG 9 #define I2C_DEVICE_ID_FUJITSU_SEMICONDUCTOR 10 #define I2C_DEVICE_ID_FLIR 11 #define I2C_DEVICE_ID_O2MICRO 12 #define I2C_DEVICE_ID_ATMEL 13 #define I2C_DEVICE_ID_NONE 0xffff u16 part_id; u8 die_revision; }; enum i2c_alert_protocol { I2C_PROTOCOL_SMBUS_ALERT, I2C_PROTOCOL_SMBUS_HOST_NOTIFY, }; /** * enum i2c_driver_flags - Flags for an I2C device driver * * @I2C_DRV_ACPI_WAIVE_D0_PROBE: Don't put the device in D0 state for probe */ enum i2c_driver_flags { I2C_DRV_ACPI_WAIVE_D0_PROBE = BIT(0), }; /** * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol * @command: Callback for bus-wide signaling (optional) * @driver: Device driver model driver * @id_table: List of I2C devices supported by this driver * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) * @flags: A bitmask of flags defined in &enum i2c_driver_flags * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. * * For automatic device detection, both @detect and @address_list must * be defined. @class should also be set, otherwise only devices forced * with module parameters will be created. The detect function must * fill at least the name field of the i2c_board_info structure it is * handed upon successful detection, and possibly also the flags field. * * If @detect is missing, the driver will still work fine for enumerated * devices. Detected devices simply won't be supported. 
This is expected * for the many I2C/SMBus devices which can't be detected reliably, and * the ones which can always be enumerated in practice. * * The i2c_client structure which is handed to the @detect callback is * not a real i2c_client. It is initialized just enough so that you can * call i2c_smbus_read_byte_data and friends on it. Don't do anything * else with it. In particular, calling dev_dbg and friends on it is * not allowed. */ struct i2c_driver { unsigned int class; /* Standard driver model interfaces */ int (*probe)(struct i2c_client *client); void (*remove)(struct i2c_client *client); /* driver model interfaces that don't relate to enumeration */ void (*shutdown)(struct i2c_client *client); /* Alert callback, for example for the SMBus alert protocol. * The format and meaning of the data value depends on the protocol. * For the SMBus alert protocol, there is a single bit of data passed * as the alert response's low bit ("event flag"). * For the SMBus Host Notify protocol, the data corresponds to the * 16-bit payload data reported by the slave device acting as master. */ void (*alert)(struct i2c_client *client, enum i2c_alert_protocol protocol, unsigned int data); /* a ioctl like command that can be used to perform specific functions * with the device. */ int (*command)(struct i2c_client *client, unsigned int cmd, void *arg); struct device_driver driver; const struct i2c_device_id *id_table; /* Device detection callback for automatic device creation */ int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; u32 flags; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) /** * struct i2c_client - represent an I2C slave device * @flags: see I2C_CLIENT_* for possible flags * @addr: Address used on the I2C bus connected to the parent adapter. * @name: Indicates the type of the device, usually a chip name that's * generic enough to hide second-sourcing and compatible revisions. * @adapter: manages the bus segment hosting this I2C device * @dev: Driver model device node for the slave. * @init_irq: IRQ that was set at initialization * @irq: indicates the IRQ generated by this device (if any) * @detected: member of an i2c_driver.clients list or i2c-core's * userspace_devices list * @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter * calls it to pass on slave events to the slave driver. * @devres_group_id: id of the devres group that will be created for resources * acquired when probing this device. * * An i2c_client identifies a single device (i.e. chip) connected to an * i2c bus. The behaviour exposed to Linux is defined by the driver * managing the device. 
*/ struct i2c_client { unsigned short flags; /* div., see below */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ /* Must equal I2C_M_TEN below */ #define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ #define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ #define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ #define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ unsigned short addr; /* chip address - NOTE: 7bit */ /* addresses are stored in the */ /* _LOWER_ 7 bits */ char name[I2C_NAME_SIZE]; struct i2c_adapter *adapter; /* the adapter we sit on */ struct device dev; /* the device structure */ int init_irq; /* irq set at initialization */ int irq; /* irq issued by device */ struct list_head detected; #if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif void *devres_group_id; /* ID of probe devres group */ }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) struct i2c_adapter *i2c_verify_adapter(struct device *dev); const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, const struct i2c_client *client); const void *i2c_get_match_data(const struct i2c_client *client); static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) { struct device * const dev = kobj_to_dev(kobj); return to_i2c_client(dev); } static inline void *i2c_get_clientdata(const struct i2c_client *client) { return dev_get_drvdata(&client->dev); } static inline void i2c_set_clientdata(struct i2c_client *client, void *data) { dev_set_drvdata(&client->dev, data); } /* I2C slave support */ enum i2c_slave_event { I2C_SLAVE_READ_REQUESTED, I2C_SLAVE_WRITE_REQUESTED, I2C_SLAVE_READ_PROCESSED, I2C_SLAVE_WRITE_RECEIVED, I2C_SLAVE_STOP, }; int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb); int i2c_slave_unregister(struct i2c_client *client); int i2c_slave_event(struct i2c_client *client, enum i2c_slave_event event, u8 *val); #if IS_ENABLED(CONFIG_I2C_SLAVE) bool i2c_detect_slave_mode(struct device *dev); #else static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } #endif /** * struct i2c_board_info - template for device creation * @type: chip type, to initialize i2c_client.name * @flags: to initialize i2c_client.flags * @addr: stored in i2c_client.addr * @dev_name: Overrides the default <busnr>-<addr> dev_name if set * @platform_data: stored in i2c_client.dev.platform_data * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware * @swnode: software node for the device * @resources: resources associated with the device * @num_resources: number of resources in the @resources array * @irq: stored in i2c_client.irq * * I2C doesn't actually support hardware probing, although controllers and * devices may be able to use I2C_SMBUS_QUICK to tell whether or not there's * a device at a given address. Drivers commonly need more information than * that, such as chip type, configuration, associated IRQ, and so on. * * i2c_board_info is used to build tables of information listing I2C devices * that are present. This information is used to grow the driver model tree. * For mainboards this is done statically using i2c_register_board_info(); * bus numbers identify adapters that aren't yet available. For add-on boards, * i2c_new_client_device() does this dynamically with the adapter already known. 
*/ struct i2c_board_info { char type[I2C_NAME_SIZE]; unsigned short flags; unsigned short addr; const char *dev_name; void *platform_data; struct device_node *of_node; struct fwnode_handle *fwnode; const struct software_node *swnode; const struct resource *resources; unsigned int num_resources; int irq; }; /** * I2C_BOARD_INFO - macro used to list an i2c device and its address * @dev_type: identifies the device type * @dev_addr: the device's address on the bus. * * This macro initializes essential fields of a struct i2c_board_info, * declaring what has been provided on a particular board. Optional * fields (such as associated irq, or device-specific platform_data) * are provided using conventional syntax. */ #define I2C_BOARD_INFO(dev_type, dev_addr) \ .type = dev_type, .addr = (dev_addr) #if IS_ENABLED(CONFIG_I2C) /* * Add-on boards should register/unregister their devices; e.g. a board * with integrated I2C, a config eeprom, sensors, and a codec that's * used in conjunction with the primary hardware. */ struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); /* If you don't know the exact address of an I2C device, use this variant * instead, which can probe for device presence in a list of possible * addresses. The "probe" callback function is optional. If it is provided, * it must return 1 on successful probe, 0 otherwise. If it is not provided, * a default probing method is used. */ struct i2c_client * i2c_new_scanned_device(struct i2c_adapter *adap, struct i2c_board_info *info, unsigned short const *addr_list, int (*probe)(struct i2c_adapter *adap, unsigned short addr)); /* Common custom probe functions */ int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); struct i2c_client * i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address); struct i2c_client * devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address); struct i2c_client * i2c_new_ancillary_device(struct i2c_client *client, const char *name, u16 default_addr); void i2c_unregister_device(struct i2c_client *client); struct i2c_client *i2c_verify_client(struct device *dev); #else static inline struct i2c_client *i2c_verify_client(struct device *dev) { return NULL; } #endif /* I2C */ /* Mainboard arch_initcall() code should register all its I2C devices. * This is done at arch_initcall time, before declaring any i2c adapters. * Modules for add-on boards must use other calls. */ #ifdef CONFIG_I2C_BOARDINFO int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); #else static inline int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n) { return 0; } #endif /* I2C_BOARDINFO */ /** * struct i2c_algorithm - represent I2C transfer method * @master_xfer: Issue a set of i2c transactions to the given I2C adapter * defined by the msgs array, with num messages available to transfer via * the adapter specified by adap. * @master_xfer_atomic: same as @master_xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. * @smbus_xfer: Issue smbus transactions to the given I2C adapter. If this * is not present, then the bus layer will try and convert the SMBus calls * into I2C transfers instead. * @smbus_xfer_atomic: same as @smbus_xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. 
* @functionality: Return the flags that this algorithm/adapter pair supports * from the ``I2C_FUNC_*`` flags. * @reg_slave: Register given client to I2C slave mode of this adapter * @unreg_slave: Unregister given client from I2C slave mode of this adapter * * The following structs are for those who like to implement new bus drivers: * i2c_algorithm is the interface to a class of hardware solutions which can * be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584 * to name two of the most common. * * The return codes from the ``master_xfer{_atomic}`` fields should indicate the * type of error code that occurred during the transfer, as documented in the * Kernel Documentation file Documentation/i2c/fault-codes.rst. Otherwise, the * number of messages executed should be returned. */ struct i2c_algorithm { /* * If an adapter algorithm can't do I2C-level access, set master_xfer * to NULL. If an adapter algorithm can do SMBus access, set * smbus_xfer. If set to NULL, the SMBus protocol is simulated * using common I2C messages. * * master_xfer should return the number of messages successfully * processed, or a negative value on error */ int (*master_xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); int (*master_xfer_atomic)(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); int (*smbus_xfer)(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data); int (*smbus_xfer_atomic)(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data); /* To determine what the adapter supports */ u32 (*functionality)(struct i2c_adapter *adap); #if IS_ENABLED(CONFIG_I2C_SLAVE) int (*reg_slave)(struct i2c_client *client); int (*unreg_slave)(struct i2c_client *client); #endif }; /** * struct i2c_lock_operations - represent I2C locking operations * @lock_bus: Get exclusive access to an I2C bus segment * @trylock_bus: Try to get exclusive access to an I2C bus segment * @unlock_bus: Release exclusive access to an I2C bus segment * * The main operations are wrapped by i2c_lock_bus and i2c_unlock_bus. */ struct i2c_lock_operations { void (*lock_bus)(struct i2c_adapter *adapter, unsigned int flags); int (*trylock_bus)(struct i2c_adapter *adapter, unsigned int flags); void (*unlock_bus)(struct i2c_adapter *adapter, unsigned int flags); }; /** * struct i2c_timings - I2C timing information * @bus_freq_hz: the bus frequency in Hz * @scl_rise_ns: time SCL signal takes to rise in ns; t(r) in the I2C specification * @scl_fall_ns: time SCL signal takes to fall in ns; t(f) in the I2C specification * @scl_int_delay_ns: time IP core additionally needs to setup SCL in ns * @sda_fall_ns: time SDA signal takes to fall in ns; t(f) in the I2C specification * @sda_hold_ns: time IP core additionally needs to hold SDA in ns * @digital_filter_width_ns: width in ns of spikes on i2c lines that the IP core * digital filter can filter out * @analog_filter_cutoff_freq_hz: threshold frequency for the low pass IP core * analog filter */ struct i2c_timings { u32 bus_freq_hz; u32 scl_rise_ns; u32 scl_fall_ns; u32 scl_int_delay_ns; u32 sda_fall_ns; u32 sda_hold_ns; u32 digital_filter_width_ns; u32 analog_filter_cutoff_freq_hz; }; /** * struct i2c_bus_recovery_info - I2C bus recovery information * @recover_bus: Recover routine. Either pass driver's recover_bus() routine, or * i2c_generic_scl_recovery(). * @get_scl: This gets current value of SCL line. 
Mandatory for generic SCL * recovery. Populated internally for generic GPIO recovery. * @set_scl: This sets/clears the SCL line. Mandatory for generic SCL recovery. * Populated internally for generic GPIO recovery. * @get_sda: This gets current value of SDA line. This or set_sda() is mandatory * for generic SCL recovery. Populated internally, if sda_gpio is a valid * GPIO, for generic GPIO recovery. * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for * generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO, * for generic GPIO recovery. * @get_bus_free: Returns the bus free state as seen from the IP core in case it * has a more complex internal logic than just reading SDA. Optional. * @prepare_recovery: This will be called before starting recovery. Platform may * configure padmux here for SDA/SCL line or something else they want. * @unprepare_recovery: This will be called after completing recovery. Platform * may configure padmux here for SDA/SCL line or something else they want. * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. * @pinctrl: pinctrl used by GPIO recovery to change the state of the I2C pins. * Optional. * @pins_default: default pinctrl state of SCL/SDA lines, when they are assigned * to the I2C bus. Optional. Populated internally for GPIO recovery, if * state with the name PINCTRL_STATE_DEFAULT is found and pinctrl is valid. * @pins_gpio: recovery pinctrl state of SCL/SDA lines, when they are used as * GPIOs. Optional. Populated internally for GPIO recovery, if this state * is called "gpio" or "recovery" and pinctrl is valid. */ struct i2c_bus_recovery_info { int (*recover_bus)(struct i2c_adapter *adap); int (*get_scl)(struct i2c_adapter *adap); void (*set_scl)(struct i2c_adapter *adap, int val); int (*get_sda)(struct i2c_adapter *adap); void (*set_sda)(struct i2c_adapter *adap, int val); int (*get_bus_free)(struct i2c_adapter *adap); void (*prepare_recovery)(struct i2c_adapter *adap); void (*unprepare_recovery)(struct i2c_adapter *adap); /* gpio recovery */ struct gpio_desc *scl_gpiod; struct gpio_desc *sda_gpiod; struct pinctrl *pinctrl; struct pinctrl_state *pins_default; struct pinctrl_state *pins_gpio; }; int i2c_recover_bus(struct i2c_adapter *adap); /* Generic recovery routines */ int i2c_generic_scl_recovery(struct i2c_adapter *adap); /** * struct i2c_adapter_quirks - describe flaws of an i2c adapter * @flags: see I2C_AQ_* for possible flags and read below * @max_num_msgs: maximum number of messages per transfer * @max_write_len: maximum length of a write message * @max_read_len: maximum length of a read message * @max_comb_1st_msg_len: maximum length of the first msg in a combined message * @max_comb_2nd_msg_len: maximum length of the second msg in a combined message * * Note about combined messages: Some I2C controllers can only send one message * per transfer, plus something called combined message or write-then-read. * This is (usually) a small write message followed by a read message and * barely enough to access register based devices like EEPROMs. There is a flag * to support this mode. It implies max_num_msg = 2 and does the length checks * with max_comb_*_len because combined message mode usually has its own * limitations. Because of HW implementations, some controllers can actually do * write-then-anything or other variants. 
To support that, write-then-read has * been broken out into smaller bits like write-first and read-second which can * be combined as needed. */ struct i2c_adapter_quirks { u64 flags; int max_num_msgs; u16 max_write_len; u16 max_read_len; u16 max_comb_1st_msg_len; u16 max_comb_2nd_msg_len; }; /* enforce max_num_msgs = 2 and use max_comb_*_len for length checks */ #define I2C_AQ_COMB BIT(0) /* first combined message must be write */ #define I2C_AQ_COMB_WRITE_FIRST BIT(1) /* second combined message must be read */ #define I2C_AQ_COMB_READ_SECOND BIT(2) /* both combined messages must have the same target address */ #define I2C_AQ_COMB_SAME_ADDR BIT(3) /* convenience macro for typical write-then read case */ #define I2C_AQ_COMB_WRITE_THEN_READ (I2C_AQ_COMB | I2C_AQ_COMB_WRITE_FIRST | \ I2C_AQ_COMB_READ_SECOND | I2C_AQ_COMB_SAME_ADDR) /* clock stretching is not supported */ #define I2C_AQ_NO_CLK_STRETCH BIT(4) /* message cannot have length of 0 */ #define I2C_AQ_NO_ZERO_LEN_READ BIT(5) #define I2C_AQ_NO_ZERO_LEN_WRITE BIT(6) #define I2C_AQ_NO_ZERO_LEN (I2C_AQ_NO_ZERO_LEN_READ | I2C_AQ_NO_ZERO_LEN_WRITE) /* adapter cannot do repeated START */ #define I2C_AQ_NO_REP_START BIT(7) /* * i2c_adapter is the structure used to identify a physical i2c bus along * with the access algorithms necessary to access it. */ struct i2c_adapter { struct module *owner; unsigned int class; /* classes to allow probing for */ const struct i2c_algorithm *algo; /* the algorithm to access the bus */ void *algo_data; /* data fields that are valid for all devices */ const struct i2c_lock_operations *lock_ops; struct rt_mutex bus_lock; struct rt_mutex mux_lock; int timeout; /* in jiffies */ int retries; struct device dev; /* the adapter device */ unsigned long locked_flags; /* owned by the I2C core */ #define I2C_ALF_IS_SUSPENDED 0 #define I2C_ALF_SUSPEND_REPORTED 1 int nr; char name[48]; struct completion dev_released; struct mutex userspace_clients_lock; struct list_head userspace_clients; struct i2c_bus_recovery_info *bus_recovery_info; const struct i2c_adapter_quirks *quirks; struct irq_domain *host_notify_domain; struct regulator *bus_regulator; struct dentry *debugfs; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) static inline void *i2c_get_adapdata(const struct i2c_adapter *adap) { return dev_get_drvdata(&adap->dev); } static inline void i2c_set_adapdata(struct i2c_adapter *adap, void *data) { dev_set_drvdata(&adap->dev, data); } static inline struct i2c_adapter * i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter) { #if IS_ENABLED(CONFIG_I2C_MUX) struct device *parent = adapter->dev.parent; if (parent != NULL && parent->type == &i2c_adapter_type) return to_i2c_adapter(parent); else #endif return NULL; } int i2c_for_each_dev(void *data, int (*fn)(struct device *dev, void *data)); /* Adapter locking functions, exported for shared pin cases */ #define I2C_LOCK_ROOT_ADAPTER BIT(0) #define I2C_LOCK_SEGMENT BIT(1) /** * i2c_lock_bus - Get exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT * locks only this branch in the adapter tree */ static inline void i2c_lock_bus(struct i2c_adapter *adapter, unsigned int flags) { adapter->lock_ops->lock_bus(adapter, flags); } /** * i2c_trylock_bus - Try to get exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER tries to locks the root i2c adapter, * I2C_LOCK_SEGMENT tries to lock only this branch in the 
adapter tree * * Return: true if the I2C bus segment is locked, false otherwise */ static inline int i2c_trylock_bus(struct i2c_adapter *adapter, unsigned int flags) { return adapter->lock_ops->trylock_bus(adapter, flags); } /** * i2c_unlock_bus - Release exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT * unlocks only this branch in the adapter tree */ static inline void i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags) { adapter->lock_ops->unlock_bus(adapter, flags); } /** * i2c_mark_adapter_suspended - Report suspended state of the adapter to the core * @adap: Adapter to mark as suspended * * When using this helper to mark an adapter as suspended, the core will reject * further transfers to this adapter. The usage of this helper is optional but * recommended for devices having distinct handlers for system suspend and * runtime suspend. More complex devices are free to implement custom solutions * to reject transfers when suspended. */ static inline void i2c_mark_adapter_suspended(struct i2c_adapter *adap) { i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); set_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); } /** * i2c_mark_adapter_resumed - Report resumed state of the adapter to the core * @adap: Adapter to mark as resumed * * When using this helper to mark an adapter as resumed, the core will allow * further transfers to this adapter. See also further notes to * @i2c_mark_adapter_suspended(). */ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap) { i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER); clear_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags); i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER); } /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ /* Warn users that the adapter doesn't support classes anymore */ #define I2C_CLASS_DEPRECATED (1<<8) /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU /* Construct an I2C_CLIENT_END-terminated array of i2c addresses */ #define I2C_ADDRS(addr, addrs...) \ ((const unsigned short []){ addr, ## addrs, I2C_CLIENT_END }) /* ----- functions exported by i2c.o */ /* administration... 
*/ #if IS_ENABLED(CONFIG_I2C) int i2c_add_adapter(struct i2c_adapter *adap); int devm_i2c_add_adapter(struct device *dev, struct i2c_adapter *adapter); void i2c_del_adapter(struct i2c_adapter *adap); int i2c_add_numbered_adapter(struct i2c_adapter *adap); int i2c_register_driver(struct module *owner, struct i2c_driver *driver); void i2c_del_driver(struct i2c_driver *driver); /* use a define to avoid include chaining to get THIS_MODULE */ #define i2c_add_driver(driver) \ i2c_register_driver(THIS_MODULE, driver) static inline bool i2c_client_has_driver(struct i2c_client *client) { return !IS_ERR_OR_NULL(client) && client->dev.driver; } /* call the i2c_client->command() of all attached clients with * the given arguments */ void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg); struct i2c_adapter *i2c_get_adapter(int nr); void i2c_put_adapter(struct i2c_adapter *adap); unsigned int i2c_adapter_depth(struct i2c_adapter *adapter); void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_defaults); /* Return the functionality mask */ static inline u32 i2c_get_functionality(struct i2c_adapter *adap) { return adap->algo->functionality(adap); } /* Return 1 if adapter supports everything we need, 0 if not. */ static inline int i2c_check_functionality(struct i2c_adapter *adap, u32 func) { return (func & i2c_get_functionality(adap)) == func; } /** * i2c_check_quirks() - Function for checking the quirk flags in an i2c adapter * @adap: i2c adapter * @quirks: quirk flags * * Return: true if the adapter has all the specified quirk flags, false if not */ static inline bool i2c_check_quirks(struct i2c_adapter *adap, u64 quirks) { if (!adap->quirks) return false; return (adap->quirks->flags & quirks) == quirks; } /* Return the adapter number for a specific adapter */ static inline int i2c_adapter_id(struct i2c_adapter *adap) { return adap->nr; } static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) { return (msg->addr << 1) | (msg->flags & I2C_M_RD); } u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred); int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); /** * module_i2c_driver() - Helper macro for registering a modular I2C driver * @__i2c_driver: i2c_driver struct * * Helper macro for I2C drivers which do not do anything special in module * init/exit. This eliminates a lot of boilerplate. Each module may only * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_i2c_driver(__i2c_driver) \ module_driver(__i2c_driver, i2c_add_driver, \ i2c_del_driver) /** * builtin_i2c_driver() - Helper macro for registering a builtin I2C driver * @__i2c_driver: i2c_driver struct * * Helper macro for I2C drivers which do not do anything special in their * init. This eliminates a lot of boilerplate. Each driver may only * use this macro once, and calling it replaces device_initcall(). 
*/ #define builtin_i2c_driver(__i2c_driver) \ builtin_driver(__i2c_driver, i2c_add_driver) #endif /* I2C */ /* must call put_device() when done with returned i2c_client device */ struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode); /* must call put_device() when done with returned i2c_adapter device */ struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode); /* must call i2c_put_adapter() when done with returned i2c_adapter device */ struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode); #if IS_ENABLED(CONFIG_OF) /* must call put_device() when done with returned i2c_client device */ static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) { return i2c_find_device_by_fwnode(of_fwnode_handle(node)); } /* must call put_device() when done with returned i2c_adapter device */ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node) { return i2c_find_adapter_by_fwnode(of_fwnode_handle(node)); } /* must call i2c_put_adapter() when done with returned i2c_adapter device */ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node) { return i2c_get_adapter_by_fwnode(of_fwnode_handle(node)); } const struct of_device_id *i2c_of_match_device(const struct of_device_id *matches, struct i2c_client *client); int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info); #else static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) { return NULL; } static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node) { return NULL; } static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node) { return NULL; } static inline const struct of_device_id *i2c_of_match_device(const struct of_device_id *matches, struct i2c_client *client) { return NULL; } static inline int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info) { return -ENOTSUPP; } #endif /* CONFIG_OF */ struct acpi_resource; struct acpi_resource_i2c_serialbus; #if IS_ENABLED(CONFIG_ACPI) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); u32 i2c_acpi_find_bus_speed(struct device *dev); struct i2c_client *i2c_acpi_new_device_by_fwnode(struct fwnode_handle *fwnode, int index, struct i2c_board_info *info); struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle); bool i2c_acpi_waive_d0_probe(struct device *dev); #else static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c) { return false; } static inline int i2c_acpi_client_count(struct acpi_device *adev) { return 0; } static inline u32 i2c_acpi_find_bus_speed(struct device *dev) { return 0; } static inline struct i2c_client *i2c_acpi_new_device_by_fwnode( struct fwnode_handle *fwnode, int index, struct i2c_board_info *info) { return ERR_PTR(-ENODEV); } static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) { return NULL; } static inline bool i2c_acpi_waive_d0_probe(struct device *dev) { return false; } #endif /* CONFIG_ACPI */ static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, struct i2c_board_info *info) { return i2c_acpi_new_device_by_fwnode(dev_fwnode(dev), index, info); } #endif /* _LINUX_I2C_H */
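/*
 * Illustrative sketch (not part of i2c.h): a minimal client driver using the
 * interfaces declared above -- i2c_check_functionality(), the SMBus byte-data
 * helpers and module_i2c_driver(). The "example-chip" name and the WHO_AM_I
 * register offset are hypothetical placeholders.
 */
#include <linux/i2c.h>
#include <linux/module.h>

#define EXAMPLE_REG_WHOAMI	0x0f	/* made-up register offset */

static int example_probe(struct i2c_client *client)
{
	s32 id;

	/* bail out early if the adapter cannot do SMBus byte-data transfers */
	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
		return -EOPNOTSUPP;

	id = i2c_smbus_read_byte_data(client, EXAMPLE_REG_WHOAMI);
	if (id < 0)
		return id;

	dev_info(&client->dev, "chip id 0x%02x at address 0x%02x\n",
		 id, client->addr);
	return 0;
}

static const struct i2c_device_id example_ids[] = {
	{ "example-chip" },
	{ }
};
MODULE_DEVICE_TABLE(i2c, example_ids);

static struct i2c_driver example_driver = {
	.driver = {
		.name	= "example-chip",
	},
	.probe		= example_probe,
	.id_table	= example_ids,
};
module_i2c_driver(example_driver);

MODULE_LICENSE("GPL");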
/* * Created: Sun Dec 21 13:08:50 2008 by bgamari@gmail.com * * Copyright 2008 Ben Gamari <bgamari@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/debugfs.h> #include <linux/export.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <drm/drm_atomic.h> #include <drm/drm_auth.h> #include <drm/drm_bridge.h> #include <drm/drm_client.h> #include <drm/drm_debugfs.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_file.h> #include <drm/drm_gem.h> #include <drm/drm_managed.h> #include <drm/drm_gpuvm.h> #include "drm_crtc_internal.h" #include "drm_internal.h" /*************************************************** * Initialization, etc. **************************************************/ static int drm_name_info(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; struct drm_master *master; mutex_lock(&dev->master_mutex); master = dev->master; seq_printf(m, "%s", dev->driver->name); if (dev->dev) seq_printf(m, " dev=%s", dev_name(dev->dev)); if (master && master->unique) seq_printf(m, " master=%s", master->unique); if (dev->unique) seq_printf(m, " unique=%s", dev->unique); seq_printf(m, "\n"); mutex_unlock(&dev->master_mutex); return 0; } static int drm_clients_info(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; struct drm_file *priv; kuid_t uid; seq_printf(m, "%20s %5s %3s master a %5s %10s\n", "command", "tgid", "dev", "uid", "magic"); /* dev->filelist is sorted youngest first, but we want to present * oldest first (i.e. kernel, servers, clients), so walk backwardss. */ mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(priv, &dev->filelist, lhead) { bool is_current_master = drm_is_current_master(priv); struct task_struct *task; struct pid *pid; rcu_read_lock(); /* Locks priv->pid and pid_task()->comm! */ pid = rcu_dereference(priv->pid); task = pid_task(pid, PIDTYPE_TGID); uid = task ? __task_cred(task)->euid : GLOBAL_ROOT_UID; seq_printf(m, "%20s %5d %3d %c %c %5d %10u\n", task ? task->comm : "<unknown>", pid_vnr(pid), priv->minor->index, is_current_master ? 'y' : 'n', priv->authenticated ? 
'y' : 'n', from_kuid_munged(seq_user_ns(m), uid), priv->magic); rcu_read_unlock(); } mutex_unlock(&dev->filelist_mutex); return 0; } static int drm_gem_one_name_info(int id, void *ptr, void *data) { struct drm_gem_object *obj = ptr; struct seq_file *m = data; seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, obj->handle_count, kref_read(&obj->refcount)); return 0; } static int drm_gem_name_info(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; seq_printf(m, " name size handles refcount\n"); mutex_lock(&dev->object_name_lock); idr_for_each(&dev->object_name_idr, drm_gem_one_name_info, m); mutex_unlock(&dev->object_name_lock); return 0; } static const struct drm_debugfs_info drm_debugfs_list[] = { {"name", drm_name_info, 0}, {"clients", drm_clients_info, 0}, {"gem_names", drm_gem_name_info, DRIVER_GEM}, }; #define DRM_DEBUGFS_ENTRIES ARRAY_SIZE(drm_debugfs_list) static int drm_debugfs_open(struct inode *inode, struct file *file) { struct drm_info_node *node = inode->i_private; if (!device_is_registered(node->minor->kdev)) return -ENODEV; return single_open(file, node->info_ent->show, node); } static int drm_debugfs_entry_open(struct inode *inode, struct file *file) { struct drm_debugfs_entry *entry = inode->i_private; struct drm_debugfs_info *node = &entry->file; struct drm_minor *minor = entry->dev->primary ?: entry->dev->accel; if (!device_is_registered(minor->kdev)) return -ENODEV; return single_open(file, node->show, entry); } static const struct file_operations drm_debugfs_entry_fops = { .owner = THIS_MODULE, .open = drm_debugfs_entry_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static const struct file_operations drm_debugfs_fops = { .owner = THIS_MODULE, .open = drm_debugfs_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; /** * drm_debugfs_gpuva_info - dump the given DRM GPU VA space * @m: pointer to the &seq_file to write * @gpuvm: the &drm_gpuvm representing the GPU VA space * * Dumps the GPU VA mappings of a given DRM GPU VA manager. * * For each DRM GPU VA space drivers should call this function from their * &drm_info_list's show callback. * * Returns: 0 on success, -ENODEV if the &gpuvm is not initialized */ int drm_debugfs_gpuva_info(struct seq_file *m, struct drm_gpuvm *gpuvm) { struct drm_gpuva *va, *kva = &gpuvm->kernel_alloc_node; if (!gpuvm->name) return -ENODEV; seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n", gpuvm->name, gpuvm->mm_start, gpuvm->mm_start + gpuvm->mm_range); seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n", kva->va.addr, kva->va.addr + kva->va.range); seq_puts(m, "\n"); seq_puts(m, " VAs | start | range | end | object | object offset\n"); seq_puts(m, "-------------------------------------------------------------------------------------------------------------\n"); drm_gpuvm_for_each_va(va, gpuvm) { if (unlikely(va == kva)) continue; seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx | 0x%016llx | 0x%016llx\n", va->va.addr, va->va.range, va->va.addr + va->va.range, (u64)(uintptr_t)va->gem.obj, va->gem.offset); } return 0; } EXPORT_SYMBOL(drm_debugfs_gpuva_info); /** * drm_debugfs_create_files - Initialize a given set of debugfs files for DRM * minor * @files: The array of files to create * @count: The number of files given * @root: DRI debugfs dir entry. * @minor: device minor number * * Create a given set of debugfs files represented by an array of * &struct drm_info_list in the given root directory. 
These files will be removed * automatically on drm_debugfs_dev_fini(). */ void drm_debugfs_create_files(const struct drm_info_list *files, int count, struct dentry *root, struct drm_minor *minor) { struct drm_device *dev = minor->dev; struct drm_info_node *tmp; int i; for (i = 0; i < count; i++) { u32 features = files[i].driver_features; if (features && !drm_core_check_all_features(dev, features)) continue; tmp = drmm_kzalloc(dev, sizeof(*tmp), GFP_KERNEL); if (tmp == NULL) continue; tmp->minor = minor; tmp->dent = debugfs_create_file(files[i].name, 0444, root, tmp, &drm_debugfs_fops); tmp->info_ent = &files[i]; } } EXPORT_SYMBOL(drm_debugfs_create_files); int drm_debugfs_remove_files(const struct drm_info_list *files, int count, struct dentry *root, struct drm_minor *minor) { int i; for (i = 0; i < count; i++) { struct dentry *dent = debugfs_lookup(files[i].name, root); if (!dent) continue; drmm_kfree(minor->dev, d_inode(dent)->i_private); debugfs_remove(dent); } return 0; } EXPORT_SYMBOL(drm_debugfs_remove_files); /** * drm_debugfs_dev_init - create debugfs directory for the device * @dev: the device which we want to create the directory for * @root: the parent directory depending on the device type * * Creates the debugfs directory for the device under the given root directory. */ void drm_debugfs_dev_init(struct drm_device *dev, struct dentry *root) { dev->debugfs_root = debugfs_create_dir(dev->unique, root); } /** * drm_debugfs_dev_fini - cleanup debugfs directory * @dev: the device to cleanup the debugfs stuff * * Remove the debugfs directory, might be called multiple times. */ void drm_debugfs_dev_fini(struct drm_device *dev) { debugfs_remove_recursive(dev->debugfs_root); dev->debugfs_root = NULL; } void drm_debugfs_dev_register(struct drm_device *dev) { drm_debugfs_add_files(dev, drm_debugfs_list, DRM_DEBUGFS_ENTRIES); if (drm_core_check_feature(dev, DRIVER_MODESET)) { drm_framebuffer_debugfs_init(dev); drm_client_debugfs_init(dev); } if (drm_drv_uses_atomic_modeset(dev)) drm_atomic_debugfs_init(dev); } int drm_debugfs_register(struct drm_minor *minor, int minor_id, struct dentry *root) { struct drm_device *dev = minor->dev; char name[64]; sprintf(name, "%d", minor_id); minor->debugfs_symlink = debugfs_create_symlink(name, root, dev->unique); /* TODO: Only for compatibility with drivers */ minor->debugfs_root = dev->debugfs_root; if (dev->driver->debugfs_init && dev->render != minor) dev->driver->debugfs_init(minor); return 0; } void drm_debugfs_unregister(struct drm_minor *minor) { debugfs_remove(minor->debugfs_symlink); minor->debugfs_symlink = NULL; } /** * drm_debugfs_add_file - Add a given file to the DRM device debugfs file list * @dev: drm device for the ioctl * @name: debugfs file name * @show: show callback * @data: driver-private data, should not be device-specific * * Add a given file entry to the DRM device debugfs file list to be created on * drm_debugfs_init. 
*/ void drm_debugfs_add_file(struct drm_device *dev, const char *name, int (*show)(struct seq_file*, void*), void *data) { struct drm_debugfs_entry *entry = drmm_kzalloc(dev, sizeof(*entry), GFP_KERNEL); if (!entry) return; entry->file.name = name; entry->file.show = show; entry->file.data = data; entry->dev = dev; debugfs_create_file(name, 0444, dev->debugfs_root, entry, &drm_debugfs_entry_fops); } EXPORT_SYMBOL(drm_debugfs_add_file); /** * drm_debugfs_add_files - Add an array of files to the DRM device debugfs file list * @dev: drm device for the ioctl * @files: The array of files to create * @count: The number of files given * * Add a given set of debugfs files represented by an array of * &struct drm_debugfs_info in the DRM device debugfs file list. */ void drm_debugfs_add_files(struct drm_device *dev, const struct drm_debugfs_info *files, int count) { int i; for (i = 0; i < count; i++) drm_debugfs_add_file(dev, files[i].name, files[i].show, files[i].data); } EXPORT_SYMBOL(drm_debugfs_add_files); static int connector_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; seq_printf(m, "%s\n", drm_get_connector_force_name(connector->force)); return 0; } static int connector_open(struct inode *inode, struct file *file) { struct drm_connector *dev = inode->i_private; return single_open(file, connector_show, dev); } static ssize_t connector_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; struct drm_connector *connector = m->private; char buf[12]; if (len > sizeof(buf) - 1) return -EINVAL; if (copy_from_user(buf, ubuf, len)) return -EFAULT; buf[len] = '\0'; if (sysfs_streq(buf, "on")) connector->force = DRM_FORCE_ON; else if (sysfs_streq(buf, "digital")) connector->force = DRM_FORCE_ON_DIGITAL; else if (sysfs_streq(buf, "off")) connector->force = DRM_FORCE_OFF; else if (sysfs_streq(buf, "unspecified")) connector->force = DRM_FORCE_UNSPECIFIED; else return -EINVAL; return len; } static int edid_show(struct seq_file *m, void *data) { return drm_edid_override_show(m->private, m); } static int edid_open(struct inode *inode, struct file *file) { struct drm_connector *dev = inode->i_private; return single_open(file, edid_show, dev); } static ssize_t edid_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; struct drm_connector *connector = m->private; char *buf; int ret; buf = memdup_user(ubuf, len); if (IS_ERR(buf)) return PTR_ERR(buf); if (len == 5 && !strncmp(buf, "reset", 5)) ret = drm_edid_override_reset(connector); else ret = drm_edid_override_set(connector, buf, len); kfree(buf); return ret ? ret : len; } /* * Returns the min and max vrr vfreq through the connector's debugfs file. * Example usage: cat /sys/kernel/debug/dri/0/DP-1/vrr_range */ static int vrr_range_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; if (connector->status != connector_status_connected) return -ENODEV; seq_printf(m, "Min: %u\n", connector->display_info.monitor_range.min_vfreq); seq_printf(m, "Max: %u\n", connector->display_info.monitor_range.max_vfreq); return 0; } DEFINE_SHOW_ATTRIBUTE(vrr_range); /* * Returns Connector's max supported bpc through debugfs file. 
* Example usage: cat /sys/kernel/debug/dri/0/DP-1/output_bpc */ static int output_bpc_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; if (connector->status != connector_status_connected) return -ENODEV; seq_printf(m, "Maximum: %u\n", connector->display_info.bpc); return 0; } DEFINE_SHOW_ATTRIBUTE(output_bpc); static const struct file_operations drm_edid_fops = { .owner = THIS_MODULE, .open = edid_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = edid_write }; static const struct file_operations drm_connector_fops = { .owner = THIS_MODULE, .open = connector_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = connector_write }; void drm_debugfs_connector_add(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct dentry *root; if (!dev->debugfs_root) return; root = debugfs_create_dir(connector->name, dev->debugfs_root); connector->debugfs_entry = root; /* force */ debugfs_create_file("force", 0644, root, connector, &drm_connector_fops); /* edid */ debugfs_create_file("edid_override", 0644, root, connector, &drm_edid_fops); /* vrr range */ debugfs_create_file("vrr_range", 0444, root, connector, &vrr_range_fops); /* max bpc */ debugfs_create_file("output_bpc", 0444, root, connector, &output_bpc_fops); if (connector->funcs->debugfs_init) connector->funcs->debugfs_init(connector, root); } void drm_debugfs_connector_remove(struct drm_connector *connector) { if (!connector->debugfs_entry) return; debugfs_remove_recursive(connector->debugfs_entry); connector->debugfs_entry = NULL; } void drm_debugfs_crtc_add(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct dentry *root; char *name; name = kasprintf(GFP_KERNEL, "crtc-%d", crtc->index); if (!name) return; root = debugfs_create_dir(name, dev->debugfs_root); kfree(name); crtc->debugfs_entry = root; drm_debugfs_crtc_crc_add(crtc); } void drm_debugfs_crtc_remove(struct drm_crtc *crtc) { debugfs_remove_recursive(crtc->debugfs_entry); crtc->debugfs_entry = NULL; } static int bridges_show(struct seq_file *m, void *data) { struct drm_encoder *encoder = m->private; struct drm_printer p = drm_seq_file_printer(m); struct drm_bridge *bridge; unsigned int idx = 0; drm_for_each_bridge_in_chain(encoder, bridge) { drm_printf(&p, "bridge[%d]: %ps\n", idx++, bridge->funcs); drm_printf(&p, "\ttype: [%d] %s\n", bridge->type, drm_get_connector_type_name(bridge->type)); #ifdef CONFIG_OF if (bridge->of_node) drm_printf(&p, "\tOF: %pOFfc\n", bridge->of_node); #endif drm_printf(&p, "\tops: [0x%x]", bridge->ops); if (bridge->ops & DRM_BRIDGE_OP_DETECT) drm_puts(&p, " detect"); if (bridge->ops & DRM_BRIDGE_OP_EDID) drm_puts(&p, " edid"); if (bridge->ops & DRM_BRIDGE_OP_HPD) drm_puts(&p, " hpd"); if (bridge->ops & DRM_BRIDGE_OP_MODES) drm_puts(&p, " modes"); drm_puts(&p, "\n"); } return 0; } DEFINE_SHOW_ATTRIBUTE(bridges); void drm_debugfs_encoder_add(struct drm_encoder *encoder) { struct drm_minor *minor = encoder->dev->primary; struct dentry *root; char *name; name = kasprintf(GFP_KERNEL, "encoder-%d", encoder->index); if (!name) return; root = debugfs_create_dir(name, minor->debugfs_root); kfree(name); encoder->debugfs_entry = root; /* bridges list */ debugfs_create_file("bridges", 0444, root, encoder, &bridges_fops); if (encoder->funcs && encoder->funcs->debugfs_init) encoder->funcs->debugfs_init(encoder, root); } void drm_debugfs_encoder_remove(struct drm_encoder *encoder) { debugfs_remove_recursive(encoder->debugfs_entry); 
encoder->debugfs_entry = NULL; }
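/*
 * Editor's illustrative sketch -- not part of the original source. It shows
 * how a driver might hook into the drm_debugfs_add_files() helper defined
 * above. The names foo_device, foo_stats_show and the irq_count field are
 * hypothetical and exist only for this example; the drm_debugfs_* and
 * &struct drm_debugfs_info usage mirrors the code above.
 */
struct foo_device {
	struct drm_device base;
	unsigned int irq_count;		/* hypothetical driver statistic */
};

static int foo_stats_show(struct seq_file *m, void *data)
{
	/* drm_debugfs_entry_open() hands the &drm_debugfs_entry in as m->private */
	struct drm_debugfs_entry *entry = m->private;
	struct foo_device *foo = container_of(entry->dev, struct foo_device, base);

	seq_printf(m, "irqs: %u\n", foo->irq_count);
	return 0;
}

static const struct drm_debugfs_info foo_debugfs_list[] = {
	{ "foo_stats", foo_stats_show, 0 },
};

/* Registers one read-only file under the device's debugfs directory. */
static void foo_debugfs_init(struct drm_device *dev)
{
	drm_debugfs_add_files(dev, foo_debugfs_list, ARRAY_SIZE(foo_debugfs_list));
}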
/*
 * Copyright (C) 2016 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 * Rob Clark <robdclark@gmail.com>
 */

#include <linux/stdarg.h>

#include <linux/io.h>
#include <linux/moduleparam.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/dynamic_debug.h>

#include <drm/drm.h>
#include <drm/drm_drv.h>
#include <drm/drm_print.h>

/*
 * __drm_debug: Enable debug output.
 * Bitmask of DRM_UT_x. See include/drm/drm_print.h for details.
*/ unsigned long __drm_debug; EXPORT_SYMBOL(__drm_debug); MODULE_PARM_DESC(debug, "Enable debug output, where each bit enables a debug category.\n" "\t\tBit 0 (0x01) will enable CORE messages (drm core code)\n" "\t\tBit 1 (0x02) will enable DRIVER messages (drm controller code)\n" "\t\tBit 2 (0x04) will enable KMS messages (modesetting code)\n" "\t\tBit 3 (0x08) will enable PRIME messages (prime code)\n" "\t\tBit 4 (0x10) will enable ATOMIC messages (atomic code)\n" "\t\tBit 5 (0x20) will enable VBL messages (vblank code)\n" "\t\tBit 7 (0x80) will enable LEASE messages (leasing code)\n" "\t\tBit 8 (0x100) will enable DP messages (displayport code)"); #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) module_param_named(debug, __drm_debug, ulong, 0600); #else /* classnames must match vals of enum drm_debug_category */ DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", "DRM_UT_DRIVER", "DRM_UT_KMS", "DRM_UT_PRIME", "DRM_UT_ATOMIC", "DRM_UT_VBL", "DRM_UT_STATE", "DRM_UT_LEASE", "DRM_UT_DP", "DRM_UT_DRMRES"); static struct ddebug_class_param drm_debug_bitmap = { .bits = &__drm_debug, .flags = "p", .map = &drm_debug_classes, }; module_param_cb(debug, &param_ops_dyndbg_classes, &drm_debug_bitmap, 0600); #endif void __drm_puts_coredump(struct drm_printer *p, const char *str) { struct drm_print_iterator *iterator = p->arg; ssize_t len; if (!iterator->remain) return; if (iterator->offset < iterator->start) { ssize_t copy; len = strlen(str); if (iterator->offset + len <= iterator->start) { iterator->offset += len; return; } copy = len - (iterator->start - iterator->offset); if (copy > iterator->remain) copy = iterator->remain; /* Copy out the bit of the string that we need */ memcpy(iterator->data, str + (iterator->start - iterator->offset), copy); iterator->offset = iterator->start + copy; iterator->remain -= copy; } else { ssize_t pos = iterator->offset - iterator->start; len = min_t(ssize_t, strlen(str), iterator->remain); memcpy(iterator->data + pos, str, len); iterator->offset += len; iterator->remain -= len; } } EXPORT_SYMBOL(__drm_puts_coredump); void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf) { struct drm_print_iterator *iterator = p->arg; size_t len; char *buf; if (!iterator->remain) return; /* Figure out how big the string will be */ len = snprintf(NULL, 0, "%pV", vaf); /* This is the easiest path, we've already advanced beyond the offset */ if (iterator->offset + len <= iterator->start) { iterator->offset += len; return; } /* Then check if we can directly copy into the target buffer */ if ((iterator->offset >= iterator->start) && (len < iterator->remain)) { ssize_t pos = iterator->offset - iterator->start; snprintf(((char *) iterator->data) + pos, iterator->remain, "%pV", vaf); iterator->offset += len; iterator->remain -= len; return; } /* * Finally, hit the slow path and make a temporary string to copy over * using _drm_puts_coredump */ buf = kmalloc(len + 1, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!buf) return; snprintf(buf, len + 1, "%pV", vaf); __drm_puts_coredump(p, (const char *) buf); kfree(buf); } EXPORT_SYMBOL(__drm_printfn_coredump); void __drm_puts_seq_file(struct drm_printer *p, const char *str) { seq_puts(p->arg, str); } EXPORT_SYMBOL(__drm_puts_seq_file); void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf) { seq_printf(p->arg, "%pV", vaf); } EXPORT_SYMBOL(__drm_printfn_seq_file); void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf) { dev_info(p->arg, "[" DRM_NAME "] %pV", 
vaf); } EXPORT_SYMBOL(__drm_printfn_info); void __drm_printfn_dbg(struct drm_printer *p, struct va_format *vaf) { const struct drm_device *drm = p->arg; const struct device *dev = drm ? drm->dev : NULL; enum drm_debug_category category = p->category; const char *prefix = p->prefix ?: ""; const char *prefix_pad = p->prefix ? " " : ""; if (!__drm_debug_enabled(category)) return; /* Note: __builtin_return_address(0) is useless here. */ if (dev) dev_printk(KERN_DEBUG, dev, "[" DRM_NAME "]%s%s %pV", prefix_pad, prefix, vaf); else printk(KERN_DEBUG "[" DRM_NAME "]%s%s %pV", prefix_pad, prefix, vaf); } EXPORT_SYMBOL(__drm_printfn_dbg); void __drm_printfn_err(struct drm_printer *p, struct va_format *vaf) { struct drm_device *drm = p->arg; if (p->prefix) drm_err(drm, "%s %pV", p->prefix, vaf); else drm_err(drm, "%pV", vaf); } EXPORT_SYMBOL(__drm_printfn_err); /** * drm_puts - print a const string to a &drm_printer stream * @p: the &drm printer * @str: const string * * Allow &drm_printer types that have a constant string * option to use it. */ void drm_puts(struct drm_printer *p, const char *str) { if (p->puts) p->puts(p, str); else drm_printf(p, "%s", str); } EXPORT_SYMBOL(drm_puts); /** * drm_printf - print to a &drm_printer stream * @p: the &drm_printer * @f: format string */ void drm_printf(struct drm_printer *p, const char *f, ...) { va_list args; va_start(args, f); drm_vprintf(p, f, &args); va_end(args); } EXPORT_SYMBOL(drm_printf); /** * drm_print_bits - print bits to a &drm_printer stream * * Print bits (in flag fields for example) in human readable form. * * @p: the &drm_printer * @value: field value. * @bits: Array with bit names. * @nbits: Size of bit names array. */ void drm_print_bits(struct drm_printer *p, unsigned long value, const char * const bits[], unsigned int nbits) { bool first = true; unsigned int i; if (WARN_ON_ONCE(nbits > BITS_PER_TYPE(value))) nbits = BITS_PER_TYPE(value); for_each_set_bit(i, &value, nbits) { if (WARN_ON_ONCE(!bits[i])) continue; drm_printf(p, "%s%s", first ? "" : ",", bits[i]); first = false; } if (first) drm_printf(p, "(none)"); } EXPORT_SYMBOL(drm_print_bits); void drm_dev_printk(const struct device *dev, const char *level, const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; if (dev) dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV", __builtin_return_address(0), &vaf); else printk("%s" "[" DRM_NAME ":%ps] %pV", level, __builtin_return_address(0), &vaf); va_end(args); } EXPORT_SYMBOL(drm_dev_printk); void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev, enum drm_debug_category category, const char *format, ...) { struct va_format vaf; va_list args; if (!__drm_debug_enabled(category)) return; /* we know we are printing for either syslog, tracefs, or both */ va_start(args, format); vaf.fmt = format; vaf.va = &args; if (dev) dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV", __builtin_return_address(0), &vaf); else printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV", __builtin_return_address(0), &vaf); va_end(args); } EXPORT_SYMBOL(__drm_dev_dbg); void ___drm_dbg(struct _ddebug *desc, enum drm_debug_category category, const char *format, ...) { struct va_format vaf; va_list args; if (!__drm_debug_enabled(category)) return; va_start(args, format); vaf.fmt = format; vaf.va = &args; printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV", __builtin_return_address(0), &vaf); va_end(args); } EXPORT_SYMBOL(___drm_dbg); void __drm_err(const char *format, ...) 
{ struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; printk(KERN_ERR "[" DRM_NAME ":%ps] *ERROR* %pV", __builtin_return_address(0), &vaf); va_end(args); } EXPORT_SYMBOL(__drm_err); /** * drm_print_regset32 - print the contents of registers to a * &drm_printer stream. * * @p: the &drm printer * @regset: the list of registers to print. * * Often in driver debug, it's useful to be able to either capture the * contents of registers in the steady state using debugfs or at * specific points during operation. This lets the driver have a * single list of registers for both. */ void drm_print_regset32(struct drm_printer *p, struct debugfs_regset32 *regset) { int namelen = 0; int i; for (i = 0; i < regset->nregs; i++) namelen = max(namelen, (int)strlen(regset->regs[i].name)); for (i = 0; i < regset->nregs; i++) { drm_printf(p, "%*s = 0x%08x\n", namelen, regset->regs[i].name, readl(regset->base + regset->regs[i].offset)); } } EXPORT_SYMBOL(drm_print_regset32);
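/*
 * Editor's illustrative sketch -- not part of the original source. It shows
 * how the &drm_printer helpers above combine in a debugfs show callback. The
 * foo_caps_names strings and the caps value are invented for this example;
 * drm_seq_file_printer(), drm_printf(), drm_print_bits() and drm_puts() are
 * the helpers defined or declared by drm_print.
 */
static int foo_caps_show(struct seq_file *m, void *data)
{
	static const char * const foo_caps_names[] = {
		"scanout", "cursor", "async-flip",
	};
	struct drm_printer p = drm_seq_file_printer(m);
	unsigned long caps = 0x5;	/* example value: bits 0 and 2 set */

	drm_printf(&p, "caps: 0x%lx (", caps);
	/* Prints "scanout,async-flip" for the value above */
	drm_print_bits(&p, caps, foo_caps_names, ARRAY_SIZE(foo_caps_names));
	drm_puts(&p, ")\n");

	return 0;
}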
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
 * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
 */
#include <linux/sched.h>		/* test_thread_flag(), ...	*/
#include <linux/sched/task_stack.h>	/* task_stack_*(), ...
*/ #include <linux/kdebug.h> /* oops_begin/end, ... */ #include <linux/extable.h> /* search_exception_tables */ #include <linux/memblock.h> /* max_low_pfn */ #include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ #include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ #include <linux/uaccess.h> /* faulthandler_disabled() */ #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <linux/mm_types.h> #include <linux/mm.h> /* find_and_lock_vma() */ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/fixmap.h> /* VSYSCALL_ADDR */ #include <asm/vsyscall.h> /* emulate_vsyscall */ #include <asm/vm86.h> /* struct vm86 */ #include <asm/mmu_context.h> /* vma_pkey() */ #include <asm/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <asm/desc.h> /* store_idt(), ... */ #include <asm/cpu_entry_area.h> /* exception stack */ #include <asm/pgtable_areas.h> /* VMALLOC_START, ... */ #include <asm/kvm_para.h> /* kvm_handle_async_pf */ #include <asm/vdso.h> /* fixup_vdso_exception() */ #include <asm/irq_stack.h> #include <asm/fred.h> #include <asm/sev.h> /* snp_dump_hva_rmpentry() */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> /* * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ static nokprobe_inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) return -1; return 0; } /* * Prefetch quirks: * * 32-bit mode: * * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. * Check that here and ignore it. This is AMD erratum #91. * * 64-bit mode: * * Sometimes the CPU reports invalid exceptions on prefetch. * Check that here and ignore it. * * Opcode checker based on code by Richard Brunner. */ static inline int check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, unsigned char opcode, int *prefetch) { unsigned char instr_hi = opcode & 0xf0; unsigned char instr_lo = opcode & 0x0f; switch (instr_hi) { case 0x20: case 0x30: /* * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. * In X86_64 long mode, the CPU will signal invalid * opcode if some of these prefixes are present so * X86_64 will never get here anyway */ return ((instr_lo & 7) == 0x6); #ifdef CONFIG_X86_64 case 0x40: /* * In 64-bit mode 0x40..0x4F are valid REX prefixes */ return (!user_mode(regs) || user_64bit_mode(regs)); #endif case 0x60: /* 0x64 thru 0x67 are valid prefixes in all modes. */ return (instr_lo & 0xC) == 0x4; case 0xF0: /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. 
*/ return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ if (get_kernel_nofault(opcode, instr)) return 0; *prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); return 0; default: return 0; } } static bool is_amd_k8_pre_npt(void) { struct cpuinfo_x86 *c = &boot_cpu_data; return unlikely(IS_ENABLED(CONFIG_CPU_SUP_AMD) && c->x86_vendor == X86_VENDOR_AMD && c->x86 == 0xf && c->x86_model < 0x40); } static int is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) { unsigned char *max_instr; unsigned char *instr; int prefetch = 0; /* Erratum #91 affects AMD K8, pre-NPT CPUs */ if (!is_amd_k8_pre_npt()) return 0; /* * If it was a exec (instruction fetch) fault on NX page, then * do not ignore the fault: */ if (error_code & X86_PF_INSTR) return 0; instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; /* * This code has historically always bailed out if IP points to a * not-present page (e.g. due to a race). No one has ever * complained about this. */ pagefault_disable(); while (instr < max_instr) { unsigned char opcode; if (user_mode(regs)) { if (get_user(opcode, (unsigned char __user *) instr)) break; } else { if (get_kernel_nofault(opcode, instr)) break; } instr++; if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) break; } pagefault_enable(); return prefetch; } DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); #ifdef CONFIG_X86_32 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) { unsigned index = pgd_index(address); pgd_t *pgd_k; p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pgd += index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) return NULL; /* * set_pgd(pgd, *pgd_k); here would be useless on PAE * and redundant with the set_pmd() on non-PAE. As would * set_p4d/set_pud. */ p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); if (!p4d_present(*p4d_k)) return NULL; pud = pud_offset(p4d, address); pud_k = pud_offset(p4d_k, address); if (!pud_present(*pud_k)) return NULL; pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (pmd_present(*pmd) != pmd_present(*pmd_k)) set_pmd(pmd, *pmd_k); if (!pmd_present(*pmd_k)) return NULL; else BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k)); return pmd_k; } /* * Handle a fault on the vmalloc or module mapping area * * This is needed because there is a race condition between the time * when the vmalloc mapping code updates the PMD to the point in time * where it synchronizes this update with the other page-tables in the * system. * * In this race window another thread/CPU can map an area on the same * PMD, finds it already present and does not synchronize it with the * rest of the system yet. As a result v[mz]alloc might return areas * which are not mapped in every page-table in the system, causing an * unhandled page-fault when they are accessed. */ static noinline int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; pte_t *pte_k; /* Make sure we are in vmalloc area: */ if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Do _not_ use "current" here. We might be inside * an interrupt in the middle of a task switch.. 
*/ pgd_paddr = read_cr3_pa(); pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); if (!pmd_k) return -1; if (pmd_leaf(*pmd_k)) return 0; pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; return 0; } NOKPROBE_SYMBOL(vmalloc_fault); void arch_sync_kernel_mappings(unsigned long start, unsigned long end) { unsigned long addr; for (addr = start & PMD_MASK; addr >= TASK_SIZE_MAX && addr < VMALLOC_END; addr += PMD_SIZE) { struct page *page; spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { spinlock_t *pgt_lock; /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); vmalloc_sync_one(page_address(page), addr); spin_unlock(pgt_lock); } spin_unlock(&pgd_lock); } } static bool low_pfn(unsigned long pfn) { return pfn < max_low_pfn; } static void dump_pagetable(unsigned long address) { pgd_t *base = __va(read_cr3_pa()); pgd_t *pgd = &base[pgd_index(address)]; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; #ifdef CONFIG_X86_PAE pr_info("*pdpt = %016Lx ", pgd_val(*pgd)); if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) goto out; #define pr_pde pr_cont #else #define pr_pde pr_info #endif p4d = p4d_offset(pgd, address); pud = pud_offset(p4d, address); pmd = pmd_offset(pud, address); pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); #undef pr_pde /* * We must not directly access the pte in the highpte * case if the page table is located in highmem. * And let's rather not kmap-atomic the pte, just in case * it's allocated already: */ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_leaf(*pmd)) goto out; pte = pte_offset_kernel(pmd, address); pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); out: pr_cont("\n"); } #else /* CONFIG_X86_64: */ #ifdef CONFIG_CPU_SUP_AMD static const char errata93_warning[] = KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" "******* Working around it, but it may cause SEGVs or burn power.\n" "******* Please consider a BIOS update.\n" "******* Disabling USB legacy in the BIOS may also help.\n"; #endif static int bad_address(void *p) { unsigned long dummy; return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long address) { pgd_t *base = __va(read_cr3_pa()); pgd_t *pgd = base + pgd_index(address); p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; if (bad_address(pgd)) goto bad; pr_info("PGD %lx ", pgd_val(*pgd)); if (!pgd_present(*pgd)) goto out; p4d = p4d_offset(pgd, address); if (bad_address(p4d)) goto bad; pr_cont("P4D %lx ", p4d_val(*p4d)); if (!p4d_present(*p4d) || p4d_leaf(*p4d)) goto out; pud = pud_offset(p4d, address); if (bad_address(pud)) goto bad; pr_cont("PUD %lx ", pud_val(*pud)); if (!pud_present(*pud) || pud_leaf(*pud)) goto out; pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; pr_cont("PMD %lx ", pmd_val(*pmd)); if (!pmd_present(*pmd) || pmd_leaf(*pmd)) goto out; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; pr_cont("PTE %lx", pte_val(*pte)); out: pr_cont("\n"); return; bad: pr_info("BAD\n"); } #endif /* CONFIG_X86_64 */ /* * Workaround for K8 erratum #93 & buggy BIOS. * * BIOS SMM functions are required to use a specific workaround * to avoid corruption of the 64bit RIP register on C stepping K8. * * A lot of BIOS that didn't get tested properly miss this. * * The OS sees this as a page fault with the upper 32bits of RIP cleared. * Try to work around it here. * * Note we only handle faults in kernel here. 
* Does nothing on 32-bit. */ static int is_errata93(struct pt_regs *regs, unsigned long address) { #if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD || boot_cpu_data.x86 != 0xf) return 0; if (user_mode(regs)) return 0; if (address != regs->ip) return 0; if ((address >> 32) != 0) return 0; address |= 0xffffffffUL << 32; if ((address >= (u64)_stext && address <= (u64)_etext) || (address >= MODULES_VADDR && address <= MODULES_END)) { printk_once(errata93_warning); regs->ip = address; return 1; } #endif return 0; } /* * Work around K8 erratum #100 K8 in compat mode occasionally jumps * to illegal addresses >4GB. * * We catch this in the page fault handler because these addresses * are not reachable. Just detect this case and return. Any code * segment in LDT is compatibility mode. */ static int is_errata100(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) return 1; #endif return 0; } /* Pentium F0 0F C7 C8 bug workaround: */ static int is_f00f_bug(struct pt_regs *regs, unsigned long error_code, unsigned long address) { #ifdef CONFIG_X86_F00F_BUG if (boot_cpu_has_bug(X86_BUG_F00F) && !(error_code & X86_PF_USER) && idt_is_f00f_address(address)) { handle_invalid_op(regs); return 1; } #endif return 0; } static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) { u32 offset = (index >> 3) * sizeof(struct desc_struct); unsigned long addr; struct ldttss_desc desc; if (index == 0) { pr_alert("%s: NULL\n", name); return; } if (offset + sizeof(struct ldttss_desc) >= gdt->size) { pr_alert("%s: 0x%hx -- out of bounds\n", name, index); return; } if (copy_from_kernel_nofault(&desc, (void *)(gdt->address + offset), sizeof(struct ldttss_desc))) { pr_alert("%s: 0x%hx -- GDT entry is not readable\n", name, index); return; } addr = desc.base0 | (desc.base1 << 16) | ((unsigned long)desc.base2 << 24); #ifdef CONFIG_X86_64 addr |= ((u64)desc.base3 << 32); #endif pr_alert("%s: 0x%hx -- base=0x%lx limit=0x%x\n", name, index, addr, (desc.limit0 | (desc.limit1 << 16))); } static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) { if (!oops_may_print()) return; if (error_code & X86_PF_INSTR) { unsigned int level; pgd_t *pgd; pte_t *pte; pgd = __va(read_cr3_pa()); pgd += pgd_index(address); pte = lookup_address_in_pgd(pgd, address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", from_kuid(&init_user_ns, current_uid())); if (pte && pte_present(*pte) && pte_exec(*pte) && (pgd_flags(*pgd) & _PAGE_USER) && (__read_cr4() & X86_CR4_SMEP)) pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n", from_kuid(&init_user_ns, current_uid())); } if (address < PAGE_SIZE && !user_mode(regs)) pr_alert("BUG: kernel NULL pointer dereference, address: %px\n", (void *)address); else pr_alert("BUG: unable to handle page fault for address: %px\n", (void *)address); pr_alert("#PF: %s %s in %s mode\n", (error_code & X86_PF_USER) ? "user" : "supervisor", (error_code & X86_PF_INSTR) ? "instruction fetch" : (error_code & X86_PF_WRITE) ? "write access" : "read access", user_mode(regs) ? "user" : "kernel"); pr_alert("#PF: error_code(0x%04lx) - %s\n", error_code, !(error_code & X86_PF_PROT) ? "not-present page" : (error_code & X86_PF_RSVD) ? "reserved bit violation" : (error_code & X86_PF_PK) ? 
"protection keys violation" : (error_code & X86_PF_RMP) ? "RMP violation" : "permissions violation"); if (!(error_code & X86_PF_USER) && user_mode(regs)) { struct desc_ptr idt, gdt; u16 ldtr, tr; /* * This can happen for quite a few reasons. The more obvious * ones are faults accessing the GDT, or LDT. Perhaps * surprisingly, if the CPU tries to deliver a benign or * contributory exception from user code and gets a page fault * during delivery, the page fault can be delivered as though * it originated directly from user code. This could happen * due to wrong permissions on the IDT, GDT, LDT, TSS, or * kernel or IST stack. */ store_idt(&idt); /* Usable even on Xen PV -- it's just slow. */ native_store_gdt(&gdt); pr_alert("IDT: 0x%lx (limit=0x%hx) GDT: 0x%lx (limit=0x%hx)\n", idt.address, idt.size, gdt.address, gdt.size); store_ldt(ldtr); show_ldttss(&gdt, "LDTR", ldtr); store_tr(tr); show_ldttss(&gdt, "TR", tr); } dump_pagetable(address); if (error_code & X86_PF_RMP) snp_dump_hva_rmpentry(address); } static noinline void pgtable_bad(struct pt_regs *regs, unsigned long error_code, unsigned long address) { struct task_struct *tsk; unsigned long flags; int sig; flags = oops_begin(); tsk = current; sig = SIGKILL; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", tsk->comm, address); dump_pagetable(address); if (__die("Bad pagetable", regs, error_code)) sig = 0; oops_end(flags, regs, sig); } static void sanitize_error_code(unsigned long address, unsigned long *error_code) { /* * To avoid leaking information about the kernel page * table layout, pretend that user-mode accesses to * kernel addresses are always protection faults. * * NB: This means that failed vsyscalls with vsyscall=none * will have the PROT bit. This doesn't leak any * information and does not appear to cause any problems. */ if (address >= TASK_SIZE_MAX) *error_code |= X86_PF_PROT; } static void set_signal_archinfo(unsigned long address, unsigned long error_code) { struct task_struct *tsk = current; tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | X86_PF_USER; tsk->thread.cr2 = address; } static noinline void page_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) { #ifdef CONFIG_VMAP_STACK struct stack_info info; #endif unsigned long flags; int sig; if (user_mode(regs)) { /* * Implicit kernel access from user mode? Skip the stack * overflow and EFI special cases. */ goto oops; } #ifdef CONFIG_VMAP_STACK /* * Stack overflow? During boot, we can fault near the initial * stack in the direct map, but that's not an overflow -- check * that we're in vmalloc space to avoid this. */ if (is_vmalloc_addr((void *)address) && get_stack_guard_info((void *)address, &info)) { /* * We're likely to be running with very little stack space * left. It's plausible that we'd hit this condition but * double-fault even before we get this far, in which case * we're fine: the double-fault handler will deal with it. * * We don't want to make it all the way into the oops code * and then double-fault, though, because we're likely to * break the console driver and lose most of the stack dump. */ call_on_stack(__this_cpu_ist_top_va(DF) - sizeof(void*), handle_stack_overflow, ASM_CALL_ARG3, , [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info)); unreachable(); } #endif /* * Buggy firmware could access regions which might page fault. If * this happens, EFI has a special OOPS path that will try to * avoid hanging the system. 
*/ if (IS_ENABLED(CONFIG_EFI)) efi_crash_gracefully_on_page_fault(address); /* Only not-present faults should be handled by KFENCE. */ if (!(error_code & X86_PF_PROT) && kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs)) return; oops: /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice: */ flags = oops_begin(); show_fault_oops(regs, error_code, address); if (task_stack_end_corrupted(current)) printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); sig = SIGKILL; if (__die("Oops", regs, error_code)) sig = 0; /* Executive summary in case the body of the oops scrolled away */ printk(KERN_DEFAULT "CR2: %016lx\n", address); oops_end(flags, regs, sig); } static noinline void kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address, int signal, int si_code, u32 pkey) { WARN_ON_ONCE(user_mode(regs)); /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) return; /* * AMD erratum #91 manifests as a spurious page fault on a PREFETCH * instruction. */ if (is_prefetch(regs, error_code, address)) return; page_fault_oops(regs, error_code, address); } /* * Print out info about fatal segfaults, if the show_unhandled_signals * sysctl is set: */ static inline void show_signal_msg(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct task_struct *tsk) { const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG; /* This is a racy snapshot, but it's better than nothing. */ int cpu = raw_smp_processor_id(); if (!unhandled_signal(tsk, SIGSEGV)) return; if (!printk_ratelimit()) return; printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", loglvl, tsk->comm, task_pid_nr(tsk), address, (void *)regs->ip, (void *)regs->sp, error_code); print_vma_addr(KERN_CONT " in ", regs->ip); /* * Dump the likely CPU where the fatal segfault happened. * This can help identify faulty hardware. */ printk(KERN_CONT " likely on CPU %d (core %d, socket %d)", cpu, topology_core_id(cpu), topology_physical_package_id(cpu)); printk(KERN_CONT "\n"); show_opcodes(regs, loglvl); } static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) { struct task_struct *tsk = current; if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, SIGSEGV, si_code, pkey); return; } if (!(error_code & X86_PF_USER)) { /* Implicit user access to kernel memory -- just oops */ page_fault_oops(regs, error_code, address); return; } /* * User mode accesses just cause a SIGSEGV. 
* It's possible to have interrupts off here: */ local_irq_enable(); /* * Valid to do another page fault here because this one came * from user space: */ if (is_prefetch(regs, error_code, address)) return; if (is_errata100(regs, address)) return; sanitize_error_code(address, &error_code); if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) return; if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); set_signal_archinfo(address, error_code); if (si_code == SEGV_PKUERR) force_sig_pkuerr((void __user *)address, pkey); else force_sig_fault(SIGSEGV, si_code, (void __user *)address); local_irq_disable(); } static noinline void bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address) { __bad_area_nosemaphore(regs, error_code, address, 0, SEGV_MAPERR); } static void __bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) { struct mm_struct *mm = current->mm; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ mmap_read_unlock(mm); __bad_area_nosemaphore(regs, error_code, address, pkey, si_code); } static inline bool bad_area_access_from_pkeys(unsigned long error_code, struct vm_area_struct *vma) { /* This code is always called on the current mm */ bool foreign = false; if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return false; if (error_code & X86_PF_PK) return true; /* this checks permission keys on the VMA: */ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), (error_code & X86_PF_INSTR), foreign)) return true; return false; } static noinline void bad_area_access_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct vm_area_struct *vma) { /* * This OSPKE check is not strictly necessary at runtime. * But, doing it this way allows compiler optimizations * if pkeys are compiled out. */ if (bad_area_access_from_pkeys(error_code, vma)) { /* * A protection key fault means that the PKRU value did not allow * access to some PTE. Userspace can figure out what PKRU was * from the XSAVE state. This function captures the pkey from * the vma and passes it to userspace so userspace can discover * which protection key was set on the PTE. * * If we get here, we know that the hardware signaled a X86_PF_PK * fault and that there was a VMA once we got in the fault * handler. It does *not* guarantee that the VMA we find here * was the one that we faulted on. * * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); * 2. T1 : set PKRU to deny access to pkey=4, touches page * 3. T1 : faults... * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); * 5. T1 : enters fault handler, takes mmap_lock, etc... * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ u32 pkey = vma_pkey(vma); __bad_area(regs, error_code, address, pkey, SEGV_PKUERR); } else { __bad_area(regs, error_code, address, 0, SEGV_ACCERR); } } static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, vm_fault_t fault) { /* Kernel mode? 
Handle exceptions or die: */ if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } /* User-space => ok to do another page fault: */ if (is_prefetch(regs, error_code, address)) return; sanitize_error_code(address, &error_code); if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) return; set_signal_archinfo(address, error_code); #ifdef CONFIG_MEMORY_FAILURE if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { struct task_struct *tsk = current; unsigned lsb = 0; pr_err( "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", tsk->comm, tsk->pid, address); if (fault & VM_FAULT_HWPOISON_LARGE) lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); if (fault & VM_FAULT_HWPOISON) lsb = PAGE_SHIFT; force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb); return; } #endif force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) return 0; if ((error_code & X86_PF_INSTR) && !pte_exec(*pte)) return 0; return 1; } /* * Handle a spurious fault caused by a stale TLB entry. * * This allows us to lazily refresh the TLB when increasing the * permissions of a kernel page (RO -> RW or NX -> X). Doing it * eagerly is very expensive since that implies doing a full * cross-processor TLB flush, even if no stale TLB entries exist * on other processors. * * Spurious faults may only occur if the TLB contains an entry with * fewer permission than the page table entry. Non-present (P = 0) * and reserved bit (R = 1) faults are never spurious. * * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. * * Returns non-zero if a spurious fault was handled, zero otherwise. * * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3 * (Optional Invalidation). */ static noinline int spurious_kernel_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; int ret; /* * Only writes to RO or instruction fetches from NX may cause * spurious faults. * * These could be from user or supervisor accesses but the TLB * is only lazily flushed after a kernel mapping protection * change, so user accesses are not expected to cause spurious * faults. */ if (error_code != (X86_PF_WRITE | X86_PF_PROT) && error_code != (X86_PF_INSTR | X86_PF_PROT)) return 0; pgd = init_mm.pgd + pgd_index(address); if (!pgd_present(*pgd)) return 0; p4d = p4d_offset(pgd, address); if (!p4d_present(*p4d)) return 0; if (p4d_leaf(*p4d)) return spurious_kernel_fault_check(error_code, (pte_t *) p4d); pud = pud_offset(p4d, address); if (!pud_present(*pud)) return 0; if (pud_leaf(*pud)) return spurious_kernel_fault_check(error_code, (pte_t *) pud); pmd = pmd_offset(pud, address); if (!pmd_present(*pmd)) return 0; if (pmd_leaf(*pmd)) return spurious_kernel_fault_check(error_code, (pte_t *) pmd); pte = pte_offset_kernel(pmd, address); if (!pte_present(*pte)) return 0; ret = spurious_kernel_fault_check(error_code, pte); if (!ret) return 0; /* * Make sure we have permissions in PMD. 
* If not, then there's a bug in the page tables: */ ret = spurious_kernel_fault_check(error_code, (pte_t *) pmd); WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); return ret; } NOKPROBE_SYMBOL(spurious_kernel_fault); int show_unhandled_signals = 1; static inline int access_error(unsigned long error_code, struct vm_area_struct *vma) { /* This is only called for the current mm, so: */ bool foreign = false; /* * Read or write was blocked by protection keys. This is * always an unconditional error and can never result in * a follow-up action to resolve the fault, like a COW. */ if (error_code & X86_PF_PK) return 1; /* * SGX hardware blocked the access. This usually happens * when the enclave memory contents have been destroyed, like * after a suspend/resume cycle. In any case, the kernel can't * fix the cause of the fault. Handle the fault as an access * error even in cases where no actual access violation * occurred. This allows userspace to rebuild the enclave in * response to the signal. */ if (unlikely(error_code & X86_PF_SGX)) return 1; /* * Make sure to check the VMA so that we do not perform * faults just to hit a X86_PF_PK as soon as we fill in a * page. */ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), (error_code & X86_PF_INSTR), foreign)) return 1; /* * Shadow stack accesses (PF_SHSTK=1) are only permitted to * shadow stack VMAs. All other accesses result in an error. */ if (error_code & X86_PF_SHSTK) { if (unlikely(!(vma->vm_flags & VM_SHADOW_STACK))) return 1; if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; return 0; } if (error_code & X86_PF_WRITE) { /* write, present and write, not present: */ if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) return 1; if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; return 0; } /* read, present: */ if (unlikely(error_code & X86_PF_PROT)) return 1; /* read, not present: */ if (unlikely(!vma_is_accessible(vma))) return 1; return 0; } bool fault_in_kernel_space(unsigned long address) { /* * On 64-bit systems, the vsyscall page is at an address above * TASK_SIZE_MAX, but is not considered part of the kernel * address space. */ if (IS_ENABLED(CONFIG_X86_64) && is_vsyscall_vaddr(address)) return false; return address >= TASK_SIZE_MAX; } /* * Called for all faults where 'address' is part of the kernel address * space. Might get called for faults that originate from *code* that * ran in userspace or the kernel. */ static void do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, unsigned long address) { /* * Protection keys exceptions only happen on user pages. We * have no user pages in the kernel portion of the address * space, so do not expect them here. */ WARN_ON_ONCE(hw_error_code & X86_PF_PK); #ifdef CONFIG_X86_32 /* * We can fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. * * Before doing this on-demand faulting, ensure that the * fault is not any of the following: * 1. A fault on a PTE with a reserved bit set. * 2. A fault caused by a user-mode access. (Do not demand- * fault kernel memory due to user-mode accesses). * 3. A fault caused by a page-level protection violation. * (A demand fault would be on a non-present page which * would have X86_PF_PROT==0). * * This is only needed to close a race condition on x86-32 in * the vmalloc mapping/unmapping code. 
See the comment above * vmalloc_fault() for details. On x86-64 the race does not * exist as the vmalloc mappings don't need to be synchronized * there. */ if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { if (vmalloc_fault(address) >= 0) return; } #endif if (is_f00f_bug(regs, hw_error_code, address)) return; /* Was the fault spurious, caused by lazy TLB invalidation? */ if (spurious_kernel_fault(hw_error_code, address)) return; /* kprobes don't want to hook the spurious faults: */ if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF))) return; /* * Note, despite being a "bad area", there are quite a few * acceptable reasons to get here, such as erratum fixups * and handling kernel code that can fault, like get_user(). * * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock: */ bad_area_nosemaphore(regs, hw_error_code, address); } NOKPROBE_SYMBOL(do_kern_addr_fault); /* * Handle faults in the user portion of the address space. Nothing in here * should check X86_PF_USER without a specific justification: for almost * all purposes, we should treat a normal kernel access to user memory * (e.g. get_user(), put_user(), etc.) the same as the WRUSS instruction. * The one exception is AC flag handling, which is, per the x86 * architecture, special for WRUSS. */ static inline void do_user_addr_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { struct vm_area_struct *vma; struct task_struct *tsk; struct mm_struct *mm; vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; tsk = current; mm = tsk->mm; if (unlikely((error_code & (X86_PF_USER | X86_PF_INSTR)) == X86_PF_INSTR)) { /* * Whoops, this is kernel mode code trying to execute from * user memory. Unless this is AMD erratum #93, which * corrupts RIP such that it looks like a user address, * this is unrecoverable. Don't even try to look up the * VMA or look for extable entries. */ if (is_errata93(regs, address)) return; page_fault_oops(regs, error_code, address); return; } /* kprobes don't want to hook the spurious faults: */ if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF))) return; /* * Reserved bits are never expected to be set on * entries in the user portion of the page tables. */ if (unlikely(error_code & X86_PF_RSVD)) pgtable_bad(regs, error_code, address); /* * If SMAP is on, check for invalid kernel (supervisor) access to user * pages in the user address space. The odd case here is WRUSS, * which, according to the preliminary documentation, does not respect * SMAP and will have the USER bit set so, in all cases, SMAP * enforcement appears to be consistent with the USER bit. */ if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) && !(error_code & X86_PF_USER) && !(regs->flags & X86_EFLAGS_AC))) { /* * No extable entry here. This was a kernel access to an * invalid pointer. get_kernel_nofault() will not get here. */ page_fault_oops(regs, error_code, address); return; } /* * If we're in an interrupt, have no user context or are running * in a region with pagefaults disabled then we must not take the fault */ if (unlikely(faulthandler_disabled() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } /* Legacy check - remove this after verifying that it doesn't trigger */ if (WARN_ON_ONCE(!(regs->flags & X86_EFLAGS_IF))) { bad_area_nosemaphore(regs, error_code, address); return; } local_irq_enable(); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* * Read-only permissions can not be expressed in shadow stack PTEs. 
* Treat all shadow stack accesses as WRITE faults. This ensures * that the MM will prepare everything (e.g., break COW) such that * maybe_mkwrite() can create a proper shadow stack PTE. */ if (error_code & X86_PF_SHSTK) flags |= FAULT_FLAG_WRITE; if (error_code & X86_PF_WRITE) flags |= FAULT_FLAG_WRITE; if (error_code & X86_PF_INSTR) flags |= FAULT_FLAG_INSTRUCTION; /* * We set FAULT_FLAG_USER based on the register state, not * based on X86_PF_USER. User space accesses that cause * system page faults are still user accesses. */ if (user_mode(regs)) flags |= FAULT_FLAG_USER; #ifdef CONFIG_X86_64 /* * Faults in the vsyscall page might need emulation. The * vsyscall page is at a high address (>PAGE_OFFSET), but is * considered to be part of the user address space. * * The vsyscall page does not have a "real" VMA, so do this * emulation before we go searching for VMAs. * * PKRU never rejects instruction fetches, so we don't need * to consider the PF_PK bit. */ if (is_vsyscall_vaddr(address)) { if (emulate_vsyscall(error_code, regs, address)) return; } #endif if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; vma = lock_vma_under_rcu(mm, address); if (!vma) goto lock_mmap; if (unlikely(access_error(error_code, vma))) { vma_end_read(vma); goto lock_mmap; } fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) vma_end_read(vma); if (!(fault & VM_FAULT_RETRY)) { count_vm_vma_lock_event(VMA_LOCK_SUCCESS); goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); if (fault & VM_FAULT_MAJOR) flags |= FAULT_FLAG_TRIED; /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } lock_mmap: retry: vma = lock_mm_and_find_vma(mm, address, regs); if (unlikely(!vma)) { bad_area_nosemaphore(regs, error_code, address); return; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ if (unlikely(access_error(error_code, vma))) { bad_area_access_error(regs, error_code, address, vma); return; } /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if * we get VM_FAULT_RETRY back, the mmap_lock has been unlocked. * * Note that handle_userfault() may also release and reacquire mmap_lock * (and not return with VM_FAULT_RETRY), when returning to userland to * repeat the page fault later with a VM_FAULT_NOPAGE retval * (potentially after handling any pending signal during the return to * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. */ fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) { /* * Quick path to respond to signals. The core mm code * has unlocked the mm for us if we get here. */ if (!user_mode(regs)) kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY); return; } /* The fault is fully completed (including releasing mmap lock) */ if (fault & VM_FAULT_COMPLETED) return; /* * If we need to retry the mmap_lock has already been released, * and if there is a fatal signal pending there is no guarantee * that we made any progress. Handle this case first. 
*/ if (unlikely(fault & VM_FAULT_RETRY)) { flags |= FAULT_FLAG_TRIED; goto retry; } mmap_read_unlock(mm); done: if (likely(!(fault & VM_FAULT_ERROR))) return; if (fatal_signal_pending(current) && !user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, 0, 0, ARCH_DEFAULT_PKEY); return; } if (fault & VM_FAULT_OOM) { /* Kernel mode? Handle exceptions or die: */ if (!user_mode(regs)) { kernelmode_fixup_or_oops(regs, error_code, address, SIGSEGV, SEGV_MAPERR, ARCH_DEFAULT_PKEY); return; } /* * We ran out of memory, call the OOM killer, and return the * userspace (which will retry the fault, or kill us if we got * oom-killed): */ pagefault_out_of_memory(); } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) do_sigbus(regs, error_code, address, fault); else if (fault & VM_FAULT_SIGSEGV) bad_area_nosemaphore(regs, error_code, address); else BUG(); } } NOKPROBE_SYMBOL(do_user_addr_fault); static __always_inline void trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, unsigned long address) { if (!trace_pagefault_enabled()) return; if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); else trace_page_fault_kernel(address, regs, error_code); } static __always_inline void handle_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { trace_page_fault_entries(regs, error_code, address); if (unlikely(kmmio_fault(regs, address))) return; /* Was the fault on kernel-controlled part of the address space? */ if (unlikely(fault_in_kernel_space(address))) { do_kern_addr_fault(regs, error_code, address); } else { do_user_addr_fault(regs, error_code, address); /* * User address page fault handling might have reenabled * interrupts. Fixing up all potential exit points of * do_user_addr_fault() and its leaf functions is just not * doable w/o creating an unholy mess or turning the code * upside down. */ local_irq_disable(); } } DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) { irqentry_state_t state; unsigned long address; address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2(); prefetchw(&current->mm->mmap_lock); /* * KVM uses #PF vector to deliver 'page not present' events to guests * (asynchronous page fault mechanism). The event happens when a * userspace task is trying to access some valid (from guest's point of * view) memory which is not currently mapped by the host (e.g. the * memory is swapped out). Note, the corresponding "page ready" event * which is injected when the memory becomes available, is delivered via * an interrupt mechanism and not a #PF exception * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()). * * We are relying on the interrupted context being sane (valid RSP, * relevant locks not held, etc.), which is fine as long as the * interrupted context had IF=1. We are also relying on the KVM * async pf type field and CR2 being read consistently instead of * getting values from real and async page faults mixed up. * * Fingers crossed. * * The async #PF handling code takes care of idtentry handling * itself. */ if (kvm_handle_async_pf(regs, (u32)address)) return; /* * Entry handling for valid #PF from kernel mode is slightly * different: RCU is already watching and ct_irq_enter() must not * be invoked because a kernel fault on a user space address might * sleep. * * In case the fault hit a RCU idle region the conditional entry * code reenabled RCU to avoid subsequent wreckage which helps * debuggability. 
 */
	state = irqentry_enter(regs);

	instrumentation_begin();
	handle_page_fault(regs, error_code, address);
	instrumentation_end();

	irqentry_exit(regs, state);
}
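/*
 * Illustrative, out-of-tree userspace sketch (not part of this file): it
 * shows how the SIGSEGV raised by __bad_area_nosemaphore() above looks from
 * user space, i.e. how si_code distinguishes a fault on an address with no
 * VMA (SEGV_MAPERR) from a permission fault on a mapped but inaccessible
 * page (SEGV_ACCERR), with the faulting address reported in si_addr.  Built
 * standalone against libc; nothing below is kernel API.
 */
#if 0	/* example only, never compiled as part of the kernel */
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>

static sigjmp_buf env;

static void segv_handler(int sig, siginfo_t *info, void *ucontext)
{
	/* Async-signal-unsafe printf is tolerable in a demo. */
	printf("SIGSEGV at %p, si_code=%s\n", info->si_addr,
	       info->si_code == SEGV_MAPERR ? "SEGV_MAPERR" :
	       info->si_code == SEGV_ACCERR ? "SEGV_ACCERR" : "other");
	siglongjmp(env, 1);	/* skip past the faulting access */
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = segv_handler,
				.sa_flags = SA_SIGINFO };
	char *noperm;

	sigaction(SIGSEGV, &sa, NULL);

	/* Mapped but PROT_NONE: access_error() path -> SEGV_ACCERR. */
	noperm = mmap(NULL, 4096, PROT_NONE,
		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (noperm != MAP_FAILED && !sigsetjmp(env, 1))
		*noperm = 1;

	/* No VMA at all: bad_area_nosemaphore() path -> SEGV_MAPERR. */
	if (!sigsetjmp(env, 1))
		*(volatile char *)1 = 1;

	return 0;
}
#endif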
// SPDX-License-Identifier: GPL-2.0
/*
 * USB Raw Gadget driver.
 * See Documentation/usb/raw-gadget.rst for more details.
 *
 * Copyright (c) 2020 Google, Inc.
* Author: Andrey Konovalov <andreyknvl@gmail.com> */ #include <linux/compiler.h> #include <linux/ctype.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/idr.h> #include <linux/kref.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/semaphore.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/wait.h> #include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/usb/ch11.h> #include <linux/usb/gadget.h> #include <linux/usb/composite.h> #include <uapi/linux/usb/raw_gadget.h> #define DRIVER_DESC "USB Raw Gadget" #define DRIVER_NAME "raw-gadget" MODULE_DESCRIPTION(DRIVER_DESC); MODULE_AUTHOR("Andrey Konovalov"); MODULE_LICENSE("GPL"); /*----------------------------------------------------------------------*/ static DEFINE_IDA(driver_id_numbers); #define DRIVER_DRIVER_NAME_LENGTH_MAX 32 #define RAW_EVENT_QUEUE_SIZE 16 struct raw_event_queue { /* See the comment in raw_event_queue_fetch() for locking details. */ spinlock_t lock; struct semaphore sema; struct usb_raw_event *events[RAW_EVENT_QUEUE_SIZE]; int size; }; static void raw_event_queue_init(struct raw_event_queue *queue) { spin_lock_init(&queue->lock); sema_init(&queue->sema, 0); queue->size = 0; } static int raw_event_queue_add(struct raw_event_queue *queue, enum usb_raw_event_type type, size_t length, const void *data) { unsigned long flags; struct usb_raw_event *event; spin_lock_irqsave(&queue->lock, flags); if (queue->size >= RAW_EVENT_QUEUE_SIZE) { spin_unlock_irqrestore(&queue->lock, flags); return -ENOMEM; } event = kmalloc(sizeof(*event) + length, GFP_ATOMIC); if (!event) { spin_unlock_irqrestore(&queue->lock, flags); return -ENOMEM; } event->type = type; event->length = length; if (event->length) memcpy(&event->data[0], data, length); queue->events[queue->size] = event; queue->size++; up(&queue->sema); spin_unlock_irqrestore(&queue->lock, flags); return 0; } static struct usb_raw_event *raw_event_queue_fetch( struct raw_event_queue *queue) { int ret; unsigned long flags; struct usb_raw_event *event; /* * This function can be called concurrently. We first check that * there's at least one event queued by decrementing the semaphore, * and then take the lock to protect queue struct fields. */ ret = down_interruptible(&queue->sema); if (ret) return ERR_PTR(ret); spin_lock_irqsave(&queue->lock, flags); /* * queue->size must have the same value as queue->sema counter (before * the down_interruptible() call above), so this check is a fail-safe. 
*/ if (WARN_ON(!queue->size)) { spin_unlock_irqrestore(&queue->lock, flags); return ERR_PTR(-ENODEV); } event = queue->events[0]; queue->size--; memmove(&queue->events[0], &queue->events[1], queue->size * sizeof(queue->events[0])); spin_unlock_irqrestore(&queue->lock, flags); return event; } static void raw_event_queue_destroy(struct raw_event_queue *queue) { int i; for (i = 0; i < queue->size; i++) kfree(queue->events[i]); queue->size = 0; } /*----------------------------------------------------------------------*/ struct raw_dev; enum ep_state { STATE_EP_DISABLED, STATE_EP_ENABLED, }; struct raw_ep { struct raw_dev *dev; enum ep_state state; struct usb_ep *ep; u8 addr; struct usb_request *req; bool urb_queued; bool disabling; ssize_t status; }; enum dev_state { STATE_DEV_INVALID = 0, STATE_DEV_OPENED, STATE_DEV_INITIALIZED, STATE_DEV_REGISTERING, STATE_DEV_RUNNING, STATE_DEV_CLOSED, STATE_DEV_FAILED }; struct raw_dev { struct kref count; spinlock_t lock; const char *udc_name; struct usb_gadget_driver driver; /* Reference to misc device: */ struct device *dev; /* Make driver names unique */ int driver_id_number; /* Protected by lock: */ enum dev_state state; bool gadget_registered; struct usb_gadget *gadget; struct usb_request *req; bool ep0_in_pending; bool ep0_out_pending; bool ep0_urb_queued; ssize_t ep0_status; struct raw_ep eps[USB_RAW_EPS_NUM_MAX]; int eps_num; struct completion ep0_done; struct raw_event_queue queue; }; static struct raw_dev *dev_new(void) { struct raw_dev *dev; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; /* Matches kref_put() in raw_release(). */ kref_init(&dev->count); spin_lock_init(&dev->lock); init_completion(&dev->ep0_done); raw_event_queue_init(&dev->queue); dev->driver_id_number = -1; return dev; } static void dev_free(struct kref *kref) { struct raw_dev *dev = container_of(kref, struct raw_dev, count); int i; kfree(dev->udc_name); kfree(dev->driver.udc_name); kfree(dev->driver.driver.name); if (dev->driver_id_number >= 0) ida_free(&driver_id_numbers, dev->driver_id_number); if (dev->req) { if (dev->ep0_urb_queued) usb_ep_dequeue(dev->gadget->ep0, dev->req); usb_ep_free_request(dev->gadget->ep0, dev->req); } raw_event_queue_destroy(&dev->queue); for (i = 0; i < dev->eps_num; i++) { if (dev->eps[i].state == STATE_EP_DISABLED) continue; usb_ep_disable(dev->eps[i].ep); usb_ep_free_request(dev->eps[i].ep, dev->eps[i].req); kfree(dev->eps[i].ep->desc); dev->eps[i].state = STATE_EP_DISABLED; } kfree(dev); } /*----------------------------------------------------------------------*/ static int raw_queue_event(struct raw_dev *dev, enum usb_raw_event_type type, size_t length, const void *data) { int ret = 0; unsigned long flags; ret = raw_event_queue_add(&dev->queue, type, length, data); if (ret < 0) { spin_lock_irqsave(&dev->lock, flags); dev->state = STATE_DEV_FAILED; spin_unlock_irqrestore(&dev->lock, flags); } return ret; } static void gadget_ep0_complete(struct usb_ep *ep, struct usb_request *req) { struct raw_dev *dev = req->context; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (req->status) dev->ep0_status = req->status; else dev->ep0_status = req->actual; if (dev->ep0_in_pending) dev->ep0_in_pending = false; else dev->ep0_out_pending = false; spin_unlock_irqrestore(&dev->lock, flags); complete(&dev->ep0_done); } static u8 get_ep_addr(const char *name) { /* If the endpoint has fixed function (named as e.g. "ep12out-bulk"), * parse the endpoint address from its name. 
We deliberately use * deprecated simple_strtoul() function here, as the number isn't * followed by '\0' nor '\n'. */ if (isdigit(name[2])) return simple_strtoul(&name[2], NULL, 10); /* Otherwise the endpoint is configurable (named as e.g. "ep-a"). */ return USB_RAW_EP_ADDR_ANY; } static int gadget_bind(struct usb_gadget *gadget, struct usb_gadget_driver *driver) { int ret = 0, i = 0; struct raw_dev *dev = container_of(driver, struct raw_dev, driver); struct usb_request *req; struct usb_ep *ep; unsigned long flags; if (strcmp(gadget->name, dev->udc_name) != 0) return -ENODEV; set_gadget_data(gadget, dev); req = usb_ep_alloc_request(gadget->ep0, GFP_KERNEL); if (!req) { dev_err(&gadget->dev, "usb_ep_alloc_request failed\n"); set_gadget_data(gadget, NULL); return -ENOMEM; } spin_lock_irqsave(&dev->lock, flags); dev->req = req; dev->req->context = dev; dev->req->complete = gadget_ep0_complete; dev->gadget = gadget; gadget_for_each_ep(ep, dev->gadget) { dev->eps[i].ep = ep; dev->eps[i].addr = get_ep_addr(ep->name); dev->eps[i].state = STATE_EP_DISABLED; i++; } dev->eps_num = i; spin_unlock_irqrestore(&dev->lock, flags); dev_dbg(&gadget->dev, "gadget connected\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL); if (ret < 0) { dev_err(&gadget->dev, "failed to queue connect event\n"); set_gadget_data(gadget, NULL); return ret; } /* Matches kref_put() in gadget_unbind(). */ kref_get(&dev->count); return ret; } static void gadget_unbind(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); set_gadget_data(gadget, NULL); /* Matches kref_get() in gadget_bind(). */ kref_put(&dev->count, dev_free); } static int gadget_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) { int ret = 0; struct raw_dev *dev = get_gadget_data(gadget); unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_err(&gadget->dev, "ignoring, device is not running\n"); ret = -ENODEV; goto out_unlock; } if (dev->ep0_in_pending || dev->ep0_out_pending) { dev_dbg(&gadget->dev, "stalling, request already pending\n"); ret = -EBUSY; goto out_unlock; } if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) dev->ep0_in_pending = true; else dev->ep0_out_pending = true; spin_unlock_irqrestore(&dev->lock, flags); ret = raw_queue_event(dev, USB_RAW_EVENT_CONTROL, sizeof(*ctrl), ctrl); if (ret < 0) dev_err(&gadget->dev, "failed to queue control event\n"); goto out; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); out: if (ret == 0 && ctrl->wLength == 0) { /* * Return USB_GADGET_DELAYED_STATUS as a workaround to stop * some UDC drivers (e.g. dwc3) from automatically proceeding * with the status stage for 0-length transfers. * Should be removed once all UDC drivers are fixed to always * delay the status stage until a response is queued to EP0. 
*/ return USB_GADGET_DELAYED_STATUS; } return ret; } static void gadget_disconnect(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); int ret; dev_dbg(&gadget->dev, "gadget disconnected\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_DISCONNECT, 0, NULL); if (ret < 0) dev_err(&gadget->dev, "failed to queue disconnect event\n"); } static void gadget_suspend(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); int ret; dev_dbg(&gadget->dev, "gadget suspended\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_SUSPEND, 0, NULL); if (ret < 0) dev_err(&gadget->dev, "failed to queue suspend event\n"); } static void gadget_resume(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); int ret; dev_dbg(&gadget->dev, "gadget resumed\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_RESUME, 0, NULL); if (ret < 0) dev_err(&gadget->dev, "failed to queue resume event\n"); } static void gadget_reset(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); int ret; dev_dbg(&gadget->dev, "gadget reset\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_RESET, 0, NULL); if (ret < 0) dev_err(&gadget->dev, "failed to queue reset event\n"); } /*----------------------------------------------------------------------*/ static struct miscdevice raw_misc_device; static int raw_open(struct inode *inode, struct file *fd) { struct raw_dev *dev; /* Nonblocking I/O is not supported yet. */ if (fd->f_flags & O_NONBLOCK) return -EINVAL; dev = dev_new(); if (!dev) return -ENOMEM; fd->private_data = dev; dev->state = STATE_DEV_OPENED; dev->dev = raw_misc_device.this_device; return 0; } static int raw_release(struct inode *inode, struct file *fd) { int ret = 0; struct raw_dev *dev = fd->private_data; unsigned long flags; bool unregister = false; spin_lock_irqsave(&dev->lock, flags); dev->state = STATE_DEV_CLOSED; if (!dev->gadget) { spin_unlock_irqrestore(&dev->lock, flags); goto out_put; } if (dev->gadget_registered) unregister = true; dev->gadget_registered = false; spin_unlock_irqrestore(&dev->lock, flags); if (unregister) { ret = usb_gadget_unregister_driver(&dev->driver); if (ret != 0) dev_err(dev->dev, "usb_gadget_unregister_driver() failed with %d\n", ret); /* Matches kref_get() in raw_ioctl_run(). */ kref_put(&dev->count, dev_free); } out_put: /* Matches dev_new() in raw_open(). 
*/ kref_put(&dev->count, dev_free); return ret; } /*----------------------------------------------------------------------*/ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) { int ret = 0; int driver_id_number; struct usb_raw_init arg; char *udc_driver_name; char *udc_device_name; char *driver_driver_name; unsigned long flags; if (copy_from_user(&arg, (void __user *)value, sizeof(arg))) return -EFAULT; switch (arg.speed) { case USB_SPEED_UNKNOWN: arg.speed = USB_SPEED_HIGH; break; case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_SUPER: break; default: return -EINVAL; } driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL); if (driver_id_number < 0) return driver_id_number; driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL); if (!driver_driver_name) { ret = -ENOMEM; goto out_free_driver_id_number; } snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX, DRIVER_NAME ".%d", driver_id_number); udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_driver_name) { ret = -ENOMEM; goto out_free_driver_driver_name; } ret = strscpy(udc_driver_name, &arg.driver_name[0], UDC_NAME_LENGTH_MAX); if (ret < 0) goto out_free_udc_driver_name; ret = 0; udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_device_name) { ret = -ENOMEM; goto out_free_udc_driver_name; } ret = strscpy(udc_device_name, &arg.device_name[0], UDC_NAME_LENGTH_MAX); if (ret < 0) goto out_free_udc_device_name; ret = 0; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_OPENED) { dev_dbg(dev->dev, "fail, device is not opened\n"); ret = -EINVAL; goto out_unlock; } dev->udc_name = udc_driver_name; dev->driver.function = DRIVER_DESC; dev->driver.max_speed = arg.speed; dev->driver.setup = gadget_setup; dev->driver.disconnect = gadget_disconnect; dev->driver.bind = gadget_bind; dev->driver.unbind = gadget_unbind; dev->driver.suspend = gadget_suspend; dev->driver.resume = gadget_resume; dev->driver.reset = gadget_reset; dev->driver.driver.name = driver_driver_name; dev->driver.udc_name = udc_device_name; dev->driver.match_existing_only = 1; dev->driver_id_number = driver_id_number; dev->state = STATE_DEV_INITIALIZED; spin_unlock_irqrestore(&dev->lock, flags); return ret; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); out_free_udc_device_name: kfree(udc_device_name); out_free_udc_driver_name: kfree(udc_driver_name); out_free_driver_driver_name: kfree(driver_driver_name); out_free_driver_id_number: ida_free(&driver_id_numbers, driver_id_number); return ret; } static int raw_ioctl_run(struct raw_dev *dev, unsigned long value) { int ret = 0; unsigned long flags; if (value) return -EINVAL; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_INITIALIZED) { dev_dbg(dev->dev, "fail, device is not initialized\n"); ret = -EINVAL; goto out_unlock; } dev->state = STATE_DEV_REGISTERING; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_gadget_register_driver(&dev->driver); spin_lock_irqsave(&dev->lock, flags); if (ret) { dev_err(dev->dev, "fail, usb_gadget_register_driver returned %d\n", ret); dev->state = STATE_DEV_FAILED; goto out_unlock; } dev->gadget_registered = true; dev->state = STATE_DEV_RUNNING; /* Matches kref_put() in raw_release(). 
*/ kref_get(&dev->count); out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_event_fetch(struct raw_dev *dev, unsigned long value) { struct usb_raw_event arg; unsigned long flags; struct usb_raw_event *event; uint32_t length; if (copy_from_user(&arg, (void __user *)value, sizeof(arg))) return -EFAULT; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); spin_unlock_irqrestore(&dev->lock, flags); return -EINVAL; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); spin_unlock_irqrestore(&dev->lock, flags); return -EBUSY; } spin_unlock_irqrestore(&dev->lock, flags); event = raw_event_queue_fetch(&dev->queue); if (PTR_ERR(event) == -EINTR) { dev_dbg(&dev->gadget->dev, "event fetching interrupted\n"); return -EINTR; } if (IS_ERR(event)) { dev_err(&dev->gadget->dev, "failed to fetch event\n"); spin_lock_irqsave(&dev->lock, flags); dev->state = STATE_DEV_FAILED; spin_unlock_irqrestore(&dev->lock, flags); return -ENODEV; } length = min(arg.length, event->length); if (copy_to_user((void __user *)value, event, sizeof(*event) + length)) { kfree(event); return -EFAULT; } kfree(event); return 0; } static void *raw_alloc_io_data(struct usb_raw_ep_io *io, void __user *ptr, bool get_from_user) { void *data; if (copy_from_user(io, ptr, sizeof(*io))) return ERR_PTR(-EFAULT); if (io->ep >= USB_RAW_EPS_NUM_MAX) return ERR_PTR(-EINVAL); if (!usb_raw_io_flags_valid(io->flags)) return ERR_PTR(-EINVAL); if (io->length > PAGE_SIZE) return ERR_PTR(-EINVAL); if (get_from_user) data = memdup_user(ptr + sizeof(*io), io->length); else { data = kmalloc(io->length, GFP_KERNEL); if (!data) data = ERR_PTR(-ENOMEM); } return data; } static int raw_process_ep0_io(struct raw_dev *dev, struct usb_raw_ep_io *io, void *data, bool in) { int ret = 0; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (dev->ep0_urb_queued) { dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); ret = -EBUSY; goto out_unlock; } if ((in && !dev->ep0_in_pending) || (!in && !dev->ep0_out_pending)) { dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); ret = -EBUSY; goto out_unlock; } if (WARN_ON(in && dev->ep0_out_pending)) { ret = -ENODEV; dev->state = STATE_DEV_FAILED; goto out_unlock; } if (WARN_ON(!in && dev->ep0_in_pending)) { ret = -ENODEV; dev->state = STATE_DEV_FAILED; goto out_unlock; } dev->req->buf = data; dev->req->length = io->length; dev->req->zero = usb_raw_io_flags_zero(io->flags); dev->ep0_urb_queued = true; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_ep_queue(dev->gadget->ep0, dev->req, GFP_KERNEL); if (ret) { dev_err(&dev->gadget->dev, "fail, usb_ep_queue returned %d\n", ret); spin_lock_irqsave(&dev->lock, flags); goto out_queue_failed; } ret = wait_for_completion_interruptible(&dev->ep0_done); if (ret) { dev_dbg(&dev->gadget->dev, "wait interrupted\n"); usb_ep_dequeue(dev->gadget->ep0, dev->req); wait_for_completion(&dev->ep0_done); spin_lock_irqsave(&dev->lock, flags); if (dev->ep0_status == -ECONNRESET) dev->ep0_status = -EINTR; goto out_interrupted; } spin_lock_irqsave(&dev->lock, flags); out_interrupted: ret = dev->ep0_status; out_queue_failed: dev->ep0_urb_queued = false; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static 
int raw_ioctl_ep0_write(struct raw_dev *dev, unsigned long value) { int ret = 0; void *data; struct usb_raw_ep_io io; data = raw_alloc_io_data(&io, (void __user *)value, true); if (IS_ERR(data)) return PTR_ERR(data); ret = raw_process_ep0_io(dev, &io, data, true); kfree(data); return ret; } static int raw_ioctl_ep0_read(struct raw_dev *dev, unsigned long value) { int ret = 0; void *data; struct usb_raw_ep_io io; unsigned int length; data = raw_alloc_io_data(&io, (void __user *)value, false); if (IS_ERR(data)) return PTR_ERR(data); ret = raw_process_ep0_io(dev, &io, data, false); if (ret < 0) goto free; length = min(io.length, (unsigned int)ret); if (copy_to_user((void __user *)(value + sizeof(io)), data, length)) ret = -EFAULT; else ret = length; free: kfree(data); return ret; } static int raw_ioctl_ep0_stall(struct raw_dev *dev, unsigned long value) { int ret = 0; unsigned long flags; if (value) return -EINVAL; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (dev->ep0_urb_queued) { dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); ret = -EBUSY; goto out_unlock; } if (!dev->ep0_in_pending && !dev->ep0_out_pending) { dev_dbg(&dev->gadget->dev, "fail, no request pending\n"); ret = -EBUSY; goto out_unlock; } ret = usb_ep_set_halt(dev->gadget->ep0); if (ret < 0) dev_err(&dev->gadget->dev, "fail, usb_ep_set_halt returned %d\n", ret); if (dev->ep0_in_pending) dev->ep0_in_pending = false; else dev->ep0_out_pending = false; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep_enable(struct raw_dev *dev, unsigned long value) { int ret = 0, i; unsigned long flags; struct usb_endpoint_descriptor *desc; struct raw_ep *ep; bool ep_props_matched = false; desc = memdup_user((void __user *)value, sizeof(*desc)); if (IS_ERR(desc)) return PTR_ERR(desc); /* * Endpoints with a maxpacket length of 0 can cause crashes in UDC * drivers. 
*/ if (usb_endpoint_maxp(desc) == 0) { dev_dbg(dev->dev, "fail, bad endpoint maxpacket\n"); kfree(desc); return -EINVAL; } spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_free; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_free; } for (i = 0; i < dev->eps_num; i++) { ep = &dev->eps[i]; if (ep->addr != usb_endpoint_num(desc) && ep->addr != USB_RAW_EP_ADDR_ANY) continue; if (!usb_gadget_ep_match_desc(dev->gadget, ep->ep, desc, NULL)) continue; ep_props_matched = true; if (ep->state != STATE_EP_DISABLED) continue; ep->ep->desc = desc; ret = usb_ep_enable(ep->ep); if (ret < 0) { dev_err(&dev->gadget->dev, "fail, usb_ep_enable returned %d\n", ret); goto out_free; } ep->req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC); if (!ep->req) { dev_err(&dev->gadget->dev, "fail, usb_ep_alloc_request failed\n"); usb_ep_disable(ep->ep); ret = -ENOMEM; goto out_free; } ep->state = STATE_EP_ENABLED; ep->ep->driver_data = ep; ret = i; goto out_unlock; } if (!ep_props_matched) { dev_dbg(&dev->gadget->dev, "fail, bad endpoint descriptor\n"); ret = -EINVAL; } else { dev_dbg(&dev->gadget->dev, "fail, no endpoints available\n"); ret = -EBUSY; } out_free: kfree(desc); out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep_disable(struct raw_dev *dev, unsigned long value) { int ret = 0, i = value; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (i < 0 || i >= dev->eps_num) { dev_dbg(dev->dev, "fail, invalid endpoint\n"); ret = -EBUSY; goto out_unlock; } if (dev->eps[i].state == STATE_EP_DISABLED) { dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); ret = -EINVAL; goto out_unlock; } if (dev->eps[i].disabling) { dev_dbg(&dev->gadget->dev, "fail, disable already in progress\n"); ret = -EINVAL; goto out_unlock; } if (dev->eps[i].urb_queued) { dev_dbg(&dev->gadget->dev, "fail, waiting for urb completion\n"); ret = -EINVAL; goto out_unlock; } dev->eps[i].disabling = true; spin_unlock_irqrestore(&dev->lock, flags); usb_ep_disable(dev->eps[i].ep); spin_lock_irqsave(&dev->lock, flags); usb_ep_free_request(dev->eps[i].ep, dev->eps[i].req); kfree(dev->eps[i].ep->desc); dev->eps[i].state = STATE_EP_DISABLED; dev->eps[i].disabling = false; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep_set_clear_halt_wedge(struct raw_dev *dev, unsigned long value, bool set, bool halt) { int ret = 0, i = value; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (i < 0 || i >= dev->eps_num) { dev_dbg(dev->dev, "fail, invalid endpoint\n"); ret = -EBUSY; goto out_unlock; } if (dev->eps[i].state == STATE_EP_DISABLED) { dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); ret = -EINVAL; goto out_unlock; } if (dev->eps[i].disabling) { dev_dbg(&dev->gadget->dev, "fail, disable is in progress\n"); ret = -EINVAL; goto out_unlock; } if (dev->eps[i].urb_queued) { dev_dbg(&dev->gadget->dev, "fail, waiting for urb completion\n"); ret = 
-EINVAL; goto out_unlock; } if (usb_endpoint_xfer_isoc(dev->eps[i].ep->desc)) { dev_dbg(&dev->gadget->dev, "fail, can't halt/wedge ISO endpoint\n"); ret = -EINVAL; goto out_unlock; } if (set && halt) { ret = usb_ep_set_halt(dev->eps[i].ep); if (ret < 0) dev_err(&dev->gadget->dev, "fail, usb_ep_set_halt returned %d\n", ret); } else if (!set && halt) { ret = usb_ep_clear_halt(dev->eps[i].ep); if (ret < 0) dev_err(&dev->gadget->dev, "fail, usb_ep_clear_halt returned %d\n", ret); } else if (set && !halt) { ret = usb_ep_set_wedge(dev->eps[i].ep); if (ret < 0) dev_err(&dev->gadget->dev, "fail, usb_ep_set_wedge returned %d\n", ret); } out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static void gadget_ep_complete(struct usb_ep *ep, struct usb_request *req) { struct raw_ep *r_ep = (struct raw_ep *)ep->driver_data; struct raw_dev *dev = r_ep->dev; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (req->status) r_ep->status = req->status; else r_ep->status = req->actual; spin_unlock_irqrestore(&dev->lock, flags); complete((struct completion *)req->context); } static int raw_process_ep_io(struct raw_dev *dev, struct usb_raw_ep_io *io, void *data, bool in) { int ret = 0; unsigned long flags; struct raw_ep *ep; DECLARE_COMPLETION_ONSTACK(done); spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (io->ep >= dev->eps_num) { dev_dbg(&dev->gadget->dev, "fail, invalid endpoint\n"); ret = -EINVAL; goto out_unlock; } ep = &dev->eps[io->ep]; if (ep->state != STATE_EP_ENABLED) { dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); ret = -EBUSY; goto out_unlock; } if (ep->disabling) { dev_dbg(&dev->gadget->dev, "fail, endpoint is already being disabled\n"); ret = -EBUSY; goto out_unlock; } if (ep->urb_queued) { dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); ret = -EBUSY; goto out_unlock; } if (in != usb_endpoint_dir_in(ep->ep->desc)) { dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); ret = -EINVAL; goto out_unlock; } ep->dev = dev; ep->req->context = &done; ep->req->complete = gadget_ep_complete; ep->req->buf = data; ep->req->length = io->length; ep->req->zero = usb_raw_io_flags_zero(io->flags); ep->urb_queued = true; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_ep_queue(ep->ep, ep->req, GFP_KERNEL); if (ret) { dev_err(&dev->gadget->dev, "fail, usb_ep_queue returned %d\n", ret); spin_lock_irqsave(&dev->lock, flags); goto out_queue_failed; } ret = wait_for_completion_interruptible(&done); if (ret) { dev_dbg(&dev->gadget->dev, "wait interrupted\n"); usb_ep_dequeue(ep->ep, ep->req); wait_for_completion(&done); spin_lock_irqsave(&dev->lock, flags); if (ep->status == -ECONNRESET) ep->status = -EINTR; goto out_interrupted; } spin_lock_irqsave(&dev->lock, flags); out_interrupted: ret = ep->status; out_queue_failed: ep->urb_queued = false; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep_write(struct raw_dev *dev, unsigned long value) { int ret = 0; char *data; struct usb_raw_ep_io io; data = raw_alloc_io_data(&io, (void __user *)value, true); if (IS_ERR(data)) return PTR_ERR(data); ret = raw_process_ep_io(dev, &io, data, true); kfree(data); return ret; } static int raw_ioctl_ep_read(struct raw_dev *dev, unsigned long value) { int ret = 0; char *data; struct usb_raw_ep_io io; unsigned int length; 
data = raw_alloc_io_data(&io, (void __user *)value, false); if (IS_ERR(data)) return PTR_ERR(data); ret = raw_process_ep_io(dev, &io, data, false); if (ret < 0) goto free; length = min(io.length, (unsigned int)ret); if (copy_to_user((void __user *)(value + sizeof(io)), data, length)) ret = -EFAULT; else ret = length; free: kfree(data); return ret; } static int raw_ioctl_configure(struct raw_dev *dev, unsigned long value) { int ret = 0; unsigned long flags; if (value) return -EINVAL; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } usb_gadget_set_state(dev->gadget, USB_STATE_CONFIGURED); out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_vbus_draw(struct raw_dev *dev, unsigned long value) { int ret = 0; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } usb_gadget_vbus_draw(dev->gadget, 2 * value); out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static void fill_ep_caps(struct usb_ep_caps *caps, struct usb_raw_ep_caps *raw_caps) { raw_caps->type_control = caps->type_control; raw_caps->type_iso = caps->type_iso; raw_caps->type_bulk = caps->type_bulk; raw_caps->type_int = caps->type_int; raw_caps->dir_in = caps->dir_in; raw_caps->dir_out = caps->dir_out; } static void fill_ep_limits(struct usb_ep *ep, struct usb_raw_ep_limits *limits) { limits->maxpacket_limit = ep->maxpacket_limit; limits->max_streams = ep->max_streams; } static int raw_ioctl_eps_info(struct raw_dev *dev, unsigned long value) { int ret = 0, i; unsigned long flags; struct usb_raw_eps_info *info; struct raw_ep *ep; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { ret = -ENOMEM; goto out; } spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; spin_unlock_irqrestore(&dev->lock, flags); goto out_free; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; spin_unlock_irqrestore(&dev->lock, flags); goto out_free; } for (i = 0; i < dev->eps_num; i++) { ep = &dev->eps[i]; strscpy(&info->eps[i].name[0], ep->ep->name, USB_RAW_EP_NAME_MAX); info->eps[i].addr = ep->addr; fill_ep_caps(&ep->ep->caps, &info->eps[i].caps); fill_ep_limits(ep->ep, &info->eps[i].limits); } ret = dev->eps_num; spin_unlock_irqrestore(&dev->lock, flags); if (copy_to_user((void __user *)value, info, sizeof(*info))) ret = -EFAULT; out_free: kfree(info); out: return ret; } static long raw_ioctl(struct file *fd, unsigned int cmd, unsigned long value) { struct raw_dev *dev = fd->private_data; int ret = 0; if (!dev) return -EBUSY; switch (cmd) { case USB_RAW_IOCTL_INIT: ret = raw_ioctl_init(dev, value); break; case USB_RAW_IOCTL_RUN: ret = raw_ioctl_run(dev, value); break; case USB_RAW_IOCTL_EVENT_FETCH: ret = raw_ioctl_event_fetch(dev, value); break; case USB_RAW_IOCTL_EP0_WRITE: ret = raw_ioctl_ep0_write(dev, value); break; case USB_RAW_IOCTL_EP0_READ: ret = raw_ioctl_ep0_read(dev, value); break; case USB_RAW_IOCTL_EP_ENABLE: ret = raw_ioctl_ep_enable(dev, value); break; case USB_RAW_IOCTL_EP_DISABLE: ret = raw_ioctl_ep_disable(dev, 
value); break; case USB_RAW_IOCTL_EP_WRITE: ret = raw_ioctl_ep_write(dev, value); break; case USB_RAW_IOCTL_EP_READ: ret = raw_ioctl_ep_read(dev, value); break; case USB_RAW_IOCTL_CONFIGURE: ret = raw_ioctl_configure(dev, value); break; case USB_RAW_IOCTL_VBUS_DRAW: ret = raw_ioctl_vbus_draw(dev, value); break; case USB_RAW_IOCTL_EPS_INFO: ret = raw_ioctl_eps_info(dev, value); break; case USB_RAW_IOCTL_EP0_STALL: ret = raw_ioctl_ep0_stall(dev, value); break; case USB_RAW_IOCTL_EP_SET_HALT: ret = raw_ioctl_ep_set_clear_halt_wedge( dev, value, true, true); break; case USB_RAW_IOCTL_EP_CLEAR_HALT: ret = raw_ioctl_ep_set_clear_halt_wedge( dev, value, false, true); break; case USB_RAW_IOCTL_EP_SET_WEDGE: ret = raw_ioctl_ep_set_clear_halt_wedge( dev, value, true, false); break; default: ret = -EINVAL; } return ret; } /*----------------------------------------------------------------------*/ static const struct file_operations raw_fops = { .open = raw_open, .unlocked_ioctl = raw_ioctl, .compat_ioctl = raw_ioctl, .release = raw_release, .llseek = no_llseek, }; static struct miscdevice raw_misc_device = { .minor = MISC_DYNAMIC_MINOR, .name = DRIVER_NAME, .fops = &raw_fops, }; module_misc_device(raw_misc_device);
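/*
 * Illustrative, out-of-tree userspace sketch (not part of this driver): the
 * minimal ioctl sequence a Raw Gadget user follows -- open the misc device,
 * USB_RAW_IOCTL_INIT with the UDC driver/device names, USB_RAW_IOCTL_RUN,
 * then an event-fetch loop.  The "dummy_udc"/"dummy_udc.0" names below are
 * placeholders that assume the dummy_hcd test UDC; a real gadget would also
 * decode USB_RAW_EVENT_CONTROL data and answer over EP0 (see
 * Documentation/usb/raw-gadget.rst).
 */
#if 0	/* example only, never compiled as part of the kernel */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/usb/ch9.h>
#include <linux/usb/raw_gadget.h>

int main(void)
{
	struct usb_raw_init init = { .speed = USB_SPEED_HIGH };
	int fd = open("/dev/raw-gadget", O_RDWR);

	if (fd < 0)
		return 1;

	/* Placeholder UDC names: dummy_hcd's emulated controller. */
	strcpy((char *)init.driver_name, "dummy_udc");
	strcpy((char *)init.device_name, "dummy_udc.0");

	if (ioctl(fd, USB_RAW_IOCTL_INIT, &init) ||
	    ioctl(fd, USB_RAW_IOCTL_RUN, 0))
		return 1;

	for (;;) {
		/* Room for the event header plus fetched event data. */
		struct {
			struct usb_raw_event	event;
			char			data[4096];
		} buf;

		buf.event.type = 0;
		buf.event.length = sizeof(buf.data);
		if (ioctl(fd, USB_RAW_IOCTL_EVENT_FETCH, &buf.event))
			break;
		printf("event type=%u length=%u\n",
		       buf.event.type, buf.event.length);
	}

	close(fd);
	return 0;
}
#endif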
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM skb

#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SKB_H

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tracepoint.h>

#undef FN
#define FN(reason)	TRACE_DEFINE_ENUM(SKB_DROP_REASON_##reason);
DEFINE_DROP_REASON(FN, FN)

#undef FN
#undef FNe
#define FN(reason)	{ SKB_DROP_REASON_##reason, #reason },
#define FNe(reason)	{ SKB_DROP_REASON_##reason, #reason }

/*
 * Tracepoint for freeing an sk_buff:
 */
TRACE_EVENT(kfree_skb,

	TP_PROTO(struct sk_buff *skb, void *location,
		 enum skb_drop_reason reason),

	TP_ARGS(skb, location, reason),

	TP_STRUCT__entry(
		__field(void *,			skbaddr)
		__field(void *,			location)
		__field(unsigned short,		protocol)
		__field(enum skb_drop_reason,	reason)
	),

	TP_fast_assign(
		__entry->skbaddr = skb;
		__entry->location = location;
		__entry->protocol = ntohs(skb->protocol);
		__entry->reason = reason;
	),

	TP_printk("skbaddr=%p protocol=%u location=%pS reason: %s",
		  __entry->skbaddr, __entry->protocol, __entry->location,
		  __print_symbolic(__entry->reason,
				   DEFINE_DROP_REASON(FN, FNe)))
);

#undef FN
#undef FNe

TRACE_EVENT(consume_skb,

	TP_PROTO(struct sk_buff *skb, void *location),

	TP_ARGS(skb, location),

	TP_STRUCT__entry(
		__field(void *,	skbaddr)
		__field(void *,	location)
	),

	TP_fast_assign(
		__entry->skbaddr = skb;
		__entry->location = location;
	),

	TP_printk("skbaddr=%p location=%pS", __entry->skbaddr, __entry->location)
);

TRACE_EVENT(skb_copy_datagram_iovec,

	TP_PROTO(const struct sk_buff *skb, int len),

	TP_ARGS(skb, len),

	TP_STRUCT__entry(
		__field(const void *,	skbaddr)
		__field(int,		len)
	),

	TP_fast_assign(
		__entry->skbaddr = skb;
		__entry->len = len;
	),

	TP_printk("skbaddr=%p len=%d", __entry->skbaddr, __entry->len)
);

#endif /* _TRACE_SKB_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
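/*
 * Illustrative, out-of-tree module sketch (not part of this header): one way
 * to consume the kfree_skb tracepoint defined above from a module, by
 * registering a probe whose arguments mirror TP_PROTO() with a leading
 * private-data pointer.  The "demo" names are made up; the same event can
 * also be enabled with no code at all via
 * /sys/kernel/tracing/events/skb/kfree_skb/enable.
 */
#if 0	/* example only, never compiled as part of the kernel */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <trace/events/skb.h>

static void demo_kfree_skb_probe(void *ignore, struct sk_buff *skb,
				 void *location, enum skb_drop_reason reason)
{
	pr_info("dropped skb %p at %pS, reason %d\n", skb, location, reason);
}

static int __init demo_init(void)
{
	return register_trace_kfree_skb(demo_kfree_skb_probe, NULL);
}

static void __exit demo_exit(void)
{
	unregister_trace_kfree_skb(demo_kfree_skb_probe, NULL);
	/* Wait for in-flight probe calls before the module text goes away. */
	tracepoint_synchronize_unregister();
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
#endif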
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/lib/kasprintf.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/stdarg.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/string.h>

/* Simplified asprintf. */
char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
{
	unsigned int first, second;
	char *p;
	va_list aq;

	va_copy(aq, ap);
	first = vsnprintf(NULL, 0, fmt, aq);
	va_end(aq);

	p = kmalloc_track_caller(first+1, gfp);
	if (!p)
		return NULL;

	second = vsnprintf(p, first+1, fmt, ap);
	WARN(first != second, "different return values (%u and %u) from vsnprintf(\"%s\", ...)",
	     first, second, fmt);

	return p;
}
EXPORT_SYMBOL(kvasprintf);

/*
 * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt
 * (or the sole vararg) points to rodata, we will then save a memory
 * allocation and string copy. In any case, the return value should be
 * freed using kfree_const().
 */
const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap)
{
	if (!strchr(fmt, '%'))
		return kstrdup_const(fmt, gfp);
	if (!strcmp(fmt, "%s"))
		return kstrdup_const(va_arg(ap, const char*), gfp);
	return kvasprintf(gfp, fmt, ap);
}
EXPORT_SYMBOL(kvasprintf_const);

char *kasprintf(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(gfp, fmt, ap);
	va_end(ap);

	return p;
}
EXPORT_SYMBOL(kasprintf);
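/*
 * Illustrative sketch (not part of this file): typical kernel-side use of
 * the helpers above.  A kasprintf() result is an ordinary kmalloc'ed string
 * released with kfree(); a kvasprintf_const() result may alias rodata and,
 * as the comment above says, must be released with kfree_const().  The
 * "demo" names are made up.
 */
#if 0	/* example only */
#include <linux/slab.h>

static char *demo_make_label(int id)
{
	/* Heap-allocated, formatted string; caller kfree()s it. */
	return kasprintf(GFP_KERNEL, "demo-device-%d", id);
}

static void demo_use_label(void)
{
	char *label = demo_make_label(3);

	if (!label)
		return;
	/* ... use "demo-device-3" ... */
	kfree(label);
}
#endif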
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * attribute_container.h - a generic container for all classes
 *
 * Copyright (c) 2005 - James Bottomley <James.Bottomley@steeleye.com>
 */

#ifndef _ATTRIBUTE_CONTAINER_H_
#define _ATTRIBUTE_CONTAINER_H_

#include <linux/list.h>
#include <linux/klist.h>

struct device;

struct attribute_container {
	struct list_head	node;
	struct klist		containers;
	struct class		*class;
	const struct attribute_group *grp;
	struct device_attribute **attrs;
	int (*match)(struct attribute_container *, struct device *);
#define	ATTRIBUTE_CONTAINER_NO_CLASSDEVS	0x01
	unsigned long		flags;
};

static inline int
attribute_container_no_classdevs(struct attribute_container *atc)
{
	return atc->flags & ATTRIBUTE_CONTAINER_NO_CLASSDEVS;
}

static inline void
attribute_container_set_no_classdevs(struct attribute_container *atc)
{
	atc->flags |= ATTRIBUTE_CONTAINER_NO_CLASSDEVS;
}

int attribute_container_register(struct attribute_container *cont);
int __must_check attribute_container_unregister(struct attribute_container *cont);
void attribute_container_create_device(struct device *dev,
				       int (*fn)(struct attribute_container *,
						 struct device *,
						 struct device *));
void attribute_container_add_device(struct device *dev,
				    int (*fn)(struct attribute_container *,
					      struct device *,
					      struct device *));
void attribute_container_remove_device(struct device *dev,
				       void (*fn)(struct attribute_container *,
						  struct device *,
						  struct device *));
void attribute_container_device_trigger(struct device *dev,
					int (*fn)(struct attribute_container *,
						  struct device *,
						  struct device *));
int attribute_container_device_trigger_safe(struct device *dev,
					    int (*fn)(struct attribute_container *,
						      struct device *,
						      struct device *),
					    int (*undo)(struct attribute_container *,
							struct device *,
							struct device *));
void attribute_container_trigger(struct device *dev,
				 int (*fn)(struct attribute_container *,
					   struct device *));
int attribute_container_add_attrs(struct device *classdev);
int attribute_container_add_class_device(struct device *classdev);
int attribute_container_add_class_device_adapter(struct attribute_container *cont,
						 struct device *dev,
						 struct device *classdev);
void attribute_container_remove_attrs(struct device *classdev);
void attribute_container_class_device_del(struct device *classdev);
struct attribute_container *attribute_container_classdev_to_container(struct device *);
struct device *attribute_container_find_class_device(struct attribute_container *,
						      struct device *);
struct device_attribute **attribute_container_classdev_to_attrs(const struct device *classdev);

#endif
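/*
 * Illustrative, out-of-tree sketch (not part of this header): the basic
 * shape of an attribute_container user, in the style of the transport
 * classes built on top of this API.  A container pairs a class with a match
 * callback; attribute_container_add_device() then offers each newly added
 * device to every registered container.  The "demo" names, demo_class, and
 * the match policy are placeholders, and real users normally go through the
 * transport_class helpers rather than this interface directly.
 */
#if 0	/* example only */
#include <linux/attribute_container.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/string.h>

static struct class demo_class = {
	.name = "demo_container",
};

/* Claim only devices whose bus is named "demo" (placeholder policy). */
static int demo_match(struct attribute_container *cont, struct device *dev)
{
	return dev->bus && !strcmp(dev->bus->name, "demo");
}

static struct attribute_container demo_cont = {
	.class	= &demo_class,
	.match	= demo_match,
};

static int __init demo_cont_init(void)
{
	return attribute_container_register(&demo_cont);
}

static void __exit demo_cont_exit(void)
{
	if (attribute_container_unregister(&demo_cont))
		pr_warn("demo container still in use\n");
}

module_init(demo_cont_init);
module_exit(demo_cont_exit);
MODULE_LICENSE("GPL");
#endif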
// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * Hopefully this will be a rather complete VT102 implementation.
 *
 * Beeping thanks to John T Kohl.
 *
 * Virtual Consoles, Screen Blanking, Screen Dumping, Color, Graphics
 *   Chars, and VT100 enhancements by Peter MacDonald.
 *
 * Copy and paste function by Andrew Haylett,
 *   some enhancements by Alessandro Rubini.
 *
 * Code to check for different video-cards mostly by Galen Hunt,
 * <g-hunt@ee.utah.edu>
 *
 * Rudimentary ISO 10646/Unicode/UTF-8 character set support by
 * Markus Kuhn, <mskuhn@immd4.informatik.uni-erlangen.de>.
 *
 * Dynamic allocation of consoles, aeb@cwi.nl, May 1994
 * Resizing of consoles, aeb, 940926
 *
 * Code for xterm like mouse click reporting by Peter Orbaek 20-Jul-94
 * <poe@daimi.aau.dk>
 *
 * User-defined bell sound, new setterm control sequences and printk
 * redirection by Martin Mares <mj@k332.feld.cvut.cz> 19-Nov-95
 *
 * APM screenblank bug fixed Takashi Manabe <manabe@roy.dsl.tutics.tut.jp>
 *
 * Merge with the abstract console driver by Geert Uytterhoeven
 * <geert@linux-m68k.org>, Jan 1997.
 *
 *   Original m68k console driver modifications by
 *
 *     - Arno Griffioen <arno@usn.nl>
 *     - David Carter <carter@cs.bris.ac.uk>
 *
 *   The abstract console driver provides a generic interface for a text
 *   console.
It supports VGA text mode, frame buffer based graphical consoles * and special graphics processors that are only accessible through some * registers (e.g. a TMS340x0 GSP). * * The interface to the hardware is specified using a special structure * (struct consw) which contains function pointers to console operations * (see <linux/console.h> for more information). * * Support for changeable cursor shape * by Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>, August 1997 * * Ported to i386 and con_scrolldelta fixed * by Emmanuel Marty <core@ggi-project.org>, April 1998 * * Resurrected character buffers in videoram plus lots of other trickery * by Martin Mares <mj@atrey.karlin.mff.cuni.cz>, July 1998 * * Removed old-style timers, introduced console_timer, made timer * deletion SMP-safe. 17Jun00, Andrew Morton * * Removed console_lock, enabled interrupts across all console operations * 13 March 2001, Andrew Morton * * Fixed UTF-8 mode so alternate charset modes always work according * to control sequences interpreted in do_con_trol function * preserving backward VT100 semigraphics compatibility, * malformed UTF sequences represented as sequences of replacement glyphs, * original codes or '?' as a last resort if replacement glyph is undefined * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006 */ #include <linux/module.h> #include <linux/types.h> #include <linux/sched/signal.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/kd.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/major.h> #include <linux/mm.h> #include <linux/console.h> #include <linux/init.h> #include <linux/mutex.h> #include <linux/vt_kern.h> #include <linux/selection.h> #include <linux/tiocl.h> #include <linux/kbd_kern.h> #include <linux/consolemap.h> #include <linux/timer.h> #include <linux/interrupt.h> #include <linux/workqueue.h> #include <linux/pm.h> #include <linux/font.h> #include <linux/bitops.h> #include <linux/notifier.h> #include <linux/device.h> #include <linux/io.h> #include <linux/uaccess.h> #include <linux/kdb.h> #include <linux/ctype.h> #include <linux/bsearch.h> #include <linux/gcd.h> #define MAX_NR_CON_DRIVER 16 #define CON_DRIVER_FLAG_MODULE 1 #define CON_DRIVER_FLAG_INIT 2 #define CON_DRIVER_FLAG_ATTR 4 #define CON_DRIVER_FLAG_ZOMBIE 8 struct con_driver { const struct consw *con; const char *desc; struct device *dev; int node; int first; int last; int flag; }; static struct con_driver registered_con_driver[MAX_NR_CON_DRIVER]; const struct consw *conswitchp; /* * Here is the default bell parameters: 750HZ, 1/8th of a second */ #define DEFAULT_BELL_PITCH 750 #define DEFAULT_BELL_DURATION (HZ/8) #define DEFAULT_CURSOR_BLINK_MS 200 struct vc vc_cons [MAX_NR_CONSOLES]; EXPORT_SYMBOL(vc_cons); static const struct consw *con_driver_map[MAX_NR_CONSOLES]; static int con_open(struct tty_struct *, struct file *); static void vc_init(struct vc_data *vc, int do_clear); static void gotoxy(struct vc_data *vc, int new_x, int new_y); static void save_cur(struct vc_data *vc); static void reset_terminal(struct vc_data *vc, int do_clear); static void con_flush_chars(struct tty_struct *tty); static int set_vesa_blanking(u8 __user *mode); static void set_cursor(struct vc_data *vc); static void hide_cursor(struct vc_data *vc); static void console_callback(struct work_struct *ignored); static void con_driver_unregister_callback(struct work_struct *ignored); static void blank_screen_t(struct timer_list *unused); static void 
set_palette(struct vc_data *vc); static void unblank_screen(void); #define vt_get_kmsg_redirect() vt_kmsg_redirect(-1) int default_utf8 = true; module_param(default_utf8, int, S_IRUGO | S_IWUSR); int global_cursor_default = -1; module_param(global_cursor_default, int, S_IRUGO | S_IWUSR); EXPORT_SYMBOL(global_cursor_default); static int cur_default = CUR_UNDERLINE; module_param(cur_default, int, S_IRUGO | S_IWUSR); /* * ignore_poke: don't unblank the screen when things are typed. This is * mainly for the privacy of braille terminal users. */ static int ignore_poke; int do_poke_blanked_console; int console_blanked; EXPORT_SYMBOL(console_blanked); static enum vesa_blank_mode vesa_blank_mode; static int vesa_off_interval; static int blankinterval; core_param(consoleblank, blankinterval, int, 0444); static DECLARE_WORK(console_work, console_callback); static DECLARE_WORK(con_driver_unregister_work, con_driver_unregister_callback); /* * fg_console is the current virtual console, * last_console is the last used one, * want_console is the console we want to switch to, * saved_* variants are for save/restore around kernel debugger enter/leave */ int fg_console; EXPORT_SYMBOL(fg_console); int last_console; int want_console = -1; static int saved_fg_console; static int saved_last_console; static int saved_want_console; static int saved_vc_mode; static int saved_console_blanked; /* * For each existing display, we have a pointer to console currently visible * on that display, allowing consoles other than fg_console to be refreshed * appropriately. Unless the low-level driver supplies its own display_fg * variable, we use this one for the "master display". */ static struct vc_data *master_display_fg; /* * Unfortunately, we need to delay tty echo when we're currently writing to the * console since the code is (and always was) not re-entrant, so we schedule * all flip requests to process context with schedule-task() and run it from * console_callback(). */ /* * For the same reason, we defer scrollback to the console callback. */ static int scrollback_delta; /* * Hook so that the power management routines can (un)blank * the console on our behalf. */ int (*console_blank_hook)(int); EXPORT_SYMBOL(console_blank_hook); static DEFINE_TIMER(console_timer, blank_screen_t); static int blank_state; static int blank_timer_expired; enum { blank_off = 0, blank_normal_wait, blank_vesa_wait, }; /* * /sys/class/tty/tty0/ * * the attribute 'active' contains the name of the current vc * console and it supports poll() to detect vc switches */ static struct device *tty0dev; /* * Notifier list for console events. 
*/ static ATOMIC_NOTIFIER_HEAD(vt_notifier_list); int register_vt_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&vt_notifier_list, nb); } EXPORT_SYMBOL_GPL(register_vt_notifier); int unregister_vt_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&vt_notifier_list, nb); } EXPORT_SYMBOL_GPL(unregister_vt_notifier); static void notify_write(struct vc_data *vc, unsigned int unicode) { struct vt_notifier_param param = { .vc = vc, .c = unicode }; atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param); } static void notify_update(struct vc_data *vc) { struct vt_notifier_param param = { .vc = vc }; atomic_notifier_call_chain(&vt_notifier_list, VT_UPDATE, &param); } /* * Low-Level Functions */ static inline bool con_is_fg(const struct vc_data *vc) { return vc->vc_num == fg_console; } static inline bool con_should_update(const struct vc_data *vc) { return con_is_visible(vc) && !console_blanked; } static inline u16 *screenpos(const struct vc_data *vc, unsigned int offset, bool viewed) { unsigned long origin = viewed ? vc->vc_visible_origin : vc->vc_origin; return (u16 *)(origin + offset); } static void con_putc(struct vc_data *vc, u16 ca, unsigned int y, unsigned int x) { if (vc->vc_sw->con_putc) vc->vc_sw->con_putc(vc, ca, y, x); else vc->vc_sw->con_putcs(vc, &ca, 1, y, x); } /* Called from the keyboard irq path.. */ static inline void scrolldelta(int lines) { /* FIXME */ /* scrolldelta needs some kind of consistency lock, but the BKL was and still is not protecting versus the scheduled back end */ scrollback_delta += lines; schedule_console_callback(); } void schedule_console_callback(void) { schedule_work(&console_work); } /* * Code to manage unicode-based screen buffers */ /* * Our screen buffer is preceded by an array of line pointers so that * scrolling only implies some pointer shuffling. 
*/ static u32 **vc_uniscr_alloc(unsigned int cols, unsigned int rows) { u32 **uni_lines; void *p; unsigned int memsize, i, col_size = cols * sizeof(**uni_lines); /* allocate everything in one go */ memsize = col_size * rows; memsize += rows * sizeof(*uni_lines); uni_lines = vzalloc(memsize); if (!uni_lines) return NULL; /* initial line pointers */ p = uni_lines + rows; for (i = 0; i < rows; i++) { uni_lines[i] = p; p += col_size; } return uni_lines; } static void vc_uniscr_free(u32 **uni_lines) { vfree(uni_lines); } static void vc_uniscr_set(struct vc_data *vc, u32 **new_uni_lines) { vc_uniscr_free(vc->vc_uni_lines); vc->vc_uni_lines = new_uni_lines; } static void vc_uniscr_putc(struct vc_data *vc, u32 uc) { if (vc->vc_uni_lines) vc->vc_uni_lines[vc->state.y][vc->state.x] = uc; } static void vc_uniscr_insert(struct vc_data *vc, unsigned int nr) { if (vc->vc_uni_lines) { u32 *ln = vc->vc_uni_lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; memmove(&ln[x + nr], &ln[x], (cols - x - nr) * sizeof(*ln)); memset32(&ln[x], ' ', nr); } } static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr) { if (vc->vc_uni_lines) { u32 *ln = vc->vc_uni_lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; memmove(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); memset32(&ln[cols - nr], ' ', nr); } } static void vc_uniscr_clear_line(struct vc_data *vc, unsigned int x, unsigned int nr) { if (vc->vc_uni_lines) memset32(&vc->vc_uni_lines[vc->state.y][x], ' ', nr); } static void vc_uniscr_clear_lines(struct vc_data *vc, unsigned int y, unsigned int nr) { if (vc->vc_uni_lines) while (nr--) memset32(vc->vc_uni_lines[y++], ' ', vc->vc_cols); } /* juggling array rotation algorithm (complexity O(N), size complexity O(1)) */ static void juggle_array(u32 **array, unsigned int size, unsigned int nr) { unsigned int gcd_idx; for (gcd_idx = 0; gcd_idx < gcd(nr, size); gcd_idx++) { u32 *gcd_idx_val = array[gcd_idx]; unsigned int dst_idx = gcd_idx; while (1) { unsigned int src_idx = (dst_idx + nr) % size; if (src_idx == gcd_idx) break; array[dst_idx] = array[src_idx]; dst_idx = src_idx; } array[dst_idx] = gcd_idx_val; } } static void vc_uniscr_scroll(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int nr) { u32 **uni_lines = vc->vc_uni_lines; unsigned int size = bottom - top; if (!uni_lines) return; if (dir == SM_DOWN) { juggle_array(&uni_lines[top], size, size - nr); vc_uniscr_clear_lines(vc, top, nr); } else { juggle_array(&uni_lines[top], size, nr); vc_uniscr_clear_lines(vc, bottom - nr, nr); } } static void vc_uniscr_copy_area(u32 **dst_lines, unsigned int dst_cols, unsigned int dst_rows, u32 **src_lines, unsigned int src_cols, unsigned int src_top_row, unsigned int src_bot_row) { unsigned int dst_row = 0; if (!dst_lines) return; while (src_top_row < src_bot_row) { u32 *src_line = src_lines[src_top_row]; u32 *dst_line = dst_lines[dst_row]; memcpy(dst_line, src_line, src_cols * sizeof(*src_line)); if (dst_cols - src_cols) memset32(dst_line + src_cols, ' ', dst_cols - src_cols); src_top_row++; dst_row++; } while (dst_row < dst_rows) { u32 *dst_line = dst_lines[dst_row]; memset32(dst_line, ' ', dst_cols); dst_row++; } } /* * Called from vcs_read() to make sure unicode screen retrieval is possible. * This will initialize the unicode screen buffer if not already done. * This returns 0 if OK, or a negative error code otherwise. * In particular, -ENODATA is returned if the console is not in UTF-8 mode. 
*/ int vc_uniscr_check(struct vc_data *vc) { u32 **uni_lines; unsigned short *p; int x, y, mask; WARN_CONSOLE_UNLOCKED(); if (!vc->vc_utf) return -ENODATA; if (vc->vc_uni_lines) return 0; uni_lines = vc_uniscr_alloc(vc->vc_cols, vc->vc_rows); if (!uni_lines) return -ENOMEM; /* * Let's populate it initially with (imperfect) reverse translation. * This is the next best thing we can do short of having it enabled * from the start even when no users rely on this functionality. True * unicode content will be available after a complete screen refresh. */ p = (unsigned short *)vc->vc_origin; mask = vc->vc_hi_font_mask | 0xff; for (y = 0; y < vc->vc_rows; y++) { u32 *line = uni_lines[y]; for (x = 0; x < vc->vc_cols; x++) { u16 glyph = scr_readw(p++) & mask; line[x] = inverse_translate(vc, glyph, true); } } vc->vc_uni_lines = uni_lines; return 0; } /* * Called from vcs_read() to get the unicode data from the screen. * This must be preceded by a successful call to vc_uniscr_check() once * the console lock has been taken. */ void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, bool viewed, unsigned int row, unsigned int col, unsigned int nr) { u32 **uni_lines = vc->vc_uni_lines; int offset = row * vc->vc_size_row + col * 2; unsigned long pos; if (WARN_ON_ONCE(!uni_lines)) return; pos = (unsigned long)screenpos(vc, offset, viewed); if (pos >= vc->vc_origin && pos < vc->vc_scr_end) { /* * Desired position falls in the main screen buffer. * However the actual row/col might be different if * scrollback is active. */ row = (pos - vc->vc_origin) / vc->vc_size_row; col = ((pos - vc->vc_origin) % vc->vc_size_row) / 2; memcpy(dest, &uni_lines[row][col], nr * sizeof(u32)); } else { /* * Scrollback is active. For now let's simply backtranslate * the screen glyphs until the unicode screen buffer does * synchronize with console display drivers for a scrollback * buffer of its own. 
*/ u16 *p = (u16 *)pos; int mask = vc->vc_hi_font_mask | 0xff; u32 *uni_buf = dest; while (nr--) { u16 glyph = scr_readw(p++) & mask; *uni_buf++ = inverse_translate(vc, glyph, true); } } } static void con_scroll(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int nr) { unsigned int rows = bottom - top; u16 *clear, *dst, *src; if (top + nr >= bottom) nr = rows - 1; if (bottom > vc->vc_rows || top >= bottom || nr < 1) return; vc_uniscr_scroll(vc, top, bottom, dir, nr); if (con_is_visible(vc) && vc->vc_sw->con_scroll(vc, top, bottom, dir, nr)) return; src = clear = (u16 *)(vc->vc_origin + vc->vc_size_row * top); dst = (u16 *)(vc->vc_origin + vc->vc_size_row * (top + nr)); if (dir == SM_UP) { clear = src + (rows - nr) * vc->vc_cols; swap(src, dst); } scr_memmovew(dst, src, (rows - nr) * vc->vc_size_row); scr_memsetw(clear, vc->vc_video_erase_char, vc->vc_size_row * nr); } static void do_update_region(struct vc_data *vc, unsigned long start, int count) { unsigned int xx, yy, offset; u16 *p = (u16 *)start; offset = (start - vc->vc_origin) / 2; xx = offset % vc->vc_cols; yy = offset / vc->vc_cols; for(;;) { u16 attrib = scr_readw(p) & 0xff00; int startx = xx; u16 *q = p; while (xx < vc->vc_cols && count) { if (attrib != (scr_readw(p) & 0xff00)) { if (p > q) vc->vc_sw->con_putcs(vc, q, p-q, yy, startx); startx = xx; q = p; attrib = scr_readw(p) & 0xff00; } p++; xx++; count--; } if (p > q) vc->vc_sw->con_putcs(vc, q, p-q, yy, startx); if (!count) break; xx = 0; yy++; } } void update_region(struct vc_data *vc, unsigned long start, int count) { WARN_CONSOLE_UNLOCKED(); if (con_should_update(vc)) { hide_cursor(vc); do_update_region(vc, start, count); set_cursor(vc); } } EXPORT_SYMBOL(update_region); /* Structure of attributes is hardware-dependent */ static u8 build_attr(struct vc_data *vc, u8 _color, enum vc_intensity _intensity, bool _blink, bool _underline, bool _reverse, bool _italic) { if (vc->vc_sw->con_build_attr) return vc->vc_sw->con_build_attr(vc, _color, _intensity, _blink, _underline, _reverse, _italic); /* * ++roman: I completely changed the attribute format for monochrome * mode (!can_do_color). The formerly used MDA (monochrome display * adapter) format didn't allow the combination of certain effects. 
* Now the attribute is just a bit vector: * Bit 0..1: intensity (0..2) * Bit 2 : underline * Bit 3 : reverse * Bit 7 : blink */ { u8 a = _color; if (!vc->vc_can_do_color) return _intensity | (_italic << 1) | (_underline << 2) | (_reverse << 3) | (_blink << 7); if (_italic) a = (a & 0xF0) | vc->vc_itcolor; else if (_underline) a = (a & 0xf0) | vc->vc_ulcolor; else if (_intensity == VCI_HALF_BRIGHT) a = (a & 0xf0) | vc->vc_halfcolor; if (_reverse) a = (a & 0x88) | (((a >> 4) | (a << 4)) & 0x77); if (_blink) a ^= 0x80; if (_intensity == VCI_BOLD) a ^= 0x08; if (vc->vc_hi_font_mask == 0x100) a <<= 1; return a; } } static void update_attr(struct vc_data *vc) { vc->vc_attr = build_attr(vc, vc->state.color, vc->state.intensity, vc->state.blink, vc->state.underline, vc->state.reverse ^ vc->vc_decscnm, vc->state.italic); vc->vc_video_erase_char = ' ' | (build_attr(vc, vc->state.color, VCI_NORMAL, vc->state.blink, false, vc->vc_decscnm, false) << 8); } /* Note: inverting the screen twice should revert to the original state */ void invert_screen(struct vc_data *vc, int offset, int count, bool viewed) { u16 *p; WARN_CONSOLE_UNLOCKED(); count /= 2; p = screenpos(vc, offset, viewed); if (vc->vc_sw->con_invert_region) { vc->vc_sw->con_invert_region(vc, p, count); } else { u16 *q = p; int cnt = count; u16 a; if (!vc->vc_can_do_color) { while (cnt--) { a = scr_readw(q); a ^= 0x0800; scr_writew(a, q); q++; } } else if (vc->vc_hi_font_mask == 0x100) { while (cnt--) { a = scr_readw(q); a = (a & 0x11ff) | ((a & 0xe000) >> 4) | ((a & 0x0e00) << 4); scr_writew(a, q); q++; } } else { while (cnt--) { a = scr_readw(q); a = (a & 0x88ff) | ((a & 0x7000) >> 4) | ((a & 0x0700) << 4); scr_writew(a, q); q++; } } } if (con_should_update(vc)) do_update_region(vc, (unsigned long) p, count); notify_update(vc); } /* used by selection: complement pointer position */ void complement_pos(struct vc_data *vc, int offset) { static int old_offset = -1; static unsigned short old; static unsigned short oldx, oldy; WARN_CONSOLE_UNLOCKED(); if (old_offset != -1 && old_offset >= 0 && old_offset < vc->vc_screenbuf_size) { scr_writew(old, screenpos(vc, old_offset, true)); if (con_should_update(vc)) con_putc(vc, old, oldy, oldx); notify_update(vc); } old_offset = offset; if (offset != -1 && offset >= 0 && offset < vc->vc_screenbuf_size) { unsigned short new; u16 *p = screenpos(vc, offset, true); old = scr_readw(p); new = old ^ vc->vc_complement_mask; scr_writew(new, p); if (con_should_update(vc)) { oldx = (offset >> 1) % vc->vc_cols; oldy = (offset >> 1) / vc->vc_cols; con_putc(vc, new, oldy, oldx); } notify_update(vc); } } static void insert_char(struct vc_data *vc, unsigned int nr) { unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_insert(vc, nr); scr_memmovew(p + nr, p, (vc->vc_cols - vc->state.x - nr) * 2); scr_memsetw(p, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; if (con_should_update(vc)) do_update_region(vc, (unsigned long) p, vc->vc_cols - vc->state.x); } static void delete_char(struct vc_data *vc, unsigned int nr) { unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_delete(vc, nr); scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; if (con_should_update(vc)) do_update_region(vc, (unsigned long) p, vc->vc_cols - vc->state.x); } static int softcursor_original = -1; static void add_softcursor(struct vc_data *vc) { int i = scr_readw((u16 *) vc->vc_pos); u32 type = vc->vc_cursor_type; if 
(!(type & CUR_SW)) return; if (softcursor_original != -1) return; softcursor_original = i; i |= CUR_SET(type); i ^= CUR_CHANGE(type); if ((type & CUR_ALWAYS_BG) && (softcursor_original & CUR_BG) == (i & CUR_BG)) i ^= CUR_BG; if ((type & CUR_INVERT_FG_BG) && (i & CUR_FG) == ((i & CUR_BG) >> 4)) i ^= CUR_FG; scr_writew(i, (u16 *)vc->vc_pos); if (con_should_update(vc)) con_putc(vc, i, vc->state.y, vc->state.x); } static void hide_softcursor(struct vc_data *vc) { if (softcursor_original != -1) { scr_writew(softcursor_original, (u16 *)vc->vc_pos); if (con_should_update(vc)) con_putc(vc, softcursor_original, vc->state.y, vc->state.x); softcursor_original = -1; } } static void hide_cursor(struct vc_data *vc) { if (vc_is_sel(vc)) clear_selection(); vc->vc_sw->con_cursor(vc, false); hide_softcursor(vc); } static void set_cursor(struct vc_data *vc) { if (!con_is_fg(vc) || console_blanked || vc->vc_mode == KD_GRAPHICS) return; if (vc->vc_deccm) { if (vc_is_sel(vc)) clear_selection(); add_softcursor(vc); if (CUR_SIZE(vc->vc_cursor_type) != CUR_NONE) vc->vc_sw->con_cursor(vc, true); } else hide_cursor(vc); } static void set_origin(struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); if (!con_is_visible(vc) || !vc->vc_sw->con_set_origin || !vc->vc_sw->con_set_origin(vc)) vc->vc_origin = (unsigned long)vc->vc_screenbuf; vc->vc_visible_origin = vc->vc_origin; vc->vc_scr_end = vc->vc_origin + vc->vc_screenbuf_size; vc->vc_pos = vc->vc_origin + vc->vc_size_row * vc->state.y + 2 * vc->state.x; } static void save_screen(struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); if (vc->vc_sw->con_save_screen) vc->vc_sw->con_save_screen(vc); } static void flush_scrollback(struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); set_origin(vc); if (!con_is_visible(vc)) return; /* * The legacy way for flushing the scrollback buffer is to use a side * effect of the con_switch method. We do it only on the foreground * console as background consoles have no scrollback buffers in that * case and we obviously don't want to switch to them. */ hide_cursor(vc); vc->vc_sw->con_switch(vc); set_cursor(vc); } /* * Redrawing of screen */ void clear_buffer_attributes(struct vc_data *vc) { unsigned short *p = (unsigned short *)vc->vc_origin; int count = vc->vc_screenbuf_size / 2; int mask = vc->vc_hi_font_mask | 0xff; for (; count > 0; count--, p++) { scr_writew((scr_readw(p)&mask) | (vc->vc_video_erase_char & ~mask), p); } } void redraw_screen(struct vc_data *vc, int is_switch) { int redraw = 0; WARN_CONSOLE_UNLOCKED(); if (!vc) { /* strange ... */ /* printk("redraw_screen: tty %d not allocated ??\n", new_console+1); */ return; } if (is_switch) { struct vc_data *old_vc = vc_cons[fg_console].d; if (old_vc == vc) return; if (!con_is_visible(vc)) redraw = 1; *vc->vc_display_fg = vc; fg_console = vc->vc_num; hide_cursor(old_vc); if (!con_is_visible(old_vc)) { save_screen(old_vc); set_origin(old_vc); } if (tty0dev) sysfs_notify(&tty0dev->kobj, NULL, "active"); } else { hide_cursor(vc); redraw = 1; } if (redraw) { bool update; int old_was_color = vc->vc_can_do_color; set_origin(vc); update = vc->vc_sw->con_switch(vc); set_palette(vc); /* * If console changed from mono<->color, the best we can do * is to clear the buffer attributes. As it currently stands, * rebuilding new attributes from the old buffer is not doable * without overly complex code. 
*/ if (old_was_color != vc->vc_can_do_color) { update_attr(vc); clear_buffer_attributes(vc); } if (update && vc->vc_mode != KD_GRAPHICS) do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2); } set_cursor(vc); if (is_switch) { vt_set_leds_compute_shiftstate(); notify_update(vc); } } EXPORT_SYMBOL(redraw_screen); /* * Allocation, freeing and resizing of VTs. */ int vc_cons_allocated(unsigned int i) { return (i < MAX_NR_CONSOLES && vc_cons[i].d); } static void visual_init(struct vc_data *vc, int num, bool init) { /* ++Geert: vc->vc_sw->con_init determines console size */ if (vc->vc_sw) module_put(vc->vc_sw->owner); vc->vc_sw = conswitchp; if (con_driver_map[num]) vc->vc_sw = con_driver_map[num]; __module_get(vc->vc_sw->owner); vc->vc_num = num; vc->vc_display_fg = &master_display_fg; if (vc->uni_pagedict_loc) con_free_unimap(vc); vc->uni_pagedict_loc = &vc->uni_pagedict; vc->uni_pagedict = NULL; vc->vc_hi_font_mask = 0; vc->vc_complement_mask = 0; vc->vc_can_do_color = 0; vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS; vc->vc_sw->con_init(vc, init); if (!vc->vc_complement_mask) vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800; vc->vc_s_complement_mask = vc->vc_complement_mask; vc->vc_size_row = vc->vc_cols << 1; vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row; } static void visual_deinit(struct vc_data *vc) { vc->vc_sw->con_deinit(vc); module_put(vc->vc_sw->owner); } static void vc_port_destruct(struct tty_port *port) { struct vc_data *vc = container_of(port, struct vc_data, port); kfree(vc); } static const struct tty_port_operations vc_port_ops = { .destruct = vc_port_destruct, }; /* * Change # of rows and columns (0 means unchanged/the size of fg_console) * [this is to be used together with some user program * like resize that changes the hardware videomode] */ #define VC_MAXCOL (32767) #define VC_MAXROW (32767) int vc_allocate(unsigned int currcons) /* return 0 on success */ { struct vt_notifier_param param; struct vc_data *vc; int err; WARN_CONSOLE_UNLOCKED(); if (currcons >= MAX_NR_CONSOLES) return -ENXIO; if (vc_cons[currcons].d) return 0; /* due to the granularity of kmalloc, we waste some memory here */ /* the alloc is done in two steps, to optimize the common situation of a 25x80 console (structsize=216, screenbuf_size=4000) */ /* although the numbers above are not valid since long ago, the point is still up-to-date and the comment still has its value even if only as a historical artifact. 
--mj, July 1998 */ param.vc = vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL); if (!vc) return -ENOMEM; vc_cons[currcons].d = vc; tty_port_init(&vc->port); vc->port.ops = &vc_port_ops; INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); visual_init(vc, currcons, true); if (!*vc->uni_pagedict_loc) con_set_default_unimap(vc); err = -EINVAL; if (vc->vc_cols > VC_MAXCOL || vc->vc_rows > VC_MAXROW || vc->vc_screenbuf_size > KMALLOC_MAX_SIZE || !vc->vc_screenbuf_size) goto err_free; err = -ENOMEM; vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_KERNEL); if (!vc->vc_screenbuf) goto err_free; /* If no drivers have overridden us and the user didn't pass a boot option, default to displaying the cursor */ if (global_cursor_default == -1) global_cursor_default = 1; vc_init(vc, 1); vcs_make_sysfs(currcons); atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, &param); return 0; err_free: visual_deinit(vc); kfree(vc); vc_cons[currcons].d = NULL; return err; } static inline int resize_screen(struct vc_data *vc, int width, int height, bool from_user) { /* Resizes the resolution of the display adapater */ int err = 0; if (vc->vc_sw->con_resize) err = vc->vc_sw->con_resize(vc, width, height, from_user); return err; } /** * vc_do_resize - resizing method for the tty * @tty: tty being resized * @vc: virtual console private data * @cols: columns * @lines: lines * @from_user: invoked by a user? * * Resize a virtual console, clipping according to the actual constraints. If * the caller passes a tty structure then update the termios winsize * information and perform any necessary signal handling. * * Locking: Caller must hold the console semaphore. Takes the termios rwsem and * ctrl.lock of the tty IFF a tty is passed. */ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, unsigned int cols, unsigned int lines, bool from_user) { unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0; unsigned long end; unsigned int old_rows, old_row_size, first_copied_row; unsigned int new_cols, new_rows, new_row_size, new_screen_size; unsigned short *oldscreen, *newscreen; u32 **new_uniscr = NULL; WARN_CONSOLE_UNLOCKED(); if (cols > VC_MAXCOL || lines > VC_MAXROW) return -EINVAL; new_cols = (cols ? cols : vc->vc_cols); new_rows = (lines ? lines : vc->vc_rows); new_row_size = new_cols << 1; new_screen_size = new_row_size * new_rows; if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) { /* * This function is being called here to cover the case * where the userspace calls the FBIOPUT_VSCREENINFO twice, * passing the same fb_var_screeninfo containing the fields * yres/xres equal to a number non-multiple of vc_font.height * and yres_virtual/xres_virtual equal to number lesser than the * vc_font.height and yres/xres. * In the second call, the struct fb_var_screeninfo isn't * being modified by the underlying driver because of the * if above, and this causes the fbcon_display->vrows to become * negative and it eventually leads to out-of-bound * access by the imageblit function. * To give the correct values to the struct and to not have * to deal with possible errors from the code below, we call * the resize_screen here as well. 
*/ return resize_screen(vc, new_cols, new_rows, from_user); } if (new_screen_size > KMALLOC_MAX_SIZE || !new_screen_size) return -EINVAL; newscreen = kzalloc(new_screen_size, GFP_USER); if (!newscreen) return -ENOMEM; if (vc->vc_uni_lines) { new_uniscr = vc_uniscr_alloc(new_cols, new_rows); if (!new_uniscr) { kfree(newscreen); return -ENOMEM; } } if (vc_is_sel(vc)) clear_selection(); old_rows = vc->vc_rows; old_row_size = vc->vc_size_row; err = resize_screen(vc, new_cols, new_rows, from_user); if (err) { kfree(newscreen); vc_uniscr_free(new_uniscr); return err; } vc->vc_rows = new_rows; vc->vc_cols = new_cols; vc->vc_size_row = new_row_size; vc->vc_screenbuf_size = new_screen_size; rlth = min(old_row_size, new_row_size); rrem = new_row_size - rlth; old_origin = vc->vc_origin; new_origin = (long) newscreen; new_scr_end = new_origin + new_screen_size; if (vc->state.y > new_rows) { if (old_rows - vc->state.y < new_rows) { /* * Cursor near the bottom, copy contents from the * bottom of buffer */ first_copied_row = (old_rows - new_rows); } else { /* * Cursor is in no man's land, copy 1/2 screenful * from the top and bottom of cursor position */ first_copied_row = (vc->state.y - new_rows/2); } old_origin += first_copied_row * old_row_size; } else first_copied_row = 0; end = old_origin + old_row_size * min(old_rows, new_rows); vc_uniscr_copy_area(new_uniscr, new_cols, new_rows, vc->vc_uni_lines, rlth/2, first_copied_row, min(old_rows, new_rows)); vc_uniscr_set(vc, new_uniscr); update_attr(vc); while (old_origin < end) { scr_memcpyw((unsigned short *) new_origin, (unsigned short *) old_origin, rlth); if (rrem) scr_memsetw((void *)(new_origin + rlth), vc->vc_video_erase_char, rrem); old_origin += old_row_size; new_origin += new_row_size; } if (new_scr_end > new_origin) scr_memsetw((void *)new_origin, vc->vc_video_erase_char, new_scr_end - new_origin); oldscreen = vc->vc_screenbuf; vc->vc_screenbuf = newscreen; vc->vc_screenbuf_size = new_screen_size; set_origin(vc); kfree(oldscreen); /* do part of a reset_terminal() */ vc->vc_top = 0; vc->vc_bottom = vc->vc_rows; gotoxy(vc, vc->state.x, vc->state.y); save_cur(vc); if (tty) { /* Rewrite the requested winsize data with the actual resulting sizes */ struct winsize ws; memset(&ws, 0, sizeof(ws)); ws.ws_row = vc->vc_rows; ws.ws_col = vc->vc_cols; ws.ws_ypixel = vc->vc_scan_lines; tty_do_resize(tty, &ws); } if (con_is_visible(vc)) update_screen(vc); vt_event_post(VT_EVENT_RESIZE, vc->vc_num, vc->vc_num); notify_update(vc); return err; } /** * __vc_resize - resize a VT * @vc: virtual console * @cols: columns * @rows: rows * @from_user: invoked by a user? * * Resize a virtual console as seen from the console end of things. We use the * common vc_do_resize() method to update the structures. * * Locking: The caller must hold the console sem to protect console internals * and @vc->port.tty. */ int __vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows, bool from_user) { return vc_do_resize(vc->port.tty, vc, cols, rows, from_user); } EXPORT_SYMBOL(__vc_resize); /** * vt_resize - resize a VT * @tty: tty to resize * @ws: winsize attributes * * Resize a virtual terminal. This is called by the tty layer as we register * our own handler for resizing. The mutual helper does all the actual work. * * Locking: Takes the console sem and the called methods then take the tty * termios_rwsem and the tty ctrl.lock in that order. 
*/ static int vt_resize(struct tty_struct *tty, struct winsize *ws) { struct vc_data *vc = tty->driver_data; int ret; console_lock(); ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row, false); console_unlock(); return ret; } struct vc_data *vc_deallocate(unsigned int currcons) { struct vc_data *vc = NULL; WARN_CONSOLE_UNLOCKED(); if (vc_cons_allocated(currcons)) { struct vt_notifier_param param; param.vc = vc = vc_cons[currcons].d; atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param); vcs_remove_sysfs(currcons); visual_deinit(vc); con_free_unimap(vc); put_pid(vc->vt_pid); vc_uniscr_set(vc, NULL); kfree(vc->vc_screenbuf); vc_cons[currcons].d = NULL; } return vc; } /* * VT102 emulator */ enum { EPecma = 0, EPdec, EPeq, EPgt, EPlt}; #define set_kbd(vc, x) vt_set_kbd_mode_bit((vc)->vc_num, (x)) #define clr_kbd(vc, x) vt_clr_kbd_mode_bit((vc)->vc_num, (x)) #define is_kbd(vc, x) vt_get_kbd_mode_bit((vc)->vc_num, (x)) #define decarm VC_REPEAT #define decckm VC_CKMODE #define kbdapplic VC_APPLIC #define lnm VC_CRLF const unsigned char color_table[] = { 0, 4, 2, 6, 1, 5, 3, 7, 8,12,10,14, 9,13,11,15 }; EXPORT_SYMBOL(color_table); /* the default colour table, for VGA+ colour systems */ unsigned char default_red[] = { 0x00, 0xaa, 0x00, 0xaa, 0x00, 0xaa, 0x00, 0xaa, 0x55, 0xff, 0x55, 0xff, 0x55, 0xff, 0x55, 0xff }; module_param_array(default_red, byte, NULL, S_IRUGO | S_IWUSR); EXPORT_SYMBOL(default_red); unsigned char default_grn[] = { 0x00, 0x00, 0xaa, 0x55, 0x00, 0x00, 0xaa, 0xaa, 0x55, 0x55, 0xff, 0xff, 0x55, 0x55, 0xff, 0xff }; module_param_array(default_grn, byte, NULL, S_IRUGO | S_IWUSR); EXPORT_SYMBOL(default_grn); unsigned char default_blu[] = { 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x55, 0x55, 0x55, 0x55, 0xff, 0xff, 0xff, 0xff }; module_param_array(default_blu, byte, NULL, S_IRUGO | S_IWUSR); EXPORT_SYMBOL(default_blu); /* * gotoxy() must verify all boundaries, because the arguments * might also be negative. If the given position is out of * bounds, the cursor is placed at the nearest margin. */ static void gotoxy(struct vc_data *vc, int new_x, int new_y) { int min_y, max_y; if (new_x < 0) vc->state.x = 0; else { if (new_x >= vc->vc_cols) vc->state.x = vc->vc_cols - 1; else vc->state.x = new_x; } if (vc->vc_decom) { min_y = vc->vc_top; max_y = vc->vc_bottom; } else { min_y = 0; max_y = vc->vc_rows; } if (new_y < min_y) vc->state.y = min_y; else if (new_y >= max_y) vc->state.y = max_y - 1; else vc->state.y = new_y; vc->vc_pos = vc->vc_origin + vc->state.y * vc->vc_size_row + (vc->state.x << 1); vc->vc_need_wrap = 0; } /* for absolute user moves, when decom is set */ static void gotoxay(struct vc_data *vc, int new_x, int new_y) { gotoxy(vc, new_x, vc->vc_decom ? 
(vc->vc_top + new_y) : new_y); } void scrollback(struct vc_data *vc) { scrolldelta(-(vc->vc_rows / 2)); } void scrollfront(struct vc_data *vc, int lines) { if (!lines) lines = vc->vc_rows / 2; scrolldelta(lines); } static void lf(struct vc_data *vc) { /* don't scroll if above bottom of scrolling region, or * if below scrolling region */ if (vc->state.y + 1 == vc->vc_bottom) con_scroll(vc, vc->vc_top, vc->vc_bottom, SM_UP, 1); else if (vc->state.y < vc->vc_rows - 1) { vc->state.y++; vc->vc_pos += vc->vc_size_row; } vc->vc_need_wrap = 0; notify_write(vc, '\n'); } static void ri(struct vc_data *vc) { /* don't scroll if below top of scrolling region, or * if above scrolling region */ if (vc->state.y == vc->vc_top) con_scroll(vc, vc->vc_top, vc->vc_bottom, SM_DOWN, 1); else if (vc->state.y > 0) { vc->state.y--; vc->vc_pos -= vc->vc_size_row; } vc->vc_need_wrap = 0; } static inline void cr(struct vc_data *vc) { vc->vc_pos -= vc->state.x << 1; vc->vc_need_wrap = vc->state.x = 0; notify_write(vc, '\r'); } static inline void bs(struct vc_data *vc) { if (vc->state.x) { vc->vc_pos -= 2; vc->state.x--; vc->vc_need_wrap = 0; notify_write(vc, '\b'); } } static inline void del(struct vc_data *vc) { /* ignored */ } enum CSI_J { CSI_J_CURSOR_TO_END = 0, CSI_J_START_TO_CURSOR = 1, CSI_J_VISIBLE = 2, CSI_J_FULL = 3, }; static void csi_J(struct vc_data *vc, enum CSI_J vpar) { unsigned short *start; unsigned int count; switch (vpar) { case CSI_J_CURSOR_TO_END: vc_uniscr_clear_line(vc, vc->state.x, vc->vc_cols - vc->state.x); vc_uniscr_clear_lines(vc, vc->state.y + 1, vc->vc_rows - vc->state.y - 1); count = (vc->vc_scr_end - vc->vc_pos) >> 1; start = (unsigned short *)vc->vc_pos; break; case CSI_J_START_TO_CURSOR: vc_uniscr_clear_line(vc, 0, vc->state.x + 1); vc_uniscr_clear_lines(vc, 0, vc->state.y); count = ((vc->vc_pos - vc->vc_origin) >> 1) + 1; start = (unsigned short *)vc->vc_origin; break; case CSI_J_FULL: flush_scrollback(vc); fallthrough; case CSI_J_VISIBLE: vc_uniscr_clear_lines(vc, 0, vc->vc_rows); count = vc->vc_cols * vc->vc_rows; start = (unsigned short *)vc->vc_origin; break; default: return; } scr_memsetw(start, vc->vc_video_erase_char, 2 * count); if (con_should_update(vc)) do_update_region(vc, (unsigned long) start, count); vc->vc_need_wrap = 0; } enum { CSI_K_CURSOR_TO_LINEEND = 0, CSI_K_LINESTART_TO_CURSOR = 1, CSI_K_LINE = 2, }; static void csi_K(struct vc_data *vc) { unsigned int count; unsigned short *start = (unsigned short *)vc->vc_pos; int offset; switch (vc->vc_par[0]) { case CSI_K_CURSOR_TO_LINEEND: offset = 0; count = vc->vc_cols - vc->state.x; break; case CSI_K_LINESTART_TO_CURSOR: offset = -vc->state.x; count = vc->state.x + 1; break; case CSI_K_LINE: offset = -vc->state.x; count = vc->vc_cols; break; default: return; } vc_uniscr_clear_line(vc, vc->state.x + offset, count); scr_memsetw(start + offset, vc->vc_video_erase_char, 2 * count); vc->vc_need_wrap = 0; if (con_should_update(vc)) do_update_region(vc, (unsigned long)(start + offset), count); } /* erase the following count positions */ static void csi_X(struct vc_data *vc) { /* not vt100? 
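 * ECH ("erase character"): e.g. ESC [ 4 X blanks the next four cells without
 * moving the cursor; it is a later addition, not part of the original VT100.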
*/ unsigned int count = clamp(vc->vc_par[0], 1, vc->vc_cols - vc->state.x); vc_uniscr_clear_line(vc, vc->state.x, count); scr_memsetw((unsigned short *)vc->vc_pos, vc->vc_video_erase_char, 2 * count); if (con_should_update(vc)) vc->vc_sw->con_clear(vc, vc->state.y, vc->state.x, count); vc->vc_need_wrap = 0; } static void default_attr(struct vc_data *vc) { vc->state.intensity = VCI_NORMAL; vc->state.italic = false; vc->state.underline = false; vc->state.reverse = false; vc->state.blink = false; vc->state.color = vc->vc_def_color; } struct rgb { u8 r; u8 g; u8 b; }; static void rgb_from_256(unsigned int i, struct rgb *c) { if (i < 8) { /* Standard colours. */ c->r = i&1 ? 0xaa : 0x00; c->g = i&2 ? 0xaa : 0x00; c->b = i&4 ? 0xaa : 0x00; } else if (i < 16) { c->r = i&1 ? 0xff : 0x55; c->g = i&2 ? 0xff : 0x55; c->b = i&4 ? 0xff : 0x55; } else if (i < 232) { /* 6x6x6 colour cube. */ i -= 16; c->b = i % 6 * 255 / 6; i /= 6; c->g = i % 6 * 255 / 6; i /= 6; c->r = i * 255 / 6; } else /* Grayscale ramp. */ c->r = c->g = c->b = i * 10 - 2312; } static void rgb_foreground(struct vc_data *vc, const struct rgb *c) { u8 hue = 0, max = max3(c->r, c->g, c->b); if (c->r > max / 2) hue |= 4; if (c->g > max / 2) hue |= 2; if (c->b > max / 2) hue |= 1; if (hue == 7 && max <= 0x55) { hue = 0; vc->state.intensity = VCI_BOLD; } else if (max > 0xaa) vc->state.intensity = VCI_BOLD; else vc->state.intensity = VCI_NORMAL; vc->state.color = (vc->state.color & 0xf0) | hue; } static void rgb_background(struct vc_data *vc, const struct rgb *c) { /* For backgrounds, err on the dark side. */ vc->state.color = (vc->state.color & 0x0f) | (c->r&0x80) >> 1 | (c->g&0x80) >> 2 | (c->b&0x80) >> 3; } /* * ITU T.416 Higher colour modes. They break the usual properties of SGR codes * and thus need to be detected and ignored by hand. That standard also * wants : rather than ; as separators but sequences containing : are currently * completely ignored by the parser. * * Subcommands 3 (CMY) and 4 (CMYK) are so insane there's no point in * supporting them. 
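 *
 * Examples of sequences that end up in vc_t416_color() below (parameters
 * separated by ';', since the ':' variants are dropped by the parser):
 *
 *	ESC [ 38 ; 5 ; 208 m		256-colour foreground, palette index 208
 *	ESC [ 48 ; 2 ; 32 ; 64 ; 96 m	24-bit background, r=32 g=64 b=96
 *
 * Both are reduced to the console's attribute byte via rgb_foreground() or
 * rgb_background().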
*/ static int vc_t416_color(struct vc_data *vc, int i, void(*set_color)(struct vc_data *vc, const struct rgb *c)) { struct rgb c; i++; if (i > vc->vc_npar) return i; if (vc->vc_par[i] == 5 && i + 1 <= vc->vc_npar) { /* 256 colours */ i++; rgb_from_256(vc->vc_par[i], &c); } else if (vc->vc_par[i] == 2 && i + 3 <= vc->vc_npar) { /* 24 bit */ c.r = vc->vc_par[i + 1]; c.g = vc->vc_par[i + 2]; c.b = vc->vc_par[i + 3]; i += 3; } else return i; set_color(vc, &c); return i; } enum { CSI_m_DEFAULT = 0, CSI_m_BOLD = 1, CSI_m_HALF_BRIGHT = 2, CSI_m_ITALIC = 3, CSI_m_UNDERLINE = 4, CSI_m_BLINK = 5, CSI_m_REVERSE = 7, CSI_m_PRI_FONT = 10, CSI_m_ALT_FONT1 = 11, CSI_m_ALT_FONT2 = 12, CSI_m_DOUBLE_UNDERLINE = 21, CSI_m_NORMAL_INTENSITY = 22, CSI_m_NO_ITALIC = 23, CSI_m_NO_UNDERLINE = 24, CSI_m_NO_BLINK = 25, CSI_m_NO_REVERSE = 27, CSI_m_FG_COLOR_BEG = 30, CSI_m_FG_COLOR_END = 37, CSI_m_FG_COLOR = 38, CSI_m_DEFAULT_FG_COLOR = 39, CSI_m_BG_COLOR_BEG = 40, CSI_m_BG_COLOR_END = 47, CSI_m_BG_COLOR = 48, CSI_m_DEFAULT_BG_COLOR = 49, CSI_m_BRIGHT_FG_COLOR_BEG = 90, CSI_m_BRIGHT_FG_COLOR_END = 97, CSI_m_BRIGHT_FG_COLOR_OFF = CSI_m_BRIGHT_FG_COLOR_BEG - CSI_m_FG_COLOR_BEG, CSI_m_BRIGHT_BG_COLOR_BEG = 100, CSI_m_BRIGHT_BG_COLOR_END = 107, CSI_m_BRIGHT_BG_COLOR_OFF = CSI_m_BRIGHT_BG_COLOR_BEG - CSI_m_BG_COLOR_BEG, }; /* console_lock is held */ static void csi_m(struct vc_data *vc) { int i; for (i = 0; i <= vc->vc_npar; i++) switch (vc->vc_par[i]) { case CSI_m_DEFAULT: /* all attributes off */ default_attr(vc); break; case CSI_m_BOLD: vc->state.intensity = VCI_BOLD; break; case CSI_m_HALF_BRIGHT: vc->state.intensity = VCI_HALF_BRIGHT; break; case CSI_m_ITALIC: vc->state.italic = true; break; case CSI_m_DOUBLE_UNDERLINE: /* * No console drivers support double underline, so * convert it to a single underline. */ case CSI_m_UNDERLINE: vc->state.underline = true; break; case CSI_m_BLINK: vc->state.blink = true; break; case CSI_m_REVERSE: vc->state.reverse = true; break; case CSI_m_PRI_FONT: /* ANSI X3.64-1979 (SCO-ish?) * Select primary font, don't display control chars if * defined, don't set bit 8 on output. */ vc->vc_translate = set_translate(vc->state.Gx_charset[vc->state.charset], vc); vc->vc_disp_ctrl = 0; vc->vc_toggle_meta = 0; break; case CSI_m_ALT_FONT1: /* ANSI X3.64-1979 (SCO-ish?) * Select first alternate font, lets chars < 32 be * displayed as ROM chars. */ vc->vc_translate = set_translate(IBMPC_MAP, vc); vc->vc_disp_ctrl = 1; vc->vc_toggle_meta = 0; break; case CSI_m_ALT_FONT2: /* ANSI X3.64-1979 (SCO-ish?) * Select second alternate font, toggle high bit * before displaying as ROM char. */ vc->vc_translate = set_translate(IBMPC_MAP, vc); vc->vc_disp_ctrl = 1; vc->vc_toggle_meta = 1; break; case CSI_m_NORMAL_INTENSITY: vc->state.intensity = VCI_NORMAL; break; case CSI_m_NO_ITALIC: vc->state.italic = false; break; case CSI_m_NO_UNDERLINE: vc->state.underline = false; break; case CSI_m_NO_BLINK: vc->state.blink = false; break; case CSI_m_NO_REVERSE: vc->state.reverse = false; break; case CSI_m_FG_COLOR: i = vc_t416_color(vc, i, rgb_foreground); break; case CSI_m_BG_COLOR: i = vc_t416_color(vc, i, rgb_background); break; case CSI_m_DEFAULT_FG_COLOR: vc->state.color = (vc->vc_def_color & 0x0f) | (vc->state.color & 0xf0); break; case CSI_m_DEFAULT_BG_COLOR: vc->state.color = (vc->vc_def_color & 0xf0) | (vc->state.color & 0x0f); break; case CSI_m_BRIGHT_FG_COLOR_BEG ... CSI_m_BRIGHT_FG_COLOR_END: vc->state.intensity = VCI_BOLD; vc->vc_par[i] -= CSI_m_BRIGHT_FG_COLOR_OFF; fallthrough; case CSI_m_FG_COLOR_BEG ... 
CSI_m_FG_COLOR_END: vc->vc_par[i] -= CSI_m_FG_COLOR_BEG; vc->state.color = color_table[vc->vc_par[i]] | (vc->state.color & 0xf0); break; case CSI_m_BRIGHT_BG_COLOR_BEG ... CSI_m_BRIGHT_BG_COLOR_END: vc->vc_par[i] -= CSI_m_BRIGHT_BG_COLOR_OFF; fallthrough; case CSI_m_BG_COLOR_BEG ... CSI_m_BG_COLOR_END: vc->vc_par[i] -= CSI_m_BG_COLOR_BEG; vc->state.color = (color_table[vc->vc_par[i]] << 4) | (vc->state.color & 0x0f); break; } update_attr(vc); } static void respond_string(const char *p, size_t len, struct tty_port *port) { tty_insert_flip_string(port, p, len); tty_flip_buffer_push(port); } static void cursor_report(struct vc_data *vc, struct tty_struct *tty) { char buf[40]; int len; len = sprintf(buf, "\033[%d;%dR", vc->state.y + (vc->vc_decom ? vc->vc_top + 1 : 1), vc->state.x + 1); respond_string(buf, len, tty->port); } static inline void status_report(struct tty_struct *tty) { static const char terminal_ok[] = "\033[0n"; respond_string(terminal_ok, strlen(terminal_ok), tty->port); } static inline void respond_ID(struct tty_struct *tty) { /* terminal answer to an ESC-Z or csi0c query. */ static const char vt102_id[] = "\033[?6c"; respond_string(vt102_id, strlen(vt102_id), tty->port); } void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry) { char buf[8]; int len; len = sprintf(buf, "\033[M%c%c%c", (char)(' ' + butt), (char)('!' + mrx), (char)('!' + mry)); respond_string(buf, len, tty->port); } /* invoked via ioctl(TIOCLINUX) and through set_selection_user */ int mouse_reporting(void) { return vc_cons[fg_console].d->vc_report_mouse; } enum { CSI_DEC_hl_CURSOR_KEYS = 1, /* CKM: cursor keys send ^[Ox/^[[x */ CSI_DEC_hl_132_COLUMNS = 3, /* COLM: 80/132 mode switch */ CSI_DEC_hl_REVERSE_VIDEO = 5, /* SCNM */ CSI_DEC_hl_ORIGIN_MODE = 6, /* OM: origin relative/absolute */ CSI_DEC_hl_AUTOWRAP = 7, /* AWM */ CSI_DEC_hl_AUTOREPEAT = 8, /* ARM */ CSI_DEC_hl_MOUSE_X10 = 9, CSI_DEC_hl_SHOW_CURSOR = 25, /* TCEM */ CSI_DEC_hl_MOUSE_VT200 = 1000, }; /* console_lock is held */ static void csi_DEC_hl(struct vc_data *vc, bool on_off) { unsigned int i; for (i = 0; i <= vc->vc_npar; i++) switch (vc->vc_par[i]) { case CSI_DEC_hl_CURSOR_KEYS: if (on_off) set_kbd(vc, decckm); else clr_kbd(vc, decckm); break; case CSI_DEC_hl_132_COLUMNS: /* unimplemented */ #if 0 vc_resize(deccolm ? 132 : 80, vc->vc_rows); /* this alone does not suffice; some user mode utility has to change the hardware regs */ #endif break; case CSI_DEC_hl_REVERSE_VIDEO: if (vc->vc_decscnm != on_off) { vc->vc_decscnm = on_off; invert_screen(vc, 0, vc->vc_screenbuf_size, false); update_attr(vc); } break; case CSI_DEC_hl_ORIGIN_MODE: vc->vc_decom = on_off; gotoxay(vc, 0, 0); break; case CSI_DEC_hl_AUTOWRAP: vc->vc_decawm = on_off; break; case CSI_DEC_hl_AUTOREPEAT: if (on_off) set_kbd(vc, decarm); else clr_kbd(vc, decarm); break; case CSI_DEC_hl_MOUSE_X10: vc->vc_report_mouse = on_off ? 1 : 0; break; case CSI_DEC_hl_SHOW_CURSOR: vc->vc_deccm = on_off; break; case CSI_DEC_hl_MOUSE_VT200: vc->vc_report_mouse = on_off ?
2 : 0; break; } } enum { CSI_hl_DISPLAY_CTRL = 3, /* handle ansi control chars */ CSI_hl_INSERT = 4, /* IRM: insert/replace */ CSI_hl_AUTO_NL = 20, /* LNM: Enter == CrLf/Lf */ }; /* console_lock is held */ static void csi_hl(struct vc_data *vc, bool on_off) { unsigned int i; for (i = 0; i <= vc->vc_npar; i++) switch (vc->vc_par[i]) { /* ANSI modes set/reset */ case CSI_hl_DISPLAY_CTRL: vc->vc_disp_ctrl = on_off; break; case CSI_hl_INSERT: vc->vc_decim = on_off; break; case CSI_hl_AUTO_NL: if (on_off) set_kbd(vc, lnm); else clr_kbd(vc, lnm); break; } } enum CSI_right_square_bracket { CSI_RSB_COLOR_FOR_UNDERLINE = 1, CSI_RSB_COLOR_FOR_HALF_BRIGHT = 2, CSI_RSB_MAKE_CUR_COLOR_DEFAULT = 8, CSI_RSB_BLANKING_INTERVAL = 9, CSI_RSB_BELL_FREQUENCY = 10, CSI_RSB_BELL_DURATION = 11, CSI_RSB_BRING_CONSOLE_TO_FRONT = 12, CSI_RSB_UNBLANK = 13, CSI_RSB_VESA_OFF_INTERVAL = 14, CSI_RSB_BRING_PREV_CONSOLE_TO_FRONT = 15, CSI_RSB_CURSOR_BLINK_INTERVAL = 16, }; /* * csi_RSB - csi+] (Right Square Bracket) handler * * These are linux console private sequences. * * console_lock is held */ static void csi_RSB(struct vc_data *vc) { switch (vc->vc_par[0]) { case CSI_RSB_COLOR_FOR_UNDERLINE: if (vc->vc_can_do_color && vc->vc_par[1] < 16) { vc->vc_ulcolor = color_table[vc->vc_par[1]]; if (vc->state.underline) update_attr(vc); } break; case CSI_RSB_COLOR_FOR_HALF_BRIGHT: if (vc->vc_can_do_color && vc->vc_par[1] < 16) { vc->vc_halfcolor = color_table[vc->vc_par[1]]; if (vc->state.intensity == VCI_HALF_BRIGHT) update_attr(vc); } break; case CSI_RSB_MAKE_CUR_COLOR_DEFAULT: vc->vc_def_color = vc->vc_attr; if (vc->vc_hi_font_mask == 0x100) vc->vc_def_color >>= 1; default_attr(vc); update_attr(vc); break; case CSI_RSB_BLANKING_INTERVAL: blankinterval = min(vc->vc_par[1], 60U) * 60; poke_blanked_console(); break; case CSI_RSB_BELL_FREQUENCY: if (vc->vc_npar >= 1) vc->vc_bell_pitch = vc->vc_par[1]; else vc->vc_bell_pitch = DEFAULT_BELL_PITCH; break; case CSI_RSB_BELL_DURATION: if (vc->vc_npar >= 1) vc->vc_bell_duration = (vc->vc_par[1] < 2000) ? 
msecs_to_jiffies(vc->vc_par[1]) : 0; else vc->vc_bell_duration = DEFAULT_BELL_DURATION; break; case CSI_RSB_BRING_CONSOLE_TO_FRONT: if (vc->vc_par[1] >= 1 && vc_cons_allocated(vc->vc_par[1] - 1)) set_console(vc->vc_par[1] - 1); break; case CSI_RSB_UNBLANK: poke_blanked_console(); break; case CSI_RSB_VESA_OFF_INTERVAL: vesa_off_interval = min(vc->vc_par[1], 60U) * 60 * HZ; break; case CSI_RSB_BRING_PREV_CONSOLE_TO_FRONT: set_console(last_console); break; case CSI_RSB_CURSOR_BLINK_INTERVAL: if (vc->vc_npar >= 1 && vc->vc_par[1] >= 50 && vc->vc_par[1] <= USHRT_MAX) vc->vc_cur_blink_ms = vc->vc_par[1]; else vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS; break; } } /* console_lock is held */ static void csi_at(struct vc_data *vc, unsigned int nr) { nr = clamp(nr, 1, vc->vc_cols - vc->state.x); insert_char(vc, nr); } /* console_lock is held */ static void csi_L(struct vc_data *vc) { unsigned int nr = clamp(vc->vc_par[0], 1, vc->vc_rows - vc->state.y); con_scroll(vc, vc->state.y, vc->vc_bottom, SM_DOWN, nr); vc->vc_need_wrap = 0; } /* console_lock is held */ static void csi_P(struct vc_data *vc) { unsigned int nr = clamp(vc->vc_par[0], 1, vc->vc_cols - vc->state.x); delete_char(vc, nr); } /* console_lock is held */ static void csi_M(struct vc_data *vc) { unsigned int nr = clamp(vc->vc_par[0], 1, vc->vc_rows - vc->state.y); con_scroll(vc, vc->state.y, vc->vc_bottom, SM_UP, nr); vc->vc_need_wrap = 0; } /* console_lock is held (except via vc_init->reset_terminal */ static void save_cur(struct vc_data *vc) { memcpy(&vc->saved_state, &vc->state, sizeof(vc->state)); } /* console_lock is held */ static void restore_cur(struct vc_data *vc) { memcpy(&vc->state, &vc->saved_state, sizeof(vc->state)); gotoxy(vc, vc->state.x, vc->state.y); vc->vc_translate = set_translate(vc->state.Gx_charset[vc->state.charset], vc); update_attr(vc); vc->vc_need_wrap = 0; } /** * enum vc_ctl_state - control characters state of a vt * * @ESnormal: initial state, no control characters parsed * @ESesc: ESC parsed * @ESsquare: CSI parsed -- modifiers/parameters/ctrl chars expected * @ESgetpars: CSI parsed -- parameters/ctrl chars expected * @ESfunckey: CSI [ parsed * @EShash: ESC # parsed * @ESsetG0: ESC ( parsed * @ESsetG1: ESC ) parsed * @ESpercent: ESC % parsed * @EScsiignore: CSI [0x20-0x3f] parsed * @ESnonstd: OSC parsed * @ESpalette: OSC P parsed * @ESosc: OSC [0-9] parsed * @ESANSI_first: first state for ignoring ansi control sequences * @ESapc: ESC _ parsed * @ESpm: ESC ^ parsed * @ESdcs: ESC P parsed * @ESANSI_last: last state for ignoring ansi control sequences */ enum vc_ctl_state { ESnormal, ESesc, ESsquare, ESgetpars, ESfunckey, EShash, ESsetG0, ESsetG1, ESpercent, EScsiignore, ESnonstd, ESpalette, ESosc, ESANSI_first = ESosc, ESapc, ESpm, ESdcs, ESANSI_last = ESdcs, }; /* console_lock is held (except via vc_init()) */ static void reset_terminal(struct vc_data *vc, int do_clear) { unsigned int i; vc->vc_top = 0; vc->vc_bottom = vc->vc_rows; vc->vc_state = ESnormal; vc->vc_priv = EPecma; vc->vc_translate = set_translate(LAT1_MAP, vc); vc->state.Gx_charset[0] = LAT1_MAP; vc->state.Gx_charset[1] = GRAF_MAP; vc->state.charset = 0; vc->vc_need_wrap = 0; vc->vc_report_mouse = 0; vc->vc_utf = default_utf8; vc->vc_utf_count = 0; vc->vc_disp_ctrl = 0; vc->vc_toggle_meta = 0; vc->vc_decscnm = 0; vc->vc_decom = 0; vc->vc_decawm = 1; vc->vc_deccm = global_cursor_default; vc->vc_decim = 0; vt_reset_keyboard(vc->vc_num); vc->vc_cursor_type = cur_default; vc->vc_complement_mask = vc->vc_s_complement_mask; default_attr(vc); 
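	/*
	 * The remainder of the reset below: recompute the attribute, restore
	 * the default tab stops (one every 8 columns), reset the bell and
	 * cursor-blink parameters, home the cursor, and optionally clear the
	 * screen (the equivalent of ESC [ 2 J).
	 */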
update_attr(vc); bitmap_zero(vc->vc_tab_stop, VC_TABSTOPS_COUNT); for (i = 0; i < VC_TABSTOPS_COUNT; i += 8) set_bit(i, vc->vc_tab_stop); vc->vc_bell_pitch = DEFAULT_BELL_PITCH; vc->vc_bell_duration = DEFAULT_BELL_DURATION; vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS; gotoxy(vc, 0, 0); save_cur(vc); if (do_clear) csi_J(vc, CSI_J_VISIBLE); } static void vc_setGx(struct vc_data *vc, unsigned int which, u8 c) { unsigned char *charset = &vc->state.Gx_charset[which]; switch (c) { case '0': *charset = GRAF_MAP; break; case 'B': *charset = LAT1_MAP; break; case 'U': *charset = IBMPC_MAP; break; case 'K': *charset = USER_MAP; break; } if (vc->state.charset == which) vc->vc_translate = set_translate(*charset, vc); } static bool ansi_control_string(enum vc_ctl_state state) { return state >= ESANSI_first && state <= ESANSI_last; } enum { ASCII_NULL = 0, ASCII_BELL = 7, ASCII_BACKSPACE = 8, ASCII_IGNORE_FIRST = ASCII_BACKSPACE, ASCII_HTAB = 9, ASCII_LINEFEED = 10, ASCII_VTAB = 11, ASCII_FORMFEED = 12, ASCII_CAR_RET = 13, ASCII_IGNORE_LAST = ASCII_CAR_RET, ASCII_SHIFTOUT = 14, ASCII_SHIFTIN = 15, ASCII_CANCEL = 24, ASCII_SUBSTITUTE = 26, ASCII_ESCAPE = 27, ASCII_CSI_IGNORE_FIRST = ' ', /* 0x2x, 0x3a and 0x3c - 0x3f */ ASCII_CSI_IGNORE_LAST = '?', ASCII_DEL = 127, ASCII_EXT_CSI = 128 + ASCII_ESCAPE, }; /* * Handle ascii characters in control sequences and change states accordingly. * E.g. ESC sets the state of vc to ESesc. * * Returns: true if @c handled. */ static bool handle_ascii(struct tty_struct *tty, struct vc_data *vc, u8 c) { switch (c) { case ASCII_NULL: return true; case ASCII_BELL: if (ansi_control_string(vc->vc_state)) vc->vc_state = ESnormal; else if (vc->vc_bell_duration) kd_mksound(vc->vc_bell_pitch, vc->vc_bell_duration); return true; case ASCII_BACKSPACE: bs(vc); return true; case ASCII_HTAB: vc->vc_pos -= (vc->state.x << 1); vc->state.x = find_next_bit(vc->vc_tab_stop, min(vc->vc_cols - 1, VC_TABSTOPS_COUNT), vc->state.x + 1); if (vc->state.x >= VC_TABSTOPS_COUNT) vc->state.x = vc->vc_cols - 1; vc->vc_pos += (vc->state.x << 1); notify_write(vc, '\t'); return true; case ASCII_LINEFEED: case ASCII_VTAB: case ASCII_FORMFEED: lf(vc); if (!is_kbd(vc, lnm)) return true; fallthrough; case ASCII_CAR_RET: cr(vc); return true; case ASCII_SHIFTOUT: vc->state.charset = 1; vc->vc_translate = set_translate(vc->state.Gx_charset[1], vc); vc->vc_disp_ctrl = 1; return true; case ASCII_SHIFTIN: vc->state.charset = 0; vc->vc_translate = set_translate(vc->state.Gx_charset[0], vc); vc->vc_disp_ctrl = 0; return true; case ASCII_CANCEL: case ASCII_SUBSTITUTE: vc->vc_state = ESnormal; return true; case ASCII_ESCAPE: vc->vc_state = ESesc; return true; case ASCII_DEL: del(vc); return true; case ASCII_EXT_CSI: vc->vc_state = ESsquare; return true; } return false; } /* * Handle a character (@c) following an ESC (when @vc is in the ESesc state). * E.g. previous ESC with @c == '[' here yields the ESsquare state (that is: * CSI). 
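 * Other examples: ESC 7 and ESC 8 save and restore the cursor state via
 * save_cur()/restore_cur(), ESC M performs a reverse line feed (ri()), and
 * ESC c does a full terminal reset through reset_terminal().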
*/ static void handle_esc(struct tty_struct *tty, struct vc_data *vc, u8 c) { vc->vc_state = ESnormal; switch (c) { case '[': vc->vc_state = ESsquare; break; case ']': vc->vc_state = ESnonstd; break; case '_': vc->vc_state = ESapc; break; case '^': vc->vc_state = ESpm; break; case '%': vc->vc_state = ESpercent; break; case 'E': cr(vc); lf(vc); break; case 'M': ri(vc); break; case 'D': lf(vc); break; case 'H': if (vc->state.x < VC_TABSTOPS_COUNT) set_bit(vc->state.x, vc->vc_tab_stop); break; case 'P': vc->vc_state = ESdcs; break; case 'Z': respond_ID(tty); break; case '7': save_cur(vc); break; case '8': restore_cur(vc); break; case '(': vc->vc_state = ESsetG0; break; case ')': vc->vc_state = ESsetG1; break; case '#': vc->vc_state = EShash; break; case 'c': reset_terminal(vc, 1); break; case '>': /* Numeric keypad */ clr_kbd(vc, kbdapplic); break; case '=': /* Appl. keypad */ set_kbd(vc, kbdapplic); break; } } /* * Handle special DEC control sequences ("ESC [ ? parameters char"). Parameters * are in @vc->vc_par and the char is in @c here. */ static void csi_DEC(struct tty_struct *tty, struct vc_data *vc, u8 c) { switch (c) { case 'h': csi_DEC_hl(vc, true); break; case 'l': csi_DEC_hl(vc, false); break; case 'c': if (vc->vc_par[0]) vc->vc_cursor_type = CUR_MAKE(vc->vc_par[0], vc->vc_par[1], vc->vc_par[2]); else vc->vc_cursor_type = cur_default; break; case 'm': clear_selection(); if (vc->vc_par[0]) vc->vc_complement_mask = vc->vc_par[0] << 8 | vc->vc_par[1]; else vc->vc_complement_mask = vc->vc_s_complement_mask; break; case 'n': if (vc->vc_par[0] == 5) status_report(tty); else if (vc->vc_par[0] == 6) cursor_report(vc, tty); break; } } /* * Handle Control Sequence Introducer control characters. That is * "ESC [ parameters char". Parameters are in @vc->vc_par and the char is in * @c here. 
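 *
 * For example, the cursor-position sequence ESC [ 2 ; 5 H arrives here with
 * vc_par = {2, 5} and @c == 'H'; both parameters are decremented and the
 * cursor is moved to row 2, column 5 (1-based) via gotoxay().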
*/ static void csi_ECMA(struct tty_struct *tty, struct vc_data *vc, u8 c) { switch (c) { case 'G': case '`': if (vc->vc_par[0]) vc->vc_par[0]--; gotoxy(vc, vc->vc_par[0], vc->state.y); break; case 'A': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, vc->state.x, vc->state.y - vc->vc_par[0]); break; case 'B': case 'e': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, vc->state.x, vc->state.y + vc->vc_par[0]); break; case 'C': case 'a': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, vc->state.x + vc->vc_par[0], vc->state.y); break; case 'D': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, vc->state.x - vc->vc_par[0], vc->state.y); break; case 'E': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, 0, vc->state.y + vc->vc_par[0]); break; case 'F': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, 0, vc->state.y - vc->vc_par[0]); break; case 'd': if (vc->vc_par[0]) vc->vc_par[0]--; gotoxay(vc, vc->state.x ,vc->vc_par[0]); break; case 'H': case 'f': if (vc->vc_par[0]) vc->vc_par[0]--; if (vc->vc_par[1]) vc->vc_par[1]--; gotoxay(vc, vc->vc_par[1], vc->vc_par[0]); break; case 'J': csi_J(vc, vc->vc_par[0]); break; case 'K': csi_K(vc); break; case 'L': csi_L(vc); break; case 'M': csi_M(vc); break; case 'P': csi_P(vc); break; case 'c': if (!vc->vc_par[0]) respond_ID(tty); break; case 'g': if (!vc->vc_par[0] && vc->state.x < VC_TABSTOPS_COUNT) set_bit(vc->state.x, vc->vc_tab_stop); else if (vc->vc_par[0] == 3) bitmap_zero(vc->vc_tab_stop, VC_TABSTOPS_COUNT); break; case 'h': csi_hl(vc, true); break; case 'l': csi_hl(vc, false); break; case 'm': csi_m(vc); break; case 'n': if (vc->vc_par[0] == 5) status_report(tty); else if (vc->vc_par[0] == 6) cursor_report(vc, tty); break; case 'q': /* DECLL - but only 3 leds */ /* map 0,1,2,3 to 0,1,2,4 */ if (vc->vc_par[0] < 4) vt_set_led_state(vc->vc_num, (vc->vc_par[0] < 3) ? vc->vc_par[0] : 4); break; case 'r': if (!vc->vc_par[0]) vc->vc_par[0]++; if (!vc->vc_par[1]) vc->vc_par[1] = vc->vc_rows; /* Minimum allowed region is 2 lines */ if (vc->vc_par[0] < vc->vc_par[1] && vc->vc_par[1] <= vc->vc_rows) { vc->vc_top = vc->vc_par[0] - 1; vc->vc_bottom = vc->vc_par[1]; gotoxay(vc, 0, 0); } break; case 's': save_cur(vc); break; case 'u': restore_cur(vc); break; case 'X': csi_X(vc); break; case '@': csi_at(vc, vc->vc_par[0]); break; case ']': csi_RSB(vc); break; } } static void vc_reset_params(struct vc_data *vc) { memset(vc->vc_par, 0, sizeof(vc->vc_par)); vc->vc_npar = 0; } /* console_lock is held */ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, u8 c) { /* * Control characters can be used in the _middle_ * of an escape sequence, aside from ANSI control strings. */ if (ansi_control_string(vc->vc_state) && c >= ASCII_IGNORE_FIRST && c <= ASCII_IGNORE_LAST) return; if (handle_ascii(tty, vc, c)) return; switch(vc->vc_state) { case ESesc: /* ESC */ handle_esc(tty, vc, c); return; case ESnonstd: /* ESC ] aka OSC */ switch (c) { case 'P': /* palette escape sequence */ vc_reset_params(vc); vc->vc_state = ESpalette; return; case 'R': /* reset palette */ reset_palette(vc); break; case '0' ... 
'9': vc->vc_state = ESosc; return; } vc->vc_state = ESnormal; return; case ESpalette: /* ESC ] P aka OSC P */ if (isxdigit(c)) { vc->vc_par[vc->vc_npar++] = hex_to_bin(c); if (vc->vc_npar == 7) { int i = vc->vc_par[0] * 3, j = 1; vc->vc_palette[i] = 16 * vc->vc_par[j++]; vc->vc_palette[i++] += vc->vc_par[j++]; vc->vc_palette[i] = 16 * vc->vc_par[j++]; vc->vc_palette[i++] += vc->vc_par[j++]; vc->vc_palette[i] = 16 * vc->vc_par[j++]; vc->vc_palette[i] += vc->vc_par[j]; set_palette(vc); vc->vc_state = ESnormal; } } else vc->vc_state = ESnormal; return; case ESsquare: /* ESC [ aka CSI, parameters or modifiers expected */ vc_reset_params(vc); vc->vc_state = ESgetpars; switch (c) { case '[': /* Function key */ vc->vc_state = ESfunckey; return; case '?': vc->vc_priv = EPdec; return; case '>': vc->vc_priv = EPgt; return; case '=': vc->vc_priv = EPeq; return; case '<': vc->vc_priv = EPlt; return; } vc->vc_priv = EPecma; fallthrough; case ESgetpars: /* ESC [ aka CSI, parameters expected */ switch (c) { case ';': if (vc->vc_npar < NPAR - 1) { vc->vc_npar++; return; } break; case '0' ... '9': vc->vc_par[vc->vc_npar] *= 10; vc->vc_par[vc->vc_npar] += c - '0'; return; } if (c >= ASCII_CSI_IGNORE_FIRST && c <= ASCII_CSI_IGNORE_LAST) { vc->vc_state = EScsiignore; return; } /* parameters done, handle the control char @c */ vc->vc_state = ESnormal; switch (vc->vc_priv) { case EPdec: csi_DEC(tty, vc, c); return; case EPecma: csi_ECMA(tty, vc, c); return; default: return; } case EScsiignore: if (c >= ASCII_CSI_IGNORE_FIRST && c <= ASCII_CSI_IGNORE_LAST) return; vc->vc_state = ESnormal; return; case ESpercent: /* ESC % */ vc->vc_state = ESnormal; switch (c) { case '@': /* defined in ISO 2022 */ vc->vc_utf = 0; return; case 'G': /* prelim official escape code */ case '8': /* retained for compatibility */ vc->vc_utf = 1; return; } return; case ESfunckey: /* ESC [ [ aka CSI [ */ vc->vc_state = ESnormal; return; case EShash: /* ESC # */ vc->vc_state = ESnormal; if (c == '8') { /* DEC screen alignment test. 
kludge :-) */ vc->vc_video_erase_char = (vc->vc_video_erase_char & 0xff00) | 'E'; csi_J(vc, CSI_J_VISIBLE); vc->vc_video_erase_char = (vc->vc_video_erase_char & 0xff00) | ' '; do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2); } return; case ESsetG0: /* ESC ( */ vc_setGx(vc, 0, c); vc->vc_state = ESnormal; return; case ESsetG1: /* ESC ) */ vc_setGx(vc, 1, c); vc->vc_state = ESnormal; return; case ESapc: /* ESC _ */ return; case ESosc: /* ESC ] [0-9] aka OSC [0-9] */ return; case ESpm: /* ESC ^ */ return; case ESdcs: /* ESC P */ return; default: vc->vc_state = ESnormal; } } /* is_double_width() is based on the wcwidth() implementation by * Markus Kuhn -- 2007-05-26 (Unicode 5.0) * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c */ struct interval { uint32_t first; uint32_t last; }; static int ucs_cmp(const void *key, const void *elt) { uint32_t ucs = *(uint32_t *)key; struct interval e = *(struct interval *) elt; if (ucs > e.last) return 1; else if (ucs < e.first) return -1; return 0; } static int is_double_width(uint32_t ucs) { static const struct interval double_width[] = { { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } }; if (ucs < double_width[0].first || ucs > double_width[ARRAY_SIZE(double_width) - 1].last) return 0; return bsearch(&ucs, double_width, ARRAY_SIZE(double_width), sizeof(struct interval), ucs_cmp) != NULL; } struct vc_draw_region { unsigned long from, to; int x; }; static void con_flush(struct vc_data *vc, struct vc_draw_region *draw) { if (draw->x < 0) return; vc->vc_sw->con_putcs(vc, (u16 *)draw->from, (u16 *)draw->to - (u16 *)draw->from, vc->state.y, draw->x); draw->x = -1; } static inline int vc_translate_ascii(const struct vc_data *vc, int c) { if (IS_ENABLED(CONFIG_CONSOLE_TRANSLATIONS)) { if (vc->vc_toggle_meta) c |= 0x80; return vc->vc_translate[c]; } return c; } /** * vc_sanitize_unicode - Replace invalid Unicode code points with ``U+FFFD`` * @c: the received code point */ static inline int vc_sanitize_unicode(const int c) { if (c >= 0xd800 && c <= 0xdfff) return 0xfffd; return c; } /** * vc_translate_unicode - Combine UTF-8 into Unicode in &vc_data.vc_utf_char * @vc: virtual console * @c: UTF-8 byte to translate * @rescan: set to true iff @c wasn't consumed here and needs to be re-processed * * * &vc_data.vc_utf_char is the being-constructed Unicode code point. * * &vc_data.vc_utf_count is the number of continuation bytes still expected to * arrive. * * &vc_data.vc_npar is the number of continuation bytes arrived so far. * * Return: * * %-1 - Input OK so far, @c consumed, further bytes expected. * * %0xFFFD - Possibility 1: input invalid, @c may have been consumed (see * desc. of @rescan). Possibility 2: input OK, @c consumed, * ``U+FFFD`` is the resulting code point. ``U+FFFD`` is valid, * ``REPLACEMENT CHARACTER``. * * otherwise - Input OK, @c consumed, resulting code point returned. */ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) { static const u32 utf8_length_changes[] = {0x7f, 0x7ff, 0xffff, 0x10ffff}; /* Continuation byte received */ if ((c & 0xc0) == 0x80) { /* Unexpected continuation byte? 
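 * E.g. a lone 0x80..0xbf byte arriving while vc_utf_count is zero, i.e. with
 * no lead byte pending, is replaced by U+FFFD below.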
*/ if (!vc->vc_utf_count) return 0xfffd; vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); vc->vc_npar++; if (--vc->vc_utf_count) goto need_more_bytes; /* Got a whole character */ c = vc->vc_utf_char; /* Reject overlong sequences */ if (c <= utf8_length_changes[vc->vc_npar - 1] || c > utf8_length_changes[vc->vc_npar]) return 0xfffd; return vc_sanitize_unicode(c); } /* Single ASCII byte or first byte of a sequence received */ if (vc->vc_utf_count) { /* Continuation byte expected */ *rescan = true; vc->vc_utf_count = 0; return 0xfffd; } /* Nothing to do if an ASCII byte was received */ if (c <= 0x7f) return c; /* First byte of a multibyte sequence received */ vc->vc_npar = 0; if ((c & 0xe0) == 0xc0) { vc->vc_utf_count = 1; vc->vc_utf_char = (c & 0x1f); } else if ((c & 0xf0) == 0xe0) { vc->vc_utf_count = 2; vc->vc_utf_char = (c & 0x0f); } else if ((c & 0xf8) == 0xf0) { vc->vc_utf_count = 3; vc->vc_utf_char = (c & 0x07); } else { return 0xfffd; } need_more_bytes: return -1; } static int vc_translate(struct vc_data *vc, int *c, bool *rescan) { /* Do no translation at all in control states */ if (vc->vc_state != ESnormal) return *c; if (vc->vc_utf && !vc->vc_disp_ctrl) return *c = vc_translate_unicode(vc, *c, rescan); /* no utf or alternate charset mode */ return vc_translate_ascii(vc, *c); } static inline unsigned char vc_invert_attr(const struct vc_data *vc) { if (!vc->vc_can_do_color) return vc->vc_attr ^ 0x08; if (vc->vc_hi_font_mask == 0x100) return (vc->vc_attr & 0x11) | ((vc->vc_attr & 0xe0) >> 4) | ((vc->vc_attr & 0x0e) << 4); return (vc->vc_attr & 0x88) | ((vc->vc_attr & 0x70) >> 4) | ((vc->vc_attr & 0x07) << 4); } static bool vc_is_control(struct vc_data *vc, int tc, int c) { /* * A bitmap for codes <32. A bit of 1 indicates that the code * corresponding to that bit number invokes some special action (such * as cursor movement) and should not be displayed as a glyph unless * the disp_ctrl mode is explicitly enabled. */ static const u32 CTRL_ACTION = BIT(ASCII_NULL) | GENMASK(ASCII_SHIFTIN, ASCII_BELL) | BIT(ASCII_CANCEL) | BIT(ASCII_SUBSTITUTE) | BIT(ASCII_ESCAPE); /* Cannot be overridden by disp_ctrl */ static const u32 CTRL_ALWAYS = BIT(ASCII_NULL) | BIT(ASCII_BACKSPACE) | BIT(ASCII_LINEFEED) | BIT(ASCII_SHIFTIN) | BIT(ASCII_SHIFTOUT) | BIT(ASCII_CAR_RET) | BIT(ASCII_FORMFEED) | BIT(ASCII_ESCAPE); if (vc->vc_state != ESnormal) return true; if (!tc) return true; /* * If the original code was a control character we only allow a glyph * to be displayed if the code is not normally used (such as for cursor * movement) or if the disp_ctrl mode has been explicitly enabled. * Certain characters (as given by the CTRL_ALWAYS bitmap) are always * displayed as control characters, as the console would be pretty * useless without them; to display an arbitrary font position use the * direct-to-font zone in UTF-8 mode. */ if (c < BITS_PER_TYPE(CTRL_ALWAYS)) { if (vc->vc_disp_ctrl) return CTRL_ALWAYS & BIT(c); else return vc->vc_utf || (CTRL_ACTION & BIT(c)); } if (c == ASCII_DEL && !vc->vc_disp_ctrl) return true; if (c == ASCII_EXT_CSI) return true; return false; } static int vc_con_write_normal(struct vc_data *vc, int tc, int c, struct vc_draw_region *draw) { int next_c; unsigned char vc_attr = vc->vc_attr; u16 himask = vc->vc_hi_font_mask, charmask = himask ? 
0x1ff : 0xff; u8 width = 1; bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { if (is_double_width(c)) width = 2; } /* Now try to find out how to display it */ tc = conv_uni_to_pc(vc, tc); if (tc & ~charmask) { if (tc == -1 || tc == -2) return -1; /* nothing to display */ /* Glyph not found */ if ((!vc->vc_utf || vc->vc_disp_ctrl || c < 128) && !(c & ~charmask)) { /* * In legacy mode use the glyph we get by a 1:1 * mapping. * This would make absolutely no sense with Unicode in * mind, but do this for ASCII characters since a font * may lack Unicode mapping info and we don't want to * end up with having question marks only. */ tc = c; } else { /* * Display U+FFFD. If it's not found, display an inverse * question mark. */ tc = conv_uni_to_pc(vc, 0xfffd); if (tc < 0) { inverse = true; tc = conv_uni_to_pc(vc, '?'); if (tc < 0) tc = '?'; vc_attr = vc_invert_attr(vc); con_flush(vc, draw); } } } next_c = c; while (1) { if (vc->vc_need_wrap || vc->vc_decim) con_flush(vc, draw); if (vc->vc_need_wrap) { cr(vc); lf(vc); } if (vc->vc_decim) insert_char(vc, 1); vc_uniscr_putc(vc, next_c); if (himask) tc = ((tc & 0x100) ? himask : 0) | (tc & 0xff); tc |= (vc_attr << 8) & ~himask; scr_writew(tc, (u16 *)vc->vc_pos); if (con_should_update(vc) && draw->x < 0) { draw->x = vc->state.x; draw->from = vc->vc_pos; } if (vc->state.x == vc->vc_cols - 1) { vc->vc_need_wrap = vc->vc_decawm; draw->to = vc->vc_pos + 2; } else { vc->state.x++; draw->to = (vc->vc_pos += 2); } if (!--width) break; /* A space is printed in the second column */ tc = conv_uni_to_pc(vc, ' '); if (tc < 0) tc = ' '; next_c = ' '; } notify_write(vc, c); if (inverse) con_flush(vc, draw); return 0; } /* acquires console_lock */ static int do_con_write(struct tty_struct *tty, const u8 *buf, int count) { struct vc_draw_region draw = { .x = -1, }; int c, tc, n = 0; unsigned int currcons; struct vc_data *vc = tty->driver_data; struct vt_notifier_param param; bool rescan; if (in_interrupt()) return count; console_lock(); currcons = vc->vc_num; if (!vc_cons_allocated(currcons)) { /* could this happen? */ pr_warn_once("con_write: tty %d not allocated\n", currcons+1); console_unlock(); return 0; } /* undraw cursor first */ if (con_is_fg(vc)) hide_cursor(vc); param.vc = vc; while (!tty->flow.stopped && count) { u8 orig = *buf; buf++; n++; count--; rescan_last_byte: c = orig; rescan = false; tc = vc_translate(vc, &c, &rescan); if (tc == -1) continue; param.c = tc; if (atomic_notifier_call_chain(&vt_notifier_list, VT_PREWRITE, &param) == NOTIFY_STOP) continue; if (vc_is_control(vc, tc, c)) { con_flush(vc, &draw); do_con_trol(tty, vc, orig); continue; } if (vc_con_write_normal(vc, tc, c, &draw) < 0) continue; if (rescan) goto rescan_last_byte; } con_flush(vc, &draw); console_conditional_schedule(); notify_update(vc); console_unlock(); return n; } /* * This is the console switching callback. * * Doing console switching in a process context allows * us to do the switches asynchronously (needed when we want * to switch due to a keyboard interrupt). Synchronization * with other console code and prevention of re-entrancy is * ensured with console_lock. 
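 *
 * set_console() below only records the target in want_console and schedules
 * this work; the actual hide_cursor()/change_console() happens here, under
 * console_lock().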
*/ static void console_callback(struct work_struct *ignored) { console_lock(); if (want_console >= 0) { if (want_console != fg_console && vc_cons_allocated(want_console)) { hide_cursor(vc_cons[fg_console].d); change_console(vc_cons[want_console].d); /* we only changed when the console had already been allocated - a new console is not created in an interrupt routine */ } want_console = -1; } if (do_poke_blanked_console) { /* do not unblank for a LED change */ do_poke_blanked_console = 0; poke_blanked_console(); } if (scrollback_delta) { struct vc_data *vc = vc_cons[fg_console].d; clear_selection(); if (vc->vc_mode == KD_TEXT && vc->vc_sw->con_scrolldelta) vc->vc_sw->con_scrolldelta(vc, scrollback_delta); scrollback_delta = 0; } if (blank_timer_expired) { do_blank_screen(0); blank_timer_expired = 0; } notify_update(vc_cons[fg_console].d); console_unlock(); } int set_console(int nr) { struct vc_data *vc = vc_cons[fg_console].d; if (!vc_cons_allocated(nr) || vt_dont_switch || (vc->vt_mode.mode == VT_AUTO && vc->vc_mode == KD_GRAPHICS)) { /* * Console switch will fail in console_callback() or * change_console() so there is no point scheduling * the callback * * Existing set_console() users don't check the return * value so this shouldn't break anything */ return -EINVAL; } want_console = nr; schedule_console_callback(); return 0; } struct tty_driver *console_driver; #ifdef CONFIG_VT_CONSOLE /** * vt_kmsg_redirect() - sets/gets the kernel message console * @new: the new virtual terminal number or -1 if the console should stay * unchanged * * By default, the kernel messages are always printed on the current virtual * console. However, the user may modify that default with the * %TIOCL_SETKMSGREDIRECT ioctl call. * * This function sets the kernel message console to be @new. It returns the old * virtual console number. The virtual terminal number %0 (both as parameter and * return value) means no redirection (i.e. always printed on the currently * active console). * * The parameter -1 means that only the current console is returned, but the * value is not modified. You may use the macro vt_get_kmsg_redirect() in that * case to make the code more understandable. * * When the kernel is compiled without %CONFIG_VT_CONSOLE, this function ignores * the parameter and always returns %0. */ int vt_kmsg_redirect(int new) { static int kmsg_con; if (new != -1) return xchg(&kmsg_con, new); else return kmsg_con; } /* * Console on virtual terminal * * The console must be locked when we get here. 
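 *
 * If kernel messages have been redirected with TIOCL_SETKMSGREDIRECT (see
 * vt_kmsg_redirect() above), the text is drawn on that console rather than
 * on the foreground one.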
*/ static void vt_console_print(struct console *co, const char *b, unsigned count) { struct vc_data *vc = vc_cons[fg_console].d; unsigned char c; static DEFINE_SPINLOCK(printing_lock); const ushort *start; ushort start_x, cnt; int kmsg_console; WARN_CONSOLE_UNLOCKED(); /* this protects against concurrent oops only */ if (!spin_trylock(&printing_lock)) return; kmsg_console = vt_get_kmsg_redirect(); if (kmsg_console && vc_cons_allocated(kmsg_console - 1)) vc = vc_cons[kmsg_console - 1].d; if (!vc_cons_allocated(fg_console)) { /* impossible */ /* printk("vt_console_print: tty %d not allocated ??\n", currcons+1); */ goto quit; } if (vc->vc_mode != KD_TEXT) goto quit; /* undraw cursor first */ if (con_is_fg(vc)) hide_cursor(vc); start = (ushort *)vc->vc_pos; start_x = vc->state.x; cnt = 0; while (count--) { c = *b++; if (c == ASCII_LINEFEED || c == ASCII_CAR_RET || c == ASCII_BACKSPACE || vc->vc_need_wrap) { if (cnt && con_is_visible(vc)) vc->vc_sw->con_putcs(vc, start, cnt, vc->state.y, start_x); cnt = 0; if (c == ASCII_BACKSPACE) { bs(vc); start = (ushort *)vc->vc_pos; start_x = vc->state.x; continue; } if (c != ASCII_CAR_RET) lf(vc); cr(vc); start = (ushort *)vc->vc_pos; start_x = vc->state.x; if (c == ASCII_LINEFEED || c == ASCII_CAR_RET) continue; } vc_uniscr_putc(vc, c); scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos); notify_write(vc, c); cnt++; if (vc->state.x == vc->vc_cols - 1) { vc->vc_need_wrap = 1; } else { vc->vc_pos += 2; vc->state.x++; } } if (cnt && con_is_visible(vc)) vc->vc_sw->con_putcs(vc, start, cnt, vc->state.y, start_x); set_cursor(vc); notify_update(vc); quit: spin_unlock(&printing_lock); } static struct tty_driver *vt_console_device(struct console *c, int *index) { *index = c->index ? c->index-1 : fg_console; return console_driver; } static int vt_console_setup(struct console *co, char *options) { return co->index >= MAX_NR_CONSOLES ? -EINVAL : 0; } static struct console vt_console_driver = { .name = "tty", .setup = vt_console_setup, .write = vt_console_print, .device = vt_console_device, .unblank = unblank_screen, .flags = CON_PRINTBUFFER, .index = -1, }; #endif /* * Handling of Linux-specific VC ioctls */ /* * Generally a bit racy with respect to console_lock();. * * There are some functions which don't need it. * * There are some functions which can sleep for arbitrary periods * (paste_selection) but we don't need the lock there anyway. * * set_selection_user has locking, and definitely needs it */ int tioclinux(struct tty_struct *tty, unsigned long arg) { char type, data; char __user *p = (char __user *)arg; void __user *param_aligned32 = (u32 __user *)arg + 1; void __user *param = (void __user *)arg + 1; int lines; int ret; if (current->signal->tty != tty && !capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(type, p)) return -EFAULT; ret = 0; switch (type) { case TIOCL_SETSEL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; return set_selection_user(param, tty); case TIOCL_PASTESEL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; return paste_selection(tty); case TIOCL_UNBLANKSCREEN: console_lock(); unblank_screen(); console_unlock(); break; case TIOCL_SELLOADLUT: if (!capable(CAP_SYS_ADMIN)) return -EPERM; return sel_loadlut(param_aligned32); case TIOCL_GETSHIFTSTATE: /* * Make it possible to react to Shift+Mousebutton. Note that * 'shift_state' is an undocumented kernel-internal variable; * programs not closely related to the kernel should not use * this. 
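 *
 * A minimal, illustrative user-space call (error handling trimmed; fd is an
 * open /dev/ttyN descriptor owned by the caller, headers <sys/ioctl.h> and
 * <linux/tiocl.h>) might look like:
 *
 *	char arg = TIOCL_GETSHIFTSTATE;
 *	if (ioctl(fd, TIOCLINUX, &arg) == 0)
 *		printf("shift state: %#x\n", arg);
 *
 * because the subcommand byte is overwritten with the result by the
 * put_user() below.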
*/ data = vt_get_shift_state(); return put_user(data, p); case TIOCL_GETMOUSEREPORTING: console_lock(); /* May be overkill */ data = mouse_reporting(); console_unlock(); return put_user(data, p); case TIOCL_SETVESABLANK: return set_vesa_blanking(param); case TIOCL_GETKMSGREDIRECT: data = vt_get_kmsg_redirect(); return put_user(data, p); case TIOCL_SETKMSGREDIRECT: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(data, p+1)) return -EFAULT; vt_kmsg_redirect(data); break; case TIOCL_GETFGCONSOLE: /* * No locking needed as this is a transiently correct return * anyway if the caller hasn't disabled switching. */ return fg_console; case TIOCL_SCROLLCONSOLE: if (get_user(lines, (s32 __user *)param_aligned32)) return -EFAULT; /* * Needs the console lock here. Note that lots of other calls * need fixing before the lock is actually useful! */ console_lock(); scrollfront(vc_cons[fg_console].d, lines); console_unlock(); break; case TIOCL_BLANKSCREEN: /* until explicitly unblanked, not only poked */ console_lock(); ignore_poke = 1; do_blank_screen(0); console_unlock(); break; case TIOCL_BLANKEDSCREEN: return console_blanked; default: return -EINVAL; } return ret; } /* * /dev/ttyN handling */ static ssize_t con_write(struct tty_struct *tty, const u8 *buf, size_t count) { int retval; retval = do_con_write(tty, buf, count); con_flush_chars(tty); return retval; } static int con_put_char(struct tty_struct *tty, u8 ch) { return do_con_write(tty, &ch, 1); } static unsigned int con_write_room(struct tty_struct *tty) { if (tty->flow.stopped) return 0; return 32768; /* No limit, really; we're not buffering */ } /* * con_throttle and con_unthrottle are only used for * paste_selection(), which has to stuff in a large number of * characters... */ static void con_throttle(struct tty_struct *tty) { } static void con_unthrottle(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; wake_up_interruptible(&vc->paste_wait); } /* * Turn the Scroll-Lock LED on when the tty is stopped */ static void con_stop(struct tty_struct *tty) { int console_num; if (!tty) return; console_num = tty->index; if (!vc_cons_allocated(console_num)) return; vt_kbd_con_stop(console_num); } /* * Turn the Scroll-Lock LED off when the console is started */ static void con_start(struct tty_struct *tty) { int console_num; if (!tty) return; console_num = tty->index; if (!vc_cons_allocated(console_num)) return; vt_kbd_con_start(console_num); } static void con_flush_chars(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; if (in_interrupt()) /* from flush_to_ldisc */ return; console_lock(); set_cursor(vc); console_unlock(); } /* * Allocate the console screen memory. 
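 *
 * con_install() is the tty_operations .install hook: it allocates the vc via
 * vc_allocate(), attaches the tty to vc->port, and seeds the initial winsize
 * from the vc dimensions if user space has not set one yet.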
*/ static int con_install(struct tty_driver *driver, struct tty_struct *tty) { unsigned int currcons = tty->index; struct vc_data *vc; int ret; console_lock(); ret = vc_allocate(currcons); if (ret) goto unlock; vc = vc_cons[currcons].d; /* Still being freed */ if (vc->port.tty) { ret = -ERESTARTSYS; goto unlock; } ret = tty_port_install(&vc->port, driver, tty); if (ret) goto unlock; tty->driver_data = vc; vc->port.tty = tty; tty_port_get(&vc->port); if (!tty->winsize.ws_row && !tty->winsize.ws_col) { tty->winsize.ws_row = vc_cons[currcons].d->vc_rows; tty->winsize.ws_col = vc_cons[currcons].d->vc_cols; } if (vc->vc_utf) tty->termios.c_iflag |= IUTF8; else tty->termios.c_iflag &= ~IUTF8; unlock: console_unlock(); return ret; } static int con_open(struct tty_struct *tty, struct file *filp) { /* everything done in install */ return 0; } static void con_close(struct tty_struct *tty, struct file *filp) { /* Nothing to do - we defer to shutdown */ } static void con_shutdown(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; BUG_ON(vc == NULL); console_lock(); vc->port.tty = NULL; console_unlock(); } static void con_cleanup(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; tty_port_put(&vc->port); } static int default_color = 7; /* white */ static int default_italic_color = 2; // green (ASCII) static int default_underline_color = 3; // cyan (ASCII) module_param_named(color, default_color, int, S_IRUGO | S_IWUSR); module_param_named(italic, default_italic_color, int, S_IRUGO | S_IWUSR); module_param_named(underline, default_underline_color, int, S_IRUGO | S_IWUSR); static void vc_init(struct vc_data *vc, int do_clear) { int j, k ; set_origin(vc); vc->vc_pos = vc->vc_origin; reset_vc(vc); for (j=k=0; j<16; j++) { vc->vc_palette[k++] = default_red[j] ; vc->vc_palette[k++] = default_grn[j] ; vc->vc_palette[k++] = default_blu[j] ; } vc->vc_def_color = default_color; vc->vc_ulcolor = default_underline_color; vc->vc_itcolor = default_italic_color; vc->vc_halfcolor = 0x08; /* grey */ init_waitqueue_head(&vc->paste_wait); reset_terminal(vc, do_clear); } /* * This routine initializes console interrupts, and does nothing * else. If you want the screen to clear, call tty_write with * the appropriate escape-sequence. */ static int __init con_init(void) { const char *display_desc = NULL; struct vc_data *vc; unsigned int currcons = 0, i; console_lock(); if (!conswitchp) conswitchp = &dummy_con; display_desc = conswitchp->con_startup(); if (!display_desc) { fg_console = 0; console_unlock(); return 0; } for (i = 0; i < MAX_NR_CON_DRIVER; i++) { struct con_driver *con_driver = &registered_con_driver[i]; if (con_driver->con == NULL) { con_driver->con = conswitchp; con_driver->desc = display_desc; con_driver->flag = CON_DRIVER_FLAG_INIT; con_driver->first = 0; con_driver->last = MAX_NR_CONSOLES - 1; break; } } for (i = 0; i < MAX_NR_CONSOLES; i++) con_driver_map[i] = conswitchp; if (blankinterval) { blank_state = blank_normal_wait; mod_timer(&console_timer, jiffies + (blankinterval * HZ)); } for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) { vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT); INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); tty_port_init(&vc->port); visual_init(vc, currcons, true); /* Assuming vc->vc_{cols,rows,screenbuf_size} are sane here. 
*/ vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT); vc_init(vc, currcons || !vc->vc_sw->con_save_screen); } currcons = fg_console = 0; master_display_fg = vc = vc_cons[currcons].d; set_origin(vc); save_screen(vc); gotoxy(vc, vc->state.x, vc->state.y); csi_J(vc, CSI_J_CURSOR_TO_END); update_screen(vc); pr_info("Console: %s %s %dx%d\n", vc->vc_can_do_color ? "colour" : "mono", display_desc, vc->vc_cols, vc->vc_rows); console_unlock(); #ifdef CONFIG_VT_CONSOLE register_console(&vt_console_driver); #endif return 0; } console_initcall(con_init); static const struct tty_operations con_ops = { .install = con_install, .open = con_open, .close = con_close, .write = con_write, .write_room = con_write_room, .put_char = con_put_char, .flush_chars = con_flush_chars, .ioctl = vt_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vt_compat_ioctl, #endif .stop = con_stop, .start = con_start, .throttle = con_throttle, .unthrottle = con_unthrottle, .resize = vt_resize, .shutdown = con_shutdown, .cleanup = con_cleanup, }; static struct cdev vc0_cdev; static ssize_t show_tty_active(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "tty%d\n", fg_console + 1); } static DEVICE_ATTR(active, S_IRUGO, show_tty_active, NULL); static struct attribute *vt_dev_attrs[] = { &dev_attr_active.attr, NULL }; ATTRIBUTE_GROUPS(vt_dev); int __init vty_init(const struct file_operations *console_fops) { cdev_init(&vc0_cdev, console_fops); if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) || register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0) panic("Couldn't register /dev/tty0 driver\n"); tty0dev = device_create_with_groups(&tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, vt_dev_groups, "tty0"); if (IS_ERR(tty0dev)) tty0dev = NULL; vcs_init(); console_driver = tty_alloc_driver(MAX_NR_CONSOLES, TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS); if (IS_ERR(console_driver)) panic("Couldn't allocate console driver\n"); console_driver->name = "tty"; console_driver->name_base = 1; console_driver->major = TTY_MAJOR; console_driver->minor_start = 1; console_driver->type = TTY_DRIVER_TYPE_CONSOLE; console_driver->init_termios = tty_std_termios; if (default_utf8) console_driver->init_termios.c_iflag |= IUTF8; tty_set_operations(console_driver, &con_ops); if (tty_register_driver(console_driver)) panic("Couldn't register console driver\n"); kbd_init(); console_map_init(); #ifdef CONFIG_MDA_CONSOLE mda_console_init(); #endif return 0; } static const struct class vtconsole_class = { .name = "vtconsole", }; static int do_bind_con_driver(const struct consw *csw, int first, int last, int deflt) { struct module *owner = csw->owner; const char *desc = NULL; struct con_driver *con_driver; int i, j = -1, k = -1, retval = -ENODEV; if (!try_module_get(owner)) return -ENODEV; WARN_CONSOLE_UNLOCKED(); /* check if driver is registered */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; if (con_driver->con == csw) { desc = con_driver->desc; retval = 0; break; } } if (retval) goto err; if (!(con_driver->flag & CON_DRIVER_FLAG_INIT)) { csw->con_startup(); con_driver->flag |= CON_DRIVER_FLAG_INIT; } if (deflt) { if (conswitchp) module_put(conswitchp->owner); __module_get(owner); conswitchp = csw; } first = max(first, con_driver->first); last = min(last, con_driver->last); for (i = first; i <= last; i++) { int old_was_color; struct vc_data *vc = vc_cons[i].d; if (con_driver_map[i]) module_put(con_driver_map[i]->owner); __module_get(owner); con_driver_map[i] = csw; if (!vc || 
!vc->vc_sw) continue; j = i; if (con_is_visible(vc)) { k = i; save_screen(vc); } old_was_color = vc->vc_can_do_color; vc->vc_sw->con_deinit(vc); vc->vc_origin = (unsigned long)vc->vc_screenbuf; visual_init(vc, i, false); set_origin(vc); update_attr(vc); /* If the console changed between mono <-> color, then * the attributes in the screenbuf will be wrong. The * following resets all attributes to something sane. */ if (old_was_color != vc->vc_can_do_color) clear_buffer_attributes(vc); } pr_info("Console: switching "); if (!deflt) pr_cont("consoles %d-%d ", first + 1, last + 1); if (j >= 0) { struct vc_data *vc = vc_cons[j].d; pr_cont("to %s %s %dx%d\n", vc->vc_can_do_color ? "colour" : "mono", desc, vc->vc_cols, vc->vc_rows); if (k >= 0) { vc = vc_cons[k].d; update_screen(vc); } } else { pr_cont("to %s\n", desc); } retval = 0; err: module_put(owner); return retval; }; #ifdef CONFIG_VT_HW_CONSOLE_BINDING int do_unbind_con_driver(const struct consw *csw, int first, int last, int deflt) { struct module *owner = csw->owner; const struct consw *defcsw = NULL; struct con_driver *con_driver = NULL, *con_back = NULL; int i, retval = -ENODEV; if (!try_module_get(owner)) return -ENODEV; WARN_CONSOLE_UNLOCKED(); /* check if driver is registered and if it is unbindable */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; if (con_driver->con == csw && con_driver->flag & CON_DRIVER_FLAG_MODULE) { retval = 0; break; } } if (retval) goto err; retval = -ENODEV; /* check if backup driver exists */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_back = &registered_con_driver[i]; if (con_back->con && con_back->con != csw) { defcsw = con_back->con; retval = 0; break; } } if (retval) goto err; if (!con_is_bound(csw)) goto err; first = max(first, con_driver->first); last = min(last, con_driver->last); for (i = first; i <= last; i++) { if (con_driver_map[i] == csw) { module_put(csw->owner); con_driver_map[i] = NULL; } } if (!con_is_bound(defcsw)) { const struct consw *defconsw = conswitchp; defcsw->con_startup(); con_back->flag |= CON_DRIVER_FLAG_INIT; /* * vgacon may change the default driver to point * to dummycon, we restore it here... 
*/ conswitchp = defconsw; } if (!con_is_bound(csw)) con_driver->flag &= ~CON_DRIVER_FLAG_INIT; /* ignore return value, binding should not fail */ do_bind_con_driver(defcsw, first, last, deflt); err: module_put(owner); return retval; } EXPORT_SYMBOL_GPL(do_unbind_con_driver); static int vt_bind(struct con_driver *con) { const struct consw *defcsw = NULL, *csw = NULL; int i, more = 1, first = -1, last = -1, deflt = 0; if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE)) goto err; csw = con->con; for (i = 0; i < MAX_NR_CON_DRIVER; i++) { struct con_driver *con = &registered_con_driver[i]; if (con->con && !(con->flag & CON_DRIVER_FLAG_MODULE)) { defcsw = con->con; break; } } if (!defcsw) goto err; while (more) { more = 0; for (i = con->first; i <= con->last; i++) { if (con_driver_map[i] == defcsw) { if (first == -1) first = i; last = i; more = 1; } else if (first != -1) break; } if (first == 0 && last == MAX_NR_CONSOLES -1) deflt = 1; if (first != -1) do_bind_con_driver(csw, first, last, deflt); first = -1; last = -1; deflt = 0; } err: return 0; } static int vt_unbind(struct con_driver *con) { const struct consw *csw = NULL; int i, more = 1, first = -1, last = -1, deflt = 0; int ret; if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE)) goto err; csw = con->con; while (more) { more = 0; for (i = con->first; i <= con->last; i++) { if (con_driver_map[i] == csw) { if (first == -1) first = i; last = i; more = 1; } else if (first != -1) break; } if (first == 0 && last == MAX_NR_CONSOLES -1) deflt = 1; if (first != -1) { ret = do_unbind_con_driver(csw, first, last, deflt); if (ret != 0) return ret; } first = -1; last = -1; deflt = 0; } err: return 0; } #else static inline int vt_bind(struct con_driver *con) { return 0; } static inline int vt_unbind(struct con_driver *con) { return 0; } #endif /* CONFIG_VT_HW_CONSOLE_BINDING */ static ssize_t store_bind(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct con_driver *con = dev_get_drvdata(dev); int bind = simple_strtoul(buf, NULL, 0); console_lock(); if (bind) vt_bind(con); else vt_unbind(con); console_unlock(); return count; } static ssize_t show_bind(struct device *dev, struct device_attribute *attr, char *buf) { struct con_driver *con = dev_get_drvdata(dev); int bind; console_lock(); bind = con_is_bound(con->con); console_unlock(); return sysfs_emit(buf, "%i\n", bind); } static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) { struct con_driver *con = dev_get_drvdata(dev); return sysfs_emit(buf, "%s %s\n", (con->flag & CON_DRIVER_FLAG_MODULE) ? 
"(M)" : "(S)", con->desc); } static DEVICE_ATTR(bind, S_IRUGO|S_IWUSR, show_bind, store_bind); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static struct attribute *con_dev_attrs[] = { &dev_attr_bind.attr, &dev_attr_name.attr, NULL }; ATTRIBUTE_GROUPS(con_dev); static int vtconsole_init_device(struct con_driver *con) { con->flag |= CON_DRIVER_FLAG_ATTR; return 0; } static void vtconsole_deinit_device(struct con_driver *con) { con->flag &= ~CON_DRIVER_FLAG_ATTR; } /** * con_is_bound - checks if driver is bound to the console * @csw: console driver * * RETURNS: zero if unbound, nonzero if bound * * Drivers can call this and if zero, they should release * all resources allocated on &consw.con_startup() */ int con_is_bound(const struct consw *csw) { int i, bound = 0; WARN_CONSOLE_UNLOCKED(); for (i = 0; i < MAX_NR_CONSOLES; i++) { if (con_driver_map[i] == csw) { bound = 1; break; } } return bound; } EXPORT_SYMBOL(con_is_bound); /** * con_is_visible - checks whether the current console is visible * @vc: virtual console * * RETURNS: zero if not visible, nonzero if visible */ bool con_is_visible(const struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); return *vc->vc_display_fg == vc; } EXPORT_SYMBOL(con_is_visible); /** * con_debug_enter - prepare the console for the kernel debugger * @vc: virtual console * * Called when the console is taken over by the kernel debugger, this * function needs to save the current console state, then put the console * into a state suitable for the kernel debugger. */ void con_debug_enter(struct vc_data *vc) { saved_fg_console = fg_console; saved_last_console = last_console; saved_want_console = want_console; saved_vc_mode = vc->vc_mode; saved_console_blanked = console_blanked; vc->vc_mode = KD_TEXT; console_blanked = 0; if (vc->vc_sw->con_debug_enter) vc->vc_sw->con_debug_enter(vc); #ifdef CONFIG_KGDB_KDB /* Set the initial LINES variable if it is not already set */ if (vc->vc_rows < 999) { int linecount; char lns[4]; const char *setargs[3] = { "set", "LINES", lns, }; if (kdbgetintenv(setargs[0], &linecount)) { snprintf(lns, 4, "%i", vc->vc_rows); kdb_set(2, setargs); } } if (vc->vc_cols < 999) { int colcount; char cols[4]; const char *setargs[3] = { "set", "COLUMNS", cols, }; if (kdbgetintenv(setargs[0], &colcount)) { snprintf(cols, 4, "%i", vc->vc_cols); kdb_set(2, setargs); } } #endif /* CONFIG_KGDB_KDB */ } EXPORT_SYMBOL_GPL(con_debug_enter); /** * con_debug_leave - restore console state * * Restore the console state to what it was before the kernel debugger * was invoked. 
*/ void con_debug_leave(void) { struct vc_data *vc; fg_console = saved_fg_console; last_console = saved_last_console; want_console = saved_want_console; console_blanked = saved_console_blanked; vc_cons[fg_console].d->vc_mode = saved_vc_mode; vc = vc_cons[fg_console].d; if (vc->vc_sw->con_debug_leave) vc->vc_sw->con_debug_leave(vc); } EXPORT_SYMBOL_GPL(con_debug_leave); static int do_register_con_driver(const struct consw *csw, int first, int last) { struct module *owner = csw->owner; struct con_driver *con_driver; const char *desc; int i, retval; WARN_CONSOLE_UNLOCKED(); if (!try_module_get(owner)) return -ENODEV; for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; /* already registered */ if (con_driver->con == csw) { retval = -EBUSY; goto err; } } desc = csw->con_startup(); if (!desc) { retval = -ENODEV; goto err; } retval = -EINVAL; for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; if (con_driver->con == NULL && !(con_driver->flag & CON_DRIVER_FLAG_ZOMBIE)) { con_driver->con = csw; con_driver->desc = desc; con_driver->node = i; con_driver->flag = CON_DRIVER_FLAG_MODULE | CON_DRIVER_FLAG_INIT; con_driver->first = first; con_driver->last = last; retval = 0; break; } } if (retval) goto err; con_driver->dev = device_create_with_groups(&vtconsole_class, NULL, MKDEV(0, con_driver->node), con_driver, con_dev_groups, "vtcon%i", con_driver->node); if (IS_ERR(con_driver->dev)) { pr_warn("Unable to create device for %s; errno = %ld\n", con_driver->desc, PTR_ERR(con_driver->dev)); con_driver->dev = NULL; } else { vtconsole_init_device(con_driver); } err: module_put(owner); return retval; } /** * do_unregister_con_driver - unregister console driver from console layer * @csw: console driver * * DESCRIPTION: All drivers that registers to the console layer must * call this function upon exit, or if the console driver is in a state * where it won't be able to handle console services, such as the * framebuffer console without loaded framebuffer drivers. * * The driver must unbind first prior to unregistration. */ int do_unregister_con_driver(const struct consw *csw) { int i; /* cannot unregister a bound driver */ if (con_is_bound(csw)) return -EBUSY; if (csw == conswitchp) return -EINVAL; for (i = 0; i < MAX_NR_CON_DRIVER; i++) { struct con_driver *con_driver = &registered_con_driver[i]; if (con_driver->con == csw) { /* * Defer the removal of the sysfs entries since that * will acquire the kernfs s_active lock and we can't * acquire this lock while holding the console lock: * the unbind sysfs entry imposes already the opposite * order. Reset con already here to prevent any later * lookup to succeed and mark this slot as zombie, so * it won't get reused until we complete the removal * in the deferred work. 
*/ con_driver->con = NULL; con_driver->flag = CON_DRIVER_FLAG_ZOMBIE; schedule_work(&con_driver_unregister_work); return 0; } } return -ENODEV; } EXPORT_SYMBOL_GPL(do_unregister_con_driver); static void con_driver_unregister_callback(struct work_struct *ignored) { int i; console_lock(); for (i = 0; i < MAX_NR_CON_DRIVER; i++) { struct con_driver *con_driver = &registered_con_driver[i]; if (!(con_driver->flag & CON_DRIVER_FLAG_ZOMBIE)) continue; console_unlock(); vtconsole_deinit_device(con_driver); device_destroy(&vtconsole_class, MKDEV(0, con_driver->node)); console_lock(); if (WARN_ON_ONCE(con_driver->con)) con_driver->con = NULL; con_driver->desc = NULL; con_driver->dev = NULL; con_driver->node = 0; WARN_ON_ONCE(con_driver->flag != CON_DRIVER_FLAG_ZOMBIE); con_driver->flag = 0; con_driver->first = 0; con_driver->last = 0; } console_unlock(); } /* * If we support more console drivers, this function is used * when a driver wants to take over some existing consoles * and become default driver for newly opened ones. * * do_take_over_console is basically a register followed by bind */ int do_take_over_console(const struct consw *csw, int first, int last, int deflt) { int err; err = do_register_con_driver(csw, first, last); /* * If we get an busy error we still want to bind the console driver * and return success, as we may have unbound the console driver * but not unregistered it. */ if (err == -EBUSY) err = 0; if (!err) do_bind_con_driver(csw, first, last, deflt); return err; } EXPORT_SYMBOL_GPL(do_take_over_console); /* * give_up_console is a wrapper to unregister_con_driver. It will only * work if driver is fully unbound. */ void give_up_console(const struct consw *csw) { console_lock(); do_unregister_con_driver(csw); console_unlock(); } EXPORT_SYMBOL(give_up_console); static int __init vtconsole_class_init(void) { int i; i = class_register(&vtconsole_class); if (i) pr_warn("Unable to create vt console class; errno = %d\n", i); /* Add system drivers to sysfs */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { struct con_driver *con = &registered_con_driver[i]; if (con->con && !con->dev) { con->dev = device_create_with_groups(&vtconsole_class, NULL, MKDEV(0, con->node), con, con_dev_groups, "vtcon%i", con->node); if (IS_ERR(con->dev)) { pr_warn("Unable to create device for %s; errno = %ld\n", con->desc, PTR_ERR(con->dev)); con->dev = NULL; } else { vtconsole_init_device(con); } } } return 0; } postcore_initcall(vtconsole_class_init); /* * Screen blanking */ static int set_vesa_blanking(u8 __user *mode_user) { u8 mode; if (get_user(mode, mode_user)) return -EFAULT; console_lock(); vesa_blank_mode = (mode <= VESA_BLANK_MAX) ? mode : VESA_NO_BLANKING; console_unlock(); return 0; } void do_blank_screen(int entering_gfx) { struct vc_data *vc = vc_cons[fg_console].d; int i; might_sleep(); WARN_CONSOLE_UNLOCKED(); if (console_blanked) { if (blank_state == blank_vesa_wait) { blank_state = blank_off; vc->vc_sw->con_blank(vc, vesa_blank_mode + 1, 0); } return; } /* entering graphics mode? */ if (entering_gfx) { hide_cursor(vc); save_screen(vc); vc->vc_sw->con_blank(vc, VESA_VSYNC_SUSPEND, 1); console_blanked = fg_console + 1; blank_state = blank_off; set_origin(vc); return; } blank_state = blank_off; /* don't blank graphics */ if (vc->vc_mode != KD_TEXT) { console_blanked = fg_console + 1; return; } hide_cursor(vc); del_timer_sync(&console_timer); blank_timer_expired = 0; save_screen(vc); /* In case we need to reset origin, blanking hook returns 1 */ i = vc->vc_sw->con_blank(vc, vesa_off_interval ? 
VESA_VSYNC_SUSPEND : (vesa_blank_mode + 1), 0); console_blanked = fg_console + 1; if (i) set_origin(vc); if (console_blank_hook && console_blank_hook(1)) return; if (vesa_off_interval && vesa_blank_mode) { blank_state = blank_vesa_wait; mod_timer(&console_timer, jiffies + vesa_off_interval); } vt_event_post(VT_EVENT_BLANK, vc->vc_num, vc->vc_num); } EXPORT_SYMBOL(do_blank_screen); /* * Called by timer as well as from vt_console_driver */ void do_unblank_screen(int leaving_gfx) { struct vc_data *vc; /* This should now always be called from a "sane" (read: can schedule) * context for the sake of the low level drivers, except in the special * case of oops_in_progress */ if (!oops_in_progress) might_sleep(); WARN_CONSOLE_UNLOCKED(); ignore_poke = 0; if (!console_blanked) return; if (!vc_cons_allocated(fg_console)) { /* impossible */ pr_warn("unblank_screen: tty %d not allocated ??\n", fg_console + 1); return; } vc = vc_cons[fg_console].d; if (vc->vc_mode != KD_TEXT) return; /* but leave console_blanked != 0 */ if (blankinterval) { mod_timer(&console_timer, jiffies + (blankinterval * HZ)); blank_state = blank_normal_wait; } console_blanked = 0; if (vc->vc_sw->con_blank(vc, VESA_NO_BLANKING, leaving_gfx)) /* Low-level driver cannot restore -> do it ourselves */ update_screen(vc); if (console_blank_hook) console_blank_hook(0); set_palette(vc); set_cursor(vc); vt_event_post(VT_EVENT_UNBLANK, vc->vc_num, vc->vc_num); } EXPORT_SYMBOL(do_unblank_screen); /* * This is called by the outside world to cause a forced unblank, mostly for * oopses. Currently, I just call do_unblank_screen(0), but we could eventually * call it with 1 as an argument and so force a mode restore... that may kill * X or at least garbage the screen but would also make the Oops visible... */ static void unblank_screen(void) { do_unblank_screen(0); } /* * We defer the timer blanking to work queue so it can take the console mutex * (console operations can still happen at irq time, but only from printk which * has the console mutex. Not perfect yet, but better than no locking */ static void blank_screen_t(struct timer_list *unused) { blank_timer_expired = 1; schedule_work(&console_work); } void poke_blanked_console(void) { WARN_CONSOLE_UNLOCKED(); /* Add this so we quickly catch whoever might call us in a non * safe context. Nowadays, unblank_screen() isn't to be called in * atomic contexts and is allowed to schedule (with the special case * of oops_in_progress, but that isn't of any concern for this * function. --BenH. */ might_sleep(); /* This isn't perfectly race free, but a race here would be mostly harmless, * at worst, we'll do a spurious blank and it's unlikely */ del_timer(&console_timer); blank_timer_expired = 0; if (ignore_poke || !vc_cons[fg_console].d || vc_cons[fg_console].d->vc_mode == KD_GRAPHICS) return; if (console_blanked) unblank_screen(); else if (blankinterval) { mod_timer(&console_timer, jiffies + (blankinterval * HZ)); blank_state = blank_normal_wait; } } /* * Palettes */ static void set_palette(struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_set_palette) vc->vc_sw->con_set_palette(vc, color_table); } /* * Load palette into the DAC registers. arg points to a colour * map, 3 bytes per colour, 16 colours, range from 0 to 255. 
*/ int con_set_cmap(unsigned char __user *arg) { int i, j, k; unsigned char colormap[3*16]; if (copy_from_user(colormap, arg, sizeof(colormap))) return -EFAULT; console_lock(); for (i = k = 0; i < 16; i++) { default_red[i] = colormap[k++]; default_grn[i] = colormap[k++]; default_blu[i] = colormap[k++]; } for (i = 0; i < MAX_NR_CONSOLES; i++) { if (!vc_cons_allocated(i)) continue; for (j = k = 0; j < 16; j++) { vc_cons[i].d->vc_palette[k++] = default_red[j]; vc_cons[i].d->vc_palette[k++] = default_grn[j]; vc_cons[i].d->vc_palette[k++] = default_blu[j]; } set_palette(vc_cons[i].d); } console_unlock(); return 0; } int con_get_cmap(unsigned char __user *arg) { int i, k; unsigned char colormap[3*16]; console_lock(); for (i = k = 0; i < 16; i++) { colormap[k++] = default_red[i]; colormap[k++] = default_grn[i]; colormap[k++] = default_blu[i]; } console_unlock(); if (copy_to_user(arg, colormap, sizeof(colormap))) return -EFAULT; return 0; } void reset_palette(struct vc_data *vc) { int j, k; for (j=k=0; j<16; j++) { vc->vc_palette[k++] = default_red[j]; vc->vc_palette[k++] = default_grn[j]; vc->vc_palette[k++] = default_blu[j]; } set_palette(vc); } /* * Font switching * * Currently we only support fonts up to 128 pixels wide, at a maximum height * of 128 pixels. Userspace fontdata may have to be stored with 32 bytes * (shorts/ints, depending on width) reserved for each character which is * kinda wasty, but this is done in order to maintain compatibility with the * EGA/VGA fonts. It is up to the actual low-level console-driver convert data * into its favorite format (maybe we should add a `fontoffset' field to the * `display' structure so we won't have to convert the fontdata all the time. * /Jes */ #define max_font_width 64 #define max_font_height 128 #define max_font_glyphs 512 #define max_font_size (max_font_glyphs*max_font_width*max_font_height) static int con_font_get(struct vc_data *vc, struct console_font_op *op) { struct console_font font; int rc = -EINVAL; int c; unsigned int vpitch = op->op == KD_FONT_OP_GET_TALL ? op->height : 32; if (vpitch > max_font_height) return -EINVAL; if (op->data) { font.data = kvmalloc(max_font_size, GFP_KERNEL); if (!font.data) return -ENOMEM; } else font.data = NULL; console_lock(); if (vc->vc_mode != KD_TEXT) rc = -EINVAL; else if (vc->vc_sw->con_font_get) rc = vc->vc_sw->con_font_get(vc, &font, vpitch); else rc = -ENOSYS; console_unlock(); if (rc) goto out; c = (font.width+7)/8 * vpitch * font.charcount; if (op->data && font.charcount > op->charcount) rc = -ENOSPC; if (font.width > op->width || font.height > op->height) rc = -ENOSPC; if (rc) goto out; op->height = font.height; op->width = font.width; op->charcount = font.charcount; if (op->data && copy_to_user(op->data, font.data, c)) rc = -EFAULT; out: kvfree(font.data); return rc; } static int con_font_set(struct vc_data *vc, const struct console_font_op *op) { struct console_font font; int rc = -EINVAL; int size; unsigned int vpitch = op->op == KD_FONT_OP_SET_TALL ? 
op->height : 32; if (vc->vc_mode != KD_TEXT) return -EINVAL; if (!op->data) return -EINVAL; if (op->charcount > max_font_glyphs) return -EINVAL; if (op->width <= 0 || op->width > max_font_width || !op->height || op->height > max_font_height) return -EINVAL; if (vpitch < op->height) return -EINVAL; size = (op->width+7)/8 * vpitch * op->charcount; if (size > max_font_size) return -ENOSPC; font.data = memdup_user(op->data, size); if (IS_ERR(font.data)) return PTR_ERR(font.data); font.charcount = op->charcount; font.width = op->width; font.height = op->height; console_lock(); if (vc->vc_mode != KD_TEXT) rc = -EINVAL; else if (vc->vc_sw->con_font_set) { if (vc_is_sel(vc)) clear_selection(); rc = vc->vc_sw->con_font_set(vc, &font, vpitch, op->flags); } else rc = -ENOSYS; console_unlock(); kfree(font.data); return rc; } static int con_font_default(struct vc_data *vc, struct console_font_op *op) { struct console_font font = {.width = op->width, .height = op->height}; char name[MAX_FONT_NAME]; char *s = name; int rc; if (!op->data) s = NULL; else if (strncpy_from_user(name, op->data, MAX_FONT_NAME - 1) < 0) return -EFAULT; else name[MAX_FONT_NAME - 1] = 0; console_lock(); if (vc->vc_mode != KD_TEXT) { console_unlock(); return -EINVAL; } if (vc->vc_sw->con_font_default) { if (vc_is_sel(vc)) clear_selection(); rc = vc->vc_sw->con_font_default(vc, &font, s); } else rc = -ENOSYS; console_unlock(); if (!rc) { op->width = font.width; op->height = font.height; } return rc; } int con_font_op(struct vc_data *vc, struct console_font_op *op) { switch (op->op) { case KD_FONT_OP_SET: case KD_FONT_OP_SET_TALL: return con_font_set(vc, op); case KD_FONT_OP_GET: case KD_FONT_OP_GET_TALL: return con_font_get(vc, op); case KD_FONT_OP_SET_DEFAULT: return con_font_default(vc, op); case KD_FONT_OP_COPY: /* was buggy and never really used */ return -EINVAL; } return -ENOSYS; } /* * Interface exported to selection and vcs. */ /* used by selection */ u16 screen_glyph(const struct vc_data *vc, int offset) { u16 w = scr_readw(screenpos(vc, offset, true)); u16 c = w & 0xff; if (w & vc->vc_hi_font_mask) c |= 0x100; return c; } EXPORT_SYMBOL_GPL(screen_glyph); u32 screen_glyph_unicode(const struct vc_data *vc, int n) { u32 **uni_lines = vc->vc_uni_lines; if (uni_lines) return uni_lines[n / vc->vc_cols][n % vc->vc_cols]; return inverse_translate(vc, screen_glyph(vc, n * 2), true); } EXPORT_SYMBOL_GPL(screen_glyph_unicode); /* used by vcs - note the word offset */ unsigned short *screen_pos(const struct vc_data *vc, int w_offset, bool viewed) { return screenpos(vc, 2 * w_offset, viewed); } EXPORT_SYMBOL_GPL(screen_pos); void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]) { /* clamp values if they don't fit */ xy[0] = min(vc->state.x, 0xFFu); xy[1] = min(vc->state.y, 0xFFu); } void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]) { hide_cursor(vc); gotoxy(vc, xy[0], xy[1]); set_cursor(vc); } u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org) { if ((unsigned long)org == vc->vc_pos && softcursor_original != -1) return softcursor_original; return scr_readw(org); } void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org) { scr_writew(val, org); if ((unsigned long)org == vc->vc_pos) { softcursor_original = -1; add_softcursor(vc); } } void vcs_scr_updated(struct vc_data *vc) { notify_update(vc); }
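/*
 * Example (editor's sketch, not part of vt.c): how a hypothetical console
 * driver could bind itself to every virtual console with
 * do_take_over_console() ("basically a register followed by bind", per the
 * comment above) and later release them with give_up_console().
 * "mycon_sw" is assumed to be a fully populated consw provided elsewhere by
 * the driver. do_take_over_console() expects the console lock to be held,
 * matching the WARN_CONSOLE_UNLOCKED() checks above; give_up_console()
 * takes the lock itself.
 */
#include <linux/console.h>
#include <linux/module.h>
#include <linux/vt_kern.h>

extern const struct consw mycon_sw;	/* hypothetical, defined by the driver */

static int __init mycon_init(void)
{
	int ret;

	console_lock();
	/* register + bind consoles 0..MAX_NR_CONSOLES-1 and become the default */
	ret = do_take_over_console(&mycon_sw, 0, MAX_NR_CONSOLES - 1, 1);
	console_unlock();
	return ret;
}

static void __exit mycon_exit(void)
{
	/* only works once the driver is fully unbound (see give_up_console()) */
	give_up_console(&mycon_sw);
}

module_init(mycon_init);
module_exit(mycon_exit);
MODULE_LICENSE("GPL");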
/* * Stack trace management functions * * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> */ #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> #include <linux/stacktrace.h> #include <linux/export.h> #include <linux/uaccess.h> #include <asm/stacktrace.h> #include <asm/unwind.h> void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { struct unwind_state state; unsigned long addr; if (regs && !consume_entry(cookie, regs->ip)) return; for (unwind_start(&state, task, regs, NULL); !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || !consume_entry(cookie, addr)) break; } } int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task) { struct unwind_state state; struct pt_regs *regs; unsigned long addr; for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { regs = unwind_get_entry_regs(&state, NULL); if (regs) { /* Success path for user tasks */ if (user_mode(regs)) return 0; /* * Kernel mode registers on the stack indicate an * in-kernel interrupt or exception (e.g., preemption * or a page fault), which can make frame pointers * unreliable. */ if (IS_ENABLED(CONFIG_FRAME_POINTER)) return -EINVAL; } addr = unwind_get_return_address(&state); /* * A NULL or invalid return address probably means there's some * generated code which __kernel_text_address() doesn't know * about. */ if (!addr) return -EINVAL; if (!consume_entry(cookie, addr)) return -EINVAL; } /* Check for stack corruption */ if (unwind_error(&state)) return -EINVAL; return 0; } /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ struct stack_frame_user { const void __user *next_fp; unsigned long ret_addr; }; static int copy_stack_frame(const struct stack_frame_user __user *fp, struct stack_frame_user *frame) { int ret; if (!__access_ok(fp, sizeof(*frame))) return 0; ret = 1; pagefault_disable(); if (__get_user(frame->next_fp, &fp->next_fp) || __get_user(frame->ret_addr, &fp->ret_addr)) ret = 0; pagefault_enable(); return ret; } void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, const struct pt_regs *regs) { const void __user *fp = (const void __user *)regs->bp; if (!consume_entry(cookie, regs->ip)) return; while (1) { struct stack_frame_user frame; frame.next_fp = NULL; frame.ret_addr = 0; if (!copy_stack_frame(fp, &frame)) break; if ((unsigned long)fp < regs->sp) break; if (!frame.ret_addr) break; if (!consume_entry(cookie, frame.ret_addr)) break; fp = frame.next_fp; } }
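/*
 * Example (editor's sketch, not part of stacktrace.c): the callback contract
 * used by arch_stack_walk() above, assuming the bool-returning
 * stack_trace_consume_fn prototype. The consume function receives the cookie
 * and one return address per frame and returns false to stop the walk.
 * Ordinary kernel code usually goes through the generic stack_trace_save()
 * helpers instead; the names below (addr_buf, record_entry,
 * capture_current_stack) are illustrative only.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/stacktrace.h>

struct addr_buf {
	unsigned long entries[16];
	unsigned int nr;
};

static bool record_entry(void *cookie, unsigned long addr)
{
	struct addr_buf *buf = cookie;

	if (buf->nr >= ARRAY_SIZE(buf->entries))
		return false;		/* buffer full: stop the walk */
	buf->entries[buf->nr++] = addr;
	return true;			/* keep walking */
}

static unsigned int capture_current_stack(struct addr_buf *buf)
{
	buf->nr = 0;
	/* walk the current task; regs == NULL, so the walk starts at this frame */
	arch_stack_walk(record_entry, buf, current, NULL);
	return buf->nr;
}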
// SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/export.h> #include <linux/fault-inject-usercopy.h> #include <linux/kasan-checks.h> #include <linux/thread_info.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/mm.h> #include <asm/byteorder.h> #include <asm/word-at-a-time.h> #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS #define IS_UNALIGNED(src, dst) 0 #else #define IS_UNALIGNED(src, dst) \ (((long) dst | (long) src) & (sizeof(long) - 1)) #endif /* * Do a strncpy, return length of string without final '\0'. * 'count' is the user-supplied count (return 'count' if we * hit it), 'max' is the address space maximum (and we return * -EFAULT if we hit it). */ static __always_inline long do_strncpy_from_user(char *dst, const char __user *src, unsigned long count, unsigned long max) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; unsigned long res = 0; if (IS_UNALIGNED(src, dst)) goto byte_at_a_time; while (max >= sizeof(unsigned long)) { unsigned long c, data, mask; /* Fall back to byte-at-a-time if we get a page fault */ unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time); /* * Note that we mask out the bytes following the NUL. This is * important to do because string oblivious code may read past * the NUL. For those routines, we don't want to give them * potentially random bytes after the NUL in `src`. * * One example of such code is BPF map keys. BPF treats map keys * as an opaque set of bytes. Without the post-NUL mask, any BPF * maps keyed by strings returned from strncpy_from_user() may * have multiple entries for semantically identical strings. */ if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); mask = zero_bytemask(data); *(unsigned long *)(dst+res) = c & mask; return res + find_zero(data); } *(unsigned long *)(dst+res) = c; res += sizeof(unsigned long); max -= sizeof(unsigned long); } byte_at_a_time: while (max) { char c; unsafe_get_user(c,src+res, efault); dst[res] = c; if (!c) return res; res++; max--; } /* * Uhhuh. We hit 'max'. But was that the user-specified maximum * too? If so, that's ok - we got as much as the user asked for. */ if (res >= count) return res; /* * Nope: we hit the address space limit, and we still had more * characters the caller would have wanted. That's an EFAULT. */ efault: return -EFAULT; } /** * strncpy_from_user: - Copy a NUL terminated string from userspace. * @dst: Destination address, in kernel space. This buffer must be at * least @count bytes long. * @src: Source address, in user space. * @count: Maximum number of bytes to copy, including the trailing NUL. * * Copies a NUL-terminated string from userspace to kernel space. * * On success, returns the length of the string (not including the trailing * NUL). * * If access to userspace fails, returns -EFAULT (some data may have been * copied). 
* * If @count is smaller than the length of the string, copies @count bytes * and returns @count. */ long strncpy_from_user(char *dst, const char __user *src, long count) { unsigned long max_addr, src_addr; might_fault(); if (should_fail_usercopy()) return -EFAULT; if (unlikely(count <= 0)) return 0; max_addr = TASK_SIZE_MAX; src_addr = (unsigned long)untagged_addr(src); if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; long retval; /* * Truncate 'max' to the user-specified limit, so that * we only have one limit we need to check in the loop */ if (max > count) max = count; kasan_check_write(dst, count); check_object_size(dst, count, false); if (user_read_access_begin(src, max)) { retval = do_strncpy_from_user(dst, src, count, max); user_read_access_end(); return retval; } } return -EFAULT; } EXPORT_SYMBOL(strncpy_from_user);
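/*
 * Example (editor's sketch, not part of this file): handling the three
 * documented outcomes of strncpy_from_user(). A negative return is -EFAULT,
 * a return equal to the buffer size means the string was truncated and is
 * not NUL-terminated, and anything smaller is the length of a properly
 * terminated copy. The helper name and MY_NAME_LEN are hypothetical.
 */
#include <linux/errno.h>
#include <linux/uaccess.h>

#define MY_NAME_LEN 64			/* assumed destination buffer size */

static int copy_name_from_user(char *name, const char __user *uname)
{
	long len;

	len = strncpy_from_user(name, uname, MY_NAME_LEN);
	if (len < 0)
		return len;		/* -EFAULT: faulted before the NUL */
	if (len == MY_NAME_LEN)
		return -ENAMETOOLONG;	/* hit the limit: no terminating NUL */

	/* name[] now holds a NUL-terminated string of 'len' bytes */
	return 0;
}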
// SPDX-License-Identifier: GPL-2.0 /* * drivers/usb/core/file.c * * (C) Copyright Linus Torvalds 1999 * (C) Copyright Johannes Erdfelt 1999-2001 * (C) Copyright Andreas Gal 1999 * (C) Copyright Gregory P. Smith 1999 * (C) Copyright Deti Fliegl 1999 (new USB architecture) * (C) Copyright Randy Dunlap 2000 * (C) Copyright David Brownell 2000-2001 (kernel hotplug, usb_device_id, * more docs, etc) * (C) Copyright Yggdrasil Computing, Inc. 2000 * (usb_device_id matching changes by Adam J. Richter) * (C) Copyright Greg Kroah-Hartman 2002-2003 * * Released under the GPLv2 only. */ #include <linux/module.h> #include <linux/errno.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/usb.h> #include "usb.h" #define MAX_USB_MINORS 256 static const struct file_operations *usb_minors[MAX_USB_MINORS]; static DECLARE_RWSEM(minor_rwsem); static int usb_open(struct inode *inode, struct file *file) { int err = -ENODEV; const struct file_operations *new_fops; down_read(&minor_rwsem); new_fops = fops_get(usb_minors[iminor(inode)]); if (!new_fops) goto done; replace_fops(file, new_fops); /* Curiouser and curiouser... NULL ->open() as "no device" ? */ if (file->f_op->open) err = file->f_op->open(inode, file); done: up_read(&minor_rwsem); return err; } static const struct file_operations usb_fops = { .owner = THIS_MODULE, .open = usb_open, .llseek = noop_llseek, }; static char *usb_devnode(const struct device *dev, umode_t *mode) { struct usb_class_driver *drv; drv = dev_get_drvdata(dev); if (!drv || !drv->devnode) return NULL; return drv->devnode(dev, mode); } const struct class usbmisc_class = { .name = "usbmisc", .devnode = usb_devnode, }; int usb_major_init(void) { int error; error = register_chrdev(USB_MAJOR, "usb", &usb_fops); if (error) printk(KERN_ERR "Unable to get major %d for usb devices\n", USB_MAJOR); return error; } void usb_major_cleanup(void) { unregister_chrdev(USB_MAJOR, "usb"); } /** * usb_register_dev - register a USB device, and ask for a minor number * @intf: pointer to the usb_interface that is being registered * @class_driver: pointer to the usb_class_driver for this device * * This should be called by all USB drivers that use the USB major number. * If CONFIG_USB_DYNAMIC_MINORS is enabled, the minor number will be * dynamically allocated out of the list of available ones. If it is not * enabled, the minor number will be based on the next available free minor, * starting at the class_driver->minor_base. * * This function also creates a usb class device in the sysfs tree. * * usb_deregister_dev() must be called when the driver is done with * the minor numbers given out by this function. * * Return: -EINVAL if something bad happens with trying to register a * device, and 0 on success. 
*/ int usb_register_dev(struct usb_interface *intf, struct usb_class_driver *class_driver) { int retval = 0; int minor_base = class_driver->minor_base; int minor; char name[20]; #ifdef CONFIG_USB_DYNAMIC_MINORS /* * We don't care what the device tries to start at, we want to start * at zero to pack the devices into the smallest available space with * no holes in the minor range. */ minor_base = 0; #endif if (class_driver->fops == NULL) return -EINVAL; if (intf->minor >= 0) return -EADDRINUSE; dev_dbg(&intf->dev, "looking for a minor, starting at %d\n", minor_base); down_write(&minor_rwsem); for (minor = minor_base; minor < MAX_USB_MINORS; ++minor) { if (usb_minors[minor]) continue; usb_minors[minor] = class_driver->fops; intf->minor = minor; break; } if (intf->minor < 0) { up_write(&minor_rwsem); return -EXFULL; } /* create a usb class device for this usb interface */ snprintf(name, sizeof(name), class_driver->name, minor - minor_base); intf->usb_dev = device_create(&usbmisc_class, &intf->dev, MKDEV(USB_MAJOR, minor), class_driver, "%s", kbasename(name)); if (IS_ERR(intf->usb_dev)) { usb_minors[minor] = NULL; intf->minor = -1; retval = PTR_ERR(intf->usb_dev); } up_write(&minor_rwsem); return retval; } EXPORT_SYMBOL_GPL(usb_register_dev); /** * usb_deregister_dev - deregister a USB device's dynamic minor. * @intf: pointer to the usb_interface that is being deregistered * @class_driver: pointer to the usb_class_driver for this device * * Used in conjunction with usb_register_dev(). This function is called * when the USB driver is finished with the minor numbers gotten from a * call to usb_register_dev() (usually when the device is disconnected * from the system.) * * This function also removes the usb class device from the sysfs tree. * * This should be called by all drivers that use the USB major number. */ void usb_deregister_dev(struct usb_interface *intf, struct usb_class_driver *class_driver) { if (intf->minor == -1) return; dev_dbg(&intf->dev, "removing %d minor\n", intf->minor); device_destroy(&usbmisc_class, MKDEV(USB_MAJOR, intf->minor)); down_write(&minor_rwsem); usb_minors[intf->minor] = NULL; up_write(&minor_rwsem); intf->usb_dev = NULL; intf->minor = -1; } EXPORT_SYMBOL_GPL(usb_deregister_dev);
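/*
 * Example (editor's sketch, not part of file.c): typical use of
 * usb_register_dev()/usb_deregister_dev() from a driver's probe/disconnect
 * callbacks. Only the usb_class_driver fields referenced above (name, fops,
 * minor_base, devnode) are relied on; the fops, name template and minor
 * base values are hypothetical.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/usb.h>

static const struct file_operations mydev_fops = {
	.owner = THIS_MODULE,
	/* .open/.read/... of the hypothetical character device go here */
};

static struct usb_class_driver mydev_class = {
	.name = "mydev%d",	/* printf-style template, expanded with the minor offset */
	.fops = &mydev_fops,
	.minor_base = 192,	/* starting minor; ignored with CONFIG_USB_DYNAMIC_MINORS */
};

static int mydev_probe(struct usb_interface *intf, const struct usb_device_id *id)
{
	int ret;

	ret = usb_register_dev(intf, &mydev_class);
	if (ret)
		dev_err(&intf->dev, "unable to obtain a minor: %d\n", ret);
	return ret;
}

static void mydev_disconnect(struct usb_interface *intf)
{
	usb_deregister_dev(intf, &mydev_class);
}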
// SPDX-License-Identifier: GPL-2.0 /* dvb-usb-dvb.c is part of the DVB USB library. * * Copyright (C) 2004-6 Patrick Boettcher (patrick.boettcher@posteo.de) * see dvb-usb-init.c for copyright information. * * This file contains functions for initializing and handling the * linux-dvb API. */ #include "dvb-usb-common.h" #include <media/media-device.h> /* does the complete input transfer handling */ static int dvb_usb_ctrl_feed(struct dvb_demux_feed *dvbdmxfeed, int onoff) { struct dvb_usb_adapter *adap = dvbdmxfeed->demux->priv; int newfeedcount, ret; if (adap == NULL) return -ENODEV; if ((adap->active_fe < 0) || (adap->active_fe >= adap->num_frontends_initialized)) { return -EINVAL; } newfeedcount = adap->feedcount + (onoff ? 1 : -1); /* stop feed before setting a new pid if there will be no pid anymore */ if (newfeedcount == 0) { deb_ts("stop feeding\n"); usb_urb_kill(&adap->fe_adap[adap->active_fe].stream); if (adap->props.fe[adap->active_fe].streaming_ctrl != NULL) { ret = adap->props.fe[adap->active_fe].streaming_ctrl(adap, 0); if (ret < 0) { err("error while stopping stream."); return ret; } } } adap->feedcount = newfeedcount; /* activate the pid on the device specific pid_filter */ deb_ts("setting pid (%s): %5d %04x at index %d '%s'\n", adap->fe_adap[adap->active_fe].pid_filtering ? "yes" : "no", dvbdmxfeed->pid, dvbdmxfeed->pid, dvbdmxfeed->index, onoff ? "on" : "off"); if (adap->props.fe[adap->active_fe].caps & DVB_USB_ADAP_HAS_PID_FILTER && adap->fe_adap[adap->active_fe].pid_filtering && adap->props.fe[adap->active_fe].pid_filter != NULL) adap->props.fe[adap->active_fe].pid_filter(adap, dvbdmxfeed->index, dvbdmxfeed->pid, onoff); /* start the feed if this was the first feed and there is still a feed * for reception. 
*/ if (adap->feedcount == onoff && adap->feedcount > 0) { deb_ts("controlling pid parser\n"); if (adap->props.fe[adap->active_fe].caps & DVB_USB_ADAP_HAS_PID_FILTER && adap->props.fe[adap->active_fe].caps & DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF && adap->props.fe[adap->active_fe].pid_filter_ctrl != NULL) { ret = adap->props.fe[adap->active_fe].pid_filter_ctrl(adap, adap->fe_adap[adap->active_fe].pid_filtering); if (ret < 0) { err("could not handle pid_parser"); return ret; } } deb_ts("start feeding\n"); if (adap->props.fe[adap->active_fe].streaming_ctrl != NULL) { ret = adap->props.fe[adap->active_fe].streaming_ctrl(adap, 1); if (ret < 0) { err("error while enabling fifo."); return ret; } } deb_ts("submitting all URBs\n"); usb_urb_submit(&adap->fe_adap[adap->active_fe].stream); } return 0; } static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed) { deb_ts("start pid: 0x%04x, feedtype: %d\n", dvbdmxfeed->pid, dvbdmxfeed->type); return dvb_usb_ctrl_feed(dvbdmxfeed, 1); } static int dvb_usb_stop_feed(struct dvb_demux_feed *dvbdmxfeed) { deb_ts("stop pid: 0x%04x, feedtype: %d\n", dvbdmxfeed->pid, dvbdmxfeed->type); return dvb_usb_ctrl_feed(dvbdmxfeed, 0); } static int dvb_usb_media_device_init(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB struct media_device *mdev; struct dvb_usb_device *d = adap->dev; struct usb_device *udev = d->udev; mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return -ENOMEM; media_device_usb_init(mdev, udev, d->desc->name); dvb_register_media_controller(&adap->dvb_adap, mdev); dev_info(&d->udev->dev, "media controller created\n"); #endif return 0; } static int dvb_usb_media_device_register(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB return media_device_register(adap->dvb_adap.mdev); #else return 0; #endif } static void dvb_usb_media_device_unregister(struct dvb_usb_adapter *adap) { #ifdef CONFIG_MEDIA_CONTROLLER_DVB if (!adap->dvb_adap.mdev) return; mutex_lock(&adap->dvb_adap.mdev_lock); media_device_unregister(adap->dvb_adap.mdev); media_device_cleanup(adap->dvb_adap.mdev); kfree(adap->dvb_adap.mdev); adap->dvb_adap.mdev = NULL; mutex_unlock(&adap->dvb_adap.mdev_lock); #endif } int dvb_usb_adapter_dvb_init(struct dvb_usb_adapter *adap, short *adapter_nums) { int i; int ret = dvb_register_adapter(&adap->dvb_adap, adap->dev->desc->name, adap->dev->owner, &adap->dev->udev->dev, adapter_nums); if (ret < 0) { deb_info("dvb_register_adapter failed: error %d", ret); goto err; } adap->dvb_adap.priv = adap; ret = dvb_usb_media_device_init(adap); if (ret < 0) { deb_info("dvb_usb_media_device_init failed: error %d", ret); goto err_mc; } if (adap->dev->props.read_mac_address) { if (adap->dev->props.read_mac_address(adap->dev, adap->dvb_adap.proposed_mac) == 0) info("MAC address: %pM", adap->dvb_adap.proposed_mac); else err("MAC address reading failed."); } adap->demux.dmx.capabilities = DMX_TS_FILTERING | DMX_SECTION_FILTERING; adap->demux.priv = adap; adap->demux.filternum = 0; for (i = 0; i < adap->props.num_frontends; i++) { if (adap->demux.filternum < adap->fe_adap[i].max_feed_count) adap->demux.filternum = adap->fe_adap[i].max_feed_count; } adap->demux.feednum = adap->demux.filternum; adap->demux.start_feed = dvb_usb_start_feed; adap->demux.stop_feed = dvb_usb_stop_feed; adap->demux.write_to_decoder = NULL; if ((ret = dvb_dmx_init(&adap->demux)) < 0) { err("dvb_dmx_init failed: error %d", ret); goto err_dmx; } adap->dmxdev.filternum = adap->demux.filternum; adap->dmxdev.demux = &adap->demux.dmx; 
adap->dmxdev.capabilities = 0; if ((ret = dvb_dmxdev_init(&adap->dmxdev, &adap->dvb_adap)) < 0) { err("dvb_dmxdev_init failed: error %d", ret); goto err_dmx_dev; } if ((ret = dvb_net_init(&adap->dvb_adap, &adap->dvb_net, &adap->demux.dmx)) < 0) { err("dvb_net_init failed: error %d", ret); goto err_net_init; } adap->state |= DVB_USB_ADAP_STATE_DVB; return 0; err_net_init: dvb_dmxdev_release(&adap->dmxdev); err_dmx_dev: dvb_dmx_release(&adap->demux); err_dmx: dvb_usb_media_device_unregister(adap); err_mc: dvb_unregister_adapter(&adap->dvb_adap); err: return ret; } int dvb_usb_adapter_dvb_exit(struct dvb_usb_adapter *adap) { if (adap->state & DVB_USB_ADAP_STATE_DVB) { deb_info("unregistering DVB part\n"); dvb_net_release(&adap->dvb_net); adap->demux.dmx.close(&adap->demux.dmx); dvb_dmxdev_release(&adap->dmxdev); dvb_dmx_release(&adap->demux); dvb_usb_media_device_unregister(adap); dvb_unregister_adapter(&adap->dvb_adap); adap->state &= ~DVB_USB_ADAP_STATE_DVB; } return 0; } static int dvb_usb_set_active_fe(struct dvb_frontend *fe, int onoff) { struct dvb_usb_adapter *adap = fe->dvb->priv; int ret = (adap->props.frontend_ctrl) ? adap->props.frontend_ctrl(fe, onoff) : 0; if (ret < 0) { err("frontend_ctrl request failed"); return ret; } if (onoff) adap->active_fe = fe->id; return 0; } static int dvb_usb_fe_wakeup(struct dvb_frontend *fe) { struct dvb_usb_adapter *adap = fe->dvb->priv; dvb_usb_device_power_ctrl(adap->dev, 1); dvb_usb_set_active_fe(fe, 1); if (adap->fe_adap[fe->id].fe_init) adap->fe_adap[fe->id].fe_init(fe); return 0; } static int dvb_usb_fe_sleep(struct dvb_frontend *fe) { struct dvb_usb_adapter *adap = fe->dvb->priv; if (adap->fe_adap[fe->id].fe_sleep) adap->fe_adap[fe->id].fe_sleep(fe); dvb_usb_set_active_fe(fe, 0); return dvb_usb_device_power_ctrl(adap->dev, 0); } int dvb_usb_adapter_frontend_init(struct dvb_usb_adapter *adap) { int ret, i; /* register all given adapter frontends */ for (i = 0; i < adap->props.num_frontends; i++) { if (adap->props.fe[i].frontend_attach == NULL) { err("strange: '%s' #%d,%d doesn't want to attach a frontend.", adap->dev->desc->name, adap->id, i); return 0; } ret = adap->props.fe[i].frontend_attach(adap); if (ret || adap->fe_adap[i].fe == NULL) { /* only print error when there is no FE at all */ if (i == 0) err("no frontend was attached by '%s'", adap->dev->desc->name); return 0; } adap->fe_adap[i].fe->id = i; /* re-assign sleep and wakeup functions */ adap->fe_adap[i].fe_init = adap->fe_adap[i].fe->ops.init; adap->fe_adap[i].fe->ops.init = dvb_usb_fe_wakeup; adap->fe_adap[i].fe_sleep = adap->fe_adap[i].fe->ops.sleep; adap->fe_adap[i].fe->ops.sleep = dvb_usb_fe_sleep; if (dvb_register_frontend(&adap->dvb_adap, adap->fe_adap[i].fe)) { err("Frontend %d registration failed.", i); dvb_frontend_detach(adap->fe_adap[i].fe); adap->fe_adap[i].fe = NULL; /* In error case, do not try register more FEs, * still leaving already registered FEs alive. 
*/ if (i == 0) return -ENODEV; else return 0; } /* only attach the tuner if the demod is there */ if (adap->props.fe[i].tuner_attach != NULL) adap->props.fe[i].tuner_attach(adap); adap->num_frontends_initialized++; } ret = dvb_create_media_graph(&adap->dvb_adap, true); if (ret) return ret; ret = dvb_usb_media_device_register(adap); return ret; } int dvb_usb_adapter_frontend_exit(struct dvb_usb_adapter *adap) { int i = adap->num_frontends_initialized - 1; /* unregister all given adapter frontends */ for (; i >= 0; i--) { if (adap->fe_adap[i].fe != NULL) { dvb_unregister_frontend(adap->fe_adap[i].fe); dvb_frontend_detach(adap->fe_adap[i].fe); } } adap->num_frontends_initialized = 0; return 0; }
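/*
 * Example (editor's sketch, not part of dvb-usb-dvb.c): the per-frontend
 * callbacks invoked above (streaming_ctrl, pid_filter, pid_filter_ctrl,
 * frontend_attach) are supplied by the individual device driver through its
 * adapter properties. This fragment assumes the dvb_usb_adapter_fe_properties
 * layout implied by the adap->props.fe[...] accesses above; all mydev_*
 * callbacks are hypothetical stubs and the .stream URB setup is omitted.
 */
#include "dvb-usb.h"

static int mydev_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff)
{
	return 0;	/* start/stop TS delivery over USB in the real driver */
}

static int mydev_pid_filter_ctrl(struct dvb_usb_adapter *adap, int onoff)
{
	return 0;	/* enable/disable the hardware PID filter as a whole */
}

static int mydev_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff)
{
	return 0;	/* program one PID filter slot */
}

static int mydev_frontend_attach(struct dvb_usb_adapter *adap)
{
	return 0;	/* attach the demod and store it in adap->fe_adap[0].fe */
}

static struct dvb_usb_adapter_fe_properties mydev_fe_props = {
	.caps = DVB_USB_ADAP_HAS_PID_FILTER |
		DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF,
	.pid_filter_count = 32,
	.streaming_ctrl   = mydev_streaming_ctrl,
	.pid_filter_ctrl  = mydev_pid_filter_ctrl,
	.pid_filter       = mydev_pid_filter,
	.frontend_attach  = mydev_frontend_attach,
	/* .stream (URB configuration) intentionally left out of this sketch */
};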
// SPDX-License-Identifier: GPL-2.0 /* -*- linux-c -*- * Cypress USB Thermometer driver * * Copyright (c) 2004 Erik Rigtorp <erkki@linux.nu> <erik@rigtorp.com> * * This driver works with Elektor magazine USB Interface as published in * issue #291. It should also work with the original starter kit/demo board * from Cypress. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb.h> #define DRIVER_AUTHOR "Erik Rigtorp" #define DRIVER_DESC "Cypress USB Thermometer driver" #define USB_SKEL_VENDOR_ID 0x04b4 #define USB_SKEL_PRODUCT_ID 0x0002 static const struct usb_device_id id_table[] = { { USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) }, { } }; MODULE_DEVICE_TABLE (usb, id_table); /* Structure to hold all of our device specific stuff */ struct usb_cytherm { struct usb_device *udev; /* save off the usb device pointer */ struct usb_interface *interface; /* the interface for this device */ int brightness; }; /* Vendor requests */ /* They all operate on one byte at a time */ #define PING 0x00 #define READ_ROM 0x01 /* Reads from ROM, value = address */ #define READ_RAM 0x02 /* Reads from RAM, value = address */ #define WRITE_RAM 0x03 /* Write to RAM, value = address, index = data */ #define READ_PORT 0x04 /* Reads from port, value = address */ #define WRITE_PORT 0x05 /* Write to port, value = address, index = data */ /* Send a vendor command to device */ static int vendor_command(struct usb_device *dev, unsigned char request, unsigned char value, unsigned char index, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_OTHER, value, index, buf, size, USB_CTRL_GET_TIMEOUT); } #define BRIGHTNESS 0x2c /* RAM location for brightness value */ #define BRIGHTNESS_SEM 0x2b /* RAM location for brightness semaphore */ static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); return sprintf(buf, "%i", cytherm->brightness); } static ssize_t brightness_store(struct device *dev, struct device_attribute *attr, const char *buf, 
size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); unsigned char *buffer; int retval; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; cytherm->brightness = simple_strtoul(buf, NULL, 10); if (cytherm->brightness > 0xFF) cytherm->brightness = 0xFF; else if (cytherm->brightness < 0) cytherm->brightness = 0; /* Set brightness */ retval = vendor_command(cytherm->udev, WRITE_RAM, BRIGHTNESS, cytherm->brightness, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); /* Inform µC that we have changed the brightness setting */ retval = vendor_command(cytherm->udev, WRITE_RAM, BRIGHTNESS_SEM, 0x01, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); kfree(buffer); return count; } static DEVICE_ATTR_RW(brightness); #define TEMP 0x33 /* RAM location for temperature */ #define SIGN 0x34 /* RAM location for temperature sign */ static ssize_t temp_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); int retval; unsigned char *buffer; int temp, sign; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; /* read temperature */ retval = vendor_command(cytherm->udev, READ_RAM, TEMP, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); temp = buffer[1]; /* read sign */ retval = vendor_command(cytherm->udev, READ_RAM, SIGN, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); sign = buffer[1]; kfree(buffer); return sprintf(buf, "%c%i.%i", sign ? '-' : '+', temp >> 1, 5*(temp - ((temp >> 1) << 1))); } static DEVICE_ATTR_RO(temp); #define BUTTON 0x7a static ssize_t button_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); int retval; unsigned char *buffer; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; /* check button */ retval = vendor_command(cytherm->udev, READ_RAM, BUTTON, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); retval = buffer[1]; kfree(buffer); if (retval) return sprintf(buf, "1"); else return sprintf(buf, "0"); } static DEVICE_ATTR_RO(button); static ssize_t port0_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); int retval; unsigned char *buffer; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; retval = vendor_command(cytherm->udev, READ_PORT, 0, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); retval = buffer[1]; kfree(buffer); return sprintf(buf, "%d", retval); } static ssize_t port0_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); unsigned char *buffer; int retval; int tmp; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; tmp = simple_strtoul(buf, NULL, 10); if (tmp > 0xFF) tmp = 0xFF; else if (tmp < 0) tmp = 0; retval = vendor_command(cytherm->udev, WRITE_PORT, 0, tmp, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); kfree(buffer); return count; } static DEVICE_ATTR_RW(port0); static ssize_t port1_show(struct device *dev, struct device_attribute *attr, char *buf) { struct 
usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); int retval; unsigned char *buffer; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; retval = vendor_command(cytherm->udev, READ_PORT, 1, 0, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); retval = buffer[1]; kfree(buffer); return sprintf(buf, "%d", retval); } static ssize_t port1_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_cytherm *cytherm = usb_get_intfdata(intf); unsigned char *buffer; int retval; int tmp; buffer = kmalloc(8, GFP_KERNEL); if (!buffer) return 0; tmp = simple_strtoul(buf, NULL, 10); if (tmp > 0xFF) tmp = 0xFF; else if (tmp < 0) tmp = 0; retval = vendor_command(cytherm->udev, WRITE_PORT, 1, tmp, buffer, 8); if (retval) dev_dbg(&cytherm->udev->dev, "retval = %d\n", retval); kfree(buffer); return count; } static DEVICE_ATTR_RW(port1); static struct attribute *cytherm_attrs[] = { &dev_attr_brightness.attr, &dev_attr_temp.attr, &dev_attr_button.attr, &dev_attr_port0.attr, &dev_attr_port1.attr, NULL, }; ATTRIBUTE_GROUPS(cytherm); static int cytherm_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_cytherm *dev; int retval = -ENOMEM; dev = kzalloc(sizeof(struct usb_cytherm), GFP_KERNEL); if (!dev) goto error_mem; dev->udev = usb_get_dev(udev); usb_set_intfdata(interface, dev); dev->brightness = 0xFF; dev_info(&interface->dev, "Cypress thermometer device now attached\n"); return 0; error_mem: return retval; } static void cytherm_disconnect(struct usb_interface *interface) { struct usb_cytherm *dev; dev = usb_get_intfdata(interface); /* first remove the files, then NULL the pointer */ usb_set_intfdata(interface, NULL); usb_put_dev(dev->udev); kfree(dev); dev_info(&interface->dev, "Cypress thermometer now disconnected\n"); } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver cytherm_driver = { .name = "cytherm", .probe = cytherm_probe, .disconnect = cytherm_disconnect, .id_table = id_table, .dev_groups = cytherm_groups, }; module_usb_driver(cytherm_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
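/*
 * Example (editor's sketch, not part of cytherm.c): temp_show() above reports
 * the raw RAM byte in half-degree steps (raw >> 1 whole degrees, ".5" when
 * the low bit is set) together with a separate sign byte. A userspace reader
 * of the "temp" sysfs attribute could decode the same encoding like this;
 * the helper and sample values are illustrative only.
 */
#include <stdio.h>

/* raw: byte read from RAM location TEMP (0x33); sign: byte from SIGN (0x34) */
static void print_cytherm_temp(unsigned char raw, unsigned char sign)
{
	int whole = raw >> 1;		/* raw counts half degrees */
	int frac  = 5 * (raw & 1);	/* 0 or 5, i.e. ".0" or ".5" */

	printf("%c%d.%d\n", sign ? '-' : '+', whole, frac);
}

int main(void)
{
	print_cytherm_temp(43, 0);	/* prints "+21.5" */
	return 0;
}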
// SPDX-License-Identifier: GPL-2.0-or-later
/******************************************************************************
 * usbtouchscreen.c
 * Driver for USB Touchscreens, supporting those devices:
 *  - eGalax Touchkit
 *    includes eTurboTouch CT-410/510/700
 *  - 3M/Microtouch EX II series
 *  - ITM
 *  - PanJit TouchSet
 *  - eTurboTouch
 *  - Gunze AHL61
 *  - DMC TSC-10/25
 *  - IRTOUCHSYSTEMS/UNITOP
 *  - IdealTEK URTC1000
 *  - General Touch
 *  - GoTop Super_Q2/GogoPen/PenPower tablets
 *  - JASTEC USB touch controller/DigiTech DTR-02U
 *  - Zytronic capacitive touchscreen
 *  - NEXIO/iNexio
 *  - Elo TouchSystems 2700 IntelliTouch
 *  - EasyTouch USB Dual/Multi touch controller from Data Modul
 *
 * Copyright (C) 2004-2007 by Daniel Ritz <daniel.ritz@gmx.ch>
 * Copyright (C) by Todd E. Johnson (mtouchusb.c)
 *
 * Driver is based on touchkitusb.c
 * - ITM parts are from itmtouch.c
 * - 3M parts are from mtouchusb.c
 * - PanJit parts are from an unmerged driver by Lanslott Gish
 * - DMC TSC 10/25 are from Holger Schurig, with ideas from an unmerged
 *   driver from Marius Vollmer
 *
 *****************************************************************************/

//#define DEBUG

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/input.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/input.h>
#include <linux/hid.h>
#include <linux/mutex.h>

static bool swap_xy;
module_param(swap_xy, bool, 0644);
MODULE_PARM_DESC(swap_xy, "If set X and Y axes are swapped.");

static bool hwcalib_xy;
module_param(hwcalib_xy, bool, 0644);
MODULE_PARM_DESC(hwcalib_xy, "If set hw-calibrated X/Y are used if available");

/* device specifc data/functions */
struct usbtouch_usb;
struct usbtouch_device_info {
	int min_xc, max_xc;
	int min_yc, max_yc;
	int min_press, max_press;
	int rept_size;

	/*
	 * Always service the USB devices irq not just when the input device is
	 * open. This is useful when devices have a watchdog which prevents us
	 * from periodically polling the device. Leave this unset unless your
	 * touchscreen device requires it, as it does consume more of the USB
	 * bandwidth.
	 */
	bool irq_always;

	void (*process_pkt) (struct usbtouch_usb *usbtouch, unsigned char *pkt, int len);

	/*
	 * used to get the packet len.
possible return values: * > 0: packet len * = 0: skip one byte * < 0: -return value more bytes needed */ int (*get_pkt_len) (unsigned char *pkt, int len); int (*read_data) (struct usbtouch_usb *usbtouch, unsigned char *pkt); int (*alloc) (struct usbtouch_usb *usbtouch); int (*init) (struct usbtouch_usb *usbtouch); void (*exit) (struct usbtouch_usb *usbtouch); }; /* a usbtouch device */ struct usbtouch_usb { unsigned char *data; dma_addr_t data_dma; int data_size; unsigned char *buffer; int buf_len; struct urb *irq; struct usb_interface *interface; struct input_dev *input; struct usbtouch_device_info *type; struct mutex pm_mutex; /* serialize access to open/suspend */ bool is_open; char name[128]; char phys[64]; void *priv; int x, y; int touch, press; }; /* device types */ enum { DEVTYPE_IGNORE = -1, DEVTYPE_EGALAX, DEVTYPE_PANJIT, DEVTYPE_3M, DEVTYPE_ITM, DEVTYPE_ETURBO, DEVTYPE_GUNZE, DEVTYPE_DMC_TSC10, DEVTYPE_IRTOUCH, DEVTYPE_IRTOUCH_HIRES, DEVTYPE_IDEALTEK, DEVTYPE_GENERAL_TOUCH, DEVTYPE_GOTOP, DEVTYPE_JASTEC, DEVTYPE_E2I, DEVTYPE_ZYTRONIC, DEVTYPE_TC45USB, DEVTYPE_NEXIO, DEVTYPE_ELO, DEVTYPE_ETOUCH, }; #define USB_DEVICE_HID_CLASS(vend, prod) \ .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS \ | USB_DEVICE_ID_MATCH_DEVICE, \ .idVendor = (vend), \ .idProduct = (prod), \ .bInterfaceClass = USB_INTERFACE_CLASS_HID static const struct usb_device_id usbtouch_devices[] = { #ifdef CONFIG_TOUCHSCREEN_USB_EGALAX /* ignore the HID capable devices, handled by usbhid */ {USB_DEVICE_HID_CLASS(0x0eef, 0x0001), .driver_info = DEVTYPE_IGNORE}, {USB_DEVICE_HID_CLASS(0x0eef, 0x0002), .driver_info = DEVTYPE_IGNORE}, /* normal device IDs */ {USB_DEVICE(0x3823, 0x0001), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x3823, 0x0002), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x0123, 0x0001), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x0eef, 0x0001), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x0eef, 0x0002), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x1234, 0x0001), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x1234, 0x0002), .driver_info = DEVTYPE_EGALAX}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_PANJIT {USB_DEVICE(0x134c, 0x0001), .driver_info = DEVTYPE_PANJIT}, {USB_DEVICE(0x134c, 0x0002), .driver_info = DEVTYPE_PANJIT}, {USB_DEVICE(0x134c, 0x0003), .driver_info = DEVTYPE_PANJIT}, {USB_DEVICE(0x134c, 0x0004), .driver_info = DEVTYPE_PANJIT}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_3M {USB_DEVICE(0x0596, 0x0001), .driver_info = DEVTYPE_3M}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ITM {USB_DEVICE(0x0403, 0xf9e9), .driver_info = DEVTYPE_ITM}, {USB_DEVICE(0x16e3, 0xf9e9), .driver_info = DEVTYPE_ITM}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETURBO {USB_DEVICE(0x1234, 0x5678), .driver_info = DEVTYPE_ETURBO}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GUNZE {USB_DEVICE(0x0637, 0x0001), .driver_info = DEVTYPE_GUNZE}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_DMC_TSC10 {USB_DEVICE(0x0afa, 0x03e8), .driver_info = DEVTYPE_DMC_TSC10}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IRTOUCH {USB_DEVICE(0x255e, 0x0001), .driver_info = DEVTYPE_IRTOUCH}, {USB_DEVICE(0x595a, 0x0001), .driver_info = DEVTYPE_IRTOUCH}, {USB_DEVICE(0x6615, 0x0001), .driver_info = DEVTYPE_IRTOUCH}, {USB_DEVICE(0x6615, 0x0012), .driver_info = DEVTYPE_IRTOUCH_HIRES}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK {USB_DEVICE(0x1391, 0x1000), .driver_info = DEVTYPE_IDEALTEK}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH {USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GOTOP {USB_DEVICE(0x08f2, 0x007f), .driver_info = 
DEVTYPE_GOTOP}, {USB_DEVICE(0x08f2, 0x00ce), .driver_info = DEVTYPE_GOTOP}, {USB_DEVICE(0x08f2, 0x00f4), .driver_info = DEVTYPE_GOTOP}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_JASTEC {USB_DEVICE(0x0f92, 0x0001), .driver_info = DEVTYPE_JASTEC}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_E2I {USB_DEVICE(0x1ac7, 0x0001), .driver_info = DEVTYPE_E2I}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ZYTRONIC {USB_DEVICE(0x14c8, 0x0003), .driver_info = DEVTYPE_ZYTRONIC}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETT_TC45USB /* TC5UH */ {USB_DEVICE(0x0664, 0x0309), .driver_info = DEVTYPE_TC45USB}, /* TC4UM */ {USB_DEVICE(0x0664, 0x0306), .driver_info = DEVTYPE_TC45USB}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_NEXIO /* data interface only */ {USB_DEVICE_AND_INTERFACE_INFO(0x10f0, 0x2002, 0x0a, 0x00, 0x00), .driver_info = DEVTYPE_NEXIO}, {USB_DEVICE_AND_INTERFACE_INFO(0x1870, 0x0001, 0x0a, 0x00, 0x00), .driver_info = DEVTYPE_NEXIO}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ELO {USB_DEVICE(0x04e7, 0x0020), .driver_info = DEVTYPE_ELO}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_EASYTOUCH {USB_DEVICE(0x7374, 0x0001), .driver_info = DEVTYPE_ETOUCH}, #endif {} }; /***************************************************************************** * e2i Part */ #ifdef CONFIG_TOUCHSCREEN_USB_E2I static int e2i_init(struct usbtouch_usb *usbtouch) { int ret; struct usb_device *udev = interface_to_usbdev(usbtouch->interface); ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x01, 0x02, 0x0000, 0x0081, NULL, 0, USB_CTRL_SET_TIMEOUT); dev_dbg(&usbtouch->interface->dev, "%s - usb_control_msg - E2I_RESET - bytes|err: %d\n", __func__, ret); return ret; } static int e2i_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { int tmp = (pkt[0] << 8) | pkt[1]; dev->x = (pkt[2] << 8) | pkt[3]; dev->y = (pkt[4] << 8) | pkt[5]; tmp = tmp - 0xA000; dev->touch = (tmp > 0); dev->press = (tmp > 0 ? tmp : 0); return 1; } #endif /***************************************************************************** * eGalax part */ #ifdef CONFIG_TOUCHSCREEN_USB_EGALAX #ifndef MULTI_PACKET #define MULTI_PACKET #endif #define EGALAX_PKT_TYPE_MASK 0xFE #define EGALAX_PKT_TYPE_REPT 0x80 #define EGALAX_PKT_TYPE_DIAG 0x0A static int egalax_init(struct usbtouch_usb *usbtouch) { int ret, i; unsigned char *buf; struct usb_device *udev = interface_to_usbdev(usbtouch->interface); /* * An eGalax diagnostic packet kicks the device into using the right * protocol. We send a "check active" packet. The response will be * read later and ignored. 
*/ buf = kmalloc(3, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = EGALAX_PKT_TYPE_DIAG; buf[1] = 1; /* length */ buf[2] = 'A'; /* command - check active */ for (i = 0; i < 3; i++) { ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, buf, 3, USB_CTRL_SET_TIMEOUT); if (ret >= 0) { ret = 0; break; } if (ret != -EPIPE) break; } kfree(buf); return ret; } static int egalax_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if ((pkt[0] & EGALAX_PKT_TYPE_MASK) != EGALAX_PKT_TYPE_REPT) return 0; dev->x = ((pkt[3] & 0x0F) << 7) | (pkt[4] & 0x7F); dev->y = ((pkt[1] & 0x0F) << 7) | (pkt[2] & 0x7F); dev->touch = pkt[0] & 0x01; return 1; } static int egalax_get_pkt_len(unsigned char *buf, int len) { switch (buf[0] & EGALAX_PKT_TYPE_MASK) { case EGALAX_PKT_TYPE_REPT: return 5; case EGALAX_PKT_TYPE_DIAG: if (len < 2) return -1; return buf[1] + 2; } return 0; } #endif /***************************************************************************** * EasyTouch part */ #ifdef CONFIG_TOUCHSCREEN_USB_EASYTOUCH #ifndef MULTI_PACKET #define MULTI_PACKET #endif #define ETOUCH_PKT_TYPE_MASK 0xFE #define ETOUCH_PKT_TYPE_REPT 0x80 #define ETOUCH_PKT_TYPE_REPT2 0xB0 #define ETOUCH_PKT_TYPE_DIAG 0x0A static int etouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if ((pkt[0] & ETOUCH_PKT_TYPE_MASK) != ETOUCH_PKT_TYPE_REPT && (pkt[0] & ETOUCH_PKT_TYPE_MASK) != ETOUCH_PKT_TYPE_REPT2) return 0; dev->x = ((pkt[1] & 0x1F) << 7) | (pkt[2] & 0x7F); dev->y = ((pkt[3] & 0x1F) << 7) | (pkt[4] & 0x7F); dev->touch = pkt[0] & 0x01; return 1; } static int etouch_get_pkt_len(unsigned char *buf, int len) { switch (buf[0] & ETOUCH_PKT_TYPE_MASK) { case ETOUCH_PKT_TYPE_REPT: case ETOUCH_PKT_TYPE_REPT2: return 5; case ETOUCH_PKT_TYPE_DIAG: if (len < 2) return -1; return buf[1] + 2; } return 0; } #endif /***************************************************************************** * PanJit Part */ #ifdef CONFIG_TOUCHSCREEN_USB_PANJIT static int panjit_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1]; dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3]; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * 3M/Microtouch Part */ #ifdef CONFIG_TOUCHSCREEN_USB_3M #define MTOUCHUSB_ASYNC_REPORT 1 #define MTOUCHUSB_RESET 7 #define MTOUCHUSB_REQ_CTRLLR_ID 10 #define MTOUCHUSB_REQ_CTRLLR_ID_LEN 16 static int mtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if (hwcalib_xy) { dev->x = (pkt[4] << 8) | pkt[3]; dev->y = 0xffff - ((pkt[6] << 8) | pkt[5]); } else { dev->x = (pkt[8] << 8) | pkt[7]; dev->y = (pkt[10] << 8) | pkt[9]; } dev->touch = (pkt[2] & 0x40) ? 
1 : 0; return 1; } struct mtouch_priv { u8 fw_rev_major; u8 fw_rev_minor; }; static ssize_t mtouch_firmware_rev_show(struct device *dev, struct device_attribute *attr, char *output) { struct usb_interface *intf = to_usb_interface(dev); struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); struct mtouch_priv *priv = usbtouch->priv; return sysfs_emit(output, "%1x.%1x\n", priv->fw_rev_major, priv->fw_rev_minor); } static DEVICE_ATTR(firmware_rev, 0444, mtouch_firmware_rev_show, NULL); static struct attribute *mtouch_attrs[] = { &dev_attr_firmware_rev.attr, NULL }; static const struct attribute_group mtouch_attr_group = { .attrs = mtouch_attrs, }; static int mtouch_get_fw_revision(struct usbtouch_usb *usbtouch) { struct usb_device *udev = interface_to_usbdev(usbtouch->interface); struct mtouch_priv *priv = usbtouch->priv; u8 *buf; int ret; buf = kzalloc(MTOUCHUSB_REQ_CTRLLR_ID_LEN, GFP_NOIO); if (!buf) return -ENOMEM; ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), MTOUCHUSB_REQ_CTRLLR_ID, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, buf, MTOUCHUSB_REQ_CTRLLR_ID_LEN, USB_CTRL_SET_TIMEOUT); if (ret != MTOUCHUSB_REQ_CTRLLR_ID_LEN) { dev_warn(&usbtouch->interface->dev, "Failed to read FW rev: %d\n", ret); ret = ret < 0 ? ret : -EIO; goto free; } priv->fw_rev_major = buf[3]; priv->fw_rev_minor = buf[4]; ret = 0; free: kfree(buf); return ret; } static int mtouch_alloc(struct usbtouch_usb *usbtouch) { int ret; usbtouch->priv = kmalloc(sizeof(struct mtouch_priv), GFP_KERNEL); if (!usbtouch->priv) return -ENOMEM; ret = sysfs_create_group(&usbtouch->interface->dev.kobj, &mtouch_attr_group); if (ret) { kfree(usbtouch->priv); usbtouch->priv = NULL; return ret; } return 0; } static int mtouch_init(struct usbtouch_usb *usbtouch) { int ret, i; struct usb_device *udev = interface_to_usbdev(usbtouch->interface); ret = mtouch_get_fw_revision(usbtouch); if (ret) return ret; ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), MTOUCHUSB_RESET, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 1, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); dev_dbg(&usbtouch->interface->dev, "%s - usb_control_msg - MTOUCHUSB_RESET - bytes|err: %d\n", __func__, ret); if (ret < 0) return ret; msleep(150); for (i = 0; i < 3; i++) { ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), MTOUCHUSB_ASYNC_REPORT, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 1, 1, NULL, 0, USB_CTRL_SET_TIMEOUT); dev_dbg(&usbtouch->interface->dev, "%s - usb_control_msg - MTOUCHUSB_ASYNC_REPORT - bytes|err: %d\n", __func__, ret); if (ret >= 0) break; if (ret != -EPIPE) return ret; } /* Default min/max xy are the raw values, override if using hw-calib */ if (hwcalib_xy) { input_set_abs_params(usbtouch->input, ABS_X, 0, 0xffff, 0, 0); input_set_abs_params(usbtouch->input, ABS_Y, 0, 0xffff, 0, 0); } return 0; } static void mtouch_exit(struct usbtouch_usb *usbtouch) { struct mtouch_priv *priv = usbtouch->priv; sysfs_remove_group(&usbtouch->interface->dev.kobj, &mtouch_attr_group); kfree(priv); } #endif /***************************************************************************** * ITM Part */ #ifdef CONFIG_TOUCHSCREEN_USB_ITM static int itm_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { int touch; /* * ITM devices report invalid x/y data if not touched. * if the screen was touched before but is not touched any more * report touch as 0 with the last valid x/y data once. then stop * reporting data until touched again. 
*/ dev->press = ((pkt[2] & 0x01) << 7) | (pkt[5] & 0x7F); touch = ~pkt[7] & 0x20; if (!touch) { if (dev->touch) { dev->touch = 0; return 1; } return 0; } dev->x = ((pkt[0] & 0x1F) << 7) | (pkt[3] & 0x7F); dev->y = ((pkt[1] & 0x1F) << 7) | (pkt[4] & 0x7F); dev->touch = touch; return 1; } #endif /***************************************************************************** * eTurboTouch part */ #ifdef CONFIG_TOUCHSCREEN_USB_ETURBO #ifndef MULTI_PACKET #define MULTI_PACKET #endif static int eturbo_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { unsigned int shift; /* packets should start with sync */ if (!(pkt[0] & 0x80)) return 0; shift = (6 - (pkt[0] & 0x03)); dev->x = ((pkt[3] << 7) | pkt[4]) >> shift; dev->y = ((pkt[1] << 7) | pkt[2]) >> shift; dev->touch = (pkt[0] & 0x10) ? 1 : 0; return 1; } static int eturbo_get_pkt_len(unsigned char *buf, int len) { if (buf[0] & 0x80) return 5; if (buf[0] == 0x01) return 3; return 0; } #endif /***************************************************************************** * Gunze part */ #ifdef CONFIG_TOUCHSCREEN_USB_GUNZE static int gunze_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if (!(pkt[0] & 0x80) || ((pkt[1] | pkt[2] | pkt[3]) & 0x80)) return 0; dev->x = ((pkt[0] & 0x1F) << 7) | (pkt[2] & 0x7F); dev->y = ((pkt[1] & 0x1F) << 7) | (pkt[3] & 0x7F); dev->touch = pkt[0] & 0x20; return 1; } #endif /***************************************************************************** * DMC TSC-10/25 Part * * Documentation about the controller and it's protocol can be found at * http://www.dmccoltd.com/files/controler/tsc10usb_pi_e.pdf * http://www.dmccoltd.com/files/controler/tsc25_usb_e.pdf */ #ifdef CONFIG_TOUCHSCREEN_USB_DMC_TSC10 /* supported data rates. currently using 130 */ #define TSC10_RATE_POINT 0x50 #define TSC10_RATE_30 0x40 #define TSC10_RATE_50 0x41 #define TSC10_RATE_80 0x42 #define TSC10_RATE_100 0x43 #define TSC10_RATE_130 0x44 #define TSC10_RATE_150 0x45 /* commands */ #define TSC10_CMD_RESET 0x55 #define TSC10_CMD_RATE 0x05 #define TSC10_CMD_DATA1 0x01 static int dmc_tsc10_init(struct usbtouch_usb *usbtouch) { struct usb_device *dev = interface_to_usbdev(usbtouch->interface); int ret = -ENOMEM; unsigned char *buf; buf = kmalloc(2, GFP_NOIO); if (!buf) goto err_nobuf; /* reset */ buf[0] = buf[1] = 0xFF; ret = usb_control_msg(dev, usb_rcvctrlpipe (dev, 0), TSC10_CMD_RESET, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, buf, 2, USB_CTRL_SET_TIMEOUT); if (ret < 0) goto err_out; if (buf[0] != 0x06) { ret = -ENODEV; goto err_out; } /* TSC-25 data sheet specifies a delay after the RESET command */ msleep(150); /* set coordinate output rate */ buf[0] = buf[1] = 0xFF; ret = usb_control_msg(dev, usb_rcvctrlpipe (dev, 0), TSC10_CMD_RATE, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, TSC10_RATE_150, 0, buf, 2, USB_CTRL_SET_TIMEOUT); if (ret < 0) goto err_out; if ((buf[0] != 0x06) && (buf[0] != 0x15 || buf[1] != 0x01)) { ret = -ENODEV; goto err_out; } /* start sending data */ ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), TSC10_CMD_DATA1, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); err_out: kfree(buf); err_nobuf: return ret; } static int dmc_tsc10_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[2] & 0x03) << 8) | pkt[1]; dev->y = ((pkt[4] & 0x03) << 8) | pkt[3]; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * IRTOUCH Part */ #ifdef CONFIG_TOUCHSCREEN_USB_IRTOUCH static 
int irtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = (pkt[3] << 8) | pkt[2]; dev->y = (pkt[5] << 8) | pkt[4]; dev->touch = (pkt[1] & 0x03) ? 1 : 0; return 1; } #endif /***************************************************************************** * ET&T TC5UH/TC4UM part */ #ifdef CONFIG_TOUCHSCREEN_USB_ETT_TC45USB static int tc45usb_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1]; dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3]; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * IdealTEK URTC1000 Part */ #ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK #ifndef MULTI_PACKET #define MULTI_PACKET #endif static int idealtek_get_pkt_len(unsigned char *buf, int len) { if (buf[0] & 0x80) return 5; if (buf[0] == 0x01) return len; return 0; } static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { switch (pkt[0] & 0x98) { case 0x88: /* touch data in IdealTEK mode */ dev->x = (pkt[1] << 5) | (pkt[2] >> 2); dev->y = (pkt[3] << 5) | (pkt[4] >> 2); dev->touch = (pkt[0] & 0x40) ? 1 : 0; return 1; case 0x98: /* touch data in MT emulation mode */ dev->x = (pkt[2] << 5) | (pkt[1] >> 2); dev->y = (pkt[4] << 5) | (pkt[3] >> 2); dev->touch = (pkt[0] & 0x40) ? 1 : 0; return 1; default: return 0; } } #endif /***************************************************************************** * General Touch Part */ #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = (pkt[2] << 8) | pkt[1]; dev->y = (pkt[4] << 8) | pkt[3]; dev->press = pkt[5] & 0xff; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * GoTop Part */ #ifdef CONFIG_TOUCHSCREEN_USB_GOTOP static int gotop_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[1] & 0x38) << 4) | pkt[2]; dev->y = ((pkt[1] & 0x07) << 7) | pkt[3]; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * JASTEC Part */ #ifdef CONFIG_TOUCHSCREEN_USB_JASTEC static int jastec_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[0] & 0x3f) << 6) | (pkt[2] & 0x3f); dev->y = ((pkt[1] & 0x3f) << 6) | (pkt[3] & 0x3f); dev->touch = (pkt[0] & 0x40) >> 6; return 1; } #endif /***************************************************************************** * Zytronic Part */ #ifdef CONFIG_TOUCHSCREEN_USB_ZYTRONIC static int zytronic_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { struct usb_interface *intf = dev->interface; switch (pkt[0]) { case 0x3A: /* command response */ dev_dbg(&intf->dev, "%s: Command response %d\n", __func__, pkt[1]); break; case 0xC0: /* down */ dev->x = (pkt[1] & 0x7f) | ((pkt[2] & 0x07) << 7); dev->y = (pkt[3] & 0x7f) | ((pkt[4] & 0x07) << 7); dev->touch = 1; dev_dbg(&intf->dev, "%s: down %d,%d\n", __func__, dev->x, dev->y); return 1; case 0x80: /* up */ dev->x = (pkt[1] & 0x7f) | ((pkt[2] & 0x07) << 7); dev->y = (pkt[3] & 0x7f) | ((pkt[4] & 0x07) << 7); dev->touch = 0; dev_dbg(&intf->dev, "%s: up %d,%d\n", __func__, dev->x, dev->y); return 1; default: dev_dbg(&intf->dev, "%s: Unknown return %d\n", __func__, pkt[0]); break; } return 0; } #endif /***************************************************************************** * NEXIO Part */ #ifdef CONFIG_TOUCHSCREEN_USB_NEXIO #define NEXIO_TIMEOUT 5000 #define NEXIO_BUFSIZE 1024 #define 
NEXIO_THRESHOLD 50 struct nexio_priv { struct urb *ack; unsigned char *ack_buf; }; struct nexio_touch_packet { u8 flags; /* 0xe1 = touch, 0xe1 = release */ __be16 data_len; /* total bytes of touch data */ __be16 x_len; /* bytes for X axis */ __be16 y_len; /* bytes for Y axis */ u8 data[]; } __attribute__ ((packed)); static unsigned char nexio_ack_pkt[2] = { 0xaa, 0x02 }; static unsigned char nexio_init_pkt[4] = { 0x82, 0x04, 0x0a, 0x0f }; static void nexio_ack_complete(struct urb *urb) { } static int nexio_alloc(struct usbtouch_usb *usbtouch) { struct nexio_priv *priv; int ret = -ENOMEM; usbtouch->priv = kmalloc(sizeof(struct nexio_priv), GFP_KERNEL); if (!usbtouch->priv) goto out_buf; priv = usbtouch->priv; priv->ack_buf = kmemdup(nexio_ack_pkt, sizeof(nexio_ack_pkt), GFP_KERNEL); if (!priv->ack_buf) goto err_priv; priv->ack = usb_alloc_urb(0, GFP_KERNEL); if (!priv->ack) { dev_dbg(&usbtouch->interface->dev, "%s - usb_alloc_urb failed: usbtouch->ack\n", __func__); goto err_ack_buf; } return 0; err_ack_buf: kfree(priv->ack_buf); err_priv: kfree(priv); out_buf: return ret; } static int nexio_init(struct usbtouch_usb *usbtouch) { struct usb_device *dev = interface_to_usbdev(usbtouch->interface); struct usb_host_interface *interface = usbtouch->interface->cur_altsetting; struct nexio_priv *priv = usbtouch->priv; int ret = -ENOMEM; int actual_len, i; unsigned char *buf; char *firmware_ver = NULL, *device_name = NULL; int input_ep = 0, output_ep = 0; /* find first input and output endpoint */ for (i = 0; i < interface->desc.bNumEndpoints; i++) { if (!input_ep && usb_endpoint_dir_in(&interface->endpoint[i].desc)) input_ep = interface->endpoint[i].desc.bEndpointAddress; if (!output_ep && usb_endpoint_dir_out(&interface->endpoint[i].desc)) output_ep = interface->endpoint[i].desc.bEndpointAddress; } if (!input_ep || !output_ep) return -ENXIO; buf = kmalloc(NEXIO_BUFSIZE, GFP_NOIO); if (!buf) goto out_buf; /* two empty reads */ for (i = 0; i < 2; i++) { ret = usb_bulk_msg(dev, usb_rcvbulkpipe(dev, input_ep), buf, NEXIO_BUFSIZE, &actual_len, NEXIO_TIMEOUT); if (ret < 0) goto out_buf; } /* send init command */ memcpy(buf, nexio_init_pkt, sizeof(nexio_init_pkt)); ret = usb_bulk_msg(dev, usb_sndbulkpipe(dev, output_ep), buf, sizeof(nexio_init_pkt), &actual_len, NEXIO_TIMEOUT); if (ret < 0) goto out_buf; /* read replies */ for (i = 0; i < 3; i++) { memset(buf, 0, NEXIO_BUFSIZE); ret = usb_bulk_msg(dev, usb_rcvbulkpipe(dev, input_ep), buf, NEXIO_BUFSIZE, &actual_len, NEXIO_TIMEOUT); if (ret < 0 || actual_len < 1 || buf[1] != actual_len) continue; switch (buf[0]) { case 0x83: /* firmware version */ if (!firmware_ver) firmware_ver = kstrdup(&buf[2], GFP_NOIO); break; case 0x84: /* device name */ if (!device_name) device_name = kstrdup(&buf[2], GFP_NOIO); break; } } printk(KERN_INFO "Nexio device: %s, firmware version: %s\n", device_name, firmware_ver); kfree(firmware_ver); kfree(device_name); usb_fill_bulk_urb(priv->ack, dev, usb_sndbulkpipe(dev, output_ep), priv->ack_buf, sizeof(nexio_ack_pkt), nexio_ack_complete, usbtouch); ret = 0; out_buf: kfree(buf); return ret; } static void nexio_exit(struct usbtouch_usb *usbtouch) { struct nexio_priv *priv = usbtouch->priv; usb_kill_urb(priv->ack); usb_free_urb(priv->ack); kfree(priv->ack_buf); kfree(priv); } static int nexio_read_data(struct usbtouch_usb *usbtouch, unsigned char *pkt) { struct device *dev = &usbtouch->interface->dev; struct nexio_touch_packet *packet = (void *) pkt; struct nexio_priv *priv = usbtouch->priv; unsigned int data_len = 
be16_to_cpu(packet->data_len); unsigned int x_len = be16_to_cpu(packet->x_len); unsigned int y_len = be16_to_cpu(packet->y_len); int x, y, begin_x, begin_y, end_x, end_y, w, h, ret; /* got touch data? */ if ((pkt[0] & 0xe0) != 0xe0) return 0; if (data_len > 0xff) data_len -= 0x100; if (x_len > 0xff) x_len -= 0x80; /* send ACK */ ret = usb_submit_urb(priv->ack, GFP_ATOMIC); if (ret) dev_warn(dev, "Failed to submit ACK URB: %d\n", ret); if (!usbtouch->type->max_xc) { usbtouch->type->max_xc = 2 * x_len; input_set_abs_params(usbtouch->input, ABS_X, 0, usbtouch->type->max_xc, 0, 0); usbtouch->type->max_yc = 2 * y_len; input_set_abs_params(usbtouch->input, ABS_Y, 0, usbtouch->type->max_yc, 0, 0); } /* * The device reports state of IR sensors on X and Y axes. * Each byte represents "darkness" percentage (0-100) of one element. * 17" touchscreen reports only 64 x 52 bytes so the resolution is low. * This also means that there's a limited multi-touch capability but * it's disabled (and untested) here as there's no X driver for that. */ begin_x = end_x = begin_y = end_y = -1; for (x = 0; x < x_len; x++) { if (begin_x == -1 && packet->data[x] > NEXIO_THRESHOLD) { begin_x = x; continue; } if (end_x == -1 && begin_x != -1 && packet->data[x] < NEXIO_THRESHOLD) { end_x = x - 1; for (y = x_len; y < data_len; y++) { if (begin_y == -1 && packet->data[y] > NEXIO_THRESHOLD) { begin_y = y - x_len; continue; } if (end_y == -1 && begin_y != -1 && packet->data[y] < NEXIO_THRESHOLD) { end_y = y - 1 - x_len; w = end_x - begin_x; h = end_y - begin_y; #if 0 /* multi-touch */ input_report_abs(usbtouch->input, ABS_MT_TOUCH_MAJOR, max(w,h)); input_report_abs(usbtouch->input, ABS_MT_TOUCH_MINOR, min(x,h)); input_report_abs(usbtouch->input, ABS_MT_POSITION_X, 2*begin_x+w); input_report_abs(usbtouch->input, ABS_MT_POSITION_Y, 2*begin_y+h); input_report_abs(usbtouch->input, ABS_MT_ORIENTATION, w > h); input_mt_sync(usbtouch->input); #endif /* single touch */ usbtouch->x = 2 * begin_x + w; usbtouch->y = 2 * begin_y + h; usbtouch->touch = packet->flags & 0x01; begin_y = end_y = -1; return 1; } } begin_x = end_x = -1; } } return 0; } #endif /***************************************************************************** * ELO part */ #ifdef CONFIG_TOUCHSCREEN_USB_ELO static int elo_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = (pkt[3] << 8) | pkt[2]; dev->y = (pkt[5] << 8) | pkt[4]; dev->touch = pkt[6] > 0; dev->press = pkt[6]; return 1; } #endif /***************************************************************************** * the different device descriptors */ #ifdef MULTI_PACKET static void usbtouch_process_multi(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len); #endif static struct usbtouch_device_info usbtouch_dev_info[] = { #ifdef CONFIG_TOUCHSCREEN_USB_ELO [DEVTYPE_ELO] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .max_press = 0xff, .rept_size = 8, .read_data = elo_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_EGALAX [DEVTYPE_EGALAX] = { .min_xc = 0x0, .max_xc = 0x07ff, .min_yc = 0x0, .max_yc = 0x07ff, .rept_size = 16, .process_pkt = usbtouch_process_multi, .get_pkt_len = egalax_get_pkt_len, .read_data = egalax_read_data, .init = egalax_init, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_PANJIT [DEVTYPE_PANJIT] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 8, .read_data = panjit_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_3M [DEVTYPE_3M] = { .min_xc = 0x0, .max_xc = 0x4000, .min_yc = 0x0, .max_yc = 0x4000, .rept_size = 11, 
.read_data = mtouch_read_data, .alloc = mtouch_alloc, .init = mtouch_init, .exit = mtouch_exit, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ITM [DEVTYPE_ITM] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .max_press = 0xff, .rept_size = 8, .read_data = itm_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETURBO [DEVTYPE_ETURBO] = { .min_xc = 0x0, .max_xc = 0x07ff, .min_yc = 0x0, .max_yc = 0x07ff, .rept_size = 8, .process_pkt = usbtouch_process_multi, .get_pkt_len = eturbo_get_pkt_len, .read_data = eturbo_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GUNZE [DEVTYPE_GUNZE] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 4, .read_data = gunze_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_DMC_TSC10 [DEVTYPE_DMC_TSC10] = { .min_xc = 0x0, .max_xc = 0x03ff, .min_yc = 0x0, .max_yc = 0x03ff, .rept_size = 5, .init = dmc_tsc10_init, .read_data = dmc_tsc10_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IRTOUCH [DEVTYPE_IRTOUCH] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 8, .read_data = irtouch_read_data, }, [DEVTYPE_IRTOUCH_HIRES] = { .min_xc = 0x0, .max_xc = 0x7fff, .min_yc = 0x0, .max_yc = 0x7fff, .rept_size = 8, .read_data = irtouch_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK [DEVTYPE_IDEALTEK] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 8, .process_pkt = usbtouch_process_multi, .get_pkt_len = idealtek_get_pkt_len, .read_data = idealtek_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH [DEVTYPE_GENERAL_TOUCH] = { .min_xc = 0x0, .max_xc = 0x7fff, .min_yc = 0x0, .max_yc = 0x7fff, .rept_size = 7, .read_data = general_touch_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GOTOP [DEVTYPE_GOTOP] = { .min_xc = 0x0, .max_xc = 0x03ff, .min_yc = 0x0, .max_yc = 0x03ff, .rept_size = 4, .read_data = gotop_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_JASTEC [DEVTYPE_JASTEC] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 4, .read_data = jastec_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_E2I [DEVTYPE_E2I] = { .min_xc = 0x0, .max_xc = 0x7fff, .min_yc = 0x0, .max_yc = 0x7fff, .rept_size = 6, .init = e2i_init, .read_data = e2i_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ZYTRONIC [DEVTYPE_ZYTRONIC] = { .min_xc = 0x0, .max_xc = 0x03ff, .min_yc = 0x0, .max_yc = 0x03ff, .rept_size = 5, .read_data = zytronic_read_data, .irq_always = true, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETT_TC45USB [DEVTYPE_TC45USB] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 5, .read_data = tc45usb_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_NEXIO [DEVTYPE_NEXIO] = { .rept_size = 1024, .irq_always = true, .read_data = nexio_read_data, .alloc = nexio_alloc, .init = nexio_init, .exit = nexio_exit, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_EASYTOUCH [DEVTYPE_ETOUCH] = { .min_xc = 0x0, .max_xc = 0x07ff, .min_yc = 0x0, .max_yc = 0x07ff, .rept_size = 16, .process_pkt = usbtouch_process_multi, .get_pkt_len = etouch_get_pkt_len, .read_data = etouch_read_data, }, #endif }; /***************************************************************************** * Generic Part */ static void usbtouch_process_pkt(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len) { struct usbtouch_device_info *type = usbtouch->type; if (!type->read_data(usbtouch, pkt)) return; input_report_key(usbtouch->input, BTN_TOUCH, usbtouch->touch); if (swap_xy) { input_report_abs(usbtouch->input, ABS_X, 
usbtouch->y); input_report_abs(usbtouch->input, ABS_Y, usbtouch->x); } else { input_report_abs(usbtouch->input, ABS_X, usbtouch->x); input_report_abs(usbtouch->input, ABS_Y, usbtouch->y); } if (type->max_press) input_report_abs(usbtouch->input, ABS_PRESSURE, usbtouch->press); input_sync(usbtouch->input); } #ifdef MULTI_PACKET static void usbtouch_process_multi(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len) { unsigned char *buffer; int pkt_len, pos, buf_len, tmp; /* process buffer */ if (unlikely(usbtouch->buf_len)) { /* try to get size */ pkt_len = usbtouch->type->get_pkt_len( usbtouch->buffer, usbtouch->buf_len); /* drop? */ if (unlikely(!pkt_len)) goto out_flush_buf; /* need to append -pkt_len bytes before able to get size */ if (unlikely(pkt_len < 0)) { int append = -pkt_len; if (unlikely(append > len)) append = len; if (usbtouch->buf_len + append >= usbtouch->type->rept_size) goto out_flush_buf; memcpy(usbtouch->buffer + usbtouch->buf_len, pkt, append); usbtouch->buf_len += append; pkt_len = usbtouch->type->get_pkt_len( usbtouch->buffer, usbtouch->buf_len); if (pkt_len < 0) return; } /* append */ tmp = pkt_len - usbtouch->buf_len; if (usbtouch->buf_len + tmp >= usbtouch->type->rept_size) goto out_flush_buf; memcpy(usbtouch->buffer + usbtouch->buf_len, pkt, tmp); usbtouch_process_pkt(usbtouch, usbtouch->buffer, pkt_len); buffer = pkt + tmp; buf_len = len - tmp; } else { buffer = pkt; buf_len = len; } /* loop over the received packet, process */ pos = 0; while (pos < buf_len) { /* get packet len */ pkt_len = usbtouch->type->get_pkt_len(buffer + pos, buf_len - pos); /* unknown packet: skip one byte */ if (unlikely(!pkt_len)) { pos++; continue; } /* full packet: process */ if (likely((pkt_len > 0) && (pkt_len <= buf_len - pos))) { usbtouch_process_pkt(usbtouch, buffer + pos, pkt_len); } else { /* incomplete packet: save in buffer */ memcpy(usbtouch->buffer, buffer + pos, buf_len - pos); usbtouch->buf_len = buf_len - pos; return; } pos += pkt_len; } out_flush_buf: usbtouch->buf_len = 0; return; } #endif static void usbtouch_irq(struct urb *urb) { struct usbtouch_usb *usbtouch = urb->context; struct device *dev = &usbtouch->interface->dev; int retval; switch (urb->status) { case 0: /* success */ break; case -ETIME: /* this urb is timing out */ dev_dbg(dev, "%s - urb timed out - was the device unplugged?\n", __func__); return; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EPIPE: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, urb->status); return; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, urb->status); goto exit; } usbtouch->type->process_pkt(usbtouch, usbtouch->data, urb->actual_length); exit: usb_mark_last_busy(interface_to_usbdev(usbtouch->interface)); retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - usb_submit_urb failed with result: %d\n", __func__, retval); } static int usbtouch_open(struct input_dev *input) { struct usbtouch_usb *usbtouch = input_get_drvdata(input); int r; usbtouch->irq->dev = interface_to_usbdev(usbtouch->interface); r = usb_autopm_get_interface(usbtouch->interface) ? 
-EIO : 0; if (r < 0) goto out; mutex_lock(&usbtouch->pm_mutex); if (!usbtouch->type->irq_always) { if (usb_submit_urb(usbtouch->irq, GFP_KERNEL)) { r = -EIO; goto out_put; } } usbtouch->interface->needs_remote_wakeup = 1; usbtouch->is_open = true; out_put: mutex_unlock(&usbtouch->pm_mutex); usb_autopm_put_interface(usbtouch->interface); out: return r; } static void usbtouch_close(struct input_dev *input) { struct usbtouch_usb *usbtouch = input_get_drvdata(input); int r; mutex_lock(&usbtouch->pm_mutex); if (!usbtouch->type->irq_always) usb_kill_urb(usbtouch->irq); usbtouch->is_open = false; mutex_unlock(&usbtouch->pm_mutex); r = usb_autopm_get_interface(usbtouch->interface); usbtouch->interface->needs_remote_wakeup = 0; if (!r) usb_autopm_put_interface(usbtouch->interface); } static int usbtouch_suspend (struct usb_interface *intf, pm_message_t message) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); usb_kill_urb(usbtouch->irq); return 0; } static int usbtouch_resume(struct usb_interface *intf) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); int result = 0; mutex_lock(&usbtouch->pm_mutex); if (usbtouch->is_open || usbtouch->type->irq_always) result = usb_submit_urb(usbtouch->irq, GFP_NOIO); mutex_unlock(&usbtouch->pm_mutex); return result; } static int usbtouch_reset_resume(struct usb_interface *intf) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); int err = 0; /* reinit the device */ if (usbtouch->type->init) { err = usbtouch->type->init(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->init() failed, err: %d\n", __func__, err); return err; } } /* restart IO if needed */ mutex_lock(&usbtouch->pm_mutex); if (usbtouch->is_open) err = usb_submit_urb(usbtouch->irq, GFP_NOIO); mutex_unlock(&usbtouch->pm_mutex); return err; } static void usbtouch_free_buffers(struct usb_device *udev, struct usbtouch_usb *usbtouch) { usb_free_coherent(udev, usbtouch->data_size, usbtouch->data, usbtouch->data_dma); kfree(usbtouch->buffer); } static struct usb_endpoint_descriptor * usbtouch_get_input_endpoint(struct usb_host_interface *interface) { int i; for (i = 0; i < interface->desc.bNumEndpoints; i++) if (usb_endpoint_dir_in(&interface->endpoint[i].desc)) return &interface->endpoint[i].desc; return NULL; } static int usbtouch_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usbtouch_usb *usbtouch; struct input_dev *input_dev; struct usb_endpoint_descriptor *endpoint; struct usb_device *udev = interface_to_usbdev(intf); struct usbtouch_device_info *type; int err = -ENOMEM; /* some devices are ignored */ if (id->driver_info == DEVTYPE_IGNORE) return -ENODEV; if (id->driver_info >= ARRAY_SIZE(usbtouch_dev_info)) return -ENODEV; endpoint = usbtouch_get_input_endpoint(intf->cur_altsetting); if (!endpoint) return -ENXIO; usbtouch = kzalloc(sizeof(struct usbtouch_usb), GFP_KERNEL); input_dev = input_allocate_device(); if (!usbtouch || !input_dev) goto out_free; mutex_init(&usbtouch->pm_mutex); type = &usbtouch_dev_info[id->driver_info]; usbtouch->type = type; if (!type->process_pkt) type->process_pkt = usbtouch_process_pkt; usbtouch->data_size = type->rept_size; if (type->get_pkt_len) { /* * When dealing with variable-length packets we should * not request more than wMaxPacketSize bytes at once * as we do not know if there is more data coming or * we filled exactly wMaxPacketSize bytes and there is * nothing else. 
*/ usbtouch->data_size = min(usbtouch->data_size, usb_endpoint_maxp(endpoint)); } usbtouch->data = usb_alloc_coherent(udev, usbtouch->data_size, GFP_KERNEL, &usbtouch->data_dma); if (!usbtouch->data) goto out_free; if (type->get_pkt_len) { usbtouch->buffer = kmalloc(type->rept_size, GFP_KERNEL); if (!usbtouch->buffer) goto out_free_buffers; } usbtouch->irq = usb_alloc_urb(0, GFP_KERNEL); if (!usbtouch->irq) { dev_dbg(&intf->dev, "%s - usb_alloc_urb failed: usbtouch->irq\n", __func__); goto out_free_buffers; } usbtouch->interface = intf; usbtouch->input = input_dev; if (udev->manufacturer) strscpy(usbtouch->name, udev->manufacturer, sizeof(usbtouch->name)); if (udev->product) { if (udev->manufacturer) strlcat(usbtouch->name, " ", sizeof(usbtouch->name)); strlcat(usbtouch->name, udev->product, sizeof(usbtouch->name)); } if (!strlen(usbtouch->name)) snprintf(usbtouch->name, sizeof(usbtouch->name), "USB Touchscreen %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); usb_make_path(udev, usbtouch->phys, sizeof(usbtouch->phys)); strlcat(usbtouch->phys, "/input0", sizeof(usbtouch->phys)); input_dev->name = usbtouch->name; input_dev->phys = usbtouch->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &intf->dev; input_set_drvdata(input_dev, usbtouch); input_dev->open = usbtouch_open; input_dev->close = usbtouch_close; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); input_set_abs_params(input_dev, ABS_X, type->min_xc, type->max_xc, 0, 0); input_set_abs_params(input_dev, ABS_Y, type->min_yc, type->max_yc, 0, 0); if (type->max_press) input_set_abs_params(input_dev, ABS_PRESSURE, type->min_press, type->max_press, 0, 0); if (usb_endpoint_type(endpoint) == USB_ENDPOINT_XFER_INT) usb_fill_int_urb(usbtouch->irq, udev, usb_rcvintpipe(udev, endpoint->bEndpointAddress), usbtouch->data, usbtouch->data_size, usbtouch_irq, usbtouch, endpoint->bInterval); else usb_fill_bulk_urb(usbtouch->irq, udev, usb_rcvbulkpipe(udev, endpoint->bEndpointAddress), usbtouch->data, usbtouch->data_size, usbtouch_irq, usbtouch); usbtouch->irq->dev = udev; usbtouch->irq->transfer_dma = usbtouch->data_dma; usbtouch->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* device specific allocations */ if (type->alloc) { err = type->alloc(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->alloc() failed, err: %d\n", __func__, err); goto out_free_urb; } } /* device specific initialisation*/ if (type->init) { err = type->init(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->init() failed, err: %d\n", __func__, err); goto out_do_exit; } } err = input_register_device(usbtouch->input); if (err) { dev_dbg(&intf->dev, "%s - input_register_device failed, err: %d\n", __func__, err); goto out_do_exit; } usb_set_intfdata(intf, usbtouch); if (usbtouch->type->irq_always) { /* this can't fail */ usb_autopm_get_interface(intf); err = usb_submit_urb(usbtouch->irq, GFP_KERNEL); if (err) { usb_autopm_put_interface(intf); dev_err(&intf->dev, "%s - usb_submit_urb failed with result: %d\n", __func__, err); goto out_unregister_input; } } return 0; out_unregister_input: input_unregister_device(input_dev); input_dev = NULL; out_do_exit: if (type->exit) type->exit(usbtouch); out_free_urb: usb_free_urb(usbtouch->irq); out_free_buffers: usbtouch_free_buffers(udev, usbtouch); out_free: input_free_device(input_dev); kfree(usbtouch); return err; } static void usbtouch_disconnect(struct usb_interface *intf) { struct usbtouch_usb 
*usbtouch = usb_get_intfdata(intf); if (!usbtouch) return; dev_dbg(&intf->dev, "%s - usbtouch is initialized, cleaning up\n", __func__); usb_set_intfdata(intf, NULL); /* this will stop IO via close */ input_unregister_device(usbtouch->input); usb_free_urb(usbtouch->irq); if (usbtouch->type->exit) usbtouch->type->exit(usbtouch); usbtouch_free_buffers(interface_to_usbdev(intf), usbtouch); kfree(usbtouch); } MODULE_DEVICE_TABLE(usb, usbtouch_devices); static struct usb_driver usbtouch_driver = { .name = "usbtouchscreen", .probe = usbtouch_probe, .disconnect = usbtouch_disconnect, .suspend = usbtouch_suspend, .resume = usbtouch_resume, .reset_resume = usbtouch_reset_resume, .id_table = usbtouch_devices, .supports_autosuspend = 1, }; module_usb_driver(usbtouch_driver); MODULE_AUTHOR("Daniel Ritz <daniel.ritz@gmx.ch>"); MODULE_DESCRIPTION("USB Touchscreen Driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("touchkitusb"); MODULE_ALIAS("itmtouch"); MODULE_ALIAS("mtouchusb");
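/*
 * Illustrative sketch (not part of the driver above): how a get_pkt_len()
 * callback is expected to behave for the MULTI_PACKET reassembly done by
 * usbtouch_process_multi(). The "xyz" protocol below is hypothetical; it
 * only demonstrates the return-value contract documented in
 * struct usbtouch_device_info:
 *   > 0  length of the complete packet starting at buf[0]
 *   = 0  buf[0] does not start a known packet, skip one byte and resync
 *   < 0  -N more bytes are needed before the length can be determined
 */
static int xyz_get_pkt_len(unsigned char *buf, int len)
{
	/* hypothetical framing: a set 0x80 bit marks a fixed 5-byte report */
	if (buf[0] & 0x80)
		return 5;

	/*
	 * hypothetical variable-length diagnostic packet: buf[1] holds the
	 * payload length, so at least two header bytes must be buffered
	 * before the total size is known
	 */
	if (buf[0] == 0x0a) {
		if (len < 2)
			return -1;
		return buf[1] + 2;
	}

	/* unknown byte: tell the caller to drop it */
	return 0;
}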
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/core/netprio_cgroup.c	Priority Control Group
 *
 * Authors:	Neil Horman <nhorman@tuxdriver.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include <linux/sched/task.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
#include <net/netprio_cgroup.h>

#include <linux/fdtable.h>

/*
 * netprio allocates per-net_device priomap array which is indexed by
 * css->id. Limiting css ID to 16bits doesn't lose anything.
 */
#define NETPRIO_ID_MAX		USHRT_MAX

#define PRIOMAP_MIN_SZ		128

/*
 * Extend @dev->priomap so that it's large enough to accommodate
 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
 * return. Must be called under rtnl lock.
 */
static int extend_netdev_table(struct net_device *dev, u32 target_idx)
{
	struct netprio_map *old, *new;
	size_t new_sz, new_len;

	/* is the existing priomap large enough? */
	old = rtnl_dereference(dev->priomap);
	if (old && old->priomap_len > target_idx)
		return 0;

	/*
	 * Determine the new size. Let's keep it power-of-two. We start
	 * from PRIOMAP_MIN_SZ and double it until it's large enough to
	 * accommodate @target_idx.
	 */
	new_sz = PRIOMAP_MIN_SZ;
	while (true) {
		new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
			sizeof(new->priomap[0]);
		if (new_len > target_idx)
			break;
		new_sz *= 2;
		/* overflowed? */
		if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
			return -ENOSPC;
	}

	/* allocate & copy */
	new = kzalloc(new_sz, GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (old)
		memcpy(new->priomap, old->priomap,
		       old->priomap_len * sizeof(old->priomap[0]));

	new->priomap_len = new_len;

	/* install the new priomap */
	rcu_assign_pointer(dev->priomap, new);
	if (old)
		kfree_rcu(old, rcu);
	return 0;
}

/**
 * netprio_prio - return the effective netprio of a cgroup-net_device pair
 * @css: css part of the target pair
 * @dev: net_device part of the target pair
 *
 * Should be called under RCU read or rtnl lock.
*/ static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev) { struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); int id = css->id; if (map && id < map->priomap_len) return map->priomap[id]; return 0; } /** * netprio_set_prio - set netprio on a cgroup-net_device pair * @css: css part of the target pair * @dev: net_device part of the target pair * @prio: prio to set * * Set netprio to @prio on @css-@dev pair. Should be called under rtnl * lock and may fail under memory pressure for non-zero @prio. */ static int netprio_set_prio(struct cgroup_subsys_state *css, struct net_device *dev, u32 prio) { struct netprio_map *map; int id = css->id; int ret; /* avoid extending priomap for zero writes */ map = rtnl_dereference(dev->priomap); if (!prio && (!map || map->priomap_len <= id)) return 0; ret = extend_netdev_table(dev, id); if (ret) return ret; map = rtnl_dereference(dev->priomap); map->priomap[id] = prio; return 0; } static struct cgroup_subsys_state * cgrp_css_alloc(struct cgroup_subsys_state *parent_css) { struct cgroup_subsys_state *css; css = kzalloc(sizeof(*css), GFP_KERNEL); if (!css) return ERR_PTR(-ENOMEM); return css; } static int cgrp_css_online(struct cgroup_subsys_state *css) { struct cgroup_subsys_state *parent_css = css->parent; struct net_device *dev; int ret = 0; if (css->id > NETPRIO_ID_MAX) return -ENOSPC; if (!parent_css) return 0; rtnl_lock(); /* * Inherit prios from the parent. As all prios are set during * onlining, there is no need to clear them on offline. */ for_each_netdev(&init_net, dev) { u32 prio = netprio_prio(parent_css, dev); ret = netprio_set_prio(css, dev, prio); if (ret) break; } rtnl_unlock(); return ret; } static void cgrp_css_free(struct cgroup_subsys_state *css) { kfree(css); } static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) { return css->id; } static int read_priomap(struct seq_file *sf, void *v) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) seq_printf(sf, "%s %u\n", dev->name, netprio_prio(seq_css(sf), dev)); rcu_read_unlock(); return 0; } static ssize_t write_priomap(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { char devname[IFNAMSIZ + 1]; struct net_device *dev; u32 prio; int ret; if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) return -EINVAL; dev = dev_get_by_name(&init_net, devname); if (!dev) return -ENODEV; rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); rtnl_unlock(); dev_put(dev); return ret ?: nbytes; } static int update_netprio(const void *v, struct file *file, unsigned n) { struct socket *sock = sock_from_file(file); if (sock) sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, (unsigned long)v); return 0; } static void net_prio_attach(struct cgroup_taskset *tset) { struct task_struct *p; struct cgroup_subsys_state *css; cgroup_taskset_for_each(p, css, tset) { void *v = (void *)(unsigned long)css->id; task_lock(p); iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } } static struct cftype ss_files[] = { { .name = "prioidx", .read_u64 = read_prioidx, }, { .name = "ifpriomap", .seq_show = read_priomap, .write = write_priomap, }, { } /* terminate */ }; struct cgroup_subsys net_prio_cgrp_subsys = { .css_alloc = cgrp_css_alloc, .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = net_prio_attach, .legacy_cftypes = ss_files, }; static int netprio_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = 
netdev_notifier_info_to_dev(ptr); struct netprio_map *old; /* * Note this is called with rtnl_lock held so we have update side * protection on our rcu assignments */ switch (event) { case NETDEV_UNREGISTER: old = rtnl_dereference(dev->priomap); RCU_INIT_POINTER(dev->priomap, NULL); if (old) kfree_rcu(old, rcu); break; } return NOTIFY_DONE; } static struct notifier_block netprio_device_notifier = { .notifier_call = netprio_device_event }; static int __init init_cgroup_netprio(void) { register_netdevice_notifier(&netprio_device_notifier); return 0; } subsys_initcall(init_cgroup_netprio);
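/*
 * Illustrative sketch (not part of netprio_cgroup.c above): the userspace
 * side of the interface implemented by write_priomap(). Priorities are set
 * per cgroup and per net_device by writing "<ifname> <prio>" lines to the
 * cgroup's net_prio.ifpriomap file. The mount point shown below is only the
 * conventional cgroup-v1 location and is an assumption, not something
 * defined by the file above.
 */
#include <stdio.h>

static int set_netprio(const char *cgroup_dir, const char *ifname,
		       unsigned int prio)
{
	char path[256];
	FILE *f;
	int ret = 0;

	/* e.g. cgroup_dir = "/sys/fs/cgroup/net_prio/mygroup" (assumed) */
	snprintf(path, sizeof(path), "%s/net_prio.ifpriomap", cgroup_dir);

	f = fopen(path, "w");
	if (!f)
		return -1;

	/* same "<ifname> <prio>" format that write_priomap() parses */
	if (fprintf(f, "%s %u\n", ifname, prio) < 0)
		ret = -1;
	if (fclose(f) != 0)
		ret = -1;
	return ret;
}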
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_IRQ_H
#define _LINUX_IRQ_H

/*
 * Please do not include this file in generic code. There is currently
 * no requirement for any architecture to implement anything held
 * within this file.
 *
 * Thanks. --rmk
 */

#include <linux/cache.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/irqhandler.h>
#include <linux/irqreturn.h>
#include <linux/irqnr.h>
#include <linux/topology.h>
#include <linux/io.h>
#include <linux/slab.h>

#include <asm/irq.h>
#include <asm/ptrace.h>
#include <asm/irq_regs.h>

struct seq_file;
struct module;
struct msi_msg;
struct irq_affinity_desc;
enum irqchip_irq_state;

/*
 * IRQ line status.
 *
 * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h
 *
 * IRQ_TYPE_NONE		- default, unspecified type
 * IRQ_TYPE_EDGE_RISING		- rising edge triggered
 * IRQ_TYPE_EDGE_FALLING	- falling edge triggered
 * IRQ_TYPE_EDGE_BOTH		- rising and falling edge triggered
 * IRQ_TYPE_LEVEL_HIGH		- high level triggered
 * IRQ_TYPE_LEVEL_LOW		- low level triggered
 * IRQ_TYPE_LEVEL_MASK		- Mask to filter out the level bits
 * IRQ_TYPE_SENSE_MASK		- Mask for all the above bits
 * IRQ_TYPE_DEFAULT		- For use by some PICs to ask irq_set_type
 *				  to setup the HW to a sane default (used
 *				  by irqdomain map() callbacks to synchronize
 *				  the HW state and SW flags for a newly
 *				  allocated descriptor).
 *
 * IRQ_TYPE_PROBE		- Special flag for probing in progress
 *
 * Bits which can be modified via irq_set/clear/modify_status_flags()
 * IRQ_LEVEL			- Interrupt is level type. Will be also
 *				  updated in the code when the above trigger
 *				  bits are modified via irq_set_irq_type()
 * IRQ_PER_CPU			- Mark an interrupt PER_CPU.
Will protect * it from affinity setting * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing * IRQ_NOREQUEST - Interrupt cannot be requested via * request_irq() * IRQ_NOTHREAD - Interrupt cannot be threaded * IRQ_NOAUTOEN - Interrupt is not automatically enabled in * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context * IRQ_NESTED_THREAD - Interrupt nests into another thread * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable * IRQ_IS_POLLED - Always polled by another interrupt. Exclude * it from the spurious interrupt detection * mechanism and from core side polling. * IRQ_DISABLE_UNLAZY - Disable lazy irq disable * IRQ_HIDDEN - Don't show up in /proc/interrupts * IRQ_NO_DEBUG - Exclude from note_interrupt() debugging */ enum { IRQ_TYPE_NONE = 0x00000000, IRQ_TYPE_EDGE_RISING = 0x00000001, IRQ_TYPE_EDGE_FALLING = 0x00000002, IRQ_TYPE_EDGE_BOTH = (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING), IRQ_TYPE_LEVEL_HIGH = 0x00000004, IRQ_TYPE_LEVEL_LOW = 0x00000008, IRQ_TYPE_LEVEL_MASK = (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH), IRQ_TYPE_SENSE_MASK = 0x0000000f, IRQ_TYPE_DEFAULT = IRQ_TYPE_SENSE_MASK, IRQ_TYPE_PROBE = 0x00000010, IRQ_LEVEL = (1 << 8), IRQ_PER_CPU = (1 << 9), IRQ_NOPROBE = (1 << 10), IRQ_NOREQUEST = (1 << 11), IRQ_NOAUTOEN = (1 << 12), IRQ_NO_BALANCING = (1 << 13), IRQ_MOVE_PCNTXT = (1 << 14), IRQ_NESTED_THREAD = (1 << 15), IRQ_NOTHREAD = (1 << 16), IRQ_PER_CPU_DEVID = (1 << 17), IRQ_IS_POLLED = (1 << 18), IRQ_DISABLE_UNLAZY = (1 << 19), IRQ_HIDDEN = (1 << 20), IRQ_NO_DEBUG = (1 << 21), }; #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_HIDDEN) #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) /* * Return value for chip->irq_set_affinity() * * IRQ_SET_MASK_OK - OK, core updates irq_common_data.affinity * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to * support stacked irqchips, which indicates skipping * all descendant irqchips. */ enum { IRQ_SET_MASK_OK = 0, IRQ_SET_MASK_OK_NOCOPY, IRQ_SET_MASK_OK_DONE, }; struct msi_desc; struct irq_domain; /** * struct irq_common_data - per irq data shared by all irqchips * @state_use_accessors: status information for irq chip functions. * Use accessor functions to deal with it * @node: node index useful for balancing * @handler_data: per-IRQ data for the irq_chip methods * @affinity: IRQ affinity on SMP. If this is an IPI * related irq, then this is the mask of the * CPUs to which an IPI can be sent. * @effective_affinity: The effective IRQ affinity on SMP as some irq * chips do not allow multi CPU destinations. * A subset of @affinity. * @msi_desc: MSI descriptor * @ipi_offset: Offset of first IPI target cpu in @affinity. Optional. 
*/ struct irq_common_data { unsigned int __private state_use_accessors; #ifdef CONFIG_NUMA unsigned int node; #endif void *handler_data; struct msi_desc *msi_desc; #ifdef CONFIG_SMP cpumask_var_t affinity; #endif #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK cpumask_var_t effective_affinity; #endif #ifdef CONFIG_GENERIC_IRQ_IPI unsigned int ipi_offset; #endif }; /** * struct irq_data - per irq chip data passed down to chip functions * @mask: precomputed bitmask for accessing the chip registers * @irq: interrupt number * @hwirq: hardware interrupt number, local to the interrupt domain * @common: point to data shared by all irqchips * @chip: low level interrupt hardware access * @domain: Interrupt translation domain; responsible for mapping * between hwirq number and linux irq number. * @parent_data: pointer to parent struct irq_data to support hierarchy * irq_domain * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations */ struct irq_data { u32 mask; unsigned int irq; irq_hw_number_t hwirq; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_data *parent_data; #endif void *chip_data; }; /* * Bit masks for irq_common_data.state_use_accessors * * IRQD_TRIGGER_MASK - Mask for the trigger type bits * IRQD_SETAFFINITY_PENDING - Affinity setting is pending * IRQD_ACTIVATED - Interrupt has already been activated * IRQD_NO_BALANCING - Balancing disabled for this IRQ * IRQD_PER_CPU - Interrupt is per cpu * IRQD_AFFINITY_SET - Interrupt affinity was set * IRQD_LEVEL - Interrupt is level triggered * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup * from suspend * IRQD_MOVE_PCNTXT - Interrupt can be moved in process * context * IRQD_IRQ_DISABLED - Disabled state of the interrupt * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt * IRQD_WAKEUP_ARMED - Wakeup mode armed * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU * IRQD_AFFINITY_MANAGED - Affinity is auto-managed by the kernel * IRQD_IRQ_STARTED - Startup state of the interrupt * IRQD_MANAGED_SHUTDOWN - Interrupt was shutdown due to empty affinity * mask. Applies only to affinity managed irqs. * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set * IRQD_CAN_RESERVE - Can use reservation mode * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked * from actual interrupt context. * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call * irq_chip::irq_set_affinity() when deactivated. * IRQD_IRQ_ENABLED_ON_SUSPEND - Interrupt is enabled on suspend by irq pm if * irqchip have flag IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND set. * IRQD_RESEND_WHEN_IN_PROGRESS - Interrupt may fire when already in progress in which * case it must be resent at the next available opportunity. 
*/ enum { IRQD_TRIGGER_MASK = 0xf, IRQD_SETAFFINITY_PENDING = BIT(8), IRQD_ACTIVATED = BIT(9), IRQD_NO_BALANCING = BIT(10), IRQD_PER_CPU = BIT(11), IRQD_AFFINITY_SET = BIT(12), IRQD_LEVEL = BIT(13), IRQD_WAKEUP_STATE = BIT(14), IRQD_MOVE_PCNTXT = BIT(15), IRQD_IRQ_DISABLED = BIT(16), IRQD_IRQ_MASKED = BIT(17), IRQD_IRQ_INPROGRESS = BIT(18), IRQD_WAKEUP_ARMED = BIT(19), IRQD_FORWARDED_TO_VCPU = BIT(20), IRQD_AFFINITY_MANAGED = BIT(21), IRQD_IRQ_STARTED = BIT(22), IRQD_MANAGED_SHUTDOWN = BIT(23), IRQD_SINGLE_TARGET = BIT(24), IRQD_DEFAULT_TRIGGER_SET = BIT(25), IRQD_CAN_RESERVE = BIT(26), IRQD_HANDLE_ENFORCE_IRQCTX = BIT(27), IRQD_AFFINITY_ON_ACTIVATE = BIT(28), IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29), IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) static inline bool irqd_is_setaffinity_pending(struct irq_data *d) { return __irqd_to_state(d) & IRQD_SETAFFINITY_PENDING; } static inline bool irqd_is_per_cpu(struct irq_data *d) { return __irqd_to_state(d) & IRQD_PER_CPU; } static inline bool irqd_can_balance(struct irq_data *d) { return !(__irqd_to_state(d) & (IRQD_PER_CPU | IRQD_NO_BALANCING)); } static inline bool irqd_affinity_was_set(struct irq_data *d) { return __irqd_to_state(d) & IRQD_AFFINITY_SET; } static inline void irqd_mark_affinity_was_set(struct irq_data *d) { __irqd_to_state(d) |= IRQD_AFFINITY_SET; } static inline bool irqd_trigger_type_was_set(struct irq_data *d) { return __irqd_to_state(d) & IRQD_DEFAULT_TRIGGER_SET; } static inline u32 irqd_get_trigger_type(struct irq_data *d) { return __irqd_to_state(d) & IRQD_TRIGGER_MASK; } /* * Must only be called inside irq_chip.irq_set_type() functions or * from the DT/ACPI setup code. */ static inline void irqd_set_trigger_type(struct irq_data *d, u32 type) { __irqd_to_state(d) &= ~IRQD_TRIGGER_MASK; __irqd_to_state(d) |= type & IRQD_TRIGGER_MASK; __irqd_to_state(d) |= IRQD_DEFAULT_TRIGGER_SET; } static inline bool irqd_is_level_type(struct irq_data *d) { return __irqd_to_state(d) & IRQD_LEVEL; } /* * Must only be called of irqchip.irq_set_affinity() or low level * hierarchy domain allocation functions. 
*/ static inline void irqd_set_single_target(struct irq_data *d) { __irqd_to_state(d) |= IRQD_SINGLE_TARGET; } static inline bool irqd_is_single_target(struct irq_data *d) { return __irqd_to_state(d) & IRQD_SINGLE_TARGET; } static inline void irqd_set_handle_enforce_irqctx(struct irq_data *d) { __irqd_to_state(d) |= IRQD_HANDLE_ENFORCE_IRQCTX; } static inline bool irqd_is_handle_enforce_irqctx(struct irq_data *d) { return __irqd_to_state(d) & IRQD_HANDLE_ENFORCE_IRQCTX; } static inline bool irqd_is_enabled_on_suspend(struct irq_data *d) { return __irqd_to_state(d) & IRQD_IRQ_ENABLED_ON_SUSPEND; } static inline bool irqd_is_wakeup_set(struct irq_data *d) { return __irqd_to_state(d) & IRQD_WAKEUP_STATE; } static inline bool irqd_can_move_in_process_context(struct irq_data *d) { return __irqd_to_state(d) & IRQD_MOVE_PCNTXT; } static inline bool irqd_irq_disabled(struct irq_data *d) { return __irqd_to_state(d) & IRQD_IRQ_DISABLED; } static inline bool irqd_irq_masked(struct irq_data *d) { return __irqd_to_state(d) & IRQD_IRQ_MASKED; } static inline bool irqd_irq_inprogress(struct irq_data *d) { return __irqd_to_state(d) & IRQD_IRQ_INPROGRESS; } static inline bool irqd_is_wakeup_armed(struct irq_data *d) { return __irqd_to_state(d) & IRQD_WAKEUP_ARMED; } static inline bool irqd_is_forwarded_to_vcpu(struct irq_data *d) { return __irqd_to_state(d) & IRQD_FORWARDED_TO_VCPU; } static inline void irqd_set_forwarded_to_vcpu(struct irq_data *d) { __irqd_to_state(d) |= IRQD_FORWARDED_TO_VCPU; } static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) { __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; } static inline bool irqd_affinity_is_managed(struct irq_data *d) { return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED; } static inline bool irqd_is_activated(struct irq_data *d) { return __irqd_to_state(d) & IRQD_ACTIVATED; } static inline void irqd_set_activated(struct irq_data *d) { __irqd_to_state(d) |= IRQD_ACTIVATED; } static inline void irqd_clr_activated(struct irq_data *d) { __irqd_to_state(d) &= ~IRQD_ACTIVATED; } static inline bool irqd_is_started(struct irq_data *d) { return __irqd_to_state(d) & IRQD_IRQ_STARTED; } static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) { return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; } static inline void irqd_set_can_reserve(struct irq_data *d) { __irqd_to_state(d) |= IRQD_CAN_RESERVE; } static inline void irqd_clr_can_reserve(struct irq_data *d) { __irqd_to_state(d) &= ~IRQD_CAN_RESERVE; } static inline bool irqd_can_reserve(struct irq_data *d) { return __irqd_to_state(d) & IRQD_CAN_RESERVE; } static inline void irqd_set_affinity_on_activate(struct irq_data *d) { __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; } static inline bool irqd_affinity_on_activate(struct irq_data *d) { return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; } static inline void irqd_set_resend_when_in_progress(struct irq_data *d) { __irqd_to_state(d) |= IRQD_RESEND_WHEN_IN_PROGRESS; } static inline bool irqd_needs_resend_when_in_progress(struct irq_data *d) { return __irqd_to_state(d) & IRQD_RESEND_WHEN_IN_PROGRESS; } #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) { return d->hwirq; } /** * struct irq_chip - hardware interrupt chip descriptor * * @name: name for /proc/interrupts * @irq_startup: start up the interrupt (defaults to ->enable if NULL) * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) * @irq_enable: enable the interrupt (defaults to chip->unmask if NULL) * @irq_disable: disable the 
interrupt * @irq_ack: start of a new interrupt * @irq_mask: mask an interrupt source * @irq_mask_ack: ack and mask an interrupt source * @irq_unmask: unmask an interrupt source * @irq_eoi: end of interrupt * @irq_set_affinity: Set the CPU affinity on SMP machines. If the force * argument is true, it tells the driver to * unconditionally apply the affinity setting. Sanity * checks against the supplied affinity mask are not * required. This is used for CPU hotplug where the * target CPU is not yet set in the cpu_online_mask. * @irq_retrigger: resend an IRQ to the CPU * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @irq_set_wake: enable/disable power-management wake-on of an IRQ * @irq_bus_lock: function to lock access to slow bus (i2c) chips * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips * @irq_cpu_online: configure an interrupt source for a secondary CPU * @irq_cpu_offline: un-configure an interrupt source for a secondary CPU * @irq_suspend: function called from core code on suspend once per * chip, when one or more interrupts are installed * @irq_resume: function called from core code on resume once per chip, * when one ore more interrupts are installed * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_calc_mask: Optional function to set irq_data.mask for special cases * @irq_print_chip: optional to print special chip info in show_interrupts * @irq_request_resources: optional to request resources before calling * any other callback related to this irq * @irq_release_resources: optional to release resources acquired with * irq_request_resources * @irq_compose_msi_msg: optional to compose message content for MSI * @irq_write_msi_msg: optional to write message content for MSI * @irq_get_irqchip_state: return the internal state of an interrupt * @irq_set_irqchip_state: set the internal state of a interrupt * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine * @ipi_send_single: send a single IPI to destination cpus * @ipi_send_mask: send an IPI to destination cpus in cpumask * @irq_nmi_setup: function called from core code before enabling an NMI * @irq_nmi_teardown: function called from core code after disabling an NMI * @flags: chip specific flags */ struct irq_chip { const char *name; unsigned int (*irq_startup)(struct irq_data *data); void (*irq_shutdown)(struct irq_data *data); void (*irq_enable)(struct irq_data *data); void (*irq_disable)(struct irq_data *data); void (*irq_ack)(struct irq_data *data); void (*irq_mask)(struct irq_data *data); void (*irq_mask_ack)(struct irq_data *data); void (*irq_unmask)(struct irq_data *data); void (*irq_eoi)(struct irq_data *data); int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force); int (*irq_retrigger)(struct irq_data *data); int (*irq_set_type)(struct irq_data *data, unsigned int flow_type); int (*irq_set_wake)(struct irq_data *data, unsigned int on); void (*irq_bus_lock)(struct irq_data *data); void (*irq_bus_sync_unlock)(struct irq_data *data); #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE void (*irq_cpu_online)(struct irq_data *data); void (*irq_cpu_offline)(struct irq_data *data); #endif void (*irq_suspend)(struct irq_data *data); void (*irq_resume)(struct irq_data *data); void (*irq_pm_shutdown)(struct irq_data *data); void (*irq_calc_mask)(struct irq_data *data); void (*irq_print_chip)(struct irq_data *data, struct seq_file *p); int (*irq_request_resources)(struct irq_data *data); void 
(*irq_release_resources)(struct irq_data *data); void (*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg *msg); void (*irq_write_msi_msg)(struct irq_data *data, struct msi_msg *msg); int (*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state); int (*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state); int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); void (*ipi_send_single)(struct irq_data *data, unsigned int cpu); void (*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest); int (*irq_nmi_setup)(struct irq_data *data); void (*irq_nmi_teardown)(struct irq_data *data); unsigned long flags; }; /* * irq_chip specific flags * * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type() * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path * IRQCHIP_ONOFFLINE_ENABLED: Only call irq_on/off_line callbacks * when irq enabled * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode * IRQCHIP_SUPPORTS_LEVEL_MSI: Chip can provide two doorbells for Level MSIs * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND: Invokes __enable_irq()/__disable_irq() for wake irqs * in the suspend path if they are in disabled state * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup * IRQCHIP_IMMUTABLE: Don't ever change anything in this chip */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), IRQCHIP_EOI_IF_HANDLED = (1 << 1), IRQCHIP_MASK_ON_SUSPEND = (1 << 2), IRQCHIP_ONOFFLINE_ENABLED = (1 << 3), IRQCHIP_SKIP_SET_WAKE = (1 << 4), IRQCHIP_ONESHOT_SAFE = (1 << 5), IRQCHIP_EOI_THREADED = (1 << 6), IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), IRQCHIP_SUPPORTS_NMI = (1 << 8), IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10), IRQCHIP_IMMUTABLE = (1 << 11), }; #include <linux/irqdesc.h> /* * Pick up the arch-dependent methods: */ #include <asm/hw_irq.h> #ifndef NR_IRQS_LEGACY # define NR_IRQS_LEGACY 0 #endif #ifndef ARCH_IRQ_INIT_FLAGS # define ARCH_IRQ_INIT_FLAGS 0 #endif #define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS struct irqaction; extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE extern void irq_cpu_online(void); extern void irq_cpu_offline(void); #endif extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION) extern void irq_migrate_all_off_this_cpu(void); extern int irq_affinity_online_cpu(unsigned int cpu); #else # define irq_affinity_online_cpu NULL #endif #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void __irq_move_irq(struct irq_data *data); static inline void irq_move_irq(struct irq_data *data) { if (unlikely(irqd_is_setaffinity_pending(data))) __irq_move_irq(data); } void irq_move_masked_irq(struct irq_data *data); void irq_force_complete_move(struct irq_desc *desc); #else static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } static inline void irq_force_complete_move(struct 
irq_desc *desc) { } #endif extern int no_irq_affinity; #ifdef CONFIG_HARDIRQS_SW_RESEND int irq_set_parent(int irq, int parent_irq); #else static inline int irq_set_parent(int irq, int parent_irq) { return 0; } #endif /* * Built-in IRQ handlers for various IRQ types, * callable via desc->handle_irq() */ extern void handle_level_irq(struct irq_desc *desc); extern void handle_fasteoi_irq(struct irq_desc *desc); extern void handle_edge_irq(struct irq_desc *desc); extern void handle_edge_eoi_irq(struct irq_desc *desc); extern void handle_simple_irq(struct irq_desc *desc); extern void handle_untracked_irq(struct irq_desc *desc); extern void handle_percpu_irq(struct irq_desc *desc); extern void handle_percpu_devid_irq(struct irq_desc *desc); extern void handle_bad_irq(struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); extern void handle_fasteoi_nmi(struct irq_desc *desc); extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc); extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); extern int irq_chip_pm_get(struct irq_data *data); extern int irq_chip_pm_put(struct irq_data *data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY extern void handle_fasteoi_ack_irq(struct irq_desc *desc); extern void handle_fasteoi_mask_irq(struct irq_desc *desc); extern int irq_chip_set_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool val); extern int irq_chip_get_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool *state); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); extern int irq_chip_retrigger_hierarchy(struct irq_data *data); extern void irq_chip_mask_parent(struct irq_data *data); extern void irq_chip_mask_ack_parent(struct irq_data *data); extern void irq_chip_unmask_parent(struct irq_data *data); extern void irq_chip_eoi_parent(struct irq_data *data); extern int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force); extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info); extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type); extern int irq_chip_request_resources_parent(struct irq_data *data); extern void irq_chip_release_resources_parent(struct irq_data *data); #endif /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); /* Enable/disable irq debugging output: */ extern int noirqdebug_setup(char *str); /* Checks whether the interrupt can be requested by request_irq(): */ extern int can_request_irq(unsigned int irq, unsigned long irqflags); /* Dummy irq-chip implementations: */ extern struct irq_chip no_irq_chip; extern struct irq_chip dummy_irq_chip; extern void irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name); static inline void irq_set_chip_and_handler(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle) { irq_set_chip_and_handler_name(irq, chip, handle, NULL); } extern int irq_set_percpu_devid(unsigned int irq); extern int irq_set_percpu_devid_partition(unsigned int irq, const struct cpumask *affinity); extern int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity); extern void __irq_set_handler(unsigned int irq, irq_flow_handler_t 
handle, int is_chained, const char *name); static inline void irq_set_handler(unsigned int irq, irq_flow_handler_t handle) { __irq_set_handler(irq, handle, 0, NULL); } /* * Set a highlevel chained flow handler for a given IRQ. * (a chained handler is automatically enabled and set to * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) */ static inline void irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle) { __irq_set_handler(irq, handle, 1, NULL); } /* * Set a highlevel chained flow handler and its data for a given IRQ. * (a chained handler is automatically enabled and set to * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) */ void irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, void *data); void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); static inline void irq_set_status_flags(unsigned int irq, unsigned long set) { irq_modify_status(irq, 0, set); } static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr) { irq_modify_status(irq, clr, 0); } static inline void irq_set_noprobe(unsigned int irq) { irq_modify_status(irq, 0, IRQ_NOPROBE); } static inline void irq_set_probe(unsigned int irq) { irq_modify_status(irq, IRQ_NOPROBE, 0); } static inline void irq_set_nothread(unsigned int irq) { irq_modify_status(irq, 0, IRQ_NOTHREAD); } static inline void irq_set_thread(unsigned int irq) { irq_modify_status(irq, IRQ_NOTHREAD, 0); } static inline void irq_set_nested_thread(unsigned int irq, bool nest) { if (nest) irq_set_status_flags(irq, IRQ_NESTED_THREAD); else irq_clear_status_flags(irq, IRQ_NESTED_THREAD); } static inline void irq_set_percpu_devid_flags(unsigned int irq) { irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_PER_CPU | IRQ_NOTHREAD | IRQ_NOPROBE | IRQ_PER_CPU_DEVID); } /* Set/get chip/data for an IRQ: */ extern int irq_set_chip(unsigned int irq, const struct irq_chip *chip); extern int irq_set_handler_data(unsigned int irq, void *data); extern int irq_set_chip_data(unsigned int irq, void *data); extern int irq_set_irq_type(unsigned int irq, unsigned int type); extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, struct msi_desc *entry); extern struct irq_data *irq_get_irq_data(unsigned int irq); static inline struct irq_chip *irq_get_chip(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); return d ? d->chip : NULL; } static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d) { return d->chip; } static inline void *irq_get_chip_data(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); return d ? d->chip_data : NULL; } static inline void *irq_data_get_irq_chip_data(struct irq_data *d) { return d->chip_data; } static inline void *irq_get_handler_data(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); return d ? d->common->handler_data : NULL; } static inline void *irq_data_get_irq_handler_data(struct irq_data *d) { return d->common->handler_data; } static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); return d ? d->common->msi_desc : NULL; } static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d) { return d->common->msi_desc; } static inline u32 irq_get_trigger_type(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); return d ? 
irqd_get_trigger_type(d) : 0; } static inline int irq_common_data_get_node(struct irq_common_data *d) { #ifdef CONFIG_NUMA return d->node; #else return 0; #endif } static inline int irq_data_get_node(struct irq_data *d) { return irq_common_data_get_node(d->common); } static inline const struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) { #ifdef CONFIG_SMP return d->common->affinity; #else return cpumask_of(0); #endif } static inline void irq_data_update_affinity(struct irq_data *d, const struct cpumask *m) { #ifdef CONFIG_SMP cpumask_copy(d->common->affinity, m); #endif } static inline const struct cpumask *irq_get_affinity_mask(int irq) { struct irq_data *d = irq_get_irq_data(irq); return d ? irq_data_get_affinity_mask(d) : NULL; } #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK static inline const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { return d->common->effective_affinity; } static inline void irq_data_update_effective_affinity(struct irq_data *d, const struct cpumask *m) { cpumask_copy(d->common->effective_affinity, m); } #else static inline void irq_data_update_effective_affinity(struct irq_data *d, const struct cpumask *m) { } static inline const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { return irq_data_get_affinity_mask(d); } #endif static inline const struct cpumask *irq_get_effective_affinity_mask(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); return d ? irq_data_get_effective_affinity_mask(d) : NULL; } unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, struct module *owner, const struct irq_affinity_desc *affinity); int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, unsigned int cnt, int node, struct module *owner, const struct irq_affinity_desc *affinity); /* use macros to avoid needing export.h for THIS_MODULE */ #define irq_alloc_descs(irq, from, cnt, node) \ __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) #define irq_alloc_desc(node) \ irq_alloc_descs(-1, 1, 1, node) #define irq_alloc_desc_at(at, node) \ irq_alloc_descs(at, at, 1, node) #define irq_alloc_desc_from(from, node) \ irq_alloc_descs(-1, from, 1, node) #define irq_alloc_descs_from(from, cnt, node) \ irq_alloc_descs(-1, from, cnt, node) #define devm_irq_alloc_descs(dev, irq, from, cnt, node) \ __devm_irq_alloc_descs(dev, irq, from, cnt, node, THIS_MODULE, NULL) #define devm_irq_alloc_desc(dev, node) \ devm_irq_alloc_descs(dev, -1, 1, 1, node) #define devm_irq_alloc_desc_at(dev, at, node) \ devm_irq_alloc_descs(dev, at, at, 1, node) #define devm_irq_alloc_desc_from(dev, from, node) \ devm_irq_alloc_descs(dev, -1, from, 1, node) #define devm_irq_alloc_descs_from(dev, from, cnt, node) \ devm_irq_alloc_descs(dev, -1, from, cnt, node) void irq_free_descs(unsigned int irq, unsigned int cnt); static inline void irq_free_desc(unsigned int irq) { irq_free_descs(irq, 1); } #ifdef CONFIG_GENERIC_IRQ_LEGACY void irq_init_desc(unsigned int irq); #endif /** * struct irq_chip_regs - register offsets for struct irq_gci * @enable: Enable register offset to reg_base * @disable: Disable register offset to reg_base * @mask: Mask register offset to reg_base * @ack: Ack register offset to reg_base * @eoi: Eoi register offset to reg_base * @type: Type configuration register offset to reg_base * @polarity: Polarity configuration register offset to reg_base */ struct irq_chip_regs { unsigned long enable; unsigned long disable; 
unsigned long mask; unsigned long ack; unsigned long eoi; unsigned long type; unsigned long polarity; }; /** * struct irq_chip_type - Generic interrupt chip instance for a flow type * @chip: The real interrupt chip which provides the callbacks * @regs: Register offsets for this chip * @handler: Flow handler associated with this chip * @type: Chip can handle these flow types * @mask_cache_priv: Cached mask register private to the chip type * @mask_cache: Pointer to cached mask register * * A irq_generic_chip can have several instances of irq_chip_type when * it requires different functions and register offsets for different * flow types. */ struct irq_chip_type { struct irq_chip chip; struct irq_chip_regs regs; irq_flow_handler_t handler; u32 type; u32 mask_cache_priv; u32 *mask_cache; }; /** * struct irq_chip_generic - Generic irq chip data structure * @lock: Lock to protect register and cache data access * @reg_base: Register base address (virtual) * @reg_readl: Alternate I/O accessor (defaults to readl if NULL) * @reg_writel: Alternate I/O accessor (defaults to writel if NULL) * @suspend: Function called from core code on suspend once per * chip; can be useful instead of irq_chip::suspend to * handle chip details even when no interrupts are in use * @resume: Function called from core code on resume once per chip; * can be useful instead of irq_chip::suspend to handle * chip details even when no interrupts are in use * @irq_base: Interrupt base nr for this chip * @irq_cnt: Number of interrupts handled by this chip * @mask_cache: Cached mask register shared between all chip types * @type_cache: Cached type register * @polarity_cache: Cached polarity register * @wake_enabled: Interrupt can wakeup from suspend * @wake_active: Interrupt is marked as an wakeup from suspend source * @num_ct: Number of available irq_chip_type instances (usually 1) * @private: Private data for non generic chip callbacks * @installed: bitfield to denote installed interrupts * @unused: bitfield to denote unused interrupts * @domain: irq domain pointer * @list: List head for keeping track of instances * @chip_types: Array of interrupt irq_chip_types * * Note, that irq_chip_generic can have multiple irq_chip_type * implementations which can be associated to a particular irq line of * an irq_chip_generic instance. That allows to share and protect * state in an irq_chip_generic instance when we need to implement * different flow mechanisms (level/edge) for it. */ struct irq_chip_generic { raw_spinlock_t lock; void __iomem *reg_base; u32 (*reg_readl)(void __iomem *addr); void (*reg_writel)(u32 val, void __iomem *addr); void (*suspend)(struct irq_chip_generic *gc); void (*resume)(struct irq_chip_generic *gc); unsigned int irq_base; unsigned int irq_cnt; u32 mask_cache; u32 type_cache; u32 polarity_cache; u32 wake_enabled; u32 wake_active; unsigned int num_ct; void *private; unsigned long installed; unsigned long unused; struct irq_domain *domain; struct list_head list; struct irq_chip_type chip_types[]; }; /** * enum irq_gc_flags - Initialization flags for generic irq chips * @IRQ_GC_INIT_MASK_CACHE: Initialize the mask_cache by reading mask reg * @IRQ_GC_INIT_NESTED_LOCK: Set the lock class of the irqs to nested for * irq chips which need to call irq_set_wake() on * the parent irq. 
Usually GPIO implementations * @IRQ_GC_MASK_CACHE_PER_TYPE: Mask cache is chip type private * @IRQ_GC_NO_MASK: Do not calculate irq_data->mask * @IRQ_GC_BE_IO: Use big-endian register accesses (default: LE) */ enum irq_gc_flags { IRQ_GC_INIT_MASK_CACHE = 1 << 0, IRQ_GC_INIT_NESTED_LOCK = 1 << 1, IRQ_GC_MASK_CACHE_PER_TYPE = 1 << 2, IRQ_GC_NO_MASK = 1 << 3, IRQ_GC_BE_IO = 1 << 4, }; /* * struct irq_domain_chip_generic - Generic irq chip data structure for irq domains * @irqs_per_chip: Number of interrupts per chip * @num_chips: Number of chips * @irq_flags_to_set: IRQ* flags to set on irq setup * @irq_flags_to_clear: IRQ* flags to clear on irq setup * @gc_flags: Generic chip specific setup flags * @gc: Array of pointers to generic interrupt chips */ struct irq_domain_chip_generic { unsigned int irqs_per_chip; unsigned int num_chips; unsigned int irq_flags_to_clear; unsigned int irq_flags_to_set; enum irq_gc_flags gc_flags; struct irq_chip_generic *gc[]; }; /* Generic chip callback functions */ void irq_gc_noop(struct irq_data *d); void irq_gc_mask_disable_reg(struct irq_data *d); void irq_gc_mask_set_bit(struct irq_data *d); void irq_gc_mask_clr_bit(struct irq_data *d); void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); /* Setup functions for irq_chip_generic */ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw_irq); void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq); struct irq_chip_generic * irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler); void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, enum irq_gc_flags flags, unsigned int clr, unsigned int set); int irq_setup_alt_chip(struct irq_data *d, unsigned int type); void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, unsigned int clr, unsigned int set); struct irq_chip_generic * devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler); int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, u32 msk, enum irq_gc_flags flags, unsigned int clr, unsigned int set); struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, int num_ct, const char *name, irq_flow_handler_t handler, unsigned int clr, unsigned int set, enum irq_gc_flags flags); #define irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name, \ handler, clr, set, flags) \ ({ \ MAYBE_BUILD_BUG_ON(irqs_per_chip > 32); \ __irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name,\ handler, clr, set, flags); \ }) static inline void irq_free_generic_chip(struct irq_chip_generic *gc) { kfree(gc); } static inline void irq_destroy_generic_chip(struct irq_chip_generic *gc, u32 msk, unsigned int clr, unsigned int set) { irq_remove_generic_chip(gc, msk, clr, set); irq_free_generic_chip(gc); } static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) { return container_of(d->chip, struct irq_chip_type, chip); } #define IRQ_MSK(n) (u32)((n) < 32 ? 
((1 << (n)) - 1) : UINT_MAX) #ifdef CONFIG_SMP static inline void irq_gc_lock(struct irq_chip_generic *gc) { raw_spin_lock(&gc->lock); } static inline void irq_gc_unlock(struct irq_chip_generic *gc) { raw_spin_unlock(&gc->lock); } #else static inline void irq_gc_lock(struct irq_chip_generic *gc) { } static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } #endif /* * The irqsave variants are for usage in non interrupt code. Do not use * them in irq_chip callbacks. Use irq_gc_lock() instead. */ #define irq_gc_lock_irqsave(gc, flags) \ raw_spin_lock_irqsave(&(gc)->lock, flags) #define irq_gc_unlock_irqrestore(gc, flags) \ raw_spin_unlock_irqrestore(&(gc)->lock, flags) static inline void irq_reg_writel(struct irq_chip_generic *gc, u32 val, int reg_offset) { if (gc->reg_writel) gc->reg_writel(val, gc->reg_base + reg_offset); else writel(val, gc->reg_base + reg_offset); } static inline u32 irq_reg_readl(struct irq_chip_generic *gc, int reg_offset) { if (gc->reg_readl) return gc->reg_readl(gc->reg_base + reg_offset); else return readl(gc->reg_base + reg_offset); } struct irq_matrix; struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, unsigned int alloc_start, unsigned int alloc_end); void irq_matrix_online(struct irq_matrix *m); void irq_matrix_offline(struct irq_matrix *m); void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace); int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk); void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk); int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk, unsigned int *mapped_cpu); void irq_matrix_reserve(struct irq_matrix *m); void irq_matrix_remove_reserved(struct irq_matrix *m); int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, bool reserved, unsigned int *mapped_cpu); void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, unsigned int bit, bool managed); void irq_matrix_assign(struct irq_matrix *m, unsigned int bit); unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown); unsigned int irq_matrix_allocated(struct irq_matrix *m); unsigned int irq_matrix_reserved(struct irq_matrix *m); void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind); /* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */ #define INVALID_HWIRQ (~0UL) irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); int __ipi_send_single(struct irq_desc *desc, unsigned int cpu); int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest); int ipi_send_single(unsigned int virq, unsigned int cpu); int ipi_send_mask(unsigned int virq, const struct cpumask *dest); void ipi_mux_process(void); int ipi_mux_create(unsigned int nr_ipi, void (*mux_send)(unsigned int cpu)); #ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER /* * Registers a generic IRQ handling function as the top-level IRQ handler in * the system, which is generally the first C code called from an assembly * architecture-specific interrupt handler. * * Returns 0 on success, or -EBUSY if an IRQ handler has already been * registered. */ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); /* * Allows interrupt handlers to find the irqchip that's been registered as the * top-level IRQ handler. 
*/ extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; asmlinkage void generic_handle_arch_irq(struct pt_regs *regs); #else #ifndef set_handle_irq #define set_handle_irq(handle_irq) \ do { \ (void)handle_irq; \ WARN_ON(1); \ } while (0) #endif #endif #endif /* _LINUX_IRQ_H */
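/*
 * Illustrative sketch only, not part of the original header: a minimal,
 * hypothetical memory-mapped interrupt controller showing how the pieces
 * declared above fit together (struct irq_chip callbacks, chip data,
 * irqd_to_hwirq() and the handle_level_irq() flow handler).  The foo_*
 * names and the FOO_IMR register layout are invented for the example;
 * only the irq_* APIs come from this header and <linux/irqdesc.h>.
 */
#if 0	/* example only, not compiled */
struct foo_intc {
	void __iomem	*base;		/* assumed MMIO base of the controller */
};

#define FOO_IMR		0x04		/* hypothetical interrupt mask register */

static void foo_irq_mask(struct irq_data *d)
{
	struct foo_intc *intc = irq_data_get_irq_chip_data(d);
	u32 imr = readl(intc->base + FOO_IMR);

	/* Set the mask bit for this hardware irq line */
	writel(imr | BIT(irqd_to_hwirq(d)), intc->base + FOO_IMR);
}

static void foo_irq_unmask(struct irq_data *d)
{
	struct foo_intc *intc = irq_data_get_irq_chip_data(d);
	u32 imr = readl(intc->base + FOO_IMR);

	/* Clear the mask bit for this hardware irq line */
	writel(imr & ~BIT(irqd_to_hwirq(d)), intc->base + FOO_IMR);
}

static struct irq_chip foo_irq_chip = {
	.name		= "foo-intc",
	.irq_mask	= foo_irq_mask,
	.irq_unmask	= foo_irq_unmask,
};

/* Wire one Linux irq to the chip and a level-type flow handler */
static void foo_setup_one(unsigned int irq, struct foo_intc *intc)
{
	irq_set_chip_data(irq, intc);
	irq_set_chip_and_handler(irq, &foo_irq_chip, handle_level_irq);
}
#endif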
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM maple_tree

#if !defined(_TRACE_MM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MM_H

#include <linux/tracepoint.h>

struct ma_state;

TRACE_EVENT(ma_op,

	TP_PROTO(const char *fn, struct ma_state *mas),

	TP_ARGS(fn, mas),

	TP_STRUCT__entry(
			__field(const char *, fn)
			__field(unsigned long, min)
			__field(unsigned long, max)
			__field(unsigned long, index)
			__field(unsigned long, last)
			__field(void *, node)
	),

	TP_fast_assign(
			__entry->fn	= fn;
			__entry->min	= mas->min;
			__entry->max	= mas->max;
			__entry->index	= mas->index;
			__entry->last	= mas->last;
			__entry->node	= mas->node;
	),

	TP_printk("%s\tNode: %p (%lu %lu) range: %lu-%lu",
		  __entry->fn,
		  (void *) __entry->node,
		  (unsigned long) __entry->min,
		  (unsigned long) __entry->max,
		  (unsigned long) __entry->index,
		  (unsigned long) __entry->last
	)
)
TRACE_EVENT(ma_read,

	TP_PROTO(const char *fn, struct ma_state *mas),

	TP_ARGS(fn, mas),

	TP_STRUCT__entry(
			__field(const char *, fn)
			__field(unsigned long, min)
			__field(unsigned long, max)
			__field(unsigned long, index)
			__field(unsigned long, last)
			__field(void *, node)
	),

	TP_fast_assign(
			__entry->fn	= fn;
			__entry->min	= mas->min;
			__entry->max	= mas->max;
			__entry->index	= mas->index;
			__entry->last	= mas->last;
			__entry->node	= mas->node;
	),

	TP_printk("%s\tNode: %p (%lu %lu) range: %lu-%lu",
		  __entry->fn,
		  (void *) __entry->node,
		  (unsigned long) __entry->min,
		  (unsigned long) __entry->max,
		  (unsigned long) __entry->index,
		  (unsigned long) __entry->last
	)
)
TRACE_EVENT(ma_write,

	TP_PROTO(const char *fn, struct ma_state *mas, unsigned long piv,
		 void *val),

	TP_ARGS(fn, mas, piv, val),

	TP_STRUCT__entry(
			__field(const char *, fn)
			__field(unsigned long, min)
			__field(unsigned long, max)
			__field(unsigned long, index)
			__field(unsigned long, last)
			__field(unsigned long, piv)
			__field(void *, val)
			__field(void *, node)
	),

	TP_fast_assign(
			__entry->fn	= fn;
			__entry->min	= mas->min;
			__entry->max	= mas->max;
			__entry->index	= mas->index;
			__entry->last	= mas->last;
			__entry->piv	= piv;
			__entry->val	= val;
			__entry->node	= mas->node;
	),

	TP_printk("%s\tNode %p (%lu %lu) range:%lu-%lu piv (%lu) val %p",
		  __entry->fn,
		  (void *) __entry->node,
		  (unsigned long) __entry->min,
		  (unsigned long) __entry->max,
		  (unsigned long) __entry->index,
		  (unsigned long) __entry->last,
		  (unsigned long) __entry->piv,
		  (void *) __entry->val
	)
)
#endif /* _TRACE_MM_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
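/*
 * Illustrative sketch only, not part of the original header: each
 * TRACE_EVENT(name, TP_PROTO(...), ...) above generates a trace_<name>()
 * helper with the TP_PROTO() signature, which callers invoke at the traced
 * sites.  One compilation unit must #define CREATE_TRACE_POINTS before
 * including the header so the tracepoints are instantiated.  The function
 * mas_store_hypothetical() below is invented for the example.
 */
#if 0	/* example only, not compiled */
#define CREATE_TRACE_POINTS	/* in exactly one .c file */
#include <trace/events/maple_tree.h>

static void mas_store_hypothetical(struct ma_state *mas, void *entry)
{
	/* Matches TP_PROTO(const char *fn, struct ma_state *mas) */
	trace_ma_op(__func__, mas);

	/* ... locate and update the slot covering mas->index ... */

	/* Matches TP_PROTO(fn, mas, piv, val); mas->last as a placeholder pivot */
	trace_ma_write(__func__, mas, mas->last, entry);
}
#endif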
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Simple NUMA memory policy for the Linux kernel.
 *
 * Copyright 2003,2004 Andi Kleen, SuSE Labs.
 * (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc.
 *
 * NUMA policy allows the user to give hints in which node(s) memory should
 * be allocated.
 *
 * Support four policies per VMA and per process:
 *
 * The VMA policy has priority over the process policy for a page fault.
 *
 * interleave     Allocate memory interleaved over a set of nodes,
 *                with normal fallback if it fails.
 *                For VMA based allocations this interleaves based on the
 *                offset into the backing object or offset into the mapping
 *                for anonymous memory. For process policy a process counter
 *                is used.
 *
 * weighted interleave
 *                Allocate memory interleaved over a set of nodes based on
 *                a set of weights (per-node), with normal fallback if it
 *                fails.  Otherwise operates the same as interleave.
 *                Example: nodeset(0,1) & weights (2,1) - 2 pages allocated
 *                on node 0 for every 1 page allocated on node 1.
 *
 * bind           Only allocate memory on a specific set of nodes,
 *                no fallback.
 * FIXME: memory is allocated starting with the first node
 *        to the last. It would be better if bind would truly restrict
 *        the allocation to memory nodes instead
 *
 * preferred      Try a specific node first before normal fallback.
 *                As a special case NUMA_NO_NODE here means do the allocation
 *                on the local CPU. This is normally identical to default,
 *                but useful to set in a VMA when you have a non-default
 *                process policy.
 *
 * preferred many Try a set of nodes first before normal fallback. This is
 *                similar to preferred without the special case.
 *
 * default        Allocate on the local node first, or when on a VMA
 *                use the process policy. This is what Linux always did
 *                in a NUMA aware kernel and still does by, ahem, default.
 *
 * The process policy is applied for most non-interrupt memory allocations
 * in that process' context. Interrupts ignore the policies and always
 * try to allocate on the local CPU. The VMA policy is only applied for memory
 * allocations for a VMA in the VM.
 *
 * Currently there are a few corner cases in swapping where the policy
 * is not applied, but the majority should be handled. When process policy
 * is used it is not remembered over swap outs/swap ins.
 *
 * Only the highest zone in the zone hierarchy gets policied. Allocations
 * requesting a lower zone just use default policy. This implies that
 * on systems with highmem kernel lowmem allocations don't get policied.
 * Same with GFP_DMA allocations.
 *
 * For shmem/tmpfs shared memory the policy is shared between
 * all users and remembered even when nobody has memory mapped.
 */

/* Notebook:
   fix mmap readahead to honour policy and enable policy for any page cache
   object
   statistics for bigpages
   global policy for page cache? currently it uses process policy. Requires
   first item above.
   handle mremap for shared memory (currently ignored for the policy)
   grows down?
   make bind policy root only? It can trigger oom much faster and the
   kernel is not always grateful with that.
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mempolicy.h>
#include <linux/pagewalk.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/task.h>
#include <linux/nodemask.h>
#include <linux/cpuset.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <linux/ptrace.h>
#include <linux/swap.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/migrate.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/ctype.h>
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/printk.h>
#include <linux/swapops.h>

#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <linux/uaccess.h>

#include "internal.h"

/* Internal flags */
#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
#define MPOL_MF_INVERT       (MPOL_MF_INTERNAL << 1)	/* Invert check for nodemask */
#define MPOL_MF_WRLOCK       (MPOL_MF_INTERNAL << 2)	/* Write-lock walked vmas */

static struct kmem_cache *policy_cache;
static struct kmem_cache *sn_cache;

/* Highest zone. A specific allocation for a zone below that is not policied.
*/ enum zone_type policy_zone = 0; /* * run-time system-wide default policy => local allocation */ static struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ .mode = MPOL_LOCAL, }; static struct mempolicy preferred_node_policy[MAX_NUMNODES]; /* * iw_table is the sysfs-set interleave weight table, a value of 0 denotes * system-default value should be used. A NULL iw_table also denotes that * system-default values should be used. Until the system-default table * is implemented, the system-default is always 1. * * iw_table is RCU protected */ static u8 __rcu *iw_table; static DEFINE_MUTEX(iw_table_lock); static u8 get_il_weight(int node) { u8 *table; u8 weight; rcu_read_lock(); table = rcu_dereference(iw_table); /* if no iw_table, use system default */ weight = table ? table[node] : 1; /* if value in iw_table is 0, use system default */ weight = weight ? weight : 1; rcu_read_unlock(); return weight; } /** * numa_nearest_node - Find nearest node by state * @node: Node id to start the search * @state: State to filter the search * * Lookup the closest node by distance if @nid is not in state. * * Return: this @node if it is in state, otherwise the closest node by distance */ int numa_nearest_node(int node, unsigned int state) { int min_dist = INT_MAX, dist, n, min_node; if (state >= NR_NODE_STATES) return -EINVAL; if (node == NUMA_NO_NODE || node_state(node, state)) return node; min_node = node; for_each_node_state(n, state) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; min_node = n; } } return min_node; } EXPORT_SYMBOL_GPL(numa_nearest_node); struct mempolicy *get_task_policy(struct task_struct *p) { struct mempolicy *pol = p->mempolicy; int node; if (pol) return pol; node = numa_node_id(); if (node != NUMA_NO_NODE) { pol = &preferred_node_policy[node]; /* preferred_node_policy is not initialised early in boot */ if (pol->mode) return pol; } return &default_policy; } static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t *nodes); void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes); } mpol_ops[MPOL_MAX]; static inline int mpol_store_user_nodemask(const struct mempolicy *pol) { return pol->flags & MPOL_MODE_FLAGS; } static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig, const nodemask_t *rel) { nodemask_t tmp; nodes_fold(tmp, *orig, nodes_weight(*rel)); nodes_onto(*ret, tmp, *rel); } static int mpol_new_nodemask(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; pol->nodes = *nodes; return 0; } static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; nodes_clear(pol->nodes); node_set(first_node(*nodes), pol->nodes); return 0; } /* * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if * any, for the new policy. mpol_new() has already validated the nodes * parameter with respect to the policy mode and flags. * * Must be called holding task's alloc_lock to protect task's mems_allowed * and mempolicy. May also be called holding the mmap_lock for write. */ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes, struct nodemask_scratch *nsc) { int ret; /* * Default (pol==NULL) resp. local memory policies are not a * subject of any remapping. They also do not need any special * constructor. 
*/ if (!pol || pol->mode == MPOL_LOCAL) return 0; /* Check N_MEMORY */ nodes_and(nsc->mask1, cpuset_current_mems_allowed, node_states[N_MEMORY]); VM_BUG_ON(!nodes); if (pol->flags & MPOL_F_RELATIVE_NODES) mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); else nodes_and(nsc->mask2, *nodes, nsc->mask1); if (mpol_store_user_nodemask(pol)) pol->w.user_nodemask = *nodes; else pol->w.cpuset_mems_allowed = cpuset_current_mems_allowed; ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); return ret; } /* * This function just creates a new policy, does some check and simple * initialization. You must invoke mpol_set_nodemask() to set nodes. */ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, nodemask_t *nodes) { struct mempolicy *policy; if (mode == MPOL_DEFAULT) { if (nodes && !nodes_empty(*nodes)) return ERR_PTR(-EINVAL); return NULL; } VM_BUG_ON(!nodes); /* * MPOL_PREFERRED cannot be used with MPOL_F_STATIC_NODES or * MPOL_F_RELATIVE_NODES if the nodemask is empty (local allocation). * All other modes require a valid pointer to a non-empty nodemask. */ if (mode == MPOL_PREFERRED) { if (nodes_empty(*nodes)) { if (((flags & MPOL_F_STATIC_NODES) || (flags & MPOL_F_RELATIVE_NODES))) return ERR_PTR(-EINVAL); mode = MPOL_LOCAL; } } else if (mode == MPOL_LOCAL) { if (!nodes_empty(*nodes) || (flags & MPOL_F_STATIC_NODES) || (flags & MPOL_F_RELATIVE_NODES)) return ERR_PTR(-EINVAL); } else if (nodes_empty(*nodes)) return ERR_PTR(-EINVAL); policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!policy) return ERR_PTR(-ENOMEM); atomic_set(&policy->refcnt, 1); policy->mode = mode; policy->flags = flags; policy->home_node = NUMA_NO_NODE; return policy; } /* Slow path of a mpol destructor. */ void __mpol_put(struct mempolicy *pol) { if (!atomic_dec_and_test(&pol->refcnt)) return; kmem_cache_free(policy_cache, pol); } static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes) { } static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes) { nodemask_t tmp; if (pol->flags & MPOL_F_STATIC_NODES) nodes_and(tmp, pol->w.user_nodemask, *nodes); else if (pol->flags & MPOL_F_RELATIVE_NODES) mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); else { nodes_remap(tmp, pol->nodes, pol->w.cpuset_mems_allowed, *nodes); pol->w.cpuset_mems_allowed = *nodes; } if (nodes_empty(tmp)) tmp = *nodes; pol->nodes = tmp; } static void mpol_rebind_preferred(struct mempolicy *pol, const nodemask_t *nodes) { pol->w.cpuset_mems_allowed = *nodes; } /* * mpol_rebind_policy - Migrate a policy to a different set of nodes * * Per-vma policies are protected by mmap_lock. Allocations using per-task * policies are protected by task->mems_allowed_seq to prevent a premature * OOM/allocation failure due to parallel nodemask modification. */ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) { if (!pol || pol->mode == MPOL_LOCAL) return; if (!mpol_store_user_nodemask(pol) && nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) return; mpol_ops[pol->mode].rebind(pol, newmask); } /* * Wrapper for mpol_rebind_policy() that just requires task * pointer, and updates task mempolicy. * * Called with task's alloc_lock held. */ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { mpol_rebind_policy(tsk->mempolicy, new); } /* * Rebind each vma in mm to new nodemask. * * Call holding a reference to mm. Takes mm->mmap_lock during call. 
*/ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); mmap_write_lock(mm); for_each_vma(vmi, vma) { vma_start_write(vma); mpol_rebind_policy(vma->vm_policy, new); } mmap_write_unlock(mm); } static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { [MPOL_DEFAULT] = { .rebind = mpol_rebind_default, }, [MPOL_INTERLEAVE] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, [MPOL_PREFERRED] = { .create = mpol_new_preferred, .rebind = mpol_rebind_preferred, }, [MPOL_BIND] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, [MPOL_LOCAL] = { .rebind = mpol_rebind_default, }, [MPOL_PREFERRED_MANY] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_preferred, }, [MPOL_WEIGHTED_INTERLEAVE] = { .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, }; static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags); static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx, int *nid); static bool strictly_unmovable(unsigned long flags) { /* * STRICT without MOVE flags lets do_mbind() fail immediately with -EIO * if any misplaced page is found. */ return (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) == MPOL_MF_STRICT; } struct migration_mpol { /* for alloc_migration_target_by_mpol() */ struct mempolicy *pol; pgoff_t ilx; }; struct queue_pages { struct list_head *pagelist; unsigned long flags; nodemask_t *nmask; unsigned long start; unsigned long end; struct vm_area_struct *first; struct folio *large; /* note last large folio encountered */ long nr_failed; /* could not be isolated at this time */ }; /* * Check if the folio's nid is in qp->nmask. * * If MPOL_MF_INVERT is set in qp->flags, check if the nid is * in the invert of qp->nmask. */ static inline bool queue_folio_required(struct folio *folio, struct queue_pages *qp) { int nid = folio_nid(folio); unsigned long flags = qp->flags; return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); } static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk) { struct folio *folio; struct queue_pages *qp = walk->private; if (unlikely(is_pmd_migration_entry(*pmd))) { qp->nr_failed++; return; } folio = pfn_folio(pmd_pfn(*pmd)); if (is_huge_zero_page(&folio->page)) { walk->action = ACTION_CONTINUE; return; } if (!queue_folio_required(folio, qp)) return; if (!(qp->flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || !vma_migratable(walk->vma) || !migrate_folio_add(folio, qp->pagelist, qp->flags)) qp->nr_failed++; } /* * Scan through folios, checking if they satisfy the required conditions, * moving them from LRU to local pagelist for migration if they do (or not). * * queue_folios_pte_range() has two possible return values: * 0 - continue walking to scan for more, even if an existing folio on the * wrong node could not be isolated and queued for migration. * -EIO - only MPOL_MF_STRICT was specified, without MPOL_MF_MOVE or ..._ALL, * and an existing folio was on a node that does not follow the policy. 
*/ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *vma = walk->vma; struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { queue_folios_pmd(pmd, walk); spin_unlock(ptl); goto out; } mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); if (!pte) { walk->action = ACTION_AGAIN; return 0; } for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = ptep_get(pte); if (pte_none(ptent)) continue; if (!pte_present(ptent)) { if (is_migration_entry(pte_to_swp_entry(ptent))) qp->nr_failed++; continue; } folio = vm_normal_folio(vma, addr, ptent); if (!folio || folio_is_zone_device(folio)) continue; /* * vm_normal_folio() filters out zero pages, but there might * still be reserved folios to skip, perhaps in a VDSO. */ if (folio_test_reserved(folio)) continue; if (!queue_folio_required(folio, qp)) continue; if (folio_test_large(folio)) { /* * A large folio can only be isolated from LRU once, * but may be mapped by many PTEs (and Copy-On-Write may * intersperse PTEs of other, order 0, folios). This is * a common case, so don't mistake it for failure (but * there can be other cases of multi-mapped pages which * this quick check does not help to filter out - and a * search of the pagelist might grow to be prohibitive). * * migrate_pages(&pagelist) returns nr_failed folios, so * check "large" now so that queue_pages_range() returns * a comparable nr_failed folios. This does imply that * if folio could not be isolated for some racy reason * at its first PTE, later PTEs will not give it another * chance of isolation; but keeps the accounting simple. */ if (folio == qp->large) continue; qp->large = folio; } if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || !vma_migratable(vma) || !migrate_folio_add(folio, qp->pagelist, flags)) { qp->nr_failed++; if (strictly_unmovable(flags)) break; } } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); out: if (qp->nr_failed && strictly_unmovable(flags)) return -EIO; return 0; } static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { #ifdef CONFIG_HUGETLB_PAGE struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; struct folio *folio; spinlock_t *ptl; pte_t entry; ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); entry = huge_ptep_get(pte); if (!pte_present(entry)) { if (unlikely(is_hugetlb_entry_migration(entry))) qp->nr_failed++; goto unlock; } folio = pfn_folio(pte_pfn(entry)); if (!queue_folio_required(folio, qp)) goto unlock; if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || !vma_migratable(walk->vma)) { qp->nr_failed++; goto unlock; } /* * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio. * Choosing not to migrate a shared folio is not counted as a failure. * * To check if the folio is shared, ideally we want to make sure * every page is mapped to the same process. Doing that is very * expensive, so check the estimated sharers of the folio instead. */ if ((flags & MPOL_MF_MOVE_ALL) || (folio_estimated_sharers(folio) == 1 && !hugetlb_pmd_shared(pte))) if (!isolate_hugetlb(folio, qp->pagelist)) qp->nr_failed++; unlock: spin_unlock(ptl); if (qp->nr_failed && strictly_unmovable(flags)) return -EIO; #endif return 0; } #ifdef CONFIG_NUMA_BALANCING /* * This is used to mark a range of virtual addresses to be inaccessible. 
* These are later cleared by a NUMA hinting fault. Depending on these * faults, pages may be migrated for better NUMA placement. * * This is assuming that NUMA faults are handled using PROT_NONE. If * an architecture makes a different choice, it will need further * changes to the core. */ unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long addr, unsigned long end) { struct mmu_gather tlb; long nr_updated; tlb_gather_mmu(&tlb, vma->vm_mm); nr_updated = change_protection(&tlb, vma, addr, end, MM_CP_PROT_NUMA); if (nr_updated > 0) count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated); tlb_finish_mmu(&tlb); return nr_updated; } #endif /* CONFIG_NUMA_BALANCING */ static int queue_pages_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk) { struct vm_area_struct *next, *vma = walk->vma; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; /* range check first */ VM_BUG_ON_VMA(!range_in_vma(vma, start, end), vma); if (!qp->first) { qp->first = vma; if (!(flags & MPOL_MF_DISCONTIG_OK) && (qp->start < vma->vm_start)) /* hole at head side of range */ return -EFAULT; } next = find_vma(vma->vm_mm, vma->vm_end); if (!(flags & MPOL_MF_DISCONTIG_OK) && ((vma->vm_end < qp->end) && (!next || vma->vm_end < next->vm_start))) /* hole at middle or tail of range */ return -EFAULT; /* * Need check MPOL_MF_STRICT to return -EIO if possible * regardless of vma_migratable */ if (!vma_migratable(vma) && !(flags & MPOL_MF_STRICT)) return 1; /* * Check page nodes, and queue pages to move, in the current vma. * But if no moving, and no strict checking, the scan can be skipped. */ if (flags & (MPOL_MF_STRICT | MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) return 0; return 1; } static const struct mm_walk_ops queue_pages_walk_ops = { .hugetlb_entry = queue_folios_hugetlb, .pmd_entry = queue_folios_pte_range, .test_walk = queue_pages_test_walk, .walk_lock = PGWALK_RDLOCK, }; static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = { .hugetlb_entry = queue_folios_hugetlb, .pmd_entry = queue_folios_pte_range, .test_walk = queue_pages_test_walk, .walk_lock = PGWALK_WRLOCK, }; /* * Walk through page tables and collect pages to be migrated. * * If pages found in a given range are not on the required set of @nodes, * and migration is allowed, they are isolated and queued to @pagelist. * * queue_pages_range() may return: * 0 - all pages already on the right node, or successfully queued for moving * (or neither strict checking nor moving requested: only range checking). * >0 - this number of misplaced folios could not be queued for moving * (a hugetlbfs page or a transparent huge page being counted as 1). * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without MOVEs. * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK unspecified. */ static long queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, nodemask_t *nodes, unsigned long flags, struct list_head *pagelist) { int err; struct queue_pages qp = { .pagelist = pagelist, .flags = flags, .nmask = nodes, .start = start, .end = end, .first = NULL, }; const struct mm_walk_ops *ops = (flags & MPOL_MF_WRLOCK) ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); if (!qp.first) /* whole range in hole */ err = -EFAULT; return err ? : qp.nr_failed; } /* * Apply policy to a single VMA * This must be called with the mmap_lock held for writing. 
*/ static int vma_replace_policy(struct vm_area_struct *vma, struct mempolicy *pol) { int err; struct mempolicy *old; struct mempolicy *new; vma_assert_write_locked(vma); new = mpol_dup(pol); if (IS_ERR(new)) return PTR_ERR(new); if (vma->vm_ops && vma->vm_ops->set_policy) { err = vma->vm_ops->set_policy(vma, new); if (err) goto err_out; } old = vma->vm_policy; vma->vm_policy = new; /* protected by mmap_lock */ mpol_put(old); return 0; err_out: mpol_put(new); return err; } /* Split or merge the VMA (if required) and apply the new policy */ static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, struct mempolicy *new_pol) { unsigned long vmstart, vmend; vmend = min(end, vma->vm_end); if (start > vma->vm_start) { *prev = vma; vmstart = start; } else { vmstart = vma->vm_start; } if (mpol_equal(vma->vm_policy, new_pol)) { *prev = vma; return 0; } vma = vma_modify_policy(vmi, *prev, vma, vmstart, vmend, new_pol); if (IS_ERR(vma)) return PTR_ERR(vma); *prev = vma; return vma_replace_policy(vma, new_pol); } /* Set the process memory policy */ static long do_set_mempolicy(unsigned short mode, unsigned short flags, nodemask_t *nodes) { struct mempolicy *new, *old; NODEMASK_SCRATCH(scratch); int ret; if (!scratch) return -ENOMEM; new = mpol_new(mode, flags, nodes); if (IS_ERR(new)) { ret = PTR_ERR(new); goto out; } task_lock(current); ret = mpol_set_nodemask(new, nodes, scratch); if (ret) { task_unlock(current); mpol_put(new); goto out; } old = current->mempolicy; current->mempolicy = new; if (new && (new->mode == MPOL_INTERLEAVE || new->mode == MPOL_WEIGHTED_INTERLEAVE)) { current->il_prev = MAX_NUMNODES-1; current->il_weight = 0; } task_unlock(current); mpol_put(old); ret = 0; out: NODEMASK_SCRATCH_FREE(scratch); return ret; } /* * Return nodemask for policy for get_mempolicy() query * * Called with task's alloc_lock held */ static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes) { nodes_clear(*nodes); if (pol == &default_policy) return; switch (pol->mode) { case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_WEIGHTED_INTERLEAVE: *nodes = pol->nodes; break; case MPOL_LOCAL: /* return empty node mask for local allocation */ break; default: BUG(); } } static int lookup_node(struct mm_struct *mm, unsigned long addr) { struct page *p = NULL; int ret; ret = get_user_pages_fast(addr & PAGE_MASK, 1, 0, &p); if (ret > 0) { ret = page_to_nid(p); put_page(p); } return ret; } /* Retrieve NUMA policy */ static long do_get_mempolicy(int *policy, nodemask_t *nmask, unsigned long addr, unsigned long flags) { int err; struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL; if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED)) return -EINVAL; if (flags & MPOL_F_MEMS_ALLOWED) { if (flags & (MPOL_F_NODE|MPOL_F_ADDR)) return -EINVAL; *policy = 0; /* just so it's initialized */ task_lock(current); *nmask = cpuset_current_mems_allowed; task_unlock(current); return 0; } if (flags & MPOL_F_ADDR) { pgoff_t ilx; /* ignored here */ /* * Do NOT fall back to task policy if the * vma/shared policy at addr is NULL. We * want to return MPOL_DEFAULT in this case. 
*/ mmap_read_lock(mm); vma = vma_lookup(mm, addr); if (!vma) { mmap_read_unlock(mm); return -EFAULT; } pol = __get_vma_policy(vma, addr, &ilx); } else if (addr) return -EINVAL; if (!pol) pol = &default_policy; /* indicates default behavior */ if (flags & MPOL_F_NODE) { if (flags & MPOL_F_ADDR) { /* * Take a refcount on the mpol, because we are about to * drop the mmap_lock, after which only "pol" remains * valid, "vma" is stale. */ pol_refcount = pol; vma = NULL; mpol_get(pol); mmap_read_unlock(mm); err = lookup_node(mm, addr); if (err < 0) goto out; *policy = err; } else if (pol == current->mempolicy && pol->mode == MPOL_INTERLEAVE) { *policy = next_node_in(current->il_prev, pol->nodes); } else if (pol == current->mempolicy && pol->mode == MPOL_WEIGHTED_INTERLEAVE) { if (current->il_weight) *policy = current->il_prev; else *policy = next_node_in(current->il_prev, pol->nodes); } else { err = -EINVAL; goto out; } } else { *policy = pol == &default_policy ? MPOL_DEFAULT : pol->mode; /* * Internal mempolicy flags must be masked off before exposing * the policy to userspace. */ *policy |= (pol->flags & MPOL_MODE_FLAGS); } err = 0; if (nmask) { if (mpol_store_user_nodemask(pol)) { *nmask = pol->w.user_nodemask; } else { task_lock(current); get_policy_nodemask(pol, nmask); task_unlock(current); } } out: mpol_cond_put(pol); if (vma) mmap_read_unlock(mm); if (pol_refcount) mpol_put(pol_refcount); return err; } #ifdef CONFIG_MIGRATION static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags) { /* * Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio. * Choosing not to migrate a shared folio is not counted as a failure. * * To check if the folio is shared, ideally we want to make sure * every page is mapped to the same process. Doing that is very * expensive, so check the estimated sharers of the folio instead. */ if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) { if (folio_isolate_lru(folio)) { list_add_tail(&folio->lru, foliolist); node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } else { /* * Non-movable folio may reach here. And, there may be * temporary off LRU folios or non-LRU movable folios. * Treat them as unmovable folios since they can't be * isolated, so they can't be moved at the moment. */ return false; } } return true; } /* * Migrate pages from one node to a target node. * Returns error or the number of pages not migrated. */ static long migrate_to_node(struct mm_struct *mm, int source, int dest, int flags) { nodemask_t nmask; struct vm_area_struct *vma; LIST_HEAD(pagelist); long nr_failed; long err = 0; struct migration_target_control mtc = { .nid = dest, .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, }; nodes_clear(nmask); node_set(source, nmask); VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); mmap_read_lock(mm); vma = find_vma(mm, 0); /* * This does not migrate the range, but isolates all pages that * need migration. Between passing in the full user address * space range and MPOL_MF_DISCONTIG_OK, this call cannot fail, * but passes back the count of pages which could not be isolated. 
*/ nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, flags | MPOL_MF_DISCONTIG_OK, &pagelist); mmap_read_unlock(mm); if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL); if (err) putback_movable_pages(&pagelist); } if (err >= 0) err += nr_failed; return err; } /* * Move pages between the two nodesets so as to preserve the physical * layout as much as possible. * * Returns the number of page that could not be moved. */ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { long nr_failed = 0; long err = 0; nodemask_t tmp; lru_cache_disable(); /* * Find a 'source' bit set in 'tmp' whose corresponding 'dest' * bit in 'to' is not also set in 'tmp'. Clear the found 'source' * bit in 'tmp', and return that <source, dest> pair for migration. * The pair of nodemasks 'to' and 'from' define the map. * * If no pair of bits is found that way, fallback to picking some * pair of 'source' and 'dest' bits that are not the same. If the * 'source' and 'dest' bits are the same, this represents a node * that will be migrating to itself, so no pages need move. * * If no bits are left in 'tmp', or if all remaining bits left * in 'tmp' correspond to the same bit in 'to', return false * (nothing left to migrate). * * This lets us pick a pair of nodes to migrate between, such that * if possible the dest node is not already occupied by some other * source node, minimizing the risk of overloading the memory on a * node that would happen if we migrated incoming memory to a node * before migrating outgoing memory source that same node. * * A single scan of tmp is sufficient. As we go, we remember the * most recent <s, d> pair that moved (s != d). If we find a pair * that not only moved, but what's better, moved to an empty slot * (d is not set in tmp), then we break out then, with that pair. * Otherwise when we finish scanning from_tmp, we at least have the * most recent <s, d> pair that moved. If we get all the way through * the scan of tmp without finding any node that moved, much less * moved to an empty node, then there is nothing left worth migrating. */ tmp = *from; while (!nodes_empty(tmp)) { int s, d; int source = NUMA_NO_NODE; int dest = 0; for_each_node_mask(s, tmp) { /* * do_migrate_pages() tries to maintain the relative * node relationship of the pages established between * threads and memory areas. * * However if the number of source nodes is not equal to * the number of destination nodes we can not preserve * this node relative relationship. In that case, skip * copying memory from a node that is in the destination * mask. * * Example: [2,3,4] -> [3,4,5] moves everything. * [0-7] - > [3,4,5] moves only 0,1,2,6,7. */ if ((nodes_weight(*from) != nodes_weight(*to)) && (node_isset(s, *to))) continue; d = node_remap(s, *from, *to); if (s == d) continue; source = s; /* Node moved. Memorize */ dest = d; /* dest not in remaining from nodes? */ if (!node_isset(dest, tmp)) break; } if (source == NUMA_NO_NODE) break; node_clear(source, tmp); err = migrate_to_node(mm, source, dest, flags); if (err > 0) nr_failed += err; if (err < 0) break; } lru_cache_enable(); if (err < 0) return err; return (nr_failed < INT_MAX) ? nr_failed : INT_MAX; } /* * Allocate a new folio for page migration, according to NUMA mempolicy. 
*/ static struct folio *alloc_migration_target_by_mpol(struct folio *src, unsigned long private) { struct migration_mpol *mmpol = (struct migration_mpol *)private; struct mempolicy *pol = mmpol->pol; pgoff_t ilx = mmpol->ilx; struct page *page; unsigned int order; int nid = numa_node_id(); gfp_t gfp; order = folio_order(src); ilx += src->index >> order; if (folio_test_hugetlb(src)) { nodemask_t *nodemask; struct hstate *h; h = folio_hstate(src); gfp = htlb_alloc_mask(h); nodemask = policy_nodemask(gfp, pol, ilx, &nid); return alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp); } if (folio_test_large(src)) gfp = GFP_TRANSHUGE; else gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL | __GFP_COMP; page = alloc_pages_mpol(gfp, order, pol, ilx, nid); return page_rmappable_folio(page); } #else static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist, unsigned long flags) { return false; } int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { return -ENOSYS; } static struct folio *alloc_migration_target_by_mpol(struct folio *src, unsigned long private) { return NULL; } #endif static long do_mbind(unsigned long start, unsigned long len, unsigned short mode, unsigned short mode_flags, nodemask_t *nmask, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; struct vma_iterator vmi; struct migration_mpol mmpol; struct mempolicy *new; unsigned long end; long err; long nr_failed; LIST_HEAD(pagelist); if (flags & ~(unsigned long)MPOL_MF_VALID) return -EINVAL; if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) return -EPERM; if (start & ~PAGE_MASK) return -EINVAL; if (mode == MPOL_DEFAULT) flags &= ~MPOL_MF_STRICT; len = PAGE_ALIGN(len); end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; new = mpol_new(mode, mode_flags, nmask); if (IS_ERR(new)) return PTR_ERR(new); /* * If we are using the default policy then operation * on discontinuous address spaces is okay after all */ if (!new) flags |= MPOL_MF_DISCONTIG_OK; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) lru_cache_disable(); { NODEMASK_SCRATCH(scratch); if (scratch) { mmap_write_lock(mm); err = mpol_set_nodemask(new, nmask, scratch); if (err) mmap_write_unlock(mm); } else err = -ENOMEM; NODEMASK_SCRATCH_FREE(scratch); } if (err) goto mpol_out; /* * Lock the VMAs before scanning for pages to migrate, * to ensure we don't miss a concurrently inserted page. */ nr_failed = queue_pages_range(mm, start, end, nmask, flags | MPOL_MF_INVERT | MPOL_MF_WRLOCK, &pagelist); if (nr_failed < 0) { err = nr_failed; nr_failed = 0; } else { vma_iter_init(&vmi, mm, start); prev = vma_prev(&vmi); for_each_vma_range(vmi, vma, end) { err = mbind_range(&vmi, vma, &prev, start, end, new); if (err) break; } } if (!err && !list_empty(&pagelist)) { /* Convert MPOL_DEFAULT's NULL to task or default policy */ if (!new) { new = get_task_policy(current); mpol_get(new); } mmpol.pol = new; mmpol.ilx = 0; /* * In the interleaved case, attempt to allocate on exactly the * targeted nodes, for the first VMA to be migrated; for later * VMAs, the nodes will still be interleaved from the targeted * nodemask, but one by one may be selected differently. 
*/ if (new->mode == MPOL_INTERLEAVE || new->mode == MPOL_WEIGHTED_INTERLEAVE) { struct folio *folio; unsigned int order; unsigned long addr = -EFAULT; list_for_each_entry(folio, &pagelist, lru) { if (!folio_test_ksm(folio)) break; } if (!list_entry_is_head(folio, &pagelist, lru)) { vma_iter_init(&vmi, mm, start); for_each_vma_range(vmi, vma, end) { addr = page_address_in_vma( folio_page(folio, 0), vma); if (addr != -EFAULT) break; } } if (addr != -EFAULT) { order = folio_order(folio); /* We already know the pol, but not the ilx */ mpol_cond_put(get_vma_policy(vma, addr, order, &mmpol.ilx)); /* Set base from which to increment by index */ mmpol.ilx -= folio->index >> order; } } } mmap_write_unlock(mm); if (!err && !list_empty(&pagelist)) { nr_failed |= migrate_pages(&pagelist, alloc_migration_target_by_mpol, NULL, (unsigned long)&mmpol, MIGRATE_SYNC, MR_MEMPOLICY_MBIND, NULL); } if (nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; if (!list_empty(&pagelist)) putback_movable_pages(&pagelist); mpol_out: mpol_put(new); if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) lru_cache_enable(); return err; } /* * User space interface with variable sized bitmaps for nodelists. */ static int get_bitmap(unsigned long *mask, const unsigned long __user *nmask, unsigned long maxnode) { unsigned long nlongs = BITS_TO_LONGS(maxnode); int ret; if (in_compat_syscall()) ret = compat_get_bitmap(mask, (const compat_ulong_t __user *)nmask, maxnode); else ret = copy_from_user(mask, nmask, nlongs * sizeof(unsigned long)); if (ret) return -EFAULT; if (maxnode % BITS_PER_LONG) mask[nlongs - 1] &= (1UL << (maxnode % BITS_PER_LONG)) - 1; return 0; } /* Copy a node mask from user space. */ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, unsigned long maxnode) { --maxnode; nodes_clear(*nodes); if (maxnode == 0 || !nmask) return 0; if (maxnode > PAGE_SIZE*BITS_PER_BYTE) return -EINVAL; /* * When the user specified more nodes than supported just check * if the non supported part is all zero, one word at a time, * starting at the end. */ while (maxnode > MAX_NUMNODES) { unsigned long bits = min_t(unsigned long, maxnode, BITS_PER_LONG); unsigned long t; if (get_bitmap(&t, &nmask[(maxnode - 1) / BITS_PER_LONG], bits)) return -EFAULT; if (maxnode - bits >= MAX_NUMNODES) { maxnode -= bits; } else { maxnode = MAX_NUMNODES; t &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1); } if (t) return -EINVAL; } return get_bitmap(nodes_addr(*nodes), nmask, maxnode); } /* Copy a kernel node mask to user space */ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, nodemask_t *nodes) { unsigned long copy = ALIGN(maxnode-1, 64) / 8; unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long); bool compat = in_compat_syscall(); if (compat) nbytes = BITS_TO_COMPAT_LONGS(nr_node_ids) * sizeof(compat_long_t); if (copy > nbytes) { if (copy > PAGE_SIZE) return -EINVAL; if (clear_user((char __user *)mask + nbytes, copy - nbytes)) return -EFAULT; copy = nbytes; maxnode = nr_node_ids; } if (compat) return compat_put_bitmap((compat_ulong_t __user *)mask, nodes_addr(*nodes), maxnode); return copy_to_user(mask, nodes_addr(*nodes), copy) ? 
-EFAULT : 0; } /* Basic parameter sanity check used by both mbind() and set_mempolicy() */ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags) { *flags = *mode & MPOL_MODE_FLAGS; *mode &= ~MPOL_MODE_FLAGS; if ((unsigned int)(*mode) >= MPOL_MAX) return -EINVAL; if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES)) return -EINVAL; if (*flags & MPOL_F_NUMA_BALANCING) { if (*mode != MPOL_BIND) return -EINVAL; *flags |= (MPOL_F_MOF | MPOL_F_MORON); } return 0; } static long kernel_mbind(unsigned long start, unsigned long len, unsigned long mode, const unsigned long __user *nmask, unsigned long maxnode, unsigned int flags) { unsigned short mode_flags; nodemask_t nodes; int lmode = mode; int err; start = untagged_addr(start); err = sanitize_mpol_flags(&lmode, &mode_flags); if (err) return err; err = get_nodes(&nodes, nmask, maxnode); if (err) return err; return do_mbind(start, len, lmode, mode_flags, &nodes, flags); } SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len, unsigned long, home_node, unsigned long, flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; struct mempolicy *new, *old; unsigned long end; int err = -ENOENT; VMA_ITERATOR(vmi, mm, start); start = untagged_addr(start); if (start & ~PAGE_MASK) return -EINVAL; /* * flags is used for future extension if any. */ if (flags != 0) return -EINVAL; /* * Check home_node is online to avoid accessing uninitialized * NODE_DATA. */ if (home_node >= MAX_NUMNODES || !node_online(home_node)) return -EINVAL; len = PAGE_ALIGN(len); end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; mmap_write_lock(mm); prev = vma_prev(&vmi); for_each_vma_range(vmi, vma, end) { /* * If any vma in the range got policy other than MPOL_BIND * or MPOL_PREFERRED_MANY we return error. We don't reset * the home node for vmas we already updated before. 
*/ old = vma_policy(vma); if (!old) { prev = vma; continue; } if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) { err = -EOPNOTSUPP; break; } new = mpol_dup(old); if (IS_ERR(new)) { err = PTR_ERR(new); break; } vma_start_write(vma); new->home_node = home_node; err = mbind_range(&vmi, vma, &prev, start, end, new); mpol_put(new); if (err) break; } mmap_write_unlock(mm); return err; } SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, unsigned long, mode, const unsigned long __user *, nmask, unsigned long, maxnode, unsigned int, flags) { return kernel_mbind(start, len, mode, nmask, maxnode, flags); } /* Set the process memory policy */ static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, unsigned long maxnode) { unsigned short mode_flags; nodemask_t nodes; int lmode = mode; int err; err = sanitize_mpol_flags(&lmode, &mode_flags); if (err) return err; err = get_nodes(&nodes, nmask, maxnode); if (err) return err; return do_set_mempolicy(lmode, mode_flags, &nodes); } SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, unsigned long, maxnode) { return kernel_set_mempolicy(mode, nmask, maxnode); } static int kernel_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { struct mm_struct *mm = NULL; struct task_struct *task; nodemask_t task_nodes; int err; nodemask_t *old; nodemask_t *new; NODEMASK_SCRATCH(scratch); if (!scratch) return -ENOMEM; old = &scratch->mask1; new = &scratch->mask2; err = get_nodes(old, old_nodes, maxnode); if (err) goto out; err = get_nodes(new, new_nodes, maxnode); if (err) goto out; /* Find the mm_struct */ rcu_read_lock(); task = pid ? find_task_by_vpid(pid) : current; if (!task) { rcu_read_unlock(); err = -ESRCH; goto out; } get_task_struct(task); err = -EINVAL; /* * Check if this process has the right to modify the specified process. * Use the regular "ptrace_may_access()" checks. */ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { rcu_read_unlock(); err = -EPERM; goto out_put; } rcu_read_unlock(); task_nodes = cpuset_mems_allowed(task); /* Is the user allowed to access the target nodes? */ if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out_put; } task_nodes = cpuset_mems_allowed(current); nodes_and(*new, *new, task_nodes); if (nodes_empty(*new)) goto out_put; err = security_task_movememory(task); if (err) goto out_put; mm = get_task_mm(task); put_task_struct(task); if (!mm) { err = -EINVAL; goto out; } err = do_migrate_pages(mm, old, new, capable(CAP_SYS_NICE) ? 
MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); mmput(mm); out: NODEMASK_SCRATCH_FREE(scratch); return err; out_put: put_task_struct(task); goto out; } SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, const unsigned long __user *, old_nodes, const unsigned long __user *, new_nodes) { return kernel_migrate_pages(pid, maxnode, old_nodes, new_nodes); } /* Retrieve NUMA policy */ static int kernel_get_mempolicy(int __user *policy, unsigned long __user *nmask, unsigned long maxnode, unsigned long addr, unsigned long flags) { int err; int pval; nodemask_t nodes; if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; addr = untagged_addr(addr); err = do_get_mempolicy(&pval, &nodes, addr, flags); if (err) return err; if (policy && put_user(pval, policy)) return -EFAULT; if (nmask) err = copy_nodes_to_user(nmask, maxnode, &nodes); return err; } SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, unsigned long __user *, nmask, unsigned long, maxnode, unsigned long, addr, unsigned long, flags) { return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags); } bool vma_migratable(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return false; /* * DAX device mappings require predictable access latency, so avoid * incurring periodic faults. */ if (vma_is_dax(vma)) return false; if (is_vm_hugetlb_page(vma) && !hugepage_migration_supported(hstate_vma(vma))) return false; /* * Migration allocates pages in the highest zone. If we cannot * do so then migration (at least from node to node) is not * possible. */ if (vma->vm_file && gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) < policy_zone) return false; return true; } struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx) { *ilx = 0; return (vma->vm_ops && vma->vm_ops->get_policy) ? vma->vm_ops->get_policy(vma, addr, ilx) : vma->vm_policy; } /* * get_vma_policy(@vma, @addr, @order, @ilx) * @vma: virtual memory area whose policy is sought * @addr: address in @vma for shared policy lookup * @order: 0, or appropriate huge_page_order for interleaving * @ilx: interleave index (output), for use only when MPOL_INTERLEAVE or * MPOL_WEIGHTED_INTERLEAVE * * Returns effective policy for a VMA at specified address. * Falls back to current->mempolicy or system default policy, as necessary. * Shared policies [those marked as MPOL_F_SHARED] require an extra reference * count--added by the get_policy() vm_op, as appropriate--to protect against * freeing by another task. It is the caller's responsibility to free the * extra reference for shared policies. 
*/ struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx) { struct mempolicy *pol; pol = __get_vma_policy(vma, addr, ilx); if (!pol) pol = get_task_policy(current); if (pol->mode == MPOL_INTERLEAVE || pol->mode == MPOL_WEIGHTED_INTERLEAVE) { *ilx += vma->vm_pgoff >> order; *ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); } return pol; } bool vma_policy_mof(struct vm_area_struct *vma) { struct mempolicy *pol; if (vma->vm_ops && vma->vm_ops->get_policy) { bool ret = false; pgoff_t ilx; /* ignored here */ pol = vma->vm_ops->get_policy(vma, vma->vm_start, &ilx); if (pol && (pol->flags & MPOL_F_MOF)) ret = true; mpol_cond_put(pol); return ret; } pol = vma->vm_policy; if (!pol) pol = get_task_policy(current); return pol->flags & MPOL_F_MOF; } bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone) { enum zone_type dynamic_policy_zone = policy_zone; BUG_ON(dynamic_policy_zone == ZONE_MOVABLE); /* * if policy->nodes has movable memory only, * we apply policy when gfp_zone(gfp) = ZONE_MOVABLE only. * * policy->nodes is intersect with node_states[N_MEMORY]. * so if the following test fails, it implies * policy->nodes has movable memory only. */ if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY])) dynamic_policy_zone = ZONE_MOVABLE; return zone >= dynamic_policy_zone; } static unsigned int weighted_interleave_nodes(struct mempolicy *policy) { unsigned int node; unsigned int cpuset_mems_cookie; retry: /* to prevent miscount use tsk->mems_allowed_seq to detect rebind */ cpuset_mems_cookie = read_mems_allowed_begin(); node = current->il_prev; if (!current->il_weight || !node_isset(node, policy->nodes)) { node = next_node_in(node, policy->nodes); if (read_mems_allowed_retry(cpuset_mems_cookie)) goto retry; if (node == MAX_NUMNODES) return node; current->il_prev = node; current->il_weight = get_il_weight(node); } current->il_weight--; return node; } /* Do dynamic interleaving for a process */ static unsigned int interleave_nodes(struct mempolicy *policy) { unsigned int nid; unsigned int cpuset_mems_cookie; /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */ do { cpuset_mems_cookie = read_mems_allowed_begin(); nid = next_node_in(current->il_prev, policy->nodes); } while (read_mems_allowed_retry(cpuset_mems_cookie)); if (nid < MAX_NUMNODES) current->il_prev = nid; return nid; } /* * Depending on the memory policy provide a node from which to allocate the * next slab entry. */ unsigned int mempolicy_slab_node(void) { struct mempolicy *policy; int node = numa_mem_id(); if (!in_task()) return node; policy = current->mempolicy; if (!policy) return node; switch (policy->mode) { case MPOL_PREFERRED: return first_node(policy->nodes); case MPOL_INTERLEAVE: return interleave_nodes(policy); case MPOL_WEIGHTED_INTERLEAVE: return weighted_interleave_nodes(policy); case MPOL_BIND: case MPOL_PREFERRED_MANY: { struct zoneref *z; /* * Follow bind policy behavior and start allocation at the * first node. */ struct zonelist *zonelist; enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL); zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK]; z = first_zones_zonelist(zonelist, highest_zoneidx, &policy->nodes); return z->zone ? zone_to_nid(z->zone) : node; } case MPOL_LOCAL: return node; default: BUG(); } } static unsigned int read_once_policy_nodemask(struct mempolicy *pol, nodemask_t *mask) { /* * barrier stabilizes the nodemask locally so that it can be iterated * over safely without concern for changes. 
Allocators validate node * selection does not violate mems_allowed, so this is safe. */ barrier(); memcpy(mask, &pol->nodes, sizeof(nodemask_t)); barrier(); return nodes_weight(*mask); } static unsigned int weighted_interleave_nid(struct mempolicy *pol, pgoff_t ilx) { nodemask_t nodemask; unsigned int target, nr_nodes; u8 *table; unsigned int weight_total = 0; u8 weight; int nid; nr_nodes = read_once_policy_nodemask(pol, &nodemask); if (!nr_nodes) return numa_node_id(); rcu_read_lock(); table = rcu_dereference(iw_table); /* calculate the total weight */ for_each_node_mask(nid, nodemask) { /* detect system default usage */ weight = table ? table[nid] : 1; weight = weight ? weight : 1; weight_total += weight; } /* Calculate the node offset based on totals */ target = ilx % weight_total; nid = first_node(nodemask); while (target) { /* detect system default usage */ weight = table ? table[nid] : 1; weight = weight ? weight : 1; if (target < weight) break; target -= weight; nid = next_node_in(nid, nodemask); } rcu_read_unlock(); return nid; } /* * Do static interleaving for interleave index @ilx. Returns the ilx'th * node in pol->nodes (starting from ilx=0), wrapping around if ilx * exceeds the number of present nodes. */ static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx) { nodemask_t nodemask; unsigned int target, nnodes; int i; int nid; nnodes = read_once_policy_nodemask(pol, &nodemask); if (!nnodes) return numa_node_id(); target = ilx % nnodes; nid = first_node(nodemask); for (i = 0; i < target; i++) nid = next_node(nid, nodemask); return nid; } /* * Return a nodemask representing a mempolicy for filtering nodes for * page allocation, together with preferred node id (or the input node id). */ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx, int *nid) { nodemask_t *nodemask = NULL; switch (pol->mode) { case MPOL_PREFERRED: /* Override input node id */ *nid = first_node(pol->nodes); break; case MPOL_PREFERRED_MANY: nodemask = &pol->nodes; if (pol->home_node != NUMA_NO_NODE) *nid = pol->home_node; break; case MPOL_BIND: /* Restrict to nodemask (but not on lower zones) */ if (apply_policy_zone(pol, gfp_zone(gfp)) && cpuset_nodemask_valid_mems_allowed(&pol->nodes)) nodemask = &pol->nodes; if (pol->home_node != NUMA_NO_NODE) *nid = pol->home_node; /* * __GFP_THISNODE shouldn't even be used with the bind policy * because we might easily break the expectation to stay on the * requested node and not break the policy. */ WARN_ON_ONCE(gfp & __GFP_THISNODE); break; case MPOL_INTERLEAVE: /* Override input node id */ *nid = (ilx == NO_INTERLEAVE_INDEX) ? interleave_nodes(pol) : interleave_nid(pol, ilx); break; case MPOL_WEIGHTED_INTERLEAVE: *nid = (ilx == NO_INTERLEAVE_INDEX) ? weighted_interleave_nodes(pol) : weighted_interleave_nid(pol, ilx); break; } return nodemask; } #ifdef CONFIG_HUGETLBFS /* * huge_node(@vma, @addr, @gfp_flags, @mpol) * @vma: virtual memory area whose policy is sought * @addr: address in @vma for shared policy lookup and interleave policy * @gfp_flags: for requested zone * @mpol: pointer to mempolicy pointer for reference counted mempolicy * @nodemask: pointer to nodemask pointer for 'bind' and 'prefer-many' policy * * Returns a nid suitable for a huge page allocation and a pointer * to the struct mempolicy for conditional unref after allocation. * If the effective policy is 'bind' or 'prefer-many', returns a pointer * to the mempolicy's @nodemask for filtering the zonelist. 
*/ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { pgoff_t ilx; int nid; nid = numa_node_id(); *mpol = get_vma_policy(vma, addr, hstate_vma(vma)->order, &ilx); *nodemask = policy_nodemask(gfp_flags, *mpol, ilx, &nid); return nid; } /* * init_nodemask_of_mempolicy * * If the current task's mempolicy is "default" [NULL], return 'false' * to indicate default policy. Otherwise, extract the policy nodemask * for 'bind' or 'interleave' policy into the argument nodemask, or * initialize the argument nodemask to contain the single node for * 'preferred' or 'local' policy and return 'true' to indicate presence * of non-default mempolicy. * * We don't bother with reference counting the mempolicy [mpol_get/put] * because the current task is examining it's own mempolicy and a task's * mempolicy is only ever changed by the task itself. * * N.B., it is the caller's responsibility to free a returned nodemask. */ bool init_nodemask_of_mempolicy(nodemask_t *mask) { struct mempolicy *mempolicy; if (!(mask && current->mempolicy)) return false; task_lock(current); mempolicy = current->mempolicy; switch (mempolicy->mode) { case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE: *mask = mempolicy->nodes; break; case MPOL_LOCAL: init_nodemask_of_node(mask, numa_node_id()); break; default: BUG(); } task_unlock(current); return true; } #endif /* * mempolicy_in_oom_domain * * If tsk's mempolicy is "bind", check for intersection between mask and * the policy nodemask. Otherwise, return true for all other policies * including "interleave", as a tsk with "interleave" policy may have * memory allocated from all nodes in system. * * Takes task_lock(tsk) to prevent freeing of its mempolicy. */ bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask) { struct mempolicy *mempolicy; bool ret = true; if (!mask) return ret; task_lock(tsk); mempolicy = tsk->mempolicy; if (mempolicy && mempolicy->mode == MPOL_BIND) ret = nodes_intersects(mempolicy->nodes, *mask); task_unlock(tsk); return ret; } static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, int nid, nodemask_t *nodemask) { struct page *page; gfp_t preferred_gfp; /* * This is a two pass approach. The first pass will only try the * preferred nodes but skip the direct reclaim and allow the * allocation to fail, while the second pass will try all the * nodes in system. */ preferred_gfp = gfp | __GFP_NOWARN; preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); page = __alloc_pages(preferred_gfp, order, nid, nodemask); if (!page) page = __alloc_pages(gfp, order, nid, NULL); return page; } /** * alloc_pages_mpol - Allocate pages according to NUMA mempolicy. * @gfp: GFP flags. * @order: Order of the page allocation. * @pol: Pointer to the NUMA mempolicy. * @ilx: Index for interleave mempolicy (also distinguishes alloc_pages()). * @nid: Preferred node (usually numa_node_id() but @mpol may override it). * * Return: The page on success or NULL if allocation fails. 
*/ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, struct mempolicy *pol, pgoff_t ilx, int nid) { nodemask_t *nodemask; struct page *page; nodemask = policy_nodemask(gfp, pol, ilx, &nid); if (pol->mode == MPOL_PREFERRED_MANY) return alloc_pages_preferred_many(gfp, order, nid, nodemask); if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && /* filter "hugepage" allocation, unless from alloc_pages() */ order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) { /* * For hugepage allocation and non-interleave policy which * allows the current node (or other explicitly preferred * node) we only try to allocate from the current/preferred * node and don't fall back to other nodes, as the cost of * remote accesses would likely offset THP benefits. * * If the policy is interleave or does not allow the current * node in its nodemask, we allocate the standard way. */ if (pol->mode != MPOL_INTERLEAVE && pol->mode != MPOL_WEIGHTED_INTERLEAVE && (!nodemask || node_isset(nid, *nodemask))) { /* * First, try to allocate THP only on local node, but * don't reclaim unnecessarily, just compact. */ page = __alloc_pages_node(nid, gfp | __GFP_THISNODE | __GFP_NORETRY, order); if (page || !(gfp & __GFP_DIRECT_RECLAIM)) return page; /* * If hugepage allocations are configured to always * synchronous compact or the vma has been madvised * to prefer hugepage backing, retry allowing remote * memory with both reclaim and compact as well. */ } } page = __alloc_pages(gfp, order, nid, nodemask); if (unlikely(pol->mode == MPOL_INTERLEAVE) && page) { /* skip NUMA_INTERLEAVE_HIT update if numa stats is disabled */ if (static_branch_likely(&vm_numa_stat_key) && page_to_nid(page) == nid) { preempt_disable(); __count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT); preempt_enable(); } } return page; } /** * vma_alloc_folio - Allocate a folio for a VMA. * @gfp: GFP flags. * @order: Order of the folio. * @vma: Pointer to VMA. * @addr: Virtual address of the allocation. Must be inside @vma. * @hugepage: Unused (was: For hugepages try only preferred node if possible). * * Allocate a folio for a specific address in @vma, using the appropriate * NUMA policy. The caller must hold the mmap_lock of the mm_struct of the * VMA to prevent it from going away. Should be used for all allocations * for folios that will be mapped into user space, excepting hugetlbfs, and * excepting where direct use of alloc_pages_mpol() is more appropriate. * * Return: The folio on success or NULL if allocation fails. */ struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage) { struct mempolicy *pol; pgoff_t ilx; struct page *page; pol = get_vma_policy(vma, addr, order, &ilx); page = alloc_pages_mpol(gfp | __GFP_COMP, order, pol, ilx, numa_node_id()); mpol_cond_put(pol); return page_rmappable_folio(page); } EXPORT_SYMBOL(vma_alloc_folio); /** * alloc_pages - Allocate pages. * @gfp: GFP flags. * @order: Power of two of number of pages to allocate. * * Allocate 1 << @order contiguous pages. The physical address of the * first page is naturally aligned (eg an order-3 allocation will be aligned * to a multiple of 8 * PAGE_SIZE bytes). The NUMA policy of the current * process is honoured when in process context. * * Context: Can be called from any context, providing the appropriate GFP * flags are used. * Return: The page on success or NULL if allocation fails. 
*/ struct page *alloc_pages(gfp_t gfp, unsigned int order) { struct mempolicy *pol = &default_policy; /* * No reference counting needed for current->mempolicy * nor system default_policy */ if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); return alloc_pages_mpol(gfp, order, pol, NO_INTERLEAVE_INDEX, numa_node_id()); } EXPORT_SYMBOL(alloc_pages); struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return page_rmappable_folio(alloc_pages(gfp | __GFP_COMP, order)); } EXPORT_SYMBOL(folio_alloc); static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { int nodes; unsigned long nr_pages_per_node; int delta; int i; unsigned long nr_allocated; unsigned long total_allocated = 0; nodes = nodes_weight(pol->nodes); nr_pages_per_node = nr_pages / nodes; delta = nr_pages - nodes * nr_pages_per_node; for (i = 0; i < nodes; i++) { if (delta) { nr_allocated = __alloc_pages_bulk(gfp, interleave_nodes(pol), NULL, nr_pages_per_node + 1, NULL, page_array); delta--; } else { nr_allocated = __alloc_pages_bulk(gfp, interleave_nodes(pol), NULL, nr_pages_per_node, NULL, page_array); } page_array += nr_allocated; total_allocated += nr_allocated; } return total_allocated; } static unsigned long alloc_pages_bulk_array_weighted_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { struct task_struct *me = current; unsigned int cpuset_mems_cookie; unsigned long total_allocated = 0; unsigned long nr_allocated = 0; unsigned long rounds; unsigned long node_pages, delta; u8 *table, *weights, weight; unsigned int weight_total = 0; unsigned long rem_pages = nr_pages; nodemask_t nodes; int nnodes, node; int resume_node = MAX_NUMNODES - 1; u8 resume_weight = 0; int prev_node; int i; if (!nr_pages) return 0; /* read the nodes onto the stack, retry if done during rebind */ do { cpuset_mems_cookie = read_mems_allowed_begin(); nnodes = read_once_policy_nodemask(pol, &nodes); } while (read_mems_allowed_retry(cpuset_mems_cookie)); /* if the nodemask has become invalid, we cannot do anything */ if (!nnodes) return 0; /* Continue allocating from most recent node and adjust the nr_pages */ node = me->il_prev; weight = me->il_weight; if (weight && node_isset(node, nodes)) { node_pages = min(rem_pages, weight); nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages, NULL, page_array); page_array += nr_allocated; total_allocated += nr_allocated; /* if that's all the pages, no need to interleave */ if (rem_pages <= weight) { me->il_weight -= rem_pages; return total_allocated; } /* Otherwise we adjust remaining pages, continue from there */ rem_pages -= weight; } /* clear active weight in case of an allocation failure */ me->il_weight = 0; prev_node = node; /* create a local copy of node weights to operate on outside rcu */ weights = kzalloc(nr_node_ids, GFP_KERNEL); if (!weights) return total_allocated; rcu_read_lock(); table = rcu_dereference(iw_table); if (table) memcpy(weights, table, nr_node_ids); rcu_read_unlock(); /* calculate total, detect system default usage */ for_each_node_mask(node, nodes) { if (!weights[node]) weights[node] = 1; weight_total += weights[node]; } /* * Calculate rounds/partial rounds to minimize __alloc_pages_bulk calls. * Track which node weighted interleave should resume from. * * if (rounds > 0) and (delta == 0), resume_node will always be * the node following prev_node and its weight. 
*/ rounds = rem_pages / weight_total; delta = rem_pages % weight_total; resume_node = next_node_in(prev_node, nodes); resume_weight = weights[resume_node]; for (i = 0; i < nnodes; i++) { node = next_node_in(prev_node, nodes); weight = weights[node]; node_pages = weight * rounds; /* If a delta exists, add this node's portion of the delta */ if (delta > weight) { node_pages += weight; delta -= weight; } else if (delta) { /* when delta is depleted, resume from that node */ node_pages += delta; resume_node = node; resume_weight = weight - delta; delta = 0; } /* node_pages can be 0 if an allocation fails and rounds == 0 */ if (!node_pages) break; nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages, NULL, page_array); page_array += nr_allocated; total_allocated += nr_allocated; if (total_allocated == nr_pages) break; prev_node = node; } me->il_prev = resume_node; me->il_weight = resume_weight; kfree(weights); return total_allocated; } static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { gfp_t preferred_gfp; unsigned long nr_allocated = 0; preferred_gfp = gfp | __GFP_NOWARN; preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); nr_allocated = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes, nr_pages, NULL, page_array); if (nr_allocated < nr_pages) nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL, nr_pages - nr_allocated, NULL, page_array + nr_allocated); return nr_allocated; } /* alloc pages bulk and mempolicy should be considered at the * same time in some situation such as vmalloc. * * It can accelerate memory allocation especially interleaving * allocate memory. */ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, unsigned long nr_pages, struct page **page_array) { struct mempolicy *pol = &default_policy; nodemask_t *nodemask; int nid; if (!in_interrupt() && !(gfp & __GFP_THISNODE)) pol = get_task_policy(current); if (pol->mode == MPOL_INTERLEAVE) return alloc_pages_bulk_array_interleave(gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_WEIGHTED_INTERLEAVE) return alloc_pages_bulk_array_weighted_interleave( gfp, pol, nr_pages, page_array); if (pol->mode == MPOL_PREFERRED_MANY) return alloc_pages_bulk_array_preferred_many(gfp, numa_node_id(), pol, nr_pages, page_array); nid = numa_node_id(); nodemask = policy_nodemask(gfp, pol, NO_INTERLEAVE_INDEX, &nid); return __alloc_pages_bulk(gfp, nid, nodemask, nr_pages, NULL, page_array); } int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(src->vm_policy); if (IS_ERR(pol)) return PTR_ERR(pol); dst->vm_policy = pol; return 0; } /* * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it * rebinds the mempolicy its copying by calling mpol_rebind_policy() * with the mems_allowed returned by cpuset_mems_allowed(). This * keeps mempolicies cpuset relative after its cpuset moves. See * further kernel/cpuset.c update_nodemask(). * * current's mempolicy may be rebinded by the other task(the task that changes * cpuset's mems), so we needn't do rebind work for current task. 
*/ /* Slow path of a mempolicy duplicate */ struct mempolicy *__mpol_dup(struct mempolicy *old) { struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!new) return ERR_PTR(-ENOMEM); /* task's mempolicy is protected by alloc_lock */ if (old == current->mempolicy) { task_lock(current); *new = *old; task_unlock(current); } else *new = *old; if (current_cpuset_is_being_rebound()) { nodemask_t mems = cpuset_mems_allowed(current); mpol_rebind_policy(new, &mems); } atomic_set(&new->refcnt, 1); return new; } /* Slow path of a mempolicy comparison */ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) { if (!a || !b) return false; if (a->mode != b->mode) return false; if (a->flags != b->flags) return false; if (a->home_node != b->home_node) return false; if (mpol_store_user_nodemask(a)) if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) return false; switch (a->mode) { case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_WEIGHTED_INTERLEAVE: return !!nodes_equal(a->nodes, b->nodes); case MPOL_LOCAL: return true; default: BUG(); return false; } } /* * Shared memory backing store policy support. * * Remember policies even when nobody has shared memory mapped. * The policies are kept in Red-Black tree linked from the inode. * They are protected by the sp->lock rwlock, which should be held * for any accesses to the tree. */ /* * lookup first element intersecting start-end. Caller holds sp->lock for * reading or for writing */ static struct sp_node *sp_lookup(struct shared_policy *sp, pgoff_t start, pgoff_t end) { struct rb_node *n = sp->root.rb_node; while (n) { struct sp_node *p = rb_entry(n, struct sp_node, nd); if (start >= p->end) n = n->rb_right; else if (end <= p->start) n = n->rb_left; else break; } if (!n) return NULL; for (;;) { struct sp_node *w = NULL; struct rb_node *prev = rb_prev(n); if (!prev) break; w = rb_entry(prev, struct sp_node, nd); if (w->end <= start) break; n = prev; } return rb_entry(n, struct sp_node, nd); } /* * Insert a new shared policy into the list. Caller holds sp->lock for * writing. */ static void sp_insert(struct shared_policy *sp, struct sp_node *new) { struct rb_node **p = &sp->root.rb_node; struct rb_node *parent = NULL; struct sp_node *nd; while (*p) { parent = *p; nd = rb_entry(parent, struct sp_node, nd); if (new->start < nd->start) p = &(*p)->rb_left; else if (new->end > nd->end) p = &(*p)->rb_right; else BUG(); } rb_link_node(&new->nd, parent, p); rb_insert_color(&new->nd, &sp->root); } /* Find shared policy intersecting idx */ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { struct mempolicy *pol = NULL; struct sp_node *sn; if (!sp->root.rb_node) return NULL; read_lock(&sp->lock); sn = sp_lookup(sp, idx, idx+1); if (sn) { mpol_get(sn->policy); pol = sn->policy; } read_unlock(&sp->lock); return pol; } static void sp_free(struct sp_node *n) { mpol_put(n->policy); kmem_cache_free(sn_cache, n); } /** * mpol_misplaced - check whether current folio node is valid in policy * * @folio: folio to be checked * @vma: vm area where folio mapped * @addr: virtual address in @vma for shared policy lookup and interleave policy * * Lookup current policy node id for vma,addr and "compare to" folio's * node id. Policy determination "mimics" alloc_page_vma(). * Called from fault path where we know the vma and faulting address. * * Return: NUMA_NO_NODE if the page is in a node that is valid for this * policy, or a suitable node ID to allocate a replacement folio from. 
*/ int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol; pgoff_t ilx; struct zoneref *z; int curnid = folio_nid(folio); int thiscpu = raw_smp_processor_id(); int thisnid = cpu_to_node(thiscpu); int polnid = NUMA_NO_NODE; int ret = NUMA_NO_NODE; pol = get_vma_policy(vma, addr, folio_order(folio), &ilx); if (!(pol->flags & MPOL_F_MOF)) goto out; switch (pol->mode) { case MPOL_INTERLEAVE: polnid = interleave_nid(pol, ilx); break; case MPOL_WEIGHTED_INTERLEAVE: polnid = weighted_interleave_nid(pol, ilx); break; case MPOL_PREFERRED: if (node_isset(curnid, pol->nodes)) goto out; polnid = first_node(pol->nodes); break; case MPOL_LOCAL: polnid = numa_node_id(); break; case MPOL_BIND: /* Optimize placement among multiple nodes via NUMA balancing */ if (pol->flags & MPOL_F_MORON) { if (node_isset(thisnid, pol->nodes)) break; goto out; } fallthrough; case MPOL_PREFERRED_MANY: /* * use current page if in policy nodemask, * else select nearest allowed node, if any. * If no allowed nodes, use current [!misplaced]. */ if (node_isset(curnid, pol->nodes)) goto out; z = first_zones_zonelist( node_zonelist(numa_node_id(), GFP_HIGHUSER), gfp_zone(GFP_HIGHUSER), &pol->nodes); polnid = zone_to_nid(z->zone); break; default: BUG(); } /* Migrate the folio towards the node whose CPU is referencing it */ if (pol->flags & MPOL_F_MORON) { polnid = thisnid; if (!should_numa_migrate_memory(current, folio, curnid, thiscpu)) goto out; } if (curnid != polnid) ret = polnid; out: mpol_cond_put(pol); return ret; } /* * Drop the (possibly final) reference to task->mempolicy. It needs to be * dropped after task->mempolicy is set to NULL so that any allocation done as * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed * policy. */ void mpol_put_task_policy(struct task_struct *task) { struct mempolicy *pol; task_lock(task); pol = task->mempolicy; task->mempolicy = NULL; task_unlock(task); mpol_put(pol); } static void sp_delete(struct shared_policy *sp, struct sp_node *n) { rb_erase(&n->nd, &sp->root); sp_free(n); } static void sp_node_init(struct sp_node *node, unsigned long start, unsigned long end, struct mempolicy *pol) { node->start = start; node->end = end; node->policy = pol; } static struct sp_node *sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol) { struct sp_node *n; struct mempolicy *newpol; n = kmem_cache_alloc(sn_cache, GFP_KERNEL); if (!n) return NULL; newpol = mpol_dup(pol); if (IS_ERR(newpol)) { kmem_cache_free(sn_cache, n); return NULL; } newpol->flags |= MPOL_F_SHARED; sp_node_init(n, start, end, newpol); return n; } /* Replace a policy range. */ static int shared_policy_replace(struct shared_policy *sp, pgoff_t start, pgoff_t end, struct sp_node *new) { struct sp_node *n; struct sp_node *n_new = NULL; struct mempolicy *mpol_new = NULL; int ret = 0; restart: write_lock(&sp->lock); n = sp_lookup(sp, start, end); /* Take care of old policies in the same range. */ while (n && n->start < end) { struct rb_node *next = rb_next(&n->nd); if (n->start >= start) { if (n->end <= end) sp_delete(sp, n); else n->start = end; } else { /* Old policy spanning whole new range. 
*/ if (n->end > end) { if (!n_new) goto alloc_new; *mpol_new = *n->policy; atomic_set(&mpol_new->refcnt, 1); sp_node_init(n_new, end, n->end, mpol_new); n->end = start; sp_insert(sp, n_new); n_new = NULL; mpol_new = NULL; break; } else n->end = start; } if (!next) break; n = rb_entry(next, struct sp_node, nd); } if (new) sp_insert(sp, new); write_unlock(&sp->lock); ret = 0; err_out: if (mpol_new) mpol_put(mpol_new); if (n_new) kmem_cache_free(sn_cache, n_new); return ret; alloc_new: write_unlock(&sp->lock); ret = -ENOMEM; n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL); if (!n_new) goto err_out; mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; atomic_set(&mpol_new->refcnt, 1); goto restart; } /** * mpol_shared_policy_init - initialize shared policy for inode * @sp: pointer to inode shared policy * @mpol: struct mempolicy to install * * Install non-NULL @mpol in inode's shared policy rb-tree. * On entry, the current task has a reference on a non-NULL @mpol. * This must be released on exit. * This is called at get_inode() calls and we can use GFP_KERNEL. */ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { int ret; sp->root = RB_ROOT; /* empty tree == default mempolicy */ rwlock_init(&sp->lock); if (mpol) { struct sp_node *sn; struct mempolicy *npol; NODEMASK_SCRATCH(scratch); if (!scratch) goto put_mpol; /* contextualize the tmpfs mount point mempolicy to this file */ npol = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); if (IS_ERR(npol)) goto free_scratch; /* no valid nodemask intersection */ task_lock(current); ret = mpol_set_nodemask(npol, &mpol->w.user_nodemask, scratch); task_unlock(current); if (ret) goto put_npol; /* alloc node covering entire file; adds ref to file's npol */ sn = sp_alloc(0, MAX_LFS_FILESIZE >> PAGE_SHIFT, npol); if (sn) sp_insert(sp, sn); put_npol: mpol_put(npol); /* drop initial ref on file's npol */ free_scratch: NODEMASK_SCRATCH_FREE(scratch); put_mpol: mpol_put(mpol); /* drop our incoming ref on sb mpol */ } } int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *pol) { int err; struct sp_node *new = NULL; unsigned long sz = vma_pages(vma); if (pol) { new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, pol); if (!new) return -ENOMEM; } err = shared_policy_replace(sp, vma->vm_pgoff, vma->vm_pgoff + sz, new); if (err && new) sp_free(new); return err; } /* Free a backing policy store on inode delete. */ void mpol_free_shared_policy(struct shared_policy *sp) { struct sp_node *n; struct rb_node *next; if (!sp->root.rb_node) return; write_lock(&sp->lock); next = rb_first(&sp->root); while (next) { n = rb_entry(next, struct sp_node, nd); next = rb_next(&n->nd); sp_delete(sp, n); } write_unlock(&sp->lock); } #ifdef CONFIG_NUMA_BALANCING static int __initdata numabalancing_override; static void __init check_numabalancing_enable(void) { bool numabalancing_default = false; if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED)) numabalancing_default = true; /* Parsed by setup_numabalancing. override == 1 enables, -1 disables */ if (numabalancing_override) set_numabalancing_state(numabalancing_override == 1); if (num_online_nodes() > 1 && !numabalancing_override) { pr_info("%s automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl\n", numabalancing_default ? 
"Enabling" : "Disabling"); set_numabalancing_state(numabalancing_default); } } static int __init setup_numabalancing(char *str) { int ret = 0; if (!str) goto out; if (!strcmp(str, "enable")) { numabalancing_override = 1; ret = 1; } else if (!strcmp(str, "disable")) { numabalancing_override = -1; ret = 1; } out: if (!ret) pr_warn("Unable to parse numa_balancing=\n"); return ret; } __setup("numa_balancing=", setup_numabalancing); #else static inline void __init check_numabalancing_enable(void) { } #endif /* CONFIG_NUMA_BALANCING */ void __init numa_policy_init(void) { nodemask_t interleave_nodes; unsigned long largest = 0; int nid, prefer = 0; policy_cache = kmem_cache_create("numa_policy", sizeof(struct mempolicy), 0, SLAB_PANIC, NULL); sn_cache = kmem_cache_create("shared_policy_node", sizeof(struct sp_node), 0, SLAB_PANIC, NULL); for_each_node(nid) { preferred_node_policy[nid] = (struct mempolicy) { .refcnt = ATOMIC_INIT(1), .mode = MPOL_PREFERRED, .flags = MPOL_F_MOF | MPOL_F_MORON, .nodes = nodemask_of_node(nid), }; } /* * Set interleaving policy for system init. Interleaving is only * enabled across suitably sized nodes (default is >= 16MB), or * fall back to the largest node if they're all smaller. */ nodes_clear(interleave_nodes); for_each_node_state(nid, N_MEMORY) { unsigned long total_pages = node_present_pages(nid); /* Preserve the largest node */ if (largest < total_pages) { largest = total_pages; prefer = nid; } /* Interleave this node? */ if ((total_pages << PAGE_SHIFT) >= (16 << 20)) node_set(nid, interleave_nodes); } /* All too small, use the largest */ if (unlikely(nodes_empty(interleave_nodes))) node_set(prefer, interleave_nodes); if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) pr_err("%s: interleaving failed\n", __func__); check_numabalancing_enable(); } /* Reset policy of current process to default */ void numa_default_policy(void) { do_set_mempolicy(MPOL_DEFAULT, 0, NULL); } /* * Parse and format mempolicy from/to strings */ static const char * const policy_modes[] = { [MPOL_DEFAULT] = "default", [MPOL_PREFERRED] = "prefer", [MPOL_BIND] = "bind", [MPOL_INTERLEAVE] = "interleave", [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave", [MPOL_LOCAL] = "local", [MPOL_PREFERRED_MANY] = "prefer (many)", }; #ifdef CONFIG_TMPFS /** * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option. * @str: string containing mempolicy to parse * @mpol: pointer to struct mempolicy pointer, returned on success. * * Format of input: * <mode>[=<flags>][:<nodelist>] * * Return: %0 on success, else %1 */ int mpol_parse_str(char *str, struct mempolicy **mpol) { struct mempolicy *new = NULL; unsigned short mode_flags; nodemask_t nodes; char *nodelist = strchr(str, ':'); char *flags = strchr(str, '='); int err = 1, mode; if (flags) *flags++ = '\0'; /* terminate mode string */ if (nodelist) { /* NUL-terminate mode or flags string */ *nodelist++ = '\0'; if (nodelist_parse(nodelist, nodes)) goto out; if (!nodes_subset(nodes, node_states[N_MEMORY])) goto out; } else nodes_clear(nodes); mode = match_string(policy_modes, MPOL_MAX, str); if (mode < 0) goto out; switch (mode) { case MPOL_PREFERRED: /* * Insist on a nodelist of one node only, although later * we use first_node(nodes) to grab a single node, so here * nodelist (or nodes) cannot be empty. 
*/ if (nodelist) { char *rest = nodelist; while (isdigit(*rest)) rest++; if (*rest) goto out; if (nodes_empty(nodes)) goto out; } break; case MPOL_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE: /* * Default to online nodes with memory if no nodelist */ if (!nodelist) nodes = node_states[N_MEMORY]; break; case MPOL_LOCAL: /* * Don't allow a nodelist; mpol_new() checks flags */ if (nodelist) goto out; break; case MPOL_DEFAULT: /* * Insist on a empty nodelist */ if (!nodelist) err = 0; goto out; case MPOL_PREFERRED_MANY: case MPOL_BIND: /* * Insist on a nodelist */ if (!nodelist) goto out; } mode_flags = 0; if (flags) { /* * Currently, we only support two mutually exclusive * mode flags. */ if (!strcmp(flags, "static")) mode_flags |= MPOL_F_STATIC_NODES; else if (!strcmp(flags, "relative")) mode_flags |= MPOL_F_RELATIVE_NODES; else goto out; } new = mpol_new(mode, mode_flags, &nodes); if (IS_ERR(new)) goto out; /* * Save nodes for mpol_to_str() to show the tmpfs mount options * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo. */ if (mode != MPOL_PREFERRED) { new->nodes = nodes; } else if (nodelist) { nodes_clear(new->nodes); node_set(first_node(nodes), new->nodes); } else { new->mode = MPOL_LOCAL; } /* * Save nodes for contextualization: this will be used to "clone" * the mempolicy in a specific context [cpuset] at a later time. */ new->w.user_nodemask = nodes; err = 0; out: /* Restore string for error message */ if (nodelist) *--nodelist = ':'; if (flags) *--flags = '='; if (!err) *mpol = new; return err; } #endif /* CONFIG_TMPFS */ /** * mpol_to_str - format a mempolicy structure for printing * @buffer: to contain formatted mempolicy string * @maxlen: length of @buffer * @pol: pointer to mempolicy to be formatted * * Convert @pol into a string. If @buffer is too short, truncate the string. * Recommend a @maxlen of at least 32 for the longest mode, "interleave", the * longest flag, "relative", and to display at least a few node ids. 
*/ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) { char *p = buffer; nodemask_t nodes = NODE_MASK_NONE; unsigned short mode = MPOL_DEFAULT; unsigned short flags = 0; if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) { mode = pol->mode; flags = pol->flags; } switch (mode) { case MPOL_DEFAULT: case MPOL_LOCAL: break; case MPOL_PREFERRED: case MPOL_PREFERRED_MANY: case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_WEIGHTED_INTERLEAVE: nodes = pol->nodes; break; default: WARN_ON_ONCE(1); snprintf(p, maxlen, "unknown"); return; } p += snprintf(p, maxlen, "%s", policy_modes[mode]); if (flags & MPOL_MODE_FLAGS) { p += snprintf(p, buffer + maxlen - p, "="); /* * Currently, the only defined flags are mutually exclusive */ if (flags & MPOL_F_STATIC_NODES) p += snprintf(p, buffer + maxlen - p, "static"); else if (flags & MPOL_F_RELATIVE_NODES) p += snprintf(p, buffer + maxlen - p, "relative"); } if (!nodes_empty(nodes)) p += scnprintf(p, buffer + maxlen - p, ":%*pbl", nodemask_pr_args(&nodes)); } #ifdef CONFIG_SYSFS struct iw_node_attr { struct kobj_attribute kobj_attr; int nid; }; static ssize_t node_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct iw_node_attr *node_attr; u8 weight; node_attr = container_of(attr, struct iw_node_attr, kobj_attr); weight = get_il_weight(node_attr->nid); return sysfs_emit(buf, "%d\n", weight); } static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct iw_node_attr *node_attr; u8 *new; u8 *old; u8 weight = 0; node_attr = container_of(attr, struct iw_node_attr, kobj_attr); if (count == 0 || sysfs_streq(buf, "")) weight = 0; else if (kstrtou8(buf, 0, &weight)) return -EINVAL; new = kzalloc(nr_node_ids, GFP_KERNEL); if (!new) return -ENOMEM; mutex_lock(&iw_table_lock); old = rcu_dereference_protected(iw_table, lockdep_is_held(&iw_table_lock)); if (old) memcpy(new, old, nr_node_ids); new[node_attr->nid] = weight; rcu_assign_pointer(iw_table, new); mutex_unlock(&iw_table_lock); synchronize_rcu(); kfree(old); return count; } static struct iw_node_attr **node_attrs; static void sysfs_wi_node_release(struct iw_node_attr *node_attr, struct kobject *parent) { if (!node_attr) return; sysfs_remove_file(parent, &node_attr->kobj_attr.attr); kfree(node_attr->kobj_attr.attr.name); kfree(node_attr); } static void sysfs_wi_release(struct kobject *wi_kobj) { int i; for (i = 0; i < nr_node_ids; i++) sysfs_wi_node_release(node_attrs[i], wi_kobj); kobject_put(wi_kobj); } static const struct kobj_type wi_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = sysfs_wi_release, }; static int add_weight_node(int nid, struct kobject *wi_kobj) { struct iw_node_attr *node_attr; char *name; node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); if (!node_attr) return -ENOMEM; name = kasprintf(GFP_KERNEL, "node%d", nid); if (!name) { kfree(node_attr); return -ENOMEM; } sysfs_attr_init(&node_attr->kobj_attr.attr); node_attr->kobj_attr.attr.name = name; node_attr->kobj_attr.attr.mode = 0644; node_attr->kobj_attr.show = node_show; node_attr->kobj_attr.store = node_store; node_attr->nid = nid; if (sysfs_create_file(wi_kobj, &node_attr->kobj_attr.attr)) { kfree(node_attr->kobj_attr.attr.name); kfree(node_attr); pr_err("failed to add attribute to weighted_interleave\n"); return -ENOMEM; } node_attrs[nid] = node_attr; return 0; } static int add_weighted_interleave_group(struct kobject *root_kobj) { struct kobject *wi_kobj; int nid, err; wi_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); if 
(!wi_kobj) return -ENOMEM; err = kobject_init_and_add(wi_kobj, &wi_ktype, root_kobj, "weighted_interleave"); if (err) { kfree(wi_kobj); return err; } for_each_node_state(nid, N_POSSIBLE) { err = add_weight_node(nid, wi_kobj); if (err) { pr_err("failed to add sysfs [node%d]\n", nid); break; } } if (err) kobject_put(wi_kobj); return 0; } static void mempolicy_kobj_release(struct kobject *kobj) { u8 *old; mutex_lock(&iw_table_lock); old = rcu_dereference_protected(iw_table, lockdep_is_held(&iw_table_lock)); rcu_assign_pointer(iw_table, NULL); mutex_unlock(&iw_table_lock); synchronize_rcu(); kfree(old); kfree(node_attrs); kfree(kobj); } static const struct kobj_type mempolicy_ktype = { .release = mempolicy_kobj_release }; static int __init mempolicy_sysfs_init(void) { int err; static struct kobject *mempolicy_kobj; mempolicy_kobj = kzalloc(sizeof(*mempolicy_kobj), GFP_KERNEL); if (!mempolicy_kobj) { err = -ENOMEM; goto err_out; } node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *), GFP_KERNEL); if (!node_attrs) { err = -ENOMEM; goto mempol_out; } err = kobject_init_and_add(mempolicy_kobj, &mempolicy_ktype, mm_kobj, "mempolicy"); if (err) goto node_out; err = add_weighted_interleave_group(mempolicy_kobj); if (err) { pr_err("mempolicy sysfs structure failed to initialize\n"); kobject_put(mempolicy_kobj); return err; } return err; node_out: kfree(node_attrs); mempol_out: kfree(mempolicy_kobj); err_out: pr_err("failed to add mempolicy kobject to the system\n"); return err; } late_initcall(mempolicy_sysfs_init); #endif /* CONFIG_SYSFS */
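/*
 * Editor's illustrative sketch (not part of mm/mempolicy.c above): a minimal
 * userspace program exercising the policy machinery that alloc_pages_mpol()
 * implements, via the set_mempolicy(2) wrapper from libnuma's <numaif.h>.
 * The choice of nodes 0-1 is an assumption for the example; the sysfs path
 * /sys/kernel/mm/mempolicy/weighted_interleave/nodeN mentioned below follows
 * from the kobject names created in mempolicy_sysfs_init() above.
 */
#include <numaif.h>             /* set_mempolicy(), MPOL_INTERLEAVE */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
        /* Nodemask with nodes 0 and 1 set (assumed to exist on this machine). */
        unsigned long nodemask = (1UL << 0) | (1UL << 1);

        if (set_mempolicy(MPOL_INTERLEAVE, &nodemask, sizeof(nodemask) * 8)) {
                perror("set_mempolicy");
                return EXIT_FAILURE;
        }

        /*
         * Pages faulted in from now on are spread across nodes 0 and 1,
         * following the MPOL_INTERLEAVE branch of alloc_pages_mpol().
         * With MPOL_WEIGHTED_INTERLEAVE the per-node ratios would instead
         * come from /sys/kernel/mm/mempolicy/weighted_interleave/nodeN.
         */
        char *buf = malloc(64 << 20);

        if (buf)
                memset(buf, 0, 64 << 20);       /* touch the pages so they are allocated */
        free(buf);
        return EXIT_SUCCESS;
}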
// SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/realpath.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/magic.h> #include <linux/proc_fs.h> /** * tomoyo_encode2 - Encode binary string to ascii string. * * @str: String in binary format. * @str_len: Size of @str in byte. * * Returns pointer to @str in ascii format on success, NULL otherwise. * * This function uses kzalloc(), so caller must kfree() if this function * didn't return NULL. */ char *tomoyo_encode2(const char *str, int str_len) { int i; int len = 0; const char *p = str; char *cp; char *cp0; if (!p) return NULL; for (i = 0; i < str_len; i++) { const unsigned char c = p[i]; if (c == '\\') len += 2; else if (c > ' ' && c < 127) len++; else len += 4; } len++; /* Reserve space for appending "/". */ cp = kzalloc(len + 10, GFP_NOFS); if (!cp) return NULL; cp0 = cp; p = str; for (i = 0; i < str_len; i++) { const unsigned char c = p[i]; if (c == '\\') { *cp++ = '\\'; *cp++ = '\\'; } else if (c > ' ' && c < 127) { *cp++ = c; } else { *cp++ = '\\'; *cp++ = (c >> 6) + '0'; *cp++ = ((c >> 3) & 7) + '0'; *cp++ = (c & 7) + '0'; } } return cp0; } /** * tomoyo_encode - Encode binary string to ascii string. * * @str: String in binary format. * * Returns pointer to @str in ascii format on success, NULL otherwise. * * This function uses kzalloc(), so caller must kfree() if this function * didn't return NULL. */ char *tomoyo_encode(const char *str) { return str ? tomoyo_encode2(str, strlen(str)) : NULL; } /** * tomoyo_get_absolute_path - Get the path of a dentry but ignores chroot'ed root. * * @path: Pointer to "struct path". * @buffer: Pointer to buffer to return value in. * @buflen: Sizeof @buffer. * * Returns the buffer on success, an error code otherwise. * * If dentry is a directory, trailing '/' is appended. 
*/ static char *tomoyo_get_absolute_path(const struct path *path, char * const buffer, const int buflen) { char *pos = ERR_PTR(-ENOMEM); if (buflen >= 256) { /* go to whatever namespace root we are under */ pos = d_absolute_path(path, buffer, buflen - 1); if (!IS_ERR(pos) && *pos == '/' && pos[1]) { struct inode *inode = d_backing_inode(path->dentry); if (inode && S_ISDIR(inode->i_mode)) { buffer[buflen - 2] = '/'; buffer[buflen - 1] = '\0'; } } } return pos; } /** * tomoyo_get_dentry_path - Get the path of a dentry. * * @dentry: Pointer to "struct dentry". * @buffer: Pointer to buffer to return value in. * @buflen: Sizeof @buffer. * * Returns the buffer on success, an error code otherwise. * * If dentry is a directory, trailing '/' is appended. */ static char *tomoyo_get_dentry_path(struct dentry *dentry, char * const buffer, const int buflen) { char *pos = ERR_PTR(-ENOMEM); if (buflen >= 256) { pos = dentry_path_raw(dentry, buffer, buflen - 1); if (!IS_ERR(pos) && *pos == '/' && pos[1]) { struct inode *inode = d_backing_inode(dentry); if (inode && S_ISDIR(inode->i_mode)) { buffer[buflen - 2] = '/'; buffer[buflen - 1] = '\0'; } } } return pos; } /** * tomoyo_get_local_path - Get the path of a dentry. * * @dentry: Pointer to "struct dentry". * @buffer: Pointer to buffer to return value in. * @buflen: Sizeof @buffer. * * Returns the buffer on success, an error code otherwise. */ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer, const int buflen) { struct super_block *sb = dentry->d_sb; char *pos = tomoyo_get_dentry_path(dentry, buffer, buflen); if (IS_ERR(pos)) return pos; /* Convert from $PID to self if $PID is current thread. */ if (sb->s_magic == PROC_SUPER_MAGIC && *pos == '/') { char *ep; const pid_t pid = (pid_t) simple_strtoul(pos + 1, &ep, 10); struct pid_namespace *proc_pidns = proc_pid_ns(sb); if (*ep == '/' && pid && pid == task_tgid_nr_ns(current, proc_pidns)) { pos = ep - 5; if (pos < buffer) goto out; memmove(pos, "/self", 5); } goto prepend_filesystem_name; } /* Use filesystem name for unnamed devices. */ if (!MAJOR(sb->s_dev)) goto prepend_filesystem_name; { struct inode *inode = d_backing_inode(sb->s_root); /* * Use filesystem name if filesystem does not support rename() * operation. */ if (!inode->i_op->rename) goto prepend_filesystem_name; } /* Prepend device name. */ { char name[64]; int name_len; const dev_t dev = sb->s_dev; name[sizeof(name) - 1] = '\0'; snprintf(name, sizeof(name) - 1, "dev(%u,%u):", MAJOR(dev), MINOR(dev)); name_len = strlen(name); pos -= name_len; if (pos < buffer) goto out; memmove(pos, name, name_len); return pos; } /* Prepend filesystem name. */ prepend_filesystem_name: { const char *name = sb->s_type->name; const int name_len = strlen(name); pos -= name_len + 1; if (pos < buffer) goto out; memmove(pos, name, name_len); pos[name_len] = ':'; } return pos; out: return ERR_PTR(-ENOMEM); } /** * tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root. * * @path: Pointer to "struct path". * * Returns the realpath of the given @path on success, NULL otherwise. * * If dentry is a directory, trailing '/' is appended. * Characters out of 0x20 < c < 0x7F range are converted to * \ooo style octal string. * Character \ is converted to \\ string. * * These functions use kzalloc(), so the caller must call kfree() * if these functions didn't return NULL. 
*/ char *tomoyo_realpath_from_path(const struct path *path) { char *buf = NULL; char *name = NULL; unsigned int buf_len = PAGE_SIZE / 2; struct dentry *dentry = path->dentry; struct super_block *sb = dentry->d_sb; while (1) { char *pos; struct inode *inode; buf_len <<= 1; kfree(buf); buf = kmalloc(buf_len, GFP_NOFS); if (!buf) break; /* To make sure that pos is '\0' terminated. */ buf[buf_len - 1] = '\0'; /* For "pipe:[\$]" and "socket:[\$]". */ if (dentry->d_op && dentry->d_op->d_dname) { pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1); goto encode; } inode = d_backing_inode(sb->s_root); /* * Get local name for filesystems without rename() operation */ if ((!inode->i_op->rename && !(sb->s_type->fs_flags & FS_REQUIRES_DEV))) pos = tomoyo_get_local_path(path->dentry, buf, buf_len - 1); /* Get absolute name for the rest. */ else { pos = tomoyo_get_absolute_path(path, buf, buf_len - 1); /* * Fall back to local name if absolute name is not * available. */ if (pos == ERR_PTR(-EINVAL)) pos = tomoyo_get_local_path(path->dentry, buf, buf_len - 1); } encode: if (IS_ERR(pos)) continue; name = tomoyo_encode(pos); break; } kfree(buf); if (!name) tomoyo_warn_oom(__func__); return name; } /** * tomoyo_realpath_nofollow - Get realpath of a pathname. * * @pathname: The pathname to solve. * * Returns the realpath of @pathname on success, NULL otherwise. */ char *tomoyo_realpath_nofollow(const char *pathname) { struct path path; if (pathname && kern_path(pathname, 0, &path) == 0) { char *buf = tomoyo_realpath_from_path(&path); path_put(&path); return buf; } return NULL; }
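/*
 * Editor's standalone userspace sketch (an assumption, not part of the TOMOYO
 * file above) that mirrors the encoding rule documented for tomoyo_encode():
 * keep characters in the range 0x20 < c < 0x7F, turn '\' into "\\", and emit
 * everything else as a three-digit \ooo octal escape. The function name
 * encode_like_tomoyo() is hypothetical and only used for this demonstration.
 */
#include <stdio.h>

static void encode_like_tomoyo(const unsigned char *s)
{
        for (; *s; s++) {
                if (*s == '\\')
                        printf("\\\\");                 /* '\' doubled */
                else if (*s > ' ' && *s < 127)
                        putchar(*s);                    /* printable, kept as-is */
                else
                        printf("\\%c%c%c", (*s >> 6) + '0',
                               ((*s >> 3) & 7) + '0', (*s & 7) + '0');
        }
        putchar('\n');
}

int main(void)
{
        /* Prints "/tmp/my\040file" -- the space (0x20) is escaped as octal 040. */
        encode_like_tomoyo((const unsigned char *)"/tmp/my file");
        return 0;
}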
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_BITMAP_H #define __LINUX_BITMAP_H #ifndef __ASSEMBLY__ #include <linux/align.h> #include <linux/bitops.h> #include <linux/cleanup.h> #include <linux/errno.h> #include <linux/find.h> #include <linux/limits.h> #include <linux/string.h> #include <linux/types.h> #include <linux/bitmap-str.h> struct device; /* * bitmaps provide bit arrays that consume one or more unsigned * longs. The bitmap interface and available operations are listed * here, in bitmap.h * * Function implementations generic to all architectures are in * lib/bitmap.c. 
Functions implementations that are architecture * specific are in various include/asm-<arch>/bitops.h headers * and other arch/<arch> specific files. * * See lib/bitmap.c for more details. */ /** * DOC: bitmap overview * * The available bitmap operations and their rough meaning in the * case that the bitmap is a single unsigned long are thus: * * The generated code is more efficient when nbits is known at * compile-time and at most BITS_PER_LONG. * * :: * * bitmap_zero(dst, nbits) *dst = 0UL * bitmap_fill(dst, nbits) *dst = ~0UL * bitmap_copy(dst, src, nbits) *dst = *src * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) * bitmap_complement(dst, src, nbits) *dst = ~(*src) * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? * bitmap_empty(src, nbits) Are all bits zero in *src? * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap * bitmap_weight_andnot(src1, src2, nbits) Hamming Weight of andnot'ed bitmap * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src) * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_from_arr64(dst, buf, nbits) Copy nbits from u64[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * bitmap_to_arr64(buf, src, nbits) Copy nbits from buf to u64[] dst * bitmap_get_value8(map, start) Get 8bit value from map at start * bitmap_set_value8(map, value, start) Set 8bit value to map at start * * Note, bitmap_zero() and bitmap_fill() operate over the region of * unsigned longs, that is, bits behind bitmap till the unsigned long * boundary will be zeroed or filled as well. Consider to use * bitmap_clear() or bitmap_set() to make explicit zeroing or filling * respectively. 
*/ /** * DOC: bitmap bitops * * Also the following operations in asm/bitops.h apply to bitmaps.:: * * set_bit(bit, addr) *addr |= bit * clear_bit(bit, addr) *addr &= ~bit * change_bit(bit, addr) *addr ^= bit * test_bit(bit, addr) Is bit set in *addr? * test_and_set_bit(bit, addr) Set bit and return old value * test_and_clear_bit(bit, addr) Clear bit and return old value * test_and_change_bit(bit, addr) Change bit and return old value * find_first_zero_bit(addr, nbits) Position first zero bit in *addr * find_first_bit(addr, nbits) Position first set bit in *addr * find_next_zero_bit(addr, nbits, bit) * Position next zero bit in *addr >= bit * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit * find_next_and_bit(addr1, addr2, nbits, bit) * Same as find_next_bit, but in * (*addr1 & *addr2) * */ /** * DOC: declare bitmap * The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used * to declare an array named 'name' of just enough unsigned longs to * contain all bit positions from 0 to 'bits' - 1. */ /* * Allocation and deallocation of bitmap. * Provided in lib/bitmap.c to avoid circular dependency. */ unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node); unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node); void bitmap_free(const unsigned long *bitmap); DEFINE_FREE(bitmap, unsigned long *, if (_T) bitmap_free(_T)) /* Managed variants of the above. */ unsigned long *devm_bitmap_alloc(struct device *dev, unsigned int nbits, gfp_t flags); unsigned long *devm_bitmap_zalloc(struct device *dev, unsigned int nbits, gfp_t flags); /* * lib/bitmap.c provides these functions: */ bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); bool __pure __bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, unsigned int nbits); void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned int first, unsigned int cut, unsigned int nbits); bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); void __bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); bool __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); unsigned int __bitmap_weight_and(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1, const 
unsigned long *bitmap2, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); unsigned long bitmap_find_next_zero_area_off(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask, unsigned long align_offset); /** * bitmap_find_next_zero_area - find a contiguous aligned zero area * @map: The address to base the search on * @size: The bitmap size in bits * @start: The bitnumber to start searching at * @nr: The number of zeroed bits we're looking for * @align_mask: Alignment mask for zero area * * The @align_mask should be one less than a power of 2; the effect is that * the bit offset of all zero areas this function finds is multiples of that * power of 2. A @align_mask of 0 means no alignment is required. */ static inline unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long align_mask) { return bitmap_find_next_zero_area_off(map, size, start, nr, align_mask, 0); } void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits); void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); if (small_const_nbits(nbits)) *dst = 0; else memset(dst, 0, len); } static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); if (small_const_nbits(nbits)) *dst = ~0UL; else memset(dst, 0xff, len); } static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); if (small_const_nbits(nbits)) *dst = *src; else memcpy(dst, src, len); } /* * Copy bitmap and clear tail bits in last word. */ static inline void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits) { bitmap_copy(dst, src, nbits); if (nbits % BITS_PER_LONG) dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } /* * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64 * machines the order of hi and lo parts of numbers match the bitmap structure. * In both cases conversion is not needed when copying data from/to arrays of * u32. But in LE64 case, typecast in bitmap_copy_clear_tail() may lead * to out-of-bound access. To avoid that, both LE and BE variants of 64-bit * architectures are not using bitmap_copy_clear_tail(). 
*/ #if BITS_PER_LONG == 64 void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits); void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits); #else #define bitmap_from_arr32(bitmap, buf, nbits) \ bitmap_copy_clear_tail((unsigned long *) (bitmap), \ (const unsigned long *) (buf), (nbits)) #define bitmap_to_arr32(buf, bitmap, nbits) \ bitmap_copy_clear_tail((unsigned long *) (buf), \ (const unsigned long *) (bitmap), (nbits)) #endif /* * On 64-bit systems bitmaps are represented as u64 arrays internally. So, * the conversion is not needed when copying data from/to arrays of u64. */ #if BITS_PER_LONG == 32 void bitmap_from_arr64(unsigned long *bitmap, const u64 *buf, unsigned int nbits); void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits); #else #define bitmap_from_arr64(bitmap, buf, nbits) \ bitmap_copy_clear_tail((unsigned long *)(bitmap), (const unsigned long *)(buf), (nbits)) #define bitmap_to_arr64(buf, bitmap, nbits) \ bitmap_copy_clear_tail((unsigned long *)(buf), (const unsigned long *)(bitmap), (nbits)) #endif static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; else __bitmap_or(dst, src1, src2, nbits); } static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; else __bitmap_xor(dst, src1, src2, nbits); } static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src); else __bitmap_complement(dst, src, nbits); } #ifdef __LITTLE_ENDIAN #define BITMAP_MEM_ALIGNMENT 8 #else #define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long)) #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) static inline bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) return !memcmp(src1, src2, nbits / 8); return __bitmap_equal(src1, src2, nbits); } /** * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third * @src1: Pointer to bitmap 1 * @src2: Pointer to bitmap 2 will be or'ed with bitmap 1 * @src3: Pointer to bitmap 3. 
Compare to the result of *@src1 | *@src2 * @nbits: number of bits in each of these bitmaps * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ static inline bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, unsigned int nbits) { if (!small_const_nbits(nbits)) return __bitmap_or_equal(src1, src2, src3, nbits); return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } static inline bool bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; else return __bitmap_intersects(src1, src2, nbits); } static inline bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); else return __bitmap_subset(src1, src2, nbits); } static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); return find_first_bit(src, nbits) == nbits; } static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); return find_first_zero_bit(src, nbits) == nbits; } static __always_inline unsigned int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); return __bitmap_weight(src, nbits); } static __always_inline unsigned long bitmap_weight_and(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)); return __bitmap_weight_and(src1, src2, nbits); } static __always_inline unsigned long bitmap_weight_andnot(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)); return __bitmap_weight_andnot(src1, src2, nbits); } static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); else if (small_const_nbits(start + nbits)) *map |= GENMASK(start + nbits - 1, start); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0xff, nbits / 8); else __bitmap_set(map, start, nbits); } static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); else if (small_const_nbits(start + nbits)) *map &= ~GENMASK(start + nbits - 1, start); else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && __builtin_constant_p(nbits & BITMAP_MEM_MASK) && IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0, nbits / 8); else __bitmap_clear(map, start, nbits); } static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; else __bitmap_shift_right(dst, src, shift, nbits); } static inline void bitmap_shift_left(unsigned long 
*dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); else __bitmap_shift_left(dst, src, shift, nbits); } static inline void bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*old & ~(*mask)) | (*new & *mask); else __bitmap_replace(dst, old, new, mask, nbits); } /** * bitmap_scatter - Scatter a bitmap according to the given mask * @dst: scattered bitmap * @src: gathered bitmap * @mask: mask representing bits to assign to in the scattered bitmap * @nbits: number of bits in each of these bitmaps * * Scatters bitmap with sequential bits according to the given @mask. * * Example: * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302. * * Or in binary form * @src @mask @dst * 0000000001011010 0001001100010011 0000001100000010 * * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12) * * A more 'visual' description of the operation:: * * src: 0000000001011010 * |||||| * +------+||||| * | +----+|||| * | |+----+||| * | || +-+|| * | || | || * mask: ...v..vv...v..vv * ...0..11...0..10 * dst: 0000001100000010 * * A relationship exists between bitmap_scatter() and bitmap_gather(). * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. * See bitmap_scatter() for details related to this relationship. */ static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; bitmap_zero(dst, nbits); for_each_set_bit(bit, mask, nbits) __assign_bit(bit, dst, test_bit(n++, src)); } /** * bitmap_gather - Gather a bitmap according to given mask * @dst: gathered bitmap * @src: scattered bitmap * @mask: mask representing bits to extract from in the scattered bitmap * @nbits: number of bits in each of these bitmaps * * Gathers bitmap with sparse bits according to the given @mask. * * Example: * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a. * * Or in binary form * @src @mask @dst * 0000001100000010 0001001100010011 0000000000011010 * * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5) * * A more 'visual' description of the operation:: * * mask: ...v..vv...v..vv * src: 0000001100000010 * ^ ^^ ^ 0 * | || | 10 * | || > 010 * | |+--> 1010 * | +--> 11010 * +----> 011010 * dst: 0000000000011010 * * A relationship exists between bitmap_gather() and bitmap_scatter(). See * bitmap_scatter() for the bitmap scatter detailed operations. * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n). * The operation bitmap_gather(result, scattered, mask, n) leads to a result * equal or equivalent to src. * * The result can be 'equivalent' because bitmap_scatter() and bitmap_gather() * are not bijective. * The result and src values are equivalent in that sense that a call to * bitmap_scatter(res, src, mask, n) and a call to * bitmap_scatter(res, result, mask, n) will lead to the same res value. 
*/ static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; bitmap_zero(dst, nbits); for_each_set_bit(bit, mask, nbits) __assign_bit(n++, dst, test_bit(bit, src)); } static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) { *rs = find_next_bit(bitmap, end, *rs); *re = find_next_zero_bit(bitmap, end, *rs + 1); } /** * bitmap_release_region - release allocated bitmap region * @bitmap: array of unsigned longs corresponding to the bitmap * @pos: beginning of bit region to release * @order: region size (log base 2 of number of bits) to release * * This is the complement to __bitmap_find_free_region() and releases * the found region (by clearing it in the bitmap). */ static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) { bitmap_clear(bitmap, pos, BIT(order)); } /** * bitmap_allocate_region - allocate bitmap region * @bitmap: array of unsigned longs corresponding to the bitmap * @pos: beginning of bit region to allocate * @order: region size (log base 2 of number of bits) to allocate * * Allocate (set bits in) a specified region of a bitmap. * * Returns: 0 on success, or %-EBUSY if specified region wasn't * free (not all bits were zero). */ static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) { unsigned int len = BIT(order); if (find_next_bit(bitmap, pos + len, pos) < pos + len) return -EBUSY; bitmap_set(bitmap, pos, len); return 0; } /** * bitmap_find_free_region - find a contiguous aligned mem region * @bitmap: array of unsigned longs corresponding to the bitmap * @bits: number of bits in the bitmap * @order: region size (log base 2 of number of bits) to find * * Find a region of free (zero) bits in a @bitmap of @bits bits and * allocate them (set them to one). Only consider regions of length * a power (@order) of two, aligned to that power of two, which * makes the search algorithm much faster. * * Returns: the bit offset in bitmap of the allocated region, * or -errno on failure. */ static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) { unsigned int pos, end; /* scans bitmap by regions of size order */ for (pos = 0; (end = pos + BIT(order)) <= bits; pos = end) { if (!bitmap_allocate_region(bitmap, pos, order)) return pos; } return -ENOMEM; } /** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. * @n: u64 value * * Linux bitmaps are internally arrays of unsigned longs, i.e. 32-bit * integers in 32-bit environment, and 64-bit integers in 64-bit one. * * There are four combinations of endianness and length of the word in linux * ABIs: LE64, BE64, LE32 and BE32. * * On 64-bit kernels 64-bit LE and BE numbers are naturally ordered in * bitmaps and therefore don't require any special handling. * * On 32-bit kernels 32-bit LE ABI orders lo word of 64-bit number in memory * prior to hi, and 32-bit BE orders hi word prior to lo. The bitmap on the * other hand is represented as an array of 32-bit words and the position of * bit N may therefore be calculated as: word #(N/32) and bit #(N%32) in that * word. For example, bit #42 is located at 10th position of 2nd word. * It matches 32-bit LE ABI, and we can simply let the compiler store 64-bit * values in memory as it usually does. But for BE we need to swap hi and lo * words manually. 
* * With all that, the macro BITMAP_FROM_U64() does explicit reordering of hi and * lo parts of u64. For LE32 it does nothing, and for BE environment it swaps * hi and lo words, as is expected by bitmap. */ #if __BITS_PER_LONG == 64 #define BITMAP_FROM_U64(n) (n) #else #define BITMAP_FROM_U64(n) ((unsigned long) ((u64)(n) & ULONG_MAX)), \ ((unsigned long) ((u64)(n) >> 32)) #endif /** * bitmap_from_u64 - Check and swap words within u64. * @mask: source bitmap * @dst: destination bitmap * * In 32-bit Big Endian kernel, when using ``(u32 *)(&val)[*]`` * to read u64 mask, we will get the wrong word. * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) { bitmap_from_arr64(dst, &mask, 64); } /** * bitmap_get_value8 - get an 8-bit value within a memory region * @map: address to the bitmap memory region * @start: bit offset of the 8-bit value; must be a multiple of 8 * * Returns the 8-bit value located at the @start bit offset within the @src * memory region. */ static inline unsigned long bitmap_get_value8(const unsigned long *map, unsigned long start) { const size_t index = BIT_WORD(start); const unsigned long offset = start % BITS_PER_LONG; return (map[index] >> offset) & 0xFF; } /** * bitmap_set_value8 - set an 8-bit value within a memory region * @map: address to the bitmap memory region * @value: the 8-bit value; values wider than 8 bits may clobber bitmap * @start: bit offset of the 8-bit value; must be a multiple of 8 */ static inline void bitmap_set_value8(unsigned long *map, unsigned long value, unsigned long start) { const size_t index = BIT_WORD(start); const unsigned long offset = start % BITS_PER_LONG; map[index] &= ~(0xFFUL << offset); map[index] |= value << offset; } #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */
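/*
 * Illustrative usage sketch (not part of the header above): a hypothetical
 * kernel-module helper that exercises bitmap_from_u64(), bitmap_scatter(),
 * bitmap_gather() and bitmap_equal() with the same values used in the
 * bitmap_scatter()/bitmap_gather() kernel-doc examples.  The function name
 * and the 64-bit bitmap size are made up for illustration only.
 */
#include <linux/bitmap.h>
#include <linux/bug.h>
#include <linux/types.h>

static void bitmap_scatter_gather_sketch(void)
{
	DECLARE_BITMAP(src, 64);
	DECLARE_BITMAP(mask, 64);
	DECLARE_BITMAP(scattered, 64);
	DECLARE_BITMAP(gathered, 64);
	DECLARE_BITMAP(expected, 64);

	bitmap_from_u64(src, 0x005a);	/* bits 1, 3, 4, 6 set */
	bitmap_from_u64(mask, 0x1313);	/* bits 0, 1, 4, 8, 9, 12 set */

	/* Low bits of @src land at the positions selected by @mask: 0x0302. */
	bitmap_scatter(scattered, src, mask, 64);

	/*
	 * Gathering back through the same mask yields 0x001a, i.e. the low
	 * hweight(mask) bits of @src are recovered.
	 */
	bitmap_gather(gathered, scattered, mask, 64);

	bitmap_from_u64(expected, 0x001a);
	WARN_ON(!bitmap_equal(gathered, expected, 64));
}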
// SPDX-License-Identifier: GPL-2.0
/*
 * NETLINK	Generic Netlink Family
 *
 *		Authors:	Jamal Hadi Salim
 *				Thomas Graf <tgraf@suug.ch>
 *				Johannes Berg <johannes@sipsolutions.net>
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/string_helpers.h>
#include <linux/skbuff.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/rwsem.h>
#include <linux/idr.h>
#include <net/sock.h>
#include <net/genetlink.h>

static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
static DECLARE_RWSEM(cb_lock);

atomic_t genl_sk_destructing_cnt = ATOMIC_INIT(0);
DECLARE_WAIT_QUEUE_HEAD(genl_sk_destructing_waitq);

void genl_lock(void)
{
	mutex_lock(&genl_mutex);
}
EXPORT_SYMBOL(genl_lock);

void genl_unlock(void)
{
	mutex_unlock(&genl_mutex);
}
EXPORT_SYMBOL(genl_unlock);

static void genl_lock_all(void)
{
	down_write(&cb_lock);
	genl_lock();
}

static void genl_unlock_all(void)
{
	genl_unlock();
	up_write(&cb_lock);
}

static void genl_op_lock(const struct genl_family *family)
{
	if (!family->parallel_ops)
		genl_lock();
}

static void genl_op_unlock(const struct genl_family *family)
{
	if (!family->parallel_ops)
		genl_unlock();
}

static DEFINE_IDR(genl_fam_idr);

/*
 * Bitmap of multicast groups that are currently in use.
 *
 * To avoid an allocation at boot of just one unsigned long,
 * declare it global instead.
 * Bit 0 is marked as already used since group 0 is invalid.
 * Bit 1 is marked as already used since the drop-monitor code
 * abuses the API and thinks it can statically use group 1.
 * That group will typically conflict with other groups that
 * any proper users use.
* Bit 16 is marked as used since it's used for generic netlink * and the code no longer marks pre-reserved IDs as used. * Bit 17 is marked as already used since the VFS quota code * also abused this API and relied on family == group ID, we * cater to that by giving it a static family and group ID. * Bit 18 is marked as already used since the PMCRAID driver * did the same thing as the VFS quota code (maybe copied?) */ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | BIT(GENL_ID_VFS_DQUOT) | BIT(GENL_ID_PMCRAID); static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; /* We need the last attribute with non-zero ID therefore a 2-entry array */ static struct nla_policy genl_policy_reject_all[] = { { .type = NLA_REJECT }, { .type = NLA_REJECT }, }; static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id); static void genl_op_fill_in_reject_policy(const struct genl_family *family, struct genl_ops *op) { BUILD_BUG_ON(ARRAY_SIZE(genl_policy_reject_all) - 1 != 1); if (op->policy || op->cmd < family->resv_start_op) return; op->policy = genl_policy_reject_all; op->maxattr = 1; } static void genl_op_fill_in_reject_policy_split(const struct genl_family *family, struct genl_split_ops *op) { if (op->policy) return; op->policy = genl_policy_reject_all; op->maxattr = 1; } static const struct genl_family *genl_family_find_byid(unsigned int id) { return idr_find(&genl_fam_idr, id); } static const struct genl_family *genl_family_find_byname(char *name) { const struct genl_family *family; unsigned int id; idr_for_each_entry(&genl_fam_idr, family, id) if (strcmp(family->name, name) == 0) return family; return NULL; } struct genl_op_iter { const struct genl_family *family; struct genl_split_ops doit; struct genl_split_ops dumpit; int cmd_idx; int entry_idx; u32 cmd; u8 flags; }; static void genl_op_from_full(const struct genl_family *family, unsigned int i, struct genl_ops *op) { *op = family->ops[i]; if (!op->maxattr) op->maxattr = family->maxattr; if (!op->policy) op->policy = family->policy; genl_op_fill_in_reject_policy(family, op); } static int genl_get_cmd_full(u32 cmd, const struct genl_family *family, struct genl_ops *op) { int i; for (i = 0; i < family->n_ops; i++) if (family->ops[i].cmd == cmd) { genl_op_from_full(family, i, op); return 0; } return -ENOENT; } static void genl_op_from_small(const struct genl_family *family, unsigned int i, struct genl_ops *op) { memset(op, 0, sizeof(*op)); op->doit = family->small_ops[i].doit; op->dumpit = family->small_ops[i].dumpit; op->cmd = family->small_ops[i].cmd; op->internal_flags = family->small_ops[i].internal_flags; op->flags = family->small_ops[i].flags; op->validate = family->small_ops[i].validate; op->maxattr = family->maxattr; op->policy = family->policy; genl_op_fill_in_reject_policy(family, op); } static int genl_get_cmd_small(u32 cmd, const struct genl_family *family, struct genl_ops *op) { int i; for (i = 0; i < family->n_small_ops; i++) if (family->small_ops[i].cmd == cmd) { genl_op_from_small(family, i, op); return 0; } return -ENOENT; } static void genl_op_from_split(struct genl_op_iter *iter) { const struct genl_family *family = iter->family; int i, cnt = 0; i = iter->entry_idx - family->n_ops - family->n_small_ops; if (family->split_ops[i + cnt].flags & GENL_CMD_CAP_DO) { iter->doit = family->split_ops[i + cnt]; genl_op_fill_in_reject_policy_split(family, &iter->doit); cnt++; } else { memset(&iter->doit, 0, sizeof(iter->doit)); } 
if (i + cnt < family->n_split_ops && family->split_ops[i + cnt].flags & GENL_CMD_CAP_DUMP && (!cnt || family->split_ops[i + cnt].cmd == iter->doit.cmd)) { iter->dumpit = family->split_ops[i + cnt]; genl_op_fill_in_reject_policy_split(family, &iter->dumpit); cnt++; } else { memset(&iter->dumpit, 0, sizeof(iter->dumpit)); } WARN_ON(!cnt); iter->entry_idx += cnt; } static int genl_get_cmd_split(u32 cmd, u8 flag, const struct genl_family *family, struct genl_split_ops *op) { int i; for (i = 0; i < family->n_split_ops; i++) if (family->split_ops[i].cmd == cmd && family->split_ops[i].flags & flag) { *op = family->split_ops[i]; return 0; } return -ENOENT; } static int genl_cmd_full_to_split(struct genl_split_ops *op, const struct genl_family *family, const struct genl_ops *full, u8 flags) { if ((flags & GENL_CMD_CAP_DO && !full->doit) || (flags & GENL_CMD_CAP_DUMP && !full->dumpit)) { memset(op, 0, sizeof(*op)); return -ENOENT; } if (flags & GENL_CMD_CAP_DUMP) { op->start = full->start; op->dumpit = full->dumpit; op->done = full->done; } else { op->pre_doit = family->pre_doit; op->doit = full->doit; op->post_doit = family->post_doit; } if (flags & GENL_CMD_CAP_DUMP && full->validate & GENL_DONT_VALIDATE_DUMP) { op->policy = NULL; op->maxattr = 0; } else { op->policy = full->policy; op->maxattr = full->maxattr; } op->cmd = full->cmd; op->internal_flags = full->internal_flags; op->flags = full->flags; op->validate = full->validate; /* Make sure flags include the GENL_CMD_CAP_DO / GENL_CMD_CAP_DUMP */ op->flags |= flags; return 0; } /* Must make sure that op is initialized to 0 on failure */ static int genl_get_cmd(u32 cmd, u8 flags, const struct genl_family *family, struct genl_split_ops *op) { struct genl_ops full; int err; err = genl_get_cmd_full(cmd, family, &full); if (err == -ENOENT) err = genl_get_cmd_small(cmd, family, &full); /* Found one of legacy forms */ if (err == 0) return genl_cmd_full_to_split(op, family, &full, flags); err = genl_get_cmd_split(cmd, flags, family, op); if (err) memset(op, 0, sizeof(*op)); return err; } /* For policy dumping only, get ops of both do and dump. * Fail if both are missing, genl_get_cmd() will zero-init in case of failure. */ static int genl_get_cmd_both(u32 cmd, const struct genl_family *family, struct genl_split_ops *doit, struct genl_split_ops *dumpit) { int err1, err2; err1 = genl_get_cmd(cmd, GENL_CMD_CAP_DO, family, doit); err2 = genl_get_cmd(cmd, GENL_CMD_CAP_DUMP, family, dumpit); return err1 && err2 ? 
-ENOENT : 0; } static bool genl_op_iter_init(const struct genl_family *family, struct genl_op_iter *iter) { iter->family = family; iter->cmd_idx = 0; iter->entry_idx = 0; iter->flags = 0; return iter->family->n_ops + iter->family->n_small_ops + iter->family->n_split_ops; } static bool genl_op_iter_next(struct genl_op_iter *iter) { const struct genl_family *family = iter->family; bool legacy_op = true; struct genl_ops op; if (iter->entry_idx < family->n_ops) { genl_op_from_full(family, iter->entry_idx, &op); } else if (iter->entry_idx < family->n_ops + family->n_small_ops) { genl_op_from_small(family, iter->entry_idx - family->n_ops, &op); } else if (iter->entry_idx < family->n_ops + family->n_small_ops + family->n_split_ops) { legacy_op = false; /* updates entry_idx */ genl_op_from_split(iter); } else { return false; } iter->cmd_idx++; if (legacy_op) { iter->entry_idx++; genl_cmd_full_to_split(&iter->doit, family, &op, GENL_CMD_CAP_DO); genl_cmd_full_to_split(&iter->dumpit, family, &op, GENL_CMD_CAP_DUMP); } iter->cmd = iter->doit.cmd | iter->dumpit.cmd; iter->flags = iter->doit.flags | iter->dumpit.flags; return true; } static void genl_op_iter_copy(struct genl_op_iter *dst, struct genl_op_iter *src) { *dst = *src; } static unsigned int genl_op_iter_idx(struct genl_op_iter *iter) { return iter->cmd_idx; } static int genl_allocate_reserve_groups(int n_groups, int *first_id) { unsigned long *new_groups; int start = 0; int i; int id; bool fits; do { if (start == 0) id = find_first_zero_bit(mc_groups, mc_groups_longs * BITS_PER_LONG); else id = find_next_zero_bit(mc_groups, mc_groups_longs * BITS_PER_LONG, start); fits = true; for (i = id; i < min_t(int, id + n_groups, mc_groups_longs * BITS_PER_LONG); i++) { if (test_bit(i, mc_groups)) { start = i; fits = false; break; } } if (id + n_groups > mc_groups_longs * BITS_PER_LONG) { unsigned long new_longs = mc_groups_longs + BITS_TO_LONGS(n_groups); size_t nlen = new_longs * sizeof(unsigned long); if (mc_groups == &mc_group_start) { new_groups = kzalloc(nlen, GFP_KERNEL); if (!new_groups) return -ENOMEM; mc_groups = new_groups; *mc_groups = mc_group_start; } else { new_groups = krealloc(mc_groups, nlen, GFP_KERNEL); if (!new_groups) return -ENOMEM; mc_groups = new_groups; for (i = 0; i < BITS_TO_LONGS(n_groups); i++) mc_groups[mc_groups_longs + i] = 0; } mc_groups_longs = new_longs; } } while (!fits); for (i = id; i < id + n_groups; i++) set_bit(i, mc_groups); *first_id = id; return 0; } static struct genl_family genl_ctrl; static int genl_validate_assign_mc_groups(struct genl_family *family) { int first_id; int n_groups = family->n_mcgrps; int err = 0, i; bool groups_allocated = false; if (!n_groups) return 0; for (i = 0; i < n_groups; i++) { const struct genl_multicast_group *grp = &family->mcgrps[i]; if (WARN_ON(grp->name[0] == '\0')) return -EINVAL; if (WARN_ON(!string_is_terminated(grp->name, GENL_NAMSIZ))) return -EINVAL; } /* special-case our own group and hacks */ if (family == &genl_ctrl) { first_id = GENL_ID_CTRL; BUG_ON(n_groups != 1); } else if (strcmp(family->name, "NET_DM") == 0) { first_id = 1; BUG_ON(n_groups != 1); } else if (family->id == GENL_ID_VFS_DQUOT) { first_id = GENL_ID_VFS_DQUOT; BUG_ON(n_groups != 1); } else if (family->id == GENL_ID_PMCRAID) { first_id = GENL_ID_PMCRAID; BUG_ON(n_groups != 1); } else { groups_allocated = true; err = genl_allocate_reserve_groups(n_groups, &first_id); if (err) return err; } family->mcgrp_offset = first_id; /* if still initializing, can't and don't need to realloc bitmaps */ if 
(!init_net.genl_sock) return 0; if (family->netnsok) { struct net *net; netlink_table_grab(); rcu_read_lock(); for_each_net_rcu(net) { err = __netlink_change_ngroups(net->genl_sock, mc_groups_longs * BITS_PER_LONG); if (err) { /* * No need to roll back, can only fail if * memory allocation fails and then the * number of _possible_ groups has been * increased on some sockets which is ok. */ break; } } rcu_read_unlock(); netlink_table_ungrab(); } else { err = netlink_change_ngroups(init_net.genl_sock, mc_groups_longs * BITS_PER_LONG); } if (groups_allocated && err) { for (i = 0; i < family->n_mcgrps; i++) clear_bit(family->mcgrp_offset + i, mc_groups); } return err; } static void genl_unregister_mc_groups(const struct genl_family *family) { struct net *net; int i; netlink_table_grab(); rcu_read_lock(); for_each_net_rcu(net) { for (i = 0; i < family->n_mcgrps; i++) __netlink_clear_multicast_users( net->genl_sock, family->mcgrp_offset + i); } rcu_read_unlock(); netlink_table_ungrab(); for (i = 0; i < family->n_mcgrps; i++) { int grp_id = family->mcgrp_offset + i; if (grp_id != 1) clear_bit(grp_id, mc_groups); genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, family, &family->mcgrps[i], grp_id); } } static bool genl_split_op_check(const struct genl_split_ops *op) { if (WARN_ON(hweight8(op->flags & (GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP)) != 1)) return true; return false; } static int genl_validate_ops(const struct genl_family *family) { struct genl_op_iter i, j; unsigned int s; if (WARN_ON(family->n_ops && !family->ops) || WARN_ON(family->n_small_ops && !family->small_ops) || WARN_ON(family->n_split_ops && !family->split_ops)) return -EINVAL; for (genl_op_iter_init(family, &i); genl_op_iter_next(&i); ) { if (!(i.flags & (GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP))) return -EINVAL; if (WARN_ON(i.cmd >= family->resv_start_op && (i.doit.validate || i.dumpit.validate))) return -EINVAL; genl_op_iter_copy(&j, &i); while (genl_op_iter_next(&j)) { if (i.cmd == j.cmd) return -EINVAL; } } if (family->n_split_ops) { if (genl_split_op_check(&family->split_ops[0])) return -EINVAL; } for (s = 1; s < family->n_split_ops; s++) { const struct genl_split_ops *a, *b; a = &family->split_ops[s - 1]; b = &family->split_ops[s]; if (genl_split_op_check(b)) return -EINVAL; /* Check sort order */ if (a->cmd < b->cmd) { continue; } else if (a->cmd > b->cmd) { WARN_ON(1); return -EINVAL; } if (a->internal_flags != b->internal_flags || ((a->flags ^ b->flags) & ~(GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP))) { WARN_ON(1); return -EINVAL; } if ((a->flags & GENL_CMD_CAP_DO) && (b->flags & GENL_CMD_CAP_DUMP)) continue; WARN_ON(1); return -EINVAL; } return 0; } static void *genl_sk_priv_alloc(struct genl_family *family) { void *priv; priv = kzalloc(family->sock_priv_size, GFP_KERNEL); if (!priv) return ERR_PTR(-ENOMEM); if (family->sock_priv_init) family->sock_priv_init(priv); return priv; } static void genl_sk_priv_free(const struct genl_family *family, void *priv) { if (family->sock_priv_destroy) family->sock_priv_destroy(priv); kfree(priv); } static int genl_sk_privs_alloc(struct genl_family *family) { if (!family->sock_priv_size) return 0; family->sock_privs = kzalloc(sizeof(*family->sock_privs), GFP_KERNEL); if (!family->sock_privs) return -ENOMEM; xa_init(family->sock_privs); return 0; } static void genl_sk_privs_free(const struct genl_family *family) { unsigned long id; void *priv; if (!family->sock_priv_size) return; xa_for_each(family->sock_privs, id, priv) genl_sk_priv_free(family, priv); xa_destroy(family->sock_privs); 
kfree(family->sock_privs); } static void genl_sk_priv_free_by_sock(struct genl_family *family, struct sock *sk) { void *priv; if (!family->sock_priv_size) return; priv = xa_erase(family->sock_privs, (unsigned long) sk); if (!priv) return; genl_sk_priv_free(family, priv); } static void genl_release(struct sock *sk, unsigned long *groups) { struct genl_family *family; unsigned int id; down_read(&cb_lock); idr_for_each_entry(&genl_fam_idr, family, id) genl_sk_priv_free_by_sock(family, sk); up_read(&cb_lock); } /** * __genl_sk_priv_get - Get family private pointer for socket, if exists * * @family: family * @sk: socket * * Lookup a private memory for a Generic netlink family and specified socket. * * Caller should make sure this is called in RCU read locked section. * * Return: valid pointer on success, otherwise negative error value * encoded by ERR_PTR(), NULL in case priv does not exist. */ void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk) { if (WARN_ON_ONCE(!family->sock_privs)) return ERR_PTR(-EINVAL); return xa_load(family->sock_privs, (unsigned long) sk); } /** * genl_sk_priv_get - Get family private pointer for socket * * @family: family * @sk: socket * * Lookup a private memory for a Generic netlink family and specified socket. * Allocate the private memory in case it was not already done. * * Return: valid pointer on success, otherwise negative error value * encoded by ERR_PTR(). */ void *genl_sk_priv_get(struct genl_family *family, struct sock *sk) { void *priv, *old_priv; priv = __genl_sk_priv_get(family, sk); if (priv) return priv; /* priv for the family does not exist so far, create it. */ priv = genl_sk_priv_alloc(family); if (IS_ERR(priv)) return ERR_CAST(priv); old_priv = xa_cmpxchg(family->sock_privs, (unsigned long) sk, NULL, priv, GFP_KERNEL); if (old_priv) { genl_sk_priv_free(family, priv); if (xa_is_err(old_priv)) return ERR_PTR(xa_err(old_priv)); /* Race happened, priv for the socket was already inserted. */ return old_priv; } return priv; } /** * genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one * family may be registered with the same family name or identifier. * * The family's ops, multicast groups and module pointer must already * be assigned. * * Return 0 on success or a negative error code. */ int genl_register_family(struct genl_family *family) { int err, i; int start = GENL_START_ALLOC, end = GENL_MAX_ID; err = genl_validate_ops(family); if (err) return err; genl_lock_all(); if (genl_family_find_byname(family->name)) { err = -EEXIST; goto errout_locked; } err = genl_sk_privs_alloc(family); if (err) goto errout_locked; /* * Sadly, a few cases need to be special-cased * due to them having previously abused the API * and having used their family ID also as their * multicast group ID, so we use reserved IDs * for both to be sure we can do that mapping. 
*/ if (family == &genl_ctrl) { /* and this needs to be special for initial family lookups */ start = end = GENL_ID_CTRL; } else if (strcmp(family->name, "pmcraid") == 0) { start = end = GENL_ID_PMCRAID; } else if (strcmp(family->name, "VFS_DQUOT") == 0) { start = end = GENL_ID_VFS_DQUOT; } family->id = idr_alloc_cyclic(&genl_fam_idr, family, start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; goto errout_sk_privs_free; } err = genl_validate_assign_mc_groups(family); if (err) goto errout_remove; genl_unlock_all(); /* send all events */ genl_ctrl_event(CTRL_CMD_NEWFAMILY, family, NULL, 0); for (i = 0; i < family->n_mcgrps; i++) genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, family, &family->mcgrps[i], family->mcgrp_offset + i); return 0; errout_remove: idr_remove(&genl_fam_idr, family->id); errout_sk_privs_free: genl_sk_privs_free(family); errout_locked: genl_unlock_all(); return err; } EXPORT_SYMBOL(genl_register_family); /** * genl_unregister_family - unregister generic netlink family * @family: generic netlink family * * Unregisters the specified family. * * Returns 0 on success or a negative error code. */ int genl_unregister_family(const struct genl_family *family) { genl_lock_all(); if (!genl_family_find_byid(family->id)) { genl_unlock_all(); return -ENOENT; } genl_unregister_mc_groups(family); idr_remove(&genl_fam_idr, family->id); up_write(&cb_lock); wait_event(genl_sk_destructing_waitq, atomic_read(&genl_sk_destructing_cnt) == 0); genl_sk_privs_free(family); genl_unlock(); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); return 0; } EXPORT_SYMBOL(genl_unregister_family); /** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message * @portid: netlink portid the message is addressed to * @seq: sequence number (usually the one of the sender) * @family: generic netlink family * @flags: netlink message flags * @cmd: generic netlink command * * Returns pointer to user specific header */ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, const struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; nlh = nlmsg_put(skb, portid, seq, family->id, GENL_HDRLEN + family->hdrsize, flags); if (nlh == NULL) return NULL; hdr = nlmsg_data(nlh); hdr->cmd = cmd; hdr->version = family->version; hdr->reserved = 0; return (char *) hdr + GENL_HDRLEN; } EXPORT_SYMBOL(genlmsg_put); static struct genl_dumpit_info *genl_dumpit_info_alloc(void) { return kmalloc(sizeof(struct genl_dumpit_info), GFP_KERNEL); } static void genl_dumpit_info_free(const struct genl_dumpit_info *info) { kfree(info); } static struct nlattr ** genl_family_rcv_msg_attrs_parse(const struct genl_family *family, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, const struct genl_split_ops *ops, int hdrlen, enum genl_validate_flags no_strict_flag) { enum netlink_validation validate = ops->validate & no_strict_flag ? 
NL_VALIDATE_LIBERAL : NL_VALIDATE_STRICT; struct nlattr **attrbuf; int err; if (!ops->maxattr) return NULL; attrbuf = kmalloc_array(ops->maxattr + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!attrbuf) return ERR_PTR(-ENOMEM); err = __nlmsg_parse(nlh, hdrlen, attrbuf, ops->maxattr, ops->policy, validate, extack); if (err) { kfree(attrbuf); return ERR_PTR(err); } return attrbuf; } static void genl_family_rcv_msg_attrs_free(struct nlattr **attrbuf) { kfree(attrbuf); } struct genl_start_context { const struct genl_family *family; struct nlmsghdr *nlh; struct netlink_ext_ack *extack; const struct genl_split_ops *ops; int hdrlen; }; static int genl_start(struct netlink_callback *cb) { struct genl_start_context *ctx = cb->data; const struct genl_split_ops *ops; struct genl_dumpit_info *info; struct nlattr **attrs = NULL; int rc = 0; ops = ctx->ops; if (!(ops->validate & GENL_DONT_VALIDATE_DUMP) && ctx->nlh->nlmsg_len < nlmsg_msg_size(ctx->hdrlen)) return -EINVAL; attrs = genl_family_rcv_msg_attrs_parse(ctx->family, ctx->nlh, ctx->extack, ops, ctx->hdrlen, GENL_DONT_VALIDATE_DUMP_STRICT); if (IS_ERR(attrs)) return PTR_ERR(attrs); info = genl_dumpit_info_alloc(); if (!info) { genl_family_rcv_msg_attrs_free(attrs); return -ENOMEM; } info->op = *ops; info->info.family = ctx->family; info->info.snd_seq = cb->nlh->nlmsg_seq; info->info.snd_portid = NETLINK_CB(cb->skb).portid; info->info.nlhdr = cb->nlh; info->info.genlhdr = nlmsg_data(cb->nlh); info->info.attrs = attrs; genl_info_net_set(&info->info, sock_net(cb->skb->sk)); info->info.extack = cb->extack; memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr)); cb->data = info; if (ops->start) { genl_op_lock(ctx->family); rc = ops->start(cb); genl_op_unlock(ctx->family); } if (rc) { genl_family_rcv_msg_attrs_free(info->info.attrs); genl_dumpit_info_free(info); cb->data = NULL; } return rc; } static int genl_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct genl_dumpit_info *dump_info = cb->data; const struct genl_split_ops *ops = &dump_info->op; struct genl_info *info = &dump_info->info; int rc; info->extack = cb->extack; genl_op_lock(info->family); rc = ops->dumpit(skb, cb); genl_op_unlock(info->family); return rc; } static int genl_done(struct netlink_callback *cb) { struct genl_dumpit_info *dump_info = cb->data; const struct genl_split_ops *ops = &dump_info->op; struct genl_info *info = &dump_info->info; int rc = 0; info->extack = cb->extack; if (ops->done) { genl_op_lock(info->family); rc = ops->done(cb); genl_op_unlock(info->family); } genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(dump_info); return rc; } static int genl_family_rcv_msg_dumpit(const struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, const struct genl_split_ops *ops, int hdrlen, struct net *net) { struct genl_start_context ctx; struct netlink_dump_control c = { .module = family->module, .data = &ctx, .start = genl_start, .dump = genl_dumpit, .done = genl_done, .extack = extack, }; int err; ctx.family = family; ctx.nlh = nlh; ctx.extack = extack; ctx.ops = ops; ctx.hdrlen = hdrlen; genl_op_unlock(family); err = __netlink_dump_start(net->genl_sock, skb, nlh, &c); genl_op_lock(family); return err; } static int genl_family_rcv_msg_doit(const struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, const struct genl_split_ops *ops, int hdrlen, struct net *net) { struct nlattr **attrbuf; struct genl_info info; int err; attrbuf = 
genl_family_rcv_msg_attrs_parse(family, nlh, extack, ops, hdrlen, GENL_DONT_VALIDATE_STRICT); if (IS_ERR(attrbuf)) return PTR_ERR(attrbuf); info.snd_seq = nlh->nlmsg_seq; info.snd_portid = NETLINK_CB(skb).portid; info.family = family; info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.attrs = attrbuf; info.extack = extack; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); if (ops->pre_doit) { err = ops->pre_doit(ops, skb, &info); if (err) goto out; } err = ops->doit(skb, &info); if (ops->post_doit) ops->post_doit(ops, skb, &info); out: genl_family_rcv_msg_attrs_free(attrbuf); return err; } static int genl_header_check(const struct genl_family *family, struct nlmsghdr *nlh, struct genlmsghdr *hdr, struct netlink_ext_ack *extack) { u16 flags; /* Only for commands added after we started validating */ if (hdr->cmd < family->resv_start_op) return 0; if (hdr->reserved) { NL_SET_ERR_MSG(extack, "genlmsghdr.reserved field is not 0"); return -EINVAL; } /* Old netlink flags have pretty loose semantics, allow only the flags * consumed by the core where we can enforce the meaning. */ flags = nlh->nlmsg_flags; if ((flags & NLM_F_DUMP) == NLM_F_DUMP) /* DUMP is 2 bits */ flags &= ~NLM_F_DUMP; if (flags & ~(NLM_F_REQUEST | NLM_F_ACK | NLM_F_ECHO)) { NL_SET_ERR_MSG(extack, "ambiguous or reserved bits set in nlmsg_flags"); return -EINVAL; } return 0; } static int genl_family_rcv_msg(const struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct genlmsghdr *hdr = nlmsg_data(nlh); struct genl_split_ops op; int hdrlen; u8 flags; /* this family doesn't exist in this netns */ if (!family->netnsok && !net_eq(net, &init_net)) return -ENOENT; hdrlen = GENL_HDRLEN + family->hdrsize; if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; if (genl_header_check(family, nlh, hdr, extack)) return -EINVAL; flags = (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP ? 
GENL_CMD_CAP_DUMP : GENL_CMD_CAP_DO; if (genl_get_cmd(hdr->cmd, flags, family, &op)) return -EOPNOTSUPP; if ((op.flags & GENL_ADMIN_PERM) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((op.flags & GENL_UNS_ADMIN_PERM) && !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (flags & GENL_CMD_CAP_DUMP) return genl_family_rcv_msg_dumpit(family, skb, nlh, extack, &op, hdrlen, net); else return genl_family_rcv_msg_doit(family, skb, nlh, extack, &op, hdrlen, net); } static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { const struct genl_family *family; int err; family = genl_family_find_byid(nlh->nlmsg_type); if (family == NULL) return -ENOENT; genl_op_lock(family); err = genl_family_rcv_msg(family, skb, nlh, extack); genl_op_unlock(family); return err; } static void genl_rcv(struct sk_buff *skb) { down_read(&cb_lock); netlink_rcv_skb(skb, &genl_rcv_msg); up_read(&cb_lock); } /************************************************************************** * Controller **************************************************************************/ static struct genl_family genl_ctrl; static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { struct genl_op_iter i; void *hdr; hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -EMSGSIZE; if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, family->name) || nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id) || nla_put_u32(skb, CTRL_ATTR_VERSION, family->version) || nla_put_u32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize) || nla_put_u32(skb, CTRL_ATTR_MAXATTR, family->maxattr)) goto nla_put_failure; if (genl_op_iter_init(family, &i)) { struct nlattr *nla_ops; nla_ops = nla_nest_start_noflag(skb, CTRL_ATTR_OPS); if (nla_ops == NULL) goto nla_put_failure; while (genl_op_iter_next(&i)) { struct nlattr *nest; u32 op_flags; op_flags = i.flags; if (i.doit.policy || i.dumpit.policy) op_flags |= GENL_CMD_CAP_HASPOL; nest = nla_nest_start_noflag(skb, genl_op_iter_idx(&i)); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, CTRL_ATTR_OP_ID, i.cmd) || nla_put_u32(skb, CTRL_ATTR_OP_FLAGS, op_flags)) goto nla_put_failure; nla_nest_end(skb, nest); } nla_nest_end(skb, nla_ops); } if (family->n_mcgrps) { struct nlattr *nla_grps; int i; nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto nla_put_failure; for (i = 0; i < family->n_mcgrps; i++) { struct nlattr *nest; const struct genl_multicast_group *grp; grp = &family->mcgrps[i]; nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, family->mcgrp_offset + i) || nla_put_string(skb, CTRL_ATTR_MCAST_GRP_NAME, grp->name)) goto nla_put_failure; nla_nest_end(skb, nest); } nla_nest_end(skb, nla_grps); } genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } static int ctrl_fill_mcgrp_info(const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; struct nlattr *nla_grps; struct nlattr *nest; hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, family->name) || nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id)) goto nla_put_failure; nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto 
nla_put_failure; nest = nla_nest_start_noflag(skb, 1); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, CTRL_ATTR_MCAST_GRP_ID, grp_id) || nla_put_string(skb, CTRL_ATTR_MCAST_GRP_NAME, grp->name)) goto nla_put_failure; nla_nest_end(skb, nest); nla_nest_end(skb, nla_grps); genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) { int n = 0; struct genl_family *rt; struct net *net = sock_net(skb->sk); int fams_to_skip = cb->args[0]; unsigned int id; int err = 0; idr_for_each_entry(&genl_fam_idr, rt, id) { if (!rt->netnsok && !net_eq(net, &init_net)) continue; if (n++ < fams_to_skip) continue; err = ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, CTRL_CMD_NEWFAMILY); if (err) { n--; break; } } cb->args[0] = n; return err; } static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family, u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb == NULL) return ERR_PTR(-ENOBUFS); err = ctrl_fill_info(family, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); } return skb; } static struct sk_buff * ctrl_build_mcgrp_msg(const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id, u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb == NULL) return ERR_PTR(-ENOBUFS); err = ctrl_fill_mcgrp_info(family, grp, grp_id, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); } return skb; } static const struct nla_policy ctrl_policy_family[] = { [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, .len = GENL_NAMSIZ - 1 }, }; static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; const struct genl_family *res = NULL; int err = -EINVAL; if (info->attrs[CTRL_ATTR_FAMILY_ID]) { u16 id = nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]); res = genl_family_find_byid(id); err = -ENOENT; } if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { char *name; name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); res = genl_family_find_byname(name); #ifdef CONFIG_MODULES if (res == NULL) { genl_unlock(); up_read(&cb_lock); request_module("net-pf-%d-proto-%d-family-%s", PF_NETLINK, NETLINK_GENERIC, name); down_read(&cb_lock); genl_lock(); res = genl_family_find_byname(name); } #endif err = -ENOENT; } if (res == NULL) return err; if (!res->netnsok && !net_eq(genl_info_net(info), &init_net)) { /* family doesn't exist here */ return -ENOENT; } msg = ctrl_build_family_msg(res, info->snd_portid, info->snd_seq, CTRL_CMD_NEWFAMILY); if (IS_ERR(msg)) return PTR_ERR(msg); return genlmsg_reply(msg, info); } static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id) { struct sk_buff *msg; /* genl is still initialising */ if (!init_net.genl_sock) return 0; switch (event) { case CTRL_CMD_NEWFAMILY: case CTRL_CMD_DELFAMILY: WARN_ON(grp); msg = ctrl_build_family_msg(family, 0, 0, event); break; case CTRL_CMD_NEWMCAST_GRP: case CTRL_CMD_DELMCAST_GRP: BUG_ON(!grp); msg = ctrl_build_mcgrp_msg(family, grp, grp_id, 0, 0, event); break; default: return -EINVAL; } if (IS_ERR(msg)) return PTR_ERR(msg); if (!family->netnsok) { genlmsg_multicast_netns(&genl_ctrl, &init_net, msg, 0, 0, GFP_KERNEL); } else { rcu_read_lock(); 
genlmsg_multicast_allns(&genl_ctrl, msg, 0, 0, GFP_ATOMIC); rcu_read_unlock(); } return 0; } struct ctrl_dump_policy_ctx { struct netlink_policy_dump_state *state; const struct genl_family *rt; struct genl_op_iter *op_iter; u32 op; u16 fam_id; u8 dump_map:1, single_op:1; }; static const struct nla_policy ctrl_policy_policy[] = { [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, .len = GENL_NAMSIZ - 1 }, [CTRL_ATTR_OP] = { .type = NLA_U32 }, }; static int ctrl_dumppolicy_start(struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx; struct nlattr **tb = info->info.attrs; const struct genl_family *rt; struct genl_op_iter i; int err; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); if (!tb[CTRL_ATTR_FAMILY_ID] && !tb[CTRL_ATTR_FAMILY_NAME]) return -EINVAL; if (tb[CTRL_ATTR_FAMILY_ID]) { ctx->fam_id = nla_get_u16(tb[CTRL_ATTR_FAMILY_ID]); } else { rt = genl_family_find_byname( nla_data(tb[CTRL_ATTR_FAMILY_NAME])); if (!rt) return -ENOENT; ctx->fam_id = rt->id; } rt = genl_family_find_byid(ctx->fam_id); if (!rt) return -ENOENT; ctx->rt = rt; if (tb[CTRL_ATTR_OP]) { struct genl_split_ops doit, dump; ctx->single_op = true; ctx->op = nla_get_u32(tb[CTRL_ATTR_OP]); err = genl_get_cmd_both(ctx->op, rt, &doit, &dump); if (err) { NL_SET_BAD_ATTR(cb->extack, tb[CTRL_ATTR_OP]); return err; } if (doit.policy) { err = netlink_policy_dump_add_policy(&ctx->state, doit.policy, doit.maxattr); if (err) goto err_free_state; } if (dump.policy) { err = netlink_policy_dump_add_policy(&ctx->state, dump.policy, dump.maxattr); if (err) goto err_free_state; } if (!ctx->state) return -ENODATA; ctx->dump_map = 1; return 0; } ctx->op_iter = kmalloc(sizeof(*ctx->op_iter), GFP_KERNEL); if (!ctx->op_iter) return -ENOMEM; genl_op_iter_init(rt, ctx->op_iter); ctx->dump_map = genl_op_iter_next(ctx->op_iter); for (genl_op_iter_init(rt, &i); genl_op_iter_next(&i); ) { if (i.doit.policy) { err = netlink_policy_dump_add_policy(&ctx->state, i.doit.policy, i.doit.maxattr); if (err) goto err_free_state; } if (i.dumpit.policy) { err = netlink_policy_dump_add_policy(&ctx->state, i.dumpit.policy, i.dumpit.maxattr); if (err) goto err_free_state; } } if (!ctx->state) { err = -ENODATA; goto err_free_op_iter; } return 0; err_free_state: netlink_policy_dump_free(ctx->state); err_free_op_iter: kfree(ctx->op_iter); return err; } static void *ctrl_dumppolicy_prep(struct sk_buff *skb, struct netlink_callback *cb) { struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx; void *hdr; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &genl_ctrl, NLM_F_MULTI, CTRL_CMD_GETPOLICY); if (!hdr) return NULL; if (nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, ctx->fam_id)) return NULL; return hdr; } static int ctrl_dumppolicy_put_op(struct sk_buff *skb, struct netlink_callback *cb, struct genl_split_ops *doit, struct genl_split_ops *dumpit) { struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx; struct nlattr *nest_pol, *nest_op; void *hdr; int idx; /* skip if we have nothing to show */ if (!doit->policy && !dumpit->policy) return 0; hdr = ctrl_dumppolicy_prep(skb, cb); if (!hdr) return -ENOBUFS; nest_pol = nla_nest_start(skb, CTRL_ATTR_OP_POLICY); if (!nest_pol) goto err; nest_op = nla_nest_start(skb, doit->cmd); if (!nest_op) goto err; if (doit->policy) { idx = netlink_policy_dump_get_policy_idx(ctx->state, doit->policy, doit->maxattr); if (nla_put_u32(skb, CTRL_ATTR_POLICY_DO, idx)) goto err; } if (dumpit->policy) { idx 
= netlink_policy_dump_get_policy_idx(ctx->state, dumpit->policy, dumpit->maxattr); if (nla_put_u32(skb, CTRL_ATTR_POLICY_DUMP, idx)) goto err; } nla_nest_end(skb, nest_op); nla_nest_end(skb, nest_pol); genlmsg_end(skb, hdr); return 0; err: genlmsg_cancel(skb, hdr); return -ENOBUFS; } static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb) { struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx; void *hdr; if (ctx->dump_map) { if (ctx->single_op) { struct genl_split_ops doit, dumpit; if (WARN_ON(genl_get_cmd_both(ctx->op, ctx->rt, &doit, &dumpit))) return -ENOENT; if (ctrl_dumppolicy_put_op(skb, cb, &doit, &dumpit)) return skb->len; /* done with the per-op policy index list */ ctx->dump_map = 0; } while (ctx->dump_map) { if (ctrl_dumppolicy_put_op(skb, cb, &ctx->op_iter->doit, &ctx->op_iter->dumpit)) return skb->len; ctx->dump_map = genl_op_iter_next(ctx->op_iter); } } while (netlink_policy_dump_loop(ctx->state)) { struct nlattr *nest; hdr = ctrl_dumppolicy_prep(skb, cb); if (!hdr) goto nla_put_failure; nest = nla_nest_start(skb, CTRL_ATTR_POLICY); if (!nest) goto nla_put_failure; if (netlink_policy_dump_write(skb, ctx->state)) goto nla_put_failure; nla_nest_end(skb, nest); genlmsg_end(skb, hdr); } return skb->len; nla_put_failure: genlmsg_cancel(skb, hdr); return skb->len; } static int ctrl_dumppolicy_done(struct netlink_callback *cb) { struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx; kfree(ctx->op_iter); netlink_policy_dump_free(ctx->state); return 0; } static const struct genl_split_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, .validate = GENL_DONT_VALIDATE_STRICT, .policy = ctrl_policy_family, .maxattr = ARRAY_SIZE(ctrl_policy_family) - 1, .doit = ctrl_getfamily, .flags = GENL_CMD_CAP_DO, }, { .cmd = CTRL_CMD_GETFAMILY, .validate = GENL_DONT_VALIDATE_DUMP, .policy = ctrl_policy_family, .maxattr = ARRAY_SIZE(ctrl_policy_family) - 1, .dumpit = ctrl_dumpfamily, .flags = GENL_CMD_CAP_DUMP, }, { .cmd = CTRL_CMD_GETPOLICY, .policy = ctrl_policy_policy, .maxattr = ARRAY_SIZE(ctrl_policy_policy) - 1, .start = ctrl_dumppolicy_start, .dumpit = ctrl_dumppolicy, .done = ctrl_dumppolicy_done, .flags = GENL_CMD_CAP_DUMP, }, }; static const struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; static struct genl_family genl_ctrl __ro_after_init = { .module = THIS_MODULE, .split_ops = genl_ctrl_ops, .n_split_ops = ARRAY_SIZE(genl_ctrl_ops), .resv_start_op = CTRL_CMD_GETPOLICY + 1, .mcgrps = genl_ctrl_groups, .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups), .id = GENL_ID_CTRL, .name = "nlctrl", .version = 0x2, .netnsok = true, }; static int genl_bind(struct net *net, int group) { const struct genl_family *family; unsigned int id; int ret = 0; down_read(&cb_lock); idr_for_each_entry(&genl_fam_idr, family, id) { const struct genl_multicast_group *grp; int i; if (family->n_mcgrps == 0) continue; i = group - family->mcgrp_offset; if (i < 0 || i >= family->n_mcgrps) continue; grp = &family->mcgrps[i]; if ((grp->flags & GENL_MCAST_CAP_NET_ADMIN) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) ret = -EPERM; if ((grp->flags & GENL_MCAST_CAP_SYS_ADMIN) && !ns_capable(net->user_ns, CAP_SYS_ADMIN)) ret = -EPERM; if (family->bind) family->bind(i); break; } up_read(&cb_lock); return ret; } static void genl_unbind(struct net *net, int group) { const struct genl_family *family; unsigned int id; down_read(&cb_lock); idr_for_each_entry(&genl_fam_idr, family, id) { int i; if (family->n_mcgrps == 0) continue; i = group - family->mcgrp_offset; if (i < 0 || i >= family->n_mcgrps) 
continue; if (family->unbind) family->unbind(i); break; } up_read(&cb_lock); } static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, .bind = genl_bind, .unbind = genl_unbind, .release = genl_release, }; /* we'll bump the group number right afterwards */ net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, &cfg); if (!net->genl_sock && net_eq(net, &init_net)) panic("GENL: Cannot initialize generic netlink\n"); if (!net->genl_sock) return -ENOMEM; return 0; } static void __net_exit genl_pernet_exit(struct net *net) { netlink_kernel_release(net->genl_sock); net->genl_sock = NULL; } static struct pernet_operations genl_pernet_ops = { .init = genl_pernet_init, .exit = genl_pernet_exit, }; static int __init genl_init(void) { int err; err = genl_register_family(&genl_ctrl); if (err < 0) goto problem; err = register_pernet_subsys(&genl_pernet_ops); if (err) goto problem; return 0; problem: panic("GENL: Cannot register controller: %d\n", err); } core_initcall(genl_init); static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { struct sk_buff *tmp; struct net *net, *prev = NULL; bool delivered = false; int err; for_each_net_rcu(net) { if (prev) { tmp = skb_clone(skb, flags); if (!tmp) { err = -ENOMEM; goto error; } err = nlmsg_multicast(prev->genl_sock, tmp, portid, group, flags); if (!err) delivered = true; else if (err != -ESRCH) goto error; } prev = net; } err = nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); if (!err) delivered = true; else if (err != -ESRCH) return err; return delivered ? 0 : -ESRCH; error: kfree_skb(skb); return err; } int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group, flags); } EXPORT_SYMBOL(genlmsg_multicast_allns); void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags) { struct net *net = genl_info_net(info); struct sock *sk = net->genl_sock; if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; group = family->mcgrp_offset + group; nlmsg_notify(sk, skb, info->snd_portid, group, nlmsg_report(info->nlhdr), flags); } EXPORT_SYMBOL(genl_notify);
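/*
 * Illustrative sketch (not part of genetlink.c): the minimum a kernel module
 * needs in order to register a family with genl_register_family() above.
 * The "sketch" family, attribute and command names are hypothetical; only
 * the structures and registration calls handled by the code above are real.
 */
#include <linux/module.h>
#include <net/genetlink.h>

enum {
	SKETCH_ATTR_UNSPEC,
	SKETCH_ATTR_VALUE,		/* u32 payload */
	__SKETCH_ATTR_MAX,
};
#define SKETCH_ATTR_MAX (__SKETCH_ATTR_MAX - 1)

enum {
	SKETCH_CMD_UNSPEC,
	SKETCH_CMD_ECHO,
	__SKETCH_CMD_MAX,
};

static const struct nla_policy sketch_policy[SKETCH_ATTR_MAX + 1] = {
	[SKETCH_ATTR_VALUE] = { .type = NLA_U32 },
};

static int sketch_echo_doit(struct sk_buff *skb, struct genl_info *info)
{
	/* genl_family_rcv_msg_doit() has already parsed and validated
	 * info->attrs against sketch_policy before calling us.
	 */
	if (!info->attrs[SKETCH_ATTR_VALUE])
		return -EINVAL;
	return 0;
}

static const struct genl_ops sketch_ops[] = {
	{
		.cmd	= SKETCH_CMD_ECHO,
		.doit	= sketch_echo_doit,
	},
};

static struct genl_family sketch_family = {
	.name		= "sketch",
	.version	= 1,
	.maxattr	= SKETCH_ATTR_MAX,
	.policy		= sketch_policy,
	.module		= THIS_MODULE,
	.ops		= sketch_ops,
	.n_ops		= ARRAY_SIZE(sketch_ops),
	.resv_start_op	= __SKETCH_CMD_MAX,
};

static int __init sketch_init(void)
{
	return genl_register_family(&sketch_family);
}

static void __exit sketch_exit(void)
{
	genl_unregister_family(&sketch_family);
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");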
// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/file.c - sysfs regular (text) file implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * * Please see Documentation/filesystems/sysfs.rst for more information.
*/ #include <linux/module.h> #include <linux/kobject.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <linux/mm.h> #include "sysfs.h" /* * Determine ktype->sysfs_ops for the given kernfs_node. This function * must be called while holding an active reference. */ static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn) { struct kobject *kobj = kn->parent->priv; if (kn->flags & KERNFS_LOCKDEP) lockdep_assert_held(kn); return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; } /* * Reads on sysfs are handled through seq_file, which takes care of hairy * details like buffering and seeking. The following function pipes * sysfs_ops->show() result through seq_file. */ static int sysfs_kf_seq_show(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; struct kobject *kobj = of->kn->parent->priv; const struct sysfs_ops *ops = sysfs_file_ops(of->kn); ssize_t count; char *buf; if (WARN_ON_ONCE(!ops->show)) return -EINVAL; /* acquire buffer and ensure that it's >= PAGE_SIZE and clear */ count = seq_get_buf(sf, &buf); if (count < PAGE_SIZE) { seq_commit(sf, -1); return 0; } memset(buf, 0, PAGE_SIZE); count = ops->show(kobj, of->kn->priv, buf); if (count < 0) return count; /* * The code works fine with PAGE_SIZE return but it's likely to * indicate truncated result or overflow in normal use cases. */ if (count >= (ssize_t)PAGE_SIZE) { printk("fill_read_buffer: %pS returned bad count\n", ops->show); /* Try to struggle along */ count = PAGE_SIZE - 1; } seq_commit(sf, count); return 0; } static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { struct bin_attribute *battr = of->kn->priv; struct kobject *kobj = of->kn->parent->priv; loff_t size = file_inode(of->file)->i_size; if (!count) return 0; if (size) { if (pos >= size) return 0; if (pos + count > size) count = size - pos; } if (!battr->read) return -EIO; return battr->read(of->file, kobj, battr, buf, pos, count); } /* kernfs read callback for regular sysfs files with pre-alloc */ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); struct kobject *kobj = of->kn->parent->priv; ssize_t len; /* * If buf != of->prealloc_buf, we don't know how * large it is, so cannot safely pass it to ->show */ if (WARN_ON_ONCE(buf != of->prealloc_buf)) return 0; len = ops->show(kobj, of->kn->priv, buf); if (len < 0) return len; if (pos) { if (len <= pos) return 0; len -= pos; memmove(buf, buf + pos, len); } return min_t(ssize_t, count, len); } /* kernfs write callback for regular sysfs files */ static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); struct kobject *kobj = of->kn->parent->priv; if (!count) return 0; return ops->store(kobj, of->kn->priv, buf, count); } /* kernfs write callback for bin sysfs files */ static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { struct bin_attribute *battr = of->kn->priv; struct kobject *kobj = of->kn->parent->priv; loff_t size = file_inode(of->file)->i_size; if (size) { if (size <= pos) return -EFBIG; count = min_t(ssize_t, count, size - pos); } if (!count) return 0; if (!battr->write) return -EIO; return battr->write(of->file, kobj, battr, buf, pos, count); } static int sysfs_kf_bin_mmap(struct kernfs_open_file *of, struct vm_area_struct *vma) { struct 
bin_attribute *battr = of->kn->priv; struct kobject *kobj = of->kn->parent->priv; return battr->mmap(of->file, kobj, battr, vma); } static loff_t sysfs_kf_bin_llseek(struct kernfs_open_file *of, loff_t offset, int whence) { struct bin_attribute *battr = of->kn->priv; struct kobject *kobj = of->kn->parent->priv; if (battr->llseek) return battr->llseek(of->file, kobj, battr, offset, whence); else return generic_file_llseek(of->file, offset, whence); } static int sysfs_kf_bin_open(struct kernfs_open_file *of) { struct bin_attribute *battr = of->kn->priv; if (battr->f_mapping) of->file->f_mapping = battr->f_mapping(); return 0; } void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { struct kernfs_node *kn = kobj->sd, *tmp; if (kn && dir) kn = kernfs_find_and_get(kn, dir); else kernfs_get(kn); if (kn && attr) { tmp = kernfs_find_and_get(kn, attr); kernfs_put(kn); kn = tmp; } if (kn) { kernfs_notify(kn); kernfs_put(kn); } } EXPORT_SYMBOL_GPL(sysfs_notify); static const struct kernfs_ops sysfs_file_kfops_empty = { }; static const struct kernfs_ops sysfs_file_kfops_ro = { .seq_show = sysfs_kf_seq_show, }; static const struct kernfs_ops sysfs_file_kfops_wo = { .write = sysfs_kf_write, }; static const struct kernfs_ops sysfs_file_kfops_rw = { .seq_show = sysfs_kf_seq_show, .write = sysfs_kf_write, }; static const struct kernfs_ops sysfs_prealloc_kfops_ro = { .read = sysfs_kf_read, .prealloc = true, }; static const struct kernfs_ops sysfs_prealloc_kfops_wo = { .write = sysfs_kf_write, .prealloc = true, }; static const struct kernfs_ops sysfs_prealloc_kfops_rw = { .read = sysfs_kf_read, .write = sysfs_kf_write, .prealloc = true, }; static const struct kernfs_ops sysfs_bin_kfops_ro = { .read = sysfs_kf_bin_read, }; static const struct kernfs_ops sysfs_bin_kfops_wo = { .write = sysfs_kf_bin_write, }; static const struct kernfs_ops sysfs_bin_kfops_rw = { .read = sysfs_kf_bin_read, .write = sysfs_kf_bin_write, }; static const struct kernfs_ops sysfs_bin_kfops_mmap = { .read = sysfs_kf_bin_read, .write = sysfs_kf_bin_write, .mmap = sysfs_kf_bin_mmap, .open = sysfs_kf_bin_open, .llseek = sysfs_kf_bin_llseek, }; int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, umode_t mode, kuid_t uid, kgid_t gid, const void *ns) { struct kobject *kobj = parent->priv; const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops; struct lock_class_key *key = NULL; const struct kernfs_ops *ops = NULL; struct kernfs_node *kn; /* every kobject with an attribute needs a ktype assigned */ if (WARN(!sysfs_ops, KERN_ERR "missing sysfs attribute operations for kobject: %s\n", kobject_name(kobj))) return -EINVAL; if (mode & SYSFS_PREALLOC) { if (sysfs_ops->show && sysfs_ops->store) ops = &sysfs_prealloc_kfops_rw; else if (sysfs_ops->show) ops = &sysfs_prealloc_kfops_ro; else if (sysfs_ops->store) ops = &sysfs_prealloc_kfops_wo; } else { if (sysfs_ops->show && sysfs_ops->store) ops = &sysfs_file_kfops_rw; else if (sysfs_ops->show) ops = &sysfs_file_kfops_ro; else if (sysfs_ops->store) ops = &sysfs_file_kfops_wo; } if (!ops) ops = &sysfs_file_kfops_empty; #ifdef CONFIG_DEBUG_LOCK_ALLOC if (!attr->ignore_lockdep) key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, PAGE_SIZE, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); return PTR_ERR(kn); } return 0; } int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, const struct bin_attribute 
*battr, umode_t mode, kuid_t uid, kgid_t gid, const void *ns) { const struct attribute *attr = &battr->attr; struct lock_class_key *key = NULL; const struct kernfs_ops *ops; struct kernfs_node *kn; if (battr->mmap) ops = &sysfs_bin_kfops_mmap; else if (battr->read && battr->write) ops = &sysfs_bin_kfops_rw; else if (battr->read) ops = &sysfs_bin_kfops_ro; else if (battr->write) ops = &sysfs_bin_kfops_wo; else ops = &sysfs_file_kfops_empty; #ifdef CONFIG_DEBUG_LOCK_ALLOC if (!attr->ignore_lockdep) key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, battr->size, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); return PTR_ERR(kn); } return 0; } /** * sysfs_create_file_ns - create an attribute file for an object with custom ns * @kobj: object we're creating for * @attr: attribute descriptor * @ns: namespace the new file should belong to */ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { kuid_t uid; kgid_t gid; if (WARN_ON(!kobj || !kobj->sd || !attr)) return -EINVAL; kobject_get_ownership(kobj, &uid, &gid); return sysfs_add_file_mode_ns(kobj->sd, attr, attr->mode, uid, gid, ns); } EXPORT_SYMBOL_GPL(sysfs_create_file_ns); int sysfs_create_files(struct kobject *kobj, const struct attribute * const *ptr) { int err = 0; int i; for (i = 0; ptr[i] && !err; i++) err = sysfs_create_file(kobj, ptr[i]); if (err) while (--i >= 0) sysfs_remove_file(kobj, ptr[i]); return err; } EXPORT_SYMBOL_GPL(sysfs_create_files); /** * sysfs_add_file_to_group - add an attribute file to a pre-existing group. * @kobj: object we're acting for. * @attr: attribute descriptor. * @group: group name. */ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { struct kernfs_node *parent; kuid_t uid; kgid_t gid; int error; if (group) { parent = kernfs_find_and_get(kobj->sd, group); } else { parent = kobj->sd; kernfs_get(parent); } if (!parent) return -ENOENT; kobject_get_ownership(kobj, &uid, &gid); error = sysfs_add_file_mode_ns(parent, attr, attr->mode, uid, gid, NULL); kernfs_put(parent); return error; } EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); /** * sysfs_chmod_file - update the modified mode value on an object attribute. * @kobj: object we're acting for. * @attr: attribute descriptor. * @mode: file permissions. * */ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, umode_t mode) { struct kernfs_node *kn; struct iattr newattrs; int rc; kn = kernfs_find_and_get(kobj->sd, attr->name); if (!kn) return -ENOENT; newattrs.ia_mode = (mode & S_IALLUGO) | (kn->mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE; rc = kernfs_setattr(kn, &newattrs); kernfs_put(kn); return rc; } EXPORT_SYMBOL_GPL(sysfs_chmod_file); /** * sysfs_break_active_protection - break "active" protection * @kobj: The kernel object @attr is associated with. * @attr: The attribute to break the "active" protection for. * * With sysfs, just like kernfs, deletion of an attribute is postponed until * all active .show() and .store() callbacks have finished unless this function * is called. Hence this function is useful in methods that implement self * deletion. 
*/ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr) { struct kernfs_node *kn; kobject_get(kobj); kn = kernfs_find_and_get(kobj->sd, attr->name); if (kn) kernfs_break_active_protection(kn); else kobject_put(kobj); return kn; } EXPORT_SYMBOL_GPL(sysfs_break_active_protection); /** * sysfs_unbreak_active_protection - restore "active" protection * @kn: Pointer returned by sysfs_break_active_protection(). * * Undo the effects of sysfs_break_active_protection(). Since this function * calls kernfs_put() on the kernfs node that corresponds to the 'attr' * argument passed to sysfs_break_active_protection() that attribute may have * been removed between the sysfs_break_active_protection() and * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after * this function has returned. */ void sysfs_unbreak_active_protection(struct kernfs_node *kn) { struct kobject *kobj = kn->parent->priv; kernfs_unbreak_active_protection(kn); kernfs_put(kn); kobject_put(kobj); } EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection); /** * sysfs_remove_file_ns - remove an object attribute with a custom ns tag * @kobj: object we're acting for * @attr: attribute descriptor * @ns: namespace tag of the file to remove * * Hash the attribute name and namespace tag and kill the victim. */ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { struct kernfs_node *parent = kobj->sd; kernfs_remove_by_name_ns(parent, attr->name, ns); } EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); /** * sysfs_remove_file_self - remove an object attribute from its own method * @kobj: object we're acting for * @attr: attribute descriptor * * See kernfs_remove_self() for details. */ bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) { struct kernfs_node *parent = kobj->sd; struct kernfs_node *kn; bool ret; kn = kernfs_find_and_get(parent, attr->name); if (WARN_ON_ONCE(!kn)) return false; ret = kernfs_remove_self(kn); kernfs_put(kn); return ret; } EXPORT_SYMBOL_GPL(sysfs_remove_file_self); void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *ptr) { int i; for (i = 0; ptr[i]; i++) sysfs_remove_file(kobj, ptr[i]); } EXPORT_SYMBOL_GPL(sysfs_remove_files); /** * sysfs_remove_file_from_group - remove an attribute file from a group. * @kobj: object we're acting for. * @attr: attribute descriptor. * @group: group name. */ void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group) { struct kernfs_node *parent; if (group) { parent = kernfs_find_and_get(kobj->sd, group); } else { parent = kobj->sd; kernfs_get(parent); } if (parent) { kernfs_remove_by_name(parent, attr->name); kernfs_put(parent); } } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); /** * sysfs_create_bin_file - create binary file for object. * @kobj: object. * @attr: attribute descriptor. */ int sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { kuid_t uid; kgid_t gid; if (WARN_ON(!kobj || !kobj->sd || !attr)) return -EINVAL; kobject_get_ownership(kobj, &uid, &gid); return sysfs_add_bin_file_mode_ns(kobj->sd, attr, attr->attr.mode, uid, gid, NULL); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); /** * sysfs_remove_bin_file - remove binary file for object. * @kobj: object. * @attr: attribute descriptor. 
*/ void sysfs_remove_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { kernfs_remove_by_name(kobj->sd, attr->attr.name); } EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); static int internal_change_owner(struct kernfs_node *kn, kuid_t kuid, kgid_t kgid) { struct iattr newattrs = { .ia_valid = ATTR_UID | ATTR_GID, .ia_uid = kuid, .ia_gid = kgid, }; return kernfs_setattr(kn, &newattrs); } /** * sysfs_link_change_owner - change owner of a sysfs file. * @kobj: object of the kernfs_node the symlink is located in. * @targ: object of the kernfs_node the symlink points to. * @name: name of the link. * @kuid: new owner's kuid * @kgid: new owner's kgid * * This function looks up the sysfs symlink entry @name under @kobj and changes * the ownership to @kuid/@kgid. The symlink is looked up in the namespace of * @targ. * * Returns 0 on success or error code on failure. */ int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid) { struct kernfs_node *kn = NULL; int error; if (!name || !kobj->state_in_sysfs || !targ->state_in_sysfs) return -EINVAL; error = -ENOENT; kn = kernfs_find_and_get_ns(kobj->sd, name, targ->sd->ns); if (!kn) goto out; error = -EINVAL; if (kernfs_type(kn) != KERNFS_LINK) goto out; if (kn->symlink.target_kn->priv != targ) goto out; error = internal_change_owner(kn, kuid, kgid); out: kernfs_put(kn); return error; } /** * sysfs_file_change_owner - change owner of a sysfs file. * @kobj: object. * @name: name of the file to change. * @kuid: new owner's kuid * @kgid: new owner's kgid * * This function looks up the sysfs entry @name under @kobj and changes the * ownership to @kuid/@kgid. * * Returns 0 on success or error code on failure. */ int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid) { struct kernfs_node *kn; int error; if (!name) return -EINVAL; if (!kobj->state_in_sysfs) return -EINVAL; kn = kernfs_find_and_get(kobj->sd, name); if (!kn) return -ENOENT; error = internal_change_owner(kn, kuid, kgid); kernfs_put(kn); return error; } EXPORT_SYMBOL_GPL(sysfs_file_change_owner); /** * sysfs_change_owner - change owner of the given object. * @kobj: object. * @kuid: new owner's kuid * @kgid: new owner's kgid * * Change the owner of the default directory, files, groups, and attributes of * @kobj to @kuid/@kgid. Note that sysfs_change_owner mirrors how the sysfs * entries for a kobject are added by driver core. In summary, * sysfs_change_owner() takes care of the default directory entry for @kobj, * the default attributes associated with the ktype of @kobj and the default * attributes associated with the ktype of @kobj. * Additional properties not added by driver core have to be changed by the * driver or subsystem which created them. This is similar to how * driver/subsystem specific entries are removed. * * Returns 0 on success or error code on failure. */ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid) { int error; const struct kobj_type *ktype; if (!kobj->state_in_sysfs) return -EINVAL; /* Change the owner of the kobject itself. */ error = internal_change_owner(kobj->sd, kuid, kgid); if (error) return error; ktype = get_ktype(kobj); if (ktype) { /* * Change owner of the default groups associated with the * ktype of @kobj. */ error = sysfs_groups_change_owner(kobj, ktype->default_groups, kuid, kgid); if (error) return error; } return 0; } EXPORT_SYMBOL_GPL(sysfs_change_owner); /** * sysfs_emit - scnprintf equivalent, aware of PAGE_SIZE buffer. 
* @buf: start of PAGE_SIZE buffer. * @fmt: format * @...: optional arguments to @fmt * * Returns number of characters written to @buf. */ int sysfs_emit(char *buf, const char *fmt, ...) { va_list args; int len; if (WARN(!buf || offset_in_page(buf), "invalid sysfs_emit: buf:%p\n", buf)) return 0; va_start(args, fmt); len = vscnprintf(buf, PAGE_SIZE, fmt, args); va_end(args); return len; } EXPORT_SYMBOL_GPL(sysfs_emit); /** * sysfs_emit_at - scnprintf equivalent, aware of PAGE_SIZE buffer. * @buf: start of PAGE_SIZE buffer. * @at: offset in @buf to start write in bytes * @at must be >= 0 && < PAGE_SIZE * @fmt: format * @...: optional arguments to @fmt * * Returns number of characters written starting at &@buf[@at]. */ int sysfs_emit_at(char *buf, int at, const char *fmt, ...) { va_list args; int len; if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE, "invalid sysfs_emit_at: buf:%p at:%d\n", buf, at)) return 0; va_start(args, fmt); len = vscnprintf(buf + at, PAGE_SIZE - at, fmt, args); va_end(args); return len; } EXPORT_SYMBOL_GPL(sysfs_emit_at);
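/*
 * Illustrative sketch (editorial addition, not part of the file above): a
 * typical attribute ->show() callback returning its value through
 * sysfs_emit(), the helper defined above, so that the PAGE_SIZE bound and
 * page-alignment check are enforced for it. The struct foo_obj, foo_show()
 * and the "foo" attribute are made-up assumptions; kobj_attribute,
 * __ATTR_RO() and container_of() come from <linux/kobject.h> and
 * <linux/kernel.h>.
 */
#if 0 /* example only, never compiled */
struct foo_obj {
	struct kobject kobj;
	int value;
};

static ssize_t foo_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	struct foo_obj *foo = container_of(kobj, struct foo_obj, kobj);

	/* buf is the page-sized, page-aligned buffer sysfs passes to ->show() */
	return sysfs_emit(buf, "%d\n", foo->value);
}

static struct kobj_attribute foo_attribute = __ATTR_RO(foo);
#endif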
/* SPDX-License-Identifier:
GPL-2.0 */ /* * Linux Socket Filter Data Structures */ #ifndef __LINUX_FILTER_H__ #define __LINUX_FILTER_H__ #include <linux/atomic.h> #include <linux/bpf.h> #include <linux/refcount.h> #include <linux/compat.h> #include <linux/skbuff.h> #include <linux/linkage.h> #include <linux/printk.h> #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/capability.h> #include <linux/set_memory.h> #include <linux/kallsyms.h> #include <linux/if_vlan.h> #include <linux/vmalloc.h> #include <linux/sockptr.h> #include <crypto/sha1.h> #include <linux/u64_stats_sync.h> #include <net/sch_generic.h> #include <asm/byteorder.h> #include <uapi/linux/filter.h> struct sk_buff; struct sock; struct seccomp_data; struct bpf_prog_aux; struct xdp_rxq_info; struct xdp_buff; struct sock_reuseport; struct ctl_table; struct ctl_table_header; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function * calls in BPF_CALL instruction. */ #define BPF_REG_ARG1 BPF_REG_1 #define BPF_REG_ARG2 BPF_REG_2 #define BPF_REG_ARG3 BPF_REG_3 #define BPF_REG_ARG4 BPF_REG_4 #define BPF_REG_ARG5 BPF_REG_5 #define BPF_REG_CTX BPF_REG_6 #define BPF_REG_FP BPF_REG_10 /* Additional register mappings for converted user programs. */ #define BPF_REG_A BPF_REG_0 #define BPF_REG_X BPF_REG_7 #define BPF_REG_TMP BPF_REG_2 /* scratch reg */ #define BPF_REG_D BPF_REG_8 /* data, callee-saved */ #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ /* Kernel hidden auxiliary/helper register. */ #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 /* unused opcode to mark special load instruction. Same as BPF_ABS */ #define BPF_PROBE_MEM 0x20 /* unused opcode to mark special ldsx instruction. Same as BPF_IND */ #define BPF_PROBE_MEMSX 0x40 /* unused opcode to mark special load instruction. Same as BPF_MSH */ #define BPF_PROBE_MEM32 0xa0 /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 /* unused opcode to mark speculation barrier for mitigating * Speculative Store Bypass */ #define BPF_NOSPEC 0xc0 /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. */ #define BPF_SYM_ELF_TYPE 't' /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 /* Helper macros for filter block array initializers. 
*/ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ #define BPF_ALU64_REG_OFF(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) #define BPF_ALU64_REG(OP, DST, SRC) \ BPF_ALU64_REG_OFF(OP, DST, SRC, 0) #define BPF_ALU32_REG_OFF(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) #define BPF_ALU32_REG(OP, DST, SRC) \ BPF_ALU32_REG_OFF(OP, DST, SRC, 0) /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ #define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) #define BPF_ALU64_IMM(OP, DST, IMM) \ BPF_ALU64_IMM_OFF(OP, DST, IMM, 0) #define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) #define BPF_ALU32_IMM(OP, DST, IMM) \ BPF_ALU32_IMM_OFF(OP, DST, IMM, 0) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ #define BPF_ENDIAN(TYPE, DST, LEN) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = LEN }) /* Byte Swap, bswap16/32/64 */ #define BPF_BSWAP(DST, LEN) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = LEN }) /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) #define BPF_MOV32_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) /* Short form of mov, dst_reg = imm32 */ #define BPF_MOV64_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_MOV32_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */ #define BPF_MOVSX64_REG(DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) #define BPF_MOVSX32_REG(DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Special form of mov32, used for doing explicit zero extension on dst. 
*/ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = DST, \ .off = 0, \ .imm = 1 }) static inline bool insn_is_zext(const struct bpf_insn *insn) { return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1; } /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) #define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_DW | BPF_IMM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = (__u32) (IMM) }), \ ((struct bpf_insn) { \ .code = 0, /* zero is reserved opcode */ \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ #define BPF_LD_ABS(SIZE, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ #define BPF_LD_IND(SIZE, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ .dst_reg = 0, \ .src_reg = SRC, \ .off = 0, \ .imm = IMM }) /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Memory load, dst_reg = *(signed size *) (src_reg + off16) */ #define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* * Atomic operations: * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg * BPF_AND *(uint *) (dst_reg + off16) &= src_reg * BPF_OR *(uint *) (dst_reg + off16) |= src_reg * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg); * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg); * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = OP }) /* Legacy alias */ #define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ ((struct bpf_insn) { \ 
.code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) /* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ #define BPF_JMP_REG(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) /* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ #define BPF_JMP32_REG(OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) /* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ #define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_JA, \ .dst_reg = 0, \ .src_reg = 0, \ .off = OFF, \ .imm = 0 }) /* Relative call */ #define BPF_CALL_REL(TGT) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_CALL, \ .dst_reg = 0, \ .src_reg = BPF_PSEUDO_CALL, \ .off = 0, \ .imm = TGT }) /* Convert function address to BPF immediate */ #define BPF_CALL_IMM(x) ((void *)(x) - (void *)__bpf_call_base) #define BPF_EMIT_CALL(FUNC) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_CALL, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = BPF_CALL_IMM(FUNC) }) /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ ((struct bpf_insn) { \ .code = CODE, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = IMM }) /* Program exit */ #define BPF_EXIT_INSN() \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_EXIT, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = 0 }) /* Speculation barrier */ #define BPF_ST_NOSPEC() \ ((struct bpf_insn) { \ .code = BPF_ST | BPF_NOSPEC, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = 0 }) /* Internal classic blocks for direct assignment */ #define __BPF_STMT(CODE, K) \ ((struct sock_filter) BPF_STMT(CODE, K)) #define __BPF_JUMP(CODE, K, JT, JF) \ ((struct sock_filter) BPF_JUMP(CODE, K, JT, JF)) #define bytes_to_bpf_size(bytes) \ ({ \ int bpf_size = -EINVAL; \ \ if (bytes == sizeof(u8)) \ bpf_size = BPF_B; \ else if (bytes == sizeof(u16)) \ bpf_size = BPF_H; \ else if (bytes == sizeof(u32)) \ bpf_size = BPF_W; \ else if (bytes == sizeof(u64)) \ bpf_size = BPF_DW; \ \ bpf_size; \ }) #define bpf_size_to_bytes(bpf_size) \ ({ \ int bytes = -EINVAL; \ \ if (bpf_size == BPF_B) \ bytes = sizeof(u8); \ else if (bpf_size == BPF_H) \ bytes = sizeof(u16); \ else if (bpf_size == BPF_W) \ bytes = sizeof(u32); \ else if (bpf_size == BPF_DW) \ bytes = sizeof(u64); \ \ bytes; \ }) #define BPF_SIZEOF(type) \ ({ \ const int __size = bytes_to_bpf_size(sizeof(type)); \ BUILD_BUG_ON(__size < 0); \ __size; \ }) #define BPF_FIELD_SIZEOF(type, field) \ ({ \ const int __size = bytes_to_bpf_size(sizeof_field(type, field)); \ BUILD_BUG_ON(__size < 0); \ __size; \ }) #define BPF_LDST_BYTES(insn) \ ({ \ const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \ WARN_ON(__size < 0); \ __size; \ }) #define __BPF_MAP_0(m, v, ...) v #define __BPF_MAP_1(m, v, t, a, ...) m(t, a) #define __BPF_MAP_2(m, v, t, a, ...) 
m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) #define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) #define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) #define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) #define __BPF_REG_0(...) __BPF_PAD(5) #define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) #define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) #define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) #define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) #define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) #define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) #define __BPF_REG(n, ...) __BPF_REG_##n(__VA_ARGS__) #define __BPF_CAST(t, a) \ (__force t) \ (__force \ typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ (unsigned long)0, (t)0))) a #define __BPF_V void #define __BPF_N #define __BPF_DECL_ARGS(t, a) t a #define __BPF_DECL_REGS(t, a) u64 a #define __BPF_PAD(n) \ __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) #define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) #define __NOATTR #define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) #define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) #define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) #define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) #define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) #define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) #define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 #if BITS_PER_LONG == 64 # define bpf_ctx_range_ptr(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 #else # define bpf_ctx_range_ptr(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1 #endif /* BITS_PER_LONG == 64 */ #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ ({ \ BUILD_BUG_ON(sizeof_field(TYPE, MEMBER) != (SIZE)); \ *(PTR_SIZE) = (SIZE); \ offsetof(TYPE, MEMBER); \ }) /* A struct sock_filter is architecture independent. 
*/ struct compat_sock_fprog { u16 len; compat_uptr_t filter; /* struct sock_filter * */ }; struct sock_fprog_kern { u16 len; struct sock_filter *filter; }; /* Some arches need doubleword alignment for their instructions and/or data */ #define BPF_IMAGE_ALIGNMENT 8 struct bpf_binary_header { u32 size; u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; struct bpf_prog_stats { u64_stats_t cnt; u64_stats_t nsecs; u64_stats_t misses; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); struct sk_filter { refcount_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); extern struct mutex nf_conn_btf_access_lock; extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size); typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, const struct bpf_insn *)); static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, const void *ctx, bpf_dispatcher_fn dfunc) { u32 ret; cant_migrate(); if (static_branch_unlikely(&bpf_stats_enabled_key)) { struct bpf_prog_stats *stats; u64 start = sched_clock(); unsigned long flags; ret = dfunc(ctx, prog->insnsi, prog->bpf_func); stats = this_cpu_ptr(prog->stats); flags = u64_stats_update_begin_irqsave(&stats->syncp); u64_stats_inc(&stats->cnt); u64_stats_add(&stats->nsecs, sched_clock() - start); u64_stats_update_end_irqrestore(&stats->syncp, flags); } else { ret = dfunc(ctx, prog->insnsi, prog->bpf_func); } return ret; } static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void *ctx) { return __bpf_prog_run(prog, ctx, bpf_dispatcher_nop_func); } /* * Use in preemptible and therefore migratable context to make sure that * the execution of the BPF program runs on one CPU. * * This uses migrate_disable/enable() explicitly to document that the * invocation of a BPF program does not require reentrancy protection * against a BPF program which is invoked from a preempting task. */ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, const void *ctx) { u32 ret; migrate_disable(); ret = bpf_prog_run(prog, ctx); migrate_enable(); return ret; } #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN struct bpf_skb_data_end { struct qdisc_skb_cb qdisc_cb; void *data_meta; void *data_end; }; struct bpf_nh_params { u32 nh_family; union { u32 ipv4_nh; struct in6_addr ipv6_nh; }; }; struct bpf_redirect_info { u64 tgt_index; void *tgt_value; struct bpf_map *map; u32 flags; u32 kern_flags; u32 map_id; enum bpf_map_type map_type; struct bpf_nh_params nh; }; DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); /* flags for bpf_redirect_info kern_flags */ #define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) * ensure that cb[] area can be written to when BPF program is * invoked (otherwise cb[] save/restore is necessary). */ static inline void bpf_compute_data_pointers(struct sk_buff *skb) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); cb->data_meta = skb->data - skb_metadata_len(skb); cb->data_end = skb->data + skb_headlen(skb); } /* Similar to bpf_compute_data_pointers(), except that save orginal * data in cb->data and cb->meta_data for restore. 
*/ static inline void bpf_compute_and_save_data_end( struct sk_buff *skb, void **saved_data_end) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; *saved_data_end = cb->data_end; cb->data_end = skb->data + skb_headlen(skb); } /* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; cb->data_end = saved_data_end; } static inline u8 *bpf_skb_cb(const struct sk_buff *skb) { /* eBPF programs may read/write skb->cb[] area to transfer meta * data between tail calls. Since this also needs to work with * tc, that scratch memory is mapped to qdisc_skb_cb's data area. * * In some socket filter cases, the cb unfortunately needs to be * saved/restored so that protocol specific skb->cb[] data won't * be lost. In any case, due to unpriviledged eBPF programs * attached to sockets, we need to clear the bpf_skb_cb() area * to not leak previous contents to user space. */ BUILD_BUG_ON(sizeof_field(struct __sk_buff, cb) != BPF_SKB_CB_LEN); BUILD_BUG_ON(sizeof_field(struct __sk_buff, cb) != sizeof_field(struct qdisc_skb_cb, data)); return qdisc_skb_cb(skb)->data; } /* Must be invoked with migration disabled */ static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, const void *ctx) { const struct sk_buff *skb = ctx; u8 *cb_data = bpf_skb_cb(skb); u8 cb_saved[BPF_SKB_CB_LEN]; u32 res; if (unlikely(prog->cb_access)) { memcpy(cb_saved, cb_data, sizeof(cb_saved)); memset(cb_data, 0, sizeof(cb_saved)); } res = bpf_prog_run(prog, skb); if (unlikely(prog->cb_access)) memcpy(cb_data, cb_saved, sizeof(cb_saved)); return res; } static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u32 res; migrate_disable(); res = __bpf_prog_run_save_cb(prog, skb); migrate_enable(); return res; } static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u32 res; if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); res = bpf_prog_run_pin_on_cpu(prog, skb); return res; } DECLARE_BPF_DISPATCHER(xdp) DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); u32 xdp_master_redirect(struct xdp_buff *xdp); void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) { return prog->len * sizeof(struct bpf_insn); } static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) { return round_up(bpf_prog_insn_size(prog) + sizeof(__be64) + 1, SHA1_BLOCK_SIZE); } static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), offsetof(struct bpf_prog, insns[proglen])); } static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) { /* When classic BPF programs have been loaded and the arch * does not have a classic BPF JIT (anymore), they have been * converted via bpf_migrate_filter() to eBPF and thus always * have an unspec program type. 
*/ return prog->type == BPF_PROG_TYPE_UNSPEC; } static inline u32 bpf_ctx_off_adjust_machine(u32 size) { const u32 size_machine = sizeof(unsigned long); if (size > size_machine && size % size_machine == 0) size = size_machine; return size; } static inline bool bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) { return size <= size_default && (size & (size - 1)) == 0; } static inline u8 bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) { u8 access_off = off & (size_default - 1); #ifdef __LITTLE_ENDIAN return access_off; #else return size_default - (access_off + size); #endif } #define bpf_ctx_wide_access_ok(off, size, type, field) \ (size == sizeof(__u64) && \ off >= offsetof(type, field) && \ off + sizeof(__u64) <= offsetofend(type, field) && \ off % sizeof(__u64) == 0) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { #ifndef CONFIG_BPF_JIT_ALWAYS_ON if (!fp->jited) { set_vm_flush_reset_perms(fp); set_memory_ro((unsigned long)fp, fp->pages); } #endif } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { return sk_filter_trim_cap(sk, skb, 1); } struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); void bpf_prog_free(struct bpf_prog *fp); bool bpf_opcode_in_insntable(u8 code); void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, const u32 *insn_to_jit_off); int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog); void bpf_prog_jit_attempt_done(struct bpf_prog *prog); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { __bpf_prog_free(fp); } typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, unsigned int flen); int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); void sk_reuseport_prog_free(struct bpf_prog *prog); int sk_detach_filter(struct sock *sk); int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len); bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #define __bpf_call_base_args \ ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ (void *)__bpf_call_base) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); bool bpf_jit_supports_exceptions(void); bool 
bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) { /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ return kallsyms_show_value(cred); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); void bpf_clear_redirect_map(struct bpf_map *map); static inline bool xdp_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_set_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_clear_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT; } static inline int xdp_ok_fwd_dev(const struct net_device *fwd, unsigned int pktlen) { unsigned int len; if (unlikely(!(fwd->flags & IFF_UP))) return -ENETDOWN; len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; if (pktlen > len) return -EMSGSIZE; return 0; } /* The pair of xdp_do_redirect and xdp_do_flush MUST be called in the * same cpu context. Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is * because we only track one map and force a flush when the map changes. * This does not appear to be a real limitation for existing software. 
*/ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, struct xdp_frame *xdpf, struct bpf_prog *prog); void xdp_do_flush(void); void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, struct bpf_prog *prog, struct sk_buff *skb, struct sock *migrating_sk, u32 hash); #else static inline struct sock * bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, struct bpf_prog *prog, struct sk_buff *skb, struct sock *migrating_sk, u32 hash) { return NULL; } #endif #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); void bpf_jit_fill_hole_with_zero(void *area, unsigned int size); struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); u64 bpf_jit_alloc_exec_limit(void); void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_prog_pack_free(void *ptr, u32 size); static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { return list_empty(&fp->aux->ksym.lnode) || fp->aux->ksym.lnode.prev == LIST_POISON2; } struct bpf_binary_header * bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, unsigned int alignment, struct bpf_binary_header **rw_hdr, u8 **rw_image, bpf_jit_fill_hole_t bpf_fill_ill_insns); int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header); void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header); int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_jit_poke_descriptor *poke); int bpf_jit_get_func_addr(const struct bpf_prog *prog, const struct bpf_insn *insn, bool extra_pass, u64 *func_addr, bool *func_addr_fixed); struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen, proglen, pass, image, current->comm, task_pid_nr(current)); if (image) print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 16, 1, image, proglen, false); } static inline bool bpf_jit_is_ebpf(void) { # ifdef CONFIG_HAVE_EBPF_JIT return true; # else return false; # endif } static inline bool ebpf_jit_enabled(void) { return bpf_jit_enable && bpf_jit_is_ebpf(); } static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return fp->jited && bpf_jit_is_ebpf(); } static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) { /* These are the prerequisites, should someone ever have the * idea to call blinding outside of them, we make sure to * bail out. 
*/ if (!bpf_jit_is_ebpf()) return false; if (!prog->jit_requested) return false; if (!bpf_jit_harden) return false; if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) return false; return true; } static inline bool bpf_jit_kallsyms_enabled(void) { /* There are a couple of corner cases where kallsyms should * not be enabled f.e. on hardening. */ if (bpf_jit_harden) return false; if (!bpf_jit_kallsyms) return false; if (bpf_jit_kallsyms == 1) return true; return false; } const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym); bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { const char *ret = __bpf_address_lookup(addr, size, off, sym); if (ret && modname) *modname = NULL; return ret; } void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); #else /* CONFIG_BPF_JIT */ static inline bool ebpf_jit_enabled(void) { return false; } static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) { return false; } static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return false; } static inline int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_jit_poke_descriptor *poke) { return -ENOTSUPP; } static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); } static inline bool bpf_jit_kallsyms_enabled(void) { return false; } static inline const char * __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { return NULL; } static inline bool is_bpf_text_address(unsigned long addr) { return false; } static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym) { return -ERANGE; } static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) { return NULL; } static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { return NULL; } static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) { } static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); #define BPF_ANC BIT(15) static inline bool bpf_needs_clear_a(const struct sock_filter *first) { switch (first->code) { case BPF_RET | BPF_K: case BPF_LD | BPF_W | BPF_LEN: return false; case BPF_LD | BPF_W | BPF_ABS: case BPF_LD | BPF_H | BPF_ABS: case BPF_LD | BPF_B | BPF_ABS: if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X) return true; return false; default: return true; } } static inline u16 bpf_anc_helper(const struct sock_filter *ftest) { BUG_ON(ftest->code & BPF_ANC); switch (ftest->code) { case BPF_LD | BPF_W | BPF_ABS: case BPF_LD | BPF_H | BPF_ABS: case BPF_LD | BPF_B | BPF_ABS: #define BPF_ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ return BPF_ANC | SKF_AD_##CODE switch (ftest->k) { BPF_ANCILLARY(PROTOCOL); BPF_ANCILLARY(PKTTYPE); BPF_ANCILLARY(IFINDEX); BPF_ANCILLARY(NLATTR); BPF_ANCILLARY(NLATTR_NEST); BPF_ANCILLARY(MARK); BPF_ANCILLARY(QUEUE); BPF_ANCILLARY(HATYPE); BPF_ANCILLARY(RXHASH); BPF_ANCILLARY(CPU); BPF_ANCILLARY(ALU_XOR_X); BPF_ANCILLARY(VLAN_TAG); BPF_ANCILLARY(VLAN_TAG_PRESENT); BPF_ANCILLARY(PAY_OFFSET); BPF_ANCILLARY(RANDOM); 
BPF_ANCILLARY(VLAN_TPID); } fallthrough; default: return ftest->code; } } void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size); static inline int bpf_tell_extensions(void) { return SKF_AD_MAX; } struct bpf_sock_addr_kern { struct sock *sk; struct sockaddr *uaddr; /* Temporary "register" to make indirect stores to nested structures * defined above. We need three registers to make such a store, but * only two (src and dst) are available at convert_ctx_access time */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ u32 uaddrlen; }; struct bpf_sock_ops_kern { struct sock *sk; union { u32 args[4]; u32 reply; u32 replylong[4]; }; struct sk_buff *syn_skb; struct sk_buff *skb; void *skb_data_end; u8 op; u8 is_fullsock; u8 remaining_opt_len; u64 temp; /* temp and everything after is not * initialized to 0 before calling * the BPF program. New fields that * should be initialized to 0 should * be inserted before temp. * temp is scratch storage used by * sock_ops_convert_ctx_access * as temporary storage of a register. */ }; struct bpf_sysctl_kern { struct ctl_table_header *head; struct ctl_table *table; void *cur_val; size_t cur_len; void *new_val; size_t new_len; int new_updated; int write; loff_t *ppos; /* Temporary "register" for indirect stores to ppos. */ u64 tmp_reg; }; #define BPF_SOCKOPT_KERN_BUF_SIZE 32 struct bpf_sockopt_buf { u8 data[BPF_SOCKOPT_KERN_BUF_SIZE]; }; struct bpf_sockopt_kern { struct sock *sk; u8 *optval; u8 *optval_end; s32 level; s32 optname; s32 optlen; /* for retval in struct bpf_cg_run_ctx */ struct task_struct *current_task; /* Temporary "register" for indirect stores to ppos. */ u64 tmp_reg; }; int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len); struct bpf_sk_lookup_kern { u16 family; u16 protocol; __be16 sport; u16 dport; struct { __be32 saddr; __be32 daddr; } v4; struct { const struct in6_addr *saddr; const struct in6_addr *daddr; } v6; struct sock *selected_sk; u32 ingress_ifindex; bool no_reuseport; }; extern struct static_key_false bpf_sk_lookup_enabled; /* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup. * * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and * SK_DROP. Their meaning is as follows: * * SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result * SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup * SK_DROP : terminate lookup with -ECONNREFUSED * * This macro aggregates return values and selected sockets from * multiple BPF programs according to following rules in order: * * 1. If any program returned SK_PASS and a non-NULL ctx.selected_sk, * macro result is SK_PASS and last ctx.selected_sk is used. * 2. If any program returned SK_DROP return value, * macro result is SK_DROP. * 3. Otherwise result is SK_PASS and ctx.selected_sk is NULL. * * Caller must ensure that the prog array is non-NULL, and that the * array as well as the programs it contains remain valid. 
*/ #define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func) \ ({ \ struct bpf_sk_lookup_kern *_ctx = &(ctx); \ struct bpf_prog_array_item *_item; \ struct sock *_selected_sk = NULL; \ bool _no_reuseport = false; \ struct bpf_prog *_prog; \ bool _all_pass = true; \ u32 _ret; \ \ migrate_disable(); \ _item = &(array)->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ /* restore most recent selection */ \ _ctx->selected_sk = _selected_sk; \ _ctx->no_reuseport = _no_reuseport; \ \ _ret = func(_prog, _ctx); \ if (_ret == SK_PASS && _ctx->selected_sk) { \ /* remember last non-NULL socket */ \ _selected_sk = _ctx->selected_sk; \ _no_reuseport = _ctx->no_reuseport; \ } else if (_ret == SK_DROP && _all_pass) { \ _all_pass = false; \ } \ _item++; \ } \ _ctx->selected_sk = _selected_sk; \ _ctx->no_reuseport = _no_reuseport; \ migrate_enable(); \ _all_pass || _selected_sk ? SK_PASS : SK_DROP; \ }) static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; bool no_reuseport = false; rcu_read_lock(); run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); if (run_array) { struct bpf_sk_lookup_kern ctx = { .family = AF_INET, .protocol = protocol, .v4.saddr = saddr, .v4.daddr = daddr, .sport = sport, .dport = dport, .ingress_ifindex = ifindex, }; u32 act; act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, bpf_prog_run); if (act == SK_PASS) { selected_sk = ctx.selected_sk; no_reuseport = ctx.no_reuseport; } else { selected_sk = ERR_PTR(-ECONNREFUSED); } } rcu_read_unlock(); *psk = selected_sk; return no_reuseport; } #if IS_ENABLED(CONFIG_IPV6) static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 dport, const int ifindex, struct sock **psk) { struct bpf_prog_array *run_array; struct sock *selected_sk = NULL; bool no_reuseport = false; rcu_read_lock(); run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); if (run_array) { struct bpf_sk_lookup_kern ctx = { .family = AF_INET6, .protocol = protocol, .v6.saddr = saddr, .v6.daddr = daddr, .sport = sport, .dport = dport, .ingress_ifindex = ifindex, }; u32 act; act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, bpf_prog_run); if (act == SK_PASS) { selected_sk = ctx.selected_sk; no_reuseport = ctx.no_reuseport; } else { selected_sk = ERR_PTR(-ECONNREFUSED); } } rcu_read_unlock(); *psk = selected_sk; return no_reuseport; } #endif /* IS_ENABLED(CONFIG_IPV6) */ static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, u64 flags, const u64 flag_mask, void *lookup_elem(struct bpf_map *map, u32 key)) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX; /* Lower bits of the flags are used as return code on lookup failure */ if (unlikely(flags & ~(action_mask | flag_mask))) return XDP_ABORTED; ri->tgt_value = lookup_elem(map, index); if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) { /* If the lookup fails we want to clear out the state in the * redirect_info struct completely, so that if an eBPF program * performs multiple lookups, the last one always takes * precedence. 
*/ ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */ ri->map_type = BPF_MAP_TYPE_UNSPEC; return flags & action_mask; } ri->tgt_index = index; ri->map_id = map->id; ri->map_type = map->map_type; if (flags & BPF_F_BROADCAST) { WRITE_ONCE(ri->map, map); ri->flags = flags; } else { WRITE_ONCE(ri->map, NULL); ri->flags = 0; } return XDP_REDIRECT; } #ifdef CONFIG_NET int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len); int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags); int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) { return -EOPNOTSUPP; } static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) { return -EOPNOTSUPP; } static inline int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len) { return -EOPNOTSUPP; } static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len) { return -EOPNOTSUPP; } static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) { return NULL; } static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush) { } #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */
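/*
 * Editor's note: an illustrative sketch, not part of linux/filter.h. It shows,
 * from the BPF program side, the __bpf_xdp_redirect_map() contract above: the
 * lower bits of the flags argument to bpf_redirect_map() act as the fallback
 * XDP action when the map lookup fails. Map and section names are made up.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, __u32);
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_redirect_sketch(struct xdp_md *ctx)
{
	__u32 key = ctx->ingress_ifindex;

	/* On a hit this returns XDP_REDIRECT and the kernel records
	 * tgt_index/map_id in bpf_redirect_info; on a miss it returns the
	 * fallback action encoded in the low bits of the flags (XDP_PASS).
	 */
	return bpf_redirect_map(&tx_ports, key, XDP_PASS);
}

char _license[] SEC("license") = "GPL";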
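/*
 * Editor's note: a second illustrative sketch (a separate BPF object, also
 * not part of linux/filter.h). It exercises the return-value contract that
 * BPF_PROG_SK_LOOKUP_RUN_ARRAY() aggregates: SK_PASS with a socket assigned
 * steers the lookup, SK_PASS without one falls through to the regular
 * hashtable lookup, and SK_DROP terminates it with -ECONNREFUSED. The map
 * name, port number and section name are made up.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} echo_sk SEC(".maps");

SEC("sk_lookup")
int steer_echo_port(struct bpf_sk_lookup *ctx)
{
	__u32 key = 0;
	struct bpf_sock *sk;
	long err;

	if (ctx->local_port != 7)
		return SK_PASS;		/* continue htable-based lookup */

	sk = bpf_map_lookup_elem(&echo_sk, &key);
	if (!sk)
		return SK_DROP;		/* terminate with -ECONNREFUSED */

	/* Sets ctx->selected_sk, which the runner macro above remembers. */
	err = bpf_sk_assign(ctx, sk, 0);
	bpf_sk_release(sk);
	return err ? SK_DROP : SK_PASS;
}

char _license[] SEC("license") = "GPL";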
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/fcntl.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/syscalls.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/sched/task.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/capability.h> #include <linux/dnotify.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/pipe_fs_i.h> #include <linux/security.h> #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/memfd.h> #include <linux/compat.h> #include <linux/mount.h> #include <linux/rw_hint.h> #include <linux/poll.h> #include <asm/siginfo.h> #include <linux/uaccess.h> #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned int arg) { struct inode * inode = file_inode(filp); int error = 0; /* * O_APPEND cannot be cleared if the file is marked as append-only * and the file is open for write. */ if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) return -EPERM; /* O_NOATIME can only be set by the owner or superuser */ if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) if (!inode_owner_or_capable(file_mnt_idmap(filp), inode)) return -EPERM; /* required for strict SunOS emulation */ if (O_NONBLOCK != O_NDELAY) if (arg & O_NDELAY) arg |= O_NONBLOCK; /* Pipe packetized mode is controlled by O_DIRECT flag */ if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT) && !(filp->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL; if (filp->f_op->check_flags) error = filp->f_op->check_flags(arg); if (error) return error; /* * ->fasync() is responsible for setting the FASYNC bit.
*/ if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); if (error < 0) goto out; if (error > 0) error = 0; } spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); filp->f_iocb_flags = iocb_flags(filp); spin_unlock(&filp->f_lock); out: return error; } static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int force) { write_lock_irq(&filp->f_owner.lock); if (force || !filp->f_owner.pid) { put_pid(filp->f_owner.pid); filp->f_owner.pid = get_pid(pid); filp->f_owner.pid_type = type; if (pid) { const struct cred *cred = current_cred(); filp->f_owner.uid = cred->uid; filp->f_owner.euid = cred->euid; } } write_unlock_irq(&filp->f_owner.lock); } void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { security_file_set_fowner(filp); f_modown(filp, pid, type, force); } EXPORT_SYMBOL(__f_setown); int f_setown(struct file *filp, int who, int force) { enum pid_type type; struct pid *pid = NULL; int ret = 0; type = PIDTYPE_TGID; if (who < 0) { /* avoid overflow below */ if (who == INT_MIN) return -EINVAL; type = PIDTYPE_PGID; who = -who; } rcu_read_lock(); if (who) { pid = find_vpid(who); if (!pid) ret = -ESRCH; } if (!ret) __f_setown(filp, pid, type, force); rcu_read_unlock(); return ret; } EXPORT_SYMBOL(f_setown); void f_delown(struct file *filp) { f_modown(filp, NULL, PIDTYPE_TGID, 1); } pid_t f_getown(struct file *filp) { pid_t pid = 0; read_lock_irq(&filp->f_owner.lock); rcu_read_lock(); if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { pid = pid_vnr(filp->f_owner.pid); if (filp->f_owner.pid_type == PIDTYPE_PGID) pid = -pid; } rcu_read_unlock(); read_unlock_irq(&filp->f_owner.lock); return pid; } static int f_setown_ex(struct file *filp, unsigned long arg) { struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner; struct pid *pid; int type; int ret; ret = copy_from_user(&owner, owner_p, sizeof(owner)); if (ret) return -EFAULT; switch (owner.type) { case F_OWNER_TID: type = PIDTYPE_PID; break; case F_OWNER_PID: type = PIDTYPE_TGID; break; case F_OWNER_PGRP: type = PIDTYPE_PGID; break; default: return -EINVAL; } rcu_read_lock(); pid = find_vpid(owner.pid); if (owner.pid && !pid) ret = -ESRCH; else __f_setown(filp, pid, type, 1); rcu_read_unlock(); return ret; } static int f_getown_ex(struct file *filp, unsigned long arg) { struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner = {}; int ret = 0; read_lock_irq(&filp->f_owner.lock); rcu_read_lock(); if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) owner.pid = pid_vnr(filp->f_owner.pid); rcu_read_unlock(); switch (filp->f_owner.pid_type) { case PIDTYPE_PID: owner.type = F_OWNER_TID; break; case PIDTYPE_TGID: owner.type = F_OWNER_PID; break; case PIDTYPE_PGID: owner.type = F_OWNER_PGRP; break; default: WARN_ON(1); ret = -EINVAL; break; } read_unlock_irq(&filp->f_owner.lock); if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); if (ret) ret = -EFAULT; } return ret; } #ifdef CONFIG_CHECKPOINT_RESTORE static int f_getowner_uids(struct file *filp, unsigned long arg) { struct user_namespace *user_ns = current_user_ns(); uid_t __user *dst = (void __user *)arg; uid_t src[2]; int err; read_lock_irq(&filp->f_owner.lock); src[0] = from_kuid(user_ns, filp->f_owner.uid); src[1] = from_kuid(user_ns, filp->f_owner.euid); read_unlock_irq(&filp->f_owner.lock); err = put_user(src[0], &dst[0]); err |= put_user(src[1], &dst[1]); return err; 
} #else static int f_getowner_uids(struct file *filp, unsigned long arg) { return -EINVAL; } #endif static bool rw_hint_valid(u64 hint) { BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET); BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE); BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT); BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM); BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG); BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME); switch (hint) { case RWH_WRITE_LIFE_NOT_SET: case RWH_WRITE_LIFE_NONE: case RWH_WRITE_LIFE_SHORT: case RWH_WRITE_LIFE_MEDIUM: case RWH_WRITE_LIFE_LONG: case RWH_WRITE_LIFE_EXTREME: return true; default: return false; } } static long fcntl_get_rw_hint(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); u64 __user *argp = (u64 __user *)arg; u64 hint = READ_ONCE(inode->i_write_hint); if (copy_to_user(argp, &hint, sizeof(*argp))) return -EFAULT; return 0; } static long fcntl_set_rw_hint(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(file); u64 __user *argp = (u64 __user *)arg; u64 hint; if (copy_from_user(&hint, argp, sizeof(hint))) return -EFAULT; if (!rw_hint_valid(hint)) return -EINVAL; WRITE_ONCE(inode->i_write_hint, hint); /* * file->f_mapping->host may differ from inode. As an example, * blkdev_open() modifies file->f_mapping. */ if (file->f_mapping->host != inode) WRITE_ONCE(file->f_mapping->host->i_write_hint, hint); return 0; } static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { void __user *argp = (void __user *)arg; int argi = (int)arg; struct flock flock; long err = -EINVAL; switch (cmd) { case F_DUPFD: err = f_dupfd(argi, filp, 0); break; case F_DUPFD_CLOEXEC: err = f_dupfd(argi, filp, O_CLOEXEC); break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; break; case F_SETFD: err = 0; set_close_on_exec(fd, argi & FD_CLOEXEC); break; case F_GETFL: err = filp->f_flags; break; case F_SETFL: err = setfl(fd, filp, argi); break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ case F_OFD_GETLK: #endif case F_GETLK: if (copy_from_user(&flock, argp, sizeof(flock))) return -EFAULT; err = fcntl_getlk(filp, cmd, &flock); if (!err && copy_to_user(argp, &flock, sizeof(flock))) return -EFAULT; break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ case F_OFD_SETLK: case F_OFD_SETLKW: fallthrough; #endif case F_SETLK: case F_SETLKW: if (copy_from_user(&flock, argp, sizeof(flock))) return -EFAULT; err = fcntl_setlk(fd, filp, cmd, &flock); break; case F_GETOWN: /* * XXX If f_owner is a process group, the * negative return value will get converted * into an error. Oops. If we keep the * current syscall conventions, the only way * to fix this will be in libc. */ err = f_getown(filp); force_successful_syscall_return(); break; case F_SETOWN: err = f_setown(filp, argi, 1); break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); break; case F_SETOWN_EX: err = f_setown_ex(filp, arg); break; case F_GETOWNER_UIDS: err = f_getowner_uids(filp, arg); break; case F_GETSIG: err = filp->f_owner.signum; break; case F_SETSIG: /* arg == 0 restores default behaviour. 
*/ if (!valid_signal(argi)) { break; } err = 0; filp->f_owner.signum = argi; break; case F_GETLEASE: err = fcntl_getlease(filp); break; case F_SETLEASE: err = fcntl_setlease(fd, filp, argi); break; case F_NOTIFY: err = fcntl_dirnotify(fd, filp, argi); break; case F_SETPIPE_SZ: case F_GETPIPE_SZ: err = pipe_fcntl(filp, cmd, argi); break; case F_ADD_SEALS: case F_GET_SEALS: err = memfd_fcntl(filp, cmd, argi); break; case F_GET_RW_HINT: err = fcntl_get_rw_hint(filp, cmd, arg); break; case F_SET_RW_HINT: err = fcntl_set_rw_hint(filp, cmd, arg); break; default: break; } return err; } static int check_fcntl_cmd(unsigned cmd) { switch (cmd) { case F_DUPFD: case F_DUPFD_CLOEXEC: case F_GETFD: case F_SETFD: case F_GETFL: return 1; } return 0; } SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct fd f = fdget_raw(fd); long err = -EBADF; if (!f.file) goto out; if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out1; } err = security_file_fcntl(f.file, cmd, arg); if (!err) err = do_fcntl(fd, cmd, arg, f.file); out1: fdput(f); out: return err; } #if BITS_PER_LONG == 32 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { void __user *argp = (void __user *)arg; struct fd f = fdget_raw(fd); struct flock64 flock; long err = -EBADF; if (!f.file) goto out; if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out1; } err = security_file_fcntl(f.file, cmd, arg); if (err) goto out1; switch (cmd) { case F_GETLK64: case F_OFD_GETLK: err = -EFAULT; if (copy_from_user(&flock, argp, sizeof(flock))) break; err = fcntl_getlk64(f.file, cmd, &flock); if (!err && copy_to_user(argp, &flock, sizeof(flock))) err = -EFAULT; break; case F_SETLK64: case F_SETLKW64: case F_OFD_SETLK: case F_OFD_SETLKW: err = -EFAULT; if (copy_from_user(&flock, argp, sizeof(flock))) break; err = fcntl_setlk64(fd, f.file, cmd, &flock); break; default: err = do_fcntl(fd, cmd, arg, f.file); break; } out1: fdput(f); out: return err; } #endif #ifdef CONFIG_COMPAT /* careful - don't use anywhere else */ #define copy_flock_fields(dst, src) \ (dst)->l_type = (src)->l_type; \ (dst)->l_whence = (src)->l_whence; \ (dst)->l_start = (src)->l_start; \ (dst)->l_len = (src)->l_len; \ (dst)->l_pid = (src)->l_pid; static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl) { struct compat_flock fl; if (copy_from_user(&fl, ufl, sizeof(struct compat_flock))) return -EFAULT; copy_flock_fields(kfl, &fl); return 0; } static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl) { struct compat_flock64 fl; if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64))) return -EFAULT; copy_flock_fields(kfl, &fl); return 0; } static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl) { struct compat_flock fl; memset(&fl, 0, sizeof(struct compat_flock)); copy_flock_fields(&fl, kfl); if (copy_to_user(ufl, &fl, sizeof(struct compat_flock))) return -EFAULT; return 0; } static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl) { struct compat_flock64 fl; BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start)); BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len)); memset(&fl, 0, sizeof(struct compat_flock64)); copy_flock_fields(&fl, kfl); if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64))) return -EFAULT; return 0; } #undef copy_flock_fields static unsigned int convert_fcntl_cmd(unsigned int cmd) { switch (cmd) { case F_GETLK64: return 
F_GETLK; case F_SETLK64: return F_SETLK; case F_SETLKW64: return F_SETLKW; } return cmd; } /* * GETLK was successful and we need to return the data, but it needs to fit in * the compat structure. * l_start shouldn't be too big, unless the original start + end is greater than * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return * -EOVERFLOW in that case. l_len could be too big, in which case we just * truncate it, and only allow the app to see that part of the conflicting lock * that might make sense to it anyway */ static int fixup_compat_flock(struct flock *flock) { if (flock->l_start > COMPAT_OFF_T_MAX) return -EOVERFLOW; if (flock->l_len > COMPAT_OFF_T_MAX) flock->l_len = COMPAT_OFF_T_MAX; return 0; } static long do_compat_fcntl64(unsigned int fd, unsigned int cmd, compat_ulong_t arg) { struct fd f = fdget_raw(fd); struct flock flock; long err = -EBADF; if (!f.file) return err; if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out_put; } err = security_file_fcntl(f.file, cmd, arg); if (err) goto out_put; switch (cmd) { case F_GETLK: err = get_compat_flock(&flock, compat_ptr(arg)); if (err) break; err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); if (err) break; err = fixup_compat_flock(&flock); if (!err) err = put_compat_flock(&flock, compat_ptr(arg)); break; case F_GETLK64: case F_OFD_GETLK: err = get_compat_flock64(&flock, compat_ptr(arg)); if (err) break; err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock); if (!err) err = put_compat_flock64(&flock, compat_ptr(arg)); break; case F_SETLK: case F_SETLKW: err = get_compat_flock(&flock, compat_ptr(arg)); if (err) break; err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock); break; case F_SETLK64: case F_SETLKW64: case F_OFD_SETLK: case F_OFD_SETLKW: err = get_compat_flock64(&flock, compat_ptr(arg)); if (err) break; err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock); break; default: err = do_fcntl(fd, cmd, arg, f.file); break; } out_put: fdput(f); return err; } COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { return do_compat_fcntl64(fd, cmd, arg); } COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { switch (cmd) { case F_GETLK64: case F_SETLK64: case F_SETLKW64: case F_OFD_GETLK: case F_OFD_SETLK: case F_OFD_SETLKW: return -EINVAL; } return do_compat_fcntl64(fd, cmd, arg); } #endif /* Table to convert sigio signal codes into poll band bitmaps */ static const __poll_t band_table[NSIGPOLL] = { EPOLLIN | EPOLLRDNORM, /* POLL_IN */ EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND, /* POLL_OUT */ EPOLLIN | EPOLLRDNORM | EPOLLMSG, /* POLL_MSG */ EPOLLERR, /* POLL_ERR */ EPOLLPRI | EPOLLRDBAND, /* POLL_PRI */ EPOLLHUP | EPOLLERR /* POLL_HUP */ }; static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { const struct cred *cred; int ret; rcu_read_lock(); cred = __task_cred(p); ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) || uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) || uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) && !security_file_send_sigiotask(p, fown, sig)); rcu_read_unlock(); return ret; } static void send_sigio_to_task(struct task_struct *p, struct fown_struct *fown, int fd, int reason, enum pid_type type) { /* * F_SETSIG can change ->signum lockless in parallel, make * sure we read it once and use the same value throughout. 
*/ int signum = READ_ONCE(fown->signum); if (!sigio_perm(p, fown, signum)) return; switch (signum) { default: { kernel_siginfo_t si; /* Queue a rt signal with the appropriate fd as its value. We use SI_SIGIO as the source, not SI_KERNEL, since kernel signals always get delivered even if we can't queue. Failure to queue in this case _should_ be reported; we fall back to SIGIO in that case. --sct */ clear_siginfo(&si); si.si_signo = signum; si.si_errno = 0; si.si_code = reason; /* * Posix definies POLL_IN and friends to be signal * specific si_codes for SIG_POLL. Linux extended * these si_codes to other signals in a way that is * ambiguous if other signals also have signal * specific si_codes. In that case use SI_SIGIO instead * to remove the ambiguity. */ if ((signum != SIGPOLL) && sig_specific_sicodes(signum)) si.si_code = SI_SIGIO; /* Make sure we are called with one of the POLL_* reasons, otherwise we could leak kernel stack into userspace. */ BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL)); if (reason - POLL_IN >= NSIGPOLL) si.si_band = ~0L; else si.si_band = mangle_poll(band_table[reason - POLL_IN]); si.si_fd = fd; if (!do_send_sig_info(signum, &si, p, type)) break; } fallthrough; /* fall back on the old plain SIGIO signal */ case 0: do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type); } } void send_sigio(struct fown_struct *fown, int fd, int band) { struct task_struct *p; enum pid_type type; unsigned long flags; struct pid *pid; read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; pid = fown->pid; if (!pid) goto out_unlock_fown; if (type <= PIDTYPE_TGID) { rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) send_sigio_to_task(p, fown, fd, band, type); rcu_read_unlock(); } else { read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { send_sigio_to_task(p, fown, fd, band, type); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); } out_unlock_fown: read_unlock_irqrestore(&fown->lock, flags); } static void send_sigurg_to_task(struct task_struct *p, struct fown_struct *fown, enum pid_type type) { if (sigio_perm(p, fown, SIGURG)) do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type); } int send_sigurg(struct fown_struct *fown) { struct task_struct *p; enum pid_type type; struct pid *pid; unsigned long flags; int ret = 0; read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; pid = fown->pid; if (!pid) goto out_unlock_fown; ret = 1; if (type <= PIDTYPE_TGID) { rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) send_sigurg_to_task(p, fown, type); rcu_read_unlock(); } else { read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { send_sigurg_to_task(p, fown, type); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); } out_unlock_fown: read_unlock_irqrestore(&fown->lock, flags); return ret; } static DEFINE_SPINLOCK(fasync_lock); static struct kmem_cache *fasync_cache __ro_after_init; /* * Remove a fasync entry. If successfully removed, return * positive and clear the FASYNC flag. If no entry exists, * do nothing and return 0. * * NOTE! It is very important that the FASYNC flag always * match the state "is the filp on a fasync list". 
* */ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *fa, **fp; int result = 0; spin_lock(&filp->f_lock); spin_lock(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file != filp) continue; write_lock_irq(&fa->fa_lock); fa->fa_file = NULL; write_unlock_irq(&fa->fa_lock); *fp = fa->fa_next; kfree_rcu(fa, fa_rcu); filp->f_flags &= ~FASYNC; result = 1; break; } spin_unlock(&fasync_lock); spin_unlock(&filp->f_lock); return result; } struct fasync_struct *fasync_alloc(void) { return kmem_cache_alloc(fasync_cache, GFP_KERNEL); } /* * NOTE! This can be used only for unused fasync entries: * entries that actually got inserted on the fasync list * need to be released by rcu - see fasync_remove_entry. */ void fasync_free(struct fasync_struct *new) { kmem_cache_free(fasync_cache, new); } /* * Insert a new entry into the fasync list. Return the pointer to the * old one if we didn't use the new one. * * NOTE! It is very important that the FASYNC flag always * match the state "is the filp on a fasync list". */ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) { struct fasync_struct *fa, **fp; spin_lock(&filp->f_lock); spin_lock(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file != filp) continue; write_lock_irq(&fa->fa_lock); fa->fa_fd = fd; write_unlock_irq(&fa->fa_lock); goto out; } rwlock_init(&new->fa_lock); new->magic = FASYNC_MAGIC; new->fa_file = filp; new->fa_fd = fd; new->fa_next = *fapp; rcu_assign_pointer(*fapp, new); filp->f_flags |= FASYNC; out: spin_unlock(&fasync_lock); spin_unlock(&filp->f_lock); return fa; } /* * Add a fasync entry. Return negative on error, positive if * added, and zero if did nothing but change an existing one. */ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *new; new = fasync_alloc(); if (!new) return -ENOMEM; /* * fasync_insert_entry() returns the old (update) entry if * it existed. * * So free the (unused) new entry and return 0 to let the * caller know that we didn't add any new fasync entries. */ if (fasync_insert_entry(fd, filp, fapp, new)) { fasync_free(new); return 0; } return 1; } /* * fasync_helper() is used by almost all character device drivers * to set up the fasync queue, and for regular files by the file * lease code. It returns negative on error, 0 if it did no changes * and positive if it added/deleted the entry. */ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) { if (!on) return fasync_remove_entry(filp, fapp); return fasync_add_entry(fd, filp, fapp); } EXPORT_SYMBOL(fasync_helper); /* * rcu_read_lock() is held */ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; unsigned long flags; if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a queued signum: SIGURG has its own default signalling mechanism. */ if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } } void kill_fasync(struct fasync_struct **fp, int sig, int band) { /* First a quick test without locking: usually * the list is empty. 
*/ if (*fp) { rcu_read_lock(); kill_fasync_rcu(rcu_dereference(*fp), sig, band); rcu_read_unlock(); } } EXPORT_SYMBOL(kill_fasync); static int __init fcntl_init(void) { /* * Please add new bits here to ensure allocation uniqueness. * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | __FMODE_EXEC | __FMODE_NONOTIFY)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); return 0; } module_init(fcntl_init)
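/*
 * Editor's note: an illustrative userspace sketch, not part of fs/fcntl.c.
 * It walks the classic sequence that arms the fasync machinery implemented
 * above: F_SETOWN lands in f_setown(), F_SETSIG in the F_SETSIG case of
 * do_fcntl(), and setting O_ASYNC via F_SETFL makes setfl() call ->fasync()
 * so the file ends up on a fasync list and later triggers send_sigio().
 * Error handling is kept minimal on purpose.
 */
#define _GNU_SOURCE		/* for F_SETSIG */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static int arm_sigio(int fd)
{
	int fl;

	if (fcntl(fd, F_SETOWN, getpid()) == -1)
		return -1;
	/* A realtime signal is queued and carries si_fd/si_band. */
	if (fcntl(fd, F_SETSIG, SIGRTMIN) == -1)
		return -1;
	fl = fcntl(fd, F_GETFL);
	if (fl == -1)
		return -1;
	return fcntl(fd, F_SETFL, fl | O_ASYNC | O_NONBLOCK);
}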
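/*
 * Editor's note: one more illustrative userspace sketch, not part of
 * fs/fcntl.c. It takes an open-file-description (OFD) write lock, i.e. the
 * F_OFD_SETLK path of do_fcntl() above (32-bit userspace reaches the same
 * code via fcntl64() or the compat entry points, which always use flock64).
 */
#define _GNU_SOURCE		/* for F_OFD_SETLK */
#include <fcntl.h>
#include <string.h>

static int lock_whole_file(int fd)
{
	struct flock fl;

	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;			/* 0 means "until EOF" */
	/* l_pid must stay 0 for OFD locks; do_fcntl() copies this struct
	 * from userspace and hands it to fcntl_setlk() for validation. */
	return fcntl(fd, F_OFD_SETLK, &fl);
}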
// SPDX-License-Identifier: GPL-2.0 /* * <linux/usb/gadget.h> * * We call the USB code inside a Linux-based peripheral device a "gadget" * driver, except for the hardware-specific bus glue. One USB host can * talk to many USB gadgets, but the gadgets are only able to communicate * to one host. * * * (C) Copyright 2002-2004 by David Brownell * All Rights Reserved. */ #ifndef __LINUX_USB_GADGET_H #define __LINUX_USB_GADGET_H #include <linux/configfs.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/scatterlist.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/usb/ch9.h> #define UDC_TRACE_STR_MAX 512 struct usb_ep; /** * struct usb_request - describes one i/o request * @buf: Buffer used for data. Always provide this; some controllers * only use PIO, or don't use DMA for some endpoints. * @dma: DMA address corresponding to 'buf'. If you don't set this * field, and the usb controller needs one, it is responsible * for mapping and unmapping the buffer. * @sg: a scatterlist for SG-capable controllers. * @num_sgs: number of SG entries * @num_mapped_sgs: number of SG entries mapped to DMA (internal) * @length: Length of that data * @stream_id: The stream id, when USB3.0 bulk streams are being used * @is_last: Indicates if this is the last request of a stream_id before * switching to a different stream (required for DWC3 controllers). * @no_interrupt: If true, hints that no completion irq is needed. * Helpful sometimes with deep request queues that are handled * directly by DMA controllers. * @zero: If true, when writing data, makes the last packet be "short" * by adding a zero length packet as needed; * @short_not_ok: When reading data, makes short packets be * treated as errors (queue stops advancing till cleanup). * @dma_mapped: Indicates if request has been mapped to DMA (internal) * @sg_was_mapped: Set if the scatterlist has been mapped before the request * @complete: Function called when request completes, so this request and * its buffer may be re-used. The function will always be called with * interrupts disabled, and it must not sleep. * Reads terminate with a short packet, or when the buffer fills, * whichever comes first. When writes terminate, some data bytes * will usually still be in flight (often in a hardware fifo). * Errors (for reads or writes) stop the queue from advancing * until the completion function returns, so that any transfers * invalidated by the error may first be dequeued. * @context: For use by the completion callback * @list: For use by the gadget driver. * @frame_number: Reports the interval number in (micro)frame in which the * isochronous transfer was transmitted or received. * @status: Reports completion code, zero or a negative errno. * Normally, faults block the transfer queue from advancing until * the completion callback returns. * Code "-ESHUTDOWN" indicates completion caused by device disconnect, * or when the driver disabled the endpoint. * @actual: Reports bytes transferred to/from the buffer. For reads (OUT * transfers) this may be less than the requested length. If the * short_not_ok flag is set, short reads are treated as errors * even when status otherwise indicates successful completion.
* Note that for writes (IN transfers) some data bytes may still * reside in a device-side FIFO when the request is reported as * complete. * * These are allocated/freed through the endpoint they're used with. The * hardware's driver can add extra per-request data to the memory it returns, * which often avoids separate memory allocations (potential failures), * later when the request is queued. * * Request flags affect request handling, such as whether a zero length * packet is written (the "zero" flag), whether a short read should be * treated as an error (blocking request queue advance, the "short_not_ok" * flag), or hinting that an interrupt is not required (the "no_interrupt" * flag, for use with deep request queues). * * Bulk endpoints can use any size buffers, and can also be used for interrupt * transfers. interrupt-only endpoints can be much less functional. * * NOTE: this is analogous to 'struct urb' on the host side, except that * it's thinner and promotes more pre-allocation. */ struct usb_request { void *buf; unsigned length; dma_addr_t dma; struct scatterlist *sg; unsigned num_sgs; unsigned num_mapped_sgs; unsigned stream_id:16; unsigned is_last:1; unsigned no_interrupt:1; unsigned zero:1; unsigned short_not_ok:1; unsigned dma_mapped:1; unsigned sg_was_mapped:1; void (*complete)(struct usb_ep *ep, struct usb_request *req); void *context; struct list_head list; unsigned frame_number; /* ISO ONLY */ int status; unsigned actual; }; /*-------------------------------------------------------------------------*/ /* endpoint-specific parts of the api to the usb controller hardware. * unlike the urb model, (de)multiplexing layers are not required. * (so this api could slash overhead if used on the host side...) * * note that device side usb controllers commonly differ in how many * endpoints they support, as well as their capabilities. */ struct usb_ep_ops { int (*enable) (struct usb_ep *ep, const struct usb_endpoint_descriptor *desc); int (*disable) (struct usb_ep *ep); void (*dispose) (struct usb_ep *ep); struct usb_request *(*alloc_request) (struct usb_ep *ep, gfp_t gfp_flags); void (*free_request) (struct usb_ep *ep, struct usb_request *req); int (*queue) (struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags); int (*dequeue) (struct usb_ep *ep, struct usb_request *req); int (*set_halt) (struct usb_ep *ep, int value); int (*set_wedge) (struct usb_ep *ep); int (*fifo_status) (struct usb_ep *ep); void (*fifo_flush) (struct usb_ep *ep); }; /** * struct usb_ep_caps - endpoint capabilities description * @type_control:Endpoint supports control type (reserved for ep0). * @type_iso:Endpoint supports isochronous transfers. * @type_bulk:Endpoint supports bulk transfers. * @type_int:Endpoint supports interrupt transfers. * @dir_in:Endpoint supports IN direction. * @dir_out:Endpoint supports OUT direction. 
*/ struct usb_ep_caps { unsigned type_control:1; unsigned type_iso:1; unsigned type_bulk:1; unsigned type_int:1; unsigned dir_in:1; unsigned dir_out:1; }; #define USB_EP_CAPS_TYPE_CONTROL 0x01 #define USB_EP_CAPS_TYPE_ISO 0x02 #define USB_EP_CAPS_TYPE_BULK 0x04 #define USB_EP_CAPS_TYPE_INT 0x08 #define USB_EP_CAPS_TYPE_ALL \ (USB_EP_CAPS_TYPE_ISO | USB_EP_CAPS_TYPE_BULK | USB_EP_CAPS_TYPE_INT) #define USB_EP_CAPS_DIR_IN 0x01 #define USB_EP_CAPS_DIR_OUT 0x02 #define USB_EP_CAPS_DIR_ALL (USB_EP_CAPS_DIR_IN | USB_EP_CAPS_DIR_OUT) #define USB_EP_CAPS(_type, _dir) \ { \ .type_control = !!(_type & USB_EP_CAPS_TYPE_CONTROL), \ .type_iso = !!(_type & USB_EP_CAPS_TYPE_ISO), \ .type_bulk = !!(_type & USB_EP_CAPS_TYPE_BULK), \ .type_int = !!(_type & USB_EP_CAPS_TYPE_INT), \ .dir_in = !!(_dir & USB_EP_CAPS_DIR_IN), \ .dir_out = !!(_dir & USB_EP_CAPS_DIR_OUT), \ } /** * struct usb_ep - device side representation of USB endpoint * @name:identifier for the endpoint, such as "ep-a" or "ep9in-bulk" * @ops: Function pointers used to access hardware-specific operations. * @ep_list:the gadget's ep_list holds all of its endpoints * @caps:The structure describing types and directions supported by endpoint. * @enabled: The current endpoint enabled/disabled state. * @claimed: True if this endpoint is claimed by a function. * @maxpacket:The maximum packet size used on this endpoint. The initial * value can sometimes be reduced (hardware allowing), according to * the endpoint descriptor used to configure the endpoint. * @maxpacket_limit:The maximum packet size value which can be handled by this * endpoint. It's set once by UDC driver when endpoint is initialized, and * should not be changed. Should not be confused with maxpacket. * @max_streams: The maximum number of streams supported * by this EP (0 - 16, actual number is 2^n) * @mult: multiplier, 'mult' value for SS Isoc EPs * @maxburst: the maximum number of bursts supported by this EP (for usb3) * @driver_data:for use by the gadget driver. * @address: used to identify the endpoint when finding descriptor that * matches connection speed * @desc: endpoint descriptor. This pointer is set before the endpoint is * enabled and remains valid until the endpoint is disabled. * @comp_desc: In case of SuperSpeed support, this is the endpoint companion * descriptor that is used to configure the endpoint * * the bus controller driver lists all the general purpose endpoints in * gadget->ep_list. the control endpoint (gadget->ep0) is not in that list, * and is accessed only in response to a driver setup() callback. 
*/ struct usb_ep { void *driver_data; const char *name; const struct usb_ep_ops *ops; struct list_head ep_list; struct usb_ep_caps caps; bool claimed; bool enabled; unsigned maxpacket:16; unsigned maxpacket_limit:16; unsigned max_streams:16; unsigned mult:2; unsigned maxburst:5; u8 address; const struct usb_endpoint_descriptor *desc; const struct usb_ss_ep_comp_descriptor *comp_desc; }; /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_USB_GADGET) void usb_ep_set_maxpacket_limit(struct usb_ep *ep, unsigned maxpacket_limit); int usb_ep_enable(struct usb_ep *ep); int usb_ep_disable(struct usb_ep *ep); struct usb_request *usb_ep_alloc_request(struct usb_ep *ep, gfp_t gfp_flags); void usb_ep_free_request(struct usb_ep *ep, struct usb_request *req); int usb_ep_queue(struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags); int usb_ep_dequeue(struct usb_ep *ep, struct usb_request *req); int usb_ep_set_halt(struct usb_ep *ep); int usb_ep_clear_halt(struct usb_ep *ep); int usb_ep_set_wedge(struct usb_ep *ep); int usb_ep_fifo_status(struct usb_ep *ep); void usb_ep_fifo_flush(struct usb_ep *ep); #else static inline void usb_ep_set_maxpacket_limit(struct usb_ep *ep, unsigned maxpacket_limit) { } static inline int usb_ep_enable(struct usb_ep *ep) { return 0; } static inline int usb_ep_disable(struct usb_ep *ep) { return 0; } static inline struct usb_request *usb_ep_alloc_request(struct usb_ep *ep, gfp_t gfp_flags) { return NULL; } static inline void usb_ep_free_request(struct usb_ep *ep, struct usb_request *req) { } static inline int usb_ep_queue(struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags) { return 0; } static inline int usb_ep_dequeue(struct usb_ep *ep, struct usb_request *req) { return 0; } static inline int usb_ep_set_halt(struct usb_ep *ep) { return 0; } static inline int usb_ep_clear_halt(struct usb_ep *ep) { return 0; } static inline int usb_ep_set_wedge(struct usb_ep *ep) { return 0; } static inline int usb_ep_fifo_status(struct usb_ep *ep) { return 0; } static inline void usb_ep_fifo_flush(struct usb_ep *ep) { } #endif /* USB_GADGET */ /*-------------------------------------------------------------------------*/ struct usb_dcd_config_params { __u8 bU1devExitLat; /* U1 Device exit Latency */ #define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */ __le16 bU2DevExitLat; /* U2 Device exit Latency */ #define USB_DEFAULT_U2_DEV_EXIT_LAT 0x1F4 /* Less then 500 microsec */ __u8 besl_baseline; /* Recommended baseline BESL (0-15) */ __u8 besl_deep; /* Recommended deep BESL (0-15) */ #define USB_DEFAULT_BESL_UNSPECIFIED 0xFF /* No recommended value */ }; struct usb_gadget; struct usb_gadget_driver; struct usb_udc; /* the rest of the api to the controller hardware: device operations, * which don't involve endpoints (or i/o). 
*/ struct usb_gadget_ops { int (*get_frame)(struct usb_gadget *); int (*wakeup)(struct usb_gadget *); int (*func_wakeup)(struct usb_gadget *gadget, int intf_id); int (*set_remote_wakeup)(struct usb_gadget *, int set); int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered); int (*vbus_session) (struct usb_gadget *, int is_active); int (*vbus_draw) (struct usb_gadget *, unsigned mA); int (*pullup) (struct usb_gadget *, int is_on); int (*ioctl)(struct usb_gadget *, unsigned code, unsigned long param); void (*get_config_params)(struct usb_gadget *, struct usb_dcd_config_params *); int (*udc_start)(struct usb_gadget *, struct usb_gadget_driver *); int (*udc_stop)(struct usb_gadget *); void (*udc_set_speed)(struct usb_gadget *, enum usb_device_speed); void (*udc_set_ssp_rate)(struct usb_gadget *gadget, enum usb_ssp_rate rate); void (*udc_async_callbacks)(struct usb_gadget *gadget, bool enable); struct usb_ep *(*match_ep)(struct usb_gadget *, struct usb_endpoint_descriptor *, struct usb_ss_ep_comp_descriptor *); int (*check_config)(struct usb_gadget *gadget); }; /** * struct usb_gadget - represents a usb device * @work: (internal use) Workqueue to be used for sysfs_notify() * @udc: struct usb_udc pointer for this gadget * @ops: Function pointers used to access hardware-specific operations. * @ep0: Endpoint zero, used when reading or writing responses to * driver setup() requests * @ep_list: List of other endpoints supported by the device. * @speed: Speed of current connection to USB host. * @max_speed: Maximal speed the UDC can handle. UDC must support this * and all slower speeds. * @ssp_rate: Current connected SuperSpeed Plus signaling rate and lane count. * @max_ssp_rate: Maximum SuperSpeed Plus signaling rate and lane count the UDC * can handle. The UDC must support this and all slower speeds and lower * number of lanes. * @state: the state we are now (attached, suspended, configured, etc) * @name: Identifies the controller hardware type. Used in diagnostics * and sometimes configuration. * @dev: Driver model state for this abstract device. * @isoch_delay: value from Set Isoch Delay request. Only valid on SS/SSP * @out_epnum: last used out ep number * @in_epnum: last used in ep number * @mA: last set mA value * @otg_caps: OTG capabilities of this gadget. * @sg_supported: true if we can handle scatter-gather * @is_otg: True if the USB device port uses a Mini-AB jack, so that the * gadget driver must provide a USB OTG descriptor. * @is_a_peripheral: False unless is_otg, the "A" end of a USB cable * is in the Mini-AB jack, and HNP has been used to switch roles * so that the "A" device currently acts as A-Peripheral, not A-Host. * @a_hnp_support: OTG device feature flag, indicating that the A-Host * supports HNP at this port. * @a_alt_hnp_support: OTG device feature flag, indicating that the A-Host * only supports HNP on a different root port. * @b_hnp_enable: OTG device feature flag, indicating that the A-Host * enabled HNP support. * @hnp_polling_support: OTG device feature flag, indicating if the OTG device * in peripheral mode can support HNP polling. * @host_request_flag: OTG device feature flag, indicating if A-Peripheral * or B-Peripheral wants to take host role. * @quirk_ep_out_aligned_size: epout requires buffer size to be aligned to * MaxPacketSize. * @quirk_altset_not_supp: UDC controller doesn't support alt settings. * @quirk_stall_not_supp: UDC controller doesn't support stalling. * @quirk_zlp_not_supp: UDC controller doesn't support ZLP. 
* @quirk_avoids_skb_reserve: udc/platform wants to avoid skb_reserve() in * u_ether.c to improve performance. * @is_selfpowered: if the gadget is self-powered. * @deactivated: True if gadget is deactivated - in deactivated state it cannot * be connected. * @connected: True if gadget is connected. * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag * indicates that it supports LPM as per the LPM ECN & errata. * @wakeup_capable: True if gadget is capable of sending remote wakeup. * @wakeup_armed: True if gadget is armed by the host for remote wakeup. * @irq: the interrupt number for device controller. * @id_number: a unique ID number for ensuring that gadget names are distinct * * Gadgets have a mostly-portable "gadget driver" implementing device * functions, handling all usb configurations and interfaces. Gadget * drivers talk to hardware-specific code indirectly, through ops vectors. * That insulates the gadget driver from hardware details, and packages * the hardware endpoints through generic i/o queues. The "usb_gadget" * and "usb_ep" interfaces provide that insulation from the hardware. * * Except for the driver data, all fields in this structure are * read-only to the gadget driver. That driver data is part of the * "driver model" infrastructure in 2.6 (and later) kernels, and for * earlier systems is grouped in a similar structure that's not known * to the rest of the kernel. * * Values of the three OTG device feature flags are updated before the * setup() call corresponding to USB_REQ_SET_CONFIGURATION, and before * driver suspend() calls. They are valid only when is_otg, and when the * device is acting as a B-Peripheral (so is_a_peripheral is false). */ struct usb_gadget { struct work_struct work; struct usb_udc *udc; /* readonly to gadget driver */ const struct usb_gadget_ops *ops; struct usb_ep *ep0; struct list_head ep_list; /* of usb_ep */ enum usb_device_speed speed; enum usb_device_speed max_speed; /* USB SuperSpeed Plus only */ enum usb_ssp_rate ssp_rate; enum usb_ssp_rate max_ssp_rate; enum usb_device_state state; const char *name; struct device dev; unsigned isoch_delay; unsigned out_epnum; unsigned in_epnum; unsigned mA; struct usb_otg_caps *otg_caps; unsigned sg_supported:1; unsigned is_otg:1; unsigned is_a_peripheral:1; unsigned b_hnp_enable:1; unsigned a_hnp_support:1; unsigned a_alt_hnp_support:1; unsigned hnp_polling_support:1; unsigned host_request_flag:1; unsigned quirk_ep_out_aligned_size:1; unsigned quirk_altset_not_supp:1; unsigned quirk_stall_not_supp:1; unsigned quirk_zlp_not_supp:1; unsigned quirk_avoids_skb_reserve:1; unsigned is_selfpowered:1; unsigned deactivated:1; unsigned connected:1; unsigned lpm_capable:1; unsigned wakeup_capable:1; unsigned wakeup_armed:1; int irq; int id_number; }; #define work_to_gadget(w) (container_of((w), struct usb_gadget, work)) /* Interface to the device model */ static inline void set_gadget_data(struct usb_gadget *gadget, void *data) { dev_set_drvdata(&gadget->dev, data); } static inline void *get_gadget_data(struct usb_gadget *gadget) { return dev_get_drvdata(&gadget->dev); } static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev) { return container_of(dev, struct usb_gadget, dev); } static inline struct usb_gadget *usb_get_gadget(struct usb_gadget *gadget) { get_device(&gadget->dev); return gadget; } static inline void usb_put_gadget(struct usb_gadget *gadget) { put_device(&gadget->dev); } extern void usb_initialize_gadget(struct device *parent, struct usb_gadget *gadget, void 
(*release)(struct device *dev)); extern int usb_add_gadget(struct usb_gadget *gadget); extern void usb_del_gadget(struct usb_gadget *gadget); /* Legacy device-model interface */ extern int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, void (*release)(struct device *dev)); extern int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget); extern void usb_del_gadget_udc(struct usb_gadget *gadget); extern char *usb_get_gadget_udc_name(void); /* iterates the non-control endpoints; 'tmp' is a struct usb_ep pointer */ #define gadget_for_each_ep(tmp, gadget) \ list_for_each_entry(tmp, &(gadget)->ep_list, ep_list) /** * usb_ep_align - returns @len aligned to ep's maxpacketsize. * @ep: the endpoint whose maxpacketsize is used to align @len * @len: buffer size's length to align to @ep's maxpacketsize * * This helper is used to align buffer's size to an ep's maxpacketsize. */ static inline size_t usb_ep_align(struct usb_ep *ep, size_t len) { int max_packet_size = (size_t)usb_endpoint_maxp(ep->desc); return round_up(len, max_packet_size); } /** * usb_ep_align_maybe - returns @len aligned to ep's maxpacketsize if gadget * requires quirk_ep_out_aligned_size, otherwise returns len. * @g: controller to check for quirk * @ep: the endpoint whose maxpacketsize is used to align @len * @len: buffer size's length to align to @ep's maxpacketsize * * This helper is used in case it's required for any reason to check and maybe * align buffer's size to an ep's maxpacketsize. */ static inline size_t usb_ep_align_maybe(struct usb_gadget *g, struct usb_ep *ep, size_t len) { return g->quirk_ep_out_aligned_size ? usb_ep_align(ep, len) : len; } /** * gadget_is_altset_supported - return true iff the hardware supports * altsettings * @g: controller to check for quirk */ static inline int gadget_is_altset_supported(struct usb_gadget *g) { return !g->quirk_altset_not_supp; } /** * gadget_is_stall_supported - return true iff the hardware supports stalling * @g: controller to check for quirk */ static inline int gadget_is_stall_supported(struct usb_gadget *g) { return !g->quirk_stall_not_supp; } /** * gadget_is_zlp_supported - return true iff the hardware supports zlp * @g: controller to check for quirk */ static inline int gadget_is_zlp_supported(struct usb_gadget *g) { return !g->quirk_zlp_not_supp; } /** * gadget_avoids_skb_reserve - return true iff the hardware would like to avoid * skb_reserve to improve performance. 
* @g: controller to check for quirk */ static inline int gadget_avoids_skb_reserve(struct usb_gadget *g) { return g->quirk_avoids_skb_reserve; } /** * gadget_is_dualspeed - return true iff the hardware handles high speed * @g: controller that might support both high and full speeds */ static inline int gadget_is_dualspeed(struct usb_gadget *g) { return g->max_speed >= USB_SPEED_HIGH; } /** * gadget_is_superspeed() - return true if the hardware handles superspeed * @g: controller that might support superspeed */ static inline int gadget_is_superspeed(struct usb_gadget *g) { return g->max_speed >= USB_SPEED_SUPER; } /** * gadget_is_superspeed_plus() - return true if the hardware handles * superspeed plus * @g: controller that might support superspeed plus */ static inline int gadget_is_superspeed_plus(struct usb_gadget *g) { return g->max_speed >= USB_SPEED_SUPER_PLUS; } /** * gadget_is_otg - return true iff the hardware is OTG-ready * @g: controller that might have a Mini-AB connector * * This is a runtime test, since kernels with a USB-OTG stack sometimes * run on boards which only have a Mini-B (or Mini-A) connector. */ static inline int gadget_is_otg(struct usb_gadget *g) { #ifdef CONFIG_USB_OTG return g->is_otg; #else return 0; #endif } /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_USB_GADGET) int usb_gadget_frame_number(struct usb_gadget *gadget); int usb_gadget_wakeup(struct usb_gadget *gadget); int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set); int usb_gadget_set_selfpowered(struct usb_gadget *gadget); int usb_gadget_clear_selfpowered(struct usb_gadget *gadget); int usb_gadget_vbus_connect(struct usb_gadget *gadget); int usb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA); int usb_gadget_vbus_disconnect(struct usb_gadget *gadget); int usb_gadget_connect(struct usb_gadget *gadget); int usb_gadget_disconnect(struct usb_gadget *gadget); int usb_gadget_deactivate(struct usb_gadget *gadget); int usb_gadget_activate(struct usb_gadget *gadget); int usb_gadget_check_config(struct usb_gadget *gadget); #else static inline int usb_gadget_frame_number(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_wakeup(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) { return 0; } static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_vbus_connect(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA) { return 0; } static inline int usb_gadget_vbus_disconnect(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_connect(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_disconnect(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_deactivate(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_activate(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_check_config(struct usb_gadget *gadget) { return 0; } #endif /* CONFIG_USB_GADGET */ /*-------------------------------------------------------------------------*/ /** * struct usb_gadget_driver - driver for usb gadget devices * @function: String describing the gadget's function * @max_speed: Highest speed the driver handles. 
* @setup: Invoked for ep0 control requests that aren't handled by * the hardware level driver. Most calls must be handled by * the gadget driver, including descriptor and configuration * management. The 16 bit members of the setup data are in * USB byte order. Called in_interrupt; this may not sleep. Driver * queues a response to ep0, or returns negative to stall. * @disconnect: Invoked after all transfers have been stopped, * when the host is disconnected. May be called in_interrupt; this * may not sleep. Some devices can't detect disconnect, so this might * not be called except as part of controller shutdown. * @bind: the driver's bind callback * @unbind: Invoked when the driver is unbound from a gadget, * usually from rmmod (after a disconnect is reported). * Called in a context that permits sleeping. * @suspend: Invoked on USB suspend. May be called in_interrupt. * @resume: Invoked on USB resume. May be called in_interrupt. * @reset: Invoked on USB bus reset. It is mandatory for all gadget drivers * and should be called in_interrupt. * @driver: Driver model state for this driver. * @udc_name: A name of UDC this driver should be bound to. If udc_name is NULL, * this driver will be bound to any available UDC. * @match_existing_only: If udc is not found, return an error and fail * the driver registration * @is_bound: Allow a driver to be bound to only one gadget * * Devices are disabled till a gadget driver successfully bind()s, which * means the driver will handle setup() requests needed to enumerate (and * meet "chapter 9" requirements) then do some useful work. * * If gadget->is_otg is true, the gadget driver must provide an OTG * descriptor during enumeration, or else fail the bind() call. In such * cases, no USB traffic may flow until both bind() returns without * having called usb_gadget_disconnect(), and the USB host stack has * initialized. * * Drivers use hardware-specific knowledge to configure the usb hardware. * endpoint addressing is only one of several hardware characteristics that * are in descriptors the ep0 implementation returns from setup() calls. * * Except for ep0 implementation, most driver code shouldn't need change to * run on top of different usb controllers. It'll use endpoints set up by * that ep0 implementation. * * The usb controller driver handles a few standard usb requests. Those * include set_address, and feature flags for devices, interfaces, and * endpoints (the get_status, set_feature, and clear_feature requests). * * Accordingly, the driver's setup() callback must always implement all * get_descriptor requests, returning at least a device descriptor and * a configuration descriptor. Drivers must make sure the endpoint * descriptors match any hardware constraints. Some hardware also constrains * other descriptors. (The pxa250 allows only configurations 1, 2, or 3). * * The driver's setup() callback must also implement set_configuration, * and should also implement set_interface, get_configuration, and * get_interface. Setting a configuration (or interface) is where * endpoints should be activated or (config 0) shut down. * * The gadget driver's setup() callback does not have to queue a response to * ep0 within the setup() call, the driver can do it after setup() returns. * The UDC driver must wait until such a response is queued before proceeding * with the data/status stages of the control transfer. * * NOTE: Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS * being returned from the setup() callback, which is a bug. 
See the comment * next to USB_GADGET_DELAYED_STATUS for details. * * (Note that only the default control endpoint is supported. Neither * hosts nor devices generally support control traffic except to ep0.) * * Most devices will ignore USB suspend/resume operations, and so will * not provide those callbacks. However, some may need to change modes * when the host is not longer directing those activities. For example, * local controls (buttons, dials, etc) may need to be re-enabled since * the (remote) host can't do that any longer; or an error state might * be cleared, to make the device behave identically whether or not * power is maintained. */ struct usb_gadget_driver { char *function; enum usb_device_speed max_speed; int (*bind)(struct usb_gadget *gadget, struct usb_gadget_driver *driver); void (*unbind)(struct usb_gadget *); int (*setup)(struct usb_gadget *, const struct usb_ctrlrequest *); void (*disconnect)(struct usb_gadget *); void (*suspend)(struct usb_gadget *); void (*resume)(struct usb_gadget *); void (*reset)(struct usb_gadget *); /* FIXME support safe rmmod */ struct device_driver driver; char *udc_name; unsigned match_existing_only:1; bool is_bound:1; }; /*-------------------------------------------------------------------------*/ /* driver modules register and unregister, as usual. * these calls must be made in a context that can sleep. * * A gadget driver can be bound to only one gadget at a time. */ /** * usb_gadget_register_driver_owner - register a gadget driver * @driver: the driver being registered * @owner: the driver module * @mod_name: the driver module's build name * Context: can sleep * * Call this in your gadget driver's module initialization function, * to tell the underlying UDC controller driver about your driver. * The @bind() function will be called to bind it to a gadget before this * registration call returns. It's expected that the @bind() function will * be in init sections. * * Use the macro defined below instead of calling this directly. */ int usb_gadget_register_driver_owner(struct usb_gadget_driver *driver, struct module *owner, const char *mod_name); /* use a define to avoid include chaining to get THIS_MODULE & friends */ #define usb_gadget_register_driver(driver) \ usb_gadget_register_driver_owner(driver, THIS_MODULE, KBUILD_MODNAME) /** * usb_gadget_unregister_driver - unregister a gadget driver * @driver:the driver being unregistered * Context: can sleep * * Call this in your gadget driver's module cleanup function, * to tell the underlying usb controller that your driver is * going away. If the controller is connected to a USB host, * it will first disconnect(). The driver is also requested * to unbind() and clean up any device state, before this procedure * finally returns. It's expected that the unbind() functions * will be in exit sections, so may not be linked in some kernels. */ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver); /*-------------------------------------------------------------------------*/ /* utility to simplify dealing with string descriptors */ /** * struct usb_string - wraps a C string and its USB id * @id:the (nonzero) ID for this string * @s:the string, in UTF-8 encoding * * If you're using usb_gadget_get_string(), use this to wrap a string * together with its ID. 
*/ struct usb_string { u8 id; const char *s; }; /** * struct usb_gadget_strings - a set of USB strings in a given language * @language:identifies the strings' language (0x0409 for en-us) * @strings:array of strings with their ids * * If you're using usb_gadget_get_string(), use this to wrap all the * strings for a given language. */ struct usb_gadget_strings { u16 language; /* 0x0409 for en-us */ struct usb_string *strings; }; struct usb_gadget_string_container { struct list_head list; u8 *stash[]; }; /* put descriptor for string with that id into buf (buflen >= 256) */ int usb_gadget_get_string(const struct usb_gadget_strings *table, int id, u8 *buf); /* check if the given language identifier is valid */ bool usb_validate_langid(u16 langid); struct gadget_string { struct config_item item; struct list_head list; char string[USB_MAX_STRING_LEN]; struct usb_string usb_string; }; #define to_gadget_string(str_item)\ container_of(str_item, struct gadget_string, item) /*-------------------------------------------------------------------------*/ /* utility to simplify managing config descriptors */ /* write vector of descriptors into buffer */ int usb_descriptor_fillbuf(void *, unsigned, const struct usb_descriptor_header **); /* build config descriptor from single descriptor vector */ int usb_gadget_config_buf(const struct usb_config_descriptor *config, void *buf, unsigned buflen, const struct usb_descriptor_header **desc); /* copy a NULL-terminated vector of descriptors */ struct usb_descriptor_header **usb_copy_descriptors( struct usb_descriptor_header **); /** * usb_free_descriptors - free descriptors returned by usb_copy_descriptors() * @v: vector of descriptors */ static inline void usb_free_descriptors(struct usb_descriptor_header **v) { kfree(v); } struct usb_function; int usb_assign_descriptors(struct usb_function *f, struct usb_descriptor_header **fs, struct usb_descriptor_header **hs, struct usb_descriptor_header **ss, struct usb_descriptor_header **ssp); void usb_free_all_descriptors(struct usb_function *f); struct usb_descriptor_header *usb_otg_descriptor_alloc( struct usb_gadget *gadget); int usb_otg_descriptor_init(struct usb_gadget *gadget, struct usb_descriptor_header *otg_desc); /*-------------------------------------------------------------------------*/ /* utility to simplify map/unmap of usb_requests to/from DMA */ #ifdef CONFIG_HAS_DMA extern int usb_gadget_map_request_by_dev(struct device *dev, struct usb_request *req, int is_in); extern int usb_gadget_map_request(struct usb_gadget *gadget, struct usb_request *req, int is_in); extern void usb_gadget_unmap_request_by_dev(struct device *dev, struct usb_request *req, int is_in); extern void usb_gadget_unmap_request(struct usb_gadget *gadget, struct usb_request *req, int is_in); #else /* !CONFIG_HAS_DMA */ static inline int usb_gadget_map_request_by_dev(struct device *dev, struct usb_request *req, int is_in) { return -ENOSYS; } static inline int usb_gadget_map_request(struct usb_gadget *gadget, struct usb_request *req, int is_in) { return -ENOSYS; } static inline void usb_gadget_unmap_request_by_dev(struct device *dev, struct usb_request *req, int is_in) { } static inline void usb_gadget_unmap_request(struct usb_gadget *gadget, struct usb_request *req, int is_in) { } #endif /* !CONFIG_HAS_DMA */ /*-------------------------------------------------------------------------*/ /* utility to set gadget state properly */ extern void usb_gadget_set_state(struct usb_gadget *gadget, enum usb_device_state state); 
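/*
 * Illustrative sketch (not part of the original header): wrapping a
 * gadget's strings with struct usb_string / struct usb_gadget_strings
 * so a GET_DESCRIPTOR(string) request can be answered from the ep0
 * setup() handler with usb_gadget_get_string().  All "example_" /
 * "EXAMPLE_" identifiers and the string IDs are hypothetical.
 */
#define EXAMPLE_MANUFACTURER_IDX        1
#define EXAMPLE_PRODUCT_IDX             2

static struct usb_string example_strings[] = {
        { .id = EXAMPLE_MANUFACTURER_IDX, .s = "Example Manufacturer" },
        { .id = EXAMPLE_PRODUCT_IDX,      .s = "Example Gadget" },
        { }     /* end of list: id == 0 terminates the table */
};

static struct usb_gadget_strings example_stringtab = {
        .language = 0x0409,     /* en-us */
        .strings  = example_strings,
};

/* Fills @buf (at least 256 bytes) with the UTF-16LE string descriptor;
 * returns the descriptor length, or a negative errno if @id is unknown. */
static int example_fill_string_descriptor(int id, u8 *buf)
{
        return usb_gadget_get_string(&example_stringtab, id, buf);
}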
/*-------------------------------------------------------------------------*/ /* utility to tell udc core that the bus reset occurs */ extern void usb_gadget_udc_reset(struct usb_gadget *gadget, struct usb_gadget_driver *driver); /*-------------------------------------------------------------------------*/ /* utility to give requests back to the gadget layer */ extern void usb_gadget_giveback_request(struct usb_ep *ep, struct usb_request *req); /*-------------------------------------------------------------------------*/ /* utility to find endpoint by name */ extern struct usb_ep *gadget_find_ep_by_name(struct usb_gadget *g, const char *name); /*-------------------------------------------------------------------------*/ /* utility to check if endpoint caps match descriptor needs */ extern int usb_gadget_ep_match_desc(struct usb_gadget *gadget, struct usb_ep *ep, struct usb_endpoint_descriptor *desc, struct usb_ss_ep_comp_descriptor *ep_comp); /*-------------------------------------------------------------------------*/ /* utility to update vbus status for udc core, it may be scheduled */ extern void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status); /*-------------------------------------------------------------------------*/ /* utility wrapping a simple endpoint selection policy */ extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *, struct usb_endpoint_descriptor *); extern struct usb_ep *usb_ep_autoconfig_ss(struct usb_gadget *, struct usb_endpoint_descriptor *, struct usb_ss_ep_comp_descriptor *); extern void usb_ep_autoconfig_release(struct usb_ep *); extern void usb_ep_autoconfig_reset(struct usb_gadget *); #endif /* __LINUX_USB_GADGET_H */
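/*
 * Illustrative sketch (not part of the original header): a minimal flow
 * for claiming, enabling and using one bulk IN endpoint with the
 * interfaces declared above.  All "example_" identifiers are
 * hypothetical and error handling is trimmed.  In a real driver,
 * usb_ep_autoconfig() runs once at bind() time and usb_ep_enable()
 * runs when the host selects the configuration or interface; the two
 * steps are compressed into one function here for brevity.
 */
static struct usb_endpoint_descriptor example_bulk_in_desc = {
        .bLength          = USB_DT_ENDPOINT_SIZE,
        .bDescriptorType  = USB_DT_ENDPOINT,
        .bEndpointAddress = USB_DIR_IN,         /* address filled in by autoconfig */
        .bmAttributes     = USB_ENDPOINT_XFER_BULK,
        .wMaxPacketSize   = cpu_to_le16(64),
};

static void example_complete(struct usb_ep *ep, struct usb_request *req)
{
        /* req->status and req->actual describe how the transfer ended */
        if (req->status)
                pr_debug("bulk-in request failed: %d\n", req->status);
        usb_ep_free_request(ep, req);
}

static int example_start_bulk_in(struct usb_gadget *gadget, void *buf, unsigned len)
{
        struct usb_ep *ep;
        struct usb_request *req;
        int status;

        /* pick a hardware endpoint that can back this descriptor */
        ep = usb_ep_autoconfig(gadget, &example_bulk_in_desc);
        if (!ep)
                return -ENODEV;

        ep->desc = &example_bulk_in_desc;
        status = usb_ep_enable(ep);
        if (status)
                return status;

        req = usb_ep_alloc_request(ep, GFP_KERNEL);
        if (!req) {
                usb_ep_disable(ep);
                return -ENOMEM;
        }
        req->buf = buf;
        req->length = len;
        req->complete = example_complete;

        status = usb_ep_queue(ep, req, GFP_KERNEL);
        if (status) {
                usb_ep_free_request(ep, req);
                usb_ep_disable(ep);
        }
        return status;
}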
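/*
 * Illustrative sketch (not part of the original header): the skeleton
 * of a non-composite gadget driver built directly on struct
 * usb_gadget_driver and registered as the kerneldoc above describes.
 * All "example_" identifiers are hypothetical and the callbacks are
 * stubs; a real driver fills setup() with descriptor and configuration
 * handling and claims its endpoints in bind(), e.g. with
 * usb_ep_autoconfig() as in the previous sketch.
 */
static int example_bind(struct usb_gadget *gadget,
                        struct usb_gadget_driver *driver)
{
        /* claim endpoints, allocate an ep0 request, stash driver state ... */
        set_gadget_data(gadget, NULL);
        return 0;
}

static void example_unbind(struct usb_gadget *gadget)
{
        /* release whatever bind() allocated */
}

static int example_setup(struct usb_gadget *gadget,
                         const struct usb_ctrlrequest *ctrl)
{
        /*
         * Handle GET_DESCRIPTOR, SET_CONFIGURATION, ... and queue a
         * response on gadget->ep0; returning a negative value makes
         * the UDC stall ep0.
         */
        return -EOPNOTSUPP;
}

static void example_disconnect(struct usb_gadget *gadget)
{
}

static struct usb_gadget_driver example_gadget_driver = {
        .function       = "example",
        .max_speed      = USB_SPEED_HIGH,
        .bind           = example_bind,
        .unbind         = example_unbind,
        .setup          = example_setup,
        .disconnect     = example_disconnect,
        .driver         = {
                .name   = "example_gadget",
        },
};

static int __init example_gadget_init(void)
{
        /* binds to any available UDC since udc_name is left NULL */
        return usb_gadget_register_driver(&example_gadget_driver);
}
module_init(example_gadget_init);

static void __exit example_gadget_exit(void)
{
        usb_gadget_unregister_driver(&example_gadget_driver);
}
module_exit(example_gadget_exit);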
// SPDX-License-Identifier: GPL-2.0-only /* File: fs/xattr.c Extended attribute handling. Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com> Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #include <linux/fs.h> #include <linux/filelock.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/xattr.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/export.h> #include <linux/fsnotify.h> #include <linux/audit.h> #include <linux/vmalloc.h> #include <linux/posix_acl_xattr.h> #include <linux/uaccess.h> #include "internal.h" static const char * strcmp_prefix(const char *a, const char *a_prefix) { while (*a_prefix && *a == *a_prefix) { a++; a_prefix++; } return *a_prefix ?
NULL : a; } /* * In order to implement different sets of xattr operations for each xattr * prefix, a filesystem should create a null-terminated array of struct * xattr_handler (one for each prefix) and hang a pointer to it off of the * s_xattr field of the superblock. */ #define for_each_xattr_handler(handlers, handler) \ if (handlers) \ for ((handler) = *(handlers)++; \ (handler) != NULL; \ (handler) = *(handlers)++) /* * Find the xattr_handler with the matching prefix. */ static const struct xattr_handler * xattr_resolve_name(struct inode *inode, const char **name) { const struct xattr_handler * const *handlers = inode->i_sb->s_xattr; const struct xattr_handler *handler; if (!(inode->i_opflags & IOP_XATTR)) { if (unlikely(is_bad_inode(inode))) return ERR_PTR(-EIO); return ERR_PTR(-EOPNOTSUPP); } for_each_xattr_handler(handlers, handler) { const char *n; n = strcmp_prefix(*name, xattr_prefix(handler)); if (n) { if (!handler->prefix ^ !*n) { if (*n) continue; return ERR_PTR(-EINVAL); } *name = n; return handler; } } return ERR_PTR(-EOPNOTSUPP); } /** * may_write_xattr - check whether inode allows writing xattr * @idmap: idmap of the mount the inode was found from * @inode: the inode on which to set an xattr * * Check whether the inode allows writing xattrs. Specifically, we can never * set or remove an extended attribute on a read-only filesystem or on an * immutable / append-only inode. * * We also need to ensure that the inode has a mapping in the mount to * not risk writing back invalid i_{g,u}id values. * * Return: On success zero is returned. On error a negative errno is returned. */ int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode) { if (IS_IMMUTABLE(inode)) return -EPERM; if (IS_APPEND(inode)) return -EPERM; if (HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; return 0; } /* * Check permissions for extended attribute access. This is a bit complicated * because different namespaces have very different rules. */ static int xattr_permission(struct mnt_idmap *idmap, struct inode *inode, const char *name, int mask) { if (mask & MAY_WRITE) { int ret; ret = may_write_xattr(idmap, inode); if (ret) return ret; } /* * No restriction for security.* and system.* from the VFS. Decision * on these is left to the underlying filesystem / security module. */ if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return 0; /* * The trusted.* namespace can only be accessed by privileged users. */ if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { if (!capable(CAP_SYS_ADMIN)) return (mask & MAY_WRITE) ? -EPERM : -ENODATA; return 0; } /* * In the user.* namespace, only regular files and directories can have * extended attributes. For sticky directories, only the owner and * privileged users can write attributes. */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) return (mask & MAY_WRITE) ? -EPERM : -ENODATA; if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && (mask & MAY_WRITE) && !inode_owner_or_capable(idmap, inode)) return -EPERM; } return inode_permission(idmap, inode, mask); } /* * Look for any handler that deals with the specified namespace. 
*/ int xattr_supports_user_prefix(struct inode *inode) { const struct xattr_handler * const *handlers = inode->i_sb->s_xattr; const struct xattr_handler *handler; if (!(inode->i_opflags & IOP_XATTR)) { if (unlikely(is_bad_inode(inode))) return -EIO; return -EOPNOTSUPP; } for_each_xattr_handler(handlers, handler) { if (!strncmp(xattr_prefix(handler), XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) return 0; } return -EOPNOTSUPP; } EXPORT_SYMBOL(xattr_supports_user_prefix); int __vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) { const struct xattr_handler *handler; if (is_posix_acl_xattr(name)) return -EOPNOTSUPP; handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); if (!handler->set) return -EOPNOTSUPP; if (size == 0) value = ""; /* empty EA, do not remove */ return handler->set(handler, idmap, dentry, inode, name, value, size, flags); } EXPORT_SYMBOL(__vfs_setxattr); /** * __vfs_setxattr_noperm - perform setxattr operation without performing * permission checks. * * @idmap: idmap of the mount the inode was found from * @dentry: object to perform setxattr on * @name: xattr name to set * @value: value to set @name to * @size: size of @value * @flags: flags to pass into filesystem operations * * returns the result of the internal setxattr or setsecurity operations. * * This function requires the caller to lock the inode's i_mutex before it * is executed. It also assumes that the caller will make the appropriate * permission checks. */ int __vfs_setxattr_noperm(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; int error = -EAGAIN; int issec = !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); if (issec) inode->i_flags &= ~S_NOSEC; if (inode->i_opflags & IOP_XATTR) { error = __vfs_setxattr(idmap, dentry, inode, name, value, size, flags); if (!error) { fsnotify_xattr(dentry); security_inode_post_setxattr(dentry, name, value, size, flags); } } else { if (unlikely(is_bad_inode(inode))) return -EIO; } if (error == -EAGAIN) { error = -EOPNOTSUPP; if (issec) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; error = security_inode_setsecurity(inode, suffix, value, size, flags); if (!error) fsnotify_xattr(dentry); } } return error; } /** * __vfs_setxattr_locked - set an extended attribute while holding the inode * lock * * @idmap: idmap of the mount of the target inode * @dentry: object to perform setxattr on * @name: xattr name to set * @value: value to set @name to * @size: size of @value * @flags: flags to pass into filesystem operations * @delegated_inode: on return, will contain an inode pointer that * a delegation was broken on, NULL if none. 
*/ int __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; int error; error = xattr_permission(idmap, inode, name, MAY_WRITE); if (error) return error; error = security_inode_setxattr(idmap, dentry, name, value, size, flags); if (error) goto out; error = try_break_deleg(inode, delegated_inode); if (error) goto out; error = __vfs_setxattr_noperm(idmap, dentry, name, value, size, flags); out: return error; } EXPORT_SYMBOL_GPL(__vfs_setxattr_locked); int vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; struct inode *delegated_inode = NULL; const void *orig_value = value; int error; if (size && strcmp(name, XATTR_NAME_CAPS) == 0) { error = cap_convert_nscap(idmap, dentry, &value, size); if (error < 0) return error; size = error; } retry_deleg: inode_lock(inode); error = __vfs_setxattr_locked(idmap, dentry, name, value, size, flags, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } if (value != orig_value) kfree(value); return error; } EXPORT_SYMBOL_GPL(vfs_setxattr); static ssize_t xattr_getsecurity(struct mnt_idmap *idmap, struct inode *inode, const char *name, void *value, size_t size) { void *buffer = NULL; ssize_t len; if (!value || !size) { len = security_inode_getsecurity(idmap, inode, name, &buffer, false); goto out_noalloc; } len = security_inode_getsecurity(idmap, inode, name, &buffer, true); if (len < 0) return len; if (size < len) { len = -ERANGE; goto out; } memcpy(value, buffer, len); out: kfree(buffer); out_noalloc: return len; } /* * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr * * Allocate memory, if not already allocated, or re-allocate correct size, * before retrieving the extended attribute. The xattr value buffer should * always be freed by the caller, even on error. * * Returns the result of alloc, if failed, or the getxattr operation. 
*/ int vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, char **xattr_value, size_t xattr_size, gfp_t flags) { const struct xattr_handler *handler; struct inode *inode = dentry->d_inode; char *value = *xattr_value; int error; error = xattr_permission(idmap, inode, name, MAY_READ); if (error) return error; handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); if (!handler->get) return -EOPNOTSUPP; error = handler->get(handler, dentry, inode, name, NULL, 0); if (error < 0) return error; if (!value || (error > xattr_size)) { value = krealloc(*xattr_value, error + 1, flags); if (!value) return -ENOMEM; memset(value, 0, error + 1); } error = handler->get(handler, dentry, inode, name, value, error); *xattr_value = value; return error; } ssize_t __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { const struct xattr_handler *handler; if (is_posix_acl_xattr(name)) return -EOPNOTSUPP; handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); if (!handler->get) return -EOPNOTSUPP; return handler->get(handler, dentry, inode, name, value, size); } EXPORT_SYMBOL(__vfs_getxattr); ssize_t vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; error = xattr_permission(idmap, inode, name, MAY_READ); if (error) return error; error = security_inode_getxattr(dentry, name); if (error) return error; if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; int ret = xattr_getsecurity(idmap, inode, suffix, value, size); /* * Only overwrite the return value if a security module * is actually active. */ if (ret == -EOPNOTSUPP) goto nolsm; return ret; } nolsm: return __vfs_getxattr(dentry, inode, name, value, size); } EXPORT_SYMBOL_GPL(vfs_getxattr); /** * vfs_listxattr - retrieve \0 separated list of xattr names * @dentry: the dentry from whose inode the xattr names are retrieved * @list: buffer to store xattr names into * @size: size of the buffer * * This function returns the names of all xattrs associated with the * inode of @dentry. * * Note, for legacy reasons the vfs_listxattr() function lists POSIX * ACLs as well. Since POSIX ACLs are decoupled from IOP_XATTR the * vfs_listxattr() function doesn't check for this flag since a * filesystem could implement POSIX ACLs without implementing any other * xattrs. * * However, since all codepaths that remove IOP_XATTR also assign of * inode operations that either don't implement or implement a stub * ->listxattr() operation. * * Return: On success, the size of the buffer that was used. On error a * negative error code. 
*/ ssize_t vfs_listxattr(struct dentry *dentry, char *list, size_t size) { struct inode *inode = d_inode(dentry); ssize_t error; error = security_inode_listxattr(dentry); if (error) return error; if (inode->i_op->listxattr) { error = inode->i_op->listxattr(dentry, list, size); } else { error = security_inode_listsecurity(inode, list, size); if (size && error > size) error = -ERANGE; } return error; } EXPORT_SYMBOL_GPL(vfs_listxattr); int __vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { struct inode *inode = d_inode(dentry); const struct xattr_handler *handler; if (is_posix_acl_xattr(name)) return -EOPNOTSUPP; handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); if (!handler->set) return -EOPNOTSUPP; return handler->set(handler, idmap, dentry, inode, name, NULL, 0, XATTR_REPLACE); } EXPORT_SYMBOL(__vfs_removexattr); /** * __vfs_removexattr_locked - set an extended attribute while holding the inode * lock * * @idmap: idmap of the mount of the target inode * @dentry: object to perform setxattr on * @name: name of xattr to remove * @delegated_inode: on return, will contain an inode pointer that * a delegation was broken on, NULL if none. */ int __vfs_removexattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; int error; error = xattr_permission(idmap, inode, name, MAY_WRITE); if (error) return error; error = security_inode_removexattr(idmap, dentry, name); if (error) goto out; error = try_break_deleg(inode, delegated_inode); if (error) goto out; error = __vfs_removexattr(idmap, dentry, name); if (error) return error; fsnotify_xattr(dentry); security_inode_post_removexattr(dentry, name); out: return error; } EXPORT_SYMBOL_GPL(__vfs_removexattr_locked); int vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; struct inode *delegated_inode = NULL; int error; retry_deleg: inode_lock(inode); error = __vfs_removexattr_locked(idmap, dentry, name, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } return error; } EXPORT_SYMBOL_GPL(vfs_removexattr); /* * Extended attribute SET operations */ int setxattr_copy(const char __user *name, struct xattr_ctx *ctx) { int error; if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE)) return -EINVAL; error = strncpy_from_user(ctx->kname->name, name, sizeof(ctx->kname->name)); if (error == 0 || error == sizeof(ctx->kname->name)) return -ERANGE; if (error < 0) return error; error = 0; if (ctx->size) { if (ctx->size > XATTR_SIZE_MAX) return -E2BIG; ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size); if (IS_ERR(ctx->kvalue)) { error = PTR_ERR(ctx->kvalue); ctx->kvalue = NULL; } } return error; } int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_ctx *ctx) { if (is_posix_acl_xattr(ctx->kname->name)) return do_set_acl(idmap, dentry, ctx->kname->name, ctx->kvalue, ctx->size); return vfs_setxattr(idmap, dentry, ctx->kname->name, ctx->kvalue, ctx->size, ctx->flags); } static long setxattr(struct mnt_idmap *idmap, struct dentry *d, const char __user *name, const void __user *value, size_t size, int flags) { struct xattr_name kname; struct xattr_ctx ctx = { .cvalue = value, .kvalue = NULL, .size = size, .kname = &kname, .flags = flags, }; int error; error = setxattr_copy(name, &ctx); if (error) return error; error = 
do_setxattr(idmap, d, &ctx); kvfree(ctx.kvalue); return error; } static int path_setxattr(const char __user *pathname, const char __user *name, const void __user *value, size_t size, int flags, unsigned int lookup_flags) { struct path path; int error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); if (!error) { error = setxattr(mnt_idmap(path.mnt), path.dentry, name, value, size, flags); mnt_drop_write(path.mnt); } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE5(setxattr, const char __user *, pathname, const char __user *, name, const void __user *, value, size_t, size, int, flags) { return path_setxattr(pathname, name, value, size, flags, LOOKUP_FOLLOW); } SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, const char __user *, name, const void __user *, value, size_t, size, int, flags) { return path_setxattr(pathname, name, value, size, flags, 0); } SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { struct fd f = fdget(fd); int error = -EBADF; if (!f.file) return error; audit_file(f.file); error = mnt_want_write_file(f.file); if (!error) { error = setxattr(file_mnt_idmap(f.file), f.file->f_path.dentry, name, value, size, flags); mnt_drop_write_file(f.file); } fdput(f); return error; } /* * Extended attribute GET operations */ ssize_t do_getxattr(struct mnt_idmap *idmap, struct dentry *d, struct xattr_ctx *ctx) { ssize_t error; char *kname = ctx->kname->name; if (ctx->size) { if (ctx->size > XATTR_SIZE_MAX) ctx->size = XATTR_SIZE_MAX; ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL); if (!ctx->kvalue) return -ENOMEM; } if (is_posix_acl_xattr(ctx->kname->name)) error = do_get_acl(idmap, d, kname, ctx->kvalue, ctx->size); else error = vfs_getxattr(idmap, d, kname, ctx->kvalue, ctx->size); if (error > 0) { if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error)) error = -EFAULT; } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) { /* The file system tried to returned a value bigger than XATTR_SIZE_MAX bytes. Not possible. 
*/ error = -E2BIG; } return error; } static ssize_t getxattr(struct mnt_idmap *idmap, struct dentry *d, const char __user *name, void __user *value, size_t size) { ssize_t error; struct xattr_name kname; struct xattr_ctx ctx = { .value = value, .kvalue = NULL, .size = size, .kname = &kname, .flags = 0, }; error = strncpy_from_user(kname.name, name, sizeof(kname.name)); if (error == 0 || error == sizeof(kname.name)) error = -ERANGE; if (error < 0) return error; error = do_getxattr(idmap, d, &ctx); kvfree(ctx.kvalue); return error; } static ssize_t path_getxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size, unsigned int lookup_flags) { struct path path; ssize_t error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = getxattr(mnt_idmap(path.mnt), path.dentry, name, value, size); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE4(getxattr, const char __user *, pathname, const char __user *, name, void __user *, value, size_t, size) { return path_getxattr(pathname, name, value, size, LOOKUP_FOLLOW); } SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, const char __user *, name, void __user *, value, size_t, size) { return path_getxattr(pathname, name, value, size, 0); } SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size) { struct fd f = fdget(fd); ssize_t error = -EBADF; if (!f.file) return error; audit_file(f.file); error = getxattr(file_mnt_idmap(f.file), f.file->f_path.dentry, name, value, size); fdput(f); return error; } /* * Extended attribute LIST operations */ static ssize_t listxattr(struct dentry *d, char __user *list, size_t size) { ssize_t error; char *klist = NULL; if (size) { if (size > XATTR_LIST_MAX) size = XATTR_LIST_MAX; klist = kvmalloc(size, GFP_KERNEL); if (!klist) return -ENOMEM; } error = vfs_listxattr(d, klist, size); if (error > 0) { if (size && copy_to_user(list, klist, error)) error = -EFAULT; } else if (error == -ERANGE && size >= XATTR_LIST_MAX) { /* The file system tried to returned a list bigger than XATTR_LIST_MAX bytes. Not possible. 
*/ error = -E2BIG; } kvfree(klist); return error; } static ssize_t path_listxattr(const char __user *pathname, char __user *list, size_t size, unsigned int lookup_flags) { struct path path; ssize_t error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = listxattr(path.dentry, list, size); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list, size_t, size) { return path_listxattr(pathname, list, size, LOOKUP_FOLLOW); } SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, size_t, size) { return path_listxattr(pathname, list, size, 0); } SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { struct fd f = fdget(fd); ssize_t error = -EBADF; if (!f.file) return error; audit_file(f.file); error = listxattr(f.file->f_path.dentry, list, size); fdput(f); return error; } /* * Extended attribute REMOVE operations */ static long removexattr(struct mnt_idmap *idmap, struct dentry *d, const char __user *name) { int error; char kname[XATTR_NAME_MAX + 1]; error = strncpy_from_user(kname, name, sizeof(kname)); if (error == 0 || error == sizeof(kname)) error = -ERANGE; if (error < 0) return error; if (is_posix_acl_xattr(kname)) return vfs_remove_acl(idmap, d, kname); return vfs_removexattr(idmap, d, kname); } static int path_removexattr(const char __user *pathname, const char __user *name, unsigned int lookup_flags) { struct path path; int error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); if (!error) { error = removexattr(mnt_idmap(path.mnt), path.dentry, name); mnt_drop_write(path.mnt); } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE2(removexattr, const char __user *, pathname, const char __user *, name) { return path_removexattr(pathname, name, LOOKUP_FOLLOW); } SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, const char __user *, name) { return path_removexattr(pathname, name, 0); } SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { struct fd f = fdget(fd); int error = -EBADF; if (!f.file) return error; audit_file(f.file); error = mnt_want_write_file(f.file); if (!error) { error = removexattr(file_mnt_idmap(f.file), f.file->f_path.dentry, name); mnt_drop_write_file(f.file); } fdput(f); return error; } int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name) { size_t len; len = strlen(name) + 1; if (*buffer) { if (*remaining_size < len) return -ERANGE; memcpy(*buffer, name, len); *buffer += len; } *remaining_size -= len; return 0; } /** * generic_listxattr - run through a dentry's xattr list() operations * @dentry: dentry to list the xattrs * @buffer: result buffer * @buffer_size: size of @buffer * * Combine the results of the list() operation from every xattr_handler in the * xattr_handler stack. * * Note that this will not include the entries for POSIX ACLs. 
*/ ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { const struct xattr_handler *handler, * const *handlers = dentry->d_sb->s_xattr; ssize_t remaining_size = buffer_size; int err = 0; for_each_xattr_handler(handlers, handler) { if (!handler->name || (handler->list && !handler->list(dentry))) continue; err = xattr_list_one(&buffer, &remaining_size, handler->name); if (err) return err; } return err ? err : buffer_size - remaining_size; } EXPORT_SYMBOL(generic_listxattr); /** * xattr_full_name - Compute full attribute name from suffix * * @handler: handler of the xattr_handler operation * @name: name passed to the xattr_handler operation * * The get and set xattr handler operations are called with the remainder of * the attribute name after skipping the handler's prefix: for example, "foo" * is passed to the get operation of a handler with prefix "user." to get * attribute "user.foo". The full name is still "there" in the name though. * * Note: the list xattr handler operation when called from the vfs is passed a * NULL name; some file systems use this operation internally, with varying * semantics. */ const char *xattr_full_name(const struct xattr_handler *handler, const char *name) { size_t prefix_len = strlen(xattr_prefix(handler)); return name - prefix_len; } EXPORT_SYMBOL(xattr_full_name); /** * simple_xattr_space - estimate the memory used by a simple xattr * @name: the full name of the xattr * @size: the size of its value * * This takes no account of how much larger the two slab objects actually are: * that would depend on the slab implementation, when what is required is a * deterministic number, which grows with name length and size and quantity. * * Return: The approximate number of bytes of memory used by such an xattr. */ size_t simple_xattr_space(const char *name, size_t size) { /* * Use "40" instead of sizeof(struct simple_xattr), to return the * same result on 32-bit and 64-bit, and even if simple_xattr grows. */ return 40 + size + strlen(name); } /** * simple_xattr_free - free an xattr object * @xattr: the xattr object * * Free the xattr object. Can handle @xattr being NULL. */ void simple_xattr_free(struct simple_xattr *xattr) { if (xattr) kfree(xattr->name); kvfree(xattr); } /** * simple_xattr_alloc - allocate new xattr object * @value: value of the xattr object * @size: size of @value * * Allocate a new xattr object and initialize respective members. The caller is * responsible for handling the name of the xattr. * * Return: On success a new xattr object is returned. On failure NULL is * returned. */ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) { struct simple_xattr *new_xattr; size_t len; /* wrap around? */ len = sizeof(*new_xattr) + size; if (len < sizeof(*new_xattr)) return NULL; new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT); if (!new_xattr) return NULL; new_xattr->size = size; memcpy(new_xattr->value, value, size); return new_xattr; } /** * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry * @key: xattr name * @node: current node * * Compare the xattr name with the xattr name attached to @node in the rbtree. * * Return: Negative value if continuing left, positive if continuing right, 0 * if the xattr attached to @node matches @key. 
*/ static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node) { const char *xattr_name = key; const struct simple_xattr *xattr; xattr = rb_entry(node, struct simple_xattr, rb_node); return strcmp(xattr->name, xattr_name); } /** * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes * @new_node: new node * @node: current node * * Compare the xattr attached to @new_node with the xattr attached to @node. * * Return: Negative value if continuing left, positive if continuing right, 0 * if the xattr attached to @new_node matches the xattr attached to @node. */ static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node, const struct rb_node *node) { struct simple_xattr *xattr; xattr = rb_entry(new_node, struct simple_xattr, rb_node); return rbtree_simple_xattr_cmp(xattr->name, node); } /** * simple_xattr_get - get an xattr object * @xattrs: the header of the xattr object * @name: the name of the xattr to retrieve * @buffer: the buffer to store the value into * @size: the size of @buffer * * Try to find and retrieve the xattr object associated with @name. * If @buffer is provided store the value of @xattr in @buffer * otherwise just return the length. The size of @buffer is limited * to XATTR_SIZE_MAX which currently is 65536. * * Return: On success the length of the xattr value is returned. On error a * negative error code is returned. */ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size) { struct simple_xattr *xattr = NULL; struct rb_node *rbp; int ret = -ENODATA; read_lock(&xattrs->lock); rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp); if (rbp) { xattr = rb_entry(rbp, struct simple_xattr, rb_node); ret = xattr->size; if (buffer) { if (size < xattr->size) ret = -ERANGE; else memcpy(buffer, xattr->value, xattr->size); } } read_unlock(&xattrs->lock); return ret; } /** * simple_xattr_set - set an xattr object * @xattrs: the header of the xattr object * @name: the name of the xattr to retrieve * @value: the value to store along the xattr * @size: the size of @value * @flags: the flags determining how to set the xattr * * Set a new xattr object. * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE * is specified in @flags a matching xattr object for @name must already exist. * If it does it will be replaced with the new xattr object. If it doesn't we * fail. If XATTR_CREATE is specified and a matching xattr does already exist * we fail. If it doesn't we create a new xattr. If @flags is zero we simply * insert the new xattr replacing any existing one. * * If @value is empty and a matching xattr object is found we delete it if * XATTR_REPLACE is specified in @flags or @flags is zero. * * If @value is empty and no matching xattr object for @name is found we do * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For * XATTR_REPLACE we fail as mentioned above. * * Return: On success, the removed or replaced xattr is returned, to be freed * by the caller; or NULL if none. On failure a negative error code is returned. 
*/ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags) { struct simple_xattr *old_xattr = NULL, *new_xattr = NULL; struct rb_node *parent = NULL, **rbp; int err = 0, ret; /* value == NULL means remove */ if (value) { new_xattr = simple_xattr_alloc(value, size); if (!new_xattr) return ERR_PTR(-ENOMEM); new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT); if (!new_xattr->name) { simple_xattr_free(new_xattr); return ERR_PTR(-ENOMEM); } } write_lock(&xattrs->lock); rbp = &xattrs->rb_root.rb_node; while (*rbp) { parent = *rbp; ret = rbtree_simple_xattr_cmp(name, *rbp); if (ret < 0) rbp = &(*rbp)->rb_left; else if (ret > 0) rbp = &(*rbp)->rb_right; else old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node); if (old_xattr) break; } if (old_xattr) { /* Fail if XATTR_CREATE is requested and the xattr exists. */ if (flags & XATTR_CREATE) { err = -EEXIST; goto out_unlock; } if (new_xattr) rb_replace_node(&old_xattr->rb_node, &new_xattr->rb_node, &xattrs->rb_root); else rb_erase(&old_xattr->rb_node, &xattrs->rb_root); } else { /* Fail if XATTR_REPLACE is requested but no xattr is found. */ if (flags & XATTR_REPLACE) { err = -ENODATA; goto out_unlock; } /* * If XATTR_CREATE or no flags are specified together with a * new value simply insert it. */ if (new_xattr) { rb_link_node(&new_xattr->rb_node, parent, rbp); rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root); } /* * If XATTR_CREATE or no flags are specified and neither an * old or new xattr exist then we don't need to do anything. */ } out_unlock: write_unlock(&xattrs->lock); if (!err) return old_xattr; simple_xattr_free(new_xattr); return ERR_PTR(err); } static bool xattr_is_trusted(const char *name) { return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } /** * simple_xattr_list - list all xattr objects * @inode: inode from which to get the xattrs * @xattrs: the header of the xattr object * @buffer: the buffer to store all xattrs into * @size: the size of @buffer * * List all xattrs associated with @inode. If @buffer is NULL we returned * the required size of the buffer. If @buffer is provided we store the * xattrs value into it provided it is big enough. * * Note, the number of xattr names that can be listed with listxattr(2) is * limited to XATTR_LIST_MAX aka 65536 bytes. If a larger buffer is passed * then vfs_listxattr() caps it to XATTR_LIST_MAX and if more xattr names * are found it will return -E2BIG. * * Return: On success the required size or the size of the copied xattrs is * returned. On error a negative error code is returned. */ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size) { bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); struct simple_xattr *xattr; struct rb_node *rbp; ssize_t remaining_size = size; int err = 0; err = posix_acl_listxattr(inode, &buffer, &remaining_size); if (err) return err; read_lock(&xattrs->lock); for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { xattr = rb_entry(rbp, struct simple_xattr, rb_node); /* skip "trusted." attributes for unprivileged callers */ if (!trusted && xattr_is_trusted(xattr->name)) continue; err = xattr_list_one(&buffer, &remaining_size, xattr->name); if (err) break; } read_unlock(&xattrs->lock); return err ? 
err : size - remaining_size;
}

/**
 * rbtree_simple_xattr_less - compare two xattr rbtree nodes
 * @new_node: new node
 * @node: current node
 *
 * Compare the xattr attached to @new_node with the xattr attached to @node.
 * Note that this function technically tolerates duplicate entries.
 *
 * Return: True if insertion point in the rbtree is found.
 */
static bool rbtree_simple_xattr_less(struct rb_node *new_node,
				     const struct rb_node *node)
{
	return rbtree_simple_xattr_node_cmp(new_node, node) < 0;
}

/**
 * simple_xattr_add - add xattr objects
 * @xattrs: the header of the xattr object
 * @new_xattr: the xattr object to add
 *
 * Add an xattr object to @xattrs. This assumes no replacement or removal
 * of matching xattrs is wanted. Should only be called during inode
 * initialization when a few distinct initial xattrs are supposed to be set.
 */
void simple_xattr_add(struct simple_xattrs *xattrs,
		      struct simple_xattr *new_xattr)
{
	write_lock(&xattrs->lock);
	rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less);
	write_unlock(&xattrs->lock);
}

/**
 * simple_xattrs_init - initialize new xattr header
 * @xattrs: header to initialize
 *
 * Initialize relevant fields of an xattr header.
 */
void simple_xattrs_init(struct simple_xattrs *xattrs)
{
	xattrs->rb_root = RB_ROOT;
	rwlock_init(&xattrs->lock);
}

/**
 * simple_xattrs_free - free xattrs
 * @xattrs: xattr header whose xattrs to destroy
 * @freed_space: approximate number of bytes of memory freed from @xattrs
 *
 * Destroy all xattrs in @xattrs. When this is called no one can hold a
 * reference to any of the xattrs anymore.
 */
void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
{
	struct rb_node *rbp;

	if (freed_space)
		*freed_space = 0;
	rbp = rb_first(&xattrs->rb_root);
	while (rbp) {
		struct simple_xattr *xattr;
		struct rb_node *rbp_next;

		rbp_next = rb_next(rbp);
		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
		rb_erase(&xattr->rb_node, &xattrs->rb_root);
		if (freed_space)
			*freed_space += simple_xattr_space(xattr->name,
							   xattr->size);
		simple_xattr_free(xattr);
		rbp = rbp_next;
	}
}
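/*
 * Editor's note: a minimal usage sketch, not part of the original file.
 * It shows how a caller might drive the simple_xattr API documented above.
 * The demo_inode_info structure and demo_store_xattr() helper are invented
 * for illustration; only the simple_xattr*() calls come from this file, and
 * the sketch assumes the usual kernel headers (linux/xattr.h, linux/err.h).
 */
struct demo_inode_info {
	struct simple_xattrs xattrs;	/* hypothetical per-inode xattr header */
};

static int demo_store_xattr(struct demo_inode_info *info, const char *name,
			    const void *value, size_t size, int flags)
{
	struct simple_xattr *old;

	/* A NULL @value removes the xattr; @flags follows XATTR_CREATE/REPLACE. */
	old = simple_xattr_set(&info->xattrs, name, value, size, flags);
	if (IS_ERR(old))
		return PTR_ERR(old);

	/* Any displaced or removed xattr is returned to the caller to free. */
	simple_xattr_free(old);
	return 0;
}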
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _TIMEKEEPING_INTERNAL_H
#define _TIMEKEEPING_INTERNAL_H

#include <linux/clocksource.h>
#include <linux/spinlock.h>
#include <linux/time.h>

/*
 * timekeeping debug functions
 */
#ifdef CONFIG_DEBUG_FS
extern void tk_debug_account_sleep_time(const struct timespec64 *t);
#else
#define tk_debug_account_sleep_time(x)
#endif

#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
{
	u64 ret = (now - last) & mask;

	/*
	 * Prevent time going backwards by checking the MSB of mask in
	 * the result. If set, return 0.
	 */
	return ret & ~(mask >> 1) ? 0 : ret;
}
#else
static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
{
	return (now - last) & mask;
}
#endif

/* Semi public for serialization of non timekeeper VDSO updates. */
extern raw_spinlock_t timekeeper_lock;

#endif /* _TIMEKEEPING_INTERNAL_H */
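/*
 * Editor's note: a standalone, userspace illustration (not part of the
 * header above) of what clocksource_delta() computes for a clocksource
 * with a 32-bit counter. The values are made up purely to contrast normal
 * wraparound with the "counter went backwards" case that the MSB check in
 * the CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE variant filters out.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t demo_delta(uint64_t now, uint64_t last, uint64_t mask)
{
	uint64_t ret = (now - last) & mask;

	/* Same test as above: a set MSB means the delta is treated as 0. */
	return ret & ~(mask >> 1) ? 0 : ret;
}

int main(void)
{
	uint64_t mask = 0xffffffffULL;	/* 32-bit counter */

	/* Forward progress across a counter wrap: 0xfffffff0 -> 0x10 = 0x20. */
	printf("wrap:      0x%llx\n",
	       (unsigned long long)demo_delta(0x10, 0xfffffff0, mask));
	/* now appears 0x20 behind last (mod 2^32): MSB set, reported as 0. */
	printf("backwards: 0x%llx\n",
	       (unsigned long long)demo_delta(0xfffffff0, 0x10, mask));
	return 0;
}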
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FORTIFY_STRING_H_
#define _LINUX_FORTIFY_STRING_H_

#include <linux/bitfield.h>
#include <linux/bug.h>
#include <linux/const.h>
#include <linux/limits.h>

#define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable
#define __RENAME(x) __asm__(#x)

#define FORTIFY_REASON_DIR(r)		FIELD_GET(BIT(0), r)
#define FORTIFY_REASON_FUNC(r)		FIELD_GET(GENMASK(7, 1), r)
#define FORTIFY_REASON(func, write)	\
(FIELD_PREP(BIT(0), write) | \ FIELD_PREP(GENMASK(7, 1), func)) #ifndef fortify_panic # define fortify_panic(func, write, avail, size, retfail) \ __fortify_panic(FORTIFY_REASON(func, write), avail, size) #endif #define FORTIFY_READ 0 #define FORTIFY_WRITE 1 #define EACH_FORTIFY_FUNC(macro) \ macro(strncpy), \ macro(strnlen), \ macro(strlen), \ macro(strscpy), \ macro(strlcat), \ macro(strcat), \ macro(strncat), \ macro(memset), \ macro(memcpy), \ macro(memmove), \ macro(memscan), \ macro(memcmp), \ macro(memchr), \ macro(memchr_inv), \ macro(kmemdup), \ macro(strcpy), \ macro(UNKNOWN), #define MAKE_FORTIFY_FUNC(func) FORTIFY_FUNC_##func enum fortify_func { EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC) }; void __fortify_report(const u8 reason, const size_t avail, const size_t size); void __fortify_panic(const u8 reason, const size_t avail, const size_t size) __cold __noreturn; void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?"); void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)"); void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("detected write beyond size of field (1st parameter); maybe use struct_group()?"); #define __compiletime_strlen(p) \ ({ \ char *__p = (char *)(p); \ size_t __ret = SIZE_MAX; \ const size_t __p_size = __member_size(p); \ if (__p_size != SIZE_MAX && \ __builtin_constant_p(*__p)) { \ size_t __p_len = __p_size - 1; \ if (__builtin_constant_p(__p[__p_len]) && \ __p[__p_len] == '\0') \ __ret = __builtin_strlen(__p); \ } \ __ret; \ }) #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr); extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp); extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy); extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove); extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset); extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat); extern char *__underlying_strcpy(char *p, const char *q) __RENAME(strcpy); extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen); extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat); extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy); #else #if defined(__SANITIZE_MEMORY__) /* * For KMSAN builds all memcpy/memset/memmove calls should be replaced by the * corresponding __msan_XXX functions. 
*/ #include <linux/kmsan_string.h> #define __underlying_memcpy __msan_memcpy #define __underlying_memmove __msan_memmove #define __underlying_memset __msan_memset #else #define __underlying_memcpy __builtin_memcpy #define __underlying_memmove __builtin_memmove #define __underlying_memset __builtin_memset #endif #define __underlying_memchr __builtin_memchr #define __underlying_memcmp __builtin_memcmp #define __underlying_strcat __builtin_strcat #define __underlying_strcpy __builtin_strcpy #define __underlying_strlen __builtin_strlen #define __underlying_strncat __builtin_strncat #define __underlying_strncpy __builtin_strncpy #endif /** * unsafe_memcpy - memcpy implementation with no FORTIFY bounds checking * * @dst: Destination memory address to write to * @src: Source memory address to read from * @bytes: How many bytes to write to @dst from @src * @justification: Free-form text or comment describing why the use is needed * * This should be used for corner cases where the compiler cannot do the * right thing, or during transitions between APIs, etc. It should be used * very rarely, and includes a place for justification detailing where bounds * checking has happened, and why existing solutions cannot be employed. */ #define unsafe_memcpy(dst, src, bytes, justification) \ __underlying_memcpy(dst, src, bytes) /* * Clang's use of __builtin_*object_size() within inlines needs hinting via * __pass_*object_size(). The preference is to only ever use type 1 (member * size, rather than struct size), but there remain some stragglers using * type 0 that will be converted in the future. */ #if __has_builtin(__builtin_dynamic_object_size) #define POS __pass_dynamic_object_size(1) #define POS0 __pass_dynamic_object_size(0) #else #define POS __pass_object_size(1) #define POS0 __pass_object_size(0) #endif #define __compiletime_lessthan(bounds, length) ( \ __builtin_constant_p((bounds) < (length)) && \ (bounds) < (length) \ ) /** * strncpy - Copy a string to memory with non-guaranteed NUL padding * * @p: pointer to destination of copy * @q: pointer to NUL-terminated source string to copy * @size: bytes to write at @p * * If strlen(@q) >= @size, the copy of @q will stop after @size bytes, * and @p will NOT be NUL-terminated * * If strlen(@q) < @size, following the copy of @q, trailing NUL bytes * will be written to @p until @size total bytes have been written. * * Do not use this function. While FORTIFY_SOURCE tries to avoid * over-reads of @q, it cannot defend against writing unterminated * results to @p. Using strncpy() remains ambiguous and fragile. * Instead, please choose an alternative, so that the expectation * of @p's contents is unambiguous: * * +--------------------+--------------------+------------+ * | **p** needs to be: | padded to **size** | not padded | * +====================+====================+============+ * | NUL-terminated | strscpy_pad() | strscpy() | * +--------------------+--------------------+------------+ * | not NUL-terminated | strtomem_pad() | strtomem() | * +--------------------+--------------------+------------+ * * Note strscpy*()'s differing return values for detecting truncation, * and strtomem*()'s expectation that the destination is marked with * __nonstring when it is a character array. 
* */ __FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3) char *strncpy(char * const POS p, const char *q, __kernel_size_t size) { const size_t p_size = __member_size(p); if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_strncpy, FORTIFY_WRITE, p_size, size, p); return __underlying_strncpy(p, q, size); } extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); /** * strnlen - Return bounded count of characters in a NUL-terminated string * * @p: pointer to NUL-terminated string to count. * @maxlen: maximum number of characters to count. * * Returns number of characters in @p (NOT including the final NUL), or * @maxlen, if no NUL has been found up to there. * */ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { const size_t p_size = __member_size(p); const size_t p_len = __compiletime_strlen(p); size_t ret; /* We can take compile-time actions when maxlen is const. */ if (__builtin_constant_p(maxlen) && p_len != SIZE_MAX) { /* If p is const, we can use its compile-time-known len. */ if (maxlen >= p_size) return p_len; } /* Do not check characters beyond the end of p. */ ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) fortify_panic(FORTIFY_FUNC_strnlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } /* * Defined after fortified strnlen to reuse it. However, it must still be * possible for strlen() to be used on compile-time strings for use in * static initializers (i.e. as a constant expression). */ /** * strlen - Return count of characters in a NUL-terminated string * * @p: pointer to NUL-terminated string to count. * * Do not use this function unless the string length is known at * compile-time. When @p is unterminated, this function may crash * or return unexpected counts that could lead to memory content * exposures. Prefer strnlen(). * * Returns number of characters in @p (NOT including the final NUL). * */ #define strlen(p) \ __builtin_choose_expr(__is_constexpr(__builtin_strlen(p)), \ __builtin_strlen(p), __fortify_strlen(p)) __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) __kernel_size_t __fortify_strlen(const char * const POS p) { const size_t p_size = __member_size(p); __kernel_size_t ret; /* Give up if we don't know how large p is. */ if (p_size == SIZE_MAX) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) fortify_panic(FORTIFY_FUNC_strlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } /* Defined after fortified strnlen() to reuse it. */ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy); __FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size) { /* Use string size rather than possible enclosing struct size. */ const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); size_t len; /* If we cannot get size of p and q default to call strscpy. */ if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strscpy(p, q, size); /* * If size can be known at compile time and is greater than * p_size, generate a compile time write overflow error. */ if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* Short-circuit for compile-time known-safe lengths. 
*/ if (__compiletime_lessthan(p_size, SIZE_MAX)) { len = __compiletime_strlen(q); if (len < SIZE_MAX && __compiletime_lessthan(len, size)) { __underlying_memcpy(p, q, len + 1); return len; } } /* * This call protects from read overflow, because len will default to q * length if it smaller than size. */ len = strnlen(q, size); /* * If len equals size, we will copy only size bytes which leads to * -E2BIG being returned. * Otherwise we will copy len + 1 because of the final '\O'. */ len = len == size ? size : len + 1; /* * Generate a runtime write overflow error if len is greater than * p_size. */ if (p_size < len) fortify_panic(FORTIFY_FUNC_strscpy, FORTIFY_WRITE, p_size, len, -E2BIG); /* * We can now safely call vanilla strscpy because we are protected from: * 1. Read overflow thanks to call to strnlen(). * 2. Write overflow thanks to above ifs. */ return __real_strscpy(p, q, len); } /* Defined after fortified strlen() to reuse it. */ extern size_t __real_strlcat(char *p, const char *q, size_t avail) __RENAME(strlcat); /** * strlcat - Append a string to an existing string * * @p: pointer to %NUL-terminated string to append to * @q: pointer to %NUL-terminated string to append from * @avail: Maximum bytes available in @p * * Appends %NUL-terminated string @q after the %NUL-terminated * string at @p, but will not write beyond @avail bytes total, * potentially truncating the copy from @q. @p will stay * %NUL-terminated only if a %NUL already existed within * the @avail bytes of @p. If so, the resulting number of * bytes copied from @q will be at most "@avail - strlen(@p) - 1". * * Do not use this function. While FORTIFY_SOURCE tries to avoid * read and write overflows, this is only possible when the sizes * of @p and @q are known to the compiler. Prefer building the * string with formatting, via scnprintf(), seq_buf, or similar. * * Returns total bytes that _would_ have been contained by @p * regardless of truncation, similar to snprintf(). If return * value is >= @avail, the string has been truncated. * */ __FORTIFY_INLINE size_t strlcat(char * const POS p, const char * const POS q, size_t avail) { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); size_t p_len, copy_len; size_t actual, wanted; /* Give up immediately if both buffer sizes are unknown. */ if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strlcat(p, q, avail); p_len = strnlen(p, avail); copy_len = strlen(q); wanted = actual = p_len + copy_len; /* Cannot append any more: report truncation. */ if (avail <= p_len) return wanted; /* Give up if string is already overflowed. */ if (p_size <= p_len) fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_READ, p_size, p_len + 1, wanted); if (actual >= avail) { copy_len = avail - p_len - 1; actual = p_len + copy_len; } /* Give up if copy will overflow. */ if (p_size <= actual) fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_WRITE, p_size, actual + 1, wanted); __underlying_memcpy(p + p_len, q, copy_len); p[actual] = '\0'; return wanted; } /* Defined after fortified strlcat() to reuse it. */ /** * strcat - Append a string to an existing string * * @p: pointer to NUL-terminated string to append to * @q: pointer to NUL-terminated source string to append from * * Do not use this function. While FORTIFY_SOURCE tries to avoid * read and write overflows, this is only possible when the * destination buffer size is known to the compiler. Prefer * building the string with formatting, via scnprintf() or similar. * At the very least, use strncat(). * * Returns @p. 
* */ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { const size_t p_size = __member_size(p); const size_t wanted = strlcat(p, q, p_size); if (p_size <= wanted) fortify_panic(FORTIFY_FUNC_strcat, FORTIFY_WRITE, p_size, wanted + 1, p); return p; } /** * strncat - Append a string to an existing string * * @p: pointer to NUL-terminated string to append to * @q: pointer to source string to append from * @count: Maximum bytes to read from @q * * Appends at most @count bytes from @q (stopping at the first * NUL byte) after the NUL-terminated string at @p. @p will be * NUL-terminated. * * Do not use this function. While FORTIFY_SOURCE tries to avoid * read and write overflows, this is only possible when the sizes * of @p and @q are known to the compiler. Prefer building the * string with formatting, via scnprintf() or similar. * * Returns @p. * */ /* Defined after fortified strlen() and strnlen() to reuse them. */ __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); size_t p_len, copy_len, total; if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); total = p_len + copy_len + 1; if (p_size < total) fortify_panic(FORTIFY_FUNC_strncat, FORTIFY_WRITE, p_size, total, p); __underlying_memcpy(p + p_len, q, copy_len); p[p_len + copy_len] = '\0'; return p; } __FORTIFY_INLINE bool fortify_memset_chk(__kernel_size_t size, const size_t p_size, const size_t p_size_field) { if (__builtin_constant_p(size)) { /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ if (__compiletime_lessthan(p_size_field, p_size) && __compiletime_lessthan(p_size, size)) __write_overflow(); /* Warn when write size is larger than dest field. */ if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); } /* * At this point, length argument may not be a constant expression, * so run-time bounds checking can be done where buffer sizes are * known. (This is not an "else" because the above checks may only * be compile-time warnings, and we want to still warn for run-time * overflows.) */ /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ if (p_size != SIZE_MAX && p_size < size) fortify_panic(FORTIFY_FUNC_memset, FORTIFY_WRITE, p_size, size, true); return false; } #define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \ size_t __fortify_size = (size_t)(size); \ fortify_memset_chk(__fortify_size, p_size, p_size_field), \ __underlying_memset(p, c, __fortify_size); \ }) /* * __struct_size() vs __member_size() must be captured here to avoid * evaluating argument side-effects further into the macro layers. */ #ifndef CONFIG_KMSAN #define memset(p, c, s) __fortify_memset_chk(p, c, s, \ __struct_size(p), __member_size(p)) #endif /* * To make sure the compiler can enforce protection against buffer overflows, * memcpy(), memmove(), and memset() must not be used beyond individual * struct members. 
If you need to copy across multiple members, please use * struct_group() to create a named mirror of an anonymous struct union. * (e.g. see struct sk_buff.) Read overflow checking is currently only * done when a write overflow is also present, or when building with W=1. * * Mitigation coverage matrix * Bounds checking at: * +-------+-------+-------+-------+ * | Compile time | Run time | * memcpy() argument sizes: | write | read | write | read | * dest source length +-------+-------+-------+-------+ * memcpy(known, known, constant) | y | y | n/a | n/a | * memcpy(known, unknown, constant) | y | n | n/a | V | * memcpy(known, known, dynamic) | n | n | B | B | * memcpy(known, unknown, dynamic) | n | n | B | V | * memcpy(unknown, known, constant) | n | y | V | n/a | * memcpy(unknown, unknown, constant) | n | n | V | V | * memcpy(unknown, known, dynamic) | n | n | V | B | * memcpy(unknown, unknown, dynamic) | n | n | V | V | * +-------+-------+-------+-------+ * * y = perform deterministic compile-time bounds checking * n = cannot perform deterministic compile-time bounds checking * n/a = no run-time bounds checking needed since compile-time deterministic * B = can perform run-time bounds checking (currently unimplemented) * V = vulnerable to run-time overflow (will need refactoring to solve) * */ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t p_size, const size_t q_size, const size_t p_size_field, const size_t q_size_field, const u8 func) { if (__builtin_constant_p(size)) { /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ if (__compiletime_lessthan(p_size_field, p_size) && __compiletime_lessthan(p_size, size)) __write_overflow(); if (__compiletime_lessthan(q_size_field, q_size) && __compiletime_lessthan(q_size, size)) __read_overflow2(); /* Warn when write size argument larger than dest field. */ if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); /* * Warn for source field over-read when building with W=1 * or when an over-write happened, so both can be fixed at * the same time. */ if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || __compiletime_lessthan(p_size_field, size)) && __compiletime_lessthan(q_size_field, size)) __read_overflow2_field(q_size_field, size); } /* * At this point, length argument may not be a constant expression, * so run-time bounds checking can be done where buffer sizes are * known. (This is not an "else" because the above checks may only * be compile-time warnings, and we want to still warn for run-time * overflows.) */ /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ if (p_size != SIZE_MAX && p_size < size) fortify_panic(func, FORTIFY_WRITE, p_size, size, true); else if (q_size != SIZE_MAX && q_size < size) fortify_panic(func, FORTIFY_READ, p_size, size, true); /* * Warn when writing beyond destination field size. * * We must ignore p_size_field == 0 for existing 0-element * fake flexible arrays, until they are all converted to * proper flexible arrays. 
* * The implementation of __builtin_*object_size() behaves * like sizeof() when not directly referencing a flexible * array member, which means there will be many bounds checks * that will appear at run-time, without a way for them to be * detected at compile-time (as can be done when the destination * is specifically the flexible array member). * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832 */ if (p_size_field != 0 && p_size_field != SIZE_MAX && p_size != p_size_field && p_size_field < size) return true; return false; } #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ const size_t __fortify_size = (size_t)(size); \ const size_t __p_size = (p_size); \ const size_t __q_size = (q_size); \ const size_t __p_size_field = (p_size_field); \ const size_t __q_size_field = (q_size_field); \ WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \ __q_size, __p_size_field, \ __q_size_field, FORTIFY_FUNC_ ##op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ "field \"" #p "\" at " FILE_LINE, \ __p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) /* * Notes about compile-time buffer size detection: * * With these types... * * struct middle { * u16 a; * u8 middle_buf[16]; * int b; * }; * struct end { * u16 a; * u8 end_buf[16]; * }; * struct flex { * int a; * u8 flex_buf[]; * }; * * void func(TYPE *ptr) { ... } * * Cases where destination size cannot be currently detected: * - the size of ptr's object (seemingly by design, gcc & clang fail): * __builtin_object_size(ptr, 1) == SIZE_MAX * - the size of flexible arrays in ptr's obj (by design, dynamic size): * __builtin_object_size(ptr->flex_buf, 1) == SIZE_MAX * - the size of ANY array at the end of ptr's obj (gcc and clang bug): * __builtin_object_size(ptr->end_buf, 1) == SIZE_MAX * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101836 * * Cases where destination size is currently detected: * - the size of non-array members within ptr's object: * __builtin_object_size(ptr->a, 1) == 2 * - the size of non-flexible-array in the middle of ptr's obj: * __builtin_object_size(ptr->middle_buf, 1) == 16 * */ /* * __struct_size() vs __member_size() must be captured here to avoid * evaluating argument side-effects further into the macro layers. 
*/ #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ __struct_size(p), __struct_size(q), \ __member_size(p), __member_size(q), \ memcpy) #define memmove(p, q, s) __fortify_memcpy_chk(p, q, s, \ __struct_size(p), __struct_size(q), \ __member_size(p), __member_size(q), \ memmove) extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_memscan, FORTIFY_READ, p_size, size, NULL); return __real_memscan(p, c, size); } __FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3) int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t size) { const size_t p_size = __struct_size(p); const size_t q_size = __struct_size(q); if (__builtin_constant_p(size)) { if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } if (p_size < size) fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, p_size, size, INT_MIN); else if (q_size < size) fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, q_size, size, INT_MIN); return __underlying_memcmp(p, q, size); } __FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3) void *memchr(const void * const POS0 p, int c, __kernel_size_t size) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_memchr, FORTIFY_READ, p_size, size, NULL); return __underlying_memchr(p, c, size); } void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_memchr_inv, FORTIFY_READ, p_size, size, NULL); return __real_memchr_inv(p, c, size); } extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup) __realloc_size(2); __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, NULL); return __real_kmemdup(p, size, gfp); } /** * strcpy - Copy a string into another string buffer * * @p: pointer to destination of copy * @q: pointer to NUL-terminated source string to copy * * Do not use this function. While FORTIFY_SOURCE tries to avoid * overflows, this is only possible when the sizes of @q and @p are * known to the compiler. Prefer strscpy(), though note its different * return values for detecting truncation. * * Returns @p. * */ /* Defined after fortified strlen to reuse it. */ __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) char *strcpy(char * const POS p, const char * const POS q) { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); size_t size; /* If neither buffer size is known, immediately give up. */ if (__builtin_constant_p(p_size) && __builtin_constant_p(q_size) && p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strcpy(p, q); size = strlen(q) + 1; /* Compile-time check for const size overflow. */ if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* Run-time check for dynamic size overflow. 
 */
	if (p_size < size)
		fortify_panic(FORTIFY_FUNC_strcpy, FORTIFY_WRITE, p_size, size, p);
	__underlying_memcpy(p, q, size);
	return p;
}

/* Don't use these outside the FORTIFY_SOURCE implementation */
#undef __underlying_memchr
#undef __underlying_memcmp
#undef __underlying_strcat
#undef __underlying_strcpy
#undef __underlying_strlen
#undef __underlying_strncat
#undef __underlying_strncpy
#undef POS
#undef POS0

#endif /* _LINUX_FORTIFY_STRING_H_ */
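/*
 * Editor's note: a short sketch, not part of the header above, showing the
 * replacement patterns its comments recommend: strscpy() instead of
 * strncpy()/strcpy(), and struct_group() so that a multi-member copy has a
 * named destination of known size. struct demo_record and demo_fill() are
 * invented for illustration and assume the usual kernel headers
 * (linux/string.h, linux/stddef.h, linux/printk.h).
 */
struct demo_record {
	u32 id;
	/* Members that are legitimately copied as one block. */
	struct_group(payload,
		u8 hdr[4];
		u8 body[12];
	);
	char label[16];
};

static void demo_fill(struct demo_record *rec, const struct demo_record *src,
		      const char *name)
{
	/* Field-spanning copy goes through the named group, not past a member. */
	memcpy(&rec->payload, &src->payload, sizeof(rec->payload));

	/* Always NUL-terminated; truncation shows up as a negative return. */
	if (strscpy(rec->label, name, sizeof(rec->label)) < 0)
		pr_warn("demo: label truncated\n");
}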
/*
 * Copyright (C) 2016 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Rob Clark <robdclark@gmail.com> */ #ifndef DRM_PRINT_H_ #define DRM_PRINT_H_ #include <linux/compiler.h> #include <linux/printk.h> #include <linux/seq_file.h> #include <linux/device.h> #include <linux/debugfs.h> #include <linux/dynamic_debug.h> #include <drm/drm.h> struct drm_device; /* Do *not* use outside of drm_print.[ch]! */ extern unsigned long __drm_debug; /** * DOC: print * * A simple wrapper for dev_printk(), seq_printf(), etc. Allows same * debug code to be used for both debugfs and printk logging. * * For example:: * * void log_some_info(struct drm_printer *p) * { * drm_printf(p, "foo=%d\n", foo); * drm_printf(p, "bar=%d\n", bar); * } * * #ifdef CONFIG_DEBUG_FS * void debugfs_show(struct seq_file *f) * { * struct drm_printer p = drm_seq_file_printer(f); * log_some_info(&p); * } * #endif * * void some_other_function(...) * { * struct drm_printer p = drm_info_printer(drm->dev); * log_some_info(&p); * } */ /** * enum drm_debug_category - The DRM debug categories * * Each of the DRM debug logging macros use a specific category, and the logging * is filtered by the drm.debug module parameter. This enum specifies the values * for the interface. * * Each DRM_DEBUG_<CATEGORY> macro logs to DRM_UT_<CATEGORY> category, except * DRM_DEBUG() logs to DRM_UT_CORE. * * Enabling verbose debug messages is done through the drm.debug parameter, each * category being enabled by a bit: * * - drm.debug=0x1 will enable CORE messages * - drm.debug=0x2 will enable DRIVER messages * - drm.debug=0x3 will enable CORE and DRIVER messages * - ... * - drm.debug=0x1ff will enable all messages * * An interesting feature is that it's possible to enable verbose logging at * run-time by echoing the debug value in its sysfs node:: * * # echo 0xf > /sys/module/drm/parameters/debug * */ enum drm_debug_category { /* These names must match those in DYNAMIC_DEBUG_CLASSBITS */ /** * @DRM_UT_CORE: Used in the generic drm code: drm_ioctl.c, drm_mm.c, * drm_memory.c, ... */ DRM_UT_CORE, /** * @DRM_UT_DRIVER: Used in the vendor specific part of the driver: i915, * radeon, ... macro. */ DRM_UT_DRIVER, /** * @DRM_UT_KMS: Used in the modesetting code. */ DRM_UT_KMS, /** * @DRM_UT_PRIME: Used in the prime code. */ DRM_UT_PRIME, /** * @DRM_UT_ATOMIC: Used in the atomic code. */ DRM_UT_ATOMIC, /** * @DRM_UT_VBL: Used for verbose debug message in the vblank code. */ DRM_UT_VBL, /** * @DRM_UT_STATE: Used for verbose atomic state debugging. */ DRM_UT_STATE, /** * @DRM_UT_LEASE: Used in the lease code. */ DRM_UT_LEASE, /** * @DRM_UT_DP: Used in the DP code. */ DRM_UT_DP, /** * @DRM_UT_DRMRES: Used in the drm managed resources code. */ DRM_UT_DRMRES }; static inline bool drm_debug_enabled_raw(enum drm_debug_category category) { return unlikely(__drm_debug & BIT(category)); } #define drm_debug_enabled_instrumented(category) \ ({ \ pr_debug("todo: is this frequent enough to optimize ?\n"); \ drm_debug_enabled_raw(category); \ }) #if defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) /* * the drm.debug API uses dyndbg, so each drm_*dbg macro/callsite gets * a descriptor, and only enabled callsites are reachable. They use * the private macro to avoid re-testing the enable-bit. 
*/ #define __drm_debug_enabled(category) true #define drm_debug_enabled(category) drm_debug_enabled_instrumented(category) #else #define __drm_debug_enabled(category) drm_debug_enabled_raw(category) #define drm_debug_enabled(category) drm_debug_enabled_raw(category) #endif /** * struct drm_printer - drm output "stream" * * Do not use struct members directly. Use drm_printer_seq_file(), * drm_printer_info(), etc to initialize. And drm_printf() for output. */ struct drm_printer { /* private: */ void (*printfn)(struct drm_printer *p, struct va_format *vaf); void (*puts)(struct drm_printer *p, const char *str); void *arg; const char *prefix; enum drm_debug_category category; }; void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf); void __drm_puts_coredump(struct drm_printer *p, const char *str); void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf); void __drm_puts_seq_file(struct drm_printer *p, const char *str); void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf); void __drm_printfn_dbg(struct drm_printer *p, struct va_format *vaf); void __drm_printfn_err(struct drm_printer *p, struct va_format *vaf); __printf(2, 3) void drm_printf(struct drm_printer *p, const char *f, ...); void drm_puts(struct drm_printer *p, const char *str); void drm_print_regset32(struct drm_printer *p, struct debugfs_regset32 *regset); void drm_print_bits(struct drm_printer *p, unsigned long value, const char * const bits[], unsigned int nbits); __printf(2, 0) /** * drm_vprintf - print to a &drm_printer stream * @p: the &drm_printer * @fmt: format string * @va: the va_list */ static inline void drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va) { struct va_format vaf = { .fmt = fmt, .va = va }; p->printfn(p, &vaf); } /** * drm_printf_indent - Print to a &drm_printer stream with indentation * @printer: DRM printer * @indent: Tab indentation level (max 5) * @fmt: Format string */ #define drm_printf_indent(printer, indent, fmt, ...) \ drm_printf((printer), "%.*s" fmt, (indent), "\t\t\t\t\tX", ##__VA_ARGS__) /** * struct drm_print_iterator - local struct used with drm_printer_coredump * @data: Pointer to the devcoredump output buffer * @start: The offset within the buffer to start writing * @remain: The number of bytes to write for this iteration */ struct drm_print_iterator { void *data; ssize_t start; ssize_t remain; /* private: */ ssize_t offset; }; /** * drm_coredump_printer - construct a &drm_printer that can output to a buffer * from the read function for devcoredump * @iter: A pointer to a struct drm_print_iterator for the read instance * * This wrapper extends drm_printf() to work with a dev_coredumpm() callback * function. The passed in drm_print_iterator struct contains the buffer * pointer, size and offset as passed in from devcoredump. * * For example:: * * void coredump_read(char *buffer, loff_t offset, size_t count, * void *data, size_t datalen) * { * struct drm_print_iterator iter; * struct drm_printer p; * * iter.data = buffer; * iter.start = offset; * iter.remain = count; * * p = drm_coredump_printer(&iter); * * drm_printf(p, "foo=%d\n", foo); * } * * void makecoredump(...) * { * ... * dev_coredumpm(dev, THIS_MODULE, data, 0, GFP_KERNEL, * coredump_read, ...) 
* } * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_coredump_printer(struct drm_print_iterator *iter) { struct drm_printer p = { .printfn = __drm_printfn_coredump, .puts = __drm_puts_coredump, .arg = iter, }; /* Set the internal offset of the iterator to zero */ iter->offset = 0; return p; } /** * drm_seq_file_printer - construct a &drm_printer that outputs to &seq_file * @f: the &struct seq_file to output to * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_seq_file_printer(struct seq_file *f) { struct drm_printer p = { .printfn = __drm_printfn_seq_file, .puts = __drm_puts_seq_file, .arg = f, }; return p; } /** * drm_info_printer - construct a &drm_printer that outputs to dev_printk() * @dev: the &struct device pointer * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_info_printer(struct device *dev) { struct drm_printer p = { .printfn = __drm_printfn_info, .arg = dev, }; return p; } /** * drm_dbg_printer - construct a &drm_printer for drm device specific output * @drm: the &struct drm_device pointer, or NULL * @category: the debug category to use * @prefix: debug output prefix, or NULL for no prefix * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_dbg_printer(struct drm_device *drm, enum drm_debug_category category, const char *prefix) { struct drm_printer p = { .printfn = __drm_printfn_dbg, .arg = drm, .prefix = prefix, .category = category, }; return p; } /** * drm_err_printer - construct a &drm_printer that outputs to drm_err() * @drm: the &struct drm_device pointer * @prefix: debug output prefix, or NULL for no prefix * * RETURNS: * The &drm_printer object */ static inline struct drm_printer drm_err_printer(struct drm_device *drm, const char *prefix) { struct drm_printer p = { .printfn = __drm_printfn_err, .arg = drm, .prefix = prefix }; return p; } /* * struct device based logging * * Prefer drm_device based logging over device or printk based logging. */ __printf(3, 4) void drm_dev_printk(const struct device *dev, const char *level, const char *format, ...); struct _ddebug; __printf(4, 5) void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev, enum drm_debug_category category, const char *format, ...); /** * DRM_DEV_ERROR() - Error output. * * NOTE: this is deprecated in favor of drm_err() or dev_err(). * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_ERROR(dev, fmt, ...) \ drm_dev_printk(dev, KERN_ERR, "*ERROR* " fmt, ##__VA_ARGS__) /** * DRM_DEV_ERROR_RATELIMITED() - Rate limited error output. * * NOTE: this is deprecated in favor of drm_err_ratelimited() or * dev_err_ratelimited(). * * @dev: device pointer * @fmt: printf() like format string. * * Like DRM_ERROR() but won't flood the log. */ #define DRM_DEV_ERROR_RATELIMITED(dev, fmt, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ \ if (__ratelimit(&_rs)) \ DRM_DEV_ERROR(dev, fmt, ##__VA_ARGS__); \ }) /* NOTE: this is deprecated in favor of drm_info() or dev_info(). */ #define DRM_DEV_INFO(dev, fmt, ...) \ drm_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_info_once() or dev_info_once(). */ #define DRM_DEV_INFO_ONCE(dev, fmt, ...) \ ({ \ static bool __print_once __read_mostly; \ if (!__print_once) { \ __print_once = true; \ DRM_DEV_INFO(dev, fmt, ##__VA_ARGS__); \ } \ }) #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) #define drm_dev_dbg(dev, cat, fmt, ...) 
\ __drm_dev_dbg(NULL, dev, cat, fmt, ##__VA_ARGS__) #else #define drm_dev_dbg(dev, cat, fmt, ...) \ _dynamic_func_call_cls(cat, fmt, __drm_dev_dbg, \ dev, cat, fmt, ##__VA_ARGS__) #endif /** * DRM_DEV_DEBUG() - Debug output for generic drm code * * NOTE: this is deprecated in favor of drm_dbg_core(). * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_DEBUG(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_CORE, fmt, ##__VA_ARGS__) /** * DRM_DEV_DEBUG_DRIVER() - Debug output for vendor specific part of the driver * * NOTE: this is deprecated in favor of drm_dbg() or dev_dbg(). * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_DEBUG_DRIVER(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) /** * DRM_DEV_DEBUG_KMS() - Debug output for modesetting code * * NOTE: this is deprecated in favor of drm_dbg_kms(). * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_DEBUG_KMS(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_KMS, fmt, ##__VA_ARGS__) /* * struct drm_device based logging * * Prefer drm_device based logging over device or prink based logging. */ /* Helper for struct drm_device based logging. */ #define __drm_printk(drm, level, type, fmt, ...) \ dev_##level##type((drm) ? (drm)->dev : NULL, "[drm] " fmt, ##__VA_ARGS__) #define drm_info(drm, fmt, ...) \ __drm_printk((drm), info,, fmt, ##__VA_ARGS__) #define drm_notice(drm, fmt, ...) \ __drm_printk((drm), notice,, fmt, ##__VA_ARGS__) #define drm_warn(drm, fmt, ...) \ __drm_printk((drm), warn,, fmt, ##__VA_ARGS__) #define drm_err(drm, fmt, ...) \ __drm_printk((drm), err,, "*ERROR* " fmt, ##__VA_ARGS__) #define drm_info_once(drm, fmt, ...) \ __drm_printk((drm), info, _once, fmt, ##__VA_ARGS__) #define drm_notice_once(drm, fmt, ...) \ __drm_printk((drm), notice, _once, fmt, ##__VA_ARGS__) #define drm_warn_once(drm, fmt, ...) \ __drm_printk((drm), warn, _once, fmt, ##__VA_ARGS__) #define drm_err_once(drm, fmt, ...) \ __drm_printk((drm), err, _once, "*ERROR* " fmt, ##__VA_ARGS__) #define drm_err_ratelimited(drm, fmt, ...) \ __drm_printk((drm), err, _ratelimited, "*ERROR* " fmt, ##__VA_ARGS__) #define drm_dbg_core(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_CORE, fmt, ##__VA_ARGS__) #define drm_dbg_driver(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) #define drm_dbg_kms(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_KMS, fmt, ##__VA_ARGS__) #define drm_dbg_prime(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_PRIME, fmt, ##__VA_ARGS__) #define drm_dbg_atomic(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_ATOMIC, fmt, ##__VA_ARGS__) #define drm_dbg_vbl(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_VBL, fmt, ##__VA_ARGS__) #define drm_dbg_state(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_STATE, fmt, ##__VA_ARGS__) #define drm_dbg_lease(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_LEASE, fmt, ##__VA_ARGS__) #define drm_dbg_dp(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DP, fmt, ##__VA_ARGS__) #define drm_dbg_drmres(drm, fmt, ...) \ drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DRMRES, fmt, ##__VA_ARGS__) #define drm_dbg(drm, fmt, ...) drm_dbg_driver(drm, fmt, ##__VA_ARGS__) /* * printk based logging * * Prefer drm_device based logging over device or prink based logging. 
*/ __printf(3, 4) void ___drm_dbg(struct _ddebug *desc, enum drm_debug_category category, const char *format, ...); __printf(1, 2) void __drm_err(const char *format, ...); #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) #define __drm_dbg(cat, fmt, ...) ___drm_dbg(NULL, cat, fmt, ##__VA_ARGS__) #else #define __drm_dbg(cat, fmt, ...) \ _dynamic_func_call_cls(cat, fmt, ___drm_dbg, \ cat, fmt, ##__VA_ARGS__) #endif /* Macros to make printk easier */ #define _DRM_PRINTK(once, level, fmt, ...) \ printk##once(KERN_##level "[" DRM_NAME "] " fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_info(). */ #define DRM_INFO(fmt, ...) \ _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_notice(). */ #define DRM_NOTE(fmt, ...) \ _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_warn(). */ #define DRM_WARN(fmt, ...) \ _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_info_once(). */ #define DRM_INFO_ONCE(fmt, ...) \ _DRM_PRINTK(_once, INFO, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_notice_once(). */ #define DRM_NOTE_ONCE(fmt, ...) \ _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_warn_once(). */ #define DRM_WARN_ONCE(fmt, ...) \ _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_err(). */ #define DRM_ERROR(fmt, ...) \ __drm_err(fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of pr_err_ratelimited(). */ #define DRM_ERROR_RATELIMITED(fmt, ...) \ DRM_DEV_ERROR_RATELIMITED(NULL, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_core(NULL, ...). */ #define DRM_DEBUG(fmt, ...) \ __drm_dbg(DRM_UT_CORE, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg(NULL, ...). */ #define DRM_DEBUG_DRIVER(fmt, ...) \ __drm_dbg(DRM_UT_DRIVER, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_kms(NULL, ...). */ #define DRM_DEBUG_KMS(fmt, ...) \ __drm_dbg(DRM_UT_KMS, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_prime(NULL, ...). */ #define DRM_DEBUG_PRIME(fmt, ...) \ __drm_dbg(DRM_UT_PRIME, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_atomic(NULL, ...). */ #define DRM_DEBUG_ATOMIC(fmt, ...) \ __drm_dbg(DRM_UT_ATOMIC, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_vbl(NULL, ...). */ #define DRM_DEBUG_VBL(fmt, ...) \ __drm_dbg(DRM_UT_VBL, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_lease(NULL, ...). */ #define DRM_DEBUG_LEASE(fmt, ...) \ __drm_dbg(DRM_UT_LEASE, fmt, ##__VA_ARGS__) /* NOTE: this is deprecated in favor of drm_dbg_dp(NULL, ...). */ #define DRM_DEBUG_DP(fmt, ...) \ __drm_dbg(DRM_UT_DP, fmt, ## __VA_ARGS__) #define __DRM_DEFINE_DBG_RATELIMITED(category, drm, fmt, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(rs_, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);\ const struct drm_device *drm_ = (drm); \ \ if (drm_debug_enabled(DRM_UT_ ## category) && __ratelimit(&rs_)) \ drm_dev_printk(drm_ ? drm_->dev : NULL, KERN_DEBUG, fmt, ## __VA_ARGS__); \ }) #define drm_dbg_ratelimited(drm, fmt, ...) \ __DRM_DEFINE_DBG_RATELIMITED(DRIVER, drm, fmt, ## __VA_ARGS__) #define drm_dbg_kms_ratelimited(drm, fmt, ...) \ __DRM_DEFINE_DBG_RATELIMITED(KMS, drm, fmt, ## __VA_ARGS__) /* * struct drm_device based WARNs * * drm_WARN*() acts like WARN*(), but with the key difference of * using device specific information so that we know from which device * warning is originating from. 
 *
 * Prefer drm_device based drm_WARN* over regular WARN*
 */

/* Helper for struct drm_device based WARNs */
#define drm_WARN(drm, condition, format, arg...)			\
	WARN(condition, "%s %s: " format,				\
			dev_driver_string((drm)->dev),			\
			dev_name((drm)->dev), ## arg)

#define drm_WARN_ONCE(drm, condition, format, arg...)			\
	WARN_ONCE(condition, "%s %s: " format,				\
			dev_driver_string((drm)->dev),			\
			dev_name((drm)->dev), ## arg)

#define drm_WARN_ON(drm, x)						\
	drm_WARN((drm), (x), "%s",					\
		 "drm_WARN_ON(" __stringify(x) ")")

#define drm_WARN_ON_ONCE(drm, x)					\
	drm_WARN_ONCE((drm), (x), "%s",					\
		      "drm_WARN_ON_ONCE(" __stringify(x) ")")

#endif /* DRM_PRINT_H_ */
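/*
 * Editor's note: a minimal sketch, not part of the header above, of the
 * "one dump routine, several sinks" pattern the DOC: print section
 * describes. demo_dump_state(), demo_debugfs_show() and demo_log_state()
 * are invented names; the drm_printer constructors and drm_printf*()
 * calls are the APIs declared above.
 */
static void demo_dump_state(struct drm_printer *p)
{
	drm_printf(p, "demo state:\n");
	/* One tab of indentation for the detail line. */
	drm_printf_indent(p, 1, "enabled=%d\n", 1);
}

/* debugfs: route the same output into a seq_file. */
static int demo_debugfs_show(struct seq_file *m, void *unused)
{
	struct drm_printer p = drm_seq_file_printer(m);

	demo_dump_state(&p);
	return 0;
}

/* dmesg: route the same output through the DRIVER debug category. */
static void demo_log_state(struct drm_device *drm)
{
	struct drm_printer p = drm_dbg_printer(drm, DRM_UT_DRIVER, "demo");

	demo_dump_state(&p);
}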
// SPDX-License-Identifier: GPL-2.0 /* Generic nexthop implementation * * Copyright (c) 2017-19 Cumulus Networks * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> */ #include <linux/nexthop.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/arp.h> #include <net/ipv6_stubs.h> #include <net/lwtunnel.h> #include <net/ndisc.h> #include <net/nexthop.h> #include <net/route.h> #include <net/sock.h> #define NH_RES_DEFAULT_IDLE_TIMER (120 * HZ) #define NH_RES_DEFAULT_UNBALANCED_TIMER 0 /* No forced rebalancing.
*/ static void remove_nexthop(struct net *net, struct nexthop *nh, struct nl_info *nlinfo); #define NH_DEV_HASHBITS 8 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) #define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS | \ NHA_OP_FLAG_DUMP_HW_STATS) static const struct nla_policy rtm_nh_policy_new[] = { [NHA_ID] = { .type = NLA_U32 }, [NHA_GROUP] = { .type = NLA_BINARY }, [NHA_GROUP_TYPE] = { .type = NLA_U16 }, [NHA_BLACKHOLE] = { .type = NLA_FLAG }, [NHA_OIF] = { .type = NLA_U32 }, [NHA_GATEWAY] = { .type = NLA_BINARY }, [NHA_ENCAP_TYPE] = { .type = NLA_U16 }, [NHA_ENCAP] = { .type = NLA_NESTED }, [NHA_FDB] = { .type = NLA_FLAG }, [NHA_RES_GROUP] = { .type = NLA_NESTED }, [NHA_HW_STATS_ENABLE] = NLA_POLICY_MAX(NLA_U32, true), }; static const struct nla_policy rtm_nh_policy_get[] = { [NHA_ID] = { .type = NLA_U32 }, [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, NHA_OP_FLAGS_DUMP_ALL), }; static const struct nla_policy rtm_nh_policy_del[] = { [NHA_ID] = { .type = NLA_U32 }, }; static const struct nla_policy rtm_nh_policy_dump[] = { [NHA_OIF] = { .type = NLA_U32 }, [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, [NHA_FDB] = { .type = NLA_FLAG }, [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, NHA_OP_FLAGS_DUMP_ALL), }; static const struct nla_policy rtm_nh_res_policy_new[] = { [NHA_RES_GROUP_BUCKETS] = { .type = NLA_U16 }, [NHA_RES_GROUP_IDLE_TIMER] = { .type = NLA_U32 }, [NHA_RES_GROUP_UNBALANCED_TIMER] = { .type = NLA_U32 }, }; static const struct nla_policy rtm_nh_policy_dump_bucket[] = { [NHA_ID] = { .type = NLA_U32 }, [NHA_OIF] = { .type = NLA_U32 }, [NHA_MASTER] = { .type = NLA_U32 }, [NHA_RES_BUCKET] = { .type = NLA_NESTED }, }; static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = { [NHA_RES_BUCKET_NH_ID] = { .type = NLA_U32 }, }; static const struct nla_policy rtm_nh_policy_get_bucket[] = { [NHA_ID] = { .type = NLA_U32 }, [NHA_RES_BUCKET] = { .type = NLA_NESTED }, }; static const struct nla_policy rtm_nh_res_bucket_policy_get[] = { [NHA_RES_BUCKET_INDEX] = { .type = NLA_U16 }, }; static bool nexthop_notifiers_is_empty(struct net *net) { return !net->nexthop.notifier_chain.head; } static void __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, const struct nh_info *nhi) { nh_info->dev = nhi->fib_nhc.nhc_dev; nh_info->gw_family = nhi->fib_nhc.nhc_gw_family; if (nh_info->gw_family == AF_INET) nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4; else if (nh_info->gw_family == AF_INET6) nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; nh_info->id = nhi->nh_parent->id; nh_info->is_reject = nhi->reject_nh; nh_info->is_fdb = nhi->fdb_nh; nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; } static int nh_notifier_single_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { struct nh_info *nhi = rtnl_dereference(nh->nh_info); info->type = NH_NOTIFIER_INFO_TYPE_SINGLE; info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); if (!info->nh) return -ENOMEM; __nh_notifier_single_info_init(info->nh, nhi); return 0; } static void nh_notifier_single_info_fini(struct nh_notifier_info *info) { kfree(info->nh); } static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, struct nh_group *nhg) { u16 num_nh = nhg->num_nh; int i; info->type = NH_NOTIFIER_INFO_TYPE_GRP; info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), GFP_KERNEL); if (!info->nh_grp) return -ENOMEM; info->nh_grp->num_nh = num_nh; info->nh_grp->is_fdb = nhg->fdb_nh; info->nh_grp->hw_stats = nhg->hw_stats; for (i = 0; i < num_nh; i++) { struct nh_grp_entry *nhge = 
&nhg->nh_entries[i]; struct nh_info *nhi; nhi = rtnl_dereference(nhge->nh->nh_info); info->nh_grp->nh_entries[i].weight = nhge->weight; __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, nhi); } return 0; } static int nh_notifier_res_table_info_init(struct nh_notifier_info *info, struct nh_group *nhg) { struct nh_res_table *res_table = rtnl_dereference(nhg->res_table); u16 num_nh_buckets = res_table->num_nh_buckets; unsigned long size; u16 i; info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE; size = struct_size(info->nh_res_table, nhs, num_nh_buckets); info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN); if (!info->nh_res_table) return -ENOMEM; info->nh_res_table->num_nh_buckets = num_nh_buckets; info->nh_res_table->hw_stats = nhg->hw_stats; for (i = 0; i < num_nh_buckets; i++) { struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; struct nh_grp_entry *nhge; struct nh_info *nhi; nhge = rtnl_dereference(bucket->nh_entry); nhi = rtnl_dereference(nhge->nh->nh_info); __nh_notifier_single_info_init(&info->nh_res_table->nhs[i], nhi); } return 0; } static int nh_notifier_grp_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); if (nhg->hash_threshold) return nh_notifier_mpath_info_init(info, nhg); else if (nhg->resilient) return nh_notifier_res_table_info_init(info, nhg); return -EINVAL; } static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, const struct nexthop *nh) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); if (nhg->hash_threshold) kfree(info->nh_grp); else if (nhg->resilient) vfree(info->nh_res_table); } static int nh_notifier_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { info->id = nh->id; if (nh->is_group) return nh_notifier_grp_info_init(info, nh); else return nh_notifier_single_info_init(info, nh); } static void nh_notifier_info_fini(struct nh_notifier_info *info, const struct nexthop *nh) { if (nh->is_group) nh_notifier_grp_info_fini(info, nh); else nh_notifier_single_info_fini(info); } static int call_nexthop_notifiers(struct net *net, enum nexthop_event_type event_type, struct nexthop *nh, struct netlink_ext_ack *extack) { struct nh_notifier_info info = { .net = net, .extack = extack, }; int err; ASSERT_RTNL(); if (nexthop_notifiers_is_empty(net)) return 0; err = nh_notifier_info_init(&info, nh); if (err) { NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); return err; } err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, event_type, &info); nh_notifier_info_fini(&info, nh); return notifier_to_errno(err); } static int nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info, bool force, unsigned int *p_idle_timer_ms) { struct nh_res_table *res_table; struct nh_group *nhg; struct nexthop *nh; int err = 0; /* When 'force' is false, nexthop bucket replacement is performed * because the bucket was deemed to be idle. In this case, capable * listeners can choose to perform an atomic replacement: The bucket is * only replaced if it is inactive. However, if the idle timer interval * is smaller than the interval in which a listener is querying * buckets' activity from the device, then atomic replacement should * not be tried. Pass the idle timer value to listeners, so that they * could determine which type of replacement to perform. 
*/ if (force) { *p_idle_timer_ms = 0; return 0; } rcu_read_lock(); nh = nexthop_find_by_id(info->net, info->id); if (!nh) { err = -EINVAL; goto out; } nhg = rcu_dereference(nh->nh_grp); res_table = rcu_dereference(nhg->res_table); *p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer); out: rcu_read_unlock(); return err; } static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info, u16 bucket_index, bool force, struct nh_info *oldi, struct nh_info *newi) { unsigned int idle_timer_ms; int err; err = nh_notifier_res_bucket_idle_timer_get(info, force, &idle_timer_ms); if (err) return err; info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET; info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket), GFP_KERNEL); if (!info->nh_res_bucket) return -ENOMEM; info->nh_res_bucket->bucket_index = bucket_index; info->nh_res_bucket->idle_timer_ms = idle_timer_ms; info->nh_res_bucket->force = force; __nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi); __nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi); return 0; } static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info) { kfree(info->nh_res_bucket); } static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id, u16 bucket_index, bool force, struct nh_info *oldi, struct nh_info *newi, struct netlink_ext_ack *extack) { struct nh_notifier_info info = { .net = net, .extack = extack, .id = nhg_id, }; int err; if (nexthop_notifiers_is_empty(net)) return 0; err = nh_notifier_res_bucket_info_init(&info, bucket_index, force, oldi, newi); if (err) return err; err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, NEXTHOP_EVENT_BUCKET_REPLACE, &info); nh_notifier_res_bucket_info_fini(&info); return notifier_to_errno(err); } /* There are three users of RES_TABLE, and NHs etc. referenced from there: * * 1) a collection of callbacks for NH maintenance. This operates under * RTNL, * 2) the delayed work that gradually balances the resilient table, * 3) and nexthop_select_path(), operating under RCU. * * Both the delayed work and the RTNL block are writers, and need to * maintain mutual exclusion. Since there are only two and well-known * writers for each table, the RTNL code can make sure it has exclusive * access thus: * * - Have the DW operate without locking; * - synchronously cancel the DW; * - do the writing; * - if the write was not actually a delete, call upkeep, which schedules * DW again if necessary. * * The functions that are always called from the RTNL context use * rtnl_dereference(). The functions that can also be called from the DW do * a raw dereference and rely on the above mutual exclusion scheme. */ #define nh_res_dereference(p) (rcu_dereference_raw(p)) static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id, u16 bucket_index, bool force, struct nexthop *old_nh, struct nexthop *new_nh, struct netlink_ext_ack *extack) { struct nh_info *oldi = nh_res_dereference(old_nh->nh_info); struct nh_info *newi = nh_res_dereference(new_nh->nh_info); return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index, force, oldi, newi, extack); } static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, struct netlink_ext_ack *extack) { struct nh_notifier_info info = { .net = net, .extack = extack, .id = nh->id, }; struct nh_group *nhg; int err; ASSERT_RTNL(); if (nexthop_notifiers_is_empty(net)) return 0; /* At this point, the nexthop buckets are still not populated. 
Only * emit a notification with the logical nexthops, so that a listener * could potentially veto it in case of unsupported configuration. */ nhg = rtnl_dereference(nh->nh_grp); err = nh_notifier_mpath_info_init(&info, nhg); if (err) { NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); return err; } err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, &info); kfree(info.nh_grp); return notifier_to_errno(err); } static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, enum nexthop_event_type event_type, struct nexthop *nh, struct netlink_ext_ack *extack) { struct nh_notifier_info info = { .net = net, .extack = extack, }; int err; err = nh_notifier_info_init(&info, nh); if (err) return err; err = nb->notifier_call(nb, event_type, &info); nh_notifier_info_fini(&info, nh); return notifier_to_errno(err); } static unsigned int nh_dev_hashfn(unsigned int val) { unsigned int mask = NH_DEV_HASHSIZE - 1; return (val ^ (val >> NH_DEV_HASHBITS) ^ (val >> (NH_DEV_HASHBITS * 2))) & mask; } static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) { struct net_device *dev = nhi->fib_nhc.nhc_dev; struct hlist_head *head; unsigned int hash; WARN_ON(!dev); hash = nh_dev_hashfn(dev->ifindex); head = &net->nexthop.devhash[hash]; hlist_add_head(&nhi->dev_hash, head); } static void nexthop_free_group(struct nexthop *nh) { struct nh_group *nhg; int i; nhg = rcu_dereference_raw(nh->nh_grp); for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; WARN_ON(!list_empty(&nhge->nh_list)); free_percpu(nhge->stats); nexthop_put(nhge->nh); } WARN_ON(nhg->spare == nhg); if (nhg->resilient) vfree(rcu_dereference_raw(nhg->res_table)); kfree(nhg->spare); kfree(nhg); } static void nexthop_free_single(struct nexthop *nh) { struct nh_info *nhi; nhi = rcu_dereference_raw(nh->nh_info); switch (nhi->family) { case AF_INET: fib_nh_release(nh->net, &nhi->fib_nh); break; case AF_INET6: ipv6_stub->fib6_nh_release(&nhi->fib6_nh); break; } kfree(nhi); } void nexthop_free_rcu(struct rcu_head *head) { struct nexthop *nh = container_of(head, struct nexthop, rcu); if (nh->is_group) nexthop_free_group(nh); else nexthop_free_single(nh); kfree(nh); } EXPORT_SYMBOL_GPL(nexthop_free_rcu); static struct nexthop *nexthop_alloc(void) { struct nexthop *nh; nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); if (nh) { INIT_LIST_HEAD(&nh->fi_list); INIT_LIST_HEAD(&nh->f6i_list); INIT_LIST_HEAD(&nh->grp_list); INIT_LIST_HEAD(&nh->fdb_list); } return nh; } static struct nh_group *nexthop_grp_alloc(u16 num_nh) { struct nh_group *nhg; nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL); if (nhg) nhg->num_nh = num_nh; return nhg; } static void nh_res_table_upkeep_dw(struct work_struct *work); static struct nh_res_table * nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg) { const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets; struct nh_res_table *res_table; unsigned long size; size = struct_size(res_table, nh_buckets, num_nh_buckets); res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN); if (!res_table) return NULL; res_table->net = net; res_table->nhg_id = nhg_id; INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw); INIT_LIST_HEAD(&res_table->uw_nh_entries); res_table->idle_timer = cfg->nh_grp_res_idle_timer; res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer; res_table->num_nh_buckets = num_nh_buckets; return res_table; } static void 
nh_base_seq_inc(struct net *net) { while (++net->nexthop.seq == 0) ; } /* no reference taken; rcu lock or rtnl must be held */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id) { struct rb_node **pp, *parent = NULL, *next; pp = &net->nexthop.rb_root.rb_node; while (1) { struct nexthop *nh; next = rcu_dereference_raw(*pp); if (!next) break; parent = next; nh = rb_entry(parent, struct nexthop, rb_node); if (id < nh->id) pp = &next->rb_left; else if (id > nh->id) pp = &next->rb_right; else return nh; } return NULL; } EXPORT_SYMBOL_GPL(nexthop_find_by_id); /* used for auto id allocation; called with rtnl held */ static u32 nh_find_unused_id(struct net *net) { u32 id_start = net->nexthop.last_id_allocated; while (1) { net->nexthop.last_id_allocated++; if (net->nexthop.last_id_allocated == id_start) break; if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated)) return net->nexthop.last_id_allocated; } return 0; } static void nh_res_time_set_deadline(unsigned long next_time, unsigned long *deadline) { if (time_before(next_time, *deadline)) *deadline = next_time; } static clock_t nh_res_table_unbalanced_time(struct nh_res_table *res_table) { if (list_empty(&res_table->uw_nh_entries)) return 0; return jiffies_delta_to_clock_t(jiffies - res_table->unbalanced_since); } static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg) { struct nh_res_table *res_table = rtnl_dereference(nhg->res_table); struct nlattr *nest; nest = nla_nest_start(skb, NHA_RES_GROUP); if (!nest) return -EMSGSIZE; if (nla_put_u16(skb, NHA_RES_GROUP_BUCKETS, res_table->num_nh_buckets) || nla_put_u32(skb, NHA_RES_GROUP_IDLE_TIMER, jiffies_to_clock_t(res_table->idle_timer)) || nla_put_u32(skb, NHA_RES_GROUP_UNBALANCED_TIMER, jiffies_to_clock_t(res_table->unbalanced_timer)) || nla_put_u64_64bit(skb, NHA_RES_GROUP_UNBALANCED_TIME, nh_res_table_unbalanced_time(res_table), NHA_RES_GROUP_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge) { struct nh_grp_entry_stats *cpu_stats; cpu_stats = get_cpu_ptr(nhge->stats); u64_stats_update_begin(&cpu_stats->syncp); u64_stats_inc(&cpu_stats->packets); u64_stats_update_end(&cpu_stats->syncp); put_cpu_ptr(cpu_stats); } static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, u64 *ret_packets) { int i; *ret_packets = 0; for_each_possible_cpu(i) { struct nh_grp_entry_stats *cpu_stats; unsigned int start; u64 packets; cpu_stats = per_cpu_ptr(nhge->stats, i); do { start = u64_stats_fetch_begin(&cpu_stats->syncp); packets = u64_stats_read(&cpu_stats->packets); } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); *ret_packets += packets; } } static int nh_notifier_grp_hw_stats_init(struct nh_notifier_info *info, const struct nexthop *nh) { struct nh_group *nhg; int i; ASSERT_RTNL(); nhg = rtnl_dereference(nh->nh_grp); info->id = nh->id; info->type = NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS; info->nh_grp_hw_stats = kzalloc(struct_size(info->nh_grp_hw_stats, stats, nhg->num_nh), GFP_KERNEL); if (!info->nh_grp_hw_stats) return -ENOMEM; info->nh_grp_hw_stats->num_nh = nhg->num_nh; for (i = 0; i < nhg->num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; info->nh_grp_hw_stats->stats[i].id = nhge->nh->id; } return 0; } static void nh_notifier_grp_hw_stats_fini(struct nh_notifier_info *info) { kfree(info->nh_grp_hw_stats); } void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, unsigned int nh_idx, u64 
delta_packets) { info->hw_stats_used = true; info->stats[nh_idx].packets += delta_packets; } EXPORT_SYMBOL(nh_grp_hw_stats_report_delta); static void nh_grp_hw_stats_apply_update(struct nexthop *nh, struct nh_notifier_info *info) { struct nh_group *nhg; int i; ASSERT_RTNL(); nhg = rtnl_dereference(nh->nh_grp); for (i = 0; i < nhg->num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; nhge->packets_hw += info->nh_grp_hw_stats->stats[i].packets; } } static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used) { struct nh_notifier_info info = { .net = nh->net, }; struct net *net = nh->net; int err; if (nexthop_notifiers_is_empty(net)) { *hw_stats_used = false; return 0; } err = nh_notifier_grp_hw_stats_init(&info, nh); if (err) return err; err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, &info); /* Cache whatever we got, even if there was an error, otherwise the * successful stats retrievals would get lost. */ nh_grp_hw_stats_apply_update(nh, &info); *hw_stats_used = info.nh_grp_hw_stats->hw_stats_used; nh_notifier_grp_hw_stats_fini(&info); return notifier_to_errno(err); } static int nla_put_nh_group_stats_entry(struct sk_buff *skb, struct nh_grp_entry *nhge, u32 op_flags) { struct nlattr *nest; u64 packets; nh_grp_entry_stats_read(nhge, &packets); nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) || nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS, packets + nhge->packets_hw)) goto nla_put_failure; if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS && nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS_HW, nhge->packets_hw)) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh, u32 op_flags) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); struct nlattr *nest; bool hw_stats_used; int err; int i; if (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats)) goto err_out; if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS && nhg->hw_stats) { err = nh_grp_hw_stats_update(nh, &hw_stats_used); if (err) goto out; if (nla_put_u32(skb, NHA_HW_STATS_USED, hw_stats_used)) goto err_out; } nest = nla_nest_start(skb, NHA_GROUP_STATS); if (!nest) goto err_out; for (i = 0; i < nhg->num_nh; i++) if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i], op_flags)) goto cancel_out; nla_nest_end(skb, nest); return 0; cancel_out: nla_nest_cancel(skb, nest); err_out: err = -EMSGSIZE; out: return err; } static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, u32 op_flags) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); struct nexthop_grp *p; size_t len = nhg->num_nh * sizeof(*p); struct nlattr *nla; u16 group_type = 0; int i; if (nhg->hash_threshold) group_type = NEXTHOP_GRP_TYPE_MPATH; else if (nhg->resilient) group_type = NEXTHOP_GRP_TYPE_RES; if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type)) goto nla_put_failure; nla = nla_reserve(skb, NHA_GROUP, len); if (!nla) goto nla_put_failure; p = nla_data(nla); for (i = 0; i < nhg->num_nh; ++i) { p->id = nhg->nh_entries[i].nh->id; p->weight = nhg->nh_entries[i].weight - 1; p += 1; } if (nhg->resilient && nla_put_nh_group_res(skb, nhg)) goto nla_put_failure; if (op_flags & NHA_OP_FLAG_DUMP_STATS && (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) || nla_put_nh_group_stats(skb, nh, op_flags))) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } 
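/*
 * Illustrative sketch (editor's addition, not part of this file): decoding
 * the NHA_GROUP payload that nla_put_nh_group() above emits. The attribute
 * carries an array of struct nexthop_grp whose weight field is the effective
 * weight minus one (hence the "weight - 1" above and the "weight > 254"
 * check in nh_check_attr_group()). The helper below is hypothetical.
 */
#if 0	/* example only */
static void example_dump_group_attr(const struct nlattr *nla_group)
{
	const struct nexthop_grp *p = nla_data(nla_group);
	int i, n = nla_len(nla_group) / sizeof(*p);

	for (i = 0; i < n; i++)
		pr_info("nexthop id %u, effective weight %u\n",
			p[i].id, p[i].weight + 1U);
}
#endif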
static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, int event, u32 portid, u32 seq, unsigned int nlflags, u32 op_flags) { struct fib6_nh *fib6_nh; struct fib_nh *fib_nh; struct nlmsghdr *nlh; struct nh_info *nhi; struct nhmsg *nhm; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); if (!nlh) return -EMSGSIZE; nhm = nlmsg_data(nlh); nhm->nh_family = AF_UNSPEC; nhm->nh_flags = nh->nh_flags; nhm->nh_protocol = nh->protocol; nhm->nh_scope = 0; nhm->resvd = 0; if (nla_put_u32(skb, NHA_ID, nh->id)) goto nla_put_failure; if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) goto nla_put_failure; if (nla_put_nh_group(skb, nh, op_flags)) goto nla_put_failure; goto out; } nhi = rtnl_dereference(nh->nh_info); nhm->nh_family = nhi->family; if (nhi->reject_nh) { if (nla_put_flag(skb, NHA_BLACKHOLE)) goto nla_put_failure; goto out; } else if (nhi->fdb_nh) { if (nla_put_flag(skb, NHA_FDB)) goto nla_put_failure; } else { const struct net_device *dev; dev = nhi->fib_nhc.nhc_dev; if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex)) goto nla_put_failure; } nhm->nh_scope = nhi->fib_nhc.nhc_scope; switch (nhi->family) { case AF_INET: fib_nh = &nhi->fib_nh; if (fib_nh->fib_nh_gw_family && nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4)) goto nla_put_failure; break; case AF_INET6: fib6_nh = &nhi->fib6_nh; if (fib6_nh->fib_nh_gw_family && nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6)) goto nla_put_failure; break; } if (nhi->fib_nhc.nhc_lwtstate && lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, NHA_ENCAP, NHA_ENCAP_TYPE) < 0) goto nla_put_failure; out: nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg) { return nla_total_size(0) + /* NHA_RES_GROUP */ nla_total_size(2) + /* NHA_RES_GROUP_BUCKETS */ nla_total_size(4) + /* NHA_RES_GROUP_IDLE_TIMER */ nla_total_size(4) + /* NHA_RES_GROUP_UNBALANCED_TIMER */ nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */ } static size_t nh_nlmsg_size_grp(struct nexthop *nh) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh; size_t tot = nla_total_size(sz) + nla_total_size(2); /* NHA_GROUP_TYPE */ if (nhg->resilient) tot += nh_nlmsg_size_grp_res(nhg); return tot; } static size_t nh_nlmsg_size_single(struct nexthop *nh) { struct nh_info *nhi = rtnl_dereference(nh->nh_info); size_t sz; /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE * are mutually exclusive */ sz = nla_total_size(4); /* NHA_OIF */ switch (nhi->family) { case AF_INET: if (nhi->fib_nh.fib_nh_gw_family) sz += nla_total_size(4); /* NHA_GATEWAY */ break; case AF_INET6: /* NHA_GATEWAY */ if (nhi->fib6_nh.fib_nh_gw_family) sz += nla_total_size(sizeof(const struct in6_addr)); break; } if (nhi->fib_nhc.nhc_lwtstate) { sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate); sz += nla_total_size(2); /* NHA_ENCAP_TYPE */ } return sz; } static size_t nh_nlmsg_size(struct nexthop *nh) { size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg)); sz += nla_total_size(4); /* NHA_ID */ if (nh->is_group) sz += nh_nlmsg_size_grp(nh); else sz += nh_nlmsg_size_single(nh); return sz; } static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) { unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0; u32 seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any()); if (!skb) goto errout; err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0); if (err < 0) { /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP, info->nlh, gfp_any()); return; errout: if (err < 0) rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); } static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket) { return (unsigned long)atomic_long_read(&bucket->used_time); } static unsigned long nh_res_bucket_idle_point(const struct nh_res_table *res_table, const struct nh_res_bucket *bucket, unsigned long now) { unsigned long time = nh_res_bucket_used_time(bucket); /* Bucket was not used since it was migrated. The idle time is now. */ if (time == bucket->migrated_time) return now; return time + res_table->idle_timer; } static unsigned long nh_res_table_unb_point(const struct nh_res_table *res_table) { return res_table->unbalanced_since + res_table->unbalanced_timer; } static void nh_res_bucket_set_idle(const struct nh_res_table *res_table, struct nh_res_bucket *bucket) { unsigned long now = jiffies; atomic_long_set(&bucket->used_time, (long)now); bucket->migrated_time = now; } static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket) { atomic_long_set(&bucket->used_time, (long)jiffies); } static clock_t nh_res_bucket_idle_time(const struct nh_res_bucket *bucket) { unsigned long used_time = nh_res_bucket_used_time(bucket); return jiffies_delta_to_clock_t(jiffies - used_time); } static int nh_fill_res_bucket(struct sk_buff *skb, struct nexthop *nh, struct nh_res_bucket *bucket, u16 bucket_index, int event, u32 portid, u32 seq, unsigned int nlflags, struct netlink_ext_ack *extack) { struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry); struct nlmsghdr *nlh; struct nlattr *nest; struct nhmsg *nhm; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); if (!nlh) return -EMSGSIZE; nhm = nlmsg_data(nlh); nhm->nh_family = AF_UNSPEC; nhm->nh_flags = bucket->nh_flags; nhm->nh_protocol = nh->protocol; nhm->nh_scope = 0; nhm->resvd = 0; if (nla_put_u32(skb, NHA_ID, nh->id)) goto nla_put_failure; nest = nla_nest_start(skb, NHA_RES_BUCKET); if (!nest) goto nla_put_failure; if (nla_put_u16(skb, NHA_RES_BUCKET_INDEX, bucket_index) || nla_put_u32(skb, NHA_RES_BUCKET_NH_ID, nhge->nh->id) || nla_put_u64_64bit(skb, NHA_RES_BUCKET_IDLE_TIME, nh_res_bucket_idle_time(bucket), NHA_RES_BUCKET_PAD)) goto nla_put_failure_nest; nla_nest_end(skb, nest); nlmsg_end(skb, nlh); return 0; nla_put_failure_nest: nla_nest_cancel(skb, nest); nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static void nexthop_bucket_notify(struct nh_res_table *res_table, u16 bucket_index) { struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index]; struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry); struct nexthop *nh = nhge->nh_parent; struct sk_buff *skb; int err = -ENOBUFS; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) goto errout; err = nh_fill_res_bucket(skb, nh, bucket, bucket_index, RTM_NEWNEXTHOPBUCKET, 0, 0, NLM_F_REPLACE, NULL); if (err < 0) { kfree_skb(skb); goto errout; } rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL); return; errout: if (err < 0) rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err); } static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, bool *is_fdb, struct 
netlink_ext_ack *extack) { if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); /* Nesting groups within groups is not supported. */ if (nhg->hash_threshold) { NL_SET_ERR_MSG(extack, "Hash-threshold group can not be a nexthop within a group"); return false; } if (nhg->resilient) { NL_SET_ERR_MSG(extack, "Resilient group can not be a nexthop within a group"); return false; } *is_fdb = nhg->fdb_nh; } else { struct nh_info *nhi = rtnl_dereference(nh->nh_info); if (nhi->reject_nh && npaths > 1) { NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be used in a group with more than 1 path"); return false; } *is_fdb = nhi->fdb_nh; } return true; } static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, struct netlink_ext_ack *extack) { struct nh_info *nhi; nhi = rtnl_dereference(nh->nh_info); if (!nhi->fdb_nh) { NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); return -EINVAL; } if (*nh_family == AF_UNSPEC) { *nh_family = nhi->family; } else if (*nh_family != nhi->family) { NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops"); return -EINVAL; } return 0; } static int nh_check_attr_group(struct net *net, struct nlattr *tb[], size_t tb_size, u16 nh_grp_type, struct netlink_ext_ack *extack) { unsigned int len = nla_len(tb[NHA_GROUP]); u8 nh_family = AF_UNSPEC; struct nexthop_grp *nhg; unsigned int i, j; u8 nhg_fdb = 0; if (!len || len & (sizeof(struct nexthop_grp) - 1)) { NL_SET_ERR_MSG(extack, "Invalid length for nexthop group attribute"); return -EINVAL; } /* convert len to number of nexthop ids */ len /= sizeof(*nhg); nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { if (nhg[i].resvd1 || nhg[i].resvd2) { NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0"); return -EINVAL; } if (nhg[i].weight > 254) { NL_SET_ERR_MSG(extack, "Invalid value for weight"); return -EINVAL; } for (j = i + 1; j < len; ++j) { if (nhg[i].id == nhg[j].id) { NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group"); return -EINVAL; } } } if (tb[NHA_FDB]) nhg_fdb = 1; nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { struct nexthop *nh; bool is_fdb_nh; nh = nexthop_find_by_id(net, nhg[i].id); if (!nh) { NL_SET_ERR_MSG(extack, "Invalid nexthop id"); return -EINVAL; } if (!valid_group_nh(nh, len, &is_fdb_nh, extack)) return -EINVAL; if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) return -EINVAL; if (!nhg_fdb && is_fdb_nh) { NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); return -EINVAL; } } for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { if (!tb[i]) continue; switch (i) { case NHA_HW_STATS_ENABLE: case NHA_FDB: continue; case NHA_RES_GROUP: if (nh_grp_type == NEXTHOP_GRP_TYPE_RES) continue; break; } NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); return -EINVAL; } return 0; } static bool ipv6_good_nh(const struct fib6_nh *nh) { int state = NUD_REACHABLE; struct neighbour *n; rcu_read_lock(); n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); if (n) state = READ_ONCE(n->nud_state); rcu_read_unlock(); return !!(state & NUD_VALID); } static bool ipv4_good_nh(const struct fib_nh *nh) { int state = NUD_REACHABLE; struct neighbour *n; rcu_read_lock(); n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, (__force u32)nh->fib_nh_gw4); if (n) state = READ_ONCE(n->nud_state); rcu_read_unlock(); return !!(state & NUD_VALID); } static bool nexthop_is_good_nh(const struct nexthop *nh) { struct nh_info *nhi = rcu_dereference(nh->nh_info); 
switch (nhi->family) { case AF_INET: return ipv4_good_nh(&nhi->fib_nh); case AF_INET6: return ipv6_good_nh(&nhi->fib6_nh); } return false; } static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash) { int i; for (i = 0; i < nhg->num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; nh_grp_entry_stats_inc(nhge); return nhge->nh; } WARN_ON_ONCE(1); return NULL; } static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) { struct nh_grp_entry *nhge0 = NULL; int i; if (nhg->fdb_nh) return nexthop_select_path_fdb(nhg, hash); for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; /* nexthops always check if it is good and does * not rely on a sysctl for this behavior */ if (!nexthop_is_good_nh(nhge->nh)) continue; if (!nhge0) nhge0 = nhge; if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; nh_grp_entry_stats_inc(nhge); return nhge->nh; } if (!nhge0) nhge0 = &nhg->nh_entries[0]; nh_grp_entry_stats_inc(nhge0); return nhge0->nh; } static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) { struct nh_res_table *res_table = rcu_dereference(nhg->res_table); u16 bucket_index = hash % res_table->num_nh_buckets; struct nh_res_bucket *bucket; struct nh_grp_entry *nhge; /* nexthop_select_path() is expected to return a non-NULL value, so * skip protocol validation and just hand out whatever there is. */ bucket = &res_table->nh_buckets[bucket_index]; nh_res_bucket_set_busy(bucket); nhge = rcu_dereference(bucket->nh_entry); nh_grp_entry_stats_inc(nhge); return nhge->nh; } struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) { struct nh_group *nhg; if (!nh->is_group) return nh; nhg = rcu_dereference(nh->nh_grp); if (nhg->hash_threshold) return nexthop_select_path_hthr(nhg, hash); else if (nhg->resilient) return nexthop_select_path_res(nhg, hash); /* Unreachable. */ return NULL; } EXPORT_SYMBOL_GPL(nexthop_select_path); int nexthop_for_each_fib6_nh(struct nexthop *nh, int (*cb)(struct fib6_nh *nh, void *arg), void *arg) { struct nh_info *nhi; int err; if (nh->is_group) { struct nh_group *nhg; int i; nhg = rcu_dereference_rtnl(nh->nh_grp); for (i = 0; i < nhg->num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; nhi = rcu_dereference_rtnl(nhge->nh->nh_info); err = cb(&nhi->fib6_nh, arg); if (err) return err; } } else { nhi = rcu_dereference_rtnl(nh->nh_info); err = cb(&nhi->fib6_nh, arg); if (err) return err; } return 0; } EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh); static int check_src_addr(const struct in6_addr *saddr, struct netlink_ext_ack *extack) { if (!ipv6_addr_any(saddr)) { NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); return -EINVAL; } return 0; } int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct nh_info *nhi; bool is_fdb_nh; /* fib6_src is unique to a fib6_info and limits the ability to cache * routes in fib6_nh within a nexthop that is potentially shared * across multiple fib entries. If the config wants to use source * routing it can not use nexthop objects. mlxsw also does not allow * fib6_src on routes. 
*/ if (cfg && check_src_addr(&cfg->fc_src, extack) < 0) return -EINVAL; if (nh->is_group) { struct nh_group *nhg; nhg = rtnl_dereference(nh->nh_grp); if (nhg->has_v4) goto no_v4_nh; is_fdb_nh = nhg->fdb_nh; } else { nhi = rtnl_dereference(nh->nh_info); if (nhi->family == AF_INET) goto no_v4_nh; is_fdb_nh = nhi->fdb_nh; } if (is_fdb_nh) { NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); return -EINVAL; } return 0; no_v4_nh: NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop"); return -EINVAL; } EXPORT_SYMBOL_GPL(fib6_check_nexthop); /* if existing nexthop has ipv6 routes linked to it, need * to verify this new spec works with ipv6 */ static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, struct netlink_ext_ack *extack) { struct fib6_info *f6i; if (list_empty(&old->f6i_list)) return 0; list_for_each_entry(f6i, &old->f6i_list, nh_list) { if (check_src_addr(&f6i->fib6_src.addr, extack) < 0) return -EINVAL; } return fib6_check_nexthop(new, NULL, extack); } static int nexthop_check_scope(struct nh_info *nhi, u8 scope, struct netlink_ext_ack *extack) { if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway"); return -EINVAL; } if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) { NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop"); return -EINVAL; } return 0; } /* Invoked by fib add code to verify nexthop by id is ok with * config for prefix; parts of fib_check_nh not done when nexthop * object is used. */ int fib_check_nexthop(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack) { struct nh_info *nhi; int err = 0; if (nh->is_group) { struct nh_group *nhg; nhg = rtnl_dereference(nh->nh_grp); if (nhg->fdb_nh) { NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); err = -EINVAL; goto out; } if (scope == RT_SCOPE_HOST) { NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); err = -EINVAL; goto out; } /* all nexthops in a group have the same scope */ nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info); err = nexthop_check_scope(nhi, scope, extack); } else { nhi = rtnl_dereference(nh->nh_info); if (nhi->fdb_nh) { NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); err = -EINVAL; goto out; } err = nexthop_check_scope(nhi, scope, extack); } out: return err; } static int fib_check_nh_list(struct nexthop *old, struct nexthop *new, struct netlink_ext_ack *extack) { struct fib_info *fi; list_for_each_entry(fi, &old->fi_list, nh_list) { int err; err = fib_check_nexthop(new, fi->fib_scope, extack); if (err) return err; } return 0; } static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge) { return nhge->res.count_buckets == nhge->res.wants_buckets; } static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge) { return nhge->res.count_buckets > nhge->res.wants_buckets; } static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge) { return nhge->res.count_buckets < nhge->res.wants_buckets; } static bool nh_res_table_is_balanced(const struct nh_res_table *res_table) { return list_empty(&res_table->uw_nh_entries); } static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket) { struct nh_grp_entry *nhge; if (bucket->occupied) { nhge = nh_res_dereference(bucket->nh_entry); nhge->res.count_buckets--; bucket->occupied = false; } } static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket, struct nh_grp_entry *nhge) { nh_res_bucket_unset_nh(bucket); bucket->occupied = true; 
rcu_assign_pointer(bucket->nh_entry, nhge); nhge->res.count_buckets++; } static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table, struct nh_res_bucket *bucket, unsigned long *deadline, bool *force) { unsigned long now = jiffies; struct nh_grp_entry *nhge; unsigned long idle_point; if (!bucket->occupied) { /* The bucket is not occupied, its NHGE pointer is either * NULL or obsolete. We _have to_ migrate: set force. */ *force = true; return true; } nhge = nh_res_dereference(bucket->nh_entry); /* If the bucket is populated by an underweight or balanced * nexthop, do not migrate. */ if (!nh_res_nhge_is_ow(nhge)) return false; /* At this point we know that the bucket is populated with an * overweight nexthop. It needs to be migrated to a new nexthop if * the idle timer of unbalanced timer expired. */ idle_point = nh_res_bucket_idle_point(res_table, bucket, now); if (time_after_eq(now, idle_point)) { /* The bucket is idle. We _can_ migrate: unset force. */ *force = false; return true; } /* Unbalanced timer of 0 means "never force". */ if (res_table->unbalanced_timer) { unsigned long unb_point; unb_point = nh_res_table_unb_point(res_table); if (time_after(now, unb_point)) { /* The bucket is not idle, but the unbalanced timer * expired. We _can_ migrate, but set force anyway, * so that drivers know to ignore activity reports * from the HW. */ *force = true; return true; } nh_res_time_set_deadline(unb_point, deadline); } nh_res_time_set_deadline(idle_point, deadline); return false; } static bool nh_res_bucket_migrate(struct nh_res_table *res_table, u16 bucket_index, bool notify, bool notify_nl, bool force) { struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index]; struct nh_grp_entry *new_nhge; struct netlink_ext_ack extack; int err; new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries, struct nh_grp_entry, res.uw_nh_entry); if (WARN_ON_ONCE(!new_nhge)) /* If this function is called, "bucket" is either not * occupied, or it belongs to a next hop that is * overweight. In either case, there ought to be a * corresponding underweight next hop. */ return false; if (notify) { struct nh_grp_entry *old_nhge; old_nhge = nh_res_dereference(bucket->nh_entry); err = call_nexthop_res_bucket_notifiers(res_table->net, res_table->nhg_id, bucket_index, force, old_nhge->nh, new_nhge->nh, &extack); if (err) { pr_err_ratelimited("%s\n", extack._msg); if (!force) return false; /* It is not possible to veto a forced replacement, so * just clear the hardware flags from the nexthop * bucket to indicate to user space that this bucket is * not correctly populated in hardware. */ bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); } } nh_res_bucket_set_nh(bucket, new_nhge); nh_res_bucket_set_idle(res_table, bucket); if (notify_nl) nexthop_bucket_notify(res_table, bucket_index); if (nh_res_nhge_is_balanced(new_nhge)) list_del(&new_nhge->res.uw_nh_entry); return true; } #define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2) static void nh_res_table_upkeep(struct nh_res_table *res_table, bool notify, bool notify_nl) { unsigned long now = jiffies; unsigned long deadline; u16 i; /* Deadline is the next time that upkeep should be run. It is the * earliest time at which one of the buckets might be migrated. * Start at the most pessimistic estimate: either unbalanced_timer * from now, or if there is none, idle_timer from now. For each * encountered time point, call nh_res_time_set_deadline() to * refine the estimate. 
*/ if (res_table->unbalanced_timer) deadline = now + res_table->unbalanced_timer; else deadline = now + res_table->idle_timer; for (i = 0; i < res_table->num_nh_buckets; i++) { struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; bool force; if (nh_res_bucket_should_migrate(res_table, bucket, &deadline, &force)) { if (!nh_res_bucket_migrate(res_table, i, notify, notify_nl, force)) { unsigned long idle_point; /* A driver can override the migration * decision if the HW reports that the * bucket is actually not idle. Therefore * remark the bucket as busy again and * update the deadline. */ nh_res_bucket_set_busy(bucket); idle_point = nh_res_bucket_idle_point(res_table, bucket, now); nh_res_time_set_deadline(idle_point, &deadline); } } } /* If the group is still unbalanced, schedule the next upkeep to * either the deadline computed above, or the minimum deadline, * whichever comes later. */ if (!nh_res_table_is_balanced(res_table)) { unsigned long now = jiffies; unsigned long min_deadline; min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL; if (time_before(deadline, min_deadline)) deadline = min_deadline; queue_delayed_work(system_power_efficient_wq, &res_table->upkeep_dw, deadline - now); } } static void nh_res_table_upkeep_dw(struct work_struct *work) { struct delayed_work *dw = to_delayed_work(work); struct nh_res_table *res_table; res_table = container_of(dw, struct nh_res_table, upkeep_dw); nh_res_table_upkeep(res_table, true, true); } static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table) { cancel_delayed_work_sync(&res_table->upkeep_dw); } static void nh_res_group_rebalance(struct nh_group *nhg, struct nh_res_table *res_table) { int prev_upper_bound = 0; int total = 0; int w = 0; int i; INIT_LIST_HEAD(&res_table->uw_nh_entries); for (i = 0; i < nhg->num_nh; ++i) total += nhg->nh_entries[i].weight; for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; int upper_bound; w += nhge->weight; upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w, total); nhge->res.wants_buckets = upper_bound - prev_upper_bound; prev_upper_bound = upper_bound; if (nh_res_nhge_is_uw(nhge)) { if (list_empty(&res_table->uw_nh_entries)) res_table->unbalanced_since = jiffies; list_add(&nhge->res.uw_nh_entry, &res_table->uw_nh_entries); } } } /* Migrate buckets in res_table so that they reference NHGE's from NHG with * the right NH ID. Set those buckets that do not have a corresponding NHGE * entry in NHG as not occupied. */ static void nh_res_table_migrate_buckets(struct nh_res_table *res_table, struct nh_group *nhg) { u16 i; for (i = 0; i < res_table->num_nh_buckets; i++) { struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; u32 id = rtnl_dereference(bucket->nh_entry)->nh->id; bool found = false; int j; for (j = 0; j < nhg->num_nh; j++) { struct nh_grp_entry *nhge = &nhg->nh_entries[j]; if (nhge->nh->id == id) { nh_res_bucket_set_nh(bucket, nhge); found = true; break; } } if (!found) nh_res_bucket_unset_nh(bucket); } } static void replace_nexthop_grp_res(struct nh_group *oldg, struct nh_group *newg) { /* For NH group replacement, the new NHG might only have a stub * hash table with 0 buckets, because the number of buckets was not * specified. For NH removal, oldg and newg both reference the same * res_table. So in any case, in the following, we want to work * with oldg->res_table. 
*/ struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table); unsigned long prev_unbalanced_since = old_res_table->unbalanced_since; bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries); nh_res_table_cancel_upkeep(old_res_table); nh_res_table_migrate_buckets(old_res_table, newg); nh_res_group_rebalance(newg, old_res_table); if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries)) old_res_table->unbalanced_since = prev_unbalanced_since; nh_res_table_upkeep(old_res_table, true, false); } static void nh_hthr_group_rebalance(struct nh_group *nhg) { int total = 0; int w = 0; int i; for (i = 0; i < nhg->num_nh; ++i) total += nhg->nh_entries[i].weight; for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; int upper_bound; w += nhge->weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; atomic_set(&nhge->hthr.upper_bound, upper_bound); } } static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, struct nl_info *nlinfo) { struct nh_grp_entry *nhges, *new_nhges; struct nexthop *nhp = nhge->nh_parent; struct netlink_ext_ack extack; struct nexthop *nh = nhge->nh; struct nh_group *nhg, *newg; int i, j, err; WARN_ON(!nh); nhg = rtnl_dereference(nhp->nh_grp); newg = nhg->spare; /* last entry, keep it visible and remove the parent */ if (nhg->num_nh == 1) { remove_nexthop(net, nhp, nlinfo); return; } newg->has_v4 = false; newg->is_multipath = nhg->is_multipath; newg->hash_threshold = nhg->hash_threshold; newg->resilient = nhg->resilient; newg->fdb_nh = nhg->fdb_nh; newg->num_nh = nhg->num_nh; /* copy old entries to new except the one getting removed */ nhges = nhg->nh_entries; new_nhges = newg->nh_entries; for (i = 0, j = 0; i < nhg->num_nh; ++i) { struct nh_info *nhi; /* current nexthop getting removed */ if (nhg->nh_entries[i].nh == nh) { newg->num_nh--; continue; } nhi = rtnl_dereference(nhges[i].nh->nh_info); if (nhi->family == AF_INET) newg->has_v4 = true; list_del(&nhges[i].nh_list); new_nhges[j].stats = nhges[i].stats; new_nhges[j].nh_parent = nhges[i].nh_parent; new_nhges[j].nh = nhges[i].nh; new_nhges[j].weight = nhges[i].weight; list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list); j++; } if (newg->hash_threshold) nh_hthr_group_rebalance(newg); else if (newg->resilient) replace_nexthop_grp_res(nhg, newg); rcu_assign_pointer(nhp->nh_grp, newg); list_del(&nhge->nh_list); free_percpu(nhge->stats); nexthop_put(nhge->nh); /* Removal of a NH from a resilient group is notified through * bucket notifications. 
*/ if (newg->hash_threshold) { err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack); if (err) pr_err("%s\n", extack._msg); } if (nlinfo) nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); } static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { struct nh_grp_entry *nhge, *tmp; list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) remove_nh_grp_entry(net, nhge, nlinfo); /* make sure all see the newly published array before releasing rtnl */ synchronize_net(); } static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) { struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); struct nh_res_table *res_table; int i, num_nh = nhg->num_nh; for (i = 0; i < num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; if (WARN_ON(!nhge->nh)) continue; list_del_init(&nhge->nh_list); } if (nhg->resilient) { res_table = rtnl_dereference(nhg->res_table); nh_res_table_cancel_upkeep(res_table); } } /* not called for nexthop replace */ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) { struct fib6_info *f6i, *tmp; bool do_flush = false; struct fib_info *fi; list_for_each_entry(fi, &nh->fi_list, nh_list) { fi->fib_flags |= RTNH_F_DEAD; do_flush = true; } if (do_flush) fib_flush(net); /* ip6_del_rt removes the entry from this list hence the _safe */ list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); ipv6_stub->ip6_del_rt(net, f6i, !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); } } static void __remove_nexthop(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { __remove_nexthop_fib(net, nh); if (nh->is_group) { remove_nexthop_group(nh, nlinfo); } else { struct nh_info *nhi; nhi = rtnl_dereference(nh->nh_info); if (nhi->fib_nhc.nhc_dev) hlist_del(&nhi->dev_hash); remove_nexthop_from_groups(net, nh, nlinfo); } } static void remove_nexthop(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL); /* remove from the tree */ rb_erase(&nh->rb_node, &net->nexthop.rb_root); if (nlinfo) nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo); __remove_nexthop(net, nh, nlinfo); nh_base_seq_inc(net); nexthop_put(nh); } /* if any FIB entries reference this nexthop, any dst entries * need to be regenerated */ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, struct nexthop *replaced_nh) { struct fib6_info *f6i; struct nh_group *nhg; int i; if (!list_empty(&nh->fi_list)) rt_cache_flush(net); list_for_each_entry(f6i, &nh->f6i_list, nh_list) ipv6_stub->fib6_update_sernum(net, f6i); /* if an IPv6 group was replaced, we have to release all old * dsts to make sure all refcounts are released */ if (!replaced_nh->is_group) return; nhg = rtnl_dereference(replaced_nh->nh_grp); for (i = 0; i < nhg->num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info); if (nhi->family == AF_INET6) ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh); } } static int replace_nexthop_grp(struct net *net, struct nexthop *old, struct nexthop *new, const struct nh_config *cfg, struct netlink_ext_ack *extack) { struct nh_res_table *tmp_table = NULL; struct nh_res_table *new_res_table; struct nh_res_table *old_res_table; struct nh_group *oldg, *newg; int i, err; if (!new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); return -EINVAL; } oldg = rtnl_dereference(old->nh_grp); newg = 
rtnl_dereference(new->nh_grp); if (newg->hash_threshold != oldg->hash_threshold) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type."); return -EINVAL; } if (newg->hash_threshold) { err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); if (err) return err; } else if (newg->resilient) { new_res_table = rtnl_dereference(newg->res_table); old_res_table = rtnl_dereference(oldg->res_table); /* Accept if num_nh_buckets was not given, but if it was * given, demand that the value be correct. */ if (cfg->nh_grp_res_has_num_buckets && cfg->nh_grp_res_num_buckets != old_res_table->num_nh_buckets) { NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group."); return -EINVAL; } /* Emit a pre-replace notification so that listeners could veto * a potentially unsupported configuration. Otherwise, * individual bucket replacement notifications would need to be * vetoed, which is something that should only happen if the * bucket is currently active. */ err = call_nexthop_res_table_notifiers(net, new, extack); if (err) return err; if (cfg->nh_grp_res_has_idle_timer) old_res_table->idle_timer = cfg->nh_grp_res_idle_timer; if (cfg->nh_grp_res_has_unbalanced_timer) old_res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer; replace_nexthop_grp_res(oldg, newg); tmp_table = new_res_table; rcu_assign_pointer(newg->res_table, old_res_table); rcu_assign_pointer(newg->spare->res_table, old_res_table); } /* update parents - used by nexthop code for cleanup */ for (i = 0; i < newg->num_nh; i++) newg->nh_entries[i].nh_parent = old; rcu_assign_pointer(old->nh_grp, newg); /* Make sure concurrent readers are not using 'oldg' anymore. */ synchronize_net(); if (newg->resilient) { rcu_assign_pointer(oldg->res_table, tmp_table); rcu_assign_pointer(oldg->spare->res_table, tmp_table); } for (i = 0; i < oldg->num_nh; i++) oldg->nh_entries[i].nh_parent = new; rcu_assign_pointer(new->nh_grp, oldg); return 0; } static void nh_group_v4_update(struct nh_group *nhg) { struct nh_grp_entry *nhges; bool has_v4 = false; int i; nhges = nhg->nh_entries; for (i = 0; i < nhg->num_nh; i++) { struct nh_info *nhi; nhi = rtnl_dereference(nhges[i].nh->nh_info); if (nhi->family == AF_INET) has_v4 = true; } nhg->has_v4 = has_v4; } static int replace_nexthop_single_notify_res(struct net *net, struct nh_res_table *res_table, struct nexthop *old, struct nh_info *oldi, struct nh_info *newi, struct netlink_ext_ack *extack) { u32 nhg_id = res_table->nhg_id; int err; u16 i; for (i = 0; i < res_table->num_nh_buckets; i++) { struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; struct nh_grp_entry *nhge; nhge = rtnl_dereference(bucket->nh_entry); if (nhge->nh == old) { err = __call_nexthop_res_bucket_notifiers(net, nhg_id, i, true, oldi, newi, extack); if (err) goto err_notify; } } return 0; err_notify: while (i-- > 0) { struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; struct nh_grp_entry *nhge; nhge = rtnl_dereference(bucket->nh_entry); if (nhge->nh == old) __call_nexthop_res_bucket_notifiers(net, nhg_id, i, true, newi, oldi, extack); } return err; } static int replace_nexthop_single_notify(struct net *net, struct nexthop *group_nh, struct nexthop *old, struct nh_info *oldi, struct nh_info *newi, struct netlink_ext_ack *extack) { struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp); struct nh_res_table *res_table; if (nhg->hash_threshold) { return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, group_nh, extack); } else if (nhg->resilient) { 
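		/* Resilient groups are notified per bucket: only buckets that
		 * currently point at the replaced nexthop get a (forced)
		 * bucket replacement notification.
		 */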
res_table = rtnl_dereference(nhg->res_table); return replace_nexthop_single_notify_res(net, res_table, old, oldi, newi, extack); } return -EINVAL; } static int replace_nexthop_single(struct net *net, struct nexthop *old, struct nexthop *new, struct netlink_ext_ack *extack) { u8 old_protocol, old_nh_flags; struct nh_info *oldi, *newi; struct nh_grp_entry *nhge; int err; if (new->is_group) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); return -EINVAL; } err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); if (err) return err; /* Hardware flags were set on 'old' as 'new' is not in the red-black * tree. Therefore, inherit the flags from 'old' to 'new'. */ new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP); oldi = rtnl_dereference(old->nh_info); newi = rtnl_dereference(new->nh_info); newi->nh_parent = old; oldi->nh_parent = new; old_protocol = old->protocol; old_nh_flags = old->nh_flags; old->protocol = new->protocol; old->nh_flags = new->nh_flags; rcu_assign_pointer(old->nh_info, newi); rcu_assign_pointer(new->nh_info, oldi); /* Send a replace notification for all the groups using the nexthop. */ list_for_each_entry(nhge, &old->grp_list, nh_list) { struct nexthop *nhp = nhge->nh_parent; err = replace_nexthop_single_notify(net, nhp, old, oldi, newi, extack); if (err) goto err_notify; } /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially * update IPv4 indication in all the groups using the nexthop. */ if (oldi->family == AF_INET && newi->family == AF_INET6) { list_for_each_entry(nhge, &old->grp_list, nh_list) { struct nexthop *nhp = nhge->nh_parent; struct nh_group *nhg; nhg = rtnl_dereference(nhp->nh_grp); nh_group_v4_update(nhg); } } return 0; err_notify: rcu_assign_pointer(new->nh_info, newi); rcu_assign_pointer(old->nh_info, oldi); old->nh_flags = old_nh_flags; old->protocol = old_protocol; oldi->nh_parent = old; newi->nh_parent = new; list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) { struct nexthop *nhp = nhge->nh_parent; replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL); } call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack); return err; } static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, struct nl_info *info) { struct fib6_info *f6i; if (!list_empty(&nh->fi_list)) { struct fib_info *fi; /* expectation is a few fib_info per nexthop and then * a lot of routes per fib_info. 
So mark the fib_info * and then walk the fib tables once */ list_for_each_entry(fi, &nh->fi_list, nh_list) fi->nh_updated = true; fib_info_notify_update(net, info); list_for_each_entry(fi, &nh->fi_list, nh_list) fi->nh_updated = false; } list_for_each_entry(f6i, &nh->f6i_list, nh_list) ipv6_stub->fib6_rt_update(net, f6i, info); } /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries * linked to this nexthop and for all groups that the nexthop * is a member of */ static void nexthop_replace_notify(struct net *net, struct nexthop *nh, struct nl_info *info) { struct nh_grp_entry *nhge; __nexthop_replace_notify(net, nh, info); list_for_each_entry(nhge, &nh->grp_list, nh_list) __nexthop_replace_notify(net, nhge->nh_parent, info); } static int replace_nexthop(struct net *net, struct nexthop *old, struct nexthop *new, const struct nh_config *cfg, struct netlink_ext_ack *extack) { bool new_is_reject = false; struct nh_grp_entry *nhge; int err; /* check that existing FIB entries are ok with the * new nexthop definition */ err = fib_check_nh_list(old, new, extack); if (err) return err; err = fib6_check_nh_list(old, new, extack); if (err) return err; if (!new->is_group) { struct nh_info *nhi = rtnl_dereference(new->nh_info); new_is_reject = nhi->reject_nh; } list_for_each_entry(nhge, &old->grp_list, nh_list) { /* if new nexthop is a blackhole, any groups using this * nexthop cannot have more than 1 path */ if (new_is_reject && nexthop_num_path(nhge->nh_parent) > 1) { NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path"); return -EINVAL; } err = fib_check_nh_list(nhge->nh_parent, new, extack); if (err) return err; err = fib6_check_nh_list(nhge->nh_parent, new, extack); if (err) return err; } if (old->is_group) err = replace_nexthop_grp(net, old, new, cfg, extack); else err = replace_nexthop_single(net, old, new, extack); if (!err) { nh_rt_cache_flush(net, old, new); __remove_nexthop(net, new, NULL); nexthop_put(new); } return err; } /* called with rtnl_lock held */ static int insert_nexthop(struct net *net, struct nexthop *new_nh, struct nh_config *cfg, struct netlink_ext_ack *extack) { struct rb_node **pp, *parent = NULL, *next; struct rb_root *root = &net->nexthop.rb_root; bool replace = !!(cfg->nlflags & NLM_F_REPLACE); bool create = !!(cfg->nlflags & NLM_F_CREATE); u32 new_id = new_nh->id; int replace_notify = 0; int rc = -EEXIST; pp = &root->rb_node; while (1) { struct nexthop *nh; next = *pp; if (!next) break; parent = next; nh = rb_entry(parent, struct nexthop, rb_node); if (new_id < nh->id) { pp = &next->rb_left; } else if (new_id > nh->id) { pp = &next->rb_right; } else if (replace) { rc = replace_nexthop(net, nh, new_nh, cfg, extack); if (!rc) { new_nh = nh; /* send notification with old nh */ replace_notify = 1; } goto out; } else { /* id already exists and not a replace */ goto out; } } if (replace && !create) { NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists"); rc = -ENOENT; goto out; } if (new_nh->is_group) { struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp); struct nh_res_table *res_table; if (nhg->resilient) { res_table = rtnl_dereference(nhg->res_table); /* Not passing the number of buckets is OK when * replacing, but not when creating a new group. 
*/ if (!cfg->nh_grp_res_has_num_buckets) { NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion"); rc = -EINVAL; goto out; } nh_res_group_rebalance(nhg, res_table); /* Do not send bucket notifications, we do full * notification below. */ nh_res_table_upkeep(res_table, false, false); } } rb_link_node_rcu(&new_nh->rb_node, parent, pp); rb_insert_color(&new_nh->rb_node, root); /* The initial insertion is a full notification for hash-threshold as * well as resilient groups. */ rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); if (rc) rb_erase(&new_nh->rb_node, &net->nexthop.rb_root); out: if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); if (replace_notify && READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } return rc; } /* rtnl */ /* remove all nexthops tied to a device being deleted */ static void nexthop_flush_dev(struct net_device *dev, unsigned long event) { unsigned int hash = nh_dev_hashfn(dev->ifindex); struct net *net = dev_net(dev); struct hlist_head *head = &net->nexthop.devhash[hash]; struct hlist_node *n; struct nh_info *nhi; hlist_for_each_entry_safe(nhi, n, head, dev_hash) { if (nhi->fib_nhc.nhc_dev != dev) continue; if (nhi->reject_nh && (event == NETDEV_DOWN || event == NETDEV_CHANGE)) continue; remove_nexthop(net, nhi->nh_parent, NULL); } } /* rtnl; called when net namespace is deleted */ static void flush_all_nexthops(struct net *net) { struct rb_root *root = &net->nexthop.rb_root; struct rb_node *node; struct nexthop *nh; while ((node = rb_first(root))) { nh = rb_entry(node, struct nexthop, rb_node); remove_nexthop(net, nh, NULL); cond_resched(); } } static struct nexthop *nexthop_create_group(struct net *net, struct nh_config *cfg) { struct nlattr *grps_attr = cfg->nh_grp; struct nexthop_grp *entry = nla_data(grps_attr); u16 num_nh = nla_len(grps_attr) / sizeof(*entry); struct nh_group *nhg; struct nexthop *nh; int err; int i; if (WARN_ON(!num_nh)) return ERR_PTR(-EINVAL); nh = nexthop_alloc(); if (!nh) return ERR_PTR(-ENOMEM); nh->is_group = 1; nhg = nexthop_grp_alloc(num_nh); if (!nhg) { kfree(nh); return ERR_PTR(-ENOMEM); } /* spare group used for removals */ nhg->spare = nexthop_grp_alloc(num_nh); if (!nhg->spare) { kfree(nhg); kfree(nh); return ERR_PTR(-ENOMEM); } nhg->spare->spare = nhg; for (i = 0; i < nhg->num_nh; ++i) { struct nexthop *nhe; struct nh_info *nhi; nhe = nexthop_find_by_id(net, entry[i].id); if (!nexthop_get(nhe)) { err = -ENOENT; goto out_no_nh; } nhi = rtnl_dereference(nhe->nh_info); if (nhi->family == AF_INET) nhg->has_v4 = true; nhg->nh_entries[i].stats = netdev_alloc_pcpu_stats(struct nh_grp_entry_stats); if (!nhg->nh_entries[i].stats) { err = -ENOMEM; nexthop_put(nhe); goto out_no_nh; } nhg->nh_entries[i].nh = nhe; nhg->nh_entries[i].weight = entry[i].weight + 1; list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list); nhg->nh_entries[i].nh_parent = nh; } if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { nhg->hash_threshold = 1; nhg->is_multipath = true; } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) { struct nh_res_table *res_table; res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg); if (!res_table) { err = -ENOMEM; goto out_no_nh; } rcu_assign_pointer(nhg->spare->res_table, res_table); rcu_assign_pointer(nhg->res_table, res_table); nhg->resilient = true; nhg->is_multipath = true; } WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1); if (nhg->hash_threshold) nh_hthr_group_rebalance(nhg); 
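	/* A hash-threshold group now has per-entry upper bounds that split the
	 * 31-bit hash space in proportion to the weights (see
	 * nh_hthr_group_rebalance()); what remains is to copy the fdb and
	 * hw_stats settings and publish the group.
	 */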
if (cfg->nh_fdb) nhg->fdb_nh = 1; if (cfg->nh_hw_stats) nhg->hw_stats = true; rcu_assign_pointer(nh->nh_grp, nhg); return nh; out_no_nh: for (i--; i >= 0; --i) { list_del(&nhg->nh_entries[i].nh_list); free_percpu(nhg->nh_entries[i].stats); nexthop_put(nhg->nh_entries[i].nh); } kfree(nhg->spare); kfree(nhg); kfree(nh); return ERR_PTR(err); } static int nh_create_ipv4(struct net *net, struct nexthop *nh, struct nh_info *nhi, struct nh_config *cfg, struct netlink_ext_ack *extack) { struct fib_nh *fib_nh = &nhi->fib_nh; struct fib_config fib_cfg = { .fc_oif = cfg->nh_ifindex, .fc_gw4 = cfg->gw.ipv4, .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0, .fc_flags = cfg->nh_flags, .fc_nlinfo = cfg->nlinfo, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, }; u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); int err; err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); if (err) { fib_nh_release(net, fib_nh); goto out; } if (nhi->fdb_nh) goto out; /* sets nh_dev if successful */ err = fib_check_nh(net, fib_nh, tb_id, 0, extack); if (!err) { nh->nh_flags = fib_nh->fib_nh_flags; fib_info_update_nhc_saddr(net, &fib_nh->nh_common, !fib_nh->fib_nh_scope ? 0 : fib_nh->fib_nh_scope - 1); } else { fib_nh_release(net, fib_nh); } out: return err; } static int nh_create_ipv6(struct net *net, struct nexthop *nh, struct nh_info *nhi, struct nh_config *cfg, struct netlink_ext_ack *extack) { struct fib6_nh *fib6_nh = &nhi->fib6_nh; struct fib6_config fib6_cfg = { .fc_table = l3mdev_fib_table(cfg->dev), .fc_ifindex = cfg->nh_ifindex, .fc_gateway = cfg->gw.ipv6, .fc_flags = cfg->nh_flags, .fc_nlinfo = cfg->nlinfo, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, .fc_is_fdb = cfg->nh_fdb, }; int err; if (!ipv6_addr_any(&cfg->gw.ipv6)) fib6_cfg.fc_flags |= RTF_GATEWAY; /* sets nh_dev if successful */ err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, extack); if (err) { /* IPv6 is not enabled, don't call fib6_nh_release */ if (err == -EAFNOSUPPORT) goto out; ipv6_stub->fib6_nh_release(fib6_nh); } else { nh->nh_flags = fib6_nh->fib_nh_flags; } out: return err; } static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, struct netlink_ext_ack *extack) { struct nh_info *nhi; struct nexthop *nh; int err = 0; nh = nexthop_alloc(); if (!nh) return ERR_PTR(-ENOMEM); nhi = kzalloc(sizeof(*nhi), GFP_KERNEL); if (!nhi) { kfree(nh); return ERR_PTR(-ENOMEM); } nh->nh_flags = cfg->nh_flags; nh->net = net; nhi->nh_parent = nh; nhi->family = cfg->nh_family; nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; if (cfg->nh_fdb) nhi->fdb_nh = 1; if (cfg->nh_blackhole) { nhi->reject_nh = 1; cfg->nh_ifindex = net->loopback_dev->ifindex; } switch (cfg->nh_family) { case AF_INET: err = nh_create_ipv4(net, nh, nhi, cfg, extack); break; case AF_INET6: err = nh_create_ipv6(net, nh, nhi, cfg, extack); break; } if (err) { kfree(nhi); kfree(nh); return ERR_PTR(err); } /* add the entry to the device based hash */ if (!nhi->fdb_nh) nexthop_devhash_add(net, nhi); rcu_assign_pointer(nh->nh_info, nhi); return nh; } /* called with rtnl lock held */ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg, struct netlink_ext_ack *extack) { struct nexthop *nh; int err; if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) { NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); return ERR_PTR(-EINVAL); } if (!cfg->nh_id) { cfg->nh_id = nh_find_unused_id(net); if (!cfg->nh_id) { NL_SET_ERR_MSG(extack, "No unused id"); return ERR_PTR(-EINVAL); } } if (cfg->nh_grp) nh = 
nexthop_create_group(net, cfg); else nh = nexthop_create(net, cfg, extack); if (IS_ERR(nh)) return nh; refcount_set(&nh->refcnt, 1); nh->id = cfg->nh_id; nh->protocol = cfg->nh_protocol; nh->net = net; err = insert_nexthop(net, nh, cfg, extack); if (err) { __remove_nexthop(net, nh, NULL); nexthop_put(nh); nh = ERR_PTR(err); } return nh; } static int rtm_nh_get_timer(struct nlattr *attr, unsigned long fallback, unsigned long *timer_p, bool *has_p, struct netlink_ext_ack *extack) { unsigned long timer; u32 value; if (!attr) { *timer_p = fallback; *has_p = false; return 0; } value = nla_get_u32(attr); timer = clock_t_to_jiffies(value); if (timer == ~0UL) { NL_SET_ERR_MSG(extack, "Timer value too large"); return -EINVAL; } *timer_p = timer; *has_p = true; return 0; } static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_policy_new)] = {}; int err; if (res) { err = nla_parse_nested(tb, ARRAY_SIZE(rtm_nh_res_policy_new) - 1, res, rtm_nh_res_policy_new, extack); if (err < 0) return err; } if (tb[NHA_RES_GROUP_BUCKETS]) { cfg->nh_grp_res_num_buckets = nla_get_u16(tb[NHA_RES_GROUP_BUCKETS]); cfg->nh_grp_res_has_num_buckets = true; if (!cfg->nh_grp_res_num_buckets) { NL_SET_ERR_MSG(extack, "Number of buckets needs to be non-0"); return -EINVAL; } } err = rtm_nh_get_timer(tb[NHA_RES_GROUP_IDLE_TIMER], NH_RES_DEFAULT_IDLE_TIMER, &cfg->nh_grp_res_idle_timer, &cfg->nh_grp_res_has_idle_timer, extack); if (err) return err; return rtm_nh_get_timer(tb[NHA_RES_GROUP_UNBALANCED_TIMER], NH_RES_DEFAULT_UNBALANCED_TIMER, &cfg->nh_grp_res_unbalanced_timer, &cfg->nh_grp_res_has_unbalanced_timer, extack); } static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, struct nh_config *cfg, struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; int err; err = nlmsg_parse(nlh, sizeof(*nhm), tb, ARRAY_SIZE(rtm_nh_policy_new) - 1, rtm_nh_policy_new, extack); if (err < 0) return err; err = -EINVAL; if (nhm->resvd || nhm->nh_scope) { NL_SET_ERR_MSG(extack, "Invalid values in ancillary header"); goto out; } if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) { NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header"); goto out; } switch (nhm->nh_family) { case AF_INET: case AF_INET6: break; case AF_UNSPEC: if (tb[NHA_GROUP]) break; fallthrough; default: NL_SET_ERR_MSG(extack, "Invalid address family"); goto out; } memset(cfg, 0, sizeof(*cfg)); cfg->nlflags = nlh->nlmsg_flags; cfg->nlinfo.portid = NETLINK_CB(skb).portid; cfg->nlinfo.nlh = nlh; cfg->nlinfo.nl_net = net; cfg->nh_family = nhm->nh_family; cfg->nh_protocol = nhm->nh_protocol; cfg->nh_flags = nhm->nh_flags; if (tb[NHA_ID]) cfg->nh_id = nla_get_u32(tb[NHA_ID]); if (tb[NHA_FDB]) { if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] || tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole"); goto out; } if (nhm->nh_flags) { NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header"); goto out; } cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); } if (tb[NHA_GROUP]) { if (nhm->nh_family != AF_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid family for group"); goto out; } cfg->nh_grp = tb[NHA_GROUP]; cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH; if (tb[NHA_GROUP_TYPE]) cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]); if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) { NL_SET_ERR_MSG(extack, "Invalid group type"); goto out; } err 
= nh_check_attr_group(net, tb, ARRAY_SIZE(tb), cfg->nh_grp_type, extack); if (err) goto out; if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP], cfg, extack); if (tb[NHA_HW_STATS_ENABLE]) cfg->nh_hw_stats = nla_get_u32(tb[NHA_HW_STATS_ENABLE]); /* no other attributes should be set */ goto out; } if (tb[NHA_BLACKHOLE]) { if (tb[NHA_GATEWAY] || tb[NHA_OIF] || tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) { NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb"); goto out; } cfg->nh_blackhole = 1; err = 0; goto out; } if (!cfg->nh_fdb && !tb[NHA_OIF]) { NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); goto out; } if (!cfg->nh_fdb && tb[NHA_OIF]) { cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); if (cfg->nh_ifindex) cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); if (!cfg->dev) { NL_SET_ERR_MSG(extack, "Invalid device index"); goto out; } else if (!(cfg->dev->flags & IFF_UP)) { NL_SET_ERR_MSG(extack, "Nexthop device is not up"); err = -ENETDOWN; goto out; } else if (!netif_carrier_ok(cfg->dev)) { NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); err = -ENETDOWN; goto out; } } err = -EINVAL; if (tb[NHA_GATEWAY]) { struct nlattr *gwa = tb[NHA_GATEWAY]; switch (cfg->nh_family) { case AF_INET: if (nla_len(gwa) != sizeof(u32)) { NL_SET_ERR_MSG(extack, "Invalid gateway"); goto out; } cfg->gw.ipv4 = nla_get_be32(gwa); break; case AF_INET6: if (nla_len(gwa) != sizeof(struct in6_addr)) { NL_SET_ERR_MSG(extack, "Invalid gateway"); goto out; } cfg->gw.ipv6 = nla_get_in6_addr(gwa); break; default: NL_SET_ERR_MSG(extack, "Unknown address family for gateway"); goto out; } } else { /* device only nexthop (no gateway) */ if (cfg->nh_flags & RTNH_F_ONLINK) { NL_SET_ERR_MSG(extack, "ONLINK flag can not be set for nexthop without a gateway"); goto out; } } if (tb[NHA_ENCAP]) { cfg->nh_encap = tb[NHA_ENCAP]; if (!tb[NHA_ENCAP_TYPE]) { NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing"); goto out; } cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]); err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack); if (err < 0) goto out; } else if (tb[NHA_ENCAP_TYPE]) { NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing"); goto out; } if (tb[NHA_HW_STATS_ENABLE]) { NL_SET_ERR_MSG(extack, "Cannot enable nexthop hardware statistics for non-group nexthops"); goto out; } err = 0; out: return err; } /* rtnl */ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nh_config cfg; struct nexthop *nh; int err; err = rtm_to_nh_config(net, skb, nlh, &cfg, extack); if (!err) { nh = nexthop_add(net, &cfg, extack); if (IS_ERR(nh)) err = PTR_ERR(nh); } return err; } static int nh_valid_get_del_req(const struct nlmsghdr *nlh, struct nlattr **tb, u32 *id, u32 *op_flags, struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { NL_SET_ERR_MSG(extack, "Invalid values in header"); return -EINVAL; } if (!tb[NHA_ID]) { NL_SET_ERR_MSG(extack, "Nexthop id is missing"); return -EINVAL; } *id = nla_get_u32(tb[NHA_ID]); if (!(*id)) { NL_SET_ERR_MSG(extack, "Invalid nexthop id"); return -EINVAL; } if (op_flags) { if (tb[NHA_OP_FLAGS]) *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); else *op_flags = 0; } return 0; } /* rtnl */ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, struct 
netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_del)]; struct net *net = sock_net(skb->sk); struct nl_info nlinfo = { .nlh = nlh, .nl_net = net, .portid = NETLINK_CB(skb).portid, }; struct nexthop *nh; int err; u32 id; err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, ARRAY_SIZE(rtm_nh_policy_del) - 1, rtm_nh_policy_del, extack); if (err < 0) return err; err = nh_valid_get_del_req(nlh, tb, &id, NULL, extack); if (err) return err; nh = nexthop_find_by_id(net, id); if (!nh) return -ENOENT; remove_nexthop(net, nh, &nlinfo); return 0; } /* rtnl */ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; struct net *net = sock_net(in_skb->sk); struct sk_buff *skb = NULL; struct nexthop *nh; u32 op_flags; int err; u32 id; err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, ARRAY_SIZE(rtm_nh_policy_get) - 1, rtm_nh_policy_get, extack); if (err < 0) return err; err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack); if (err) return err; err = -ENOBUFS; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) goto out; err = -ENOENT; nh = nexthop_find_by_id(net, id); if (!nh) goto errout_free; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, op_flags); if (err < 0) { WARN_ON(err == -EMSGSIZE); goto errout_free; } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); out: return err; errout_free: kfree_skb(skb); goto out; } struct nh_dump_filter { u32 nh_id; int dev_idx; int master_idx; bool group_filter; bool fdb_filter; u32 res_bucket_nh_id; u32 op_flags; }; static bool nh_dump_filtered(struct nexthop *nh, struct nh_dump_filter *filter, u8 family) { const struct net_device *dev; const struct nh_info *nhi; if (filter->group_filter && !nh->is_group) return true; if (!filter->dev_idx && !filter->master_idx && !family) return false; if (nh->is_group) return true; nhi = rtnl_dereference(nh->nh_info); if (family && nhi->family != family) return true; dev = nhi->fib_nhc.nhc_dev; if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx)) return true; if (filter->master_idx) { struct net_device *master; if (!dev) return true; master = netdev_master_upper_dev_get((struct net_device *)dev); if (!master || master->ifindex != filter->master_idx) return true; } return false; } static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, struct nh_dump_filter *filter, struct netlink_ext_ack *extack) { struct nhmsg *nhm; u32 idx; if (tb[NHA_OIF]) { idx = nla_get_u32(tb[NHA_OIF]); if (idx > INT_MAX) { NL_SET_ERR_MSG(extack, "Invalid device index"); return -EINVAL; } filter->dev_idx = idx; } if (tb[NHA_MASTER]) { idx = nla_get_u32(tb[NHA_MASTER]); if (idx > INT_MAX) { NL_SET_ERR_MSG(extack, "Invalid master device index"); return -EINVAL; } filter->master_idx = idx; } filter->group_filter = nla_get_flag(tb[NHA_GROUPS]); filter->fdb_filter = nla_get_flag(tb[NHA_FDB]); nhm = nlmsg_data(nlh); if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request"); return -EINVAL; } return 0; } static int nh_valid_dump_req(const struct nlmsghdr *nlh, struct nh_dump_filter *filter, struct netlink_callback *cb) { struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; int err; err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, ARRAY_SIZE(rtm_nh_policy_dump) - 1, rtm_nh_policy_dump, cb->extack); if (err < 0) return err; if (tb[NHA_OP_FLAGS]) filter->op_flags = 
nla_get_u32(tb[NHA_OP_FLAGS]); else filter->op_flags = 0; return __nh_valid_dump_req(nlh, tb, filter, cb->extack); } struct rtm_dump_nh_ctx { u32 idx; }; static struct rtm_dump_nh_ctx * rtm_dump_nh_ctx(struct netlink_callback *cb) { struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); return ctx; } static int rtm_dump_walk_nexthops(struct sk_buff *skb, struct netlink_callback *cb, struct rb_root *root, struct rtm_dump_nh_ctx *ctx, int (*nh_cb)(struct sk_buff *skb, struct netlink_callback *cb, struct nexthop *nh, void *data), void *data) { struct rb_node *node; int s_idx; int err; s_idx = ctx->idx; for (node = rb_first(root); node; node = rb_next(node)) { struct nexthop *nh; nh = rb_entry(node, struct nexthop, rb_node); if (nh->id < s_idx) continue; ctx->idx = nh->id; err = nh_cb(skb, cb, nh, data); if (err) return err; } return 0; } static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, struct nexthop *nh, void *data) { struct nhmsg *nhm = nlmsg_data(cb->nlh); struct nh_dump_filter *filter = data; if (nh_dump_filtered(nh, filter, nhm->nh_family)) return 0; return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags); } /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; struct nh_dump_filter filter = {}; int err; err = nh_valid_dump_req(cb->nlh, &filter, cb); if (err < 0) return err; err = rtm_dump_walk_nexthops(skb, cb, root, ctx, &rtm_dump_nexthop_cb, &filter); cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); return err; } static struct nexthop * nexthop_find_group_resilient(struct net *net, u32 id, struct netlink_ext_ack *extack) { struct nh_group *nhg; struct nexthop *nh; nh = nexthop_find_by_id(net, id); if (!nh) return ERR_PTR(-ENOENT); if (!nh->is_group) { NL_SET_ERR_MSG(extack, "Not a nexthop group"); return ERR_PTR(-EINVAL); } nhg = rtnl_dereference(nh->nh_grp); if (!nhg->resilient) { NL_SET_ERR_MSG(extack, "Nexthop group not of type resilient"); return ERR_PTR(-EINVAL); } return nh; } static int nh_valid_dump_nhid(struct nlattr *attr, u32 *nh_id_p, struct netlink_ext_ack *extack) { u32 idx; if (attr) { idx = nla_get_u32(attr); if (!idx) { NL_SET_ERR_MSG(extack, "Invalid nexthop id"); return -EINVAL; } *nh_id_p = idx; } else { *nh_id_p = 0; } return 0; } static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh, struct nh_dump_filter *filter, struct netlink_callback *cb) { struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)]; struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)]; int err; err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1, rtm_nh_policy_dump_bucket, NULL); if (err < 0) return err; err = nh_valid_dump_nhid(tb[NHA_ID], &filter->nh_id, cb->extack); if (err) return err; if (tb[NHA_RES_BUCKET]) { size_t max = ARRAY_SIZE(rtm_nh_res_bucket_policy_dump) - 1; err = nla_parse_nested(res_tb, max, tb[NHA_RES_BUCKET], rtm_nh_res_bucket_policy_dump, cb->extack); if (err < 0) return err; err = nh_valid_dump_nhid(res_tb[NHA_RES_BUCKET_NH_ID], &filter->res_bucket_nh_id, cb->extack); if (err) return err; } return __nh_valid_dump_req(nlh, tb, filter, cb->extack); } struct rtm_dump_res_bucket_ctx { struct rtm_dump_nh_ctx nh; u16 bucket_index; }; static struct rtm_dump_res_bucket_ctx * 
rtm_dump_res_bucket_ctx(struct netlink_callback *cb) { struct rtm_dump_res_bucket_ctx *ctx = (void *)cb->ctx; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); return ctx; } struct rtm_dump_nexthop_bucket_data { struct rtm_dump_res_bucket_ctx *ctx; struct nh_dump_filter filter; }; static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb, struct netlink_callback *cb, struct nexthop *nh, struct rtm_dump_nexthop_bucket_data *dd) { u32 portid = NETLINK_CB(cb->skb).portid; struct nhmsg *nhm = nlmsg_data(cb->nlh); struct nh_res_table *res_table; struct nh_group *nhg; u16 bucket_index; int err; nhg = rtnl_dereference(nh->nh_grp); res_table = rtnl_dereference(nhg->res_table); for (bucket_index = dd->ctx->bucket_index; bucket_index < res_table->num_nh_buckets; bucket_index++) { struct nh_res_bucket *bucket; struct nh_grp_entry *nhge; bucket = &res_table->nh_buckets[bucket_index]; nhge = rtnl_dereference(bucket->nh_entry); if (nh_dump_filtered(nhge->nh, &dd->filter, nhm->nh_family)) continue; if (dd->filter.res_bucket_nh_id && dd->filter.res_bucket_nh_id != nhge->nh->id) continue; dd->ctx->bucket_index = bucket_index; err = nh_fill_res_bucket(skb, nh, bucket, bucket_index, RTM_NEWNEXTHOPBUCKET, portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); if (err) return err; } dd->ctx->bucket_index = 0; return 0; } static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb, struct netlink_callback *cb, struct nexthop *nh, void *data) { struct rtm_dump_nexthop_bucket_data *dd = data; struct nh_group *nhg; if (!nh->is_group) return 0; nhg = rtnl_dereference(nh->nh_grp); if (!nhg->resilient) return 0; return rtm_dump_nexthop_bucket_nh(skb, cb, nh, dd); } /* rtnl */ static int rtm_dump_nexthop_bucket(struct sk_buff *skb, struct netlink_callback *cb) { struct rtm_dump_res_bucket_ctx *ctx = rtm_dump_res_bucket_ctx(cb); struct rtm_dump_nexthop_bucket_data dd = { .ctx = ctx }; struct net *net = sock_net(skb->sk); struct nexthop *nh; int err; err = nh_valid_dump_bucket_req(cb->nlh, &dd.filter, cb); if (err) return err; if (dd.filter.nh_id) { nh = nexthop_find_group_resilient(net, dd.filter.nh_id, cb->extack); if (IS_ERR(nh)) return PTR_ERR(nh); err = rtm_dump_nexthop_bucket_nh(skb, cb, nh, &dd); } else { struct rb_root *root = &net->nexthop.rb_root; err = rtm_dump_walk_nexthops(skb, cb, root, &ctx->nh, &rtm_dump_nexthop_bucket_cb, &dd); } cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); return err; } static int nh_valid_get_bucket_req_res_bucket(struct nlattr *res, u16 *bucket_index, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_get)]; int err; err = nla_parse_nested(tb, ARRAY_SIZE(rtm_nh_res_bucket_policy_get) - 1, res, rtm_nh_res_bucket_policy_get, extack); if (err < 0) return err; if (!tb[NHA_RES_BUCKET_INDEX]) { NL_SET_ERR_MSG(extack, "Bucket index is missing"); return -EINVAL; } *bucket_index = nla_get_u16(tb[NHA_RES_BUCKET_INDEX]); return 0; } static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, u32 *id, u16 *bucket_index, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)]; int err; err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1, rtm_nh_policy_get_bucket, extack); if (err < 0) return err; err = nh_valid_get_del_req(nlh, tb, id, NULL, extack); if (err) return err; if (!tb[NHA_RES_BUCKET]) { NL_SET_ERR_MSG(extack, "Bucket information is missing"); return -EINVAL; } err = nh_valid_get_bucket_req_res_bucket(tb[NHA_RES_BUCKET], bucket_index, extack); if (err) 
return err; return 0; } /* rtnl */ static int rtm_get_nexthop_bucket(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nh_res_table *res_table; struct sk_buff *skb = NULL; struct nh_group *nhg; struct nexthop *nh; u16 bucket_index; int err; u32 id; err = nh_valid_get_bucket_req(nlh, &id, &bucket_index, extack); if (err) return err; nh = nexthop_find_group_resilient(net, id, extack); if (IS_ERR(nh)) return PTR_ERR(nh); nhg = rtnl_dereference(nh->nh_grp); res_table = rtnl_dereference(nhg->res_table); if (bucket_index >= res_table->num_nh_buckets) { NL_SET_ERR_MSG(extack, "Bucket index out of bounds"); return -ENOENT; } skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; err = nh_fill_res_bucket(skb, nh, &res_table->nh_buckets[bucket_index], bucket_index, RTM_NEWNEXTHOPBUCKET, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, extack); if (err < 0) { WARN_ON(err == -EMSGSIZE); goto errout_free; } return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout_free: kfree_skb(skb); return err; } static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu) { unsigned int hash = nh_dev_hashfn(dev->ifindex); struct net *net = dev_net(dev); struct hlist_head *head = &net->nexthop.devhash[hash]; struct hlist_node *n; struct nh_info *nhi; hlist_for_each_entry_safe(nhi, n, head, dev_hash) { if (nhi->fib_nhc.nhc_dev == dev) { if (nhi->family == AF_INET) fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu, orig_mtu); } } } /* rtnl */ static int nh_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_info_ext *info_ext; switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: nexthop_flush_dev(dev, event); break; case NETDEV_CHANGE: if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) nexthop_flush_dev(dev, event); break; case NETDEV_CHANGEMTU: info_ext = ptr; nexthop_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(dev_net(dev)); break; } return NOTIFY_DONE; } static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; static int nexthops_dump(struct net *net, struct notifier_block *nb, enum nexthop_event_type event_type, struct netlink_ext_ack *extack) { struct rb_root *root = &net->nexthop.rb_root; struct rb_node *node; int err = 0; for (node = rb_first(root); node; node = rb_next(node)) { struct nexthop *nh; nh = rb_entry(node, struct nexthop, rb_node); err = call_nexthop_notifier(nb, net, event_type, nh, extack); if (err) break; } return err; } int register_nexthop_notifier(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { int err; rtnl_lock(); err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack); if (err) goto unlock; err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, nb); unlock: rtnl_unlock(); return err; } EXPORT_SYMBOL(register_nexthop_notifier); int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) { int err; err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, nb); if (!err) nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); return err; } EXPORT_SYMBOL(__unregister_nexthop_notifier); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) { int err; rtnl_lock(); err = __unregister_nexthop_notifier(net, nb); rtnl_unlock(); return err; } EXPORT_SYMBOL(unregister_nexthop_notifier); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap) 
{ struct nexthop *nexthop; rcu_read_lock(); nexthop = nexthop_find_by_id(net, id); if (!nexthop) goto out; nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); if (offload) nexthop->nh_flags |= RTNH_F_OFFLOAD; if (trap) nexthop->nh_flags |= RTNH_F_TRAP; out: rcu_read_unlock(); } EXPORT_SYMBOL(nexthop_set_hw_flags); void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, bool offload, bool trap) { struct nh_res_table *res_table; struct nh_res_bucket *bucket; struct nexthop *nexthop; struct nh_group *nhg; rcu_read_lock(); nexthop = nexthop_find_by_id(net, id); if (!nexthop || !nexthop->is_group) goto out; nhg = rcu_dereference(nexthop->nh_grp); if (!nhg->resilient) goto out; if (bucket_index >= nhg->res_table->num_nh_buckets) goto out; res_table = rcu_dereference(nhg->res_table); bucket = &res_table->nh_buckets[bucket_index]; bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP); if (offload) bucket->nh_flags |= RTNH_F_OFFLOAD; if (trap) bucket->nh_flags |= RTNH_F_TRAP; out: rcu_read_unlock(); } EXPORT_SYMBOL(nexthop_bucket_set_hw_flags); void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, unsigned long *activity) { struct nh_res_table *res_table; struct nexthop *nexthop; struct nh_group *nhg; u16 i; rcu_read_lock(); nexthop = nexthop_find_by_id(net, id); if (!nexthop || !nexthop->is_group) goto out; nhg = rcu_dereference(nexthop->nh_grp); if (!nhg->resilient) goto out; /* Instead of silently ignoring some buckets, demand that the sizes * be the same. */ res_table = rcu_dereference(nhg->res_table); if (num_buckets != res_table->num_nh_buckets) goto out; for (i = 0; i < num_buckets; i++) { if (test_bit(i, activity)) nh_res_bucket_set_busy(&res_table->nh_buckets[i]); } out: rcu_read_unlock(); } EXPORT_SYMBOL(nexthop_res_grp_activity_update); static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list, struct list_head *dev_to_kill) { struct net *net; ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) flush_all_nexthops(net); } static void __net_exit nexthop_net_exit(struct net *net) { kfree(net->nexthop.devhash); net->nexthop.devhash = NULL; } static int __net_init nexthop_net_init(struct net *net) { size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE; net->nexthop.rb_root = RB_ROOT; net->nexthop.devhash = kzalloc(sz, GFP_KERNEL); if (!net->nexthop.devhash) return -ENOMEM; BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain); return 0; } static struct pernet_operations nexthop_net_ops = { .init = nexthop_net_init, .exit = nexthop_net_exit, .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, }; static int __init nexthop_init(void) { register_pernet_subsys(&nexthop_net_ops); register_netdevice_notifier(&nh_netdev_notifier); rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop, rtm_dump_nexthop, 0); rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket, rtm_dump_nexthop_bucket, 0); return 0; } subsys_initcall(nexthop_init);
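/*
 * Illustrative sketch only (not part of nexthop.c above): the hash-threshold
 * arithmetic from nh_hthr_group_rebalance() and nexthop_select_path_hthr(),
 * reduced to stand-alone user-space C. Each group entry is assigned an upper
 * bound so that it owns a slice of the 31-bit hash space proportional to its
 * weight, and a flow hash picks the first entry whose bound is not exceeded.
 * The toy_* names below are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

static void toy_rebalance(const unsigned int *weight, int num_nh,
			  int32_t *upper_bound)
{
	uint64_t total = 0, w = 0;
	int i;

	for (i = 0; i < num_nh; i++)
		total += weight[i];

	for (i = 0; i < num_nh; i++) {
		w += weight[i];
		/* mirrors DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1 */
		upper_bound[i] = (int32_t)(((w << 31) + total / 2) / total - 1);
	}
}

static int toy_select(const int32_t *upper_bound, int num_nh, int32_t hash)
{
	int i;

	for (i = 0; i < num_nh; i++)
		if (hash <= upper_bound[i])
			return i;
	return num_nh - 1;	/* last bound is always 2^31 - 1 */
}

int main(void)
{
	const unsigned int weight[] = { 1, 2, 1 };
	int32_t bound[3];
	int i;

	toy_rebalance(weight, 3, bound);
	for (i = 0; i < 3; i++)
		printf("entry %d: upper_bound %d\n", i, bound[i]);
	/* prints 536870911, 1610612735 and 2147483647: a 1:2:1 split */
	printf("hash 0x30000000 -> entry %d\n", toy_select(bound, 3, 0x30000000));
	return 0;
}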
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Do sleep inside a spin-lock
 * Copyright (c) 1999 by Takashi Iwai <tiwai@suse.de>
 */

#include <linux/export.h>
#include <sound/core.h>
#include "seq_lock.h"

/* wait until all locks are released */
void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line)
{
	int warn_count = 5 * HZ;

	if (atomic_read(lockp) < 0) {
		pr_warn("ALSA: seq_lock: lock trouble [counter = %d] in %s:%d\n",
			atomic_read(lockp), file, line);
		return;
	}
	while (atomic_read(lockp) > 0) {
		if (warn_count-- == 0)
			pr_warn("ALSA: seq_lock: waiting [%d left] in %s:%d\n",
				atomic_read(lockp), file, line);
		schedule_timeout_uninterruptible(1);
	}
}
EXPORT_SYMBOL(snd_use_lock_sync_helper);
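/*
 * Usage sketch only (not part of the file above), assuming the counter
 * helpers that "seq_lock.h" is expected to supply (snd_use_lock_init(),
 * snd_use_lock_use(), snd_use_lock_free(), snd_use_lock_sync()); the
 * toy_port type is invented for the example. A reader bumps the use counter
 * while it works on the object; the destructor calls snd_use_lock_sync(),
 * which ends up in snd_use_lock_sync_helper() above and sleeps until every
 * reader has dropped out.
 */
struct toy_port {
	snd_use_lock_t use_lock;
};

static void toy_port_deliver(struct toy_port *port)
{
	snd_use_lock_use(&port->use_lock);	/* announce an active reader */
	/* ... deliver an event to the port ... */
	snd_use_lock_free(&port->use_lock);	/* reader done */
}

static void toy_port_delete(struct toy_port *port)
{
	snd_use_lock_sync(&port->use_lock);	/* wait for in-flight readers */
	/* ... now safe to free the port ... */
}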
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM csd

#if !defined(_TRACE_CSD_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_CSD_H

#include <linux/tracepoint.h>

TRACE_EVENT(csd_queue_cpu,

	TP_PROTO(const unsigned int cpu,
		 unsigned long callsite,
		 smp_call_func_t func,
		 call_single_data_t *csd),

	TP_ARGS(cpu, callsite, func, csd),

	TP_STRUCT__entry(
		__field(unsigned int, cpu)
		__field(void *, callsite)
		__field(void *, func)
		__field(void *, csd)
	),

	TP_fast_assign(
		__entry->cpu = cpu;
		__entry->callsite = (void *)callsite;
		__entry->func = func;
		__entry->csd = csd;
	),

	TP_printk("cpu=%u callsite=%pS func=%ps csd=%p",
		  __entry->cpu, __entry->callsite, __entry->func, __entry->csd)
);

/*
 * Tracepoints for a function which is called as an effect of smp_call_function.*
 */
DECLARE_EVENT_CLASS(csd_function,

	TP_PROTO(smp_call_func_t func, call_single_data_t *csd),

	TP_ARGS(func, csd),

	TP_STRUCT__entry(
		__field(void *, func)
		__field(void *, csd)
	),

	TP_fast_assign(
		__entry->func = func;
		__entry->csd = csd;
	),

	TP_printk("func=%ps, csd=%p", __entry->func, __entry->csd)
);

DEFINE_EVENT(csd_function, csd_function_entry,
	TP_PROTO(smp_call_func_t func, call_single_data_t *csd),
	TP_ARGS(func, csd)
);

DEFINE_EVENT(csd_function, csd_function_exit,
	TP_PROTO(smp_call_func_t func, call_single_data_t *csd),
	TP_ARGS(func, csd)
);

#endif /* _TRACE_CSD_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
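/*
 * Emission sketch only (not part of the header above): TRACE_EVENT() and
 * DEFINE_EVENT() generate trace_csd_queue_cpu(), trace_csd_function_entry()
 * and trace_csd_function_exit() wrappers, which a caller in the generic smp
 * code could use roughly as below; toy_run_csd() is invented for the example.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/csd.h>

static void toy_run_csd(unsigned int cpu, call_single_data_t *csd)
{
	/* record which function was queued for which CPU, and from where */
	trace_csd_queue_cpu(cpu, _RET_IP_, csd->func, csd);

	/* bracket the callback so its runtime shows up in the trace */
	trace_csd_function_entry(csd->func, csd);
	csd->func(csd->info);
	trace_csd_function_exit(csd->func, csd);
}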
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This is an implementation of the BLAKE2s hash and PRF functions.
 *
 * Information: https://blake2.net/
 *
 */

#include <crypto/internal/blake2s.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bug.h>
#include <asm/unaligned.h>

static const u8 blake2s_sigma[10][16] = {
	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
	{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
	{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
	{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
	{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
	{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
	{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
	{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
	{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
	{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
};

static inline void blake2s_increment_counter(struct blake2s_state *state,
					     const u32 inc)
{
	state->t[0] += inc;
	state->t[1] += (state->t[0] < inc);
}

void blake2s_compress(struct blake2s_state *state, const u8 *block,
		      size_t nblocks, const u32 inc)
	__weak __alias(blake2s_compress_generic);

void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
			      size_t nblocks, const u32 inc)
{
	u32 m[16];
	u32 v[16];
	int i;

	WARN_ON(IS_ENABLED(DEBUG) &&
		(nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));

	while (nblocks > 0) {
		blake2s_increment_counter(state, inc);
		memcpy(m, block, BLAKE2S_BLOCK_SIZE);
		le32_to_cpu_array(m, ARRAY_SIZE(m));
		memcpy(v, state->h, 32);
		v[ 8] = BLAKE2S_IV0;
		v[ 9] = BLAKE2S_IV1;
		v[10] = BLAKE2S_IV2;
		v[11] = BLAKE2S_IV3;
		v[12] = BLAKE2S_IV4 ^ state->t[0];
		v[13] = BLAKE2S_IV5 ^ state->t[1];
		v[14] = BLAKE2S_IV6 ^ state->f[0];
		v[15] = BLAKE2S_IV7 ^ state->f[1];

#define G(r, i, a, b, c, d) do { \
	a += b + m[blake2s_sigma[r][2 * i + 0]]; \
	d = ror32(d ^ a, 16); \
	c += d; \
	b = ror32(b ^ c, 12); \
	a += b + m[blake2s_sigma[r][2 * i + 1]]; \
	d = ror32(d ^ a, 8); \
	c += d; \
	b = ror32(b ^ c, 7); \
} while (0)

#define ROUND(r) do { \
	G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
	G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
	G(r, 2, v[2], v[ 6], v[10], v[14]); \
	G(r, 3, v[3], v[ 7], v[11], v[15]); \
	G(r, 4, v[0], v[ 5], v[10], v[15]); \
	G(r, 5, v[1], v[ 6], v[11], v[12]); \
	G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
	G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
} while (0)
		ROUND(0);
		ROUND(1);
		ROUND(2);
		ROUND(3);
		ROUND(4);
		ROUND(5);
		ROUND(6);
		ROUND(7);
		ROUND(8);
		ROUND(9);

#undef G
#undef ROUND

		for (i = 0; i < 8; ++i)
			state->h[i] ^= v[i] ^ v[i + 8];

		block += BLAKE2S_BLOCK_SIZE;
		--nblocks;
	}
}
EXPORT_SYMBOL(blake2s_compress_generic);
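/*
 * Usage sketch only (not part of the file above), assuming the incremental
 * helpers declared in <crypto/blake2s.h> (blake2s_init(), blake2s_update(),
 * blake2s_final()); toy_blake2s_digest() is invented for the example. The
 * compression function above only consumes whole BLAKE2S_BLOCK_SIZE blocks;
 * the wrappers take care of buffering and finalization.
 */
#include <crypto/blake2s.h>

static void toy_blake2s_digest(const u8 *data, size_t len,
			       u8 out[BLAKE2S_HASH_SIZE])
{
	struct blake2s_state state;

	blake2s_init(&state, BLAKE2S_HASH_SIZE);	/* unkeyed, 32-byte digest */
	blake2s_update(&state, data, len);		/* may be called repeatedly */
	blake2s_final(&state, out);			/* pads and writes the hash */
}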
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 */

#ifndef _IP6_FIB_H
#define _IP6_FIB_H

#include <linux/ipv6_route.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/ip_fib.h>
#include <net/netlink.h>
#include <net/inetpeer.h>
#include <net/fib_notifier.h>
#include <linux/indirect_call_wrapper.h>
#include <uapi/linux/bpf.h>

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB6_TABLE_HASHSZ 256
#else
#define FIB6_TABLE_HASHSZ 1
#endif

#define RT6_DEBUG 2

struct rt6_info;
struct fib6_info;

struct fib6_config {
	u32		fc_table;
	u32		fc_metric;
	int		fc_dst_len;
	int		fc_src_len;
	int		fc_ifindex;
	u32		fc_flags;
	u32		fc_protocol;
	u16		fc_type;	/* only 8 bits are used */
	u16		fc_delete_all_nh : 1,
			fc_ignore_dev_down:1,
			__unused : 14;
	u32
fc_nh_id; struct in6_addr fc_dst; struct in6_addr fc_src; struct in6_addr fc_prefsrc; struct in6_addr fc_gateway; unsigned long fc_expires; struct nlattr *fc_mx; int fc_mx_len; int fc_mp_len; struct nlattr *fc_mp; struct nl_info fc_nlinfo; struct nlattr *fc_encap; u16 fc_encap_type; bool fc_is_fdb; }; struct fib6_node { struct fib6_node __rcu *parent; struct fib6_node __rcu *left; struct fib6_node __rcu *right; #ifdef CONFIG_IPV6_SUBTREES struct fib6_node __rcu *subtree; #endif struct fib6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; struct fib6_info __rcu *rr_ptr; struct rcu_head rcu; }; struct fib6_gc_args { int timeout; int more; }; #ifndef CONFIG_IPV6_SUBTREES #define FIB6_SUBTREE(fn) NULL static inline bool fib6_routes_require_src(const struct net *net) { return false; } static inline void fib6_routes_require_src_inc(struct net *net) {} static inline void fib6_routes_require_src_dec(struct net *net) {} #else static inline bool fib6_routes_require_src(const struct net *net) { return net->ipv6.fib6_routes_require_src > 0; } static inline void fib6_routes_require_src_inc(struct net *net) { net->ipv6.fib6_routes_require_src++; } static inline void fib6_routes_require_src_dec(struct net *net) { net->ipv6.fib6_routes_require_src--; } #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif /* * routing information * */ struct rt6key { struct in6_addr addr; int plen; }; struct fib6_table; struct rt6_exception_bucket { struct hlist_head chain; int depth; }; struct rt6_exception { struct hlist_node hlist; struct rt6_info *rt6i; unsigned long stamp; struct rcu_head rcu; }; #define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10 #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_MAX_DEPTH 5 struct fib6_nh { struct fib_nh_common nh_common; #ifdef CONFIG_IPV6_ROUTER_PREF unsigned long last_probe; #endif struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; }; struct fib6_info { struct fib6_table *fib6_table; struct fib6_info __rcu *fib6_next; struct fib6_node __rcu *fib6_node; /* Multipath routes: * siblings is a list of fib6_info that have the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. 
*/ union { struct list_head fib6_siblings; struct list_head nh_list; }; unsigned int fib6_nsiblings; refcount_t fib6_ref; unsigned long expires; struct hlist_node gc_link; struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] struct rt6key fib6_dst; u32 fib6_flags; struct rt6key fib6_src; struct rt6key fib6_prefsrc; u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; u8 offload; u8 trap; u8 offload_failed; u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, fib6_destroying:1, unused:4; struct rcu_head rcu; struct nexthop *nh; struct fib6_nh fib6_nh[]; }; struct rt6_info { struct dst_entry dst; struct fib6_info __rcu *from; int sernum; struct rt6key rt6i_dst; struct rt6key rt6i_src; struct in6_addr rt6i_gateway; struct inet6_dev *rt6i_idev; u32 rt6i_flags; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; }; struct fib6_result { struct fib6_nh *nh; struct fib6_info *f6i; u32 fib6_flags; u8 fib6_type; struct rt6_info *rt6; }; #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ rt = rcu_dereference(rt->fib6_next)) #define for_each_fib6_walker_rt(w) \ for (rt = (w)->leaf; rt; \ rt = rcu_dereference_protected(rt->fib6_next, 1)) static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; } static inline bool fib6_requires_src(const struct fib6_info *rt) { return rt->fib6_src.plen > 0; } /* The callers should hold f6i->fib6_table->tb6_lock if a route has ever * been added to a table before. */ static inline void fib6_clean_expires(struct fib6_info *f6i) { f6i->fib6_flags &= ~RTF_EXPIRES; f6i->expires = 0; } /* The callers should hold f6i->fib6_table->tb6_lock if a route has ever * been added to a table before. */ static inline void fib6_set_expires(struct fib6_info *f6i, unsigned long expires) { f6i->expires = expires; f6i->fib6_flags |= RTF_EXPIRES; } static inline bool fib6_check_expired(const struct fib6_info *f6i) { if (f6i->fib6_flags & RTF_EXPIRES) return time_after(jiffies, f6i->expires); return false; } /* Function to safely get fn->fn_sernum for passed in rt * and store result in passed in cookie. * Return true if we can get cookie safely * Return false if not */ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, u32 *cookie) { struct fib6_node *fn; bool status = false; fn = rcu_dereference(f6i->fib6_node); if (fn) { *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; } return status; } static inline u32 rt6_get_cookie(const struct rt6_info *rt) { struct fib6_info *from; u32 cookie = 0; if (rt->sernum) return rt->sernum; rcu_read_lock(); from = rcu_dereference(rt->from); if (from) fib6_get_cookie_safe(from, &cookie); rcu_read_unlock(); return cookie; } static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. 
* We rely on dst being first structure in struct rt6_info */ BUILD_BUG_ON(offsetof(struct rt6_info, dst) != 0); dst_release(&rt->dst); } struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh); void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) { refcount_inc(&f6i->fib6_ref); } static inline bool fib6_info_hold_safe(struct fib6_info *f6i) { return refcount_inc_not_zero(&f6i->fib6_ref); } static inline void fib6_info_release(struct fib6_info *f6i) { if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } } enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, #endif FWS_L, FWS_R, FWS_C, FWS_U }; struct fib6_walker { struct list_head lh; struct fib6_node *root, *node; struct fib6_info *leaf; enum fib6_walk_state state; unsigned int skip; unsigned int count; unsigned int skip_in_node; int (*func)(struct fib6_walker *); void *args; }; struct rt6_statistics { __u32 fib_nodes; /* all fib6 nodes */ __u32 fib_route_nodes; /* intermediate nodes */ __u32 fib_rt_entries; /* rt entries in fib table */ __u32 fib_rt_cache; /* cached rt entries in exception table */ __u32 fib_discarded_routes; /* total number of routes delete */ /* The following stat is not protected by any lock */ atomic_t fib_rt_alloc; /* total number of routes alloced */ }; #define RTN_TL_ROOT 0x0001 #define RTN_ROOT 0x0002 /* tree root node */ #define RTN_RTINFO 0x0004 /* node with valid routing info */ /* * priority levels (or metrics) * */ struct fib6_table { struct hlist_node tb6_hlist; u32 tb6_id; spinlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC #define RT6_TABLE_MAIN RT_TABLE_MAIN #define RT6_TABLE_DFLT RT6_TABLE_MAIN #define RT6_TABLE_INFO RT6_TABLE_MAIN #define RT6_TABLE_PREFIX RT6_TABLE_MAIN #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_MIN 1 #define FIB6_TABLE_MAX RT_TABLE_MAX #define RT6_TABLE_LOCAL RT_TABLE_LOCAL #else #define FIB6_TABLE_MIN RT_TABLE_MAIN #define FIB6_TABLE_MAX FIB6_TABLE_MIN #define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, struct flowi6 *, const struct sk_buff *, int); struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib6_info *rt; unsigned int nsiblings; }; /* * exported functions */ struct fib6_table *fib6_get_table(struct net *net, u32 id); struct fib6_table *fib6_new_table(struct net *net, u32 id); struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); /* called with rcu lock held; can return error pointer * caller needs to select path */ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, struct fib6_result *res, int flags); /* called with rcu lock held; caller needs to select path */ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, struct fib6_result *res, int strict); void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict); struct fib6_node *fib6_node_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr); struct fib6_node 
*fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, const struct in6_addr *saddr, int src_len, bool exact_match); void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); void fib6_clean_all_skip_notify(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack); int fib6_del(struct fib6_info *rt, struct nl_info *info); static inline void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) { const struct fib6_info *from; rcu_read_lock(); from = rcu_dereference(rt->from); if (from) *addr = from->fib6_prefsrc.addr; else *addr = in6addr_any; rcu_read_unlock(); } int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, struct netlink_ext_ack *extack); int call_fib6_multipath_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, unsigned int nsiblings, struct netlink_ext_ack *extack); int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt); void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); void fib6_gc_cleanup(void); int fib6_init(void); /* Add the route to the gc list if it is not already there * * The callers should hold f6i->fib6_table->tb6_lock. */ static inline void fib6_add_gc_list(struct fib6_info *f6i) { /* If fib6_node is null, the f6i is not in (or removed from) the * table. * * There is a gap between finding the f6i from the table and * calling this function without the protection of the tb6_lock. * This check makes sure the f6i is not added to the gc list when * it is not on the table. */ if (!rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock))) return; if (hlist_unhashed(&f6i->gc_link)) hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist); } /* Remove the route from the gc list if it is on the list. * * The callers should hold f6i->fib6_table->tb6_lock. 
*/ static inline void fib6_remove_gc_list(struct fib6_info *f6i) { if (!hlist_unhashed(&f6i->gc_link)) hlist_del_init(&f6i->gc_link); } struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; loff_t skip; struct fib6_table *tbl; int sernum; }; extern const struct seq_operations ipv6_route_seq_ops; int call_fib6_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) { return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); } void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, bool offload, bool trap, bool offload_failed); #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct fib6_info *, rt); }; #endif INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); INDIRECT_CALLABLE_DECLARE(struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags)); static inline struct rt6_info *pol_lookup_func(pol_lookup_t lookup, struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { return INDIRECT_CALL_4(lookup, ip6_pol_route_output, ip6_pol_route_input, ip6_pol_route_lookup, __ip6_route_redirect, net, table, fl6, skb, flags); } #ifdef CONFIG_IPV6_MULTIPLE_TABLES static inline bool fib6_has_custom_rules(const struct net *net) { return net->ipv6.fib6_has_custom_rules; } int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); int fib6_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); unsigned int fib6_rules_seq_read(struct net *net); static inline bool fib6_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi6 *fl6, struct flow_keys *flkeys) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; if (!net->ipv6.fib6_rules_require_fldissect) return false; memset(flkeys, 0, sizeof(*flkeys)); __skb_flow_dissect(net, skb, &flow_keys_dissector, flkeys, NULL, 0, 0, 0, flag); fl6->fl6_sport = flkeys->ports.src; fl6->fl6_dport = flkeys->ports.dst; fl6->flowi6_proto = flkeys->basic.ip_proto; return true; } #else static inline bool fib6_has_custom_rules(const struct net *net) { return false; } static inline int fib6_rules_init(void) { return 0; } static inline void 
fib6_rules_cleanup(void)
{
	return;
}
static inline bool fib6_rule_default(const struct fib_rule *rule)
{
	return true;
}
static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb,
				  struct netlink_ext_ack *extack)
{
	return 0;
}
static inline unsigned int fib6_rules_seq_read(struct net *net)
{
	return 0;
}
static inline bool fib6_rules_early_flow_dissect(struct net *net,
						 struct sk_buff *skb,
						 struct flowi6 *fl6,
						 struct flow_keys *flkeys)
{
	return false;
}
#endif

#endif
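/*
 * Illustrative sketch (not part of this header): the refcounting helpers
 * declared above are meant to be used together with RCU.  A reader that
 * finds a struct fib6_info under rcu_read_lock() and wants to keep it past
 * the critical section must take a reference with fib6_info_hold_safe()
 * (the entry may already be dying) and drop it later with
 * fib6_info_release().  The hypothetical helper below only shows that
 * pattern; the slot argument is an assumed example, not a real API.
 */
#if 0	/* example only, never compiled */
static struct fib6_info *example_grab_f6i(struct fib6_info __rcu **slot)
{
	struct fib6_info *f6i;

	rcu_read_lock();
	f6i = rcu_dereference(*slot);
	if (f6i && !fib6_info_hold_safe(f6i))
		f6i = NULL;		/* refcount already dropped to zero */
	rcu_read_unlock();

	return f6i;			/* caller pairs with fib6_info_release() */
}
#endif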
// SPDX-License-Identifier: GPL-2.0
/*
 * scsi_scan.c
 *
 * Copyright (C) 2000 Eric Youngdale,
 * Copyright (C) 2002 Patrick Mansfield
 *
 * The general scanning/probing algorithm is as follows, exceptions are
 * made to it depending on device specific flags, compilation options, and
 * global variable (boot or module load time) settings.
 *
 * A specific LUN is scanned via an INQUIRY command; if the LUN has a
 * device attached, a scsi_device is allocated and setup for it.
 *
 * For every id of every channel on the given host:
 *
 *	Scan LUN 0; if the target responds to LUN 0 (even if there is no
 *	device or storage attached to LUN 0):
 *
 *		If LUN 0 has a device attached, allocate and setup a
 *		scsi_device for it.
 *
 *		If target is SCSI-3 or up, issue a REPORT LUN, and scan
 *		all of the LUNs returned by the REPORT LUN; else,
 *		sequentially scan LUNs up until some maximum is reached,
 *		or a LUN is seen that cannot have a device attached to it.
*/ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/spinlock.h> #include <linux/async.h> #include <linux/slab.h> #include <asm/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_devinfo.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_dh.h> #include <scsi/scsi_eh.h> #include "scsi_priv.h" #include "scsi_logging.h" #define ALLOC_FAILURE_MSG KERN_ERR "%s: Allocation failure during" \ " SCSI scanning, some SCSI devices might not be configured\n" /* * Default timeout */ #define SCSI_TIMEOUT (2*HZ) #define SCSI_REPORT_LUNS_TIMEOUT (30*HZ) /* * Prefix values for the SCSI id's (stored in sysfs name field) */ #define SCSI_UID_SER_NUM 'S' #define SCSI_UID_UNKNOWN 'Z' /* * Return values of some of the scanning functions. * * SCSI_SCAN_NO_RESPONSE: no valid response received from the target, this * includes allocation or general failures preventing IO from being sent. * * SCSI_SCAN_TARGET_PRESENT: target responded, but no device is available * on the given LUN. * * SCSI_SCAN_LUN_PRESENT: target responded, and a device is available on a * given LUN. */ #define SCSI_SCAN_NO_RESPONSE 0 #define SCSI_SCAN_TARGET_PRESENT 1 #define SCSI_SCAN_LUN_PRESENT 2 static const char *scsi_null_device_strs = "nullnullnullnull"; #define MAX_SCSI_LUNS 512 static u64 max_scsi_luns = MAX_SCSI_LUNS; module_param_named(max_luns, max_scsi_luns, ullong, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(max_luns, "last scsi LUN (should be between 1 and 2^64-1)"); #ifdef CONFIG_SCSI_SCAN_ASYNC #define SCSI_SCAN_TYPE_DEFAULT "async" #else #define SCSI_SCAN_TYPE_DEFAULT "sync" #endif static char scsi_scan_type[7] = SCSI_SCAN_TYPE_DEFAULT; module_param_string(scan, scsi_scan_type, sizeof(scsi_scan_type), S_IRUGO|S_IWUSR); MODULE_PARM_DESC(scan, "sync, async, manual, or none. " "Setting to 'manual' disables automatic scanning, but allows " "for manual device scan via the 'scan' sysfs attribute."); static unsigned int scsi_inq_timeout = SCSI_TIMEOUT/HZ + 18; module_param_named(inq_timeout, scsi_inq_timeout, uint, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(inq_timeout, "Timeout (in seconds) waiting for devices to answer INQUIRY." " Default is 20. Some devices may need more; most need less."); /* This lock protects only this list */ static DEFINE_SPINLOCK(async_scan_lock); static LIST_HEAD(scanning_hosts); struct async_scan_data { struct list_head list; struct Scsi_Host *shost; struct completion prev_finished; }; /* * scsi_enable_async_suspend - Enable async suspend and resume */ void scsi_enable_async_suspend(struct device *dev) { /* * If a user has disabled async probing a likely reason is due to a * storage enclosure that does not inject staggered spin-ups. For * safety, make resume synchronous as well in that case. */ if (strncmp(scsi_scan_type, "async", 5) != 0) return; /* Enable asynchronous suspend and resume. */ device_enable_async_suspend(dev); } /** * scsi_complete_async_scans - Wait for asynchronous scans to complete * * When this function returns, any host which started scanning before * this function was called will have finished its scan. Hosts which * started scanning after this function was called may or may not have * finished. 
*/ int scsi_complete_async_scans(void) { struct async_scan_data *data; do { if (list_empty(&scanning_hosts)) return 0; /* If we can't get memory immediately, that's OK. Just * sleep a little. Even if we never get memory, the async * scans will finish eventually. */ data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) msleep(1); } while (!data); data->shost = NULL; init_completion(&data->prev_finished); spin_lock(&async_scan_lock); /* Check that there's still somebody else on the list */ if (list_empty(&scanning_hosts)) goto done; list_add_tail(&data->list, &scanning_hosts); spin_unlock(&async_scan_lock); printk(KERN_INFO "scsi: waiting for bus probes to complete ...\n"); wait_for_completion(&data->prev_finished); spin_lock(&async_scan_lock); list_del(&data->list); if (!list_empty(&scanning_hosts)) { struct async_scan_data *next = list_entry(scanning_hosts.next, struct async_scan_data, list); complete(&next->prev_finished); } done: spin_unlock(&async_scan_lock); kfree(data); return 0; } /** * scsi_unlock_floptical - unlock device via a special MODE SENSE command * @sdev: scsi device to send command to * @result: area to store the result of the MODE SENSE * * Description: * Send a vendor specific MODE SENSE (not a MODE SELECT) command. * Called for BLIST_KEY devices. **/ static void scsi_unlock_floptical(struct scsi_device *sdev, unsigned char *result) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; sdev_printk(KERN_NOTICE, sdev, "unlocking floptical drive\n"); scsi_cmd[0] = MODE_SENSE; scsi_cmd[1] = 0; scsi_cmd[2] = 0x2e; scsi_cmd[3] = 0; scsi_cmd[4] = 0x2a; /* size */ scsi_cmd[5] = 0; scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, result, 0x2a, SCSI_TIMEOUT, 3, NULL); } static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, unsigned int depth) { int new_shift = sbitmap_calculate_shift(depth); bool need_alloc = !sdev->budget_map.map; bool need_free = false; int ret; struct sbitmap sb_backup; depth = min_t(unsigned int, depth, scsi_device_max_queue_depth(sdev)); /* * realloc if new shift is calculated, which is caused by setting * up one new default queue depth after calling ->slave_configure */ if (!need_alloc && new_shift != sdev->budget_map.shift) need_alloc = need_free = true; if (!need_alloc) return 0; /* * Request queue has to be frozen for reallocating budget map, * and here disk isn't added yet, so freezing is pretty fast */ if (need_free) { blk_mq_freeze_queue(sdev->request_queue); sb_backup = sdev->budget_map; } ret = sbitmap_init_node(&sdev->budget_map, scsi_device_max_queue_depth(sdev), new_shift, GFP_KERNEL, sdev->request_queue->node, false, true); if (!ret) sbitmap_resize(&sdev->budget_map, depth); if (need_free) { if (ret) sdev->budget_map = sb_backup; else sbitmap_free(&sb_backup); ret = 0; blk_mq_unfreeze_queue(sdev->request_queue); } return ret; } /** * scsi_alloc_sdev - allocate and setup a scsi_Device * @starget: which target to allocate a &scsi_device for * @lun: which lun * @hostdata: usually NULL and set by ->slave_alloc instead * * Description: * Allocate, initialize for io, and return a pointer to a scsi_Device. * Stores the @shost, @channel, @id, and @lun in the scsi_Device, and * adds scsi_Device to the appropriate list. * * Return value: * scsi_Device pointer, or NULL on failure. 
**/ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, u64 lun, void *hostdata) { unsigned int depth; struct scsi_device *sdev; struct request_queue *q; int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, GFP_KERNEL); if (!sdev) goto out; sdev->vendor = scsi_null_device_strs; sdev->model = scsi_null_device_strs; sdev->rev = scsi_null_device_strs; sdev->host = shost; sdev->queue_ramp_up_period = SCSI_DEFAULT_RAMP_UP_PERIOD; sdev->id = starget->id; sdev->lun = lun; sdev->channel = starget->channel; mutex_init(&sdev->state_mutex); sdev->sdev_state = SDEV_CREATED; INIT_LIST_HEAD(&sdev->siblings); INIT_LIST_HEAD(&sdev->same_target_siblings); INIT_LIST_HEAD(&sdev->starved_entry); INIT_LIST_HEAD(&sdev->event_list); spin_lock_init(&sdev->list_lock); mutex_init(&sdev->inquiry_mutex); INIT_WORK(&sdev->event_work, scsi_evt_thread); INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue); sdev->sdev_gendev.parent = get_device(&starget->dev); sdev->sdev_target = starget; /* usually NULL and set by ->slave_alloc instead */ sdev->hostdata = hostdata; /* if the device needs this changing, it may do so in the * slave_configure function */ sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED; /* * Some low level driver could use device->type */ sdev->type = -1; /* * Assume that the device will have handshaking problems, * and then fix this field later if it turns out it * doesn't */ sdev->borken = 1; sdev->sg_reserved_size = INT_MAX; q = blk_mq_alloc_queue(&sdev->host->tag_set, NULL, NULL); if (IS_ERR(q)) { /* release fn is set up in scsi_sysfs_device_initialise, so * have to free and put manually here */ put_device(&starget->dev); kfree(sdev); goto out; } kref_get(&sdev->host->tagset_refcnt); sdev->request_queue = q; q->queuedata = sdev; __scsi_init_queue(sdev->host, q); depth = sdev->host->cmd_per_lun ?: 1; /* * Use .can_queue as budget map's depth because we have to * support adjusting queue depth from sysfs. Meantime use * default device queue depth to figure out sbitmap shift * since we use this queue depth most of times. 
*/ if (scsi_realloc_sdev_budget_map(sdev, depth)) { put_device(&starget->dev); kfree(sdev); goto out; } scsi_change_queue_depth(sdev, depth); scsi_sysfs_device_initialize(sdev); if (shost->hostt->slave_alloc) { ret = shost->hostt->slave_alloc(sdev); if (ret) { /* * if LLDD reports slave not present, don't clutter * console with alloc failure messages */ if (ret == -ENXIO) display_failure_msg = 0; goto out_device_destroy; } } return sdev; out_device_destroy: __scsi_remove_device(sdev); out: if (display_failure_msg) printk(ALLOC_FAILURE_MSG, __func__); return NULL; } static void scsi_target_destroy(struct scsi_target *starget) { struct device *dev = &starget->dev; struct Scsi_Host *shost = dev_to_shost(dev->parent); unsigned long flags; BUG_ON(starget->state == STARGET_DEL); starget->state = STARGET_DEL; transport_destroy_device(dev); spin_lock_irqsave(shost->host_lock, flags); if (shost->hostt->target_destroy) shost->hostt->target_destroy(starget); list_del_init(&starget->siblings); spin_unlock_irqrestore(shost->host_lock, flags); put_device(dev); } static void scsi_target_dev_release(struct device *dev) { struct device *parent = dev->parent; struct scsi_target *starget = to_scsi_target(dev); kfree(starget); put_device(parent); } static const struct device_type scsi_target_type = { .name = "scsi_target", .release = scsi_target_dev_release, }; int scsi_is_target_device(const struct device *dev) { return dev->type == &scsi_target_type; } EXPORT_SYMBOL(scsi_is_target_device); static struct scsi_target *__scsi_find_target(struct device *parent, int channel, uint id) { struct scsi_target *starget, *found_starget = NULL; struct Scsi_Host *shost = dev_to_shost(parent); /* * Search for an existing target for this sdev. */ list_for_each_entry(starget, &shost->__targets, siblings) { if (starget->id == id && starget->channel == channel) { found_starget = starget; break; } } if (found_starget) get_device(&found_starget->dev); return found_starget; } /** * scsi_target_reap_ref_release - remove target from visibility * @kref: the reap_ref in the target being released * * Called on last put of reap_ref, which is the indication that no device * under this target is visible anymore, so render the target invisible in * sysfs. Note: we have to be in user context here because the target reaps * should be done in places where the scsi device visibility is being removed. */ static void scsi_target_reap_ref_release(struct kref *kref) { struct scsi_target *starget = container_of(kref, struct scsi_target, reap_ref); /* * if we get here and the target is still in a CREATED state that * means it was allocated but never made visible (because a scan * turned up no LUNs), so don't call device_del() on it. */ if ((starget->state != STARGET_CREATED) && (starget->state != STARGET_CREATED_REMOVE)) { transport_remove_device(&starget->dev); device_del(&starget->dev); } scsi_target_destroy(starget); } static void scsi_target_reap_ref_put(struct scsi_target *starget) { kref_put(&starget->reap_ref, scsi_target_reap_ref_release); } /** * scsi_alloc_target - allocate a new or find an existing target * @parent: parent of the target (need not be a scsi host) * @channel: target channel number (zero if no channels) * @id: target id number * * Return an existing target if one exists, provided it hasn't already * gone into STARGET_DEL state, otherwise allocate a new target. 
* * The target is returned with an incremented reference, so the caller * is responsible for both reaping and doing a last put */ static struct scsi_target *scsi_alloc_target(struct device *parent, int channel, uint id) { struct Scsi_Host *shost = dev_to_shost(parent); struct device *dev = NULL; unsigned long flags; const int size = sizeof(struct scsi_target) + shost->transportt->target_size; struct scsi_target *starget; struct scsi_target *found_target; int error, ref_got; starget = kzalloc(size, GFP_KERNEL); if (!starget) { printk(KERN_ERR "%s: allocation failure\n", __func__); return NULL; } dev = &starget->dev; device_initialize(dev); kref_init(&starget->reap_ref); dev->parent = get_device(parent); dev_set_name(dev, "target%d:%d:%d", shost->host_no, channel, id); dev->bus = &scsi_bus_type; dev->type = &scsi_target_type; scsi_enable_async_suspend(dev); starget->id = id; starget->channel = channel; starget->can_queue = 0; INIT_LIST_HEAD(&starget->siblings); INIT_LIST_HEAD(&starget->devices); starget->state = STARGET_CREATED; starget->scsi_level = SCSI_2; starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED; retry: spin_lock_irqsave(shost->host_lock, flags); found_target = __scsi_find_target(parent, channel, id); if (found_target) goto found; list_add_tail(&starget->siblings, &shost->__targets); spin_unlock_irqrestore(shost->host_lock, flags); /* allocate and add */ transport_setup_device(dev); if (shost->hostt->target_alloc) { error = shost->hostt->target_alloc(starget); if(error) { if (error != -ENXIO) dev_err(dev, "target allocation failed, error %d\n", error); /* don't want scsi_target_reap to do the final * put because it will be under the host lock */ scsi_target_destroy(starget); return NULL; } } get_device(dev); return starget; found: /* * release routine already fired if kref is zero, so if we can still * take the reference, the target must be alive. If we can't, it must * be dying and we need to wait for a new target */ ref_got = kref_get_unless_zero(&found_target->reap_ref); spin_unlock_irqrestore(shost->host_lock, flags); if (ref_got) { put_device(dev); return found_target; } /* * Unfortunately, we found a dying target; need to wait until it's * dead before we can get a new one. There is an anomaly here. We * *should* call scsi_target_reap() to balance the kref_get() of the * reap_ref above. However, since the target being released, it's * already invisible and the reap_ref is irrelevant. If we call * scsi_target_reap() we might spuriously do another device_del() on * an already invisible target. */ put_device(&found_target->dev); /* * length of time is irrelevant here, we just want to yield the CPU * for a tick to avoid busy waiting for the target to die. */ msleep(1); goto retry; } /** * scsi_target_reap - check to see if target is in use and destroy if not * @starget: target to be checked * * This is used after removing a LUN or doing a last put of the target * it checks atomically that nothing is using the target and removes * it if so. 
*/ void scsi_target_reap(struct scsi_target *starget) { /* * serious problem if this triggers: STARGET_DEL is only set in the if * the reap_ref drops to zero, so we're trying to do another final put * on an already released kref */ BUG_ON(starget->state == STARGET_DEL); scsi_target_reap_ref_put(starget); } /** * scsi_sanitize_inquiry_string - remove non-graphical chars from an * INQUIRY result string * @s: INQUIRY result string to sanitize * @len: length of the string * * Description: * The SCSI spec says that INQUIRY vendor, product, and revision * strings must consist entirely of graphic ASCII characters, * padded on the right with spaces. Since not all devices obey * this rule, we will replace non-graphic or non-ASCII characters * with spaces. Exception: a NUL character is interpreted as a * string terminator, so all the following characters are set to * spaces. **/ void scsi_sanitize_inquiry_string(unsigned char *s, int len) { int terminated = 0; for (; len > 0; (--len, ++s)) { if (*s == 0) terminated = 1; if (terminated || *s < 0x20 || *s > 0x7e) *s = ' '; } } EXPORT_SYMBOL(scsi_sanitize_inquiry_string); /** * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY * @sdev: scsi_device to probe * @inq_result: area to store the INQUIRY result * @result_len: len of inq_result * @bflags: store any bflags found here * * Description: * Probe the lun associated with @req using a standard SCSI INQUIRY; * * If the INQUIRY is successful, zero is returned and the * INQUIRY data is in @inq_result; the scsi_level and INQUIRY length * are copied to the scsi_device any flags value is stored in *@bflags. **/ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, int result_len, blist_flags_t *bflags) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; int first_inquiry_len, try_inquiry_len, next_inquiry_len; int response_len = 0; int pass, count, result, resid; struct scsi_failure failure_defs[] = { /* * not-ready to ready transition [asc/ascq=0x28/0x0] or * power-on, reset [asc/ascq=0x29/0x0], continue. INQUIRY * should not yield UNIT_ATTENTION but many buggy devices do * so anyway. */ { .sense = UNIT_ATTENTION, .asc = 0x28, .result = SAM_STAT_CHECK_CONDITION, }, { .sense = UNIT_ATTENTION, .asc = 0x29, .result = SAM_STAT_CHECK_CONDITION, }, { .allowed = 1, .result = DID_TIME_OUT << 16, }, {} }; struct scsi_failures failures = { .total_allowed = 3, .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .resid = &resid, .failures = &failures, }; *bflags = 0; /* Perform up to 3 passes. The first pass uses a conservative * transfer length of 36 unless sdev->inquiry_len specifies a * different value. */ first_inquiry_len = sdev->inquiry_len ? sdev->inquiry_len : 36; try_inquiry_len = first_inquiry_len; pass = 1; next_pass: SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, "scsi scan: INQUIRY pass %d length %d\n", pass, try_inquiry_len)); /* Each pass gets up to three chances to ignore Unit Attention */ scsi_failures_reset_retries(&failures); for (count = 0; count < 3; ++count) { memset(scsi_cmd, 0, 6); scsi_cmd[0] = INQUIRY; scsi_cmd[4] = (unsigned char) try_inquiry_len; memset(inq_result, 0, try_inquiry_len); result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, inq_result, try_inquiry_len, HZ / 2 + HZ * scsi_inq_timeout, 3, &exec_args); SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, "scsi scan: INQUIRY %s with code 0x%x\n", result ? "failed" : "successful", result)); if (result == 0) { /* * if nothing was transferred, we try * again. 
It's a workaround for some USB * devices. */ if (resid == try_inquiry_len) continue; } break; } if (result == 0) { scsi_sanitize_inquiry_string(&inq_result[8], 8); scsi_sanitize_inquiry_string(&inq_result[16], 16); scsi_sanitize_inquiry_string(&inq_result[32], 4); response_len = inq_result[4] + 5; if (response_len > 255) response_len = first_inquiry_len; /* sanity */ /* * Get any flags for this device. * * XXX add a bflags to scsi_device, and replace the * corresponding bit fields in scsi_device, so bflags * need not be passed as an argument. */ *bflags = scsi_get_device_flags(sdev, &inq_result[8], &inq_result[16]); /* When the first pass succeeds we gain information about * what larger transfer lengths might work. */ if (pass == 1) { if (BLIST_INQUIRY_36 & *bflags) next_inquiry_len = 36; /* * LLD specified a maximum sdev->inquiry_len * but device claims it has more data. Capping * the length only makes sense for legacy * devices. If a device supports SPC-4 (2014) * or newer, assume that it is safe to ask for * as much as the device says it supports. */ else if (sdev->inquiry_len && response_len > sdev->inquiry_len && (inq_result[2] & 0x7) < 6) /* SPC-4 */ next_inquiry_len = sdev->inquiry_len; else next_inquiry_len = response_len; /* If more data is available perform the second pass */ if (next_inquiry_len > try_inquiry_len) { try_inquiry_len = next_inquiry_len; pass = 2; goto next_pass; } } } else if (pass == 2) { sdev_printk(KERN_INFO, sdev, "scsi scan: %d byte inquiry failed. " "Consider BLIST_INQUIRY_36 for this device\n", try_inquiry_len); /* If this pass failed, the third pass goes back and transfers * the same amount as we successfully got in the first pass. */ try_inquiry_len = first_inquiry_len; pass = 3; goto next_pass; } /* If the last transfer attempt got an error, assume the * peripheral doesn't exist or is dead. */ if (result) return -EIO; /* Don't report any more data than the device says is valid */ sdev->inquiry_len = min(try_inquiry_len, response_len); /* * XXX Abort if the response length is less than 36? If less than * 32, the lookup of the device flags (above) could be invalid, * and it would be possible to take an incorrect action - we do * not want to hang because of a short INQUIRY. On the flip side, * if the device is spun down or becoming ready (and so it gives a * short INQUIRY), an abort here prevents any further use of the * device, including spin up. * * On the whole, the best approach seems to be to assume the first * 36 bytes are valid no matter what the device says. That's * better than copying < 36 bytes to the inquiry-result buffer * and displaying garbage for the Vendor, Product, or Revision * strings. */ if (sdev->inquiry_len < 36) { if (!sdev->host->short_inquiry) { shost_printk(KERN_INFO, sdev->host, "scsi scan: INQUIRY result too short (%d)," " using 36\n", sdev->inquiry_len); sdev->host->short_inquiry = 1; } sdev->inquiry_len = 36; } /* * Related to the above issue: * * XXX Devices (disk or all?) should be sent a TEST UNIT READY, * and if not ready, sent a START_STOP to start (maybe spin up) and * then send the INQUIRY again, since the INQUIRY can change after * a device is initialized. * * Ideally, start a device if explicitly asked to do so. This * assumes that a device is spun up on power on, spun down on * request, and then spun up on request. */ /* * The scanning code needs to know the scsi_level, even if no * device is attached at LUN 0 (SCSI_SCAN_TARGET_PRESENT) so * non-zero LUNs can be scanned. 
*/ sdev->scsi_level = inq_result[2] & 0x0f; if (sdev->scsi_level >= 2 || (sdev->scsi_level == 1 && (inq_result[3] & 0x0f) == 1)) sdev->scsi_level++; sdev->sdev_target->scsi_level = sdev->scsi_level; /* * If SCSI-2 or lower, and if the transport requires it, * store the LUN value in CDB[1]. */ sdev->lun_in_cdb = 0; if (sdev->scsi_level <= SCSI_2 && sdev->scsi_level != SCSI_UNKNOWN && !sdev->host->no_scsi2_lun_in_cdb) sdev->lun_in_cdb = 1; return 0; } /** * scsi_add_lun - allocate and fully initialze a scsi_device * @sdev: holds information to be stored in the new scsi_device * @inq_result: holds the result of a previous INQUIRY to the LUN * @bflags: black/white list flag * @async: 1 if this device is being scanned asynchronously * * Description: * Initialize the scsi_device @sdev. Optionally set fields based * on values in *@bflags. * * Return: * SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, blist_flags_t *bflags, int async) { int ret; /* * XXX do not save the inquiry, since it can change underneath us, * save just vendor/model/rev. * * Rather than save it and have an ioctl that retrieves the saved * value, have an ioctl that executes the same INQUIRY code used * in scsi_probe_lun, let user level programs doing INQUIRY * scanning run at their own risk, or supply a user level program * that can correctly scan. */ /* * Copy at least 36 bytes of INQUIRY data, so that we don't * dereference unallocated memory when accessing the Vendor, * Product, and Revision strings. Badly behaved devices may set * the INQUIRY Additional Length byte to a small value, indicating * these strings are invalid, but often they contain plausible data * nonetheless. It doesn't matter if the device sent < 36 bytes * total, since scsi_probe_lun() initializes inq_result with 0s. */ sdev->inquiry = kmemdup(inq_result, max_t(size_t, sdev->inquiry_len, 36), GFP_KERNEL); if (sdev->inquiry == NULL) return SCSI_SCAN_NO_RESPONSE; sdev->vendor = (char *) (sdev->inquiry + 8); sdev->model = (char *) (sdev->inquiry + 16); sdev->rev = (char *) (sdev->inquiry + 32); if (strncmp(sdev->vendor, "ATA ", 8) == 0) { /* * sata emulation layer device. This is a hack to work around * the SATL power management specifications which state that * when the SATL detects the device has gone into standby * mode, it shall respond with NOT READY. */ sdev->allow_restart = 1; } if (*bflags & BLIST_ISROM) { sdev->type = TYPE_ROM; sdev->removable = 1; } else { sdev->type = (inq_result[0] & 0x1f); sdev->removable = (inq_result[1] & 0x80) >> 7; /* * some devices may respond with wrong type for * well-known logical units. Force well-known type * to enumerate them correctly. 
*/ if (scsi_is_wlun(sdev->lun) && sdev->type != TYPE_WLUN) { sdev_printk(KERN_WARNING, sdev, "%s: correcting incorrect peripheral device type 0x%x for W-LUN 0x%16xhN\n", __func__, sdev->type, (unsigned int)sdev->lun); sdev->type = TYPE_WLUN; } } if (sdev->type == TYPE_RBC || sdev->type == TYPE_ROM) { /* RBC and MMC devices can return SCSI-3 compliance and yet * still not support REPORT LUNS, so make them act as * BLIST_NOREPORTLUN unless BLIST_REPORTLUN2 is * specifically set */ if ((*bflags & BLIST_REPORTLUN2) == 0) *bflags |= BLIST_NOREPORTLUN; } /* * For a peripheral qualifier (PQ) value of 1 (001b), the SCSI * spec says: The device server is capable of supporting the * specified peripheral device type on this logical unit. However, * the physical device is not currently connected to this logical * unit. * * The above is vague, as it implies that we could treat 001 and * 011 the same. Stay compatible with previous code, and create a * scsi_device for a PQ of 1 * * Don't set the device offline here; rather let the upper * level drivers eval the PQ to decide whether they should * attach. So remove ((inq_result[0] >> 5) & 7) == 1 check. */ sdev->inq_periph_qual = (inq_result[0] >> 5) & 7; sdev->lockable = sdev->removable; sdev->soft_reset = (inq_result[7] & 1) && ((inq_result[3] & 7) == 2); if (sdev->scsi_level >= SCSI_3 || (sdev->inquiry_len > 56 && inq_result[56] & 0x04)) sdev->ppr = 1; if (inq_result[7] & 0x60) sdev->wdtr = 1; if (inq_result[7] & 0x10) sdev->sdtr = 1; sdev_printk(KERN_NOTICE, sdev, "%s %.8s %.16s %.4s PQ: %d " "ANSI: %d%s\n", scsi_device_type(sdev->type), sdev->vendor, sdev->model, sdev->rev, sdev->inq_periph_qual, inq_result[2] & 0x07, (inq_result[3] & 0x0f) == 1 ? " CCS" : ""); if ((sdev->scsi_level >= SCSI_2) && (inq_result[7] & 2) && !(*bflags & BLIST_NOTQ)) { sdev->tagged_supported = 1; sdev->simple_tags = 1; } /* * Some devices (Texel CD ROM drives) have handshaking problems * when used with the Seagate controllers. borken is initialized * to 1, and then set it to 0 here. */ if ((*bflags & BLIST_BORKEN) == 0) sdev->borken = 0; if (*bflags & BLIST_NO_ULD_ATTACH) sdev->no_uld_attach = 1; /* * Apparently some really broken devices (contrary to the SCSI * standards) need to be selected without asserting ATN */ if (*bflags & BLIST_SELECT_NO_ATN) sdev->select_no_atn = 1; /* * Maximum 512 sector transfer length * broken RA4x00 Compaq Disk Array */ if (*bflags & BLIST_MAX_512) blk_queue_max_hw_sectors(sdev->request_queue, 512); /* * Max 1024 sector transfer length for targets that report incorrect * max/optimal lengths and relied on the old block layer safe default */ else if (*bflags & BLIST_MAX_1024) blk_queue_max_hw_sectors(sdev->request_queue, 1024); /* * Some devices may not want to have a start command automatically * issued when a device is added. 
*/ if (*bflags & BLIST_NOSTARTONADD) sdev->no_start_on_add = 1; if (*bflags & BLIST_SINGLELUN) scsi_target(sdev)->single_lun = 1; sdev->use_10_for_rw = 1; /* some devices don't like REPORT SUPPORTED OPERATION CODES * and will simply timeout causing sd_mod init to take a very * very long time */ if (*bflags & BLIST_NO_RSOC) sdev->no_report_opcodes = 1; /* set the device running here so that slave configure * may do I/O */ mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, SDEV_RUNNING); if (ret) ret = scsi_device_set_state(sdev, SDEV_BLOCK); mutex_unlock(&sdev->state_mutex); if (ret) { sdev_printk(KERN_ERR, sdev, "in wrong state %s to complete scan\n", scsi_device_state_name(sdev->sdev_state)); return SCSI_SCAN_NO_RESPONSE; } if (*bflags & BLIST_NOT_LOCKABLE) sdev->lockable = 0; if (*bflags & BLIST_RETRY_HWERROR) sdev->retry_hwerror = 1; if (*bflags & BLIST_NO_DIF) sdev->no_dif = 1; if (*bflags & BLIST_UNMAP_LIMIT_WS) sdev->unmap_limit_for_ws = 1; if (*bflags & BLIST_IGN_MEDIA_CHANGE) sdev->ignore_media_change = 1; sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; if (*bflags & BLIST_TRY_VPD_PAGES) sdev->try_vpd_pages = 1; else if (*bflags & BLIST_SKIP_VPD_PAGES) sdev->skip_vpd_pages = 1; if (*bflags & BLIST_NO_VPD_SIZE) sdev->no_vpd_size = 1; transport_configure_device(&sdev->sdev_gendev); if (sdev->host->hostt->slave_configure) { ret = sdev->host->hostt->slave_configure(sdev); if (ret) { /* * if LLDD reports slave not present, don't clutter * console with alloc failure messages */ if (ret != -ENXIO) { sdev_printk(KERN_ERR, sdev, "failed to configure device\n"); } return SCSI_SCAN_NO_RESPONSE; } /* * The queue_depth is often changed in ->slave_configure. * Set up budget map again since memory consumption of * the map depends on actual queue depth. */ scsi_realloc_sdev_budget_map(sdev, sdev->queue_depth); } if (sdev->scsi_level >= SCSI_3) scsi_attach_vpd(sdev); scsi_cdl_check(sdev); sdev->max_queue_depth = sdev->queue_depth; WARN_ON_ONCE(sdev->max_queue_depth > sdev->budget_map.depth); sdev->sdev_bflags = *bflags; /* * Ok, the device is now all set up, we can * register it and tell the rest of the kernel * about it. */ if (!async && scsi_sysfs_add_sdev(sdev) != 0) return SCSI_SCAN_NO_RESPONSE; return SCSI_SCAN_LUN_PRESENT; } #ifdef CONFIG_SCSI_LOGGING /** * scsi_inq_str - print INQUIRY data from min to max index, strip trailing whitespace * @buf: Output buffer with at least end-first+1 bytes of space * @inq: Inquiry buffer (input) * @first: Offset of string into inq * @end: Index after last character in inq */ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq, unsigned first, unsigned end) { unsigned term = 0, idx; for (idx = 0; idx + first < end && idx + first < inq[4] + 5; idx++) { if (inq[idx+first] > ' ') { buf[idx] = inq[idx+first]; term = idx+1; } else { buf[idx] = ' '; } } buf[term] = 0; return buf; } #endif /** * scsi_probe_and_add_lun - probe a LUN, if a LUN is found add it * @starget: pointer to target device structure * @lun: LUN of target device * @bflagsp: store bflags here if not NULL * @sdevp: probe the LUN corresponding to this scsi_device * @rescan: if not equal to SCSI_SCAN_INITIAL skip some code only * needed on first scan * @hostdata: passed to scsi_alloc_sdev() * * Description: * Call scsi_probe_lun, if a LUN with an attached device is found, * allocate and set it up by calling scsi_add_lun. 
* * Return: * * - SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device * - SCSI_SCAN_TARGET_PRESENT: target responded, but no device is * attached at the LUN * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ static int scsi_probe_and_add_lun(struct scsi_target *starget, u64 lun, blist_flags_t *bflagsp, struct scsi_device **sdevp, enum scsi_scan_mode rescan, void *hostdata) { struct scsi_device *sdev; unsigned char *result; blist_flags_t bflags; int res = SCSI_SCAN_NO_RESPONSE, result_len = 256; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); /* * The rescan flag is used as an optimization, the first scan of a * host adapter calls into here with rescan == 0. */ sdev = scsi_device_lookup_by_target(starget, lun); if (sdev) { if (rescan != SCSI_SCAN_INITIAL || !scsi_device_created(sdev)) { SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, "scsi scan: device exists on %s\n", dev_name(&sdev->sdev_gendev))); if (sdevp) *sdevp = sdev; else scsi_device_put(sdev); if (bflagsp) *bflagsp = scsi_get_device_flags(sdev, sdev->vendor, sdev->model); return SCSI_SCAN_LUN_PRESENT; } scsi_device_put(sdev); } else sdev = scsi_alloc_sdev(starget, lun, hostdata); if (!sdev) goto out; result = kmalloc(result_len, GFP_KERNEL); if (!result) goto out_free_sdev; if (scsi_probe_lun(sdev, result, result_len, &bflags)) goto out_free_result; if (bflagsp) *bflagsp = bflags; /* * result contains valid SCSI INQUIRY data. */ if ((result[0] >> 5) == 3) { /* * For a Peripheral qualifier 3 (011b), the SCSI * spec says: The device server is not capable of * supporting a physical device on this logical * unit. * * For disks, this implies that there is no * logical disk configured at sdev->lun, but there * is a target id responding. */ SCSI_LOG_SCAN_BUS(2, sdev_printk(KERN_INFO, sdev, "scsi scan:" " peripheral qualifier of 3, device not" " added\n")) if (lun == 0) { SCSI_LOG_SCAN_BUS(1, { unsigned char vend[9]; unsigned char mod[17]; sdev_printk(KERN_INFO, sdev, "scsi scan: consider passing scsi_mod." "dev_flags=%s:%s:0x240 or 0x1000240\n", scsi_inq_str(vend, result, 8, 16), scsi_inq_str(mod, result, 16, 32)); }); } res = SCSI_SCAN_TARGET_PRESENT; goto out_free_result; } /* * Some targets may set slight variations of PQ and PDT to signal * that no LUN is present, so don't add sdev in these cases. * Two specific examples are: * 1) NetApp targets: return PQ=1, PDT=0x1f * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved" * in the UFI 1.0 spec (we cannot rely on reserved bits). * * References: * 1) SCSI SPC-3, pp. 145-146 * PQ=1: "A peripheral device having the specified peripheral * device type is not connected to this logical unit. However, the * device server is capable of supporting the specified peripheral * device type on this logical unit." * PDT=0x1f: "Unknown or no device type" * 2) USB UFI 1.0, p. 
20 * PDT=00h Direct-access device (floppy) * PDT=1Fh none (no FDD connected to the requested logical unit) */ if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) && (result[0] & 0x1f) == 0x1f && !scsi_is_wlun(lun)) { SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, "scsi scan: peripheral device type" " of 31, no device added\n")); res = SCSI_SCAN_TARGET_PRESENT; goto out_free_result; } res = scsi_add_lun(sdev, result, &bflags, shost->async_scan); if (res == SCSI_SCAN_LUN_PRESENT) { if (bflags & BLIST_KEY) { sdev->lockable = 0; scsi_unlock_floptical(sdev, result); } } out_free_result: kfree(result); out_free_sdev: if (res == SCSI_SCAN_LUN_PRESENT) { if (sdevp) { if (scsi_device_get(sdev) == 0) { *sdevp = sdev; } else { __scsi_remove_device(sdev); res = SCSI_SCAN_NO_RESPONSE; } } } else __scsi_remove_device(sdev); out: return res; } /** * scsi_sequential_lun_scan - sequentially scan a SCSI target * @starget: pointer to target structure to scan * @bflags: black/white list flag for LUN 0 * @scsi_level: Which version of the standard does this device adhere to * @rescan: passed to scsi_probe_add_lun() * * Description: * Generally, scan from LUN 1 (LUN 0 is assumed to already have been * scanned) to some maximum lun until a LUN is found with no device * attached. Use the bflags to figure out any oddities. * * Modifies sdevscan->lun. **/ static void scsi_sequential_lun_scan(struct scsi_target *starget, blist_flags_t bflags, int scsi_level, enum scsi_scan_mode rescan) { uint max_dev_lun; u64 sparse_lun, lun; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); SCSI_LOG_SCAN_BUS(3, starget_printk(KERN_INFO, starget, "scsi scan: Sequential scan\n")); max_dev_lun = min(max_scsi_luns, shost->max_lun); /* * If this device is known to support sparse multiple units, * override the other settings, and scan all of them. Normally, * SCSI-3 devices should be scanned via the REPORT LUNS. */ if (bflags & BLIST_SPARSELUN) { max_dev_lun = shost->max_lun; sparse_lun = 1; } else sparse_lun = 0; /* * If less than SCSI_1_CCS, and no special lun scanning, stop * scanning; this matches 2.4 behaviour, but could just be a bug * (to continue scanning a SCSI_1_CCS device). * * This test is broken. We might not have any device on lun0 for * a sparselun device, and if that's the case then how would we * know the real scsi_level, eh? It might make sense to just not * scan any SCSI_1 device for non-0 luns, but that check would best * go into scsi_alloc_sdev() and just have it return null when asked * to alloc an sdev for lun > 0 on an already found SCSI_1 device. * if ((sdevscan->scsi_level < SCSI_1_CCS) && ((bflags & (BLIST_FORCELUN | BLIST_SPARSELUN | BLIST_MAX5LUN)) == 0)) return; */ /* * If this device is known to support multiple units, override * the other settings, and scan all of them. */ if (bflags & BLIST_FORCELUN) max_dev_lun = shost->max_lun; /* * REGAL CDC-4X: avoid hang after LUN 4 */ if (bflags & BLIST_MAX5LUN) max_dev_lun = min(5U, max_dev_lun); /* * Do not scan SCSI-2 or lower device past LUN 7, unless * BLIST_LARGELUN. */ if (scsi_level < SCSI_3 && !(bflags & BLIST_LARGELUN)) max_dev_lun = min(8U, max_dev_lun); else max_dev_lun = min(256U, max_dev_lun); /* * We have already scanned LUN 0, so start at LUN 1. Keep scanning * until we reach the max, or no LUN is found and we are not * sparse_lun. 
*/ for (lun = 1; lun < max_dev_lun; ++lun) if ((scsi_probe_and_add_lun(starget, lun, NULL, NULL, rescan, NULL) != SCSI_SCAN_LUN_PRESENT) && !sparse_lun) return; } /** * scsi_report_lun_scan - Scan using SCSI REPORT LUN results * @starget: which target * @bflags: Zero or a mix of BLIST_NOLUN, BLIST_REPORTLUN2, or BLIST_NOREPORTLUN * @rescan: nonzero if we can skip code only needed on first scan * * Description: * Fast scanning for modern (SCSI-3) devices by sending a REPORT LUN command. * Scan the resulting list of LUNs by calling scsi_probe_and_add_lun. * * If BLINK_REPORTLUN2 is set, scan a target that supports more than 8 * LUNs even if it's older than SCSI-3. * If BLIST_NOREPORTLUN is set, return 1 always. * If BLIST_NOLUN is set, return 0 always. * If starget->no_report_luns is set, return 1 always. * * Return: * 0: scan completed (or no memory, so further scanning is futile) * 1: could not scan with REPORT LUN **/ static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags, enum scsi_scan_mode rescan) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; unsigned int length; u64 lun; unsigned int num_luns; int result; struct scsi_lun *lunp, *lun_data; struct scsi_device *sdev; struct Scsi_Host *shost = dev_to_shost(&starget->dev); struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .result = SAM_STAT_CHECK_CONDITION, }, /* Fail all CCs except the UA above */ { .sense = SCMD_FAILURE_SENSE_ANY, .result = SAM_STAT_CHECK_CONDITION, }, /* Retry any other errors not listed above */ { .result = SCMD_FAILURE_RESULT_ANY, }, {} }; struct scsi_failures failures = { .total_allowed = 3, .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .failures = &failures, }; int ret = 0; /* * Only support SCSI-3 and up devices if BLIST_NOREPORTLUN is not set. * Also allow SCSI-2 if BLIST_REPORTLUN2 is set and host adapter does * support more than 8 LUNs. * Don't attempt if the target doesn't support REPORT LUNS. */ if (bflags & BLIST_NOREPORTLUN) return 1; if (starget->scsi_level < SCSI_2 && starget->scsi_level != SCSI_UNKNOWN) return 1; if (starget->scsi_level < SCSI_3 && (!(bflags & BLIST_REPORTLUN2) || shost->max_lun <= 8)) return 1; if (bflags & BLIST_NOLUN) return 0; if (starget->no_report_luns) return 1; if (!(sdev = scsi_device_lookup_by_target(starget, 0))) { sdev = scsi_alloc_sdev(starget, 0, NULL); if (!sdev) return 0; if (scsi_device_get(sdev)) { __scsi_remove_device(sdev); return 0; } } /* * Allocate enough to hold the header (the same size as one scsi_lun) * plus the number of luns we are requesting. 511 was the default * value of the now removed max_report_luns parameter. */ length = (511 + 1) * sizeof(struct scsi_lun); retry: lun_data = kmalloc(length, GFP_KERNEL); if (!lun_data) { printk(ALLOC_FAILURE_MSG, __func__); goto out; } scsi_cmd[0] = REPORT_LUNS; /* * bytes 1 - 5: reserved, set to zero. */ memset(&scsi_cmd[1], 0, 5); /* * bytes 6 - 9: length of the command. */ put_unaligned_be32(length, &scsi_cmd[6]); scsi_cmd[10] = 0; /* reserved */ scsi_cmd[11] = 0; /* control */ /* * We can get a UNIT ATTENTION, for example a power on/reset, so * retry a few times (like sd.c does for TEST UNIT READY). * Experience shows some combinations of adapter/devices get at * least two power on/resets. * * Illegal requests (for devices that do not support REPORT LUNS) * should come through as a check condition, and will not generate * a retry. 
*/ scsi_failures_reset_retries(&failures); SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, "scsi scan: Sending REPORT LUNS\n")); result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, lun_data, length, SCSI_REPORT_LUNS_TIMEOUT, 3, &exec_args); SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, "scsi scan: REPORT LUNS %s result 0x%x\n", result ? "failed" : "successful", result)); if (result) { /* * The device probably does not support a REPORT LUN command */ ret = 1; goto out_err; } /* * Get the length from the first four bytes of lun_data. */ if (get_unaligned_be32(lun_data->scsi_lun) + sizeof(struct scsi_lun) > length) { length = get_unaligned_be32(lun_data->scsi_lun) + sizeof(struct scsi_lun); kfree(lun_data); goto retry; } length = get_unaligned_be32(lun_data->scsi_lun); num_luns = (length / sizeof(struct scsi_lun)); SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, "scsi scan: REPORT LUN scan\n")); /* * Scan the luns in lun_data. The entry at offset 0 is really * the header, so start at 1 and go up to and including num_luns. */ for (lunp = &lun_data[1]; lunp <= &lun_data[num_luns]; lunp++) { lun = scsilun_to_int(lunp); if (lun > sdev->host->max_lun) { sdev_printk(KERN_WARNING, sdev, "lun%llu has a LUN larger than" " allowed by the host adapter\n", lun); } else { int res; res = scsi_probe_and_add_lun(starget, lun, NULL, NULL, rescan, NULL); if (res == SCSI_SCAN_NO_RESPONSE) { /* * Got some results, but now none, abort. */ sdev_printk(KERN_ERR, sdev, "Unexpected response" " from lun %llu while scanning, scan" " aborted\n", (unsigned long long)lun); break; } } } out_err: kfree(lun_data); out: if (scsi_device_created(sdev)) /* * the sdev we used didn't appear in the report luns scan */ __scsi_remove_device(sdev); scsi_device_put(sdev); return ret; } struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel, uint id, u64 lun, void *hostdata) { struct scsi_device *sdev = ERR_PTR(-ENODEV); struct device *parent = &shost->shost_gendev; struct scsi_target *starget; if (strncmp(scsi_scan_type, "none", 4) == 0) return ERR_PTR(-ENODEV); starget = scsi_alloc_target(parent, channel, id); if (!starget) return ERR_PTR(-ENOMEM); scsi_autopm_get_target(starget); mutex_lock(&shost->scan_mutex); if (!shost->async_scan) scsi_complete_async_scans(); if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) { scsi_probe_and_add_lun(starget, lun, NULL, &sdev, SCSI_SCAN_RESCAN, hostdata); scsi_autopm_put_host(shost); } mutex_unlock(&shost->scan_mutex); scsi_autopm_put_target(starget); /* * paired with scsi_alloc_target(). Target will be destroyed unless * scsi_probe_and_add_lun made an underlying device visible */ scsi_target_reap(starget); put_device(&starget->dev); return sdev; } EXPORT_SYMBOL(__scsi_add_device); int scsi_add_device(struct Scsi_Host *host, uint channel, uint target, u64 lun) { struct scsi_device *sdev = __scsi_add_device(host, channel, target, lun, NULL); if (IS_ERR(sdev)) return PTR_ERR(sdev); scsi_device_put(sdev); return 0; } EXPORT_SYMBOL(scsi_add_device); int scsi_resume_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; int ret = 0; device_lock(dev); /* * Bail out if the device or its queue are not running. Otherwise, * the rescan may block waiting for commands to be executed, with us * holding the device lock. This can result in a potential deadlock * in the power management core code when system resume is on-going. 
*/ if (sdev->sdev_state != SDEV_RUNNING || blk_queue_pm_only(sdev->request_queue)) { ret = -EWOULDBLOCK; goto unlock; } if (dev->driver && try_module_get(dev->driver->owner)) { struct scsi_driver *drv = to_scsi_driver(dev->driver); if (drv->resume) ret = drv->resume(dev); module_put(dev->driver->owner); } unlock: device_unlock(dev); return ret; } EXPORT_SYMBOL(scsi_resume_device); int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; int ret = 0; device_lock(dev); /* * Bail out if the device or its queue are not running. Otherwise, * the rescan may block waiting for commands to be executed, with us * holding the device lock. This can result in a potential deadlock * in the power management core code when system resume is on-going. */ if (sdev->sdev_state != SDEV_RUNNING || blk_queue_pm_only(sdev->request_queue)) { ret = -EWOULDBLOCK; goto unlock; } scsi_attach_vpd(sdev); scsi_cdl_check(sdev); if (sdev->handler && sdev->handler->rescan) sdev->handler->rescan(sdev); if (dev->driver && try_module_get(dev->driver->owner)) { struct scsi_driver *drv = to_scsi_driver(dev->driver); if (drv->rescan) drv->rescan(dev); module_put(dev->driver->owner); } unlock: device_unlock(dev); return ret; } EXPORT_SYMBOL(scsi_rescan_device); static void __scsi_scan_target(struct device *parent, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { struct Scsi_Host *shost = dev_to_shost(parent); blist_flags_t bflags = 0; int res; struct scsi_target *starget; if (shost->this_id == id) /* * Don't scan the host adapter */ return; starget = scsi_alloc_target(parent, channel, id); if (!starget) return; scsi_autopm_get_target(starget); if (lun != SCAN_WILD_CARD) { /* * Scan for a specific host/chan/id/lun. */ scsi_probe_and_add_lun(starget, lun, NULL, NULL, rescan, NULL); goto out_reap; } /* * Scan LUN 0, if there is some response, scan further. Ideally, we * would not configure LUN 0 until all LUNs are scanned. */ res = scsi_probe_and_add_lun(starget, 0, &bflags, NULL, rescan, NULL); if (res == SCSI_SCAN_LUN_PRESENT || res == SCSI_SCAN_TARGET_PRESENT) { if (scsi_report_lun_scan(starget, bflags, rescan) != 0) /* * The REPORT LUN did not scan the target, * do a sequential scan. */ scsi_sequential_lun_scan(starget, bflags, starget->scsi_level, rescan); } out_reap: scsi_autopm_put_target(starget); /* * paired with scsi_alloc_target(): determine if the target has * any children at all and if not, nuke it */ scsi_target_reap(starget); put_device(&starget->dev); } /** * scsi_scan_target - scan a target id, possibly including all LUNs on the target. * @parent: host to scan * @channel: channel to scan * @id: target id to scan * @lun: Specific LUN to scan or SCAN_WILD_CARD * @rescan: passed to LUN scanning routines; SCSI_SCAN_INITIAL for * no rescan, SCSI_SCAN_RESCAN to rescan existing LUNs, * and SCSI_SCAN_MANUAL to force scanning even if * 'scan=manual' is set. * * Description: * Scan the target id on @parent, @channel, and @id. Scan at least LUN 0, * and possibly all LUNs on the target id. * * First try a REPORT LUN scan, if that does not scan the target, do a * sequential scan of LUNs on the target id. 
**/ void scsi_scan_target(struct device *parent, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { struct Scsi_Host *shost = dev_to_shost(parent); if (strncmp(scsi_scan_type, "none", 4) == 0) return; if (rescan != SCSI_SCAN_MANUAL && strncmp(scsi_scan_type, "manual", 6) == 0) return; mutex_lock(&shost->scan_mutex); if (!shost->async_scan) scsi_complete_async_scans(); if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) { __scsi_scan_target(parent, channel, id, lun, rescan); scsi_autopm_put_host(shost); } mutex_unlock(&shost->scan_mutex); } EXPORT_SYMBOL(scsi_scan_target); static void scsi_scan_channel(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { uint order_id; if (id == SCAN_WILD_CARD) for (id = 0; id < shost->max_id; ++id) { /* * XXX adapter drivers when possible (FCP, iSCSI) * could modify max_id to match the current max, * not the absolute max. * * XXX add a shost id iterator, so for example, * the FC ID can be the same as a target id * without a huge overhead of sparse id's. */ if (shost->reverse_ordering) /* * Scan from high to low id. */ order_id = shost->max_id - id - 1; else order_id = id; __scsi_scan_target(&shost->shost_gendev, channel, order_id, lun, rescan); } else __scsi_scan_target(&shost->shost_gendev, channel, id, lun, rescan); } int scsi_scan_host_selected(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { SCSI_LOG_SCAN_BUS(3, shost_printk (KERN_INFO, shost, "%s: <%u:%u:%llu>\n", __func__, channel, id, lun)); if (((channel != SCAN_WILD_CARD) && (channel > shost->max_channel)) || ((id != SCAN_WILD_CARD) && (id >= shost->max_id)) || ((lun != SCAN_WILD_CARD) && (lun >= shost->max_lun))) return -EINVAL; mutex_lock(&shost->scan_mutex); if (!shost->async_scan) scsi_complete_async_scans(); if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) { if (channel == SCAN_WILD_CARD) for (channel = 0; channel <= shost->max_channel; channel++) scsi_scan_channel(shost, channel, id, lun, rescan); else scsi_scan_channel(shost, channel, id, lun, rescan); scsi_autopm_put_host(shost); } mutex_unlock(&shost->scan_mutex); return 0; } static void scsi_sysfs_add_devices(struct Scsi_Host *shost) { struct scsi_device *sdev; shost_for_each_device(sdev, shost) { /* target removed before the device could be added */ if (sdev->sdev_state == SDEV_DEL) continue; /* If device is already visible, skip adding it to sysfs */ if (sdev->is_visible) continue; if (!scsi_host_scan_allowed(shost) || scsi_sysfs_add_sdev(sdev) != 0) __scsi_remove_device(sdev); } } /** * scsi_prep_async_scan - prepare for an async scan * @shost: the host which will be scanned * Returns: a cookie to be passed to scsi_finish_async_scan() * * Tells the midlayer this host is going to do an asynchronous scan. * It reserves the host's position in the scanning list and ensures * that other asynchronous scans started after this one won't affect the * ordering of the discovered devices. 
*/ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) { struct async_scan_data *data = NULL; unsigned long flags; if (strncmp(scsi_scan_type, "sync", 4) == 0) return NULL; mutex_lock(&shost->scan_mutex); if (shost->async_scan) { shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__); goto err; } data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) goto err; data->shost = scsi_host_get(shost); if (!data->shost) goto err; init_completion(&data->prev_finished); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 1; spin_unlock_irqrestore(shost->host_lock, flags); mutex_unlock(&shost->scan_mutex); spin_lock(&async_scan_lock); if (list_empty(&scanning_hosts)) complete(&data->prev_finished); list_add_tail(&data->list, &scanning_hosts); spin_unlock(&async_scan_lock); return data; err: mutex_unlock(&shost->scan_mutex); kfree(data); return NULL; } /** * scsi_finish_async_scan - asynchronous scan has finished * @data: cookie returned from earlier call to scsi_prep_async_scan() * * All the devices currently attached to this host have been found. * This function announces all the devices it has found to the rest * of the system. */ static void scsi_finish_async_scan(struct async_scan_data *data) { struct Scsi_Host *shost; unsigned long flags; if (!data) return; shost = data->shost; mutex_lock(&shost->scan_mutex); if (!shost->async_scan) { shost_printk(KERN_INFO, shost, "%s called twice\n", __func__); dump_stack(); mutex_unlock(&shost->scan_mutex); return; } wait_for_completion(&data->prev_finished); scsi_sysfs_add_devices(shost); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 0; spin_unlock_irqrestore(shost->host_lock, flags); mutex_unlock(&shost->scan_mutex); spin_lock(&async_scan_lock); list_del(&data->list); if (!list_empty(&scanning_hosts)) { struct async_scan_data *next = list_entry(scanning_hosts.next, struct async_scan_data, list); complete(&next->prev_finished); } spin_unlock(&async_scan_lock); scsi_autopm_put_host(shost); scsi_host_put(shost); kfree(data); } static void do_scsi_scan_host(struct Scsi_Host *shost) { if (shost->hostt->scan_finished) { unsigned long start = jiffies; if (shost->hostt->scan_start) shost->hostt->scan_start(shost); while (!shost->hostt->scan_finished(shost, jiffies - start)) msleep(10); } else { scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); } } static void do_scan_async(void *_data, async_cookie_t c) { struct async_scan_data *data = _data; struct Scsi_Host *shost = data->shost; do_scsi_scan_host(shost); scsi_finish_async_scan(data); } /** * scsi_scan_host - scan the given adapter * @shost: adapter to scan **/ void scsi_scan_host(struct Scsi_Host *shost) { struct async_scan_data *data; if (strncmp(scsi_scan_type, "none", 4) == 0 || strncmp(scsi_scan_type, "manual", 6) == 0) return; if (scsi_autopm_get_host(shost) < 0) return; data = scsi_prep_async_scan(shost); if (!data) { do_scsi_scan_host(shost); scsi_autopm_put_host(shost); return; } /* register with the async subsystem so wait_for_device_probe() * will flush this work */ async_schedule(do_scan_async, data); /* scsi_autopm_put_host(shost) is called in scsi_finish_async_scan() */ } EXPORT_SYMBOL(scsi_scan_host); void scsi_forget_host(struct Scsi_Host *shost) { struct scsi_device *sdev; unsigned long flags; restart: spin_lock_irqsave(shost->host_lock, flags); list_for_each_entry(sdev, &shost->__devices, siblings) { if (sdev->sdev_state == SDEV_DEL) continue; spin_unlock_irqrestore(shost->host_lock, 
flags); __scsi_remove_device(sdev); goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); }
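/*
 * Illustrative sketch, not part of the original file: how a low-level
 * driver typically consumes the scan API exported above.  Everything
 * prefixed with "example_" is a hypothetical placeholder; only
 * scsi_scan_host() and scsi_add_device() come from the code above, and
 * a real scsi_host_template also needs .queuecommand and friends.
 */
#if 0	/* example only */
#include <scsi/scsi_host.h>
#include <scsi/scsi_device.h>

static struct scsi_host_template example_template = {
	.name		= "example-hba",
	.this_id	= -1,
	/* a real driver also fills in .queuecommand, .can_queue, ... */
};

static int example_hba_probe(struct device *parent)
{
	struct Scsi_Host *shost;
	int err;

	shost = scsi_host_alloc(&example_template, 0);
	if (!shost)
		return -ENOMEM;

	err = scsi_add_host(shost, parent);
	if (err) {
		scsi_host_put(shost);
		return err;
	}

	/*
	 * Let the midlayer discover all targets/LUNs: depending on
	 * scsi_scan_type this runs synchronously or through the async
	 * machinery (scsi_prep_async_scan()/scsi_finish_async_scan()).
	 */
	scsi_scan_host(shost);

	/*
	 * Alternatively, a driver that already knows its topology can
	 * add a single <channel, id, lun> directly; this ends up in
	 * scsi_probe_and_add_lun() via __scsi_add_device().
	 */
	err = scsi_add_device(shost, 0, 1, 0);
	if (err)
		dev_info(parent, "no device at 0:1:0 (%d)\n", err);

	return 0;
}
#endif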
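/*
 * Illustrative sketch, not part of the original file: the REPORT LUNS
 * parameter data layout walked by scsi_report_lun_scan() above.  The
 * header is the same size as one struct scsi_lun (8 bytes): a 4-byte
 * big-endian LUN list length, then 4 reserved bytes, then the 8-byte
 * LUN entries.  example_report_luns_count() is a hypothetical helper;
 * the real code uses get_unaligned_be32() and retries with a larger
 * buffer when the reported length exceeds the allocation.
 */
#if 0	/* example only */
static unsigned int example_report_luns_count(const unsigned char *buf,
					      unsigned int buf_len)
{
	unsigned int list_len;

	if (buf_len < 8)		/* not even a complete header */
		return 0;

	/* Bytes 0..3: LUN list length in bytes, big endian. */
	list_len = ((unsigned int)buf[0] << 24) | (buf[1] << 16) |
		   (buf[2] << 8) | buf[3];

	/* Only count entries that actually fit in the buffer. */
	if (list_len > buf_len - 8)
		list_len = buf_len - 8;

	/* Each entry is one 8-byte struct scsi_lun. */
	return list_len / 8;
}
#endif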
// SPDX-License-Identifier: GPL-2.0-only /* * HIDPP protocol for Logitech receivers * * Copyright (c) 2011 Logitech (c) * Copyright (c) 2012-2013 Google (c) * Copyright (c) 2013-2014 Red Hat Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/device.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/kfifo.h> #include <linux/input/mt.h> #include <linux/workqueue.h> #include <linux/atomic.h> #include <linux/fixp-arith.h> #include <asm/unaligned.h> #include "usbhid/usbhid.h" #include "hid-ids.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>"); MODULE_AUTHOR("Nestor Lopez Casado <nlopezcasad@logitech.com>"); MODULE_AUTHOR("Bastien Nocera <hadess@hadess.net>"); static bool disable_tap_to_click; module_param(disable_tap_to_click, bool, 0644); MODULE_PARM_DESC(disable_tap_to_click, "Disable Tap-To-Click mode reporting for touchpads (only on the K400 currently)."); /* Define a non-zero software ID to identify our own requests */ #define LINUX_KERNEL_SW_ID 0x01 #define REPORT_ID_HIDPP_SHORT 0x10 #define REPORT_ID_HIDPP_LONG 0x11 #define REPORT_ID_HIDPP_VERY_LONG 0x12 #define HIDPP_REPORT_SHORT_LENGTH 7 #define HIDPP_REPORT_LONG_LENGTH 20 #define HIDPP_REPORT_VERY_LONG_MAX_LENGTH 64 #define HIDPP_REPORT_SHORT_SUPPORTED BIT(0) #define HIDPP_REPORT_LONG_SUPPORTED BIT(1) #define HIDPP_REPORT_VERY_LONG_SUPPORTED BIT(2) #define HIDPP_SUB_ID_CONSUMER_VENDOR_KEYS 0x03 #define HIDPP_SUB_ID_ROLLER 0x05 #define HIDPP_SUB_ID_MOUSE_EXTRA_BTNS 0x06 #define HIDPP_SUB_ID_USER_IFACE_EVENT 0x08 #define HIDPP_USER_IFACE_EVENT_ENCRYPTION_KEY_LOST BIT(5) #define HIDPP_QUIRK_CLASS_WTP BIT(0) #define HIDPP_QUIRK_CLASS_M560 BIT(1) #define HIDPP_QUIRK_CLASS_K400 BIT(2) #define HIDPP_QUIRK_CLASS_G920 BIT(3) #define HIDPP_QUIRK_CLASS_K750 BIT(4) /* bits 2..20 are reserved for classes */ /* #define HIDPP_QUIRK_CONNECT_EVENTS BIT(21) disabled */ #define HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS BIT(22) #define HIDPP_QUIRK_DELAYED_INIT BIT(23) #define HIDPP_QUIRK_FORCE_OUTPUT_REPORTS BIT(24) #define HIDPP_QUIRK_HIDPP_WHEELS BIT(25) #define HIDPP_QUIRK_HIDPP_EXTRA_MOUSE_BTNS BIT(26) #define HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS BIT(27) #define HIDPP_QUIRK_HI_RES_SCROLL_1P0 BIT(28) #define HIDPP_QUIRK_WIRELESS_STATUS BIT(29) /* These are just aliases for now */ #define HIDPP_QUIRK_KBD_SCROLL_WHEEL HIDPP_QUIRK_HIDPP_WHEELS #define HIDPP_QUIRK_KBD_ZOOM_WHEEL HIDPP_QUIRK_HIDPP_WHEELS /*
Convenience constant to check for any high-res support. */ #define HIDPP_CAPABILITY_HI_RES_SCROLL (HIDPP_CAPABILITY_HIDPP10_FAST_SCROLL | \ HIDPP_CAPABILITY_HIDPP20_HI_RES_SCROLL | \ HIDPP_CAPABILITY_HIDPP20_HI_RES_WHEEL) #define HIDPP_CAPABILITY_HIDPP10_BATTERY BIT(0) #define HIDPP_CAPABILITY_HIDPP20_BATTERY BIT(1) #define HIDPP_CAPABILITY_BATTERY_MILEAGE BIT(2) #define HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS BIT(3) #define HIDPP_CAPABILITY_BATTERY_VOLTAGE BIT(4) #define HIDPP_CAPABILITY_BATTERY_PERCENTAGE BIT(5) #define HIDPP_CAPABILITY_UNIFIED_BATTERY BIT(6) #define HIDPP_CAPABILITY_HIDPP20_HI_RES_WHEEL BIT(7) #define HIDPP_CAPABILITY_HIDPP20_HI_RES_SCROLL BIT(8) #define HIDPP_CAPABILITY_HIDPP10_FAST_SCROLL BIT(9) #define HIDPP_CAPABILITY_ADC_MEASUREMENT BIT(10) #define lg_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) /* * There are two hidpp protocols in use, the first version hidpp10 is known * as register access protocol or RAP, the second version hidpp20 is known as * feature access protocol or FAP * * Most older devices (including the Unifying usb receiver) use the RAP protocol * where as most newer devices use the FAP protocol. Both protocols are * compatible with the underlying transport, which could be usb, Unifiying, or * bluetooth. The message lengths are defined by the hid vendor specific report * descriptor for the HIDPP_SHORT report type (total message lenth 7 bytes) and * the HIDPP_LONG report type (total message length 20 bytes) * * The RAP protocol uses both report types, whereas the FAP only uses HIDPP_LONG * messages. The Unifying receiver itself responds to RAP messages (device index * is 0xFF for the receiver), and all messages (short or long) with a device * index between 1 and 6 are passed untouched to the corresponding paired * Unifying device. * * The paired device can be RAP or FAP, it will receive the message untouched * from the Unifiying receiver. */ struct fap { u8 feature_index; u8 funcindex_clientid; u8 params[HIDPP_REPORT_VERY_LONG_MAX_LENGTH - 4U]; }; struct rap { u8 sub_id; u8 reg_address; u8 params[HIDPP_REPORT_VERY_LONG_MAX_LENGTH - 4U]; }; struct hidpp_report { u8 report_id; u8 device_index; union { struct fap fap; struct rap rap; u8 rawbytes[sizeof(struct fap)]; }; } __packed; struct hidpp_battery { u8 feature_index; u8 solar_feature_index; u8 voltage_feature_index; u8 adc_measurement_feature_index; struct power_supply_desc desc; struct power_supply *ps; char name[64]; int status; int capacity; int level; int voltage; int charge_type; bool online; u8 supported_levels_1004; }; /** * struct hidpp_scroll_counter - Utility class for processing high-resolution * scroll events. * @dev: the input device for which events should be reported. * @wheel_multiplier: the scalar multiplier to be applied to each wheel event * @remainder: counts the number of high-resolution units moved since the last * low-resolution event (REL_WHEEL or REL_HWHEEL) was sent. Should * only be used by class methods. 
* @direction: direction of last movement (1 or -1) * @last_time: last event time, used to reset remainder after inactivity */ struct hidpp_scroll_counter { int wheel_multiplier; int remainder; int direction; unsigned long long last_time; }; struct hidpp_device { struct hid_device *hid_dev; struct input_dev *input; struct mutex send_mutex; void *send_receive_buf; char *name; /* will never be NULL and should not be freed */ wait_queue_head_t wait; int very_long_report_length; bool answer_available; u8 protocol_major; u8 protocol_minor; void *private_data; struct work_struct work; struct kfifo delayed_work_fifo; struct input_dev *delayed_input; unsigned long quirks; unsigned long capabilities; u8 supported_reports; struct hidpp_battery battery; struct hidpp_scroll_counter vertical_wheel_counter; u8 wireless_feature_index; bool connected_once; }; /* HID++ 1.0 error codes */ #define HIDPP_ERROR 0x8f #define HIDPP_ERROR_SUCCESS 0x00 #define HIDPP_ERROR_INVALID_SUBID 0x01 #define HIDPP_ERROR_INVALID_ADRESS 0x02 #define HIDPP_ERROR_INVALID_VALUE 0x03 #define HIDPP_ERROR_CONNECT_FAIL 0x04 #define HIDPP_ERROR_TOO_MANY_DEVICES 0x05 #define HIDPP_ERROR_ALREADY_EXISTS 0x06 #define HIDPP_ERROR_BUSY 0x07 #define HIDPP_ERROR_UNKNOWN_DEVICE 0x08 #define HIDPP_ERROR_RESOURCE_ERROR 0x09 #define HIDPP_ERROR_REQUEST_UNAVAILABLE 0x0a #define HIDPP_ERROR_INVALID_PARAM_VALUE 0x0b #define HIDPP_ERROR_WRONG_PIN_CODE 0x0c /* HID++ 2.0 error codes */ #define HIDPP20_ERROR_NO_ERROR 0x00 #define HIDPP20_ERROR_UNKNOWN 0x01 #define HIDPP20_ERROR_INVALID_ARGS 0x02 #define HIDPP20_ERROR_OUT_OF_RANGE 0x03 #define HIDPP20_ERROR_HW_ERROR 0x04 #define HIDPP20_ERROR_NOT_ALLOWED 0x05 #define HIDPP20_ERROR_INVALID_FEATURE_INDEX 0x06 #define HIDPP20_ERROR_INVALID_FUNCTION_ID 0x07 #define HIDPP20_ERROR_BUSY 0x08 #define HIDPP20_ERROR_UNSUPPORTED 0x09 #define HIDPP20_ERROR 0xff static int __hidpp_send_report(struct hid_device *hdev, struct hidpp_report *hidpp_report) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); int fields_count, ret; switch (hidpp_report->report_id) { case REPORT_ID_HIDPP_SHORT: fields_count = HIDPP_REPORT_SHORT_LENGTH; break; case REPORT_ID_HIDPP_LONG: fields_count = HIDPP_REPORT_LONG_LENGTH; break; case REPORT_ID_HIDPP_VERY_LONG: fields_count = hidpp->very_long_report_length; break; default: return -ENODEV; } /* * set the device_index as the receiver, it will be overwritten by * hid_hw_request if needed */ hidpp_report->device_index = 0xff; if (hidpp->quirks & HIDPP_QUIRK_FORCE_OUTPUT_REPORTS) { ret = hid_hw_output_report(hdev, (u8 *)hidpp_report, fields_count); } else { ret = hid_hw_raw_request(hdev, hidpp_report->report_id, (u8 *)hidpp_report, fields_count, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); } return ret == fields_count ? 0 : -1; } /* * Effectively send the message to the device, waiting for its answer. 
* * Must be called with hidpp->send_mutex locked * * Same return protocol than hidpp_send_message_sync(): * - success on 0 * - negative error means transport error * - positive value means protocol error */ static int __do_hidpp_send_message_sync(struct hidpp_device *hidpp, struct hidpp_report *message, struct hidpp_report *response) { int ret; __must_hold(&hidpp->send_mutex); hidpp->send_receive_buf = response; hidpp->answer_available = false; /* * So that we can later validate the answer when it arrives * in hidpp_raw_event */ *response = *message; ret = __hidpp_send_report(hidpp->hid_dev, message); if (ret) { dbg_hid("__hidpp_send_report returned err: %d\n", ret); memset(response, 0, sizeof(struct hidpp_report)); return ret; } if (!wait_event_timeout(hidpp->wait, hidpp->answer_available, 5*HZ)) { dbg_hid("%s:timeout waiting for response\n", __func__); memset(response, 0, sizeof(struct hidpp_report)); return -ETIMEDOUT; } if (response->report_id == REPORT_ID_HIDPP_SHORT && response->rap.sub_id == HIDPP_ERROR) { ret = response->rap.params[1]; dbg_hid("%s:got hidpp error %02X\n", __func__, ret); return ret; } if ((response->report_id == REPORT_ID_HIDPP_LONG || response->report_id == REPORT_ID_HIDPP_VERY_LONG) && response->fap.feature_index == HIDPP20_ERROR) { ret = response->fap.params[1]; dbg_hid("%s:got hidpp 2.0 error %02X\n", __func__, ret); return ret; } return 0; } /* * hidpp_send_message_sync() returns 0 in case of success, and something else * in case of a failure. * * See __do_hidpp_send_message_sync() for a detailed explanation of the returned * value. */ static int hidpp_send_message_sync(struct hidpp_device *hidpp, struct hidpp_report *message, struct hidpp_report *response) { int ret; int max_retries = 3; mutex_lock(&hidpp->send_mutex); do { ret = __do_hidpp_send_message_sync(hidpp, message, response); if (ret != HIDPP20_ERROR_BUSY) break; dbg_hid("%s:got busy hidpp 2.0 error %02X, retrying\n", __func__, ret); } while (--max_retries); mutex_unlock(&hidpp->send_mutex); return ret; } /* * hidpp_send_fap_command_sync() returns 0 in case of success, and something else * in case of a failure. * * See __do_hidpp_send_message_sync() for a detailed explanation of the returned * value. */ static int hidpp_send_fap_command_sync(struct hidpp_device *hidpp, u8 feat_index, u8 funcindex_clientid, u8 *params, int param_count, struct hidpp_report *response) { struct hidpp_report *message; int ret; if (param_count > sizeof(message->fap.params)) { hid_dbg(hidpp->hid_dev, "Invalid number of parameters passed to command (%d != %llu)\n", param_count, (unsigned long long) sizeof(message->fap.params)); return -EINVAL; } message = kzalloc(sizeof(struct hidpp_report), GFP_KERNEL); if (!message) return -ENOMEM; if (param_count > (HIDPP_REPORT_LONG_LENGTH - 4)) message->report_id = REPORT_ID_HIDPP_VERY_LONG; else message->report_id = REPORT_ID_HIDPP_LONG; message->fap.feature_index = feat_index; message->fap.funcindex_clientid = funcindex_clientid | LINUX_KERNEL_SW_ID; memcpy(&message->fap.params, params, param_count); ret = hidpp_send_message_sync(hidpp, message, response); kfree(message); return ret; } /* * hidpp_send_rap_command_sync() returns 0 in case of success, and something else * in case of a failure. * * See __do_hidpp_send_message_sync() for a detailed explanation of the returned * value. 
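 *
 * Illustrative sketch (an added note, not part of the original comment):
 * callers in this file typically translate the tri-state result by treating
 * a positive value as a HID++ protocol error and a negative one as a
 * transport error, e.g. for a read of a hypothetical register address "reg":
 *
 *	ret = hidpp_send_rap_command_sync(hidpp, REPORT_ID_HIDPP_SHORT,
 *					  HIDPP_GET_REGISTER, reg,
 *					  NULL, 0, &response);
 *	if (ret > 0)
 *		return -EPROTO;
 *	if (ret)
 *		return ret;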
*/ static int hidpp_send_rap_command_sync(struct hidpp_device *hidpp_dev, u8 report_id, u8 sub_id, u8 reg_address, u8 *params, int param_count, struct hidpp_report *response) { struct hidpp_report *message; int ret, max_count; /* Send as long report if short reports are not supported. */ if (report_id == REPORT_ID_HIDPP_SHORT && !(hidpp_dev->supported_reports & HIDPP_REPORT_SHORT_SUPPORTED)) report_id = REPORT_ID_HIDPP_LONG; switch (report_id) { case REPORT_ID_HIDPP_SHORT: max_count = HIDPP_REPORT_SHORT_LENGTH - 4; break; case REPORT_ID_HIDPP_LONG: max_count = HIDPP_REPORT_LONG_LENGTH - 4; break; case REPORT_ID_HIDPP_VERY_LONG: max_count = hidpp_dev->very_long_report_length - 4; break; default: return -EINVAL; } if (param_count > max_count) return -EINVAL; message = kzalloc(sizeof(struct hidpp_report), GFP_KERNEL); if (!message) return -ENOMEM; message->report_id = report_id; message->rap.sub_id = sub_id; message->rap.reg_address = reg_address; memcpy(&message->rap.params, params, param_count); ret = hidpp_send_message_sync(hidpp_dev, message, response); kfree(message); return ret; } static inline bool hidpp_match_answer(struct hidpp_report *question, struct hidpp_report *answer) { return (answer->fap.feature_index == question->fap.feature_index) && (answer->fap.funcindex_clientid == question->fap.funcindex_clientid); } static inline bool hidpp_match_error(struct hidpp_report *question, struct hidpp_report *answer) { return ((answer->rap.sub_id == HIDPP_ERROR) || (answer->fap.feature_index == HIDPP20_ERROR)) && (answer->fap.funcindex_clientid == question->fap.feature_index) && (answer->fap.params[0] == question->fap.funcindex_clientid); } static inline bool hidpp_report_is_connect_event(struct hidpp_device *hidpp, struct hidpp_report *report) { return (hidpp->wireless_feature_index && (report->fap.feature_index == hidpp->wireless_feature_index)) || ((report->report_id == REPORT_ID_HIDPP_SHORT) && (report->rap.sub_id == 0x41)); } /* * hidpp_prefix_name() prefixes the current given name with "Logitech ". */ static void hidpp_prefix_name(char **name, int name_length) { #define PREFIX_LENGTH 9 /* "Logitech " */ int new_length; char *new_name; if (name_length > PREFIX_LENGTH && strncmp(*name, "Logitech ", PREFIX_LENGTH) == 0) /* The prefix has is already in the name */ return; new_length = PREFIX_LENGTH + name_length; new_name = kzalloc(new_length, GFP_KERNEL); if (!new_name) return; snprintf(new_name, new_length, "Logitech %s", *name); kfree(*name); *name = new_name; } /* * Updates the USB wireless_status based on whether the headset * is turned on and reachable. */ static void hidpp_update_usb_wireless_status(struct hidpp_device *hidpp) { struct hid_device *hdev = hidpp->hid_dev; struct usb_interface *intf; if (!(hidpp->quirks & HIDPP_QUIRK_WIRELESS_STATUS)) return; if (!hid_is_usb(hdev)) return; intf = to_usb_interface(hdev->dev.parent); usb_set_wireless_status(intf, hidpp->battery.online ? USB_WIRELESS_STATUS_CONNECTED : USB_WIRELESS_STATUS_DISCONNECTED); } /** * hidpp_scroll_counter_handle_scroll() - Send high- and low-resolution scroll * events given a high-resolution wheel * movement. * @input_dev: Pointer to the input device * @counter: a hid_scroll_counter struct describing the wheel. * @hi_res_value: the movement of the wheel, in the mouse's high-resolution * units. * * Given a high-resolution movement, this function converts the movement into * fractions of 120 and emits high-resolution scroll events for the input * device. 
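 *
 * (Added illustrative note: for example, a wheel whose wheel_multiplier is 8
 * reports 8 hardware counts per detent, so each count is scaled to
 * 120 / 8 = 15 units of REL_WHEEL_HI_RES and a full detent adds up to the
 * conventional 120 units.)
 *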
 * It also uses the multiplier from &struct hidpp_scroll_counter to
 * emit low-resolution scroll events when appropriate for
 * backwards-compatibility with userspace input libraries.
 */
static void hidpp_scroll_counter_handle_scroll(struct input_dev *input_dev,
					       struct hidpp_scroll_counter *counter,
					       int hi_res_value)
{
	int low_res_value, remainder, direction;
	unsigned long long now, previous;

	hi_res_value = hi_res_value * 120 / counter->wheel_multiplier;
	input_report_rel(input_dev, REL_WHEEL_HI_RES, hi_res_value);

	remainder = counter->remainder;
	direction = hi_res_value > 0 ? 1 : -1;

	now = sched_clock();
	previous = counter->last_time;
	counter->last_time = now;

	/*
	 * Reset the remainder after a period of inactivity or when the
	 * direction changes. This prevents the REL_WHEEL emulation point
	 * from sliding for devices that don't always provide the same
	 * number of movements per detent.
	 */
	if (now - previous > 1000000000 || direction != counter->direction)
		remainder = 0;

	counter->direction = direction;
	remainder += hi_res_value;

	/* Some wheels will rest 7/8ths of a detent from the previous detent
	 * after slow movement, so we want the threshold for low-res events to
	 * be in the middle between two detents (e.g. after 4/8ths) as
	 * opposed to on the detents themselves (8/8ths).
	 */
	if (abs(remainder) >= 60) {
		/* Add (or subtract) 1 because we want to trigger when the wheel
		 * is half-way to the next detent (i.e. scroll 1 detent after a
		 * 1/2 detent movement, 2 detents after a 1 1/2 detent movement,
		 * etc.).
		 */
		low_res_value = remainder / 120;
		if (low_res_value == 0)
			low_res_value = (hi_res_value > 0 ? 1 : -1);
		input_report_rel(input_dev, REL_WHEEL, low_res_value);
		remainder -= low_res_value * 120;
	}
	counter->remainder = remainder;
}

/* -------------------------------------------------------------------------- */
/* HID++ 1.0 commands                                                          */
/* -------------------------------------------------------------------------- */

#define HIDPP_SET_REGISTER			0x80
#define HIDPP_GET_REGISTER			0x81
#define HIDPP_SET_LONG_REGISTER			0x82
#define HIDPP_GET_LONG_REGISTER			0x83

/**
 * hidpp10_set_register - Modify a HID++ 1.0 register.
 * @hidpp_dev: the device to set the register on.
 * @register_address: the address of the register to modify.
 * @byte: the byte of the register to modify. Should be less than 3.
 * @mask: mask of the bits to modify
 * @value: new values for the bits in mask
 * Return: 0 if successful, otherwise a negative error code.
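 *
 * Illustrative example (an added note, not part of the original kernel-doc):
 * hidpp10_enable_battery_reporting() below is a typical caller, performing a
 * read-modify-write of byte 0 of the 0x00 "enable reports" register:
 *
 *	hidpp10_set_register(hidpp_dev, HIDPP_REG_ENABLE_REPORTS, 0,
 *			     HIDPP_ENABLE_BAT_REPORT, HIDPP_ENABLE_BAT_REPORT);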
*/ static int hidpp10_set_register(struct hidpp_device *hidpp_dev, u8 register_address, u8 byte, u8 mask, u8 value) { struct hidpp_report response; int ret; u8 params[3] = { 0 }; ret = hidpp_send_rap_command_sync(hidpp_dev, REPORT_ID_HIDPP_SHORT, HIDPP_GET_REGISTER, register_address, NULL, 0, &response); if (ret) return ret; memcpy(params, response.rap.params, 3); params[byte] &= ~mask; params[byte] |= value & mask; return hidpp_send_rap_command_sync(hidpp_dev, REPORT_ID_HIDPP_SHORT, HIDPP_SET_REGISTER, register_address, params, 3, &response); } #define HIDPP_REG_ENABLE_REPORTS 0x00 #define HIDPP_ENABLE_CONSUMER_REPORT BIT(0) #define HIDPP_ENABLE_WHEEL_REPORT BIT(2) #define HIDPP_ENABLE_MOUSE_EXTRA_BTN_REPORT BIT(3) #define HIDPP_ENABLE_BAT_REPORT BIT(4) #define HIDPP_ENABLE_HWHEEL_REPORT BIT(5) static int hidpp10_enable_battery_reporting(struct hidpp_device *hidpp_dev) { return hidpp10_set_register(hidpp_dev, HIDPP_REG_ENABLE_REPORTS, 0, HIDPP_ENABLE_BAT_REPORT, HIDPP_ENABLE_BAT_REPORT); } #define HIDPP_REG_FEATURES 0x01 #define HIDPP_ENABLE_SPECIAL_BUTTON_FUNC BIT(1) #define HIDPP_ENABLE_FAST_SCROLL BIT(6) /* On HID++ 1.0 devices, high-res scroll was called "scrolling acceleration". */ static int hidpp10_enable_scrolling_acceleration(struct hidpp_device *hidpp_dev) { return hidpp10_set_register(hidpp_dev, HIDPP_REG_FEATURES, 0, HIDPP_ENABLE_FAST_SCROLL, HIDPP_ENABLE_FAST_SCROLL); } #define HIDPP_REG_BATTERY_STATUS 0x07 static int hidpp10_battery_status_map_level(u8 param) { int level; switch (param) { case 1 ... 2: level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; break; case 3 ... 4: level = POWER_SUPPLY_CAPACITY_LEVEL_LOW; break; case 5 ... 6: level = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; break; case 7: level = POWER_SUPPLY_CAPACITY_LEVEL_HIGH; break; default: level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN; } return level; } static int hidpp10_battery_status_map_status(u8 param) { int status; switch (param) { case 0x00: /* discharging (in use) */ status = POWER_SUPPLY_STATUS_DISCHARGING; break; case 0x21: /* (standard) charging */ case 0x24: /* fast charging */ case 0x25: /* slow charging */ status = POWER_SUPPLY_STATUS_CHARGING; break; case 0x26: /* topping charge */ case 0x22: /* charge complete */ status = POWER_SUPPLY_STATUS_FULL; break; case 0x20: /* unknown */ status = POWER_SUPPLY_STATUS_UNKNOWN; break; /* * 0x01...0x1F = reserved (not charging) * 0x23 = charging error * 0x27..0xff = reserved */ default: status = POWER_SUPPLY_STATUS_NOT_CHARGING; break; } return status; } static int hidpp10_query_battery_status(struct hidpp_device *hidpp) { struct hidpp_report response; int ret, status; ret = hidpp_send_rap_command_sync(hidpp, REPORT_ID_HIDPP_SHORT, HIDPP_GET_REGISTER, HIDPP_REG_BATTERY_STATUS, NULL, 0, &response); if (ret) return ret; hidpp->battery.level = hidpp10_battery_status_map_level(response.rap.params[0]); status = hidpp10_battery_status_map_status(response.rap.params[1]); hidpp->battery.status = status; /* the capacity is only available when discharging or full */ hidpp->battery.online = status == POWER_SUPPLY_STATUS_DISCHARGING || status == POWER_SUPPLY_STATUS_FULL; return 0; } #define HIDPP_REG_BATTERY_MILEAGE 0x0D static int hidpp10_battery_mileage_map_status(u8 param) { int status; switch (param >> 6) { case 0x00: /* discharging (in use) */ status = POWER_SUPPLY_STATUS_DISCHARGING; break; case 0x01: /* charging */ status = POWER_SUPPLY_STATUS_CHARGING; break; case 0x02: /* charge complete */ status = POWER_SUPPLY_STATUS_FULL; break; /* * 0x03 = charging error */ default: 
status = POWER_SUPPLY_STATUS_NOT_CHARGING; break; } return status; } static int hidpp10_query_battery_mileage(struct hidpp_device *hidpp) { struct hidpp_report response; int ret, status; ret = hidpp_send_rap_command_sync(hidpp, REPORT_ID_HIDPP_SHORT, HIDPP_GET_REGISTER, HIDPP_REG_BATTERY_MILEAGE, NULL, 0, &response); if (ret) return ret; hidpp->battery.capacity = response.rap.params[0]; status = hidpp10_battery_mileage_map_status(response.rap.params[2]); hidpp->battery.status = status; /* the capacity is only available when discharging or full */ hidpp->battery.online = status == POWER_SUPPLY_STATUS_DISCHARGING || status == POWER_SUPPLY_STATUS_FULL; return 0; } static int hidpp10_battery_event(struct hidpp_device *hidpp, u8 *data, int size) { struct hidpp_report *report = (struct hidpp_report *)data; int status, capacity, level; bool changed; if (report->report_id != REPORT_ID_HIDPP_SHORT) return 0; switch (report->rap.sub_id) { case HIDPP_REG_BATTERY_STATUS: capacity = hidpp->battery.capacity; level = hidpp10_battery_status_map_level(report->rawbytes[1]); status = hidpp10_battery_status_map_status(report->rawbytes[2]); break; case HIDPP_REG_BATTERY_MILEAGE: capacity = report->rap.params[0]; level = hidpp->battery.level; status = hidpp10_battery_mileage_map_status(report->rawbytes[3]); break; default: return 0; } changed = capacity != hidpp->battery.capacity || level != hidpp->battery.level || status != hidpp->battery.status; /* the capacity is only available when discharging or full */ hidpp->battery.online = status == POWER_SUPPLY_STATUS_DISCHARGING || status == POWER_SUPPLY_STATUS_FULL; if (changed) { hidpp->battery.level = level; hidpp->battery.status = status; if (hidpp->battery.ps) power_supply_changed(hidpp->battery.ps); } return 0; } #define HIDPP_REG_PAIRING_INFORMATION 0xB5 #define HIDPP_EXTENDED_PAIRING 0x30 #define HIDPP_DEVICE_NAME 0x40 static char *hidpp_unifying_get_name(struct hidpp_device *hidpp_dev) { struct hidpp_report response; int ret; u8 params[1] = { HIDPP_DEVICE_NAME }; char *name; int len; ret = hidpp_send_rap_command_sync(hidpp_dev, REPORT_ID_HIDPP_SHORT, HIDPP_GET_LONG_REGISTER, HIDPP_REG_PAIRING_INFORMATION, params, 1, &response); if (ret) return NULL; len = response.rap.params[1]; if (2 + len > sizeof(response.rap.params)) return NULL; if (len < 4) /* logitech devices are usually at least Xddd */ return NULL; name = kzalloc(len + 1, GFP_KERNEL); if (!name) return NULL; memcpy(name, &response.rap.params[2], len); /* include the terminating '\0' */ hidpp_prefix_name(&name, len + 1); return name; } static int hidpp_unifying_get_serial(struct hidpp_device *hidpp, u32 *serial) { struct hidpp_report response; int ret; u8 params[1] = { HIDPP_EXTENDED_PAIRING }; ret = hidpp_send_rap_command_sync(hidpp, REPORT_ID_HIDPP_SHORT, HIDPP_GET_LONG_REGISTER, HIDPP_REG_PAIRING_INFORMATION, params, 1, &response); if (ret) return ret; /* * We don't care about LE or BE, we will output it as a string * with %4phD, so we need to keep the order. 
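	 *
	 * (Added note: %4phD prints the four bytes as dash-separated hex, so a
	 * raw serial of 0x4c 0x0a 0x12 0x34 is shown as "4c-0a-12-34".)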
*/ *serial = *((u32 *)&response.rap.params[1]); return 0; } static int hidpp_unifying_init(struct hidpp_device *hidpp) { struct hid_device *hdev = hidpp->hid_dev; const char *name; u32 serial; int ret; ret = hidpp_unifying_get_serial(hidpp, &serial); if (ret) return ret; snprintf(hdev->uniq, sizeof(hdev->uniq), "%4phD", &serial); dbg_hid("HID++ Unifying: Got serial: %s\n", hdev->uniq); name = hidpp_unifying_get_name(hidpp); if (!name) return -EIO; snprintf(hdev->name, sizeof(hdev->name), "%s", name); dbg_hid("HID++ Unifying: Got name: %s\n", name); kfree(name); return 0; } /* -------------------------------------------------------------------------- */ /* 0x0000: Root */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_ROOT 0x0000 #define HIDPP_PAGE_ROOT_IDX 0x00 #define CMD_ROOT_GET_FEATURE 0x00 #define CMD_ROOT_GET_PROTOCOL_VERSION 0x10 static int hidpp_root_get_feature(struct hidpp_device *hidpp, u16 feature, u8 *feature_index, u8 *feature_type) { struct hidpp_report response; int ret; u8 params[2] = { feature >> 8, feature & 0x00FF }; ret = hidpp_send_fap_command_sync(hidpp, HIDPP_PAGE_ROOT_IDX, CMD_ROOT_GET_FEATURE, params, 2, &response); if (ret) return ret; if (response.fap.params[0] == 0) return -ENOENT; *feature_index = response.fap.params[0]; *feature_type = response.fap.params[1]; return ret; } static int hidpp_root_get_protocol_version(struct hidpp_device *hidpp) { const u8 ping_byte = 0x5a; u8 ping_data[3] = { 0, 0, ping_byte }; struct hidpp_report response; int ret; ret = hidpp_send_rap_command_sync(hidpp, REPORT_ID_HIDPP_SHORT, HIDPP_PAGE_ROOT_IDX, CMD_ROOT_GET_PROTOCOL_VERSION | LINUX_KERNEL_SW_ID, ping_data, sizeof(ping_data), &response); if (ret == HIDPP_ERROR_INVALID_SUBID) { hidpp->protocol_major = 1; hidpp->protocol_minor = 0; goto print_version; } /* the device might not be connected */ if (ret == HIDPP_ERROR_RESOURCE_ERROR) return -EIO; if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; if (response.rap.params[2] != ping_byte) { hid_err(hidpp->hid_dev, "%s: ping mismatch 0x%02x != 0x%02x\n", __func__, response.rap.params[2], ping_byte); return -EPROTO; } hidpp->protocol_major = response.rap.params[0]; hidpp->protocol_minor = response.rap.params[1]; print_version: if (!hidpp->connected_once) { hid_info(hidpp->hid_dev, "HID++ %u.%u device connected.\n", hidpp->protocol_major, hidpp->protocol_minor); hidpp->connected_once = true; } else hid_dbg(hidpp->hid_dev, "HID++ %u.%u device connected.\n", hidpp->protocol_major, hidpp->protocol_minor); return 0; } /* -------------------------------------------------------------------------- */ /* 0x0003: Device Information */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_DEVICE_INFORMATION 0x0003 #define CMD_GET_DEVICE_INFO 0x00 static int hidpp_get_serial(struct hidpp_device *hidpp, u32 *serial) { struct hidpp_report response; u8 feature_type; u8 feature_index; int ret; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_DEVICE_INFORMATION, &feature_index, &feature_type); if (ret) return ret; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_GET_DEVICE_INFO, NULL, 0, &response); if (ret) return ret; /* See hidpp_unifying_get_serial() */ *serial = *((u32 *)&response.rap.params[1]); return 0; } static int hidpp_serial_init(struct hidpp_device *hidpp) { struct hid_device *hdev = hidpp->hid_dev; u32 serial; int ret; ret = hidpp_get_serial(hidpp, 
&serial); if (ret) return ret; snprintf(hdev->uniq, sizeof(hdev->uniq), "%4phD", &serial); dbg_hid("HID++ DeviceInformation: Got serial: %s\n", hdev->uniq); return 0; } /* -------------------------------------------------------------------------- */ /* 0x0005: GetDeviceNameType */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_GET_DEVICE_NAME_TYPE 0x0005 #define CMD_GET_DEVICE_NAME_TYPE_GET_COUNT 0x00 #define CMD_GET_DEVICE_NAME_TYPE_GET_DEVICE_NAME 0x10 #define CMD_GET_DEVICE_NAME_TYPE_GET_TYPE 0x20 static int hidpp_devicenametype_get_count(struct hidpp_device *hidpp, u8 feature_index, u8 *nameLength) { struct hidpp_report response; int ret; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_GET_DEVICE_NAME_TYPE_GET_COUNT, NULL, 0, &response); if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; *nameLength = response.fap.params[0]; return ret; } static int hidpp_devicenametype_get_device_name(struct hidpp_device *hidpp, u8 feature_index, u8 char_index, char *device_name, int len_buf) { struct hidpp_report response; int ret, i; int count; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_GET_DEVICE_NAME_TYPE_GET_DEVICE_NAME, &char_index, 1, &response); if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; switch (response.report_id) { case REPORT_ID_HIDPP_VERY_LONG: count = hidpp->very_long_report_length - 4; break; case REPORT_ID_HIDPP_LONG: count = HIDPP_REPORT_LONG_LENGTH - 4; break; case REPORT_ID_HIDPP_SHORT: count = HIDPP_REPORT_SHORT_LENGTH - 4; break; default: return -EPROTO; } if (len_buf < count) count = len_buf; for (i = 0; i < count; i++) device_name[i] = response.fap.params[i]; return count; } static char *hidpp_get_device_name(struct hidpp_device *hidpp) { u8 feature_type; u8 feature_index; u8 __name_length; char *name; unsigned index = 0; int ret; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_GET_DEVICE_NAME_TYPE, &feature_index, &feature_type); if (ret) return NULL; ret = hidpp_devicenametype_get_count(hidpp, feature_index, &__name_length); if (ret) return NULL; name = kzalloc(__name_length + 1, GFP_KERNEL); if (!name) return NULL; while (index < __name_length) { ret = hidpp_devicenametype_get_device_name(hidpp, feature_index, index, name + index, __name_length - index); if (ret <= 0) { kfree(name); return NULL; } index += ret; } /* include the terminating '\0' */ hidpp_prefix_name(&name, __name_length + 1); return name; } /* -------------------------------------------------------------------------- */ /* 0x1000: Battery level status */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_BATTERY_LEVEL_STATUS 0x1000 #define CMD_BATTERY_LEVEL_STATUS_GET_BATTERY_LEVEL_STATUS 0x00 #define CMD_BATTERY_LEVEL_STATUS_GET_BATTERY_CAPABILITY 0x10 #define EVENT_BATTERY_LEVEL_STATUS_BROADCAST 0x00 #define FLAG_BATTERY_LEVEL_DISABLE_OSD BIT(0) #define FLAG_BATTERY_LEVEL_MILEAGE BIT(1) #define FLAG_BATTERY_LEVEL_RECHARGEABLE BIT(2) static int hidpp_map_battery_level(int capacity) { if (capacity < 11) return POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; /* * The spec says this should be < 31 but some devices report 30 * with brand new batteries and Windows reports 30 as "Good". 
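	 *
	 * (Added note: with the thresholds used here the mapping works out to
	 * 0-10% critical, 11-29% low, 30-80% normal and 81-100% full.)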
*/ else if (capacity < 30) return POWER_SUPPLY_CAPACITY_LEVEL_LOW; else if (capacity < 81) return POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; return POWER_SUPPLY_CAPACITY_LEVEL_FULL; } static int hidpp20_batterylevel_map_status_capacity(u8 data[3], int *capacity, int *next_capacity, int *level) { int status; *capacity = data[0]; *next_capacity = data[1]; *level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN; /* When discharging, we can rely on the device reported capacity. * For all other states the device reports 0 (unknown). */ switch (data[2]) { case 0: /* discharging (in use) */ status = POWER_SUPPLY_STATUS_DISCHARGING; *level = hidpp_map_battery_level(*capacity); break; case 1: /* recharging */ status = POWER_SUPPLY_STATUS_CHARGING; break; case 2: /* charge in final stage */ status = POWER_SUPPLY_STATUS_CHARGING; break; case 3: /* charge complete */ status = POWER_SUPPLY_STATUS_FULL; *level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; *capacity = 100; break; case 4: /* recharging below optimal speed */ status = POWER_SUPPLY_STATUS_CHARGING; break; /* 5 = invalid battery type 6 = thermal error 7 = other charging error */ default: status = POWER_SUPPLY_STATUS_NOT_CHARGING; break; } return status; } static int hidpp20_batterylevel_get_battery_capacity(struct hidpp_device *hidpp, u8 feature_index, int *status, int *capacity, int *next_capacity, int *level) { struct hidpp_report response; int ret; u8 *params = (u8 *)response.fap.params; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_BATTERY_LEVEL_STATUS_GET_BATTERY_LEVEL_STATUS, NULL, 0, &response); /* Ignore these intermittent errors */ if (ret == HIDPP_ERROR_RESOURCE_ERROR) return -EIO; if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; *status = hidpp20_batterylevel_map_status_capacity(params, capacity, next_capacity, level); return 0; } static int hidpp20_batterylevel_get_battery_info(struct hidpp_device *hidpp, u8 feature_index) { struct hidpp_report response; int ret; u8 *params = (u8 *)response.fap.params; unsigned int level_count, flags; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_BATTERY_LEVEL_STATUS_GET_BATTERY_CAPABILITY, NULL, 0, &response); if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; level_count = params[0]; flags = params[1]; if (level_count < 10 || !(flags & FLAG_BATTERY_LEVEL_MILEAGE)) hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS; else hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_MILEAGE; return 0; } static int hidpp20_query_battery_info_1000(struct hidpp_device *hidpp) { u8 feature_type; int ret; int status, capacity, next_capacity, level; if (hidpp->battery.feature_index == 0xff) { ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_BATTERY_LEVEL_STATUS, &hidpp->battery.feature_index, &feature_type); if (ret) return ret; } ret = hidpp20_batterylevel_get_battery_capacity(hidpp, hidpp->battery.feature_index, &status, &capacity, &next_capacity, &level); if (ret) return ret; ret = hidpp20_batterylevel_get_battery_info(hidpp, hidpp->battery.feature_index); if (ret) return ret; hidpp->battery.status = status; hidpp->battery.capacity = capacity; hidpp->battery.level = level; /* the capacity is only available when discharging or full */ hidpp->battery.online = status == POWER_SUPPLY_STATUS_DISCHARGING || status == POWER_SUPPLY_STATUS_FULL; return 0; } static int hidpp20_battery_event_1000(struct hidpp_device *hidpp, u8 *data, int size) { 
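	/*
	 * (Added descriptive note: broadcast-event counterpart of
	 * hidpp20_query_battery_info_1000(); it parses the same three status
	 * bytes from the event payload and only calls power_supply_changed()
	 * when capacity, level or status actually changed.)
	 */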
struct hidpp_report *report = (struct hidpp_report *)data; int status, capacity, next_capacity, level; bool changed; if (report->fap.feature_index != hidpp->battery.feature_index || report->fap.funcindex_clientid != EVENT_BATTERY_LEVEL_STATUS_BROADCAST) return 0; status = hidpp20_batterylevel_map_status_capacity(report->fap.params, &capacity, &next_capacity, &level); /* the capacity is only available when discharging or full */ hidpp->battery.online = status == POWER_SUPPLY_STATUS_DISCHARGING || status == POWER_SUPPLY_STATUS_FULL; changed = capacity != hidpp->battery.capacity || level != hidpp->battery.level || status != hidpp->battery.status; if (changed) { hidpp->battery.level = level; hidpp->battery.capacity = capacity; hidpp->battery.status = status; if (hidpp->battery.ps) power_supply_changed(hidpp->battery.ps); } return 0; } /* -------------------------------------------------------------------------- */ /* 0x1001: Battery voltage */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_BATTERY_VOLTAGE 0x1001 #define CMD_BATTERY_VOLTAGE_GET_BATTERY_VOLTAGE 0x00 #define EVENT_BATTERY_VOLTAGE_STATUS_BROADCAST 0x00 static int hidpp20_battery_map_status_voltage(u8 data[3], int *voltage, int *level, int *charge_type) { int status; long flags = (long) data[2]; *level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN; if (flags & 0x80) switch (flags & 0x07) { case 0: status = POWER_SUPPLY_STATUS_CHARGING; break; case 1: status = POWER_SUPPLY_STATUS_FULL; *level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; break; case 2: status = POWER_SUPPLY_STATUS_NOT_CHARGING; break; default: status = POWER_SUPPLY_STATUS_UNKNOWN; break; } else status = POWER_SUPPLY_STATUS_DISCHARGING; *charge_type = POWER_SUPPLY_CHARGE_TYPE_STANDARD; if (test_bit(3, &flags)) { *charge_type = POWER_SUPPLY_CHARGE_TYPE_FAST; } if (test_bit(4, &flags)) { *charge_type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; } if (test_bit(5, &flags)) { *level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; } *voltage = get_unaligned_be16(data); return status; } static int hidpp20_battery_get_battery_voltage(struct hidpp_device *hidpp, u8 feature_index, int *status, int *voltage, int *level, int *charge_type) { struct hidpp_report response; int ret; u8 *params = (u8 *)response.fap.params; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_BATTERY_VOLTAGE_GET_BATTERY_VOLTAGE, NULL, 0, &response); if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_VOLTAGE; *status = hidpp20_battery_map_status_voltage(params, voltage, level, charge_type); return 0; } static int hidpp20_map_battery_capacity(struct hid_device *hid_dev, int voltage) { /* NB: This voltage curve doesn't necessarily map perfectly to all * devices that implement the BATTERY_VOLTAGE feature. This is because * there are a few devices that use different battery technology. 
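	 *
	 * (Added illustrative note: the lookup over the descending table below
	 * returns the remaining percentage, e.g. 4186 mV or more maps to 100%,
	 * 3800 mV maps to about 47%, and anything below 3537 mV maps to 0%.)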
*/ static const int voltages[100] = { 4186, 4156, 4143, 4133, 4122, 4113, 4103, 4094, 4086, 4075, 4067, 4059, 4051, 4043, 4035, 4027, 4019, 4011, 4003, 3997, 3989, 3983, 3976, 3969, 3961, 3955, 3949, 3942, 3935, 3929, 3922, 3916, 3909, 3902, 3896, 3890, 3883, 3877, 3870, 3865, 3859, 3853, 3848, 3842, 3837, 3833, 3828, 3824, 3819, 3815, 3811, 3808, 3804, 3800, 3797, 3793, 3790, 3787, 3784, 3781, 3778, 3775, 3772, 3770, 3767, 3764, 3762, 3759, 3757, 3754, 3751, 3748, 3744, 3741, 3737, 3734, 3730, 3726, 3724, 3720, 3717, 3714, 3710, 3706, 3702, 3697, 3693, 3688, 3683, 3677, 3671, 3666, 3662, 3658, 3654, 3646, 3633, 3612, 3579, 3537 }; int i; if (unlikely(voltage < 3500 || voltage >= 5000)) hid_warn_once(hid_dev, "%s: possibly using the wrong voltage curve\n", __func__); for (i = 0; i < ARRAY_SIZE(voltages); i++) { if (voltage >= voltages[i]) return ARRAY_SIZE(voltages) - i; } return 0; } static int hidpp20_query_battery_voltage_info(struct hidpp_device *hidpp) { u8 feature_type; int ret; int status, voltage, level, charge_type; if (hidpp->battery.voltage_feature_index == 0xff) { ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_BATTERY_VOLTAGE, &hidpp->battery.voltage_feature_index, &feature_type); if (ret) return ret; } ret = hidpp20_battery_get_battery_voltage(hidpp, hidpp->battery.voltage_feature_index, &status, &voltage, &level, &charge_type); if (ret) return ret; hidpp->battery.status = status; hidpp->battery.voltage = voltage; hidpp->battery.capacity = hidpp20_map_battery_capacity(hidpp->hid_dev, voltage); hidpp->battery.level = level; hidpp->battery.charge_type = charge_type; hidpp->battery.online = status != POWER_SUPPLY_STATUS_NOT_CHARGING; return 0; } static int hidpp20_battery_voltage_event(struct hidpp_device *hidpp, u8 *data, int size) { struct hidpp_report *report = (struct hidpp_report *)data; int status, voltage, level, charge_type; if (report->fap.feature_index != hidpp->battery.voltage_feature_index || report->fap.funcindex_clientid != EVENT_BATTERY_VOLTAGE_STATUS_BROADCAST) return 0; status = hidpp20_battery_map_status_voltage(report->fap.params, &voltage, &level, &charge_type); hidpp->battery.online = status != POWER_SUPPLY_STATUS_NOT_CHARGING; if (voltage != hidpp->battery.voltage || status != hidpp->battery.status) { hidpp->battery.voltage = voltage; hidpp->battery.capacity = hidpp20_map_battery_capacity(hidpp->hid_dev, voltage); hidpp->battery.status = status; hidpp->battery.level = level; hidpp->battery.charge_type = charge_type; if (hidpp->battery.ps) power_supply_changed(hidpp->battery.ps); } return 0; } /* -------------------------------------------------------------------------- */ /* 0x1004: Unified battery */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_UNIFIED_BATTERY 0x1004 #define CMD_UNIFIED_BATTERY_GET_CAPABILITIES 0x00 #define CMD_UNIFIED_BATTERY_GET_STATUS 0x10 #define EVENT_UNIFIED_BATTERY_STATUS_EVENT 0x00 #define FLAG_UNIFIED_BATTERY_LEVEL_CRITICAL BIT(0) #define FLAG_UNIFIED_BATTERY_LEVEL_LOW BIT(1) #define FLAG_UNIFIED_BATTERY_LEVEL_GOOD BIT(2) #define FLAG_UNIFIED_BATTERY_LEVEL_FULL BIT(3) #define FLAG_UNIFIED_BATTERY_FLAGS_RECHARGEABLE BIT(0) #define FLAG_UNIFIED_BATTERY_FLAGS_STATE_OF_CHARGE BIT(1) static int hidpp20_unifiedbattery_get_capabilities(struct hidpp_device *hidpp, u8 feature_index) { struct hidpp_report response; int ret; u8 *params = (u8 *)response.fap.params; if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS || hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE) 
{
		/* we have already set the device capabilities, so let's skip */
		return 0;
	}

	ret = hidpp_send_fap_command_sync(hidpp, feature_index,
					  CMD_UNIFIED_BATTERY_GET_CAPABILITIES,
					  NULL, 0, &response);
	/* Ignore these intermittent errors */
	if (ret == HIDPP_ERROR_RESOURCE_ERROR)
		return -EIO;
	if (ret > 0) {
		hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n",
			__func__, ret);
		return -EPROTO;
	}
	if (ret)
		return ret;

	/*
	 * If the device supports state of charge (battery percentage) we won't
	 * export the battery level information. There are 4 possible battery
	 * levels and they all are optional, this means that the device might
	 * not support any of them, we are just better off with the battery
	 * percentage.
	 */
	if (params[1] & FLAG_UNIFIED_BATTERY_FLAGS_STATE_OF_CHARGE) {
		hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_PERCENTAGE;
		hidpp->battery.supported_levels_1004 = 0;
	} else {
		hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS;
		hidpp->battery.supported_levels_1004 = params[0];
	}

	return 0;
}

static int hidpp20_unifiedbattery_map_status(struct hidpp_device *hidpp,
					     u8 charging_status,
					     u8 external_power_status)
{
	int status;

	switch (charging_status) {
	case 0: /* discharging */
		status = POWER_SUPPLY_STATUS_DISCHARGING;
		break;
	case 1: /* charging */
	case 2: /* charging slow */
		status = POWER_SUPPLY_STATUS_CHARGING;
		break;
	case 3: /* complete */
		status = POWER_SUPPLY_STATUS_FULL;
		break;
	case 4: /* error */
		status = POWER_SUPPLY_STATUS_NOT_CHARGING;
		hid_info(hidpp->hid_dev, "%s: charging error",
			 hidpp->name);
		break;
	default:
		status = POWER_SUPPLY_STATUS_NOT_CHARGING;
		break;
	}

	return status;
}

static int hidpp20_unifiedbattery_map_level(struct hidpp_device *hidpp,
					    u8 battery_level)
{
	/* clear unsupported level bits */
	battery_level &= hidpp->battery.supported_levels_1004;

	if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_FULL)
		return POWER_SUPPLY_CAPACITY_LEVEL_FULL;
	else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_GOOD)
		return POWER_SUPPLY_CAPACITY_LEVEL_NORMAL;
	else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_LOW)
		return POWER_SUPPLY_CAPACITY_LEVEL_LOW;
	else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_CRITICAL)
		return POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL;

	return POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN;
}

static int hidpp20_unifiedbattery_get_status(struct hidpp_device *hidpp,
					     u8 feature_index,
					     u8 *state_of_charge,
					     int *status,
					     int *level)
{
	struct hidpp_report response;
	int ret;
	u8 *params = (u8 *)response.fap.params;

	ret = hidpp_send_fap_command_sync(hidpp, feature_index,
					  CMD_UNIFIED_BATTERY_GET_STATUS,
					  NULL, 0, &response);
	/* Ignore these intermittent errors */
	if (ret == HIDPP_ERROR_RESOURCE_ERROR)
		return -EIO;
	if (ret > 0) {
		hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n",
			__func__, ret);
		return -EPROTO;
	}
	if (ret)
		return ret;

	*state_of_charge = params[0];
	*status = hidpp20_unifiedbattery_map_status(hidpp, params[2], params[3]);
	*level = hidpp20_unifiedbattery_map_level(hidpp, params[1]);

	return 0;
}

static int hidpp20_query_battery_info_1004(struct hidpp_device *hidpp)
{
	u8 feature_type;
	int ret;
	u8 state_of_charge;
	int status, level;

	if (hidpp->battery.feature_index == 0xff) {
		ret = hidpp_root_get_feature(hidpp,
					     HIDPP_PAGE_UNIFIED_BATTERY,
					     &hidpp->battery.feature_index,
					     &feature_type);
		if (ret)
			return ret;
	}

	ret = hidpp20_unifiedbattery_get_capabilities(hidpp,
					hidpp->battery.feature_index);
	if (ret)
		return ret;

	ret = hidpp20_unifiedbattery_get_status(hidpp,
						hidpp->battery.feature_index,
						&state_of_charge,
						&status,
						&level);
	if (ret)
		return ret;

	hidpp->capabilities |=
HIDPP_CAPABILITY_UNIFIED_BATTERY; hidpp->battery.capacity = state_of_charge; hidpp->battery.status = status; hidpp->battery.level = level; hidpp->battery.online = true; return 0; } static int hidpp20_battery_event_1004(struct hidpp_device *hidpp, u8 *data, int size) { struct hidpp_report *report = (struct hidpp_report *)data; u8 *params = (u8 *)report->fap.params; int state_of_charge, status, level; bool changed; if (report->fap.feature_index != hidpp->battery.feature_index || report->fap.funcindex_clientid != EVENT_UNIFIED_BATTERY_STATUS_EVENT) return 0; state_of_charge = params[0]; status = hidpp20_unifiedbattery_map_status(hidpp, params[2], params[3]); level = hidpp20_unifiedbattery_map_level(hidpp, params[1]); changed = status != hidpp->battery.status || (state_of_charge != hidpp->battery.capacity && hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE) || (level != hidpp->battery.level && hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS); if (changed) { hidpp->battery.capacity = state_of_charge; hidpp->battery.status = status; hidpp->battery.level = level; if (hidpp->battery.ps) power_supply_changed(hidpp->battery.ps); } return 0; } /* -------------------------------------------------------------------------- */ /* Battery feature helpers */ /* -------------------------------------------------------------------------- */ static enum power_supply_property hidpp_battery_props[] = { POWER_SUPPLY_PROP_ONLINE, POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, POWER_SUPPLY_PROP_SERIAL_NUMBER, 0, /* placeholder for POWER_SUPPLY_PROP_CAPACITY, */ 0, /* placeholder for POWER_SUPPLY_PROP_CAPACITY_LEVEL, */ 0, /* placeholder for POWER_SUPPLY_PROP_VOLTAGE_NOW, */ }; static int hidpp_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct hidpp_device *hidpp = power_supply_get_drvdata(psy); int ret = 0; switch(psp) { case POWER_SUPPLY_PROP_STATUS: val->intval = hidpp->battery.status; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = hidpp->battery.capacity; break; case POWER_SUPPLY_PROP_CAPACITY_LEVEL: val->intval = hidpp->battery.level; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; case POWER_SUPPLY_PROP_ONLINE: val->intval = hidpp->battery.online; break; case POWER_SUPPLY_PROP_MODEL_NAME: if (!strncmp(hidpp->name, "Logitech ", 9)) val->strval = hidpp->name + 9; else val->strval = hidpp->name; break; case POWER_SUPPLY_PROP_MANUFACTURER: val->strval = "Logitech"; break; case POWER_SUPPLY_PROP_SERIAL_NUMBER: val->strval = hidpp->hid_dev->uniq; break; case POWER_SUPPLY_PROP_VOLTAGE_NOW: /* hardware reports voltage in mV. 
sysfs expects uV */ val->intval = hidpp->battery.voltage * 1000; break; case POWER_SUPPLY_PROP_CHARGE_TYPE: val->intval = hidpp->battery.charge_type; break; default: ret = -EINVAL; break; } return ret; } /* -------------------------------------------------------------------------- */ /* 0x1d4b: Wireless device status */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_WIRELESS_DEVICE_STATUS 0x1d4b static int hidpp_get_wireless_feature_index(struct hidpp_device *hidpp, u8 *feature_index) { u8 feature_type; int ret; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_WIRELESS_DEVICE_STATUS, feature_index, &feature_type); return ret; } /* -------------------------------------------------------------------------- */ /* 0x1f20: ADC measurement */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_ADC_MEASUREMENT 0x1f20 #define CMD_ADC_MEASUREMENT_GET_ADC_MEASUREMENT 0x00 #define EVENT_ADC_MEASUREMENT_STATUS_BROADCAST 0x00 static int hidpp20_map_adc_measurement_1f20_capacity(struct hid_device *hid_dev, int voltage) { /* NB: This voltage curve doesn't necessarily map perfectly to all * devices that implement the ADC_MEASUREMENT feature. This is because * there are a few devices that use different battery technology. * * Adapted from: * https://github.com/Sapd/HeadsetControl/blob/acd972be0468e039b93aae81221f20a54d2d60f7/src/devices/logitech_g633_g933_935.c#L44-L52 */ static const int voltages[100] = { 4030, 4024, 4018, 4011, 4003, 3994, 3985, 3975, 3963, 3951, 3937, 3922, 3907, 3893, 3880, 3868, 3857, 3846, 3837, 3828, 3820, 3812, 3805, 3798, 3791, 3785, 3779, 3773, 3768, 3762, 3757, 3752, 3747, 3742, 3738, 3733, 3729, 3724, 3720, 3716, 3712, 3708, 3704, 3700, 3696, 3692, 3688, 3685, 3681, 3677, 3674, 3670, 3667, 3663, 3660, 3657, 3653, 3650, 3646, 3643, 3640, 3637, 3633, 3630, 3627, 3624, 3620, 3617, 3614, 3611, 3608, 3604, 3601, 3598, 3595, 3592, 3589, 3585, 3582, 3579, 3576, 3573, 3569, 3566, 3563, 3560, 3556, 3553, 3550, 3546, 3543, 3539, 3536, 3532, 3529, 3525, 3499, 3466, 3433, 3399, }; int i; if (voltage == 0) return 0; if (unlikely(voltage < 3400 || voltage >= 5000)) hid_warn_once(hid_dev, "%s: possibly using the wrong voltage curve\n", __func__); for (i = 0; i < ARRAY_SIZE(voltages); i++) { if (voltage >= voltages[i]) return ARRAY_SIZE(voltages) - i; } return 0; } static int hidpp20_map_adc_measurement_1f20(u8 data[3], int *voltage) { int status; u8 flags; flags = data[2]; switch (flags) { case 0x01: status = POWER_SUPPLY_STATUS_DISCHARGING; break; case 0x03: status = POWER_SUPPLY_STATUS_CHARGING; break; case 0x07: status = POWER_SUPPLY_STATUS_FULL; break; case 0x0F: default: status = POWER_SUPPLY_STATUS_UNKNOWN; break; } *voltage = get_unaligned_be16(data); dbg_hid("Parsed 1f20 data as flag 0x%02x voltage %dmV\n", flags, *voltage); return status; } /* Return value is whether the device is online */ static bool hidpp20_get_adc_measurement_1f20(struct hidpp_device *hidpp, u8 feature_index, int *status, int *voltage) { struct hidpp_report response; int ret; u8 *params = (u8 *)response.fap.params; *status = POWER_SUPPLY_STATUS_UNKNOWN; *voltage = 0; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_ADC_MEASUREMENT_GET_ADC_MEASUREMENT, NULL, 0, &response); if (ret > 0) { hid_dbg(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return false; } *status = hidpp20_map_adc_measurement_1f20(params, voltage); return true; } static int 
hidpp20_query_adc_measurement_info_1f20(struct hidpp_device *hidpp) { u8 feature_type; if (hidpp->battery.adc_measurement_feature_index == 0xff) { int ret; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_ADC_MEASUREMENT, &hidpp->battery.adc_measurement_feature_index, &feature_type); if (ret) return ret; hidpp->capabilities |= HIDPP_CAPABILITY_ADC_MEASUREMENT; } hidpp->battery.online = hidpp20_get_adc_measurement_1f20(hidpp, hidpp->battery.adc_measurement_feature_index, &hidpp->battery.status, &hidpp->battery.voltage); hidpp->battery.capacity = hidpp20_map_adc_measurement_1f20_capacity(hidpp->hid_dev, hidpp->battery.voltage); hidpp_update_usb_wireless_status(hidpp); return 0; } static int hidpp20_adc_measurement_event_1f20(struct hidpp_device *hidpp, u8 *data, int size) { struct hidpp_report *report = (struct hidpp_report *)data; int status, voltage; if (report->fap.feature_index != hidpp->battery.adc_measurement_feature_index || report->fap.funcindex_clientid != EVENT_ADC_MEASUREMENT_STATUS_BROADCAST) return 0; status = hidpp20_map_adc_measurement_1f20(report->fap.params, &voltage); hidpp->battery.online = status != POWER_SUPPLY_STATUS_UNKNOWN; if (voltage != hidpp->battery.voltage || status != hidpp->battery.status) { hidpp->battery.status = status; hidpp->battery.voltage = voltage; hidpp->battery.capacity = hidpp20_map_adc_measurement_1f20_capacity(hidpp->hid_dev, voltage); if (hidpp->battery.ps) power_supply_changed(hidpp->battery.ps); hidpp_update_usb_wireless_status(hidpp); } return 0; } /* -------------------------------------------------------------------------- */ /* 0x2120: Hi-resolution scrolling */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_HI_RESOLUTION_SCROLLING 0x2120 #define CMD_HI_RESOLUTION_SCROLLING_SET_HIGHRES_SCROLLING_MODE 0x10 static int hidpp_hrs_set_highres_scrolling_mode(struct hidpp_device *hidpp, bool enabled, u8 *multiplier) { u8 feature_index; u8 feature_type; int ret; u8 params[1]; struct hidpp_report response; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_HI_RESOLUTION_SCROLLING, &feature_index, &feature_type); if (ret) return ret; params[0] = enabled ? 
BIT(0) : 0; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_HI_RESOLUTION_SCROLLING_SET_HIGHRES_SCROLLING_MODE, params, sizeof(params), &response); if (ret) return ret; *multiplier = response.fap.params[1]; return 0; } /* -------------------------------------------------------------------------- */ /* 0x2121: HiRes Wheel */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_HIRES_WHEEL 0x2121 #define CMD_HIRES_WHEEL_GET_WHEEL_CAPABILITY 0x00 #define CMD_HIRES_WHEEL_SET_WHEEL_MODE 0x20 static int hidpp_hrw_get_wheel_capability(struct hidpp_device *hidpp, u8 *multiplier) { u8 feature_index; u8 feature_type; int ret; struct hidpp_report response; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_HIRES_WHEEL, &feature_index, &feature_type); if (ret) goto return_default; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_HIRES_WHEEL_GET_WHEEL_CAPABILITY, NULL, 0, &response); if (ret) goto return_default; *multiplier = response.fap.params[0]; return 0; return_default: hid_warn(hidpp->hid_dev, "Couldn't get wheel multiplier (error %d)\n", ret); return ret; } static int hidpp_hrw_set_wheel_mode(struct hidpp_device *hidpp, bool invert, bool high_resolution, bool use_hidpp) { u8 feature_index; u8 feature_type; int ret; u8 params[1]; struct hidpp_report response; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_HIRES_WHEEL, &feature_index, &feature_type); if (ret) return ret; params[0] = (invert ? BIT(2) : 0) | (high_resolution ? BIT(1) : 0) | (use_hidpp ? BIT(0) : 0); return hidpp_send_fap_command_sync(hidpp, feature_index, CMD_HIRES_WHEEL_SET_WHEEL_MODE, params, sizeof(params), &response); } /* -------------------------------------------------------------------------- */ /* 0x4301: Solar Keyboard */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_SOLAR_KEYBOARD 0x4301 #define CMD_SOLAR_SET_LIGHT_MEASURE 0x00 #define EVENT_SOLAR_BATTERY_BROADCAST 0x00 #define EVENT_SOLAR_BATTERY_LIGHT_MEASURE 0x10 #define EVENT_SOLAR_CHECK_LIGHT_BUTTON 0x20 static int hidpp_solar_request_battery_event(struct hidpp_device *hidpp) { struct hidpp_report response; u8 params[2] = { 1, 1 }; u8 feature_type; int ret; if (hidpp->battery.feature_index == 0xff) { ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_SOLAR_KEYBOARD, &hidpp->battery.solar_feature_index, &feature_type); if (ret) return ret; } ret = hidpp_send_fap_command_sync(hidpp, hidpp->battery.solar_feature_index, CMD_SOLAR_SET_LIGHT_MEASURE, params, 2, &response); if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_MILEAGE; return 0; } static int hidpp_solar_battery_event(struct hidpp_device *hidpp, u8 *data, int size) { struct hidpp_report *report = (struct hidpp_report *)data; int capacity, lux, status; u8 function; function = report->fap.funcindex_clientid; if (report->fap.feature_index != hidpp->battery.solar_feature_index || !(function == EVENT_SOLAR_BATTERY_BROADCAST || function == EVENT_SOLAR_BATTERY_LIGHT_MEASURE || function == EVENT_SOLAR_CHECK_LIGHT_BUTTON)) return 0; capacity = report->fap.params[0]; switch (function) { case EVENT_SOLAR_BATTERY_LIGHT_MEASURE: lux = (report->fap.params[1] << 8) | report->fap.params[2]; if (lux > 200) status = POWER_SUPPLY_STATUS_CHARGING; else status = POWER_SUPPLY_STATUS_DISCHARGING; break; case EVENT_SOLAR_CHECK_LIGHT_BUTTON: default: if (capacity < hidpp->battery.capacity) 
status = POWER_SUPPLY_STATUS_DISCHARGING; else status = POWER_SUPPLY_STATUS_CHARGING; } if (capacity == 100) status = POWER_SUPPLY_STATUS_FULL; hidpp->battery.online = true; if (capacity != hidpp->battery.capacity || status != hidpp->battery.status) { hidpp->battery.capacity = capacity; hidpp->battery.status = status; if (hidpp->battery.ps) power_supply_changed(hidpp->battery.ps); } return 0; } /* -------------------------------------------------------------------------- */ /* 0x6010: Touchpad FW items */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_TOUCHPAD_FW_ITEMS 0x6010 #define CMD_TOUCHPAD_FW_ITEMS_SET 0x10 struct hidpp_touchpad_fw_items { uint8_t presence; uint8_t desired_state; uint8_t state; uint8_t persistent; }; /* * send a set state command to the device by reading the current items->state * field. items is then filled with the current state. */ static int hidpp_touchpad_fw_items_set(struct hidpp_device *hidpp, u8 feature_index, struct hidpp_touchpad_fw_items *items) { struct hidpp_report response; int ret; u8 *params = (u8 *)response.fap.params; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_TOUCHPAD_FW_ITEMS_SET, &items->state, 1, &response); if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; items->presence = params[0]; items->desired_state = params[1]; items->state = params[2]; items->persistent = params[3]; return 0; } /* -------------------------------------------------------------------------- */ /* 0x6100: TouchPadRawXY */ /* -------------------------------------------------------------------------- */ #define HIDPP_PAGE_TOUCHPAD_RAW_XY 0x6100 #define CMD_TOUCHPAD_GET_RAW_INFO 0x00 #define CMD_TOUCHPAD_SET_RAW_REPORT_STATE 0x20 #define EVENT_TOUCHPAD_RAW_XY 0x00 #define TOUCHPAD_RAW_XY_ORIGIN_LOWER_LEFT 0x01 #define TOUCHPAD_RAW_XY_ORIGIN_UPPER_LEFT 0x03 struct hidpp_touchpad_raw_info { u16 x_size; u16 y_size; u8 z_range; u8 area_range; u8 timestamp_unit; u8 maxcontacts; u8 origin; u16 res; }; struct hidpp_touchpad_raw_xy_finger { u8 contact_type; u8 contact_status; u16 x; u16 y; u8 z; u8 area; u8 finger_id; }; struct hidpp_touchpad_raw_xy { u16 timestamp; struct hidpp_touchpad_raw_xy_finger fingers[2]; u8 spurious_flag; u8 end_of_frame; u8 finger_count; u8 button; }; static int hidpp_touchpad_get_raw_info(struct hidpp_device *hidpp, u8 feature_index, struct hidpp_touchpad_raw_info *raw_info) { struct hidpp_report response; int ret; u8 *params = (u8 *)response.fap.params; ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_TOUCHPAD_GET_RAW_INFO, NULL, 0, &response); if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } if (ret) return ret; raw_info->x_size = get_unaligned_be16(&params[0]); raw_info->y_size = get_unaligned_be16(&params[2]); raw_info->z_range = params[4]; raw_info->area_range = params[5]; raw_info->maxcontacts = params[7]; raw_info->origin = params[8]; /* res is given in unit per inch */ raw_info->res = get_unaligned_be16(&params[13]) * 2 / 51; return ret; } static int hidpp_touchpad_set_raw_report_state(struct hidpp_device *hidpp_dev, u8 feature_index, bool send_raw_reports, bool sensor_enhanced_settings) { struct hidpp_report response; /* * Params: * bit 0 - enable raw * bit 1 - 16bit Z, no area * bit 2 - enhanced sensitivity * bit 3 - width, height (4 bits each) instead of area * bit 4 - send raw + gestures (degrades smoothness) * 
remaining bits - reserved */ u8 params = send_raw_reports | (sensor_enhanced_settings << 2); return hidpp_send_fap_command_sync(hidpp_dev, feature_index, CMD_TOUCHPAD_SET_RAW_REPORT_STATE, &params, 1, &response); } static void hidpp_touchpad_touch_event(u8 *data, struct hidpp_touchpad_raw_xy_finger *finger) { u8 x_m = data[0] << 2; u8 y_m = data[2] << 2; finger->x = x_m << 6 | data[1]; finger->y = y_m << 6 | data[3]; finger->contact_type = data[0] >> 6; finger->contact_status = data[2] >> 6; finger->z = data[4]; finger->area = data[5]; finger->finger_id = data[6] >> 4; } static void hidpp_touchpad_raw_xy_event(struct hidpp_device *hidpp_dev, u8 *data, struct hidpp_touchpad_raw_xy *raw_xy) { memset(raw_xy, 0, sizeof(struct hidpp_touchpad_raw_xy)); raw_xy->end_of_frame = data[8] & 0x01; raw_xy->spurious_flag = (data[8] >> 1) & 0x01; raw_xy->finger_count = data[15] & 0x0f; raw_xy->button = (data[8] >> 2) & 0x01; if (raw_xy->finger_count) { hidpp_touchpad_touch_event(&data[2], &raw_xy->fingers[0]); hidpp_touchpad_touch_event(&data[9], &raw_xy->fingers[1]); } } /* -------------------------------------------------------------------------- */ /* 0x8123: Force feedback support */ /* -------------------------------------------------------------------------- */ #define HIDPP_FF_GET_INFO 0x01 #define HIDPP_FF_RESET_ALL 0x11 #define HIDPP_FF_DOWNLOAD_EFFECT 0x21 #define HIDPP_FF_SET_EFFECT_STATE 0x31 #define HIDPP_FF_DESTROY_EFFECT 0x41 #define HIDPP_FF_GET_APERTURE 0x51 #define HIDPP_FF_SET_APERTURE 0x61 #define HIDPP_FF_GET_GLOBAL_GAINS 0x71 #define HIDPP_FF_SET_GLOBAL_GAINS 0x81 #define HIDPP_FF_EFFECT_STATE_GET 0x00 #define HIDPP_FF_EFFECT_STATE_STOP 0x01 #define HIDPP_FF_EFFECT_STATE_PLAY 0x02 #define HIDPP_FF_EFFECT_STATE_PAUSE 0x03 #define HIDPP_FF_EFFECT_CONSTANT 0x00 #define HIDPP_FF_EFFECT_PERIODIC_SINE 0x01 #define HIDPP_FF_EFFECT_PERIODIC_SQUARE 0x02 #define HIDPP_FF_EFFECT_PERIODIC_TRIANGLE 0x03 #define HIDPP_FF_EFFECT_PERIODIC_SAWTOOTHUP 0x04 #define HIDPP_FF_EFFECT_PERIODIC_SAWTOOTHDOWN 0x05 #define HIDPP_FF_EFFECT_SPRING 0x06 #define HIDPP_FF_EFFECT_DAMPER 0x07 #define HIDPP_FF_EFFECT_FRICTION 0x08 #define HIDPP_FF_EFFECT_INERTIA 0x09 #define HIDPP_FF_EFFECT_RAMP 0x0A #define HIDPP_FF_EFFECT_AUTOSTART 0x80 #define HIDPP_FF_EFFECTID_NONE -1 #define HIDPP_FF_EFFECTID_AUTOCENTER -2 #define HIDPP_AUTOCENTER_PARAMS_LENGTH 18 #define HIDPP_FF_MAX_PARAMS 20 #define HIDPP_FF_RESERVED_SLOTS 1 struct hidpp_ff_private_data { struct hidpp_device *hidpp; u8 feature_index; u8 version; u16 gain; s16 range; u8 slot_autocenter; u8 num_effects; int *effect_ids; struct workqueue_struct *wq; atomic_t workqueue_size; }; struct hidpp_ff_work_data { struct work_struct work; struct hidpp_ff_private_data *data; int effect_id; u8 command; u8 params[HIDPP_FF_MAX_PARAMS]; u8 size; }; static const signed short hidpp_ff_effects[] = { FF_CONSTANT, FF_PERIODIC, FF_SINE, FF_SQUARE, FF_SAW_UP, FF_SAW_DOWN, FF_TRIANGLE, FF_SPRING, FF_DAMPER, FF_AUTOCENTER, FF_GAIN, -1 }; static const signed short hidpp_ff_effects_v2[] = { FF_RAMP, FF_FRICTION, FF_INERTIA, -1 }; static const u8 HIDPP_FF_CONDITION_CMDS[] = { HIDPP_FF_EFFECT_SPRING, HIDPP_FF_EFFECT_FRICTION, HIDPP_FF_EFFECT_DAMPER, HIDPP_FF_EFFECT_INERTIA }; static const char *HIDPP_FF_CONDITION_NAMES[] = { "spring", "friction", "damper", "inertia" }; static u8 hidpp_ff_find_effect(struct hidpp_ff_private_data *data, int effect_id) { int i; for (i = 0; i < data->num_effects; i++) if (data->effect_ids[i] == effect_id) return i+1; return 0; } static void 
hidpp_ff_work_handler(struct work_struct *w) { struct hidpp_ff_work_data *wd = container_of(w, struct hidpp_ff_work_data, work); struct hidpp_ff_private_data *data = wd->data; struct hidpp_report response; u8 slot; int ret; /* add slot number if needed */ switch (wd->effect_id) { case HIDPP_FF_EFFECTID_AUTOCENTER: wd->params[0] = data->slot_autocenter; break; case HIDPP_FF_EFFECTID_NONE: /* leave slot as zero */ break; default: /* find current slot for effect */ wd->params[0] = hidpp_ff_find_effect(data, wd->effect_id); break; } /* send command and wait for reply */ ret = hidpp_send_fap_command_sync(data->hidpp, data->feature_index, wd->command, wd->params, wd->size, &response); if (ret) { hid_err(data->hidpp->hid_dev, "Failed to send command to device!\n"); goto out; } /* parse return data */ switch (wd->command) { case HIDPP_FF_DOWNLOAD_EFFECT: slot = response.fap.params[0]; if (slot > 0 && slot <= data->num_effects) { if (wd->effect_id >= 0) /* regular effect uploaded */ data->effect_ids[slot-1] = wd->effect_id; else if (wd->effect_id >= HIDPP_FF_EFFECTID_AUTOCENTER) /* autocenter spring uploaded */ data->slot_autocenter = slot; } break; case HIDPP_FF_DESTROY_EFFECT: if (wd->effect_id >= 0) /* regular effect destroyed */ data->effect_ids[wd->params[0]-1] = -1; else if (wd->effect_id >= HIDPP_FF_EFFECTID_AUTOCENTER) /* autocenter spring destoyed */ data->slot_autocenter = 0; break; case HIDPP_FF_SET_GLOBAL_GAINS: data->gain = (wd->params[0] << 8) + wd->params[1]; break; case HIDPP_FF_SET_APERTURE: data->range = (wd->params[0] << 8) + wd->params[1]; break; default: /* no action needed */ break; } out: atomic_dec(&data->workqueue_size); kfree(wd); } static int hidpp_ff_queue_work(struct hidpp_ff_private_data *data, int effect_id, u8 command, u8 *params, u8 size) { struct hidpp_ff_work_data *wd = kzalloc(sizeof(*wd), GFP_KERNEL); int s; if (!wd) return -ENOMEM; INIT_WORK(&wd->work, hidpp_ff_work_handler); wd->data = data; wd->effect_id = effect_id; wd->command = command; wd->size = size; memcpy(wd->params, params, size); s = atomic_inc_return(&data->workqueue_size); queue_work(data->wq, &wd->work); /* warn about excessive queue size */ if (s >= 20 && s % 20 == 0) hid_warn(data->hidpp->hid_dev, "Force feedback command queue contains %d commands, causing substantial delays!", s); return 0; } static int hidpp_ff_upload_effect(struct input_dev *dev, struct ff_effect *effect, struct ff_effect *old) { struct hidpp_ff_private_data *data = dev->ff->private; u8 params[20]; u8 size; int force; /* set common parameters */ params[2] = effect->replay.length >> 8; params[3] = effect->replay.length & 255; params[4] = effect->replay.delay >> 8; params[5] = effect->replay.delay & 255; switch (effect->type) { case FF_CONSTANT: force = (effect->u.constant.level * fixp_sin16((effect->direction * 360) >> 16)) >> 15; params[1] = HIDPP_FF_EFFECT_CONSTANT; params[6] = force >> 8; params[7] = force & 255; params[8] = effect->u.constant.envelope.attack_level >> 7; params[9] = effect->u.constant.envelope.attack_length >> 8; params[10] = effect->u.constant.envelope.attack_length & 255; params[11] = effect->u.constant.envelope.fade_level >> 7; params[12] = effect->u.constant.envelope.fade_length >> 8; params[13] = effect->u.constant.envelope.fade_length & 255; size = 14; dbg_hid("Uploading constant force level=%d in dir %d = %d\n", effect->u.constant.level, effect->direction, force); dbg_hid(" envelope attack=(%d, %d ms) fade=(%d, %d ms)\n", effect->u.constant.envelope.attack_level, 
effect->u.constant.envelope.attack_length, effect->u.constant.envelope.fade_level, effect->u.constant.envelope.fade_length); break; case FF_PERIODIC: { switch (effect->u.periodic.waveform) { case FF_SINE: params[1] = HIDPP_FF_EFFECT_PERIODIC_SINE; break; case FF_SQUARE: params[1] = HIDPP_FF_EFFECT_PERIODIC_SQUARE; break; case FF_SAW_UP: params[1] = HIDPP_FF_EFFECT_PERIODIC_SAWTOOTHUP; break; case FF_SAW_DOWN: params[1] = HIDPP_FF_EFFECT_PERIODIC_SAWTOOTHDOWN; break; case FF_TRIANGLE: params[1] = HIDPP_FF_EFFECT_PERIODIC_TRIANGLE; break; default: hid_err(data->hidpp->hid_dev, "Unexpected periodic waveform type %i!\n", effect->u.periodic.waveform); return -EINVAL; } force = (effect->u.periodic.magnitude * fixp_sin16((effect->direction * 360) >> 16)) >> 15; params[6] = effect->u.periodic.magnitude >> 8; params[7] = effect->u.periodic.magnitude & 255; params[8] = effect->u.periodic.offset >> 8; params[9] = effect->u.periodic.offset & 255; params[10] = effect->u.periodic.period >> 8; params[11] = effect->u.periodic.period & 255; params[12] = effect->u.periodic.phase >> 8; params[13] = effect->u.periodic.phase & 255; params[14] = effect->u.periodic.envelope.attack_level >> 7; params[15] = effect->u.periodic.envelope.attack_length >> 8; params[16] = effect->u.periodic.envelope.attack_length & 255; params[17] = effect->u.periodic.envelope.fade_level >> 7; params[18] = effect->u.periodic.envelope.fade_length >> 8; params[19] = effect->u.periodic.envelope.fade_length & 255; size = 20; dbg_hid("Uploading periodic force mag=%d/dir=%d, offset=%d, period=%d ms, phase=%d\n", effect->u.periodic.magnitude, effect->direction, effect->u.periodic.offset, effect->u.periodic.period, effect->u.periodic.phase); dbg_hid(" envelope attack=(%d, %d ms) fade=(%d, %d ms)\n", effect->u.periodic.envelope.attack_level, effect->u.periodic.envelope.attack_length, effect->u.periodic.envelope.fade_level, effect->u.periodic.envelope.fade_length); break; } case FF_RAMP: params[1] = HIDPP_FF_EFFECT_RAMP; force = (effect->u.ramp.start_level * fixp_sin16((effect->direction * 360) >> 16)) >> 15; params[6] = force >> 8; params[7] = force & 255; force = (effect->u.ramp.end_level * fixp_sin16((effect->direction * 360) >> 16)) >> 15; params[8] = force >> 8; params[9] = force & 255; params[10] = effect->u.ramp.envelope.attack_level >> 7; params[11] = effect->u.ramp.envelope.attack_length >> 8; params[12] = effect->u.ramp.envelope.attack_length & 255; params[13] = effect->u.ramp.envelope.fade_level >> 7; params[14] = effect->u.ramp.envelope.fade_length >> 8; params[15] = effect->u.ramp.envelope.fade_length & 255; size = 16; dbg_hid("Uploading ramp force level=%d -> %d in dir %d = %d\n", effect->u.ramp.start_level, effect->u.ramp.end_level, effect->direction, force); dbg_hid(" envelope attack=(%d, %d ms) fade=(%d, %d ms)\n", effect->u.ramp.envelope.attack_level, effect->u.ramp.envelope.attack_length, effect->u.ramp.envelope.fade_level, effect->u.ramp.envelope.fade_length); break; case FF_FRICTION: case FF_INERTIA: case FF_SPRING: case FF_DAMPER: params[1] = HIDPP_FF_CONDITION_CMDS[effect->type - FF_SPRING]; params[6] = effect->u.condition[0].left_saturation >> 9; params[7] = (effect->u.condition[0].left_saturation >> 1) & 255; params[8] = effect->u.condition[0].left_coeff >> 8; params[9] = effect->u.condition[0].left_coeff & 255; params[10] = effect->u.condition[0].deadband >> 9; params[11] = (effect->u.condition[0].deadband >> 1) & 255; params[12] = effect->u.condition[0].center >> 8; params[13] = effect->u.condition[0].center & 255; 
params[14] = effect->u.condition[0].right_coeff >> 8; params[15] = effect->u.condition[0].right_coeff & 255; params[16] = effect->u.condition[0].right_saturation >> 9; params[17] = (effect->u.condition[0].right_saturation >> 1) & 255; size = 18; dbg_hid("Uploading %s force left coeff=%d, left sat=%d, right coeff=%d, right sat=%d\n", HIDPP_FF_CONDITION_NAMES[effect->type - FF_SPRING], effect->u.condition[0].left_coeff, effect->u.condition[0].left_saturation, effect->u.condition[0].right_coeff, effect->u.condition[0].right_saturation); dbg_hid(" deadband=%d, center=%d\n", effect->u.condition[0].deadband, effect->u.condition[0].center); break; default: hid_err(data->hidpp->hid_dev, "Unexpected force type %i!\n", effect->type); return -EINVAL; } return hidpp_ff_queue_work(data, effect->id, HIDPP_FF_DOWNLOAD_EFFECT, params, size); } static int hidpp_ff_playback(struct input_dev *dev, int effect_id, int value) { struct hidpp_ff_private_data *data = dev->ff->private; u8 params[2]; params[1] = value ? HIDPP_FF_EFFECT_STATE_PLAY : HIDPP_FF_EFFECT_STATE_STOP; dbg_hid("St%sing playback of effect %d.\n", value?"art":"opp", effect_id); return hidpp_ff_queue_work(data, effect_id, HIDPP_FF_SET_EFFECT_STATE, params, ARRAY_SIZE(params)); } static int hidpp_ff_erase_effect(struct input_dev *dev, int effect_id) { struct hidpp_ff_private_data *data = dev->ff->private; u8 slot = 0; dbg_hid("Erasing effect %d.\n", effect_id); return hidpp_ff_queue_work(data, effect_id, HIDPP_FF_DESTROY_EFFECT, &slot, 1); } static void hidpp_ff_set_autocenter(struct input_dev *dev, u16 magnitude) { struct hidpp_ff_private_data *data = dev->ff->private; u8 params[HIDPP_AUTOCENTER_PARAMS_LENGTH]; dbg_hid("Setting autocenter to %d.\n", magnitude); /* start a standard spring effect */ params[1] = HIDPP_FF_EFFECT_SPRING | HIDPP_FF_EFFECT_AUTOSTART; /* zero delay and duration */ params[2] = params[3] = params[4] = params[5] = 0; /* set coeff to 25% of saturation */ params[8] = params[14] = magnitude >> 11; params[9] = params[15] = (magnitude >> 3) & 255; params[6] = params[16] = magnitude >> 9; params[7] = params[17] = (magnitude >> 1) & 255; /* zero deadband and center */ params[10] = params[11] = params[12] = params[13] = 0; hidpp_ff_queue_work(data, HIDPP_FF_EFFECTID_AUTOCENTER, HIDPP_FF_DOWNLOAD_EFFECT, params, ARRAY_SIZE(params)); } static void hidpp_ff_set_gain(struct input_dev *dev, u16 gain) { struct hidpp_ff_private_data *data = dev->ff->private; u8 params[4]; dbg_hid("Setting gain to %d.\n", gain); params[0] = gain >> 8; params[1] = gain & 255; params[2] = 0; /* no boost */ params[3] = 0; hidpp_ff_queue_work(data, HIDPP_FF_EFFECTID_NONE, HIDPP_FF_SET_GLOBAL_GAINS, params, ARRAY_SIZE(params)); } static ssize_t hidpp_ff_range_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hid = to_hid_device(dev); struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); struct input_dev *idev = hidinput->input; struct hidpp_ff_private_data *data = idev->ff->private; return scnprintf(buf, PAGE_SIZE, "%u\n", data->range); } static ssize_t hidpp_ff_range_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hid = to_hid_device(dev); struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); struct input_dev *idev = hidinput->input; struct hidpp_ff_private_data *data = idev->ff->private; u8 params[2]; int range = simple_strtoul(buf, NULL, 10); range = clamp(range, 180, 900); params[0] = range >> 8; 
params[1] = range & 0x00FF; hidpp_ff_queue_work(data, -1, HIDPP_FF_SET_APERTURE, params, ARRAY_SIZE(params)); return count; } static DEVICE_ATTR(range, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, hidpp_ff_range_show, hidpp_ff_range_store); static void hidpp_ff_destroy(struct ff_device *ff) { struct hidpp_ff_private_data *data = ff->private; struct hid_device *hid = data->hidpp->hid_dev; hid_info(hid, "Unloading HID++ force feedback.\n"); device_remove_file(&hid->dev, &dev_attr_range); destroy_workqueue(data->wq); kfree(data->effect_ids); } static int hidpp_ff_init(struct hidpp_device *hidpp, struct hidpp_ff_private_data *data) { struct hid_device *hid = hidpp->hid_dev; struct hid_input *hidinput; struct input_dev *dev; struct usb_device_descriptor *udesc; u16 bcdDevice; struct ff_device *ff; int error, j, num_slots = data->num_effects; u8 version; if (!hid_is_usb(hid)) { hid_err(hid, "device is not USB\n"); return -ENODEV; } if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hid->inputs.next, struct hid_input, list); dev = hidinput->input; if (!dev) { hid_err(hid, "Struct input_dev not set!\n"); return -EINVAL; } /* Get firmware release */ udesc = &(hid_to_usb_dev(hid)->descriptor); bcdDevice = le16_to_cpu(udesc->bcdDevice); version = bcdDevice & 255; /* Set supported force feedback capabilities */ for (j = 0; hidpp_ff_effects[j] >= 0; j++) set_bit(hidpp_ff_effects[j], dev->ffbit); if (version > 1) for (j = 0; hidpp_ff_effects_v2[j] >= 0; j++) set_bit(hidpp_ff_effects_v2[j], dev->ffbit); error = input_ff_create(dev, num_slots); if (error) { hid_err(dev, "Failed to create FF device!\n"); return error; } /* * Create a copy of passed data, so we can transfer memory * ownership to FF core */ data = kmemdup(data, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->effect_ids = kcalloc(num_slots, sizeof(int), GFP_KERNEL); if (!data->effect_ids) { kfree(data); return -ENOMEM; } data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue"); if (!data->wq) { kfree(data->effect_ids); kfree(data); return -ENOMEM; } data->hidpp = hidpp; data->version = version; for (j = 0; j < num_slots; j++) data->effect_ids[j] = -1; ff = dev->ff; ff->private = data; ff->upload = hidpp_ff_upload_effect; ff->erase = hidpp_ff_erase_effect; ff->playback = hidpp_ff_playback; ff->set_gain = hidpp_ff_set_gain; ff->set_autocenter = hidpp_ff_set_autocenter; ff->destroy = hidpp_ff_destroy; /* Create sysfs interface */ error = device_create_file(&(hidpp->hid_dev->dev), &dev_attr_range); if (error) hid_warn(hidpp->hid_dev, "Unable to create sysfs interface for \"range\", errno %d!\n", error); /* init the hardware command queue */ atomic_set(&data->workqueue_size, 0); hid_info(hid, "Force feedback support loaded (firmware release %d).\n", version); return 0; } /* ************************************************************************** */ /* */ /* Device Support */ /* */ /* ************************************************************************** */ /* -------------------------------------------------------------------------- */ /* Touchpad HID++ devices */ /* -------------------------------------------------------------------------- */ #define WTP_MANUAL_RESOLUTION 39 struct wtp_data { u16 x_size, y_size; u8 finger_count; u8 mt_feature_index; u8 button_feature_index; u8 maxcontacts; bool flip_y; unsigned int resolution; }; static int wtp_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned 
long **bit, int *max) { return -1; } static void wtp_populate_input(struct hidpp_device *hidpp, struct input_dev *input_dev) { struct wtp_data *wd = hidpp->private_data; __set_bit(EV_ABS, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); __clear_bit(EV_REL, input_dev->evbit); __clear_bit(EV_LED, input_dev->evbit); input_set_abs_params(input_dev, ABS_MT_POSITION_X, 0, wd->x_size, 0, 0); input_abs_set_res(input_dev, ABS_MT_POSITION_X, wd->resolution); input_set_abs_params(input_dev, ABS_MT_POSITION_Y, 0, wd->y_size, 0, 0); input_abs_set_res(input_dev, ABS_MT_POSITION_Y, wd->resolution); /* Max pressure is not given by the devices, pick one */ input_set_abs_params(input_dev, ABS_MT_PRESSURE, 0, 50, 0, 0); input_set_capability(input_dev, EV_KEY, BTN_LEFT); if (hidpp->quirks & HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS) input_set_capability(input_dev, EV_KEY, BTN_RIGHT); else __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); input_mt_init_slots(input_dev, wd->maxcontacts, INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED); } static void wtp_touch_event(struct hidpp_device *hidpp, struct hidpp_touchpad_raw_xy_finger *touch_report) { struct wtp_data *wd = hidpp->private_data; int slot; if (!touch_report->finger_id || touch_report->contact_type) /* no actual data */ return; slot = input_mt_get_slot_by_key(hidpp->input, touch_report->finger_id); input_mt_slot(hidpp->input, slot); input_mt_report_slot_state(hidpp->input, MT_TOOL_FINGER, touch_report->contact_status); if (touch_report->contact_status) { input_event(hidpp->input, EV_ABS, ABS_MT_POSITION_X, touch_report->x); input_event(hidpp->input, EV_ABS, ABS_MT_POSITION_Y, wd->flip_y ? wd->y_size - touch_report->y : touch_report->y); input_event(hidpp->input, EV_ABS, ABS_MT_PRESSURE, touch_report->area); } } static void wtp_send_raw_xy_event(struct hidpp_device *hidpp, struct hidpp_touchpad_raw_xy *raw) { int i; for (i = 0; i < 2; i++) wtp_touch_event(hidpp, &(raw->fingers[i])); if (raw->end_of_frame && !(hidpp->quirks & HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS)) input_event(hidpp->input, EV_KEY, BTN_LEFT, raw->button); if (raw->end_of_frame || raw->finger_count <= 2) { input_mt_sync_frame(hidpp->input); input_sync(hidpp->input); } } static int wtp_mouse_raw_xy_event(struct hidpp_device *hidpp, u8 *data) { struct wtp_data *wd = hidpp->private_data; u8 c1_area = ((data[7] & 0xf) * (data[7] & 0xf) + (data[7] >> 4) * (data[7] >> 4)) / 2; u8 c2_area = ((data[13] & 0xf) * (data[13] & 0xf) + (data[13] >> 4) * (data[13] >> 4)) / 2; struct hidpp_touchpad_raw_xy raw = { .timestamp = data[1], .fingers = { { .contact_type = 0, .contact_status = !!data[7], .x = get_unaligned_le16(&data[3]), .y = get_unaligned_le16(&data[5]), .z = c1_area, .area = c1_area, .finger_id = data[2], }, { .contact_type = 0, .contact_status = !!data[13], .x = get_unaligned_le16(&data[9]), .y = get_unaligned_le16(&data[11]), .z = c2_area, .area = c2_area, .finger_id = data[8], } }, .finger_count = wd->maxcontacts, .spurious_flag = 0, .end_of_frame = (data[0] >> 7) == 0, .button = data[0] & 0x01, }; wtp_send_raw_xy_event(hidpp, &raw); return 1; } static int wtp_raw_event(struct hid_device *hdev, u8 *data, int size) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); struct wtp_data *wd = hidpp->private_data; struct hidpp_report *report = (struct hidpp_report *)data; struct hidpp_touchpad_raw_xy raw; if (!wd || !hidpp->input) return 1; switch (data[0]) { case 0x02: if (size < 2) { hid_err(hdev, "Received HID report of bad size (%d)", size); return 1; } if (hidpp->quirks & 
HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS) { input_event(hidpp->input, EV_KEY, BTN_LEFT, !!(data[1] & 0x01)); input_event(hidpp->input, EV_KEY, BTN_RIGHT, !!(data[1] & 0x02)); input_sync(hidpp->input); return 0; } else { if (size < 21) return 1; return wtp_mouse_raw_xy_event(hidpp, &data[7]); } case REPORT_ID_HIDPP_LONG: /* size is already checked in hidpp_raw_event. */ if ((report->fap.feature_index != wd->mt_feature_index) || (report->fap.funcindex_clientid != EVENT_TOUCHPAD_RAW_XY)) return 1; hidpp_touchpad_raw_xy_event(hidpp, data + 4, &raw); wtp_send_raw_xy_event(hidpp, &raw); return 0; } return 0; } static int wtp_get_config(struct hidpp_device *hidpp) { struct wtp_data *wd = hidpp->private_data; struct hidpp_touchpad_raw_info raw_info = {0}; u8 feature_type; int ret; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_TOUCHPAD_RAW_XY, &wd->mt_feature_index, &feature_type); if (ret) /* means that the device is not powered up */ return ret; ret = hidpp_touchpad_get_raw_info(hidpp, wd->mt_feature_index, &raw_info); if (ret) return ret; wd->x_size = raw_info.x_size; wd->y_size = raw_info.y_size; wd->maxcontacts = raw_info.maxcontacts; wd->flip_y = raw_info.origin == TOUCHPAD_RAW_XY_ORIGIN_LOWER_LEFT; wd->resolution = raw_info.res; if (!wd->resolution) wd->resolution = WTP_MANUAL_RESOLUTION; return 0; } static int wtp_allocate(struct hid_device *hdev, const struct hid_device_id *id) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); struct wtp_data *wd; wd = devm_kzalloc(&hdev->dev, sizeof(struct wtp_data), GFP_KERNEL); if (!wd) return -ENOMEM; hidpp->private_data = wd; return 0; }; static int wtp_connect(struct hid_device *hdev) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); struct wtp_data *wd = hidpp->private_data; int ret; if (!wd->x_size) { ret = wtp_get_config(hidpp); if (ret) { hid_err(hdev, "Cannot get wtp config: %d\n", ret); return ret; } } return hidpp_touchpad_set_raw_report_state(hidpp, wd->mt_feature_index, true, true); } /* ------------------------------------------------------------------------- */ /* Logitech M560 devices */ /* ------------------------------------------------------------------------- */ /* * Logitech M560 protocol overview * * The Logitech M560 mouse is designed for Windows 8. When the middle and/or * the side buttons are pressed, it sends some keyboard key events * instead of button ones. * To complicate things further, the middle button key sequence * differs between odd and even presses. * * forward button -> Super_R * backward button -> Super_L+'d' (press only) * middle button -> 1st time: Alt_L+SuperL+XF86TouchpadOff (press only) * 2nd time: left-click (press only) * NB: press-only means that when the button is pressed, the * KeyPress/ButtonPress and KeyRelease/ButtonRelease events are generated * together sequentially; when the button is released, no event is * generated! * * With the command * 10<xx>0a 3500af03 (where <xx> is the mouse id), * the mouse reacts differently: * - it never sends a keyboard key event * - for the three mouse buttons it sends: * middle button press 11<xx>0a 3500af00... * side 1 button (forward) press 11<xx>0a 3500b000... * side 2 button (backward) press 11<xx>0a 3500ae00... * middle/side1/side2 button release 11<xx>0a 35000000...
*/ static const u8 m560_config_parameter[] = {0x00, 0xaf, 0x03}; /* how buttons are mapped in the report */ #define M560_MOUSE_BTN_LEFT 0x01 #define M560_MOUSE_BTN_RIGHT 0x02 #define M560_MOUSE_BTN_WHEEL_LEFT 0x08 #define M560_MOUSE_BTN_WHEEL_RIGHT 0x10 #define M560_SUB_ID 0x0a #define M560_BUTTON_MODE_REGISTER 0x35 static int m560_send_config_command(struct hid_device *hdev) { struct hidpp_report response; struct hidpp_device *hidpp_dev; hidpp_dev = hid_get_drvdata(hdev); return hidpp_send_rap_command_sync( hidpp_dev, REPORT_ID_HIDPP_SHORT, M560_SUB_ID, M560_BUTTON_MODE_REGISTER, (u8 *)m560_config_parameter, sizeof(m560_config_parameter), &response ); } static int m560_raw_event(struct hid_device *hdev, u8 *data, int size) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); /* sanity check */ if (!hidpp->input) { hid_err(hdev, "error in parameter\n"); return -EINVAL; } if (size < 7) { hid_err(hdev, "error in report\n"); return 0; } if (data[0] == REPORT_ID_HIDPP_LONG && data[2] == M560_SUB_ID && data[6] == 0x00) { /* * m560 mouse report for middle, forward and backward button * * data[0] = 0x11 * data[1] = device-id * data[2] = 0x0a * data[5] = 0xaf -> middle * 0xb0 -> forward * 0xae -> backward * 0x00 -> release all * data[6] = 0x00 */ switch (data[5]) { case 0xaf: input_report_key(hidpp->input, BTN_MIDDLE, 1); break; case 0xb0: input_report_key(hidpp->input, BTN_FORWARD, 1); break; case 0xae: input_report_key(hidpp->input, BTN_BACK, 1); break; case 0x00: input_report_key(hidpp->input, BTN_BACK, 0); input_report_key(hidpp->input, BTN_FORWARD, 0); input_report_key(hidpp->input, BTN_MIDDLE, 0); break; default: hid_err(hdev, "error in report\n"); return 0; } input_sync(hidpp->input); } else if (data[0] == 0x02) { /* * Logitech M560 mouse report * * data[0] = type (0x02) * data[1..2] = buttons * data[3..5] = xy * data[6] = wheel */ int v; input_report_key(hidpp->input, BTN_LEFT, !!(data[1] & M560_MOUSE_BTN_LEFT)); input_report_key(hidpp->input, BTN_RIGHT, !!(data[1] & M560_MOUSE_BTN_RIGHT)); if (data[1] & M560_MOUSE_BTN_WHEEL_LEFT) { input_report_rel(hidpp->input, REL_HWHEEL, -1); input_report_rel(hidpp->input, REL_HWHEEL_HI_RES, -120); } else if (data[1] & M560_MOUSE_BTN_WHEEL_RIGHT) { input_report_rel(hidpp->input, REL_HWHEEL, 1); input_report_rel(hidpp->input, REL_HWHEEL_HI_RES, 120); } v = hid_snto32(hid_field_extract(hdev, data+3, 0, 12), 12); input_report_rel(hidpp->input, REL_X, v); v = hid_snto32(hid_field_extract(hdev, data+3, 12, 12), 12); input_report_rel(hidpp->input, REL_Y, v); v = hid_snto32(data[6], 8); if (v != 0) hidpp_scroll_counter_handle_scroll(hidpp->input, &hidpp->vertical_wheel_counter, v); input_sync(hidpp->input); } return 1; } static void m560_populate_input(struct hidpp_device *hidpp, struct input_dev *input_dev) { __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_MIDDLE, input_dev->keybit); __set_bit(BTN_RIGHT, input_dev->keybit); __set_bit(BTN_LEFT, input_dev->keybit); __set_bit(BTN_BACK, input_dev->keybit); __set_bit(BTN_FORWARD, input_dev->keybit); __set_bit(EV_REL, input_dev->evbit); __set_bit(REL_X, input_dev->relbit); __set_bit(REL_Y, input_dev->relbit); __set_bit(REL_WHEEL, input_dev->relbit); __set_bit(REL_HWHEEL, input_dev->relbit); __set_bit(REL_WHEEL_HI_RES, input_dev->relbit); __set_bit(REL_HWHEEL_HI_RES, input_dev->relbit); } static int m560_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { return -1; } /* 
------------------------------------------------------------------------- */ /* Logitech K400 devices */ /* ------------------------------------------------------------------------- */ /* * The Logitech K400 keyboard has an embedded touchpad which is seen * as a mouse from the OS point of view. There is a hardware shortcut to disable * tap-to-click but the setting is not remembered across reset, annoying some * users. * * We can toggle this feature from the host by using the feature 0x6010: * Touchpad FW items */ struct k400_private_data { u8 feature_index; }; static int k400_disable_tap_to_click(struct hidpp_device *hidpp) { struct k400_private_data *k400 = hidpp->private_data; struct hidpp_touchpad_fw_items items = {}; int ret; u8 feature_type; if (!k400->feature_index) { ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_TOUCHPAD_FW_ITEMS, &k400->feature_index, &feature_type); if (ret) /* means that the device is not powered up */ return ret; } ret = hidpp_touchpad_fw_items_set(hidpp, k400->feature_index, &items); if (ret) return ret; return 0; } static int k400_allocate(struct hid_device *hdev) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); struct k400_private_data *k400; k400 = devm_kzalloc(&hdev->dev, sizeof(struct k400_private_data), GFP_KERNEL); if (!k400) return -ENOMEM; hidpp->private_data = k400; return 0; }; static int k400_connect(struct hid_device *hdev) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); if (!disable_tap_to_click) return 0; return k400_disable_tap_to_click(hidpp); } /* ------------------------------------------------------------------------- */ /* Logitech G920 Driving Force Racing Wheel for Xbox One */ /* ------------------------------------------------------------------------- */ #define HIDPP_PAGE_G920_FORCE_FEEDBACK 0x8123 static int g920_ff_set_autocenter(struct hidpp_device *hidpp, struct hidpp_ff_private_data *data) { struct hidpp_report response; u8 params[HIDPP_AUTOCENTER_PARAMS_LENGTH] = { [1] = HIDPP_FF_EFFECT_SPRING | HIDPP_FF_EFFECT_AUTOSTART, }; int ret; /* initialize with zero autocenter to get wheel in usable state */ dbg_hid("Setting autocenter to 0.\n"); ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, HIDPP_FF_DOWNLOAD_EFFECT, params, ARRAY_SIZE(params), &response); if (ret) hid_warn(hidpp->hid_dev, "Failed to autocenter device!\n"); else data->slot_autocenter = response.fap.params[0]; return ret; } static int g920_get_config(struct hidpp_device *hidpp, struct hidpp_ff_private_data *data) { struct hidpp_report response; u8 feature_type; int ret; memset(data, 0, sizeof(*data)); /* Find feature and store for later use */ ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_G920_FORCE_FEEDBACK, &data->feature_index, &feature_type); if (ret) return ret; /* Read number of slots available in device */ ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, HIDPP_FF_GET_INFO, NULL, 0, &response); if (ret) { if (ret < 0) return ret; hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); return -EPROTO; } data->num_effects = response.fap.params[0] - HIDPP_FF_RESERVED_SLOTS; /* reset all forces */ ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, HIDPP_FF_RESET_ALL, NULL, 0, &response); if (ret) hid_warn(hidpp->hid_dev, "Failed to reset all forces!\n"); ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, HIDPP_FF_GET_APERTURE, NULL, 0, &response); if (ret) { hid_warn(hidpp->hid_dev, "Failed to read range from device!\n"); } data->range = ret ?
900 : get_unaligned_be16(&response.fap.params[0]); /* Read the current gain values */ ret = hidpp_send_fap_command_sync(hidpp, data->feature_index, HIDPP_FF_GET_GLOBAL_GAINS, NULL, 0, &response); if (ret) hid_warn(hidpp->hid_dev, "Failed to read gain values from device!\n"); data->gain = ret ? 0xffff : get_unaligned_be16(&response.fap.params[0]); /* ignore boost value at response.fap.params[2] */ return g920_ff_set_autocenter(hidpp, data); } /* -------------------------------------------------------------------------- */ /* Logitech Dinovo Mini keyboard with builtin touchpad */ /* -------------------------------------------------------------------------- */ #define DINOVO_MINI_PRODUCT_ID 0xb30c static int lg_dinovo_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_LOGIVENDOR) return 0; switch (usage->hid & HID_USAGE) { case 0x00d: lg_map_key_clear(KEY_MEDIA); break; default: return 0; } return 1; } /* -------------------------------------------------------------------------- */ /* HID++1.0 devices which use HID++ reports for their wheels */ /* -------------------------------------------------------------------------- */ static int hidpp10_wheel_connect(struct hidpp_device *hidpp) { return hidpp10_set_register(hidpp, HIDPP_REG_ENABLE_REPORTS, 0, HIDPP_ENABLE_WHEEL_REPORT | HIDPP_ENABLE_HWHEEL_REPORT, HIDPP_ENABLE_WHEEL_REPORT | HIDPP_ENABLE_HWHEEL_REPORT); } static int hidpp10_wheel_raw_event(struct hidpp_device *hidpp, u8 *data, int size) { s8 value, hvalue; if (!hidpp->input) return -EINVAL; if (size < 7) return 0; if (data[0] != REPORT_ID_HIDPP_SHORT || data[2] != HIDPP_SUB_ID_ROLLER) return 0; value = data[3]; hvalue = data[4]; input_report_rel(hidpp->input, REL_WHEEL, value); input_report_rel(hidpp->input, REL_WHEEL_HI_RES, value * 120); input_report_rel(hidpp->input, REL_HWHEEL, hvalue); input_report_rel(hidpp->input, REL_HWHEEL_HI_RES, hvalue * 120); input_sync(hidpp->input); return 1; } static void hidpp10_wheel_populate_input(struct hidpp_device *hidpp, struct input_dev *input_dev) { __set_bit(EV_REL, input_dev->evbit); __set_bit(REL_WHEEL, input_dev->relbit); __set_bit(REL_WHEEL_HI_RES, input_dev->relbit); __set_bit(REL_HWHEEL, input_dev->relbit); __set_bit(REL_HWHEEL_HI_RES, input_dev->relbit); } /* -------------------------------------------------------------------------- */ /* HID++1.0 mice which use HID++ reports for extra mouse buttons */ /* -------------------------------------------------------------------------- */ static int hidpp10_extra_mouse_buttons_connect(struct hidpp_device *hidpp) { return hidpp10_set_register(hidpp, HIDPP_REG_ENABLE_REPORTS, 0, HIDPP_ENABLE_MOUSE_EXTRA_BTN_REPORT, HIDPP_ENABLE_MOUSE_EXTRA_BTN_REPORT); } static int hidpp10_extra_mouse_buttons_raw_event(struct hidpp_device *hidpp, u8 *data, int size) { int i; if (!hidpp->input) return -EINVAL; if (size < 7) return 0; if (data[0] != REPORT_ID_HIDPP_SHORT || data[2] != HIDPP_SUB_ID_MOUSE_EXTRA_BTNS) return 0; /* * Buttons are either delivered through the regular mouse report *or* * through the extra buttons report. At least for button 6 how it is * delivered differs per receiver firmware version. Even receivers with * the same usb-id show different behavior, so we handle both cases. 
*/ for (i = 0; i < 8; i++) input_report_key(hidpp->input, BTN_MOUSE + i, (data[3] & (1 << i))); /* Some mice report events on button 9+, use BTN_MISC */ for (i = 0; i < 8; i++) input_report_key(hidpp->input, BTN_MISC + i, (data[4] & (1 << i))); input_sync(hidpp->input); return 1; } static void hidpp10_extra_mouse_buttons_populate_input( struct hidpp_device *hidpp, struct input_dev *input_dev) { /* BTN_MOUSE - BTN_MOUSE+7 are set already by the descriptor */ __set_bit(BTN_0, input_dev->keybit); __set_bit(BTN_1, input_dev->keybit); __set_bit(BTN_2, input_dev->keybit); __set_bit(BTN_3, input_dev->keybit); __set_bit(BTN_4, input_dev->keybit); __set_bit(BTN_5, input_dev->keybit); __set_bit(BTN_6, input_dev->keybit); __set_bit(BTN_7, input_dev->keybit); } /* -------------------------------------------------------------------------- */ /* HID++1.0 kbds which only report 0x10xx consumer usages through sub-id 0x03 */ /* -------------------------------------------------------------------------- */ /* Find the consumer-page input report desc and change Maximums to 0x107f */ static u8 *hidpp10_consumer_keys_report_fixup(struct hidpp_device *hidpp, u8 *_rdesc, unsigned int *rsize) { /* Note 0 terminated so we can use strnstr to search for this. */ static const char consumer_rdesc_start[] = { 0x05, 0x0C, /* USAGE_PAGE (Consumer Devices) */ 0x09, 0x01, /* USAGE (Consumer Control) */ 0xA1, 0x01, /* COLLECTION (Application) */ 0x85, 0x03, /* REPORT_ID = 3 */ 0x75, 0x10, /* REPORT_SIZE (16) */ 0x95, 0x02, /* REPORT_COUNT (2) */ 0x15, 0x01, /* LOGICAL_MIN (1) */ 0x26, 0x00 /* LOGICAL_MAX (... */ }; char *consumer_rdesc, *rdesc = (char *)_rdesc; unsigned int size; consumer_rdesc = strnstr(rdesc, consumer_rdesc_start, *rsize); size = *rsize - (consumer_rdesc - rdesc); if (consumer_rdesc && size >= 25) { consumer_rdesc[15] = 0x7f; consumer_rdesc[16] = 0x10; consumer_rdesc[20] = 0x7f; consumer_rdesc[21] = 0x10; } return _rdesc; } static int hidpp10_consumer_keys_connect(struct hidpp_device *hidpp) { return hidpp10_set_register(hidpp, HIDPP_REG_ENABLE_REPORTS, 0, HIDPP_ENABLE_CONSUMER_REPORT, HIDPP_ENABLE_CONSUMER_REPORT); } static int hidpp10_consumer_keys_raw_event(struct hidpp_device *hidpp, u8 *data, int size) { u8 consumer_report[5]; if (size < 7) return 0; if (data[0] != REPORT_ID_HIDPP_SHORT || data[2] != HIDPP_SUB_ID_CONSUMER_VENDOR_KEYS) return 0; /* * Build a normal consumer report (3) out of the data, this detour * is necessary to get some keyboards to report their 0x10xx usages. 
*/ consumer_report[0] = 0x03; memcpy(&consumer_report[1], &data[3], 4); /* We are called from atomic context */ hid_report_raw_event(hidpp->hid_dev, HID_INPUT_REPORT, consumer_report, 5, 1); return 1; } /* -------------------------------------------------------------------------- */ /* High-resolution scroll wheels */ /* -------------------------------------------------------------------------- */ static int hi_res_scroll_enable(struct hidpp_device *hidpp) { int ret; u8 multiplier = 1; if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_HI_RES_WHEEL) { ret = hidpp_hrw_set_wheel_mode(hidpp, false, true, false); if (ret == 0) ret = hidpp_hrw_get_wheel_capability(hidpp, &multiplier); } else if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_HI_RES_SCROLL) { ret = hidpp_hrs_set_highres_scrolling_mode(hidpp, true, &multiplier); } else /* if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP10_FAST_SCROLL) */ { ret = hidpp10_enable_scrolling_acceleration(hidpp); multiplier = 8; } if (ret) { hid_dbg(hidpp->hid_dev, "Could not enable hi-res scrolling: %d\n", ret); return ret; } if (multiplier == 0) { hid_dbg(hidpp->hid_dev, "Invalid multiplier 0 from device, setting it to 1\n"); multiplier = 1; } hidpp->vertical_wheel_counter.wheel_multiplier = multiplier; hid_dbg(hidpp->hid_dev, "wheel multiplier = %d\n", multiplier); return 0; } static int hidpp_initialize_hires_scroll(struct hidpp_device *hidpp) { int ret; unsigned long capabilities; capabilities = hidpp->capabilities; if (hidpp->protocol_major >= 2) { u8 feature_index; u8 feature_type; ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_HIRES_WHEEL, &feature_index, &feature_type); if (!ret) { hidpp->capabilities |= HIDPP_CAPABILITY_HIDPP20_HI_RES_WHEEL; hid_dbg(hidpp->hid_dev, "Detected HID++ 2.0 hi-res scroll wheel\n"); return 0; } ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_HI_RESOLUTION_SCROLLING, &feature_index, &feature_type); if (!ret) { hidpp->capabilities |= HIDPP_CAPABILITY_HIDPP20_HI_RES_SCROLL; hid_dbg(hidpp->hid_dev, "Detected HID++ 2.0 hi-res scrolling\n"); } } else { /* We cannot detect fast scrolling support on HID++ 1.0 devices */ if (hidpp->quirks & HIDPP_QUIRK_HI_RES_SCROLL_1P0) { hidpp->capabilities |= HIDPP_CAPABILITY_HIDPP10_FAST_SCROLL; hid_dbg(hidpp->hid_dev, "Detected HID++ 1.0 fast scroll\n"); } } if (hidpp->capabilities == capabilities) hid_dbg(hidpp->hid_dev, "Did not detect HID++ hi-res scrolling hardware support\n"); return 0; } /* -------------------------------------------------------------------------- */ /* Generic HID++ devices */ /* -------------------------------------------------------------------------- */ static u8 *hidpp_report_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *rsize) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); if (!hidpp) return rdesc; /* For 27 MHz keyboards the quirk gets set after hid_parse. 
*/ if (hdev->group == HID_GROUP_LOGITECH_27MHZ_DEVICE || (hidpp->quirks & HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS)) rdesc = hidpp10_consumer_keys_report_fixup(hidpp, rdesc, rsize); return rdesc; } static int hidpp_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); if (!hidpp) return 0; if (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP) return wtp_input_mapping(hdev, hi, field, usage, bit, max); else if (hidpp->quirks & HIDPP_QUIRK_CLASS_M560 && field->application != HID_GD_MOUSE) return m560_input_mapping(hdev, hi, field, usage, bit, max); if (hdev->product == DINOVO_MINI_PRODUCT_ID) return lg_dinovo_input_mapping(hdev, hi, field, usage, bit, max); return 0; } static int hidpp_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); if (!hidpp) return 0; /* Ensure that Logitech G920 is not given a default fuzz/flat value */ if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920) { if (usage->type == EV_ABS && (usage->code == ABS_X || usage->code == ABS_Y || usage->code == ABS_Z || usage->code == ABS_RZ)) { field->application = HID_GD_MULTIAXIS; } } return 0; } static void hidpp_populate_input(struct hidpp_device *hidpp, struct input_dev *input) { hidpp->input = input; if (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP) wtp_populate_input(hidpp, input); else if (hidpp->quirks & HIDPP_QUIRK_CLASS_M560) m560_populate_input(hidpp, input); if (hidpp->quirks & HIDPP_QUIRK_HIDPP_WHEELS) hidpp10_wheel_populate_input(hidpp, input); if (hidpp->quirks & HIDPP_QUIRK_HIDPP_EXTRA_MOUSE_BTNS) hidpp10_extra_mouse_buttons_populate_input(hidpp, input); } static int hidpp_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); struct input_dev *input = hidinput->input; if (!hidpp) return 0; hidpp_populate_input(hidpp, input); return 0; } static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, int size) { struct hidpp_report *question = hidpp->send_receive_buf; struct hidpp_report *answer = hidpp->send_receive_buf; struct hidpp_report *report = (struct hidpp_report *)data; int ret; /* * If the mutex is locked then we have a pending answer from a * previously sent command. 
*/ if (unlikely(mutex_is_locked(&hidpp->send_mutex))) { /* * Check for a correct hidpp20 answer or the corresponding * error */ if (hidpp_match_answer(question, report) || hidpp_match_error(question, report)) { *answer = *report; hidpp->answer_available = true; wake_up(&hidpp->wait); /* * This was an answer to a command that this driver sent * We return 1 to hid-core to avoid forwarding the * command upstream as it has been treated by the driver */ return 1; } } if (unlikely(hidpp_report_is_connect_event(hidpp, report))) { if (schedule_work(&hidpp->work) == 0) dbg_hid("%s: connect event already queued\n", __func__); return 1; } if (hidpp->hid_dev->group == HID_GROUP_LOGITECH_27MHZ_DEVICE && data[0] == REPORT_ID_HIDPP_SHORT && data[2] == HIDPP_SUB_ID_USER_IFACE_EVENT && (data[3] & HIDPP_USER_IFACE_EVENT_ENCRYPTION_KEY_LOST)) { dev_err_ratelimited(&hidpp->hid_dev->dev, "Error the keyboard's wireless encryption key has been lost, your keyboard will not work unless you re-configure encryption.\n"); dev_err_ratelimited(&hidpp->hid_dev->dev, "See: https://gitlab.freedesktop.org/jwrdegoede/logitech-27mhz-keyboard-encryption-setup/\n"); } if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) { ret = hidpp20_battery_event_1000(hidpp, data, size); if (ret != 0) return ret; ret = hidpp20_battery_event_1004(hidpp, data, size); if (ret != 0) return ret; ret = hidpp_solar_battery_event(hidpp, data, size); if (ret != 0) return ret; ret = hidpp20_battery_voltage_event(hidpp, data, size); if (ret != 0) return ret; ret = hidpp20_adc_measurement_event_1f20(hidpp, data, size); if (ret != 0) return ret; } if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP10_BATTERY) { ret = hidpp10_battery_event(hidpp, data, size); if (ret != 0) return ret; } if (hidpp->quirks & HIDPP_QUIRK_HIDPP_WHEELS) { ret = hidpp10_wheel_raw_event(hidpp, data, size); if (ret != 0) return ret; } if (hidpp->quirks & HIDPP_QUIRK_HIDPP_EXTRA_MOUSE_BTNS) { ret = hidpp10_extra_mouse_buttons_raw_event(hidpp, data, size); if (ret != 0) return ret; } if (hidpp->quirks & HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS) { ret = hidpp10_consumer_keys_raw_event(hidpp, data, size); if (ret != 0) return ret; } return 0; } static int hidpp_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); int ret = 0; if (!hidpp) return 0; /* Generic HID++ processing. */ switch (data[0]) { case REPORT_ID_HIDPP_VERY_LONG: if (size != hidpp->very_long_report_length) { hid_err(hdev, "received hid++ report of bad size (%d)", size); return 1; } ret = hidpp_raw_hidpp_event(hidpp, data, size); break; case REPORT_ID_HIDPP_LONG: if (size != HIDPP_REPORT_LONG_LENGTH) { hid_err(hdev, "received hid++ report of bad size (%d)", size); return 1; } ret = hidpp_raw_hidpp_event(hidpp, data, size); break; case REPORT_ID_HIDPP_SHORT: if (size != HIDPP_REPORT_SHORT_LENGTH) { hid_err(hdev, "received hid++ report of bad size (%d)", size); return 1; } ret = hidpp_raw_hidpp_event(hidpp, data, size); break; } /* If no report is available for further processing, skip calling * raw_event of subclasses. 
*/ if (ret != 0) return ret; if (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP) return wtp_raw_event(hdev, data, size); else if (hidpp->quirks & HIDPP_QUIRK_CLASS_M560) return m560_raw_event(hdev, data, size); return 0; } static int hidpp_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { /* This function will only be called for scroll events, due to the * restriction imposed in hidpp_usages. */ struct hidpp_device *hidpp = hid_get_drvdata(hdev); struct hidpp_scroll_counter *counter; if (!hidpp) return 0; counter = &hidpp->vertical_wheel_counter; /* A scroll event may occur before the multiplier has been retrieved or * the input device set, or high-res scroll enabling may fail. In such * cases we must return early (falling back to default behaviour) to * avoid a crash in hidpp_scroll_counter_handle_scroll. */ if (!(hidpp->capabilities & HIDPP_CAPABILITY_HI_RES_SCROLL) || value == 0 || hidpp->input == NULL || counter->wheel_multiplier == 0) return 0; hidpp_scroll_counter_handle_scroll(hidpp->input, counter, value); return 1; } static int hidpp_initialize_battery(struct hidpp_device *hidpp) { static atomic_t battery_no = ATOMIC_INIT(0); struct power_supply_config cfg = { .drv_data = hidpp }; struct power_supply_desc *desc = &hidpp->battery.desc; enum power_supply_property *battery_props; struct hidpp_battery *battery; unsigned int num_battery_props; unsigned long n; int ret; if (hidpp->battery.ps) return 0; hidpp->battery.feature_index = 0xff; hidpp->battery.solar_feature_index = 0xff; hidpp->battery.voltage_feature_index = 0xff; hidpp->battery.adc_measurement_feature_index = 0xff; if (hidpp->protocol_major >= 2) { if (hidpp->quirks & HIDPP_QUIRK_CLASS_K750) ret = hidpp_solar_request_battery_event(hidpp); else { /* we only support one battery feature right now, so let's first check the ones that support battery level first and leave voltage for last */ ret = hidpp20_query_battery_info_1000(hidpp); if (ret) ret = hidpp20_query_battery_info_1004(hidpp); if (ret) ret = hidpp20_query_battery_voltage_info(hidpp); if (ret) ret = hidpp20_query_adc_measurement_info_1f20(hidpp); } if (ret) return ret; hidpp->capabilities |= HIDPP_CAPABILITY_HIDPP20_BATTERY; } else { ret = hidpp10_query_battery_status(hidpp); if (ret) { ret = hidpp10_query_battery_mileage(hidpp); if (ret) return -ENOENT; hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_MILEAGE; } else { hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS; } hidpp->capabilities |= HIDPP_CAPABILITY_HIDPP10_BATTERY; } battery_props = devm_kmemdup(&hidpp->hid_dev->dev, hidpp_battery_props, sizeof(hidpp_battery_props), GFP_KERNEL); if (!battery_props) return -ENOMEM; num_battery_props = ARRAY_SIZE(hidpp_battery_props) - 3; if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_MILEAGE || hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE || hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_VOLTAGE || hidpp->capabilities & HIDPP_CAPABILITY_ADC_MEASUREMENT) battery_props[num_battery_props++] = POWER_SUPPLY_PROP_CAPACITY; if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS) battery_props[num_battery_props++] = POWER_SUPPLY_PROP_CAPACITY_LEVEL; if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_VOLTAGE || hidpp->capabilities & HIDPP_CAPABILITY_ADC_MEASUREMENT) battery_props[num_battery_props++] = POWER_SUPPLY_PROP_VOLTAGE_NOW; battery = &hidpp->battery; n = atomic_inc_return(&battery_no) - 1; desc->properties = battery_props; desc->num_properties = num_battery_props; desc->get_property = 
hidpp_battery_get_property; sprintf(battery->name, "hidpp_battery_%ld", n); desc->name = battery->name; desc->type = POWER_SUPPLY_TYPE_BATTERY; desc->use_for_apm = 0; battery->ps = devm_power_supply_register(&hidpp->hid_dev->dev, &battery->desc, &cfg); if (IS_ERR(battery->ps)) return PTR_ERR(battery->ps); power_supply_powers(battery->ps, &hidpp->hid_dev->dev); return ret; } /* Get name + serial for USB and Bluetooth HID++ devices */ static void hidpp_non_unifying_init(struct hidpp_device *hidpp) { struct hid_device *hdev = hidpp->hid_dev; char *name; /* Bluetooth devices already have their serialnr set */ if (hid_is_usb(hdev)) hidpp_serial_init(hidpp); name = hidpp_get_device_name(hidpp); if (name) { dbg_hid("HID++: Got name: %s\n", name); snprintf(hdev->name, sizeof(hdev->name), "%s", name); kfree(name); } } static int hidpp_input_open(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); return hid_hw_open(hid); } static void hidpp_input_close(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); hid_hw_close(hid); } static struct input_dev *hidpp_allocate_input(struct hid_device *hdev) { struct input_dev *input_dev = devm_input_allocate_device(&hdev->dev); struct hidpp_device *hidpp = hid_get_drvdata(hdev); if (!input_dev) return NULL; input_set_drvdata(input_dev, hdev); input_dev->open = hidpp_input_open; input_dev->close = hidpp_input_close; input_dev->name = hidpp->name; input_dev->phys = hdev->phys; input_dev->uniq = hdev->uniq; input_dev->id.bustype = hdev->bus; input_dev->id.vendor = hdev->vendor; input_dev->id.product = hdev->product; input_dev->id.version = hdev->version; input_dev->dev.parent = &hdev->dev; return input_dev; } static void hidpp_connect_event(struct work_struct *work) { struct hidpp_device *hidpp = container_of(work, struct hidpp_device, work); struct hid_device *hdev = hidpp->hid_dev; struct input_dev *input; char *name, *devm_name; int ret; /* Get device version to check if it is connected */ ret = hidpp_root_get_protocol_version(hidpp); if (ret) { hid_dbg(hidpp->hid_dev, "Disconnected\n"); if (hidpp->battery.ps) { hidpp->battery.online = false; hidpp->battery.status = POWER_SUPPLY_STATUS_UNKNOWN; hidpp->battery.level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN; power_supply_changed(hidpp->battery.ps); } return; } if (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP) { ret = wtp_connect(hdev); if (ret) return; } else if (hidpp->quirks & HIDPP_QUIRK_CLASS_M560) { ret = m560_send_config_command(hdev); if (ret) return; } else if (hidpp->quirks & HIDPP_QUIRK_CLASS_K400) { ret = k400_connect(hdev); if (ret) return; } if (hidpp->quirks & HIDPP_QUIRK_HIDPP_WHEELS) { ret = hidpp10_wheel_connect(hidpp); if (ret) return; } if (hidpp->quirks & HIDPP_QUIRK_HIDPP_EXTRA_MOUSE_BTNS) { ret = hidpp10_extra_mouse_buttons_connect(hidpp); if (ret) return; } if (hidpp->quirks & HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS) { ret = hidpp10_consumer_keys_connect(hidpp); if (ret) return; } if (hidpp->protocol_major >= 2) { u8 feature_index; if (!hidpp_get_wireless_feature_index(hidpp, &feature_index)) hidpp->wireless_feature_index = feature_index; } if (hidpp->name == hdev->name && hidpp->protocol_major >= 2) { name = hidpp_get_device_name(hidpp); if (name) { devm_name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s", name); kfree(name); if (!devm_name) return; hidpp->name = devm_name; } } hidpp_initialize_battery(hidpp); if (!hid_is_usb(hidpp->hid_dev)) hidpp_initialize_hires_scroll(hidpp); /* forward current battery state */ if (hidpp->capabilities & 
HIDPP_CAPABILITY_HIDPP10_BATTERY) { hidpp10_enable_battery_reporting(hidpp); if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_MILEAGE) hidpp10_query_battery_mileage(hidpp); else hidpp10_query_battery_status(hidpp); } else if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) { if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_VOLTAGE) hidpp20_query_battery_voltage_info(hidpp); else if (hidpp->capabilities & HIDPP_CAPABILITY_UNIFIED_BATTERY) hidpp20_query_battery_info_1004(hidpp); else if (hidpp->capabilities & HIDPP_CAPABILITY_ADC_MEASUREMENT) hidpp20_query_adc_measurement_info_1f20(hidpp); else hidpp20_query_battery_info_1000(hidpp); } if (hidpp->battery.ps) power_supply_changed(hidpp->battery.ps); if (hidpp->capabilities & HIDPP_CAPABILITY_HI_RES_SCROLL) hi_res_scroll_enable(hidpp); if (!(hidpp->quirks & HIDPP_QUIRK_DELAYED_INIT) || hidpp->delayed_input) /* if the input nodes are already created, we can stop now */ return; input = hidpp_allocate_input(hdev); if (!input) { hid_err(hdev, "cannot allocate new input device: %d\n", ret); return; } hidpp_populate_input(hidpp, input); ret = input_register_device(input); if (ret) { input_free_device(input); return; } hidpp->delayed_input = input; } static DEVICE_ATTR(builtin_power_supply, 0000, NULL, NULL); static struct attribute *sysfs_attrs[] = { &dev_attr_builtin_power_supply.attr, NULL }; static const struct attribute_group ps_attribute_group = { .attrs = sysfs_attrs }; static int hidpp_get_report_length(struct hid_device *hdev, int id) { struct hid_report_enum *re; struct hid_report *report; re = &(hdev->report_enum[HID_OUTPUT_REPORT]); report = re->report_id_hash[id]; if (!report) return 0; return report->field[0]->report_count + 1; } static u8 hidpp_validate_device(struct hid_device *hdev) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); int id, report_length; u8 supported_reports = 0; id = REPORT_ID_HIDPP_SHORT; report_length = hidpp_get_report_length(hdev, id); if (report_length) { if (report_length < HIDPP_REPORT_SHORT_LENGTH) goto bad_device; supported_reports |= HIDPP_REPORT_SHORT_SUPPORTED; } id = REPORT_ID_HIDPP_LONG; report_length = hidpp_get_report_length(hdev, id); if (report_length) { if (report_length < HIDPP_REPORT_LONG_LENGTH) goto bad_device; supported_reports |= HIDPP_REPORT_LONG_SUPPORTED; } id = REPORT_ID_HIDPP_VERY_LONG; report_length = hidpp_get_report_length(hdev, id); if (report_length) { if (report_length < HIDPP_REPORT_LONG_LENGTH || report_length > HIDPP_REPORT_VERY_LONG_MAX_LENGTH) goto bad_device; supported_reports |= HIDPP_REPORT_VERY_LONG_SUPPORTED; hidpp->very_long_report_length = report_length; } return supported_reports; bad_device: hid_warn(hdev, "not enough values in hidpp report %d\n", id); return false; } static bool hidpp_application_equals(struct hid_device *hdev, unsigned int application) { struct list_head *report_list; struct hid_report *report; report_list = &hdev->report_enum[HID_INPUT_REPORT].report_list; report = list_first_entry_or_null(report_list, struct hid_report, list); return report && report->application == application; } static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct hidpp_device *hidpp; int ret; unsigned int connect_mask = HID_CONNECT_DEFAULT; /* report_fixup needs drvdata to be set before we call hid_parse */ hidpp = devm_kzalloc(&hdev->dev, sizeof(*hidpp), GFP_KERNEL); if (!hidpp) return -ENOMEM; hidpp->hid_dev = hdev; hidpp->name = hdev->name; hidpp->quirks = id->driver_data; hid_set_drvdata(hdev, hidpp); ret = 
hid_parse(hdev); if (ret) { hid_err(hdev, "%s:parse failed\n", __func__); return ret; } /* * Make sure the device is HID++ capable, otherwise treat as generic HID */ hidpp->supported_reports = hidpp_validate_device(hdev); if (!hidpp->supported_reports) { hid_set_drvdata(hdev, NULL); devm_kfree(&hdev->dev, hidpp); return hid_hw_start(hdev, HID_CONNECT_DEFAULT); } if (id->group == HID_GROUP_LOGITECH_27MHZ_DEVICE && hidpp_application_equals(hdev, HID_GD_MOUSE)) hidpp->quirks |= HIDPP_QUIRK_HIDPP_WHEELS | HIDPP_QUIRK_HIDPP_EXTRA_MOUSE_BTNS; if (id->group == HID_GROUP_LOGITECH_27MHZ_DEVICE && hidpp_application_equals(hdev, HID_GD_KEYBOARD)) hidpp->quirks |= HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS; if (hidpp->quirks & HIDPP_QUIRK_CLASS_WTP) { ret = wtp_allocate(hdev, id); if (ret) return ret; } else if (hidpp->quirks & HIDPP_QUIRK_CLASS_K400) { ret = k400_allocate(hdev); if (ret) return ret; } INIT_WORK(&hidpp->work, hidpp_connect_event); mutex_init(&hidpp->send_mutex); init_waitqueue_head(&hidpp->wait); /* indicates we are handling the battery properties in the kernel */ ret = sysfs_create_group(&hdev->dev.kobj, &ps_attribute_group); if (ret) hid_warn(hdev, "Cannot allocate sysfs group for %s\n", hdev->name); /* * First call hid_hw_start(hdev, 0) to allow IO without connecting any * hid subdrivers (hid-input, hidraw). This allows retrieving the dev's * name and serial number and store these in hdev->name and hdev->uniq, * before the hid-input and hidraw drivers expose these to userspace. */ ret = hid_hw_start(hdev, 0); if (ret) { hid_err(hdev, "hw start failed\n"); goto hid_hw_start_fail; } ret = hid_hw_open(hdev); if (ret < 0) { dev_err(&hdev->dev, "%s:hid_hw_open returned error:%d\n", __func__, ret); goto hid_hw_open_fail; } /* Allow incoming packets */ hid_device_io_start(hdev); /* Get name + serial, store in hdev->name + hdev->uniq */ if (id->group == HID_GROUP_LOGITECH_DJ_DEVICE) hidpp_unifying_init(hidpp); else hidpp_non_unifying_init(hidpp); if (hidpp->quirks & HIDPP_QUIRK_DELAYED_INIT) connect_mask &= ~HID_CONNECT_HIDINPUT; /* Now export the actual inputs and hidraw nodes to the world */ hid_device_io_stop(hdev); ret = hid_connect(hdev, connect_mask); if (ret) { hid_err(hdev, "%s:hid_connect returned error %d\n", __func__, ret); goto hid_hw_init_fail; } /* Check for connected devices now that incoming packets will not be disabled again */ hid_device_io_start(hdev); schedule_work(&hidpp->work); flush_work(&hidpp->work); if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920) { struct hidpp_ff_private_data data; ret = g920_get_config(hidpp, &data); if (!ret) ret = hidpp_ff_init(hidpp, &data); if (ret) hid_warn(hidpp->hid_dev, "Unable to initialize force feedback support, errno %d\n", ret); } /* * This relies on logi_dj_ll_close() being a no-op so that DJ connection * events will still be received. 
*/ hid_hw_close(hdev); return ret; hid_hw_init_fail: hid_hw_close(hdev); hid_hw_open_fail: hid_hw_stop(hdev); hid_hw_start_fail: sysfs_remove_group(&hdev->dev.kobj, &ps_attribute_group); cancel_work_sync(&hidpp->work); mutex_destroy(&hidpp->send_mutex); return ret; } static void hidpp_remove(struct hid_device *hdev) { struct hidpp_device *hidpp = hid_get_drvdata(hdev); if (!hidpp) return hid_hw_stop(hdev); sysfs_remove_group(&hdev->dev.kobj, &ps_attribute_group); hid_hw_stop(hdev); cancel_work_sync(&hidpp->work); mutex_destroy(&hidpp->send_mutex); } #define LDJ_DEVICE(product) \ HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, \ USB_VENDOR_ID_LOGITECH, (product)) #define L27MHZ_DEVICE(product) \ HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_27MHZ_DEVICE, \ USB_VENDOR_ID_LOGITECH, (product)) static const struct hid_device_id hidpp_devices[] = { { /* wireless touchpad */ LDJ_DEVICE(0x4011), .driver_data = HIDPP_QUIRK_CLASS_WTP | HIDPP_QUIRK_DELAYED_INIT | HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS }, { /* wireless touchpad T650 */ LDJ_DEVICE(0x4101), .driver_data = HIDPP_QUIRK_CLASS_WTP | HIDPP_QUIRK_DELAYED_INIT }, { /* wireless touchpad T651 */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651), .driver_data = HIDPP_QUIRK_CLASS_WTP | HIDPP_QUIRK_DELAYED_INIT }, { /* Mouse Logitech Anywhere MX */ LDJ_DEVICE(0x1017), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 }, { /* Mouse logitech M560 */ LDJ_DEVICE(0x402d), .driver_data = HIDPP_QUIRK_DELAYED_INIT | HIDPP_QUIRK_CLASS_M560 }, { /* Mouse Logitech M705 (firmware RQM17) */ LDJ_DEVICE(0x101b), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 }, { /* Mouse Logitech Performance MX */ LDJ_DEVICE(0x101a), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 }, { /* Keyboard logitech K400 */ LDJ_DEVICE(0x4024), .driver_data = HIDPP_QUIRK_CLASS_K400 }, { /* Solar Keyboard Logitech K750 */ LDJ_DEVICE(0x4002), .driver_data = HIDPP_QUIRK_CLASS_K750 }, { /* Keyboard MX5000 (Bluetooth-receiver in HID proxy mode) */ LDJ_DEVICE(0xb305), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, { /* Dinovo Edge (Bluetooth-receiver in HID proxy mode) */ LDJ_DEVICE(0xb309), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, { /* Keyboard MX5500 (Bluetooth-receiver in HID proxy mode) */ LDJ_DEVICE(0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, { LDJ_DEVICE(HID_ANY_ID) }, { /* Keyboard LX501 (Y-RR53) */ L27MHZ_DEVICE(0x0049), .driver_data = HIDPP_QUIRK_KBD_ZOOM_WHEEL }, { /* Keyboard MX3000 (Y-RAM74) */ L27MHZ_DEVICE(0x0057), .driver_data = HIDPP_QUIRK_KBD_SCROLL_WHEEL }, { /* Keyboard MX3200 (Y-RAV80) */ L27MHZ_DEVICE(0x005c), .driver_data = HIDPP_QUIRK_KBD_ZOOM_WHEEL }, { /* S510 Media Remote */ L27MHZ_DEVICE(0x00fe), .driver_data = HIDPP_QUIRK_KBD_SCROLL_WHEEL }, { L27MHZ_DEVICE(HID_ANY_ID) }, { /* Logitech G403 Wireless Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC082) }, { /* Logitech G502 Lightspeed Wireless Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC08D) }, { /* Logitech G703 Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC087) }, { /* Logitech G703 Hero Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC090) }, { /* Logitech G900 Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC081) }, { /* Logitech G903 Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC086) }, { /* Logitech G903 Hero Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC091) }, { /* Logitech G915 TKL Keyboard over USB */ 
HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC343) }, { /* Logitech G920 Wheel over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL), .driver_data = HIDPP_QUIRK_CLASS_G920 | HIDPP_QUIRK_FORCE_OUTPUT_REPORTS}, { /* Logitech G923 Wheel (Xbox version) over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G923_XBOX_WHEEL), .driver_data = HIDPP_QUIRK_CLASS_G920 | HIDPP_QUIRK_FORCE_OUTPUT_REPORTS }, { /* Logitech G Pro Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC088) }, { /* Logitech G Pro X Superlight Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) }, { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) }, { /* G935 Gaming Headset */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87), .driver_data = HIDPP_QUIRK_WIRELESS_STATUS }, { /* MX5000 keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb305), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, { /* Dinovo Edge keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb309), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, { /* MX5500 keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, { /* Logitech G915 TKL keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb35f) }, { /* M-RCQ142 V470 Cordless Laser Mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) }, { /* MX Master mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) }, { /* M720 Triathlon mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb015) }, { /* MX Ergo trackball over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) }, { /* Signature M650 over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb02a) }, { /* MX Master 3 mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb023) }, { /* MX Anywhere 3 mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb025) }, { /* MX Master 3S mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb034) }, {} }; MODULE_DEVICE_TABLE(hid, hidpp_devices); static const struct hid_usage_id hidpp_usages[] = { { HID_GD_WHEEL, EV_REL, REL_WHEEL_HI_RES }, { HID_ANY_ID - 1, HID_ANY_ID - 1, HID_ANY_ID - 1} }; static struct hid_driver hidpp_driver = { .name = "logitech-hidpp-device", .id_table = hidpp_devices, .report_fixup = hidpp_report_fixup, .probe = hidpp_probe, .remove = hidpp_remove, .raw_event = hidpp_raw_event, .usage_table = hidpp_usages, .event = hidpp_event, .input_configured = hidpp_input_configured, .input_mapping = hidpp_input_mapping, .input_mapped = hidpp_input_mapped, }; module_hid_driver(hidpp_driver);
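/*
 * Illustrative userspace sketch (not part of the driver above): hidpp_ff_init()
 * exports a sysfs attribute named "range" whose handlers,
 * hidpp_ff_range_show() and hidpp_ff_range_store(), report the wheel rotation
 * range and clamp written values to 180-900 degrees. The snippet below is a
 * minimal example of how that attribute could be read and updated from
 * userspace. The sysfs path used here is hypothetical; the real path depends
 * on the HID device instance on the running system (the attribute file is
 * created in the hid device's sysfs directory by device_create_file()).
 *
 * Build: cc -o hidpp_range hidpp_range.c
 * Usage: ./hidpp_range [new_range] [path]
 */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	/* Hypothetical example path; substitute the actual hid device directory. */
	const char *path = "/sys/bus/hid/devices/0003:046D:C262.0001/range";
	FILE *f;
	int range;

	if (argc > 2)
		path = argv[2];

	/* Read the current range, as formatted by hidpp_ff_range_show(). */
	f = fopen(path, "r");
	if (!f) {
		perror("open range");
		return 1;
	}
	if (fscanf(f, "%d", &range) == 1)
		printf("current range: %d degrees\n", range);
	fclose(f);

	if (argc < 2)
		return 0;

	/* Write a new range; hidpp_ff_range_store() clamps it to [180, 900]. */
	f = fopen(path, "w");
	if (!f) {
		perror("open range for writing");
		return 1;
	}
	fprintf(f, "%d\n", atoi(argv[1]));
	fclose(f);
	return 0;
}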
// SPDX-License-Identifier: GPL-2.0 /* * NETLINK Netlink attributes * * Authors: Thomas Graf <tgraf@suug.ch> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/jiffies.h> #include <linux/nospec.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> #include <net/netlink.h> /* For these data types, attribute length should be exactly the given * size. However, to maintain compatibility with broken commands, if the * attribute length does not match the expected size a warning is emitted * to the user that the command is sending invalid data and needs to be fixed. */ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), [NLA_S8] = sizeof(s8), [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), [NLA_BE16] = sizeof(__be16), [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), [NLA_MSECS] = sizeof(u64), [NLA_NESTED] = NLA_HDRLEN, [NLA_S8] = sizeof(s8), [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), [NLA_BE16] = sizeof(__be16), [NLA_BE32] = sizeof(__be32), }; /* * Nested policies might refer back to the original * policy in some cases, and userspace could try to * abuse that and recurse by nesting in the right * ways. Limit recursion to avoid this problem.
*/ #define MAX_POLICY_RECURSION_DEPTH 10 static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, struct nlattr **tb, unsigned int depth); static int validate_nla_bitfield32(const struct nlattr *nla, const u32 valid_flags_mask) { const struct nla_bitfield32 *bf = nla_data(nla); if (!valid_flags_mask) return -EINVAL; /*disallow invalid bit selector */ if (bf->selector & ~valid_flags_mask) return -EINVAL; /*disallow invalid bit values */ if (bf->value & ~valid_flags_mask) return -EINVAL; /*disallow valid bit values that are not selected*/ if (bf->value & ~bf->selector) return -EINVAL; return 0; } static int nla_validate_array(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack, unsigned int validate, unsigned int depth) { const struct nlattr *entry; int rem; nla_for_each_attr(entry, head, len, rem) { int ret; if (nla_len(entry) == 0) continue; if (nla_len(entry) < NLA_HDRLEN) { NL_SET_ERR_MSG_ATTR_POL(extack, entry, policy, "Array element too short"); return -ERANGE; } ret = __nla_validate_parse(nla_data(entry), nla_len(entry), maxtype, policy, validate, extack, NULL, depth + 1); if (ret < 0) return ret; } return 0; } void nla_get_range_unsigned(const struct nla_policy *pt, struct netlink_range_validation *range) { WARN_ON_ONCE(pt->validation_type != NLA_VALIDATE_RANGE_PTR && (pt->min < 0 || pt->max < 0)); range->min = 0; switch (pt->type) { case NLA_U8: range->max = U8_MAX; break; case NLA_U16: case NLA_BE16: case NLA_BINARY: range->max = U16_MAX; break; case NLA_U32: case NLA_BE32: range->max = U32_MAX; break; case NLA_U64: case NLA_UINT: case NLA_MSECS: range->max = U64_MAX; break; default: WARN_ON_ONCE(1); return; } switch (pt->validation_type) { case NLA_VALIDATE_RANGE: case NLA_VALIDATE_RANGE_WARN_TOO_LONG: range->min = pt->min; range->max = pt->max; break; case NLA_VALIDATE_RANGE_PTR: *range = *pt->range; break; case NLA_VALIDATE_MIN: range->min = pt->min; break; case NLA_VALIDATE_MAX: range->max = pt->max; break; default: break; } } static int nla_validate_range_unsigned(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, unsigned int validate) { struct netlink_range_validation range; u64 value; switch (pt->type) { case NLA_U8: value = nla_get_u8(nla); break; case NLA_U16: value = nla_get_u16(nla); break; case NLA_U32: value = nla_get_u32(nla); break; case NLA_U64: value = nla_get_u64(nla); break; case NLA_UINT: value = nla_get_uint(nla); break; case NLA_MSECS: value = nla_get_u64(nla); break; case NLA_BINARY: value = nla_len(nla); break; case NLA_BE16: value = ntohs(nla_get_be16(nla)); break; case NLA_BE32: value = ntohl(nla_get_be32(nla)); break; default: return -EINVAL; } nla_get_range_unsigned(pt, &range); if (pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG && pt->type == NLA_BINARY && value > range.max) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, pt->type); if (validate & NL_VALIDATE_STRICT_ATTRS) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } /* this assumes min <= max (don't validate against min) */ return 0; } if (value < range.min || value > range.max) { bool binary = pt->type == NLA_BINARY; if (binary) NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "binary attribute size out of range"); else NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "integer out of range"); return -ERANGE; 
} return 0; } void nla_get_range_signed(const struct nla_policy *pt, struct netlink_range_validation_signed *range) { switch (pt->type) { case NLA_S8: range->min = S8_MIN; range->max = S8_MAX; break; case NLA_S16: range->min = S16_MIN; range->max = S16_MAX; break; case NLA_S32: range->min = S32_MIN; range->max = S32_MAX; break; case NLA_S64: case NLA_SINT: range->min = S64_MIN; range->max = S64_MAX; break; default: WARN_ON_ONCE(1); return; } switch (pt->validation_type) { case NLA_VALIDATE_RANGE: range->min = pt->min; range->max = pt->max; break; case NLA_VALIDATE_RANGE_PTR: *range = *pt->range_signed; break; case NLA_VALIDATE_MIN: range->min = pt->min; break; case NLA_VALIDATE_MAX: range->max = pt->max; break; default: break; } } static int nla_validate_int_range_signed(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct netlink_range_validation_signed range; s64 value; switch (pt->type) { case NLA_S8: value = nla_get_s8(nla); break; case NLA_S16: value = nla_get_s16(nla); break; case NLA_S32: value = nla_get_s32(nla); break; case NLA_S64: value = nla_get_s64(nla); break; case NLA_SINT: value = nla_get_sint(nla); break; default: return -EINVAL; } nla_get_range_signed(pt, &range); if (value < range.min || value > range.max) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "integer out of range"); return -ERANGE; } return 0; } static int nla_validate_int_range(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, unsigned int validate) { switch (pt->type) { case NLA_U8: case NLA_U16: case NLA_U32: case NLA_U64: case NLA_UINT: case NLA_MSECS: case NLA_BINARY: case NLA_BE16: case NLA_BE32: return nla_validate_range_unsigned(pt, nla, extack, validate); case NLA_S8: case NLA_S16: case NLA_S32: case NLA_S64: case NLA_SINT: return nla_validate_int_range_signed(pt, nla, extack); default: WARN_ON(1); return -EINVAL; } } static int nla_validate_mask(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack) { u64 value; switch (pt->type) { case NLA_U8: value = nla_get_u8(nla); break; case NLA_U16: value = nla_get_u16(nla); break; case NLA_U32: value = nla_get_u32(nla); break; case NLA_U64: value = nla_get_u64(nla); break; case NLA_UINT: value = nla_get_uint(nla); break; case NLA_BE16: value = ntohs(nla_get_be16(nla)); break; case NLA_BE32: value = ntohl(nla_get_be32(nla)); break; default: return -EINVAL; } if (value & ~(u64)pt->mask) { NL_SET_ERR_MSG_ATTR(extack, nla, "reserved bit set"); return -EINVAL; } return 0; } static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, unsigned int depth) { u16 strict_start_type = policy[0].strict_start_type; const struct nla_policy *pt; int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); int err = -ERANGE; if (strict_start_type && type >= strict_start_type) validate |= NL_VALIDATE_STRICT; if (type <= 0 || type > maxtype) return 0; type = array_index_nospec(type, maxtype + 1); pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); if (validate & NL_VALIDATE_STRICT_ATTRS) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } } if (validate & NL_VALIDATE_NESTED) { if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) && !(nla->nla_type & NLA_F_NESTED)) { 
NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "NLA_F_NESTED is missing"); return -EINVAL; } if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY && pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "NLA_F_NESTED not expected"); return -EINVAL; } } switch (pt->type) { case NLA_REJECT: if (extack && pt->reject_message) { NL_SET_BAD_ATTR(extack, nla); extack->_msg = pt->reject_message; return -EINVAL; } err = -EINVAL; goto out_err; case NLA_FLAG: if (attrlen > 0) goto out_err; break; case NLA_SINT: case NLA_UINT: if (attrlen != sizeof(u32) && attrlen != sizeof(u64)) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } break; case NLA_BITFIELD32: if (attrlen != sizeof(struct nla_bitfield32)) goto out_err; err = validate_nla_bitfield32(nla, pt->bitfield32_valid); if (err) goto out_err; break; case NLA_NUL_STRING: if (pt->len) minlen = min_t(int, attrlen, pt->len + 1); else minlen = attrlen; if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) { err = -EINVAL; goto out_err; } fallthrough; case NLA_STRING: if (attrlen < 1) goto out_err; if (pt->len) { char *buf = nla_data(nla); if (buf[attrlen - 1] == '\0') attrlen--; if (attrlen > pt->len) goto out_err; } break; case NLA_BINARY: if (pt->len && attrlen > pt->len) goto out_err; break; case NLA_NESTED: /* a nested attributes is allowed to be empty; if its not, * it must have a size of at least NLA_HDRLEN. */ if (attrlen == 0) break; if (attrlen < NLA_HDRLEN) goto out_err; if (pt->nested_policy) { err = __nla_validate_parse(nla_data(nla), nla_len(nla), pt->len, pt->nested_policy, validate, extack, NULL, depth + 1); if (err < 0) { /* * return directly to preserve the inner * error message/attribute pointer */ return err; } } break; case NLA_NESTED_ARRAY: /* a nested array attribute is allowed to be empty; if its not, * it must have a size of at least NLA_HDRLEN. 
*/ if (attrlen == 0) break; if (attrlen < NLA_HDRLEN) goto out_err; if (pt->nested_policy) { int err; err = nla_validate_array(nla_data(nla), nla_len(nla), pt->len, pt->nested_policy, extack, validate, depth); if (err < 0) { /* * return directly to preserve the inner * error message/attribute pointer */ return err; } } break; case NLA_UNSPEC: if (validate & NL_VALIDATE_UNSPEC) { NL_SET_ERR_MSG_ATTR(extack, nla, "Unsupported attribute"); return -EINVAL; } if (attrlen < pt->len) goto out_err; break; default: if (pt->len) minlen = pt->len; else minlen = nla_attr_minlen[pt->type]; if (attrlen < minlen) goto out_err; } /* further validation */ switch (pt->validation_type) { case NLA_VALIDATE_NONE: /* nothing to do */ break; case NLA_VALIDATE_RANGE_PTR: case NLA_VALIDATE_RANGE: case NLA_VALIDATE_RANGE_WARN_TOO_LONG: case NLA_VALIDATE_MIN: case NLA_VALIDATE_MAX: err = nla_validate_int_range(pt, nla, extack, validate); if (err) return err; break; case NLA_VALIDATE_MASK: err = nla_validate_mask(pt, nla, extack); if (err) return err; break; case NLA_VALIDATE_FUNCTION: if (pt->validate) { err = pt->validate(nla, extack); if (err) return err; } break; } return 0; out_err: NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "Attribute failed policy validation"); return err; } static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, struct nlattr **tb, unsigned int depth) { const struct nlattr *nla; int rem; if (depth >= MAX_POLICY_RECURSION_DEPTH) { NL_SET_ERR_MSG(extack, "allowed policy recursion depth exceeded"); return -EINVAL; } if (tb) memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); nla_for_each_attr(nla, head, len, rem) { u16 type = nla_type(nla); if (type == 0 || type > maxtype) { if (validate & NL_VALIDATE_MAXTYPE) { NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute type"); return -EINVAL; } continue; } type = array_index_nospec(type, maxtype + 1); if (policy) { int err = validate_nla(nla, maxtype, policy, validate, extack, depth); if (err < 0) return err; } if (tb) tb[type] = (struct nlattr *)nla; } if (unlikely(rem > 0)) { pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", rem, current->comm); NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); if (validate & NL_VALIDATE_TRAILING) return -EINVAL; } return 0; } /** * __nla_validate - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the * specified policy. Validation depends on the validate flags passed, see * &enum netlink_validation for more details on that. * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ int __nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { return __nla_validate_parse(head, len, maxtype, policy, validate, extack, NULL, 0); } EXPORT_SYMBOL(__nla_validate); /** * nla_policy_len - Determine the max. length of a policy * @p: policy to use * @n: number of policies * * Determines the max. length of the policy. It is currently used * to allocated Netlink buffers roughly the size of the actual * message. 
* * Returns 0 on success or a negative error code. */ int nla_policy_len(const struct nla_policy *p, int n) { int i, len = 0; for (i = 0; i < n; i++, p++) { if (p->len) len += nla_total_size(p->len); else if (nla_attr_len[p->type]) len += nla_total_size(nla_attr_len[p->type]); else if (nla_attr_minlen[p->type]) len += nla_total_size(nla_attr_minlen[p->type]); } return len; } EXPORT_SYMBOL(nla_policy_len); /** * __nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @validate: validation strictness * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. * Validation is controlled by the @validate parameter. * * Returns 0 on success or a negative error code. */ int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { return __nla_validate_parse(head, len, maxtype, policy, validate, extack, tb, 0); } EXPORT_SYMBOL(__nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @attrtype: type of attribute to look for * * Returns the first attribute in the stream matching the specified type. */ struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) { const struct nlattr *nla; int rem; nla_for_each_attr(nla, head, len, rem) if (nla_type(nla) == attrtype) return (struct nlattr *)nla; return NULL; } EXPORT_SYMBOL(nla_find); /** * nla_strscpy - Copy string attribute payload into a sized buffer * @dst: Where to copy the string to. * @nla: Attribute to copy the string from. * @dstsize: Size of destination buffer. * * Copies at most dstsize - 1 bytes into the destination buffer. * Unlike strscpy() the destination buffer is always padded out. * * Return: * * srclen - Returns @nla length (not including the trailing %NUL). * * -E2BIG - If @dstsize is 0 or greater than U16_MAX or @nla length greater * than @dstsize. */ ssize_t nla_strscpy(char *dst, const struct nlattr *nla, size_t dstsize) { size_t srclen = nla_len(nla); char *src = nla_data(nla); ssize_t ret; size_t len; if (dstsize == 0 || WARN_ON_ONCE(dstsize > U16_MAX)) return -E2BIG; if (srclen > 0 && src[srclen - 1] == '\0') srclen--; if (srclen >= dstsize) { len = dstsize - 1; ret = -E2BIG; } else { len = srclen; ret = len; } memcpy(dst, src, len); /* Zero pad end of dst. */ memset(dst + len, 0, dstsize - len); return ret; } EXPORT_SYMBOL(nla_strscpy); /** * nla_strdup - Copy string attribute payload into a newly allocated buffer * @nla: attribute to copy the string from * @flags: the type of memory to allocate (see kmalloc). * * Returns a pointer to the allocated buffer or NULL on error. 
*/ char *nla_strdup(const struct nlattr *nla, gfp_t flags) { size_t srclen = nla_len(nla); char *src = nla_data(nla), *dst; if (srclen > 0 && src[srclen - 1] == '\0') srclen--; dst = kmalloc(srclen + 1, flags); if (dst != NULL) { memcpy(dst, src, srclen); dst[srclen] = '\0'; } return dst; } EXPORT_SYMBOL(nla_strdup); /** * nla_memcpy - Copy a netlink attribute into another memory area * @dest: where to copy to memcpy * @src: netlink attribute to copy from * @count: size of the destination area * * Note: The number of bytes copied is limited by the length of * attribute's payload. memcpy * * Returns the number of bytes copied. */ int nla_memcpy(void *dest, const struct nlattr *src, int count) { int minlen = min_t(int, count, nla_len(src)); memcpy(dest, nla_data(src), minlen); if (count > minlen) memset(dest + minlen, 0, count - minlen); return minlen; } EXPORT_SYMBOL(nla_memcpy); /** * nla_memcmp - Compare an attribute with sized memory area * @nla: netlink attribute * @data: memory area * @size: size of memory area */ int nla_memcmp(const struct nlattr *nla, const void *data, size_t size) { int d = nla_len(nla) - size; if (d == 0) d = memcmp(nla_data(nla), data, size); return d; } EXPORT_SYMBOL(nla_memcmp); /** * nla_strcmp - Compare a string attribute against a string * @nla: netlink string attribute * @str: another string */ int nla_strcmp(const struct nlattr *nla, const char *str) { int len = strlen(str); char *buf = nla_data(nla); int attrlen = nla_len(nla); int d; while (attrlen > 0 && buf[attrlen - 1] == '\0') attrlen--; d = attrlen - len; if (d == 0) d = memcmp(nla_data(nla), str, len); return d; } EXPORT_SYMBOL(nla_strcmp); #ifdef CONFIG_NET /** * __nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) { struct nlattr *nla; nla = skb_put(skb, nla_total_size(attrlen)); nla->nla_type = attrtype; nla->nla_len = nla_attr_size(attrlen); memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); return nla; } EXPORT_SYMBOL(__nla_reserve); /** * __nla_reserve_64bit - reserve room for attribute on the skb and align it * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * @padattr: attribute type for the padding * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. It also ensure that this * attribute will have a 64-bit aligned nla_data() area. * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr) { nla_align_64bit(skb, padattr); return __nla_reserve(skb, attrtype, attrlen); } EXPORT_SYMBOL(__nla_reserve_64bit); /** * __nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * * The caller is responsible to ensure that the skb provides enough * tailroom for the payload. 
*/ void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) { return skb_put_zero(skb, NLA_ALIGN(attrlen)); } EXPORT_SYMBOL(__nla_reserve_nohdr); /** * nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute header and payload. */ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) return NULL; return __nla_reserve(skb, attrtype, attrlen); } EXPORT_SYMBOL(nla_reserve); /** * nla_reserve_64bit - reserve room for attribute on the skb and align it * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * @padattr: attribute type for the padding * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. It also ensure that this * attribute will have a 64-bit aligned nla_data() area. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute header and payload. */ struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr) { size_t len; if (nla_need_padding_for_64bit(skb)) len = nla_total_size_64bit(attrlen); else len = nla_total_size(attrlen); if (unlikely(skb_tailroom(skb) < len)) return NULL; return __nla_reserve_64bit(skb, attrtype, attrlen, padattr); } EXPORT_SYMBOL(nla_reserve_64bit); /** * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute payload. */ void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return NULL; return __nla_reserve_nohdr(skb, attrlen); } EXPORT_SYMBOL(nla_reserve_nohdr); /** * __nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { struct nlattr *nla; nla = __nla_reserve(skb, attrtype, attrlen); memcpy(nla_data(nla), data, attrlen); } EXPORT_SYMBOL(__nla_put); /** * __nla_put_64bit - Add a netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * @padattr: attribute type for the padding * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. 
*/ void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr) { struct nlattr *nla; nla = __nla_reserve_64bit(skb, attrtype, attrlen, padattr); memcpy(nla_data(nla), data, attrlen); } EXPORT_SYMBOL(__nla_put_64bit); /** * __nla_put_nohdr - Add a netlink attribute without header * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute payload. */ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { void *start; start = __nla_reserve_nohdr(skb, attrlen); memcpy(start, data, attrlen); } EXPORT_SYMBOL(__nla_put_nohdr); /** * nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) return -EMSGSIZE; __nla_put(skb, attrtype, attrlen, data); return 0; } EXPORT_SYMBOL(nla_put); /** * nla_put_64bit - Add a netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * @padattr: attribute type for the padding * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr) { size_t len; if (nla_need_padding_for_64bit(skb)) len = nla_total_size_64bit(attrlen); else len = nla_total_size(attrlen); if (unlikely(skb_tailroom(skb) < len)) return -EMSGSIZE; __nla_put_64bit(skb, attrtype, attrlen, data, padattr); return 0; } EXPORT_SYMBOL(nla_put_64bit); /** * nla_put_nohdr - Add a netlink attribute without header * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return -EMSGSIZE; __nla_put_nohdr(skb, attrlen, data); return 0; } EXPORT_SYMBOL(nla_put_nohdr); /** * nla_append - Add a netlink attribute without header or padding * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_append(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return -EMSGSIZE; skb_put_data(skb, data, attrlen); return 0; } EXPORT_SYMBOL(nla_append); #endif
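/*
 * Example (illustrative sketch, not part of this file): a typical user of
 * this API declares a policy, emits attributes with nla_put_*() and reads
 * them back after parsing/validation. The MYDRV_ATTR_* names below are made
 * up for the example.
 *
 *	enum {
 *		MYDRV_ATTR_UNSPEC,
 *		MYDRV_ATTR_PORT,	// NLA_U32
 *		MYDRV_ATTR_NAME,	// NLA_NUL_STRING
 *		__MYDRV_ATTR_MAX,
 *	};
 *	#define MYDRV_ATTR_MAX (__MYDRV_ATTR_MAX - 1)
 *
 *	static const struct nla_policy mydrv_policy[MYDRV_ATTR_MAX + 1] = {
 *		[MYDRV_ATTR_PORT] = { .type = NLA_U32 },
 *		[MYDRV_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = 15 },
 *	};
 *
 *	// filling a message:
 *	if (nla_put_u32(skb, MYDRV_ATTR_PORT, port))
 *		goto nla_put_failure;
 *
 *	// parsing received attributes (nla_parse() wraps __nla_parse()):
 *	struct nlattr *tb[MYDRV_ATTR_MAX + 1];
 *	err = nla_parse(tb, MYDRV_ATTR_MAX, attrs, attrlen, mydrv_policy, extack);
 *	if (!err && tb[MYDRV_ATTR_PORT])
 *		port = nla_get_u32(tb[MYDRV_ATTR_PORT]);
 */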
// SPDX-License-Identifier: GPL-2.0+ /* * HID driver for UC-Logic devices not fully compliant with HID standard * * Copyright (c) 2010-2014 Nikolai Kondrashov * Copyright (c) 2013 Martin Rusko */ /* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/timer.h> #include "usbhid/usbhid.h" #include "hid-uclogic-params.h" #include "hid-ids.h" /** * uclogic_inrange_timeout - handle pen in-range state timeout. * Emulate input events normally generated when pen goes out of range for * tablets which don't report that. * * @t: The timer the timeout handler is attached to, stored in a struct * uclogic_drvdata.
*/ static void uclogic_inrange_timeout(struct timer_list *t) { struct uclogic_drvdata *drvdata = from_timer(drvdata, t, inrange_timer); struct input_dev *input = drvdata->pen_input; if (input == NULL) return; input_report_abs(input, ABS_PRESSURE, 0); /* If BTN_TOUCH state is changing */ if (test_bit(BTN_TOUCH, input->key)) { input_event(input, EV_MSC, MSC_SCAN, /* Digitizer Tip Switch usage */ 0xd0042); input_report_key(input, BTN_TOUCH, 0); } input_report_key(input, BTN_TOOL_PEN, 0); input_sync(input); } static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); if (drvdata->desc_ptr != NULL) { rdesc = drvdata->desc_ptr; *rsize = drvdata->desc_size; } return rdesc; } static int uclogic_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); struct uclogic_params *params = &drvdata->params; /* Discard invalid pen usages */ if (params->pen.usage_invalid && (field->application == HID_DG_PEN)) return -1; /* Let hid-core decide what to do */ return 0; } static int uclogic_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); struct uclogic_params *params = &drvdata->params; const char *suffix = NULL; struct hid_field *field; size_t i; const struct uclogic_params_frame *frame; /* no report associated (HID_QUIRK_MULTI_INPUT not set) */ if (!hi->report) return 0; /* * If this is the input corresponding to the pen report * in need of tweaking. */ if (hi->report->id == params->pen.id) { /* Remember the input device so we can simulate events */ drvdata->pen_input = hi->input; } /* If it's one of the frame devices */ for (i = 0; i < ARRAY_SIZE(params->frame_list); i++) { frame = &params->frame_list[i]; if (hi->report->id == frame->id) { /* Assign custom suffix, if any */ suffix = frame->suffix; /* * Disable EV_MSC reports for touch ring interfaces to * make the Wacom driver pickup touch ring extents */ if (frame->touch_byte > 0) __clear_bit(EV_MSC, hi->input->evbit); } } if (!suffix) { field = hi->report->field[0]; switch (field->application) { case HID_GD_KEYBOARD: suffix = "Keyboard"; break; case HID_GD_MOUSE: suffix = "Mouse"; break; case HID_GD_KEYPAD: suffix = "Pad"; break; case HID_DG_PEN: case HID_DG_DIGITIZER: suffix = "Pen"; break; case HID_CP_CONSUMER_CONTROL: suffix = "Consumer Control"; break; case HID_GD_SYSTEM_CONTROL: suffix = "System Control"; break; } } if (suffix) hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); return 0; } static int uclogic_probe(struct hid_device *hdev, const struct hid_device_id *id) { int rc; struct uclogic_drvdata *drvdata = NULL; bool params_initialized = false; if (!hid_is_usb(hdev)) return -EINVAL; /* * libinput requires the pad interface to be on a different node * than the pen, so use QUIRK_MULTI_INPUT for all tablets. 
*/ hdev->quirks |= HID_QUIRK_MULTI_INPUT; hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE; /* Allocate and assign driver data */ drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); if (drvdata == NULL) { rc = -ENOMEM; goto failure; } timer_setup(&drvdata->inrange_timer, uclogic_inrange_timeout, 0); drvdata->re_state = U8_MAX; drvdata->quirks = id->driver_data; hid_set_drvdata(hdev, drvdata); /* Initialize the device and retrieve interface parameters */ rc = uclogic_params_init(&drvdata->params, hdev); if (rc != 0) { hid_err(hdev, "failed probing parameters: %d\n", rc); goto failure; } params_initialized = true; hid_dbg(hdev, "parameters:\n"); uclogic_params_hid_dbg(hdev, &drvdata->params); if (drvdata->params.invalid) { hid_info(hdev, "interface is invalid, ignoring\n"); rc = -ENODEV; goto failure; } /* Generate replacement report descriptor */ rc = uclogic_params_get_desc(&drvdata->params, &drvdata->desc_ptr, &drvdata->desc_size); if (rc) { hid_err(hdev, "failed generating replacement report descriptor: %d\n", rc); goto failure; } rc = hid_parse(hdev); if (rc) { hid_err(hdev, "parse failed\n"); goto failure; } rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (rc) { hid_err(hdev, "hw start failed\n"); goto failure; } return 0; failure: /* Assume "remove" might not be called if "probe" failed */ if (params_initialized) uclogic_params_cleanup(&drvdata->params); return rc; } #ifdef CONFIG_PM static int uclogic_resume(struct hid_device *hdev) { int rc; struct uclogic_params params; /* Re-initialize the device, but discard parameters */ rc = uclogic_params_init(&params, hdev); if (rc != 0) hid_err(hdev, "failed to re-initialize the device\n"); else uclogic_params_cleanup(&params); return rc; } #endif /** * uclogic_exec_event_hook - if the received event is hooked schedules the * associated work. * * @p: Tablet interface report parameters. * @event: Raw event. * @size: The size of event. * * Returns: * Whether the event was hooked or not. */ static bool uclogic_exec_event_hook(struct uclogic_params *p, u8 *event, int size) { struct uclogic_raw_event_hook *curr; if (!p->event_hooks) return false; list_for_each_entry(curr, &p->event_hooks->list, list) { if (curr->size == size && memcmp(curr->event, event, size) == 0) { schedule_work(&curr->work); return true; } } return false; } /** * uclogic_raw_event_pen - handle raw pen events (pen HID reports). * * @drvdata: Driver data. * @data: Report data buffer, can be modified. * @size: Report data size, bytes. * * Returns: * Negative value on error (stops event delivery), zero for success. 
*/ static int uclogic_raw_event_pen(struct uclogic_drvdata *drvdata, u8 *data, int size) { struct uclogic_params_pen *pen = &drvdata->params.pen; WARN_ON(drvdata == NULL); WARN_ON(data == NULL && size != 0); /* If in-range reports are inverted */ if (pen->inrange == UCLOGIC_PARAMS_PEN_INRANGE_INVERTED) { /* Invert the in-range bit */ data[1] ^= 0x40; } /* * If report contains fragmented high-resolution pen * coordinates */ if (size >= 10 && pen->fragmented_hires) { u8 pressure_low_byte; u8 pressure_high_byte; /* Lift pressure bytes */ pressure_low_byte = data[6]; pressure_high_byte = data[7]; /* * Move Y coord to make space for high-order X * coord byte */ data[6] = data[5]; data[5] = data[4]; /* Move high-order X coord byte */ data[4] = data[8]; /* Move high-order Y coord byte */ data[7] = data[9]; /* Place pressure bytes */ data[8] = pressure_low_byte; data[9] = pressure_high_byte; } /* If we need to emulate in-range detection */ if (pen->inrange == UCLOGIC_PARAMS_PEN_INRANGE_NONE) { /* Set in-range bit */ data[1] |= 0x40; /* (Re-)start in-range timeout */ mod_timer(&drvdata->inrange_timer, jiffies + msecs_to_jiffies(100)); } /* If we report tilt and Y direction is flipped */ if (size >= 12 && pen->tilt_y_flipped) data[11] = -data[11]; return 0; } /** * uclogic_raw_event_frame - handle raw frame events (frame HID reports). * * @drvdata: Driver data. * @frame: The parameters of the frame controls to handle. * @data: Report data buffer, can be modified. * @size: Report data size, bytes. * * Returns: * Negative value on error (stops event delivery), zero for success. */ static int uclogic_raw_event_frame( struct uclogic_drvdata *drvdata, const struct uclogic_params_frame *frame, u8 *data, int size) { WARN_ON(drvdata == NULL); WARN_ON(data == NULL && size != 0); /* If need to, and can, set pad device ID for Wacom drivers */ if (frame->dev_id_byte > 0 && frame->dev_id_byte < size) { /* If we also have a touch ring and the finger left it */ if (frame->touch_byte > 0 && frame->touch_byte < size && data[frame->touch_byte] == 0) { data[frame->dev_id_byte] = 0; } else { data[frame->dev_id_byte] = 0xf; } } /* If need to, and can, read rotary encoder state change */ if (frame->re_lsb > 0 && frame->re_lsb / 8 < size) { unsigned int byte = frame->re_lsb / 8; unsigned int bit = frame->re_lsb % 8; u8 change; u8 prev_state = drvdata->re_state; /* Read Gray-coded state */ u8 state = (data[byte] >> bit) & 0x3; /* Encode state change into 2-bit signed integer */ if ((prev_state == 1 && state == 0) || (prev_state == 2 && state == 3)) { change = 1; } else if ((prev_state == 2 && state == 0) || (prev_state == 1 && state == 3)) { change = 3; } else { change = 0; } /* Write change */ data[byte] = (data[byte] & ~((u8)3 << bit)) | (change << bit); /* Remember state */ drvdata->re_state = state; } /* If need to, and can, transform the touch ring reports */ if (frame->touch_byte > 0 && frame->touch_byte < size) { __s8 value = data[frame->touch_byte]; if (value != 0) { if (frame->touch_flip_at != 0) { value = frame->touch_flip_at - value; if (value <= 0) value = frame->touch_max + value; } data[frame->touch_byte] = value - 1; } } /* If need to, and can, transform the bitmap dial reports */ if (frame->bitmap_dial_byte > 0 && frame->bitmap_dial_byte < size) { if (data[frame->bitmap_dial_byte] == 2) data[frame->bitmap_dial_byte] = -1; } return 0; } static int uclogic_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { unsigned int report_id = report->id; struct uclogic_drvdata *drvdata = 
hid_get_drvdata(hdev); struct uclogic_params *params = &drvdata->params; struct uclogic_params_pen_subreport *subreport; struct uclogic_params_pen_subreport *subreport_list_end; size_t i; /* Do not handle anything but input reports */ if (report->type != HID_INPUT_REPORT) return 0; if (uclogic_exec_event_hook(params, data, size)) return 0; while (true) { /* Tweak pen reports, if necessary */ if ((report_id == params->pen.id) && (size >= 2)) { subreport_list_end = params->pen.subreport_list + ARRAY_SIZE(params->pen.subreport_list); /* Try to match a subreport */ for (subreport = params->pen.subreport_list; subreport < subreport_list_end; subreport++) { if (subreport->value != 0 && subreport->value == data[1]) { break; } } /* If a subreport matched */ if (subreport < subreport_list_end) { /* Change to subreport ID, and restart */ report_id = data[0] = subreport->id; continue; } else { return uclogic_raw_event_pen(drvdata, data, size); } } /* Tweak frame control reports, if necessary */ for (i = 0; i < ARRAY_SIZE(params->frame_list); i++) { if (report_id == params->frame_list[i].id) { return uclogic_raw_event_frame( drvdata, &params->frame_list[i], data, size); } } break; } return 0; } static void uclogic_remove(struct hid_device *hdev) { struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev); del_timer_sync(&drvdata->inrange_timer); hid_hw_stop(hdev); kfree(drvdata->desc_ptr); uclogic_params_cleanup(&drvdata->params); } static const struct hid_device_id uclogic_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP1062) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET2) }, { HID_USB_DEVICE(USB_VENDOR_ID_TRUST, USB_DEVICE_ID_TRUST_PANORA_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_YIYNOVA_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_81) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_45) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_47) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GT5040) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_PARBLO_A610_PRO) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_G5) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_RAINBOW_CV720) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_G540) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_G640) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01_V2) }, { 
HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_MW), .driver_data = UCLOGIC_MOUSE_FRAME_QUIRK | UCLOGIC_BATTERY_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_S) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_SW), .driver_data = UCLOGIC_MOUSE_FRAME_QUIRK | UCLOGIC_BATTERY_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_STAR06) }, { } }; MODULE_DEVICE_TABLE(hid, uclogic_devices); static struct hid_driver uclogic_driver = { .name = "uclogic", .id_table = uclogic_devices, .probe = uclogic_probe, .remove = uclogic_remove, .report_fixup = uclogic_report_fixup, .raw_event = uclogic_raw_event, .input_mapping = uclogic_input_mapping, .input_configured = uclogic_input_configured, #ifdef CONFIG_PM .resume = uclogic_resume, .reset_resume = uclogic_resume, #endif }; module_hid_driver(uclogic_driver); MODULE_AUTHOR("Martin Rusko"); MODULE_AUTHOR("Nikolai Kondrashov"); MODULE_LICENSE("GPL"); #ifdef CONFIG_HID_KUNIT_TEST #include "hid-uclogic-core-test.c" #endif
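/*
 * Example (illustrative only): a further XP-Pen tablet needing the same
 * frame/battery handling as the Deco Pro models would be declared with an
 * entry such as the following in uclogic_devices[] above; the
 * USB_DEVICE_ID_UGEE_XPPEN_TABLET_NEW macro is hypothetical.
 *
 *	{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
 *			 USB_DEVICE_ID_UGEE_XPPEN_TABLET_NEW),
 *	  .driver_data = UCLOGIC_MOUSE_FRAME_QUIRK | UCLOGIC_BATTERY_QUIRK },
 */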
/* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/posix_acl.h (C) 2002 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ #ifndef __LINUX_POSIX_ACL_H #define __LINUX_POSIX_ACL_H #include <linux/bug.h> #include <linux/slab.h> #include <linux/rcupdate.h> #include <linux/refcount.h> #include <uapi/linux/posix_acl.h> struct user_namespace; struct posix_acl_entry { short e_tag; unsigned short e_perm; union { kuid_t e_uid; kgid_t e_gid; }; }; struct posix_acl { refcount_t a_refcount; struct rcu_head a_rcu; unsigned int a_count; struct posix_acl_entry a_entries[]; }; #define FOREACH_ACL_ENTRY(pa, acl, pe) \ for(pa=(acl)->a_entries, pe=pa+(acl)->a_count; pa<pe; pa++) /* * Duplicate an ACL handle. */ static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl) { if (acl) refcount_inc(&acl->a_refcount); return acl; } /* * Free an ACL handle. */ static inline void posix_acl_release(struct posix_acl *acl) { if (acl && refcount_dec_and_test(&acl->a_refcount)) kfree_rcu(acl, a_rcu); } /* posix_acl.c */ extern void posix_acl_init(struct posix_acl *, int); extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); int set_posix_acl(struct mnt_idmap *, struct dentry *, int, struct posix_acl *); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags); #ifdef CONFIG_FS_POSIX_ACL int posix_acl_chmod(struct mnt_idmap *, struct dentry *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, struct posix_acl **); int posix_acl_update_mode(struct mnt_idmap *, struct inode *, umode_t *, struct posix_acl **); int simple_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); struct posix_acl *get_cached_acl(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); void forget_cached_acl(struct inode *inode, int type); void forget_all_cached_acls(struct inode *inode); int posix_acl_valid(struct user_namespace *, const struct posix_acl *); int posix_acl_permission(struct mnt_idmap *, struct inode *, const struct posix_acl *, int); static inline void cache_no_acl(struct inode *inode) { inode->i_acl = NULL; inode->i_default_acl = NULL; } int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); int posix_acl_listxattr(struct inode *inode, char **buffer,
ssize_t *remaining_size); #else static inline int posix_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode) { return 0; } #define simple_set_acl NULL static inline int simple_acl_create(struct inode *dir, struct inode *inode) { return 0; } static inline void cache_no_acl(struct inode *inode) { } static inline int posix_acl_create(struct inode *inode, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl) { *default_acl = *acl = NULL; return 0; } static inline void forget_all_cached_acls(struct inode *inode) { } static inline int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, struct posix_acl *acl) { return -EOPNOTSUPP; } static inline struct posix_acl *vfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { return ERR_PTR(-EOPNOTSUPP); } static inline int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { return -EOPNOTSUPP; } static inline int posix_acl_listxattr(struct inode *inode, char **buffer, ssize_t *remaining_size) { return 0; } #endif /* CONFIG_FS_POSIX_ACL */ struct posix_acl *get_inode_acl(struct inode *inode, int type); #endif /* __LINUX_POSIX_ACL_H */
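/*
 * Example (illustrative sketch): code that keeps an ACL around takes its own
 * reference with posix_acl_dup() and drops it with posix_acl_release();
 * FOREACH_ACL_ENTRY() walks the entries. "acl" below stands for any
 * struct posix_acl pointer the caller already holds.
 *
 *	struct posix_acl_entry *pa, *pe;
 *	struct posix_acl *ref = posix_acl_dup(acl);
 *
 *	FOREACH_ACL_ENTRY(pa, ref, pe) {
 *		if (pa->e_tag == ACL_USER_OBJ)
 *			;	// owner entry, permissions in pa->e_perm
 *	}
 *	posix_acl_release(ref);
 */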
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_STRUCT_H #define _LINUX_FS_STRUCT_H #include <linux/path.h> #include <linux/spinlock.h> #include <linux/seqlock.h> struct fs_struct { int users; spinlock_t lock; seqcount_spinlock_t seq; int umask; int in_exec; struct path root, pwd; } __randomize_layout; extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); extern void set_fs_root(struct fs_struct *, const struct path *); extern void set_fs_pwd(struct fs_struct *, const struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void free_fs_struct(struct fs_struct *); extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { spin_lock(&fs->lock); *root = fs->root; path_get(root); spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); spin_unlock(&fs->lock); } extern bool current_chrooted(void); #endif /* _LINUX_FS_STRUCT_H */
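/*
 * Example (illustrative sketch): a caller snapshots the current working
 * directory under fs->lock via get_fs_pwd() above and must drop the
 * reference it took with path_put() when done.
 *
 *	struct path pwd;
 *
 *	get_fs_pwd(current->fs, &pwd);
 *	// ... use pwd.mnt / pwd.dentry ...
 *	path_put(&pwd);
 */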
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_LIST_BL_H #define _LINUX_LIST_BL_H #include <linux/list.h> #include <linux/bit_spinlock.h> /* * Special version of lists, where head of the list has a lock in the lowest * bit. This is useful for scalable hash tables without increasing memory * footprint overhead. * * For modification operations, the 0 bit of hlist_bl_head->first * pointer must be set. * * With some small modifications, this can easily be adapted to store several * arbitrary bits (not just a single lock bit), if the need arises to store * some fast and compact auxiliary data. */ #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) #define LIST_BL_LOCKMASK 1UL #else #define LIST_BL_LOCKMASK 0UL #endif #ifdef CONFIG_DEBUG_LIST #define LIST_BL_BUG_ON(x) BUG_ON(x) #else #define LIST_BL_BUG_ON(x) #endif struct hlist_bl_head { struct hlist_bl_node *first; }; struct hlist_bl_node { struct hlist_bl_node *next, **pprev; }; #define INIT_HLIST_BL_HEAD(ptr) \ ((ptr)->first = NULL) static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) { h->next = NULL; h->pprev = NULL; } #define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member) static inline bool hlist_bl_unhashed(const struct hlist_bl_node *h) { return !h->pprev; } static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h) { return (struct hlist_bl_node *) ((unsigned long)h->first & ~LIST_BL_LOCKMASK); } static inline void hlist_bl_set_first(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != LIST_BL_LOCKMASK); h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK); } static inline bool hlist_bl_empty(const struct hlist_bl_head *h) { return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK); } static inline void hlist_bl_add_head(struct hlist_bl_node *n, struct hlist_bl_head *h) { struct hlist_bl_node *first = hlist_bl_first(h); n->next = first; if (first) first->pprev = &n->next; n->pprev = &h->first; hlist_bl_set_first(h, n); } static inline void hlist_bl_add_before(struct hlist_bl_node *n, struct hlist_bl_node *next) { struct hlist_bl_node **pprev = next->pprev; n->pprev = pprev; n->next = next; next->pprev = &n->next; /* pprev may be `first`, so be careful not to lose the lock bit */ WRITE_ONCE(*pprev, (struct hlist_bl_node *) ((uintptr_t)n | ((uintptr_t)*pprev & LIST_BL_LOCKMASK))); } static inline void hlist_bl_add_behind(struct hlist_bl_node *n, struct hlist_bl_node *prev) { n->next = prev->next; n->pprev = &prev->next; prev->next = n; if (n->next) n->next->pprev = &n->next; } static inline void __hlist_bl_del(struct hlist_bl_node *n) { struct hlist_bl_node *next = n->next; struct hlist_bl_node **pprev = n->pprev; LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); /* pprev
may be `first`, so be careful not to lose the lock bit */ WRITE_ONCE(*pprev, (struct hlist_bl_node *) ((unsigned long)next | ((unsigned long)*pprev & LIST_BL_LOCKMASK))); if (next) next->pprev = pprev; } static inline void hlist_bl_del(struct hlist_bl_node *n) { __hlist_bl_del(n); n->next = LIST_POISON1; n->pprev = LIST_POISON2; } static inline void hlist_bl_del_init(struct hlist_bl_node *n) { if (!hlist_bl_unhashed(n)) { __hlist_bl_del(n); INIT_HLIST_BL_NODE(n); } } static inline void hlist_bl_lock(struct hlist_bl_head *b) { bit_spin_lock(0, (unsigned long *)b); } static inline void hlist_bl_unlock(struct hlist_bl_head *b) { __bit_spin_unlock(0, (unsigned long *)b); } static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) { return bit_spin_is_locked(0, (unsigned long *)b); } /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * */ #define hlist_bl_for_each_entry(tpos, pos, head, member) \ for (pos = hlist_bl_first(head); \ pos && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) /** * hlist_bl_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @n: another &struct hlist_node to use as temporary storage * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ #define hlist_bl_for_each_entry_safe(tpos, pos, n, head, member) \ for (pos = hlist_bl_first(head); \ pos && ({ n = pos->next; 1; }) && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \ pos = n) #endif
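/*
 * Illustrative sketch (not part of list_bl.h): one way a hash-table user of
 * this API might combine the head's bit 0 lock with the add/iterate helpers
 * above. The names struct bl_item, demo_table, demo_insert and demo_lookup
 * are made up for the example; only the hlist_bl_* calls come from the
 * header. Note that hlist_bl_add_head() must run with the bucket locked,
 * since hlist_bl_set_first() asserts that the lock bit is set.
 */
#include <linux/list_bl.h>

#define DEMO_HASH_SIZE 128

struct bl_item {
	unsigned long key;
	struct hlist_bl_node node;
};

/* Statically zeroed heads are valid: first == NULL, lock bit clear. */
static struct hlist_bl_head demo_table[DEMO_HASH_SIZE];

/* Insert under the per-bucket bit lock held in bit 0 of head->first. */
static void demo_insert(struct bl_item *item)
{
	struct hlist_bl_head *head = &demo_table[item->key % DEMO_HASH_SIZE];

	hlist_bl_lock(head);
	hlist_bl_add_head(&item->node, head);
	hlist_bl_unlock(head);
}

/* Look up a key; iteration here also runs under the bucket lock. */
static struct bl_item *demo_lookup(unsigned long key)
{
	struct hlist_bl_head *head = &demo_table[key % DEMO_HASH_SIZE];
	struct hlist_bl_node *pos;
	struct bl_item *item, *found = NULL;

	hlist_bl_lock(head);
	hlist_bl_for_each_entry(item, pos, head, node) {
		if (item->key == key) {
			found = item;
			break;
		}
	}
	hlist_bl_unlock(head);

	return found;
}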
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Roccat Pyra driver for Linux
 *
 * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net>
 */

/*
 */

/*
 * Roccat Pyra is a mobile gamer mouse which comes in wired and wireless
 * variant. Wireless variant is not tested.
* Userland tools can be found at http://sourceforge.net/projects/roccat */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" #include "hid-roccat-pyra.h" static uint profile_numbers[5] = {0, 1, 2, 3, 4}; static void profile_activated(struct pyra_device *pyra, unsigned int new_profile) { if (new_profile >= ARRAY_SIZE(pyra->profile_settings)) return; pyra->actual_profile = new_profile; pyra->actual_cpi = pyra->profile_settings[pyra->actual_profile].y_cpi; } static int pyra_send_control(struct usb_device *usb_dev, int value, enum pyra_control_requests request) { struct roccat_common2_control control; if ((request == PYRA_CONTROL_REQUEST_PROFILE_SETTINGS || request == PYRA_CONTROL_REQUEST_PROFILE_BUTTONS) && (value < 0 || value > 4)) return -EINVAL; control.command = ROCCAT_COMMON_COMMAND_CONTROL; control.value = value; control.request = request; return roccat_common2_send(usb_dev, ROCCAT_COMMON_COMMAND_CONTROL, &control, sizeof(struct roccat_common2_control)); } static int pyra_get_profile_settings(struct usb_device *usb_dev, struct pyra_profile_settings *buf, int number) { int retval; retval = pyra_send_control(usb_dev, number, PYRA_CONTROL_REQUEST_PROFILE_SETTINGS); if (retval) return retval; return roccat_common2_receive(usb_dev, PYRA_COMMAND_PROFILE_SETTINGS, buf, PYRA_SIZE_PROFILE_SETTINGS); } static int pyra_get_settings(struct usb_device *usb_dev, struct pyra_settings *buf) { return roccat_common2_receive(usb_dev, PYRA_COMMAND_SETTINGS, buf, PYRA_SIZE_SETTINGS); } static int pyra_set_settings(struct usb_device *usb_dev, struct pyra_settings const *settings) { return roccat_common2_send_with_status(usb_dev, PYRA_COMMAND_SETTINGS, settings, PYRA_SIZE_SETTINGS); } static ssize_t pyra_sysfs_read(struct file *fp, struct kobject *kobj, char *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off >= real_size) return 0; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&pyra->pyra_lock); retval = roccat_common2_receive(usb_dev, command, buf, real_size); mutex_unlock(&pyra->pyra_lock); if (retval) return retval; return real_size; } static ssize_t pyra_sysfs_write(struct file *fp, struct kobject *kobj, void const *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&pyra->pyra_lock); retval = roccat_common2_send_with_status(usb_dev, command, (void *)buf, real_size); mutex_unlock(&pyra->pyra_lock); if (retval) return retval; return real_size; } #define PYRA_SYSFS_W(thingy, THINGY) \ static ssize_t pyra_sysfs_write_ ## thingy(struct file *fp, \ struct kobject *kobj, struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ return pyra_sysfs_write(fp, kobj, buf, off, count, \ PYRA_SIZE_ ## THINGY, PYRA_COMMAND_ ## THINGY); \ } #define PYRA_SYSFS_R(thingy, THINGY) \ static ssize_t pyra_sysfs_read_ ## thingy(struct file *fp, \ struct kobject *kobj, struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ 
return pyra_sysfs_read(fp, kobj, buf, off, count, \ PYRA_SIZE_ ## THINGY, PYRA_COMMAND_ ## THINGY); \ } #define PYRA_SYSFS_RW(thingy, THINGY) \ PYRA_SYSFS_W(thingy, THINGY) \ PYRA_SYSFS_R(thingy, THINGY) #define PYRA_BIN_ATTRIBUTE_RW(thingy, THINGY) \ PYRA_SYSFS_RW(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0660 }, \ .size = PYRA_SIZE_ ## THINGY, \ .read = pyra_sysfs_read_ ## thingy, \ .write = pyra_sysfs_write_ ## thingy \ } #define PYRA_BIN_ATTRIBUTE_R(thingy, THINGY) \ PYRA_SYSFS_R(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0440 }, \ .size = PYRA_SIZE_ ## THINGY, \ .read = pyra_sysfs_read_ ## thingy, \ } #define PYRA_BIN_ATTRIBUTE_W(thingy, THINGY) \ PYRA_SYSFS_W(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0220 }, \ .size = PYRA_SIZE_ ## THINGY, \ .write = pyra_sysfs_write_ ## thingy \ } PYRA_BIN_ATTRIBUTE_W(control, CONTROL); PYRA_BIN_ATTRIBUTE_RW(info, INFO); PYRA_BIN_ATTRIBUTE_RW(profile_settings, PROFILE_SETTINGS); PYRA_BIN_ATTRIBUTE_RW(profile_buttons, PROFILE_BUTTONS); static ssize_t pyra_sysfs_read_profilex_settings(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); ssize_t retval; retval = pyra_send_control(usb_dev, *(uint *)(attr->private), PYRA_CONTROL_REQUEST_PROFILE_SETTINGS); if (retval) return retval; return pyra_sysfs_read(fp, kobj, buf, off, count, PYRA_SIZE_PROFILE_SETTINGS, PYRA_COMMAND_PROFILE_SETTINGS); } static ssize_t pyra_sysfs_read_profilex_buttons(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); ssize_t retval; retval = pyra_send_control(usb_dev, *(uint *)(attr->private), PYRA_CONTROL_REQUEST_PROFILE_BUTTONS); if (retval) return retval; return pyra_sysfs_read(fp, kobj, buf, off, count, PYRA_SIZE_PROFILE_BUTTONS, PYRA_COMMAND_PROFILE_BUTTONS); } #define PROFILE_ATTR(number) \ static struct bin_attribute bin_attr_profile##number##_settings = { \ .attr = { .name = "profile" #number "_settings", .mode = 0440 }, \ .size = PYRA_SIZE_PROFILE_SETTINGS, \ .read = pyra_sysfs_read_profilex_settings, \ .private = &profile_numbers[number-1], \ }; \ static struct bin_attribute bin_attr_profile##number##_buttons = { \ .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \ .size = PYRA_SIZE_PROFILE_BUTTONS, \ .read = pyra_sysfs_read_profilex_buttons, \ .private = &profile_numbers[number-1], \ }; PROFILE_ATTR(1); PROFILE_ATTR(2); PROFILE_ATTR(3); PROFILE_ATTR(4); PROFILE_ATTR(5); static ssize_t pyra_sysfs_write_settings(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval = 0; struct pyra_roccat_report roccat_report; struct pyra_settings const *settings; if (off != 0 || count != PYRA_SIZE_SETTINGS) return -EINVAL; settings = (struct pyra_settings const *)buf; if (settings->startup_profile >= ARRAY_SIZE(pyra->profile_settings)) return -EINVAL; 
mutex_lock(&pyra->pyra_lock); retval = pyra_set_settings(usb_dev, settings); if (retval) { mutex_unlock(&pyra->pyra_lock); return retval; } profile_activated(pyra, settings->startup_profile); roccat_report.type = PYRA_MOUSE_EVENT_BUTTON_TYPE_PROFILE_2; roccat_report.value = settings->startup_profile + 1; roccat_report.key = 0; roccat_report_event(pyra->chrdev_minor, (uint8_t const *)&roccat_report); mutex_unlock(&pyra->pyra_lock); return PYRA_SIZE_SETTINGS; } PYRA_SYSFS_R(settings, SETTINGS); static struct bin_attribute bin_attr_settings = __BIN_ATTR(settings, (S_IWUSR | S_IRUGO), pyra_sysfs_read_settings, pyra_sysfs_write_settings, PYRA_SIZE_SETTINGS); static ssize_t pyra_sysfs_show_actual_cpi(struct device *dev, struct device_attribute *attr, char *buf) { struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", pyra->actual_cpi); } static DEVICE_ATTR(actual_cpi, 0440, pyra_sysfs_show_actual_cpi, NULL); static ssize_t pyra_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct pyra_device *pyra = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); struct pyra_settings settings; mutex_lock(&pyra->pyra_lock); roccat_common2_receive(usb_dev, PYRA_COMMAND_SETTINGS, &settings, PYRA_SIZE_SETTINGS); mutex_unlock(&pyra->pyra_lock); return snprintf(buf, PAGE_SIZE, "%d\n", settings.startup_profile); } static DEVICE_ATTR(actual_profile, 0440, pyra_sysfs_show_actual_profile, NULL); static DEVICE_ATTR(startup_profile, 0440, pyra_sysfs_show_actual_profile, NULL); static ssize_t pyra_sysfs_show_firmware_version(struct device *dev, struct device_attribute *attr, char *buf) { struct pyra_device *pyra; struct usb_device *usb_dev; struct pyra_info info; dev = dev->parent->parent; pyra = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); mutex_lock(&pyra->pyra_lock); roccat_common2_receive(usb_dev, PYRA_COMMAND_INFO, &info, PYRA_SIZE_INFO); mutex_unlock(&pyra->pyra_lock); return snprintf(buf, PAGE_SIZE, "%d\n", info.firmware_version); } static DEVICE_ATTR(firmware_version, 0440, pyra_sysfs_show_firmware_version, NULL); static struct attribute *pyra_attrs[] = { &dev_attr_actual_cpi.attr, &dev_attr_actual_profile.attr, &dev_attr_firmware_version.attr, &dev_attr_startup_profile.attr, NULL, }; static struct bin_attribute *pyra_bin_attributes[] = { &bin_attr_control, &bin_attr_info, &bin_attr_profile_settings, &bin_attr_profile_buttons, &bin_attr_settings, &bin_attr_profile1_settings, &bin_attr_profile2_settings, &bin_attr_profile3_settings, &bin_attr_profile4_settings, &bin_attr_profile5_settings, &bin_attr_profile1_buttons, &bin_attr_profile2_buttons, &bin_attr_profile3_buttons, &bin_attr_profile4_buttons, &bin_attr_profile5_buttons, NULL, }; static const struct attribute_group pyra_group = { .attrs = pyra_attrs, .bin_attrs = pyra_bin_attributes, }; static const struct attribute_group *pyra_groups[] = { &pyra_group, NULL, }; /* pyra_class is used for creating sysfs attributes via roccat char device */ static const struct class pyra_class = { .name = "pyra", .dev_groups = pyra_groups, }; static int pyra_init_pyra_device_struct(struct usb_device *usb_dev, struct pyra_device *pyra) { struct pyra_settings settings; int retval, i; mutex_init(&pyra->pyra_lock); retval = pyra_get_settings(usb_dev, &settings); if (retval) return retval; for (i = 0; i < 5; ++i) { retval = 
pyra_get_profile_settings(usb_dev, &pyra->profile_settings[i], i); if (retval) return retval; } profile_activated(pyra, settings.startup_profile); return 0; } static int pyra_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct pyra_device *pyra; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { pyra = kzalloc(sizeof(*pyra), GFP_KERNEL); if (!pyra) { hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, pyra); retval = pyra_init_pyra_device_struct(usb_dev, pyra); if (retval) { hid_err(hdev, "couldn't init struct pyra_device\n"); goto exit_free; } retval = roccat_connect(&pyra_class, hdev, sizeof(struct pyra_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { pyra->chrdev_minor = retval; pyra->roccat_claimed = 1; } } else { hid_set_drvdata(hdev, NULL); } return 0; exit_free: kfree(pyra); return retval; } static void pyra_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct pyra_device *pyra; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { pyra = hid_get_drvdata(hdev); if (pyra->roccat_claimed) roccat_disconnect(pyra->chrdev_minor); kfree(hid_get_drvdata(hdev)); } } static int pyra_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = pyra_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void pyra_remove(struct hid_device *hdev) { pyra_remove_specials(hdev); hid_hw_stop(hdev); } static void pyra_keep_values_up_to_date(struct pyra_device *pyra, u8 const *data) { struct pyra_mouse_event_button const *button_event; switch (data[0]) { case PYRA_MOUSE_REPORT_NUMBER_BUTTON: button_event = (struct pyra_mouse_event_button const *)data; switch (button_event->type) { case PYRA_MOUSE_EVENT_BUTTON_TYPE_PROFILE_2: profile_activated(pyra, button_event->data1 - 1); break; case PYRA_MOUSE_EVENT_BUTTON_TYPE_CPI: pyra->actual_cpi = button_event->data1; break; } break; } } static void pyra_report_to_chrdev(struct pyra_device const *pyra, u8 const *data) { struct pyra_roccat_report roccat_report; struct pyra_mouse_event_button const *button_event; if (data[0] != PYRA_MOUSE_REPORT_NUMBER_BUTTON) return; button_event = (struct pyra_mouse_event_button const *)data; switch (button_event->type) { case PYRA_MOUSE_EVENT_BUTTON_TYPE_PROFILE_2: case PYRA_MOUSE_EVENT_BUTTON_TYPE_CPI: roccat_report.type = button_event->type; roccat_report.value = button_event->data1; roccat_report.key = 0; roccat_report_event(pyra->chrdev_minor, (uint8_t const *)&roccat_report); break; case PYRA_MOUSE_EVENT_BUTTON_TYPE_MACRO: case PYRA_MOUSE_EVENT_BUTTON_TYPE_SHORTCUT: case PYRA_MOUSE_EVENT_BUTTON_TYPE_QUICKLAUNCH: if (button_event->data2 == PYRA_MOUSE_EVENT_BUTTON_PRESS) { roccat_report.type = button_event->type; roccat_report.key = button_event->data1; /* * pyra reports profile numbers with range 1-5. * Keeping this behaviour. 
*/ roccat_report.value = pyra->actual_profile + 1; roccat_report_event(pyra->chrdev_minor, (uint8_t const *)&roccat_report); } break; } } static int pyra_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct pyra_device *pyra = hid_get_drvdata(hdev); if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) return 0; if (pyra == NULL) return 0; pyra_keep_values_up_to_date(pyra, data); if (pyra->roccat_claimed) pyra_report_to_chrdev(pyra, data); return 0; } static const struct hid_device_id pyra_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_PYRA_WIRED) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_PYRA_WIRELESS) }, { } }; MODULE_DEVICE_TABLE(hid, pyra_devices); static struct hid_driver pyra_driver = { .name = "pyra", .id_table = pyra_devices, .probe = pyra_probe, .remove = pyra_remove, .raw_event = pyra_raw_event }; static int __init pyra_init(void) { int retval; /* class name has to be same as driver name */ retval = class_register(&pyra_class); if (retval) return retval; retval = hid_register_driver(&pyra_driver); if (retval) class_unregister(&pyra_class); return retval; } static void __exit pyra_exit(void) { hid_unregister_driver(&pyra_driver); class_unregister(&pyra_class); } module_init(pyra_init); module_exit(pyra_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Pyra driver"); MODULE_LICENSE("GPL v2");
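/*
 * Illustrative userspace sketch (not part of the driver): reading the text
 * attributes that pyra_class exposes above (actual_cpi, actual_profile,
 * firmware_version, startup_profile). The sysfs directory of the roccat
 * class device differs between systems, so it is taken from argv[1] here
 * rather than hard-coded; everything else is ordinary stdio.
 */
#include <stdio.h>

static void show_attr(const char *dir, const char *name)
{
	char path[512], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", dir, name);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%-20s %s", name, buf);
	fclose(f);
}

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <sysfs-device-dir>\n", argv[0]);
		return 1;
	}

	show_attr(argv[1], "actual_cpi");
	show_attr(argv[1], "actual_profile");
	show_attr(argv[1], "firmware_version");
	show_attr(argv[1], "startup_profile");

	return 0;
}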
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
#include <linux/sched/clock.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off stack temporal storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

struct page_owner {
	unsigned short order;
	short last_migrate_reason;
	gfp_t gfp_mask;
	depot_stack_handle_t handle;
	depot_stack_handle_t free_handle;
	u64 ts_nsec;
	u64 free_ts_nsec;
	char comm[TASK_COMM_LEN];
	pid_t pid;
	pid_t tgid;
	pid_t free_pid;
	pid_t free_tgid;
};

struct stack {
	struct stack_record *stack_record;
	struct stack *next;
};
static struct stack dummy_stack;
static struct stack failure_stack;
static struct stack *stack_list;
static DEFINE_SPINLOCK(stack_list_lock);

static bool page_owner_enabled __initdata;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static depot_stack_handle_t early_handle;

static void init_early_allocated_pages(void);

static inline void set_current_in_page_owner(void)
{
	/*
	 * Avoid recursion.
	 *
	 * We might need to allocate more memory from page_owner code, so make
	 * sure to signal it in order to avoid recursion.
*/ current->in_page_owner = 1; } static inline void unset_current_in_page_owner(void) { current->in_page_owner = 0; } static int __init early_page_owner_param(char *buf) { int ret = kstrtobool(buf, &page_owner_enabled); if (page_owner_enabled) stack_depot_request_early_init(); return ret; } early_param("page_owner", early_page_owner_param); static __init bool need_page_owner(void) { return page_owner_enabled; } static __always_inline depot_stack_handle_t create_dummy_stack(void) { unsigned long entries[4]; unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); return stack_depot_save(entries, nr_entries, GFP_KERNEL); } static noinline void register_dummy_stack(void) { dummy_handle = create_dummy_stack(); } static noinline void register_failure_stack(void) { failure_handle = create_dummy_stack(); } static noinline void register_early_stack(void) { early_handle = create_dummy_stack(); } static __init void init_page_owner(void) { if (!page_owner_enabled) return; register_dummy_stack(); register_failure_stack(); register_early_stack(); init_early_allocated_pages(); /* Initialize dummy and failure stacks and link them to stack_list */ dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle); failure_stack.stack_record = __stack_depot_get_stack_record(failure_handle); if (dummy_stack.stack_record) refcount_set(&dummy_stack.stack_record->count, 1); if (failure_stack.stack_record) refcount_set(&failure_stack.stack_record->count, 1); dummy_stack.next = &failure_stack; stack_list = &dummy_stack; static_branch_enable(&page_owner_inited); } struct page_ext_operations page_owner_ops = { .size = sizeof(struct page_owner), .need = need_page_owner, .init = init_page_owner, .need_shared_flags = true, }; static inline struct page_owner *get_page_owner(struct page_ext *page_ext) { return page_ext_data(page_ext, &page_owner_ops); } static noinline depot_stack_handle_t save_stack(gfp_t flags) { unsigned long entries[PAGE_OWNER_STACK_DEPTH]; depot_stack_handle_t handle; unsigned int nr_entries; if (current->in_page_owner) return dummy_handle; set_current_in_page_owner(); nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2); handle = stack_depot_save(entries, nr_entries, flags); if (!handle) handle = failure_handle; unset_current_in_page_owner(); return handle; } static void add_stack_record_to_list(struct stack_record *stack_record, gfp_t gfp_mask) { unsigned long flags; struct stack *stack; /* Filter gfp_mask the same way stackdepot does, for consistency */ gfp_mask &= ~GFP_ZONEMASK; gfp_mask &= (GFP_ATOMIC | GFP_KERNEL); gfp_mask |= __GFP_NOWARN; set_current_in_page_owner(); stack = kmalloc(sizeof(*stack), gfp_mask); if (!stack) { unset_current_in_page_owner(); return; } unset_current_in_page_owner(); stack->stack_record = stack_record; stack->next = NULL; spin_lock_irqsave(&stack_list_lock, flags); stack->next = stack_list; /* * This pairs with smp_load_acquire() from function * stack_start(). This guarantees that stack_start() * will see an updated stack_list before starting to * traverse the list. */ smp_store_release(&stack_list, stack); spin_unlock_irqrestore(&stack_list_lock, flags); } static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask, int nr_base_pages) { struct stack_record *stack_record = __stack_depot_get_stack_record(handle); if (!stack_record) return; /* * New stack_record's that do not use STACK_DEPOT_FLAG_GET start * with REFCOUNT_SATURATED to catch spurious increments of their * refcount. 
* Since we do not use STACK_DEPOT_FLAG_GET API, let us * set a refcount of 1 ourselves. */ if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) { int old = REFCOUNT_SATURATED; if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1)) /* Add the new stack_record to our list */ add_stack_record_to_list(stack_record, gfp_mask); } refcount_add(nr_base_pages, &stack_record->count); } static void dec_stack_record_count(depot_stack_handle_t handle, int nr_base_pages) { struct stack_record *stack_record = __stack_depot_get_stack_record(handle); if (!stack_record) return; if (refcount_sub_and_test(nr_base_pages, &stack_record->count)) pr_warn("%s: refcount went to 0 for %u handle\n", __func__, handle); } static inline void __update_page_owner_handle(struct page_ext *page_ext, depot_stack_handle_t handle, unsigned short order, gfp_t gfp_mask, short last_migrate_reason, u64 ts_nsec, pid_t pid, pid_t tgid, char *comm) { int i; struct page_owner *page_owner; for (i = 0; i < (1 << order); i++) { page_owner = get_page_owner(page_ext); page_owner->handle = handle; page_owner->order = order; page_owner->gfp_mask = gfp_mask; page_owner->last_migrate_reason = last_migrate_reason; page_owner->pid = pid; page_owner->tgid = tgid; page_owner->ts_nsec = ts_nsec; strscpy(page_owner->comm, comm, sizeof(page_owner->comm)); __set_bit(PAGE_EXT_OWNER, &page_ext->flags); __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); page_ext = page_ext_next(page_ext); } } static inline void __update_page_owner_free_handle(struct page_ext *page_ext, depot_stack_handle_t handle, unsigned short order, pid_t pid, pid_t tgid, u64 free_ts_nsec) { int i; struct page_owner *page_owner; for (i = 0; i < (1 << order); i++) { page_owner = get_page_owner(page_ext); /* Only __reset_page_owner() wants to clear the bit */ if (handle) { __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); page_owner->free_handle = handle; } page_owner->free_ts_nsec = free_ts_nsec; page_owner->free_pid = current->pid; page_owner->free_tgid = current->tgid; page_ext = page_ext_next(page_ext); } } void __reset_page_owner(struct page *page, unsigned short order) { struct page_ext *page_ext; depot_stack_handle_t handle; depot_stack_handle_t alloc_handle; struct page_owner *page_owner; u64 free_ts_nsec = local_clock(); page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; page_owner = get_page_owner(page_ext); alloc_handle = page_owner->handle; handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); __update_page_owner_free_handle(page_ext, handle, order, current->pid, current->tgid, free_ts_nsec); page_ext_put(page_ext); if (alloc_handle != early_handle) /* * early_handle is being set as a handle for all those * early allocated pages. See init_pages_in_zone(). * Since their refcount is not being incremented because * the machinery is not ready yet, we cannot decrement * their refcount either. 
*/ dec_stack_record_count(alloc_handle, 1 << order); } noinline void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask) { struct page_ext *page_ext; u64 ts_nsec = local_clock(); depot_stack_handle_t handle; handle = save_stack(gfp_mask); page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1, current->pid, current->tgid, ts_nsec, current->comm); page_ext_put(page_ext); inc_stack_record_count(handle, gfp_mask, 1 << order); } void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = page_ext_get(page); struct page_owner *page_owner; if (unlikely(!page_ext)) return; page_owner = get_page_owner(page_ext); page_owner->last_migrate_reason = reason; page_ext_put(page_ext); } void __split_page_owner(struct page *page, int old_order, int new_order) { int i; struct page_ext *page_ext = page_ext_get(page); struct page_owner *page_owner; if (unlikely(!page_ext)) return; for (i = 0; i < (1 << old_order); i++) { page_owner = get_page_owner(page_ext); page_owner->order = new_order; page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } void __folio_copy_owner(struct folio *newfolio, struct folio *old) { int i; struct page_ext *old_ext; struct page_ext *new_ext; struct page_owner *old_page_owner; struct page_owner *new_page_owner; depot_stack_handle_t migrate_handle; old_ext = page_ext_get(&old->page); if (unlikely(!old_ext)) return; new_ext = page_ext_get(&newfolio->page); if (unlikely(!new_ext)) { page_ext_put(old_ext); return; } old_page_owner = get_page_owner(old_ext); new_page_owner = get_page_owner(new_ext); migrate_handle = new_page_owner->handle; __update_page_owner_handle(new_ext, old_page_owner->handle, old_page_owner->order, old_page_owner->gfp_mask, old_page_owner->last_migrate_reason, old_page_owner->ts_nsec, old_page_owner->pid, old_page_owner->tgid, old_page_owner->comm); /* * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio * will be freed after migration. Keep them until then as they may be * useful. */ __update_page_owner_free_handle(new_ext, 0, old_page_owner->order, old_page_owner->free_pid, old_page_owner->free_tgid, old_page_owner->free_ts_nsec); /* * We linked the original stack to the new folio, we need to do the same * for the new one and the old folio otherwise there will be an imbalance * when subtracting those pages from the stack. */ for (i = 0; i < (1 << new_page_owner->order); i++) { old_page_owner->handle = migrate_handle; old_ext = page_ext_next(old_ext); old_page_owner = get_page_owner(old_ext); } page_ext_put(new_ext); page_ext_put(old_ext); } void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) { struct page *page; struct page_ext *page_ext; struct page_owner *page_owner; unsigned long pfn, block_end_pfn; unsigned long end_pfn = zone_end_pfn(zone); unsigned long count[MIGRATE_TYPES] = { 0, }; int pageblock_mt, page_mt; int i; /* Scan block by block. First and last block may be incomplete */ pfn = zone->zone_start_pfn; /* * Walk the zone in pageblock_nr_pages steps. If a page block spans * a zone boundary, it will be double counted between zones. 
This does * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { page = pfn_to_online_page(pfn); if (!page) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } block_end_pfn = pageblock_end_pfn(pfn); block_end_pfn = min(block_end_pfn, end_pfn); pageblock_mt = get_pageblock_migratetype(page); for (; pfn < block_end_pfn; pfn++) { /* The pageblock is online, no need to recheck. */ page = pfn_to_page(pfn); if (page_zone(page) != zone) continue; if (PageBuddy(page)) { unsigned long freepage_order; freepage_order = buddy_order_unsafe(page); if (freepage_order <= MAX_PAGE_ORDER) pfn += (1UL << freepage_order) - 1; continue; } if (PageReserved(page)) continue; page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) goto ext_put_continue; page_owner = get_page_owner(page_ext); page_mt = gfp_migratetype(page_owner->gfp_mask); if (pageblock_mt != page_mt) { if (is_migrate_cma(pageblock_mt)) count[MIGRATE_MOVABLE]++; else count[pageblock_mt]++; pfn = block_end_pfn; page_ext_put(page_ext); break; } pfn += (1UL << page_owner->order) - 1; ext_put_continue: page_ext_put(page_ext); } } /* Print counts */ seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); for (i = 0; i < MIGRATE_TYPES; i++) seq_printf(m, "%12lu ", count[i]); seq_putc(m, '\n'); } /* * Looking for memcg information and print it out */ static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret, struct page *page) { #ifdef CONFIG_MEMCG unsigned long memcg_data; struct mem_cgroup *memcg; bool online; char name[80]; rcu_read_lock(); memcg_data = READ_ONCE(page->memcg_data); if (!memcg_data) goto out_unlock; if (memcg_data & MEMCG_DATA_OBJCGS) ret += scnprintf(kbuf + ret, count - ret, "Slab cache page\n"); memcg = page_memcg_check(page); if (!memcg) goto out_unlock; online = (memcg->css.flags & CSS_ONLINE); cgroup_name(memcg->css.cgroup, name, sizeof(name)); ret += scnprintf(kbuf + ret, count - ret, "Charged %sto %smemcg %s\n", PageMemcgKmem(page) ? "(via objcg) " : "", online ? 
"" : "offline ", name); out_unlock: rcu_read_unlock(); #endif /* CONFIG_MEMCG */ return ret; } static ssize_t print_page_owner(char __user *buf, size_t count, unsigned long pfn, struct page *page, struct page_owner *page_owner, depot_stack_handle_t handle) { int ret, pageblock_mt, page_mt; char *kbuf; count = min_t(size_t, count, PAGE_SIZE); kbuf = kmalloc(count, GFP_KERNEL); if (!kbuf) return -ENOMEM; ret = scnprintf(kbuf, count, "Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns\n", page_owner->order, page_owner->gfp_mask, &page_owner->gfp_mask, page_owner->pid, page_owner->tgid, page_owner->comm, page_owner->ts_nsec); /* Print information relevant to grouping pages by mobility */ pageblock_mt = get_pageblock_migratetype(page); page_mt = gfp_migratetype(page_owner->gfp_mask); ret += scnprintf(kbuf + ret, count - ret, "PFN 0x%lx type %s Block %lu type %s Flags %pGp\n", pfn, migratetype_names[page_mt], pfn >> pageblock_order, migratetype_names[pageblock_mt], &page->flags); ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0); if (ret >= count) goto err; if (page_owner->last_migrate_reason != -1) { ret += scnprintf(kbuf + ret, count - ret, "Page has been migrated, last migrate reason: %s\n", migrate_reason_names[page_owner->last_migrate_reason]); } ret = print_page_owner_memcg(kbuf, count, ret, page); ret += snprintf(kbuf + ret, count - ret, "\n"); if (ret >= count) goto err; if (copy_to_user(buf, kbuf, ret)) ret = -EFAULT; kfree(kbuf); return ret; err: kfree(kbuf); return -ENOMEM; } void __dump_page_owner(const struct page *page) { struct page_ext *page_ext = page_ext_get((void *)page); struct page_owner *page_owner; depot_stack_handle_t handle; gfp_t gfp_mask; int mt; if (unlikely(!page_ext)) { pr_alert("There is not page extension available.\n"); return; } page_owner = get_page_owner(page_ext); gfp_mask = page_owner->gfp_mask; mt = gfp_migratetype(gfp_mask); if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not present (never set?)\n"); page_ext_put(page_ext); return; } if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) pr_alert("page_owner tracks the page as allocated\n"); else pr_alert("page_owner tracks the page as freed\n"); pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n", page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask, page_owner->pid, page_owner->tgid, page_owner->comm, page_owner->ts_nsec, page_owner->free_ts_nsec); handle = READ_ONCE(page_owner->handle); if (!handle) pr_alert("page_owner allocation stack trace missing\n"); else stack_depot_print(handle); handle = READ_ONCE(page_owner->free_handle); if (!handle) { pr_alert("page_owner free stack trace missing\n"); } else { pr_alert("page last free pid %d tgid %d stack trace:\n", page_owner->free_pid, page_owner->free_tgid); stack_depot_print(handle); } if (page_owner->last_migrate_reason != -1) pr_alert("page has been migrated, last migrate reason: %s\n", migrate_reason_names[page_owner->last_migrate_reason]); page_ext_put(page_ext); } static ssize_t read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned long pfn; struct page *page; struct page_ext *page_ext; struct page_owner *page_owner; depot_stack_handle_t handle; if (!static_branch_unlikely(&page_owner_inited)) return -EINVAL; page = NULL; if (*ppos == 0) pfn = min_low_pfn; else pfn = *ppos; /* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */ while 
(!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) pfn++; /* Find an allocated page */ for (; pfn < max_pfn; pfn++) { /* * This temporary page_owner is required so * that we can avoid the context switches while holding * the rcu lock and copying the page owner information to * user through copy_to_user() or GFP_KERNEL allocations. */ struct page_owner page_owner_tmp; /* * If the new page is in a new MAX_ORDER_NR_PAGES area, * validate the area as existing, skip it if not */ if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) { pfn += MAX_ORDER_NR_PAGES - 1; continue; } page = pfn_to_page(pfn); if (PageBuddy(page)) { unsigned long freepage_order = buddy_order_unsafe(page); if (freepage_order <= MAX_PAGE_ORDER) pfn += (1UL << freepage_order) - 1; continue; } page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; /* * Some pages could be missed by concurrent allocation or free, * because we don't hold the zone lock. */ if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) goto ext_put_continue; /* * Although we do have the info about past allocation of free * pages, it's not relevant for current memory usage. */ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) goto ext_put_continue; page_owner = get_page_owner(page_ext); /* * Don't print "tail" pages of high-order allocations as that * would inflate the stats. */ if (!IS_ALIGNED(pfn, 1 << page_owner->order)) goto ext_put_continue; /* * Access to page_ext->handle isn't synchronous so we should * be careful to access it. */ handle = READ_ONCE(page_owner->handle); if (!handle) goto ext_put_continue; /* Record the next PFN to read in the file offset */ *ppos = pfn + 1; page_owner_tmp = *page_owner; page_ext_put(page_ext); return print_page_owner(buf, count, pfn, page, &page_owner_tmp, handle); ext_put_continue: page_ext_put(page_ext); } return 0; } static loff_t lseek_page_owner(struct file *file, loff_t offset, int orig) { switch (orig) { case SEEK_SET: file->f_pos = offset; break; case SEEK_CUR: file->f_pos += offset; break; default: return -EINVAL; } return file->f_pos; } static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) { unsigned long pfn = zone->zone_start_pfn; unsigned long end_pfn = zone_end_pfn(zone); unsigned long count = 0; /* * Walk the zone in pageblock_nr_pages steps. If a page block spans * a zone boundary, it will be double counted between zones. This does * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { unsigned long block_end_pfn; if (!pfn_valid(pfn)) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } block_end_pfn = pageblock_end_pfn(pfn); block_end_pfn = min(block_end_pfn, end_pfn); for (; pfn < block_end_pfn; pfn++) { struct page *page = pfn_to_page(pfn); struct page_ext *page_ext; if (page_zone(page) != zone) continue; /* * To avoid having to grab zone->lock, be a little * careful when reading buddy page order. The only * danger is that we skip too much and potentially miss * some early allocated pages, which is better than * heavy lock contention. 
*/ if (PageBuddy(page)) { unsigned long order = buddy_order_unsafe(page); if (order > 0 && order <= MAX_PAGE_ORDER) pfn += (1UL << order) - 1; continue; } if (PageReserved(page)) continue; page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; /* Maybe overlapping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) goto ext_put_continue; /* Found early allocated page */ __update_page_owner_handle(page_ext, early_handle, 0, 0, -1, local_clock(), current->pid, current->tgid, current->comm); count++; ext_put_continue: page_ext_put(page_ext); } cond_resched(); } pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n", pgdat->node_id, zone->name, count); } static void init_zones_in_node(pg_data_t *pgdat) { struct zone *zone; struct zone *node_zones = pgdat->node_zones; for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { if (!populated_zone(zone)) continue; init_pages_in_zone(pgdat, zone); } } static void init_early_allocated_pages(void) { pg_data_t *pgdat; for_each_online_pgdat(pgdat) init_zones_in_node(pgdat); } static const struct file_operations proc_page_owner_operations = { .read = read_page_owner, .llseek = lseek_page_owner, }; static void *stack_start(struct seq_file *m, loff_t *ppos) { struct stack *stack; if (*ppos == -1UL) return NULL; if (!*ppos) { /* * This pairs with smp_store_release() from function * add_stack_record_to_list(), so we get a consistent * value of stack_list. */ stack = smp_load_acquire(&stack_list); m->private = stack; } else { stack = m->private; } return stack; } static void *stack_next(struct seq_file *m, void *v, loff_t *ppos) { struct stack *stack = v; stack = stack->next; *ppos = stack ? *ppos + 1 : -1UL; m->private = stack; return stack; } static unsigned long page_owner_pages_threshold; static int stack_print(struct seq_file *m, void *v) { int i, nr_base_pages; struct stack *stack = v; unsigned long *entries; unsigned long nr_entries; struct stack_record *stack_record = stack->stack_record; if (!stack->stack_record) return 0; nr_entries = stack_record->size; entries = stack_record->entries; nr_base_pages = refcount_read(&stack_record->count) - 1; if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold) return 0; for (i = 0; i < nr_entries; i++) seq_printf(m, " %pS\n", (void *)entries[i]); seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages); return 0; } static void stack_stop(struct seq_file *m, void *v) { } static const struct seq_operations page_owner_stack_op = { .start = stack_start, .next = stack_next, .stop = stack_stop, .show = stack_print }; static int page_owner_stack_open(struct inode *inode, struct file *file) { return seq_open_private(file, &page_owner_stack_op, 0); } static const struct file_operations page_owner_stack_operations = { .open = page_owner_stack_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static int page_owner_threshold_get(void *data, u64 *val) { *val = READ_ONCE(page_owner_pages_threshold); return 0; } static int page_owner_threshold_set(void *data, u64 val) { WRITE_ONCE(page_owner_pages_threshold, val); return 0; } DEFINE_SIMPLE_ATTRIBUTE(proc_page_owner_threshold, &page_owner_threshold_get, &page_owner_threshold_set, "%llu"); static int __init pageowner_init(void) { struct dentry *dir; if (!static_branch_unlikely(&page_owner_inited)) { pr_info("page_owner is disabled\n"); return 0; } debugfs_create_file("page_owner", 0400, NULL, NULL, &proc_page_owner_operations); dir = debugfs_create_dir("page_owner_stacks", NULL); 
	debugfs_create_file("show_stacks", 0400, dir, NULL,
			    &page_owner_stack_operations);
	debugfs_create_file("count_threshold", 0600, dir, NULL,
			    &proc_page_owner_threshold);

	return 0;
}
late_initcall(pageowner_init)
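/*
 * Illustrative userspace sketch (not part of the kernel file): consuming the
 * debugfs interface registered in pageowner_init() above. It assumes debugfs
 * is mounted at /sys/kernel/debug and that the kernel was booted with
 * page_owner=on so that page_owner_inited is enabled; the threshold value
 * written here is arbitrary and only filters stack_print() output.
 */
#include <stdio.h>

#define STACKS_DIR "/sys/kernel/debug/page_owner_stacks"

int main(void)
{
	char buf[4096];
	size_t n;
	FILE *f;

	/* Only report stacks still holding at least 1000 base pages. */
	f = fopen(STACKS_DIR "/count_threshold", "w");
	if (f) {
		fprintf(f, "1000\n");
		fclose(f);
	}

	/* Dump the per-stack counts maintained by inc/dec_stack_record_count(). */
	f = fopen(STACKS_DIR "/show_stacks", "r");
	if (!f) {
		perror(STACKS_DIR "/show_stacks");
		return 1;
	}
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		fwrite(buf, 1, n, stdout);
	fclose(f);

	return 0;
}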
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2007, Frank A Kingswood <frank@kingswood-consulting.co.uk>
 * Copyright 2007, Werner Cornelius <werner@cornelius-consult.de>
 * Copyright 2009,
Boris Hajduk <boris@hajduk.org> * * ch341.c implements a serial port driver for the Winchiphead CH341. * * The CH341 device can be used to implement an RS232 asynchronous * serial port, an IEEE-1284 parallel printer port or a memory-like * interface. In all cases the CH341 supports an I2C interface as well. * This driver only supports the asynchronous serial interface. */ #include <linux/kernel.h> #include <linux/tty.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/serial.h> #include <asm/unaligned.h> #define DEFAULT_BAUD_RATE 9600 #define DEFAULT_TIMEOUT 1000 /* flags for IO-Bits */ #define CH341_BIT_RTS (1 << 6) #define CH341_BIT_DTR (1 << 5) /******************************/ /* interrupt pipe definitions */ /******************************/ /* always 4 interrupt bytes */ /* first irq byte normally 0x08 */ /* second irq byte base 0x7d + below */ /* third irq byte base 0x94 + below */ /* fourth irq byte normally 0xee */ /* second interrupt byte */ #define CH341_MULT_STAT 0x04 /* multiple status since last interrupt event */ /* status returned in third interrupt answer byte, inverted in data from irq */ #define CH341_BIT_CTS 0x01 #define CH341_BIT_DSR 0x02 #define CH341_BIT_RI 0x04 #define CH341_BIT_DCD 0x08 #define CH341_BITS_MODEM_STAT 0x0f /* all bits */ /* Break support - the information used to implement this was gleaned from * the Net/FreeBSD uchcom.c driver by Takanori Watanabe. Domo arigato. */ #define CH341_REQ_READ_VERSION 0x5F #define CH341_REQ_WRITE_REG 0x9A #define CH341_REQ_READ_REG 0x95 #define CH341_REQ_SERIAL_INIT 0xA1 #define CH341_REQ_MODEM_CTRL 0xA4 #define CH341_REG_BREAK 0x05 #define CH341_REG_PRESCALER 0x12 #define CH341_REG_DIVISOR 0x13 #define CH341_REG_LCR 0x18 #define CH341_REG_LCR2 0x25 #define CH341_NBREAK_BITS 0x01 #define CH341_LCR_ENABLE_RX 0x80 #define CH341_LCR_ENABLE_TX 0x40 #define CH341_LCR_MARK_SPACE 0x20 #define CH341_LCR_PAR_EVEN 0x10 #define CH341_LCR_ENABLE_PAR 0x08 #define CH341_LCR_STOP_BITS_2 0x04 #define CH341_LCR_CS8 0x03 #define CH341_LCR_CS7 0x02 #define CH341_LCR_CS6 0x01 #define CH341_LCR_CS5 0x00 #define CH341_QUIRK_LIMITED_PRESCALER BIT(0) #define CH341_QUIRK_SIMULATE_BREAK BIT(1) static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1a86, 0x5523) }, { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, { USB_DEVICE(0x2184, 0x0057) }, { USB_DEVICE(0x4348, 0x5523) }, { USB_DEVICE(0x9986, 0x7523) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); struct ch341_private { spinlock_t lock; /* access lock */ unsigned baud_rate; /* set baud rate */ u8 mcr; u8 msr; u8 lcr; unsigned long quirks; u8 version; unsigned long break_end; }; static void ch341_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int ch341_control_out(struct usb_device *dev, u8 request, u16 value, u16 index) { int r; dev_dbg(&dev->dev, "%s - (%02x,%04x,%04x)\n", __func__, request, value, index); r = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, value, index, NULL, 0, DEFAULT_TIMEOUT); if (r < 0) dev_err(&dev->dev, "failed to send control message: %d\n", r); return r; } static int ch341_control_in(struct usb_device *dev, u8 request, u16 value, u16 index, char *buf, unsigned bufsize) { int r; dev_dbg(&dev->dev, "%s - (%02x,%04x,%04x,%u)\n", __func__, request, value, index, bufsize); r = usb_control_msg_recv(dev, 0, request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 
value, index, buf, bufsize, DEFAULT_TIMEOUT, GFP_KERNEL); if (r) { dev_err(&dev->dev, "failed to receive control message: %d\n", r); return r; } return 0; } #define CH341_CLKRATE 48000000 #define CH341_CLK_DIV(ps, fact) (1 << (12 - 3 * (ps) - (fact))) #define CH341_MIN_RATE(ps) (CH341_CLKRATE / (CH341_CLK_DIV((ps), 1) * 512)) static const speed_t ch341_min_rates[] = { CH341_MIN_RATE(0), CH341_MIN_RATE(1), CH341_MIN_RATE(2), CH341_MIN_RATE(3), }; /* Supported range is 46 to 3000000 bps. */ #define CH341_MIN_BPS DIV_ROUND_UP(CH341_CLKRATE, CH341_CLK_DIV(0, 0) * 256) #define CH341_MAX_BPS (CH341_CLKRATE / (CH341_CLK_DIV(3, 0) * 2)) /* * The device line speed is given by the following equation: * * baudrate = 48000000 / (2^(12 - 3 * ps - fact) * div), where * * 0 <= ps <= 3, * 0 <= fact <= 1, * 2 <= div <= 256 if fact = 0, or * 9 <= div <= 256 if fact = 1 */ static int ch341_get_divisor(struct ch341_private *priv, speed_t speed) { unsigned int fact, div, clk_div; bool force_fact0 = false; int ps; /* * Clamp to supported range, this makes the (ps < 0) and (div < 2) * sanity checks below redundant. */ speed = clamp_val(speed, CH341_MIN_BPS, CH341_MAX_BPS); /* * Start with highest possible base clock (fact = 1) that will give a * divisor strictly less than 512. */ fact = 1; for (ps = 3; ps >= 0; ps--) { if (speed > ch341_min_rates[ps]) break; } if (ps < 0) return -EINVAL; /* Determine corresponding divisor, rounding down. */ clk_div = CH341_CLK_DIV(ps, fact); div = CH341_CLKRATE / (clk_div * speed); /* Some devices require a lower base clock if ps < 3. */ if (ps < 3 && (priv->quirks & CH341_QUIRK_LIMITED_PRESCALER)) force_fact0 = true; /* Halve base clock (fact = 0) if required. */ if (div < 9 || div > 255 || force_fact0) { div /= 2; clk_div *= 2; fact = 0; } if (div < 2) return -EINVAL; /* * Pick next divisor if resulting rate is closer to the requested one, * scale up to avoid rounding errors on low rates. */ if (16 * CH341_CLKRATE / (clk_div * div) - 16 * speed >= 16 * speed - 16 * CH341_CLKRATE / (clk_div * (div + 1))) div++; /* * Prefer lower base clock (fact = 0) if even divisor. * * Note that this makes the receiver more tolerant to errors. */ if (fact == 1 && div % 2 == 0) { div /= 2; fact = 0; } return (0x100 - div) << 8 | fact << 2 | ps; } static int ch341_set_baudrate_lcr(struct usb_device *dev, struct ch341_private *priv, speed_t baud_rate, u8 lcr) { int val; int r; if (!baud_rate) return -EINVAL; val = ch341_get_divisor(priv, baud_rate); if (val < 0) return -EINVAL; /* * CH341A buffers data until a full endpoint-size packet (32 bytes) * has been received unless bit 7 is set. * * At least one device with version 0x27 appears to have this bit * inverted. */ if (priv->version > 0x27) val |= BIT(7); r = ch341_control_out(dev, CH341_REQ_WRITE_REG, CH341_REG_DIVISOR << 8 | CH341_REG_PRESCALER, val); if (r) return r; /* * Chip versions before version 0x30 as read using * CH341_REQ_READ_VERSION used separate registers for line control * (stop bits, parity and word length). Version 0x30 and above use * CH341_REG_LCR only and CH341_REG_LCR2 is always set to zero. 
*/ if (priv->version < 0x30) return 0; r = ch341_control_out(dev, CH341_REQ_WRITE_REG, CH341_REG_LCR2 << 8 | CH341_REG_LCR, lcr); if (r) return r; return r; } static int ch341_set_handshake(struct usb_device *dev, u8 control) { return ch341_control_out(dev, CH341_REQ_MODEM_CTRL, ~control, 0); } static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) { const unsigned int size = 2; u8 buffer[2]; int r; unsigned long flags; r = ch341_control_in(dev, CH341_REQ_READ_REG, 0x0706, 0, buffer, size); if (r) return r; spin_lock_irqsave(&priv->lock, flags); priv->msr = (~(*buffer)) & CH341_BITS_MODEM_STAT; spin_unlock_irqrestore(&priv->lock, flags); return 0; } /* -------------------------------------------------------------------------- */ static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) { const unsigned int size = 2; u8 buffer[2]; int r; /* expect two bytes 0x27 0x00 */ r = ch341_control_in(dev, CH341_REQ_READ_VERSION, 0, 0, buffer, size); if (r) return r; priv->version = buffer[0]; dev_dbg(&dev->dev, "Chip version: 0x%02x\n", priv->version); r = ch341_control_out(dev, CH341_REQ_SERIAL_INIT, 0, 0); if (r < 0) return r; r = ch341_set_baudrate_lcr(dev, priv, priv->baud_rate, priv->lcr); if (r < 0) return r; r = ch341_set_handshake(dev, priv->mcr); if (r < 0) return r; return 0; } static int ch341_detect_quirks(struct usb_serial_port *port) { struct ch341_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; const unsigned int size = 2; unsigned long quirks = 0; u8 buffer[2]; int r; /* * A subset of CH34x devices does not support all features. The * prescaler is limited and there is no support for sending a RS232 * break condition. A read failure when trying to set up the latter is * used to detect these devices. */ r = usb_control_msg_recv(udev, 0, CH341_REQ_READ_REG, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, CH341_REG_BREAK, 0, &buffer, size, DEFAULT_TIMEOUT, GFP_KERNEL); if (r == -EPIPE) { dev_info(&port->dev, "break control not supported, using simulated break\n"); quirks = CH341_QUIRK_LIMITED_PRESCALER | CH341_QUIRK_SIMULATE_BREAK; r = 0; } else if (r) { dev_err(&port->dev, "failed to read break control: %d\n", r); } if (quirks) { dev_dbg(&port->dev, "enabling quirk flags: 0x%02lx\n", quirks); priv->quirks |= quirks; } return r; } static int ch341_port_probe(struct usb_serial_port *port) { struct ch341_private *priv; int r; priv = kzalloc(sizeof(struct ch341_private), GFP_KERNEL); if (!priv) return -ENOMEM; spin_lock_init(&priv->lock); priv->baud_rate = DEFAULT_BAUD_RATE; /* * Some CH340 devices appear unable to change the initial LCR * settings, so set a sane 8N1 default. 
*/ priv->lcr = CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX | CH341_LCR_CS8; r = ch341_configure(port->serial->dev, priv); if (r < 0) goto error; usb_set_serial_port_data(port, priv); r = ch341_detect_quirks(port); if (r < 0) goto error; return 0; error: kfree(priv); return r; } static void ch341_port_remove(struct usb_serial_port *port) { struct ch341_private *priv; priv = usb_get_serial_port_data(port); kfree(priv); } static int ch341_carrier_raised(struct usb_serial_port *port) { struct ch341_private *priv = usb_get_serial_port_data(port); if (priv->msr & CH341_BIT_DCD) return 1; return 0; } static void ch341_dtr_rts(struct usb_serial_port *port, int on) { struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; /* drop DTR and RTS */ spin_lock_irqsave(&priv->lock, flags); if (on) priv->mcr |= CH341_BIT_RTS | CH341_BIT_DTR; else priv->mcr &= ~(CH341_BIT_RTS | CH341_BIT_DTR); spin_unlock_irqrestore(&priv->lock, flags); ch341_set_handshake(port->serial->dev, priv->mcr); } static void ch341_close(struct usb_serial_port *port) { usb_serial_generic_close(port); usb_kill_urb(port->interrupt_in_urb); } /* open this device, set default parameters */ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) { struct ch341_private *priv = usb_get_serial_port_data(port); int r; if (tty) ch341_set_termios(tty, port, NULL); dev_dbg(&port->dev, "%s - submitting interrupt urb\n", __func__); r = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (r) { dev_err(&port->dev, "%s - failed to submit interrupt urb: %d\n", __func__, r); return r; } r = ch341_get_status(port->serial->dev, priv); if (r < 0) { dev_err(&port->dev, "failed to read modem status: %d\n", r); goto err_kill_interrupt_urb; } r = usb_serial_generic_open(tty, port); if (r) goto err_kill_interrupt_urb; return 0; err_kill_interrupt_urb: usb_kill_urb(port->interrupt_in_urb); return r; } /* Old_termios contains the original termios settings and * tty->termios contains the new setting to be used. 
*/ static void ch341_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct ch341_private *priv = usb_get_serial_port_data(port); unsigned baud_rate; unsigned long flags; u8 lcr; int r; /* redundant changes may cause the chip to lose bytes */ if (old_termios && !tty_termios_hw_change(&tty->termios, old_termios)) return; baud_rate = tty_get_baud_rate(tty); lcr = CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX; switch (C_CSIZE(tty)) { case CS5: lcr |= CH341_LCR_CS5; break; case CS6: lcr |= CH341_LCR_CS6; break; case CS7: lcr |= CH341_LCR_CS7; break; case CS8: lcr |= CH341_LCR_CS8; break; } if (C_PARENB(tty)) { lcr |= CH341_LCR_ENABLE_PAR; if (C_PARODD(tty) == 0) lcr |= CH341_LCR_PAR_EVEN; if (C_CMSPAR(tty)) lcr |= CH341_LCR_MARK_SPACE; } if (C_CSTOPB(tty)) lcr |= CH341_LCR_STOP_BITS_2; if (baud_rate) { priv->baud_rate = baud_rate; r = ch341_set_baudrate_lcr(port->serial->dev, priv, priv->baud_rate, lcr); if (r < 0 && old_termios) { priv->baud_rate = tty_termios_baud_rate(old_termios); tty_termios_copy_hw(&tty->termios, old_termios); } else if (r == 0) { priv->lcr = lcr; } } spin_lock_irqsave(&priv->lock, flags); if (C_BAUD(tty) == B0) priv->mcr &= ~(CH341_BIT_DTR | CH341_BIT_RTS); else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) priv->mcr |= (CH341_BIT_DTR | CH341_BIT_RTS); spin_unlock_irqrestore(&priv->lock, flags); ch341_set_handshake(port->serial->dev, priv->mcr); } /* * A subset of all CH34x devices don't support a real break condition and * reading CH341_REG_BREAK fails (see also ch341_detect_quirks). This function * simulates a break condition by lowering the baud rate to the minimum * supported by the hardware upon enabling the break condition and sending * a NUL byte. * * Incoming data is corrupted while the break condition is being simulated. * * Normally the duration of the break condition can be controlled individually * by userspace using TIOCSBRK and TIOCCBRK or by passing an argument to * TCSBRKP. Due to how the simulation is implemented the duration can't be * controlled. The duration is always about (1s / 46bd * 9bit) = 196ms. */ static int ch341_simulate_break(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long now, delay; int r, r2; if (break_state != 0) { dev_dbg(&port->dev, "enter break state requested\n"); r = ch341_set_baudrate_lcr(port->serial->dev, priv, CH341_MIN_BPS, CH341_LCR_ENABLE_RX | CH341_LCR_ENABLE_TX | CH341_LCR_CS8); if (r < 0) { dev_err(&port->dev, "failed to change baud rate to %u: %d\n", CH341_MIN_BPS, r); goto restore; } r = tty_put_char(tty, '\0'); if (r < 0) { dev_err(&port->dev, "failed to write NUL byte for simulated break condition: %d\n", r); goto restore; } /* * Compute expected transmission duration including safety * margin. The original baud rate is only restored after the * computed point in time. 
* * 11 bits = 1 start, 8 data, 1 stop, 1 margin */ priv->break_end = jiffies + (11 * HZ / CH341_MIN_BPS); return 0; } dev_dbg(&port->dev, "leave break state requested\n"); now = jiffies; if (time_before(now, priv->break_end)) { /* Wait until NUL byte is written */ delay = priv->break_end - now; dev_dbg(&port->dev, "wait %d ms while transmitting NUL byte at %u baud\n", jiffies_to_msecs(delay), CH341_MIN_BPS); schedule_timeout_interruptible(delay); } r = 0; restore: /* Restore original baud rate */ r2 = ch341_set_baudrate_lcr(port->serial->dev, priv, priv->baud_rate, priv->lcr); if (r2 < 0) { dev_err(&port->dev, "restoring original baud rate of %u failed: %d\n", priv->baud_rate, r2); return r2; } return r; } static int ch341_break_ctl(struct tty_struct *tty, int break_state) { const uint16_t ch341_break_reg = ((uint16_t) CH341_REG_LCR << 8) | CH341_REG_BREAK; struct usb_serial_port *port = tty->driver_data; struct ch341_private *priv = usb_get_serial_port_data(port); int r; uint16_t reg_contents; uint8_t break_reg[2]; if (priv->quirks & CH341_QUIRK_SIMULATE_BREAK) return ch341_simulate_break(tty, break_state); r = ch341_control_in(port->serial->dev, CH341_REQ_READ_REG, ch341_break_reg, 0, break_reg, 2); if (r) { dev_err(&port->dev, "%s - USB control read error (%d)\n", __func__, r); if (r > 0) r = -EIO; return r; } dev_dbg(&port->dev, "%s - initial ch341 break register contents - reg1: %x, reg2: %x\n", __func__, break_reg[0], break_reg[1]); if (break_state != 0) { dev_dbg(&port->dev, "%s - Enter break state requested\n", __func__); break_reg[0] &= ~CH341_NBREAK_BITS; break_reg[1] &= ~CH341_LCR_ENABLE_TX; } else { dev_dbg(&port->dev, "%s - Leave break state requested\n", __func__); break_reg[0] |= CH341_NBREAK_BITS; break_reg[1] |= CH341_LCR_ENABLE_TX; } dev_dbg(&port->dev, "%s - New ch341 break register contents - reg1: %x, reg2: %x\n", __func__, break_reg[0], break_reg[1]); reg_contents = get_unaligned_le16(break_reg); r = ch341_control_out(port->serial->dev, CH341_REQ_WRITE_REG, ch341_break_reg, reg_contents); if (r < 0) { dev_err(&port->dev, "%s - USB control write error (%d)\n", __func__, r); return r; } return 0; } static int ch341_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; u8 control; spin_lock_irqsave(&priv->lock, flags); if (set & TIOCM_RTS) priv->mcr |= CH341_BIT_RTS; if (set & TIOCM_DTR) priv->mcr |= CH341_BIT_DTR; if (clear & TIOCM_RTS) priv->mcr &= ~CH341_BIT_RTS; if (clear & TIOCM_DTR) priv->mcr &= ~CH341_BIT_DTR; control = priv->mcr; spin_unlock_irqrestore(&priv->lock, flags); return ch341_set_handshake(port->serial->dev, control); } static void ch341_update_status(struct usb_serial_port *port, unsigned char *data, size_t len) { struct ch341_private *priv = usb_get_serial_port_data(port); struct tty_struct *tty; unsigned long flags; u8 status; u8 delta; if (len < 4) return; status = ~data[2] & CH341_BITS_MODEM_STAT; spin_lock_irqsave(&priv->lock, flags); delta = status ^ priv->msr; priv->msr = status; spin_unlock_irqrestore(&priv->lock, flags); if (data[1] & CH341_MULT_STAT) dev_dbg(&port->dev, "%s - multiple status change\n", __func__); if (!delta) return; if (delta & CH341_BIT_CTS) port->icount.cts++; if (delta & CH341_BIT_DSR) port->icount.dsr++; if (delta & CH341_BIT_RI) port->icount.rng++; if (delta & CH341_BIT_DCD) { port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) { 
usb_serial_handle_dcd_change(port, tty, status & CH341_BIT_DCD); tty_kref_put(tty); } } wake_up_interruptible(&port->port.delta_msr_wait); } static void ch341_read_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned int len = urb->actual_length; int status; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&urb->dev->dev, "%s - urb shutting down: %d\n", __func__, urb->status); return; default: dev_dbg(&urb->dev->dev, "%s - nonzero urb status: %d\n", __func__, urb->status); goto exit; } usb_serial_debug_data(&port->dev, __func__, len, data); ch341_update_status(port, data, len); exit: status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { dev_err(&urb->dev->dev, "%s - usb_submit_urb failed: %d\n", __func__, status); } } static int ch341_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; u8 mcr; u8 status; unsigned int result; spin_lock_irqsave(&priv->lock, flags); mcr = priv->mcr; status = priv->msr; spin_unlock_irqrestore(&priv->lock, flags); result = ((mcr & CH341_BIT_DTR) ? TIOCM_DTR : 0) | ((mcr & CH341_BIT_RTS) ? TIOCM_RTS : 0) | ((status & CH341_BIT_CTS) ? TIOCM_CTS : 0) | ((status & CH341_BIT_DSR) ? TIOCM_DSR : 0) | ((status & CH341_BIT_RI) ? TIOCM_RI : 0) | ((status & CH341_BIT_DCD) ? TIOCM_CD : 0); dev_dbg(&port->dev, "%s - result = %x\n", __func__, result); return result; } static int ch341_reset_resume(struct usb_serial *serial) { struct usb_serial_port *port = serial->port[0]; struct ch341_private *priv; int ret; priv = usb_get_serial_port_data(port); if (!priv) return 0; /* reconfigure ch341 serial port after bus-reset */ ch341_configure(serial->dev, priv); if (tty_port_initialized(&port->port)) { ret = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); if (ret) { dev_err(&port->dev, "failed to submit interrupt urb: %d\n", ret); return ret; } ret = ch341_get_status(port->serial->dev, priv); if (ret < 0) { dev_err(&port->dev, "failed to read modem status: %d\n", ret); } } return usb_serial_generic_resume(serial); } static struct usb_serial_driver ch341_device = { .driver = { .owner = THIS_MODULE, .name = "ch341-uart", }, .id_table = id_table, .num_ports = 1, .open = ch341_open, .dtr_rts = ch341_dtr_rts, .carrier_raised = ch341_carrier_raised, .close = ch341_close, .set_termios = ch341_set_termios, .break_ctl = ch341_break_ctl, .tiocmget = ch341_tiocmget, .tiocmset = ch341_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .read_int_callback = ch341_read_int_callback, .port_probe = ch341_port_probe, .port_remove = ch341_port_remove, .reset_resume = ch341_reset_resume, }; static struct usb_serial_driver * const serial_drivers[] = { &ch341_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_LICENSE("GPL v2");
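The comment above ch341_get_divisor() gives the CH341 line-speed equation. The following stand-alone user-space sketch (not part of the driver) plugs one set of values into that equation as a sanity check; CLKRATE and CLK_DIV mirror the driver's CH341_CLKRATE and CH341_CLK_DIV(), while the chosen ps/fact/div values are simply a worked example for a requested 115200 baud, not the driver's own search result.

/*
 * Stand-alone illustration of the CH341 line-speed equation:
 *   baudrate = 48000000 / (2^(12 - 3 * ps - fact) * div)
 * The ps/fact/div values are one plausible operating point for a
 * requested 115200 baud.
 */
#include <stdio.h>

#define CLKRATE			48000000UL
#define CLK_DIV(ps, fact)	(1UL << (12 - 3 * (ps) - (fact)))

int main(void)
{
	unsigned int ps = 3, fact = 0, div = 52;
	unsigned long baud = CLKRATE / (CLK_DIV(ps, fact) * div);

	/* Prints ps=3 fact=0 div=52 -> 115384 baud, about 0.2% off 115200. */
	printf("ps=%u fact=%u div=%u -> %lu baud\n", ps, fact, div, baud);
	return 0;
}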
// SPDX-License-Identifier: GPL-2.0-only /* * DVB USB library - provides a generic interface for a DVB USB device driver. * * dvb-usb-init.c * * Copyright (C) 2004-6 Patrick Boettcher (patrick.boettcher@posteo.de) * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include "dvb-usb-common.h" /* debug */ int dvb_usb_debug; module_param_named(debug, dvb_usb_debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (1=info,xfer=2,pll=4,ts=8,err=16,rc=32,fw=64,mem=128,uxfer=256 (or-able))." DVB_USB_DEBUG_STATUS); int dvb_usb_disable_rc_polling; module_param_named(disable_rc_polling, dvb_usb_disable_rc_polling, int, 0644); MODULE_PARM_DESC(disable_rc_polling, "disable remote control polling (default: 0)."); static int dvb_usb_force_pid_filter_usage; module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444); MODULE_PARM_DESC(force_pid_filter_usage, "force all dvb-usb-devices to use a PID filter, if any (default: 0)."); static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) { struct dvb_usb_adapter *adap; int ret, n, o; for (n = 0; n < d->props.num_adapters; n++) { adap = &d->adapter[n]; adap->dev = d; adap->id = n; memcpy(&adap->props, &d->props.adapter[n], sizeof(struct dvb_usb_adapter_properties)); for (o = 0; o < adap->props.num_frontends; o++) { struct dvb_usb_adapter_fe_properties *props = &adap->props.fe[o]; /* speed - when running at FULL speed we need a HW PID filter */ if (d->udev->speed == USB_SPEED_FULL && !(props->caps & DVB_USB_ADAP_HAS_PID_FILTER)) { err("This USB2.0 device cannot be run on a USB1.1 port.
(it lacks a hardware PID filter)"); return -ENODEV; } if ((d->udev->speed == USB_SPEED_FULL && props->caps & DVB_USB_ADAP_HAS_PID_FILTER) || (props->caps & DVB_USB_ADAP_NEED_PID_FILTERING)) { info("will use the device's hardware PID filter (table count: %d).", props->pid_filter_count); adap->fe_adap[o].pid_filtering = 1; adap->fe_adap[o].max_feed_count = props->pid_filter_count; } else { info("will pass the complete MPEG2 transport stream to the software demuxer."); adap->fe_adap[o].pid_filtering = 0; adap->fe_adap[o].max_feed_count = 255; } if (!adap->fe_adap[o].pid_filtering && dvb_usb_force_pid_filter_usage && props->caps & DVB_USB_ADAP_HAS_PID_FILTER) { info("pid filter enabled by module option."); adap->fe_adap[o].pid_filtering = 1; adap->fe_adap[o].max_feed_count = props->pid_filter_count; } if (props->size_of_priv > 0) { adap->fe_adap[o].priv = kzalloc(props->size_of_priv, GFP_KERNEL); if (adap->fe_adap[o].priv == NULL) { err("no memory for priv for adapter %d fe %d.", n, o); return -ENOMEM; } } } if (adap->props.size_of_priv > 0) { adap->priv = kzalloc(adap->props.size_of_priv, GFP_KERNEL); if (adap->priv == NULL) { err("no memory for priv for adapter %d.", n); return -ENOMEM; } } ret = dvb_usb_adapter_stream_init(adap); if (ret) goto stream_init_err; ret = dvb_usb_adapter_dvb_init(adap, adapter_nrs); if (ret) goto dvb_init_err; ret = dvb_usb_adapter_frontend_init(adap); if (ret) goto frontend_init_err; /* use exclusive FE lock if there is multiple shared FEs */ if (adap->fe_adap[1].fe && adap->dvb_adap.mfe_shared < 1) adap->dvb_adap.mfe_shared = 1; d->num_adapters_initialized++; d->state |= DVB_USB_STATE_DVB; } /* * when reloading the driver w/o replugging the device * sometimes a timeout occurs, this helps */ if (d->props.generic_bulk_ctrl_endpoint != 0) { usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); } return 0; frontend_init_err: dvb_usb_adapter_dvb_exit(adap); dvb_init_err: dvb_usb_adapter_stream_exit(adap); stream_init_err: kfree(adap->priv); return ret; } static int dvb_usb_adapter_exit(struct dvb_usb_device *d) { int n; for (n = 0; n < d->num_adapters_initialized; n++) { dvb_usb_adapter_frontend_exit(&d->adapter[n]); dvb_usb_adapter_dvb_exit(&d->adapter[n]); dvb_usb_adapter_stream_exit(&d->adapter[n]); kfree(d->adapter[n].priv); } d->num_adapters_initialized = 0; d->state &= ~DVB_USB_STATE_DVB; return 0; } /* general initialization functions */ static int dvb_usb_exit(struct dvb_usb_device *d) { deb_info("state before exiting everything: %x\n", d->state); dvb_usb_remote_exit(d); dvb_usb_adapter_exit(d); dvb_usb_i2c_exit(d); deb_info("state should be zero now: %x\n", d->state); d->state = DVB_USB_STATE_INIT; if (d->priv != NULL && d->props.priv_destroy != NULL) d->props.priv_destroy(d); kfree(d->priv); kfree(d); return 0; } static int dvb_usb_init(struct dvb_usb_device *d, short *adapter_nums) { int ret = 0; mutex_init(&d->data_mutex); mutex_init(&d->usb_mutex); mutex_init(&d->i2c_mutex); d->state = DVB_USB_STATE_INIT; if (d->props.size_of_priv > 0) { d->priv = kzalloc(d->props.size_of_priv, GFP_KERNEL); if (d->priv == NULL) { err("no memory for priv in 'struct dvb_usb_device'"); return -ENOMEM; } if (d->props.priv_init != NULL) { ret = d->props.priv_init(d); if (ret != 0) goto err_priv_init; } } /* check the capabilities and set appropriate variables */ dvb_usb_device_power_ctrl(d, 1); ret = dvb_usb_i2c_init(d); if (ret) goto err_i2c_init; ret = 
dvb_usb_adapter_init(d, adapter_nums); if (ret) goto err_adapter_init; if ((ret = dvb_usb_remote_init(d))) err("could not initialize remote control."); dvb_usb_device_power_ctrl(d, 0); return 0; err_adapter_init: dvb_usb_adapter_exit(d); dvb_usb_i2c_exit(d); err_i2c_init: if (d->priv && d->props.priv_destroy) d->props.priv_destroy(d); err_priv_init: kfree(d->priv); d->priv = NULL; return ret; } /* determine the name and the state of the just found USB device */ static const struct dvb_usb_device_description *dvb_usb_find_device(struct usb_device *udev, const struct dvb_usb_device_properties *props, int *cold) { int i, j; const struct dvb_usb_device_description *desc = NULL; *cold = -1; for (i = 0; i < props->num_device_descs; i++) { for (j = 0; j < DVB_USB_ID_MAX_NUM && props->devices[i].cold_ids[j] != NULL; j++) { deb_info("check for cold %x %x\n", props->devices[i].cold_ids[j]->idVendor, props->devices[i].cold_ids[j]->idProduct); if (props->devices[i].cold_ids[j]->idVendor == le16_to_cpu(udev->descriptor.idVendor) && props->devices[i].cold_ids[j]->idProduct == le16_to_cpu(udev->descriptor.idProduct)) { *cold = 1; desc = &props->devices[i]; break; } } if (desc != NULL) break; for (j = 0; j < DVB_USB_ID_MAX_NUM && props->devices[i].warm_ids[j] != NULL; j++) { deb_info("check for warm %x %x\n", props->devices[i].warm_ids[j]->idVendor, props->devices[i].warm_ids[j]->idProduct); if (props->devices[i].warm_ids[j]->idVendor == le16_to_cpu(udev->descriptor.idVendor) && props->devices[i].warm_ids[j]->idProduct == le16_to_cpu(udev->descriptor.idProduct)) { *cold = 0; desc = &props->devices[i]; break; } } } if (desc != NULL && props->identify_state != NULL) props->identify_state(udev, props, &desc, cold); return desc; } int dvb_usb_device_power_ctrl(struct dvb_usb_device *d, int onoff) { if (onoff) d->powered++; else d->powered--; if (d->powered == 0 || (onoff && d->powered == 1)) { /* when switching from 1 to 0 or from 0 to 1 */ deb_info("power control: %d\n", onoff); if (d->props.power_ctrl) return d->props.power_ctrl(d, onoff); } return 0; } /* * USB */ int dvb_usb_device_init(struct usb_interface *intf, const struct dvb_usb_device_properties *props, struct module *owner, struct dvb_usb_device **du, short *adapter_nums) { struct usb_device *udev = interface_to_usbdev(intf); struct dvb_usb_device *d = NULL; const struct dvb_usb_device_description *desc = NULL; int ret = -ENOMEM, cold = 0; if (du != NULL) *du = NULL; d = kzalloc(sizeof(*d), GFP_KERNEL); if (!d) { err("no memory for 'struct dvb_usb_device'"); return -ENOMEM; } memcpy(&d->props, props, sizeof(struct dvb_usb_device_properties)); desc = dvb_usb_find_device(udev, &d->props, &cold); if (!desc) { deb_err("something went very wrong, device was not found in current device list - let's see what comes next.\n"); ret = -ENODEV; goto error; } if (cold) { info("found a '%s' in cold state, will try to load a firmware", desc->name); ret = dvb_usb_download_firmware(udev, props); if (!props->no_reconnect || ret != 0) goto error; } info("found a '%s' in warm state.", desc->name); d->udev = udev; d->desc = desc; d->owner = owner; usb_set_intfdata(intf, d); ret = dvb_usb_init(d, adapter_nums); if (ret) { info("%s error while loading driver (%d)", desc->name, ret); goto error; } if (du) *du = d; info("%s successfully initialized and connected.", desc->name); return 0; error: usb_set_intfdata(intf, NULL); kfree(d); return ret; } EXPORT_SYMBOL(dvb_usb_device_init); void dvb_usb_device_exit(struct usb_interface *intf) { struct dvb_usb_device *d = 
usb_get_intfdata(intf); const char *default_name = "generic DVB-USB module"; char name[40]; usb_set_intfdata(intf, NULL); if (d != NULL && d->desc != NULL) { strscpy(name, d->desc->name, sizeof(name)); dvb_usb_exit(d); } else { strscpy(name, default_name, sizeof(name)); } info("%s successfully deinitialized and disconnected.", name); } EXPORT_SYMBOL(dvb_usb_device_exit); MODULE_VERSION("1.0"); MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>"); MODULE_DESCRIPTION("A library module containing commonly used USB and DVB functions for USB DVB devices"); MODULE_LICENSE("GPL");
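dvb_usb_device_power_ctrl() above keeps a reference count in d->powered so that nested callers can request power without toggling the hardware repeatedly. A minimal stand-alone sketch of that counting idea follows; the mock types and names are illustrative stand-ins, and the real work is done by the driver-supplied props.power_ctrl callback.

/*
 * Stand-alone sketch of the d->powered reference counting used by
 * dvb_usb_device_power_ctrl(): the underlying power callback only
 * fires when the count crosses the 0/1 boundary. Types and names
 * here are hypothetical, not the library's API.
 */
#include <stdio.h>

struct mock_dev {
	int powered;	/* reference count, as in struct dvb_usb_device */
};

static int mock_power_ctrl(struct mock_dev *d, int onoff)
{
	if (onoff)
		d->powered++;
	else
		d->powered--;

	/* Only act when switching from 1 to 0 or from 0 to 1. */
	if (d->powered == 0 || (onoff && d->powered == 1)) {
		printf("hardware power %s\n", onoff ? "on" : "off");
		return 1;	/* the driver's power_ctrl hook would run here */
	}
	return 0;		/* nested request: nothing to do */
}

int main(void)
{
	struct mock_dev d = { 0 };

	mock_power_ctrl(&d, 1);	/* powers on */
	mock_power_ctrl(&d, 1);	/* nested on: no hardware access */
	mock_power_ctrl(&d, 0);	/* still one user: no hardware access */
	mock_power_ctrl(&d, 0);	/* last user gone: powers off */
	return 0;
}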
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_USER_NAMESPACE_H #define _LINUX_USER_NAMESPACE_H #include <linux/kref.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/rwsem.h> #include <linux/sysctl.h> #include <linux/err.h> #define UID_GID_MAP_MAX_BASE_EXTENTS 5 #define UID_GID_MAP_MAX_EXTENTS 340 struct uid_gid_extent { u32 first; u32 lower_first; u32 count; }; struct uid_gid_map { /* 64 bytes -- 1 cache line */ u32 nr_extents; union { struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; struct { struct uid_gid_extent *forward; struct uid_gid_extent *reverse; }; }; }; #define USERNS_SETGROUPS_ALLOWED 1UL #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED struct ucounts; enum ucount_type { UCOUNT_USER_NAMESPACES, UCOUNT_PID_NAMESPACES, UCOUNT_UTS_NAMESPACES, UCOUNT_IPC_NAMESPACES, UCOUNT_NET_NAMESPACES, UCOUNT_MNT_NAMESPACES, UCOUNT_CGROUP_NAMESPACES, UCOUNT_TIME_NAMESPACES, #ifdef CONFIG_INOTIFY_USER UCOUNT_INOTIFY_INSTANCES, UCOUNT_INOTIFY_WATCHES, #endif #ifdef CONFIG_FANOTIFY UCOUNT_FANOTIFY_GROUPS, UCOUNT_FANOTIFY_MARKS, #endif UCOUNT_COUNTS, }; enum rlimit_type { UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, UCOUNT_RLIMIT_SIGPENDING, UCOUNT_RLIMIT_MEMLOCK, UCOUNT_RLIMIT_COUNTS, }; #if IS_ENABLED(CONFIG_BINFMT_MISC) struct binfmt_misc; #endif struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; struct uid_gid_map projid_map; struct user_namespace *parent; int level; kuid_t owner; kgid_t group; struct ns_common ns; unsigned long flags; /* parent_could_setfcap: true if the creator of this ns had CAP_SETFCAP * in its effective capability set at the child ns creation time. */ bool parent_could_setfcap; #ifdef CONFIG_KEYS /* List of joinable keyrings in this namespace. Modification access of * these pointers is controlled by keyring_sem. Once * user_keyring_register is set, it won't be changed, so it can be * accessed directly with READ_ONCE().
*/ struct list_head keyring_name_list; struct key *user_keyring_register; struct rw_semaphore keyring_sem; #endif /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS struct key *persistent_keyring_register; #endif struct work_struct work; #ifdef CONFIG_SYSCTL struct ctl_table_set set; struct ctl_table_header *sysctls; #endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; #if IS_ENABLED(CONFIG_BINFMT_MISC) struct binfmt_misc *binfmt_misc; #endif } __randomize_layout; struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; atomic_t count; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; extern struct user_namespace init_user_ns; extern struct ucounts init_ucounts; bool setup_userns_sysctls(struct user_namespace *ns); void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { return atomic_long_read(&ucounts->rlimit[type]); } long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type); void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type) { return READ_ONCE(ns->rlimit_max[type]); } static inline void set_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type, unsigned long max) { ns->rlimit_max[type] = max <= LONG_MAX ? 
max : LONG_MAX; } #ifdef CONFIG_USER_NS static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) refcount_inc(&ns->ns.count); return ns; } extern int create_user_ns(struct cred *new); extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { if (ns && refcount_dec_and_test(&ns->ns.count)) __put_user_ns(ns); } struct seq_operations; extern const struct seq_operations proc_uid_seq_operations; extern const struct seq_operations proc_gid_seq_operations; extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); extern bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child); extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; } static inline int create_user_ns(struct cred *new) { return -EINVAL; } static inline int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) { if (unshare_flags & CLONE_NEWUSER) return -EINVAL; return 0; } static inline void put_user_ns(struct user_namespace *ns) { } static inline bool userns_may_setgroups(const struct user_namespace *ns) { return true; } static inline bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child) { return true; } static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; } static inline struct ns_common *ns_get_owner(struct ns_common *ns) { return ERR_PTR(-EPERM); } #endif #endif /* _LINUX_USER_H */
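struct uid_gid_extent in the header above describes one contiguous ID mapping: count IDs starting at first in the namespace correspond to count IDs starting at lower_first in the parent. The lookup helpers that walk these extents live in kernel/user_namespace.c and are not shown here; the stand-alone sketch below (hypothetical names, user-space types) only illustrates what a single extent expresses.

/*
 * Stand-alone illustration of one uid_gid_extent: an id in
 * [first, first + count) maps to lower_first + (id - first).
 * This mirrors the semantics of a /proc/<pid>/uid_map line such as
 * "0 100000 65536"; it is not the kernel's lookup code.
 */
#include <stdint.h>
#include <stdio.h>

struct extent_example {
	uint32_t first;
	uint32_t lower_first;
	uint32_t count;
};

/* Return the mapped id, or UINT32_MAX if the extent does not cover it. */
static uint32_t map_id(const struct extent_example *e, uint32_t id)
{
	if (id >= e->first && id - e->first < e->count)
		return e->lower_first + (id - e->first);
	return UINT32_MAX;
}

int main(void)
{
	struct extent_example e = { 0, 100000, 65536 };

	printf("ns uid 1000  -> host uid %u\n", (unsigned)map_id(&e, 1000));	/* 101000 */
	printf("ns uid 70000 -> %u (not covered)\n", (unsigned)map_id(&e, 70000));
	return 0;
}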
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions of the Internet Protocol. * * Version: @(#)in.h 1.0.1 04/21/93 * * Authors: Original taken from the GNU Project <netinet/in.h> file. * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _LINUX_IN_H #define _LINUX_IN_H #include <linux/errno.h> #include <uapi/linux/in.h> static inline int proto_ports_offset(int proto) { switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_DCCP: case IPPROTO_ESP: /* SPI */ case IPPROTO_SCTP: case IPPROTO_UDPLITE: return 0; case IPPROTO_AH: /* SPI */ return 4; default: return -EINVAL; } } static inline bool ipv4_is_loopback(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x7f000000); } static inline bool ipv4_is_multicast(__be32 addr) { return (addr & htonl(0xf0000000)) == htonl(0xe0000000); } static inline bool ipv4_is_local_multicast(__be32 addr) { return (addr & htonl(0xffffff00)) == htonl(0xe0000000); } static inline bool ipv4_is_lbcast(__be32 addr) { /* limited broadcast */ return addr == htonl(INADDR_BROADCAST); } static inline bool ipv4_is_all_snoopers(__be32 addr) { return addr == htonl(INADDR_ALLSNOOPERS_GROUP); } static inline bool ipv4_is_zeronet(__be32 addr) { return (addr == 0); } /* Special-Use IPv4 Addresses (RFC3330) */ static inline bool ipv4_is_private_10(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x0a000000); } static inline bool ipv4_is_private_172(__be32 addr) { return (addr & htonl(0xfff00000)) == htonl(0xac100000); } static inline bool ipv4_is_private_192(__be32 addr) { return (addr & htonl(0xffff0000)) == htonl(0xc0a80000); } static inline bool ipv4_is_linklocal_169(__be32 addr) { return (addr & htonl(0xffff0000)) == htonl(0xa9fe0000); } static inline bool ipv4_is_anycast_6to4(__be32 addr) { return (addr & htonl(0xffffff00)) == htonl(0xc0586300); } static inline bool ipv4_is_test_192(__be32 addr) { return (addr & htonl(0xffffff00)) == htonl(0xc0000200); } static inline bool ipv4_is_test_198(__be32 addr) { return (addr & htonl(0xfffe0000)) == htonl(0xc6120000); } #endif /* _LINUX_IN_H */
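The ipv4_is_*() helpers in the header above all follow one pattern: mask the network-byte-order address with the prefix's netmask and compare against the network constant. The short user-space program below is not kernel code; it re-creates one helper with libc's htonl() to show that pattern for 192.168.0.0/16.

/*
 * User-space illustration of the mask-and-compare pattern used by the
 * ipv4_is_*() helpers: 192.168.0.0/16 membership is one AND plus one
 * comparison on the big-endian address.
 */
#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_private_192(uint32_t be_addr)
{
	return (be_addr & htonl(0xffff0000)) == htonl(0xc0a80000);
}

int main(void)
{
	struct in_addr a;

	inet_pton(AF_INET, "192.168.1.5", &a);
	printf("192.168.1.5 in 192.168.0.0/16: %d\n", is_private_192(a.s_addr)); /* 1 */

	inet_pton(AF_INET, "8.8.8.8", &a);
	printf("8.8.8.8     in 192.168.0.0/16: %d\n", is_private_192(a.s_addr)); /* 0 */
	return 0;
}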
// SPDX-License-Identifier: GPL-2.0+ /* Keyspan USB to Serial Converter driver (C) Copyright (C) 2000-2001 Hugh Blemings <hugh@blemings.org> (C) Copyright (C) 2002 Greg Kroah-Hartman <greg@kroah.com> See http://blemings.org/hugh/keyspan.html for more information. Code in this driver inspired by and in a number of places taken from Brian Warner's original Keyspan-PDA driver. This driver has been put together with the support of Innosys, Inc. and Keyspan, Inc the manufacturers of the Keyspan USB-serial products. Thanks Guys :) Thanks to Paulus for miscellaneous tidy ups, some largish chunks of much nicer and/or completely new code and (perhaps most uniquely) having the patience to sit down and explain why and where he'd changed stuff. Tip 'o the hat to IBM (and previously Linuxcare :) for supporting staff in their work on open source projects. */ #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/usb/ezusb.h> #define DRIVER_AUTHOR "Hugh Blemings <hugh@misc.nu>" #define DRIVER_DESC "Keyspan USB to Serial Converter Driver" static void keyspan_send_setup(struct usb_serial_port *port, int reset_port); static int keyspan_usa19_calc_baud(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum); static int keyspan_usa19w_calc_baud(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum); static int keyspan_usa28_calc_baud(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum); static int keyspan_usa19hs_calc_baud(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum); static int keyspan_usa28_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port); static int keyspan_usa26_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port); static int keyspan_usa49_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port); static int keyspan_usa90_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port); static int keyspan_usa67_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port); /* Values used for baud rate calculation - device specific */ #define KEYSPAN_INVALID_BAUD_RATE (-1) #define KEYSPAN_BAUD_RATE_OK (0) #define KEYSPAN_USA18X_BAUDCLK (12000000L) /* a guess */ #define KEYSPAN_USA19_BAUDCLK (12000000L) #define KEYSPAN_USA19W_BAUDCLK (24000000L) #define KEYSPAN_USA19HS_BAUDCLK (14769231L) #define KEYSPAN_USA28_BAUDCLK (1843200L) #define KEYSPAN_USA28X_BAUDCLK (12000000L) #define KEYSPAN_USA49W_BAUDCLK (48000000L) /* Some constants used to characterise each device. */ #define KEYSPAN_MAX_NUM_PORTS (4) #define KEYSPAN_MAX_FLIPS (2) /* * Device info for the Keyspan serial converter, used by the overall * usb-serial probe function.
*/ #define KEYSPAN_VENDOR_ID (0x06cd) /* Product IDs for the products supported, pre-renumeration */ #define keyspan_usa18x_pre_product_id 0x0105 #define keyspan_usa19_pre_product_id 0x0103 #define keyspan_usa19qi_pre_product_id 0x010b #define keyspan_mpr_pre_product_id 0x011b #define keyspan_usa19qw_pre_product_id 0x0118 #define keyspan_usa19w_pre_product_id 0x0106 #define keyspan_usa28_pre_product_id 0x0101 #define keyspan_usa28x_pre_product_id 0x0102 #define keyspan_usa28xa_pre_product_id 0x0114 #define keyspan_usa28xb_pre_product_id 0x0113 #define keyspan_usa49w_pre_product_id 0x0109 #define keyspan_usa49wlc_pre_product_id 0x011a /* * Product IDs post-renumeration. Note that the 28x and 28xb have the same * id's post-renumeration but behave identically so it's not an issue. As * such, the 28xb is not listed in any of the device tables. */ #define keyspan_usa18x_product_id 0x0112 #define keyspan_usa19_product_id 0x0107 #define keyspan_usa19qi_product_id 0x010c #define keyspan_usa19hs_product_id 0x0121 #define keyspan_mpr_product_id 0x011c #define keyspan_usa19qw_product_id 0x0119 #define keyspan_usa19w_product_id 0x0108 #define keyspan_usa28_product_id 0x010f #define keyspan_usa28x_product_id 0x0110 #define keyspan_usa28xa_product_id 0x0115 #define keyspan_usa28xb_product_id 0x0110 #define keyspan_usa28xg_product_id 0x0135 #define keyspan_usa49w_product_id 0x010a #define keyspan_usa49wlc_product_id 0x012a #define keyspan_usa49wg_product_id 0x0131 struct keyspan_device_details { /* product ID value */ int product_id; enum {msg_usa26, msg_usa28, msg_usa49, msg_usa90, msg_usa67} msg_format; /* Number of physical ports */ int num_ports; /* 1 if endpoint flipping used on input, 0 if not */ int indat_endp_flip; /* 1 if endpoint flipping used on output, 0 if not */ int outdat_endp_flip; /* * Table mapping input data endpoint IDs to physical port * number and flip if used */ int indat_endpoints[KEYSPAN_MAX_NUM_PORTS]; /* Same for output endpoints */ int outdat_endpoints[KEYSPAN_MAX_NUM_PORTS]; /* Input acknowledge endpoints */ int inack_endpoints[KEYSPAN_MAX_NUM_PORTS]; /* Output control endpoints */ int outcont_endpoints[KEYSPAN_MAX_NUM_PORTS]; /* Endpoint used for input status */ int instat_endpoint; /* Endpoint used for input data 49WG only */ int indat_endpoint; /* Endpoint used for global control functions */ int glocont_endpoint; int (*calculate_baud_rate)(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum); u32 baudclk; }; /* * Now for each device type we setup the device detail structure with the * appropriate information (provided in Keyspan's documentation) */ static const struct keyspan_device_details usa18x_device_details = { .product_id = keyspan_usa18x_product_id, .msg_format = msg_usa26, .num_ports = 1, .indat_endp_flip = 0, .outdat_endp_flip = 1, .indat_endpoints = {0x81}, .outdat_endpoints = {0x01}, .inack_endpoints = {0x85}, .outcont_endpoints = {0x05}, .instat_endpoint = 0x87, .indat_endpoint = -1, .glocont_endpoint = 0x07, .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA18X_BAUDCLK, }; static const struct keyspan_device_details usa19_device_details = { .product_id = keyspan_usa19_product_id, .msg_format = msg_usa28, .num_ports = 1, .indat_endp_flip = 1, .outdat_endp_flip = 1, .indat_endpoints = {0x81}, .outdat_endpoints = {0x01}, .inack_endpoints = {0x83}, .outcont_endpoints = {0x03}, .instat_endpoint = 0x84, .indat_endpoint = -1, .glocont_endpoint = -1, .calculate_baud_rate = 
keyspan_usa19_calc_baud, .baudclk = KEYSPAN_USA19_BAUDCLK, }; static const struct keyspan_device_details usa19qi_device_details = { .product_id = keyspan_usa19qi_product_id, .msg_format = msg_usa28, .num_ports = 1, .indat_endp_flip = 1, .outdat_endp_flip = 1, .indat_endpoints = {0x81}, .outdat_endpoints = {0x01}, .inack_endpoints = {0x83}, .outcont_endpoints = {0x03}, .instat_endpoint = 0x84, .indat_endpoint = -1, .glocont_endpoint = -1, .calculate_baud_rate = keyspan_usa28_calc_baud, .baudclk = KEYSPAN_USA19_BAUDCLK, }; static const struct keyspan_device_details mpr_device_details = { .product_id = keyspan_mpr_product_id, .msg_format = msg_usa28, .num_ports = 1, .indat_endp_flip = 1, .outdat_endp_flip = 1, .indat_endpoints = {0x81}, .outdat_endpoints = {0x01}, .inack_endpoints = {0x83}, .outcont_endpoints = {0x03}, .instat_endpoint = 0x84, .indat_endpoint = -1, .glocont_endpoint = -1, .calculate_baud_rate = keyspan_usa28_calc_baud, .baudclk = KEYSPAN_USA19_BAUDCLK, }; static const struct keyspan_device_details usa19qw_device_details = { .product_id = keyspan_usa19qw_product_id, .msg_format = msg_usa26, .num_ports = 1, .indat_endp_flip = 0, .outdat_endp_flip = 1, .indat_endpoints = {0x81}, .outdat_endpoints = {0x01}, .inack_endpoints = {0x85}, .outcont_endpoints = {0x05}, .instat_endpoint = 0x87, .indat_endpoint = -1, .glocont_endpoint = 0x07, .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA19W_BAUDCLK, }; static const struct keyspan_device_details usa19w_device_details = { .product_id = keyspan_usa19w_product_id, .msg_format = msg_usa26, .num_ports = 1, .indat_endp_flip = 0, .outdat_endp_flip = 1, .indat_endpoints = {0x81}, .outdat_endpoints = {0x01}, .inack_endpoints = {0x85}, .outcont_endpoints = {0x05}, .instat_endpoint = 0x87, .indat_endpoint = -1, .glocont_endpoint = 0x07, .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA19W_BAUDCLK, }; static const struct keyspan_device_details usa19hs_device_details = { .product_id = keyspan_usa19hs_product_id, .msg_format = msg_usa90, .num_ports = 1, .indat_endp_flip = 0, .outdat_endp_flip = 0, .indat_endpoints = {0x81}, .outdat_endpoints = {0x01}, .inack_endpoints = {-1}, .outcont_endpoints = {0x02}, .instat_endpoint = 0x82, .indat_endpoint = -1, .glocont_endpoint = -1, .calculate_baud_rate = keyspan_usa19hs_calc_baud, .baudclk = KEYSPAN_USA19HS_BAUDCLK, }; static const struct keyspan_device_details usa28_device_details = { .product_id = keyspan_usa28_product_id, .msg_format = msg_usa28, .num_ports = 2, .indat_endp_flip = 1, .outdat_endp_flip = 1, .indat_endpoints = {0x81, 0x83}, .outdat_endpoints = {0x01, 0x03}, .inack_endpoints = {0x85, 0x86}, .outcont_endpoints = {0x05, 0x06}, .instat_endpoint = 0x87, .indat_endpoint = -1, .glocont_endpoint = 0x07, .calculate_baud_rate = keyspan_usa28_calc_baud, .baudclk = KEYSPAN_USA28_BAUDCLK, }; static const struct keyspan_device_details usa28x_device_details = { .product_id = keyspan_usa28x_product_id, .msg_format = msg_usa26, .num_ports = 2, .indat_endp_flip = 0, .outdat_endp_flip = 1, .indat_endpoints = {0x81, 0x83}, .outdat_endpoints = {0x01, 0x03}, .inack_endpoints = {0x85, 0x86}, .outcont_endpoints = {0x05, 0x06}, .instat_endpoint = 0x87, .indat_endpoint = -1, .glocont_endpoint = 0x07, .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA28X_BAUDCLK, }; static const struct keyspan_device_details usa28xa_device_details = { .product_id = keyspan_usa28xa_product_id, .msg_format = msg_usa26, .num_ports = 2, .indat_endp_flip = 0, 
.outdat_endp_flip = 1, .indat_endpoints = {0x81, 0x83}, .outdat_endpoints = {0x01, 0x03}, .inack_endpoints = {0x85, 0x86}, .outcont_endpoints = {0x05, 0x06}, .instat_endpoint = 0x87, .indat_endpoint = -1, .glocont_endpoint = 0x07, .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA28X_BAUDCLK, }; static const struct keyspan_device_details usa28xg_device_details = { .product_id = keyspan_usa28xg_product_id, .msg_format = msg_usa67, .num_ports = 2, .indat_endp_flip = 0, .outdat_endp_flip = 0, .indat_endpoints = {0x84, 0x88}, .outdat_endpoints = {0x02, 0x06}, .inack_endpoints = {-1, -1}, .outcont_endpoints = {-1, -1}, .instat_endpoint = 0x81, .indat_endpoint = -1, .glocont_endpoint = 0x01, .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA28X_BAUDCLK, }; /* * We don't need a separate entry for the usa28xb as it appears as a 28x * anyway. */ static const struct keyspan_device_details usa49w_device_details = { .product_id = keyspan_usa49w_product_id, .msg_format = msg_usa49, .num_ports = 4, .indat_endp_flip = 0, .outdat_endp_flip = 0, .indat_endpoints = {0x81, 0x82, 0x83, 0x84}, .outdat_endpoints = {0x01, 0x02, 0x03, 0x04}, .inack_endpoints = {-1, -1, -1, -1}, .outcont_endpoints = {-1, -1, -1, -1}, .instat_endpoint = 0x87, .indat_endpoint = -1, .glocont_endpoint = 0x07, .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA49W_BAUDCLK, }; static const struct keyspan_device_details usa49wlc_device_details = { .product_id = keyspan_usa49wlc_product_id, .msg_format = msg_usa49, .num_ports = 4, .indat_endp_flip = 0, .outdat_endp_flip = 0, .indat_endpoints = {0x81, 0x82, 0x83, 0x84}, .outdat_endpoints = {0x01, 0x02, 0x03, 0x04}, .inack_endpoints = {-1, -1, -1, -1}, .outcont_endpoints = {-1, -1, -1, -1}, .instat_endpoint = 0x87, .indat_endpoint = -1, .glocont_endpoint = 0x07, .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA19W_BAUDCLK, }; static const struct keyspan_device_details usa49wg_device_details = { .product_id = keyspan_usa49wg_product_id, .msg_format = msg_usa49, .num_ports = 4, .indat_endp_flip = 0, .outdat_endp_flip = 0, .indat_endpoints = {-1, -1, -1, -1}, /* single 'global' data in EP */ .outdat_endpoints = {0x01, 0x02, 0x04, 0x06}, .inack_endpoints = {-1, -1, -1, -1}, .outcont_endpoints = {-1, -1, -1, -1}, .instat_endpoint = 0x81, .indat_endpoint = 0x88, .glocont_endpoint = 0x00, /* uses control EP */ .calculate_baud_rate = keyspan_usa19w_calc_baud, .baudclk = KEYSPAN_USA19W_BAUDCLK, }; static const struct keyspan_device_details *keyspan_devices[] = { &usa18x_device_details, &usa19_device_details, &usa19qi_device_details, &mpr_device_details, &usa19qw_device_details, &usa19w_device_details, &usa19hs_device_details, &usa28_device_details, &usa28x_device_details, &usa28xa_device_details, &usa28xg_device_details, /* 28xb not required as it renumerates as a 28x */ &usa49w_device_details, &usa49wlc_device_details, &usa49wg_device_details, NULL, }; static const struct usb_device_id keyspan_ids_combined[] = { { USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa18x_pre_product_id) }, { USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19_pre_product_id) }, { USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19w_pre_product_id) }, { USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19qi_pre_product_id) }, { USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19qw_pre_product_id) }, { USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_mpr_pre_product_id) }, { USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28_pre_product_id) }, { USB_DEVICE(KEYSPAN_VENDOR_ID, 
keyspan_usa28x_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28xa_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28xb_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49w_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49wlc_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa18x_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19w_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19qi_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19qw_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19hs_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_mpr_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28x_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28xa_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28xg_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49w_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49wlc_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49wg_product_id) },
	{ } /* Terminating entry */
};
MODULE_DEVICE_TABLE(usb, keyspan_ids_combined);

/* usb_device_id table for the pre-firmware download keyspan devices */
static const struct usb_device_id keyspan_pre_ids[] = {
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa18x_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19qi_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19qw_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19w_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_mpr_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28x_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28xa_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28xb_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49w_pre_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49wlc_pre_product_id) },
	{ } /* Terminating entry */
};

static const struct usb_device_id keyspan_1port_ids[] = {
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa18x_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19qi_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19qw_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19w_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa19hs_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_mpr_product_id) },
	{ } /* Terminating entry */
};

static const struct usb_device_id keyspan_2port_ids[] = {
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28x_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28xa_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa28xg_product_id) },
	{ } /* Terminating entry */
};

static const struct usb_device_id keyspan_4port_ids[] = {
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49w_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49wlc_product_id) },
	{ USB_DEVICE(KEYSPAN_VENDOR_ID, keyspan_usa49wg_product_id) },
	{ } /* Terminating entry */
};

#define INSTAT_BUFLEN		32
#define GLOCONT_BUFLEN		64
#define INDAT49W_BUFLEN		512
#define IN_BUFLEN		64
#define OUT_BUFLEN		64
#define INACK_BUFLEN		1
#define OUTCONT_BUFLEN		64

/* Per device and per port private data */
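/*
 * keyspan_serial_private holds the device-wide URBs and buffers (the status
 * endpoint, the single shared 49WG data-in endpoint and the global control
 * endpoint), while keyspan_port_private keeps two input and two output URBs
 * per port so that the adapters which use endpoint flipping can alternate
 * between them, plus shadow copies of the modem-control and termios state
 * reported back through the instat messages.
 */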
struct keyspan_serial_private {
	const struct keyspan_device_details	*device_details;

	struct urb	*instat_urb;
	char		*instat_buf;

	/* added to support 49wg, where data from all 4 ports comes in
	   on 1 EP and high-speed supported */
	struct urb	*indat_urb;
	char		*indat_buf;

	/* XXX this one probably will need a lock */
	struct urb	*glocont_urb;
	char		*glocont_buf;
	char		*ctrl_buf;	/* for EP0 control message */
};

struct keyspan_port_private {
	/* Keep track of which input & output endpoints to use */
	int		in_flip;
	int		out_flip;

	/* Keep duplicate of device details in each port
	   structure as well - simplifies some of the
	   callback functions etc. */
	const struct keyspan_device_details	*device_details;

	/* Input endpoints and buffer for this port */
	struct urb	*in_urbs[2];
	char		*in_buffer[2];
	/* Output endpoints and buffer for this port */
	struct urb	*out_urbs[2];
	char		*out_buffer[2];

	/* Input ack endpoint */
	struct urb	*inack_urb;
	char		*inack_buffer;

	/* Output control endpoint */
	struct urb	*outcont_urb;
	char		*outcont_buffer;

	/* Settings for the port */
	int		baud;
	int		old_baud;
	unsigned int	cflag;
	unsigned int	old_cflag;
	enum		{flow_none, flow_cts, flow_xon} flow_control;
	int		rts_state;	/* Handshaking pins (outputs) */
	int		dtr_state;
	int		cts_state;	/* Handshaking pins (inputs) */
	int		dsr_state;
	int		dcd_state;
	int		ri_state;
	int		break_on;

	unsigned long	tx_start_time[2];
	int		resend_cont;	/* need to resend control packet */
};

/*
 * Include Keyspan message headers.  All current Keyspan Adapters make use
 * of one of five message formats which are referred to as USA-26, USA-28,
 * USA-49, USA-90, USA-67 by Keyspan and within this driver.
 */
#include "keyspan_usa26msg.h"
#include "keyspan_usa28msg.h"
#include "keyspan_usa49msg.h"
#include "keyspan_usa90msg.h"
#include "keyspan_usa67msg.h"

static int keyspan_break_ctl(struct tty_struct *tty, int break_state)
{
	struct usb_serial_port *port = tty->driver_data;
	struct keyspan_port_private *p_priv;

	p_priv = usb_get_serial_port_data(port);

	if (break_state == -1)
		p_priv->break_on = 1;
	else
		p_priv->break_on = 0;

	/* FIXME: return errors */
	keyspan_send_setup(port, 0);

	return 0;
}

static void keyspan_set_termios(struct tty_struct *tty,
				struct usb_serial_port *port,
				const struct ktermios *old_termios)
{
	int baud_rate, device_port;
	struct keyspan_port_private *p_priv;
	const struct keyspan_device_details *d_details;
	unsigned int cflag;

	p_priv = usb_get_serial_port_data(port);
	d_details = p_priv->device_details;
	cflag = tty->termios.c_cflag;
	device_port = port->port_number;

	/* Baud rate calculation takes baud rate as an integer
	   so other rates can be generated if desired. */
	baud_rate = tty_get_baud_rate(tty);
	/* If no match or invalid, don't change */
	if (d_details->calculate_baud_rate(port, baud_rate, d_details->baudclk,
				NULL, NULL, NULL, device_port) == KEYSPAN_BAUD_RATE_OK) {
		/* FIXME - more to do here to ensure rate changes cleanly */
		/* FIXME - calculate exact rate from divisor ? */
		p_priv->baud = baud_rate;
	} else
		baud_rate = tty_termios_baud_rate(old_termios);

	tty_encode_baud_rate(tty, baud_rate, baud_rate);
	/* set CTS/RTS handshake etc. */
	p_priv->cflag = cflag;
	p_priv->flow_control = (cflag & CRTSCTS) ? flow_cts : flow_none;

	/* Mark/Space not supported */
	tty->termios.c_cflag &= ~CMSPAR;

	keyspan_send_setup(port, 0);
}

static int keyspan_tiocmget(struct tty_struct *tty)
{
	struct usb_serial_port *port = tty->driver_data;
	struct keyspan_port_private *p_priv = usb_get_serial_port_data(port);
	unsigned int value;

	value = ((p_priv->rts_state) ? TIOCM_RTS : 0) |
		((p_priv->dtr_state) ?
TIOCM_DTR : 0) | ((p_priv->cts_state) ? TIOCM_CTS : 0) | ((p_priv->dsr_state) ? TIOCM_DSR : 0) | ((p_priv->dcd_state) ? TIOCM_CAR : 0) | ((p_priv->ri_state) ? TIOCM_RNG : 0); return value; } static int keyspan_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct keyspan_port_private *p_priv = usb_get_serial_port_data(port); if (set & TIOCM_RTS) p_priv->rts_state = 1; if (set & TIOCM_DTR) p_priv->dtr_state = 1; if (clear & TIOCM_RTS) p_priv->rts_state = 0; if (clear & TIOCM_DTR) p_priv->dtr_state = 0; keyspan_send_setup(port, 0); return 0; } /* Write function is similar for the four protocols used with only a minor change for usa90 (usa19hs) required */ static int keyspan_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { struct keyspan_port_private *p_priv; const struct keyspan_device_details *d_details; int flip; int left, todo; struct urb *this_urb; int err, maxDataLen, dataOffset; p_priv = usb_get_serial_port_data(port); d_details = p_priv->device_details; if (d_details->msg_format == msg_usa90) { maxDataLen = 64; dataOffset = 0; } else { maxDataLen = 63; dataOffset = 1; } dev_dbg(&port->dev, "%s - %d chars, flip=%d\n", __func__, count, p_priv->out_flip); for (left = count; left > 0; left -= todo) { todo = left; if (todo > maxDataLen) todo = maxDataLen; flip = p_priv->out_flip; /* Check we have a valid urb/endpoint before we use it... */ this_urb = p_priv->out_urbs[flip]; if (this_urb == NULL) { /* no bulk out, so return 0 bytes written */ dev_dbg(&port->dev, "%s - no output urb :(\n", __func__); return count; } dev_dbg(&port->dev, "%s - endpoint %x flip %d\n", __func__, usb_pipeendpoint(this_urb->pipe), flip); if (this_urb->status == -EINPROGRESS) { if (time_before(jiffies, p_priv->tx_start_time[flip] + 10 * HZ)) break; usb_unlink_urb(this_urb); break; } /* First byte in buffer is "last flag" (except for usa19hx) - unused so for now so set to zero */ ((char *)this_urb->transfer_buffer)[0] = 0; memcpy(this_urb->transfer_buffer + dataOffset, buf, todo); buf += todo; /* send the data out the bulk port */ this_urb->transfer_buffer_length = todo + dataOffset; err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "usb_submit_urb(write bulk) failed (%d)\n", err); p_priv->tx_start_time[flip] = jiffies; /* Flip for next time if usa26 or usa28 interface (not used on usa49) */ p_priv->out_flip = (flip + 1) & d_details->outdat_endp_flip; } return count - left; } static void usa26_indat_callback(struct urb *urb) { int i, err; int endpoint; struct usb_serial_port *port; unsigned char *data = urb->transfer_buffer; int status = urb->status; endpoint = usb_pipeendpoint(urb->pipe); if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status %d on endpoint %x\n", __func__, status, endpoint); return; } port = urb->context; if (urb->actual_length) { /* 0x80 bit is error flag */ if ((data[0] & 0x80) == 0) { /* no errors on individual bytes, only possible overrun err */ if (data[0] & RXERROR_OVERRUN) { tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } for (i = 1; i < urb->actual_length ; ++i) tty_insert_flip_char(&port->port, data[i], TTY_NORMAL); } else { /* some bytes had errors, every byte has status */ dev_dbg(&port->dev, "%s - RX error!!!!\n", __func__); for (i = 0; i + 1 < urb->actual_length; i += 2) { int stat = data[i]; int flag = TTY_NORMAL; if (stat & RXERROR_OVERRUN) { tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } /* XXX should handle break 
(0x10) */ if (stat & RXERROR_PARITY) flag = TTY_PARITY; else if (stat & RXERROR_FRAMING) flag = TTY_FRAME; tty_insert_flip_char(&port->port, data[i+1], flag); } } tty_flip_buffer_push(&port->port); } /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. (%d)\n", __func__, err); } /* Outdat handling is common for all devices */ static void usa2x_outdat_callback(struct urb *urb) { struct usb_serial_port *port; struct keyspan_port_private *p_priv; port = urb->context; p_priv = usb_get_serial_port_data(port); dev_dbg(&port->dev, "%s - urb %d\n", __func__, urb == p_priv->out_urbs[1]); usb_serial_port_softint(port); } static void usa26_inack_callback(struct urb *urb) { } static void usa26_outcont_callback(struct urb *urb) { struct usb_serial_port *port; struct keyspan_port_private *p_priv; port = urb->context; p_priv = usb_get_serial_port_data(port); if (p_priv->resend_cont) { dev_dbg(&port->dev, "%s - sending setup\n", __func__); keyspan_usa26_send_setup(port->serial, port, p_priv->resend_cont - 1); } } static void usa26_instat_callback(struct urb *urb) { unsigned char *data = urb->transfer_buffer; struct keyspan_usa26_portStatusMessage *msg; struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; int old_dcd_state, err; int status = urb->status; serial = urb->context; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status: %d\n", __func__, status); return; } if (urb->actual_length != 9) { dev_dbg(&urb->dev->dev, "%s - %d byte report??\n", __func__, urb->actual_length); goto exit; } msg = (struct keyspan_usa26_portStatusMessage *)data; /* Check port number from message and retrieve private data */ if (msg->port >= serial->num_ports) { dev_dbg(&urb->dev->dev, "%s - Unexpected port number %d\n", __func__, msg->port); goto exit; } port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); if (!p_priv) goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; p_priv->cts_state = ((msg->hskia_cts) ? 1 : 0); p_priv->dsr_state = ((msg->dsr) ? 1 : 0); p_priv->dcd_state = ((msg->gpia_dcd) ? 1 : 0); p_priv->ri_state = ((msg->ri) ? 1 : 0); if (old_dcd_state != p_priv->dcd_state) tty_port_tty_hangup(&port->port, true); resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. (%d)\n", __func__, err); exit: ; } static void usa26_glocont_callback(struct urb *urb) { } static void usa28_indat_callback(struct urb *urb) { int err; struct usb_serial_port *port; unsigned char *data; struct keyspan_port_private *p_priv; int status = urb->status; port = urb->context; p_priv = usb_get_serial_port_data(port); if (urb != p_priv->in_urbs[p_priv->in_flip]) return; do { if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status %d on endpoint %x\n", __func__, status, usb_pipeendpoint(urb->pipe)); return; } port = urb->context; p_priv = usb_get_serial_port_data(port); data = urb->transfer_buffer; if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. 
(%d)\n", __func__, err); p_priv->in_flip ^= 1; urb = p_priv->in_urbs[p_priv->in_flip]; } while (urb->status != -EINPROGRESS); } static void usa28_inack_callback(struct urb *urb) { } static void usa28_outcont_callback(struct urb *urb) { struct usb_serial_port *port; struct keyspan_port_private *p_priv; port = urb->context; p_priv = usb_get_serial_port_data(port); if (p_priv->resend_cont) { dev_dbg(&port->dev, "%s - sending setup\n", __func__); keyspan_usa28_send_setup(port->serial, port, p_priv->resend_cont - 1); } } static void usa28_instat_callback(struct urb *urb) { int err; unsigned char *data = urb->transfer_buffer; struct keyspan_usa28_portStatusMessage *msg; struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; int old_dcd_state; int status = urb->status; serial = urb->context; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status: %d\n", __func__, status); return; } if (urb->actual_length != sizeof(struct keyspan_usa28_portStatusMessage)) { dev_dbg(&urb->dev->dev, "%s - bad length %d\n", __func__, urb->actual_length); goto exit; } msg = (struct keyspan_usa28_portStatusMessage *)data; /* Check port number from message and retrieve private data */ if (msg->port >= serial->num_ports) { dev_dbg(&urb->dev->dev, "%s - Unexpected port number %d\n", __func__, msg->port); goto exit; } port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); if (!p_priv) goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; p_priv->cts_state = ((msg->cts) ? 1 : 0); p_priv->dsr_state = ((msg->dsr) ? 1 : 0); p_priv->dcd_state = ((msg->dcd) ? 1 : 0); p_priv->ri_state = ((msg->ri) ? 1 : 0); if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. 
(%d)\n", __func__, err); exit: ; } static void usa28_glocont_callback(struct urb *urb) { } static void usa49_glocont_callback(struct urb *urb) { struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; int i; serial = urb->context; for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; p_priv = usb_get_serial_port_data(port); if (!p_priv) continue; if (p_priv->resend_cont) { dev_dbg(&port->dev, "%s - sending setup\n", __func__); keyspan_usa49_send_setup(serial, port, p_priv->resend_cont - 1); break; } } } /* This is actually called glostat in the Keyspan doco */ static void usa49_instat_callback(struct urb *urb) { int err; unsigned char *data = urb->transfer_buffer; struct keyspan_usa49_portStatusMessage *msg; struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; int old_dcd_state; int status = urb->status; serial = urb->context; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status: %d\n", __func__, status); return; } if (urb->actual_length != sizeof(struct keyspan_usa49_portStatusMessage)) { dev_dbg(&urb->dev->dev, "%s - bad length %d\n", __func__, urb->actual_length); goto exit; } msg = (struct keyspan_usa49_portStatusMessage *)data; /* Check port number from message and retrieve private data */ if (msg->portNumber >= serial->num_ports) { dev_dbg(&urb->dev->dev, "%s - Unexpected port number %d\n", __func__, msg->portNumber); goto exit; } port = serial->port[msg->portNumber]; p_priv = usb_get_serial_port_data(port); if (!p_priv) goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; p_priv->cts_state = ((msg->cts) ? 1 : 0); p_priv->dsr_state = ((msg->dsr) ? 1 : 0); p_priv->dcd_state = ((msg->dcd) ? 1 : 0); p_priv->ri_state = ((msg->ri) ? 1 : 0); if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. (%d)\n", __func__, err); exit: ; } static void usa49_inack_callback(struct urb *urb) { } static void usa49_indat_callback(struct urb *urb) { int i, err; int endpoint; struct usb_serial_port *port; unsigned char *data = urb->transfer_buffer; int status = urb->status; endpoint = usb_pipeendpoint(urb->pipe); if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status %d on endpoint %x\n", __func__, status, endpoint); return; } port = urb->context; if (urb->actual_length) { /* 0x80 bit is error flag */ if ((data[0] & 0x80) == 0) { /* no error on any byte */ tty_insert_flip_string(&port->port, data + 1, urb->actual_length - 1); } else { /* some bytes had errors, every byte has status */ for (i = 0; i + 1 < urb->actual_length; i += 2) { int stat = data[i]; int flag = TTY_NORMAL; if (stat & RXERROR_OVERRUN) { tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } /* XXX should handle break (0x10) */ if (stat & RXERROR_PARITY) flag = TTY_PARITY; else if (stat & RXERROR_FRAMING) flag = TTY_FRAME; tty_insert_flip_char(&port->port, data[i+1], flag); } } tty_flip_buffer_push(&port->port); } /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. 
(%d)\n", __func__, err); } static void usa49wg_indat_callback(struct urb *urb) { int i, len, x, err; struct usb_serial *serial; struct usb_serial_port *port; unsigned char *data = urb->transfer_buffer; int status = urb->status; serial = urb->context; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status: %d\n", __func__, status); return; } /* inbound data is in the form P#, len, status, data */ i = 0; len = 0; while (i < urb->actual_length) { /* Check port number from message */ if (data[i] >= serial->num_ports) { dev_dbg(&urb->dev->dev, "%s - Unexpected port number %d\n", __func__, data[i]); return; } port = serial->port[data[i++]]; len = data[i++]; /* 0x80 bit is error flag */ if ((data[i] & 0x80) == 0) { /* no error on any byte */ i++; for (x = 1; x < len && i < urb->actual_length; ++x) tty_insert_flip_char(&port->port, data[i++], 0); } else { /* * some bytes had errors, every byte has status */ for (x = 0; x + 1 < len && i + 1 < urb->actual_length; x += 2) { int stat = data[i]; int flag = TTY_NORMAL; if (stat & RXERROR_OVERRUN) { tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } /* XXX should handle break (0x10) */ if (stat & RXERROR_PARITY) flag = TTY_PARITY; else if (stat & RXERROR_FRAMING) flag = TTY_FRAME; tty_insert_flip_char(&port->port, data[i+1], flag); i += 2; } } tty_flip_buffer_push(&port->port); } /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&urb->dev->dev, "%s - resubmit read urb failed. (%d)\n", __func__, err); } /* not used, usa-49 doesn't have per-port control endpoints */ static void usa49_outcont_callback(struct urb *urb) { } static void usa90_indat_callback(struct urb *urb) { int i, err; int endpoint; struct usb_serial_port *port; struct keyspan_port_private *p_priv; unsigned char *data = urb->transfer_buffer; int status = urb->status; endpoint = usb_pipeendpoint(urb->pipe); if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status %d on endpoint %x\n", __func__, status, endpoint); return; } port = urb->context; p_priv = usb_get_serial_port_data(port); if (urb->actual_length) { /* if current mode is DMA, looks like usa28 format otherwise looks like usa26 data format */ if (p_priv->baud > 57600) tty_insert_flip_string(&port->port, data, urb->actual_length); else { /* 0x80 bit is error flag */ if ((data[0] & 0x80) == 0) { /* no errors on individual bytes, only possible overrun err*/ if (data[0] & RXERROR_OVERRUN) { tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } for (i = 1; i < urb->actual_length ; ++i) tty_insert_flip_char(&port->port, data[i], TTY_NORMAL); } else { /* some bytes had errors, every byte has status */ dev_dbg(&port->dev, "%s - RX error!!!!\n", __func__); for (i = 0; i + 1 < urb->actual_length; i += 2) { int stat = data[i]; int flag = TTY_NORMAL; if (stat & RXERROR_OVERRUN) { tty_insert_flip_char( &port->port, 0, TTY_OVERRUN); } /* XXX should handle break (0x10) */ if (stat & RXERROR_PARITY) flag = TTY_PARITY; else if (stat & RXERROR_FRAMING) flag = TTY_FRAME; tty_insert_flip_char(&port->port, data[i+1], flag); } } } tty_flip_buffer_push(&port->port); } /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. 
(%d)\n", __func__, err); } static void usa90_instat_callback(struct urb *urb) { unsigned char *data = urb->transfer_buffer; struct keyspan_usa90_portStatusMessage *msg; struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; int old_dcd_state, err; int status = urb->status; serial = urb->context; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status: %d\n", __func__, status); return; } if (urb->actual_length < 14) { dev_dbg(&urb->dev->dev, "%s - %d byte report??\n", __func__, urb->actual_length); goto exit; } msg = (struct keyspan_usa90_portStatusMessage *)data; /* Now do something useful with the data */ port = serial->port[0]; p_priv = usb_get_serial_port_data(port); if (!p_priv) goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; p_priv->cts_state = ((msg->cts) ? 1 : 0); p_priv->dsr_state = ((msg->dsr) ? 1 : 0); p_priv->dcd_state = ((msg->dcd) ? 1 : 0); p_priv->ri_state = ((msg->ri) ? 1 : 0); if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. (%d)\n", __func__, err); exit: ; } static void usa90_outcont_callback(struct urb *urb) { struct usb_serial_port *port; struct keyspan_port_private *p_priv; port = urb->context; p_priv = usb_get_serial_port_data(port); if (p_priv->resend_cont) { dev_dbg(&urb->dev->dev, "%s - sending setup\n", __func__); keyspan_usa90_send_setup(port->serial, port, p_priv->resend_cont - 1); } } /* Status messages from the 28xg */ static void usa67_instat_callback(struct urb *urb) { int err; unsigned char *data = urb->transfer_buffer; struct keyspan_usa67_portStatusMessage *msg; struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; int old_dcd_state; int status = urb->status; serial = urb->context; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero status: %d\n", __func__, status); return; } if (urb->actual_length != sizeof(struct keyspan_usa67_portStatusMessage)) { dev_dbg(&urb->dev->dev, "%s - bad length %d\n", __func__, urb->actual_length); return; } /* Now do something useful with the data */ msg = (struct keyspan_usa67_portStatusMessage *)data; /* Check port number from message and retrieve private data */ if (msg->port >= serial->num_ports) { dev_dbg(&urb->dev->dev, "%s - Unexpected port number %d\n", __func__, msg->port); return; } port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); if (!p_priv) goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; p_priv->cts_state = ((msg->hskia_cts) ? 1 : 0); p_priv->dcd_state = ((msg->gpia_dcd) ? 1 : 0); if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - resubmit read urb failed. 
(%d)\n", __func__, err); } static void usa67_glocont_callback(struct urb *urb) { struct usb_serial *serial; struct usb_serial_port *port; struct keyspan_port_private *p_priv; int i; serial = urb->context; for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; p_priv = usb_get_serial_port_data(port); if (!p_priv) continue; if (p_priv->resend_cont) { dev_dbg(&port->dev, "%s - sending setup\n", __func__); keyspan_usa67_send_setup(serial, port, p_priv->resend_cont - 1); break; } } } static unsigned int keyspan_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct keyspan_port_private *p_priv; const struct keyspan_device_details *d_details; int flip; unsigned int data_len; struct urb *this_urb; p_priv = usb_get_serial_port_data(port); d_details = p_priv->device_details; /* FIXME: locking */ if (d_details->msg_format == msg_usa90) data_len = 64; else data_len = 63; flip = p_priv->out_flip; /* Check both endpoints to see if any are available. */ this_urb = p_priv->out_urbs[flip]; if (this_urb != NULL) { if (this_urb->status != -EINPROGRESS) return data_len; flip = (flip + 1) & d_details->outdat_endp_flip; this_urb = p_priv->out_urbs[flip]; if (this_urb != NULL) { if (this_urb->status != -EINPROGRESS) return data_len; } } return 0; } static int keyspan_open(struct tty_struct *tty, struct usb_serial_port *port) { struct keyspan_port_private *p_priv; const struct keyspan_device_details *d_details; int i, err; int baud_rate, device_port; struct urb *urb; unsigned int cflag = 0; p_priv = usb_get_serial_port_data(port); d_details = p_priv->device_details; /* Set some sane defaults */ p_priv->rts_state = 1; p_priv->dtr_state = 1; p_priv->baud = 9600; /* force baud and lcr to be set on open */ p_priv->old_baud = 0; p_priv->old_cflag = 0; p_priv->out_flip = 0; p_priv->in_flip = 0; /* Reset low level data toggle and start reading from endpoints */ for (i = 0; i < 2; i++) { urb = p_priv->in_urbs[i]; if (urb == NULL) continue; /* make sure endpoint data toggle is synchronized with the device */ usb_clear_halt(urb->dev, urb->pipe); err = usb_submit_urb(urb, GFP_KERNEL); if (err != 0) dev_dbg(&port->dev, "%s - submit urb %d failed (%d)\n", __func__, i, err); } /* Reset low level data toggle on out endpoints */ for (i = 0; i < 2; i++) { urb = p_priv->out_urbs[i]; if (urb == NULL) continue; /* usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe), usb_pipeout(urb->pipe), 0); */ } /* get the terminal config for the setup message now so we don't * need to send 2 of them */ device_port = port->port_number; if (tty) { cflag = tty->termios.c_cflag; /* Baud rate calculation takes baud rate as an integer so other rates can be generated if desired. */ baud_rate = tty_get_baud_rate(tty); /* If no match or invalid, leave as default */ if (baud_rate >= 0 && d_details->calculate_baud_rate(port, baud_rate, d_details->baudclk, NULL, NULL, NULL, device_port) == KEYSPAN_BAUD_RATE_OK) { p_priv->baud = baud_rate; } } /* set CTS/RTS handshake etc. */ p_priv->cflag = cflag; p_priv->flow_control = (cflag & CRTSCTS) ? 
flow_cts : flow_none; keyspan_send_setup(port, 1); /* mdelay(100); */ /* keyspan_set_termios(port, NULL); */ return 0; } static void keyspan_dtr_rts(struct usb_serial_port *port, int on) { struct keyspan_port_private *p_priv = usb_get_serial_port_data(port); p_priv->rts_state = on; p_priv->dtr_state = on; keyspan_send_setup(port, 0); } static void keyspan_close(struct usb_serial_port *port) { int i; struct keyspan_port_private *p_priv; p_priv = usb_get_serial_port_data(port); p_priv->rts_state = 0; p_priv->dtr_state = 0; keyspan_send_setup(port, 2); /* pilot-xfer seems to work best with this delay */ mdelay(100); p_priv->out_flip = 0; p_priv->in_flip = 0; usb_kill_urb(p_priv->inack_urb); for (i = 0; i < 2; i++) { usb_kill_urb(p_priv->in_urbs[i]); usb_kill_urb(p_priv->out_urbs[i]); } } /* download the firmware to a pre-renumeration device */ static int keyspan_fake_startup(struct usb_serial *serial) { char *fw_name; dev_dbg(&serial->dev->dev, "Keyspan startup version %04x product %04x\n", le16_to_cpu(serial->dev->descriptor.bcdDevice), le16_to_cpu(serial->dev->descriptor.idProduct)); if ((le16_to_cpu(serial->dev->descriptor.bcdDevice) & 0x8000) != 0x8000) { dev_dbg(&serial->dev->dev, "Firmware already loaded. Quitting.\n"); return 1; } /* Select firmware image on the basis of idProduct */ switch (le16_to_cpu(serial->dev->descriptor.idProduct)) { case keyspan_usa28_pre_product_id: fw_name = "keyspan/usa28.fw"; break; case keyspan_usa28x_pre_product_id: fw_name = "keyspan/usa28x.fw"; break; case keyspan_usa28xa_pre_product_id: fw_name = "keyspan/usa28xa.fw"; break; case keyspan_usa28xb_pre_product_id: fw_name = "keyspan/usa28xb.fw"; break; case keyspan_usa19_pre_product_id: fw_name = "keyspan/usa19.fw"; break; case keyspan_usa19qi_pre_product_id: fw_name = "keyspan/usa19qi.fw"; break; case keyspan_mpr_pre_product_id: fw_name = "keyspan/mpr.fw"; break; case keyspan_usa19qw_pre_product_id: fw_name = "keyspan/usa19qw.fw"; break; case keyspan_usa18x_pre_product_id: fw_name = "keyspan/usa18x.fw"; break; case keyspan_usa19w_pre_product_id: fw_name = "keyspan/usa19w.fw"; break; case keyspan_usa49w_pre_product_id: fw_name = "keyspan/usa49w.fw"; break; case keyspan_usa49wlc_pre_product_id: fw_name = "keyspan/usa49wlc.fw"; break; default: dev_err(&serial->dev->dev, "Unknown product ID (%04x)\n", le16_to_cpu(serial->dev->descriptor.idProduct)); return 1; } dev_dbg(&serial->dev->dev, "Uploading Keyspan %s firmware.\n", fw_name); if (ezusb_fx1_ihex_firmware_download(serial->dev, fw_name) < 0) { dev_err(&serial->dev->dev, "failed to load firmware \"%s\"\n", fw_name); return -ENOENT; } /* after downloading firmware Renumeration will occur in a moment and the new device will bind to the real driver */ /* we don't want this device to have a driver assigned to it. 
*/ return 1; } /* Helper functions used by keyspan_setup_urbs */ static struct usb_endpoint_descriptor const *find_ep(struct usb_serial const *serial, int endpoint) { struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *ep; int i; iface_desc = serial->interface->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { ep = &iface_desc->endpoint[i].desc; if (ep->bEndpointAddress == endpoint) return ep; } dev_warn(&serial->interface->dev, "found no endpoint descriptor for endpoint %x\n", endpoint); return NULL; } static struct urb *keyspan_setup_urb(struct usb_serial *serial, int endpoint, int dir, void *ctx, char *buf, int len, void (*callback)(struct urb *)) { struct urb *urb; struct usb_endpoint_descriptor const *ep_desc; char const *ep_type_name; if (endpoint == -1) return NULL; /* endpoint not needed */ dev_dbg(&serial->interface->dev, "%s - alloc for endpoint %x\n", __func__, endpoint); urb = usb_alloc_urb(0, GFP_KERNEL); /* No ISO */ if (!urb) return NULL; if (endpoint == 0) { /* control EP filled in when used */ return urb; } ep_desc = find_ep(serial, endpoint); if (!ep_desc) { usb_free_urb(urb); return NULL; } if (usb_endpoint_xfer_int(ep_desc)) { ep_type_name = "INT"; usb_fill_int_urb(urb, serial->dev, usb_sndintpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx, ep_desc->bInterval); } else if (usb_endpoint_xfer_bulk(ep_desc)) { ep_type_name = "BULK"; usb_fill_bulk_urb(urb, serial->dev, usb_sndbulkpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx); } else { dev_warn(&serial->interface->dev, "unsupported endpoint type %x\n", usb_endpoint_type(ep_desc)); usb_free_urb(urb); return NULL; } dev_dbg(&serial->interface->dev, "%s - using urb %p for %s endpoint %x\n", __func__, urb, ep_type_name, endpoint); return urb; } static struct callbacks { void (*instat_callback)(struct urb *); void (*glocont_callback)(struct urb *); void (*indat_callback)(struct urb *); void (*outdat_callback)(struct urb *); void (*inack_callback)(struct urb *); void (*outcont_callback)(struct urb *); } keyspan_callbacks[] = { { /* msg_usa26 callbacks */ .instat_callback = usa26_instat_callback, .glocont_callback = usa26_glocont_callback, .indat_callback = usa26_indat_callback, .outdat_callback = usa2x_outdat_callback, .inack_callback = usa26_inack_callback, .outcont_callback = usa26_outcont_callback, }, { /* msg_usa28 callbacks */ .instat_callback = usa28_instat_callback, .glocont_callback = usa28_glocont_callback, .indat_callback = usa28_indat_callback, .outdat_callback = usa2x_outdat_callback, .inack_callback = usa28_inack_callback, .outcont_callback = usa28_outcont_callback, }, { /* msg_usa49 callbacks */ .instat_callback = usa49_instat_callback, .glocont_callback = usa49_glocont_callback, .indat_callback = usa49_indat_callback, .outdat_callback = usa2x_outdat_callback, .inack_callback = usa49_inack_callback, .outcont_callback = usa49_outcont_callback, }, { /* msg_usa90 callbacks */ .instat_callback = usa90_instat_callback, .glocont_callback = usa28_glocont_callback, .indat_callback = usa90_indat_callback, .outdat_callback = usa2x_outdat_callback, .inack_callback = usa28_inack_callback, .outcont_callback = usa90_outcont_callback, }, { /* msg_usa67 callbacks */ .instat_callback = usa67_instat_callback, .glocont_callback = usa67_glocont_callback, .indat_callback = usa26_indat_callback, .outdat_callback = usa2x_outdat_callback, .inack_callback = usa26_inack_callback, .outcont_callback = usa26_outcont_callback, } }; /* Generic setup urbs function that uses data in 
device_details */ static void keyspan_setup_urbs(struct usb_serial *serial) { struct keyspan_serial_private *s_priv; const struct keyspan_device_details *d_details; struct callbacks *cback; s_priv = usb_get_serial_data(serial); d_details = s_priv->device_details; /* Setup values for the various callback routines */ cback = &keyspan_callbacks[d_details->msg_format]; /* Allocate and set up urbs for each one that is in use, starting with instat endpoints */ s_priv->instat_urb = keyspan_setup_urb (serial, d_details->instat_endpoint, USB_DIR_IN, serial, s_priv->instat_buf, INSTAT_BUFLEN, cback->instat_callback); s_priv->indat_urb = keyspan_setup_urb (serial, d_details->indat_endpoint, USB_DIR_IN, serial, s_priv->indat_buf, INDAT49W_BUFLEN, usa49wg_indat_callback); s_priv->glocont_urb = keyspan_setup_urb (serial, d_details->glocont_endpoint, USB_DIR_OUT, serial, s_priv->glocont_buf, GLOCONT_BUFLEN, cback->glocont_callback); } /* usa19 function doesn't require prescaler */ static int keyspan_usa19_calc_baud(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum) { u32 b16, /* baud rate times 16 (actual rate used internally) */ div, /* divisor */ cnt; /* inverse of divisor (programmed into 8051) */ dev_dbg(&port->dev, "%s - %d.\n", __func__, baud_rate); /* prevent divide by zero... */ b16 = baud_rate * 16L; if (b16 == 0) return KEYSPAN_INVALID_BAUD_RATE; /* Any "standard" rate over 57k6 is marginal on the USA-19 as we run out of divisor resolution. */ if (baud_rate > 57600) return KEYSPAN_INVALID_BAUD_RATE; /* calculate the divisor and the counter (its inverse) */ div = baudclk / b16; if (div == 0) return KEYSPAN_INVALID_BAUD_RATE; else cnt = 0 - div; if (div > 0xffff) return KEYSPAN_INVALID_BAUD_RATE; /* return the counter values if non-null */ if (rate_low) *rate_low = (u8) (cnt & 0xff); if (rate_hi) *rate_hi = (u8) ((cnt >> 8) & 0xff); if (rate_low && rate_hi) dev_dbg(&port->dev, "%s - %d %02x %02x.\n", __func__, baud_rate, *rate_hi, *rate_low); return KEYSPAN_BAUD_RATE_OK; } /* usa19hs function doesn't require prescaler */ static int keyspan_usa19hs_calc_baud(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum) { u32 b16, /* baud rate times 16 (actual rate used internally) */ div; /* divisor */ dev_dbg(&port->dev, "%s - %d.\n", __func__, baud_rate); /* prevent divide by zero... 
*/ b16 = baud_rate * 16L; if (b16 == 0) return KEYSPAN_INVALID_BAUD_RATE; /* calculate the divisor */ div = baudclk / b16; if (div == 0) return KEYSPAN_INVALID_BAUD_RATE; if (div > 0xffff) return KEYSPAN_INVALID_BAUD_RATE; /* return the counter values if non-null */ if (rate_low) *rate_low = (u8) (div & 0xff); if (rate_hi) *rate_hi = (u8) ((div >> 8) & 0xff); if (rate_low && rate_hi) dev_dbg(&port->dev, "%s - %d %02x %02x.\n", __func__, baud_rate, *rate_hi, *rate_low); return KEYSPAN_BAUD_RATE_OK; } static int keyspan_usa19w_calc_baud(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum) { u32 b16, /* baud rate times 16 (actual rate used internally) */ clk, /* clock with 13/8 prescaler */ div, /* divisor using 13/8 prescaler */ res, /* resulting baud rate using 13/8 prescaler */ diff, /* error using 13/8 prescaler */ smallest_diff; u8 best_prescaler; int i; dev_dbg(&port->dev, "%s - %d.\n", __func__, baud_rate); /* prevent divide by zero */ b16 = baud_rate * 16L; if (b16 == 0) return KEYSPAN_INVALID_BAUD_RATE; /* Calculate prescaler by trying them all and looking for best fit */ /* start with largest possible difference */ smallest_diff = 0xffffffff; /* 0 is an invalid prescaler, used as a flag */ best_prescaler = 0; for (i = 8; i <= 0xff; ++i) { clk = (baudclk * 8) / (u32) i; div = clk / b16; if (div == 0) continue; res = clk / div; diff = (res > b16) ? (res-b16) : (b16-res); if (diff < smallest_diff) { best_prescaler = i; smallest_diff = diff; } } if (best_prescaler == 0) return KEYSPAN_INVALID_BAUD_RATE; clk = (baudclk * 8) / (u32) best_prescaler; div = clk / b16; /* return the divisor and prescaler if non-null */ if (rate_low) *rate_low = (u8) (div & 0xff); if (rate_hi) *rate_hi = (u8) ((div >> 8) & 0xff); if (prescaler) { *prescaler = best_prescaler; /* dev_dbg(&port->dev, "%s - %d %d\n", __func__, *prescaler, div); */ } return KEYSPAN_BAUD_RATE_OK; } /* USA-28 supports different maximum baud rates on each port */ static int keyspan_usa28_calc_baud(struct usb_serial_port *port, u32 baud_rate, u32 baudclk, u8 *rate_hi, u8 *rate_low, u8 *prescaler, int portnum) { u32 b16, /* baud rate times 16 (actual rate used internally) */ div, /* divisor */ cnt; /* inverse of divisor (programmed into 8051) */ dev_dbg(&port->dev, "%s - %d.\n", __func__, baud_rate); /* prevent divide by zero */ b16 = baud_rate * 16L; if (b16 == 0) return KEYSPAN_INVALID_BAUD_RATE; /* calculate the divisor and the counter (its inverse) */ div = KEYSPAN_USA28_BAUDCLK / b16; if (div == 0) return KEYSPAN_INVALID_BAUD_RATE; else cnt = 0 - div; /* check for out of range, based on portnum, and return result */ if (portnum == 0) { if (div > 0xffff) return KEYSPAN_INVALID_BAUD_RATE; } else { if (portnum == 1) { if (div > 0xff) return KEYSPAN_INVALID_BAUD_RATE; } else return KEYSPAN_INVALID_BAUD_RATE; } /* return the counter values if not NULL (port 1 will ignore retHi) */ if (rate_low) *rate_low = (u8) (cnt & 0xff); if (rate_hi) *rate_hi = (u8) ((cnt >> 8) & 0xff); dev_dbg(&port->dev, "%s - %d OK.\n", __func__, baud_rate); return KEYSPAN_BAUD_RATE_OK; } static int keyspan_usa26_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port) { struct keyspan_usa26_portControlMessage msg; struct keyspan_serial_private *s_priv; struct keyspan_port_private *p_priv; const struct keyspan_device_details *d_details; struct urb *this_urb; int device_port, err; dev_dbg(&port->dev, "%s reset=%d\n", __func__, reset_port); s_priv = usb_get_serial_data(serial); 
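	/*
	 * reset_port follows the same convention in all of the send_setup
	 * variants: 1 means the port is being opened, 2 means it is being
	 * closed, and any other value sends an intermediate configuration
	 * update (break state, handshake lines, termios changes).
	 */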
p_priv = usb_get_serial_port_data(port); d_details = s_priv->device_details; device_port = port->port_number; this_urb = p_priv->outcont_urb; /* Make sure we have an urb then send the message */ if (this_urb == NULL) { dev_dbg(&port->dev, "%s - oops no urb.\n", __func__); return -1; } dev_dbg(&port->dev, "%s - endpoint %x\n", __func__, usb_pipeendpoint(this_urb->pipe)); /* Save reset port val for resend. Don't overwrite resend for open/close condition. */ if ((reset_port + 1) > p_priv->resend_cont) p_priv->resend_cont = reset_port + 1; if (this_urb->status == -EINPROGRESS) { /* dev_dbg(&port->dev, "%s - already writing\n", __func__); */ mdelay(5); return -1; } memset(&msg, 0, sizeof(struct keyspan_usa26_portControlMessage)); /* Only set baud rate if it's changed */ if (p_priv->old_baud != p_priv->baud) { p_priv->old_baud = p_priv->baud; msg.setClocking = 0xff; if (d_details->calculate_baud_rate(port, p_priv->baud, d_details->baudclk, &msg.baudHi, &msg.baudLo, &msg.prescaler, device_port) == KEYSPAN_INVALID_BAUD_RATE) { dev_dbg(&port->dev, "%s - Invalid baud rate %d requested, using 9600.\n", __func__, p_priv->baud); msg.baudLo = 0; msg.baudHi = 125; /* Values for 9600 baud */ msg.prescaler = 10; } msg.setPrescaler = 0xff; } msg.lcr = (p_priv->cflag & CSTOPB) ? STOPBITS_678_2 : STOPBITS_5678_1; switch (p_priv->cflag & CSIZE) { case CS5: msg.lcr |= USA_DATABITS_5; break; case CS6: msg.lcr |= USA_DATABITS_6; break; case CS7: msg.lcr |= USA_DATABITS_7; break; case CS8: msg.lcr |= USA_DATABITS_8; break; } if (p_priv->cflag & PARENB) { /* note USA_PARITY_NONE == 0 */ msg.lcr |= (p_priv->cflag & PARODD) ? USA_PARITY_ODD : USA_PARITY_EVEN; } msg.setLcr = 0xff; msg.ctsFlowControl = (p_priv->flow_control == flow_cts); msg.xonFlowControl = 0; msg.setFlowControl = 0xff; msg.forwardingLength = 16; msg.xonChar = 17; msg.xoffChar = 19; /* Opening port */ if (reset_port == 1) { msg._txOn = 1; msg._txOff = 0; msg.txFlush = 0; msg.txBreak = 0; msg.rxOn = 1; msg.rxOff = 0; msg.rxFlush = 1; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0xff; } /* Closing port */ else if (reset_port == 2) { msg._txOn = 0; msg._txOff = 1; msg.txFlush = 0; msg.txBreak = 0; msg.rxOn = 0; msg.rxOff = 1; msg.rxFlush = 1; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0; } /* Sending intermediate configs */ else { msg._txOn = (!p_priv->break_on); msg._txOff = 0; msg.txFlush = 0; msg.txBreak = (p_priv->break_on); msg.rxOn = 0; msg.rxOff = 0; msg.rxFlush = 0; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0x0; } /* Do handshaking outputs */ msg.setTxTriState_setRts = 0xff; msg.txTriState_rts = p_priv->rts_state; msg.setHskoa_setDtr = 0xff; msg.hskoa_dtr = p_priv->dtr_state; p_priv->resend_cont = 0; memcpy(this_urb->transfer_buffer, &msg, sizeof(msg)); /* send the data out the device on control endpoint */ this_urb->transfer_buffer_length = sizeof(msg); err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - usb_submit_urb(setup) failed (%d)\n", __func__, err); return 0; } static int keyspan_usa28_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port) { struct keyspan_usa28_portControlMessage msg; struct keyspan_serial_private *s_priv; struct keyspan_port_private *p_priv; const struct keyspan_device_details *d_details; struct urb *this_urb; int device_port, err; s_priv = usb_get_serial_data(serial); p_priv = usb_get_serial_port_data(port); d_details = s_priv->device_details; device_port = port->port_number; /* only do something if 
we have a bulk out endpoint */ this_urb = p_priv->outcont_urb; if (this_urb == NULL) { dev_dbg(&port->dev, "%s - oops no urb.\n", __func__); return -1; } /* Save reset port val for resend. Don't overwrite resend for open/close condition. */ if ((reset_port + 1) > p_priv->resend_cont) p_priv->resend_cont = reset_port + 1; if (this_urb->status == -EINPROGRESS) { dev_dbg(&port->dev, "%s already writing\n", __func__); mdelay(5); return -1; } memset(&msg, 0, sizeof(struct keyspan_usa28_portControlMessage)); msg.setBaudRate = 1; if (d_details->calculate_baud_rate(port, p_priv->baud, d_details->baudclk, &msg.baudHi, &msg.baudLo, NULL, device_port) == KEYSPAN_INVALID_BAUD_RATE) { dev_dbg(&port->dev, "%s - Invalid baud rate requested %d.\n", __func__, p_priv->baud); msg.baudLo = 0xff; msg.baudHi = 0xb2; /* Values for 9600 baud */ } /* If parity is enabled, we must calculate it ourselves. */ msg.parity = 0; /* XXX for now */ msg.ctsFlowControl = (p_priv->flow_control == flow_cts); msg.xonFlowControl = 0; /* Do handshaking outputs, DTR is inverted relative to RTS */ msg.rts = p_priv->rts_state; msg.dtr = p_priv->dtr_state; msg.forwardingLength = 16; msg.forwardMs = 10; msg.breakThreshold = 45; msg.xonChar = 17; msg.xoffChar = 19; /*msg.returnStatus = 1; msg.resetDataToggle = 0xff;*/ /* Opening port */ if (reset_port == 1) { msg._txOn = 1; msg._txOff = 0; msg.txFlush = 0; msg.txForceXoff = 0; msg.txBreak = 0; msg.rxOn = 1; msg.rxOff = 0; msg.rxFlush = 1; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0xff; } /* Closing port */ else if (reset_port == 2) { msg._txOn = 0; msg._txOff = 1; msg.txFlush = 0; msg.txForceXoff = 0; msg.txBreak = 0; msg.rxOn = 0; msg.rxOff = 1; msg.rxFlush = 1; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0; } /* Sending intermediate configs */ else { msg._txOn = (!p_priv->break_on); msg._txOff = 0; msg.txFlush = 0; msg.txForceXoff = 0; msg.txBreak = (p_priv->break_on); msg.rxOn = 0; msg.rxOff = 0; msg.rxFlush = 0; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0x0; } p_priv->resend_cont = 0; memcpy(this_urb->transfer_buffer, &msg, sizeof(msg)); /* send the data out the device on control endpoint */ this_urb->transfer_buffer_length = sizeof(msg); err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - usb_submit_urb(setup) failed\n", __func__); return 0; } static int keyspan_usa49_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port) { struct keyspan_usa49_portControlMessage msg; struct usb_ctrlrequest *dr = NULL; struct keyspan_serial_private *s_priv; struct keyspan_port_private *p_priv; const struct keyspan_device_details *d_details; struct urb *this_urb; int err, device_port; s_priv = usb_get_serial_data(serial); p_priv = usb_get_serial_port_data(port); d_details = s_priv->device_details; this_urb = s_priv->glocont_urb; /* Work out which port within the device is being setup */ device_port = port->port_number; /* Make sure we have an urb then send the message */ if (this_urb == NULL) { dev_dbg(&port->dev, "%s - oops no urb for port.\n", __func__); return -1; } dev_dbg(&port->dev, "%s - endpoint %x (%d)\n", __func__, usb_pipeendpoint(this_urb->pipe), device_port); /* Save reset port val for resend. Don't overwrite resend for open/close condition. 
*/ if ((reset_port + 1) > p_priv->resend_cont) p_priv->resend_cont = reset_port + 1; if (this_urb->status == -EINPROGRESS) { /* dev_dbg(&port->dev, "%s - already writing\n", __func__); */ mdelay(5); return -1; } memset(&msg, 0, sizeof(struct keyspan_usa49_portControlMessage)); msg.portNumber = device_port; /* Only set baud rate if it's changed */ if (p_priv->old_baud != p_priv->baud) { p_priv->old_baud = p_priv->baud; msg.setClocking = 0xff; if (d_details->calculate_baud_rate(port, p_priv->baud, d_details->baudclk, &msg.baudHi, &msg.baudLo, &msg.prescaler, device_port) == KEYSPAN_INVALID_BAUD_RATE) { dev_dbg(&port->dev, "%s - Invalid baud rate %d requested, using 9600.\n", __func__, p_priv->baud); msg.baudLo = 0; msg.baudHi = 125; /* Values for 9600 baud */ msg.prescaler = 10; } /* msg.setPrescaler = 0xff; */ } msg.lcr = (p_priv->cflag & CSTOPB) ? STOPBITS_678_2 : STOPBITS_5678_1; switch (p_priv->cflag & CSIZE) { case CS5: msg.lcr |= USA_DATABITS_5; break; case CS6: msg.lcr |= USA_DATABITS_6; break; case CS7: msg.lcr |= USA_DATABITS_7; break; case CS8: msg.lcr |= USA_DATABITS_8; break; } if (p_priv->cflag & PARENB) { /* note USA_PARITY_NONE == 0 */ msg.lcr |= (p_priv->cflag & PARODD) ? USA_PARITY_ODD : USA_PARITY_EVEN; } msg.setLcr = 0xff; msg.ctsFlowControl = (p_priv->flow_control == flow_cts); msg.xonFlowControl = 0; msg.setFlowControl = 0xff; msg.forwardingLength = 16; msg.xonChar = 17; msg.xoffChar = 19; /* Opening port */ if (reset_port == 1) { msg._txOn = 1; msg._txOff = 0; msg.txFlush = 0; msg.txBreak = 0; msg.rxOn = 1; msg.rxOff = 0; msg.rxFlush = 1; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0xff; msg.enablePort = 1; msg.disablePort = 0; } /* Closing port */ else if (reset_port == 2) { msg._txOn = 0; msg._txOff = 1; msg.txFlush = 0; msg.txBreak = 0; msg.rxOn = 0; msg.rxOff = 1; msg.rxFlush = 1; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0; msg.enablePort = 0; msg.disablePort = 1; } /* Sending intermediate configs */ else { msg._txOn = (!p_priv->break_on); msg._txOff = 0; msg.txFlush = 0; msg.txBreak = (p_priv->break_on); msg.rxOn = 0; msg.rxOff = 0; msg.rxFlush = 0; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0x0; msg.enablePort = 0; msg.disablePort = 0; } /* Do handshaking outputs */ msg.setRts = 0xff; msg.rts = p_priv->rts_state; msg.setDtr = 0xff; msg.dtr = p_priv->dtr_state; p_priv->resend_cont = 0; /* if the device is a 49wg, we send control message on usb control EP 0 */ if (d_details->product_id == keyspan_usa49wg_product_id) { dr = (void *)(s_priv->ctrl_buf); dr->bRequestType = USB_TYPE_VENDOR | USB_DIR_OUT; dr->bRequest = 0xB0; /* 49wg control message */ dr->wValue = 0; dr->wIndex = 0; dr->wLength = cpu_to_le16(sizeof(msg)); memcpy(s_priv->glocont_buf, &msg, sizeof(msg)); usb_fill_control_urb(this_urb, serial->dev, usb_sndctrlpipe(serial->dev, 0), (unsigned char *)dr, s_priv->glocont_buf, sizeof(msg), usa49_glocont_callback, serial); } else { memcpy(this_urb->transfer_buffer, &msg, sizeof(msg)); /* send the data out the device on control endpoint */ this_urb->transfer_buffer_length = sizeof(msg); } err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - usb_submit_urb(setup) failed (%d)\n", __func__, err); return 0; } static int keyspan_usa90_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port) { struct keyspan_usa90_portControlMessage msg; struct keyspan_serial_private *s_priv; struct keyspan_port_private *p_priv; const struct keyspan_device_details 
*d_details; struct urb *this_urb; int err; u8 prescaler; s_priv = usb_get_serial_data(serial); p_priv = usb_get_serial_port_data(port); d_details = s_priv->device_details; /* only do something if we have a bulk out endpoint */ this_urb = p_priv->outcont_urb; if (this_urb == NULL) { dev_dbg(&port->dev, "%s - oops no urb.\n", __func__); return -1; } /* Save reset port val for resend. Don't overwrite resend for open/close condition. */ if ((reset_port + 1) > p_priv->resend_cont) p_priv->resend_cont = reset_port + 1; if (this_urb->status == -EINPROGRESS) { dev_dbg(&port->dev, "%s already writing\n", __func__); mdelay(5); return -1; } memset(&msg, 0, sizeof(struct keyspan_usa90_portControlMessage)); /* Only set baud rate if it's changed */ if (p_priv->old_baud != p_priv->baud) { p_priv->old_baud = p_priv->baud; msg.setClocking = 0x01; if (d_details->calculate_baud_rate(port, p_priv->baud, d_details->baudclk, &msg.baudHi, &msg.baudLo, &prescaler, 0) == KEYSPAN_INVALID_BAUD_RATE) { dev_dbg(&port->dev, "%s - Invalid baud rate %d requested, using 9600.\n", __func__, p_priv->baud); p_priv->baud = 9600; d_details->calculate_baud_rate(port, p_priv->baud, d_details->baudclk, &msg.baudHi, &msg.baudLo, &prescaler, 0); } msg.setRxMode = 1; msg.setTxMode = 1; } /* modes must always be correctly specified */ if (p_priv->baud > 57600) { msg.rxMode = RXMODE_DMA; msg.txMode = TXMODE_DMA; } else { msg.rxMode = RXMODE_BYHAND; msg.txMode = TXMODE_BYHAND; } msg.lcr = (p_priv->cflag & CSTOPB) ? STOPBITS_678_2 : STOPBITS_5678_1; switch (p_priv->cflag & CSIZE) { case CS5: msg.lcr |= USA_DATABITS_5; break; case CS6: msg.lcr |= USA_DATABITS_6; break; case CS7: msg.lcr |= USA_DATABITS_7; break; case CS8: msg.lcr |= USA_DATABITS_8; break; } if (p_priv->cflag & PARENB) { /* note USA_PARITY_NONE == 0 */ msg.lcr |= (p_priv->cflag & PARODD) ? 
USA_PARITY_ODD : USA_PARITY_EVEN; } if (p_priv->old_cflag != p_priv->cflag) { p_priv->old_cflag = p_priv->cflag; msg.setLcr = 0x01; } if (p_priv->flow_control == flow_cts) msg.txFlowControl = TXFLOW_CTS; msg.setTxFlowControl = 0x01; msg.setRxFlowControl = 0x01; msg.rxForwardingLength = 16; msg.rxForwardingTimeout = 16; msg.txAckSetting = 0; msg.xonChar = 17; msg.xoffChar = 19; /* Opening port */ if (reset_port == 1) { msg.portEnabled = 1; msg.rxFlush = 1; msg.txBreak = (p_priv->break_on); } /* Closing port */ else if (reset_port == 2) msg.portEnabled = 0; /* Sending intermediate configs */ else { msg.portEnabled = 1; msg.txBreak = (p_priv->break_on); } /* Do handshaking outputs */ msg.setRts = 0x01; msg.rts = p_priv->rts_state; msg.setDtr = 0x01; msg.dtr = p_priv->dtr_state; p_priv->resend_cont = 0; memcpy(this_urb->transfer_buffer, &msg, sizeof(msg)); /* send the data out the device on control endpoint */ this_urb->transfer_buffer_length = sizeof(msg); err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - usb_submit_urb(setup) failed (%d)\n", __func__, err); return 0; } static int keyspan_usa67_send_setup(struct usb_serial *serial, struct usb_serial_port *port, int reset_port) { struct keyspan_usa67_portControlMessage msg; struct keyspan_serial_private *s_priv; struct keyspan_port_private *p_priv; const struct keyspan_device_details *d_details; struct urb *this_urb; int err, device_port; s_priv = usb_get_serial_data(serial); p_priv = usb_get_serial_port_data(port); d_details = s_priv->device_details; this_urb = s_priv->glocont_urb; /* Work out which port within the device is being setup */ device_port = port->port_number; /* Make sure we have an urb then send the message */ if (this_urb == NULL) { dev_dbg(&port->dev, "%s - oops no urb for port.\n", __func__); return -1; } /* Save reset port val for resend. Don't overwrite resend for open/close condition. */ if ((reset_port + 1) > p_priv->resend_cont) p_priv->resend_cont = reset_port + 1; if (this_urb->status == -EINPROGRESS) { /* dev_dbg(&port->dev, "%s - already writing\n", __func__); */ mdelay(5); return -1; } memset(&msg, 0, sizeof(struct keyspan_usa67_portControlMessage)); msg.port = device_port; /* Only set baud rate if it's changed */ if (p_priv->old_baud != p_priv->baud) { p_priv->old_baud = p_priv->baud; msg.setClocking = 0xff; if (d_details->calculate_baud_rate(port, p_priv->baud, d_details->baudclk, &msg.baudHi, &msg.baudLo, &msg.prescaler, device_port) == KEYSPAN_INVALID_BAUD_RATE) { dev_dbg(&port->dev, "%s - Invalid baud rate %d requested, using 9600.\n", __func__, p_priv->baud); msg.baudLo = 0; msg.baudHi = 125; /* Values for 9600 baud */ msg.prescaler = 10; } msg.setPrescaler = 0xff; } msg.lcr = (p_priv->cflag & CSTOPB) ? STOPBITS_678_2 : STOPBITS_5678_1; switch (p_priv->cflag & CSIZE) { case CS5: msg.lcr |= USA_DATABITS_5; break; case CS6: msg.lcr |= USA_DATABITS_6; break; case CS7: msg.lcr |= USA_DATABITS_7; break; case CS8: msg.lcr |= USA_DATABITS_8; break; } if (p_priv->cflag & PARENB) { /* note USA_PARITY_NONE == 0 */ msg.lcr |= (p_priv->cflag & PARODD) ? 
USA_PARITY_ODD : USA_PARITY_EVEN; } msg.setLcr = 0xff; msg.ctsFlowControl = (p_priv->flow_control == flow_cts); msg.xonFlowControl = 0; msg.setFlowControl = 0xff; msg.forwardingLength = 16; msg.xonChar = 17; msg.xoffChar = 19; if (reset_port == 1) { /* Opening port */ msg._txOn = 1; msg._txOff = 0; msg.txFlush = 0; msg.txBreak = 0; msg.rxOn = 1; msg.rxOff = 0; msg.rxFlush = 1; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0xff; } else if (reset_port == 2) { /* Closing port */ msg._txOn = 0; msg._txOff = 1; msg.txFlush = 0; msg.txBreak = 0; msg.rxOn = 0; msg.rxOff = 1; msg.rxFlush = 1; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0; } else { /* Sending intermediate configs */ msg._txOn = (!p_priv->break_on); msg._txOff = 0; msg.txFlush = 0; msg.txBreak = (p_priv->break_on); msg.rxOn = 0; msg.rxOff = 0; msg.rxFlush = 0; msg.rxForward = 0; msg.returnStatus = 0; msg.resetDataToggle = 0x0; } /* Do handshaking outputs */ msg.setTxTriState_setRts = 0xff; msg.txTriState_rts = p_priv->rts_state; msg.setHskoa_setDtr = 0xff; msg.hskoa_dtr = p_priv->dtr_state; p_priv->resend_cont = 0; memcpy(this_urb->transfer_buffer, &msg, sizeof(msg)); /* send the data out the device on control endpoint */ this_urb->transfer_buffer_length = sizeof(msg); err = usb_submit_urb(this_urb, GFP_ATOMIC); if (err != 0) dev_dbg(&port->dev, "%s - usb_submit_urb(setup) failed (%d)\n", __func__, err); return 0; } static void keyspan_send_setup(struct usb_serial_port *port, int reset_port) { struct usb_serial *serial = port->serial; struct keyspan_serial_private *s_priv; const struct keyspan_device_details *d_details; s_priv = usb_get_serial_data(serial); d_details = s_priv->device_details; switch (d_details->msg_format) { case msg_usa26: keyspan_usa26_send_setup(serial, port, reset_port); break; case msg_usa28: keyspan_usa28_send_setup(serial, port, reset_port); break; case msg_usa49: keyspan_usa49_send_setup(serial, port, reset_port); break; case msg_usa90: keyspan_usa90_send_setup(serial, port, reset_port); break; case msg_usa67: keyspan_usa67_send_setup(serial, port, reset_port); break; } } /* Gets called by the "real" driver (ie once firmware is loaded and renumeration has taken place. 
*/ static int keyspan_startup(struct usb_serial *serial) { int i, err; struct keyspan_serial_private *s_priv; const struct keyspan_device_details *d_details; for (i = 0; (d_details = keyspan_devices[i]) != NULL; ++i) if (d_details->product_id == le16_to_cpu(serial->dev->descriptor.idProduct)) break; if (d_details == NULL) { dev_err(&serial->dev->dev, "%s - unknown product id %x\n", __func__, le16_to_cpu(serial->dev->descriptor.idProduct)); return -ENODEV; } /* Setup private data for serial driver */ s_priv = kzalloc(sizeof(struct keyspan_serial_private), GFP_KERNEL); if (!s_priv) return -ENOMEM; s_priv->instat_buf = kzalloc(INSTAT_BUFLEN, GFP_KERNEL); if (!s_priv->instat_buf) goto err_instat_buf; s_priv->indat_buf = kzalloc(INDAT49W_BUFLEN, GFP_KERNEL); if (!s_priv->indat_buf) goto err_indat_buf; s_priv->glocont_buf = kzalloc(GLOCONT_BUFLEN, GFP_KERNEL); if (!s_priv->glocont_buf) goto err_glocont_buf; s_priv->ctrl_buf = kzalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!s_priv->ctrl_buf) goto err_ctrl_buf; s_priv->device_details = d_details; usb_set_serial_data(serial, s_priv); keyspan_setup_urbs(serial); if (s_priv->instat_urb != NULL) { err = usb_submit_urb(s_priv->instat_urb, GFP_KERNEL); if (err != 0) dev_dbg(&serial->dev->dev, "%s - submit instat urb failed %d\n", __func__, err); } if (s_priv->indat_urb != NULL) { err = usb_submit_urb(s_priv->indat_urb, GFP_KERNEL); if (err != 0) dev_dbg(&serial->dev->dev, "%s - submit indat urb failed %d\n", __func__, err); } return 0; err_ctrl_buf: kfree(s_priv->glocont_buf); err_glocont_buf: kfree(s_priv->indat_buf); err_indat_buf: kfree(s_priv->instat_buf); err_instat_buf: kfree(s_priv); return -ENOMEM; } static void keyspan_disconnect(struct usb_serial *serial) { struct keyspan_serial_private *s_priv; s_priv = usb_get_serial_data(serial); usb_kill_urb(s_priv->instat_urb); usb_kill_urb(s_priv->glocont_urb); usb_kill_urb(s_priv->indat_urb); } static void keyspan_release(struct usb_serial *serial) { struct keyspan_serial_private *s_priv; s_priv = usb_get_serial_data(serial); /* Make sure to unlink the URBs submitted in attach. 
*/ usb_kill_urb(s_priv->instat_urb); usb_kill_urb(s_priv->indat_urb); usb_free_urb(s_priv->instat_urb); usb_free_urb(s_priv->indat_urb); usb_free_urb(s_priv->glocont_urb); kfree(s_priv->ctrl_buf); kfree(s_priv->glocont_buf); kfree(s_priv->indat_buf); kfree(s_priv->instat_buf); kfree(s_priv); } static int keyspan_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct keyspan_serial_private *s_priv; struct keyspan_port_private *p_priv; const struct keyspan_device_details *d_details; struct callbacks *cback; int endp; int port_num; int i; s_priv = usb_get_serial_data(serial); d_details = s_priv->device_details; p_priv = kzalloc(sizeof(*p_priv), GFP_KERNEL); if (!p_priv) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) { p_priv->in_buffer[i] = kzalloc(IN_BUFLEN, GFP_KERNEL); if (!p_priv->in_buffer[i]) goto err_free_in_buffer; } for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) { p_priv->out_buffer[i] = kzalloc(OUT_BUFLEN, GFP_KERNEL); if (!p_priv->out_buffer[i]) goto err_free_out_buffer; } p_priv->inack_buffer = kzalloc(INACK_BUFLEN, GFP_KERNEL); if (!p_priv->inack_buffer) goto err_free_out_buffer; p_priv->outcont_buffer = kzalloc(OUTCONT_BUFLEN, GFP_KERNEL); if (!p_priv->outcont_buffer) goto err_free_inack_buffer; p_priv->device_details = d_details; /* Setup values for the various callback routines */ cback = &keyspan_callbacks[d_details->msg_format]; port_num = port->port_number; /* Do indat endpoints first, once for each flip */ endp = d_details->indat_endpoints[port_num]; for (i = 0; i <= d_details->indat_endp_flip; ++i, ++endp) { p_priv->in_urbs[i] = keyspan_setup_urb(serial, endp, USB_DIR_IN, port, p_priv->in_buffer[i], IN_BUFLEN, cback->indat_callback); } /* outdat endpoints also have flip */ endp = d_details->outdat_endpoints[port_num]; for (i = 0; i <= d_details->outdat_endp_flip; ++i, ++endp) { p_priv->out_urbs[i] = keyspan_setup_urb(serial, endp, USB_DIR_OUT, port, p_priv->out_buffer[i], OUT_BUFLEN, cback->outdat_callback); } /* inack endpoint */ p_priv->inack_urb = keyspan_setup_urb(serial, d_details->inack_endpoints[port_num], USB_DIR_IN, port, p_priv->inack_buffer, INACK_BUFLEN, cback->inack_callback); /* outcont endpoint */ p_priv->outcont_urb = keyspan_setup_urb(serial, d_details->outcont_endpoints[port_num], USB_DIR_OUT, port, p_priv->outcont_buffer, OUTCONT_BUFLEN, cback->outcont_callback); usb_set_serial_port_data(port, p_priv); return 0; err_free_inack_buffer: kfree(p_priv->inack_buffer); err_free_out_buffer: for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) kfree(p_priv->out_buffer[i]); err_free_in_buffer: for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) kfree(p_priv->in_buffer[i]); kfree(p_priv); return -ENOMEM; } static void keyspan_port_remove(struct usb_serial_port *port) { struct keyspan_port_private *p_priv; int i; p_priv = usb_get_serial_port_data(port); usb_kill_urb(p_priv->inack_urb); usb_kill_urb(p_priv->outcont_urb); for (i = 0; i < 2; i++) { usb_kill_urb(p_priv->in_urbs[i]); usb_kill_urb(p_priv->out_urbs[i]); } usb_free_urb(p_priv->inack_urb); usb_free_urb(p_priv->outcont_urb); for (i = 0; i < 2; i++) { usb_free_urb(p_priv->in_urbs[i]); usb_free_urb(p_priv->out_urbs[i]); } kfree(p_priv->outcont_buffer); kfree(p_priv->inack_buffer); for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) kfree(p_priv->out_buffer[i]); for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) kfree(p_priv->in_buffer[i]); kfree(p_priv); } /* Structs for the devices, pre and post renumeration. 
*/ static struct usb_serial_driver keyspan_pre_device = { .driver = { .owner = THIS_MODULE, .name = "keyspan_no_firm", }, .description = "Keyspan - (without firmware)", .id_table = keyspan_pre_ids, .num_ports = 1, .attach = keyspan_fake_startup, }; static struct usb_serial_driver keyspan_1port_device = { .driver = { .owner = THIS_MODULE, .name = "keyspan_1", }, .description = "Keyspan 1 port adapter", .id_table = keyspan_1port_ids, .num_ports = 1, .open = keyspan_open, .close = keyspan_close, .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, .break_ctl = keyspan_break_ctl, .tiocmget = keyspan_tiocmget, .tiocmset = keyspan_tiocmset, .attach = keyspan_startup, .disconnect = keyspan_disconnect, .release = keyspan_release, .port_probe = keyspan_port_probe, .port_remove = keyspan_port_remove, }; static struct usb_serial_driver keyspan_2port_device = { .driver = { .owner = THIS_MODULE, .name = "keyspan_2", }, .description = "Keyspan 2 port adapter", .id_table = keyspan_2port_ids, .num_ports = 2, .open = keyspan_open, .close = keyspan_close, .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, .break_ctl = keyspan_break_ctl, .tiocmget = keyspan_tiocmget, .tiocmset = keyspan_tiocmset, .attach = keyspan_startup, .disconnect = keyspan_disconnect, .release = keyspan_release, .port_probe = keyspan_port_probe, .port_remove = keyspan_port_remove, }; static struct usb_serial_driver keyspan_4port_device = { .driver = { .owner = THIS_MODULE, .name = "keyspan_4", }, .description = "Keyspan 4 port adapter", .id_table = keyspan_4port_ids, .num_ports = 4, .open = keyspan_open, .close = keyspan_close, .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, .break_ctl = keyspan_break_ctl, .tiocmget = keyspan_tiocmget, .tiocmset = keyspan_tiocmset, .attach = keyspan_startup, .disconnect = keyspan_disconnect, .release = keyspan_release, .port_probe = keyspan_port_probe, .port_remove = keyspan_port_remove, }; static struct usb_serial_driver * const serial_drivers[] = { &keyspan_pre_device, &keyspan_1port_device, &keyspan_2port_device, &keyspan_4port_device, NULL }; module_usb_serial_driver(serial_drivers, keyspan_ids_combined); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_FIRMWARE("keyspan/usa28.fw"); MODULE_FIRMWARE("keyspan/usa28x.fw"); MODULE_FIRMWARE("keyspan/usa28xa.fw"); MODULE_FIRMWARE("keyspan/usa28xb.fw"); MODULE_FIRMWARE("keyspan/usa19.fw"); MODULE_FIRMWARE("keyspan/usa19qi.fw"); MODULE_FIRMWARE("keyspan/mpr.fw"); MODULE_FIRMWARE("keyspan/usa19qw.fw"); MODULE_FIRMWARE("keyspan/usa18x.fw"); MODULE_FIRMWARE("keyspan/usa19w.fw"); MODULE_FIRMWARE("keyspan/usa49w.fw"); MODULE_FIRMWARE("keyspan/usa49wlc.fw");
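/*
 * Illustrative sketch (not part of the keyspan driver): every
 * keyspan_usa*_send_setup() routine above builds its line-control byte from
 * the cached termios cflag with the same CSTOPB/CSIZE/PARENB switch.  The
 * standalone userspace program below mirrors that mapping.  The SKETCH_*
 * encodings are placeholder values chosen for illustration only; they are
 * not the driver's real STOPBITS_*/USA_DATABITS_*/USA_PARITY_* constants.
 */
#include <stdint.h>
#include <stdio.h>
#include <termios.h>

#define SKETCH_STOPBITS_1	0x00	/* placeholder encodings */
#define SKETCH_STOPBITS_2	0x40
#define SKETCH_DATABITS_5	0x00
#define SKETCH_DATABITS_6	0x01
#define SKETCH_DATABITS_7	0x02
#define SKETCH_DATABITS_8	0x03
#define SKETCH_PARITY_ODD	0x08
#define SKETCH_PARITY_EVEN	0x18

static uint8_t sketch_cflag_to_lcr(tcflag_t cflag)
{
	uint8_t lcr = (cflag & CSTOPB) ? SKETCH_STOPBITS_2 : SKETCH_STOPBITS_1;

	switch (cflag & CSIZE) {
	case CS5: lcr |= SKETCH_DATABITS_5; break;
	case CS6: lcr |= SKETCH_DATABITS_6; break;
	case CS7: lcr |= SKETCH_DATABITS_7; break;
	default:  lcr |= SKETCH_DATABITS_8; break;
	}

	if (cflag & PARENB)
		lcr |= (cflag & PARODD) ? SKETCH_PARITY_ODD : SKETCH_PARITY_EVEN;

	return lcr;
}

int main(void)
{
	/* 8N1 and 7E2, two common settings. */
	printf("8N1 lcr=0x%02x\n", (unsigned int)sketch_cflag_to_lcr(CS8));
	printf("7E2 lcr=0x%02x\n",
	       (unsigned int)sketch_cflag_to_lcr(CS7 | CSTOPB | PARENB));
	return 0;
}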
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * This file provides wrappers with sanitizer instrumentation for non-atomic
 * bit operations.
 *
 * To use this functionality, an arch's bitops.h file needs to define each of
 * the below bit operations with an arch_ prefix (e.g. arch_set_bit(),
 * arch___set_bit(), etc.).
 */
#ifndef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
#define _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H

#include <linux/instrumented.h>

/**
 * ___set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic. If it is called on the same
 * region of memory concurrently, the effect may be that only one operation
 * succeeds.
 */
static __always_inline void
___set_bit(unsigned long nr, volatile unsigned long *addr)
{
	instrument_write(addr + BIT_WORD(nr), sizeof(long));
	arch___set_bit(nr, addr);
}

/**
 * ___clear_bit - Clears a bit in memory
 * @nr: the bit to clear
 * @addr: the address to start counting from
 *
 * Unlike clear_bit(), this function is non-atomic. If it is called on the same
 * region of memory concurrently, the effect may be that only one operation
 * succeeds.
 */
static __always_inline void
___clear_bit(unsigned long nr, volatile unsigned long *addr)
{
	instrument_write(addr + BIT_WORD(nr), sizeof(long));
	arch___clear_bit(nr, addr);
}

/**
 * ___change_bit - Toggle a bit in memory
 * @nr: the bit to change
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic. If it is called on the same
 * region of memory concurrently, the effect may be that only one operation
 * succeeds.
 */
static __always_inline void
___change_bit(unsigned long nr, volatile unsigned long *addr)
{
	instrument_write(addr + BIT_WORD(nr), sizeof(long));
	arch___change_bit(nr, addr);
}

static __always_inline void
__instrument_read_write_bitop(long nr, volatile unsigned long *addr)
{
	if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC)) {
		/*
		 * We treat non-atomic read-write bitops a little more special.
		 * Given the operations here only modify a single bit, assuming
		 * non-atomicity of the writer is sufficient may be reasonable
		 * for certain usage (and follows the permissible nature of the
		 * assume-plain-writes-atomic rule):
		 * 1. report read-modify-write races -> check read;
		 * 2. do not report races with marked readers, but do report
		 *    races with unmarked readers -> check "atomic" write.
		 */
		kcsan_check_read(addr + BIT_WORD(nr), sizeof(long));
		/*
		 * Use generic write instrumentation, in case other sanitizers
		 * or tools are enabled alongside KCSAN.
		 */
		instrument_write(addr + BIT_WORD(nr), sizeof(long));
	} else {
		instrument_read_write(addr + BIT_WORD(nr), sizeof(long));
	}
}

/**
 * ___test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic. If two instances of this operation race, one
 * can appear to succeed but actually fail.
 */
static __always_inline bool
___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
{
	__instrument_read_write_bitop(nr, addr);
	return arch___test_and_set_bit(nr, addr);
}

/**
 * ___test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic. If two instances of this operation race, one
 * can appear to succeed but actually fail.
 */
static __always_inline bool
___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
	__instrument_read_write_bitop(nr, addr);
	return arch___test_and_clear_bit(nr, addr);
}

/**
 * ___test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is non-atomic. If two instances of this operation race, one
 * can appear to succeed but actually fail.
 */
static __always_inline bool
___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
{
	__instrument_read_write_bitop(nr, addr);
	return arch___test_and_change_bit(nr, addr);
}

/**
 * _test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 */
static __always_inline bool
_test_bit(unsigned long nr, const volatile unsigned long *addr)
{
	instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long));
	return arch_test_bit(nr, addr);
}

/**
 * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 */
static __always_inline bool
_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
{
	instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long));
	return arch_test_bit_acquire(nr, addr);
}

#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
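/*
 * Illustrative sketch (not kernel code): the instrumented wrappers above
 * always report the access at addr + BIT_WORD(nr) with size sizeof(long),
 * i.e. the single word of the bitmap that the bit operation touches.  The
 * standalone userspace program below redoes the BIT_WORD()/BIT_MASK()
 * arithmetic to show which word and mask a bit number resolves to; the
 * SKETCH_* names are local to this example and it performs no
 * instrumentation itself.
 */
#include <limits.h>
#include <stdio.h>

#define SKETCH_BITS_PER_LONG	(sizeof(long) * CHAR_BIT)
#define SKETCH_BIT_WORD(nr)	((nr) / SKETCH_BITS_PER_LONG)
#define SKETCH_BIT_MASK(nr)	(1UL << ((nr) % SKETCH_BITS_PER_LONG))

/* Non-atomic read-modify-write of one word, like arch___set_bit(). */
static void sketch_set_bit(unsigned long nr, unsigned long *addr)
{
	addr[SKETCH_BIT_WORD(nr)] |= SKETCH_BIT_MASK(nr);
}

static int sketch_test_bit(unsigned long nr, const unsigned long *addr)
{
	return (addr[SKETCH_BIT_WORD(nr)] & SKETCH_BIT_MASK(nr)) != 0;
}

int main(void)
{
	unsigned long bitmap[4] = { 0 };	/* 4 * BITS_PER_LONG bits */
	unsigned long nr = 70;

	sketch_set_bit(nr, bitmap);
	printf("bit %lu lives in word %lu, mask 0x%lx, set=%d\n",
	       nr, (unsigned long)SKETCH_BIT_WORD(nr), SKETCH_BIT_MASK(nr),
	       sketch_test_bit(nr, bitmap));
	return 0;
}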
1 1 1 1 21 21 19 19 20 233 233 232 28 21 7 233 233 232 233 233 7 7 233 210 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 // SPDX-License-Identifier: GPL-2.0 /* * USB device quirk handling logic and table * * Copyright (c) 2007 Oliver Neukum * Copyright (c) 2007 Greg Kroah-Hartman <gregkh@suse.de> */ #include <linux/moduleparam.h> #include <linux/usb.h> #include <linux/usb/quirks.h> #include <linux/usb/hcd.h> #include "usb.h" struct quirk_entry { u16 vid; u16 pid; u32 flags; }; static DEFINE_MUTEX(quirk_mutex); static struct quirk_entry *quirk_list; static unsigned int quirk_count; static char quirks_param[128]; static int quirks_param_set(const char *value, const struct kernel_param *kp) { char *val, *p, *field; u16 vid, pid; u32 
flags; size_t i; int err; val = kstrdup(value, GFP_KERNEL); if (!val) return -ENOMEM; err = param_set_copystring(val, kp); if (err) { kfree(val); return err; } mutex_lock(&quirk_mutex); if (!*val) { quirk_count = 0; kfree(quirk_list); quirk_list = NULL; goto unlock; } for (quirk_count = 1, i = 0; val[i]; i++) if (val[i] == ',') quirk_count++; if (quirk_list) { kfree(quirk_list); quirk_list = NULL; } quirk_list = kcalloc(quirk_count, sizeof(struct quirk_entry), GFP_KERNEL); if (!quirk_list) { quirk_count = 0; mutex_unlock(&quirk_mutex); kfree(val); return -ENOMEM; } for (i = 0, p = val; p && *p;) { /* Each entry consists of VID:PID:flags */ field = strsep(&p, ":"); if (!field) break; if (kstrtou16(field, 16, &vid)) break; field = strsep(&p, ":"); if (!field) break; if (kstrtou16(field, 16, &pid)) break; field = strsep(&p, ","); if (!field || !*field) break; /* Collect the flags */ for (flags = 0; *field; field++) { switch (*field) { case 'a': flags |= USB_QUIRK_STRING_FETCH_255; break; case 'b': flags |= USB_QUIRK_RESET_RESUME; break; case 'c': flags |= USB_QUIRK_NO_SET_INTF; break; case 'd': flags |= USB_QUIRK_CONFIG_INTF_STRINGS; break; case 'e': flags |= USB_QUIRK_RESET; break; case 'f': flags |= USB_QUIRK_HONOR_BNUMINTERFACES; break; case 'g': flags |= USB_QUIRK_DELAY_INIT; break; case 'h': flags |= USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL; break; case 'i': flags |= USB_QUIRK_DEVICE_QUALIFIER; break; case 'j': flags |= USB_QUIRK_IGNORE_REMOTE_WAKEUP; break; case 'k': flags |= USB_QUIRK_NO_LPM; break; case 'l': flags |= USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL; break; case 'm': flags |= USB_QUIRK_DISCONNECT_SUSPEND; break; case 'n': flags |= USB_QUIRK_DELAY_CTRL_MSG; break; case 'o': flags |= USB_QUIRK_HUB_SLOW_RESET; break; case 'p': flags |= USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT; break; /* Ignore unrecognized flag characters */ } } quirk_list[i++] = (struct quirk_entry) { .vid = vid, .pid = pid, .flags = flags }; } if (i < quirk_count) quirk_count = i; unlock: mutex_unlock(&quirk_mutex); kfree(val); return 0; } static const struct kernel_param_ops quirks_param_ops = { .set = quirks_param_set, .get = param_get_string, }; static struct kparam_string quirks_param_string = { .maxlen = sizeof(quirks_param), .string = quirks_param, }; device_param_cb(quirks, &quirks_param_ops, &quirks_param_string, 0644); MODULE_PARM_DESC(quirks, "Add/modify USB quirks by specifying quirks=vendorID:productID:quirks"); /* Lists of quirky USB devices, split in device quirks and interface quirks. * Device quirks are applied at the very beginning of the enumeration process, * right after reading the device descriptor. They can thus only match on device * information. * * Interface quirks are applied after reading all the configuration descriptors. * They can match on both device and interface information. * * Note that the DELAY_INIT and HONOR_BNUMINTERFACES quirks do not make sense as * interface quirks, as they only influence the enumeration process which is run * before processing the interface quirks. 
* * Please keep the lists ordered by: * 1) Vendor ID * 2) Product ID * 3) Class ID */ static const struct usb_device_id usb_quirk_list[] = { /* CBM - Flash disk */ { USB_DEVICE(0x0204, 0x6025), .driver_info = USB_QUIRK_RESET_RESUME }, /* WORLDE Controller KS49 or Prodipe MIDI 49C USB controller */ { USB_DEVICE(0x0218, 0x0201), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* WORLDE easy key (easykey.25) MIDI controller */ { USB_DEVICE(0x0218, 0x0401), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* HP 5300/5370C scanner */ { USB_DEVICE(0x03f0, 0x0701), .driver_info = USB_QUIRK_STRING_FETCH_255 }, /* HP v222w 16GB Mini USB Drive */ { USB_DEVICE(0x03f0, 0x3f40), .driver_info = USB_QUIRK_DELAY_INIT }, /* Creative SB Audigy 2 NX */ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME }, /* USB3503 */ { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, /* Microsoft Wireless Laser Mouse 6000 Receiver */ { USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME }, /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, /* Microsoft Surface Dock Ethernet (RTL8153 GigE) */ { USB_DEVICE(0x045e, 0x07c6), .driver_info = USB_QUIRK_NO_LPM }, /* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */ { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech HD Webcam C270 */ { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech HD Pro Webcams C920, C920-C, C922, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x085c), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech ConferenceCam CC3000e */ { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0848), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech PTZ Pro Camera */ { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech Screen Share */ { USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_NO_LPM }, /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Quickcam Orbit MP */ { USB_DEVICE(0x046d, 0x08c2), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Quickcam Pro for Notebook */ { USB_DEVICE(0x046d, 0x08c3), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Quickcam Pro 5000 */ { USB_DEVICE(0x046d, 0x08c5), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Quickcam OEM Dell Notebook */ { USB_DEVICE(0x046d, 0x08c6), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Quickcam OEM Cisco VT Camera II */ { USB_DEVICE(0x046d, 0x08c7), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Harmony 700-series */ { USB_DEVICE(0x046d, 0xc122), .driver_info = USB_QUIRK_DELAY_INIT }, /* Philips PSC805 audio device */ { USB_DEVICE(0x0471, 0x0155), .driver_info = USB_QUIRK_RESET_RESUME }, /* Plantronic Audio 655 DSP */ { USB_DEVICE(0x047f, 0xc008), .driver_info = USB_QUIRK_RESET_RESUME }, /* Plantronic Audio 648 USB */ { USB_DEVICE(0x047f, 0xc013), .driver_info = USB_QUIRK_RESET_RESUME }, /* Artisman Watchdog Dongle */ { USB_DEVICE(0x04b4, 0x0526), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* Microchip Joss Optical infrared touchboard device */ { USB_DEVICE(0x04d8, 0x000c), .driver_info = 
USB_QUIRK_CONFIG_INTF_STRINGS }, /* CarrolTouch 4000U */ { USB_DEVICE(0x04e7, 0x0009), .driver_info = USB_QUIRK_RESET_RESUME }, /* CarrolTouch 4500U */ { USB_DEVICE(0x04e7, 0x0030), .driver_info = USB_QUIRK_RESET_RESUME }, /* Samsung Android phone modem - ID conflict with SPH-I500 */ { USB_DEVICE(0x04e8, 0x6601), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* Elan Touchscreen */ { USB_DEVICE(0x04f3, 0x0089), .driver_info = USB_QUIRK_DEVICE_QUALIFIER }, { USB_DEVICE(0x04f3, 0x009b), .driver_info = USB_QUIRK_DEVICE_QUALIFIER }, { USB_DEVICE(0x04f3, 0x010c), .driver_info = USB_QUIRK_DEVICE_QUALIFIER }, { USB_DEVICE(0x04f3, 0x0125), .driver_info = USB_QUIRK_DEVICE_QUALIFIER }, { USB_DEVICE(0x04f3, 0x016f), .driver_info = USB_QUIRK_DEVICE_QUALIFIER }, { USB_DEVICE(0x04f3, 0x0381), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x04f3, 0x21b8), .driver_info = USB_QUIRK_DEVICE_QUALIFIER }, /* Roland SC-8820 */ { USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME }, /* Edirol SD-20 */ { USB_DEVICE(0x0582, 0x0027), .driver_info = USB_QUIRK_RESET_RESUME }, /* Alcor Micro Corp. Hub */ { USB_DEVICE(0x058f, 0x9254), .driver_info = USB_QUIRK_RESET_RESUME }, /* appletouch */ { USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME }, /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, /* ELSA MicroLink 56K */ { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME }, /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, /* Avision AV600U */ { USB_DEVICE(0x0638, 0x0a13), .driver_info = USB_QUIRK_STRING_FETCH_255 }, /* Saitek Cyborg Gold Joystick */ { USB_DEVICE(0x06a3, 0x0006), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* Agfa SNAPSCAN 1212U */ { USB_DEVICE(0x06bd, 0x0001), .driver_info = USB_QUIRK_RESET_RESUME }, /* Guillemot Webcam Hercules Dualpix Exchange (2nd ID) */ { USB_DEVICE(0x06f8, 0x0804), .driver_info = USB_QUIRK_RESET_RESUME }, /* Guillemot Webcam Hercules Dualpix Exchange*/ { USB_DEVICE(0x06f8, 0x3005), .driver_info = USB_QUIRK_RESET_RESUME }, /* Guillemot Hercules DJ Console audio card (BZ 208357) */ { USB_DEVICE(0x06f8, 0xb000), .driver_info = USB_QUIRK_ENDPOINT_IGNORE }, /* Midiman M-Audio Keystation 88es */ { USB_DEVICE(0x0763, 0x0192), .driver_info = USB_QUIRK_RESET_RESUME }, /* SanDisk Ultra Fit and Ultra Flair */ { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, /* Realforce 87U Keyboard */ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, /* Baum Vario Ultra */ { USB_DEVICE(0x0904, 0x6101), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, { USB_DEVICE(0x0904, 0x6102), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, { USB_DEVICE(0x0904, 0x6103), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, /* Sound Devices USBPre2 */ { USB_DEVICE(0x0926, 0x0202), .driver_info = USB_QUIRK_ENDPOINT_IGNORE }, /* Sound Devices MixPre-D */ { USB_DEVICE(0x0926, 0x0208), .driver_info = USB_QUIRK_ENDPOINT_IGNORE }, /* Keytouch QWERTY Panel keyboard */ { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* Kingston DataTraveler 3.0 */ { USB_DEVICE(0x0951, 0x1666), .driver_info = USB_QUIRK_NO_LPM }, /* NVIDIA Jetson devices in Force Recovery mode */ 
{ USB_DEVICE(0x0955, 0x7018), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x0955, 0x7019), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x0955, 0x7418), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x0955, 0x7721), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x0955, 0x7c18), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x0955, 0x7e19), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x0955, 0x7f21), .driver_info = USB_QUIRK_RESET_RESUME }, /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, /* ELMO L-12F document camera */ { USB_DEVICE(0x09a1, 0x0028), .driver_info = USB_QUIRK_DELAY_CTRL_MSG }, /* Broadcom BCM92035DGROM BT dongle */ { USB_DEVICE(0x0a5c, 0x2021), .driver_info = USB_QUIRK_RESET_RESUME }, /* MAYA44USB sound device */ { USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME }, /* ASUS Base Station(T100) */ { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader)*/ { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* Realtek hub in Dell WD19 (Type-C) */ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, /* Generic RTL8153 based ethernet adapters */ { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM }, /* SONiX USB DEVICE Touchpad */ { USB_DEVICE(0x0c45, 0x7056), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, /* novation SoundControl XL */ { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, /* Focusrite Scarlett Solo USB */ { USB_DEVICE(0x1235, 0x8211), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, /* Huawei 4G LTE module */ { USB_DEVICE(0x12d1, 0x15bb), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, { USB_DEVICE(0x12d1, 0x15c3), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, /* SKYMEDI USB_DRIVE */ { USB_DEVICE(0x1516, 0x8628), .driver_info = USB_QUIRK_RESET_RESUME }, /* Razer - Razer Blade Keyboard */ { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, /* Lenovo ThinkPad OneLink+ Dock twin hub controllers (VIA Labs VL812) */ { USB_DEVICE(0x17ef, 0x1018), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x17ef, 0x1019), .driver_info = USB_QUIRK_RESET_RESUME }, /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */ { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM }, /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */ { USB_DEVICE(0x17ef, 0xa012), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, /* Lenovo ThinkPad USB-C Dock Gen2 Ethernet (RTL8153 GigE) */ { USB_DEVICE(0x17ef, 0xa387), .driver_info = USB_QUIRK_NO_LPM }, /* BUILDWIN Photo Frame */ { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, /* Protocol and OTG Electrical Test Device */ { USB_DEVICE(0x1a0a, 0x0200), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, /* Terminus Technology Inc. 
Hub */ { USB_DEVICE(0x1a40, 0x0101), .driver_info = USB_QUIRK_HUB_SLOW_RESET }, /* Corsair K70 RGB */ { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, /* Corsair Strafe */ { USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, /* Corsair K70 LUX RGB */ { USB_DEVICE(0x1b1c, 0x1b33), .driver_info = USB_QUIRK_DELAY_INIT }, /* Corsair K70 LUX */ { USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT }, /* Corsair K70 RGB RAPDIFIRE */ { USB_DEVICE(0x1b1c, 0x1b38), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, /* MIDI keyboard WORLDE MINI */ { USB_DEVICE(0x1c75, 0x0204), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* Acer C120 LED Projector */ { USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM }, /* Blackmagic Design Intensity Shuttle */ { USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM }, /* Blackmagic Design UltraStudio SDI */ { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM }, /* Hauppauge HVR-950q */ { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* Raydium Touchscreen */ { USB_DEVICE(0x2386, 0x3114), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x2386, 0x3119), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x2386, 0x350e), .driver_info = USB_QUIRK_NO_LPM }, /* APTIV AUTOMOTIVE HUB */ { USB_DEVICE(0x2c48, 0x0132), .driver_info = USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT }, /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, /* Alcor Link AK9563 SC Reader used in 2022 Lenovo ThinkPads */ { USB_DEVICE(0x2ce3, 0x9563), .driver_info = USB_QUIRK_NO_LPM }, /* DELL USB GEN2 */ { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, /* VCOM device */ { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, { } /* terminating entry must be last */ }; static const struct usb_device_id usb_interface_quirk_list[] = { /* Logitech UVC Cameras */ { USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0), .driver_info = USB_QUIRK_RESET_RESUME }, { } /* terminating entry must be last */ }; static const struct usb_device_id usb_amd_resume_quirk_list[] = { /* Lenovo Mouse with Pixart controller */ { USB_DEVICE(0x17ef, 0x602e), .driver_info = USB_QUIRK_RESET_RESUME }, /* Pixart Mouse */ { USB_DEVICE(0x093a, 0x2500), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x093a, 0x2510), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x093a, 0x2521), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x03f0, 0x2b4a), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Optical Mouse M90/M100 */ { USB_DEVICE(0x046d, 0xc05a), .driver_info = USB_QUIRK_RESET_RESUME }, { } /* terminating entry must be last */ }; /* * Entries for endpoints that should be ignored when parsing configuration * descriptors. * * Matched for devices with USB_QUIRK_ENDPOINT_IGNORE. 
*/ static const struct usb_device_id usb_endpoint_ignore[] = { { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x01 }, { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x81 }, { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 }, { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0208, 1), .driver_info = 0x85 }, { } }; bool usb_endpoint_is_ignored(struct usb_device *udev, struct usb_host_interface *intf, struct usb_endpoint_descriptor *epd) { const struct usb_device_id *id; unsigned int address; for (id = usb_endpoint_ignore; id->match_flags; ++id) { if (!usb_match_device(udev, id)) continue; if (!usb_match_one_id_intf(udev, intf, id)) continue; address = id->driver_info; if (address == epd->bEndpointAddress) return true; } return false; } static bool usb_match_any_interface(struct usb_device *udev, const struct usb_device_id *id) { unsigned int i; for (i = 0; i < udev->descriptor.bNumConfigurations; ++i) { struct usb_host_config *cfg = &udev->config[i]; unsigned int j; for (j = 0; j < cfg->desc.bNumInterfaces; ++j) { struct usb_interface_cache *cache; struct usb_host_interface *intf; cache = cfg->intf_cache[j]; if (cache->num_altsetting == 0) continue; intf = &cache->altsetting[0]; if (usb_match_one_id_intf(udev, intf, id)) return true; } } return false; } static int usb_amd_resume_quirk(struct usb_device *udev) { struct usb_hcd *hcd; hcd = bus_to_hcd(udev->bus); /* The device should be attached directly to root hub */ if (udev->level == 1 && hcd->amd_resume_bug == 1) return 1; return 0; } static u32 usb_detect_static_quirks(struct usb_device *udev, const struct usb_device_id *id) { u32 quirks = 0; for (; id->match_flags; id++) { if (!usb_match_device(udev, id)) continue; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_INFO) && !usb_match_any_interface(udev, id)) continue; quirks |= (u32)(id->driver_info); } return quirks; } static u32 usb_detect_dynamic_quirks(struct usb_device *udev) { u16 vid = le16_to_cpu(udev->descriptor.idVendor); u16 pid = le16_to_cpu(udev->descriptor.idProduct); int i, flags = 0; mutex_lock(&quirk_mutex); for (i = 0; i < quirk_count; i++) { if (vid == quirk_list[i].vid && pid == quirk_list[i].pid) { flags = quirk_list[i].flags; break; } } mutex_unlock(&quirk_mutex); return flags; } /* * Detect any quirks the device has, and do any housekeeping for it if needed. */ void usb_detect_quirks(struct usb_device *udev) { udev->quirks = usb_detect_static_quirks(udev, usb_quirk_list); /* * Pixart-based mice would trigger remote wakeup issue on AMD * Yangtze chipset, so set them as RESET_RESUME flag. */ if (usb_amd_resume_quirk(udev)) udev->quirks |= usb_detect_static_quirks(udev, usb_amd_resume_quirk_list); udev->quirks ^= usb_detect_dynamic_quirks(udev); if (udev->quirks) dev_dbg(&udev->dev, "USB quirks for this device: %x\n", udev->quirks); #ifdef CONFIG_USB_DEFAULT_PERSIST if (!(udev->quirks & USB_QUIRK_RESET)) udev->persist_enabled = 1; #else /* Hubs are automatically enabled for USB-PERSIST */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) udev->persist_enabled = 1; #endif /* CONFIG_USB_DEFAULT_PERSIST */ } void usb_detect_interface_quirks(struct usb_device *udev) { u32 quirks; quirks = usb_detect_static_quirks(udev, usb_interface_quirk_list); if (quirks == 0) return; dev_dbg(&udev->dev, "USB interface quirks for this device: %x\n", quirks); udev->quirks |= quirks; } void usb_release_quirk_list(void) { mutex_lock(&quirk_mutex); kfree(quirk_list); quirk_list = NULL; mutex_unlock(&quirk_mutex); }
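/*
 * Illustrative sketch (not kernel code): quirks_param_set() above parses a
 * comma-separated list of VID:PID:flag-letter entries, e.g. on the kernel
 * command line
 *
 *	usbcore.quirks=0781:5583:k,045e:07c6:b
 *
 * where 'k' and 'b' are the USB_QUIRK_NO_LPM and USB_QUIRK_RESET_RESUME
 * letters from the switch statement above.  The standalone userspace
 * program below only mirrors the strsep()-based splitting so the format is
 * easy to see; it is not the kernel parser, and it maps each letter to a
 * plain bit position rather than to the real USB_QUIRK_* values.
 */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char buf[] = "0781:5583:k,045e:07c6:bk";	/* example input */
	char *p = buf, *entry;

	while ((entry = strsep(&p, ",")) != NULL) {
		char *vid = strsep(&entry, ":");
		char *pid = strsep(&entry, ":");
		char *flags = entry;		/* what is left after VID:PID: */
		unsigned long mask = 0;

		if (!vid || !pid || !flags)
			break;
		for (; *flags; flags++)
			if (*flags >= 'a' && *flags <= 'p')
				mask |= 1UL << (*flags - 'a');	/* 'a' -> bit 0 */

		printf("vid=%04lx pid=%04lx flagmask=0x%lx\n",
		       strtoul(vid, NULL, 16), strtoul(pid, NULL, 16), mask);
	}
	return 0;
}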
3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 // SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2005 Mike Isely <isely@pobox.com> * Copyright (C) 2004 Aurelien Alleaume <slts@free.fr> */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/videodev2.h> #include "pvrusb2-hdw.h" #include "pvrusb2-devattr.h" #include "pvrusb2-context.h" #include "pvrusb2-debug.h" #include "pvrusb2-v4l2.h" #include "pvrusb2-sysfs.h" #define DRIVER_AUTHOR "Mike Isely <isely@pobox.com>" #define DRIVER_DESC "Hauppauge WinTV-PVR-USB2 MPEG2 Encoder/Tuner" #define DRIVER_VERSION "V4L in-tree version" #define DEFAULT_DEBUG_MASK (PVR2_TRACE_ERROR_LEGS| \ PVR2_TRACE_INFO| \ PVR2_TRACE_STD| \ PVR2_TRACE_TOLERANCE| \ PVR2_TRACE_TRAP| \ 0) int pvrusb2_debug = DEFAULT_DEBUG_MASK; module_param_named(debug,pvrusb2_debug,int,S_IRUGO|S_IWUSR); MODULE_PARM_DESC(debug, "Debug trace mask"); static void pvr_setup_attach(struct pvr2_context *pvr) { /* Create association with v4l layer */ pvr2_v4l2_create(pvr); #ifdef CONFIG_VIDEO_PVRUSB2_DVB /* Create association with dvb layer */ pvr2_dvb_create(pvr); #endif pvr2_sysfs_create(pvr); } static int pvr_probe(struct usb_interface *intf, const struct usb_device_id *devid) { struct pvr2_context *pvr; /* Create underlying hardware interface */ pvr = pvr2_context_create(intf,devid,pvr_setup_attach); if (!pvr) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "Failed to create hdw handler"); return -ENOMEM; } pvr2_trace(PVR2_TRACE_INIT,"pvr_probe(pvr=%p)",pvr); usb_set_intfdata(intf, pvr); return 0; } /* * pvr_disconnect() * */ static void pvr_disconnect(struct usb_interface *intf) { struct pvr2_context *pvr = usb_get_intfdata(intf); pvr2_trace(PVR2_TRACE_INIT,"pvr_disconnect(pvr=%p) BEGIN",pvr); usb_set_intfdata (intf, NULL); pvr2_context_disconnect(pvr); pvr2_trace(PVR2_TRACE_INIT,"pvr_disconnect(pvr=%p) DONE",pvr); } static struct usb_driver pvr_driver = { .name = "pvrusb2", .id_table = pvr2_device_table, .probe = pvr_probe, .disconnect = pvr_disconnect }; /* * pvr_init() / pvr_exit() * * This code is run to initialize/exit the driver. * */ static int __init pvr_init(void) { int ret; pvr2_trace(PVR2_TRACE_INIT,"pvr_init"); ret = pvr2_context_global_init(); if (ret != 0) { pvr2_trace(PVR2_TRACE_INIT,"pvr_init failure code=%d",ret); return ret; } pvr2_sysfs_class_create(); ret = usb_register(&pvr_driver); if (ret == 0) pr_info("pvrusb2: " DRIVER_VERSION ":" DRIVER_DESC "\n"); if (pvrusb2_debug) pr_info("pvrusb2: Debug mask is %d (0x%x)\n", pvrusb2_debug,pvrusb2_debug); pvr2_trace(PVR2_TRACE_INIT,"pvr_init complete"); return ret; } static void __exit pvr_exit(void) { pvr2_trace(PVR2_TRACE_INIT,"pvr_exit"); usb_deregister(&pvr_driver); pvr2_context_global_done(); pvr2_sysfs_class_destroy(); pvr2_trace(PVR2_TRACE_INIT,"pvr_exit complete"); } module_init(pvr_init); module_exit(pvr_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_VERSION("0.9.1");
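/*
 * Illustrative sketch (not part of pvrusb2): pvr_probe()/pvr_disconnect()
 * above follow the usual USB interface-data pattern: allocate a per-device
 * context in probe, publish it with usb_set_intfdata(), and clear it again
 * in disconnect before tearing the context down.  The skeleton below shows
 * only that pattern for a hypothetical "example" driver; the context type
 * and the 0x1234:0x5678 id are placeholders.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/usb.h>

struct example_ctx {
	struct usb_interface *intf;
};

static int example_probe(struct usb_interface *intf,
			 const struct usb_device_id *id)
{
	struct example_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->intf = intf;
	usb_set_intfdata(intf, ctx);	/* visible to later callbacks */
	return 0;
}

static void example_disconnect(struct usb_interface *intf)
{
	struct example_ctx *ctx = usb_get_intfdata(intf);

	usb_set_intfdata(intf, NULL);	/* pvr_disconnect() does the same */
	kfree(ctx);
}

static const struct usb_device_id example_ids[] = {
	{ USB_DEVICE(0x1234, 0x5678) },	/* placeholder VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(usb, example_ids);

static struct usb_driver example_driver = {
	.name		= "example",
	.id_table	= example_ids,
	.probe		= example_probe,
	.disconnect	= example_disconnect,
};
module_usb_driver(example_driver);

MODULE_DESCRIPTION("Minimal intfdata pattern sketch");
MODULE_LICENSE("GPL");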
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Module internals * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> */ #include <linux/elf.h> #include <linux/compiler.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/mm.h> #ifndef ARCH_SHF_SMALL #define ARCH_SHF_SMALL 0 #endif /* * Use highest 4 bits of sh_entsize to store the mod_mem_type of this * section. This leaves 28 bits for offset on 32-bit systems, which is * about 256 MiB (WARN_ON_ONCE if we exceed that). 
*/ #define SH_ENTSIZE_TYPE_BITS 4 #define SH_ENTSIZE_TYPE_SHIFT (BITS_PER_LONG - SH_ENTSIZE_TYPE_BITS) #define SH_ENTSIZE_TYPE_MASK ((1UL << SH_ENTSIZE_TYPE_BITS) - 1) #define SH_ENTSIZE_OFFSET_MASK ((1UL << (BITS_PER_LONG - SH_ENTSIZE_TYPE_BITS)) - 1) /* Maximum number of characters written by module_flags() */ #define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4) struct kernel_symbol { #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS int value_offset; int name_offset; int namespace_offset; #else unsigned long value; const char *name; const char *namespace; #endif }; extern struct mutex module_mutex; extern struct list_head modules; extern struct module_attribute *modinfo_attrs[]; extern size_t modinfo_attrs_count; /* Provided by the linker */ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const s32 __start___kcrctab[]; extern const s32 __start___kcrctab_gpl[]; struct load_info { const char *name; /* pointer to module in temporary copy, freed at end of load_module() */ struct module *mod; Elf_Ehdr *hdr; unsigned long len; Elf_Shdr *sechdrs; char *secstrings, *strtab; unsigned long symoffs, stroffs, init_typeoffs, core_typeoffs; bool sig_ok; #ifdef CONFIG_KALLSYMS unsigned long mod_kallsyms_init_off; #endif #ifdef CONFIG_MODULE_DECOMPRESS #ifdef CONFIG_MODULE_STATS unsigned long compressed_len; #endif struct page **pages; unsigned int max_pages; unsigned int used_pages; #endif struct { unsigned int sym, str, mod, vers, info, pcpu; } index; }; enum mod_license { NOT_GPL_ONLY, GPL_ONLY, }; struct find_symbol_arg { /* Input */ const char *name; bool gplok; bool warn; /* Output */ struct module *owner; const s32 *crc; const struct kernel_symbol *sym; enum mod_license license; }; int mod_verify_sig(const void *mod, struct load_info *info); int try_to_force_load(struct module *mod, const char *reason); bool find_symbol(struct find_symbol_arg *fsa); struct module *find_module_all(const char *name, size_t len, bool even_unformed); int cmp_name(const void *name, const void *sym); long module_get_offset_and_type(struct module *mod, enum mod_mem_type type, Elf_Shdr *sechdr, unsigned int section); char *module_flags(struct module *mod, char *buf, bool show_state); size_t module_flags_taint(unsigned long taints, char *buf); char *module_next_tag_pair(char *string, unsigned long *secsize); #define for_each_modinfo_entry(entry, info, name) \ for (entry = get_modinfo(info, name); entry; entry = get_next_modinfo(info, name, entry)) static inline void module_assert_mutex_or_preempt(void) { #ifdef CONFIG_LOCKDEP if (unlikely(!debug_locks)) return; WARN_ON_ONCE(!rcu_read_lock_sched_held() && !lockdep_is_held(&module_mutex)); #endif } static inline unsigned long kernel_symbol_value(const struct kernel_symbol *sym) { #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS return (unsigned long)offset_to_ptr(&sym->value_offset); #else return sym->value; #endif } #ifdef CONFIG_LIVEPATCH int copy_module_elf(struct module *mod, struct load_info *info); void free_module_elf(struct module *mod); #else /* !CONFIG_LIVEPATCH */ static inline int copy_module_elf(struct module *mod, struct load_info *info) { return 0; } static inline void free_module_elf(struct module *mod) { } #endif /* CONFIG_LIVEPATCH */ static inline bool set_livepatch_module(struct module *mod) { #ifdef CONFIG_LIVEPATCH mod->klp = true; return true; #else return false; #endif } /** * 
enum fail_dup_mod_reason - state at which a duplicate module was detected * * @FAIL_DUP_MOD_BECOMING: the module is read properly, passes all checks but * we've determined that another module with the same name is already loaded * or being processed on our &modules list. This happens on early_mod_check() * right before layout_and_allocate(). The kernel would have already * vmalloc()'d space for the entire module through finit_module(). If * decompression was used two vmap() spaces were used. These failures can * happen when userspace has not seen the module present on the kernel and * tries to load the module multiple times at same time. * @FAIL_DUP_MOD_LOAD: the module has been read properly, passes all validation * checks and the kernel determines that the module was unique and because * of this allocated yet another private kernel copy of the module space in * layout_and_allocate() but after this determined in add_unformed_module() * that another module with the same name is already loaded or being processed. * These failures should be mitigated as much as possible and are indicative * of really fast races in loading modules. Without module decompression * they waste twice as much vmap space. With module decompression three * times the module's size vmap space is wasted. */ enum fail_dup_mod_reason { FAIL_DUP_MOD_BECOMING = 0, FAIL_DUP_MOD_LOAD, }; #ifdef CONFIG_MODULE_DEBUGFS extern struct dentry *mod_debugfs_root; #endif #ifdef CONFIG_MODULE_STATS #define mod_stat_add_long(count, var) atomic_long_add(count, var) #define mod_stat_inc(name) atomic_inc(name) extern atomic_long_t total_mod_size; extern atomic_long_t total_text_size; extern atomic_long_t invalid_kread_bytes; extern atomic_long_t invalid_decompress_bytes; extern atomic_t modcount; extern atomic_t failed_kreads; extern atomic_t failed_decompress; struct mod_fail_load { struct list_head list; char name[MODULE_NAME_LEN]; atomic_long_t count; unsigned long dup_fail_mask; }; int try_add_failed_module(const char *name, enum fail_dup_mod_reason reason); void mod_stat_bump_invalid(struct load_info *info, int flags); void mod_stat_bump_becoming(struct load_info *info, int flags); #else #define mod_stat_add_long(name, var) #define mod_stat_inc(name) static inline int try_add_failed_module(const char *name, enum fail_dup_mod_reason reason) { return 0; } static inline void mod_stat_bump_invalid(struct load_info *info, int flags) { } static inline void mod_stat_bump_becoming(struct load_info *info, int flags) { } #endif /* CONFIG_MODULE_STATS */ #ifdef CONFIG_MODULE_DEBUG_AUTOLOAD_DUPS bool kmod_dup_request_exists_wait(char *module_name, bool wait, int *dup_ret); void kmod_dup_request_announce(char *module_name, int ret); #else static inline bool kmod_dup_request_exists_wait(char *module_name, bool wait, int *dup_ret) { return false; } static inline void kmod_dup_request_announce(char *module_name, int ret) { } #endif #ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING struct mod_unload_taint { struct list_head list; char name[MODULE_NAME_LEN]; unsigned long taints; u64 count; }; int try_add_tainted_module(struct module *mod); void print_unloaded_tainted_modules(void); #else /* !CONFIG_MODULE_UNLOAD_TAINT_TRACKING */ static inline int try_add_tainted_module(struct module *mod) { return 0; } static inline void print_unloaded_tainted_modules(void) { } #endif /* CONFIG_MODULE_UNLOAD_TAINT_TRACKING */ #ifdef CONFIG_MODULE_DECOMPRESS int module_decompress(struct load_info *info, const void *buf, size_t size); void module_decompress_cleanup(struct 
load_info *info); #else static inline int module_decompress(struct load_info *info, const void *buf, size_t size) { return -EOPNOTSUPP; } static inline void module_decompress_cleanup(struct load_info *info) { } #endif struct mod_tree_root { #ifdef CONFIG_MODULES_TREE_LOOKUP struct latch_tree_root root; #endif unsigned long addr_min; unsigned long addr_max; #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC unsigned long data_addr_min; unsigned long data_addr_max; #endif }; extern struct mod_tree_root mod_tree; #ifdef CONFIG_MODULES_TREE_LOOKUP void mod_tree_insert(struct module *mod); void mod_tree_remove_init(struct module *mod); void mod_tree_remove(struct module *mod); struct module *mod_find(unsigned long addr, struct mod_tree_root *tree); #else /* !CONFIG_MODULES_TREE_LOOKUP */ static inline void mod_tree_insert(struct module *mod) { } static inline void mod_tree_remove_init(struct module *mod) { } static inline void mod_tree_remove(struct module *mod) { } static inline struct module *mod_find(unsigned long addr, struct mod_tree_root *tree) { struct module *mod; list_for_each_entry_rcu(mod, &modules, list, lockdep_is_held(&module_mutex)) { if (within_module(addr, mod)) return mod; } return NULL; } #endif /* CONFIG_MODULES_TREE_LOOKUP */ int module_enable_rodata_ro(const struct module *mod, bool after_init); int module_enable_data_nx(const struct module *mod); int module_enable_text_rox(const struct module *mod); int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod); #ifdef CONFIG_MODULE_SIG int module_sig_check(struct load_info *info, int flags); #else /* !CONFIG_MODULE_SIG */ static inline int module_sig_check(struct load_info *info, int flags) { return 0; } #endif /* !CONFIG_MODULE_SIG */ #ifdef CONFIG_DEBUG_KMEMLEAK void kmemleak_load_module(const struct module *mod, const struct load_info *info); #else /* !CONFIG_DEBUG_KMEMLEAK */ static inline void kmemleak_load_module(const struct module *mod, const struct load_info *info) { } #endif /* CONFIG_DEBUG_KMEMLEAK */ #ifdef CONFIG_KALLSYMS void init_build_id(struct module *mod, const struct load_info *info); void layout_symtab(struct module *mod, struct load_info *info); void add_kallsyms(struct module *mod, const struct load_info *info); static inline bool sect_empty(const Elf_Shdr *sect) { return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; } #else /* !CONFIG_KALLSYMS */ static inline void init_build_id(struct module *mod, const struct load_info *info) { } static inline void layout_symtab(struct module *mod, struct load_info *info) { } static inline void add_kallsyms(struct module *mod, const struct load_info *info) { } #endif /* CONFIG_KALLSYMS */ #ifdef CONFIG_SYSFS int mod_sysfs_setup(struct module *mod, const struct load_info *info, struct kernel_param *kparam, unsigned int num_params); void mod_sysfs_teardown(struct module *mod); void init_param_lock(struct module *mod); #else /* !CONFIG_SYSFS */ static inline int mod_sysfs_setup(struct module *mod, const struct load_info *info, struct kernel_param *kparam, unsigned int num_params) { return 0; } static inline void mod_sysfs_teardown(struct module *mod) { } static inline void init_param_lock(struct module *mod) { } #endif /* CONFIG_SYSFS */ #ifdef CONFIG_MODVERSIONS int check_version(const struct load_info *info, const char *symname, struct module *mod, const s32 *crc); void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, struct tracepoint * const *tp); int 
check_modstruct_version(const struct load_info *info, struct module *mod); int same_magic(const char *amagic, const char *bmagic, bool has_crcs); #else /* !CONFIG_MODVERSIONS */ static inline int check_version(const struct load_info *info, const char *symname, struct module *mod, const s32 *crc) { return 1; } static inline int check_modstruct_version(const struct load_info *info, struct module *mod) { return 1; } static inline int same_magic(const char *amagic, const char *bmagic, bool has_crcs) { return strcmp(amagic, bmagic) == 0; } #endif /* CONFIG_MODVERSIONS */
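/*
 * Illustrative sketch (not kernel code): the SH_ENTSIZE_* macros near the
 * top of this header pack a mod_mem_type into the highest 4 bits of
 * sh_entsize and keep the section offset in the remaining bits (28 bits of
 * offset on 32-bit, per the comment above).  The standalone userspace
 * program below redoes that pack/unpack arithmetic with the same shift and
 * masks; the type value 5 and the offset are arbitrary example inputs.
 */
#include <limits.h>
#include <stdio.h>

#define SKETCH_BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)
#define SKETCH_TYPE_BITS	4
#define SKETCH_TYPE_SHIFT	(SKETCH_BITS_PER_LONG - SKETCH_TYPE_BITS)
#define SKETCH_TYPE_MASK	((1UL << SKETCH_TYPE_BITS) - 1)
#define SKETCH_OFFSET_MASK	((1UL << SKETCH_TYPE_SHIFT) - 1)

int main(void)
{
	unsigned long type = 5;			/* arbitrary mem-type value */
	unsigned long offset = 0x123456;	/* arbitrary section offset */
	unsigned long entsize =
		(type << SKETCH_TYPE_SHIFT) | (offset & SKETCH_OFFSET_MASK);

	printf("entsize       = 0x%lx\n", entsize);
	printf("type (top 4)  = %lu\n",
	       (entsize >> SKETCH_TYPE_SHIFT) & SKETCH_TYPE_MASK);
	printf("offset (rest) = 0x%lx\n", entsize & SKETCH_OFFSET_MASK);
	return 0;
}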
2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 // SPDX-License-Identifier: GPL-2.0-or-later /* * Roccat Ryos driver for Linux * * Copyright (c) 2013 Stefan Achatz <erazor_de@users.sourceforge.net> */ /* */ #include <linux/types.h> #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" enum { RYOS_REPORT_NUMBER_SPECIAL = 3, RYOS_USB_INTERFACE_PROTOCOL = 0, }; struct ryos_report_special { uint8_t number; /* RYOS_REPORT_NUMBER_SPECIAL */ uint8_t data[4]; } __packed; ROCCAT_COMMON2_BIN_ATTRIBUTE_W(control, 0x04, 0x03); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(profile, 0x05, 0x03); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(keys_primary, 0x06, 0x7d); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(keys_function, 0x07, 0x5f); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(keys_macro, 0x08, 0x23); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(keys_thumbster, 0x09, 0x17); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(keys_extra, 0x0a, 0x08); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(keys_easyzone, 0x0b, 0x126); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(key_mask, 0x0c, 0x06); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(light, 0x0d, 0x10); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(macro, 0x0e, 0x7d2); ROCCAT_COMMON2_BIN_ATTRIBUTE_R(info, 0x0f, 0x08); ROCCAT_COMMON2_BIN_ATTRIBUTE_W(reset, 0x11, 0x03); ROCCAT_COMMON2_BIN_ATTRIBUTE_W(light_control, 0x13, 0x08); ROCCAT_COMMON2_BIN_ATTRIBUTE_W(talk, 0x16, 0x10); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(stored_lights, 0x17, 0x0566); ROCCAT_COMMON2_BIN_ATTRIBUTE_W(custom_lights, 0x18, 0x14); ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(light_macro, 0x19, 0x07d2); static struct bin_attribute *ryos_bin_attrs[] = { &bin_attr_control, &bin_attr_profile, &bin_attr_keys_primary, &bin_attr_keys_function, &bin_attr_keys_macro, &bin_attr_keys_thumbster, &bin_attr_keys_extra, &bin_attr_keys_easyzone, &bin_attr_key_mask, &bin_attr_light, &bin_attr_macro, &bin_attr_info, &bin_attr_reset, &bin_attr_light_control, &bin_attr_talk, &bin_attr_stored_lights, &bin_attr_custom_lights, &bin_attr_light_macro, NULL, }; static const struct attribute_group ryos_group = { .bin_attrs = ryos_bin_attrs, }; static const struct attribute_group *ryos_groups[] = { &ryos_group, NULL, }; static const struct class ryos_class = { .name = "ryos", .dev_groups = ryos_groups, }; static int ryos_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct roccat_common2_device *ryos; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol != RYOS_USB_INTERFACE_PROTOCOL) { hid_set_drvdata(hdev, NULL); return 0; } ryos = kzalloc(sizeof(*ryos), GFP_KERNEL); if (!ryos) 
{ hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, ryos); retval = roccat_common2_device_init_struct(usb_dev, ryos); if (retval) { hid_err(hdev, "couldn't init Ryos device\n"); goto exit_free; } retval = roccat_connect(&ryos_class, hdev, sizeof(struct ryos_report_special)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { ryos->chrdev_minor = retval; ryos->roccat_claimed = 1; } return 0; exit_free: kfree(ryos); return retval; } static void ryos_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct roccat_common2_device *ryos; if (intf->cur_altsetting->desc.bInterfaceProtocol != RYOS_USB_INTERFACE_PROTOCOL) return; ryos = hid_get_drvdata(hdev); if (ryos->roccat_claimed) roccat_disconnect(ryos->chrdev_minor); kfree(ryos); } static int ryos_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = ryos_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void ryos_remove(struct hid_device *hdev) { ryos_remove_specials(hdev); hid_hw_stop(hdev); } static int ryos_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct roccat_common2_device *ryos = hid_get_drvdata(hdev); if (intf->cur_altsetting->desc.bInterfaceProtocol != RYOS_USB_INTERFACE_PROTOCOL) return 0; if (data[0] != RYOS_REPORT_NUMBER_SPECIAL) return 0; if (ryos != NULL && ryos->roccat_claimed) roccat_report_event(ryos->chrdev_minor, data); return 0; } static const struct hid_device_id ryos_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_RYOS_MK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_RYOS_MK_GLOW) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_RYOS_MK_PRO) }, { } }; MODULE_DEVICE_TABLE(hid, ryos_devices); static struct hid_driver ryos_driver = { .name = "ryos", .id_table = ryos_devices, .probe = ryos_probe, .remove = ryos_remove, .raw_event = ryos_raw_event }; static int __init ryos_init(void) { int retval; retval = class_register(&ryos_class); if (retval) return retval; retval = hid_register_driver(&ryos_driver); if (retval) class_unregister(&ryos_class); return retval; } static void __exit ryos_exit(void) { hid_unregister_driver(&ryos_driver); class_unregister(&ryos_class); } module_init(ryos_init); module_exit(ryos_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Ryos MK/Glow/Pro driver"); MODULE_LICENSE("GPL v2");
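/*
 * Illustrative userspace sketch, not part of the driver above: the
 * ROCCAT_COMMON2_BIN_ATTRIBUTE_*() lines register sysfs binary
 * attributes on the "ryos" class device created via roccat_connect(),
 * so a tool can read e.g. the 8-byte "info" blob once it knows the
 * device's sysfs directory.  The default path below is a hypothetical
 * placeholder; discover the real one on the target system (for
 * example by listing /sys/class/ryos/).
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Hypothetical default; pass the real sysfs file as argv[1]. */
	const char *path = argc > 1 ? argv[1] : "/sys/class/ryos/ryos0/info";
	unsigned char buf[8];	/* "info" is declared above with size 0x08 */
	int fd = open(path, O_RDONLY);
	ssize_t n, i;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read(fd, buf, sizeof(buf));
	if (n < 0) {
		perror("read");
		close(fd);
		return 1;
	}
	for (i = 0; i < n; i++)
		printf("%02x ", buf[i]);
	printf("\n");
	close(fd);
	return 0;
}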
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
#include <linux/bitfield.h>
#include <xen/xen.h>

#include <asm/fpu/api.h>
#include <asm/fred.h>
#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
#include <asm/insn-eval.h>
#include <asm/sgx.h>

static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr)
{
	int reg_offset = pt_regs_offset(regs, nr);
	static unsigned long __dummy;

	if (WARN_ON_ONCE(reg_offset < 0))
		return &__dummy;

	return (unsigned long *)((unsigned long)regs + reg_offset);
}

static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}

static bool ex_handler_default(const struct exception_table_entry *e,
			       struct pt_regs *regs)
{
	if (e->data & EX_FLAG_CLEAR_AX)
		regs->ax = 0;
	if (e->data & EX_FLAG_CLEAR_DX)
		regs->dx = 0;

	regs->ip = ex_fixup_addr(e);
	return true;
}

/*
 * This is the *very* rare case where we do a "load_unaligned_zeropad()"
 * and it's a page crosser into a non-existent page.
 *
 * This happens when we optimistically load a pathname a word-at-a-time
 * and the name is less than the full word and the next page is not
 * mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC.
 *
 * NOTE! The faulting address is always a 'mov mem,reg' type instruction
 * of size 'long', and the exception fixup must always point to right
 * after the instruction.
*/ static bool ex_handler_zeropad(const struct exception_table_entry *e, struct pt_regs *regs, unsigned long fault_addr) { struct insn insn; const unsigned long mask = sizeof(long) - 1; unsigned long offset, addr, next_ip, len; unsigned long *reg; next_ip = ex_fixup_addr(e); len = next_ip - regs->ip; if (len > MAX_INSN_SIZE) return false; if (insn_decode(&insn, (void *) regs->ip, len, INSN_MODE_KERN)) return false; if (insn.length != len) return false; if (insn.opcode.bytes[0] != 0x8b) return false; if (insn.opnd_bytes != sizeof(long)) return false; addr = (unsigned long) insn_get_addr_ref(&insn, regs); if (addr == ~0ul) return false; offset = addr & mask; addr = addr & ~mask; if (fault_addr != addr + sizeof(long)) return false; reg = insn_get_modrm_reg_ptr(&insn, regs); if (!reg) return false; *reg = *(unsigned long *)addr >> (offset * 8); return ex_handler_default(e, regs); } static bool ex_handler_fault(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { regs->ax = trapnr; return ex_handler_default(fixup, regs); } static bool ex_handler_sgx(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { regs->ax = trapnr | SGX_ENCLS_FAULT_FLAG; return ex_handler_default(fixup, regs); } /* * Handler for when we fail to restore a task's FPU state. We should never get * here because the FPU state of a task using the FPU (task->thread.fpu.state) * should always be valid. However, past bugs have allowed userspace to set * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn(). * These caused XRSTOR to fail when switching to the task, leaking the FPU * registers of the task previously executing on the CPU. Mitigate this class * of vulnerability by restoring from the initial state (essentially, zeroing * out all the FPU registers) if we can't restore from the task's FPU state. */ static bool ex_handler_fprestore(const struct exception_table_entry *fixup, struct pt_regs *regs) { regs->ip = ex_fixup_addr(fixup); WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.", (void *)instruction_pointer(regs)); fpu_reset_from_exception_fixup(); return true; } /* * On x86-64, we end up being imprecise with 'access_ok()', and allow * non-canonical user addresses to make the range comparisons simpler, * and to not have to worry about LAM being enabled. * * In fact, we allow up to one page of "slop" at the sign boundary, * which means that we can do access_ok() by just checking the sign * of the pointer for the common case of having a small access size. */ static bool gp_fault_address_ok(unsigned long fault_address) { #ifdef CONFIG_X86_64 /* Is it in the "user space" part of the non-canonical space? */ if (valid_user_address(fault_address)) return true; /* .. or just above it? */ fault_address -= PAGE_SIZE; if (valid_user_address(fault_address)) return true; #endif return false; } static bool ex_handler_uaccess(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long fault_address) { WARN_ONCE(trapnr == X86_TRAP_GP && !gp_fault_address_ok(fault_address), "General protection fault in user access. Non-canonical address?"); return ex_handler_default(fixup, regs); } static bool ex_handler_copy(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. 
Non-canonical address?"); return ex_handler_fault(fixup, regs, trapnr); } static bool ex_handler_msr(const struct exception_table_entry *fixup, struct pt_regs *regs, bool wrmsr, bool safe, int reg) { if (__ONCE_LITE_IF(!safe && wrmsr)) { pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, regs->ip, (void *)regs->ip); show_stack_regs(regs); } if (__ONCE_LITE_IF(!safe && !wrmsr)) { pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip); show_stack_regs(regs); } if (!wrmsr) { /* Pretend that the read succeeded and returned 0. */ regs->ax = 0; regs->dx = 0; } if (safe) *pt_regs_nr(regs, reg) = -EIO; return ex_handler_default(fixup, regs); } static bool ex_handler_clear_fs(const struct exception_table_entry *fixup, struct pt_regs *regs) { if (static_cpu_has(X86_BUG_NULL_SEG)) asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); asm volatile ("mov %0, %%fs" : : "rm" (0)); return ex_handler_default(fixup, regs); } static bool ex_handler_imm_reg(const struct exception_table_entry *fixup, struct pt_regs *regs, int reg, int imm) { *pt_regs_nr(regs, reg) = (long)imm; return ex_handler_default(fixup, regs); } static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr, unsigned long fault_address, int reg, int imm) { regs->cx = imm * regs->cx + *pt_regs_nr(regs, reg); return ex_handler_uaccess(fixup, regs, trapnr, fault_address); } #ifdef CONFIG_X86_FRED static bool ex_handler_eretu(const struct exception_table_entry *fixup, struct pt_regs *regs, unsigned long error_code) { struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, orig_ax)); unsigned short ss = uregs->ss; unsigned short cs = uregs->cs; /* * Move the NMI bit from the invalid stack frame, which caused ERETU * to fault, to the fault handler's stack frame, thus to unblock NMI * with the fault handler's ERETS instruction ASAP if NMI is blocked. */ regs->fred_ss.nmi = uregs->fred_ss.nmi; /* * Sync event information to uregs, i.e., the ERETU return frame, but * is it safe to write to the ERETU return frame which is just above * current event stack frame? * * The RSP used by FRED to push a stack frame is not the value in %rsp, * it is calculated from %rsp with the following 2 steps: * 1) RSP = %rsp - (IA32_FRED_CONFIG & 0x1c0) // Reserve N*64 bytes * 2) RSP = RSP & ~0x3f // Align to a 64-byte cache line * when an event delivery doesn't trigger a stack level change. * * Here is an example with N*64 (N=1) bytes reserved: * * 64-byte cache line ==> ______________ * |___Reserved___| * |__Event_data__| * |_____SS_______| * |_____RSP______| * |_____FLAGS____| * |_____CS_______| * |_____IP_______| * 64-byte cache line ==> |__Error_code__| <== ERETU return frame * |______________| * |______________| * |______________| * |______________| * |______________| * |______________| * |______________| * 64-byte cache line ==> |______________| <== RSP after step 1) and 2) * |___Reserved___| * |__Event_data__| * |_____SS_______| * |_____RSP______| * |_____FLAGS____| * |_____CS_______| * |_____IP_______| * 64-byte cache line ==> |__Error_code__| <== ERETS return frame * * Thus a new FRED stack frame will always be pushed below a previous * FRED stack frame ((N*64) bytes may be reserved between), and it is * safe to write to a previous FRED stack frame as they never overlap. 
*/ fred_info(uregs)->edata = fred_event_data(regs); uregs->ssx = regs->ssx; uregs->fred_ss.ss = ss; /* The NMI bit was moved away above */ uregs->fred_ss.nmi = 0; uregs->csx = regs->csx; uregs->fred_cs.sl = 0; uregs->fred_cs.wfe = 0; uregs->cs = cs; uregs->orig_ax = error_code; return ex_handler_default(fixup, regs); } #endif int ex_get_fixup_type(unsigned long ip) { const struct exception_table_entry *e = search_exception_tables(ip); return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE; } int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { const struct exception_table_entry *e; int type, reg, imm; #ifdef CONFIG_PNPBIOS if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; pnp_bios_is_utter_crap = 1; printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n"); __asm__ volatile( "movl %0, %%esp\n\t" "jmp *%1\n\t" : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip)); panic("do_trap: can't hit this"); } #endif e = search_exception_tables(regs->ip); if (!e) return 0; type = FIELD_GET(EX_DATA_TYPE_MASK, e->data); reg = FIELD_GET(EX_DATA_REG_MASK, e->data); imm = FIELD_GET(EX_DATA_IMM_MASK, e->data); switch (type) { case EX_TYPE_DEFAULT: case EX_TYPE_DEFAULT_MCE_SAFE: return ex_handler_default(e, regs); case EX_TYPE_FAULT: case EX_TYPE_FAULT_MCE_SAFE: return ex_handler_fault(e, regs, trapnr); case EX_TYPE_UACCESS: return ex_handler_uaccess(e, regs, trapnr, fault_addr); case EX_TYPE_COPY: return ex_handler_copy(e, regs, trapnr); case EX_TYPE_CLEAR_FS: return ex_handler_clear_fs(e, regs); case EX_TYPE_FPU_RESTORE: return ex_handler_fprestore(e, regs); case EX_TYPE_BPF: return ex_handler_bpf(e, regs); case EX_TYPE_WRMSR: return ex_handler_msr(e, regs, true, false, reg); case EX_TYPE_RDMSR: return ex_handler_msr(e, regs, false, false, reg); case EX_TYPE_WRMSR_SAFE: return ex_handler_msr(e, regs, true, true, reg); case EX_TYPE_RDMSR_SAFE: return ex_handler_msr(e, regs, false, true, reg); case EX_TYPE_WRMSR_IN_MCE: ex_handler_msr_mce(regs, true); break; case EX_TYPE_RDMSR_IN_MCE: ex_handler_msr_mce(regs, false); break; case EX_TYPE_POP_REG: regs->sp += sizeof(long); fallthrough; case EX_TYPE_IMM_REG: return ex_handler_imm_reg(e, regs, reg, imm); case EX_TYPE_FAULT_SGX: return ex_handler_sgx(e, regs, trapnr); case EX_TYPE_UCOPY_LEN: return ex_handler_ucopy_len(e, regs, trapnr, fault_addr, reg, imm); case EX_TYPE_ZEROPAD: return ex_handler_zeropad(e, regs, fault_addr); #ifdef CONFIG_X86_FRED case EX_TYPE_ERETU: return ex_handler_eretu(e, regs, error_code); #endif } BUG(); } extern unsigned int early_recursion_flag; /* Restricted version used during very early boot */ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) { /* Ignore early NMIs. */ if (trapnr == X86_TRAP_NMI) return; if (early_recursion_flag > 2) goto halt_loop; /* * Old CPUs leave the high bits of CS on the stack * undefined. I'm not sure which CPUs do this, but at least * the 486 DX works this way. * Xen pv domains are not using the default __KERNEL_CS. */ if (!xen_pv_domain() && regs->cs != __KERNEL_CS) goto fail; /* * The full exception fixup machinery is available as soon as * the early IDT is loaded. This means that it is the * responsibility of extable users to either function correctly * when handlers are invoked early or to simply avoid causing * exceptions before they're ready to handle them. 
* * This is better than filtering which handlers can be used, * because refusing to call a handler here is guaranteed to * result in a hard-to-debug panic. * * Keep in mind that not all vectors actually get here. Early * page faults, for example, are special. */ if (fixup_exception(regs, trapnr, regs->orig_ax, 0)) return; if (trapnr == X86_TRAP_UD) { if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { /* Skip the ud2. */ regs->ip += LEN_UD2; return; } /* * If this was a BUG and report_bug returns or if this * was just a normal #UD, we want to continue onward and * crash. */ } fail: early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n", (unsigned)trapnr, (unsigned long)regs->cs, regs->ip, regs->orig_ax, read_cr2()); show_regs(regs); halt_loop: while (true) halt(); }
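/*
 * Standalone illustration, not kernel code: the exception table stores
 * each fixup target as a 32-bit offset relative to the field's own
 * address, which is exactly what ex_fixup_addr() above decodes with
 * "(unsigned long)&x->fixup + x->fixup".  The toy structure and helper
 * names below are hypothetical and only demonstrate that encoding.
 */
#include <stdio.h>
#include <stdint.h>

struct toy_extable_entry {
	int32_t insn;	/* offset of the faulting instruction (unused here) */
	int32_t fixup;	/* self-relative offset of the fixup target */
};

/* File-scope so the entry and the target both live in the program image
 * and their distance fits in 32 bits on typical builds. */
static struct toy_extable_entry toy_entry;

static unsigned long toy_fixup_addr(const struct toy_extable_entry *x)
{
	/* Same pattern as ex_fixup_addr(): field address + stored offset. */
	return (unsigned long)&x->fixup + x->fixup;
}

static void toy_set_fixup(struct toy_extable_entry *x, unsigned long target)
{
	x->fixup = (int32_t)((long)target - (long)&x->fixup);
}

static void landing_pad(void)
{
	puts("execution would resume here after the fault");
}

int main(void)
{
	toy_set_fixup(&toy_entry, (unsigned long)&landing_pad);
	printf("stored offset: %d\n", (int)toy_entry.fixup);
	printf("decoded target matches landing_pad: %d\n",
	       toy_fixup_addr(&toy_entry) == (unsigned long)&landing_pad);
	return 0;
}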
// SPDX-License-Identifier: GPL-2.0+
/*****************************************************************************/
/*
 * devio.c -- User space communication with USB devices.
 *
 * Copyright (C) 1999-2000 Thomas Sailer (sailer@ife.ee.ethz.ch)
 *
 * This file implements the usbfs/x/y files, where
 * x is the bus number and y the device number.
 *
 * It allows user space programs/"drivers" to communicate directly
 * with USB devices without intervening kernel driver.
* * Revision history * 22.12.1999 0.1 Initial release (split from proc_usb.c) * 04.01.2000 0.2 Turned into its own filesystem * 30.09.2005 0.3 Fix user-triggerable oops in async URB delivery * (CAN-2005-3055) */ /*****************************************************************************/ #include <linux/fs.h> #include <linux/mm.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/signal.h> #include <linux/poll.h> #include <linux/module.h> #include <linux/string.h> #include <linux/usb.h> #include <linux/usbdevice_fs.h> #include <linux/usb/hcd.h> /* for usbcore internals */ #include <linux/usb/quirks.h> #include <linux/cdev.h> #include <linux/notifier.h> #include <linux/security.h> #include <linux/user_namespace.h> #include <linux/scatterlist.h> #include <linux/uaccess.h> #include <linux/dma-mapping.h> #include <asm/byteorder.h> #include <linux/moduleparam.h> #include "usb.h" #ifdef CONFIG_PM #define MAYBE_CAP_SUSPEND USBDEVFS_CAP_SUSPEND #else #define MAYBE_CAP_SUSPEND 0 #endif #define USB_MAXBUS 64 #define USB_DEVICE_MAX (USB_MAXBUS * 128) #define USB_SG_SIZE 16384 /* split-size for large txs */ /* Mutual exclusion for ps->list in resume vs. release and remove */ static DEFINE_MUTEX(usbfs_mutex); struct usb_dev_state { struct list_head list; /* state list */ struct usb_device *dev; struct file *file; spinlock_t lock; /* protects the async urb lists */ struct list_head async_pending; struct list_head async_completed; struct list_head memory_list; wait_queue_head_t wait; /* wake up if a request completed */ wait_queue_head_t wait_for_resume; /* wake up upon runtime resume */ unsigned int discsignr; struct pid *disc_pid; const struct cred *cred; sigval_t disccontext; unsigned long ifclaimed; u32 disabled_bulk_eps; unsigned long interface_allowed_mask; int not_yet_resumed; bool suspend_allowed; bool privileges_dropped; }; struct usb_memory { struct list_head memlist; int vma_use_count; int urb_use_count; u32 size; void *mem; dma_addr_t dma_handle; unsigned long vm_start; struct usb_dev_state *ps; }; struct async { struct list_head asynclist; struct usb_dev_state *ps; struct pid *pid; const struct cred *cred; unsigned int signr; unsigned int ifnum; void __user *userbuffer; void __user *userurb; sigval_t userurb_sigval; struct urb *urb; struct usb_memory *usbm; unsigned int mem_usage; int status; u8 bulk_addr; u8 bulk_status; }; static bool usbfs_snoop; module_param(usbfs_snoop, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(usbfs_snoop, "true to log all usbfs traffic"); static unsigned usbfs_snoop_max = 65536; module_param(usbfs_snoop_max, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(usbfs_snoop_max, "maximum number of bytes to print while snooping"); #define snoop(dev, format, arg...) 
\ do { \ if (usbfs_snoop) \ dev_info(dev, format, ## arg); \ } while (0) enum snoop_when { SUBMIT, COMPLETE }; #define USB_DEVICE_DEV MKDEV(USB_DEVICE_MAJOR, 0) /* Limit on the total amount of memory we can allocate for transfers */ static u32 usbfs_memory_mb = 16; module_param(usbfs_memory_mb, uint, 0644); MODULE_PARM_DESC(usbfs_memory_mb, "maximum MB allowed for usbfs buffers (0 = no limit)"); /* Hard limit, necessary to avoid arithmetic overflow */ #define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000) static DEFINE_SPINLOCK(usbfs_memory_usage_lock); static u64 usbfs_memory_usage; /* Total memory currently allocated */ /* Check whether it's okay to allocate more memory for a transfer */ static int usbfs_increase_memory_usage(u64 amount) { u64 lim, total_mem; unsigned long flags; int ret; lim = READ_ONCE(usbfs_memory_mb); lim <<= 20; ret = 0; spin_lock_irqsave(&usbfs_memory_usage_lock, flags); total_mem = usbfs_memory_usage + amount; if (lim > 0 && total_mem > lim) ret = -ENOMEM; else usbfs_memory_usage = total_mem; spin_unlock_irqrestore(&usbfs_memory_usage_lock, flags); return ret; } /* Memory for a transfer is being deallocated */ static void usbfs_decrease_memory_usage(u64 amount) { unsigned long flags; spin_lock_irqsave(&usbfs_memory_usage_lock, flags); if (amount > usbfs_memory_usage) usbfs_memory_usage = 0; else usbfs_memory_usage -= amount; spin_unlock_irqrestore(&usbfs_memory_usage_lock, flags); } static int connected(struct usb_dev_state *ps) { return (!list_empty(&ps->list) && ps->dev->state != USB_STATE_NOTATTACHED); } static void dec_usb_memory_use_count(struct usb_memory *usbm, int *count) { struct usb_dev_state *ps = usbm->ps; struct usb_hcd *hcd = bus_to_hcd(ps->dev->bus); unsigned long flags; spin_lock_irqsave(&ps->lock, flags); --*count; if (usbm->urb_use_count == 0 && usbm->vma_use_count == 0) { list_del(&usbm->memlist); spin_unlock_irqrestore(&ps->lock, flags); hcd_buffer_free_pages(hcd, usbm->size, usbm->mem, usbm->dma_handle); usbfs_decrease_memory_usage( usbm->size + sizeof(struct usb_memory)); kfree(usbm); } else { spin_unlock_irqrestore(&ps->lock, flags); } } static void usbdev_vm_open(struct vm_area_struct *vma) { struct usb_memory *usbm = vma->vm_private_data; unsigned long flags; spin_lock_irqsave(&usbm->ps->lock, flags); ++usbm->vma_use_count; spin_unlock_irqrestore(&usbm->ps->lock, flags); } static void usbdev_vm_close(struct vm_area_struct *vma) { struct usb_memory *usbm = vma->vm_private_data; dec_usb_memory_use_count(usbm, &usbm->vma_use_count); } static const struct vm_operations_struct usbdev_vm_ops = { .open = usbdev_vm_open, .close = usbdev_vm_close }; static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) { struct usb_memory *usbm = NULL; struct usb_dev_state *ps = file->private_data; struct usb_hcd *hcd = bus_to_hcd(ps->dev->bus); size_t size = vma->vm_end - vma->vm_start; void *mem; unsigned long flags; dma_addr_t dma_handle = DMA_MAPPING_ERROR; int ret; ret = usbfs_increase_memory_usage(size + sizeof(struct usb_memory)); if (ret) goto error; usbm = kzalloc(sizeof(struct usb_memory), GFP_KERNEL); if (!usbm) { ret = -ENOMEM; goto error_decrease_mem; } mem = hcd_buffer_alloc_pages(hcd, size, GFP_USER | __GFP_NOWARN, &dma_handle); if (!mem) { ret = -ENOMEM; goto error_free_usbm; } memset(mem, 0, size); usbm->mem = mem; usbm->dma_handle = dma_handle; usbm->size = size; usbm->ps = ps; usbm->vm_start = vma->vm_start; usbm->vma_use_count = 1; INIT_LIST_HEAD(&usbm->memlist); /* * In DMA-unavailable cases, hcd_buffer_alloc_pages allocates * normal 
pages and assigns DMA_MAPPING_ERROR to dma_handle. Check * whether we are in such cases, and then use remap_pfn_range (or * dma_mmap_coherent) to map normal (or DMA) pages into the user * space, respectively. */ if (dma_handle == DMA_MAPPING_ERROR) { if (remap_pfn_range(vma, vma->vm_start, virt_to_phys(usbm->mem) >> PAGE_SHIFT, size, vma->vm_page_prot) < 0) { dec_usb_memory_use_count(usbm, &usbm->vma_use_count); return -EAGAIN; } } else { if (dma_mmap_coherent(hcd->self.sysdev, vma, mem, dma_handle, size)) { dec_usb_memory_use_count(usbm, &usbm->vma_use_count); return -EAGAIN; } } vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_ops = &usbdev_vm_ops; vma->vm_private_data = usbm; spin_lock_irqsave(&ps->lock, flags); list_add_tail(&usbm->memlist, &ps->memory_list); spin_unlock_irqrestore(&ps->lock, flags); return 0; error_free_usbm: kfree(usbm); error_decrease_mem: usbfs_decrease_memory_usage(size + sizeof(struct usb_memory)); error: return ret; } static ssize_t usbdev_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { struct usb_dev_state *ps = file->private_data; struct usb_device *dev = ps->dev; ssize_t ret = 0; unsigned len; loff_t pos; int i; pos = *ppos; usb_lock_device(dev); if (!connected(ps)) { ret = -ENODEV; goto err; } else if (pos < 0) { ret = -EINVAL; goto err; } if (pos < sizeof(struct usb_device_descriptor)) { /* 18 bytes - fits on the stack */ struct usb_device_descriptor temp_desc; memcpy(&temp_desc, &dev->descriptor, sizeof(dev->descriptor)); le16_to_cpus(&temp_desc.bcdUSB); le16_to_cpus(&temp_desc.idVendor); le16_to_cpus(&temp_desc.idProduct); le16_to_cpus(&temp_desc.bcdDevice); len = sizeof(struct usb_device_descriptor) - pos; if (len > nbytes) len = nbytes; if (copy_to_user(buf, ((char *)&temp_desc) + pos, len)) { ret = -EFAULT; goto err; } *ppos += len; buf += len; nbytes -= len; ret += len; } pos = sizeof(struct usb_device_descriptor); for (i = 0; nbytes && i < dev->descriptor.bNumConfigurations; i++) { struct usb_config_descriptor *config = (struct usb_config_descriptor *)dev->rawdescriptors[i]; unsigned int length = le16_to_cpu(config->wTotalLength); if (*ppos < pos + length) { /* The descriptor may claim to be longer than it * really is. Here is the actual allocated length. 
*/ unsigned alloclen = le16_to_cpu(dev->config[i].desc.wTotalLength); len = length - (*ppos - pos); if (len > nbytes) len = nbytes; /* Simply don't write (skip over) unallocated parts */ if (alloclen > (*ppos - pos)) { alloclen -= (*ppos - pos); if (copy_to_user(buf, dev->rawdescriptors[i] + (*ppos - pos), min(len, alloclen))) { ret = -EFAULT; goto err; } } *ppos += len; buf += len; nbytes -= len; ret += len; } pos += length; } err: usb_unlock_device(dev); return ret; } /* * async list handling */ static struct async *alloc_async(unsigned int numisoframes) { struct async *as; as = kzalloc(sizeof(struct async), GFP_KERNEL); if (!as) return NULL; as->urb = usb_alloc_urb(numisoframes, GFP_KERNEL); if (!as->urb) { kfree(as); return NULL; } return as; } static void free_async(struct async *as) { int i; put_pid(as->pid); if (as->cred) put_cred(as->cred); for (i = 0; i < as->urb->num_sgs; i++) { if (sg_page(&as->urb->sg[i])) kfree(sg_virt(&as->urb->sg[i])); } kfree(as->urb->sg); if (as->usbm == NULL) kfree(as->urb->transfer_buffer); else dec_usb_memory_use_count(as->usbm, &as->usbm->urb_use_count); kfree(as->urb->setup_packet); usb_free_urb(as->urb); usbfs_decrease_memory_usage(as->mem_usage); kfree(as); } static void async_newpending(struct async *as) { struct usb_dev_state *ps = as->ps; unsigned long flags; spin_lock_irqsave(&ps->lock, flags); list_add_tail(&as->asynclist, &ps->async_pending); spin_unlock_irqrestore(&ps->lock, flags); } static void async_removepending(struct async *as) { struct usb_dev_state *ps = as->ps; unsigned long flags; spin_lock_irqsave(&ps->lock, flags); list_del_init(&as->asynclist); spin_unlock_irqrestore(&ps->lock, flags); } static struct async *async_getcompleted(struct usb_dev_state *ps) { unsigned long flags; struct async *as = NULL; spin_lock_irqsave(&ps->lock, flags); if (!list_empty(&ps->async_completed)) { as = list_entry(ps->async_completed.next, struct async, asynclist); list_del_init(&as->asynclist); } spin_unlock_irqrestore(&ps->lock, flags); return as; } static struct async *async_getpending(struct usb_dev_state *ps, void __user *userurb) { struct async *as; list_for_each_entry(as, &ps->async_pending, asynclist) if (as->userurb == userurb) { list_del_init(&as->asynclist); return as; } return NULL; } static void snoop_urb(struct usb_device *udev, void __user *userurb, int pipe, unsigned length, int timeout_or_status, enum snoop_when when, unsigned char *data, unsigned data_len) { static const char *types[] = {"isoc", "int", "ctrl", "bulk"}; static const char *dirs[] = {"out", "in"}; int ep; const char *t, *d; if (!usbfs_snoop) return; ep = usb_pipeendpoint(pipe); t = types[usb_pipetype(pipe)]; d = dirs[!!usb_pipein(pipe)]; if (userurb) { /* Async */ if (when == SUBMIT) dev_info(&udev->dev, "userurb %px, ep%d %s-%s, " "length %u\n", userurb, ep, t, d, length); else dev_info(&udev->dev, "userurb %px, ep%d %s-%s, " "actual_length %u status %d\n", userurb, ep, t, d, length, timeout_or_status); } else { if (when == SUBMIT) dev_info(&udev->dev, "ep%d %s-%s, length %u, " "timeout %d\n", ep, t, d, length, timeout_or_status); else dev_info(&udev->dev, "ep%d %s-%s, actual_length %u, " "status %d\n", ep, t, d, length, timeout_or_status); } data_len = min(data_len, usbfs_snoop_max); if (data && data_len > 0) { print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, 32, 1, data, data_len, 1); } } static void snoop_urb_data(struct urb *urb, unsigned len) { int i, size; len = min(len, usbfs_snoop_max); if (!usbfs_snoop || len == 0) return; if (urb->num_sgs == 0) { 
print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, 32, 1, urb->transfer_buffer, len, 1); return; } for (i = 0; i < urb->num_sgs && len; i++) { size = (len > USB_SG_SIZE) ? USB_SG_SIZE : len; print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, 32, 1, sg_virt(&urb->sg[i]), size, 1); len -= size; } } static int copy_urb_data_to_user(u8 __user *userbuffer, struct urb *urb) { unsigned i, len, size; if (urb->number_of_packets > 0) /* Isochronous */ len = urb->transfer_buffer_length; else /* Non-Isoc */ len = urb->actual_length; if (urb->num_sgs == 0) { if (copy_to_user(userbuffer, urb->transfer_buffer, len)) return -EFAULT; return 0; } for (i = 0; i < urb->num_sgs && len; i++) { size = (len > USB_SG_SIZE) ? USB_SG_SIZE : len; if (copy_to_user(userbuffer, sg_virt(&urb->sg[i]), size)) return -EFAULT; userbuffer += size; len -= size; } return 0; } #define AS_CONTINUATION 1 #define AS_UNLINK 2 static void cancel_bulk_urbs(struct usb_dev_state *ps, unsigned bulk_addr) __releases(ps->lock) __acquires(ps->lock) { struct urb *urb; struct async *as; /* Mark all the pending URBs that match bulk_addr, up to but not * including the first one without AS_CONTINUATION. If such an * URB is encountered then a new transfer has already started so * the endpoint doesn't need to be disabled; otherwise it does. */ list_for_each_entry(as, &ps->async_pending, asynclist) { if (as->bulk_addr == bulk_addr) { if (as->bulk_status != AS_CONTINUATION) goto rescan; as->bulk_status = AS_UNLINK; as->bulk_addr = 0; } } ps->disabled_bulk_eps |= (1 << bulk_addr); /* Now carefully unlink all the marked pending URBs */ rescan: list_for_each_entry_reverse(as, &ps->async_pending, asynclist) { if (as->bulk_status == AS_UNLINK) { as->bulk_status = 0; /* Only once */ urb = as->urb; usb_get_urb(urb); spin_unlock(&ps->lock); /* Allow completions */ usb_unlink_urb(urb); usb_put_urb(urb); spin_lock(&ps->lock); goto rescan; } } } static void async_completed(struct urb *urb) { struct async *as = urb->context; struct usb_dev_state *ps = as->ps; struct pid *pid = NULL; const struct cred *cred = NULL; unsigned long flags; sigval_t addr; int signr, errno; spin_lock_irqsave(&ps->lock, flags); list_move_tail(&as->asynclist, &ps->async_completed); as->status = urb->status; signr = as->signr; if (signr) { errno = as->status; addr = as->userurb_sigval; pid = get_pid(as->pid); cred = get_cred(as->cred); } snoop(&urb->dev->dev, "urb complete\n"); snoop_urb(urb->dev, as->userurb, urb->pipe, urb->actual_length, as->status, COMPLETE, NULL, 0); if (usb_urb_dir_in(urb)) snoop_urb_data(urb, urb->actual_length); if (as->status < 0 && as->bulk_addr && as->status != -ECONNRESET && as->status != -ENOENT) cancel_bulk_urbs(ps, as->bulk_addr); wake_up(&ps->wait); spin_unlock_irqrestore(&ps->lock, flags); if (signr) { kill_pid_usb_asyncio(signr, errno, addr, pid, cred); put_pid(pid); put_cred(cred); } } static void destroy_async(struct usb_dev_state *ps, struct list_head *list) { struct urb *urb; struct async *as; unsigned long flags; spin_lock_irqsave(&ps->lock, flags); while (!list_empty(list)) { as = list_last_entry(list, struct async, asynclist); list_del_init(&as->asynclist); urb = as->urb; usb_get_urb(urb); /* drop the spinlock so the completion handler can run */ spin_unlock_irqrestore(&ps->lock, flags); usb_kill_urb(urb); usb_put_urb(urb); spin_lock_irqsave(&ps->lock, flags); } spin_unlock_irqrestore(&ps->lock, flags); } static void destroy_async_on_interface(struct usb_dev_state *ps, unsigned int ifnum) { struct list_head *p, *q, hitlist; unsigned long 
flags; INIT_LIST_HEAD(&hitlist); spin_lock_irqsave(&ps->lock, flags); list_for_each_safe(p, q, &ps->async_pending) if (ifnum == list_entry(p, struct async, asynclist)->ifnum) list_move_tail(p, &hitlist); spin_unlock_irqrestore(&ps->lock, flags); destroy_async(ps, &hitlist); } static void destroy_all_async(struct usb_dev_state *ps) { destroy_async(ps, &ps->async_pending); } /* * interface claims are made only at the request of user level code, * which can also release them (explicitly or by closing files). * they're also undone when devices disconnect. */ static int driver_probe(struct usb_interface *intf, const struct usb_device_id *id) { return -ENODEV; } static void driver_disconnect(struct usb_interface *intf) { struct usb_dev_state *ps = usb_get_intfdata(intf); unsigned int ifnum = intf->altsetting->desc.bInterfaceNumber; if (!ps) return; /* NOTE: this relies on usbcore having canceled and completed * all pending I/O requests; 2.6 does that. */ if (likely(ifnum < 8*sizeof(ps->ifclaimed))) clear_bit(ifnum, &ps->ifclaimed); else dev_warn(&intf->dev, "interface number %u out of range\n", ifnum); usb_set_intfdata(intf, NULL); /* force async requests to complete */ destroy_async_on_interface(ps, ifnum); } /* We don't care about suspend/resume of claimed interfaces */ static int driver_suspend(struct usb_interface *intf, pm_message_t msg) { return 0; } static int driver_resume(struct usb_interface *intf) { return 0; } #ifdef CONFIG_PM /* The following routines apply to the entire device, not interfaces */ void usbfs_notify_suspend(struct usb_device *udev) { /* We don't need to handle this */ } void usbfs_notify_resume(struct usb_device *udev) { struct usb_dev_state *ps; /* Protect against simultaneous remove or release */ mutex_lock(&usbfs_mutex); list_for_each_entry(ps, &udev->filelist, list) { WRITE_ONCE(ps->not_yet_resumed, 0); wake_up_all(&ps->wait_for_resume); } mutex_unlock(&usbfs_mutex); } #endif struct usb_driver usbfs_driver = { .name = "usbfs", .probe = driver_probe, .disconnect = driver_disconnect, .suspend = driver_suspend, .resume = driver_resume, .supports_autosuspend = 1, }; static int claimintf(struct usb_dev_state *ps, unsigned int ifnum) { struct usb_device *dev = ps->dev; struct usb_interface *intf; int err; if (ifnum >= 8*sizeof(ps->ifclaimed)) return -EINVAL; /* already claimed */ if (test_bit(ifnum, &ps->ifclaimed)) return 0; if (ps->privileges_dropped && !test_bit(ifnum, &ps->interface_allowed_mask)) return -EACCES; intf = usb_ifnum_to_if(dev, ifnum); if (!intf) err = -ENOENT; else { unsigned int old_suppress; /* suppress uevents while claiming interface */ old_suppress = dev_get_uevent_suppress(&intf->dev); dev_set_uevent_suppress(&intf->dev, 1); err = usb_driver_claim_interface(&usbfs_driver, intf, ps); dev_set_uevent_suppress(&intf->dev, old_suppress); } if (err == 0) set_bit(ifnum, &ps->ifclaimed); return err; } static int releaseintf(struct usb_dev_state *ps, unsigned int ifnum) { struct usb_device *dev; struct usb_interface *intf; int err; err = -EINVAL; if (ifnum >= 8*sizeof(ps->ifclaimed)) return err; dev = ps->dev; intf = usb_ifnum_to_if(dev, ifnum); if (!intf) err = -ENOENT; else if (test_and_clear_bit(ifnum, &ps->ifclaimed)) { unsigned int old_suppress; /* suppress uevents while releasing interface */ old_suppress = dev_get_uevent_suppress(&intf->dev); dev_set_uevent_suppress(&intf->dev, 1); usb_driver_release_interface(&usbfs_driver, intf); dev_set_uevent_suppress(&intf->dev, old_suppress); err = 0; } return err; } static int checkintf(struct usb_dev_state 
*ps, unsigned int ifnum) { if (ps->dev->state != USB_STATE_CONFIGURED) return -EHOSTUNREACH; if (ifnum >= 8*sizeof(ps->ifclaimed)) return -EINVAL; if (test_bit(ifnum, &ps->ifclaimed)) return 0; /* if not yet claimed, claim it for the driver */ dev_warn(&ps->dev->dev, "usbfs: process %d (%s) did not claim " "interface %u before use\n", task_pid_nr(current), current->comm, ifnum); return claimintf(ps, ifnum); } static int findintfep(struct usb_device *dev, unsigned int ep) { unsigned int i, j, e; struct usb_interface *intf; struct usb_host_interface *alts; struct usb_endpoint_descriptor *endpt; if (ep & ~(USB_DIR_IN|0xf)) return -EINVAL; if (!dev->actconfig) return -ESRCH; for (i = 0; i < dev->actconfig->desc.bNumInterfaces; i++) { intf = dev->actconfig->interface[i]; for (j = 0; j < intf->num_altsetting; j++) { alts = &intf->altsetting[j]; for (e = 0; e < alts->desc.bNumEndpoints; e++) { endpt = &alts->endpoint[e].desc; if (endpt->bEndpointAddress == ep) return alts->desc.bInterfaceNumber; } } } return -ENOENT; } static int check_ctrlrecip(struct usb_dev_state *ps, unsigned int requesttype, unsigned int request, unsigned int index) { int ret = 0; struct usb_host_interface *alt_setting; if (ps->dev->state != USB_STATE_UNAUTHENTICATED && ps->dev->state != USB_STATE_ADDRESS && ps->dev->state != USB_STATE_CONFIGURED) return -EHOSTUNREACH; if (USB_TYPE_VENDOR == (USB_TYPE_MASK & requesttype)) return 0; /* * check for the special corner case 'get_device_id' in the printer * class specification, which we always want to allow as it is used * to query things like ink level, etc. */ if (requesttype == 0xa1 && request == 0) { alt_setting = usb_find_alt_setting(ps->dev->actconfig, index >> 8, index & 0xff); if (alt_setting && alt_setting->desc.bInterfaceClass == USB_CLASS_PRINTER) return 0; } index &= 0xff; switch (requesttype & USB_RECIP_MASK) { case USB_RECIP_ENDPOINT: if ((index & ~USB_DIR_IN) == 0) return 0; ret = findintfep(ps->dev, index); if (ret < 0) { /* * Some not fully compliant Win apps seem to get * index wrong and have the endpoint number here * rather than the endpoint address (with the * correct direction). Win does let this through, * so we'll not reject it here but leave it to * the device to not break KVM. But we warn. 
*/ ret = findintfep(ps->dev, index ^ 0x80); if (ret >= 0) dev_info(&ps->dev->dev, "%s: process %i (%s) requesting ep %02x but needs %02x\n", __func__, task_pid_nr(current), current->comm, index, index ^ 0x80); } if (ret >= 0) ret = checkintf(ps, ret); break; case USB_RECIP_INTERFACE: ret = checkintf(ps, index); break; } return ret; } static struct usb_host_endpoint *ep_to_host_endpoint(struct usb_device *dev, unsigned char ep) { if (ep & USB_ENDPOINT_DIR_MASK) return dev->ep_in[ep & USB_ENDPOINT_NUMBER_MASK]; else return dev->ep_out[ep & USB_ENDPOINT_NUMBER_MASK]; } static int parse_usbdevfs_streams(struct usb_dev_state *ps, struct usbdevfs_streams __user *streams, unsigned int *num_streams_ret, unsigned int *num_eps_ret, struct usb_host_endpoint ***eps_ret, struct usb_interface **intf_ret) { unsigned int i, num_streams, num_eps; struct usb_host_endpoint **eps; struct usb_interface *intf = NULL; unsigned char ep; int ifnum, ret; if (get_user(num_streams, &streams->num_streams) || get_user(num_eps, &streams->num_eps)) return -EFAULT; if (num_eps < 1 || num_eps > USB_MAXENDPOINTS) return -EINVAL; /* The XHCI controller allows max 2 ^ 16 streams */ if (num_streams_ret && (num_streams < 2 || num_streams > 65536)) return -EINVAL; eps = kmalloc_array(num_eps, sizeof(*eps), GFP_KERNEL); if (!eps) return -ENOMEM; for (i = 0; i < num_eps; i++) { if (get_user(ep, &streams->eps[i])) { ret = -EFAULT; goto error; } eps[i] = ep_to_host_endpoint(ps->dev, ep); if (!eps[i]) { ret = -EINVAL; goto error; } /* usb_alloc/free_streams operate on an usb_interface */ ifnum = findintfep(ps->dev, ep); if (ifnum < 0) { ret = ifnum; goto error; } if (i == 0) { ret = checkintf(ps, ifnum); if (ret < 0) goto error; intf = usb_ifnum_to_if(ps->dev, ifnum); } else { /* Verify all eps belong to the same interface */ if (ifnum != intf->altsetting->desc.bInterfaceNumber) { ret = -EINVAL; goto error; } } } if (num_streams_ret) *num_streams_ret = num_streams; *num_eps_ret = num_eps; *eps_ret = eps; *intf_ret = intf; return 0; error: kfree(eps); return ret; } static struct usb_device *usbdev_lookup_by_devt(dev_t devt) { struct device *dev; dev = bus_find_device_by_devt(&usb_bus_type, devt); if (!dev) return NULL; return to_usb_device(dev); } /* * file operations */ static int usbdev_open(struct inode *inode, struct file *file) { struct usb_device *dev = NULL; struct usb_dev_state *ps; int ret; ret = -ENOMEM; ps = kzalloc(sizeof(struct usb_dev_state), GFP_KERNEL); if (!ps) goto out_free_ps; ret = -ENODEV; /* usbdev device-node */ if (imajor(inode) == USB_DEVICE_MAJOR) dev = usbdev_lookup_by_devt(inode->i_rdev); if (!dev) goto out_free_ps; usb_lock_device(dev); if (dev->state == USB_STATE_NOTATTACHED) goto out_unlock_device; ret = usb_autoresume_device(dev); if (ret) goto out_unlock_device; ps->dev = dev; ps->file = file; ps->interface_allowed_mask = 0xFFFFFFFF; /* 32 bits */ spin_lock_init(&ps->lock); INIT_LIST_HEAD(&ps->list); INIT_LIST_HEAD(&ps->async_pending); INIT_LIST_HEAD(&ps->async_completed); INIT_LIST_HEAD(&ps->memory_list); init_waitqueue_head(&ps->wait); init_waitqueue_head(&ps->wait_for_resume); ps->disc_pid = get_pid(task_pid(current)); ps->cred = get_current_cred(); smp_wmb(); /* Can't race with resume; the device is already active */ list_add_tail(&ps->list, &dev->filelist); file->private_data = ps; usb_unlock_device(dev); snoop(&dev->dev, "opened by process %d: %s\n", task_pid_nr(current), current->comm); return ret; out_unlock_device: usb_unlock_device(dev); usb_put_dev(dev); out_free_ps: kfree(ps); return ret; } 
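/*
 * Illustrative userspace sketch, not part of devio.c: usbdev_open()
 * above backs the /dev/bus/usb/BBB/DDD character nodes, and
 * do_proc_control() further below services USBDEVFS_CONTROL.  The
 * device path is a placeholder (take the real bus/device numbers from
 * lsusb or sysfs), and opening the node normally needs appropriate
 * permissions.  This reads the 18-byte device descriptor with a
 * standard GET_DESCRIPTOR control request, which needs no claimed
 * interface.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usbdevice_fs.h>

int main(int argc, char **argv)
{
	/* Placeholder node; pass the real one, e.g. /dev/bus/usb/001/004 */
	const char *node = argc > 1 ? argv[1] : "/dev/bus/usb/001/002";
	unsigned char desc[18];		/* standard USB device descriptor */
	struct usbdevfs_ctrltransfer ctrl;
	int fd, n;

	fd = open(node, O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&ctrl, 0, sizeof(ctrl));
	ctrl.bRequestType = 0x80;	/* IN | standard | device */
	ctrl.bRequest = 0x06;		/* GET_DESCRIPTOR */
	ctrl.wValue = 0x0100;		/* descriptor type 1 (device), index 0 */
	ctrl.wIndex = 0;
	ctrl.wLength = sizeof(desc);
	ctrl.timeout = 1000;		/* milliseconds */
	ctrl.data = desc;

	n = ioctl(fd, USBDEVFS_CONTROL, &ctrl);
	if (n < 0) {
		perror("USBDEVFS_CONTROL");
		close(fd);
		return 1;
	}
	if (n >= 12)	/* idVendor/idProduct are little-endian in the blob */
		printf("idVendor=%02x%02x idProduct=%02x%02x\n",
		       desc[9], desc[8], desc[11], desc[10]);
	close(fd);
	return 0;
}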
static int usbdev_release(struct inode *inode, struct file *file) { struct usb_dev_state *ps = file->private_data; struct usb_device *dev = ps->dev; unsigned int ifnum; struct async *as; usb_lock_device(dev); usb_hub_release_all_ports(dev, ps); /* Protect against simultaneous resume */ mutex_lock(&usbfs_mutex); list_del_init(&ps->list); mutex_unlock(&usbfs_mutex); for (ifnum = 0; ps->ifclaimed && ifnum < 8*sizeof(ps->ifclaimed); ifnum++) { if (test_bit(ifnum, &ps->ifclaimed)) releaseintf(ps, ifnum); } destroy_all_async(ps); if (!ps->suspend_allowed) usb_autosuspend_device(dev); usb_unlock_device(dev); usb_put_dev(dev); put_pid(ps->disc_pid); put_cred(ps->cred); as = async_getcompleted(ps); while (as) { free_async(as); as = async_getcompleted(ps); } kfree(ps); return 0; } static void usbfs_blocking_completion(struct urb *urb) { complete((struct completion *) urb->context); } /* * Much like usb_start_wait_urb, but returns status separately from * actual_length and uses a killable wait. */ static int usbfs_start_wait_urb(struct urb *urb, int timeout, unsigned int *actlen) { DECLARE_COMPLETION_ONSTACK(ctx); unsigned long expire; int rc; urb->context = &ctx; urb->complete = usbfs_blocking_completion; *actlen = 0; rc = usb_submit_urb(urb, GFP_KERNEL); if (unlikely(rc)) return rc; expire = (timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT); rc = wait_for_completion_killable_timeout(&ctx, expire); if (rc <= 0) { usb_kill_urb(urb); *actlen = urb->actual_length; if (urb->status != -ENOENT) ; /* Completed before it was killed */ else if (rc < 0) return -EINTR; else return -ETIMEDOUT; } *actlen = urb->actual_length; return urb->status; } static int do_proc_control(struct usb_dev_state *ps, struct usbdevfs_ctrltransfer *ctrl) { struct usb_device *dev = ps->dev; unsigned int tmo; unsigned char *tbuf; unsigned int wLength, actlen; int i, pipe, ret; struct urb *urb = NULL; struct usb_ctrlrequest *dr = NULL; ret = check_ctrlrecip(ps, ctrl->bRequestType, ctrl->bRequest, ctrl->wIndex); if (ret) return ret; wLength = ctrl->wLength; /* To suppress 64k PAGE_SIZE warning */ if (wLength > PAGE_SIZE) return -EINVAL; ret = usbfs_increase_memory_usage(PAGE_SIZE + sizeof(struct urb) + sizeof(struct usb_ctrlrequest)); if (ret) return ret; ret = -ENOMEM; tbuf = (unsigned char *)__get_free_page(GFP_KERNEL); if (!tbuf) goto done; urb = usb_alloc_urb(0, GFP_NOIO); if (!urb) goto done; dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_NOIO); if (!dr) goto done; dr->bRequestType = ctrl->bRequestType; dr->bRequest = ctrl->bRequest; dr->wValue = cpu_to_le16(ctrl->wValue); dr->wIndex = cpu_to_le16(ctrl->wIndex); dr->wLength = cpu_to_le16(ctrl->wLength); tmo = ctrl->timeout; snoop(&dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", ctrl->bRequestType, ctrl->bRequest, ctrl->wValue, ctrl->wIndex, ctrl->wLength); if ((ctrl->bRequestType & USB_DIR_IN) && wLength) { pipe = usb_rcvctrlpipe(dev, 0); usb_fill_control_urb(urb, dev, pipe, (unsigned char *) dr, tbuf, wLength, NULL, NULL); snoop_urb(dev, NULL, pipe, wLength, tmo, SUBMIT, NULL, 0); usb_unlock_device(dev); i = usbfs_start_wait_urb(urb, tmo, &actlen); /* Linger a bit, prior to the next control message. 
*/ if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) msleep(200); usb_lock_device(dev); snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, tbuf, actlen); if (!i && actlen) { if (copy_to_user(ctrl->data, tbuf, actlen)) { ret = -EFAULT; goto done; } } } else { if (wLength) { if (copy_from_user(tbuf, ctrl->data, wLength)) { ret = -EFAULT; goto done; } } pipe = usb_sndctrlpipe(dev, 0); usb_fill_control_urb(urb, dev, pipe, (unsigned char *) dr, tbuf, wLength, NULL, NULL); snoop_urb(dev, NULL, pipe, wLength, tmo, SUBMIT, tbuf, wLength); usb_unlock_device(dev); i = usbfs_start_wait_urb(urb, tmo, &actlen); /* Linger a bit, prior to the next control message. */ if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) msleep(200); usb_lock_device(dev); snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, NULL, 0); } if (i < 0 && i != -EPIPE) { dev_printk(KERN_DEBUG, &dev->dev, "usbfs: USBDEVFS_CONTROL " "failed cmd %s rqt %u rq %u len %u ret %d\n", current->comm, ctrl->bRequestType, ctrl->bRequest, ctrl->wLength, i); } ret = (i < 0 ? i : actlen); done: kfree(dr); usb_free_urb(urb); free_page((unsigned long) tbuf); usbfs_decrease_memory_usage(PAGE_SIZE + sizeof(struct urb) + sizeof(struct usb_ctrlrequest)); return ret; } static int proc_control(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_ctrltransfer ctrl; if (copy_from_user(&ctrl, arg, sizeof(ctrl))) return -EFAULT; return do_proc_control(ps, &ctrl); } static int do_proc_bulk(struct usb_dev_state *ps, struct usbdevfs_bulktransfer *bulk) { struct usb_device *dev = ps->dev; unsigned int tmo, len1, len2, pipe; unsigned char *tbuf; int i, ret; struct urb *urb = NULL; struct usb_host_endpoint *ep; ret = findintfep(ps->dev, bulk->ep); if (ret < 0) return ret; ret = checkintf(ps, ret); if (ret) return ret; len1 = bulk->len; if (len1 < 0 || len1 >= (INT_MAX - sizeof(struct urb))) return -EINVAL; if (bulk->ep & USB_DIR_IN) pipe = usb_rcvbulkpipe(dev, bulk->ep & 0x7f); else pipe = usb_sndbulkpipe(dev, bulk->ep & 0x7f); ep = usb_pipe_endpoint(dev, pipe); if (!ep || !usb_endpoint_maxp(&ep->desc)) return -EINVAL; ret = usbfs_increase_memory_usage(len1 + sizeof(struct urb)); if (ret) return ret; /* * len1 can be almost arbitrarily large. Don't WARN if it's * too big, just fail the request. */ ret = -ENOMEM; tbuf = kmalloc(len1, GFP_KERNEL | __GFP_NOWARN); if (!tbuf) goto done; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) goto done; if ((ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT) { pipe = (pipe & ~(3 << 30)) | (PIPE_INTERRUPT << 30); usb_fill_int_urb(urb, dev, pipe, tbuf, len1, NULL, NULL, ep->desc.bInterval); } else { usb_fill_bulk_urb(urb, dev, pipe, tbuf, len1, NULL, NULL); } tmo = bulk->timeout; if (bulk->ep & 0x80) { snoop_urb(dev, NULL, pipe, len1, tmo, SUBMIT, NULL, 0); usb_unlock_device(dev); i = usbfs_start_wait_urb(urb, tmo, &len2); usb_lock_device(dev); snoop_urb(dev, NULL, pipe, len2, i, COMPLETE, tbuf, len2); if (!i && len2) { if (copy_to_user(bulk->data, tbuf, len2)) { ret = -EFAULT; goto done; } } } else { if (len1) { if (copy_from_user(tbuf, bulk->data, len1)) { ret = -EFAULT; goto done; } } snoop_urb(dev, NULL, pipe, len1, tmo, SUBMIT, tbuf, len1); usb_unlock_device(dev); i = usbfs_start_wait_urb(urb, tmo, &len2); usb_lock_device(dev); snoop_urb(dev, NULL, pipe, len2, i, COMPLETE, NULL, 0); } ret = (i < 0 ? 
i : len2); done: usb_free_urb(urb); kfree(tbuf); usbfs_decrease_memory_usage(len1 + sizeof(struct urb)); return ret; } static int proc_bulk(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_bulktransfer bulk; if (copy_from_user(&bulk, arg, sizeof(bulk))) return -EFAULT; return do_proc_bulk(ps, &bulk); } static void check_reset_of_active_ep(struct usb_device *udev, unsigned int epnum, char *ioctl_name) { struct usb_host_endpoint **eps; struct usb_host_endpoint *ep; eps = (epnum & USB_DIR_IN) ? udev->ep_in : udev->ep_out; ep = eps[epnum & 0x0f]; if (ep && !list_empty(&ep->urb_list)) dev_warn(&udev->dev, "Process %d (%s) called USBDEVFS_%s for active endpoint 0x%02x\n", task_pid_nr(current), current->comm, ioctl_name, epnum); } static int proc_resetep(struct usb_dev_state *ps, void __user *arg) { unsigned int ep; int ret; if (get_user(ep, (unsigned int __user *)arg)) return -EFAULT; ret = findintfep(ps->dev, ep); if (ret < 0) return ret; ret = checkintf(ps, ret); if (ret) return ret; check_reset_of_active_ep(ps->dev, ep, "RESETEP"); usb_reset_endpoint(ps->dev, ep); return 0; } static int proc_clearhalt(struct usb_dev_state *ps, void __user *arg) { unsigned int ep; int pipe; int ret; if (get_user(ep, (unsigned int __user *)arg)) return -EFAULT; ret = findintfep(ps->dev, ep); if (ret < 0) return ret; ret = checkintf(ps, ret); if (ret) return ret; check_reset_of_active_ep(ps->dev, ep, "CLEAR_HALT"); if (ep & USB_DIR_IN) pipe = usb_rcvbulkpipe(ps->dev, ep & 0x7f); else pipe = usb_sndbulkpipe(ps->dev, ep & 0x7f); return usb_clear_halt(ps->dev, pipe); } static int proc_getdriver(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_getdriver gd; struct usb_interface *intf; int ret; if (copy_from_user(&gd, arg, sizeof(gd))) return -EFAULT; intf = usb_ifnum_to_if(ps->dev, gd.interface); if (!intf || !intf->dev.driver) ret = -ENODATA; else { strscpy(gd.driver, intf->dev.driver->name, sizeof(gd.driver)); ret = (copy_to_user(arg, &gd, sizeof(gd)) ? -EFAULT : 0); } return ret; } static int proc_connectinfo(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_connectinfo ci; memset(&ci, 0, sizeof(ci)); ci.devnum = ps->dev->devnum; ci.slow = ps->dev->speed == USB_SPEED_LOW; if (copy_to_user(arg, &ci, sizeof(ci))) return -EFAULT; return 0; } static int proc_conninfo_ex(struct usb_dev_state *ps, void __user *arg, size_t size) { struct usbdevfs_conninfo_ex ci; struct usb_device *udev = ps->dev; if (size < sizeof(ci.size)) return -EINVAL; memset(&ci, 0, sizeof(ci)); ci.size = sizeof(ci); ci.busnum = udev->bus->busnum; ci.devnum = udev->devnum; ci.speed = udev->speed; while (udev && udev->portnum != 0) { if (++ci.num_ports <= ARRAY_SIZE(ci.ports)) ci.ports[ARRAY_SIZE(ci.ports) - ci.num_ports] = udev->portnum; udev = udev->parent; } if (ci.num_ports < ARRAY_SIZE(ci.ports)) memmove(&ci.ports[0], &ci.ports[ARRAY_SIZE(ci.ports) - ci.num_ports], ci.num_ports); if (copy_to_user(arg, &ci, min(sizeof(ci), size))) return -EFAULT; return 0; } static int proc_resetdevice(struct usb_dev_state *ps) { struct usb_host_config *actconfig = ps->dev->actconfig; struct usb_interface *interface; int i, number; /* Don't allow a device reset if the process has dropped the * privilege to do such things and any of the interfaces are * currently claimed. 
*/ if (ps->privileges_dropped && actconfig) { for (i = 0; i < actconfig->desc.bNumInterfaces; ++i) { interface = actconfig->interface[i]; number = interface->cur_altsetting->desc.bInterfaceNumber; if (usb_interface_claimed(interface) && !test_bit(number, &ps->ifclaimed)) { dev_warn(&ps->dev->dev, "usbfs: interface %d claimed by %s while '%s' resets device\n", number, interface->dev.driver->name, current->comm); return -EACCES; } } } return usb_reset_device(ps->dev); } static int proc_setintf(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_setinterface setintf; int ret; if (copy_from_user(&setintf, arg, sizeof(setintf))) return -EFAULT; ret = checkintf(ps, setintf.interface); if (ret) return ret; destroy_async_on_interface(ps, setintf.interface); return usb_set_interface(ps->dev, setintf.interface, setintf.altsetting); } static int proc_setconfig(struct usb_dev_state *ps, void __user *arg) { int u; int status = 0; struct usb_host_config *actconfig; if (get_user(u, (int __user *)arg)) return -EFAULT; actconfig = ps->dev->actconfig; /* Don't touch the device if any interfaces are claimed. * It could interfere with other drivers' operations, and if * an interface is claimed by usbfs it could easily deadlock. */ if (actconfig) { int i; for (i = 0; i < actconfig->desc.bNumInterfaces; ++i) { if (usb_interface_claimed(actconfig->interface[i])) { dev_warn(&ps->dev->dev, "usbfs: interface %d claimed by %s " "while '%s' sets config #%d\n", actconfig->interface[i] ->cur_altsetting ->desc.bInterfaceNumber, actconfig->interface[i] ->dev.driver->name, current->comm, u); status = -EBUSY; break; } } } /* SET_CONFIGURATION is often abused as a "cheap" driver reset, * so avoid usb_set_configuration()'s kick to sysfs */ if (status == 0) { if (actconfig && actconfig->desc.bConfigurationValue == u) status = usb_reset_configuration(ps->dev); else status = usb_set_configuration(ps->dev, u); } return status; } static struct usb_memory * find_memory_area(struct usb_dev_state *ps, const struct usbdevfs_urb *uurb) { struct usb_memory *usbm = NULL, *iter; unsigned long flags; unsigned long uurb_start = (unsigned long)uurb->buffer; spin_lock_irqsave(&ps->lock, flags); list_for_each_entry(iter, &ps->memory_list, memlist) { if (uurb_start >= iter->vm_start && uurb_start < iter->vm_start + iter->size) { if (uurb->buffer_length > iter->vm_start + iter->size - uurb_start) { usbm = ERR_PTR(-EINVAL); } else { usbm = iter; usbm->urb_use_count++; } break; } } spin_unlock_irqrestore(&ps->lock, flags); return usbm; } static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb, struct usbdevfs_iso_packet_desc __user *iso_frame_desc, void __user *arg, sigval_t userurb_sigval) { struct usbdevfs_iso_packet_desc *isopkt = NULL; struct usb_host_endpoint *ep; struct async *as = NULL; struct usb_ctrlrequest *dr = NULL; unsigned int u, totlen, isofrmlen; int i, ret, num_sgs = 0, ifnum = -1; int number_of_packets = 0; unsigned int stream_id = 0; void *buf; bool is_in; bool allow_short = false; bool allow_zero = false; unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK | USBDEVFS_URB_BULK_CONTINUATION | USBDEVFS_URB_NO_FSBR | USBDEVFS_URB_ZERO_PACKET | USBDEVFS_URB_NO_INTERRUPT; /* USBDEVFS_URB_ISO_ASAP is a special case */ if (uurb->type == USBDEVFS_URB_TYPE_ISO) mask |= USBDEVFS_URB_ISO_ASAP; if (uurb->flags & ~mask) return -EINVAL; if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX) return -EINVAL; if (uurb->buffer_length > 0 && !uurb->buffer) return -EINVAL; if (!(uurb->type == 
USBDEVFS_URB_TYPE_CONTROL && (uurb->endpoint & ~USB_ENDPOINT_DIR_MASK) == 0)) { ifnum = findintfep(ps->dev, uurb->endpoint); if (ifnum < 0) return ifnum; ret = checkintf(ps, ifnum); if (ret) return ret; } ep = ep_to_host_endpoint(ps->dev, uurb->endpoint); if (!ep) return -ENOENT; is_in = (uurb->endpoint & USB_ENDPOINT_DIR_MASK) != 0; u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ if (uurb->buffer_length < 8) return -EINVAL; dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); if (!dr) return -ENOMEM; if (copy_from_user(dr, uurb->buffer, 8)) { ret = -EFAULT; goto error; } if (uurb->buffer_length < (le16_to_cpu(dr->wLength) + 8)) { ret = -EINVAL; goto error; } ret = check_ctrlrecip(ps, dr->bRequestType, dr->bRequest, le16_to_cpu(dr->wIndex)); if (ret) goto error; uurb->buffer_length = le16_to_cpu(dr->wLength); uurb->buffer += 8; if ((dr->bRequestType & USB_DIR_IN) && uurb->buffer_length) { is_in = true; uurb->endpoint |= USB_DIR_IN; } else { is_in = false; uurb->endpoint &= ~USB_DIR_IN; } if (is_in) allow_short = true; snoop(&ps->dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", dr->bRequestType, dr->bRequest, __le16_to_cpu(dr->wValue), __le16_to_cpu(dr->wIndex), __le16_to_cpu(dr->wLength)); u = sizeof(struct usb_ctrlrequest); break; case USBDEVFS_URB_TYPE_BULK: if (!is_in) allow_zero = true; else allow_short = true; switch (usb_endpoint_type(&ep->desc)) { case USB_ENDPOINT_XFER_CONTROL: case USB_ENDPOINT_XFER_ISOC: return -EINVAL; case USB_ENDPOINT_XFER_INT: /* allow single-shot interrupt transfers */ uurb->type = USBDEVFS_URB_TYPE_INTERRUPT; goto interrupt_urb; } num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE); if (num_sgs == 1 || num_sgs > ps->dev->bus->sg_tablesize) num_sgs = 0; if (ep->streams) stream_id = uurb->stream_id; break; case USBDEVFS_URB_TYPE_INTERRUPT: if (!usb_endpoint_xfer_int(&ep->desc)) return -EINVAL; interrupt_urb: if (!is_in) allow_zero = true; else allow_short = true; break; case USBDEVFS_URB_TYPE_ISO: /* arbitrary limit */ if (uurb->number_of_packets < 1 || uurb->number_of_packets > 128) return -EINVAL; if (!usb_endpoint_xfer_isoc(&ep->desc)) return -EINVAL; number_of_packets = uurb->number_of_packets; isofrmlen = sizeof(struct usbdevfs_iso_packet_desc) * number_of_packets; isopkt = memdup_user(iso_frame_desc, isofrmlen); if (IS_ERR(isopkt)) { ret = PTR_ERR(isopkt); isopkt = NULL; goto error; } for (totlen = u = 0; u < number_of_packets; u++) { /* * arbitrary limit need for USB 3.1 Gen2 * sizemax: 96 DPs at SSP, 96 * 1024 = 98304 */ if (isopkt[u].length > 98304) { ret = -EINVAL; goto error; } totlen += isopkt[u].length; } u *= sizeof(struct usb_iso_packet_descriptor); uurb->buffer_length = totlen; break; default: return -EINVAL; } if (uurb->buffer_length > 0 && !access_ok(uurb->buffer, uurb->buffer_length)) { ret = -EFAULT; goto error; } as = alloc_async(number_of_packets); if (!as) { ret = -ENOMEM; goto error; } as->usbm = find_memory_area(ps, uurb); if (IS_ERR(as->usbm)) { ret = PTR_ERR(as->usbm); as->usbm = NULL; goto error; } /* do not use SG buffers when memory mapped segments * are in use */ if (as->usbm) num_sgs = 0; u += sizeof(struct async) + sizeof(struct urb) + (as->usbm ? 
0 : uurb->buffer_length) + num_sgs * sizeof(struct scatterlist); ret = usbfs_increase_memory_usage(u); if (ret) goto error; as->mem_usage = u; if (num_sgs) { as->urb->sg = kmalloc_array(num_sgs, sizeof(struct scatterlist), GFP_KERNEL | __GFP_NOWARN); if (!as->urb->sg) { ret = -ENOMEM; goto error; } as->urb->num_sgs = num_sgs; sg_init_table(as->urb->sg, as->urb->num_sgs); totlen = uurb->buffer_length; for (i = 0; i < as->urb->num_sgs; i++) { u = (totlen > USB_SG_SIZE) ? USB_SG_SIZE : totlen; buf = kmalloc(u, GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto error; } sg_set_buf(&as->urb->sg[i], buf, u); if (!is_in) { if (copy_from_user(buf, uurb->buffer, u)) { ret = -EFAULT; goto error; } uurb->buffer += u; } totlen -= u; } } else if (uurb->buffer_length > 0) { if (as->usbm) { unsigned long uurb_start = (unsigned long)uurb->buffer; as->urb->transfer_buffer = as->usbm->mem + (uurb_start - as->usbm->vm_start); } else { as->urb->transfer_buffer = kmalloc(uurb->buffer_length, GFP_KERNEL | __GFP_NOWARN); if (!as->urb->transfer_buffer) { ret = -ENOMEM; goto error; } if (!is_in) { if (copy_from_user(as->urb->transfer_buffer, uurb->buffer, uurb->buffer_length)) { ret = -EFAULT; goto error; } } else if (uurb->type == USBDEVFS_URB_TYPE_ISO) { /* * Isochronous input data may end up being * discontiguous if some of the packets are * short. Clear the buffer so that the gaps * don't leak kernel data to userspace. */ memset(as->urb->transfer_buffer, 0, uurb->buffer_length); } } } as->urb->dev = ps->dev; as->urb->pipe = (uurb->type << 30) | __create_pipe(ps->dev, uurb->endpoint & 0xf) | (uurb->endpoint & USB_DIR_IN); /* This tedious sequence is necessary because the URB_* flags * are internal to the kernel and subject to change, whereas * the USBDEVFS_URB_* flags are a user API and must not be changed. */ u = (is_in ? 
URB_DIR_IN : URB_DIR_OUT); if (uurb->flags & USBDEVFS_URB_ISO_ASAP) u |= URB_ISO_ASAP; if (allow_short && uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) u |= URB_SHORT_NOT_OK; if (allow_zero && uurb->flags & USBDEVFS_URB_ZERO_PACKET) u |= URB_ZERO_PACKET; if (uurb->flags & USBDEVFS_URB_NO_INTERRUPT) u |= URB_NO_INTERRUPT; as->urb->transfer_flags = u; if (!allow_short && uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) dev_warn(&ps->dev->dev, "Requested nonsensical USBDEVFS_URB_SHORT_NOT_OK.\n"); if (!allow_zero && uurb->flags & USBDEVFS_URB_ZERO_PACKET) dev_warn(&ps->dev->dev, "Requested nonsensical USBDEVFS_URB_ZERO_PACKET.\n"); as->urb->transfer_buffer_length = uurb->buffer_length; as->urb->setup_packet = (unsigned char *)dr; dr = NULL; as->urb->start_frame = uurb->start_frame; as->urb->number_of_packets = number_of_packets; as->urb->stream_id = stream_id; if (ep->desc.bInterval) { if (uurb->type == USBDEVFS_URB_TYPE_ISO || ps->dev->speed == USB_SPEED_HIGH || ps->dev->speed >= USB_SPEED_SUPER) as->urb->interval = 1 << min(15, ep->desc.bInterval - 1); else as->urb->interval = ep->desc.bInterval; } as->urb->context = as; as->urb->complete = async_completed; for (totlen = u = 0; u < number_of_packets; u++) { as->urb->iso_frame_desc[u].offset = totlen; as->urb->iso_frame_desc[u].length = isopkt[u].length; totlen += isopkt[u].length; } kfree(isopkt); isopkt = NULL; as->ps = ps; as->userurb = arg; as->userurb_sigval = userurb_sigval; if (as->usbm) { unsigned long uurb_start = (unsigned long)uurb->buffer; as->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; as->urb->transfer_dma = as->usbm->dma_handle + (uurb_start - as->usbm->vm_start); } else if (is_in && uurb->buffer_length > 0) as->userbuffer = uurb->buffer; as->signr = uurb->signr; as->ifnum = ifnum; as->pid = get_pid(task_pid(current)); as->cred = get_current_cred(); snoop_urb(ps->dev, as->userurb, as->urb->pipe, as->urb->transfer_buffer_length, 0, SUBMIT, NULL, 0); if (!is_in) snoop_urb_data(as->urb, as->urb->transfer_buffer_length); async_newpending(as); if (usb_endpoint_xfer_bulk(&ep->desc)) { spin_lock_irq(&ps->lock); /* Not exactly the endpoint address; the direction bit is * shifted to the 0x10 position so that the value will be * between 0 and 31. */ as->bulk_addr = usb_endpoint_num(&ep->desc) | ((ep->desc.bEndpointAddress & USB_ENDPOINT_DIR_MASK) >> 3); /* If this bulk URB is the start of a new transfer, re-enable * the endpoint. Otherwise mark it as a continuation URB. */ if (uurb->flags & USBDEVFS_URB_BULK_CONTINUATION) as->bulk_status = AS_CONTINUATION; else ps->disabled_bulk_eps &= ~(1 << as->bulk_addr); /* Don't accept continuation URBs if the endpoint is * disabled because of an earlier error. 
*/ if (ps->disabled_bulk_eps & (1 << as->bulk_addr)) ret = -EREMOTEIO; else ret = usb_submit_urb(as->urb, GFP_ATOMIC); spin_unlock_irq(&ps->lock); } else { ret = usb_submit_urb(as->urb, GFP_KERNEL); } if (ret) { dev_printk(KERN_DEBUG, &ps->dev->dev, "usbfs: usb_submit_urb returned %d\n", ret); snoop_urb(ps->dev, as->userurb, as->urb->pipe, 0, ret, COMPLETE, NULL, 0); async_removepending(as); goto error; } return 0; error: kfree(isopkt); kfree(dr); if (as) free_async(as); return ret; } static int proc_submiturb(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_urb uurb; sigval_t userurb_sigval; if (copy_from_user(&uurb, arg, sizeof(uurb))) return -EFAULT; memset(&userurb_sigval, 0, sizeof(userurb_sigval)); userurb_sigval.sival_ptr = arg; return proc_do_submiturb(ps, &uurb, (((struct usbdevfs_urb __user *)arg)->iso_frame_desc), arg, userurb_sigval); } static int proc_unlinkurb(struct usb_dev_state *ps, void __user *arg) { struct urb *urb; struct async *as; unsigned long flags; spin_lock_irqsave(&ps->lock, flags); as = async_getpending(ps, arg); if (!as) { spin_unlock_irqrestore(&ps->lock, flags); return -EINVAL; } urb = as->urb; usb_get_urb(urb); spin_unlock_irqrestore(&ps->lock, flags); usb_kill_urb(urb); usb_put_urb(urb); return 0; } static void compute_isochronous_actual_length(struct urb *urb) { unsigned int i; if (urb->number_of_packets > 0) { urb->actual_length = 0; for (i = 0; i < urb->number_of_packets; i++) urb->actual_length += urb->iso_frame_desc[i].actual_length; } } static int processcompl(struct async *as, void __user * __user *arg) { struct urb *urb = as->urb; struct usbdevfs_urb __user *userurb = as->userurb; void __user *addr = as->userurb; unsigned int i; compute_isochronous_actual_length(urb); if (as->userbuffer && urb->actual_length) { if (copy_urb_data_to_user(as->userbuffer, urb)) goto err_out; } if (put_user(as->status, &userurb->status)) goto err_out; if (put_user(urb->actual_length, &userurb->actual_length)) goto err_out; if (put_user(urb->error_count, &userurb->error_count)) goto err_out; if (usb_endpoint_xfer_isoc(&urb->ep->desc)) { for (i = 0; i < urb->number_of_packets; i++) { if (put_user(urb->iso_frame_desc[i].actual_length, &userurb->iso_frame_desc[i].actual_length)) goto err_out; if (put_user(urb->iso_frame_desc[i].status, &userurb->iso_frame_desc[i].status)) goto err_out; } } if (put_user(addr, (void __user * __user *)arg)) return -EFAULT; return 0; err_out: return -EFAULT; } static struct async *reap_as(struct usb_dev_state *ps) { DECLARE_WAITQUEUE(wait, current); struct async *as = NULL; struct usb_device *dev = ps->dev; add_wait_queue(&ps->wait, &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); as = async_getcompleted(ps); if (as || !connected(ps)) break; if (signal_pending(current)) break; usb_unlock_device(dev); schedule(); usb_lock_device(dev); } remove_wait_queue(&ps->wait, &wait); set_current_state(TASK_RUNNING); return as; } static int proc_reapurb(struct usb_dev_state *ps, void __user *arg) { struct async *as = reap_as(ps); if (as) { int retval; snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); return retval; } if (signal_pending(current)) return -EINTR; return -ENODEV; } static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg) { int retval; struct async *as; as = async_getcompleted(ps); if (as) { snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); } else { retval = 
(connected(ps) ? -EAGAIN : -ENODEV); } return retval; } #ifdef CONFIG_COMPAT static int proc_control_compat(struct usb_dev_state *ps, struct usbdevfs_ctrltransfer32 __user *p32) { struct usbdevfs_ctrltransfer ctrl; u32 udata; if (copy_from_user(&ctrl, p32, sizeof(*p32) - sizeof(compat_caddr_t)) || get_user(udata, &p32->data)) return -EFAULT; ctrl.data = compat_ptr(udata); return do_proc_control(ps, &ctrl); } static int proc_bulk_compat(struct usb_dev_state *ps, struct usbdevfs_bulktransfer32 __user *p32) { struct usbdevfs_bulktransfer bulk; compat_caddr_t addr; if (get_user(bulk.ep, &p32->ep) || get_user(bulk.len, &p32->len) || get_user(bulk.timeout, &p32->timeout) || get_user(addr, &p32->data)) return -EFAULT; bulk.data = compat_ptr(addr); return do_proc_bulk(ps, &bulk); } static int proc_disconnectsignal_compat(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_disconnectsignal32 ds; if (copy_from_user(&ds, arg, sizeof(ds))) return -EFAULT; ps->discsignr = ds.signr; ps->disccontext.sival_int = ds.context; return 0; } static int get_urb32(struct usbdevfs_urb *kurb, struct usbdevfs_urb32 __user *uurb) { struct usbdevfs_urb32 urb32; if (copy_from_user(&urb32, uurb, sizeof(*uurb))) return -EFAULT; kurb->type = urb32.type; kurb->endpoint = urb32.endpoint; kurb->status = urb32.status; kurb->flags = urb32.flags; kurb->buffer = compat_ptr(urb32.buffer); kurb->buffer_length = urb32.buffer_length; kurb->actual_length = urb32.actual_length; kurb->start_frame = urb32.start_frame; kurb->number_of_packets = urb32.number_of_packets; kurb->error_count = urb32.error_count; kurb->signr = urb32.signr; kurb->usercontext = compat_ptr(urb32.usercontext); return 0; } static int proc_submiturb_compat(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_urb uurb; sigval_t userurb_sigval; if (get_urb32(&uurb, (struct usbdevfs_urb32 __user *)arg)) return -EFAULT; memset(&userurb_sigval, 0, sizeof(userurb_sigval)); userurb_sigval.sival_int = ptr_to_compat(arg); return proc_do_submiturb(ps, &uurb, ((struct usbdevfs_urb32 __user *)arg)->iso_frame_desc, arg, userurb_sigval); } static int processcompl_compat(struct async *as, void __user * __user *arg) { struct urb *urb = as->urb; struct usbdevfs_urb32 __user *userurb = as->userurb; void __user *addr = as->userurb; unsigned int i; compute_isochronous_actual_length(urb); if (as->userbuffer && urb->actual_length) { if (copy_urb_data_to_user(as->userbuffer, urb)) return -EFAULT; } if (put_user(as->status, &userurb->status)) return -EFAULT; if (put_user(urb->actual_length, &userurb->actual_length)) return -EFAULT; if (put_user(urb->error_count, &userurb->error_count)) return -EFAULT; if (usb_endpoint_xfer_isoc(&urb->ep->desc)) { for (i = 0; i < urb->number_of_packets; i++) { if (put_user(urb->iso_frame_desc[i].actual_length, &userurb->iso_frame_desc[i].actual_length)) return -EFAULT; if (put_user(urb->iso_frame_desc[i].status, &userurb->iso_frame_desc[i].status)) return -EFAULT; } } if (put_user(ptr_to_compat(addr), (u32 __user *)arg)) return -EFAULT; return 0; } static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg) { struct async *as = reap_as(ps); if (as) { int retval; snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); return retval; } if (signal_pending(current)) return -EINTR; return -ENODEV; } static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *arg) { int retval; struct async *as; as = async_getcompleted(ps); if (as) { 
snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); } else { retval = (connected(ps) ? -EAGAIN : -ENODEV); } return retval; } #endif static int proc_disconnectsignal(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_disconnectsignal ds; if (copy_from_user(&ds, arg, sizeof(ds))) return -EFAULT; ps->discsignr = ds.signr; ps->disccontext.sival_ptr = ds.context; return 0; } static int proc_claiminterface(struct usb_dev_state *ps, void __user *arg) { unsigned int ifnum; if (get_user(ifnum, (unsigned int __user *)arg)) return -EFAULT; return claimintf(ps, ifnum); } static int proc_releaseinterface(struct usb_dev_state *ps, void __user *arg) { unsigned int ifnum; int ret; if (get_user(ifnum, (unsigned int __user *)arg)) return -EFAULT; ret = releaseintf(ps, ifnum); if (ret < 0) return ret; destroy_async_on_interface(ps, ifnum); return 0; } static int proc_ioctl(struct usb_dev_state *ps, struct usbdevfs_ioctl *ctl) { int size; void *buf = NULL; int retval = 0; struct usb_interface *intf = NULL; struct usb_driver *driver = NULL; if (ps->privileges_dropped) return -EACCES; if (!connected(ps)) return -ENODEV; /* alloc buffer */ size = _IOC_SIZE(ctl->ioctl_code); if (size > 0) { buf = kmalloc(size, GFP_KERNEL); if (buf == NULL) return -ENOMEM; if ((_IOC_DIR(ctl->ioctl_code) & _IOC_WRITE)) { if (copy_from_user(buf, ctl->data, size)) { kfree(buf); return -EFAULT; } } else { memset(buf, 0, size); } } if (ps->dev->state != USB_STATE_CONFIGURED) retval = -EHOSTUNREACH; else if (!(intf = usb_ifnum_to_if(ps->dev, ctl->ifno))) retval = -EINVAL; else switch (ctl->ioctl_code) { /* disconnect kernel driver from interface */ case USBDEVFS_DISCONNECT: if (intf->dev.driver) { driver = to_usb_driver(intf->dev.driver); dev_dbg(&intf->dev, "disconnect by usbfs\n"); usb_driver_release_interface(driver, intf); } else retval = -ENODATA; break; /* let kernel drivers try to (re)bind to the interface */ case USBDEVFS_CONNECT: if (!intf->dev.driver) retval = device_attach(&intf->dev); else retval = -EBUSY; break; /* talk directly to the interface's driver */ default: if (intf->dev.driver) driver = to_usb_driver(intf->dev.driver); if (driver == NULL || driver->unlocked_ioctl == NULL) { retval = -ENOTTY; } else { retval = driver->unlocked_ioctl(intf, ctl->ioctl_code, buf); if (retval == -ENOIOCTLCMD) retval = -ENOTTY; } } /* cleanup and return */ if (retval >= 0 && (_IOC_DIR(ctl->ioctl_code) & _IOC_READ) != 0 && size > 0 && copy_to_user(ctl->data, buf, size) != 0) retval = -EFAULT; kfree(buf); return retval; } static int proc_ioctl_default(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_ioctl ctrl; if (copy_from_user(&ctrl, arg, sizeof(ctrl))) return -EFAULT; return proc_ioctl(ps, &ctrl); } #ifdef CONFIG_COMPAT static int proc_ioctl_compat(struct usb_dev_state *ps, compat_uptr_t arg) { struct usbdevfs_ioctl32 ioc32; struct usbdevfs_ioctl ctrl; if (copy_from_user(&ioc32, compat_ptr(arg), sizeof(ioc32))) return -EFAULT; ctrl.ifno = ioc32.ifno; ctrl.ioctl_code = ioc32.ioctl_code; ctrl.data = compat_ptr(ioc32.data); return proc_ioctl(ps, &ctrl); } #endif static int proc_claim_port(struct usb_dev_state *ps, void __user *arg) { unsigned portnum; int rc; if (get_user(portnum, (unsigned __user *) arg)) return -EFAULT; rc = usb_hub_claim_port(ps->dev, portnum, ps); if (rc == 0) snoop(&ps->dev->dev, "port %d claimed by process %d: %s\n", portnum, task_pid_nr(current), current->comm); return rc; } static int proc_release_port(struct 
usb_dev_state *ps, void __user *arg) { unsigned portnum; if (get_user(portnum, (unsigned __user *) arg)) return -EFAULT; return usb_hub_release_port(ps->dev, portnum, ps); } static int proc_get_capabilities(struct usb_dev_state *ps, void __user *arg) { __u32 caps; caps = USBDEVFS_CAP_ZERO_PACKET | USBDEVFS_CAP_NO_PACKET_SIZE_LIM | USBDEVFS_CAP_REAP_AFTER_DISCONNECT | USBDEVFS_CAP_MMAP | USBDEVFS_CAP_DROP_PRIVILEGES | USBDEVFS_CAP_CONNINFO_EX | MAYBE_CAP_SUSPEND; if (!ps->dev->bus->no_stop_on_short) caps |= USBDEVFS_CAP_BULK_CONTINUATION; if (ps->dev->bus->sg_tablesize) caps |= USBDEVFS_CAP_BULK_SCATTER_GATHER; if (put_user(caps, (__u32 __user *)arg)) return -EFAULT; return 0; } static int proc_disconnect_claim(struct usb_dev_state *ps, void __user *arg) { struct usbdevfs_disconnect_claim dc; struct usb_interface *intf; if (copy_from_user(&dc, arg, sizeof(dc))) return -EFAULT; intf = usb_ifnum_to_if(ps->dev, dc.interface); if (!intf) return -EINVAL; if (intf->dev.driver) { struct usb_driver *driver = to_usb_driver(intf->dev.driver); if (ps->privileges_dropped) return -EACCES; if ((dc.flags & USBDEVFS_DISCONNECT_CLAIM_IF_DRIVER) && strncmp(dc.driver, intf->dev.driver->name, sizeof(dc.driver)) != 0) return -EBUSY; if ((dc.flags & USBDEVFS_DISCONNECT_CLAIM_EXCEPT_DRIVER) && strncmp(dc.driver, intf->dev.driver->name, sizeof(dc.driver)) == 0) return -EBUSY; dev_dbg(&intf->dev, "disconnect by usbfs\n"); usb_driver_release_interface(driver, intf); } return claimintf(ps, dc.interface); } static int proc_alloc_streams(struct usb_dev_state *ps, void __user *arg) { unsigned num_streams, num_eps; struct usb_host_endpoint **eps; struct usb_interface *intf; int r; r = parse_usbdevfs_streams(ps, arg, &num_streams, &num_eps, &eps, &intf); if (r) return r; destroy_async_on_interface(ps, intf->altsetting[0].desc.bInterfaceNumber); r = usb_alloc_streams(intf, eps, num_eps, num_streams, GFP_KERNEL); kfree(eps); return r; } static int proc_free_streams(struct usb_dev_state *ps, void __user *arg) { unsigned num_eps; struct usb_host_endpoint **eps; struct usb_interface *intf; int r; r = parse_usbdevfs_streams(ps, arg, NULL, &num_eps, &eps, &intf); if (r) return r; destroy_async_on_interface(ps, intf->altsetting[0].desc.bInterfaceNumber); r = usb_free_streams(intf, eps, num_eps, GFP_KERNEL); kfree(eps); return r; } static int proc_drop_privileges(struct usb_dev_state *ps, void __user *arg) { u32 data; if (copy_from_user(&data, arg, sizeof(data))) return -EFAULT; /* This is a one way operation. Once privileges are * dropped, you cannot regain them. You may however reissue * this ioctl to shrink the allowed interfaces mask. 
*/ ps->interface_allowed_mask &= data; ps->privileges_dropped = true; return 0; } static int proc_forbid_suspend(struct usb_dev_state *ps) { int ret = 0; if (ps->suspend_allowed) { ret = usb_autoresume_device(ps->dev); if (ret == 0) ps->suspend_allowed = false; else if (ret != -ENODEV) ret = -EIO; } return ret; } static int proc_allow_suspend(struct usb_dev_state *ps) { if (!connected(ps)) return -ENODEV; WRITE_ONCE(ps->not_yet_resumed, 1); if (!ps->suspend_allowed) { usb_autosuspend_device(ps->dev); ps->suspend_allowed = true; } return 0; } static int proc_wait_for_resume(struct usb_dev_state *ps) { int ret; usb_unlock_device(ps->dev); ret = wait_event_interruptible(ps->wait_for_resume, READ_ONCE(ps->not_yet_resumed) == 0); usb_lock_device(ps->dev); if (ret != 0) return -EINTR; return proc_forbid_suspend(ps); } /* * NOTE: All requests here that have interface numbers as parameters * are assuming that somehow the configuration has been prevented from * changing. But there's no mechanism to ensure that... */ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, void __user *p) { struct usb_dev_state *ps = file->private_data; struct inode *inode = file_inode(file); struct usb_device *dev = ps->dev; int ret = -ENOTTY; if (!(file->f_mode & FMODE_WRITE)) return -EPERM; usb_lock_device(dev); /* Reap operations are allowed even after disconnection */ switch (cmd) { case USBDEVFS_REAPURB: snoop(&dev->dev, "%s: REAPURB\n", __func__); ret = proc_reapurb(ps, p); goto done; case USBDEVFS_REAPURBNDELAY: snoop(&dev->dev, "%s: REAPURBNDELAY\n", __func__); ret = proc_reapurbnonblock(ps, p); goto done; #ifdef CONFIG_COMPAT case USBDEVFS_REAPURB32: snoop(&dev->dev, "%s: REAPURB32\n", __func__); ret = proc_reapurb_compat(ps, p); goto done; case USBDEVFS_REAPURBNDELAY32: snoop(&dev->dev, "%s: REAPURBNDELAY32\n", __func__); ret = proc_reapurbnonblock_compat(ps, p); goto done; #endif } if (!connected(ps)) { usb_unlock_device(dev); return -ENODEV; } switch (cmd) { case USBDEVFS_CONTROL: snoop(&dev->dev, "%s: CONTROL\n", __func__); ret = proc_control(ps, p); if (ret >= 0) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); break; case USBDEVFS_BULK: snoop(&dev->dev, "%s: BULK\n", __func__); ret = proc_bulk(ps, p); if (ret >= 0) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); break; case USBDEVFS_RESETEP: snoop(&dev->dev, "%s: RESETEP\n", __func__); ret = proc_resetep(ps, p); if (ret >= 0) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); break; case USBDEVFS_RESET: snoop(&dev->dev, "%s: RESET\n", __func__); ret = proc_resetdevice(ps); break; case USBDEVFS_CLEAR_HALT: snoop(&dev->dev, "%s: CLEAR_HALT\n", __func__); ret = proc_clearhalt(ps, p); if (ret >= 0) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); break; case USBDEVFS_GETDRIVER: snoop(&dev->dev, "%s: GETDRIVER\n", __func__); ret = proc_getdriver(ps, p); break; case USBDEVFS_CONNECTINFO: snoop(&dev->dev, "%s: CONNECTINFO\n", __func__); ret = proc_connectinfo(ps, p); break; case USBDEVFS_SETINTERFACE: snoop(&dev->dev, "%s: SETINTERFACE\n", __func__); ret = proc_setintf(ps, p); break; case USBDEVFS_SETCONFIGURATION: snoop(&dev->dev, "%s: SETCONFIGURATION\n", __func__); ret = proc_setconfig(ps, p); break; case USBDEVFS_SUBMITURB: snoop(&dev->dev, "%s: SUBMITURB\n", __func__); ret = proc_submiturb(ps, p); if (ret >= 0) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); break; #ifdef CONFIG_COMPAT case USBDEVFS_CONTROL32: snoop(&dev->dev, "%s: CONTROL32\n", __func__); ret = 
proc_control_compat(ps, p); if (ret >= 0) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); break; case USBDEVFS_BULK32: snoop(&dev->dev, "%s: BULK32\n", __func__); ret = proc_bulk_compat(ps, p); if (ret >= 0) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); break; case USBDEVFS_DISCSIGNAL32: snoop(&dev->dev, "%s: DISCSIGNAL32\n", __func__); ret = proc_disconnectsignal_compat(ps, p); break; case USBDEVFS_SUBMITURB32: snoop(&dev->dev, "%s: SUBMITURB32\n", __func__); ret = proc_submiturb_compat(ps, p); if (ret >= 0) inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); break; case USBDEVFS_IOCTL32: snoop(&dev->dev, "%s: IOCTL32\n", __func__); ret = proc_ioctl_compat(ps, ptr_to_compat(p)); break; #endif case USBDEVFS_DISCARDURB: snoop(&dev->dev, "%s: DISCARDURB %px\n", __func__, p); ret = proc_unlinkurb(ps, p); break; case USBDEVFS_DISCSIGNAL: snoop(&dev->dev, "%s: DISCSIGNAL\n", __func__); ret = proc_disconnectsignal(ps, p); break; case USBDEVFS_CLAIMINTERFACE: snoop(&dev->dev, "%s: CLAIMINTERFACE\n", __func__); ret = proc_claiminterface(ps, p); break; case USBDEVFS_RELEASEINTERFACE: snoop(&dev->dev, "%s: RELEASEINTERFACE\n", __func__); ret = proc_releaseinterface(ps, p); break; case USBDEVFS_IOCTL: snoop(&dev->dev, "%s: IOCTL\n", __func__); ret = proc_ioctl_default(ps, p); break; case USBDEVFS_CLAIM_PORT: snoop(&dev->dev, "%s: CLAIM_PORT\n", __func__); ret = proc_claim_port(ps, p); break; case USBDEVFS_RELEASE_PORT: snoop(&dev->dev, "%s: RELEASE_PORT\n", __func__); ret = proc_release_port(ps, p); break; case USBDEVFS_GET_CAPABILITIES: ret = proc_get_capabilities(ps, p); break; case USBDEVFS_DISCONNECT_CLAIM: ret = proc_disconnect_claim(ps, p); break; case USBDEVFS_ALLOC_STREAMS: ret = proc_alloc_streams(ps, p); break; case USBDEVFS_FREE_STREAMS: ret = proc_free_streams(ps, p); break; case USBDEVFS_DROP_PRIVILEGES: ret = proc_drop_privileges(ps, p); break; case USBDEVFS_GET_SPEED: ret = ps->dev->speed; break; case USBDEVFS_FORBID_SUSPEND: ret = proc_forbid_suspend(ps); break; case USBDEVFS_ALLOW_SUSPEND: ret = proc_allow_suspend(ps); break; case USBDEVFS_WAIT_FOR_RESUME: ret = proc_wait_for_resume(ps); break; } /* Handle variable-length commands */ switch (cmd & ~IOCSIZE_MASK) { case USBDEVFS_CONNINFO_EX(0): ret = proc_conninfo_ex(ps, p, _IOC_SIZE(cmd)); break; } done: usb_unlock_device(dev); if (ret >= 0) inode_set_atime_to_ts(inode, current_time(inode)); return ret; } static long usbdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret; ret = usbdev_do_ioctl(file, cmd, (void __user *)arg); return ret; } /* No kernel lock - fine */ static __poll_t usbdev_poll(struct file *file, struct poll_table_struct *wait) { struct usb_dev_state *ps = file->private_data; __poll_t mask = 0; poll_wait(file, &ps->wait, wait); if (file->f_mode & FMODE_WRITE && !list_empty(&ps->async_completed)) mask |= EPOLLOUT | EPOLLWRNORM; if (!connected(ps)) mask |= EPOLLHUP; if (list_empty(&ps->list)) mask |= EPOLLERR; return mask; } const struct file_operations usbdev_file_operations = { .owner = THIS_MODULE, .llseek = no_seek_end_llseek, .read = usbdev_read, .poll = usbdev_poll, .unlocked_ioctl = usbdev_ioctl, .compat_ioctl = compat_ptr_ioctl, .mmap = usbdev_mmap, .open = usbdev_open, .release = usbdev_release, }; static void usbdev_remove(struct usb_device *udev) { struct usb_dev_state *ps; /* Protect against simultaneous resume */ mutex_lock(&usbfs_mutex); while (!list_empty(&udev->filelist)) { ps = list_entry(udev->filelist.next, struct usb_dev_state, list); 
destroy_all_async(ps); wake_up_all(&ps->wait); WRITE_ONCE(ps->not_yet_resumed, 0); wake_up_all(&ps->wait_for_resume); list_del_init(&ps->list); if (ps->discsignr) kill_pid_usb_asyncio(ps->discsignr, EPIPE, ps->disccontext, ps->disc_pid, ps->cred); } mutex_unlock(&usbfs_mutex); } static int usbdev_notify(struct notifier_block *self, unsigned long action, void *dev) { switch (action) { case USB_DEVICE_ADD: break; case USB_DEVICE_REMOVE: usbdev_remove(dev); break; } return NOTIFY_OK; } static struct notifier_block usbdev_nb = { .notifier_call = usbdev_notify, }; static struct cdev usb_device_cdev; int __init usb_devio_init(void) { int retval; retval = register_chrdev_region(USB_DEVICE_DEV, USB_DEVICE_MAX, "usb_device"); if (retval) { printk(KERN_ERR "Unable to register minors for usb_device\n"); goto out; } cdev_init(&usb_device_cdev, &usbdev_file_operations); retval = cdev_add(&usb_device_cdev, USB_DEVICE_DEV, USB_DEVICE_MAX); if (retval) { printk(KERN_ERR "Unable to get usb_device major %d\n", USB_DEVICE_MAJOR); goto error_cdev; } usb_register_notify(&usbdev_nb); out: return retval; error_cdev: unregister_chrdev_region(USB_DEVICE_DEV, USB_DEVICE_MAX); goto out; } void usb_devio_cleanup(void) { usb_unregister_notify(&usbdev_nb); cdev_del(&usb_device_cdev); unregister_chrdev_region(USB_DEVICE_DEV, USB_DEVICE_MAX); }
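/*
 * Illustrative sketch (not part of the kernel sources above): the
 * USBDEVFS_CONTROL path serviced by do_proc_control(), as seen from a
 * userspace program. It issues a standard GET_DESCRIPTOR request for the
 * 18-byte device descriptor; "fd" is assumed to be an already-opened
 * usbfs file descriptor.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/usbdevice_fs.h>

static int example_get_device_descriptor(int fd)
{
        unsigned char desc[18];
        struct usbdevfs_ctrltransfer ctrl = {
                .bRequestType = 0x80,           /* IN | standard | device */
                .bRequest     = 0x06,           /* GET_DESCRIPTOR */
                .wValue       = 0x0100,         /* type DEVICE, index 0 */
                .wIndex       = 0,
                .wLength      = sizeof(desc),
                .timeout      = 1000,           /* milliseconds */
                .data         = desc,
        };
        int len = ioctl(fd, USBDEVFS_CONTROL, &ctrl);

        if (len < 0)
                perror("USBDEVFS_CONTROL");
        return len;             /* bytes actually transferred, or -1 on error */
}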
// SPDX-License-Identifier: GPL-2.0
/*
 * The USB Monitor, inspired by Dave Harding's USBMon.
 *
 * mon_main.c: Main file, module initiation and exit, registrations, etc.
 *
 * Copyright (C) 2005 Pete Zaitcev (zaitcev@redhat.com)
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/hcd.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/mutex.h>

#include "usb_mon.h"

static void mon_stop(struct mon_bus *mbus);
static void mon_dissolve(struct mon_bus *mbus, struct usb_bus *ubus);
static void mon_bus_drop(struct kref *r);
static void mon_bus_init(struct usb_bus *ubus);

DEFINE_MUTEX(mon_lock);

struct mon_bus mon_bus0;		/* Pseudo bus meaning "all buses" */
static LIST_HEAD(mon_buses);		/* All buses we know: struct mon_bus */

/*
 * Link a reader into the bus.
 *
 * This must be called with mon_lock taken because of mbus->ref.
 */
void mon_reader_add(struct mon_bus *mbus, struct mon_reader *r)
{
        unsigned long flags;
        struct list_head *p;

        spin_lock_irqsave(&mbus->lock, flags);
        if (mbus->nreaders == 0) {
                if (mbus == &mon_bus0) {
                        list_for_each (p, &mon_buses) {
                                struct mon_bus *m1;
                                m1 = list_entry(p, struct mon_bus, bus_link);
                                m1->u_bus->monitored = 1;
                        }
                } else {
                        mbus->u_bus->monitored = 1;
                }
        }
        mbus->nreaders++;
        list_add_tail(&r->r_link, &mbus->r_list);
        spin_unlock_irqrestore(&mbus->lock, flags);

        kref_get(&mbus->ref);
}

/*
 * Unlink reader from the bus.
 *
 * This is called with mon_lock taken, so we can decrement mbus->ref.
*/ void mon_reader_del(struct mon_bus *mbus, struct mon_reader *r) { unsigned long flags; spin_lock_irqsave(&mbus->lock, flags); list_del(&r->r_link); --mbus->nreaders; if (mbus->nreaders == 0) mon_stop(mbus); spin_unlock_irqrestore(&mbus->lock, flags); kref_put(&mbus->ref, mon_bus_drop); } /* */ static void mon_bus_submit(struct mon_bus *mbus, struct urb *urb) { unsigned long flags; struct mon_reader *r; spin_lock_irqsave(&mbus->lock, flags); mbus->cnt_events++; list_for_each_entry(r, &mbus->r_list, r_link) r->rnf_submit(r->r_data, urb); spin_unlock_irqrestore(&mbus->lock, flags); } static void mon_submit(struct usb_bus *ubus, struct urb *urb) { struct mon_bus *mbus; mbus = ubus->mon_bus; if (mbus != NULL) mon_bus_submit(mbus, urb); mon_bus_submit(&mon_bus0, urb); } /* */ static void mon_bus_submit_error(struct mon_bus *mbus, struct urb *urb, int error) { unsigned long flags; struct mon_reader *r; spin_lock_irqsave(&mbus->lock, flags); mbus->cnt_events++; list_for_each_entry(r, &mbus->r_list, r_link) r->rnf_error(r->r_data, urb, error); spin_unlock_irqrestore(&mbus->lock, flags); } static void mon_submit_error(struct usb_bus *ubus, struct urb *urb, int error) { struct mon_bus *mbus; mbus = ubus->mon_bus; if (mbus != NULL) mon_bus_submit_error(mbus, urb, error); mon_bus_submit_error(&mon_bus0, urb, error); } /* */ static void mon_bus_complete(struct mon_bus *mbus, struct urb *urb, int status) { unsigned long flags; struct mon_reader *r; spin_lock_irqsave(&mbus->lock, flags); mbus->cnt_events++; list_for_each_entry(r, &mbus->r_list, r_link) r->rnf_complete(r->r_data, urb, status); spin_unlock_irqrestore(&mbus->lock, flags); } static void mon_complete(struct usb_bus *ubus, struct urb *urb, int status) { struct mon_bus *mbus; mbus = ubus->mon_bus; if (mbus != NULL) mon_bus_complete(mbus, urb, status); mon_bus_complete(&mon_bus0, urb, status); } /* int (*unlink_urb) (struct urb *urb, int status); */ /* * Stop monitoring. */ static void mon_stop(struct mon_bus *mbus) { struct usb_bus *ubus; if (mbus == &mon_bus0) { list_for_each_entry(mbus, &mon_buses, bus_link) { /* * We do not change nreaders here, so rely on mon_lock. */ if (mbus->nreaders == 0 && (ubus = mbus->u_bus) != NULL) ubus->monitored = 0; } } else { /* * A stop can be called for a dissolved mon_bus in case of * a reader staying across an rmmod foo_hcd, so test ->u_bus. */ if (mon_bus0.nreaders == 0 && (ubus = mbus->u_bus) != NULL) { ubus->monitored = 0; mb(); } } } /* * Add a USB bus (usually by a modprobe foo-hcd) * * This does not return an error code because the core cannot care less * if monitoring is not established. */ static void mon_bus_add(struct usb_bus *ubus) { mon_bus_init(ubus); mutex_lock(&mon_lock); if (mon_bus0.nreaders != 0) ubus->monitored = 1; mutex_unlock(&mon_lock); } /* * Remove a USB bus (either from rmmod foo-hcd or from a hot-remove event). 
*/ static void mon_bus_remove(struct usb_bus *ubus) { struct mon_bus *mbus = ubus->mon_bus; mutex_lock(&mon_lock); list_del(&mbus->bus_link); if (mbus->text_inited) mon_text_del(mbus); if (mbus->bin_inited) mon_bin_del(mbus); mon_dissolve(mbus, ubus); kref_put(&mbus->ref, mon_bus_drop); mutex_unlock(&mon_lock); } static int mon_notify(struct notifier_block *self, unsigned long action, void *dev) { switch (action) { case USB_BUS_ADD: mon_bus_add(dev); break; case USB_BUS_REMOVE: mon_bus_remove(dev); } return NOTIFY_OK; } static struct notifier_block mon_nb = { .notifier_call = mon_notify, }; /* * Ops */ static const struct usb_mon_operations mon_ops_0 = { .urb_submit = mon_submit, .urb_submit_error = mon_submit_error, .urb_complete = mon_complete, }; /* * Tear usb_bus and mon_bus apart. */ static void mon_dissolve(struct mon_bus *mbus, struct usb_bus *ubus) { if (ubus->monitored) { ubus->monitored = 0; mb(); } ubus->mon_bus = NULL; mbus->u_bus = NULL; mb(); /* We want synchronize_irq() here, but that needs an argument. */ } /* */ static void mon_bus_drop(struct kref *r) { struct mon_bus *mbus = container_of(r, struct mon_bus, ref); kfree(mbus); } /* * Initialize a bus for us: * - allocate mon_bus * - refcount USB bus struct * - link */ static void mon_bus_init(struct usb_bus *ubus) { struct mon_bus *mbus; mbus = kzalloc(sizeof(struct mon_bus), GFP_KERNEL); if (mbus == NULL) goto err_alloc; kref_init(&mbus->ref); spin_lock_init(&mbus->lock); INIT_LIST_HEAD(&mbus->r_list); /* * We don't need to take a reference to ubus, because we receive * a notification if the bus is about to be removed. */ mbus->u_bus = ubus; ubus->mon_bus = mbus; mbus->text_inited = mon_text_add(mbus, ubus); mbus->bin_inited = mon_bin_add(mbus, ubus); mutex_lock(&mon_lock); list_add_tail(&mbus->bus_link, &mon_buses); mutex_unlock(&mon_lock); return; err_alloc: return; } static void mon_bus0_init(void) { struct mon_bus *mbus = &mon_bus0; kref_init(&mbus->ref); spin_lock_init(&mbus->lock); INIT_LIST_HEAD(&mbus->r_list); mbus->text_inited = mon_text_add(mbus, NULL); mbus->bin_inited = mon_bin_add(mbus, NULL); } /* * Search a USB bus by number. Notice that USB bus numbers start from one, * which we may later use to identify "all" with zero. * * This function must be called with mon_lock held. * * This is obviously inefficient and may be revised in the future. 
*/ struct mon_bus *mon_bus_lookup(unsigned int num) { struct mon_bus *mbus; if (num == 0) { return &mon_bus0; } list_for_each_entry(mbus, &mon_buses, bus_link) { if (mbus->u_bus->busnum == num) { return mbus; } } return NULL; } static int __init mon_init(void) { struct usb_bus *ubus; int rc, id; if ((rc = mon_text_init()) != 0) goto err_text; if ((rc = mon_bin_init()) != 0) goto err_bin; mon_bus0_init(); if (usb_mon_register(&mon_ops_0) != 0) { printk(KERN_NOTICE TAG ": unable to register with the core\n"); rc = -ENODEV; goto err_reg; } // MOD_INC_USE_COUNT(which_module?); mutex_lock(&usb_bus_idr_lock); idr_for_each_entry(&usb_bus_idr, ubus, id) mon_bus_init(ubus); usb_register_notify(&mon_nb); mutex_unlock(&usb_bus_idr_lock); return 0; err_reg: mon_bin_exit(); err_bin: mon_text_exit(); err_text: return rc; } static void __exit mon_exit(void) { struct mon_bus *mbus; struct list_head *p; usb_unregister_notify(&mon_nb); usb_mon_deregister(); mutex_lock(&mon_lock); while (!list_empty(&mon_buses)) { p = mon_buses.next; mbus = list_entry(p, struct mon_bus, bus_link); list_del(p); if (mbus->text_inited) mon_text_del(mbus); if (mbus->bin_inited) mon_bin_del(mbus); /* * This never happens, because the open/close paths in * file level maintain module use counters and so rmmod fails * before reaching here. However, better be safe... */ if (mbus->nreaders) { printk(KERN_ERR TAG ": Outstanding opens (%d) on usb%d, leaking...\n", mbus->nreaders, mbus->u_bus->busnum); kref_get(&mbus->ref); /* Force leak */ } mon_dissolve(mbus, mbus->u_bus); kref_put(&mbus->ref, mon_bus_drop); } mbus = &mon_bus0; if (mbus->text_inited) mon_text_del(mbus); if (mbus->bin_inited) mon_bin_del(mbus); mutex_unlock(&mon_lock); mon_text_exit(); mon_bin_exit(); } module_init(mon_init); module_exit(mon_exit); MODULE_LICENSE("GPL");
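/*
 * Illustrative sketch (not part of this file): how a reader inside the
 * usbmon module attaches to a bus, in the style of mon_text.c/mon_bin.c.
 * The my_* names and callback bodies are hypothetical; only struct
 * mon_reader, mon_reader_add() and mon_reader_del() come from usbmon
 * itself, and mon_reader_add() must be called under mon_lock as noted
 * in mon_main.c above.
 */
#include <linux/usb.h>
#include "usb_mon.h"

struct my_reader {
        struct mon_reader r;
        unsigned long cnt_submit;
};

static void my_submit(void *data, struct urb *urb)
{
        struct my_reader *mr = data;

        mr->cnt_submit++;       /* a real reader would format or queue the event */
}

static void my_error(void *data, struct urb *urb, int error)
{
        /* submission failed before reaching the hardware */
}

static void my_complete(void *data, struct urb *urb, int status)
{
        /* URB gave back; status is the completion code */
}

static void my_attach(struct mon_bus *mbus, struct my_reader *mr)
{
        mr->r.r_data = mr;
        mr->r.rnf_submit = my_submit;
        mr->r.rnf_error = my_error;
        mr->r.rnf_complete = my_complete;
        mon_reader_add(mbus, &mr->r);   /* caller holds mon_lock */
}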
// SPDX-License-Identifier: GPL-2.0
/*
 * Convert integer string representation to an integer.
 * If an integer doesn't fit into specified type, -E is returned.
 *
 * Integer starts with optional sign.
 * kstrtou*() functions do not accept sign "-".
 *
 * Radix 0 means autodetection: leading "0x" implies radix 16,
 * leading "0" implies radix 8, otherwise radix is 10.
 * Autodetection hints work after optional sign, but not before.
 *
 * If -E is returned, result is not touched.
 */
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/kstrtox.h>
#include <linux/math64.h>
#include <linux/types.h>
#include <linux/uaccess.h>

#include "kstrtox.h"

noinline
const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
{
        if (*base == 0) {
                if (s[0] == '0') {
                        if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
                                *base = 16;
                        else
                                *base = 8;
                } else
                        *base = 10;
        }
        if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
                s += 2;
        return s;
}

/*
 * Convert non-negative integer string representation in explicitly given radix
 * to an integer. A maximum of max_chars characters will be converted.
 *
 * Return number of characters consumed maybe or-ed with overflow bit.
 * If overflow occurs, result integer (incorrect) is still returned.
 *
 * Don't you dare use this function.
*/ noinline unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p, size_t max_chars) { unsigned long long res; unsigned int rv; res = 0; rv = 0; while (max_chars--) { unsigned int c = *s; unsigned int lc = _tolower(c); unsigned int val; if ('0' <= c && c <= '9') val = c - '0'; else if ('a' <= lc && lc <= 'f') val = lc - 'a' + 10; else break; if (val >= base) break; /* * Check for overflow only if we are within range of * it in the max base we support (16) */ if (unlikely(res & (~0ull << 60))) { if (res > div_u64(ULLONG_MAX - val, base)) rv |= KSTRTOX_OVERFLOW; } res = res * base + val; rv++; s++; } *p = res; return rv; } noinline unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p) { return _parse_integer_limit(s, base, p, INT_MAX); } static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) { unsigned long long _res; unsigned int rv; s = _parse_integer_fixup_radix(s, &base); rv = _parse_integer(s, base, &_res); if (rv & KSTRTOX_OVERFLOW) return -ERANGE; if (rv == 0) return -EINVAL; s += rv; if (*s == '\n') s++; if (*s) return -EINVAL; *res = _res; return 0; } /** * kstrtoull - convert a string to an unsigned long long * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign, but not a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoull(). Return code must be checked. */ noinline int kstrtoull(const char *s, unsigned int base, unsigned long long *res) { if (s[0] == '+') s++; return _kstrtoull(s, base, res); } EXPORT_SYMBOL(kstrtoull); /** * kstrtoll - convert a string to a long long * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign or a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoll(). Return code must be checked. */ noinline int kstrtoll(const char *s, unsigned int base, long long *res) { unsigned long long tmp; int rv; if (s[0] == '-') { rv = _kstrtoull(s + 1, base, &tmp); if (rv < 0) return rv; if ((long long)-tmp > 0) return -ERANGE; *res = -tmp; } else { rv = kstrtoull(s, base, &tmp); if (rv < 0) return rv; if ((long long)tmp < 0) return -ERANGE; *res = tmp; } return 0; } EXPORT_SYMBOL(kstrtoll); /* Internal, do not use. 
*/ int _kstrtoul(const char *s, unsigned int base, unsigned long *res) { unsigned long long tmp; int rv; rv = kstrtoull(s, base, &tmp); if (rv < 0) return rv; if (tmp != (unsigned long)tmp) return -ERANGE; *res = tmp; return 0; } EXPORT_SYMBOL(_kstrtoul); /* Internal, do not use. */ int _kstrtol(const char *s, unsigned int base, long *res) { long long tmp; int rv; rv = kstrtoll(s, base, &tmp); if (rv < 0) return rv; if (tmp != (long)tmp) return -ERANGE; *res = tmp; return 0; } EXPORT_SYMBOL(_kstrtol); /** * kstrtouint - convert a string to an unsigned int * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign, but not a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoul(). Return code must be checked. */ noinline int kstrtouint(const char *s, unsigned int base, unsigned int *res) { unsigned long long tmp; int rv; rv = kstrtoull(s, base, &tmp); if (rv < 0) return rv; if (tmp != (unsigned int)tmp) return -ERANGE; *res = tmp; return 0; } EXPORT_SYMBOL(kstrtouint); /** * kstrtoint - convert a string to an int * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign or a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtol(). Return code must be checked. 
*/ noinline int kstrtoint(const char *s, unsigned int base, int *res) { long long tmp; int rv; rv = kstrtoll(s, base, &tmp); if (rv < 0) return rv; if (tmp != (int)tmp) return -ERANGE; *res = tmp; return 0; } EXPORT_SYMBOL(kstrtoint); noinline int kstrtou16(const char *s, unsigned int base, u16 *res) { unsigned long long tmp; int rv; rv = kstrtoull(s, base, &tmp); if (rv < 0) return rv; if (tmp != (u16)tmp) return -ERANGE; *res = tmp; return 0; } EXPORT_SYMBOL(kstrtou16); noinline int kstrtos16(const char *s, unsigned int base, s16 *res) { long long tmp; int rv; rv = kstrtoll(s, base, &tmp); if (rv < 0) return rv; if (tmp != (s16)tmp) return -ERANGE; *res = tmp; return 0; } EXPORT_SYMBOL(kstrtos16); noinline int kstrtou8(const char *s, unsigned int base, u8 *res) { unsigned long long tmp; int rv; rv = kstrtoull(s, base, &tmp); if (rv < 0) return rv; if (tmp != (u8)tmp) return -ERANGE; *res = tmp; return 0; } EXPORT_SYMBOL(kstrtou8); noinline int kstrtos8(const char *s, unsigned int base, s8 *res) { long long tmp; int rv; rv = kstrtoll(s, base, &tmp); if (rv < 0) return rv; if (tmp != (s8)tmp) return -ERANGE; *res = tmp; return 0; } EXPORT_SYMBOL(kstrtos8); /** * kstrtobool - convert common user inputs into boolean values * @s: input string * @res: result * * This routine returns 0 iff the first character is one of 'YyTt1NnFf0', or * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value * pointed to by res is updated upon finding a match. */ noinline int kstrtobool(const char *s, bool *res) { if (!s) return -EINVAL; switch (s[0]) { case 'y': case 'Y': case 't': case 'T': case '1': *res = true; return 0; case 'n': case 'N': case 'f': case 'F': case '0': *res = false; return 0; case 'o': case 'O': switch (s[1]) { case 'n': case 'N': *res = true; return 0; case 'f': case 'F': *res = false; return 0; default: break; } break; default: break; } return -EINVAL; } EXPORT_SYMBOL(kstrtobool); /* * Since "base" would be a nonsense argument, this open-codes the * _from_user helper instead of using the helper macro below. */ int kstrtobool_from_user(const char __user *s, size_t count, bool *res) { /* Longest string needed to differentiate, newline, terminator */ char buf[4]; count = min(count, sizeof(buf) - 1); if (copy_from_user(buf, s, count)) return -EFAULT; buf[count] = '\0'; return kstrtobool(buf, res); } EXPORT_SYMBOL(kstrtobool_from_user); #define kstrto_from_user(f, g, type) \ int f(const char __user *s, size_t count, unsigned int base, type *res) \ { \ /* sign, base 2 representation, newline, terminator */ \ char buf[1 + sizeof(type) * 8 + 1 + 1]; \ \ count = min(count, sizeof(buf) - 1); \ if (copy_from_user(buf, s, count)) \ return -EFAULT; \ buf[count] = '\0'; \ return g(buf, base, res); \ } \ EXPORT_SYMBOL(f) kstrto_from_user(kstrtoull_from_user, kstrtoull, unsigned long long); kstrto_from_user(kstrtoll_from_user, kstrtoll, long long); kstrto_from_user(kstrtoul_from_user, kstrtoul, unsigned long); kstrto_from_user(kstrtol_from_user, kstrtol, long); kstrto_from_user(kstrtouint_from_user, kstrtouint, unsigned int); kstrto_from_user(kstrtoint_from_user, kstrtoint, int); kstrto_from_user(kstrtou16_from_user, kstrtou16, u16); kstrto_from_user(kstrtos16_from_user, kstrtos16, s16); kstrto_from_user(kstrtou8_from_user, kstrtou8, u8); kstrto_from_user(kstrtos8_from_user, kstrtos8, s8);
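/*
 * Example (not part of the original kstrtox.c): a minimal sketch of how a
 * driver might use the helpers above in sysfs store callbacks. The
 * "example_dev" structure and the attribute names are hypothetical; only
 * kstrtouint()/kstrtobool() and their 0 / -ERANGE / -EINVAL return
 * convention come from the code above.
 */
#if 0	/* illustration only; not compiled as part of this file */
#include <linux/device.h>
#include <linux/kstrtox.h>

struct example_dev {
	unsigned int threshold;
	bool enabled;
};

static ssize_t threshold_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct example_dev *ed = dev_get_drvdata(dev);
	unsigned int val;
	int ret;

	/* Base 0: accepts "42", "0x2a" or "052"; a trailing '\n' is tolerated. */
	ret = kstrtouint(buf, 0, &val);
	if (ret)
		return ret;	/* -EINVAL or -ERANGE, as documented above */

	ed->threshold = val;
	return count;
}

static ssize_t enabled_store(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct example_dev *ed = dev_get_drvdata(dev);
	int ret = kstrtobool(buf, &ed->enabled);	/* "y"/"n", "on"/"off", "1"/"0" */

	return ret ? ret : count;
}
#endif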
/* SPDX-License-Identifier: GPL-2.0 */ /* * Macros for manipulating and testing page->flags */ #ifndef PAGE_FLAGS_H #define PAGE_FLAGS_H #include <linux/types.h> #include <linux/bug.h> #include <linux/mmdebug.h> #ifndef __GENERATING_BOUNDS_H #include <linux/mm_types.h> #include <generated/bounds.h> #endif /* !__GENERATING_BOUNDS_H */ /* * Various page->flags bits: * * PG_reserved is set for special pages. The "struct page" of such a page * should in general not be touched (e.g. set dirty) except by its owner. * Pages marked as PG_reserved include: * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS, * initrd, HW tables) * - Pages reserved or allocated early during boot (before the page allocator * was initialized). This includes (depending on the architecture) the * initial vmemmap, initial page tables, crashkernel, elfcorehdr, and much * much more. Once (if ever) freed, PG_reserved is cleared and they will * be given to the page allocator. * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying * to read/write these pages might end badly. Don't touch! * - The zero page(s) * - Pages not added to the page allocator when onlining a section because * they were excluded via the online_page_callback() or because they are * PG_hwpoison. * - Pages allocated in the context of kexec/kdump (loaded kernel image, * control pages, vmcoreinfo) * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are * not marked PG_reserved (as they might be in use by somebody else who does * not respect the caching strategy). * - Pages part of an offline section (struct pages of offline sections should * not be trusted as they will be initialized when first onlined). * - MCA pages on ia64 * - Pages holding CPU notes for POWER Firmware Assisted Dump * - Device memory (e.g. PMEM, DAX, HMM) * Some PG_reserved pages will be excluded from the hibernation image. * PG_reserved does in general not hinder anybody from dumping or swapping * and is no longer required for remap_pfn_range(). ioremap might require it. * Consequently, PG_reserved for a page mapped into user space can indicate * the zero page, the vDSO, MMIO pages or device memory.
* * The PG_private bitflag is set on pagecache pages if they contain filesystem * specific data (which is normally at page->private). It can be used by * private allocations for their own usage. * * During initiation of disk I/O, PG_locked is set. This bit is set before I/O * and cleared when writeback _starts_ or when read _completes_. PG_writeback * is set before writeback starts and cleared when it finishes. * * PG_locked also pins a page in pagecache, and blocks truncation of the file * while it is held. * * page_waitqueue(page) is a wait queue of all tasks waiting for the page * to become unlocked. * * PG_swapbacked is set when a page uses swap as backing storage. These are * usually PageAnon or shmem pages, but please note that even anonymous pages * might lose their PG_swapbacked flag when they can simply be dropped (e.g. as * a result of MADV_FREE). * * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * * PG_error is set to indicate that an I/O error occurred on this page. * * PG_arch_1 is an architecture specific page state bit. The generic code * guarantees that this bit is cleared for a page when it is first entered into * the page cache. * * PG_hwpoison indicates that a page got corrupted in hardware and contains * data with incorrect ECC bits that triggered a machine check. Accessing is * not safe since it may cause another machine check. Don't touch! */ /* * Don't use the pageflags directly. Use the PageFoo macros. * * The page flags field is split into two parts, the main flags area * which extends from the low bits upwards, and the fields area which * extends from the high bits downwards. * * | FIELD | ... | FLAGS | * N-1 ^ 0 * (NR_PAGEFLAGS) * * The fields area is reserved for fields mapping zone, node (for NUMA) and * SPARSEMEM section (for variants of SPARSEMEM that require section ids like * SPARSEMEM_EXTREME with !SPARSEMEM_VMEMMAP). */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ PG_writeback, /* Page is under writeback */ PG_referenced, PG_uptodate, PG_dirty, PG_lru, PG_head, /* Must be in bit 6 */ PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, PG_error, PG_slab, PG_owner_priv_1, /* Owner use. If pagecache, fs may use */ PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) PG_young, PG_idle, #endif #ifdef CONFIG_ARCH_USES_PG_ARCH_X PG_arch_2, PG_arch_3, #endif __NR_PAGEFLAGS, PG_readahead = PG_reclaim, /* * Depending on the way an anonymous folio can be mapped into a page * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped * THP), PG_anon_exclusive may be set only for the head page or for * tail pages of an anonymous folio. For now, we only expect it to be * set on tail pages for PTE-mapped THP.
*/ PG_anon_exclusive = PG_mappedtodisk, /* Filesystems */ PG_checked = PG_owner_priv_1, /* SwapBacked */ PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ /* Two page bits are conscripted by FS-Cache to maintain local caching * state. These bits are set on pages belonging to the netfs's inodes * when those inodes are being locally cached. */ PG_fscache = PG_private_2, /* page backed by cache */ /* XEN */ /* Pinned in Xen as a read-only pagetable page. */ PG_pinned = PG_owner_priv_1, /* Pinned as part of domain save (see xen_mm_pin_all()). */ PG_savepinned = PG_dirty, /* Has a grant mapping of another (foreign) domain's page. */ PG_foreign = PG_owner_priv_1, /* Remapped by swiotlb-xen. */ PG_xen_remapped = PG_owner_priv_1, /* non-lru isolated movable page */ PG_isolated = PG_reclaim, /* Only valid for buddy pages. Used to track pages that are reported */ PG_reported = PG_uptodate, #ifdef CONFIG_MEMORY_HOTPLUG /* For self-hosted memmap pages */ PG_vmemmap_self_hosted = PG_owner_priv_1, #endif /* * Flags only valid for compound pages. Stored in first tail page's * flags word. Cannot use the first 8 flags or any flag marked as * PF_ANY. */ /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) #ifndef __GENERATING_BOUNDS_H #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); /* * Return the real head page struct iff the @page is a fake head page, otherwise * return the @page itself. See Documentation/mm/vmemmap_dedup.rst. */ static __always_inline const struct page *page_fixed_fake_head(const struct page *page) { if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) return page; /* * Only addresses aligned with PAGE_SIZE of struct page may be fake head * struct page. The alignment check aims to avoid accessing the fields ( * e.g. compound_head) of the @page[1]. It can avoid touching a (possibly) * cold cacheline in some cases. */ if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && test_bit(PG_head, &page->flags)) { /* * We can safely access the field of the @page[1] with PG_head * because the @page is a compound page composed of at least * two contiguous pages. */ unsigned long head = READ_ONCE(page[1].compound_head); if (likely(head & 1)) return (const struct page *)(head - 1); } return page; } #else static inline const struct page *page_fixed_fake_head(const struct page *page) { return page; } #endif static __always_inline int page_is_fake_head(const struct page *page) { return page_fixed_fake_head(page) != page; } static inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); if (unlikely(head & 1)) return head - 1; return (unsigned long)page_fixed_fake_head(page); } #define compound_head(page) ((typeof(page))_compound_head(page)) /** * page_folio - Converts from page to folio. * @p: The page. * * Every page is part of a folio. This function cannot be called on a * NULL pointer. * * Context: No reference, nor lock is required on @page. If the caller * does not hold a reference, this call may race with a folio split, so * it should re-check the folio still contains this page after gaining * a reference on the folio. * Return: The folio which contains this page.
*/ #define page_folio(p) (_Generic((p), \ const struct page *: (const struct folio *)_compound_head(p), \ struct page *: (struct folio *)_compound_head(p))) /** * folio_page - Return a page from a folio. * @folio: The folio. * @n: The page number to return. * * @n is relative to the start of the folio. This function does not * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. */ #define folio_page(folio, n) nth_page(&(folio)->page, n) static __always_inline int PageTail(const struct page *page) { return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } static __always_inline int PageCompound(const struct page *page) { return test_bit(PG_head, &page->flags) || READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { return READ_ONCE(page->flags) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM void page_init_poison(struct page *page, size_t size); #else static inline void page_init_poison(struct page *page, size_t size) { } #endif static const unsigned long *const_folio_flags(const struct folio *folio, unsigned n) { const struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(PageTail(page), page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(PageTail(page), page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } /* * Page flags policies wrt compound pages * * PF_POISONED_CHECK * check if this struct page poisoned/uninitialized * * PF_ANY: * the page flag is relevant for small, head and tail pages. * * PF_HEAD: * for compound page all operations related to the page flag applied to * head page. * * PF_NO_TAIL: * modifications of the page flag must be done on small or head pages, * checks can be done on tail pages too. * * PF_NO_COMPOUND: * the page flag is not relevant for compound pages. * * PF_SECOND: * the page flag is stored in the first tail page. 
*/ #define PF_POISONED_CHECK(page) ({ \ VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \ page; }) #define PF_ANY(page, enforce) PF_POISONED_CHECK(page) #define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page)) #define PF_NO_TAIL(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ PF_POISONED_CHECK(compound_head(page)); }) #define PF_NO_COMPOUND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ PF_POISONED_CHECK(page); }) #define PF_SECOND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ PF_POISONED_CHECK(&page[1]); }) /* Which page is the flag stored in */ #define FOLIO_PF_ANY 0 #define FOLIO_PF_HEAD 0 #define FOLIO_PF_NO_TAIL 0 #define FOLIO_PF_NO_COMPOUND 0 #define FOLIO_PF_SECOND 1 #define FOLIO_HEAD_PAGE 0 #define FOLIO_SECOND_PAGE 1 /* * Macros to create function definitions for page flags */ #define FOLIO_TEST_FLAG(name, page) \ static __always_inline bool folio_test_##name(const struct folio *folio) \ { return test_bit(PG_##name, const_folio_flags(folio, page)); } #define FOLIO_SET_FLAG(name, page) \ static __always_inline void folio_set_##name(struct folio *folio) \ { set_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_CLEAR_FLAG(name, page) \ static __always_inline void folio_clear_##name(struct folio *folio) \ { clear_bit(PG_##name, folio_flags(folio, page)); } #define __FOLIO_SET_FLAG(name, page) \ static __always_inline void __folio_set_##name(struct folio *folio) \ { __set_bit(PG_##name, folio_flags(folio, page)); } #define __FOLIO_CLEAR_FLAG(name, page) \ static __always_inline void __folio_clear_##name(struct folio *folio) \ { __clear_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_TEST_SET_FLAG(name, page) \ static __always_inline bool folio_test_set_##name(struct folio *folio) \ { return test_and_set_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_TEST_CLEAR_FLAG(name, page) \ static __always_inline bool folio_test_clear_##name(struct folio *folio) \ { return test_and_clear_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_FLAG(name, page) \ FOLIO_TEST_FLAG(name, page) \ FOLIO_SET_FLAG(name, page) \ FOLIO_CLEAR_FLAG(name, page) #define TESTPAGEFLAG(uname, lname, policy) \ FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ static __always_inline int Page##uname(const struct page *page) \ { return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void SetPage##uname(struct page *page) \ { set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page *page) \ { clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ __FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ { __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ __FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ { __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static 
__always_inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ SETPAGEFLAG(uname, lname, policy) \ CLEARPAGEFLAG(uname, lname, policy) #define __PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ __SETPAGEFLAG(uname, lname, policy) \ __CLEARPAGEFLAG(uname, lname, policy) #define TESTSCFLAG(uname, lname, policy) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) #define FOLIO_TEST_FLAG_FALSE(name) \ static inline bool folio_test_##name(const struct folio *folio) \ { return false; } #define FOLIO_SET_FLAG_NOOP(name) \ static inline void folio_set_##name(struct folio *folio) { } #define FOLIO_CLEAR_FLAG_NOOP(name) \ static inline void folio_clear_##name(struct folio *folio) { } #define __FOLIO_SET_FLAG_NOOP(name) \ static inline void __folio_set_##name(struct folio *folio) { } #define __FOLIO_CLEAR_FLAG_NOOP(name) \ static inline void __folio_clear_##name(struct folio *folio) { } #define FOLIO_TEST_SET_FLAG_FALSE(name) \ static inline bool folio_test_set_##name(struct folio *folio) \ { return false; } #define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \ static inline bool folio_test_clear_##name(struct folio *folio) \ { return false; } #define FOLIO_FLAG_FALSE(name) \ FOLIO_TEST_FLAG_FALSE(name) \ FOLIO_SET_FLAG_NOOP(name) \ FOLIO_CLEAR_FLAG_NOOP(name) #define TESTPAGEFLAG_FALSE(uname, lname) \ FOLIO_TEST_FLAG_FALSE(lname) \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ FOLIO_SET_FLAG_NOOP(lname) \ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname, lname) \ FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname, lname) \ __FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void __ClearPage##uname(struct page *page) { } #define TESTSETFLAG_FALSE(uname, lname) \ FOLIO_TEST_SET_FLAG_FALSE(lname) \ static inline int TestSetPage##uname(struct page *page) { return 0; } #define TESTCLEARFLAG_FALSE(uname, lname) \ FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname) #define TESTSCFLAG_FALSE(uname, lname) \ TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) __SETPAGEFLAG(Referenced, referenced, PF_HEAD) PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) TESTCLEARFLAG(LRU, lru, PF_HEAD) PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) TESTCLEARFLAG(Active, active, PF_HEAD) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) __PAGEFLAG(Slab, slab, PF_NO_TAIL) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ /* Xen */ PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND) TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND) PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND); PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND); PAGEFLAG(XenRemapped, 
xen_remapped, PF_NO_COMPOUND) TESTCLEARFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) __CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) __SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) /* * Private page markings that may be used by the filesystem that owns the page * for its own purposes. * - PG_private and PG_private_2 cause release_folio() and co to be invoked */ PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) /* * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. */ TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL) TESTSCFLAG(Writeback, writeback, PF_NO_TAIL) PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) #ifdef CONFIG_HIGHMEM /* * Must use a macro here due to header dependency issues. page_zone() is not * available at this point. */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #define folio_test_highmem(__f) is_highmem_idx(folio_zonenum(__f)) #else PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { return folio_test_swapbacked(folio) && test_bit(PG_swapcache, const_folio_flags(folio, 0)); } static __always_inline bool PageSwapCache(const struct page *page) { return folio_test_swapcache(page_folio(page)); } SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) #else PAGEFLAG_FALSE(SwapCache, swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) __CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD) TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD) #ifdef CONFIG_MMU PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) #else PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) TESTSCFLAG_FALSE(Mlocked, mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) #else PAGEFLAG_FALSE(Uncached, uncached) #endif #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) #define MAGIC_HWPOISON 0x48575053U /* HWPS */ extern void SetPageHWPoisonTakenOff(struct page *page); extern void ClearPageHWPoisonTakenOff(struct page *page); extern bool take_page_off_buddy(struct page *page); extern bool put_page_back_buddy(struct page *page); #else PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_FLAG(idle, FOLIO_HEAD_PAGE) #endif /* * PageReported() is used to track reported free pages within the Buddy * allocator. 
We can use the non-atomic version of the test and set * operations as both should be shielded with the zone lock to prevent * any possible races on the setting or clearing of the bit. */ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) #ifdef CONFIG_MEMORY_HOTPLUG PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY) #else PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #endif /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. * * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON * bit; and then page->mapping points, not to an anon_vma, but to a private * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for a non-lru movable * page, and then page->mapping points to a struct movable_operations. * * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" * refers to user virtual address space into which the page is mapped. * * For slab pages, since slab reuses the bits in struct page to store its * internal state, the page->mapping does not exist as such, nor do these * flags below. So in order to avoid testing non-existent bits, please * make sure that PageSlab(page) actually evaluates to false before calling * the following functions (e.g., PageAnon). See mm/slab.h. */ #define PAGE_MAPPING_ANON 0x1 #define PAGE_MAPPING_MOVABLE 0x2 #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) /* * Unlike the flags above, this flag is used only for fsdax mode. It * indicates that this page->mapping is now shared due to a reflink. */ #define PAGE_MAPPING_DAX_SHARED ((void *)0x1) static __always_inline bool folio_mapping_flags(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; } static __always_inline int PageMappingFlags(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } static __always_inline bool folio_test_anon(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; } static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); } static __always_inline bool __folio_test_movable(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_MOVABLE; } static __always_inline int __PageMovable(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_MOVABLE; } #ifdef CONFIG_KSM /* * A KSM page is one of those write-protected "shared pages" or "merged pages" * which KSM maps into multiple mms, wherever identical anonymous page content * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ static __always_inline bool folio_test_ksm(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } static __always_inline bool PageKsm(const struct page *page) { return folio_test_ksm(page_folio(page)); } #else TESTPAGEFLAG_FALSE(Ksm, ksm) #endif u64 stable_page_flags(struct page *page); /** * folio_xor_flags_has_waiters - Change some folio flags.
* @folio: The folio. * @mask: Bits set in this word will be changed. * * This must only be used for flags which are changed with the folio * lock held. For example, it is unsafe to use for PG_dirty as that * can be set without the folio lock held. It can also only be used * on flags which are in the range 0-6 as some of the implementations * only affect those bits. * * Return: Whether there are tasks waiting on the folio. */ static inline bool folio_xor_flags_has_waiters(struct folio *folio, unsigned long mask) { return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0)); } /** * folio_test_uptodate - Is this folio up to date? * @folio: The folio. * * The uptodate flag is set on a folio when every byte in the folio is * at least as new as the corresponding bytes on storage. Anonymous * and CoW folios are always uptodate. If the folio is not uptodate, * some of the bytes in it may be; see the is_partially_uptodate() * address_space operation. */ static inline bool folio_test_uptodate(const struct folio *folio) { bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0)); /* * Must ensure that the data we read out of the folio is loaded * _after_ we've loaded folio->flags to check the uptodate bit. * We can skip the barrier if the folio is not uptodate, because * we wouldn't be reading anything from it. * * See folio_mark_uptodate() for the other side of the story. */ if (ret) smp_rmb(); return ret; } static inline int PageUptodate(const struct page *page) { return folio_test_uptodate(page_folio(page)); } static __always_inline void __folio_mark_uptodate(struct folio *folio) { smp_wmb(); __set_bit(PG_uptodate, folio_flags(folio, 0)); } static __always_inline void folio_mark_uptodate(struct folio *folio) { /* * Memory barrier must be issued before setting the PG_uptodate bit, * so that all previous stores issued in order to bring the folio * uptodate are actually visible before folio_test_uptodate becomes true. */ smp_wmb(); set_bit(PG_uptodate, folio_flags(folio, 0)); } static __always_inline void __SetPageUptodate(struct page *page) { __folio_mark_uptodate((struct folio *)page); } static __always_inline void SetPageUptodate(struct page *page) { folio_mark_uptodate((struct folio *)page); } CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) void __folio_start_writeback(struct folio *folio, bool keep_write); void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) #define folio_start_writeback_keepwrite(folio) \ __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY)); } static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); } __SETPAGEFLAG(Head, head, PF_ANY) __CLEARPAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) /** * folio_test_large() - Does this folio contain more than one page? * @folio: The folio to test. * * Return: True if the folio is larger than one page. 
*/ static inline bool folio_test_large(const struct folio *folio) { return folio_test_head(folio); } static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } static __always_inline void clear_compound_head(struct page *page) { WRITE_ONCE(page->compound_head, 0); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void ClearPageCompound(struct page *page) { BUG_ON(!PageHead(page)); ClearPageHead(page); } PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND) #else TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) #endif #define PG_head_mask ((1UL << PG_head)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for * normal or transparent huge pages. * * PageTransHuge() returns true for both transparent huge and * hugetlbfs pages, but not normal pages. PageTransHuge() can only be * called in the core VM paths where hugetlbfs pages can't exist. */ static inline int PageTransHuge(const struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); return PageHead(page); } /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ static inline int PageTransCompound(const struct page *page) { return PageCompound(page); } /* * PageTransTail returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ static inline int PageTransTail(const struct page *page) { return PageTail(page); } #else TESTPAGEFLAG_FALSE(TransHuge, transhuge) TESTPAGEFLAG_FALSE(TransCompound, transcompound) TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) TESTPAGEFLAG_FALSE(TransTail, transtail) #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) /* * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the * compound page. * * This flag is set by the hwpoison handler. Cleared by THP split or when the * page is freed. */ PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) #else PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and * low bits so that an underflow or overflow of _mapcount won't be * mistaken for a page type value.
*/ #define PAGE_TYPE_BASE 0xf0000000 /* Reserve 0x0000007f to catch underflows of _mapcount */ #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400 #define PG_hugetlb 0x00000800 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) #define folio_test_type(folio, flag) \ ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) static inline int page_type_has_type(unsigned int page_type) { return (int)page_type < PAGE_MAPCOUNT_RESERVE; } static inline int page_has_type(const struct page *page) { return page_type_has_type(page->page_type); } #define FOLIO_TYPE_OPS(lname, fname) \ static __always_inline bool folio_test_##fname(const struct folio *folio)\ { \ return folio_test_type(folio, PG_##lname); \ } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ folio->page.page_type &= ~PG_##lname; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ folio->page.page_type |= PG_##lname; \ } #define PAGE_TYPE_OPS(uname, lname, fname) \ FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!PageType(page, 0), page); \ page->page_type &= ~PG_##lname; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type |= PG_##lname; \ } /* * PageBuddy() indicates that the page is free and in the buddy system * (see mm/page_alloc.c). */ PAGE_TYPE_OPS(Buddy, buddy, buddy) /* * PageOffline() indicates that the page is logically offline although the * containing section is online. (e.g. inflated in a balloon driver or * not onlined when onlining the section). * The content of these pages is effectively stale. Such pages should not * be touched (read/write/dump/save) except by their owner. * * If a driver wants to allow offlining of unmovable PageOffline() pages without * putting them back to the buddy, it can do so via the memory notifier by * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline() * pages (now with a reference count of zero) are treated like free pages, * allowing the containing memory block to get offlined. A driver that * relies on this feature is aware that re-onlining the memory block will * require re-setting the pages PageOffline() and not giving them to the * buddy via online_page_callback_t. * * There are drivers that mark a page PageOffline() and expect there won't be * any further access to page content. PFN walkers that read content of random * pages should check PageOffline() and synchronize with such drivers using * page_offline_freeze()/page_offline_thaw(). */ PAGE_TYPE_OPS(Offline, offline, offline) extern void page_offline_freeze(void); extern void page_offline_thaw(void); extern void page_offline_begin(void); extern void page_offline_end(void); /* * Marks pages in use as page tables. */ PAGE_TYPE_OPS(Table, table, pgtable) /* * Marks guardpages used with debug_pagealloc.
*/ PAGE_TYPE_OPS(Guard, guard, guard) #ifdef CONFIG_HUGETLB_PAGE FOLIO_TYPE_OPS(hugetlb, hugetlb) #else FOLIO_TEST_FLAG_FALSE(hugetlb) #endif /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. * * Context: Any context. * Return: True for hugetlbfs pages, false for anon pages or pages * belonging to other filesystems. */ static inline bool PageHuge(const struct page *page) { return folio_test_hugetlb(page_folio(page)); } /* * Check if a page is currently marked HWPoisoned. Note that this check is * best effort only and inherently racy: there is no way to synchronize with * failing hardware. */ static inline bool is_page_hwpoison(struct page *page) { if (PageHWPoison(page)) return true; return PageHuge(page) && PageHWPoison(compound_head(page)); } extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); static __always_inline int PageAnonExclusive(const struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } static __always_inline void SetPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } static __always_inline void ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } static __always_inline void __ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) #else #define __PG_MLOCKED 0 #endif /* * Flags checked when a page is freed. Pages being freed should not have * these flags set. If they are, there is a problem. */ #define PAGE_FLAGS_CHECK_AT_FREE \ (1UL << PG_lru | 1UL << PG_locked | \ 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_slab | 1UL << PG_active | \ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page allocator. * Pages being prepped should not have these flags set. If they are set, * there has been a kernel bug or struct page corruption. * * __PG_HWPOISON is exceptional because it needs to be kept beyond page's * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) /* * Flags stored in the second page of a compound page. They may overlap * the CHECK_AT_FREE flags above, so need to be cleared. */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ 1UL << PG_large_rmappable) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** * page_has_private - Determine if page has private stuff * @page: The page to be checked * * Determine if a page has private stuff, indicating that release routines * should be invoked upon it. 
*/ static inline int page_has_private(const struct page *page) { return !!(page->flags & PAGE_FLAGS_PRIVATE); } static inline bool folio_has_private(const struct folio *folio) { return page_has_private(&folio->page); } #undef PF_ANY #undef PF_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND #undef PF_SECOND #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */
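/*
 * Example (not part of the original page-flags.h): a sketch of what the
 * PAGEFLAG() machinery above generates and how the resulting accessors are
 * used. The expansion is abridged (FOLIO_PF_HEAD is shown as its value 0)
 * and the caller function below is hypothetical; only the generated names
 * and the test_bit()/set_bit()/clear_bit() pattern come from the macros
 * above.
 *
 * PAGEFLAG(Dirty, dirty, PF_HEAD) expands roughly to:
 *
 *	static __always_inline bool folio_test_dirty(const struct folio *folio)
 *	{ return test_bit(PG_dirty, const_folio_flags(folio, 0)); }
 *	static __always_inline void folio_set_dirty(struct folio *folio)
 *	{ set_bit(PG_dirty, folio_flags(folio, 0)); }
 *	static __always_inline void folio_clear_dirty(struct folio *folio)
 *	{ clear_bit(PG_dirty, folio_flags(folio, 0)); }
 *	static __always_inline int PageDirty(const struct page *page)
 *	{ return test_bit(PG_dirty, &PF_HEAD(page, 0)->flags); }
 *
 * plus SetPageDirty()/ClearPageDirty(), which operate on the compound head.
 */
#if 0	/* illustration only; not compiled as part of this header */
static void example_note_and_clear_dirty(struct folio *folio)
{
	/*
	 * folio_test_clear_dirty() is the atomic test-and-clear helper
	 * generated by TESTSCFLAG(Dirty, dirty, PF_HEAD) above.
	 */
	if (folio_test_clear_dirty(folio))
		pr_debug("folio %p was dirty and has now been cleared\n", folio);
}
#endif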
// SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/bvec.h> #include <linux/fault-inject-usercopy.h> #include <linux/uio.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/splice.h> #include <linux/compat.h> #include <linux/scatterlist.h> #include <linux/instrumented.h> #include <linux/iov_iter.h> static __always_inline size_t copy_to_user_iter(void __user *iter_to, size_t progress, size_t len, void *from, void *priv2) { if (should_fail_usercopy()) return len; if (access_ok(iter_to, len)) { from += progress; instrument_copy_to_user(iter_to, from, len); len = raw_copy_to_user(iter_to, from, len); } return len; } static __always_inline size_t copy_to_user_iter_nofault(void __user *iter_to, size_t progress, size_t len, void *from, void *priv2) { ssize_t res; if (should_fail_usercopy()) return len; from += progress; res = copy_to_user_nofault(iter_to, from, len); return res < 0 ? len : res; } static __always_inline size_t copy_from_user_iter(void __user *iter_from, size_t progress, size_t len, void *to, void *priv2) { size_t res = len; if (should_fail_usercopy()) return len; if (access_ok(iter_from, len)) { to += progress; instrument_copy_from_user_before(to, iter_from, len); res = raw_copy_from_user(to, iter_from, len); instrument_copy_from_user_after(to, iter_from, len, res); } return res; } static __always_inline size_t memcpy_to_iter(void *iter_to, size_t progress, size_t len, void *from, void *priv2) { memcpy(iter_to, from + progress, len); return 0; } static __always_inline size_t memcpy_from_iter(void *iter_from, size_t progress, size_t len, void *to, void *priv2) { memcpy(to + progress, iter_from, len); return 0; } /* * fault_in_iov_iter_readable - fault in iov iterator for reading * @i: iterator * @size: maximum length * * Fault in one or more iovecs of the given iov_iter, to a maximum length of * @size. For each iovec, fault in each page that constitutes the iovec. * * Returns the number of bytes not faulted in (like copy_to_user() and * copy_from_user()). * * Always returns 0 for non-userspace iterators. */ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) { if (iter_is_ubuf(i)) { size_t n = min(size, iov_iter_count(i)); n -= fault_in_readable(i->ubuf + i->iov_offset, n); return size - n; } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; size -= count; for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) { size_t len = min(count, p->iov_len - skip); size_t ret; if (unlikely(!len)) continue; ret = fault_in_readable(p->iov_base + skip, len); count -= len - ret; if (ret) break; } return count + size; } return 0; } EXPORT_SYMBOL(fault_in_iov_iter_readable); /* * fault_in_iov_iter_writeable - fault in iov iterator for writing * @i: iterator * @size: maximum length * * Faults in the iterator using get_user_pages(), i.e., without triggering * hardware page faults. This is primarily useful when we already know that * some or all of the pages in @i aren't in memory. * * Returns the number of bytes not faulted in, like copy_to_user() and * copy_from_user(). * * Always returns 0 for non-user-space iterators.
*/ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) { if (iter_is_ubuf(i)) { size_t n = min(size, iov_iter_count(i)); n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n); return size - n; } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; size -= count; for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) { size_t len = min(count, p->iov_len - skip); size_t ret; if (unlikely(!len)) continue; ret = fault_in_safe_writeable(p->iov_base + skip, len); count -= len - ret; if (ret) break; } return count + size; } return 0; } EXPORT_SYMBOL(fault_in_iov_iter_writeable); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count) { WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_IOVEC, .nofault = false, .data_source = direction, .__iov = iov, .nr_segs = nr_segs, .iov_offset = 0, .count = count }; } EXPORT_SYMBOL(iov_iter_init); size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(i->data_source)) return 0; if (user_backed_iter(i)) might_fault(); return iterate_and_advance(i, bytes, (void *)addr, copy_to_user_iter, memcpy_to_iter); } EXPORT_SYMBOL(_copy_to_iter); #ifdef CONFIG_ARCH_HAS_COPY_MC static __always_inline size_t copy_to_user_iter_mc(void __user *iter_to, size_t progress, size_t len, void *from, void *priv2) { if (access_ok(iter_to, len)) { from += progress; instrument_copy_to_user(iter_to, from, len); len = copy_mc_to_user(iter_to, from, len); } return len; } static __always_inline size_t memcpy_to_iter_mc(void *iter_to, size_t progress, size_t len, void *from, void *priv2) { return copy_mc_to_kernel(iter_to, from + progress, len); } /** * _copy_mc_to_iter - copy to iter with source memory error exception handling * @addr: source kernel address * @bytes: total transfer length * @i: destination iterator * * The pmem driver deploys this for the dax operation * (dax_copy_to_iter()) for dax reads (bypass page-cache and the * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes * successfully copied. * * The main differences between this and typical _copy_to_iter(). * * * Typical tail/residue handling after a fault retries the copy * byte-by-byte until the fault happens again. Re-triggering machine * checks is potentially fatal so the implementation uses source * alignment and poison alignment assumptions to avoid re-triggering * hardware exceptions. * * * ITER_KVEC and ITER_BVEC can return short copies. Compare to * copy_to_iter() where only ITER_IOVEC attempts might return a short copy. 
* * Return: number of bytes copied (may be %0) */ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(i->data_source)) return 0; if (user_backed_iter(i)) might_fault(); return iterate_and_advance(i, bytes, (void *)addr, copy_to_user_iter_mc, memcpy_to_iter_mc); } EXPORT_SYMBOL_GPL(_copy_mc_to_iter); #endif /* CONFIG_ARCH_HAS_COPY_MC */ static __always_inline size_t __copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { return iterate_and_advance(i, bytes, addr, copy_from_user_iter, memcpy_from_iter); } size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(!i->data_source)) return 0; if (user_backed_iter(i)) might_fault(); return __copy_from_iter(addr, bytes, i); } EXPORT_SYMBOL(_copy_from_iter); static __always_inline size_t copy_from_user_iter_nocache(void __user *iter_from, size_t progress, size_t len, void *to, void *priv2) { return __copy_from_user_inatomic_nocache(to + progress, iter_from, len); } size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(!i->data_source)) return 0; return iterate_and_advance(i, bytes, addr, copy_from_user_iter_nocache, memcpy_from_iter); } EXPORT_SYMBOL(_copy_from_iter_nocache); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE static __always_inline size_t copy_from_user_iter_flushcache(void __user *iter_from, size_t progress, size_t len, void *to, void *priv2) { return __copy_from_user_flushcache(to + progress, iter_from, len); } static __always_inline size_t memcpy_from_iter_flushcache(void *iter_from, size_t progress, size_t len, void *to, void *priv2) { memcpy_flushcache(to + progress, iter_from, len); return 0; } /** * _copy_from_iter_flushcache - write destination through cpu cache * @addr: destination kernel address * @bytes: total transfer length * @i: source iterator * * The pmem driver arranges for filesystem-dax to use this facility via * dax_copy_from_iter() for ensuring that writes to persistent memory * are flushed through the CPU cache. It is differentiated from * _copy_from_iter_nocache() in that guarantees all data is flushed for * all iterator types. The _copy_from_iter_nocache() only attempts to * bypass the cache for the ITER_IOVEC case, and on some archs may use * instructions that strand dirty-data in the cache. * * Return: number of bytes copied (may be %0) */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(!i->data_source)) return 0; return iterate_and_advance(i, bytes, addr, copy_from_user_iter_flushcache, memcpy_from_iter_flushcache); } EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache); #endif static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) { struct page *head; size_t v = n + offset; /* * The general case needs to access the page order in order * to compute the page size. * However, we mostly deal with order-0 pages and thus can * avoid a possible cache line miss for requests that fit all * page orders. 
*/ if (n <= v && v <= PAGE_SIZE) return true; head = compound_head(page); v += (page - head) << PAGE_SHIFT; if (WARN_ON(n > v || v > page_size(head))) return false; return true; } size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (!page_copy_sane(page, offset, bytes)) return 0; if (WARN_ON_ONCE(i->data_source)) return 0; page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); n = _copy_to_iter(kaddr + offset, n, i); kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) break; offset += n; if (offset == PAGE_SIZE) { page++; offset = 0; } } return res; } EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (!page_copy_sane(page, offset, bytes)) return 0; if (WARN_ON_ONCE(i->data_source)) return 0; page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); n = iterate_and_advance(i, n, kaddr + offset, copy_to_user_iter_nofault, memcpy_to_iter); kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) break; offset += n; if (offset == PAGE_SIZE) { page++; offset = 0; } } return res; } EXPORT_SYMBOL(copy_page_to_iter_nofault); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (!page_copy_sane(page, offset, bytes)) return 0; page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); n = _copy_from_iter(kaddr + offset, n, i); kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) break; offset += n; if (offset == PAGE_SIZE) { page++; offset = 0; } } return res; } EXPORT_SYMBOL(copy_page_from_iter); static __always_inline size_t zero_to_user_iter(void __user *iter_to, size_t progress, size_t len, void *priv, void *priv2) { return clear_user(iter_to, len); } static __always_inline size_t zero_to_iter(void *iter_to, size_t progress, size_t len, void *priv, void *priv2) { memset(iter_to, 0, len); return 0; } size_t iov_iter_zero(size_t bytes, struct iov_iter *i) { return iterate_and_advance(i, bytes, NULL, zero_to_user_iter, zero_to_iter); } EXPORT_SYMBOL(iov_iter_zero); size_t copy_page_from_iter_atomic(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t n, copied = 0; if (!page_copy_sane(page, offset, bytes)) return 0; if (WARN_ON_ONCE(!i->data_source)) return 0; do { char *p; n = bytes - copied; if (PageHighMem(page)) { page += offset / PAGE_SIZE; offset %= PAGE_SIZE; n = min_t(size_t, n, PAGE_SIZE - offset); } p = kmap_atomic(page) + offset; n = __copy_from_iter(p, n, i); kunmap_atomic(p); copied += n; offset += n; } while (PageHighMem(page) && copied != bytes && n > 0); return copied; } EXPORT_SYMBOL(copy_page_from_iter_atomic); static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) { const struct bio_vec *bvec, *end; if (!i->count) return; i->count -= size; size += i->iov_offset; for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) { if (likely(size < bvec->bv_len)) break; size -= bvec->bv_len; } i->iov_offset = size; i->nr_segs -= bvec - i->bvec; i->bvec = bvec; } static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) { const struct iovec *iov, *end; if (!i->count) 
return; i->count -= size; size += i->iov_offset; // from beginning of current segment for (iov = iter_iov(i), end = iov + i->nr_segs; iov < end; iov++) { if (likely(size < iov->iov_len)) break; size -= iov->iov_len; } i->iov_offset = size; i->nr_segs -= iov - iter_iov(i); i->__iov = iov; } void iov_iter_advance(struct iov_iter *i, size_t size) { if (unlikely(i->count < size)) size = i->count; if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) { i->iov_offset += size; i->count -= size; } else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { /* iovec and kvec have identical layouts */ iov_iter_iovec_advance(i, size); } else if (iov_iter_is_bvec(i)) { iov_iter_bvec_advance(i, size); } else if (iov_iter_is_discard(i)) { i->count -= size; } } EXPORT_SYMBOL(iov_iter_advance); void iov_iter_revert(struct iov_iter *i, size_t unroll) { if (!unroll) return; if (WARN_ON(unroll > MAX_RW_COUNT)) return; i->count += unroll; if (unlikely(iov_iter_is_discard(i))) return; if (unroll <= i->iov_offset) { i->iov_offset -= unroll; return; } unroll -= i->iov_offset; if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) { BUG(); /* We should never go beyond the start of the specified * range since we might then be straying into pages that * aren't pinned. */ } else if (iov_iter_is_bvec(i)) { const struct bio_vec *bvec = i->bvec; while (1) { size_t n = (--bvec)->bv_len; i->nr_segs++; if (unroll <= n) { i->bvec = bvec; i->iov_offset = n - unroll; return; } unroll -= n; } } else { /* same logics for iovec and kvec */ const struct iovec *iov = iter_iov(i); while (1) { size_t n = (--iov)->iov_len; i->nr_segs++; if (unroll <= n) { i->__iov = iov; i->iov_offset = n - unroll; return; } unroll -= n; } } } EXPORT_SYMBOL(iov_iter_revert); /* * Return the count of just the current iov_iter segment. */ size_t iov_iter_single_seg_count(const struct iov_iter *i) { if (i->nr_segs > 1) { if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return min(i->count, iter_iov(i)->iov_len - i->iov_offset); if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); } return i->count; } EXPORT_SYMBOL(iov_iter_single_seg_count); void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count) { WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter){ .iter_type = ITER_KVEC, .data_source = direction, .kvec = kvec, .nr_segs = nr_segs, .iov_offset = 0, .count = count }; } EXPORT_SYMBOL(iov_iter_kvec); void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count) { WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter){ .iter_type = ITER_BVEC, .data_source = direction, .bvec = bvec, .nr_segs = nr_segs, .iov_offset = 0, .count = count }; } EXPORT_SYMBOL(iov_iter_bvec); /** * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray * @i: The iterator to initialise. * @direction: The direction of the transfer. * @xarray: The xarray to access. * @start: The start file position. * @count: The size of the I/O buffer in bytes. * * Set up an I/O iterator to either draw data out of the pages attached to an * inode or to inject data into those pages. The pages *must* be prevented * from evaporation, either by taking a ref on them or locking them by the * caller. 
*/ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count) { BUG_ON(direction & ~1); *i = (struct iov_iter) { .iter_type = ITER_XARRAY, .data_source = direction, .xarray = xarray, .xarray_start = start, .count = count, .iov_offset = 0 }; } EXPORT_SYMBOL(iov_iter_xarray); /** * iov_iter_discard - Initialise an I/O iterator that discards data * @i: The iterator to initialise. * @direction: The direction of the transfer. * @count: The size of the I/O buffer in bytes. * * Set up an I/O iterator that just discards everything that's written to it. * It's only available as a READ iterator. */ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) { BUG_ON(direction != READ); *i = (struct iov_iter){ .iter_type = ITER_DISCARD, .data_source = false, .count = count, .iov_offset = 0 }; } EXPORT_SYMBOL(iov_iter_discard); static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { const struct iovec *iov = iter_iov(i); size_t size = i->count; size_t skip = i->iov_offset; do { size_t len = iov->iov_len - skip; if (len > size) len = size; if (len & len_mask) return false; if ((unsigned long)(iov->iov_base + skip) & addr_mask) return false; iov++; size -= len; skip = 0; } while (size); return true; } static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { const struct bio_vec *bvec = i->bvec; unsigned skip = i->iov_offset; size_t size = i->count; do { size_t len = bvec->bv_len; if (len > size) len = size; if (len & len_mask) return false; if ((unsigned long)(bvec->bv_offset + skip) & addr_mask) return false; bvec++; size -= len; skip = 0; } while (size); return true; } /** * iov_iter_is_aligned() - Check if the addresses and lengths of each segments * are aligned to the parameters. 
* * @i: &struct iov_iter to restore * @addr_mask: bit mask to check against the iov element's addresses * @len_mask: bit mask to check against the iov element's lengths * * Return: false if any addresses or lengths intersect with the provided masks */ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, unsigned len_mask) { if (likely(iter_is_ubuf(i))) { if (i->count & len_mask) return false; if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask) return false; return true; } if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_aligned_iovec(i, addr_mask, len_mask); if (iov_iter_is_bvec(i)) return iov_iter_aligned_bvec(i, addr_mask, len_mask); if (iov_iter_is_xarray(i)) { if (i->count & len_mask) return false; if ((i->xarray_start + i->iov_offset) & addr_mask) return false; } return true; } EXPORT_SYMBOL_GPL(iov_iter_is_aligned); static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) { const struct iovec *iov = iter_iov(i); unsigned long res = 0; size_t size = i->count; size_t skip = i->iov_offset; do { size_t len = iov->iov_len - skip; if (len) { res |= (unsigned long)iov->iov_base + skip; if (len > size) len = size; res |= len; size -= len; } iov++; skip = 0; } while (size); return res; } static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i) { const struct bio_vec *bvec = i->bvec; unsigned res = 0; size_t size = i->count; unsigned skip = i->iov_offset; do { size_t len = bvec->bv_len - skip; res |= (unsigned long)bvec->bv_offset + skip; if (len > size) len = size; res |= len; bvec++; size -= len; skip = 0; } while (size); return res; } unsigned long iov_iter_alignment(const struct iov_iter *i) { if (likely(iter_is_ubuf(i))) { size_t size = i->count; if (size) return ((unsigned long)i->ubuf + i->iov_offset) | size; return 0; } /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_alignment_iovec(i); if (iov_iter_is_bvec(i)) return iov_iter_alignment_bvec(i); if (iov_iter_is_xarray(i)) return (i->xarray_start + i->iov_offset) | i->count; return 0; } EXPORT_SYMBOL(iov_iter_alignment); unsigned long iov_iter_gap_alignment(const struct iov_iter *i) { unsigned long res = 0; unsigned long v = 0; size_t size = i->count; unsigned k; if (iter_is_ubuf(i)) return 0; if (WARN_ON(!iter_is_iovec(i))) return ~0U; for (k = 0; k < i->nr_segs; k++) { const struct iovec *iov = iter_iov(i) + k; if (iov->iov_len) { unsigned long base = (unsigned long)iov->iov_base; if (v) // if not the first one res |= base | v; // this start | previous end v = base + iov->iov_len; if (size <= iov->iov_len) break; size -= iov->iov_len; } } return res; } EXPORT_SYMBOL(iov_iter_gap_alignment); static int want_pages_array(struct page ***res, size_t size, size_t start, unsigned int maxpages) { unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE); if (count > maxpages) count = maxpages; WARN_ON(!count); // caller should've prevented that if (!*res) { *res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); if (!*res) return 0; } return count; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, pgoff_t index, unsigned int nr_pages) { XA_STATE(xas, xa, index); struct page *page; unsigned int ret = 0; rcu_read_lock(); for (page = xas_load(&xas); page; page = xas_next(&xas)) { if (xas_retry(&xas, page)) continue; /* Has the page moved or been split? 
*/ if (unlikely(page != xas_reload(&xas))) { xas_reset(&xas); continue; } pages[ret] = find_subpage(page, xas.xa_index); get_page(pages[ret]); if (++ret == nr_pages) break; } rcu_read_unlock(); return ret; } static ssize_t iter_xarray_get_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { unsigned nr, offset, count; pgoff_t index; loff_t pos; pos = i->xarray_start + i->iov_offset; index = pos >> PAGE_SHIFT; offset = pos & ~PAGE_MASK; *_start_offset = offset; count = want_pages_array(pages, maxsize, offset, maxpages); if (!count) return -ENOMEM; nr = iter_xarray_populate_pages(*pages, i->xarray, index, count); if (nr == 0) return 0; maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); i->iov_offset += maxsize; i->count -= maxsize; return maxsize; } /* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) { size_t skip; long k; if (iter_is_ubuf(i)) return (unsigned long)i->ubuf + i->iov_offset; for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { const struct iovec *iov = iter_iov(i) + k; size_t len = iov->iov_len - skip; if (unlikely(!len)) continue; if (*size > len) *size = len; return (unsigned long)iov->iov_base + skip; } BUG(); // if it had been empty, we wouldn't get called } /* must be done on non-empty ITER_BVEC one */ static struct page *first_bvec_segment(const struct iov_iter *i, size_t *size, size_t *start) { struct page *page; size_t skip = i->iov_offset, len; len = i->bvec->bv_len - skip; if (*size > len) *size = len; skip += i->bvec->bv_offset; page = i->bvec->bv_page + skip / PAGE_SIZE; *start = skip % PAGE_SIZE; return page; } static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, size_t *start) { unsigned int n, gup_flags = 0; if (maxsize > i->count) maxsize = i->count; if (!maxsize) return 0; if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; if (likely(user_backed_iter(i))) { unsigned long addr; int res; if (iov_iter_rw(i) != WRITE) gup_flags |= FOLL_WRITE; if (i->nofault) gup_flags |= FOLL_NOFAULT; addr = first_iovec_segment(i, &maxsize); *start = addr % PAGE_SIZE; addr &= PAGE_MASK; n = want_pages_array(pages, maxsize, *start, maxpages); if (!n) return -ENOMEM; res = get_user_pages_fast(addr, n, gup_flags, *pages); if (unlikely(res <= 0)) return res; maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start); iov_iter_advance(i, maxsize); return maxsize; } if (iov_iter_is_bvec(i)) { struct page **p; struct page *page; page = first_bvec_segment(i, &maxsize, start); n = want_pages_array(pages, maxsize, *start, maxpages); if (!n) return -ENOMEM; p = *pages; for (int k = 0; k < n; k++) get_page(p[k] = page + k); maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); i->count -= maxsize; i->iov_offset += maxsize; if (i->iov_offset == i->bvec->bv_len) { i->iov_offset = 0; i->bvec++; i->nr_segs--; } return maxsize; } if (iov_iter_is_xarray(i)) return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; } ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { if (!maxpages) return 0; BUG_ON(!pages); return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start); } EXPORT_SYMBOL(iov_iter_get_pages2); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { ssize_t len; *pages = NULL; len = 
__iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start); if (len <= 0) { kvfree(*pages); *pages = NULL; } return len; } EXPORT_SYMBOL(iov_iter_get_pages_alloc2); static int iov_npages(const struct iov_iter *i, int maxpages) { size_t skip = i->iov_offset, size = i->count; const struct iovec *p; int npages = 0; for (p = iter_iov(i); size; skip = 0, p++) { unsigned offs = offset_in_page(p->iov_base + skip); size_t len = min(p->iov_len - skip, size); if (len) { size -= len; npages += DIV_ROUND_UP(offs + len, PAGE_SIZE); if (unlikely(npages > maxpages)) return maxpages; } } return npages; } static int bvec_npages(const struct iov_iter *i, int maxpages) { size_t skip = i->iov_offset, size = i->count; const struct bio_vec *p; int npages = 0; for (p = i->bvec; size; skip = 0, p++) { unsigned offs = (p->bv_offset + skip) % PAGE_SIZE; size_t len = min(p->bv_len - skip, size); size -= len; npages += DIV_ROUND_UP(offs + len, PAGE_SIZE); if (unlikely(npages > maxpages)) return maxpages; } return npages; } int iov_iter_npages(const struct iov_iter *i, int maxpages) { if (unlikely(!i->count)) return 0; if (likely(iter_is_ubuf(i))) { unsigned offs = offset_in_page(i->ubuf + i->iov_offset); int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE); return min(npages, maxpages); } /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_npages(i, maxpages); if (iov_iter_is_bvec(i)) return bvec_npages(i, maxpages); if (iov_iter_is_xarray(i)) { unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE; int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); return min(npages, maxpages); } return 0; } EXPORT_SYMBOL(iov_iter_npages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) { *new = *old; if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, new->nr_segs * sizeof(struct bio_vec), flags); else if (iov_iter_is_kvec(new) || iter_is_iovec(new)) /* iovec and kvec have identical layout */ return new->__iov = kmemdup(new->__iov, new->nr_segs * sizeof(struct iovec), flags); return NULL; } EXPORT_SYMBOL(dup_iter); static __noclone int copy_compat_iovec_from_user(struct iovec *iov, const struct iovec __user *uvec, u32 nr_segs) { const struct compat_iovec __user *uiov = (const struct compat_iovec __user *)uvec; int ret = -EFAULT; u32 i; if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; for (i = 0; i < nr_segs; i++) { compat_uptr_t buf; compat_ssize_t len; unsafe_get_user(len, &uiov[i].iov_len, uaccess_end); unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end); /* check for compat_size_t not fitting in compat_ssize_t .. */ if (len < 0) { ret = -EINVAL; goto uaccess_end; } iov[i].iov_base = compat_ptr(buf); iov[i].iov_len = len; } ret = 0; uaccess_end: user_access_end(); return ret; } static __noclone int copy_iovec_from_user(struct iovec *iov, const struct iovec __user *uiov, unsigned long nr_segs) { int ret = -EFAULT; if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; do { void __user *buf; ssize_t len; unsafe_get_user(len, &uiov->iov_len, uaccess_end); unsafe_get_user(buf, &uiov->iov_base, uaccess_end); /* check for size_t not fitting in ssize_t .. 
*/ if (unlikely(len < 0)) { ret = -EINVAL; goto uaccess_end; } iov->iov_base = buf; iov->iov_len = len; uiov++; iov++; } while (--nr_segs); ret = 0; uaccess_end: user_access_end(); return ret; } struct iovec *iovec_from_user(const struct iovec __user *uvec, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat) { struct iovec *iov = fast_iov; int ret; /* * SuS says "The readv() function *may* fail if the iovcnt argument was * less than or equal to 0, or greater than {IOV_MAX}. Linux has * traditionally returned zero for zero segments, so... */ if (nr_segs == 0) return iov; if (nr_segs > UIO_MAXIOV) return ERR_PTR(-EINVAL); if (nr_segs > fast_segs) { iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); if (!iov) return ERR_PTR(-ENOMEM); } if (unlikely(compat)) ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); else ret = copy_iovec_from_user(iov, uvec, nr_segs); if (ret) { if (iov != fast_iov) kfree(iov); return ERR_PTR(ret); } return iov; } /* * Single segment iovec supplied by the user, import it as ITER_UBUF. */ static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec, struct iovec **iovp, struct iov_iter *i, bool compat) { struct iovec *iov = *iovp; ssize_t ret; if (compat) ret = copy_compat_iovec_from_user(iov, uvec, 1); else ret = copy_iovec_from_user(iov, uvec, 1); if (unlikely(ret)) return ret; ret = import_ubuf(type, iov->iov_base, iov->iov_len, i); if (unlikely(ret)) return ret; *iovp = NULL; return i->count; } ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i, bool compat) { ssize_t total_len = 0; unsigned long seg; struct iovec *iov; if (nr_segs == 1) return __import_iovec_ubuf(type, uvec, iovp, i, compat); iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat); if (IS_ERR(iov)) { *iovp = NULL; return PTR_ERR(iov); } /* * According to the Single Unix Specification we should return EINVAL if * an element length is < 0 when cast to ssize_t or if the total length * would overflow the ssize_t return value of the system call. * * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the * overflow case. */ for (seg = 0; seg < nr_segs; seg++) { ssize_t len = (ssize_t)iov[seg].iov_len; if (!access_ok(iov[seg].iov_base, len)) { if (iov != *iovp) kfree(iov); *iovp = NULL; return -EFAULT; } if (len > MAX_RW_COUNT - total_len) { len = MAX_RW_COUNT - total_len; iov[seg].iov_len = len; } total_len += len; } iov_iter_init(i, type, iov, nr_segs, total_len); if (iov == *iovp) *iovp = NULL; else *iovp = iov; return total_len; } /** * import_iovec() - Copy an array of &struct iovec from userspace * into the kernel, check that it is valid, and initialize a new * &struct iov_iter iterator to access it. * * @type: One of %READ or %WRITE. * @uvec: Pointer to the userspace array. * @nr_segs: Number of elements in userspace array. * @fast_segs: Number of elements in @iov. * @iovp: (input and output parameter) Pointer to pointer to (usually small * on-stack) kernel array. * @i: Pointer to iterator that will be initialized on success. * * If the array pointed to by *@iov is large enough to hold all @nr_segs, * then this function places %NULL in *@iov on return. Otherwise, a new * array will be allocated and the result placed in *@iov. This means that * the caller may call kfree() on *@iov regardless of whether the small * on-stack array was used or not (and regardless of whether this function * returns an error or not). 
* * Return: Negative error code on error, bytes imported on success */ ssize_t import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i) { return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, in_compat_syscall()); } EXPORT_SYMBOL(import_iovec); int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) { if (len > MAX_RW_COUNT) len = MAX_RW_COUNT; if (unlikely(!access_ok(buf, len))) return -EFAULT; iov_iter_ubuf(i, rw, buf, len); return 0; } EXPORT_SYMBOL_GPL(import_ubuf); /** * iov_iter_restore() - Restore a &struct iov_iter to the same state as when * iov_iter_save_state() was called. * * @i: &struct iov_iter to restore * @state: state to restore from * * Used after iov_iter_save_state() to bring restore @i, if operations may * have advanced it. * * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC */ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) { if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) && !iter_is_ubuf(i)) && !iov_iter_is_kvec(i)) return; i->iov_offset = state->iov_offset; i->count = state->count; if (iter_is_ubuf(i)) return; /* * For the *vec iters, nr_segs + iov is constant - if we increment * the vec, then we also decrement the nr_segs count. Hence we don't * need to track both of these, just one is enough and we can deduct * the other from that. ITER_KVEC and ITER_IOVEC are the same struct * size, so we can just increment the iov pointer as they are unionzed. * ITER_BVEC _may_ be the same size on some archs, but on others it is * not. Be safe and handle it separately. */ BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); if (iov_iter_is_bvec(i)) i->bvec -= state->nr_segs - i->nr_segs; else i->__iov -= state->nr_segs - i->nr_segs; i->nr_segs = state->nr_segs; } /* * Extract a list of contiguous pages from an ITER_XARRAY iterator. This does not * get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { struct page *page, **p; unsigned int nr = 0, offset; loff_t pos = i->xarray_start + i->iov_offset; pgoff_t index = pos >> PAGE_SHIFT; XA_STATE(xas, i->xarray, index); offset = pos & ~PAGE_MASK; *offset0 = offset; maxpages = want_pages_array(pages, maxsize, offset, maxpages); if (!maxpages) return -ENOMEM; p = *pages; rcu_read_lock(); for (page = xas_load(&xas); page; page = xas_next(&xas)) { if (xas_retry(&xas, page)) continue; /* Has the page moved or been split? */ if (unlikely(page != xas_reload(&xas))) { xas_reset(&xas); continue; } p[nr++] = find_subpage(page, xas.xa_index); if (nr == maxpages) break; } rcu_read_unlock(); maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); iov_iter_advance(i, maxsize); return maxsize; } /* * Extract a list of contiguous pages from an ITER_BVEC iterator. This does * not get references on the pages, nor does it get a pin on them. 
*/ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { struct page **p, *page; size_t skip = i->iov_offset, offset, size; int k; for (;;) { if (i->nr_segs == 0) return 0; size = min(maxsize, i->bvec->bv_len - skip); if (size) break; i->iov_offset = 0; i->nr_segs--; i->bvec++; skip = 0; } skip += i->bvec->bv_offset; page = i->bvec->bv_page + skip / PAGE_SIZE; offset = skip % PAGE_SIZE; *offset0 = offset; maxpages = want_pages_array(pages, size, offset, maxpages); if (!maxpages) return -ENOMEM; p = *pages; for (k = 0; k < maxpages; k++) p[k] = page + k; size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); iov_iter_advance(i, size); return size; } /* * Extract a list of virtually contiguous pages from an ITER_KVEC iterator. * This does not get references on the pages, nor does it get a pin on them. */ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { struct page **p, *page; const void *kaddr; size_t skip = i->iov_offset, offset, len, size; int k; for (;;) { if (i->nr_segs == 0) return 0; size = min(maxsize, i->kvec->iov_len - skip); if (size) break; i->iov_offset = 0; i->nr_segs--; i->kvec++; skip = 0; } kaddr = i->kvec->iov_base + skip; offset = (unsigned long)kaddr & ~PAGE_MASK; *offset0 = offset; maxpages = want_pages_array(pages, size, offset, maxpages); if (!maxpages) return -ENOMEM; p = *pages; kaddr -= offset; len = offset + size; for (k = 0; k < maxpages; k++) { size_t seg = min_t(size_t, len, PAGE_SIZE); if (is_vmalloc_or_module_addr(kaddr)) page = vmalloc_to_page(kaddr); else page = virt_to_page(kaddr); p[k] = page; len -= seg; kaddr += PAGE_SIZE; } size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); iov_iter_advance(i, size); return size; } /* * Extract a list of contiguous pages from a user iterator and get a pin on * each of them. This should only be used if the iterator is user-backed * (IOBUF/UBUF). * * It does not get refs on the pages, but the pages must be unpinned by the * caller once the transfer is complete. * * This is safe to be used where background IO/DMA *is* going to be modifying * the buffer; using a pin rather than a ref makes forces fork() to give the * child a copy of the page. 
*/ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { unsigned long addr; unsigned int gup_flags = 0; size_t offset; int res; if (i->data_source == ITER_DEST) gup_flags |= FOLL_WRITE; if (extraction_flags & ITER_ALLOW_P2PDMA) gup_flags |= FOLL_PCI_P2PDMA; if (i->nofault) gup_flags |= FOLL_NOFAULT; addr = first_iovec_segment(i, &maxsize); *offset0 = offset = addr % PAGE_SIZE; addr &= PAGE_MASK; maxpages = want_pages_array(pages, maxsize, offset, maxpages); if (!maxpages) return -ENOMEM; res = pin_user_pages_fast(addr, maxpages, gup_flags, *pages); if (unlikely(res <= 0)) return res; maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - offset); iov_iter_advance(i, maxsize); return maxsize; } /** * iov_iter_extract_pages - Extract a list of contiguous pages from an iterator * @i: The iterator to extract from * @pages: Where to return the list of pages * @maxsize: The maximum amount of iterator to extract * @maxpages: The maximum size of the list of pages * @extraction_flags: Flags to qualify request * @offset0: Where to return the starting offset into (*@pages)[0] * * Extract a list of contiguous pages from the current point of the iterator, * advancing the iterator. The maximum number of pages and the maximum amount * of page contents can be set. * * If *@pages is NULL, a page list will be allocated to the required size and * *@pages will be set to its base. If *@pages is not NULL, it will be assumed * that the caller allocated a page list at least @maxpages in size and this * will be filled in. * * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA * be allowed on the pages extracted. * * The iov_iter_extract_will_pin() function can be used to query how cleanup * should be performed. * * Extra refs or pins on the pages may be obtained as follows: * * (*) If the iterator is user-backed (ITER_IOVEC/ITER_UBUF), pins will be * added to the pages, but refs will not be taken. * iov_iter_extract_will_pin() will return true. * * (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are * merely listed; no extra refs or pins are obtained. * iov_iter_extract_will_pin() will return 0. * * Note also: * * (*) Use with ITER_DISCARD is not supported as that has no content. * * On success, the function sets *@pages to the new pagelist, if allocated, and * sets *offset0 to the offset into the first page. * * It may also return -ENOMEM and -EFAULT. */ ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0) { maxsize = min_t(size_t, min_t(size_t, maxsize, i->count), MAX_RW_COUNT); if (!maxsize) return 0; if (likely(user_backed_iter(i))) return iov_iter_extract_user_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); if (iov_iter_is_kvec(i)) return iov_iter_extract_kvec_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); if (iov_iter_is_bvec(i)) return iov_iter_extract_bvec_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); if (iov_iter_is_xarray(i)) return iov_iter_extract_xarray_pages(i, pages, maxsize, maxpages, extraction_flags, offset0); return -EFAULT; } EXPORT_SYMBOL_GPL(iov_iter_extract_pages);
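
/*
 * Illustrative usage sketch (not part of lib/iov_iter.c): the two helpers
 * below show how callers typically drive the iov_iter API exported above.
 * The names demo_fill_iter() and demo_extract_pages(), the buffer sizes,
 * and the 16-page cap are made-up assumptions for illustration; only the
 * iov_iter/uio calls themselves come from the code above.
 */
#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sizes.h>

/*
 * Wrap a single kernel buffer in an ITER_DEST kvec iterator and fill it
 * from @src with _copy_to_iter().  The copy advances the iterator by the
 * amount copied; iov_iter_revert() walks back to the prior position.
 */
static size_t demo_fill_iter(void *dst, size_t dst_len,
			     const void *src, size_t src_len)
{
	struct kvec kv = { .iov_base = dst, .iov_len = dst_len };
	struct iov_iter iter;
	size_t copied;

	iov_iter_kvec(&iter, ITER_DEST, &kv, 1, dst_len);
	copied = _copy_to_iter(src, min(src_len, dst_len), &iter);

	/*
	 * Example of undoing a partial copy before retrying from scratch
	 * (user-backed destinations can short-copy; a kvec one normally
	 * will not).
	 */
	if (copied && copied < min(src_len, dst_len))
		iov_iter_revert(&iter, copied);

	return copied;
}

/*
 * Pull up to 16 pinned pages out of a (typically user-backed) iterator with
 * iov_iter_extract_pages(), then release them the way the kerneldoc above
 * prescribes: unpin only when iov_iter_extract_will_pin() reports that pins
 * were taken, and free the page array that the helper allocated for us.
 */
static ssize_t demo_extract_pages(struct iov_iter *i)
{
	struct page **pages = NULL;	/* NULL: let the helper allocate it */
	size_t offset;
	ssize_t len;
	int k;

	len = iov_iter_extract_pages(i, &pages, SZ_64K, 16, 0, &offset);
	if (len <= 0)
		return len;

	/* ... pages[] and @offset would feed bio/DMA setup here ... */

	if (iov_iter_extract_will_pin(i))
		for (k = 0; k < DIV_ROUND_UP(offset + len, PAGE_SIZE); k++)
			unpin_user_page(pages[k]);
	kvfree(pages);
	return len;
}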
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/swapfile.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include
<linux/slab.h> #include <linux/kernel_stat.h> #include <linux/swap.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/shmem_fs.h> #include <linux/blk-cgroup.h> #include <linux/random.h> #include <linux/writeback.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/security.h> #include <linux/backing-dev.h> #include <linux/mutex.h> #include <linux/capability.h> #include <linux/syscalls.h> #include <linux/memcontrol.h> #include <linux/poll.h> #include <linux/oom.h> #include <linux/swapfile.h> #include <linux/export.h> #include <linux/swap_slots.h> #include <linux/sort.h> #include <linux/completion.h> #include <linux/suspend.h> #include <linux/zswap.h> #include <linux/plist.h> #include <asm/tlbflush.h> #include <linux/swapops.h> #include <linux/swap_cgroup.h> #include "internal.h" #include "swap.h" static bool swap_count_continued(struct swap_info_struct *, pgoff_t, unsigned char); static void free_swap_count_continuations(struct swap_info_struct *); static DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; atomic_long_t nr_swap_pages; /* * Some modules use swappable objects and may try to swap them out under * memory pressure (via the shrinker). Before doing so, they may wish to * check to see if any swap space is available. */ EXPORT_SYMBOL_GPL(nr_swap_pages); /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ long total_swap_pages; static int least_priority = -1; unsigned long swapfile_maximum_size; #ifdef CONFIG_MIGRATION bool swap_migration_ad_supported; #endif /* CONFIG_MIGRATION */ static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; /* * all active swap_info_structs * protected with swap_lock, and ordered by priority. */ static PLIST_HEAD(swap_active_head); /* * all available (active, not full) swap_info_structs * protected with swap_avail_lock, ordered by priority. * This is used by folio_alloc_swap() instead of swap_active_head * because swap_active_head includes all swap_info_structs, * but folio_alloc_swap() doesn't need to look at full ones. * This uses its own lock instead of swap_lock because when a * swap_info_struct changes between not-full/full, it needs to * add/remove itself to/from this list, but the swap_info_struct->lock * is held and the locking order requires swap_lock to be taken * before any swap_info_struct->lock. 
*/ static struct plist_head *swap_avail_heads; static DEFINE_SPINLOCK(swap_avail_lock); static struct swap_info_struct *swap_info[MAX_SWAPFILES]; static DEFINE_MUTEX(swapon_mutex); static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait); /* Activity counter to indicate that a swapon or swapoff has occurred */ static atomic_t proc_poll_event = ATOMIC_INIT(0); atomic_t nr_rotate_swap = ATOMIC_INIT(0); static struct swap_info_struct *swap_type_to_swap_info(int type) { if (type >= MAX_SWAPFILES) return NULL; return READ_ONCE(swap_info[type]); /* rcu_dereference() */ } static inline unsigned char swap_count(unsigned char ent) { return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ } /* Reclaim the swap entry anyway if possible */ #define TTRS_ANYWAY 0x1 /* * Reclaim the swap entry if there are no more mappings of the * corresponding page */ #define TTRS_UNMAPPED 0x2 /* Reclaim the swap entry if swap is getting full*/ #define TTRS_FULL 0x4 /* returns 1 if swap entry is freed */ static int __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset, unsigned long flags) { swp_entry_t entry = swp_entry(si->type, offset); struct folio *folio; int ret = 0; folio = filemap_get_folio(swap_address_space(entry), offset); if (IS_ERR(folio)) return 0; /* * When this function is called from scan_swap_map_slots() and it's * called by vmscan.c at reclaiming folios. So we hold a folio lock * here. We have to use trylock for avoiding deadlock. This is a special * case and you should use folio_free_swap() with explicit folio_lock() * in usual operations. */ if (folio_trylock(folio)) { if ((flags & TTRS_ANYWAY) || ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) || ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio))) ret = folio_free_swap(folio); folio_unlock(folio); } folio_put(folio); return ret; } static inline struct swap_extent *first_se(struct swap_info_struct *sis) { struct rb_node *rb = rb_first(&sis->swap_extent_root); return rb_entry(rb, struct swap_extent, rb_node); } static inline struct swap_extent *next_se(struct swap_extent *se) { struct rb_node *rb = rb_next(&se->rb_node); return rb ? rb_entry(rb, struct swap_extent, rb_node) : NULL; } /* * swapon tell device that all the old swap contents can be discarded, * to allow the swap device to optimize its wear-levelling. */ static int discard_swap(struct swap_info_struct *si) { struct swap_extent *se; sector_t start_block; sector_t nr_blocks; int err = 0; /* Do not discard the swap header page! 
*/ se = first_se(si); start_block = (se->start_block + 1) << (PAGE_SHIFT - 9); nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); if (nr_blocks) { err = blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_KERNEL); if (err) return err; cond_resched(); } for (se = next_se(se); se; se = next_se(se)) { start_block = se->start_block << (PAGE_SHIFT - 9); nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); err = blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_KERNEL); if (err) break; cond_resched(); } return err; /* That will often be -EOPNOTSUPP */ } static struct swap_extent * offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) { struct swap_extent *se; struct rb_node *rb; rb = sis->swap_extent_root.rb_node; while (rb) { se = rb_entry(rb, struct swap_extent, rb_node); if (offset < se->start_page) rb = rb->rb_left; else if (offset >= se->start_page + se->nr_pages) rb = rb->rb_right; else return se; } /* It *must* be present */ BUG(); } sector_t swap_folio_sector(struct folio *folio) { struct swap_info_struct *sis = swp_swap_info(folio->swap); struct swap_extent *se; sector_t sector; pgoff_t offset; offset = swp_offset(folio->swap); se = offset_to_swap_extent(sis, offset); sector = se->start_block + (offset - se->start_page); return sector << (PAGE_SHIFT - 9); } /* * swap allocation tell device that a cluster of swap can now be discarded, * to allow the swap device to optimize its wear-levelling. */ static void discard_swap_cluster(struct swap_info_struct *si, pgoff_t start_page, pgoff_t nr_pages) { struct swap_extent *se = offset_to_swap_extent(si, start_page); while (nr_pages) { pgoff_t offset = start_page - se->start_page; sector_t start_block = se->start_block + offset; sector_t nr_blocks = se->nr_pages - offset; if (nr_blocks > nr_pages) nr_blocks = nr_pages; start_page += nr_blocks; nr_pages -= nr_blocks; start_block <<= PAGE_SHIFT - 9; nr_blocks <<= PAGE_SHIFT - 9; if (blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_NOIO)) break; se = next_se(se); } } #ifdef CONFIG_THP_SWAP #define SWAPFILE_CLUSTER HPAGE_PMD_NR #define swap_entry_size(size) (size) #else #define SWAPFILE_CLUSTER 256 /* * Define swap_entry_size() as constant to let compiler to optimize * out some code if !CONFIG_THP_SWAP */ #define swap_entry_size(size) 1 #endif #define LATENCY_LIMIT 256 static inline void cluster_set_flag(struct swap_cluster_info *info, unsigned int flag) { info->flags = flag; } static inline unsigned int cluster_count(struct swap_cluster_info *info) { return info->data; } static inline void cluster_set_count(struct swap_cluster_info *info, unsigned int c) { info->data = c; } static inline void cluster_set_count_flag(struct swap_cluster_info *info, unsigned int c, unsigned int f) { info->flags = f; info->data = c; } static inline unsigned int cluster_next(struct swap_cluster_info *info) { return info->data; } static inline void cluster_set_next(struct swap_cluster_info *info, unsigned int n) { info->data = n; } static inline void cluster_set_next_flag(struct swap_cluster_info *info, unsigned int n, unsigned int f) { info->flags = f; info->data = n; } static inline bool cluster_is_free(struct swap_cluster_info *info) { return info->flags & CLUSTER_FLAG_FREE; } static inline bool cluster_is_null(struct swap_cluster_info *info) { return info->flags & CLUSTER_FLAG_NEXT_NULL; } static inline void cluster_set_null(struct swap_cluster_info *info) { info->flags = CLUSTER_FLAG_NEXT_NULL; info->data = 0; } static inline bool cluster_is_huge(struct 
swap_cluster_info *info) { if (IS_ENABLED(CONFIG_THP_SWAP)) return info->flags & CLUSTER_FLAG_HUGE; return false; } static inline void cluster_clear_huge(struct swap_cluster_info *info) { info->flags &= ~CLUSTER_FLAG_HUGE; } static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si, unsigned long offset) { struct swap_cluster_info *ci; ci = si->cluster_info; if (ci) { ci += offset / SWAPFILE_CLUSTER; spin_lock(&ci->lock); } return ci; } static inline void unlock_cluster(struct swap_cluster_info *ci) { if (ci) spin_unlock(&ci->lock); } /* * Determine the locking method in use for this device. Return * swap_cluster_info if SSD-style cluster-based locking is in place. */ static inline struct swap_cluster_info *lock_cluster_or_swap_info( struct swap_info_struct *si, unsigned long offset) { struct swap_cluster_info *ci; /* Try to use fine-grained SSD-style locking if available: */ ci = lock_cluster(si, offset); /* Otherwise, fall back to traditional, coarse locking: */ if (!ci) spin_lock(&si->lock); return ci; } static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si, struct swap_cluster_info *ci) { if (ci) unlock_cluster(ci); else spin_unlock(&si->lock); } static inline bool cluster_list_empty(struct swap_cluster_list *list) { return cluster_is_null(&list->head); } static inline unsigned int cluster_list_first(struct swap_cluster_list *list) { return cluster_next(&list->head); } static void cluster_list_init(struct swap_cluster_list *list) { cluster_set_null(&list->head); cluster_set_null(&list->tail); } static void cluster_list_add_tail(struct swap_cluster_list *list, struct swap_cluster_info *ci, unsigned int idx) { if (cluster_list_empty(list)) { cluster_set_next_flag(&list->head, idx, 0); cluster_set_next_flag(&list->tail, idx, 0); } else { struct swap_cluster_info *ci_tail; unsigned int tail = cluster_next(&list->tail); /* * Nested cluster lock, but both cluster locks are * only acquired when we held swap_info_struct->lock */ ci_tail = ci + tail; spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING); cluster_set_next(ci_tail, idx); spin_unlock(&ci_tail->lock); cluster_set_next_flag(&list->tail, idx, 0); } } static unsigned int cluster_list_del_first(struct swap_cluster_list *list, struct swap_cluster_info *ci) { unsigned int idx; idx = cluster_next(&list->head); if (cluster_next(&list->tail) == idx) { cluster_set_null(&list->head); cluster_set_null(&list->tail); } else cluster_set_next_flag(&list->head, cluster_next(&ci[idx]), 0); return idx; } /* Add a cluster to discard list and schedule it to do discard */ static void swap_cluster_schedule_discard(struct swap_info_struct *si, unsigned int idx) { /* * If scan_swap_map_slots() can't find a free cluster, it will check * si->swap_map directly. To make sure the discarding cluster isn't * taken by scan_swap_map_slots(), mark the swap entries bad (occupied). * It will be cleared after discard */ memset(si->swap_map + idx * SWAPFILE_CLUSTER, SWAP_MAP_BAD, SWAPFILE_CLUSTER); cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx); schedule_work(&si->discard_work); } static void __free_cluster(struct swap_info_struct *si, unsigned long idx) { struct swap_cluster_info *ci = si->cluster_info; cluster_set_flag(ci + idx, CLUSTER_FLAG_FREE); cluster_list_add_tail(&si->free_clusters, ci, idx); } /* * Doing discard actually. After a cluster discard is finished, the cluster * will be added to free cluster list. caller should hold si->lock. 
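 */

/*
 * Illustrative sketch (not kernel code): a toy model of the discard hand-off
 * implemented by swap_cluster_schedule_discard() and
 * swap_do_scheduled_discard().  The demo_* names are invented; the point is
 * only the state sequence: on a discard-capable device a freed cluster is
 * first parked as "discarding" (its slots are marked bad so the slot scanner
 * skips them) and re-enters the free list only after the block-layer discard
 * has completed.
 */
enum demo_cluster_state {
	DEMO_CLUSTER_IN_USE,
	DEMO_CLUSTER_DISCARDING,	/* queued on the discard list */
	DEMO_CLUSTER_FREE,		/* on the free list, allocatable */
};

struct demo_cluster {
	enum demo_cluster_state state;
};

/* Freeing path: defer the transition to FREE when a discard is wanted. */
static void demo_free_cluster(struct demo_cluster *c, int device_discards)
{
	c->state = device_discards ? DEMO_CLUSTER_DISCARDING
				   : DEMO_CLUSTER_FREE;
}

/* Worker path: once the discard finishes, the cluster becomes allocatable. */
static void demo_discard_done(struct demo_cluster *c)
{
	c->state = DEMO_CLUSTER_FREE;
}

/*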
*/ static void swap_do_scheduled_discard(struct swap_info_struct *si) { struct swap_cluster_info *info, *ci; unsigned int idx; info = si->cluster_info; while (!cluster_list_empty(&si->discard_clusters)) { idx = cluster_list_del_first(&si->discard_clusters, info); spin_unlock(&si->lock); discard_swap_cluster(si, idx * SWAPFILE_CLUSTER, SWAPFILE_CLUSTER); spin_lock(&si->lock); ci = lock_cluster(si, idx * SWAPFILE_CLUSTER); __free_cluster(si, idx); memset(si->swap_map + idx * SWAPFILE_CLUSTER, 0, SWAPFILE_CLUSTER); unlock_cluster(ci); } } static void swap_discard_work(struct work_struct *work) { struct swap_info_struct *si; si = container_of(work, struct swap_info_struct, discard_work); spin_lock(&si->lock); swap_do_scheduled_discard(si); spin_unlock(&si->lock); } static void swap_users_ref_free(struct percpu_ref *ref) { struct swap_info_struct *si; si = container_of(ref, struct swap_info_struct, users); complete(&si->comp); } static void alloc_cluster(struct swap_info_struct *si, unsigned long idx) { struct swap_cluster_info *ci = si->cluster_info; VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx); cluster_list_del_first(&si->free_clusters, ci); cluster_set_count_flag(ci + idx, 0, 0); } static void free_cluster(struct swap_info_struct *si, unsigned long idx) { struct swap_cluster_info *ci = si->cluster_info + idx; VM_BUG_ON(cluster_count(ci) != 0); /* * If the swap is discardable, prepare discard the cluster * instead of free it immediately. The cluster will be freed * after discard. */ if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == (SWP_WRITEOK | SWP_PAGE_DISCARD)) { swap_cluster_schedule_discard(si, idx); return; } __free_cluster(si, idx); } /* * The cluster corresponding to page_nr will be used. The cluster will be * removed from free cluster list and its usage counter will be increased. */ static void inc_cluster_info_page(struct swap_info_struct *p, struct swap_cluster_info *cluster_info, unsigned long page_nr) { unsigned long idx = page_nr / SWAPFILE_CLUSTER; if (!cluster_info) return; if (cluster_is_free(&cluster_info[idx])) alloc_cluster(p, idx); VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER); cluster_set_count(&cluster_info[idx], cluster_count(&cluster_info[idx]) + 1); } /* * The cluster corresponding to page_nr decreases one usage. If the usage * counter becomes 0, which means no page in the cluster is in using, we can * optionally discard the cluster and add it to free cluster list. */ static void dec_cluster_info_page(struct swap_info_struct *p, struct swap_cluster_info *cluster_info, unsigned long page_nr) { unsigned long idx = page_nr / SWAPFILE_CLUSTER; if (!cluster_info) return; VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0); cluster_set_count(&cluster_info[idx], cluster_count(&cluster_info[idx]) - 1); if (cluster_count(&cluster_info[idx]) == 0) free_cluster(p, idx); } /* * It's possible scan_swap_map_slots() uses a free cluster in the middle of free * cluster list. Avoiding such abuse to avoid list corruption. 
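 */

/*
 * Illustrative sketch (not kernel code): the conflict test that follows,
 * restated over plain arrays.  The demo_* names and DEMO_CLUSTER_PAGES are
 * invented stand-ins.  An offset "conflicts" when it falls inside a cluster
 * that is still on the free list but is not the first free cluster, because
 * allocating out of the middle of that list would corrupt it.
 */
#include <stdbool.h>

#define DEMO_CLUSTER_PAGES	256	/* stand-in for SWAPFILE_CLUSTER */

static bool demo_cluster_conflict(const bool *cluster_is_free,
				  unsigned long first_free_cluster,
				  bool free_list_empty,
				  unsigned long offset)
{
	unsigned long cluster = offset / DEMO_CLUSTER_PAGES;

	return !free_list_empty &&
	       cluster != first_free_cluster &&
	       cluster_is_free[cluster];
}

/*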
*/ static bool scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si, unsigned long offset) { struct percpu_cluster *percpu_cluster; bool conflict; offset /= SWAPFILE_CLUSTER; conflict = !cluster_list_empty(&si->free_clusters) && offset != cluster_list_first(&si->free_clusters) && cluster_is_free(&si->cluster_info[offset]); if (!conflict) return false; percpu_cluster = this_cpu_ptr(si->percpu_cluster); cluster_set_null(&percpu_cluster->index); return true; } /* * Try to get a swap entry from current cpu's swap entry pool (a cluster). This * might involve allocating a new cluster for current CPU too. */ static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, unsigned long *offset, unsigned long *scan_base) { struct percpu_cluster *cluster; struct swap_cluster_info *ci; unsigned long tmp, max; new_cluster: cluster = this_cpu_ptr(si->percpu_cluster); if (cluster_is_null(&cluster->index)) { if (!cluster_list_empty(&si->free_clusters)) { cluster->index = si->free_clusters.head; cluster->next = cluster_next(&cluster->index) * SWAPFILE_CLUSTER; } else if (!cluster_list_empty(&si->discard_clusters)) { /* * we don't have free cluster but have some clusters in * discarding, do discard now and reclaim them, then * reread cluster_next_cpu since we dropped si->lock */ swap_do_scheduled_discard(si); *scan_base = this_cpu_read(*si->cluster_next_cpu); *offset = *scan_base; goto new_cluster; } else return false; } /* * Other CPUs can use our cluster if they can't find a free cluster, * check if there is still free entry in the cluster */ tmp = cluster->next; max = min_t(unsigned long, si->max, (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER); if (tmp < max) { ci = lock_cluster(si, tmp); while (tmp < max) { if (!si->swap_map[tmp]) break; tmp++; } unlock_cluster(ci); } if (tmp >= max) { cluster_set_null(&cluster->index); goto new_cluster; } cluster->next = tmp + 1; *offset = tmp; *scan_base = tmp; return true; } static void __del_from_avail_list(struct swap_info_struct *p) { int nid; assert_spin_locked(&p->lock); for_each_node(nid) plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]); } static void del_from_avail_list(struct swap_info_struct *p) { spin_lock(&swap_avail_lock); __del_from_avail_list(p); spin_unlock(&swap_avail_lock); } static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, unsigned int nr_entries) { unsigned int end = offset + nr_entries - 1; if (offset == si->lowest_bit) si->lowest_bit += nr_entries; if (end == si->highest_bit) WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); WRITE_ONCE(si->inuse_pages, si->inuse_pages + nr_entries); if (si->inuse_pages == si->pages) { si->lowest_bit = si->max; si->highest_bit = 0; del_from_avail_list(si); } } static void add_to_avail_list(struct swap_info_struct *p) { int nid; spin_lock(&swap_avail_lock); for_each_node(nid) plist_add(&p->avail_lists[nid], &swap_avail_heads[nid]); spin_unlock(&swap_avail_lock); } static void swap_range_free(struct swap_info_struct *si, unsigned long offset, unsigned int nr_entries) { unsigned long begin = offset; unsigned long end = offset + nr_entries - 1; void (*swap_slot_free_notify)(struct block_device *, unsigned long); if (offset < si->lowest_bit) si->lowest_bit = offset; if (end > si->highest_bit) { bool was_full = !si->highest_bit; WRITE_ONCE(si->highest_bit, end); if (was_full && (si->flags & SWP_WRITEOK)) add_to_avail_list(si); } if (si->flags & SWP_BLKDEV) swap_slot_free_notify = si->bdev->bd_disk->fops->swap_slot_free_notify; else 
swap_slot_free_notify = NULL; while (offset <= end) { arch_swap_invalidate_page(si->type, offset); if (swap_slot_free_notify) swap_slot_free_notify(si->bdev, offset); offset++; } clear_shadow_from_swap_cache(si->type, begin, end); /* * Make sure that try_to_unuse() observes si->inuse_pages reaching 0 * only after the above cleanups are done. */ smp_wmb(); atomic_long_add(nr_entries, &nr_swap_pages); WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries); } static void set_cluster_next(struct swap_info_struct *si, unsigned long next) { unsigned long prev; if (!(si->flags & SWP_SOLIDSTATE)) { si->cluster_next = next; return; } prev = this_cpu_read(*si->cluster_next_cpu); /* * Cross the swap address space size aligned trunk, choose * another trunk randomly to avoid lock contention on swap * address space if possible. */ if ((prev >> SWAP_ADDRESS_SPACE_SHIFT) != (next >> SWAP_ADDRESS_SPACE_SHIFT)) { /* No free swap slots available */ if (si->highest_bit <= si->lowest_bit) return; next = get_random_u32_inclusive(si->lowest_bit, si->highest_bit); next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES); next = max_t(unsigned int, next, si->lowest_bit); } this_cpu_write(*si->cluster_next_cpu, next); } static bool swap_offset_available_and_locked(struct swap_info_struct *si, unsigned long offset) { if (data_race(!si->swap_map[offset])) { spin_lock(&si->lock); return true; } if (vm_swap_full() && READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { spin_lock(&si->lock); return true; } return false; } static int scan_swap_map_slots(struct swap_info_struct *si, unsigned char usage, int nr, swp_entry_t slots[]) { struct swap_cluster_info *ci; unsigned long offset; unsigned long scan_base; unsigned long last_in_cluster = 0; int latency_ration = LATENCY_LIMIT; int n_ret = 0; bool scanned_many = false; /* * We try to cluster swap pages by allocating them sequentially * in swap. Once we've allocated SWAPFILE_CLUSTER pages this * way, however, we resort to first-free allocation, starting * a new cluster. This prevents us from scattering swap pages * all over the entire swap partition, so that we reduce * overall disk seek times between swap pages. -- sct * But we do now try to find an empty cluster. -Andrea * And we let swap pages go all over an SSD partition. Hugh */ si->flags += SWP_SCANNING; /* * Use percpu scan base for SSD to reduce lock contention on * cluster and swap cache. For HDD, sequential access is more * important. */ if (si->flags & SWP_SOLIDSTATE) scan_base = this_cpu_read(*si->cluster_next_cpu); else scan_base = si->cluster_next; offset = scan_base; /* SSD algorithm */ if (si->cluster_info) { if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) goto scan; } else if (unlikely(!si->cluster_nr--)) { if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) { si->cluster_nr = SWAPFILE_CLUSTER - 1; goto checks; } spin_unlock(&si->lock); /* * If seek is expensive, start searching for new cluster from * start of partition, to minimize the span of allocated swap. * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info * case, just handled by scan_swap_map_try_ssd_cluster() above. 
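 *
 * (Note on the loop below: it looks for a fully free, not necessarily
 * aligned, run of SWAPFILE_CLUSTER slots.  Every occupied slot pushes
 * last_in_cluster a whole cluster further out, so reaching
 * offset == last_in_cluster means the preceding SWAPFILE_CLUSTER entries
 * were all free, and allocation restarts at the beginning of that run.)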
*/ scan_base = offset = si->lowest_bit; last_in_cluster = offset + SWAPFILE_CLUSTER - 1; /* Locate the first empty (unaligned) cluster */ for (; last_in_cluster <= si->highest_bit; offset++) { if (si->swap_map[offset]) last_in_cluster = offset + SWAPFILE_CLUSTER; else if (offset == last_in_cluster) { spin_lock(&si->lock); offset -= SWAPFILE_CLUSTER - 1; si->cluster_next = offset; si->cluster_nr = SWAPFILE_CLUSTER - 1; goto checks; } if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; } } offset = scan_base; spin_lock(&si->lock); si->cluster_nr = SWAPFILE_CLUSTER - 1; } checks: if (si->cluster_info) { while (scan_swap_map_ssd_cluster_conflict(si, offset)) { /* take a break if we already got some slots */ if (n_ret) goto done; if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) goto scan; } } if (!(si->flags & SWP_WRITEOK)) goto no_page; if (!si->highest_bit) goto no_page; if (offset > si->highest_bit) scan_base = offset = si->lowest_bit; ci = lock_cluster(si, offset); /* reuse swap entry of cache-only swap if not busy. */ if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { int swap_was_freed; unlock_cluster(ci); spin_unlock(&si->lock); swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); spin_lock(&si->lock); /* entry was freed successfully, try to use this again */ if (swap_was_freed) goto checks; goto scan; /* check next one */ } if (si->swap_map[offset]) { unlock_cluster(ci); if (!n_ret) goto scan; else goto done; } WRITE_ONCE(si->swap_map[offset], usage); inc_cluster_info_page(si, si->cluster_info, offset); unlock_cluster(ci); swap_range_alloc(si, offset, 1); slots[n_ret++] = swp_entry(si->type, offset); /* got enough slots or reach max slots? */ if ((n_ret == nr) || (offset >= si->highest_bit)) goto done; /* search for next available slot */ /* time to take a break? */ if (unlikely(--latency_ration < 0)) { if (n_ret) goto done; spin_unlock(&si->lock); cond_resched(); spin_lock(&si->lock); latency_ration = LATENCY_LIMIT; } /* try to get more slots in cluster */ if (si->cluster_info) { if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) goto checks; } else if (si->cluster_nr && !si->swap_map[++offset]) { /* non-ssd case, still more slots in cluster? */ --si->cluster_nr; goto checks; } /* * Even if there's no free clusters available (fragmented), * try to scan a little more quickly with lock held unless we * have scanned too many slots already. 
*/ if (!scanned_many) { unsigned long scan_limit; if (offset < scan_base) scan_limit = scan_base; else scan_limit = si->highest_bit; for (; offset <= scan_limit && --latency_ration > 0; offset++) { if (!si->swap_map[offset]) goto checks; } } done: set_cluster_next(si, offset + 1); si->flags -= SWP_SCANNING; return n_ret; scan: spin_unlock(&si->lock); while (++offset <= READ_ONCE(si->highest_bit)) { if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } if (swap_offset_available_and_locked(si, offset)) goto checks; } offset = si->lowest_bit; while (offset < scan_base) { if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } if (swap_offset_available_and_locked(si, offset)) goto checks; offset++; } spin_lock(&si->lock); no_page: si->flags -= SWP_SCANNING; return n_ret; } static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot) { unsigned long idx; struct swap_cluster_info *ci; unsigned long offset; /* * Should not even be attempting cluster allocations when huge * page swap is disabled. Warn and fail the allocation. */ if (!IS_ENABLED(CONFIG_THP_SWAP)) { VM_WARN_ON_ONCE(1); return 0; } if (cluster_list_empty(&si->free_clusters)) return 0; idx = cluster_list_first(&si->free_clusters); offset = idx * SWAPFILE_CLUSTER; ci = lock_cluster(si, offset); alloc_cluster(si, idx); cluster_set_count_flag(ci, SWAPFILE_CLUSTER, CLUSTER_FLAG_HUGE); memset(si->swap_map + offset, SWAP_HAS_CACHE, SWAPFILE_CLUSTER); unlock_cluster(ci); swap_range_alloc(si, offset, SWAPFILE_CLUSTER); *slot = swp_entry(si->type, offset); return 1; } static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx) { unsigned long offset = idx * SWAPFILE_CLUSTER; struct swap_cluster_info *ci; ci = lock_cluster(si, offset); memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER); cluster_set_count_flag(ci, 0, 0); free_cluster(si, idx); unlock_cluster(ci); swap_range_free(si, offset, SWAPFILE_CLUSTER); } int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size) { unsigned long size = swap_entry_size(entry_size); struct swap_info_struct *si, *next; long avail_pgs; int n_ret = 0; int node; /* Only single cluster request supported */ WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER); spin_lock(&swap_avail_lock); avail_pgs = atomic_long_read(&nr_swap_pages) / size; if (avail_pgs <= 0) { spin_unlock(&swap_avail_lock); goto noswap; } n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs); atomic_long_sub(n_goal * size, &nr_swap_pages); start_over: node = numa_node_id(); plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { /* requeue si to after same-priority siblings */ plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); spin_lock(&si->lock); if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) { spin_lock(&swap_avail_lock); if (plist_node_empty(&si->avail_lists[node])) { spin_unlock(&si->lock); goto nextsi; } WARN(!si->highest_bit, "swap_info %d in list but !highest_bit\n", si->type); WARN(!(si->flags & SWP_WRITEOK), "swap_info %d in list but !SWP_WRITEOK\n", si->type); __del_from_avail_list(si); spin_unlock(&si->lock); goto nextsi; } if (size == SWAPFILE_CLUSTER) { if (si->flags & SWP_BLKDEV) n_ret = swap_alloc_cluster(si, swp_entries); } else n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal, swp_entries); spin_unlock(&si->lock); if (n_ret || size == SWAPFILE_CLUSTER) goto check_out; cond_resched(); 
spin_lock(&swap_avail_lock); nextsi: /* * if we got here, it's likely that si was almost full before, * and since scan_swap_map_slots() can drop the si->lock, * multiple callers probably all tried to get a page from the * same si and it filled up before we could get one; or, the si * filled up between us dropping swap_avail_lock and taking * si->lock. Since we dropped the swap_avail_lock, the * swap_avail_head list may have been modified; so if next is * still in the swap_avail_head list then try it, otherwise * start over if we have not gotten any slots. */ if (plist_node_empty(&next->avail_lists[node])) goto start_over; } spin_unlock(&swap_avail_lock); check_out: if (n_ret < n_goal) atomic_long_add((long)(n_goal - n_ret) * size, &nr_swap_pages); noswap: return n_ret; } static struct swap_info_struct *_swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; unsigned long offset; if (!entry.val) goto out; p = swp_swap_info(entry); if (!p) goto bad_nofile; if (data_race(!(p->flags & SWP_USED))) goto bad_device; offset = swp_offset(entry); if (offset >= p->max) goto bad_offset; if (data_race(!p->swap_map[swp_offset(entry)])) goto bad_free; return p; bad_free: pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val); goto out; bad_offset: pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val); goto out; bad_device: pr_err("%s: %s%08lx\n", __func__, Unused_file, entry.val); goto out; bad_nofile: pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val); out: return NULL; } static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry, struct swap_info_struct *q) { struct swap_info_struct *p; p = _swap_info_get(entry); if (p != q) { if (q != NULL) spin_unlock(&q->lock); if (p != NULL) spin_lock(&p->lock); } return p; } static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, unsigned long offset, unsigned char usage) { unsigned char count; unsigned char has_cache; count = p->swap_map[offset]; has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; if (usage == SWAP_HAS_CACHE) { VM_BUG_ON(!has_cache); has_cache = 0; } else if (count == SWAP_MAP_SHMEM) { /* * Or we could insist on shmem.c using a special * swap_shmem_free() and free_shmem_swap_and_cache()... */ count = 0; } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) { if (count == COUNT_CONTINUED) { if (swap_count_continued(p, offset, count)) count = SWAP_MAP_MAX | COUNT_CONTINUED; else count = SWAP_MAP_MAX; } else count--; } usage = count | has_cache; if (usage) WRITE_ONCE(p->swap_map[offset], usage); else WRITE_ONCE(p->swap_map[offset], SWAP_HAS_CACHE); return usage; } /* * When we get a swap entry, if there aren't some other ways to * prevent swapoff, such as the folio in swap cache is locked, page * table lock is held, etc., the swap entry may become invalid because * of swapoff. Then, we need to enclose all swap related functions * with get_swap_device() and put_swap_device(), unless the swap * functions call get/put_swap_device() by themselves. * * Note that when only holding the PTL, swapoff might succeed immediately * after freeing a swap entry. Therefore, immediately after * __swap_entry_free(), the swap info might become stale and should not * be touched without a prior get_swap_device(). * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until * put_swap_device() is called. Otherwise return NULL. 
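 *
 * A typical caller therefore follows this pattern (illustrative sketch, not
 * a quote of any particular caller):
 *
 *	si = get_swap_device(entry);
 *	if (!si)
 *		return;
 *	... dereference si, e.g. read si->swap_map[swp_offset(entry)] ...
 *	put_swap_device(si);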
* * Notice that swapoff or swapoff+swapon can still happen before the * percpu_ref_tryget_live() in get_swap_device() or after the * percpu_ref_put() in put_swap_device() if there isn't any other way * to prevent swapoff. The caller must be prepared for that. For * example, the following situation is possible. * * CPU1 CPU2 * do_swap_page() * ... swapoff+swapon * __read_swap_cache_async() * swapcache_prepare() * __swap_duplicate() * // check swap_map * // verify PTE not changed * * In __swap_duplicate(), the swap_map need to be checked before * changing partly because the specified swap entry may be for another * swap device which has been swapoff. And in do_swap_page(), after * the page is read from the swap device, the PTE is verified not * changed with the page table locked to check whether the swap device * has been swapoff or swapoff+swapon. */ struct swap_info_struct *get_swap_device(swp_entry_t entry) { struct swap_info_struct *si; unsigned long offset; if (!entry.val) goto out; si = swp_swap_info(entry); if (!si) goto bad_nofile; if (!percpu_ref_tryget_live(&si->users)) goto out; /* * Guarantee the si->users are checked before accessing other * fields of swap_info_struct. * * Paired with the spin_unlock() after setup_swap_info() in * enable_swap_info(). */ smp_rmb(); offset = swp_offset(entry); if (offset >= si->max) goto put_out; return si; bad_nofile: pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val); out: return NULL; put_out: pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val); percpu_ref_put(&si->users); return NULL; } static unsigned char __swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) { struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char usage; ci = lock_cluster_or_swap_info(p, offset); usage = __swap_entry_free_locked(p, offset, 1); unlock_cluster_or_swap_info(p, ci); if (!usage) free_swap_slot(entry); return usage; } static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) { struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char count; ci = lock_cluster(p, offset); count = p->swap_map[offset]; VM_BUG_ON(count != SWAP_HAS_CACHE); p->swap_map[offset] = 0; dec_cluster_info_page(p, p->cluster_info, offset); unlock_cluster(ci); mem_cgroup_uncharge_swap(entry, 1); swap_range_free(p, offset, 1); } /* * Caller has made sure that the swap device corresponding to entry * is still around or has not been recycled. */ void swap_free(swp_entry_t entry) { struct swap_info_struct *p; p = _swap_info_get(entry); if (p) __swap_entry_free(p, entry); } /* * Called after dropping swapcache to decrease refcnt to swap entries. 
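 */

/*
 * Illustrative sketch (not kernel code): how a swap_map byte is interpreted
 * by helpers such as swap_count() above and __swap_entry_free_locked().
 * DEMO_SWAP_HAS_CACHE is a stand-in; the real flag and count limits come
 * from <linux/swap.h>.  The byte carries a reference count in its low bits
 * (possibly with a continuation flag, see swp_swapcount() below) plus one
 * bit saying whether the swap cache also holds a reference.
 */
#include <stdbool.h>

#define DEMO_SWAP_HAS_CACHE	0x40	/* stand-in for SWAP_HAS_CACHE */

struct demo_map_entry {
	unsigned char	count;		/* references from page tables etc. */
	bool		has_cache;	/* swap cache holds a reference too */
};

static struct demo_map_entry demo_decode_map_byte(unsigned char map_byte)
{
	struct demo_map_entry e = {
		.count		= map_byte & ~DEMO_SWAP_HAS_CACHE,
		.has_cache	= (map_byte & DEMO_SWAP_HAS_CACHE) != 0,
	};

	/* The slot is completely free only when both parts are zero. */
	return e;
}

/*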
*/ void put_swap_folio(struct folio *folio, swp_entry_t entry) { unsigned long offset = swp_offset(entry); unsigned long idx = offset / SWAPFILE_CLUSTER; struct swap_cluster_info *ci; struct swap_info_struct *si; unsigned char *map; unsigned int i, free_entries = 0; unsigned char val; int size = swap_entry_size(folio_nr_pages(folio)); si = _swap_info_get(entry); if (!si) return; ci = lock_cluster_or_swap_info(si, offset); if (size == SWAPFILE_CLUSTER) { VM_BUG_ON(!cluster_is_huge(ci)); map = si->swap_map + offset; for (i = 0; i < SWAPFILE_CLUSTER; i++) { val = map[i]; VM_BUG_ON(!(val & SWAP_HAS_CACHE)); if (val == SWAP_HAS_CACHE) free_entries++; } cluster_clear_huge(ci); if (free_entries == SWAPFILE_CLUSTER) { unlock_cluster_or_swap_info(si, ci); spin_lock(&si->lock); mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER); swap_free_cluster(si, idx); spin_unlock(&si->lock); return; } } for (i = 0; i < size; i++, entry.val++) { if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) { unlock_cluster_or_swap_info(si, ci); free_swap_slot(entry); if (i == size - 1) return; lock_cluster_or_swap_info(si, offset); } } unlock_cluster_or_swap_info(si, ci); } #ifdef CONFIG_THP_SWAP int split_swap_cluster(swp_entry_t entry) { struct swap_info_struct *si; struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); si = _swap_info_get(entry); if (!si) return -EBUSY; ci = lock_cluster(si, offset); cluster_clear_huge(ci); unlock_cluster(ci); return 0; } #endif static int swp_entry_cmp(const void *ent1, const void *ent2) { const swp_entry_t *e1 = ent1, *e2 = ent2; return (int)swp_type(*e1) - (int)swp_type(*e2); } void swapcache_free_entries(swp_entry_t *entries, int n) { struct swap_info_struct *p, *prev; int i; if (n <= 0) return; prev = NULL; p = NULL; /* * Sort swap entries by swap device, so each lock is only taken once. * nr_swapfiles isn't absolutely correct, but the overhead of sort() is * so low that it isn't necessary to optimize further. */ if (nr_swapfiles > 1) sort(entries, n, sizeof(entries[0]), swp_entry_cmp, NULL); for (i = 0; i < n; ++i) { p = swap_info_get_cont(entries[i], prev); if (p) swap_entry_free(p, entries[i]); prev = p; } if (p) spin_unlock(&p->lock); } int __swap_count(swp_entry_t entry) { struct swap_info_struct *si = swp_swap_info(entry); pgoff_t offset = swp_offset(entry); return swap_count(si->swap_map[offset]); } /* * How many references to @entry are currently swapped out? * This does not give an exact answer when swap count is continued, * but does include the high COUNT_CONTINUED flag to allow for that. */ int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) { pgoff_t offset = swp_offset(entry); struct swap_cluster_info *ci; int count; ci = lock_cluster_or_swap_info(si, offset); count = swap_count(si->swap_map[offset]); unlock_cluster_or_swap_info(si, ci); return count; } /* * How many references to @entry are currently swapped out? * This considers COUNT_CONTINUED so it returns exact answer. 
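 */

/*
 * Illustrative sketch (not kernel code): the arithmetic swp_swapcount()
 * below uses once COUNT_CONTINUED is set.  Each continuation page stores a
 * further "digit" of the reference count, so the total is
 *
 *	total = base + d0 * (SWAP_MAP_MAX + 1)
 *		     + d1 * (SWAP_MAP_MAX + 1) * (SWAP_CONT_MAX + 1)
 *		     + d2 * (SWAP_MAP_MAX + 1) * (SWAP_CONT_MAX + 1)^2 + ...
 *
 * The DEMO_* constants are stand-ins for the real limits in <linux/swap.h>.
 */
#define DEMO_MAP_MAX	0x3e	/* stand-in for SWAP_MAP_MAX */
#define DEMO_CONT_MAX	0x7f	/* stand-in for SWAP_CONT_MAX */

static unsigned long demo_total_swap_count(unsigned int base,
					   const unsigned char *digits,
					   unsigned int nr_digits)
{
	unsigned long weight = DEMO_MAP_MAX + 1;	/* first digit's weight */
	unsigned long total = base;
	unsigned int i;

	for (i = 0; i < nr_digits; i++) {
		total += (unsigned long)digits[i] * weight;
		weight *= DEMO_CONT_MAX + 1;	/* each level weighs more */
	}
	return total;
}

/*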
*/ int swp_swapcount(swp_entry_t entry) { int count, tmp_count, n; struct swap_info_struct *p; struct swap_cluster_info *ci; struct page *page; pgoff_t offset; unsigned char *map; p = _swap_info_get(entry); if (!p) return 0; offset = swp_offset(entry); ci = lock_cluster_or_swap_info(p, offset); count = swap_count(p->swap_map[offset]); if (!(count & COUNT_CONTINUED)) goto out; count &= ~COUNT_CONTINUED; n = SWAP_MAP_MAX + 1; page = vmalloc_to_page(p->swap_map + offset); offset &= ~PAGE_MASK; VM_BUG_ON(page_private(page) != SWP_CONTINUED); do { page = list_next_entry(page, lru); map = kmap_local_page(page); tmp_count = map[offset]; kunmap_local(map); count += (tmp_count & ~COUNT_CONTINUED) * n; n *= (SWAP_CONT_MAX + 1); } while (tmp_count & COUNT_CONTINUED); out: unlock_cluster_or_swap_info(p, ci); return count; } static bool swap_page_trans_huge_swapped(struct swap_info_struct *si, swp_entry_t entry) { struct swap_cluster_info *ci; unsigned char *map = si->swap_map; unsigned long roffset = swp_offset(entry); unsigned long offset = round_down(roffset, SWAPFILE_CLUSTER); int i; bool ret = false; ci = lock_cluster_or_swap_info(si, offset); if (!ci || !cluster_is_huge(ci)) { if (swap_count(map[roffset])) ret = true; goto unlock_out; } for (i = 0; i < SWAPFILE_CLUSTER; i++) { if (swap_count(map[offset + i])) { ret = true; break; } } unlock_out: unlock_cluster_or_swap_info(si, ci); return ret; } static bool folio_swapped(struct folio *folio) { swp_entry_t entry = folio->swap; struct swap_info_struct *si = _swap_info_get(entry); if (!si) return false; if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio))) return swap_swapcount(si, entry) != 0; return swap_page_trans_huge_swapped(si, entry); } /** * folio_free_swap() - Free the swap space used for this folio. * @folio: The folio to remove. * * If swap is getting full, or if there are no more mappings of this folio, * then call folio_free_swap to free its swap space. * * Return: true if we were able to release the swap space. */ bool folio_free_swap(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (!folio_test_swapcache(folio)) return false; if (folio_test_writeback(folio)) return false; if (folio_swapped(folio)) return false; /* * Once hibernation has begun to create its image of memory, * there's a danger that one of the calls to folio_free_swap() * - most probably a call from __try_to_reclaim_swap() while * hibernation is allocating its own swap pages for the image, * but conceivably even a call from memory reclaim - will free * the swap from a folio which has already been recorded in the * image as a clean swapcache folio, and then reuse its swap for * another page of the image. On waking from hibernation, the * original folio might be freed under memory pressure, then * later read back in from swap, now with the wrong data. * * Hibernation suspends storage while it is writing the image * to disk so check that here. */ if (pm_suspended_storage()) return false; delete_from_swap_cache(folio); folio_set_dirty(folio); return true; } /* * Free the swap entry like above, but also try to * free the page cache entry if it is the last user. 
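 *
 * In outline (descriptive note based on the body below): __swap_entry_free()
 * drops one reference; if afterwards only the swap-cache reference remains
 * and no entry of the containing THP cluster is still in use, the swap-cache
 * folio itself is reclaimed via __try_to_reclaim_swap() with
 * TTRS_UNMAPPED | TTRS_FULL.  All of this runs under get_swap_device() /
 * put_swap_device() so the device cannot be swapped off concurrently.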
*/ int free_swap_and_cache(swp_entry_t entry) { struct swap_info_struct *p; unsigned char count; if (non_swap_entry(entry)) return 1; p = get_swap_device(entry); if (p) { if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) { put_swap_device(p); return 0; } count = __swap_entry_free(p, entry); if (count == SWAP_HAS_CACHE && !swap_page_trans_huge_swapped(p, entry)) __try_to_reclaim_swap(p, swp_offset(entry), TTRS_UNMAPPED | TTRS_FULL); put_swap_device(p); } return p != NULL; } #ifdef CONFIG_HIBERNATION swp_entry_t get_swap_page_of_type(int type) { struct swap_info_struct *si = swap_type_to_swap_info(type); swp_entry_t entry = {0}; if (!si) goto fail; /* This is called for allocating swap entry, not cache */ spin_lock(&si->lock); if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry)) atomic_long_dec(&nr_swap_pages); spin_unlock(&si->lock); fail: return entry; } /* * Find the swap type that corresponds to given device (if any). * * @offset - number of the PAGE_SIZE-sized block of the device, starting * from 0, in which the swap header is expected to be located. * * This is needed for the suspend to disk (aka swsusp). */ int swap_type_of(dev_t device, sector_t offset) { int type; if (!device) return -1; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *sis = swap_info[type]; if (!(sis->flags & SWP_WRITEOK)) continue; if (device == sis->bdev->bd_dev) { struct swap_extent *se = first_se(sis); if (se->start_block == offset) { spin_unlock(&swap_lock); return type; } } } spin_unlock(&swap_lock); return -ENODEV; } int find_first_swap(dev_t *device) { int type; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *sis = swap_info[type]; if (!(sis->flags & SWP_WRITEOK)) continue; *device = sis->bdev->bd_dev; spin_unlock(&swap_lock); return type; } spin_unlock(&swap_lock); return -ENODEV; } /* * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev * corresponding to given index in swap_info (swap type). */ sector_t swapdev_block(int type, pgoff_t offset) { struct swap_info_struct *si = swap_type_to_swap_info(type); struct swap_extent *se; if (!si || !(si->flags & SWP_WRITEOK)) return 0; se = offset_to_swap_extent(si, offset); return se->start_block + (offset - se->start_page); } /* * Return either the total number of swap pages of given type, or the number * of free pages of that type (depending on @free) * * This is needed for software suspend */ unsigned int count_swap_pages(int type, int free) { unsigned int n = 0; spin_lock(&swap_lock); if ((unsigned int)type < nr_swapfiles) { struct swap_info_struct *sis = swap_info[type]; spin_lock(&sis->lock); if (sis->flags & SWP_WRITEOK) { n = sis->pages; if (free) n -= sis->inuse_pages; } spin_unlock(&sis->lock); } spin_unlock(&swap_lock); return n; } #endif /* CONFIG_HIBERNATION */ static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte) { return pte_same(pte_swp_clear_flags(pte), swp_pte); } /* * No need to decide whether this PTE shares the swap entry with others, * just let do_wp_page work it out if a write is requested later - to * force COW, vm_page_prot omits write permission from any private vma. 
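 */

/*
 * Illustrative sketch (not kernel code): what pte_same_as_swp() above is
 * for, restated over plain integers.  The DEMO_* bits are invented
 * stand-ins for the per-PTE status bits (such as soft-dirty or uffd-wp)
 * that pte_swp_clear_flags() strips, so that a swap PTE still matches the
 * expected swap entry even if one of those bits was set after the entry
 * was installed.
 */
#define DEMO_SOFT_DIRTY		0x1UL
#define DEMO_UFFD_WP		0x2UL
#define DEMO_PTE_FLAG_MASK	(DEMO_SOFT_DIRTY | DEMO_UFFD_WP)

static int demo_same_ignoring_flags(unsigned long pte_val,
				    unsigned long expected_val)
{
	return (pte_val & ~DEMO_PTE_FLAG_MASK) ==
	       (expected_val & ~DEMO_PTE_FLAG_MASK);
}

/*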
*/ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, swp_entry_t entry, struct folio *folio) { struct page *page; struct folio *swapcache; spinlock_t *ptl; pte_t *pte, new_pte, old_pte; bool hwpoisoned = false; int ret = 1; swapcache = folio; folio = ksm_might_need_to_copy(folio, vma, addr); if (unlikely(!folio)) return -ENOMEM; else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { hwpoisoned = true; folio = swapcache; } page = folio_file_page(folio, swp_offset(entry)); if (PageHWPoison(page)) hwpoisoned = true; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte), swp_entry_to_pte(entry)))) { ret = 0; goto out; } old_pte = ptep_get(pte); if (unlikely(hwpoisoned || !folio_test_uptodate(folio))) { swp_entry_t swp_entry; dec_mm_counter(vma->vm_mm, MM_SWAPENTS); if (hwpoisoned) { swp_entry = make_hwpoison_entry(page); } else { swp_entry = make_poisoned_swp_entry(); } new_pte = swp_entry_to_pte(swp_entry); ret = 0; goto setpte; } /* * Some architectures may have to restore extra metadata to the page * when reading from swap. This metadata may be indexed by swap entry * so this must be called before swap_free(). */ arch_swap_restore(entry, folio); dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); folio_get(folio); if (folio == swapcache) { rmap_t rmap_flags = RMAP_NONE; /* * See do_swap_page(): writeback would be problematic. * However, we do a folio_wait_writeback() just before this * call and have the folio locked. */ VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); if (pte_swp_exclusive(old_pte)) rmap_flags |= RMAP_EXCLUSIVE; folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags); } else { /* ksm created a completely new copy */ folio_add_new_anon_rmap(folio, vma, addr); folio_add_lru_vma(folio, vma); } new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); if (pte_swp_soft_dirty(old_pte)) new_pte = pte_mksoft_dirty(new_pte); if (pte_swp_uffd_wp(old_pte)) new_pte = pte_mkuffd_wp(new_pte); setpte: set_pte_at(vma->vm_mm, addr, pte, new_pte); swap_free(entry); out: if (pte) pte_unmap_unlock(pte, ptl); if (folio != swapcache) { folio_unlock(folio); folio_put(folio); } return ret; } static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned int type) { pte_t *pte = NULL; struct swap_info_struct *si; si = swap_info[type]; do { struct folio *folio; unsigned long offset; unsigned char swp_count; swp_entry_t entry; int ret; pte_t ptent; if (!pte++) { pte = pte_offset_map(pmd, addr); if (!pte) break; } ptent = ptep_get_lockless(pte); if (!is_swap_pte(ptent)) continue; entry = pte_to_swp_entry(ptent); if (swp_type(entry) != type) continue; offset = swp_offset(entry); pte_unmap(pte); pte = NULL; folio = swap_cache_get_folio(entry, vma, addr); if (!folio) { struct page *page; struct vm_fault vmf = { .vma = vma, .address = addr, .real_address = addr, .pmd = pmd, }; page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, &vmf); if (page) folio = page_folio(page); } if (!folio) { swp_count = READ_ONCE(si->swap_map[offset]); if (swp_count == 0 || swp_count == SWAP_MAP_BAD) continue; return -ENOMEM; } folio_lock(folio); folio_wait_writeback(folio); ret = unuse_pte(vma, pmd, addr, entry, folio); if (ret < 0) { folio_unlock(folio); folio_put(folio); return ret; } folio_free_swap(folio); folio_unlock(folio); folio_put(folio); } while (addr += PAGE_SIZE, addr != end); if (pte) pte_unmap(pte); return 0; } static inline int 
unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, unsigned int type) { pmd_t *pmd; unsigned long next; int ret; pmd = pmd_offset(pud, addr); do { cond_resched(); next = pmd_addr_end(addr, end); ret = unuse_pte_range(vma, pmd, addr, next, type); if (ret) return ret; } while (pmd++, addr = next, addr != end); return 0; } static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned int type) { pud_t *pud; unsigned long next; int ret; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; ret = unuse_pmd_range(vma, pud, addr, next, type); if (ret) return ret; } while (pud++, addr = next, addr != end); return 0; } static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned int type) { p4d_t *p4d; unsigned long next; int ret; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; ret = unuse_pud_range(vma, p4d, addr, next, type); if (ret) return ret; } while (p4d++, addr = next, addr != end); return 0; } static int unuse_vma(struct vm_area_struct *vma, unsigned int type) { pgd_t *pgd; unsigned long addr, end, next; int ret; addr = vma->vm_start; end = vma->vm_end; pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; ret = unuse_p4d_range(vma, pgd, addr, next, type); if (ret) return ret; } while (pgd++, addr = next, addr != end); return 0; } static int unuse_mm(struct mm_struct *mm, unsigned int type) { struct vm_area_struct *vma; int ret = 0; VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); for_each_vma(vmi, vma) { if (vma->anon_vma) { ret = unuse_vma(vma, type); if (ret) break; } cond_resched(); } mmap_read_unlock(mm); return ret; } /* * Scan swap_map from current position to next entry still in use. * Return 0 if there are no inuse entries after prev till end of * the map. */ static unsigned int find_next_to_unuse(struct swap_info_struct *si, unsigned int prev) { unsigned int i; unsigned char count; /* * No need for swap_lock here: we're just looking * for whether an entry is in use, not modifying it; false * hits are okay, and sys_swapoff() has already prevented new * allocations from this area (while holding swap_lock). */ for (i = prev + 1; i < si->max; i++) { count = READ_ONCE(si->swap_map[i]); if (count && swap_count(count) != SWAP_MAP_BAD) break; if ((i % LATENCY_LIMIT) == 0) cond_resched(); } if (i == si->max) i = 0; return i; } static int try_to_unuse(unsigned int type) { struct mm_struct *prev_mm; struct mm_struct *mm; struct list_head *p; int retval = 0; struct swap_info_struct *si = swap_info[type]; struct folio *folio; swp_entry_t entry; unsigned int i; if (!READ_ONCE(si->inuse_pages)) goto success; retry: retval = shmem_unuse(type); if (retval) return retval; prev_mm = &init_mm; mmget(prev_mm); spin_lock(&mmlist_lock); p = &init_mm.mmlist; while (READ_ONCE(si->inuse_pages) && !signal_pending(current) && (p = p->next) != &init_mm.mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!mmget_not_zero(mm)) continue; spin_unlock(&mmlist_lock); mmput(prev_mm); prev_mm = mm; retval = unuse_mm(mm, type); if (retval) { mmput(prev_mm); return retval; } /* * Make sure that we aren't completely killing * interactive performance. 
*/ cond_resched(); spin_lock(&mmlist_lock); } spin_unlock(&mmlist_lock); mmput(prev_mm); i = 0; while (READ_ONCE(si->inuse_pages) && !signal_pending(current) && (i = find_next_to_unuse(si, i)) != 0) { entry = swp_entry(type, i); folio = filemap_get_folio(swap_address_space(entry), i); if (IS_ERR(folio)) continue; /* * It is conceivable that a racing task removed this folio from * swap cache just before we acquired the page lock. The folio * might even be back in swap cache on another swap area. But * that is okay, folio_free_swap() only removes stale folios. */ folio_lock(folio); folio_wait_writeback(folio); folio_free_swap(folio); folio_unlock(folio); folio_put(folio); } /* * Lets check again to see if there are still swap entries in the map. * If yes, we would need to do retry the unuse logic again. * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. * * Limit the number of retries? No: when mmget_not_zero() * above fails, that mm is likely to be freeing swap from * exit_mmap(), which proceeds at its own independent pace; * and even shmem_writepage() could have been preempted after * folio_alloc_swap(), temporarily hiding that swap. It's easy * and robust (though cpu-intensive) just to keep retrying. */ if (READ_ONCE(si->inuse_pages)) { if (!signal_pending(current)) goto retry; return -EINTR; } success: /* * Make sure that further cleanups after try_to_unuse() returns happen * after swap_range_free() reduces si->inuse_pages to 0. */ smp_mb(); return 0; } /* * After a successful try_to_unuse, if no swap is now in use, we know * we can empty the mmlist. swap_lock must be held on entry and exit. * Note that mmlist_lock nests inside swap_lock, and an mm must be * added to the mmlist just after page_duplicate - before would be racy. */ static void drain_mmlist(void) { struct list_head *p, *next; unsigned int type; for (type = 0; type < nr_swapfiles; type++) if (swap_info[type]->inuse_pages) return; spin_lock(&mmlist_lock); list_for_each_safe(p, next, &init_mm.mmlist) list_del_init(p); spin_unlock(&mmlist_lock); } /* * Free all of a swapdev's extent information */ static void destroy_swap_extents(struct swap_info_struct *sis) { while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { struct rb_node *rb = sis->swap_extent_root.rb_node; struct swap_extent *se = rb_entry(rb, struct swap_extent, rb_node); rb_erase(rb, &sis->swap_extent_root); kfree(se); } if (sis->flags & SWP_ACTIVATED) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; sis->flags &= ~SWP_ACTIVATED; if (mapping->a_ops->swap_deactivate) mapping->a_ops->swap_deactivate(swap_file); } } /* * Add a block range (and the corresponding page range) into this swapdev's * extent tree. * * This function rather assumes that it is called in ascending page order. */ int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block) { struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; struct swap_extent *se; struct swap_extent *new_se; /* * place the new node at the right most since the * function is called in ascending page order. */ while (*link) { parent = *link; link = &parent->rb_right; } if (parent) { se = rb_entry(parent, struct swap_extent, rb_node); BUG_ON(se->start_page + se->nr_pages != start_page); if (se->start_block + se->nr_pages == start_block) { /* Merge it */ se->nr_pages += nr_pages; return 0; } } /* No merge, insert a new extent. 
*/ new_se = kmalloc(sizeof(*se), GFP_KERNEL); if (new_se == NULL) return -ENOMEM; new_se->start_page = start_page; new_se->nr_pages = nr_pages; new_se->start_block = start_block; rb_link_node(&new_se->rb_node, parent, link); rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); return 1; } EXPORT_SYMBOL_GPL(add_swap_extent); /* * A `swap extent' is a simple thing which maps a contiguous range of pages * onto a contiguous range of disk blocks. A rbtree of swap extents is * built at swapon time and is then used at swap_writepage/swap_read_folio * time for locating where on disk a page belongs. * * If the swapfile is an S_ISBLK block device, a single extent is installed. * This is done so that the main operating code can treat S_ISBLK and S_ISREG * swap files identically. * * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap * extent rbtree operates in PAGE_SIZE disk blocks. Both S_ISREG and S_ISBLK * swapfiles are handled *identically* after swapon time. * * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks * and will parse them into a rbtree, in PAGE_SIZE chunks. If some stray * blocks are found which do not fall within the PAGE_SIZE alignment * requirements, they are simply tossed out - we will never use those blocks * for swapping. * * For all swap devices we set S_SWAPFILE across the life of the swapon. This * prevents users from writing to the swap device, which will corrupt memory. * * The amount of disk space which a single swap extent represents varies. * Typically it is in the 1-4 megabyte range. So we can have hundreds of * extents in the rbtree. - akpm. */ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; int ret; if (S_ISBLK(inode->i_mode)) { ret = add_swap_extent(sis, 0, sis->max, 0); *span = sis->pages; return ret; } if (mapping->a_ops->swap_activate) { ret = mapping->a_ops->swap_activate(sis, swap_file, span); if (ret < 0) return ret; sis->flags |= SWP_ACTIVATED; if ((sis->flags & SWP_FS_OPS) && sio_pool_init() != 0) { destroy_swap_extents(sis); return -ENOMEM; } return ret; } return generic_swapfile_activate(sis, swap_file, span); } static int swap_node(struct swap_info_struct *p) { struct block_device *bdev; if (p->bdev) bdev = p->bdev; else bdev = p->swap_file->f_inode->i_sb->s_bdev; return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; } static void setup_swap_info(struct swap_info_struct *p, int prio, unsigned char *swap_map, struct swap_cluster_info *cluster_info) { int i; if (prio >= 0) p->prio = prio; else p->prio = --least_priority; /* * the plist prio is negated because plist ordering is * low-to-high, while swap ordering is high-to-low */ p->list.prio = -p->prio; for_each_node(i) { if (p->prio >= 0) p->avail_lists[i].prio = -p->prio; else { if (swap_node(p) == i) p->avail_lists[i].prio = 1; else p->avail_lists[i].prio = -p->prio; } } p->swap_map = swap_map; p->cluster_info = cluster_info; } static void _enable_swap_info(struct swap_info_struct *p) { p->flags |= SWP_WRITEOK; atomic_long_add(p->pages, &nr_swap_pages); total_swap_pages += p->pages; assert_spin_locked(&swap_lock); /* * both lists are plists, and thus priority ordered. * swap_active_head needs to be priority ordered for swapoff(), * which on removal of any swap_info_struct with an auto-assigned * (i.e. 
negative) priority increments the auto-assigned priority * of any lower-priority swap_info_structs. * swap_avail_head needs to be priority ordered for folio_alloc_swap(), * which allocates swap pages from the highest available priority * swap_info_struct. */ plist_add(&p->list, &swap_active_head); /* add to available list iff swap device is not full */ if (p->highest_bit) add_to_avail_list(p); } static void enable_swap_info(struct swap_info_struct *p, int prio, unsigned char *swap_map, struct swap_cluster_info *cluster_info) { spin_lock(&swap_lock); spin_lock(&p->lock); setup_swap_info(p, prio, swap_map, cluster_info); spin_unlock(&p->lock); spin_unlock(&swap_lock); /* * Finished initializing swap device, now it's safe to reference it. */ percpu_ref_resurrect(&p->users); spin_lock(&swap_lock); spin_lock(&p->lock); _enable_swap_info(p); spin_unlock(&p->lock); spin_unlock(&swap_lock); } static void reinsert_swap_info(struct swap_info_struct *p) { spin_lock(&swap_lock); spin_lock(&p->lock); setup_swap_info(p, p->prio, p->swap_map, p->cluster_info); _enable_swap_info(p); spin_unlock(&p->lock); spin_unlock(&swap_lock); } bool has_usable_swap(void) { bool ret = true; spin_lock(&swap_lock); if (plist_head_empty(&swap_active_head)) ret = false; spin_unlock(&swap_lock); return ret; } SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; unsigned char *swap_map; struct swap_cluster_info *cluster_info; struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; struct filename *pathname; int err, found = 0; unsigned int old_block_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; BUG_ON(!current->mm); pathname = getname(specialfile); if (IS_ERR(pathname)) return PTR_ERR(pathname); victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0); err = PTR_ERR(victim); if (IS_ERR(victim)) goto out; mapping = victim->f_mapping; spin_lock(&swap_lock); plist_for_each_entry(p, &swap_active_head, list) { if (p->flags & SWP_WRITEOK) { if (p->swap_file->f_mapping == mapping) { found = 1; break; } } } if (!found) { err = -EINVAL; spin_unlock(&swap_lock); goto out_dput; } if (!security_vm_enough_memory_mm(current->mm, p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; spin_unlock(&swap_lock); goto out_dput; } spin_lock(&p->lock); del_from_avail_list(p); if (p->prio < 0) { struct swap_info_struct *si = p; int nid; plist_for_each_entry_continue(si, &swap_active_head, list) { si->prio++; si->list.prio--; for_each_node(nid) { if (si->avail_lists[nid].prio != 1) si->avail_lists[nid].prio--; } } least_priority++; } plist_del(&p->list, &swap_active_head); atomic_long_sub(p->pages, &nr_swap_pages); total_swap_pages -= p->pages; p->flags &= ~SWP_WRITEOK; spin_unlock(&p->lock); spin_unlock(&swap_lock); disable_swap_slots_cache_lock(); set_current_oom_origin(); err = try_to_unuse(p->type); clear_current_oom_origin(); if (err) { /* re-insert swap space back into swap_list */ reinsert_swap_info(p); reenable_swap_slots_cache_unlock(); goto out_dput; } reenable_swap_slots_cache_unlock(); /* * Wait for swap operations protected by get/put_swap_device() * to complete. * * We need synchronize_rcu() here to protect the accessing to * the swap cache data structure. 
*/ percpu_ref_kill(&p->users); synchronize_rcu(); wait_for_completion(&p->comp); flush_work(&p->discard_work); destroy_swap_extents(p); if (p->flags & SWP_CONTINUED) free_swap_count_continuations(p); if (!p->bdev || !bdev_nonrot(p->bdev)) atomic_dec(&nr_rotate_swap); mutex_lock(&swapon_mutex); spin_lock(&swap_lock); spin_lock(&p->lock); drain_mmlist(); /* wait for anyone still in scan_swap_map_slots */ p->highest_bit = 0; /* cuts scans short */ while (p->flags >= SWP_SCANNING) { spin_unlock(&p->lock); spin_unlock(&swap_lock); schedule_timeout_uninterruptible(1); spin_lock(&swap_lock); spin_lock(&p->lock); } swap_file = p->swap_file; old_block_size = p->old_block_size; p->swap_file = NULL; p->max = 0; swap_map = p->swap_map; p->swap_map = NULL; cluster_info = p->cluster_info; p->cluster_info = NULL; spin_unlock(&p->lock); spin_unlock(&swap_lock); arch_swap_invalidate_area(p->type); zswap_swapoff(p->type); mutex_unlock(&swapon_mutex); free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; free_percpu(p->cluster_next_cpu); p->cluster_next_cpu = NULL; vfree(swap_map); kvfree(cluster_info); /* Destroy swap account information */ swap_cgroup_swapoff(p->type); exit_swap_address_space(p->type); inode = mapping->host; if (p->bdev_file) { set_blocksize(p->bdev, old_block_size); fput(p->bdev_file); p->bdev_file = NULL; } inode_lock(inode); inode->i_flags &= ~S_SWAPFILE; inode_unlock(inode); filp_close(swap_file, NULL); /* * Clear the SWP_USED flag after all resources are freed so that swapon * can reuse this swap_info in alloc_swap_info() safely. It is ok to * not hold p->lock after we cleared its SWP_WRITEOK. */ spin_lock(&swap_lock); p->flags = 0; spin_unlock(&swap_lock); err = 0; atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); out_dput: filp_close(victim, NULL); out: putname(pathname); return err; } #ifdef CONFIG_PROC_FS static __poll_t swaps_poll(struct file *file, poll_table *wait) { struct seq_file *seq = file->private_data; poll_wait(file, &proc_poll_wait, wait); if (seq->poll_event != atomic_read(&proc_poll_event)) { seq->poll_event = atomic_read(&proc_poll_event); return EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; } return EPOLLIN | EPOLLRDNORM; } /* iterator */ static void *swap_start(struct seq_file *swap, loff_t *pos) { struct swap_info_struct *si; int type; loff_t l = *pos; mutex_lock(&swapon_mutex); if (!l) return SEQ_START_TOKEN; for (type = 0; (si = swap_type_to_swap_info(type)); type++) { if (!(si->flags & SWP_USED) || !si->swap_map) continue; if (!--l) return si; } return NULL; } static void *swap_next(struct seq_file *swap, void *v, loff_t *pos) { struct swap_info_struct *si = v; int type; if (v == SEQ_START_TOKEN) type = 0; else type = si->type + 1; ++(*pos); for (; (si = swap_type_to_swap_info(type)); type++) { if (!(si->flags & SWP_USED) || !si->swap_map) continue; return si; } return NULL; } static void swap_stop(struct seq_file *swap, void *v) { mutex_unlock(&swapon_mutex); } static int swap_show(struct seq_file *swap, void *v) { struct swap_info_struct *si = v; struct file *file; int len; unsigned long bytes, inuse; if (si == SEQ_START_TOKEN) { seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n"); return 0; } bytes = K(si->pages); inuse = K(READ_ONCE(si->inuse_pages)); file = si->swap_file; len = seq_file_path(swap, file, " \t\n\\"); seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n", len < 40 ? 40 - len : 1, " ", S_ISBLK(file_inode(file)->i_mode) ? "partition" : "file\t", bytes, bytes < 10000000 ? "\t" : "", inuse, inuse < 10000000 ? 
"\t" : "", si->prio); return 0; } static const struct seq_operations swaps_op = { .start = swap_start, .next = swap_next, .stop = swap_stop, .show = swap_show }; static int swaps_open(struct inode *inode, struct file *file) { struct seq_file *seq; int ret; ret = seq_open(file, &swaps_op); if (ret) return ret; seq = file->private_data; seq->poll_event = atomic_read(&proc_poll_event); return 0; } static const struct proc_ops swaps_proc_ops = { .proc_flags = PROC_ENTRY_PERMANENT, .proc_open = swaps_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, .proc_poll = swaps_poll, }; static int __init procswaps_init(void) { proc_create("swaps", 0, NULL, &swaps_proc_ops); return 0; } __initcall(procswaps_init); #endif /* CONFIG_PROC_FS */ #ifdef MAX_SWAPFILES_CHECK static int __init max_swapfiles_check(void) { MAX_SWAPFILES_CHECK(); return 0; } late_initcall(max_swapfiles_check); #endif static struct swap_info_struct *alloc_swap_info(void) { struct swap_info_struct *p; struct swap_info_struct *defer = NULL; unsigned int type; int i; p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); if (percpu_ref_init(&p->users, swap_users_ref_free, PERCPU_REF_INIT_DEAD, GFP_KERNEL)) { kvfree(p); return ERR_PTR(-ENOMEM); } spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { if (!(swap_info[type]->flags & SWP_USED)) break; } if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); percpu_ref_exit(&p->users); kvfree(p); return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { p->type = type; /* * Publish the swap_info_struct after initializing it. * Note that kvzalloc() above zeroes all its fields. */ smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */ nr_swapfiles++; } else { defer = p; p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() * would be relying on p->type to remain valid. */ } p->swap_extent_root = RB_ROOT; plist_node_init(&p->list, 0); for_each_node(i) plist_node_init(&p->avail_lists[i], 0); p->flags = SWP_USED; spin_unlock(&swap_lock); if (defer) { percpu_ref_exit(&defer->users); kvfree(defer); } spin_lock_init(&p->lock); spin_lock_init(&p->cont_lock); init_completion(&p->comp); return p; } static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) { int error; if (S_ISBLK(inode->i_mode)) { p->bdev_file = bdev_file_open_by_dev(inode->i_rdev, BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL); if (IS_ERR(p->bdev_file)) { error = PTR_ERR(p->bdev_file); p->bdev_file = NULL; return error; } p->bdev = file_bdev(p->bdev_file); p->old_block_size = block_size(p->bdev); error = set_blocksize(p->bdev, PAGE_SIZE); if (error < 0) return error; /* * Zoned block devices contain zones that have a sequential * write only restriction. Hence zoned block devices are not * suitable for swapping. Disallow them here. */ if (bdev_is_zoned(p->bdev)) return -EINVAL; p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { p->bdev = inode->i_sb->s_bdev; } return 0; } /* * Find out how many pages are allowed for a single swap device. There * are two limiting factors: * 1) the number of bits for the swap offset in the swp_entry_t type, and * 2) the number of bits in the swap pte, as defined by the different * architectures. * * In order to find the largest possible bit mask, a swap entry with * swap type 0 and swap offset ~0UL is created, encoded to a swap pte, * decoded to a swp_entry_t again, and finally the swap offset is * extracted. 
* * This will mask all the bits from the initial ~0UL mask that can't * be encoded in either the swp_entry_t or the architecture definition * of a swap pte. */ unsigned long generic_max_swapfile_size(void) { return swp_offset(pte_to_swp_entry( swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; } /* Can be overridden by an architecture for additional checks. */ __weak unsigned long arch_max_swapfile_size(void) { return generic_max_swapfile_size(); } static unsigned long read_swap_header(struct swap_info_struct *p, union swap_header *swap_header, struct inode *inode) { int i; unsigned long maxpages; unsigned long swapfilepages; unsigned long last_page; if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { pr_err("Unable to find swap-space signature\n"); return 0; } /* swap partition endianness hack... */ if (swab32(swap_header->info.version) == 1) { swab32s(&swap_header->info.version); swab32s(&swap_header->info.last_page); swab32s(&swap_header->info.nr_badpages); if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) return 0; for (i = 0; i < swap_header->info.nr_badpages; i++) swab32s(&swap_header->info.badpages[i]); } /* Check the swap header's sub-version */ if (swap_header->info.version != 1) { pr_warn("Unable to handle swap header version %d\n", swap_header->info.version); return 0; } p->lowest_bit = 1; p->cluster_next = 1; p->cluster_nr = 0; maxpages = swapfile_maximum_size; last_page = swap_header->info.last_page; if (!last_page) { pr_warn("Empty swap-file\n"); return 0; } if (last_page > maxpages) { pr_warn("Truncating oversized swap area, only using %luk out of %luk\n", K(maxpages), K(last_page)); } if (maxpages > last_page) { maxpages = last_page + 1; /* p->max is an unsigned int: don't overflow it */ if ((unsigned int)maxpages == 0) maxpages = UINT_MAX; } p->highest_bit = maxpages - 1; if (!maxpages) return 0; swapfilepages = i_size_read(inode) >> PAGE_SHIFT; if (swapfilepages && maxpages > swapfilepages) { pr_warn("Swap area shorter than signature indicates\n"); return 0; } if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) return 0; if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) return 0; return maxpages; } #define SWAP_CLUSTER_INFO_COLS \ DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) #define SWAP_CLUSTER_SPACE_COLS \ DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) #define SWAP_CLUSTER_COLS \ max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) static int setup_swap_map_and_extents(struct swap_info_struct *p, union swap_header *swap_header, unsigned char *swap_map, struct swap_cluster_info *cluster_info, unsigned long maxpages, sector_t *span) { unsigned int j, k; unsigned int nr_good_pages; int nr_extents; unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS; unsigned long i, idx; nr_good_pages = maxpages - 1; /* omit header page */ cluster_list_init(&p->free_clusters); cluster_list_init(&p->discard_clusters); for (i = 0; i < swap_header->info.nr_badpages; i++) { unsigned int page_nr = swap_header->info.badpages[i]; if (page_nr == 0 || page_nr > swap_header->info.last_page) return -EINVAL; if (page_nr < maxpages) { swap_map[page_nr] = SWAP_MAP_BAD; nr_good_pages--; /* * Haven't marked the cluster free yet, no list * operation involved */ inc_cluster_info_page(p, cluster_info, page_nr); } } /* Haven't marked the cluster free yet, no list operation involved */ for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++) 
inc_cluster_info_page(p, cluster_info, i); if (nr_good_pages) { swap_map[0] = SWAP_MAP_BAD; /* * Not mark the cluster free yet, no list * operation involved */ inc_cluster_info_page(p, cluster_info, 0); p->max = maxpages; p->pages = nr_good_pages; nr_extents = setup_swap_extents(p, span); if (nr_extents < 0) return nr_extents; nr_good_pages = p->pages; } if (!nr_good_pages) { pr_warn("Empty swap-file\n"); return -EINVAL; } if (!cluster_info) return nr_extents; /* * Reduce false cache line sharing between cluster_info and * sharing same address space. */ for (k = 0; k < SWAP_CLUSTER_COLS; k++) { j = (k + col) % SWAP_CLUSTER_COLS; for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { idx = i * SWAP_CLUSTER_COLS + j; if (idx >= nr_clusters) continue; if (cluster_count(&cluster_info[idx])) continue; cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE); cluster_list_add_tail(&p->free_clusters, cluster_info, idx); } } return nr_extents; } SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) { struct swap_info_struct *p; struct filename *name; struct file *swap_file = NULL; struct address_space *mapping; struct dentry *dentry; int prio; int error; union swap_header *swap_header; int nr_extents; sector_t span; unsigned long maxpages; unsigned char *swap_map = NULL; struct swap_cluster_info *cluster_info = NULL; struct page *page = NULL; struct inode *inode = NULL; bool inced_nr_rotate_swap = false; if (swap_flags & ~SWAP_FLAGS_VALID) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!swap_avail_heads) return -ENOMEM; p = alloc_swap_info(); if (IS_ERR(p)) return PTR_ERR(p); INIT_WORK(&p->discard_work, swap_discard_work); name = getname(specialfile); if (IS_ERR(name)) { error = PTR_ERR(name); name = NULL; goto bad_swap; } swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0); if (IS_ERR(swap_file)) { error = PTR_ERR(swap_file); swap_file = NULL; goto bad_swap; } p->swap_file = swap_file; mapping = swap_file->f_mapping; dentry = swap_file->f_path.dentry; inode = mapping->host; error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap; inode_lock(inode); if (d_unlinked(dentry) || cant_mount(dentry)) { error = -ENOENT; goto bad_swap_unlock_inode; } if (IS_SWAPFILE(inode)) { error = -EBUSY; goto bad_swap_unlock_inode; } /* * Read the swap header. 
*/ if (!mapping->a_ops->read_folio) { error = -EINVAL; goto bad_swap_unlock_inode; } page = read_mapping_page(mapping, 0, swap_file); if (IS_ERR(page)) { error = PTR_ERR(page); goto bad_swap_unlock_inode; } swap_header = kmap(page); maxpages = read_swap_header(p, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; goto bad_swap_unlock_inode; } /* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; goto bad_swap_unlock_inode; } if (p->bdev && bdev_stable_writes(p->bdev)) p->flags |= SWP_STABLE_WRITES; if (p->bdev && bdev_synchronous(p->bdev)) p->flags |= SWP_SYNCHRONOUS_IO; if (p->bdev && bdev_nonrot(p->bdev)) { int cpu; unsigned long ci, nr_cluster; p->flags |= SWP_SOLIDSTATE; p->cluster_next_cpu = alloc_percpu(unsigned int); if (!p->cluster_next_cpu) { error = -ENOMEM; goto bad_swap_unlock_inode; } /* * select a random position to start with to help wear leveling * SSD */ for_each_possible_cpu(cpu) { per_cpu(*p->cluster_next_cpu, cpu) = get_random_u32_inclusive(1, p->highest_bit); } nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); cluster_info = kvcalloc(nr_cluster, sizeof(*cluster_info), GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; goto bad_swap_unlock_inode; } for (ci = 0; ci < nr_cluster; ci++) spin_lock_init(&((cluster_info + ci)->lock)); p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; goto bad_swap_unlock_inode; } for_each_possible_cpu(cpu) { struct percpu_cluster *cluster; cluster = per_cpu_ptr(p->percpu_cluster, cpu); cluster_set_null(&cluster->index); } } else { atomic_inc(&nr_rotate_swap); inced_nr_rotate_swap = true; } error = swap_cgroup_swapon(p->type, maxpages); if (error) goto bad_swap_unlock_inode; nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, cluster_info, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; goto bad_swap_unlock_inode; } if ((swap_flags & SWAP_FLAG_DISCARD) && p->bdev && bdev_max_discard_sectors(p->bdev)) { /* * When discard is enabled for swap with no particular * policy flagged, we set all swap discard flags here in * order to sustain backward compatibility with older * swapon(8) releases. */ p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | SWP_PAGE_DISCARD); /* * By flagging sys_swapon, a sysadmin can tell us to * either do single-time area discards only, or to just * perform discards for released swap page-clusters. * Now it's time to adjust the p->flags accordingly. */ if (swap_flags & SWAP_FLAG_DISCARD_ONCE) p->flags &= ~SWP_PAGE_DISCARD; else if (swap_flags & SWAP_FLAG_DISCARD_PAGES) p->flags &= ~SWP_AREA_DISCARD; /* issue a swapon-time discard if it's still required */ if (p->flags & SWP_AREA_DISCARD) { int err = discard_swap(p); if (unlikely(err)) pr_err("swapon: discard_swap(%p): %d\n", p, err); } } error = init_swap_address_space(p->type, maxpages); if (error) goto bad_swap_unlock_inode; error = zswap_swapon(p->type, maxpages); if (error) goto free_swap_address_space; /* * Flush any pending IO and dirty mappings before we start using this * swap device. */ inode->i_flags |= S_SWAPFILE; error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; goto free_swap_zswap; } mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) prio = (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; enable_swap_info(p, prio, swap_map, cluster_info); pr_info("Adding %uk swap on %s. 
Priority:%d extents:%d across:%lluk %s%s%s%s\n", K(p->pages), name->name, p->prio, nr_extents, K((unsigned long long)span), (p->flags & SWP_SOLIDSTATE) ? "SS" : "", (p->flags & SWP_DISCARDABLE) ? "D" : "", (p->flags & SWP_AREA_DISCARD) ? "s" : "", (p->flags & SWP_PAGE_DISCARD) ? "c" : ""); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); error = 0; goto out; free_swap_zswap: zswap_swapoff(p->type); free_swap_address_space: exit_swap_address_space(p->type); bad_swap_unlock_inode: inode_unlock(inode); bad_swap: free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; free_percpu(p->cluster_next_cpu); p->cluster_next_cpu = NULL; if (p->bdev_file) { set_blocksize(p->bdev, p->old_block_size); fput(p->bdev_file); p->bdev_file = NULL; } inode = NULL; destroy_swap_extents(p); swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); p->swap_file = NULL; p->flags = 0; spin_unlock(&swap_lock); vfree(swap_map); kvfree(cluster_info); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); if (swap_file) filp_close(swap_file, NULL); out: if (page && !IS_ERR(page)) { kunmap(page); put_page(page); } if (name) putname(name); if (inode) inode_unlock(inode); if (!error) enable_swap_slots_cache(); return error; } void si_swapinfo(struct sysinfo *val) { unsigned int type; unsigned long nr_to_be_unused = 0; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *si = swap_info[type]; if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) nr_to_be_unused += READ_ONCE(si->inuse_pages); } val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; spin_unlock(&swap_lock); } /* * Verify that a swap entry is valid and increment its swap map count. * * Returns error code in following case. * - success -> 0 * - swp_entry is invalid -> EINVAL * - swp_entry is migration entry -> EINVAL * - swap-cache reference is requested but there is already one. -> EEXIST * - swap-cache reference is requested but the entry is not used. -> ENOENT * - swap-mapped reference requested but needs continued swap count. -> ENOMEM */ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) { struct swap_info_struct *p; struct swap_cluster_info *ci; unsigned long offset; unsigned char count; unsigned char has_cache; int err; p = swp_swap_info(entry); offset = swp_offset(entry); ci = lock_cluster_or_swap_info(p, offset); count = p->swap_map[offset]; /* * swapin_readahead() doesn't check if a swap entry is valid, so the * swap entry could be SWAP_MAP_BAD. Check here with lock held. 
*/ if (unlikely(swap_count(count) == SWAP_MAP_BAD)) { err = -ENOENT; goto unlock_out; } has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; err = 0; if (usage == SWAP_HAS_CACHE) { /* set SWAP_HAS_CACHE if there is no cache and entry is used */ if (!has_cache && count) has_cache = SWAP_HAS_CACHE; else if (has_cache) /* someone else added cache */ err = -EEXIST; else /* no users remaining */ err = -ENOENT; } else if (count || has_cache) { if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX) count += usage; else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) err = -EINVAL; else if (swap_count_continued(p, offset, count)) count = COUNT_CONTINUED; else err = -ENOMEM; } else err = -ENOENT; /* unused swap entry */ if (!err) WRITE_ONCE(p->swap_map[offset], count | has_cache); unlock_out: unlock_cluster_or_swap_info(p, ci); return err; } /* * Help swapoff by noting that swap entry belongs to shmem/tmpfs * (in which case its reference count is never incremented). */ void swap_shmem_alloc(swp_entry_t entry) { __swap_duplicate(entry, SWAP_MAP_SHMEM); } /* * Increase reference count of swap entry by 1. * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required * but could not be atomically allocated. Returns 0, just as if it succeeded, * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which * might occur if a page table entry has got corrupted. */ int swap_duplicate(swp_entry_t entry) { int err = 0; while (!err && __swap_duplicate(entry, 1) == -ENOMEM) err = add_swap_count_continuation(entry, GFP_ATOMIC); return err; } /* * @entry: swap entry for which we allocate swap cache. * * Called when allocating swap cache for existing swap entry, * This can return error codes. Returns 0 at success. * -EEXIST means there is a swap cache. * Note: return code is different from swap_duplicate(). */ int swapcache_prepare(swp_entry_t entry) { return __swap_duplicate(entry, SWAP_HAS_CACHE); } void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) { struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char usage; ci = lock_cluster_or_swap_info(si, offset); usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); unlock_cluster_or_swap_info(si, ci); if (!usage) free_swap_slot(entry); } struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); } /* * out-of-line methods to avoid include hell. */ struct address_space *swapcache_mapping(struct folio *folio) { return swp_swap_info(folio->swap)->swap_file->f_mapping; } EXPORT_SYMBOL_GPL(swapcache_mapping); pgoff_t __page_file_index(struct page *page) { swp_entry_t swap = page_swap_entry(page); return swp_offset(swap); } EXPORT_SYMBOL_GPL(__page_file_index); /* * add_swap_count_continuation - called when a swap count is duplicated * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's * page of the original vmalloc'ed swap_map, to hold the continuation count * (for that entry and for its neighbouring PAGE_SIZE swap entries). Called * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc. * * These continuation pages are seldom referenced: the common paths all work * on the original swap_map, only referring to a continuation page when the * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX. * * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL) * can be called after dropping locks. 
*/ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) { struct swap_info_struct *si; struct swap_cluster_info *ci; struct page *head; struct page *page; struct page *list_page; pgoff_t offset; unsigned char count; int ret = 0; /* * When debugging, it's easier to use __GFP_ZERO here; but it's better * for latency not to zero a page while GFP_ATOMIC and holding locks. */ page = alloc_page(gfp_mask | __GFP_HIGHMEM); si = get_swap_device(entry); if (!si) { /* * An acceptable race has occurred since the failing * __swap_duplicate(): the swap device may be swapoff */ goto outer; } spin_lock(&si->lock); offset = swp_offset(entry); ci = lock_cluster(si, offset); count = swap_count(si->swap_map[offset]); if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) { /* * The higher the swap count, the more likely it is that tasks * will race to add swap count continuation: we need to avoid * over-provisioning. */ goto out; } if (!page) { ret = -ENOMEM; goto out; } head = vmalloc_to_page(si->swap_map + offset); offset &= ~PAGE_MASK; spin_lock(&si->cont_lock); /* * Page allocation does not initialize the page's lru field, * but it does always reset its private field. */ if (!page_private(head)) { BUG_ON(count & COUNT_CONTINUED); INIT_LIST_HEAD(&head->lru); set_page_private(head, SWP_CONTINUED); si->flags |= SWP_CONTINUED; } list_for_each_entry(list_page, &head->lru, lru) { unsigned char *map; /* * If the previous map said no continuation, but we've found * a continuation page, free our allocation and use this one. */ if (!(count & COUNT_CONTINUED)) goto out_unlock_cont; map = kmap_local_page(list_page) + offset; count = *map; kunmap_local(map); /* * If this continuation count now has some space in it, * free our allocation and use this one. */ if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX) goto out_unlock_cont; } list_add_tail(&page->lru, &head->lru); page = NULL; /* now it's attached, don't free it */ out_unlock_cont: spin_unlock(&si->cont_lock); out: unlock_cluster(ci); spin_unlock(&si->lock); put_swap_device(si); outer: if (page) __free_page(page); return ret; } /* * swap_count_continued - when the original swap_map count is incremented * from SWAP_MAP_MAX, check if there is already a continuation page to carry * into, carry if so, or else fail until a new continuation page is allocated; * when the original swap_map count is decremented from 0 with continuation, * borrow from the continuation and report whether it still holds more. * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster * lock. 
*/ static bool swap_count_continued(struct swap_info_struct *si, pgoff_t offset, unsigned char count) { struct page *head; struct page *page; unsigned char *map; bool ret; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head) != SWP_CONTINUED) { BUG_ON(count & COUNT_CONTINUED); return false; /* need to add count continuation */ } spin_lock(&si->cont_lock); offset &= ~PAGE_MASK; page = list_next_entry(head, lru); map = kmap_local_page(page) + offset; if (count == SWAP_MAP_MAX) /* initial increment from swap_map */ goto init_map; /* jump over SWAP_CONT_MAX checks */ if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */ /* * Think of how you add 1 to 999 */ while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) { kunmap_local(map); page = list_next_entry(page, lru); BUG_ON(page == head); map = kmap_local_page(page) + offset; } if (*map == SWAP_CONT_MAX) { kunmap_local(map); page = list_next_entry(page, lru); if (page == head) { ret = false; /* add count continuation */ goto out; } map = kmap_local_page(page) + offset; init_map: *map = 0; /* we didn't zero the page */ } *map += 1; kunmap_local(map); while ((page = list_prev_entry(page, lru)) != head) { map = kmap_local_page(page) + offset; *map = COUNT_CONTINUED; kunmap_local(map); } ret = true; /* incremented */ } else { /* decrementing */ /* * Think of how you subtract 1 from 1000 */ BUG_ON(count != COUNT_CONTINUED); while (*map == COUNT_CONTINUED) { kunmap_local(map); page = list_next_entry(page, lru); BUG_ON(page == head); map = kmap_local_page(page) + offset; } BUG_ON(*map == 0); *map -= 1; if (*map == 0) count = 0; kunmap_local(map); while ((page = list_prev_entry(page, lru)) != head) { map = kmap_local_page(page) + offset; *map = SWAP_CONT_MAX | count; count = COUNT_CONTINUED; kunmap_local(map); } ret = count == COUNT_CONTINUED; } out: spin_unlock(&si->cont_lock); return ret; } /* * free_swap_count_continuations - swapoff free all the continuation pages * appended to the swap_map, after swap_map is quiesced, before vfree'ing it. */ static void free_swap_count_continuations(struct swap_info_struct *si) { pgoff_t offset; for (offset = 0; offset < si->max; offset += PAGE_SIZE) { struct page *head; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head)) { struct page *page, *next; list_for_each_entry_safe(page, next, &head->lru, lru) { list_del(&page->lru); __free_page(page); } } } } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { struct swap_info_struct *si, *next; int nid = folio_nid(folio); if (!(gfp & __GFP_IO)) return; if (!blk_cgroup_congested()) return; /* * We've already scheduled a throttle, avoid taking the global swap * lock. 
*/ if (current->throttle_disk) return; spin_lock(&swap_avail_lock); plist_for_each_entry_safe(si, next, &swap_avail_heads[nid], avail_lists[nid]) { if (si->bdev) { blkcg_schedule_throttle(si->bdev->bd_disk, true); break; } } spin_unlock(&swap_avail_lock); } #endif static int __init swapfile_init(void) { int nid; swap_avail_heads = kmalloc_array(nr_node_ids, sizeof(struct plist_head), GFP_KERNEL); if (!swap_avail_heads) { pr_emerg("Not enough memory for swap heads, swap is disabled\n"); return -ENOMEM; } for_each_node(nid) plist_head_init(&swap_avail_heads[nid]); swapfile_maximum_size = arch_max_swapfile_size(); #ifdef CONFIG_MIGRATION if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS)) swap_migration_ad_supported = true; #endif /* CONFIG_MIGRATION */ return 0; } subsys_initcall(swapfile_init);
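/*
 * Illustrative userspace sketch (not part of mm/swapfile.c): how the
 * swapon(2) and swapoff(2) syscalls implemented above are typically driven
 * from user space through the libc wrappers in <sys/swap.h>. The device
 * path and helper names are hypothetical; the priority encoding mirrors the
 * SWAP_FLAG_PREFER / SWAP_FLAG_PRIO_* handling in sys_swapon() above.
 */
#include <stdio.h>
#include <sys/swap.h>

/* Enable a swap device at an explicit priority, as "swapon -p <prio>" would. */
static int enable_swap_with_priority(const char *path, int prio)
{
	int flags = SWAP_FLAG_PREFER |
		    ((prio << SWAP_FLAG_PRIO_SHIFT) & SWAP_FLAG_PRIO_MASK);

	if (swapon(path, flags) != 0) {
		perror("swapon");
		return -1;
	}
	return 0;
}

/* Disable it again; in the kernel this ends up in try_to_unuse() above. */
static int disable_swap(const char *path)
{
	if (swapoff(path) != 0) {
		perror("swapoff");
		return -1;
	}
	return 0;
}

int main(void)
{
	const char *dev = "/dev/sdb2";	/* hypothetical swap partition */

	if (enable_swap_with_priority(dev, 5) == 0)
		disable_swap(dev);
	return 0;
}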
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* audit.h -- Auditing support * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * * Written by Rickard E.
(Rik) Faith <faith@redhat.com> */ #ifndef _LINUX_AUDIT_H_ #define _LINUX_AUDIT_H_ #include <linux/sched.h> #include <linux/ptrace.h> #include <linux/audit_arch.h> #include <uapi/linux/audit.h> #include <uapi/linux/netfilter/nf_tables.h> #include <uapi/linux/fanotify.h> #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) struct audit_sig_info { uid_t uid; pid_t pid; char ctx[]; }; struct audit_buffer; struct audit_context; struct inode; struct netlink_skb_parms; struct path; struct linux_binprm; struct mq_attr; struct mqstat; struct audit_watch; struct audit_tree; struct sk_buff; struct kern_ipc_perm; struct audit_krule { u32 pflags; u32 flags; u32 listnr; u32 action; u32 mask[AUDIT_BITMASK_SIZE]; u32 buflen; /* for data alloc on list rules */ u32 field_count; char *filterkey; /* ties events to rules */ struct audit_field *fields; struct audit_field *arch_f; /* quick access to arch field */ struct audit_field *inode_f; /* quick access to an inode field */ struct audit_watch *watch; /* associated watch */ struct audit_tree *tree; /* associated watched tree */ struct audit_fsnotify_mark *exe; struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ struct list_head list; /* for AUDIT_LIST* purposes only */ u64 prio; }; /* Flag to indicate legacy AUDIT_LOGINUID unset usage */ #define AUDIT_LOGINUID_LEGACY 0x1 struct audit_field { u32 type; union { u32 val; kuid_t uid; kgid_t gid; struct { char *lsm_str; void *lsm_rule; }; }; u32 op; }; enum audit_ntp_type { AUDIT_NTP_OFFSET, AUDIT_NTP_FREQ, AUDIT_NTP_STATUS, AUDIT_NTP_TAI, AUDIT_NTP_TICK, AUDIT_NTP_ADJUST, AUDIT_NTP_NVALS /* count */ }; #ifdef CONFIG_AUDITSYSCALL struct audit_ntp_val { long long oldval, newval; }; struct audit_ntp_data { struct audit_ntp_val vals[AUDIT_NTP_NVALS]; }; #else struct audit_ntp_data {}; #endif enum audit_nfcfgop { AUDIT_XT_OP_REGISTER, AUDIT_XT_OP_REPLACE, AUDIT_XT_OP_UNREGISTER, AUDIT_NFT_OP_TABLE_REGISTER, AUDIT_NFT_OP_TABLE_UNREGISTER, AUDIT_NFT_OP_CHAIN_REGISTER, AUDIT_NFT_OP_CHAIN_UNREGISTER, AUDIT_NFT_OP_RULE_REGISTER, AUDIT_NFT_OP_RULE_UNREGISTER, AUDIT_NFT_OP_SET_REGISTER, AUDIT_NFT_OP_SET_UNREGISTER, AUDIT_NFT_OP_SETELEM_REGISTER, AUDIT_NFT_OP_SETELEM_UNREGISTER, AUDIT_NFT_OP_GEN_REGISTER, AUDIT_NFT_OP_OBJ_REGISTER, AUDIT_NFT_OP_OBJ_UNREGISTER, AUDIT_NFT_OP_OBJ_RESET, AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, AUDIT_NFT_OP_SETELEM_RESET, AUDIT_NFT_OP_RULE_RESET, AUDIT_NFT_OP_INVALID, }; extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); /* only for compat system calls */ extern unsigned compat_write_class[]; extern unsigned compat_read_class[]; extern unsigned compat_dir_class[]; extern unsigned compat_chattr_class[]; extern unsigned compat_signal_class[]; /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ #define AUDIT_TYPE_NORMAL 1 /* a "normal" audit record */ #define AUDIT_TYPE_PARENT 2 /* a parent audit record */ #define AUDIT_TYPE_CHILD_DELETE 3 /* a child being deleted */ #define AUDIT_TYPE_CHILD_CREATE 4 /* a child being created */ /* maximized args number that audit_socketcall can process */ #define AUDITSC_ARGS 6 /* bit values for ->signal->audit_tty */ #define AUDIT_TTY_ENABLE BIT(0) #define AUDIT_TTY_LOG_PASSWD BIT(1) struct filename; #define AUDIT_OFF 0 #define AUDIT_ON 1 #define AUDIT_LOCKED 2 #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ extern 
__printf(4, 5) void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...); extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type); extern __printf(2, 3) void audit_log_format(struct audit_buffer *ab, const char *fmt, ...); extern void audit_log_end(struct audit_buffer *ab); extern bool audit_string_contains_control(const char *string, size_t len); extern void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len); extern void audit_log_n_string(struct audit_buffer *ab, const char *buf, size_t n); extern void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t n); extern void audit_log_untrustedstring(struct audit_buffer *ab, const char *string); extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, const struct path *path); extern void audit_log_key(struct audit_buffer *ab, char *key); extern void audit_log_path_denied(int type, const char *operation); extern void audit_log_lost(const char *message); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); extern int audit_update_lsm_rules(void); /* Private API (for audit.c only) */ extern int audit_rule_change(int type, int seq, void *data, size_t datasz); extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern int audit_set_loginuid(kuid_t loginuid); static inline kuid_t audit_get_loginuid(struct task_struct *tsk) { return tsk->loginuid; } static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return tsk->sessionid; } extern u32 audit_enabled; extern int audit_signal_info(int sig, struct task_struct *t); #else /* CONFIG_AUDIT */ static inline __printf(4, 5) void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, const char *fmt, ...) { } static inline struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { return NULL; } static inline __printf(2, 3) void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) 
{ } static inline void audit_log_end(struct audit_buffer *ab) { } static inline void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len) { } static inline void audit_log_n_string(struct audit_buffer *ab, const char *buf, size_t n) { } static inline void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t n) { } static inline void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) { } static inline void audit_log_d_path(struct audit_buffer *ab, const char *prefix, const struct path *path) { } static inline void audit_log_key(struct audit_buffer *ab, char *key) { } static inline void audit_log_path_denied(int type, const char *operation) { } static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; } static inline void audit_log_task_info(struct audit_buffer *ab) { } static inline kuid_t audit_get_loginuid(struct task_struct *tsk) { return INVALID_UID; } static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return AUDIT_SID_UNSET; } #define audit_enabled AUDIT_OFF static inline int audit_signal_info(int sig, struct task_struct *t) { return 0; } #endif /* CONFIG_AUDIT */ #ifdef CONFIG_AUDIT_COMPAT_GENERIC #define audit_is_compat(arch) (!((arch) & __AUDIT_ARCH_64BIT)) #else #define audit_is_compat(arch) false #endif #define AUDIT_INODE_PARENT 1 /* dentry represents the parent */ #define AUDIT_INODE_HIDDEN 2 /* audit record should be hidden */ #define AUDIT_INODE_NOEVAL 4 /* audit record incomplete */ #ifdef CONFIG_AUDITSYSCALL #include <asm/syscall.h> /* for syscall_get_arch() */ /* These are defined in auditsc.c */ /* Public API */ extern int audit_alloc(struct task_struct *task); extern void __audit_free(struct task_struct *task); extern void __audit_uring_entry(u8 op); extern void __audit_uring_exit(int success, long code); extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3); extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); extern void __audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type); extern void audit_seccomp(unsigned long syscall, long signr, int code); extern void audit_seccomp_actions_logged(const char *names, const char *old_names, int res); extern void __audit_ptrace(struct task_struct *t); static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx) { task->audit_context = ctx; } static inline struct audit_context *audit_context(void) { return current->audit_context; } static inline bool audit_dummy_context(void) { void *p = audit_context(); return !p || *(int *)p; } static inline void audit_free(struct task_struct *task) { if (unlikely(task->audit_context)) __audit_free(task); } static inline void audit_uring_entry(u8 op) { /* * We intentionally check audit_context() before audit_enabled as most * Linux systems (as of ~2021) rely on systemd which forces audit to * be enabled regardless of the user's audit configuration. 
*/ if (unlikely(audit_context() && audit_enabled)) __audit_uring_entry(op); } static inline void audit_uring_exit(int success, long code) { if (unlikely(audit_context())) __audit_uring_exit(success, code); } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) { if (unlikely(audit_context())) __audit_syscall_entry(major, a0, a1, a2, a3); } static inline void audit_syscall_exit(void *pt_regs) { if (unlikely(audit_context())) { int success = is_syscall_success(pt_regs); long return_code = regs_return_value(pt_regs); __audit_syscall_exit(success, return_code); } } static inline struct filename *audit_reusename(const __user char *name) { if (unlikely(!audit_dummy_context())) return __audit_reusename(name); return NULL; } static inline void audit_getname(struct filename *name) { if (unlikely(!audit_dummy_context())) __audit_getname(name); } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { if (unlikely(!audit_dummy_context())) __audit_inode(name, dentry, aflags); } static inline void audit_file(struct file *file) { if (unlikely(!audit_dummy_context())) __audit_file(file); } static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { if (unlikely(!audit_dummy_context())) __audit_inode(name, dentry, AUDIT_INODE_PARENT | AUDIT_INODE_HIDDEN); } static inline void audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { if (unlikely(!audit_dummy_context())) __audit_inode_child(parent, dentry, type); } void audit_core_dumps(long signr); static inline void audit_ptrace(struct task_struct *t) { if (unlikely(!audit_dummy_context())) __audit_ptrace(t); } /* Private API (for audit.c only) */ extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern void __audit_bprm(struct linux_binprm *bprm); extern int __audit_socketcall(int nargs, unsigned long *args); extern int __audit_sockaddr(int len, void *addr); extern void __audit_fd_pair(int fd1, int fd2); extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr); extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout); extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification); extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old); extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_openat2_how(struct open_how *how); extern void __audit_log_kern_module(char *name); extern void __audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar); extern void __audit_tk_injoffset(struct timespec64 offset); extern void __audit_ntp_log(const struct audit_ntp_data *ad); extern void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { if (unlikely(!audit_dummy_context())) __audit_ipc_obj(ipcp); } static inline void audit_fd_pair(int fd1, int fd2) { if (unlikely(!audit_dummy_context())) __audit_fd_pair(fd1, fd2); } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { if 
(unlikely(!audit_dummy_context())) __audit_ipc_set_perm(qbytes, uid, gid, mode); } static inline void audit_bprm(struct linux_binprm *bprm) { if (unlikely(!audit_dummy_context())) __audit_bprm(bprm); } static inline int audit_socketcall(int nargs, unsigned long *args) { if (unlikely(!audit_dummy_context())) return __audit_socketcall(nargs, args); return 0; } static inline int audit_socketcall_compat(int nargs, u32 *args) { unsigned long a[AUDITSC_ARGS]; int i; if (audit_dummy_context()) return 0; for (i = 0; i < nargs; i++) a[i] = (unsigned long)args[i]; return __audit_socketcall(nargs, a); } static inline int audit_sockaddr(int len, void *addr) { if (unlikely(!audit_dummy_context())) return __audit_sockaddr(len, addr); return 0; } static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { if (unlikely(!audit_dummy_context())) __audit_mq_open(oflag, mode, attr); } static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout) { if (unlikely(!audit_dummy_context())) __audit_mq_sendrecv(mqdes, msg_len, msg_prio, abs_timeout); } static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { if (unlikely(!audit_dummy_context())) __audit_mq_notify(mqdes, notification); } static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { if (unlikely(!audit_dummy_context())) __audit_mq_getsetattr(mqdes, mqstat); } static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) { if (unlikely(!audit_dummy_context())) return __audit_log_bprm_fcaps(bprm, new, old); return 0; } static inline void audit_log_capset(const struct cred *new, const struct cred *old) { if (unlikely(!audit_dummy_context())) __audit_log_capset(new, old); } static inline void audit_mmap_fd(int fd, int flags) { if (unlikely(!audit_dummy_context())) __audit_mmap_fd(fd, flags); } static inline void audit_openat2_how(struct open_how *how) { if (unlikely(!audit_dummy_context())) __audit_openat2_how(how); } static inline void audit_log_kern_module(char *name) { if (!audit_dummy_context()) __audit_log_kern_module(name); } static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { if (!audit_dummy_context()) __audit_fanotify(response, friar); } static inline void audit_tk_injoffset(struct timespec64 offset) { /* ignore no-op events */ if (offset.tv_sec == 0 && offset.tv_nsec == 0) return; if (!audit_dummy_context()) __audit_tk_injoffset(offset); } static inline void audit_ntp_init(struct audit_ntp_data *ad) { memset(ad, 0, sizeof(*ad)); } static inline void audit_ntp_set_old(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { ad->vals[type].oldval = val; } static inline void audit_ntp_set_new(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { ad->vals[type].newval = val; } static inline void audit_ntp_log(const struct audit_ntp_data *ad) { if (!audit_dummy_context()) __audit_ntp_log(ad); } static inline void audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp) { if (audit_enabled) __audit_log_nfcfg(name, af, nentries, op, gfp); } extern int audit_n_rules; extern int audit_signals; #else /* CONFIG_AUDITSYSCALL */ static inline int audit_alloc(struct task_struct *task) { return 0; } static inline void audit_free(struct task_struct *task) { } static inline void audit_uring_entry(u8 op) { } static inline void audit_uring_exit(int success, long code) 
{ } static inline void audit_syscall_entry(int major, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3) { } static inline void audit_syscall_exit(void *pt_regs) { } static inline bool audit_dummy_context(void) { return true; } static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx) { } static inline struct audit_context *audit_context(void) { return NULL; } static inline struct filename *audit_reusename(const __user char *name) { return NULL; } static inline void audit_getname(struct filename *name) { } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { } static inline void audit_file(struct file *file) { } static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { } static inline void audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { } static inline void audit_core_dumps(long signr) { } static inline void audit_seccomp(unsigned long syscall, long signr, int code) { } static inline void audit_seccomp_actions_logged(const char *names, const char *old_names, int res) { } static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { } static inline void audit_bprm(struct linux_binprm *bprm) { } static inline int audit_socketcall(int nargs, unsigned long *args) { return 0; } static inline int audit_socketcall_compat(int nargs, u32 *args) { return 0; } static inline void audit_fd_pair(int fd1, int fd2) { } static inline int audit_sockaddr(int len, void *addr) { return 0; } static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { } static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout) { } static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) { } static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) { } static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) { return 0; } static inline void audit_log_capset(const struct cred *new, const struct cred *old) { } static inline void audit_mmap_fd(int fd, int flags) { } static inline void audit_openat2_how(struct open_how *how) { } static inline void audit_log_kern_module(char *name) { } static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { } static inline void audit_tk_injoffset(struct timespec64 offset) { } static inline void audit_ntp_init(struct audit_ntp_data *ad) { } static inline void audit_ntp_set_old(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { } static inline void audit_ntp_set_new(struct audit_ntp_data *ad, enum audit_ntp_type type, long long val) { } static inline void audit_ntp_log(const struct audit_ntp_data *ad) { } static inline void audit_ptrace(struct task_struct *t) { } static inline void audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, enum audit_nfcfgop op, gfp_t gfp) { } #define audit_n_rules 0 #define audit_signals 0 #endif /* CONFIG_AUDITSYSCALL */ static inline bool audit_loginuid_set(struct task_struct *tsk) { return uid_valid(audit_get_loginuid(tsk)); } #endif
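/*
 * Illustrative sketch (not part of audit.h): how kernel code might emit a
 * record through the logging API declared above. The record type
 * AUDIT_MY_EVENT and the function name are hypothetical placeholders; real
 * callers pass a type from <uapi/linux/audit.h> and depend on CONFIG_AUDIT.
 */
#if 0	/* example only */
static void example_audit_widget_change(const char *untrusted_name, int state)
{
	struct audit_buffer *ab;

	/* audit_log_start() returns NULL if auditing is off or allocation fails. */
	ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_MY_EVENT);
	if (!ab)
		return;

	audit_log_format(ab, "op=widget-change state=%d name=", state);
	/* Escape bytes we do not control before they reach the audit log. */
	audit_log_untrustedstring(ab, untrusted_name);
	audit_log_end(ab);
}
#endif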
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
 *
 * High-resolution kernel timers
 *
 * In contrast to the low-resolution timeout API, aka timer wheel,
 * hrtimers provide finer resolution and accuracy depending on system
 * configuration and capabilities.
 *
 * Started by: Thomas Gleixner and Ingo Molnar
 *
 * Credits:
 *	Based on the original timer wheel code
 *
 *	Help, testing, suggestions, bugfixes, improvements were
 *	provided by:
 *
 *	George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
 *	et. al.
 */

#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/notifier.h>
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/sched/isolation.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/compat.h>
#include <linux/uaccess.h>

#include <trace/events/timer.h>

#include "tick-internal.h"

/*
 * Masks for selecting the soft and hard context timers from
 * cpu_base->active
 */
#define MASK_SHIFT		(HRTIMER_BASE_MONOTONIC_SOFT)
#define HRTIMER_ACTIVE_HARD	((1U << MASK_SHIFT) - 1)
#define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
#define HRTIMER_ACTIVE_ALL	(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)

/*
 * The timer bases:
 *
 * There are more clockids than hrtimer bases. Thus, we index
 * into the timer bases by the hrtimer_base_type enum. When trying
 * to reach a base using a clockid, hrtimer_clockid_to_base()
 * is used to convert from clockid to the proper hrtimer_base_type.
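 *
 * For orientation, the *_SOFT bases in the table below simply mirror the
 * hard bases at "hard index + MASK_SHIFT"; assuming the current
 * hrtimer_base_type layout (not spelled out in this file) that means:
 *
 *	MASK_SHIFT          == HRTIMER_BASE_MONOTONIC_SOFT == 4
 *	HRTIMER_ACTIVE_HARD == 0x0f   (bases 0-3, hard irq expiry)
 *	HRTIMER_ACTIVE_SOFT == 0xf0   (bases 4-7, softirq expiry)
 *
 * which is what __hrtimer_init() relies on when it adds
 * HRTIMER_MAX_CLOCK_BASES / 2 to the base index for a *_SOFT timer.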
*/ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), .clock_base = { { .index = HRTIMER_BASE_MONOTONIC, .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, }, { .index = HRTIMER_BASE_REALTIME, .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, }, { .index = HRTIMER_BASE_BOOTTIME, .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, }, { .index = HRTIMER_BASE_TAI, .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, { .index = HRTIMER_BASE_MONOTONIC_SOFT, .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, }, { .index = HRTIMER_BASE_REALTIME_SOFT, .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, }, { .index = HRTIMER_BASE_BOOTTIME_SOFT, .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, }, { .index = HRTIMER_BASE_TAI_SOFT, .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, } }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { /* Make sure we catch unsupported clockids */ [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES, [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, [CLOCK_TAI] = HRTIMER_BASE_TAI, }; /* * Functions and macros which are different for UP/SMP systems are kept in a * single place */ #ifdef CONFIG_SMP /* * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base() * such that hrtimer_callback_running() can unconditionally dereference * timer->base->cpu_base */ static struct hrtimer_cpu_base migration_cpu_base = { .clock_base = { { .cpu_base = &migration_cpu_base, .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, &migration_cpu_base.lock), }, }, }; #define migration_base migration_cpu_base.clock_base[0] static inline bool is_migration_base(struct hrtimer_clock_base *base) { return base == &migration_base; } /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that all timers which are tied to this base via timer->base are * locked, and the base itself is locked too. * * So __run_timers/migrate_timers can safely modify all timers which could * be found on the lists/queues. * * When the timer's base is locked, and the timer removed from list, it is * possible to set timer->base = &migration_base and drop the lock: the timer * remains locked. */ static struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->lock) { struct hrtimer_clock_base *base; for (;;) { base = READ_ONCE(timer->base); if (likely(base != &migration_base)) { raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); if (likely(base == timer->base)) return base; /* The timer has migrated to another CPU: */ raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); } cpu_relax(); } } /* * We do not migrate the timer when it is expiring before the next * event on the target cpu. When high resolution is enabled, we cannot * reprogram the target cpu hardware and we would cause it to fire * late. To keep it simple, we handle the high resolution enabled and * disabled case similar. * * Called with cpu_base->lock of target cpu held. 
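 *
 * For reference, a caller that must keep a CLOCK_MONOTONIC timer on the
 * local CPU can bypass this migration logic altogether by starting the
 * timer pinned (hypothetical timer name and expiry, sketch only):
 *
 *	hrtimer_start(&my_timer,
 *		      ktime_add_ns(ktime_get(), 100 * NSEC_PER_USEC),
 *		      HRTIMER_MODE_ABS_PINNED);
 *
 * With the PINNED bit set, get_target_base() returns the current CPU's
 * base and switch_hrtimer_base() will not move the timer.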
*/ static int hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) { ktime_t expires; expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); return expires < new_base->cpu_base->expires_next; } static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); #endif return base; } /* * We switch the timer base to a power-optimized selected CPU target, * if: * - NO_HZ_COMMON is enabled * - timer migration is enabled * - the timer callback is not running * - the timer is not the first expiring timer on the new target * * If one of the above requirements is not fulfilled we move the timer * to the current CPU or leave it on the previously assigned CPU if * the timer callback is currently running. */ static inline struct hrtimer_clock_base * switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, int pinned) { struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base; struct hrtimer_clock_base *new_base; int basenum = base->index; this_cpu_base = this_cpu_ptr(&hrtimer_bases); new_cpu_base = get_target_base(this_cpu_base, pinned); again: new_base = &new_cpu_base->clock_base[basenum]; if (base != new_base) { /* * We are trying to move timer to new_base. * However we can't change timer's base while it is running, * so we keep it on the same CPU. No hassle vs. reprogramming * the event source in the high resolution case. The softirq * code will take care of this when the timer function has * completed. There is no conflict as we hold the lock until * the timer is enqueued. */ if (unlikely(hrtimer_callback_running(timer))) return base; /* See the comment in lock_hrtimer_base() */ WRITE_ONCE(timer->base, &migration_base); raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); if (new_cpu_base != this_cpu_base && hrtimer_check_target(timer, new_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; WRITE_ONCE(timer->base, base); goto again; } WRITE_ONCE(timer->base, new_base); } else { if (new_cpu_base != this_cpu_base && hrtimer_check_target(timer, new_base)) { new_cpu_base = this_cpu_base; goto again; } } return new_base; } #else /* CONFIG_SMP */ static inline bool is_migration_base(struct hrtimer_clock_base *base) { return false; } static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->cpu_base->lock) { struct hrtimer_clock_base *base = timer->base; raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); return base; } # define switch_hrtimer_base(t, b, p) (b) #endif /* !CONFIG_SMP */ /* * Functions for the union type storage format of ktime_t which are * too large for inlining: */ #if BITS_PER_LONG < 64 /* * Divide a ktime value by a nanosecond value */ s64 __ktime_divns(const ktime_t kt, s64 div) { int sft = 0; s64 dclc; u64 tmp; dclc = ktime_to_ns(kt); tmp = dclc < 0 ? -dclc : dclc; /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; div >>= 1; } tmp >>= sft; do_div(tmp, (u32) div); return dclc < 0 ? 
-tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); #endif /* BITS_PER_LONG >= 64 */ /* * Add two ktime values and do a safety check for overflow: */ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) { ktime_t res = ktime_add_unsafe(lhs, rhs); /* * We use KTIME_SEC_MAX here, the maximum timeout which we can * return to user space in a timespec: */ if (res < 0 || res < lhs || res < rhs) res = ktime_set(KTIME_SEC_MAX, 0); return res; } EXPORT_SYMBOL_GPL(ktime_add_safe); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static const struct debug_obj_descr hrtimer_debug_descr; static void *hrtimer_debug_hint(void *addr) { return ((struct hrtimer *) addr)->function; } /* * fixup_init is called when: * - an active object is initialized */ static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state) { struct hrtimer *timer = addr; switch (state) { case ODEBUG_STATE_ACTIVE: hrtimer_cancel(timer); debug_object_init(timer, &hrtimer_debug_descr); return true; default: return false; } } /* * fixup_activate is called when: * - an active object is activated * - an unknown non-static object is activated */ static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state) { switch (state) { case ODEBUG_STATE_ACTIVE: WARN_ON(1); fallthrough; default: return false; } } /* * fixup_free is called when: * - an active object is freed */ static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state) { struct hrtimer *timer = addr; switch (state) { case ODEBUG_STATE_ACTIVE: hrtimer_cancel(timer); debug_object_free(timer, &hrtimer_debug_descr); return true; default: return false; } } static const struct debug_obj_descr hrtimer_debug_descr = { .name = "hrtimer", .debug_hint = hrtimer_debug_hint, .fixup_init = hrtimer_fixup_init, .fixup_activate = hrtimer_fixup_activate, .fixup_free = hrtimer_fixup_free, }; static inline void debug_hrtimer_init(struct hrtimer *timer) { debug_object_init(timer, &hrtimer_debug_descr); } static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) { debug_object_activate(timer, &hrtimer_debug_descr); } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { debug_object_deactivate(timer, &hrtimer_debug_descr); } static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode); void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { debug_object_init_on_stack(timer, &hrtimer_debug_descr); __hrtimer_init(timer, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init_on_stack); static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr); __hrtimer_init_sleeper(sl, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack); void destroy_hrtimer_on_stack(struct hrtimer *timer) { debug_object_free(timer, &hrtimer_debug_descr); } EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); #else static inline void debug_hrtimer_init(struct hrtimer *timer) { } static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif static inline void debug_init(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) { debug_hrtimer_init(timer); trace_hrtimer_init(timer, clockid, mode); } static inline void debug_activate(struct hrtimer *timer, enum 
hrtimer_mode mode) { debug_hrtimer_activate(timer, mode); trace_hrtimer_start(timer, mode); } static inline void debug_deactivate(struct hrtimer *timer) { debug_hrtimer_deactivate(timer); trace_hrtimer_cancel(timer); } static struct hrtimer_clock_base * __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) { unsigned int idx; if (!*active) return NULL; idx = __ffs(*active); *active &= ~(1U << idx); return &cpu_base->clock_base[idx]; } #define for_each_active_base(base, cpu_base, active) \ while ((base = __next_base((cpu_base), &(active)))) static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, const struct hrtimer *exclude, unsigned int active, ktime_t expires_next) { struct hrtimer_clock_base *base; ktime_t expires; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; next = timerqueue_getnext(&base->active); timer = container_of(next, struct hrtimer, node); if (timer == exclude) { /* Get to the next timer in the queue. */ next = timerqueue_iterate_next(next); if (!next) continue; timer = container_of(next, struct hrtimer, node); } expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires < expires_next) { expires_next = expires; /* Skip cpu_base update if a timer is being excluded. */ if (exclude) continue; if (timer->is_soft) cpu_base->softirq_next_timer = timer; else cpu_base->next_timer = timer; } } /* * clock_was_set() might have changed base->offset of any of * the clock bases so the result might be negative. Fix it up * to prevent a false positive in clockevents_program_event(). */ if (expires_next < 0) expires_next = 0; return expires_next; } /* * Recomputes cpu_base::*next_timer and returns the earliest expires_next * but does not set cpu_base::*expires_next, that is done by * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating * cpu_base::*expires_next right away, reprogramming logic would no longer * work. * * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, * those timers will get run whenever the softirq gets handled, at the end of * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. * * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. * * @active_mask must be one of: * - HRTIMER_ACTIVE_ALL, * - HRTIMER_ACTIVE_SOFT, or * - HRTIMER_ACTIVE_HARD. */ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) { unsigned int active; struct hrtimer *next_timer = NULL; ktime_t expires_next = KTIME_MAX; if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; cpu_base->softirq_next_timer = NULL; expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, KTIME_MAX); next_timer = cpu_base->softirq_next_timer; } if (active_mask & HRTIMER_ACTIVE_HARD) { active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; cpu_base->next_timer = next_timer; expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, expires_next); } return expires_next; } static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) { ktime_t expires_next, soft = KTIME_MAX; /* * If the soft interrupt has already been activated, ignore the * soft bases. They will be handled in the already raised soft * interrupt. 
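 *
 * Worked example: with no softirq pending, the next soft timer due in
 * 2ms and the next hard timer due in 5ms, "soft" below becomes 2ms,
 * expires_next is lowered from 5ms to 2ms and cpu_base->next_timer is
 * pointed at the soft timer, so the clock event device fires early
 * enough to raise the softirq on time.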
*/ if (!cpu_base->softirq_activated) { soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); /* * Update the soft expiry time. clock_settime() might have * affected it. */ cpu_base->softirq_expires_next = soft; } expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); /* * If a softirq timer is expiring first, update cpu_base->next_timer * and program the hardware with the soft expiry time. */ if (expires_next > soft) { cpu_base->next_timer = cpu_base->softirq_next_timer; expires_next = soft; } return expires_next; } static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, offs_real, offs_boot, offs_tai); base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; return now; } /* * Is the high resolution mode active ? */ static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) { return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? cpu_base->hres_active : 0; } static inline int hrtimer_hres_active(void) { return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); } static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtimer *next_timer, ktime_t expires_next) { cpu_base->expires_next = expires_next; /* * If hres is not active, hardware does not have to be * reprogrammed yet. * * If a hang was detected in the last timer interrupt then we * leave the hang delay active in the hardware. We want the * system to make progress. That also prevents the following * scenario: * T1 expires 50ms from now * T2 expires 5s from now * * T1 is removed, so this code is called and would reprogram * the hardware to 5s from now. Any hrtimer_start after that * will not reprogram the hardware due to hang_detected being * set. So we'd effectively block all timers until the T2 event * fires. */ if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; tick_program_event(expires_next, 1); } /* * Reprogram the event source with checking both queues for the * next event * Called with interrupts disabled and base->lock held */ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; expires_next = hrtimer_update_next_event(cpu_base); if (skip_equal && expires_next == cpu_base->expires_next) return; __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next); } /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS /* * High resolution timer enabled ? 
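 *
 * hrtimer_hres_enabled defaults to true; booting with "highres=off" on
 * the kernel command line flips it via the "highres=" __setup handler
 * below and keeps the system on the low resolution tick.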
*/ static bool hrtimer_hres_enabled __read_mostly = true; unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; EXPORT_SYMBOL_GPL(hrtimer_resolution); /* * Enable / Disable high resolution mode */ static int __init setup_hrtimer_hres(char *str) { return (kstrtobool(str, &hrtimer_hres_enabled) == 0); } __setup("highres=", setup_hrtimer_hres); /* * hrtimer_high_res_enabled - query, if the highres mode is enabled */ static inline int hrtimer_is_hres_enabled(void) { return hrtimer_hres_enabled; } static void retrigger_next_event(void *arg); /* * Switch to high resolution mode */ static void hrtimer_switch_to_hres(void) { struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); if (tick_init_highres()) { pr_warn("Could not switch to high resolution mode on CPU %u\n", base->cpu); return; } base->hres_active = 1; hrtimer_resolution = HIGH_RES_NSEC; tick_setup_sched_timer(true); /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); } #else static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ /* * Retrigger next event is called after clock was set with interrupts * disabled through an SMP function call or directly from low level * resume code. * * This is only invoked when: * - CONFIG_HIGH_RES_TIMERS is enabled. * - CONFIG_NOHZ_COMMON is enabled * * For the other cases this function is empty and because the call sites * are optimized out it vanishes as well, i.e. no need for lots of * #ifdeffery. */ static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); /* * When high resolution mode or nohz is active, then the offsets of * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the * next tick will take care of that. * * If high resolution mode is active then the next expiring timer * must be reevaluated and the clock event device reprogrammed if * necessary. * * In the NOHZ case the update of the offset and the reevaluation * of the next expiring timer is enough. The return from the SMP * function call will take care of the reprogramming in case the * CPU was in a NOHZ idle sleep. */ if (!__hrtimer_hres_active(base) && !tick_nohz_active) return; raw_spin_lock(&base->lock); hrtimer_update_base(base); if (__hrtimer_hres_active(base)) hrtimer_force_reprogram(base, 0); else hrtimer_update_next_event(base); raw_spin_unlock(&base->lock); } /* * When a timer is enqueued and expires earlier than the already enqueued * timers, we have to check, whether it expires earlier than the timer for * which the clock event device was armed. * * Called with interrupts disabled and base->cpu_base.lock held */ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base = timer->base; ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); /* * CLOCK_REALTIME timer might be requested with an absolute * expiry time which is less than base->offset. Set it to 0. */ if (expires < 0) expires = 0; if (timer->is_soft) { /* * soft hrtimer could be started on a remote CPU. In this * case softirq_expires_next needs to be updated on the * remote CPU. The soft hrtimer will not expire before the * first hard hrtimer on the remote CPU - * hrtimer_check_target() prevents this case. 
*/ struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; if (timer_cpu_base->softirq_activated) return; if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) return; timer_cpu_base->softirq_next_timer = timer; timer_cpu_base->softirq_expires_next = expires; if (!ktime_before(expires, timer_cpu_base->expires_next) || !reprogram) return; } /* * If the timer is not on the current cpu, we cannot reprogram * the other cpus clock event device. */ if (base->cpu_base != cpu_base) return; if (expires >= cpu_base->expires_next) return; /* * If the hrtimer interrupt is running, then it will reevaluate the * clock bases and reprogram the clock event device. */ if (cpu_base->in_hrtirq) return; cpu_base->next_timer = timer; __hrtimer_reprogram(cpu_base, timer, expires); } static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int active) { struct hrtimer_clock_base *base; unsigned int seq; ktime_t expires; /* * Update the base offsets unconditionally so the following * checks whether the SMP function call is required works. * * The update is safe even when the remote CPU is in the hrtimer * interrupt or the hrtimer soft interrupt and expiring affected * bases. Either it will see the update before handling a base or * it will see it when it finishes the processing and reevaluates * the next expiring timer. */ seq = cpu_base->clock_was_set_seq; hrtimer_update_base(cpu_base); /* * If the sequence did not change over the update then the * remote CPU already handled it. */ if (seq == cpu_base->clock_was_set_seq) return false; /* * If the remote CPU is currently handling an hrtimer interrupt, it * will reevaluate the first expiring timer of all clock bases * before reprogramming. Nothing to do here. */ if (cpu_base->in_hrtirq) return false; /* * Walk the affected clock bases and check whether the first expiring * timer in a clock base is moving ahead of the first expiring timer of * @cpu_base. If so, the IPI must be invoked because per CPU clock * event devices cannot be remotely reprogrammed. */ active &= cpu_base->active_bases; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; next = timerqueue_getnext(&base->active); expires = ktime_sub(next->expires, base->offset); if (expires < cpu_base->expires_next) return true; /* Extra check for softirq clock bases */ if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT) continue; if (cpu_base->softirq_activated) continue; if (expires < cpu_base->softirq_expires_next) return true; } return false; } /* * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and * CLOCK_BOOTTIME (for late sleep time injection). * * This requires to update the offsets for these clocks * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, then this * also requires to eventually reprogram the per CPU clock event devices * when the change moves an affected timer ahead of the first expiring * timer on that CPU. Obviously remote per CPU clock event devices cannot * be reprogrammed. The other reason why an IPI has to be sent is when the * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets * in the tick, which obviously might be stopped, so this has to bring out * the remote CPU which might sleep in idle to get this sorted. 
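 *
 * The typical trigger is a clock_settime(CLOCK_REALTIME, ...) style
 * update: the timekeeping core applies the new wall clock offset and
 * then calls clock_was_set(), so that already armed
 * CLOCK_REALTIME/TAI/BOOTTIME hrtimers are re-evaluated against the
 * changed offsets.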
*/ void clock_was_set(unsigned int bases) { struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases); cpumask_var_t mask; int cpu; if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active) goto out_timerfd; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { on_each_cpu(retrigger_next_event, NULL, 1); goto out_timerfd; } /* Avoid interrupting CPUs if possible */ cpus_read_lock(); for_each_online_cpu(cpu) { unsigned long flags; cpu_base = &per_cpu(hrtimer_bases, cpu); raw_spin_lock_irqsave(&cpu_base->lock, flags); if (update_needs_ipi(cpu_base, bases)) cpumask_set_cpu(cpu, mask); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } preempt_disable(); smp_call_function_many(mask, retrigger_next_event, NULL, 1); preempt_enable(); cpus_read_unlock(); free_cpumask_var(mask); out_timerfd: timerfd_clock_was_set(); } static void clock_was_set_work(struct work_struct *work) { clock_was_set(CLOCK_SET_WALL); } static DECLARE_WORK(hrtimer_work, clock_was_set_work); /* * Called from timekeeping code to reprogram the hrtimer interrupt device * on all cpus and to notify timerfd. */ void clock_was_set_delayed(void) { schedule_work(&hrtimer_work); } /* * Called during resume either directly from via timekeeping_resume() * or in the case of s2idle from tick_unfreeze() to ensure that the * hrtimers are up to date. */ void hrtimers_resume_local(void) { lockdep_assert_irqs_disabled(); /* Retrigger on the local CPU */ retrigger_next_event(NULL); } /* * Counterpart to lock_hrtimer_base above: */ static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __releases(&timer->base->cpu_base->lock) { raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); } /** * hrtimer_forward() - forward the timer expiry * @timer: hrtimer to forward * @now: forward past this time * @interval: the interval to forward * * Forward the timer expiry so it will expire in the future. * * .. note:: * This only updates the timer expiry value and does not requeue the timer. * * There is also a variant of the function hrtimer_forward_now(). * * Context: Can be safely called from the callback function of @timer. If called * from other contexts @timer must neither be enqueued nor running the * callback and the caller needs to take care of serialization. * * Return: The number of overruns are returned. */ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) { u64 orun = 1; ktime_t delta; delta = ktime_sub(now, hrtimer_get_expires(timer)); if (delta < 0) return 0; if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED)) return 0; if (interval < hrtimer_resolution) interval = hrtimer_resolution; if (unlikely(delta >= interval)) { s64 incr = ktime_to_ns(interval); orun = ktime_divns(delta, incr); hrtimer_add_expires_ns(timer, incr * orun); if (hrtimer_get_expires_tv64(timer) > now) return orun; /* * This (and the ktime_add() below) is the * correction for exact: */ orun++; } hrtimer_add_expires(timer, interval); return orun; } EXPORT_SYMBOL_GPL(hrtimer_forward); /* * enqueue_hrtimer - internal function to (re)start a timer * * The timer is inserted in expiry order. Insertion into the * red black tree is O(log(n)). Must hold the base lock. * * Returns 1 when the new timer is the leftmost timer in the tree. 
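 *
 * For context, the canonical periodic-callback pattern built on top of
 * hrtimer_forward()/hrtimer_forward_now() above looks like this
 * (hypothetical callback and helper names, 10ms period, sketch only):
 *
 *	static enum hrtimer_restart example_tick(struct hrtimer *t)
 *	{
 *		do_periodic_work();
 *		hrtimer_forward_now(t, ms_to_ktime(10));
 *		return HRTIMER_RESTART;
 *	}
 *
 * Returning HRTIMER_RESTART makes __run_hrtimer() call enqueue_hrtimer()
 * again with the expiry that hrtimer_forward_now() just advanced.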
*/ static int enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, enum hrtimer_mode mode) { debug_activate(timer, mode); WARN_ON_ONCE(!base->cpu_base->online); base->cpu_base->active_bases |= 1 << base->index; /* Pairs with the lockless read in hrtimer_is_queued() */ WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED); return timerqueue_add(&base->active, &timer->node); } /* * __remove_hrtimer - internal function to remove a timer * * Caller must hold the base lock. * * High resolution timer mode reprograms the clock event device when the * timer is the one which expires next. The caller can disable this by setting * reprogram to zero. This is useful, when the context does a reprogramming * anyway (e.g. timer interrupt) */ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, u8 newstate, int reprogram) { struct hrtimer_cpu_base *cpu_base = base->cpu_base; u8 state = timer->state; /* Pairs with the lockless read in hrtimer_is_queued() */ WRITE_ONCE(timer->state, newstate); if (!(state & HRTIMER_STATE_ENQUEUED)) return; if (!timerqueue_del(&base->active, &timer->node)) cpu_base->active_bases &= ~(1 << base->index); /* * Note: If reprogram is false we do not update * cpu_base->next_timer. This happens when we remove the first * timer on a remote cpu. No harm as we never dereference * cpu_base->next_timer. So the worst thing what can happen is * an superfluous call to hrtimer_force_reprogram() on the * remote cpu later on if the same timer gets enqueued again. */ if (reprogram && timer == cpu_base->next_timer) hrtimer_force_reprogram(cpu_base, 1); } /* * remove hrtimer, called with base lock held */ static inline int remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart, bool keep_local) { u8 state = timer->state; if (state & HRTIMER_STATE_ENQUEUED) { bool reprogram; /* * Remove the timer and force reprogramming when high * resolution mode is active and the timer is on the current * CPU. If we remove a timer on another CPU, reprogramming is * skipped. The interrupt event on this CPU is fired and * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ debug_deactivate(timer); reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); /* * If the timer is not restarted then reprogramming is * required if the timer is local. If it is local and about * to be restarted, avoid programming it twice (on removal * and a moment later when it's requeued). */ if (!restart) state = HRTIMER_STATE_INACTIVE; else reprogram &= !keep_local; __remove_hrtimer(timer, base, state, reprogram); return 1; } return 0; } static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { #ifdef CONFIG_TIME_LOW_RES /* * CONFIG_TIME_LOW_RES indicates that the system has no way to return * granular time values. For relative timers we add hrtimer_resolution * (i.e. one jiffie) to prevent short timeouts. */ timer->is_rel = mode & HRTIMER_MODE_REL; if (timer->is_rel) tim = ktime_add_safe(tim, hrtimer_resolution); #endif return tim; } static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) { ktime_t expires; /* * Find the next SOFT expiration. */ expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); /* * reprogramming needs to be triggered, even if the next soft * hrtimer expires at the same time than the next hard * hrtimer. cpu_base->softirq_expires_next needs to be updated! 
*/ if (expires == KTIME_MAX) return; /* * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() * cpu_base->*expires_next is only set by hrtimer_reprogram() */ hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); } static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) { struct hrtimer_clock_base *new_base; bool force_local, first; /* * If the timer is on the local cpu base and is the first expiring * timer then this might end up reprogramming the hardware twice * (on removal and on enqueue). To avoid that by prevent the * reprogram on removal, keep the timer local to the current CPU * and enforce reprogramming after it is queued no matter whether * it is the new first expiring timer again or not. */ force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); force_local &= base->cpu_base->next_timer == timer; /* * Remove an active timer from the queue. In case it is not queued * on the current CPU, make sure that remove_hrtimer() updates the * remote data correctly. * * If it's on the current CPU and the first expiring timer, then * skip reprogramming, keep the timer local and enforce * reprogramming later if it was the first expiring timer. This * avoids programming the underlying clock event twice (once at * removal and once after enqueue). */ remove_hrtimer(timer, base, true, force_local); if (mode & HRTIMER_MODE_REL) tim = ktime_add_safe(tim, base->get_time()); tim = hrtimer_update_lowres(timer, tim, mode); hrtimer_set_expires_range_ns(timer, tim, delta_ns); /* Switch the timer base, if necessary: */ if (!force_local) { new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); } else { new_base = base; } first = enqueue_hrtimer(timer, new_base, mode); if (!force_local) return first; /* * Timer was forced to stay on the current CPU to avoid * reprogramming on removal and enqueue. Force reprogram the * hardware by evaluating the new first expiring timer. */ hrtimer_force_reprogram(new_base->cpu_base, 1); return 0; } /** * hrtimer_start_range_ns - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); * softirq based mode is considered for debug purpose only! */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) { struct hrtimer_clock_base *base; unsigned long flags; /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard * expiry mode because unmarked timers are moved to softirq expiry. 
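 *
 * A typical external caller passes the allowed slack straight through,
 * e.g. a 1ms relative timeout with 100us of slack (hypothetical timer
 * name, sketch only):
 *
 *	hrtimer_start_range_ns(&my_timer, ms_to_ktime(1),
 *			       100 * NSEC_PER_USEC, HRTIMER_MODE_REL);
 *
 * The slack only widens the window between the soft and the hard expiry
 * set up by hrtimer_set_expires_range_ns(); the hardware is never
 * programmed later than tim + delta_ns.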
*/ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); else WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); base = lock_hrtimer_base(timer, &flags); if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) hrtimer_reprogram(timer, true); unlock_hrtimer_base(timer, &flags); } EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); /** * hrtimer_try_to_cancel - try to deactivate a timer * @timer: hrtimer to stop * * Returns: * * * 0 when the timer was not active * * 1 when the timer was active * * -1 when the timer is currently executing the callback function and * cannot be stopped */ int hrtimer_try_to_cancel(struct hrtimer *timer) { struct hrtimer_clock_base *base; unsigned long flags; int ret = -1; /* * Check lockless first. If the timer is not active (neither * enqueued nor running the callback, nothing to do here. The * base lock does not serialize against a concurrent enqueue, * so we can avoid taking it. */ if (!hrtimer_active(timer)) return 0; base = lock_hrtimer_base(timer, &flags); if (!hrtimer_callback_running(timer)) ret = remove_hrtimer(timer, base, false, false); unlock_hrtimer_base(timer, &flags); return ret; } EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); #ifdef CONFIG_PREEMPT_RT static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { spin_lock_init(&base->softirq_expiry_lock); } static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { spin_lock(&base->softirq_expiry_lock); } static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { spin_unlock(&base->softirq_expiry_lock); } /* * The counterpart to hrtimer_cancel_wait_running(). * * If there is a waiter for cpu_base->expiry_lock, then it was waiting for * the timer callback to finish. Drop expiry_lock and reacquire it. That * allows the waiter to acquire the lock and make progress. */ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, unsigned long flags) { if (atomic_read(&cpu_base->timer_waiters)) { raw_spin_unlock_irqrestore(&cpu_base->lock, flags); spin_unlock(&cpu_base->softirq_expiry_lock); spin_lock(&cpu_base->softirq_expiry_lock); raw_spin_lock_irq(&cpu_base->lock); } } /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was * running. * * This prevents priority inversion: if the soft irq thread is preempted * in the middle of a timer callback, then calling del_timer_sync() can * lead to two issues: * * - If the caller is on a remote CPU then it has to spin wait for the timer * handler to complete. This can result in unbound priority inversion. * * - If the caller originates from the task which preempted the timer * handler on the same CPU, then spin waiting for the timer handler to * complete is never going to end. */ void hrtimer_cancel_wait_running(const struct hrtimer *timer) { /* Lockless read. Prevent the compiler from reloading it below */ struct hrtimer_clock_base *base = READ_ONCE(timer->base); /* * Just relax if the timer expires in hard interrupt context or if * it is currently on the migration base. */ if (!timer->is_soft || is_migration_base(base)) { cpu_relax(); return; } /* * Mark the base as contended and grab the expiry lock, which is * held by the softirq across the timer callback. Drop the lock * immediately so the softirq can expire the next timer. In theory * the timer could already be running again, but that's more than * unlikely and just causes another wait loop. 
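 *
 * This is what keeps the usual teardown sequence safe on PREEMPT_RT as
 * well (hypothetical object name, sketch only):
 *
 *	hrtimer_cancel(&obj->timer);
 *	kfree(obj);
 *
 * hrtimer_cancel() keeps retrying through this helper until the callback
 * has completed, so the object cannot be freed while its callback is
 * still running.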
*/ atomic_inc(&base->cpu_base->timer_waiters); spin_lock_bh(&base->cpu_base->softirq_expiry_lock); atomic_dec(&base->cpu_base->timer_waiters); spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); } #else static inline void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } static inline void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } static inline void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, unsigned long flags) { } #endif /** * hrtimer_cancel - cancel a timer and wait for the handler to finish. * @timer: the timer to be cancelled * * Returns: * 0 when the timer was not active * 1 when the timer was active */ int hrtimer_cancel(struct hrtimer *timer) { int ret; do { ret = hrtimer_try_to_cancel(timer); if (ret < 0) hrtimer_cancel_wait_running(timer); } while (ret < 0); return ret; } EXPORT_SYMBOL_GPL(hrtimer_cancel); /** * __hrtimer_get_remaining - get remaining time for the timer * @timer: the timer to read * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y */ ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust) { unsigned long flags; ktime_t rem; lock_hrtimer_base(timer, &flags); if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust) rem = hrtimer_expires_remaining_adjusted(timer); else rem = hrtimer_expires_remaining(timer); unlock_hrtimer_base(timer, &flags); return rem; } EXPORT_SYMBOL_GPL(__hrtimer_get_remaining); #ifdef CONFIG_NO_HZ_COMMON /** * hrtimer_get_next_event - get the time until next expiry event * * Returns the next expiry time or KTIME_MAX if no timer is pending. */ u64 hrtimer_get_next_event(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); u64 expires = KTIME_MAX; unsigned long flags; raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!__hrtimer_hres_active(cpu_base)) expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); return expires; } /** * hrtimer_next_event_without - time until next expiry event w/o one timer * @exclude: timer to exclude * * Returns the next expiry time over all timers except for the @exclude one or * KTIME_MAX if none of them is pending. */ u64 hrtimer_next_event_without(const struct hrtimer *exclude) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); u64 expires = KTIME_MAX; unsigned long flags; raw_spin_lock_irqsave(&cpu_base->lock, flags); if (__hrtimer_hres_active(cpu_base)) { unsigned int active; if (!cpu_base->softirq_activated) { active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; expires = __hrtimer_next_event_base(cpu_base, exclude, active, KTIME_MAX); } active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; expires = __hrtimer_next_event_base(cpu_base, exclude, active, expires); } raw_spin_unlock_irqrestore(&cpu_base->lock, flags); return expires; } #endif static inline int hrtimer_clockid_to_base(clockid_t clock_id) { if (likely(clock_id < MAX_CLOCKS)) { int base = hrtimer_clock_to_base_table[clock_id]; if (likely(base != HRTIMER_MAX_CLOCK_BASES)) return base; } WARN(1, "Invalid clockid %d. 
Using MONOTONIC\n", clock_id); return HRTIMER_BASE_MONOTONIC; } static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { bool softtimer = !!(mode & HRTIMER_MODE_SOFT); struct hrtimer_cpu_base *cpu_base; int base; /* * On PREEMPT_RT enabled kernels hrtimers which are not explicitly * marked for hard interrupt expiry mode are moved into soft * interrupt context for latency reasons and because the callbacks * can invoke functions which might sleep on RT, e.g. spin_lock(). */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD)) softtimer = true; memset(timer, 0, sizeof(struct hrtimer)); cpu_base = raw_cpu_ptr(&hrtimer_bases); /* * POSIX magic: Relative CLOCK_REALTIME timers are not affected by * clock modifications, so they needs to become CLOCK_MONOTONIC to * ensure POSIX compliance. */ if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) clock_id = CLOCK_MONOTONIC; base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; base += hrtimer_clockid_to_base(clock_id); timer->is_soft = softtimer; timer->is_hard = !!(mode & HRTIMER_MODE_HARD); timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); } /** * hrtimer_init - initialize a timer to the given clock * @timer: the timer to be initialized * @clock_id: the clock to be used * @mode: The modes which are relevant for initialization: * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, * HRTIMER_MODE_REL_SOFT * * The PINNED variants of the above can be handed in, * but the PINNED bit is ignored as pinning happens * when the hrtimer is started */ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { debug_init(timer, clock_id, mode); __hrtimer_init(timer, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init); /* * A timer is active, when it is enqueued into the rbtree or the * callback function is running or it's in the state of being migrated * to another cpu. * * It is important for this function to not return a false negative. */ bool hrtimer_active(const struct hrtimer *timer) { struct hrtimer_clock_base *base; unsigned int seq; do { base = READ_ONCE(timer->base); seq = raw_read_seqcount_begin(&base->seq); if (timer->state != HRTIMER_STATE_INACTIVE || base->running == timer) return true; } while (read_seqcount_retry(&base->seq, seq) || base != READ_ONCE(timer->base)); return false; } EXPORT_SYMBOL_GPL(hrtimer_active); /* * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3 * distinct sections: * * - queued: the timer is queued * - callback: the timer is being ran * - post: the timer is inactive or (re)queued * * On the read side we ensure we observe timer->state and cpu_base->running * from the same section, if anything changed while we looked at it, we retry. * This includes timer->base changing because sequence numbers alone are * insufficient for that. * * The sequence numbers are required because otherwise we could still observe * a false negative if the read side got smeared over multiple consecutive * __run_hrtimer() invocations. */ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_clock_base *base, struct hrtimer *timer, ktime_t *now, unsigned long flags) __must_hold(&cpu_base->lock) { enum hrtimer_restart (*fn)(struct hrtimer *); bool expires_in_hardirq; int restart; lockdep_assert_held(&cpu_base->lock); debug_deactivate(timer); base->running = timer; /* * Separate the ->running assignment from the ->state assignment. 
* * As with a regular write barrier, this ensures the read side in * hrtimer_active() cannot observe base->running == NULL && * timer->state == INACTIVE. */ raw_write_seqcount_barrier(&base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); fn = timer->function; /* * Clear the 'is relative' flag for the TIME_LOW_RES case. If the * timer is restarted with a period then it becomes an absolute * timer. If its not restarted it does not matter. */ if (IS_ENABLED(CONFIG_TIME_LOW_RES)) timer->is_rel = false; /* * The timer is marked as running in the CPU base, so it is * protected against migration to a different CPU even if the lock * is dropped. */ raw_spin_unlock_irqrestore(&cpu_base->lock, flags); trace_hrtimer_expire_entry(timer, now); expires_in_hardirq = lockdep_hrtimer_enter(timer); restart = fn(timer); lockdep_hrtimer_exit(expires_in_hardirq); trace_hrtimer_expire_exit(timer); raw_spin_lock_irq(&cpu_base->lock); /* * Note: We clear the running state after enqueue_hrtimer and * we do not reprogram the event hardware. Happens either in * hrtimer_start_range_ns() or in hrtimer_interrupt() * * Note: Because we dropped the cpu_base->lock above, * hrtimer_start_range_ns() can have popped in and enqueued the timer * for us already. */ if (restart != HRTIMER_NORESTART && !(timer->state & HRTIMER_STATE_ENQUEUED)) enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); /* * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in * hrtimer_active() cannot observe base->running.timer == NULL && * timer->state == INACTIVE. */ raw_write_seqcount_barrier(&base->seq); WARN_ON_ONCE(base->running != timer); base->running = NULL; } static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, unsigned long flags, unsigned int active_mask) { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases & active_mask; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *node; ktime_t basenow; basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { struct hrtimer *timer; timer = container_of(node, struct hrtimer, node); /* * The immediate goal for using the softexpires is * minimizing wakeups, not running timers at the * earliest interrupt after their soft expiration. * This allows us to avoid using a Priority Search * Tree, which can answer a stabbing query for * overlapping intervals and instead use the simple * BST we already have. * We don't add extra wakeups by delaying timers that * are right-of a not yet expired timer, because that * timer will have to trigger a wakeup anyway. 
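 *
 * Concretely: a timer armed through hrtimer_set_expires_range_ns() with
 * expiry T and slack D has _softexpires == T and node.expires == T + D.
 * The hardware event is programmed from the hard expiry (no later than
 * T + D), while the check below only runs the callback once basenow has
 * reached T, so expiry lands somewhere in [T, T + D] plus interrupt
 * latency.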
*/ if (basenow < hrtimer_get_softexpires_tv64(timer)) break; __run_hrtimer(cpu_base, base, timer, &basenow, flags); if (active_mask == HRTIMER_ACTIVE_SOFT) hrtimer_sync_wait_running(cpu_base, flags); } } } static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); unsigned long flags; ktime_t now; hrtimer_cpu_base_lock_expiry(cpu_base); raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); cpu_base->softirq_activated = 0; hrtimer_update_softirq_timer(cpu_base, true); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); hrtimer_cpu_base_unlock_expiry(cpu_base); } #ifdef CONFIG_HIGH_RES_TIMERS /* * High resolution timer interrupt * Called with interrupts disabled */ void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ktime_t expires_next, now, entry_time, delta; unsigned long flags; int retries = 0; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event = KTIME_MAX; raw_spin_lock_irqsave(&cpu_base->lock, flags); entry_time = now = hrtimer_update_base(cpu_base); retry: cpu_base->in_hrtirq = 1; /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via * the migration code. This does not affect enqueueing of * timers which run their callback and need to be requeued on * this CPU. */ cpu_base->expires_next = KTIME_MAX; if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; raise_softirq_irqoff(HRTIMER_SOFTIRQ); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); /* Reevaluate the clock bases for the [soft] next expiry */ expires_next = hrtimer_update_next_event(cpu_base); /* * Store the new expiry value so the migration code can verify * against it. */ cpu_base->expires_next = expires_next; cpu_base->in_hrtirq = 0; raw_spin_unlock_irqrestore(&cpu_base->lock, flags); /* Reprogramming necessary ? */ if (!tick_program_event(expires_next, 0)) { cpu_base->hang_detected = 0; return; } /* * The next timer was already expired due to: * - tracing * - long lasting callbacks * - being scheduled away when running in a VM * * We need to prevent that we loop forever in the hrtimer * interrupt routine. We give it 3 attempts to avoid * overreacting on some spurious event. * * Acquire base lock for updating the offsets and retrieving * the current time. */ raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); cpu_base->nr_retries++; if (++retries < 3) goto retry; /* * Give the system a chance to do something else than looping * here. We stored the entry time, so we know exactly how long * we spent here. We schedule the next event this amount of * time away. */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; raw_spin_unlock_irqrestore(&cpu_base->lock, flags); delta = ktime_sub(now, entry_time); if ((unsigned int)delta > cpu_base->max_hang_time) cpu_base->max_hang_time = (unsigned int) delta; /* * Limit it to a sensible value as we enforce a longer * delay. Give the CPU at least 100ms to catch up. 
*/ if (delta > 100 * NSEC_PER_MSEC) expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); else expires_next = ktime_add(now, delta); tick_program_event(expires_next, 1); pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); } /* called with interrupts disabled */ static inline void __hrtimer_peek_ahead_timers(void) { struct tick_device *td; if (!hrtimer_hres_active()) return; td = this_cpu_ptr(&tick_cpu_device); if (td && td->evtdev) hrtimer_interrupt(td->evtdev); } #else /* CONFIG_HIGH_RES_TIMERS */ static inline void __hrtimer_peek_ahead_timers(void) { } #endif /* !CONFIG_HIGH_RES_TIMERS */ /* * Called from run_local_timers in hardirq context every jiffy */ void hrtimer_run_queues(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); unsigned long flags; ktime_t now; if (__hrtimer_hres_active(cpu_base)) return; /* * This _is_ ugly: We have to check periodically, whether we * can switch to highres and / or nohz mode. The clocksource * switch happens with xtime_lock held. Notification from * there only sets the check bit in the tick_oneshot code, * otherwise we might deadlock vs. xtime_lock. */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) { hrtimer_switch_to_hres(); return; } raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; raise_softirq_irqoff(HRTIMER_SOFTIRQ); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } /* * Sleep related functions: */ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) { struct hrtimer_sleeper *t = container_of(timer, struct hrtimer_sleeper, timer); struct task_struct *task = t->task; t->task = NULL; if (task) wake_up_process(task); return HRTIMER_NORESTART; } /** * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer * @sl: sleeper to be started * @mode: timer mode abs/rel * * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) */ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, enum hrtimer_mode mode) { /* * Make the enqueue delivery mode check work on RT. If the sleeper * was initialized for hard interrupt delivery, force the mode bit. * This is a special case for hrtimer_sleepers because * hrtimer_init_sleeper() determines the delivery mode on RT so the * fiddling with this decision is avoided at the call sites. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) mode |= HRTIMER_MODE_HARD; hrtimer_start_expires(&sl->timer, mode); } EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { /* * On PREEMPT_RT enabled kernels hrtimers which are not explicitly * marked for hard interrupt expiry mode are moved into soft * interrupt context either for latency reasons or because the * hrtimer callback takes regular spinlocks or invokes other * functions which are not suitable for hard interrupt context on * PREEMPT_RT. * * The hrtimer_sleeper callback is RT compatible in hard interrupt * context, but there is a latency concern: Untrusted userspace can * spawn many threads which arm timers for the same expiry time on * the same CPU. That causes a latency spike due to the wakeup of * a gazillion threads. 
* * OTOH, privileged real-time user space applications rely on the * low latency of hard interrupt wakeups. If the current task is in * a real-time scheduling class, mark the mode for hard interrupt * expiry. */ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT)) mode |= HRTIMER_MODE_HARD; } __hrtimer_init(&sl->timer, clock_id, mode); sl->timer.function = hrtimer_wakeup; sl->task = current; } /** * hrtimer_init_sleeper - initialize sleeper to the given clock * @sl: sleeper to be initialized * @clock_id: the clock to be used * @mode: timer mode abs/rel */ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { debug_init(&sl->timer, clock_id, mode); __hrtimer_init_sleeper(sl, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) { switch(restart->nanosleep.type) { #ifdef CONFIG_COMPAT_32BIT_TIME case TT_COMPAT: if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp)) return -EFAULT; break; #endif case TT_NATIVE: if (put_timespec64(ts, restart->nanosleep.rmtp)) return -EFAULT; break; default: BUG(); } return -ERESTART_RESTARTBLOCK; } static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) { struct restart_block *restart; do { set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); hrtimer_sleeper_start_expires(t, mode); if (likely(t->task)) schedule(); hrtimer_cancel(&t->timer); mode = HRTIMER_MODE_ABS; } while (t->task && !signal_pending(current)); __set_current_state(TASK_RUNNING); if (!t->task) return 0; restart = &current->restart_block; if (restart->nanosleep.type != TT_NONE) { ktime_t rem = hrtimer_expires_remaining(&t->timer); struct timespec64 rmt; if (rem <= 0) return 0; rmt = ktime_to_timespec64(rem); return nanosleep_copyout(restart, &rmt); } return -ERESTART_RESTARTBLOCK; } static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; int ret; hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); ret = do_nanosleep(&t, HRTIMER_MODE_ABS); destroy_hrtimer_on_stack(&t.timer); return ret; } long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, const clockid_t clockid) { struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; u64 slack; slack = current->timer_slack_ns; if (rt_task(current)) slack = 0; hrtimer_init_sleeper_on_stack(&t, clockid, mode); hrtimer_set_expires_range_ns(&t.timer, rqtp, slack); ret = do_nanosleep(&t, mode); if (ret != -ERESTART_RESTARTBLOCK) goto out; /* Absolute timers do not update the rmtp value and restart: */ if (mode == HRTIMER_MODE_ABS) { ret = -ERESTARTNOHAND; goto out; } restart = &current->restart_block; restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); set_restart_fn(restart, hrtimer_nanosleep_restart); out: destroy_hrtimer_on_stack(&t.timer); return ret; } #ifdef CONFIG_64BIT SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, struct __kernel_timespec __user *, rmtp) { struct timespec64 tu; if (get_timespec64(&tu, rqtp)) return -EFAULT; if (!timespec64_valid(&tu)) return -EINVAL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? 
TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC); } #endif #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, struct old_timespec32 __user *, rmtp) { struct timespec64 tu; if (get_old_timespec32(&tu, rqtp)) return -EFAULT; if (!timespec64_valid(&tu)) return -EINVAL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC); } #endif /* * Functions related to boot-time initialization: */ int hrtimers_prepare_cpu(unsigned int cpu) { struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i]; clock_b->cpu_base = cpu_base; seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock); timerqueue_init_head(&clock_b->active); } cpu_base->cpu = cpu; cpu_base->active_bases = 0; cpu_base->hres_active = 0; cpu_base->hang_detected = 0; cpu_base->next_timer = NULL; cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->online = 1; hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } #ifdef CONFIG_HOTPLUG_CPU static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, struct hrtimer_clock_base *new_base) { struct hrtimer *timer; struct timerqueue_node *node; while ((node = timerqueue_getnext(&old_base->active))) { timer = container_of(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); debug_deactivate(timer); /* * Mark it as ENQUEUED not INACTIVE otherwise the * timer could be seen as !active and just vanish away * under us on another CPU */ __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); timer->base = new_base; /* * Enqueue the timers on the new cpu. This does not * reprogram the event device in case the timer * expires before the earliest on this CPU, but we run * hrtimer_interrupt after we migrated everything to * sort out already expired timers and reprogram the * event device. */ enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); } } int hrtimers_cpu_dying(unsigned int dying_cpu) { int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER)); struct hrtimer_cpu_base *old_base, *new_base; old_base = this_cpu_ptr(&hrtimer_bases); new_base = &per_cpu(hrtimer_bases, ncpu); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ raw_spin_lock(&old_base->lock); raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } /* * The migration might have changed the first expiring softirq * timer on this CPU. Update it. 
*/ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); /* Tell the other CPU to retrigger the next event */ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); raw_spin_unlock(&new_base->lock); old_base->online = 0; raw_spin_unlock(&old_base->lock); return 0; } #endif /* CONFIG_HOTPLUG_CPU */ void __init hrtimers_init(void) { hrtimers_prepare_cpu(smp_processor_id()); open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); } /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * @clock_id: timer clock to be used */ int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, clockid_t clock_id) { struct hrtimer_sleeper t; /* * Optimize when a zero timeout value is given. It does not * matter whether this is an absolute or a relative time. */ if (expires && *expires == 0) { __set_current_state(TASK_RUNNING); return 0; } /* * A NULL parameter means "infinite" */ if (!expires) { schedule(); return -EINTR; } /* * Override any slack passed by the user if under * rt constraints. */ if (rt_task(current)) delta = 0; hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); if (likely(t.task)) schedule(); hrtimer_cancel(&t.timer); destroy_hrtimer_on_stack(&t.timer); __set_current_state(TASK_RUNNING); return !t.task ? 0 : -EINTR; } EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless * the current task state has been set (see set_current_state()). * * The @delta argument gives the kernel the freedom to schedule the * actual wakeup to a time that is both power and performance friendly * for regular (non RT/DL) tasks. * The kernel gives the normal best effort behavior for "@expires+@delta", * and may decide to fire the timer earlier, but no earlier than @expires. * * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to * pass before the routine returns unless the current task is explicitly * woken up (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is * delivered to the current task or the current task is explicitly woken * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * * Returns 0 when the timer has expired. If the task was woken before the * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or * by an explicit wakeup, it returns -EINTR. */ int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode) { return schedule_hrtimeout_range_clock(expires, delta, mode, CLOCK_MONOTONIC); } EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); /** * schedule_hrtimeout - sleep until timeout * @expires: timeout value (ktime_t) * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless * the current task state has been set (see set_current_state()).
 * * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to * pass before the routine returns unless the current task is explicitly * woken up (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is * delivered to the current task or the current task is explicitly woken * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * * Returns 0 when the timer has expired. If the task was woken before the * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or * by an explicit wakeup, it returns -EINTR. */ int __sched schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode) { return schedule_hrtimeout_range(expires, 0, mode); } EXPORT_SYMBOL_GPL(schedule_hrtimeout);
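/*
 * Illustrative sketch (not part of the kernel source above): a hypothetical
 * helper showing how a caller is expected to use schedule_hrtimeout_range()
 * as documented in its kernel-doc. The function name
 * example_sleep_with_slack() and the 5 ms / 500 us values are made up for
 * illustration; the includes are indicative only.
 */
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sched.h>

static int example_sleep_with_slack(void)
{
	/* Relative timeout of 5 ms, with 500 us of coalescing slack. */
	ktime_t timeout = ms_to_ktime(5);

	/* The task state must be set before calling the helper. */
	set_current_state(TASK_UNINTERRUPTIBLE);

	/* Returns 0 when the timer expired, -EINTR on an early wakeup. */
	return schedule_hrtimeout_range(&timeout, 500 * NSEC_PER_USEC,
					HRTIMER_MODE_REL);
}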
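/*
 * Illustrative sketch (not part of the kernel source above): the userspace
 * view of the nanosleep() path implemented by hrtimer_nanosleep() and
 * nanosleep_copyout(). If a signal interrupts the sleep, the kernel copies
 * the remaining time to the second argument and the call fails with EINTR,
 * so the standard pattern is to resume with the remainder. Plain POSIX C,
 * shown here only to connect the syscall semantics to the code above.
 */
#include <errno.h>
#include <time.h>

int example_sleep_one_and_a_half_seconds(void)
{
	struct timespec req = { .tv_sec = 1, .tv_nsec = 500000000 };
	struct timespec rem;

	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
		req = rem;	/* resume with the remaining time */

	return 0;
}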
// SPDX-License-Identifier: GPL-2.0-only /* * This is the linux wireless configuration interface. * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/if.h> #include <linux/module.h> #include <linux/err.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/nl80211.h> #include <linux/debugfs.h> #include <linux/notifier.h> #include <linux/device.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <linux/sched.h> #include <net/genetlink.h> #include <net/cfg80211.h> #include "nl80211.h" #include "core.h" #include "sysfs.h" #include "debugfs.h" #include "wext-compat.h" #include "rdev-ops.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME); /* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg80211_rdev_list); int cfg80211_rdev_list_generation; /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; /* for the cleanup, scan and event works */ struct workqueue_struct *cfg80211_wq; static bool cfg80211_disable_40mhz_24ghz; module_param(cfg80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(cfg80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; ASSERT_RTNL(); for_each_rdev(rdev) { if (rdev->wiphy_idx == wiphy_idx) { result = rdev; break; } } return result; } int get_wiphy_idx(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); return rdev->wiphy_idx; } struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; ASSERT_RTNL(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); if (!rdev) return NULL; return &rdev->wiphy; } static int cfg80211_dev_check_name(struct cfg80211_registered_device *rdev, const char *newname) { struct cfg80211_registered_device *rdev2; int wiphy_idx, taken = -1, digits; ASSERT_RTNL(); if (strlen(newname) > NL80211_WIPHY_NAME_MAXLEN) return -EINVAL; /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) { /* count number of places needed to print wiphy_idx */ digits = 1; while (wiphy_idx /= 10) digits++; /* * deny the name if it is phy<idx> where <idx> is printed * without leading zeroes. taken == strlen(newname) here */ if (taken == strlen(PHY_NAME) + digits) return -EINVAL; } /* Ensure another device does not already have this name.
*/ for_each_rdev(rdev2) if (strcmp(newname, wiphy_name(&rdev2->wiphy)) == 0) return -EINVAL; return 0; } int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { int result; ASSERT_RTNL(); lockdep_assert_wiphy(&rdev->wiphy); /* Ignore nop renames */ if (strcmp(newname, wiphy_name(&rdev->wiphy)) == 0) return 0; result = cfg80211_dev_check_name(rdev, newname); if (result < 0) return result; result = device_rename(&rdev->wiphy.dev, newname); if (result) return result; if (!IS_ERR_OR_NULL(rdev->wiphy.debugfsdir)) debugfs_rename(rdev->wiphy.debugfsdir->d_parent, rdev->wiphy.debugfsdir, rdev->wiphy.debugfsdir->d_parent, newname); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); return 0; } int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net) { struct wireless_dev *wdev; int err = 0; if (!(rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK)) return -EOPNOTSUPP; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); if (err) break; wdev->netdev->features |= NETIF_F_NETNS_LOCAL; } if (err) { /* failed -- clean up to old netns */ net = wiphy_net(&rdev->wiphy); list_for_each_entry_continue_reverse(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); WARN_ON(err); wdev->netdev->features |= NETIF_F_NETNS_LOCAL; } return err; } wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); } nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); wiphy_net_set(&rdev->wiphy, net); err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev)); WARN_ON(err); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } wiphy_unlock(&rdev->wiphy); return 0; } static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) { struct cfg80211_registered_device *rdev = data; wiphy_lock(&rdev->wiphy); rdev_rfkill_poll(rdev); wiphy_unlock(&rdev->wiphy); } void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { lockdep_assert_held(&rdev->wiphy.mtx); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; if (!wdev_running(wdev)) return; rdev_stop_p2p_device(rdev, wdev); wdev->is_running = false; rdev->opencount--; if (rdev->scan_req && rdev->scan_req->wdev == wdev) { if (WARN_ON(!rdev->scan_req->notified && (!rdev->int_scan_req || !rdev->int_scan_req->notified))) rdev->scan_req->info.aborted = true; ___cfg80211_scan_done(rdev, false); } } void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { lockdep_assert_held(&rdev->wiphy.mtx); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) return; if (!wdev_running(wdev)) return; rdev_stop_nan(rdev, wdev); wdev->is_running = false; rdev->opencount--; } void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct wireless_dev *wdev; ASSERT_RTNL(); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->netdev) { dev_close(wdev->netdev); continue; } /* otherwise, check iftype */ wiphy_lock(wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: 
cfg80211_stop_p2p_device(rdev, wdev); break; case NL80211_IFTYPE_NAN: cfg80211_stop_nan(rdev, wdev); break; default: break; } wiphy_unlock(wiphy); } } EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces); static int cfg80211_rfkill_set_block(void *data, bool blocked) { struct cfg80211_registered_device *rdev = data; if (!blocked) return 0; rtnl_lock(); cfg80211_shutdown_all_interfaces(&rdev->wiphy); rtnl_unlock(); return 0; } static void cfg80211_rfkill_block_work(struct work_struct *work) { struct cfg80211_registered_device *rdev; rdev = container_of(work, struct cfg80211_registered_device, rfkill_block); cfg80211_rfkill_set_block(rdev, true); } static void cfg80211_event_work(struct work_struct *work) { struct cfg80211_registered_device *rdev; rdev = container_of(work, struct cfg80211_registered_device, event_work); wiphy_lock(&rdev->wiphy); cfg80211_process_rdev_events(rdev); wiphy_unlock(&rdev->wiphy); } void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) { struct wireless_dev *wdev, *tmp; ASSERT_RTNL(); list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) { if (wdev->nl_owner_dead) { if (wdev->netdev) dev_close(wdev->netdev); wiphy_lock(&rdev->wiphy); cfg80211_leave(rdev, wdev); cfg80211_remove_virtual_intf(rdev, wdev); wiphy_unlock(&rdev->wiphy); } } } static void cfg80211_destroy_iface_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; rdev = container_of(work, struct cfg80211_registered_device, destroy_work); rtnl_lock(); cfg80211_destroy_ifaces(rdev); rtnl_unlock(); } static void cfg80211_sched_scan_stop_wk(struct wiphy *wiphy, struct wiphy_work *work) { struct cfg80211_registered_device *rdev; struct cfg80211_sched_scan_request *req, *tmp; rdev = container_of(work, struct cfg80211_registered_device, sched_scan_stop_wk); list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { if (req->nl_owner_dead) cfg80211_stop_sched_scan_req(rdev, req, false); } } static void cfg80211_propagate_radar_detect_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; rdev = container_of(work, struct cfg80211_registered_device, propagate_radar_detect_wk); rtnl_lock(); regulatory_propagate_dfs_state(&rdev->wiphy, &rdev->radar_chandef, NL80211_DFS_UNAVAILABLE, NL80211_RADAR_DETECTED); rtnl_unlock(); } static void cfg80211_propagate_cac_done_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; rdev = container_of(work, struct cfg80211_registered_device, propagate_cac_done_wk); rtnl_lock(); regulatory_propagate_dfs_state(&rdev->wiphy, &rdev->cac_done_chandef, NL80211_DFS_AVAILABLE, NL80211_RADAR_CAC_FINISHED); rtnl_unlock(); } static void cfg80211_wiphy_work(struct work_struct *work) { struct cfg80211_registered_device *rdev; struct wiphy_work *wk; rdev = container_of(work, struct cfg80211_registered_device, wiphy_work); wiphy_lock(&rdev->wiphy); if (rdev->suspended) goto out; spin_lock_irq(&rdev->wiphy_work_lock); wk = list_first_entry_or_null(&rdev->wiphy_work_list, struct wiphy_work, entry); if (wk) { list_del_init(&wk->entry); if (!list_empty(&rdev->wiphy_work_list)) schedule_work(work); spin_unlock_irq(&rdev->wiphy_work_lock); wk->func(&rdev->wiphy, wk); } else { spin_unlock_irq(&rdev->wiphy_work_lock); } out: wiphy_unlock(&rdev->wiphy); } /* exported functions */ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, const char *requested_name) { static atomic_t wiphy_counter = ATOMIC_INIT(0); struct cfg80211_registered_device *rdev; int alloc_size; WARN_ON(ops->add_key && 
(!ops->del_key || !ops->set_default_key)); WARN_ON(ops->auth && (!ops->assoc || !ops->deauth || !ops->disassoc)); WARN_ON(ops->connect && !ops->disconnect); WARN_ON(ops->join_ibss && !ops->leave_ibss); WARN_ON(ops->add_virtual_intf && !ops->del_virtual_intf); WARN_ON(ops->add_station && !ops->del_station); WARN_ON(ops->add_mpath && !ops->del_mpath); WARN_ON(ops->join_mesh && !ops->leave_mesh); WARN_ON(ops->start_p2p_device && !ops->stop_p2p_device); WARN_ON(ops->start_ap && !ops->stop_ap); WARN_ON(ops->join_ocb && !ops->leave_ocb); WARN_ON(ops->suspend && !ops->resume); WARN_ON(ops->sched_scan_start && !ops->sched_scan_stop); WARN_ON(ops->remain_on_channel && !ops->cancel_remain_on_channel); WARN_ON(ops->tdls_channel_switch && !ops->tdls_cancel_channel_switch); WARN_ON(ops->add_tx_ts && !ops->del_tx_ts); alloc_size = sizeof(*rdev) + sizeof_priv; rdev = kzalloc(alloc_size, GFP_KERNEL); if (!rdev) return NULL; rdev->ops = ops; rdev->wiphy_idx = atomic_inc_return(&wiphy_counter); if (unlikely(rdev->wiphy_idx < 0)) { /* ugh, wrapped! */ atomic_dec(&wiphy_counter); kfree(rdev); return NULL; } /* atomic_inc_return makes it start at 1, make it start at 0 */ rdev->wiphy_idx--; /* give it a proper name */ if (requested_name && requested_name[0]) { int rv; rtnl_lock(); rv = cfg80211_dev_check_name(rdev, requested_name); if (rv < 0) { rtnl_unlock(); goto use_default_name; } rv = dev_set_name(&rdev->wiphy.dev, "%s", requested_name); rtnl_unlock(); if (rv) goto use_default_name; } else { int rv; use_default_name: /* NOTE: This is *probably* safe w/out holding rtnl because of * the restrictions on phy names. Probably this call could * fail if some other part of the kernel (re)named a device * phyX. But, might should add some locking and check return * value, and use a different name if this one exists? 
*/ rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); if (rv < 0) { kfree(rdev); return NULL; } } mutex_init(&rdev->wiphy.mtx); INIT_LIST_HEAD(&rdev->wiphy.wdev_list); INIT_LIST_HEAD(&rdev->beacon_registrations); spin_lock_init(&rdev->beacon_registrations_lock); spin_lock_init(&rdev->bss_lock); INIT_LIST_HEAD(&rdev->bss_list); INIT_LIST_HEAD(&rdev->sched_scan_req_list); wiphy_work_init(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif device_initialize(&rdev->wiphy.dev); rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; device_enable_async_suspend(&rdev->wiphy.dev); INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); wiphy_work_init(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk); INIT_WORK(&rdev->sched_scan_res_wk, cfg80211_sched_scan_results_wk); INIT_WORK(&rdev->propagate_radar_detect_wk, cfg80211_propagate_radar_detect_wk); INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk); INIT_WORK(&rdev->mgmt_registrations_update_wk, cfg80211_mgmt_registrations_update_wk); spin_lock_init(&rdev->mgmt_registrations_lock); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; #endif wiphy_net_set(&rdev->wiphy, &init_net); rdev->rfkill_ops.set_block = cfg80211_rfkill_set_block; rdev->wiphy.rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev), &rdev->wiphy.dev, RFKILL_TYPE_WLAN, &rdev->rfkill_ops, rdev); if (!rdev->wiphy.rfkill) { wiphy_free(&rdev->wiphy); return NULL; } INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work); INIT_LIST_HEAD(&rdev->wiphy_work_list); spin_lock_init(&rdev->wiphy_work_lock); INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); INIT_WORK(&rdev->background_cac_abort_wk, cfg80211_background_cac_abort_wk); INIT_DELAYED_WORK(&rdev->background_cac_done_wk, cfg80211_background_cac_done_wk); init_waitqueue_head(&rdev->dev_wait); /* * Initialize wiphy parameters to IEEE 802.11 MIB default values. * Fragmentation and RTS threshold are disabled by default with the * special -1 value. */ rdev->wiphy.retry_short = 7; rdev->wiphy.retry_long = 4; rdev->wiphy.frag_threshold = (u32) -1; rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; rdev->wiphy.max_num_csa_counters = 1; rdev->wiphy.max_sched_scan_plans = 1; rdev->wiphy.max_sched_scan_plan_interval = U32_MAX; return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new_nm); static int wiphy_verify_combinations(struct wiphy *wiphy) { const struct ieee80211_iface_combination *c; int i, j; for (i = 0; i < wiphy->n_iface_combinations; i++) { u32 cnt = 0; u16 all_iftypes = 0; c = &wiphy->iface_combinations[i]; /* * Combinations with just one interface aren't real, * however we make an exception for DFS. */ if (WARN_ON((c->max_interfaces < 2) && !c->radar_detect_widths)) return -EINVAL; /* Need at least one channel */ if (WARN_ON(!c->num_different_channels)) return -EINVAL; /* DFS only works on one channel. 
*/ if (WARN_ON(c->radar_detect_widths && (c->num_different_channels > 1))) return -EINVAL; if (WARN_ON(!c->n_limits)) return -EINVAL; for (j = 0; j < c->n_limits; j++) { u16 types = c->limits[j].types; /* interface types shouldn't overlap */ if (WARN_ON(types & all_iftypes)) return -EINVAL; all_iftypes |= types; if (WARN_ON(!c->limits[j].max)) return -EINVAL; /* Shouldn't list software iftypes in combinations! */ if (WARN_ON(wiphy->software_iftypes & types)) return -EINVAL; /* Only a single P2P_DEVICE can be allowed */ if (WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) && c->limits[j].max > 1)) return -EINVAL; /* Only a single NAN can be allowed */ if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) && c->limits[j].max > 1)) return -EINVAL; /* * This isn't well-defined right now. If you have an * IBSS interface, then its beacon interval may change * by joining other networks, and nothing prevents it * from doing that. * So technically we probably shouldn't even allow AP * and IBSS in the same interface, but it seems that * some drivers support that, possibly only with fixed * beacon intervals for IBSS. */ if (WARN_ON(types & BIT(NL80211_IFTYPE_ADHOC) && c->beacon_int_min_gcd)) { return -EINVAL; } cnt += c->limits[j].max; /* * Don't advertise an unsupported type * in a combination. */ if (WARN_ON((wiphy->interface_modes & types) != types)) return -EINVAL; } if (WARN_ON(all_iftypes & BIT(NL80211_IFTYPE_WDS))) return -EINVAL; /* You can't even choose that many! */ if (WARN_ON(cnt < c->max_interfaces)) return -EINVAL; } return 0; } int wiphy_register(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); int res; enum nl80211_band band; struct ieee80211_supported_band *sband; bool have_band = false; int i; u16 ifmodes = wiphy->interface_modes; #ifdef CONFIG_PM if (WARN_ON(wiphy->wowlan && (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && !(wiphy->wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) return -EINVAL; if (WARN_ON(wiphy->wowlan && !wiphy->wowlan->flags && !wiphy->wowlan->n_patterns && !wiphy->wowlan->tcp)) return -EINVAL; #endif if (WARN_ON((wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH) && (!rdev->ops->tdls_channel_switch || !rdev->ops->tdls_cancel_channel_switch))) return -EINVAL; if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && (!rdev->ops->start_nan || !rdev->ops->stop_nan || !rdev->ops->add_nan_func || !rdev->ops->del_nan_func || !(wiphy->nan_supported_bands & BIT(NL80211_BAND_2GHZ))))) return -EINVAL; if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS))) return -EINVAL; if (WARN_ON(wiphy->pmsr_capa && !wiphy->pmsr_capa->ftm.supported)) return -EINVAL; if (wiphy->pmsr_capa && wiphy->pmsr_capa->ftm.supported) { if (WARN_ON(!wiphy->pmsr_capa->ftm.asap && !wiphy->pmsr_capa->ftm.non_asap)) return -EINVAL; if (WARN_ON(!wiphy->pmsr_capa->ftm.preambles || !wiphy->pmsr_capa->ftm.bandwidths)) return -EINVAL; if (WARN_ON(wiphy->pmsr_capa->ftm.preambles & ~(BIT(NL80211_PREAMBLE_LEGACY) | BIT(NL80211_PREAMBLE_HT) | BIT(NL80211_PREAMBLE_VHT) | BIT(NL80211_PREAMBLE_HE) | BIT(NL80211_PREAMBLE_DMG)))) return -EINVAL; if (WARN_ON((wiphy->pmsr_capa->ftm.trigger_based || wiphy->pmsr_capa->ftm.non_trigger_based) && !(wiphy->pmsr_capa->ftm.preambles & BIT(NL80211_PREAMBLE_HE)))) return -EINVAL; if (WARN_ON(wiphy->pmsr_capa->ftm.bandwidths & ~(BIT(NL80211_CHAN_WIDTH_20_NOHT) | BIT(NL80211_CHAN_WIDTH_20) | BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_80P80) | BIT(NL80211_CHAN_WIDTH_160) | 
BIT(NL80211_CHAN_WIDTH_5) | BIT(NL80211_CHAN_WIDTH_10)))) return -EINVAL; } if (WARN_ON((wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) && (wiphy->regulatory_flags & (REGULATORY_CUSTOM_REG | REGULATORY_STRICT_REG | REGULATORY_COUNTRY_IE_FOLLOW_POWER | REGULATORY_COUNTRY_IE_IGNORE)))) return -EINVAL; if (WARN_ON(wiphy->coalesce && (!wiphy->coalesce->n_rules || !wiphy->coalesce->n_patterns) && (!wiphy->coalesce->pattern_min_len || wiphy->coalesce->pattern_min_len > wiphy->coalesce->pattern_max_len))) return -EINVAL; if (WARN_ON(wiphy->ap_sme_capa && !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME))) return -EINVAL; if (WARN_ON(wiphy->addresses && !wiphy->n_addresses)) return -EINVAL; if (WARN_ON(wiphy->addresses && !is_zero_ether_addr(wiphy->perm_addr) && memcmp(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN))) return -EINVAL; if (WARN_ON(wiphy->max_acl_mac_addrs && (!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME) || !rdev->ops->set_mac_acl))) return -EINVAL; /* assure only valid behaviours are flagged by driver * hence subtract 2 as bit 0 is invalid. */ if (WARN_ON(wiphy->bss_select_support && (wiphy->bss_select_support & ~(BIT(__NL80211_BSS_SELECT_ATTR_AFTER_LAST) - 2)))) return -EINVAL; if (WARN_ON(wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X) && (!rdev->ops->set_pmk || !rdev->ops->del_pmk))) return -EINVAL; if (WARN_ON(!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && rdev->ops->update_connect_params)) return -EINVAL; if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); /* sanity check ifmodes */ WARN_ON(!ifmodes); ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1; if (WARN_ON(ifmodes != wiphy->interface_modes)) wiphy->interface_modes = ifmodes; res = wiphy_verify_combinations(wiphy); if (res) return res; /* sanity check supported bands/channels */ for (band = 0; band < NUM_NL80211_BANDS; band++) { const struct ieee80211_sband_iftype_data *iftd; u16 types = 0; bool have_he = false; sband = wiphy->bands[band]; if (!sband) continue; sband->band = band; if (WARN_ON(!sband->n_channels)) return -EINVAL; /* * on 60GHz or sub-1Ghz band, there are no legacy rates, so * n_bitrates is 0 */ if (WARN_ON((band != NL80211_BAND_60GHZ && band != NL80211_BAND_S1GHZ) && !sband->n_bitrates)) return -EINVAL; if (WARN_ON(band == NL80211_BAND_6GHZ && (sband->ht_cap.ht_supported || sband->vht_cap.vht_supported))) return -EINVAL; /* * Since cfg80211_disable_40mhz_24ghz is global, we can * modify the sband's ht data even if the driver uses a * global structure for that. */ if (cfg80211_disable_40mhz_24ghz && band == NL80211_BAND_2GHZ && sband->ht_cap.ht_supported) { sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40; } /* * Since we use a u32 for rate bitmaps in * ieee80211_get_response_rate, we cannot * have more than 32 legacy rates. 
*/ if (WARN_ON(sband->n_bitrates > 32)) return -EINVAL; for (i = 0; i < sband->n_channels; i++) { sband->channels[i].orig_flags = sband->channels[i].flags; sband->channels[i].orig_mag = INT_MAX; sband->channels[i].orig_mpwr = sband->channels[i].max_power; sband->channels[i].band = band; if (WARN_ON(sband->channels[i].freq_offset >= 1000)) return -EINVAL; } for_each_sband_iftype_data(sband, i, iftd) { bool has_ap, has_non_ap; u32 ap_bits = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_P2P_GO); if (WARN_ON(!iftd->types_mask)) return -EINVAL; if (WARN_ON(types & iftd->types_mask)) return -EINVAL; /* at least one piece of information must be present */ if (WARN_ON(!iftd->he_cap.has_he)) return -EINVAL; types |= iftd->types_mask; if (i == 0) have_he = iftd->he_cap.has_he; else have_he = have_he && iftd->he_cap.has_he; has_ap = iftd->types_mask & ap_bits; has_non_ap = iftd->types_mask & ~ap_bits; /* * For EHT 20 MHz STA, the capabilities format differs * but to simplify, don't check 20 MHz but rather check * only if AP and non-AP were mentioned at the same time, * reject if so. */ if (WARN_ON(iftd->eht_cap.has_eht && has_ap && has_non_ap)) return -EINVAL; } if (WARN_ON(!have_he && band == NL80211_BAND_6GHZ)) return -EINVAL; have_band = true; } if (!have_band) { WARN_ON(1); return -EINVAL; } for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { /* * Validate we have a policy (can be explicitly set to * VENDOR_CMD_RAW_DATA which is non-NULL) and also that * we have at least one of doit/dumpit. */ if (WARN_ON(!rdev->wiphy.vendor_commands[i].policy)) return -EINVAL; if (WARN_ON(!rdev->wiphy.vendor_commands[i].doit && !rdev->wiphy.vendor_commands[i].dumpit)) return -EINVAL; } #ifdef CONFIG_PM if (WARN_ON(rdev->wiphy.wowlan && rdev->wiphy.wowlan->n_patterns && (!rdev->wiphy.wowlan->pattern_min_len || rdev->wiphy.wowlan->pattern_min_len > rdev->wiphy.wowlan->pattern_max_len))) return -EINVAL; #endif if (!wiphy->max_num_akm_suites) wiphy->max_num_akm_suites = NL80211_MAX_NR_AKM_SUITES; else if (wiphy->max_num_akm_suites < NL80211_MAX_NR_AKM_SUITES || wiphy->max_num_akm_suites > CFG80211_MAX_NUM_AKM_SUITES) return -EINVAL; /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); rdev->wiphy.features |= NL80211_FEATURE_SCAN_FLUSH; rtnl_lock(); wiphy_lock(&rdev->wiphy); res = device_add(&rdev->wiphy.dev); if (res) { wiphy_unlock(&rdev->wiphy); rtnl_unlock(); return res; } list_add_rcu(&rdev->list, &cfg80211_rdev_list); cfg80211_rdev_list_generation++; /* add to debugfs */ rdev->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&rdev->wiphy), ieee80211_debugfs_dir); cfg80211_debugfs_rdev_add(rdev); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); wiphy_unlock(&rdev->wiphy); /* set up regulatory info */ wiphy_regulatory_register(wiphy); if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { struct regulatory_request request; request.wiphy_idx = get_wiphy_idx(wiphy); request.initiator = NL80211_REGDOM_SET_BY_DRIVER; request.alpha2[0] = '9'; request.alpha2[1] = '9'; nl80211_send_reg_change_event(&request); } /* Check that nobody globally advertises any capabilities they do not * advertise on all possible interface types. 
*/ if (wiphy->extended_capabilities_len && wiphy->num_iftype_ext_capab && wiphy->iftype_ext_capab) { u8 supported_on_all, j; const struct wiphy_iftype_ext_capab *capab; capab = wiphy->iftype_ext_capab; for (j = 0; j < wiphy->extended_capabilities_len; j++) { if (capab[0].extended_capabilities_len > j) supported_on_all = capab[0].extended_capabilities[j]; else supported_on_all = 0x00; for (i = 1; i < wiphy->num_iftype_ext_capab; i++) { if (j >= capab[i].extended_capabilities_len) { supported_on_all = 0x00; break; } supported_on_all &= capab[i].extended_capabilities[j]; } if (WARN_ON(wiphy->extended_capabilities[j] & ~supported_on_all)) break; } } rdev->wiphy.registered = true; rtnl_unlock(); res = rfkill_register(rdev->wiphy.rfkill); if (res) { rfkill_destroy(rdev->wiphy.rfkill); rdev->wiphy.rfkill = NULL; wiphy_unregister(&rdev->wiphy); return res; } return 0; } EXPORT_SYMBOL(wiphy_register); void wiphy_rfkill_start_polling(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (!rdev->ops->rfkill_poll) return; rdev->rfkill_ops.poll = cfg80211_rfkill_poll; rfkill_resume_polling(wiphy->rfkill); } EXPORT_SYMBOL(wiphy_rfkill_start_polling); void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev, struct wiphy_work *end) { unsigned int runaway_limit = 100; unsigned long flags; lockdep_assert_held(&rdev->wiphy.mtx); spin_lock_irqsave(&rdev->wiphy_work_lock, flags); while (!list_empty(&rdev->wiphy_work_list)) { struct wiphy_work *wk; wk = list_first_entry(&rdev->wiphy_work_list, struct wiphy_work, entry); list_del_init(&wk->entry); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); wk->func(&rdev->wiphy, wk); spin_lock_irqsave(&rdev->wiphy_work_lock, flags); if (wk == end) break; if (WARN_ON(--runaway_limit == 0)) INIT_LIST_HEAD(&rdev->wiphy_work_list); } spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); } void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); wait_event(rdev->dev_wait, ({ int __count; wiphy_lock(&rdev->wiphy); __count = rdev->opencount; wiphy_unlock(&rdev->wiphy); __count == 0; })); if (rdev->wiphy.rfkill) rfkill_unregister(rdev->wiphy.rfkill); rtnl_lock(); wiphy_lock(&rdev->wiphy); nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); rdev->wiphy.registered = false; WARN_ON(!list_empty(&rdev->wiphy.wdev_list)); /* * First remove the hardware from everywhere, this makes * it impossible to find from userspace. 
*/ debugfs_remove_recursive(rdev->wiphy.debugfsdir); list_del_rcu(&rdev->list); synchronize_rcu(); /* * If this device got a regulatory hint tell core its * free to listen now to a new shiny device regulatory hint */ wiphy_regulatory_deregister(wiphy); cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); #endif /* surely nothing is reachable now, clean up work */ cfg80211_process_wiphy_works(rdev, NULL); wiphy_unlock(&rdev->wiphy); rtnl_unlock(); /* this has nothing to do now but make sure it's gone */ cancel_work_sync(&rdev->wiphy_work); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); cancel_delayed_work_sync(&rdev->background_cac_done_wk); flush_work(&rdev->destroy_work); flush_work(&rdev->propagate_radar_detect_wk); flush_work(&rdev->propagate_cac_done_wk); flush_work(&rdev->mgmt_registrations_update_wk); flush_work(&rdev->background_cac_abort_wk); cfg80211_rdev_free_wowlan(rdev); cfg80211_rdev_free_coalesce(rdev); } EXPORT_SYMBOL(wiphy_unregister); void cfg80211_dev_free(struct cfg80211_registered_device *rdev) { struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; rfkill_destroy(rdev->wiphy.rfkill); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(&reg->list); kfree(reg); } list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) cfg80211_put_bss(&rdev->wiphy, &scan->pub); mutex_destroy(&rdev->wiphy.mtx); /* * The 'regd' can only be non-NULL if we never finished * initializing the wiphy and thus never went through the * unregister path - e.g. in failure scenarios. Thus, it * cannot have been visible to anyone if non-NULL, so we * can just free it here. */ kfree(rcu_dereference_raw(rdev->wiphy.regd)); kfree(rdev); } void wiphy_free(struct wiphy *wiphy) { put_device(&wiphy->dev); } EXPORT_SYMBOL(wiphy_free); void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked, enum rfkill_hard_block_reasons reason) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (rfkill_set_hw_state_reason(wiphy->rfkill, blocked, reason)) schedule_work(&rdev->rfkill_block); } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason); static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, bool unregister_netdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_cqm_config *cqm_config; unsigned int link_id; ASSERT_RTNL(); lockdep_assert_held(&rdev->wiphy.mtx); nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); wdev->registered = false; if (wdev->netdev) { sysfs_remove_link(&wdev->netdev->dev.kobj, "phy80211"); if (unregister_netdev) unregister_netdevice(wdev->netdev); } list_del_rcu(&wdev->list); synchronize_net(); rdev->devlist_generation++; cfg80211_mlme_purge_registrations(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: cfg80211_stop_p2p_device(rdev, wdev); break; case NL80211_IFTYPE_NAN: cfg80211_stop_nan(rdev, wdev); break; default: break; } #ifdef CONFIG_CFG80211_WEXT kfree_sensitive(wdev->wext.keys); wdev->wext.keys = NULL; #endif wiphy_work_cancel(wdev->wiphy, &wdev->cqm_rssi_work); /* deleted from the list, so can't be found from nl80211 any more */ cqm_config = rcu_access_pointer(wdev->cqm_config); kfree_rcu(cqm_config, rcu_head); /* * Ensure that all events have been processed and * freed. 
*/ cfg80211_process_wdev_events(wdev); if (wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) { for (link_id = 0; link_id < ARRAY_SIZE(wdev->links); link_id++) { struct cfg80211_internal_bss *curbss; curbss = wdev->links[link_id].client.current_bss; if (WARN_ON(curbss)) { cfg80211_unhold_bss(curbss); cfg80211_put_bss(wdev->wiphy, &curbss->pub); wdev->links[link_id].client.current_bss = NULL; } } } wdev->connected = false; } void cfg80211_unregister_wdev(struct wireless_dev *wdev) { _cfg80211_unregister_wdev(wdev, true); } EXPORT_SYMBOL(cfg80211_unregister_wdev); static const struct device_type wiphy_type = { .name = "wlan", }; void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num) { lockdep_assert_held(&rdev->wiphy.mtx); rdev->num_running_ifaces += num; if (iftype == NL80211_IFTYPE_MONITOR) rdev->num_running_monitor_ifaces += num; } void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; struct cfg80211_sched_scan_request *pos, *tmp; lockdep_assert_held(&rdev->wiphy.mtx); cfg80211_pmsr_wdev_down(wdev); cfg80211_stop_background_radar_detection(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: list_for_each_entry_safe(pos, tmp, &rdev->sched_scan_req_list, list) { if (dev == pos->dev) cfg80211_stop_sched_scan_req(rdev, pos, false); } #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); wdev->wext.ie = NULL; wdev->wext.ie_len = 0; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); break; case NL80211_IFTYPE_MESH_POINT: cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, dev, -1, true); break; case NL80211_IFTYPE_OCB: cfg80211_leave_ocb(rdev, dev); break; case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: /* cannot happen, has no netdev */ break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: /* invalid */ break; } } void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_event *ev; unsigned long flags; trace_cfg80211_stop_iface(wiphy, wdev); ev = kzalloc(sizeof(*ev), gfp); if (!ev) return; ev->type = EVENT_STOPPED; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_stop_iface); void cfg80211_init_wdev(struct wireless_dev *wdev) { INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); INIT_LIST_HEAD(&wdev->pmsr_list); spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif wiphy_work_init(&wdev->cqm_rssi_work, cfg80211_cqm_rssi_notify_work); if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) wdev->ps = true; else wdev->ps = false; /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; if ((wdev->iftype == NL80211_IFTYPE_STATION || 
wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) wdev->netdev->priv_flags |= IFF_DONT_BRIDGE; INIT_WORK(&wdev->disconnect_wk, cfg80211_autodisconnect_wk); } void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { ASSERT_RTNL(); lockdep_assert_held(&rdev->wiphy.mtx); /* * We get here also when the interface changes network namespaces, * as it's registered into the new one, but we don't want it to * change ID in that case. Checking if the ID is already assigned * works, because 0 isn't considered a valid ID and the memory is * 0-initialized. */ if (!wdev->identifier) wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list); rdev->devlist_generation++; wdev->registered = true; if (wdev->netdev && sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj, "phy80211")) pr_err("failed to add phy80211 symlink to netdev!\n"); nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } int cfg80211_register_netdevice(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; int ret; ASSERT_RTNL(); if (WARN_ON(!wdev)) return -EINVAL; rdev = wiphy_to_rdev(wdev->wiphy); lockdep_assert_held(&rdev->wiphy.mtx); /* we'll take care of this */ wdev->registered = true; wdev->registering = true; ret = register_netdevice(dev); if (ret) goto out; cfg80211_register_wdev(rdev, wdev); ret = 0; out: wdev->registering = false; if (ret) wdev->registered = false; return ret; } EXPORT_SYMBOL(cfg80211_register_netdevice); static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; struct cfg80211_sched_scan_request *pos, *tmp; if (!wdev) return NOTIFY_DONE; rdev = wiphy_to_rdev(wdev->wiphy); WARN_ON(wdev->iftype == NL80211_IFTYPE_UNSPECIFIED); switch (state) { case NETDEV_POST_INIT: SET_NETDEV_DEVTYPE(dev, &wiphy_type); wdev->netdev = dev; /* can only change netns with wiphy */ dev->features |= NETIF_F_NETNS_LOCAL; cfg80211_init_wdev(wdev); break; case NETDEV_REGISTER: if (!wdev->registered) { wiphy_lock(&rdev->wiphy); cfg80211_register_wdev(rdev, wdev); wiphy_unlock(&rdev->wiphy); } break; case NETDEV_UNREGISTER: /* * It is possible to get NETDEV_UNREGISTER multiple times, * so check wdev->registered. 
*/ if (wdev->registered && !wdev->registering) { wiphy_lock(&rdev->wiphy); _cfg80211_unregister_wdev(wdev, false); wiphy_unlock(&rdev->wiphy); } break; case NETDEV_GOING_DOWN: wiphy_lock(&rdev->wiphy); cfg80211_leave(rdev, wdev); cfg80211_remove_links(wdev); wiphy_unlock(&rdev->wiphy); /* since we just did cfg80211_leave() nothing to do there */ cancel_work_sync(&wdev->disconnect_wk); cancel_work_sync(&wdev->pmsr_free_wk); break; case NETDEV_DOWN: wiphy_lock(&rdev->wiphy); cfg80211_update_iface_num(rdev, wdev->iftype, -1); if (rdev->scan_req && rdev->scan_req->wdev == wdev) { if (WARN_ON(!rdev->scan_req->notified && (!rdev->int_scan_req || !rdev->int_scan_req->notified))) rdev->scan_req->info.aborted = true; ___cfg80211_scan_done(rdev, false); } list_for_each_entry_safe(pos, tmp, &rdev->sched_scan_req_list, list) { if (WARN_ON(pos->dev == wdev->netdev)) cfg80211_stop_sched_scan_req(rdev, pos, false); } rdev->opencount--; wiphy_unlock(&rdev->wiphy); wake_up(&rdev->dev_wait); break; case NETDEV_UP: wiphy_lock(&rdev->wiphy); cfg80211_update_iface_num(rdev, wdev->iftype, 1); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT case NL80211_IFTYPE_ADHOC: cfg80211_ibss_wext_join(rdev, wdev); break; case NL80211_IFTYPE_STATION: cfg80211_mgd_wext_connect(rdev, wdev); break; #endif #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: { /* backward compat code... */ struct mesh_setup setup; memcpy(&setup, &default_mesh_setup, sizeof(setup)); /* back compat only needed for mesh_id */ setup.mesh_id = wdev->u.mesh.id; setup.mesh_id_len = wdev->u.mesh.id_up_len; if (wdev->u.mesh.id_up_len) __cfg80211_join_mesh(rdev, dev, &setup, &default_mesh_config); break; } #endif default: break; } rdev->opencount++; /* * Configure power management to the driver here so that its * correctly set also after interface type changes etc. 
*/ if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && rdev->ops->set_power_mgmt && rdev_set_power_mgmt(rdev, dev, wdev->ps, wdev->ps_timeout)) { /* assume this means it's off */ wdev->ps = false; } wiphy_unlock(&rdev->wiphy); break; case NETDEV_PRE_UP: if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype, wdev->use_4addr, 0)) return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->wiphy.rfkill)) return notifier_from_errno(-ERFKILL); break; default: return NOTIFY_DONE; } wireless_nlevent_flush(); return NOTIFY_OK; } static struct notifier_block cfg80211_netdev_notifier = { .notifier_call = cfg80211_netdev_notifier_call, }; static void __net_exit cfg80211_pernet_exit(struct net *net) { struct cfg80211_registered_device *rdev; rtnl_lock(); for_each_rdev(rdev) { if (net_eq(wiphy_net(&rdev->wiphy), net)) WARN_ON(cfg80211_switch_netns(rdev, &init_net)); } rtnl_unlock(); } static struct pernet_operations cfg80211_pernet_ops = { .exit = cfg80211_pernet_exit, }; void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long flags; spin_lock_irqsave(&rdev->wiphy_work_lock, flags); if (list_empty(&work->entry)) list_add_tail(&work->entry, &rdev->wiphy_work_list); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); queue_work(system_unbound_wq, &rdev->wiphy_work); } EXPORT_SYMBOL_GPL(wiphy_work_queue); void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long flags; lockdep_assert_held(&wiphy->mtx); spin_lock_irqsave(&rdev->wiphy_work_lock, flags); if (!list_empty(&work->entry)) list_del_init(&work->entry); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); } EXPORT_SYMBOL_GPL(wiphy_work_cancel); void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work *work) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long flags; bool run; spin_lock_irqsave(&rdev->wiphy_work_lock, flags); run = !work || !list_empty(&work->entry); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); if (run) cfg80211_process_wiphy_works(rdev, work); } EXPORT_SYMBOL_GPL(wiphy_work_flush); void wiphy_delayed_work_timer(struct timer_list *t) { struct wiphy_delayed_work *dwork = from_timer(dwork, t, timer); wiphy_work_queue(dwork->wiphy, &dwork->work); } EXPORT_SYMBOL(wiphy_delayed_work_timer); void wiphy_delayed_work_queue(struct wiphy *wiphy, struct wiphy_delayed_work *dwork, unsigned long delay) { if (!delay) { del_timer(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); return; } dwork->wiphy = wiphy; mod_timer(&dwork->timer, jiffies + delay); } EXPORT_SYMBOL_GPL(wiphy_delayed_work_queue); void wiphy_delayed_work_cancel(struct wiphy *wiphy, struct wiphy_delayed_work *dwork) { lockdep_assert_held(&wiphy->mtx); del_timer_sync(&dwork->timer); wiphy_work_cancel(wiphy, &dwork->work); } EXPORT_SYMBOL_GPL(wiphy_delayed_work_cancel); void wiphy_delayed_work_flush(struct wiphy *wiphy, struct wiphy_delayed_work *dwork) { lockdep_assert_held(&wiphy->mtx); del_timer_sync(&dwork->timer); wiphy_work_flush(wiphy, &dwork->work); } EXPORT_SYMBOL_GPL(wiphy_delayed_work_flush); static int __init cfg80211_init(void) { int err; err = register_pernet_device(&cfg80211_pernet_ops); if (err) goto out_fail_pernet; err = wiphy_sysfs_init(); if (err) goto out_fail_sysfs; err = register_netdevice_notifier(&cfg80211_netdev_notifier); if (err) goto out_fail_notifier; err = nl80211_init(); if 
(err) goto out_fail_nl80211; ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL); err = regulatory_init(); if (err) goto out_fail_reg; cfg80211_wq = alloc_ordered_workqueue("cfg80211", WQ_MEM_RECLAIM); if (!cfg80211_wq) { err = -ENOMEM; goto out_fail_wq; } return 0; out_fail_wq: regulatory_exit(); out_fail_reg: debugfs_remove(ieee80211_debugfs_dir); nl80211_exit(); out_fail_nl80211: unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: wiphy_sysfs_exit(); out_fail_sysfs: unregister_pernet_device(&cfg80211_pernet_ops); out_fail_pernet: return err; } fs_initcall(cfg80211_init); static void __exit cfg80211_exit(void) { debugfs_remove(ieee80211_debugfs_dir); nl80211_exit(); unregister_netdevice_notifier(&cfg80211_netdev_notifier); wiphy_sysfs_exit(); regulatory_exit(); unregister_pernet_device(&cfg80211_pernet_ops); destroy_workqueue(cfg80211_wq); } module_exit(cfg80211_exit);
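/*
 * Editor's illustration (not part of the original file): a minimal sketch of
 * how a driver might use the wiphy_work/wiphy_delayed_work helpers defined
 * above.  The "foo_priv", "foo_scan_timeout", "foo_start_scan" and
 * "foo_stop_scan" names are hypothetical; the callback runs from cfg80211's
 * work processing with the wiphy mutex held.
 */
#if 0	/* illustrative sketch only */
#include <net/cfg80211.h>

struct foo_priv {
	struct wiphy *wiphy;
	struct wiphy_delayed_work scan_timeout;
};

static void foo_scan_timeout(struct wiphy *wiphy, struct wiphy_work *work)
{
	struct foo_priv *priv = container_of(work, struct foo_priv,
					     scan_timeout.work);

	/* Runs with wiphy->mtx held; safe to touch cfg80211 state here. */
	(void)priv;
}

static void foo_start_scan(struct foo_priv *priv)
{
	wiphy_delayed_work_init(&priv->scan_timeout, foo_scan_timeout);
	/* Arrange for the timeout handler to run roughly 10 seconds from now. */
	wiphy_delayed_work_queue(priv->wiphy, &priv->scan_timeout, 10 * HZ);
}

static void foo_stop_scan(struct foo_priv *priv)
{
	/* wiphy_delayed_work_cancel() requires the wiphy mutex to be held. */
	wiphy_delayed_work_cancel(priv->wiphy, &priv->scan_timeout);
}
#endif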
// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/usb/core/driver.c - most of the driver model stuff for usb
 *
 * (C) Copyright 2005 Greg Kroah-Hartman <gregkh@suse.de>
 *
 * based on drivers/usb/usb.c which had the following copyrights:
 *	(C) Copyright Linus Torvalds 1999
 *	(C) Copyright Johannes Erdfelt 1999-2001
 *	(C) Copyright Andreas Gal 1999
 *	(C) Copyright Gregory P. Smith 1999
 *	(C) Copyright Deti Fliegl 1999 (new USB architecture)
 *	(C) Copyright Randy Dunlap 2000
 *	(C) Copyright David Brownell 2000-2004
 *	(C) Copyright Yggdrasil Computing, Inc. 2000
 *		(usb_device_id matching changes by Adam J. Richter)
 *	(C) Copyright Greg Kroah-Hartman 2002-2003
 *
 * Released under the GPLv2 only.
 *
 * NOTE! This is not actually a driver at all, rather this is
 * just a collection of helper routines that implement the
 * matching, probing, releasing, suspending and resuming for
 * real drivers.
 */

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/usb.h>
#include <linux/usb/quirks.h>
#include <linux/usb/hcd.h>

#include "usb.h"

/*
 * Adds a new dynamic USB device ID to this driver,
 * and causes the driver to probe for all devices again.
*/ ssize_t usb_store_new_id(struct usb_dynids *dynids, const struct usb_device_id *id_table, struct device_driver *driver, const char *buf, size_t count) { struct usb_dynid *dynid; u32 idVendor = 0; u32 idProduct = 0; unsigned int bInterfaceClass = 0; u32 refVendor, refProduct; int fields = 0; int retval = 0; fields = sscanf(buf, "%x %x %x %x %x", &idVendor, &idProduct, &bInterfaceClass, &refVendor, &refProduct); if (fields < 2) return -EINVAL; dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); if (!dynid) return -ENOMEM; INIT_LIST_HEAD(&dynid->node); dynid->id.idVendor = idVendor; dynid->id.idProduct = idProduct; dynid->id.match_flags = USB_DEVICE_ID_MATCH_DEVICE; if (fields > 2 && bInterfaceClass) { if (bInterfaceClass > 255) { retval = -EINVAL; goto fail; } dynid->id.bInterfaceClass = (u8)bInterfaceClass; dynid->id.match_flags |= USB_DEVICE_ID_MATCH_INT_CLASS; } if (fields > 4) { const struct usb_device_id *id = id_table; if (!id) { retval = -ENODEV; goto fail; } for (; id->match_flags; id++) if (id->idVendor == refVendor && id->idProduct == refProduct) break; if (id->match_flags) { dynid->id.driver_info = id->driver_info; } else { retval = -ENODEV; goto fail; } } spin_lock(&dynids->lock); list_add_tail(&dynid->node, &dynids->list); spin_unlock(&dynids->lock); retval = driver_attach(driver); if (retval) return retval; return count; fail: kfree(dynid); return retval; } EXPORT_SYMBOL_GPL(usb_store_new_id); ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf) { struct usb_dynid *dynid; size_t count = 0; list_for_each_entry(dynid, &dynids->list, node) if (dynid->id.bInterfaceClass != 0) count += scnprintf(&buf[count], PAGE_SIZE - count, "%04x %04x %02x\n", dynid->id.idVendor, dynid->id.idProduct, dynid->id.bInterfaceClass); else count += scnprintf(&buf[count], PAGE_SIZE - count, "%04x %04x\n", dynid->id.idVendor, dynid->id.idProduct); return count; } EXPORT_SYMBOL_GPL(usb_show_dynids); static ssize_t new_id_show(struct device_driver *driver, char *buf) { struct usb_driver *usb_drv = to_usb_driver(driver); return usb_show_dynids(&usb_drv->dynids, buf); } static ssize_t new_id_store(struct device_driver *driver, const char *buf, size_t count) { struct usb_driver *usb_drv = to_usb_driver(driver); return usb_store_new_id(&usb_drv->dynids, usb_drv->id_table, driver, buf, count); } static DRIVER_ATTR_RW(new_id); /* * Remove a USB device ID from this driver */ static ssize_t remove_id_store(struct device_driver *driver, const char *buf, size_t count) { struct usb_dynid *dynid, *n; struct usb_driver *usb_driver = to_usb_driver(driver); u32 idVendor; u32 idProduct; int fields; fields = sscanf(buf, "%x %x", &idVendor, &idProduct); if (fields < 2) return -EINVAL; spin_lock(&usb_driver->dynids.lock); list_for_each_entry_safe(dynid, n, &usb_driver->dynids.list, node) { struct usb_device_id *id = &dynid->id; if ((id->idVendor == idVendor) && (id->idProduct == idProduct)) { list_del(&dynid->node); kfree(dynid); break; } } spin_unlock(&usb_driver->dynids.lock); return count; } static ssize_t remove_id_show(struct device_driver *driver, char *buf) { return new_id_show(driver, buf); } static DRIVER_ATTR_RW(remove_id); static int usb_create_newid_files(struct usb_driver *usb_drv) { int error = 0; if (usb_drv->no_dynamic_id) goto exit; if (usb_drv->probe != NULL) { error = driver_create_file(&usb_drv->driver, &driver_attr_new_id); if (error == 0) { error = driver_create_file(&usb_drv->driver, &driver_attr_remove_id); if (error) driver_remove_file(&usb_drv->driver, &driver_attr_new_id); } } exit: return 
error; } static void usb_remove_newid_files(struct usb_driver *usb_drv) { if (usb_drv->no_dynamic_id) return; if (usb_drv->probe != NULL) { driver_remove_file(&usb_drv->driver, &driver_attr_remove_id); driver_remove_file(&usb_drv->driver, &driver_attr_new_id); } } static void usb_free_dynids(struct usb_driver *usb_drv) { struct usb_dynid *dynid, *n; spin_lock(&usb_drv->dynids.lock); list_for_each_entry_safe(dynid, n, &usb_drv->dynids.list, node) { list_del(&dynid->node); kfree(dynid); } spin_unlock(&usb_drv->dynids.lock); } static const struct usb_device_id *usb_match_dynamic_id(struct usb_interface *intf, struct usb_driver *drv) { struct usb_dynid *dynid; spin_lock(&drv->dynids.lock); list_for_each_entry(dynid, &drv->dynids.list, node) { if (usb_match_one_id(intf, &dynid->id)) { spin_unlock(&drv->dynids.lock); return &dynid->id; } } spin_unlock(&drv->dynids.lock); return NULL; } /* called from driver core with dev locked */ static int usb_probe_device(struct device *dev) { struct usb_device_driver *udriver = to_usb_device_driver(dev->driver); struct usb_device *udev = to_usb_device(dev); int error = 0; dev_dbg(dev, "%s\n", __func__); /* TODO: Add real matching code */ /* The device should always appear to be in use * unless the driver supports autosuspend. */ if (!udriver->supports_autosuspend) error = usb_autoresume_device(udev); if (error) return error; if (udriver->generic_subclass) error = usb_generic_driver_probe(udev); if (error) return error; /* Probe the USB device with the driver in hand, but only * defer to a generic driver in case the current USB * device driver has an id_table or a match function; i.e., * when the device driver was explicitly matched against * a device. * * If the device driver does not have either of these, * then we assume that it can bind to any device and is * not truly a more specialized/non-generic driver, so a * return value of -ENODEV should not force the device * to be handled by the generic USB driver, as there * can still be another, more specialized, device driver. * * This accommodates the usbip driver. * * TODO: What if, in the future, there are multiple * specialized USB device drivers for a particular device? * In such cases, there is a need to try all matching * specialised device drivers prior to setting the * use_generic_driver bit. 
*/ if (udriver->probe) error = udriver->probe(udev); else if (!udriver->generic_subclass) error = -EINVAL; if (error == -ENODEV && udriver != &usb_generic_driver && (udriver->id_table || udriver->match)) { udev->use_generic_driver = 1; return -EPROBE_DEFER; } return error; } /* called from driver core with dev locked */ static int usb_unbind_device(struct device *dev) { struct usb_device *udev = to_usb_device(dev); struct usb_device_driver *udriver = to_usb_device_driver(dev->driver); if (udriver->disconnect) udriver->disconnect(udev); if (udriver->generic_subclass) usb_generic_driver_disconnect(udev); if (!udriver->supports_autosuspend) usb_autosuspend_device(udev); return 0; } /* called from driver core with dev locked */ static int usb_probe_interface(struct device *dev) { struct usb_driver *driver = to_usb_driver(dev->driver); struct usb_interface *intf = to_usb_interface(dev); struct usb_device *udev = interface_to_usbdev(intf); const struct usb_device_id *id; int error = -ENODEV; int lpm_disable_error = -ENODEV; dev_dbg(dev, "%s\n", __func__); intf->needs_binding = 0; if (usb_device_is_owned(udev)) return error; if (udev->authorized == 0) { dev_err(&intf->dev, "Device is not authorized for usage\n"); return error; } else if (intf->authorized == 0) { dev_err(&intf->dev, "Interface %d is not authorized for usage\n", intf->altsetting->desc.bInterfaceNumber); return error; } id = usb_match_dynamic_id(intf, driver); if (!id) id = usb_match_id(intf, driver->id_table); if (!id) return error; dev_dbg(dev, "%s - got id\n", __func__); error = usb_autoresume_device(udev); if (error) return error; intf->condition = USB_INTERFACE_BINDING; /* Probed interfaces are initially active. They are * runtime-PM-enabled only if the driver has autosuspend support. * They are sensitive to their children's power states. */ pm_runtime_set_active(dev); pm_suspend_ignore_children(dev, false); if (driver->supports_autosuspend) pm_runtime_enable(dev); /* If the new driver doesn't allow hub-initiated LPM, and we can't * disable hub-initiated LPM, then fail the probe. * * Otherwise, leaving LPM enabled should be harmless, because the * endpoint intervals should remain the same, and the U1/U2 timeouts * should remain the same. * * If we need to install alt setting 0 before probe, or another alt * setting during probe, that should also be fine. usb_set_interface() * will attempt to disable LPM, and fail if it can't disable it. */ if (driver->disable_hub_initiated_lpm) { lpm_disable_error = usb_unlocked_disable_lpm(udev); if (lpm_disable_error) { dev_err(&intf->dev, "%s Failed to disable LPM for driver %s\n", __func__, driver->name); error = lpm_disable_error; goto err; } } /* Carry out a deferred switch to altsetting 0 */ if (intf->needs_altsetting0) { error = usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); if (error < 0) goto err; intf->needs_altsetting0 = 0; } error = driver->probe(intf, id); if (error) goto err; intf->condition = USB_INTERFACE_BOUND; /* If the LPM disable succeeded, balance the ref counts. */ if (!lpm_disable_error) usb_unlocked_enable_lpm(udev); usb_autosuspend_device(udev); return error; err: usb_set_intfdata(intf, NULL); intf->needs_remote_wakeup = 0; intf->condition = USB_INTERFACE_UNBOUND; /* If the LPM disable succeeded, balance the ref counts. 
*/ if (!lpm_disable_error) usb_unlocked_enable_lpm(udev); /* Unbound interfaces are always runtime-PM-disabled and -suspended */ if (driver->supports_autosuspend) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); usb_autosuspend_device(udev); return error; } /* called from driver core with dev locked */ static int usb_unbind_interface(struct device *dev) { struct usb_driver *driver = to_usb_driver(dev->driver); struct usb_interface *intf = to_usb_interface(dev); struct usb_host_endpoint *ep, **eps = NULL; struct usb_device *udev; int i, j, error, r; int lpm_disable_error = -ENODEV; intf->condition = USB_INTERFACE_UNBINDING; /* Autoresume for set_interface call below */ udev = interface_to_usbdev(intf); error = usb_autoresume_device(udev); /* If hub-initiated LPM policy may change, attempt to disable LPM until * the driver is unbound. If LPM isn't disabled, that's fine because it * wouldn't be enabled unless all the bound interfaces supported * hub-initiated LPM. */ if (driver->disable_hub_initiated_lpm) lpm_disable_error = usb_unlocked_disable_lpm(udev); /* * Terminate all URBs for this interface unless the driver * supports "soft" unbinding and the device is still present. */ if (!driver->soft_unbind || udev->state == USB_STATE_NOTATTACHED) usb_disable_interface(udev, intf, false); driver->disconnect(intf); /* Free streams */ for (i = 0, j = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { ep = &intf->cur_altsetting->endpoint[i]; if (ep->streams == 0) continue; if (j == 0) { eps = kmalloc_array(USB_MAXENDPOINTS, sizeof(void *), GFP_KERNEL); if (!eps) break; } eps[j++] = ep; } if (j) { usb_free_streams(intf, eps, j, GFP_KERNEL); kfree(eps); } /* Reset other interface state. * We cannot do a Set-Interface if the device is suspended or * if it is prepared for a system sleep (since installing a new * altsetting means creating new endpoint device entries). * When either of these happens, defer the Set-Interface. */ if (intf->cur_altsetting->desc.bAlternateSetting == 0) { /* Already in altsetting 0 so skip Set-Interface. * Just re-enable it without affecting the endpoint toggles. */ usb_enable_interface(udev, intf, false); } else if (!error && !intf->dev.power.is_prepared) { r = usb_set_interface(udev, intf->altsetting[0]. desc.bInterfaceNumber, 0); if (r < 0) intf->needs_altsetting0 = 1; } else { intf->needs_altsetting0 = 1; } usb_set_intfdata(intf, NULL); intf->condition = USB_INTERFACE_UNBOUND; intf->needs_remote_wakeup = 0; /* Attempt to re-enable USB3 LPM, if the disable succeeded. */ if (!lpm_disable_error) usb_unlocked_enable_lpm(udev); /* Unbound interfaces are always runtime-PM-disabled and -suspended */ if (driver->supports_autosuspend) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); if (!error) usb_autosuspend_device(udev); return 0; } /** * usb_driver_claim_interface - bind a driver to an interface * @driver: the driver to be bound * @iface: the interface to which it will be bound; must be in the * usb device's active configuration * @data: driver data associated with that interface * * This is used by usb device drivers that need to claim more than one * interface on a device when probing (audio and acm are current examples). * No device driver should directly modify internal usb_interface or * usb_device structure members. * * Callers must own the device lock, so driver probe() entries don't need * extra locking, but other call contexts may need to explicitly claim that * lock. * * Return: 0 on success. 
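 *
 * Editor's illustration (not from the original source): a probe() routine
 * that owns interface 0 of a two-interface function might claim the sibling
 * interface roughly like this ("foo_driver", "priv" and the interface
 * number 1 are hypothetical placeholders):
 *
 *	struct usb_device *udev = interface_to_usbdev(intf);
 *	struct usb_interface *sibling = usb_ifnum_to_if(udev, 1);
 *	int retval;
 *
 *	if (!sibling)
 *		return -ENODEV;
 *	retval = usb_driver_claim_interface(&foo_driver, sibling, priv);
 *	if (retval)
 *		return retval;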
*/ int usb_driver_claim_interface(struct usb_driver *driver, struct usb_interface *iface, void *data) { struct device *dev; int retval = 0; if (!iface) return -ENODEV; dev = &iface->dev; if (dev->driver) return -EBUSY; /* reject claim if interface is not authorized */ if (!iface->authorized) return -ENODEV; dev->driver = &driver->driver; usb_set_intfdata(iface, data); iface->needs_binding = 0; iface->condition = USB_INTERFACE_BOUND; /* Claimed interfaces are initially inactive (suspended) and * runtime-PM-enabled, but only if the driver has autosuspend * support. Otherwise they are marked active, to prevent the * device from being autosuspended, but left disabled. In either * case they are sensitive to their children's power states. */ pm_suspend_ignore_children(dev, false); if (driver->supports_autosuspend) pm_runtime_enable(dev); else pm_runtime_set_active(dev); /* if interface was already added, bind now; else let * the future device_add() bind it, bypassing probe() */ if (device_is_registered(dev)) retval = device_bind_driver(dev); if (retval) { dev->driver = NULL; usb_set_intfdata(iface, NULL); iface->needs_remote_wakeup = 0; iface->condition = USB_INTERFACE_UNBOUND; /* * Unbound interfaces are always runtime-PM-disabled * and runtime-PM-suspended */ if (driver->supports_autosuspend) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); } return retval; } EXPORT_SYMBOL_GPL(usb_driver_claim_interface); /** * usb_driver_release_interface - unbind a driver from an interface * @driver: the driver to be unbound * @iface: the interface from which it will be unbound * * This can be used by drivers to release an interface without waiting * for their disconnect() methods to be called. In typical cases this * also causes the driver disconnect() method to be called. * * This call is synchronous, and may not be used in an interrupt context. * Callers must own the device lock, so driver disconnect() entries don't * need extra locking, but other call contexts may need to explicitly claim * that lock. */ void usb_driver_release_interface(struct usb_driver *driver, struct usb_interface *iface) { struct device *dev = &iface->dev; /* this should never happen, don't release something that's not ours */ if (!dev->driver || dev->driver != &driver->driver) return; /* don't release from within disconnect() */ if (iface->condition != USB_INTERFACE_BOUND) return; iface->condition = USB_INTERFACE_UNBINDING; /* Release via the driver core only if the interface * has already been registered */ if (device_is_registered(dev)) { device_release_driver(dev); } else { device_lock(dev); usb_unbind_interface(dev); dev->driver = NULL; device_unlock(dev); } } EXPORT_SYMBOL_GPL(usb_driver_release_interface); /* returns 0 if no match, 1 if match */ int usb_match_device(struct usb_device *dev, const struct usb_device_id *id) { if ((id->match_flags & USB_DEVICE_ID_MATCH_VENDOR) && id->idVendor != le16_to_cpu(dev->descriptor.idVendor)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_PRODUCT) && id->idProduct != le16_to_cpu(dev->descriptor.idProduct)) return 0; /* No need to test id->bcdDevice_lo != 0, since 0 is never greater than any unsigned number. 
*/ if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_LO) && (id->bcdDevice_lo > le16_to_cpu(dev->descriptor.bcdDevice))) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_HI) && (id->bcdDevice_hi < le16_to_cpu(dev->descriptor.bcdDevice))) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_CLASS) && (id->bDeviceClass != dev->descriptor.bDeviceClass)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_SUBCLASS) && (id->bDeviceSubClass != dev->descriptor.bDeviceSubClass)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_PROTOCOL) && (id->bDeviceProtocol != dev->descriptor.bDeviceProtocol)) return 0; return 1; } /* returns 0 if no match, 1 if match */ int usb_match_one_id_intf(struct usb_device *dev, struct usb_host_interface *intf, const struct usb_device_id *id) { /* The interface class, subclass, protocol and number should never be * checked for a match if the device class is Vendor Specific, * unless the match record specifies the Vendor ID. */ if (dev->descriptor.bDeviceClass == USB_CLASS_VENDOR_SPEC && !(id->match_flags & USB_DEVICE_ID_MATCH_VENDOR) && (id->match_flags & (USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS | USB_DEVICE_ID_MATCH_INT_PROTOCOL | USB_DEVICE_ID_MATCH_INT_NUMBER))) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_CLASS) && (id->bInterfaceClass != intf->desc.bInterfaceClass)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_SUBCLASS) && (id->bInterfaceSubClass != intf->desc.bInterfaceSubClass)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_PROTOCOL) && (id->bInterfaceProtocol != intf->desc.bInterfaceProtocol)) return 0; if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_NUMBER) && (id->bInterfaceNumber != intf->desc.bInterfaceNumber)) return 0; return 1; } /* returns 0 if no match, 1 if match */ int usb_match_one_id(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_host_interface *intf; struct usb_device *dev; /* proc_connectinfo in devio.c may call us with id == NULL. */ if (id == NULL) return 0; intf = interface->cur_altsetting; dev = interface_to_usbdev(interface); if (!usb_match_device(dev, id)) return 0; return usb_match_one_id_intf(dev, intf, id); } EXPORT_SYMBOL_GPL(usb_match_one_id); /** * usb_match_id - find first usb_device_id matching device or interface * @interface: the interface of interest * @id: array of usb_device_id structures, terminated by zero entry * * usb_match_id searches an array of usb_device_id's and returns * the first one matching the device or interface, or null. * This is used when binding (or rebinding) a driver to an interface. * Most USB device drivers will use this indirectly, through the usb core, * but some layered driver frameworks use it directly. * These device tables are exported with MODULE_DEVICE_TABLE, through * modutils, to support the driver loading functionality of USB hotplugging. * * Return: The first matching usb_device_id, or %NULL. * * What Matches: * * The "match_flags" element in a usb_device_id controls which * members are used. If the corresponding bit is set, the * value in the device_id must match its corresponding member * in the device or interface descriptor, or else the device_id * does not match. * * "driver_info" is normally used only by device drivers, * but you can create a wildcard "matches anything" usb_device_id * as a driver's "modules.usbmap" entry if you provide an id with * only a nonzero "driver_info" field. 
If you do this, the USB device * driver's probe() routine should use additional intelligence to * decide whether to bind to the specified interface. * * What Makes Good usb_device_id Tables: * * The match algorithm is very simple, so that intelligence in * driver selection must come from smart driver id records. * Unless you have good reasons to use another selection policy, * provide match elements only in related groups, and order match * specifiers from specific to general. Use the macros provided * for that purpose if you can. * * The most specific match specifiers use device descriptor * data. These are commonly used with product-specific matches; * the USB_DEVICE macro lets you provide vendor and product IDs, * and you can also match against ranges of product revisions. * These are widely used for devices with application or vendor * specific bDeviceClass values. * * Matches based on device class/subclass/protocol specifications * are slightly more general; use the USB_DEVICE_INFO macro, or * its siblings. These are used with single-function devices * where bDeviceClass doesn't specify that each interface has * its own class. * * Matches based on interface class/subclass/protocol are the * most general; they let drivers bind to any interface on a * multiple-function device. Use the USB_INTERFACE_INFO * macro, or its siblings, to match class-per-interface style * devices (as recorded in bInterfaceClass). * * Note that an entry created by USB_INTERFACE_INFO won't match * any interface if the device class is set to Vendor-Specific. * This is deliberate; according to the USB spec the meanings of * the interface class/subclass/protocol for these devices are also * vendor-specific, and hence matching against a standard product * class wouldn't work anyway. If you really want to use an * interface-based match for such a device, create a match record * that also specifies the vendor ID. (Unforunately there isn't a * standard macro for creating records like this.) * * Within those groups, remember that not all combinations are * meaningful. For example, don't give a product version range * without vendor and product IDs; or specify a protocol without * its associated class and subclass. */ const struct usb_device_id *usb_match_id(struct usb_interface *interface, const struct usb_device_id *id) { /* proc_connectinfo in devio.c may call us with id == NULL. */ if (id == NULL) return NULL; /* It is important to check that id->driver_info is nonzero, since an entry that is all zeroes except for a nonzero id->driver_info is the way to create an entry that indicates that the driver want to examine every device and interface. 
*/ for (; id->idVendor || id->idProduct || id->bDeviceClass || id->bInterfaceClass || id->driver_info; id++) { if (usb_match_one_id(interface, id)) return id; } return NULL; } EXPORT_SYMBOL_GPL(usb_match_id); const struct usb_device_id *usb_device_match_id(struct usb_device *udev, const struct usb_device_id *id) { if (!id) return NULL; for (; id->idVendor || id->idProduct ; id++) { if (usb_match_device(udev, id)) return id; } return NULL; } EXPORT_SYMBOL_GPL(usb_device_match_id); bool usb_driver_applicable(struct usb_device *udev, struct usb_device_driver *udrv) { if (udrv->id_table && udrv->match) return usb_device_match_id(udev, udrv->id_table) != NULL && udrv->match(udev); if (udrv->id_table) return usb_device_match_id(udev, udrv->id_table) != NULL; if (udrv->match) return udrv->match(udev); return false; } static int usb_device_match(struct device *dev, struct device_driver *drv) { /* devices and interfaces are handled separately */ if (is_usb_device(dev)) { struct usb_device *udev; struct usb_device_driver *udrv; /* interface drivers never match devices */ if (!is_usb_device_driver(drv)) return 0; udev = to_usb_device(dev); udrv = to_usb_device_driver(drv); /* If the device driver under consideration does not have a * id_table or a match function, then let the driver's probe * function decide. */ if (!udrv->id_table && !udrv->match) return 1; return usb_driver_applicable(udev, udrv); } else if (is_usb_interface(dev)) { struct usb_interface *intf; struct usb_driver *usb_drv; const struct usb_device_id *id; /* device drivers never match interfaces */ if (is_usb_device_driver(drv)) return 0; intf = to_usb_interface(dev); usb_drv = to_usb_driver(drv); id = usb_match_id(intf, usb_drv->id_table); if (id) return 1; id = usb_match_dynamic_id(intf, usb_drv); if (id) return 1; } return 0; } static int usb_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct usb_device *usb_dev; if (is_usb_device(dev)) { usb_dev = to_usb_device(dev); } else if (is_usb_interface(dev)) { const struct usb_interface *intf = to_usb_interface(dev); usb_dev = interface_to_usbdev(intf); } else { return 0; } if (usb_dev->devnum < 0) { /* driver is often null here; dev_dbg() would oops */ pr_debug("usb %s: already deleted?\n", dev_name(dev)); return -ENODEV; } if (!usb_dev->bus) { pr_debug("usb %s: bus removed?\n", dev_name(dev)); return -ENODEV; } /* per-device configurations are common */ if (add_uevent_var(env, "PRODUCT=%x/%x/%x", le16_to_cpu(usb_dev->descriptor.idVendor), le16_to_cpu(usb_dev->descriptor.idProduct), le16_to_cpu(usb_dev->descriptor.bcdDevice))) return -ENOMEM; /* class-based driver binding models */ if (add_uevent_var(env, "TYPE=%d/%d/%d", usb_dev->descriptor.bDeviceClass, usb_dev->descriptor.bDeviceSubClass, usb_dev->descriptor.bDeviceProtocol)) return -ENOMEM; return 0; } static int __usb_bus_reprobe_drivers(struct device *dev, void *data) { struct usb_device_driver *new_udriver = data; struct usb_device *udev; int ret; /* Don't reprobe if current driver isn't usb_generic_driver */ if (dev->driver != &usb_generic_driver.driver) return 0; udev = to_usb_device(dev); if (!usb_driver_applicable(udev, new_udriver)) return 0; ret = device_reprobe(dev); if (ret && ret != -EPROBE_DEFER) dev_err(dev, "Failed to reprobe device (error %d)\n", ret); return 0; } bool is_usb_device_driver(const struct device_driver *drv) { return drv->probe == usb_probe_device; } /** * usb_register_device_driver - register a USB device (not interface) driver * @new_udriver: USB operations for the device 
driver * @owner: module owner of this driver. * * Registers a USB device driver with the USB core. The list of * unattached devices will be rescanned whenever a new driver is * added, allowing the new driver to attach to any recognized devices. * * Return: A negative error code on failure and 0 on success. */ int usb_register_device_driver(struct usb_device_driver *new_udriver, struct module *owner) { int retval = 0; if (usb_disabled()) return -ENODEV; new_udriver->driver.name = new_udriver->name; new_udriver->driver.bus = &usb_bus_type; new_udriver->driver.probe = usb_probe_device; new_udriver->driver.remove = usb_unbind_device; new_udriver->driver.owner = owner; new_udriver->driver.dev_groups = new_udriver->dev_groups; retval = driver_register(&new_udriver->driver); if (!retval) { pr_info("%s: registered new device driver %s\n", usbcore_name, new_udriver->name); /* * Check whether any device could be better served with * this new driver */ bus_for_each_dev(&usb_bus_type, NULL, new_udriver, __usb_bus_reprobe_drivers); } else { pr_err("%s: error %d registering device driver %s\n", usbcore_name, retval, new_udriver->name); } return retval; } EXPORT_SYMBOL_GPL(usb_register_device_driver); /** * usb_deregister_device_driver - unregister a USB device (not interface) driver * @udriver: USB operations of the device driver to unregister * Context: must be able to sleep * * Unlinks the specified driver from the internal USB driver list. */ void usb_deregister_device_driver(struct usb_device_driver *udriver) { pr_info("%s: deregistering device driver %s\n", usbcore_name, udriver->name); driver_unregister(&udriver->driver); } EXPORT_SYMBOL_GPL(usb_deregister_device_driver); /** * usb_register_driver - register a USB interface driver * @new_driver: USB operations for the interface driver * @owner: module owner of this driver. * @mod_name: module name string * * Registers a USB interface driver with the USB core. The list of * unattached interfaces will be rescanned whenever a new driver is * added, allowing the new driver to attach to any recognized interfaces. * * Return: A negative error code on failure and 0 on success. * * NOTE: if you want your driver to use the USB major number, you must call * usb_register_dev() to enable that functionality. This function no longer * takes care of that. */ int usb_register_driver(struct usb_driver *new_driver, struct module *owner, const char *mod_name) { int retval = 0; if (usb_disabled()) return -ENODEV; new_driver->driver.name = new_driver->name; new_driver->driver.bus = &usb_bus_type; new_driver->driver.probe = usb_probe_interface; new_driver->driver.remove = usb_unbind_interface; new_driver->driver.owner = owner; new_driver->driver.mod_name = mod_name; new_driver->driver.dev_groups = new_driver->dev_groups; spin_lock_init(&new_driver->dynids.lock); INIT_LIST_HEAD(&new_driver->dynids.list); retval = driver_register(&new_driver->driver); if (retval) goto out; retval = usb_create_newid_files(new_driver); if (retval) goto out_newid; pr_info("%s: registered new interface driver %s\n", usbcore_name, new_driver->name); out: return retval; out_newid: driver_unregister(&new_driver->driver); pr_err("%s: error %d registering interface driver %s\n", usbcore_name, retval, new_driver->name); goto out; } EXPORT_SYMBOL_GPL(usb_register_driver); /** * usb_deregister - unregister a USB interface driver * @driver: USB operations of the interface driver to unregister * Context: must be able to sleep * * Unlinks the specified driver from the internal USB driver list. 
* * NOTE: If you called usb_register_dev(), you still need to call * usb_deregister_dev() to clean up your driver's allocated minor numbers, * this * call will no longer do it for you. */ void usb_deregister(struct usb_driver *driver) { pr_info("%s: deregistering interface driver %s\n", usbcore_name, driver->name); usb_remove_newid_files(driver); driver_unregister(&driver->driver); usb_free_dynids(driver); } EXPORT_SYMBOL_GPL(usb_deregister); /* Forced unbinding of a USB interface driver, either because * it doesn't support pre_reset/post_reset/reset_resume or * because it doesn't support suspend/resume. * * The caller must hold @intf's device's lock, but not @intf's lock. */ void usb_forced_unbind_intf(struct usb_interface *intf) { struct usb_driver *driver = to_usb_driver(intf->dev.driver); dev_dbg(&intf->dev, "forced unbind\n"); usb_driver_release_interface(driver, intf); /* Mark the interface for later rebinding */ intf->needs_binding = 1; } /* * Unbind drivers for @udev's marked interfaces. These interfaces have * the needs_binding flag set, for example by usb_resume_interface(). * * The caller must hold @udev's device lock. */ static void unbind_marked_interfaces(struct usb_device *udev) { struct usb_host_config *config; int i; struct usb_interface *intf; config = udev->actconfig; if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { intf = config->interface[i]; if (intf->dev.driver && intf->needs_binding) usb_forced_unbind_intf(intf); } } } /* Delayed forced unbinding of a USB interface driver and scan * for rebinding. * * The caller must hold @intf's device's lock, but not @intf's lock. * * Note: Rebinds will be skipped if a system sleep transition is in * progress and the PM "complete" callback hasn't occurred yet. */ static void usb_rebind_intf(struct usb_interface *intf) { int rc; /* Delayed unbind of an existing driver */ if (intf->dev.driver) usb_forced_unbind_intf(intf); /* Try to rebind the interface */ if (!intf->dev.power.is_prepared) { intf->needs_binding = 0; rc = device_attach(&intf->dev); if (rc < 0 && rc != -EPROBE_DEFER) dev_warn(&intf->dev, "rebind failed: %d\n", rc); } } /* * Rebind drivers to @udev's marked interfaces. These interfaces have * the needs_binding flag set. * * The caller must hold @udev's device lock. */ static void rebind_marked_interfaces(struct usb_device *udev) { struct usb_host_config *config; int i; struct usb_interface *intf; config = udev->actconfig; if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { intf = config->interface[i]; if (intf->needs_binding) usb_rebind_intf(intf); } } } /* * Unbind all of @udev's marked interfaces and then rebind all of them. * This ordering is necessary because some drivers claim several interfaces * when they are first probed. * * The caller must hold @udev's device lock. */ void usb_unbind_and_rebind_marked_interfaces(struct usb_device *udev) { unbind_marked_interfaces(udev); rebind_marked_interfaces(udev); } #ifdef CONFIG_PM /* Unbind drivers for @udev's interfaces that don't support suspend/resume * There is no check for reset_resume here because it can be determined * only during resume whether reset_resume is needed. * * The caller must hold @udev's device lock. 
*/ static void unbind_no_pm_drivers_interfaces(struct usb_device *udev) { struct usb_host_config *config; int i; struct usb_interface *intf; struct usb_driver *drv; config = udev->actconfig; if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { intf = config->interface[i]; if (intf->dev.driver) { drv = to_usb_driver(intf->dev.driver); if (!drv->suspend || !drv->resume) usb_forced_unbind_intf(intf); } } } } static int usb_suspend_device(struct usb_device *udev, pm_message_t msg) { struct usb_device_driver *udriver; int status = 0; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) goto done; /* For devices that don't have a driver, we do a generic suspend. */ if (udev->dev.driver) udriver = to_usb_device_driver(udev->dev.driver); else { udev->do_remote_wakeup = 0; udriver = &usb_generic_driver; } if (udriver->suspend) status = udriver->suspend(udev, msg); if (status == 0 && udriver->generic_subclass) status = usb_generic_driver_suspend(udev, msg); done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); return status; } static int usb_resume_device(struct usb_device *udev, pm_message_t msg) { struct usb_device_driver *udriver; int status = 0; if (udev->state == USB_STATE_NOTATTACHED) goto done; /* Can't resume it if it doesn't have a driver. */ if (udev->dev.driver == NULL) { status = -ENOTCONN; goto done; } /* Non-root devices on a full/low-speed bus must wait for their * companion high-speed root hub, in case a handoff is needed. */ if (!PMSG_IS_AUTO(msg) && udev->parent && udev->bus->hs_companion) device_pm_wait_for_dev(&udev->dev, &udev->bus->hs_companion->root_hub->dev); if (udev->quirks & USB_QUIRK_RESET_RESUME) udev->reset_resume = 1; udriver = to_usb_device_driver(udev->dev.driver); if (udriver->generic_subclass) status = usb_generic_driver_resume(udev, msg); if (status == 0 && udriver->resume) status = udriver->resume(udev, msg); done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); return status; } static int usb_suspend_interface(struct usb_device *udev, struct usb_interface *intf, pm_message_t msg) { struct usb_driver *driver; int status = 0; if (udev->state == USB_STATE_NOTATTACHED || intf->condition == USB_INTERFACE_UNBOUND) goto done; driver = to_usb_driver(intf->dev.driver); /* at this time we know the driver supports suspend */ status = driver->suspend(intf, msg); if (status && !PMSG_IS_AUTO(msg)) dev_err(&intf->dev, "suspend error %d\n", status); done: dev_vdbg(&intf->dev, "%s: status %d\n", __func__, status); return status; } static int usb_resume_interface(struct usb_device *udev, struct usb_interface *intf, pm_message_t msg, int reset_resume) { struct usb_driver *driver; int status = 0; if (udev->state == USB_STATE_NOTATTACHED) goto done; /* Don't let autoresume interfere with unbinding */ if (intf->condition == USB_INTERFACE_UNBINDING) goto done; /* Can't resume it if it doesn't have a driver. */ if (intf->condition == USB_INTERFACE_UNBOUND) { /* Carry out a deferred switch to altsetting 0 */ if (intf->needs_altsetting0 && !intf->dev.power.is_prepared) { usb_set_interface(udev, intf->altsetting[0]. 
desc.bInterfaceNumber, 0); intf->needs_altsetting0 = 0; } goto done; } /* Don't resume if the interface is marked for rebinding */ if (intf->needs_binding) goto done; driver = to_usb_driver(intf->dev.driver); if (reset_resume) { if (driver->reset_resume) { status = driver->reset_resume(intf); if (status) dev_err(&intf->dev, "%s error %d\n", "reset_resume", status); } else { intf->needs_binding = 1; dev_dbg(&intf->dev, "no reset_resume for driver %s?\n", driver->name); } } else { status = driver->resume(intf); if (status) dev_err(&intf->dev, "resume error %d\n", status); } done: dev_vdbg(&intf->dev, "%s: status %d\n", __func__, status); /* Later we will unbind the driver and/or reprobe, if necessary */ return status; } /** * usb_suspend_both - suspend a USB device and its interfaces * @udev: the usb_device to suspend * @msg: Power Management message describing this state transition * * This is the central routine for suspending USB devices. It calls the * suspend methods for all the interface drivers in @udev and then calls * the suspend method for @udev itself. When the routine is called in * autosuspend, if an error occurs at any stage, all the interfaces * which were suspended are resumed so that they remain in the same * state as the device, but when called from system sleep, all error * from suspend methods of interfaces and the non-root-hub device itself * are simply ignored, so all suspended interfaces are only resumed * to the device's state when @udev is root-hub and its suspend method * returns failure. * * Autosuspend requests originating from a child device or an interface * driver may be made without the protection of @udev's device lock, but * all other suspend calls will hold the lock. Usbcore will insure that * method calls do not arrive during bind, unbind, or reset operations. * However drivers must be prepared to handle suspend calls arriving at * unpredictable times. * * This routine can run only in process context. * * Return: 0 if the suspend succeeded. */ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) { int status = 0; int i = 0, n = 0; struct usb_interface *intf; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) goto done; /* Suspend all the interfaces and then udev itself */ if (udev->actconfig) { n = udev->actconfig->desc.bNumInterfaces; for (i = n - 1; i >= 0; --i) { intf = udev->actconfig->interface[i]; status = usb_suspend_interface(udev, intf, msg); /* Ignore errors during system sleep transitions */ if (!PMSG_IS_AUTO(msg)) status = 0; if (status != 0) break; } } if (status == 0) { status = usb_suspend_device(udev, msg); /* * Ignore errors from non-root-hub devices during * system sleep transitions. For the most part, * these devices should go to low power anyway when * the entire bus is suspended. */ if (udev->parent && !PMSG_IS_AUTO(msg)) status = 0; /* * If the device is inaccessible, don't try to resume * suspended interfaces and just return the error. 
*/ if (status && status != -EBUSY) { int err; u16 devstat; err = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, &devstat); if (err) { dev_err(&udev->dev, "Failed to suspend device, error %d\n", status); goto done; } } } /* If the suspend failed, resume interfaces that did get suspended */ if (status != 0) { if (udev->actconfig) { msg.event ^= (PM_EVENT_SUSPEND | PM_EVENT_RESUME); while (++i < n) { intf = udev->actconfig->interface[i]; usb_resume_interface(udev, intf, msg, 0); } } /* If the suspend succeeded then prevent any more URB submissions * and flush any outstanding URBs. */ } else { udev->can_submit = 0; for (i = 0; i < 16; ++i) { usb_hcd_flush_endpoint(udev, udev->ep_out[i]); usb_hcd_flush_endpoint(udev, udev->ep_in[i]); } } done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); return status; } /** * usb_resume_both - resume a USB device and its interfaces * @udev: the usb_device to resume * @msg: Power Management message describing this state transition * * This is the central routine for resuming USB devices. It calls the * resume method for @udev and then calls the resume methods for all * the interface drivers in @udev. * * Autoresume requests originating from a child device or an interface * driver may be made without the protection of @udev's device lock, but * all other resume calls will hold the lock. Usbcore will insure that * method calls do not arrive during bind, unbind, or reset operations. * However drivers must be prepared to handle resume calls arriving at * unpredictable times. * * This routine can run only in process context. * * Return: 0 on success. */ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) { int status = 0; int i; struct usb_interface *intf; if (udev->state == USB_STATE_NOTATTACHED) { status = -ENODEV; goto done; } udev->can_submit = 1; /* Resume the device */ if (udev->state == USB_STATE_SUSPENDED || udev->reset_resume) status = usb_resume_device(udev, msg); /* Resume the interfaces */ if (status == 0 && udev->actconfig) { for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { intf = udev->actconfig->interface[i]; usb_resume_interface(udev, intf, msg, udev->reset_resume); } } usb_mark_last_busy(udev); done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); if (!status) udev->reset_resume = 0; return status; } static void choose_wakeup(struct usb_device *udev, pm_message_t msg) { int w; /* * For FREEZE/QUIESCE, disable remote wakeups so no interrupts get * generated. */ if (msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_QUIESCE) { w = 0; } else { /* * Enable remote wakeup if it is allowed, even if no interface * drivers actually want it. */ w = device_may_wakeup(&udev->dev); } /* * If the device is autosuspended with the wrong wakeup setting, * autoresume now so the setting can be changed. 
*/ if (udev->state == USB_STATE_SUSPENDED && w != udev->do_remote_wakeup) pm_runtime_resume(&udev->dev); udev->do_remote_wakeup = w; } /* The device lock is held by the PM core */ int usb_suspend(struct device *dev, pm_message_t msg) { struct usb_device *udev = to_usb_device(dev); int r; unbind_no_pm_drivers_interfaces(udev); /* From now on we are sure all drivers support suspend/resume * but not necessarily reset_resume() * so we may still need to unbind and rebind upon resume */ choose_wakeup(udev, msg); r = usb_suspend_both(udev, msg); if (r) return r; if (udev->quirks & USB_QUIRK_DISCONNECT_SUSPEND) usb_port_disable(udev); return 0; } /* The device lock is held by the PM core */ int usb_resume_complete(struct device *dev) { struct usb_device *udev = to_usb_device(dev); /* For PM complete calls, all we do is rebind interfaces * whose needs_binding flag is set */ if (udev->state != USB_STATE_NOTATTACHED) rebind_marked_interfaces(udev); return 0; } /* The device lock is held by the PM core */ int usb_resume(struct device *dev, pm_message_t msg) { struct usb_device *udev = to_usb_device(dev); int status; /* For all calls, take the device back to full power and * tell the PM core in case it was autosuspended previously. * Unbind the interfaces that will need rebinding later, * because they fail to support reset_resume. * (This can't be done in usb_resume_interface() * above because it doesn't own the right set of locks.) */ status = usb_resume_both(udev, msg); if (status == 0) { pm_runtime_disable(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); unbind_marked_interfaces(udev); } /* Avoid PM error messages for devices disconnected while suspended * as we'll display regular disconnect messages just a bit later. */ if (status == -ENODEV || status == -ESHUTDOWN) status = 0; return status; } /** * usb_enable_autosuspend - allow a USB device to be autosuspended * @udev: the USB device which may be autosuspended * * This routine allows @udev to be autosuspended. An autosuspend won't * take place until the autosuspend_delay has elapsed and all the other * necessary conditions are satisfied. * * The caller must hold @udev's device lock. */ void usb_enable_autosuspend(struct usb_device *udev) { pm_runtime_allow(&udev->dev); } EXPORT_SYMBOL_GPL(usb_enable_autosuspend); /** * usb_disable_autosuspend - prevent a USB device from being autosuspended * @udev: the USB device which may not be autosuspended * * This routine prevents @udev from being autosuspended and wakes it up * if it is already autosuspended. * * The caller must hold @udev's device lock. */ void usb_disable_autosuspend(struct usb_device *udev) { pm_runtime_forbid(&udev->dev); } EXPORT_SYMBOL_GPL(usb_disable_autosuspend); /** * usb_autosuspend_device - delayed autosuspend of a USB device and its interfaces * @udev: the usb_device to autosuspend * * This routine should be called when a core subsystem is finished using * @udev and wants to allow it to autosuspend. Examples would be when * @udev's device file in usbfs is closed or after a configuration change. * * @udev's usage counter is decremented; if it drops to 0 and all the * interfaces are inactive then a delayed autosuspend will be attempted. * The attempt may fail (see autosuspend_check()). * * The caller must hold @udev's device lock. * * This routine can run only in process context. 
*/ void usb_autosuspend_device(struct usb_device *udev) { int status; usb_mark_last_busy(udev); status = pm_runtime_put_sync_autosuspend(&udev->dev); dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&udev->dev.power.usage_count), status); } /** * usb_autoresume_device - immediately autoresume a USB device and its interfaces * @udev: the usb_device to autoresume * * This routine should be called when a core subsystem wants to use @udev * and needs to guarantee that it is not suspended. No autosuspend will * occur until usb_autosuspend_device() is called. (Note that this will * not prevent suspend events originating in the PM core.) Examples would * be when @udev's device file in usbfs is opened or when a remote-wakeup * request is received. * * @udev's usage counter is incremented to prevent subsequent autosuspends. * However if the autoresume fails then the usage counter is re-decremented. * * The caller must hold @udev's device lock. * * This routine can run only in process context. * * Return: 0 on success. A negative error code otherwise. */ int usb_autoresume_device(struct usb_device *udev) { int status; status = pm_runtime_resume_and_get(&udev->dev); dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&udev->dev.power.usage_count), status); if (status > 0) status = 0; return status; } /** * usb_autopm_put_interface - decrement a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be decremented * * This routine should be called by an interface driver when it is * finished using @intf and wants to allow it to autosuspend. A typical * example would be a character-device driver when its device file is * closed. * * The routine decrements @intf's usage counter. When the counter reaches * 0, a delayed autosuspend request for @intf's device is attempted. The * attempt may fail (see autosuspend_check()). * * This routine can run only in process context. */ void usb_autopm_put_interface(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); int status; usb_mark_last_busy(udev); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface); /** * usb_autopm_put_interface_async - decrement a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be decremented * * This routine does much the same thing as usb_autopm_put_interface(): * It decrements @intf's usage counter and schedules a delayed * autosuspend request if the counter is <= 0. The difference is that it * does not perform any synchronization; callers should hold a private * lock and handle all synchronization issues themselves. * * Typically a driver would call this routine during an URB's completion * handler, if no more URBs were pending. * * This routine can run in atomic context. */ void usb_autopm_put_interface_async(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); int status; usb_mark_last_busy(udev); status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async); /** * usb_autopm_put_interface_no_suspend - decrement a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be decremented * * This routine decrements @intf's usage counter but does not carry out an * autosuspend. 
* * This routine can run in atomic context. */ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); pm_runtime_put_noidle(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend); /** * usb_autopm_get_interface - increment a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be incremented * * This routine should be called by an interface driver when it wants to * use @intf and needs to guarantee that it is not suspended. In addition, * the routine prevents @intf from being autosuspended subsequently. (Note * that this will not prevent suspend events originating in the PM core.) * This prevention will persist until usb_autopm_put_interface() is called * or @intf is unbound. A typical example would be a character-device * driver when its device file is opened. * * @intf's usage counter is incremented to prevent subsequent autosuspends. * However if the autoresume fails then the counter is re-decremented. * * This routine can run only in process context. * * Return: 0 on success. */ int usb_autopm_get_interface(struct usb_interface *intf) { int status; status = pm_runtime_resume_and_get(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); if (status > 0) status = 0; return status; } EXPORT_SYMBOL_GPL(usb_autopm_get_interface); /** * usb_autopm_get_interface_async - increment a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be incremented * * This routine does much the same thing as * usb_autopm_get_interface(): It increments @intf's usage counter and * queues an autoresume request if the device is suspended. The * differences are that it does not perform any synchronization (callers * should hold a private lock and handle all synchronization issues * themselves), and it does not autoresume the device directly (it only * queues a request). After a successful call, the device may not yet be * resumed. * * This routine can run in atomic context. * * Return: 0 on success. A negative error code otherwise. */ int usb_autopm_get_interface_async(struct usb_interface *intf) { int status; status = pm_runtime_get(&intf->dev); if (status < 0 && status != -EINPROGRESS) pm_runtime_put_noidle(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); if (status > 0 || status == -EINPROGRESS) status = 0; return status; } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_async); /** * usb_autopm_get_interface_no_resume - increment a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be incremented * * This routine increments @intf's usage counter but does not carry out an * autoresume. * * This routine can run in atomic context. */ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); pm_runtime_get_noresume(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume); /* Internal routine to check whether we may autosuspend a device. */ static int autosuspend_check(struct usb_device *udev) { int w, i; struct usb_interface *intf; if (udev->state == USB_STATE_NOTATTACHED) return -ENODEV; /* Fail if autosuspend is disabled, or any interfaces are in use, or * any interface drivers require remote wakeup but it isn't available. 
*/ w = 0; if (udev->actconfig) { for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { intf = udev->actconfig->interface[i]; /* We don't need to check interfaces that are * disabled for runtime PM. Either they are unbound * or else their drivers don't support autosuspend * and so they are permanently active. */ if (intf->dev.power.disable_depth) continue; if (atomic_read(&intf->dev.power.usage_count) > 0) return -EBUSY; w |= intf->needs_remote_wakeup; /* Don't allow autosuspend if the device will need * a reset-resume and any of its interface drivers * doesn't include support or needs remote wakeup. */ if (udev->quirks & USB_QUIRK_RESET_RESUME) { struct usb_driver *driver; driver = to_usb_driver(intf->dev.driver); if (!driver->reset_resume || intf->needs_remote_wakeup) return -EOPNOTSUPP; } } } if (w && !device_can_wakeup(&udev->dev)) { dev_dbg(&udev->dev, "remote wakeup needed for autosuspend\n"); return -EOPNOTSUPP; } /* * If the device is a direct child of the root hub and the HCD * doesn't handle wakeup requests, don't allow autosuspend when * wakeup is needed. */ if (w && udev->parent == udev->bus->root_hub && bus_to_hcd(udev->bus)->cant_recv_wakeups) { dev_dbg(&udev->dev, "HCD doesn't handle wakeup requests\n"); return -EOPNOTSUPP; } udev->do_remote_wakeup = w; return 0; } int usb_runtime_suspend(struct device *dev) { struct usb_device *udev = to_usb_device(dev); int status; /* A USB device can be suspended if it passes the various autosuspend * checks. Runtime suspend for a USB device means suspending all the * interfaces and then the device itself. */ if (autosuspend_check(udev) != 0) return -EAGAIN; status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND); /* Allow a retry if autosuspend failed temporarily */ if (status == -EAGAIN || status == -EBUSY) usb_mark_last_busy(udev); /* * The PM core reacts badly unless the return code is 0, * -EAGAIN, or -EBUSY, so always return -EBUSY on an error * (except for root hubs, because they don't suspend through * an upstream port like other USB devices). */ if (status != 0 && udev->parent) return -EBUSY; return status; } int usb_runtime_resume(struct device *dev) { struct usb_device *udev = to_usb_device(dev); int status; /* Runtime resume for a USB device means resuming both the device * and all its interfaces. */ status = usb_resume_both(udev, PMSG_AUTO_RESUME); return status; } int usb_runtime_idle(struct device *dev) { struct usb_device *udev = to_usb_device(dev); /* An idle USB device can be suspended if it passes the various * autosuspend checks. */ if (autosuspend_check(udev) == 0) pm_runtime_autosuspend(dev); /* Tell the core not to suspend it, though. */ return -EBUSY; } static int usb_set_usb2_hardware_lpm(struct usb_device *udev, int enable) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); int ret = -EPERM; if (hcd->driver->set_usb2_hw_lpm) { ret = hcd->driver->set_usb2_hw_lpm(hcd, udev, enable); if (!ret) udev->usb2_hw_lpm_enabled = enable; } return ret; } int usb_enable_usb2_hardware_lpm(struct usb_device *udev) { if (!udev->usb2_hw_lpm_capable || !udev->usb2_hw_lpm_allowed || udev->usb2_hw_lpm_enabled) return 0; return usb_set_usb2_hardware_lpm(udev, 1); } int usb_disable_usb2_hardware_lpm(struct usb_device *udev) { if (!udev->usb2_hw_lpm_enabled) return 0; return usb_set_usb2_hardware_lpm(udev, 0); } #endif /* CONFIG_PM */ const struct bus_type usb_bus_type = { .name = "usb", .match = usb_device_match, .uevent = usb_uevent, .need_parent_lock = true, };
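/*
 * Editor's note: a minimal, hypothetical sketch (not part of this file) of how
 * an interface driver would typically use the autosuspend API documented above.
 * The names my_skel, my_skel_open and my_skel_close are assumptions made purely
 * for illustration.
 */
#include <linux/usb.h>

struct my_skel {				/* assumed per-interface state */
	struct usb_interface *intf;
};

static int my_skel_open(struct my_skel *priv)
{
	int ret;

	/* Resume the device if it is autosuspended and pin it while in use. */
	ret = usb_autopm_get_interface(priv->intf);
	if (ret)
		return ret;	/* usage counter was already re-decremented */

	/* ... submit URBs, talk to the hardware ... */
	return 0;
}

static void my_skel_close(struct my_skel *priv)
{
	/* Drop the PM reference; a delayed autosuspend may now be attempted. */
	usb_autopm_put_interface(priv->intf);
}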
/* SPDX-License-Identifier: GPL-2.0 */ /* * Operations on the network namespace */ #ifndef __NET_NET_NAMESPACE_H #define __NET_NET_NAMESPACE_H #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/sysctl.h> #include <linux/uidgid.h> #include <net/flow.h> #include <net/netns/core.h> #include <net/netns/mib.h> #include <net/netns/unix.h> #include <net/netns/packet.h> #include <net/netns/ipv4.h> #include <net/netns/ipv6.h> #include <net/netns/nexthop.h> #include <net/netns/ieee802154_6lowpan.h> #include <net/netns/sctp.h> #include <net/netns/netfilter.h> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netns/conntrack.h> #endif #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) #include <net/netns/flow_table.h> #endif #include <net/netns/nftables.h> #include <net/netns/xfrm.h> #include <net/netns/mpls.h> #include <net/netns/can.h> #include <net/netns/xdp.h> #include <net/netns/smc.h> #include <net/netns/bpf.h> #include <net/netns/mctp.h> #include <net/net_trackers.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> #include <linux/notifier.h> #include <linux/xarray.h> struct user_namespace; struct proc_dir_entry; struct net_device; struct sock; struct ctl_table_header; struct net_generic; struct uevent_sock; struct netns_ipvs; struct bpf_prog; #define NETDEV_HASHBITS 8 #define 
NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { /* First cache line can be often dirtied. * Do not place here read-mostly fields. */ refcount_t passive; /* To decide when the network * namespace should be freed. */ spinlock_t rules_mod_lock; unsigned int dev_base_seq; /* protected by rtnl_mutex */ u32 ifindex; spinlock_t nsid_lock; atomic_t fnhe_genid; struct list_head list; /* list of network namespaces */ struct list_head exit_list; /* To linked to call pernet exit * methods on dead net ( * pernet_ops_rwsem read locked), * or to unregister pernet ops * (pernet_ops_rwsem write locked). */ struct llist_node cleanup_list; /* namespaces on death row */ #ifdef CONFIG_KEYS struct key_tag *key_domain; /* Key domain of operation tag */ #endif struct user_namespace *user_ns; /* Owning user namespace */ struct ucounts *ucounts; struct idr netns_ids; struct ns_common ns; struct ref_tracker_dir refcnt_tracker; struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not * refcounted against netns */ struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; #ifdef CONFIG_SYSCTL struct ctl_table_set sysctls; #endif struct sock *rtnl; /* rtnetlink socket */ struct sock *genl_sock; struct uevent_sock *uevent_sock; /* uevent socket */ struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; struct xarray dev_by_index; struct raw_notifier_head netdev_chain; /* Note that @hash_mix can be read millions times per second, * it is critical that it is on a read_mostly cache line. */ u32 hash_mix; struct net_device *loopback_dev; /* The loopback */ /* core fib_rules */ struct list_head rules_ops; struct netns_core core; struct netns_mib mib; struct netns_packet packet; #if IS_ENABLED(CONFIG_UNIX) struct netns_unix unx; #endif struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; #endif #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) struct netns_ieee802154_lowpan ieee802154_lowpan; #endif #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE) struct netns_sctp sctp; #endif #ifdef CONFIG_NETFILTER struct netns_nf nf; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) struct netns_nftables nft; #endif #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) struct netns_ft ft; #endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic __rcu *gen; /* Used to store attached BPF programs */ struct netns_bpf bpf; /* Note : following structs are cache line aligned */ #ifdef CONFIG_XFRM struct netns_xfrm xfrm; #endif u64 net_cookie; /* written once */ #if IS_ENABLED(CONFIG_IP_VS) struct netns_ipvs *ipvs; #endif #if IS_ENABLED(CONFIG_MPLS) struct netns_mpls mpls; #endif #if IS_ENABLED(CONFIG_CAN) struct netns_can can; #endif #ifdef CONFIG_XDP_SOCKETS struct netns_xdp xdp; #endif #if IS_ENABLED(CONFIG_MCTP) struct netns_mctp mctp; #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) struct sock *crypto_nlsk; #endif struct sock *diag_nlsk; #if IS_ENABLED(CONFIG_SMC) struct netns_smc smc; #endif } __randomize_layout; #include <linux/seq_file_net.h> /* Init's network namespace */ extern struct net init_net; #ifdef CONFIG_NET_NS struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net); void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid); void net_ns_barrier(void); struct ns_common *get_net_ns(struct ns_common *ns); 
struct net *get_net_ns_by_fd(int fd); #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> static inline struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net) { if (flags & CLONE_NEWNET) return ERR_PTR(-EINVAL); return old_net; } static inline void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid) { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; } static inline void net_ns_barrier(void) {} static inline struct ns_common *get_net_ns(struct ns_common *ns) { return ERR_PTR(-EINVAL); } static inline struct net *get_net_ns_by_fd(int fd) { return ERR_PTR(-EINVAL); } #endif /* CONFIG_NET_NS */ extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); void ipx_unregister_sysctl(void); #else #define ipx_register_sysctl() #define ipx_unregister_sysctl() #endif #ifdef CONFIG_NET_NS void __put_net(struct net *net); /* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { refcount_inc(&net->ns.count); return net; } static inline struct net *maybe_get_net(struct net *net) { /* Used when we know struct net exists but we * aren't guaranteed a previous reference count * exists. If the reference count is zero this * function fails and returns NULL. */ if (!refcount_inc_not_zero(&net->ns.count)) net = NULL; return net; } /* Try using put_net_track() instead */ static inline void put_net(struct net *net) { if (refcount_dec_and_test(&net->ns.count)) __put_net(net); } static inline int net_eq(const struct net *net1, const struct net *net2) { return net1 == net2; } static inline int check_net(const struct net *net) { return refcount_read(&net->ns.count) != 0; } void net_drop_ns(void *); #else static inline struct net *get_net(struct net *net) { return net; } static inline void put_net(struct net *net) { } static inline struct net *maybe_get_net(struct net *net) { return net; } static inline int net_eq(const struct net *net1, const struct net *net2) { return 1; } static inline int check_net(const struct net *net) { return 1; } #define net_drop_ns NULL #endif static inline void __netns_tracker_alloc(struct net *net, netns_tracker *tracker, bool refcounted, gfp_t gfp) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER ref_tracker_alloc(refcounted ? &net->refcnt_tracker : &net->notrefcnt_tracker, tracker, gfp); #endif } static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, gfp_t gfp) { __netns_tracker_alloc(net, tracker, true, gfp); } static inline void __netns_tracker_free(struct net *net, netns_tracker *tracker, bool refcounted) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER ref_tracker_free(refcounted ? 
&net->refcnt_tracker : &net->notrefcnt_tracker, tracker); #endif } static inline struct net *get_net_track(struct net *net, netns_tracker *tracker, gfp_t gfp) { get_net(net); netns_tracker_alloc(net, tracker, gfp); return net; } static inline void put_net_track(struct net *net, netns_tracker *tracker) { __netns_tracker_free(net, tracker, true); put_net(net); } typedef struct { #ifdef CONFIG_NET_NS struct net __rcu *net; #endif } possible_net_t; static inline void write_pnet(possible_net_t *pnet, struct net *net) { #ifdef CONFIG_NET_NS rcu_assign_pointer(pnet->net, net); #endif } static inline struct net *read_pnet(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference_protected(pnet->net, true); #else return &init_net; #endif } static inline struct net *read_pnet_rcu(possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference(pnet->net); #else return &init_net; #endif } /* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) #define for_each_net_continue_reverse(VAR) \ list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list) #define for_each_net_rcu(VAR) \ list_for_each_entry_rcu(VAR, &net_namespace_list, list) #ifdef CONFIG_NET_NS #define __net_init #define __net_exit #define __net_initdata #define __net_initconst #else #define __net_init __init #define __net_exit __ref #define __net_initdata __initdata #define __net_initconst __initconst #endif int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp); int peernet2id(const struct net *net, struct net *peer); bool peernet_has_id(const struct net *net, struct net *peer); struct net *get_net_ns_by_id(const struct net *net, int id); struct pernet_operations { struct list_head list; /* * Below methods are called without any exclusive locks. * More than one net may be constructed and destructed * in parallel on several cpus. Every pernet_operations * have to keep in mind all other pernet_operations and * to introduce a locking, if they share common resources. * * The only time they are called with exclusive lock is * from register_pernet_subsys(), unregister_pernet_subsys() * register_pernet_device() and unregister_pernet_device(). * * Exit methods using blocking RCU primitives, such as * synchronize_rcu(), should be implemented via exit_batch. * Then, destruction of a group of net requires single * synchronize_rcu() related to these pernet_operations, * instead of separate synchronize_rcu() for every net. * Please, avoid synchronize_rcu() at all, where it's possible. * * Note that a combination of pre_exit() and exit() can * be used, since a synchronize_rcu() is guaranteed between * the calls. */ int (*init)(struct net *net); void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); /* Following method is called with RTNL held. */ void (*exit_batch_rtnl)(struct list_head *net_exit_list, struct list_head *dev_kill_list); unsigned int *id; size_t size; }; /* * Use these carefully. If you implement a network device and it * needs per network namespace operations use device pernet operations, * otherwise use pernet subsys operations. * * Network interfaces need to be removed from a dying netns _before_ * subsys notifiers can be called, as most of the network code cleanup * (which is done from subsys notifiers) runs with the assumption that * dev_remove_pack has been called so no new packets will arrive during * and after the cleanup functions have been called. 
dev_remove_pack * is not per namespace so instead the guarantee of no more packets * arriving in a network namespace is provided by ensuring that all * network devices and all sockets have left the network namespace * before the cleanup methods are called. * * For the longest time the ipv4 icmp code was registered as a pernet * device which caused kernel oops, and panics during network * namespace cleanup. So please don't get this wrong. */ int register_pernet_subsys(struct pernet_operations *); void unregister_pernet_subsys(struct pernet_operations *); int register_pernet_device(struct pernet_operations *); void unregister_pernet_device(struct pernet_operations *); struct ctl_table; #define register_net_sysctl(net, path, table) \ register_net_sysctl_sz(net, path, table, ARRAY_SIZE(table)) #ifdef CONFIG_SYSCTL int net_sysctl_init(void); struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path, struct ctl_table *table, size_t table_size); void unregister_net_sysctl_table(struct ctl_table_header *header); #else static inline int net_sysctl_init(void) { return 0; } static inline struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path, struct ctl_table *table, size_t table_size) { return NULL; } static inline void unregister_net_sysctl_table(struct ctl_table_header *header) { } #endif static inline int rt_genid_ipv4(const struct net *net) { return atomic_read(&net->ipv4.rt_genid); } #if IS_ENABLED(CONFIG_IPV6) static inline int rt_genid_ipv6(const struct net *net) { return atomic_read(&net->ipv6.fib6_sernum); } #endif static inline void rt_genid_bump_ipv4(struct net *net) { atomic_inc(&net->ipv4.rt_genid); } extern void (*__fib6_flush_trees)(struct net *net); static inline void rt_genid_bump_ipv6(struct net *net) { if (__fib6_flush_trees) __fib6_flush_trees(net); } #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) static inline struct netns_ieee802154_lowpan * net_ieee802154_lowpan(struct net *net) { return &net->ieee802154_lowpan; } #endif /* For callers who don't really care about whether it's IPv4 or IPv6 */ static inline void rt_genid_bump_all(struct net *net) { rt_genid_bump_ipv4(net); rt_genid_bump_ipv6(net); } static inline int fnhe_genid(const struct net *net) { return atomic_read(&net->fnhe_genid); } static inline void fnhe_genid_bump(struct net *net) { atomic_inc(&net->fnhe_genid); } #ifdef CONFIG_NET void net_ns_init(void); #else static inline void net_ns_init(void) {} #endif #endif /* __NET_NET_NAMESPACE_H */
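/*
 * Editor's note: a minimal, hypothetical sketch of registering per-namespace
 * state through pernet_operations as described above.  demo_net_id,
 * struct demo_pernet and the demo_* functions are assumed names;
 * net_generic() comes from <net/netns/generic.h>.
 */
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct demo_pernet {			/* assumed per-namespace state */
	int counter;
};

static unsigned int demo_net_id;

static int __net_init demo_net_init(struct net *net)
{
	struct demo_pernet *dp = net_generic(net, demo_net_id);

	dp->counter = 0;	/* memory is allocated by the core via .size */
	return 0;
}

static void __net_exit demo_net_exit(struct net *net)
{
	/* nothing to free here; .size-allocated data is released by the core */
}

static struct pernet_operations demo_net_ops = {
	.init = demo_net_init,
	.exit = demo_net_exit,
	.id   = &demo_net_id,
	.size = sizeof(struct demo_pernet),
};

/*
 * A module would call register_pernet_subsys(&demo_net_ops) at init time and
 * unregister_pernet_subsys(&demo_net_ops) on exit.
 */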
// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux I2C core ACPI support code * * Copyright (C) 2014 Intel Corp, Author: Lan Tianyu <tianyu.lan@intel.com> */ #include <linux/acpi.h> #include <linux/device.h> #include <linux/err.h> #include <linux/i2c.h> #include <linux/list.h> #include <linux/module.h> #include <linux/slab.h> #include "i2c-core.h" struct i2c_acpi_handler_data { struct 
acpi_connection_info info; struct i2c_adapter *adapter; }; struct gsb_buffer { u8 status; u8 len; union { u16 wdata; u8 bdata; DECLARE_FLEX_ARRAY(u8, data); }; } __packed; struct i2c_acpi_lookup { struct i2c_board_info *info; acpi_handle adapter_handle; acpi_handle device_handle; acpi_handle search_handle; int n; int index; u32 speed; u32 min_speed; u32 force_speed; }; /** * i2c_acpi_get_i2c_resource - Gets I2cSerialBus resource if type matches * @ares: ACPI resource * @i2c: Pointer to I2cSerialBus resource will be returned here * * Checks if the given ACPI resource is of type I2cSerialBus. * In this case, returns a pointer to it to the caller. * * Returns true if resource type is of I2cSerialBus, otherwise false. */ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c) { struct acpi_resource_i2c_serialbus *sb; if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) return false; sb = &ares->data.i2c_serial_bus; if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) return false; *i2c = sb; return true; } EXPORT_SYMBOL_GPL(i2c_acpi_get_i2c_resource); static int i2c_acpi_resource_count(struct acpi_resource *ares, void *data) { struct acpi_resource_i2c_serialbus *sb; int *count = data; if (i2c_acpi_get_i2c_resource(ares, &sb)) *count = *count + 1; return 1; } /** * i2c_acpi_client_count - Count the number of I2cSerialBus resources * @adev: ACPI device * * Returns the number of I2cSerialBus resources in the ACPI-device's * resource-list; or a negative error code. */ int i2c_acpi_client_count(struct acpi_device *adev) { int ret, count = 0; LIST_HEAD(r); ret = acpi_dev_get_resources(adev, &r, i2c_acpi_resource_count, &count); if (ret < 0) return ret; acpi_dev_free_resource_list(&r); return count; } EXPORT_SYMBOL_GPL(i2c_acpi_client_count); static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data) { struct i2c_acpi_lookup *lookup = data; struct i2c_board_info *info = lookup->info; struct acpi_resource_i2c_serialbus *sb; acpi_status status; if (info->addr || !i2c_acpi_get_i2c_resource(ares, &sb)) return 1; if (lookup->index != -1 && lookup->n++ != lookup->index) return 1; status = acpi_get_handle(lookup->device_handle, sb->resource_source.string_ptr, &lookup->adapter_handle); if (ACPI_FAILURE(status)) return 1; info->addr = sb->slave_address; lookup->speed = sb->connection_speed; if (sb->access_mode == ACPI_I2C_10BIT_MODE) info->flags |= I2C_CLIENT_TEN; return 1; } static const struct acpi_device_id i2c_acpi_ignored_device_ids[] = { /* * ACPI video acpi_devices, which are handled by the acpi-video driver * sometimes contain a SERIAL_TYPE_I2C ACPI resource, ignore these. 
*/ { ACPI_VIDEO_HID, 0 }, {} }; struct i2c_acpi_irq_context { int irq; bool wake_capable; }; static int i2c_acpi_do_lookup(struct acpi_device *adev, struct i2c_acpi_lookup *lookup) { struct i2c_board_info *info = lookup->info; struct list_head resource_list; int ret; if (acpi_bus_get_status(adev)) return -EINVAL; if (!acpi_dev_ready_for_enumeration(adev)) return -ENODEV; if (acpi_match_device_ids(adev, i2c_acpi_ignored_device_ids) == 0) return -ENODEV; memset(info, 0, sizeof(*info)); lookup->device_handle = acpi_device_handle(adev); /* Look up for I2cSerialBus resource */ INIT_LIST_HEAD(&resource_list); ret = acpi_dev_get_resources(adev, &resource_list, i2c_acpi_fill_info, lookup); acpi_dev_free_resource_list(&resource_list); if (ret < 0 || !info->addr) return -EINVAL; return 0; } static int i2c_acpi_add_irq_resource(struct acpi_resource *ares, void *data) { struct i2c_acpi_irq_context *irq_ctx = data; struct resource r; if (irq_ctx->irq > 0) return 1; if (!acpi_dev_resource_interrupt(ares, 0, &r)) return 1; irq_ctx->irq = i2c_dev_irq_from_resources(&r, 1); irq_ctx->wake_capable = r.flags & IORESOURCE_IRQ_WAKECAPABLE; return 1; /* No need to add resource to the list */ } /** * i2c_acpi_get_irq - get device IRQ number from ACPI * @client: Pointer to the I2C client device * @wake_capable: Set to true if the IRQ is wake capable * * Find the IRQ number used by a specific client device. * * Return: The IRQ number or an error code. */ int i2c_acpi_get_irq(struct i2c_client *client, bool *wake_capable) { struct acpi_device *adev = ACPI_COMPANION(&client->dev); struct list_head resource_list; struct i2c_acpi_irq_context irq_ctx = { .irq = -ENOENT, }; int ret; INIT_LIST_HEAD(&resource_list); ret = acpi_dev_get_resources(adev, &resource_list, i2c_acpi_add_irq_resource, &irq_ctx); if (ret < 0) return ret; acpi_dev_free_resource_list(&resource_list); if (irq_ctx.irq == -ENOENT) irq_ctx.irq = acpi_dev_gpio_irq_wake_get(adev, 0, &irq_ctx.wake_capable); if (irq_ctx.irq < 0) return irq_ctx.irq; if (wake_capable) *wake_capable = irq_ctx.wake_capable; return irq_ctx.irq; } static int i2c_acpi_get_info(struct acpi_device *adev, struct i2c_board_info *info, struct i2c_adapter *adapter, acpi_handle *adapter_handle) { struct i2c_acpi_lookup lookup; int ret; memset(&lookup, 0, sizeof(lookup)); lookup.info = info; lookup.index = -1; if (acpi_device_enumerated(adev)) return -EINVAL; ret = i2c_acpi_do_lookup(adev, &lookup); if (ret) return ret; if (adapter) { /* The adapter must match the one in I2cSerialBus() connector */ if (ACPI_HANDLE(&adapter->dev) != lookup.adapter_handle) return -ENODEV; } else { struct acpi_device *adapter_adev; /* The adapter must be present */ adapter_adev = acpi_fetch_acpi_dev(lookup.adapter_handle); if (!adapter_adev) return -ENODEV; if (acpi_bus_get_status(adapter_adev) || !adapter_adev->status.present) return -ENODEV; } info->fwnode = acpi_fwnode_handle(adev); if (adapter_handle) *adapter_handle = lookup.adapter_handle; acpi_set_modalias(adev, dev_name(&adev->dev), info->type, sizeof(info->type)); return 0; } static void i2c_acpi_register_device(struct i2c_adapter *adapter, struct acpi_device *adev, struct i2c_board_info *info) { /* * Skip registration on boards where the ACPI tables are * known to contain bogus I2C devices. 
*/ if (acpi_quirk_skip_i2c_client_enumeration(adev)) return; adev->power.flags.ignore_parent = true; acpi_device_set_enumerated(adev); if (IS_ERR(i2c_new_client_device(adapter, info))) adev->power.flags.ignore_parent = false; } static acpi_status i2c_acpi_add_device(acpi_handle handle, u32 level, void *data, void **return_value) { struct i2c_adapter *adapter = data; struct acpi_device *adev = acpi_fetch_acpi_dev(handle); struct i2c_board_info info; if (!adev || i2c_acpi_get_info(adev, &info, adapter, NULL)) return AE_OK; i2c_acpi_register_device(adapter, adev, &info); return AE_OK; } #define I2C_ACPI_MAX_SCAN_DEPTH 32 /** * i2c_acpi_register_devices - enumerate I2C slave devices behind adapter * @adap: pointer to adapter * * Enumerate all I2C slave devices behind this adapter by walking the ACPI * namespace. When a device is found it will be added to the Linux device * model and bound to the corresponding ACPI handle. */ void i2c_acpi_register_devices(struct i2c_adapter *adap) { struct acpi_device *adev; acpi_status status; if (!has_acpi_companion(&adap->dev)) return; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, I2C_ACPI_MAX_SCAN_DEPTH, i2c_acpi_add_device, NULL, adap, NULL); if (ACPI_FAILURE(status)) dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); if (!adap->dev.parent) return; adev = ACPI_COMPANION(adap->dev.parent); if (!adev) return; acpi_dev_clear_dependencies(adev); } static const struct acpi_device_id i2c_acpi_force_400khz_device_ids[] = { /* * These Silead touchscreen controllers only work at 400KHz, for * some reason they do not work at 100KHz. On some devices the ACPI * tables list another device at their bus as only being capable * of 100KHz, testing has shown that these other devices work fine * at 400KHz (as can be expected of any recent i2c hw) so we force * the speed of the bus to 400 KHz if a Silead device is present. */ { "MSSL1680", 0 }, {} }; static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level, void *data, void **return_value) { struct i2c_acpi_lookup *lookup = data; struct acpi_device *adev = acpi_fetch_acpi_dev(handle); if (!adev || i2c_acpi_do_lookup(adev, lookup)) return AE_OK; if (lookup->search_handle != lookup->adapter_handle) return AE_OK; if (lookup->speed <= lookup->min_speed) lookup->min_speed = lookup->speed; if (acpi_match_device_ids(adev, i2c_acpi_force_400khz_device_ids) == 0) lookup->force_speed = I2C_MAX_FAST_MODE_FREQ; return AE_OK; } /** * i2c_acpi_find_bus_speed - find I2C bus speed from ACPI * @dev: The device owning the bus * * Find the I2C bus speed by walking the ACPI namespace for all I2C slaves * devices connected to this bus and use the speed of slowest device. 
* * Returns the speed in Hz or zero */ u32 i2c_acpi_find_bus_speed(struct device *dev) { struct i2c_acpi_lookup lookup; struct i2c_board_info dummy; acpi_status status; if (!has_acpi_companion(dev)) return 0; memset(&lookup, 0, sizeof(lookup)); lookup.search_handle = ACPI_HANDLE(dev); lookup.min_speed = UINT_MAX; lookup.info = &dummy; lookup.index = -1; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, I2C_ACPI_MAX_SCAN_DEPTH, i2c_acpi_lookup_speed, NULL, &lookup, NULL); if (ACPI_FAILURE(status)) { dev_warn(dev, "unable to find I2C bus speed from ACPI\n"); return 0; } if (lookup.force_speed) { if (lookup.force_speed != lookup.min_speed) dev_warn(dev, FW_BUG "DSDT uses known not-working I2C bus speed %d, forcing it to %d\n", lookup.min_speed, lookup.force_speed); return lookup.force_speed; } else if (lookup.min_speed != UINT_MAX) { return lookup.min_speed; } else { return 0; } } EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed); struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) { struct i2c_adapter *adapter; struct device *dev; dev = bus_find_device(&i2c_bus_type, NULL, handle, device_match_acpi_handle); if (!dev) return NULL; adapter = i2c_verify_adapter(dev); if (!adapter) put_device(dev); return adapter; } EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle); static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { return i2c_find_device_by_fwnode(acpi_fwnode_handle(adev)); } static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, void *arg) { struct acpi_device *adev = arg; struct i2c_board_info info; acpi_handle adapter_handle; struct i2c_adapter *adapter; struct i2c_client *client; switch (value) { case ACPI_RECONFIG_DEVICE_ADD: if (i2c_acpi_get_info(adev, &info, NULL, &adapter_handle)) break; adapter = i2c_acpi_find_adapter_by_handle(adapter_handle); if (!adapter) break; i2c_acpi_register_device(adapter, adev, &info); put_device(&adapter->dev); break; case ACPI_RECONFIG_DEVICE_REMOVE: if (!acpi_device_enumerated(adev)) break; client = i2c_acpi_find_client_by_adev(adev); if (!client) break; i2c_unregister_device(client); put_device(&client->dev); break; } return NOTIFY_OK; } struct notifier_block i2c_acpi_notifier = { .notifier_call = i2c_acpi_notify, }; /** * i2c_acpi_new_device_by_fwnode - Create i2c-client for the Nth I2cSerialBus resource * @fwnode: fwnode with the ACPI resources to get the client from * @index: Index of ACPI resource to get * @info: describes the I2C device; note this is modified (addr gets set) * Context: can sleep * * By default the i2c subsys creates an i2c-client for the first I2cSerialBus * resource of an acpi_device, but some acpi_devices have multiple I2cSerialBus * resources, in that case this function can be used to create an i2c-client * for other I2cSerialBus resources in the Current Resource Settings table. * * Also see i2c_new_client_device, which this function calls to create the * i2c-client. * * Returns a pointer to the new i2c-client, or error pointer in case of failure. * Specifically, -EPROBE_DEFER is returned if the adapter is not found. 
*/ struct i2c_client *i2c_acpi_new_device_by_fwnode(struct fwnode_handle *fwnode, int index, struct i2c_board_info *info) { struct i2c_acpi_lookup lookup; struct i2c_adapter *adapter; struct acpi_device *adev; LIST_HEAD(resource_list); int ret; adev = to_acpi_device_node(fwnode); if (!adev) return ERR_PTR(-ENODEV); memset(&lookup, 0, sizeof(lookup)); lookup.info = info; lookup.device_handle = acpi_device_handle(adev); lookup.index = index; ret = acpi_dev_get_resources(adev, &resource_list, i2c_acpi_fill_info, &lookup); if (ret < 0) return ERR_PTR(ret); acpi_dev_free_resource_list(&resource_list); if (!info->addr) return ERR_PTR(-EADDRNOTAVAIL); adapter = i2c_acpi_find_adapter_by_handle(lookup.adapter_handle); if (!adapter) return ERR_PTR(-EPROBE_DEFER); return i2c_new_client_device(adapter, info); } EXPORT_SYMBOL_GPL(i2c_acpi_new_device_by_fwnode); bool i2c_acpi_waive_d0_probe(struct device *dev) { struct i2c_driver *driver = to_i2c_driver(dev->driver); struct acpi_device *adev = ACPI_COMPANION(dev); return driver->flags & I2C_DRV_ACPI_WAIVE_D0_PROBE && adev && adev->power.state_for_enumeration >= adev->power.state; } EXPORT_SYMBOL_GPL(i2c_acpi_waive_d0_probe); #ifdef CONFIG_ACPI_I2C_OPREGION static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, u8 cmd, u8 *data, u8 data_len) { struct i2c_msg msgs[2]; int ret; u8 *buffer; buffer = kzalloc(data_len, GFP_KERNEL); if (!buffer) return AE_NO_MEMORY; msgs[0].addr = client->addr; msgs[0].flags = client->flags; msgs[0].len = 1; msgs[0].buf = &cmd; msgs[1].addr = client->addr; msgs[1].flags = client->flags | I2C_M_RD; msgs[1].len = data_len; msgs[1].buf = buffer; ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); if (ret < 0) { /* Getting a NACK is unfortunately normal with some DSTDs */ if (ret == -EREMOTEIO) dev_dbg(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n", data_len, client->addr, cmd, ret); else dev_err(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n", data_len, client->addr, cmd, ret); /* 2 transfers must have completed successfully */ } else if (ret == 2) { memcpy(data, buffer, data_len); ret = 0; } else { ret = -EIO; } kfree(buffer); return ret; } static int acpi_gsb_i2c_write_bytes(struct i2c_client *client, u8 cmd, u8 *data, u8 data_len) { struct i2c_msg msgs[1]; u8 *buffer; int ret = AE_OK; buffer = kzalloc(data_len + 1, GFP_KERNEL); if (!buffer) return AE_NO_MEMORY; buffer[0] = cmd; memcpy(buffer + 1, data, data_len); msgs[0].addr = client->addr; msgs[0].flags = client->flags; msgs[0].len = data_len + 1; msgs[0].buf = buffer; ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); kfree(buffer); if (ret < 0) { dev_err(&client->adapter->dev, "i2c write failed: %d\n", ret); return ret; } /* 1 transfer must have completed successfully */ return (ret == 1) ? 
0 : -EIO; } static acpi_status i2c_acpi_space_handler(u32 function, acpi_physical_address command, u32 bits, u64 *value64, void *handler_context, void *region_context) { struct gsb_buffer *gsb = (struct gsb_buffer *)value64; struct i2c_acpi_handler_data *data = handler_context; struct acpi_connection_info *info = &data->info; struct acpi_resource_i2c_serialbus *sb; struct i2c_adapter *adapter = data->adapter; struct i2c_client *client; struct acpi_resource *ares; u32 accessor_type = function >> 16; u8 action = function & ACPI_IO_MASK; acpi_status ret; int status; ret = acpi_buffer_to_resource(info->connection, info->length, &ares); if (ACPI_FAILURE(ret)) return ret; client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) { ret = AE_NO_MEMORY; goto err; } if (!value64 || !i2c_acpi_get_i2c_resource(ares, &sb)) { ret = AE_BAD_PARAMETER; goto err; } client->adapter = adapter; client->addr = sb->slave_address; if (sb->access_mode == ACPI_I2C_10BIT_MODE) client->flags |= I2C_CLIENT_TEN; switch (accessor_type) { case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV: if (action == ACPI_READ) { status = i2c_smbus_read_byte(client); if (status >= 0) { gsb->bdata = status; status = 0; } } else { status = i2c_smbus_write_byte(client, gsb->bdata); } break; case ACPI_GSB_ACCESS_ATTRIB_BYTE: if (action == ACPI_READ) { status = i2c_smbus_read_byte_data(client, command); if (status >= 0) { gsb->bdata = status; status = 0; } } else { status = i2c_smbus_write_byte_data(client, command, gsb->bdata); } break; case ACPI_GSB_ACCESS_ATTRIB_WORD: if (action == ACPI_READ) { status = i2c_smbus_read_word_data(client, command); if (status >= 0) { gsb->wdata = status; status = 0; } } else { status = i2c_smbus_write_word_data(client, command, gsb->wdata); } break; case ACPI_GSB_ACCESS_ATTRIB_BLOCK: if (action == ACPI_READ) { status = i2c_smbus_read_block_data(client, command, gsb->data); if (status >= 0) { gsb->len = status; status = 0; } } else { status = i2c_smbus_write_block_data(client, command, gsb->len, gsb->data); } break; case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE: if (action == ACPI_READ) { status = acpi_gsb_i2c_read_bytes(client, command, gsb->data, info->access_length); } else { status = acpi_gsb_i2c_write_bytes(client, command, gsb->data, info->access_length); } break; default: dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n", accessor_type, client->addr); ret = AE_BAD_PARAMETER; goto err; } gsb->status = status; err: kfree(client); ACPI_FREE(ares); return ret; } int i2c_acpi_install_space_handler(struct i2c_adapter *adapter) { acpi_handle handle; struct i2c_acpi_handler_data *data; acpi_status status; if (!adapter->dev.parent) return -ENODEV; handle = ACPI_HANDLE(adapter->dev.parent); if (!handle) return -ENODEV; data = kzalloc(sizeof(struct i2c_acpi_handler_data), GFP_KERNEL); if (!data) return -ENOMEM; data->adapter = adapter; status = acpi_bus_attach_private_data(handle, (void *)data); if (ACPI_FAILURE(status)) { kfree(data); return -ENOMEM; } status = acpi_install_address_space_handler(handle, ACPI_ADR_SPACE_GSBUS, &i2c_acpi_space_handler, NULL, data); if (ACPI_FAILURE(status)) { dev_err(&adapter->dev, "Error installing i2c space handler\n"); acpi_bus_detach_private_data(handle); kfree(data); return -ENOMEM; } return 0; } void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter) { acpi_handle handle; struct i2c_acpi_handler_data *data; acpi_status status; if (!adapter->dev.parent) return; handle = ACPI_HANDLE(adapter->dev.parent); if (!handle) return; 
acpi_remove_address_space_handler(handle, ACPI_ADR_SPACE_GSBUS, &i2c_acpi_space_handler); status = acpi_bus_get_private_data(handle, (void **)&data); if (ACPI_SUCCESS(status)) kfree(data); acpi_bus_detach_private_data(handle); } #endif /* CONFIG_ACPI_I2C_OPREGION */
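/*
 * Editor's note: a minimal, hypothetical sketch of the multi-I2cSerialBus case
 * handled by i2c_acpi_new_device_by_fwnode() above: instantiating an i2c-client
 * for the second (index 1) I2cSerialBus resource of an ACPI device.  The helper
 * name and the "demo-chip" type string are assumptions for illustration.
 */
#include <linux/i2c.h>
#include <linux/property.h>

static struct i2c_client *demo_get_second_client(struct fwnode_handle *fwnode)
{
	struct i2c_board_info info = {
		.type = "demo-chip",	/* assumed device name */
	};

	/*
	 * info.addr is filled in from the ACPI resource; the call returns an
	 * ERR_PTR such as -EPROBE_DEFER when the adapter is not ready yet.
	 */
	return i2c_acpi_new_device_by_fwnode(fwnode, 1, &info);
}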
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PROFILE_H #define _LINUX_PROFILE_H #include <linux/kernel.h> #include <linux/init.h> #include <linux/cpumask.h> #include <linux/cache.h> #include <asm/errno.h> #define CPU_PROFILING 1 #define SCHED_PROFILING 2 #define SLEEP_PROFILING 3 #define KVM_PROFILING 4 struct proc_dir_entry; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) int create_proc_profile(void); #else static inline int create_proc_profile(void) { return 0; } #endif #ifdef CONFIG_PROFILING extern int prof_on __read_mostly; /* init basic kernel profiler */ int profile_init(void); int profile_setup(char *str); void profile_tick(int type); int setup_profiling_timer(unsigned int multiplier); /* * Add multiple profiler hits to a given address: */ void profile_hits(int type, void *ip, unsigned int nr_hits); /* * Single profiler hit: */ static inline void profile_hit(int type, void *ip) { /* * Speedup for the common (no profiling enabled) case: */ if (unlikely(prof_on == type)) profile_hits(type, ip, 1); } struct task_struct; struct mm_struct; #else #define prof_on 0 static inline int profile_init(void) { return 0; } static inline void profile_tick(int type) { return; } static inline void profile_hits(int type, void *ip, unsigned int nr_hits) { return; } static inline void profile_hit(int type, void *ip) { return; } #endif /* CONFIG_PROFILING */ #endif /* _LINUX_PROFILE_H */
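/*
 * Editor's note: a hypothetical one-line user of the API above.  profile_hit()
 * stays cheap when profiling is off because it only compares prof_on against
 * the requested type before calling profile_hits().  demo_account_sample() is
 * an assumed name for illustration.
 */
#include <linux/profile.h>

static inline void demo_account_sample(void *ip)
{
	profile_hit(CPU_PROFILING, ip);	/* records one hit against @ip */
}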
/* SPDX-License-Identifier: GPL-2.0 */ /* * Percpu refcounts: * (C) 2012 Google, Inc. * Author: Kent Overstreet <koverstreet@google.com> * * This implements a refcount with similar semantics to atomic_t - atomic_inc(), * atomic_dec_and_test() - but percpu. * * There's one important difference between percpu refs and normal atomic_t * refcounts; you have to keep track of your initial refcount, and then when you * start shutting down you call percpu_ref_kill() _before_ dropping the initial * refcount. * * The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less * than an atomic_t - this is because of the way shutdown works, see * percpu_ref_kill()/PERCPU_COUNT_BIAS. * * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill() * puts the ref back in single atomic_t mode, collecting the per cpu refs and * issuing the appropriate barriers, and then marks the ref as shutting down so * that percpu_ref_put() will check for the ref hitting 0. After it returns, * it's safe to drop the initial ref. * * USAGE: * * See fs/aio.c for some example usage; it's used there for struct kioctx, which * is created when userspace calls io_setup(), and destroyed when userspace * calls io_destroy() or the process exits. * * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it * removes the kioctx from the process's table of kioctxs and kills percpu_ref. * After that, there can't be any new users of the kioctx (from lookup_ioctx()) * and it's then safe to drop the initial ref with percpu_ref_put(). * * Note that the free path, free_ioctx(), needs to go through explicit call_rcu() * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't * imply RCU grace periods of any kind and if a user wants to combine percpu_ref * with RCU protection, it must be done explicitly. 
* * Code that does a two stage shutdown like this often needs some kind of * explicit synchronization to ensure the initial refcount can only be dropped * once - percpu_ref_kill() does this for you, it returns true once and false if * someone else already called it. The aio code uses it this way, but it's not * necessary if the code has some other mechanism to synchronize teardown. * around. */ #ifndef _LINUX_PERCPU_REFCOUNT_H #define _LINUX_PERCPU_REFCOUNT_H #include <linux/atomic.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/types.h> #include <linux/gfp.h> struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); /* flags set in the lower bits of percpu_ref->percpu_count_ptr */ enum { __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */ __PERCPU_REF_DEAD = 1LU << 1, /* (being) killed */ __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD, __PERCPU_REF_FLAG_BITS = 2, }; /* @flags for percpu_ref_init() */ enum { /* * Start w/ ref == 1 in atomic mode. Can be switched to percpu * operation using percpu_ref_switch_to_percpu(). If initialized * with this flag, the ref will stay in atomic mode until * percpu_ref_switch_to_percpu() is invoked on it. * Implies ALLOW_REINIT. */ PERCPU_REF_INIT_ATOMIC = 1 << 0, /* * Start dead w/ ref == 0 in atomic mode. Must be revived with * percpu_ref_reinit() before used. Implies INIT_ATOMIC and * ALLOW_REINIT. */ PERCPU_REF_INIT_DEAD = 1 << 1, /* * Allow switching from atomic mode to percpu mode. */ PERCPU_REF_ALLOW_REINIT = 1 << 2, }; struct percpu_ref_data { atomic_long_t count; percpu_ref_func_t *release; percpu_ref_func_t *confirm_switch; bool force_atomic:1; bool allow_reinit:1; struct rcu_head rcu; struct percpu_ref *ref; }; struct percpu_ref { /* * The low bit of the pointer indicates whether the ref is in percpu * mode; if set, then get/put will manipulate the atomic_t. */ unsigned long percpu_count_ptr; /* * 'percpu_ref' is often embedded into user structure, and only * 'percpu_count_ptr' is required in fast path, move other fields * into 'percpu_ref_data', so we can reduce memory footprint in * fast path. */ struct percpu_ref_data *data; }; int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, unsigned int flags, gfp_t gfp); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch); void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref); void percpu_ref_switch_to_percpu(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); void percpu_ref_resurrect(struct percpu_ref *ref); void percpu_ref_reinit(struct percpu_ref *ref); bool percpu_ref_is_zero(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref * @ref: percpu_ref to kill * * Must be used to drop the initial ref on a percpu refcount; must be called * precisely once before shutdown. * * Switches @ref into atomic mode before gathering up the percpu counters * and dropping the initial ref. * * There are no implied RCU grace periods between kill and release. */ static inline void percpu_ref_kill(struct percpu_ref *ref) { percpu_ref_kill_and_confirm(ref, NULL); } /* * Internal helper. Don't use outside percpu-refcount proper. The * function doesn't return the pointer and let the caller test it for NULL * because doing so forces the compiler to generate two conditional * branches as it can't assume that @ref->percpu_count is not NULL. 
*/ static inline bool __ref_is_percpu(struct percpu_ref *ref, unsigned long __percpu **percpu_countp) { unsigned long percpu_ptr; /* * The value of @ref->percpu_count_ptr is tested for * !__PERCPU_REF_ATOMIC, which may be set asynchronously, and then * used as a pointer. If the compiler generates a separate fetch * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in * between contaminating the pointer value, meaning that * READ_ONCE() is required when fetching it. * * The dependency ordering from the READ_ONCE() pairs * with smp_store_release() in __percpu_ref_switch_to_percpu(). */ percpu_ptr = READ_ONCE(ref->percpu_count_ptr); /* * Theoretically, the following could test just ATOMIC; however, * then we'd have to mask off DEAD separately as DEAD may be * visible without ATOMIC if we race with percpu_ref_kill(). DEAD * implies ATOMIC anyway. Test them together. */ if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; return true; } /** * percpu_ref_get_many - increment a percpu refcount * @ref: percpu_ref to get * @nr: number of references to get * * Analogous to atomic_long_add(). * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) this_cpu_add(*percpu_count, nr); else atomic_long_add(nr, &ref->data->count); rcu_read_unlock(); } /** * percpu_ref_get - increment a percpu refcount * @ref: percpu_ref to get * * Analogous to atomic_long_inc(). * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_get(struct percpu_ref *ref) { percpu_ref_get_many(ref, 1); } /** * percpu_ref_tryget_many - try to increment a percpu refcount * @ref: percpu_ref to try-get * @nr: number of references to get * * Increment a percpu refcount by @nr unless its count already reached zero. * Returns %true on success; %false on failure. * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; bool ret; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_add(*percpu_count, nr); ret = true; } else { ret = atomic_long_add_unless(&ref->data->count, nr, 0); } rcu_read_unlock(); return ret; } /** * percpu_ref_tryget - try to increment a percpu refcount * @ref: percpu_ref to try-get * * Increment a percpu refcount unless its count already reached zero. * Returns %true on success; %false on failure. * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget(struct percpu_ref *ref) { return percpu_ref_tryget_many(ref, 1); } /** * percpu_ref_tryget_live_rcu - same as percpu_ref_tryget_live() but the * caller is responsible for taking RCU. * * This function is safe to call as long as @ref is between init and exit. 
*/ static inline bool percpu_ref_tryget_live_rcu(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; bool ret = false; WARN_ON_ONCE(!rcu_read_lock_held()); if (likely(__ref_is_percpu(ref, &percpu_count))) { this_cpu_inc(*percpu_count); ret = true; } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { ret = atomic_long_inc_not_zero(&ref->data->count); } return ret; } /** * percpu_ref_tryget_live - try to increment a live percpu refcount * @ref: percpu_ref to try-get * * Increment a percpu refcount unless it has already been killed. Returns * %true on success; %false on failure. * * Completion of percpu_ref_kill() in itself doesn't guarantee that this * function will fail. For such guarantee, percpu_ref_kill_and_confirm() * should be used. After the confirm_kill callback is invoked, it's * guaranteed that no new reference will be given out by * percpu_ref_tryget_live(). * * This function is safe to call as long as @ref is between init and exit. */ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { bool ret = false; rcu_read_lock(); ret = percpu_ref_tryget_live_rcu(ref); rcu_read_unlock(); return ret; } /** * percpu_ref_put_many - decrement a percpu refcount * @ref: percpu_ref to put * @nr: number of references to put * * Decrement the refcount, and if 0, call the release function (which was passed * to percpu_ref_init()) * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) this_cpu_sub(*percpu_count, nr); else if (unlikely(atomic_long_sub_and_test(nr, &ref->data->count))) ref->data->release(ref); rcu_read_unlock(); } /** * percpu_ref_put - decrement a percpu refcount * @ref: percpu_ref to put * * Decrement the refcount, and if 0, call the release function (which was passed * to percpu_ref_init()) * * This function is safe to call as long as @ref is between init and exit. */ static inline void percpu_ref_put(struct percpu_ref *ref) { percpu_ref_put_many(ref, 1); } /** * percpu_ref_is_dying - test whether a percpu refcount is dying or dead * @ref: percpu_ref to test * * Returns %true if @ref is dying or dead. * * This function is safe to call as long as @ref is between init and exit * and the caller is responsible for synchronizing against state changes. */ static inline bool percpu_ref_is_dying(struct percpu_ref *ref) { return ref->percpu_count_ptr & __PERCPU_REF_DEAD; } #endif
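/*
 * Editor's note: a minimal, hypothetical sketch of the percpu_ref lifecycle
 * described above (init -> get/put -> kill -> release).  struct demo_obj and
 * the demo_* helpers are assumed names for illustration.
 */
#include <linux/kernel.h>
#include <linux/percpu-refcount.h>
#include <linux/slab.h>

struct demo_obj {
	struct percpu_ref ref;
};

static void demo_obj_release(struct percpu_ref *ref)
{
	struct demo_obj *obj = container_of(ref, struct demo_obj, ref);

	percpu_ref_exit(&obj->ref);	/* the release callback is a valid place for this */
	kfree(obj);
}

static struct demo_obj *demo_obj_create(void)
{
	struct demo_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;
	/* starts with the initial reference held, in percpu mode */
	if (percpu_ref_init(&obj->ref, demo_obj_release, 0, GFP_KERNEL)) {
		kfree(obj);
		return NULL;
	}
	return obj;
}

static void demo_obj_teardown(struct demo_obj *obj)
{
	/*
	 * Switch to atomic mode and drop the initial reference exactly once;
	 * demo_obj_release() runs after the last percpu_ref_put().
	 */
	percpu_ref_kill(&obj->ref);
}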
// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/main.c - Where the driver meets power management. * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab * * The driver model core calls device_pm_add() when a device is registered. * This will initialize the embedded device_pm_info object in the device * and add it to the list of power-controlled devices. sysfs entries for * controlling device power management will also be added. * * A separate list is used for keeping track of power info, because the power * domain dependencies may differ from the ancestral dependencies that the * subsystem list maintains. */ #define pr_fmt(fmt) "PM: " fmt #define dev_fmt pr_fmt #include <linux/device.h> #include <linux/export.h> #include <linux/mutex.h> #include <linux/pm.h> #include <linux/pm_runtime.h> #include <linux/pm-trace.h> #include <linux/pm_wakeirq.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/async.h> #include <linux/suspend.h> #include <trace/events/power.h> #include <linux/cpufreq.h> #include <linux/devfreq.h> #include <linux/timer.h> #include "../base.h" #include "power.h" typedef int (*pm_callback_t)(struct device *); #define list_for_each_entry_rcu_locked(pos, head, member) \ list_for_each_entry_rcu(pos, head, member, \ device_links_read_lock_held()) /* * The entries in the dpm_list list are in a depth first order, simply * because children are guaranteed to be discovered after parents, and * are inserted at the back of the list on discovery.
* * Since device_pm_add() may be called with a device lock held, * we must never try to acquire a device lock while holding * dpm_list_mutex. */ LIST_HEAD(dpm_list); static LIST_HEAD(dpm_prepared_list); static LIST_HEAD(dpm_suspended_list); static LIST_HEAD(dpm_late_early_list); static LIST_HEAD(dpm_noirq_list); static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; static int async_error; static const char *pm_verb(int event) { switch (event) { case PM_EVENT_SUSPEND: return "suspend"; case PM_EVENT_RESUME: return "resume"; case PM_EVENT_FREEZE: return "freeze"; case PM_EVENT_QUIESCE: return "quiesce"; case PM_EVENT_HIBERNATE: return "hibernate"; case PM_EVENT_THAW: return "thaw"; case PM_EVENT_RESTORE: return "restore"; case PM_EVENT_RECOVER: return "recover"; default: return "(unknown PM event)"; } } /** * device_pm_sleep_init - Initialize system suspend-related device fields. * @dev: Device object being initialized. */ void device_pm_sleep_init(struct device *dev) { dev->power.is_prepared = false; dev->power.is_suspended = false; dev->power.is_noirq_suspended = false; dev->power.is_late_suspended = false; init_completion(&dev->power.completion); complete_all(&dev->power.completion); dev->power.wakeup = NULL; INIT_LIST_HEAD(&dev->power.entry); } /** * device_pm_lock - Lock the list of active devices used by the PM core. */ void device_pm_lock(void) { mutex_lock(&dpm_list_mtx); } /** * device_pm_unlock - Unlock the list of active devices used by the PM core. */ void device_pm_unlock(void) { mutex_unlock(&dpm_list_mtx); } /** * device_pm_add - Add a device to the PM core's list of active devices. * @dev: Device to add to the list. */ void device_pm_add(struct device *dev) { /* Skip PM setup/initialization. */ if (device_pm_not_required(dev)) return; pr_debug("Adding info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); device_pm_check_callbacks(dev); mutex_lock(&dpm_list_mtx); if (dev->parent && dev->parent->power.is_prepared) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); dev->power.in_dpm_list = true; mutex_unlock(&dpm_list_mtx); } /** * device_pm_remove - Remove a device from the PM core's list of active devices. * @dev: Device to be removed from the list. */ void device_pm_remove(struct device *dev) { if (device_pm_not_required(dev)) return; pr_debug("Removing info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); complete_all(&dev->power.completion); mutex_lock(&dpm_list_mtx); list_del_init(&dev->power.entry); dev->power.in_dpm_list = false; mutex_unlock(&dpm_list_mtx); device_wakeup_disable(dev); pm_runtime_remove(dev); device_pm_check_callbacks(dev); } /** * device_pm_move_before - Move device in the PM core's list of active devices. * @deva: Device to move in dpm_list. * @devb: Device @deva should come before. */ void device_pm_move_before(struct device *deva, struct device *devb) { pr_debug("Moving %s:%s before %s:%s\n", deva->bus ? deva->bus->name : "No Bus", dev_name(deva), devb->bus ? devb->bus->name : "No Bus", dev_name(devb)); /* Delete deva from dpm_list and reinsert before devb. */ list_move_tail(&deva->power.entry, &devb->power.entry); } /** * device_pm_move_after - Move device in the PM core's list of active devices. * @deva: Device to move in dpm_list. * @devb: Device @deva should come after. */ void device_pm_move_after(struct device *deva, struct device *devb) { pr_debug("Moving %s:%s after %s:%s\n", deva->bus ? 
deva->bus->name : "No Bus", dev_name(deva), devb->bus ? devb->bus->name : "No Bus", dev_name(devb)); /* Delete deva from dpm_list and reinsert after devb. */ list_move(&deva->power.entry, &devb->power.entry); } /** * device_pm_move_last - Move device to end of the PM core's list of devices. * @dev: Device to move in dpm_list. */ void device_pm_move_last(struct device *dev) { pr_debug("Moving %s:%s to end of list\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); list_move_tail(&dev->power.entry, &dpm_list); } static ktime_t initcall_debug_start(struct device *dev, void *cb) { if (!pm_print_times_enabled) return 0; dev_info(dev, "calling %pS @ %i, parent: %s\n", cb, task_pid_nr(current), dev->parent ? dev_name(dev->parent) : "none"); return ktime_get(); } static void initcall_debug_report(struct device *dev, ktime_t calltime, void *cb, int error) { ktime_t rettime; if (!pm_print_times_enabled) return; rettime = ktime_get(); dev_info(dev, "%pS returned %d after %Ld usecs\n", cb, error, (unsigned long long)ktime_us_delta(rettime, calltime)); } /** * dpm_wait - Wait for a PM operation to complete. * @dev: Device to wait for. * @async: If unset, wait only if the device's power.async_suspend flag is set. */ static void dpm_wait(struct device *dev, bool async) { if (!dev) return; if (async || (pm_async_enabled && dev->power.async_suspend)) wait_for_completion(&dev->power.completion); } static int dpm_wait_fn(struct device *dev, void *async_ptr) { dpm_wait(dev, *((bool *)async_ptr)); return 0; } static void dpm_wait_for_children(struct device *dev, bool async) { device_for_each_child(dev, &async, dpm_wait_fn); } static void dpm_wait_for_suppliers(struct device *dev, bool async) { struct device_link *link; int idx; idx = device_links_read_lock(); /* * If the supplier goes away right after we've checked the link to it, * we'll wait for its completion to change the state, but that's fine, * because the only things that will block as a result are the SRCU * callbacks freeing the link objects for the links in the list we're * walking. */ list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_wait(link->supplier, async); device_links_read_unlock(idx); } static bool dpm_wait_for_superior(struct device *dev, bool async) { struct device *parent; /* * If the device is resumed asynchronously and the parent's callback * deletes both the device and the parent itself, the parent object may * be freed while this function is running, so avoid that by reference * counting the parent once more unless the device has been deleted * already (in which case return right away). */ mutex_lock(&dpm_list_mtx); if (!device_pm_initialized(dev)) { mutex_unlock(&dpm_list_mtx); return false; } parent = get_device(dev->parent); mutex_unlock(&dpm_list_mtx); dpm_wait(parent, async); put_device(parent); dpm_wait_for_suppliers(dev, async); /* * If the parent's callback has deleted the device, attempting to resume * it would be invalid, so avoid doing that then. */ return device_pm_initialized(dev); } static void dpm_wait_for_consumers(struct device *dev, bool async) { struct device_link *link; int idx; idx = device_links_read_lock(); /* * The status of a device link can only be changed from "dormant" by a * probe, but that cannot happen during system suspend/resume. In * theory it can change to "dormant" at that time, but then it is * reasonable to wait for the target device anyway (eg. 
if it goes * away, it's better to wait for it to go away completely and then * continue instead of trying to continue in parallel with its * unregistration). */ list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_wait(link->consumer, async); device_links_read_unlock(idx); } static void dpm_wait_for_subordinate(struct device *dev, bool async) { dpm_wait_for_children(dev, async); dpm_wait_for_consumers(dev, async); } /** * pm_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. */ static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state) { switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: return ops->suspend; case PM_EVENT_RESUME: return ops->resume; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze; case PM_EVENT_HIBERNATE: return ops->poweroff; case PM_EVENT_THAW: case PM_EVENT_RECOVER: return ops->thaw; case PM_EVENT_RESTORE: return ops->restore; #endif /* CONFIG_HIBERNATE_CALLBACKS */ } return NULL; } /** * pm_late_early_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. * * Runtime PM is disabled for @dev while this function is being executed. */ static pm_callback_t pm_late_early_op(const struct dev_pm_ops *ops, pm_message_t state) { switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: return ops->suspend_late; case PM_EVENT_RESUME: return ops->resume_early; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze_late; case PM_EVENT_HIBERNATE: return ops->poweroff_late; case PM_EVENT_THAW: case PM_EVENT_RECOVER: return ops->thaw_early; case PM_EVENT_RESTORE: return ops->restore_early; #endif /* CONFIG_HIBERNATE_CALLBACKS */ } return NULL; } /** * pm_noirq_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. * * The driver of @dev will not receive interrupts while this function is being * executed. */ static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t state) { switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: return ops->suspend_noirq; case PM_EVENT_RESUME: return ops->resume_noirq; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze_noirq; case PM_EVENT_HIBERNATE: return ops->poweroff_noirq; case PM_EVENT_THAW: case PM_EVENT_RECOVER: return ops->thaw_noirq; case PM_EVENT_RESTORE: return ops->restore_noirq; #endif /* CONFIG_HIBERNATE_CALLBACKS */ } return NULL; } static void pm_dev_dbg(struct device *dev, pm_message_t state, const char *info) { dev_dbg(dev, "%s%s%s driver flags: %x\n", info, pm_verb(state.event), ((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ? 
", may wakeup" : "", dev->power.driver_flags); } static void pm_dev_err(struct device *dev, pm_message_t state, const char *info, int error) { dev_err(dev, "failed to %s%s: error %d\n", pm_verb(state.event), info, error); } static void dpm_show_time(ktime_t starttime, pm_message_t state, int error, const char *info) { ktime_t calltime; u64 usecs64; int usecs; calltime = ktime_get(); usecs64 = ktime_to_ns(ktime_sub(calltime, starttime)); do_div(usecs64, NSEC_PER_USEC); usecs = usecs64; if (usecs == 0) usecs = 1; pm_pr_dbg("%s%s%s of devices %s after %ld.%03ld msecs\n", info ?: "", info ? " " : "", pm_verb(state.event), error ? "aborted" : "complete", usecs / USEC_PER_MSEC, usecs % USEC_PER_MSEC); } static int dpm_run_callback(pm_callback_t cb, struct device *dev, pm_message_t state, const char *info) { ktime_t calltime; int error; if (!cb) return 0; calltime = initcall_debug_start(dev, cb); pm_dev_dbg(dev, state, info); trace_device_pm_callback_start(dev, info, state.event); error = cb(dev); trace_device_pm_callback_end(dev, error); suspend_report_result(dev, cb, error); initcall_debug_report(dev, calltime, cb, error); return error; } #ifdef CONFIG_DPM_WATCHDOG struct dpm_watchdog { struct device *dev; struct task_struct *tsk; struct timer_list timer; }; #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \ struct dpm_watchdog wd /** * dpm_watchdog_handler - Driver suspend / resume watchdog handler. * @t: The timer that PM watchdog depends on. * * Called when a driver has timed out suspending or resuming. * There's not much we can do here to recover so panic() to * capture a crash-dump in pstore. */ static void dpm_watchdog_handler(struct timer_list *t) { struct dpm_watchdog *wd = from_timer(wd, t, timer); dev_emerg(wd->dev, "**** DPM device timeout ****\n"); show_stack(wd->tsk, NULL, KERN_EMERG); panic("%s %s: unrecoverable failure\n", dev_driver_string(wd->dev), dev_name(wd->dev)); } /** * dpm_watchdog_set - Enable pm watchdog for given device. * @wd: Watchdog. Must be allocated on the stack. * @dev: Device to handle. */ static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev) { struct timer_list *timer = &wd->timer; wd->dev = dev; wd->tsk = current; timer_setup_on_stack(timer, dpm_watchdog_handler, 0); /* use same timeout value for both suspend and resume */ timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT; add_timer(timer); } /** * dpm_watchdog_clear - Disable suspend/resume watchdog. * @wd: Watchdog to disable. */ static void dpm_watchdog_clear(struct dpm_watchdog *wd) { struct timer_list *timer = &wd->timer; del_timer_sync(timer); destroy_timer_on_stack(timer); } #else #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) #define dpm_watchdog_set(x, y) #define dpm_watchdog_clear(x) #endif /*------------------------- Resume routines -------------------------*/ /** * dev_pm_skip_resume - System-wide device resume optimization check. * @dev: Target device. * * Return: * - %false if the transition under way is RESTORE. * - Return value of dev_pm_skip_suspend() if the transition under way is THAW. * - The logical negation of %power.must_resume otherwise (that is, when the * transition under way is RESUME). 
*/ bool dev_pm_skip_resume(struct device *dev) { if (pm_transition.event == PM_EVENT_RESTORE) return false; if (pm_transition.event == PM_EVENT_THAW) return dev_pm_skip_suspend(dev); return !dev->power.must_resume; } static bool is_async(struct device *dev) { return dev->power.async_suspend && pm_async_enabled && !pm_trace_is_enabled(); } static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); if (is_async(dev)) { dev->power.async_in_progress = true; get_device(dev); if (async_schedule_dev_nocall(func, dev)) return true; put_device(dev); } /* * Because async_schedule_dev_nocall() above has returned false or it * has not been called at all, func() is not running and it is safe to * update the async_in_progress flag without extra synchronization. */ dev->power.async_in_progress = false; return false; } /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * The driver of @dev will not receive interrupts while this function is being * executed. */ static void device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; bool skip_resume; int error = 0; TRACE_DEVICE(dev); TRACE_RESUME(0); if (dev->power.syscore || dev->power.direct_complete) goto Out; if (!dev->power.is_noirq_suspended) goto Out; if (!dpm_wait_for_superior(dev, async)) goto Out; skip_resume = dev_pm_skip_resume(dev); /* * If the driver callback is skipped below or by the middle layer * callback and device_resume_early() also skips the driver callback for * this device later, it needs to appear as "suspended" to PM-runtime, * so change its status accordingly. * * Otherwise, the device is going to be resumed, so set its PM-runtime * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set * to avoid confusing drivers that don't use it. */ if (skip_resume) pm_runtime_set_suspended(dev); else if (dev_pm_skip_suspend(dev)) pm_runtime_set_active(dev); if (dev->pm_domain) { info = "noirq power domain "; callback = pm_noirq_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { info = "noirq type "; callback = pm_noirq_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { info = "noirq class "; callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { info = "noirq bus "; callback = pm_noirq_op(dev->bus->pm, state); } if (callback) goto Run; if (skip_resume) goto Skip; if (dev->driver && dev->driver->pm) { info = "noirq driver "; callback = pm_noirq_op(dev->driver->pm, state); } Run: error = dpm_run_callback(callback, dev, state, info); Skip: dev->power.is_noirq_suspended = false; Out: complete_all(&dev->power.completion); TRACE_RESUME(error); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? 
" async noirq" : " noirq", error); } } static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; device_resume_noirq(dev, pm_transition, true); put_device(dev); } static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true); async_error = 0; pm_transition = state; mutex_lock(&dpm_list_mtx); /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ list_for_each_entry(dev, &dpm_noirq_list, power.entry) dpm_async_fn(dev, async_resume_noirq); while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); list_move_tail(&dev->power.entry, &dpm_late_early_list); if (!dev->power.async_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); device_resume_noirq(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "noirq"); if (async_error) dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false); } /** * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices. * @state: PM transition of the system being carried out. * * Invoke the "noirq" resume callbacks for all devices in dpm_noirq_list and * allow device drivers' interrupt handlers to be called. */ void dpm_resume_noirq(pm_message_t state) { dpm_noirq_resume_devices(state); resume_device_irqs(); device_wakeup_disarm_wake_irqs(); } /** * device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ static void device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; TRACE_DEVICE(dev); TRACE_RESUME(0); if (dev->power.syscore || dev->power.direct_complete) goto Out; if (!dev->power.is_late_suspended) goto Out; if (!dpm_wait_for_superior(dev, async)) goto Out; if (dev->pm_domain) { info = "early power domain "; callback = pm_late_early_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { info = "early type "; callback = pm_late_early_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { info = "early class "; callback = pm_late_early_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { info = "early bus "; callback = pm_late_early_op(dev->bus->pm, state); } if (callback) goto Run; if (dev_pm_skip_resume(dev)) goto Skip; if (dev->driver && dev->driver->pm) { info = "early driver "; callback = pm_late_early_op(dev->driver->pm, state); } Run: error = dpm_run_callback(callback, dev, state, info); Skip: dev->power.is_late_suspended = false; Out: TRACE_RESUME(error); pm_runtime_enable(dev); complete_all(&dev->power.completion); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async early" : " early", error); } } static void async_resume_early(void *data, async_cookie_t cookie) { struct device *dev = data; device_resume_early(dev, pm_transition, true); put_device(dev); } /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. 
*/ void dpm_resume_early(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume_early"), state.event, true); async_error = 0; pm_transition = state; mutex_lock(&dpm_list_mtx); /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ list_for_each_entry(dev, &dpm_late_early_list, power.entry) dpm_async_fn(dev, async_resume_early); while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); list_move_tail(&dev->power.entry, &dpm_suspended_list); if (!dev->power.async_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); device_resume_early(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "early"); if (async_error) dpm_save_failed_step(SUSPEND_RESUME_EARLY); trace_suspend_resume(TPS("dpm_resume_early"), state.event, false); } /** * dpm_resume_start - Execute "noirq" and "early" device callbacks. * @state: PM transition of the system being carried out. */ void dpm_resume_start(pm_message_t state) { dpm_resume_noirq(state); dpm_resume_early(state); } EXPORT_SYMBOL_GPL(dpm_resume_start); /** * device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ static void device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); TRACE_RESUME(0); if (dev->power.syscore) goto Complete; if (dev->power.direct_complete) { /* Match the pm_runtime_disable() in __device_suspend(). */ pm_runtime_enable(dev); goto Complete; } if (!dpm_wait_for_superior(dev, async)) goto Complete; dpm_watchdog_set(&wd, dev); device_lock(dev); /* * This is a fib. But we'll allow new children to be added below * a resumed device, even if the device hasn't been completed yet. */ dev->power.is_prepared = false; if (!dev->power.is_suspended) goto Unlock; if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); goto Driver; } if (dev->type && dev->type->pm) { info = "type "; callback = pm_op(dev->type->pm, state); goto Driver; } if (dev->class && dev->class->pm) { info = "class "; callback = pm_op(dev->class->pm, state); goto Driver; } if (dev->bus) { if (dev->bus->pm) { info = "bus "; callback = pm_op(dev->bus->pm, state); } else if (dev->bus->resume) { info = "legacy bus "; callback = dev->bus->resume; goto End; } } Driver: if (!callback && dev->driver && dev->driver->pm) { info = "driver "; callback = pm_op(dev->driver->pm, state); } End: error = dpm_run_callback(callback, dev, state, info); dev->power.is_suspended = false; Unlock: device_unlock(dev); dpm_watchdog_clear(&wd); Complete: complete_all(&dev->power.completion); TRACE_RESUME(error); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async" : "", error); } } static void async_resume(void *data, async_cookie_t cookie) { struct device *dev = data; device_resume(dev, pm_transition, true); put_device(dev); } /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. * * Execute the appropriate "resume" callback for all devices whose status * indicates that they are suspended. 
*/ void dpm_resume(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume"), state.event, true); might_sleep(); pm_transition = state; async_error = 0; mutex_lock(&dpm_list_mtx); /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ list_for_each_entry(dev, &dpm_suspended_list, power.entry) dpm_async_fn(dev, async_resume); while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); list_move_tail(&dev->power.entry, &dpm_prepared_list); if (!dev->power.async_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); device_resume(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, NULL); if (async_error) dpm_save_failed_step(SUSPEND_RESUME); cpufreq_resume(); devfreq_resume(); trace_suspend_resume(TPS("dpm_resume"), state.event, false); } /** * device_complete - Complete a PM transition for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. */ static void device_complete(struct device *dev, pm_message_t state) { void (*callback)(struct device *) = NULL; const char *info = NULL; if (dev->power.syscore) goto out; device_lock(dev); if (dev->pm_domain) { info = "completing power domain "; callback = dev->pm_domain->ops.complete; } else if (dev->type && dev->type->pm) { info = "completing type "; callback = dev->type->pm->complete; } else if (dev->class && dev->class->pm) { info = "completing class "; callback = dev->class->pm->complete; } else if (dev->bus && dev->bus->pm) { info = "completing bus "; callback = dev->bus->pm->complete; } if (!callback && dev->driver && dev->driver->pm) { info = "completing driver "; callback = dev->driver->pm->complete; } if (callback) { pm_dev_dbg(dev, state, info); callback(dev); } device_unlock(dev); out: pm_runtime_put(dev); } /** * dpm_complete - Complete a PM transition for all non-sysdev devices. * @state: PM transition of the system being carried out. * * Execute the ->complete() callbacks for all devices whose PM status is not * DPM_ON (this allows new devices to be registered). */ void dpm_complete(pm_message_t state) { struct list_head list; trace_suspend_resume(TPS("dpm_complete"), state.event, true); might_sleep(); INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_prepared_list)) { struct device *dev = to_device(dpm_prepared_list.prev); get_device(dev); dev->power.is_prepared = false; list_move(&dev->power.entry, &list); mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); device_complete(dev, state); trace_device_pm_callback_end(dev, 0); put_device(dev); mutex_lock(&dpm_list_mtx); } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); /* Allow device probing and trigger re-probing of deferred devices */ device_unblock_probing(); trace_suspend_resume(TPS("dpm_complete"), state.event, false); } /** * dpm_resume_end - Execute "resume" callbacks and complete system transition. * @state: PM transition of the system being carried out. * * Execute "resume" callbacks for all devices and complete the PM transition of * the system. 
*/ void dpm_resume_end(pm_message_t state) { dpm_resume(state); dpm_complete(state); } EXPORT_SYMBOL_GPL(dpm_resume_end); /*------------------------- Suspend routines -------------------------*/ /** * resume_event - Return a "resume" message for given "suspend" sleep state. * @sleep_state: PM message representing a sleep state. * * Return a PM message representing the resume event corresponding to given * sleep state. */ static pm_message_t resume_event(pm_message_t sleep_state) { switch (sleep_state.event) { case PM_EVENT_SUSPEND: return PMSG_RESUME; case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return PMSG_RECOVER; case PM_EVENT_HIBERNATE: return PMSG_RESTORE; } return PMSG_ON; } static void dpm_superior_set_must_resume(struct device *dev) { struct device_link *link; int idx; if (dev->parent) dev->parent->power.must_resume = true; idx = device_links_read_lock(); list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) link->supplier->power.must_resume = true; device_links_read_unlock(idx); } /** * device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. * * The driver of @dev will not receive interrupts while this function is being * executed. */ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; TRACE_DEVICE(dev); TRACE_SUSPEND(0); dpm_wait_for_subordinate(dev, async); if (async_error) goto Complete; if (dev->power.syscore || dev->power.direct_complete) goto Complete; if (dev->pm_domain) { info = "noirq power domain "; callback = pm_noirq_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { info = "noirq type "; callback = pm_noirq_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { info = "noirq class "; callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { info = "noirq bus "; callback = pm_noirq_op(dev->bus->pm, state); } if (callback) goto Run; if (dev_pm_skip_suspend(dev)) goto Skip; if (dev->driver && dev->driver->pm) { info = "noirq driver "; callback = pm_noirq_op(dev->driver->pm, state); } Run: error = dpm_run_callback(callback, dev, state, info); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); goto Complete; } Skip: dev->power.is_noirq_suspended = true; /* * Skipping the resume of devices that were in use right before the * system suspend (as indicated by their PM-runtime usage counters) * would be suboptimal. Also resume them if doing that is not allowed * to be skipped. 
*/ if (atomic_read(&dev->power.usage_count) > 1 || !(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) && dev->power.may_skip_resume)) dev->power.must_resume = true; if (dev->power.must_resume) dpm_superior_set_must_resume(dev); Complete: complete_all(&dev->power.completion); TRACE_SUSPEND(error); return error; } static void async_suspend_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; device_suspend_noirq(dev, pm_transition, true); put_device(dev); } static int dpm_noirq_suspend_devices(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true); pm_transition = state; async_error = 0; mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_late_early_list)) { struct device *dev = to_device(dpm_late_early_list.prev); list_move(&dev->power.entry, &dpm_noirq_list); if (dpm_async_fn(dev, async_suspend_noirq)) continue; get_device(dev); mutex_unlock(&dpm_list_mtx); error = device_suspend_noirq(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); if (!error) error = async_error; if (error) dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ); dpm_show_time(starttime, state, error, "noirq"); trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false); return error; } /** * dpm_suspend_noirq - Execute "noirq suspend" callbacks for all devices. * @state: PM transition of the system being carried out. * * Prevent device drivers' interrupt handlers from being called and invoke * "noirq" suspend callbacks for all non-sysdev devices. */ int dpm_suspend_noirq(pm_message_t state) { int ret; device_wakeup_arm_wake_irqs(); suspend_device_irqs(); ret = dpm_noirq_suspend_devices(state); if (ret) dpm_resume_noirq(resume_event(state)); return ret; } static void dpm_propagate_wakeup_to_parent(struct device *dev) { struct device *parent = dev->parent; if (!parent) return; spin_lock_irq(&parent->power.lock); if (device_wakeup_path(dev) && !parent->power.ignore_children) parent->power.wakeup_path = true; spin_unlock_irq(&parent->power.lock); } /** * device_suspend_late - Execute a "late suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. 
*/ static int device_suspend_late(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; TRACE_DEVICE(dev); TRACE_SUSPEND(0); __pm_runtime_disable(dev, false); dpm_wait_for_subordinate(dev, async); if (async_error) goto Complete; if (pm_wakeup_pending()) { async_error = -EBUSY; goto Complete; } if (dev->power.syscore || dev->power.direct_complete) goto Complete; if (dev->pm_domain) { info = "late power domain "; callback = pm_late_early_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { info = "late type "; callback = pm_late_early_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { info = "late class "; callback = pm_late_early_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { info = "late bus "; callback = pm_late_early_op(dev->bus->pm, state); } if (callback) goto Run; if (dev_pm_skip_suspend(dev)) goto Skip; if (dev->driver && dev->driver->pm) { info = "late driver "; callback = pm_late_early_op(dev->driver->pm, state); } Run: error = dpm_run_callback(callback, dev, state, info); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async late" : " late", error); goto Complete; } dpm_propagate_wakeup_to_parent(dev); Skip: dev->power.is_late_suspended = true; Complete: TRACE_SUSPEND(error); complete_all(&dev->power.completion); return error; } static void async_suspend_late(void *data, async_cookie_t cookie) { struct device *dev = data; device_suspend_late(dev, pm_transition, true); put_device(dev); } /** * dpm_suspend_late - Execute "late suspend" callbacks for all devices. * @state: PM transition of the system being carried out. */ int dpm_suspend_late(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); pm_transition = state; async_error = 0; wake_up_all_idle_cpus(); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_suspended_list)) { struct device *dev = to_device(dpm_suspended_list.prev); list_move(&dev->power.entry, &dpm_late_early_list); if (dpm_async_fn(dev, async_suspend_late)) continue; get_device(dev); mutex_unlock(&dpm_list_mtx); error = device_suspend_late(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); if (!error) error = async_error; if (error) { dpm_save_failed_step(SUSPEND_SUSPEND_LATE); dpm_resume_early(resume_event(state)); } dpm_show_time(starttime, state, error, "late"); trace_suspend_resume(TPS("dpm_suspend_late"), state.event, false); return error; } /** * dpm_suspend_end - Execute "late" and "noirq" device suspend callbacks. * @state: PM transition of the system being carried out. */ int dpm_suspend_end(pm_message_t state) { ktime_t starttime = ktime_get(); int error; error = dpm_suspend_late(state); if (error) goto out; error = dpm_suspend_noirq(state); if (error) dpm_resume_early(resume_event(state)); out: dpm_show_time(starttime, state, error, "end"); return error; } EXPORT_SYMBOL_GPL(dpm_suspend_end); /** * legacy_suspend - Execute a legacy (bus or class) suspend callback for device. * @dev: Device to suspend. * @state: PM transition of the system being carried out. * @cb: Suspend callback to execute. * @info: string description of caller. 
*/ static int legacy_suspend(struct device *dev, pm_message_t state, int (*cb)(struct device *dev, pm_message_t state), const char *info) { int error; ktime_t calltime; calltime = initcall_debug_start(dev, cb); trace_device_pm_callback_start(dev, info, state.event); error = cb(dev, state); trace_device_pm_callback_end(dev, error); suspend_report_result(dev, cb, error); initcall_debug_report(dev, calltime, cb, error); return error; } static void dpm_clear_superiors_direct_complete(struct device *dev) { struct device_link *link; int idx; if (dev->parent) { spin_lock_irq(&dev->parent->power.lock); dev->parent->power.direct_complete = false; spin_unlock_irq(&dev->parent->power.lock); } idx = device_links_read_lock(); list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { spin_lock_irq(&link->supplier->power.lock); link->supplier->power.direct_complete = false; spin_unlock_irq(&link->supplier->power.lock); } device_links_read_unlock(idx); } /** * device_suspend - Execute "suspend" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. */ static int device_suspend(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); TRACE_SUSPEND(0); dpm_wait_for_subordinate(dev, async); if (async_error) { dev->power.direct_complete = false; goto Complete; } /* * Wait for possible runtime PM transitions of the device in progress * to complete and if there's a runtime resume request pending for it, * resume it before proceeding with invoking the system-wide suspend * callbacks for it. * * If the system-wide suspend callbacks below change the configuration * of the device, they must disable runtime PM for it or otherwise * ensure that its runtime-resume callbacks will not be confused by that * change in case they are invoked going forward. */ pm_runtime_barrier(dev); if (pm_wakeup_pending()) { dev->power.direct_complete = false; async_error = -EBUSY; goto Complete; } if (dev->power.syscore) goto Complete; /* Avoid direct_complete to let wakeup_path propagate. 
*/ if (device_may_wakeup(dev) || device_wakeup_path(dev)) dev->power.direct_complete = false; if (dev->power.direct_complete) { if (pm_runtime_status_suspended(dev)) { pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev)) { pm_dev_dbg(dev, state, "direct-complete "); goto Complete; } pm_runtime_enable(dev); } dev->power.direct_complete = false; } dev->power.may_skip_resume = true; dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME); dpm_watchdog_set(&wd, dev); device_lock(dev); if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); goto Run; } if (dev->type && dev->type->pm) { info = "type "; callback = pm_op(dev->type->pm, state); goto Run; } if (dev->class && dev->class->pm) { info = "class "; callback = pm_op(dev->class->pm, state); goto Run; } if (dev->bus) { if (dev->bus->pm) { info = "bus "; callback = pm_op(dev->bus->pm, state); } else if (dev->bus->suspend) { pm_dev_dbg(dev, state, "legacy bus "); error = legacy_suspend(dev, state, dev->bus->suspend, "legacy bus "); goto End; } } Run: if (!callback && dev->driver && dev->driver->pm) { info = "driver "; callback = pm_op(dev->driver->pm, state); } error = dpm_run_callback(callback, dev, state, info); End: if (!error) { dev->power.is_suspended = true; if (device_may_wakeup(dev)) dev->power.wakeup_path = true; dpm_propagate_wakeup_to_parent(dev); dpm_clear_superiors_direct_complete(dev); } device_unlock(dev); dpm_watchdog_clear(&wd); Complete: if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async" : "", error); } complete_all(&dev->power.completion); TRACE_SUSPEND(error); return error; } static void async_suspend(void *data, async_cookie_t cookie) { struct device *dev = data; device_suspend(dev, pm_transition, true); put_device(dev); } /** * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices. * @state: PM transition of the system being carried out. */ int dpm_suspend(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; trace_suspend_resume(TPS("dpm_suspend"), state.event, true); might_sleep(); devfreq_suspend(); cpufreq_suspend(); pm_transition = state; async_error = 0; mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_prepared_list)) { struct device *dev = to_device(dpm_prepared_list.prev); list_move(&dev->power.entry, &dpm_suspended_list); if (dpm_async_fn(dev, async_suspend)) continue; get_device(dev); mutex_unlock(&dpm_list_mtx); error = device_suspend(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); if (!error) error = async_error; if (error) dpm_save_failed_step(SUSPEND_SUSPEND); dpm_show_time(starttime, state, error, NULL); trace_suspend_resume(TPS("dpm_suspend"), state.event, false); return error; } /** * device_prepare - Prepare a device for system power transition. * @dev: Device to handle. * @state: PM transition of the system being carried out. * * Execute the ->prepare() callback(s) for given device. No new children of the * device may be registered after this function has returned. */ static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; int ret = 0; /* * If a device's parent goes into runtime suspend at the wrong time, * it won't be possible to resume the device. To prevent this we * block runtime suspend here, during the prepare phase, and allow * it again during the complete phase. 
*/ pm_runtime_get_noresume(dev); if (dev->power.syscore) return 0; device_lock(dev); dev->power.wakeup_path = false; if (dev->power.no_pm_callbacks) goto unlock; if (dev->pm_domain) callback = dev->pm_domain->ops.prepare; else if (dev->type && dev->type->pm) callback = dev->type->pm->prepare; else if (dev->class && dev->class->pm) callback = dev->class->pm->prepare; else if (dev->bus && dev->bus->pm) callback = dev->bus->pm->prepare; if (!callback && dev->driver && dev->driver->pm) callback = dev->driver->pm->prepare; if (callback) ret = callback(dev); unlock: device_unlock(dev); if (ret < 0) { suspend_report_result(dev, callback, ret); pm_runtime_put(dev); return ret; } /* * A positive return value from ->prepare() means "this device appears * to be runtime-suspended and its state is fine, so if it really is * runtime-suspended, you can leave it in that state provided that you * will do the same thing with all of its descendants". This only * applies to suspend transitions, however. */ spin_lock_irq(&dev->power.lock); dev->power.direct_complete = state.event == PM_EVENT_SUSPEND && (ret > 0 || dev->power.no_pm_callbacks) && !dev_pm_test_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); spin_unlock_irq(&dev->power.lock); return 0; } /** * dpm_prepare - Prepare all non-sysdev devices for a system PM transition. * @state: PM transition of the system being carried out. * * Execute the ->prepare() callback(s) for all devices. */ int dpm_prepare(pm_message_t state) { int error = 0; trace_suspend_resume(TPS("dpm_prepare"), state.event, true); might_sleep(); /* * Give a chance for the known devices to complete their probes, before * disable probing of devices. This sync point is important at least * at boot time + hibernation restore. */ wait_for_device_probe(); /* * It is unsafe if probing of devices will happen during suspend or * hibernation and system behavior will be unpredictable in this case. * So, let's prohibit device's probing here and defer their probes * instead. The normal behavior will be restored in dpm_complete(). */ device_block_probing(); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_list) && !error) { struct device *dev = to_device(dpm_list.next); get_device(dev); mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); error = device_prepare(dev, state); trace_device_pm_callback_end(dev, error); mutex_lock(&dpm_list_mtx); if (!error) { dev->power.is_prepared = true; if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); } else if (error == -EAGAIN) { error = 0; } else { dev_info(dev, "not prepared for power transition: code %d\n", error); } mutex_unlock(&dpm_list_mtx); put_device(dev); mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); trace_suspend_resume(TPS("dpm_prepare"), state.event, false); return error; } /** * dpm_suspend_start - Prepare devices for PM transition and suspend them. * @state: PM transition of the system being carried out. * * Prepare all non-sysdev devices for system PM transition and execute "suspend" * callbacks for them. 
*/ int dpm_suspend_start(pm_message_t state) { ktime_t starttime = ktime_get(); int error; error = dpm_prepare(state); if (error) dpm_save_failed_step(SUSPEND_PREPARE); else error = dpm_suspend(state); dpm_show_time(starttime, state, error, "start"); return error; } EXPORT_SYMBOL_GPL(dpm_suspend_start); void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret) { if (ret) dev_err(dev, "%s(): %pS returns %d\n", function, fn, ret); } EXPORT_SYMBOL_GPL(__suspend_report_result); /** * device_pm_wait_for_dev - Wait for suspend/resume of a device to complete. * @subordinate: Device that needs to wait for @dev. * @dev: Device to wait for. */ int device_pm_wait_for_dev(struct device *subordinate, struct device *dev) { dpm_wait(dev, subordinate->power.async_suspend); return async_error; } EXPORT_SYMBOL_GPL(device_pm_wait_for_dev); /** * dpm_for_each_dev - device iterator. * @data: data for the callback. * @fn: function to be called for each device. * * Iterate over devices in dpm_list, and call @fn for each device, * passing it @data. */ void dpm_for_each_dev(void *data, void (*fn)(struct device *, void *)) { struct device *dev; if (!fn) return; device_pm_lock(); list_for_each_entry(dev, &dpm_list, power.entry) fn(dev, data); device_pm_unlock(); } EXPORT_SYMBOL_GPL(dpm_for_each_dev); static bool pm_ops_is_empty(const struct dev_pm_ops *ops) { if (!ops) return true; return !ops->prepare && !ops->suspend && !ops->suspend_late && !ops->suspend_noirq && !ops->resume_noirq && !ops->resume_early && !ops->resume && !ops->complete; } void device_pm_check_callbacks(struct device *dev) { unsigned long flags; spin_lock_irqsave(&dev->power.lock, flags); dev->power.no_pm_callbacks = (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && !dev->bus->suspend && !dev->bus->resume)) && (!dev->class || pm_ops_is_empty(dev->class->pm)) && (!dev->type || pm_ops_is_empty(dev->type->pm)) && (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && !dev->driver->suspend && !dev->driver->resume)); spin_unlock_irqrestore(&dev->power.lock, flags); } bool dev_pm_skip_suspend(struct device *dev) { return dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) && pm_runtime_status_suspended(dev); }
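/*
 * A minimal, hypothetical sketch of how a driver plugs into the callback
 * selection done above by pm_op()/pm_late_early_op()/pm_noirq_op() and
 * dpm_run_callback(). "my_suspend", "my_resume" and "my-device" are made-up
 * names; the dev_pm_ops/platform_driver plumbing is the real interface.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm.h>

static int my_suspend(struct device *dev)
{
	/*
	 * Reached from device_suspend() during dpm_suspend(); for a platform
	 * device the bus's PM ops forward to this driver callback.
	 */
	return 0;
}

static int my_resume(struct device *dev)
{
	/* Reached from device_resume() during dpm_resume(). */
	return 0;
}

/* Sets .suspend/.resume and reuses them for the hibernation transitions. */
static const struct dev_pm_ops my_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(my_suspend, my_resume)
};

static struct platform_driver my_driver = {
	.driver = {
		.name	= "my-device",
		.pm	= &my_pm_ops,
	},
};
module_platform_driver(my_driver);
MODULE_LICENSE("GPL");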
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for multitouch panels * * Copyright (c) 2010-2012 Stephane Chatty <chatty@enac.fr> * Copyright (c) 2010-2013 Benjamin Tissoires <benjamin.tissoires@gmail.com> * Copyright (c) 2010-2012 Ecole Nationale de l'Aviation Civile, France * Copyright (c) 2012-2013 Red Hat, Inc * * This code is partly based on hid-egalax.c: * * Copyright (c) 2010
Stephane Chatty <chatty@enac.fr> * Copyright (c) 2010 Henrik Rydberg <rydberg@euromail.se> * Copyright (c) 2010 Canonical, Ltd. * * This code is partly based on hid-3m-pct.c: * * Copyright (c) 2009-2010 Stephane Chatty <chatty@enac.fr> * Copyright (c) 2010 Henrik Rydberg <rydberg@euromail.se> * Copyright (c) 2010 Canonical, Ltd. */ /* */ /* * This driver is regularly tested thanks to the test suite in hid-tools[1]. * Please run these regression tests before patching this module so that * your patch won't break existing known devices. * * [1] https://gitlab.freedesktop.org/libevdev/hid-tools */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/input/mt.h> #include <linux/jiffies.h> #include <linux/string.h> #include <linux/timer.h> MODULE_AUTHOR("Stephane Chatty <chatty@enac.fr>"); MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>"); MODULE_DESCRIPTION("HID multitouch panels"); MODULE_LICENSE("GPL"); #include "hid-ids.h" /* quirks to control the device */ #define MT_QUIRK_NOT_SEEN_MEANS_UP BIT(0) #define MT_QUIRK_SLOT_IS_CONTACTID BIT(1) #define MT_QUIRK_CYPRESS BIT(2) #define MT_QUIRK_SLOT_IS_CONTACTNUMBER BIT(3) #define MT_QUIRK_ALWAYS_VALID BIT(4) #define MT_QUIRK_VALID_IS_INRANGE BIT(5) #define MT_QUIRK_VALID_IS_CONFIDENCE BIT(6) #define MT_QUIRK_CONFIDENCE BIT(7) #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE BIT(8) #define MT_QUIRK_NO_AREA BIT(9) #define MT_QUIRK_IGNORE_DUPLICATES BIT(10) #define MT_QUIRK_HOVERING BIT(11) #define MT_QUIRK_CONTACT_CNT_ACCURATE BIT(12) #define MT_QUIRK_FORCE_GET_FEATURE BIT(13) #define MT_QUIRK_FIX_CONST_CONTACT_ID BIT(14) #define MT_QUIRK_TOUCH_SIZE_SCALING BIT(15) #define MT_QUIRK_STICKY_FINGERS BIT(16) #define MT_QUIRK_ASUS_CUSTOM_UP BIT(17) #define MT_QUIRK_WIN8_PTP_BUTTONS BIT(18) #define MT_QUIRK_SEPARATE_APP_REPORT BIT(19) #define MT_QUIRK_FORCE_MULTI_INPUT BIT(20) #define MT_QUIRK_DISABLE_WAKEUP BIT(21) #define MT_QUIRK_ORIENTATION_INVERT BIT(22) #define MT_INPUTMODE_TOUCHSCREEN 0x02 #define MT_INPUTMODE_TOUCHPAD 0x03 #define MT_BUTTONTYPE_CLICKPAD 0 enum latency_mode { HID_LATENCY_NORMAL = 0, HID_LATENCY_HIGH = 1, }; #define MT_IO_FLAGS_RUNNING 0 #define MT_IO_FLAGS_ACTIVE_SLOTS 1 #define MT_IO_FLAGS_PENDING_SLOTS 2 static const bool mtrue = true; /* default for true */ static const bool mfalse; /* default for false */ static const __s32 mzero; /* default for 0 */ #define DEFAULT_TRUE ((void *)&mtrue) #define DEFAULT_FALSE ((void *)&mfalse) #define DEFAULT_ZERO ((void *)&mzero) struct mt_usages { struct list_head list; __s32 *x, *y, *cx, *cy, *p, *w, *h, *a; __s32 *contactid; /* the device ContactID assigned to this slot */ bool *tip_state; /* is the touch valid? */ bool *inrange_state; /* is the finger in proximity of the sensor? */ bool *confidence_state; /* is the touch made by a finger? 
*/ }; struct mt_application { struct list_head list; unsigned int application; unsigned int report_id; struct list_head mt_usages; /* mt usages list */ __s32 quirks; __s32 *scantime; /* scantime reported */ __s32 scantime_logical_max; /* max value for raw scantime */ __s32 *raw_cc; /* contact count in the report */ int left_button_state; /* left button state */ unsigned int mt_flags; /* flags to pass to input-mt */ unsigned long *pending_palm_slots; /* slots where we reported palm * and need to release */ __u8 num_received; /* how many contacts we received */ __u8 num_expected; /* expected last contact index */ __u8 buttons_count; /* number of physical buttons per touchpad */ __u8 touches_by_report; /* how many touches are present in one report: * 1 means we should use a serial protocol * > 1 means hybrid (multitouch) protocol */ unsigned long jiffies; /* the frame's jiffies */ int timestamp; /* the timestamp to be sent */ int prev_scantime; /* scantime reported previously */ bool have_contact_count; }; struct mt_class { __s32 name; /* MT_CLS */ __s32 quirks; __s32 sn_move; /* Signal/noise ratio for move events */ __s32 sn_width; /* Signal/noise ratio for width events */ __s32 sn_height; /* Signal/noise ratio for height events */ __s32 sn_pressure; /* Signal/noise ratio for pressure events */ __u8 maxcontacts; bool is_indirect; /* true for touchpads */ bool export_all_inputs; /* do not ignore mouse, keyboards, etc... */ }; struct mt_report_data { struct list_head list; struct hid_report *report; struct mt_application *application; bool is_mt_collection; }; struct mt_device { struct mt_class mtclass; /* our mt device class */ struct timer_list release_timer; /* to release sticky fingers */ struct hid_device *hdev; /* hid_device we're attached to */ unsigned long mt_io_flags; /* mt flags (MT_IO_FLAGS_*) */ __u8 inputmode_value; /* InputMode HID feature value */ __u8 maxcontacts; bool is_buttonpad; /* is this device a button pad? 
*/ bool serial_maybe; /* need to check for serial protocol */ struct list_head applications; struct list_head reports; }; static void mt_post_parse_default_settings(struct mt_device *td, struct mt_application *app); static void mt_post_parse(struct mt_device *td, struct mt_application *app); /* classes of device behavior */ #define MT_CLS_DEFAULT 0x0001 #define MT_CLS_SERIAL 0x0002 #define MT_CLS_CONFIDENCE 0x0003 #define MT_CLS_CONFIDENCE_CONTACT_ID 0x0004 #define MT_CLS_CONFIDENCE_MINUS_ONE 0x0005 #define MT_CLS_DUAL_INRANGE_CONTACTID 0x0006 #define MT_CLS_DUAL_INRANGE_CONTACTNUMBER 0x0007 /* reserved 0x0008 */ #define MT_CLS_INRANGE_CONTACTNUMBER 0x0009 #define MT_CLS_NSMU 0x000a /* reserved 0x0010 */ /* reserved 0x0011 */ #define MT_CLS_WIN_8 0x0012 #define MT_CLS_EXPORT_ALL_INPUTS 0x0013 /* reserved 0x0014 */ #define MT_CLS_WIN_8_FORCE_MULTI_INPUT 0x0015 #define MT_CLS_WIN_8_DISABLE_WAKEUP 0x0016 #define MT_CLS_WIN_8_NO_STICKY_FINGERS 0x0017 #define MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU 0x0018 /* vendor specific classes */ #define MT_CLS_3M 0x0101 /* reserved 0x0102 */ #define MT_CLS_EGALAX 0x0103 #define MT_CLS_EGALAX_SERIAL 0x0104 #define MT_CLS_TOPSEED 0x0105 #define MT_CLS_PANASONIC 0x0106 #define MT_CLS_FLATFROG 0x0107 #define MT_CLS_GENERALTOUCH_TWOFINGERS 0x0108 #define MT_CLS_GENERALTOUCH_PWT_TENFINGERS 0x0109 #define MT_CLS_LG 0x010a #define MT_CLS_ASUS 0x010b #define MT_CLS_VTL 0x0110 #define MT_CLS_GOOGLE 0x0111 #define MT_CLS_RAZER_BLADE_STEALTH 0x0112 #define MT_CLS_SMART_TECH 0x0113 #define MT_DEFAULT_MAXCONTACT 10 #define MT_MAX_MAXCONTACT 250 /* * Resync device and local timestamps after that many microseconds without * receiving data. */ #define MAX_TIMESTAMP_INTERVAL 1000000 #define MT_USB_DEVICE(v, p) HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH, v, p) #define MT_BT_DEVICE(v, p) HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_MULTITOUCH, v, p) /* * these device-dependent functions determine what slot corresponds * to a valid contact that was just read. 
*/ static int cypress_compute_slot(struct mt_application *application, struct mt_usages *slot) { if (*slot->contactid != 0 || application->num_received == 0) return *slot->contactid; else return -1; } static const struct mt_class mt_classes[] = { { .name = MT_CLS_DEFAULT, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE }, { .name = MT_CLS_NSMU, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP }, { .name = MT_CLS_SERIAL, .quirks = MT_QUIRK_ALWAYS_VALID}, { .name = MT_CLS_CONFIDENCE, .quirks = MT_QUIRK_VALID_IS_CONFIDENCE }, { .name = MT_CLS_CONFIDENCE_CONTACT_ID, .quirks = MT_QUIRK_VALID_IS_CONFIDENCE | MT_QUIRK_SLOT_IS_CONTACTID }, { .name = MT_CLS_CONFIDENCE_MINUS_ONE, .quirks = MT_QUIRK_VALID_IS_CONFIDENCE | MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE }, { .name = MT_CLS_DUAL_INRANGE_CONTACTID, .quirks = MT_QUIRK_VALID_IS_INRANGE | MT_QUIRK_SLOT_IS_CONTACTID, .maxcontacts = 2 }, { .name = MT_CLS_DUAL_INRANGE_CONTACTNUMBER, .quirks = MT_QUIRK_VALID_IS_INRANGE | MT_QUIRK_SLOT_IS_CONTACTNUMBER, .maxcontacts = 2 }, { .name = MT_CLS_INRANGE_CONTACTNUMBER, .quirks = MT_QUIRK_VALID_IS_INRANGE | MT_QUIRK_SLOT_IS_CONTACTNUMBER }, { .name = MT_CLS_WIN_8, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_STICKY_FINGERS | MT_QUIRK_WIN8_PTP_BUTTONS, .export_all_inputs = true }, { .name = MT_CLS_EXPORT_ALL_INPUTS, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE, .export_all_inputs = true }, { .name = MT_CLS_WIN_8_FORCE_MULTI_INPUT, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_STICKY_FINGERS | MT_QUIRK_WIN8_PTP_BUTTONS | MT_QUIRK_FORCE_MULTI_INPUT, .export_all_inputs = true }, { .name = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, .quirks = MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_STICKY_FINGERS | MT_QUIRK_WIN8_PTP_BUTTONS | MT_QUIRK_FORCE_MULTI_INPUT | MT_QUIRK_NOT_SEEN_MEANS_UP, .export_all_inputs = true }, { .name = MT_CLS_WIN_8_DISABLE_WAKEUP, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_STICKY_FINGERS | MT_QUIRK_WIN8_PTP_BUTTONS | MT_QUIRK_DISABLE_WAKEUP, .export_all_inputs = true }, { .name = MT_CLS_WIN_8_NO_STICKY_FINGERS, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_WIN8_PTP_BUTTONS, .export_all_inputs = true }, /* * vendor specific classes */ { .name = MT_CLS_3M, .quirks = MT_QUIRK_VALID_IS_CONFIDENCE | MT_QUIRK_SLOT_IS_CONTACTID | MT_QUIRK_TOUCH_SIZE_SCALING, .sn_move = 2048, .sn_width = 128, .sn_height = 128, .maxcontacts = 60, }, { .name = MT_CLS_EGALAX, .quirks = MT_QUIRK_SLOT_IS_CONTACTID | MT_QUIRK_VALID_IS_INRANGE, .sn_move = 4096, .sn_pressure = 32, }, { .name = MT_CLS_EGALAX_SERIAL, .quirks = MT_QUIRK_SLOT_IS_CONTACTID | MT_QUIRK_ALWAYS_VALID, .sn_move = 4096, .sn_pressure = 32, }, { .name = MT_CLS_TOPSEED, .quirks = MT_QUIRK_ALWAYS_VALID, .is_indirect = true, .maxcontacts = 2, }, { .name = MT_CLS_PANASONIC, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP, .maxcontacts = 4 }, { .name = MT_CLS_GENERALTOUCH_TWOFINGERS, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_VALID_IS_INRANGE | MT_QUIRK_SLOT_IS_CONTACTID, .maxcontacts = 2 }, { .name = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_SLOT_IS_CONTACTID }, { .name = MT_CLS_FLATFROG, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_NO_AREA, .sn_move = 
2048, .maxcontacts = 40, }, { .name = MT_CLS_LG, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_FIX_CONST_CONTACT_ID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE }, { .name = MT_CLS_ASUS, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_ASUS_CUSTOM_UP }, { .name = MT_CLS_VTL, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_FORCE_GET_FEATURE, }, { .name = MT_CLS_GOOGLE, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_SLOT_IS_CONTACTID | MT_QUIRK_HOVERING }, { .name = MT_CLS_RAZER_BLADE_STEALTH, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_HOVERING | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_WIN8_PTP_BUTTONS, }, { .name = MT_CLS_SMART_TECH, .quirks = MT_QUIRK_ALWAYS_VALID | MT_QUIRK_IGNORE_DUPLICATES | MT_QUIRK_CONTACT_CNT_ACCURATE | MT_QUIRK_SEPARATE_APP_REPORT, }, { } }; static ssize_t mt_show_quirks(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct mt_device *td = hid_get_drvdata(hdev); return sprintf(buf, "%u\n", td->mtclass.quirks); } static ssize_t mt_set_quirks(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct mt_device *td = hid_get_drvdata(hdev); struct mt_application *application; unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; td->mtclass.quirks = val; list_for_each_entry(application, &td->applications, list) { application->quirks = val; if (!application->have_contact_count) application->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } return count; } static DEVICE_ATTR(quirks, S_IWUSR | S_IRUGO, mt_show_quirks, mt_set_quirks); static struct attribute *sysfs_attrs[] = { &dev_attr_quirks.attr, NULL }; static const struct attribute_group mt_attribute_group = { .attrs = sysfs_attrs }; static void mt_get_feature(struct hid_device *hdev, struct hid_report *report) { int ret; u32 size = hid_report_len(report); u8 *buf; /* * Do not fetch the feature report if the device has been explicitly * marked as non-capable. 
*/ if (hdev->quirks & HID_QUIRK_NO_INIT_REPORTS) return; buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) return; ret = hid_hw_raw_request(hdev, report->id, buf, size, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { dev_warn(&hdev->dev, "failed to fetch feature %d\n", report->id); } else { ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf, size, 0); if (ret) dev_warn(&hdev->dev, "failed to report feature\n"); } kfree(buf); } static void mt_feature_mapping(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage) { struct mt_device *td = hid_get_drvdata(hdev); switch (usage->hid) { case HID_DG_CONTACTMAX: mt_get_feature(hdev, field->report); td->maxcontacts = field->value[0]; if (!td->maxcontacts && field->logical_maximum <= MT_MAX_MAXCONTACT) td->maxcontacts = field->logical_maximum; if (td->mtclass.maxcontacts) /* check if the maxcontacts is given by the class */ td->maxcontacts = td->mtclass.maxcontacts; break; case HID_DG_BUTTONTYPE: if (usage->usage_index >= field->report_count) { dev_err(&hdev->dev, "HID_DG_BUTTONTYPE out of range\n"); break; } mt_get_feature(hdev, field->report); if (field->value[usage->usage_index] == MT_BUTTONTYPE_CLICKPAD) td->is_buttonpad = true; break; case 0xff0000c5: /* Retrieve the Win8 blob once to enable some devices */ if (usage->usage_index == 0) mt_get_feature(hdev, field->report); break; } } static void set_abs(struct input_dev *input, unsigned int code, struct hid_field *field, int snratio) { int fmin = field->logical_minimum; int fmax = field->logical_maximum; int fuzz = snratio ? (fmax - fmin) / snratio : 0; input_set_abs_params(input, code, fmin, fmax, fuzz, 0); input_abs_set_res(input, code, hidinput_calc_abs_res(field, code)); } static struct mt_usages *mt_allocate_usage(struct hid_device *hdev, struct mt_application *application) { struct mt_usages *usage; usage = devm_kzalloc(&hdev->dev, sizeof(*usage), GFP_KERNEL); if (!usage) return NULL; /* set some defaults so we do not need to check for null pointers */ usage->x = DEFAULT_ZERO; usage->y = DEFAULT_ZERO; usage->cx = DEFAULT_ZERO; usage->cy = DEFAULT_ZERO; usage->p = DEFAULT_ZERO; usage->w = DEFAULT_ZERO; usage->h = DEFAULT_ZERO; usage->a = DEFAULT_ZERO; usage->contactid = DEFAULT_ZERO; usage->tip_state = DEFAULT_FALSE; usage->inrange_state = DEFAULT_FALSE; usage->confidence_state = DEFAULT_TRUE; list_add_tail(&usage->list, &application->mt_usages); return usage; } static struct mt_application *mt_allocate_application(struct mt_device *td, struct hid_report *report) { unsigned int application = report->application; struct mt_application *mt_application; mt_application = devm_kzalloc(&td->hdev->dev, sizeof(*mt_application), GFP_KERNEL); if (!mt_application) return NULL; mt_application->application = application; INIT_LIST_HEAD(&mt_application->mt_usages); if (application == HID_DG_TOUCHSCREEN) mt_application->mt_flags |= INPUT_MT_DIRECT; /* * Model touchscreens providing buttons as touchpads. 
*/ if (application == HID_DG_TOUCHPAD) { mt_application->mt_flags |= INPUT_MT_POINTER; td->inputmode_value = MT_INPUTMODE_TOUCHPAD; } mt_application->scantime = DEFAULT_ZERO; mt_application->raw_cc = DEFAULT_ZERO; mt_application->quirks = td->mtclass.quirks; mt_application->report_id = report->id; list_add_tail(&mt_application->list, &td->applications); return mt_application; } static struct mt_application *mt_find_application(struct mt_device *td, struct hid_report *report) { unsigned int application = report->application; struct mt_application *tmp, *mt_application = NULL; list_for_each_entry(tmp, &td->applications, list) { if (application == tmp->application) { if (!(td->mtclass.quirks & MT_QUIRK_SEPARATE_APP_REPORT) || tmp->report_id == report->id) { mt_application = tmp; break; } } } if (!mt_application) mt_application = mt_allocate_application(td, report); return mt_application; } static struct mt_report_data *mt_allocate_report_data(struct mt_device *td, struct hid_report *report) { struct mt_report_data *rdata; struct hid_field *field; int r, n; rdata = devm_kzalloc(&td->hdev->dev, sizeof(*rdata), GFP_KERNEL); if (!rdata) return NULL; rdata->report = report; rdata->application = mt_find_application(td, report); if (!rdata->application) { devm_kfree(&td->hdev->dev, rdata); return NULL; } for (r = 0; r < report->maxfield; r++) { field = report->field[r]; if (!(HID_MAIN_ITEM_VARIABLE & field->flags)) continue; if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) { for (n = 0; n < field->report_count; n++) { if (field->usage[n].hid == HID_DG_CONTACTID) { rdata->is_mt_collection = true; break; } } } } list_add_tail(&rdata->list, &td->reports); return rdata; } static struct mt_report_data *mt_find_report_data(struct mt_device *td, struct hid_report *report) { struct mt_report_data *tmp, *rdata = NULL; list_for_each_entry(tmp, &td->reports, list) { if (report == tmp->report) { rdata = tmp; break; } } if (!rdata) rdata = mt_allocate_report_data(td, report); return rdata; } static void mt_store_field(struct hid_device *hdev, struct mt_application *application, __s32 *value, size_t offset) { struct mt_usages *usage; __s32 **target; if (list_empty(&application->mt_usages)) usage = mt_allocate_usage(hdev, application); else usage = list_last_entry(&application->mt_usages, struct mt_usages, list); if (!usage) return; target = (__s32 **)((char *)usage + offset); /* the value has already been filled, create a new slot */ if (*target != DEFAULT_TRUE && *target != DEFAULT_FALSE && *target != DEFAULT_ZERO) { if (usage->contactid == DEFAULT_ZERO || usage->x == DEFAULT_ZERO || usage->y == DEFAULT_ZERO) { hid_dbg(hdev, "ignoring duplicate usage on incomplete"); return; } usage = mt_allocate_usage(hdev, application); if (!usage) return; target = (__s32 **)((char *)usage + offset); } *target = value; } #define MT_STORE_FIELD(__name) \ mt_store_field(hdev, app, \ &field->value[usage->usage_index], \ offsetof(struct mt_usages, __name)) static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max, struct mt_application *app) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; int code; struct hid_usage *prev_usage = NULL; /* * Model touchscreens providing buttons as touchpads. 
*/ if (field->application == HID_DG_TOUCHSCREEN && (usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) { app->mt_flags |= INPUT_MT_POINTER; td->inputmode_value = MT_INPUTMODE_TOUCHPAD; } /* count the buttons on touchpads */ if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) app->buttons_count++; if (usage->usage_index) prev_usage = &field->usage[usage->usage_index - 1]; switch (usage->hid & HID_USAGE_PAGE) { case HID_UP_GENDESK: switch (usage->hid) { case HID_GD_X: if (prev_usage && (prev_usage->hid == usage->hid)) { code = ABS_MT_TOOL_X; MT_STORE_FIELD(cx); } else { code = ABS_MT_POSITION_X; MT_STORE_FIELD(x); } set_abs(hi->input, code, field, cls->sn_move); /* * A system multi-axis that exports X and Y has a high * chance of being used directly on a surface */ if (field->application == HID_GD_SYSTEM_MULTIAXIS) { __set_bit(INPUT_PROP_DIRECT, hi->input->propbit); input_set_abs_params(hi->input, ABS_MT_TOOL_TYPE, MT_TOOL_DIAL, MT_TOOL_DIAL, 0, 0); } return 1; case HID_GD_Y: if (prev_usage && (prev_usage->hid == usage->hid)) { code = ABS_MT_TOOL_Y; MT_STORE_FIELD(cy); } else { code = ABS_MT_POSITION_Y; MT_STORE_FIELD(y); } set_abs(hi->input, code, field, cls->sn_move); return 1; } return 0; case HID_UP_DIGITIZER: switch (usage->hid) { case HID_DG_INRANGE: if (app->quirks & MT_QUIRK_HOVERING) { input_set_abs_params(hi->input, ABS_MT_DISTANCE, 0, 1, 0, 0); } MT_STORE_FIELD(inrange_state); return 1; case HID_DG_CONFIDENCE: if ((cls->name == MT_CLS_WIN_8 || cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT || cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU || cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP) && (field->application == HID_DG_TOUCHPAD || field->application == HID_DG_TOUCHSCREEN)) app->quirks |= MT_QUIRK_CONFIDENCE; if (app->quirks & MT_QUIRK_CONFIDENCE) input_set_abs_params(hi->input, ABS_MT_TOOL_TYPE, MT_TOOL_FINGER, MT_TOOL_PALM, 0, 0); MT_STORE_FIELD(confidence_state); return 1; case HID_DG_TIPSWITCH: if (field->application != HID_GD_SYSTEM_MULTIAXIS) input_set_capability(hi->input, EV_KEY, BTN_TOUCH); MT_STORE_FIELD(tip_state); return 1; case HID_DG_CONTACTID: MT_STORE_FIELD(contactid); app->touches_by_report++; return 1; case HID_DG_WIDTH: if (!(app->quirks & MT_QUIRK_NO_AREA)) set_abs(hi->input, ABS_MT_TOUCH_MAJOR, field, cls->sn_width); MT_STORE_FIELD(w); return 1; case HID_DG_HEIGHT: if (!(app->quirks & MT_QUIRK_NO_AREA)) { set_abs(hi->input, ABS_MT_TOUCH_MINOR, field, cls->sn_height); /* * Only set ABS_MT_ORIENTATION if it is not * already set by the HID_DG_AZIMUTH usage. */ if (!test_bit(ABS_MT_ORIENTATION, hi->input->absbit)) input_set_abs_params(hi->input, ABS_MT_ORIENTATION, 0, 1, 0, 0); } MT_STORE_FIELD(h); return 1; case HID_DG_TIPPRESSURE: set_abs(hi->input, ABS_MT_PRESSURE, field, cls->sn_pressure); MT_STORE_FIELD(p); return 1; case HID_DG_SCANTIME: input_set_capability(hi->input, EV_MSC, MSC_TIMESTAMP); app->scantime = &field->value[usage->usage_index]; app->scantime_logical_max = field->logical_maximum; return 1; case HID_DG_CONTACTCOUNT: app->have_contact_count = true; app->raw_cc = &field->value[usage->usage_index]; return 1; case HID_DG_AZIMUTH: /* * Azimuth has the range of [0, MAX) representing a full * revolution. Set ABS_MT_ORIENTATION to a quarter of * MAX according the definition of ABS_MT_ORIENTATION */ input_set_abs_params(hi->input, ABS_MT_ORIENTATION, -field->logical_maximum / 4, field->logical_maximum / 4, cls->sn_move ? 
field->logical_maximum / cls->sn_move : 0, 0); MT_STORE_FIELD(a); return 1; case HID_DG_CONTACTMAX: /* contact max are global to the report */ return -1; case HID_DG_TOUCH: /* Legacy devices use TIPSWITCH and not TOUCH. * Let's just ignore this field. */ return -1; } /* let hid-input decide for the others */ return 0; case HID_UP_BUTTON: code = BTN_MOUSE + ((usage->hid - 1) & HID_USAGE); /* * MS PTP spec says that external buttons left and right have * usages 2 and 3. */ if ((app->quirks & MT_QUIRK_WIN8_PTP_BUTTONS) && field->application == HID_DG_TOUCHPAD && (usage->hid & HID_USAGE) > 1) code--; if (field->application == HID_GD_SYSTEM_MULTIAXIS) code = BTN_0 + ((usage->hid - 1) & HID_USAGE); hid_map_usage(hi, usage, bit, max, EV_KEY, code); if (!*bit) return -1; input_set_capability(hi->input, EV_KEY, code); return 1; case 0xff000000: /* we do not want to map these: no input-oriented meaning */ return -1; } return 0; } static int mt_compute_slot(struct mt_device *td, struct mt_application *app, struct mt_usages *slot, struct input_dev *input) { __s32 quirks = app->quirks; if (quirks & MT_QUIRK_SLOT_IS_CONTACTID) return *slot->contactid; if (quirks & MT_QUIRK_CYPRESS) return cypress_compute_slot(app, slot); if (quirks & MT_QUIRK_SLOT_IS_CONTACTNUMBER) return app->num_received; if (quirks & MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE) return *slot->contactid - 1; return input_mt_get_slot_by_key(input, *slot->contactid); } static void mt_release_pending_palms(struct mt_device *td, struct mt_application *app, struct input_dev *input) { int slotnum; bool need_sync = false; for_each_set_bit(slotnum, app->pending_palm_slots, td->maxcontacts) { clear_bit(slotnum, app->pending_palm_slots); input_mt_slot(input, slotnum); input_mt_report_slot_inactive(input); need_sync = true; } if (need_sync) { input_mt_sync_frame(input); input_sync(input); } } /* * this function is called when a whole packet has been received and processed, * so that it can decide what to send to the input layer. */ static void mt_sync_frame(struct mt_device *td, struct mt_application *app, struct input_dev *input) { if (app->quirks & MT_QUIRK_WIN8_PTP_BUTTONS) input_event(input, EV_KEY, BTN_LEFT, app->left_button_state); input_mt_sync_frame(input); input_event(input, EV_MSC, MSC_TIMESTAMP, app->timestamp); input_sync(input); mt_release_pending_palms(td, app, input); app->num_received = 0; app->left_button_state = 0; if (test_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags)) set_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); else clear_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); clear_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); } static int mt_compute_timestamp(struct mt_application *app, __s32 value) { long delta = value - app->prev_scantime; unsigned long jdelta = jiffies_to_usecs(jiffies - app->jiffies); app->jiffies = jiffies; if (delta < 0) delta += app->scantime_logical_max; /* HID_DG_SCANTIME is expressed in 100us, we want it in us. */ delta *= 100; if (jdelta > MAX_TIMESTAMP_INTERVAL) /* No data received for a while, resync the timestamp. 
*/ return 0; else return app->timestamp + delta; } static int mt_touch_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { /* we will handle the hidinput part later, now remains hiddev */ if (hid->claimed & HID_CLAIMED_HIDDEV && hid->hiddev_hid_event) hid->hiddev_hid_event(hid, field, usage, value); return 1; } static int mt_process_slot(struct mt_device *td, struct input_dev *input, struct mt_application *app, struct mt_usages *slot) { struct input_mt *mt = input->mt; struct hid_device *hdev = td->hdev; __s32 quirks = app->quirks; bool valid = true; bool confidence_state = true; bool inrange_state = false; int active; int slotnum; int tool = MT_TOOL_FINGER; if (!slot) return -EINVAL; if ((quirks & MT_QUIRK_CONTACT_CNT_ACCURATE) && app->num_received >= app->num_expected) return -EAGAIN; if (!(quirks & MT_QUIRK_ALWAYS_VALID)) { if (quirks & MT_QUIRK_VALID_IS_INRANGE) valid = *slot->inrange_state; if (quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) valid = *slot->tip_state; if (quirks & MT_QUIRK_VALID_IS_CONFIDENCE) valid = *slot->confidence_state; if (!valid) return 0; } slotnum = mt_compute_slot(td, app, slot, input); if (slotnum < 0 || slotnum >= td->maxcontacts) return 0; if ((quirks & MT_QUIRK_IGNORE_DUPLICATES) && mt) { struct input_mt_slot *i_slot = &mt->slots[slotnum]; if (input_mt_is_active(i_slot) && input_mt_is_used(mt, i_slot)) return -EAGAIN; } if (quirks & MT_QUIRK_CONFIDENCE) confidence_state = *slot->confidence_state; if (quirks & MT_QUIRK_HOVERING) inrange_state = *slot->inrange_state; active = *slot->tip_state || inrange_state; if (app->application == HID_GD_SYSTEM_MULTIAXIS) tool = MT_TOOL_DIAL; else if (unlikely(!confidence_state)) { tool = MT_TOOL_PALM; if (!active && mt && input_mt_is_active(&mt->slots[slotnum])) { /* * The non-confidence was reported for * previously valid contact that is also no * longer valid. We can't simply report * lift-off as userspace will not be aware * of non-confidence, so we need to split * it into 2 events: active MT_TOOL_PALM * and a separate liftoff. */ active = true; set_bit(slotnum, app->pending_palm_slots); } } input_mt_slot(input, slotnum); input_mt_report_slot_state(input, tool, active); if (active) { /* this finger is in proximity of the sensor */ int wide = (*slot->w > *slot->h); int major = max(*slot->w, *slot->h); int minor = min(*slot->w, *slot->h); int orientation = wide; int max_azimuth; int azimuth; int x; int y; int cx; int cy; if (slot->a != DEFAULT_ZERO) { /* * Azimuth is counter-clockwise and ranges from [0, MAX) * (a full revolution). Convert it to clockwise ranging * [-MAX/2, MAX/2]. * * Note that ABS_MT_ORIENTATION require us to report * the limit of [-MAX/4, MAX/4], but the value can go * out of range to [-MAX/2, MAX/2] to report an upside * down ellipsis. */ azimuth = *slot->a; max_azimuth = input_abs_get_max(input, ABS_MT_ORIENTATION); if (azimuth > max_azimuth * 2) azimuth -= max_azimuth * 4; orientation = -azimuth; if (quirks & MT_QUIRK_ORIENTATION_INVERT) orientation = -orientation; } if (quirks & MT_QUIRK_TOUCH_SIZE_SCALING) { /* * divided by two to match visual scale of touch * for devices with this quirk */ major = major >> 1; minor = minor >> 1; } x = hdev->quirks & HID_QUIRK_X_INVERT ? input_abs_get_max(input, ABS_MT_POSITION_X) - *slot->x : *slot->x; y = hdev->quirks & HID_QUIRK_Y_INVERT ? input_abs_get_max(input, ABS_MT_POSITION_Y) - *slot->y : *slot->y; cx = hdev->quirks & HID_QUIRK_X_INVERT ? 
input_abs_get_max(input, ABS_MT_POSITION_X) - *slot->cx : *slot->cx; cy = hdev->quirks & HID_QUIRK_Y_INVERT ? input_abs_get_max(input, ABS_MT_POSITION_Y) - *slot->cy : *slot->cy; input_event(input, EV_ABS, ABS_MT_POSITION_X, x); input_event(input, EV_ABS, ABS_MT_POSITION_Y, y); input_event(input, EV_ABS, ABS_MT_TOOL_X, cx); input_event(input, EV_ABS, ABS_MT_TOOL_Y, cy); input_event(input, EV_ABS, ABS_MT_DISTANCE, !*slot->tip_state); input_event(input, EV_ABS, ABS_MT_ORIENTATION, orientation); input_event(input, EV_ABS, ABS_MT_PRESSURE, *slot->p); input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor); set_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); } return 0; } static void mt_process_mt_event(struct hid_device *hid, struct mt_application *app, struct hid_field *field, struct hid_usage *usage, __s32 value, bool first_packet) { __s32 quirks = app->quirks; struct input_dev *input = field->hidinput->input; if (!usage->type || !(hid->claimed & HID_CLAIMED_INPUT)) return; if (quirks & MT_QUIRK_WIN8_PTP_BUTTONS) { /* * For Win8 PTP touchpads we should only look at * non finger/touch events in the first_packet of a * (possible) multi-packet frame. */ if (!first_packet) return; /* * For Win8 PTP touchpads we map both the clickpad click * and any "external" left buttons to BTN_LEFT if a * device claims to have both we need to report 1 for * BTN_LEFT if either is pressed, so we or all values * together and report the result in mt_sync_frame(). */ if (usage->type == EV_KEY && usage->code == BTN_LEFT) { app->left_button_state |= value; return; } } input_event(input, usage->type, usage->code, value); } static void mt_touch_report(struct hid_device *hid, struct mt_report_data *rdata) { struct mt_device *td = hid_get_drvdata(hid); struct hid_report *report = rdata->report; struct mt_application *app = rdata->application; struct hid_field *field; struct input_dev *input; struct mt_usages *slot; bool first_packet; unsigned count; int r, n; int scantime = 0; int contact_count = -1; /* sticky fingers release in progress, abort */ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; scantime = *app->scantime; app->timestamp = mt_compute_timestamp(app, scantime); if (app->raw_cc != DEFAULT_ZERO) contact_count = *app->raw_cc; /* * Includes multi-packet support where subsequent * packets are sent with zero contactcount. */ if (contact_count >= 0) { /* * For Win8 PTPs the first packet (td->num_received == 0) may * have a contactcount of 0 if there only is a button event. * We double check that this is not a continuation packet * of a possible multi-packet frame be checking that the * timestamp has changed. 
*/ if ((app->quirks & MT_QUIRK_WIN8_PTP_BUTTONS) && app->num_received == 0 && app->prev_scantime != scantime) app->num_expected = contact_count; /* A non-zero contact count always indicates a first packet */ else if (contact_count) app->num_expected = contact_count; } app->prev_scantime = scantime; first_packet = app->num_received == 0; input = report->field[0]->hidinput->input; list_for_each_entry(slot, &app->mt_usages, list) { if (!mt_process_slot(td, input, app, slot)) app->num_received++; } for (r = 0; r < report->maxfield; r++) { field = report->field[r]; count = field->report_count; if (!(HID_MAIN_ITEM_VARIABLE & field->flags)) continue; for (n = 0; n < count; n++) mt_process_mt_event(hid, app, field, &field->usage[n], field->value[n], first_packet); } if (app->num_received >= app->num_expected) mt_sync_frame(td, app, input); /* * The Windows 8 spec says two things: * - once a contact has been reported, it has to be reported in each * subsequent report * - the report rate when fingers are present has to be at least * the refresh rate of the screen, 60 or 120 Hz * * I interpret this as the specification forcing a report rate of * at least 60 Hz for a touchscreen to be certified. * That means that if we do not get a report within 16 ms, either * something went wrong or the touchscreen forgot to send * a release. Taking a reasonable margin avoids issues caused * by USB communication or the load of the machine. * * Given that Win 8 devices are forced to send a release, this will * only affect laggy machines and the ones that have a firmware * defect. */ if (app->quirks & MT_QUIRK_STICKY_FINGERS) { if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) mod_timer(&td->release_timer, jiffies + msecs_to_jiffies(100)); else del_timer(&td->release_timer); } clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } static int mt_touch_input_configured(struct hid_device *hdev, struct hid_input *hi, struct mt_application *app) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; struct input_dev *input = hi->input; int ret; if (!td->maxcontacts) td->maxcontacts = MT_DEFAULT_MAXCONTACT; mt_post_parse(td, app); if (td->serial_maybe) mt_post_parse_default_settings(td, app); if (cls->is_indirect) app->mt_flags |= INPUT_MT_POINTER; if (app->quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) app->mt_flags |= INPUT_MT_DROP_UNUSED; /* check for clickpads */ if ((app->mt_flags & INPUT_MT_POINTER) && (app->buttons_count == 1)) td->is_buttonpad = true; if (td->is_buttonpad) __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); app->pending_palm_slots = devm_kcalloc(&hi->input->dev, BITS_TO_LONGS(td->maxcontacts), sizeof(long), GFP_KERNEL); if (!app->pending_palm_slots) return -ENOMEM; ret = input_mt_init_slots(input, td->maxcontacts, app->mt_flags); if (ret) return ret; app->mt_flags = 0; return 0; } #define mt_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, \ max, EV_KEY, (c)) static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_application *application; struct mt_report_data *rdata; rdata = mt_find_report_data(td, field->report); if (!rdata) { hid_err(hdev, "failed to allocate data for report\n"); return 0; } application = rdata->application; /* * If mtclass.export_all_inputs is not set, only map fields from * TouchScreen or TouchPad collections.
We need to ignore fields * that belong to other collections such as Mouse that might have * the same GenericDesktop usages. */ if (!td->mtclass.export_all_inputs && field->application != HID_DG_TOUCHSCREEN && field->application != HID_DG_PEN && field->application != HID_DG_TOUCHPAD && field->application != HID_GD_KEYBOARD && field->application != HID_GD_SYSTEM_CONTROL && field->application != HID_CP_CONSUMER_CONTROL && field->application != HID_GD_WIRELESS_RADIO_CTLS && field->application != HID_GD_SYSTEM_MULTIAXIS && !(field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS && application->quirks & MT_QUIRK_ASUS_CUSTOM_UP)) return -1; /* * Some Asus keyboard+touchpad devices have the hotkeys defined in the * touchpad report descriptor. We need to treat these as an array to * map usages to input keys. */ if (field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS && application->quirks & MT_QUIRK_ASUS_CUSTOM_UP && (usage->hid & HID_USAGE_PAGE) == HID_UP_CUSTOM) { set_bit(EV_REP, hi->input->evbit); if (field->flags & HID_MAIN_ITEM_VARIABLE) field->flags &= ~HID_MAIN_ITEM_VARIABLE; switch (usage->hid & HID_USAGE) { case 0x10: mt_map_key_clear(KEY_BRIGHTNESSDOWN); break; case 0x20: mt_map_key_clear(KEY_BRIGHTNESSUP); break; case 0x35: mt_map_key_clear(KEY_DISPLAY_OFF); break; case 0x6b: mt_map_key_clear(KEY_F21); break; case 0x6c: mt_map_key_clear(KEY_SLEEP); break; default: return -1; } return 1; } if (rdata->is_mt_collection) return mt_touch_input_mapping(hdev, hi, field, usage, bit, max, application); /* * some egalax touchscreens have "application == DG_TOUCHSCREEN" * for the stylus. Overwrite the hid_input application */ if (field->physical == HID_DG_STYLUS) hi->application = HID_DG_STYLUS; /* let hid-core decide for the others */ return 0; } static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_report_data *rdata; rdata = mt_find_report_data(td, field->report); if (rdata && rdata->is_mt_collection) { /* We own these mappings, tell hid-input to ignore them */ return -1; } /* let hid-core decide for the others */ return 0; } static int mt_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct mt_device *td = hid_get_drvdata(hid); struct mt_report_data *rdata; rdata = mt_find_report_data(td, field->report); if (rdata && rdata->is_mt_collection) return mt_touch_event(hid, field, usage, value); return 0; } static void mt_report(struct hid_device *hid, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hid); struct hid_field *field = report->field[0]; struct mt_report_data *rdata; if (!(hid->claimed & HID_CLAIMED_INPUT)) return; rdata = mt_find_report_data(td, report); if (rdata && rdata->is_mt_collection) return mt_touch_report(hid, rdata); if (field && field->hidinput && field->hidinput->input) input_sync(field->hidinput->input); } static bool mt_need_to_apply_feature(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, enum latency_mode latency, bool surface_switch, bool button_switch, bool *inputmode_found) { struct mt_device *td = hid_get_drvdata(hdev); struct mt_class *cls = &td->mtclass; struct hid_report *report = field->report; unsigned int index = usage->usage_index; char *buf; u32 report_len; int max; switch (usage->hid) { case HID_DG_INPUTMODE: /* * Some elan panels wrongly declare 2 input mode features, * and silently ignore when we set the value 
in the second * field. Skip the second feature and hope for the best. */ if (*inputmode_found) return false; if (cls->quirks & MT_QUIRK_FORCE_GET_FEATURE) { report_len = hid_report_len(report); buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) { hid_err(hdev, "failed to allocate buffer for report\n"); return false; } hid_hw_raw_request(hdev, report->id, buf, report_len, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); kfree(buf); } field->value[index] = td->inputmode_value; *inputmode_found = true; return true; case HID_DG_CONTACTMAX: if (cls->maxcontacts) { max = min_t(int, field->logical_maximum, cls->maxcontacts); if (field->value[index] != max) { field->value[index] = max; return true; } } break; case HID_DG_LATENCYMODE: field->value[index] = latency; return true; case HID_DG_SURFACESWITCH: field->value[index] = surface_switch; return true; case HID_DG_BUTTONSWITCH: field->value[index] = button_switch; return true; } return false; /* no need to update the report */ } static void mt_set_modes(struct hid_device *hdev, enum latency_mode latency, bool surface_switch, bool button_switch) { struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_usage *usage; int i, j; bool update_report; bool inputmode_found = false; rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { update_report = false; for (i = 0; i < rep->maxfield; i++) { /* Ignore if report count is out of bounds. */ if (rep->field[i]->report_count < 1) continue; for (j = 0; j < rep->field[i]->maxusage; j++) { usage = &rep->field[i]->usage[j]; if (mt_need_to_apply_feature(hdev, rep->field[i], usage, latency, surface_switch, button_switch, &inputmode_found)) update_report = true; } } if (update_report) hid_hw_request(hdev, rep, HID_REQ_SET_REPORT); } } static void mt_post_parse_default_settings(struct mt_device *td, struct mt_application *app) { __s32 quirks = app->quirks; /* unknown serial device needs special quirks */ if (list_is_singular(&app->mt_usages)) { quirks |= MT_QUIRK_ALWAYS_VALID; quirks &= ~MT_QUIRK_NOT_SEEN_MEANS_UP; quirks &= ~MT_QUIRK_VALID_IS_INRANGE; quirks &= ~MT_QUIRK_VALID_IS_CONFIDENCE; quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } app->quirks = quirks; } static void mt_post_parse(struct mt_device *td, struct mt_application *app) { if (!app->have_contact_count) app->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE; } static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct mt_device *td = hid_get_drvdata(hdev); const char *suffix = NULL; struct mt_report_data *rdata; struct mt_application *mt_application = NULL; struct hid_report *report; int ret; list_for_each_entry(report, &hi->reports, hidinput_list) { rdata = mt_find_report_data(td, report); if (!rdata) { hid_err(hdev, "failed to allocate data for report\n"); return -ENOMEM; } mt_application = rdata->application; if (rdata->is_mt_collection) { ret = mt_touch_input_configured(hdev, hi, mt_application); if (ret) return ret; } } switch (hi->application) { case HID_GD_KEYBOARD: case HID_GD_KEYPAD: case HID_GD_MOUSE: case HID_DG_TOUCHPAD: case HID_GD_SYSTEM_CONTROL: case HID_CP_CONSUMER_CONTROL: case HID_GD_WIRELESS_RADIO_CTLS: case HID_GD_SYSTEM_MULTIAXIS: /* already handled by hid core */ break; case HID_DG_TOUCHSCREEN: /* we do not set suffix = "Touchscreen" */ hi->input->name = hdev->name; break; case HID_VD_ASUS_CUSTOM_MEDIA_KEYS: suffix = "Custom Media Keys"; break; case HID_DG_STYLUS: /* force BTN_STYLUS to allow tablet matching in udev */ __set_bit(BTN_STYLUS, 
hi->input->keybit); break; default: suffix = "UNKNOWN"; break; } if (suffix) hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); return 0; } static void mt_fix_const_field(struct hid_field *field, unsigned int usage) { if (field->usage[0].hid != usage || !(field->flags & HID_MAIN_ITEM_CONSTANT)) return; field->flags &= ~HID_MAIN_ITEM_CONSTANT; field->flags |= HID_MAIN_ITEM_VARIABLE; } static void mt_fix_const_fields(struct hid_device *hdev, unsigned int usage) { struct hid_report *report; int i; list_for_each_entry(report, &hdev->report_enum[HID_INPUT_REPORT].report_list, list) { if (!report->maxfield) continue; for (i = 0; i < report->maxfield; i++) if (report->field[i]->maxusage >= 1) mt_fix_const_field(report->field[i], usage); } } static void mt_release_contacts(struct hid_device *hid) { struct hid_input *hidinput; struct mt_application *application; struct mt_device *td = hid_get_drvdata(hid); list_for_each_entry(hidinput, &hid->inputs, list) { struct input_dev *input_dev = hidinput->input; struct input_mt *mt = input_dev->mt; int i; if (mt) { for (i = 0; i < mt->num_slots; i++) { input_mt_slot(input_dev, i); input_mt_report_slot_inactive(input_dev); } input_mt_sync_frame(input_dev); input_sync(input_dev); } } list_for_each_entry(application, &td->applications, list) { application->num_received = 0; } } static void mt_expired_timeout(struct timer_list *t) { struct mt_device *td = from_timer(td, t, release_timer); struct hid_device *hdev = td->hdev; /* * An input report came in just before we release the sticky fingers, * it will take care of the sticky fingers. */ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) mt_release_contacts(hdev); clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret, i; struct mt_device *td; const struct mt_class *mtclass = mt_classes; /* MT_CLS_DEFAULT */ for (i = 0; mt_classes[i].name ; i++) { if (id->driver_data == mt_classes[i].name) { mtclass = &(mt_classes[i]); break; } } td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL); if (!td) { dev_err(&hdev->dev, "cannot allocate multitouch data\n"); return -ENOMEM; } td->hdev = hdev; td->mtclass = *mtclass; td->inputmode_value = MT_INPUTMODE_TOUCHSCREEN; hid_set_drvdata(hdev, td); INIT_LIST_HEAD(&td->applications); INIT_LIST_HEAD(&td->reports); if (id->vendor == HID_ANY_ID && id->product == HID_ANY_ID) td->serial_maybe = true; /* Orientation is inverted if the X or Y axes are * flipped, but normalized if both are inverted. */ if (hdev->quirks & (HID_QUIRK_X_INVERT | HID_QUIRK_Y_INVERT) && !((hdev->quirks & HID_QUIRK_X_INVERT) && (hdev->quirks & HID_QUIRK_Y_INVERT))) td->mtclass.quirks = MT_QUIRK_ORIENTATION_INVERT; /* This allows the driver to correctly support devices * that emit events over several HID messages. */ hdev->quirks |= HID_QUIRK_NO_INPUT_SYNC; /* * This allows the driver to handle different input sensors * that emits events through different applications on the same HID * device. 
*/ hdev->quirks |= HID_QUIRK_INPUT_PER_APP; if (id->group != HID_GROUP_MULTITOUCH_WIN_8) hdev->quirks |= HID_QUIRK_MULTI_INPUT; if (mtclass->quirks & MT_QUIRK_FORCE_MULTI_INPUT) { hdev->quirks &= ~HID_QUIRK_INPUT_PER_APP; hdev->quirks |= HID_QUIRK_MULTI_INPUT; } timer_setup(&td->release_timer, mt_expired_timeout, 0); ret = hid_parse(hdev); if (ret != 0) return ret; if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) mt_fix_const_fields(hdev, HID_DG_CONTACTID); ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) return ret; ret = sysfs_create_group(&hdev->dev.kobj, &mt_attribute_group); if (ret) dev_warn(&hdev->dev, "Cannot allocate sysfs group for %s\n", hdev->name); mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true); return 0; } static int mt_suspend(struct hid_device *hdev, pm_message_t state) { struct mt_device *td = hid_get_drvdata(hdev); /* High latency is desirable for power savings during S3/S0ix */ if ((td->mtclass.quirks & MT_QUIRK_DISABLE_WAKEUP) || !hid_hw_may_wakeup(hdev)) mt_set_modes(hdev, HID_LATENCY_HIGH, false, false); else mt_set_modes(hdev, HID_LATENCY_HIGH, true, true); return 0; } static int mt_reset_resume(struct hid_device *hdev) { mt_release_contacts(hdev); mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true); return 0; } static int mt_resume(struct hid_device *hdev) { /* Some Elan legacy devices require SET_IDLE to be set on resume. * It should be safe to send it to other devices too. * Tested on 3M, Stantum, Cypress, Zytronic, eGalax, and Elan panels. */ hid_hw_idle(hdev, 0, 0, HID_REQ_SET_IDLE); mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true); return 0; } static void mt_remove(struct hid_device *hdev) { struct mt_device *td = hid_get_drvdata(hdev); del_timer_sync(&td->release_timer); sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group); hid_hw_stop(hdev); } /* * This list contains only: * - VID/PID of products not working with the default multitouch handling * - 2 generic rules. * So there is no point in adding here any device with MT_CLS_DEFAULT. 
*/ static const struct hid_device_id mt_devices[] = { /* 3M panels */ { .driver_data = MT_CLS_3M, MT_USB_DEVICE(USB_VENDOR_ID_3M, USB_DEVICE_ID_3M1968) }, { .driver_data = MT_CLS_3M, MT_USB_DEVICE(USB_VENDOR_ID_3M, USB_DEVICE_ID_3M2256) }, { .driver_data = MT_CLS_3M, MT_USB_DEVICE(USB_VENDOR_ID_3M, USB_DEVICE_ID_3M3266) }, /* Anton devices */ { .driver_data = MT_CLS_EXPORT_ALL_INPUTS, MT_USB_DEVICE(USB_VENDOR_ID_ANTON, USB_DEVICE_ID_ANTON_TOUCH_PAD) }, /* Asus T101HA */ { .driver_data = MT_CLS_WIN_8_DISABLE_WAKEUP, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T101HA_KEYBOARD) }, /* Asus T304UA */ { .driver_data = MT_CLS_ASUS, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T304_KEYBOARD) }, /* Atmel panels */ { .driver_data = MT_CLS_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_ATMEL, USB_DEVICE_ID_ATMEL_MXT_DIGITIZER) }, /* Baanto multitouch devices */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_BAANTO, USB_DEVICE_ID_BAANTO_MT_190W2) }, /* Cando panels */ { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_15_6) }, /* Chunghwa Telecom touch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CHUNGHWAT, USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH) }, /* CJTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CJTOUCH, USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CJTOUCH, USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040) }, /* CVTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_CVTOUCH, USB_DEVICE_ID_CVTOUCH_SCREEN) }, /* eGalax devices (SAW) */ { .driver_data = MT_CLS_EXPORT_ALL_INPUTS, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER) }, /* eGalax devices (resistive) */ { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_480D) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_480E) }, /* eGalax devices (capacitive) */ { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7207) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_720C) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7224) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_722A) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_725E) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7262) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_726B) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72A1) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72AA) }, { .driver_data = MT_CLS_EGALAX, HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72C4) }, { .driver_data = MT_CLS_EGALAX, HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72D0) }, { .driver_data = 
MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72FA) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7302) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7349) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_73F7) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_A001) }, { .driver_data = MT_CLS_EGALAX, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_C002) }, /* Elan devices */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ELAN, 0x313a) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ELAN, 0x3148) }, /* Elitegroup panel */ { .driver_data = MT_CLS_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_ELITEGROUP, USB_DEVICE_ID_ELITEGROUP_05D8) }, /* Flatfrog Panels */ { .driver_data = MT_CLS_FLATFROG, MT_USB_DEVICE(USB_VENDOR_ID_FLATFROG, USB_DEVICE_ID_MULTITOUCH_3200) }, /* FocalTech Panels */ { .driver_data = MT_CLS_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_FOCALTECH_FTXXXX_MULTITOUCH) }, /* GeneralTouch panel */ { .driver_data = MT_CLS_GENERALTOUCH_TWOFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN7_TWOFINGERS) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PWT_TENFINGERS) }, { .driver_data = MT_CLS_GENERALTOUCH_TWOFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_0101) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_0102) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_0106) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_010A) }, { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100) }, /* Gametel game controller */ { .driver_data = MT_CLS_NSMU, MT_BT_DEVICE(USB_VENDOR_ID_FRUCTEL, USB_DEVICE_ID_GAMETEL_MT_MODE) }, /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_GOODTOUCH, USB_DEVICE_ID_GOODTOUCH_000f) }, /* Hanvon panels */ { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTID, MT_USB_DEVICE(USB_VENDOR_ID_HANVON_ALT, USB_DEVICE_ID_HANVON_ALT_MULTITOUCH) }, /* HONOR GLO-GXXX panel */ { .driver_data = MT_CLS_VTL, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, 0x347d, 0x7853) }, /* Ilitek dual touch panel */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_ILITEK, USB_DEVICE_ID_ILITEK_MULTITOUCH) }, /* LG Melfas panel */ { .driver_data = MT_CLS_LG, HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) }, { .driver_data = MT_CLS_LG, HID_DEVICE(BUS_I2C, HID_GROUP_GENERIC, USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_7010) }, /* Lenovo X1 TAB Gen 2 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, /* Lenovo X1 TAB Gen 3 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, 
USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, /* Lenovo X12 TAB Gen 1 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X12_TAB) }, /* MosArt panels */ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_T91MT)}, { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUSTEK_MULTITOUCH_YFO) }, { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_TOUCHSCREEN_MOSART) }, /* Novatek Panel */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_NOVATEK, USB_DEVICE_ID_NOVATEK_PCT) }, /* Ntrig Panel */ { .driver_data = MT_CLS_NSMU, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_NTRIG, 0x1b05) }, /* Panasonic panels */ { .driver_data = MT_CLS_PANASONIC, MT_USB_DEVICE(USB_VENDOR_ID_PANASONIC, USB_DEVICE_ID_PANABOARD_UBT780) }, { .driver_data = MT_CLS_PANASONIC, MT_USB_DEVICE(USB_VENDOR_ID_PANASONIC, USB_DEVICE_ID_PANABOARD_UBT880) }, /* PixArt optical touch screen */ { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN) }, { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1) }, { .driver_data = MT_CLS_INRANGE_CONTACTNUMBER, MT_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2) }, /* PixCir-based panels */ { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTID, MT_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_PIXCIR_MULTI_TOUCH) }, /* Quanta-based panels */ { .driver_data = MT_CLS_CONFIDENCE_CONTACT_ID, MT_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001) }, /* Razer touchpads */ { .driver_data = MT_CLS_RAZER_BLADE_STEALTH, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0x8323) }, /* Smart Tech panels */ { .driver_data = MT_CLS_SMART_TECH, MT_USB_DEVICE(0x0b8c, 0x0092)}, /* Stantum panels */ { .driver_data = MT_CLS_CONFIDENCE, MT_USB_DEVICE(USB_VENDOR_ID_STANTUM_STM, USB_DEVICE_ID_MTP_STM)}, /* Synaptics devices */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xcddc) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce09) }, /* TopSeed panels */ { .driver_data = MT_CLS_TOPSEED, MT_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_PERIPAD_701) }, /* Touch International panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_TOUCH_INTL, USB_DEVICE_ID_TOUCH_INTL_MULTI_TOUCH) }, /* Unitec panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_UNITEC, USB_DEVICE_ID_UNITEC_USB_TOUCH_0709) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_UNITEC, USB_DEVICE_ID_UNITEC_USB_TOUCH_0A19) }, /* VTL panels */ { .driver_data = MT_CLS_VTL, MT_USB_DEVICE(USB_VENDOR_ID_VTL, USB_DEVICE_ID_VTL_MULTITOUCH_FF3F) }, /* Winbond Electronics Corp. 
*/ { .driver_data = MT_CLS_WIN_8_NO_STICKY_FINGERS, HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_WINBOND, USB_DEVICE_ID_TSTP_MTOUCH) }, /* Wistron panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_WISTRON, USB_DEVICE_ID_WISTRON_OPTICAL_TOUCH) }, /* XAT */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XAT, USB_DEVICE_ID_XAT_CSR) }, /* Xiroku */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_SPX) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_MPX) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_SPX1) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_MPX1) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR1) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_SPX2) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_MPX2) }, { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR2) }, /* Google MT devices */ { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) }, { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WHISKERS) }, /* Generic MT device */ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, /* Generic Win 8 certified MT device */ { .driver_data = MT_CLS_WIN_8, HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH_WIN_8, HID_ANY_ID, HID_ANY_ID) }, { } }; MODULE_DEVICE_TABLE(hid, mt_devices); static const struct hid_usage_id mt_grabbed_usages[] = { { HID_ANY_ID, HID_ANY_ID, HID_ANY_ID }, { HID_ANY_ID - 1, HID_ANY_ID - 1, HID_ANY_ID - 1} }; static struct hid_driver mt_driver = { .name = "hid-multitouch", .id_table = mt_devices, .probe = mt_probe, .remove = mt_remove, .input_mapping = mt_input_mapping, .input_mapped = mt_input_mapped, .input_configured = mt_input_configured, .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, .report = mt_report, .suspend = pm_ptr(mt_suspend), .reset_resume = pm_ptr(mt_reset_resume), .resume = pm_ptr(mt_resume), }; module_hid_driver(mt_driver);
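/*
 * Illustrative sketch only, not part of the driver: supporting a panel that
 * needs non-default handling means adding one entry to mt_devices[] above,
 * pairing a class from mt_classes with the device's VID/PID. The two
 * identifiers in the example entry are hypothetical placeholders; real
 * entries use constants from hid-ids.h.
 *
 *	{ .driver_data = MT_CLS_NSMU,
 *		MT_USB_DEVICE(USB_VENDOR_ID_EXAMPLE,
 *			USB_DEVICE_ID_EXAMPLE_TOUCH) },
 *
 * mt_probe() matches id->driver_data against mt_classes[].name to select the
 * class, so devices that work with MT_CLS_DEFAULT need no entry at all.
 */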
// SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> #include <linux/highuid.h> #include <linux/cred.h> #include <linux/securebits.h> #include <linux/security.h> #include <linux/keyctl.h> #include <linux/key-type.h> #include <keys/user-type.h> #include <linux/seq_file.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/ctype.h> #include <linux/projid.h> #include <linux/fs_struct.h> #include <linux/bsearch.h> #include <linux/sort.h> static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); static void free_user_ns(struct work_struct *work); static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid) { return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES); } static void dec_user_namespaces(struct ucounts *ucounts) { return 
dec_ucount(ucounts, UCOUNT_USER_NAMESPACES); } static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) { /* Start with the same capabilities as init but useless for doing * anything as the capabilities are bound to the new user namespace. */ cred->securebits = SECUREBITS_DEFAULT; cred->cap_inheritable = CAP_EMPTY_SET; cred->cap_permitted = CAP_FULL_SET; cred->cap_effective = CAP_FULL_SET; cred->cap_ambient = CAP_EMPTY_SET; cred->cap_bset = CAP_FULL_SET; #ifdef CONFIG_KEYS key_put(cred->request_key_auth); cred->request_key_auth = NULL; #endif /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ cred->user_ns = user_ns; } static unsigned long enforced_nproc_rlimit(void) { unsigned long limit = RLIM_INFINITY; /* Is RLIMIT_NPROC currently enforced? */ if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) || (current_user_ns() != &init_user_ns)) limit = rlimit(RLIMIT_NPROC); return limit; } /* * Create a new user namespace, deriving the creator from the user in the * passed credentials, and replacing that user with the new root user for the * new namespace. * * This is called by copy_creds(), which will finish setting the target task's * credentials. */ int create_user_ns(struct cred *new) { struct user_namespace *ns, *parent_ns = new->user_ns; kuid_t owner = new->euid; kgid_t group = new->egid; struct ucounts *ucounts; int ret, i; ret = -ENOSPC; if (parent_ns->level > 32) goto fail; ucounts = inc_user_namespaces(parent_ns, owner); if (!ucounts) goto fail; /* * Verify that we can not violate the policy of which files * may be accessed that is specified by the root directory, * by verifying that the root directory is at the root of the * mount namespace which allows all files to be accessed. */ ret = -EPERM; if (current_chrooted()) goto fail_dec; /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who * created a user_namespace. */ ret = -EPERM; if (!kuid_has_mapping(parent_ns, owner) || !kgid_has_mapping(parent_ns, group)) goto fail_dec; ret = security_create_user_ns(new); if (ret < 0) goto fail_dec; ret = -ENOMEM; ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); if (!ns) goto fail_dec; ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP); ret = ns_alloc_inum(&ns->ns); if (ret) goto fail_free; ns->ns.ops = &userns_operations; refcount_set(&ns->ns.count, 1); /* Leave the new->user_ns reference with the new user namespace. 
*/ ns->parent = parent_ns; ns->level = parent_ns->level + 1; ns->owner = owner; ns->group = group; INIT_WORK(&ns->work, free_user_ns); for (i = 0; i < UCOUNT_COUNTS; i++) { ns->ucount_max[i] = INT_MAX; } set_userns_rlimit_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit()); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE)); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING)); set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK)); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ mutex_lock(&userns_state_mutex); ns->flags = parent_ns->flags; mutex_unlock(&userns_state_mutex); #ifdef CONFIG_KEYS INIT_LIST_HEAD(&ns->keyring_name_list); init_rwsem(&ns->keyring_sem); #endif ret = -ENOMEM; if (!setup_userns_sysctls(ns)) goto fail_keyring; set_cred_user_ns(new, ns); return 0; fail_keyring: #ifdef CONFIG_PERSISTENT_KEYRINGS key_put(ns->persistent_keyring_register); #endif ns_free_inum(&ns->ns); fail_free: kmem_cache_free(user_ns_cachep, ns); fail_dec: dec_user_namespaces(ucounts); fail: return ret; } int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) { struct cred *cred; int err = -ENOMEM; if (!(unshare_flags & CLONE_NEWUSER)) return 0; cred = prepare_creds(); if (cred) { err = create_user_ns(cred); if (err) put_cred(cred); else *new_cred = cred; } return err; } static void free_user_ns(struct work_struct *work) { struct user_namespace *parent, *ns = container_of(work, struct user_namespace, work); do { struct ucounts *ucounts = ns->ucounts; parent = ns->parent; if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(ns->gid_map.forward); kfree(ns->gid_map.reverse); } if (ns->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(ns->uid_map.forward); kfree(ns->uid_map.reverse); } if (ns->projid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(ns->projid_map.forward); kfree(ns->projid_map.reverse); } #if IS_ENABLED(CONFIG_BINFMT_MISC) kfree(ns->binfmt_misc); #endif retire_userns_sysctls(ns); key_free_user_ns(ns); ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); dec_user_namespaces(ucounts); ns = parent; } while (refcount_dec_and_test(&parent->ns.count)); } void __put_user_ns(struct user_namespace *ns) { schedule_work(&ns->work); } EXPORT_SYMBOL(__put_user_ns); /* * struct idmap_key - holds the information necessary to find an idmapping in a * sorted idmap array. It is passed to cmp_map_id() as first argument. */ struct idmap_key { bool map_up; /* true -> id from kid; false -> kid from id */ u32 id; /* id to find */ u32 count; /* == 0 unless used with map_id_range_down() */ }; /* * cmp_map_id - Function to be passed to bsearch() to find the requested * idmapping. Expects struct idmap_key to be passed via @k. */ static int cmp_map_id(const void *k, const void *e) { u32 first, last, id2; const struct idmap_key *key = k; const struct uid_gid_extent *el = e; id2 = key->id + key->count - 1; /* handle map_id_{down,up}() */ if (key->map_up) first = el->lower_first; else first = el->first; last = first + el->count - 1; if (key->id >= first && key->id <= last && (id2 >= first && id2 <= last)) return 0; if (key->id < first || id2 < first) return -1; return 1; } /* * map_id_range_down_max - Find idmap via binary search in ordered idmap array. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. 
*/ static struct uid_gid_extent * map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { struct idmap_key key; key.map_up = false; key.count = count; key.id = id; return bsearch(&key, map->forward, extents, sizeof(struct uid_gid_extent), cmp_map_id); } /* * map_id_range_down_base - Find idmap via binary search in static extent array. * Can only be called if number of mappings is equal or less than * UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * map_id_range_down_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { unsigned idx; u32 first, last, id2; id2 = id + count - 1; /* Find the matching extent */ for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; if (id >= first && id <= last && (id2 >= first && id2 <= last)) return &map->extent[idx]; } return NULL; } static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) { struct uid_gid_extent *extent; unsigned extents = map->nr_extents; smp_rmb(); if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) extent = map_id_range_down_base(extents, map, id, count); else extent = map_id_range_down_max(extents, map, id, count); /* Map the id or note failure */ if (extent) id = (id - extent->first) + extent->lower_first; else id = (u32) -1; return id; } u32 map_id_down(struct uid_gid_map *map, u32 id) { return map_id_range_down(map, id, 1); } /* * map_id_up_base - Find idmap via binary search in static extent array. * Can only be called if number of mappings is equal or less than * UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id) { unsigned idx; u32 first, last; /* Find the matching extent */ for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; if (id >= first && id <= last) return &map->extent[idx]; } return NULL; } /* * map_id_up_max - Find idmap via binary search in ordered idmap array. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id) { struct idmap_key key; key.map_up = true; key.count = 1; key.id = id; return bsearch(&key, map->reverse, extents, sizeof(struct uid_gid_extent), cmp_map_id); } u32 map_id_up(struct uid_gid_map *map, u32 id) { struct uid_gid_extent *extent; unsigned extents = map->nr_extents; smp_rmb(); if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) extent = map_id_up_base(extents, map, id); else extent = map_id_up_max(extents, map, id); /* Map the id or note failure */ if (extent) id = (id - extent->lower_first) + extent->first; else id = (u32) -1; return id; } /** * make_kuid - Map a user-namespace uid pair into a kuid. * @ns: User namespace that the uid is in * @uid: User identifier * * Maps a user-namespace uid pair into a kernel internal kuid, * and returns that kuid. * * When there is no mapping defined for the user-namespace uid * pair INVALID_UID is returned. Callers are expected to test * for and handle INVALID_UID being returned. INVALID_UID * may be tested for using uid_valid(). */ kuid_t make_kuid(struct user_namespace *ns, uid_t uid) { /* Map the uid to a global kernel uid */ return KUIDT_INIT(map_id_down(&ns->uid_map, uid)); } EXPORT_SYMBOL(make_kuid); /** * from_kuid - Create a uid from a kuid user-namespace pair. * @targ: The user namespace we want a uid in. * @kuid: The kernel internal uid to start with. 
* * Map @kuid into the user-namespace specified by @targ and * return the resulting uid. * * There is always a mapping into the initial user_namespace. * * If @kuid has no mapping in @targ (uid_t)-1 is returned. */ uid_t from_kuid(struct user_namespace *targ, kuid_t kuid) { /* Map the uid from a global kernel uid */ return map_id_up(&targ->uid_map, __kuid_val(kuid)); } EXPORT_SYMBOL(from_kuid); /** * from_kuid_munged - Create a uid from a kuid user-namespace pair. * @targ: The user namespace we want a uid in. * @kuid: The kernel internal uid to start with. * * Map @kuid into the user-namespace specified by @targ and * return the resulting uid. * * There is always a mapping into the initial user_namespace. * * Unlike from_kuid from_kuid_munged never fails and always * returns a valid uid. This makes from_kuid_munged appropriate * for use in syscalls like stat and getuid where failing the * system call and failing to provide a valid uid are not an * options. * * If @kuid has no mapping in @targ overflowuid is returned. */ uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid) { uid_t uid; uid = from_kuid(targ, kuid); if (uid == (uid_t) -1) uid = overflowuid; return uid; } EXPORT_SYMBOL(from_kuid_munged); /** * make_kgid - Map a user-namespace gid pair into a kgid. * @ns: User namespace that the gid is in * @gid: group identifier * * Maps a user-namespace gid pair into a kernel internal kgid, * and returns that kgid. * * When there is no mapping defined for the user-namespace gid * pair INVALID_GID is returned. Callers are expected to test * for and handle INVALID_GID being returned. INVALID_GID may be * tested for using gid_valid(). */ kgid_t make_kgid(struct user_namespace *ns, gid_t gid) { /* Map the gid to a global kernel gid */ return KGIDT_INIT(map_id_down(&ns->gid_map, gid)); } EXPORT_SYMBOL(make_kgid); /** * from_kgid - Create a gid from a kgid user-namespace pair. * @targ: The user namespace we want a gid in. * @kgid: The kernel internal gid to start with. * * Map @kgid into the user-namespace specified by @targ and * return the resulting gid. * * There is always a mapping into the initial user_namespace. * * If @kgid has no mapping in @targ (gid_t)-1 is returned. */ gid_t from_kgid(struct user_namespace *targ, kgid_t kgid) { /* Map the gid from a global kernel gid */ return map_id_up(&targ->gid_map, __kgid_val(kgid)); } EXPORT_SYMBOL(from_kgid); /** * from_kgid_munged - Create a gid from a kgid user-namespace pair. * @targ: The user namespace we want a gid in. * @kgid: The kernel internal gid to start with. * * Map @kgid into the user-namespace specified by @targ and * return the resulting gid. * * There is always a mapping into the initial user_namespace. * * Unlike from_kgid from_kgid_munged never fails and always * returns a valid gid. This makes from_kgid_munged appropriate * for use in syscalls like stat and getgid where failing the * system call and failing to provide a valid gid are not options. * * If @kgid has no mapping in @targ overflowgid is returned. */ gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) { gid_t gid; gid = from_kgid(targ, kgid); if (gid == (gid_t) -1) gid = overflowgid; return gid; } EXPORT_SYMBOL(from_kgid_munged); /** * make_kprojid - Map a user-namespace projid pair into a kprojid. * @ns: User namespace that the projid is in * @projid: Project identifier * * Maps a user-namespace uid pair into a kernel internal kuid, * and returns that kuid. 
* * When there is no mapping defined for the user-namespace projid * pair INVALID_PROJID is returned. Callers are expected to test * for and handle INVALID_PROJID being returned. INVALID_PROJID * may be tested for using projid_valid(). */ kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid) { /* Map the uid to a global kernel uid */ return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid)); } EXPORT_SYMBOL(make_kprojid); /** * from_kprojid - Create a projid from a kprojid user-namespace pair. * @targ: The user namespace we want a projid in. * @kprojid: The kernel internal project identifier to start with. * * Map @kprojid into the user-namespace specified by @targ and * return the resulting projid. * * There is always a mapping into the initial user_namespace. * * If @kprojid has no mapping in @targ (projid_t)-1 is returned. */ projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid) { /* Map the uid from a global kernel uid */ return map_id_up(&targ->projid_map, __kprojid_val(kprojid)); } EXPORT_SYMBOL(from_kprojid); /** * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair. * @targ: The user namespace we want a projid in. * @kprojid: The kernel internal projid to start with. * * Map @kprojid into the user-namespace specified by @targ and * return the resulting projid. * * There is always a mapping into the initial user_namespace. * * Unlike from_kprojid from_kprojid_munged never fails and always * returns a valid projid. This makes from_kprojid_munged * appropriate for use in syscalls like stat and where * failing the system call and failing to provide a valid projid are * not an options. * * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned. */ projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid) { projid_t projid; projid = from_kprojid(targ, kprojid); if (projid == (projid_t) -1) projid = OVERFLOW_PROJID; return projid; } EXPORT_SYMBOL(from_kprojid_munged); static int uid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; struct uid_gid_extent *extent = v; struct user_namespace *lower_ns; uid_t lower; lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first)); seq_printf(seq, "%10u %10u %10u\n", extent->first, lower, extent->count); return 0; } static int gid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; struct uid_gid_extent *extent = v; struct user_namespace *lower_ns; gid_t lower; lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first)); seq_printf(seq, "%10u %10u %10u\n", extent->first, lower, extent->count); return 0; } static int projid_m_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; struct uid_gid_extent *extent = v; struct user_namespace *lower_ns; projid_t lower; lower_ns = seq_user_ns(seq); if ((lower_ns == ns) && lower_ns->parent) lower_ns = lower_ns->parent; lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first)); seq_printf(seq, "%10u %10u %10u\n", extent->first, lower, extent->count); return 0; } static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) { loff_t pos = *ppos; unsigned extents = map->nr_extents; smp_rmb(); if (pos >= extents) return NULL; if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) return &map->extent[pos]; return &map->forward[pos]; 
} static void *uid_m_start(struct seq_file *seq, loff_t *ppos) { struct user_namespace *ns = seq->private; return m_start(seq, ppos, &ns->uid_map); } static void *gid_m_start(struct seq_file *seq, loff_t *ppos) { struct user_namespace *ns = seq->private; return m_start(seq, ppos, &ns->gid_map); } static void *projid_m_start(struct seq_file *seq, loff_t *ppos) { struct user_namespace *ns = seq->private; return m_start(seq, ppos, &ns->projid_map); } static void *m_next(struct seq_file *seq, void *v, loff_t *pos) { (*pos)++; return seq->op->start(seq, pos); } static void m_stop(struct seq_file *seq, void *v) { return; } const struct seq_operations proc_uid_seq_operations = { .start = uid_m_start, .stop = m_stop, .next = m_next, .show = uid_m_show, }; const struct seq_operations proc_gid_seq_operations = { .start = gid_m_start, .stop = m_stop, .next = m_next, .show = gid_m_show, }; const struct seq_operations proc_projid_seq_operations = { .start = projid_m_start, .stop = m_stop, .next = m_next, .show = projid_m_show, }; static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent *extent) { u32 upper_first, lower_first, upper_last, lower_last; unsigned idx; upper_first = extent->first; lower_first = extent->lower_first; upper_last = upper_first + extent->count - 1; lower_last = lower_first + extent->count - 1; for (idx = 0; idx < new_map->nr_extents; idx++) { u32 prev_upper_first, prev_lower_first; u32 prev_upper_last, prev_lower_last; struct uid_gid_extent *prev; if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) prev = &new_map->extent[idx]; else prev = &new_map->forward[idx]; prev_upper_first = prev->first; prev_lower_first = prev->lower_first; prev_upper_last = prev_upper_first + prev->count - 1; prev_lower_last = prev_lower_first + prev->count - 1; /* Does the upper range intersect a previous extent? */ if ((prev_upper_first <= upper_last) && (prev_upper_last >= upper_first)) return true; /* Does the lower range intersect a previous extent? */ if ((prev_lower_first <= lower_last) && (prev_lower_last >= lower_first)) return true; } return false; } /* * insert_extent - Safely insert a new idmap extent into struct uid_gid_map. * Takes care to allocate a 4K block of memory if the number of mappings exceeds * UID_GID_MAP_MAX_BASE_EXTENTS. */ static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent) { struct uid_gid_extent *dest; if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) { struct uid_gid_extent *forward; /* Allocate memory for 340 mappings. */ forward = kmalloc_array(UID_GID_MAP_MAX_EXTENTS, sizeof(struct uid_gid_extent), GFP_KERNEL); if (!forward) return -ENOMEM; /* Copy over memory. Only set up memory for the forward pointer. * Defer the memory setup for the reverse pointer. 
*/ memcpy(forward, map->extent, map->nr_extents * sizeof(map->extent[0])); map->forward = forward; map->reverse = NULL; } if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS) dest = &map->extent[map->nr_extents]; else dest = &map->forward[map->nr_extents]; *dest = *extent; map->nr_extents++; return 0; } /* cmp function to sort() forward mappings */ static int cmp_extents_forward(const void *a, const void *b) { const struct uid_gid_extent *e1 = a; const struct uid_gid_extent *e2 = b; if (e1->first < e2->first) return -1; if (e1->first > e2->first) return 1; return 0; } /* cmp function to sort() reverse mappings */ static int cmp_extents_reverse(const void *a, const void *b) { const struct uid_gid_extent *e1 = a; const struct uid_gid_extent *e2 = b; if (e1->lower_first < e2->lower_first) return -1; if (e1->lower_first > e2->lower_first) return 1; return 0; } /* * sort_idmaps - Sorts an array of idmap entries. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. */ static int sort_idmaps(struct uid_gid_map *map) { if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) return 0; /* Sort forward array. */ sort(map->forward, map->nr_extents, sizeof(struct uid_gid_extent), cmp_extents_forward, NULL); /* Only copy the memory from forward we actually need. */ map->reverse = kmemdup(map->forward, map->nr_extents * sizeof(struct uid_gid_extent), GFP_KERNEL); if (!map->reverse) return -ENOMEM; /* Sort reverse array. */ sort(map->reverse, map->nr_extents, sizeof(struct uid_gid_extent), cmp_extents_reverse, NULL); return 0; } /** * verify_root_map() - check the uid 0 mapping * @file: idmapping file * @map_ns: user namespace of the target process * @new_map: requested idmap * * If a process requests mapping parent uid 0 into the new ns, verify that the * process writing the map had the CAP_SETFCAP capability as the target process * will be able to write fscaps that are valid in ancestor user namespaces. * * Return: true if the mapping is allowed, false if not. */ static bool verify_root_map(const struct file *file, struct user_namespace *map_ns, struct uid_gid_map *new_map) { int idx; const struct user_namespace *file_ns = file->f_cred->user_ns; struct uid_gid_extent *extent0 = NULL; for (idx = 0; idx < new_map->nr_extents; idx++) { if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) extent0 = &new_map->extent[idx]; else extent0 = &new_map->forward[idx]; if (extent0->lower_first == 0) break; extent0 = NULL; } if (!extent0) return true; if (map_ns == file_ns) { /* The process unshared its ns and is writing to its own * /proc/self/uid_map. User already has full capabilites in * the new namespace. Verify that the parent had CAP_SETFCAP * when it unshared. * */ if (!file_ns->parent_could_setfcap) return false; } else { /* Process p1 is writing to uid_map of p2, who is in a child * user namespace to p1's. 
Verify that the opener of the map * file has CAP_SETFCAP against the parent of the new map * namespace */ if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP)) return false; } return true; } static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, struct uid_gid_map *map, struct uid_gid_map *parent_map) { struct seq_file *seq = file->private_data; struct user_namespace *map_ns = seq->private; struct uid_gid_map new_map; unsigned idx; struct uid_gid_extent extent; char *kbuf, *pos, *next_line; ssize_t ret; /* Only allow < page size writes at the beginning of the file */ if ((*ppos != 0) || (count >= PAGE_SIZE)) return -EINVAL; /* Slurp in the user data */ kbuf = memdup_user_nul(buf, count); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); /* * The userns_state_mutex serializes all writes to any given map. * * Any map is only ever written once. * * An id map fits within 1 cache line on most architectures. * * On read nothing needs to be done unless you are on an * architecture with a crazy cache coherency model like alpha. * * There is a one time data dependency between reading the * count of the extents and the values of the extents. The * desired behavior is to see the values of the extents that * were written before the count of the extents. * * To achieve this smp_wmb() is used on guarantee the write * order and smp_rmb() is guaranteed that we don't have crazy * architectures returning stale data. */ mutex_lock(&userns_state_mutex); memset(&new_map, 0, sizeof(struct uid_gid_map)); ret = -EPERM; /* Only allow one successful write to the map */ if (map->nr_extents != 0) goto out; /* * Adjusting namespace settings requires capabilities on the target. */ if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN)) goto out; /* Parse the user data */ ret = -EINVAL; pos = kbuf; for (; pos; pos = next_line) { /* Find the end of line and ensure I don't look past it */ next_line = strchr(pos, '\n'); if (next_line) { *next_line = '\0'; next_line++; if (*next_line == '\0') next_line = NULL; } pos = skip_spaces(pos); extent.first = simple_strtoul(pos, &pos, 10); if (!isspace(*pos)) goto out; pos = skip_spaces(pos); extent.lower_first = simple_strtoul(pos, &pos, 10); if (!isspace(*pos)) goto out; pos = skip_spaces(pos); extent.count = simple_strtoul(pos, &pos, 10); if (*pos && !isspace(*pos)) goto out; /* Verify there is not trailing junk on the line */ pos = skip_spaces(pos); if (*pos != '\0') goto out; /* Verify we have been given valid starting values */ if ((extent.first == (u32) -1) || (extent.lower_first == (u32) -1)) goto out; /* Verify count is not zero and does not cause the * extent to wrap */ if ((extent.first + extent.count) <= extent.first) goto out; if ((extent.lower_first + extent.count) <= extent.lower_first) goto out; /* Do the ranges in extent overlap any previous extents? */ if (mappings_overlap(&new_map, &extent)) goto out; if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS && (next_line != NULL)) goto out; ret = insert_extent(&new_map, &extent); if (ret < 0) goto out; ret = -EINVAL; } /* Be very certain the new map actually exists */ if (new_map.nr_extents == 0) goto out; ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map)) goto out; ret = -EPERM; /* Map the lower ids from the parent user namespace to the * kernel global id space. 
*/ for (idx = 0; idx < new_map.nr_extents; idx++) { struct uid_gid_extent *e; u32 lower_first; if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) e = &new_map.extent[idx]; else e = &new_map.forward[idx]; lower_first = map_id_range_down(parent_map, e->lower_first, e->count); /* Fail if we can not map the specified extent to * the kernel global id space. */ if (lower_first == (u32) -1) goto out; e->lower_first = lower_first; } /* * If we want to use binary search for lookup, this clones the extent * array and sorts both copies. */ ret = sort_idmaps(&new_map); if (ret < 0) goto out; /* Install the map */ if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) { memcpy(map->extent, new_map.extent, new_map.nr_extents * sizeof(new_map.extent[0])); } else { map->forward = new_map.forward; map->reverse = new_map.reverse; } smp_wmb(); map->nr_extents = new_map.nr_extents; *ppos = count; ret = count; out: if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) { kfree(new_map.forward); kfree(new_map.reverse); map->forward = NULL; map->reverse = NULL; map->nr_extents = 0; } mutex_unlock(&userns_state_mutex); kfree(kbuf); return ret; } ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; if ((seq_ns != ns) && (seq_ns != ns->parent)) return -EPERM; return map_write(file, buf, size, ppos, CAP_SETUID, &ns->uid_map, &ns->parent->uid_map); } ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; if ((seq_ns != ns) && (seq_ns != ns->parent)) return -EPERM; return map_write(file, buf, size, ppos, CAP_SETGID, &ns->gid_map, &ns->parent->gid_map); } ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; struct user_namespace *seq_ns = seq_user_ns(seq); if (!ns->parent) return -EPERM; if ((seq_ns != ns) && (seq_ns != ns->parent)) return -EPERM; /* Anyone can set any valid project id no capability needed */ return map_write(file, buf, size, ppos, -1, &ns->projid_map, &ns->parent->projid_map); } static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { const struct cred *cred = file->f_cred; if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map)) return false; /* Don't allow mappings that would allow anything that wouldn't * be allowed without the establishment of unprivileged mappings. */ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && uid_eq(ns->owner, cred->euid)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); if (uid_eq(uid, cred->euid)) return true; } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && gid_eq(gid, cred->egid)) return true; } } /* Allow anyone to set a mapping that doesn't require privilege */ if (!cap_valid(cap_setid)) return true; /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. 
* And the opener of the id file also has the appropriate capability. */ if (ns_capable(ns->parent, cap_setid) && file_ns_capable(file, ns->parent, cap_setid)) return true; return false; } int proc_setgroups_show(struct seq_file *seq, void *v) { struct user_namespace *ns = seq->private; unsigned long userns_flags = READ_ONCE(ns->flags); seq_printf(seq, "%s\n", (userns_flags & USERNS_SETGROUPS_ALLOWED) ? "allow" : "deny"); return 0; } ssize_t proc_setgroups_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; char kbuf[8], *pos; bool setgroups_allowed; ssize_t ret; /* Only allow a very narrow range of strings to be written */ ret = -EINVAL; if ((*ppos != 0) || (count >= sizeof(kbuf))) goto out; /* What was written? */ ret = -EFAULT; if (copy_from_user(kbuf, buf, count)) goto out; kbuf[count] = '\0'; pos = kbuf; /* What is being requested? */ ret = -EINVAL; if (strncmp(pos, "allow", 5) == 0) { pos += 5; setgroups_allowed = true; } else if (strncmp(pos, "deny", 4) == 0) { pos += 4; setgroups_allowed = false; } else goto out; /* Verify there is not trailing junk on the line */ pos = skip_spaces(pos); if (*pos != '\0') goto out; ret = -EPERM; mutex_lock(&userns_state_mutex); if (setgroups_allowed) { /* Enabling setgroups after setgroups has been disabled * is not allowed. */ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) goto out_unlock; } else { /* Permanently disabling setgroups after setgroups has * been enabled by writing the gid_map is not allowed. */ if (ns->gid_map.nr_extents != 0) goto out_unlock; ns->flags &= ~USERNS_SETGROUPS_ALLOWED; } mutex_unlock(&userns_state_mutex); /* Report a successful write */ *ppos = count; ret = count; out: return ret; out_unlock: mutex_unlock(&userns_state_mutex); goto out; } bool userns_may_setgroups(const struct user_namespace *ns) { bool allowed; mutex_lock(&userns_state_mutex); /* It is not safe to use setgroups until a gid mapping in * the user namespace has been established. */ allowed = ns->gid_map.nr_extents != 0; /* Is setgroups allowed? */ allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); mutex_unlock(&userns_state_mutex); return allowed; } /* * Returns true if @child is the same namespace or a descendant of * @ancestor. */ bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child) { const struct user_namespace *ns; for (ns = child; ns->level > ancestor->level; ns = ns->parent) ; return (ns == ancestor); } bool current_in_userns(const struct user_namespace *target_ns) { return in_userns(target_ns, current_user_ns()); } EXPORT_SYMBOL(current_in_userns); static inline struct user_namespace *to_user_ns(struct ns_common *ns) { return container_of(ns, struct user_namespace, ns); } static struct ns_common *userns_get(struct task_struct *task) { struct user_namespace *user_ns; rcu_read_lock(); user_ns = get_user_ns(__task_cred(task)->user_ns); rcu_read_unlock(); return user_ns ? &user_ns->ns : NULL; } static void userns_put(struct ns_common *ns) { put_user_ns(to_user_ns(ns)); } static int userns_install(struct nsset *nsset, struct ns_common *ns) { struct user_namespace *user_ns = to_user_ns(ns); struct cred *cred; /* Don't allow gaining capabilities by reentering * the same user namespace. 
*/ if (user_ns == current_user_ns()) return -EINVAL; /* Tasks that share a thread group must share a user namespace */ if (!thread_group_empty(current)) return -EINVAL; if (current->fs->users != 1) return -EINVAL; if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; cred = nsset_cred(nsset); if (!cred) return -EINVAL; put_user_ns(cred->user_ns); set_cred_user_ns(cred, get_user_ns(user_ns)); if (set_cred_ucounts(cred) < 0) return -EINVAL; return 0; } struct ns_common *ns_get_owner(struct ns_common *ns) { struct user_namespace *my_user_ns = current_user_ns(); struct user_namespace *owner, *p; /* See if the owner is in the current user namespace */ owner = p = ns->ops->owner(ns); for (;;) { if (!p) return ERR_PTR(-EPERM); if (p == my_user_ns) break; p = p->parent; } return &get_user_ns(owner)->ns; } static struct user_namespace *userns_owner(struct ns_common *ns) { return to_user_ns(ns)->parent; } const struct proc_ns_operations userns_operations = { .name = "user", .type = CLONE_NEWUSER, .get = userns_get, .put = userns_put, .install = userns_install, .owner = userns_owner, .get_parent = ns_get_owner, }; static __init int user_namespaces_init(void) { user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT); return 0; } subsys_initcall(user_namespaces_init);
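/*
 * Usage sketch (userspace, illustrative; not part of this file): map_write()
 * and proc_setgroups_write() above implement the /proc/<pid>/uid_map, gid_map
 * and setgroups interface. A minimal unprivileged example, assuming a Linux
 * system with user namespaces enabled, maps the caller's own uid and gid to 0
 * inside a new user namespace. Each map must be written in a single write at
 * offset 0, and "deny" must be written to setgroups before gid_map can be
 * written without CAP_SETGID, matching new_idmap_permitted() above. Error
 * handling is reduced to the essentials.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void write_file(const char *path, const char *buf)
{
	int fd = open(path, O_WRONLY);

	/* One write() per file: map_write() rejects partial/offset writes. */
	if (fd < 0 || write(fd, buf, strlen(buf)) < 0) {
		perror(path);
		exit(1);
	}
	close(fd);
}

int main(void)
{
	char map[64];
	uid_t uid = getuid();	/* ids in the parent namespace */
	gid_t gid = getgid();

	/* Create a new user namespace owned by the current user. */
	if (unshare(CLONE_NEWUSER) < 0) {
		perror("unshare");
		return 1;
	}

	/* Single-extent map line: "<id-in-ns> <id-in-parent> <count>". */
	snprintf(map, sizeof(map), "0 %u 1", (unsigned)uid);
	write_file("/proc/self/uid_map", map);

	/* gid_map needs setgroups denied first when unprivileged. */
	write_file("/proc/self/setgroups", "deny");
	snprintf(map, sizeof(map), "0 %u 1", (unsigned)gid);
	write_file("/proc/self/gid_map", map);

	printf("in-ns uid=%u gid=%u\n", (unsigned)getuid(), (unsigned)getgid());
	return 0;
}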
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables
 *                                   - filesystem drivers list
 *                                   - mount system call
 *                                   - umount system call
 *                                   - ustat system call
 *
 * GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 *  Added options to /proc/mounts:
 *    Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 *  Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
 *  Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h>		/* for the emergency remount stuff */
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include <linux/fs_context.h>
#include <uapi/linux/mount.h>
#include "internal.h"

static int thaw_super_locked(struct super_block *sb, enum freeze_holder who);

static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);

static char *sb_writers_name[SB_FREEZE_LEVELS] = {
	"sb_writers",
	"sb_pagefaults",
	"sb_internal",
};

static inline void __super_lock(struct super_block *sb, bool excl)
{
	if (excl)
		down_write(&sb->s_umount);
	else
		down_read(&sb->s_umount);
}

static inline void super_unlock(struct super_block *sb, bool excl)
{
	if (excl)
		up_write(&sb->s_umount);
	else
		up_read(&sb->s_umount);
}

static inline void __super_lock_excl(struct super_block *sb)
{
	__super_lock(sb, true);
}

static inline void super_unlock_excl(struct super_block *sb)
{
	super_unlock(sb, true);
}

static inline void super_unlock_shared(struct super_block *sb)
{
	super_unlock(sb, false);
}

static bool super_flags(const struct super_block *sb, unsigned int flags)
{
	/*
	 * Pairs with smp_store_release() in super_wake() and ensures
	 * that we see @flags after we're woken.
	 */
	return smp_load_acquire(&sb->s_flags) & flags;
}

/**
 * super_lock - wait for superblock to become ready and lock it
 * @sb: superblock to wait for
 * @excl: whether exclusive access is required
 *
 * If the superblock has neither passed through vfs_get_tree() nor
 * generic_shutdown_super() yet, wait for it to happen. Either superblock
 * creation will succeed and SB_BORN is set by vfs_get_tree() or we're
 * woken and we'll see SB_DYING.
 *
 * The caller must have acquired a temporary reference on @sb->s_count.
 *
 * Return: The function returns true if SB_BORN was set and with
 *         s_umount held. The function returns false if SB_DYING was
 *         set and without s_umount held.
 */
static __must_check bool super_lock(struct super_block *sb, bool excl)
{
	lockdep_assert_not_held(&sb->s_umount);

	/* wait until the superblock is ready or dying */
	wait_var_event(&sb->s_flags, super_flags(sb, SB_BORN | SB_DYING));

	/* Don't pointlessly acquire s_umount. */
	if (super_flags(sb, SB_DYING))
		return false;

	__super_lock(sb, excl);

	/*
	 * Has gone through generic_shutdown_super() in the meantime.
	 * @sb->s_root is NULL and @sb->s_active is 0. No one needs to
	 * grab a reference to this. Tell them so.
*/ if (sb->s_flags & SB_DYING) { super_unlock(sb, excl); return false; } WARN_ON_ONCE(!(sb->s_flags & SB_BORN)); return true; } /* wait and try to acquire read-side of @sb->s_umount */ static inline bool super_lock_shared(struct super_block *sb) { return super_lock(sb, false); } /* wait and try to acquire write-side of @sb->s_umount */ static inline bool super_lock_excl(struct super_block *sb) { return super_lock(sb, true); } /* wake waiters */ #define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD) static void super_wake(struct super_block *sb, unsigned int flag) { WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS)); WARN_ON_ONCE(hweight32(flag & SUPER_WAKE_FLAGS) > 1); /* * Pairs with smp_load_acquire() in super_lock() to make sure * all initializations in the superblock are seen by the user * seeing SB_BORN sent. */ smp_store_release(&sb->s_flags, sb->s_flags | flag); /* * Pairs with the barrier in prepare_to_wait_event() to make sure * ___wait_var_event() either sees SB_BORN set or * waitqueue_active() check in wake_up_var() sees the waiter. */ smp_mb(); wake_up_var(&sb->s_flags); } /* * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. * If that happens we could trigger unregistering the shrinker from within the * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we * take a passive reference to the superblock to avoid this from occurring. */ static unsigned long super_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct super_block *sb; long fs_objects = 0; long total_objects; long freed = 0; long dentries; long inodes; sb = shrink->private_data; /* * Deadlock avoidance. We may hold various FS locks, and we don't want * to recurse into the FS that called us in clear_inode() and friends.. */ if (!(sc->gfp_mask & __GFP_FS)) return SHRINK_STOP; if (!super_trylock_shared(sb)) return SHRINK_STOP; if (sb->s_op->nr_cached_objects) fs_objects = sb->s_op->nr_cached_objects(sb, sc); inodes = list_lru_shrink_count(&sb->s_inode_lru, sc); dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc); total_objects = dentries + inodes + fs_objects + 1; if (!total_objects) total_objects = 1; /* proportion the scan between the caches */ dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects); /* * prune the dcache first as the icache is pinned by it, then * prune the icache, followed by the filesystem specific caches * * Ensure that we always scan at least one object - memcg kmem * accounting uses this to fully empty the caches. */ sc->nr_to_scan = dentries + 1; freed = prune_dcache_sb(sb, sc); sc->nr_to_scan = inodes + 1; freed += prune_icache_sb(sb, sc); if (fs_objects) { sc->nr_to_scan = fs_objects + 1; freed += sb->s_op->free_cached_objects(sb, sc); } super_unlock_shared(sb); return freed; } static unsigned long super_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct super_block *sb; long total_objects = 0; sb = shrink->private_data; /* * We don't call super_trylock_shared() here as it is a scalability * bottleneck, so we're exposed to partial setup state. The shrinker * rwsem does not protect filesystem operations backing * list_lru_shrink_count() or s_op->nr_cached_objects(). Counts can * change between super_cache_count and super_cache_scan, so we really * don't need locks here. 
* * However, if we are currently mounting the superblock, the underlying * filesystem might be in a state of partial construction and hence it * is dangerous to access it. super_trylock_shared() uses a SB_BORN check * to avoid this situation, so do the same here. The memory barrier is * matched with the one in mount_fs() as we don't hold locks here. */ if (!(sb->s_flags & SB_BORN)) return 0; smp_rmb(); if (sb->s_op && sb->s_op->nr_cached_objects) total_objects = sb->s_op->nr_cached_objects(sb, sc); total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc); total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc); if (!total_objects) return SHRINK_EMPTY; total_objects = vfs_pressure_ratio(total_objects); return total_objects; } static void destroy_super_work(struct work_struct *work) { struct super_block *s = container_of(work, struct super_block, destroy_work); security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); for (int i = 0; i < SB_FREEZE_LEVELS; i++) percpu_free_rwsem(&s->s_writers.rw_sem[i]); kfree(s); } static void destroy_super_rcu(struct rcu_head *head) { struct super_block *s = container_of(head, struct super_block, rcu); INIT_WORK(&s->destroy_work, destroy_super_work); schedule_work(&s->destroy_work); } /* Free a superblock that has never been seen by anyone */ static void destroy_unused_super(struct super_block *s) { if (!s) return; super_unlock_excl(s); list_lru_destroy(&s->s_dentry_lru); list_lru_destroy(&s->s_inode_lru); shrinker_free(s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); } /** * alloc_super - create new superblock * @type: filesystem type superblock should belong to * @flags: the mount flags * @user_ns: User namespace for the super_block * * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */ static struct super_block *alloc_super(struct file_system_type *type, int flags, struct user_namespace *user_ns) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_KERNEL); static const struct super_operations default_op; int i; if (!s) return NULL; INIT_LIST_HEAD(&s->s_mounts); s->s_user_ns = get_user_ns(user_ns); init_rwsem(&s->s_umount); lockdep_set_class(&s->s_umount, &type->s_umount_key); /* * sget() can have s_umount recursion. * * When it cannot find a suitable sb, it allocates a new * one (this one), and tries again to find a suitable old * one. * * In case that succeeds, it will acquire the s_umount * lock of the old one. Since these are clearly distrinct * locks, and this object isn't exposed yet, there's no * risk of deadlocks. * * Annotate this by putting this lock in a different * subclass. 
*/ down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); if (security_sb_alloc(s)) goto fail; for (i = 0; i < SB_FREEZE_LEVELS; i++) { if (__percpu_init_rwsem(&s->s_writers.rw_sem[i], sb_writers_name[i], &type->s_writers_key[i])) goto fail; } s->s_bdi = &noop_backing_dev_info; s->s_flags = flags; if (s->s_user_ns != &init_user_ns) s->s_iflags |= SB_I_NODEV; INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_roots); mutex_init(&s->s_sync_lock); INIT_LIST_HEAD(&s->s_inodes); spin_lock_init(&s->s_inode_list_lock); INIT_LIST_HEAD(&s->s_inodes_wb); spin_lock_init(&s->s_inode_wblist_lock); s->s_count = 1; atomic_set(&s->s_active, 1); mutex_init(&s->s_vfs_rename_mutex); lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); init_rwsem(&s->s_dquot.dqio_sem); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; s->s_time_min = TIME64_MIN; s->s_time_max = TIME64_MAX; s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "sb-%s", type->name); if (!s->s_shrink) goto fail; s->s_shrink->scan_objects = super_cache_scan; s->s_shrink->count_objects = super_cache_count; s->s_shrink->batch = 1024; s->s_shrink->private_data = s; if (list_lru_init_memcg(&s->s_dentry_lru, s->s_shrink)) goto fail; if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink)) goto fail; return s; fail: destroy_unused_super(s); return NULL; } /* Superblock refcounting */ /* * Drop a superblock's refcount. The caller must hold sb_lock. */ static void __put_super(struct super_block *s) { if (!--s->s_count) { list_del_init(&s->s_list); WARN_ON(s->s_dentry_lru.node); WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); call_rcu(&s->rcu, destroy_super_rcu); } } /** * put_super - drop a temporary reference to superblock * @sb: superblock in question * * Drops a temporary reference, frees superblock if there's no * references left. */ void put_super(struct super_block *sb) { spin_lock(&sb_lock); __put_super(sb); spin_unlock(&sb_lock); } static void kill_super_notify(struct super_block *sb) { lockdep_assert_not_held(&sb->s_umount); /* already notified earlier */ if (sb->s_flags & SB_DEAD) return; /* * Remove it from @fs_supers so it isn't found by new * sget{_fc}() walkers anymore. Any concurrent mounter still * managing to grab a temporary reference is guaranteed to * already see SB_DYING and will wait until we notify them about * SB_DEAD. */ spin_lock(&sb_lock); hlist_del_init(&sb->s_instances); spin_unlock(&sb_lock); /* * Let concurrent mounts know that this thing is really dead. * We don't need @sb->s_umount here as every concurrent caller * will see SB_DYING and either discard the superblock or wait * for SB_DEAD. */ super_wake(sb, SB_DEAD); } /** * deactivate_locked_super - drop an active reference to superblock * @s: superblock to deactivate * * Drops an active reference to superblock, converting it into a temporary * one if there is no other active references left. In that case we * tell fs driver to shut it down and drop the temporary reference we * had just acquired. * * Caller holds exclusive lock on superblock; that lock is released. */ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { shrinker_free(s->s_shrink); fs->kill_sb(s); kill_super_notify(s); /* * Since list_lru_destroy() may sleep, we cannot call it from * put_super(), where we hold the sb_lock. Therefore we destroy * the lru lists right now. 
*/ list_lru_destroy(&s->s_dentry_lru); list_lru_destroy(&s->s_inode_lru); put_filesystem(fs); put_super(s); } else { super_unlock_excl(s); } } EXPORT_SYMBOL(deactivate_locked_super); /** * deactivate_super - drop an active reference to superblock * @s: superblock to deactivate * * Variant of deactivate_locked_super(), except that superblock is *not* * locked by caller. If we are going to drop the final active reference, * lock will be acquired prior to that. */ void deactivate_super(struct super_block *s) { if (!atomic_add_unless(&s->s_active, -1, 1)) { __super_lock_excl(s); deactivate_locked_super(s); } } EXPORT_SYMBOL(deactivate_super); /** * grab_super - acquire an active reference to a superblock * @sb: superblock to acquire * * Acquire a temporary reference on a superblock and try to trade it for * an active reference. This is used in sget{_fc}() to wait for a * superblock to either become SB_BORN or for it to pass through * sb->kill() and be marked as SB_DEAD. * * Return: This returns true if an active reference could be acquired, * false if not. */ static bool grab_super(struct super_block *sb) { bool locked; sb->s_count++; spin_unlock(&sb_lock); locked = super_lock_excl(sb); if (locked) { if (atomic_inc_not_zero(&sb->s_active)) { put_super(sb); return true; } super_unlock_excl(sb); } wait_var_event(&sb->s_flags, super_flags(sb, SB_DEAD)); put_super(sb); return false; } /* * super_trylock_shared - try to grab ->s_umount shared * @sb: reference we are trying to grab * * Try to prevent fs shutdown. This is used in places where we * cannot take an active reference but we need to ensure that the * filesystem is not shut down while we are working on it. It returns * false if we cannot acquire s_umount or if we lose the race and * filesystem already got into shutdown, and returns true with the s_umount * lock held in read mode in case of success. On successful return, * the caller must drop the s_umount lock when done. * * Note that unlike get_super() et.al. this one does *not* bump ->s_count. * The reason why it's safe is that we are OK with doing trylock instead * of down_read(). There's a couple of places that are OK with that, but * it's very much not a general-purpose interface. */ bool super_trylock_shared(struct super_block *sb) { if (down_read_trylock(&sb->s_umount)) { if (!(sb->s_flags & SB_DYING) && sb->s_root && (sb->s_flags & SB_BORN)) return true; super_unlock_shared(sb); } return false; } /** * retire_super - prevents superblock from being reused * @sb: superblock to retire * * The function marks superblock to be ignored in superblock test, which * prevents it from being reused for any new mounts. If the superblock has * a private bdi, it also unregisters it, but doesn't reduce the refcount * of the superblock to prevent potential races. The refcount is reduced * by generic_shutdown_super(). The function can not be called * concurrently with generic_shutdown_super(). It is safe to call the * function multiple times, subsequent calls have no effect. * * The marker will affect the re-use only for block-device-based * superblocks. Other superblocks will still get marked if this function * is used, but that will not affect their reusability. 
*/ void retire_super(struct super_block *sb) { WARN_ON(!sb->s_bdev); __super_lock_excl(sb); if (sb->s_iflags & SB_I_PERSB_BDI) { bdi_unregister(sb->s_bdi); sb->s_iflags &= ~SB_I_PERSB_BDI; } sb->s_iflags |= SB_I_RETIRED; super_unlock_excl(sb); } EXPORT_SYMBOL(retire_super); /** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill * * generic_shutdown_super() does all fs-independent work on superblock * shutdown. Typical ->kill_sb() should pick all fs-specific objects * that need destruction out of superblock, call generic_shutdown_super() * and release aforementioned objects. Note: dentries and inodes _are_ * taken care of and do not need specific handling. * * Upon calling this function, the filesystem may no longer alter or * rearrange the set of dentries belonging to this super_block, nor may it * change the attachments of dentries to inodes. */ void generic_shutdown_super(struct super_block *sb) { const struct super_operations *sop = sb->s_op; if (sb->s_root) { shrink_dcache_for_umount(sb); sync_filesystem(sb); sb->s_flags &= ~SB_ACTIVE; cgroup_writeback_umount(); /* Evict all inodes with zero refcount. */ evict_inodes(sb); /* * Clean up and evict any inodes that still have references due * to fsnotify or the security policy. */ fsnotify_sb_delete(sb); security_sb_delete(sb); if (sb->s_dio_done_wq) { destroy_workqueue(sb->s_dio_done_wq); sb->s_dio_done_wq = NULL; } if (sop->put_super) sop->put_super(sb); /* * Now that all potentially-encrypted inodes have been evicted, * the fscrypt keyring can be destroyed. */ fscrypt_destroy_keyring(sb); if (CHECK_DATA_CORRUPTION(!list_empty(&sb->s_inodes), "VFS: Busy inodes after unmount of %s (%s)", sb->s_id, sb->s_type->name)) { /* * Adding a proper bailout path here would be hard, but * we can at least make it more likely that a later * iput_final() or such crashes cleanly. */ struct inode *inode; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { inode->i_op = VFS_PTR_POISON; inode->i_sb = VFS_PTR_POISON; inode->i_mapping = VFS_PTR_POISON; } spin_unlock(&sb->s_inode_list_lock); } } /* * Broadcast to everyone that grabbed a temporary reference to this * superblock before we removed it from @fs_supers that the superblock * is dying. Every walker of @fs_supers outside of sget{_fc}() will now * discard this superblock and treat it as dead. * * We leave the superblock on @fs_supers so it can be found by * sget{_fc}() until we passed sb->kill_sb(). */ super_wake(sb, SB_DYING); super_unlock_excl(sb); if (sb->s_bdi != &noop_backing_dev_info) { if (sb->s_iflags & SB_I_PERSB_BDI) bdi_unregister(sb->s_bdi); bdi_put(sb->s_bdi); sb->s_bdi = &noop_backing_dev_info; } } EXPORT_SYMBOL(generic_shutdown_super); bool mount_capable(struct fs_context *fc) { if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) return capable(CAP_SYS_ADMIN); else return ns_capable(fc->user_ns, CAP_SYS_ADMIN); } /** * sget_fc - Find or create a superblock * @fc: Filesystem context. * @test: Comparison callback * @set: Setup callback * * Create a new superblock or find an existing one. * * The @test callback is used to find a matching existing superblock. * Whether or not the requested parameters in @fc are taken into account * is specific to the @test callback that is used. They may even be * completely ignored. 
* * If an extant superblock is matched, it will be returned unless: * * (1) the namespace the filesystem context @fc and the extant * superblock's namespace differ * * (2) the filesystem context @fc has requested that reusing an extant * superblock is not allowed * * In both cases EBUSY will be returned. * * If no match is made, a new superblock will be allocated and basic * initialisation will be performed (s_type, s_fs_info and s_id will be * set and the @set callback will be invoked), the superblock will be * published and it will be returned in a partially constructed state * with SB_BORN and SB_ACTIVE as yet unset. * * Return: On success, an extant or newly created superblock is * returned. On failure an error pointer is returned. */ struct super_block *sget_fc(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*set)(struct super_block *, struct fs_context *)) { struct super_block *s = NULL; struct super_block *old; struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns; int err; retry: spin_lock(&sb_lock); if (test) { hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) { if (test(old, fc)) goto share_extant_sb; } } if (!s) { spin_unlock(&sb_lock); s = alloc_super(fc->fs_type, fc->sb_flags, user_ns); if (!s) return ERR_PTR(-ENOMEM); goto retry; } s->s_fs_info = fc->s_fs_info; err = set(s, fc); if (err) { s->s_fs_info = NULL; spin_unlock(&sb_lock); destroy_unused_super(s); return ERR_PTR(err); } fc->s_fs_info = NULL; s->s_type = fc->fs_type; s->s_iflags |= fc->s_iflags; strscpy(s->s_id, s->s_type->name, sizeof(s->s_id)); /* * Make the superblock visible on @super_blocks and @fs_supers. * It's in a nascent state and users should wait on SB_BORN or * SB_DYING to be set. */ list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &s->s_type->fs_supers); spin_unlock(&sb_lock); get_filesystem(s->s_type); shrinker_register(s->s_shrink); return s; share_extant_sb: if (user_ns != old->s_user_ns || fc->exclusive) { spin_unlock(&sb_lock); destroy_unused_super(s); if (fc->exclusive) warnfc(fc, "reusing existing filesystem not allowed"); else warnfc(fc, "reusing existing filesystem in another namespace not allowed"); return ERR_PTR(-EBUSY); } if (!grab_super(old)) goto retry; destroy_unused_super(s); return old; } EXPORT_SYMBOL(sget_fc); /** * sget - find or create a superblock * @type: filesystem type superblock should belong to * @test: comparison callback * @set: setup callback * @flags: mount flags * @data: argument to each of them */ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data) { struct user_namespace *user_ns = current_user_ns(); struct super_block *s = NULL; struct super_block *old; int err; /* We don't yet pass the user namespace of the parent * mount through to here so always use &init_user_ns * until that changes. 
*/ if (flags & SB_SUBMOUNT) user_ns = &init_user_ns; retry: spin_lock(&sb_lock); if (test) { hlist_for_each_entry(old, &type->fs_supers, s_instances) { if (!test(old, data)) continue; if (user_ns != old->s_user_ns) { spin_unlock(&sb_lock); destroy_unused_super(s); return ERR_PTR(-EBUSY); } if (!grab_super(old)) goto retry; destroy_unused_super(s); return old; } } if (!s) { spin_unlock(&sb_lock); s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns); if (!s) return ERR_PTR(-ENOMEM); goto retry; } err = set(s, data); if (err) { spin_unlock(&sb_lock); destroy_unused_super(s); return ERR_PTR(err); } s->s_type = type; strscpy(s->s_id, type->name, sizeof(s->s_id)); list_add_tail(&s->s_list, &super_blocks); hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); shrinker_register(s->s_shrink); return s; } EXPORT_SYMBOL(sget); void drop_super(struct super_block *sb) { super_unlock_shared(sb); put_super(sb); } EXPORT_SYMBOL(drop_super); void drop_super_exclusive(struct super_block *sb) { super_unlock_excl(sb); put_super(sb); } EXPORT_SYMBOL(drop_super_exclusive); static void __iterate_supers(void (*f)(struct super_block *)) { struct super_block *sb, *p = NULL; spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { if (super_flags(sb, SB_DYING)) continue; sb->s_count++; spin_unlock(&sb_lock); f(sb); spin_lock(&sb_lock); if (p) __put_super(p); p = sb; } if (p) __put_super(p); spin_unlock(&sb_lock); } /** * iterate_supers - call function for all active superblocks * @f: function to call * @arg: argument to pass to it * * Scans the superblock list and calls given function, passing it * locked superblock and given argument. */ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) { struct super_block *sb, *p = NULL; spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { bool locked; sb->s_count++; spin_unlock(&sb_lock); locked = super_lock_shared(sb); if (locked) { if (sb->s_root) f(sb, arg); super_unlock_shared(sb); } spin_lock(&sb_lock); if (p) __put_super(p); p = sb; } if (p) __put_super(p); spin_unlock(&sb_lock); } /** * iterate_supers_type - call function for superblocks of given type * @type: fs type * @f: function to call * @arg: argument to pass to it * * Scans the superblock list and calls given function, passing it * locked superblock and given argument. */ void iterate_supers_type(struct file_system_type *type, void (*f)(struct super_block *, void *), void *arg) { struct super_block *sb, *p = NULL; spin_lock(&sb_lock); hlist_for_each_entry(sb, &type->fs_supers, s_instances) { bool locked; sb->s_count++; spin_unlock(&sb_lock); locked = super_lock_shared(sb); if (locked) { if (sb->s_root) f(sb, arg); super_unlock_shared(sb); } spin_lock(&sb_lock); if (p) __put_super(p); p = sb; } if (p) __put_super(p); spin_unlock(&sb_lock); } EXPORT_SYMBOL(iterate_supers_type); struct super_block *user_get_super(dev_t dev, bool excl) { struct super_block *sb; spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { if (sb->s_dev == dev) { bool locked; sb->s_count++; spin_unlock(&sb_lock); /* still alive? */ locked = super_lock(sb, excl); if (locked) { if (sb->s_root) return sb; super_unlock(sb, excl); } /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); break; } } spin_unlock(&sb_lock); return NULL; } /** * reconfigure_super - asks filesystem to change superblock parameters * @fc: The superblock and configuration * * Alters the configuration parameters of a live superblock. 
*/ int reconfigure_super(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; int retval; bool remount_ro = false; bool remount_rw = false; bool force = fc->sb_flags & SB_FORCE; if (fc->sb_flags_mask & ~MS_RMT_MASK) return -EINVAL; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; retval = security_sb_remount(sb, fc->security); if (retval) return retval; if (fc->sb_flags_mask & SB_RDONLY) { #ifdef CONFIG_BLOCK if (!(fc->sb_flags & SB_RDONLY) && sb->s_bdev && bdev_read_only(sb->s_bdev)) return -EACCES; #endif remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb); remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); } if (remount_ro) { if (!hlist_empty(&sb->s_pins)) { super_unlock_excl(sb); group_pin_kill(&sb->s_pins); __super_lock_excl(sb); if (!sb->s_root) return 0; if (sb->s_writers.frozen != SB_UNFROZEN) return -EBUSY; remount_ro = !sb_rdonly(sb); } } shrink_dcache_sb(sb); /* If we are reconfiguring to RDONLY and current sb is read/write, * make sure there are no files open for writing. */ if (remount_ro) { if (force) { sb_start_ro_state_change(sb); } else { retval = sb_prepare_remount_readonly(sb); if (retval) return retval; } } else if (remount_rw) { /* * Protect filesystem's reconfigure code from writes from * userspace until reconfigure finishes. */ sb_start_ro_state_change(sb); } if (fc->ops->reconfigure) { retval = fc->ops->reconfigure(fc); if (retval) { if (!force) goto cancel_readonly; /* If forced remount, go ahead despite any errors */ WARN(1, "forced remount of a %s fs returned %i\n", sb->s_type->name, retval); } } WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | (fc->sb_flags & fc->sb_flags_mask))); sb_end_ro_state_change(sb); /* * Some filesystems modify their metadata via some other path than the * bdev buffer cache (eg. use a private mapping, or directories in * pagecache, etc). Also file data modifications go via their own * mappings. So If we try to mount readonly then copy the filesystem * from bdev, we could get stale data, so invalidate it to give a best * effort at coherency. 
*/ if (remount_ro && sb->s_bdev) invalidate_bdev(sb->s_bdev); return 0; cancel_readonly: sb_end_ro_state_change(sb); return retval; } static void do_emergency_remount_callback(struct super_block *sb) { bool locked = super_lock_excl(sb); if (locked && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) { struct fs_context *fc; fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY | SB_FORCE, SB_RDONLY); if (!IS_ERR(fc)) { if (parse_monolithic_mount_data(fc, NULL) == 0) (void)reconfigure_super(fc); put_fs_context(fc); } } if (locked) super_unlock_excl(sb); } static void do_emergency_remount(struct work_struct *work) { __iterate_supers(do_emergency_remount_callback); kfree(work); printk("Emergency Remount complete\n"); } void emergency_remount(void) { struct work_struct *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { INIT_WORK(work, do_emergency_remount); schedule_work(work); } } static void do_thaw_all_callback(struct super_block *sb) { bool locked = super_lock_excl(sb); if (locked && sb->s_root) { if (IS_ENABLED(CONFIG_BLOCK)) while (sb->s_bdev && !bdev_thaw(sb->s_bdev)) pr_warn("Emergency Thaw on %pg\n", sb->s_bdev); thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE); return; } if (locked) super_unlock_excl(sb); } static void do_thaw_all(struct work_struct *work) { __iterate_supers(do_thaw_all_callback); kfree(work); printk(KERN_WARNING "Emergency Thaw complete\n"); } /** * emergency_thaw_all -- forcibly thaw every frozen filesystem * * Used for emergency unfreeze of all filesystems via SysRq */ void emergency_thaw_all(void) { struct work_struct *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { INIT_WORK(work, do_thaw_all); schedule_work(work); } } static DEFINE_IDA(unnamed_dev_ida); /** * get_anon_bdev - Allocate a block device for filesystems which don't have one. * @p: Pointer to a dev_t. * * Filesystems which don't use real block devices can call this function * to allocate a virtual block device. * * Context: Any context. Frequently called while holding sb_lock. * Return: 0 on success, -EMFILE if there are no anonymous bdevs left * or -ENOMEM if memory allocation failed. */ int get_anon_bdev(dev_t *p) { int dev; /* * Many userspace utilities consider an FSID of 0 invalid. * Always return at least 1 from get_anon_bdev. 
*/ dev = ida_alloc_range(&unnamed_dev_ida, 1, (1 << MINORBITS) - 1, GFP_ATOMIC); if (dev == -ENOSPC) dev = -EMFILE; if (dev < 0) return dev; *p = MKDEV(0, dev); return 0; } EXPORT_SYMBOL(get_anon_bdev); void free_anon_bdev(dev_t dev) { ida_free(&unnamed_dev_ida, MINOR(dev)); } EXPORT_SYMBOL(free_anon_bdev); int set_anon_super(struct super_block *s, void *data) { return get_anon_bdev(&s->s_dev); } EXPORT_SYMBOL(set_anon_super); void kill_anon_super(struct super_block *sb) { dev_t dev = sb->s_dev; generic_shutdown_super(sb); kill_super_notify(sb); free_anon_bdev(dev); } EXPORT_SYMBOL(kill_anon_super); void kill_litter_super(struct super_block *sb) { if (sb->s_root) d_genocide(sb->s_root); kill_anon_super(sb); } EXPORT_SYMBOL(kill_litter_super); int set_anon_super_fc(struct super_block *sb, struct fs_context *fc) { return set_anon_super(sb, NULL); } EXPORT_SYMBOL(set_anon_super_fc); static int test_keyed_super(struct super_block *sb, struct fs_context *fc) { return sb->s_fs_info == fc->s_fs_info; } static int test_single_super(struct super_block *s, struct fs_context *fc) { return 1; } static int vfs_get_super(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*fill_super)(struct super_block *sb, struct fs_context *fc)) { struct super_block *sb; int err; sb = sget_fc(fc, test, set_anon_super_fc); if (IS_ERR(sb)) return PTR_ERR(sb); if (!sb->s_root) { err = fill_super(sb, fc); if (err) goto error; sb->s_flags |= SB_ACTIVE; } fc->root = dget(sb->s_root); return 0; error: deactivate_locked_super(sb); return err; } int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)) { return vfs_get_super(fc, NULL, fill_super); } EXPORT_SYMBOL(get_tree_nodev); int get_tree_single(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)) { return vfs_get_super(fc, test_single_super, fill_super); } EXPORT_SYMBOL(get_tree_single); int get_tree_keyed(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc), void *key) { fc->s_fs_info = key; return vfs_get_super(fc, test_keyed_super, fill_super); } EXPORT_SYMBOL(get_tree_keyed); static int set_bdev_super(struct super_block *s, void *data) { s->s_dev = *(dev_t *)data; return 0; } static int super_s_dev_set(struct super_block *s, struct fs_context *fc) { return set_bdev_super(s, fc->sget_key); } static int super_s_dev_test(struct super_block *s, struct fs_context *fc) { return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)fc->sget_key; } /** * sget_dev - Find or create a superblock by device number * @fc: Filesystem context. * @dev: device number * * Find or create a superblock using the provided device number that * will be stored in fc->sget_key. * * If an extant superblock is matched, then that will be returned with * an elevated reference count that the caller must transfer or discard. * * If no match is made, a new superblock will be allocated and basic * initialisation will be performed (s_type, s_fs_info, s_id, s_dev will * be set). The superblock will be published and it will be returned in * a partially constructed state with SB_BORN and SB_ACTIVE as yet * unset. * * Return: an existing or newly created superblock on success, an error * pointer on failure. */ struct super_block *sget_dev(struct fs_context *fc, dev_t dev) { fc->sget_key = &dev; return sget_fc(fc, super_s_dev_test, super_s_dev_set); } EXPORT_SYMBOL(sget_dev); #ifdef CONFIG_BLOCK /* * Lock the superblock that is holder of the bdev. 
Returns the superblock * pointer if we successfully locked the superblock and it is alive. Otherwise * we return NULL and just unlock bdev->bd_holder_lock. * * The function must be called with bdev->bd_holder_lock and releases it. */ static struct super_block *bdev_super_lock(struct block_device *bdev, bool excl) __releases(&bdev->bd_holder_lock) { struct super_block *sb = bdev->bd_holder; bool locked; lockdep_assert_held(&bdev->bd_holder_lock); lockdep_assert_not_held(&sb->s_umount); lockdep_assert_not_held(&bdev->bd_disk->open_mutex); /* Make sure sb doesn't go away from under us */ spin_lock(&sb_lock); sb->s_count++; spin_unlock(&sb_lock); mutex_unlock(&bdev->bd_holder_lock); locked = super_lock(sb, excl); /* * If the superblock wasn't already SB_DYING then we hold * s_umount and can safely drop our temporary reference. */ put_super(sb); if (!locked) return NULL; if (!sb->s_root || !(sb->s_flags & SB_ACTIVE)) { super_unlock(sb, excl); return NULL; } return sb; } static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise) { struct super_block *sb; sb = bdev_super_lock(bdev, false); if (!sb) return; if (!surprise) sync_filesystem(sb); shrink_dcache_sb(sb); invalidate_inodes(sb); if (sb->s_op->shutdown) sb->s_op->shutdown(sb); super_unlock_shared(sb); } static void fs_bdev_sync(struct block_device *bdev) { struct super_block *sb; sb = bdev_super_lock(bdev, false); if (!sb) return; sync_filesystem(sb); super_unlock_shared(sb); } static struct super_block *get_bdev_super(struct block_device *bdev) { bool active = false; struct super_block *sb; sb = bdev_super_lock(bdev, true); if (sb) { active = atomic_inc_not_zero(&sb->s_active); super_unlock_excl(sb); } if (!active) return NULL; return sb; } /** * fs_bdev_freeze - freeze owning filesystem of block device * @bdev: block device * * Freeze the filesystem that owns this block device if it is still * active. * * A filesystem that owns multiple block devices may be frozen from each * block device and won't be unfrozen until all block devices are * unfrozen. Each block device can only freeze the filesystem once as we * nest freezes for block devices in the block layer. * * Return: If the freeze was successful zero is returned. If the freeze * failed a negative error code is returned. */ static int fs_bdev_freeze(struct block_device *bdev) { struct super_block *sb; int error = 0; lockdep_assert_held(&bdev->bd_fsfreeze_mutex); sb = get_bdev_super(bdev); if (!sb) return -EINVAL; if (sb->s_op->freeze_super) error = sb->s_op->freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); else error = freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); if (!error) error = sync_blockdev(bdev); deactivate_super(sb); return error; } /** * fs_bdev_thaw - thaw owning filesystem of block device * @bdev: block device * * Thaw the filesystem that owns this block device. * * A filesystem that owns multiple block devices may be frozen from each * block device and won't be unfrozen until all block devices are * unfrozen. Each block device can only freeze the filesystem once as we * nest freezes for block devices in the block layer. * * Return: If the thaw was successful zero is returned. If the thaw * failed a negative error code is returned. If this function * returns zero it doesn't mean that the filesystem is unfrozen * as it may have been frozen multiple times (kernel may hold a * freeze or might be frozen from other block devices). 
*/ static int fs_bdev_thaw(struct block_device *bdev) { struct super_block *sb; int error; lockdep_assert_held(&bdev->bd_fsfreeze_mutex); sb = get_bdev_super(bdev); if (WARN_ON_ONCE(!sb)) return -EINVAL; if (sb->s_op->thaw_super) error = sb->s_op->thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); else error = thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); deactivate_super(sb); return error; } const struct blk_holder_ops fs_holder_ops = { .mark_dead = fs_bdev_mark_dead, .sync = fs_bdev_sync, .freeze = fs_bdev_freeze, .thaw = fs_bdev_thaw, }; EXPORT_SYMBOL_GPL(fs_holder_ops); int setup_bdev_super(struct super_block *sb, int sb_flags, struct fs_context *fc) { blk_mode_t mode = sb_open_mode(sb_flags); struct file *bdev_file; struct block_device *bdev; bdev_file = bdev_file_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops); if (IS_ERR(bdev_file)) { if (fc) errorf(fc, "%s: Can't open blockdev", fc->source); return PTR_ERR(bdev_file); } bdev = file_bdev(bdev_file); /* * This really should be in blkdev_get_by_dev, but right now can't due * to legacy issues that require us to allow opening a block device node * writable from userspace even for a read-only block device. */ if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { bdev_fput(bdev_file); return -EACCES; } /* * It is enough to check bdev was not frozen before we set * s_bdev as freezing will wait until SB_BORN is set. */ if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { if (fc) warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); bdev_fput(bdev_file); return -EBUSY; } spin_lock(&sb_lock); sb->s_bdev_file = bdev_file; sb->s_bdev = bdev; sb->s_bdi = bdi_get(bdev->bd_disk->bdi); if (bdev_stable_writes(bdev)) sb->s_iflags |= SB_I_STABLE_WRITES; spin_unlock(&sb_lock); snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-%s:%s", sb->s_type->name, sb->s_id); sb_set_blocksize(sb, block_size(bdev)); return 0; } EXPORT_SYMBOL_GPL(setup_bdev_super); /** * get_tree_bdev - Get a superblock based on a single block device * @fc: The filesystem context holding the parameters * @fill_super: Helper to initialise a new superblock */ int get_tree_bdev(struct fs_context *fc, int (*fill_super)(struct super_block *, struct fs_context *)) { struct super_block *s; int error = 0; dev_t dev; if (!fc->source) return invalf(fc, "No source specified"); error = lookup_bdev(fc->source, &dev); if (error) { errorf(fc, "%s: Can't lookup blockdev", fc->source); return error; } fc->sb_flags |= SB_NOSEC; s = sget_dev(fc, dev); if (IS_ERR(s)) return PTR_ERR(s); if (s->s_root) { /* Don't summarily change the RO/RW state. 
*/ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev); deactivate_locked_super(s); return -EBUSY; } } else { error = setup_bdev_super(s, fc->sb_flags, fc); if (!error) error = fill_super(s, fc); if (error) { deactivate_locked_super(s); return error; } s->s_flags |= SB_ACTIVE; } BUG_ON(fc->root); fc->root = dget(s->s_root); return 0; } EXPORT_SYMBOL(get_tree_bdev); static int test_bdev_super(struct super_block *s, void *data) { return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data; } struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)) { struct super_block *s; int error; dev_t dev; error = lookup_bdev(dev_name, &dev); if (error) return ERR_PTR(error); flags |= SB_NOSEC; s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev); if (IS_ERR(s)) return ERR_CAST(s); if (s->s_root) { if ((flags ^ s->s_flags) & SB_RDONLY) { deactivate_locked_super(s); return ERR_PTR(-EBUSY); } } else { error = setup_bdev_super(s, flags, NULL); if (!error) error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); } s->s_flags |= SB_ACTIVE; } return dget(s->s_root); } EXPORT_SYMBOL(mount_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; generic_shutdown_super(sb); if (bdev) { sync_blockdev(bdev); bdev_fput(sb->s_bdev_file); } } EXPORT_SYMBOL(kill_block_super); #endif struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)) { int error; struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); } s->s_flags |= SB_ACTIVE; return dget(s->s_root); } EXPORT_SYMBOL(mount_nodev); int reconfigure_single(struct super_block *s, int flags, void *data) { struct fs_context *fc; int ret; /* The caller really need to be passing fc down into mount_single(), * then a chunk of this can be removed. [Bollocks -- AV] * Better yet, reconfiguration shouldn't happen, but rather the second * mount should be rejected if the parameters are not compatible. */ fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK); if (IS_ERR(fc)) return PTR_ERR(fc); ret = parse_monolithic_mount_data(fc, data); if (ret < 0) goto out; ret = reconfigure_super(fc); out: put_fs_context(fc); return ret; } static int compare_single(struct super_block *s, void *p) { return 1; } struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)) { struct super_block *s; int error; s = sget(fs_type, compare_single, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { error = fill_super(s, data, flags & SB_SILENT ? 1 : 0); if (!error) s->s_flags |= SB_ACTIVE; } else { error = reconfigure_single(s, flags, data); } if (unlikely(error)) { deactivate_locked_super(s); return ERR_PTR(error); } return dget(s->s_root); } EXPORT_SYMBOL(mount_single); /** * vfs_get_tree - Get the mountable root * @fc: The superblock configuration context. * * The filesystem is invoked to get or create a superblock which can then later * be used for mounting. The filesystem places a pointer to the root to be * used for mounting in @fc->root. 
*/ int vfs_get_tree(struct fs_context *fc) { struct super_block *sb; int error; if (fc->root) return -EBUSY; /* Get the mountable root in fc->root, with a ref on the root and a ref * on the superblock. */ error = fc->ops->get_tree(fc); if (error < 0) return error; if (!fc->root) { pr_err("Filesystem %s get_tree() didn't set fc->root\n", fc->fs_type->name); /* We don't know what the locking state of the superblock is - * if there is a superblock. */ BUG(); } sb = fc->root->d_sb; WARN_ON(!sb->s_bdi); /* * super_wake() contains a memory barrier which also care of * ordering for super_cache_count(). We place it before setting * SB_BORN as the data dependency between the two functions is * the superblock structure contents that we just set up, not * the SB_BORN flag. */ super_wake(sb, SB_BORN); error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL); if (unlikely(error)) { fc_drop_locked(fc); return error; } /* * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE * but s_maxbytes was an unsigned long long for many releases. Throw * this warning for a little while to try and catch filesystems that * violate this rule. */ WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " "negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes); return 0; } EXPORT_SYMBOL(vfs_get_tree); /* * Setup private BDI for given superblock. It gets automatically cleaned up * in generic_shutdown_super(). */ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...) { struct backing_dev_info *bdi; int err; va_list args; bdi = bdi_alloc(NUMA_NO_NODE); if (!bdi) return -ENOMEM; va_start(args, fmt); err = bdi_register_va(bdi, fmt, args); va_end(args); if (err) { bdi_put(bdi); return err; } WARN_ON(sb->s_bdi != &noop_backing_dev_info); sb->s_bdi = bdi; sb->s_iflags |= SB_I_PERSB_BDI; return 0; } EXPORT_SYMBOL(super_setup_bdi_name); /* * Setup private BDI for given superblock. I gets automatically cleaned up * in generic_shutdown_super(). */ int super_setup_bdi(struct super_block *sb) { static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name, atomic_long_inc_return(&bdi_seq)); } EXPORT_SYMBOL(super_setup_bdi); /** * sb_wait_write - wait until all writers to given file system finish * @sb: the super for which we wait * @level: type of writers we wait for (normal vs page fault) * * This function waits until there are no writers of given type to given file * system. */ static void sb_wait_write(struct super_block *sb, int level) { percpu_down_write(sb->s_writers.rw_sem + level-1); } /* * We are going to return to userspace and forget about these locks, the * ownership goes to the caller of thaw_super() which does unlock(). */ static void lockdep_sb_freeze_release(struct super_block *sb) { int level; for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_); } /* * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb). 
*/ static void lockdep_sb_freeze_acquire(struct super_block *sb) { int level; for (level = 0; level < SB_FREEZE_LEVELS; ++level) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); } static void sb_freeze_unlock(struct super_block *sb, int level) { for (level--; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); } static int wait_for_partially_frozen(struct super_block *sb) { int ret = 0; do { unsigned short old = sb->s_writers.frozen; up_write(&sb->s_umount); ret = wait_var_event_killable(&sb->s_writers.frozen, sb->s_writers.frozen != old); down_write(&sb->s_umount); } while (ret == 0 && sb->s_writers.frozen != SB_UNFROZEN && sb->s_writers.frozen != SB_FREEZE_COMPLETE); return ret; } #define FREEZE_HOLDERS (FREEZE_HOLDER_KERNEL | FREEZE_HOLDER_USERSPACE) #define FREEZE_FLAGS (FREEZE_HOLDERS | FREEZE_MAY_NEST) static inline int freeze_inc(struct super_block *sb, enum freeze_holder who) { WARN_ON_ONCE((who & ~FREEZE_FLAGS)); WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1); if (who & FREEZE_HOLDER_KERNEL) ++sb->s_writers.freeze_kcount; if (who & FREEZE_HOLDER_USERSPACE) ++sb->s_writers.freeze_ucount; return sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount; } static inline int freeze_dec(struct super_block *sb, enum freeze_holder who) { WARN_ON_ONCE((who & ~FREEZE_FLAGS)); WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1); if ((who & FREEZE_HOLDER_KERNEL) && sb->s_writers.freeze_kcount) --sb->s_writers.freeze_kcount; if ((who & FREEZE_HOLDER_USERSPACE) && sb->s_writers.freeze_ucount) --sb->s_writers.freeze_ucount; return sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount; } static inline bool may_freeze(struct super_block *sb, enum freeze_holder who) { WARN_ON_ONCE((who & ~FREEZE_FLAGS)); WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1); if (who & FREEZE_HOLDER_KERNEL) return (who & FREEZE_MAY_NEST) || sb->s_writers.freeze_kcount == 0; if (who & FREEZE_HOLDER_USERSPACE) return (who & FREEZE_MAY_NEST) || sb->s_writers.freeze_ucount == 0; return false; } /** * freeze_super - lock the filesystem and force it into a consistent state * @sb: the super to lock * @who: context that wants to freeze * * Syncs the super to make sure the filesystem is consistent and calls the fs's * freeze_fs. Subsequent calls to this without first thawing the fs may return * -EBUSY. * * @who should be: * * %FREEZE_HOLDER_USERSPACE if userspace wants to freeze the fs; * * %FREEZE_HOLDER_KERNEL if the kernel wants to freeze the fs. * * %FREEZE_MAY_NEST whether nesting freeze and thaw requests is allowed. * * The @who argument distinguishes between the kernel and userspace trying to * freeze the filesystem. Although there cannot be multiple kernel freezes or * multiple userspace freezes in effect at any given time, the kernel and * userspace can both hold a filesystem frozen. The filesystem remains frozen * until there are no kernel or userspace freezes in effect. * * A filesystem may hold multiple devices and thus a filesystems may be * frozen through the block layer via multiple block devices. In this * case the request is marked as being allowed to nest by passing * FREEZE_MAY_NEST. The filesystem remains frozen until all block * devices are unfrozen. If multiple freezes are attempted without * FREEZE_MAY_NEST -EBUSY will be returned. * * During this function, sb->s_writers.frozen goes through these values: * * SB_UNFROZEN: File system is normal, all writes progress as usual. * * SB_FREEZE_WRITE: The file system is in the process of being frozen. 
New * writes should be blocked, though page faults are still allowed. We wait for * all writes to complete and then proceed to the next stage. * * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked * but internal fs threads can still modify the filesystem (although they * should not dirty new pages or inodes), writeback can run etc. After waiting * for all running page faults we sync the filesystem which will clean all * dirty pages and inodes (no new dirty pages or inodes can be created when * sync is running). * * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs * modification are blocked (e.g. XFS preallocation truncation on inode * reclaim). This is usually implemented by blocking new transactions for * filesystems that have them and need this additional guard. After all * internal writers are finished we call ->freeze_fs() to finish filesystem * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is * mostly auxiliary for filesystems to verify they do not modify frozen fs. * * sb->s_writers.frozen is protected by sb->s_umount. * * Return: If the freeze was successful zero is returned. If the freeze * failed a negative error code is returned. */ int freeze_super(struct super_block *sb, enum freeze_holder who) { int ret; if (!super_lock_excl(sb)) { WARN_ON_ONCE("Dying superblock while freezing!"); return -EINVAL; } atomic_inc(&sb->s_active); retry: if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) { if (may_freeze(sb, who)) ret = !!WARN_ON_ONCE(freeze_inc(sb, who) == 1); else ret = -EBUSY; /* All freezers share a single active reference. */ deactivate_locked_super(sb); return ret; } if (sb->s_writers.frozen != SB_UNFROZEN) { ret = wait_for_partially_frozen(sb); if (ret) { deactivate_locked_super(sb); return ret; } goto retry; } if (sb_rdonly(sb)) { /* Nothing to do really... */ WARN_ON_ONCE(freeze_inc(sb, who) > 1); sb->s_writers.frozen = SB_FREEZE_COMPLETE; wake_up_var(&sb->s_writers.frozen); super_unlock_excl(sb); return 0; } sb->s_writers.frozen = SB_FREEZE_WRITE; /* Release s_umount to preserve sb_start_write -> s_umount ordering */ super_unlock_excl(sb); sb_wait_write(sb, SB_FREEZE_WRITE); __super_lock_excl(sb); /* Now we go and block page faults... */ sb->s_writers.frozen = SB_FREEZE_PAGEFAULT; sb_wait_write(sb, SB_FREEZE_PAGEFAULT); /* All writers are done so after syncing there won't be dirty data */ ret = sync_filesystem(sb); if (ret) { sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); wake_up_var(&sb->s_writers.frozen); deactivate_locked_super(sb); return ret; } /* Now wait for internal filesystem counter */ sb->s_writers.frozen = SB_FREEZE_FS; sb_wait_write(sb, SB_FREEZE_FS); if (sb->s_op->freeze_fs) { ret = sb->s_op->freeze_fs(sb); if (ret) { printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_FS); wake_up_var(&sb->s_writers.frozen); deactivate_locked_super(sb); return ret; } } /* * For debugging purposes so that fs can warn if it sees write activity * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super(). */ WARN_ON_ONCE(freeze_inc(sb, who) > 1); sb->s_writers.frozen = SB_FREEZE_COMPLETE; wake_up_var(&sb->s_writers.frozen); lockdep_sb_freeze_release(sb); super_unlock_excl(sb); return 0; } EXPORT_SYMBOL(freeze_super); /* * Undoes the effect of a freeze_super_locked call. 
If the filesystem is * frozen both by userspace and the kernel, a thaw call from either source * removes that state without releasing the other state or unlocking the * filesystem. */ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who) { int error = -EINVAL; if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) goto out_unlock; /* * All freezers share a single active reference. * So just unlock in case there are any left. */ if (freeze_dec(sb, who)) goto out_unlock; if (sb_rdonly(sb)) { sb->s_writers.frozen = SB_UNFROZEN; wake_up_var(&sb->s_writers.frozen); goto out_deactivate; } lockdep_sb_freeze_acquire(sb); if (sb->s_op->unfreeze_fs) { error = sb->s_op->unfreeze_fs(sb); if (error) { pr_err("VFS: Filesystem thaw failed\n"); freeze_inc(sb, who); lockdep_sb_freeze_release(sb); goto out_unlock; } } sb->s_writers.frozen = SB_UNFROZEN; wake_up_var(&sb->s_writers.frozen); sb_freeze_unlock(sb, SB_FREEZE_FS); out_deactivate: deactivate_locked_super(sb); return 0; out_unlock: super_unlock_excl(sb); return error; } /** * thaw_super -- unlock filesystem * @sb: the super to thaw * @who: context that wants to freeze * * Unlocks the filesystem and marks it writeable again after freeze_super() * if there are no remaining freezes on the filesystem. * * @who should be: * * %FREEZE_HOLDER_USERSPACE if userspace wants to thaw the fs; * * %FREEZE_HOLDER_KERNEL if the kernel wants to thaw the fs. * * %FREEZE_MAY_NEST whether nesting freeze and thaw requests is allowed * * A filesystem may hold multiple devices and thus a filesystems may * have been frozen through the block layer via multiple block devices. * The filesystem remains frozen until all block devices are unfrozen. */ int thaw_super(struct super_block *sb, enum freeze_holder who) { if (!super_lock_excl(sb)) { WARN_ON_ONCE("Dying superblock while thawing!"); return -EINVAL; } return thaw_super_locked(sb, who); } EXPORT_SYMBOL(thaw_super); /* * Create workqueue for deferred direct IO completions. We allocate the * workqueue when it's first needed. This avoids creating workqueue for * filesystems that don't need it and also allows us to create the workqueue * late enough so the we can include s_id in the name of the workqueue. */ int sb_init_dio_done_wq(struct super_block *sb) { struct workqueue_struct *old; struct workqueue_struct *wq = alloc_workqueue("dio/%s", WQ_MEM_RECLAIM, 0, sb->s_id); if (!wq) return -ENOMEM; /* * This has to be atomic as more DIOs can race to create the workqueue */ old = cmpxchg(&sb->s_dio_done_wq, NULL, wq); /* Someone created workqueue before us? Free ours... */ if (old) destroy_workqueue(wq); return 0; } EXPORT_SYMBOL_GPL(sb_init_dio_done_wq);
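/*
 * Illustrative sketch, not from the kernel tree: how an in-kernel user might
 * bracket an operation with freeze_super()/thaw_super() using
 * FREEZE_HOLDER_KERNEL. The helper example_run_while_frozen() and its work
 * callback are hypothetical; only freeze_super(), thaw_super() and the
 * FREEZE_* flags come from the code above.
 */
#include <linux/fs.h>

static int example_run_while_frozen(struct super_block *sb,
				    int (*work)(struct super_block *sb))
{
	int ret;

	/*
	 * FREEZE_MAY_NEST lets this request coexist with another holder;
	 * without it a second freeze of an already-frozen fs returns -EBUSY.
	 */
	ret = freeze_super(sb, FREEZE_HOLDER_KERNEL | FREEZE_MAY_NEST);
	if (ret)
		return ret;

	/* sb->s_writers.frozen is SB_FREEZE_COMPLETE at this point. */
	ret = work(sb);

	/* The fs stays frozen if userspace (or another nested freeze) still holds it. */
	thaw_super(sb, FREEZE_HOLDER_KERNEL | FREEZE_MAY_NEST);
	return ret;
}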
// SPDX-License-Identifier: GPL-2.0+ /* * Universal/legacy driver for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * * Copyright (C) 2001 Russell King. * * Supports: ISA-compatible 8250/16550 ports * PNP 8250/16550 ports * early_serial_setup() ports * userspace-configurable "phantom" ports * "serial8250" platform devices * serial8250_register_8250_port() ports */ #include <linux/acpi.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/ioport.h> #include <linux/init.h> #include <linux/console.h> #include <linux/sysrq.h> #include <linux/delay.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/tty.h> #include <linux/ratelimit.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/serial_8250.h> #include <linux/nmi.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/string_helpers.h> #include <linux/uaccess.h> #include <linux/io.h> #ifdef CONFIG_SPARC #include <linux/sunserialcore.h> #endif #include <asm/irq.h> #include "8250.h" /* * Configuration: * share_irqs - whether we pass IRQF_SHARED to request_irq(). This option * is unsafe when used on edge-triggered interrupts.
*/ static unsigned int share_irqs = SERIAL8250_SHARE_IRQS; static unsigned int nr_uarts = CONFIG_SERIAL_8250_RUNTIME_UARTS; static struct uart_driver serial8250_reg; static unsigned int skip_txen_test; /* force skip of txen test at init time */ #define PASS_LIMIT 512 #include <asm/serial.h> /* * SERIAL_PORT_DFNS tells us about built-in ports that have no * standard enumeration mechanism. Platforms that can find all * serial ports via mechanisms like ACPI or PCI need not supply it. */ #ifndef SERIAL_PORT_DFNS #define SERIAL_PORT_DFNS #endif static const struct old_serial_port old_serial_port[] = { SERIAL_PORT_DFNS /* defined in asm/serial.h */ }; #define UART_NR CONFIG_SERIAL_8250_NR_UARTS #ifdef CONFIG_SERIAL_8250_RSA #define PORT_RSA_MAX 4 static unsigned long probe_rsa[PORT_RSA_MAX]; static unsigned int probe_rsa_count; #endif /* CONFIG_SERIAL_8250_RSA */ struct irq_info { struct hlist_node node; int irq; spinlock_t lock; /* Protects list not the hash */ struct list_head *head; }; #define NR_IRQ_HASH 32 /* Can be adjusted later */ static struct hlist_head irq_lists[NR_IRQ_HASH]; static DEFINE_MUTEX(hash_mutex); /* Used to walk the hash */ /* * This is the serial driver's interrupt routine. * * Arjan thinks the old way was overly complex, so it got simplified. * Alan disagrees, saying that need the complexity to handle the weird * nature of ISA shared interrupts. (This is a special exception.) * * In order to handle ISA shared interrupts properly, we need to check * that all ports have been serviced, and therefore the ISA interrupt * line has been de-asserted. * * This means we need to loop through all ports. checking that they * don't have an interrupt pending. */ static irqreturn_t serial8250_interrupt(int irq, void *dev_id) { struct irq_info *i = dev_id; struct list_head *l, *end = NULL; int pass_counter = 0, handled = 0; pr_debug("%s(%d): start\n", __func__, irq); spin_lock(&i->lock); l = i->head; do { struct uart_8250_port *up; struct uart_port *port; up = list_entry(l, struct uart_8250_port, list); port = &up->port; if (port->handle_irq(port)) { handled = 1; end = NULL; } else if (end == NULL) end = l; l = l->next; if (l == i->head && pass_counter++ > PASS_LIMIT) break; } while (l != end); spin_unlock(&i->lock); pr_debug("%s(%d): end\n", __func__, irq); return IRQ_RETVAL(handled); } /* * To support ISA shared interrupts, we need to have one interrupt * handler that ensures that the IRQ line has been deasserted * before returning. Failing to do this will result in the IRQ * line being stuck active, and, since ISA irqs are edge triggered, * no more IRQs will be seen. 
*/ static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up) { spin_lock_irq(&i->lock); if (!list_empty(i->head)) { if (i->head == &up->list) i->head = i->head->next; list_del(&up->list); } else { BUG_ON(i->head != &up->list); i->head = NULL; } spin_unlock_irq(&i->lock); /* List empty so throw away the hash node */ if (i->head == NULL) { hlist_del(&i->node); kfree(i); } } static int serial_link_irq_chain(struct uart_8250_port *up) { struct hlist_head *h; struct irq_info *i; int ret; mutex_lock(&hash_mutex); h = &irq_lists[up->port.irq % NR_IRQ_HASH]; hlist_for_each_entry(i, h, node) if (i->irq == up->port.irq) break; if (i == NULL) { i = kzalloc(sizeof(struct irq_info), GFP_KERNEL); if (i == NULL) { mutex_unlock(&hash_mutex); return -ENOMEM; } spin_lock_init(&i->lock); i->irq = up->port.irq; hlist_add_head(&i->node, h); } mutex_unlock(&hash_mutex); spin_lock_irq(&i->lock); if (i->head) { list_add(&up->list, i->head); spin_unlock_irq(&i->lock); ret = 0; } else { INIT_LIST_HEAD(&up->list); i->head = &up->list; spin_unlock_irq(&i->lock); ret = request_irq(up->port.irq, serial8250_interrupt, up->port.irqflags, up->port.name, i); if (ret < 0) serial_do_unlink(i, up); } return ret; } static void serial_unlink_irq_chain(struct uart_8250_port *up) { struct irq_info *i; struct hlist_head *h; mutex_lock(&hash_mutex); h = &irq_lists[up->port.irq % NR_IRQ_HASH]; hlist_for_each_entry(i, h, node) if (i->irq == up->port.irq) break; BUG_ON(i == NULL); BUG_ON(i->head == NULL); if (list_empty(i->head)) free_irq(up->port.irq, i); serial_do_unlink(i, up); mutex_unlock(&hash_mutex); } /* * This function is used to handle ports that do not have an * interrupt. This doesn't work very well for 16450's, but gives * barely passable results for a 16550A. (Although at the expense * of much CPU overhead). */ static void serial8250_timeout(struct timer_list *t) { struct uart_8250_port *up = from_timer(up, t, timer); up->port.handle_irq(&up->port); mod_timer(&up->timer, jiffies + uart_poll_timeout(&up->port)); } static void serial8250_backup_timeout(struct timer_list *t) { struct uart_8250_port *up = from_timer(up, t, timer); unsigned int iir, ier = 0, lsr; unsigned long flags; uart_port_lock_irqsave(&up->port, &flags); /* * Must disable interrupts or else we risk racing with the interrupt * based handler. */ if (up->port.irq) { ier = serial_in(up, UART_IER); serial_out(up, UART_IER, 0); } iir = serial_in(up, UART_IIR); /* * This should be a safe test for anyone who doesn't trust the * IIR bits on their UART, but it's specifically designed for * the "Diva" UART used on the management processor on many HP * ia64 and parisc boxes. */ lsr = serial_lsr_in(up); if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) && (lsr & UART_LSR_THRE)) { iir &= ~(UART_IIR_ID | UART_IIR_NO_INT); iir |= UART_IIR_THRI; } if (!(iir & UART_IIR_NO_INT)) serial8250_tx_chars(up); if (up->port.irq) serial_out(up, UART_IER, ier); uart_port_unlock_irqrestore(&up->port, flags); /* Standard timer interval plus 0.2s to keep the port running */ mod_timer(&up->timer, jiffies + uart_poll_timeout(&up->port) + HZ / 5); } static void univ8250_setup_timer(struct uart_8250_port *up) { struct uart_port *port = &up->port; /* * The above check will only give an accurate result the first time * the port is opened so this value needs to be preserved. 
*/ if (up->bugs & UART_BUG_THRE) { pr_debug("%s - using backup timer\n", port->name); up->timer.function = serial8250_backup_timeout; mod_timer(&up->timer, jiffies + uart_poll_timeout(port) + HZ / 5); } /* * If the "interrupt" for this port doesn't correspond with any * hardware interrupt, we use a timer-based system. The original * driver used to do this with IRQ0. */ if (!port->irq) mod_timer(&up->timer, jiffies + uart_poll_timeout(port)); } static int univ8250_setup_irq(struct uart_8250_port *up) { struct uart_port *port = &up->port; if (port->irq) return serial_link_irq_chain(up); return 0; } static void univ8250_release_irq(struct uart_8250_port *up) { struct uart_port *port = &up->port; del_timer_sync(&up->timer); up->timer.function = serial8250_timeout; if (port->irq) serial_unlink_irq_chain(up); } #ifdef CONFIG_SERIAL_8250_RSA static int serial8250_request_rsa_resource(struct uart_8250_port *up) { unsigned long start = UART_RSA_BASE << up->port.regshift; unsigned int size = 8 << up->port.regshift; struct uart_port *port = &up->port; int ret = -EINVAL; switch (port->iotype) { case UPIO_HUB6: case UPIO_PORT: start += port->iobase; if (request_region(start, size, "serial-rsa")) ret = 0; else ret = -EBUSY; break; } return ret; } static void serial8250_release_rsa_resource(struct uart_8250_port *up) { unsigned long offset = UART_RSA_BASE << up->port.regshift; unsigned int size = 8 << up->port.regshift; struct uart_port *port = &up->port; switch (port->iotype) { case UPIO_HUB6: case UPIO_PORT: release_region(port->iobase + offset, size); break; } } #endif static const struct uart_ops *base_ops; static struct uart_ops univ8250_port_ops; static const struct uart_8250_ops univ8250_driver_ops = { .setup_irq = univ8250_setup_irq, .release_irq = univ8250_release_irq, .setup_timer = univ8250_setup_timer, }; static struct uart_8250_port serial8250_ports[UART_NR]; /** * serial8250_get_port - retrieve struct uart_8250_port * @line: serial line number * * This function retrieves struct uart_8250_port for the specific line. * This struct *must* *not* be used to perform a 8250 or serial core operation * which is not accessible otherwise. Its only purpose is to make the struct * accessible to the runtime-pm callbacks for context suspend/restore. * The lock assumption made here is none because runtime-pm suspend/resume * callbacks should not be invoked if there is any operation performed on the * port. 
*/ struct uart_8250_port *serial8250_get_port(int line) { return &serial8250_ports[line]; } EXPORT_SYMBOL_GPL(serial8250_get_port); static void (*serial8250_isa_config)(int port, struct uart_port *up, u32 *capabilities); void serial8250_set_isa_configurator( void (*v)(int port, struct uart_port *up, u32 *capabilities)) { serial8250_isa_config = v; } EXPORT_SYMBOL(serial8250_set_isa_configurator); #ifdef CONFIG_SERIAL_8250_RSA static void univ8250_config_port(struct uart_port *port, int flags) { struct uart_8250_port *up = up_to_u8250p(port); up->probe &= ~UART_PROBE_RSA; if (port->type == PORT_RSA) { if (serial8250_request_rsa_resource(up) == 0) up->probe |= UART_PROBE_RSA; } else if (flags & UART_CONFIG_TYPE) { int i; for (i = 0; i < probe_rsa_count; i++) { if (probe_rsa[i] == up->port.iobase) { if (serial8250_request_rsa_resource(up) == 0) up->probe |= UART_PROBE_RSA; break; } } } base_ops->config_port(port, flags); if (port->type != PORT_RSA && up->probe & UART_PROBE_RSA) serial8250_release_rsa_resource(up); } static int univ8250_request_port(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); int ret; ret = base_ops->request_port(port); if (ret == 0 && port->type == PORT_RSA) { ret = serial8250_request_rsa_resource(up); if (ret < 0) base_ops->release_port(port); } return ret; } static void univ8250_release_port(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); if (port->type == PORT_RSA) serial8250_release_rsa_resource(up); base_ops->release_port(port); } static void univ8250_rsa_support(struct uart_ops *ops) { ops->config_port = univ8250_config_port; ops->request_port = univ8250_request_port; ops->release_port = univ8250_release_port; } #else #define univ8250_rsa_support(x) do { } while (0) #endif /* CONFIG_SERIAL_8250_RSA */ static inline void serial8250_apply_quirks(struct uart_8250_port *up) { up->port.quirks |= skip_txen_test ? UPQ_NO_TXEN_TEST : 0; } static struct uart_8250_port *serial8250_setup_port(int index) { struct uart_8250_port *up; if (index >= UART_NR) return NULL; up = &serial8250_ports[index]; up->port.line = index; up->port.port_id = index; serial8250_init_port(up); if (!base_ops) base_ops = up->port.ops; up->port.ops = &univ8250_port_ops; timer_setup(&up->timer, serial8250_timeout, 0); up->ops = &univ8250_driver_ops; if (IS_ENABLED(CONFIG_ALPHA_JENSEN) || (IS_ENABLED(CONFIG_ALPHA_GENERIC) && alpha_jensen())) up->port.set_mctrl = alpha_jensen_set_mctrl; serial8250_set_defaults(up); return up; } static void __init serial8250_isa_init_ports(void) { struct uart_8250_port *up; static int first = 1; int i, irqflag = 0; if (!first) return; first = 0; if (nr_uarts > UART_NR) nr_uarts = UART_NR; /* * Set up initial isa ports based on nr_uart module param, or else * default to CONFIG_SERIAL_8250_RUNTIME_UARTS. Note that we do not * need to increase nr_uarts when setting up the initial isa ports. 
*/ for (i = 0; i < nr_uarts; i++) serial8250_setup_port(i); /* chain base port ops to support Remote Supervisor Adapter */ univ8250_port_ops = *base_ops; univ8250_rsa_support(&univ8250_port_ops); if (share_irqs) irqflag = IRQF_SHARED; for (i = 0, up = serial8250_ports; i < ARRAY_SIZE(old_serial_port) && i < nr_uarts; i++, up++) { struct uart_port *port = &up->port; port->iobase = old_serial_port[i].port; port->irq = irq_canonicalize(old_serial_port[i].irq); port->irqflags = 0; port->uartclk = old_serial_port[i].baud_base * 16; port->flags = old_serial_port[i].flags; port->hub6 = 0; port->membase = old_serial_port[i].iomem_base; port->iotype = old_serial_port[i].io_type; port->regshift = old_serial_port[i].iomem_reg_shift; port->irqflags |= irqflag; if (serial8250_isa_config != NULL) serial8250_isa_config(i, &up->port, &up->capabilities); } } static void __init serial8250_register_ports(struct uart_driver *drv, struct device *dev) { int i; for (i = 0; i < nr_uarts; i++) { struct uart_8250_port *up = &serial8250_ports[i]; if (up->port.type == PORT_8250_CIR) continue; if (up->port.dev) continue; up->port.dev = dev; if (uart_console_registered(&up->port)) pm_runtime_get_sync(up->port.dev); serial8250_apply_quirks(up); uart_add_one_port(drv, &up->port); } } #ifdef CONFIG_SERIAL_8250_CONSOLE static void univ8250_console_write(struct console *co, const char *s, unsigned int count) { struct uart_8250_port *up = &serial8250_ports[co->index]; serial8250_console_write(up, s, count); } static int univ8250_console_setup(struct console *co, char *options) { struct uart_8250_port *up; struct uart_port *port; int retval, i; /* * Check whether an invalid uart number has been specified, and * if so, search for the first available port that does have * console support. */ if (co->index < 0 || co->index >= UART_NR) co->index = 0; /* * If the console is past the initial isa ports, init more ports up to * co->index as needed and increment nr_uarts accordingly. */ for (i = nr_uarts; i <= co->index; i++) { up = serial8250_setup_port(i); if (!up) return -ENODEV; nr_uarts++; } port = &serial8250_ports[co->index].port; /* link port to console */ port->cons = co; retval = serial8250_console_setup(port, options, false); if (retval != 0) port->cons = NULL; return retval; } static int univ8250_console_exit(struct console *co) { struct uart_port *port; port = &serial8250_ports[co->index].port; return serial8250_console_exit(port); } /** * univ8250_console_match - non-standard console matching * @co: registering console * @name: name from console command line * @idx: index from console command line * @options: ptr to option string from console command line * * Only attempts to match console command lines of the form: * console=uart[8250],io|mmio|mmio16|mmio32,<addr>[,<options>] * console=uart[8250],0x<addr>[,<options>] * This form is used to register an initial earlycon boot console and * replace it with the serial8250_console at 8250 driver init. * * Performs console setup for a match (as required by interface) * If no <options> are specified, then assume the h/w is already setup. 
* * Returns 0 if console matches; otherwise non-zero to use default matching */ static int univ8250_console_match(struct console *co, char *name, int idx, char *options) { char match[] = "uart"; /* 8250-specific earlycon name */ unsigned char iotype; resource_size_t addr; int i; if (strncmp(name, match, 4) != 0) return -ENODEV; if (uart_parse_earlycon(options, &iotype, &addr, &options)) return -ENODEV; /* try to match the port specified on the command line */ for (i = 0; i < nr_uarts; i++) { struct uart_port *port = &serial8250_ports[i].port; if (port->iotype != iotype) continue; if ((iotype == UPIO_MEM || iotype == UPIO_MEM16 || iotype == UPIO_MEM32 || iotype == UPIO_MEM32BE) && (port->mapbase != addr)) continue; if (iotype == UPIO_PORT && port->iobase != addr) continue; co->index = i; port->cons = co; return serial8250_console_setup(port, options, true); } return -ENODEV; } static struct console univ8250_console = { .name = "ttyS", .write = univ8250_console_write, .device = uart_console_device, .setup = univ8250_console_setup, .exit = univ8250_console_exit, .match = univ8250_console_match, .flags = CON_PRINTBUFFER | CON_ANYTIME, .index = -1, .data = &serial8250_reg, }; static int __init univ8250_console_init(void) { if (nr_uarts == 0) return -ENODEV; serial8250_isa_init_ports(); register_console(&univ8250_console); return 0; } console_initcall(univ8250_console_init); #define SERIAL8250_CONSOLE (&univ8250_console) #else #define SERIAL8250_CONSOLE NULL #endif static struct uart_driver serial8250_reg = { .owner = THIS_MODULE, .driver_name = "serial", .dev_name = "ttyS", .major = TTY_MAJOR, .minor = 64, .cons = SERIAL8250_CONSOLE, }; /* * early_serial_setup - early registration for 8250 ports * * Setup an 8250 port structure prior to console initialisation. Use * after console initialisation will cause undefined behaviour. */ int __init early_serial_setup(struct uart_port *port) { struct uart_port *p; if (port->line >= ARRAY_SIZE(serial8250_ports) || nr_uarts == 0) return -ENODEV; serial8250_isa_init_ports(); p = &serial8250_ports[port->line].port; p->iobase = port->iobase; p->membase = port->membase; p->irq = port->irq; p->irqflags = port->irqflags; p->uartclk = port->uartclk; p->fifosize = port->fifosize; p->regshift = port->regshift; p->iotype = port->iotype; p->flags = port->flags; p->mapbase = port->mapbase; p->mapsize = port->mapsize; p->private_data = port->private_data; p->type = port->type; p->line = port->line; serial8250_set_defaults(up_to_u8250p(p)); if (port->serial_in) p->serial_in = port->serial_in; if (port->serial_out) p->serial_out = port->serial_out; if (port->handle_irq) p->handle_irq = port->handle_irq; return 0; } /** * serial8250_suspend_port - suspend one serial port * @line: serial line number * * Suspend one serial port. */ void serial8250_suspend_port(int line) { struct uart_8250_port *up = &serial8250_ports[line]; struct uart_port *port = &up->port; if (!console_suspend_enabled && uart_console(port) && port->type != PORT_8250) { unsigned char canary = 0xa5; serial_out(up, UART_SCR, canary); if (serial_in(up, UART_SCR) == canary) up->canary = canary; } uart_suspend_port(&serial8250_reg, port); } EXPORT_SYMBOL(serial8250_suspend_port); /** * serial8250_resume_port - resume one serial port * @line: serial line number * * Resume one serial port. 
*/ void serial8250_resume_port(int line) { struct uart_8250_port *up = &serial8250_ports[line]; struct uart_port *port = &up->port; up->canary = 0; if (up->capabilities & UART_NATSEMI) { /* Ensure it's still in high speed mode */ serial_port_out(port, UART_LCR, 0xE0); ns16550a_goto_highspeed(up); serial_port_out(port, UART_LCR, 0); port->uartclk = 921600*16; } uart_resume_port(&serial8250_reg, port); } EXPORT_SYMBOL(serial8250_resume_port); /* * Register a set of serial devices attached to a platform device. The * list is terminated with a zero flags entry, which means we expect * all entries to have at least UPF_BOOT_AUTOCONF set. */ static int serial8250_probe(struct platform_device *dev) { struct plat_serial8250_port *p = dev_get_platdata(&dev->dev); struct uart_8250_port uart; int ret, i, irqflag = 0; memset(&uart, 0, sizeof(uart)); if (share_irqs) irqflag = IRQF_SHARED; for (i = 0; p && p->flags != 0; p++, i++) { uart.port.iobase = p->iobase; uart.port.membase = p->membase; uart.port.irq = p->irq; uart.port.irqflags = p->irqflags; uart.port.uartclk = p->uartclk; uart.port.regshift = p->regshift; uart.port.iotype = p->iotype; uart.port.flags = p->flags; uart.port.mapbase = p->mapbase; uart.port.mapsize = p->mapsize; uart.port.hub6 = p->hub6; uart.port.has_sysrq = p->has_sysrq; uart.port.private_data = p->private_data; uart.port.type = p->type; uart.bugs = p->bugs; uart.port.serial_in = p->serial_in; uart.port.serial_out = p->serial_out; uart.dl_read = p->dl_read; uart.dl_write = p->dl_write; uart.port.handle_irq = p->handle_irq; uart.port.handle_break = p->handle_break; uart.port.set_termios = p->set_termios; uart.port.set_ldisc = p->set_ldisc; uart.port.get_mctrl = p->get_mctrl; uart.port.pm = p->pm; uart.port.dev = &dev->dev; uart.port.irqflags |= irqflag; ret = serial8250_register_8250_port(&uart); if (ret < 0) { dev_err(&dev->dev, "unable to register port at index %d " "(IO%lx MEM%llx IRQ%d): %d\n", i, p->iobase, (unsigned long long)p->mapbase, p->irq, ret); } } return 0; } /* * Remove serial ports registered against a platform device. */ static void serial8250_remove(struct platform_device *dev) { int i; for (i = 0; i < nr_uarts; i++) { struct uart_8250_port *up = &serial8250_ports[i]; if (up->port.dev == &dev->dev) serial8250_unregister_port(i); } } static int serial8250_suspend(struct platform_device *dev, pm_message_t state) { int i; for (i = 0; i < UART_NR; i++) { struct uart_8250_port *up = &serial8250_ports[i]; if (up->port.type != PORT_UNKNOWN && up->port.dev == &dev->dev) uart_suspend_port(&serial8250_reg, &up->port); } return 0; } static int serial8250_resume(struct platform_device *dev) { int i; for (i = 0; i < UART_NR; i++) { struct uart_8250_port *up = &serial8250_ports[i]; if (up->port.type != PORT_UNKNOWN && up->port.dev == &dev->dev) serial8250_resume_port(i); } return 0; } static struct platform_driver serial8250_isa_driver = { .probe = serial8250_probe, .remove_new = serial8250_remove, .suspend = serial8250_suspend, .resume = serial8250_resume, .driver = { .name = "serial8250", }, }; /* * This "device" covers _all_ ISA 8250-compatible serial devices listed * in the table in include/asm/serial.h */ static struct platform_device *serial8250_isa_devs; /* * serial8250_register_8250_port and serial8250_unregister_port allows for * 16x50 serial ports to be configured at run-time, to support PCMCIA * modems and PCI multiport cards. 
*/ static DEFINE_MUTEX(serial_mutex); static struct uart_8250_port *serial8250_find_match_or_unused(const struct uart_port *port) { int i; /* * First, find a port entry which matches. */ for (i = 0; i < nr_uarts; i++) if (uart_match_port(&serial8250_ports[i].port, port)) return &serial8250_ports[i]; /* try line number first if still available */ i = port->line; if (i < nr_uarts && serial8250_ports[i].port.type == PORT_UNKNOWN && serial8250_ports[i].port.iobase == 0) return &serial8250_ports[i]; /* * We didn't find a matching entry, so look for the first * free entry. We look for one which hasn't been previously * used (indicated by zero iobase). */ for (i = 0; i < nr_uarts; i++) if (serial8250_ports[i].port.type == PORT_UNKNOWN && serial8250_ports[i].port.iobase == 0) return &serial8250_ports[i]; /* * That also failed. Last resort is to find any entry which * doesn't have a real port associated with it. */ for (i = 0; i < nr_uarts; i++) if (serial8250_ports[i].port.type == PORT_UNKNOWN) return &serial8250_ports[i]; return NULL; } static void serial_8250_overrun_backoff_work(struct work_struct *work) { struct uart_8250_port *up = container_of(to_delayed_work(work), struct uart_8250_port, overrun_backoff); struct uart_port *port = &up->port; unsigned long flags; uart_port_lock_irqsave(port, &flags); up->ier |= UART_IER_RLSI | UART_IER_RDI; up->port.read_status_mask |= UART_LSR_DR; serial_out(up, UART_IER, up->ier); uart_port_unlock_irqrestore(port, flags); } /** * serial8250_register_8250_port - register a serial port * @up: serial port template * * Configure the serial port specified by the request. If the * port exists and is in use, it is hung up and unregistered * first. * * The port is then probed and if necessary the IRQ is autodetected * If this fails an error is returned. * * On success the port is ready to use and the line number is returned. */ int serial8250_register_8250_port(const struct uart_8250_port *up) { struct uart_8250_port *uart; int ret = -ENOSPC; if (up->port.uartclk == 0) return -EINVAL; mutex_lock(&serial_mutex); uart = serial8250_find_match_or_unused(&up->port); if (!uart) { /* * If the port is past the initial isa ports, initialize a new * port and increment nr_uarts accordingly. 
*/ uart = serial8250_setup_port(nr_uarts); if (!uart) goto unlock; nr_uarts++; } if (uart->port.type != PORT_8250_CIR) { struct mctrl_gpios *gpios; if (uart->port.dev) uart_remove_one_port(&serial8250_reg, &uart->port); uart->port.ctrl_id = up->port.ctrl_id; uart->port.port_id = up->port.port_id; uart->port.iobase = up->port.iobase; uart->port.membase = up->port.membase; uart->port.irq = up->port.irq; uart->port.irqflags = up->port.irqflags; uart->port.uartclk = up->port.uartclk; uart->port.fifosize = up->port.fifosize; uart->port.regshift = up->port.regshift; uart->port.iotype = up->port.iotype; uart->port.flags = up->port.flags | UPF_BOOT_AUTOCONF; uart->bugs = up->bugs; uart->port.mapbase = up->port.mapbase; uart->port.mapsize = up->port.mapsize; uart->port.private_data = up->port.private_data; uart->tx_loadsz = up->tx_loadsz; uart->capabilities = up->capabilities; uart->port.throttle = up->port.throttle; uart->port.unthrottle = up->port.unthrottle; uart->port.rs485_config = up->port.rs485_config; uart->port.rs485_supported = up->port.rs485_supported; uart->port.rs485 = up->port.rs485; uart->rs485_start_tx = up->rs485_start_tx; uart->rs485_stop_tx = up->rs485_stop_tx; uart->lsr_save_mask = up->lsr_save_mask; uart->dma = up->dma; /* Take tx_loadsz from fifosize if it wasn't set separately */ if (uart->port.fifosize && !uart->tx_loadsz) uart->tx_loadsz = uart->port.fifosize; if (up->port.dev) { uart->port.dev = up->port.dev; ret = uart_get_rs485_mode(&uart->port); if (ret) goto err; } if (up->port.flags & UPF_FIXED_TYPE) uart->port.type = up->port.type; /* * Only call mctrl_gpio_init(), if the device has no ACPI * companion device */ if (!has_acpi_companion(uart->port.dev)) { gpios = mctrl_gpio_init(&uart->port, 0); if (IS_ERR(gpios)) { ret = PTR_ERR(gpios); goto err; } else { uart->gpios = gpios; } } serial8250_set_defaults(uart); /* Possibly override default I/O functions. 
*/ if (up->port.serial_in) uart->port.serial_in = up->port.serial_in; if (up->port.serial_out) uart->port.serial_out = up->port.serial_out; if (up->port.handle_irq) uart->port.handle_irq = up->port.handle_irq; /* Possibly override set_termios call */ if (up->port.set_termios) uart->port.set_termios = up->port.set_termios; if (up->port.set_ldisc) uart->port.set_ldisc = up->port.set_ldisc; if (up->port.get_mctrl) uart->port.get_mctrl = up->port.get_mctrl; if (up->port.set_mctrl) uart->port.set_mctrl = up->port.set_mctrl; if (up->port.get_divisor) uart->port.get_divisor = up->port.get_divisor; if (up->port.set_divisor) uart->port.set_divisor = up->port.set_divisor; if (up->port.startup) uart->port.startup = up->port.startup; if (up->port.shutdown) uart->port.shutdown = up->port.shutdown; if (up->port.pm) uart->port.pm = up->port.pm; if (up->port.handle_break) uart->port.handle_break = up->port.handle_break; if (up->dl_read) uart->dl_read = up->dl_read; if (up->dl_write) uart->dl_write = up->dl_write; if (uart->port.type != PORT_8250_CIR) { if (serial8250_isa_config != NULL) serial8250_isa_config(0, &uart->port, &uart->capabilities); serial8250_apply_quirks(uart); ret = uart_add_one_port(&serial8250_reg, &uart->port); if (ret) goto err; ret = uart->port.line; } else { dev_info(uart->port.dev, "skipping CIR port at 0x%lx / 0x%llx, IRQ %d\n", uart->port.iobase, (unsigned long long)uart->port.mapbase, uart->port.irq); ret = 0; } if (!uart->lsr_save_mask) uart->lsr_save_mask = LSR_SAVE_FLAGS; /* Use default LSR mask */ /* Initialise interrupt backoff work if required */ if (up->overrun_backoff_time_ms > 0) { uart->overrun_backoff_time_ms = up->overrun_backoff_time_ms; INIT_DELAYED_WORK(&uart->overrun_backoff, serial_8250_overrun_backoff_work); } else { uart->overrun_backoff_time_ms = 0; } } unlock: mutex_unlock(&serial_mutex); return ret; err: uart->port.dev = NULL; mutex_unlock(&serial_mutex); return ret; } EXPORT_SYMBOL(serial8250_register_8250_port); /** * serial8250_unregister_port - remove a 16x50 serial port at runtime * @line: serial line number * * Remove one serial port. This may not be called from interrupt * context. We hand the port back to the our control. 
*/ void serial8250_unregister_port(int line) { struct uart_8250_port *uart = &serial8250_ports[line]; mutex_lock(&serial_mutex); if (uart->em485) { unsigned long flags; uart_port_lock_irqsave(&uart->port, &flags); serial8250_em485_destroy(uart); uart_port_unlock_irqrestore(&uart->port, flags); } uart_remove_one_port(&serial8250_reg, &uart->port); if (serial8250_isa_devs) { uart->port.flags &= ~UPF_BOOT_AUTOCONF; uart->port.type = PORT_UNKNOWN; uart->port.dev = &serial8250_isa_devs->dev; uart->port.port_id = line; uart->capabilities = 0; serial8250_init_port(uart); serial8250_apply_quirks(uart); uart_add_one_port(&serial8250_reg, &uart->port); } else { uart->port.dev = NULL; } mutex_unlock(&serial_mutex); } EXPORT_SYMBOL(serial8250_unregister_port); static int __init serial8250_init(void) { int ret; if (nr_uarts == 0) return -ENODEV; serial8250_isa_init_ports(); pr_info("Serial: 8250/16550 driver, %d ports, IRQ sharing %s\n", nr_uarts, str_enabled_disabled(share_irqs)); #ifdef CONFIG_SPARC ret = sunserial_register_minors(&serial8250_reg, UART_NR); #else serial8250_reg.nr = UART_NR; ret = uart_register_driver(&serial8250_reg); #endif if (ret) goto out; ret = serial8250_pnp_init(); if (ret) goto unreg_uart_drv; serial8250_isa_devs = platform_device_alloc("serial8250", PLAT8250_DEV_LEGACY); if (!serial8250_isa_devs) { ret = -ENOMEM; goto unreg_pnp; } ret = platform_device_add(serial8250_isa_devs); if (ret) goto put_dev; serial8250_register_ports(&serial8250_reg, &serial8250_isa_devs->dev); ret = platform_driver_register(&serial8250_isa_driver); if (ret == 0) goto out; platform_device_del(serial8250_isa_devs); put_dev: platform_device_put(serial8250_isa_devs); unreg_pnp: serial8250_pnp_exit(); unreg_uart_drv: #ifdef CONFIG_SPARC sunserial_unregister_minors(&serial8250_reg, UART_NR); #else uart_unregister_driver(&serial8250_reg); #endif out: return ret; } static void __exit serial8250_exit(void) { struct platform_device *isa_dev = serial8250_isa_devs; /* * This tells serial8250_unregister_port() not to re-register * the ports (thereby making serial8250_isa_driver permanently * in use.) */ serial8250_isa_devs = NULL; platform_driver_unregister(&serial8250_isa_driver); platform_device_unregister(isa_dev); serial8250_pnp_exit(); #ifdef CONFIG_SPARC sunserial_unregister_minors(&serial8250_reg, UART_NR); #else uart_unregister_driver(&serial8250_reg); #endif } module_init(serial8250_init); module_exit(serial8250_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic 8250/16x50 serial driver"); module_param_hw(share_irqs, uint, other, 0644); MODULE_PARM_DESC(share_irqs, "Share IRQs with other non-8250/16x50 devices (unsafe)"); module_param(nr_uarts, uint, 0644); MODULE_PARM_DESC(nr_uarts, "Maximum number of UARTs supported. (1-" __MODULE_STRING(CONFIG_SERIAL_8250_NR_UARTS) ")"); module_param(skip_txen_test, uint, 0644); MODULE_PARM_DESC(skip_txen_test, "Skip checking for the TXEN bug at init time"); #ifdef CONFIG_SERIAL_8250_RSA module_param_hw_array(probe_rsa, ulong, ioport, &probe_rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA"); #endif MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR); #ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS #ifndef MODULE /* This module was renamed to 8250_core in 3.7. Keep the old "8250" name * working as well for the module options so we don't break people. We * need to keep the names identical and the convenient macros will happily * refuse to let us do that by failing the build with redefinition errors * of global variables. 
So we stick them inside a dummy function to avoid * those conflicts. The options still get parsed, and the redefined * MODULE_PARAM_PREFIX lets us keep the "8250." syntax alive. * * This is hacky. I'm sorry. */ static void __used s8250_options(void) { #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "8250_core." module_param_cb(share_irqs, &param_ops_uint, &share_irqs, 0644); module_param_cb(nr_uarts, &param_ops_uint, &nr_uarts, 0644); module_param_cb(skip_txen_test, &param_ops_uint, &skip_txen_test, 0644); #ifdef CONFIG_SERIAL_8250_RSA __module_param_call(MODULE_PARAM_PREFIX, probe_rsa, &param_array_ops, .arr = &__param_arr_probe_rsa, 0444, -1, 0); #endif } #else MODULE_ALIAS("8250_core"); #endif #endif
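/*
 * Illustrative sketch, not from the kernel tree: registering one memory-mapped
 * 16550A through serial8250_register_8250_port() and tearing it down with
 * serial8250_unregister_port(). The device, addresses, clock and IRQ are made
 * up; only the uart_8250_port fields and the register/unregister calls come
 * from the driver above.
 */
#include <linux/serial_core.h>
#include <linux/serial_8250.h>

static int example_line = -1;

static int example_add_port(struct device *dev, void __iomem *regs,
			    resource_size_t mapbase, int irq)
{
	struct uart_8250_port uart = { };

	uart.port.dev = dev;
	uart.port.membase = regs;	/* already ioremap()ed by the caller */
	uart.port.mapbase = mapbase;
	uart.port.irq = irq;
	uart.port.uartclk = 1843200;	/* must be non-zero or -EINVAL is returned */
	uart.port.iotype = UPIO_MEM;
	uart.port.regshift = 0;
	uart.port.type = PORT_16550A;
	uart.port.flags = UPF_FIXED_TYPE | UPF_SHARE_IRQ;

	/* On success the allocated line number is returned. */
	example_line = serial8250_register_8250_port(&uart);
	return example_line < 0 ? example_line : 0;
}

static void example_remove_port(void)
{
	if (example_line >= 0)
		serial8250_unregister_port(example_line);
}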
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_COMPAT_H #define _ASM_X86_COMPAT_H /* * Architecture specific compatibility types */ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <asm/processor.h> #include <asm/user32.h> #include <asm/unistd.h> #define compat_mode_t compat_mode_t typedef u16 compat_mode_t; #define __compat_uid_t __compat_uid_t typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; #define compat_dev_t compat_dev_t typedef u16 compat_dev_t; #define compat_ipc_pid_t compat_ipc_pid_t typedef u16 compat_ipc_pid_t; #define compat_statfs compat_statfs #include <asm-generic/compat.h> #define COMPAT_UTS_MACHINE "i686\0\0" typedef u16 compat_nlink_t; struct compat_stat { u32 st_dev; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; u32 st_rdev; u32 st_size; u32 st_blksize; u32 st_blocks; u32 st_atime; u32 st_atime_nsec; u32 st_mtime; u32 st_mtime_nsec; u32 st_ctime; u32 st_ctime_nsec; u32 __unused4; u32 __unused5; }; /* * IA32 uses 4 byte alignment for 64 bit quantities, so we need to pack the * compat flock64 structure. */ #define __ARCH_NEED_COMPAT_FLOCK64_PACKED struct compat_statfs { int f_type; int f_bsize; int f_blocks; int f_bfree; int f_bavail; int f_files; int f_ffree; compat_fsid_t f_fsid; int f_namelen; /* SunOS ignores this field. */ int f_frsize; int f_flags; int f_spare[4]; }; #ifdef CONFIG_X86_X32_ABI #define COMPAT_USE_64BIT_TIME \ (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) return true; #endif return false; } static inline bool in_32bit_syscall(void) { return in_ia32_syscall() || in_x32_syscall(); } #ifdef CONFIG_COMPAT static inline bool in_compat_syscall(void) { return in_32bit_syscall(); } #define in_compat_syscall in_compat_syscall /* override the generic impl */ #define compat_need_64bit_alignment_fixup in_ia32_syscall #endif struct compat_siginfo; #ifdef CONFIG_X86_X32_ABI int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from); #define copy_siginfo_to_user32 copy_siginfo_to_user32 #endif /* CONFIG_X86_X32_ABI */ #endif /* _ASM_X86_COMPAT_H */
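/*
 * Illustrative sketch, not from the kernel tree: the usual pattern this header
 * enables -- one native code path that checks in_compat_syscall() and copies
 * out either the native or the ia32 compat_stat layout. The helper
 * example_put_size() and its arguments are hypothetical; only
 * in_compat_syscall() and struct compat_stat come from the header above.
 */
#include <linux/compat.h>
#include <linux/stat.h>
#include <linux/limits.h>
#include <linux/uaccess.h>

static int example_put_size(u64 size, void __user *ubuf)
{
	if (in_compat_syscall()) {
		struct compat_stat cst = { };

		/* compat_stat.st_size is only 32 bits wide; refuse larger values. */
		if (size > U32_MAX)
			return -EOVERFLOW;
		cst.st_size = size;
		return copy_to_user(ubuf, &cst, sizeof(cst)) ? -EFAULT : 0;
	} else {
		struct stat st = { };

		st.st_size = size;
		return copy_to_user(ubuf, &st, sizeof(st)) ? -EFAULT : 0;
	}
}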
/* SPDX-License-Identifier: GPL-2.0+ */ /* * Read-Copy Update mechanism for mutual exclusion * * Copyright IBM Corporation, 2001 * * Author: Dipankar Sarma <dipankar@in.ibm.com> * * Based on the original work by Paul McKenney <paulmck@vnet.ibm.com> * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * Papers: * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) * * For detailed explanation of Read-Copy Update mechanism see - * http://lse.sourceforge.net/locking/rcupdate.html * */ #ifndef __LINUX_RCUPDATE_H #define __LINUX_RCUPDATE_H #include <linux/types.h> #include <linux/compiler.h> #include <linux/atomic.h> #include <linux/irqflags.h> #include <linux/preempt.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> #include <linux/cleanup.h> #include <asm/processor.h> #include <linux/cpumask.h> #include <linux/context_tracking_irq.h> #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); struct rcu_gp_oldstate; unsigned long get_completed_synchronize_rcu(void); void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); // Maximum number of unsigned long values corresponding to // not-yet-completed RCU grace periods. #define NUM_ACTIVE_RCU_POLL_OLDSTATE 2 /** * same_state_synchronize_rcu - Are two old-state values identical? * @oldstate1: First old-state value. * @oldstate2: Second old-state value. * * The two old-state values must have been obtained from either * get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or * get_completed_synchronize_rcu(). Returns @true if the two values are * identical and @false otherwise. This allows structures whose lifetimes * are tracked by old-state values to push these values to a list header, * allowing those structures to be slightly smaller. */ static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned long oldstate2) { return oldstate1 == oldstate2; } #ifdef CONFIG_PREEMPT_RCU void __rcu_read_lock(void); void __rcu_read_unlock(void); /* * Defined as a macro as it is a very low level header included from * areas that don't even know about current. This gives the rcu_read_lock() * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
*/ #define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting) #else /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TINY_RCU #define rcu_read_unlock_strict() do { } while (0) #else void rcu_read_unlock_strict(void); #endif static inline void __rcu_read_lock(void) { preempt_disable(); } static inline void __rcu_read_unlock(void) { preempt_enable(); if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) rcu_read_unlock_strict(); } static inline int rcu_preempt_depth(void) { return 0; } #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_RCU_LAZY void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); #else static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) { call_rcu(head, func); } #endif /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); #else static inline void rcu_init_tasks_generic(void) { } #endif #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); #else /* #ifdef CONFIG_RCU_STALL_COMMON */ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else static inline void rcu_irq_work_resched(void) { } #endif #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); int rcu_nocb_cpu_offload(int cpu); int rcu_nocb_cpu_deoffload(int cpu); void rcu_nocb_flush_deferred_wakeup(void); #else /* #ifdef CONFIG_RCU_NOCB_CPU */ static inline void rcu_init_nohz(void) { } static inline int rcu_nocb_cpu_offload(int cpu) { return -EINVAL; } static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; } static inline void rcu_nocb_flush_deferred_wakeup(void) { } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /* * Note a quasi-voluntary context switch for RCU-tasks's benefit. * This is a macro rather than an inline function to avoid #include hell. */ #ifdef CONFIG_TASKS_RCU_GENERIC # ifdef CONFIG_TASKS_RCU # define rcu_tasks_classic_qs(t, preempt) \ do { \ if (!(preempt) && READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); # else # define rcu_tasks_classic_qs(t, preempt) do { } while (0) # define call_rcu_tasks call_rcu # define synchronize_rcu_tasks synchronize_rcu # endif # ifdef CONFIG_TASKS_TRACE_RCU // Bits for ->trc_reader_special.b.need_qs field. #define TRC_NEED_QS 0x1 // Task needs a quiescent state. #define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state. 
u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new); void rcu_tasks_trace_qs_blkd(struct task_struct *t); # define rcu_tasks_trace_qs(t) \ do { \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ \ if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ likely(!___rttq_nesting)) { \ rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ rcu_tasks_trace_qs_blkd(t); \ } \ } while (0) # else # define rcu_tasks_trace_qs(t) do { } while (0) # endif #define rcu_tasks_qs(t, preempt) \ do { \ rcu_tasks_classic_qs((t), (preempt)); \ rcu_tasks_trace_qs(t); \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks_rude(void); # endif #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); void exit_tasks_rcu_stop(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_classic_qs(t, preempt) do { } while (0) #define rcu_tasks_qs(t, preempt) do { } while (0) #define rcu_note_voluntary_context_switch(t) do { } while (0) #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } static inline void exit_tasks_rcu_stop(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ /** * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? * * As an accident of implementation, an RCU Tasks Trace grace period also * acts as an RCU grace period. However, this could change at any time. * Code relying on this accident must call this function to verify that * this accident is still happening. * * You have been warned! */ static inline bool rcu_trace_implies_rcu_gp(void) { return true; } /** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * * This macro resembles cond_resched(), except that it is defined to * report potential quiescent states to RCU-tasks even if the cond_resched() * machinery were to be shut off, as some advocate for PREEMPTION kernels. */ #define cond_resched_tasks_rcu_qs() \ do { \ rcu_tasks_qs(current, false); \ cond_resched(); \ } while (0) /** * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states * @old_ts: jiffies at start of processing. * * This helper is for long-running softirq handlers, such as NAPI threads in * networking. The caller should initialize the variable passed in as @old_ts * at the beginning of the softirq handler. When invoked frequently, this macro * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will * provide both RCU and RCU-Tasks quiescent states. Note that this macro * modifies its old_ts argument. * * Because regions of code that have disabled softirq act as RCU read-side * critical sections, this macro should be invoked with softirq (and * preemption) enabled. * * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would * have more chance to invoke schedule() calls and provide necessary quiescent * states. As a contrast, calling cond_resched() only won't achieve the same * effect because cond_resched() does not provide RCU-Tasks quiescent states. 
*/ #define rcu_softirq_qs_periodic(old_ts) \ do { \ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \ time_after(jiffies, (old_ts) + HZ / 10)) { \ preempt_disable(); \ rcu_softirq_qs(); \ preempt_enable(); \ (old_ts) = jiffies; \ } \ } while (0) /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ #if defined(CONFIG_TREE_RCU) #include <linux/rcutree.h> #elif defined(CONFIG_TINY_RCU) #include <linux/rcutiny.h> #else #error "Unknown RCU implementation specified to kernel configuration" #endif /* * The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls * are needed for dynamic initialization and destruction of rcu_head * on the stack, and init_rcu_head()/destroy_rcu_head() are needed for * dynamic initialization and destruction of statically allocated rcu_head * structures. However, rcu_head structures allocated dynamically in the * heap don't need any initialization. */ #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD void init_rcu_head(struct rcu_head *head); void destroy_rcu_head(struct rcu_head *head); void init_rcu_head_on_stack(struct rcu_head *head); void destroy_rcu_head_on_stack(struct rcu_head *head); #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ static inline void init_rcu_head(struct rcu_head *head) { } static inline void destroy_rcu_head(struct rcu_head *head) { } static inline void init_rcu_head_on_stack(struct rcu_head *head) { } static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ static inline bool rcu_lockdep_current_cpu_online(void) { return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ extern struct lockdep_map rcu_lock_map; extern struct lockdep_map rcu_bh_lock_map; extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; #ifdef CONFIG_DEBUG_LOCK_ALLOC static inline void rcu_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } static inline void rcu_try_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) { lock_release(map, _THIS_IP_); } int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); int rcu_read_lock_sched_held(void); int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ # define rcu_lock_acquire(a) do { } while (0) # define rcu_try_lock_acquire(a) do { } while (0) # define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) { return 1; } static inline int rcu_read_lock_bh_held(void) { return 1; } static inline int rcu_read_lock_sched_held(void) { return !preemptible(); } static inline int rcu_read_lock_any_held(void) { return !preemptible(); } static inline int debug_lockdep_rcu_enabled(void) { return 0; } #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU /** * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check * @s: informative message * * This checks debug_lockdep_rcu_enabled() before checking (c) to * prevent early boot splats due to lockdep not yet being initialized, * and rechecks it after checking (c) to prevent false-positive splats * due to races with lockdep being disabled. 
See commit 3066820034b5dd * ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail. */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ static bool __section(".data.unlikely") __warned; \ if (debug_lockdep_rcu_enabled() && (c) && \ debug_lockdep_rcu_enabled() && !__warned) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) static inline void rcu_preempt_sleep_check(void) { RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ static inline void rcu_preempt_sleep_check(void) { } #endif /* #else #ifdef CONFIG_PROVE_RCU */ #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh read-side critical section"); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) #else /* #ifdef CONFIG_PROVE_RCU */ #define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c)) #define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ /* * Helper functions for rcu_dereference_check(), rcu_dereference_protected() * and rcu_assign_pointer(). Some of these could be folded into their * callers, but they are left separate in order to ease introduction of * multiple pointers markings to match different RCU implementations * (e.g., __srcu), should this make sense in the future. */ #ifdef __CHECKER__ #define rcu_check_sparse(p, space) \ ((void)(((typeof(*p) space *)p) == p)) #else /* #ifdef __CHECKER__ */ #define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ #define __unrcu_pointer(p, local) \ ({ \ typeof(*p) *local = (typeof(*p) *__force)(p); \ rcu_check_sparse(p, __rcu); \ ((typeof(*p) __force __kernel *)(local)); \ }) /** * unrcu_pointer - mark a pointer as not being RCU protected * @p: pointer needing to lose its __rcu property * * Converts @p from an __rcu pointer to a __kernel pointer. * This allows an __rcu pointer to be used with xchg() and friends. */ #define unrcu_pointer(p) __unrcu_pointer(p, __UNIQUE_ID(rcu)) #define __rcu_access_pointer(p, local, space) \ ({ \ typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(local)); \ }) #define __rcu_dereference_check(p, local, c, space) \ ({ \ /* Dependency order vs. p above. */ \ typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(local)); \ }) #define __rcu_dereference_protected(p, local, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) #define __rcu_dereference_raw(p, local) \ ({ \ /* Dependency order vs. p above. */ \ typeof(p) local = READ_ONCE(p); \ ((typeof(*p) __force __kernel *)(local)); \ }) #define rcu_dereference_raw(p) __rcu_dereference_raw(p, __UNIQUE_ID(rcu)) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. 
*/ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to * @v: value to assign (publish) * * Assigns the specified value to the specified RCU-protected * pointer, ensuring that any concurrent RCU readers will see * any prior initialization. * * Inserts memory barriers on architectures that require them * (which is most of them), and also prevents the compiler from * reordering the code that initializes the structure after the pointer * assignment. More importantly, this call documents which pointers * will be dereferenced by RCU read-side code. * * In some special cases, you may use RCU_INIT_POINTER() instead * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due * to the fact that it does not constrain either the CPU or the compiler. * That said, using RCU_INIT_POINTER() when you should have used * rcu_assign_pointer() is a very bad thing that results in * impossible-to-diagnose memory corruption. So please be careful. * See the RCU_INIT_POINTER() comment header for details. * * Note that rcu_assign_pointer() evaluates each of its arguments only * once, appearances notwithstanding. One of the "extra" evaluations * is in typeof() and the other visible only to sparse (__CHECKER__), * neither of which actually execute the argument. As with most cpp * macros, this execute-arguments-only-once property is important, so * please be careful when making changes to rcu_assign_pointer() and the * other macros that it invokes. */ #define rcu_assign_pointer(p, v) \ do { \ uintptr_t _r_a_p__v = (uintptr_t)(v); \ rcu_check_sparse(p, __rcu); \ \ if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ else \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ } while (0) /** * rcu_replace_pointer() - replace an RCU pointer, returning its old value * @rcu_ptr: RCU pointer, whose old value is returned * @ptr: regular pointer * @c: the lockdep conditions under which the dereference will take place * * Perform a replacement, where @rcu_ptr is an RCU-annotated * pointer and @c is the lockdep argument that is passed to the * rcu_dereference_protected() call used to read that pointer. The old * value of @rcu_ptr is returned, and @rcu_ptr is set to @ptr. */ #define rcu_replace_pointer(rcu_ptr, ptr, c) \ ({ \ typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c)); \ rcu_assign_pointer((rcu_ptr), (ptr)); \ __tmp; \ }) /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing * @p: The pointer to read * * Return the value of the specified RCU-protected pointer, but omit the * lockdep checks for being in an RCU read-side critical section. This is * useful when the value of this pointer is accessed, but the pointer is * not dereferenced, for example, when testing an RCU-protected pointer * against NULL. Although rcu_access_pointer() may also be used in cases * where update-side locks prevent the value of the pointer from changing, * you should instead use rcu_dereference_protected() for this use case. * Within an RCU read-side critical section, there is little reason to * use rcu_access_pointer(). * * It is usually best to test the rcu_access_pointer() return value * directly in order to avoid accidental dereferences being introduced * by later inattentive changes. In other words, assigning the * rcu_access_pointer() return value to a local variable results in an * accident waiting to happen. 
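 *
 * For example, to test whether anything has been published yet, compare
 * the return value directly (an illustrative sketch; "gp" is a
 * hypothetical __rcu pointer)::
 *
 *	if (!rcu_access_pointer(gp))
 *		return;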
* * It is also permissible to use rcu_access_pointer() when read-side * access to the pointer was removed at least one grace period ago, as is * the case in the context of the RCU callback that is freeing up the data, * or after a synchronize_rcu() returns. This can be useful when tearing * down multi-linked structures after a grace period has elapsed. However, * rcu_dereference_protected() is normally preferred for this use case. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) /** * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the * dereference will take place are correct. Typically the conditions * indicate the various locking conditions that should be held at that * point. The check should return true if the conditions are satisfied. * An implicit check for being in an RCU read-side critical section * (rcu_read_lock()) is included. * * For example: * * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); * * Inserts memory barriers on architectures that require them * (currently only the Alpha), prevents the compiler from refetching * (and from merging fetches), and, more importantly, documents exactly * which pointers are protected by RCU and checks that the pointer is * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * This is the RCU-bh counterpart to rcu_dereference_check(). However, * please note that starting in v5.0 kernels, vanilla RCU grace periods * wait for local_bh_disable() regions of code in addition to regions of * code demarked by rcu_read_lock() and rcu_read_unlock(). This means * that synchronize_rcu(), call_rcu, and friends all take not only * rcu_read_lock() but also rcu_read_lock_bh() into account. */ #define rcu_dereference_bh_check(p, c) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * This is the RCU-sched counterpart to rcu_dereference_check(). * However, please note that starting in v5.0 kernels, vanilla RCU grace * periods wait for preempt_disable() regions of code in addition to * regions of code demarked by rcu_read_lock() and rcu_read_unlock(). * This means that synchronize_rcu(), call_rcu, and friends all take not * only rcu_read_lock() but also rcu_read_lock_sched() into account. 
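 *
 * For example (an illustrative sketch; "gp" and "gp_lock" are hypothetical)::
 *
 *	p = rcu_dereference_sched_check(gp, lockdep_is_held(&gp_lock));
 *
 * which states that "gp" may be dereferenced either within an RCU-sched
 * read-side critical section (for example, with preemption disabled) or
 * while holding gp_lock, the lock assumed to serialize updates to "gp".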
*/ #define rcu_dereference_sched_check(p, c) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ (c) || rcu_read_lock_sched_held(), \ __rcu) /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. * * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ #define rcu_dereference_raw_check(p) \ __rcu_dereference_check((p), __UNIQUE_ID(rcu), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * the READ_ONCE(). This is useful in cases where update-side locks * prevent the value of the pointer from changing. Please note that this * primitive does *not* prevent the compiler from repeating this reference * or combining it with other references, so it should not be used without * protection of appropriate locks. * * This function is only for update-side use. Using this function * when protected only by rcu_read_lock() will result in infrequent * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ __rcu_dereference_protected((p), __UNIQUE_ID(rcu), (c), __rcu) /** * rcu_dereference() - fetch RCU-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * This is a simple wrapper around rcu_dereference_check(). */ #define rcu_dereference(p) rcu_dereference_check(p, 0) /** * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) /** * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing * @p: The pointer to read, prior to dereferencing * * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) /** * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism * @p: The pointer to hand off * * This is simply an identity function, but it documents where a pointer * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to * kill_dependency(). It could be used as follows:: * * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); * if (long_lived) { * if (!atomic_inc_not_zero(p->refcnt)) * long_lived = false; * else * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); */ #define rcu_pointer_handoff(p) (p) /** * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the * synchronize_rcu() is guaranteed to block until after all the other * CPUs exit their critical sections. Similarly, if call_rcu() is invoked * on one CPU while other CPUs are within RCU read-side critical * sections, invocation of the corresponding RCU callback is deferred * until after the all the other CPUs exit their critical sections. * * In v5.0 and later kernels, synchronize_rcu() and call_rcu() also * wait for regions of code with preemption disabled, including regions of * code with interrupts or softirqs disabled. 
In pre-v5.0 kernels, which * define synchronize_sched(), only code enclosed within rcu_read_lock() * and rcu_read_unlock() are guaranteed to be waited for. * * Note, however, that RCU callbacks are permitted to run concurrently * with new RCU read-side critical sections. One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, * (4) CPU 2 enters a RCU read-side critical section, (5) the RCU * callback is invoked. This is legal, because the RCU read-side critical * section that was running concurrently with the call_rcu() (and which * therefore might be referencing something that the corresponding RCU * callback would free up) has completed before the corresponding * RCU callback is invoked. * * RCU read-side critical sections may be nested. Any deferred actions * will be deferred until the outermost RCU read-side critical section * completes. * * You can avoid reading and understanding the next paragraph by * following this rule: don't put anything in an rcu_read_lock() RCU * read-side critical section that would block in a !PREEMPTION kernel. * But if you want the full story, read on! * * In non-preemptible RCU implementations (pure TREE_RCU and TINY_RCU), * it is illegal to block while in an RCU read-side critical section. * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION * kernel builds, RCU read-side critical sections may be preempted, * but explicit blocking is illegal. Finally, in preemptible RCU * implementations in real-time (with -rt patchset) kernel builds, RCU * read-side critical sections may be preempted and they may also block, but * only when acquiring spinlocks that are subject to priority inheritance. */ static __always_inline void rcu_read_lock(void) { __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock() used illegally while idle"); } /* * So where is rcu_write_lock()? It does not exist, as there is no * way for writers to lock out RCU readers. This is a feature, not * a bug -- this property is what provides RCU's performance benefits. * Of course, writers must coordinate with each other. The normal * spinlock primitives work well for this, but any other technique may be * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ /** * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * In almost all situations, rcu_read_unlock() is immune from deadlock. * In recent kernels that have consolidated synchronize_sched() and * synchronize_rcu_bh() into synchronize_rcu(), this deadlock immunity * also extends to the scheduler's runqueue and priority-inheritance * spinlocks, courtesy of the quiescent-state deferral that is carried * out when rcu_read_unlock() is invoked with interrupts disabled. * * See rcu_read_lock() for more information. */ static inline void rcu_read_unlock(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); __release(RCU); __rcu_read_unlock(); rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ } /** * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent to rcu_read_lock(), but also disables softirqs. * Note that anything else that disables softirqs can also serve as an RCU * read-side critical section. 
However, please note that this equivalence * applies only to v5.0 and later. Before v5.0, rcu_read_lock() and * rcu_read_lock_bh() were unrelated. * * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh() * must occur in the same context, for example, it is illegal to invoke * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh() * was invoked from some other task. */ static inline void rcu_read_lock_bh(void) { local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_bh() used illegally while idle"); } /** * rcu_read_unlock_bh() - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ static inline void rcu_read_unlock_bh(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); } /** * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * * This is equivalent to rcu_read_lock(), but also disables preemption. * Read-side critical sections can also be introduced by anything else that * disables preemption, including local_irq_disable() and friends. However, * please note that the equivalence to rcu_read_lock() applies only to * v5.0 and later. Before v5.0, rcu_read_lock() and rcu_read_lock_sched() * were unrelated. * * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched() * must occur in the same context, for example, it is illegal to invoke * rcu_read_unlock_sched() from process context if the matching * rcu_read_lock_sched() was invoked from an NMI handler. */ static inline void rcu_read_lock_sched(void) { preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_lock_sched() used illegally while idle"); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); __acquire(RCU_SCHED); } /** * rcu_read_unlock_sched() - marks the end of a RCU-classic critical section * * See rcu_read_lock_sched() for more information. */ static inline void rcu_read_unlock_sched(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_unlock_sched_notrace(void) { __release(RCU_SCHED); preempt_enable_notrace(); } /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * @p: The pointer to be initialized. * @v: The value to initialized the pointer to. * * Initialize an RCU-protected pointer in special cases where readers * do not need ordering constraints on the CPU or the compiler. These * special cases are: * * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer *or* * 2. The caller has taken whatever steps are required to prevent * RCU readers from concurrently accessing this pointer *or* * 3. The referenced data structure has already been exposed to * readers either at compile time or via rcu_assign_pointer() *and* * * a. You have not made *any* reader-visible changes to * this structure since then *or* * b. It is OK for readers accessing this structure from its * new location to see the old state of the structure. 
(For * example, the changes were to statistical counters or to * other state where exact synchronization is not required.) * * Failure to follow these rules governing use of RCU_INIT_POINTER() will * result in impossible-to-diagnose memory corruption. As in the structures * will look OK in crash dumps, but any concurrent RCU readers might * see pre-initialized values of the referenced data structure. So * please be very careful how you use RCU_INIT_POINTER()!!! * * If you are creating an RCU-protected linked structure that is accessed * by a single external-to-structure RCU-protected pointer, then you may * use RCU_INIT_POINTER() to initialize the internal RCU-protected * pointers, but you must use rcu_assign_pointer() to initialize the * external-to-structure pointer *after* you have completely initialized * the reader-accessible portions of the linked structure. * * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no * ordering guarantees for either the CPU or the compiler. */ #define RCU_INIT_POINTER(p, v) \ do { \ rcu_check_sparse(p, __rcu); \ WRITE_ONCE(p, RCU_INITIALIZER(v)); \ } while (0) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer * @p: The pointer to be initialized. * @v: The value to initialized the pointer to. * * GCC-style initialization for an RCU-protected pointer in a structure field. */ #define RCU_POINTER_INITIALIZER(p, v) \ .p = RCU_INITIALIZER(v) /* * Does the specified offset indicate that the corresponding rcu_head * structure can be handled by kvfree_rcu()? */ #define __is_kvfree_rcu_offset(offset) ((offset) < 4096) /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree for double-argument invocations. * @rhf: the name of the struct rcu_head within the type of @ptr. * * Many rcu callbacks functions just call kfree() on the base structure. * These functions are trivial, but their size adds up, and furthermore * when they are used in a kernel module, that module must invoke the * high-latency rcu_barrier() function at module-unload time. * * The kfree_rcu() function handles this issue. Rather than encoding a * function address in the embedded rcu_head structure, kfree_rcu() instead * encodes the offset of the rcu_head structure within the base structure. * Because the functions are not allowed in the low-order 4096 bytes of * kernel virtual memory, offsets up to 4095 bytes can be accommodated. * If the offset is larger than 4095 bytes, a compile-time error will * be generated in kvfree_rcu_arg_2(). If this error is triggered, you can * either fall back to use of call_rcu() or rearrange the structure to * position the rcu_head structure into the first 4096 bytes. * * The object to be freed can be allocated either by kmalloc() or * kmem_cache_alloc(). * * Note that the allowable offset might decrease in the future. * * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ #define kfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf) #define kvfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf) /** * kfree_rcu_mightsleep() - kfree an object after a grace period. * @ptr: pointer to kfree for single-argument invocations. * * When it comes to head-less variant, only one argument * is passed and that is just a pointer which has to be * freed after a grace period. Therefore the semantic is * * kfree_rcu_mightsleep(ptr); * * where @ptr is the pointer to be freed by kvfree(). 
* * Please note, head-less way of freeing is permitted to * use from a context that has to follow might_sleep() * annotation. Otherwise, please switch and embed the * rcu_head structure within the type of @ptr. */ #define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) #define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) #define kvfree_rcu_arg_2(ptr, rhf) \ do { \ typeof (ptr) ___p = (ptr); \ \ if (___p) { \ BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \ kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \ } \ } while (0) #define kvfree_rcu_arg_1(ptr) \ do { \ typeof(ptr) ___p = (ptr); \ \ if (___p) \ kvfree_call_rcu(NULL, (void *) (___p)); \ } while (0) /* * Place this after a lock-acquisition primitive to guarantee that * an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies * if the UNLOCK and LOCK are executed by the same CPU or if the * UNLOCK and LOCK operate on the same lock variable. */ #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE #define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ #else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ #define smp_mb__after_unlock_lock() do { } while (0) #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ /* Has the specified rcu_head structure been handed to call_rcu()? */ /** * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu() * @rhp: The rcu_head structure to initialize. * * If you intend to invoke rcu_head_after_call_rcu() to test whether a * given rcu_head structure has already been passed to call_rcu(), then * you must also invoke this rcu_head_init() function on it just after * allocating that structure. Calls to this function must not race with * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation. */ static inline void rcu_head_init(struct rcu_head *rhp) { rhp->func = (rcu_callback_t)~0L; } /** * rcu_head_after_call_rcu() - Has this rcu_head been passed to call_rcu()? * @rhp: The rcu_head structure to test. * @f: The function passed to call_rcu() along with @rhp. * * Returns @true if the @rhp has been passed to call_rcu() with @func, * and @false otherwise. Emits a warning in any other case, including * the case where @rhp has already been invoked after a grace period. * Calls to this function must not race with callback invocation. One way * to avoid such races is to enclose the call to rcu_head_after_call_rcu() * in an RCU read-side critical section that includes a read-side fetch * of the pointer to the structure containing @rhp. */ static inline bool rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) { rcu_callback_t func = READ_ONCE(rhp->func); if (func == f) return true; WARN_ON_ONCE(func != (rcu_callback_t)~0L); return false; } /* kernel/ksysfs.c definitions */ extern int rcu_expedited; extern int rcu_normal; DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) #endif /* __LINUX_RCUPDATE_H */
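/*
 * Usage sketch (hypothetical example code, not declarations from rcupdate.h):
 * a minimal publish/read/retire pattern built from the primitives declared
 * above.  "struct foo", "foo_lock", "gp", and both functions are invented
 * names; a real translation unit would also need <linux/spinlock.h>,
 * <linux/slab.h>, and friends.
 */

struct foo {
	int a;
	struct rcu_head rh;
};

static DEFINE_SPINLOCK(foo_lock);		/* serializes updaters */
static struct foo __rcu *gp;			/* RCU-protected pointer */

/* Reader side: may run concurrently with update_foo() below. */
static int read_foo(void)
{
	struct foo *p;
	int ret = -1;

	rcu_read_lock();
	p = rcu_dereference(gp);		/* marked access, no locks needed */
	if (p)
		ret = p->a;
	rcu_read_unlock();
	return ret;
}

/* Update side: publish @newp and free the old version after a grace period. */
static void update_foo(struct foo *newp)
{
	struct foo *oldp;

	spin_lock(&foo_lock);
	oldp = rcu_dereference_protected(gp, lockdep_is_held(&foo_lock));
	rcu_assign_pointer(gp, newp);		/* orders init of *newp before publish */
	spin_unlock(&foo_lock);
	if (oldp)
		kfree_rcu(oldp, rh);		/* freed only after readers are done */
}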
/* SPDX-License-Identifier: GPL-2.0 */ /* * Resizable, Scalable, Concurrent Hash Table * * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au> * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * * Code partially derived from nft_hash * Rewritten with rehash code from br_multicast plus single list * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H #include <linux/err.h> #include <linux/errno.h> #include <linux/jhash.h> #include <linux/list_nulls.h> #include <linux/workqueue.h> #include <linux/rculist.h> #include <linux/bit_spinlock.h> #include <linux/rhashtable-types.h> /* * Objects in an rhashtable have an embedded struct rhash_head * which is linked into a hash chain from the hash table - or one * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marker which has * the least significant bit set but otherwise stores the address of * the hash bucket. This allows us to be sure we've found the end * of the right list. * The value stored in the hash bucket has BIT(0) used as a lock bit. * This bit must be atomically set before any changes are made to * the chain. To avoid dereferencing this pointer without clearing * the bit first, we use an opaque 'struct rhash_lock_head *' for the * pointer stored in the bucket.
This struct needs to be defined so * that rcu_dereference() works on it, but it has no content so a * cast is needed for it to be useful. This ensures it isn't * used by mistake with clearing the lock bit first. */ struct rhash_lock_head {}; /* Maximum chain length before rehash * * The maximum (not average) chain length grows with the size of the hash * table, at a rate of (log N)/(log log N). * * The value of 16 is selected so that even if the hash table grew to * 2^32 you would not expect the maximum chain length to exceed it * unless we are under attack (or extremely unlucky). * * As this limit is only to detect attacks, we don't need to set it to a * lower value as you'd need the chain length to vastly exceed 16 to have * any real effect on the system. */ #define RHT_ELASTICITY 16u /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; unsigned int nest; u32 hash_rnd; struct list_head walkers; struct rcu_head rcu; struct bucket_table __rcu *future_tbl; struct lockdep_map dep_map; struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /* * NULLS_MARKER() expects a hash value with the low * bits mostly likely to be significant, and it discards * the msb. * We give it an address, in which the bottom bit is * always 0, and the msb might be significant. * So we shift the address down one bit to align with * expectations and avoid losing a significant bit. * * We never store the NULLS_MARKER in the hash table * itself as we need the lsb for locking. * Instead we store a NULL */ #define RHT_NULLS_MARKER(ptr) \ ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1)) #define INIT_RHT_NULLS_HEAD(ptr) \ ((ptr) = NULL) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { return ((unsigned long) ptr & 1); } static inline void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { return (char *)he - ht->p.head_offset; } static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, unsigned int hash) { return hash & (tbl->size - 1); } static inline unsigned int rht_key_get_hash(struct rhashtable *ht, const void *key, const struct rhashtable_params params, unsigned int hash_rnd) { unsigned int hash; /* params must be equal to ht->p if it isn't constant. 
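 * (If params.key_len is not a compile-time constant, the code below falls
 * back to ht->p.hashfn() with ht->key_len, so a non-constant params
 * argument only yields consistent hashes when it matches ht->p.)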
*/ if (!__builtin_constant_p(params.key_len)) hash = ht->p.hashfn(key, ht->key_len, hash_rnd); else if (params.key_len) { unsigned int key_len = params.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, hash_rnd); else if (key_len & (sizeof(u32) - 1)) hash = jhash(key, key_len, hash_rnd); else hash = jhash2(key, key_len / sizeof(u32), hash_rnd); } else { unsigned int key_len = ht->p.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, hash_rnd); else hash = jhash(key, key_len, hash_rnd); } return hash; } static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd); return rht_bucket_index(tbl, hash); } static inline unsigned int rht_head_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he, const struct rhashtable_params params) { const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: ht->p.key_len, tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } /** * rht_grow_above_75 - returns true if nelems > 0.75 * table-size * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_75(const struct rhashtable *ht, const struct bucket_table *tbl) { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && (!ht->p.max_size || tbl->size < ht->p.max_size); } /** * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size * @ht: hash table * @tbl: current table */ static inline bool rht_shrink_below_30(const struct rhashtable *ht, const struct bucket_table *tbl) { /* Shrink table beneath 30% load */ return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && tbl->size > ht->p.min_size; } /** * rht_grow_above_100 - returns true if nelems > table-size * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_100(const struct rhashtable *ht, const struct bucket_table *tbl) { return atomic_read(&ht->nelems) > tbl->size && (!ht->p.max_size || tbl->size < ht->p.max_size); } /** * rht_grow_above_max - returns true if table is above maximum * @ht: hash table * @tbl: current table */ static inline bool rht_grow_above_max(const struct rhashtable *ht, const struct bucket_table *tbl) { return atomic_read(&ht->nelems) >= ht->max_elems; } #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) { return 1; } static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { return 1; } #endif /* CONFIG_PROVE_LOCKING */ void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj); void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU); static inline void rhashtable_walk_start(struct rhashtable_iter *iter) { (void)rhashtable_walk_start_check(iter); } void *rhashtable_walk_next(struct rhashtable_iter *iter); void *rhashtable_walk_peek(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void 
*arg); void rhashtable_destroy(struct rhashtable *ht); struct rhash_lock_head __rcu **rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash); struct rhash_lock_head __rcu **__rht_bucket_nested( const struct bucket_table *tbl, unsigned int hash); struct rhash_lock_head __rcu **rht_bucket_nested_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_rcu(p, ht) \ rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_bucket(p, tbl, hash) \ rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_dereference_bucket_rcu(p, tbl, hash) \ rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) static inline struct rhash_lock_head __rcu *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } static inline struct rhash_lock_head __rcu **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } static inline struct rhash_lock_head __rcu **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : &tbl->buckets[hash]; } /* * We lock a bucket by setting BIT(0) in the pointer - this is always * zero in real pointers. The NULLS mark is never stored in the bucket, * rather we store NULL if the bucket is empty. * bit_spin_locks do not handle contention well, but the whole point * of the hashtable design is to achieve minimum per-bucket contention. * A nested hash table might not have a bucket pointer. In that case * we cannot get a lock. For remove and replace the bucket cannot be * interesting and doesn't need locking. * For insert we allocate the bucket if this is the last bucket_table, * and then take the lock. * Sometimes we unlock a bucket by writing a new pointer there. In that * case we don't need to unlock, but we do need to reset state such as * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer() * provides the same release semantics that bit_spin_unlock() provides, * this is safe. * When we write to a bucket without unlocking, we use rht_assign_locked(). 
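 *
 * A sketch of the pairing (illustrative):
 *
 *	flags = rht_lock(tbl, bkt);
 *	... modify the chain behind *bkt ...
 *	rht_unlock(tbl, bkt, flags);
 *
 * or, when the bucket head pointer itself is being rewritten:
 *
 *	flags = rht_lock(tbl, bkt);
 *	... splice in the new head ...
 *	rht_assign_unlock(tbl, bkt, new_head, flags);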
*/ static inline unsigned long rht_lock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt) { unsigned long flags; local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bkt); lock_map_acquire(&tbl->dep_map); return flags; } static inline unsigned long rht_lock_nested(struct bucket_table *tbl, struct rhash_lock_head __rcu **bucket, unsigned int subclass) { unsigned long flags; local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bucket); lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_); return flags; } static inline void rht_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, unsigned long flags) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); local_irq_restore(flags); } static inline struct rhash_head *__rht_ptr( struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt) { return (struct rhash_head *) ((unsigned long)p & ~BIT(0) ?: (unsigned long)RHT_NULLS_MARKER(bkt)); } /* * Where 'bkt' is a bucket and might be locked: * rht_ptr_rcu() dereferences that pointer and clears the lock bit. * rht_ptr() dereferences in a context where the bucket is locked. * rht_ptr_exclusive() dereferences in a context where exclusive * access is guaranteed, such as when destroying the table. */ static inline struct rhash_head *rht_ptr_rcu( struct rhash_lock_head __rcu *const *bkt) { return __rht_ptr(rcu_dereference(*bkt), bkt); } static inline struct rhash_head *rht_ptr( struct rhash_lock_head __rcu *const *bkt, struct bucket_table *tbl, unsigned int hash) { return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt); } static inline struct rhash_head *rht_ptr_exclusive( struct rhash_lock_head __rcu *const *bkt) { return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt); } static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, struct rhash_head *obj) { if (rht_is_a_nulls(obj)) obj = NULL; rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0))); } static inline void rht_assign_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, struct rhash_head *obj, unsigned long flags) { if (rht_is_a_nulls(obj)) obj = NULL; lock_map_release(&tbl->dep_map); rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); __release(bitlock); local_irq_restore(flags); } /** * rht_for_each_from - iterate over hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ #define rht_for_each_from(pos, head, tbl, hash) \ for (pos = head; \ !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each - iterate over hash chain * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ tbl, hash) /** * rht_for_each_entry_from - iterate over hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. 
*/ #define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \ for (pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each_entry - iterate over hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ rht_for_each_entry_from(tpos, pos, \ rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ tbl, hash, member) /** * rht_for_each_entry_safe - safely iterate over hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @next: the &struct rhash_head to use as next in loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = next, \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** * rht_for_each_rcu_from - iterate over rcu hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu_from(pos, head, tbl, hash) \ for (({barrier(); }), \ pos = head; \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ for (({barrier(); }), \ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). 
*/ #define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) /** * rht_for_each_entry_rcu - iterate over rcu hash chain of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ rht_for_each_entry_rcu_from(tpos, pos, \ rht_ptr_rcu(rht_bucket(tbl, hash)), \ tbl, hash, member) /** * rhl_for_each_rcu - iterate over rcu hash table list * @pos: the &struct rlist_head to use as a loop cursor. * @list: the head of the list * * This hash chain list-traversal primitive should be used on the * list returned by rhltable_lookup. */ #define rhl_for_each_rcu(pos, list) \ for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) /** * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct rlist_head to use as a loop cursor. * @list: the head of the list * @member: name of the &struct rlist_head within the hashable struct. * * This hash chain list-traversal primitive should be used on the * list returned by rhltable_lookup. */ #define rhl_for_each_entry_rcu(tpos, pos, list, member) \ for (pos = list; pos && rht_entry(tpos, pos, member); \ pos = rcu_dereference_raw(pos->next)) static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) { struct rhashtable *ht = arg->ht; const char *ptr = obj; return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } /* Internal function, do not use. */ static inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; struct rhash_lock_head __rcu *const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); bkt = rht_bucket(tbl, hash); do { rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) { if (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; return he; } /* An object might have been moved to a different hash chain, * while we walk along it - better check and retry. */ } while (he != RHT_NULLS_MARKER(bkt)); /* Ensure we see any new tables. */ smp_rmb(); tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; return NULL; } /** * rhashtable_lookup - search hash table * @ht: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for a entry with an identical key. The first matching entry is returned. * * This must only be called under the RCU read lock. * * Returns the first entry on which the compare function returned true. 
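 *
 * For example (an illustrative sketch; "my_ht", "my_params", and the
 * object type are hypothetical)::
 *
 *	struct my_obj *obj;
 *
 *	rcu_read_lock();
 *	obj = rhashtable_lookup(&my_ht, &key, my_params);
 *	if (obj)
 *		... use obj, or take a reference before rcu_read_unlock() ...
 *	rcu_read_unlock();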
*/ static inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { struct rhash_head *he = __rhashtable_lookup(ht, key, params); return he ? rht_obj(ht, he) : NULL; } /** * rhashtable_lookup_fast - search hash table, without RCU read lock * @ht: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for a entry with an identical key. The first matching entry is returned. * * Only use this function when you have other mechanisms guaranteeing * that the object won't go away after the RCU read lock is released. * * Returns the first entry on which the compare function returned true. */ static inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { void *obj; rcu_read_lock(); obj = rhashtable_lookup(ht, key, params); rcu_read_unlock(); return obj; } /** * rhltable_lookup - search hash list table * @hlt: hash table * @key: the pointer to the key * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking * for a entry with an identical key. All matching entries are returned * in a list. * * This must only be called under the RCU read lock. * * Returns the list of entries that match the given key. */ static inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) { struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); return he ? container_of(he, struct rhlist_head, rhead) : NULL; } /* Internal function, please use rhashtable_insert_fast() instead. This * function returns the existing element already in hashes in there is a clash, * otherwise it returns an error via ERR_PTR(). */ static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; unsigned long flags; unsigned int hash; int elasticity; void *data; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); hash = rht_head_hashfn(ht, tbl, obj, params); elasticity = RHT_ELASTICITY; bkt = rht_bucket_insert(ht, tbl, hash); data = ERR_PTR(-ENOMEM); if (!bkt) goto out; pprev = NULL; flags = rht_lock(tbl, bkt); if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: rht_unlock(tbl, bkt, flags); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; elasticity--; if (!key || (params.obj_cmpfn ? 
params.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head)))) { pprev = &head->next; continue; } data = rht_obj(ht, head); if (!rhlist) goto out_unlock; list = container_of(obj, struct rhlist_head, rhead); plist = container_of(head, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); if (pprev) { rcu_assign_pointer(*pprev, obj); rht_unlock(tbl, bkt, flags); } else rht_assign_unlock(tbl, bkt, obj, flags); data = NULL; goto out; } if (elasticity <= 0) goto slow_path; data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) goto out_unlock; if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; /* Inserting at head of list makes unlocking free. */ head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { struct rhlist_head *list; list = container_of(obj, struct rhlist_head, rhead); RCU_INIT_POINTER(list->next, NULL); } atomic_inc(&ht->nelems); rht_assign_unlock(tbl, bkt, obj, flags); if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); data = NULL; out: rcu_read_unlock(); return data; out_unlock: rht_unlock(tbl, bkt, flags); goto out; } /** * rhashtable_insert_fast - insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { void *ret; ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhltable_insert_key - insert object into hash list table * @hlt: hash list table * @key: the pointer to the key * @list: pointer to hash list head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, const struct rhashtable_params params) { return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, params, true)); } /** * rhltable_insert - insert object into hash list table * @hlt: hash list table * @list: pointer to hash list head inside object * @params: hash table parameters * * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless * they map to the same bucket. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. 
*/ static inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { const char *key = rht_obj(&hlt->ht, &list->rhead); key += params.key_offset; return rhltable_insert_key(hlt, key, list, params); } /** * rhashtable_lookup_insert_fast - lookup and insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * This lookup function may only be used for fixed key hash table (key_len * parameter set). It will BUG() if used inappropriately. * * It is safe to call this function from atomic context. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ static inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); void *ret; BUG_ON(ht->p.obj_hashfn); ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Just like rhashtable_lookup_insert_fast(), but this function returns the * object if it exists, NULL if it did not and the insertion was successful, * and an ERR_PTR otherwise. */ static inline void *rhashtable_lookup_get_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); BUG_ON(ht->p.obj_hashfn); return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, false); } /** * rhashtable_lookup_insert_key - search and insert object to hash table * with explicit key * @ht: hash table * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters * * Lookups may occur in parallel with hashtable mutations and resizing. * * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. * * Returns zero on success. */ static inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { void *ret; BUG_ON(!ht->p.obj_hashfn || !key); ret = __rhashtable_insert_fast(ht, key, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } /** * rhashtable_lookup_get_insert_key - lookup and insert object into hash table * @ht: hash table * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters * * Just like rhashtable_lookup_insert_key(), but this function returns the * object if it exists, NULL if it does not and the insertion was successful, * and an ERR_PTR otherwise. 
*/ static inline void *rhashtable_lookup_get_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { BUG_ON(!ht->p.obj_hashfn || !key); return __rhashtable_insert_fast(ht, key, obj, params, false); } /* Internal function, please use rhashtable_remove_fast() instead */ static inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned long flags; unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); bkt = rht_bucket_var(tbl, hash); if (!bkt) return -ENOENT; pprev = NULL; flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); if (he != obj) { struct rhlist_head __rcu **lpprev; pprev = &he->next; if (!rhlist) continue; do { lpprev = &list->next; list = rht_dereference_bucket(list->next, tbl, hash); } while (list && obj != &list->rhead); if (!list) continue; list = rht_dereference_bucket(list->next, tbl, hash); RCU_INIT_POINTER(*lpprev, list); err = 0; break; } obj = rht_dereference_bucket(obj->next, tbl, hash); err = 1; if (rhlist) { list = rht_dereference_bucket(list->next, tbl, hash); if (list) { RCU_INIT_POINTER(list->rhead.next, obj); obj = &list->rhead; err = 0; } } if (pprev) { rcu_assign_pointer(*pprev, obj); rht_unlock(tbl, bkt, flags); } else { rht_assign_unlock(tbl, bkt, obj, flags); } goto unlocked; } rht_unlock(tbl, bkt, flags); unlocked: if (err > 0) { atomic_dec(&ht->nelems); if (unlikely(ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))) schedule_work(&ht->run_work); err = 0; } return err; } /* Internal function, please use rhashtable_remove_fast() instead */ static inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { struct bucket_table *tbl; int err; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); /* Because we have already taken (and released) the bucket * lock in old_tbl, if we find that future_tbl is not yet * visible then that guarantees the entry to still be in * the old tbl if it exists. */ while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, rhlist)) && (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) ; rcu_read_unlock(); return err; } /** * rhashtable_remove_fast - remove object from hash table * @ht: hash table * @obj: pointer to hash head inside object * @params: hash table parameters * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * * Will automatically shrink the table if permitted when residency drops * below 30%. * * Returns zero on success, -ENOENT if the entry could not be found. */ static inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { return __rhashtable_remove_fast(ht, obj, params, false); } /** * rhltable_remove - remove object from hash list table * @hlt: hash list table * @list: pointer to hash list head inside object * @params: hash table parameters * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. 
The removal operation is thus * considerable slow if the hash table is not correctly sized. * * Will automatically shrink the table if permitted when residency drops * below 30% * * Returns zero on success, -ENOENT if the entry could not be found. */ static inline int rhltable_remove( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); } /* Internal function, please use rhashtable_replace_fast() instead */ static inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned long flags; unsigned int hash; int err = -ENOENT; /* Minimally, the old and new objects must have same hash * (which should mean identifiers are the same). */ hash = rht_head_hashfn(ht, tbl, obj_old, params); if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) return -EINVAL; bkt = rht_bucket_var(tbl, hash); if (!bkt) return -ENOENT; pprev = NULL; flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; } rcu_assign_pointer(obj_new->next, obj_old->next); if (pprev) { rcu_assign_pointer(*pprev, obj_new); rht_unlock(tbl, bkt, flags); } else { rht_assign_unlock(tbl, bkt, obj_new, flags); } err = 0; goto unlocked; } rht_unlock(tbl, bkt, flags); unlocked: return err; } /** * rhashtable_replace_fast - replace an object in hash table * @ht: hash table * @obj_old: pointer to hash head inside object being replaced * @obj_new: pointer to hash head inside object which is new * @params: hash table parameters * * Replacing an object doesn't affect the number of elements in the hash table * or bucket, so we don't need to worry about shrinking or expanding the * table here. * * Returns zero on success, -ENOENT if the entry could not be found, * -EINVAL if hash is not the same for the old and new objects. */ static inline int rhashtable_replace_fast( struct rhashtable *ht, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { struct bucket_table *tbl; int err; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); /* Because we have already taken (and released) the bucket * lock in old_tbl, if we find that future_tbl is not yet * visible then that guarantees the entry to still be in * the old tbl if it exists. */ while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, obj_new, params)) && (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) ; rcu_read_unlock(); return err; } /** * rhltable_walk_enter - Initialise an iterator * @hlt: Table to walk over * @iter: Hash table Iterator * * This function prepares a hash table walk. * * Note that if you restart a walk after rhashtable_walk_stop you * may see the same object twice. Also, you may miss objects if * there are removals in between rhashtable_walk_stop and the next * call to rhashtable_walk_start. * * For a completely stable walk you should construct your own data * structure outside the hash table. * * This function may be called from any process context, including * non-preemptable context, but cannot be called from softirq or * hardirq context. * * You must call rhashtable_walk_exit after this function returns. 
*/ static inline void rhltable_walk_enter(struct rhltable *hlt, struct rhashtable_iter *iter) { return rhashtable_walk_enter(&hlt->ht, iter); } /** * rhltable_free_and_destroy - free elements and destroy hash list table * @hlt: the hash list table to destroy * @free_fn: callback to release resources of element * @arg: pointer passed to free_fn * * See documentation for rhashtable_free_and_destroy. */ static inline void rhltable_free_and_destroy(struct rhltable *hlt, void (*free_fn)(void *ptr, void *arg), void *arg) { return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); } static inline void rhltable_destroy(struct rhltable *hlt) { return rhltable_free_and_destroy(hlt, NULL, NULL); } #endif /* _LINUX_RHASHTABLE_H */
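/*
 * A minimal usage sketch of the fast rhashtable API above (not from the
 * kernel tree): the demo_obj/demo_params/demo_rhashtable() names are
 * illustrative assumptions, and <linux/slab.h> is assumed for kzalloc().
 */
struct demo_obj {
	u32 id;				/* the key, fixed length */
	struct rhash_head node;		/* linkage used by the table */
};

static const struct rhashtable_params demo_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct demo_obj, id),
	.head_offset	= offsetof(struct demo_obj, node),
	.automatic_shrinking = true,
};

static int demo_rhashtable(void)
{
	struct rhashtable ht;
	struct demo_obj *obj, *found;
	u32 key = 42;
	int err;

	err = rhashtable_init(&ht, &demo_params);
	if (err)
		return err;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj) {
		err = -ENOMEM;
		goto out_destroy;
	}
	obj->id = key;

	/* Takes only the per-bucket bit lock; may be called from atomic context. */
	err = rhashtable_insert_fast(&ht, &obj->node, demo_params);
	if (err)
		goto out_free;

	/* rhashtable_lookup() must run under the RCU read lock. */
	rcu_read_lock();
	found = rhashtable_lookup(&ht, &key, demo_params);
	pr_debug("lookup: %s\n", found ? "hit" : "miss");
	rcu_read_unlock();

	err = rhashtable_remove_fast(&ht, &obj->node, demo_params);
out_free:
	kfree(obj);
out_destroy:
	rhashtable_destroy(&ht);
	return err;
}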
1 1 1 1 1 1 1 1 1 1 98 98 98 96 97 1 1 1 96 98 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 /* * linux/fs/nls/nls_base.c * * Native language support--charsets and unicode translations. * By Gordon Chaffee 1996, 1997 * * Unicode based case conversion 1999 by Wolfram Pienkoss * */ #include <linux/module.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/kmod.h> #include <linux/spinlock.h> #include <asm/byteorder.h> static struct nls_table default_table; static struct nls_table *tables = &default_table; static DEFINE_SPINLOCK(nls_lock); /* * Sample implementation from Unicode home page. 
* http://www.stonehand.com/unicode/standard/fss-utf.html */ struct utf8_table { int cmask; int cval; int shift; long lmask; long lval; }; static const struct utf8_table utf8_table[] = { {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */}, {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */}, {0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */}, {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */}, {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */}, {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */}, {0, /* end of table */} }; #define UNICODE_MAX 0x0010ffff #define PLANE_SIZE 0x00010000 #define SURROGATE_MASK 0xfffff800 #define SURROGATE_PAIR 0x0000d800 #define SURROGATE_LOW 0x00000400 #define SURROGATE_BITS 0x000003ff int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu) { unsigned long l; int c0, c, nc; const struct utf8_table *t; nc = 0; c0 = *s; l = c0; for (t = utf8_table; t->cmask; t++) { nc++; if ((c0 & t->cmask) == t->cval) { l &= t->lmask; if (l < t->lval || l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) return -1; *pu = (unicode_t) l; return nc; } if (inlen <= nc) return -1; s++; c = (*s ^ 0x80) & 0xFF; if (c & 0xC0) return -1; l = (l << 6) | c; } return -1; } EXPORT_SYMBOL(utf8_to_utf32); int utf32_to_utf8(unicode_t u, u8 *s, int maxout) { unsigned long l; int c, nc; const struct utf8_table *t; if (!s) return 0; l = u; if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) return -1; nc = 0; for (t = utf8_table; t->cmask && maxout; t++, maxout--) { nc++; if (l <= t->lmask) { c = t->shift; *s = (u8) (t->cval | (l >> c)); while (c > 0) { c -= 6; s++; *s = (u8) (0x80 | ((l >> c) & 0x3F)); } return nc; } } return -1; } EXPORT_SYMBOL(utf32_to_utf8); static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) { switch (endian) { default: *s = (wchar_t) c; break; case UTF16_LITTLE_ENDIAN: *s = __cpu_to_le16(c); break; case UTF16_BIG_ENDIAN: *s = __cpu_to_be16(c); break; } } int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian, wchar_t *pwcs, int maxout) { u16 *op; int size; unicode_t u; op = pwcs; while (inlen > 0 && maxout > 0 && *s) { if (*s & 0x80) { size = utf8_to_utf32(s, inlen, &u); if (size < 0) return -EINVAL; s += size; inlen -= size; if (u >= PLANE_SIZE) { if (maxout < 2) break; u -= PLANE_SIZE; put_utf16(op++, SURROGATE_PAIR | ((u >> 10) & SURROGATE_BITS), endian); put_utf16(op++, SURROGATE_PAIR | SURROGATE_LOW | (u & SURROGATE_BITS), endian); maxout -= 2; } else { put_utf16(op++, u, endian); maxout--; } } else { put_utf16(op++, *s++, endian); inlen--; maxout--; } } return op - pwcs; } EXPORT_SYMBOL(utf8s_to_utf16s); static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian) { switch (endian) { default: return c; case UTF16_LITTLE_ENDIAN: return __le16_to_cpu(c); case UTF16_BIG_ENDIAN: return __be16_to_cpu(c); } } int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian, u8 *s, int maxout) { u8 *op; int size; unsigned long u, v; op = s; while (inlen > 0 && maxout > 0) { u = get_utf16(*pwcs, endian); if (!u) break; pwcs++; inlen--; if (u > 0x7f) { if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { if (u & SURROGATE_LOW) { /* Ignore character and move on */ continue; } if (inlen <= 0) break; v = get_utf16(*pwcs, endian); if ((v & SURROGATE_MASK) != SURROGATE_PAIR || !(v & SURROGATE_LOW)) { /* Ignore character and move on */ continue; } u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) + (v & SURROGATE_BITS); pwcs++; inlen--; } size = utf32_to_utf8(u, op, 
maxout); if (size == -1) { /* Ignore character and move on */ } else { op += size; maxout -= size; } } else { *op++ = (u8) u; maxout--; } } return op - s; } EXPORT_SYMBOL(utf16s_to_utf8s); int __register_nls(struct nls_table *nls, struct module *owner) { struct nls_table ** tmp = &tables; if (nls->next) return -EBUSY; nls->owner = owner; spin_lock(&nls_lock); while (*tmp) { if (nls == *tmp) { spin_unlock(&nls_lock); return -EBUSY; } tmp = &(*tmp)->next; } nls->next = tables; tables = nls; spin_unlock(&nls_lock); return 0; } EXPORT_SYMBOL(__register_nls); int unregister_nls(struct nls_table * nls) { struct nls_table ** tmp = &tables; spin_lock(&nls_lock); while (*tmp) { if (nls == *tmp) { *tmp = nls->next; spin_unlock(&nls_lock); return 0; } tmp = &(*tmp)->next; } spin_unlock(&nls_lock); return -EINVAL; } static struct nls_table *find_nls(const char *charset) { struct nls_table *nls; spin_lock(&nls_lock); for (nls = tables; nls; nls = nls->next) { if (!strcmp(nls->charset, charset)) break; if (nls->alias && !strcmp(nls->alias, charset)) break; } if (nls && !try_module_get(nls->owner)) nls = NULL; spin_unlock(&nls_lock); return nls; } struct nls_table *load_nls(const char *charset) { return try_then_request_module(find_nls(charset), "nls_%s", charset); } void unload_nls(struct nls_table *nls) { if (nls) module_put(nls->owner); } static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80*/ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x90*/ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0xa0*/ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, /* 0xb0*/ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, /* 0xc0*/ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, /* 0xd0*/ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, /* 0xe0*/ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, /* 0xf0*/ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 
0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char *const page_uni2charset[256] = { page00 }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 
0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table default_table = { .charset = "default", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; /* 
Returns a simple default translation table */ struct nls_table *load_nls_default(void) { struct nls_table *default_nls; default_nls = load_nls(CONFIG_NLS_DEFAULT); if (default_nls != NULL) return default_nls; else return &default_table; } EXPORT_SYMBOL(unregister_nls); EXPORT_SYMBOL(unload_nls); EXPORT_SYMBOL(load_nls); EXPORT_SYMBOL(load_nls_default); MODULE_LICENSE("Dual BSD/GPL");
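/*
 * A minimal usage sketch (not part of nls_base.c above): UTF-8 -> UTF-16LE
 * conversion plus a single-character round trip through an NLS table. The
 * demo_nls() name and the buffer sizes are illustrative assumptions.
 */
static void demo_nls(void)
{
	const char *name = "caf\xc3\xa9";	/* "café" encoded as UTF-8 */
	wchar_t utf16_buf[16];
	unsigned char out[NLS_MAX_CHARSET_SIZE];
	struct nls_table *nls;
	wchar_t uni;
	int n;

	/* Returns the number of UTF-16 code units written, or -EINVAL. */
	n = utf8s_to_utf16s((const u8 *)name, strlen(name),
			    UTF16_LITTLE_ENDIAN, utf16_buf,
			    ARRAY_SIZE(utf16_buf));
	if (n < 0)
		return;

	/* Falls back to the built-in "default" table if no module matches. */
	nls = load_nls_default();

	/* One byte of the mount charset -> unicode -> back again. */
	if (nls->char2uni((const unsigned char *)"c", 1, &uni) > 0)
		nls->uni2char(uni, out, sizeof(out));

	unload_nls(nls);
}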
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 // SPDX-License-Identifier: GPL-2.0-only /*************************************************************************** * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * * * * Based on Logitech G13 driver (v0.4) * * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * * * ***************************************************************************/ #include <linux/hid.h> #include <linux/fb.h> #include <linux/backlight.h> #include "hid-picolcd.h" static int picolcd_get_brightness(struct backlight_device *bdev) { struct picolcd_data *data = bl_get_data(bdev); return data->lcd_brightness; } static int picolcd_set_brightness(struct backlight_device *bdev) { struct picolcd_data *data = bl_get_data(bdev); struct hid_report *report = picolcd_out_report(REPORT_BRIGHTNESS, data->hdev); unsigned long flags; if (!report || report->maxfield != 1 || report->field[0]->report_count != 1) return -ENODEV; data->lcd_brightness = bdev->props.brightness & 0x0ff; data->lcd_power = bdev->props.power; spin_lock_irqsave(&data->lock, flags); hid_set_field(report->field[0], 0, data->lcd_power == FB_BLANK_UNBLANK ? data->lcd_brightness : 0); if (!(data->status & PICOLCD_FAILED)) hid_hw_request(data->hdev, report, HID_REQ_SET_REPORT); spin_unlock_irqrestore(&data->lock, flags); return 0; } static int picolcd_check_bl_fb(struct backlight_device *bdev, struct fb_info *fb) { return fb && fb == picolcd_fbinfo((struct picolcd_data *)bl_get_data(bdev)); } static const struct backlight_ops picolcd_blops = { .update_status = picolcd_set_brightness, .get_brightness = picolcd_get_brightness, .check_fb = picolcd_check_bl_fb, }; int picolcd_init_backlight(struct picolcd_data *data, struct hid_report *report) { struct device *dev = &data->hdev->dev; struct backlight_device *bdev; struct backlight_properties props; if (!report) return -ENODEV; if (report->maxfield != 1 || report->field[0]->report_count != 1 || report->field[0]->report_size != 8) { dev_err(dev, "unsupported BRIGHTNESS report"); return -EINVAL; } memset(&props, 0, sizeof(props)); props.type = BACKLIGHT_RAW; props.max_brightness = 0xff; bdev = backlight_device_register(dev_name(dev), dev, data, &picolcd_blops, &props); if (IS_ERR(bdev)) { dev_err(dev, "failed to register backlight\n"); return PTR_ERR(bdev); } bdev->props.brightness = 0xff; data->lcd_brightness = 0xff; data->backlight = bdev; picolcd_set_brightness(bdev); return 0; } void picolcd_exit_backlight(struct picolcd_data *data) { struct backlight_device *bdev = data->backlight; data->backlight = NULL; backlight_device_unregister(bdev); } int picolcd_resume_backlight(struct picolcd_data *data) { if (!data->backlight) return 0; return picolcd_set_brightness(data->backlight); } #ifdef CONFIG_PM void picolcd_suspend_backlight(struct picolcd_data *data) { int bl_power = data->lcd_power; if (!data->backlight) return; data->backlight->props.power = FB_BLANK_POWERDOWN; picolcd_set_brightness(data->backlight); data->lcd_power = data->backlight->props.power = bl_power; } #endif /* CONFIG_PM */
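/*
 * A minimal sketch of the same backlight registration pattern in a
 * hypothetical driver (the demo_* names are illustrative, not taken from
 * the picolcd code above); assumes the devm_ helper from <linux/backlight.h>.
 */
struct demo_bl_data {
	int level;			/* last value programmed into hardware */
};

static int demo_bl_update_status(struct backlight_device *bdev)
{
	struct demo_bl_data *data = bl_get_data(bdev);

	/* Honour both brightness and power, as picolcd_set_brightness() does. */
	data->level = bdev->props.power == FB_BLANK_UNBLANK ?
		      bdev->props.brightness : 0;
	/* ...program the hardware with data->level here... */
	return 0;
}

static const struct backlight_ops demo_bl_ops = {
	.update_status	= demo_bl_update_status,
};

static int demo_bl_probe(struct device *dev, struct demo_bl_data *data)
{
	struct backlight_properties props;
	struct backlight_device *bdev;

	memset(&props, 0, sizeof(props));
	props.type = BACKLIGHT_RAW;
	props.max_brightness = 0xff;
	props.brightness = 0xff;

	bdev = devm_backlight_device_register(dev, dev_name(dev), dev, data,
					      &demo_bl_ops, &props);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/* Push the initial state to the hardware once. */
	backlight_update_status(bdev);
	return 0;
}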
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Isovalent */ #include <linux/bpf.h> #include <linux/bpf_mprog.h> #include <linux/netdevice.h> #include <net/tcx.h> int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { bool created, ingress = attr->attach_type == BPF_TCX_INGRESS; struct net *net = current->nsproxy->net_ns; struct bpf_mprog_entry *entry, *entry_new; struct bpf_prog *replace_prog = NULL; struct net_device *dev; int ret; rtnl_lock(); dev = __dev_get_by_index(net, attr->target_ifindex); if (!dev) { ret = -ENODEV; goto out; } if (attr->attach_flags & BPF_F_REPLACE) { replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, prog->type); if (IS_ERR(replace_prog)) { ret = PTR_ERR(replace_prog); replace_prog = NULL; goto out; } } entry = tcx_entry_fetch_or_create(dev, ingress, &created); if (!entry) { ret = -ENOMEM; goto out; } ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, replace_prog, attr->attach_flags, attr->relative_fd, attr->expected_revision); if (!ret) { if (entry != entry_new) { tcx_entry_update(dev, entry_new, ingress); tcx_entry_sync(); tcx_skeys_inc(ingress); } bpf_mprog_commit(entry); } else if (created) { tcx_entry_free(entry); } out: if (replace_prog) bpf_prog_put(replace_prog); rtnl_unlock(); return ret; } int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog) { bool ingress = attr->attach_type == BPF_TCX_INGRESS; struct net *net = current->nsproxy->net_ns; struct bpf_mprog_entry *entry, *entry_new; struct net_device *dev; int ret; rtnl_lock(); dev = __dev_get_by_index(net, attr->target_ifindex); if (!dev) { ret = -ENODEV; goto out; } entry = tcx_entry_fetch(dev, ingress); if (!entry) { ret = -ENOENT; goto out; } ret = bpf_mprog_detach(entry, &entry_new, prog, NULL, attr->attach_flags, attr->relative_fd, attr->expected_revision); if (!ret) { if (!tcx_entry_is_active(entry_new)) entry_new = NULL; tcx_entry_update(dev, entry_new, ingress); tcx_entry_sync(); tcx_skeys_dec(ingress); bpf_mprog_commit(entry); if (!entry_new) tcx_entry_free(entry); } out: rtnl_unlock(); return ret; } void tcx_uninstall(struct net_device *dev, bool ingress) { struct bpf_mprog_entry *entry, *entry_new = NULL; struct bpf_tuple tuple = {}; struct bpf_mprog_fp *fp; struct bpf_mprog_cp *cp; 
bool active; entry = tcx_entry_fetch(dev, ingress); if (!entry) return; active = tcx_entry(entry)->miniq_active; if (active) bpf_mprog_clear_all(entry, &entry_new); tcx_entry_update(dev, entry_new, ingress); tcx_entry_sync(); bpf_mprog_foreach_tuple(entry, fp, cp, tuple) { if (tuple.link) tcx_link(tuple.link)->dev = NULL; else bpf_prog_put(tuple.prog); tcx_skeys_dec(ingress); } if (!active) tcx_entry_free(entry); } int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { bool ingress = attr->query.attach_type == BPF_TCX_INGRESS; struct net *net = current->nsproxy->net_ns; struct net_device *dev; int ret; rtnl_lock(); dev = __dev_get_by_index(net, attr->query.target_ifindex); if (!dev) { ret = -ENODEV; goto out; } ret = bpf_mprog_query(attr, uattr, tcx_entry_fetch(dev, ingress)); out: rtnl_unlock(); return ret; } static int tcx_link_prog_attach(struct bpf_link *link, u32 flags, u32 id_or_fd, u64 revision) { struct tcx_link *tcx = tcx_link(link); bool created, ingress = tcx->location == BPF_TCX_INGRESS; struct bpf_mprog_entry *entry, *entry_new; struct net_device *dev = tcx->dev; int ret; ASSERT_RTNL(); entry = tcx_entry_fetch_or_create(dev, ingress, &created); if (!entry) return -ENOMEM; ret = bpf_mprog_attach(entry, &entry_new, link->prog, link, NULL, flags, id_or_fd, revision); if (!ret) { if (entry != entry_new) { tcx_entry_update(dev, entry_new, ingress); tcx_entry_sync(); tcx_skeys_inc(ingress); } bpf_mprog_commit(entry); } else if (created) { tcx_entry_free(entry); } return ret; } static void tcx_link_release(struct bpf_link *link) { struct tcx_link *tcx = tcx_link(link); bool ingress = tcx->location == BPF_TCX_INGRESS; struct bpf_mprog_entry *entry, *entry_new; struct net_device *dev; int ret = 0; rtnl_lock(); dev = tcx->dev; if (!dev) goto out; entry = tcx_entry_fetch(dev, ingress); if (!entry) { ret = -ENOENT; goto out; } ret = bpf_mprog_detach(entry, &entry_new, link->prog, link, 0, 0, 0); if (!ret) { if (!tcx_entry_is_active(entry_new)) entry_new = NULL; tcx_entry_update(dev, entry_new, ingress); tcx_entry_sync(); tcx_skeys_dec(ingress); bpf_mprog_commit(entry); if (!entry_new) tcx_entry_free(entry); tcx->dev = NULL; } out: WARN_ON_ONCE(ret); rtnl_unlock(); } static int tcx_link_update(struct bpf_link *link, struct bpf_prog *nprog, struct bpf_prog *oprog) { struct tcx_link *tcx = tcx_link(link); bool ingress = tcx->location == BPF_TCX_INGRESS; struct bpf_mprog_entry *entry, *entry_new; struct net_device *dev; int ret = 0; rtnl_lock(); dev = tcx->dev; if (!dev) { ret = -ENOLINK; goto out; } if (oprog && link->prog != oprog) { ret = -EPERM; goto out; } oprog = link->prog; if (oprog == nprog) { bpf_prog_put(nprog); goto out; } entry = tcx_entry_fetch(dev, ingress); if (!entry) { ret = -ENOENT; goto out; } ret = bpf_mprog_attach(entry, &entry_new, nprog, link, oprog, BPF_F_REPLACE | BPF_F_ID, link->prog->aux->id, 0); if (!ret) { WARN_ON_ONCE(entry != entry_new); oprog = xchg(&link->prog, nprog); bpf_prog_put(oprog); bpf_mprog_commit(entry); } out: rtnl_unlock(); return ret; } static void tcx_link_dealloc(struct bpf_link *link) { kfree(tcx_link(link)); } static void tcx_link_fdinfo(const struct bpf_link *link, struct seq_file *seq) { const struct tcx_link *tcx = tcx_link(link); u32 ifindex = 0; rtnl_lock(); if (tcx->dev) ifindex = tcx->dev->ifindex; rtnl_unlock(); seq_printf(seq, "ifindex:\t%u\n", ifindex); seq_printf(seq, "attach_type:\t%u (%s)\n", tcx->location, tcx->location == BPF_TCX_INGRESS ? 
"ingress" : "egress"); } static int tcx_link_fill_info(const struct bpf_link *link, struct bpf_link_info *info) { const struct tcx_link *tcx = tcx_link(link); u32 ifindex = 0; rtnl_lock(); if (tcx->dev) ifindex = tcx->dev->ifindex; rtnl_unlock(); info->tcx.ifindex = ifindex; info->tcx.attach_type = tcx->location; return 0; } static int tcx_link_detach(struct bpf_link *link) { tcx_link_release(link); return 0; } static const struct bpf_link_ops tcx_link_lops = { .release = tcx_link_release, .detach = tcx_link_detach, .dealloc = tcx_link_dealloc, .update_prog = tcx_link_update, .show_fdinfo = tcx_link_fdinfo, .fill_link_info = tcx_link_fill_info, }; static int tcx_link_init(struct tcx_link *tcx, struct bpf_link_primer *link_primer, const union bpf_attr *attr, struct net_device *dev, struct bpf_prog *prog) { bpf_link_init(&tcx->link, BPF_LINK_TYPE_TCX, &tcx_link_lops, prog); tcx->location = attr->link_create.attach_type; tcx->dev = dev; return bpf_link_prime(&tcx->link, link_primer); } int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct net *net = current->nsproxy->net_ns; struct bpf_link_primer link_primer; struct net_device *dev; struct tcx_link *tcx; int ret; rtnl_lock(); dev = __dev_get_by_index(net, attr->link_create.target_ifindex); if (!dev) { ret = -ENODEV; goto out; } tcx = kzalloc(sizeof(*tcx), GFP_USER); if (!tcx) { ret = -ENOMEM; goto out; } ret = tcx_link_init(tcx, &link_primer, attr, dev, prog); if (ret) { kfree(tcx); goto out; } ret = tcx_link_prog_attach(&tcx->link, attr->link_create.flags, attr->link_create.tcx.relative_fd, attr->link_create.tcx.expected_revision); if (ret) { tcx->dev = NULL; bpf_link_cleanup(&link_primer); goto out; } ret = bpf_link_settle(&link_primer); out: rtnl_unlock(); return ret; }
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_TTY_BUFFER_H
#define _LINUX_TTY_BUFFER_H

#include <linux/atomic.h>
#include <linux/llist.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct tty_buffer {
	union {
		struct tty_buffer *next;
		struct llist_node free;
	};
	unsigned int used;
	unsigned int size;
	unsigned int commit;
	unsigned int lookahead;		/* Lazy update on recv, can become less than "read" */
	unsigned int read;
	bool flags;
	/* Data points here */
	u8 data[] __aligned(sizeof(unsigned long));
};

static inline u8 *char_buf_ptr(struct tty_buffer *b, unsigned int ofs)
{
	return b->data + ofs;
}

static inline u8 *flag_buf_ptr(struct tty_buffer *b, unsigned int ofs)
{
	return char_buf_ptr(b, ofs) + b->size;
}

struct tty_bufhead {
	struct tty_buffer *head;	/* Queue head */
	struct work_struct work;
	struct mutex lock;
	atomic_t priority;
	struct tty_buffer sentinel;
	struct llist_head free;		/* Free queue head */
	atomic_t mem_used;		/* In-use buffers excluding free list */
	int mem_limit;
	struct tty_buffer *tail;	/* Active buffer */
};

/*
 * When a break, frame error, or parity error happens, these codes are
 * stuffed into the flags buffer.
 */
#define TTY_NORMAL	0
#define TTY_BREAK	1
#define TTY_FRAME	2
#define TTY_PARITY	3
#define TTY_OVERRUN	4

#endif
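/*
 * A minimal producer-side sketch (not part of this header): how a serial
 * driver's RX path typically fills these buffers through the flip-buffer
 * helpers in <linux/tty_flip.h>. The demo_rx_bytes() name and its
 * parameters are illustrative assumptions.
 */
#include <linux/tty_flip.h>

static void demo_rx_bytes(struct tty_port *port, const u8 *buf, size_t n,
			  bool framing_error)
{
	size_t i;

	for (i = 0; i < n; i++)
		/* Each byte lands in tty_buffer::data with a matching
		 * TTY_NORMAL/TTY_FRAME code in the flags area that follows
		 * it (see flag_buf_ptr() above). */
		tty_insert_flip_char(port, buf[i],
				     framing_error ? TTY_FRAME : TTY_NORMAL);

	/* Commit the buffered data and schedule tty_bufhead::work. */
	tty_flip_buffer_push(port);
}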
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 // SPDX-License-Identifier: GPL-2.0-only /* * LEDs triggers for power supply class * * Copyright © 2007 Anton Vorontsov <cbou@mail.ru> * Copyright © 2004 Szabolcs Gyurko * Copyright © 2003 Ian Molton <spyro@f2s.com> * * Modified: 2004, Oct Szabolcs Gyurko */ #include <linux/kernel.h> #include <linux/device.h> #include <linux/power_supply.h> #include <linux/slab.h> #include <linux/leds.h> #include "power_supply.h" /* Battery specific LEDs triggers. */ static void power_supply_update_bat_leds(struct power_supply *psy) { union power_supply_propval status; if (power_supply_get_property(psy, POWER_SUPPLY_PROP_STATUS, &status)) return; dev_dbg(&psy->dev, "%s %d\n", __func__, status.intval); switch (status.intval) { case POWER_SUPPLY_STATUS_FULL: led_trigger_event(psy->charging_full_trig, LED_FULL); led_trigger_event(psy->charging_trig, LED_OFF); led_trigger_event(psy->full_trig, LED_FULL); /* Going from blink to LED on requires a LED_OFF event to stop blink */ led_trigger_event(psy->charging_blink_full_solid_trig, LED_OFF); led_trigger_event(psy->charging_blink_full_solid_trig, LED_FULL); break; case POWER_SUPPLY_STATUS_CHARGING: led_trigger_event(psy->charging_full_trig, LED_FULL); led_trigger_event(psy->charging_trig, LED_FULL); led_trigger_event(psy->full_trig, LED_OFF); led_trigger_blink(psy->charging_blink_full_solid_trig, 0, 0); break; default: led_trigger_event(psy->charging_full_trig, LED_OFF); led_trigger_event(psy->charging_trig, LED_OFF); led_trigger_event(psy->full_trig, LED_OFF); led_trigger_event(psy->charging_blink_full_solid_trig, LED_OFF); break; } } static int power_supply_create_bat_triggers(struct power_supply *psy) { psy->charging_full_trig_name = kasprintf(GFP_KERNEL, "%s-charging-or-full", psy->desc->name); if (!psy->charging_full_trig_name) goto charging_full_failed; psy->charging_trig_name = kasprintf(GFP_KERNEL, "%s-charging", psy->desc->name); if (!psy->charging_trig_name) goto charging_failed; psy->full_trig_name = kasprintf(GFP_KERNEL, "%s-full", psy->desc->name); if (!psy->full_trig_name) goto full_failed; psy->charging_blink_full_solid_trig_name = kasprintf(GFP_KERNEL, "%s-charging-blink-full-solid", psy->desc->name); if (!psy->charging_blink_full_solid_trig_name) goto charging_blink_full_solid_failed; led_trigger_register_simple(psy->charging_full_trig_name, &psy->charging_full_trig); led_trigger_register_simple(psy->charging_trig_name, &psy->charging_trig); led_trigger_register_simple(psy->full_trig_name, &psy->full_trig); led_trigger_register_simple(psy->charging_blink_full_solid_trig_name, &psy->charging_blink_full_solid_trig); return 0; charging_blink_full_solid_failed: kfree(psy->full_trig_name); full_failed: kfree(psy->charging_trig_name); charging_failed: kfree(psy->charging_full_trig_name); charging_full_failed: return -ENOMEM; } static void power_supply_remove_bat_triggers(struct power_supply *psy) { led_trigger_unregister_simple(psy->charging_full_trig); 
led_trigger_unregister_simple(psy->charging_trig); led_trigger_unregister_simple(psy->full_trig); led_trigger_unregister_simple(psy->charging_blink_full_solid_trig); kfree(psy->charging_blink_full_solid_trig_name); kfree(psy->full_trig_name); kfree(psy->charging_trig_name); kfree(psy->charging_full_trig_name); } /* Generated power specific LEDs triggers. */ static void power_supply_update_gen_leds(struct power_supply *psy) { union power_supply_propval online; if (power_supply_get_property(psy, POWER_SUPPLY_PROP_ONLINE, &online)) return; dev_dbg(&psy->dev, "%s %d\n", __func__, online.intval); if (online.intval) led_trigger_event(psy->online_trig, LED_FULL); else led_trigger_event(psy->online_trig, LED_OFF); } static int power_supply_create_gen_triggers(struct power_supply *psy) { psy->online_trig_name = kasprintf(GFP_KERNEL, "%s-online", psy->desc->name); if (!psy->online_trig_name) return -ENOMEM; led_trigger_register_simple(psy->online_trig_name, &psy->online_trig); return 0; } static void power_supply_remove_gen_triggers(struct power_supply *psy) { led_trigger_unregister_simple(psy->online_trig); kfree(psy->online_trig_name); } /* Choice what triggers to create&update. */ void power_supply_update_leds(struct power_supply *psy) { if (psy->desc->type == POWER_SUPPLY_TYPE_BATTERY) power_supply_update_bat_leds(psy); else power_supply_update_gen_leds(psy); } int power_supply_create_triggers(struct power_supply *psy) { if (psy->desc->type == POWER_SUPPLY_TYPE_BATTERY) return power_supply_create_bat_triggers(psy); return power_supply_create_gen_triggers(psy); } void power_supply_remove_triggers(struct power_supply *psy) { if (psy->desc->type == POWER_SUPPLY_TYPE_BATTERY) power_supply_remove_bat_triggers(psy); else power_supply_remove_gen_triggers(psy); }
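/*
 * A minimal consumer-side sketch (not part of the file above): an LED class
 * device that follows the "%s-charging" trigger registered by
 * power_supply_create_bat_triggers(). The supply name "battery", the LED
 * name and the empty brightness_set callback are illustrative assumptions.
 */
#include <linux/leds.h>

static void demo_led_set(struct led_classdev *cdev, enum led_brightness b)
{
	/* Drive the physical LED here (GPIO, PWM, ...). */
}

static struct led_classdev demo_charging_led = {
	.name			= "demo:orange:charging",
	.brightness_set		= demo_led_set,
	.max_brightness		= 1,
	/* Matches the "%s-charging" trigger name created above. */
	.default_trigger	= "battery-charging",
};

/* Typically registered from a driver's probe via devm_led_classdev_register(). */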
2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Multicast support for IPv6
 * Linux INET6 implementation
 *
 * Authors:
 * Pedro Roque <roque@di.fc.ul.pt>
 *
 * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
 */

/* Changes:
 *
 * yoshfuji : fix format of router-alert option
 * YOSHIFUJI Hideaki @USAGI:
 *     Fixed source address for MLD message based on
 *     <draft-ietf-magma-mld-source-05.txt>.
 * YOSHIFUJI Hideaki @USAGI:
 *     - Ignore Queries for invalid addresses.
 *     - MLD for link-local addresses.
 * David L Stevens <dlstevens@us.ibm.com>:
 *     - MLDv2 support
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/jiffies.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/route.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#include <net/mld.h>
#include <linux/workqueue.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/if_inet6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/inet_common.h>
#include <net/ip6_checksum.h>

/* Ensure that we have struct in6_addr aligned on 32bit word.
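 * (The BUILD_BUG_ON_ZERO() entries below turn a misaligned layout into a
 * compile-time error: the in6_addr fields inside the MLDv2 query, report
 * and group-record structures must start on a 4-byte boundary.)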
 */
static int __mld2_query_bugs[] __attribute__((__unused__)) = {
        BUILD_BUG_ON_ZERO(offsetof(struct mld2_query, mld2q_srcs) % 4),
        BUILD_BUG_ON_ZERO(offsetof(struct mld2_report, mld2r_grec) % 4),
        BUILD_BUG_ON_ZERO(offsetof(struct mld2_grec, grec_mca) % 4)
};

static struct workqueue_struct *mld_wq;
static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;

static void igmp6_join_group(struct ifmcaddr6 *ma);
static void igmp6_leave_group(struct ifmcaddr6 *ma);
static void mld_mca_work(struct work_struct *work);

static void mld_ifc_event(struct inet6_dev *idev);
static bool mld_in_v1_mode(const struct inet6_dev *idev);
static int sf_setstate(struct ifmcaddr6 *pmc);
static void sf_markstate(struct ifmcaddr6 *pmc);
static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
                          int sfmode, int sfcount, const struct in6_addr *psfsrc,
                          int delta);
static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
                          int sfmode, int sfcount, const struct in6_addr *psfsrc,
                          int delta);
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
                            struct inet6_dev *idev);
static int __ipv6_dev_mc_inc(struct net_device *dev,
                             const struct in6_addr *addr, unsigned int mode);

#define MLD_QRV_DEFAULT         2
/* RFC3810, 9.2. Query Interval */
#define MLD_QI_DEFAULT          (125 * HZ)
/* RFC3810, 9.3. Query Response Interval */
#define MLD_QRI_DEFAULT         (10 * HZ)

/* RFC3810, 8.1 Query Version Distinctions */
#define MLD_V1_QUERY_LEN        24
#define MLD_V2_QUERY_LEN_MIN    28

#define IPV6_MLD_MAX_MSF        64

int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;

/*
 * socket join on multicast group
 */
#define mc_dereference(e, idev) \
        rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock))

#define sock_dereference(e, sk) \
        rcu_dereference_protected(e, lockdep_sock_is_held(sk))

#define for_each_pmc_socklock(np, sk, pmc) \
        for (pmc = sock_dereference((np)->ipv6_mc_list, sk); \
             pmc; \
             pmc = sock_dereference(pmc->next, sk))

#define for_each_pmc_rcu(np, pmc) \
        for (pmc = rcu_dereference((np)->ipv6_mc_list); \
             pmc; \
             pmc = rcu_dereference(pmc->next))

#define for_each_psf_mclock(mc, psf) \
        for (psf = mc_dereference((mc)->mca_sources, mc->idev); \
             psf; \
             psf = mc_dereference(psf->sf_next, mc->idev))

#define for_each_psf_rcu(mc, psf) \
        for (psf = rcu_dereference((mc)->mca_sources); \
             psf; \
             psf = rcu_dereference(psf->sf_next))

#define for_each_psf_tomb(mc, psf) \
        for (psf = mc_dereference((mc)->mca_tomb, mc->idev); \
             psf; \
             psf = mc_dereference(psf->sf_next, mc->idev))

#define for_each_mc_mclock(idev, mc) \
        for (mc = mc_dereference((idev)->mc_list, idev); \
             mc; \
             mc = mc_dereference(mc->next, idev))

#define for_each_mc_rcu(idev, mc) \
        for (mc = rcu_dereference((idev)->mc_list); \
             mc; \
             mc = rcu_dereference(mc->next))

#define for_each_mc_tomb(idev, mc) \
        for (mc = mc_dereference((idev)->mc_tomb, idev); \
             mc; \
             mc = mc_dereference(mc->next, idev))

static int unsolicited_report_interval(struct inet6_dev *idev)
{
        int iv;

        if (mld_in_v1_mode(idev))
                iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval);
        else
                iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval);

        return iv > 0 ?
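                       /* a non-positive configured interval falls back to one jiffy */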
iv : 1; } static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr, unsigned int mode) { struct net_device *dev = NULL; struct ipv6_mc_socklist *mc_lst; struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); int err; ASSERT_RTNL(); if (!ipv6_addr_is_multicast(addr)) return -EINVAL; for_each_pmc_socklock(np, sk, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) return -EADDRINUSE; } mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); if (!mc_lst) return -ENOMEM; mc_lst->next = NULL; mc_lst->addr = *addr; if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); } } else dev = __dev_get_by_index(net, ifindex); if (!dev) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } mc_lst->ifindex = dev->ifindex; mc_lst->sfmode = mode; RCU_INIT_POINTER(mc_lst->sflist, NULL); /* * now add/increase the group membership on the device */ err = __ipv6_dev_mc_inc(dev, addr, mode); if (err) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; } mc_lst->next = np->ipv6_mc_list; rcu_assign_pointer(np->ipv6_mc_list, mc_lst); return 0; } int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { return __ipv6_sock_mc_join(sk, ifindex, addr, MCAST_EXCLUDE); } EXPORT_SYMBOL(ipv6_sock_mc_join); int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex, const struct in6_addr *addr, unsigned int mode) { return __ipv6_sock_mc_join(sk, ifindex, addr, mode); } /* * socket leave on multicast group */ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); ASSERT_RTNL(); if (!ipv6_addr_is_multicast(addr)) return -EINVAL; for (lnk = &np->ipv6_mc_list; (mc_lst = sock_dereference(*lnk, sk)) != NULL; lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { struct net_device *dev; *lnk = mc_lst->next; dev = __dev_get_by_index(net, mc_lst->ifindex); if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); ip6_mc_leave_src(sk, mc_lst, idev); if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); } else { ip6_mc_leave_src(sk, mc_lst, NULL); } atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); return 0; } } return -EADDRNOTAVAIL; } EXPORT_SYMBOL(ipv6_sock_mc_drop); static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net, const struct in6_addr *group, int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; if (ifindex == 0) { struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); } } else { dev = __dev_get_by_index(net, ifindex); } if (!dev) return NULL; idev = __in6_dev_get(dev); if (!idev) return NULL; if (idev->dead) return NULL; return idev; } void __ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); ASSERT_RTNL(); while ((mc_lst = sock_dereference(np->ipv6_mc_list, sk)) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; dev = __dev_get_by_index(net, mc_lst->ifindex); if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); ip6_mc_leave_src(sk, mc_lst, idev); if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); } else { ip6_mc_leave_src(sk, mc_lst, NULL); } atomic_sub(sizeof(*mc_lst), 
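                   /* uncharge the socket's option memory; the entry itself is freed after an RCU grace period */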
&sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); } } void ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); if (!rcu_access_pointer(np->ipv6_mc_list)) return; rtnl_lock(); lock_sock(sk); __ipv6_sock_mc_close(sk); release_sock(sk); rtnl_unlock(); } int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr) { struct in6_addr *source, *group; struct ipv6_mc_socklist *pmc; struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; struct net *net = sock_net(sk); int i, j, rv; int leavegroup = 0; int err; source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr; if (!ipv6_addr_is_multicast(group)) return -EINVAL; idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface); if (!idev) return -ENODEV; err = -EADDRNOTAVAIL; mutex_lock(&idev->mc_lock); for_each_pmc_socklock(inet6, sk, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) break; } if (!pmc) { /* must have a prior join */ err = -EINVAL; goto done; } /* if a source filter was set, must be the same mode as before */ if (rcu_access_pointer(pmc->sflist)) { if (pmc->sfmode != omode) { err = -EINVAL; goto done; } } else if (pmc->sfmode != omode) { /* allow mode switches for empty-set filters */ ip6_mc_add_src(idev, group, omode, 0, NULL, 0); ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); pmc->sfmode = omode; } psl = sock_dereference(pmc->sflist, sk); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ rv = !0; for (i = 0; i < psl->sl_count; i++) { rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) break; } if (rv) /* source not found */ goto done; /* err = -EADDRNOTAVAIL */ /* special case - (INCLUDE, empty) == LEAVE_GROUP */ if (psl->sl_count == 1 && omode == MCAST_INCLUDE) { leavegroup = 1; goto done; } /* update the interface filter */ ip6_mc_del_src(idev, group, omode, 1, source, 1); for (j = i+1; j < psl->sl_count; j++) psl->sl_addr[j-1] = psl->sl_addr[j]; psl->sl_count--; err = 0; goto done; } /* else, add a new source to the filter */ if (psl && psl->sl_count >= sysctl_mld_max_msf) { err = -ENOBUFS; goto done; } if (!psl || psl->sl_count == psl->sl_max) { struct ip6_sf_socklist *newpsl; int count = IP6_SFBLOCK; if (psl) count += psl->sl_max; newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; } newpsl->sl_max = count; newpsl->sl_count = count - IP6_SFBLOCK; if (psl) { for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); } rcu_assign_pointer(pmc->sflist, newpsl); kfree_rcu(psl, rcu); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i = 0; i < psl->sl_count; i++) { rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) /* There is an error in the address. 
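 * (rv == 0 means the requested source is already present in the filter
 * list, so the duplicate add bails out via the done label)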
*/ goto done; } for (j = psl->sl_count-1; j >= i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = *source; psl->sl_count++; err = 0; /* update the interface list */ ip6_mc_add_src(idev, group, omode, 1, source, 1); done: mutex_unlock(&idev->mc_lock); if (leavegroup) err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; } int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage *list) { const struct in6_addr *group; struct ipv6_mc_socklist *pmc; struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; struct net *net = sock_net(sk); int leavegroup = 0; int i, err; group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr; if (!ipv6_addr_is_multicast(group)) return -EINVAL; if (gsf->gf_fmode != MCAST_INCLUDE && gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); if (!idev) return -ENODEV; err = 0; if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; goto done; } for_each_pmc_socklock(inet6, sk, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) break; } if (!pmc) { /* must have a prior join */ err = -EINVAL; goto done; } if (gsf->gf_numsrc) { newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, gsf->gf_numsrc), GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; } newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc; for (i = 0; i < newpsl->sl_count; ++i, ++list) { struct sockaddr_in6 *psin6; psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } mutex_lock(&idev->mc_lock); err = ip6_mc_add_src(idev, group, gsf->gf_fmode, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { mutex_unlock(&idev->mc_lock); sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, newpsl->sl_max)); goto done; } mutex_unlock(&idev->mc_lock); } else { newpsl = NULL; mutex_lock(&idev->mc_lock); ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); mutex_unlock(&idev->mc_lock); } mutex_lock(&idev->mc_lock); psl = sock_dereference(pmc->sflist, sk); if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); } rcu_assign_pointer(pmc->sflist, newpsl); mutex_unlock(&idev->mc_lock); kfree_rcu(psl, rcu); pmc->sfmode = gsf->gf_fmode; err = 0; done: if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; } int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, sockptr_t optval, size_t ss_offset) { struct ipv6_pinfo *inet6 = inet6_sk(sk); const struct in6_addr *group; struct ipv6_mc_socklist *pmc; struct ip6_sf_socklist *psl; int i, count, copycount; group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr; if (!ipv6_addr_is_multicast(group)) return -EINVAL; /* changes to the ipv6_mc_list require the socket lock and * rtnl lock. We have the socket lock, so reading the list is safe. */ for_each_pmc_socklock(inet6, sk, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) break; } if (!pmc) /* must have a prior join */ return -EADDRNOTAVAIL; gsf->gf_fmode = pmc->sfmode; psl = sock_dereference(pmc->sflist, sk); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? 
count : gsf->gf_numsrc; gsf->gf_numsrc = count; for (i = 0; i < copycount; i++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; psin6 = (struct sockaddr_in6 *)&ss; memset(&ss, 0, sizeof(ss)); psin6->sin6_family = AF_INET6; psin6->sin6_addr = psl->sl_addr[i]; if (copy_to_sockptr_offset(optval, ss_offset, &ss, sizeof(ss))) return -EFAULT; ss_offset += sizeof(ss); } return 0; } bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr) { const struct ipv6_pinfo *np = inet6_sk(sk); const struct ipv6_mc_socklist *mc; const struct ip6_sf_socklist *psl; bool rv = true; rcu_read_lock(); for_each_pmc_rcu(np, mc) { if (ipv6_addr_equal(&mc->addr, mc_addr)) break; } if (!mc) { rcu_read_unlock(); return inet6_test_bit(MC6_ALL, sk); } psl = rcu_dereference(mc->sflist); if (!psl) { rv = mc->sfmode == MCAST_EXCLUDE; } else { int i; for (i = 0; i < psl->sl_count; i++) { if (ipv6_addr_equal(&psl->sl_addr[i], src_addr)) break; } if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) rv = false; if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) rv = false; } rcu_read_unlock(); return rv; } /* called with mc_lock */ static void igmp6_group_added(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) dev_mc_add(dev, buf); } if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT)) return; if (mld_in_v1_mode(mc->idev)) { igmp6_join_group(mc); return; } /* else v2 */ /* Based on RFC3810 6.1, for newly added INCLUDE SSM, we * should not send filter-mode change record as the mode * should be from IN() to IN(A). */ if (mc->mca_sfmode == MCAST_EXCLUDE) mc->mca_crcount = mc->idev->mc_qrv; mld_ifc_event(mc->idev); } /* called with mc_lock */ static void igmp6_group_dropped(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) dev_mc_del(dev, buf); } if (mc->mca_flags & MAF_NOREPORT) return; if (!mc->idev->dead) igmp6_leave_group(mc); if (cancel_delayed_work(&mc->mca_work)) refcount_dec(&mc->mca_refcnt); } /* * deleted ifmcaddr6 manipulation * called with mc_lock */ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ifmcaddr6 *pmc; /* this is an "ifmcaddr6" for convenience; only the fields below * are actually used. In particular, the refcnt and users are not * used for management of the delete list. Using the same structure * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. 
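 * Entries linger on idev->mc_tomb only long enough to transmit the
 * remaining MLDv2 change reports (mca_crcount of them); mld_send_cr()
 * unlinks and frees an entry once that count reaches zero and no source
 * records remain.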
*/ pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); if (!pmc) return; pmc->idev = im->idev; in6_dev_hold(idev); pmc->mca_addr = im->mca_addr; pmc->mca_crcount = idev->mc_qrv; pmc->mca_sfmode = im->mca_sfmode; if (pmc->mca_sfmode == MCAST_INCLUDE) { struct ip6_sf_list *psf; rcu_assign_pointer(pmc->mca_tomb, mc_dereference(im->mca_tomb, idev)); rcu_assign_pointer(pmc->mca_sources, mc_dereference(im->mca_sources, idev)); RCU_INIT_POINTER(im->mca_tomb, NULL); RCU_INIT_POINTER(im->mca_sources, NULL); for_each_psf_mclock(pmc, psf) psf->sf_crcount = pmc->mca_crcount; } rcu_assign_pointer(pmc->next, idev->mc_tomb); rcu_assign_pointer(idev->mc_tomb, pmc); } /* called with mc_lock */ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ip6_sf_list *psf, *sources, *tomb; struct in6_addr *pmca = &im->mca_addr; struct ifmcaddr6 *pmc, *pmc_prev; pmc_prev = NULL; for_each_mc_tomb(idev, pmc) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) break; pmc_prev = pmc; } if (pmc) { if (pmc_prev) rcu_assign_pointer(pmc_prev->next, pmc->next); else rcu_assign_pointer(idev->mc_tomb, pmc->next); } if (pmc) { im->idev = pmc->idev; if (im->mca_sfmode == MCAST_INCLUDE) { tomb = rcu_replace_pointer(im->mca_tomb, mc_dereference(pmc->mca_tomb, pmc->idev), lockdep_is_held(&im->idev->mc_lock)); rcu_assign_pointer(pmc->mca_tomb, tomb); sources = rcu_replace_pointer(im->mca_sources, mc_dereference(pmc->mca_sources, pmc->idev), lockdep_is_held(&im->idev->mc_lock)); rcu_assign_pointer(pmc->mca_sources, sources); for_each_psf_mclock(im, psf) psf->sf_crcount = idev->mc_qrv; } else { im->mca_crcount = idev->mc_qrv; } in6_dev_put(pmc->idev); ip6_mc_clear_src(pmc); kfree_rcu(pmc, rcu); } } /* called with mc_lock */ static void mld_clear_delrec(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *nextpmc; pmc = mc_dereference(idev->mc_tomb, idev); RCU_INIT_POINTER(idev->mc_tomb, NULL); for (; pmc; pmc = nextpmc) { nextpmc = mc_dereference(pmc->next, idev); ip6_mc_clear_src(pmc); in6_dev_put(pmc->idev); kfree_rcu(pmc, rcu); } /* clear dead sources, too */ for_each_mc_mclock(idev, pmc) { struct ip6_sf_list *psf, *psf_next; psf = mc_dereference(pmc->mca_tomb, idev); RCU_INIT_POINTER(pmc->mca_tomb, NULL); for (; psf; psf = psf_next) { psf_next = mc_dereference(psf->sf_next, idev); kfree_rcu(psf, rcu); } } } static void mld_clear_query(struct inet6_dev *idev) { struct sk_buff *skb; spin_lock_bh(&idev->mc_query_lock); while ((skb = __skb_dequeue(&idev->mc_query_queue))) kfree_skb(skb); spin_unlock_bh(&idev->mc_query_lock); } static void mld_clear_report(struct inet6_dev *idev) { struct sk_buff *skb; spin_lock_bh(&idev->mc_report_lock); while ((skb = __skb_dequeue(&idev->mc_report_queue))) kfree_skb(skb); spin_unlock_bh(&idev->mc_report_lock); } static void mca_get(struct ifmcaddr6 *mc) { refcount_inc(&mc->mca_refcnt); } static void ma_put(struct ifmcaddr6 *mc) { if (refcount_dec_and_test(&mc->mca_refcnt)) { in6_dev_put(mc->idev); kfree_rcu(mc, rcu); } } /* called with mc_lock */ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, const struct in6_addr *addr, unsigned int mode) { struct ifmcaddr6 *mc; mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; INIT_DELAYED_WORK(&mc->mca_work, mld_mca_work); mc->mca_addr = *addr; mc->idev = idev; /* reference taken by caller */ mc->mca_users = 1; /* mca_stamp should be updated upon changes */ mc->mca_cstamp = mc->mca_tstamp = jiffies; refcount_set(&mc->mca_refcnt, 1); mc->mca_sfmode = mode; mc->mca_sfcount[mode] = 1; if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) || 
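            /* the all-nodes group and interface-local scopes never generate reports */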
IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) mc->mca_flags |= MAF_NOREPORT; return mc; } /* * device multicast group inc (add if not found) */ static int __ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr, unsigned int mode) { struct ifmcaddr6 *mc; struct inet6_dev *idev; ASSERT_RTNL(); /* we need to take a reference on idev */ idev = in6_dev_get(dev); if (!idev) return -EINVAL; if (idev->dead) { in6_dev_put(idev); return -ENODEV; } mutex_lock(&idev->mc_lock); for_each_mc_mclock(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, addr)) { mc->mca_users++; ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0); mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return 0; } } mc = mca_alloc(idev, addr, mode); if (!mc) { mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return -ENOMEM; } rcu_assign_pointer(mc->next, idev->mc_list); rcu_assign_pointer(idev->mc_list, mc); mca_get(mc); mld_del_delrec(idev, mc); igmp6_group_added(mc); mutex_unlock(&idev->mc_lock); ma_put(mc); return 0; } int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) { return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE); } EXPORT_SYMBOL(ipv6_dev_mc_inc); /* * device multicast group del */ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifmcaddr6 *ma, __rcu **map; ASSERT_RTNL(); mutex_lock(&idev->mc_lock); for (map = &idev->mc_list; (ma = mc_dereference(*map, idev)); map = &ma->next) { if (ipv6_addr_equal(&ma->mca_addr, addr)) { if (--ma->mca_users == 0) { *map = ma->next; igmp6_group_dropped(ma); ip6_mc_clear_src(ma); mutex_unlock(&idev->mc_lock); ma_put(ma); return 0; } mutex_unlock(&idev->mc_lock); return 0; } } mutex_unlock(&idev->mc_lock); return -ENOENT; } int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev; int err; ASSERT_RTNL(); idev = __in6_dev_get(dev); if (!idev) err = -ENODEV; else err = __ipv6_dev_mc_dec(idev, addr); return err; } EXPORT_SYMBOL(ipv6_dev_mc_dec); /* * check if the interface/address pair is valid */ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, const struct in6_addr *src_addr) { struct inet6_dev *idev; struct ifmcaddr6 *mc; bool rv = false; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { for_each_mc_rcu(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, group)) break; } if (mc) { if (src_addr && !ipv6_addr_any(src_addr)) { struct ip6_sf_list *psf; for_each_psf_rcu(mc, psf) { if (ipv6_addr_equal(&psf->sf_addr, src_addr)) break; } if (psf) rv = psf->sf_count[MCAST_INCLUDE] || psf->sf_count[MCAST_EXCLUDE] != mc->mca_sfcount[MCAST_EXCLUDE]; else rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0; } else rv = true; /* don't filter unspecified source */ } } rcu_read_unlock(); return rv; } /* called with mc_lock */ static void mld_gq_start_work(struct inet6_dev *idev) { unsigned long tv = get_random_u32_below(idev->mc_maxdelay); idev->mc_gq_running = 1; if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2)) in6_dev_hold(idev); } /* called with mc_lock */ static void mld_gq_stop_work(struct inet6_dev *idev) { idev->mc_gq_running = 0; if (cancel_delayed_work(&idev->mc_gq_work)) __in6_dev_put(idev); } /* called with mc_lock */ static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = get_random_u32_below(delay); if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2)) in6_dev_hold(idev); } /* called with mc_lock */ static void mld_ifc_stop_work(struct inet6_dev *idev) { idev->mc_ifc_count = 0; if 
(cancel_delayed_work(&idev->mc_ifc_work)) __in6_dev_put(idev); } /* called with mc_lock */ static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = get_random_u32_below(delay); if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2)) in6_dev_hold(idev); } static void mld_dad_stop_work(struct inet6_dev *idev) { if (cancel_delayed_work(&idev->mc_dad_work)) __in6_dev_put(idev); } static void mld_query_stop_work(struct inet6_dev *idev) { spin_lock_bh(&idev->mc_query_lock); if (cancel_delayed_work(&idev->mc_query_work)) __in6_dev_put(idev); spin_unlock_bh(&idev->mc_query_lock); } static void mld_report_stop_work(struct inet6_dev *idev) { if (cancel_delayed_work_sync(&idev->mc_report_work)) __in6_dev_put(idev); } /* * IGMP handling (alias multicast ICMPv6 messages) * called with mc_lock */ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) { unsigned long delay = resptime; /* Do not start work for these addresses */ if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) || IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; if (cancel_delayed_work(&ma->mca_work)) { refcount_dec(&ma->mca_refcnt); delay = ma->mca_work.timer.expires - jiffies; } if (delay >= resptime) delay = get_random_u32_below(resptime); if (!mod_delayed_work(mld_wq, &ma->mca_work, delay)) refcount_inc(&ma->mca_refcnt); ma->mca_flags |= MAF_TIMER_RUNNING; } /* mark EXCLUDE-mode sources * called with mc_lock */ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; scount = 0; for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) break; for (i = 0; i < nsrcs; i++) { /* skip inactive filters */ if (psf->sf_count[MCAST_INCLUDE] || pmc->mca_sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) break; if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { scount++; break; } } } pmc->mca_flags &= ~MAF_GSQUERY; if (scount == nsrcs) /* all sources excluded */ return false; return true; } /* called with mc_lock */ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; if (pmc->mca_sfmode == MCAST_EXCLUDE) return mld_xmarksources(pmc, nsrcs, srcs); /* mark INCLUDE-mode sources */ scount = 0; for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) break; for (i = 0; i < nsrcs; i++) { if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { psf->sf_gsresp = 1; scount++; break; } } } if (!scount) { pmc->mca_flags &= ~MAF_GSQUERY; return false; } pmc->mca_flags |= MAF_GSQUERY; return true; } static int mld_force_mld_version(const struct inet6_dev *idev) { const struct net *net = dev_net(idev->dev); int all_force; all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version); /* Normally, both are 0 here. If enforcement to a particular is * being used, individual device enforcement will have a lower * precedence over 'all' device (.../conf/all/force_mld_version). 
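 * In other words, a non-zero conf/all/force_mld_version overrides the
 * per-device setting; the per-device value is only consulted when the
 * 'all' value is zero.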
*/ return all_force ?: READ_ONCE(idev->cnf.force_mld_version); } static bool mld_in_v2_mode_only(const struct inet6_dev *idev) { return mld_force_mld_version(idev) == 2; } static bool mld_in_v1_mode_only(const struct inet6_dev *idev) { return mld_force_mld_version(idev) == 1; } static bool mld_in_v1_mode(const struct inet6_dev *idev) { if (mld_in_v2_mode_only(idev)) return false; if (mld_in_v1_mode_only(idev)) return true; if (idev->mc_v1_seen && time_before(jiffies, idev->mc_v1_seen)) return true; return false; } static void mld_set_v1_mode(struct inet6_dev *idev) { /* RFC3810, relevant sections: * - 9.1. Robustness Variable * - 9.2. Query Interval * - 9.3. Query Response Interval * - 9.12. Older Version Querier Present Timeout */ unsigned long switchback; switchback = (idev->mc_qrv * idev->mc_qi) + idev->mc_qri; idev->mc_v1_seen = jiffies + switchback; } static void mld_update_qrv(struct inet6_dev *idev, const struct mld2_query *mlh2) { /* RFC3810, relevant sections: * - 5.1.8. QRV (Querier's Robustness Variable) * - 9.1. Robustness Variable */ /* The value of the Robustness Variable MUST NOT be zero, * and SHOULD NOT be one. Catch this here if we ever run * into such a case in future. */ const int min_qrv = min(MLD_QRV_DEFAULT, sysctl_mld_qrv); WARN_ON(idev->mc_qrv == 0); if (mlh2->mld2q_qrv > 0) idev->mc_qrv = mlh2->mld2q_qrv; if (unlikely(idev->mc_qrv < min_qrv)) { net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n", idev->mc_qrv, min_qrv); idev->mc_qrv = min_qrv; } } static void mld_update_qi(struct inet6_dev *idev, const struct mld2_query *mlh2) { /* RFC3810, relevant sections: * - 5.1.9. QQIC (Querier's Query Interval Code) * - 9.2. Query Interval * - 9.12. Older Version Querier Present Timeout * (the [Query Interval] in the last Query received) */ unsigned long mc_qqi; if (mlh2->mld2q_qqic < 128) { mc_qqi = mlh2->mld2q_qqic; } else { unsigned long mc_man, mc_exp; mc_exp = MLDV2_QQIC_EXP(mlh2->mld2q_qqic); mc_man = MLDV2_QQIC_MAN(mlh2->mld2q_qqic); mc_qqi = (mc_man | 0x10) << (mc_exp + 3); } idev->mc_qi = mc_qqi * HZ; } static void mld_update_qri(struct inet6_dev *idev, const struct mld2_query *mlh2) { /* RFC3810, relevant sections: * - 5.1.3. Maximum Response Code * - 9.3. Query Response Interval */ idev->mc_qri = msecs_to_jiffies(mldv2_mrc(mlh2)); } static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld, unsigned long *max_delay, bool v1_query) { unsigned long mldv1_md; /* Ignore v1 queries */ if (mld_in_v2_mode_only(idev)) return -EINVAL; mldv1_md = ntohs(mld->mld_maxdelay); /* When in MLDv1 fallback and a MLDv2 router start-up being * unaware of current MLDv1 operation, the MRC == MRD mapping * only works when the exponential algorithm is not being * used (as MLDv1 is unaware of such things). * * According to the RFC author, the MLDv2 implementations * he's aware of all use a MRC < 32768 on start up queries. * * Thus, should we *ever* encounter something else larger * than that, just assume the maximum possible within our * reach. */ if (!v1_query) mldv1_md = min(mldv1_md, MLDV1_MRD_MAX_COMPAT); *max_delay = max(msecs_to_jiffies(mldv1_md), 1UL); /* MLDv1 router present: we need to go into v1 mode *only* * when an MLDv1 query is received as per section 9.12. of * RFC3810! And we know from RFC2710 section 3.7 that MLDv1 * queries MUST be of exactly 24 octets. 
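 * (The caller passes v1_query == true exactly when the received query
 * length equals MLD_V1_QUERY_LEN, i.e. 24 octets.)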
*/ if (v1_query) mld_set_v1_mode(idev); /* cancel MLDv2 report work */ mld_gq_stop_work(idev); /* cancel the interface change work */ mld_ifc_stop_work(idev); /* clear deleted report items */ mld_clear_delrec(idev); return 0; } static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, unsigned long *max_delay) { *max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL); mld_update_qrv(idev, mld); mld_update_qi(idev, mld); mld_update_qri(idev, mld); idev->mc_maxdelay = *max_delay; return; } /* called with rcu_read_lock() */ void igmp6_event_query(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); if (!idev || idev->dead) goto out; spin_lock_bh(&idev->mc_query_lock); if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) { __skb_queue_tail(&idev->mc_query_queue, skb); if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0)) in6_dev_hold(idev); skb = NULL; } spin_unlock_bh(&idev->mc_query_lock); out: kfree_skb(skb); } static void __mld_query_work(struct sk_buff *skb) { struct mld2_query *mlh2 = NULL; const struct in6_addr *group; unsigned long max_delay; struct inet6_dev *idev; struct ifmcaddr6 *ma; struct mld_msg *mld; int group_type; int mark = 0; int len, err; if (!pskb_may_pull(skb, sizeof(struct in6_addr))) goto kfree_skb; /* compute payload length excluding extension headers */ len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); len -= skb_network_header_len(skb); /* RFC3810 6.2 * Upon reception of an MLD message that contains a Query, the node * checks if the source address of the message is a valid link-local * address, if the Hop Limit is set to 1, and if the Router Alert * option is present in the Hop-By-Hop Options header of the IPv6 * packet. If any of these checks fails, the packet is dropped. 
*/ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) || ipv6_hdr(skb)->hop_limit != 1 || !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) || IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD)) goto kfree_skb; idev = in6_dev_get(skb->dev); if (!idev) goto kfree_skb; mld = (struct mld_msg *)icmp6_hdr(skb); group = &mld->mld_mca; group_type = ipv6_addr_type(group); if (group_type != IPV6_ADDR_ANY && !(group_type&IPV6_ADDR_MULTICAST)) goto out; if (len < MLD_V1_QUERY_LEN) { goto out; } else if (len == MLD_V1_QUERY_LEN || mld_in_v1_mode(idev)) { err = mld_process_v1(idev, mld, &max_delay, len == MLD_V1_QUERY_LEN); if (err < 0) goto out; } else if (len >= MLD_V2_QUERY_LEN_MIN) { int srcs_offset = sizeof(struct mld2_query) - sizeof(struct icmp6hdr); if (!pskb_may_pull(skb, srcs_offset)) goto out; mlh2 = (struct mld2_query *)skb_transport_header(skb); mld_process_v2(idev, mlh2, &max_delay); if (group_type == IPV6_ADDR_ANY) { /* general query */ if (mlh2->mld2q_nsrcs) goto out; /* no sources allowed */ mld_gq_start_work(idev); goto out; } /* mark sources to include, if group & source-specific */ if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) goto out; mlh2 = (struct mld2_query *)skb_transport_header(skb); mark = 1; } } else { goto out; } if (group_type == IPV6_ADDR_ANY) { for_each_mc_mclock(idev, ma) { igmp6_group_queried(ma, max_delay); } } else { for_each_mc_mclock(idev, ma) { if (!ipv6_addr_equal(group, &ma->mca_addr)) continue; if (ma->mca_flags & MAF_TIMER_RUNNING) { /* gsquery <- gsquery && mark */ if (!mark) ma->mca_flags &= ~MAF_GSQUERY; } else { /* gsquery <- mark */ if (mark) ma->mca_flags |= MAF_GSQUERY; else ma->mca_flags &= ~MAF_GSQUERY; } if (!(ma->mca_flags & MAF_GSQUERY) || mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs)) igmp6_group_queried(ma, max_delay); break; } } out: in6_dev_put(idev); kfree_skb: consume_skb(skb); } static void mld_query_work(struct work_struct *work) { struct inet6_dev *idev = container_of(to_delayed_work(work), struct inet6_dev, mc_query_work); struct sk_buff_head q; struct sk_buff *skb; bool rework = false; int cnt = 0; skb_queue_head_init(&q); spin_lock_bh(&idev->mc_query_lock); while ((skb = __skb_dequeue(&idev->mc_query_queue))) { __skb_queue_tail(&q, skb); if (++cnt >= MLD_MAX_QUEUE) { rework = true; break; } } spin_unlock_bh(&idev->mc_query_lock); mutex_lock(&idev->mc_lock); while ((skb = __skb_dequeue(&q))) __mld_query_work(skb); mutex_unlock(&idev->mc_lock); if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0)) return; in6_dev_put(idev); } /* called with rcu_read_lock() */ void igmp6_event_report(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); if (!idev || idev->dead) goto out; spin_lock_bh(&idev->mc_report_lock); if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) { __skb_queue_tail(&idev->mc_report_queue, skb); if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0)) in6_dev_hold(idev); skb = NULL; } spin_unlock_bh(&idev->mc_report_lock); out: kfree_skb(skb); } static void __mld_report_work(struct sk_buff *skb) { struct inet6_dev *idev; struct ifmcaddr6 *ma; struct mld_msg *mld; int addr_type; /* Our own report looped back. Ignore it. 
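 * Reports heard from other nodes, on the other hand, suppress our own
 * pending report for the same group: the matching delayed work is
 * cancelled further down.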
*/ if (skb->pkt_type == PACKET_LOOPBACK) goto kfree_skb; /* send our report if the MC router may not have heard this report */ if (skb->pkt_type != PACKET_MULTICAST && skb->pkt_type != PACKET_BROADCAST) goto kfree_skb; if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr))) goto kfree_skb; mld = (struct mld_msg *)icmp6_hdr(skb); /* Drop reports with not link local source */ addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); if (addr_type != IPV6_ADDR_ANY && !(addr_type&IPV6_ADDR_LINKLOCAL)) goto kfree_skb; idev = in6_dev_get(skb->dev); if (!idev) goto kfree_skb; /* * Cancel the work for this group */ for_each_mc_mclock(idev, ma) { if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { if (cancel_delayed_work(&ma->mca_work)) refcount_dec(&ma->mca_refcnt); ma->mca_flags &= ~(MAF_LAST_REPORTER | MAF_TIMER_RUNNING); break; } } in6_dev_put(idev); kfree_skb: consume_skb(skb); } static void mld_report_work(struct work_struct *work) { struct inet6_dev *idev = container_of(to_delayed_work(work), struct inet6_dev, mc_report_work); struct sk_buff_head q; struct sk_buff *skb; bool rework = false; int cnt = 0; skb_queue_head_init(&q); spin_lock_bh(&idev->mc_report_lock); while ((skb = __skb_dequeue(&idev->mc_report_queue))) { __skb_queue_tail(&q, skb); if (++cnt >= MLD_MAX_QUEUE) { rework = true; break; } } spin_unlock_bh(&idev->mc_report_lock); mutex_lock(&idev->mc_lock); while ((skb = __skb_dequeue(&q))) __mld_report_work(skb); mutex_unlock(&idev->mc_lock); if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0)) return; in6_dev_put(idev); } static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, int gdeleted, int sdeleted) { switch (type) { case MLD2_MODE_IS_INCLUDE: case MLD2_MODE_IS_EXCLUDE: if (gdeleted || sdeleted) return false; if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) { if (pmc->mca_sfmode == MCAST_INCLUDE) return true; /* don't include if this source is excluded * in all filters */ if (psf->sf_count[MCAST_INCLUDE]) return type == MLD2_MODE_IS_INCLUDE; return pmc->mca_sfcount[MCAST_EXCLUDE] == psf->sf_count[MCAST_EXCLUDE]; } return false; case MLD2_CHANGE_TO_INCLUDE: if (gdeleted || sdeleted) return false; return psf->sf_count[MCAST_INCLUDE] != 0; case MLD2_CHANGE_TO_EXCLUDE: if (gdeleted || sdeleted) return false; if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 || psf->sf_count[MCAST_INCLUDE]) return false; return pmc->mca_sfcount[MCAST_EXCLUDE] == psf->sf_count[MCAST_EXCLUDE]; case MLD2_ALLOW_NEW_SOURCES: if (gdeleted || !psf->sf_crcount) return false; return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted; case MLD2_BLOCK_OLD_SOURCES: if (pmc->mca_sfmode == MCAST_INCLUDE) return gdeleted || (psf->sf_crcount && sdeleted); return psf->sf_crcount && !gdeleted && !sdeleted; } return false; } static int mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) { struct ip6_sf_list *psf; int scount = 0; for_each_psf_mclock(pmc, psf) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; } return scount; } static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb, struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr, int proto, int len) { struct ipv6hdr *hdr; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; skb_reset_network_header(skb); skb_put(skb, sizeof(struct ipv6hdr)); hdr = ipv6_hdr(skb); ip6_flow_hdr(hdr, 0, 0); hdr->payload_len = htons(len); hdr->nexthdr = proto; hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit); hdr->saddr = *saddr; hdr->daddr = *daddr; } static struct 
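/*
 * mld_newpack() below builds a fresh MLDv2 report: IPv6 header, hop-by-hop
 * router-alert option and an initially empty mld2_report, sized to the
 * link MTU (capped to avoid high-order page allocations).
 */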
sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) { u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; struct net_device *dev = idev->dev; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; struct net *net = dev_net(dev); const struct in6_addr *saddr; struct in6_addr addr_buf; struct mld2_report *pmr; struct sk_buff *skb; unsigned int size; struct sock *sk; int err; sk = net->ipv6.igmp_sk; /* we assume size > sizeof(ra) here * Also try to not allocate high-order pages for big MTU */ size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen; skb = sock_alloc_send_skb(sk, size, 1, &err); if (!skb) return NULL; skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, hlen); skb_tailroom_reserve(skb, mtu, tlen); if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: * use unspecified address as the source address * when a valid link-local address is not available. */ saddr = &in6addr_any; } else saddr = &addr_buf; ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); skb_put_data(skb, ra, sizeof(ra)); skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); skb_put(skb, sizeof(*pmr)); pmr = (struct mld2_report *)skb_transport_header(skb); pmr->mld2r_type = ICMPV6_MLD2_REPORT; pmr->mld2r_resv1 = 0; pmr->mld2r_cksum = 0; pmr->mld2r_resv2 = 0; pmr->mld2r_ngrec = 0; return skb; } static void mld_sendpack(struct sk_buff *skb) { struct ipv6hdr *pip6 = ipv6_hdr(skb); struct mld2_report *pmr = (struct mld2_report *)skb_transport_header(skb); int payload_len, mldlen; struct inet6_dev *idev; struct net *net = dev_net(skb->dev); int err; struct flowi6 fl6; struct dst_entry *dst; rcu_read_lock(); idev = __in6_dev_get(skb->dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - sizeof(*pip6); mldlen = skb_tail_pointer(skb) - skb_transport_header(skb); pip6->payload_len = htons(payload_len); pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), mldlen, 0)); icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); dst = icmp6_dst_alloc(skb->dev, &fl6); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; } skb_dst_set(skb, dst); if (err) goto err_out; err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, net->ipv6.igmp_sk, skb, NULL, skb->dev, dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } else { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); } rcu_read_unlock(); return; err_out: kfree_skb(skb); goto out; } static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) { return sizeof(struct mld2_grec) + 16 * mld_scount(pmc,type,gdel,sdel); } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, struct mld2_grec **ppgr, unsigned int mtu) { struct mld2_report *pmr; struct mld2_grec *pgr; if (!skb) { skb = mld_newpack(pmc->idev, mtu); if (!skb) return NULL; } pgr = skb_put(skb, sizeof(struct mld2_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; pgr->grec_nsrcs = 0; pgr->grec_mca = pmc->mca_addr; /* structure copy */ pmr = (struct mld2_report *)skb_transport_header(skb); pmr->mld2r_ngrec = htons(ntohs(pmr->mld2r_ngrec)+1); *ppgr = pgr; return skb; } #define AVAILABLE(skb) ((skb) ? 
skb_availroom(skb) : 0) /* called with mc_lock */ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted, int crsend) { struct ip6_sf_list *psf, *psf_prev, *psf_next; int scount, stotal, first, isquery, truncate; struct ip6_sf_list __rcu **psf_list; struct inet6_dev *idev = pmc->idev; struct net_device *dev = idev->dev; struct mld2_grec *pgr = NULL; struct mld2_report *pmr; unsigned int mtu; if (pmc->mca_flags & MAF_NOREPORT) return skb; mtu = READ_ONCE(dev->mtu); if (mtu < IPV6_MIN_MTU) return skb; isquery = type == MLD2_MODE_IS_INCLUDE || type == MLD2_MODE_IS_EXCLUDE; truncate = type == MLD2_MODE_IS_EXCLUDE || type == MLD2_CHANGE_TO_EXCLUDE; stotal = scount = 0; psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources; if (!rcu_access_pointer(*psf_list)) goto empty_source; pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL; /* EX and TO_EX get a fresh packet, if needed */ if (truncate) { if (pmr && pmr->mld2r_ngrec && AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); skb = mld_newpack(idev, mtu); } } first = 1; psf_prev = NULL; for (psf = mc_dereference(*psf_list, idev); psf; psf = psf_next) { struct in6_addr *psrc; psf_next = mc_dereference(psf->sf_next, idev); if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) { psf_prev = psf; continue; } /* Based on RFC3810 6.1. Should not send source-list change * records when there is a filter mode change. */ if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) || (!gdeleted && pmc->mca_crcount)) && (type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) goto decrease_sf_crcount; /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; if (AVAILABLE(skb) < sizeof(*psrc) + first*sizeof(struct mld2_grec)) { if (truncate && !first) break; /* truncate these */ if (pgr) pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); skb = mld_newpack(idev, mtu); first = 1; scount = 0; } if (first) { skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) return NULL; psrc = skb_put(skb, sizeof(*psrc)); *psrc = psf->sf_addr; scount++; stotal++; if ((type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) { decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) rcu_assign_pointer(psf_prev->sf_next, mc_dereference(psf->sf_next, idev)); else rcu_assign_pointer(*psf_list, mc_dereference(psf->sf_next, idev)); kfree_rcu(psf, rcu); continue; } } psf_prev = psf; } empty_source: if (!stotal) { if (type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) return skb; if (pmc->mca_crcount || isquery || crsend) { /* make sure we have room for group header */ if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) { mld_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) pgr->grec_nsrcs = htons(scount); if (isquery) pmc->mca_flags &= ~MAF_GSQUERY; /* clear query state */ return skb; } /* called with mc_lock */ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) { struct sk_buff *skb = NULL; int type; if (!pmc) { for_each_mc_mclock(idev, pmc) { if (pmc->mca_flags & MAF_NOREPORT) continue; if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0, 0); } } else { if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_MODE_IS_EXCLUDE; else type = 
MLD2_MODE_IS_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0, 0); } if (skb) mld_sendpack(skb); } /* * remove zero-count source records from a source filter list * called with mc_lock */ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev) { struct ip6_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; for (psf = mc_dereference(*ppsf, idev); psf; psf = psf_next) { psf_next = mc_dereference(psf->sf_next, idev); if (psf->sf_crcount == 0) { if (psf_prev) rcu_assign_pointer(psf_prev->sf_next, mc_dereference(psf->sf_next, idev)); else rcu_assign_pointer(*ppsf, mc_dereference(psf->sf_next, idev)); kfree_rcu(psf, rcu); } else { psf_prev = psf; } } } /* called with mc_lock */ static void mld_send_cr(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next; struct sk_buff *skb = NULL; int type, dtype; /* deleted MCA's */ pmc_prev = NULL; for (pmc = mc_dereference(idev->mc_tomb, idev); pmc; pmc = pmc_next) { pmc_next = mc_dereference(pmc->next, idev); if (pmc->mca_sfmode == MCAST_INCLUDE) { type = MLD2_BLOCK_OLD_SOURCES; dtype = MLD2_BLOCK_OLD_SOURCES; skb = add_grec(skb, pmc, type, 1, 0, 0); skb = add_grec(skb, pmc, dtype, 1, 1, 0); } if (pmc->mca_crcount) { if (pmc->mca_sfmode == MCAST_EXCLUDE) { type = MLD2_CHANGE_TO_INCLUDE; skb = add_grec(skb, pmc, type, 1, 0, 0); } pmc->mca_crcount--; if (pmc->mca_crcount == 0) { mld_clear_zeros(&pmc->mca_tomb, idev); mld_clear_zeros(&pmc->mca_sources, idev); } } if (pmc->mca_crcount == 0 && !rcu_access_pointer(pmc->mca_tomb) && !rcu_access_pointer(pmc->mca_sources)) { if (pmc_prev) rcu_assign_pointer(pmc_prev->next, pmc_next); else rcu_assign_pointer(idev->mc_tomb, pmc_next); in6_dev_put(pmc->idev); kfree_rcu(pmc, rcu); } else pmc_prev = pmc; } /* change recs */ for_each_mc_mclock(idev, pmc) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { type = MLD2_BLOCK_OLD_SOURCES; dtype = MLD2_ALLOW_NEW_SOURCES; } else { type = MLD2_ALLOW_NEW_SOURCES; dtype = MLD2_BLOCK_OLD_SOURCES; } skb = add_grec(skb, pmc, type, 0, 0, 0); skb = add_grec(skb, pmc, dtype, 0, 1, 0); /* deleted sources */ /* filter mode changes */ if (pmc->mca_crcount) { if (pmc->mca_sfmode == MCAST_EXCLUDE) type = MLD2_CHANGE_TO_EXCLUDE; else type = MLD2_CHANGE_TO_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0, 0); pmc->mca_crcount--; } } if (!skb) return; (void) mld_sendpack(skb); } static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct inet6_dev *idev; struct sk_buff *skb; struct mld_msg *hdr; const struct in6_addr *snd_addr, *saddr; struct in6_addr addr_buf; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; struct flowi6 fl6; struct dst_entry *dst; if (type == ICMPV6_MGM_REDUCTION) snd_addr = &in6addr_linklocal_allrouters; else snd_addr = addr; len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; rcu_read_lock(); IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); rcu_read_unlock(); skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); if (!skb) { rcu_read_lock(); IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; } skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, hlen); if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: * use unspecified 
address as the source address * when a valid link-local address is not available. */ saddr = &in6addr_any; } else saddr = &addr_buf; ip6_mc_hdr(sk, skb, dev, saddr, snd_addr, NEXTHDR_HOP, payload_len); skb_put_data(skb, ra, sizeof(ra)); hdr = skb_put_zero(skb, sizeof(struct mld_msg)); hdr->mld_type = type; hdr->mld_mca = *addr; hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len, IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); rcu_read_lock(); idev = __in6_dev_get(skb->dev); icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_out; } skb_dst_set(skb, dst); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb->dev, dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; err_out: kfree_skb(skb); goto out; } /* called with mc_lock */ static void mld_send_initial_cr(struct inet6_dev *idev) { struct sk_buff *skb; struct ifmcaddr6 *pmc; int type; if (mld_in_v1_mode(idev)) return; skb = NULL; for_each_mc_mclock(idev, pmc) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_CHANGE_TO_EXCLUDE; else type = MLD2_ALLOW_NEW_SOURCES; skb = add_grec(skb, pmc, type, 0, 0, 1); } if (skb) mld_sendpack(skb); } void ipv6_mc_dad_complete(struct inet6_dev *idev) { mutex_lock(&idev->mc_lock); idev->mc_dad_count = idev->mc_qrv; if (idev->mc_dad_count) { mld_send_initial_cr(idev); idev->mc_dad_count--; if (idev->mc_dad_count) mld_dad_start_work(idev, unsolicited_report_interval(idev)); } mutex_unlock(&idev->mc_lock); } static void mld_dad_work(struct work_struct *work) { struct inet6_dev *idev = container_of(to_delayed_work(work), struct inet6_dev, mc_dad_work); mutex_lock(&idev->mc_lock); mld_send_initial_cr(idev); if (idev->mc_dad_count) { idev->mc_dad_count--; if (idev->mc_dad_count) mld_dad_start_work(idev, unsolicited_report_interval(idev)); } mutex_unlock(&idev->mc_lock); in6_dev_put(idev); } /* called with mc_lock */ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; int rv = 0; psf_prev = NULL; for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; } if (!psf || psf->sf_count[sfmode] == 0) { /* source filter not found, or count wrong => bug */ return -ESRCH; } psf->sf_count[sfmode]--; if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) { struct inet6_dev *idev = pmc->idev; /* no more filters for this source */ if (psf_prev) rcu_assign_pointer(psf_prev->sf_next, mc_dereference(psf->sf_next, idev)); else rcu_assign_pointer(pmc->mca_sources, mc_dereference(psf->sf_next, idev)); if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) && !mld_in_v1_mode(idev)) { psf->sf_crcount = idev->mc_qrv; rcu_assign_pointer(psf->sf_next, mc_dereference(pmc->mca_tomb, idev)); rcu_assign_pointer(pmc->mca_tomb, psf); rv = 1; } else { kfree_rcu(psf, rcu); } } return rv; } /* called with mc_lock */ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) { struct ifmcaddr6 *pmc; int changerec = 0; int i, err; if (!idev) return -ENODEV; for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } if (!pmc) return -ESRCH; sf_markstate(pmc); if (!delta) { if (!pmc->mca_sfcount[sfmode]) return 
-EINVAL; pmc->mca_sfcount[sfmode]--; } err = 0; for (i = 0; i < sfcount; i++) { int rv = ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]); changerec |= rv > 0; if (!err && rv < 0) err = rv; } if (pmc->mca_sfmode == MCAST_EXCLUDE && pmc->mca_sfcount[MCAST_EXCLUDE] == 0 && pmc->mca_sfcount[MCAST_INCLUDE]) { struct ip6_sf_list *psf; /* filter mode change */ pmc->mca_sfmode = MCAST_INCLUDE; pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; for_each_psf_mclock(pmc, psf) psf->sf_crcount = 0; mld_ifc_event(pmc->idev); } else if (sf_setstate(pmc) || changerec) { mld_ifc_event(pmc->idev); } return err; } /* * Add multicast single-source filter to the interface list * called with mc_lock */ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; psf_prev = NULL; for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; } if (!psf) { psf = kzalloc(sizeof(*psf), GFP_KERNEL); if (!psf) return -ENOBUFS; psf->sf_addr = *psfsrc; if (psf_prev) { rcu_assign_pointer(psf_prev->sf_next, psf); } else { rcu_assign_pointer(pmc->mca_sources, psf); } } psf->sf_count[sfmode]++; return 0; } /* called with mc_lock */ static void sf_markstate(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; } else { psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0; } } } /* called with mc_lock */ static int sf_setstate(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *dpsf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; int qrv = pmc->idev->mc_qrv; int new_in, rv; rv = 0; for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; } else new_in = psf->sf_count[MCAST_INCLUDE] != 0; if (new_in) { if (!psf->sf_oldin) { struct ip6_sf_list *prev = NULL; for_each_psf_tomb(pmc, dpsf) { if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; prev = dpsf; } if (dpsf) { if (prev) rcu_assign_pointer(prev->sf_next, mc_dereference(dpsf->sf_next, pmc->idev)); else rcu_assign_pointer(pmc->mca_tomb, mc_dereference(dpsf->sf_next, pmc->idev)); kfree_rcu(dpsf, rcu); } psf->sf_crcount = qrv; rv++; } } else if (psf->sf_oldin) { psf->sf_crcount = 0; /* * add or update "delete" records if an active filter * is now inactive */ for_each_psf_tomb(pmc, dpsf) if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; if (!dpsf) { dpsf = kmalloc(sizeof(*dpsf), GFP_KERNEL); if (!dpsf) continue; *dpsf = *psf; rcu_assign_pointer(dpsf->sf_next, mc_dereference(pmc->mca_tomb, pmc->idev)); rcu_assign_pointer(pmc->mca_tomb, dpsf); } dpsf->sf_crcount = qrv; rv++; } } return rv; } /* * Add multicast source filter list to the interface list * called with mc_lock */ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) { struct ifmcaddr6 *pmc; int isexclude; int i, err; if (!idev) return -ENODEV; for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } if (!pmc) return -ESRCH; sf_markstate(pmc); isexclude = pmc->mca_sfmode == MCAST_EXCLUDE; if (!delta) pmc->mca_sfcount[sfmode]++; err = 0; for (i = 0; i < sfcount; i++) { err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; } if (err) { int j; if (!delta) 
pmc->mca_sfcount[sfmode]--; for (j = 0; j < i; j++) ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) { struct ip6_sf_list *psf; /* filter mode change */ if (pmc->mca_sfcount[MCAST_EXCLUDE]) pmc->mca_sfmode = MCAST_EXCLUDE; else if (pmc->mca_sfcount[MCAST_INCLUDE]) pmc->mca_sfmode = MCAST_INCLUDE; /* else no filters; keep old mode for reports */ pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; for_each_psf_mclock(pmc, psf) psf->sf_crcount = 0; mld_ifc_event(idev); } else if (sf_setstate(pmc)) { mld_ifc_event(idev); } return err; } /* called with mc_lock */ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *nextpsf; for (psf = mc_dereference(pmc->mca_tomb, pmc->idev); psf; psf = nextpsf) { nextpsf = mc_dereference(psf->sf_next, pmc->idev); kfree_rcu(psf, rcu); } RCU_INIT_POINTER(pmc->mca_tomb, NULL); for (psf = mc_dereference(pmc->mca_sources, pmc->idev); psf; psf = nextpsf) { nextpsf = mc_dereference(psf->sf_next, pmc->idev); kfree_rcu(psf, rcu); } RCU_INIT_POINTER(pmc->mca_sources, NULL); pmc->mca_sfmode = MCAST_EXCLUDE; pmc->mca_sfcount[MCAST_INCLUDE] = 0; pmc->mca_sfcount[MCAST_EXCLUDE] = 1; } /* called with mc_lock */ static void igmp6_join_group(struct ifmcaddr6 *ma) { unsigned long delay; if (ma->mca_flags & MAF_NOREPORT) return; igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); delay = get_random_u32_below(unsolicited_report_interval(ma->idev)); if (cancel_delayed_work(&ma->mca_work)) { refcount_dec(&ma->mca_refcnt); delay = ma->mca_work.timer.expires - jiffies; } if (!mod_delayed_work(mld_wq, &ma->mca_work, delay)) refcount_inc(&ma->mca_refcnt); ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER; } static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, struct inet6_dev *idev) { struct ip6_sf_socklist *psl; int err; psl = sock_dereference(iml->sflist, sk); if (idev) mutex_lock(&idev->mc_lock); if (!psl) { /* any-source empty exclude case */ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); } else { err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, psl->sl_count, psl->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); atomic_sub(struct_size(psl, sl_addr, psl->sl_max), &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } if (idev) mutex_unlock(&idev->mc_lock); return err; } /* called with mc_lock */ static void igmp6_leave_group(struct ifmcaddr6 *ma) { if (mld_in_v1_mode(ma->idev)) { if (ma->mca_flags & MAF_LAST_REPORTER) { igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REDUCTION); } } else { mld_add_delrec(ma->idev, ma); mld_ifc_event(ma->idev); } } static void mld_gq_work(struct work_struct *work) { struct inet6_dev *idev = container_of(to_delayed_work(work), struct inet6_dev, mc_gq_work); mutex_lock(&idev->mc_lock); mld_send_report(idev, NULL); idev->mc_gq_running = 0; mutex_unlock(&idev->mc_lock); in6_dev_put(idev); } static void mld_ifc_work(struct work_struct *work) { struct inet6_dev *idev = container_of(to_delayed_work(work), struct inet6_dev, mc_ifc_work); mutex_lock(&idev->mc_lock); mld_send_cr(idev); if (idev->mc_ifc_count) { idev->mc_ifc_count--; if (idev->mc_ifc_count) mld_ifc_start_work(idev, unsolicited_report_interval(idev)); } mutex_unlock(&idev->mc_lock); in6_dev_put(idev); } /* called with mc_lock */ static void mld_ifc_event(struct inet6_dev *idev) { if (mld_in_v1_mode(idev)) return; idev->mc_ifc_count = idev->mc_qrv; mld_ifc_start_work(idev, 1); } static void mld_mca_work(struct work_struct *work) { struct 
ifmcaddr6 *ma = container_of(to_delayed_work(work), struct ifmcaddr6, mca_work); mutex_lock(&ma->idev->mc_lock); if (mld_in_v1_mode(ma->idev)) igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); else mld_send_report(ma->idev, ma); ma->mca_flags |= MAF_LAST_REPORTER; ma->mca_flags &= ~MAF_TIMER_RUNNING; mutex_unlock(&ma->idev->mc_lock); ma_put(ma); } /* Device changing type */ void ipv6_mc_unmap(struct inet6_dev *idev) { struct ifmcaddr6 *i; /* Install multicast list, except for all-nodes (already installed) */ mutex_lock(&idev->mc_lock); for_each_mc_mclock(idev, i) igmp6_group_dropped(i); mutex_unlock(&idev->mc_lock); } void ipv6_mc_remap(struct inet6_dev *idev) { ipv6_mc_up(idev); } /* Device going down */ void ipv6_mc_down(struct inet6_dev *idev) { struct ifmcaddr6 *i; mutex_lock(&idev->mc_lock); /* Withdraw multicast list */ for_each_mc_mclock(idev, i) igmp6_group_dropped(i); mutex_unlock(&idev->mc_lock); /* Should stop work after group drop. or we will * start work again in mld_ifc_event() */ mld_query_stop_work(idev); mld_report_stop_work(idev); mutex_lock(&idev->mc_lock); mld_ifc_stop_work(idev); mld_gq_stop_work(idev); mutex_unlock(&idev->mc_lock); mld_dad_stop_work(idev); } static void ipv6_mc_reset(struct inet6_dev *idev) { idev->mc_qrv = sysctl_mld_qrv; idev->mc_qi = MLD_QI_DEFAULT; idev->mc_qri = MLD_QRI_DEFAULT; idev->mc_v1_seen = 0; idev->mc_maxdelay = unsolicited_report_interval(idev); } /* Device going up */ void ipv6_mc_up(struct inet6_dev *idev) { struct ifmcaddr6 *i; /* Install multicast list, except for all-nodes (already installed) */ ipv6_mc_reset(idev); mutex_lock(&idev->mc_lock); for_each_mc_mclock(idev, i) { mld_del_delrec(idev, i); igmp6_group_added(i); } mutex_unlock(&idev->mc_lock); } /* IPv6 device initialization. */ void ipv6_mc_init_dev(struct inet6_dev *idev) { idev->mc_gq_running = 0; INIT_DELAYED_WORK(&idev->mc_gq_work, mld_gq_work); RCU_INIT_POINTER(idev->mc_tomb, NULL); idev->mc_ifc_count = 0; INIT_DELAYED_WORK(&idev->mc_ifc_work, mld_ifc_work); INIT_DELAYED_WORK(&idev->mc_dad_work, mld_dad_work); INIT_DELAYED_WORK(&idev->mc_query_work, mld_query_work); INIT_DELAYED_WORK(&idev->mc_report_work, mld_report_work); skb_queue_head_init(&idev->mc_query_queue); skb_queue_head_init(&idev->mc_report_queue); spin_lock_init(&idev->mc_query_lock); spin_lock_init(&idev->mc_report_lock); mutex_init(&idev->mc_lock); ipv6_mc_reset(idev); } /* * Device is about to be destroyed: clean up. */ void ipv6_mc_destroy_dev(struct inet6_dev *idev) { struct ifmcaddr6 *i; /* Deactivate works */ ipv6_mc_down(idev); mutex_lock(&idev->mc_lock); mld_clear_delrec(idev); mutex_unlock(&idev->mc_lock); mld_clear_query(idev); mld_clear_report(idev); /* Delete all-nodes address. */ /* We cannot call ipv6_dev_mc_dec() directly, our caller in * addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will * fail. 
*/ __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allnodes); if (idev->cnf.forwarding) __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters); mutex_lock(&idev->mc_lock); while ((i = mc_dereference(idev->mc_list, idev))) { rcu_assign_pointer(idev->mc_list, mc_dereference(i->next, idev)); ip6_mc_clear_src(i); ma_put(i); } mutex_unlock(&idev->mc_lock); } static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) { struct ifmcaddr6 *pmc; ASSERT_RTNL(); mutex_lock(&idev->mc_lock); if (mld_in_v1_mode(idev)) { for_each_mc_mclock(idev, pmc) igmp6_join_group(pmc); } else { mld_send_report(idev, NULL); } mutex_unlock(&idev->mc_lock); } static int ipv6_mc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev = __in6_dev_get(dev); switch (event) { case NETDEV_RESEND_IGMP: if (idev) ipv6_mc_rejoin_groups(idev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block igmp6_netdev_notifier = { .notifier_call = ipv6_mc_netdev_event, }; #ifdef CONFIG_PROC_FS struct igmp6_mc_iter_state { struct seq_net_private p; struct net_device *dev; struct inet6_dev *idev; }; #define igmp6_mc_seq_private(seq) ((struct igmp6_mc_iter_state *)(seq)->private) static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) { struct ifmcaddr6 *im = NULL; struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); struct net *net = seq_file_net(seq); state->idev = NULL; for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; idev = __in6_dev_get(state->dev); if (!idev) continue; im = rcu_dereference(idev->mc_list); if (im) { state->idev = idev; break; } } return im; } static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr6 *im) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); im = rcu_dereference(im->next); while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; break; } state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; im = rcu_dereference(state->idev->mc_list); } return im; } static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos) { struct ifmcaddr6 *im = igmp6_mc_get_first(seq); if (im) while (pos && (im = igmp6_mc_get_next(seq, im)) != NULL) --pos; return pos ? NULL : im; } static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return igmp6_mc_get_idx(seq, *pos); } static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v); ++*pos; return im; } static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); if (likely(state->idev)) state->idev = NULL; state->dev = NULL; rcu_read_unlock(); } static int igmp6_mc_seq_show(struct seq_file *seq, void *v) { struct ifmcaddr6 *im = (struct ifmcaddr6 *)v; struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); seq_printf(seq, "%-4d %-15s %pi6 %5d %08X %ld\n", state->dev->ifindex, state->dev->name, &im->mca_addr, im->mca_users, im->mca_flags, (im->mca_flags & MAF_TIMER_RUNNING) ? 
jiffies_to_clock_t(im->mca_work.timer.expires - jiffies) : 0); return 0; } static const struct seq_operations igmp6_mc_seq_ops = { .start = igmp6_mc_seq_start, .next = igmp6_mc_seq_next, .stop = igmp6_mc_seq_stop, .show = igmp6_mc_seq_show, }; struct igmp6_mcf_iter_state { struct seq_net_private p; struct net_device *dev; struct inet6_dev *idev; struct ifmcaddr6 *im; }; #define igmp6_mcf_seq_private(seq) ((struct igmp6_mcf_iter_state *)(seq)->private) static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) { struct ip6_sf_list *psf = NULL; struct ifmcaddr6 *im = NULL; struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); struct net *net = seq_file_net(seq); state->idev = NULL; state->im = NULL; for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; idev = __in6_dev_get(state->dev); if (unlikely(idev == NULL)) continue; im = rcu_dereference(idev->mc_list); if (likely(im)) { psf = rcu_dereference(im->mca_sources); if (likely(psf)) { state->im = im; state->idev = idev; break; } } } return psf; } static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_sf_list *psf) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); psf = rcu_dereference(psf->sf_next); while (!psf) { state->im = rcu_dereference(state->im->next); while (!state->im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; goto out; } state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; state->im = rcu_dereference(state->idev->mc_list); } psf = rcu_dereference(state->im->mca_sources); } out: return psf; } static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos) { struct ip6_sf_list *psf = igmp6_mcf_get_first(seq); if (psf) while (pos && (psf = igmp6_mcf_get_next(seq, psf)) != NULL) --pos; return pos ? NULL : psf; } static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return *pos ? 
igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip6_sf_list *psf; if (v == SEQ_START_TOKEN) psf = igmp6_mcf_get_first(seq); else psf = igmp6_mcf_get_next(seq, v); ++*pos; return psf; } static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (likely(state->im)) state->im = NULL; if (likely(state->idev)) state->idev = NULL; state->dev = NULL; rcu_read_unlock(); } static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) { struct ip6_sf_list *psf = (struct ip6_sf_list *)v; struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (v == SEQ_START_TOKEN) { seq_puts(seq, "Idx Device Multicast Address Source Address INC EXC\n"); } else { seq_printf(seq, "%3d %6.6s %pi6 %pi6 %6lu %6lu\n", state->dev->ifindex, state->dev->name, &state->im->mca_addr, &psf->sf_addr, psf->sf_count[MCAST_INCLUDE], psf->sf_count[MCAST_EXCLUDE]); } return 0; } static const struct seq_operations igmp6_mcf_seq_ops = { .start = igmp6_mcf_seq_start, .next = igmp6_mcf_seq_next, .stop = igmp6_mcf_seq_stop, .show = igmp6_mcf_seq_show, }; static int __net_init igmp6_proc_init(struct net *net) { int err; err = -ENOMEM; if (!proc_create_net("igmp6", 0444, net->proc_net, &igmp6_mc_seq_ops, sizeof(struct igmp6_mc_iter_state))) goto out; if (!proc_create_net("mcfilter6", 0444, net->proc_net, &igmp6_mcf_seq_ops, sizeof(struct igmp6_mcf_iter_state))) goto out_proc_net_igmp6; err = 0; out: return err; out_proc_net_igmp6: remove_proc_entry("igmp6", net->proc_net); goto out; } static void __net_exit igmp6_proc_exit(struct net *net) { remove_proc_entry("mcfilter6", net->proc_net); remove_proc_entry("igmp6", net->proc_net); } #else static inline int igmp6_proc_init(struct net *net) { return 0; } static inline void igmp6_proc_exit(struct net *net) { } #endif static int __net_init igmp6_net_init(struct net *net) { int err; err = inet_ctl_sock_create(&net->ipv6.igmp_sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { pr_err("Failed to initialize the IGMP6 control socket (err %d)\n", err); goto out; } inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; net->ipv6.igmp_sk->sk_allocation = GFP_KERNEL; err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n", err); goto out_sock_create; } err = igmp6_proc_init(net); if (err) goto out_sock_create_autojoin; return 0; out_sock_create_autojoin: inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); out_sock_create: inet_ctl_sock_destroy(net->ipv6.igmp_sk); out: return err; } static void __net_exit igmp6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.igmp_sk); inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); igmp6_proc_exit(net); } static struct pernet_operations igmp6_net_ops = { .init = igmp6_net_init, .exit = igmp6_net_exit, }; int __init igmp6_init(void) { int err; err = register_pernet_subsys(&igmp6_net_ops); if (err) return err; mld_wq = create_workqueue("mld"); if (!mld_wq) { unregister_pernet_subsys(&igmp6_net_ops); return -ENOMEM; } return err; } int __init igmp6_late_init(void) { return register_netdevice_notifier(&igmp6_netdev_notifier); } void igmp6_cleanup(void) { unregister_pernet_subsys(&igmp6_net_ops); destroy_workqueue(mld_wq); } void igmp6_late_cleanup(void) { unregister_netdevice_notifier(&igmp6_netdev_notifier); }
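/*
 * Illustrative userspace sketch (not part of this file): the MLD machinery
 * above is normally driven through the socket API rather than called
 * directly. An any-source join via setsockopt(IPV6_JOIN_GROUP) ends up
 * creating the group entry and reporting through paths such as
 * igmp6_join_group()/mld_ifc_event() above, while source-specific joins
 * (MCAST_JOIN_SOURCE_GROUP) exercise the ip6_mc_add_src()/ip6_mc_del_src()
 * filter paths. The group address and interface name below are placeholders.
 */
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct ipv6_mreq mreq;
	int fd;

	fd = socket(AF_INET6, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&mreq, 0, sizeof(mreq));
	inet_pton(AF_INET6, "ff15::1234", &mreq.ipv6mr_multiaddr);
	mreq.ipv6mr_interface = if_nametoindex("eth0");	/* 0 lets the kernel choose */

	/* Any-source join: the kernel adds the group in EXCLUDE{} mode and
	 * sends unsolicited MLD reports for it. */
	if (setsockopt(fd, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) {
		perror("IPV6_JOIN_GROUP");
		close(fd);
		return 1;
	}

	/* ... receive group traffic here; closing the socket leaves the group ... */
	close(fd);
	return 0;
}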
// SPDX-License-Identifier: GPL-2.0
/*
 * module.c - module sysfs fun for drivers
 */
#include <linux/device.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
#include "base.h"

static char *make_driver_name(struct device_driver *drv)
{
	char *driver_name;

	driver_name = kasprintf(GFP_KERNEL, "%s:%s", drv->bus->name, drv->name);
	if (!driver_name)
		return NULL;

	return driver_name;
}

static void module_create_drivers_dir(struct module_kobject *mk)
{
	static DEFINE_MUTEX(drivers_dir_mutex);

	mutex_lock(&drivers_dir_mutex);
	if (mk && !mk->drivers_dir)
		mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
	mutex_unlock(&drivers_dir_mutex);
}

void module_add_driver(struct module *mod, struct device_driver *drv)
{
	char *driver_name;
	int no_warn;
	struct module_kobject *mk = NULL;

	if (!drv)
		return;

	if (mod)
		mk = &mod->mkobj;
	else if (drv->mod_name) {
		struct kobject *mkobj;

		/* Lookup built-in module entry in /sys/modules */
		mkobj = kset_find_obj(module_kset, drv->mod_name);
		if (mkobj) {
			mk = container_of(mkobj, struct module_kobject, kobj);
			/* remember our module structure */
			drv->p->mkobj = mk;
			/* kset_find_obj took a reference */
			kobject_put(mkobj);
		}
	}

	if (!mk)
		return;

	/* Don't check return codes; these calls are idempotent */
	no_warn = sysfs_create_link(&drv->p->kobj, &mk->kobj, "module");
	driver_name = make_driver_name(drv);
	if (driver_name) {
		module_create_drivers_dir(mk);
		no_warn = sysfs_create_link(mk->drivers_dir, &drv->p->kobj,
					    driver_name);
		kfree(driver_name);
	}
}

void module_remove_driver(struct device_driver *drv)
{
	struct module_kobject *mk = NULL;
	char *driver_name;

	if (!drv)
		return;

	sysfs_remove_link(&drv->p->kobj, "module");

	if (drv->owner)
		mk = &drv->owner->mkobj;
	else if (drv->p->mkobj)
		mk = drv->p->mkobj;
	if (mk && mk->drivers_dir) {
		driver_name = make_driver_name(drv);
		if (driver_name) {
			sysfs_remove_link(mk->drivers_dir, driver_name);
			kfree(driver_name);
		}
	}
}
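/*
 * Illustrative userspace sketch (not part of this file): module_add_driver()
 * above creates a "module" symlink in the driver's sysfs directory and a
 * "<bus>:<driver>" link (the make_driver_name() format) under
 * /sys/module/<mod>/drivers/. The snippet resolves which module backs a
 * driver by reading that "module" link; the "pci"/"e1000e" names are
 * placeholders.
 */
#include <libgen.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *link = "/sys/bus/pci/drivers/e1000e/module";
	char target[PATH_MAX];
	ssize_t n;

	n = readlink(link, target, sizeof(target) - 1);
	if (n < 0) {
		perror("readlink");
		return 1;
	}
	target[n] = '\0';

	/* The link points at /sys/module/<modname>, so basename() is the module name. */
	printf("e1000e is provided by module %s\n", basename(target));
	return 0;
}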
// SPDX-License-Identifier: GPL-2.0
/*
 * ACPI helpers for GPIO API
 *
 * Copyright (C) 2012, Intel Corporation
 * Authors: Mathias Nyman <mathias.nyman@linux.intel.com>
 *          Mika Westerberg <mika.westerberg@linux.intel.com>
 */

#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/mutex.h>
#include <linux/pinctrl/pinctrl.h>

#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
#include <linux/gpio/machine.h>

#include "gpiolib.h"
#include "gpiolib-acpi.h"

static int run_edge_events_on_boot = -1;
module_param(run_edge_events_on_boot, int, 0444);
MODULE_PARM_DESC(run_edge_events_on_boot,
		 "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto");

static char *ignore_wake;
module_param(ignore_wake, charp, 0444);
MODULE_PARM_DESC(ignore_wake,
		 "controller@pin combos on which to ignore the ACPI wake flag "
		 "ignore_wake=controller@pin[,controller@pin[,...]]");

static char *ignore_interrupt;
module_param(ignore_interrupt, charp, 0444);
MODULE_PARM_DESC(ignore_interrupt,
		 "controller@pin combos on which to ignore interrupt "
		 "ignore_interrupt=controller@pin[,controller@pin[,...]]");

struct acpi_gpiolib_dmi_quirk {
	bool no_edge_events_on_boot;
	char *ignore_wake;
	char *ignore_interrupt;
};

/**
 * struct acpi_gpio_event - ACPI GPIO event handler data
 *
 * @node: list-entry of the events list of the struct acpi_gpio_chip
 * @handle: handle of ACPI method to execute when the IRQ triggers
 * @handler: handler function to pass to request_irq() when requesting the IRQ
 * @pin: GPIO pin number on the struct gpio_chip
 * @irq: Linux IRQ number for the event, for request_irq() / free_irq()
 * @irqflags: flags to pass to request_irq() when requesting the IRQ
 * @irq_is_wake: If the ACPI flags indicate the IRQ is a wakeup source
 * @irq_requested:True if request_irq() has been done
 * @desc: struct gpio_desc for the GPIO pin for this event
 */
struct acpi_gpio_event {
	struct list_head node;
	acpi_handle handle;
	irq_handler_t handler;
	unsigned int pin;
	unsigned int irq;
	unsigned long irqflags;
	bool irq_is_wake;
	bool irq_requested;
	struct gpio_desc *desc;
};

struct acpi_gpio_connection {
	struct list_head node;
	unsigned int pin;
	struct gpio_desc *desc;
};

struct acpi_gpio_chip {
	/*
	 * ACPICA requires that the first field of the context parameter
	 * passed to acpi_install_address_space_handler() is large enough
	 * to hold struct acpi_connection_info.
*/ struct acpi_connection_info conn_info; struct list_head conns; struct mutex conn_lock; struct gpio_chip *chip; struct list_head events; struct list_head deferred_req_irqs_list_entry; }; /** * struct acpi_gpio_info - ACPI GPIO specific information * @adev: reference to ACPI device which consumes GPIO resource * @flags: GPIO initialization flags * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo * @pin_config: pin bias as provided by ACPI * @polarity: interrupt polarity as provided by ACPI * @triggering: triggering type as provided by ACPI * @wake_capable: wake capability as provided by ACPI * @debounce: debounce timeout as provided by ACPI * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping */ struct acpi_gpio_info { struct acpi_device *adev; enum gpiod_flags flags; bool gpioint; int pin_config; int polarity; int triggering; bool wake_capable; unsigned int debounce; unsigned int quirks; }; /* * For GPIO chips which call acpi_gpiochip_request_interrupts() before late_init * (so builtin drivers) we register the ACPI GpioInt IRQ handlers from a * late_initcall_sync() handler, so that other builtin drivers can register their * OpRegions before the event handlers can run. This list contains GPIO chips * for which the acpi_gpiochip_request_irqs() call has been deferred. */ static DEFINE_MUTEX(acpi_gpio_deferred_req_irqs_lock); static LIST_HEAD(acpi_gpio_deferred_req_irqs_list); static bool acpi_gpio_deferred_req_irqs_done; static int acpi_gpiochip_find(struct gpio_chip *gc, const void *data) { return device_match_acpi_handle(&gc->gpiodev->dev, data); } /** * acpi_get_gpiod() - Translate ACPI GPIO pin to GPIO descriptor usable with GPIO API * @path: ACPI GPIO controller full path name, (e.g. "\\_SB.GPO1") * @pin: ACPI GPIO pin number (0-based, controller-relative) * * Return: GPIO descriptor to use with Linux generic GPIO API, or ERR_PTR * error value. Specifically returns %-EPROBE_DEFER if the referenced GPIO * controller does not have GPIO chip registered at the moment. This is to * support probe deferral. */ static struct gpio_desc *acpi_get_gpiod(char *path, unsigned int pin) { acpi_handle handle; acpi_status status; status = acpi_get_handle(NULL, path, &handle); if (ACPI_FAILURE(status)) return ERR_PTR(-ENODEV); struct gpio_device *gdev __free(gpio_device_put) = gpio_device_find(handle, acpi_gpiochip_find); if (!gdev) return ERR_PTR(-EPROBE_DEFER); /* * FIXME: keep track of the reference to the GPIO device somehow * instead of putting it here. */ return gpio_device_get_desc(gdev, pin); } static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) { struct acpi_gpio_event *event = data; acpi_evaluate_object(event->handle, NULL, NULL, NULL); return IRQ_HANDLED; } static irqreturn_t acpi_gpio_irq_handler_evt(int irq, void *data) { struct acpi_gpio_event *event = data; acpi_execute_simple_method(event->handle, NULL, event->pin); return IRQ_HANDLED; } static void acpi_gpio_chip_dh(acpi_handle handle, void *data) { /* The address of this function is used as a key. */ } bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { struct acpi_resource_gpio *gpio; if (ares->type != ACPI_RESOURCE_TYPE_GPIO) return false; gpio = &ares->data.gpio; if (gpio->connection_type != ACPI_RESOURCE_GPIO_TYPE_INT) return false; *agpio = gpio; return true; } EXPORT_SYMBOL_GPL(acpi_gpio_get_irq_resource); /** * acpi_gpio_get_io_resource - Fetch details of an ACPI resource if it is a GPIO * I/O resource or return False if not. 
* @ares: Pointer to the ACPI resource to fetch * @agpio: Pointer to a &struct acpi_resource_gpio to store the output pointer */ bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { struct acpi_resource_gpio *gpio; if (ares->type != ACPI_RESOURCE_TYPE_GPIO) return false; gpio = &ares->data.gpio; if (gpio->connection_type != ACPI_RESOURCE_GPIO_TYPE_IO) return false; *agpio = gpio; return true; } EXPORT_SYMBOL_GPL(acpi_gpio_get_io_resource); static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio, struct acpi_gpio_event *event) { struct device *parent = acpi_gpio->chip->parent; int ret, value; ret = request_threaded_irq(event->irq, NULL, event->handler, event->irqflags | IRQF_ONESHOT, "ACPI:Event", event); if (ret) { dev_err(parent, "Failed to setup interrupt handler for %d\n", event->irq); return; } if (event->irq_is_wake) enable_irq_wake(event->irq); event->irq_requested = true; /* Make sure we trigger the initial state of edge-triggered IRQs */ if (run_edge_events_on_boot && (event->irqflags & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING))) { value = gpiod_get_raw_value_cansleep(event->desc); if (((event->irqflags & IRQF_TRIGGER_RISING) && value == 1) || ((event->irqflags & IRQF_TRIGGER_FALLING) && value == 0)) event->handler(event->irq, event); } } static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio) { struct acpi_gpio_event *event; list_for_each_entry(event, &acpi_gpio->events, node) acpi_gpiochip_request_irq(acpi_gpio, event); } static enum gpiod_flags acpi_gpio_to_gpiod_flags(const struct acpi_resource_gpio *agpio, int polarity) { /* GpioInt() implies input configuration */ if (agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT) return GPIOD_IN; switch (agpio->io_restriction) { case ACPI_IO_RESTRICT_INPUT: return GPIOD_IN; case ACPI_IO_RESTRICT_OUTPUT: /* * ACPI GPIO resources don't contain an initial value for the * GPIO. Therefore we deduce that value from the pull field * and the polarity instead. If the pin is pulled up we assume * default to be high, if it is pulled down we assume default * to be low, otherwise we leave pin untouched. For active low * polarity values will be switched. See also * Documentation/firmware-guide/acpi/gpio-properties.rst. */ switch (agpio->pin_config) { case ACPI_PIN_CONFIG_PULLUP: return polarity == GPIO_ACTIVE_LOW ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH; case ACPI_PIN_CONFIG_PULLDOWN: return polarity == GPIO_ACTIVE_LOW ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; default: break; } break; default: break; } /* * Assume that the BIOS has configured the direction and pull * accordingly. 
*/ return GPIOD_ASIS; } static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, struct acpi_resource_gpio *agpio, unsigned int index, const char *label) { int polarity = GPIO_ACTIVE_HIGH; enum gpiod_flags flags = acpi_gpio_to_gpiod_flags(agpio, polarity); unsigned int pin = agpio->pin_table[index]; struct gpio_desc *desc; int ret; desc = gpiochip_request_own_desc(chip, pin, label, polarity, flags); if (IS_ERR(desc)) return desc; /* ACPI uses hundredths of milliseconds units */ ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10); if (ret) dev_warn(chip->parent, "Failed to set debounce-timeout for pin 0x%04X, err %d\n", pin, ret); return desc; } static bool acpi_gpio_in_ignore_list(const char *ignore_list, const char *controller_in, unsigned int pin_in) { const char *controller, *pin_str; unsigned int pin; char *endp; int len; controller = ignore_list; while (controller) { pin_str = strchr(controller, '@'); if (!pin_str) goto err; len = pin_str - controller; if (len == strlen(controller_in) && strncmp(controller, controller_in, len) == 0) { pin = simple_strtoul(pin_str + 1, &endp, 10); if (*endp != 0 && *endp != ',') goto err; if (pin == pin_in) return true; } controller = strchr(controller, ','); if (controller) controller++; } return false; err: pr_err_once("Error: Invalid value for gpiolib_acpi.ignore_...: %s\n", ignore_list); return false; } static bool acpi_gpio_irq_is_wake(struct device *parent, const struct acpi_resource_gpio *agpio) { unsigned int pin = agpio->pin_table[0]; if (agpio->wake_capable != ACPI_WAKE_CAPABLE) return false; if (acpi_gpio_in_ignore_list(ignore_wake, dev_name(parent), pin)) { dev_info(parent, "Ignoring wakeup on pin %u\n", pin); return false; } return true; } /* Always returns AE_OK so that we keep looping over the resources */ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, void *context) { struct acpi_gpio_chip *acpi_gpio = context; struct gpio_chip *chip = acpi_gpio->chip; struct acpi_resource_gpio *agpio; acpi_handle handle, evt_handle; struct acpi_gpio_event *event; irq_handler_t handler = NULL; struct gpio_desc *desc; unsigned int pin; int ret, irq; if (!acpi_gpio_get_irq_resource(ares, &agpio)) return AE_OK; handle = ACPI_HANDLE(chip->parent); pin = agpio->pin_table[0]; if (pin <= 255) { char ev_name[8]; sprintf(ev_name, "_%c%02X", agpio->triggering == ACPI_EDGE_SENSITIVE ? 
'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) handler = acpi_gpio_irq_handler; } if (!handler) { if (ACPI_SUCCESS(acpi_get_handle(handle, "_EVT", &evt_handle))) handler = acpi_gpio_irq_handler_evt; } if (!handler) return AE_OK; if (acpi_gpio_in_ignore_list(ignore_interrupt, dev_name(chip->parent), pin)) { dev_info(chip->parent, "Ignoring interrupt on pin %u\n", pin); return AE_OK; } desc = acpi_request_own_gpiod(chip, agpio, 0, "ACPI:Event"); if (IS_ERR(desc)) { dev_err(chip->parent, "Failed to request GPIO for pin 0x%04X, err %ld\n", pin, PTR_ERR(desc)); return AE_OK; } ret = gpiochip_lock_as_irq(chip, pin); if (ret) { dev_err(chip->parent, "Failed to lock GPIO pin 0x%04X as interrupt, err %d\n", pin, ret); goto fail_free_desc; } irq = gpiod_to_irq(desc); if (irq < 0) { dev_err(chip->parent, "Failed to translate GPIO pin 0x%04X to IRQ, err %d\n", pin, irq); goto fail_unlock_irq; } event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) goto fail_unlock_irq; event->irqflags = IRQF_ONESHOT; if (agpio->triggering == ACPI_LEVEL_SENSITIVE) { if (agpio->polarity == ACPI_ACTIVE_HIGH) event->irqflags |= IRQF_TRIGGER_HIGH; else event->irqflags |= IRQF_TRIGGER_LOW; } else { switch (agpio->polarity) { case ACPI_ACTIVE_HIGH: event->irqflags |= IRQF_TRIGGER_RISING; break; case ACPI_ACTIVE_LOW: event->irqflags |= IRQF_TRIGGER_FALLING; break; default: event->irqflags |= IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; break; } } event->handle = evt_handle; event->handler = handler; event->irq = irq; event->irq_is_wake = acpi_gpio_irq_is_wake(chip->parent, agpio); event->pin = pin; event->desc = desc; list_add_tail(&event->node, &acpi_gpio->events); return AE_OK; fail_unlock_irq: gpiochip_unlock_as_irq(chip, pin); fail_free_desc: gpiochip_free_own_desc(desc); return AE_OK; } /** * acpi_gpiochip_request_interrupts() - Register isr for gpio chip ACPI events * @chip: GPIO chip * * ACPI5 platforms can use GPIO signaled ACPI events. These GPIO interrupts are * handled by ACPI event methods which need to be called from the GPIO * chip's interrupt handler. acpi_gpiochip_request_interrupts() finds out which * GPIO pins have ACPI event methods and assigns interrupt handlers that calls * the ACPI event methods for those pins. */ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { struct acpi_gpio_chip *acpi_gpio; acpi_handle handle; acpi_status status; bool defer; if (!chip->parent || !chip->to_irq) return; handle = ACPI_HANDLE(chip->parent); if (!handle) return; status = acpi_get_data(handle, acpi_gpio_chip_dh, (void **)&acpi_gpio); if (ACPI_FAILURE(status)) return; if (acpi_quirk_skip_gpio_event_handlers()) return; acpi_walk_resources(handle, METHOD_NAME__AEI, acpi_gpiochip_alloc_event, acpi_gpio); mutex_lock(&acpi_gpio_deferred_req_irqs_lock); defer = !acpi_gpio_deferred_req_irqs_done; if (defer) list_add(&acpi_gpio->deferred_req_irqs_list_entry, &acpi_gpio_deferred_req_irqs_list); mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); if (defer) return; acpi_gpiochip_request_irqs(acpi_gpio); } EXPORT_SYMBOL_GPL(acpi_gpiochip_request_interrupts); /** * acpi_gpiochip_free_interrupts() - Free GPIO ACPI event interrupts. * @chip: GPIO chip * * Free interrupts associated with GPIO ACPI event method for the given * GPIO chip. 
*/ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { struct acpi_gpio_chip *acpi_gpio; struct acpi_gpio_event *event, *ep; acpi_handle handle; acpi_status status; if (!chip->parent || !chip->to_irq) return; handle = ACPI_HANDLE(chip->parent); if (!handle) return; status = acpi_get_data(handle, acpi_gpio_chip_dh, (void **)&acpi_gpio); if (ACPI_FAILURE(status)) return; mutex_lock(&acpi_gpio_deferred_req_irqs_lock); if (!list_empty(&acpi_gpio->deferred_req_irqs_list_entry)) list_del_init(&acpi_gpio->deferred_req_irqs_list_entry); mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { if (event->irq_requested) { if (event->irq_is_wake) disable_irq_wake(event->irq); free_irq(event->irq, event); } gpiochip_unlock_as_irq(chip, event->pin); gpiochip_free_own_desc(event->desc); list_del(&event->node); kfree(event); } } EXPORT_SYMBOL_GPL(acpi_gpiochip_free_interrupts); int acpi_dev_add_driver_gpios(struct acpi_device *adev, const struct acpi_gpio_mapping *gpios) { if (adev && gpios) { adev->driver_gpios = gpios; return 0; } return -EINVAL; } EXPORT_SYMBOL_GPL(acpi_dev_add_driver_gpios); void acpi_dev_remove_driver_gpios(struct acpi_device *adev) { if (adev) adev->driver_gpios = NULL; } EXPORT_SYMBOL_GPL(acpi_dev_remove_driver_gpios); static void acpi_dev_release_driver_gpios(void *adev) { acpi_dev_remove_driver_gpios(adev); } int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios) { struct acpi_device *adev = ACPI_COMPANION(dev); int ret; ret = acpi_dev_add_driver_gpios(adev, gpios); if (ret) return ret; return devm_add_action_or_reset(dev, acpi_dev_release_driver_gpios, adev); } EXPORT_SYMBOL_GPL(devm_acpi_dev_add_driver_gpios); static bool acpi_get_driver_gpio_data(struct acpi_device *adev, const char *name, int index, struct fwnode_reference_args *args, unsigned int *quirks) { const struct acpi_gpio_mapping *gm; if (!adev || !adev->driver_gpios) return false; for (gm = adev->driver_gpios; gm->name; gm++) if (!strcmp(name, gm->name) && gm->data && index < gm->size) { const struct acpi_gpio_params *par = gm->data + index; args->fwnode = acpi_fwnode_handle(adev); args->args[0] = par->crs_entry_index; args->args[1] = par->line_index; args->args[2] = par->active_low; args->nargs = 3; *quirks = gm->quirks; return true; } return false; } static int __acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, enum gpiod_flags update) { const enum gpiod_flags mask = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_DIR_VAL; int ret = 0; /* * Check if the BIOS has IoRestriction with explicitly set direction * and update @flags accordingly. Otherwise use whatever caller asked * for. */ if (update & GPIOD_FLAGS_BIT_DIR_SET) { enum gpiod_flags diff = *flags ^ update; /* * Check if caller supplied incompatible GPIO initialization * flags. * * Return %-EINVAL to notify that firmware has different * settings and we are going to use them. 
*/ if (((*flags & GPIOD_FLAGS_BIT_DIR_SET) && (diff & GPIOD_FLAGS_BIT_DIR_OUT)) || ((*flags & GPIOD_FLAGS_BIT_DIR_OUT) && (diff & GPIOD_FLAGS_BIT_DIR_VAL))) ret = -EINVAL; *flags = (*flags & ~mask) | (update & mask); } return ret; } static int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *info) { struct device *dev = &info->adev->dev; enum gpiod_flags old = *flags; int ret; ret = __acpi_gpio_update_gpiod_flags(&old, info->flags); if (info->quirks & ACPI_GPIO_QUIRK_NO_IO_RESTRICTION) { if (ret) dev_warn(dev, FW_BUG "GPIO not in correct mode, fixing\n"); } else { if (ret) dev_dbg(dev, "Override GPIO initialization flags\n"); *flags = old; } return ret; } static int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, struct acpi_gpio_info *info) { switch (info->pin_config) { case ACPI_PIN_CONFIG_PULLUP: *lookupflags |= GPIO_PULL_UP; break; case ACPI_PIN_CONFIG_PULLDOWN: *lookupflags |= GPIO_PULL_DOWN; break; case ACPI_PIN_CONFIG_NOPULL: *lookupflags |= GPIO_PULL_DISABLE; break; default: break; } if (info->polarity == GPIO_ACTIVE_LOW) *lookupflags |= GPIO_ACTIVE_LOW; return 0; } struct acpi_gpio_lookup { struct acpi_gpio_info info; int index; u16 pin_index; bool active_low; struct gpio_desc *desc; int n; }; static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data) { struct acpi_gpio_lookup *lookup = data; if (ares->type != ACPI_RESOURCE_TYPE_GPIO) return 1; if (!lookup->desc) { const struct acpi_resource_gpio *agpio = &ares->data.gpio; bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT; struct gpio_desc *desc; u16 pin_index; if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint) lookup->index++; if (lookup->n++ != lookup->index) return 1; pin_index = lookup->pin_index; if (pin_index >= agpio->pin_table_length) return 1; if (lookup->info.quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER) desc = gpio_to_desc(agpio->pin_table[pin_index]); else desc = acpi_get_gpiod(agpio->resource_source.string_ptr, agpio->pin_table[pin_index]); lookup->desc = desc; lookup->info.pin_config = agpio->pin_config; lookup->info.debounce = agpio->debounce_timeout; lookup->info.gpioint = gpioint; lookup->info.wake_capable = acpi_gpio_irq_is_wake(&lookup->info.adev->dev, agpio); /* * Polarity and triggering are only specified for GpioInt * resource. 
* Note: we expect here: * - ACPI_ACTIVE_LOW == GPIO_ACTIVE_LOW * - ACPI_ACTIVE_HIGH == GPIO_ACTIVE_HIGH */ if (lookup->info.gpioint) { lookup->info.polarity = agpio->polarity; lookup->info.triggering = agpio->triggering; } else { lookup->info.polarity = lookup->active_low; } lookup->info.flags = acpi_gpio_to_gpiod_flags(agpio, lookup->info.polarity); } return 1; } static int acpi_gpio_resource_lookup(struct acpi_gpio_lookup *lookup, struct acpi_gpio_info *info) { struct acpi_device *adev = lookup->info.adev; struct list_head res_list; int ret; INIT_LIST_HEAD(&res_list); ret = acpi_dev_get_resources(adev, &res_list, acpi_populate_gpio_lookup, lookup); if (ret < 0) return ret; acpi_dev_free_resource_list(&res_list); if (!lookup->desc) return -ENOENT; if (info) *info = lookup->info; return 0; } static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode, const char *propname, int index, struct acpi_gpio_lookup *lookup) { struct fwnode_reference_args args; unsigned int quirks = 0; int ret; memset(&args, 0, sizeof(args)); ret = __acpi_node_get_property_reference(fwnode, propname, index, 3, &args); if (ret) { struct acpi_device *adev; adev = to_acpi_device_node(fwnode); if (!acpi_get_driver_gpio_data(adev, propname, index, &args, &quirks)) return ret; } /* * The property was found and resolved, so need to lookup the GPIO based * on returned args. */ if (!to_acpi_device_node(args.fwnode)) return -EINVAL; if (args.nargs != 3) return -EPROTO; lookup->index = args.args[0]; lookup->pin_index = args.args[1]; lookup->active_low = !!args.args[2]; lookup->info.adev = to_acpi_device_node(args.fwnode); lookup->info.quirks = quirks; return 0; } /** * acpi_get_gpiod_by_index() - get a GPIO descriptor from device resources * @adev: pointer to a ACPI device to get GPIO from * @propname: Property name of the GPIO (optional) * @index: index of GpioIo/GpioInt resource (starting from %0) * @info: info pointer to fill in (optional) * * Function goes through ACPI resources for @adev and based on @index looks * up a GpioIo/GpioInt resource, translates it to the Linux GPIO descriptor, * and returns it. @index matches GpioIo/GpioInt resources only so if there * are total %3 GPIO resources, the index goes from %0 to %2. * * If @propname is specified the GPIO is looked using device property. In * that case @index is used to select the GPIO entry in the property value * (in case of multiple). * * If the GPIO cannot be translated or there is an error, an ERR_PTR is * returned. * * Note: if the GPIO resource has multiple entries in the pin list, this * function only returns the first. */ static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, const char *propname, int index, struct acpi_gpio_info *info) { struct acpi_gpio_lookup lookup; int ret; if (!adev) return ERR_PTR(-ENODEV); memset(&lookup, 0, sizeof(lookup)); lookup.index = index; if (propname) { dev_dbg(&adev->dev, "GPIO: looking up %s\n", propname); ret = acpi_gpio_property_lookup(acpi_fwnode_handle(adev), propname, index, &lookup); if (ret) return ERR_PTR(ret); dev_dbg(&adev->dev, "GPIO: _DSD returned %s %d %u %u\n", dev_name(&lookup.info.adev->dev), lookup.index, lookup.pin_index, lookup.active_low); } else { dev_dbg(&adev->dev, "GPIO: looking up %d in _CRS\n", index); lookup.info.adev = adev; } ret = acpi_gpio_resource_lookup(&lookup, info); return ret ? 
ERR_PTR(ret) : lookup.desc; } /** * acpi_get_gpiod_from_data() - get a GPIO descriptor from ACPI data node * @fwnode: pointer to an ACPI firmware node to get the GPIO information from * @propname: Property name of the GPIO * @index: index of GpioIo/GpioInt resource (starting from %0) * @info: info pointer to fill in (optional) * * This function uses the property-based GPIO lookup to get to the GPIO * resource with the relevant information from a data-only ACPI firmware node * and uses that to obtain the GPIO descriptor to return. * * If the GPIO cannot be translated or there is an error an ERR_PTR is * returned. */ static struct gpio_desc *acpi_get_gpiod_from_data(struct fwnode_handle *fwnode, const char *propname, int index, struct acpi_gpio_info *info) { struct acpi_gpio_lookup lookup; int ret; if (!is_acpi_data_node(fwnode)) return ERR_PTR(-ENODEV); if (!propname) return ERR_PTR(-EINVAL); memset(&lookup, 0, sizeof(lookup)); lookup.index = index; ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup); if (ret) return ERR_PTR(ret); ret = acpi_gpio_resource_lookup(&lookup, info); return ret ? ERR_PTR(ret) : lookup.desc; } static bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id) { /* Never allow fallback if the device has properties */ if (acpi_dev_has_props(adev) || adev->driver_gpios) return false; return con_id == NULL; } struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, unsigned long *lookupflags) { struct acpi_device *adev = to_acpi_device_node(fwnode); struct acpi_gpio_info info; struct gpio_desc *desc; char propname[32]; int i; /* Try first from _DSD */ for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) { if (con_id) { snprintf(propname, sizeof(propname), "%s-%s", con_id, gpio_suffixes[i]); } else { snprintf(propname, sizeof(propname), "%s", gpio_suffixes[i]); } if (adev) desc = acpi_get_gpiod_by_index(adev, propname, idx, &info); else desc = acpi_get_gpiod_from_data(fwnode, propname, idx, &info); if (!IS_ERR(desc)) break; if (PTR_ERR(desc) == -EPROBE_DEFER) return ERR_CAST(desc); } /* Then from plain _CRS GPIOs */ if (IS_ERR(desc)) { if (!adev || !acpi_can_fallback_to_crs(adev, con_id)) return ERR_PTR(-ENOENT); desc = acpi_get_gpiod_by_index(adev, NULL, idx, &info); if (IS_ERR(desc)) return desc; } if (info.gpioint && (*dflags == GPIOD_OUT_LOW || *dflags == GPIOD_OUT_HIGH)) { dev_dbg(&adev->dev, "refusing GpioInt() entry when doing GPIOD_OUT_* lookup\n"); return ERR_PTR(-ENOENT); } acpi_gpio_update_gpiod_flags(dflags, &info); acpi_gpio_update_gpiod_lookup_flags(lookupflags, &info); return desc; } /** * acpi_dev_gpio_irq_wake_get_by() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from * @name: optional name of GpioInt resource * @index: index of GpioInt resource (starting from %0) * @wake_capable: Set to true if the IRQ is wake capable * * If the device has one or more GpioInt resources, this function can be * used to translate from the GPIO offset in the resource to the Linux IRQ * number. * * The function is idempotent, though each time it runs it will configure GPIO * pin direction according to the flags in GpioInt resource. * * The function takes optional @name parameter. If the resource has a property * name, then only those will be taken into account. * * The GPIO is considered wake capable if the GpioInt resource specifies * SharedAndWake or ExclusiveAndWake. 
* * Return: Linux IRQ number (> %0) on success, negative errno on failure. */ int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index, bool *wake_capable) { int idx, i; unsigned int irq_flags; int ret; for (i = 0, idx = 0; idx <= index; i++) { struct acpi_gpio_info info; struct gpio_desc *desc; desc = acpi_get_gpiod_by_index(adev, name, i, &info); /* Ignore -EPROBE_DEFER, it only matters if idx matches */ if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) return PTR_ERR(desc); if (info.gpioint && idx++ == index) { unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT; enum gpiod_flags dflags = GPIOD_ASIS; char label[32]; int irq; if (IS_ERR(desc)) return PTR_ERR(desc); irq = gpiod_to_irq(desc); if (irq < 0) return irq; acpi_gpio_update_gpiod_flags(&dflags, &info); acpi_gpio_update_gpiod_lookup_flags(&lflags, &info); snprintf(label, sizeof(label), "GpioInt() %d", index); ret = gpiod_configure_flags(desc, label, lflags, dflags); if (ret < 0) return ret; /* ACPI uses hundredths of milliseconds units */ ret = gpio_set_debounce_timeout(desc, info.debounce * 10); if (ret) return ret; irq_flags = acpi_dev_get_irq_type(info.triggering, info.polarity); /* * If the IRQ is not already in use then set type * if specified and different than the current one. */ if (can_request_irq(irq, irq_flags)) { if (irq_flags != IRQ_TYPE_NONE && irq_flags != irq_get_trigger_type(irq)) irq_set_irq_type(irq, irq_flags); } else { dev_dbg(&adev->dev, "IRQ %d already in use\n", irq); } /* avoid suspend issues with GPIOs when systems are using S3 */ if (wake_capable && acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) *wake_capable = info.wake_capable; return irq; } } return -ENOENT; } EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_wake_get_by); static acpi_status acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, u32 bits, u64 *value, void *handler_context, void *region_context) { struct acpi_gpio_chip *achip = region_context; struct gpio_chip *chip = achip->chip; struct acpi_resource_gpio *agpio; struct acpi_resource *ares; u16 pin_index = address; acpi_status status; int length; int i; status = acpi_buffer_to_resource(achip->conn_info.connection, achip->conn_info.length, &ares); if (ACPI_FAILURE(status)) return status; if (WARN_ON(ares->type != ACPI_RESOURCE_TYPE_GPIO)) { ACPI_FREE(ares); return AE_BAD_PARAMETER; } agpio = &ares->data.gpio; if (WARN_ON(agpio->io_restriction == ACPI_IO_RESTRICT_INPUT && function == ACPI_WRITE)) { ACPI_FREE(ares); return AE_BAD_PARAMETER; } length = min_t(u16, agpio->pin_table_length, pin_index + bits); for (i = pin_index; i < length; ++i) { unsigned int pin = agpio->pin_table[i]; struct acpi_gpio_connection *conn; struct gpio_desc *desc; bool found; mutex_lock(&achip->conn_lock); found = false; list_for_each_entry(conn, &achip->conns, node) { if (conn->pin == pin) { found = true; desc = conn->desc; break; } } /* * The same GPIO can be shared between operation region and * event but only if the access here is ACPI_READ. In that * case we "borrow" the event GPIO instead. 
*/ if (!found && agpio->shareable == ACPI_SHARED && function == ACPI_READ) { struct acpi_gpio_event *event; list_for_each_entry(event, &achip->events, node) { if (event->pin == pin) { desc = event->desc; found = true; break; } } } if (!found) { desc = acpi_request_own_gpiod(chip, agpio, i, "ACPI:OpRegion"); if (IS_ERR(desc)) { mutex_unlock(&achip->conn_lock); status = AE_ERROR; goto out; } conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) { gpiochip_free_own_desc(desc); mutex_unlock(&achip->conn_lock); status = AE_NO_MEMORY; goto out; } conn->pin = pin; conn->desc = desc; list_add_tail(&conn->node, &achip->conns); } mutex_unlock(&achip->conn_lock); if (function == ACPI_WRITE) gpiod_set_raw_value_cansleep(desc, !!(*value & BIT(i))); else *value |= (u64)gpiod_get_raw_value_cansleep(desc) << i; } out: ACPI_FREE(ares); return status; } static void acpi_gpiochip_request_regions(struct acpi_gpio_chip *achip) { struct gpio_chip *chip = achip->chip; acpi_handle handle = ACPI_HANDLE(chip->parent); acpi_status status; INIT_LIST_HEAD(&achip->conns); mutex_init(&achip->conn_lock); status = acpi_install_address_space_handler(handle, ACPI_ADR_SPACE_GPIO, acpi_gpio_adr_space_handler, NULL, achip); if (ACPI_FAILURE(status)) dev_err(chip->parent, "Failed to install GPIO OpRegion handler\n"); } static void acpi_gpiochip_free_regions(struct acpi_gpio_chip *achip) { struct gpio_chip *chip = achip->chip; acpi_handle handle = ACPI_HANDLE(chip->parent); struct acpi_gpio_connection *conn, *tmp; acpi_status status; status = acpi_remove_address_space_handler(handle, ACPI_ADR_SPACE_GPIO, acpi_gpio_adr_space_handler); if (ACPI_FAILURE(status)) { dev_err(chip->parent, "Failed to remove GPIO OpRegion handler\n"); return; } list_for_each_entry_safe_reverse(conn, tmp, &achip->conns, node) { gpiochip_free_own_desc(conn->desc); list_del(&conn->node); kfree(conn); } } static struct gpio_desc * acpi_gpiochip_parse_own_gpio(struct acpi_gpio_chip *achip, struct fwnode_handle *fwnode, const char **name, unsigned long *lflags, enum gpiod_flags *dflags) { struct gpio_chip *chip = achip->chip; struct gpio_desc *desc; u32 gpios[2]; int ret; *lflags = GPIO_LOOKUP_FLAGS_DEFAULT; *dflags = GPIOD_ASIS; *name = NULL; ret = fwnode_property_read_u32_array(fwnode, "gpios", gpios, ARRAY_SIZE(gpios)); if (ret < 0) return ERR_PTR(ret); desc = gpiochip_get_desc(chip, gpios[0]); if (IS_ERR(desc)) return desc; if (gpios[1]) *lflags |= GPIO_ACTIVE_LOW; if (fwnode_property_present(fwnode, "input")) *dflags |= GPIOD_IN; else if (fwnode_property_present(fwnode, "output-low")) *dflags |= GPIOD_OUT_LOW; else if (fwnode_property_present(fwnode, "output-high")) *dflags |= GPIOD_OUT_HIGH; else return ERR_PTR(-EINVAL); fwnode_property_read_string(fwnode, "line-name", name); return desc; } static void acpi_gpiochip_scan_gpios(struct acpi_gpio_chip *achip) { struct gpio_chip *chip = achip->chip; struct fwnode_handle *fwnode; device_for_each_child_node(chip->parent, fwnode) { unsigned long lflags; enum gpiod_flags dflags; struct gpio_desc *desc; const char *name; int ret; if (!fwnode_property_present(fwnode, "gpio-hog")) continue; desc = acpi_gpiochip_parse_own_gpio(achip, fwnode, &name, &lflags, &dflags); if (IS_ERR(desc)) continue; ret = gpiod_hog(desc, name, lflags, dflags); if (ret) { dev_err(chip->parent, "Failed to hog GPIO\n"); fwnode_handle_put(fwnode); return; } } } void acpi_gpiochip_add(struct gpio_chip *chip) { struct acpi_gpio_chip *acpi_gpio; struct acpi_device *adev; acpi_status status; if (!chip || !chip->parent) return; adev = 
ACPI_COMPANION(chip->parent); if (!adev) return; acpi_gpio = kzalloc(sizeof(*acpi_gpio), GFP_KERNEL); if (!acpi_gpio) { dev_err(chip->parent, "Failed to allocate memory for ACPI GPIO chip\n"); return; } acpi_gpio->chip = chip; INIT_LIST_HEAD(&acpi_gpio->events); INIT_LIST_HEAD(&acpi_gpio->deferred_req_irqs_list_entry); status = acpi_attach_data(adev->handle, acpi_gpio_chip_dh, acpi_gpio); if (ACPI_FAILURE(status)) { dev_err(chip->parent, "Failed to attach ACPI GPIO chip\n"); kfree(acpi_gpio); return; } acpi_gpiochip_request_regions(acpi_gpio); acpi_gpiochip_scan_gpios(acpi_gpio); acpi_dev_clear_dependencies(adev); } void acpi_gpiochip_remove(struct gpio_chip *chip) { struct acpi_gpio_chip *acpi_gpio; acpi_handle handle; acpi_status status; if (!chip || !chip->parent) return; handle = ACPI_HANDLE(chip->parent); if (!handle) return; status = acpi_get_data(handle, acpi_gpio_chip_dh, (void **)&acpi_gpio); if (ACPI_FAILURE(status)) { dev_warn(chip->parent, "Failed to retrieve ACPI GPIO chip\n"); return; } acpi_gpiochip_free_regions(acpi_gpio); acpi_detach_data(handle, acpi_gpio_chip_dh); kfree(acpi_gpio); } static int acpi_gpio_package_count(const union acpi_object *obj) { const union acpi_object *element = obj->package.elements; const union acpi_object *end = element + obj->package.count; unsigned int count = 0; while (element < end) { switch (element->type) { case ACPI_TYPE_LOCAL_REFERENCE: element += 3; fallthrough; case ACPI_TYPE_INTEGER: element++; count++; break; default: return -EPROTO; } } return count; } static int acpi_find_gpio_count(struct acpi_resource *ares, void *data) { unsigned int *count = data; if (ares->type == ACPI_RESOURCE_TYPE_GPIO) *count += ares->data.gpio.pin_table_length; return 1; } /** * acpi_gpio_count - count the GPIOs associated with a firmware node / function * @fwnode: firmware node of the GPIO consumer * @con_id: function within the GPIO consumer * * Return: * The number of GPIOs associated with a firmware node / function or %-ENOENT, * if no GPIO has been assigned to the requested function. */ int acpi_gpio_count(const struct fwnode_handle *fwnode, const char *con_id) { struct acpi_device *adev = to_acpi_device_node(fwnode); const union acpi_object *obj; const struct acpi_gpio_mapping *gm; int count = -ENOENT; int ret; char propname[32]; unsigned int i; /* Try first from _DSD */ for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) { if (con_id) snprintf(propname, sizeof(propname), "%s-%s", con_id, gpio_suffixes[i]); else snprintf(propname, sizeof(propname), "%s", gpio_suffixes[i]); ret = acpi_dev_get_property(adev, propname, ACPI_TYPE_ANY, &obj); if (ret == 0) { if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) count = 1; else if (obj->type == ACPI_TYPE_PACKAGE) count = acpi_gpio_package_count(obj); } else if (adev->driver_gpios) { for (gm = adev->driver_gpios; gm->name; gm++) if (strcmp(propname, gm->name) == 0) { count = gm->size; break; } } if (count > 0) break; } /* Then from plain _CRS GPIOs */ if (count < 0) { struct list_head resource_list; unsigned int crs_count = 0; if (!acpi_can_fallback_to_crs(adev, con_id)) return count; INIT_LIST_HEAD(&resource_list); acpi_dev_get_resources(adev, &resource_list, acpi_find_gpio_count, &crs_count); acpi_dev_free_resource_list(&resource_list); if (crs_count > 0) count = crs_count; } return count ? 
count : -ENOENT; } /* Run deferred acpi_gpiochip_request_irqs() */ static int __init acpi_gpio_handle_deferred_request_irqs(void) { struct acpi_gpio_chip *acpi_gpio, *tmp; mutex_lock(&acpi_gpio_deferred_req_irqs_lock); list_for_each_entry_safe(acpi_gpio, tmp, &acpi_gpio_deferred_req_irqs_list, deferred_req_irqs_list_entry) acpi_gpiochip_request_irqs(acpi_gpio); acpi_gpio_deferred_req_irqs_done = true; mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); return 0; } /* We must use _sync so that this runs after the first deferred_probe run */ late_initcall_sync(acpi_gpio_handle_deferred_request_irqs); static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { { /* * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for * a non existing micro-USB-B connector which puts the HDMI * DDC pins in GPIO mode, breaking HDMI support. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .no_edge_events_on_boot = true, }, }, { /* * The Terra Pad 1061 has a micro-USB-B id-pin handler, which * instead of controlling the actual micro-USB-B turns the 5V * boost for its USB-A connector off. The actual micro-USB-B * connector is wired for charging only. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"), DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .no_edge_events_on_boot = true, }, }, { /* * The Dell Venue 10 Pro 5055, with Bay Trail SoC + TI PMIC uses an * external embedded-controller connected via I2C + an ACPI GPIO * event handler on INT33FFC:02 pin 12, causing spurious wakeups. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Venue 10 Pro 5055"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "INT33FC:02@12", }, }, { /* * HP X2 10 models with Cherry Trail SoC + TI PMIC use an * external embedded-controller connected via I2C + an ACPI GPIO * event handler on INT33FF:01 pin 0, causing spurious wakeups. * When suspending by closing the LID, the power to the USB * keyboard is turned off, causing INT0002 ACPI events to * trigger once the XHCI controller notices the keyboard is * gone. So INT0002 events cause spurious wakeups too. Ignoring * EC wakes breaks wakeup when opening the lid, the user needs * to press the power-button to wakeup the system. The * alternative is suspend simply not working, which is worse. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "INT33FF:01@0,INT0002:00@2", }, }, { /* * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an * external embedded-controller connected via I2C + an ACPI GPIO * event handler on INT33FC:02 pin 28, causing spurious wakeups. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), DMI_MATCH(DMI_BOARD_NAME, "815D"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "INT33FC:02@28", }, }, { /* * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an * external embedded-controller connected via I2C + an ACPI GPIO * event handler on INT33FF:01 pin 0, causing spurious wakeups. 
*/ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), DMI_MATCH(DMI_BOARD_NAME, "813E"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "INT33FF:01@0", }, }, { /* * Interrupt storm caused from edge triggered floating pin * Found in BIOS UX325UAZ.300 * https://bugzilla.kernel.org/show_bug.cgi?id=216208 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UAZ_UM325UAZ"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_interrupt = "AMDI0030:00@18", }, }, { /* * Spurious wakeups from TP_ATTN# pin * Found in BIOS 1.7.8 * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627 */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "ELAN0415:00@9", }, }, { /* * Spurious wakeups from TP_ATTN# pin * Found in BIOS 1.7.8 * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627 */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "ELAN0415:00@9", }, }, { /* * Spurious wakeups from TP_ATTN# pin * Found in BIOS 1.7.7 */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "SYNA1202:00@16", }, }, { /* * On the Peaq C1010 2-in-1 INT33FC:00 pin 3 is connected to * a "dolby" button. At the ACPI level an _AEI event-handler * is connected which sets an ACPI variable to 1 on both * edges. This variable can be polled + cleared to 0 using * WMI. But since the variable is set on both edges the WMI * interface is pretty useless even when polling. * So instead the x86-android-tablets code instantiates * a gpio-keys platform device for it. * Ignore the _AEI handler for the pin, so that it is not busy. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "PEAQ"), DMI_MATCH(DMI_PRODUCT_NAME, "PEAQ PMM C1010 MD99187"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_interrupt = "INT33FC:00@3", }, }, { /* * Spurious wakeups from TP_ATTN# pin * Found in BIOS 0.35 * https://gitlab.freedesktop.org/drm/amd/-/issues/3073 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GPD"), DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "PNP0C50:00@8", }, }, {} /* Terminating entry */ }; static int __init acpi_gpio_setup_params(void) { const struct acpi_gpiolib_dmi_quirk *quirk = NULL; const struct dmi_system_id *id; id = dmi_first_match(gpiolib_acpi_quirks); if (id) quirk = id->driver_data; if (run_edge_events_on_boot < 0) { if (quirk && quirk->no_edge_events_on_boot) run_edge_events_on_boot = 0; else run_edge_events_on_boot = 1; } if (ignore_wake == NULL && quirk && quirk->ignore_wake) ignore_wake = quirk->ignore_wake; if (ignore_interrupt == NULL && quirk && quirk->ignore_interrupt) ignore_interrupt = quirk->ignore_interrupt; return 0; } /* Directly after dmi_setup() which runs as core_initcall() */ postcore_initcall(acpi_gpio_setup_params);
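The consumer side of the lookup code above is worth a brief illustration. The sketch below assumes a hypothetical platform driver (example_probe, the "reset" connection ID and the pin indices are made up for illustration): it registers an acpi_gpio_mapping so the name can be resolved via acpi_get_driver_gpio_data() even without a _DSD entry, requests the descriptor through the normal gpiod API (which reaches acpi_find_gpio() and acpi_get_gpiod_by_index()), and translates the first GpioInt resource with acpi_dev_gpio_irq_wake_get_by().

#include <linux/acpi.h>
#include <linux/err.h>
#include <linux/gpio/consumer.h>
#include <linux/platform_device.h>

/* One GpioIo/GpioInt entry: _CRS index 0, pin 0 of that resource, active high */
static const struct acpi_gpio_params reset_gpio = { 0, 0, false };

static const struct acpi_gpio_mapping example_acpi_gpios[] = {
	{ "reset-gpios", &reset_gpio, 1 },
	{ }
};

static int example_probe(struct platform_device *pdev)
{
	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
	struct gpio_desc *reset;
	bool wake = false;
	int irq, ret;

	/* Make "reset-gpios" resolvable even when the DSDT has no _DSD entry */
	ret = devm_acpi_dev_add_driver_gpios(&pdev->dev, example_acpi_gpios);
	if (ret)
		return ret;

	/* Ends up in acpi_find_gpio() -> acpi_get_gpiod_by_index() */
	reset = devm_gpiod_get(&pdev->dev, "reset", GPIOD_OUT_LOW);
	if (IS_ERR(reset))
		return PTR_ERR(reset);

	/* Translate the first GpioInt resource to a Linux IRQ number */
	irq = acpi_dev_gpio_irq_wake_get_by(adev, NULL, 0, &wake);
	if (irq < 0)
		return irq;

	return 0;
}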
// SPDX-License-Identifier: GPL-2.0-only /* * LED Class Core * * Copyright 2005-2006 Openedhand Ltd. 
* * Author: Richard Purdie <rpurdie@openedhand.com> */ #include <linux/kernel.h> #include <linux/leds.h> #include <linux/list.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/property.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <uapi/linux/uleds.h> #include "leds.h" DECLARE_RWSEM(leds_list_lock); EXPORT_SYMBOL_GPL(leds_list_lock); LIST_HEAD(leds_list); EXPORT_SYMBOL_GPL(leds_list); const char * const led_colors[LED_COLOR_ID_MAX] = { [LED_COLOR_ID_WHITE] = "white", [LED_COLOR_ID_RED] = "red", [LED_COLOR_ID_GREEN] = "green", [LED_COLOR_ID_BLUE] = "blue", [LED_COLOR_ID_AMBER] = "amber", [LED_COLOR_ID_VIOLET] = "violet", [LED_COLOR_ID_YELLOW] = "yellow", [LED_COLOR_ID_IR] = "ir", [LED_COLOR_ID_MULTI] = "multicolor", [LED_COLOR_ID_RGB] = "rgb", [LED_COLOR_ID_PURPLE] = "purple", [LED_COLOR_ID_ORANGE] = "orange", [LED_COLOR_ID_PINK] = "pink", [LED_COLOR_ID_CYAN] = "cyan", [LED_COLOR_ID_LIME] = "lime", }; EXPORT_SYMBOL_GPL(led_colors); static int __led_set_brightness(struct led_classdev *led_cdev, unsigned int value) { if (!led_cdev->brightness_set) return -ENOTSUPP; led_cdev->brightness_set(led_cdev, value); return 0; } static int __led_set_brightness_blocking(struct led_classdev *led_cdev, unsigned int value) { if (!led_cdev->brightness_set_blocking) return -ENOTSUPP; return led_cdev->brightness_set_blocking(led_cdev, value); } static void led_timer_function(struct timer_list *t) { struct led_classdev *led_cdev = from_timer(led_cdev, t, blink_timer); unsigned long brightness; unsigned long delay; if (!led_cdev->blink_delay_on || !led_cdev->blink_delay_off) { led_set_brightness_nosleep(led_cdev, LED_OFF); clear_bit(LED_BLINK_SW, &led_cdev->work_flags); return; } if (test_and_clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags)) { clear_bit(LED_BLINK_SW, &led_cdev->work_flags); return; } brightness = led_get_brightness(led_cdev); if (!brightness) { /* Time to switch the LED on. */ if (test_and_clear_bit(LED_BLINK_BRIGHTNESS_CHANGE, &led_cdev->work_flags)) brightness = led_cdev->new_blink_brightness; else brightness = led_cdev->blink_brightness; delay = led_cdev->blink_delay_on; } else { /* Store the current brightness value to be able * to restore it when the delay_off period is over. */ led_cdev->blink_brightness = brightness; brightness = LED_OFF; delay = led_cdev->blink_delay_off; } led_set_brightness_nosleep(led_cdev, brightness); /* Return in next iteration if led is in one-shot mode and we are in * the final blink state so that the led is toggled each delay_on + * delay_off milliseconds in worst case. 
*/ if (test_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags)) { if (test_bit(LED_BLINK_INVERT, &led_cdev->work_flags)) { if (brightness) set_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); } else { if (!brightness) set_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); } } mod_timer(&led_cdev->blink_timer, jiffies + msecs_to_jiffies(delay)); } static void set_brightness_delayed_set_brightness(struct led_classdev *led_cdev, unsigned int value) { int ret = 0; ret = __led_set_brightness(led_cdev, value); if (ret == -ENOTSUPP) ret = __led_set_brightness_blocking(led_cdev, value); if (ret < 0 && /* LED HW might have been unplugged, therefore don't warn */ !(ret == -ENODEV && (led_cdev->flags & LED_UNREGISTERING) && (led_cdev->flags & LED_HW_PLUGGABLE))) dev_err(led_cdev->dev, "Setting an LED's brightness failed (%d)\n", ret); } static void set_brightness_delayed(struct work_struct *ws) { struct led_classdev *led_cdev = container_of(ws, struct led_classdev, set_brightness_work); if (test_and_clear_bit(LED_BLINK_DISABLE, &led_cdev->work_flags)) { led_stop_software_blink(led_cdev); set_bit(LED_SET_BRIGHTNESS_OFF, &led_cdev->work_flags); } /* * Triggers may call led_set_brightness(LED_OFF), * led_set_brightness(LED_FULL) in quick succession to disable blinking * and turn the LED on. Both actions may have been scheduled to run * before this work item runs once. To make sure this works properly * handle LED_SET_BRIGHTNESS_OFF first. */ if (test_and_clear_bit(LED_SET_BRIGHTNESS_OFF, &led_cdev->work_flags)) set_brightness_delayed_set_brightness(led_cdev, LED_OFF); if (test_and_clear_bit(LED_SET_BRIGHTNESS, &led_cdev->work_flags)) set_brightness_delayed_set_brightness(led_cdev, led_cdev->delayed_set_value); if (test_and_clear_bit(LED_SET_BLINK, &led_cdev->work_flags)) { unsigned long delay_on = led_cdev->delayed_delay_on; unsigned long delay_off = led_cdev->delayed_delay_off; led_blink_set(led_cdev, &delay_on, &delay_off); } } static void led_set_software_blink(struct led_classdev *led_cdev, unsigned long delay_on, unsigned long delay_off) { int current_brightness; current_brightness = led_get_brightness(led_cdev); if (current_brightness) led_cdev->blink_brightness = current_brightness; if (!led_cdev->blink_brightness) led_cdev->blink_brightness = led_cdev->max_brightness; led_cdev->blink_delay_on = delay_on; led_cdev->blink_delay_off = delay_off; /* never on - just set to off */ if (!delay_on) { led_set_brightness_nosleep(led_cdev, LED_OFF); return; } /* never off - just set to brightness */ if (!delay_off) { led_set_brightness_nosleep(led_cdev, led_cdev->blink_brightness); return; } set_bit(LED_BLINK_SW, &led_cdev->work_flags); mod_timer(&led_cdev->blink_timer, jiffies + 1); } static void led_blink_setup(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { if (!test_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags) && led_cdev->blink_set && !led_cdev->blink_set(led_cdev, delay_on, delay_off)) return; /* blink with 1 Hz as default if nothing specified */ if (!*delay_on && !*delay_off) *delay_on = *delay_off = 500; led_set_software_blink(led_cdev, *delay_on, *delay_off); } void led_init_core(struct led_classdev *led_cdev) { INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed); timer_setup(&led_cdev->blink_timer, led_timer_function, 0); } EXPORT_SYMBOL_GPL(led_init_core); void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { del_timer_sync(&led_cdev->blink_timer); clear_bit(LED_BLINK_SW, &led_cdev->work_flags); 
clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); led_blink_setup(led_cdev, delay_on, delay_off); } EXPORT_SYMBOL_GPL(led_blink_set); void led_blink_set_oneshot(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off, int invert) { if (test_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags) && timer_pending(&led_cdev->blink_timer)) return; set_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); if (invert) set_bit(LED_BLINK_INVERT, &led_cdev->work_flags); else clear_bit(LED_BLINK_INVERT, &led_cdev->work_flags); led_blink_setup(led_cdev, delay_on, delay_off); } EXPORT_SYMBOL_GPL(led_blink_set_oneshot); void led_blink_set_nosleep(struct led_classdev *led_cdev, unsigned long delay_on, unsigned long delay_off) { /* If necessary delegate to a work queue task. */ if (led_cdev->blink_set && led_cdev->brightness_set_blocking) { led_cdev->delayed_delay_on = delay_on; led_cdev->delayed_delay_off = delay_off; set_bit(LED_SET_BLINK, &led_cdev->work_flags); schedule_work(&led_cdev->set_brightness_work); return; } led_blink_set(led_cdev, &delay_on, &delay_off); } EXPORT_SYMBOL_GPL(led_blink_set_nosleep); void led_stop_software_blink(struct led_classdev *led_cdev) { del_timer_sync(&led_cdev->blink_timer); led_cdev->blink_delay_on = 0; led_cdev->blink_delay_off = 0; clear_bit(LED_BLINK_SW, &led_cdev->work_flags); } EXPORT_SYMBOL_GPL(led_stop_software_blink); void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness) { /* * If software blink is active, delay brightness setting * until the next timer tick. */ if (test_bit(LED_BLINK_SW, &led_cdev->work_flags)) { /* * If we need to disable soft blinking delegate this to the * work queue task to avoid problems in case we are called * from hard irq context. */ if (!brightness) { set_bit(LED_BLINK_DISABLE, &led_cdev->work_flags); schedule_work(&led_cdev->set_brightness_work); } else { set_bit(LED_BLINK_BRIGHTNESS_CHANGE, &led_cdev->work_flags); led_cdev->new_blink_brightness = brightness; } return; } led_set_brightness_nosleep(led_cdev, brightness); } EXPORT_SYMBOL_GPL(led_set_brightness); void led_set_brightness_nopm(struct led_classdev *led_cdev, unsigned int value) { /* Use brightness_set op if available, it is guaranteed not to sleep */ if (!__led_set_brightness(led_cdev, value)) return; /* * Brightness setting can sleep, delegate it to a work queue task. * value 0 / LED_OFF is special, since it also disables hw-blinking * (sw-blink disable is handled in led_set_brightness()). * To avoid a hw-blink-disable getting lost when a second brightness * change is done immediately afterwards (before the work runs), * it uses a separate work_flag. 
*/ if (value) { led_cdev->delayed_set_value = value; set_bit(LED_SET_BRIGHTNESS, &led_cdev->work_flags); } else { clear_bit(LED_SET_BRIGHTNESS, &led_cdev->work_flags); clear_bit(LED_SET_BLINK, &led_cdev->work_flags); set_bit(LED_SET_BRIGHTNESS_OFF, &led_cdev->work_flags); } schedule_work(&led_cdev->set_brightness_work); } EXPORT_SYMBOL_GPL(led_set_brightness_nopm); void led_set_brightness_nosleep(struct led_classdev *led_cdev, unsigned int value) { led_cdev->brightness = min(value, led_cdev->max_brightness); if (led_cdev->flags & LED_SUSPENDED) return; led_set_brightness_nopm(led_cdev, led_cdev->brightness); } EXPORT_SYMBOL_GPL(led_set_brightness_nosleep); int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value) { if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) return -EBUSY; led_cdev->brightness = min(value, led_cdev->max_brightness); if (led_cdev->flags & LED_SUSPENDED) return 0; return __led_set_brightness_blocking(led_cdev, led_cdev->brightness); } EXPORT_SYMBOL_GPL(led_set_brightness_sync); int led_update_brightness(struct led_classdev *led_cdev) { int ret; if (led_cdev->brightness_get) { ret = led_cdev->brightness_get(led_cdev); if (ret < 0) return ret; led_cdev->brightness = ret; } return 0; } EXPORT_SYMBOL_GPL(led_update_brightness); u32 *led_get_default_pattern(struct led_classdev *led_cdev, unsigned int *size) { struct fwnode_handle *fwnode = led_cdev->dev->fwnode; u32 *pattern; int count; count = fwnode_property_count_u32(fwnode, "led-pattern"); if (count < 0) return NULL; pattern = kcalloc(count, sizeof(*pattern), GFP_KERNEL); if (!pattern) return NULL; if (fwnode_property_read_u32_array(fwnode, "led-pattern", pattern, count)) { kfree(pattern); return NULL; } *size = count; return pattern; } EXPORT_SYMBOL_GPL(led_get_default_pattern); /* Caller must ensure led_cdev->led_access held */ void led_sysfs_disable(struct led_classdev *led_cdev) { lockdep_assert_held(&led_cdev->led_access); led_cdev->flags |= LED_SYSFS_DISABLE; } EXPORT_SYMBOL_GPL(led_sysfs_disable); /* Caller must ensure led_cdev->led_access held */ void led_sysfs_enable(struct led_classdev *led_cdev) { lockdep_assert_held(&led_cdev->led_access); led_cdev->flags &= ~LED_SYSFS_DISABLE; } EXPORT_SYMBOL_GPL(led_sysfs_enable); static void led_parse_fwnode_props(struct device *dev, struct fwnode_handle *fwnode, struct led_properties *props) { int ret; if (!fwnode) return; if (fwnode_property_present(fwnode, "label")) { ret = fwnode_property_read_string(fwnode, "label", &props->label); if (ret) dev_err(dev, "Error parsing 'label' property (%d)\n", ret); return; } if (fwnode_property_present(fwnode, "color")) { ret = fwnode_property_read_u32(fwnode, "color", &props->color); if (ret) dev_err(dev, "Error parsing 'color' property (%d)\n", ret); else if (props->color >= LED_COLOR_ID_MAX) dev_err(dev, "LED color identifier out of range\n"); else props->color_present = true; } if (!fwnode_property_present(fwnode, "function")) return; ret = fwnode_property_read_string(fwnode, "function", &props->function); if (ret) { dev_err(dev, "Error parsing 'function' property (%d)\n", ret); } if (!fwnode_property_present(fwnode, "function-enumerator")) return; ret = fwnode_property_read_u32(fwnode, "function-enumerator", &props->func_enum); if (ret) { dev_err(dev, "Error parsing 'function-enumerator' property (%d)\n", ret); } else { props->func_enum_present = true; } } int led_compose_name(struct device *dev, struct led_init_data *init_data, char *led_classdev_name) { struct led_properties props = {}; struct 
fwnode_handle *fwnode = init_data->fwnode; const char *devicename = init_data->devicename; if (!led_classdev_name) return -EINVAL; led_parse_fwnode_props(dev, fwnode, &props); if (props.label) { /* * If init_data.devicename is NULL, then it indicates that * DT label should be used as-is for LED class device name. * Otherwise the label is prepended with devicename to compose * the final LED class device name. */ if (!devicename) { strscpy(led_classdev_name, props.label, LED_MAX_NAME_SIZE); } else { snprintf(led_classdev_name, LED_MAX_NAME_SIZE, "%s:%s", devicename, props.label); } } else if (props.function || props.color_present) { char tmp_buf[LED_MAX_NAME_SIZE]; if (props.func_enum_present) { snprintf(tmp_buf, LED_MAX_NAME_SIZE, "%s:%s-%d", props.color_present ? led_colors[props.color] : "", props.function ?: "", props.func_enum); } else { snprintf(tmp_buf, LED_MAX_NAME_SIZE, "%s:%s", props.color_present ? led_colors[props.color] : "", props.function ?: ""); } if (init_data->devname_mandatory) { snprintf(led_classdev_name, LED_MAX_NAME_SIZE, "%s:%s", devicename, tmp_buf); } else { strscpy(led_classdev_name, tmp_buf, LED_MAX_NAME_SIZE); } } else if (init_data->default_label) { if (!devicename) { dev_err(dev, "Legacy LED naming requires devicename segment"); return -EINVAL; } snprintf(led_classdev_name, LED_MAX_NAME_SIZE, "%s:%s", devicename, init_data->default_label); } else if (is_of_node(fwnode)) { strscpy(led_classdev_name, to_of_node(fwnode)->name, LED_MAX_NAME_SIZE); } else return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(led_compose_name); enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode) { const char *state = NULL; if (!fwnode_property_read_string(fwnode, "default-state", &state)) { if (!strcmp(state, "keep")) return LEDS_DEFSTATE_KEEP; if (!strcmp(state, "on")) return LEDS_DEFSTATE_ON; } return LEDS_DEFSTATE_OFF; } EXPORT_SYMBOL_GPL(led_init_default_state_get);
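A short sketch of how a class driver typically consumes the helpers above; the mydev_* names and the register write are hypothetical, while devm_led_classdev_register_ext(), struct led_init_data and the brightness_set hook are the existing LED class interfaces. Registering with init_data.fwnode set is what ends up in led_compose_name(), and led_init_default_state_get() is used here to honour the firmware "default-state" property.

#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/slab.h>

struct mydev_led {
	struct led_classdev cdev;
	/* a hypothetical hardware handle would live here */
};

/* Non-sleeping brightness hook; led_set_brightness_nopm() may call it directly */
static void mydev_led_set(struct led_classdev *cdev, enum led_brightness value)
{
	/* write value to the hypothetical brightness register here */
}

static int mydev_led_probe(struct platform_device *pdev)
{
	struct fwnode_handle *fwnode = dev_fwnode(&pdev->dev);
	struct led_init_data init_data = { .fwnode = fwnode };
	struct mydev_led *led;

	led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL);
	if (!led)
		return -ENOMEM;

	led->cdev.max_brightness = 255;
	led->cdev.brightness_set = mydev_led_set;

	/* Honour the firmware "default-state" property parsed above */
	if (led_init_default_state_get(fwnode) == LEDS_DEFSTATE_ON)
		led->cdev.brightness = led->cdev.max_brightness;

	/* Registration composes the name from color/function/label properties */
	return devm_led_classdev_register_ext(&pdev->dev, &led->cdev, &init_data);
}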
// SPDX-License-Identifier: GPL-2.0-only /* * drm_sysfs.c - Modifications to drm_sysfs_class.c to support * extra sysfs attribute from DRM. Normal drm_sysfs_class * does not allow adding attributes. * * Copyright (c) 2004 Jon Smirl <jonsmirl@gmail.com> * Copyright (c) 2003-2004 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2003-2004 IBM Corp. */ #include <linux/acpi.h> #include <linux/component.h> #include <linux/device.h> #include <linux/err.h> #include <linux/export.h> #include <linux/gfp.h> #include <linux/i2c.h> #include <linux/kdev_t.h> #include <linux/property.h> #include <linux/slab.h> #include <drm/drm_accel.h> #include <drm/drm_connector.h> #include <drm/drm_device.h> #include <drm/drm_file.h> #include <drm/drm_modes.h> #include <drm/drm_print.h> #include <drm/drm_property.h> #include <drm/drm_sysfs.h> #include "drm_internal.h" #include "drm_crtc_internal.h" #define to_drm_minor(d) dev_get_drvdata(d) #define to_drm_connector(d) dev_get_drvdata(d) /** * DOC: overview * * DRM provides very little additional support to drivers for sysfs * interactions, beyond just all the standard stuff. 
Drivers who want to expose * additional sysfs properties and property groups can attach them at either * &drm_device.dev or &drm_connector.kdev. * * Registration is automatically handled when calling drm_dev_register(), or * drm_connector_register() in case of hot-plugged connectors. Unregistration is * also automatically handled by drm_dev_unregister() and * drm_connector_unregister(). */ static struct device_type drm_sysfs_device_minor = { .name = "drm_minor" }; static struct device_type drm_sysfs_device_connector = { .name = "drm_connector", }; struct class *drm_class; #ifdef CONFIG_ACPI static bool drm_connector_acpi_bus_match(struct device *dev) { return dev->type == &drm_sysfs_device_connector; } static struct acpi_device *drm_connector_acpi_find_companion(struct device *dev) { struct drm_connector *connector = to_drm_connector(dev); return to_acpi_device_node(connector->fwnode); } static struct acpi_bus_type drm_connector_acpi_bus = { .name = "drm_connector", .match = drm_connector_acpi_bus_match, .find_companion = drm_connector_acpi_find_companion, }; static void drm_sysfs_acpi_register(void) { register_acpi_bus_type(&drm_connector_acpi_bus); } static void drm_sysfs_acpi_unregister(void) { unregister_acpi_bus_type(&drm_connector_acpi_bus); } #else static void drm_sysfs_acpi_register(void) { } static void drm_sysfs_acpi_unregister(void) { } #endif static char *drm_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "dri/%s", dev_name(dev)); } static int typec_connector_bind(struct device *dev, struct device *typec_connector, void *data) { int ret; ret = sysfs_create_link(&dev->kobj, &typec_connector->kobj, "typec_connector"); if (ret) return ret; ret = sysfs_create_link(&typec_connector->kobj, &dev->kobj, "drm_connector"); if (ret) sysfs_remove_link(&dev->kobj, "typec_connector"); return ret; } static void typec_connector_unbind(struct device *dev, struct device *typec_connector, void *data) { sysfs_remove_link(&typec_connector->kobj, "drm_connector"); sysfs_remove_link(&dev->kobj, "typec_connector"); } static const struct component_ops typec_connector_ops = { .bind = typec_connector_bind, .unbind = typec_connector_unbind, }; static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810"); /** * drm_sysfs_init - initialize sysfs helpers * * This is used to create the DRM class, which is the implicit parent of any * other top-level DRM sysfs objects. * * You must call drm_sysfs_destroy() to release the allocated resources. * * Return: 0 on success, negative error code on failure. */ int drm_sysfs_init(void) { int err; drm_class = class_create("drm"); if (IS_ERR(drm_class)) return PTR_ERR(drm_class); err = class_create_file(drm_class, &class_attr_version.attr); if (err) { class_destroy(drm_class); drm_class = NULL; return err; } drm_class->devnode = drm_devnode; drm_sysfs_acpi_register(); return 0; } /** * drm_sysfs_destroy - destroys DRM class * * Destroy the DRM device class. 
*/ void drm_sysfs_destroy(void) { if (IS_ERR_OR_NULL(drm_class)) return; drm_sysfs_acpi_unregister(); class_remove_file(drm_class, &class_attr_version.attr); class_destroy(drm_class); drm_class = NULL; } static void drm_sysfs_release(struct device *dev) { kfree(dev); } /* * Connector properties */ static ssize_t status_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) { struct drm_connector *connector = to_drm_connector(device); struct drm_device *dev = connector->dev; enum drm_connector_force old_force; int ret; ret = mutex_lock_interruptible(&dev->mode_config.mutex); if (ret) return ret; old_force = connector->force; if (sysfs_streq(buf, "detect")) connector->force = 0; else if (sysfs_streq(buf, "on")) connector->force = DRM_FORCE_ON; else if (sysfs_streq(buf, "on-digital")) connector->force = DRM_FORCE_ON_DIGITAL; else if (sysfs_streq(buf, "off")) connector->force = DRM_FORCE_OFF; else ret = -EINVAL; if (old_force != connector->force || !connector->force) { DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force updated from %d to %d or reprobing\n", connector->base.id, connector->name, old_force, connector->force); connector->funcs->fill_modes(connector, dev->mode_config.max_width, dev->mode_config.max_height); } mutex_unlock(&dev->mode_config.mutex); return ret ? ret : count; } static ssize_t status_show(struct device *device, struct device_attribute *attr, char *buf) { struct drm_connector *connector = to_drm_connector(device); enum drm_connector_status status; status = READ_ONCE(connector->status); return sysfs_emit(buf, "%s\n", drm_get_connector_status_name(status)); } static ssize_t dpms_show(struct device *device, struct device_attribute *attr, char *buf) { struct drm_connector *connector = to_drm_connector(device); int dpms; dpms = READ_ONCE(connector->dpms); return sysfs_emit(buf, "%s\n", drm_get_dpms_name(dpms)); } static ssize_t enabled_show(struct device *device, struct device_attribute *attr, char *buf) { struct drm_connector *connector = to_drm_connector(device); bool enabled; enabled = READ_ONCE(connector->encoder); return sysfs_emit(buf, enabled ? 
"enabled\n" : "disabled\n"); } static ssize_t edid_show(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *connector_dev = kobj_to_dev(kobj); struct drm_connector *connector = to_drm_connector(connector_dev); unsigned char *edid; size_t size; ssize_t ret = 0; mutex_lock(&connector->dev->mode_config.mutex); if (!connector->edid_blob_ptr) goto unlock; edid = connector->edid_blob_ptr->data; size = connector->edid_blob_ptr->length; if (!edid) goto unlock; if (off >= size) goto unlock; if (off + count > size) count = size - off; memcpy(buf, edid + off, count); ret = count; unlock: mutex_unlock(&connector->dev->mode_config.mutex); return ret; } static ssize_t modes_show(struct device *device, struct device_attribute *attr, char *buf) { struct drm_connector *connector = to_drm_connector(device); struct drm_display_mode *mode; int written = 0; mutex_lock(&connector->dev->mode_config.mutex); list_for_each_entry(mode, &connector->modes, head) { written += scnprintf(buf + written, PAGE_SIZE - written, "%s\n", mode->name); } mutex_unlock(&connector->dev->mode_config.mutex); return written; } static ssize_t connector_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct drm_connector *connector = to_drm_connector(device); return sysfs_emit(buf, "%d\n", connector->base.id); } static DEVICE_ATTR_RW(status); static DEVICE_ATTR_RO(enabled); static DEVICE_ATTR_RO(dpms); static DEVICE_ATTR_RO(modes); static DEVICE_ATTR_RO(connector_id); static struct attribute *connector_dev_attrs[] = { &dev_attr_status.attr, &dev_attr_enabled.attr, &dev_attr_dpms.attr, &dev_attr_modes.attr, &dev_attr_connector_id.attr, NULL }; static struct bin_attribute edid_attr = { .attr.name = "edid", .attr.mode = 0444, .size = 0, .read = edid_show, }; static struct bin_attribute *connector_bin_attrs[] = { &edid_attr, NULL }; static const struct attribute_group connector_dev_group = { .attrs = connector_dev_attrs, .bin_attrs = connector_bin_attrs, }; static const struct attribute_group *connector_dev_groups[] = { &connector_dev_group, NULL }; int drm_sysfs_connector_add(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct device *kdev; int r; if (connector->kdev) return 0; kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); if (!kdev) return -ENOMEM; device_initialize(kdev); kdev->class = drm_class; kdev->type = &drm_sysfs_device_connector; kdev->parent = dev->primary->kdev; kdev->groups = connector_dev_groups; kdev->release = drm_sysfs_release; dev_set_drvdata(kdev, connector); r = dev_set_name(kdev, "card%d-%s", dev->primary->index, connector->name); if (r) goto err_free; DRM_DEBUG("adding \"%s\" to sysfs\n", connector->name); r = device_add(kdev); if (r) { drm_err(dev, "failed to register connector device: %d\n", r); goto err_free; } connector->kdev = kdev; if (dev_fwnode(kdev)) { r = component_add(kdev, &typec_connector_ops); if (r) drm_err(dev, "failed to add component to create link to typec connector\n"); } return 0; err_free: put_device(kdev); return r; } int drm_sysfs_connector_add_late(struct drm_connector *connector) { if (connector->ddc) return sysfs_create_link(&connector->kdev->kobj, &connector->ddc->dev.kobj, "ddc"); return 0; } void drm_sysfs_connector_remove_early(struct drm_connector *connector) { if (connector->ddc) sysfs_remove_link(&connector->kdev->kobj, "ddc"); } void drm_sysfs_connector_remove(struct drm_connector *connector) { if (!connector->kdev) return; if (dev_fwnode(connector->kdev)) 
component_del(connector->kdev, &typec_connector_ops); DRM_DEBUG("removing \"%s\" from sysfs\n", connector->name); device_unregister(connector->kdev); connector->kdev = NULL; } void drm_sysfs_lease_event(struct drm_device *dev) { char *event_string = "LEASE=1"; char *envp[] = { event_string, NULL }; DRM_DEBUG("generating lease event\n"); kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp); } /** * drm_sysfs_hotplug_event - generate a DRM uevent * @dev: DRM device * * Send a uevent for the DRM device specified by @dev. Currently we only * set HOTPLUG=1 in the uevent environment, but this could be expanded to * deal with other types of events. * * Any new uapi should be using the drm_sysfs_connector_status_event() * for uevents on connector status change. */ void drm_sysfs_hotplug_event(struct drm_device *dev) { char *event_string = "HOTPLUG=1"; char *envp[] = { event_string, NULL }; DRM_DEBUG("generating hotplug event\n"); kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp); } EXPORT_SYMBOL(drm_sysfs_hotplug_event); /** * drm_sysfs_connector_hotplug_event - generate a DRM uevent for any connector * change * @connector: connector which has changed * * Send a uevent for the DRM connector specified by @connector. This will send * a uevent with the properties HOTPLUG=1 and CONNECTOR. */ void drm_sysfs_connector_hotplug_event(struct drm_connector *connector) { struct drm_device *dev = connector->dev; char hotplug_str[] = "HOTPLUG=1", conn_id[21]; char *envp[] = { hotplug_str, conn_id, NULL }; snprintf(conn_id, sizeof(conn_id), "CONNECTOR=%u", connector->base.id); drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] generating connector hotplug event\n", connector->base.id, connector->name); kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp); } EXPORT_SYMBOL(drm_sysfs_connector_hotplug_event); /** * drm_sysfs_connector_property_event - generate a DRM uevent for connector * property change * @connector: connector on which property changed * @property: connector property which has changed. * * Send a uevent for the specified DRM connector and property. Currently we * set HOTPLUG=1 and connector id along with the attached property id * related to the change. 
*/ void drm_sysfs_connector_property_event(struct drm_connector *connector, struct drm_property *property) { struct drm_device *dev = connector->dev; char hotplug_str[] = "HOTPLUG=1", conn_id[21], prop_id[21]; char *envp[4] = { hotplug_str, conn_id, prop_id, NULL }; WARN_ON(!drm_mode_obj_find_prop_id(&connector->base, property->base.id)); snprintf(conn_id, ARRAY_SIZE(conn_id), "CONNECTOR=%u", connector->base.id); snprintf(prop_id, ARRAY_SIZE(prop_id), "PROPERTY=%u", property->base.id); drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] generating connector property event for [PROP:%d:%s]\n", connector->base.id, connector->name, property->base.id, property->name); kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp); } EXPORT_SYMBOL(drm_sysfs_connector_property_event); struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) { const char *minor_str; struct device *kdev; int r; kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); if (!kdev) return ERR_PTR(-ENOMEM); device_initialize(kdev); if (minor->type == DRM_MINOR_ACCEL) { minor_str = "accel%d"; accel_set_device_instance_params(kdev, minor->index); } else { if (minor->type == DRM_MINOR_RENDER) minor_str = "renderD%d"; else minor_str = "card%d"; kdev->devt = MKDEV(DRM_MAJOR, minor->index); kdev->class = drm_class; kdev->type = &drm_sysfs_device_minor; } kdev->parent = minor->dev->dev; kdev->release = drm_sysfs_release; dev_set_drvdata(kdev, minor); r = dev_set_name(kdev, minor_str, minor->index); if (r < 0) goto err_free; return kdev; err_free: put_device(kdev); return ERR_PTR(r); } /** * drm_class_device_register - register new device with the DRM sysfs class * @dev: device to register * * Registers a new &struct device within the DRM sysfs class. Essentially only * used by ttm to have a place for its global settings. Drivers should never use * this. */ int drm_class_device_register(struct device *dev) { if (!drm_class || IS_ERR(drm_class)) return -ENOENT; dev->class = drm_class; return device_register(dev); } EXPORT_SYMBOL_GPL(drm_class_device_register); /** * drm_class_device_unregister - unregister device with the DRM sysfs class * @dev: device to unregister * * Unregisters a &struct device from the DRM sysfs class. Essentially only used * by ttm to have a place for its global settings. Drivers should never use * this. */ void drm_class_device_unregister(struct device *dev) { return device_unregister(dev); } EXPORT_SYMBOL_GPL(drm_class_device_unregister);
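Following the DOC: overview at the top of this file, a driver that wants extra sysfs attributes attaches them to &drm_device.dev (or &drm_connector.kdev) rather than extending this file. The sketch below is one way to do that under those assumptions; the vendor_mode attribute and my_driver_* names are hypothetical, while DEVICE_ATTR_RO() and devm_device_add_group() are standard driver-core interfaces.

#include <linux/device.h>
#include <linux/sysfs.h>
#include <drm/drm_device.h>

/* Hypothetical read-only attribute exposed alongside the driver's other files */
static ssize_t vendor_mode_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n", "default");
}
static DEVICE_ATTR_RO(vendor_mode);

static struct attribute *my_driver_attrs[] = {
	&dev_attr_vendor_mode.attr,
	NULL
};

static const struct attribute_group my_driver_attr_group = {
	.attrs = my_driver_attrs,
};

/* Called from the driver's probe path, after the drm_device has been allocated */
static int my_driver_add_sysfs(struct drm_device *drm)
{
	/* Attach the group at drm_device.dev, as suggested by the overview above */
	return devm_device_add_group(drm->dev, &my_driver_attr_group);
}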
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TIMER_H #define _LINUX_TIMER_H #include <linux/list.h> #include <linux/ktime.h> #include <linux/stddef.h> #include <linux/debugobjects.h> #include <linux/stringify.h> #include <linux/timer_types.h> #ifdef CONFIG_LOCKDEP /* * NB: because we have to copy the lockdep_map, setting the lockdep_map key * (second argument) here is required, otherwise it could be initialised to * the copy of the lockdep_map later! We use the pointer to and the string * "<file>:<line>" as the key resp. the name of the lockdep_map. */ #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) \ .lockdep_map = STATIC_LOCKDEP_MAP_INIT(_kn, &_kn), #else #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif /* * @TIMER_DEFERRABLE: A deferrable timer will work normally when the * system is busy, but will not cause a CPU to come out of idle just * to service it; instead, the timer will be serviced when the CPU * eventually wakes up with a subsequent non-deferrable timer. * * @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and * it's safe to wait for the completion of the running instance from * IRQ handlers, for example, by calling del_timer_sync(). * * Note: The irq disabled callback execution is a special case for * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! * * @TIMER_PINNED: A pinned timer will always expire on the CPU on which the * timer was enqueued. When a particular CPU is required, add_timer_on() * has to be used. Enqueue via mod_timer() and add_timer() is always done * on the local CPU. */ #define TIMER_CPUMASK 0x0003FFFF #define TIMER_MIGRATING 0x00040000 #define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) #define TIMER_DEFERRABLE 0x00080000 #define TIMER_PINNED 0x00100000 #define TIMER_IRQSAFE 0x00200000 #define TIMER_INIT_FLAGS (TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) #define TIMER_ARRAYSHIFT 22 #define TIMER_ARRAYMASK 0xFFC00000 #define TIMER_TRACE_FLAGMASK (TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) #define __TIMER_INITIALIZER(_function, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .flags = (_flags), \ __TIMER_LOCKDEP_MAP_INITIALIZER(FILE_LINE) \ } #define DEFINE_TIMER(_name, _function) \ struct timer_list _name = \ __TIMER_INITIALIZER(_function, 0) /* * LOCKDEP and DEBUG timer interfaces. 
*/ void init_timer_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void init_timer_on_stack_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); #else static inline void init_timer_on_stack_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { init_timer_key(timer, func, flags, name, key); } #endif #ifdef CONFIG_LOCKDEP #define __init_timer(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ init_timer_key((_timer), (_fn), (_flags), #_timer, &__key);\ } while (0) #define __init_timer_on_stack(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ init_timer_on_stack_key((_timer), (_fn), (_flags), \ #_timer, &__key); \ } while (0) #else #define __init_timer(_timer, _fn, _flags) \ init_timer_key((_timer), (_fn), (_flags), NULL, NULL) #define __init_timer_on_stack(_timer, _fn, _flags) \ init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL) #endif /** * timer_setup - prepare a timer for first use * @timer: the timer in question * @callback: the function to call when timer expires * @flags: any TIMER_* flags * * Regular timer initialization should use either DEFINE_TIMER() above, * or timer_setup(). For timers on the stack, timer_setup_on_stack() must * be used and must be balanced with a call to destroy_timer_on_stack(). */ #define timer_setup(timer, callback, flags) \ __init_timer((timer), (callback), (flags)) #define timer_setup_on_stack(timer, callback, flags) \ __init_timer_on_stack((timer), (callback), (flags)) #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void destroy_timer_on_stack(struct timer_list *timer); #else static inline void destroy_timer_on_stack(struct timer_list *timer) { } #endif #define from_timer(var, callback_timer, timer_fieldname) \ container_of(callback_timer, typeof(*var), timer_fieldname) /** * timer_pending - is a timer pending? * @timer: the timer in question * * timer_pending will tell whether a given timer is currently pending, * or not. Callers must ensure serialization wrt. other operations done * to this timer, eg. interrupt contexts, or other CPUs on SMP. * * Returns: 1 if the timer is pending, 0 if not. */ static inline int timer_pending(const struct timer_list * timer) { return !hlist_unhashed_lockless(&timer->entry); } extern void add_timer_on(struct timer_list *timer, int cpu); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); extern int timer_reduce(struct timer_list *timer, unsigned long expires); /* * The jiffies value which is added to now, when there is no timer * in the timer wheel: */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) extern void add_timer(struct timer_list *timer); extern void add_timer_local(struct timer_list *timer); extern void add_timer_global(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); extern int timer_delete(struct timer_list *timer); extern int timer_shutdown_sync(struct timer_list *timer); extern int timer_shutdown(struct timer_list *timer); /** * del_timer_sync - Delete a pending timer and wait for a running callback * @timer: The timer to be deleted * * See timer_delete_sync() for detailed explanation. 
* * Do not use in new code. Use timer_delete_sync() instead. * * Returns: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ static inline int del_timer_sync(struct timer_list *timer) { return timer_delete_sync(timer); } /** * del_timer - Delete a pending timer * @timer: The timer to be deleted * * See timer_delete() for detailed explanation. * * Do not use in new code. Use timer_delete() instead. * * Returns: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ static inline int del_timer(struct timer_list *timer) { return timer_delete(timer); } extern void init_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); unsigned long __round_jiffies(unsigned long j, int cpu); unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); unsigned long __round_jiffies_up(unsigned long j, int cpu); unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); #ifdef CONFIG_HOTPLUG_CPU int timers_prepare_cpu(unsigned int cpu); int timers_dead_cpu(unsigned int cpu); #else #define timers_prepare_cpu NULL #define timers_dead_cpu NULL #endif #endif
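A minimal usage sketch of the interfaces declared above, assuming a hypothetical driver-private structure: timer_setup() initializes the embedded timer, from_timer() recovers the container inside the callback, mod_timer() (re)arms it, and timer_shutdown_sync() tears it down once the timer will never be used again.

#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/timer.h>

struct my_dev {
	struct timer_list watchdog;	/* embedded timer */
	unsigned long interval;		/* poll period in jiffies */
};

static void my_dev_timeout(struct timer_list *t)
{
	/* Recover the enclosing structure from the timer pointer */
	struct my_dev *md = from_timer(md, t, watchdog);

	/* ... poll the hypothetical hardware here ... */

	/* Re-arm for periodic operation */
	mod_timer(&md->watchdog, jiffies + md->interval);
}

static struct my_dev *my_dev_create(void)
{
	struct my_dev *md = kzalloc(sizeof(*md), GFP_KERNEL);

	if (!md)
		return NULL;

	md->interval = msecs_to_jiffies(500);
	timer_setup(&md->watchdog, my_dev_timeout, 0);
	mod_timer(&md->watchdog, jiffies + md->interval);
	return md;
}

static void my_dev_destroy(struct my_dev *md)
{
	/* Wait for a running callback and prevent any future re-arming */
	timer_shutdown_sync(&md->watchdog);
	kfree(md);
}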
// SPDX-License-Identifier: GPL-2.0-or-later /* linux/net/ipv4/arp.c * * Copyright (C) 1994 by Florian La Roche * * This module implements the Address Resolution Protocol ARP (RFC 826), * which is used to convert IP addresses (or in the future maybe other * high-level addresses) into a low-level hardware address (like an Ethernet * address). * * Fixes: * Alan Cox : Removed the Ethernet assumptions in * Florian's code * Alan Cox : Fixed some small errors in the ARP * logic * Alan Cox : Allow >4K in /proc * Alan Cox : Make ARP add its own protocol entry * Ross Martin : Rewrote arp_rcv() and arp_get_info() * Stephen Henson : Add AX25 support to arp_get_info() * Alan Cox : Drop data when a device is downed. * Alan Cox : Use init_timer(). * Alan Cox : Double lock fixes.
* Martin Seine : Move the arphdr structure * to if_arp.h for compatibility. * with BSD based programs. * Andrew Tridgell : Added ARP netmask code and * re-arranged proxy handling. * Alan Cox : Changed to use notifiers. * Niibe Yutaka : Reply for this device or proxies only. * Alan Cox : Don't proxy across hardware types! * Jonathan Naylor : Added support for NET/ROM. * Mike Shaver : RFC1122 checks. * Jonathan Naylor : Only lookup the hardware address for * the correct hardware type. * Germano Caronni : Assorted subtle races. * Craig Schlenter : Don't modify permanent entry * during arp_rcv. * Russ Nelson : Tidied up a few bits. * Alexey Kuznetsov: Major changes to caching and behaviour, * eg intelligent arp probing and * generation * of host down events. * Alan Cox : Missing unlock in device events. * Eckes : ARP ioctl control errors. * Alexey Kuznetsov: Arp free fix. * Manuel Rodriguez: Gratuitous ARP. * Jonathan Layes : Added arpd support through kerneld * message queue (960314) * Mike Shaver : /proc/sys/net/ipv4/arp_* support * Mike McLagan : Routing by source * Stuart Cheshire : Metricom and grat arp fixes * *** FOR 2.1 clean this up *** * Lawrence V. Stefani: (08/12/96) Added FDDI support. * Alan Cox : Took the AP1000 nasty FDDI hack and * folded into the mainstream FDDI code. * Ack spit, Linus how did you allow that * one in... * Jes Sorensen : Make FDDI work again in 2.1.x and * clean up the APFDDI & gen. FDDI bits. * Alexey Kuznetsov: new arp state machine; * now it is in net/core/neighbour.c. * Krzysztof Halasa: Added Frame Relay ARP support. * Arnaldo C. Melo : convert /proc/net/arp to seq_file * Shmulik Hen: Split arp_send to arp_create and * arp_xmit so intermediate drivers like * bonding can change the skb before * sending (e.g. insert 8021q tag). * Harald Welte : convert to make use of jenkins hash * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/capability.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/mm.h> #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/fddidevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/net.h> #include <linux/rcupdate.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <net/net_namespace.h> #include <net/ip.h> #include <net/icmp.h> #include <net/route.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/sock.h> #include <net/arp.h> #include <net/ax25.h> #include <net/netrom.h> #include <net/dst_metadata.h> #include <net/ip_tunnels.h> #include <linux/uaccess.h> #include <linux/netfilter_arp.h> /* * Interface to generic neighbour cache. 
*/ static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); static bool arp_key_eq(const struct neighbour *n, const void *pkey); static int arp_constructor(struct neighbour *neigh); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); static void parp_redo(struct sk_buff *skb); static int arp_is_multicast(const void *pkey); static const struct neigh_ops arp_generic_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, .output = neigh_resolve_output, .connected_output = neigh_connected_output, }; static const struct neigh_ops arp_hh_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, .output = neigh_resolve_output, .connected_output = neigh_resolve_output, }; static const struct neigh_ops arp_direct_ops = { .family = AF_INET, .output = neigh_direct_output, .connected_output = neigh_direct_output, }; struct neigh_table arp_tbl = { .family = AF_INET, .key_len = 4, .protocol = cpu_to_be16(ETH_P_IP), .hash = arp_hash, .key_eq = arp_key_eq, .constructor = arp_constructor, .proxy_redo = parp_redo, .is_multicast = arp_is_multicast, .id = "arp_cache", .parms = { .tbl = &arp_tbl, .reachable_time = 30 * HZ, .data = { [NEIGH_VAR_MCAST_PROBES] = 3, [NEIGH_VAR_UCAST_PROBES] = 3, [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, [NEIGH_VAR_LOCKTIME] = 1 * HZ, }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, .gc_thresh2 = 512, .gc_thresh3 = 1024, }; EXPORT_SYMBOL(arp_tbl); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) { switch (dev->type) { case ARPHRD_ETHER: case ARPHRD_FDDI: case ARPHRD_IEEE802: ip_eth_mc_map(addr, haddr); return 0; case ARPHRD_INFINIBAND: ip_ib_mc_map(addr, dev->broadcast, haddr); return 0; case ARPHRD_IPGRE: ip_ipgre_mc_map(addr, dev->broadcast, haddr); return 0; default: if (dir) { memcpy(haddr, dev->broadcast, dev->addr_len); return 0; } } return -EINVAL; } static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { return arp_hashfn(pkey, dev, hash_rnd); } static bool arp_key_eq(const struct neighbour *neigh, const void *pkey) { return neigh_key_eq32(neigh, pkey); } static int arp_constructor(struct neighbour *neigh) { __be32 addr; struct net_device *dev = neigh->dev; struct in_device *in_dev; struct neigh_parms *parms; u32 inaddr_any = INADDR_ANY; if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); addr = *(__be32 *)neigh->primary_key; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) { rcu_read_unlock(); return -EINVAL; } neigh->type = inet_addr_type_dev_table(dev_net(dev), dev, addr); parms = in_dev->arp_parms; __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &arp_direct_ops; neigh->output = neigh_direct_output; } else { /* Good devices (checked by reading texts, but only Ethernet is tested) ARPHRD_ETHER: (ethernet, apfddi) ARPHRD_FDDI: (fddi) ARPHRD_IEEE802: (tr) ARPHRD_METRICOM: (strip) ARPHRD_ARCNET: etc. etc. etc. ARPHRD_IPDDP will also work, if author repairs it. 
I did not it, because this driver does not work even in old paradigm. */ if (neigh->type == RTN_MULTICAST) { neigh->nud_state = NUD_NOARP; arp_mc_map(addr, neigh->ha, dev, 1); } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->dev_addr, dev->addr_len); } else if (neigh->type == RTN_BROADCAST || (dev->flags & IFF_POINTOPOINT)) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } if (dev->header_ops->cache) neigh->ops = &arp_hh_ops; else neigh->ops = &arp_generic_ops; if (neigh->nud_state & NUD_VALID) neigh->output = neigh->ops->connected_output; else neigh->output = neigh->ops->output; } return 0; } static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) { dst_link_failure(skb); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED); } /* Create and send an arp packet. */ static void arp_send_dst(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *target_hw, struct dst_entry *dst) { struct sk_buff *skb; /* arp on this interface. */ if (dev->flags & IFF_NOARP) return; skb = arp_create(type, ptype, dest_ip, dev, src_ip, dest_hw, src_hw, target_hw); if (!skb) return; skb_dst_set(skb, dst_clone(dst)); arp_xmit(skb); } void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *target_hw) { arp_send_dst(type, ptype, dest_ip, dev, src_ip, dest_hw, src_hw, target_hw, NULL); } EXPORT_SYMBOL(arp_send); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) { __be32 saddr = 0; u8 dst_ha[MAX_ADDR_LEN], *dst_hw = NULL; struct net_device *dev = neigh->dev; __be32 target = *(__be32 *)neigh->primary_key; int probes = atomic_read(&neigh->probes); struct in_device *in_dev; struct dst_entry *dst = NULL; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) { rcu_read_unlock(); return; } switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ if (skb && inet_addr_type_dev_table(dev_net(dev), dev, ip_hdr(skb)->saddr) == RTN_LOCAL) saddr = ip_hdr(skb)->saddr; break; case 1: /* Restrict announcements of saddr in same subnet */ if (!skb) break; saddr = ip_hdr(skb)->saddr; if (inet_addr_type_dev_table(dev_net(dev), dev, saddr) == RTN_LOCAL) { /* saddr should be known to target */ if (inet_addr_onlink(in_dev, target, saddr)) break; } saddr = 0; break; case 2: /* Avoid secondary IPs, get a primary/preferred one */ break; } rcu_read_unlock(); if (!saddr) saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) pr_debug("trying to ucast probe in NUD_INVALID\n"); neigh_ha_snapshot(dst_ha, neigh, dev); dst_hw = dst_ha; } else { probes -= NEIGH_VAR(neigh->parms, APP_PROBES); if (probes < 0) { neigh_app_ns(neigh); return; } } if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE)) dst = skb_dst(skb); arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, dst_hw, dev->dev_addr, NULL, dst); } static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) { struct net *net = dev_net(in_dev->dev); int scope; switch (IN_DEV_ARP_IGNORE(in_dev)) { case 0: /* Reply, the tip is already validated */ return 0; case 1: /* Reply only if tip is configured on the incoming interface */ sip = 0; scope = RT_SCOPE_HOST; break; case 2: /* * Reply 
only if tip is configured on the incoming interface * and is in same subnet as sip */ scope = RT_SCOPE_HOST; break; case 3: /* Do not reply for scope host addresses */ sip = 0; scope = RT_SCOPE_LINK; in_dev = NULL; break; case 4: /* Reserved */ case 5: case 6: case 7: return 0; case 8: /* Do not reply */ return 1; default: return 0; } return !inet_confirm_addr(net, in_dev, sip, tip, scope); } static int arp_accept(struct in_device *in_dev, __be32 sip) { struct net *net = dev_net(in_dev->dev); int scope = RT_SCOPE_LINK; switch (IN_DEV_ARP_ACCEPT(in_dev)) { case 0: /* Don't create new entries from garp */ return 0; case 1: /* Create new entries from garp */ return 1; case 2: /* Create a neighbor in the arp table only if sip * is in the same subnet as an address configured * on the interface that received the garp message */ return !!inet_confirm_addr(net, in_dev, sip, 0, scope); default: return 0; } } static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { struct rtable *rt; int flag = 0; /*unsigned long now; */ struct net *net = dev_net(dev); rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev)); if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { __NET_INC_STATS(net, LINUX_MIB_ARPFILTER); flag = 1; } ip_rt_put(rt); return flag; } /* * Check if we can use proxy ARP for this path */ static inline int arp_fwd_proxy(struct in_device *in_dev, struct net_device *dev, struct rtable *rt) { struct in_device *out_dev; int imi, omi = -1; if (rt->dst.dev == dev) return 0; if (!IN_DEV_PROXY_ARP(in_dev)) return 0; imi = IN_DEV_MEDIUM_ID(in_dev); if (imi == 0) return 1; if (imi == -1) return 0; /* place to check for proxy_arp for routes */ out_dev = __in_dev_get_rcu(rt->dst.dev); if (out_dev) omi = IN_DEV_MEDIUM_ID(out_dev); return omi != imi && omi != -1; } /* * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev) * * RFC3069 supports proxy arp replies back to the same interface. This * is done to support (ethernet) switch features, like RFC 3069, where * the individual ports are not allowed to communicate with each * other, BUT they are allowed to talk to the upstream router. As * described in RFC 3069, it is possible to allow these hosts to * communicate through the upstream router, by proxy_arp'ing. * * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation" * * This technology is known by different names: * In RFC 3069 it is called VLAN Aggregation. * Cisco and Allied Telesyn call it Private VLAN. * Hewlett-Packard call it Source-Port filtering or port-isolation. * Ericsson call it MAC-Forced Forwarding (RFC Draft). * */ static inline int arp_fwd_pvlan(struct in_device *in_dev, struct net_device *dev, struct rtable *rt, __be32 sip, __be32 tip) { /* Private VLAN is only concerned about the same ethernet segment */ if (rt->dst.dev != dev) return 0; /* Don't reply on self probes (often done by windowz boxes)*/ if (sip == tip) return 0; if (IN_DEV_PROXY_ARP_PVLAN(in_dev)) return 1; else return 0; } /* * Interface to link layer: send routine and receive handler. */ /* * Create an arp packet. If dest_hw is not set, we create a broadcast * message. 
*/ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, const unsigned char *dest_hw, const unsigned char *src_hw, const unsigned char *target_hw) { struct sk_buff *skb; struct arphdr *arp; unsigned char *arp_ptr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; /* * Allocate a buffer */ skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC); if (!skb) return NULL; skb_reserve(skb, hlen); skb_reset_network_header(skb); arp = skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; skb->protocol = htons(ETH_P_ARP); if (!src_hw) src_hw = dev->dev_addr; if (!dest_hw) dest_hw = dev->broadcast; /* * Fill the device header for the ARP frame */ if (dev_hard_header(skb, dev, ptype, dest_hw, src_hw, skb->len) < 0) goto out; /* * Fill out the arp protocol part. * * The arp hardware type should match the device type, except for FDDI, * which (according to RFC 1390) should always equal 1 (Ethernet). */ /* * Exceptions everywhere. AX.25 uses the AX.25 PID value not the * DIX code for the protocol. Make these device structure fields. */ switch (dev->type) { default: arp->ar_hrd = htons(dev->type); arp->ar_pro = htons(ETH_P_IP); break; #if IS_ENABLED(CONFIG_AX25) case ARPHRD_AX25: arp->ar_hrd = htons(ARPHRD_AX25); arp->ar_pro = htons(AX25_P_IP); break; #if IS_ENABLED(CONFIG_NETROM) case ARPHRD_NETROM: arp->ar_hrd = htons(ARPHRD_NETROM); arp->ar_pro = htons(AX25_P_IP); break; #endif #endif #if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: arp->ar_hrd = htons(ARPHRD_ETHER); arp->ar_pro = htons(ETH_P_IP); break; #endif } arp->ar_hln = dev->addr_len; arp->ar_pln = 4; arp->ar_op = htons(type); arp_ptr = (unsigned char *)(arp + 1); memcpy(arp_ptr, src_hw, dev->addr_len); arp_ptr += dev->addr_len; memcpy(arp_ptr, &src_ip, 4); arp_ptr += 4; switch (dev->type) { #if IS_ENABLED(CONFIG_FIREWIRE_NET) case ARPHRD_IEEE1394: break; #endif default: if (target_hw) memcpy(arp_ptr, target_hw, dev->addr_len); else memset(arp_ptr, 0, dev->addr_len); arp_ptr += dev->addr_len; } memcpy(arp_ptr, &dest_ip, 4); return skb; out: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(arp_create); static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return dev_queue_xmit(skb); } /* * Send an arp packet. */ void arp_xmit(struct sk_buff *skb) { /* Send it off, maybe filter it using firewalling first. */ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, dev_net(skb->dev), NULL, skb, NULL, skb->dev, arp_xmit_finish); } EXPORT_SYMBOL(arp_xmit); static bool arp_is_garp(struct net *net, struct net_device *dev, int *addr_type, __be16 ar_op, __be32 sip, __be32 tip, unsigned char *sha, unsigned char *tha) { bool is_garp = tip == sip; /* Gratuitous ARP _replies_ also require target hwaddr to be * the same as source. */ if (is_garp && ar_op == htons(ARPOP_REPLY)) is_garp = /* IPv4 over IEEE 1394 doesn't provide target * hardware address field in its ARP payload. */ tha && !memcmp(tha, sha, dev->addr_len); if (is_garp) { *addr_type = inet_addr_type_dev_table(net, dev, sip); if (*addr_type != RTN_UNICAST) is_garp = false; } return is_garp; } /* * Process an arp request. 
*/ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); struct arphdr *arp; unsigned char *arp_ptr; struct rtable *rt; unsigned char *sha; unsigned char *tha = NULL; __be32 sip, tip; u16 dev_type = dev->type; int addr_type; struct neighbour *n; struct dst_entry *reply_dst = NULL; bool is_garp = false; /* arp_rcv below verifies the ARP header and verifies the device * is ARP'able. */ if (!in_dev) goto out_free_skb; arp = arp_hdr(skb); switch (dev_type) { default: if (arp->ar_pro != htons(ETH_P_IP) || htons(dev_type) != arp->ar_hrd) goto out_free_skb; break; case ARPHRD_ETHER: case ARPHRD_FDDI: case ARPHRD_IEEE802: /* * ETHERNET, and Fibre Channel (which are IEEE 802 * devices, according to RFC 2625) devices will accept ARP * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2). * This is the case also of FDDI, where the RFC 1390 says that * FDDI devices should accept ARP hardware of (1) Ethernet, * however, to be more robust, we'll accept both 1 (Ethernet) * or 6 (IEEE 802.2) */ if ((arp->ar_hrd != htons(ARPHRD_ETHER) && arp->ar_hrd != htons(ARPHRD_IEEE802)) || arp->ar_pro != htons(ETH_P_IP)) goto out_free_skb; break; case ARPHRD_AX25: if (arp->ar_pro != htons(AX25_P_IP) || arp->ar_hrd != htons(ARPHRD_AX25)) goto out_free_skb; break; case ARPHRD_NETROM: if (arp->ar_pro != htons(AX25_P_IP) || arp->ar_hrd != htons(ARPHRD_NETROM)) goto out_free_skb; break; } /* Understand only these message types */ if (arp->ar_op != htons(ARPOP_REPLY) && arp->ar_op != htons(ARPOP_REQUEST)) goto out_free_skb; /* * Extract fields */ arp_ptr = (unsigned char *)(arp + 1); sha = arp_ptr; arp_ptr += dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; switch (dev_type) { #if IS_ENABLED(CONFIG_FIREWIRE_NET) case ARPHRD_IEEE1394: break; #endif default: tha = arp_ptr; arp_ptr += dev->addr_len; } memcpy(&tip, arp_ptr, 4); /* * Check for bad requests for 127.x.x.x and requests for multicast * addresses. If this is one such, delete it. */ if (ipv4_is_multicast(tip) || (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip))) goto out_free_skb; /* * For some 802.11 wireless deployments (and possibly other networks), * there will be an ARP proxy and gratuitous ARP frames are attacks * and thus should not be accepted. */ if (sip == tip && IN_DEV_ORCONF(in_dev, DROP_GRATUITOUS_ARP)) goto out_free_skb; /* * Special case: We must set Frame Relay source Q.922 address */ if (dev_type == ARPHRD_DLCI) sha = dev->broadcast; /* * Process entry. The idea here is we want to send a reply if it is a * request for us or if it is a request for someone else that we hold * a proxy for. We want to add an entry to our cache if it is a reply * to us or if it is a request for our address. * (The assumption for this last is that if someone is requesting our * address, they are probably intending to talk to us, so it saves time * if we cache their address. Their address is also probably not in * our cache, since ours is not in their cache.) * * Putting this another way, we only care about replies if they are to * us, in which case we add them to the cache. For requests, we care * about those for us and those for our proxies. We reply to both, * and in the case of requests for us we add the requester to the arp * cache. 
*/ if (arp->ar_op == htons(ARPOP_REQUEST) && skb_metadata_dst(skb)) reply_dst = (struct dst_entry *) iptunnel_metadata_reply(skb_metadata_dst(skb), GFP_ATOMIC); /* Special case: IPv4 duplicate address detection packet (RFC2131) */ if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type_dev_table(net, dev, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha, reply_dst); goto out_consume_skb; } if (arp->ar_op == htons(ARPOP_REQUEST) && ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; if (addr_type == RTN_LOCAL) { int dont_send; dont_send = arp_ignore(in_dev, sip, tip); if (!dont_send && IN_DEV_ARPFILTER(in_dev)) dont_send = arp_filter(sip, tip, dev); if (!dont_send) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) { arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha, reply_dst); neigh_release(n); } } goto out_consume_skb; } else if (IN_DEV_FORWARD(in_dev)) { if (addr_type == RTN_UNICAST && (arp_fwd_proxy(in_dev, dev, rt) || arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || (rt->dst.dev != dev && pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST || NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) { arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha, reply_dst); } else { pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); goto out_free_dst; } goto out_consume_skb; } } } /* Update our ARP tables */ n = __neigh_lookup(&arp_tbl, &sip, dev, 0); addr_type = -1; if (n || arp_accept(in_dev, sip)) { is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, sip, tip, sha, tha); } if (arp_accept(in_dev, sip)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ if (!n && (is_garp || (arp->ar_op == htons(ARPOP_REPLY) && (addr_type == RTN_UNICAST || (addr_type < 0 && /* postpone calculation to as late as possible */ inet_addr_type_dev_table(net, dev, sip) == RTN_UNICAST))))) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } if (n) { int state = NUD_REACHABLE; int override; /* If several different ARP replies follows back-to-back, use the FIRST one. It is possible, if several proxy agents are active. Taking the first reply prevents arp trashing and chooses the fastest router. */ override = time_after(jiffies, n->updated + NEIGH_VAR(n->parms, LOCKTIME)) || is_garp; /* Broadcast replies and request packets do not assert neighbour reachability. */ if (arp->ar_op != htons(ARPOP_REPLY) || skb->pkt_type != PACKET_HOST) state = NUD_STALE; neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0, 0); neigh_release(n); } out_consume_skb: consume_skb(skb); out_free_dst: dst_release(reply_dst); return NET_RX_SUCCESS; out_free_skb: kfree_skb(skb); return NET_RX_DROP; } static void parp_redo(struct sk_buff *skb) { arp_process(dev_net(skb->dev), NULL, skb); } static int arp_is_multicast(const void *pkey) { return ipv4_is_multicast(*((__be32 *)pkey)); } /* * Receive an arp request from the device layer. 
*/ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { const struct arphdr *arp; /* do not tweak dropwatch on an ARP we will ignore */ if (dev->flags & IFF_NOARP || skb->pkt_type == PACKET_OTHERHOST || skb->pkt_type == PACKET_LOOPBACK) goto consumeskb; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out_of_mem; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto freeskb; arp = arp_hdr(skb); if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4) goto freeskb; memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, dev_net(dev), NULL, skb, dev, NULL, arp_process); consumeskb: consume_skb(skb); return NET_RX_SUCCESS; freeskb: kfree_skb(skb); out_of_mem: return NET_RX_DROP; } /* * User level interface (ioctl) */ /* * Set (create) an ARP cache entry. */ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) { if (!dev) { IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } if (__in_dev_get_rtnl(dev)) { IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); return 0; } return -ENXIO; } static int arp_req_set_public(struct net *net, struct arpreq *r, struct net_device *dev) { __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; } if (mask) { if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1)) return -ENOBUFS; return 0; } return arp_req_set_proxy(net, dev, 1); } static int arp_req_set(struct net *net, struct arpreq *r, struct net_device *dev) { __be32 ip; struct neighbour *neigh; int err; if (r->arp_flags & ATF_PUBL) return arp_req_set_public(net, r, dev); ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (!dev) { struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; } switch (dev->type) { #if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: /* * According to RFC 1390, FDDI devices should accept ARP * hardware types of 1 (Ethernet). However, to be more * robust, we'll accept hardware types of either 1 (Ethernet) * or 6 (IEEE 802.2). */ if (r->arp_ha.sa_family != ARPHRD_FDDI && r->arp_ha.sa_family != ARPHRD_ETHER && r->arp_ha.sa_family != ARPHRD_IEEE802) return -EINVAL; break; #endif default: if (r->arp_ha.sa_family != dev->type) return -EINVAL; break; } neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev); err = PTR_ERR(neigh); if (!IS_ERR(neigh)) { unsigned int state = NUD_STALE; if (r->arp_flags & ATF_PERM) state = NUD_PERMANENT; err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? r->arp_ha.sa_data : NULL, state, NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0); neigh_release(neigh); } return err; } static unsigned int arp_state_to_flags(struct neighbour *neigh) { if (neigh->nud_state&NUD_PERMANENT) return ATF_PERM | ATF_COM; else if (neigh->nud_state&NUD_VALID) return ATF_COM; else return 0; } /* * Get an ARP cache entry. 
*/ static int arp_req_get(struct arpreq *r, struct net_device *dev) { __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; int err = -ENXIO; neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); memcpy(r->arp_ha.sa_data, neigh->ha, min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); err = 0; } neigh_release(neigh); } return err; } int arp_invalidate(struct net_device *dev, __be32 ip, bool force) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; struct neigh_table *tbl = &arp_tbl; if (neigh) { if ((READ_ONCE(neigh->nud_state) & NUD_VALID) && !force) { neigh_release(neigh); return 0; } if (READ_ONCE(neigh->nud_state) & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); write_lock_bh(&tbl->lock); neigh_release(neigh); neigh_remove_one(neigh, tbl); write_unlock_bh(&tbl->lock); } return err; } static int arp_req_delete_public(struct net *net, struct arpreq *r, struct net_device *dev) { __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; if (mask == htonl(0xFFFFFFFF)) return pneigh_delete(&arp_tbl, net, &ip, dev); if (mask) return -EINVAL; return arp_req_set_proxy(net, dev, 0); } static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device *dev) { __be32 ip; if (r->arp_flags & ATF_PUBL) return arp_req_delete_public(net, r, dev); ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (!dev) { struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; } return arp_invalidate(dev, ip, true); } /* * Handle an ARP layer I/O control request. */ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) { int err; struct arpreq r; struct net_device *dev = NULL; switch (cmd) { case SIOCDARP: case SIOCSARP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; fallthrough; case SIOCGARP: err = copy_from_user(&r, arg, sizeof(struct arpreq)); if (err) return -EFAULT; break; default: return -EINVAL; } if (r.arp_pa.sa_family != AF_INET) return -EPFNOSUPPORT; if (!(r.arp_flags & ATF_PUBL) && (r.arp_flags & (ATF_NETMASK | ATF_DONTPUB))) return -EINVAL; if (!(r.arp_flags & ATF_NETMASK)) ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = htonl(0xFFFFFFFFUL); rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; dev = __dev_get_by_name(net, r.arp_dev); if (!dev) goto out; /* Mmmm... It is wrong... 
ARPHRD_NETROM==0 */ if (!r.arp_ha.sa_family) r.arp_ha.sa_family = dev->type; err = -EINVAL; if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type) goto out; } else if (cmd == SIOCGARP) { err = -ENODEV; goto out; } switch (cmd) { case SIOCDARP: err = arp_req_delete(net, &r, dev); break; case SIOCSARP: err = arp_req_set(net, &r, dev); break; case SIOCGARP: err = arp_req_get(&r, dev); break; } out: rtnl_unlock(); if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) err = -EFAULT; return err; } static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_change_info *change_info; struct in_device *in_dev; bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); rt_cache_flush(dev_net(dev)); break; case NETDEV_CHANGE: change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&arp_tbl, dev); in_dev = __in_dev_get_rtnl(dev); if (!in_dev) evict_nocarrier = true; else evict_nocarrier = IN_DEV_ARP_EVICT_NOCARRIER(in_dev); if (evict_nocarrier && !netif_carrier_ok(dev)) neigh_carrier_down(&arp_tbl, dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block arp_netdev_notifier = { .notifier_call = arp_netdev_event, }; /* Note, that it is not on notifier chain. It is necessary, that this routine was called after route cache will be flushed. */ void arp_ifdown(struct net_device *dev) { neigh_ifdown(&arp_tbl, dev); } /* * Called once on startup. */ static struct packet_type arp_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_ARP), .func = arp_rcv, }; #ifdef CONFIG_PROC_FS #if IS_ENABLED(CONFIG_AX25) /* * ax25 -> ASCII conversion */ static void ax2asc2(ax25_address *a, char *buf) { char c, *s; int n; for (n = 0, s = buf; n < 6; n++) { c = (a->ax25_call[n] >> 1) & 0x7F; if (c != ' ') *s++ = c; } *s++ = '-'; n = (a->ax25_call[6] >> 1) & 0x0F; if (n > 9) { *s++ = '1'; n -= 10; } *s++ = n + '0'; *s++ = '\0'; if (*buf == '\0' || *buf == '-') { buf[0] = '*'; buf[1] = '\0'; } } #endif /* CONFIG_AX25 */ #define HBUFFERLEN 30 static void arp_format_neigh_entry(struct seq_file *seq, struct neighbour *n) { char hbuffer[HBUFFERLEN]; int k, j; char tbuf[16]; struct net_device *dev = n->dev; int hatype = dev->type; read_lock(&n->lock); /* Convert hardware address to XX:XX:XX:XX ... form. */ #if IS_ENABLED(CONFIG_AX25) if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM) ax2asc2((ax25_address *)n->ha, hbuffer); else { #endif for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) { hbuffer[k++] = hex_asc_hi(n->ha[j]); hbuffer[k++] = hex_asc_lo(n->ha[j]); hbuffer[k++] = ':'; } if (k != 0) --k; hbuffer[k] = 0; #if IS_ENABLED(CONFIG_AX25) } #endif sprintf(tbuf, "%pI4", n->primary_key); seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); } static void arp_format_pneigh_entry(struct seq_file *seq, struct pneigh_entry *n) { struct net_device *dev = n->dev; int hatype = dev ? dev->type : 0; char tbuf[16]; sprintf(tbuf, "%pI4", n->key); seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", dev ? 
dev->name : "*"); } static int arp_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, "IP address HW type Flags " "HW address Mask Device\n"); } else { struct neigh_seq_state *state = seq->private; if (state->flags & NEIGH_SEQ_IS_PNEIGH) arp_format_pneigh_entry(seq, v); else arp_format_neigh_entry(seq, v); } return 0; } static void *arp_seq_start(struct seq_file *seq, loff_t *pos) { /* Don't want to confuse "arp -a" w/ magic entries, * so we tell the generic iterator to skip NUD_NOARP. */ return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP); } static const struct seq_operations arp_seq_ops = { .start = arp_seq_start, .next = neigh_seq_next, .stop = neigh_seq_stop, .show = arp_seq_show, }; #endif /* CONFIG_PROC_FS */ static int __net_init arp_net_init(struct net *net) { if (!proc_create_net("arp", 0444, net->proc_net, &arp_seq_ops, sizeof(struct neigh_seq_state))) return -ENOMEM; return 0; } static void __net_exit arp_net_exit(struct net *net) { remove_proc_entry("arp", net->proc_net); } static struct pernet_operations arp_net_ops = { .init = arp_net_init, .exit = arp_net_exit, }; void __init arp_init(void) { neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl); dev_add_pack(&arp_packet_type); register_pernet_subsys(&arp_net_ops); #ifdef CONFIG_SYSCTL neigh_sysctl_register(NULL, &arp_tbl.parms, NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); }
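/*
 * Example (a minimal sketch; foo_announce_addr() and the way the address is
 * obtained are hypothetical): a gratuitous ARP announcement passes the
 * announced address as both sender and target IP, and with dest_hw == NULL
 * arp_create() above falls back to dev->broadcast, so the request goes out
 * as a link broadcast.
 */
static void foo_announce_addr(struct net_device *dev, __be32 addr)
{
	arp_send(ARPOP_REQUEST, ETH_P_ARP, addr, dev, addr,
		 NULL, dev->dev_addr, NULL);
}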
// SPDX-License-Identifier: GPL-2.0-or-later /* * (c) 1999 Andreas Gal <gal@cs.uni-magdeburg.de> * (c) 2000-2001 Vojtech Pavlik <vojtech@ucw.cz> * (c) 2007-2009 Jiri Kosina * * HID debugging support */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail: * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/kfifo.h> #include <linux/sched/signal.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/poll.h> #include <linux/hid.h> #include <linux/hid-debug.h> static struct dentry *hid_debug_root; struct hid_usage_entry { unsigned page; unsigned usage; const char *description; }; static const struct hid_usage_entry hid_usage_table[] = { { 0, 0, "Undefined" }, { 1, 0, "GenericDesktop" }, {0, 0x01, "Pointer"}, {0, 0x02, "Mouse"}, {0, 0x04, "Joystick"}, {0, 0x05, "GamePad"}, {0, 0x06, "Keyboard"}, {0, 0x07, "Keypad"}, {0, 0x08, "MultiAxis"}, {0, 0x30, "X"}, {0, 0x31, "Y"}, {0, 0x32, "Z"}, {0, 0x33, "Rx"}, {0, 0x34, "Ry"}, {0, 0x35, "Rz"}, {0, 0x36, "Slider"}, {0, 0x37, "Dial"}, {0, 0x38, "Wheel"}, {0, 0x39, "HatSwitch"}, {0, 0x3a, "CountedBuffer"}, {0, 0x3b, "ByteCount"}, {0, 0x3c, "MotionWakeup"}, {0, 0x3d, "Start"}, {0, 0x3e, "Select"}, {0, 0x40, "Vx"}, {0, 0x41, "Vy"}, {0, 0x42, "Vz"}, {0, 0x43, "Vbrx"}, {0, 0x44, "Vbry"}, {0, 0x45, "Vbrz"}, {0, 0x46, "Vno"}, {0, 0x80, "SystemControl"}, {0, 0x81, "SystemPowerDown"}, {0, 0x82, "SystemSleep"}, {0, 0x83, "SystemWakeUp"}, {0, 0x84, "SystemContextMenu"}, {0, 0x85, "SystemMainMenu"}, {0, 0x86, "SystemAppMenu"}, {0, 0x87, "SystemMenuHelp"}, {0, 0x88, "SystemMenuExit"}, {0, 0x89, "SystemMenuSelect"},
{0, 0x8a, "SystemMenuRight"}, {0, 0x8b, "SystemMenuLeft"}, {0, 0x8c, "SystemMenuUp"}, {0, 0x8d, "SystemMenuDown"}, {0, 0x90, "D-PadUp"}, {0, 0x91, "D-PadDown"}, {0, 0x92, "D-PadRight"}, {0, 0x93, "D-PadLeft"}, { 2, 0, "Simulation" }, {0, 0xb0, "Aileron"}, {0, 0xb1, "AileronTrim"}, {0, 0xb2, "Anti-Torque"}, {0, 0xb3, "Autopilot"}, {0, 0xb4, "Chaff"}, {0, 0xb5, "Collective"}, {0, 0xb6, "DiveBrake"}, {0, 0xb7, "ElectronicCountermeasures"}, {0, 0xb8, "Elevator"}, {0, 0xb9, "ElevatorTrim"}, {0, 0xba, "Rudder"}, {0, 0xbb, "Throttle"}, {0, 0xbc, "FlightCommunications"}, {0, 0xbd, "FlareRelease"}, {0, 0xbe, "LandingGear"}, {0, 0xbf, "ToeBrake"}, { 6, 0, "GenericDeviceControls" }, {0, 0x20, "BatteryStrength" }, {0, 0x21, "WirelessChannel" }, {0, 0x22, "WirelessID" }, {0, 0x23, "DiscoverWirelessControl" }, {0, 0x24, "SecurityCodeCharacterEntered" }, {0, 0x25, "SecurityCodeCharactedErased" }, {0, 0x26, "SecurityCodeCleared" }, { 7, 0, "Keyboard" }, { 8, 0, "LED" }, {0, 0x01, "NumLock"}, {0, 0x02, "CapsLock"}, {0, 0x03, "ScrollLock"}, {0, 0x04, "Compose"}, {0, 0x05, "Kana"}, {0, 0x4b, "GenericIndicator"}, { 9, 0, "Button" }, { 10, 0, "Ordinal" }, { 12, 0, "Consumer" }, {0, 0x003, "ProgrammableButtons"}, {0, 0x238, "HorizontalWheel"}, { 13, 0, "Digitizers" }, {0, 0x01, "Digitizer"}, {0, 0x02, "Pen"}, {0, 0x03, "LightPen"}, {0, 0x04, "TouchScreen"}, {0, 0x05, "TouchPad"}, {0, 0x0e, "DeviceConfiguration"}, {0, 0x20, "Stylus"}, {0, 0x21, "Puck"}, {0, 0x22, "Finger"}, {0, 0x23, "DeviceSettings"}, {0, 0x30, "TipPressure"}, {0, 0x31, "BarrelPressure"}, {0, 0x32, "InRange"}, {0, 0x33, "Touch"}, {0, 0x34, "UnTouch"}, {0, 0x35, "Tap"}, {0, 0x38, "Transducer Index"}, {0, 0x39, "TabletFunctionKey"}, {0, 0x3a, "ProgramChangeKey"}, {0, 0x3B, "Battery Strength"}, {0, 0x3c, "Invert"}, {0, 0x42, "TipSwitch"}, {0, 0x43, "SecondaryTipSwitch"}, {0, 0x44, "BarrelSwitch"}, {0, 0x45, "Eraser"}, {0, 0x46, "TabletPick"}, {0, 0x47, "Confidence"}, {0, 0x48, "Width"}, {0, 0x49, "Height"}, {0, 0x51, "ContactID"}, {0, 0x52, "InputMode"}, {0, 0x53, "DeviceIndex"}, {0, 0x54, "ContactCount"}, {0, 0x55, "ContactMaximumNumber"}, {0, 0x59, "ButtonType"}, {0, 0x5A, "SecondaryBarrelSwitch"}, {0, 0x5B, "TransducerSerialNumber"}, {0, 0x5C, "Preferred Color"}, {0, 0x5D, "Preferred Color is Locked"}, {0, 0x5E, "Preferred Line Width"}, {0, 0x5F, "Preferred Line Width is Locked"}, {0, 0x6e, "TransducerSerialNumber2"}, {0, 0x70, "Preferred Line Style"}, {0, 0x71, "Preferred Line Style is Locked"}, {0, 0x72, "Ink"}, {0, 0x73, "Pencil"}, {0, 0x74, "Highlighter"}, {0, 0x75, "Chisel Marker"}, {0, 0x76, "Brush"}, {0, 0x77, "No Preference"}, {0, 0x80, "Digitizer Diagnostic"}, {0, 0x81, "Digitizer Error"}, {0, 0x82, "Err Normal Status"}, {0, 0x83, "Err Transducers Exceeded"}, {0, 0x84, "Err Full Trans Features Unavailable"}, {0, 0x85, "Err Charge Low"}, {0, 0x90, "Transducer Software Info"}, {0, 0x91, "Transducer Vendor Id"}, {0, 0x92, "Transducer Product Id"}, {0, 0x93, "Device Supported Protocols"}, {0, 0x94, "Transducer Supported Protocols"}, {0, 0x95, "No Protocol"}, {0, 0x96, "Wacom AES Protocol"}, {0, 0x97, "USI Protocol"}, {0, 0x98, "Microsoft Pen Protocol"}, {0, 0xA0, "Supported Report Rates"}, {0, 0xA1, "Report Rate"}, {0, 0xA2, "Transducer Connected"}, {0, 0xA3, "Switch Disabled"}, {0, 0xA4, "Switch Unimplemented"}, {0, 0xA5, "Transducer Switches"}, { 15, 0, "PhysicalInterfaceDevice" }, {0, 0x00, "Undefined"}, {0, 0x01, "Physical_Interface_Device"}, {0, 0x20, "Normal"}, {0, 0x21, "Set_Effect_Report"}, {0, 0x22, "Effect_Block_Index"}, {0, 
0x23, "Parameter_Block_Offset"}, {0, 0x24, "ROM_Flag"}, {0, 0x25, "Effect_Type"}, {0, 0x26, "ET_Constant_Force"}, {0, 0x27, "ET_Ramp"}, {0, 0x28, "ET_Custom_Force_Data"}, {0, 0x30, "ET_Square"}, {0, 0x31, "ET_Sine"}, {0, 0x32, "ET_Triangle"}, {0, 0x33, "ET_Sawtooth_Up"}, {0, 0x34, "ET_Sawtooth_Down"}, {0, 0x40, "ET_Spring"}, {0, 0x41, "ET_Damper"}, {0, 0x42, "ET_Inertia"}, {0, 0x43, "ET_Friction"}, {0, 0x50, "Duration"}, {0, 0x51, "Sample_Period"}, {0, 0x52, "Gain"}, {0, 0x53, "Trigger_Button"}, {0, 0x54, "Trigger_Repeat_Interval"}, {0, 0x55, "Axes_Enable"}, {0, 0x56, "Direction_Enable"}, {0, 0x57, "Direction"}, {0, 0x58, "Type_Specific_Block_Offset"}, {0, 0x59, "Block_Type"}, {0, 0x5A, "Set_Envelope_Report"}, {0, 0x5B, "Attack_Level"}, {0, 0x5C, "Attack_Time"}, {0, 0x5D, "Fade_Level"}, {0, 0x5E, "Fade_Time"}, {0, 0x5F, "Set_Condition_Report"}, {0, 0x60, "CP_Offset"}, {0, 0x61, "Positive_Coefficient"}, {0, 0x62, "Negative_Coefficient"}, {0, 0x63, "Positive_Saturation"}, {0, 0x64, "Negative_Saturation"}, {0, 0x65, "Dead_Band"}, {0, 0x66, "Download_Force_Sample"}, {0, 0x67, "Isoch_Custom_Force_Enable"}, {0, 0x68, "Custom_Force_Data_Report"}, {0, 0x69, "Custom_Force_Data"}, {0, 0x6A, "Custom_Force_Vendor_Defined_Data"}, {0, 0x6B, "Set_Custom_Force_Report"}, {0, 0x6C, "Custom_Force_Data_Offset"}, {0, 0x6D, "Sample_Count"}, {0, 0x6E, "Set_Periodic_Report"}, {0, 0x6F, "Offset"}, {0, 0x70, "Magnitude"}, {0, 0x71, "Phase"}, {0, 0x72, "Period"}, {0, 0x73, "Set_Constant_Force_Report"}, {0, 0x74, "Set_Ramp_Force_Report"}, {0, 0x75, "Ramp_Start"}, {0, 0x76, "Ramp_End"}, {0, 0x77, "Effect_Operation_Report"}, {0, 0x78, "Effect_Operation"}, {0, 0x79, "Op_Effect_Start"}, {0, 0x7A, "Op_Effect_Start_Solo"}, {0, 0x7B, "Op_Effect_Stop"}, {0, 0x7C, "Loop_Count"}, {0, 0x7D, "Device_Gain_Report"}, {0, 0x7E, "Device_Gain"}, {0, 0x7F, "PID_Pool_Report"}, {0, 0x80, "RAM_Pool_Size"}, {0, 0x81, "ROM_Pool_Size"}, {0, 0x82, "ROM_Effect_Block_Count"}, {0, 0x83, "Simultaneous_Effects_Max"}, {0, 0x84, "Pool_Alignment"}, {0, 0x85, "PID_Pool_Move_Report"}, {0, 0x86, "Move_Source"}, {0, 0x87, "Move_Destination"}, {0, 0x88, "Move_Length"}, {0, 0x89, "PID_Block_Load_Report"}, {0, 0x8B, "Block_Load_Status"}, {0, 0x8C, "Block_Load_Success"}, {0, 0x8D, "Block_Load_Full"}, {0, 0x8E, "Block_Load_Error"}, {0, 0x8F, "Block_Handle"}, {0, 0x90, "PID_Block_Free_Report"}, {0, 0x91, "Type_Specific_Block_Handle"}, {0, 0x92, "PID_State_Report"}, {0, 0x94, "Effect_Playing"}, {0, 0x95, "PID_Device_Control_Report"}, {0, 0x96, "PID_Device_Control"}, {0, 0x97, "DC_Enable_Actuators"}, {0, 0x98, "DC_Disable_Actuators"}, {0, 0x99, "DC_Stop_All_Effects"}, {0, 0x9A, "DC_Device_Reset"}, {0, 0x9B, "DC_Device_Pause"}, {0, 0x9C, "DC_Device_Continue"}, {0, 0x9F, "Device_Paused"}, {0, 0xA0, "Actuators_Enabled"}, {0, 0xA4, "Safety_Switch"}, {0, 0xA5, "Actuator_Override_Switch"}, {0, 0xA6, "Actuator_Power"}, {0, 0xA7, "Start_Delay"}, {0, 0xA8, "Parameter_Block_Size"}, {0, 0xA9, "Device_Managed_Pool"}, {0, 0xAA, "Shared_Parameter_Blocks"}, {0, 0xAB, "Create_New_Effect_Report"}, {0, 0xAC, "RAM_Pool_Available"}, { 0x20, 0, "Sensor" }, { 0x20, 0x01, "Sensor" }, { 0x20, 0x10, "Biometric" }, { 0x20, 0x11, "BiometricHumanPresence" }, { 0x20, 0x12, "BiometricHumanProximity" }, { 0x20, 0x13, "BiometricHumanTouch" }, { 0x20, 0x20, "Electrical" }, { 0x20, 0x21, "ElectricalCapacitance" }, { 0x20, 0x22, "ElectricalCurrent" }, { 0x20, 0x23, "ElectricalPower" }, { 0x20, 0x24, "ElectricalInductance" }, { 0x20, 0x25, "ElectricalResistance" }, { 0x20, 0x26, 
"ElectricalVoltage" }, { 0x20, 0x27, "ElectricalPoteniometer" }, { 0x20, 0x28, "ElectricalFrequency" }, { 0x20, 0x29, "ElectricalPeriod" }, { 0x20, 0x30, "Environmental" }, { 0x20, 0x31, "EnvironmentalAtmosphericPressure" }, { 0x20, 0x32, "EnvironmentalHumidity" }, { 0x20, 0x33, "EnvironmentalTemperature" }, { 0x20, 0x34, "EnvironmentalWindDirection" }, { 0x20, 0x35, "EnvironmentalWindSpeed" }, { 0x20, 0x40, "Light" }, { 0x20, 0x41, "LightAmbientLight" }, { 0x20, 0x42, "LightConsumerInfrared" }, { 0x20, 0x50, "Location" }, { 0x20, 0x51, "LocationBroadcast" }, { 0x20, 0x52, "LocationDeadReckoning" }, { 0x20, 0x53, "LocationGPS" }, { 0x20, 0x54, "LocationLookup" }, { 0x20, 0x55, "LocationOther" }, { 0x20, 0x56, "LocationStatic" }, { 0x20, 0x57, "LocationTriangulation" }, { 0x20, 0x60, "Mechanical" }, { 0x20, 0x61, "MechanicalBooleanSwitch" }, { 0x20, 0x62, "MechanicalBooleanSwitchArray" }, { 0x20, 0x63, "MechanicalMultivalueSwitch" }, { 0x20, 0x64, "MechanicalForce" }, { 0x20, 0x65, "MechanicalPressure" }, { 0x20, 0x66, "MechanicalStrain" }, { 0x20, 0x67, "MechanicalWeight" }, { 0x20, 0x68, "MechanicalHapticVibrator" }, { 0x20, 0x69, "MechanicalHallEffectSwitch" }, { 0x20, 0x70, "Motion" }, { 0x20, 0x71, "MotionAccelerometer1D" }, { 0x20, 0x72, "MotionAccelerometer2D" }, { 0x20, 0x73, "MotionAccelerometer3D" }, { 0x20, 0x74, "MotionGyrometer1D" }, { 0x20, 0x75, "MotionGyrometer2D" }, { 0x20, 0x76, "MotionGyrometer3D" }, { 0x20, 0x77, "MotionMotionDetector" }, { 0x20, 0x78, "MotionSpeedometer" }, { 0x20, 0x79, "MotionAccelerometer" }, { 0x20, 0x7A, "MotionGyrometer" }, { 0x20, 0x80, "Orientation" }, { 0x20, 0x81, "OrientationCompass1D" }, { 0x20, 0x82, "OrientationCompass2D" }, { 0x20, 0x83, "OrientationCompass3D" }, { 0x20, 0x84, "OrientationInclinometer1D" }, { 0x20, 0x85, "OrientationInclinometer2D" }, { 0x20, 0x86, "OrientationInclinometer3D" }, { 0x20, 0x87, "OrientationDistance1D" }, { 0x20, 0x88, "OrientationDistance2D" }, { 0x20, 0x89, "OrientationDistance3D" }, { 0x20, 0x8A, "OrientationDeviceOrientation" }, { 0x20, 0x8B, "OrientationCompass" }, { 0x20, 0x8C, "OrientationInclinometer" }, { 0x20, 0x8D, "OrientationDistance" }, { 0x20, 0x90, "Scanner" }, { 0x20, 0x91, "ScannerBarcode" }, { 0x20, 0x91, "ScannerRFID" }, { 0x20, 0x91, "ScannerNFC" }, { 0x20, 0xA0, "Time" }, { 0x20, 0xA1, "TimeAlarmTimer" }, { 0x20, 0xA2, "TimeRealTimeClock" }, { 0x20, 0xE0, "Other" }, { 0x20, 0xE1, "OtherCustom" }, { 0x20, 0xE2, "OtherGeneric" }, { 0x20, 0xE3, "OtherGenericEnumerator" }, { 0x84, 0, "Power Device" }, { 0x84, 0x02, "PresentStatus" }, { 0x84, 0x03, "ChangeStatus" }, { 0x84, 0x04, "UPS" }, { 0x84, 0x05, "PowerSupply" }, { 0x84, 0x10, "BatterySystem" }, { 0x84, 0x11, "BatterySystemID" }, { 0x84, 0x12, "Battery" }, { 0x84, 0x13, "BatteryID" }, { 0x84, 0x14, "Charger" }, { 0x84, 0x15, "ChargerID" }, { 0x84, 0x16, "PowerConverter" }, { 0x84, 0x17, "PowerConverterID" }, { 0x84, 0x18, "OutletSystem" }, { 0x84, 0x19, "OutletSystemID" }, { 0x84, 0x1a, "Input" }, { 0x84, 0x1b, "InputID" }, { 0x84, 0x1c, "Output" }, { 0x84, 0x1d, "OutputID" }, { 0x84, 0x1e, "Flow" }, { 0x84, 0x1f, "FlowID" }, { 0x84, 0x20, "Outlet" }, { 0x84, 0x21, "OutletID" }, { 0x84, 0x22, "Gang" }, { 0x84, 0x24, "PowerSummary" }, { 0x84, 0x25, "PowerSummaryID" }, { 0x84, 0x30, "Voltage" }, { 0x84, 0x31, "Current" }, { 0x84, 0x32, "Frequency" }, { 0x84, 0x33, "ApparentPower" }, { 0x84, 0x35, "PercentLoad" }, { 0x84, 0x40, "ConfigVoltage" }, { 0x84, 0x41, "ConfigCurrent" }, { 0x84, 0x43, "ConfigApparentPower" }, { 0x84, 0x53, 
"LowVoltageTransfer" }, { 0x84, 0x54, "HighVoltageTransfer" }, { 0x84, 0x56, "DelayBeforeStartup" }, { 0x84, 0x57, "DelayBeforeShutdown" }, { 0x84, 0x58, "Test" }, { 0x84, 0x5a, "AudibleAlarmControl" }, { 0x84, 0x60, "Present" }, { 0x84, 0x61, "Good" }, { 0x84, 0x62, "InternalFailure" }, { 0x84, 0x65, "Overload" }, { 0x84, 0x66, "OverCharged" }, { 0x84, 0x67, "OverTemperature" }, { 0x84, 0x68, "ShutdownRequested" }, { 0x84, 0x69, "ShutdownImminent" }, { 0x84, 0x6b, "SwitchOn/Off" }, { 0x84, 0x6c, "Switchable" }, { 0x84, 0x6d, "Used" }, { 0x84, 0x6e, "Boost" }, { 0x84, 0x73, "CommunicationLost" }, { 0x84, 0xfd, "iManufacturer" }, { 0x84, 0xfe, "iProduct" }, { 0x84, 0xff, "iSerialNumber" }, { 0x85, 0, "Battery System" }, { 0x85, 0x01, "SMBBatteryMode" }, { 0x85, 0x02, "SMBBatteryStatus" }, { 0x85, 0x03, "SMBAlarmWarning" }, { 0x85, 0x04, "SMBChargerMode" }, { 0x85, 0x05, "SMBChargerStatus" }, { 0x85, 0x06, "SMBChargerSpecInfo" }, { 0x85, 0x07, "SMBSelectorState" }, { 0x85, 0x08, "SMBSelectorPresets" }, { 0x85, 0x09, "SMBSelectorInfo" }, { 0x85, 0x29, "RemainingCapacityLimit" }, { 0x85, 0x2c, "CapacityMode" }, { 0x85, 0x42, "BelowRemainingCapacityLimit" }, { 0x85, 0x44, "Charging" }, { 0x85, 0x45, "Discharging" }, { 0x85, 0x4b, "NeedReplacement" }, { 0x85, 0x65, "AbsoluteStateOfCharge" }, { 0x85, 0x66, "RemainingCapacity" }, { 0x85, 0x68, "RunTimeToEmpty" }, { 0x85, 0x6a, "AverageTimeToFull" }, { 0x85, 0x83, "DesignCapacity" }, { 0x85, 0x85, "ManufacturerDate" }, { 0x85, 0x89, "iDeviceChemistry" }, { 0x85, 0x8b, "Rechargeable" }, { 0x85, 0x8f, "iOEMInformation" }, { 0x85, 0x8d, "CapacityGranularity1" }, { 0x85, 0xd0, "ACPresent" }, /* pages 0xff00 to 0xffff are vendor-specific */ { 0xffff, 0, "Vendor-specific-FF" }, { 0, 0, NULL } }; /* Either output directly into simple seq_file, or (if f == NULL) * allocate a separate buffer that will then be passed to the 'events' * ringbuffer. * * This is because these functions can be called both for "one-shot" * "rdesc" while resolving, or for blocking "events". * * This holds both for resolv_usage_page() and hid_resolv_usage(). 
*/ static char *resolv_usage_page(unsigned page, struct seq_file *f) { const struct hid_usage_entry *p; char *buf = NULL; if (!f) { buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_ATOMIC); if (!buf) return ERR_PTR(-ENOMEM); } for (p = hid_usage_table; p->description; p++) if (p->page == page) { if (!f) { snprintf(buf, HID_DEBUG_BUFSIZE, "%s", p->description); return buf; } else { seq_printf(f, "%s", p->description); return NULL; } } if (!f) snprintf(buf, HID_DEBUG_BUFSIZE, "%04x", page); else seq_printf(f, "%04x", page); return buf; } char *hid_resolv_usage(unsigned usage, struct seq_file *f) { const struct hid_usage_entry *p; char *buf = NULL; int len = 0; buf = resolv_usage_page(usage >> 16, f); if (IS_ERR(buf)) { pr_err("error allocating HID debug buffer\n"); return NULL; } if (!f) { len = strlen(buf); len += scnprintf(buf + len, HID_DEBUG_BUFSIZE - len, "."); } else { seq_printf(f, "."); } for (p = hid_usage_table; p->description; p++) if (p->page == (usage >> 16)) { for(++p; p->description && p->usage != 0; p++) if (p->usage == (usage & 0xffff)) { if (!f) snprintf(buf + len, HID_DEBUG_BUFSIZE - len, "%s", p->description); else seq_printf(f, "%s", p->description); return buf; } break; } if (!f) snprintf(buf + len, HID_DEBUG_BUFSIZE - len, "%04x", usage & 0xffff); else seq_printf(f, "%04x", usage & 0xffff); return buf; } EXPORT_SYMBOL_GPL(hid_resolv_usage); static void tab(int n, struct seq_file *f) { seq_printf(f, "%*s", n, ""); } void hid_dump_field(struct hid_field *field, int n, struct seq_file *f) { int j; if (field->physical) { tab(n, f); seq_printf(f, "Physical("); hid_resolv_usage(field->physical, f); seq_printf(f, ")\n"); } if (field->logical) { tab(n, f); seq_printf(f, "Logical("); hid_resolv_usage(field->logical, f); seq_printf(f, ")\n"); } if (field->application) { tab(n, f); seq_printf(f, "Application("); hid_resolv_usage(field->application, f); seq_printf(f, ")\n"); } tab(n, f); seq_printf(f, "Usage(%d)\n", field->maxusage); for (j = 0; j < field->maxusage; j++) { tab(n+2, f); hid_resolv_usage(field->usage[j].hid, f); seq_printf(f, "\n"); } if (field->logical_minimum != field->logical_maximum) { tab(n, f); seq_printf(f, "Logical Minimum(%d)\n", field->logical_minimum); tab(n, f); seq_printf(f, "Logical Maximum(%d)\n", field->logical_maximum); } if (field->physical_minimum != field->physical_maximum) { tab(n, f); seq_printf(f, "Physical Minimum(%d)\n", field->physical_minimum); tab(n, f); seq_printf(f, "Physical Maximum(%d)\n", field->physical_maximum); } if (field->unit_exponent) { tab(n, f); seq_printf(f, "Unit Exponent(%d)\n", field->unit_exponent); } if (field->unit) { static const char *systems[5] = { "None", "SI Linear", "SI Rotation", "English Linear", "English Rotation" }; static const char *units[5][8] = { { "None", "None", "None", "None", "None", "None", "None", "None" }, { "None", "Centimeter", "Gram", "Seconds", "Kelvin", "Ampere", "Candela", "None" }, { "None", "Radians", "Gram", "Seconds", "Kelvin", "Ampere", "Candela", "None" }, { "None", "Inch", "Slug", "Seconds", "Fahrenheit", "Ampere", "Candela", "None" }, { "None", "Degrees", "Slug", "Seconds", "Fahrenheit", "Ampere", "Candela", "None" } }; int i; int sys; __u32 data = field->unit; /* First nibble tells us which system we're in. 
*/ sys = data & 0xf; data >>= 4; if(sys > 4) { tab(n, f); seq_printf(f, "Unit(Invalid)\n"); } else { int earlier_unit = 0; tab(n, f); seq_printf(f, "Unit(%s : ", systems[sys]); for (i=1 ; i<sizeof(__u32)*2 ; i++) { char nibble = data & 0xf; data >>= 4; if (nibble != 0) { if(earlier_unit++ > 0) seq_printf(f, "*"); seq_printf(f, "%s", units[sys][i]); if(nibble != 1) { /* This is a _signed_ nibble(!) */ int val = nibble & 0x7; if(nibble & 0x08) val = -((0x7 & ~val) +1); seq_printf(f, "^%d", val); } } } seq_printf(f, ")\n"); } } tab(n, f); seq_printf(f, "Report Size(%u)\n", field->report_size); tab(n, f); seq_printf(f, "Report Count(%u)\n", field->report_count); tab(n, f); seq_printf(f, "Report Offset(%u)\n", field->report_offset); tab(n, f); seq_printf(f, "Flags( "); j = field->flags; seq_printf(f, "%s", HID_MAIN_ITEM_CONSTANT & j ? "Constant " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_VARIABLE & j ? "Variable " : "Array "); seq_printf(f, "%s", HID_MAIN_ITEM_RELATIVE & j ? "Relative " : "Absolute "); seq_printf(f, "%s", HID_MAIN_ITEM_WRAP & j ? "Wrap " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NONLINEAR & j ? "NonLinear " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NO_PREFERRED & j ? "NoPreferredState " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NULL_STATE & j ? "NullState " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_VOLATILE & j ? "Volatile " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_BUFFERED_BYTE & j ? "BufferedByte " : ""); seq_printf(f, ")\n"); } EXPORT_SYMBOL_GPL(hid_dump_field); void hid_dump_device(struct hid_device *device, struct seq_file *f) { struct hid_report_enum *report_enum; struct hid_report *report; struct list_head *list; unsigned i,k; static const char *table[] = {"INPUT", "OUTPUT", "FEATURE"}; for (i = 0; i < HID_REPORT_TYPES; i++) { report_enum = device->report_enum + i; list = report_enum->report_list.next; while (list != &report_enum->report_list) { report = (struct hid_report *) list; tab(2, f); seq_printf(f, "%s", table[i]); if (report->id) seq_printf(f, "(%d)", report->id); seq_printf(f, "[%s]", table[report->type]); seq_printf(f, "\n"); for (k = 0; k < report->maxfield; k++) { tab(4, f); seq_printf(f, "Field(%d)\n", k); hid_dump_field(report->field[k], 6, f); } list = list->next; } } } EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); list_for_each_entry(list, &hdev->debug_list, node) kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_debug_event); void hid_dump_report(struct hid_device *hid, int type, u8 *data, int size) { struct hid_report_enum *report_enum; char *buf; unsigned int i; buf = kmalloc(HID_DEBUG_BUFSIZE, GFP_ATOMIC); if (!buf) return; report_enum = hid->report_enum + type; /* dump the report */ snprintf(buf, HID_DEBUG_BUFSIZE - 1, "\nreport (size %u) (%snumbered) = ", size, report_enum->numbered ? 
"" : "un"); hid_debug_event(hid, buf); for (i = 0; i < size; i++) { snprintf(buf, HID_DEBUG_BUFSIZE - 1, " %02x", data[i]); hid_debug_event(hid, buf); } hid_debug_event(hid, "\n"); kfree(buf); } EXPORT_SYMBOL_GPL(hid_dump_report); void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 value) { char *buf; int len; buf = hid_resolv_usage(usage->hid, NULL); if (!buf) return; len = strlen(buf); snprintf(buf + len, HID_DEBUG_BUFSIZE - len - 1, " = %d\n", value); hid_debug_event(hdev, buf); kfree(buf); wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); static const char *events[EV_MAX + 1] = { [EV_SYN] = "Sync", [EV_KEY] = "Key", [EV_REL] = "Relative", [EV_ABS] = "Absolute", [EV_MSC] = "Misc", [EV_LED] = "LED", [EV_SND] = "Sound", [EV_REP] = "Repeat", [EV_FF] = "ForceFeedback", [EV_PWR] = "Power", [EV_FF_STATUS] = "ForceFeedbackStatus", }; static const char *syncs[3] = { [SYN_REPORT] = "Report", [SYN_CONFIG] = "Config", [SYN_MT_REPORT] = "MT Report", }; static const char *keys[KEY_MAX + 1] = { [KEY_RESERVED] = "Reserved", [KEY_ESC] = "Esc", [KEY_1] = "1", [KEY_2] = "2", [KEY_3] = "3", [KEY_4] = "4", [KEY_5] = "5", [KEY_6] = "6", [KEY_7] = "7", [KEY_8] = "8", [KEY_9] = "9", [KEY_0] = "0", [KEY_MINUS] = "Minus", [KEY_EQUAL] = "Equal", [KEY_BACKSPACE] = "Backspace", [KEY_TAB] = "Tab", [KEY_Q] = "Q", [KEY_W] = "W", [KEY_E] = "E", [KEY_R] = "R", [KEY_T] = "T", [KEY_Y] = "Y", [KEY_U] = "U", [KEY_I] = "I", [KEY_O] = "O", [KEY_P] = "P", [KEY_LEFTBRACE] = "LeftBrace", [KEY_RIGHTBRACE] = "RightBrace", [KEY_ENTER] = "Enter", [KEY_LEFTCTRL] = "LeftControl", [KEY_A] = "A", [KEY_S] = "S", [KEY_D] = "D", [KEY_F] = "F", [KEY_G] = "G", [KEY_H] = "H", [KEY_J] = "J", [KEY_K] = "K", [KEY_L] = "L", [KEY_SEMICOLON] = "Semicolon", [KEY_APOSTROPHE] = "Apostrophe", [KEY_GRAVE] = "Grave", [KEY_LEFTSHIFT] = "LeftShift", [KEY_BACKSLASH] = "BackSlash", [KEY_Z] = "Z", [KEY_X] = "X", [KEY_C] = "C", [KEY_V] = "V", [KEY_B] = "B", [KEY_N] = "N", [KEY_M] = "M", [KEY_COMMA] = "Comma", [KEY_DOT] = "Dot", [KEY_SLASH] = "Slash", [KEY_RIGHTSHIFT] = "RightShift", [KEY_KPASTERISK] = "KPAsterisk", [KEY_LEFTALT] = "LeftAlt", [KEY_SPACE] = "Space", [KEY_CAPSLOCK] = "CapsLock", [KEY_F1] = "F1", [KEY_F2] = "F2", [KEY_F3] = "F3", [KEY_F4] = "F4", [KEY_F5] = "F5", [KEY_F6] = "F6", [KEY_F7] = "F7", [KEY_F8] = "F8", [KEY_F9] = "F9", [KEY_F10] = "F10", [KEY_NUMLOCK] = "NumLock", [KEY_SCROLLLOCK] = "ScrollLock", [KEY_KP7] = "KP7", [KEY_KP8] = "KP8", [KEY_KP9] = "KP9", [KEY_KPMINUS] = "KPMinus", [KEY_KP4] = "KP4", [KEY_KP5] = "KP5", [KEY_KP6] = "KP6", [KEY_KPPLUS] = "KPPlus", [KEY_KP1] = "KP1", [KEY_KP2] = "KP2", [KEY_KP3] = "KP3", [KEY_KP0] = "KP0", [KEY_KPDOT] = "KPDot", [KEY_ZENKAKUHANKAKU] = "Zenkaku/Hankaku", [KEY_102ND] = "102nd", [KEY_F11] = "F11", [KEY_F12] = "F12", [KEY_RO] = "RO", [KEY_KATAKANA] = "Katakana", [KEY_HIRAGANA] = "HIRAGANA", [KEY_HENKAN] = "Henkan", [KEY_KATAKANAHIRAGANA] = "Katakana/Hiragana", [KEY_MUHENKAN] = "Muhenkan", [KEY_KPJPCOMMA] = "KPJpComma", [KEY_KPENTER] = "KPEnter", [KEY_RIGHTCTRL] = "RightCtrl", [KEY_KPSLASH] = "KPSlash", [KEY_SYSRQ] = "SysRq", [KEY_RIGHTALT] = "RightAlt", [KEY_LINEFEED] = "LineFeed", [KEY_HOME] = "Home", [KEY_UP] = "Up", [KEY_PAGEUP] = "PageUp", [KEY_LEFT] = "Left", [KEY_RIGHT] = "Right", [KEY_END] = "End", [KEY_DOWN] = "Down", [KEY_PAGEDOWN] = "PageDown", [KEY_INSERT] = "Insert", [KEY_DELETE] = "Delete", [KEY_MACRO] = "Macro", [KEY_MUTE] = "Mute", [KEY_VOLUMEDOWN] = "VolumeDown", [KEY_VOLUMEUP] = "VolumeUp", [KEY_POWER] = "Power", [KEY_KPEQUAL] 
= "KPEqual", [KEY_KPPLUSMINUS] = "KPPlusMinus", [KEY_PAUSE] = "Pause", [KEY_KPCOMMA] = "KPComma", [KEY_HANGUEL] = "Hangeul", [KEY_HANJA] = "Hanja", [KEY_YEN] = "Yen", [KEY_LEFTMETA] = "LeftMeta", [KEY_RIGHTMETA] = "RightMeta", [KEY_COMPOSE] = "Compose", [KEY_STOP] = "Stop", [KEY_AGAIN] = "Again", [KEY_PROPS] = "Props", [KEY_UNDO] = "Undo", [KEY_FRONT] = "Front", [KEY_COPY] = "Copy", [KEY_OPEN] = "Open", [KEY_PASTE] = "Paste", [KEY_FIND] = "Find", [KEY_CUT] = "Cut", [KEY_HELP] = "Help", [KEY_MENU] = "Menu", [KEY_CALC] = "Calc", [KEY_SETUP] = "Setup", [KEY_SLEEP] = "Sleep", [KEY_WAKEUP] = "WakeUp", [KEY_FILE] = "File", [KEY_SENDFILE] = "SendFile", [KEY_DELETEFILE] = "DeleteFile", [KEY_XFER] = "X-fer", [KEY_PROG1] = "Prog1", [KEY_PROG2] = "Prog2", [KEY_WWW] = "WWW", [KEY_MSDOS] = "MSDOS", [KEY_COFFEE] = "Coffee", [KEY_ROTATE_DISPLAY] = "RotateDisplay", [KEY_CYCLEWINDOWS] = "CycleWindows", [KEY_MAIL] = "Mail", [KEY_BOOKMARKS] = "Bookmarks", [KEY_COMPUTER] = "Computer", [KEY_BACK] = "Back", [KEY_FORWARD] = "Forward", [KEY_CLOSECD] = "CloseCD", [KEY_EJECTCD] = "EjectCD", [KEY_EJECTCLOSECD] = "EjectCloseCD", [KEY_NEXTSONG] = "NextSong", [KEY_PLAYPAUSE] = "PlayPause", [KEY_PREVIOUSSONG] = "PreviousSong", [KEY_STOPCD] = "StopCD", [KEY_RECORD] = "Record", [KEY_REWIND] = "Rewind", [KEY_PHONE] = "Phone", [KEY_ISO] = "ISOKey", [KEY_CONFIG] = "Config", [KEY_HOMEPAGE] = "HomePage", [KEY_REFRESH] = "Refresh", [KEY_EXIT] = "Exit", [KEY_MOVE] = "Move", [KEY_EDIT] = "Edit", [KEY_SCROLLUP] = "ScrollUp", [KEY_SCROLLDOWN] = "ScrollDown", [KEY_KPLEFTPAREN] = "KPLeftParenthesis", [KEY_KPRIGHTPAREN] = "KPRightParenthesis", [KEY_NEW] = "New", [KEY_REDO] = "Redo", [KEY_F13] = "F13", [KEY_F14] = "F14", [KEY_F15] = "F15", [KEY_F16] = "F16", [KEY_F17] = "F17", [KEY_F18] = "F18", [KEY_F19] = "F19", [KEY_F20] = "F20", [KEY_F21] = "F21", [KEY_F22] = "F22", [KEY_F23] = "F23", [KEY_F24] = "F24", [KEY_PLAYCD] = "PlayCD", [KEY_PAUSECD] = "PauseCD", [KEY_PROG3] = "Prog3", [KEY_PROG4] = "Prog4", [KEY_ALL_APPLICATIONS] = "AllApplications", [KEY_SUSPEND] = "Suspend", [KEY_CLOSE] = "Close", [KEY_PLAY] = "Play", [KEY_FASTFORWARD] = "FastForward", [KEY_BASSBOOST] = "BassBoost", [KEY_PRINT] = "Print", [KEY_HP] = "HP", [KEY_CAMERA] = "Camera", [KEY_SOUND] = "Sound", [KEY_QUESTION] = "Question", [KEY_EMAIL] = "Email", [KEY_CHAT] = "Chat", [KEY_SEARCH] = "Search", [KEY_CONNECT] = "Connect", [KEY_FINANCE] = "Finance", [KEY_SPORT] = "Sport", [KEY_SHOP] = "Shop", [KEY_ALTERASE] = "AlternateErase", [KEY_CANCEL] = "Cancel", [KEY_BRIGHTNESSDOWN] = "BrightnessDown", [KEY_BRIGHTNESSUP] = "BrightnessUp", [KEY_MEDIA] = "Media", [KEY_UNKNOWN] = "Unknown", [BTN_DPAD_UP] = "BtnDPadUp", [BTN_DPAD_DOWN] = "BtnDPadDown", [BTN_DPAD_LEFT] = "BtnDPadLeft", [BTN_DPAD_RIGHT] = "BtnDPadRight", [BTN_0] = "Btn0", [BTN_1] = "Btn1", [BTN_2] = "Btn2", [BTN_3] = "Btn3", [BTN_4] = "Btn4", [BTN_5] = "Btn5", [BTN_6] = "Btn6", [BTN_7] = "Btn7", [BTN_8] = "Btn8", [BTN_9] = "Btn9", [BTN_LEFT] = "LeftBtn", [BTN_RIGHT] = "RightBtn", [BTN_MIDDLE] = "MiddleBtn", [BTN_SIDE] = "SideBtn", [BTN_EXTRA] = "ExtraBtn", [BTN_FORWARD] = "ForwardBtn", [BTN_BACK] = "BackBtn", [BTN_TASK] = "TaskBtn", [BTN_TRIGGER] = "Trigger", [BTN_THUMB] = "ThumbBtn", [BTN_THUMB2] = "ThumbBtn2", [BTN_TOP] = "TopBtn", [BTN_TOP2] = "TopBtn2", [BTN_PINKIE] = "PinkieBtn", [BTN_BASE] = "BaseBtn", [BTN_BASE2] = "BaseBtn2", [BTN_BASE3] = "BaseBtn3", [BTN_BASE4] = "BaseBtn4", [BTN_BASE5] = "BaseBtn5", [BTN_BASE6] = "BaseBtn6", [BTN_DEAD] = "BtnDead", [BTN_A] = "BtnA", [BTN_B] = "BtnB", [BTN_C] = "BtnC", 
[BTN_X] = "BtnX", [BTN_Y] = "BtnY", [BTN_Z] = "BtnZ", [BTN_TL] = "BtnTL", [BTN_TR] = "BtnTR", [BTN_TL2] = "BtnTL2", [BTN_TR2] = "BtnTR2", [BTN_SELECT] = "BtnSelect", [BTN_START] = "BtnStart", [BTN_MODE] = "BtnMode", [BTN_THUMBL] = "BtnThumbL", [BTN_THUMBR] = "BtnThumbR", [BTN_TOOL_PEN] = "ToolPen", [BTN_TOOL_RUBBER] = "ToolRubber", [BTN_TOOL_BRUSH] = "ToolBrush", [BTN_TOOL_PENCIL] = "ToolPencil", [BTN_TOOL_AIRBRUSH] = "ToolAirbrush", [BTN_TOOL_FINGER] = "ToolFinger", [BTN_TOOL_MOUSE] = "ToolMouse", [BTN_TOOL_LENS] = "ToolLens", [BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus", [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap", [BTN_GEAR_DOWN] = "WheelBtn", [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", [KEY_OPTION] = "Option", [KEY_INFO] = "Info", [KEY_TIME] = "Time", [KEY_VENDOR] = "Vendor", [KEY_ARCHIVE] = "Archive", [KEY_PROGRAM] = "Program", [KEY_CHANNEL] = "Channel", [KEY_FAVORITES] = "Favorites", [KEY_EPG] = "EPG", [KEY_PVR] = "PVR", [KEY_MHP] = "MHP", [KEY_LANGUAGE] = "Language", [KEY_TITLE] = "Title", [KEY_SUBTITLE] = "Subtitle", [KEY_ANGLE] = "Angle", [KEY_ZOOM] = "Zoom", [KEY_MODE] = "Mode", [KEY_KEYBOARD] = "Keyboard", [KEY_SCREEN] = "Screen", [KEY_PC] = "PC", [KEY_TV] = "TV", [KEY_TV2] = "TV2", [KEY_VCR] = "VCR", [KEY_VCR2] = "VCR2", [KEY_SAT] = "Sat", [KEY_SAT2] = "Sat2", [KEY_CD] = "CD", [KEY_TAPE] = "Tape", [KEY_RADIO] = "Radio", [KEY_TUNER] = "Tuner", [KEY_PLAYER] = "Player", [KEY_TEXT] = "Text", [KEY_DVD] = "DVD", [KEY_AUX] = "Aux", [KEY_MP3] = "MP3", [KEY_AUDIO] = "Audio", [KEY_VIDEO] = "Video", [KEY_DIRECTORY] = "Directory", [KEY_LIST] = "List", [KEY_MEMO] = "Memo", [KEY_CALENDAR] = "Calendar", [KEY_RED] = "Red", [KEY_GREEN] = "Green", [KEY_YELLOW] = "Yellow", [KEY_BLUE] = "Blue", [KEY_CHANNELUP] = "ChannelUp", [KEY_CHANNELDOWN] = "ChannelDown", [KEY_FIRST] = "First", [KEY_LAST] = "Last", [KEY_AB] = "AB", [KEY_NEXT] = "Next", [KEY_RESTART] = "Restart", [KEY_SLOW] = "Slow", [KEY_SHUFFLE] = "Shuffle", [KEY_BREAK] = "Break", [KEY_PREVIOUS] = "Previous", [KEY_DIGITS] = "Digits", [KEY_TEEN] = "TEEN", [KEY_TWEN] = "TWEN", [KEY_DEL_EOL] = "DeleteEOL", [KEY_DEL_EOS] = "DeleteEOS", [KEY_INS_LINE] = "InsertLine", [KEY_DEL_LINE] = "DeleteLine", [KEY_SEND] = "Send", [KEY_REPLY] = "Reply", [KEY_FORWARDMAIL] = "ForwardMail", [KEY_SAVE] = "Save", [KEY_DOCUMENTS] = "Documents", [KEY_SPELLCHECK] = "SpellCheck", [KEY_LOGOFF] = "Logoff", [KEY_FN] = "Fn", [KEY_FN_ESC] = "Fn+ESC", [KEY_FN_1] = "Fn+1", [KEY_FN_2] = "Fn+2", [KEY_FN_B] = "Fn+B", [KEY_FN_D] = "Fn+D", [KEY_FN_E] = "Fn+E", [KEY_FN_F] = "Fn+F", [KEY_FN_S] = "Fn+S", [KEY_FN_F1] = "Fn+F1", [KEY_FN_F2] = "Fn+F2", [KEY_FN_F3] = "Fn+F3", [KEY_FN_F4] = "Fn+F4", [KEY_FN_F5] = "Fn+F5", [KEY_FN_F6] = "Fn+F6", [KEY_FN_F7] = "Fn+F7", [KEY_FN_F8] = "Fn+F8", [KEY_FN_F9] = "Fn+F9", [KEY_FN_F10] = "Fn+F10", [KEY_FN_F11] = "Fn+F11", [KEY_FN_F12] = "Fn+F12", [KEY_KBDILLUMTOGGLE] = "KbdIlluminationToggle", [KEY_KBDILLUMDOWN] = "KbdIlluminationDown", [KEY_KBDILLUMUP] = "KbdIlluminationUp", [KEY_SWITCHVIDEOMODE] = "SwitchVideoMode", [KEY_BUTTONCONFIG] = "ButtonConfig", [KEY_TASKMANAGER] = "TaskManager", [KEY_JOURNAL] = "Journal", [KEY_CONTROLPANEL] = "ControlPanel", [KEY_APPSELECT] = "AppSelect", [KEY_SCREENSAVER] = "ScreenSaver", [KEY_VOICECOMMAND] = "VoiceCommand", [KEY_ASSISTANT] = "Assistant", [KEY_KBD_LAYOUT_NEXT] = "KbdLayoutNext", [KEY_EMOJI_PICKER] = 
"EmojiPicker", [KEY_CAMERA_ACCESS_ENABLE] = "CameraAccessEnable", [KEY_CAMERA_ACCESS_DISABLE] = "CameraAccessDisable", [KEY_CAMERA_ACCESS_TOGGLE] = "CameraAccessToggle", [KEY_DICTATE] = "Dictate", [KEY_MICMUTE] = "MicrophoneMute", [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", [KEY_KBDINPUTASSIST_PREV] = "KbdInputAssistPrev", [KEY_KBDINPUTASSIST_NEXT] = "KbdInputAssistNext", [KEY_KBDINPUTASSIST_PREVGROUP] = "KbdInputAssistPrevGroup", [KEY_KBDINPUTASSIST_NEXTGROUP] = "KbdInputAssistNextGroup", [KEY_KBDINPUTASSIST_ACCEPT] = "KbdInputAssistAccept", [KEY_KBDINPUTASSIST_CANCEL] = "KbdInputAssistCancel", [KEY_MACRO1] = "Macro1", [KEY_MACRO2] = "Macro2", [KEY_MACRO3] = "Macro3", [KEY_MACRO4] = "Macro4", [KEY_MACRO5] = "Macro5", [KEY_MACRO6] = "Macro6", [KEY_MACRO7] = "Macro7", [KEY_MACRO8] = "Macro8", [KEY_MACRO9] = "Macro9", [KEY_MACRO10] = "Macro10", [KEY_MACRO11] = "Macro11", [KEY_MACRO12] = "Macro12", [KEY_MACRO13] = "Macro13", [KEY_MACRO14] = "Macro14", [KEY_MACRO15] = "Macro15", [KEY_MACRO16] = "Macro16", [KEY_MACRO17] = "Macro17", [KEY_MACRO18] = "Macro18", [KEY_MACRO19] = "Macro19", [KEY_MACRO20] = "Macro20", [KEY_MACRO21] = "Macro21", [KEY_MACRO22] = "Macro22", [KEY_MACRO23] = "Macro23", [KEY_MACRO24] = "Macro24", [KEY_MACRO25] = "Macro25", [KEY_MACRO26] = "Macro26", [KEY_MACRO27] = "Macro27", [KEY_MACRO28] = "Macro28", [KEY_MACRO29] = "Macro29", [KEY_MACRO30] = "Macro30", }; static const char *relatives[REL_MAX + 1] = { [REL_X] = "X", [REL_Y] = "Y", [REL_Z] = "Z", [REL_RX] = "Rx", [REL_RY] = "Ry", [REL_RZ] = "Rz", [REL_HWHEEL] = "HWheel", [REL_DIAL] = "Dial", [REL_WHEEL] = "Wheel", [REL_MISC] = "Misc", }; static const char *absolutes[ABS_CNT] = { [ABS_X] = "X", [ABS_Y] = "Y", [ABS_Z] = "Z", [ABS_RX] = "Rx", [ABS_RY] = "Ry", [ABS_RZ] = "Rz", [ABS_THROTTLE] = "Throttle", [ABS_RUDDER] = "Rudder", [ABS_WHEEL] = "Wheel", [ABS_GAS] = "Gas", [ABS_BRAKE] = "Brake", [ABS_HAT0X] = "Hat0X", [ABS_HAT0Y] = "Hat0Y", [ABS_HAT1X] = "Hat1X", [ABS_HAT1Y] = "Hat1Y", [ABS_HAT2X] = "Hat2X", [ABS_HAT2Y] = "Hat2Y", [ABS_HAT3X] = "Hat3X", [ABS_HAT3Y] = "Hat 3Y", [ABS_PRESSURE] = "Pressure", [ABS_DISTANCE] = "Distance", [ABS_TILT_X] = "XTilt", [ABS_TILT_Y] = "YTilt", [ABS_TOOL_WIDTH] = "ToolWidth", [ABS_VOLUME] = "Volume", [ABS_PROFILE] = "Profile", [ABS_MISC] = "Misc", [ABS_MT_TOUCH_MAJOR] = "MTMajor", [ABS_MT_TOUCH_MINOR] = "MTMinor", [ABS_MT_WIDTH_MAJOR] = "MTMajorW", [ABS_MT_WIDTH_MINOR] = "MTMinorW", [ABS_MT_ORIENTATION] = "MTOrientation", [ABS_MT_POSITION_X] = "MTPositionX", [ABS_MT_POSITION_Y] = "MTPositionY", [ABS_MT_TOOL_TYPE] = "MTToolType", [ABS_MT_BLOB_ID] = "MTBlobID", }; static const char *misc[MSC_MAX + 1] = { [MSC_SERIAL] = "Serial", [MSC_PULSELED] = "Pulseled", [MSC_GESTURE] = "Gesture", [MSC_RAW] = "RawData" }; static const char *leds[LED_MAX + 1] = { [LED_NUML] = "NumLock", [LED_CAPSL] = "CapsLock", [LED_SCROLLL] = "ScrollLock", [LED_COMPOSE] = "Compose", [LED_KANA] = "Kana", [LED_SLEEP] = "Sleep", [LED_SUSPEND] = "Suspend", [LED_MUTE] = "Mute", [LED_MISC] = "Misc", }; static const char *repeats[REP_MAX + 1] = { [REP_DELAY] = "Delay", [REP_PERIOD] = "Period" }; static const char *sounds[SND_MAX + 1] = { [SND_CLICK] = "Click", [SND_BELL] = "Bell", [SND_TONE] = "Tone" }; static const char **names[EV_MAX + 1] = { [EV_SYN] = syncs, [EV_KEY] = keys, [EV_REL] = relatives, [EV_ABS] = absolutes, [EV_MSC] = misc, [EV_LED] = leds, [EV_SND] = sounds, [EV_REP] = repeats, }; static void hid_resolv_event(__u8 type, __u16 code, 
struct seq_file *f) { seq_printf(f, "%s.%s", events[type] ? events[type] : "?", names[type] ? (names[type][code] ? names[type][code] : "?") : "?"); } static void hid_dump_input_mapping(struct hid_device *hid, struct seq_file *f) { int i, j, k; struct hid_report *report; struct hid_usage *usage; for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) { for ( j = 0; j < report->field[i]->maxusage; j++) { usage = report->field[i]->usage + j; hid_resolv_usage(usage->hid, f); seq_printf(f, " ---> "); hid_resolv_event(usage->type, usage->code, f); seq_printf(f, "\n"); } } } } } static int hid_debug_rdesc_show(struct seq_file *f, void *p) { struct hid_device *hdev = f->private; const __u8 *rdesc = hdev->rdesc; unsigned rsize = hdev->rsize; int i; if (!rdesc) { rdesc = hdev->dev_rdesc; rsize = hdev->dev_rsize; } /* dump HID report descriptor */ for (i = 0; i < rsize; i++) seq_printf(f, "%02x ", rdesc[i]); seq_printf(f, "\n\n"); /* dump parsed data and input mappings */ if (down_interruptible(&hdev->driver_input_lock)) return 0; hid_dump_device(hdev, f); seq_printf(f, "\n"); hid_dump_input_mapping(hdev, f); up(&hdev->driver_input_lock); return 0; } static int hid_debug_events_open(struct inode *inode, struct file *file) { int err = 0; struct hid_debug_list *list; unsigned long flags; if (!(list = kzalloc(sizeof(struct hid_debug_list), GFP_KERNEL))) { err = -ENOMEM; goto out; } err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); if (err) { kfree(list); goto out; } list->hdev = (struct hid_device *) inode->i_private; kref_get(&list->hdev->ref); file->private_data = list; mutex_init(&list->read_mutex); spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_add_tail(&list->node, &list->hdev->debug_list); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); out: return err; } static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); if (kfifo_is_empty(&list->hid_debug_fifo)) { add_wait_queue(&list->hdev->debug_wait, &wait); set_current_state(TASK_INTERRUPTIBLE); while (kfifo_is_empty(&list->hid_debug_fifo)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } /* if list->hdev is NULL we cannot remove_wait_queue(). * if list->hdev->debug is 0 then hid_debug_unregister() * was already called and list->hdev is being destroyed. * if we add remove_wait_queue() here we can hit a race. 
*/ if (!list->hdev || !list->hdev->debug) { ret = -EIO; set_current_state(TASK_RUNNING); goto out; } if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } /* allow O_NONBLOCK from other threads */ mutex_unlock(&list->read_mutex); schedule(); mutex_lock(&list->read_mutex); set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); remove_wait_queue(&list->hdev->debug_wait, &wait); if (ret) goto out; } /* pass the fifo content to userspace, locking is not needed with only * one concurrent reader and one concurrent writer */ ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); if (ret) goto out; ret = copied; out: mutex_unlock(&list->read_mutex); return ret; } static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait) { struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); if (!kfifo_is_empty(&list->hid_debug_fifo)) return EPOLLIN | EPOLLRDNORM; if (!list->hdev->debug) return EPOLLERR | EPOLLHUP; return 0; } static int hid_debug_events_release(struct inode *inode, struct file *file) { struct hid_debug_list *list = file->private_data; unsigned long flags; spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); kfifo_free(&list->hid_debug_fifo); kref_put(&list->hdev->ref, hiddev_free); kfree(list); return 0; } DEFINE_SHOW_ATTRIBUTE(hid_debug_rdesc); static const struct file_operations hid_debug_events_fops = { .owner = THIS_MODULE, .open = hid_debug_events_open, .read = hid_debug_events_read, .poll = hid_debug_events_poll, .release = hid_debug_events_release, .llseek = noop_llseek, }; void hid_debug_register(struct hid_device *hdev, const char *name) { hdev->debug_dir = debugfs_create_dir(name, hid_debug_root); hdev->debug_rdesc = debugfs_create_file("rdesc", 0400, hdev->debug_dir, hdev, &hid_debug_rdesc_fops); hdev->debug_events = debugfs_create_file("events", 0400, hdev->debug_dir, hdev, &hid_debug_events_fops); hdev->debug = 1; } void hid_debug_unregister(struct hid_device *hdev) { hdev->debug = 0; wake_up_interruptible(&hdev->debug_wait); debugfs_remove(hdev->debug_rdesc); debugfs_remove(hdev->debug_events); debugfs_remove(hdev->debug_dir); } void hid_debug_init(void) { hid_debug_root = debugfs_create_dir("hid", NULL); } void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); }
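/*
 * Illustrative userspace sketch (not part of the kernel file above): reading the
 * debugfs nodes that hid_debug_register() creates. The code above exposes "rdesc"
 * and "events" under /sys/kernel/debug/hid/<device>/, where <device> is the name
 * passed to hid_debug_register(); the directory name used below is only a
 * hypothetical example, so substitute one listed on your system. A read() on
 * "events" blocks until hid_debug_event() queues data, matching the
 * hid_debug_events_read()/hid_debug_events_poll() behaviour shown above.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical device directory; list /sys/kernel/debug/hid/ to find yours. */
	const char *path = "/sys/kernel/debug/hid/0003:046D:C534.0001/events";
	char buf[512];
	ssize_t n;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		/* hid_debug_events_poll() reports readability once the kfifo has data. */
		if (poll(&pfd, 1, -1) < 0)
			break;

		n = read(fd, buf, sizeof(buf) - 1);
		if (n <= 0)
			break;
		buf[n] = '\0';
		fputs(buf, stdout);	/* resolved usages and raw report dumps */
	}

	close(fd);
	return 0;
}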
// SPDX-License-Identifier: GPL-2.0 /* * Fintek F81232 USB to serial adaptor driver * Fintek F81532A/534A/535/536 USB to 2/4/8/12 serial adaptor driver * * Copyright (C) 2012 Greg Kroah-Hartman (gregkh@linuxfoundation.org) * Copyright (C) 2012 Linux Foundation */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/serial_reg.h> #define F81232_ID \ { USB_DEVICE(0x1934, 0x0706) } /* 1 port UART device */ #define F81534A_SERIES_ID \ { USB_DEVICE(0x2c42, 0x1602) }, /* In-Box 2 port UART device */ \ { USB_DEVICE(0x2c42, 0x1604) }, /* In-Box 4 port UART device */ \ { USB_DEVICE(0x2c42, 0x1605) }, /* In-Box 8 port UART device */ \ { USB_DEVICE(0x2c42, 0x1606) }, /* In-Box 12 port UART device */ \ { USB_DEVICE(0x2c42, 0x1608) }, /* Non-Flash type */ \ { USB_DEVICE(0x2c42, 0x1632) }, /* 2 port UART device */ \ { USB_DEVICE(0x2c42, 0x1634) }, /* 4 port UART device */ \ { USB_DEVICE(0x2c42, 0x1635) }, /* 8 port UART device */ \ { USB_DEVICE(0x2c42, 0x1636) } /* 12 port UART device */ #define F81534A_CTRL_ID \ { USB_DEVICE(0x2c42, 0x16f8) } /* Global control device */ static const struct usb_device_id f81232_id_table[] = { F81232_ID, { } /* Terminating entry */ }; static const struct usb_device_id f81534a_id_table[] = { F81534A_SERIES_ID, { } /* Terminating entry */ }; static const struct usb_device_id f81534a_ctrl_id_table[] = { F81534A_CTRL_ID, { } /* Terminating entry */ }; static const struct usb_device_id combined_id_table[] = { F81232_ID, F81534A_SERIES_ID, F81534A_CTRL_ID, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, combined_id_table); /* Maximum baudrate for F81232 */ #define F81232_MAX_BAUDRATE 1500000 #define F81232_DEF_BAUDRATE 9600 /* USB Control EP parameter */ #define F81232_REGISTER_REQUEST 0xa0 #define F81232_GET_REGISTER 0xc0 #define F81232_SET_REGISTER 0x40 #define F81534A_ACCESS_REG_RETRY 2 #define SERIAL_BASE_ADDRESS 0x0120 #define RECEIVE_BUFFER_REGISTER (0x00 + SERIAL_BASE_ADDRESS) #define INTERRUPT_ENABLE_REGISTER (0x01 + SERIAL_BASE_ADDRESS) #define FIFO_CONTROL_REGISTER (0x02 + SERIAL_BASE_ADDRESS) #define LINE_CONTROL_REGISTER (0x03 + SERIAL_BASE_ADDRESS) #define MODEM_CONTROL_REGISTER (0x04 + SERIAL_BASE_ADDRESS) #define LINE_STATUS_REGISTER (0x05 + SERIAL_BASE_ADDRESS) #define MODEM_STATUS_REGISTER (0x06 + SERIAL_BASE_ADDRESS) /* * F81232 Clock registers (106h) * * Bit1-0: Clock source selector * 00: 1.846MHz. * 01: 18.46MHz. * 10: 24MHz. * 11: 14.77MHz. 
*/ #define F81232_CLK_REGISTER 0x106 #define F81232_CLK_1_846_MHZ 0 #define F81232_CLK_18_46_MHZ BIT(0) #define F81232_CLK_24_MHZ BIT(1) #define F81232_CLK_14_77_MHZ (BIT(1) | BIT(0)) #define F81232_CLK_MASK GENMASK(1, 0) #define F81534A_MODE_REG 0x107 #define F81534A_TRIGGER_MASK GENMASK(3, 2) #define F81534A_TRIGGER_MULTIPLE_4X BIT(3) #define F81534A_FIFO_128BYTE (BIT(1) | BIT(0)) /* Serial port self GPIO control, 2bytes [control&output data][input data] */ #define F81534A_GPIO_REG 0x10e #define F81534A_GPIO_MODE2_DIR BIT(6) /* 1: input, 0: output */ #define F81534A_GPIO_MODE1_DIR BIT(5) #define F81534A_GPIO_MODE0_DIR BIT(4) #define F81534A_GPIO_MODE2_OUTPUT BIT(2) #define F81534A_GPIO_MODE1_OUTPUT BIT(1) #define F81534A_GPIO_MODE0_OUTPUT BIT(0) #define F81534A_CTRL_CMD_ENABLE_PORT 0x116 struct f81232_private { struct mutex lock; u8 modem_control; u8 modem_status; u8 shadow_lcr; speed_t baud_base; struct work_struct lsr_work; struct work_struct interrupt_work; struct usb_serial_port *port; }; static u32 const baudrate_table[] = { 115200, 921600, 1152000, 1500000 }; static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ, F81232_CLK_18_46_MHZ, F81232_CLK_24_MHZ }; static int calc_baud_divisor(speed_t baudrate, speed_t clockrate) { return DIV_ROUND_CLOSEST(clockrate, baudrate); } static int f81232_get_register(struct usb_serial_port *port, u16 reg, u8 *val) { int status; struct usb_device *dev = port->serial->dev; status = usb_control_msg_recv(dev, 0, F81232_REGISTER_REQUEST, F81232_GET_REGISTER, reg, 0, val, sizeof(*val), USB_CTRL_GET_TIMEOUT, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s failed status: %d\n", __func__, status); status = usb_translate_errors(status); } return status; } static int f81232_set_register(struct usb_serial_port *port, u16 reg, u8 val) { int status; struct usb_device *dev = port->serial->dev; status = usb_control_msg_send(dev, 0, F81232_REGISTER_REQUEST, F81232_SET_REGISTER, reg, 0, &val, sizeof(val), USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s failed status: %d\n", __func__, status); status = usb_translate_errors(status); } return status; } static int f81232_set_mask_register(struct usb_serial_port *port, u16 reg, u8 mask, u8 val) { int status; u8 tmp; status = f81232_get_register(port, reg, &tmp); if (status) return status; tmp = (tmp & ~mask) | (val & mask); return f81232_set_register(port, reg, tmp); } static void f81232_read_msr(struct usb_serial_port *port) { int status; u8 current_msr; struct tty_struct *tty; struct f81232_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->lock); status = f81232_get_register(port, MODEM_STATUS_REGISTER, &current_msr); if (status) { dev_err(&port->dev, "%s fail, status: %d\n", __func__, status); mutex_unlock(&priv->lock); return; } if (!(current_msr & UART_MSR_ANY_DELTA)) { mutex_unlock(&priv->lock); return; } priv->modem_status = current_msr; if (current_msr & UART_MSR_DCTS) port->icount.cts++; if (current_msr & UART_MSR_DDSR) port->icount.dsr++; if (current_msr & UART_MSR_TERI) port->icount.rng++; if (current_msr & UART_MSR_DDCD) { port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) { usb_serial_handle_dcd_change(port, tty, current_msr & UART_MSR_DCD); tty_kref_put(tty); } } wake_up_interruptible(&port->port.delta_msr_wait); mutex_unlock(&priv->lock); } static int f81232_set_mctrl(struct usb_serial_port *port, unsigned int set, unsigned int clear) { u8 val; int status; struct f81232_private *priv = usb_get_serial_port_data(port); if 
(((set | clear) & (TIOCM_DTR | TIOCM_RTS)) == 0) return 0; /* no change */ /* 'set' takes precedence over 'clear' */ clear &= ~set; /* force enable interrupt with OUT2 */ mutex_lock(&priv->lock); val = UART_MCR_OUT2 | priv->modem_control; if (clear & TIOCM_DTR) val &= ~UART_MCR_DTR; if (clear & TIOCM_RTS) val &= ~UART_MCR_RTS; if (set & TIOCM_DTR) val |= UART_MCR_DTR; if (set & TIOCM_RTS) val |= UART_MCR_RTS; dev_dbg(&port->dev, "%s new:%02x old:%02x\n", __func__, val, priv->modem_control); status = f81232_set_register(port, MODEM_CONTROL_REGISTER, val); if (status) { dev_err(&port->dev, "%s set MCR status < 0\n", __func__); mutex_unlock(&priv->lock); return status; } priv->modem_control = val; mutex_unlock(&priv->lock); return 0; } static void f81232_update_line_status(struct usb_serial_port *port, unsigned char *data, size_t actual_length) { struct f81232_private *priv = usb_get_serial_port_data(port); if (!actual_length) return; switch (data[0] & 0x07) { case 0x00: /* msr change */ dev_dbg(&port->dev, "IIR: MSR Change: %02x\n", data[0]); schedule_work(&priv->interrupt_work); break; case 0x02: /* tx-empty */ break; case 0x04: /* rx data available */ break; case 0x06: /* lsr change */ /* we can forget it. the LSR will read from bulk-in */ dev_dbg(&port->dev, "IIR: LSR Change: %02x\n", data[0]); break; } } static void f81232_read_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned int actual_length = urb->actual_length; int status = urb->status; int retval; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&port->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_serial_debug_data(&port->dev, __func__, urb->actual_length, urb->transfer_buffer); f81232_update_line_status(port, data, actual_length); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } static char f81232_handle_lsr(struct usb_serial_port *port, u8 lsr) { struct f81232_private *priv = usb_get_serial_port_data(port); char tty_flag = TTY_NORMAL; if (!(lsr & UART_LSR_BRK_ERROR_BITS)) return tty_flag; if (lsr & UART_LSR_BI) { tty_flag = TTY_BREAK; port->icount.brk++; usb_serial_handle_break(port); } else if (lsr & UART_LSR_PE) { tty_flag = TTY_PARITY; port->icount.parity++; } else if (lsr & UART_LSR_FE) { tty_flag = TTY_FRAME; port->icount.frame++; } if (lsr & UART_LSR_OE) { port->icount.overrun++; schedule_work(&priv->lsr_work); tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } return tty_flag; } static void f81232_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; char tty_flag; unsigned int i; u8 lsr; /* * When opening the port we get a 1-byte packet with the current LSR, * which we discard. */ if ((urb->actual_length < 2) || (urb->actual_length % 2)) return; /* bulk-in data: [LSR(1Byte)+DATA(1Byte)][LSR(1Byte)+DATA(1Byte)]... 
*/ for (i = 0; i < urb->actual_length; i += 2) { lsr = data[i]; tty_flag = f81232_handle_lsr(port, lsr); if (port->sysrq) { if (usb_serial_handle_sysrq_char(port, data[i + 1])) continue; } tty_insert_flip_char(&port->port, data[i + 1], tty_flag); } tty_flip_buffer_push(&port->port); } static void f81534a_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; char tty_flag; unsigned int i; u8 lsr; u8 len; if (urb->actual_length < 3) { dev_err(&port->dev, "short message received: %d\n", urb->actual_length); return; } len = data[0]; if (len != urb->actual_length) { dev_err(&port->dev, "malformed message received: %d (%d)\n", urb->actual_length, len); return; } /* bulk-in data: [LEN][Data.....][LSR] */ lsr = data[len - 1]; tty_flag = f81232_handle_lsr(port, lsr); if (port->sysrq) { for (i = 1; i < len - 1; ++i) { if (!usb_serial_handle_sysrq_char(port, data[i])) { tty_insert_flip_char(&port->port, data[i], tty_flag); } } } else { tty_insert_flip_string_fixed_flag(&port->port, &data[1], tty_flag, len - 2); } tty_flip_buffer_push(&port->port); } static int f81232_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct f81232_private *priv = usb_get_serial_port_data(port); int status; mutex_lock(&priv->lock); if (break_state) priv->shadow_lcr |= UART_LCR_SBC; else priv->shadow_lcr &= ~UART_LCR_SBC; status = f81232_set_register(port, LINE_CONTROL_REGISTER, priv->shadow_lcr); if (status) dev_err(&port->dev, "set break failed: %d\n", status); mutex_unlock(&priv->lock); return status; } static int f81232_find_clk(speed_t baudrate) { int idx; for (idx = 0; idx < ARRAY_SIZE(baudrate_table); ++idx) { if (baudrate <= baudrate_table[idx] && baudrate_table[idx] % baudrate == 0) return idx; } return -EINVAL; } static void f81232_set_baudrate(struct tty_struct *tty, struct usb_serial_port *port, speed_t baudrate, speed_t old_baudrate) { struct f81232_private *priv = usb_get_serial_port_data(port); u8 lcr; int divisor; int status = 0; int i; int idx; speed_t baud_list[] = { baudrate, old_baudrate, F81232_DEF_BAUDRATE }; for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { baudrate = baud_list[i]; if (baudrate == 0) { tty_encode_baud_rate(tty, 0, 0); return; } idx = f81232_find_clk(baudrate); if (idx >= 0) { tty_encode_baud_rate(tty, baudrate, baudrate); break; } } if (idx < 0) return; priv->baud_base = baudrate_table[idx]; divisor = calc_baud_divisor(baudrate, priv->baud_base); status = f81232_set_mask_register(port, F81232_CLK_REGISTER, F81232_CLK_MASK, clock_table[idx]); if (status) { dev_err(&port->dev, "%s failed to set CLK_REG: %d\n", __func__, status); return; } status = f81232_get_register(port, LINE_CONTROL_REGISTER, &lcr); /* get LCR */ if (status) { dev_err(&port->dev, "%s failed to get LCR: %d\n", __func__, status); return; } status = f81232_set_register(port, LINE_CONTROL_REGISTER, lcr | UART_LCR_DLAB); /* Enable DLAB */ if (status) { dev_err(&port->dev, "%s failed to set DLAB: %d\n", __func__, status); return; } status = f81232_set_register(port, RECEIVE_BUFFER_REGISTER, divisor & 0x00ff); /* low */ if (status) { dev_err(&port->dev, "%s failed to set baudrate MSB: %d\n", __func__, status); goto reapply_lcr; } status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, (divisor & 0xff00) >> 8); /* high */ if (status) { dev_err(&port->dev, "%s failed to set baudrate LSB: %d\n", __func__, status); } reapply_lcr: status = f81232_set_register(port, LINE_CONTROL_REGISTER, lcr & 
~UART_LCR_DLAB); if (status) { dev_err(&port->dev, "%s failed to set DLAB: %d\n", __func__, status); } } static int f81232_port_enable(struct usb_serial_port *port) { u8 val; int status; /* fifo on, trigger8, clear TX/RX*/ val = UART_FCR_TRIGGER_8 | UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT; status = f81232_set_register(port, FIFO_CONTROL_REGISTER, val); if (status) { dev_err(&port->dev, "%s failed to set FCR: %d\n", __func__, status); return status; } /* MSR Interrupt only, LSR will read from Bulk-in odd byte */ status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, UART_IER_MSI); if (status) { dev_err(&port->dev, "%s failed to set IER: %d\n", __func__, status); return status; } return 0; } static int f81232_port_disable(struct usb_serial_port *port) { int status; status = f81232_set_register(port, INTERRUPT_ENABLE_REGISTER, 0); if (status) { dev_err(&port->dev, "%s failed to set IER: %d\n", __func__, status); return status; } return 0; } static void f81232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct f81232_private *priv = usb_get_serial_port_data(port); u8 new_lcr = 0; int status = 0; speed_t baudrate; speed_t old_baud; /* Don't change anything if nothing has changed */ if (old_termios && !tty_termios_hw_change(&tty->termios, old_termios)) return; if (C_BAUD(tty) == B0) f81232_set_mctrl(port, 0, TIOCM_DTR | TIOCM_RTS); else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) f81232_set_mctrl(port, TIOCM_DTR | TIOCM_RTS, 0); baudrate = tty_get_baud_rate(tty); if (baudrate > 0) { if (old_termios) old_baud = tty_termios_baud_rate(old_termios); else old_baud = F81232_DEF_BAUDRATE; f81232_set_baudrate(tty, port, baudrate, old_baud); } if (C_PARENB(tty)) { new_lcr |= UART_LCR_PARITY; if (!C_PARODD(tty)) new_lcr |= UART_LCR_EPAR; if (C_CMSPAR(tty)) new_lcr |= UART_LCR_SPAR; } if (C_CSTOPB(tty)) new_lcr |= UART_LCR_STOP; new_lcr |= UART_LCR_WLEN(tty_get_char_size(tty->termios.c_cflag)); mutex_lock(&priv->lock); new_lcr |= (priv->shadow_lcr & UART_LCR_SBC); status = f81232_set_register(port, LINE_CONTROL_REGISTER, new_lcr); if (status) { dev_err(&port->dev, "%s failed to set LCR: %d\n", __func__, status); } priv->shadow_lcr = new_lcr; mutex_unlock(&priv->lock); } static int f81232_tiocmget(struct tty_struct *tty) { int r; struct usb_serial_port *port = tty->driver_data; struct f81232_private *port_priv = usb_get_serial_port_data(port); u8 mcr, msr; /* force get current MSR changed state */ f81232_read_msr(port); mutex_lock(&port_priv->lock); mcr = port_priv->modem_control; msr = port_priv->modem_status; mutex_unlock(&port_priv->lock); r = (mcr & UART_MCR_DTR ? TIOCM_DTR : 0) | (mcr & UART_MCR_RTS ? TIOCM_RTS : 0) | (msr & UART_MSR_CTS ? TIOCM_CTS : 0) | (msr & UART_MSR_DCD ? TIOCM_CAR : 0) | (msr & UART_MSR_RI ? TIOCM_RI : 0) | (msr & UART_MSR_DSR ? 
TIOCM_DSR : 0); return r; } static int f81232_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; return f81232_set_mctrl(port, set, clear); } static int f81232_open(struct tty_struct *tty, struct usb_serial_port *port) { int result; result = f81232_port_enable(port); if (result) return result; /* Setup termios */ if (tty) f81232_set_termios(tty, port, NULL); result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (result) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, result); return result; } result = usb_serial_generic_open(tty, port); if (result) { usb_kill_urb(port->interrupt_in_urb); return result; } return 0; } static int f81534a_open(struct tty_struct *tty, struct usb_serial_port *port) { int status; u8 mask; u8 val; val = F81534A_TRIGGER_MULTIPLE_4X | F81534A_FIFO_128BYTE; mask = F81534A_TRIGGER_MASK | F81534A_FIFO_128BYTE; status = f81232_set_mask_register(port, F81534A_MODE_REG, mask, val); if (status) { dev_err(&port->dev, "failed to set MODE_REG: %d\n", status); return status; } return f81232_open(tty, port); } static void f81232_close(struct usb_serial_port *port) { struct f81232_private *port_priv = usb_get_serial_port_data(port); f81232_port_disable(port); usb_serial_generic_close(port); usb_kill_urb(port->interrupt_in_urb); flush_work(&port_priv->interrupt_work); flush_work(&port_priv->lsr_work); } static void f81232_dtr_rts(struct usb_serial_port *port, int on) { if (on) f81232_set_mctrl(port, TIOCM_DTR | TIOCM_RTS, 0); else f81232_set_mctrl(port, 0, TIOCM_DTR | TIOCM_RTS); } static bool f81232_tx_empty(struct usb_serial_port *port) { int status; u8 tmp; status = f81232_get_register(port, LINE_STATUS_REGISTER, &tmp); if (!status) { if ((tmp & UART_LSR_TEMT) != UART_LSR_TEMT) return false; } return true; } static int f81232_carrier_raised(struct usb_serial_port *port) { u8 msr; struct f81232_private *priv = usb_get_serial_port_data(port); mutex_lock(&priv->lock); msr = priv->modem_status; mutex_unlock(&priv->lock); if (msr & UART_MSR_DCD) return 1; return 0; } static void f81232_get_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct f81232_private *priv = usb_get_serial_port_data(port); ss->baud_base = priv->baud_base; } static void f81232_interrupt_work(struct work_struct *work) { struct f81232_private *priv = container_of(work, struct f81232_private, interrupt_work); f81232_read_msr(priv->port); } static void f81232_lsr_worker(struct work_struct *work) { struct f81232_private *priv; struct usb_serial_port *port; int status; u8 tmp; priv = container_of(work, struct f81232_private, lsr_work); port = priv->port; status = f81232_get_register(port, LINE_STATUS_REGISTER, &tmp); if (status) dev_warn(&port->dev, "read LSR failed: %d\n", status); } static int f81534a_ctrl_set_register(struct usb_interface *intf, u16 reg, u16 size, void *val) { struct usb_device *dev = interface_to_usbdev(intf); int retry = F81534A_ACCESS_REG_RETRY; int status; while (retry--) { status = usb_control_msg_send(dev, 0, F81232_REGISTER_REQUEST, F81232_SET_REGISTER, reg, 0, val, size, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); if (status) { status = usb_translate_errors(status); if (status == -EIO) continue; } break; } if (status) { dev_err(&intf->dev, "failed to set register 0x%x: %d\n", reg, status); } return status; } static int f81534a_ctrl_enable_all_ports(struct usb_interface *intf, bool en) { unsigned char enable[2] = {0}; 
int status; /* * Enable all available serial ports, define as following: * bit 15 : Reset behavior (when HUB got soft reset) * 0: maintain all serial port enabled state. * 1: disable all serial port. * bit 0~11 : Serial port enable bit. */ if (en) { enable[0] = 0xff; enable[1] = 0x8f; } status = f81534a_ctrl_set_register(intf, F81534A_CTRL_CMD_ENABLE_PORT, sizeof(enable), enable); if (status) dev_err(&intf->dev, "failed to enable ports: %d\n", status); return status; } static int f81534a_ctrl_probe(struct usb_interface *intf, const struct usb_device_id *id) { return f81534a_ctrl_enable_all_ports(intf, true); } static void f81534a_ctrl_disconnect(struct usb_interface *intf) { f81534a_ctrl_enable_all_ports(intf, false); } static int f81534a_ctrl_resume(struct usb_interface *intf) { return f81534a_ctrl_enable_all_ports(intf, true); } static int f81232_port_probe(struct usb_serial_port *port) { struct f81232_private *priv; priv = devm_kzalloc(&port->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; mutex_init(&priv->lock); INIT_WORK(&priv->interrupt_work, f81232_interrupt_work); INIT_WORK(&priv->lsr_work, f81232_lsr_worker); usb_set_serial_port_data(port, priv); priv->port = port; return 0; } static int f81534a_port_probe(struct usb_serial_port *port) { int status; /* tri-state with pull-high, default RS232 Mode */ status = f81232_set_register(port, F81534A_GPIO_REG, F81534A_GPIO_MODE2_DIR); if (status) return status; return f81232_port_probe(port); } static int f81232_suspend(struct usb_serial *serial, pm_message_t message) { struct usb_serial_port *port = serial->port[0]; struct f81232_private *port_priv = usb_get_serial_port_data(port); int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_kill_urb(port->read_urbs[i]); usb_kill_urb(port->interrupt_in_urb); if (port_priv) { flush_work(&port_priv->interrupt_work); flush_work(&port_priv->lsr_work); } return 0; } static int f81232_resume(struct usb_serial *serial) { struct usb_serial_port *port = serial->port[0]; int result; if (tty_port_initialized(&port->port)) { result = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); if (result) { dev_err(&port->dev, "submit interrupt urb failed: %d\n", result); return result; } } return usb_serial_generic_resume(serial); } static struct usb_serial_driver f81232_device = { .driver = { .owner = THIS_MODULE, .name = "f81232", }, .id_table = f81232_id_table, .num_ports = 1, .bulk_in_size = 256, .bulk_out_size = 256, .open = f81232_open, .close = f81232_close, .dtr_rts = f81232_dtr_rts, .carrier_raised = f81232_carrier_raised, .get_serial = f81232_get_serial, .break_ctl = f81232_break_ctl, .set_termios = f81232_set_termios, .tiocmget = f81232_tiocmget, .tiocmset = f81232_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .tx_empty = f81232_tx_empty, .process_read_urb = f81232_process_read_urb, .read_int_callback = f81232_read_int_callback, .port_probe = f81232_port_probe, .suspend = f81232_suspend, .resume = f81232_resume, }; static struct usb_serial_driver f81534a_device = { .driver = { .owner = THIS_MODULE, .name = "f81534a", }, .id_table = f81534a_id_table, .num_ports = 1, .open = f81534a_open, .close = f81232_close, .dtr_rts = f81232_dtr_rts, .carrier_raised = f81232_carrier_raised, .get_serial = f81232_get_serial, .break_ctl = f81232_break_ctl, .set_termios = f81232_set_termios, .tiocmget = f81232_tiocmget, .tiocmset = f81232_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .tx_empty = f81232_tx_empty, .process_read_urb = f81534a_process_read_urb, .read_int_callback = 
f81232_read_int_callback, .port_probe = f81534a_port_probe, .suspend = f81232_suspend, .resume = f81232_resume, }; static struct usb_serial_driver * const serial_drivers[] = { &f81232_device, &f81534a_device, NULL, }; static struct usb_driver f81534a_ctrl_driver = { .name = "f81534a_ctrl", .id_table = f81534a_ctrl_id_table, .probe = f81534a_ctrl_probe, .disconnect = f81534a_ctrl_disconnect, .resume = f81534a_ctrl_resume, }; static int __init f81232_init(void) { int status; status = usb_register_driver(&f81534a_ctrl_driver, THIS_MODULE, KBUILD_MODNAME); if (status) return status; status = usb_serial_register_drivers(serial_drivers, KBUILD_MODNAME, combined_id_table); if (status) { usb_deregister(&f81534a_ctrl_driver); return status; } return 0; } static void __exit f81232_exit(void) { usb_serial_deregister_drivers(serial_drivers); usb_deregister(&f81534a_ctrl_driver); } module_init(f81232_init); module_exit(f81232_exit); MODULE_DESCRIPTION("Fintek F81232/532A/534A/535/536 USB to serial driver"); MODULE_AUTHOR("Greg Kroah-Hartman <gregkh@linuxfoundation.org>"); MODULE_AUTHOR("Peter Hong <peter_hong@fintek.com.tw>"); MODULE_LICENSE("GPL v2");
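/*
 * Illustrative sketch (not part of the driver above): how f81232_find_clk() and
 * calc_baud_divisor() cooperate. The driver picks the smallest reference rate in
 * baudrate_table[] that the requested baud divides evenly, programs the matching
 * clock_table[] source into F81232_CLK_REGISTER, and writes the resulting 16-bit
 * divisor through the DLAB-mapped divisor registers. This stand-alone userspace
 * program repeats only the selection arithmetic with the same table, purely for
 * demonstration; the helper names here are local to this sketch.
 */
#include <stdio.h>

/* Same rounding as the kernel's DIV_ROUND_CLOSEST() for positive operands. */
#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))

static const unsigned int baudrate_table[] = { 115200, 921600, 1152000, 1500000 };

static int find_clk_idx(unsigned int baudrate)
{
	unsigned int idx;

	for (idx = 0; idx < sizeof(baudrate_table) / sizeof(baudrate_table[0]); ++idx) {
		/* Same test as f81232_find_clk(): the reference rate must divide evenly. */
		if (baudrate <= baudrate_table[idx] && baudrate_table[idx] % baudrate == 0)
			return idx;
	}
	return -1;
}

int main(void)
{
	unsigned int rates[] = { 9600, 38400, 250000, 460800 };
	unsigned int i;

	for (i = 0; i < sizeof(rates) / sizeof(rates[0]); ++i) {
		int idx = find_clk_idx(rates[i]);

		if (idx < 0) {
			printf("%u baud: unsupported\n", rates[i]);
			continue;
		}
		/* e.g. 38400 baud -> clock idx 0 (base 115200), divisor 3 */
		printf("%u baud: clock idx %d (base %u), divisor %u\n",
		       rates[i], idx, baudrate_table[idx],
		       DIV_ROUND_CLOSEST(baudrate_table[idx], rates[i]));
	}
	return 0;
}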
// SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains AppArmor LSM hooks. * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd. 
*/ #include <linux/lsm_hooks.h> #include <linux/moduleparam.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/ptrace.h> #include <linux/ctype.h> #include <linux/sysctl.h> #include <linux/audit.h> #include <linux/user_namespace.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/zstd.h> #include <net/sock.h> #include <uapi/linux/mount.h> #include <uapi/linux/lsm.h> #include "include/apparmor.h" #include "include/apparmorfs.h" #include "include/audit.h" #include "include/capability.h" #include "include/cred.h" #include "include/file.h" #include "include/ipc.h" #include "include/net.h" #include "include/path.h" #include "include/label.h" #include "include/policy.h" #include "include/policy_ns.h" #include "include/procattr.h" #include "include/mount.h" #include "include/secid.h" /* Flag indicating whether initialization completed */ int apparmor_initialized; union aa_buffer { struct list_head list; DECLARE_FLEX_ARRAY(char, buffer); }; struct aa_local_cache { unsigned int hold; unsigned int count; struct list_head head; }; #define RESERVE_COUNT 2 static int reserve_count = RESERVE_COUNT; static int buffer_count; static LIST_HEAD(aa_global_buffers); static DEFINE_SPINLOCK(aa_buffers_lock); static DEFINE_PER_CPU(struct aa_local_cache, aa_local_buffers); /* * LSM hook functions */ /* * put the associated labels */ static void apparmor_cred_free(struct cred *cred) { aa_put_label(cred_label(cred)); set_cred_label(cred, NULL); } /* * allocate the apparmor part of blank credentials */ static int apparmor_cred_alloc_blank(struct cred *cred, gfp_t gfp) { set_cred_label(cred, NULL); return 0; } /* * prepare new cred label for modification by prepare_cred block */ static int apparmor_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { set_cred_label(new, aa_get_newest_label(cred_label(old))); return 0; } /* * transfer the apparmor data to a blank set of creds */ static void apparmor_cred_transfer(struct cred *new, const struct cred *old) { set_cred_label(new, aa_get_newest_label(cred_label(old))); } static void apparmor_task_free(struct task_struct *task) { aa_free_task_ctx(task_ctx(task)); } static int apparmor_task_alloc(struct task_struct *task, unsigned long clone_flags) { struct aa_task_ctx *new = task_ctx(task); aa_dup_task_ctx(new, task_ctx(current)); return 0; } static int apparmor_ptrace_access_check(struct task_struct *child, unsigned int mode) { struct aa_label *tracer, *tracee; const struct cred *cred; int error; cred = get_task_cred(child); tracee = cred_label(cred); /* ref count on cred */ tracer = __begin_current_label_crit_section(); error = aa_may_ptrace(current_cred(), tracer, cred, tracee, (mode & PTRACE_MODE_READ) ? 
AA_PTRACE_READ : AA_PTRACE_TRACE); __end_current_label_crit_section(tracer); put_cred(cred); return error; } static int apparmor_ptrace_traceme(struct task_struct *parent) { struct aa_label *tracer, *tracee; const struct cred *cred; int error; tracee = __begin_current_label_crit_section(); cred = get_task_cred(parent); tracer = cred_label(cred); /* ref count on cred */ error = aa_may_ptrace(cred, tracer, current_cred(), tracee, AA_PTRACE_TRACE); put_cred(cred); __end_current_label_crit_section(tracee); return error; } /* Derived from security/commoncap.c:cap_capget */ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { struct aa_label *label; const struct cred *cred; rcu_read_lock(); cred = __task_cred(target); label = aa_get_newest_cred_label(cred); /* * cap_capget is stacked ahead of this and will * initialize effective and permitted. */ if (!unconfined(label)) { struct aa_profile *profile; struct label_it i; label_for_each_confined(i, label, profile) { struct aa_ruleset *rules; if (COMPLAIN_MODE(profile)) continue; rules = list_first_entry(&profile->rules, typeof(*rules), list); *effective = cap_intersect(*effective, rules->caps.allow); *permitted = cap_intersect(*permitted, rules->caps.allow); } } rcu_read_unlock(); aa_put_label(label); return 0; } static int apparmor_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) { struct aa_label *label; int error = 0; label = aa_get_newest_cred_label(cred); if (!unconfined(label)) error = aa_capable(cred, label, cap, opts); aa_put_label(label); return error; } /** * common_perm - basic common permission check wrapper fn for paths * @op: operation being checked * @path: path to check permission of (NOT NULL) * @mask: requested permissions mask * @cond: conditional info for the permission request (NOT NULL) * * Returns: %0 else error code if error or permission denied */ static int common_perm(const char *op, const struct path *path, u32 mask, struct path_cond *cond) { struct aa_label *label; int error = 0; label = __begin_current_label_crit_section(); if (!unconfined(label)) error = aa_path_perm(op, current_cred(), label, path, 0, mask, cond); __end_current_label_crit_section(label); return error; } /** * common_perm_cond - common permission wrapper around inode cond * @op: operation being checked * @path: location to check (NOT NULL) * @mask: requested permissions mask * * Returns: %0 else error code if error or permission denied */ static int common_perm_cond(const char *op, const struct path *path, u32 mask) { vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(path->mnt), d_backing_inode(path->dentry)); struct path_cond cond = { vfsuid_into_kuid(vfsuid), d_backing_inode(path->dentry)->i_mode }; if (!path_mediated_fs(path->dentry)) return 0; return common_perm(op, path, mask, &cond); } /** * common_perm_dir_dentry - common permission wrapper when path is dir, dentry * @op: operation being checked * @dir: directory of the dentry (NOT NULL) * @dentry: dentry to check (NOT NULL) * @mask: requested permissions mask * @cond: conditional info for the permission request (NOT NULL) * * Returns: %0 else error code if error or permission denied */ static int common_perm_dir_dentry(const char *op, const struct path *dir, struct dentry *dentry, u32 mask, struct path_cond *cond) { struct path path = { .mnt = dir->mnt, .dentry = dentry }; return common_perm(op, &path, mask, cond); } /** * common_perm_rm - common permission wrapper for 
operations doing rm * @op: operation being checked * @dir: directory that the dentry is in (NOT NULL) * @dentry: dentry being rm'd (NOT NULL) * @mask: requested permission mask * * Returns: %0 else error code if error or permission denied */ static int common_perm_rm(const char *op, const struct path *dir, struct dentry *dentry, u32 mask) { struct inode *inode = d_backing_inode(dentry); struct path_cond cond = { }; vfsuid_t vfsuid; if (!inode || !path_mediated_fs(dentry)) return 0; vfsuid = i_uid_into_vfsuid(mnt_idmap(dir->mnt), inode); cond.uid = vfsuid_into_kuid(vfsuid); cond.mode = inode->i_mode; return common_perm_dir_dentry(op, dir, dentry, mask, &cond); } /** * common_perm_create - common permission wrapper for operations doing create * @op: operation being checked * @dir: directory that dentry will be created in (NOT NULL) * @dentry: dentry to create (NOT NULL) * @mask: request permission mask * @mode: created file mode * * Returns: %0 else error code if error or permission denied */ static int common_perm_create(const char *op, const struct path *dir, struct dentry *dentry, u32 mask, umode_t mode) { struct path_cond cond = { current_fsuid(), mode }; if (!path_mediated_fs(dir->dentry)) return 0; return common_perm_dir_dentry(op, dir, dentry, mask, &cond); } static int apparmor_path_unlink(const struct path *dir, struct dentry *dentry) { return common_perm_rm(OP_UNLINK, dir, dentry, AA_MAY_DELETE); } static int apparmor_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode) { return common_perm_create(OP_MKDIR, dir, dentry, AA_MAY_CREATE, S_IFDIR); } static int apparmor_path_rmdir(const struct path *dir, struct dentry *dentry) { return common_perm_rm(OP_RMDIR, dir, dentry, AA_MAY_DELETE); } static int apparmor_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { return common_perm_create(OP_MKNOD, dir, dentry, AA_MAY_CREATE, mode); } static int apparmor_path_truncate(const struct path *path) { return common_perm_cond(OP_TRUNC, path, MAY_WRITE | AA_MAY_SETATTR); } static int apparmor_file_truncate(struct file *file) { return apparmor_path_truncate(&file->f_path); } static int apparmor_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name) { return common_perm_create(OP_SYMLINK, dir, dentry, AA_MAY_CREATE, S_IFLNK); } static int apparmor_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { struct aa_label *label; int error = 0; if (!path_mediated_fs(old_dentry)) return 0; label = begin_current_label_crit_section(); if (!unconfined(label)) error = aa_path_link(current_cred(), label, old_dentry, new_dir, new_dentry); end_current_label_crit_section(label); return error; } static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry, const unsigned int flags) { struct aa_label *label; int error = 0; if (!path_mediated_fs(old_dentry)) return 0; if ((flags & RENAME_EXCHANGE) && !path_mediated_fs(new_dentry)) return 0; label = begin_current_label_crit_section(); if (!unconfined(label)) { struct mnt_idmap *idmap = mnt_idmap(old_dir->mnt); vfsuid_t vfsuid; struct path old_path = { .mnt = old_dir->mnt, .dentry = old_dentry }; struct path new_path = { .mnt = new_dir->mnt, .dentry = new_dentry }; struct path_cond cond = { .mode = d_backing_inode(old_dentry)->i_mode }; vfsuid = i_uid_into_vfsuid(idmap, d_backing_inode(old_dentry)); cond.uid = vfsuid_into_kuid(vfsuid); if (flags & RENAME_EXCHANGE) { 
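/* For RENAME_EXCHANGE the two objects swap places, so the reverse * direction is checked here; the forward direction is checked below. */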
struct path_cond cond_exchange = { .mode = d_backing_inode(new_dentry)->i_mode, }; vfsuid = i_uid_into_vfsuid(idmap, d_backing_inode(old_dentry)); cond_exchange.uid = vfsuid_into_kuid(vfsuid); error = aa_path_perm(OP_RENAME_SRC, current_cred(), label, &new_path, 0, MAY_READ | AA_MAY_GETATTR | MAY_WRITE | AA_MAY_SETATTR | AA_MAY_DELETE, &cond_exchange); if (!error) error = aa_path_perm(OP_RENAME_DEST, current_cred(), label, &old_path, 0, MAY_WRITE | AA_MAY_SETATTR | AA_MAY_CREATE, &cond_exchange); } if (!error) error = aa_path_perm(OP_RENAME_SRC, current_cred(), label, &old_path, 0, MAY_READ | AA_MAY_GETATTR | MAY_WRITE | AA_MAY_SETATTR | AA_MAY_DELETE, &cond); if (!error) error = aa_path_perm(OP_RENAME_DEST, current_cred(), label, &new_path, 0, MAY_WRITE | AA_MAY_SETATTR | AA_MAY_CREATE, &cond); } end_current_label_crit_section(label); return error; } static int apparmor_path_chmod(const struct path *path, umode_t mode) { return common_perm_cond(OP_CHMOD, path, AA_MAY_CHMOD); } static int apparmor_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { return common_perm_cond(OP_CHOWN, path, AA_MAY_CHOWN); } static int apparmor_inode_getattr(const struct path *path) { return common_perm_cond(OP_GETATTR, path, AA_MAY_GETATTR); } static int apparmor_file_open(struct file *file) { struct aa_file_ctx *fctx = file_ctx(file); struct aa_label *label; int error = 0; if (!path_mediated_fs(file->f_path.dentry)) return 0; /* If in exec, permission is handled by bprm hooks. * Cache permissions granted by the previous exec check, with * implicit read and executable mmap which are required to * actually execute the image. * * Illogically, FMODE_EXEC is in f_flags, not f_mode. */ if (file->f_flags & __FMODE_EXEC) { fctx->allow = MAY_EXEC | MAY_READ | AA_EXEC_MMAP; return 0; } label = aa_get_newest_cred_label(file->f_cred); if (!unconfined(label)) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file); vfsuid_t vfsuid; struct path_cond cond = { .mode = inode->i_mode, }; vfsuid = i_uid_into_vfsuid(idmap, inode); cond.uid = vfsuid_into_kuid(vfsuid); error = aa_path_perm(OP_OPEN, file->f_cred, label, &file->f_path, 0, aa_map_file_to_perms(file), &cond); /* todo cache full allowed permissions set and state */ fctx->allow = aa_map_file_to_perms(file); } aa_put_label(label); return error; } static int apparmor_file_alloc_security(struct file *file) { struct aa_file_ctx *ctx = file_ctx(file); struct aa_label *label = begin_current_label_crit_section(); spin_lock_init(&ctx->lock); rcu_assign_pointer(ctx->label, aa_get_label(label)); end_current_label_crit_section(label); return 0; } static void apparmor_file_free_security(struct file *file) { struct aa_file_ctx *ctx = file_ctx(file); if (ctx) aa_put_label(rcu_access_pointer(ctx->label)); } static int common_file_perm(const char *op, struct file *file, u32 mask, bool in_atomic) { struct aa_label *label; int error = 0; /* don't reaudit files closed during inheritance */ if (file->f_path.dentry == aa_null.dentry) return -EACCES; label = __begin_current_label_crit_section(); error = aa_file_perm(op, current_cred(), label, file, mask, in_atomic); __end_current_label_crit_section(label); return error; } static int apparmor_file_receive(struct file *file) { return common_file_perm(OP_FRECEIVE, file, aa_map_file_to_perms(file), false); } static int apparmor_file_permission(struct file *file, int mask) { return common_file_perm(OP_FPERM, file, mask, false); } static int apparmor_file_lock(struct file *file, unsigned int cmd) { u32 
mask = AA_MAY_LOCK; if (cmd == F_WRLCK) mask |= MAY_WRITE; return common_file_perm(OP_FLOCK, file, mask, false); } static int common_mmap(const char *op, struct file *file, unsigned long prot, unsigned long flags, bool in_atomic) { int mask = 0; if (!file || !file_ctx(file)) return 0; if (prot & PROT_READ) mask |= MAY_READ; /* * Private mappings don't require write perms since they don't * write back to the files */ if ((prot & PROT_WRITE) && !(flags & MAP_PRIVATE)) mask |= MAY_WRITE; if (prot & PROT_EXEC) mask |= AA_EXEC_MMAP; return common_file_perm(op, file, mask, in_atomic); } static int apparmor_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) { return common_mmap(OP_FMMAP, file, prot, flags, GFP_ATOMIC); } static int apparmor_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { return common_mmap(OP_FMPROT, vma->vm_file, prot, !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0, false); } #ifdef CONFIG_IO_URING static const char *audit_uring_mask(u32 mask) { if (mask & AA_MAY_CREATE_SQPOLL) return "sqpoll"; if (mask & AA_MAY_OVERRIDE_CRED) return "override_creds"; return ""; } static void audit_uring_cb(struct audit_buffer *ab, void *va) { struct apparmor_audit_data *ad = aad_of_va(va); if (ad->request & AA_URING_PERM_MASK) { audit_log_format(ab, " requested=\"%s\"", audit_uring_mask(ad->request)); if (ad->denied & AA_URING_PERM_MASK) { audit_log_format(ab, " denied=\"%s\"", audit_uring_mask(ad->denied)); } } if (ad->uring.target) { audit_log_format(ab, " tcontext="); aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->uring.target, FLAGS_NONE, GFP_ATOMIC); } } static int profile_uring(struct aa_profile *profile, u32 request, struct aa_label *new, int cap, struct apparmor_audit_data *ad) { unsigned int state; struct aa_ruleset *rules; int error = 0; AA_BUG(!profile); rules = list_first_entry(&profile->rules, typeof(*rules), list); state = RULE_MEDIATES(rules, AA_CLASS_IO_URING); if (state) { struct aa_perms perms = { }; if (new) { aa_label_match(profile, rules, new, state, false, request, &perms); } else { perms = *aa_lookup_perms(rules->policy, state); } aa_apply_modes_to_perms(profile, &perms); error = aa_check_perms(profile, &perms, request, ad, audit_uring_cb); } return error; } /** * apparmor_uring_override_creds - check the requested cred override * @new: the target creds * * Check to see if the current task is allowed to override its credentials * to service an io_uring operation. */ static int apparmor_uring_override_creds(const struct cred *new) { struct aa_profile *profile; struct aa_label *label; int error; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING, OP_URING_OVERRIDE); ad.uring.target = cred_label(new); label = __begin_current_label_crit_section(); error = fn_for_each(label, profile, profile_uring(profile, AA_MAY_OVERRIDE_CRED, cred_label(new), CAP_SYS_ADMIN, &ad)); __end_current_label_crit_section(label); return error; } /** * apparmor_uring_sqpoll - check if an io_uring polling thread can be created * * Check to see if the current task is allowed to create a new io_uring * kernel polling thread.
*/ static int apparmor_uring_sqpoll(void) { struct aa_profile *profile; struct aa_label *label; int error; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING, OP_URING_SQPOLL); label = __begin_current_label_crit_section(); error = fn_for_each(label, profile, profile_uring(profile, AA_MAY_CREATE_SQPOLL, NULL, CAP_SYS_ADMIN, &ad)); __end_current_label_crit_section(label); return error; } #endif /* CONFIG_IO_URING */ static int apparmor_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) { struct aa_label *label; int error = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; flags &= ~AA_MS_IGNORE_MASK; label = __begin_current_label_crit_section(); if (!unconfined(label)) { if (flags & MS_REMOUNT) error = aa_remount(current_cred(), label, path, flags, data); else if (flags & MS_BIND) error = aa_bind_mount(current_cred(), label, path, dev_name, flags); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) error = aa_mount_change_type(current_cred(), label, path, flags); else if (flags & MS_MOVE) error = aa_move_mount_old(current_cred(), label, path, dev_name); else error = aa_new_mount(current_cred(), label, dev_name, path, type, flags, data); } __end_current_label_crit_section(label); return error; } static int apparmor_move_mount(const struct path *from_path, const struct path *to_path) { struct aa_label *label; int error = 0; label = __begin_current_label_crit_section(); if (!unconfined(label)) error = aa_move_mount(current_cred(), label, from_path, to_path); __end_current_label_crit_section(label); return error; } static int apparmor_sb_umount(struct vfsmount *mnt, int flags) { struct aa_label *label; int error = 0; label = __begin_current_label_crit_section(); if (!unconfined(label)) error = aa_umount(current_cred(), label, mnt, flags); __end_current_label_crit_section(label); return error; } static int apparmor_sb_pivotroot(const struct path *old_path, const struct path *new_path) { struct aa_label *label; int error = 0; label = aa_get_current_label(); if (!unconfined(label)) error = aa_pivotroot(current_cred(), label, old_path, new_path); aa_put_label(label); return error; } static int apparmor_getselfattr(unsigned int attr, struct lsm_ctx __user *lx, u32 *size, u32 flags) { int error = -ENOENT; struct aa_task_ctx *ctx = task_ctx(current); struct aa_label *label = NULL; char *value = NULL; switch (attr) { case LSM_ATTR_CURRENT: label = aa_get_newest_label(cred_label(current_cred())); break; case LSM_ATTR_PREV: if (ctx->previous) label = aa_get_newest_label(ctx->previous); break; case LSM_ATTR_EXEC: if (ctx->onexec) label = aa_get_newest_label(ctx->onexec); break; default: error = -EOPNOTSUPP; break; } if (label) { error = aa_getprocattr(label, &value, false); if (error > 0) error = lsm_fill_user_ctx(lx, size, value, error, LSM_ID_APPARMOR, 0); kfree(value); } aa_put_label(label); if (error < 0) return error; return 1; } static int apparmor_getprocattr(struct task_struct *task, const char *name, char **value) { int error = -ENOENT; /* released below */ const struct cred *cred = get_task_cred(task); struct aa_task_ctx *ctx = task_ctx(current); struct aa_label *label = NULL; if (strcmp(name, "current") == 0) label = aa_get_newest_label(cred_label(cred)); else if (strcmp(name, "prev") == 0 && ctx->previous) label = aa_get_newest_label(ctx->previous); else if (strcmp(name, "exec") == 0 && ctx->onexec) label = aa_get_newest_label(ctx->onexec); else error = -EINVAL; if (label) 
error = aa_getprocattr(label, value, true); aa_put_label(label); put_cred(cred); return error; } static int do_setattr(u64 attr, void *value, size_t size) { char *command, *largs = NULL, *args = value; size_t arg_size; int error; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, OP_SETPROCATTR); if (size == 0) return -EINVAL; /* AppArmor requires that the buffer must be null terminated atm */ if (args[size - 1] != '\0') { /* null terminate */ largs = args = kmalloc(size + 1, GFP_KERNEL); if (!args) return -ENOMEM; memcpy(args, value, size); args[size] = '\0'; } error = -EINVAL; args = strim(args); command = strsep(&args, " "); if (!args) goto out; args = skip_spaces(args); if (!*args) goto out; arg_size = size - (args - (largs ? largs : (char *) value)); if (attr == LSM_ATTR_CURRENT) { if (strcmp(command, "changehat") == 0) { error = aa_setprocattr_changehat(args, arg_size, AA_CHANGE_NOFLAGS); } else if (strcmp(command, "permhat") == 0) { error = aa_setprocattr_changehat(args, arg_size, AA_CHANGE_TEST); } else if (strcmp(command, "changeprofile") == 0) { error = aa_change_profile(args, AA_CHANGE_NOFLAGS); } else if (strcmp(command, "permprofile") == 0) { error = aa_change_profile(args, AA_CHANGE_TEST); } else if (strcmp(command, "stack") == 0) { error = aa_change_profile(args, AA_CHANGE_STACK); } else goto fail; } else if (attr == LSM_ATTR_EXEC) { if (strcmp(command, "exec") == 0) error = aa_change_profile(args, AA_CHANGE_ONEXEC); else if (strcmp(command, "stack") == 0) error = aa_change_profile(args, (AA_CHANGE_ONEXEC | AA_CHANGE_STACK)); else goto fail; } else /* only support the "current" and "exec" process attributes */ goto fail; if (!error) error = size; out: kfree(largs); return error; fail: ad.subj_label = begin_current_label_crit_section(); if (attr == LSM_ATTR_CURRENT) ad.info = "current"; else if (attr == LSM_ATTR_EXEC) ad.info = "exec"; else ad.info = "invalid"; ad.error = error = -EINVAL; aa_audit_msg(AUDIT_APPARMOR_DENIED, &ad, NULL); end_current_label_crit_section(ad.subj_label); goto out; } static int apparmor_setselfattr(unsigned int attr, struct lsm_ctx *ctx, u32 size, u32 flags) { int rc; if (attr != LSM_ATTR_CURRENT && attr != LSM_ATTR_EXEC) return -EOPNOTSUPP; rc = do_setattr(attr, ctx->ctx, ctx->ctx_len); if (rc > 0) return 0; return rc; } static int apparmor_setprocattr(const char *name, void *value, size_t size) { int attr = lsm_name_to_attr(name); if (attr) return do_setattr(attr, value, size); return -EINVAL; } /** * apparmor_bprm_committing_creds - do task cleanup on committing new creds * @bprm: binprm for the exec (NOT NULL) */ static void apparmor_bprm_committing_creds(const struct linux_binprm *bprm) { struct aa_label *label = aa_current_raw_label(); struct aa_label *new_label = cred_label(bprm->cred); /* bail out if unconfined or not changing profile */ if ((new_label->proxy == label->proxy) || (unconfined(new_label))) return; aa_inherit_files(bprm->cred, current->files); current->pdeath_signal = 0; /* reset soft limits and set hard limits for the new label */ __aa_transition_rlimits(label, new_label); } /** * apparmor_bprm_committed_creds() - do cleanup after new creds committed * @bprm: binprm for the exec (NOT NULL) */ static void apparmor_bprm_committed_creds(const struct linux_binprm *bprm) { /* clear out temporary/transitional state from the context */ aa_clear_task_ctx_trans(task_ctx(current)); return; } static void apparmor_current_getsecid_subj(u32 *secid) { struct aa_label *label = __begin_current_label_crit_section(); *secid = 
label->secid; __end_current_label_crit_section(label); } static void apparmor_task_getsecid_obj(struct task_struct *p, u32 *secid) { struct aa_label *label = aa_get_task_label(p); *secid = label->secid; aa_put_label(label); } static int apparmor_task_setrlimit(struct task_struct *task, unsigned int resource, struct rlimit *new_rlim) { struct aa_label *label = __begin_current_label_crit_section(); int error = 0; if (!unconfined(label)) error = aa_task_setrlimit(current_cred(), label, task, resource, new_rlim); __end_current_label_crit_section(label); return error; } static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo *info, int sig, const struct cred *cred) { const struct cred *tc; struct aa_label *cl, *tl; int error; tc = get_task_cred(target); tl = aa_get_newest_cred_label(tc); if (cred) { /* * Dealing with USB IO specific behavior */ cl = aa_get_newest_cred_label(cred); error = aa_may_signal(cred, cl, tc, tl, sig); aa_put_label(cl); } else { cl = __begin_current_label_crit_section(); error = aa_may_signal(current_cred(), cl, tc, tl, sig); __end_current_label_crit_section(cl); } aa_put_label(tl); put_cred(tc); return error; } static int apparmor_userns_create(const struct cred *cred) { struct aa_label *label; struct aa_profile *profile; int error = 0; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_TASK, AA_CLASS_NS, OP_USERNS_CREATE); ad.subj_cred = current_cred(); label = begin_current_label_crit_section(); if (!unconfined(label)) { error = fn_for_each(label, profile, aa_profile_ns_perm(profile, &ad, AA_USERNS_CREATE)); } end_current_label_crit_section(label); return error; } static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags) { struct aa_sk_ctx *ctx; ctx = kzalloc(sizeof(*ctx), flags); if (!ctx) return -ENOMEM; sk->sk_security = ctx; return 0; } static void apparmor_sk_free_security(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); sk->sk_security = NULL; aa_put_label(ctx->label); aa_put_label(ctx->peer); kfree(ctx); } /** * apparmor_sk_clone_security - clone the sk_security field * @sk: sock to have security cloned * @newsk: sock getting clone */ static void apparmor_sk_clone_security(const struct sock *sk, struct sock *newsk) { struct aa_sk_ctx *ctx = aa_sock(sk); struct aa_sk_ctx *new = aa_sock(newsk); if (new->label) aa_put_label(new->label); new->label = aa_get_label(ctx->label); if (new->peer) aa_put_label(new->peer); new->peer = aa_get_label(ctx->peer); } static int apparmor_socket_create(int family, int type, int protocol, int kern) { struct aa_label *label; int error = 0; AA_BUG(in_interrupt()); label = begin_current_label_crit_section(); if (!(kern || unconfined(label))) error = af_select(family, create_perm(label, family, type, protocol), aa_af_perm(current_cred(), label, OP_CREATE, AA_MAY_CREATE, family, type, protocol)); end_current_label_crit_section(label); return error; } /** * apparmor_socket_post_create - setup the per-socket security struct * @sock: socket that is being setup * @family: family of socket being created * @type: type of the socket * @protocol: protocol of the socket * @kern: socket is a special kernel socket * * Note: * - kernel sockets labeled kernel_t used to use unconfined * - socket may not have sk here if created with sock_create_lite or * sock_alloc. These should be accept cases which will be handled in * sock_graft.
*/ static int apparmor_socket_post_create(struct socket *sock, int family, int type, int protocol, int kern) { struct aa_label *label; if (kern) { label = aa_get_label(kernel_t); } else label = aa_get_current_label(); if (sock->sk) { struct aa_sk_ctx *ctx = aa_sock(sock->sk); aa_put_label(ctx->label); ctx->label = aa_get_label(label); } aa_put_label(label); return 0; } static int apparmor_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(!address); AA_BUG(in_interrupt()); return af_select(sock->sk->sk_family, bind_perm(sock, address, addrlen), aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk)); } static int apparmor_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(!address); AA_BUG(in_interrupt()); return af_select(sock->sk->sk_family, connect_perm(sock, address, addrlen), aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk)); } static int apparmor_socket_listen(struct socket *sock, int backlog) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(in_interrupt()); return af_select(sock->sk->sk_family, listen_perm(sock, backlog), aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk)); } /* * Note: while @newsock is created and has some information, the accept * has not been done. */ static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(!newsock); AA_BUG(in_interrupt()); return af_select(sock->sk->sk_family, accept_perm(sock, newsock), aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk)); } static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, struct msghdr *msg, int size) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(!msg); AA_BUG(in_interrupt()); return af_select(sock->sk->sk_family, msg_perm(op, request, sock, msg, size), aa_sk_perm(op, request, sock->sk)); } static int apparmor_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size); } static int apparmor_socket_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags) { return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size); } /* revalidation, get/set attr, shutdown */ static int aa_sock_perm(const char *op, u32 request, struct socket *sock) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(in_interrupt()); return af_select(sock->sk->sk_family, sock_perm(op, request, sock), aa_sk_perm(op, request, sock->sk)); } static int apparmor_socket_getsockname(struct socket *sock) { return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock); } static int apparmor_socket_getpeername(struct socket *sock) { return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock); } /* revalidation, get/set attr, opt */ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, int level, int optname) { AA_BUG(!sock); AA_BUG(!sock->sk); AA_BUG(in_interrupt()); return af_select(sock->sk->sk_family, opt_perm(op, request, sock, level, optname), aa_sk_perm(op, request, sock->sk)); } static int apparmor_socket_getsockopt(struct socket *sock, int level, int optname) { return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock, level, optname); } static int apparmor_socket_setsockopt(struct socket *sock, int level, int optname) { return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock, level, optname); } static int apparmor_socket_shutdown(struct socket *sock, int how) { return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock); } #ifdef CONFIG_NETWORK_SECMARK /** *
apparmor_socket_sock_rcv_skb - check perms before associating skb to sk * @sk: sk to associate @skb with * @skb: skb to check for perms * * Note: cannot sleep; may be called with locks held * * don't want protocol-specific checks in __skb_recv_datagram() * to deny an incoming connection; socket_sock_rcv_skb() does the check instead */ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct aa_sk_ctx *ctx = aa_sock(sk); if (!skb->secmark) return 0; return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE, skb->secmark, sk); } #endif static struct aa_label *sk_peer_label(struct sock *sk) { struct aa_sk_ctx *ctx = aa_sock(sk); if (ctx->peer) return ctx->peer; return ERR_PTR(-ENOPROTOOPT); } /** * apparmor_socket_getpeersec_stream - get security context of peer * @sock: socket that we are trying to get the peer context of * @optval: output - buffer to copy peer name to * @optlen: output - size of copied name in @optval * @len: size of @optval buffer * Returns: 0 on success, -errno on failure * * Note: for TCP this is only valid if using IPsec or CIPSO on the LAN */ static int apparmor_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) { char *name = NULL; int slen, error = 0; struct aa_label *label; struct aa_label *peer; label = begin_current_label_crit_section(); peer = sk_peer_label(sock->sk); if (IS_ERR(peer)) { error = PTR_ERR(peer); goto done; } slen = aa_label_asxprint(&name, labels_ns(label), peer, FLAG_SHOW_MODE | FLAG_VIEW_SUBNS | FLAG_HIDDEN_UNCONFINED, GFP_KERNEL); /* don't include terminating \0 in slen, it breaks some apps */ if (slen < 0) { error = -ENOMEM; goto done; } if (slen > len) { error = -ERANGE; goto done_len; } if (copy_to_sockptr(optval, name, slen)) error = -EFAULT; done_len: if (copy_to_sockptr(optlen, &slen, sizeof(slen))) error = -EFAULT; done: end_current_label_crit_section(label); kfree(name); return error; } /** * apparmor_socket_getpeersec_dgram - get security label of packet * @sock: the peer socket * @skb: packet data * @secid: pointer to where to put the secid of the packet * * Sets the netlabel socket state on sk from parent */ static int apparmor_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { /* TODO: requires secid support */ return -ENOPROTOOPT; } /** * apparmor_sock_graft - Initialize newly created socket * @sk: child sock * @parent: parent socket * * Note: could set off of SOCK_CTX(parent), but that would need inode tracking, so * just set the sk security information off of the current creating process label. * Labeling of sk for the accept case should probably be sock based * instead of task based, because of the case where an implicitly labeled * socket is shared by different tasks. */ static void apparmor_sock_graft(struct sock *sk, struct socket *parent) { struct aa_sk_ctx *ctx = aa_sock(sk); if (!ctx->label) ctx->label = aa_get_current_label(); } #ifdef CONFIG_NETWORK_SECMARK static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct aa_sk_ctx *ctx = aa_sock(sk); if (!skb->secmark) return 0; return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT, skb->secmark, sk); } #endif /* * The cred blob is a pointer to, not an instance of, an aa_label.
*/ struct lsm_blob_sizes apparmor_blob_sizes __ro_after_init = { .lbs_cred = sizeof(struct aa_label *), .lbs_file = sizeof(struct aa_file_ctx), .lbs_task = sizeof(struct aa_task_ctx), }; static const struct lsm_id apparmor_lsmid = { .name = "apparmor", .id = LSM_ID_APPARMOR, }; static struct security_hook_list apparmor_hooks[] __ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), LSM_HOOK_INIT(capget, apparmor_capget), LSM_HOOK_INIT(capable, apparmor_capable), LSM_HOOK_INIT(move_mount, apparmor_move_mount), LSM_HOOK_INIT(sb_mount, apparmor_sb_mount), LSM_HOOK_INIT(sb_umount, apparmor_sb_umount), LSM_HOOK_INIT(sb_pivotroot, apparmor_sb_pivotroot), LSM_HOOK_INIT(path_link, apparmor_path_link), LSM_HOOK_INIT(path_unlink, apparmor_path_unlink), LSM_HOOK_INIT(path_symlink, apparmor_path_symlink), LSM_HOOK_INIT(path_mkdir, apparmor_path_mkdir), LSM_HOOK_INIT(path_rmdir, apparmor_path_rmdir), LSM_HOOK_INIT(path_mknod, apparmor_path_mknod), LSM_HOOK_INIT(path_rename, apparmor_path_rename), LSM_HOOK_INIT(path_chmod, apparmor_path_chmod), LSM_HOOK_INIT(path_chown, apparmor_path_chown), LSM_HOOK_INIT(path_truncate, apparmor_path_truncate), LSM_HOOK_INIT(inode_getattr, apparmor_inode_getattr), LSM_HOOK_INIT(file_open, apparmor_file_open), LSM_HOOK_INIT(file_receive, apparmor_file_receive), LSM_HOOK_INIT(file_permission, apparmor_file_permission), LSM_HOOK_INIT(file_alloc_security, apparmor_file_alloc_security), LSM_HOOK_INIT(file_free_security, apparmor_file_free_security), LSM_HOOK_INIT(mmap_file, apparmor_mmap_file), LSM_HOOK_INIT(file_mprotect, apparmor_file_mprotect), LSM_HOOK_INIT(file_lock, apparmor_file_lock), LSM_HOOK_INIT(file_truncate, apparmor_file_truncate), LSM_HOOK_INIT(getselfattr, apparmor_getselfattr), LSM_HOOK_INIT(setselfattr, apparmor_setselfattr), LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security), LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), LSM_HOOK_INIT(socket_create, apparmor_socket_create), LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create), LSM_HOOK_INIT(socket_bind, apparmor_socket_bind), LSM_HOOK_INIT(socket_connect, apparmor_socket_connect), LSM_HOOK_INIT(socket_listen, apparmor_socket_listen), LSM_HOOK_INIT(socket_accept, apparmor_socket_accept), LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg), LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg), LSM_HOOK_INIT(socket_getsockname, apparmor_socket_getsockname), LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername), LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt), LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt), LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown), #ifdef CONFIG_NETWORK_SECMARK LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb), #endif LSM_HOOK_INIT(socket_getpeersec_stream, apparmor_socket_getpeersec_stream), LSM_HOOK_INIT(socket_getpeersec_dgram, apparmor_socket_getpeersec_dgram), LSM_HOOK_INIT(sock_graft, apparmor_sock_graft), #ifdef CONFIG_NETWORK_SECMARK LSM_HOOK_INIT(inet_conn_request, apparmor_inet_conn_request), #endif LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank), LSM_HOOK_INIT(cred_free, apparmor_cred_free), LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare), LSM_HOOK_INIT(cred_transfer, apparmor_cred_transfer), 
LSM_HOOK_INIT(bprm_creds_for_exec, apparmor_bprm_creds_for_exec), LSM_HOOK_INIT(bprm_committing_creds, apparmor_bprm_committing_creds), LSM_HOOK_INIT(bprm_committed_creds, apparmor_bprm_committed_creds), LSM_HOOK_INIT(task_free, apparmor_task_free), LSM_HOOK_INIT(task_alloc, apparmor_task_alloc), LSM_HOOK_INIT(current_getsecid_subj, apparmor_current_getsecid_subj), LSM_HOOK_INIT(task_getsecid_obj, apparmor_task_getsecid_obj), LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit), LSM_HOOK_INIT(task_kill, apparmor_task_kill), LSM_HOOK_INIT(userns_create, apparmor_userns_create), #ifdef CONFIG_AUDIT LSM_HOOK_INIT(audit_rule_init, aa_audit_rule_init), LSM_HOOK_INIT(audit_rule_known, aa_audit_rule_known), LSM_HOOK_INIT(audit_rule_match, aa_audit_rule_match), LSM_HOOK_INIT(audit_rule_free, aa_audit_rule_free), #endif LSM_HOOK_INIT(secid_to_secctx, apparmor_secid_to_secctx), LSM_HOOK_INIT(secctx_to_secid, apparmor_secctx_to_secid), LSM_HOOK_INIT(release_secctx, apparmor_release_secctx), #ifdef CONFIG_IO_URING LSM_HOOK_INIT(uring_override_creds, apparmor_uring_override_creds), LSM_HOOK_INIT(uring_sqpoll, apparmor_uring_sqpoll), #endif }; /* * AppArmor sysfs module parameters */ static int param_set_aabool(const char *val, const struct kernel_param *kp); static int param_get_aabool(char *buffer, const struct kernel_param *kp); #define param_check_aabool param_check_bool static const struct kernel_param_ops param_ops_aabool = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aabool, .get = param_get_aabool }; static int param_set_aauint(const char *val, const struct kernel_param *kp); static int param_get_aauint(char *buffer, const struct kernel_param *kp); #define param_check_aauint param_check_uint static const struct kernel_param_ops param_ops_aauint = { .set = param_set_aauint, .get = param_get_aauint }; static int param_set_aacompressionlevel(const char *val, const struct kernel_param *kp); static int param_get_aacompressionlevel(char *buffer, const struct kernel_param *kp); #define param_check_aacompressionlevel param_check_int static const struct kernel_param_ops param_ops_aacompressionlevel = { .set = param_set_aacompressionlevel, .get = param_get_aacompressionlevel }; static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp); static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp); #define param_check_aalockpolicy param_check_bool static const struct kernel_param_ops param_ops_aalockpolicy = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aalockpolicy, .get = param_get_aalockpolicy }; static int param_set_audit(const char *val, const struct kernel_param *kp); static int param_get_audit(char *buffer, const struct kernel_param *kp); static int param_set_mode(const char *val, const struct kernel_param *kp); static int param_get_mode(char *buffer, const struct kernel_param *kp); /* Flag values, also controllable via /sys/module/apparmor/parameters * We define special types as we want to do additional mediation. 
*/ /* AppArmor global enforcement switch - complain, enforce, kill */ enum profile_mode aa_g_profile_mode = APPARMOR_ENFORCE; module_param_call(mode, param_set_mode, param_get_mode, &aa_g_profile_mode, S_IRUSR | S_IWUSR); /* whether policy verification hashing is enabled */ bool aa_g_hash_policy = IS_ENABLED(CONFIG_SECURITY_APPARMOR_HASH_DEFAULT); #ifdef CONFIG_SECURITY_APPARMOR_HASH module_param_named(hash_policy, aa_g_hash_policy, aabool, S_IRUSR | S_IWUSR); #endif /* whether policy exactly as loaded is retained for debug and checkpointing */ bool aa_g_export_binary = IS_ENABLED(CONFIG_SECURITY_APPARMOR_EXPORT_BINARY); #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY module_param_named(export_binary, aa_g_export_binary, aabool, 0600); #endif /* policy loaddata compression level */ int aa_g_rawdata_compression_level = AA_DEFAULT_CLEVEL; module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level, aacompressionlevel, 0400); /* Debug mode */ bool aa_g_debug = IS_ENABLED(CONFIG_SECURITY_APPARMOR_DEBUG_MESSAGES); module_param_named(debug, aa_g_debug, aabool, S_IRUSR | S_IWUSR); /* Audit mode */ enum audit_mode aa_g_audit; module_param_call(audit, param_set_audit, param_get_audit, &aa_g_audit, S_IRUSR | S_IWUSR); /* Determines if the audit header is included in audited messages. This * provides more context if the audit daemon is not running */ bool aa_g_audit_header = true; module_param_named(audit_header, aa_g_audit_header, aabool, S_IRUSR | S_IWUSR); /* lock out loading/removal of policy * TODO: add in at boot loading of policy, which is the only way to * load policy, if lock_policy is set */ bool aa_g_lock_policy; module_param_named(lock_policy, aa_g_lock_policy, aalockpolicy, S_IRUSR | S_IWUSR); /* Syscall logging mode */ bool aa_g_logsyscall; module_param_named(logsyscall, aa_g_logsyscall, aabool, S_IRUSR | S_IWUSR); /* Maximum pathname length before accesses will start getting rejected */ unsigned int aa_g_path_max = 2 * PATH_MAX; module_param_named(path_max, aa_g_path_max, aauint, S_IRUSR); /* Determines how paranoid loading of policy is and how much verification * on the loaded policy is done. * DEPRECATED: read-only as strict checking of load is always done now * that non-root users (user namespaces) can load policy. */ bool aa_g_paranoid_load = IS_ENABLED(CONFIG_SECURITY_APPARMOR_PARANOID_LOAD); module_param_named(paranoid_load, aa_g_paranoid_load, aabool, S_IRUGO); static int param_get_aaintbool(char *buffer, const struct kernel_param *kp); static int param_set_aaintbool(const char *val, const struct kernel_param *kp); #define param_check_aaintbool param_check_int static const struct kernel_param_ops param_ops_aaintbool = { .set = param_set_aaintbool, .get = param_get_aaintbool }; /* Boot time disable flag */ static int apparmor_enabled __ro_after_init = 1; module_param_named(enabled, apparmor_enabled, aaintbool, 0444); static int __init apparmor_enabled_setup(char *str) { unsigned long enabled; int error = kstrtoul(str, 0, &enabled); if (!error) apparmor_enabled = enabled ?
1 : 0; return 1; } __setup("apparmor=", apparmor_enabled_setup); /* set global flag turning off the ability to load policy */ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_bool(buffer, kp); } static int param_set_aabool(const char *val, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } static int param_get_aabool(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_bool(buffer, kp); } static int param_set_aauint(const char *val, const struct kernel_param *kp) { int error; if (!apparmor_enabled) return -EINVAL; /* file is ro but enforce 2nd line check */ if (apparmor_initialized) return -EPERM; error = param_set_uint(val, kp); aa_g_path_max = max_t(uint32_t, aa_g_path_max, sizeof(union aa_buffer)); pr_info("AppArmor: buffer size set to %d bytes\n", aa_g_path_max); return error; } static int param_get_aauint(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_uint(buffer, kp); } /* Can only be set before AppArmor is initialized (i.e. on boot cmdline). */ static int param_set_aaintbool(const char *val, const struct kernel_param *kp) { struct kernel_param kp_local; bool value; int error; if (apparmor_initialized) return -EPERM; /* Create local copy, with arg pointing to bool type. */ value = !!*((int *)kp->arg); memcpy(&kp_local, kp, sizeof(kp_local)); kp_local.arg = &value; error = param_set_bool(val, &kp_local); if (!error) *((int *)kp->arg) = *((bool *)kp_local.arg); return error; } /* * To avoid changing /sys/module/apparmor/parameters/enabled from Y/N to * 1/0, this converts the "int that is actually bool" back to bool for * display in the /sys filesystem, while keeping it "int" for the LSM * infrastructure. */ static int param_get_aaintbool(char *buffer, const struct kernel_param *kp) { struct kernel_param kp_local; bool value; /* Create local copy, with arg pointing to bool type. 
*/ value = !!*((int *)kp->arg); memcpy(&kp_local, kp, sizeof(kp_local)); kp_local.arg = &value; return param_get_bool(buffer, &kp_local); } static int param_set_aacompressionlevel(const char *val, const struct kernel_param *kp) { int error; if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized) return -EPERM; error = param_set_int(val, kp); aa_g_rawdata_compression_level = clamp(aa_g_rawdata_compression_level, AA_MIN_CLEVEL, AA_MAX_CLEVEL); pr_info("AppArmor: policy rawdata compression level set to %d\n", aa_g_rawdata_compression_level); return error; } static int param_get_aacompressionlevel(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return param_get_int(buffer, kp); } static int param_get_audit(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return sprintf(buffer, "%s", audit_mode_names[aa_g_audit]); } static int param_set_audit(const char *val, const struct kernel_param *kp) { int i; if (!apparmor_enabled) return -EINVAL; if (!val) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; i = match_string(audit_mode_names, AUDIT_MAX_INDEX, val); if (i < 0) return -EINVAL; aa_g_audit = i; return 0; } static int param_get_mode(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) return -EINVAL; if (apparmor_initialized && !aa_current_policy_view_capable(NULL)) return -EPERM; return sprintf(buffer, "%s", aa_profile_mode_names[aa_g_profile_mode]); } static int param_set_mode(const char *val, const struct kernel_param *kp) { int i; if (!apparmor_enabled) return -EINVAL; if (!val) return -EINVAL; if (apparmor_initialized && !aa_current_policy_admin_capable(NULL)) return -EPERM; i = match_string(aa_profile_mode_names, APPARMOR_MODE_NAMES_MAX_INDEX, val); if (i < 0) return -EINVAL; aa_g_profile_mode = i; return 0; } char *aa_get_buffer(bool in_atomic) { union aa_buffer *aa_buf; struct aa_local_cache *cache; bool try_again = true; gfp_t flags = (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); /* use per cpu cached buffers first */ cache = get_cpu_ptr(&aa_local_buffers); if (!list_empty(&cache->head)) { aa_buf = list_first_entry(&cache->head, union aa_buffer, list); list_del(&aa_buf->list); cache->hold--; cache->count--; put_cpu_ptr(&aa_local_buffers); return &aa_buf->buffer[0]; } put_cpu_ptr(&aa_local_buffers); if (!spin_trylock(&aa_buffers_lock)) { cache = get_cpu_ptr(&aa_local_buffers); cache->hold += 1; put_cpu_ptr(&aa_local_buffers); spin_lock(&aa_buffers_lock); } else { cache = get_cpu_ptr(&aa_local_buffers); put_cpu_ptr(&aa_local_buffers); } retry: if (buffer_count > reserve_count || (in_atomic && !list_empty(&aa_global_buffers))) { aa_buf = list_first_entry(&aa_global_buffers, union aa_buffer, list); list_del(&aa_buf->list); buffer_count--; spin_unlock(&aa_buffers_lock); return aa_buf->buffer; } if (in_atomic) { /* * out of reserve buffers and in atomic context so increase * how many buffers to keep in reserve */ reserve_count++; flags = GFP_ATOMIC; } spin_unlock(&aa_buffers_lock); if (!in_atomic) might_sleep(); aa_buf = kmalloc(aa_g_path_max, flags); if (!aa_buf) { if (try_again) { try_again = false; spin_lock(&aa_buffers_lock); goto retry; } pr_warn_once("AppArmor: Failed to allocate a memory buffer.\n"); return NULL; } return aa_buf->buffer; } void aa_put_buffer(char *buf) { 
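/* Return a buffer obtained from aa_get_buffer(). If the global list was * recently contended (cache->hold is non-zero), keep the buffer on this * CPU's local cache instead of pushing it back onto the global list. */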
union aa_buffer *aa_buf; struct aa_local_cache *cache; if (!buf) return; aa_buf = container_of(buf, union aa_buffer, buffer[0]); cache = get_cpu_ptr(&aa_local_buffers); if (!cache->hold) { put_cpu_ptr(&aa_local_buffers); if (spin_trylock(&aa_buffers_lock)) { /* put back on global list */ list_add(&aa_buf->list, &aa_global_buffers); buffer_count++; spin_unlock(&aa_buffers_lock); cache = get_cpu_ptr(&aa_local_buffers); put_cpu_ptr(&aa_local_buffers); return; } /* contention on global list, fallback to percpu */ cache = get_cpu_ptr(&aa_local_buffers); cache->hold += 1; } /* cache in percpu list */ list_add(&aa_buf->list, &cache->head); cache->count++; put_cpu_ptr(&aa_local_buffers); } /* * AppArmor init functions */ /** * set_init_ctx - set a task context and profile on the first task. * * TODO: allow setting an alternate profile other than unconfined */ static int __init set_init_ctx(void) { struct cred *cred = (__force struct cred *)current->real_cred; set_cred_label(cred, aa_get_label(ns_unconfined(root_ns))); return 0; } static void destroy_buffers(void) { union aa_buffer *aa_buf; spin_lock(&aa_buffers_lock); while (!list_empty(&aa_global_buffers)) { aa_buf = list_first_entry(&aa_global_buffers, union aa_buffer, list); list_del(&aa_buf->list); spin_unlock(&aa_buffers_lock); kfree(aa_buf); spin_lock(&aa_buffers_lock); } spin_unlock(&aa_buffers_lock); } static int __init alloc_buffers(void) { union aa_buffer *aa_buf; int i, num; /* * per cpu set of cached allocated buffers used to help reduce * lock contention */ for_each_possible_cpu(i) { per_cpu(aa_local_buffers, i).hold = 0; per_cpu(aa_local_buffers, i).count = 0; INIT_LIST_HEAD(&per_cpu(aa_local_buffers, i).head); } /* * A function may require two buffers at once. Usually the buffers are * used for a short period of time and are shared. On a UP kernel two * buffers should be enough; with more CPUs it is possible that more * buffers will be used simultaneously. The preallocated pool may grow. * This preallocation also has the side effect that AppArmor will be * disabled early at boot if aa_g_path_max is extremely high. */ if (num_online_cpus() > 1) num = 4 + RESERVE_COUNT; else num = 2 + RESERVE_COUNT; for (i = 0; i < num; i++) { aa_buf = kmalloc(aa_g_path_max, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!aa_buf) { destroy_buffers(); return -ENOMEM; } aa_put_buffer(aa_buf->buffer); } return 0; } #ifdef CONFIG_SYSCTL static int apparmor_dointvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (!aa_current_policy_admin_capable(NULL)) return -EPERM; if (!apparmor_enabled) return -EINVAL; return proc_dointvec(table, write, buffer, lenp, ppos); } static struct ctl_table apparmor_sysctl_table[] = { #ifdef CONFIG_USER_NS { .procname = "unprivileged_userns_apparmor_policy", .data = &unprivileged_userns_apparmor_policy, .maxlen = sizeof(int), .mode = 0600, .proc_handler = apparmor_dointvec, }, #endif /* CONFIG_USER_NS */ { .procname = "apparmor_display_secid_mode", .data = &apparmor_display_secid_mode, .maxlen = sizeof(int), .mode = 0600, .proc_handler = apparmor_dointvec, }, { .procname = "apparmor_restrict_unprivileged_unconfined", .data = &aa_unprivileged_unconfined_restricted, .maxlen = sizeof(int), .mode = 0600, .proc_handler = apparmor_dointvec, }, { } }; static int __init apparmor_init_sysctl(void) { return register_sysctl("kernel", apparmor_sysctl_table) ?
0 : -ENOMEM; } #else static inline int apparmor_init_sysctl(void) { return 0; } #endif /* CONFIG_SYSCTL */ #if defined(CONFIG_NETFILTER) && defined(CONFIG_NETWORK_SECMARK) static unsigned int apparmor_ip_postroute(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct aa_sk_ctx *ctx; struct sock *sk; if (!skb->secmark) return NF_ACCEPT; sk = skb_to_full_sk(skb); if (sk == NULL) return NF_ACCEPT; ctx = aa_sock(sk); if (!apparmor_secmark_check(ctx->label, OP_SENDMSG, AA_MAY_SEND, skb->secmark, sk)) return NF_ACCEPT; return NF_DROP_ERR(-ECONNREFUSED); } static const struct nf_hook_ops apparmor_nf_ops[] = { { .hook = apparmor_ip_postroute, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_SELINUX_FIRST, }, #if IS_ENABLED(CONFIG_IPV6) { .hook = apparmor_ip_postroute, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_SELINUX_FIRST, }, #endif }; static int __net_init apparmor_nf_register(struct net *net) { return nf_register_net_hooks(net, apparmor_nf_ops, ARRAY_SIZE(apparmor_nf_ops)); } static void __net_exit apparmor_nf_unregister(struct net *net) { nf_unregister_net_hooks(net, apparmor_nf_ops, ARRAY_SIZE(apparmor_nf_ops)); } static struct pernet_operations apparmor_net_ops = { .init = apparmor_nf_register, .exit = apparmor_nf_unregister, }; static int __init apparmor_nf_ip_init(void) { int err; if (!apparmor_enabled) return 0; err = register_pernet_subsys(&apparmor_net_ops); if (err) panic("Apparmor: register_pernet_subsys: error %d\n", err); return 0; } __initcall(apparmor_nf_ip_init); #endif static char nulldfa_src[] = { #include "nulldfa.in" }; static struct aa_dfa *nulldfa; static char stacksplitdfa_src[] = { #include "stacksplitdfa.in" }; struct aa_dfa *stacksplitdfa; struct aa_policydb *nullpdb; static int __init aa_setup_dfa_engine(void) { int error = -ENOMEM; nullpdb = aa_alloc_pdb(GFP_KERNEL); if (!nullpdb) return -ENOMEM; nulldfa = aa_dfa_unpack(nulldfa_src, sizeof(nulldfa_src), TO_ACCEPT1_FLAG(YYTD_DATA32) | TO_ACCEPT2_FLAG(YYTD_DATA32)); if (IS_ERR(nulldfa)) { error = PTR_ERR(nulldfa); goto fail; } nullpdb->dfa = aa_get_dfa(nulldfa); nullpdb->perms = kcalloc(2, sizeof(struct aa_perms), GFP_KERNEL); if (!nullpdb->perms) goto fail; nullpdb->size = 2; stacksplitdfa = aa_dfa_unpack(stacksplitdfa_src, sizeof(stacksplitdfa_src), TO_ACCEPT1_FLAG(YYTD_DATA32) | TO_ACCEPT2_FLAG(YYTD_DATA32)); if (IS_ERR(stacksplitdfa)) { error = PTR_ERR(stacksplitdfa); goto fail; } return 0; fail: aa_put_pdb(nullpdb); aa_put_dfa(nulldfa); nullpdb = NULL; nulldfa = NULL; stacksplitdfa = NULL; return error; } static void __init aa_teardown_dfa_engine(void) { aa_put_dfa(stacksplitdfa); aa_put_dfa(nulldfa); aa_put_pdb(nullpdb); nullpdb = NULL; stacksplitdfa = NULL; nulldfa = NULL; } static int __init apparmor_init(void) { int error; error = aa_setup_dfa_engine(); if (error) { AA_ERROR("Unable to setup dfa engine\n"); goto alloc_out; } error = aa_alloc_root_ns(); if (error) { AA_ERROR("Unable to allocate default profile namespace\n"); goto alloc_out; } error = apparmor_init_sysctl(); if (error) { AA_ERROR("Unable to register sysctls\n"); goto alloc_out; } error = alloc_buffers(); if (error) { AA_ERROR("Unable to allocate work buffers\n"); goto alloc_out; } error = set_init_ctx(); if (error) { AA_ERROR("Failed to set context on init task\n"); aa_free_root_ns(); goto buffers_out; } security_add_hooks(apparmor_hooks, ARRAY_SIZE(apparmor_hooks), &apparmor_lsmid); /* Report that AppArmor successfully initialized */ apparmor_initialized = 1; if 
(aa_g_profile_mode == APPARMOR_COMPLAIN) aa_info_message("AppArmor initialized: complain mode enabled"); else if (aa_g_profile_mode == APPARMOR_KILL) aa_info_message("AppArmor initialized: kill mode enabled"); else aa_info_message("AppArmor initialized"); return error; buffers_out: destroy_buffers(); alloc_out: aa_destroy_aafs(); aa_teardown_dfa_engine(); apparmor_enabled = false; return error; } DEFINE_LSM(apparmor) = { .name = "apparmor", .flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE, .enabled = &apparmor_enabled, .blobs = &apparmor_blob_sizes, .init = apparmor_init, };
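The aa_get_buffer()/aa_put_buffer() pair above implements a two-level pool: a per-CPU free list that needs no locking, backed by a global list under aa_buffers_lock, with kmalloc() as the final fallback and a "hold" counter that keeps frees on the per-CPU list while the global lock is contended. Below is a minimal userspace sketch of the same two-level idea, not the kernel code; pool_get()/pool_put(), struct buf and BUF_SIZE are invented for this illustration, and the reserve/hold accounting is omitted.

#include <stddef.h>
#include <stdlib.h>
#include <pthread.h>

#define BUF_SIZE 4096

struct buf {
	struct buf *next;
	char data[BUF_SIZE];
};

/* Per-thread cache stands in for the per-CPU aa_local_buffers list. */
static _Thread_local struct buf *local_cache;

/* Global reserve stands in for aa_global_buffers + aa_buffers_lock. */
static struct buf *global_free;
static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

char *pool_get(void)
{
	struct buf *b = local_cache;

	if (b) {			/* fast path: no lock taken */
		local_cache = b->next;
		return b->data;
	}

	pthread_mutex_lock(&global_lock);
	b = global_free;
	if (b)
		global_free = b->next;
	pthread_mutex_unlock(&global_lock);

	if (!b)
		b = malloc(sizeof(*b));	/* fallback allocation, may fail */

	return b ? b->data : NULL;
}

void pool_put(char *data)
{
	/* container_of() equivalent: recover the struct from its member. */
	struct buf *b = (struct buf *)(data - offsetof(struct buf, data));

	/* This toy version always caches locally; the kernel code prefers
	 * the global list and only keeps buffers per CPU while the global
	 * lock is contended (the cache->hold bookkeeping). */
	b->next = local_cache;
	local_cache = b;
}

The point of the split is the same as in the kernel code: the common case touches only CPU-local (here thread-local) state, so buffer users do not all serialize on one lock.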
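param_set_audit() and param_set_mode() above follow one recipe: reject the change unless it is still permitted, look the string up in a fixed table of names, and store the resulting index. The fragment below is a condensed userspace version of that recipe under stated assumptions: set_mode(), mode_names[] and the locked flag are stand-ins invented here, the table entries are illustrative rather than the exact aa_profile_mode_names[] contents, and the kernel versions additionally check policy admin capability.

#include <errno.h>
#include <stdbool.h>
#include <string.h>

static const char *const mode_names[] = { "enforce", "complain", "kill" };
static int current_mode;	/* plays the role of aa_g_profile_mode */
static bool locked;		/* stands in for the initialized/capability checks */

int set_mode(const char *val)
{
	if (!val)
		return -EINVAL;
	if (locked)
		return -EPERM;

	/* Open-coded match_string(): index of val in mode_names[], or
	 * -EINVAL when it is not a known mode, as in the code above. */
	for (size_t i = 0; i < sizeof(mode_names) / sizeof(mode_names[0]); i++) {
		if (!strcmp(val, mode_names[i])) {
			current_mode = (int)i;
			return 0;
		}
	}
	return -EINVAL;
}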
// SPDX-License-Identifier: GPL-2.0-or-later /* * KVM paravirt_ops implementation * * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com> * Copyright IBM Corporation, 2007 * Authors: Anthony Liguori <aliguori@us.ibm.com> */ #define pr_fmt(fmt) "kvm-guest: " fmt #include <linux/context_tracking.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/kernel.h> #include <linux/kvm_para.h> #include <linux/cpu.h> #include <linux/mm.h> #include <linux/highmem.h> #include <linux/hardirq.h> #include <linux/notifier.h> #include <linux/reboot.h> #include <linux/hash.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/kprobes.h> #include <linux/nmi.h> #include <linux/swait.h> #include <linux/syscore_ops.h> #include <linux/cc_platform.h> #include <linux/efi.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> #include <asm/desc.h> #include <asm/tlbflush.h> #include <asm/apic.h> #include <asm/apicdef.h> #include <asm/hypervisor.h> #include <asm/tlb.h> #include <asm/cpuidle_haltpoll.h> #include <asm/ptrace.h> #include <asm/reboot.h> #include <asm/svm.h> #include <asm/e820/api.h> DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); static int kvmapf = 1; static int __init parse_no_kvmapf(char *arg) { kvmapf = 0; return 0; } early_param("no-kvmapf", parse_no_kvmapf); static int steal_acc = 1; static int __init parse_no_stealacc(char *arg) { steal_acc = 0; return 0; } early_param("no-steal-acc", parse_no_stealacc); static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled); static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; static int has_guest_poll = 0; /* * No need for any "IO delay" on KVM */ static void kvm_io_delay(void) { } #define KVM_TASK_SLEEP_HASHBITS 8 #define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS) struct kvm_task_sleep_node { struct hlist_node link; struct swait_queue_head wq; u32 token; int cpu; }; static struct kvm_task_sleep_head { raw_spinlock_t lock; struct hlist_head list; } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, u32 token) { struct hlist_node *p; hlist_for_each(p, &b->list) { struct kvm_task_sleep_node *n = hlist_entry(p, typeof(*n), link); if (n->token == token) return n; }
return NULL; } static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; struct kvm_task_sleep_node *e; raw_spin_lock(&b->lock); e = _find_apf_task(b, token); if (e) { /* dummy entry exist -> wake up was delivered ahead of PF */ hlist_del(&e->link); raw_spin_unlock(&b->lock); kfree(e); return false; } n->token = token; n->cpu = smp_processor_id(); init_swait_queue_head(&n->wq); hlist_add_head(&n->link, &b->list); raw_spin_unlock(&b->lock); return true; } /* * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled * @token: Token to identify the sleep node entry * * Invoked from the async pagefault handling code or from the VM exit page * fault handler. In both cases RCU is watching. */ void kvm_async_pf_task_wait_schedule(u32 token) { struct kvm_task_sleep_node n; DECLARE_SWAITQUEUE(wait); lockdep_assert_irqs_disabled(); if (!kvm_async_pf_queue_task(token, &n)) return; for (;;) { prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE); if (hlist_unhashed(&n.link)) break; local_irq_enable(); schedule(); local_irq_disable(); } finish_swait(&n.wq, &wait); } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule); static void apf_task_wake_one(struct kvm_task_sleep_node *n) { hlist_del_init(&n->link); if (swq_has_sleeper(&n->wq)) swake_up_one(&n->wq); } static void apf_task_wake_all(void) { int i; for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; struct kvm_task_sleep_node *n; struct hlist_node *p, *next; raw_spin_lock(&b->lock); hlist_for_each_safe(p, next, &b->list) { n = hlist_entry(p, typeof(*n), link); if (n->cpu == smp_processor_id()) apf_task_wake_one(n); } raw_spin_unlock(&b->lock); } } void kvm_async_pf_task_wake(u32 token) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; struct kvm_task_sleep_node *n, *dummy = NULL; if (token == ~0) { apf_task_wake_all(); return; } again: raw_spin_lock(&b->lock); n = _find_apf_task(b, token); if (!n) { /* * Async #PF not yet handled, add a dummy entry for the token. * Allocating the token must be down outside of the raw lock * as the allocator is preemptible on PREEMPT_RT kernels. */ if (!dummy) { raw_spin_unlock(&b->lock); dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC); /* * Continue looping on allocation failure, eventually * the async #PF will be handled and allocating a new * node will be unnecessary. */ if (!dummy) cpu_relax(); /* * Recheck for async #PF completion before enqueueing * the dummy token to avoid duplicate list entries. */ goto again; } dummy->token = token; dummy->cpu = smp_processor_id(); init_swait_queue_head(&dummy->wq); hlist_add_head(&dummy->link, &b->list); dummy = NULL; } else { apf_task_wake_one(n); } raw_spin_unlock(&b->lock); /* A dummy token might be allocated and ultimately not used. 
*/ kfree(dummy); } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); noinstr u32 kvm_read_and_reset_apf_flags(void) { u32 flags = 0; if (__this_cpu_read(async_pf_enabled)) { flags = __this_cpu_read(apf_reason.flags); __this_cpu_write(apf_reason.flags, 0); } return flags; } EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) { u32 flags = kvm_read_and_reset_apf_flags(); irqentry_state_t state; if (!flags) return false; state = irqentry_enter(regs); instrumentation_begin(); /* * If the host managed to inject an async #PF into an interrupt * disabled region, then die hard as this is not going to end well * and the host side is seriously broken. */ if (unlikely(!(regs->flags & X86_EFLAGS_IF))) panic("Host injected async #PF in interrupt disabled region\n"); if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) { if (unlikely(!(user_mode(regs)))) panic("Host injected async #PF in kernel mode\n"); /* Page is swapped out by the host. */ kvm_async_pf_task_wait_schedule(token); } else { WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags); } instrumentation_end(); irqentry_exit(regs, state); return true; } DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); u32 token; apic_eoi(); inc_irq_stat(irq_hv_callback_count); if (__this_cpu_read(async_pf_enabled)) { token = __this_cpu_read(apf_reason.token); kvm_async_pf_task_wake(token); __this_cpu_write(apf_reason.token, 0); wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1); } set_irq_regs(old_regs); } static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_ops.cpu.io_delay = kvm_io_delay; #ifdef CONFIG_X86_IO_APIC no_timer_check = 1; #endif } static void kvm_register_steal_time(void) { int cpu = smp_processor_id(); struct kvm_steal_time *st = &per_cpu(steal_time, cpu); if (!has_steal_clock) return; wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); pr_debug("stealtime: cpu %d, msr %llx\n", cpu, (unsigned long long) slow_virt_to_phys(st)); } static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; static notrace __maybe_unused void kvm_guest_apic_eoi_write(void) { /** * This relies on __test_and_clear_bit to modify the memory * in a way that is atomic with respect to the local CPU. * The hypervisor only accesses this memory from the local CPU so * there's no need for lock or memory barriers. * An optimization barrier is implied in apic write. */ if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi))) return; apic_native_eoi(); } static void kvm_guest_cpu_init(void) { if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) { u64 pa; WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled)); pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT; if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR); wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(async_pf_enabled, true); pr_debug("setup async PF for cpu %d\n", smp_processor_id()); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { unsigned long pa; /* Size alignment is implied but just to make it explicit. 
*/ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); __this_cpu_write(kvm_apic_eoi, 0); pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi)) | KVM_MSR_ENABLED; wrmsrl(MSR_KVM_PV_EOI_EN, pa); } if (has_steal_clock) kvm_register_steal_time(); } static void kvm_pv_disable_apf(void) { if (!__this_cpu_read(async_pf_enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); __this_cpu_write(async_pf_enabled, false); pr_debug("disable async PF for cpu %d\n", smp_processor_id()); } static void kvm_disable_steal_time(void) { if (!has_steal_clock) return; wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } static u64 kvm_steal_clock(int cpu) { u64 steal; struct kvm_steal_time *src; int version; src = &per_cpu(steal_time, cpu); do { version = src->version; virt_rmb(); steal = src->steal; virt_rmb(); } while ((version & 1) || (version != src->version)); return steal; } static inline void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); } /* * Iterate through all possible CPUs and map the memory region pointed * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once. * * Note: we iterate through all possible CPUs to ensure that CPUs * hotplugged will have their per-cpu variable already mapped as * decrypted. */ static void __init sev_map_percpu_data(void) { int cpu; if (cc_vendor != CC_VENDOR_AMD || !cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return; for_each_possible_cpu(cpu) { __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason)); __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time)); __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi)); } } static void kvm_guest_cpu_offline(bool shutdown) { kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0); kvm_pv_disable_apf(); if (!shutdown) apf_task_wake_all(); kvmclock_disable(); } static int kvm_cpu_online(unsigned int cpu) { unsigned long flags; local_irq_save(flags); kvm_guest_cpu_init(); local_irq_restore(flags); return 0; } #ifdef CONFIG_SMP static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); static bool pv_tlb_flush_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) && !boot_cpu_has(X86_FEATURE_MWAIT) && (num_possible_cpus() != 1)); } static bool pv_ipi_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) && (num_possible_cpus() != 1)); } static bool pv_sched_yield_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) && !boot_cpu_has(X86_FEATURE_MWAIT) && (num_possible_cpus() != 1)); } #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) static void __send_ipi_mask(const struct cpumask *mask, int vector) { unsigned long flags; int cpu, min = 0, max = 0; #ifdef CONFIG_X86_64 __uint128_t ipi_bitmap = 0; #else u64 ipi_bitmap = 0; #endif u32 apic_id, icr; long ret; if (cpumask_empty(mask)) return; local_irq_save(flags); switch (vector) { default: icr = APIC_DM_FIXED | vector; break; case NMI_VECTOR: icr = APIC_DM_NMI; break; } for_each_cpu(cpu, mask) { apic_id = per_cpu(x86_cpu_to_apicid, cpu); if (!ipi_bitmap) { min = max = apic_id; } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) { ipi_bitmap <<= min - apic_id; min = apic_id; } else if (apic_id > min && apic_id < min + 
KVM_IPI_CLUSTER_SIZE) { max = apic_id < max ? max : apic_id; } else { ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr); WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld", ret); min = max = apic_id; ipi_bitmap = 0; } __set_bit(apic_id - min, (unsigned long *)&ipi_bitmap); } if (ipi_bitmap) { ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr); WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld", ret); } local_irq_restore(flags); } static void kvm_send_ipi_mask(const struct cpumask *mask, int vector) { __send_ipi_mask(mask, vector); } static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) { unsigned int this_cpu = smp_processor_id(); struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask); const struct cpumask *local_mask; cpumask_copy(new_mask, mask); cpumask_clear_cpu(this_cpu, new_mask); local_mask = new_mask; __send_ipi_mask(local_mask, vector); } static int __init setup_efi_kvm_sev_migration(void) { efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled"; efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID; efi_status_t status; unsigned long size; bool enabled; if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) || !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) return 0; if (!efi_enabled(EFI_BOOT)) return 0; if (!efi_enabled(EFI_RUNTIME_SERVICES)) { pr_info("%s : EFI runtime services are not enabled\n", __func__); return 0; } size = sizeof(enabled); /* Get variable contents into buffer */ status = efi.get_variable(efi_sev_live_migration_enabled, &efi_variable_guid, NULL, &size, &enabled); if (status == EFI_NOT_FOUND) { pr_info("%s : EFI live migration variable not found\n", __func__); return 0; } if (status != EFI_SUCCESS) { pr_info("%s : EFI variable retrieval failed\n", __func__); return 0; } if (enabled == 0) { pr_info("%s: live migration disabled in EFI\n", __func__); return 0; } pr_info("%s : live migration enabled in EFI\n", __func__); wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY); return 1; } late_initcall(setup_efi_kvm_sev_migration); /* * Set the IPI entry points */ static __init void kvm_setup_pv_ipi(void) { apic_update_callback(send_IPI_mask, kvm_send_ipi_mask); apic_update_callback(send_IPI_mask_allbutself, kvm_send_ipi_mask_allbutself); pr_info("setup PV IPIs\n"); } static void kvm_smp_send_call_func_ipi(const struct cpumask *mask) { int cpu; native_send_call_func_ipi(mask); /* Make sure other vCPUs get a chance to run if they need to. */ for_each_cpu(cpu, mask) { if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) { kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu)); break; } } } static void kvm_flush_tlb_multi(const struct cpumask *cpumask, const struct flush_tlb_info *info) { u8 state; int cpu; struct kvm_steal_time *src; struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask); cpumask_copy(flushmask, cpumask); /* * We have to call flush only on online vCPUs. And * queue flush_on_enter for pre-empted vCPUs */ for_each_cpu(cpu, flushmask) { /* * The local vCPU is never preempted, so we do not explicitly * skip check for local vCPU - it will never be cleared from * flushmask. 
*/ src = &per_cpu(steal_time, cpu); state = READ_ONCE(src->preempted); if ((state & KVM_VCPU_PREEMPTED)) { if (try_cmpxchg(&src->preempted, &state, state | KVM_VCPU_FLUSH_TLB)) __cpumask_clear_cpu(cpu, flushmask); } } native_flush_tlb_multi(flushmask, info); } static __init int kvm_alloc_cpumask(void) { int cpu; if (!kvm_para_available() || nopv) return 0; if (pv_tlb_flush_supported() || pv_ipi_supported()) for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); } return 0; } arch_initcall(kvm_alloc_cpumask); static void __init kvm_smp_prepare_boot_cpu(void) { /* * Map the per-cpu variables as decrypted before kvm_guest_cpu_init() * shares the guest physical address with the hypervisor. */ sev_map_percpu_data(); kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); kvm_spinlock_init(); } static int kvm_cpu_down_prepare(unsigned int cpu) { unsigned long flags; local_irq_save(flags); kvm_guest_cpu_offline(false); local_irq_restore(flags); return 0; } #endif static int kvm_suspend(void) { u64 val = 0; kvm_guest_cpu_offline(false); #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) rdmsrl(MSR_KVM_POLL_CONTROL, val); has_guest_poll = !(val & 1); #endif return 0; } static void kvm_resume(void) { kvm_cpu_online(raw_smp_processor_id()); #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll) wrmsrl(MSR_KVM_POLL_CONTROL, 0); #endif } static struct syscore_ops kvm_syscore_ops = { .suspend = kvm_suspend, .resume = kvm_resume, }; static void kvm_pv_guest_cpu_reboot(void *unused) { kvm_guest_cpu_offline(true); } static int kvm_pv_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) { if (code == SYS_RESTART) on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); return NOTIFY_DONE; } static struct notifier_block kvm_pv_reboot_nb = { .notifier_call = kvm_pv_reboot_notify, }; /* * After a PV feature is registered, the host will keep writing to the * registered memory location. If the guest happens to shutdown, this memory * won't be valid. In cases like kexec, in which you install a new kernel, this * means a random memory location will be kept being written. */ #ifdef CONFIG_CRASH_DUMP static void kvm_crash_shutdown(struct pt_regs *regs) { kvm_guest_cpu_offline(true); native_machine_crash_shutdown(regs); } #endif #if defined(CONFIG_X86_32) || !defined(CONFIG_SMP) bool __kvm_vcpu_is_preempted(long cpu); __visible bool __kvm_vcpu_is_preempted(long cpu) { struct kvm_steal_time *src = &per_cpu(steal_time, cpu); return !!(src->preempted & KVM_VCPU_PREEMPTED); } PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted); #else #include <asm/asm-offsets.h> extern bool __raw_callee_save___kvm_vcpu_is_preempted(long); /* * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and * restoring to/from the stack. 
*/ #define PV_VCPU_PREEMPTED_ASM \ "movq __per_cpu_offset(,%rdi,8), %rax\n\t" \ "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \ "setne %al\n\t" DEFINE_ASM_FUNC(__raw_callee_save___kvm_vcpu_is_preempted, PV_VCPU_PREEMPTED_ASM, .text); #endif static void __init kvm_guest_init(void) { int i; paravirt_ops_setup(); register_reboot_notifier(&kvm_pv_reboot_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) raw_spin_lock_init(&async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { has_steal_clock = 1; static_call_update(pv_steal_clock, kvm_steal_clock); pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(__kvm_vcpu_is_preempted); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) apic_update_callback(eoi, kvm_guest_apic_eoi_write); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) { static_branch_enable(&kvm_async_pf_enabled); sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt); } #ifdef CONFIG_SMP if (pv_tlb_flush_supported()) { pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi; pv_ops.mmu.tlb_remove_table = tlb_remove_table; pr_info("KVM setup pv remote TLB flush\n"); } smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; if (pv_sched_yield_supported()) { smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi; pr_info("setup PV sched yield\n"); } if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", kvm_cpu_online, kvm_cpu_down_prepare) < 0) pr_err("failed to install cpu hotplug callbacks\n"); #else sev_map_percpu_data(); kvm_guest_cpu_init(); #endif #ifdef CONFIG_CRASH_DUMP machine_ops.crash_shutdown = kvm_crash_shutdown; #endif register_syscore_ops(&kvm_syscore_ops); /* * Hard lockup detection is enabled by default. Disable it, as guests * can get false positives too easily, for example if the host is * overcommitted. */ hardlockup_detector_disable(); } static noinline uint32_t __kvm_cpuid_base(void) { if (boot_cpu_data.cpuid_level < 0) return 0; /* So we don't blow up on old processors */ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return hypervisor_cpuid_base(KVM_SIGNATURE, 0); return 0; } static inline uint32_t kvm_cpuid_base(void) { static int kvm_cpuid_base = -1; if (kvm_cpuid_base == -1) kvm_cpuid_base = __kvm_cpuid_base(); return kvm_cpuid_base; } bool kvm_para_available(void) { return kvm_cpuid_base() != 0; } EXPORT_SYMBOL_GPL(kvm_para_available); unsigned int kvm_arch_para_features(void) { return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); } unsigned int kvm_arch_para_hints(void) { return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES); } EXPORT_SYMBOL_GPL(kvm_arch_para_hints); static uint32_t __init kvm_detect(void) { return kvm_cpuid_base(); } static void __init kvm_apic_init(void) { #ifdef CONFIG_SMP if (pv_ipi_supported()) kvm_setup_pv_ipi(); #endif } static bool __init kvm_msi_ext_dest_id(void) { return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID); } static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc) { kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages, KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K); } static void __init kvm_init_platform(void) { if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) { unsigned long nr_pages; int i; pv_ops.mmu.notify_page_enc_status_changed = kvm_sev_hc_page_enc_status; /* * Reset the host's shared pages list related to kernel * specific page encryption status settings before we load a * new kernel by kexec. 
Reset the page encryption status * during early boot instead of just before kexec to avoid SMP * races during kvm_pv_guest_cpu_reboot(). * NOTE: We cannot reset the complete shared pages list * here as we need to retain the UEFI/OVMF firmware * specific settings. */ for (i = 0; i < e820_table->nr_entries; i++) { struct e820_entry *entry = &e820_table->entries[i]; if (entry->type != E820_TYPE_RAM) continue; nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE); kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr, nr_pages, KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K); } /* * Ensure that _bss_decrypted section is marked as decrypted in the * shared pages list. */ early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted, __end_bss_decrypted - __start_bss_decrypted, 0); /* * If not booted using EFI, enable Live migration support. */ if (!efi_enabled(EFI_BOOT)) wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY); } kvmclock_init(); x86_platform.apic_post_init = kvm_apic_init; } #if defined(CONFIG_AMD_MEM_ENCRYPT) static void kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs) { /* RAX and CPL are already in the GHCB */ ghcb_set_rbx(ghcb, regs->bx); ghcb_set_rcx(ghcb, regs->cx); ghcb_set_rdx(ghcb, regs->dx); ghcb_set_rsi(ghcb, regs->si); } static bool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs) { /* No checking of the return state needed */ return true; } #endif const __initconst struct hypervisor_x86 x86_hyper_kvm = { .name = "KVM", .detect = kvm_detect, .type = X86_HYPER_KVM, .init.guest_late_init = kvm_guest_init, .init.x2apic_available = kvm_para_available, .init.msi_ext_dest_id = kvm_msi_ext_dest_id, .init.init_platform = kvm_init_platform, #if defined(CONFIG_AMD_MEM_ENCRYPT) .runtime.sev_es_hcall_prepare = kvm_sev_es_hcall_prepare, .runtime.sev_es_hcall_finish = kvm_sev_es_hcall_finish, #endif }; static __init int activate_jump_labels(void) { if (has_steal_clock) { static_key_slow_inc(&paravirt_steal_enabled); if (steal_acc) static_key_slow_inc(&paravirt_steal_rq_enabled); } return 0; } arch_initcall(activate_jump_labels); #ifdef CONFIG_PARAVIRT_SPINLOCKS /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ static void kvm_kick_cpu(int cpu) { unsigned long flags = 0; u32 apicid; apicid = per_cpu(x86_cpu_to_apicid, cpu); kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); } #include <asm/qspinlock.h> static void kvm_wait(u8 *ptr, u8 val) { if (in_nmi()) return; /* * halt until it's our turn and kicked. Note that we do safe halt * for irq enabled case to avoid hang when lock info is overwritten * in irq spinlock slowpath and no spurious interrupt occur to save us. */ if (irqs_disabled()) { if (READ_ONCE(*ptr) == val) halt(); } else { local_irq_disable(); /* safe_halt() will enable IRQ */ if (READ_ONCE(*ptr) == val) safe_halt(); else local_irq_enable(); } } /* * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. */ void __init kvm_spinlock_init(void) { /* * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is * preferred over native qspinlock when vCPU is preempted. */ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) { pr_info("PV spinlocks disabled, no host support\n"); return; } /* * Disable PV spinlocks and use native qspinlock when dedicated pCPUs * are available. 
*/ if (kvm_para_has_hint(KVM_HINTS_REALTIME)) { pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n"); goto out; } if (num_possible_cpus() == 1) { pr_info("PV spinlocks disabled, single CPU\n"); goto out; } if (nopvspin) { pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n"); goto out; } pr_info("PV spinlocks enabled\n"); __pv_init_lock_hash(); pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_ops.lock.wait = kvm_wait; pv_ops.lock.kick = kvm_kick_cpu; /* * When PV spinlock is enabled which is preferred over * virt_spin_lock(), virt_spin_lock_key's value is meaningless. * Just disable it anyway. */ out: static_branch_disable(&virt_spin_lock_key); } #endif /* CONFIG_PARAVIRT_SPINLOCKS */ #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL static void kvm_disable_host_haltpoll(void *i) { wrmsrl(MSR_KVM_POLL_CONTROL, 0); } static void kvm_enable_host_haltpoll(void *i) { wrmsrl(MSR_KVM_POLL_CONTROL, 1); } void arch_haltpoll_enable(unsigned int cpu) { if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { pr_err_once("host does not support poll control\n"); pr_err_once("host upgrade recommended\n"); return; } /* Enable guest halt poll disables host halt poll */ smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_enable); void arch_haltpoll_disable(unsigned int cpu) { if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) return; /* Disable guest halt poll enables host halt poll */ smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_disable); #endif
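kvm_steal_clock() above reads a record that the hypervisor updates concurrently, using the classic even/odd version protocol: the writer makes the version odd before touching the data and even again afterwards, and the reader retries until it observes a stable even version. Below is a small userspace model of that retry loop, assuming C11 atomics; struct pv_time, read_steal() and write_steal() are invented names, sequential consistency replaces the cheaper virt_rmb() barriers, and the writer side is shown only for completeness (in the real protocol the host performs those stores).

#include <stdatomic.h>
#include <stdint.h>

/* Shared record: the writer bumps version to an odd value, updates
 * steal, then bumps version back to even. */
struct pv_time {
	atomic_uint version;
	atomic_uint_least64_t steal;
};

/* Reader side, modelled on kvm_steal_clock(): retry while the version
 * is odd (update in progress) or changed underneath us. */
uint64_t read_steal(struct pv_time *src)
{
	unsigned int v;
	uint64_t steal;

	do {
		v = atomic_load(&src->version);
		steal = atomic_load(&src->steal);
	} while ((v & 1) || v != atomic_load(&src->version));

	return steal;
}

/* Writer side of the same protocol. */
void write_steal(struct pv_time *dst, uint64_t steal)
{
	atomic_fetch_add(&dst->version, 1);	/* now odd: update in flight */
	atomic_store(&dst->steal, steal);
	atomic_fetch_add(&dst->version, 1);	/* even again: stable */
}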
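__send_ipi_mask() batches destinations so that one KVM_HC_SEND_IPI hypercall covers up to 2 * 64 consecutive APIC IDs: the bitmap is anchored at the lowest ID of the current window and flushed whenever the next ID falls outside it. The sketch below reproduces that batching in plain C under stated assumptions: emit_ipi() is an invented stand-in for the hypercall, a uint64_t[2] replaces __uint128_t, and unlike the kernel code it simply starts a new window instead of shifting the bitmap down when a lower ID would still fit.

#include <stdint.h>
#include <stdio.h>

#define CLUSTER_SIZE (2 * 64)

/* Stand-in for the KVM_HC_SEND_IPI hypercall. */
static void emit_ipi(const uint64_t bitmap[2], uint32_t min)
{
	printf("IPI: base apic id %u, bitmap %016llx%016llx\n",
	       min, (unsigned long long)bitmap[1],
	       (unsigned long long)bitmap[0]);
}

void send_ipi_batched(const uint32_t *apic_ids, int n)
{
	uint64_t bitmap[2] = { 0, 0 };
	uint32_t min = 0;
	int pending = 0;

	for (int i = 0; i < n; i++) {
		uint32_t id = apic_ids[i];

		if (!pending) {
			min = id;		/* start the first window */
		} else if (id < min || id >= min + CLUSTER_SIZE) {
			emit_ipi(bitmap, min);	/* flush the full window */
			bitmap[0] = bitmap[1] = 0;
			min = id;
		}
		bitmap[(id - min) / 64] |= 1ULL << ((id - min) % 64);
		pending = 1;
	}
	if (pending)
		emit_ipi(bitmap, min);
}

The win is the same as in the kernel version: a densely packed set of target CPUs costs one hypercall per 128-ID window instead of one per destination.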
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2011 Instituto Nokia de Tecnologia * * Authors: * Lauro Ramos Venancio <lauro.venancio@openbossa.org> * Aloisio Almeida Jr <aloisio.almeida@openbossa.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/rfkill.h> #include <linux/nfc.h> #include <net/genetlink.h> #include "nfc.h" #define VERSION "0.1" #define NFC_CHECK_PRES_FREQ_MS 2000 int nfc_devlist_generation; DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) { int rc = 0; pr_debug("%s do firmware %s\n", dev_name(&dev->dev), firmware_name); device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (dev->dev_up) { rc = -EBUSY; goto error; } if (!dev->ops->fw_download) { rc = -EOPNOTSUPP; goto error; } dev->fw_download_in_progress = true; rc = dev->ops->fw_download(dev, firmware_name); if (rc) dev->fw_download_in_progress = false; error: device_unlock(&dev->dev); return rc; } /** * nfc_fw_download_done - inform that a firmware download was completed * * @dev: The nfc device to which firmware was downloaded * @firmware_name: The firmware filename * @result: The positive value of a standard errno value */ int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name, u32 result) { dev->fw_download_in_progress = false; return nfc_genl_fw_download_done(dev, firmware_name, result); } EXPORT_SYMBOL(nfc_fw_download_done); /** * nfc_dev_up - turn on the NFC device * * @dev: The nfc device to be turned on * * The device remains up until the nfc_dev_down function is called.
*/ int nfc_dev_up(struct nfc_dev *dev) { int rc = 0; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (dev->rfkill && rfkill_blocked(dev->rfkill)) { rc = -ERFKILL; goto error; } if (dev->fw_download_in_progress) { rc = -EBUSY; goto error; } if (dev->dev_up) { rc = -EALREADY; goto error; } if (dev->ops->dev_up) rc = dev->ops->dev_up(dev); if (!rc) dev->dev_up = true; /* We have to enable the device before discovering SEs */ if (dev->ops->discover_se && dev->ops->discover_se(dev)) pr_err("SE discovery failed\n"); error: device_unlock(&dev->dev); return rc; } /** * nfc_dev_down - turn off the NFC device * * @dev: The nfc device to be turned off */ int nfc_dev_down(struct nfc_dev *dev) { int rc = 0; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (!dev->dev_up) { rc = -EALREADY; goto error; } if (dev->polling || dev->active_target) { rc = -EBUSY; goto error; } if (dev->ops->dev_down) dev->ops->dev_down(dev); dev->dev_up = false; error: device_unlock(&dev->dev); return rc; } static int nfc_rfkill_set_block(void *data, bool blocked) { struct nfc_dev *dev = data; pr_debug("%s blocked %d", dev_name(&dev->dev), blocked); if (!blocked) return 0; nfc_dev_down(dev); return 0; } static const struct rfkill_ops nfc_rfkill_ops = { .set_block = nfc_rfkill_set_block, }; /** * nfc_start_poll - start polling for nfc targets * * @dev: The nfc device that must start polling * @im_protocols: bitset of nfc initiator protocols to be used for polling * @tm_protocols: bitset of nfc transport protocols to be used for polling * * The device remains polling for targets until a target is found or * the nfc_stop_poll function is called. 
*/ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) { int rc; pr_debug("dev_name %s initiator protocols 0x%x target protocols 0x%x\n", dev_name(&dev->dev), im_protocols, tm_protocols); if (!im_protocols && !tm_protocols) return -EINVAL; device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (!dev->dev_up) { rc = -ENODEV; goto error; } if (dev->polling) { rc = -EBUSY; goto error; } rc = dev->ops->start_poll(dev, im_protocols, tm_protocols); if (!rc) { dev->polling = true; dev->rf_mode = NFC_RF_NONE; } error: device_unlock(&dev->dev); return rc; } /** * nfc_stop_poll - stop polling for nfc targets * * @dev: The nfc device that must stop polling */ int nfc_stop_poll(struct nfc_dev *dev) { int rc = 0; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (!dev->polling) { rc = -EINVAL; goto error; } dev->ops->stop_poll(dev); dev->polling = false; dev->rf_mode = NFC_RF_NONE; error: device_unlock(&dev->dev); return rc; } static struct nfc_target *nfc_find_target(struct nfc_dev *dev, u32 target_idx) { int i; for (i = 0; i < dev->n_targets; i++) { if (dev->targets[i].idx == target_idx) return &dev->targets[i]; } return NULL; } int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode) { int rc = 0; u8 *gb; size_t gb_len; struct nfc_target *target; pr_debug("dev_name=%s comm %d\n", dev_name(&dev->dev), comm_mode); if (!dev->ops->dep_link_up) return -EOPNOTSUPP; device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (dev->dep_link_up == true) { rc = -EALREADY; goto error; } gb = nfc_llcp_general_bytes(dev, &gb_len); if (gb_len > NFC_MAX_GT_LEN) { rc = -EINVAL; goto error; } target = nfc_find_target(dev, target_index); if (target == NULL) { rc = -ENOTCONN; goto error; } rc = dev->ops->dep_link_up(dev, target, comm_mode, gb, gb_len); if (!rc) { dev->active_target = target; dev->rf_mode = NFC_RF_INITIATOR; } error: device_unlock(&dev->dev); return rc; } int nfc_dep_link_down(struct nfc_dev *dev) { int rc = 0; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); if (!dev->ops->dep_link_down) return -EOPNOTSUPP; device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (dev->dep_link_up == false) { rc = -EALREADY; goto error; } rc = dev->ops->dep_link_down(dev); if (!rc) { dev->dep_link_up = false; dev->active_target = NULL; dev->rf_mode = NFC_RF_NONE; nfc_llcp_mac_is_down(dev); nfc_genl_dep_link_down_event(dev); } error: device_unlock(&dev->dev); return rc; } int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode) { dev->dep_link_up = true; if (!dev->active_target && rf_mode == NFC_RF_INITIATOR) { struct nfc_target *target; target = nfc_find_target(dev, target_idx); if (target == NULL) return -ENOTCONN; dev->active_target = target; } dev->polling = false; dev->rf_mode = rf_mode; nfc_llcp_mac_is_up(dev, target_idx, comm_mode, rf_mode); return nfc_genl_dep_link_up_event(dev, target_idx, comm_mode, rf_mode); } EXPORT_SYMBOL(nfc_dep_link_is_up); /** * nfc_activate_target - prepare the target for data exchange * * @dev: The nfc device that found the target * @target_idx: index of the target that must be activated * @protocol: nfc protocol that will be used for data exchange */ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) { int rc; struct nfc_target *target; pr_debug("dev_name=%s target_idx=%u protocol=%u\n", dev_name(&dev->dev), target_idx, protocol); 
device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (dev->active_target) { rc = -EBUSY; goto error; } target = nfc_find_target(dev, target_idx); if (target == NULL) { rc = -ENOTCONN; goto error; } rc = dev->ops->activate_target(dev, target, protocol); if (!rc) { dev->active_target = target; dev->rf_mode = NFC_RF_INITIATOR; if (dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } error: device_unlock(&dev->dev); return rc; } /** * nfc_deactivate_target - deactivate a nfc target * * @dev: The nfc device that found the target * @target_idx: index of the target that must be deactivated * @mode: idle or sleep? */ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) { int rc = 0; pr_debug("dev_name=%s target_idx=%u\n", dev_name(&dev->dev), target_idx); device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (dev->active_target == NULL) { rc = -ENOTCONN; goto error; } if (dev->active_target->idx != target_idx) { rc = -ENOTCONN; goto error; } if (dev->ops->check_presence) del_timer_sync(&dev->check_pres_timer); dev->ops->deactivate_target(dev, dev->active_target, mode); dev->active_target = NULL; error: device_unlock(&dev->dev); return rc; } /** * nfc_data_exchange - transceive data * * @dev: The nfc device that found the target * @target_idx: index of the target * @skb: data to be sent * @cb: callback called when the response is received * @cb_context: parameter for the callback function * * The user must wait for the callback before calling this function again. */ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { int rc; pr_debug("dev_name=%s target_idx=%u skb->len=%u\n", dev_name(&dev->dev), target_idx, skb->len); device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; kfree_skb(skb); goto error; } if (dev->rf_mode == NFC_RF_INITIATOR && dev->active_target != NULL) { if (dev->active_target->idx != target_idx) { rc = -EADDRNOTAVAIL; kfree_skb(skb); goto error; } if (dev->ops->check_presence) del_timer_sync(&dev->check_pres_timer); rc = dev->ops->im_transceive(dev, dev->active_target, skb, cb, cb_context); if (!rc && dev->ops->check_presence && !dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } else if (dev->rf_mode == NFC_RF_TARGET && dev->ops->tm_send != NULL) { rc = dev->ops->tm_send(dev, skb); } else { rc = -ENOTCONN; kfree_skb(skb); goto error; } error: device_unlock(&dev->dev); return rc; } struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx) { struct nfc_se *se; list_for_each_entry(se, &dev->secure_elements, list) if (se->idx == se_idx) return se; return NULL; } EXPORT_SYMBOL(nfc_find_se); int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) { struct nfc_se *se; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (!dev->dev_up) { rc = -ENODEV; goto error; } if (dev->polling) { rc = -EBUSY; goto error; } if (!dev->ops->enable_se || !dev->ops->disable_se) { rc = -EOPNOTSUPP; goto error; } se = nfc_find_se(dev, se_idx); if (!se) { rc = -EINVAL; goto error; } if (se->state == NFC_SE_ENABLED) { rc = -EALREADY; goto error; } rc = dev->ops->enable_se(dev, se_idx); if (rc >= 0) se->state = NFC_SE_ENABLED; error: device_unlock(&dev->dev); return rc; } int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) { 
struct nfc_se *se; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); device_lock(&dev->dev); if (dev->shutting_down) { rc = -ENODEV; goto error; } if (!dev->dev_up) { rc = -ENODEV; goto error; } if (!dev->ops->enable_se || !dev->ops->disable_se) { rc = -EOPNOTSUPP; goto error; } se = nfc_find_se(dev, se_idx); if (!se) { rc = -EINVAL; goto error; } if (se->state == NFC_SE_DISABLED) { rc = -EALREADY; goto error; } rc = dev->ops->disable_se(dev, se_idx); if (rc >= 0) se->state = NFC_SE_DISABLED; error: device_unlock(&dev->dev); return rc; } int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); return nfc_llcp_set_remote_gb(dev, gb, gb_len); } EXPORT_SYMBOL(nfc_set_remote_general_bytes); u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len) { pr_debug("dev_name=%s\n", dev_name(&dev->dev)); return nfc_llcp_general_bytes(dev, gb_len); } EXPORT_SYMBOL(nfc_get_local_general_bytes); int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb) { /* Only LLCP target mode for now */ if (dev->dep_link_up == false) { kfree_skb(skb); return -ENOLINK; } return nfc_llcp_data_received(dev, skb); } EXPORT_SYMBOL(nfc_tm_data_received); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, const u8 *gb, size_t gb_len) { int rc; device_lock(&dev->dev); dev->polling = false; if (gb != NULL) { rc = nfc_set_remote_general_bytes(dev, gb, gb_len); if (rc < 0) goto out; } dev->rf_mode = NFC_RF_TARGET; if (protocol == NFC_PROTO_NFC_DEP_MASK) nfc_dep_link_is_up(dev, 0, comm_mode, NFC_RF_TARGET); rc = nfc_genl_tm_activated(dev, protocol); out: device_unlock(&dev->dev); return rc; } EXPORT_SYMBOL(nfc_tm_activated); int nfc_tm_deactivated(struct nfc_dev *dev) { dev->dep_link_up = false; dev->rf_mode = NFC_RF_NONE; return nfc_genl_tm_deactivated(dev); } EXPORT_SYMBOL(nfc_tm_deactivated); /** * nfc_alloc_send_skb - allocate a skb for data exchange responses * * @dev: device sending the response * @sk: socket sending the response * @flags: MSG_DONTWAIT flag * @size: size to allocate * @err: pointer to memory to store the error code */ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk, unsigned int flags, unsigned int size, unsigned int *err) { struct sk_buff *skb; unsigned int total_size; total_size = size + dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; skb = sock_alloc_send_skb(sk, total_size, flags & MSG_DONTWAIT, err); if (skb) skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); return skb; } /** * nfc_alloc_recv_skb - allocate a skb for data exchange responses * * @size: size to allocate * @gfp: gfp flags */ struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp) { struct sk_buff *skb; unsigned int total_size; total_size = size + 1; skb = alloc_skb(total_size, gfp); if (skb) skb_reserve(skb, 1); return skb; } EXPORT_SYMBOL(nfc_alloc_recv_skb); /** * nfc_targets_found - inform that targets were found * * @dev: The nfc device that found the targets * @targets: array of nfc targets found * @n_targets: targets array size * * The device driver must call this function when one or many nfc targets * are found. After calling this function, the device driver must stop * polling for targets. * NOTE: This function can be called with targets=NULL and n_targets=0 to * notify a driver error, meaning that the polling operation cannot complete. * IMPORTANT: this function must not be called from an atomic context. 
* In addition, it must also not be called from a context that would prevent * the NFC Core to call other nfc ops entry point concurrently. */ int nfc_targets_found(struct nfc_dev *dev, struct nfc_target *targets, int n_targets) { int i; pr_debug("dev_name=%s n_targets=%d\n", dev_name(&dev->dev), n_targets); for (i = 0; i < n_targets; i++) targets[i].idx = dev->target_next_idx++; device_lock(&dev->dev); if (dev->polling == false) { device_unlock(&dev->dev); return 0; } dev->polling = false; dev->targets_generation++; kfree(dev->targets); dev->targets = NULL; if (targets) { dev->targets = kmemdup(targets, n_targets * sizeof(struct nfc_target), GFP_ATOMIC); if (!dev->targets) { dev->n_targets = 0; device_unlock(&dev->dev); return -ENOMEM; } } dev->n_targets = n_targets; device_unlock(&dev->dev); nfc_genl_targets_found(dev); return 0; } EXPORT_SYMBOL(nfc_targets_found); /** * nfc_target_lost - inform that an activated target went out of field * * @dev: The nfc device that had the activated target in field * @target_idx: the nfc index of the target * * The device driver must call this function when the activated target * goes out of the field. * IMPORTANT: this function must not be called from an atomic context. * In addition, it must also not be called from a context that would prevent * the NFC Core to call other nfc ops entry point concurrently. */ int nfc_target_lost(struct nfc_dev *dev, u32 target_idx) { const struct nfc_target *tg; int i; pr_debug("dev_name %s n_target %d\n", dev_name(&dev->dev), target_idx); device_lock(&dev->dev); for (i = 0; i < dev->n_targets; i++) { tg = &dev->targets[i]; if (tg->idx == target_idx) break; } if (i == dev->n_targets) { device_unlock(&dev->dev); return -EINVAL; } dev->targets_generation++; dev->n_targets--; dev->active_target = NULL; if (dev->n_targets) { memcpy(&dev->targets[i], &dev->targets[i + 1], (dev->n_targets - i) * sizeof(struct nfc_target)); } else { kfree(dev->targets); dev->targets = NULL; } device_unlock(&dev->dev); nfc_genl_target_lost(dev, target_idx); return 0; } EXPORT_SYMBOL(nfc_target_lost); inline void nfc_driver_failure(struct nfc_dev *dev, int err) { nfc_targets_found(dev, NULL, 0); } EXPORT_SYMBOL(nfc_driver_failure); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) { struct nfc_se *se; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); se = nfc_find_se(dev, se_idx); if (se) return -EALREADY; se = kzalloc(sizeof(struct nfc_se), GFP_KERNEL); if (!se) return -ENOMEM; se->idx = se_idx; se->type = type; se->state = NFC_SE_DISABLED; INIT_LIST_HEAD(&se->list); list_add(&se->list, &dev->secure_elements); rc = nfc_genl_se_added(dev, se_idx, type); if (rc < 0) { list_del(&se->list); kfree(se); return rc; } return 0; } EXPORT_SYMBOL(nfc_add_se); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) { struct nfc_se *se, *n; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); list_for_each_entry_safe(se, n, &dev->secure_elements, list) if (se->idx == se_idx) { rc = nfc_genl_se_removed(dev, se_idx); if (rc < 0) return rc; list_del(&se->list); kfree(se); return 0; } return -EINVAL; } EXPORT_SYMBOL(nfc_remove_se); int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction) { int rc; pr_debug("transaction: %x\n", se_idx); device_lock(&dev->dev); if (!evt_transaction) { rc = -EPROTO; goto out; } rc = nfc_genl_se_transaction(dev, se_idx, evt_transaction); out: device_unlock(&dev->dev); return rc; } EXPORT_SYMBOL(nfc_se_transaction); int nfc_se_connectivity(struct nfc_dev 
*dev, u8 se_idx) { int rc; pr_debug("connectivity: %x\n", se_idx); device_lock(&dev->dev); rc = nfc_genl_se_connectivity(dev, se_idx); device_unlock(&dev->dev); return rc; } EXPORT_SYMBOL(nfc_se_connectivity); static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); struct nfc_se *se, *n; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); list_for_each_entry_safe(se, n, &dev->secure_elements, list) { nfc_genl_se_removed(dev, se->idx); list_del(&se->list); kfree(se); } ida_free(&nfc_index_ida, dev->idx); kfree(dev); } static void nfc_check_pres_work(struct work_struct *work) { struct nfc_dev *dev = container_of(work, struct nfc_dev, check_pres_work); int rc; device_lock(&dev->dev); if (dev->active_target && timer_pending(&dev->check_pres_timer) == 0) { rc = dev->ops->check_presence(dev, dev->active_target); if (rc == -EOPNOTSUPP) goto exit; if (rc) { u32 active_target_idx = dev->active_target->idx; device_unlock(&dev->dev); nfc_target_lost(dev, active_target_idx); return; } if (!dev->shutting_down) mod_timer(&dev->check_pres_timer, jiffies + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); } exit: device_unlock(&dev->dev); } static void nfc_check_pres_timeout(struct timer_list *t) { struct nfc_dev *dev = from_timer(dev, t, check_pres_timer); schedule_work(&dev->check_pres_work); } const struct class nfc_class = { .name = "nfc", .dev_release = nfc_release, }; EXPORT_SYMBOL(nfc_class); static int match_idx(struct device *d, const void *data) { struct nfc_dev *dev = to_nfc_dev(d); const unsigned int *idx = data; return dev->idx == *idx; } struct nfc_dev *nfc_get_device(unsigned int idx) { struct device *d; d = class_find_device(&nfc_class, NULL, &idx, match_idx); if (!d) return NULL; return to_nfc_dev(d); } /** * nfc_allocate_device - allocate a new nfc device * * @ops: device operations * @supported_protocols: NFC protocols supported by the device * @tx_headroom: reserved space at beginning of skb * @tx_tailroom: reserved space at end of skb */ struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; int rc; if (!ops->start_poll || !ops->stop_poll || !ops->activate_target || !ops->deactivate_target || !ops->im_transceive) return NULL; if (!supported_protocols) return NULL; dev = kzalloc(sizeof(struct nfc_dev), GFP_KERNEL); if (!dev) return NULL; rc = ida_alloc(&nfc_index_ida, GFP_KERNEL); if (rc < 0) goto err_free_dev; dev->idx = rc; dev->dev.class = &nfc_class; dev_set_name(&dev->dev, "nfc%d", dev->idx); device_initialize(&dev->dev); dev->ops = ops; dev->supported_protocols = supported_protocols; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; INIT_LIST_HEAD(&dev->secure_elements); nfc_genl_data_init(&dev->genl_data); dev->rf_mode = NFC_RF_NONE; /* first generation must not be 0 */ dev->targets_generation = 1; if (ops->check_presence) { timer_setup(&dev->check_pres_timer, nfc_check_pres_timeout, 0); INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); } return dev; err_free_dev: kfree(dev); return NULL; } EXPORT_SYMBOL(nfc_allocate_device); /** * nfc_register_device - register a nfc device in the nfc subsystem * * @dev: The nfc device to register */ int nfc_register_device(struct nfc_dev *dev) { int rc; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); mutex_lock(&nfc_devlist_mutex); nfc_devlist_generation++; rc = device_add(&dev->dev); mutex_unlock(&nfc_devlist_mutex); if (rc < 0) return rc; rc = 
nfc_llcp_register_device(dev); if (rc) pr_err("Could not register llcp device\n"); device_lock(&dev->dev); dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev, RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev); if (dev->rfkill) { if (rfkill_register(dev->rfkill) < 0) { rfkill_destroy(dev->rfkill); dev->rfkill = NULL; } } dev->shutting_down = false; device_unlock(&dev->dev); rc = nfc_genl_device_added(dev); if (rc) pr_debug("The userspace won't be notified that the device %s was added\n", dev_name(&dev->dev)); return 0; } EXPORT_SYMBOL(nfc_register_device); /** * nfc_unregister_device - unregister a nfc device in the nfc subsystem * * @dev: The nfc device to unregister */ void nfc_unregister_device(struct nfc_dev *dev) { int rc; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); rc = nfc_genl_device_removed(dev); if (rc) pr_debug("The userspace won't be notified that the device %s " "was removed\n", dev_name(&dev->dev)); device_lock(&dev->dev); if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); dev->rfkill = NULL; } dev->shutting_down = true; device_unlock(&dev->dev); if (dev->ops->check_presence) { del_timer_sync(&dev->check_pres_timer); cancel_work_sync(&dev->check_pres_work); } nfc_llcp_unregister_device(dev); mutex_lock(&nfc_devlist_mutex); nfc_devlist_generation++; device_del(&dev->dev); mutex_unlock(&nfc_devlist_mutex); } EXPORT_SYMBOL(nfc_unregister_device); static int __init nfc_init(void) { int rc; pr_info("NFC Core ver %s\n", VERSION); rc = class_register(&nfc_class); if (rc) return rc; rc = nfc_genl_init(); if (rc) goto err_genl; /* the first generation must not be 0 */ nfc_devlist_generation = 1; rc = rawsock_init(); if (rc) goto err_rawsock; rc = nfc_llcp_init(); if (rc) goto err_llcp_sock; rc = af_nfc_init(); if (rc) goto err_af_nfc; return 0; err_af_nfc: nfc_llcp_exit(); err_llcp_sock: rawsock_exit(); err_rawsock: nfc_genl_exit(); err_genl: class_unregister(&nfc_class); return rc; } static void __exit nfc_exit(void) { af_nfc_exit(); nfc_llcp_exit(); rawsock_exit(); nfc_genl_exit(); class_unregister(&nfc_class); } subsys_initcall(nfc_init); module_exit(nfc_exit); MODULE_AUTHOR("Lauro Ramos Venancio <lauro.venancio@openbossa.org>"); MODULE_DESCRIPTION("NFC Core ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_NFC); MODULE_ALIAS_GENL_FAMILY(NFC_GENL_NAME);
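/*
 * Illustrative sketch (added for this edit, not part of the original
 * net/nfc/core.c): a minimal, hypothetical driver skeleton showing how the
 * exported core entry points above are typically used together --
 * nfc_allocate_device() with an nfc_ops table providing the five mandatory
 * callbacks, nfc_register_device() at probe time, nfc_targets_found() later
 * from the poll path, and nfc_unregister_device()/nfc_free_device() on
 * removal. All "demo_*" names are invented for this sketch, the callback
 * bodies are stubs, and the ops signatures follow include/net/nfc/nfc.h as
 * found in recent kernels; re-check them against the tree you target.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <net/nfc/nfc.h>

static int demo_start_poll(struct nfc_dev *nfc_dev, u32 im_protocols,
			   u32 tm_protocols)
{
	/*
	 * Start RF discovery in hardware. Discovered tags are reported
	 * later, from sleepable context, via nfc_targets_found().
	 */
	return 0;
}

static void demo_stop_poll(struct nfc_dev *nfc_dev)
{
}

static int demo_activate_target(struct nfc_dev *nfc_dev,
				struct nfc_target *target, u32 protocol)
{
	return 0;
}

static void demo_deactivate_target(struct nfc_dev *nfc_dev,
				   struct nfc_target *target, u8 mode)
{
}

static int demo_im_transceive(struct nfc_dev *nfc_dev,
			      struct nfc_target *target, struct sk_buff *skb,
			      data_exchange_cb_t cb, void *cb_context)
{
	/* A real driver would queue skb to the tag and call cb() on reply. */
	kfree_skb(skb);
	return -EOPNOTSUPP;
}

static const struct nfc_ops demo_nfc_ops = {
	.start_poll		= demo_start_poll,
	.stop_poll		= demo_stop_poll,
	.activate_target	= demo_activate_target,
	.deactivate_target	= demo_deactivate_target,
	.im_transceive		= demo_im_transceive,
};

static struct nfc_dev *demo_nfc_dev;

static int demo_nfc_probe(void)
{
	int rc;

	/*
	 * nfc_allocate_device() rejects an ops table missing any of the
	 * five callbacks wired up above, or an empty protocol mask.
	 */
	demo_nfc_dev = nfc_allocate_device(&demo_nfc_ops,
					   NFC_PROTO_MIFARE_MASK |
					   NFC_PROTO_ISO14443_MASK,
					   0 /* tx_headroom */,
					   0 /* tx_tailroom */);
	if (!demo_nfc_dev)
		return -ENOMEM;

	rc = nfc_register_device(demo_nfc_dev);
	if (rc) {
		nfc_free_device(demo_nfc_dev);
		return rc;
	}
	return 0;
}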
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMU_NOTIFIER_H #define _LINUX_MMU_NOTIFIER_H #include <linux/list.h> #include <linux/spinlock.h> #include <linux/mm_types.h> #include <linux/mmap_lock.h> #include <linux/srcu.h> #include <linux/interval_tree.h> struct mmu_notifier_subscriptions; struct mmu_notifier; struct mmu_notifier_range; struct mmu_interval_notifier; /** * enum mmu_notifier_event - reason for the mmu notifier callback * @MMU_NOTIFY_UNMAP: either munmap() that unmaps the range or a mremap() that * moves the range * * @MMU_NOTIFY_CLEAR: clear page table entry (many reasons for this like * madvise() or replacing a page by another one, ...).
* * @MMU_NOTIFY_PROTECTION_VMA: update is due to protection change for the range * ie using the vma access permission (vm_page_prot) to update the whole range * is enough no need to inspect changes to the CPU page table (mprotect() * syscall) * * @MMU_NOTIFY_PROTECTION_PAGE: update is due to change in read/write flag for * pages in the range so to mirror those changes the user must inspect the CPU * page table (from the end callback). * * @MMU_NOTIFY_SOFT_DIRTY: soft dirty accounting (still same page and same * access flags). User should soft dirty the page in the end callback to make * sure that anyone relying on soft dirtiness catch pages that might be written * through non CPU mappings. * * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal * that the mm refcount is zero and the range is no longer accessible. * * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal * a device driver to possibly ignore the invalidation if the * owner field matches the driver's device private pgmap owner. * * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no * longer have exclusive access to the page. When sent during creation of an * exclusive range the owner will be initialised to the value provided by the * caller of make_device_exclusive_range(), otherwise the owner will be NULL. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, MMU_NOTIFY_CLEAR, MMU_NOTIFY_PROTECTION_VMA, MMU_NOTIFY_PROTECTION_PAGE, MMU_NOTIFY_SOFT_DIRTY, MMU_NOTIFY_RELEASE, MMU_NOTIFY_MIGRATE, MMU_NOTIFY_EXCLUSIVE, }; #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) struct mmu_notifier_ops { /* * Called either by mmu_notifier_unregister or when the mm is * being destroyed by exit_mmap, always before all pages are * freed. This can run concurrently with other mmu notifier * methods (the ones invoked outside the mm context) and it * should tear down all secondary mmu mappings and freeze the * secondary mmu. If this method isn't implemented you've to * be sure that nothing could possibly write to the pages * through the secondary mmu by the time the last thread with * tsk->mm == mm exits. * * As side note: the pages freed after ->release returns could * be immediately reallocated by the gart at an alias physical * address with a different cache model, so if ->release isn't * implemented because all _software_ driven memory accesses * through the secondary mmu are terminated by the time the * last thread of this mm quits, you've also to be sure that * speculative _hardware_ operations can't allocate dirty * cachelines in the cpu that could not be snooped and made * coherent with the other read and write operations happening * through the gart alias address, so leading to memory * corruption. */ void (*release)(struct mmu_notifier *subscription, struct mm_struct *mm); /* * clear_flush_young is called after the VM is * test-and-clearing the young/accessed bitflag in the * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. * Start-end is necessary in case the secondary MMU is mapping the page * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * clear_young is a lightweight version of clear_flush_young. Like the * latter, it is supposed to test-and-clear the young/accessed bitflag * in the secondary pte, but it may omit flushing the secondary tlb. 
*/ int (*clear_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * test_young is called to check the young/accessed bitflag in * the secondary pte. This is used to know if the page is * frequently used without actually clearing the flag or tearing * down the secondary mapping on the page. */ int (*test_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long address); /* * change_pte is called in cases that pte mapping to page is changed: * for example, when ksm remaps pte to point to a new shared page. */ void (*change_pte)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long address, pte_t pte); /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_lock and/or the * locks protecting the reverse maps are held. If the subsystem * can't guarantee that no additional references are taken to * the pages in the range, it has to implement the * invalidate_range() notifier to remove any references taken * after invalidate_range_start(). * * Invalidation of multiple concurrent ranges may be * optionally permitted by the driver. Either way the * establishment of sptes is forbidden in the range passed to * invalidate_range_begin/end for the whole duration of the * invalidate_range_begin/end critical section. * * invalidate_range_start() is called when all pages in the * range are still mapped and have at least a refcount of one. * * invalidate_range_end() is called when all pages in the * range have been unmapped and the pages have been freed by * the VM. * * The VM will remove the page table entries and potentially * the page between invalidate_range_start() and * invalidate_range_end(). If the page must not be freed * because of pending I/O or other circumstances then the * invalidate_range_start() callback (or the initial mapping * by the driver) must make sure that the refcount is kept * elevated. * * If the driver increases the refcount when the pages are * initially mapped into an address space then either * invalidate_range_start() or invalidate_range_end() may * decrease the refcount. If the refcount is decreased on * invalidate_range_start() then the VM can free pages as page * table entries are removed. If the refcount is only * dropped on invalidate_range_end() then the driver itself * will drop the last refcount but it must take care to flush * any secondary tlb before doing the final free on the * page. Pages will no longer be referenced by the linux * address space but may still be referenced by sptes until * the last refcount is dropped. * * If blockable argument is set to false then the callback cannot * sleep and has to return with -EAGAIN if sleeping would be required. * 0 should be returned otherwise. Please note that notifiers that can * fail invalidate_range_start are not allowed to implement * invalidate_range_end, as there is no mechanism for informing the * notifier that its start failed. */ int (*invalidate_range_start)(struct mmu_notifier *subscription, const struct mmu_notifier_range *range); void (*invalidate_range_end)(struct mmu_notifier *subscription, const struct mmu_notifier_range *range); /* * arch_invalidate_secondary_tlbs() is used to manage a non-CPU TLB * which shares page-tables with the CPU. The * invalidate_range_start()/end() callbacks should not be implemented as * invalidate_secondary_tlbs() already catches the points in time when * an external TLB needs to be flushed. 
* * This requires arch_invalidate_secondary_tlbs() to be called while * holding the ptl spin-lock and therefore this callback is not allowed * to sleep. * * This is called by architecture code whenever invalidating a TLB * entry. It is assumed that any secondary TLB has the same rules for * when invalidations are required. If this is not the case architecture * code will need to call this explicitly when required for secondary * TLB invalidation. */ void (*arch_invalidate_secondary_tlbs)( struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * These callbacks are used with the get/put interface to manage the * lifetime of the mmu_notifier memory. alloc_notifier() returns a new * notifier for use with the mm. * * free_notifier() is only called after the mmu_notifier has been * fully put, calls to any ops callback are prevented and no ops * callbacks are currently running. It is called from a SRCU callback * and cannot sleep. */ struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm); void (*free_notifier)(struct mmu_notifier *subscription); }; /* * The notifier chains are protected by mmap_lock and/or the reverse map * semaphores. Notifier chains are only changed when all reverse maps and * the mmap_lock locks are taken. * * Therefore notifier chains can only be traversed when either * * 1. mmap_lock is held. * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem). * 3. No other concurrent thread can access the list (release) */ struct mmu_notifier { struct hlist_node hlist; const struct mmu_notifier_ops *ops; struct mm_struct *mm; struct rcu_head rcu; unsigned int users; }; /** * struct mmu_interval_notifier_ops * @invalidate: Upon return the caller must stop using any SPTEs within this * range. This function can sleep. Return false only if sleeping * was required but mmu_notifier_range_blockable(range) is false. 
*/ struct mmu_interval_notifier_ops { bool (*invalidate)(struct mmu_interval_notifier *interval_sub, const struct mmu_notifier_range *range, unsigned long cur_seq); }; struct mmu_interval_notifier { struct interval_tree_node interval_tree; const struct mmu_interval_notifier_ops *ops; struct mm_struct *mm; struct hlist_node deferred_item; unsigned long invalidate_seq; }; #ifdef CONFIG_MMU_NOTIFIER #ifdef CONFIG_LOCKDEP extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; #endif struct mmu_notifier_range { struct mm_struct *mm; unsigned long start; unsigned long end; unsigned flags; enum mmu_notifier_event event; void *owner; }; static inline int mm_has_notifiers(struct mm_struct *mm) { return unlikely(mm->notifier_subscriptions); } struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops, struct mm_struct *mm); static inline struct mmu_notifier * mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm) { struct mmu_notifier *ret; mmap_write_lock(mm); ret = mmu_notifier_get_locked(ops, mm); mmap_write_unlock(mm); return ret; } void mmu_notifier_put(struct mmu_notifier *subscription); void mmu_notifier_synchronize(void); extern int mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm); extern int __mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm); extern void mmu_notifier_unregister(struct mmu_notifier *subscription, struct mm_struct *mm); unsigned long mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub); int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops); int mmu_interval_notifier_insert_locked( struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops); void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub); /** * mmu_interval_set_seq - Save the invalidation sequence * @interval_sub - The subscription passed to invalidate * @cur_seq - The cur_seq passed to the invalidate() callback * * This must be called unconditionally from the invalidate callback of a * struct mmu_interval_notifier_ops under the same lock that is used to call * mmu_interval_read_retry(). It updates the sequence number for later use by * mmu_interval_read_retry(). The provided cur_seq will always be odd. * * If the caller does not call mmu_interval_read_begin() or * mmu_interval_read_retry() then this call is not required. */ static inline void mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, unsigned long cur_seq) { WRITE_ONCE(interval_sub->invalidate_seq, cur_seq); } /** * mmu_interval_read_retry - End a read side critical section against a VA range * interval_sub: The subscription * seq: The return of the paired mmu_interval_read_begin() * * This MUST be called under a user provided lock that is also held * unconditionally by op->invalidate() when it calls mmu_interval_set_seq(). * * Each call should be paired with a single mmu_interval_read_begin() and * should be used to conclude the read side. * * Returns true if an invalidation collided with this critical section, and * the caller should retry. 
*/ static inline bool mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub, unsigned long seq) { return interval_sub->invalidate_seq != seq; } /** * mmu_interval_check_retry - Test if a collision has occurred * interval_sub: The subscription * seq: The return of the matching mmu_interval_read_begin() * * This can be used in the critical section between mmu_interval_read_begin() * and mmu_interval_read_retry(). A return of true indicates an invalidation * has collided with this critical region and a future * mmu_interval_read_retry() will return true. * * False is not reliable and only suggests a collision may not have * occurred. It can be called many times and does not have to hold the user * provided lock. * * This call can be used as part of loops and other expensive operations to * expedite a retry. */ static inline bool mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub, unsigned long seq) { /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ return READ_ONCE(interval_sub->invalidate_seq) != seq; } extern void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end); extern int __mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r); extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end); extern bool mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range); static inline bool mmu_notifier_range_blockable(const struct mmu_notifier_range *range) { return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE); } static inline void mmu_notifier_release(struct mm_struct *mm) { if (mm_has_notifiers(mm)) __mmu_notifier_release(mm); } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } static inline int mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) return __mmu_notifier_clear_young(mm, start, end); return 0; } static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { if (mm_has_notifiers(mm)) return __mmu_notifier_test_young(mm, address); return 0; } static inline void mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte) { if (mm_has_notifiers(mm)) __mmu_notifier_change_pte(mm, address, pte); } static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { might_sleep(); lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); if (mm_has_notifiers(range->mm)) { range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE; __mmu_notifier_invalidate_range_start(range); } lock_map_release(&__mmu_notifier_invalidate_range_start_map); } /* * This version of mmu_notifier_invalidate_range_start() avoids blocking, but it * can return an error if a notifier can't proceed without blocking, in which * case you're not allowed to modify PTEs in the specified range. 
* * This is mainly intended for OOM handling. */ static inline int __must_check mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { int ret = 0; lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); if (mm_has_notifiers(range->mm)) { range->flags &= ~MMU_NOTIFIER_RANGE_BLOCKABLE; ret = __mmu_notifier_invalidate_range_start(range); } lock_map_release(&__mmu_notifier_invalidate_range_start_map); return ret; } static inline void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { if (mmu_notifier_range_blockable(range)) might_sleep(); if (mm_has_notifiers(range->mm)) __mmu_notifier_invalidate_range_end(range); } static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) __mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); } static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) { mm->notifier_subscriptions = NULL; } static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { if (mm_has_notifiers(mm)) __mmu_notifier_subscriptions_destroy(mm); } static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned flags, struct mm_struct *mm, unsigned long start, unsigned long end) { range->event = event; range->mm = mm; range->start = start; range->end = end; range->flags = flags; } static inline void mmu_notifier_range_init_owner( struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned int flags, struct mm_struct *mm, unsigned long start, unsigned long end, void *owner) { mmu_notifier_range_init(range, event, flags, mm, start, end); range->owner = owner; } #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ ___address, \ ___address + \ PAGE_SIZE); \ __young; \ }) #define pmdp_clear_flush_young_notify(__vma, __address, __pmdp) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ ___address, \ ___address + \ PMD_SIZE); \ __young; \ }) #define ptep_clear_young_notify(__vma, __address, __ptep) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\ __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ ___address + PAGE_SIZE); \ __young; \ }) #define pmdp_clear_young_notify(__vma, __address, __pmdp) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\ __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ ___address + PMD_SIZE); \ __young; \ }) /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU * pte invalidate must have already happened with a ptep_clear_flush() before * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is * required when we change both the protection of the mapping from read-only to * read-write and the pfn (like during copy on write page faults). 
Otherwise the * old page would remain mapped readonly in the secondary MMUs after the new * page is already writable by some CPU through the primary MMU. */ #define set_pte_at_notify(__mm, __address, __ptep, __pte) \ ({ \ struct mm_struct *___mm = __mm; \ unsigned long ___address = __address; \ pte_t ___pte = __pte; \ \ mmu_notifier_change_pte(___mm, ___address, ___pte); \ set_pte_at(___mm, ___address, __ptep, ___pte); \ }) #else /* CONFIG_MMU_NOTIFIER */ struct mmu_notifier_range { unsigned long start; unsigned long end; }; static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range, unsigned long start, unsigned long end) { range->start = start; range->end = end; } #define mmu_notifier_range_init(range,event,flags,mm,start,end) \ _mmu_notifier_range_init(range, start, end) #define mmu_notifier_range_init_owner(range, event, flags, mm, start, \ end, owner) \ _mmu_notifier_range_init(range, start, end) static inline bool mmu_notifier_range_blockable(const struct mmu_notifier_range *range) { return true; } static inline int mm_has_notifiers(struct mm_struct *mm) { return 0; } static inline void mmu_notifier_release(struct mm_struct *mm) { } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end) { return 0; } static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { return 0; } static inline void mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte) { } static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { } static inline int mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { return 0; } static inline void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { } static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end) { } static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) { } static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { } #define mmu_notifier_range_update_to_read_only(r) false #define ptep_clear_flush_young_notify ptep_clear_flush_young #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_young_notify ptep_test_and_clear_young #define pmdp_clear_young_notify pmdp_test_and_clear_young #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush #define pudp_huge_clear_flush_notify pudp_huge_clear_flush #define set_pte_at_notify set_pte_at static inline void mmu_notifier_synchronize(void) { } #endif /* CONFIG_MMU_NOTIFIER */ #endif /* _LINUX_MMU_NOTIFIER_H */
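/*
 * Illustrative sketch (added for this edit, not part of the original
 * include/linux/mmu_notifier.h): the intended mmu_interval_notifier read-side
 * pattern described in the comments above -- pair mmu_interval_read_begin()
 * with mmu_interval_read_retry() under a driver lock, and have the
 * invalidate() callback call mmu_interval_set_seq() under that same lock.
 * The "demo_*" structures and the "program the device" step are placeholders;
 * a real mirror would plug in its own page collection (e.g. hmm_range_fault())
 * and device page-table update logic.
 */
#include <linux/mmu_notifier.h>
#include <linux/mutex.h>

struct demo_mirror {
	struct mmu_interval_notifier notifier;
	struct mutex lock;	/* serializes device page-table updates */
};

static bool demo_invalidate(struct mmu_interval_notifier *interval_sub,
			    const struct mmu_notifier_range *range,
			    unsigned long cur_seq)
{
	struct demo_mirror *mirror =
		container_of(interval_sub, struct demo_mirror, notifier);

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&mirror->lock);
	else if (!mutex_trylock(&mirror->lock))
		return false;

	/* Must be called under the same lock used by the read side. */
	mmu_interval_set_seq(interval_sub, cur_seq);

	/* ... tear down device mappings covering range->start..range->end ... */

	mutex_unlock(&mirror->lock);
	return true;
}

static const struct mmu_interval_notifier_ops demo_interval_ops = {
	.invalidate = demo_invalidate,
};

static int demo_mirror_range(struct demo_mirror *mirror, struct mm_struct *mm,
			     unsigned long start, unsigned long length)
{
	unsigned long seq;
	int ret;

	ret = mmu_interval_notifier_insert(&mirror->notifier, mm, start,
					   length, &demo_interval_ops);
	if (ret)
		return ret;

	do {
		seq = mmu_interval_read_begin(&mirror->notifier);

		/*
		 * Collect the CPU page table entries for the range here,
		 * outside the driver lock (this step may fault and sleep).
		 */

		mutex_lock(&mirror->lock);
		if (mmu_interval_read_retry(&mirror->notifier, seq)) {
			/* An invalidation raced with us; collect again. */
			mutex_unlock(&mirror->lock);
			continue;
		}
		/* ... program the device page tables from the snapshot ... */
		mutex_unlock(&mirror->lock);
		break;
	} while (1);

	return 0;
}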
// SPDX-License-Identifier: GPL-2.0-only /* * Generic helpers for smp ipi calls * * (C) Jens Axboe <jens.axboe@oracle.com> 2008 */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/irq_work.h> #include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/gfp.h> #include <linux/smp.h> #include <linux/cpu.h> #include <linux/sched.h> #include <linux/sched/idle.h> #include <linux/hypervisor.h> #include <linux/sched/clock.h> #include <linux/nmi.h> #include <linux/sched/debug.h> #include <linux/jump_label.h> #include <trace/events/ipi.h> #define CREATE_TRACE_POINTS #include <trace/events/csd.h> #undef CREATE_TRACE_POINTS #include "smpboot.h" #include "sched/smp.h" #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK) struct call_function_data { call_single_data_t __percpu *csd; cpumask_var_t cpumask; cpumask_var_t cpumask_ipi; }; static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data); static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); static DEFINE_PER_CPU(atomic_t, trigger_backtrace) = ATOMIC_INIT(1); static void __flush_smp_call_function_queue(bool warn_cpu_offline); int smpcfd_prepare_cpu(unsigned int cpu) { struct call_function_data *cfd = &per_cpu(cfd_data, cpu); if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, cpu_to_node(cpu))) return -ENOMEM; if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, cpu_to_node(cpu))) { free_cpumask_var(cfd->cpumask); return -ENOMEM; } cfd->csd = alloc_percpu(call_single_data_t); if (!cfd->csd) { free_cpumask_var(cfd->cpumask); free_cpumask_var(cfd->cpumask_ipi); return -ENOMEM; } return 0; } int smpcfd_dead_cpu(unsigned int cpu) { struct call_function_data *cfd = &per_cpu(cfd_data, cpu); free_cpumask_var(cfd->cpumask); free_cpumask_var(cfd->cpumask_ipi); free_percpu(cfd->csd); return 0; } int smpcfd_dying_cpu(unsigned int cpu) { /* * The IPIs for the smp-call-function callbacks queued by other * CPUs might arrive late, either due to hardware latencies or * because this CPU disabled interrupts (inside stop-machine) * before the IPIs were sent. So flush out any pending callbacks * explicitly (without waiting for the IPIs to arrive), to * ensure that the outgoing CPU doesn't go offline with work * still pending.
*/ __flush_smp_call_function_queue(false); irq_work_run(); return 0; } void __init call_function_init(void) { int i; for_each_possible_cpu(i) init_llist_head(&per_cpu(call_single_queue, i)); smpcfd_prepare_cpu(smp_processor_id()); } static __always_inline void send_call_function_single_ipi(int cpu) { if (call_function_single_prep_ipi(cpu)) { trace_ipi_send_cpu(cpu, _RET_IP_, generic_smp_call_function_single_interrupt); arch_send_call_function_single_ipi(cpu); } } static __always_inline void send_call_function_ipi_mask(struct cpumask *mask) { trace_ipi_send_cpumask(mask, _RET_IP_, generic_smp_call_function_single_interrupt); arch_send_call_function_ipi_mask(mask); } static __always_inline void csd_do_func(smp_call_func_t func, void *info, call_single_data_t *csd) { trace_csd_function_entry(func, csd); func(info); trace_csd_function_exit(func, csd); } #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled); /* * Parse the csdlock_debug= kernel boot parameter. * * If you need to restore the old "ext" value that once provided * additional debugging information, reapply the following commits: * * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging") * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging") */ static int __init csdlock_debug(char *str) { int ret; unsigned int val = 0; ret = get_option(&str, &val); if (ret) { if (val) static_branch_enable(&csdlock_debug_enabled); else static_branch_disable(&csdlock_debug_enabled); } return 1; } __setup("csdlock_debug=", csdlock_debug); static DEFINE_PER_CPU(call_single_data_t *, cur_csd); static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); static DEFINE_PER_CPU(void *, cur_csd_info); static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */ module_param(csd_lock_timeout, ulong, 0444); static int panic_on_ipistall; /* CSD panic timeout in milliseconds, 300000 for five minutes. */ module_param(panic_on_ipistall, int, 0444); static atomic_t csd_bug_count = ATOMIC_INIT(0); /* Record current CSD work for current CPU, NULL to erase. */ static void __csd_lock_record(call_single_data_t *csd) { if (!csd) { smp_mb(); /* NULL cur_csd after unlock. */ __this_cpu_write(cur_csd, NULL); return; } __this_cpu_write(cur_csd_func, csd->func); __this_cpu_write(cur_csd_info, csd->info); smp_wmb(); /* func and info before csd. */ __this_cpu_write(cur_csd, csd); smp_mb(); /* Update cur_csd before function call. */ /* Or before unlock, as the case may be. */ } static __always_inline void csd_lock_record(call_single_data_t *csd) { if (static_branch_unlikely(&csdlock_debug_enabled)) __csd_lock_record(csd); } static int csd_lock_wait_getcpu(call_single_data_t *csd) { unsigned int csd_type; csd_type = CSD_TYPE(csd); if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC) return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */ return -1; } /* * Complain if too much time spent waiting. Note that only * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU, * so waiting on other types gets much less information. 
*/ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) { int cpu = -1; int cpux; bool firsttime; u64 ts2, ts_delta; call_single_data_t *cpu_cur_csd; unsigned int flags = READ_ONCE(csd->node.u_flags); unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC; if (!(flags & CSD_FLAG_LOCK)) { if (!unlikely(*bug_id)) return true; cpu = csd_lock_wait_getcpu(csd); pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n", *bug_id, raw_smp_processor_id(), cpu); return true; } ts2 = sched_clock(); /* How long since we last checked for a stuck CSD lock.*/ ts_delta = ts2 - *ts1; if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0)) return false; firsttime = !*bug_id; if (firsttime) *bug_id = atomic_inc_return(&csd_bug_count); cpu = csd_lock_wait_getcpu(csd); if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu)) cpux = 0; else cpux = cpu; cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */ /* How long since this CSD lock was stuck. */ ts_delta = ts2 - ts0; pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n", firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts_delta, cpu, csd->func, csd->info); /* * If the CSD lock is still stuck after 5 minutes, it is unlikely * to become unstuck. Use a signed comparison to avoid triggering * on underflows when the TSC is out of sync between sockets. */ BUG_ON(panic_on_ipistall > 0 && (s64)ts_delta > ((s64)panic_on_ipistall * NSEC_PER_MSEC)); if (cpu_cur_csd && csd != cpu_cur_csd) { pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n", *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)), READ_ONCE(per_cpu(cur_csd_info, cpux))); } else { pr_alert("\tcsd: CSD lock (#%d) %s.\n", *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); } if (cpu >= 0) { if (atomic_cmpxchg_acquire(&per_cpu(trigger_backtrace, cpu), 1, 0)) dump_cpu_task(cpu); if (!cpu_cur_csd) { pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); arch_send_call_function_single_ipi(cpu); } } if (firsttime) dump_stack(); *ts1 = ts2; return false; } /* * csd_lock/csd_unlock used to serialize access to per-cpu csd resources * * For non-synchronous ipi calls the csd can still be in use by the * previous function call. For multi-cpu calls it's even more interesting * as we'll have to ensure no other cpu is observing our csd.
*/ static void __csd_lock_wait(call_single_data_t *csd) { int bug_id = 0; u64 ts0, ts1; ts1 = ts0 = sched_clock(); for (;;) { if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id)) break; cpu_relax(); } smp_acquire__after_ctrl_dep(); } static __always_inline void csd_lock_wait(call_single_data_t *csd) { if (static_branch_unlikely(&csdlock_debug_enabled)) { __csd_lock_wait(csd); return; } smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); } #else static void csd_lock_record(call_single_data_t *csd) { } static __always_inline void csd_lock_wait(call_single_data_t *csd) { smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); } #endif static __always_inline void csd_lock(call_single_data_t *csd) { csd_lock_wait(csd); csd->node.u_flags |= CSD_FLAG_LOCK; /* * prevent CPU from reordering the above assignment * to ->flags with any subsequent assignments to other * fields of the specified call_single_data_t structure: */ smp_wmb(); } static __always_inline void csd_unlock(call_single_data_t *csd) { WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK)); /* * ensure we're all done before releasing data: */ smp_store_release(&csd->node.u_flags, 0); } static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); void __smp_call_single_queue(int cpu, struct llist_node *node) { /* * We have to check the type of the CSD before queueing it, because * once queued it can have its flags cleared by * flush_smp_call_function_queue() * even if we haven't sent the smp_call IPI yet (e.g. the stopper * executes migration_cpu_stop() on the remote CPU). */ if (trace_csd_queue_cpu_enabled()) { call_single_data_t *csd; smp_call_func_t func; csd = container_of(node, call_single_data_t, node.llist); func = CSD_TYPE(csd) == CSD_TYPE_TTWU ? sched_ttwu_pending : csd->func; trace_csd_queue_cpu(cpu, _RET_IP_, func, csd); } /* * The list addition should be visible to the target CPU when it pops * the head of the list to pull the entry off it in the IPI handler * because of normal cache coherency rules implied by the underlying * llist ops. * * If IPIs can go out of order to the cache coherency protocol * in an architecture, sufficient synchronisation should be added * to arch code to make it appear to obey cache coherency WRT * locking and barrier primitives. Generic code isn't really * equipped to do the right thing... */ if (llist_add(node, &per_cpu(call_single_queue, cpu))) send_call_function_single_ipi(cpu); } /* * Insert a previously allocated call_single_data_t element * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. */ static int generic_exec_single(int cpu, call_single_data_t *csd) { if (cpu == smp_processor_id()) { smp_call_func_t func = csd->func; void *info = csd->info; unsigned long flags; /* * We can unlock early even for the synchronous on-stack case, * since we're doing this from the same CPU.. */ csd_lock_record(csd); csd_unlock(csd); local_irq_save(flags); csd_do_func(func, info, NULL); csd_lock_record(NULL); local_irq_restore(flags); return 0; } if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) { csd_unlock(csd); return -ENXIO; } __smp_call_single_queue(cpu, &csd->node.llist); return 0; } /** * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks * * Invoked by arch to handle an IPI for call function single. * Must be called with interrupts disabled. 
*/ void generic_smp_call_function_single_interrupt(void) { __flush_smp_call_function_queue(true); } /** * __flush_smp_call_function_queue - Flush pending smp-call-function callbacks * * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an * offline CPU. Skip this check if set to 'false'. * * Flush any pending smp-call-function callbacks queued on this CPU. This is * invoked by the generic IPI handler, as well as by a CPU about to go offline, * to ensure that all pending IPI callbacks are run before it goes completely * offline. * * Loop through the call_single_queue and run all the queued callbacks. * Must be called with interrupts disabled. */ static void __flush_smp_call_function_queue(bool warn_cpu_offline) { call_single_data_t *csd, *csd_next; struct llist_node *entry, *prev; struct llist_head *head; static bool warned; atomic_t *tbt; lockdep_assert_irqs_disabled(); /* Allow waiters to send backtrace NMI from here onwards */ tbt = this_cpu_ptr(&trigger_backtrace); atomic_set_release(tbt, 1); head = this_cpu_ptr(&call_single_queue); entry = llist_del_all(head); entry = llist_reverse_order(entry); /* There shouldn't be any pending callbacks on an offline CPU. */ if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) && !warned && entry != NULL)) { warned = true; WARN(1, "IPI on offline CPU %d\n", smp_processor_id()); /* * We don't have to use the _safe() variant here * because we are not invoking the IPI handlers yet. */ llist_for_each_entry(csd, entry, node.llist) { switch (CSD_TYPE(csd)) { case CSD_TYPE_ASYNC: case CSD_TYPE_SYNC: case CSD_TYPE_IRQ_WORK: pr_warn("IPI callback %pS sent to offline CPU\n", csd->func); break; case CSD_TYPE_TTWU: pr_warn("IPI task-wakeup sent to offline CPU\n"); break; default: pr_warn("IPI callback, unknown type %d, sent to offline CPU\n", CSD_TYPE(csd)); break; } } } /* * First; run all SYNC callbacks, people are waiting for us. */ prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, node.llist) { /* Do we wait until *after* callback? */ if (CSD_TYPE(csd) == CSD_TYPE_SYNC) { smp_call_func_t func = csd->func; void *info = csd->info; if (prev) { prev->next = &csd_next->node.llist; } else { entry = &csd_next->node.llist; } csd_lock_record(csd); csd_do_func(func, info, csd); csd_unlock(csd); csd_lock_record(NULL); } else { prev = &csd->node.llist; } } if (!entry) return; /* * Second; run all !SYNC callbacks. */ prev = NULL; llist_for_each_entry_safe(csd, csd_next, entry, node.llist) { int type = CSD_TYPE(csd); if (type != CSD_TYPE_TTWU) { if (prev) { prev->next = &csd_next->node.llist; } else { entry = &csd_next->node.llist; } if (type == CSD_TYPE_ASYNC) { smp_call_func_t func = csd->func; void *info = csd->info; csd_lock_record(csd); csd_unlock(csd); csd_do_func(func, info, csd); csd_lock_record(NULL); } else if (type == CSD_TYPE_IRQ_WORK) { irq_work_single(csd); } } else { prev = &csd->node.llist; } } /* * Third; only CSD_TYPE_TTWU is left, issue those. */ if (entry) { csd = llist_entry(entry, typeof(*csd), node.llist); csd_do_func(sched_ttwu_pending, entry, csd); } } /** * flush_smp_call_function_queue - Flush pending smp-call-function callbacks * from task context (idle, migration thread) * * When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it * set, then remote CPUs can avoid sending IPIs and wake the idle CPU by * setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to * handle queued SMP function calls before scheduling. 
* * The migration thread has to ensure that an eventually pending wakeup has * been handled before it migrates a task. */ void flush_smp_call_function_queue(void) { unsigned int was_pending; unsigned long flags; if (llist_empty(this_cpu_ptr(&call_single_queue))) return; local_irq_save(flags); /* Get the already pending soft interrupts for RT enabled kernels */ was_pending = local_softirq_pending(); __flush_smp_call_function_queue(true); if (local_softirq_pending()) do_softirq_post_smp_call_flush(was_pending); local_irq_restore(flags); } /* * smp_call_function_single - Run a function on a specific CPU * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. * @wait: If true, wait until function has completed on other CPUs. * * Returns 0 on success, else a negative status code. */ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int wait) { call_single_data_t *csd; call_single_data_t csd_stack = { .node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, }, }; int this_cpu; int err; /* * prevent preemption and reschedule on another processor, * as well as CPU removal */ this_cpu = get_cpu(); /* * Can deadlock when called with interrupts disabled. * We allow cpu's that are not yet online though, as no one else can * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() && !oops_in_progress); /* * When @wait we can deadlock when we interrupt between llist_add() and * arch_send_call_function_ipi*(); when !@wait we can deadlock due to * csd_lock() on because the interrupt context uses the same csd * storage. */ WARN_ON_ONCE(!in_task()); csd = &csd_stack; if (!wait) { csd = this_cpu_ptr(&csd_data); csd_lock(csd); } csd->func = func; csd->info = info; #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG csd->node.src = smp_processor_id(); csd->node.dst = cpu; #endif err = generic_exec_single(cpu, csd); if (wait) csd_lock_wait(csd); put_cpu(); return err; } EXPORT_SYMBOL(smp_call_function_single); /** * smp_call_function_single_async() - Run an asynchronous function on a * specific CPU. * @cpu: The CPU to run on. * @csd: Pre-allocated and setup data structure * * Like smp_call_function_single(), but the call is asynchronous and * can thus be done from contexts with disabled interrupts. * * The caller passes his own pre-allocated data structure * (ie: embedded in an object) and is responsible for synchronizing it * such that the IPIs performed on the @csd are strictly serialized. * * If the function is called with one csd which has not yet been * processed by a previous call to smp_call_function_single_async(), the * function will return immediately with -EBUSY showing that the csd * object is still in progress. * * NOTE: Be careful, there is unfortunately no current debugging facility to * validate the correctness of this serialization. * * Return: %0 on success or negative errno value on error */ int smp_call_function_single_async(int cpu, call_single_data_t *csd) { int err = 0; preempt_disable(); if (csd->node.u_flags & CSD_FLAG_LOCK) { err = -EBUSY; goto out; } csd->node.u_flags = CSD_FLAG_LOCK; smp_wmb(); err = generic_exec_single(cpu, csd); out: preempt_enable(); return err; } EXPORT_SYMBOL_GPL(smp_call_function_single_async); /* * smp_call_function_any - Run a function on any of the given cpus * @mask: The mask of cpus it can run on. * @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function. * @wait: If true, wait until function has completed. * * Returns 0 on success, else a negative status code (if no cpus were online). * * Selection preference: * 1) current cpu if in @mask * 2) any cpu of current node if in @mask * 3) any other online cpu in @mask */ int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait) { unsigned int cpu; const struct cpumask *nodemask; int ret; /* Try for same CPU (cheapest) */ cpu = get_cpu(); if (cpumask_test_cpu(cpu, mask)) goto call; /* Try for same node. */ nodemask = cpumask_of_node(cpu_to_node(cpu)); for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids; cpu = cpumask_next_and(cpu, nodemask, mask)) { if (cpu_online(cpu)) goto call; } /* Any online will do: smp_call_function_single handles nr_cpu_ids. */ cpu = cpumask_any_and(mask, cpu_online_mask); call: ret = smp_call_function_single(cpu, func, info, wait); put_cpu(); return ret; } EXPORT_SYMBOL_GPL(smp_call_function_any); /* * Flags to be used as scf_flags argument of smp_call_function_many_cond(). * * %SCF_WAIT: Wait until function execution is completed * %SCF_RUN_LOCAL: Run also locally if local cpu is set in cpumask */ #define SCF_WAIT (1U << 0) #define SCF_RUN_LOCAL (1U << 1) static void smp_call_function_many_cond(const struct cpumask *mask, smp_call_func_t func, void *info, unsigned int scf_flags, smp_cond_func_t cond_func) { int cpu, last_cpu, this_cpu = smp_processor_id(); struct call_function_data *cfd; bool wait = scf_flags & SCF_WAIT; int nr_cpus = 0; bool run_remote = false; bool run_local = false; lockdep_assert_preemption_disabled(); /* * Can deadlock when called with interrupts disabled. * We allow cpu's that are not yet online though, as no one else can * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ if (cpu_online(this_cpu) && !oops_in_progress && !early_boot_irqs_disabled) lockdep_assert_irqs_enabled(); /* * When @wait we can deadlock when we interrupt between llist_add() and * arch_send_call_function_ipi*(); when !@wait we can deadlock due to * csd_lock() on because the interrupt context uses the same csd * storage. */ WARN_ON_ONCE(!in_task()); /* Check if we need local execution. */ if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask)) run_local = true; /* Check if we need remote execution, i.e., any CPU excluding this one. */ cpu = cpumask_first_and(mask, cpu_online_mask); if (cpu == this_cpu) cpu = cpumask_next_and(cpu, mask, cpu_online_mask); if (cpu < nr_cpu_ids) run_remote = true; if (run_remote) { cfd = this_cpu_ptr(&cfd_data); cpumask_and(cfd->cpumask, mask, cpu_online_mask); __cpumask_clear_cpu(this_cpu, cfd->cpumask); cpumask_clear(cfd->cpumask_ipi); for_each_cpu(cpu, cfd->cpumask) { call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); if (cond_func && !cond_func(cpu, info)) { __cpumask_clear_cpu(cpu, cfd->cpumask); continue; } csd_lock(csd); if (wait) csd->node.u_flags |= CSD_TYPE_SYNC; csd->func = func; csd->info = info; #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG csd->node.src = smp_processor_id(); csd->node.dst = cpu; #endif trace_csd_queue_cpu(cpu, _RET_IP_, func, csd); if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) { __cpumask_set_cpu(cpu, cfd->cpumask_ipi); nr_cpus++; last_cpu = cpu; } } /* * Choose the most efficient way to send an IPI. Note that the * number of CPUs might be zero due to concurrent changes to the * provided mask. 
*/ if (nr_cpus == 1) send_call_function_single_ipi(last_cpu); else if (likely(nr_cpus > 1)) send_call_function_ipi_mask(cfd->cpumask_ipi); } if (run_local && (!cond_func || cond_func(this_cpu, info))) { unsigned long flags; local_irq_save(flags); csd_do_func(func, info, NULL); local_irq_restore(flags); } if (run_remote && wait) { for_each_cpu(cpu, cfd->cpumask) { call_single_data_t *csd; csd = per_cpu_ptr(cfd->csd, cpu); csd_lock_wait(csd); } } } /** * smp_call_function_many(): Run a function on a set of CPUs. * @mask: The set of cpus to run on (only runs on online subset). * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. * @wait: Bitmask that controls the operation. If %SCF_WAIT is set, wait * (atomically) until function has completed on other CPUs. If * %SCF_RUN_LOCAL is set, the function will also be run locally * if the local CPU is set in the @cpumask. * * If @wait is true, then returns once @func has returned. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. Preemption * must be disabled when calling this function. */ void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) { smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL); } EXPORT_SYMBOL(smp_call_function_many); /** * smp_call_function(): Run a function on all other CPUs. * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. * @wait: If true, wait (atomically) until function has completed * on other CPUs. * * Returns 0. * * If @wait is true, then returns once @func has returned; otherwise * it returns just before the target cpu calls @func. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ void smp_call_function(smp_call_func_t func, void *info, int wait) { preempt_disable(); smp_call_function_many(cpu_online_mask, func, info, wait); preempt_enable(); } EXPORT_SYMBOL(smp_call_function); /* Setup configured maximum number of CPUs to activate */ unsigned int setup_max_cpus = NR_CPUS; EXPORT_SYMBOL(setup_max_cpus); /* * Setup routine for controlling SMP activation * * Command-line option of "nosmp" or "maxcpus=0" will disable SMP * activation entirely (the MPS table probe still happens, though). * * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer * greater than 0, limits the maximum number of CPUs activated in * SMP mode to <NUM>. 
 */
void __weak __init arch_disable_smp_support(void) { }

static int __init nosmp(char *str)
{
	setup_max_cpus = 0;
	arch_disable_smp_support();

	return 0;
}
early_param("nosmp", nosmp);

/* this is hard limit */
static int __init nrcpus(char *str)
{
	int nr_cpus;

	if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
		set_nr_cpu_ids(nr_cpus);

	return 0;
}
early_param("nr_cpus", nrcpus);

static int __init maxcpus(char *str)
{
	get_option(&str, &setup_max_cpus);
	if (setup_max_cpus == 0)
		arch_disable_smp_support();

	return 0;
}
early_param("maxcpus", maxcpus);

#if (NR_CPUS > 1) && !defined(CONFIG_FORCE_NR_CPUS)
/* Setup number of possible processor ids */
unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
#endif

/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
void __init setup_nr_cpu_ids(void)
{
	set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1);
}

/* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
	int num_nodes, num_cpus;

	idle_threads_init();
	cpuhp_threads_init();

	pr_info("Bringing up secondary CPUs ...\n");

	bringup_nonboot_cpus(setup_max_cpus);

	num_nodes = num_online_nodes();
	num_cpus  = num_online_cpus();
	pr_info("Brought up %d node%s, %d CPU%s\n",
		num_nodes, (num_nodes > 1 ? "s" : ""),
		num_cpus,  (num_cpus  > 1 ? "s" : ""));

	/* Any cleanup work */
	smp_cpus_done(setup_max_cpus);
}

/*
 * on_each_cpu_cond(): Call a function on each processor for which
 * the supplied function cond_func returns true, optionally waiting
 * for all the required CPUs to finish. This may include the local
 * processor.
 * @cond_func:	A callback function that is passed a cpu id and
 *		the info parameter. The function is called
 *		with preemption disabled. The function should
 *		return a boolean value indicating whether to IPI
 *		the specified CPU.
 * @func:	The function to run on all applicable CPUs.
 *		This must be fast and non-blocking.
 * @info:	An arbitrary pointer to pass to both functions.
 * @wait:	If true, wait (atomically) until function has
 *		completed on other CPUs.
 *
 * Preemption is disabled to protect against CPUs going offline but not online.
 * CPUs going online during the call will not be seen or sent an IPI.
 *
 * You must not call this function with disabled interrupts or
 * from a hardware interrupt handler or from a bottom half handler.
 */
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
			   void *info, bool wait, const struct cpumask *mask)
{
	unsigned int scf_flags = SCF_RUN_LOCAL;

	if (wait)
		scf_flags |= SCF_WAIT;

	preempt_disable();
	smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
	preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu_cond_mask);

static void do_nothing(void *unused)
{
}

/**
 * kick_all_cpus_sync - Force all cpus out of idle
 *
 * Used to synchronize the update of pm_idle function pointer. It's
 * called after the pointer is updated and returns after the dummy
 * callback function has been executed on all cpus. The execution of
 * the function can only happen on the remote cpus after they have
 * left the idle function which had been called via pm_idle function
 * pointer. So it's guaranteed that nothing uses the previous pointer
 * anymore.
 */
void kick_all_cpus_sync(void)
{
	/* Make sure the change is visible before we kick the cpus */
	smp_mb();
	smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(kick_all_cpus_sync);

/**
 * wake_up_all_idle_cpus - break all cpus out of idle
 *
 * Wake every online CPU that is in an idle state, including CPUs that are
 * idle-polling; CPUs that are not idle are left alone.
 */
void wake_up_all_idle_cpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		preempt_disable();
		if (cpu != smp_processor_id() && cpu_online(cpu))
			wake_up_if_idle(cpu);
		preempt_enable();
	}
}
EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);

/**
 * struct smp_call_on_cpu_struct - Call a function on a specific CPU
 * @work: &work_struct
 * @done: &completion to signal
 * @func: function to call
 * @data: function's data argument
 * @ret: return value from @func
 * @cpu: target CPU (%-1 for any CPU)
 *
 * Used to call a function on a specific cpu and wait for it to return.
 * Optionally make sure the call is done on a specified physical cpu via vcpu
 * pinning in order to support virtualized environments.
 */
struct smp_call_on_cpu_struct {
	struct work_struct	work;
	struct completion	done;
	int			(*func)(void *);
	void			*data;
	int			ret;
	int			cpu;
};

static void smp_call_on_cpu_callback(struct work_struct *work)
{
	struct smp_call_on_cpu_struct *sscs;

	sscs = container_of(work, struct smp_call_on_cpu_struct, work);
	if (sscs->cpu >= 0)
		hypervisor_pin_vcpu(sscs->cpu);
	sscs->ret = sscs->func(sscs->data);
	if (sscs->cpu >= 0)
		hypervisor_pin_vcpu(-1);

	complete(&sscs->done);
}

int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
{
	struct smp_call_on_cpu_struct sscs = {
		.done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
		.func = func,
		.data = par,
		.cpu  = phys ? cpu : -1,
	};

	INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);

	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
		return -ENXIO;

	queue_work_on(cpu, system_wq, &sscs.work);
	wait_for_completion(&sscs.done);

	return sscs.ret;
}
EXPORT_SYMBOL_GPL(smp_call_on_cpu);
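The file above ends with the generic cross-call API that the rest of the kernel builds on. As a rough, self-contained sketch (not part of kernel/smp.c), the hypothetical module below shows the typical calling pattern for smp_call_function_single() and on_each_cpu_cond_mask(); the module name, the demo_hits per-CPU counter, and both callbacks are invented for illustration, and the remote callback deliberately does only non-blocking work because it runs in IPI context.

/*
 * Illustrative sketch only: exercise the cross-call helpers from a module.
 * Everything named "demo_*" is hypothetical.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(unsigned long, demo_hits);

/* Runs in IPI (hardirq) context on the target CPU: keep it fast and non-blocking. */
static void demo_bump(void *info)
{
	this_cpu_inc(demo_hits);
}

/* Condition callback: invoked with preemption disabled, returns true to IPI @cpu. */
static bool demo_cond(int cpu, void *info)
{
	return per_cpu(demo_hits, cpu) == 0;
}

static int __init demo_init(void)
{
	int err;

	/* Run demo_bump() on CPU 0 and wait for it to complete there. */
	err = smp_call_function_single(0, demo_bump, NULL, 1);
	if (err)
		pr_warn("demo: cross call to CPU 0 failed: %d\n", err);

	/*
	 * Run demo_bump() locally and on every other online CPU for which
	 * demo_cond() returns true, waiting for all of them to finish.
	 */
	on_each_cpu_cond_mask(demo_cond, demo_bump, NULL, true, cpu_online_mask);

	return 0;
}
module_init(demo_init);

static void __exit demo_exit(void)
{
}
module_exit(demo_exit);

MODULE_DESCRIPTION("Hypothetical smp cross-call usage sketch");
MODULE_LICENSE("GPL");

As the comments above note, the callback passed to these helpers executes with interrupts disabled on the target CPU, which is why the documented contract insists it be fast and non-blocking.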
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011 IBM Corporation
 *
 * Author:
 * Mimi Zohar <zohar@us.ibm.com>
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/xattr.h>
#include <linux/magic.h>
#include <linux/ima.h>
#include <linux/evm.h>
#include <linux/fsverity.h>
#include
<keys/system_keyring.h> #include <uapi/linux/fsverity.h> #include "ima.h" #ifdef CONFIG_IMA_APPRAISE_BOOTPARAM static char *ima_appraise_cmdline_default __initdata; core_param(ima_appraise, ima_appraise_cmdline_default, charp, 0); void __init ima_appraise_parse_cmdline(void) { const char *str = ima_appraise_cmdline_default; bool sb_state = arch_ima_get_secureboot(); int appraisal_state = ima_appraise; if (!str) return; if (strncmp(str, "off", 3) == 0) appraisal_state = 0; else if (strncmp(str, "log", 3) == 0) appraisal_state = IMA_APPRAISE_LOG; else if (strncmp(str, "fix", 3) == 0) appraisal_state = IMA_APPRAISE_FIX; else if (strncmp(str, "enforce", 7) == 0) appraisal_state = IMA_APPRAISE_ENFORCE; else pr_err("invalid \"%s\" appraise option", str); /* If appraisal state was changed, but secure boot is enabled, * keep its default */ if (sb_state) { if (!(appraisal_state & IMA_APPRAISE_ENFORCE)) pr_info("Secure boot enabled: ignoring ima_appraise=%s option", str); } else { ima_appraise = appraisal_state; } } #endif /* * is_ima_appraise_enabled - return appraise status * * Only return enabled, if not in ima_appraise="fix" or "log" modes. */ bool is_ima_appraise_enabled(void) { return ima_appraise & IMA_APPRAISE_ENFORCE; } /* * ima_must_appraise - set appraise flag * * Return 1 to appraise or hash */ int ima_must_appraise(struct mnt_idmap *idmap, struct inode *inode, int mask, enum ima_hooks func) { u32 secid; if (!ima_appraise) return 0; security_current_getsecid_subj(&secid); return ima_match_policy(idmap, inode, current_cred(), secid, func, mask, IMA_APPRAISE | IMA_HASH, NULL, NULL, NULL, NULL); } static int ima_fix_xattr(struct dentry *dentry, struct ima_iint_cache *iint) { int rc, offset; u8 algo = iint->ima_hash->algo; if (algo <= HASH_ALGO_SHA1) { offset = 1; iint->ima_hash->xattr.sha1.type = IMA_XATTR_DIGEST; } else { offset = 0; iint->ima_hash->xattr.ng.type = IMA_XATTR_DIGEST_NG; iint->ima_hash->xattr.ng.algo = algo; } rc = __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_IMA, &iint->ima_hash->xattr.data[offset], (sizeof(iint->ima_hash->xattr) - offset) + iint->ima_hash->length, 0); return rc; } /* Return specific func appraised cached result */ enum integrity_status ima_get_cache_status(struct ima_iint_cache *iint, enum ima_hooks func) { switch (func) { case MMAP_CHECK: case MMAP_CHECK_REQPROT: return iint->ima_mmap_status; case BPRM_CHECK: return iint->ima_bprm_status; case CREDS_CHECK: return iint->ima_creds_status; case FILE_CHECK: case POST_SETATTR: return iint->ima_file_status; case MODULE_CHECK ... MAX_CHECK - 1: default: return iint->ima_read_status; } } static void ima_set_cache_status(struct ima_iint_cache *iint, enum ima_hooks func, enum integrity_status status) { switch (func) { case MMAP_CHECK: case MMAP_CHECK_REQPROT: iint->ima_mmap_status = status; break; case BPRM_CHECK: iint->ima_bprm_status = status; break; case CREDS_CHECK: iint->ima_creds_status = status; break; case FILE_CHECK: case POST_SETATTR: iint->ima_file_status = status; break; case MODULE_CHECK ... 
MAX_CHECK - 1: default: iint->ima_read_status = status; break; } } static void ima_cache_flags(struct ima_iint_cache *iint, enum ima_hooks func) { switch (func) { case MMAP_CHECK: case MMAP_CHECK_REQPROT: iint->flags |= (IMA_MMAP_APPRAISED | IMA_APPRAISED); break; case BPRM_CHECK: iint->flags |= (IMA_BPRM_APPRAISED | IMA_APPRAISED); break; case CREDS_CHECK: iint->flags |= (IMA_CREDS_APPRAISED | IMA_APPRAISED); break; case FILE_CHECK: case POST_SETATTR: iint->flags |= (IMA_FILE_APPRAISED | IMA_APPRAISED); break; case MODULE_CHECK ... MAX_CHECK - 1: default: iint->flags |= (IMA_READ_APPRAISED | IMA_APPRAISED); break; } } enum hash_algo ima_get_hash_algo(const struct evm_ima_xattr_data *xattr_value, int xattr_len) { struct signature_v2_hdr *sig; enum hash_algo ret; if (!xattr_value || xattr_len < 2) /* return default hash algo */ return ima_hash_algo; switch (xattr_value->type) { case IMA_VERITY_DIGSIG: sig = (typeof(sig))xattr_value; if (sig->version != 3 || xattr_len <= sizeof(*sig) || sig->hash_algo >= HASH_ALGO__LAST) return ima_hash_algo; return sig->hash_algo; case EVM_IMA_XATTR_DIGSIG: sig = (typeof(sig))xattr_value; if (sig->version != 2 || xattr_len <= sizeof(*sig) || sig->hash_algo >= HASH_ALGO__LAST) return ima_hash_algo; return sig->hash_algo; case IMA_XATTR_DIGEST_NG: /* first byte contains algorithm id */ ret = xattr_value->data[0]; if (ret < HASH_ALGO__LAST) return ret; break; case IMA_XATTR_DIGEST: /* this is for backward compatibility */ if (xattr_len == 21) { unsigned int zero = 0; if (!memcmp(&xattr_value->data[16], &zero, 4)) return HASH_ALGO_MD5; else return HASH_ALGO_SHA1; } else if (xattr_len == 17) return HASH_ALGO_MD5; break; } /* return default hash algo */ return ima_hash_algo; } int ima_read_xattr(struct dentry *dentry, struct evm_ima_xattr_data **xattr_value, int xattr_len) { int ret; ret = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, XATTR_NAME_IMA, (char **)xattr_value, xattr_len, GFP_NOFS); if (ret == -EOPNOTSUPP) ret = 0; return ret; } /* * calc_file_id_hash - calculate the hash of the ima_file_id struct data * @type: xattr type [enum evm_ima_xattr_type] * @algo: hash algorithm [enum hash_algo] * @digest: pointer to the digest to be hashed * @hash: (out) pointer to the hash * * IMA signature version 3 disambiguates the data that is signed by * indirectly signing the hash of the ima_file_id structure data. * * Signing the ima_file_id struct is currently only supported for * IMA_VERITY_DIGSIG type xattrs. * * Return 0 on success, error code otherwise. */ static int calc_file_id_hash(enum evm_ima_xattr_type type, enum hash_algo algo, const u8 *digest, struct ima_digest_data *hash) { struct ima_file_id file_id = { .hash_type = IMA_VERITY_DIGSIG, .hash_algorithm = algo}; unsigned int unused = HASH_MAX_DIGESTSIZE - hash_digest_size[algo]; if (type != IMA_VERITY_DIGSIG) return -EINVAL; memcpy(file_id.hash, digest, hash_digest_size[algo]); hash->algo = algo; hash->length = hash_digest_size[algo]; return ima_calc_buffer_hash(&file_id, sizeof(file_id) - unused, hash); } /* * xattr_verify - verify xattr digest or signature * * Verify whether the hash or signature matches the file contents. * * Return 0 on success, error code otherwise. 
*/ static int xattr_verify(enum ima_hooks func, struct ima_iint_cache *iint, struct evm_ima_xattr_data *xattr_value, int xattr_len, enum integrity_status *status, const char **cause) { struct ima_max_digest_data hash; struct signature_v2_hdr *sig; int rc = -EINVAL, hash_start = 0; int mask; switch (xattr_value->type) { case IMA_XATTR_DIGEST_NG: /* first byte contains algorithm id */ hash_start = 1; fallthrough; case IMA_XATTR_DIGEST: if (*status != INTEGRITY_PASS_IMMUTABLE) { if (iint->flags & IMA_DIGSIG_REQUIRED) { if (iint->flags & IMA_VERITY_REQUIRED) *cause = "verity-signature-required"; else *cause = "IMA-signature-required"; *status = INTEGRITY_FAIL; break; } clear_bit(IMA_DIGSIG, &iint->atomic_flags); } else { set_bit(IMA_DIGSIG, &iint->atomic_flags); } if (xattr_len - sizeof(xattr_value->type) - hash_start >= iint->ima_hash->length) /* * xattr length may be longer. md5 hash in previous * version occupied 20 bytes in xattr, instead of 16 */ rc = memcmp(&xattr_value->data[hash_start], iint->ima_hash->digest, iint->ima_hash->length); else rc = -EINVAL; if (rc) { *cause = "invalid-hash"; *status = INTEGRITY_FAIL; break; } *status = INTEGRITY_PASS; break; case EVM_IMA_XATTR_DIGSIG: set_bit(IMA_DIGSIG, &iint->atomic_flags); mask = IMA_DIGSIG_REQUIRED | IMA_VERITY_REQUIRED; if ((iint->flags & mask) == mask) { *cause = "verity-signature-required"; *status = INTEGRITY_FAIL; break; } sig = (typeof(sig))xattr_value; if (sig->version >= 3) { *cause = "invalid-signature-version"; *status = INTEGRITY_FAIL; break; } rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA, (const char *)xattr_value, xattr_len, iint->ima_hash->digest, iint->ima_hash->length); if (rc == -EOPNOTSUPP) { *status = INTEGRITY_UNKNOWN; break; } if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc && func == KEXEC_KERNEL_CHECK) rc = integrity_digsig_verify(INTEGRITY_KEYRING_PLATFORM, (const char *)xattr_value, xattr_len, iint->ima_hash->digest, iint->ima_hash->length); if (rc) { *cause = "invalid-signature"; *status = INTEGRITY_FAIL; } else { *status = INTEGRITY_PASS; } break; case IMA_VERITY_DIGSIG: set_bit(IMA_DIGSIG, &iint->atomic_flags); if (iint->flags & IMA_DIGSIG_REQUIRED) { if (!(iint->flags & IMA_VERITY_REQUIRED)) { *cause = "IMA-signature-required"; *status = INTEGRITY_FAIL; break; } } sig = (typeof(sig))xattr_value; if (sig->version != 3) { *cause = "invalid-signature-version"; *status = INTEGRITY_FAIL; break; } rc = calc_file_id_hash(IMA_VERITY_DIGSIG, iint->ima_hash->algo, iint->ima_hash->digest, &hash.hdr); if (rc) { *cause = "sigv3-hashing-error"; *status = INTEGRITY_FAIL; break; } rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA, (const char *)xattr_value, xattr_len, hash.digest, hash.hdr.length); if (rc) { *cause = "invalid-verity-signature"; *status = INTEGRITY_FAIL; } else { *status = INTEGRITY_PASS; } break; default: *status = INTEGRITY_UNKNOWN; *cause = "unknown-ima-data"; break; } return rc; } /* * modsig_verify - verify modsig signature * * Verify whether the signature matches the file contents. * * Return 0 on success, error code otherwise. 
*/ static int modsig_verify(enum ima_hooks func, const struct modsig *modsig, enum integrity_status *status, const char **cause) { int rc; rc = integrity_modsig_verify(INTEGRITY_KEYRING_IMA, modsig); if (IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING) && rc && func == KEXEC_KERNEL_CHECK) rc = integrity_modsig_verify(INTEGRITY_KEYRING_PLATFORM, modsig); if (rc) { *cause = "invalid-signature"; *status = INTEGRITY_FAIL; } else { *status = INTEGRITY_PASS; } return rc; } /* * ima_check_blacklist - determine if the binary is blacklisted. * * Add the hash of the blacklisted binary to the measurement list, based * on policy. * * Returns -EPERM if the hash is blacklisted. */ int ima_check_blacklist(struct ima_iint_cache *iint, const struct modsig *modsig, int pcr) { enum hash_algo hash_algo; const u8 *digest = NULL; u32 digestsize = 0; int rc = 0; if (!(iint->flags & IMA_CHECK_BLACKLIST)) return 0; if (iint->flags & IMA_MODSIG_ALLOWED && modsig) { ima_get_modsig_digest(modsig, &hash_algo, &digest, &digestsize); rc = is_binary_blacklisted(digest, digestsize); } else if (iint->flags & IMA_DIGSIG_REQUIRED && iint->ima_hash) rc = is_binary_blacklisted(iint->ima_hash->digest, iint->ima_hash->length); if ((rc == -EPERM) && (iint->flags & IMA_MEASURE)) process_buffer_measurement(&nop_mnt_idmap, NULL, digest, digestsize, "blacklisted-hash", NONE, pcr, NULL, false, NULL, 0); return rc; } /* * ima_appraise_measurement - appraise file measurement * * Call evm_verifyxattr() to verify the integrity of 'security.ima'. * Assuming success, compare the xattr hash with the collected measurement. * * Return 0 on success, error code otherwise */ int ima_appraise_measurement(enum ima_hooks func, struct ima_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig) { static const char op[] = "appraise_data"; const char *cause = "unknown"; struct dentry *dentry = file_dentry(file); struct inode *inode = d_backing_inode(dentry); enum integrity_status status = INTEGRITY_UNKNOWN; int rc = xattr_len; bool try_modsig = iint->flags & IMA_MODSIG_ALLOWED && modsig; /* If not appraising a modsig, we need an xattr. */ if (!(inode->i_opflags & IOP_XATTR) && !try_modsig) return INTEGRITY_UNKNOWN; /* If reading the xattr failed and there's no modsig, error out. */ if (rc <= 0 && !try_modsig) { if (rc && rc != -ENODATA) goto out; if (iint->flags & IMA_DIGSIG_REQUIRED) { if (iint->flags & IMA_VERITY_REQUIRED) cause = "verity-signature-required"; else cause = "IMA-signature-required"; } else { cause = "missing-hash"; } status = INTEGRITY_NOLABEL; if (file->f_mode & FMODE_CREATED) iint->flags |= IMA_NEW_FILE; if ((iint->flags & IMA_NEW_FILE) && (!(iint->flags & IMA_DIGSIG_REQUIRED) || (inode->i_size == 0))) status = INTEGRITY_PASS; goto out; } status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc < 0 ? 0 : rc); switch (status) { case INTEGRITY_PASS: case INTEGRITY_PASS_IMMUTABLE: case INTEGRITY_UNKNOWN: break; case INTEGRITY_NOXATTRS: /* No EVM protected xattrs. */ /* It's fine not to have xattrs when using a modsig. */ if (try_modsig) break; fallthrough; case INTEGRITY_NOLABEL: /* No security.evm xattr. */ cause = "missing-HMAC"; goto out; case INTEGRITY_FAIL_IMMUTABLE: set_bit(IMA_DIGSIG, &iint->atomic_flags); cause = "invalid-fail-immutable"; goto out; case INTEGRITY_FAIL: /* Invalid HMAC/signature. 
*/ cause = "invalid-HMAC"; goto out; default: WARN_ONCE(true, "Unexpected integrity status %d\n", status); } if (xattr_value) rc = xattr_verify(func, iint, xattr_value, xattr_len, &status, &cause); /* * If we have a modsig and either no imasig or the imasig's key isn't * known, then try verifying the modsig. */ if (try_modsig && (!xattr_value || xattr_value->type == IMA_XATTR_DIGEST_NG || rc == -ENOKEY)) rc = modsig_verify(func, modsig, &status, &cause); out: /* * File signatures on some filesystems can not be properly verified. * When such filesystems are mounted by an untrusted mounter or on a * system not willing to accept such a risk, fail the file signature * verification. */ if ((inode->i_sb->s_iflags & SB_I_IMA_UNVERIFIABLE_SIGNATURE) && ((inode->i_sb->s_iflags & SB_I_UNTRUSTED_MOUNTER) || (iint->flags & IMA_FAIL_UNVERIFIABLE_SIGS))) { status = INTEGRITY_FAIL; cause = "unverifiable-signature"; integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename, op, cause, rc, 0); } else if (status != INTEGRITY_PASS) { /* Fix mode, but don't replace file signatures. */ if ((ima_appraise & IMA_APPRAISE_FIX) && !try_modsig && (!xattr_value || xattr_value->type != EVM_IMA_XATTR_DIGSIG)) { if (!ima_fix_xattr(dentry, iint)) status = INTEGRITY_PASS; } /* * Permit new files with file/EVM portable signatures, but * without data. */ if (inode->i_size == 0 && iint->flags & IMA_NEW_FILE && test_bit(IMA_DIGSIG, &iint->atomic_flags)) { status = INTEGRITY_PASS; } integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename, op, cause, rc, 0); } else { ima_cache_flags(iint, func); } ima_set_cache_status(iint, func, status); return status; } /* * ima_update_xattr - update 'security.ima' hash value */ void ima_update_xattr(struct ima_iint_cache *iint, struct file *file) { struct dentry *dentry = file_dentry(file); int rc = 0; /* do not collect and update hash for digital signatures */ if (test_bit(IMA_DIGSIG, &iint->atomic_flags)) return; if ((iint->ima_file_status != INTEGRITY_PASS) && !(iint->flags & IMA_HASH)) return; rc = ima_collect_measurement(iint, file, NULL, 0, ima_hash_algo, NULL); if (rc < 0) return; inode_lock(file_inode(file)); ima_fix_xattr(dentry, iint); inode_unlock(file_inode(file)); } /** * ima_inode_post_setattr - reflect file metadata changes * @idmap: idmap of the mount the inode was found from * @dentry: pointer to the affected dentry * @ia_valid: for the UID and GID status * * Changes to a dentry's metadata might result in needing to appraise. * * This function is called from notify_change(), which expects the caller * to lock the inode's i_mutex. */ static void ima_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int ia_valid) { struct inode *inode = d_backing_inode(dentry); struct ima_iint_cache *iint; int action; if (!(ima_policy_flag & IMA_APPRAISE) || !S_ISREG(inode->i_mode) || !(inode->i_opflags & IOP_XATTR)) return; action = ima_must_appraise(idmap, inode, MAY_ACCESS, POST_SETATTR); iint = ima_iint_find(inode); if (iint) { set_bit(IMA_CHANGE_ATTR, &iint->atomic_flags); if (!action) clear_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); } } /* * ima_protect_xattr - protect 'security.ima' * * Ensure that not just anyone can modify or remove 'security.ima'. 
*/ static int ima_protect_xattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) { if (strcmp(xattr_name, XATTR_NAME_IMA) == 0) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; return 1; } return 0; } static void ima_reset_appraise_flags(struct inode *inode, int digsig) { struct ima_iint_cache *iint; if (!(ima_policy_flag & IMA_APPRAISE) || !S_ISREG(inode->i_mode)) return; iint = ima_iint_find(inode); if (!iint) return; iint->measured_pcrs = 0; set_bit(IMA_CHANGE_XATTR, &iint->atomic_flags); if (digsig) set_bit(IMA_DIGSIG, &iint->atomic_flags); else clear_bit(IMA_DIGSIG, &iint->atomic_flags); } /** * validate_hash_algo() - Block setxattr with unsupported hash algorithms * @dentry: object of the setxattr() * @xattr_value: userland supplied xattr value * @xattr_value_len: length of xattr_value * * The xattr value is mapped to its hash algorithm, and this algorithm * must be built in the kernel for the setxattr to be allowed. * * Emit an audit message when the algorithm is invalid. * * Return: 0 on success, else an error. */ static int validate_hash_algo(struct dentry *dentry, const struct evm_ima_xattr_data *xattr_value, size_t xattr_value_len) { char *path = NULL, *pathbuf = NULL; enum hash_algo xattr_hash_algo; const char *errmsg = "unavailable-hash-algorithm"; unsigned int allowed_hashes; xattr_hash_algo = ima_get_hash_algo(xattr_value, xattr_value_len); allowed_hashes = atomic_read(&ima_setxattr_allowed_hash_algorithms); if (allowed_hashes) { /* success if the algorithm is allowed in the ima policy */ if (allowed_hashes & (1U << xattr_hash_algo)) return 0; /* * We use a different audit message when the hash algorithm * is denied by a policy rule, instead of not being built * in the kernel image */ errmsg = "denied-hash-algorithm"; } else { if (likely(xattr_hash_algo == ima_hash_algo)) return 0; /* allow any xattr using an algorithm built in the kernel */ if (crypto_has_alg(hash_algo_name[xattr_hash_algo], 0, 0)) return 0; } pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); if (!pathbuf) return -EACCES; path = dentry_path(dentry, pathbuf, PATH_MAX); integrity_audit_msg(AUDIT_INTEGRITY_DATA, d_inode(dentry), path, "set_data", errmsg, -EACCES, 0); kfree(pathbuf); return -EACCES; } static int ima_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len, int flags) { const struct evm_ima_xattr_data *xvalue = xattr_value; int digsig = 0; int result; int err; result = ima_protect_xattr(dentry, xattr_name, xattr_value, xattr_value_len); if (result == 1) { if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST)) return -EINVAL; err = validate_hash_algo(dentry, xvalue, xattr_value_len); if (err) return err; digsig = (xvalue->type == EVM_IMA_XATTR_DIGSIG); } else if (!strcmp(xattr_name, XATTR_NAME_EVM) && xattr_value_len > 0) { digsig = (xvalue->type == EVM_XATTR_PORTABLE_DIGSIG); } if (result == 1 || evm_revalidate_status(xattr_name)) { ima_reset_appraise_flags(d_backing_inode(dentry), digsig); if (result == 1) result = 0; } return result; } static int ima_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { if (evm_revalidate_status(acl_name)) ima_reset_appraise_flags(d_backing_inode(dentry), 0); return 0; } static int ima_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name) { int result; result = ima_protect_xattr(dentry, xattr_name, NULL, 0); if (result == 1 || 
evm_revalidate_status(xattr_name)) { ima_reset_appraise_flags(d_backing_inode(dentry), 0); if (result == 1) result = 0; } return result; } static int ima_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { return ima_inode_set_acl(idmap, dentry, acl_name, NULL); } static struct security_hook_list ima_appraise_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_post_setattr, ima_inode_post_setattr), LSM_HOOK_INIT(inode_setxattr, ima_inode_setxattr), LSM_HOOK_INIT(inode_set_acl, ima_inode_set_acl), LSM_HOOK_INIT(inode_removexattr, ima_inode_removexattr), LSM_HOOK_INIT(inode_remove_acl, ima_inode_remove_acl), }; void __init init_ima_appraise_lsm(const struct lsm_id *lsmid) { security_add_hooks(ima_appraise_hooks, ARRAY_SIZE(ima_appraise_hooks), lsmid); }
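The appraisal code above keys everything off the layout of the security.ima xattr: the first byte is the evm_ima_xattr_type that ima_get_hash_algo() switches on, and for the "ng" digest format the byte that follows carries the hash algorithm id. As a minimal userspace sketch, not part of this file, the snippet below dumps those leading bytes with the standard getxattr(2) call; reading security.* xattrs may require privilege depending on the security modules in use.

/* Hypothetical userspace helper: print the type byte of security.ima. */
#include <stdio.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	unsigned char buf[1024];
	ssize_t len;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	len = getxattr(argv[1], "security.ima", buf, sizeof(buf));
	if (len < 0) {
		perror("getxattr(security.ima)");
		return 1;
	}
	if (len == 0) {
		fprintf(stderr, "security.ima is empty\n");
		return 1;
	}

	/*
	 * buf[0] is the evm_ima_xattr_type (plain digest, "ng" digest,
	 * signature, fs-verity signature, ...); for the "ng" format,
	 * buf[1] is the hash algorithm id that ima_get_hash_algo() reads.
	 */
	printf("security.ima: %zd bytes, type byte 0x%02x\n", len, buf[0]);
	return 0;
}

The printed type byte can be compared against the cases handled in ima_get_hash_algo() and xattr_verify() above to see which verification path the kernel would take for that file.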
/*
 * usbmidi.c - ALSA USB MIDI driver
 *
 * Copyright (c) 2002-2009 Clemens Ladisch
 * All rights reserved.
 *
 * Based on the OSS usb-midi driver by NAGANO Daisuke,
 * NetBSD's umidi driver by Takuya SHIOZAKI,
 * the "USB Device Class Definition for MIDI Devices" by Roland
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed and/or modified under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
*/ #include <linux/kernel.h> #include <linux/types.h> #include <linux/bitops.h> #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/usb.h> #include <linux/wait.h> #include <linux/usb/audio.h> #include <linux/usb/midi.h> #include <linux/module.h> #include <sound/core.h> #include <sound/control.h> #include <sound/rawmidi.h> #include <sound/asequencer.h> #include "usbaudio.h" #include "midi.h" #include "power.h" #include "helper.h" /* * define this to log all USB packets */ /* #define DUMP_PACKETS */ /* * how long to wait after some USB errors, so that hub_wq can disconnect() us * without too many spurious errors */ #define ERROR_DELAY_JIFFIES (HZ / 10) #define OUTPUT_URBS 7 #define INPUT_URBS 7 MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>"); MODULE_DESCRIPTION("USB Audio/MIDI helper module"); MODULE_LICENSE("Dual BSD/GPL"); struct snd_usb_midi_in_endpoint; struct snd_usb_midi_out_endpoint; struct snd_usb_midi_endpoint; struct usb_protocol_ops { void (*input)(struct snd_usb_midi_in_endpoint*, uint8_t*, int); void (*output)(struct snd_usb_midi_out_endpoint *ep, struct urb *urb); void (*output_packet)(struct urb*, uint8_t, uint8_t, uint8_t, uint8_t); void (*init_out_endpoint)(struct snd_usb_midi_out_endpoint *); void (*finish_out_endpoint)(struct snd_usb_midi_out_endpoint *); }; struct snd_usb_midi { struct usb_device *dev; struct snd_card *card; struct usb_interface *iface; const struct snd_usb_audio_quirk *quirk; struct snd_rawmidi *rmidi; const struct usb_protocol_ops *usb_protocol_ops; struct list_head list; struct timer_list error_timer; spinlock_t disc_lock; struct rw_semaphore disc_rwsem; struct mutex mutex; u32 usb_id; int next_midi_device; struct snd_usb_midi_endpoint { struct snd_usb_midi_out_endpoint *out; struct snd_usb_midi_in_endpoint *in; } endpoints[MIDI_MAX_ENDPOINTS]; unsigned long input_triggered; unsigned int opened[2]; unsigned char disconnected; unsigned char input_running; struct snd_kcontrol *roland_load_ctl; }; struct snd_usb_midi_out_endpoint { struct snd_usb_midi *umidi; struct out_urb_context { struct urb *urb; struct snd_usb_midi_out_endpoint *ep; } urbs[OUTPUT_URBS]; unsigned int active_urbs; unsigned int drain_urbs; int max_transfer; /* size of urb buffer */ struct work_struct work; unsigned int next_urb; spinlock_t buffer_lock; struct usbmidi_out_port { struct snd_usb_midi_out_endpoint *ep; struct snd_rawmidi_substream *substream; int active; uint8_t cable; /* cable number << 4 */ uint8_t state; #define STATE_UNKNOWN 0 #define STATE_1PARAM 1 #define STATE_2PARAM_1 2 #define STATE_2PARAM_2 3 #define STATE_SYSEX_0 4 #define STATE_SYSEX_1 5 #define STATE_SYSEX_2 6 uint8_t data[2]; } ports[0x10]; int current_port; wait_queue_head_t drain_wait; }; struct snd_usb_midi_in_endpoint { struct snd_usb_midi *umidi; struct urb *urbs[INPUT_URBS]; struct usbmidi_in_port { struct snd_rawmidi_substream *substream; u8 running_status_length; } ports[0x10]; u8 seen_f5; bool in_sysex; u8 last_cin; u8 error_resubmit; int current_port; }; static void snd_usbmidi_do_output(struct snd_usb_midi_out_endpoint *ep); static const uint8_t snd_usbmidi_cin_length[] = { 0, 0, 2, 3, 3, 1, 2, 3, 3, 3, 3, 3, 2, 2, 3, 1 }; /* * Submits the URB, with error handling. 
*/ static int snd_usbmidi_submit_urb(struct urb *urb, gfp_t flags) { int err = usb_submit_urb(urb, flags); if (err < 0 && err != -ENODEV) dev_err(&urb->dev->dev, "usb_submit_urb: %d\n", err); return err; } /* * Error handling for URB completion functions. */ static int snd_usbmidi_urb_error(const struct urb *urb) { switch (urb->status) { /* manually unlinked, or device gone */ case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: case -ENODEV: return -ENODEV; /* errors that might occur during unplugging */ case -EPROTO: case -ETIME: case -EILSEQ: return -EIO; default: dev_err(&urb->dev->dev, "urb status %d\n", urb->status); return 0; /* continue */ } } /* * Receives a chunk of MIDI data. */ static void snd_usbmidi_input_data(struct snd_usb_midi_in_endpoint *ep, int portidx, uint8_t *data, int length) { struct usbmidi_in_port *port = &ep->ports[portidx]; if (!port->substream) { dev_dbg(&ep->umidi->dev->dev, "unexpected port %d!\n", portidx); return; } if (!test_bit(port->substream->number, &ep->umidi->input_triggered)) return; snd_rawmidi_receive(port->substream, data, length); } #ifdef DUMP_PACKETS static void dump_urb(const char *type, const u8 *data, int length) { snd_printk(KERN_DEBUG "%s packet: [", type); for (; length > 0; ++data, --length) printk(KERN_CONT " %02x", *data); printk(KERN_CONT " ]\n"); } #else #define dump_urb(type, data, length) /* nothing */ #endif /* * Processes the data read from the device. */ static void snd_usbmidi_in_urb_complete(struct urb *urb) { struct snd_usb_midi_in_endpoint *ep = urb->context; if (urb->status == 0) { dump_urb("received", urb->transfer_buffer, urb->actual_length); ep->umidi->usb_protocol_ops->input(ep, urb->transfer_buffer, urb->actual_length); } else { int err = snd_usbmidi_urb_error(urb); if (err < 0) { if (err != -ENODEV) { ep->error_resubmit = 1; mod_timer(&ep->umidi->error_timer, jiffies + ERROR_DELAY_JIFFIES); } return; } } urb->dev = ep->umidi->dev; snd_usbmidi_submit_urb(urb, GFP_ATOMIC); } static void snd_usbmidi_out_urb_complete(struct urb *urb) { struct out_urb_context *context = urb->context; struct snd_usb_midi_out_endpoint *ep = context->ep; unsigned int urb_index; unsigned long flags; spin_lock_irqsave(&ep->buffer_lock, flags); urb_index = context - ep->urbs; ep->active_urbs &= ~(1 << urb_index); if (unlikely(ep->drain_urbs)) { ep->drain_urbs &= ~(1 << urb_index); wake_up(&ep->drain_wait); } spin_unlock_irqrestore(&ep->buffer_lock, flags); if (urb->status < 0) { int err = snd_usbmidi_urb_error(urb); if (err < 0) { if (err != -ENODEV) mod_timer(&ep->umidi->error_timer, jiffies + ERROR_DELAY_JIFFIES); return; } } snd_usbmidi_do_output(ep); } /* * This is called when some data should be transferred to the device * (from one or more substreams). 
*/ static void snd_usbmidi_do_output(struct snd_usb_midi_out_endpoint *ep) { unsigned int urb_index; struct urb *urb; unsigned long flags; spin_lock_irqsave(&ep->buffer_lock, flags); if (ep->umidi->disconnected) { spin_unlock_irqrestore(&ep->buffer_lock, flags); return; } urb_index = ep->next_urb; for (;;) { if (!(ep->active_urbs & (1 << urb_index))) { urb = ep->urbs[urb_index].urb; urb->transfer_buffer_length = 0; ep->umidi->usb_protocol_ops->output(ep, urb); if (urb->transfer_buffer_length == 0) break; dump_urb("sending", urb->transfer_buffer, urb->transfer_buffer_length); urb->dev = ep->umidi->dev; if (snd_usbmidi_submit_urb(urb, GFP_ATOMIC) < 0) break; ep->active_urbs |= 1 << urb_index; } if (++urb_index >= OUTPUT_URBS) urb_index = 0; if (urb_index == ep->next_urb) break; } ep->next_urb = urb_index; spin_unlock_irqrestore(&ep->buffer_lock, flags); } static void snd_usbmidi_out_work(struct work_struct *work) { struct snd_usb_midi_out_endpoint *ep = container_of(work, struct snd_usb_midi_out_endpoint, work); snd_usbmidi_do_output(ep); } /* called after transfers had been interrupted due to some USB error */ static void snd_usbmidi_error_timer(struct timer_list *t) { struct snd_usb_midi *umidi = from_timer(umidi, t, error_timer); unsigned int i, j; spin_lock(&umidi->disc_lock); if (umidi->disconnected) { spin_unlock(&umidi->disc_lock); return; } for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_in_endpoint *in = umidi->endpoints[i].in; if (in && in->error_resubmit) { in->error_resubmit = 0; for (j = 0; j < INPUT_URBS; ++j) { if (atomic_read(&in->urbs[j]->use_count)) continue; in->urbs[j]->dev = umidi->dev; snd_usbmidi_submit_urb(in->urbs[j], GFP_ATOMIC); } } if (umidi->endpoints[i].out) snd_usbmidi_do_output(umidi->endpoints[i].out); } spin_unlock(&umidi->disc_lock); } /* helper function to send static data that may not DMA-able */ static int send_bulk_static_data(struct snd_usb_midi_out_endpoint *ep, const void *data, int len) { int err = 0; void *buf = kmemdup(data, len, GFP_KERNEL); if (!buf) return -ENOMEM; dump_urb("sending", buf, len); if (ep->urbs[0].urb) err = usb_bulk_msg(ep->umidi->dev, ep->urbs[0].urb->pipe, buf, len, NULL, 250); kfree(buf); return err; } /* * Standard USB MIDI protocol: see the spec. * Midiman protocol: like the standard protocol, but the control byte is the * fourth byte in each packet, and uses length instead of CIN. */ static void snd_usbmidi_standard_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { int i; for (i = 0; i + 3 < buffer_length; i += 4) if (buffer[i] != 0) { int cable = buffer[i] >> 4; int length = snd_usbmidi_cin_length[buffer[i] & 0x0f]; snd_usbmidi_input_data(ep, cable, &buffer[i + 1], length); } } static void snd_usbmidi_midiman_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { int i; for (i = 0; i + 3 < buffer_length; i += 4) if (buffer[i + 3] != 0) { int port = buffer[i + 3] >> 4; int length = buffer[i + 3] & 3; snd_usbmidi_input_data(ep, port, &buffer[i], length); } } /* * Buggy M-Audio device: running status on input results in a packet that has * the data bytes but not the status byte and that is marked with CIN 4. 
*/ static void snd_usbmidi_maudio_broken_running_status_input( struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { int i; for (i = 0; i + 3 < buffer_length; i += 4) if (buffer[i] != 0) { int cable = buffer[i] >> 4; u8 cin = buffer[i] & 0x0f; struct usbmidi_in_port *port = &ep->ports[cable]; int length; length = snd_usbmidi_cin_length[cin]; if (cin == 0xf && buffer[i + 1] >= 0xf8) ; /* realtime msg: no running status change */ else if (cin >= 0x8 && cin <= 0xe) /* channel msg */ port->running_status_length = length - 1; else if (cin == 0x4 && port->running_status_length != 0 && buffer[i + 1] < 0x80) /* CIN 4 that is not a SysEx */ length = port->running_status_length; else /* * All other msgs cannot begin running status. * (A channel msg sent as two or three CIN 0xF * packets could in theory, but this device * doesn't use this format.) */ port->running_status_length = 0; snd_usbmidi_input_data(ep, cable, &buffer[i + 1], length); } } /* * QinHeng CH345 is buggy: every second packet inside a SysEx has not CIN 4 * but the previously seen CIN, but still with three data bytes. */ static void ch345_broken_sysex_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { unsigned int i, cin, length; for (i = 0; i + 3 < buffer_length; i += 4) { if (buffer[i] == 0 && i > 0) break; cin = buffer[i] & 0x0f; if (ep->in_sysex && cin == ep->last_cin && (buffer[i + 1 + (cin == 0x6)] & 0x80) == 0) cin = 0x4; #if 0 if (buffer[i + 1] == 0x90) { /* * Either a corrupted running status or a real note-on * message; impossible to detect reliably. */ } #endif length = snd_usbmidi_cin_length[cin]; snd_usbmidi_input_data(ep, 0, &buffer[i + 1], length); ep->in_sysex = cin == 0x4; if (!ep->in_sysex) ep->last_cin = cin; } } /* * CME protocol: like the standard protocol, but SysEx commands are sent as a * single USB packet preceded by a 0x0F byte. */ static void snd_usbmidi_cme_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { if (buffer_length < 2 || (buffer[0] & 0x0f) != 0x0f) snd_usbmidi_standard_input(ep, buffer, buffer_length); else snd_usbmidi_input_data(ep, buffer[0] >> 4, &buffer[1], buffer_length - 1); } /* * Adds one USB MIDI packet to the output buffer. */ static void snd_usbmidi_output_standard_packet(struct urb *urb, uint8_t p0, uint8_t p1, uint8_t p2, uint8_t p3) { uint8_t *buf = (uint8_t *)urb->transfer_buffer + urb->transfer_buffer_length; buf[0] = p0; buf[1] = p1; buf[2] = p2; buf[3] = p3; urb->transfer_buffer_length += 4; } /* * Adds one Midiman packet to the output buffer. */ static void snd_usbmidi_output_midiman_packet(struct urb *urb, uint8_t p0, uint8_t p1, uint8_t p2, uint8_t p3) { uint8_t *buf = (uint8_t *)urb->transfer_buffer + urb->transfer_buffer_length; buf[0] = p1; buf[1] = p2; buf[2] = p3; buf[3] = (p0 & 0xf0) | snd_usbmidi_cin_length[p0 & 0x0f]; urb->transfer_buffer_length += 4; } /* * Converts MIDI commands to USB MIDI packets. 
*/ static void snd_usbmidi_transmit_byte(struct usbmidi_out_port *port, uint8_t b, struct urb *urb) { uint8_t p0 = port->cable; void (*output_packet)(struct urb*, uint8_t, uint8_t, uint8_t, uint8_t) = port->ep->umidi->usb_protocol_ops->output_packet; if (b >= 0xf8) { output_packet(urb, p0 | 0x0f, b, 0, 0); } else if (b >= 0xf0) { switch (b) { case 0xf0: port->data[0] = b; port->state = STATE_SYSEX_1; break; case 0xf1: case 0xf3: port->data[0] = b; port->state = STATE_1PARAM; break; case 0xf2: port->data[0] = b; port->state = STATE_2PARAM_1; break; case 0xf4: case 0xf5: port->state = STATE_UNKNOWN; break; case 0xf6: output_packet(urb, p0 | 0x05, 0xf6, 0, 0); port->state = STATE_UNKNOWN; break; case 0xf7: switch (port->state) { case STATE_SYSEX_0: output_packet(urb, p0 | 0x05, 0xf7, 0, 0); break; case STATE_SYSEX_1: output_packet(urb, p0 | 0x06, port->data[0], 0xf7, 0); break; case STATE_SYSEX_2: output_packet(urb, p0 | 0x07, port->data[0], port->data[1], 0xf7); break; } port->state = STATE_UNKNOWN; break; } } else if (b >= 0x80) { port->data[0] = b; if (b >= 0xc0 && b <= 0xdf) port->state = STATE_1PARAM; else port->state = STATE_2PARAM_1; } else { /* b < 0x80 */ switch (port->state) { case STATE_1PARAM: if (port->data[0] < 0xf0) { p0 |= port->data[0] >> 4; } else { p0 |= 0x02; port->state = STATE_UNKNOWN; } output_packet(urb, p0, port->data[0], b, 0); break; case STATE_2PARAM_1: port->data[1] = b; port->state = STATE_2PARAM_2; break; case STATE_2PARAM_2: if (port->data[0] < 0xf0) { p0 |= port->data[0] >> 4; port->state = STATE_2PARAM_1; } else { p0 |= 0x03; port->state = STATE_UNKNOWN; } output_packet(urb, p0, port->data[0], port->data[1], b); break; case STATE_SYSEX_0: port->data[0] = b; port->state = STATE_SYSEX_1; break; case STATE_SYSEX_1: port->data[1] = b; port->state = STATE_SYSEX_2; break; case STATE_SYSEX_2: output_packet(urb, p0 | 0x04, port->data[0], port->data[1], b); port->state = STATE_SYSEX_0; break; } } } static void snd_usbmidi_standard_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { int p; /* FIXME: lower-numbered ports can starve higher-numbered ports */ for (p = 0; p < 0x10; ++p) { struct usbmidi_out_port *port = &ep->ports[p]; if (!port->active) continue; while (urb->transfer_buffer_length + 3 < ep->max_transfer) { uint8_t b; if (snd_rawmidi_transmit(port->substream, &b, 1) != 1) { port->active = 0; break; } snd_usbmidi_transmit_byte(port, b, urb); } } } static const struct usb_protocol_ops snd_usbmidi_standard_ops = { .input = snd_usbmidi_standard_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_standard_packet, }; static const struct usb_protocol_ops snd_usbmidi_midiman_ops = { .input = snd_usbmidi_midiman_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_midiman_packet, }; static const struct usb_protocol_ops snd_usbmidi_maudio_broken_running_status_ops = { .input = snd_usbmidi_maudio_broken_running_status_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_standard_packet, }; static const struct usb_protocol_ops snd_usbmidi_cme_ops = { .input = snd_usbmidi_cme_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_standard_packet, }; static const struct usb_protocol_ops snd_usbmidi_ch345_broken_sysex_ops = { .input = ch345_broken_sysex_input, .output = snd_usbmidi_standard_output, .output_packet = snd_usbmidi_output_standard_packet, }; /* * AKAI MPD16 protocol: * * For control port (endpoint 1): * ============================== * 
One or more chunks consisting of first byte of (0x10 | msg_len) and then a * SysEx message (msg_len=9 bytes long). * * For data port (endpoint 2): * =========================== * One or more chunks consisting of first byte of (0x20 | msg_len) and then a * MIDI message (msg_len bytes long) * * Messages sent: Active Sense, Note On, Poly Pressure, Control Change. */ static void snd_usbmidi_akai_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { unsigned int pos = 0; unsigned int len = (unsigned int)buffer_length; while (pos < len) { unsigned int port = (buffer[pos] >> 4) - 1; unsigned int msg_len = buffer[pos] & 0x0f; pos++; if (pos + msg_len <= len && port < 2) snd_usbmidi_input_data(ep, 0, &buffer[pos], msg_len); pos += msg_len; } } #define MAX_AKAI_SYSEX_LEN 9 static void snd_usbmidi_akai_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { uint8_t *msg; int pos, end, count, buf_end; uint8_t tmp[MAX_AKAI_SYSEX_LEN]; struct snd_rawmidi_substream *substream = ep->ports[0].substream; if (!ep->ports[0].active) return; msg = urb->transfer_buffer + urb->transfer_buffer_length; buf_end = ep->max_transfer - MAX_AKAI_SYSEX_LEN - 1; /* only try adding more data when there's space for at least 1 SysEx */ while (urb->transfer_buffer_length < buf_end) { count = snd_rawmidi_transmit_peek(substream, tmp, MAX_AKAI_SYSEX_LEN); if (!count) { ep->ports[0].active = 0; return; } /* try to skip non-SysEx data */ for (pos = 0; pos < count && tmp[pos] != 0xF0; pos++) ; if (pos > 0) { snd_rawmidi_transmit_ack(substream, pos); continue; } /* look for the start or end marker */ for (end = 1; end < count && tmp[end] < 0xF0; end++) ; /* next SysEx started before the end of current one */ if (end < count && tmp[end] == 0xF0) { /* it's incomplete - drop it */ snd_rawmidi_transmit_ack(substream, end); continue; } /* SysEx complete */ if (end < count && tmp[end] == 0xF7) { /* queue it, ack it, and get the next one */ count = end + 1; msg[0] = 0x10 | count; memcpy(&msg[1], tmp, count); snd_rawmidi_transmit_ack(substream, count); urb->transfer_buffer_length += count + 1; msg += count + 1; continue; } /* less than 9 bytes and no end byte - wait for more */ if (count < MAX_AKAI_SYSEX_LEN) { ep->ports[0].active = 0; return; } /* 9 bytes and no end marker in sight - malformed, skip it */ snd_rawmidi_transmit_ack(substream, count); } } static const struct usb_protocol_ops snd_usbmidi_akai_ops = { .input = snd_usbmidi_akai_input, .output = snd_usbmidi_akai_output, }; /* * Novation USB MIDI protocol: number of data bytes is in the first byte * (when receiving) (+1!) or in the second byte (when sending); data begins * at the third byte. 
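 * For example (illustrative): a received Note On 90 3C 7F is expected as
 * 04 ?? 90 3C 7F (3 data bytes + 1 in the first byte; the second byte is not
 * interpreted here), while the same message is transmitted as 00 03 90 3C 7F.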
*/ static void snd_usbmidi_novation_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { if (buffer_length < 2 || !buffer[0] || buffer_length < buffer[0] + 1) return; snd_usbmidi_input_data(ep, 0, &buffer[2], buffer[0] - 1); } static void snd_usbmidi_novation_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { uint8_t *transfer_buffer; int count; if (!ep->ports[0].active) return; transfer_buffer = urb->transfer_buffer; count = snd_rawmidi_transmit(ep->ports[0].substream, &transfer_buffer[2], ep->max_transfer - 2); if (count < 1) { ep->ports[0].active = 0; return; } transfer_buffer[0] = 0; transfer_buffer[1] = count; urb->transfer_buffer_length = 2 + count; } static const struct usb_protocol_ops snd_usbmidi_novation_ops = { .input = snd_usbmidi_novation_input, .output = snd_usbmidi_novation_output, }; /* * "raw" protocol: just move raw MIDI bytes from/to the endpoint */ static void snd_usbmidi_raw_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { snd_usbmidi_input_data(ep, 0, buffer, buffer_length); } static void snd_usbmidi_raw_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { int count; if (!ep->ports[0].active) return; count = snd_rawmidi_transmit(ep->ports[0].substream, urb->transfer_buffer, ep->max_transfer); if (count < 1) { ep->ports[0].active = 0; return; } urb->transfer_buffer_length = count; } static const struct usb_protocol_ops snd_usbmidi_raw_ops = { .input = snd_usbmidi_raw_input, .output = snd_usbmidi_raw_output, }; /* * FTDI protocol: raw MIDI bytes, but input packets have two modem status bytes. */ static void snd_usbmidi_ftdi_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { if (buffer_length > 2) snd_usbmidi_input_data(ep, 0, buffer + 2, buffer_length - 2); } static const struct usb_protocol_ops snd_usbmidi_ftdi_ops = { .input = snd_usbmidi_ftdi_input, .output = snd_usbmidi_raw_output, }; static void snd_usbmidi_us122l_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { if (buffer_length != 9) return; buffer_length = 8; while (buffer_length && buffer[buffer_length - 1] == 0xFD) buffer_length--; if (buffer_length) snd_usbmidi_input_data(ep, 0, buffer, buffer_length); } static void snd_usbmidi_us122l_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { int count; if (!ep->ports[0].active) return; switch (snd_usb_get_speed(ep->umidi->dev)) { case USB_SPEED_HIGH: case USB_SPEED_SUPER: case USB_SPEED_SUPER_PLUS: count = 1; break; default: count = 2; } count = snd_rawmidi_transmit(ep->ports[0].substream, urb->transfer_buffer, count); if (count < 1) { ep->ports[0].active = 0; return; } memset(urb->transfer_buffer + count, 0xFD, ep->max_transfer - count); urb->transfer_buffer_length = ep->max_transfer; } static const struct usb_protocol_ops snd_usbmidi_122l_ops = { .input = snd_usbmidi_us122l_input, .output = snd_usbmidi_us122l_output, }; /* * Emagic USB MIDI protocol: raw MIDI with "F5 xx" port switching. 
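 * For example (illustrative bytes): F5 02 90 3C 7F switches to the second
 * port (port numbers are 1-based on the wire) and then carries a Note On;
 * a trailing FF marks the end of the valid data within a USB packet.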
*/ static void snd_usbmidi_emagic_init_out(struct snd_usb_midi_out_endpoint *ep) { static const u8 init_data[] = { /* initialization magic: "get version" */ 0xf0, 0x00, 0x20, 0x31, /* Emagic */ 0x64, /* Unitor8 */ 0x0b, /* version number request */ 0x00, /* command version */ 0x00, /* EEPROM, box 0 */ 0xf7 }; send_bulk_static_data(ep, init_data, sizeof(init_data)); /* while we're at it, pour on more magic */ send_bulk_static_data(ep, init_data, sizeof(init_data)); } static void snd_usbmidi_emagic_finish_out(struct snd_usb_midi_out_endpoint *ep) { static const u8 finish_data[] = { /* switch to patch mode with last preset */ 0xf0, 0x00, 0x20, 0x31, /* Emagic */ 0x64, /* Unitor8 */ 0x10, /* patch switch command */ 0x00, /* command version */ 0x7f, /* to all boxes */ 0x40, /* last preset in EEPROM */ 0xf7 }; send_bulk_static_data(ep, finish_data, sizeof(finish_data)); } static void snd_usbmidi_emagic_input(struct snd_usb_midi_in_endpoint *ep, uint8_t *buffer, int buffer_length) { int i; /* FF indicates end of valid data */ for (i = 0; i < buffer_length; ++i) if (buffer[i] == 0xff) { buffer_length = i; break; } /* handle F5 at end of last buffer */ if (ep->seen_f5) goto switch_port; while (buffer_length > 0) { /* determine size of data until next F5 */ for (i = 0; i < buffer_length; ++i) if (buffer[i] == 0xf5) break; snd_usbmidi_input_data(ep, ep->current_port, buffer, i); buffer += i; buffer_length -= i; if (buffer_length <= 0) break; /* assert(buffer[0] == 0xf5); */ ep->seen_f5 = 1; ++buffer; --buffer_length; switch_port: if (buffer_length <= 0) break; if (buffer[0] < 0x80) { ep->current_port = (buffer[0] - 1) & 15; ++buffer; --buffer_length; } ep->seen_f5 = 0; } } static void snd_usbmidi_emagic_output(struct snd_usb_midi_out_endpoint *ep, struct urb *urb) { int port0 = ep->current_port; uint8_t *buf = urb->transfer_buffer; int buf_free = ep->max_transfer; int length, i; for (i = 0; i < 0x10; ++i) { /* round-robin, starting at the last current port */ int portnum = (port0 + i) & 15; struct usbmidi_out_port *port = &ep->ports[portnum]; if (!port->active) continue; if (snd_rawmidi_transmit_peek(port->substream, buf, 1) != 1) { port->active = 0; continue; } if (portnum != ep->current_port) { if (buf_free < 2) break; ep->current_port = portnum; buf[0] = 0xf5; buf[1] = (portnum + 1) & 15; buf += 2; buf_free -= 2; } if (buf_free < 1) break; length = snd_rawmidi_transmit(port->substream, buf, buf_free); if (length > 0) { buf += length; buf_free -= length; if (buf_free < 1) break; } } if (buf_free < ep->max_transfer && buf_free > 0) { *buf = 0xff; --buf_free; } urb->transfer_buffer_length = ep->max_transfer - buf_free; } static const struct usb_protocol_ops snd_usbmidi_emagic_ops = { .input = snd_usbmidi_emagic_input, .output = snd_usbmidi_emagic_output, .init_out_endpoint = snd_usbmidi_emagic_init_out, .finish_out_endpoint = snd_usbmidi_emagic_finish_out, }; static void update_roland_altsetting(struct snd_usb_midi *umidi) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; int is_light_load; intf = umidi->iface; is_light_load = intf->cur_altsetting != intf->altsetting; if (umidi->roland_load_ctl->private_value == is_light_load) return; hostif = &intf->altsetting[umidi->roland_load_ctl->private_value]; intfd = get_iface_desc(hostif); snd_usbmidi_input_stop(&umidi->list); usb_set_interface(umidi->dev, intfd->bInterfaceNumber, intfd->bAlternateSetting); snd_usbmidi_input_start(&umidi->list); } static int substream_open(struct snd_rawmidi_substream 
*substream, int dir, int open) { struct snd_usb_midi *umidi = substream->rmidi->private_data; struct snd_kcontrol *ctl; down_read(&umidi->disc_rwsem); if (umidi->disconnected) { up_read(&umidi->disc_rwsem); return open ? -ENODEV : 0; } mutex_lock(&umidi->mutex); if (open) { if (!umidi->opened[0] && !umidi->opened[1]) { if (umidi->roland_load_ctl) { ctl = umidi->roland_load_ctl; ctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE; snd_ctl_notify(umidi->card, SNDRV_CTL_EVENT_MASK_INFO, &ctl->id); update_roland_altsetting(umidi); } } umidi->opened[dir]++; if (umidi->opened[1]) snd_usbmidi_input_start(&umidi->list); } else { umidi->opened[dir]--; if (!umidi->opened[1]) snd_usbmidi_input_stop(&umidi->list); if (!umidi->opened[0] && !umidi->opened[1]) { if (umidi->roland_load_ctl) { ctl = umidi->roland_load_ctl; ctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE; snd_ctl_notify(umidi->card, SNDRV_CTL_EVENT_MASK_INFO, &ctl->id); } } } mutex_unlock(&umidi->mutex); up_read(&umidi->disc_rwsem); return 0; } static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) { struct snd_usb_midi *umidi = substream->rmidi->private_data; struct usbmidi_out_port *port = NULL; int i, j; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) if (umidi->endpoints[i].out) for (j = 0; j < 0x10; ++j) if (umidi->endpoints[i].out->ports[j].substream == substream) { port = &umidi->endpoints[i].out->ports[j]; break; } if (!port) return -ENXIO; substream->runtime->private_data = port; port->state = STATE_UNKNOWN; return substream_open(substream, 0, 1); } static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream) { struct usbmidi_out_port *port = substream->runtime->private_data; cancel_work_sync(&port->ep->work); return substream_open(substream, 0, 0); } static void snd_usbmidi_output_trigger(struct snd_rawmidi_substream *substream, int up) { struct usbmidi_out_port *port = (struct usbmidi_out_port *)substream->runtime->private_data; port->active = up; if (up) { if (port->ep->umidi->disconnected) { /* gobble up remaining bytes to prevent wait in * snd_rawmidi_drain_output */ snd_rawmidi_proceed(substream); return; } queue_work(system_highpri_wq, &port->ep->work); } } static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream) { struct usbmidi_out_port *port = substream->runtime->private_data; struct snd_usb_midi_out_endpoint *ep = port->ep; unsigned int drain_urbs; DEFINE_WAIT(wait); long timeout = msecs_to_jiffies(50); if (ep->umidi->disconnected) return; /* * The substream buffer is empty, but some data might still be in the * currently active URBs, so we have to wait for those to complete. 
*/ spin_lock_irq(&ep->buffer_lock); drain_urbs = ep->active_urbs; if (drain_urbs) { ep->drain_urbs |= drain_urbs; do { prepare_to_wait(&ep->drain_wait, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_irq(&ep->buffer_lock); timeout = schedule_timeout(timeout); spin_lock_irq(&ep->buffer_lock); drain_urbs &= ep->drain_urbs; } while (drain_urbs && timeout); finish_wait(&ep->drain_wait, &wait); } port->active = 0; spin_unlock_irq(&ep->buffer_lock); } static int snd_usbmidi_input_open(struct snd_rawmidi_substream *substream) { return substream_open(substream, 1, 1); } static int snd_usbmidi_input_close(struct snd_rawmidi_substream *substream) { return substream_open(substream, 1, 0); } static void snd_usbmidi_input_trigger(struct snd_rawmidi_substream *substream, int up) { struct snd_usb_midi *umidi = substream->rmidi->private_data; if (up) set_bit(substream->number, &umidi->input_triggered); else clear_bit(substream->number, &umidi->input_triggered); } static const struct snd_rawmidi_ops snd_usbmidi_output_ops = { .open = snd_usbmidi_output_open, .close = snd_usbmidi_output_close, .trigger = snd_usbmidi_output_trigger, .drain = snd_usbmidi_output_drain, }; static const struct snd_rawmidi_ops snd_usbmidi_input_ops = { .open = snd_usbmidi_input_open, .close = snd_usbmidi_input_close, .trigger = snd_usbmidi_input_trigger }; static void free_urb_and_buffer(struct snd_usb_midi *umidi, struct urb *urb, unsigned int buffer_length) { usb_free_coherent(umidi->dev, buffer_length, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); } /* * Frees an input endpoint. * May be called when ep hasn't been initialized completely. */ static void snd_usbmidi_in_endpoint_delete(struct snd_usb_midi_in_endpoint *ep) { unsigned int i; for (i = 0; i < INPUT_URBS; ++i) if (ep->urbs[i]) free_urb_and_buffer(ep->umidi, ep->urbs[i], ep->urbs[i]->transfer_buffer_length); kfree(ep); } /* * Creates an input endpoint. */ static int snd_usbmidi_in_endpoint_create(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *ep_info, struct snd_usb_midi_endpoint *rep) { struct snd_usb_midi_in_endpoint *ep; void *buffer; unsigned int pipe; int length; unsigned int i; int err; rep->in = NULL; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return -ENOMEM; ep->umidi = umidi; for (i = 0; i < INPUT_URBS; ++i) { ep->urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!ep->urbs[i]) { err = -ENOMEM; goto error; } } if (ep_info->in_interval) pipe = usb_rcvintpipe(umidi->dev, ep_info->in_ep); else pipe = usb_rcvbulkpipe(umidi->dev, ep_info->in_ep); length = usb_maxpacket(umidi->dev, pipe); for (i = 0; i < INPUT_URBS; ++i) { buffer = usb_alloc_coherent(umidi->dev, length, GFP_KERNEL, &ep->urbs[i]->transfer_dma); if (!buffer) { err = -ENOMEM; goto error; } if (ep_info->in_interval) usb_fill_int_urb(ep->urbs[i], umidi->dev, pipe, buffer, length, snd_usbmidi_in_urb_complete, ep, ep_info->in_interval); else usb_fill_bulk_urb(ep->urbs[i], umidi->dev, pipe, buffer, length, snd_usbmidi_in_urb_complete, ep); ep->urbs[i]->transfer_flags = URB_NO_TRANSFER_DMA_MAP; err = usb_urb_ep_type_check(ep->urbs[i]); if (err < 0) { dev_err(&umidi->dev->dev, "invalid MIDI in EP %x\n", ep_info->in_ep); goto error; } } rep->in = ep; return 0; error: snd_usbmidi_in_endpoint_delete(ep); return err; } /* * Frees an output endpoint. * May be called when ep hasn't been initialized completely. 
*/ static void snd_usbmidi_out_endpoint_clear(struct snd_usb_midi_out_endpoint *ep) { unsigned int i; for (i = 0; i < OUTPUT_URBS; ++i) if (ep->urbs[i].urb) { free_urb_and_buffer(ep->umidi, ep->urbs[i].urb, ep->max_transfer); ep->urbs[i].urb = NULL; } } static void snd_usbmidi_out_endpoint_delete(struct snd_usb_midi_out_endpoint *ep) { snd_usbmidi_out_endpoint_clear(ep); kfree(ep); } /* * Creates an output endpoint, and initializes output ports. */ static int snd_usbmidi_out_endpoint_create(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *ep_info, struct snd_usb_midi_endpoint *rep) { struct snd_usb_midi_out_endpoint *ep; unsigned int i; unsigned int pipe; void *buffer; int err; rep->out = NULL; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return -ENOMEM; ep->umidi = umidi; for (i = 0; i < OUTPUT_URBS; ++i) { ep->urbs[i].urb = usb_alloc_urb(0, GFP_KERNEL); if (!ep->urbs[i].urb) { err = -ENOMEM; goto error; } ep->urbs[i].ep = ep; } if (ep_info->out_interval) pipe = usb_sndintpipe(umidi->dev, ep_info->out_ep); else pipe = usb_sndbulkpipe(umidi->dev, ep_info->out_ep); switch (umidi->usb_id) { default: ep->max_transfer = usb_maxpacket(umidi->dev, pipe); break; /* * Various chips declare a packet size larger than 4 bytes, but * do not actually work with larger packets: */ case USB_ID(0x0a67, 0x5011): /* Medeli DD305 */ case USB_ID(0x0a92, 0x1020): /* ESI M4U */ case USB_ID(0x1430, 0x474b): /* RedOctane GH MIDI INTERFACE */ case USB_ID(0x15ca, 0x0101): /* Textech USB Midi Cable */ case USB_ID(0x15ca, 0x1806): /* Textech USB Midi Cable */ case USB_ID(0x1a86, 0x752d): /* QinHeng CH345 "USB2.0-MIDI" */ case USB_ID(0xfc08, 0x0101): /* Unknown vendor Cable */ ep->max_transfer = 4; break; /* * Some devices only work with 9 bytes packet size: */ case USB_ID(0x0644, 0x800e): /* Tascam US-122L */ case USB_ID(0x0644, 0x800f): /* Tascam US-144 */ ep->max_transfer = 9; break; } for (i = 0; i < OUTPUT_URBS; ++i) { buffer = usb_alloc_coherent(umidi->dev, ep->max_transfer, GFP_KERNEL, &ep->urbs[i].urb->transfer_dma); if (!buffer) { err = -ENOMEM; goto error; } if (ep_info->out_interval) usb_fill_int_urb(ep->urbs[i].urb, umidi->dev, pipe, buffer, ep->max_transfer, snd_usbmidi_out_urb_complete, &ep->urbs[i], ep_info->out_interval); else usb_fill_bulk_urb(ep->urbs[i].urb, umidi->dev, pipe, buffer, ep->max_transfer, snd_usbmidi_out_urb_complete, &ep->urbs[i]); err = usb_urb_ep_type_check(ep->urbs[i].urb); if (err < 0) { dev_err(&umidi->dev->dev, "invalid MIDI out EP %x\n", ep_info->out_ep); goto error; } ep->urbs[i].urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; } spin_lock_init(&ep->buffer_lock); INIT_WORK(&ep->work, snd_usbmidi_out_work); init_waitqueue_head(&ep->drain_wait); for (i = 0; i < 0x10; ++i) if (ep_info->out_cables & (1 << i)) { ep->ports[i].ep = ep; ep->ports[i].cable = i << 4; } if (umidi->usb_protocol_ops->init_out_endpoint) umidi->usb_protocol_ops->init_out_endpoint(ep); rep->out = ep; return 0; error: snd_usbmidi_out_endpoint_delete(ep); return err; } /* * Frees everything. */ static void snd_usbmidi_free(struct snd_usb_midi *umidi) { int i; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; if (ep->out) snd_usbmidi_out_endpoint_delete(ep->out); if (ep->in) snd_usbmidi_in_endpoint_delete(ep->in); } mutex_destroy(&umidi->mutex); kfree(umidi); } /* * Unlinks all URBs (must be done before the usb_device is deleted). 
*/ void snd_usbmidi_disconnect(struct list_head *p) { struct snd_usb_midi *umidi; unsigned int i, j; umidi = list_entry(p, struct snd_usb_midi, list); /* * an URB's completion handler may start the timer and * a timer may submit an URB. To reliably break the cycle * a flag under lock must be used */ down_write(&umidi->disc_rwsem); spin_lock_irq(&umidi->disc_lock); umidi->disconnected = 1; spin_unlock_irq(&umidi->disc_lock); up_write(&umidi->disc_rwsem); del_timer_sync(&umidi->error_timer); for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; if (ep->out) cancel_work_sync(&ep->out->work); if (ep->out) { for (j = 0; j < OUTPUT_URBS; ++j) usb_kill_urb(ep->out->urbs[j].urb); if (umidi->usb_protocol_ops->finish_out_endpoint) umidi->usb_protocol_ops->finish_out_endpoint(ep->out); ep->out->active_urbs = 0; if (ep->out->drain_urbs) { ep->out->drain_urbs = 0; wake_up(&ep->out->drain_wait); } } if (ep->in) for (j = 0; j < INPUT_URBS; ++j) usb_kill_urb(ep->in->urbs[j]); /* free endpoints here; later call can result in Oops */ if (ep->out) snd_usbmidi_out_endpoint_clear(ep->out); if (ep->in) { snd_usbmidi_in_endpoint_delete(ep->in); ep->in = NULL; } } } EXPORT_SYMBOL(snd_usbmidi_disconnect); static void snd_usbmidi_rawmidi_free(struct snd_rawmidi *rmidi) { struct snd_usb_midi *umidi = rmidi->private_data; snd_usbmidi_free(umidi); } static struct snd_rawmidi_substream *snd_usbmidi_find_substream(struct snd_usb_midi *umidi, int stream, int number) { struct snd_rawmidi_substream *substream; list_for_each_entry(substream, &umidi->rmidi->streams[stream].substreams, list) { if (substream->number == number) return substream; } return NULL; } /* * This list specifies names for ports that do not fit into the standard * "(product) MIDI (n)" schema because they aren't external MIDI ports, * such as internal control or synthesizer ports. 
*/ static struct port_info { u32 id; short int port; short int voices; const char *name; unsigned int seq_flags; } snd_usbmidi_port_info[] = { #define PORT_INFO(vendor, product, num, name_, voices_, flags) \ { .id = USB_ID(vendor, product), \ .port = num, .voices = voices_, \ .name = name_, .seq_flags = flags } #define EXTERNAL_PORT(vendor, product, num, name) \ PORT_INFO(vendor, product, num, name, 0, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_HARDWARE | \ SNDRV_SEQ_PORT_TYPE_PORT) #define CONTROL_PORT(vendor, product, num, name) \ PORT_INFO(vendor, product, num, name, 0, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_HARDWARE) #define GM_SYNTH_PORT(vendor, product, num, name, voices) \ PORT_INFO(vendor, product, num, name, voices, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM | \ SNDRV_SEQ_PORT_TYPE_HARDWARE | \ SNDRV_SEQ_PORT_TYPE_SYNTHESIZER) #define ROLAND_SYNTH_PORT(vendor, product, num, name, voices) \ PORT_INFO(vendor, product, num, name, voices, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM2 | \ SNDRV_SEQ_PORT_TYPE_MIDI_GS | \ SNDRV_SEQ_PORT_TYPE_MIDI_XG | \ SNDRV_SEQ_PORT_TYPE_HARDWARE | \ SNDRV_SEQ_PORT_TYPE_SYNTHESIZER) #define SOUNDCANVAS_PORT(vendor, product, num, name, voices) \ PORT_INFO(vendor, product, num, name, voices, \ SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM | \ SNDRV_SEQ_PORT_TYPE_MIDI_GM2 | \ SNDRV_SEQ_PORT_TYPE_MIDI_GS | \ SNDRV_SEQ_PORT_TYPE_MIDI_XG | \ SNDRV_SEQ_PORT_TYPE_MIDI_MT32 | \ SNDRV_SEQ_PORT_TYPE_HARDWARE | \ SNDRV_SEQ_PORT_TYPE_SYNTHESIZER) /* Yamaha MOTIF XF */ GM_SYNTH_PORT(0x0499, 0x105c, 0, "%s Tone Generator", 128), CONTROL_PORT(0x0499, 0x105c, 1, "%s Remote Control"), EXTERNAL_PORT(0x0499, 0x105c, 2, "%s Thru"), CONTROL_PORT(0x0499, 0x105c, 3, "%s Editor"), /* Roland UA-100 */ CONTROL_PORT(0x0582, 0x0000, 2, "%s Control"), /* Roland SC-8850 */ SOUNDCANVAS_PORT(0x0582, 0x0003, 0, "%s Part A", 128), SOUNDCANVAS_PORT(0x0582, 0x0003, 1, "%s Part B", 128), SOUNDCANVAS_PORT(0x0582, 0x0003, 2, "%s Part C", 128), SOUNDCANVAS_PORT(0x0582, 0x0003, 3, "%s Part D", 128), EXTERNAL_PORT(0x0582, 0x0003, 4, "%s MIDI 1"), EXTERNAL_PORT(0x0582, 0x0003, 5, "%s MIDI 2"), /* Roland U-8 */ EXTERNAL_PORT(0x0582, 0x0004, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x0004, 1, "%s Control"), /* Roland SC-8820 */ SOUNDCANVAS_PORT(0x0582, 0x0007, 0, "%s Part A", 64), SOUNDCANVAS_PORT(0x0582, 0x0007, 1, "%s Part B", 64), EXTERNAL_PORT(0x0582, 0x0007, 2, "%s MIDI"), /* Roland SK-500 */ SOUNDCANVAS_PORT(0x0582, 0x000b, 0, "%s Part A", 64), SOUNDCANVAS_PORT(0x0582, 0x000b, 1, "%s Part B", 64), EXTERNAL_PORT(0x0582, 0x000b, 2, "%s MIDI"), /* Roland SC-D70 */ SOUNDCANVAS_PORT(0x0582, 0x000c, 0, "%s Part A", 64), SOUNDCANVAS_PORT(0x0582, 0x000c, 1, "%s Part B", 64), EXTERNAL_PORT(0x0582, 0x000c, 2, "%s MIDI"), /* Edirol UM-880 */ CONTROL_PORT(0x0582, 0x0014, 8, "%s Control"), /* Edirol SD-90 */ ROLAND_SYNTH_PORT(0x0582, 0x0016, 0, "%s Part A", 128), ROLAND_SYNTH_PORT(0x0582, 0x0016, 1, "%s Part B", 128), EXTERNAL_PORT(0x0582, 0x0016, 2, "%s MIDI 1"), EXTERNAL_PORT(0x0582, 0x0016, 3, "%s MIDI 2"), /* Edirol UM-550 */ CONTROL_PORT(0x0582, 0x0023, 5, "%s Control"), /* Edirol SD-20 */ ROLAND_SYNTH_PORT(0x0582, 0x0027, 0, "%s Part A", 64), ROLAND_SYNTH_PORT(0x0582, 0x0027, 1, "%s Part B", 64), EXTERNAL_PORT(0x0582, 0x0027, 2, "%s MIDI"), /* Edirol SD-80 */ ROLAND_SYNTH_PORT(0x0582, 0x0029, 0, "%s Part A", 128), ROLAND_SYNTH_PORT(0x0582, 0x0029, 1, "%s Part B", 128), 
EXTERNAL_PORT(0x0582, 0x0029, 2, "%s MIDI 1"), EXTERNAL_PORT(0x0582, 0x0029, 3, "%s MIDI 2"), /* Edirol UA-700 */ EXTERNAL_PORT(0x0582, 0x002b, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x002b, 1, "%s Control"), /* Roland VariOS */ EXTERNAL_PORT(0x0582, 0x002f, 0, "%s MIDI"), EXTERNAL_PORT(0x0582, 0x002f, 1, "%s External MIDI"), EXTERNAL_PORT(0x0582, 0x002f, 2, "%s Sync"), /* Edirol PCR */ EXTERNAL_PORT(0x0582, 0x0033, 0, "%s MIDI"), EXTERNAL_PORT(0x0582, 0x0033, 1, "%s 1"), EXTERNAL_PORT(0x0582, 0x0033, 2, "%s 2"), /* BOSS GS-10 */ EXTERNAL_PORT(0x0582, 0x003b, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x003b, 1, "%s Control"), /* Edirol UA-1000 */ EXTERNAL_PORT(0x0582, 0x0044, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x0044, 1, "%s Control"), /* Edirol UR-80 */ EXTERNAL_PORT(0x0582, 0x0048, 0, "%s MIDI"), EXTERNAL_PORT(0x0582, 0x0048, 1, "%s 1"), EXTERNAL_PORT(0x0582, 0x0048, 2, "%s 2"), /* Edirol PCR-A */ EXTERNAL_PORT(0x0582, 0x004d, 0, "%s MIDI"), EXTERNAL_PORT(0x0582, 0x004d, 1, "%s 1"), EXTERNAL_PORT(0x0582, 0x004d, 2, "%s 2"), /* BOSS GT-PRO */ CONTROL_PORT(0x0582, 0x0089, 0, "%s Control"), /* Edirol UM-3EX */ CONTROL_PORT(0x0582, 0x009a, 3, "%s Control"), /* Roland VG-99 */ CONTROL_PORT(0x0582, 0x00b2, 0, "%s Control"), EXTERNAL_PORT(0x0582, 0x00b2, 1, "%s MIDI"), /* Cakewalk Sonar V-Studio 100 */ EXTERNAL_PORT(0x0582, 0x00eb, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x00eb, 1, "%s Control"), /* Roland VB-99 */ CONTROL_PORT(0x0582, 0x0102, 0, "%s Control"), EXTERNAL_PORT(0x0582, 0x0102, 1, "%s MIDI"), /* Roland A-PRO */ EXTERNAL_PORT(0x0582, 0x010f, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x010f, 1, "%s 1"), CONTROL_PORT(0x0582, 0x010f, 2, "%s 2"), /* Roland SD-50 */ ROLAND_SYNTH_PORT(0x0582, 0x0114, 0, "%s Synth", 128), EXTERNAL_PORT(0x0582, 0x0114, 1, "%s MIDI"), CONTROL_PORT(0x0582, 0x0114, 2, "%s Control"), /* Roland OCTA-CAPTURE */ EXTERNAL_PORT(0x0582, 0x0120, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x0120, 1, "%s Control"), EXTERNAL_PORT(0x0582, 0x0121, 0, "%s MIDI"), CONTROL_PORT(0x0582, 0x0121, 1, "%s Control"), /* Roland SPD-SX */ CONTROL_PORT(0x0582, 0x0145, 0, "%s Control"), EXTERNAL_PORT(0x0582, 0x0145, 1, "%s MIDI"), /* Roland A-Series */ CONTROL_PORT(0x0582, 0x0156, 0, "%s Keyboard"), EXTERNAL_PORT(0x0582, 0x0156, 1, "%s MIDI"), /* Roland INTEGRA-7 */ ROLAND_SYNTH_PORT(0x0582, 0x015b, 0, "%s Synth", 128), CONTROL_PORT(0x0582, 0x015b, 1, "%s Control"), /* M-Audio MidiSport 8x8 */ CONTROL_PORT(0x0763, 0x1031, 8, "%s Control"), CONTROL_PORT(0x0763, 0x1033, 8, "%s Control"), /* MOTU Fastlane */ EXTERNAL_PORT(0x07fd, 0x0001, 0, "%s MIDI A"), EXTERNAL_PORT(0x07fd, 0x0001, 1, "%s MIDI B"), /* Emagic Unitor8/AMT8/MT4 */ EXTERNAL_PORT(0x086a, 0x0001, 8, "%s Broadcast"), EXTERNAL_PORT(0x086a, 0x0002, 8, "%s Broadcast"), EXTERNAL_PORT(0x086a, 0x0003, 4, "%s Broadcast"), /* Akai MPD16 */ CONTROL_PORT(0x09e8, 0x0062, 0, "%s Control"), PORT_INFO(0x09e8, 0x0062, 1, "%s MIDI", 0, SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_HARDWARE), /* Access Music Virus TI */ EXTERNAL_PORT(0x133e, 0x0815, 0, "%s MIDI"), PORT_INFO(0x133e, 0x0815, 1, "%s Synth", 0, SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_HARDWARE | SNDRV_SEQ_PORT_TYPE_SYNTHESIZER), }; static struct port_info *find_port_info(struct snd_usb_midi *umidi, int number) { int i; for (i = 0; i < ARRAY_SIZE(snd_usbmidi_port_info); ++i) { if (snd_usbmidi_port_info[i].id == umidi->usb_id && snd_usbmidi_port_info[i].port == number) return &snd_usbmidi_port_info[i]; } return NULL; } static void snd_usbmidi_get_port_info(struct snd_rawmidi *rmidi, 
int number, struct snd_seq_port_info *seq_port_info) { struct snd_usb_midi *umidi = rmidi->private_data; struct port_info *port_info; /* TODO: read port flags from descriptors */ port_info = find_port_info(umidi, number); if (port_info) { seq_port_info->type = port_info->seq_flags; seq_port_info->midi_voices = port_info->voices; } } /* return iJack for the corresponding jackID */ static int find_usb_ijack(struct usb_host_interface *hostif, uint8_t jack_id) { unsigned char *extra = hostif->extra; int extralen = hostif->extralen; struct usb_descriptor_header *h; struct usb_midi_out_jack_descriptor *outjd; struct usb_midi_in_jack_descriptor *injd; size_t sz; while (extralen > 4) { h = (struct usb_descriptor_header *)extra; if (h->bDescriptorType != USB_DT_CS_INTERFACE) goto next; outjd = (struct usb_midi_out_jack_descriptor *)h; if (h->bLength >= sizeof(*outjd) && outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK && outjd->bJackID == jack_id) { sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins); if (outjd->bLength < sz) goto next; return *(extra + sz - 1); } injd = (struct usb_midi_in_jack_descriptor *)h; if (injd->bLength >= sizeof(*injd) && injd->bDescriptorSubtype == UAC_MIDI_IN_JACK && injd->bJackID == jack_id) return injd->iJack; next: if (!extra[0]) break; extralen -= extra[0]; extra += extra[0]; } return 0; } static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, int stream, int number, int jack_id, struct snd_rawmidi_substream **rsubstream) { struct port_info *port_info; const char *name_format; struct usb_interface *intf; struct usb_host_interface *hostif; uint8_t jack_name_buf[32]; uint8_t *default_jack_name = "MIDI"; uint8_t *jack_name = default_jack_name; uint8_t iJack; int res; struct snd_rawmidi_substream *substream = snd_usbmidi_find_substream(umidi, stream, number); if (!substream) { dev_err(&umidi->dev->dev, "substream %d:%d not found\n", stream, number); return; } intf = umidi->iface; if (intf && jack_id >= 0) { hostif = intf->cur_altsetting; iJack = find_usb_ijack(hostif, jack_id); if (iJack != 0) { res = usb_string(umidi->dev, iJack, jack_name_buf, ARRAY_SIZE(jack_name_buf)); if (res) jack_name = jack_name_buf; } } port_info = find_port_info(umidi, number); name_format = port_info ? port_info->name : (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); snprintf(substream->name, sizeof(substream->name), name_format, umidi->card->shortname, jack_name, number + 1); *rsubstream = substream; } /* * Creates the endpoints and their ports. 
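 * For example, an endpoint whose out_cables bitmap is 0x0003 contributes two
 * output substreams, one per cable; substream numbers are assigned
 * consecutively across all endpoints in the order the cables are found.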
*/ static int snd_usbmidi_create_endpoints(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoints) { int i, j, err; int out_ports = 0, in_ports = 0; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { if (endpoints[i].out_cables) { err = snd_usbmidi_out_endpoint_create(umidi, &endpoints[i], &umidi->endpoints[i]); if (err < 0) return err; } if (endpoints[i].in_cables) { err = snd_usbmidi_in_endpoint_create(umidi, &endpoints[i], &umidi->endpoints[i]); if (err < 0) return err; } for (j = 0; j < 0x10; ++j) { if (endpoints[i].out_cables & (1 << j)) { snd_usbmidi_init_substream(umidi, SNDRV_RAWMIDI_STREAM_OUTPUT, out_ports, endpoints[i].assoc_out_jacks[j], &umidi->endpoints[i].out->ports[j].substream); ++out_ports; } if (endpoints[i].in_cables & (1 << j)) { snd_usbmidi_init_substream(umidi, SNDRV_RAWMIDI_STREAM_INPUT, in_ports, endpoints[i].assoc_in_jacks[j], &umidi->endpoints[i].in->ports[j].substream); ++in_ports; } } } dev_dbg(&umidi->dev->dev, "created %d output and %d input ports\n", out_ports, in_ports); return 0; } static struct usb_ms_endpoint_descriptor *find_usb_ms_endpoint_descriptor( struct usb_host_endpoint *hostep) { unsigned char *extra = hostep->extra; int extralen = hostep->extralen; while (extralen > 3) { struct usb_ms_endpoint_descriptor *ms_ep = (struct usb_ms_endpoint_descriptor *)extra; if (ms_ep->bLength > 3 && ms_ep->bDescriptorType == USB_DT_CS_ENDPOINT && ms_ep->bDescriptorSubtype == UAC_MS_GENERAL) return ms_ep; if (!extra[0]) break; extralen -= extra[0]; extra += extra[0]; } return NULL; } /* * Returns MIDIStreaming device capabilities. */ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoints) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; struct usb_ms_header_descriptor *ms_header; struct usb_host_endpoint *hostep; struct usb_endpoint_descriptor *ep; struct usb_ms_endpoint_descriptor *ms_ep; int i, j, epidx; intf = umidi->iface; if (!intf) return -ENXIO; hostif = &intf->altsetting[0]; intfd = get_iface_desc(hostif); ms_header = (struct usb_ms_header_descriptor *)hostif->extra; if (hostif->extralen >= 7 && ms_header->bLength >= 7 && ms_header->bDescriptorType == USB_DT_CS_INTERFACE && ms_header->bDescriptorSubtype == UAC_HEADER) dev_dbg(&umidi->dev->dev, "MIDIStreaming version %02x.%02x\n", ((uint8_t *)&ms_header->bcdMSC)[1], ((uint8_t *)&ms_header->bcdMSC)[0]); else dev_warn(&umidi->dev->dev, "MIDIStreaming interface descriptor not found\n"); epidx = 0; for (i = 0; i < intfd->bNumEndpoints; ++i) { hostep = &hostif->endpoint[i]; ep = get_ep_desc(hostep); if (!usb_endpoint_xfer_bulk(ep) && !usb_endpoint_xfer_int(ep)) continue; ms_ep = find_usb_ms_endpoint_descriptor(hostep); if (!ms_ep) continue; if (ms_ep->bLength <= sizeof(*ms_ep)) continue; if (ms_ep->bNumEmbMIDIJack > 0x10) continue; if (ms_ep->bLength < sizeof(*ms_ep) + ms_ep->bNumEmbMIDIJack) continue; if (usb_endpoint_dir_out(ep)) { if (endpoints[epidx].out_ep) { if (++epidx >= MIDI_MAX_ENDPOINTS) { dev_warn(&umidi->dev->dev, "too many endpoints\n"); break; } } endpoints[epidx].out_ep = usb_endpoint_num(ep); if (usb_endpoint_xfer_int(ep)) endpoints[epidx].out_interval = ep->bInterval; else if (snd_usb_get_speed(umidi->dev) == USB_SPEED_LOW) /* * Low speed bulk transfers don't exist, so * force interrupt transfers for devices like * ESI MIDI Mate that try to use them anyway. 
*/ endpoints[epidx].out_interval = 1; endpoints[epidx].out_cables = (1 << ms_ep->bNumEmbMIDIJack) - 1; for (j = 0; j < ms_ep->bNumEmbMIDIJack; ++j) endpoints[epidx].assoc_out_jacks[j] = ms_ep->baAssocJackID[j]; for (; j < ARRAY_SIZE(endpoints[epidx].assoc_out_jacks); ++j) endpoints[epidx].assoc_out_jacks[j] = -1; dev_dbg(&umidi->dev->dev, "EP %02X: %d jack(s)\n", ep->bEndpointAddress, ms_ep->bNumEmbMIDIJack); } else { if (endpoints[epidx].in_ep) { if (++epidx >= MIDI_MAX_ENDPOINTS) { dev_warn(&umidi->dev->dev, "too many endpoints\n"); break; } } endpoints[epidx].in_ep = usb_endpoint_num(ep); if (usb_endpoint_xfer_int(ep)) endpoints[epidx].in_interval = ep->bInterval; else if (snd_usb_get_speed(umidi->dev) == USB_SPEED_LOW) endpoints[epidx].in_interval = 1; endpoints[epidx].in_cables = (1 << ms_ep->bNumEmbMIDIJack) - 1; for (j = 0; j < ms_ep->bNumEmbMIDIJack; ++j) endpoints[epidx].assoc_in_jacks[j] = ms_ep->baAssocJackID[j]; for (; j < ARRAY_SIZE(endpoints[epidx].assoc_in_jacks); ++j) endpoints[epidx].assoc_in_jacks[j] = -1; dev_dbg(&umidi->dev->dev, "EP %02X: %d jack(s)\n", ep->bEndpointAddress, ms_ep->bNumEmbMIDIJack); } } return 0; } static int roland_load_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *info) { static const char *const names[] = { "High Load", "Light Load" }; return snd_ctl_enum_info(info, 1, 2, names); } static int roland_load_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *value) { value->value.enumerated.item[0] = kcontrol->private_value; return 0; } static int roland_load_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *value) { struct snd_usb_midi *umidi = kcontrol->private_data; int changed; if (value->value.enumerated.item[0] > 1) return -EINVAL; mutex_lock(&umidi->mutex); changed = value->value.enumerated.item[0] != kcontrol->private_value; if (changed) kcontrol->private_value = value->value.enumerated.item[0]; mutex_unlock(&umidi->mutex); return changed; } static const struct snd_kcontrol_new roland_load_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "MIDI Input Mode", .info = roland_load_info, .get = roland_load_get, .put = roland_load_put, .private_value = 1, }; /* * On Roland devices, use the second alternate setting to be able to use * the interrupt input endpoint. */ static void snd_usbmidi_switch_roland_altsetting(struct snd_usb_midi *umidi) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; intf = umidi->iface; if (!intf || intf->num_altsetting != 2) return; hostif = &intf->altsetting[1]; intfd = get_iface_desc(hostif); /* If either or both of the endpoints support interrupt transfer, * then use the alternate setting */ if (intfd->bNumEndpoints != 2 || !((get_endpoint(hostif, 0)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT || (get_endpoint(hostif, 1)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_INT)) return; dev_dbg(&umidi->dev->dev, "switching to altsetting %d with int ep\n", intfd->bAlternateSetting); usb_set_interface(umidi->dev, intfd->bInterfaceNumber, intfd->bAlternateSetting); umidi->roland_load_ctl = snd_ctl_new1(&roland_load_ctl, umidi); if (snd_ctl_add(umidi->card, umidi->roland_load_ctl) < 0) umidi->roland_load_ctl = NULL; } /* * Try to find any usable endpoints in the interface. 
*/ static int snd_usbmidi_detect_endpoints(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoint, int max_endpoints) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; struct usb_endpoint_descriptor *epd; int i, out_eps = 0, in_eps = 0; if (USB_ID_VENDOR(umidi->usb_id) == 0x0582) snd_usbmidi_switch_roland_altsetting(umidi); if (endpoint[0].out_ep || endpoint[0].in_ep) return 0; intf = umidi->iface; if (!intf || intf->num_altsetting < 1) return -ENOENT; hostif = intf->cur_altsetting; intfd = get_iface_desc(hostif); for (i = 0; i < intfd->bNumEndpoints; ++i) { epd = get_endpoint(hostif, i); if (!usb_endpoint_xfer_bulk(epd) && !usb_endpoint_xfer_int(epd)) continue; if (out_eps < max_endpoints && usb_endpoint_dir_out(epd)) { endpoint[out_eps].out_ep = usb_endpoint_num(epd); if (usb_endpoint_xfer_int(epd)) endpoint[out_eps].out_interval = epd->bInterval; ++out_eps; } if (in_eps < max_endpoints && usb_endpoint_dir_in(epd)) { endpoint[in_eps].in_ep = usb_endpoint_num(epd); if (usb_endpoint_xfer_int(epd)) endpoint[in_eps].in_interval = epd->bInterval; ++in_eps; } } return (out_eps || in_eps) ? 0 : -ENOENT; } /* * Detects the endpoints for one-port-per-endpoint protocols. */ static int snd_usbmidi_detect_per_port_endpoints(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoints) { int err, i; err = snd_usbmidi_detect_endpoints(umidi, endpoints, MIDI_MAX_ENDPOINTS); for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { if (endpoints[i].out_ep) endpoints[i].out_cables = 0x0001; if (endpoints[i].in_ep) endpoints[i].in_cables = 0x0001; } return err; } /* * Detects the endpoints and ports of Yamaha devices. */ static int snd_usbmidi_detect_yamaha(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoint) { struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; uint8_t *cs_desc; intf = umidi->iface; if (!intf) return -ENOENT; hostif = intf->altsetting; intfd = get_iface_desc(hostif); if (intfd->bNumEndpoints < 1) return -ENOENT; /* * For each port there is one MIDI_IN/OUT_JACK descriptor, not * necessarily with any useful contents. So simply count 'em. */ for (cs_desc = hostif->extra; cs_desc < hostif->extra + hostif->extralen && cs_desc[0] >= 2; cs_desc += cs_desc[0]) { if (cs_desc[1] == USB_DT_CS_INTERFACE) { if (cs_desc[2] == UAC_MIDI_IN_JACK) endpoint->in_cables = (endpoint->in_cables << 1) | 1; else if (cs_desc[2] == UAC_MIDI_OUT_JACK) endpoint->out_cables = (endpoint->out_cables << 1) | 1; } } if (!endpoint->in_cables && !endpoint->out_cables) return -ENOENT; return snd_usbmidi_detect_endpoints(umidi, endpoint, 1); } /* * Detects the endpoints and ports of Roland devices. */ static int snd_usbmidi_detect_roland(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoint) { struct usb_interface *intf; struct usb_host_interface *hostif; u8 *cs_desc; intf = umidi->iface; if (!intf) return -ENOENT; hostif = intf->altsetting; /* * Some devices have a descriptor <06 24 F1 02 <inputs> <outputs>>, * some have standard class descriptors, or both kinds, or neither. 
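 * For example, the (illustrative) vendor-specific descriptor
 * 06 24 F1 02 02 03 would announce 2 input and 3 output cables, i.e.
 * in_cables 0x0003 and out_cables 0x0007.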
*/ for (cs_desc = hostif->extra; cs_desc < hostif->extra + hostif->extralen && cs_desc[0] >= 2; cs_desc += cs_desc[0]) { if (cs_desc[0] >= 6 && cs_desc[1] == USB_DT_CS_INTERFACE && cs_desc[2] == 0xf1 && cs_desc[3] == 0x02) { if (cs_desc[4] > 0x10 || cs_desc[5] > 0x10) continue; endpoint->in_cables = (1 << cs_desc[4]) - 1; endpoint->out_cables = (1 << cs_desc[5]) - 1; return snd_usbmidi_detect_endpoints(umidi, endpoint, 1); } else if (cs_desc[0] >= 7 && cs_desc[1] == USB_DT_CS_INTERFACE && cs_desc[2] == UAC_HEADER) { return snd_usbmidi_get_ms_info(umidi, endpoint); } } return -ENODEV; } /* * Creates the endpoints and their ports for Midiman devices. */ static int snd_usbmidi_create_endpoints_midiman(struct snd_usb_midi *umidi, struct snd_usb_midi_endpoint_info *endpoint) { struct snd_usb_midi_endpoint_info ep_info; struct usb_interface *intf; struct usb_host_interface *hostif; struct usb_interface_descriptor *intfd; struct usb_endpoint_descriptor *epd; int cable, err; intf = umidi->iface; if (!intf) return -ENOENT; hostif = intf->altsetting; intfd = get_iface_desc(hostif); /* * The various MidiSport devices have more or less random endpoint * numbers, so we have to identify the endpoints by their index in * the descriptor array, like the driver for that other OS does. * * There is one interrupt input endpoint for all input ports, one * bulk output endpoint for even-numbered ports, and one for odd- * numbered ports. Both bulk output endpoints have corresponding * input bulk endpoints (at indices 1 and 3) which aren't used. */ if (intfd->bNumEndpoints < (endpoint->out_cables > 0x0001 ? 5 : 3)) { dev_dbg(&umidi->dev->dev, "not enough endpoints\n"); return -ENOENT; } epd = get_endpoint(hostif, 0); if (!usb_endpoint_dir_in(epd) || !usb_endpoint_xfer_int(epd)) { dev_dbg(&umidi->dev->dev, "endpoint[0] isn't interrupt\n"); return -ENXIO; } epd = get_endpoint(hostif, 2); if (!usb_endpoint_dir_out(epd) || !usb_endpoint_xfer_bulk(epd)) { dev_dbg(&umidi->dev->dev, "endpoint[2] isn't bulk output\n"); return -ENXIO; } if (endpoint->out_cables > 0x0001) { epd = get_endpoint(hostif, 4); if (!usb_endpoint_dir_out(epd) || !usb_endpoint_xfer_bulk(epd)) { dev_dbg(&umidi->dev->dev, "endpoint[4] isn't bulk output\n"); return -ENXIO; } } ep_info.out_ep = get_endpoint(hostif, 2)->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; ep_info.out_interval = 0; ep_info.out_cables = endpoint->out_cables & 0x5555; err = snd_usbmidi_out_endpoint_create(umidi, &ep_info, &umidi->endpoints[0]); if (err < 0) return err; ep_info.in_ep = get_endpoint(hostif, 0)->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; ep_info.in_interval = get_endpoint(hostif, 0)->bInterval; ep_info.in_cables = endpoint->in_cables; err = snd_usbmidi_in_endpoint_create(umidi, &ep_info, &umidi->endpoints[0]); if (err < 0) return err; if (endpoint->out_cables > 0x0001) { ep_info.out_ep = get_endpoint(hostif, 4)->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK; ep_info.out_cables = endpoint->out_cables & 0xaaaa; err = snd_usbmidi_out_endpoint_create(umidi, &ep_info, &umidi->endpoints[1]); if (err < 0) return err; } for (cable = 0; cable < 0x10; ++cable) { if (endpoint->out_cables & (1 << cable)) snd_usbmidi_init_substream(umidi, SNDRV_RAWMIDI_STREAM_OUTPUT, cable, -1 /* prevent trying to find jack */, &umidi->endpoints[cable & 1].out->ports[cable].substream); if (endpoint->in_cables & (1 << cable)) snd_usbmidi_init_substream(umidi, SNDRV_RAWMIDI_STREAM_INPUT, cable, -1 /* prevent trying to find jack */, &umidi->endpoints[0].in->ports[cable].substream); } return 0; } 
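/*
 * Illustration (assuming an 8-output MidiSport, i.e. out_cables == 0x00ff):
 * the 0x5555 mask above routes the even-numbered cables 0, 2, 4 and 6 to the
 * bulk-out endpoint at descriptor index 2, the 0xaaaa mask routes the
 * odd-numbered cables 1, 3, 5 and 7 to the one at index 4, and all input
 * cables share the interrupt-in endpoint at index 0.
 */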
static const struct snd_rawmidi_global_ops snd_usbmidi_ops = { .get_port_info = snd_usbmidi_get_port_info, }; static int snd_usbmidi_create_rawmidi(struct snd_usb_midi *umidi, int out_ports, int in_ports) { struct snd_rawmidi *rmidi; int err; err = snd_rawmidi_new(umidi->card, "USB MIDI", umidi->next_midi_device++, out_ports, in_ports, &rmidi); if (err < 0) return err; strcpy(rmidi->name, umidi->card->shortname); rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT | SNDRV_RAWMIDI_INFO_INPUT | SNDRV_RAWMIDI_INFO_DUPLEX; rmidi->ops = &snd_usbmidi_ops; rmidi->private_data = umidi; rmidi->private_free = snd_usbmidi_rawmidi_free; snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &snd_usbmidi_output_ops); snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &snd_usbmidi_input_ops); umidi->rmidi = rmidi; return 0; } /* * Temporarily stop input. */ void snd_usbmidi_input_stop(struct list_head *p) { struct snd_usb_midi *umidi; unsigned int i, j; umidi = list_entry(p, struct snd_usb_midi, list); if (!umidi->input_running) return; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; if (ep->in) for (j = 0; j < INPUT_URBS; ++j) usb_kill_urb(ep->in->urbs[j]); } umidi->input_running = 0; } EXPORT_SYMBOL(snd_usbmidi_input_stop); static void snd_usbmidi_input_start_ep(struct snd_usb_midi *umidi, struct snd_usb_midi_in_endpoint *ep) { unsigned int i; unsigned long flags; if (!ep) return; for (i = 0; i < INPUT_URBS; ++i) { struct urb *urb = ep->urbs[i]; spin_lock_irqsave(&umidi->disc_lock, flags); if (!atomic_read(&urb->use_count)) { urb->dev = ep->umidi->dev; snd_usbmidi_submit_urb(urb, GFP_ATOMIC); } spin_unlock_irqrestore(&umidi->disc_lock, flags); } } /* * Resume input after a call to snd_usbmidi_input_stop(). */ void snd_usbmidi_input_start(struct list_head *p) { struct snd_usb_midi *umidi; int i; umidi = list_entry(p, struct snd_usb_midi, list); if (umidi->input_running || !umidi->opened[1]) return; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) snd_usbmidi_input_start_ep(umidi, umidi->endpoints[i].in); umidi->input_running = 1; } EXPORT_SYMBOL(snd_usbmidi_input_start); /* * Prepare for suspend. Typically called from the USB suspend callback. */ void snd_usbmidi_suspend(struct list_head *p) { struct snd_usb_midi *umidi; umidi = list_entry(p, struct snd_usb_midi, list); mutex_lock(&umidi->mutex); snd_usbmidi_input_stop(p); mutex_unlock(&umidi->mutex); } EXPORT_SYMBOL(snd_usbmidi_suspend); /* * Resume. Typically called from the USB resume callback. */ void snd_usbmidi_resume(struct list_head *p) { struct snd_usb_midi *umidi; umidi = list_entry(p, struct snd_usb_midi, list); mutex_lock(&umidi->mutex); snd_usbmidi_input_start(p); mutex_unlock(&umidi->mutex); } EXPORT_SYMBOL(snd_usbmidi_resume); /* * Creates and registers everything needed for a MIDI streaming interface. 
*/ int __snd_usbmidi_create(struct snd_card *card, struct usb_interface *iface, struct list_head *midi_list, const struct snd_usb_audio_quirk *quirk, unsigned int usb_id, unsigned int *num_rawmidis) { struct snd_usb_midi *umidi; struct snd_usb_midi_endpoint_info endpoints[MIDI_MAX_ENDPOINTS]; int out_ports, in_ports; int i, err; umidi = kzalloc(sizeof(*umidi), GFP_KERNEL); if (!umidi) return -ENOMEM; umidi->dev = interface_to_usbdev(iface); umidi->card = card; umidi->iface = iface; umidi->quirk = quirk; umidi->usb_protocol_ops = &snd_usbmidi_standard_ops; if (num_rawmidis) umidi->next_midi_device = *num_rawmidis; spin_lock_init(&umidi->disc_lock); init_rwsem(&umidi->disc_rwsem); mutex_init(&umidi->mutex); if (!usb_id) usb_id = USB_ID(le16_to_cpu(umidi->dev->descriptor.idVendor), le16_to_cpu(umidi->dev->descriptor.idProduct)); umidi->usb_id = usb_id; timer_setup(&umidi->error_timer, snd_usbmidi_error_timer, 0); /* detect the endpoint(s) to use */ memset(endpoints, 0, sizeof(endpoints)); switch (quirk ? quirk->type : QUIRK_MIDI_STANDARD_INTERFACE) { case QUIRK_MIDI_STANDARD_INTERFACE: err = snd_usbmidi_get_ms_info(umidi, endpoints); if (umidi->usb_id == USB_ID(0x0763, 0x0150)) /* M-Audio Uno */ umidi->usb_protocol_ops = &snd_usbmidi_maudio_broken_running_status_ops; break; case QUIRK_MIDI_US122L: umidi->usb_protocol_ops = &snd_usbmidi_122l_ops; fallthrough; case QUIRK_MIDI_FIXED_ENDPOINT: memcpy(&endpoints[0], quirk->data, sizeof(struct snd_usb_midi_endpoint_info)); err = snd_usbmidi_detect_endpoints(umidi, &endpoints[0], 1); break; case QUIRK_MIDI_YAMAHA: err = snd_usbmidi_detect_yamaha(umidi, &endpoints[0]); break; case QUIRK_MIDI_ROLAND: err = snd_usbmidi_detect_roland(umidi, &endpoints[0]); break; case QUIRK_MIDI_MIDIMAN: umidi->usb_protocol_ops = &snd_usbmidi_midiman_ops; memcpy(&endpoints[0], quirk->data, sizeof(struct snd_usb_midi_endpoint_info)); err = 0; break; case QUIRK_MIDI_NOVATION: umidi->usb_protocol_ops = &snd_usbmidi_novation_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_RAW_BYTES: umidi->usb_protocol_ops = &snd_usbmidi_raw_ops; /* * Interface 1 contains isochronous endpoints, but with the same * numbers as in interface 0. Since it is interface 1 that the * USB core has most recently seen, these descriptors are now * associated with the endpoint numbers. This will foul up our * attempts to submit bulk/interrupt URBs to the endpoints in * interface 0, so we have to make sure that the USB core looks * again at interface 0 by calling usb_set_interface() on it. 
*/ if (umidi->usb_id == USB_ID(0x07fd, 0x0001)) /* MOTU Fastlane */ usb_set_interface(umidi->dev, 0, 0); err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_EMAGIC: umidi->usb_protocol_ops = &snd_usbmidi_emagic_ops; memcpy(&endpoints[0], quirk->data, sizeof(struct snd_usb_midi_endpoint_info)); err = snd_usbmidi_detect_endpoints(umidi, &endpoints[0], 1); break; case QUIRK_MIDI_CME: umidi->usb_protocol_ops = &snd_usbmidi_cme_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_AKAI: umidi->usb_protocol_ops = &snd_usbmidi_akai_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); /* endpoint 1 is input-only */ endpoints[1].out_cables = 0; break; case QUIRK_MIDI_FTDI: umidi->usb_protocol_ops = &snd_usbmidi_ftdi_ops; /* set baud rate to 31250 (48 MHz / 16 / 96) */ err = usb_control_msg(umidi->dev, usb_sndctrlpipe(umidi->dev, 0), 3, 0x40, 0x60, 0, NULL, 0, 1000); if (err < 0) break; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_CH345: umidi->usb_protocol_ops = &snd_usbmidi_ch345_broken_sysex_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; default: dev_err(&umidi->dev->dev, "invalid quirk type %d\n", quirk->type); err = -ENXIO; break; } if (err < 0) goto free_midi; /* create rawmidi device */ out_ports = 0; in_ports = 0; for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { out_ports += hweight16(endpoints[i].out_cables); in_ports += hweight16(endpoints[i].in_cables); } err = snd_usbmidi_create_rawmidi(umidi, out_ports, in_ports); if (err < 0) goto free_midi; /* create endpoint/port structures */ if (quirk && quirk->type == QUIRK_MIDI_MIDIMAN) err = snd_usbmidi_create_endpoints_midiman(umidi, &endpoints[0]); else err = snd_usbmidi_create_endpoints(umidi, endpoints); if (err < 0) goto exit; usb_autopm_get_interface_no_resume(umidi->iface); list_add_tail(&umidi->list, midi_list); if (num_rawmidis) *num_rawmidis = umidi->next_midi_device; return 0; free_midi: kfree(umidi); exit: return err; } EXPORT_SYMBOL(__snd_usbmidi_create);
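/*
 * Illustrative sketch only (not part of the driver): roughly how a
 * QUIRK_MIDI_FIXED_ENDPOINT entry feeds __snd_usbmidi_create() above.
 * quirk->data points at a struct snd_usb_midi_endpoint_info; leaving the
 * endpoint numbers at 0 lets snd_usbmidi_detect_endpoints() fill them in.
 * The names and values below are made up for illustration.
 */
#if 0
static const struct snd_usb_midi_endpoint_info example_ep_info = {
	.out_cables = 0x0001,	/* one output port on cable 0 */
	.in_cables  = 0x0001,	/* one input port on cable 0 */
};

static const struct snd_usb_audio_quirk example_midi_quirk = {
	.type = QUIRK_MIDI_FIXED_ENDPOINT,
	.data = &example_ep_info,
};
#endif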
// SPDX-License-Identifier: GPL-2.0 /* * DMA memory management for framework level HCD code (hc_driver) * * This implementation plugs in through generic "usb_bus" level methods, * and should work with all USB controllers, regardless of bus type. * * Released under the GPLv2 only. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/device.h> #include <linux/mm.h> #include <linux/io.h> #include <linux/dma-mapping.h> #include <linux/dmapool.h> #include <linux/genalloc.h> #include <linux/usb.h> #include <linux/usb/hcd.h> /* * DMA-Coherent Buffers */ /* FIXME tune these based on pool statistics ... */ static size_t pool_max[HCD_BUFFER_POOLS] = { 32, 128, 512, 2048, }; void __init usb_init_pool_max(void) { /* * The pool_max values must never be smaller than * ARCH_DMA_MINALIGN. */ if (ARCH_DMA_MINALIGN <= 32) ; /* Original value is okay */ else if (ARCH_DMA_MINALIGN <= 64) pool_max[0] = 64; else if (ARCH_DMA_MINALIGN <= 128) pool_max[0] = 0; /* Don't use this pool */ else BUILD_BUG(); /* We don't allow this */ } /* SETUP primitives */ /** * hcd_buffer_create - initialize buffer pools * @hcd: the bus whose buffer pools are to be initialized * * Context: task context, might sleep * * Call this as part of initializing a host controller that uses the dma * memory allocators. It initializes some pools of dma-coherent memory that * will be shared by all drivers using that controller. * * Call hcd_buffer_destroy() to clean up after using those pools. * * Return: 0 if successful. A negative errno value otherwise. */ int hcd_buffer_create(struct usb_hcd *hcd) { char name[16]; int i, size; if (hcd->localmem_pool || !hcd_uses_dma(hcd)) return 0; for (i = 0; i < HCD_BUFFER_POOLS; i++) { size = pool_max[i]; if (!size) continue; snprintf(name, sizeof(name), "buffer-%d", size); hcd->pool[i] = dma_pool_create(name, hcd->self.sysdev, size, size, 0); if (!hcd->pool[i]) { hcd_buffer_destroy(hcd); return -ENOMEM; } } return 0; } /** * hcd_buffer_destroy - deallocate buffer pools * @hcd: the bus whose buffer pools are to be destroyed * * Context: task context, might sleep * * This frees the buffer pools created by hcd_buffer_create(). */ void hcd_buffer_destroy(struct usb_hcd *hcd) { int i; if (!IS_ENABLED(CONFIG_HAS_DMA)) return; for (i = 0; i < HCD_BUFFER_POOLS; i++) { dma_pool_destroy(hcd->pool[i]); hcd->pool[i] = NULL; } } /* sometimes alloc/free could use kmalloc with GFP_DMA, for * better sharing and to leverage mm/slab.c intelligence.
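 * With the default pool_max table above, for example, a 100-byte request is
 * served from the 128-byte pool, while anything larger than 2048 bytes falls
 * through to dma_alloc_coherent().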
*/ void *hcd_buffer_alloc( struct usb_bus *bus, size_t size, gfp_t mem_flags, dma_addr_t *dma ) { struct usb_hcd *hcd = bus_to_hcd(bus); int i; if (size == 0) return NULL; if (hcd->localmem_pool) return gen_pool_dma_alloc(hcd->localmem_pool, size, dma); /* some USB hosts just use PIO */ if (!hcd_uses_dma(hcd)) { *dma = ~(dma_addr_t) 0; return kmalloc(size, mem_flags); } for (i = 0; i < HCD_BUFFER_POOLS; i++) { if (size <= pool_max[i]) return dma_pool_alloc(hcd->pool[i], mem_flags, dma); } return dma_alloc_coherent(hcd->self.sysdev, size, dma, mem_flags); } void hcd_buffer_free( struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma ) { struct usb_hcd *hcd = bus_to_hcd(bus); int i; if (!addr) return; if (hcd->localmem_pool) { gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size); return; } if (!hcd_uses_dma(hcd)) { kfree(addr); return; } for (i = 0; i < HCD_BUFFER_POOLS; i++) { if (size <= pool_max[i]) { dma_pool_free(hcd->pool[i], addr, dma); return; } } dma_free_coherent(hcd->self.sysdev, size, addr, dma); } void *hcd_buffer_alloc_pages(struct usb_hcd *hcd, size_t size, gfp_t mem_flags, dma_addr_t *dma) { if (size == 0) return NULL; if (hcd->localmem_pool) return gen_pool_dma_alloc_align(hcd->localmem_pool, size, dma, PAGE_SIZE); /* some USB hosts just use PIO */ if (!hcd_uses_dma(hcd)) { *dma = DMA_MAPPING_ERROR; return (void *)__get_free_pages(mem_flags, get_order(size)); } return dma_alloc_coherent(hcd->self.sysdev, size, dma, mem_flags); } void hcd_buffer_free_pages(struct usb_hcd *hcd, size_t size, void *addr, dma_addr_t dma) { if (!addr) return; if (hcd->localmem_pool) { gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size); return; } if (!hcd_uses_dma(hcd)) { free_pages((unsigned long)addr, get_order(size)); return; } dma_free_coherent(hcd->self.sysdev, size, addr, dma); }
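hcd_buffer_alloc() above walks pool_max[] and serves the request from the first dma_pool whose bucket is large enough, falling back to dma_alloc_coherent() for anything bigger than 2048 bytes. Below is a userspace-only sketch of that bucket selection; pool_max[] is copied from the table at the top of the file and HCD_BUFFER_POOLS is assumed to be 4.

#include <stdio.h>

/*
 * Userspace illustration only: mirrors how hcd_buffer_alloc() above picks
 * a dma_pool bucket. pool_max[] is copied from the table at the top of the
 * file; HCD_BUFFER_POOLS is assumed to be 4.
 */
static const unsigned long pool_max[4] = { 32, 128, 512, 2048 };

static int pick_pool(unsigned long size)
{
	int i;

	for (i = 0; i < 4; i++)
		if (size <= pool_max[i])
			return i;	/* dma_pool_alloc() from this pool */
	return -1;			/* larger: dma_alloc_coherent() fallback */
}

int main(void)
{
	const unsigned long sizes[] = { 8, 64, 500, 4096 };
	int i;

	for (i = 0; i < 4; i++)
		printf("size %4lu -> pool %d\n", sizes[i], pick_pool(sizes[i]));
	return 0;
}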
/* * include/linux/topology.h * * Written by: Matthew Dobson, IBM Corporation * * Copyright (C) 2002, IBM Corp. * * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Send feedback to <colpatch@us.ibm.com> */ #ifndef _LINUX_TOPOLOGY_H #define _LINUX_TOPOLOGY_H #include <linux/arch_topology.h> #include <linux/cpumask.h> #include <linux/bitops.h> #include <linux/mmzone.h> #include <linux/smp.h> #include <linux/percpu.h> #include <asm/topology.h> #ifndef nr_cpus_node #define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node)) #endif #define for_each_node_with_cpus(node) \ for_each_online_node(node) \ if (nr_cpus_node(node)) int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 #define REMOTE_DISTANCE 20 #define DISTANCE_BITS 8 #ifndef node_distance #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) #endif #ifndef RECLAIM_DISTANCE /* * If the distance between nodes in a system is larger than RECLAIM_DISTANCE * (in whatever arch specific measurement units returned by node_distance()) * and node_reclaim_mode is enabled then the VM will only call node_reclaim() * on nodes within this distance. */ #define RECLAIM_DISTANCE 30 #endif /* * The following tunable allows platforms to override the default node * reclaim distance (RECLAIM_DISTANCE) if remote memory accesses are * sufficiently fast that the default value actually hurts * performance. * * AMD EPYC machines use this because even though the 2-hop distance * is 32 (3.2x slower than a local memory access) performance actually * *improves* if allowed to reclaim memory and load balance tasks * between NUMA nodes 2-hops apart.
*/ extern int __read_mostly node_reclaim_distance; #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) #endif #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DECLARE_PER_CPU(int, numa_node); #ifndef numa_node_id /* Returns the number of the current Node. */ static inline int numa_node_id(void) { return raw_cpu_read(numa_node); } #endif #ifndef cpu_to_node static inline int cpu_to_node(int cpu) { return per_cpu(numa_node, cpu); } #endif #ifndef set_numa_node static inline void set_numa_node(int node) { this_cpu_write(numa_node, node); } #endif #ifndef set_cpu_numa_node static inline void set_cpu_numa_node(int cpu, int node) { per_cpu(numa_node, cpu) = node; } #endif #else /* !CONFIG_USE_PERCPU_NUMA_NODE_ID */ /* Returns the number of the current Node. */ #ifndef numa_node_id static inline int numa_node_id(void) { return cpu_to_node(raw_smp_processor_id()); } #endif #endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */ #ifdef CONFIG_HAVE_MEMORYLESS_NODES /* * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined. * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem(). */ DECLARE_PER_CPU(int, _numa_mem_); #ifndef set_numa_mem static inline void set_numa_mem(int node) { this_cpu_write(_numa_mem_, node); } #endif #ifndef numa_mem_id /* Returns the number of the nearest Node with memory */ static inline int numa_mem_id(void) { return raw_cpu_read(_numa_mem_); } #endif #ifndef cpu_to_mem static inline int cpu_to_mem(int cpu) { return per_cpu(_numa_mem_, cpu); } #endif #ifndef set_cpu_numa_mem static inline void set_cpu_numa_mem(int cpu, int node) { per_cpu(_numa_mem_, cpu) = node; } #endif #else /* !CONFIG_HAVE_MEMORYLESS_NODES */ #ifndef numa_mem_id /* Returns the number of the nearest Node with memory */ static inline int numa_mem_id(void) { return numa_node_id(); } #endif #ifndef cpu_to_mem static inline int cpu_to_mem(int cpu) { return cpu_to_node(cpu); } #endif #endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ #if defined(topology_die_id) && defined(topology_die_cpumask) #define TOPOLOGY_DIE_SYSFS #endif #if defined(topology_cluster_id) && defined(topology_cluster_cpumask) #define TOPOLOGY_CLUSTER_SYSFS #endif #if defined(topology_book_id) && defined(topology_book_cpumask) #define TOPOLOGY_BOOK_SYSFS #endif #if defined(topology_drawer_id) && defined(topology_drawer_cpumask) #define TOPOLOGY_DRAWER_SYSFS #endif #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_die_id #define topology_die_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_cluster_id #define topology_cluster_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif #ifndef topology_book_id #define topology_book_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_drawer_id #define topology_drawer_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_ppin #define topology_ppin(cpu) ((void)(cpu), 0ull) #endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_core_cpumask #define topology_core_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_cluster_cpumask #define topology_cluster_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_die_cpumask #define topology_die_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_book_cpumask #define topology_book_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_drawer_cpumask #define 
topology_drawer_cpumask(cpu) cpumask_of(cpu) #endif #if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask) static inline const struct cpumask *cpu_smt_mask(int cpu) { return topology_sibling_cpumask(cpu); } #endif static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); } #ifdef CONFIG_NUMA int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node); extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops); #else static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) { return cpumask_nth_and(cpu, cpus, cpu_online_mask); } static inline const struct cpumask * sched_numa_hop_mask(unsigned int node, unsigned int hops) { return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_NUMA */ /** * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance * from a given node. * @mask: the iteration variable. * @node: the NUMA node to start the search from. * * Requires rcu_lock to be held. * * Yields cpu_online_mask for @node == NUMA_NO_NODE. */ #define for_each_numa_hop_mask(mask, node) \ for (unsigned int __hops = 0; \ mask = (node != NUMA_NO_NODE || __hops) ? \ sched_numa_hop_mask(node, __hops) : \ cpu_online_mask, \ !IS_ERR_OR_NULL(mask); \ __hops++) #endif /* _LINUX_TOPOLOGY_H */
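The for_each_numa_hop_mask() kerneldoc above describes iterating cpumasks of increasing NUMA distance under RCU. Below is a kernel-context sketch (not a standalone program) of one common pattern; the function name visit_cpus_by_distance() is hypothetical, and for_each_cpu_andnot() from <linux/cpumask.h> is assumed to be available.

/*
 * Kernel-context sketch (not standalone): visit CPUs in order of
 * increasing NUMA distance from @node. Requires rcu_read_lock() per the
 * kerneldoc above; assumes for_each_cpu_andnot() from <linux/cpumask.h>.
 * The function name is hypothetical.
 */
static void visit_cpus_by_distance(int node)
{
	const struct cpumask *mask, *prev = cpu_none_mask;
	unsigned int cpu;

	rcu_read_lock();
	for_each_numa_hop_mask(mask, node) {
		/* CPUs in @mask but not in @prev form the next hop */
		for_each_cpu_andnot(cpu, mask, prev)
			pr_info("cpu %u reached at this hop from node %d\n",
				cpu, node);
		prev = mask;
	}
	rcu_read_unlock();
}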
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _LINUX_KCOV_IOCTLS_H #define _LINUX_KCOV_IOCTLS_H #include <linux/types.h> /* * Argument for KCOV_REMOTE_ENABLE ioctl, see Documentation/dev-tools/kcov.rst * and the comment before kcov_remote_start() for usage details. */ struct kcov_remote_arg { __u32 trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ __u32 area_size; /* Length of coverage buffer in words */ __u32 num_handles; /* Size of handles array */ __aligned_u64 common_handle; __aligned_u64 handles[]; }; #define KCOV_REMOTE_MAX_HANDLES 0x100 #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) #define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) enum { /* * Tracing coverage collection mode. * Covered PCs are collected in a per-task buffer. * In new KCOV version the mode is chosen by calling * ioctl(fd, KCOV_ENABLE, mode). In older versions the mode argument * was supposed to be 0 in such a call. So, for reasons of backward * compatibility, we have chosen the value KCOV_TRACE_PC to be 0. */ KCOV_TRACE_PC = 0, /* Collecting comparison operands mode. */ KCOV_TRACE_CMP = 1, }; /* * The format for the types of collected comparisons. * * Bit 0 shows whether one of the arguments is a compile-time constant. * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. */ #define KCOV_CMP_CONST (1 << 0) #define KCOV_CMP_SIZE(n) ((n) << 1) #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) #define KCOV_SUBSYSTEM_USB (0x01ull << 56) #define KCOV_SUBSYSTEM_MASK (0xffull << 56) #define KCOV_INSTANCE_MASK (0xffffffffull) static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) { if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) return 0; return subsys | inst; } #endif /* _LINUX_KCOV_IOCTLS_H */
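kcov_remote_handle() above packs an 8-bit subsystem id (bits 63:56) and a 32-bit instance id (bits 31:0) into one 64-bit handle. Below is a standalone sketch of that bit layout; the constants are mirrored locally rather than taken from the uapi header.

#include <stdio.h>

/*
 * Standalone sketch of the kcov_remote_handle() bit layout above.
 * The constants are mirrored locally; only the packing is illustrated.
 */
#define SUBSYS_USB	(0x01ull << 56)
#define SUBSYS_MASK	(0xffull << 56)
#define INST_MASK	(0xffffffffull)

static unsigned long long remote_handle(unsigned long long subsys,
					unsigned long long inst)
{
	if (subsys & ~SUBSYS_MASK || inst & ~INST_MASK)
		return 0;		/* invalid, as in the header above */
	return subsys | inst;
}

int main(void)
{
	/* e.g. a handle for USB bus number 7 */
	printf("0x%llx\n", remote_handle(SUBSYS_USB, 7));
	return 0;
}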
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for Lenovo: * - ThinkPad USB Keyboard with TrackPoint (tpkbd) * - ThinkPad Compact Bluetooth Keyboard with TrackPoint (cptkbd) * - ThinkPad Compact USB Keyboard with TrackPoint (cptkbd) * - ThinkPad TrackPoint Keyboard II USB/Bluetooth (cptkbd/tpIIkbd) * * Copyright (c) 2012 Bernhard Seibold * Copyright (c) 2014 Jamie Lentin <jm@lentin.co.uk> * * Linux IBM/Lenovo Scrollpoint mouse driver: * - IBM Scrollpoint III * - IBM Scrollpoint Pro * - IBM Scrollpoint Optical * - IBM Scrollpoint Optical 800dpi * - IBM Scrollpoint Optical 800dpi Pro * - Lenovo Scrollpoint Optical * * Copyright (c) 2012 Peter De Wachter <pdewacht@gmail.com> * Copyright (c) 2018 Peter Ganzhorn <peter.ganzhorn@gmail.com> */ /* */ #include <linux/module.h> #include <linux/sysfs.h> #include <linux/device.h> #include <linux/hid.h> #include <linux/input.h> #include <linux/leds.h> #include <linux/workqueue.h> #include
"hid-ids.h" /* Userspace expects F20 for mic-mute KEY_MICMUTE does not work */ #define LENOVO_KEY_MICMUTE KEY_F20 struct lenovo_drvdata { u8 led_report[3]; /* Must be first for proper alignment */ int led_state; struct mutex led_report_mutex; struct led_classdev led_mute; struct led_classdev led_micmute; struct work_struct fn_lock_sync_work; struct hid_device *hdev; int press_to_select; int dragging; int release_to_select; int select_right; int sensitivity; int press_speed; /* 0: Up * 1: Down (undecided) * 2: Scrolling */ u8 middlebutton_state; bool fn_lock; bool middleclick_workaround_cptkbd; }; #define map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) #define TP10UBKBD_LED_OUTPUT_REPORT 9 #define TP10UBKBD_FN_LOCK_LED 0x54 #define TP10UBKBD_MUTE_LED 0x64 #define TP10UBKBD_MICMUTE_LED 0x74 #define TP10UBKBD_LED_OFF 1 #define TP10UBKBD_LED_ON 2 static int lenovo_led_set_tp10ubkbd(struct hid_device *hdev, u8 led_code, enum led_brightness value) { struct lenovo_drvdata *data = hid_get_drvdata(hdev); int ret; mutex_lock(&data->led_report_mutex); data->led_report[0] = TP10UBKBD_LED_OUTPUT_REPORT; data->led_report[1] = led_code; data->led_report[2] = value ? TP10UBKBD_LED_ON : TP10UBKBD_LED_OFF; ret = hid_hw_raw_request(hdev, data->led_report[0], data->led_report, 3, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); if (ret != 3) { if (ret != -ENODEV) hid_err(hdev, "Set LED output report error: %d\n", ret); ret = ret < 0 ? ret : -EIO; } else { ret = 0; } mutex_unlock(&data->led_report_mutex); return ret; } static void lenovo_tp10ubkbd_sync_fn_lock(struct work_struct *work) { struct lenovo_drvdata *data = container_of(work, struct lenovo_drvdata, fn_lock_sync_work); lenovo_led_set_tp10ubkbd(data->hdev, TP10UBKBD_FN_LOCK_LED, data->fn_lock); } static const __u8 lenovo_pro_dock_need_fixup_collection[] = { 0x05, 0x88, /* Usage Page (Vendor Usage Page 0x88) */ 0x09, 0x01, /* Usage (Vendor Usage 0x01) */ 0xa1, 0x01, /* Collection (Application) */ 0x85, 0x04, /* Report ID (4) */ 0x19, 0x00, /* Usage Minimum (0) */ 0x2a, 0xff, 0xff, /* Usage Maximum (65535) */ }; /* Broken ThinkPad TrackPoint II collection (Bluetooth mode) */ static const __u8 lenovo_tpIIbtkbd_need_fixup_collection[] = { 0x06, 0x00, 0xFF, /* Usage Page (Vendor Defined 0xFF00) */ 0x09, 0x01, /* Usage (0x01) */ 0xA1, 0x01, /* Collection (Application) */ 0x85, 0x05, /* Report ID (5) */ 0x1A, 0xF1, 0x00, /* Usage Minimum (0xF1) */ 0x2A, 0xFC, 0x00, /* Usage Maximum (0xFC) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ 0x75, 0x01, /* Report Size (1) */ 0x95, 0x0D, /* Report Count (13) */ 0x81, 0x02, /* Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) */ 0x95, 0x03, /* Report Count (3) */ 0x81, 0x01, /* Input (Const,Array,Abs,No Wrap,Linear,Preferred State,No Null Position) */ }; static __u8 *lenovo_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { switch (hdev->product) { case USB_DEVICE_ID_LENOVO_TPPRODOCK: /* the fixups that need to be done: * - get a reasonable usage max for the vendor collection * 0x8801 from the report ID 4 */ if (*rsize >= 153 && memcmp(&rdesc[140], lenovo_pro_dock_need_fixup_collection, sizeof(lenovo_pro_dock_need_fixup_collection)) == 0) { rdesc[151] = 0x01; rdesc[152] = 0x00; } break; case USB_DEVICE_ID_LENOVO_TPIIBTKBD: if (*rsize >= 263 && memcmp(&rdesc[234], lenovo_tpIIbtkbd_need_fixup_collection, sizeof(lenovo_tpIIbtkbd_need_fixup_collection)) == 0) { rdesc[244] = 0x00; /* usage minimum = 0x00 */ rdesc[247] = 0xff; /* usage 
maximum = 0xff */ rdesc[252] = 0xff; /* logical maximum = 0xff */ rdesc[254] = 0x08; /* report size = 0x08 */ rdesc[256] = 0x01; /* report count = 0x01 */ rdesc[258] = 0x00; /* input = 0x00 */ rdesc[260] = 0x01; /* report count (2) = 0x01 */ } break; } return rdesc; } static int lenovo_input_mapping_tpkbd(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if (usage->hid == (HID_UP_BUTTON | 0x0010)) { /* This sub-device contains trackpoint, mark it */ hid_set_drvdata(hdev, (void *)1); map_key_clear(LENOVO_KEY_MICMUTE); return 1; } return 0; } static int lenovo_input_mapping_cptkbd(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { /* HID_UP_LNVENDOR = USB, HID_UP_MSVENDOR = BT */ if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR || (usage->hid & HID_USAGE_PAGE) == HID_UP_LNVENDOR) { switch (usage->hid & HID_USAGE) { case 0x00f1: /* Fn-F4: Mic mute */ map_key_clear(LENOVO_KEY_MICMUTE); return 1; case 0x00f2: /* Fn-F5: Brightness down */ map_key_clear(KEY_BRIGHTNESSDOWN); return 1; case 0x00f3: /* Fn-F6: Brightness up */ map_key_clear(KEY_BRIGHTNESSUP); return 1; case 0x00f4: /* Fn-F7: External display (projector) */ map_key_clear(KEY_SWITCHVIDEOMODE); return 1; case 0x00f5: /* Fn-F8: Wireless */ map_key_clear(KEY_WLAN); return 1; case 0x00f6: /* Fn-F9: Control panel */ map_key_clear(KEY_CONFIG); return 1; case 0x00f8: /* Fn-F11: View open applications (3 boxes) */ map_key_clear(KEY_SCALE); return 1; case 0x00f9: /* Fn-F12: Open My computer (6 boxes) USB-only */ /* NB: This mapping is invented in raw_event below */ map_key_clear(KEY_FILE); return 1; case 0x00fa: /* Fn-Esc: Fn-lock toggle */ map_key_clear(KEY_FN_ESC); return 1; case 0x00fb: /* Middle mouse button (in native mode) */ map_key_clear(BTN_MIDDLE); return 1; } } /* Compatibility middle/wheel mappings should be ignored */ if (usage->hid == HID_GD_WHEEL) return -1; if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON && (usage->hid & HID_USAGE) == 0x003) return -1; if ((usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER && (usage->hid & HID_USAGE) == 0x238) return -1; /* Map wheel emulation reports: 0xffa1 = USB, 0xff10 = BT */ if ((usage->hid & HID_USAGE_PAGE) == 0xff100000 || (usage->hid & HID_USAGE_PAGE) == 0xffa10000) { field->flags |= HID_MAIN_ITEM_RELATIVE | HID_MAIN_ITEM_VARIABLE; field->logical_minimum = -127; field->logical_maximum = 127; switch (usage->hid & HID_USAGE) { case 0x0000: hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL); return 1; case 0x0001: hid_map_usage(hi, usage, bit, max, EV_REL, REL_WHEEL); return 1; default: return -1; } } return 0; } static int lenovo_input_mapping_tpIIkbd(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { /* * 0xff0a0000 = USB, HID_UP_MSVENDOR = BT. * * In BT mode, there are two HID_UP_MSVENDOR pages. * Use only the page that contains report ID == 5. 
*/ if (((usage->hid & HID_USAGE_PAGE) == 0xff0a0000 || (usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR) && field->report->id == 5) { switch (usage->hid & HID_USAGE) { case 0x00bb: /* Fn-F4: Mic mute */ map_key_clear(LENOVO_KEY_MICMUTE); return 1; case 0x00c3: /* Fn-F5: Brightness down */ map_key_clear(KEY_BRIGHTNESSDOWN); return 1; case 0x00c4: /* Fn-F6: Brightness up */ map_key_clear(KEY_BRIGHTNESSUP); return 1; case 0x00c1: /* Fn-F8: Notification center */ map_key_clear(KEY_NOTIFICATION_CENTER); return 1; case 0x00bc: /* Fn-F9: Control panel */ map_key_clear(KEY_CONFIG); return 1; case 0x00b6: /* Fn-F10: Bluetooth */ map_key_clear(KEY_BLUETOOTH); return 1; case 0x00b7: /* Fn-F11: Keyboard config */ map_key_clear(KEY_KEYBOARD); return 1; case 0x00b8: /* Fn-F12: User function */ map_key_clear(KEY_PROG1); return 1; case 0x00b9: /* Fn-PrtSc: Snipping tool */ map_key_clear(KEY_SELECTIVE_SCREENSHOT); return 1; case 0x00b5: /* Fn-Esc: Fn-lock toggle */ map_key_clear(KEY_FN_ESC); return 1; } } if ((usage->hid & HID_USAGE_PAGE) == 0xffa00000) { switch (usage->hid & HID_USAGE) { case 0x00fb: /* Middle mouse (in native USB mode) */ map_key_clear(BTN_MIDDLE); return 1; } } if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR && field->report->id == 21) { switch (usage->hid & HID_USAGE) { case 0x0004: /* Middle mouse (in native Bluetooth mode) */ map_key_clear(BTN_MIDDLE); return 1; } } /* Compatibility middle/wheel mappings should be ignored */ if (usage->hid == HID_GD_WHEEL) return -1; if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON && (usage->hid & HID_USAGE) == 0x003) return -1; if ((usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER && (usage->hid & HID_USAGE) == 0x238) return -1; /* Map wheel emulation reports: 0xff10 */ if ((usage->hid & HID_USAGE_PAGE) == 0xff100000) { field->flags |= HID_MAIN_ITEM_RELATIVE | HID_MAIN_ITEM_VARIABLE; field->logical_minimum = -127; field->logical_maximum = 127; switch (usage->hid & HID_USAGE) { case 0x0000: hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL); return 1; case 0x0001: hid_map_usage(hi, usage, bit, max, EV_REL, REL_WHEEL); return 1; default: return -1; } } return 0; } static int lenovo_input_mapping_scrollpoint(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if (usage->hid == HID_GD_Z) { hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL); return 1; } return 0; } static int lenovo_input_mapping_tp10_ultrabook_kbd(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { /* * The ThinkPad 10 Ultrabook Keyboard uses 0x000c0001 usage for * a bunch of keys which have no standard consumer page code. */ if (usage->hid == 0x000c0001) { switch (usage->usage_index) { case 8: /* Fn-Esc: Fn-lock toggle */ map_key_clear(KEY_FN_ESC); return 1; case 9: /* Fn-F4: Mic mute */ map_key_clear(LENOVO_KEY_MICMUTE); return 1; case 10: /* Fn-F7: Control panel */ map_key_clear(KEY_CONFIG); return 1; case 11: /* Fn-F8: Search (magnifier glass) */ map_key_clear(KEY_SEARCH); return 1; case 12: /* Fn-F10: Open My computer (6 boxes) */ map_key_clear(KEY_FILE); return 1; } } /* * The Ultrabook Keyboard sends a spurious F23 key-press when resuming * from suspend and it does not actually have a F23 key, ignore it. 
*/ if (usage->hid == 0x00070072) return -1; return 0; } static int lenovo_input_mapping_x1_tab_kbd(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { /* * The ThinkPad X1 Tablet Thin Keyboard uses 0x000c0001 usage for * a bunch of keys which have no standard consumer page code. */ if (usage->hid == 0x000c0001) { switch (usage->usage_index) { case 0: /* Fn-F10: Enable/disable bluetooth */ map_key_clear(KEY_BLUETOOTH); return 1; case 1: /* Fn-F11: Keyboard settings */ map_key_clear(KEY_KEYBOARD); return 1; case 2: /* Fn-F12: User function / Cortana */ map_key_clear(KEY_MACRO1); return 1; case 3: /* Fn-PrtSc: Snipping tool */ map_key_clear(KEY_SELECTIVE_SCREENSHOT); return 1; case 8: /* Fn-Esc: Fn-lock toggle */ map_key_clear(KEY_FN_ESC); return 1; case 9: /* Fn-F4: Mute/unmute microphone */ map_key_clear(KEY_MICMUTE); return 1; case 10: /* Fn-F9: Settings */ map_key_clear(KEY_CONFIG); return 1; case 13: /* Fn-F7: Manage external displays */ map_key_clear(KEY_SWITCHVIDEOMODE); return 1; case 14: /* Fn-F8: Enable/disable wifi */ map_key_clear(KEY_WLAN); return 1; } } if (usage->hid == (HID_UP_KEYBOARD | 0x009a)) { map_key_clear(KEY_SYSRQ); return 1; } return 0; } static int lenovo_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { switch (hdev->product) { case USB_DEVICE_ID_LENOVO_TPKBD: return lenovo_input_mapping_tpkbd(hdev, hi, field, usage, bit, max); case USB_DEVICE_ID_LENOVO_CUSBKBD: case USB_DEVICE_ID_LENOVO_CBTKBD: return lenovo_input_mapping_cptkbd(hdev, hi, field, usage, bit, max); case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: case USB_DEVICE_ID_LENOVO_TPIIBTKBD: return lenovo_input_mapping_tpIIkbd(hdev, hi, field, usage, bit, max); case USB_DEVICE_ID_IBM_SCROLLPOINT_III: case USB_DEVICE_ID_IBM_SCROLLPOINT_PRO: case USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL: case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL: case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO: case USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL: return lenovo_input_mapping_scrollpoint(hdev, hi, field, usage, bit, max); case USB_DEVICE_ID_LENOVO_TP10UBKBD: return lenovo_input_mapping_tp10_ultrabook_kbd(hdev, hi, field, usage, bit, max); case USB_DEVICE_ID_LENOVO_X1_TAB: return lenovo_input_mapping_x1_tab_kbd(hdev, hi, field, usage, bit, max); default: return 0; } } #undef map_key_clear /* Send a config command to the keyboard */ static int lenovo_send_cmd_cptkbd(struct hid_device *hdev, unsigned char byte2, unsigned char byte3) { int ret; unsigned char *buf; buf = kzalloc(3, GFP_KERNEL); if (!buf) return -ENOMEM; /* * Feature report 0x13 is used for USB, * output report 0x18 is used for Bluetooth. * buf[0] is ignored by hid_hw_raw_request. */ buf[0] = 0x18; buf[1] = byte2; buf[2] = byte3; switch (hdev->product) { case USB_DEVICE_ID_LENOVO_CUSBKBD: case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: ret = hid_hw_raw_request(hdev, 0x13, buf, 3, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); break; case USB_DEVICE_ID_LENOVO_CBTKBD: case USB_DEVICE_ID_LENOVO_TPIIBTKBD: ret = hid_hw_output_report(hdev, buf, 3); break; default: ret = -EINVAL; break; } kfree(buf); return ret < 0 ? 
ret : 0; /* BT returns 0, USB returns sizeof(buf) */ } static void lenovo_features_set_cptkbd(struct hid_device *hdev) { int ret; struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); /* * Tell the keyboard a driver understands it, and turn F7, F9, F11 into * regular keys */ ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); if (ret) hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); /* Switch middle button to native mode */ ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01); if (ret) hid_warn(hdev, "Failed to switch middle button: %d\n", ret); ret = lenovo_send_cmd_cptkbd(hdev, 0x05, cptkbd_data->fn_lock); if (ret) hid_err(hdev, "Fn-lock setting failed: %d\n", ret); ret = lenovo_send_cmd_cptkbd(hdev, 0x02, cptkbd_data->sensitivity); if (ret) hid_err(hdev, "Sensitivity setting failed: %d\n", ret); } static ssize_t attr_fn_lock_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data->fn_lock); } static ssize_t attr_fn_lock_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data = hid_get_drvdata(hdev); int value, ret; if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) return -EINVAL; data->fn_lock = !!value; switch (hdev->product) { case USB_DEVICE_ID_LENOVO_CUSBKBD: case USB_DEVICE_ID_LENOVO_CBTKBD: case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: case USB_DEVICE_ID_LENOVO_TPIIBTKBD: lenovo_features_set_cptkbd(hdev); break; case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: ret = lenovo_led_set_tp10ubkbd(hdev, TP10UBKBD_FN_LOCK_LED, value); if (ret) return ret; break; } return count; } static ssize_t attr_sensitivity_show_cptkbd(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", cptkbd_data->sensitivity); } static ssize_t attr_sensitivity_store_cptkbd(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); int value; if (kstrtoint(buf, 10, &value) || value < 1 || value > 255) return -EINVAL; cptkbd_data->sensitivity = value; lenovo_features_set_cptkbd(hdev); return count; } static ssize_t attr_middleclick_workaround_show_cptkbd(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", cptkbd_data->middleclick_workaround_cptkbd); } static ssize_t attr_middleclick_workaround_store_cptkbd(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); int value; if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) return -EINVAL; cptkbd_data->middleclick_workaround_cptkbd = !!value; return count; } static struct device_attribute dev_attr_fn_lock = __ATTR(fn_lock, S_IWUSR | S_IRUGO, attr_fn_lock_show, attr_fn_lock_store); static struct device_attribute dev_attr_sensitivity_cptkbd = __ATTR(sensitivity, S_IWUSR | S_IRUGO, attr_sensitivity_show_cptkbd, attr_sensitivity_store_cptkbd); static struct 
device_attribute dev_attr_middleclick_workaround_cptkbd = __ATTR(middleclick_workaround, S_IWUSR | S_IRUGO, attr_middleclick_workaround_show_cptkbd, attr_middleclick_workaround_store_cptkbd); static struct attribute *lenovo_attributes_cptkbd[] = { &dev_attr_fn_lock.attr, &dev_attr_sensitivity_cptkbd.attr, &dev_attr_middleclick_workaround_cptkbd.attr, NULL }; static const struct attribute_group lenovo_attr_group_cptkbd = { .attrs = lenovo_attributes_cptkbd, }; static int lenovo_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { /* * Compact USB keyboard's Fn-F12 report holds down many other keys, and * its own key is outside the usage page range. Remove extra * keypresses and remap to inside usage page. */ if (unlikely(hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD && size == 3 && data[0] == 0x15 && data[1] == 0x94 && data[2] == 0x01)) { data[1] = 0x00; data[2] = 0x01; } return 0; } static int lenovo_event_tp10ubkbd(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct lenovo_drvdata *data = hid_get_drvdata(hdev); if (usage->type == EV_KEY && usage->code == KEY_FN_ESC && value == 1) { /* * The user has toggled the Fn-lock state. Toggle our own * cached value of it and sync our value to the keyboard to * ensure things are in sync (the sycning should be a no-op). */ data->fn_lock = !data->fn_lock; schedule_work(&data->fn_lock_sync_work); } return 0; } static int lenovo_event_cptkbd(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); if (cptkbd_data->middleclick_workaround_cptkbd) { /* "wheel" scroll events */ if (usage->type == EV_REL && (usage->code == REL_WHEEL || usage->code == REL_HWHEEL)) { /* Scroll events disable middle-click event */ cptkbd_data->middlebutton_state = 2; return 0; } /* Middle click events */ if (usage->type == EV_KEY && usage->code == BTN_MIDDLE) { if (value == 1) { cptkbd_data->middlebutton_state = 1; } else if (value == 0) { if (cptkbd_data->middlebutton_state == 1) { /* No scrolling inbetween, send middle-click */ input_event(field->hidinput->input, EV_KEY, BTN_MIDDLE, 1); input_sync(field->hidinput->input); input_event(field->hidinput->input, EV_KEY, BTN_MIDDLE, 0); input_sync(field->hidinput->input); } cptkbd_data->middlebutton_state = 0; } return 1; } } if (usage->type == EV_KEY && usage->code == KEY_FN_ESC && value == 1) { /* * The user has toggled the Fn-lock state. Toggle our own * cached value of it and sync our value to the keyboard to * ensure things are in sync (the syncing should be a no-op). */ cptkbd_data->fn_lock = !cptkbd_data->fn_lock; } return 0; } static int lenovo_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { if (!hid_get_drvdata(hdev)) return 0; switch (hdev->product) { case USB_DEVICE_ID_LENOVO_CUSBKBD: case USB_DEVICE_ID_LENOVO_CBTKBD: case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: case USB_DEVICE_ID_LENOVO_TPIIBTKBD: return lenovo_event_cptkbd(hdev, field, usage, value); case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: return lenovo_event_tp10ubkbd(hdev, field, usage, value); default: return 0; } } static int lenovo_features_set_tpkbd(struct hid_device *hdev) { struct hid_report *report; struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); report = hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[4]; report->field[0]->value[0] = data_pointer->press_to_select ? 
0x01 : 0x02; report->field[0]->value[0] |= data_pointer->dragging ? 0x04 : 0x08; report->field[0]->value[0] |= data_pointer->release_to_select ? 0x10 : 0x20; report->field[0]->value[0] |= data_pointer->select_right ? 0x80 : 0x40; report->field[1]->value[0] = 0x03; // unknown setting, imitate windows driver report->field[2]->value[0] = data_pointer->sensitivity; report->field[3]->value[0] = data_pointer->press_speed; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); return 0; } static ssize_t attr_press_to_select_show_tpkbd(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->press_to_select); } static ssize_t attr_press_to_select_store_tpkbd(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); int value; if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) return -EINVAL; data_pointer->press_to_select = value; lenovo_features_set_tpkbd(hdev); return count; } static ssize_t attr_dragging_show_tpkbd(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->dragging); } static ssize_t attr_dragging_store_tpkbd(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); int value; if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) return -EINVAL; data_pointer->dragging = value; lenovo_features_set_tpkbd(hdev); return count; } static ssize_t attr_release_to_select_show_tpkbd(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->release_to_select); } static ssize_t attr_release_to_select_store_tpkbd(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); int value; if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) return -EINVAL; data_pointer->release_to_select = value; lenovo_features_set_tpkbd(hdev); return count; } static ssize_t attr_select_right_show_tpkbd(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->select_right); } static ssize_t attr_select_right_store_tpkbd(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); int value; if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) return -EINVAL; data_pointer->select_right = value; lenovo_features_set_tpkbd(hdev); return count; } static ssize_t attr_sensitivity_show_tpkbd(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); 
return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->sensitivity); } static ssize_t attr_sensitivity_store_tpkbd(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); int value; if (kstrtoint(buf, 10, &value) || value < 1 || value > 255) return -EINVAL; data_pointer->sensitivity = value; lenovo_features_set_tpkbd(hdev); return count; } static ssize_t attr_press_speed_show_tpkbd(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->press_speed); } static ssize_t attr_press_speed_store_tpkbd(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); int value; if (kstrtoint(buf, 10, &value) || value < 1 || value > 255) return -EINVAL; data_pointer->press_speed = value; lenovo_features_set_tpkbd(hdev); return count; } static struct device_attribute dev_attr_press_to_select_tpkbd = __ATTR(press_to_select, S_IWUSR | S_IRUGO, attr_press_to_select_show_tpkbd, attr_press_to_select_store_tpkbd); static struct device_attribute dev_attr_dragging_tpkbd = __ATTR(dragging, S_IWUSR | S_IRUGO, attr_dragging_show_tpkbd, attr_dragging_store_tpkbd); static struct device_attribute dev_attr_release_to_select_tpkbd = __ATTR(release_to_select, S_IWUSR | S_IRUGO, attr_release_to_select_show_tpkbd, attr_release_to_select_store_tpkbd); static struct device_attribute dev_attr_select_right_tpkbd = __ATTR(select_right, S_IWUSR | S_IRUGO, attr_select_right_show_tpkbd, attr_select_right_store_tpkbd); static struct device_attribute dev_attr_sensitivity_tpkbd = __ATTR(sensitivity, S_IWUSR | S_IRUGO, attr_sensitivity_show_tpkbd, attr_sensitivity_store_tpkbd); static struct device_attribute dev_attr_press_speed_tpkbd = __ATTR(press_speed, S_IWUSR | S_IRUGO, attr_press_speed_show_tpkbd, attr_press_speed_store_tpkbd); static struct attribute *lenovo_attributes_tpkbd[] = { &dev_attr_press_to_select_tpkbd.attr, &dev_attr_dragging_tpkbd.attr, &dev_attr_release_to_select_tpkbd.attr, &dev_attr_select_right_tpkbd.attr, &dev_attr_sensitivity_tpkbd.attr, &dev_attr_press_speed_tpkbd.attr, NULL }; static const struct attribute_group lenovo_attr_group_tpkbd = { .attrs = lenovo_attributes_tpkbd, }; static void lenovo_led_set_tpkbd(struct hid_device *hdev) { struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); struct hid_report *report; report = hdev->report_enum[HID_OUTPUT_REPORT].report_id_hash[3]; report->field[0]->value[0] = (data_pointer->led_state >> 0) & 1; report->field[0]->value[1] = (data_pointer->led_state >> 1) & 1; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); } static int lenovo_led_brightness_set(struct led_classdev *led_cdev, enum led_brightness value) { struct device *dev = led_cdev->dev->parent; struct hid_device *hdev = to_hid_device(dev); struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); static const u8 tp10ubkbd_led[] = { TP10UBKBD_MUTE_LED, TP10UBKBD_MICMUTE_LED }; int led_nr = 0; int ret = 0; if (led_cdev == &data_pointer->led_micmute) led_nr = 1; if (value == LED_OFF) data_pointer->led_state &= ~(1 << led_nr); else data_pointer->led_state |= 1 << led_nr; switch (hdev->product) { case USB_DEVICE_ID_LENOVO_TPKBD: lenovo_led_set_tpkbd(hdev); break; case 
USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: ret = lenovo_led_set_tp10ubkbd(hdev, tp10ubkbd_led[led_nr], value); break; } return ret; } static int lenovo_register_leds(struct hid_device *hdev) { struct lenovo_drvdata *data = hid_get_drvdata(hdev); size_t name_sz = strlen(dev_name(&hdev->dev)) + 16; char *name_mute, *name_micm; int ret; name_mute = devm_kzalloc(&hdev->dev, name_sz, GFP_KERNEL); name_micm = devm_kzalloc(&hdev->dev, name_sz, GFP_KERNEL); if (name_mute == NULL || name_micm == NULL) { hid_err(hdev, "Could not allocate memory for led data\n"); return -ENOMEM; } snprintf(name_mute, name_sz, "%s:amber:mute", dev_name(&hdev->dev)); snprintf(name_micm, name_sz, "%s:amber:micmute", dev_name(&hdev->dev)); data->led_mute.name = name_mute; data->led_mute.default_trigger = "audio-mute"; data->led_mute.brightness_set_blocking = lenovo_led_brightness_set; data->led_mute.max_brightness = 1; data->led_mute.flags = LED_HW_PLUGGABLE; data->led_mute.dev = &hdev->dev; ret = led_classdev_register(&hdev->dev, &data->led_mute); if (ret < 0) return ret; data->led_micmute.name = name_micm; data->led_micmute.default_trigger = "audio-micmute"; data->led_micmute.brightness_set_blocking = lenovo_led_brightness_set; data->led_micmute.max_brightness = 1; data->led_micmute.flags = LED_HW_PLUGGABLE; data->led_micmute.dev = &hdev->dev; ret = led_classdev_register(&hdev->dev, &data->led_micmute); if (ret < 0) { led_classdev_unregister(&data->led_mute); return ret; } return 0; } static int lenovo_probe_tpkbd(struct hid_device *hdev) { struct lenovo_drvdata *data_pointer; int i, ret; /* * Only register extra settings against subdevice where input_mapping * set drvdata to 1, i.e. the trackpoint. */ if (!hid_get_drvdata(hdev)) return 0; hid_set_drvdata(hdev, NULL); /* Validate required reports. 
*/ for (i = 0; i < 4; i++) { if (!hid_validate_values(hdev, HID_FEATURE_REPORT, 4, i, 1)) return -ENODEV; } if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 3, 0, 2)) return -ENODEV; ret = sysfs_create_group(&hdev->dev.kobj, &lenovo_attr_group_tpkbd); if (ret) hid_warn(hdev, "Could not create sysfs group: %d\n", ret); data_pointer = devm_kzalloc(&hdev->dev, sizeof(struct lenovo_drvdata), GFP_KERNEL); if (data_pointer == NULL) { hid_err(hdev, "Could not allocate memory for driver data\n"); ret = -ENOMEM; goto err; } // set same default values as windows driver data_pointer->sensitivity = 0xa0; data_pointer->press_speed = 0x38; hid_set_drvdata(hdev, data_pointer); ret = lenovo_register_leds(hdev); if (ret) goto err; lenovo_features_set_tpkbd(hdev); return 0; err: sysfs_remove_group(&hdev->dev.kobj, &lenovo_attr_group_tpkbd); return ret; } static int lenovo_probe_cptkbd(struct hid_device *hdev) { int ret; struct lenovo_drvdata *cptkbd_data; /* All the custom action happens on the USBMOUSE device for USB */ if (((hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD) || (hdev->product == USB_DEVICE_ID_LENOVO_TPIIUSBKBD)) && hdev->type != HID_TYPE_USBMOUSE) { hid_dbg(hdev, "Ignoring keyboard half of device\n"); return 0; } cptkbd_data = devm_kzalloc(&hdev->dev, sizeof(*cptkbd_data), GFP_KERNEL); if (cptkbd_data == NULL) { hid_err(hdev, "can't alloc keyboard descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, cptkbd_data); /* Set keyboard settings to known state */ cptkbd_data->middlebutton_state = 0; cptkbd_data->fn_lock = true; cptkbd_data->sensitivity = 0x05; cptkbd_data->middleclick_workaround_cptkbd = true; lenovo_features_set_cptkbd(hdev); ret = sysfs_create_group(&hdev->dev.kobj, &lenovo_attr_group_cptkbd); if (ret) hid_warn(hdev, "Could not create sysfs group: %d\n", ret); return 0; } static struct attribute *lenovo_attributes_tp10ubkbd[] = { &dev_attr_fn_lock.attr, NULL }; static const struct attribute_group lenovo_attr_group_tp10ubkbd = { .attrs = lenovo_attributes_tp10ubkbd, }; static int lenovo_probe_tp10ubkbd(struct hid_device *hdev) { struct hid_report_enum *rep_enum; struct lenovo_drvdata *data; struct hid_report *rep; bool found; int ret; /* * The LEDs and the Fn-lock functionality use output report 9, * with an application of 0xffa0001, add the LEDs on the interface * with this output report. */ found = false; rep_enum = &hdev->report_enum[HID_OUTPUT_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { if (rep->application == 0xffa00001) found = true; } if (!found) return 0; data = devm_kzalloc(&hdev->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; mutex_init(&data->led_report_mutex); INIT_WORK(&data->fn_lock_sync_work, lenovo_tp10ubkbd_sync_fn_lock); data->hdev = hdev; hid_set_drvdata(hdev, data); /* * The Thinkpad 10 ultrabook USB kbd dock's Fn-lock defaults to on. * We cannot read the state, only set it, so we force it to on here * (which should be a no-op) to make sure that our state matches the * keyboard's FN-lock state. This is the same as what Windows does. 
*/ data->fn_lock = true; lenovo_led_set_tp10ubkbd(hdev, TP10UBKBD_FN_LOCK_LED, data->fn_lock); ret = sysfs_create_group(&hdev->dev.kobj, &lenovo_attr_group_tp10ubkbd); if (ret) return ret; ret = lenovo_register_leds(hdev); if (ret) goto err; return 0; err: sysfs_remove_group(&hdev->dev.kobj, &lenovo_attr_group_tp10ubkbd); return ret; } static int lenovo_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "hid_parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hid_hw_start failed\n"); goto err; } switch (hdev->product) { case USB_DEVICE_ID_LENOVO_TPKBD: ret = lenovo_probe_tpkbd(hdev); break; case USB_DEVICE_ID_LENOVO_CUSBKBD: case USB_DEVICE_ID_LENOVO_CBTKBD: case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: case USB_DEVICE_ID_LENOVO_TPIIBTKBD: ret = lenovo_probe_cptkbd(hdev); break; case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: ret = lenovo_probe_tp10ubkbd(hdev); break; default: ret = 0; break; } if (ret) goto err_hid; return 0; err_hid: hid_hw_stop(hdev); err: return ret; } #ifdef CONFIG_PM static int lenovo_reset_resume(struct hid_device *hdev) { switch (hdev->product) { case USB_DEVICE_ID_LENOVO_CUSBKBD: case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: if (hdev->type == HID_TYPE_USBMOUSE) lenovo_features_set_cptkbd(hdev); break; default: break; } return 0; } #endif static void lenovo_remove_tpkbd(struct hid_device *hdev) { struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); /* * Only the trackpoint half of the keyboard has drvdata and stuff that * needs unregistering. */ if (data_pointer == NULL) return; sysfs_remove_group(&hdev->dev.kobj, &lenovo_attr_group_tpkbd); led_classdev_unregister(&data_pointer->led_micmute); led_classdev_unregister(&data_pointer->led_mute); } static void lenovo_remove_cptkbd(struct hid_device *hdev) { sysfs_remove_group(&hdev->dev.kobj, &lenovo_attr_group_cptkbd); } static void lenovo_remove_tp10ubkbd(struct hid_device *hdev) { struct lenovo_drvdata *data = hid_get_drvdata(hdev); if (data == NULL) return; led_classdev_unregister(&data->led_micmute); led_classdev_unregister(&data->led_mute); sysfs_remove_group(&hdev->dev.kobj, &lenovo_attr_group_tp10ubkbd); cancel_work_sync(&data->fn_lock_sync_work); } static void lenovo_remove(struct hid_device *hdev) { switch (hdev->product) { case USB_DEVICE_ID_LENOVO_TPKBD: lenovo_remove_tpkbd(hdev); break; case USB_DEVICE_ID_LENOVO_CUSBKBD: case USB_DEVICE_ID_LENOVO_CBTKBD: case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: case USB_DEVICE_ID_LENOVO_TPIIBTKBD: lenovo_remove_cptkbd(hdev); break; case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: lenovo_remove_tp10ubkbd(hdev); break; } hid_hw_stop(hdev); } static int lenovo_input_configured(struct hid_device *hdev, struct hid_input *hi) { switch (hdev->product) { case USB_DEVICE_ID_LENOVO_TPKBD: case USB_DEVICE_ID_LENOVO_CUSBKBD: case USB_DEVICE_ID_LENOVO_CBTKBD: case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: case USB_DEVICE_ID_LENOVO_TPIIBTKBD: if (test_bit(EV_REL, hi->input->evbit)) { /* set only for trackpoint device */ __set_bit(INPUT_PROP_POINTER, hi->input->propbit); __set_bit(INPUT_PROP_POINTING_STICK, hi->input->propbit); } break; } return 0; } static const struct hid_device_id lenovo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPIIUSBKBD) }, { 
HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPIIBTKBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) }, { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_III) }, { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_PRO) }, { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL) }, { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL) }, { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO) }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL) }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TP10UBKBD) }, /* * Note bind to the HID_GROUP_GENERIC group, so that we only bind to the keyboard * part, while letting hid-multitouch.c handle the touchpad and trackpoint. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, { } }; MODULE_DEVICE_TABLE(hid, lenovo_devices); static struct hid_driver lenovo_driver = { .name = "lenovo", .id_table = lenovo_devices, .input_configured = lenovo_input_configured, .input_mapping = lenovo_input_mapping, .probe = lenovo_probe, .remove = lenovo_remove, .raw_event = lenovo_raw_event, .event = lenovo_event, .report_fixup = lenovo_report_fixup, #ifdef CONFIG_PM .reset_resume = lenovo_reset_resume, #endif }; module_hid_driver(lenovo_driver); MODULE_LICENSE("GPL");
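lenovo_event_cptkbd() above implements the middle-click workaround as a small state machine (0 = up, 1 = down with a click still possible, 2 = scrolling), emitting BTN_MIDDLE on release only if no wheel event arrived in between. Below is a userspace-only sketch of that logic; all names in it are hypothetical.

#include <stdio.h>

/*
 * Userspace-only sketch of the middle-click workaround state machine in
 * lenovo_event_cptkbd() above: 0 = up, 1 = down (click still possible),
 * 2 = scrolling. All names here are hypothetical.
 */
static int middlebutton_state;	/* 0, 1 or 2, as in the driver */

static void wheel_event(void)
{
	middlebutton_state = 2;		/* scrolling cancels a pending click */
}

static void button_event(int pressed)
{
	if (pressed) {
		middlebutton_state = 1;
	} else {
		if (middlebutton_state == 1)
			printf("emit BTN_MIDDLE click\n");
		middlebutton_state = 0;
	}
}

int main(void)
{
	button_event(1); button_event(0);			/* plain click: emitted */
	button_event(1); wheel_event(); button_event(0);	/* drag-scroll: suppressed */
	return 0;
}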
/* SPDX-License-Identifier: GPL-2.0+ */ /* * Driver for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * * Copyright (C) 2001 Russell King.
*/ #include <linux/bits.h> #include <linux/serial_8250.h> #include <linux/serial_reg.h> #include <linux/dmaengine.h> #include "../serial_mctrl_gpio.h" struct uart_8250_dma { int (*tx_dma)(struct uart_8250_port *p); int (*rx_dma)(struct uart_8250_port *p); void (*prepare_tx_dma)(struct uart_8250_port *p); void (*prepare_rx_dma)(struct uart_8250_port *p); /* Filter function */ dma_filter_fn fn; /* Parameter to the filter function */ void *rx_param; void *tx_param; struct dma_slave_config rxconf; struct dma_slave_config txconf; struct dma_chan *rxchan; struct dma_chan *txchan; /* Device address base for DMA operations */ phys_addr_t rx_dma_addr; phys_addr_t tx_dma_addr; /* DMA address of the buffer in memory */ dma_addr_t rx_addr; dma_addr_t tx_addr; dma_cookie_t rx_cookie; dma_cookie_t tx_cookie; void *rx_buf; size_t rx_size; size_t tx_size; unsigned char tx_running; unsigned char tx_err; unsigned char rx_running; }; struct old_serial_port { unsigned int uart; unsigned int baud_base; unsigned int port; unsigned int irq; upf_t flags; unsigned char io_type; unsigned char __iomem *iomem_base; unsigned short iomem_reg_shift; }; struct serial8250_config { const char *name; unsigned short fifo_size; unsigned short tx_loadsz; unsigned char fcr; unsigned char rxtrig_bytes[UART_FCR_R_TRIG_MAX_STATE]; unsigned int flags; }; #define UART_CAP_FIFO BIT(8) /* UART has FIFO */ #define UART_CAP_EFR BIT(9) /* UART has EFR */ #define UART_CAP_SLEEP BIT(10) /* UART has IER sleep */ #define UART_CAP_AFE BIT(11) /* MCR-based hw flow control */ #define UART_CAP_UUE BIT(12) /* UART needs IER bit 6 set (Xscale) */ #define UART_CAP_RTOIE BIT(13) /* UART needs IER bit 4 set (Xscale, Tegra) */ #define UART_CAP_HFIFO BIT(14) /* UART has a "hidden" FIFO */ #define UART_CAP_RPM BIT(15) /* Runtime PM is active while idle */ #define UART_CAP_IRDA BIT(16) /* UART supports IrDA line discipline */ #define UART_CAP_MINI BIT(17) /* Mini UART on BCM283X family lacks: * STOP PARITY EPAR SPAR WLEN5 WLEN6 */ #define UART_CAP_NOTEMT BIT(18) /* UART without interrupt on TEMT available */ #define UART_BUG_QUOT BIT(0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN BIT(1) /* UART has buggy TX IIR status */ #define UART_BUG_NOMSR BIT(2) /* UART has buggy MSR status bits (Au1x00) */ #define UART_BUG_THRE BIT(3) /* UART has buggy THRE reassertion */ #define UART_BUG_TXRACE BIT(5) /* UART Tx fails to set remote DR */ #ifdef CONFIG_SERIAL_8250_SHARE_IRQ #define SERIAL8250_SHARE_IRQS 1 #else #define SERIAL8250_SHARE_IRQS 0 #endif #define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \ { \ .iobase = _base, \ .irq = _irq, \ .uartclk = 1843200, \ .iotype = UPIO_PORT, \ .flags = UPF_BOOT_AUTOCONF | (_flags), \ } #define SERIAL8250_PORT(_base, _irq) SERIAL8250_PORT_FLAGS(_base, _irq, 0) static inline int serial_in(struct uart_8250_port *up, int offset) { return up->port.serial_in(&up->port, offset); } static inline void serial_out(struct uart_8250_port *up, int offset, int value) { up->port.serial_out(&up->port, offset, value); } /** * serial_lsr_in - Read LSR register and preserve flags across reads * @up: uart 8250 port * * Read LSR register and handle saving non-preserved flags across reads. * The flags that are not preserved across reads are stored into * up->lsr_saved_flags. * * Returns LSR value or'ed with the preserved flags (if any). 
*/ static inline u16 serial_lsr_in(struct uart_8250_port *up) { u16 lsr = up->lsr_saved_flags; lsr |= serial_in(up, UART_LSR); up->lsr_saved_flags = lsr & up->lsr_save_mask; return lsr; } /* * For the 16C950 */ static void serial_icr_write(struct uart_8250_port *up, int offset, int value) { serial_out(up, UART_SCR, offset); serial_out(up, UART_ICR, value); } static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up, int offset) { unsigned int value; serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); serial_out(up, UART_SCR, offset); value = serial_in(up, UART_ICR); serial_icr_write(up, UART_ACR, up->acr); return value; } void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p); static inline u32 serial_dl_read(struct uart_8250_port *up) { return up->dl_read(up); } static inline void serial_dl_write(struct uart_8250_port *up, u32 value) { up->dl_write(up, value); } static inline bool serial8250_set_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (up->ier & UART_IER_THRI) return false; up->ier |= UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } static inline bool serial8250_clear_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (!(up->ier & UART_IER_THRI)) return false; up->ier &= ~UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } struct uart_8250_port *serial8250_get_port(int line); void serial8250_rpm_get(struct uart_8250_port *p); void serial8250_rpm_put(struct uart_8250_port *p); void serial8250_rpm_get_tx(struct uart_8250_port *p); void serial8250_rpm_put_tx(struct uart_8250_port *p); int serial8250_em485_config(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485); void serial8250_em485_start_tx(struct uart_8250_port *p); void serial8250_em485_stop_tx(struct uart_8250_port *p); void serial8250_em485_destroy(struct uart_8250_port *p); extern struct serial_rs485 serial8250_em485_supported; /* MCR <-> TIOCM conversion */ static inline int serial8250_TIOCM_to_MCR(int tiocm) { int mcr = 0; if (tiocm & TIOCM_RTS) mcr |= UART_MCR_RTS; if (tiocm & TIOCM_DTR) mcr |= UART_MCR_DTR; if (tiocm & TIOCM_OUT1) mcr |= UART_MCR_OUT1; if (tiocm & TIOCM_OUT2) mcr |= UART_MCR_OUT2; if (tiocm & TIOCM_LOOP) mcr |= UART_MCR_LOOP; return mcr; } static inline int serial8250_MCR_to_TIOCM(int mcr) { int tiocm = 0; if (mcr & UART_MCR_RTS) tiocm |= TIOCM_RTS; if (mcr & UART_MCR_DTR) tiocm |= TIOCM_DTR; if (mcr & UART_MCR_OUT1) tiocm |= TIOCM_OUT1; if (mcr & UART_MCR_OUT2) tiocm |= TIOCM_OUT2; if (mcr & UART_MCR_LOOP) tiocm |= TIOCM_LOOP; return tiocm; } /* MSR <-> TIOCM conversion */ static inline int serial8250_MSR_to_TIOCM(int msr) { int tiocm = 0; if (msr & UART_MSR_DCD) tiocm |= TIOCM_CAR; if (msr & UART_MSR_RI) tiocm |= TIOCM_RNG; if (msr & UART_MSR_DSR) tiocm |= TIOCM_DSR; if (msr & UART_MSR_CTS) tiocm |= TIOCM_CTS; return tiocm; } static inline void serial8250_out_MCR(struct uart_8250_port *up, int value) { serial_out(up, UART_MCR, value); if (up->gpios) mctrl_gpio_set(up->gpios, serial8250_MCR_to_TIOCM(value)); } static inline int serial8250_in_MCR(struct uart_8250_port *up) { int mctrl; mctrl = serial_in(up, UART_MCR); if (up->gpios) { unsigned int mctrl_gpio = 0; mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio); mctrl |= serial8250_TIOCM_to_MCR(mctrl_gpio); } return mctrl; } bool alpha_jensen(void); void 
alpha_jensen_set_mctrl(struct uart_port *port, unsigned int mctrl); #ifdef CONFIG_SERIAL_8250_PNP int serial8250_pnp_init(void); void serial8250_pnp_exit(void); #else static inline int serial8250_pnp_init(void) { return 0; } static inline void serial8250_pnp_exit(void) { } #endif #ifdef CONFIG_SERIAL_8250_FINTEK int fintek_8250_probe(struct uart_8250_port *uart); #else static inline int fintek_8250_probe(struct uart_8250_port *uart) { return 0; } #endif #ifdef CONFIG_ARCH_OMAP1 #include <linux/soc/ti/omap1-soc.h> static inline int is_omap1_8250(struct uart_8250_port *pt) { int res; switch (pt->port.mapbase) { case OMAP1_UART1_BASE: case OMAP1_UART2_BASE: case OMAP1_UART3_BASE: res = 1; break; default: res = 0; break; } return res; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { if (!cpu_is_omap1510()) return 0; return is_omap1_8250(pt); } #else static inline int is_omap1_8250(struct uart_8250_port *pt) { return 0; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { return 0; } #endif #ifdef CONFIG_SERIAL_8250_DMA extern int serial8250_tx_dma(struct uart_8250_port *); extern int serial8250_rx_dma(struct uart_8250_port *); extern void serial8250_rx_dma_flush(struct uart_8250_port *); extern int serial8250_request_dma(struct uart_8250_port *); extern void serial8250_release_dma(struct uart_8250_port *); static inline void serial8250_do_prepare_tx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_tx_dma) dma->prepare_tx_dma(p); } static inline void serial8250_do_prepare_rx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_rx_dma) dma->prepare_rx_dma(p); } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; return dma && dma->tx_running; } #else static inline int serial8250_tx_dma(struct uart_8250_port *p) { return -1; } static inline int serial8250_rx_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_rx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_request_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_release_dma(struct uart_8250_port *p) { } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { return false; } #endif static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) { unsigned char status; status = serial_in(up, 0x04); /* EXCR2 */ #define PRESL(x) ((x) & 0x30) if (PRESL(status) == 0x10) { /* already in high speed mode */ return 0; } else { status &= ~0xB0; /* Disable LOCK, mask out PRESL[01] */ status |= 0x10; /* 1.625 divisor for baud_base --> 921600 */ serial_out(up, 0x04, status); } return 1; } static inline int serial_index(struct uart_port *port) { return port->minor - 64; }
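/*
 * Illustrative sketch (not part of the header): the serial8250_TIOCM_to_MCR()
 * and serial8250_MSR_to_TIOCM() helpers above mirror the modem-control bits
 * that user space drives through the standard termios ioctls.  A minimal
 * example of raising RTS and DTR on an 8250 port could look like this;
 * /dev/ttyS0 is only an assumed example device node.
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY);
	int bits;

	if (fd < 0)
		return 1;

	if (ioctl(fd, TIOCMGET, &bits) == 0) {
		bits |= TIOCM_RTS | TIOCM_DTR;	/* maps to UART_MCR_RTS/UART_MCR_DTR in the driver */
		ioctl(fd, TIOCMSET, &bits);
	}

	close(fd);
	return 0;
}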
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM x86_fpu

#if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_FPU_H

#include <linux/tracepoint.h>

DECLARE_EVENT_CLASS(x86_fpu,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu),

	TP_STRUCT__entry(
		__field(struct fpu *, fpu)
		__field(bool, load_fpu)
		__field(u64, xfeatures)
		__field(u64, xcomp_bv)
		),

	TP_fast_assign(
		__entry->fpu		= fpu;
		__entry->load_fpu	= test_thread_flag(TIF_NEED_FPU_LOAD);
		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
			__entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures;
			__entry->xcomp_bv  = fpu->fpstate->regs.xsave.header.xcomp_bv;
		}
	),
	TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx",
			__entry->fpu,
			__entry->load_fpu,
			__entry->xfeatures,
			__entry->xcomp_bv
	)
);

DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_after_save,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_before_restore,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_after_restore,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_dropped,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_copy_src,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed,
	TP_PROTO(struct fpu *fpu),
	TP_ARGS(fpu)
);

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH asm/trace/
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE fpu
#endif /* _TRACE_FPU_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
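/*
 * Illustrative sketch (not part of the header): the x86_fpu events defined
 * above are consumed through tracefs like any other tracepoint group.  A
 * minimal reader, assuming tracefs is mounted at /sys/kernel/tracing (older
 * systems use /sys/kernel/debug/tracing), could look like this.
 */
#include <stdio.h>

int main(void)
{
	FILE *enable, *fp;
	char line[512];

	enable = fopen("/sys/kernel/tracing/events/x86_fpu/enable", "w");
	if (!enable)
		return 1;
	fputs("1\n", enable);		/* enable every event in the x86_fpu group */
	fclose(enable);

	fp = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!fp)
		return 1;
	while (fgets(line, sizeof(line), fp))
		fputs(line, stdout);	/* records follow the TP_printk() format above */
	fclose(fp);
	return 0;
}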
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there.
I * can't match BSD without hacking the C * library. Ideas urgently sought! * Alan Cox : Disallow bind() to addresses that are * not ours - especially broadcast ones!! * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, * instead they leave that for the DESTROY timer. * Alan Cox : Clean up error flag in accept * Alan Cox : TCP ack handling is buggy, the DESTROY timer * was buggy. Put a remove_sock() in the handler * for memory when we hit 0. Also altered the timer * code. The ACK stuff can wait and needs major * TCP layer surgery. * Alan Cox : Fixed TCP ack bug, removed remove sock * and fixed timer/inet_bh race. * Alan Cox : Added zapped flag for TCP * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... * Rick Sladkey : Relaxed UDP rules for matching packets. * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support * Pauline Middelink : identd support * Alan Cox : Fixed connect() taking signals I think. * Alan Cox : SO_LINGER supported * Alan Cox : Error reporting fixes * Anonymous : inet_create tidied up (sk->reuse setting) * Alan Cox : inet sockets don't set sk->type! * Alan Cox : Split socket option code * Alan Cox : Callbacks * Alan Cox : Nagle flag for Charles & Johannes stuff * Alex : Removed restriction on inet fioctl * Alan Cox : Splitting INET from NET core * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code * Alan Cox : Split IP from generic code * Alan Cox : New kfree_skbmem() * Alan Cox : Make SO_DEBUG superuser only. * Alan Cox : Allow anyone to clear SO_DEBUG * (compatibility fix) * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. * Alan Cox : Allocator for a socket is settable. * Alan Cox : SO_ERROR includes soft errors. * Alan Cox : Allow NULL arguments on some SO_ opts * Alan Cox : Generic socket allocation to make hooks * easier (suggested by Craig Metz). * Michael Pall : SO_ERROR returns positive errno again * Steve Whitehouse: Added default destructor to free * protocol private data. * Steve Whitehouse: Added various other default routines * common to several socket families. * Chris Evans : Call suser() check last on F_SETOWN * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() * Andi Kleen : Fix write_space callback * Chris Evans : Security fixes - signedness again * Arnaldo C. 
Melo : cleanups, use skb_queue_purge * * To Fix: */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <asm/unaligned.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/errqueue.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/poll.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/init.h> #include <linux/highmem.h> #include <linux/user_namespace.h> #include <linux/static_key.h> #include <linux/memcontrol.h> #include <linux/prefetch.h> #include <linux/compat.h> #include <linux/mroute.h> #include <linux/mroute6.h> #include <linux/icmpv6.h> #include <linux/uaccess.h> #include <linux/netdevice.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/net_namespace.h> #include <net/request_sock.h> #include <net/sock.h> #include <linux/net_tstamp.h> #include <net/xfrm.h> #include <linux/ipsec.h> #include <net/cls_cgroup.h> #include <net/netprio_cgroup.h> #include <linux/sock_diag.h> #include <linux/filter.h> #include <net/sock_reuseport.h> #include <net/bpf_sk_storage.h> #include <trace/events/sock.h> #include <net/tcp.h> #include <net/busy_poll.h> #include <net/phonet/phonet.h> #include <linux/ethtool.h> #include "dev.h" static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); static void sock_def_write_space_wfree(struct sock *sk); static void sock_def_write_space(struct sock *sk); /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through * @user_ns: The user namespace of the capability to use * @cap: The capability to use * * Test to see if the opener of the socket had when the socket was * created and the current process has the capability @cap in the user * namespace @user_ns. */ bool sk_ns_capable(const struct sock *sk, struct user_namespace *user_ns, int cap) { return file_ns_capable(sk->sk_socket->file, user_ns, cap) && ns_capable(user_ns, cap); } EXPORT_SYMBOL(sk_ns_capable); /** * sk_capable - Socket global capability test * @sk: Socket to use a capability on or through * @cap: The global capability to use * * Test to see if the opener of the socket had when the socket was * created and the current process has the capability @cap in all user * namespaces. */ bool sk_capable(const struct sock *sk, int cap) { return sk_ns_capable(sk, &init_user_ns, cap); } EXPORT_SYMBOL(sk_capable); /** * sk_net_capable - Network namespace socket capability test * @sk: Socket to use a capability on or through * @cap: The capability to use * * Test to see if the opener of the socket had when the socket was created * and the current process has the capability @cap over the network namespace * the socket is a member of. */ bool sk_net_capable(const struct sock *sk, int cap) { return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); } EXPORT_SYMBOL(sk_net_capable); /* * Each address family might have different locking rules, so we have * one slock key per address family and separate keys for internal and * userspace sockets. 
*/ static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_kern_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket * locks is fast): */ #define _sock_locks(x) \ x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \ x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \ x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \ x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \ x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \ x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \ x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \ x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \ x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \ x "27" , x "28" , x "AF_CAN" , \ x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \ x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \ x "AF_MCTP" , \ x "AF_MAX" static const char *const af_family_key_strings[AF_MAX+1] = { _sock_locks("sk_lock-") }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { _sock_locks("slock-") }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { _sock_locks("clock-") }; static const char *const af_family_kern_key_strings[AF_MAX+1] = { _sock_locks("k-sk_lock-") }; static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { _sock_locks("k-slock-") }; static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { _sock_locks("k-clock-") }; static const char *const af_family_rlock_key_strings[AF_MAX+1] = { _sock_locks("rlock-") }; static const char *const af_family_wlock_key_strings[AF_MAX+1] = { _sock_locks("wlock-") }; static const char *const af_family_elock_key_strings[AF_MAX+1] = { _sock_locks("elock-") }; /* * sk_callback_lock and sk queues locking rules are per-address-family, * so split the lock classes by using a per-AF key: */ static struct lock_class_key af_callback_keys[AF_MAX]; static struct lock_class_key af_rlock_keys[AF_MAX]; static struct lock_class_key af_wlock_keys[AF_MAX]; static struct lock_class_key af_elock_keys[AF_MAX]; static struct lock_class_key af_kern_callback_keys[AF_MAX]; /* Run time adjustable parameters. */ __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; EXPORT_SYMBOL(sysctl_wmem_max); __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE; int sysctl_tstamp_allow_data __read_mostly = 1; DEFINE_STATIC_KEY_FALSE(memalloc_socks_key); EXPORT_SYMBOL_GPL(memalloc_socks_key); /** * sk_set_memalloc - sets %SOCK_MEMALLOC * @sk: socket to set it on * * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. * It's the responsibility of the admin to adjust min_free_kbytes * to meet the requirements */ void sk_set_memalloc(struct sock *sk) { sock_set_flag(sk, SOCK_MEMALLOC); sk->sk_allocation |= __GFP_MEMALLOC; static_branch_inc(&memalloc_socks_key); } EXPORT_SYMBOL_GPL(sk_set_memalloc); void sk_clear_memalloc(struct sock *sk) { sock_reset_flag(sk, SOCK_MEMALLOC); sk->sk_allocation &= ~__GFP_MEMALLOC; static_branch_dec(&memalloc_socks_key); /* * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward * progress of swapping. 
SOCK_MEMALLOC may be cleared while * it has rmem allocations due to the last swapfile being deactivated * but there is a risk that the socket is unusable due to exceeding * the rmem limits. Reclaim the reserves and obey rmem limits again. */ sk_mem_reclaim(sk); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int ret; unsigned int noreclaim_flag; /* these should have been dropped before queueing */ BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); noreclaim_flag = memalloc_noreclaim_save(); ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv, tcp_v6_do_rcv, tcp_v4_do_rcv, sk, skb); memalloc_noreclaim_restore(noreclaim_flag); return ret; } EXPORT_SYMBOL(__sk_backlog_rcv); void sk_error_report(struct sock *sk) { sk->sk_error_report(sk); switch (sk->sk_family) { case AF_INET: fallthrough; case AF_INET6: trace_inet_sk_error_report(sk); break; default: break; } } EXPORT_SYMBOL(sk_error_report); int sock_get_timeout(long timeo, void *optval, bool old_timeval) { struct __kernel_sock_timeval tv; if (timeo == MAX_SCHEDULE_TIMEOUT) { tv.tv_sec = 0; tv.tv_usec = 0; } else { tv.tv_sec = timeo / HZ; tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; } if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; *(struct old_timeval32 *)optval = tv32; return sizeof(tv32); } if (old_timeval) { struct __kernel_old_timeval old_tv; old_tv.tv_sec = tv.tv_sec; old_tv.tv_usec = tv.tv_usec; *(struct __kernel_old_timeval *)optval = old_tv; return sizeof(old_tv); } *(struct __kernel_sock_timeval *)optval = tv; return sizeof(tv); } EXPORT_SYMBOL(sock_get_timeout); int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, sockptr_t optval, int optlen, bool old_timeval) { if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; if (optlen < sizeof(tv32)) return -EINVAL; if (copy_from_sockptr(&tv32, optval, sizeof(tv32))) return -EFAULT; tv->tv_sec = tv32.tv_sec; tv->tv_usec = tv32.tv_usec; } else if (old_timeval) { struct __kernel_old_timeval old_tv; if (optlen < sizeof(old_tv)) return -EINVAL; if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv))) return -EFAULT; tv->tv_sec = old_tv.tv_sec; tv->tv_usec = old_tv.tv_usec; } else { if (optlen < sizeof(*tv)) return -EINVAL; if (copy_from_sockptr(tv, optval, sizeof(*tv))) return -EFAULT; } return 0; } EXPORT_SYMBOL(sock_copy_user_timeval); static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, bool old_timeval) { struct __kernel_sock_timeval tv; int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval); long val; if (err) return err; if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) return -EDOM; if (tv.tv_sec < 0) { static int warned __read_mostly; WRITE_ONCE(*timeo_p, 0); if (warned < 10 && net_ratelimit()) { warned++; pr_info("%s: `%s' (pid %d) tries to set negative timeout\n", __func__, current->comm, task_pid_nr(current)); } return 0; } val = MAX_SCHEDULE_TIMEOUT; if ((tv.tv_sec || tv.tv_usec) && (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))) val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ); WRITE_ONCE(*timeo_p, val); return 0; } static bool sock_needs_netstamp(const struct sock *sk) { switch (sk->sk_family) { case AF_UNSPEC: case AF_UNIX: return false; default: return true; } } static void sock_disable_timestamp(struct sock *sk, unsigned long flags) { if (sk->sk_flags & flags) { sk->sk_flags &= ~flags; if (sock_needs_netstamp(sk) && !(sk->sk_flags & SK_FLAGS_TIMESTAMP)) 
net_disable_timestamp(); } } int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; } if (!sk_rmem_schedule(sk, skb, skb->truesize)) { atomic_inc(&sk->sk_drops); return -ENOBUFS; } skb->dev = NULL; skb_set_owner_r(skb, sk); /* we escape from rcu protected region, make sure we dont leak * a norefcounted dst */ skb_dst_force(skb); spin_lock_irqsave(&list->lock, flags); sock_skb_set_dropcount(sk, skb); __skb_queue_tail(list, skb); spin_unlock_irqrestore(&list->lock, flags); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk); return 0; } EXPORT_SYMBOL(__sock_queue_rcv_skb); int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason) { enum skb_drop_reason drop_reason; int err; err = sk_filter(sk, skb); if (err) { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto out; } err = __sock_queue_rcv_skb(sk, skb); switch (err) { case -ENOMEM: drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; break; case -ENOBUFS: drop_reason = SKB_DROP_REASON_PROTO_MEM; break; default: drop_reason = SKB_NOT_DROPPED_YET; break; } out: if (reason) *reason = drop_reason; return err; } EXPORT_SYMBOL(sock_queue_rcv_skb_reason); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, unsigned int trim_cap, bool refcounted) { int rc = NET_RX_SUCCESS; if (sk_filter_trim_cap(sk, skb, trim_cap)) goto discard_and_relse; skb->dev = NULL; if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } if (nested) bh_lock_sock_nested(sk); else bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { /* * trylock + unlock semantics: */ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, _RET_IP_); } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; } bh_unlock_sock(sk); out: if (refcounted) sock_put(sk); return rc; discard_and_relse: kfree_skb(skb); goto out; } EXPORT_SYMBOL(__sk_receive_skb); INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, u32)); INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, u32)); struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst->obsolete && INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, dst, cookie) == NULL) { sk_tx_queue_clear(sk); WRITE_ONCE(sk->sk_dst_pending_confirm, 0); RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; } return dst; } EXPORT_SYMBOL(__sk_dst_check); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = sk_dst_get(sk); if (dst && dst->obsolete && INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, dst, cookie) == NULL) { sk_dst_reset(sk); dst_release(dst); return NULL; } return dst; } EXPORT_SYMBOL(sk_dst_check); static int sock_bindtoindex_locked(struct sock *sk, int ifindex) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES struct net *net = sock_net(sk); /* Sorry... */ ret = -EPERM; if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW)) goto out; ret = -EINVAL; if (ifindex < 0) goto out; /* Paired with all READ_ONCE() done locklessly. 
*/ WRITE_ONCE(sk->sk_bound_dev_if, ifindex); if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); sk_dst_reset(sk); ret = 0; out: #endif return ret; } int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk) { int ret; if (lock_sk) lock_sock(sk); ret = sock_bindtoindex_locked(sk, ifindex); if (lock_sk) release_sock(sk); return ret; } EXPORT_SYMBOL(sock_bindtoindex); static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES struct net *net = sock_net(sk); char devname[IFNAMSIZ]; int index; ret = -EINVAL; if (optlen < 0) goto out; /* Bind this socket to a particular device like "eth0", * as specified in the passed interface name. If the * name is "" or the option length is zero the socket * is not bound. */ if (optlen > IFNAMSIZ - 1) optlen = IFNAMSIZ - 1; memset(devname, 0, sizeof(devname)); ret = -EFAULT; if (copy_from_sockptr(devname, optval, optlen)) goto out; index = 0; if (devname[0] != '\0') { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_name_rcu(net, devname); if (dev) index = dev->ifindex; rcu_read_unlock(); ret = -ENODEV; if (!dev) goto out; } sockopt_lock_sock(sk); ret = sock_bindtoindex_locked(sk, index); sockopt_release_sock(sk); out: #endif return ret; } static int sock_getbindtodevice(struct sock *sk, sockptr_t optval, sockptr_t optlen, int len) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); struct net *net = sock_net(sk); char devname[IFNAMSIZ]; if (bound_dev_if == 0) { len = 0; goto zero; } ret = -EINVAL; if (len < IFNAMSIZ) goto out; ret = netdev_get_name(net, devname, bound_dev_if); if (ret) goto out; len = strlen(devname) + 1; ret = -EFAULT; if (copy_to_sockptr(optval, devname, len)) goto out; zero: ret = -EFAULT; if (copy_to_sockptr(optlen, &len, sizeof(int))) goto out; ret = 0; out: #endif return ret; } bool sk_mc_loop(const struct sock *sk) { if (dev_recursion_level()) return false; if (!sk) return true; /* IPV6_ADDRFORM can change sk->sk_family under us. 
*/ switch (READ_ONCE(sk->sk_family)) { case AF_INET: return inet_test_bit(MC_LOOP, sk); #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: return inet6_test_bit(MC6_LOOP, sk); #endif } WARN_ON_ONCE(1); return true; } EXPORT_SYMBOL(sk_mc_loop); void sock_set_reuseaddr(struct sock *sk) { lock_sock(sk); sk->sk_reuse = SK_CAN_REUSE; release_sock(sk); } EXPORT_SYMBOL(sock_set_reuseaddr); void sock_set_reuseport(struct sock *sk) { lock_sock(sk); sk->sk_reuseport = true; release_sock(sk); } EXPORT_SYMBOL(sock_set_reuseport); void sock_no_linger(struct sock *sk) { lock_sock(sk); WRITE_ONCE(sk->sk_lingertime, 0); sock_set_flag(sk, SOCK_LINGER); release_sock(sk); } EXPORT_SYMBOL(sock_no_linger); void sock_set_priority(struct sock *sk, u32 priority) { WRITE_ONCE(sk->sk_priority, priority); } EXPORT_SYMBOL(sock_set_priority); void sock_set_sndtimeo(struct sock *sk, s64 secs) { lock_sock(sk); if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1) WRITE_ONCE(sk->sk_sndtimeo, secs * HZ); else WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT); release_sock(sk); } EXPORT_SYMBOL(sock_set_sndtimeo); static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns) { if (val) { sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new); sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns); sock_set_flag(sk, SOCK_RCVTSTAMP); sock_enable_timestamp(sk, SOCK_TIMESTAMP); } else { sock_reset_flag(sk, SOCK_RCVTSTAMP); sock_reset_flag(sk, SOCK_RCVTSTAMPNS); } } void sock_enable_timestamps(struct sock *sk) { lock_sock(sk); __sock_set_timestamps(sk, true, false, true); release_sock(sk); } EXPORT_SYMBOL(sock_enable_timestamps); void sock_set_timestamp(struct sock *sk, int optname, bool valbool) { switch (optname) { case SO_TIMESTAMP_OLD: __sock_set_timestamps(sk, valbool, false, false); break; case SO_TIMESTAMP_NEW: __sock_set_timestamps(sk, valbool, true, false); break; case SO_TIMESTAMPNS_OLD: __sock_set_timestamps(sk, valbool, false, true); break; case SO_TIMESTAMPNS_NEW: __sock_set_timestamps(sk, valbool, true, true); break; } } static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) { struct net *net = sock_net(sk); struct net_device *dev = NULL; bool match = false; int *vclock_index; int i, num; if (sk->sk_bound_dev_if) dev = dev_get_by_index(net, sk->sk_bound_dev_if); if (!dev) { pr_err("%s: sock not bind to device\n", __func__); return -EOPNOTSUPP; } num = ethtool_get_phc_vclocks(dev, &vclock_index); dev_put(dev); for (i = 0; i < num; i++) { if (*(vclock_index + i) == phc_index) { match = true; break; } } if (num > 0) kfree(vclock_index); if (!match) return -EINVAL; WRITE_ONCE(sk->sk_bind_phc, phc_index); return 0; } int sock_set_timestamping(struct sock *sk, int optname, struct so_timestamping timestamping) { int val = timestamping.flags; int ret; if (val & ~SOF_TIMESTAMPING_MASK) return -EINVAL; if (val & SOF_TIMESTAMPING_OPT_ID_TCP && !(val & SOF_TIMESTAMPING_OPT_ID)) return -EINVAL; if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk_is_tcp(sk)) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return -EINVAL; if (val & SOF_TIMESTAMPING_OPT_ID_TCP) atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq); else atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una); } else { atomic_set(&sk->sk_tskey, 0); } } if (val & SOF_TIMESTAMPING_OPT_STATS && !(val & SOF_TIMESTAMPING_OPT_TSONLY)) return -EINVAL; if (val & SOF_TIMESTAMPING_BIND_PHC) { ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc); if (ret) return ret; } WRITE_ONCE(sk->sk_tsflags, val); sock_valbool_flag(sk, 
SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); else sock_disable_timestamp(sk, (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); return 0; } void sock_set_keepalive(struct sock *sk) { lock_sock(sk); if (sk->sk_prot->keepalive) sk->sk_prot->keepalive(sk, true); sock_valbool_flag(sk, SOCK_KEEPOPEN, true); release_sock(sk); } EXPORT_SYMBOL(sock_set_keepalive); static void __sock_set_rcvbuf(struct sock *sk, int val) { /* Ensure val * 2 fits into an int, to prevent max_t() from treating it * as a negative value. */ val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* We double it on the way in to account for "struct sk_buff" etc. * overhead. Applications assume that the SO_RCVBUF setting they make * will allow that much actual data to be received on that socket. * * Applications are unaware that "struct sk_buff" and other overheads * allocate from the receive buffer during socket buffer allocation. * * And after considering the possible alternatives, returning the value * we actually used in getsockopt is the most desirable behavior. */ WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); } void sock_set_rcvbuf(struct sock *sk, int val) { lock_sock(sk); __sock_set_rcvbuf(sk, val); release_sock(sk); } EXPORT_SYMBOL(sock_set_rcvbuf); static void __sock_set_mark(struct sock *sk, u32 val) { if (val != sk->sk_mark) { WRITE_ONCE(sk->sk_mark, val); sk_dst_reset(sk); } } void sock_set_mark(struct sock *sk, u32 val) { lock_sock(sk); __sock_set_mark(sk, val); release_sock(sk); } EXPORT_SYMBOL(sock_set_mark); static void sock_release_reserved_memory(struct sock *sk, int bytes) { /* Round down bytes to multiple of pages */ bytes = round_down(bytes, PAGE_SIZE); WARN_ON(bytes > sk->sk_reserved_mem); WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes); sk_mem_reclaim(sk); } static int sock_reserve_memory(struct sock *sk, int bytes) { long allocated; bool charged; int pages; if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk)) return -EOPNOTSUPP; if (!bytes) return 0; pages = sk_mem_pages(bytes); /* pre-charge to memcg */ charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!charged) return -ENOMEM; /* pre-charge to forward_alloc */ sk_memory_allocated_add(sk, pages); allocated = sk_memory_allocated(sk); /* If the system goes into memory pressure with this * precharge, give up and return error. */ if (allocated > sk_prot_mem_limits(sk, 1)) { sk_memory_allocated_sub(sk, pages); mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); return -ENOMEM; } sk_forward_alloc_add(sk, pages << PAGE_SHIFT); WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem + (pages << PAGE_SHIFT)); return 0; } void sockopt_lock_sock(struct sock *sk) { /* When current->bpf_ctx is set, the setsockopt is called from * a bpf prog. bpf has ensured the sk lock has been * acquired before calling setsockopt(). 
*/ if (has_current_bpf_ctx()) return; lock_sock(sk); } EXPORT_SYMBOL(sockopt_lock_sock); void sockopt_release_sock(struct sock *sk) { if (has_current_bpf_ctx()) return; release_sock(sk); } EXPORT_SYMBOL(sockopt_release_sock); bool sockopt_ns_capable(struct user_namespace *ns, int cap) { return has_current_bpf_ctx() || ns_capable(ns, cap); } EXPORT_SYMBOL(sockopt_ns_capable); bool sockopt_capable(int cap) { return has_current_bpf_ctx() || capable(cap); } EXPORT_SYMBOL(sockopt_capable); /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. */ int sk_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct so_timestamping timestamping; struct socket *sock = sk->sk_socket; struct sock_txtime sk_txtime; int val; int valbool; struct linger ling; int ret = 0; /* * Options without arguments */ if (optname == SO_BINDTODEVICE) return sock_setbindtodevice(sk, optval, optlen); if (optlen < sizeof(int)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; valbool = val ? 1 : 0; /* handle options which do not require locking the socket. */ switch (optname) { case SO_PRIORITY: if ((val >= 0 && val <= 6) || sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { sock_set_priority(sk, val); return 0; } return -EPERM; case SO_PASSSEC: assign_bit(SOCK_PASSSEC, &sock->flags, valbool); return 0; case SO_PASSCRED: assign_bit(SOCK_PASSCRED, &sock->flags, valbool); return 0; case SO_PASSPIDFD: assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool); return 0; case SO_TYPE: case SO_PROTOCOL: case SO_DOMAIN: case SO_ERROR: return -ENOPROTOOPT; #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: if (val < 0) return -EINVAL; WRITE_ONCE(sk->sk_ll_usec, val); return 0; case SO_PREFER_BUSY_POLL: if (valbool && !sockopt_capable(CAP_NET_ADMIN)) return -EPERM; WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); return 0; case SO_BUSY_POLL_BUDGET: if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) return -EPERM; if (val < 0 || val > U16_MAX) return -EINVAL; WRITE_ONCE(sk->sk_busy_poll_budget, val); return 0; #endif case SO_MAX_PACING_RATE: { unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val; unsigned long pacing_rate; if (sizeof(ulval) != sizeof(val) && optlen >= sizeof(ulval) && copy_from_sockptr(&ulval, optval, sizeof(ulval))) { return -EFAULT; } if (ulval != ~0UL) cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); /* Pairs with READ_ONCE() from sk_getsockopt() */ WRITE_ONCE(sk->sk_max_pacing_rate, ulval); pacing_rate = READ_ONCE(sk->sk_pacing_rate); if (ulval < pacing_rate) WRITE_ONCE(sk->sk_pacing_rate, ulval); return 0; } case SO_TXREHASH: if (val < -1 || val > 1) return -EINVAL; if ((u8)val == SOCK_TXREHASH_DEFAULT) val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); /* Paired with READ_ONCE() in tcp_rtx_synack() * and sk_getsockopt(). */ WRITE_ONCE(sk->sk_txrehash, (u8)val); return 0; case SO_PEEK_OFF: { int (*set_peek_off)(struct sock *sk, int val); set_peek_off = READ_ONCE(sock->ops)->set_peek_off; if (set_peek_off) ret = set_peek_off(sk, val); else ret = -EOPNOTSUPP; return ret; } } sockopt_lock_sock(sk); switch (optname) { case SO_DEBUG: if (val && !sockopt_capable(CAP_NET_ADMIN)) ret = -EACCES; else sock_valbool_flag(sk, SOCK_DBG, valbool); break; case SO_REUSEADDR: sk->sk_reuse = (valbool ? 
SK_CAN_REUSE : SK_NO_REUSE); break; case SO_REUSEPORT: sk->sk_reuseport = valbool; break; case SO_DONTROUTE: sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); sk_dst_reset(sk); break; case SO_BROADCAST: sock_valbool_flag(sk, SOCK_BROADCAST, valbool); break; case SO_SNDBUF: /* Don't error on this BSD doesn't and if you think * about it this is right. Otherwise apps have to * play 'guess the biggest size' games. RCVBUF/SNDBUF * are treated in BSD as hints */ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); set_sndbuf: /* Ensure val * 2 fits into an int, to prevent max_t() * from treating it as a negative value. */ val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(sk->sk_sndbuf, max_t(int, val * 2, SOCK_MIN_SNDBUF)); /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; case SO_SNDBUFFORCE: if (!sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; break; } /* No negative values (to prevent underflow, as val will be * multiplied by 2). */ if (val < 0) val = 0; goto set_sndbuf; case SO_RCVBUF: /* Don't error on this BSD doesn't and if you think * about it this is right. Otherwise apps have to * play 'guess the biggest size' games. RCVBUF/SNDBUF * are treated in BSD as hints */ __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max))); break; case SO_RCVBUFFORCE: if (!sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; break; } /* No negative values (to prevent underflow, as val will be * multiplied by 2). */ __sock_set_rcvbuf(sk, max(val, 0)); break; case SO_KEEPALIVE: if (sk->sk_prot->keepalive) sk->sk_prot->keepalive(sk, valbool); sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); break; case SO_OOBINLINE: sock_valbool_flag(sk, SOCK_URGINLINE, valbool); break; case SO_NO_CHECK: sk->sk_no_check_tx = valbool; break; case SO_LINGER: if (optlen < sizeof(ling)) { ret = -EINVAL; /* 1003.1g */ break; } if (copy_from_sockptr(&ling, optval, sizeof(ling))) { ret = -EFAULT; break; } if (!ling.l_onoff) { sock_reset_flag(sk, SOCK_LINGER); } else { unsigned long t_sec = ling.l_linger; if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ) WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT); else WRITE_ONCE(sk->sk_lingertime, t_sec * HZ); sock_set_flag(sk, SOCK_LINGER); } break; case SO_BSDCOMPAT: break; case SO_TIMESTAMP_OLD: case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: case SO_TIMESTAMPNS_NEW: sock_set_timestamp(sk, optname, valbool); break; case SO_TIMESTAMPING_NEW: case SO_TIMESTAMPING_OLD: if (optlen == sizeof(timestamping)) { if (copy_from_sockptr(&timestamping, optval, sizeof(timestamping))) { ret = -EFAULT; break; } } else { memset(&timestamping, 0, sizeof(timestamping)); timestamping.flags = val; } ret = sock_set_timestamping(sk, optname, timestamping); break; case SO_RCVLOWAT: { int (*set_rcvlowat)(struct sock *sk, int val) = NULL; if (val < 0) val = INT_MAX; if (sock) set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat; if (set_rcvlowat) ret = set_rcvlowat(sk, val); else WRITE_ONCE(sk->sk_rcvlowat, val ? 
: 1); break; } case SO_RCVTIMEO_OLD: case SO_RCVTIMEO_NEW: ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD); break; case SO_SNDTIMEO_OLD: case SO_SNDTIMEO_NEW: ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD); break; case SO_ATTACH_FILTER: { struct sock_fprog fprog; ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); if (!ret) ret = sk_attach_filter(&fprog, sk); break; } case SO_ATTACH_BPF: ret = -EINVAL; if (optlen == sizeof(u32)) { u32 ufd; ret = -EFAULT; if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) break; ret = sk_attach_bpf(ufd, sk); } break; case SO_ATTACH_REUSEPORT_CBPF: { struct sock_fprog fprog; ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); if (!ret) ret = sk_reuseport_attach_filter(&fprog, sk); break; } case SO_ATTACH_REUSEPORT_EBPF: ret = -EINVAL; if (optlen == sizeof(u32)) { u32 ufd; ret = -EFAULT; if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) break; ret = sk_reuseport_attach_bpf(ufd, sk); } break; case SO_DETACH_REUSEPORT_BPF: ret = reuseport_detach_prog(sk); break; case SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; case SO_LOCK_FILTER: if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool) ret = -EPERM; else sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool); break; case SO_MARK: if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } __sock_set_mark(sk, val); break; case SO_RCVMARK: sock_valbool_flag(sk, SOCK_RCVMARK, valbool); break; case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; case SO_WIFI_STATUS: sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); break; case SO_NOFCS: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; case SO_SELECT_ERR_QUEUE: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; case SO_INCOMING_CPU: reuseport_update_incoming_cpu(sk, val); break; case SO_CNX_ADVICE: if (val == 1) dst_negative_advice(sk); break; case SO_ZEROCOPY: if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { if (!(sk_is_tcp(sk) || (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP))) ret = -EOPNOTSUPP; } else if (sk->sk_family != PF_RDS) { ret = -EOPNOTSUPP; } if (!ret) { if (val < 0 || val > 1) ret = -EINVAL; else sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); } break; case SO_TXTIME: if (optlen != sizeof(struct sock_txtime)) { ret = -EINVAL; break; } else if (copy_from_sockptr(&sk_txtime, optval, sizeof(struct sock_txtime))) { ret = -EFAULT; break; } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) { ret = -EINVAL; break; } /* CLOCK_MONOTONIC is only used by sch_fq, and this packet * scheduler has enough safe guards. 
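 *
 * For reference, the configuration that the check below admits without
 * CAP_NET_ADMIN is the CLOCK_MONOTONIC form.  An illustrative userspace
 * sketch (fd is an already-created socket, not defined here):
 *
 *	struct sock_txtime cfg = {
 *		.clockid = CLOCK_MONOTONIC,
 *		.flags	 = SOF_TXTIME_REPORT_ERRORS,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_TXTIME, &cfg, sizeof(cfg));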
*/ if (sk_txtime.clockid != CLOCK_MONOTONIC && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } sock_valbool_flag(sk, SOCK_TXTIME, true); sk->sk_clockid = sk_txtime.clockid; sk->sk_txtime_deadline_mode = !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE); sk->sk_txtime_report_errors = !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS); break; case SO_BINDTOIFINDEX: ret = sock_bindtoindex_locked(sk, val); break; case SO_BUF_LOCK: if (val & ~SOCK_BUF_LOCK_MASK) { ret = -EINVAL; break; } sk->sk_userlocks = val | (sk->sk_userlocks & ~SOCK_BUF_LOCK_MASK); break; case SO_RESERVE_MEM: { int delta; if (val < 0) { ret = -EINVAL; break; } delta = val - sk->sk_reserved_mem; if (delta < 0) sock_release_reserved_memory(sk, -delta); else ret = sock_reserve_memory(sk, delta); break; } default: ret = -ENOPROTOOPT; break; } sockopt_release_sock(sk); return ret; } int sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { return sk_setsockopt(sock->sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_setsockopt); static const struct cred *sk_get_peer_cred(struct sock *sk) { const struct cred *cred; spin_lock(&sk->sk_peer_lock); cred = get_cred(sk->sk_peer_cred); spin_unlock(&sk->sk_peer_lock); return cred; } static void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred) { ucred->pid = pid_vnr(pid); ucred->uid = ucred->gid = -1; if (cred) { struct user_namespace *current_ns = current_user_ns(); ucred->uid = from_kuid_munged(current_ns, cred->euid); ucred->gid = from_kgid_munged(current_ns, cred->egid); } } static int groups_to_user(sockptr_t dst, const struct group_info *src) { struct user_namespace *user_ns = current_user_ns(); int i; for (i = 0; i < src->ngroups; i++) { gid_t gid = from_kgid_munged(user_ns, src->gid[i]); if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid))) return -EFAULT; } return 0; } int sk_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen) { struct socket *sock = sk->sk_socket; union { int val; u64 val64; unsigned long ulval; struct linger ling; struct old_timeval32 tm32; struct __kernel_old_timeval tm; struct __kernel_sock_timeval stm; struct sock_txtime txtime; struct so_timestamping timestamping; } v; int lv = sizeof(int); int len; if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; if (len < 0) return -EINVAL; memset(&v, 0, sizeof(v)); switch (optname) { case SO_DEBUG: v.val = sock_flag(sk, SOCK_DBG); break; case SO_DONTROUTE: v.val = sock_flag(sk, SOCK_LOCALROUTE); break; case SO_BROADCAST: v.val = sock_flag(sk, SOCK_BROADCAST); break; case SO_SNDBUF: v.val = READ_ONCE(sk->sk_sndbuf); break; case SO_RCVBUF: v.val = READ_ONCE(sk->sk_rcvbuf); break; case SO_REUSEADDR: v.val = sk->sk_reuse; break; case SO_REUSEPORT: v.val = sk->sk_reuseport; break; case SO_KEEPALIVE: v.val = sock_flag(sk, SOCK_KEEPOPEN); break; case SO_TYPE: v.val = sk->sk_type; break; case SO_PROTOCOL: v.val = sk->sk_protocol; break; case SO_DOMAIN: v.val = sk->sk_family; break; case SO_ERROR: v.val = -sock_error(sk); if (v.val == 0) v.val = xchg(&sk->sk_err_soft, 0); break; case SO_OOBINLINE: v.val = sock_flag(sk, SOCK_URGINLINE); break; case SO_NO_CHECK: v.val = sk->sk_no_check_tx; break; case SO_PRIORITY: v.val = READ_ONCE(sk->sk_priority); break; case SO_LINGER: lv = sizeof(v.ling); v.ling.l_onoff = sock_flag(sk, SOCK_LINGER); v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ; break; case SO_BSDCOMPAT: break; case SO_TIMESTAMP_OLD: v.val = 
sock_flag(sk, SOCK_RCVTSTAMP) && !sock_flag(sk, SOCK_TSTAMP_NEW) && !sock_flag(sk, SOCK_RCVTSTAMPNS); break; case SO_TIMESTAMPNS_OLD: v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW); break; case SO_TIMESTAMP_NEW: v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW); break; case SO_TIMESTAMPNS_NEW: v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW); break; case SO_TIMESTAMPING_OLD: case SO_TIMESTAMPING_NEW: lv = sizeof(v.timestamping); /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only * returning the flags when they were set through the same option. * Don't change the beviour for the old case SO_TIMESTAMPING_OLD. */ if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) { v.timestamping.flags = READ_ONCE(sk->sk_tsflags); v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc); } break; case SO_RCVTIMEO_OLD: case SO_RCVTIMEO_NEW: lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v, SO_RCVTIMEO_OLD == optname); break; case SO_SNDTIMEO_OLD: case SO_SNDTIMEO_NEW: lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v, SO_SNDTIMEO_OLD == optname); break; case SO_RCVLOWAT: v.val = READ_ONCE(sk->sk_rcvlowat); break; case SO_SNDLOWAT: v.val = 1; break; case SO_PASSCRED: v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); break; case SO_PASSPIDFD: v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags); break; case SO_PEERCRED: { struct ucred peercred; if (len > sizeof(peercred)) len = sizeof(peercred); spin_lock(&sk->sk_peer_lock); cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); spin_unlock(&sk->sk_peer_lock); if (copy_to_sockptr(optval, &peercred, len)) return -EFAULT; goto lenout; } case SO_PEERPIDFD: { struct pid *peer_pid; struct file *pidfd_file = NULL; int pidfd; if (len > sizeof(pidfd)) len = sizeof(pidfd); spin_lock(&sk->sk_peer_lock); peer_pid = get_pid(sk->sk_peer_pid); spin_unlock(&sk->sk_peer_lock); if (!peer_pid) return -ENODATA; pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file); put_pid(peer_pid); if (pidfd < 0) return pidfd; if (copy_to_sockptr(optval, &pidfd, len) || copy_to_sockptr(optlen, &len, sizeof(int))) { put_unused_fd(pidfd); fput(pidfd_file); return -EFAULT; } fd_install(pidfd, pidfd_file); return 0; } case SO_PEERGROUPS: { const struct cred *cred; int ret, n; cred = sk_get_peer_cred(sk); if (!cred) return -ENODATA; n = cred->group_info->ngroups; if (len < n * sizeof(gid_t)) { len = n * sizeof(gid_t); put_cred(cred); return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE; } len = n * sizeof(gid_t); ret = groups_to_user(optval, cred->group_info); put_cred(cred); if (ret) return ret; goto lenout; } case SO_PEERNAME: { struct sockaddr_storage address; lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2); if (lv < 0) return -ENOTCONN; if (lv < len) return -EINVAL; if (copy_to_sockptr(optval, &address, len)) return -EFAULT; goto lenout; } /* Dubious BSD thing... Probably nobody even uses it, but * the UNIX standard wants it for whatever reason... 
-DaveM */ case SO_ACCEPTCONN: v.val = sk->sk_state == TCP_LISTEN; break; case SO_PASSSEC: v.val = !!test_bit(SOCK_PASSSEC, &sock->flags); break; case SO_PEERSEC: return security_socket_getpeersec_stream(sock, optval, optlen, len); case SO_MARK: v.val = READ_ONCE(sk->sk_mark); break; case SO_RCVMARK: v.val = sock_flag(sk, SOCK_RCVMARK); break; case SO_RXQ_OVFL: v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; case SO_WIFI_STATUS: v.val = sock_flag(sk, SOCK_WIFI_STATUS); break; case SO_PEEK_OFF: if (!READ_ONCE(sock->ops)->set_peek_off) return -EOPNOTSUPP; v.val = READ_ONCE(sk->sk_peek_off); break; case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); break; case SO_BINDTODEVICE: return sock_getbindtodevice(sk, optval, optlen, len); case SO_GET_FILTER: len = sk_get_filter(sk, optval, len); if (len < 0) return len; goto lenout; case SO_LOCK_FILTER: v.val = sock_flag(sk, SOCK_FILTER_LOCKED); break; case SO_BPF_EXTENSIONS: v.val = bpf_tell_extensions(); break; case SO_SELECT_ERR_QUEUE: v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: v.val = READ_ONCE(sk->sk_ll_usec); break; case SO_PREFER_BUSY_POLL: v.val = READ_ONCE(sk->sk_prefer_busy_poll); break; #endif case SO_MAX_PACING_RATE: /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */ if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) { lv = sizeof(v.ulval); v.ulval = READ_ONCE(sk->sk_max_pacing_rate); } else { /* 32bit version */ v.val = min_t(unsigned long, ~0U, READ_ONCE(sk->sk_max_pacing_rate)); } break; case SO_INCOMING_CPU: v.val = READ_ONCE(sk->sk_incoming_cpu); break; case SO_MEMINFO: { u32 meminfo[SK_MEMINFO_VARS]; sk_get_meminfo(sk, meminfo); len = min_t(unsigned int, len, sizeof(meminfo)); if (copy_to_sockptr(optval, &meminfo, len)) return -EFAULT; goto lenout; } #ifdef CONFIG_NET_RX_BUSY_POLL case SO_INCOMING_NAPI_ID: v.val = READ_ONCE(sk->sk_napi_id); /* aggregate non-NAPI IDs down to 0 */ if (v.val < MIN_NAPI_ID) v.val = 0; break; #endif case SO_COOKIE: lv = sizeof(u64); if (len < lv) return -EINVAL; v.val64 = sock_gen_cookie(sk); break; case SO_ZEROCOPY: v.val = sock_flag(sk, SOCK_ZEROCOPY); break; case SO_TXTIME: lv = sizeof(v.txtime); v.txtime.clockid = sk->sk_clockid; v.txtime.flags |= sk->sk_txtime_deadline_mode ? SOF_TXTIME_DEADLINE_MODE : 0; v.txtime.flags |= sk->sk_txtime_report_errors ? SOF_TXTIME_REPORT_ERRORS : 0; break; case SO_BINDTOIFINDEX: v.val = READ_ONCE(sk->sk_bound_dev_if); break; case SO_NETNS_COOKIE: lv = sizeof(u64); if (len != lv) return -EINVAL; v.val64 = sock_net(sk)->net_cookie; break; case SO_BUF_LOCK: v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; break; case SO_RESERVE_MEM: v.val = READ_ONCE(sk->sk_reserved_mem); break; case SO_TXREHASH: /* Paired with WRITE_ONCE() in sk_setsockopt() */ v.val = READ_ONCE(sk->sk_txrehash); break; default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). */ return -ENOPROTOOPT; } if (len > lv) len = lv; if (copy_to_sockptr(optval, &v, len)) return -EFAULT; lenout: if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; return 0; } /* * Initialize an sk_lock. * * (We also register the sk_lock with the lock validator.) 
*/ static inline void sock_lock_init(struct sock *sk) { if (sk->sk_kern_sock) sock_lock_init_class_and_name( sk, af_family_kern_slock_key_strings[sk->sk_family], af_family_kern_slock_keys + sk->sk_family, af_family_kern_key_strings[sk->sk_family], af_family_kern_keys + sk->sk_family); else sock_lock_init_class_and_name( sk, af_family_slock_key_strings[sk->sk_family], af_family_slock_keys + sk->sk_family, af_family_key_strings[sk->sk_family], af_family_keys + sk->sk_family); } /* * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, * even temporarly, because of RCU lookups. sk_node should also be left as is. * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end */ static void sock_copy(struct sock *nsk, const struct sock *osk) { const struct proto *prot = READ_ONCE(osk->sk_prot); #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif /* If we move sk_tx_queue_mapping out of the private section, * we must check if sk_tx_queue_clear() is called after * sock_copy() in sk_clone_lock(). */ BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) < offsetof(struct sock, sk_dontcopy_begin) || offsetof(struct sock, sk_tx_queue_mapping) >= offsetof(struct sock, sk_dontcopy_end)); memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, prot->obj_size - offsetof(struct sock, sk_dontcopy_end), /* alloc is larger than struct, see sk_prot_alloc() */); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); #endif } static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, int family) { struct sock *sk; struct kmem_cache *slab; slab = prot->slab; if (slab != NULL) { sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); if (!sk) return sk; if (want_init_on_alloc(priority)) sk_prot_clear_nulls(sk, prot->obj_size); } else sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { if (security_sk_alloc(sk, family, priority)) goto out_free; if (!try_module_get(prot->owner)) goto out_free_sec; } return sk; out_free_sec: security_sk_free(sk); out_free: if (slab != NULL) kmem_cache_free(slab, sk); else kfree(sk); return NULL; } static void sk_prot_free(struct proto *prot, struct sock *sk) { struct kmem_cache *slab; struct module *owner; owner = prot->owner; slab = prot->slab; cgroup_sk_free(&sk->sk_cgrp_data); mem_cgroup_sk_free(sk); security_sk_free(sk); if (slab != NULL) kmem_cache_free(slab, sk); else kfree(sk); module_put(owner); } /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance * @kern: is this to be a kernel socket? */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern) { struct sock *sk; sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family); if (sk) { sk->sk_family = family; /* * See comment in struct sock definition to understand * why we need sk_prot_creator -acme */ sk->sk_prot = sk->sk_prot_creator = prot; sk->sk_kern_sock = kern; sock_lock_init(sk); sk->sk_net_refcnt = kern ? 
0 : 1; if (likely(sk->sk_net_refcnt)) { get_net_track(net, &sk->ns_tracker, priority); sock_inuse_add(net, 1); } else { __netns_tracker_alloc(net, &sk->ns_tracker, false, priority); } sock_net_set(sk, net); refcount_set(&sk->sk_wmem_alloc, 1); mem_cgroup_sk_alloc(sk); cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); sk_tx_queue_clear(sk); } return sk; } EXPORT_SYMBOL(sk_alloc); /* Sockets having SOCK_RCU_FREE will call this function after one RCU * grace period. This is the case for UDP sockets and TCP listeners. */ static void __sk_destruct(struct rcu_head *head) { struct sock *sk = container_of(head, struct sock, sk_rcu); struct sk_filter *filter; if (sk->sk_destruct) sk->sk_destruct(sk); filter = rcu_dereference_check(sk->sk_filter, refcount_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); } sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); #ifdef CONFIG_BPF_SYSCALL bpf_sk_storage_free(sk); #endif if (atomic_read(&sk->sk_omem_alloc)) pr_debug("%s: optmem leakage (%d bytes) detected\n", __func__, atomic_read(&sk->sk_omem_alloc)); if (sk->sk_frag.page) { put_page(sk->sk_frag.page); sk->sk_frag.page = NULL; } /* We do not need to acquire sk->sk_peer_lock, we are the last user. */ put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); if (likely(sk->sk_net_refcnt)) put_net_track(sock_net(sk), &sk->ns_tracker); else __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); sk_prot_free(sk->sk_prot_creator, sk); } void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); if (rcu_access_pointer(sk->sk_reuseport_cb)) { reuseport_detach_sock(sk); use_call_rcu = true; } if (use_call_rcu) call_rcu(&sk->sk_rcu, __sk_destruct); else __sk_destruct(&sk->sk_rcu); } static void __sk_free(struct sock *sk) { if (likely(sk->sk_net_refcnt)) sock_inuse_add(sock_net(sk), -1); if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk))) sock_diag_broadcast_destroy(sk); else sk_destruct(sk); } void sk_free(struct sock *sk) { /* * We subtract one from sk_wmem_alloc and can know if * some packets are still in some tx queue. 
* If not null, sock_wfree() will call __sk_free(sk) later */ if (refcount_dec_and_test(&sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sk_free); static void sk_init_common(struct sock *sk) { skb_queue_head_init(&sk->sk_receive_queue); skb_queue_head_init(&sk->sk_write_queue); skb_queue_head_init(&sk->sk_error_queue); rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_receive_queue.lock, af_rlock_keys + sk->sk_family, af_family_rlock_key_strings[sk->sk_family]); lockdep_set_class_and_name(&sk->sk_write_queue.lock, af_wlock_keys + sk->sk_family, af_family_wlock_key_strings[sk->sk_family]); lockdep_set_class_and_name(&sk->sk_error_queue.lock, af_elock_keys + sk->sk_family, af_family_elock_key_strings[sk->sk_family]); lockdep_set_class_and_name(&sk->sk_callback_lock, af_callback_keys + sk->sk_family, af_family_clock_key_strings[sk->sk_family]); } /** * sk_clone_lock - clone a socket, and lock its clone * @sk: the socket to clone * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) */ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { struct proto *prot = READ_ONCE(sk->sk_prot); struct sk_filter *filter; bool is_charged = true; struct sock *newsk; newsk = sk_prot_alloc(prot, priority, sk->sk_family); if (!newsk) goto out; sock_copy(newsk, sk); newsk->sk_prot_creator = prot; /* SANITY */ if (likely(newsk->sk_net_refcnt)) { get_net_track(sock_net(newsk), &newsk->ns_tracker, priority); sock_inuse_add(sock_net(newsk), 1); } else { /* Kernel sockets are not elevating the struct net refcount. * Instead, use a tracker to more easily detect if a layer * is not properly dismantling its kernel sockets at netns * destroy time. */ __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, false, priority); } sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; newsk->sk_backlog.len = 0; atomic_set(&newsk->sk_rmem_alloc, 0); /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ refcount_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); sk_init_common(newsk); newsk->sk_dst_cache = NULL; newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; newsk->sk_reserved_mem = 0; atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; cgroup_sk_clone(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) /* though it's an empty new sock, the charging may fail * if sysctl_optmem_max was changed between creation of * original socket and cloning */ is_charged = sk_filter_charge(newsk, filter); RCU_INIT_POINTER(newsk->sk_filter, filter); rcu_read_unlock(); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { /* We need to make sure that we don't uncharge the new * socket if we couldn't charge it in the first place * as otherwise we uncharge the parent's filter. 
*/ if (!is_charged) RCU_INIT_POINTER(newsk->sk_filter, NULL); sk_free_unlock_clone(newsk); newsk = NULL; goto out; } RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); if (bpf_sk_storage_clone(sk, newsk)) { sk_free_unlock_clone(newsk); newsk = NULL; goto out; } /* Clear sk_user_data if parent had the pointer tagged * as not suitable for copying when cloning. */ if (sk_user_data_is_nocopy(newsk)) newsk->sk_user_data = NULL; newsk->sk_err = 0; newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); /* Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.rst for details) */ smp_wmb(); refcount_set(&newsk->sk_refcnt, 2); sk_set_socket(newsk, NULL); sk_tx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); if (newsk->sk_prot->sockets_allocated) sk_sockets_allocated_inc(newsk); if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); out: return newsk; } EXPORT_SYMBOL_GPL(sk_clone_lock); void sk_free_unlock_clone(struct sock *sk) { /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ sk->sk_destruct = NULL; bh_unlock_sock(sk); sk_free(sk); } EXPORT_SYMBOL_GPL(sk_free_unlock_clone); static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) { bool is_ipv6 = false; u32 max_size; #if IS_ENABLED(CONFIG_IPV6) is_ipv6 = (sk->sk_family == AF_INET6 && !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); #endif /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) : READ_ONCE(dst->dev->gso_ipv4_max_size); if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) max_size = GSO_LEGACY_MAX_SIZE; return max_size - (MAX_TCP_HEADER + 1); } void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; sk->sk_route_caps = dst->dev->features; if (sk_is_tcp(sk)) sk->sk_route_caps |= NETIF_F_GSO; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; if (unlikely(sk->sk_gso_disabled)) sk->sk_route_caps &= ~NETIF_F_GSO_MASK; if (sk_can_gso(sk)) { if (dst->header_len && !xfrm_dst_offload_ok(dst)) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); } } sk->sk_gso_max_segs = max_segs; sk_dst_set(sk, dst); } EXPORT_SYMBOL_GPL(sk_setup_caps); /* * Simple resource managers for sockets. */ /* * Write buffer destructor automatically called from kfree_skb. */ void sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; unsigned int len = skb->truesize; bool free; if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { if (sock_flag(sk, SOCK_RCU_FREE) && sk->sk_write_space == sock_def_write_space) { rcu_read_lock(); free = refcount_sub_and_test(len, &sk->sk_wmem_alloc); sock_def_write_space_wfree(sk); rcu_read_unlock(); if (unlikely(free)) __sk_free(sk); return; } /* * Keep a reference on sk_wmem_alloc, this will be released * after sk_write_space() call */ WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc)); sk->sk_write_space(sk); len = 1; } /* * if sk_wmem_alloc reaches 0, we must finish what sk_free() * could not do because of in-flight packets */ if (refcount_sub_and_test(len, &sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sock_wfree); /* This variant of sock_wfree() is used by TCP, * since it sets SOCK_USE_WRITE_QUEUE. 
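 *
 * As a recap of the write-memory accounting used by these destructors:
 * sk_alloc() starts sk_wmem_alloc with a bias of one, skb_set_owner_w()
 * adds skb->truesize for every queued skb, sock_wfree() and __sock_wfree()
 * subtract it again, and sk_free() drops the bias.  Whichever caller
 * removes the last unit, destructor or sk_free(), ends up in __sk_free().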
*/ void __sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) __sk_free(sk); } void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) { skb_orphan(skb); skb->sk = sk; #ifdef CONFIG_INET if (unlikely(!sk_fullsock(sk))) { skb->destructor = sock_edemux; sock_hold(sk); return; } #endif skb->destructor = sock_wfree; skb_set_hash_from_sk(skb, sk); /* * We used to take a refcount on sk, but following operation * is enough to guarantee sk_free() wont free this sock until * all in-flight packets are completed */ refcount_add(skb->truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(skb_set_owner_w); static bool can_skb_orphan_partial(const struct sk_buff *skb) { #ifdef CONFIG_TLS_DEVICE /* Drivers depend on in-order delivery for crypto offload, * partial orphan breaks out-of-order-OK logic. */ if (skb->decrypted) return false; #endif return (skb->destructor == sock_wfree || (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree)); } /* This helper is used by netem, as it can hold packets in its * delay queue. We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. * But we also want to keep skb->sk set because some packet schedulers * rely on it (sch_fq for example). */ void skb_orphan_partial(struct sk_buff *skb) { if (skb_is_tcp_pure_ack(skb)) return; if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk)) return; skb_orphan(skb); } EXPORT_SYMBOL(skb_orphan_partial); /* * Read buffer destructor automatically called from kfree_skb. */ void sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; unsigned int len = skb->truesize; atomic_sub(len, &sk->sk_rmem_alloc); sk_mem_uncharge(sk, len); } EXPORT_SYMBOL(sock_rfree); /* * Buffer destructor for skbs that are not used directly in read or write * path, e.g. for error handler skbs. Automatically called from kfree_skb. */ void sock_efree(struct sk_buff *skb) { sock_put(skb->sk); } EXPORT_SYMBOL(sock_efree); /* Buffer destructor for prefetch/receive path where reference count may * not be held, e.g. for listen sockets. */ #ifdef CONFIG_INET void sock_pfree(struct sk_buff *skb) { struct sock *sk = skb->sk; if (!sk_is_refcounted(sk)) return; if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { inet_reqsk(sk)->rsk_listener = NULL; reqsk_free(inet_reqsk(sk)); return; } sock_gen_put(sk); } EXPORT_SYMBOL(sock_pfree); #endif /* CONFIG_INET */ kuid_t sock_i_uid(struct sock *sk) { kuid_t uid; read_lock_bh(&sk->sk_callback_lock); uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID; read_unlock_bh(&sk->sk_callback_lock); return uid; } EXPORT_SYMBOL(sock_i_uid); unsigned long __sock_i_ino(struct sock *sk) { unsigned long ino; read_lock(&sk->sk_callback_lock); ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; read_unlock(&sk->sk_callback_lock); return ino; } EXPORT_SYMBOL(__sock_i_ino); unsigned long sock_i_ino(struct sock *sk) { unsigned long ino; local_bh_disable(); ino = __sock_i_ino(sk); local_bh_enable(); return ino; } EXPORT_SYMBOL(sock_i_ino); /* * Allocate a skb from the socket's send buffer. 
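 *
 * An illustrative call (hypothetical caller; sk, len and skb are assumed
 * from its context): allocate a buffer charged to the socket and fail
 * softly once the send buffer is exhausted:
 *
 *	skb = sock_wmalloc(sk, len, 0, GFP_KERNEL);
 *	if (!skb)
 *		return -ENOBUFS;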
*/ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { if (force || refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) { struct sk_buff *skb = alloc_skb(size, priority); if (skb) { skb_set_owner_w(skb, sk); return skb; } } return NULL; } EXPORT_SYMBOL(sock_wmalloc); static void sock_ofree(struct sk_buff *skb) { struct sock *sk = skb->sk; atomic_sub(skb->truesize, &sk->sk_omem_alloc); } struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, gfp_t priority) { struct sk_buff *skb; /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max)) return NULL; skb = alloc_skb(size, priority); if (!skb) return NULL; atomic_add(skb->truesize, &sk->sk_omem_alloc); skb->sk = sk; skb->destructor = sock_ofree; return skb; } /* * Allocate a memory block from the socket's option memory buffer. */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); if ((unsigned int)size <= optmem_max && atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { void *mem; /* First do the add, to avoid the race if kmalloc * might sleep. */ atomic_add(size, &sk->sk_omem_alloc); mem = kmalloc(size, priority); if (mem) return mem; atomic_sub(size, &sk->sk_omem_alloc); } return NULL; } EXPORT_SYMBOL(sock_kmalloc); /* Free an option memory block. Note, we actually want the inline * here as this allows gcc to detect the nullify and fold away the * condition entirely. */ static inline void __sock_kfree_s(struct sock *sk, void *mem, int size, const bool nullify) { if (WARN_ON_ONCE(!mem)) return; if (nullify) kfree_sensitive(mem); else kfree(mem); atomic_sub(size, &sk->sk_omem_alloc); } void sock_kfree_s(struct sock *sk, void *mem, int size) { __sock_kfree_s(sk, mem, size, false); } EXPORT_SYMBOL(sock_kfree_s); void sock_kzfree_s(struct sock *sk, void *mem, int size) { __sock_kfree_s(sk, mem, size, true); } EXPORT_SYMBOL(sock_kzfree_s); /* It is almost wait_for_tcp_memory minus release_sock/lock_sock. I think, these locks should be removed for datagram sockets. 
*/ static long sock_wait_for_wmem(struct sock *sk, long timeo) { DEFINE_WAIT(wait); sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); for (;;) { if (!timeo) break; if (signal_pending(current)) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) break; if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) break; if (READ_ONCE(sk->sk_err)) break; timeo = schedule_timeout(timeo); } finish_wait(sk_sleep(sk), &wait); return timeo; } /* * Generic send/receive buffer handlers */ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, unsigned long data_len, int noblock, int *errcode, int max_page_order) { struct sk_buff *skb; long timeo; int err; timeo = sock_sndtimeo(sk, noblock); for (;;) { err = sock_error(sk); if (err != 0) goto failure; err = -EPIPE; if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto failure; if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf)) break; sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; if (!timeo) goto failure; if (signal_pending(current)) goto interrupted; timeo = sock_wait_for_wmem(sk, timeo); } skb = alloc_skb_with_frags(header_len, data_len, max_page_order, errcode, sk->sk_allocation); if (skb) skb_set_owner_w(skb, sk); return skb; interrupted: err = sock_intr_errno(timeo); failure: *errcode = err; return NULL; } EXPORT_SYMBOL(sock_alloc_send_pskb); int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, struct sockcm_cookie *sockc) { u32 tsflags; switch (cmsg->cmsg_type) { case SO_MARK: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) return -EINVAL; sockc->mark = *(u32 *)CMSG_DATA(cmsg); break; case SO_TIMESTAMPING_OLD: case SO_TIMESTAMPING_NEW: if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) return -EINVAL; tsflags = *(u32 *)CMSG_DATA(cmsg); if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK) return -EINVAL; sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK; sockc->tsflags |= tsflags; break; case SCM_TXTIME: if (!sock_flag(sk, SOCK_TXTIME)) return -EINVAL; if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64))) return -EINVAL; sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); break; /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. 
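 *
 * For illustration only: the userspace counterpart of the SO_MARK case
 * handled above (which requires CAP_NET_RAW or CAP_NET_ADMIN, as checked
 * there) is a per-sendmsg() ancillary item along these lines; buffer
 * sizing and error handling are elided and the mark value is made up:
 *
 *	uint32_t mark = 42;
 *	char cbuf[CMSG_SPACE(sizeof(mark))];
 *	struct msghdr msg = {
 *		.msg_control	= cbuf,
 *		.msg_controllen	= sizeof(cbuf),
 *	};
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type  = SO_MARK;
 *	cm->cmsg_len   = CMSG_LEN(sizeof(mark));
 *	memcpy(CMSG_DATA(cm), &mark, sizeof(mark));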
*/ case SCM_RIGHTS: case SCM_CREDENTIALS: break; default: return -EINVAL; } return 0; } EXPORT_SYMBOL(__sock_cmsg_send); int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc) { struct cmsghdr *cmsg; int ret; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_SOCKET) continue; ret = __sock_cmsg_send(sk, cmsg, sockc); if (ret) return ret; } return 0; } EXPORT_SYMBOL(sock_cmsg_send); static void sk_enter_memory_pressure(struct sock *sk) { if (!sk->sk_prot->enter_memory_pressure) return; sk->sk_prot->enter_memory_pressure(sk); } static void sk_leave_memory_pressure(struct sock *sk) { if (sk->sk_prot->leave_memory_pressure) { INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure, tcp_leave_memory_pressure, sk); } else { unsigned long *memory_pressure = sk->sk_prot->memory_pressure; if (memory_pressure && READ_ONCE(*memory_pressure)) WRITE_ONCE(*memory_pressure, 0); } } DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); /** * skb_page_frag_refill - check that a page_frag contains enough room * @sz: minimum size of the fragment we want to get * @pfrag: pointer to page_frag * @gfp: priority for memory allocation * * Note: While this allocator tries to use high order pages, there is * no guarantee that allocations succeed. Therefore, @sz MUST be * less or equal than PAGE_SIZE. */ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) { if (pfrag->page) { if (page_ref_count(pfrag->page) == 1) { pfrag->offset = 0; return true; } if (pfrag->offset + sz <= pfrag->size) return true; put_page(pfrag->page); } pfrag->offset = 0; if (SKB_FRAG_PAGE_ORDER && !static_branch_unlikely(&net_high_order_alloc_disable_key)) { /* Avoid direct reclaim but allow kswapd to wake */ pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; return true; } } pfrag->page = alloc_page(gfp); if (likely(pfrag->page)) { pfrag->size = PAGE_SIZE; return true; } return false; } EXPORT_SYMBOL(skb_page_frag_refill); bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) { if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation))) return true; sk_enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); return false; } EXPORT_SYMBOL(sk_page_frag_refill); void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { DEFINE_WAIT(wait); for (;;) { prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_bh(&sk->sk_lock.slock); schedule(); spin_lock_bh(&sk->sk_lock.slock); if (!sock_owned_by_user(sk)) break; } finish_wait(&sk->sk_lock.wq, &wait); } void __release_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { struct sk_buff *skb, *next; while ((skb = sk->sk_backlog.head) != NULL) { sk->sk_backlog.head = sk->sk_backlog.tail = NULL; spin_unlock_bh(&sk->sk_lock.slock); do { next = skb->next; prefetch(next); DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb)); skb_mark_not_on_list(skb); sk_backlog_rcv(sk, skb); cond_resched(); skb = next; } while (skb != NULL); spin_lock_bh(&sk->sk_lock.slock); } /* * Doing the zeroing here guarantee we can not loop forever * while a wild producer attempts to flood us. 
*/ sk->sk_backlog.len = 0; } void __sk_flush_backlog(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); __release_sock(sk); if (sk->sk_prot->release_cb) INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, tcp_release_cb, sk); spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(__sk_flush_backlog); /** * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on * @timeo: for how long * @skb: last skb seen on sk_receive_queue * * Now socket state including sk->sk_err is changed only under lock, * hence we may omit checks after joining wait queue. * We check receive queue before schedule() only as optimization; * it is very likely that release_sock() added new data. */ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { DEFINE_WAIT_FUNC(wait, woken_wake_function); int rc; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); return rc; } EXPORT_SYMBOL(sk_wait_data); /** * __sk_mem_raise_allocated - increase memory_allocated * @sk: socket * @size: memory size to allocate * @amt: pages to allocate * @kind: allocation type * * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc. * * Unlike the globally shared limits among the sockets under same protocol, * consuming the budget of a memcg won't have direct effect on other ones. * So be optimistic about memcg's tolerance, and leave the callers to decide * whether or not to raise allocated through sk_under_memory_pressure() or * its variants. */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL; struct proto *prot = sk->sk_prot; bool charged = false; long allocated; sk_memory_allocated_add(sk, amt); allocated = sk_memory_allocated(sk); if (memcg) { if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge())) goto suppress_allocation; charged = true; } /* Under limit. */ if (allocated <= sk_prot_mem_limits(sk, 0)) { sk_leave_memory_pressure(sk); return 1; } /* Under pressure. */ if (allocated > sk_prot_mem_limits(sk, 1)) sk_enter_memory_pressure(sk); /* Over hard limit. */ if (allocated > sk_prot_mem_limits(sk, 2)) goto suppress_allocation; /* Guarantee minimum buffer size under pressure (either global * or memcg) to make sure features described in RFC 7323 (TCP * Extensions for High Performance) work properly. * * This rule does NOT stand when exceeds global or memcg's hard * limit, or else a DoS attack can be taken place by spawning * lots of sockets whose usage are under minimum buffer size. */ if (kind == SK_MEM_RECV) { if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) return 1; } else { /* SK_MEM_SEND */ int wmem0 = sk_get_wmem0(sk, prot); if (sk->sk_type == SOCK_STREAM) { if (sk->sk_wmem_queued < wmem0) return 1; } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) { return 1; } } if (sk_has_memory_pressure(sk)) { u64 alloc; /* The following 'average' heuristic is within the * scope of global accounting, so it only makes * sense for global memory pressure. */ if (!sk_under_global_memory_pressure(sk)) return 1; /* Try to be fair among all the sockets under global * pressure by allowing the ones that below average * usage to raise. 
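 *
 * Illustrative numbers (made up, not measurements): with a hard limit
 * sk_prot_mem_limits(sk, 2) of 1,000,000 pages and 10,000 allocated
 * sockets, the check below keeps granting memory to a socket whose own
 * rounded-up usage stays under 100 pages, i.e. under its average share
 * of the hard limit.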
*/ alloc = sk_sockets_allocated_read_positive(sk); if (sk_prot_mem_limits(sk, 2) > alloc * sk_mem_pages(sk->sk_wmem_queued + atomic_read(&sk->sk_rmem_alloc) + sk->sk_forward_alloc)) return 1; } suppress_allocation: if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { sk_stream_moderate_sndbuf(sk); /* Fail only if socket is _under_ its sndbuf. * In this case we cannot block, so that we have to fail. */ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ if (memcg && !charged) { mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge() | __GFP_NOFAIL); } return 1; } } if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) trace_sock_exceed_buf_limit(sk, prot, allocated, kind); sk_memory_allocated_sub(sk, amt); if (charged) mem_cgroup_uncharge_skmem(memcg, amt); return 0; } /** * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated * @sk: socket * @size: memory size to allocate * @kind: allocation type * * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means * rmem allocation. This function assumes that protocols which have * memory_pressure use sk_wmem_queued as write buffer accounting. */ int __sk_mem_schedule(struct sock *sk, int size, int kind) { int ret, amt = sk_mem_pages(size); sk_forward_alloc_add(sk, amt << PAGE_SHIFT); ret = __sk_mem_raise_allocated(sk, size, amt, kind); if (!ret) sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT)); return ret; } EXPORT_SYMBOL(__sk_mem_schedule); /** * __sk_mem_reduce_allocated - reclaim memory_allocated * @sk: socket * @amount: number of quanta * * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc */ void __sk_mem_reduce_allocated(struct sock *sk, int amount) { sk_memory_allocated_sub(sk, amount); if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); if (sk_under_global_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } /** * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated * @sk: socket * @amount: number of bytes (rounded down to a PAGE_SIZE multiple) */ void __sk_mem_reclaim(struct sock *sk, int amount) { amount >>= PAGE_SHIFT; sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT)); __sk_mem_reduce_allocated(sk, amount); } EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) { WRITE_ONCE(sk->sk_peek_off, val); return 0; } EXPORT_SYMBOL_GPL(sk_set_peek_off); /* * Set of default routines for initialising struct proto_ops when * the protocol does not support a particular function. In certain * cases where it makes no sense for a protocol to have a "do nothing" * function, some default processing is provided. 
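 *
 * A hypothetical user (sketch only; PF_EXAMPLE and example_ops are not
 * real names) simply plugs these stubs into its proto_ops for the
 * operations it does not support:
 *
 *	static const struct proto_ops example_ops = {
 *		.family	= PF_EXAMPLE,
 *		.owner	= THIS_MODULE,
 *		.bind	= sock_no_bind,
 *		.accept	= sock_no_accept,
 *		.listen	= sock_no_listen,
 *		.mmap	= sock_no_mmap,
 *		...
 *	};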
*/ int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_bind); int sock_no_connect(struct socket *sock, struct sockaddr *saddr, int len, int flags) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_connect); int sock_no_socketpair(struct socket *sock1, struct socket *sock2) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_socketpair); int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_accept); int sock_no_getname(struct socket *sock, struct sockaddr *saddr, int peer) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_getname); int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_ioctl); int sock_no_listen(struct socket *sock, int backlog) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_listen); int sock_no_shutdown(struct socket *sock, int how) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_shutdown); int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg); int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg_locked); int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_recvmsg); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { /* Mirror missing mmap method error code */ return -ENODEV; } EXPORT_SYMBOL(sock_no_mmap); /* * When a file is received (via SCM_RIGHTS, etc), we must bump the * various sock-based usage counts. */ void __receive_sock(struct file *file) { struct socket *sock; sock = sock_from_file(file); if (sock) { sock_update_netprioidx(&sock->sk->sk_cgrp_data); sock_update_classid(&sock->sk->sk_cgrp_data); } } /* * Default Socket Callbacks */ static void sock_def_wakeup(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); rcu_read_unlock(); } static void sock_def_error_report(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, EPOLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); } void sock_def_readable(struct sock *sk) { struct socket_wq *wq; trace_sk_data_ready(sk); rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | EPOLLRDNORM | EPOLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } static void sock_def_write_space(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ if (sock_writeable(sk)) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } /* An optimised version of sock_def_write_space(), should only be called * for SOCK_RCU_FREE sockets under RCU read section and after putting * ->sk_wmem_alloc. */ static void sock_def_write_space_wfree(struct sock *sk) { /* Do not wake up a writer until he can make "significant" * progress. 
--DaveM */ if (sock_writeable(sk)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); /* rely on refcount_sub from sock_wfree() */ smp_mb__after_atomic(); if (wq && waitqueue_active(&wq->wait)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } } static void sock_def_destruct(struct sock *sk) { } void sk_send_sigurg(struct sock *sk) { if (sk->sk_socket && sk->sk_socket->file) if (send_sigurg(&sk->sk_socket->file->f_owner)) sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); } EXPORT_SYMBOL(sk_send_sigurg); void sk_reset_timer(struct sock *sk, struct timer_list* timer, unsigned long expires) { if (!mod_timer(timer, expires)) sock_hold(sk); } EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) { if (del_timer(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer); void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) { if (del_timer_sync(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer_sync); void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) { sk_init_common(sk); sk->sk_send_head = NULL; timer_setup(&sk->sk_timer, NULL, 0); sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); sk->sk_state = TCP_CLOSE; sk->sk_use_task_frag = true; sk_set_socket(sk, sock); sock_set_flag(sk, SOCK_ZAPPED); if (sock) { sk->sk_type = sock->type; RCU_INIT_POINTER(sk->sk_wq, &sock->wq); sock->sk = sk; } else { RCU_INIT_POINTER(sk->sk_wq, NULL); } sk->sk_uid = uid; rwlock_init(&sk->sk_callback_lock); if (sk->sk_kern_sock) lockdep_set_class_and_name( &sk->sk_callback_lock, af_kern_callback_keys + sk->sk_family, af_family_kern_clock_key_strings[sk->sk_family]); else lockdep_set_class_and_name( &sk->sk_callback_lock, af_callback_keys + sk->sk_family, af_family_clock_key_strings[sk->sk_family]); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; sk->sk_write_space = sock_def_write_space; sk->sk_error_report = sock_def_error_report; sk->sk_destruct = sock_def_destruct; sk->sk_frag.page = NULL; sk->sk_frag.offset = 0; sk->sk_peek_off = -1; sk->sk_peer_pid = NULL; sk->sk_peer_cred = NULL; spin_lock_init(&sk->sk_peer_lock); sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; #if BITS_PER_LONG==32 seqlock_init(&sk->sk_stamp_seq); #endif atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); #endif sk->sk_max_pacing_rate = ~0UL; sk->sk_pacing_rate = ~0UL; WRITE_ONCE(sk->sk_pacing_shift, 10); sk->sk_incoming_cpu = -1; sk_rx_queue_clear(sk); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.rst for details) */ smp_wmb(); refcount_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data_uid); void sock_init_data(struct socket *sock, struct sock *sk) { kuid_t uid = sock ? SOCK_INODE(sock)->i_uid : make_kuid(sock_net(sk)->user_ns, 0); sock_init_data_uid(sock, sk, uid); } EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) { /* The sk_lock has mutex_lock() semantics here. 
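 *
 * The usual caller pattern (illustrative), paired with release_sock()
 * defined below:
 *
 *	lock_sock(sk);
 *	... update socket state that softirq handlers may also touch ...
 *	release_sock(sk);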
*/ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (sock_owned_by_user_nocheck(sk)) __lock_sock(sk); sk->sk_lock.owned = 1; spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(lock_sock_nested); void release_sock(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); if (sk->sk_prot->release_cb) INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, tcp_release_cb, sk); sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (!sock_owned_by_user_nocheck(sk)) { /* * Fast path return with bottom halves disabled and * sock::sk_lock.slock held. * * The 'mutex' is not contended and holding * sock::sk_lock.slock prevents all other lockers to * proceed so the corresponding unlock_sock_fast() can * avoid the slow path of release_sock() completely and * just release slock. * * From a semantical POV this is equivalent to 'acquiring' * the 'mutex', hence the corresponding lockdep * mutex_release() has to happen in the fast path of * unlock_sock_fast(). */ return false; } __lock_sock(sk); sk->sk_lock.owned = 1; __acquire(&sk->sk_lock.slock); spin_unlock_bh(&sk->sk_lock.slock); return true; } EXPORT_SYMBOL(__lock_sock_fast); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32) { struct sock *sk = sock->sk; struct timespec64 ts; sock_enable_timestamp(sk, SOCK_TIMESTAMP); ts = ktime_to_timespec64(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return -ENOENT; if (ts.tv_sec == 0) { ktime_t kt = ktime_get_real(); sock_write_timestamp(sk, kt); ts = ktime_to_timespec64(kt); } if (timeval) ts.tv_nsec /= 1000; #ifdef CONFIG_COMPAT_32BIT_TIME if (time32) return put_old_timespec32(&ts, userstamp); #endif #ifdef CONFIG_SPARC64 /* beware of padding in sparc64 timeval */ if (timeval && !in_compat_syscall()) { struct __kernel_old_timeval __user tv = { .tv_sec = ts.tv_sec, .tv_usec = ts.tv_nsec, }; if (copy_to_user(userstamp, &tv, sizeof(tv))) return -EFAULT; return 0; } #endif return put_timespec64(&ts, userstamp); } EXPORT_SYMBOL(sock_gettstamp); void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) { if (!sock_flag(sk, flag)) { unsigned long previous_flags = sk->sk_flags; sock_set_flag(sk, flag); /* * we just set one of the two flags which require net * time stamping, but time stamping might have been on * already because of the other one */ if (sock_needs_netstamp(sk) && !(previous_flags & SK_FLAGS_TIMESTAMP)) net_enable_timestamp(); } } int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type) { struct sock_exterr_skb *serr; struct sk_buff *skb; int copied, err; err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (skb == NULL) goto out; copied = skb->len; if (copied > len) { msg->msg_flags |= MSG_TRUNC; copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; sock_recv_timestamp(msg, sk, skb); serr = SKB_EXT_ERR(skb); put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee); msg->msg_flags |= MSG_ERRQUEUE; err = copied; out_free_skb: kfree_skb(skb); out: return err; } EXPORT_SYMBOL(sock_recv_errqueue); /* * Get a socket option on an socket. * * FIX: POSIX 1003.1g is very ambiguous here. It states that * asynchronous errors should be reported by getsockopt. 
We assume * this means if you specify SO_ERROR (otherwise whats the point of it). */ int sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_getsockopt); int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; } EXPORT_SYMBOL(sock_common_recvmsg); /* * Set socket options on an inet socket. */ int sock_common_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_setsockopt); void sk_common_release(struct sock *sk) { if (sk->sk_prot->destroy) sk->sk_prot->destroy(sk); /* * Observation: when sk_common_release is called, processes have * no access to socket. But net still has. * Step one, detach it from networking: * * A. Remove from hash tables. */ sk->sk_prot->unhash(sk); /* * In this point socket cannot receive new packets, but it is possible * that some packets are in flight because some CPU runs receiver and * did hash table lookup before we unhashed socket. They will achieve * receive queue and will be purged by socket destructor. * * Also we still have packets pending on receive queue and probably, * our own packets waiting in device queues. sock_destroy will drain * receive queue, but transmitted packets will delay socket destruction * until the last reference will be released. */ sock_orphan(sk); xfrm_sk_free_policy(sk); sock_put(sk); } EXPORT_SYMBOL(sk_common_release); void sk_get_meminfo(const struct sock *sk, u32 *mem) { memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk); mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); } #ifdef CONFIG_PROC_FS static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); int sock_prot_inuse_get(struct net *net, struct proto *prot) { int cpu, idx = prot->inuse_idx; int res = 0; for_each_possible_cpu(cpu) res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx]; return res >= 0 ? 
res : 0; } EXPORT_SYMBOL_GPL(sock_prot_inuse_get); int sock_inuse_get(struct net *net) { int cpu, res = 0; for_each_possible_cpu(cpu) res += per_cpu_ptr(net->core.prot_inuse, cpu)->all; return res; } EXPORT_SYMBOL_GPL(sock_inuse_get); static int __net_init sock_inuse_init_net(struct net *net) { net->core.prot_inuse = alloc_percpu(struct prot_inuse); if (net->core.prot_inuse == NULL) return -ENOMEM; return 0; } static void __net_exit sock_inuse_exit_net(struct net *net) { free_percpu(net->core.prot_inuse); } static struct pernet_operations net_inuse_ops = { .init = sock_inuse_init_net, .exit = sock_inuse_exit_net, }; static __init int net_inuse_init(void) { if (register_pernet_subsys(&net_inuse_ops)) panic("Cannot initialize net inuse counters"); return 0; } core_initcall(net_inuse_init); static int assign_proto_idx(struct proto *prot) { prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { pr_err("PROTO_INUSE_NR exhausted\n"); return -ENOSPC; } set_bit(prot->inuse_idx, proto_inuse_idx); return 0; } static void release_proto_idx(struct proto *prot) { if (prot->inuse_idx != PROTO_INUSE_NR - 1) clear_bit(prot->inuse_idx, proto_inuse_idx); } #else static inline int assign_proto_idx(struct proto *prot) { return 0; } static inline void release_proto_idx(struct proto *prot) { } #endif static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) { if (!twsk_prot) return; kfree(twsk_prot->twsk_slab_name); twsk_prot->twsk_slab_name = NULL; kmem_cache_destroy(twsk_prot->twsk_slab); twsk_prot->twsk_slab = NULL; } static int tw_prot_init(const struct proto *prot) { struct timewait_sock_ops *twsk_prot = prot->twsk_prot; if (!twsk_prot) return 0; twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); if (!twsk_prot->twsk_slab_name) return -ENOMEM; twsk_prot->twsk_slab = kmem_cache_create(twsk_prot->twsk_slab_name, twsk_prot->twsk_obj_size, 0, SLAB_ACCOUNT | prot->slab_flags, NULL); if (!twsk_prot->twsk_slab) { pr_crit("%s: Can't create timewait sock SLAB cache!\n", prot->name); return -ENOMEM; } return 0; } static void req_prot_cleanup(struct request_sock_ops *rsk_prot) { if (!rsk_prot) return; kfree(rsk_prot->slab_name); rsk_prot->slab_name = NULL; kmem_cache_destroy(rsk_prot->slab); rsk_prot->slab = NULL; } static int req_prot_init(const struct proto *prot) { struct request_sock_ops *rsk_prot = prot->rsk_prot; if (!rsk_prot) return 0; rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); if (!rsk_prot->slab_name) return -ENOMEM; rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, rsk_prot->obj_size, 0, SLAB_ACCOUNT | prot->slab_flags, NULL); if (!rsk_prot->slab) { pr_crit("%s: Can't create request sock SLAB cache!\n", prot->name); return -ENOMEM; } return 0; } int proto_register(struct proto *prot, int alloc_slab) { int ret = -ENOBUFS; if (prot->memory_allocated && !prot->sysctl_mem) { pr_err("%s: missing sysctl_mem\n", prot->name); return -EINVAL; } if (prot->memory_allocated && !prot->per_cpu_fw_alloc) { pr_err("%s: missing per_cpu_fw_alloc\n", prot->name); return -EINVAL; } if (alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | prot->slab_flags, prot->useroffset, prot->usersize, NULL); if (prot->slab == NULL) { pr_crit("%s: Can't create sock SLAB cache!\n", prot->name); goto out; } if (req_prot_init(prot)) goto out_free_request_sock_slab; if (tw_prot_init(prot)) goto out_free_timewait_sock_slab; } 
mutex_lock(&proto_list_mutex); ret = assign_proto_idx(prot); if (ret) { mutex_unlock(&proto_list_mutex); goto out_free_timewait_sock_slab; } list_add(&prot->node, &proto_list); mutex_unlock(&proto_list_mutex); return ret; out_free_timewait_sock_slab: if (alloc_slab) tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { req_prot_cleanup(prot->rsk_prot); kmem_cache_destroy(prot->slab); prot->slab = NULL; } out: return ret; } EXPORT_SYMBOL(proto_register); void proto_unregister(struct proto *prot) { mutex_lock(&proto_list_mutex); release_proto_idx(prot); list_del(&prot->node); mutex_unlock(&proto_list_mutex); kmem_cache_destroy(prot->slab); prot->slab = NULL; req_prot_cleanup(prot->rsk_prot); tw_prot_cleanup(prot->twsk_prot); } EXPORT_SYMBOL(proto_unregister); int sock_load_diag_module(int family, int protocol) { if (!protocol) { if (!sock_is_registered(family)) return -ENOENT; return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, NETLINK_SOCK_DIAG, family); } #ifdef CONFIG_INET if (family == AF_INET && protocol != IPPROTO_RAW && protocol < MAX_INET_PROTOS && !rcu_access_pointer(inet_protos[protocol])) return -ENOENT; #endif return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, NETLINK_SOCK_DIAG, family, protocol); } EXPORT_SYMBOL(sock_load_diag_module); #ifdef CONFIG_PROC_FS static void *proto_seq_start(struct seq_file *seq, loff_t *pos) __acquires(proto_list_mutex) { mutex_lock(&proto_list_mutex); return seq_list_start_head(&proto_list, *pos); } static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_list_next(v, &proto_list, pos); } static void proto_seq_stop(struct seq_file *seq, void *v) __releases(proto_list_mutex) { mutex_unlock(&proto_list_mutex); } static char proto_method_implemented(const void *method) { return method == NULL ? 'n' : 'y'; } static long sock_prot_memory_allocated(struct proto *proto) { return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; } static const char *sock_prot_memory_pressure(struct proto *proto) { return proto->memory_pressure != NULL ? proto_memory_pressure(proto) ? "yes" : "no" : "NI"; } static void proto_seq_printf(struct seq_file *seq, struct proto *proto) { seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", proto->name, proto->obj_size, sock_prot_inuse_get(seq_file_net(seq), proto), sock_prot_memory_allocated(proto), sock_prot_memory_pressure(proto), proto->max_header, proto->slab == NULL ? 
"no" : "yes", module_name(proto->owner), proto_method_implemented(proto->close), proto_method_implemented(proto->connect), proto_method_implemented(proto->disconnect), proto_method_implemented(proto->accept), proto_method_implemented(proto->ioctl), proto_method_implemented(proto->init), proto_method_implemented(proto->destroy), proto_method_implemented(proto->shutdown), proto_method_implemented(proto->setsockopt), proto_method_implemented(proto->getsockopt), proto_method_implemented(proto->sendmsg), proto_method_implemented(proto->recvmsg), proto_method_implemented(proto->bind), proto_method_implemented(proto->backlog_rcv), proto_method_implemented(proto->hash), proto_method_implemented(proto->unhash), proto_method_implemented(proto->get_port), proto_method_implemented(proto->enter_memory_pressure)); } static int proto_seq_show(struct seq_file *seq, void *v) { if (v == &proto_list) seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", "protocol", "size", "sockets", "memory", "press", "maxhdr", "slab", "module", "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n"); else proto_seq_printf(seq, list_entry(v, struct proto, node)); return 0; } static const struct seq_operations proto_seq_ops = { .start = proto_seq_start, .next = proto_seq_next, .stop = proto_seq_stop, .show = proto_seq_show, }; static __net_init int proto_init_net(struct net *net) { if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } static __net_exit void proto_exit_net(struct net *net) { remove_proc_entry("protocols", net->proc_net); } static __net_initdata struct pernet_operations proto_net_ops = { .init = proto_init_net, .exit = proto_exit_net, }; static int __init proto_init(void) { return register_pernet_subsys(&proto_net_ops); } subsys_initcall(proto_init); #endif /* PROC_FS */ #ifdef CONFIG_NET_RX_BUSY_POLL bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) return true; if (sk_is_udp(sk) && !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) return true; return sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); #endif /* CONFIG_NET_RX_BUSY_POLL */ int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) { if (!sk->sk_prot->bind_add) return -EOPNOTSUPP; return sk->sk_prot->bind_add(sk, addr, addr_len); } EXPORT_SYMBOL(sock_bind_add); /* Copy 'size' bytes from userspace and return `size` back to userspace */ int sock_ioctl_inout(struct sock *sk, unsigned int cmd, void __user *arg, void *karg, size_t size) { int ret; if (copy_from_user(karg, arg, size)) return -EFAULT; ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg); if (ret) return ret; if (copy_to_user(arg, karg, size)) return -EFAULT; return 0; } EXPORT_SYMBOL(sock_ioctl_inout); /* This is the most common ioctl prep function, where the result (4 bytes) is * copied back to userspace if the ioctl() returns successfully. No input is * copied from userspace as input argument. */ static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg) { int ret, karg = 0; ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg); if (ret) return ret; return put_user(karg, (int __user *)arg); } /* A wrapper around sock ioctls, which copies the data from userspace * (depending on the protocol/ioctl), and copies back the result to userspace. 
* The main motivation for this function is to pass kernel memory to the * protocol ioctl callbacks, instead of userspace memory. */ int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { int rc = 1; if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET) rc = ipmr_sk_ioctl(sk, cmd, arg); else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6) rc = ip6mr_sk_ioctl(sk, cmd, arg); else if (sk_is_phonet(sk)) rc = phonet_sk_ioctl(sk, cmd, arg); /* If ioctl was processed, returns its value */ if (rc <= 0) return rc; /* Otherwise call the default handler */ return sock_ioctl_out(sk, cmd, arg); } EXPORT_SYMBOL(sk_ioctl); static int __init sock_struct_check(void) { CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_error_queue); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_receive_queue); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_backlog); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_ifindex); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_cookie); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvbuf); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_filter); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_wq); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_data_ready); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvtimeo); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvlowat); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_forward_alloc); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_tsflags); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo); 
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift); CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag); return 0; } core_initcall(sock_struct_check);
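/*
 * Illustrative sketch (not part of the file above): one way a protocol could
 * pair the sk_reset_timer()/sk_stop_timer() helpers with the socket refcount,
 * deferring work while the socket is owned by process context.  The
 * example_* names and the timeout policy are hypothetical; the sketch assumes
 * the callback was installed with timer_setup(&sk->sk_timer,
 * example_retransmit_timer, 0) after sock_init_data().
 */
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <net/sock.h>

static void example_retransmit_timer(struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/* ... the protocol's timeout handling would run here ... */
	} else {
		/*
		 * The owner is busy in process context; retry shortly.  The
		 * timer has just expired, so mod_timer() inside
		 * sk_reset_timer() returns 0 and a new reference is taken.
		 */
		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
	}
	bh_unlock_sock(sk);
	sock_put(sk);	/* drop the reference taken when the timer was armed */
}

static void example_arm_timer(struct sock *sk)
{
	lock_sock(sk);
	/* sk_reset_timer() calls sock_hold() if the timer was not pending */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ);
	release_sock(sk);
}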
// SPDX-License-Identifier: GPL-2.0+
/*
 * Driver for USB Mass Storage compliant devices
 *
 * Current development and maintenance by:
 * (c) 1999-2003 Matthew Dharm (mdharm-usb@one-eyed-alien.net)
 *
 * Developed with the assistance of:
 * (c) 2000 David L. Brown, Jr. (usb-storage@davidb.org)
 * (c) 2003-2009 Alan Stern (stern@rowland.harvard.edu)
 *
 * Initial work by:
 * (c) 1999 Michael Gee (michael@linuxspecific.com)
 *
 * usb_device_id support by Adam J. Richter (adam@yggdrasil.com):
 * (c) 2000 Yggdrasil Computing, Inc.
 *
 * This driver is based on the 'USB Mass Storage Class' document. This
 * describes in detail the protocol used to communicate with such
 * devices. Clearly, the designers had SCSI and ATAPI commands in
 * mind when they created this document. The commands are all very
 * similar to commands in the SCSI-II and ATAPI specifications.
 *
 * It is important to note that in a number of cases this class
 * exhibits class-specific exemptions from the USB specification.
 * Notably the usage of NAK, STALL and ACK differs from the norm, in
 * that they are used to communicate wait, failed and OK on commands.
 *
 * Also, for certain devices, the interrupt endpoint is used to convey
 * status of a command.
 */

#ifdef CONFIG_USB_STORAGE_DEBUG
#define DEBUG
#endif

#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/utsname.h>

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>

#include "usb.h"
#include "scsiglue.h"
#include "transport.h"
#include "protocol.h"
#include "debug.h"
#include "initializers.h"

#include "sierra_ms.h"
#include "option_ms.h"

#if IS_ENABLED(CONFIG_USB_UAS)
#include "uas-detect.h"
#endif

#define DRV_NAME "usb-storage"

/* Some informational data */
MODULE_AUTHOR("Matthew Dharm <mdharm-usb@one-eyed-alien.net>");
MODULE_DESCRIPTION("USB Mass Storage driver for Linux");
MODULE_LICENSE("GPL");

static unsigned int delay_use = 1;
module_param(delay_use, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(delay_use, "seconds to delay before using a new device");

static char quirks[128];
module_param_string(quirks, quirks, sizeof(quirks), S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(quirks, "supplemental list of device IDs and their quirks");

/*
 * The entries in this table correspond, line for line,
 * with the entries in usb_storage_usb_ids[], defined in usual-tables.c.
*/ /* *The vendor name should be kept at eight characters or less, and * the product name should be kept at 16 characters or less. If a device * has the US_FL_FIX_INQUIRY flag, then the vendor and product names * normally generated by a device through the INQUIRY response will be * taken from this list, and this is the reason for the above size * restriction. However, if the flag is not present, then you * are free to use as many characters as you like. */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } #define COMPLIANT_DEV UNUSUAL_DEV #define USUAL_DEV(use_protocol, use_transport) \ { \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ } static const struct us_unusual_dev us_unusual_dev_list[] = { # include "unusual_devs.h" { } /* Terminating entry */ }; static const struct us_unusual_dev for_dynamic_ids = USUAL_DEV(USB_SC_SCSI, USB_PR_BULK); #undef UNUSUAL_DEV #undef COMPLIANT_DEV #undef USUAL_DEV #ifdef CONFIG_LOCKDEP static struct lock_class_key us_interface_key[USB_MAXINTERFACES]; static void us_set_lock_class(struct mutex *mutex, struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_host_config *config = udev->actconfig; int i; for (i = 0; i < config->desc.bNumInterfaces; i++) { if (config->interface[i] == intf) break; } BUG_ON(i == config->desc.bNumInterfaces); lockdep_set_class(mutex, &us_interface_key[i]); } #else static void us_set_lock_class(struct mutex *mutex, struct usb_interface *intf) { } #endif #ifdef CONFIG_PM /* Minimal support for suspend and resume */ int usb_stor_suspend(struct usb_interface *iface, pm_message_t message) { struct us_data *us = usb_get_intfdata(iface); /* Wait until no command is running */ mutex_lock(&us->dev_mutex); if (us->suspend_resume_hook) (us->suspend_resume_hook)(us, US_SUSPEND); /* * When runtime PM is working, we'll set a flag to indicate * whether we should autoresume when a SCSI request arrives. */ mutex_unlock(&us->dev_mutex); return 0; } EXPORT_SYMBOL_GPL(usb_stor_suspend); int usb_stor_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); mutex_lock(&us->dev_mutex); if (us->suspend_resume_hook) (us->suspend_resume_hook)(us, US_RESUME); mutex_unlock(&us->dev_mutex); return 0; } EXPORT_SYMBOL_GPL(usb_stor_resume); int usb_stor_reset_resume(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); /* Report the reset to the SCSI core */ usb_stor_report_bus_reset(us); /* * If any of the subdrivers implemented a reinitialization scheme, * this is where the callback would be invoked. */ return 0; } EXPORT_SYMBOL_GPL(usb_stor_reset_resume); #endif /* CONFIG_PM */ /* * The next two routines get called just before and just after * a USB port reset, whether from this driver or a different one. 
*/ int usb_stor_pre_reset(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); /* Make sure no command runs during the reset */ mutex_lock(&us->dev_mutex); return 0; } EXPORT_SYMBOL_GPL(usb_stor_pre_reset); int usb_stor_post_reset(struct usb_interface *iface) { struct us_data *us = usb_get_intfdata(iface); /* Report the reset to the SCSI core */ usb_stor_report_bus_reset(us); /* * If any of the subdrivers implemented a reinitialization scheme, * this is where the callback would be invoked. */ mutex_unlock(&us->dev_mutex); return 0; } EXPORT_SYMBOL_GPL(usb_stor_post_reset); /* * fill_inquiry_response takes an unsigned char array (which must * be at least 36 characters) and populates the vendor name, * product name, and revision fields. Then the array is copied * into the SCSI command's response buffer (oddly enough * called request_buffer). data_len contains the length of the * data array, which again must be at least 36. */ void fill_inquiry_response(struct us_data *us, unsigned char *data, unsigned int data_len) { if (data_len < 36) /* You lose. */ return; memset(data+8, ' ', 28); if (data[0]&0x20) { /* * USB device currently not connected. Return * peripheral qualifier 001b ("...however, the * physical device is not currently connected * to this logical unit") and leave vendor and * product identification empty. ("If the target * does store some of the INQUIRY data on the * device, it may return zeros or ASCII spaces * (20h) in those fields until the data is * available from the device."). */ } else { u16 bcdDevice = le16_to_cpu(us->pusb_dev->descriptor.bcdDevice); int n; n = strlen(us->unusual_dev->vendorName); memcpy(data+8, us->unusual_dev->vendorName, min(8, n)); n = strlen(us->unusual_dev->productName); memcpy(data+16, us->unusual_dev->productName, min(16, n)); data[32] = 0x30 + ((bcdDevice>>12) & 0x0F); data[33] = 0x30 + ((bcdDevice>>8) & 0x0F); data[34] = 0x30 + ((bcdDevice>>4) & 0x0F); data[35] = 0x30 + ((bcdDevice) & 0x0F); } usb_stor_set_xfer_buf(data, data_len, us->srb); } EXPORT_SYMBOL_GPL(fill_inquiry_response); static int usb_stor_control_thread(void * __us) { struct us_data *us = (struct us_data *)__us; struct Scsi_Host *host = us_to_host(us); struct scsi_cmnd *srb; for (;;) { usb_stor_dbg(us, "*** thread sleeping\n"); if (wait_for_completion_interruptible(&us->cmnd_ready)) break; usb_stor_dbg(us, "*** thread awakened\n"); /* lock the device pointers */ mutex_lock(&(us->dev_mutex)); /* lock access to the state */ scsi_lock(host); /* When we are called with no command pending, we're done */ srb = us->srb; if (srb == NULL) { scsi_unlock(host); mutex_unlock(&us->dev_mutex); usb_stor_dbg(us, "-- exiting\n"); break; } /* has the command timed out *already* ? 
*/ if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { srb->result = DID_ABORT << 16; goto SkipForAbort; } scsi_unlock(host); /* * reject the command if the direction indicator * is UNKNOWN */ if (srb->sc_data_direction == DMA_BIDIRECTIONAL) { usb_stor_dbg(us, "UNKNOWN data direction\n"); srb->result = DID_ERROR << 16; } /* * reject if target != 0 or if LUN is higher than * the maximum known LUN */ else if (srb->device->id && !(us->fflags & US_FL_SCM_MULT_TARG)) { usb_stor_dbg(us, "Bad target number (%d:%llu)\n", srb->device->id, srb->device->lun); srb->result = DID_BAD_TARGET << 16; } else if (srb->device->lun > us->max_lun) { usb_stor_dbg(us, "Bad LUN (%d:%llu)\n", srb->device->id, srb->device->lun); srb->result = DID_BAD_TARGET << 16; } /* * Handle those devices which need us to fake * their inquiry data */ else if ((srb->cmnd[0] == INQUIRY) && (us->fflags & US_FL_FIX_INQUIRY)) { unsigned char data_ptr[36] = { 0x00, 0x80, 0x02, 0x02, 0x1F, 0x00, 0x00, 0x00}; usb_stor_dbg(us, "Faking INQUIRY command\n"); fill_inquiry_response(us, data_ptr, 36); srb->result = SAM_STAT_GOOD; } /* we've got a command, let's do it! */ else { US_DEBUG(usb_stor_show_command(us, srb)); us->proto_handler(srb, us); usb_mark_last_busy(us->pusb_dev); } /* lock access to the state */ scsi_lock(host); /* was the command aborted? */ if (srb->result == DID_ABORT << 16) { SkipForAbort: usb_stor_dbg(us, "scsi command aborted\n"); srb = NULL; /* Don't call scsi_done() */ } /* * If an abort request was received we need to signal that * the abort has finished. The proper test for this is * the TIMED_OUT flag, not srb->result == DID_ABORT, because * the timeout might have occurred after the command had * already completed with a different result code. */ if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { complete(&(us->notify)); /* Allow USB transfers to resume */ clear_bit(US_FLIDX_ABORTING, &us->dflags); clear_bit(US_FLIDX_TIMED_OUT, &us->dflags); } /* finished working on this command */ us->srb = NULL; scsi_unlock(host); /* unlock the device pointers */ mutex_unlock(&us->dev_mutex); /* now that the locks are released, notify the SCSI core */ if (srb) { usb_stor_dbg(us, "scsi cmd done, result=0x%x\n", srb->result); scsi_done_direct(srb); } } /* for (;;) */ /* Wait until we are told to stop */ for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; schedule(); } __set_current_state(TASK_RUNNING); return 0; } /*********************************************************************** * Device probing and disconnecting ***********************************************************************/ /* Associate our private data with the USB device */ static int associate_dev(struct us_data *us, struct usb_interface *intf) { /* Fill in the device-related fields */ us->pusb_dev = interface_to_usbdev(intf); us->pusb_intf = intf; us->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; usb_stor_dbg(us, "Vendor: 0x%04x, Product: 0x%04x, Revision: 0x%04x\n", le16_to_cpu(us->pusb_dev->descriptor.idVendor), le16_to_cpu(us->pusb_dev->descriptor.idProduct), le16_to_cpu(us->pusb_dev->descriptor.bcdDevice)); usb_stor_dbg(us, "Interface Subclass: 0x%02x, Protocol: 0x%02x\n", intf->cur_altsetting->desc.bInterfaceSubClass, intf->cur_altsetting->desc.bInterfaceProtocol); /* Store our private data in the interface */ usb_set_intfdata(intf, us); /* Allocate the control/setup and DMA-mapped buffers */ us->cr = kmalloc(sizeof(*us->cr), GFP_KERNEL); if (!us->cr) return -ENOMEM; us->iobuf = usb_alloc_coherent(us->pusb_dev, 
US_IOBUF_SIZE, GFP_KERNEL, &us->iobuf_dma); if (!us->iobuf) { usb_stor_dbg(us, "I/O buffer allocation failed\n"); return -ENOMEM; } return 0; } /* Works only for digits and letters, but small and fast */ #define TOLOWER(x) ((x) | 0x20) /* Adjust device flags based on the "quirks=" module parameter */ void usb_stor_adjust_quirks(struct usb_device *udev, u64 *fflags) { char *p; u16 vid = le16_to_cpu(udev->descriptor.idVendor); u16 pid = le16_to_cpu(udev->descriptor.idProduct); u64 f = 0; u64 mask = (US_FL_SANE_SENSE | US_FL_BAD_SENSE | US_FL_FIX_CAPACITY | US_FL_IGNORE_UAS | US_FL_CAPACITY_HEURISTICS | US_FL_IGNORE_DEVICE | US_FL_NOT_LOCKABLE | US_FL_MAX_SECTORS_64 | US_FL_CAPACITY_OK | US_FL_IGNORE_RESIDUE | US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT | US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE | US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES | US_FL_MAX_SECTORS_240 | US_FL_NO_REPORT_LUNS | US_FL_ALWAYS_SYNC); p = quirks; while (*p) { /* Each entry consists of VID:PID:flags */ if (vid == simple_strtoul(p, &p, 16) && *p == ':' && pid == simple_strtoul(p+1, &p, 16) && *p == ':') break; /* Move forward to the next entry */ while (*p) { if (*p++ == ',') break; } } if (!*p) /* No match */ return; /* Collect the flags */ while (*++p && *p != ',') { switch (TOLOWER(*p)) { case 'a': f |= US_FL_SANE_SENSE; break; case 'b': f |= US_FL_BAD_SENSE; break; case 'c': f |= US_FL_FIX_CAPACITY; break; case 'd': f |= US_FL_NO_READ_DISC_INFO; break; case 'e': f |= US_FL_NO_READ_CAPACITY_16; break; case 'f': f |= US_FL_NO_REPORT_OPCODES; break; case 'g': f |= US_FL_MAX_SECTORS_240; break; case 'h': f |= US_FL_CAPACITY_HEURISTICS; break; case 'i': f |= US_FL_IGNORE_DEVICE; break; case 'j': f |= US_FL_NO_REPORT_LUNS; break; case 'k': f |= US_FL_NO_SAME; break; case 'l': f |= US_FL_NOT_LOCKABLE; break; case 'm': f |= US_FL_MAX_SECTORS_64; break; case 'n': f |= US_FL_INITIAL_READ10; break; case 'o': f |= US_FL_CAPACITY_OK; break; case 'p': f |= US_FL_WRITE_CACHE; break; case 'r': f |= US_FL_IGNORE_RESIDUE; break; case 's': f |= US_FL_SINGLE_LUN; break; case 't': f |= US_FL_NO_ATA_1X; break; case 'u': f |= US_FL_IGNORE_UAS; break; case 'w': f |= US_FL_NO_WP_DETECT; break; case 'y': f |= US_FL_ALWAYS_SYNC; break; /* Ignore unrecognized flag characters */ } } *fflags = (*fflags & ~mask) | f; } EXPORT_SYMBOL_GPL(usb_stor_adjust_quirks); /* Get the unusual_devs entries and the string descriptors */ static int get_device_info(struct us_data *us, const struct usb_device_id *id, const struct us_unusual_dev *unusual_dev) { struct usb_device *dev = us->pusb_dev; struct usb_interface_descriptor *idesc = &us->pusb_intf->cur_altsetting->desc; struct device *pdev = &us->pusb_intf->dev; /* Store the entries */ us->unusual_dev = unusual_dev; us->subclass = (unusual_dev->useProtocol == USB_SC_DEVICE) ? idesc->bInterfaceSubClass : unusual_dev->useProtocol; us->protocol = (unusual_dev->useTransport == USB_PR_DEVICE) ? 
idesc->bInterfaceProtocol : unusual_dev->useTransport; us->fflags = id->driver_info; usb_stor_adjust_quirks(us->pusb_dev, &us->fflags); if (us->fflags & US_FL_IGNORE_DEVICE) { dev_info(pdev, "device ignored\n"); return -ENODEV; } /* * This flag is only needed when we're in high-speed, so let's * disable it if we're in full-speed */ if (dev->speed != USB_SPEED_HIGH) us->fflags &= ~US_FL_GO_SLOW; if (us->fflags) dev_info(pdev, "Quirks match for vid %04x pid %04x: %llx\n", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct), us->fflags); /* * Log a message if a non-generic unusual_dev entry contains an * unnecessary subclass or protocol override. This may stimulate * reports from users that will help us remove unneeded entries * from the unusual_devs.h table. */ if (id->idVendor || id->idProduct) { static const char *msgs[3] = { "an unneeded SubClass entry", "an unneeded Protocol entry", "unneeded SubClass and Protocol entries"}; struct usb_device_descriptor *ddesc = &dev->descriptor; int msg = -1; if (unusual_dev->useProtocol != USB_SC_DEVICE && us->subclass == idesc->bInterfaceSubClass) msg += 1; if (unusual_dev->useTransport != USB_PR_DEVICE && us->protocol == idesc->bInterfaceProtocol) msg += 2; if (msg >= 0 && !(us->fflags & US_FL_NEED_OVERRIDE)) dev_notice(pdev, "This device " "(%04x,%04x,%04x S %02x P %02x)" " has %s in unusual_devs.h (kernel" " %s)\n" " Please send a copy of this message to " "<linux-usb@vger.kernel.org> and " "<usb-storage@lists.one-eyed-alien.net>\n", le16_to_cpu(ddesc->idVendor), le16_to_cpu(ddesc->idProduct), le16_to_cpu(ddesc->bcdDevice), idesc->bInterfaceSubClass, idesc->bInterfaceProtocol, msgs[msg], utsname()->release); } return 0; } /* Get the transport settings */ static void get_transport(struct us_data *us) { switch (us->protocol) { case USB_PR_CB: us->transport_name = "Control/Bulk"; us->transport = usb_stor_CB_transport; us->transport_reset = usb_stor_CB_reset; us->max_lun = 7; break; case USB_PR_CBI: us->transport_name = "Control/Bulk/Interrupt"; us->transport = usb_stor_CB_transport; us->transport_reset = usb_stor_CB_reset; us->max_lun = 7; break; case USB_PR_BULK: us->transport_name = "Bulk"; us->transport = usb_stor_Bulk_transport; us->transport_reset = usb_stor_Bulk_reset; break; } } /* Get the protocol settings */ static void get_protocol(struct us_data *us) { switch (us->subclass) { case USB_SC_RBC: us->protocol_name = "Reduced Block Commands (RBC)"; us->proto_handler = usb_stor_transparent_scsi_command; break; case USB_SC_8020: us->protocol_name = "8020i"; us->proto_handler = usb_stor_pad12_command; us->max_lun = 0; break; case USB_SC_QIC: us->protocol_name = "QIC-157"; us->proto_handler = usb_stor_pad12_command; us->max_lun = 0; break; case USB_SC_8070: us->protocol_name = "8070i"; us->proto_handler = usb_stor_pad12_command; us->max_lun = 0; break; case USB_SC_SCSI: us->protocol_name = "Transparent SCSI"; us->proto_handler = usb_stor_transparent_scsi_command; break; case USB_SC_UFI: us->protocol_name = "Uniform Floppy Interface (UFI)"; us->proto_handler = usb_stor_ufi_command; break; } } /* Get the pipe settings */ static int get_pipes(struct us_data *us) { struct usb_host_interface *alt = us->pusb_intf->cur_altsetting; struct usb_endpoint_descriptor *ep_in; struct usb_endpoint_descriptor *ep_out; struct usb_endpoint_descriptor *ep_int; int res; /* * Find the first endpoint of each type we need. * We are expecting a minimum of 2 endpoints - in and out (bulk). 
* An optional interrupt-in is OK (necessary for CBI protocol). * We will ignore any others. */ res = usb_find_common_endpoints(alt, &ep_in, &ep_out, NULL, NULL); if (res) { usb_stor_dbg(us, "bulk endpoints not found\n"); return res; } res = usb_find_int_in_endpoint(alt, &ep_int); if (res && us->protocol == USB_PR_CBI) { usb_stor_dbg(us, "interrupt endpoint not found\n"); return res; } /* Calculate and store the pipe values */ us->send_ctrl_pipe = usb_sndctrlpipe(us->pusb_dev, 0); us->recv_ctrl_pipe = usb_rcvctrlpipe(us->pusb_dev, 0); us->send_bulk_pipe = usb_sndbulkpipe(us->pusb_dev, usb_endpoint_num(ep_out)); us->recv_bulk_pipe = usb_rcvbulkpipe(us->pusb_dev, usb_endpoint_num(ep_in)); if (ep_int) { us->recv_intr_pipe = usb_rcvintpipe(us->pusb_dev, usb_endpoint_num(ep_int)); us->ep_bInterval = ep_int->bInterval; } return 0; } /* Initialize all the dynamic resources we need */ static int usb_stor_acquire_resources(struct us_data *us) { int p; struct task_struct *th; us->current_urb = usb_alloc_urb(0, GFP_KERNEL); if (!us->current_urb) return -ENOMEM; /* * Just before we start our control thread, initialize * the device if it needs initialization */ if (us->unusual_dev->initFunction) { p = us->unusual_dev->initFunction(us); if (p) return p; } /* Start up our control thread */ th = kthread_run(usb_stor_control_thread, us, "usb-storage"); if (IS_ERR(th)) { dev_warn(&us->pusb_intf->dev, "Unable to start control thread\n"); return PTR_ERR(th); } us->ctl_thread = th; return 0; } /* Release all our dynamic resources */ static void usb_stor_release_resources(struct us_data *us) { /* * Tell the control thread to exit. The SCSI host must * already have been removed and the DISCONNECTING flag set * so that we won't accept any more commands. */ usb_stor_dbg(us, "-- sending exit command to thread\n"); complete(&us->cmnd_ready); if (us->ctl_thread) kthread_stop(us->ctl_thread); /* Call the destructor routine, if it exists */ if (us->extra_destructor) { usb_stor_dbg(us, "-- calling extra_destructor()\n"); us->extra_destructor(us->extra); } /* Free the extra data and the URB */ kfree(us->extra); usb_free_urb(us->current_urb); } /* Dissociate from the USB device */ static void dissociate_dev(struct us_data *us) { /* Free the buffers */ kfree(us->cr); usb_free_coherent(us->pusb_dev, US_IOBUF_SIZE, us->iobuf, us->iobuf_dma); /* Remove our private data from the interface */ usb_set_intfdata(us->pusb_intf, NULL); } /* * First stage of disconnect processing: stop SCSI scanning, * remove the host, and stop accepting new commands */ static void quiesce_and_remove_host(struct us_data *us) { struct Scsi_Host *host = us_to_host(us); /* If the device is really gone, cut short reset delays */ if (us->pusb_dev->state == USB_STATE_NOTATTACHED) { set_bit(US_FLIDX_DISCONNECTING, &us->dflags); wake_up(&us->delay_wait); } /* * Prevent SCSI scanning (if it hasn't started yet) * or wait for the SCSI-scanning routine to stop. */ cancel_delayed_work_sync(&us->scan_dwork); /* Balance autopm calls if scanning was cancelled */ if (test_bit(US_FLIDX_SCAN_PENDING, &us->dflags)) usb_autopm_put_interface_no_suspend(us->pusb_intf); /* * Removing the host will perform an orderly shutdown: caches * synchronized, disks spun down, etc. */ scsi_remove_host(host); /* * Prevent any new commands from being accepted and cut short * reset delays. 
*/ scsi_lock(host); set_bit(US_FLIDX_DISCONNECTING, &us->dflags); scsi_unlock(host); wake_up(&us->delay_wait); } /* Second stage of disconnect processing: deallocate all resources */ static void release_everything(struct us_data *us) { usb_stor_release_resources(us); dissociate_dev(us); /* * Drop our reference to the host; the SCSI core will free it * (and "us" along with it) when the refcount becomes 0. */ scsi_host_put(us_to_host(us)); } /* Delayed-work routine to carry out SCSI-device scanning */ static void usb_stor_scan_dwork(struct work_struct *work) { struct us_data *us = container_of(work, struct us_data, scan_dwork.work); struct device *dev = &us->pusb_intf->dev; dev_dbg(dev, "starting scan\n"); /* For bulk-only devices, determine the max LUN value */ if (us->protocol == USB_PR_BULK && !(us->fflags & US_FL_SINGLE_LUN) && !(us->fflags & US_FL_SCM_MULT_TARG)) { mutex_lock(&us->dev_mutex); us->max_lun = usb_stor_Bulk_max_lun(us); /* * Allow proper scanning of devices that present more than 8 LUNs * While not affecting other devices that may need the previous * behavior */ if (us->max_lun >= 8) us_to_host(us)->max_lun = us->max_lun+1; mutex_unlock(&us->dev_mutex); } scsi_scan_host(us_to_host(us)); dev_dbg(dev, "scan complete\n"); /* Should we unbind if no devices were detected? */ usb_autopm_put_interface(us->pusb_intf); clear_bit(US_FLIDX_SCAN_PENDING, &us->dflags); } static unsigned int usb_stor_sg_tablesize(struct usb_interface *intf) { struct usb_device *usb_dev = interface_to_usbdev(intf); if (usb_dev->bus->sg_tablesize) { return usb_dev->bus->sg_tablesize; } return SG_ALL; } /* First part of general USB mass-storage probing */ int usb_stor_probe1(struct us_data **pus, struct usb_interface *intf, const struct usb_device_id *id, const struct us_unusual_dev *unusual_dev, const struct scsi_host_template *sht) { struct Scsi_Host *host; struct us_data *us; int result; dev_info(&intf->dev, "USB Mass Storage device detected\n"); /* * Ask the SCSI layer to allocate a host structure, with extra * space at the end for our private us_data structure. */ host = scsi_host_alloc(sht, sizeof(*us)); if (!host) { dev_warn(&intf->dev, "Unable to allocate the scsi host\n"); return -ENOMEM; } /* * Allow 16-byte CDBs and thus > 2TB */ host->max_cmd_len = 16; host->sg_tablesize = usb_stor_sg_tablesize(intf); *pus = us = host_to_us(host); mutex_init(&(us->dev_mutex)); us_set_lock_class(&us->dev_mutex, intf); init_completion(&us->cmnd_ready); init_completion(&(us->notify)); init_waitqueue_head(&us->delay_wait); INIT_DELAYED_WORK(&us->scan_dwork, usb_stor_scan_dwork); /* Associate the us_data structure with the USB device */ result = associate_dev(us, intf); if (result) goto BadDevice; /* Get the unusual_devs entries and the descriptors */ result = get_device_info(us, id, unusual_dev); if (result) goto BadDevice; /* Get standard transport and protocol settings */ get_transport(us); get_protocol(us); /* * Give the caller a chance to fill in specialized transport * or protocol settings. 
*/ return 0; BadDevice: usb_stor_dbg(us, "storage_probe() failed\n"); release_everything(us); return result; } EXPORT_SYMBOL_GPL(usb_stor_probe1); /* Second part of general USB mass-storage probing */ int usb_stor_probe2(struct us_data *us) { int result; struct device *dev = &us->pusb_intf->dev; /* Make sure the transport and protocol have both been set */ if (!us->transport || !us->proto_handler) { result = -ENXIO; goto BadDevice; } usb_stor_dbg(us, "Transport: %s\n", us->transport_name); usb_stor_dbg(us, "Protocol: %s\n", us->protocol_name); if (us->fflags & US_FL_SCM_MULT_TARG) { /* * SCM eUSCSI bridge devices can have different numbers * of LUNs on different targets; allow all to be probed. */ us->max_lun = 7; /* The eUSCSI itself has ID 7, so avoid scanning that */ us_to_host(us)->this_id = 7; /* max_id is 8 initially, so no need to set it here */ } else { /* In the normal case there is only a single target */ us_to_host(us)->max_id = 1; /* * Like Windows, we won't store the LUN bits in CDB[1] for * SCSI-2 devices using the Bulk-Only transport (even though * this violates the SCSI spec). */ if (us->transport == usb_stor_Bulk_transport) us_to_host(us)->no_scsi2_lun_in_cdb = 1; } /* fix for single-lun devices */ if (us->fflags & US_FL_SINGLE_LUN) us->max_lun = 0; /* Find the endpoints and calculate pipe values */ result = get_pipes(us); if (result) goto BadDevice; /* * If the device returns invalid data for the first READ(10) * command, indicate the command should be retried. */ if (us->fflags & US_FL_INITIAL_READ10) set_bit(US_FLIDX_REDO_READ10, &us->dflags); /* Acquire all the other resources and add the host */ result = usb_stor_acquire_resources(us); if (result) goto BadDevice; usb_autopm_get_interface_no_resume(us->pusb_intf); snprintf(us->scsi_name, sizeof(us->scsi_name), "usb-storage %s", dev_name(&us->pusb_intf->dev)); result = scsi_add_host(us_to_host(us), dev); if (result) { dev_warn(dev, "Unable to add the scsi host\n"); goto HostAddErr; } /* Submit the delayed_work for SCSI-device scanning */ set_bit(US_FLIDX_SCAN_PENDING, &us->dflags); if (delay_use > 0) dev_dbg(dev, "waiting for device to settle before scanning\n"); queue_delayed_work(system_freezable_wq, &us->scan_dwork, delay_use * HZ); return 0; /* We come here if there are any problems */ HostAddErr: usb_autopm_put_interface_no_suspend(us->pusb_intf); BadDevice: usb_stor_dbg(us, "storage_probe() failed\n"); release_everything(us); return result; } EXPORT_SYMBOL_GPL(usb_stor_probe2); /* Handle a USB mass-storage disconnect */ void usb_stor_disconnect(struct usb_interface *intf) { struct us_data *us = usb_get_intfdata(intf); quiesce_and_remove_host(us); release_everything(us); } EXPORT_SYMBOL_GPL(usb_stor_disconnect); static struct scsi_host_template usb_stor_host_template; /* The main probe routine for standard devices */ static int storage_probe(struct usb_interface *intf, const struct usb_device_id *id) { const struct us_unusual_dev *unusual_dev; struct us_data *us; int result; int size; /* If uas is enabled and this device can do uas then ignore it. */ #if IS_ENABLED(CONFIG_USB_UAS) if (uas_use_uas_driver(intf, id, NULL)) return -ENXIO; #endif /* * If the device isn't standard (is handled by a subdriver * module) then don't accept it. */ if (usb_usual_ignore_device(intf)) return -ENXIO; /* * Call the general probe procedures. * * The unusual_dev_list array is parallel to the usb_storage_usb_ids * table, so we use the index of the id entry to find the * corresponding unusual_devs entry. 
*/ size = ARRAY_SIZE(us_unusual_dev_list); if (id >= usb_storage_usb_ids && id < usb_storage_usb_ids + size) { unusual_dev = (id - usb_storage_usb_ids) + us_unusual_dev_list; } else { unusual_dev = &for_dynamic_ids; dev_dbg(&intf->dev, "Use Bulk-Only transport with the Transparent SCSI protocol for dynamic id: 0x%04x 0x%04x\n", id->idVendor, id->idProduct); } result = usb_stor_probe1(&us, intf, id, unusual_dev, &usb_stor_host_template); if (result) return result; /* No special transport or protocol settings in the main module */ result = usb_stor_probe2(us); return result; } static struct usb_driver usb_storage_driver = { .name = DRV_NAME, .probe = storage_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = usb_storage_usb_ids, .supports_autosuspend = 1, .soft_unbind = 1, }; module_usb_stor_driver(usb_storage_driver, usb_stor_host_template, DRV_NAME);
2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * HID support for Linux
 *
 * Copyright (c) 1999 Andreas Gal
 * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
 * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
 * Copyright (c) 2006-2012 Jiri Kosina
 */

/*
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <asm/byteorder.h>
#include <linux/input.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/semaphore.h>

#include <linux/hid.h>
#include <linux/hiddev.h>
#include <linux/hid-debug.h>
#include <linux/hidraw.h>

#include "hid-ids.h"

/*
 * Version Information
 */

#define DRIVER_DESC "HID core driver"

static int hid_ignore_special_drivers = 0;
module_param_named(ignore_special_drivers, hid_ignore_special_drivers, int, 0600);
MODULE_PARM_DESC(ignore_special_drivers, "Ignore any special drivers and handle all devices by generic driver");

/*
 * Register a new report for a device.
 */

struct hid_report *hid_register_report(struct hid_device *device,
				       enum hid_report_type type, unsigned int id,
				       unsigned int application)
{
	struct hid_report_enum *report_enum = device->report_enum + type;
	struct hid_report *report;

	if (id >= HID_MAX_IDS)
		return NULL;

	if (report_enum->report_id_hash[id])
		return report_enum->report_id_hash[id];

	report = kzalloc(sizeof(struct hid_report), GFP_KERNEL);
	if (!report)
		return NULL;

	if (id != 0)
		report_enum->numbered = 1;

	report->id = id;
	report->type = type;
	report->size = 0;
	report->device = device;
	report->application = application;
	report_enum->report_id_hash[id] = report;

	list_add_tail(&report->list, &report_enum->report_list);
	INIT_LIST_HEAD(&report->field_entry_list);

	return report;
}
EXPORT_SYMBOL_GPL(hid_register_report);

/*
 * Register a new field for this report.
 */

static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages)
{
	struct hid_field *field;

	if (report->maxfield == HID_MAX_FIELDS) {
		hid_err(report->device, "too many fields in report\n");
		return NULL;
	}

	field = kzalloc((sizeof(struct hid_field) +
			 usages * sizeof(struct hid_usage) +
			 3 * usages * sizeof(unsigned int)), GFP_KERNEL);
	if (!field)
		return NULL;

	field->index = report->maxfield++;
	report->field[field->index] = field;
	field->usage = (struct hid_usage *)(field + 1);
	field->value = (s32 *)(field->usage + usages);
	field->new_value = (s32 *)(field->value + usages);
	field->usages_priorities = (s32 *)(field->new_value + usages);
	field->report = report;

	return field;
}

/*
 * Open a collection. The type/usage is pushed on the stack.
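 *
 * Illustrative walk-through (editor's sketch, hypothetical descriptor, not
 * taken from a real device): for a fragment such as
 *
 *	Usage Page (Generic Desktop)
 *	Usage (Mouse)
 *	Collection (Application)	stack depth 1 after open_collection()
 *	  Usage (Pointer)
 *	  Collection (Physical)		stack depth 2
 *	    ...
 *	  End Collection		close_collection(), depth 1
 *	End Collection			close_collection(), depth 0
 *
 * each Collection main item records the new collection's index in
 * parser->collection_stack[], and hid_lookup_collection() below walks that
 * stack upwards to find e.g. the enclosing Application collection.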
*/ static int open_collection(struct hid_parser *parser, unsigned type) { struct hid_collection *collection; unsigned usage; int collection_index; usage = parser->local.usage[0]; if (parser->collection_stack_ptr == parser->collection_stack_size) { unsigned int *collection_stack; unsigned int new_size = parser->collection_stack_size + HID_COLLECTION_STACK_SIZE; collection_stack = krealloc(parser->collection_stack, new_size * sizeof(unsigned int), GFP_KERNEL); if (!collection_stack) return -ENOMEM; parser->collection_stack = collection_stack; parser->collection_stack_size = new_size; } if (parser->device->maxcollection == parser->device->collection_size) { collection = kmalloc( array3_size(sizeof(struct hid_collection), parser->device->collection_size, 2), GFP_KERNEL); if (collection == NULL) { hid_err(parser->device, "failed to reallocate collection array\n"); return -ENOMEM; } memcpy(collection, parser->device->collection, sizeof(struct hid_collection) * parser->device->collection_size); memset(collection + parser->device->collection_size, 0, sizeof(struct hid_collection) * parser->device->collection_size); kfree(parser->device->collection); parser->device->collection = collection; parser->device->collection_size *= 2; } parser->collection_stack[parser->collection_stack_ptr++] = parser->device->maxcollection; collection_index = parser->device->maxcollection++; collection = parser->device->collection + collection_index; collection->type = type; collection->usage = usage; collection->level = parser->collection_stack_ptr - 1; collection->parent_idx = (collection->level == 0) ? -1 : parser->collection_stack[collection->level - 1]; if (type == HID_COLLECTION_APPLICATION) parser->device->maxapplication++; return 0; } /* * Close a collection. */ static int close_collection(struct hid_parser *parser) { if (!parser->collection_stack_ptr) { hid_err(parser->device, "collection stack underflow\n"); return -EINVAL; } parser->collection_stack_ptr--; return 0; } /* * Climb up the stack, search for the specified collection type * and return the usage. */ static unsigned hid_lookup_collection(struct hid_parser *parser, unsigned type) { struct hid_collection *collection = parser->device->collection; int n; for (n = parser->collection_stack_ptr - 1; n >= 0; n--) { unsigned index = parser->collection_stack[n]; if (collection[index].type == type) return collection[index].usage; } return 0; /* we know nothing about this usage type */ } /* * Concatenate usage which defines 16 bits or less with the * currently defined usage page to form a 32 bit usage */ static void complete_usage(struct hid_parser *parser, unsigned int index) { parser->local.usage[index] &= 0xFFFF; parser->local.usage[index] |= (parser->global.usage_page & 0xFFFF) << 16; } /* * Add a usage to the temporary parser table. */ static int hid_add_usage(struct hid_parser *parser, unsigned usage, u8 size) { if (parser->local.usage_index >= HID_MAX_USAGES) { hid_err(parser->device, "usage index exceeded\n"); return -1; } parser->local.usage[parser->local.usage_index] = usage; /* * If Usage item only includes usage id, concatenate it with * currently defined usage page */ if (size <= 2) complete_usage(parser, parser->local.usage_index); parser->local.usage_size[parser->local.usage_index] = size; parser->local.collection_index[parser->local.usage_index] = parser->collection_stack_ptr ? parser->collection_stack[parser->collection_stack_ptr - 1] : 0; parser->local.usage_index++; return 0; } /* * Register a new field for this report. 
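 *
 * Illustrative example (editor's sketch, hypothetical descriptor bytes):
 * a main item sequence like
 *
 *	0x75, 0x08,	Report Size (8)
 *	0x95, 0x02,	Report Count (2)
 *	0x09, 0x30,	Usage (X)
 *	0x09, 0x31,	Usage (Y)
 *	0x81, 0x06,	Input (Data,Var,Rel)
 *
 * reaches hid_add_field() with report_size = 8, report_count = 2 and two
 * local usages, so one field carrying two 8-bit relative values is
 * registered in the current input report at the offset accumulated so far.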
*/ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsigned flags) { struct hid_report *report; struct hid_field *field; unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; unsigned int usages; unsigned int offset; unsigned int i; unsigned int application; application = hid_lookup_collection(parser, HID_COLLECTION_APPLICATION); report = hid_register_report(parser->device, report_type, parser->global.report_id, application); if (!report) { hid_err(parser->device, "hid_register_report failed\n"); return -1; } /* Handle both signed and unsigned cases properly */ if ((parser->global.logical_minimum < 0 && parser->global.logical_maximum < parser->global.logical_minimum) || (parser->global.logical_minimum >= 0 && (__u32)parser->global.logical_maximum < (__u32)parser->global.logical_minimum)) { dbg_hid("logical range invalid 0x%x 0x%x\n", parser->global.logical_minimum, parser->global.logical_maximum); return -1; } offset = report->size; report->size += parser->global.report_size * parser->global.report_count; if (parser->device->ll_driver->max_buffer_size) max_buffer_size = parser->device->ll_driver->max_buffer_size; /* Total size check: Allow for possible report index byte */ if (report->size > (max_buffer_size - 1) << 3) { hid_err(parser->device, "report is too long\n"); return -1; } if (!parser->local.usage_index) /* Ignore padding fields */ return 0; usages = max_t(unsigned, parser->local.usage_index, parser->global.report_count); field = hid_register_field(report, usages); if (!field) return 0; field->physical = hid_lookup_collection(parser, HID_COLLECTION_PHYSICAL); field->logical = hid_lookup_collection(parser, HID_COLLECTION_LOGICAL); field->application = application; for (i = 0; i < usages; i++) { unsigned j = i; /* Duplicate the last usage we parsed if we have excess values */ if (i >= parser->local.usage_index) j = parser->local.usage_index - 1; field->usage[i].hid = parser->local.usage[j]; field->usage[i].collection_index = parser->local.collection_index[j]; field->usage[i].usage_index = i; field->usage[i].resolution_multiplier = 1; } field->maxusage = usages; field->flags = flags; field->report_offset = offset; field->report_type = report_type; field->report_size = parser->global.report_size; field->report_count = parser->global.report_count; field->logical_minimum = parser->global.logical_minimum; field->logical_maximum = parser->global.logical_maximum; field->physical_minimum = parser->global.physical_minimum; field->physical_maximum = parser->global.physical_maximum; field->unit_exponent = parser->global.unit_exponent; field->unit = parser->global.unit; return 0; } /* * Read data value from item. */ static u32 item_udata(struct hid_item *item) { switch (item->size) { case 1: return item->data.u8; case 2: return item->data.u16; case 4: return item->data.u32; } return 0; } static s32 item_sdata(struct hid_item *item) { switch (item->size) { case 1: return item->data.s8; case 2: return item->data.s16; case 4: return item->data.s32; } return 0; } /* * Process a global item. 
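 *
 * Worked example (editor's note): a Unit Exponent item with data byte 0x0f
 * only uses the low nibble, so it is treated as a 4-bit two's complement
 * value below, hid_snto32(0x0f, 4) == -1; a data byte such as 0xfe
 * sign-extends to 0xfffffffe via item_sdata(), fails the 0xfffffff0 mask
 * test and is therefore used as-is (-2).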
*/ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) { __s32 raw_value; switch (item->tag) { case HID_GLOBAL_ITEM_TAG_PUSH: if (parser->global_stack_ptr == HID_GLOBAL_STACK_SIZE) { hid_err(parser->device, "global environment stack overflow\n"); return -1; } memcpy(parser->global_stack + parser->global_stack_ptr++, &parser->global, sizeof(struct hid_global)); return 0; case HID_GLOBAL_ITEM_TAG_POP: if (!parser->global_stack_ptr) { hid_err(parser->device, "global environment stack underflow\n"); return -1; } memcpy(&parser->global, parser->global_stack + --parser->global_stack_ptr, sizeof(struct hid_global)); return 0; case HID_GLOBAL_ITEM_TAG_USAGE_PAGE: parser->global.usage_page = item_udata(item); return 0; case HID_GLOBAL_ITEM_TAG_LOGICAL_MINIMUM: parser->global.logical_minimum = item_sdata(item); return 0; case HID_GLOBAL_ITEM_TAG_LOGICAL_MAXIMUM: if (parser->global.logical_minimum < 0) parser->global.logical_maximum = item_sdata(item); else parser->global.logical_maximum = item_udata(item); return 0; case HID_GLOBAL_ITEM_TAG_PHYSICAL_MINIMUM: parser->global.physical_minimum = item_sdata(item); return 0; case HID_GLOBAL_ITEM_TAG_PHYSICAL_MAXIMUM: if (parser->global.physical_minimum < 0) parser->global.physical_maximum = item_sdata(item); else parser->global.physical_maximum = item_udata(item); return 0; case HID_GLOBAL_ITEM_TAG_UNIT_EXPONENT: /* Many devices provide unit exponent as a two's complement * nibble due to the common misunderstanding of HID * specification 1.11, 6.2.2.7 Global Items. Attempt to handle * both this and the standard encoding. */ raw_value = item_sdata(item); if (!(raw_value & 0xfffffff0)) parser->global.unit_exponent = hid_snto32(raw_value, 4); else parser->global.unit_exponent = raw_value; return 0; case HID_GLOBAL_ITEM_TAG_UNIT: parser->global.unit = item_udata(item); return 0; case HID_GLOBAL_ITEM_TAG_REPORT_SIZE: parser->global.report_size = item_udata(item); if (parser->global.report_size > 256) { hid_err(parser->device, "invalid report_size %d\n", parser->global.report_size); return -1; } return 0; case HID_GLOBAL_ITEM_TAG_REPORT_COUNT: parser->global.report_count = item_udata(item); if (parser->global.report_count > HID_MAX_USAGES) { hid_err(parser->device, "invalid report_count %d\n", parser->global.report_count); return -1; } return 0; case HID_GLOBAL_ITEM_TAG_REPORT_ID: parser->global.report_id = item_udata(item); if (parser->global.report_id == 0 || parser->global.report_id >= HID_MAX_IDS) { hid_err(parser->device, "report_id %u is invalid\n", parser->global.report_id); return -1; } return 0; default: hid_err(parser->device, "unknown global tag 0x%x\n", item->tag); return -1; } } /* * Process a local item. */ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) { __u32 data; unsigned n; __u32 count; data = item_udata(item); switch (item->tag) { case HID_LOCAL_ITEM_TAG_DELIMITER: if (data) { /* * We treat items before the first delimiter * as global to all usage sets (branch 0). * In the moment we process only these global * items and the first delimiter set. 
*/ if (parser->local.delimiter_depth != 0) { hid_err(parser->device, "nested delimiters\n"); return -1; } parser->local.delimiter_depth++; parser->local.delimiter_branch++; } else { if (parser->local.delimiter_depth < 1) { hid_err(parser->device, "bogus close delimiter\n"); return -1; } parser->local.delimiter_depth--; } return 0; case HID_LOCAL_ITEM_TAG_USAGE: if (parser->local.delimiter_branch > 1) { dbg_hid("alternative usage ignored\n"); return 0; } return hid_add_usage(parser, data, item->size); case HID_LOCAL_ITEM_TAG_USAGE_MINIMUM: if (parser->local.delimiter_branch > 1) { dbg_hid("alternative usage ignored\n"); return 0; } parser->local.usage_minimum = data; return 0; case HID_LOCAL_ITEM_TAG_USAGE_MAXIMUM: if (parser->local.delimiter_branch > 1) { dbg_hid("alternative usage ignored\n"); return 0; } count = data - parser->local.usage_minimum; if (count + parser->local.usage_index >= HID_MAX_USAGES) { /* * We do not warn if the name is not set, we are * actually pre-scanning the device. */ if (dev_name(&parser->device->dev)) hid_warn(parser->device, "ignoring exceeding usage max\n"); data = HID_MAX_USAGES - parser->local.usage_index + parser->local.usage_minimum - 1; if (data <= 0) { hid_err(parser->device, "no more usage index available\n"); return -1; } } for (n = parser->local.usage_minimum; n <= data; n++) if (hid_add_usage(parser, n, item->size)) { dbg_hid("hid_add_usage failed\n"); return -1; } return 0; default: dbg_hid("unknown local item tag 0x%x\n", item->tag); return 0; } return 0; } /* * Concatenate Usage Pages into Usages where relevant: * As per specification, 6.2.2.8: "When the parser encounters a main item it * concatenates the last declared Usage Page with a Usage to form a complete * usage value." */ static void hid_concatenate_last_usage_page(struct hid_parser *parser) { int i; unsigned int usage_page; unsigned int current_page; if (!parser->local.usage_index) return; usage_page = parser->global.usage_page; /* * Concatenate usage page again only if last declared Usage Page * has not been already used in previous usages concatenation */ for (i = parser->local.usage_index - 1; i >= 0; i--) { if (parser->local.usage_size[i] > 2) /* Ignore extended usages */ continue; current_page = parser->local.usage[i] >> 16; if (current_page == usage_page) break; complete_usage(parser, i); } } /* * Process a main item. */ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item) { __u32 data; int ret; hid_concatenate_last_usage_page(parser); data = item_udata(item); switch (item->tag) { case HID_MAIN_ITEM_TAG_BEGIN_COLLECTION: ret = open_collection(parser, data & 0xff); break; case HID_MAIN_ITEM_TAG_END_COLLECTION: ret = close_collection(parser); break; case HID_MAIN_ITEM_TAG_INPUT: ret = hid_add_field(parser, HID_INPUT_REPORT, data); break; case HID_MAIN_ITEM_TAG_OUTPUT: ret = hid_add_field(parser, HID_OUTPUT_REPORT, data); break; case HID_MAIN_ITEM_TAG_FEATURE: ret = hid_add_field(parser, HID_FEATURE_REPORT, data); break; default: hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag); ret = 0; } memset(&parser->local, 0, sizeof(parser->local)); /* Reset the local parser environment */ return ret; } /* * Process a reserved item. */ static int hid_parser_reserved(struct hid_parser *parser, struct hid_item *item) { dbg_hid("reserved item type, tag 0x%x\n", item->tag); return 0; } /* * Free a report and all registered fields. The field->usage and * field->value table's are allocated behind the field, so we need * only to free(field) itself. 
*/ static void hid_free_report(struct hid_report *report) { unsigned n; kfree(report->field_entries); for (n = 0; n < report->maxfield; n++) kfree(report->field[n]); kfree(report); } /* * Close report. This function returns the device * state to the point prior to hid_open_report(). */ static void hid_close_report(struct hid_device *device) { unsigned i, j; for (i = 0; i < HID_REPORT_TYPES; i++) { struct hid_report_enum *report_enum = device->report_enum + i; for (j = 0; j < HID_MAX_IDS; j++) { struct hid_report *report = report_enum->report_id_hash[j]; if (report) hid_free_report(report); } memset(report_enum, 0, sizeof(*report_enum)); INIT_LIST_HEAD(&report_enum->report_list); } kfree(device->rdesc); device->rdesc = NULL; device->rsize = 0; kfree(device->collection); device->collection = NULL; device->collection_size = 0; device->maxcollection = 0; device->maxapplication = 0; device->status &= ~HID_STAT_PARSED; } /* * Free a device structure, all reports, and all fields. */ void hiddev_free(struct kref *ref) { struct hid_device *hid = container_of(ref, struct hid_device, ref); hid_close_report(hid); kfree(hid->dev_rdesc); kfree(hid); } static void hid_device_release(struct device *dev) { struct hid_device *hid = to_hid_device(dev); kref_put(&hid->ref, hiddev_free); } /* * Fetch a report description item from the data stream. We support long * items, though they are not used yet. */ static u8 *fetch_item(__u8 *start, __u8 *end, struct hid_item *item) { u8 b; if ((end - start) <= 0) return NULL; b = *start++; item->type = (b >> 2) & 3; item->tag = (b >> 4) & 15; if (item->tag == HID_ITEM_TAG_LONG) { item->format = HID_ITEM_FORMAT_LONG; if ((end - start) < 2) return NULL; item->size = *start++; item->tag = *start++; if ((end - start) < item->size) return NULL; item->data.longdata = start; start += item->size; return start; } item->format = HID_ITEM_FORMAT_SHORT; item->size = b & 3; switch (item->size) { case 0: return start; case 1: if ((end - start) < 1) return NULL; item->data.u8 = *start++; return start; case 2: if ((end - start) < 2) return NULL; item->data.u16 = get_unaligned_le16(start); start = (__u8 *)((__le16 *)start + 1); return start; case 3: item->size++; if ((end - start) < 4) return NULL; item->data.u32 = get_unaligned_le32(start); start = (__u8 *)((__le32 *)start + 1); return start; } return NULL; } static void hid_scan_input_usage(struct hid_parser *parser, u32 usage) { struct hid_device *hid = parser->device; if (usage == HID_DG_CONTACTID) hid->group = HID_GROUP_MULTITOUCH; } static void hid_scan_feature_usage(struct hid_parser *parser, u32 usage) { if (usage == 0xff0000c5 && parser->global.report_count == 256 && parser->global.report_size == 8) parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8; if (usage == 0xff0000c6 && parser->global.report_count == 1 && parser->global.report_size == 8) parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8; } static void hid_scan_collection(struct hid_parser *parser, unsigned type) { struct hid_device *hid = parser->device; int i; if (((parser->global.usage_page << 16) == HID_UP_SENSOR) && (type == HID_COLLECTION_PHYSICAL || type == HID_COLLECTION_APPLICATION)) hid->group = HID_GROUP_SENSOR_HUB; if (hid->vendor == USB_VENDOR_ID_MICROSOFT && hid->product == USB_DEVICE_ID_MS_POWER_COVER && hid->group == HID_GROUP_MULTITOUCH) hid->group = HID_GROUP_GENERIC; if ((parser->global.usage_page << 16) == HID_UP_GENDESK) for (i = 0; i < parser->local.usage_index; i++) if (parser->local.usage[i] == HID_GD_POINTER) parser->scan_flags |= HID_SCAN_FLAG_GD_POINTER; 
if ((parser->global.usage_page << 16) >= HID_UP_MSVENDOR) parser->scan_flags |= HID_SCAN_FLAG_VENDOR_SPECIFIC; if ((parser->global.usage_page << 16) == HID_UP_GOOGLEVENDOR) for (i = 0; i < parser->local.usage_index; i++) if (parser->local.usage[i] == (HID_UP_GOOGLEVENDOR | 0x0001)) parser->device->group = HID_GROUP_VIVALDI; } static int hid_scan_main(struct hid_parser *parser, struct hid_item *item) { __u32 data; int i; hid_concatenate_last_usage_page(parser); data = item_udata(item); switch (item->tag) { case HID_MAIN_ITEM_TAG_BEGIN_COLLECTION: hid_scan_collection(parser, data & 0xff); break; case HID_MAIN_ITEM_TAG_END_COLLECTION: break; case HID_MAIN_ITEM_TAG_INPUT: /* ignore constant inputs, they will be ignored by hid-input */ if (data & HID_MAIN_ITEM_CONSTANT) break; for (i = 0; i < parser->local.usage_index; i++) hid_scan_input_usage(parser, parser->local.usage[i]); break; case HID_MAIN_ITEM_TAG_OUTPUT: break; case HID_MAIN_ITEM_TAG_FEATURE: for (i = 0; i < parser->local.usage_index; i++) hid_scan_feature_usage(parser, parser->local.usage[i]); break; } /* Reset the local parser environment */ memset(&parser->local, 0, sizeof(parser->local)); return 0; } /* * Scan a report descriptor before the device is added to the bus. * Sets device groups and other properties that determine what driver * to load. */ static int hid_scan_report(struct hid_device *hid) { struct hid_parser *parser; struct hid_item item; __u8 *start = hid->dev_rdesc; __u8 *end = start + hid->dev_rsize; static int (*dispatch_type[])(struct hid_parser *parser, struct hid_item *item) = { hid_scan_main, hid_parser_global, hid_parser_local, hid_parser_reserved }; parser = vzalloc(sizeof(struct hid_parser)); if (!parser) return -ENOMEM; parser->device = hid; hid->group = HID_GROUP_GENERIC; /* * The parsing is simpler than the one in hid_open_report() as we should * be robust against hid errors. Those errors will be raised by * hid_open_report() anyway. */ while ((start = fetch_item(start, end, &item)) != NULL) dispatch_type[item.type](parser, &item); /* * Handle special flags set during scanning. */ if ((parser->scan_flags & HID_SCAN_FLAG_MT_WIN_8) && (hid->group == HID_GROUP_MULTITOUCH)) hid->group = HID_GROUP_MULTITOUCH_WIN_8; /* * Vendor specific handlings */ switch (hid->vendor) { case USB_VENDOR_ID_WACOM: hid->group = HID_GROUP_WACOM; break; case USB_VENDOR_ID_SYNAPTICS: if (hid->group == HID_GROUP_GENERIC) if ((parser->scan_flags & HID_SCAN_FLAG_VENDOR_SPECIFIC) && (parser->scan_flags & HID_SCAN_FLAG_GD_POINTER)) /* * hid-rmi should take care of them, * not hid-generic */ hid->group = HID_GROUP_RMI; break; } kfree(parser->collection_stack); vfree(parser); return 0; } /** * hid_parse_report - parse device report * * @hid: hid device * @start: report start * @size: report size * * Allocate the device report as read by the bus driver. This function should * only be called from parse() in ll drivers. 
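 *
 * Illustrative use (editor's sketch, all names hypothetical): a low level
 * transport driver's ->parse() callback usually just forwards the raw
 * descriptor it fetched from the hardware:
 *
 *	static int foo_ll_parse(struct hid_device *hid)
 *	{
 *		struct foo_dev *foo = foo_from_hid(hid);
 *
 *		return hid_parse_report(hid, foo->rdesc, foo->rdesc_len);
 *	}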
*/ int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size) { hid->dev_rdesc = kmemdup(start, size, GFP_KERNEL); if (!hid->dev_rdesc) return -ENOMEM; hid->dev_rsize = size; return 0; } EXPORT_SYMBOL_GPL(hid_parse_report); static const char * const hid_report_names[] = { "HID_INPUT_REPORT", "HID_OUTPUT_REPORT", "HID_FEATURE_REPORT", }; /** * hid_validate_values - validate existing device report's value indexes * * @hid: hid device * @type: which report type to examine * @id: which report ID to examine (0 for first) * @field_index: which report field to examine * @report_counts: expected number of values * * Validate the number of values in a given field of a given report, after * parsing. */ struct hid_report *hid_validate_values(struct hid_device *hid, enum hid_report_type type, unsigned int id, unsigned int field_index, unsigned int report_counts) { struct hid_report *report; if (type > HID_FEATURE_REPORT) { hid_err(hid, "invalid HID report type %u\n", type); return NULL; } if (id >= HID_MAX_IDS) { hid_err(hid, "invalid HID report id %u\n", id); return NULL; } /* * Explicitly not using hid_get_report() here since it depends on * ->numbered being checked, which may not always be the case when * drivers go to access report values. */ if (id == 0) { /* * Validating on id 0 means we should examine the first * report in the list. */ report = list_first_entry_or_null( &hid->report_enum[type].report_list, struct hid_report, list); } else { report = hid->report_enum[type].report_id_hash[id]; } if (!report) { hid_err(hid, "missing %s %u\n", hid_report_names[type], id); return NULL; } if (report->maxfield <= field_index) { hid_err(hid, "not enough fields in %s %u\n", hid_report_names[type], id); return NULL; } if (report->field[field_index]->report_count < report_counts) { hid_err(hid, "not enough values in %s %u field %u\n", hid_report_names[type], id, field_index); return NULL; } return report; } EXPORT_SYMBOL_GPL(hid_validate_values); static int hid_calculate_multiplier(struct hid_device *hid, struct hid_field *multiplier) { int m; __s32 v = *multiplier->value; __s32 lmin = multiplier->logical_minimum; __s32 lmax = multiplier->logical_maximum; __s32 pmin = multiplier->physical_minimum; __s32 pmax = multiplier->physical_maximum; /* * "Because OS implementations will generally divide the control's * reported count by the Effective Resolution Multiplier, designers * should take care not to establish a potential Effective * Resolution Multiplier of zero." * HID Usage Table, v1.12, Section 4.3.1, p31 */ if (lmax - lmin == 0) return 1; /* * Handling the unit exponent is left as an exercise to whoever * finds a device where that exponent is not 0. */ m = ((v - lmin)/(lmax - lmin) * (pmax - pmin) + pmin); if (unlikely(multiplier->unit_exponent != 0)) { hid_warn(hid, "unsupported Resolution Multiplier unit exponent %d\n", multiplier->unit_exponent); } /* There are no devices with an effective multiplier > 255 */ if (unlikely(m == 0 || m > 255 || m < -255)) { hid_warn(hid, "unsupported Resolution Multiplier %d\n", m); m = 1; } return m; } static void hid_apply_multiplier_to_field(struct hid_device *hid, struct hid_field *field, struct hid_collection *multiplier_collection, int effective_multiplier) { struct hid_collection *collection; struct hid_usage *usage; int i; /* * If multiplier_collection is NULL, the multiplier applies * to all fields in the report. * Otherwise, it is the Logical Collection the multiplier applies to * but our field may be in a subcollection of that collection. 
*/ for (i = 0; i < field->maxusage; i++) { usage = &field->usage[i]; collection = &hid->collection[usage->collection_index]; while (collection->parent_idx != -1 && collection != multiplier_collection) collection = &hid->collection[collection->parent_idx]; if (collection->parent_idx != -1 || multiplier_collection == NULL) usage->resolution_multiplier = effective_multiplier; } } static void hid_apply_multiplier(struct hid_device *hid, struct hid_field *multiplier) { struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_field *field; struct hid_collection *multiplier_collection; int effective_multiplier; int i; /* * "The Resolution Multiplier control must be contained in the same * Logical Collection as the control(s) to which it is to be applied. * If no Resolution Multiplier is defined, then the Resolution * Multiplier defaults to 1. If more than one control exists in a * Logical Collection, the Resolution Multiplier is associated with * all controls in the collection. If no Logical Collection is * defined, the Resolution Multiplier is associated with all * controls in the report." * HID Usage Table, v1.12, Section 4.3.1, p30 * * Thus, search from the current collection upwards until we find a * logical collection. Then search all fields for that same parent * collection. Those are the fields the multiplier applies to. * * If we have more than one multiplier, it will overwrite the * applicable fields later. */ multiplier_collection = &hid->collection[multiplier->usage->collection_index]; while (multiplier_collection->parent_idx != -1 && multiplier_collection->type != HID_COLLECTION_LOGICAL) multiplier_collection = &hid->collection[multiplier_collection->parent_idx]; effective_multiplier = hid_calculate_multiplier(hid, multiplier); rep_enum = &hid->report_enum[HID_INPUT_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { for (i = 0; i < rep->maxfield; i++) { field = rep->field[i]; hid_apply_multiplier_to_field(hid, field, multiplier_collection, effective_multiplier); } } } /* * hid_setup_resolution_multiplier - set up all resolution multipliers * * @device: hid device * * Search for all Resolution Multiplier Feature Reports and apply their * value to all matching Input items. This only updates the internal struct * fields. * * The Resolution Multiplier is applied by the hardware. If the multiplier * is anything other than 1, the hardware will send pre-multiplied events * so that the same physical interaction generates an accumulated * accumulated_value = value * * multiplier * This may be achieved by sending * - "value * multiplier" for each event, or * - "value" but "multiplier" times as frequently, or * - a combination of the above * The only guarantee is that the same physical interaction always generates * an accumulated 'value * multiplier'. * * This function must be called before any event processing and after * any SetRequest to the Resolution Multiplier. */ void hid_setup_resolution_multiplier(struct hid_device *hid) { struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_usage *usage; int i, j; rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { for (i = 0; i < rep->maxfield; i++) { /* Ignore if report count is out of bounds. 
*/ if (rep->field[i]->report_count < 1) continue; for (j = 0; j < rep->field[i]->maxusage; j++) { usage = &rep->field[i]->usage[j]; if (usage->hid == HID_GD_RESOLUTION_MULTIPLIER) hid_apply_multiplier(hid, rep->field[i]); } } } } EXPORT_SYMBOL_GPL(hid_setup_resolution_multiplier); /** * hid_open_report - open a driver-specific device report * * @device: hid device * * Parse a report description into a hid_device structure. Reports are * enumerated, fields are attached to these reports. * 0 returned on success, otherwise nonzero error value. * * This function (or the equivalent hid_parse() macro) should only be * called from probe() in drivers, before starting the device. */ int hid_open_report(struct hid_device *device) { struct hid_parser *parser; struct hid_item item; unsigned int size; __u8 *start; __u8 *buf; __u8 *end; __u8 *next; int ret; int i; static int (*dispatch_type[])(struct hid_parser *parser, struct hid_item *item) = { hid_parser_main, hid_parser_global, hid_parser_local, hid_parser_reserved }; if (WARN_ON(device->status & HID_STAT_PARSED)) return -EBUSY; start = device->dev_rdesc; if (WARN_ON(!start)) return -ENODEV; size = device->dev_rsize; /* call_hid_bpf_rdesc_fixup() ensures we work on a copy of rdesc */ buf = call_hid_bpf_rdesc_fixup(device, start, &size); if (buf == NULL) return -ENOMEM; if (device->driver->report_fixup) start = device->driver->report_fixup(device, buf, &size); else start = buf; start = kmemdup(start, size, GFP_KERNEL); kfree(buf); if (start == NULL) return -ENOMEM; device->rdesc = start; device->rsize = size; parser = vzalloc(sizeof(struct hid_parser)); if (!parser) { ret = -ENOMEM; goto alloc_err; } parser->device = device; end = start + size; device->collection = kcalloc(HID_DEFAULT_NUM_COLLECTIONS, sizeof(struct hid_collection), GFP_KERNEL); if (!device->collection) { ret = -ENOMEM; goto err; } device->collection_size = HID_DEFAULT_NUM_COLLECTIONS; for (i = 0; i < HID_DEFAULT_NUM_COLLECTIONS; i++) device->collection[i].parent_idx = -1; ret = -EINVAL; while ((next = fetch_item(start, end, &item)) != NULL) { start = next; if (item.format != HID_ITEM_FORMAT_SHORT) { hid_err(device, "unexpected long global item\n"); goto err; } if (dispatch_type[item.type](parser, &item)) { hid_err(device, "item %u %u %u %u parsing failed\n", item.format, (unsigned)item.size, (unsigned)item.type, (unsigned)item.tag); goto err; } if (start == end) { if (parser->collection_stack_ptr) { hid_err(device, "unbalanced collection at end of report description\n"); goto err; } if (parser->local.delimiter_depth) { hid_err(device, "unbalanced delimiter at end of report description\n"); goto err; } /* * fetch initial values in case the device's * default multiplier isn't the recommended 1 */ hid_setup_resolution_multiplier(device); kfree(parser->collection_stack); vfree(parser); device->status |= HID_STAT_PARSED; return 0; } } hid_err(device, "item fetching failed at offset %u/%u\n", size - (unsigned int)(end - start), size); err: kfree(parser->collection_stack); alloc_err: vfree(parser); hid_close_report(device); return ret; } EXPORT_SYMBOL_GPL(hid_open_report); /* * Convert a signed n-bit integer to signed 32-bit integer. Common * cases are done through the compiler, the screwed things has to be * done by hand. */ static s32 snto32(__u32 value, unsigned n) { if (!value || !n) return 0; if (n > 32) n = 32; switch (n) { case 8: return ((__s8)value); case 16: return ((__s16)value); case 32: return ((__s32)value); } return value & (1 << (n - 1)) ? 
value | (~0U << n) : value; } s32 hid_snto32(__u32 value, unsigned n) { return snto32(value, n); } EXPORT_SYMBOL_GPL(hid_snto32); /* * Convert a signed 32-bit integer to a signed n-bit integer. */ static u32 s32ton(__s32 value, unsigned n) { s32 a = value >> (n - 1); if (a && a != -1) return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1; return value & ((1 << n) - 1); } /* * Extract/implement a data field from/to a little endian report (bit array). * * Code sort-of follows HID spec: * http://www.usb.org/developers/hidpage/HID1_11.pdf * * While the USB HID spec allows unlimited length bit fields in "report * descriptors", most devices never use more than 16 bits. * One model of UPS is claimed to report "LINEV" as a 32-bit field. * Search linux-kernel and linux-usb-devel archives for "hid-core extract". */ static u32 __extract(u8 *report, unsigned offset, int n) { unsigned int idx = offset / 8; unsigned int bit_nr = 0; unsigned int bit_shift = offset % 8; int bits_to_copy = 8 - bit_shift; u32 value = 0; u32 mask = n < 32 ? (1U << n) - 1 : ~0U; while (n > 0) { value |= ((u32)report[idx] >> bit_shift) << bit_nr; n -= bits_to_copy; bit_nr += bits_to_copy; bits_to_copy = 8; bit_shift = 0; idx++; } return value & mask; } u32 hid_field_extract(const struct hid_device *hid, u8 *report, unsigned offset, unsigned n) { if (n > 32) { hid_warn_once(hid, "%s() called with n (%d) > 32! (%s)\n", __func__, n, current->comm); n = 32; } return __extract(report, offset, n); } EXPORT_SYMBOL_GPL(hid_field_extract); /* * "implement" : set bits in a little endian bit stream. * Same concepts as "extract" (see comments above). * The data mangled in the bit stream remains in little endian * order the whole time. It make more sense to talk about * endianness of register values by considering a register * a "cached" copy of the little endian bit stream. */ static void __implement(u8 *report, unsigned offset, int n, u32 value) { unsigned int idx = offset / 8; unsigned int bit_shift = offset % 8; int bits_to_set = 8 - bit_shift; while (n - bits_to_set >= 0) { report[idx] &= ~(0xff << bit_shift); report[idx] |= value << bit_shift; value >>= bits_to_set; n -= bits_to_set; bits_to_set = 8; bit_shift = 0; idx++; } /* last nibble */ if (n) { u8 bit_mask = ((1U << n) - 1); report[idx] &= ~(bit_mask << bit_shift); report[idx] |= value << bit_shift; } } static void implement(const struct hid_device *hid, u8 *report, unsigned offset, unsigned n, u32 value) { if (unlikely(n > 32)) { hid_warn(hid, "%s() called with n (%d) > 32! (%s)\n", __func__, n, current->comm); n = 32; } else if (n < 32) { u32 m = (1U << n) - 1; if (unlikely(value > m)) { hid_warn(hid, "%s() called with too large value %d (n: %d)! (%s)\n", __func__, value, n, current->comm); WARN_ON(1); value &= m; } } __implement(report, offset, n, value); } /* * Search an array for a value. 
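 *
 * Note (editor's clarification): returns 0 when @value is present in
 * @array and -1 when it is not, so the array-field diffing below treats a
 * non-zero result as "this usage is missing from the other buffer", i.e. a
 * key press or release that needs to be reported.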
*/ static int search(__s32 *array, __s32 value, unsigned n) { while (n--) { if (*array++ == value) return 0; } return -1; } /** * hid_match_report - check if driver's raw_event should be called * * @hid: hid device * @report: hid report to match against * * compare hid->driver->report_table->report_type to report->type */ static int hid_match_report(struct hid_device *hid, struct hid_report *report) { const struct hid_report_id *id = hid->driver->report_table; if (!id) /* NULL means all */ return 1; for (; id->report_type != HID_TERMINATOR; id++) if (id->report_type == HID_ANY_ID || id->report_type == report->type) return 1; return 0; } /** * hid_match_usage - check if driver's event should be called * * @hid: hid device * @usage: usage to match against * * compare hid->driver->usage_table->usage_{type,code} to * usage->usage_{type,code} */ static int hid_match_usage(struct hid_device *hid, struct hid_usage *usage) { const struct hid_usage_id *id = hid->driver->usage_table; if (!id) /* NULL means all */ return 1; for (; id->usage_type != HID_ANY_ID - 1; id++) if ((id->usage_hid == HID_ANY_ID || id->usage_hid == usage->hid) && (id->usage_type == HID_ANY_ID || id->usage_type == usage->type) && (id->usage_code == HID_ANY_ID || id->usage_code == usage->code)) return 1; return 0; } static void hid_process_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value, int interrupt) { struct hid_driver *hdrv = hid->driver; int ret; if (!list_empty(&hid->debug_list)) hid_dump_input(hid, usage, value); if (hdrv && hdrv->event && hid_match_usage(hid, usage)) { ret = hdrv->event(hid, field, usage, value); if (ret != 0) { if (ret < 0) hid_err(hid, "%s's event failed with %d\n", hdrv->name, ret); return; } } if (hid->claimed & HID_CLAIMED_INPUT) hidinput_hid_event(hid, field, usage, value); if (hid->claimed & HID_CLAIMED_HIDDEV && interrupt && hid->hiddev_hid_event) hid->hiddev_hid_event(hid, field, usage, value); } /* * Checks if the given value is valid within this field */ static inline int hid_array_value_is_valid(struct hid_field *field, __s32 value) { __s32 min = field->logical_minimum; /* * Value needs to be between logical min and max, and * (value - min) is used as an index in the usage array. * This array is of size field->maxusage */ return value >= min && value <= field->logical_maximum && value - min < field->maxusage; } /* * Fetch the field from the data. The field content is stored for next * report processing (we do differential reporting to the layer). */ static void hid_input_fetch_field(struct hid_device *hid, struct hid_field *field, __u8 *data) { unsigned n; unsigned count = field->report_count; unsigned offset = field->report_offset; unsigned size = field->report_size; __s32 min = field->logical_minimum; __s32 *value; value = field->new_value; memset(value, 0, count * sizeof(__s32)); field->ignored = false; for (n = 0; n < count; n++) { value[n] = min < 0 ? snto32(hid_field_extract(hid, data, offset + n * size, size), size) : hid_field_extract(hid, data, offset + n * size, size); /* Ignore report if ErrorRollOver */ if (!(field->flags & HID_MAIN_ITEM_VARIABLE) && hid_array_value_is_valid(field, value[n]) && field->usage[value[n] - min].hid == HID_UP_KEYBOARD + 1) { field->ignored = true; return; } } } /* * Process a received variable field. 
*/ static void hid_input_var_field(struct hid_device *hid, struct hid_field *field, int interrupt) { unsigned int count = field->report_count; __s32 *value = field->new_value; unsigned int n; for (n = 0; n < count; n++) hid_process_event(hid, field, &field->usage[n], value[n], interrupt); memcpy(field->value, value, count * sizeof(__s32)); } /* * Process a received array field. The field content is stored for * next report processing (we do differential reporting to the layer). */ static void hid_input_array_field(struct hid_device *hid, struct hid_field *field, int interrupt) { unsigned int n; unsigned int count = field->report_count; __s32 min = field->logical_minimum; __s32 *value; value = field->new_value; /* ErrorRollOver */ if (field->ignored) return; for (n = 0; n < count; n++) { if (hid_array_value_is_valid(field, field->value[n]) && search(value, field->value[n], count)) hid_process_event(hid, field, &field->usage[field->value[n] - min], 0, interrupt); if (hid_array_value_is_valid(field, value[n]) && search(field->value, value[n], count)) hid_process_event(hid, field, &field->usage[value[n] - min], 1, interrupt); } memcpy(field->value, value, count * sizeof(__s32)); } /* * Analyse a received report, and fetch the data from it. The field * content is stored for next report processing (we do differential * reporting to the layer). */ static void hid_process_report(struct hid_device *hid, struct hid_report *report, __u8 *data, int interrupt) { unsigned int a; struct hid_field_entry *entry; struct hid_field *field; /* first retrieve all incoming values in data */ for (a = 0; a < report->maxfield; a++) hid_input_fetch_field(hid, report->field[a], data); if (!list_empty(&report->field_entry_list)) { /* INPUT_REPORT, we have a priority list of fields */ list_for_each_entry(entry, &report->field_entry_list, list) { field = entry->field; if (field->flags & HID_MAIN_ITEM_VARIABLE) hid_process_event(hid, field, &field->usage[entry->index], field->new_value[entry->index], interrupt); else hid_input_array_field(hid, field, interrupt); } /* we need to do the memcpy at the end for var items */ for (a = 0; a < report->maxfield; a++) { field = report->field[a]; if (field->flags & HID_MAIN_ITEM_VARIABLE) memcpy(field->value, field->new_value, field->report_count * sizeof(__s32)); } } else { /* FEATURE_REPORT, regular processing */ for (a = 0; a < report->maxfield; a++) { field = report->field[a]; if (field->flags & HID_MAIN_ITEM_VARIABLE) hid_input_var_field(hid, field, interrupt); else hid_input_array_field(hid, field, interrupt); } } } /* * Insert a given usage_index in a field in the list * of processed usages in the report. * * The elements of lower priority score are processed * first. 
*/ static void __hid_insert_field_entry(struct hid_device *hid, struct hid_report *report, struct hid_field_entry *entry, struct hid_field *field, unsigned int usage_index) { struct hid_field_entry *next; entry->field = field; entry->index = usage_index; entry->priority = field->usages_priorities[usage_index]; /* insert the element at the correct position */ list_for_each_entry(next, &report->field_entry_list, list) { /* * the priority of our element is strictly higher * than the next one, insert it before */ if (entry->priority > next->priority) { list_add_tail(&entry->list, &next->list); return; } } /* lowest priority score: insert at the end */ list_add_tail(&entry->list, &report->field_entry_list); } static void hid_report_process_ordering(struct hid_device *hid, struct hid_report *report) { struct hid_field *field; struct hid_field_entry *entries; unsigned int a, u, usages; unsigned int count = 0; /* count the number of individual fields in the report */ for (a = 0; a < report->maxfield; a++) { field = report->field[a]; if (field->flags & HID_MAIN_ITEM_VARIABLE) count += field->report_count; else count++; } /* allocate the memory to process the fields */ entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); if (!entries) return; report->field_entries = entries; /* * walk through all fields in the report and * store them by priority order in report->field_entry_list * * - Var elements are individualized (field + usage_index) * - Arrays are taken as one, we can not chose an order for them */ usages = 0; for (a = 0; a < report->maxfield; a++) { field = report->field[a]; if (field->flags & HID_MAIN_ITEM_VARIABLE) { for (u = 0; u < field->report_count; u++) { __hid_insert_field_entry(hid, report, &entries[usages], field, u); usages++; } } else { __hid_insert_field_entry(hid, report, &entries[usages], field, 0); usages++; } } } static void hid_process_ordering(struct hid_device *hid) { struct hid_report *report; struct hid_report_enum *report_enum = &hid->report_enum[HID_INPUT_REPORT]; list_for_each_entry(report, &report_enum->report_list, list) hid_report_process_ordering(hid, report); } /* * Output the field into the report. */ static void hid_output_field(const struct hid_device *hid, struct hid_field *field, __u8 *data) { unsigned count = field->report_count; unsigned offset = field->report_offset; unsigned size = field->report_size; unsigned n; for (n = 0; n < count; n++) { if (field->logical_minimum < 0) /* signed values */ implement(hid, data, offset + n * size, size, s32ton(field->value[n], size)); else /* unsigned values */ implement(hid, data, offset + n * size, size, field->value[n]); } } /* * Compute the size of a report. */ static size_t hid_compute_report_size(struct hid_report *report) { if (report->size) return ((report->size - 1) >> 3) + 1; return 0; } /* * Create a report. 'data' has to be allocated using * hid_alloc_report_buf() so that it has proper size. 
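 *
 * Typical "fill and send" sequence (editor's sketch, hypothetical caller,
 * error handling trimmed; this mirrors what __hid_request() further down
 * does internally):
 *
 *	u8 *buf = hid_alloc_report_buf(report, GFP_KERNEL);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	hid_output_report(report, buf);
 *	hid_hw_raw_request(hdev, report->id, buf, hid_report_len(report),
 *			   report->type, HID_REQ_SET_REPORT);
 *	kfree(buf);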
*/ void hid_output_report(struct hid_report *report, __u8 *data) { unsigned n; if (report->id > 0) *data++ = report->id; memset(data, 0, hid_compute_report_size(report)); for (n = 0; n < report->maxfield; n++) hid_output_field(report->device, report->field[n], data); } EXPORT_SYMBOL_GPL(hid_output_report); /* * Allocator for buffer that is going to be passed to hid_output_report() */ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) { /* * 7 extra bytes are necessary to achieve proper functionality * of implement() working on 8 byte chunks */ u32 len = hid_report_len(report) + 7; return kmalloc(len, flags); } EXPORT_SYMBOL_GPL(hid_alloc_report_buf); /* * Set a field value. The report this field belongs to has to be * created and transferred to the device, to set this value in the * device. */ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) { unsigned size; if (!field) return -1; size = field->report_size; hid_dump_input(field->report->device, field->usage + offset, value); if (offset >= field->report_count) { hid_err(field->report->device, "offset (%d) exceeds report_count (%d)\n", offset, field->report_count); return -1; } if (field->logical_minimum < 0) { if (value != snto32(s32ton(value, size), size)) { hid_err(field->report->device, "value %d is out of range\n", value); return -1; } } field->value[offset] = value; return 0; } EXPORT_SYMBOL_GPL(hid_set_field); static struct hid_report *hid_get_report(struct hid_report_enum *report_enum, const u8 *data) { struct hid_report *report; unsigned int n = 0; /* Normally report number is 0 */ /* Device uses numbered reports, data[0] is report number */ if (report_enum->numbered) n = *data; report = report_enum->report_id_hash[n]; if (report == NULL) dbg_hid("undefined report_id %u received\n", n); return report; } /* * Implement a generic .request() callback, using .raw_request() * DO NOT USE in hid drivers directly, but through hid_hw_request instead. 
*/ int __hid_request(struct hid_device *hid, struct hid_report *report, enum hid_class_request reqtype) { char *buf; int ret; u32 len; buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) return -ENOMEM; len = hid_report_len(report); if (reqtype == HID_REQ_SET_REPORT) hid_output_report(report, buf); ret = hid->ll_driver->raw_request(hid, report->id, buf, len, report->type, reqtype); if (ret < 0) { dbg_hid("unable to complete request: %d\n", ret); goto out; } if (reqtype == HID_REQ_GET_REPORT) hid_input_report(hid, report->type, buf, ret, 0); ret = 0; out: kfree(buf); return ret; } EXPORT_SYMBOL_GPL(__hid_request); int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt) { struct hid_report_enum *report_enum = hid->report_enum + type; struct hid_report *report; struct hid_driver *hdrv; int max_buffer_size = HID_MAX_BUFFER_SIZE; u32 rsize, csize = size; u8 *cdata = data; int ret = 0; report = hid_get_report(report_enum, data); if (!report) goto out; if (report_enum->numbered) { cdata++; csize--; } rsize = hid_compute_report_size(report); if (hid->ll_driver->max_buffer_size) max_buffer_size = hid->ll_driver->max_buffer_size; if (report_enum->numbered && rsize >= max_buffer_size) rsize = max_buffer_size - 1; else if (rsize > max_buffer_size) rsize = max_buffer_size; if (csize < rsize) { dbg_hid("report %d is too short, (%d < %d)\n", report->id, csize, rsize); memset(cdata + csize, 0, rsize - csize); } if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_report_event) hid->hiddev_report_event(hid, report); if (hid->claimed & HID_CLAIMED_HIDRAW) { ret = hidraw_report_event(hid, data, size); if (ret) goto out; } if (hid->claimed != HID_CLAIMED_HIDRAW && report->maxfield) { hid_process_report(hid, report, cdata, interrupt); hdrv = hid->driver; if (hdrv && hdrv->report) hdrv->report(hid, report); } if (hid->claimed & HID_CLAIMED_INPUT) hidinput_report_event(hid, report); out: return ret; } EXPORT_SYMBOL_GPL(hid_report_raw_event); /** * hid_input_report - report data from lower layer (usb, bt...) * * @hid: hid device * @type: HID report type (HID_*_REPORT) * @data: report contents * @size: size of data parameter * @interrupt: distinguish between interrupt and control transfers * * This is data entry for lower layers. 
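 *
 * Illustrative use (editor's sketch, hypothetical transport driver): the
 * interrupt-IN completion handler of a transport typically hands every
 * received buffer straight to this function:
 *
 *	hid_input_report(foo->hid, HID_INPUT_REPORT, foo->inbuf, len, 1);
 *
 * with the last argument 0 instead of 1 for data obtained via a control
 * transfer.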
*/ int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt) { struct hid_report_enum *report_enum; struct hid_driver *hdrv; struct hid_report *report; int ret = 0; if (!hid) return -ENODEV; if (down_trylock(&hid->driver_input_lock)) return -EBUSY; if (!hid->driver) { ret = -ENODEV; goto unlock; } report_enum = hid->report_enum + type; hdrv = hid->driver; data = dispatch_hid_bpf_device_event(hid, type, data, &size, interrupt); if (IS_ERR(data)) { ret = PTR_ERR(data); goto unlock; } if (!size) { dbg_hid("empty report\n"); ret = -1; goto unlock; } /* Avoid unnecessary overhead if debugfs is disabled */ if (!list_empty(&hid->debug_list)) hid_dump_report(hid, type, data, size); report = hid_get_report(report_enum, data); if (!report) { ret = -1; goto unlock; } if (hdrv && hdrv->raw_event && hid_match_report(hid, report)) { ret = hdrv->raw_event(hid, report, data, size); if (ret < 0) goto unlock; } ret = hid_report_raw_event(hid, type, data, size, interrupt); unlock: up(&hid->driver_input_lock); return ret; } EXPORT_SYMBOL_GPL(hid_input_report); bool hid_match_one_id(const struct hid_device *hdev, const struct hid_device_id *id) { return (id->bus == HID_BUS_ANY || id->bus == hdev->bus) && (id->group == HID_GROUP_ANY || id->group == hdev->group) && (id->vendor == HID_ANY_ID || id->vendor == hdev->vendor) && (id->product == HID_ANY_ID || id->product == hdev->product); } const struct hid_device_id *hid_match_id(const struct hid_device *hdev, const struct hid_device_id *id) { for (; id->bus; id++) if (hid_match_one_id(hdev, id)) return id; return NULL; } EXPORT_SYMBOL_GPL(hid_match_id); static const struct hid_device_id hid_hiddev_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS) }, { HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS1) }, { } }; static bool hid_hiddev(struct hid_device *hdev) { return !!hid_match_id(hdev, hid_hiddev_list); } static ssize_t read_report_descriptor(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct hid_device *hdev = to_hid_device(dev); if (off >= hdev->rsize) return 0; if (off + count > hdev->rsize) count = hdev->rsize - off; memcpy(buf, hdev->rdesc + off, count); return count; } static ssize_t show_country(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); return sprintf(buf, "%02x\n", hdev->country & 0xff); } static struct bin_attribute dev_bin_attr_report_desc = { .attr = { .name = "report_descriptor", .mode = 0444 }, .read = read_report_descriptor, .size = HID_MAX_DESCRIPTOR_SIZE, }; static const struct device_attribute dev_attr_country = { .attr = { .name = "country", .mode = 0444 }, .show = show_country, }; int hid_connect(struct hid_device *hdev, unsigned int connect_mask) { static const char *types[] = { "Device", "Pointer", "Mouse", "Device", "Joystick", "Gamepad", "Keyboard", "Keypad", "Multi-Axis Controller" }; const char *type, *bus; char buf[64] = ""; unsigned int i; int len; int ret; ret = hid_bpf_connect_device(hdev); if (ret) return ret; if (hdev->quirks & HID_QUIRK_HIDDEV_FORCE) connect_mask |= (HID_CONNECT_HIDDEV_FORCE | HID_CONNECT_HIDDEV); if (hdev->quirks & HID_QUIRK_HIDINPUT_FORCE) connect_mask |= HID_CONNECT_HIDINPUT_FORCE; if (hdev->bus != BUS_USB) connect_mask &= ~HID_CONNECT_HIDDEV; if (hid_hiddev(hdev)) connect_mask |= HID_CONNECT_HIDDEV_FORCE; if ((connect_mask & HID_CONNECT_HIDINPUT) && 
!hidinput_connect(hdev, connect_mask & HID_CONNECT_HIDINPUT_FORCE)) hdev->claimed |= HID_CLAIMED_INPUT; if ((connect_mask & HID_CONNECT_HIDDEV) && hdev->hiddev_connect && !hdev->hiddev_connect(hdev, connect_mask & HID_CONNECT_HIDDEV_FORCE)) hdev->claimed |= HID_CLAIMED_HIDDEV; if ((connect_mask & HID_CONNECT_HIDRAW) && !hidraw_connect(hdev)) hdev->claimed |= HID_CLAIMED_HIDRAW; if (connect_mask & HID_CONNECT_DRIVER) hdev->claimed |= HID_CLAIMED_DRIVER; /* Drivers with the ->raw_event callback set are not required to connect * to any other listener. */ if (!hdev->claimed && !hdev->driver->raw_event) { hid_err(hdev, "device has no listeners, quitting\n"); return -ENODEV; } hid_process_ordering(hdev); if ((hdev->claimed & HID_CLAIMED_INPUT) && (connect_mask & HID_CONNECT_FF) && hdev->ff_init) hdev->ff_init(hdev); len = 0; if (hdev->claimed & HID_CLAIMED_INPUT) len += sprintf(buf + len, "input"); if (hdev->claimed & HID_CLAIMED_HIDDEV) len += sprintf(buf + len, "%shiddev%d", len ? "," : "", ((struct hiddev *)hdev->hiddev)->minor); if (hdev->claimed & HID_CLAIMED_HIDRAW) len += sprintf(buf + len, "%shidraw%d", len ? "," : "", ((struct hidraw *)hdev->hidraw)->minor); type = "Device"; for (i = 0; i < hdev->maxcollection; i++) { struct hid_collection *col = &hdev->collection[i]; if (col->type == HID_COLLECTION_APPLICATION && (col->usage & HID_USAGE_PAGE) == HID_UP_GENDESK && (col->usage & 0xffff) < ARRAY_SIZE(types)) { type = types[col->usage & 0xffff]; break; } } switch (hdev->bus) { case BUS_USB: bus = "USB"; break; case BUS_BLUETOOTH: bus = "BLUETOOTH"; break; case BUS_I2C: bus = "I2C"; break; case BUS_VIRTUAL: bus = "VIRTUAL"; break; case BUS_INTEL_ISHTP: case BUS_AMD_SFH: bus = "SENSOR HUB"; break; default: bus = "<UNKNOWN>"; } ret = device_create_file(&hdev->dev, &dev_attr_country); if (ret) hid_warn(hdev, "can't create sysfs country code attribute err: %d\n", ret); hid_info(hdev, "%s: %s HID v%x.%02x %s [%s] on %s\n", buf, bus, hdev->version >> 8, hdev->version & 0xff, type, hdev->name, hdev->phys); return 0; } EXPORT_SYMBOL_GPL(hid_connect); void hid_disconnect(struct hid_device *hdev) { device_remove_file(&hdev->dev, &dev_attr_country); if (hdev->claimed & HID_CLAIMED_INPUT) hidinput_disconnect(hdev); if (hdev->claimed & HID_CLAIMED_HIDDEV) hdev->hiddev_disconnect(hdev); if (hdev->claimed & HID_CLAIMED_HIDRAW) hidraw_disconnect(hdev); hdev->claimed = 0; hid_bpf_disconnect_device(hdev); } EXPORT_SYMBOL_GPL(hid_disconnect); /** * hid_hw_start - start underlying HW * @hdev: hid device * @connect_mask: which outputs to connect, see HID_CONNECT_* * * Call this in probe function *after* hid_parse. This will set up HW * buffers and start the device (if not deferred to device open). * hid_hw_stop must be called if this was successful. */ int hid_hw_start(struct hid_device *hdev, unsigned int connect_mask) { int error; error = hdev->ll_driver->start(hdev); if (error) return error; if (connect_mask) { error = hid_connect(hdev, connect_mask); if (error) { hdev->ll_driver->stop(hdev); return error; } } return 0; } EXPORT_SYMBOL_GPL(hid_hw_start); /** * hid_hw_stop - stop underlying HW * @hdev: hid device * * This is usually called from the remove function, or from probe when something * failed and hid_hw_start was called already. 
*/ void hid_hw_stop(struct hid_device *hdev) { hid_disconnect(hdev); hdev->ll_driver->stop(hdev); } EXPORT_SYMBOL_GPL(hid_hw_stop); /** * hid_hw_open - signal underlying HW to start delivering events * @hdev: hid device * * Tell underlying HW to start delivering events from the device. * This function should be called sometime after a successful call * to hid_hw_start(). */ int hid_hw_open(struct hid_device *hdev) { int ret; ret = mutex_lock_killable(&hdev->ll_open_lock); if (ret) return ret; if (!hdev->ll_open_count++) { ret = hdev->ll_driver->open(hdev); if (ret) hdev->ll_open_count--; } mutex_unlock(&hdev->ll_open_lock); return ret; } EXPORT_SYMBOL_GPL(hid_hw_open); /** * hid_hw_close - signal underlying HW to stop delivering events * * @hdev: hid device * * This function indicates that we are not interested in the events * from this device anymore. Delivery of events may or may not stop, * depending on the number of users still outstanding. */ void hid_hw_close(struct hid_device *hdev) { mutex_lock(&hdev->ll_open_lock); if (!--hdev->ll_open_count) hdev->ll_driver->close(hdev); mutex_unlock(&hdev->ll_open_lock); } EXPORT_SYMBOL_GPL(hid_hw_close); /** * hid_hw_request - send report request to device * * @hdev: hid device * @report: report to send * @reqtype: hid request type */ void hid_hw_request(struct hid_device *hdev, struct hid_report *report, enum hid_class_request reqtype) { if (hdev->ll_driver->request) return hdev->ll_driver->request(hdev, report, reqtype); __hid_request(hdev, report, reqtype); } EXPORT_SYMBOL_GPL(hid_hw_request); /** * hid_hw_raw_request - send report request to device * * @hdev: hid device * @reportnum: report ID * @buf: in/out data to transfer * @len: length of buf * @rtype: HID report type * @reqtype: HID_REQ_GET_REPORT or HID_REQ_SET_REPORT * * Return: count of data transferred, negative if error * * Same behavior as hid_hw_request, but with raw buffers instead. 
*/ int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype) { unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; if (hdev->ll_driver->max_buffer_size) max_buffer_size = hdev->ll_driver->max_buffer_size; if (len < 1 || len > max_buffer_size || !buf) return -EINVAL; return hdev->ll_driver->raw_request(hdev, reportnum, buf, len, rtype, reqtype); } EXPORT_SYMBOL_GPL(hid_hw_raw_request); /** * hid_hw_output_report - send output report to device * * @hdev: hid device * @buf: raw data to transfer * @len: length of buf * * Return: count of data transferred, negative if error */ int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len) { unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; if (hdev->ll_driver->max_buffer_size) max_buffer_size = hdev->ll_driver->max_buffer_size; if (len < 1 || len > max_buffer_size || !buf) return -EINVAL; if (hdev->ll_driver->output_report) return hdev->ll_driver->output_report(hdev, buf, len); return -ENOSYS; } EXPORT_SYMBOL_GPL(hid_hw_output_report); #ifdef CONFIG_PM int hid_driver_suspend(struct hid_device *hdev, pm_message_t state) { if (hdev->driver && hdev->driver->suspend) return hdev->driver->suspend(hdev, state); return 0; } EXPORT_SYMBOL_GPL(hid_driver_suspend); int hid_driver_reset_resume(struct hid_device *hdev) { if (hdev->driver && hdev->driver->reset_resume) return hdev->driver->reset_resume(hdev); return 0; } EXPORT_SYMBOL_GPL(hid_driver_reset_resume); int hid_driver_resume(struct hid_device *hdev) { if (hdev->driver && hdev->driver->resume) return hdev->driver->resume(hdev); return 0; } EXPORT_SYMBOL_GPL(hid_driver_resume); #endif /* CONFIG_PM */ struct hid_dynid { struct list_head list; struct hid_device_id id; }; /** * new_id_store - add a new HID device ID to this driver and re-probe devices * @drv: target device driver * @buf: buffer for scanning device ID data * @count: input size * * Adds a new dynamic hid device ID to this driver, * and causes the driver to probe for all devices again. */ static ssize_t new_id_store(struct device_driver *drv, const char *buf, size_t count) { struct hid_driver *hdrv = to_hid_driver(drv); struct hid_dynid *dynid; __u32 bus, vendor, product; unsigned long driver_data = 0; int ret; ret = sscanf(buf, "%x %x %x %lx", &bus, &vendor, &product, &driver_data); if (ret < 3) return -EINVAL; dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); if (!dynid) return -ENOMEM; dynid->id.bus = bus; dynid->id.group = HID_GROUP_ANY; dynid->id.vendor = vendor; dynid->id.product = product; dynid->id.driver_data = driver_data; spin_lock(&hdrv->dyn_lock); list_add_tail(&dynid->list, &hdrv->dyn_list); spin_unlock(&hdrv->dyn_lock); ret = driver_attach(&hdrv->driver); return ret ? 
: count; } static DRIVER_ATTR_WO(new_id); static struct attribute *hid_drv_attrs[] = { &driver_attr_new_id.attr, NULL, }; ATTRIBUTE_GROUPS(hid_drv); static void hid_free_dynids(struct hid_driver *hdrv) { struct hid_dynid *dynid, *n; spin_lock(&hdrv->dyn_lock); list_for_each_entry_safe(dynid, n, &hdrv->dyn_list, list) { list_del(&dynid->list); kfree(dynid); } spin_unlock(&hdrv->dyn_lock); } const struct hid_device_id *hid_match_device(struct hid_device *hdev, struct hid_driver *hdrv) { struct hid_dynid *dynid; spin_lock(&hdrv->dyn_lock); list_for_each_entry(dynid, &hdrv->dyn_list, list) { if (hid_match_one_id(hdev, &dynid->id)) { spin_unlock(&hdrv->dyn_lock); return &dynid->id; } } spin_unlock(&hdrv->dyn_lock); return hid_match_id(hdev, hdrv->id_table); } EXPORT_SYMBOL_GPL(hid_match_device); static int hid_bus_match(struct device *dev, struct device_driver *drv) { struct hid_driver *hdrv = to_hid_driver(drv); struct hid_device *hdev = to_hid_device(dev); return hid_match_device(hdev, hdrv) != NULL; } /** * hid_compare_device_paths - check if both devices share the same path * @hdev_a: hid device * @hdev_b: hid device * @separator: char to use as separator * * Check if two devices share the same path up to the last occurrence of * the separator char. Both paths must exist (i.e., zero-length paths * don't match). */ bool hid_compare_device_paths(struct hid_device *hdev_a, struct hid_device *hdev_b, char separator) { int n1 = strrchr(hdev_a->phys, separator) - hdev_a->phys; int n2 = strrchr(hdev_b->phys, separator) - hdev_b->phys; if (n1 != n2 || n1 <= 0 || n2 <= 0) return false; return !strncmp(hdev_a->phys, hdev_b->phys, n1); } EXPORT_SYMBOL_GPL(hid_compare_device_paths); static bool hid_check_device_match(struct hid_device *hdev, struct hid_driver *hdrv, const struct hid_device_id **id) { *id = hid_match_device(hdev, hdrv); if (!*id) return false; if (hdrv->match) return hdrv->match(hdev, hid_ignore_special_drivers); /* * hid-generic implements .match(), so we must be dealing with a * different HID driver here, and can simply check if * hid_ignore_special_drivers is set or not. */ return !hid_ignore_special_drivers; } static int __hid_device_probe(struct hid_device *hdev, struct hid_driver *hdrv) { const struct hid_device_id *id; int ret; if (!hid_check_device_match(hdev, hdrv, &id)) return -ENODEV; hdev->devres_group_id = devres_open_group(&hdev->dev, NULL, GFP_KERNEL); if (!hdev->devres_group_id) return -ENOMEM; /* reset the quirks that has been previously set */ hdev->quirks = hid_lookup_quirk(hdev); hdev->driver = hdrv; if (hdrv->probe) { ret = hdrv->probe(hdev, id); } else { /* default probe */ ret = hid_open_report(hdev); if (!ret) ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); } /* * Note that we are not closing the devres group opened above so * even resources that were attached to the device after probe is * run are released when hid_device_remove() is executed. This is * needed as some drivers would allocate additional resources, * for example when updating firmware. 
*/ if (ret) { devres_release_group(&hdev->dev, hdev->devres_group_id); hid_close_report(hdev); hdev->driver = NULL; } return ret; } static int hid_device_probe(struct device *dev) { struct hid_device *hdev = to_hid_device(dev); struct hid_driver *hdrv = to_hid_driver(dev->driver); int ret = 0; if (down_interruptible(&hdev->driver_input_lock)) return -EINTR; hdev->io_started = false; clear_bit(ffs(HID_STAT_REPROBED), &hdev->status); if (!hdev->driver) ret = __hid_device_probe(hdev, hdrv); if (!hdev->io_started) up(&hdev->driver_input_lock); return ret; } static void hid_device_remove(struct device *dev) { struct hid_device *hdev = to_hid_device(dev); struct hid_driver *hdrv; down(&hdev->driver_input_lock); hdev->io_started = false; hdrv = hdev->driver; if (hdrv) { if (hdrv->remove) hdrv->remove(hdev); else /* default remove */ hid_hw_stop(hdev); /* Release all devres resources allocated by the driver */ devres_release_group(&hdev->dev, hdev->devres_group_id); hid_close_report(hdev); hdev->driver = NULL; } if (!hdev->io_started) up(&hdev->driver_input_lock); } static ssize_t modalias_show(struct device *dev, struct device_attribute *a, char *buf) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); return scnprintf(buf, PAGE_SIZE, "hid:b%04Xg%04Xv%08Xp%08X\n", hdev->bus, hdev->group, hdev->vendor, hdev->product); } static DEVICE_ATTR_RO(modalias); static struct attribute *hid_dev_attrs[] = { &dev_attr_modalias.attr, NULL, }; static struct bin_attribute *hid_dev_bin_attrs[] = { &dev_bin_attr_report_desc, NULL }; static const struct attribute_group hid_dev_group = { .attrs = hid_dev_attrs, .bin_attrs = hid_dev_bin_attrs, }; __ATTRIBUTE_GROUPS(hid_dev); static int hid_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct hid_device *hdev = to_hid_device(dev); if (add_uevent_var(env, "HID_ID=%04X:%08X:%08X", hdev->bus, hdev->vendor, hdev->product)) return -ENOMEM; if (add_uevent_var(env, "HID_NAME=%s", hdev->name)) return -ENOMEM; if (add_uevent_var(env, "HID_PHYS=%s", hdev->phys)) return -ENOMEM; if (add_uevent_var(env, "HID_UNIQ=%s", hdev->uniq)) return -ENOMEM; if (add_uevent_var(env, "MODALIAS=hid:b%04Xg%04Xv%08Xp%08X", hdev->bus, hdev->group, hdev->vendor, hdev->product)) return -ENOMEM; return 0; } const struct bus_type hid_bus_type = { .name = "hid", .dev_groups = hid_dev_groups, .drv_groups = hid_drv_groups, .match = hid_bus_match, .probe = hid_device_probe, .remove = hid_device_remove, .uevent = hid_uevent, }; EXPORT_SYMBOL(hid_bus_type); int hid_add_device(struct hid_device *hdev) { static atomic_t id = ATOMIC_INIT(0); int ret; if (WARN_ON(hdev->status & HID_STAT_ADDED)) return -EBUSY; hdev->quirks = hid_lookup_quirk(hdev); /* we need to kill them here, otherwise they will stay allocated to * wait for coming driver */ if (hid_ignore(hdev)) return -ENODEV; /* * Check for the mandatory transport channel. */ if (!hdev->ll_driver->raw_request) { hid_err(hdev, "transport driver missing .raw_request()\n"); return -EINVAL; } /* * Read the device report descriptor once and use as template * for the driver-specific modifications. 
*/ ret = hdev->ll_driver->parse(hdev); if (ret) return ret; if (!hdev->dev_rdesc) return -ENODEV; /* * Scan generic devices for group information */ if (hid_ignore_special_drivers) { hdev->group = HID_GROUP_GENERIC; } else if (!hdev->group && !(hdev->quirks & HID_QUIRK_HAVE_SPECIAL_DRIVER)) { ret = hid_scan_report(hdev); if (ret) hid_warn(hdev, "bad device descriptor (%d)\n", ret); } hdev->id = atomic_inc_return(&id); /* XXX hack, any other cleaner solution after the driver core * is converted to allow more than 20 bytes as the device name? */ dev_set_name(&hdev->dev, "%04X:%04X:%04X.%04X", hdev->bus, hdev->vendor, hdev->product, hdev->id); hid_debug_register(hdev, dev_name(&hdev->dev)); ret = device_add(&hdev->dev); if (!ret) hdev->status |= HID_STAT_ADDED; else hid_debug_unregister(hdev); return ret; } EXPORT_SYMBOL_GPL(hid_add_device); /** * hid_allocate_device - allocate new hid device descriptor * * Allocate and initialize hid device, so that hid_destroy_device might be * used to free it. * * New hid_device pointer is returned on success, otherwise ERR_PTR encoded * error value. */ struct hid_device *hid_allocate_device(void) { struct hid_device *hdev; int ret = -ENOMEM; hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); if (hdev == NULL) return ERR_PTR(ret); device_initialize(&hdev->dev); hdev->dev.release = hid_device_release; hdev->dev.bus = &hid_bus_type; device_enable_async_suspend(&hdev->dev); hid_close_report(hdev); init_waitqueue_head(&hdev->debug_wait); INIT_LIST_HEAD(&hdev->debug_list); spin_lock_init(&hdev->debug_list_lock); sema_init(&hdev->driver_input_lock, 1); mutex_init(&hdev->ll_open_lock); kref_init(&hdev->ref); hid_bpf_device_init(hdev); return hdev; } EXPORT_SYMBOL_GPL(hid_allocate_device); static void hid_remove_device(struct hid_device *hdev) { if (hdev->status & HID_STAT_ADDED) { device_del(&hdev->dev); hid_debug_unregister(hdev); hdev->status &= ~HID_STAT_ADDED; } kfree(hdev->dev_rdesc); hdev->dev_rdesc = NULL; hdev->dev_rsize = 0; } /** * hid_destroy_device - free previously allocated device * * @hdev: hid device * * If you allocate hid_device through hid_allocate_device, you should ever * free by this function. 
*/ void hid_destroy_device(struct hid_device *hdev) { hid_bpf_destroy_device(hdev); hid_remove_device(hdev); put_device(&hdev->dev); } EXPORT_SYMBOL_GPL(hid_destroy_device); static int __hid_bus_reprobe_drivers(struct device *dev, void *data) { struct hid_driver *hdrv = data; struct hid_device *hdev = to_hid_device(dev); if (hdev->driver == hdrv && !hdrv->match(hdev, hid_ignore_special_drivers) && !test_and_set_bit(ffs(HID_STAT_REPROBED), &hdev->status)) return device_reprobe(dev); return 0; } static int __hid_bus_driver_added(struct device_driver *drv, void *data) { struct hid_driver *hdrv = to_hid_driver(drv); if (hdrv->match) { bus_for_each_dev(&hid_bus_type, NULL, hdrv, __hid_bus_reprobe_drivers); } return 0; } static int __bus_removed_driver(struct device_driver *drv, void *data) { return bus_rescan_devices(&hid_bus_type); } int __hid_register_driver(struct hid_driver *hdrv, struct module *owner, const char *mod_name) { int ret; hdrv->driver.name = hdrv->name; hdrv->driver.bus = &hid_bus_type; hdrv->driver.owner = owner; hdrv->driver.mod_name = mod_name; INIT_LIST_HEAD(&hdrv->dyn_list); spin_lock_init(&hdrv->dyn_lock); ret = driver_register(&hdrv->driver); if (ret == 0) bus_for_each_drv(&hid_bus_type, NULL, NULL, __hid_bus_driver_added); return ret; } EXPORT_SYMBOL_GPL(__hid_register_driver); void hid_unregister_driver(struct hid_driver *hdrv) { driver_unregister(&hdrv->driver); hid_free_dynids(hdrv); bus_for_each_drv(&hid_bus_type, NULL, hdrv, __bus_removed_driver); } EXPORT_SYMBOL_GPL(hid_unregister_driver); int hid_check_keys_pressed(struct hid_device *hid) { struct hid_input *hidinput; int i; if (!(hid->claimed & HID_CLAIMED_INPUT)) return 0; list_for_each_entry(hidinput, &hid->inputs, list) { for (i = 0; i < BITS_TO_LONGS(KEY_MAX); i++) if (hidinput->input->key[i]) return 1; } return 0; } EXPORT_SYMBOL_GPL(hid_check_keys_pressed); #ifdef CONFIG_HID_BPF static struct hid_bpf_ops hid_ops = { .hid_get_report = hid_get_report, .hid_hw_raw_request = hid_hw_raw_request, .owner = THIS_MODULE, .bus_type = &hid_bus_type, }; #endif static int __init hid_init(void) { int ret; ret = bus_register(&hid_bus_type); if (ret) { pr_err("can't register hid bus\n"); goto err; } #ifdef CONFIG_HID_BPF hid_bpf_ops = &hid_ops; #endif ret = hidraw_init(); if (ret) goto err_bus; hid_debug_init(); return 0; err_bus: bus_unregister(&hid_bus_type); err: return ret; } static void __exit hid_exit(void) { #ifdef CONFIG_HID_BPF hid_bpf_ops = NULL; #endif hid_debug_exit(); hidraw_exit(); bus_unregister(&hid_bus_type); hid_quirks_exit(HID_BUS_ANY); } module_init(hid_init); module_exit(hid_exit); MODULE_AUTHOR("Andreas Gal"); MODULE_AUTHOR("Vojtech Pavlik"); MODULE_AUTHOR("Jiri Kosina"); MODULE_LICENSE("GPL");
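/*
 * Illustrative sketch (not part of hid-core.c): a minimal HID driver that
 * binds to the bus implemented above. The vendor/product IDs and the
 * "hid-sample" name are hypothetical; probe simply mirrors the default path
 * taken by __hid_device_probe() (hid_open_report() via hid_parse(), then
 * hid_hw_start() with HID_CONNECT_DEFAULT), and remove undoes it with
 * hid_hw_stop(). Guarded out so it never affects the build.
 */
#if 0
#include <linux/hid.h>
#include <linux/module.h>

static const struct hid_device_id sample_hid_ids[] = {
	{ HID_USB_DEVICE(0x1234, 0x5678) },	/* hypothetical VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(hid, sample_hid_ids);

static int sample_hid_probe(struct hid_device *hdev,
			    const struct hid_device_id *id)
{
	int ret;

	ret = hid_parse(hdev);	/* parse the report descriptor */
	if (ret)
		return ret;

	/* connect hidinput/hidraw listeners; hid_connect() logs the result */
	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}

static void sample_hid_remove(struct hid_device *hdev)
{
	hid_hw_stop(hdev);	/* disconnects listeners, stops the ll driver */
}

static struct hid_driver sample_hid_driver = {
	.name		= "hid-sample",
	.id_table	= sample_hid_ids,
	.probe		= sample_hid_probe,
	.remove		= sample_hid_remove,
};
module_hid_driver(sample_hid_driver);
#endif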
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __X86_KERNEL_FPU_LEGACY_H #define __X86_KERNEL_FPU_LEGACY_H #include <asm/fpu/types.h> extern unsigned int mxcsr_feature_mask; static inline void ldmxcsr(u32 mxcsr) { asm volatile("ldmxcsr %0" :: "m" (mxcsr)); } /* * Returns 0 on success or the trap number when the operation raises an * exception. */ #define user_insn(insn, output, input...) \ ({ \ int err; \ \ might_fault(); \ \ asm volatile(ASM_STAC "\n" \ "1: " #insn "\n" \ "2: " ASM_CLAC "\n" \ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ : [err] "=a" (err), output \ : "0"(0), input); \ err; \ }) #define kernel_insn_err(insn, output, input...) \ ({ \ int err; \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[err]) \ : [err] "=r" (err), output \ : "0"(0), input); \ err; \ }) #define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \ : output : input) static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) { return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); else return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); } static inline void fxrstor(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); else kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int fxrstor_safe(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); else return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); else return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline void frstor(struct fregs_state *fx) { kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int frstor_safe(struct fregs_state *fx) { return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int frstor_from_user_sigframe(struct fregs_state __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline void fxsave(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); else asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); } #endif
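/*
 * Illustrative sketch (not part of this header): how the helpers above pair
 * up. fxsave() snapshots the legacy FXSAVE area and fxrstor_safe() restores
 * it, returning 0 on success or a non-zero error if the restore faults (via
 * the exception-table fixup in kernel_insn_err()). The function name below
 * is hypothetical and the block is guarded out of the build.
 */
#if 0
static int example_fxstate_roundtrip(struct fxregs_state *buf)
{
	fxsave(buf);			/* FXSAVE/FXSAVEQ into *buf */

	/* ... the saved image could be inspected or adjusted here ... */

	return fxrstor_safe(buf);	/* restore; reports a fault instead of oopsing */
}
#endif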
// SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter stuff * * Copyright (C) 1999 - 2012 * Greg Kroah-Hartman (greg@kroah.com) */ #ifndef __LINUX_USB_SERIAL_H #define __LINUX_USB_SERIAL_H #include <linux/kref.h> #include <linux/mutex.h> #include <linux/serial.h> #include <linux/kfifo.h> /* The maximum number of ports one device can grab at once */ #define MAX_NUM_PORTS 16 /* USB serial flags */ #define USB_SERIAL_WRITE_BUSY 0 #define USB_SERIAL_THROTTLED 1 /** * usb_serial_port: structure for the specific ports of a device. * @serial: pointer back to the struct usb_serial owner of this port. * @port: pointer to the corresponding tty_port for this port. * @lock: spinlock to grab when updating portions of this structure. * @minor: the minor number of the port * @port_number: the struct usb_serial port number of this port (starts at 0) * @interrupt_in_buffer: pointer to the interrupt in buffer for this port. * @interrupt_in_urb: pointer to the interrupt in struct urb for this port. * @interrupt_in_endpointAddress: endpoint address for the interrupt in pipe * for this port. * @interrupt_out_buffer: pointer to the interrupt out buffer for this port. * @interrupt_out_size: the size of the interrupt_out_buffer, in bytes. * @interrupt_out_urb: pointer to the interrupt out struct urb for this port. * @interrupt_out_endpointAddress: endpoint address for the interrupt out pipe * for this port. * @bulk_in_buffer: pointer to the bulk in buffer for this port. * @bulk_in_size: the size of the bulk_in_buffer, in bytes. * @read_urb: pointer to the bulk in struct urb for this port. * @bulk_in_endpointAddress: endpoint address for the bulk in pipe for this * port. 
* @bulk_in_buffers: pointers to the bulk in buffers for this port * @read_urbs: pointers to the bulk in urbs for this port * @read_urbs_free: status bitmap the for bulk in urbs * @bulk_out_buffer: pointer to the bulk out buffer for this port. * @bulk_out_size: the size of the bulk_out_buffer, in bytes. * @write_urb: pointer to the bulk out struct urb for this port. * @write_fifo: kfifo used to buffer outgoing data * @bulk_out_buffers: pointers to the bulk out buffers for this port * @write_urbs: pointers to the bulk out urbs for this port * @write_urbs_free: status bitmap the for bulk out urbs * @icount: interrupt counters * @tx_bytes: number of bytes currently in host stack queues * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this * port. * @flags: usb serial port flags * @work: work queue entry for the line discipline waking up. * @dev: pointer to the serial device * * This structure is used by the usb-serial core and drivers for the specific * ports of a device. */ struct usb_serial_port { struct usb_serial *serial; struct tty_port port; spinlock_t lock; u32 minor; u8 port_number; unsigned char *interrupt_in_buffer; struct urb *interrupt_in_urb; __u8 interrupt_in_endpointAddress; unsigned char *interrupt_out_buffer; int interrupt_out_size; struct urb *interrupt_out_urb; __u8 interrupt_out_endpointAddress; unsigned char *bulk_in_buffer; int bulk_in_size; struct urb *read_urb; __u8 bulk_in_endpointAddress; unsigned char *bulk_in_buffers[2]; struct urb *read_urbs[2]; unsigned long read_urbs_free; unsigned char *bulk_out_buffer; int bulk_out_size; struct urb *write_urb; struct kfifo write_fifo; unsigned char *bulk_out_buffers[2]; struct urb *write_urbs[2]; unsigned long write_urbs_free; __u8 bulk_out_endpointAddress; struct async_icount icount; int tx_bytes; unsigned long flags; struct work_struct work; unsigned long sysrq; /* sysrq timeout */ struct device dev; }; #define to_usb_serial_port(d) container_of(d, struct usb_serial_port, dev) /* get and set the port private data pointer helper functions */ static inline void *usb_get_serial_port_data(struct usb_serial_port *port) { return dev_get_drvdata(&port->dev); } static inline void usb_set_serial_port_data(struct usb_serial_port *port, void *data) { dev_set_drvdata(&port->dev, data); } /** * usb_serial - structure used by the usb-serial core for a device * @dev: pointer to the struct usb_device for this device * @type: pointer to the struct usb_serial_driver for this device * @interface: pointer to the struct usb_interface for this device * @sibling: pointer to the struct usb_interface of any sibling interface * @suspend_count: number of suspended (sibling) interfaces * @num_ports: the number of ports this device has * @num_interrupt_in: number of interrupt in endpoints we have * @num_interrupt_out: number of interrupt out endpoints we have * @num_bulk_in: number of bulk in endpoints we have * @num_bulk_out: number of bulk out endpoints we have * @port: array of struct usb_serial_port structures for the different ports. * @private: place to put any driver specific information that is needed. The * usb-serial driver is required to manage this data, the usb-serial core * will not touch this. Use usb_get_serial_data() and * usb_set_serial_data() to access this. 
*/ struct usb_serial { struct usb_device *dev; struct usb_serial_driver *type; struct usb_interface *interface; struct usb_interface *sibling; unsigned int suspend_count; unsigned char disconnected:1; unsigned char attached:1; unsigned char minors_reserved:1; unsigned char num_ports; unsigned char num_port_pointers; unsigned char num_interrupt_in; unsigned char num_interrupt_out; unsigned char num_bulk_in; unsigned char num_bulk_out; struct usb_serial_port *port[MAX_NUM_PORTS]; struct kref kref; struct mutex disc_mutex; void *private; }; #define to_usb_serial(d) container_of(d, struct usb_serial, kref) /* get and set the serial private data pointer helper functions */ static inline void *usb_get_serial_data(struct usb_serial *serial) { return serial->private; } static inline void usb_set_serial_data(struct usb_serial *serial, void *data) { serial->private = data; } struct usb_serial_endpoints { unsigned char num_bulk_in; unsigned char num_bulk_out; unsigned char num_interrupt_in; unsigned char num_interrupt_out; struct usb_endpoint_descriptor *bulk_in[MAX_NUM_PORTS]; struct usb_endpoint_descriptor *bulk_out[MAX_NUM_PORTS]; struct usb_endpoint_descriptor *interrupt_in[MAX_NUM_PORTS]; struct usb_endpoint_descriptor *interrupt_out[MAX_NUM_PORTS]; }; /** * usb_serial_driver - describes a usb serial driver * @description: pointer to a string that describes this driver. This string * used in the syslog messages when a device is inserted or removed. * @id_table: pointer to a list of usb_device_id structures that define all * of the devices this structure can support. * @num_ports: the number of different ports this device will have. * @num_bulk_in: minimum number of bulk-in endpoints * @num_bulk_out: minimum number of bulk-out endpoints * @num_interrupt_in: minimum number of interrupt-in endpoints * @num_interrupt_out: minimum number of interrupt-out endpoints * @bulk_in_size: minimum number of bytes to allocate for bulk-in buffer * (0 = end-point size) * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size) * @calc_num_ports: pointer to a function to determine how many ports this * device has dynamically. It can also be used to verify the number of * endpoints or to modify the port-endpoint mapping. It will be called * after the probe() callback is called, but before attach(). * @probe: pointer to the driver's probe function. * This will be called when the device is inserted into the system, * but before the device has been fully initialized by the usb_serial * subsystem. Use this function to download any firmware to the device, * or any other early initialization that might be needed. * Return 0 to continue on with the initialization sequence. Anything * else will abort it. * @attach: pointer to the driver's attach function. * This will be called when the struct usb_serial structure is fully * set up. Do any local initialization of the device, or any private * memory structure allocation at this point in time. * @disconnect: pointer to the driver's disconnect function. This will be * called when the device is unplugged or unbound from the driver. * @release: pointer to the driver's release function. This will be called * when the usb_serial data structure is about to be destroyed. * @usb_driver: pointer to the struct usb_driver that controls this * device. This is necessary to allow dynamic ids to be added to * the driver from sysfs. * * This structure is defines a USB Serial driver. It provides all of * the information that the USB serial core code needs. 
If the function * pointers are defined, then the USB serial core code will call them when * the corresponding tty port functions are called. If they are not * called, the generic serial function will be used instead. * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver (remember * it will show up in sysfs, so it needs to be short and to the point. * Using the module name is a good idea.) */ struct usb_serial_driver { const char *description; const struct usb_device_id *id_table; struct list_head driver_list; struct device_driver driver; struct usb_driver *usb_driver; struct usb_dynids dynids; unsigned char num_ports; unsigned char num_bulk_in; unsigned char num_bulk_out; unsigned char num_interrupt_in; unsigned char num_interrupt_out; size_t bulk_in_size; size_t bulk_out_size; int (*probe)(struct usb_serial *serial, const struct usb_device_id *id); int (*attach)(struct usb_serial *serial); int (*calc_num_ports)(struct usb_serial *serial, struct usb_serial_endpoints *epds); void (*disconnect)(struct usb_serial *serial); void (*release)(struct usb_serial *serial); int (*port_probe)(struct usb_serial_port *port); void (*port_remove)(struct usb_serial_port *port); int (*suspend)(struct usb_serial *serial, pm_message_t message); int (*resume)(struct usb_serial *serial); int (*reset_resume)(struct usb_serial *serial); /* serial function calls */ /* Called by console and by the tty layer */ int (*open)(struct tty_struct *tty, struct usb_serial_port *port); void (*close)(struct usb_serial_port *port); int (*write)(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); /* Called only by the tty layer */ unsigned int (*write_room)(struct tty_struct *tty); int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); void (*get_serial)(struct tty_struct *tty, struct serial_struct *ss); int (*set_serial)(struct tty_struct *tty, struct serial_struct *ss); void (*set_termios)(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old); int (*break_ctl)(struct tty_struct *tty, int break_state); unsigned int (*chars_in_buffer)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, long timeout); bool (*tx_empty)(struct usb_serial_port *port); void (*throttle)(struct tty_struct *tty); void (*unthrottle)(struct tty_struct *tty); int (*tiocmget)(struct tty_struct *tty); int (*tiocmset)(struct tty_struct *tty, unsigned int set, unsigned int clear); int (*tiocmiwait)(struct tty_struct *tty, unsigned long arg); int (*get_icount)(struct tty_struct *tty, struct serial_icounter_struct *icount); /* Called by the tty layer for port level work. 
There may or may not be an attached tty at this point */ void (*dtr_rts)(struct usb_serial_port *port, int on); int (*carrier_raised)(struct usb_serial_port *port); /* Called by the usb serial hooks to allow the user to rework the termios state */ void (*init_termios)(struct tty_struct *tty); /* USB events */ void (*read_int_callback)(struct urb *urb); void (*write_int_callback)(struct urb *urb); void (*read_bulk_callback)(struct urb *urb); void (*write_bulk_callback)(struct urb *urb); /* Called by the generic read bulk callback */ void (*process_read_urb)(struct urb *urb); /* Called by the generic write implementation */ int (*prepare_write_buffer)(struct usb_serial_port *port, void *dest, size_t size); }; #define to_usb_serial_driver(d) \ container_of(d, struct usb_serial_driver, driver) int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], const char *name, const struct usb_device_id *id_table); void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]); void usb_serial_port_softint(struct usb_serial_port *port); int usb_serial_suspend(struct usb_interface *intf, pm_message_t message); int usb_serial_resume(struct usb_interface *intf); /* USB Serial console functions */ #ifdef CONFIG_USB_SERIAL_CONSOLE void usb_serial_console_init(int minor); void usb_serial_console_exit(void); void usb_serial_console_disconnect(struct usb_serial *serial); #else static inline void usb_serial_console_init(int minor) { } static inline void usb_serial_console_exit(void) { } static inline void usb_serial_console_disconnect(struct usb_serial *serial) {} #endif /* Functions needed by other parts of the usbserial core */ struct usb_serial_port *usb_serial_port_get_by_minor(unsigned int minor); void usb_serial_put(struct usb_serial *serial); int usb_serial_claim_interface(struct usb_serial *serial, struct usb_interface *intf); int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port); int usb_serial_generic_write_start(struct usb_serial_port *port, gfp_t mem_flags); int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); void usb_serial_generic_close(struct usb_serial_port *port); int usb_serial_generic_resume(struct usb_serial *serial); unsigned int usb_serial_generic_write_room(struct tty_struct *tty); unsigned int usb_serial_generic_chars_in_buffer(struct tty_struct *tty); void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout); void usb_serial_generic_read_bulk_callback(struct urb *urb); void usb_serial_generic_write_bulk_callback(struct urb *urb); void usb_serial_generic_throttle(struct tty_struct *tty); void usb_serial_generic_unthrottle(struct tty_struct *tty); int usb_serial_generic_tiocmiwait(struct tty_struct *tty, unsigned long arg); int usb_serial_generic_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); int usb_serial_generic_register(void); void usb_serial_generic_deregister(void); int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port, gfp_t mem_flags); void usb_serial_generic_process_read_urb(struct urb *urb); int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size); #if defined(CONFIG_USB_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch); int usb_serial_handle_break(struct usb_serial_port *port); #else static inline int usb_serial_handle_sysrq_char(struct usb_serial_port *port, 
unsigned int ch) { return 0; } static inline int usb_serial_handle_break(struct usb_serial_port *port) { return 0; } #endif void usb_serial_handle_dcd_change(struct usb_serial_port *usb_port, struct tty_struct *tty, unsigned int status); int usb_serial_bus_register(struct usb_serial_driver *device); void usb_serial_bus_deregister(struct usb_serial_driver *device); extern const struct bus_type usb_serial_bus_type; extern struct tty_driver *usb_serial_tty_driver; static inline void usb_serial_debug_data(struct device *dev, const char *function, int size, const unsigned char *data) { dev_dbg(dev, "%s - length = %d, data = %*ph\n", function, size, size, data); } /* * Macro for reporting errors in write path to avoid infinite loop * when port is used as a console. */ #define dev_err_console(usport, fmt, ...) \ do { \ static bool __print_once; \ struct usb_serial_port *__port = (usport); \ \ if (!__port->port.console || !__print_once) { \ __print_once = true; \ dev_err(&__port->dev, fmt, ##__VA_ARGS__); \ } \ } while (0) /* * module_usb_serial_driver() - Helper macro for registering a USB Serial driver * @__serial_drivers: list of usb_serial drivers to register * @__ids: all device ids that @__serial_drivers bind to * * Helper macro for USB serial drivers which do not do anything special * in module init/exit. This eliminates a lot of boilerplate. Each * module may only use this macro once, and calling it replaces * module_init() and module_exit() * */ #define usb_serial_module_driver(__name, __serial_drivers, __ids) \ static int __init usb_serial_module_init(void) \ { \ return usb_serial_register_drivers(__serial_drivers, \ __name, __ids); \ } \ module_init(usb_serial_module_init); \ static void __exit usb_serial_module_exit(void) \ { \ usb_serial_deregister_drivers(__serial_drivers); \ } \ module_exit(usb_serial_module_exit); #define module_usb_serial_driver(__serial_drivers, __ids) \ usb_serial_module_driver(KBUILD_MODNAME, __serial_drivers, __ids) #endif /* __LINUX_USB_SERIAL_H */
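/*
 * Illustrative sketch (not part of this header): a minimal usb_serial driver
 * built on the API above. With no callbacks filled in, the generic
 * implementations (usb_serial_generic_*) handle open/close/read/write. The
 * vendor/product IDs and the "sample-serial" name are hypothetical, and the
 * block is guarded out of the build.
 */
#if 0
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

static const struct usb_device_id sample_id_table[] = {
	{ USB_DEVICE(0x1234, 0xabcd) },	/* hypothetical VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(usb, sample_id_table);

static struct usb_serial_driver sample_serial_device = {
	.driver = {
		.owner	= THIS_MODULE,	/* see the driver.owner note above */
		.name	= "sample-serial",
	},
	.description	= "Sample USB serial adapter",
	.id_table	= sample_id_table,
	.num_ports	= 1,
};

static struct usb_serial_driver * const sample_serial_drivers[] = {
	&sample_serial_device, NULL
};

/* registers with the usb-serial core; replaces module_init()/module_exit() */
module_usb_serial_driver(sample_serial_drivers, sample_id_table);
#endif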
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RMAP_H #define _LINUX_RMAP_H /* * Declarations for Reverse Mapping functions in mm/rmap.c */ #include <linux/list.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/rwsem.h> #include <linux/memcontrol.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/memremap.h> /* * The anon_vma heads a list of private "related" vmas, to scan if * an anonymous page pointing to this anon_vma needs to be unmapped: * the vmas on the list will be related by forking, or by splitting. 
* * Since vmas come and go as they are split and merged (particularly * in mprotect), the mapping field of an anonymous page cannot point * directly to a vma: instead it points to an anon_vma, on whose list * the related vmas can be easily linked or unlinked. * * After unlinking the last vma on the list, we must garbage collect * the anon_vma object itself: we're guaranteed no page can be * pointing to this anon_vma once its vma list is empty. */ struct anon_vma { struct anon_vma *root; /* Root of this anon_vma tree */ struct rw_semaphore rwsem; /* W: modification, R: walking the list */ /* * The refcount is taken on an anon_vma when there is no * guarantee that the vma of page tables will exist for * the duration of the operation. A caller that takes * the reference is responsible for clearing up the * anon_vma if they are the last user on release */ atomic_t refcount; /* * Count of child anon_vmas. Equals to the count of all anon_vmas that * have ->parent pointing to this one, including itself. * * This counter is used for making decision about reusing anon_vma * instead of forking new one. See comments in function anon_vma_clone. */ unsigned long num_children; /* Count of VMAs whose ->anon_vma pointer points to this object. */ unsigned long num_active_vmas; struct anon_vma *parent; /* Parent of this anon_vma */ /* * NOTE: the LSB of the rb_root.rb_node is set by * mm_take_all_locks() _after_ taking the above lock. So the * rb_root must only be read/written after taking the above lock * to be sure to see a valid next pointer. The LSB bit itself * is serialized by a system wide lock only visible to * mm_take_all_locks() (mm_all_locks_mutex). */ /* Interval tree of private "related" vmas */ struct rb_root_cached rb_root; }; /* * The copy-on-write semantics of fork mean that an anon_vma * can become associated with multiple processes. Furthermore, * each child process will have its own anon_vma, where new * pages for that process are instantiated. * * This structure allows us to find the anon_vmas associated * with a VMA, or the VMAs associated with an anon_vma. * The "same_vma" list contains the anon_vma_chains linking * all the anon_vmas associated with this VMA. * The "rb" field indexes on an interval tree the anon_vma_chains * which link all the VMAs associated with this anon_vma. 
*/ struct anon_vma_chain { struct vm_area_struct *vma; struct anon_vma *anon_vma; struct list_head same_vma; /* locked by mmap_lock & page_table_lock */ struct rb_node rb; /* locked by anon_vma->rwsem */ unsigned long rb_subtree_last; #ifdef CONFIG_DEBUG_VM_RB unsigned long cached_vma_start, cached_vma_last; #endif }; enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ TTU_HWPOISON = 0x20, /* do convert pte to hwpoison entry */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock: * caller holds it */ }; #ifdef CONFIG_MMU static inline void get_anon_vma(struct anon_vma *anon_vma) { atomic_inc(&anon_vma->refcount); } void __put_anon_vma(struct anon_vma *anon_vma); static inline void put_anon_vma(struct anon_vma *anon_vma) { if (atomic_dec_and_test(&anon_vma->refcount)) __put_anon_vma(anon_vma); } static inline void anon_vma_lock_write(struct anon_vma *anon_vma) { down_write(&anon_vma->root->rwsem); } static inline int anon_vma_trylock_write(struct anon_vma *anon_vma) { return down_write_trylock(&anon_vma->root->rwsem); } static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) { up_write(&anon_vma->root->rwsem); } static inline void anon_vma_lock_read(struct anon_vma *anon_vma) { down_read(&anon_vma->root->rwsem); } static inline int anon_vma_trylock_read(struct anon_vma *anon_vma) { return down_read_trylock(&anon_vma->root->rwsem); } static inline void anon_vma_unlock_read(struct anon_vma *anon_vma) { up_read(&anon_vma->root->rwsem); } /* * anon_vma helper functions. */ void anon_vma_init(void); /* create anon_vma_cachep */ int __anon_vma_prepare(struct vm_area_struct *); void unlink_anon_vmas(struct vm_area_struct *); int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *); int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); static inline int anon_vma_prepare(struct vm_area_struct *vma) { if (likely(vma->anon_vma)) return 0; return __anon_vma_prepare(vma); } static inline void anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next) { VM_BUG_ON_VMA(vma->anon_vma != next->anon_vma, vma); unlink_anon_vmas(next); } struct anon_vma *folio_get_anon_vma(struct folio *folio); /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; /* * No special request: A mapped anonymous (sub)page is possibly shared between * processes. */ #define RMAP_NONE ((__force rmap_t)0) /* The anonymous (sub)page is exclusive to a single process. */ #define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0)) /* * Internally, we're using an enum to specify the granularity. We make the * compiler emit specialized code for each granularity. */ enum rmap_level { RMAP_LEVEL_PTE = 0, RMAP_LEVEL_PMD, }; static inline void __folio_rmap_sanity_checks(struct folio *folio, struct page *page, int nr_pages, enum rmap_level level) { /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); /* * TODO: we get driver-allocated folios that have nothing to do with * the rmap using vm_insert_page(); therefore, we cannot assume that * folio_test_large_rmappable() holds for large folios. We should * handle any desired mapcount+stats accounting for these folios in * VM_MIXEDMAP VMAs separately, and then sanity-check here that * we really only get rmappable folios. 
*/ VM_WARN_ON_ONCE(nr_pages <= 0); VM_WARN_ON_FOLIO(page_folio(page) != folio, folio); VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio); switch (level) { case RMAP_LEVEL_PTE: break; case RMAP_LEVEL_PMD: /* * We don't support folios larger than a single PMD yet. So * when RMAP_LEVEL_PMD is set, we assume that we are creating * a single "entire" mapping of the folio. */ VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; default: VM_WARN_ON_ONCE(true); } } /* * rmap interfaces called when adding or removing pte of page */ void folio_move_anon_rmap(struct folio *, struct vm_area_struct *); void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *, unsigned long address, rmap_t flags); #define folio_add_anon_rmap_pte(folio, page, vma, address, flags) \ folio_add_anon_rmap_ptes(folio, page, 1, vma, address, flags) void folio_add_anon_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *, unsigned long address, rmap_t flags); void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *); #define folio_add_file_rmap_pte(folio, page, vma) \ folio_add_file_rmap_ptes(folio, page, 1, vma) void folio_add_file_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *); #define folio_remove_rmap_pte(folio, page, vma) \ folio_remove_rmap_ptes(folio, page, 1, vma) void folio_remove_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); /* See folio_try_dup_anon_rmap_*() */ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio, struct vm_area_struct *vma) { VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); if (PageAnonExclusive(&folio->page)) { if (unlikely(folio_needs_cow_for_dma(vma, folio))) return -EBUSY; ClearPageAnonExclusive(&folio->page); } atomic_inc(&folio->_entire_mapcount); return 0; } /* See folio_try_share_anon_rmap_*() */ static inline int hugetlb_try_share_anon_rmap(struct folio *folio) { VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio); /* Paired with the memory barrier in try_grab_folio(). */ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) smp_mb(); if (unlikely(folio_maybe_dma_pinned(folio))) return -EBUSY; ClearPageAnonExclusive(&folio->page); /* * This is conceptually a smp_wmb() paired with the smp_rmb() in * gup_must_unshare(). 
*/ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) smp_mb__after_atomic(); return 0; } static inline void hugetlb_add_file_rmap(struct folio *folio) { VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_FOLIO(folio_test_anon(folio), folio); atomic_inc(&folio->_entire_mapcount); } static inline void hugetlb_remove_rmap(struct folio *folio) { VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); atomic_dec(&folio->_entire_mapcount); } static __always_inline void __folio_dup_file_rmap(struct folio *folio, struct page *page, int nr_pages, enum rmap_level level) { __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { case RMAP_LEVEL_PTE: do { atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); break; case RMAP_LEVEL_PMD: atomic_inc(&folio->_entire_mapcount); break; } } /** * folio_dup_file_rmap_ptes - duplicate PTE mappings of a page range of a folio * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_ptes(struct folio *folio, struct page *page, int nr_pages) { __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE); } #define folio_dup_file_rmap_pte(folio, page) \ folio_dup_file_rmap_ptes(folio, page, 1) /** * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_pmd(struct folio *folio, struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif } static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *src_vma, enum rmap_level level) { bool maybe_pinned; int i; VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); __folio_rmap_sanity_checks(folio, page, nr_pages, level); /* * If this folio may have been pinned by the parent process, * don't allow to duplicate the mappings but instead require to e.g., * copy the subpage immediately for the child so that we'll always * guarantee the pinned folio won't be randomly replaced in the * future on write faults. */ maybe_pinned = likely(!folio_is_device_private(folio)) && unlikely(folio_needs_cow_for_dma(src_vma, folio)); /* * No need to check+clear for already shared PTEs/PMDs of the * folio. But if any page is PageAnonExclusive, we must fallback to * copying if the folio maybe pinned. 
*/ switch (level) { case RMAP_LEVEL_PTE: if (unlikely(maybe_pinned)) { for (i = 0; i < nr_pages; i++) if (PageAnonExclusive(page + i)) return -EBUSY; } do { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); break; case RMAP_LEVEL_PMD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; ClearPageAnonExclusive(page); } atomic_inc(&folio->_entire_mapcount); break; } return 0; } /** * folio_try_dup_anon_rmap_ptes - try duplicating PTE mappings of a page range * of a folio * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated * @src_vma: The vm area from which the mappings are duplicated * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock and the * vma->vma_mm->write_protect_seq. * * Duplicating the mappings can only fail if the folio may be pinned; device * private folios cannot get pinned and consequently this function cannot fail * for them. * * If duplicating the mappings succeeded, the duplicated PTEs have to be R/O in * the parent and the child. They must *not* be writable after this call * succeeded. * * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma, RMAP_LEVEL_PTE); } #define folio_try_dup_anon_rmap_pte(folio, page, vma) \ folio_try_dup_anon_rmap_ptes(folio, page, 1, vma) /** * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range * of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of * @src_vma: The vm area from which the mapping is duplicated * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock and the * vma->vma_mm->write_protect_seq. * * Duplicating the mapping can only fail if the folio may be pinned; device * private folios cannot get pinned and consequently this function cannot fail * for them. * * If duplicating the mapping succeeds, the duplicated PMD has to be R/O in * the parent and the child. They must *not* be writable after this call * succeeded. * * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *src_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma, RMAP_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; #endif } static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, struct page *page, int nr_pages, enum rmap_level level) { VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio); __folio_rmap_sanity_checks(folio, page, nr_pages, level); /* device private folios cannot get pinned via GUP. */ if (unlikely(folio_is_device_private(folio))) { ClearPageAnonExclusive(page); return 0; } /* * We have to make sure that when we clear PageAnonExclusive, that * the page is not pinned and that concurrent GUP-fast won't succeed in * concurrently pinning the page. 
* * Conceptually, PageAnonExclusive clearing consists of: * (A1) Clear PTE * (A2) Check if the page is pinned; back off if so. * (A3) Clear PageAnonExclusive * (A4) Restore PTE (optional, but certainly not writable) * * When clearing PageAnonExclusive, we cannot possibly map the page * writable again, because anon pages that may be shared must never * be writable. So in any case, if the PTE was writable it cannot * be writable anymore afterwards and there would be a PTE change. Only * if the PTE wasn't writable, there might not be a PTE change. * * Conceptually, GUP-fast pinning of an anon page consists of: * (B1) Read the PTE * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so. * (B3) Pin the mapped page * (B4) Check if the PTE changed by re-reading it; back off if so. * (B5) If the original PTE is not writable, check if * PageAnonExclusive is not set; back off if so. * * If the PTE was writable, we only have to make sure that GUP-fast * observes a PTE change and properly backs off. * * If the PTE was not writable, we have to make sure that GUP-fast either * detects a (temporary) PTE change or that PageAnonExclusive is cleared * and properly backs off. * * Consequently, when clearing PageAnonExclusive(), we have to make * sure that (A1), (A2)/(A3) and (A4) happen in the right memory * order. In GUP-fast pinning code, we have to make sure that (B3),(B4) * and (B5) happen in the right memory order. * * We assume that there might not be a memory barrier after * clearing/invalidating the PTE (A1) and before restoring the PTE (A4), * so we use explicit ones here. */ /* Paired with the memory barrier in try_grab_folio(). */ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) smp_mb(); if (unlikely(folio_maybe_dma_pinned(folio))) return -EBUSY; ClearPageAnonExclusive(page); /* * This is conceptually a smp_wmb() paired with the smp_rmb() in * gup_must_unshare(). */ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) smp_mb__after_atomic(); return 0; } /** * folio_try_share_anon_rmap_pte - try marking an exclusive anonymous page * mapped by a PTE possibly shared to prepare * for KSM or temporary unmapping * @folio: The folio to share a mapping of * @page: The mapped exclusive page * * The caller needs to hold the page table lock and has to have the page table * entries cleared/invalidated. * * This is similar to folio_try_dup_anon_rmap_pte(), however, not used during * fork() to duplicate mappings, but instead to prepare for KSM or temporarily * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pte(). * * Marking the mapped page shared can only fail if the folio maybe pinned; * device private folios cannot get pinned and consequently this function cannot * fail. * * Returns 0 if marking the mapped page possibly shared succeeded. Returns * -EBUSY otherwise. */ static inline int folio_try_share_anon_rmap_pte(struct folio *folio, struct page *page) { return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE); } /** * folio_try_share_anon_rmap_pmd - try marking an exclusive anonymous page * range mapped by a PMD possibly shared to * prepare for temporary unmapping * @folio: The folio to share the mapping of * @page: The first page to share the mapping of * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock and has to have the page table * entries cleared/invalidated. 
* * This is similar to folio_try_dup_anon_rmap_pmd(), however, not used during * fork() to duplicate a mapping, but instead to prepare for temporarily * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pmd(). * * Marking the mapped pages shared can only fail if the folio maybe pinned; * device private folios cannot get pinned and consequently this function cannot * fail. * * Returns 0 if marking the mapped pages possibly shared succeeded. Returns * -EBUSY otherwise. */ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; #endif } /* * Called from mm/vmscan.c to handle paging out */ int folio_referenced(struct folio *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); void try_to_migrate(struct folio *folio, enum ttu_flags flags); void try_to_unmap(struct folio *, enum ttu_flags flags); int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct page **pages, void *arg); /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) /* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) struct page_vma_mapped_walk { unsigned long pfn; unsigned long nr_pages; pgoff_t pgoff; struct vm_area_struct *vma; unsigned long address; pmd_t *pmd; pte_t *pte; spinlock_t *ptl; unsigned int flags; }; #define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \ struct page_vma_mapped_walk name = { \ .pfn = page_to_pfn(_page), \ .nr_pages = compound_nr(_page), \ .pgoff = page_to_pgoff(_page), \ .vma = _vma, \ .address = _address, \ .flags = _flags, \ } #define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \ struct page_vma_mapped_walk name = { \ .pfn = folio_pfn(_folio), \ .nr_pages = folio_nr_pages(_folio), \ .pgoff = folio_pgoff(_folio), \ .vma = _vma, \ .address = _address, \ .flags = _flags, \ } static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) { /* HugeTLB pte is set to the relevant page table entry without pte_mapped. */ if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma)) pte_unmap(pvmw->pte); if (pvmw->ptl) spin_unlock(pvmw->ptl); } bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); /* * Used by swapoff to help locate where page is expected in vma. */ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); /* * Cleans the PTEs of shared mappings. * (and since clean PTEs should also be readonly, write protects them too) * * returns the number of cleaned PTEs. 
*/ int folio_mkclean(struct folio *); int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma); void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); /* * rmap_walk_control: To control rmap traversing for specific needs * * arg: passed to rmap_one() and invalid_vma() * try_lock: bail out if the rmap lock is contended * contended: indicate the rmap traversal bailed out due to lock contention * rmap_one: executed on each vma where page is mapped * done: for checking traversing termination condition * anon_lock: for getting anon_lock by optimized way rather than default * invalid_vma: for skipping uninterested vma */ struct rmap_walk_control { void *arg; bool try_lock; bool contended; /* * Return false if page table scanning in rmap_walk should be stopped. * Otherwise, return true. */ bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct folio *folio); struct anon_vma *(*anon_lock)(struct folio *folio, struct rmap_walk_control *rwc); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc); void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) #define anon_vma_prepare(vma) (0) static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags) { *vm_flags = 0; return 0; } static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags) { } static inline int folio_mkclean(struct folio *folio) { return 0; } #endif /* CONFIG_MMU */ static inline int page_mkclean(struct page *page) { return folio_mkclean(page_folio(page)); } #endif /* _LINUX_RMAP_H */
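/*
 * Editorial aside, not part of <linux/rmap.h>: a minimal sketch of how the
 * rmap_walk() and page_vma_mapped_walk() interfaces declared above are
 * typically consumed, in the spirit of folio_referenced() or folio_mkclean().
 * The function names and the counter structure below are hypothetical and
 * exist only for illustration; the sketch assumes the caller already holds a
 * reference on a locked folio.
 */
#include <linux/mm.h>
#include <linux/rmap.h>

struct mapped_count_arg {
	unsigned long mapped;		/* PTEs/PMDs found to map the folio */
};

static bool count_one_vma(struct folio *folio, struct vm_area_struct *vma,
			  unsigned long address, void *arg)
{
	struct mapped_count_arg *mca = arg;
	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);

	/* Visit every page table entry in this VMA that maps the folio. */
	while (page_vma_mapped_walk(&pvmw))
		mca->mapped++;

	return true;			/* keep walking the remaining VMAs */
}

static unsigned long count_folio_mappings(struct folio *folio)
{
	struct mapped_count_arg mca = { };
	struct rmap_walk_control rwc = {
		.arg		= &mca,
		.rmap_one	= count_one_vma,
	};

	rmap_walk(folio, &rwc);
	return mca.mapped;
}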
// SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/base/map.c * * (C) Copyright Al Viro 2002,2003 * * NOTE: data structure needs to be changed. It works, but for large dev_t * it will be too slow. It is isolated, though, so these changes will be * local to that file. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/kdev_t.h> #include <linux/kobject.h> #include <linux/kobj_map.h> struct kobj_map { struct probe { struct probe *next; dev_t dev; unsigned long range; struct module *owner; kobj_probe_t *get; int (*lock)(dev_t, void *); void *data; } *probes[255]; struct mutex *lock; }; int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, struct module *module, kobj_probe_t *probe, int (*lock)(dev_t, void *), void *data) { unsigned int n = MAJOR(dev + range - 1) - MAJOR(dev) + 1; unsigned int index = MAJOR(dev); unsigned int i; struct probe *p; if (n > 255) n = 255; p = kmalloc_array(n, sizeof(struct probe), GFP_KERNEL); if (p == NULL) return -ENOMEM; for (i = 0; i < n; i++, p++) { p->owner = module; p->get = probe; p->lock = lock; p->dev = dev; p->range = range; p->data = data; } mutex_lock(domain->lock); for (i = 0, p -= n; i < n; i++, p++, index++) { struct probe **s = &domain->probes[index % 255]; while (*s && (*s)->range < range) s = &(*s)->next; p->next = *s; *s = p; } mutex_unlock(domain->lock); return 0; } void kobj_unmap(struct kobj_map *domain, dev_t dev, unsigned long range) { unsigned int n = MAJOR(dev + range - 1) - MAJOR(dev) + 1; unsigned int index = MAJOR(dev); unsigned int i; struct probe *found = NULL; if (n > 255) n = 255; mutex_lock(domain->lock); for (i = 0; i < n; i++, index++) { struct probe **s; for (s = &domain->probes[index % 255]; *s; s = &(*s)->next) { struct probe *p = *s; if (p->dev == dev && p->range == range) { *s = p->next; if (!found) found = p; break; } } } mutex_unlock(domain->lock); kfree(found); } struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index) { struct kobject *kobj; struct probe *p; unsigned long best = ~0UL; retry: mutex_lock(domain->lock); for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) { struct kobject *(*probe)(dev_t, int *, void *); struct module *owner; void *data; if (p->dev > dev || p->dev + p->range - 1 < dev) continue; if (p->range - 1 >= best) break; if (!try_module_get(p->owner)) continue; owner = p->owner; data = p->data; probe = p->get; best = p->range - 1; *index = dev - p->dev; if (p->lock && p->lock(dev, data) < 0) { module_put(owner); continue; } mutex_unlock(domain->lock); kobj = probe(dev, index, data); /* Currently ->owner protects _only_ ->probe() itself.
*/ module_put(owner); if (kobj) return kobj; goto retry; } mutex_unlock(domain->lock); return NULL; } struct kobj_map *kobj_map_init(kobj_probe_t *base_probe, struct mutex *lock) { struct kobj_map *p = kmalloc(sizeof(struct kobj_map), GFP_KERNEL); struct probe *base = kzalloc(sizeof(*base), GFP_KERNEL); int i; if ((p == NULL) || (base == NULL)) { kfree(p); kfree(base); return NULL; } base->dev = 1; base->range = ~0; base->get = base_probe; for (i = 0; i < 255; i++) p->probes[i] = base; p->lock = lock; return p; }
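/*
 * Editorial aside, not part of map.c: a minimal sketch of how a subsystem
 * would drive this interface, loosely modelled on the way the character
 * device core uses kobj_map().  DEMO_MAJOR, the demo_* names and the probe
 * callbacks are made up purely for illustration.
 */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/kobj_map.h>
#include <linux/module.h>
#include <linux/mutex.h>

#define DEMO_MAJOR	240	/* arbitrary major chosen for the example */

static DEFINE_MUTEX(demo_map_lock);
static struct kobj_map *demo_map;

/* Fallback probe: called when no registered range matches the dev_t. */
static struct kobject *demo_base_probe(dev_t dev, int *part, void *data)
{
	return NULL;
}

static int __init demo_map_setup(void)
{
	demo_map = kobj_map_init(demo_base_probe, &demo_map_lock);
	return demo_map ? 0 : -ENOMEM;
}

/* Claim 16 minors under DEMO_MAJOR and attach a driver-provided probe. */
static int demo_register_range(kobj_probe_t *probe, void *data)
{
	return kobj_map(demo_map, MKDEV(DEMO_MAJOR, 0), 16, THIS_MODULE,
			probe, NULL, data);
}

/* Resolve a dev_t to the kobject of the owning device, if any. */
static struct kobject *demo_lookup(dev_t dev)
{
	int index;

	return kobj_lookup(demo_map, dev, &index);
}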
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2013 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/usb.h> #include <linux/hid.h> #include <linux/mutex.h> #include <linux/videodev2.h> #include <asm/unaligned.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> /* * 'Thanko's Raremono' is a Japanese si4734-based AM/FM/SW USB receiver: * * http://www.raremono.jp/product/484.html/ * * The USB protocol has been reverse engineered using wireshark, initially * by Dinesh Ram <dinesh.ram@cern.ch> and finished by Hans Verkuil * <hverkuil@xs4all.nl>. * * Sadly the firmware used in this product hides lots of goodies since the * si4734 has more features than are supported by the firmware. Oh well... */ /* driver and module definitions */ MODULE_AUTHOR("Hans Verkuil <hverkuil@xs4all.nl>"); MODULE_DESCRIPTION("Thanko's Raremono AM/FM/SW Receiver USB driver"); MODULE_LICENSE("GPL v2"); /* * The Device announces itself as Cygnal Integrated Products, Inc. * * The vendor and product IDs (and in fact all other lsusb information as * well) are identical to the si470x Silicon Labs USB FM Radio Reference * Design board, even though this card has a si4734 device. Clearly the * designer of this product never bothered to change the USB IDs. */ /* USB Device ID List */ static const struct usb_device_id usb_raremono_device_table[] = { {USB_DEVICE_AND_INTERFACE_INFO(0x10c4, 0x818a, USB_CLASS_HID, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_raremono_device_table); #define BUFFER_LENGTH 64 /* Timeout is set to a high value, could probably be reduced.
Need more tests */ #define USB_TIMEOUT 10000 /* Frequency limits in KHz */ #define FM_FREQ_RANGE_LOW 64000 #define FM_FREQ_RANGE_HIGH 108000 #define AM_FREQ_RANGE_LOW 520 #define AM_FREQ_RANGE_HIGH 1710 #define SW_FREQ_RANGE_LOW 2300 #define SW_FREQ_RANGE_HIGH 26100 enum { BAND_FM, BAND_AM, BAND_SW }; static const struct v4l2_frequency_band bands[] = { /* Band FM */ { .type = V4L2_TUNER_RADIO, .index = 0, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = FM_FREQ_RANGE_LOW * 16, .rangehigh = FM_FREQ_RANGE_HIGH * 16, .modulation = V4L2_BAND_MODULATION_FM, }, /* Band AM */ { .type = V4L2_TUNER_RADIO, .index = 1, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = AM_FREQ_RANGE_LOW * 16, .rangehigh = AM_FREQ_RANGE_HIGH * 16, .modulation = V4L2_BAND_MODULATION_AM, }, /* Band SW */ { .type = V4L2_TUNER_RADIO, .index = 2, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = SW_FREQ_RANGE_LOW * 16, .rangehigh = SW_FREQ_RANGE_HIGH * 16, .modulation = V4L2_BAND_MODULATION_AM, }, }; struct raremono_device { struct usb_device *usbdev; struct usb_interface *intf; struct video_device vdev; struct v4l2_device v4l2_dev; struct mutex lock; u8 *buffer; u32 band; unsigned curfreq; }; static inline struct raremono_device *to_raremono_dev(struct v4l2_device *v4l2_dev) { return container_of(v4l2_dev, struct raremono_device, v4l2_dev); } /* Set frequency. */ static int raremono_cmd_main(struct raremono_device *radio, unsigned band, unsigned freq) { unsigned band_offset; int ret; switch (band) { case BAND_FM: band_offset = 1; freq /= 10; break; case BAND_AM: band_offset = 0; break; default: band_offset = 2; break; } radio->buffer[0] = 0x04 + band_offset; radio->buffer[1] = freq >> 8; radio->buffer[2] = freq & 0xff; ret = usb_control_msg(radio->usbdev, usb_sndctrlpipe(radio->usbdev, 0), HID_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, 0x0300 + radio->buffer[0], 2, radio->buffer, 3, USB_TIMEOUT); if (ret < 0) { dev_warn(radio->v4l2_dev.dev, "%s failed (%d)\n", __func__, ret); return ret; } radio->curfreq = (band == BAND_FM) ? freq * 10 : freq; return 0; } /* Handle unplugging the device. * We call video_unregister_device in any case. * The last function called in this procedure is * usb_raremono_device_release. 
*/ static void usb_raremono_disconnect(struct usb_interface *intf) { struct raremono_device *radio = to_raremono_dev(usb_get_intfdata(intf)); dev_info(&intf->dev, "Thanko's Raremono disconnected\n"); mutex_lock(&radio->lock); usb_set_intfdata(intf, NULL); video_unregister_device(&radio->vdev); v4l2_device_disconnect(&radio->v4l2_dev); mutex_unlock(&radio->lock); v4l2_device_put(&radio->v4l2_dev); } /* * Linux Video interface */ static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *v) { struct raremono_device *radio = video_drvdata(file); strscpy(v->driver, "radio-raremono", sizeof(v->driver)); strscpy(v->card, "Thanko's Raremono", sizeof(v->card)); usb_make_path(radio->usbdev, v->bus_info, sizeof(v->bus_info)); return 0; } static int vidioc_enum_freq_bands(struct file *file, void *priv, struct v4l2_frequency_band *band) { if (band->tuner != 0) return -EINVAL; if (band->index >= ARRAY_SIZE(bands)) return -EINVAL; *band = bands[band->index]; return 0; } static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct raremono_device *radio = video_drvdata(file); int ret; if (v->index > 0) return -EINVAL; strscpy(v->name, "AM/FM/SW", sizeof(v->name)); v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_FREQ_BANDS; v->rangelow = AM_FREQ_RANGE_LOW * 16; v->rangehigh = FM_FREQ_RANGE_HIGH * 16; v->rxsubchans = V4L2_TUNER_SUB_STEREO | V4L2_TUNER_SUB_MONO; v->audmode = (radio->curfreq < FM_FREQ_RANGE_LOW) ? V4L2_TUNER_MODE_MONO : V4L2_TUNER_MODE_STEREO; memset(radio->buffer, 1, BUFFER_LENGTH); ret = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), 1, 0xa1, 0x030d, 2, radio->buffer, BUFFER_LENGTH, USB_TIMEOUT); if (ret < 0) { dev_warn(radio->v4l2_dev.dev, "%s failed (%d)\n", __func__, ret); return ret; } v->signal = ((radio->buffer[1] & 0xf) << 8 | radio->buffer[2]) << 4; return 0; } static int vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { return v->index ? 
-EINVAL : 0; } static int vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct raremono_device *radio = video_drvdata(file); u32 freq; unsigned band; if (f->tuner != 0 || f->type != V4L2_TUNER_RADIO) return -EINVAL; if (f->frequency >= (FM_FREQ_RANGE_LOW + SW_FREQ_RANGE_HIGH) * 8) band = BAND_FM; else if (f->frequency <= (AM_FREQ_RANGE_HIGH + SW_FREQ_RANGE_LOW) * 8) band = BAND_AM; else band = BAND_SW; freq = clamp_t(u32, f->frequency, bands[band].rangelow, bands[band].rangehigh); return raremono_cmd_main(radio, band, freq / 16); } static int vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct raremono_device *radio = video_drvdata(file); if (f->tuner != 0) return -EINVAL; f->type = V4L2_TUNER_RADIO; f->frequency = radio->curfreq * 16; return 0; } static void raremono_device_release(struct v4l2_device *v4l2_dev) { struct raremono_device *radio = to_raremono_dev(v4l2_dev); kfree(radio->buffer); kfree(radio); } /* File system interface */ static const struct v4l2_file_operations usb_raremono_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = v4l2_fh_release, .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops usb_raremono_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_g_tuner = vidioc_g_tuner, .vidioc_s_tuner = vidioc_s_tuner, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, .vidioc_enum_freq_bands = vidioc_enum_freq_bands, }; /* check if the device is present and register with v4l and usb if it is */ static int usb_raremono_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct raremono_device *radio; int retval = 0; radio = kzalloc(sizeof(*radio), GFP_KERNEL); if (!radio) return -ENOMEM; radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); if (!radio->buffer) { kfree(radio); return -ENOMEM; } radio->usbdev = interface_to_usbdev(intf); radio->intf = intf; /* * This device uses the same USB IDs as the si470x SiLabs reference * design. So do an additional check: attempt to read the device ID * from the si470x: the lower 12 bits are 0x0242 for the si470x. The * Raremono always returns 0x0800 (the meaning of that is unknown, but * at least it works). * * We use this check to determine which device we are dealing with. 
*/ msleep(20); retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), HID_REQ_GET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, 1, 2, radio->buffer, 3, 500); if (retval != 3 || (get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) { dev_info(&intf->dev, "this is not Thanko's Raremono.\n"); retval = -ENODEV; goto free_mem; } dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n", id->idVendor, id->idProduct); retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); if (retval < 0) { dev_err(&intf->dev, "couldn't register v4l2_device\n"); goto free_mem; } mutex_init(&radio->lock); strscpy(radio->vdev.name, radio->v4l2_dev.name, sizeof(radio->vdev.name)); radio->vdev.v4l2_dev = &radio->v4l2_dev; radio->vdev.fops = &usb_raremono_fops; radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops; radio->vdev.lock = &radio->lock; radio->vdev.release = video_device_release_empty; radio->vdev.device_caps = V4L2_CAP_TUNER | V4L2_CAP_RADIO; radio->v4l2_dev.release = raremono_device_release; usb_set_intfdata(intf, &radio->v4l2_dev); video_set_drvdata(&radio->vdev, radio); raremono_cmd_main(radio, BAND_FM, 95160); retval = video_register_device(&radio->vdev, VFL_TYPE_RADIO, -1); if (retval == 0) { dev_info(&intf->dev, "V4L2 device registered as %s\n", video_device_node_name(&radio->vdev)); return 0; } dev_err(&intf->dev, "could not register video device\n"); v4l2_device_unregister(&radio->v4l2_dev); free_mem: kfree(radio->buffer); kfree(radio); return retval; } /* USB subsystem interface */ static struct usb_driver usb_raremono_driver = { .name = "radio-raremono", .probe = usb_raremono_probe, .disconnect = usb_raremono_disconnect, .id_table = usb_raremono_device_table, }; module_usb_driver(usb_raremono_driver);
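/*
 * Editorial aside, not part of the driver: a small user-space sketch showing
 * how an application would tune this receiver through the standard V4L2
 * radio interface.  Because the driver advertises V4L2_TUNER_CAP_LOW, the
 * frequency field is in units of 62.5 Hz, i.e. 16 units per kHz, which is
 * why the band tables above multiply their kHz limits by 16.  The device
 * path and the 95.16 MHz example frequency are arbitrary.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/videodev2.h>

int main(void)
{
	struct v4l2_frequency f;
	int fd = open("/dev/radio0", O_RDWR);

	if (fd < 0) {
		perror("open /dev/radio0");
		return 1;
	}

	memset(&f, 0, sizeof(f));
	f.tuner = 0;
	f.type = V4L2_TUNER_RADIO;
	f.frequency = 95160 * 16;	/* 95.16 MHz = 95160 kHz, 16 units/kHz */

	if (ioctl(fd, VIDIOC_S_FREQUENCY, &f) < 0)
		perror("VIDIOC_S_FREQUENCY");

	close(fd);
	return 0;
}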
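/*
 * Editorial aside, not part of the kernel sources reproduced here: a short
 * consumer-side sketch of the device tree lookup API that drivers/of/base.c
 * (reproduced below) implements.  The "acme,sensor" compatible string and
 * the "acme,sample-rate-hz" property are invented names, used only to
 * illustrate of_find_compatible_node(), of_device_is_available(),
 * of_property_read_u32() and of_node_put().
 */
#include <linux/of.h>
#include <linux/printk.h>

static void demo_scan_sensors(void)
{
	struct device_node *np = NULL;
	u32 rate;

	/*
	 * Each call drops the reference on the previous node and returns the
	 * next matching node with its refcount raised, so no explicit
	 * of_node_put() is needed inside the loop.
	 */
	while ((np = of_find_compatible_node(np, NULL, "acme,sensor"))) {
		if (!of_device_is_available(np))
			continue;	/* status is not "okay"/"ok" */

		if (!of_property_read_u32(np, "acme,sample-rate-hz", &rate))
			pr_info("%pOF: sample rate %u Hz\n", np, rate);
	}
}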
// SPDX-License-Identifier: GPL-2.0+ /* * Procedures for creating, accessing and interpreting the device tree. * * Paul Mackerras August 1996. * Copyright (C) 1996-2005 Paul Mackerras. * * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. * {engebret|bergner}@us.ibm.com * * Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net * * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and * Grant Likely. */ #define pr_fmt(fmt) "OF: " fmt #include <linux/console.h> #include <linux/ctype.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_graph.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/proc_fs.h> #include "of_private.h" LIST_HEAD(aliases_lookup); struct device_node *of_root; EXPORT_SYMBOL(of_root); struct device_node *of_chosen; EXPORT_SYMBOL(of_chosen); struct device_node *of_aliases; struct device_node *of_stdout; static const char *of_stdout_options; struct kset *of_kset; /* * Used to protect the of_aliases, to hold off addition of nodes to sysfs. * This mutex must be held whenever modifications are being made to the * device tree.
The of_{attach,detach}_node() and * of_{add,remove,update}_property() helpers make sure this happens. */ DEFINE_MUTEX(of_mutex); /* use when traversing tree through the child, sibling, * or parent members of struct device_node. */ DEFINE_RAW_SPINLOCK(devtree_lock); bool of_node_name_eq(const struct device_node *np, const char *name) { const char *node_name; size_t len; if (!np) return false; node_name = kbasename(np->full_name); len = strchrnul(node_name, '@') - node_name; return (strlen(name) == len) && (strncmp(node_name, name, len) == 0); } EXPORT_SYMBOL(of_node_name_eq); bool of_node_name_prefix(const struct device_node *np, const char *prefix) { if (!np) return false; return strncmp(kbasename(np->full_name), prefix, strlen(prefix)) == 0; } EXPORT_SYMBOL(of_node_name_prefix); static bool __of_node_is_type(const struct device_node *np, const char *type) { const char *match = __of_get_property(np, "device_type", NULL); return np && match && type && !strcmp(match, type); } int of_bus_n_addr_cells(struct device_node *np) { u32 cells; for (; np; np = np->parent) if (!of_property_read_u32(np, "#address-cells", &cells)) return cells; /* No #address-cells property for the root node */ return OF_ROOT_NODE_ADDR_CELLS_DEFAULT; } int of_n_addr_cells(struct device_node *np) { if (np->parent) np = np->parent; return of_bus_n_addr_cells(np); } EXPORT_SYMBOL(of_n_addr_cells); int of_bus_n_size_cells(struct device_node *np) { u32 cells; for (; np; np = np->parent) if (!of_property_read_u32(np, "#size-cells", &cells)) return cells; /* No #size-cells property for the root node */ return OF_ROOT_NODE_SIZE_CELLS_DEFAULT; } int of_n_size_cells(struct device_node *np) { if (np->parent) np = np->parent; return of_bus_n_size_cells(np); } EXPORT_SYMBOL(of_n_size_cells); #ifdef CONFIG_NUMA int __weak of_node_to_nid(struct device_node *np) { return NUMA_NO_NODE; } #endif #define OF_PHANDLE_CACHE_BITS 7 #define OF_PHANDLE_CACHE_SZ BIT(OF_PHANDLE_CACHE_BITS) static struct device_node *phandle_cache[OF_PHANDLE_CACHE_SZ]; static u32 of_phandle_cache_hash(phandle handle) { return hash_32(handle, OF_PHANDLE_CACHE_BITS); } /* * Caller must hold devtree_lock. 
*/ void __of_phandle_cache_inv_entry(phandle handle) { u32 handle_hash; struct device_node *np; if (!handle) return; handle_hash = of_phandle_cache_hash(handle); np = phandle_cache[handle_hash]; if (np && handle == np->phandle) phandle_cache[handle_hash] = NULL; } void __init of_core_init(void) { struct device_node *np; of_platform_register_reconfig_notifier(); /* Create the kset, and register existing nodes */ mutex_lock(&of_mutex); of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); if (!of_kset) { mutex_unlock(&of_mutex); pr_err("failed to register existing nodes\n"); return; } for_each_of_allnodes(np) { __of_attach_node_sysfs(np); if (np->phandle && !phandle_cache[of_phandle_cache_hash(np->phandle)]) phandle_cache[of_phandle_cache_hash(np->phandle)] = np; } mutex_unlock(&of_mutex); /* Symlink in /proc as required by userspace ABI */ if (of_root) proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base"); } static struct property *__of_find_property(const struct device_node *np, const char *name, int *lenp) { struct property *pp; if (!np) return NULL; for (pp = np->properties; pp; pp = pp->next) { if (of_prop_cmp(pp->name, name) == 0) { if (lenp) *lenp = pp->length; break; } } return pp; } struct property *of_find_property(const struct device_node *np, const char *name, int *lenp) { struct property *pp; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); pp = __of_find_property(np, name, lenp); raw_spin_unlock_irqrestore(&devtree_lock, flags); return pp; } EXPORT_SYMBOL(of_find_property); struct device_node *__of_find_all_nodes(struct device_node *prev) { struct device_node *np; if (!prev) { np = of_root; } else if (prev->child) { np = prev->child; } else { /* Walk back up looking for a sibling, or the end of the structure */ np = prev; while (np->parent && !np->sibling) np = np->parent; np = np->sibling; /* Might be null at the end of the tree */ } return np; } /** * of_find_all_nodes - Get next node in global list * @prev: Previous node or NULL to start iteration * of_node_put() will be called on it * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_all_nodes(struct device_node *prev) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); np = __of_find_all_nodes(prev); of_node_get(np); of_node_put(prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_all_nodes); /* * Find a property with a given name for a given node * and return the value. */ const void *__of_get_property(const struct device_node *np, const char *name, int *lenp) { struct property *pp = __of_find_property(np, name, lenp); return pp ? pp->value : NULL; } /* * Find a property with a given name for a given node * and return the value. */ const void *of_get_property(const struct device_node *np, const char *name, int *lenp) { struct property *pp = of_find_property(np, name, lenp); return pp ? pp->value : NULL; } EXPORT_SYMBOL(of_get_property); /** * __of_device_is_compatible() - Check if the node matches given constraints * @device: pointer to node * @compat: required compatible string, NULL or "" for any match * @type: required device_type value, NULL or "" for any match * @name: required node name, NULL or "" for any match * * Checks if the given @compat, @type and @name strings match the * properties of the given @device. A constraints can be skipped by * passing NULL or an empty string as the constraint. 
* * Returns 0 for no match, and a positive integer on match. The return * value is a relative score with larger values indicating better * matches. The score is weighted for the most specific compatible value * to get the highest score. Matching type is next, followed by matching * name. Practically speaking, this results in the following priority * order for matches: * * 1. specific compatible && type && name * 2. specific compatible && type * 3. specific compatible && name * 4. specific compatible * 5. general compatible && type && name * 6. general compatible && type * 7. general compatible && name * 8. general compatible * 9. type && name * 10. type * 11. name */ static int __of_device_is_compatible(const struct device_node *device, const char *compat, const char *type, const char *name) { struct property *prop; const char *cp; int index = 0, score = 0; /* Compatible match has highest priority */ if (compat && compat[0]) { prop = __of_find_property(device, "compatible", NULL); for (cp = of_prop_next_string(prop, NULL); cp; cp = of_prop_next_string(prop, cp), index++) { if (of_compat_cmp(cp, compat, strlen(compat)) == 0) { score = INT_MAX/2 - (index << 2); break; } } if (!score) return 0; } /* Matching type is better than matching name */ if (type && type[0]) { if (!__of_node_is_type(device, type)) return 0; score += 2; } /* Matching name is a bit better than not */ if (name && name[0]) { if (!of_node_name_eq(device, name)) return 0; score++; } return score; } /** Checks if the given "compat" string matches one of the strings in * the device's "compatible" property */ int of_device_is_compatible(const struct device_node *device, const char *compat) { unsigned long flags; int res; raw_spin_lock_irqsave(&devtree_lock, flags); res = __of_device_is_compatible(device, compat, NULL, NULL); raw_spin_unlock_irqrestore(&devtree_lock, flags); return res; } EXPORT_SYMBOL(of_device_is_compatible); /** Checks if the device is compatible with any of the entries in * a NULL terminated array of strings. Returns the best match * score or 0. */ int of_device_compatible_match(const struct device_node *device, const char *const *compat) { unsigned int tmp, score = 0; if (!compat) return 0; while (*compat) { tmp = of_device_is_compatible(device, *compat); if (tmp > score) score = tmp; compat++; } return score; } EXPORT_SYMBOL_GPL(of_device_compatible_match); /** * of_machine_compatible_match - Test root of device tree against a compatible array * @compats: NULL terminated array of compatible strings to look for in root node's compatible property. * * Returns true if the root node has any of the given compatible values in its * compatible property. 
*/ bool of_machine_compatible_match(const char *const *compats) { struct device_node *root; int rc = 0; root = of_find_node_by_path("/"); if (root) { rc = of_device_compatible_match(root, compats); of_node_put(root); } return rc != 0; } EXPORT_SYMBOL(of_machine_compatible_match); static bool __of_device_is_status(const struct device_node *device, const char * const*strings) { const char *status; int statlen; if (!device) return false; status = __of_get_property(device, "status", &statlen); if (status == NULL) return false; if (statlen > 0) { while (*strings) { unsigned int len = strlen(*strings); if ((*strings)[len - 1] == '-') { if (!strncmp(status, *strings, len)) return true; } else { if (!strcmp(status, *strings)) return true; } strings++; } } return false; } /** * __of_device_is_available - check if a device is available for use * * @device: Node to check for availability, with locks already held * * Return: True if the status property is absent or set to "okay" or "ok", * false otherwise */ static bool __of_device_is_available(const struct device_node *device) { static const char * const ok[] = {"okay", "ok", NULL}; if (!device) return false; return !__of_get_property(device, "status", NULL) || __of_device_is_status(device, ok); } /** * __of_device_is_reserved - check if a device is reserved * * @device: Node to check for availability, with locks already held * * Return: True if the status property is set to "reserved", false otherwise */ static bool __of_device_is_reserved(const struct device_node *device) { static const char * const reserved[] = {"reserved", NULL}; return __of_device_is_status(device, reserved); } /** * of_device_is_available - check if a device is available for use * * @device: Node to check for availability * * Return: True if the status property is absent or set to "okay" or "ok", * false otherwise */ bool of_device_is_available(const struct device_node *device) { unsigned long flags; bool res; raw_spin_lock_irqsave(&devtree_lock, flags); res = __of_device_is_available(device); raw_spin_unlock_irqrestore(&devtree_lock, flags); return res; } EXPORT_SYMBOL(of_device_is_available); /** * __of_device_is_fail - check if a device has status "fail" or "fail-..." * * @device: Node to check status for, with locks already held * * Return: True if the status property is set to "fail" or "fail-..." (for any * error code suffix), false otherwise */ static bool __of_device_is_fail(const struct device_node *device) { static const char * const fail[] = {"fail", "fail-", NULL}; return __of_device_is_status(device, fail); } /** * of_device_is_big_endian - check if a device has BE registers * * @device: Node to check for endianness * * Return: True if the device has a "big-endian" property, or if the kernel * was compiled for BE *and* the device has a "native-endian" property. * Returns false otherwise. * * Callers would nominally use ioread32be/iowrite32be if * of_device_is_big_endian() == true, or readl/writel otherwise. */ bool of_device_is_big_endian(const struct device_node *device) { if (of_property_read_bool(device, "big-endian")) return true; if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && of_property_read_bool(device, "native-endian")) return true; return false; } EXPORT_SYMBOL(of_device_is_big_endian); /** * of_get_parent - Get a node's parent if any * @node: Node to get parent * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. 
*/ struct device_node *of_get_parent(const struct device_node *node) { struct device_node *np; unsigned long flags; if (!node) return NULL; raw_spin_lock_irqsave(&devtree_lock, flags); np = of_node_get(node->parent); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_get_parent); /** * of_get_next_parent - Iterate to a node's parent * @node: Node to get parent of * * This is like of_get_parent() except that it drops the * refcount on the passed node, making it suitable for iterating * through a node's parents. * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_get_next_parent(struct device_node *node) { struct device_node *parent; unsigned long flags; if (!node) return NULL; raw_spin_lock_irqsave(&devtree_lock, flags); parent = of_node_get(node->parent); of_node_put(node); raw_spin_unlock_irqrestore(&devtree_lock, flags); return parent; } EXPORT_SYMBOL(of_get_next_parent); static struct device_node *__of_get_next_child(const struct device_node *node, struct device_node *prev) { struct device_node *next; if (!node) return NULL; next = prev ? prev->sibling : node->child; of_node_get(next); of_node_put(prev); return next; } #define __for_each_child_of_node(parent, child) \ for (child = __of_get_next_child(parent, NULL); child != NULL; \ child = __of_get_next_child(parent, child)) /** * of_get_next_child - Iterate a node childs * @node: parent node * @prev: previous child of the parent node, or NULL to get first * * Return: A node pointer with refcount incremented, use of_node_put() on * it when done. Returns NULL when prev is the last child. Decrements the * refcount of prev. */ struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev) { struct device_node *next; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); next = __of_get_next_child(node, prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return next; } EXPORT_SYMBOL(of_get_next_child); static struct device_node *of_get_next_status_child(const struct device_node *node, struct device_node *prev, bool (*checker)(const struct device_node *)) { struct device_node *next; unsigned long flags; if (!node) return NULL; raw_spin_lock_irqsave(&devtree_lock, flags); next = prev ? prev->sibling : node->child; for (; next; next = next->sibling) { if (!checker(next)) continue; if (of_node_get(next)) break; } of_node_put(prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return next; } /** * of_get_next_available_child - Find the next available child node * @node: parent node * @prev: previous child of the parent node, or NULL to get first * * This function is like of_get_next_child(), except that it * automatically skips any disabled nodes (i.e. status = "disabled"). */ struct device_node *of_get_next_available_child(const struct device_node *node, struct device_node *prev) { return of_get_next_status_child(node, prev, __of_device_is_available); } EXPORT_SYMBOL(of_get_next_available_child); /** * of_get_next_reserved_child - Find the next reserved child node * @node: parent node * @prev: previous child of the parent node, or NULL to get first * * This function is like of_get_next_child(), except that it * automatically skips any disabled nodes (i.e. status = "disabled"). 
*/ struct device_node *of_get_next_reserved_child(const struct device_node *node, struct device_node *prev) { return of_get_next_status_child(node, prev, __of_device_is_reserved); } EXPORT_SYMBOL(of_get_next_reserved_child); /** * of_get_next_cpu_node - Iterate on cpu nodes * @prev: previous child of the /cpus node, or NULL to get first * * Unusable CPUs (those with the status property set to "fail" or "fail-...") * will be skipped. * * Return: A cpu node pointer with refcount incremented, use of_node_put() * on it when done. Returns NULL when prev is the last child. Decrements * the refcount of prev. */ struct device_node *of_get_next_cpu_node(struct device_node *prev) { struct device_node *next = NULL; unsigned long flags; struct device_node *node; if (!prev) node = of_find_node_by_path("/cpus"); raw_spin_lock_irqsave(&devtree_lock, flags); if (prev) next = prev->sibling; else if (node) { next = node->child; of_node_put(node); } for (; next; next = next->sibling) { if (__of_device_is_fail(next)) continue; if (!(of_node_name_eq(next, "cpu") || __of_node_is_type(next, "cpu"))) continue; if (of_node_get(next)) break; } of_node_put(prev); raw_spin_unlock_irqrestore(&devtree_lock, flags); return next; } EXPORT_SYMBOL(of_get_next_cpu_node); /** * of_get_compatible_child - Find compatible child node * @parent: parent node * @compatible: compatible string * * Lookup child node whose compatible property contains the given compatible * string. * * Return: a node pointer with refcount incremented, use of_node_put() on it * when done; or NULL if not found. */ struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible) { struct device_node *child; for_each_child_of_node(parent, child) { if (of_device_is_compatible(child, compatible)) break; } return child; } EXPORT_SYMBOL(of_get_compatible_child); /** * of_get_child_by_name - Find the child node by name for a given parent * @node: parent node * @name: child name to look for. * * This function looks for child node for given matching name * * Return: A node pointer if found, with refcount incremented, use * of_node_put() on it when done. * Returns NULL if node is not found. */ struct device_node *of_get_child_by_name(const struct device_node *node, const char *name) { struct device_node *child; for_each_child_of_node(node, child) if (of_node_name_eq(child, name)) break; return child; } EXPORT_SYMBOL(of_get_child_by_name); struct device_node *__of_find_node_by_path(struct device_node *parent, const char *path) { struct device_node *child; int len; len = strcspn(path, "/:"); if (!len) return NULL; __for_each_child_of_node(parent, child) { const char *name = kbasename(child->full_name); if (strncmp(path, name, len) == 0 && (strlen(name) == len)) return child; } return NULL; } struct device_node *__of_find_node_by_full_path(struct device_node *node, const char *path) { const char *separator = strchr(path, ':'); while (node && *path == '/') { struct device_node *tmp = node; path++; /* Increment past '/' delimiter */ node = __of_find_node_by_path(node, path); of_node_put(tmp); path = strchrnul(path, '/'); if (separator && separator < path) break; } return node; } /** * of_find_node_opts_by_path - Find a node matching a full OF path * @path: Either the full path to match, or if the path does not * start with '/', the name of a property of the /aliases * node (an alias). In the case of an alias, the node * matching the alias' value will be returned. 
* @opts: Address of a pointer into which to store the start of * an options string appended to the end of the path with * a ':' separator. * * Valid paths: * * /foo/bar Full path * * foo Valid alias * * foo/bar Valid alias + relative path * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { struct device_node *np = NULL; struct property *pp; unsigned long flags; const char *separator = strchr(path, ':'); if (opts) *opts = separator ? separator + 1 : NULL; if (strcmp(path, "/") == 0) return of_node_get(of_root); /* The path could begin with an alias */ if (*path != '/') { int len; const char *p = separator; if (!p) p = strchrnul(path, '/'); len = p - path; /* of_aliases must not be NULL */ if (!of_aliases) return NULL; for_each_property_of_node(of_aliases, pp) { if (strlen(pp->name) == len && !strncmp(pp->name, path, len)) { np = of_find_node_by_path(pp->value); break; } } if (!np) return NULL; path = p; } /* Step down the tree matching path components */ raw_spin_lock_irqsave(&devtree_lock, flags); if (!np) np = of_node_get(of_root); np = __of_find_node_by_full_path(np, path); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_opts_by_path); /** * of_find_node_by_name - Find a node by its "name" property * @from: The node to start searching from or NULL; the node * you pass will not be searched, only the next one * will. Typically, you pass what the previous call * returned. of_node_put() will be called on @from. * @name: The name string to match against * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_by_name(struct device_node *from, const char *name) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) if (of_node_name_eq(np, name) && of_node_get(np)) break; of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_name); /** * of_find_node_by_type - Find a node by its "device_type" property * @from: The node to start searching from, or NULL to start searching * the entire device tree. The node you pass will not be * searched, only the next one will; typically, you pass * what the previous call returned. of_node_put() will be * called on from for you. * @type: The type string to match against * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_by_type(struct device_node *from, const char *type) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) if (__of_node_is_type(np, type) && of_node_get(np)) break; of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_type); /** * of_find_compatible_node - Find a node based on type and one of the * tokens in its "compatible" property * @from: The node to start searching from or NULL, the node * you pass will not be searched, only the next one * will; typically, you pass what the previous call * returned. of_node_put() will be called on it * @type: The type string to match "device_type" or NULL to ignore * @compatible: The string to match to one of the tokens in the device * "compatible" list. 
* * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compatible) { struct device_node *np; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) if (__of_device_is_compatible(np, compatible, type, NULL) && of_node_get(np)) break; of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_compatible_node); /** * of_find_node_with_property - Find a node which has a property with * the given name. * @from: The node to start searching from or NULL, the node * you pass will not be searched, only the next one * will; typically, you pass what the previous call * returned. of_node_put() will be called on it * @prop_name: The name of the property to look for. * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_with_property(struct device_node *from, const char *prop_name) { struct device_node *np; struct property *pp; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) { for (pp = np->properties; pp; pp = pp->next) { if (of_prop_cmp(pp->name, prop_name) == 0) { of_node_get(np); goto out; } } } out: of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_with_property); static const struct of_device_id *__of_match_node(const struct of_device_id *matches, const struct device_node *node) { const struct of_device_id *best_match = NULL; int score, best_score = 0; if (!matches) return NULL; for (; matches->name[0] || matches->type[0] || matches->compatible[0]; matches++) { score = __of_device_is_compatible(node, matches->compatible, matches->type, matches->name); if (score > best_score) { best_match = matches; best_score = score; } } return best_match; } /** * of_match_node - Tell if a device_node has a matching of_match structure * @matches: array of of device match structures to search in * @node: the of device structure to match against * * Low level utility function used by device matching. */ const struct of_device_id *of_match_node(const struct of_device_id *matches, const struct device_node *node) { const struct of_device_id *match; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); match = __of_match_node(matches, node); raw_spin_unlock_irqrestore(&devtree_lock, flags); return match; } EXPORT_SYMBOL(of_match_node); /** * of_find_matching_node_and_match - Find a node based on an of_device_id * match table. * @from: The node to start searching from or NULL, the node * you pass will not be searched, only the next one * will; typically, you pass what the previous call * returned. of_node_put() will be called on it * @matches: array of of device match structures to search in * @match: Updated to point at the matches entry which matched * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. 
*/ struct device_node *of_find_matching_node_and_match(struct device_node *from, const struct of_device_id *matches, const struct of_device_id **match) { struct device_node *np; const struct of_device_id *m; unsigned long flags; if (match) *match = NULL; raw_spin_lock_irqsave(&devtree_lock, flags); for_each_of_allnodes_from(from, np) { m = __of_match_node(matches, np); if (m && of_node_get(np)) { if (match) *match = m; break; } } of_node_put(from); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_matching_node_and_match); /** * of_alias_from_compatible - Lookup appropriate alias for a device node * depending on compatible * @node: pointer to a device tree node * @alias: Pointer to buffer that alias value will be copied into * @len: Length of alias value * * Based on the value of the compatible property, this routine will attempt * to choose an appropriate alias value for a particular device tree node. * It does this by stripping the manufacturer prefix (as delimited by a ',') * from the first entry in the compatible list property. * * Note: The matching on just the "product" side of the compatible is a relic * from I2C and SPI. Please do not add any new user. * * Return: This routine returns 0 on success, <0 on failure. */ int of_alias_from_compatible(const struct device_node *node, char *alias, int len) { const char *compatible, *p; int cplen; compatible = of_get_property(node, "compatible", &cplen); if (!compatible || strlen(compatible) > cplen) return -ENODEV; p = strchr(compatible, ','); strscpy(alias, p ? p + 1 : compatible, len); return 0; } EXPORT_SYMBOL_GPL(of_alias_from_compatible); /** * of_find_node_by_phandle - Find a node given a phandle * @handle: phandle of the node to find * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ struct device_node *of_find_node_by_phandle(phandle handle) { struct device_node *np = NULL; unsigned long flags; u32 handle_hash; if (!handle) return NULL; handle_hash = of_phandle_cache_hash(handle); raw_spin_lock_irqsave(&devtree_lock, flags); if (phandle_cache[handle_hash] && handle == phandle_cache[handle_hash]->phandle) np = phandle_cache[handle_hash]; if (!np) { for_each_of_allnodes(np) if (np->phandle == handle && !of_node_check_flag(np, OF_DETACHED)) { phandle_cache[handle_hash] = np; break; } } of_node_get(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_phandle); void of_print_phandle_args(const char *msg, const struct of_phandle_args *args) { int i; printk("%s %pOF", msg, args->np); for (i = 0; i < args->args_count; i++) { const char delim = i ? ',' : ':'; pr_cont("%c%08x", delim, args->args[i]); } pr_cont("\n"); } int of_phandle_iterator_init(struct of_phandle_iterator *it, const struct device_node *np, const char *list_name, const char *cells_name, int cell_count) { const __be32 *list; int size; memset(it, 0, sizeof(*it)); /* * one of cell_count or cells_name must be provided to determine the * argument length. 
*/ if (cell_count < 0 && !cells_name) return -EINVAL; list = of_get_property(np, list_name, &size); if (!list) return -ENOENT; it->cells_name = cells_name; it->cell_count = cell_count; it->parent = np; it->list_end = list + size / sizeof(*list); it->phandle_end = list; it->cur = list; return 0; } EXPORT_SYMBOL_GPL(of_phandle_iterator_init); int of_phandle_iterator_next(struct of_phandle_iterator *it) { uint32_t count = 0; if (it->node) { of_node_put(it->node); it->node = NULL; } if (!it->cur || it->phandle_end >= it->list_end) return -ENOENT; it->cur = it->phandle_end; /* If phandle is 0, then it is an empty entry with no arguments. */ it->phandle = be32_to_cpup(it->cur++); if (it->phandle) { /* * Find the provider node and parse the #*-cells property to * determine the argument length. */ it->node = of_find_node_by_phandle(it->phandle); if (it->cells_name) { if (!it->node) { pr_err("%pOF: could not find phandle %d\n", it->parent, it->phandle); goto err; } if (of_property_read_u32(it->node, it->cells_name, &count)) { /* * If both cell_count and cells_name is given, * fall back to cell_count in absence * of the cells_name property */ if (it->cell_count >= 0) { count = it->cell_count; } else { pr_err("%pOF: could not get %s for %pOF\n", it->parent, it->cells_name, it->node); goto err; } } } else { count = it->cell_count; } /* * Make sure that the arguments actually fit in the remaining * property data length */ if (it->cur + count > it->list_end) { if (it->cells_name) pr_err("%pOF: %s = %d found %td\n", it->parent, it->cells_name, count, it->list_end - it->cur); else pr_err("%pOF: phandle %s needs %d, found %td\n", it->parent, of_node_full_name(it->node), count, it->list_end - it->cur); goto err; } } it->phandle_end = it->cur + count; it->cur_count = count; return 0; err: if (it->node) { of_node_put(it->node); it->node = NULL; } return -EINVAL; } EXPORT_SYMBOL_GPL(of_phandle_iterator_next); int of_phandle_iterator_args(struct of_phandle_iterator *it, uint32_t *args, int size) { int i, count; count = it->cur_count; if (WARN_ON(size < count)) count = size; for (i = 0; i < count; i++) args[i] = be32_to_cpup(it->cur++); return count; } int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int cell_count, int index, struct of_phandle_args *out_args) { struct of_phandle_iterator it; int rc, cur_index = 0; if (index < 0) return -EINVAL; /* Loop over the phandles until all the requested entry is found */ of_for_each_phandle(&it, rc, np, list_name, cells_name, cell_count) { /* * All of the error cases bail out of the loop, so at * this point, the parsing is successful. If the requested * index matches, then fill the out_args structure and return, * or return -ENOENT for an empty entry. */ rc = -ENOENT; if (cur_index == index) { if (!it.phandle) goto err; if (out_args) { int c; c = of_phandle_iterator_args(&it, out_args->args, MAX_PHANDLE_ARGS); out_args->np = it.node; out_args->args_count = c; } else { of_node_put(it.node); } /* Found it! 
return success */ return 0; } cur_index++; } /* * Unlock node before returning result; will be one of: * -ENOENT : index is for empty phandle * -EINVAL : parsing error on data */ err: of_node_put(it.node); return rc; } EXPORT_SYMBOL(__of_parse_phandle_with_args); /** * of_parse_phandle_with_args_map() - Find a node pointed by phandle in a list and remap it * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @stem_name: stem of property names that specify phandles' arguments count * @index: index of a phandle to parse out * @out_args: optional pointer to output arguments structure (will be filled) * * This function is useful to parse lists of phandles and their arguments. * Returns 0 on success and fills out_args, on error returns appropriate errno * value. The difference between this function and of_parse_phandle_with_args() * is that this API remaps a phandle if the node the phandle points to has * a <@stem_name>-map property. * * Caller is responsible to call of_node_put() on the returned out_args->np * pointer. * * Example:: * * phandle1: node1 { * #list-cells = <2>; * }; * * phandle2: node2 { * #list-cells = <1>; * }; * * phandle3: node3 { * #list-cells = <1>; * list-map = <0 &phandle2 3>, * <1 &phandle2 2>, * <2 &phandle1 5 1>; * list-map-mask = <0x3>; * }; * * node4 { * list = <&phandle1 1 2 &phandle3 0>; * }; * * To get a device_node of the ``node2`` node you may call this: * of_parse_phandle_with_args(node4, "list", "list", 1, &args); */ int of_parse_phandle_with_args_map(const struct device_node *np, const char *list_name, const char *stem_name, int index, struct of_phandle_args *out_args) { char *cells_name, *map_name = NULL, *mask_name = NULL; char *pass_name = NULL; struct device_node *cur, *new = NULL; const __be32 *map, *mask, *pass; static const __be32 dummy_mask[] = { [0 ... MAX_PHANDLE_ARGS] = cpu_to_be32(~0) }; static const __be32 dummy_pass[] = { [0 ... 
MAX_PHANDLE_ARGS] = cpu_to_be32(0) }; __be32 initial_match_array[MAX_PHANDLE_ARGS]; const __be32 *match_array = initial_match_array; int i, ret, map_len, match; u32 list_size, new_size; if (index < 0) return -EINVAL; cells_name = kasprintf(GFP_KERNEL, "#%s-cells", stem_name); if (!cells_name) return -ENOMEM; ret = -ENOMEM; map_name = kasprintf(GFP_KERNEL, "%s-map", stem_name); if (!map_name) goto free; mask_name = kasprintf(GFP_KERNEL, "%s-map-mask", stem_name); if (!mask_name) goto free; pass_name = kasprintf(GFP_KERNEL, "%s-map-pass-thru", stem_name); if (!pass_name) goto free; ret = __of_parse_phandle_with_args(np, list_name, cells_name, -1, index, out_args); if (ret) goto free; /* Get the #<list>-cells property */ cur = out_args->np; ret = of_property_read_u32(cur, cells_name, &list_size); if (ret < 0) goto put; /* Precalculate the match array - this simplifies match loop */ for (i = 0; i < list_size; i++) initial_match_array[i] = cpu_to_be32(out_args->args[i]); ret = -EINVAL; while (cur) { /* Get the <list>-map property */ map = of_get_property(cur, map_name, &map_len); if (!map) { ret = 0; goto free; } map_len /= sizeof(u32); /* Get the <list>-map-mask property (optional) */ mask = of_get_property(cur, mask_name, NULL); if (!mask) mask = dummy_mask; /* Iterate through <list>-map property */ match = 0; while (map_len > (list_size + 1) && !match) { /* Compare specifiers */ match = 1; for (i = 0; i < list_size; i++, map_len--) match &= !((match_array[i] ^ *map++) & mask[i]); of_node_put(new); new = of_find_node_by_phandle(be32_to_cpup(map)); map++; map_len--; /* Check if not found */ if (!new) goto put; if (!of_device_is_available(new)) match = 0; ret = of_property_read_u32(new, cells_name, &new_size); if (ret) goto put; /* Check for malformed properties */ if (WARN_ON(new_size > MAX_PHANDLE_ARGS)) goto put; if (map_len < new_size) goto put; /* Move forward by new node's #<list>-cells amount */ map += new_size; map_len -= new_size; } if (!match) goto put; /* Get the <list>-map-pass-thru property (optional) */ pass = of_get_property(cur, pass_name, NULL); if (!pass) pass = dummy_pass; /* * Successfully parsed a <list>-map translation; copy new * specifier into the out_args structure, keeping the * bits specified in <list>-map-pass-thru. */ match_array = map - new_size; for (i = 0; i < new_size; i++) { __be32 val = *(map - new_size + i); if (i < list_size) { val &= ~pass[i]; val |= cpu_to_be32(out_args->args[i]) & pass[i]; } out_args->args[i] = be32_to_cpu(val); } out_args->args_count = list_size = new_size; /* Iterate again with new provider */ out_args->np = new; of_node_put(cur); cur = new; new = NULL; } put: of_node_put(cur); of_node_put(new); free: kfree(mask_name); kfree(map_name); kfree(cells_name); kfree(pass_name); return ret; } EXPORT_SYMBOL(of_parse_phandle_with_args_map); /** * of_count_phandle_with_args() - Find the number of phandles references in a property * @np: pointer to a device tree node containing a list * @list_name: property name that contains a list * @cells_name: property name that specifies phandles' arguments count * * Return: The number of phandle + argument tuples within a property. It * is a typical pattern to encode a list of phandle and variable * arguments into a single property. The number of arguments is encoded * by a property in the phandle-target node. For example, a gpios * property would contain a list of GPIO specifies consisting of a * phandle and 1 or more arguments. 
The number of arguments are * determined by the #gpio-cells property in the node pointed to by the * phandle. */ int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name) { struct of_phandle_iterator it; int rc, cur_index = 0; /* * If cells_name is NULL we assume a cell count of 0. This makes * counting the phandles trivial as each 32bit word in the list is a * phandle and no arguments are to consider. So we don't iterate through * the list but just use the length to determine the phandle count. */ if (!cells_name) { const __be32 *list; int size; list = of_get_property(np, list_name, &size); if (!list) return -ENOENT; return size / sizeof(*list); } rc = of_phandle_iterator_init(&it, np, list_name, cells_name, -1); if (rc) return rc; while ((rc = of_phandle_iterator_next(&it)) == 0) cur_index += 1; if (rc != -ENOENT) return rc; return cur_index; } EXPORT_SYMBOL(of_count_phandle_with_args); static struct property *__of_remove_property_from_list(struct property **list, struct property *prop) { struct property **next; for (next = list; *next; next = &(*next)->next) { if (*next == prop) { *next = prop->next; prop->next = NULL; return prop; } } return NULL; } /** * __of_add_property - Add a property to a node without lock operations * @np: Caller's Device Node * @prop: Property to add */ int __of_add_property(struct device_node *np, struct property *prop) { int rc = 0; unsigned long flags; struct property **next; raw_spin_lock_irqsave(&devtree_lock, flags); __of_remove_property_from_list(&np->deadprops, prop); prop->next = NULL; next = &np->properties; while (*next) { if (strcmp(prop->name, (*next)->name) == 0) { /* duplicate ! don't insert it */ rc = -EEXIST; goto out_unlock; } next = &(*next)->next; } *next = prop; out_unlock: raw_spin_unlock_irqrestore(&devtree_lock, flags); if (rc) return rc; __of_add_property_sysfs(np, prop); return 0; } /** * of_add_property - Add a property to a node * @np: Caller's Device Node * @prop: Property to add */ int of_add_property(struct device_node *np, struct property *prop) { int rc; mutex_lock(&of_mutex); rc = __of_add_property(np, prop); mutex_unlock(&of_mutex); if (!rc) of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL); return rc; } EXPORT_SYMBOL_GPL(of_add_property); int __of_remove_property(struct device_node *np, struct property *prop) { unsigned long flags; int rc = -ENODEV; raw_spin_lock_irqsave(&devtree_lock, flags); if (__of_remove_property_from_list(&np->properties, prop)) { /* Found the property, add it to deadprops list */ prop->next = np->deadprops; np->deadprops = prop; rc = 0; } raw_spin_unlock_irqrestore(&devtree_lock, flags); if (rc) return rc; __of_remove_property_sysfs(np, prop); return 0; } /** * of_remove_property - Remove a property from a node. * @np: Caller's Device Node * @prop: Property to remove * * Note that we don't actually remove it, since we have given out * who-knows-how-many pointers to the data using get-property. * Instead we just move the property to the "dead properties" * list, so it won't be found any more. 
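 *
 * Example (an illustrative sketch; the "example-status" property name is
 * hypothetical and np is assumed to be a valid device node pointer)::
 *
 *	struct property *prop;
 *
 *	prop = of_find_property(np, "example-status", NULL);
 *	if (prop)
 *		of_remove_property(np, prop);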
*/ int of_remove_property(struct device_node *np, struct property *prop) { int rc; if (!prop) return -ENODEV; mutex_lock(&of_mutex); rc = __of_remove_property(np, prop); mutex_unlock(&of_mutex); if (!rc) of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL); return rc; } EXPORT_SYMBOL_GPL(of_remove_property); int __of_update_property(struct device_node *np, struct property *newprop, struct property **oldpropp) { struct property **next, *oldprop; unsigned long flags; raw_spin_lock_irqsave(&devtree_lock, flags); __of_remove_property_from_list(&np->deadprops, newprop); for (next = &np->properties; *next; next = &(*next)->next) { if (of_prop_cmp((*next)->name, newprop->name) == 0) break; } *oldpropp = oldprop = *next; if (oldprop) { /* replace the node */ newprop->next = oldprop->next; *next = newprop; oldprop->next = np->deadprops; np->deadprops = oldprop; } else { /* new node */ newprop->next = NULL; *next = newprop; } raw_spin_unlock_irqrestore(&devtree_lock, flags); __of_update_property_sysfs(np, newprop, oldprop); return 0; } /* * of_update_property - Update a property in a node, if the property does * not exist, add it. * * Note that we don't actually remove it, since we have given out * who-knows-how-many pointers to the data using get-property. * Instead we just move the property to the "dead properties" list, * and add the new property to the property list */ int of_update_property(struct device_node *np, struct property *newprop) { struct property *oldprop; int rc; if (!newprop->name) return -EINVAL; mutex_lock(&of_mutex); rc = __of_update_property(np, newprop, &oldprop); mutex_unlock(&of_mutex); if (!rc) of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop); return rc; } static void of_alias_add(struct alias_prop *ap, struct device_node *np, int id, const char *stem, int stem_len) { ap->np = np; ap->id = id; strscpy(ap->stem, stem, stem_len + 1); list_add_tail(&ap->link, &aliases_lookup); pr_debug("adding DT alias:%s: stem=%s id=%i node=%pOF\n", ap->alias, ap->stem, ap->id, np); } /** * of_alias_scan - Scan all properties of the 'aliases' node * @dt_alloc: An allocator that provides a virtual address to memory * for storing the resulting tree * * The function scans all the properties of the 'aliases' node and populates * the global lookup table with the properties. It returns the * number of alias properties found, or an error code in case of failure. 
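 *
 * Example (an illustrative sketch; the alias names are hypothetical): if the
 * 'aliases' node contains ``serial0 = &uart0`` and ``serial1 = &uart1``, a
 * driver bound to the node behind ``&uart1`` can later look up its index
 * with::
 *
 *	id = of_alias_get_id(np, "serial");
 *
 * which returns 1 for that node.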
*/ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) { struct property *pp; of_aliases = of_find_node_by_path("/aliases"); of_chosen = of_find_node_by_path("/chosen"); if (of_chosen == NULL) of_chosen = of_find_node_by_path("/chosen@0"); if (of_chosen) { /* linux,stdout-path and /aliases/stdout are for legacy compatibility */ const char *name = NULL; if (of_property_read_string(of_chosen, "stdout-path", &name)) of_property_read_string(of_chosen, "linux,stdout-path", &name); if (IS_ENABLED(CONFIG_PPC) && !name) of_property_read_string(of_aliases, "stdout", &name); if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); if (of_stdout) of_stdout->fwnode.flags |= FWNODE_FLAG_BEST_EFFORT; } if (!of_aliases) return; for_each_property_of_node(of_aliases, pp) { const char *start = pp->name; const char *end = start + strlen(start); struct device_node *np; struct alias_prop *ap; int id, len; /* Skip those we do not want to proceed */ if (!strcmp(pp->name, "name") || !strcmp(pp->name, "phandle") || !strcmp(pp->name, "linux,phandle")) continue; np = of_find_node_by_path(pp->value); if (!np) continue; /* walk the alias backwards to extract the id and work out * the 'stem' string */ while (isdigit(*(end-1)) && end > start) end--; len = end - start; if (kstrtoint(end, 10, &id) < 0) continue; /* Allocate an alias_prop with enough space for the stem */ ap = dt_alloc(sizeof(*ap) + len + 1, __alignof__(*ap)); if (!ap) continue; memset(ap, 0, sizeof(*ap) + len + 1); ap->alias = start; of_alias_add(ap, np, id, start, len); } } /** * of_alias_get_id - Get alias id for the given device_node * @np: Pointer to the given device_node * @stem: Alias stem of the given device_node * * The function travels the lookup table to get the alias id for the given * device_node and alias stem. * * Return: The alias id if found. */ int of_alias_get_id(struct device_node *np, const char *stem) { struct alias_prop *app; int id = -ENODEV; mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (strcmp(app->stem, stem) != 0) continue; if (np == app->np) { id = app->id; break; } } mutex_unlock(&of_mutex); return id; } EXPORT_SYMBOL_GPL(of_alias_get_id); /** * of_alias_get_highest_id - Get highest alias id for the given stem * @stem: Alias stem to be examined * * The function travels the lookup table to get the highest alias id for the * given alias stem. It returns the alias id if found. */ int of_alias_get_highest_id(const char *stem) { struct alias_prop *app; int id = -ENODEV; mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (strcmp(app->stem, stem) != 0) continue; if (app->id > id) id = app->id; } mutex_unlock(&of_mutex); return id; } EXPORT_SYMBOL_GPL(of_alias_get_highest_id); /** * of_console_check() - Test and setup console for DT setup * @dn: Pointer to device node * @name: Name to use for preferred console without index. ex. "ttyS" * @index: Index to use for preferred console. * * Check if the given device node matches the stdout-path property in the * /chosen node. If it does then register it as the preferred console. * * Return: TRUE if console successfully setup. Otherwise return FALSE. */ bool of_console_check(struct device_node *dn, char *name, int index) { if (!dn || dn != of_stdout || console_set_on_cmdline) return false; /* * XXX: cast `options' to char pointer to suppress complication * warnings: printk, UART and console drivers expect char pointer. 
*/ return !add_preferred_console(name, index, (char *)of_stdout_options); } EXPORT_SYMBOL_GPL(of_console_check); /** * of_find_next_cache_node - Find a node's subsidiary cache * @np: node of type "cpu" or "cache" * * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. Caller should hold a reference * to np. */ struct device_node *of_find_next_cache_node(const struct device_node *np) { struct device_node *child, *cache_node; cache_node = of_parse_phandle(np, "l2-cache", 0); if (!cache_node) cache_node = of_parse_phandle(np, "next-level-cache", 0); if (cache_node) return cache_node; /* OF on pmac has nodes instead of properties named "l2-cache" * beneath CPU nodes. */ if (IS_ENABLED(CONFIG_PPC_PMAC) && of_node_is_type(np, "cpu")) for_each_child_of_node(np, child) if (of_node_is_type(child, "cache")) return child; return NULL; } /** * of_find_last_cache_level - Find the level at which the last cache is * present for the given logical cpu * * @cpu: cpu number(logical index) for which the last cache level is needed * * Return: The level at which the last cache is present. It is exactly * same as the total number of cache levels for the given logical cpu. */ int of_find_last_cache_level(unsigned int cpu) { u32 cache_level = 0; struct device_node *prev = NULL, *np = of_cpu_device_node_get(cpu); while (np) { of_node_put(prev); prev = np; np = of_find_next_cache_node(np); } of_property_read_u32(prev, "cache-level", &cache_level); of_node_put(prev); return cache_level; } /** * of_map_id - Translate an ID through a downstream mapping. * @np: root complex device node. * @id: device ID to map. * @map_name: property name of the map to use. * @map_mask_name: optional property name of the mask to use. * @target: optional pointer to a target device node. * @id_out: optional pointer to receive the translated ID. * * Given a device ID, look up the appropriate implementation-defined * platform ID and/or the target device which receives transactions on that * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or * @id_out may be NULL if only the other is required. If @target points to * a non-NULL device node pointer, only entries targeting that node will be * matched; if it points to a NULL value, it will receive the device node of * the first matching target phandle, with a reference held. * * Return: 0 on success or a standard error code on failure. */ int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { u32 map_mask, masked_id; int map_len; const __be32 *map = NULL; if (!np || !map_name || (!target && !id_out)) return -EINVAL; map = of_get_property(np, map_name, &map_len); if (!map) { if (target) return -ENODEV; /* Otherwise, no map implies no translation */ *id_out = id; return 0; } if (!map_len || map_len % (4 * sizeof(*map))) { pr_err("%pOF: Error: Bad %s length: %d\n", np, map_name, map_len); return -EINVAL; } /* The default is to select all bits. */ map_mask = 0xffffffff; /* * Can be overridden by "{iommu,msi}-map-mask" property. * If of_property_read_u32() fails, the default is used. 
*/ if (map_mask_name) of_property_read_u32(np, map_mask_name, &map_mask); masked_id = map_mask & id; for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) { struct device_node *phandle_node; u32 id_base = be32_to_cpup(map + 0); u32 phandle = be32_to_cpup(map + 1); u32 out_base = be32_to_cpup(map + 2); u32 id_len = be32_to_cpup(map + 3); if (id_base & ~map_mask) { pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores id-base (0x%x)\n", np, map_name, map_name, map_mask, id_base); return -EFAULT; } if (masked_id < id_base || masked_id >= id_base + id_len) continue; phandle_node = of_find_node_by_phandle(phandle); if (!phandle_node) return -ENODEV; if (target) { if (*target) of_node_put(phandle_node); else *target = phandle_node; if (*target != phandle_node) continue; } if (id_out) *id_out = masked_id - id_base + out_base; pr_debug("%pOF: %s, using mask %08x, id-base: %08x, out-base: %08x, length: %08x, id: %08x -> %08x\n", np, map_name, map_mask, id_base, out_base, id_len, id, masked_id - id_base + out_base); return 0; } pr_info("%pOF: no %s translation for id 0x%x on %pOF\n", np, map_name, id, target && *target ? *target : NULL); /* Bypasses translation */ if (id_out) *id_out = id; return 0; } EXPORT_SYMBOL_GPL(of_map_id);
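/*
 * Illustrative sketch (not part of the file above): one way a bus driver
 * might use of_map_id() to translate a device's requester ID through an
 * "msi-map" property. The function and variable names here are made up;
 * only of_map_id() and its argument order follow the API documented above.
 */
#include <linux/of.h>
#include <linux/printk.h>

static int example_translate_rid(struct device_node *bridge_np, u32 rid)
{
	struct device_node *msi_np = NULL;	/* NULL: receive the target node */
	u32 mapped_id;
	int err;

	err = of_map_id(bridge_np, rid, "msi-map", "msi-map-mask",
			&msi_np, &mapped_id);
	if (err)
		return err;

	pr_debug("RID 0x%x maps to 0x%x on %pOF\n", rid, mapped_id, msi_np);

	/* of_map_id() returned the target node with a reference held. */
	of_node_put(msi_np);
	return 0;
}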
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_KASAN_H
#define _LINUX_KASAN_H

#include <linux/bug.h>
#include <linux/kasan-enabled.h>
#include <linux/kasan-tags.h>
#include <linux/kernel.h>
#include <linux/static_key.h>
#include <linux/types.h>

struct kmem_cache;
struct page;
struct slab;
struct vm_struct;
struct task_struct;

#ifdef CONFIG_KASAN
#include <linux/linkage.h>
#include <asm/kasan.h>
#endif

typedef unsigned int __bitwise kasan_vmalloc_flags_t;

#define KASAN_VMALLOC_NONE		((__force kasan_vmalloc_flags_t)0x00u)
#define KASAN_VMALLOC_INIT		((__force kasan_vmalloc_flags_t)0x01u)
#define KASAN_VMALLOC_VM_ALLOC		((__force kasan_vmalloc_flags_t)0x02u)
#define KASAN_VMALLOC_PROT_NORMAL	((__force kasan_vmalloc_flags_t)0x04u)

#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)

#include <linux/pgtable.h>

/* Software KASAN implementations use shadow memory. */

#ifdef CONFIG_KASAN_SW_TAGS

/* This matches KASAN_TAG_INVALID.
*/ #define KASAN_SHADOW_INIT 0xFE #else #define KASAN_SHADOW_INIT 0 #endif #ifndef PTE_HWTABLE_PTRS #define PTE_HWTABLE_PTRS 0 #endif extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[MAX_PTRS_PER_PTE + PTE_HWTABLE_PTRS]; extern pmd_t kasan_early_shadow_pmd[MAX_PTRS_PER_PMD]; extern pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD]; extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); #ifndef kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } #endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); /* Enable reporting bugs after kasan_disable_current() */ extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ extern void kasan_disable_current(void); #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline int kasan_add_zero_shadow(void *start, unsigned long size) { return 0; } static inline void kasan_remove_zero_shadow(void *start, unsigned long size) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN_HW_TAGS #else /* CONFIG_KASAN_HW_TAGS */ #endif /* CONFIG_KASAN_HW_TAGS */ static inline bool kasan_has_integrated_init(void) { return kasan_hw_tags_enabled(); } #ifdef CONFIG_KASAN void __kasan_unpoison_range(const void *addr, size_t size); static __always_inline void kasan_unpoison_range(const void *addr, size_t size) { if (kasan_enabled()) __kasan_unpoison_range(addr, size); } void __kasan_poison_pages(struct page *page, unsigned int order, bool init); static __always_inline void kasan_poison_pages(struct page *page, unsigned int order, bool init) { if (kasan_enabled()) __kasan_poison_pages(page, order, init); } bool __kasan_unpoison_pages(struct page *page, unsigned int order, bool init); static __always_inline bool kasan_unpoison_pages(struct page *page, unsigned int order, bool init) { if (kasan_enabled()) return __kasan_unpoison_pages(page, order, init); return false; } void __kasan_poison_slab(struct slab *slab); static __always_inline void kasan_poison_slab(struct slab *slab) { if (kasan_enabled()) __kasan_poison_slab(slab); } void __kasan_unpoison_new_object(struct kmem_cache *cache, void *object); /** * kasan_unpoison_new_object - Temporarily unpoison a new slab object. * @cache: Cache the object belong to. * @object: Pointer to the object. * * This function is intended for the slab allocator's internal use. It * temporarily unpoisons an object from a newly allocated slab without doing * anything else. The object must later be repoisoned by * kasan_poison_new_object(). */ static __always_inline void kasan_unpoison_new_object(struct kmem_cache *cache, void *object) { if (kasan_enabled()) __kasan_unpoison_new_object(cache, object); } void __kasan_poison_new_object(struct kmem_cache *cache, void *object); /** * kasan_unpoison_new_object - Repoison a new slab object. * @cache: Cache the object belong to. * @object: Pointer to the object. * * This function is intended for the slab allocator's internal use. It * repoisons an object that was previously unpoisoned by * kasan_unpoison_new_object() without doing anything else. 
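 *
 * Example (an illustrative sketch of the expected pairing; the constructor
 * call in the middle is hypothetical)::
 *
 *	kasan_unpoison_new_object(cache, object);
 *	example_init_object(object);
 *	kasan_poison_new_object(cache, object);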
*/ static __always_inline void kasan_poison_new_object(struct kmem_cache *cache, void *object) { if (kasan_enabled()) __kasan_poison_new_object(cache, object); } void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache, const void *object); static __always_inline void * __must_check kasan_init_slab_obj( struct kmem_cache *cache, const void *object) { if (kasan_enabled()) return __kasan_init_slab_obj(cache, object); return (void *)object; } bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip, bool init); static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) { if (kasan_enabled()) return __kasan_slab_free(s, object, _RET_IP_, init); return false; } void __kasan_kfree_large(void *ptr, unsigned long ip); static __always_inline void kasan_kfree_large(void *ptr) { if (kasan_enabled()) __kasan_kfree_large(ptr, _RET_IP_); } void * __must_check __kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags, bool init); static __always_inline void * __must_check kasan_slab_alloc( struct kmem_cache *s, void *object, gfp_t flags, bool init) { if (kasan_enabled()) return __kasan_slab_alloc(s, object, flags, init); return object; } void * __must_check __kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); static __always_inline void * __must_check kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) { if (kasan_enabled()) return __kasan_kmalloc(s, object, size, flags); return (void *)object; } void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); static __always_inline void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags) { if (kasan_enabled()) return __kasan_kmalloc_large(ptr, size, flags); return (void *)ptr; } void * __must_check __kasan_krealloc(const void *object, size_t new_size, gfp_t flags); static __always_inline void * __must_check kasan_krealloc(const void *object, size_t new_size, gfp_t flags) { if (kasan_enabled()) return __kasan_krealloc(object, new_size, flags); return (void *)object; } bool __kasan_mempool_poison_pages(struct page *page, unsigned int order, unsigned long ip); /** * kasan_mempool_poison_pages - Check and poison a mempool page allocation. * @page: Pointer to the page allocation. * @order: Order of the allocation. * * This function is intended for kernel subsystems that cache page allocations * to reuse them instead of freeing them back to page_alloc (e.g. mempool). * * This function is similar to kasan_mempool_poison_object() but operates on * page allocations. * * Before the poisoned allocation can be reused, it must be unpoisoned via * kasan_mempool_unpoison_pages(). * * Return: true if the allocation can be safely reused; false otherwise. */ static __always_inline bool kasan_mempool_poison_pages(struct page *page, unsigned int order) { if (kasan_enabled()) return __kasan_mempool_poison_pages(page, order, _RET_IP_); return true; } void __kasan_mempool_unpoison_pages(struct page *page, unsigned int order, unsigned long ip); /** * kasan_mempool_unpoison_pages - Unpoison a mempool page allocation. * @page: Pointer to the page allocation. * @order: Order of the allocation. * * This function is intended for kernel subsystems that cache page allocations * to reuse them instead of freeing them back to page_alloc (e.g. mempool). * * This function unpoisons a page allocation that was previously poisoned by * kasan_mempool_poison_pages() without zeroing the allocation's memory. 
For * the tag-based modes, this function assigns a new tag to the allocation. */ static __always_inline void kasan_mempool_unpoison_pages(struct page *page, unsigned int order) { if (kasan_enabled()) __kasan_mempool_unpoison_pages(page, order, _RET_IP_); } bool __kasan_mempool_poison_object(void *ptr, unsigned long ip); /** * kasan_mempool_poison_object - Check and poison a mempool slab allocation. * @ptr: Pointer to the slab allocation. * * This function is intended for kernel subsystems that cache slab allocations * to reuse them instead of freeing them back to the slab allocator (e.g. * mempool). * * This function poisons a slab allocation and saves a free stack trace for it * without initializing the allocation's memory and without putting it into the * quarantine (for the Generic mode). * * This function also performs checks to detect double-free and invalid-free * bugs and reports them. The caller can use the return value of this function * to find out if the allocation is buggy. * * Before the poisoned allocation can be reused, it must be unpoisoned via * kasan_mempool_unpoison_object(). * * This function operates on all slab allocations including large kmalloc * allocations (the ones returned by kmalloc_large() or by kmalloc() with the * size > KMALLOC_MAX_SIZE). * * Return: true if the allocation can be safely reused; false otherwise. */ static __always_inline bool kasan_mempool_poison_object(void *ptr) { if (kasan_enabled()) return __kasan_mempool_poison_object(ptr, _RET_IP_); return true; } void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip); /** * kasan_mempool_unpoison_object - Unpoison a mempool slab allocation. * @ptr: Pointer to the slab allocation. * @size: Size to be unpoisoned. * * This function is intended for kernel subsystems that cache slab allocations * to reuse them instead of freeing them back to the slab allocator (e.g. * mempool). * * This function unpoisons a slab allocation that was previously poisoned via * kasan_mempool_poison_object() and saves an alloc stack trace for it without * initializing the allocation's memory. For the tag-based modes, this function * does not assign a new tag to the allocation and instead restores the * original tags based on the pointer value. * * This function operates on all slab allocations including large kmalloc * allocations (the ones returned by kmalloc_large() or by kmalloc() with the * size > KMALLOC_MAX_SIZE). */ static __always_inline void kasan_mempool_unpoison_object(void *ptr, size_t size) { if (kasan_enabled()) __kasan_mempool_unpoison_object(ptr, size, _RET_IP_); } /* * Unlike kasan_check_read/write(), kasan_check_byte() is performed even for * the hardware tag-based mode that doesn't rely on compiler instrumentation. 
*/ bool __kasan_check_byte(const void *addr, unsigned long ip); static __always_inline bool kasan_check_byte(const void *addr) { if (kasan_enabled()) return __kasan_check_byte(addr, _RET_IP_); return true; } #else /* CONFIG_KASAN */ static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_poison_pages(struct page *page, unsigned int order, bool init) {} static inline bool kasan_unpoison_pages(struct page *page, unsigned int order, bool init) { return false; } static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_new_object(struct kmem_cache *cache, void *object) {} static inline void kasan_poison_new_object(struct kmem_cache *cache, void *object) {} static inline void *kasan_init_slab_obj(struct kmem_cache *cache, const void *object) { return (void *)object; } static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) { return false; } static inline void kasan_kfree_large(void *ptr) {} static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags, bool init) { return object; } static inline void *kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) { return (void *)object; } static inline void *kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags) { return (void *)ptr; } static inline void *kasan_krealloc(const void *object, size_t new_size, gfp_t flags) { return (void *)object; } static inline bool kasan_mempool_poison_pages(struct page *page, unsigned int order) { return true; } static inline void kasan_mempool_unpoison_pages(struct page *page, unsigned int order) {} static inline bool kasan_mempool_poison_object(void *ptr) { return true; } static inline void kasan_mempool_unpoison_object(void *ptr, size_t size) {} static inline bool kasan_check_byte(const void *address) { return true; } #endif /* CONFIG_KASAN */ #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} static inline void kasan_unpoison_task_stack_below(const void *watermark) {} #endif #ifdef CONFIG_KASAN_GENERIC struct kasan_cache { int alloc_meta_offset; int free_meta_offset; }; size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ /* Tag-based KASAN modes do not use per-object metadata. */ static inline size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object) { return 0; } /* And no cache-related metadata initialization is required. 
*/ static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) {} static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} static inline void kasan_record_aux_stack_noalloc(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) static inline void *kasan_reset_tag(const void *addr) { return (void *)arch_kasan_reset_tag(addr); } /** * kasan_report - print a report about a bad memory access detected by KASAN * @addr: address of the bad access * @size: size of the bad access * @is_write: whether the bad access is a write or a read * @ip: instruction pointer for the accessibility check or the bad access itself */ bool kasan_report(const void *addr, size_t size, bool is_write, unsigned long ip); #else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline void *kasan_reset_tag(const void *addr) { return (void *)addr; } #endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS*/ #ifdef CONFIG_KASAN_HW_TAGS void kasan_report_async(void); #endif /* CONFIG_KASAN_HW_TAGS */ #ifdef CONFIG_KASAN_SW_TAGS void __init kasan_init_sw_tags(void); #else static inline void kasan_init_sw_tags(void) { } #endif #ifdef CONFIG_KASAN_HW_TAGS void kasan_init_hw_tags_cpu(void); void __init kasan_init_hw_tags(void); #else static inline void kasan_init_hw_tags_cpu(void) { } static inline void kasan_init_hw_tags(void) { } #endif #ifdef CONFIG_KASAN_VMALLOC #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { return 0; } static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end) { } #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, kasan_vmalloc_flags_t flags); static __always_inline void *kasan_unpoison_vmalloc(const void *start, unsigned long size, kasan_vmalloc_flags_t flags) { if (kasan_enabled()) return __kasan_unpoison_vmalloc(start, size, flags); return (void *)start; } void __kasan_poison_vmalloc(const void *start, unsigned long size); static __always_inline void kasan_poison_vmalloc(const void *start, unsigned long size) { if (kasan_enabled()) __kasan_poison_vmalloc(start, size); } #else /* CONFIG_KASAN_VMALLOC */ static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { return 0; } static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end) { } static inline void *kasan_unpoison_vmalloc(const void *start, unsigned long size, kasan_vmalloc_flags_t flags) { return (void *)start; } static inline void kasan_poison_vmalloc(const void *start, unsigned long size) { } #endif /* CONFIG_KASAN_VMALLOC */ #if 
(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) /* * These functions allocate and free shadow memory for kernel modules. * They are only required when KASAN_VMALLOC is not supported, as otherwise * shadow memory is allocated by the generic vmalloc handlers. */ int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask); void kasan_free_module_shadow(const struct vm_struct *vm); #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ static inline int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { return 0; } static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_non_canonical_hook(unsigned long addr); #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline void kasan_non_canonical_hook(unsigned long addr) { } #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #endif /* LINUX_KASAN_H */
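/*
 * Illustrative sketch (not part of the header above): how a subsystem that
 * caches slab allocations for reuse might pair the mempool-style KASAN hooks
 * declared above. The example_cache structure and function names are made
 * up; only the kasan_mempool_*() calls follow the documented API.
 */
#include <linux/kasan.h>
#include <linux/types.h>

struct example_cache {
	void *cached;		/* one cached element, or NULL */
	size_t elem_size;
};

/* Park a freed element in the cache instead of returning it to the slab. */
static void example_cache_put(struct example_cache *c, void *elem)
{
	/*
	 * Check and poison the element. A false return means KASAN detected
	 * (and already reported) a double-free or invalid-free, so the
	 * element must not be reused.
	 */
	if (!kasan_mempool_poison_object(elem))
		return;

	c->cached = elem;
}

/* Take an element out of the cache, unpoisoning it before reuse. */
static void *example_cache_get(struct example_cache *c)
{
	void *elem = c->cached;

	if (!elem)
		return NULL;

	c->cached = NULL;
	kasan_mempool_unpoison_object(elem, c->elem_size);
	return elem;
}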
// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  This file contains the interface functions for the various time related
 *  system calls: time, stime, gettimeofday, settimeofday, adjtime
 *
 *  Modification history:
 *
 *  1993-09-02    Philip Gladstone
 *      Created file with time related functions from sched/core.c and adjtimex()
 *  1993-10-08    Torsten Duwe
 *      adjtime interface update and CMOS clock write code
 *  1995-08-13    Torsten Duwe
 *      kernel PLL updated to 1994-12-13 specs (rfc-1589)
 *  1999-01-16    Ulrich Windl
 *	Introduced error checking for many cases in adjtimex().
 *	Updated NTP code according to technical memorandum Jan '96
 *	"A Kernel Model for Precision Timekeeping" by Dave Mills
 *	Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
 *	(Even though the technical memorandum forbids it)
 *  2004-07-14	 Christoph Lameter
 *	Added getnstimeofday to allow the posix timer functions to return
 *	with nanosecond accuracy
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/capability.h>
#include <linux/timekeeper_internal.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/security.h>
#include <linux/fs.h>
#include <linux/math64.h>
#include <linux/ptrace.h>

#include <linux/uaccess.h>
#include <linux/compat.h>
#include <asm/unistd.h>

#include <generated/timeconst.h>
#include "timekeeping.h"

/*
 * The timezone where the local system is located.  Used as a default by some
 * programs who obtain this value by using gettimeofday.
 */
struct timezone sys_tz;

EXPORT_SYMBOL(sys_tz);

#ifdef __ARCH_WANT_SYS_TIME

/*
 * sys_time() can be implemented in user-level using
 * sys_gettimeofday().  Is this for backwards compatibility?  If so,
 * why not move it into the appropriate arch directory (for those
 * architectures that need it).
 */
SYSCALL_DEFINE1(time, __kernel_old_time_t __user *, tloc)
{
	__kernel_old_time_t i = (__kernel_old_time_t)ktime_get_real_seconds();

	if (tloc) {
		if (put_user(i,tloc))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return i;
}

/*
 * sys_stime() can be implemented in user-level using
 * sys_settimeofday().  Is this for backwards compatibility?  If so,
 * why not move it into the appropriate arch directory (for those
 * architectures that need it).
 */
SYSCALL_DEFINE1(stime, __kernel_old_time_t __user *, tptr)
{
	struct timespec64 tv;
	int err;

	if (get_user(tv.tv_sec, tptr))
		return -EFAULT;

	tv.tv_nsec = 0;

	err = security_settime64(&tv, NULL);
	if (err)
		return err;

	do_settimeofday64(&tv);
	return 0;
}

#endif /* __ARCH_WANT_SYS_TIME */

#ifdef CONFIG_COMPAT_32BIT_TIME
#ifdef __ARCH_WANT_SYS_TIME32

/* old_time32_t is a 32 bit "long" and needs to get converted.
*/ SYSCALL_DEFINE1(time32, old_time32_t __user *, tloc) { old_time32_t i; i = (old_time32_t)ktime_get_real_seconds(); if (tloc) { if (put_user(i,tloc)) return -EFAULT; } force_successful_syscall_return(); return i; } SYSCALL_DEFINE1(stime32, old_time32_t __user *, tptr) { struct timespec64 tv; int err; if (get_user(tv.tv_sec, tptr)) return -EFAULT; tv.tv_nsec = 0; err = security_settime64(&tv, NULL); if (err) return err; do_settimeofday64(&tv); return 0; } #endif /* __ARCH_WANT_SYS_TIME32 */ #endif SYSCALL_DEFINE2(gettimeofday, struct __kernel_old_timeval __user *, tv, struct timezone __user *, tz) { if (likely(tv != NULL)) { struct timespec64 ts; ktime_get_real_ts64(&ts); if (put_user(ts.tv_sec, &tv->tv_sec) || put_user(ts.tv_nsec / 1000, &tv->tv_usec)) return -EFAULT; } if (unlikely(tz != NULL)) { if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) return -EFAULT; } return 0; } /* * In case for some reason the CMOS clock has not already been running * in UTC, but in some local time: The first time we set the timezone, * we will warp the clock so that it is ticking UTC time instead of * local time. Presumably, if someone is setting the timezone then we * are running in an environment where the programs understand about * timezones. This should be done at boot time in the /etc/rc script, * as soon as possible, so that the clock can be set right. Otherwise, * various programs will get confused when the clock gets warped. */ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz) { static int firsttime = 1; int error = 0; if (tv && !timespec64_valid_settod(tv)) return -EINVAL; error = security_settime64(tv, tz); if (error) return error; if (tz) { /* Verify we're within the +-15 hrs range */ if (tz->tz_minuteswest > 15*60 || tz->tz_minuteswest < -15*60) return -EINVAL; sys_tz = *tz; update_vsyscall_tz(); if (firsttime) { firsttime = 0; if (!tv) timekeeping_warp_clock(); } } if (tv) return do_settimeofday64(tv); return 0; } SYSCALL_DEFINE2(settimeofday, struct __kernel_old_timeval __user *, tv, struct timezone __user *, tz) { struct timespec64 new_ts; struct timezone new_tz; if (tv) { if (get_user(new_ts.tv_sec, &tv->tv_sec) || get_user(new_ts.tv_nsec, &tv->tv_usec)) return -EFAULT; if (new_ts.tv_nsec > USEC_PER_SEC || new_ts.tv_nsec < 0) return -EINVAL; new_ts.tv_nsec *= NSEC_PER_USEC; } if (tz) { if (copy_from_user(&new_tz, tz, sizeof(*tz))) return -EFAULT; } return do_sys_settimeofday64(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(gettimeofday, struct old_timeval32 __user *, tv, struct timezone __user *, tz) { if (tv) { struct timespec64 ts; ktime_get_real_ts64(&ts); if (put_user(ts.tv_sec, &tv->tv_sec) || put_user(ts.tv_nsec / 1000, &tv->tv_usec)) return -EFAULT; } if (tz) { if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) return -EFAULT; } return 0; } COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, struct timezone __user *, tz) { struct timespec64 new_ts; struct timezone new_tz; if (tv) { if (get_user(new_ts.tv_sec, &tv->tv_sec) || get_user(new_ts.tv_nsec, &tv->tv_usec)) return -EFAULT; if (new_ts.tv_nsec > USEC_PER_SEC || new_ts.tv_nsec < 0) return -EINVAL; new_ts.tv_nsec *= NSEC_PER_USEC; } if (tz) { if (copy_from_user(&new_tz, tz, sizeof(*tz))) return -EFAULT; } return do_sys_settimeofday64(tv ? &new_ts : NULL, tz ? 
&new_tz : NULL); } #endif #ifdef CONFIG_64BIT SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p) { struct __kernel_timex txc; /* Local copy of parameter */ int ret; /* Copy the user data space into the kernel copy * structure. But bear in mind that the structures * may change */ if (copy_from_user(&txc, txc_p, sizeof(struct __kernel_timex))) return -EFAULT; ret = do_adjtimex(&txc); return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret; } #endif #ifdef CONFIG_COMPAT_32BIT_TIME int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp) { struct old_timex32 tx32; memset(txc, 0, sizeof(struct __kernel_timex)); if (copy_from_user(&tx32, utp, sizeof(struct old_timex32))) return -EFAULT; txc->modes = tx32.modes; txc->offset = tx32.offset; txc->freq = tx32.freq; txc->maxerror = tx32.maxerror; txc->esterror = tx32.esterror; txc->status = tx32.status; txc->constant = tx32.constant; txc->precision = tx32.precision; txc->tolerance = tx32.tolerance; txc->time.tv_sec = tx32.time.tv_sec; txc->time.tv_usec = tx32.time.tv_usec; txc->tick = tx32.tick; txc->ppsfreq = tx32.ppsfreq; txc->jitter = tx32.jitter; txc->shift = tx32.shift; txc->stabil = tx32.stabil; txc->jitcnt = tx32.jitcnt; txc->calcnt = tx32.calcnt; txc->errcnt = tx32.errcnt; txc->stbcnt = tx32.stbcnt; return 0; } int put_old_timex32(struct old_timex32 __user *utp, const struct __kernel_timex *txc) { struct old_timex32 tx32; memset(&tx32, 0, sizeof(struct old_timex32)); tx32.modes = txc->modes; tx32.offset = txc->offset; tx32.freq = txc->freq; tx32.maxerror = txc->maxerror; tx32.esterror = txc->esterror; tx32.status = txc->status; tx32.constant = txc->constant; tx32.precision = txc->precision; tx32.tolerance = txc->tolerance; tx32.time.tv_sec = txc->time.tv_sec; tx32.time.tv_usec = txc->time.tv_usec; tx32.tick = txc->tick; tx32.ppsfreq = txc->ppsfreq; tx32.jitter = txc->jitter; tx32.shift = txc->shift; tx32.stabil = txc->stabil; tx32.jitcnt = txc->jitcnt; tx32.calcnt = txc->calcnt; tx32.errcnt = txc->errcnt; tx32.stbcnt = txc->stbcnt; tx32.tai = txc->tai; if (copy_to_user(utp, &tx32, sizeof(struct old_timex32))) return -EFAULT; return 0; } SYSCALL_DEFINE1(adjtimex_time32, struct old_timex32 __user *, utp) { struct __kernel_timex txc; int err, ret; err = get_old_timex32(&txc, utp); if (err) return err; ret = do_adjtimex(&txc); err = put_old_timex32(utp, &txc); if (err) return err; return ret; } #endif /** * jiffies_to_msecs - Convert jiffies to milliseconds * @j: jiffies value * * Avoid unnecessary multiplications/divisions in the * two most common HZ cases. * * Return: milliseconds value */ unsigned int jiffies_to_msecs(const unsigned long j) { #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) return (MSEC_PER_SEC / HZ) * j; #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); #else # if BITS_PER_LONG == 32 return (HZ_TO_MSEC_MUL32 * j + (1ULL << HZ_TO_MSEC_SHR32) - 1) >> HZ_TO_MSEC_SHR32; # else return DIV_ROUND_UP(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN); # endif #endif } EXPORT_SYMBOL(jiffies_to_msecs); /** * jiffies_to_usecs - Convert jiffies to microseconds * @j: jiffies value * * Return: microseconds value */ unsigned int jiffies_to_usecs(const unsigned long j) { /* * Hz usually doesn't go much further MSEC_PER_SEC. * jiffies_to_usecs() and usecs_to_jiffies() depend on that. 
*/ BUILD_BUG_ON(HZ > USEC_PER_SEC); #if !(USEC_PER_SEC % HZ) return (USEC_PER_SEC / HZ) * j; #else # if BITS_PER_LONG == 32 return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; # else return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN; # endif #endif } EXPORT_SYMBOL(jiffies_to_usecs); /** * mktime64 - Converts date to seconds. * @year0: year to convert * @mon0: month to convert * @day: day to convert * @hour: hour to convert * @min: minute to convert * @sec: second to convert * * Converts Gregorian date to seconds since 1970-01-01 00:00:00. * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. * * [For the Julian calendar (which was used in Russia before 1917, * Britain & colonies before 1752, anywhere else before 1582, * and is still in use by some communities) leave out the * -year/100+year/400 terms, and add 10.] * * This algorithm was first published by Gauss (I think). * * A leap second can be indicated by calling this function with sec as * 60 (allowable under ISO 8601). The leap second is treated the same * as the following second since they don't exist in UNIX time. * * An encoding of midnight at the end of the day as 24:00:00 - ie. midnight * tomorrow - (allowable under ISO 8601) is supported. * * Return: seconds since the epoch time for the given input date */ time64_t mktime64(const unsigned int year0, const unsigned int mon0, const unsigned int day, const unsigned int hour, const unsigned int min, const unsigned int sec) { unsigned int mon = mon0, year = year0; /* 1..12 -> 11,12,1..10 */ if (0 >= (int) (mon -= 2)) { mon += 12; /* Puts Feb last since it has leap day */ year -= 1; } return ((((time64_t) (year/4 - year/100 + year/400 + 367*mon/12 + day) + year*365 - 719499 )*24 + hour /* now have hours - midnight tomorrow handled here */ )*60 + min /* now have minutes */ )*60 + sec; /* finally seconds */ } EXPORT_SYMBOL(mktime64); struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec) { struct timespec64 ts = ns_to_timespec64(nsec); struct __kernel_old_timeval tv; tv.tv_sec = ts.tv_sec; tv.tv_usec = (suseconds_t)ts.tv_nsec / 1000; return tv; } EXPORT_SYMBOL(ns_to_kernel_old_timeval); /** * set_normalized_timespec64 - set timespec sec and nsec parts and normalize * * @ts: pointer to timespec variable to be set * @sec: seconds to set * @nsec: nanoseconds to set * * Set seconds and nanoseconds field of a timespec variable and * normalize to the timespec storage format * * Note: The tv_nsec part is always in the range of 0 <= tv_nsec < NSEC_PER_SEC. * For negative values only the tv_sec field is negative ! */ void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec) { while (nsec >= NSEC_PER_SEC) { /* * The following asm() prevents the compiler from * optimising this loop into a modulo operation. See * also __iter_div_u64_rem() in include/linux/time.h */ asm("" : "+rm"(nsec)); nsec -= NSEC_PER_SEC; ++sec; } while (nsec < 0) { asm("" : "+rm"(nsec)); nsec += NSEC_PER_SEC; --sec; } ts->tv_sec = sec; ts->tv_nsec = nsec; } EXPORT_SYMBOL(set_normalized_timespec64); /** * ns_to_timespec64 - Convert nanoseconds to timespec64 * @nsec: the nanoseconds value to be converted * * Return: the timespec64 representation of the nsec parameter. 
*/ struct timespec64 ns_to_timespec64(s64 nsec) { struct timespec64 ts = { 0, 0 }; s32 rem; if (likely(nsec > 0)) { ts.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); ts.tv_nsec = rem; } else if (nsec < 0) { /* * With negative times, tv_sec points to the earlier * second, and tv_nsec counts the nanoseconds since * then, so tv_nsec is always a positive number. */ ts.tv_sec = -div_u64_rem(-nsec - 1, NSEC_PER_SEC, &rem) - 1; ts.tv_nsec = NSEC_PER_SEC - rem - 1; } return ts; } EXPORT_SYMBOL(ns_to_timespec64); /** * __msecs_to_jiffies: - convert milliseconds to jiffies * @m: time in milliseconds * * conversion is done as follows: * * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) * * - 'too large' values [that would result in larger than * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. * * - all other values are converted to jiffies by either multiplying * the input value by a factor or dividing it with a factor and * handling any 32-bit overflows. * for the details see __msecs_to_jiffies() * * __msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the * code, __msecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. * The _msecs_to_jiffies helpers are the HZ dependent conversion * routines found in include/linux/jiffies.h * * Return: jiffies value */ unsigned long __msecs_to_jiffies(const unsigned int m) { /* * Negative value, means infinite timeout: */ if ((int)m < 0) return MAX_JIFFY_OFFSET; return _msecs_to_jiffies(m); } EXPORT_SYMBOL(__msecs_to_jiffies); /** * __usecs_to_jiffies: - convert microseconds to jiffies * @u: time in milliseconds * * Return: jiffies value */ unsigned long __usecs_to_jiffies(const unsigned int u) { if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) return MAX_JIFFY_OFFSET; return _usecs_to_jiffies(u); } EXPORT_SYMBOL(__usecs_to_jiffies); /** * timespec64_to_jiffies - convert a timespec64 value to jiffies * @value: pointer to &struct timespec64 * * The TICK_NSEC - 1 rounds up the value to the next resolution. Note * that a remainder subtract here would not do the right thing as the * resolution values don't fall on second boundaries. I.e. the line: * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding. * Note that due to the small error in the multiplier here, this * rounding is incorrect for sufficiently large values of tv_nsec, but * well formed timespecs should have tv_nsec < NSEC_PER_SEC, so we're * OK. * * Rather, we just shift the bits off the right. * * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec * value to a scaled second value. * * Return: jiffies value */ unsigned long timespec64_to_jiffies(const struct timespec64 *value) { u64 sec = value->tv_sec; long nsec = value->tv_nsec + TICK_NSEC - 1; if (sec >= MAX_SEC_IN_JIFFIES){ sec = MAX_SEC_IN_JIFFIES; nsec = 0; } return ((sec * SEC_CONVERSION) + (((u64)nsec * NSEC_CONVERSION) >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } EXPORT_SYMBOL(timespec64_to_jiffies); /** * jiffies_to_timespec64 - convert jiffies value to &struct timespec64 * @jiffies: jiffies value * @value: pointer to &struct timespec64 */ void jiffies_to_timespec64(const unsigned long jiffies, struct timespec64 *value) { /* * Convert jiffies to nanoseconds and separate with * one divide. 
*/ u32 rem; value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC, NSEC_PER_SEC, &rem); value->tv_nsec = rem; } EXPORT_SYMBOL(jiffies_to_timespec64); /* * Convert jiffies/jiffies_64 to clock_t and back. */ /** * jiffies_to_clock_t - Convert jiffies to clock_t * @x: jiffies value * * Return: jiffies converted to clock_t (CLOCKS_PER_SEC) */ clock_t jiffies_to_clock_t(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 # if HZ < USER_HZ return x * (USER_HZ / HZ); # else return x / (HZ / USER_HZ); # endif #else return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ); #endif } EXPORT_SYMBOL(jiffies_to_clock_t); /** * clock_t_to_jiffies - Convert clock_t to jiffies * @x: clock_t value * * Return: clock_t value converted to jiffies */ unsigned long clock_t_to_jiffies(unsigned long x) { #if (HZ % USER_HZ)==0 if (x >= ~0UL / (HZ / USER_HZ)) return ~0UL; return x * (HZ / USER_HZ); #else /* Don't worry about loss of precision here .. */ if (x >= ~0UL / HZ * USER_HZ) return ~0UL; /* .. but do try to contain it here */ return div_u64((u64)x * HZ, USER_HZ); #endif } EXPORT_SYMBOL(clock_t_to_jiffies); /** * jiffies_64_to_clock_t - Convert jiffies_64 to clock_t * @x: jiffies_64 value * * Return: jiffies_64 value converted to 64-bit "clock_t" (CLOCKS_PER_SEC) */ u64 jiffies_64_to_clock_t(u64 x) { #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 # if HZ < USER_HZ x = div_u64(x * USER_HZ, HZ); # elif HZ > USER_HZ x = div_u64(x, HZ / USER_HZ); # else /* Nothing to do */ # endif #else /* * There are better ways that don't overflow early, * but even this doesn't overflow in hundreds of years * in 64 bits, so.. */ x = div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ)); #endif return x; } EXPORT_SYMBOL(jiffies_64_to_clock_t); /** * nsec_to_clock_t - Convert nsec value to clock_t * @x: nsec value * * Return: nsec value converted to 64-bit "clock_t" (CLOCKS_PER_SEC) */ u64 nsec_to_clock_t(u64 x) { #if (NSEC_PER_SEC % USER_HZ) == 0 return div_u64(x, NSEC_PER_SEC / USER_HZ); #elif (USER_HZ % 512) == 0 return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512); #else /* * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, * overflow after 64.99 years. * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... */ return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ); #endif } /** * jiffies64_to_nsecs - Convert jiffies64 to nanoseconds * @j: jiffies64 value * * Return: nanoseconds value */ u64 jiffies64_to_nsecs(u64 j) { #if !(NSEC_PER_SEC % HZ) return (NSEC_PER_SEC / HZ) * j; # else return div_u64(j * HZ_TO_NSEC_NUM, HZ_TO_NSEC_DEN); #endif } EXPORT_SYMBOL(jiffies64_to_nsecs); /** * jiffies64_to_msecs - Convert jiffies64 to milliseconds * @j: jiffies64 value * * Return: milliseconds value */ u64 jiffies64_to_msecs(const u64 j) { #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) return (MSEC_PER_SEC / HZ) * j; #else return div_u64(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN); #endif } EXPORT_SYMBOL(jiffies64_to_msecs); /** * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64 * * @n: nsecs in u64 * * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64. * And this doesn't return MAX_JIFFY_OFFSET since this function is designed * for scheduler, not for use in device drivers to calculate timeout value. 
* * note: * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years * * Return: nsecs converted to jiffies64 value */ u64 nsecs_to_jiffies64(u64 n) { #if (NSEC_PER_SEC % HZ) == 0 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ return div_u64(n, NSEC_PER_SEC / HZ); #elif (HZ % 512) == 0 /* overflow after 292 years if HZ = 1024 */ return div_u64(n * HZ / 512, NSEC_PER_SEC / 512); #else /* * Generic case - optimized for cases where HZ is a multiple of 3. * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc. */ return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ); #endif } EXPORT_SYMBOL(nsecs_to_jiffies64); /** * nsecs_to_jiffies - Convert nsecs in u64 to jiffies * * @n: nsecs in u64 * * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64. * And this doesn't return MAX_JIFFY_OFFSET since this function is designed * for scheduler, not for use in device drivers to calculate timeout value. * * note: * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years * * Return: nsecs converted to jiffies value */ unsigned long nsecs_to_jiffies(u64 n) { return (unsigned long)nsecs_to_jiffies64(n); } EXPORT_SYMBOL_GPL(nsecs_to_jiffies); /** * timespec64_add_safe - Add two timespec64 values and do a safety check * for overflow. * @lhs: first (left) timespec64 to add * @rhs: second (right) timespec64 to add * * It's assumed that both values are valid (>= 0). * And, each timespec64 is in normalized form. * * Return: sum of @lhs + @rhs */ struct timespec64 timespec64_add_safe(const struct timespec64 lhs, const struct timespec64 rhs) { struct timespec64 res; set_normalized_timespec64(&res, (timeu64_t) lhs.tv_sec + rhs.tv_sec, lhs.tv_nsec + rhs.tv_nsec); if (unlikely(res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)) { res.tv_sec = TIME64_MAX; res.tv_nsec = 0; } return res; } /** * get_timespec64 - get user's time value into kernel space * @ts: destination &struct timespec64 * @uts: user's time value as &struct __kernel_timespec * * Handles compat or 32-bit modes. * * Return: %0 on success or negative errno on error */ int get_timespec64(struct timespec64 *ts, const struct __kernel_timespec __user *uts) { struct __kernel_timespec kts; int ret; ret = copy_from_user(&kts, uts, sizeof(kts)); if (ret) return -EFAULT; ts->tv_sec = kts.tv_sec; /* Zero out the padding in compat mode */ if (in_compat_syscall()) kts.tv_nsec &= 0xFFFFFFFFUL; /* In 32-bit mode, this drops the padding */ ts->tv_nsec = kts.tv_nsec; return 0; } EXPORT_SYMBOL_GPL(get_timespec64); /** * put_timespec64 - convert timespec64 value to __kernel_timespec format and * copy the latter to userspace * @ts: input &struct timespec64 * @uts: user's &struct __kernel_timespec * * Return: %0 on success or negative errno on error */ int put_timespec64(const struct timespec64 *ts, struct __kernel_timespec __user *uts) { struct __kernel_timespec kts = { .tv_sec = ts->tv_sec, .tv_nsec = ts->tv_nsec }; return copy_to_user(uts, &kts, sizeof(kts)) ? 
-EFAULT : 0; } EXPORT_SYMBOL_GPL(put_timespec64); static int __get_old_timespec32(struct timespec64 *ts64, const struct old_timespec32 __user *cts) { struct old_timespec32 ts; int ret; ret = copy_from_user(&ts, cts, sizeof(ts)); if (ret) return -EFAULT; ts64->tv_sec = ts.tv_sec; ts64->tv_nsec = ts.tv_nsec; return 0; } static int __put_old_timespec32(const struct timespec64 *ts64, struct old_timespec32 __user *cts) { struct old_timespec32 ts = { .tv_sec = ts64->tv_sec, .tv_nsec = ts64->tv_nsec }; return copy_to_user(cts, &ts, sizeof(ts)) ? -EFAULT : 0; } /** * get_old_timespec32 - get user's old-format time value into kernel space * @ts: destination &struct timespec64 * @uts: user's old-format time value (&struct old_timespec32) * * Handles X86_X32_ABI compatibility conversion. * * Return: %0 on success or negative errno on error */ int get_old_timespec32(struct timespec64 *ts, const void __user *uts) { if (COMPAT_USE_64BIT_TIME) return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0; else return __get_old_timespec32(ts, uts); } EXPORT_SYMBOL_GPL(get_old_timespec32); /** * put_old_timespec32 - convert timespec64 value to &struct old_timespec32 and * copy the latter to userspace * @ts: input &struct timespec64 * @uts: user's &struct old_timespec32 * * Handles X86_X32_ABI compatibility conversion. * * Return: %0 on success or negative errno on error */ int put_old_timespec32(const struct timespec64 *ts, void __user *uts) { if (COMPAT_USE_64BIT_TIME) return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0; else return __put_old_timespec32(ts, uts); } EXPORT_SYMBOL_GPL(put_old_timespec32); /** * get_itimerspec64 - get user's &struct __kernel_itimerspec into kernel space * @it: destination &struct itimerspec64 * @uit: user's &struct __kernel_itimerspec * * Return: %0 on success or negative errno on error */ int get_itimerspec64(struct itimerspec64 *it, const struct __kernel_itimerspec __user *uit) { int ret; ret = get_timespec64(&it->it_interval, &uit->it_interval); if (ret) return ret; ret = get_timespec64(&it->it_value, &uit->it_value); return ret; } EXPORT_SYMBOL_GPL(get_itimerspec64); /** * put_itimerspec64 - convert &struct itimerspec64 to __kernel_itimerspec format * and copy the latter to userspace * @it: input &struct itimerspec64 * @uit: user's &struct __kernel_itimerspec * * Return: %0 on success or negative errno on error */ int put_itimerspec64(const struct itimerspec64 *it, struct __kernel_itimerspec __user *uit) { int ret; ret = put_timespec64(&it->it_interval, &uit->it_interval); if (ret) return ret; ret = put_timespec64(&it->it_value, &uit->it_value); return ret; } EXPORT_SYMBOL_GPL(put_itimerspec64); /** * get_old_itimerspec32 - get user's &struct old_itimerspec32 into kernel space * @its: destination &struct itimerspec64 * @uits: user's &struct old_itimerspec32 * * Return: %0 on success or negative errno on error */ int get_old_itimerspec32(struct itimerspec64 *its, const struct old_itimerspec32 __user *uits) { if (__get_old_timespec32(&its->it_interval, &uits->it_interval) || __get_old_timespec32(&its->it_value, &uits->it_value)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(get_old_itimerspec32); /** * put_old_itimerspec32 - convert &struct itimerspec64 to &struct * old_itimerspec32 and copy the latter to userspace * @its: input &struct itimerspec64 * @uits: user's &struct old_itimerspec32 * * Return: %0 on success or negative errno on error */ int put_old_itimerspec32(const struct itimerspec64 *its, struct old_itimerspec32 __user *uits) { if 
(__put_old_timespec32(&its->it_interval, &uits->it_interval) || __put_old_timespec32(&its->it_value, &uits->it_value)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(put_old_itimerspec32);
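/*
 * Editor's illustrative sketch, not part of the kernel sources above: a
 * small stand-alone user-space program that applies the same Gauss-style
 * formula as mktime64() to turn a Gregorian date into seconds since
 * 1970-01-01 00:00:00 UTC. The name demo_mktime64() is made up for this
 * example; only the arithmetic mirrors the kernel function.
 */
#include <stdio.h>
#include <stdint.h>

static int64_t demo_mktime64(unsigned int year, unsigned int mon,
			     unsigned int day, unsigned int hour,
			     unsigned int min, unsigned int sec)
{
	/* Shift months so February lands at the end of the "year". */
	if ((int)(mon -= 2) <= 0) {
		mon += 12;
		year -= 1;
	}
	return ((((int64_t)(year / 4 - year / 100 + year / 400 +
			    367 * mon / 12 + day) +
		  year * 365 - 719499) * 24 + hour) * 60 + min) * 60 + sec;
}

int main(void)
{
	/* 1980-12-31 23:59:59 -> 347155199 seconds since the epoch. */
	printf("%lld\n", (long long)demo_mktime64(1980, 12, 31, 23, 59, 59));
	return 0;
}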
// SPDX-License-Identifier: GPL-2.0 /* * Detect hard and soft lockups on a system * * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. * * Note: Most of this code is borrowed heavily from the original softlockup * detector, so thanks to Ingo for the initial implementation. * Some chunks also taken from the old x86-specific nmi watchdog code, thanks * to those contributors as well. */ #define pr_fmt(fmt) "watchdog: " fmt #include <linux/mm.h> #include <linux/cpu.h> #include <linux/nmi.h> #include <linux/init.h> #include <linux/module.h> #include <linux/sysctl.h> #include <linux/tick.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/isolation.h> #include <linux/stop_machine.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> static DEFINE_MUTEX(watchdog_mutex); #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HARDLOCKUP_DETECTOR_SPARC64) # define WATCHDOG_HARDLOCKUP_DEFAULT 1 #else # define WATCHDOG_HARDLOCKUP_DEFAULT 0 #endif unsigned long __read_mostly watchdog_enabled; int __read_mostly watchdog_user_enabled = 1; static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT; static int __read_mostly watchdog_softlockup_user_enabled = 1; int __read_mostly watchdog_thresh = 10; static int __read_mostly watchdog_hardlockup_available; struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); #ifdef CONFIG_HARDLOCKUP_DETECTOR # ifdef CONFIG_SMP int __read_mostly sysctl_hardlockup_all_cpu_backtrace; # endif /* CONFIG_SMP */ /* * Should we panic when a soft-lockup or hard-lockup occurs: */ unsigned int __read_mostly hardlockup_panic = IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC); /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. In these * cases this function can be called to disable hard lockup detection. This * function should only be executed once by the boot processor before the * kernel command line parameters are parsed, because otherwise it is not * possible to override this in hardlockup_panic_setup(). 
*/ void __init hardlockup_detector_disable(void) { watchdog_hardlockup_user_enabled = 0; } static int __init hardlockup_panic_setup(char *str) { if (!strncmp(str, "panic", 5)) hardlockup_panic = 1; else if (!strncmp(str, "nopanic", 7)) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) watchdog_hardlockup_user_enabled = 0; else if (!strncmp(str, "1", 1)) watchdog_hardlockup_user_enabled = 1; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); #endif /* CONFIG_HARDLOCKUP_DETECTOR */ #if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER) static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts); static DEFINE_PER_CPU(int, hrtimer_interrupts_saved); static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned); static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched); static unsigned long hard_lockup_nmi_warn; notrace void arch_touch_nmi_watchdog(void) { /* * Using __raw here because some code paths have * preemption enabled. If preemption is enabled * then interrupts should be enabled too, in which * case we shouldn't have to worry about the watchdog * going off. */ raw_cpu_write(watchdog_hardlockup_touched, true); } EXPORT_SYMBOL(arch_touch_nmi_watchdog); void watchdog_hardlockup_touch_cpu(unsigned int cpu) { per_cpu(watchdog_hardlockup_touched, cpu) = true; } static bool is_hardlockup(unsigned int cpu) { int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) return true; /* * NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE * for hrtimer_interrupts_saved. hrtimer_interrupts_saved is * written/read by a single CPU. */ per_cpu(hrtimer_interrupts_saved, cpu) = hrint; return false; } static void watchdog_hardlockup_kick(void) { int new_interrupts; new_interrupts = atomic_inc_return(this_cpu_ptr(&hrtimer_interrupts)); watchdog_buddy_check_hardlockup(new_interrupts); } void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) { if (per_cpu(watchdog_hardlockup_touched, cpu)) { per_cpu(watchdog_hardlockup_touched, cpu) = false; return; } /* * Check for a hardlockup by making sure the CPU's timer * interrupt is incrementing. The timer interrupt should have * fired multiple times before we overflow'd. If it hasn't * then this is a good indication the cpu is stuck */ if (is_hardlockup(cpu)) { unsigned int this_cpu = smp_processor_id(); unsigned long flags; /* Only print hardlockups once. */ if (per_cpu(watchdog_hardlockup_warned, cpu)) return; /* * Prevent multiple hard-lockup reports if one cpu is already * engaged in dumping all cpu back traces. */ if (sysctl_hardlockup_all_cpu_backtrace) { if (test_and_set_bit_lock(0, &hard_lockup_nmi_warn)) return; } /* * NOTE: we call printk_cpu_sync_get_irqsave() after printing * the lockup message. While it would be nice to serialize * that printout, we really want to make sure that if some * other CPU somehow locked up while holding the lock associated * with printk_cpu_sync_get_irqsave() that we can still at least * get the message about the lockup out. 
*/ pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu); printk_cpu_sync_get_irqsave(flags); print_modules(); print_irqtrace_events(current); if (cpu == this_cpu) { if (regs) show_regs(regs); else dump_stack(); printk_cpu_sync_put_irqrestore(flags); } else { printk_cpu_sync_put_irqrestore(flags); trigger_single_cpu_backtrace(cpu); } if (sysctl_hardlockup_all_cpu_backtrace) { trigger_allbutcpu_cpu_backtrace(cpu); if (!hardlockup_panic) clear_bit_unlock(0, &hard_lockup_nmi_warn); } if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); per_cpu(watchdog_hardlockup_warned, cpu) = true; } else { per_cpu(watchdog_hardlockup_warned, cpu) = false; } } #else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ static inline void watchdog_hardlockup_kick(void) { } #endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ /* * These functions can be overridden based on the configured hardlockdup detector. * * watchdog_hardlockup_enable/disable can be implemented to start and stop when * softlockup watchdog start and stop. The detector must select the * SOFTLOCKUP_DETECTOR Kconfig. */ void __weak watchdog_hardlockup_enable(unsigned int cpu) { } void __weak watchdog_hardlockup_disable(unsigned int cpu) { } /* * Watchdog-detector specific API. * * Return 0 when hardlockup watchdog is available, negative value otherwise. * Note that the negative value means that a delayed probe might * succeed later. */ int __weak __init watchdog_hardlockup_probe(void) { return -ENODEV; } /** * watchdog_hardlockup_stop - Stop the watchdog for reconfiguration * * The reconfiguration steps are: * watchdog_hardlockup_stop(); * update_variables(); * watchdog_hardlockup_start(); */ void __weak watchdog_hardlockup_stop(void) { } /** * watchdog_hardlockup_start - Start the watchdog after reconfiguration * * Counterpart to watchdog_hardlockup_stop(). * * The following variables have been updated in update_variables() and * contain the currently valid configuration: * - watchdog_enabled * - watchdog_thresh * - watchdog_cpumask */ void __weak watchdog_hardlockup_start(void) { } /** * lockup_detector_update_enable - Update the sysctl enable bit * * Caller needs to make sure that the hard watchdogs are off, so this * can't race with watchdog_hardlockup_disable(). */ static void lockup_detector_update_enable(void) { watchdog_enabled = 0; if (!watchdog_user_enabled) return; if (watchdog_hardlockup_available && watchdog_hardlockup_user_enabled) watchdog_enabled |= WATCHDOG_HARDLOCKUP_ENABLED; if (watchdog_softlockup_user_enabled) watchdog_enabled |= WATCHDOG_SOFTOCKUP_ENABLED; } #ifdef CONFIG_SOFTLOCKUP_DETECTOR /* * Delay the soflockup report when running a known slow code. * It does _not_ affect the timestamp of the last successdul reschedule. */ #define SOFTLOCKUP_DELAY_REPORT ULONG_MAX #ifdef CONFIG_SMP int __read_mostly sysctl_softlockup_all_cpu_backtrace; #endif static struct cpumask watchdog_allowed_mask __read_mostly; /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC); static bool softlockup_initialized __read_mostly; static u64 __read_mostly sample_period; /* Timestamp taken after the last successful reschedule. */ static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); /* Timestamp of the last softlockup report. 
*/ static DEFINE_PER_CPU(unsigned long, watchdog_report_ts); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static unsigned long soft_lockup_nmi_warn; static int __init softlockup_panic_setup(char *str) { softlockup_panic = simple_strtoul(str, NULL, 0); return 1; } __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { watchdog_user_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); static int __init nosoftlockup_setup(char *str) { watchdog_softlockup_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); static int __init watchdog_thresh_setup(char *str) { get_option(&str, &watchdog_thresh); return 1; } __setup("watchdog_thresh=", watchdog_thresh_setup); static void __lockup_detector_cleanup(void); /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- * lockups can have false positives under extreme conditions. So we generally * want a higher threshold for soft lockups than for hard lockups. So we couple * the thresholds with a factor: we make the soft threshold twice the amount of * time the hard threshold is. */ static int get_softlockup_thresh(void) { return watchdog_thresh * 2; } /* * Returns seconds, approximately. We don't need nanosecond * resolution, and we don't need to waste time with a big divide when * 2^30ns == 1.074s. */ static unsigned long get_timestamp(void) { return running_clock() >> 30LL; /* 2^30 ~= 10^9 */ } static void set_sample_period(void) { /* * convert watchdog_thresh from seconds to ns * the divide by 5 is to give hrtimer several chances (two * or three with the current relation between the soft * and hard thresholds) to increment before the * hardlockup detector generates a warning */ sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); watchdog_update_hrtimer_threshold(sample_period); } static void update_report_ts(void) { __this_cpu_write(watchdog_report_ts, get_timestamp()); } /* Commands for resetting the watchdog */ static void update_touch_ts(void) { __this_cpu_write(watchdog_touch_ts, get_timestamp()); update_report_ts(); } /** * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls * * Call when the scheduler may have stalled for legitimate reasons * preventing the watchdog task from executing - e.g. the scheduler * entering idle state. This should only be used for scheduler events. * Use touch_softlockup_watchdog() for everything else. */ notrace void touch_softlockup_watchdog_sched(void) { /* * Preemption can be enabled. It doesn't matter which CPU's watchdog * report period gets restarted here, so use the raw_ operation. */ raw_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT); } notrace void touch_softlockup_watchdog(void) { touch_softlockup_watchdog_sched(); wq_watchdog_touch(raw_smp_processor_id()); } EXPORT_SYMBOL(touch_softlockup_watchdog); void touch_all_softlockup_watchdogs(void) { int cpu; /* * watchdog_mutex cannpt be taken here, as this might be called * from (soft)interrupt context, so the access to * watchdog_allowed_cpumask might race with a concurrent update. * * The watchdog time stamp can race against a concurrent real * update as well, the only side effect might be a cycle delay for * the softlockup check. 
*/ for_each_cpu(cpu, &watchdog_allowed_mask) { per_cpu(watchdog_report_ts, cpu) = SOFTLOCKUP_DELAY_REPORT; wq_watchdog_touch(cpu); } } void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); __this_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT); } static int is_softlockup(unsigned long touch_ts, unsigned long period_ts, unsigned long now) { if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) { /* Warn about unreasonable delays. */ if (time_after(now, period_ts + get_softlockup_thresh())) return now - touch_ts; } return 0; } /* watchdog detector functions */ static DEFINE_PER_CPU(struct completion, softlockup_completion); static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work); /* * The watchdog feed function - touches the timestamp. * * It only runs once every sample_period seconds (4 seconds by * default) to reset the softlockup timestamp. If this gets delayed * for more than 2*watchdog_thresh seconds then the debug-printout * triggers in watchdog_timer_fn(). */ static int softlockup_fn(void *data) { update_touch_ts(); complete(this_cpu_ptr(&softlockup_completion)); return 0; } /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { unsigned long touch_ts, period_ts, now; struct pt_regs *regs = get_irq_regs(); int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; unsigned long flags; if (!watchdog_enabled) return HRTIMER_NORESTART; watchdog_hardlockup_kick(); /* kick the softlockup detector */ if (completion_done(this_cpu_ptr(&softlockup_completion))) { reinit_completion(this_cpu_ptr(&softlockup_completion)); stop_one_cpu_nowait(smp_processor_id(), softlockup_fn, NULL, this_cpu_ptr(&softlockup_stop_work)); } /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); /* * Read the current timestamp first. It might become invalid anytime * when a virtual machine is stopped by the host or when the watchog * is touched from NMI. */ now = get_timestamp(); /* * If a virtual machine is stopped by the host it can look to * the watchdog like a soft lockup. This function touches the watchdog. */ kvm_check_and_clear_guest_paused(); /* * The stored timestamp is comparable with @now only when not touched. * It might get touched anytime from NMI. Make sure that is_softlockup() * uses the same (valid) value. */ period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts)); /* Reset the interval when touched by known problematic code. */ if (period_ts == SOFTLOCKUP_DELAY_REPORT) { if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically * make sure the scheduler tick is up to date. */ __this_cpu_write(softlockup_touch_sync, false); sched_clock_tick(); } update_report_ts(); return HRTIMER_RESTART; } /* Check for a softlockup. */ touch_ts = __this_cpu_read(watchdog_touch_ts); duration = is_softlockup(touch_ts, period_ts, now); if (unlikely(duration)) { /* * Prevent multiple soft-lockup reports if one cpu is already * engaged in dumping all cpu back traces. */ if (softlockup_all_cpu_backtrace) { if (test_and_set_bit_lock(0, &soft_lockup_nmi_warn)) return HRTIMER_RESTART; } /* Start period for the next softlockup warning. */ update_report_ts(); printk_cpu_sync_get_irqsave(flags); pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! 
[%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); print_modules(); print_irqtrace_events(current); if (regs) show_regs(regs); else dump_stack(); printk_cpu_sync_put_irqrestore(flags); if (softlockup_all_cpu_backtrace) { trigger_allbutcpu_cpu_backtrace(smp_processor_id()); if (!softlockup_panic) clear_bit_unlock(0, &soft_lockup_nmi_warn); } add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); } return HRTIMER_RESTART; } static void watchdog_enable(unsigned int cpu) { struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); struct completion *done = this_cpu_ptr(&softlockup_completion); WARN_ON_ONCE(cpu != smp_processor_id()); init_completion(done); complete(done); /* * Start the timer first to prevent the hardlockup watchdog triggering * before the timer has a chance to fire. */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); hrtimer->function = watchdog_timer_fn; hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL_PINNED_HARD); /* Initialize timestamp */ update_touch_ts(); /* Enable the hardlockup detector */ if (watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED) watchdog_hardlockup_enable(cpu); } static void watchdog_disable(unsigned int cpu) { struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); WARN_ON_ONCE(cpu != smp_processor_id()); /* * Disable the hardlockup detector first. That prevents that a large * delay between disabling the timer and disabling the hardlockup * detector causes a false positive. */ watchdog_hardlockup_disable(cpu); hrtimer_cancel(hrtimer); wait_for_completion(this_cpu_ptr(&softlockup_completion)); } static int softlockup_stop_fn(void *data) { watchdog_disable(smp_processor_id()); return 0; } static void softlockup_stop_all(void) { int cpu; if (!softlockup_initialized) return; for_each_cpu(cpu, &watchdog_allowed_mask) smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false); cpumask_clear(&watchdog_allowed_mask); } static int softlockup_start_fn(void *data) { watchdog_enable(smp_processor_id()); return 0; } static void softlockup_start_all(void) { int cpu; cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); for_each_cpu(cpu, &watchdog_allowed_mask) smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false); } int lockup_detector_online_cpu(unsigned int cpu) { if (cpumask_test_cpu(cpu, &watchdog_allowed_mask)) watchdog_enable(cpu); return 0; } int lockup_detector_offline_cpu(unsigned int cpu) { if (cpumask_test_cpu(cpu, &watchdog_allowed_mask)) watchdog_disable(cpu); return 0; } static void __lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_hardlockup_stop(); softlockup_stop_all(); set_sample_period(); lockup_detector_update_enable(); if (watchdog_enabled && watchdog_thresh) softlockup_start_all(); watchdog_hardlockup_start(); cpus_read_unlock(); /* * Must be called outside the cpus locked section to prevent * recursive locking in the perf code. */ __lockup_detector_cleanup(); } void lockup_detector_reconfigure(void) { mutex_lock(&watchdog_mutex); __lockup_detector_reconfigure(); mutex_unlock(&watchdog_mutex); } /* * Create the watchdog infrastructure and configure the detector(s). */ static __init void lockup_detector_setup(void) { /* * If sysctl is off and watchdog got disabled on the command line, * nothing to do here. 
*/ lockup_detector_update_enable(); if (!IS_ENABLED(CONFIG_SYSCTL) && !(watchdog_enabled && watchdog_thresh)) return; mutex_lock(&watchdog_mutex); __lockup_detector_reconfigure(); softlockup_initialized = true; mutex_unlock(&watchdog_mutex); } #else /* CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_hardlockup_stop(); lockup_detector_update_enable(); watchdog_hardlockup_start(); cpus_read_unlock(); } void lockup_detector_reconfigure(void) { __lockup_detector_reconfigure(); } static inline void lockup_detector_setup(void) { __lockup_detector_reconfigure(); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_cleanup(void) { lockdep_assert_held(&watchdog_mutex); hardlockup_detector_perf_cleanup(); } /** * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes * * Caller must not hold the cpu hotplug rwsem. */ void lockup_detector_cleanup(void) { mutex_lock(&watchdog_mutex); __lockup_detector_cleanup(); mutex_unlock(&watchdog_mutex); } /** * lockup_detector_soft_poweroff - Interface to stop lockup detector(s) * * Special interface for parisc. It prevents lockup detector warnings from * the default pm_poweroff() function which busy loops forever. */ void lockup_detector_soft_poweroff(void) { watchdog_enabled = 0; } #ifdef CONFIG_SYSCTL /* Propagate any changes to the watchdog infrastructure */ static void proc_watchdog_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); __lockup_detector_reconfigure(); } /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * * caller | table->data points to | 'which' * -------------------|----------------------------------|------------------------------- * proc_watchdog | watchdog_user_enabled | WATCHDOG_HARDLOCKUP_ENABLED | * | | WATCHDOG_SOFTOCKUP_ENABLED * -------------------|----------------------------------|------------------------------- * proc_nmi_watchdog | watchdog_hardlockup_user_enabled | WATCHDOG_HARDLOCKUP_ENABLED * -------------------|----------------------------------|------------------------------- * proc_soft_watchdog | watchdog_softlockup_user_enabled | WATCHDOG_SOFTOCKUP_ENABLED */ static int proc_watchdog_common(int which, struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err, old, *param = table->data; mutex_lock(&watchdog_mutex); if (!write) { /* * On read synchronize the userspace interface. This is a * racy snapshot. 
*/ *param = (watchdog_enabled & which) != 0; err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); } else { old = READ_ONCE(*param); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!err && old != READ_ONCE(*param)) proc_watchdog_update(); } mutex_unlock(&watchdog_mutex); return err; } /* * /proc/sys/kernel/watchdog */ static int proc_watchdog(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED | WATCHDOG_SOFTOCKUP_ENABLED, table, write, buffer, lenp, ppos); } /* * /proc/sys/kernel/nmi_watchdog */ static int proc_nmi_watchdog(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (!watchdog_hardlockup_available && write) return -ENOTSUPP; return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED, table, write, buffer, lenp, ppos); } #ifdef CONFIG_SOFTLOCKUP_DETECTOR /* * /proc/sys/kernel/soft_watchdog */ static int proc_soft_watchdog(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED, table, write, buffer, lenp, ppos); } #endif /* * /proc/sys/kernel/watchdog_thresh */ static int proc_watchdog_thresh(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err, old; mutex_lock(&watchdog_mutex); old = READ_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!err && write && old != READ_ONCE(watchdog_thresh)) proc_watchdog_update(); mutex_unlock(&watchdog_mutex); return err; } /* * The cpumask is the mask of possible cpus that the watchdog can run * on, not the mask of cpus it is actually running on. This allows the * user to specify a mask that will include cpus that have not yet * been brought online, if desired. 
*/ static int proc_watchdog_cpumask(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err; mutex_lock(&watchdog_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); if (!err && write) proc_watchdog_update(); mutex_unlock(&watchdog_mutex); return err; } static const int sixty = 60; static struct ctl_table watchdog_sysctls[] = { { .procname = "watchdog", .data = &watchdog_user_enabled, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_watchdog, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "watchdog_thresh", .data = &watchdog_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_watchdog_thresh, .extra1 = SYSCTL_ZERO, .extra2 = (void *)&sixty, }, { .procname = "watchdog_cpumask", .data = &watchdog_cpumask_bits, .maxlen = NR_CPUS, .mode = 0644, .proc_handler = proc_watchdog_cpumask, }, #ifdef CONFIG_SOFTLOCKUP_DETECTOR { .procname = "soft_watchdog", .data = &watchdog_softlockup_user_enabled, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_soft_watchdog, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "softlockup_panic", .data = &softlockup_panic, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_SMP { .procname = "softlockup_all_cpu_backtrace", .data = &sysctl_softlockup_all_cpu_backtrace, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_SMP */ #endif #ifdef CONFIG_HARDLOCKUP_DETECTOR { .procname = "hardlockup_panic", .data = &hardlockup_panic, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_SMP { .procname = "hardlockup_all_cpu_backtrace", .data = &sysctl_hardlockup_all_cpu_backtrace, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_SMP */ #endif {} }; static struct ctl_table watchdog_hardlockup_sysctl[] = { { .procname = "nmi_watchdog", .data = &watchdog_hardlockup_user_enabled, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_nmi_watchdog, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, {} }; static void __init watchdog_sysctl_init(void) { register_sysctl_init("kernel", watchdog_sysctls); if (watchdog_hardlockup_available) watchdog_hardlockup_sysctl[0].mode = 0644; register_sysctl_init("kernel", watchdog_hardlockup_sysctl); } #else #define watchdog_sysctl_init() do { } while (0) #endif /* CONFIG_SYSCTL */ static void __init lockup_detector_delay_init(struct work_struct *work); static bool allow_lockup_detector_init_retry __initdata; static struct work_struct detector_work __initdata = __WORK_INITIALIZER(detector_work, lockup_detector_delay_init); static void __init lockup_detector_delay_init(struct work_struct *work) { int ret; ret = watchdog_hardlockup_probe(); if (ret) { pr_info("Delayed init of the lockup detector failed: %d\n", ret); pr_info("Hard watchdog permanently disabled\n"); return; } allow_lockup_detector_init_retry = false; watchdog_hardlockup_available = true; lockup_detector_setup(); } /* * lockup_detector_retry_init - retry init lockup detector if possible. * * Retry hardlockup detector init. It is useful when it requires some * functionality that has to be initialized later on a particular * platform. 
*/ void __init lockup_detector_retry_init(void) { /* Must be called before late init calls */ if (!allow_lockup_detector_init_retry) return; schedule_work(&detector_work); } /* * Ensure that optional delayed hardlockup init is proceed before * the init code and memory is freed. */ static int __init lockup_detector_check(void) { /* Prevent any later retry. */ allow_lockup_detector_init_retry = false; /* Make sure no work is pending. */ flush_work(&detector_work); watchdog_sysctl_init(); return 0; } late_initcall_sync(lockup_detector_check); void __init lockup_detector_init(void) { if (tick_nohz_full_enabled()) pr_info("Disabling watchdog on nohz_full cores by default\n"); cpumask_copy(&watchdog_cpumask, housekeeping_cpumask(HK_TYPE_TIMER)); if (!watchdog_hardlockup_probe()) watchdog_hardlockup_available = true; else allow_lockup_detector_init_retry = true; lockup_detector_setup(); }
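/*
 * Editor's illustrative sketch, not part of the kernel sources above: the
 * arithmetic that ties watchdog_thresh to the softlockup hrtimer period.
 * With the default watchdog_thresh of 10 s the soft threshold is 20 s and
 * the per-CPU hrtimer fires every 4 s, so the touch timestamp gets several
 * chances to advance before a report is emitted. The demo_* names are
 * made up for this example.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_NSEC_PER_SEC 1000000000ULL

static uint64_t demo_sample_period(int watchdog_thresh)
{
	/* get_softlockup_thresh(): the soft threshold is twice the hard one. */
	int softlockup_thresh = watchdog_thresh * 2;

	/* set_sample_period(): one fifth of the soft threshold, in ns. */
	return (uint64_t)softlockup_thresh * (DEMO_NSEC_PER_SEC / 5);
}

int main(void)
{
	printf("thresh=10s -> sample period = %llu ns\n",
	       (unsigned long long)demo_sample_period(10));
	return 0;
}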
/* SPDX-License-Identifier: GPL-2.0 */ /* * Wrapper functions for accessing the file_struct fd array. */ #ifndef __LINUX_FILE_H #define __LINUX_FILE_H #include <linux/compiler.h> #include <linux/types.h> #include <linux/posix_types.h> #include <linux/errno.h> #include <linux/cleanup.h> struct file; extern void fput(struct file *); struct file_operations; struct task_struct; struct vfsmount; struct dentry; struct inode; struct path; extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); extern struct file *alloc_file_clone(struct file *, int flags, const struct file_operations *); static inline void fput_light(struct file *file, int fput_needed) { if (fput_needed) fput(file); } struct fd { struct file *file; unsigned int flags; }; #define FDPUT_FPUT 1 #define FDPUT_POS_UNLOCK 2 static inline void fdput(struct fd fd) { if (fd.flags & FDPUT_FPUT) fput(fd.file); } extern struct file *fget(unsigned int fd); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_task(struct task_struct *task, unsigned int fd); extern unsigned long __fdget(unsigned int fd); extern unsigned long __fdget_raw(unsigned int fd); extern unsigned long __fdget_pos(unsigned int fd); extern void __f_unlock_pos(struct file *); static inline struct fd __to_fd(unsigned long v) { return (struct fd){(struct file *)(v & ~3),v & 3}; } static inline struct fd fdget(unsigned int fd) { return __to_fd(__fdget(fd)); } static inline struct fd fdget_raw(unsigned int fd) { return __to_fd(__fdget_raw(fd)); } static inline struct fd fdget_pos(int fd) { return __to_fd(__fdget_pos(fd)); } static inline void fdput_pos(struct fd f) { if (f.flags & FDPUT_POS_UNLOCK) __f_unlock_pos(f.file); fdput(f); } DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), get_unused_fd_flags(flags), unsigned flags) extern void fd_install(unsigned int fd, struct file *file); int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags); extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max; #endif /* __LINUX_FILE_H */
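/*
 * Editor's illustrative sketch, not part of the kernel sources above:
 * a user-space demonstration of the pointer-plus-flags packing that
 * __to_fd() undoes. Because a struct file pointer is at least 4-byte
 * aligned, its two low bits are free to carry FDPUT_FPUT and
 * FDPUT_POS_UNLOCK in a single unsigned long. The demo_* names and
 * struct demo_file are made up for this example.
 */
#include <stdio.h>

struct demo_file { int dummy; };

struct demo_fd {
	struct demo_file *file;
	unsigned int flags;
};

#define DEMO_FDPUT_FPUT		1
#define DEMO_FDPUT_POS_UNLOCK	2

static unsigned long demo_pack(struct demo_file *file, unsigned int flags)
{
	/* The low two bits of an aligned pointer are zero; reuse them. */
	return (unsigned long)file | (flags & 3);
}

static struct demo_fd demo_to_fd(unsigned long v)
{
	/* Mirrors __to_fd(): mask off the flag bits to recover the pointer. */
	return (struct demo_fd){ (struct demo_file *)(v & ~3UL), v & 3 };
}

int main(void)
{
	struct demo_file f;
	unsigned long packed = demo_pack(&f, DEMO_FDPUT_FPUT);
	struct demo_fd fd = demo_to_fd(packed);

	printf("pointer recovered: %d, flags: %u\n", fd.file == &f, fd.flags);
	return 0;
}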
// SPDX-License-Identifier: GPL-2.0-only /*************************************************************************** * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * * * * Based on Logitech G13 driver (v0.4) * * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * * * ***************************************************************************/ #include <linux/hid.h> #include <linux/hid-debug.h> #include <linux/input.h> #include "hid-ids.h" #include <linux/fb.h> #include <linux/vmalloc.h> #include <linux/completion.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/string.h> #include "hid-picolcd.h" /* Input device * * The PicoLCD has an IR receiver header, a built-in keypad with 5 keys * and header for 4x4 key matrix. The built-in keys are part of the matrix. 
*/ static const unsigned short def_keymap[PICOLCD_KEYS] = { KEY_RESERVED, /* none */ KEY_BACK, /* col 4 + row 1 */ KEY_HOMEPAGE, /* col 3 + row 1 */ KEY_RESERVED, /* col 2 + row 1 */ KEY_RESERVED, /* col 1 + row 1 */ KEY_SCROLLUP, /* col 4 + row 2 */ KEY_OK, /* col 3 + row 2 */ KEY_SCROLLDOWN, /* col 2 + row 2 */ KEY_RESERVED, /* col 1 + row 2 */ KEY_RESERVED, /* col 4 + row 3 */ KEY_RESERVED, /* col 3 + row 3 */ KEY_RESERVED, /* col 2 + row 3 */ KEY_RESERVED, /* col 1 + row 3 */ KEY_RESERVED, /* col 4 + row 4 */ KEY_RESERVED, /* col 3 + row 4 */ KEY_RESERVED, /* col 2 + row 4 */ KEY_RESERVED, /* col 1 + row 4 */ }; /* Find a given report */ struct hid_report *picolcd_report(int id, struct hid_device *hdev, int dir) { struct list_head *feature_report_list = &hdev->report_enum[dir].report_list; struct hid_report *report = NULL; list_for_each_entry(report, feature_report_list, list) { if (report->id == id) return report; } hid_warn(hdev, "No report with id 0x%x found\n", id); return NULL; } /* Submit a report and wait for a reply from device - if device fades away * or does not respond in time, return NULL */ struct picolcd_pending *picolcd_send_and_wait(struct hid_device *hdev, int report_id, const u8 *raw_data, int size) { struct picolcd_data *data = hid_get_drvdata(hdev); struct picolcd_pending *work; struct hid_report *report = picolcd_out_report(report_id, hdev); unsigned long flags; int i, j, k; if (!report || !data) return NULL; if (data->status & PICOLCD_FAILED) return NULL; work = kzalloc(sizeof(*work), GFP_KERNEL); if (!work) return NULL; init_completion(&work->ready); work->out_report = report; work->in_report = NULL; work->raw_size = 0; mutex_lock(&data->mutex); spin_lock_irqsave(&data->lock, flags); for (i = k = 0; i < report->maxfield; i++) for (j = 0; j < report->field[i]->report_count; j++) { hid_set_field(report->field[i], j, k < size ? 
raw_data[k] : 0); k++; } if (data->status & PICOLCD_FAILED) { kfree(work); work = NULL; } else { data->pending = work; hid_hw_request(data->hdev, report, HID_REQ_SET_REPORT); spin_unlock_irqrestore(&data->lock, flags); wait_for_completion_interruptible_timeout(&work->ready, HZ*2); spin_lock_irqsave(&data->lock, flags); data->pending = NULL; } spin_unlock_irqrestore(&data->lock, flags); mutex_unlock(&data->mutex); return work; } /* * input class device */ static int picolcd_raw_keypad(struct picolcd_data *data, struct hid_report *report, u8 *raw_data, int size) { /* * Keypad event * First and second data bytes list currently pressed keys, * 0x00 means no key and at most 2 keys may be pressed at same time */ int i, j; /* determine newly pressed keys */ for (i = 0; i < size; i++) { unsigned int key_code; if (raw_data[i] == 0) continue; for (j = 0; j < sizeof(data->pressed_keys); j++) if (data->pressed_keys[j] == raw_data[i]) goto key_already_down; for (j = 0; j < sizeof(data->pressed_keys); j++) if (data->pressed_keys[j] == 0) { data->pressed_keys[j] = raw_data[i]; break; } input_event(data->input_keys, EV_MSC, MSC_SCAN, raw_data[i]); if (raw_data[i] < PICOLCD_KEYS) key_code = data->keycode[raw_data[i]]; else key_code = KEY_UNKNOWN; if (key_code != KEY_UNKNOWN) { dbg_hid(PICOLCD_NAME " got key press for %u:%d", raw_data[i], key_code); input_report_key(data->input_keys, key_code, 1); } input_sync(data->input_keys); key_already_down: continue; } /* determine newly released keys */ for (j = 0; j < sizeof(data->pressed_keys); j++) { unsigned int key_code; if (data->pressed_keys[j] == 0) continue; for (i = 0; i < size; i++) if (data->pressed_keys[j] == raw_data[i]) goto key_still_down; input_event(data->input_keys, EV_MSC, MSC_SCAN, data->pressed_keys[j]); if (data->pressed_keys[j] < PICOLCD_KEYS) key_code = data->keycode[data->pressed_keys[j]]; else key_code = KEY_UNKNOWN; if (key_code != KEY_UNKNOWN) { dbg_hid(PICOLCD_NAME " got key release for %u:%d", data->pressed_keys[j], key_code); input_report_key(data->input_keys, key_code, 0); } input_sync(data->input_keys); data->pressed_keys[j] = 0; key_still_down: continue; } return 1; } static int picolcd_check_version(struct hid_device *hdev) { struct picolcd_data *data = hid_get_drvdata(hdev); struct picolcd_pending *verinfo; int ret = 0; if (!data) return -ENODEV; verinfo = picolcd_send_and_wait(hdev, REPORT_VERSION, NULL, 0); if (!verinfo) { hid_err(hdev, "no version response from PicoLCD\n"); return -ENODEV; } if (verinfo->raw_size == 2) { data->version[0] = verinfo->raw_data[1]; data->version[1] = verinfo->raw_data[0]; if (data->status & PICOLCD_BOOTLOADER) { hid_info(hdev, "PicoLCD, bootloader version %d.%d\n", verinfo->raw_data[1], verinfo->raw_data[0]); } else { hid_info(hdev, "PicoLCD, firmware version %d.%d\n", verinfo->raw_data[1], verinfo->raw_data[0]); } } else { hid_err(hdev, "confused, got unexpected version response from PicoLCD\n"); ret = -EINVAL; } kfree(verinfo); return ret; } /* * Reset our device and wait for answer to VERSION request */ int picolcd_reset(struct hid_device *hdev) { struct picolcd_data *data = hid_get_drvdata(hdev); struct hid_report *report = picolcd_out_report(REPORT_RESET, hdev); unsigned long flags; int error; if (!data || !report || report->maxfield != 1) return -ENODEV; spin_lock_irqsave(&data->lock, flags); if (hdev->product == USB_DEVICE_ID_PICOLCD_BOOTLOADER) data->status |= PICOLCD_BOOTLOADER; /* perform the reset */ hid_set_field(report->field[0], 0, 1); if (data->status & PICOLCD_FAILED) { 
spin_unlock_irqrestore(&data->lock, flags); return -ENODEV; } hid_hw_request(hdev, report, HID_REQ_SET_REPORT); spin_unlock_irqrestore(&data->lock, flags); error = picolcd_check_version(hdev); if (error) return error; picolcd_resume_lcd(data); picolcd_resume_backlight(data); picolcd_fb_refresh(data); picolcd_leds_set(data); return 0; } /* * The "operation_mode" sysfs attribute */ static ssize_t picolcd_operation_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct picolcd_data *data = dev_get_drvdata(dev); if (data->status & PICOLCD_BOOTLOADER) return snprintf(buf, PAGE_SIZE, "[bootloader] lcd\n"); else return snprintf(buf, PAGE_SIZE, "bootloader [lcd]\n"); } static ssize_t picolcd_operation_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct picolcd_data *data = dev_get_drvdata(dev); struct hid_report *report = NULL; int timeout = data->opmode_delay; unsigned long flags; if (sysfs_streq(buf, "lcd")) { if (data->status & PICOLCD_BOOTLOADER) report = picolcd_out_report(REPORT_EXIT_FLASHER, data->hdev); } else if (sysfs_streq(buf, "bootloader")) { if (!(data->status & PICOLCD_BOOTLOADER)) report = picolcd_out_report(REPORT_EXIT_KEYBOARD, data->hdev); } else { return -EINVAL; } if (!report || report->maxfield != 1) return -EINVAL; spin_lock_irqsave(&data->lock, flags); hid_set_field(report->field[0], 0, timeout & 0xff); hid_set_field(report->field[0], 1, (timeout >> 8) & 0xff); hid_hw_request(data->hdev, report, HID_REQ_SET_REPORT); spin_unlock_irqrestore(&data->lock, flags); return count; } static DEVICE_ATTR(operation_mode, 0644, picolcd_operation_mode_show, picolcd_operation_mode_store); /* * The "operation_mode_delay" sysfs attribute */ static ssize_t picolcd_operation_mode_delay_show(struct device *dev, struct device_attribute *attr, char *buf) { struct picolcd_data *data = dev_get_drvdata(dev); return snprintf(buf, PAGE_SIZE, "%hu\n", data->opmode_delay); } static ssize_t picolcd_operation_mode_delay_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct picolcd_data *data = dev_get_drvdata(dev); unsigned u; if (sscanf(buf, "%u", &u) != 1) return -EINVAL; if (u > 30000) return -EINVAL; else data->opmode_delay = u; return count; } static DEVICE_ATTR(operation_mode_delay, 0644, picolcd_operation_mode_delay_show, picolcd_operation_mode_delay_store); /* * Handle raw report as sent by device */ static int picolcd_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *raw_data, int size) { struct picolcd_data *data = hid_get_drvdata(hdev); unsigned long flags; if (!data) return 1; if (size > 64) { hid_warn(hdev, "invalid size value (%d) for picolcd raw event (%d)\n", size, report->id); return 0; } if (report->id == REPORT_KEY_STATE) { if (data->input_keys) picolcd_raw_keypad(data, report, raw_data+1, size-1); } else if (report->id == REPORT_IR_DATA) { picolcd_raw_cir(data, report, raw_data+1, size-1); } else { spin_lock_irqsave(&data->lock, flags); /* * We let the caller of picolcd_send_and_wait() check if the * report we got is one of the expected ones or not. 
*/ if (data->pending) { memcpy(data->pending->raw_data, raw_data+1, size-1); data->pending->raw_size = size-1; data->pending->in_report = report; complete(&data->pending->ready); } spin_unlock_irqrestore(&data->lock, flags); } picolcd_debug_raw_event(data, hdev, report, raw_data, size); return 1; } #ifdef CONFIG_PM static int picolcd_suspend(struct hid_device *hdev, pm_message_t message) { if (PMSG_IS_AUTO(message)) return 0; picolcd_suspend_backlight(hid_get_drvdata(hdev)); dbg_hid(PICOLCD_NAME " device ready for suspend\n"); return 0; } static int picolcd_resume(struct hid_device *hdev) { int ret; ret = picolcd_resume_backlight(hid_get_drvdata(hdev)); if (ret) dbg_hid(PICOLCD_NAME " restoring backlight failed: %d\n", ret); return 0; } static int picolcd_reset_resume(struct hid_device *hdev) { int ret; ret = picolcd_reset(hdev); if (ret) dbg_hid(PICOLCD_NAME " resetting our device failed: %d\n", ret); ret = picolcd_fb_reset(hid_get_drvdata(hdev), 0); if (ret) dbg_hid(PICOLCD_NAME " restoring framebuffer content failed: %d\n", ret); ret = picolcd_resume_lcd(hid_get_drvdata(hdev)); if (ret) dbg_hid(PICOLCD_NAME " restoring lcd failed: %d\n", ret); ret = picolcd_resume_backlight(hid_get_drvdata(hdev)); if (ret) dbg_hid(PICOLCD_NAME " restoring backlight failed: %d\n", ret); picolcd_leds_set(hid_get_drvdata(hdev)); return 0; } #endif /* initialize keypad input device */ static int picolcd_init_keys(struct picolcd_data *data, struct hid_report *report) { struct hid_device *hdev = data->hdev; struct input_dev *idev; int error, i; if (!report) return -ENODEV; if (report->maxfield != 1 || report->field[0]->report_count != 2 || report->field[0]->report_size != 8) { hid_err(hdev, "unsupported KEY_STATE report\n"); return -EINVAL; } idev = input_allocate_device(); if (idev == NULL) { hid_err(hdev, "failed to allocate input device\n"); return -ENOMEM; } input_set_drvdata(idev, hdev); memcpy(data->keycode, def_keymap, sizeof(def_keymap)); idev->name = hdev->name; idev->phys = hdev->phys; idev->uniq = hdev->uniq; idev->id.bustype = hdev->bus; idev->id.vendor = hdev->vendor; idev->id.product = hdev->product; idev->id.version = hdev->version; idev->dev.parent = &hdev->dev; idev->keycode = &data->keycode; idev->keycodemax = PICOLCD_KEYS; idev->keycodesize = sizeof(data->keycode[0]); input_set_capability(idev, EV_MSC, MSC_SCAN); set_bit(EV_REP, idev->evbit); for (i = 0; i < PICOLCD_KEYS; i++) input_set_capability(idev, EV_KEY, data->keycode[i]); error = input_register_device(idev); if (error) { hid_err(hdev, "error registering the input device\n"); input_free_device(idev); return error; } data->input_keys = idev; return 0; } static void picolcd_exit_keys(struct picolcd_data *data) { struct input_dev *idev = data->input_keys; data->input_keys = NULL; if (idev) input_unregister_device(idev); } static int picolcd_probe_lcd(struct hid_device *hdev, struct picolcd_data *data) { int error; /* Setup keypad input device */ error = picolcd_init_keys(data, picolcd_in_report(REPORT_KEY_STATE, hdev)); if (error) goto err; /* Setup CIR input device */ error = picolcd_init_cir(data, picolcd_in_report(REPORT_IR_DATA, hdev)); if (error) goto err; /* Set up the framebuffer device */ error = picolcd_init_framebuffer(data); if (error) goto err; /* Setup lcd class device */ error = picolcd_init_lcd(data, picolcd_out_report(REPORT_CONTRAST, hdev)); if (error) goto err; /* Setup backlight class device */ error = picolcd_init_backlight(data, picolcd_out_report(REPORT_BRIGHTNESS, hdev)); if (error) goto err; /* Setup the LED 
class devices */ error = picolcd_init_leds(data, picolcd_out_report(REPORT_LED_STATE, hdev)); if (error) goto err; picolcd_init_devfs(data, picolcd_out_report(REPORT_EE_READ, hdev), picolcd_out_report(REPORT_EE_WRITE, hdev), picolcd_out_report(REPORT_READ_MEMORY, hdev), picolcd_out_report(REPORT_WRITE_MEMORY, hdev), picolcd_out_report(REPORT_RESET, hdev)); return 0; err: picolcd_exit_leds(data); picolcd_exit_backlight(data); picolcd_exit_lcd(data); picolcd_exit_framebuffer(data); picolcd_exit_cir(data); picolcd_exit_keys(data); return error; } static int picolcd_probe_bootloader(struct hid_device *hdev, struct picolcd_data *data) { picolcd_init_devfs(data, NULL, NULL, picolcd_out_report(REPORT_BL_READ_MEMORY, hdev), picolcd_out_report(REPORT_BL_WRITE_MEMORY, hdev), NULL); return 0; } static int picolcd_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct picolcd_data *data; int error = -ENOMEM; dbg_hid(PICOLCD_NAME " hardware probe...\n"); /* * Let's allocate the picolcd data structure, set some reasonable * defaults, and associate it with the device */ data = kzalloc(sizeof(struct picolcd_data), GFP_KERNEL); if (data == NULL) { hid_err(hdev, "can't allocate space for Minibox PicoLCD device data\n"); return -ENOMEM; } spin_lock_init(&data->lock); mutex_init(&data->mutex); data->hdev = hdev; data->opmode_delay = 5000; if (hdev->product == USB_DEVICE_ID_PICOLCD_BOOTLOADER) data->status |= PICOLCD_BOOTLOADER; hid_set_drvdata(hdev, data); /* Parse the device reports and start it up */ error = hid_parse(hdev); if (error) { hid_err(hdev, "device report parse failed\n"); goto err_cleanup_data; } error = hid_hw_start(hdev, 0); if (error) { hid_err(hdev, "hardware start failed\n"); goto err_cleanup_data; } error = hid_hw_open(hdev); if (error) { hid_err(hdev, "failed to open input interrupt pipe for key and IR events\n"); goto err_cleanup_hid_hw; } error = device_create_file(&hdev->dev, &dev_attr_operation_mode_delay); if (error) { hid_err(hdev, "failed to create sysfs attributes\n"); goto err_cleanup_hid_ll; } error = device_create_file(&hdev->dev, &dev_attr_operation_mode); if (error) { hid_err(hdev, "failed to create sysfs attributes\n"); goto err_cleanup_sysfs1; } if (data->status & PICOLCD_BOOTLOADER) error = picolcd_probe_bootloader(hdev, data); else error = picolcd_probe_lcd(hdev, data); if (error) goto err_cleanup_sysfs2; dbg_hid(PICOLCD_NAME " activated and initialized\n"); return 0; err_cleanup_sysfs2: device_remove_file(&hdev->dev, &dev_attr_operation_mode); err_cleanup_sysfs1: device_remove_file(&hdev->dev, &dev_attr_operation_mode_delay); err_cleanup_hid_ll: hid_hw_close(hdev); err_cleanup_hid_hw: hid_hw_stop(hdev); err_cleanup_data: kfree(data); return error; } static void picolcd_remove(struct hid_device *hdev) { struct picolcd_data *data = hid_get_drvdata(hdev); unsigned long flags; dbg_hid(PICOLCD_NAME " hardware remove...\n"); spin_lock_irqsave(&data->lock, flags); data->status |= PICOLCD_FAILED; spin_unlock_irqrestore(&data->lock, flags); picolcd_exit_devfs(data); device_remove_file(&hdev->dev, &dev_attr_operation_mode); device_remove_file(&hdev->dev, &dev_attr_operation_mode_delay); hid_hw_close(hdev); hid_hw_stop(hdev); /* Shortcut potential pending reply that will never arrive */ spin_lock_irqsave(&data->lock, flags); if (data->pending) complete(&data->pending->ready); spin_unlock_irqrestore(&data->lock, flags); /* Cleanup LED */ picolcd_exit_leds(data); /* Clean up the framebuffer */ picolcd_exit_backlight(data); picolcd_exit_lcd(data); 
picolcd_exit_framebuffer(data); /* Cleanup input */ picolcd_exit_cir(data); picolcd_exit_keys(data); mutex_destroy(&data->mutex); /* Finally, clean up the picolcd data itself */ kfree(data); } static const struct hid_device_id picolcd_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) }, { } }; MODULE_DEVICE_TABLE(hid, picolcd_devices); static struct hid_driver picolcd_driver = { .name = "hid-picolcd", .id_table = picolcd_devices, .probe = picolcd_probe, .remove = picolcd_remove, .raw_event = picolcd_raw_event, #ifdef CONFIG_PM .suspend = picolcd_suspend, .resume = picolcd_resume, .reset_resume = picolcd_reset_resume, #endif }; module_hid_driver(picolcd_driver); MODULE_DESCRIPTION("Minibox graphics PicoLCD Driver"); MODULE_LICENSE("GPL v2");
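/*
 * Illustrative userspace sketch (not part of the driver above): the probe
 * path creates the "operation_mode" and "operation_mode_delay" sysfs
 * attributes, whose store handlers accept the strings "lcd"/"bootloader"
 * and a delay of at most 30000 ms. The sysfs directory passed on the
 * command line is an assumption and depends on the HID bus topology.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>

static int write_attr(const char *dir, const char *name, const char *value)
{
	char path[512];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", dir, name);
	f = fopen(path, "w");
	if (!f) {
		fprintf(stderr, "open %s: %s\n", path, strerror(errno));
		return -1;
	}
	fprintf(f, "%s", value);
	return fclose(f) ? -1 : 0;
}

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s /sys/bus/hid/devices/<picolcd-id>\n",
			argv[0]);
		return 1;
	}
	/* Let the firmware wait 5000 ms before switching (upper bound 30000). */
	if (write_attr(argv[1], "operation_mode_delay", "5000"))
		return 1;
	/* From bootloader mode, request a switch back to the LCD firmware. */
	if (write_attr(argv[1], "operation_mode", "lcd"))
		return 1;
	return 0;
}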
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * usbusx2y.c - ALSA USB US-428 Driver
 *
 * 2005-04-14 Karsten Wiese	Version 0.8.7.2:
 *	Call snd_card_free() instead of snd_card_free_in_thread() to
 *	prevent oops with dead keyboard symptom.
 *	Tested ok with kernel 2.6.12-rc2.
 *
 * 2004-12-14 Karsten Wiese	Version 0.8.7.1:
 *	snd_pcm_open for rawusb pcm-devices now returns -EBUSY if called
 *	without rawusb's hwdep device being open.
 *
 * 2004-12-02 Karsten Wiese	Version 0.8.7:
 *	Use macro usb_maxpacket() for portability.
 *
 * 2004-10-26 Karsten Wiese	Version 0.8.6:
 *	wake_up() process waiting in usx2y_urbs_start() on error.
 *
 * 2004-10-21 Karsten Wiese	Version 0.8.5:
 *	nrpacks is runtime or compiletime configurable now with tested
 *	values from 1 to 4.
 *
 * 2004-10-03 Karsten Wiese	Version 0.8.2:
 *	Avoid any possible racing while in prepare callback.
 *
 * 2004-09-30 Karsten Wiese	Version 0.8.0:
 *	Simplified things and made ohci work again.
 *
 * 2004-09-20 Karsten Wiese	Version 0.7.3:
 *	Use usb_kill_urb() instead of deprecated (kernel 2.6.9)
 *	usb_unlink_urb().
 *
 * 2004-07-13 Karsten Wiese	Version 0.7.1:
 *	Don't sleep in START/STOP callbacks anymore.
 *	us428 channels C/D not handled just for this version, sorry.
 *
 * 2004-06-21 Karsten Wiese	Version 0.6.4:
 *	Temporarily suspend midi input to sanely call usb_set_interface()
 *	when setting format.
 *
 * 2004-06-12 Karsten Wiese	Version 0.6.3:
 *	Made it so that the following rule is enforced:
 *	"All pcm substreams of one usx2y have to operate at the
 *	 same rate & format."
 *
 * 2004-04-06 Karsten Wiese	Version 0.6.0:
 *	Runs on 2.6.5 kernel without any "--with-debug=" things.
 *	us224 reported running.
 *
 * 2004-01-14 Karsten Wiese	Version 0.5.1:
 *	Runs with 2.6.1 kernel.
 *
 * 2003-12-30 Karsten Wiese	Version 0.4.1:
 *	Fix 24Bit 4Channel capturing for the us428.
2003-11-27 Karsten Wiese, Martin Langer Version 0.4: us122 support. us224 could be tested by uncommenting the sections containing USB_ID_US224 2003-11-03 Karsten Wiese Version 0.3: 24Bit support. "arecord -D hw:1 -c 2 -r 48000 -M -f S24_3LE|aplay -D hw:1 -c 2 -r 48000 -M -f S24_3LE" works. 2003-08-22 Karsten Wiese Version 0.0.8: Removed EZUSB Firmware. First Stage Firmwaredownload is now done by tascam-firmware downloader. See: http://usb-midi-fw.sourceforge.net/tascam-firmware.tar.gz 2003-06-18 Karsten Wiese Version 0.0.5: changed to compile with kernel 2.4.21 and alsa 0.9.4 2002-10-16 Karsten Wiese Version 0.0.4: compiles again with alsa-current. USB_ISO_ASAP not used anymore (most of the time), instead urb->start_frame is calculated here now, some calls inside usb-driver don't need to happen anymore. To get the best out of this: Disable APM-support in the kernel as APM-BIOS calls (once each second) hard disable interrupt for many precious milliseconds. This helped me much on my slowish PII 400 & PIII 500. ACPI yet untested but might cause the same bad behaviour. Use a kernel with lowlatency and preemptiv patches applied. To autoload snd-usb-midi append a line post-install snd-usb-us428 modprobe snd-usb-midi to /etc/modules.conf. known problems: sliders, knobs, lights not yet handled except MASTER Volume slider. "pcm -c 2" doesn't work. "pcm -c 2 -m direct_interleaved" does. KDE3: "Enable full duplex operation" deadlocks. 2002-08-31 Karsten Wiese Version 0.0.3: audio also simplex; simplifying: iso urbs only 1 packet, melted structs. ASYNC_UNLINK not used anymore: no more crashes so far..... for alsa 0.9 rc3. 2002-08-09 Karsten Wiese Version 0.0.2: midi works with snd-usb-midi, audio (only fullduplex now) with i.e. bristol. The firmware has been sniffed from win2k us-428 driver 3.09. * Copyright (c) 2002 - 2004 Karsten Wiese */ #include <linux/init.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/usb.h> #include <sound/core.h> #include <sound/initval.h> #include <sound/pcm.h> #include <sound/rawmidi.h> #include "usx2y.h" #include "usbusx2y.h" #include "usX2Yhwdep.h" MODULE_AUTHOR("Karsten Wiese <annabellesgarden@yahoo.de>"); MODULE_DESCRIPTION("TASCAM "NAME_ALLCAPS" Version 0.8.7.2"); MODULE_LICENSE("GPL"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; /* Enable this card */ module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for "NAME_ALLCAPS"."); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for "NAME_ALLCAPS"."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable "NAME_ALLCAPS"."); static int snd_usx2y_card_used[SNDRV_CARDS]; static void snd_usx2y_card_private_free(struct snd_card *card); static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s); /* * pipe 4 is used for switching the lamps, setting samplerate, volumes .... 
*/ static void i_usx2y_out04_int(struct urb *urb) { #ifdef CONFIG_SND_DEBUG if (urb->status) { int i; struct usx2ydev *usx2y = urb->context; for (i = 0; i < 10 && usx2y->as04.urb[i] != urb; i++) ; snd_printdd("%s urb %i status=%i\n", __func__, i, urb->status); } #endif } static void i_usx2y_in04_int(struct urb *urb) { int err = 0; struct usx2ydev *usx2y = urb->context; struct us428ctls_sharedmem *us428ctls = usx2y->us428ctls_sharedmem; struct us428_p4out *p4out; int i, j, n, diff, send; usx2y->in04_int_calls++; if (urb->status) { snd_printdd("Interrupt Pipe 4 came back with status=%i\n", urb->status); return; } // printk("%i:0x%02X ", 8, (int)((unsigned char*)usx2y->in04_buf)[8]); Master volume shows 0 here if fader is at max during boot ?!? if (us428ctls) { diff = -1; if (us428ctls->ctl_snapshot_last == -2) { diff = 0; memcpy(usx2y->in04_last, usx2y->in04_buf, sizeof(usx2y->in04_last)); us428ctls->ctl_snapshot_last = -1; } else { for (i = 0; i < 21; i++) { if (usx2y->in04_last[i] != ((char *)usx2y->in04_buf)[i]) { if (diff < 0) diff = i; usx2y->in04_last[i] = ((char *)usx2y->in04_buf)[i]; } } } if (diff >= 0) { n = us428ctls->ctl_snapshot_last + 1; if (n >= N_US428_CTL_BUFS || n < 0) n = 0; memcpy(us428ctls->ctl_snapshot + n, usx2y->in04_buf, sizeof(us428ctls->ctl_snapshot[0])); us428ctls->ctl_snapshot_differs_at[n] = diff; us428ctls->ctl_snapshot_last = n; wake_up(&usx2y->us428ctls_wait_queue_head); } } if (usx2y->us04) { if (!usx2y->us04->submitted) { do { err = usb_submit_urb(usx2y->us04->urb[usx2y->us04->submitted++], GFP_ATOMIC); } while (!err && usx2y->us04->submitted < usx2y->us04->len); } } else { if (us428ctls && us428ctls->p4out_last >= 0 && us428ctls->p4out_last < N_US428_P4OUT_BUFS) { if (us428ctls->p4out_last != us428ctls->p4out_sent) { send = us428ctls->p4out_sent + 1; if (send >= N_US428_P4OUT_BUFS) send = 0; for (j = 0; j < URBS_ASYNC_SEQ && !err; ++j) { if (!usx2y->as04.urb[j]->status) { p4out = us428ctls->p4out + send; // FIXME if more than 1 p4out is new, 1 gets lost. usb_fill_bulk_urb(usx2y->as04.urb[j], usx2y->dev, usb_sndbulkpipe(usx2y->dev, 0x04), &p4out->val.vol, p4out->type == ELT_LIGHT ? 
sizeof(struct us428_lights) : 5, i_usx2y_out04_int, usx2y); err = usb_submit_urb(usx2y->as04.urb[j], GFP_ATOMIC); us428ctls->p4out_sent = send; break; } } } } } if (err) snd_printk(KERN_ERR "in04_int() usb_submit_urb err=%i\n", err); urb->dev = usx2y->dev; usb_submit_urb(urb, GFP_ATOMIC); } /* * Prepare some urbs */ int usx2y_async_seq04_init(struct usx2ydev *usx2y) { int err = 0, i; if (WARN_ON(usx2y->as04.buffer)) return -EBUSY; usx2y->as04.buffer = kmalloc_array(URBS_ASYNC_SEQ, URB_DATA_LEN_ASYNC_SEQ, GFP_KERNEL); if (!usx2y->as04.buffer) { err = -ENOMEM; } else { for (i = 0; i < URBS_ASYNC_SEQ; ++i) { usx2y->as04.urb[i] = usb_alloc_urb(0, GFP_KERNEL); if (!usx2y->as04.urb[i]) { err = -ENOMEM; break; } usb_fill_bulk_urb(usx2y->as04.urb[i], usx2y->dev, usb_sndbulkpipe(usx2y->dev, 0x04), usx2y->as04.buffer + URB_DATA_LEN_ASYNC_SEQ * i, 0, i_usx2y_out04_int, usx2y); err = usb_urb_ep_type_check(usx2y->as04.urb[i]); if (err < 0) break; } } if (err) usx2y_unlinkseq(&usx2y->as04); return err; } int usx2y_in04_init(struct usx2ydev *usx2y) { int err; if (WARN_ON(usx2y->in04_urb)) return -EBUSY; usx2y->in04_urb = usb_alloc_urb(0, GFP_KERNEL); if (!usx2y->in04_urb) { err = -ENOMEM; goto error; } usx2y->in04_buf = kmalloc(21, GFP_KERNEL); if (!usx2y->in04_buf) { err = -ENOMEM; goto error; } init_waitqueue_head(&usx2y->in04_wait_queue); usb_fill_int_urb(usx2y->in04_urb, usx2y->dev, usb_rcvintpipe(usx2y->dev, 0x4), usx2y->in04_buf, 21, i_usx2y_in04_int, usx2y, 10); if (usb_urb_ep_type_check(usx2y->in04_urb)) { err = -EINVAL; goto error; } return usb_submit_urb(usx2y->in04_urb, GFP_KERNEL); error: kfree(usx2y->in04_buf); usb_free_urb(usx2y->in04_urb); usx2y->in04_buf = NULL; usx2y->in04_urb = NULL; return err; } static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s) { int i; for (i = 0; i < URBS_ASYNC_SEQ; ++i) { if (!s->urb[i]) continue; usb_kill_urb(s->urb[i]); usb_free_urb(s->urb[i]); s->urb[i] = NULL; } kfree(s->buffer); s->buffer = NULL; } static const struct usb_device_id snd_usx2y_usb_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US428 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US122 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US224 }, { /* terminator */ } }; MODULE_DEVICE_TABLE(usb, snd_usx2y_usb_id_table); static int usx2y_create_card(struct usb_device *device, struct usb_interface *intf, struct snd_card **cardp) { int dev; struct snd_card *card; int err; for (dev = 0; dev < SNDRV_CARDS; ++dev) if (enable[dev] && !snd_usx2y_card_used[dev]) break; if (dev >= SNDRV_CARDS) return -ENODEV; err = snd_card_new(&intf->dev, index[dev], id[dev], THIS_MODULE, sizeof(struct usx2ydev), &card); if (err < 0) return err; snd_usx2y_card_used[usx2y(card)->card_index = dev] = 1; card->private_free = snd_usx2y_card_private_free; usx2y(card)->dev = device; init_waitqueue_head(&usx2y(card)->prepare_wait_queue); init_waitqueue_head(&usx2y(card)->us428ctls_wait_queue_head); mutex_init(&usx2y(card)->pcm_mutex); INIT_LIST_HEAD(&usx2y(card)->midi_list); strcpy(card->driver, "USB "NAME_ALLCAPS""); sprintf(card->shortname, "TASCAM "NAME_ALLCAPS""); sprintf(card->longname, "%s (%x:%x if %d at %03d/%03d)", card->shortname, le16_to_cpu(device->descriptor.idVendor), le16_to_cpu(device->descriptor.idProduct), 0,//us428(card)->usbmidi.ifnum, usx2y(card)->dev->bus->busnum, usx2y(card)->dev->devnum); *cardp = card; return 0; } static void snd_usx2y_card_private_free(struct snd_card 
*card) { struct usx2ydev *usx2y = usx2y(card); kfree(usx2y->in04_buf); usb_free_urb(usx2y->in04_urb); if (usx2y->us428ctls_sharedmem) free_pages_exact(usx2y->us428ctls_sharedmem, US428_SHAREDMEM_PAGES); if (usx2y->card_index >= 0 && usx2y->card_index < SNDRV_CARDS) snd_usx2y_card_used[usx2y->card_index] = 0; } static void snd_usx2y_disconnect(struct usb_interface *intf) { struct snd_card *card; struct usx2ydev *usx2y; struct list_head *p; card = usb_get_intfdata(intf); if (!card) return; usx2y = usx2y(card); usx2y->chip_status = USX2Y_STAT_CHIP_HUP; usx2y_unlinkseq(&usx2y->as04); usb_kill_urb(usx2y->in04_urb); snd_card_disconnect(card); /* release the midi resources */ list_for_each(p, &usx2y->midi_list) { snd_usbmidi_disconnect(p); } if (usx2y->us428ctls_sharedmem) wake_up(&usx2y->us428ctls_wait_queue_head); snd_card_free(card); } static int snd_usx2y_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *device = interface_to_usbdev(intf); struct snd_card *card; int err; if (le16_to_cpu(device->descriptor.idVendor) != 0x1604 || (le16_to_cpu(device->descriptor.idProduct) != USB_ID_US122 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US224 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US428)) return -EINVAL; err = usx2y_create_card(device, intf, &card); if (err < 0) return err; err = usx2y_hwdep_new(card, device); if (err < 0) goto error; err = snd_card_register(card); if (err < 0) goto error; dev_set_drvdata(&intf->dev, card); return 0; error: snd_card_free(card); return err; } static struct usb_driver snd_usx2y_usb_driver = { .name = "snd-usb-usx2y", .probe = snd_usx2y_probe, .disconnect = snd_usx2y_disconnect, .id_table = snd_usx2y_usb_id_table, }; module_usb_driver(snd_usx2y_usb_driver);
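/*
 * Minimal userspace sketch (illustrative, not part of the driver above):
 * once snd_usx2y_probe() has registered the card, it appears in
 * /proc/asound/cards with the "TASCAM ..." shortname set in
 * usx2y_create_card(). Matching on the substring "TASCAM" is an
 * assumption; the exact name depends on NAME_ALLCAPS for the
 * us122/us224/us428 variant.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/asound/cards", "r");

	if (!f) {
		perror("/proc/asound/cards");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "TASCAM"))
			fputs(line, stdout);
	fclose(f);
	return 0;
}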
// SPDX-License-Identifier: GPL-2.0-only
/*
 * driver for the i2c-tiny-usb adapter - 1.0
 * http://www.harbaum.org/till/i2c_tiny_usb
 *
 * Copyright (C) 2006-2007 Till Harbaum (Till@Harbaum.org)
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>

/* include interfaces to usb layer */
#include <linux/usb.h>

/* include interface to i2c layer */
#include <linux/i2c.h>

/* commands via USB, must match command ids in the firmware */
#define CMD_ECHO		0
#define CMD_GET_FUNC		1
#define CMD_SET_DELAY		2
#define CMD_GET_STATUS		3

#define CMD_I2C_IO		4
#define CMD_I2C_IO_BEGIN	(1<<0)
#define CMD_I2C_IO_END		(1<<1)

/* i2c bit delay, default is 10us -> 100kHz max
   (in practice, due to additional delays in the i2c bitbanging
   code this results in a i2c clock of about 50kHz) */
static unsigned short delay = 10;
module_param(delay, ushort, 0);
MODULE_PARM_DESC(delay, "bit delay in microseconds "
		 "(default is 10us for 100kHz max)");

static int usb_read(struct i2c_adapter *adapter, int cmd,
		    int value, int index, void *data, int len);

static int usb_write(struct i2c_adapter *adapter, int cmd,
		     int value, int index, void *data, int len);

/* ----- begin of i2c layer ---------------------------------------------- */

#define STATUS_IDLE		0
#define STATUS_ADDRESS_ACK	1
#define STATUS_ADDRESS_NAK	2

static int usb_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
{
	unsigned char *pstatus;
	struct i2c_msg *pmsg;
	int i, ret;

	dev_dbg(&adapter->dev, "master xfer %d messages:\n", num);

	pstatus = kmalloc(sizeof(*pstatus), GFP_KERNEL);
	if (!pstatus)
		return -ENOMEM;

	for (i = 0 ; i < num ; i++) {
		int cmd = CMD_I2C_IO;

		if (i == 0)
			cmd |= CMD_I2C_IO_BEGIN;

		if (i == num-1)
			cmd |= CMD_I2C_IO_END;

		pmsg = &msgs[i];

		dev_dbg(&adapter->dev,
			" %d: %s (flags %d) %d bytes to 0x%02x\n",
			i, pmsg->flags & I2C_M_RD ?
"read" : "write", pmsg->flags, pmsg->len, pmsg->addr); /* and directly send the message */ if (pmsg->flags & I2C_M_RD) { /* read data */ if (usb_read(adapter, cmd, pmsg->flags, pmsg->addr, pmsg->buf, pmsg->len) != pmsg->len) { dev_err(&adapter->dev, "failure reading data\n"); ret = -EIO; goto out; } } else { /* write data */ if (usb_write(adapter, cmd, pmsg->flags, pmsg->addr, pmsg->buf, pmsg->len) != pmsg->len) { dev_err(&adapter->dev, "failure writing data\n"); ret = -EIO; goto out; } } /* read status */ if (usb_read(adapter, CMD_GET_STATUS, 0, 0, pstatus, 1) != 1) { dev_err(&adapter->dev, "failure reading status\n"); ret = -EIO; goto out; } dev_dbg(&adapter->dev, " status = %d\n", *pstatus); if (*pstatus == STATUS_ADDRESS_NAK) { ret = -ENXIO; goto out; } } ret = i; out: kfree(pstatus); return ret; } static u32 usb_func(struct i2c_adapter *adapter) { __le32 *pfunc; u32 ret; pfunc = kmalloc(sizeof(*pfunc), GFP_KERNEL); /* get functionality from adapter */ if (!pfunc || usb_read(adapter, CMD_GET_FUNC, 0, 0, pfunc, sizeof(*pfunc)) != sizeof(*pfunc)) { dev_err(&adapter->dev, "failure reading functionality\n"); ret = 0; goto out; } ret = le32_to_cpup(pfunc); out: kfree(pfunc); return ret; } /* This is the actual algorithm we define */ static const struct i2c_algorithm usb_algorithm = { .master_xfer = usb_xfer, .functionality = usb_func, }; /* ----- end of i2c layer ------------------------------------------------ */ /* ----- begin of usb layer ---------------------------------------------- */ /* * Initially the usb i2c interface uses a vid/pid pair donated by * Future Technology Devices International Ltd., later a pair was * bought from EZPrototypes */ static const struct usb_device_id i2c_tiny_usb_table[] = { { USB_DEVICE(0x0403, 0xc631) }, /* FTDI */ { USB_DEVICE(0x1c40, 0x0534) }, /* EZPrototypes */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, i2c_tiny_usb_table); /* Structure to hold all of our device specific stuff */ struct i2c_tiny_usb { struct usb_device *usb_dev; /* the usb device for this device */ struct usb_interface *interface; /* the interface for this device */ struct i2c_adapter adapter; /* i2c related things */ }; static int usb_read(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; void *dmadata = kmalloc(len, GFP_KERNEL); int ret; if (!dmadata) return -ENOMEM; /* do control transfer */ ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_IN, value, index, dmadata, len, 2000); memcpy(data, dmadata, len); kfree(dmadata); return ret; } static int usb_write(struct i2c_adapter *adapter, int cmd, int value, int index, void *data, int len) { struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data; void *dmadata = kmemdup(data, len, GFP_KERNEL); int ret; if (!dmadata) return -ENOMEM; /* do control transfer */ ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE, value, index, dmadata, len, 2000); kfree(dmadata); return ret; } static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev) { usb_put_dev(dev->usb_dev); kfree(dev); } static int i2c_tiny_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct i2c_tiny_usb *dev; int retval = -ENOMEM; u16 version; if (interface->intf_assoc && interface->intf_assoc->bFunctionClass != USB_CLASS_VENDOR_SPEC) return -ENODEV; dev_dbg(&interface->dev, "probing usb 
device\n"); /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto error; dev->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dev->interface = interface; /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); version = le16_to_cpu(dev->usb_dev->descriptor.bcdDevice); dev_info(&interface->dev, "version %x.%02x found at bus %03d address %03d\n", version >> 8, version & 0xff, dev->usb_dev->bus->busnum, dev->usb_dev->devnum); /* setup i2c adapter description */ dev->adapter.owner = THIS_MODULE; dev->adapter.class = I2C_CLASS_HWMON; dev->adapter.algo = &usb_algorithm; dev->adapter.algo_data = dev; snprintf(dev->adapter.name, sizeof(dev->adapter.name), "i2c-tiny-usb at bus %03d device %03d", dev->usb_dev->bus->busnum, dev->usb_dev->devnum); if (usb_write(&dev->adapter, CMD_SET_DELAY, delay, 0, NULL, 0) != 0) { dev_err(&dev->adapter.dev, "failure setting delay to %dus\n", delay); retval = -EIO; goto error; } dev->adapter.dev.parent = &dev->interface->dev; /* and finally attach to i2c layer */ i2c_add_adapter(&dev->adapter); /* inform user about successful attachment to i2c layer */ dev_info(&dev->adapter.dev, "connected i2c-tiny-usb device\n"); return 0; error: if (dev) i2c_tiny_usb_free(dev); return retval; } static void i2c_tiny_usb_disconnect(struct usb_interface *interface) { struct i2c_tiny_usb *dev = usb_get_intfdata(interface); i2c_del_adapter(&dev->adapter); usb_set_intfdata(interface, NULL); i2c_tiny_usb_free(dev); dev_dbg(&interface->dev, "disconnected\n"); } static struct usb_driver i2c_tiny_usb_driver = { .name = "i2c-tiny-usb", .probe = i2c_tiny_usb_probe, .disconnect = i2c_tiny_usb_disconnect, .id_table = i2c_tiny_usb_table, }; module_usb_driver(i2c_tiny_usb_driver); /* ----- end of usb layer ------------------------------------------------ */ MODULE_AUTHOR("Till Harbaum <Till@Harbaum.org>"); MODULE_DESCRIPTION("i2c-tiny-usb driver v1.0"); MODULE_LICENSE("GPL");
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Integrity Measurement Architecture
 *
 * Copyright (C) 2005,2006,2007,2008 IBM Corporation
 *
 * Authors:
 * Reiner Sailer <sailer@watson.ibm.com>
 * Serge Hallyn <serue@us.ibm.com>
 * Kylene Hall <kylene@us.ibm.com>
 * Mimi Zohar <zohar@us.ibm.com>
 *
 * File: ima_main.c
 * implements the IMA hooks: ima_bprm_check, ima_file_mmap,
 * and ima_file_check.
*/ #include <linux/module.h> #include <linux/file.h> #include <linux/binfmts.h> #include <linux/kernel_read_file.h> #include <linux/mount.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/ima.h> #include <linux/fs.h> #include <linux/iversion.h> #include "ima.h" #ifdef CONFIG_IMA_APPRAISE int ima_appraise = IMA_APPRAISE_ENFORCE; #else int ima_appraise; #endif int __ro_after_init ima_hash_algo = HASH_ALGO_SHA1; static int hash_setup_done; static struct notifier_block ima_lsm_policy_notifier = { .notifier_call = ima_lsm_policy_change, }; static int __init hash_setup(char *str) { struct ima_template_desc *template_desc = ima_template_desc_current(); int i; if (hash_setup_done) return 1; if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) { if (strncmp(str, "sha1", 4) == 0) { ima_hash_algo = HASH_ALGO_SHA1; } else if (strncmp(str, "md5", 3) == 0) { ima_hash_algo = HASH_ALGO_MD5; } else { pr_err("invalid hash algorithm \"%s\" for template \"%s\"", str, IMA_TEMPLATE_IMA_NAME); return 1; } goto out; } i = match_string(hash_algo_name, HASH_ALGO__LAST, str); if (i < 0) { pr_err("invalid hash algorithm \"%s\"", str); return 1; } ima_hash_algo = i; out: hash_setup_done = 1; return 1; } __setup("ima_hash=", hash_setup); enum hash_algo ima_get_current_hash_algo(void) { return ima_hash_algo; } /* Prevent mmap'ing a file execute that is already mmap'ed write */ static int mmap_violation_check(enum ima_hooks func, struct file *file, char **pathbuf, const char **pathname, char *filename) { struct inode *inode; int rc = 0; if ((func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && mapping_writably_mapped(file->f_mapping)) { rc = -ETXTBSY; inode = file_inode(file); if (!*pathbuf) /* ima_rdwr_violation possibly pre-fetched */ *pathname = ima_d_path(&file->f_path, pathbuf, filename); integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, *pathname, "mmap_file", "mmapped_writers", rc, 0); } return rc; } /* * ima_rdwr_violation_check * * Only invalidate the PCR for measured files: * - Opening a file for write when already open for read, * results in a time of measure, time of use (ToMToU) error. * - Opening a file for read when already open for write, * could result in a file measurement error. 
* */ static void ima_rdwr_violation_check(struct file *file, struct ima_iint_cache *iint, int must_measure, char **pathbuf, const char **pathname, char *filename) { struct inode *inode = file_inode(file); fmode_t mode = file->f_mode; bool send_tomtou = false, send_writers = false; if (mode & FMODE_WRITE) { if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) { if (!iint) iint = ima_iint_find(inode); /* IMA_MEASURE is set from reader side */ if (iint && test_bit(IMA_MUST_MEASURE, &iint->atomic_flags)) send_tomtou = true; } } else { if (must_measure) set_bit(IMA_MUST_MEASURE, &iint->atomic_flags); if (inode_is_open_for_write(inode) && must_measure) send_writers = true; } if (!send_tomtou && !send_writers) return; *pathname = ima_d_path(&file->f_path, pathbuf, filename); if (send_tomtou) ima_add_violation(file, *pathname, iint, "invalid_pcr", "ToMToU"); if (send_writers) ima_add_violation(file, *pathname, iint, "invalid_pcr", "open_writers"); } static void ima_check_last_writer(struct ima_iint_cache *iint, struct inode *inode, struct file *file) { fmode_t mode = file->f_mode; bool update; if (!(mode & FMODE_WRITE)) return; mutex_lock(&iint->mutex); if (atomic_read(&inode->i_writecount) == 1) { struct kstat stat; update = test_and_clear_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); if ((iint->flags & IMA_NEW_FILE) || vfs_getattr_nosec(&file->f_path, &stat, STATX_CHANGE_COOKIE, AT_STATX_SYNC_AS_STAT) || !(stat.result_mask & STATX_CHANGE_COOKIE) || stat.change_cookie != iint->version) { iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); iint->measured_pcrs = 0; if (update) ima_update_xattr(iint, file); } } mutex_unlock(&iint->mutex); } /** * ima_file_free - called on __fput() * @file: pointer to file structure being freed * * Flag files that changed, based on i_version */ static void ima_file_free(struct file *file) { struct inode *inode = file_inode(file); struct ima_iint_cache *iint; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; iint = ima_iint_find(inode); if (!iint) return; ima_check_last_writer(iint, inode, file); } static int process_measurement(struct file *file, const struct cred *cred, u32 secid, char *buf, loff_t size, int mask, enum ima_hooks func) { struct inode *backing_inode, *inode = file_inode(file); struct ima_iint_cache *iint = NULL; struct ima_template_desc *template_desc = NULL; char *pathbuf = NULL; char filename[NAME_MAX]; const char *pathname = NULL; int rc = 0, action, must_appraise = 0; int pcr = CONFIG_IMA_MEASURE_PCR_IDX; struct evm_ima_xattr_data *xattr_value = NULL; struct modsig *modsig = NULL; int xattr_len = 0; bool violation_check; enum hash_algo hash_algo; unsigned int allowed_algos = 0; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return 0; /* Return an IMA_MEASURE, IMA_APPRAISE, IMA_AUDIT action * bitmask based on the appraise/audit/measurement policy. * Included is the appraise submask. */ action = ima_get_action(file_mnt_idmap(file), inode, cred, secid, mask, func, &pcr, &template_desc, NULL, &allowed_algos); violation_check = ((func == FILE_CHECK || func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && (ima_policy_flag & IMA_MEASURE)); if (!action && !violation_check) return 0; must_appraise = action & IMA_APPRAISE; /* Is the appraise rule hook specific? 
*/ if (action & IMA_FILE_APPRAISE) func = FILE_CHECK; inode_lock(inode); if (action) { iint = ima_inode_get(inode); if (!iint) rc = -ENOMEM; } if (!rc && violation_check) ima_rdwr_violation_check(file, iint, action & IMA_MEASURE, &pathbuf, &pathname, filename); inode_unlock(inode); if (rc) goto out; if (!action) goto out; mutex_lock(&iint->mutex); if (test_and_clear_bit(IMA_CHANGE_ATTR, &iint->atomic_flags)) /* reset appraisal flags if ima_inode_post_setattr was called */ iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | IMA_NONACTION_FLAGS); /* * Re-evaulate the file if either the xattr has changed or the * kernel has no way of detecting file change on the filesystem. * (Limited to privileged mounted filesystems.) */ if (test_and_clear_bit(IMA_CHANGE_XATTR, &iint->atomic_flags) || ((inode->i_sb->s_iflags & SB_I_IMA_UNVERIFIABLE_SIGNATURE) && !(inode->i_sb->s_iflags & SB_I_UNTRUSTED_MOUNTER) && !(action & IMA_FAIL_UNVERIFIABLE_SIGS))) { iint->flags &= ~IMA_DONE_MASK; iint->measured_pcrs = 0; } /* Detect and re-evaluate changes made to the backing file. */ backing_inode = d_real_inode(file_dentry(file)); if (backing_inode != inode && (action & IMA_DO_MASK) && (iint->flags & IMA_DONE_MASK)) { if (!IS_I_VERSION(backing_inode) || backing_inode->i_sb->s_dev != iint->real_dev || backing_inode->i_ino != iint->real_ino || !inode_eq_iversion(backing_inode, iint->version)) { iint->flags &= ~IMA_DONE_MASK; iint->measured_pcrs = 0; } } /* Determine if already appraised/measured based on bitmask * (IMA_MEASURE, IMA_MEASURED, IMA_XXXX_APPRAISE, IMA_XXXX_APPRAISED, * IMA_AUDIT, IMA_AUDITED) */ iint->flags |= action; action &= IMA_DO_MASK; action &= ~((iint->flags & (IMA_DONE_MASK ^ IMA_MEASURED)) >> 1); /* If target pcr is already measured, unset IMA_MEASURE action */ if ((action & IMA_MEASURE) && (iint->measured_pcrs & (0x1 << pcr))) action ^= IMA_MEASURE; /* HASH sets the digital signature and update flags, nothing else */ if ((action & IMA_HASH) && !(test_bit(IMA_DIGSIG, &iint->atomic_flags))) { xattr_len = ima_read_xattr(file_dentry(file), &xattr_value, xattr_len); if ((xattr_value && xattr_len > 2) && (xattr_value->type == EVM_IMA_XATTR_DIGSIG)) set_bit(IMA_DIGSIG, &iint->atomic_flags); iint->flags |= IMA_HASHED; action ^= IMA_HASH; set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); } /* Nothing to do, just return existing appraised status */ if (!action) { if (must_appraise) { rc = mmap_violation_check(func, file, &pathbuf, &pathname, filename); if (!rc) rc = ima_get_cache_status(iint, func); } goto out_locked; } if ((action & IMA_APPRAISE_SUBMASK) || strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0) { /* read 'security.ima' */ xattr_len = ima_read_xattr(file_dentry(file), &xattr_value, xattr_len); /* * Read the appended modsig if allowed by the policy, and allow * an additional measurement list entry, if needed, based on the * template format and whether the file was already measured. 
*/ if (iint->flags & IMA_MODSIG_ALLOWED) { rc = ima_read_modsig(func, buf, size, &modsig); if (!rc && ima_template_has_modsig(template_desc) && iint->flags & IMA_MEASURED) action |= IMA_MEASURE; } } hash_algo = ima_get_hash_algo(xattr_value, xattr_len); rc = ima_collect_measurement(iint, file, buf, size, hash_algo, modsig); if (rc != 0 && rc != -EBADF && rc != -EINVAL) goto out_locked; if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ pathname = ima_d_path(&file->f_path, &pathbuf, filename); if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, xattr_value, xattr_len, modsig, pcr, template_desc); if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) { rc = ima_check_blacklist(iint, modsig, pcr); if (rc != -EPERM) { inode_lock(inode); rc = ima_appraise_measurement(func, iint, file, pathname, xattr_value, xattr_len, modsig); inode_unlock(inode); } if (!rc) rc = mmap_violation_check(func, file, &pathbuf, &pathname, filename); } if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); if ((file->f_flags & O_DIRECT) && (iint->flags & IMA_PERMIT_DIRECTIO)) rc = 0; /* Ensure the digest was generated using an allowed algorithm */ if (rc == 0 && must_appraise && allowed_algos != 0 && (allowed_algos & (1U << hash_algo)) == 0) { rc = -EACCES; integrity_audit_msg(AUDIT_INTEGRITY_DATA, file_inode(file), pathname, "collect_data", "denied-hash-algorithm", rc, 0); } out_locked: if ((mask & MAY_WRITE) && test_bit(IMA_DIGSIG, &iint->atomic_flags) && !(iint->flags & IMA_NEW_FILE)) rc = -EACCES; mutex_unlock(&iint->mutex); kfree(xattr_value); ima_free_modsig(modsig); out: if (pathbuf) __putname(pathbuf); if (must_appraise) { if (rc && (ima_appraise & IMA_APPRAISE_ENFORCE)) return -EACCES; if (file->f_mode & FMODE_WRITE) set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); } return 0; } /** * ima_file_mmap - based on policy, collect/store measurement. * @file: pointer to the file to be measured (May be NULL) * @reqprot: protection requested by the application * @prot: protection that will be applied by the kernel * @flags: operational flags * * Measure files being mmapped executable based on the ima_must_measure() * policy decision. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) { u32 secid; int ret; if (!file) return 0; security_current_getsecid_subj(&secid); if (reqprot & PROT_EXEC) { ret = process_measurement(file, current_cred(), secid, NULL, 0, MAY_EXEC, MMAP_CHECK_REQPROT); if (ret) return ret; } if (prot & PROT_EXEC) return process_measurement(file, current_cred(), secid, NULL, 0, MAY_EXEC, MMAP_CHECK); return 0; } /** * ima_file_mprotect - based on policy, limit mprotect change * @vma: vm_area_struct protection is set to * @reqprot: protection requested by the application * @prot: protection that will be applied by the kernel * * Files can be mmap'ed read/write and later changed to execute to circumvent * IMA's mmap appraisal policy rules. Due to locking issues (mmap semaphore * would be taken before i_mutex), files can not be measured or appraised at * this point. Eliminate this integrity gap by denying the mprotect * PROT_EXECUTE change, if an mmap appraise policy rule exists. * * On mprotect change success, return 0. On failure, return -EACESS. 
*/ static int ima_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { struct ima_template_desc *template = NULL; struct file *file; char filename[NAME_MAX]; char *pathbuf = NULL; const char *pathname = NULL; struct inode *inode; int result = 0; int action; u32 secid; int pcr; /* Is mprotect making an mmap'ed file executable? */ if (!(ima_policy_flag & IMA_APPRAISE) || !vma->vm_file || !(prot & PROT_EXEC) || (vma->vm_flags & VM_EXEC)) return 0; security_current_getsecid_subj(&secid); inode = file_inode(vma->vm_file); action = ima_get_action(file_mnt_idmap(vma->vm_file), inode, current_cred(), secid, MAY_EXEC, MMAP_CHECK, &pcr, &template, NULL, NULL); action |= ima_get_action(file_mnt_idmap(vma->vm_file), inode, current_cred(), secid, MAY_EXEC, MMAP_CHECK_REQPROT, &pcr, &template, NULL, NULL); /* Is the mmap'ed file in policy? */ if (!(action & (IMA_MEASURE | IMA_APPRAISE_SUBMASK))) return 0; if (action & IMA_APPRAISE_SUBMASK) result = -EPERM; file = vma->vm_file; pathname = ima_d_path(&file->f_path, &pathbuf, filename); integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, pathname, "collect_data", "failed-mprotect", result, 0); if (pathbuf) __putname(pathbuf); return result; } /** * ima_bprm_check - based on policy, collect/store measurement. * @bprm: contains the linux_binprm structure * * The OS protects against an executable file, already open for write, * from being executed in deny_write_access() and an executable file, * already open for execute, from being modified in get_write_access(). * So we can be certain that what we verify and measure here is actually * what is being executed. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_bprm_check(struct linux_binprm *bprm) { int ret; u32 secid; security_current_getsecid_subj(&secid); ret = process_measurement(bprm->file, current_cred(), secid, NULL, 0, MAY_EXEC, BPRM_CHECK); if (ret) return ret; security_cred_getsecid(bprm->cred, &secid); return process_measurement(bprm->file, bprm->cred, secid, NULL, 0, MAY_EXEC, CREDS_CHECK); } /** * ima_file_check - based on policy, collect/store measurement. * @file: pointer to the file to be measured * @mask: contains MAY_READ, MAY_WRITE, MAY_EXEC or MAY_APPEND * * Measure files based on the ima_must_measure() policy decision. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_file_check(struct file *file, int mask) { u32 secid; security_current_getsecid_subj(&secid); return process_measurement(file, current_cred(), secid, NULL, 0, mask & (MAY_READ | MAY_WRITE | MAY_EXEC | MAY_APPEND), FILE_CHECK); } static int __ima_inode_hash(struct inode *inode, struct file *file, char *buf, size_t buf_size) { struct ima_iint_cache *iint = NULL, tmp_iint; int rc, hash_algo; if (ima_policy_flag) { iint = ima_iint_find(inode); if (iint) mutex_lock(&iint->mutex); } if ((!iint || !(iint->flags & IMA_COLLECTED)) && file) { if (iint) mutex_unlock(&iint->mutex); memset(&tmp_iint, 0, sizeof(tmp_iint)); mutex_init(&tmp_iint.mutex); rc = ima_collect_measurement(&tmp_iint, file, NULL, 0, ima_hash_algo, NULL); if (rc < 0) { /* ima_hash could be allocated in case of failure. 
*/ if (rc != -ENOMEM) kfree(tmp_iint.ima_hash); return -EOPNOTSUPP; } iint = &tmp_iint; mutex_lock(&iint->mutex); } if (!iint) return -EOPNOTSUPP; /* * ima_file_hash can be called when ima_collect_measurement has still * not been called, we might not always have a hash. */ if (!iint->ima_hash || !(iint->flags & IMA_COLLECTED)) { mutex_unlock(&iint->mutex); return -EOPNOTSUPP; } if (buf) { size_t copied_size; copied_size = min_t(size_t, iint->ima_hash->length, buf_size); memcpy(buf, iint->ima_hash->digest, copied_size); } hash_algo = iint->ima_hash->algo; mutex_unlock(&iint->mutex); if (iint == &tmp_iint) kfree(iint->ima_hash); return hash_algo; } /** * ima_file_hash - return a measurement of the file * @file: pointer to the file * @buf: buffer in which to store the hash * @buf_size: length of the buffer * * On success, return the hash algorithm (as defined in the enum hash_algo). * If buf is not NULL, this function also outputs the hash into buf. * If the hash is larger than buf_size, then only buf_size bytes will be copied. * It generally just makes sense to pass a buffer capable of holding the largest * possible hash: IMA_MAX_DIGEST_SIZE. * The file hash returned is based on the entire file, including the appended * signature. * * If the measurement cannot be performed, return -EOPNOTSUPP. * If the parameters are incorrect, return -EINVAL. */ int ima_file_hash(struct file *file, char *buf, size_t buf_size) { if (!file) return -EINVAL; return __ima_inode_hash(file_inode(file), file, buf, buf_size); } EXPORT_SYMBOL_GPL(ima_file_hash); /** * ima_inode_hash - return the stored measurement if the inode has been hashed * and is in the iint cache. * @inode: pointer to the inode * @buf: buffer in which to store the hash * @buf_size: length of the buffer * * On success, return the hash algorithm (as defined in the enum hash_algo). * If buf is not NULL, this function also outputs the hash into buf. * If the hash is larger than buf_size, then only buf_size bytes will be copied. * It generally just makes sense to pass a buffer capable of holding the largest * possible hash: IMA_MAX_DIGEST_SIZE. * The hash returned is based on the entire contents, including the appended * signature. * * If IMA is disabled or if no measurement is available, return -EOPNOTSUPP. * If the parameters are incorrect, return -EINVAL. */ int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size) { if (!inode) return -EINVAL; return __ima_inode_hash(inode, NULL, buf, buf_size); } EXPORT_SYMBOL_GPL(ima_inode_hash); /** * ima_post_create_tmpfile - mark newly created tmpfile as new * @idmap: idmap of the mount the inode was found from * @inode: inode of the newly created tmpfile * * No measuring, appraising or auditing of newly created tmpfiles is needed. * Skip calling process_measurement(), but indicate which newly, created * tmpfiles are in policy. 
*/ static void ima_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) { struct ima_iint_cache *iint; int must_appraise; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS, FILE_CHECK); if (!must_appraise) return; /* Nothing to do if we can't allocate memory */ iint = ima_inode_get(inode); if (!iint) return; /* needed for writing the security xattrs */ set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); iint->ima_file_status = INTEGRITY_PASS; } /** * ima_post_path_mknod - mark as a new inode * @idmap: idmap of the mount the inode was found from * @dentry: newly created dentry * * Mark files created via the mknodat syscall as new, so that the * file data can be written later. */ static void ima_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { struct ima_iint_cache *iint; struct inode *inode = dentry->d_inode; int must_appraise; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS, FILE_CHECK); if (!must_appraise) return; /* Nothing to do if we can't allocate memory */ iint = ima_inode_get(inode); if (!iint) return; /* needed for re-opening empty files */ iint->flags |= IMA_NEW_FILE; } /** * ima_read_file - pre-measure/appraise hook decision based on policy * @file: pointer to the file to be measured/appraised/audit * @read_id: caller identifier * @contents: whether a subsequent call will be made to ima_post_read_file() * * Permit reading a file based on policy. The policy rules are written * in terms of the policy identifier. Appraising the integrity of * a file requires a file descriptor. * * For permission return 0, otherwise return -EACCES. */ static int ima_read_file(struct file *file, enum kernel_read_file_id read_id, bool contents) { enum ima_hooks func; u32 secid; /* * Do devices using pre-allocated memory run the risk of the * firmware being accessible to the device prior to the completion * of IMA's signature verification any more than when using two * buffers? It may be desirable to include the buffer address * in this API and walk all the dma_map_single() mappings to check. */ /* * There will be a call made to ima_post_read_file() with * a filled buffer, so we don't need to perform an extra * read early here. */ if (contents) return 0; /* Read entire file for all partial reads. */ func = read_idmap[read_id] ?: FILE_CHECK; security_current_getsecid_subj(&secid); return process_measurement(file, current_cred(), secid, NULL, 0, MAY_READ, func); } const int read_idmap[READING_MAX_ID] = { [READING_FIRMWARE] = FIRMWARE_CHECK, [READING_MODULE] = MODULE_CHECK, [READING_KEXEC_IMAGE] = KEXEC_KERNEL_CHECK, [READING_KEXEC_INITRAMFS] = KEXEC_INITRAMFS_CHECK, [READING_POLICY] = POLICY_CHECK }; /** * ima_post_read_file - in memory collect/appraise/audit measurement * @file: pointer to the file to be measured/appraised/audit * @buf: pointer to in memory file contents * @size: size of in memory file contents * @read_id: caller identifier * * Measure/appraise/audit in memory file based on policy. Policy rules * are written in terms of a policy identifier. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. 
*/ static int ima_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id read_id) { enum ima_hooks func; u32 secid; /* permit signed certs */ if (!file && read_id == READING_X509_CERTIFICATE) return 0; if (!file || !buf || size == 0) { /* should never happen */ if (ima_appraise & IMA_APPRAISE_ENFORCE) return -EACCES; return 0; } func = read_idmap[read_id] ?: FILE_CHECK; security_current_getsecid_subj(&secid); return process_measurement(file, current_cred(), secid, buf, size, MAY_READ, func); } /** * ima_load_data - appraise decision based on policy * @id: kernel load data caller identifier * @contents: whether the full contents will be available in a later * call to ima_post_load_data(). * * Callers of this LSM hook can not measure, appraise, or audit the * data provided by userspace. Enforce policy rules requiring a file * signature (eg. kexec'ed kernel image). * * For permission return 0, otherwise return -EACCES. */ static int ima_load_data(enum kernel_load_data_id id, bool contents) { bool ima_enforce, sig_enforce; ima_enforce = (ima_appraise & IMA_APPRAISE_ENFORCE) == IMA_APPRAISE_ENFORCE; switch (id) { case LOADING_KEXEC_IMAGE: if (IS_ENABLED(CONFIG_KEXEC_SIG) && arch_ima_get_secureboot()) { pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n"); return -EACCES; } if (ima_enforce && (ima_appraise & IMA_APPRAISE_KEXEC)) { pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; case LOADING_FIRMWARE: if (ima_enforce && (ima_appraise & IMA_APPRAISE_FIRMWARE) && !contents) { pr_err("Prevent firmware sysfs fallback loading.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; case LOADING_MODULE: sig_enforce = is_module_sig_enforced(); if (ima_enforce && (!sig_enforce && (ima_appraise & IMA_APPRAISE_MODULES))) { pr_err("impossible to appraise a module without a file descriptor. sig_enforce kernel parameter might help\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; default: break; } return 0; } /** * ima_post_load_data - appraise decision based on policy * @buf: pointer to in memory file contents * @size: size of in memory file contents * @load_id: kernel load data caller identifier * @description: @load_id-specific description of contents * * Measure/appraise/audit in memory buffer based on policy. Policy rules * are written in terms of a policy identifier. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_post_load_data(char *buf, loff_t size, enum kernel_load_data_id load_id, char *description) { if (load_id == LOADING_FIRMWARE) { if ((ima_appraise & IMA_APPRAISE_FIRMWARE) && (ima_appraise & IMA_APPRAISE_ENFORCE)) { pr_err("Prevent firmware loading_store.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } return 0; } return 0; } /** * process_buffer_measurement - Measure the buffer or the buffer data hash * @idmap: idmap of the mount the inode was found from * @inode: inode associated with the object being measured (NULL for KEY_CHECK) * @buf: pointer to the buffer that needs to be added to the log. * @size: size of buffer(in bytes). * @eventname: event name to be used for the buffer entry. 
* @func: IMA hook * @pcr: pcr to extend the measurement * @func_data: func specific data, may be NULL * @buf_hash: measure buffer data hash * @digest: buffer digest will be written to * @digest_len: buffer length * * Based on policy, either the buffer data or buffer data hash is measured * * Return: 0 if the buffer has been successfully measured, 1 if the digest * has been written to the passed location but not added to a measurement entry, * a negative value otherwise. */ int process_buffer_measurement(struct mnt_idmap *idmap, struct inode *inode, const void *buf, int size, const char *eventname, enum ima_hooks func, int pcr, const char *func_data, bool buf_hash, u8 *digest, size_t digest_len) { int ret = 0; const char *audit_cause = "ENOMEM"; struct ima_template_entry *entry = NULL; struct ima_iint_cache iint = {}; struct ima_event_data event_data = {.iint = &iint, .filename = eventname, .buf = buf, .buf_len = size}; struct ima_template_desc *template; struct ima_max_digest_data hash; char digest_hash[IMA_MAX_DIGEST_SIZE]; int digest_hash_len = hash_digest_size[ima_hash_algo]; int violation = 0; int action = 0; u32 secid; if (digest && digest_len < digest_hash_len) return -EINVAL; if (!ima_policy_flag && !digest) return -ENOENT; template = ima_template_desc_buf(); if (!template) { ret = -EINVAL; audit_cause = "ima_template_desc_buf"; goto out; } /* * Both LSM hooks and auxilary based buffer measurements are * based on policy. To avoid code duplication, differentiate * between the LSM hooks and auxilary buffer measurements, * retrieving the policy rule information only for the LSM hook * buffer measurements. */ if (func) { security_current_getsecid_subj(&secid); action = ima_get_action(idmap, inode, current_cred(), secid, 0, func, &pcr, &template, func_data, NULL); if (!(action & IMA_MEASURE) && !digest) return -ENOENT; } if (!pcr) pcr = CONFIG_IMA_MEASURE_PCR_IDX; iint.ima_hash = &hash.hdr; iint.ima_hash->algo = ima_hash_algo; iint.ima_hash->length = hash_digest_size[ima_hash_algo]; ret = ima_calc_buffer_hash(buf, size, iint.ima_hash); if (ret < 0) { audit_cause = "hashing_error"; goto out; } if (buf_hash) { memcpy(digest_hash, hash.hdr.digest, digest_hash_len); ret = ima_calc_buffer_hash(digest_hash, digest_hash_len, iint.ima_hash); if (ret < 0) { audit_cause = "hashing_error"; goto out; } event_data.buf = digest_hash; event_data.buf_len = digest_hash_len; } if (digest) memcpy(digest, iint.ima_hash->digest, digest_hash_len); if (!ima_policy_flag || (func && !(action & IMA_MEASURE))) return 1; ret = ima_alloc_init_template(&event_data, &entry, template); if (ret < 0) { audit_cause = "alloc_entry"; goto out; } ret = ima_store_template(entry, violation, NULL, event_data.buf, pcr); if (ret < 0) { audit_cause = "store_entry"; ima_free_template_entry(entry); } out: if (ret < 0) integrity_audit_message(AUDIT_INTEGRITY_PCR, NULL, eventname, func_measure_str(func), audit_cause, ret, 0, ret); return ret; } /** * ima_kexec_cmdline - measure kexec cmdline boot args * @kernel_fd: file descriptor of the kexec kernel being loaded * @buf: pointer to buffer * @size: size of buffer * * Buffers can only be measured, not appraised. 
*/ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) { struct fd f; if (!buf || !size) return; f = fdget(kernel_fd); if (!f.file) return; process_buffer_measurement(file_mnt_idmap(f.file), file_inode(f.file), buf, size, "kexec-cmdline", KEXEC_CMDLINE, 0, NULL, false, NULL, 0); fdput(f); } /** * ima_measure_critical_data - measure kernel integrity critical data * @event_label: unique event label for grouping and limiting critical data * @event_name: event name for the record in the IMA measurement list * @buf: pointer to buffer data * @buf_len: length of buffer data (in bytes) * @hash: measure buffer data hash * @digest: buffer digest will be written to * @digest_len: buffer length * * Measure data critical to the integrity of the kernel into the IMA log * and extend the pcr. Examples of critical data could be various data * structures, policies, and states stored in kernel memory that can * impact the integrity of the system. * * Return: 0 if the buffer has been successfully measured, 1 if the digest * has been written to the passed location but not added to a measurement entry, * a negative value otherwise. */ int ima_measure_critical_data(const char *event_label, const char *event_name, const void *buf, size_t buf_len, bool hash, u8 *digest, size_t digest_len) { if (!event_name || !event_label || !buf || !buf_len) return -ENOPARAM; return process_buffer_measurement(&nop_mnt_idmap, NULL, buf, buf_len, event_name, CRITICAL_DATA, 0, event_label, hash, digest, digest_len); } EXPORT_SYMBOL_GPL(ima_measure_critical_data); #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS /** * ima_kernel_module_request - Prevent crypto-pkcs1pad(rsa,*) requests * @kmod_name: kernel module name * * Avoid a verification loop where verifying the signature of the modprobe * binary requires executing modprobe itself. Since the modprobe iint->mutex * is already held when the signature verification is performed, a deadlock * occurs as soon as modprobe is executed within the critical region, since * the same lock cannot be taken again. * * This happens when public_key_verify_signature(), in case of RSA algorithm, * use alg_name to store internal information in order to construct an * algorithm on the fly, but crypto_larval_lookup() will try to use alg_name * in order to load a kernel module with same name. * * Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules, * we are safe to fail such module request from crypto_larval_lookup(), and * avoid the verification loop. * * Return: Zero if it is safe to load the kernel module, -EINVAL otherwise. 
*/ static int ima_kernel_module_request(char *kmod_name) { if (strncmp(kmod_name, "crypto-pkcs1pad(rsa,", 20) == 0) return -EINVAL; return 0; } #endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */ static int __init init_ima(void) { int error; ima_appraise_parse_cmdline(); ima_init_template_list(); hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); if (error && strcmp(hash_algo_name[ima_hash_algo], CONFIG_IMA_DEFAULT_HASH) != 0) { pr_info("Allocating %s failed, going to use default hash algorithm %s\n", hash_algo_name[ima_hash_algo], CONFIG_IMA_DEFAULT_HASH); hash_setup_done = 0; hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); } if (error) return error; error = register_blocking_lsm_notifier(&ima_lsm_policy_notifier); if (error) pr_warn("Couldn't register LSM notifier, error %d\n", error); if (!error) ima_update_policy_flags(); return error; } static struct security_hook_list ima_hooks[] __ro_after_init = { LSM_HOOK_INIT(bprm_check_security, ima_bprm_check), LSM_HOOK_INIT(file_post_open, ima_file_check), LSM_HOOK_INIT(inode_post_create_tmpfile, ima_post_create_tmpfile), LSM_HOOK_INIT(file_release, ima_file_free), LSM_HOOK_INIT(mmap_file, ima_file_mmap), LSM_HOOK_INIT(file_mprotect, ima_file_mprotect), LSM_HOOK_INIT(kernel_load_data, ima_load_data), LSM_HOOK_INIT(kernel_post_load_data, ima_post_load_data), LSM_HOOK_INIT(kernel_read_file, ima_read_file), LSM_HOOK_INIT(kernel_post_read_file, ima_post_read_file), LSM_HOOK_INIT(path_post_mknod, ima_post_path_mknod), #ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS LSM_HOOK_INIT(key_post_create_or_update, ima_post_key_create_or_update), #endif #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request), #endif LSM_HOOK_INIT(inode_free_security, ima_inode_free), }; static const struct lsm_id ima_lsmid = { .name = "ima", .id = LSM_ID_IMA, }; static int __init init_ima_lsm(void) { ima_iintcache_init(); security_add_hooks(ima_hooks, ARRAY_SIZE(ima_hooks), &ima_lsmid); init_ima_appraise_lsm(&ima_lsmid); return 0; } struct lsm_blob_sizes ima_blob_sizes __ro_after_init = { .lbs_inode = sizeof(struct ima_iint_cache *), }; DEFINE_LSM(ima) = { .name = "ima", .init = init_ima_lsm, .order = LSM_ORDER_LAST, .blobs = &ima_blob_sizes, }; late_initcall(init_ima); /* Start IMA after the TPM is available */
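/*
 * A minimal usage sketch, not part of ima_main.c: how a kernel module might
 * call the two helpers exported above, ima_file_hash() and
 * ima_measure_critical_data().  The module name, the "/etc/hostname" path,
 * the event label/name strings and the 64-byte buffer size are illustrative
 * assumptions, not taken from this file.
 */
#include <linux/module.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/fcntl.h>
#include <linux/ima.h>

static int __init ima_usage_example_init(void)
{
	static const char data[] = "example critical state";
	char digest[64];	/* large enough for the largest supported hash */
	struct file *file;
	int algo, ret;

	/* Measure an in-memory buffer; 0 means it was added to the IMA log. */
	ret = ima_measure_critical_data("example_label", "example_event",
					data, sizeof(data) - 1,
					false, NULL, 0);
	if (ret)
		pr_info("critical data not measured: %d\n", ret);

	/* Ask IMA for the collected hash of a file, if it has one. */
	file = filp_open("/etc/hostname", O_RDONLY, 0);
	if (IS_ERR(file))
		return 0;

	algo = ima_file_hash(file, digest, sizeof(digest));
	if (algo >= 0)
		pr_info("IMA hash available, algorithm %d\n", algo);
	else
		pr_info("no IMA hash for this file: %d\n", algo);

	filp_close(file, NULL);
	return 0;
}
module_init(ima_usage_example_init);

MODULE_DESCRIPTION("Illustrative consumer of the IMA hash/measure helpers");
MODULE_LICENSE("GPL");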
// SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/char/misc.c * * Generic misc open routine by Johan Myreen * * Based on code from Linus * * Teemu Rantanen's Microsoft Busmouse support and Derrick Cole's * changes incorporated into 0.97pl4 * by Peter Cervasio (pete%q106fm.uucp@wupost.wustl.edu) (08SEP92) * See busmouse.c for particulars. * * Made things a lot more modular - easy to compile in just one or two * of the misc drivers, as they are now completely independent. Linus. * * Support for loadable modules. 8-Sep-95 Philip Blundell <pjb27@cam.ac.uk> * * Fixed a failing symbol register to free the device registration * Alan Cox <alan@lxorguk.ukuu.org.uk> 21-Jan-96 * * Dynamic minors and /proc/mice by Alessandro Rubini. 26-Mar-96 * * Renamed to misc and miscdevice to be more accurate. Alan Cox 26-Mar-96 * * Handling of mouse minor numbers for kerneld: * Idea by Jacques Gelinas <jack@solucorp.qc.ca>, * adapted by Bjorn Ekwall <bj0rn@blox.se> * corrected by Alan Cox <alan@lxorguk.ukuu.org.uk> * * Changes for kmod (from kerneld): * Cyrus Durgin <cider@speakeasy.org> * * Added devfs support. 
Richard Gooch <rgooch@atnf.csiro.au> 10-Jan-1998 */ #include <linux/module.h> #include <linux/fs.h> #include <linux/errno.h> #include <linux/miscdevice.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/device.h> #include <linux/tty.h> #include <linux/kmod.h> #include <linux/gfp.h> /* * Head entry for the doubly linked miscdevice list */ static LIST_HEAD(misc_list); static DEFINE_MUTEX(misc_mtx); /* * Assigned numbers, used for dynamic minors */ #define DYNAMIC_MINORS 128 /* like dynamic majors */ static DEFINE_IDA(misc_minors_ida); static int misc_minor_alloc(void) { int ret; ret = ida_alloc_max(&misc_minors_ida, DYNAMIC_MINORS - 1, GFP_KERNEL); if (ret >= 0) { ret = DYNAMIC_MINORS - ret - 1; } else { ret = ida_alloc_range(&misc_minors_ida, MISC_DYNAMIC_MINOR + 1, MINORMASK, GFP_KERNEL); } return ret; } static void misc_minor_free(int minor) { if (minor < DYNAMIC_MINORS) ida_free(&misc_minors_ida, DYNAMIC_MINORS - minor - 1); else if (minor > MISC_DYNAMIC_MINOR) ida_free(&misc_minors_ida, minor); } #ifdef CONFIG_PROC_FS static void *misc_seq_start(struct seq_file *seq, loff_t *pos) { mutex_lock(&misc_mtx); return seq_list_start(&misc_list, *pos); } static void *misc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_list_next(v, &misc_list, pos); } static void misc_seq_stop(struct seq_file *seq, void *v) { mutex_unlock(&misc_mtx); } static int misc_seq_show(struct seq_file *seq, void *v) { const struct miscdevice *p = list_entry(v, struct miscdevice, list); seq_printf(seq, "%3i %s\n", p->minor, p->name ? p->name : ""); return 0; } static const struct seq_operations misc_seq_ops = { .start = misc_seq_start, .next = misc_seq_next, .stop = misc_seq_stop, .show = misc_seq_show, }; #endif static int misc_open(struct inode *inode, struct file *file) { int minor = iminor(inode); struct miscdevice *c = NULL, *iter; int err = -ENODEV; const struct file_operations *new_fops = NULL; mutex_lock(&misc_mtx); list_for_each_entry(iter, &misc_list, list) { if (iter->minor != minor) continue; c = iter; new_fops = fops_get(iter->fops); break; } if (!new_fops) { mutex_unlock(&misc_mtx); request_module("char-major-%d-%d", MISC_MAJOR, minor); mutex_lock(&misc_mtx); list_for_each_entry(iter, &misc_list, list) { if (iter->minor != minor) continue; c = iter; new_fops = fops_get(iter->fops); break; } if (!new_fops) goto fail; } /* * Place the miscdevice in the file's * private_data so it can be used by the * file operations, including f_op->open below */ file->private_data = c; err = 0; replace_fops(file, new_fops); if (file->f_op->open) err = file->f_op->open(inode, file); fail: mutex_unlock(&misc_mtx); return err; } static char *misc_devnode(const struct device *dev, umode_t *mode) { const struct miscdevice *c = dev_get_drvdata(dev); if (mode && c->mode) *mode = c->mode; if (c->nodename) return kstrdup(c->nodename, GFP_KERNEL); return NULL; } static const struct class misc_class = { .name = "misc", .devnode = misc_devnode, }; static const struct file_operations misc_fops = { .owner = THIS_MODULE, .open = misc_open, .llseek = noop_llseek, }; /** * misc_register - register a miscellaneous device * @misc: device structure * * Register a miscellaneous device with the kernel. If the minor * number is set to %MISC_DYNAMIC_MINOR a minor number is assigned * and placed in the minor field of the structure. For other cases * the minor number requested is used. 
* * The structure passed is linked into the kernel and may not be * destroyed until it has been unregistered. By default, an open() * syscall to the device sets file->private_data to point to the * structure. Drivers don't need open in fops for this. * * A zero is returned on success and a negative errno code for * failure. */ int misc_register(struct miscdevice *misc) { dev_t dev; int err = 0; bool is_dynamic = (misc->minor == MISC_DYNAMIC_MINOR); INIT_LIST_HEAD(&misc->list); mutex_lock(&misc_mtx); if (is_dynamic) { int i = misc_minor_alloc(); if (i < 0) { err = -EBUSY; goto out; } misc->minor = i; } else { struct miscdevice *c; list_for_each_entry(c, &misc_list, list) { if (c->minor == misc->minor) { err = -EBUSY; goto out; } } } dev = MKDEV(MISC_MAJOR, misc->minor); misc->this_device = device_create_with_groups(&misc_class, misc->parent, dev, misc, misc->groups, "%s", misc->name); if (IS_ERR(misc->this_device)) { if (is_dynamic) { misc_minor_free(misc->minor); misc->minor = MISC_DYNAMIC_MINOR; } err = PTR_ERR(misc->this_device); goto out; } /* * Add it to the front, so that later devices can "override" * earlier defaults */ list_add(&misc->list, &misc_list); out: mutex_unlock(&misc_mtx); return err; } EXPORT_SYMBOL(misc_register); /** * misc_deregister - unregister a miscellaneous device * @misc: device to unregister * * Unregister a miscellaneous device that was previously * successfully registered with misc_register(). */ void misc_deregister(struct miscdevice *misc) { if (WARN_ON(list_empty(&misc->list))) return; mutex_lock(&misc_mtx); list_del(&misc->list); device_destroy(&misc_class, MKDEV(MISC_MAJOR, misc->minor)); misc_minor_free(misc->minor); mutex_unlock(&misc_mtx); } EXPORT_SYMBOL(misc_deregister); static int __init misc_init(void) { int err; struct proc_dir_entry *ret; ret = proc_create_seq("misc", 0, NULL, &misc_seq_ops); err = class_register(&misc_class); if (err) goto fail_remove; err = -EIO; if (register_chrdev(MISC_MAJOR, "misc", &misc_fops)) goto fail_printk; return 0; fail_printk: pr_err("unable to get major %d for misc devices\n", MISC_MAJOR); class_unregister(&misc_class); fail_remove: if (ret) remove_proc_entry("misc", NULL); return err; } subsys_initcall(misc_init);
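/*
 * A minimal usage sketch, not part of misc.c: a driver registering a
 * dynamic-minor misc device through the API above.  The "example-misc"
 * name, the fops and the message returned by read() are illustrative
 * assumptions, not taken from this file.
 */
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>

static const char example_msg[] = "hello from example-misc\n";

static ssize_t example_misc_read(struct file *file, char __user *buf,
				 size_t count, loff_t *ppos)
{
	/* Hand back a static message; simple_read_from_buffer() tracks *ppos. */
	return simple_read_from_buffer(buf, count, ppos,
				       example_msg, sizeof(example_msg) - 1);
}

static const struct file_operations example_misc_fops = {
	.owner = THIS_MODULE,
	.read  = example_misc_read,
};

static struct miscdevice example_misc_dev = {
	.minor = MISC_DYNAMIC_MINOR,	/* ask misc_register() to pick a minor */
	.name  = "example-misc",	/* typically surfaces as /dev/example-misc */
	.fops  = &example_misc_fops,
};

static int __init example_misc_init(void)
{
	/* On success the allocated minor is written back into .minor. */
	return misc_register(&example_misc_dev);
}

static void __exit example_misc_exit(void)
{
	misc_deregister(&example_misc_dev);
}
module_init(example_misc_init);
module_exit(example_misc_exit);

MODULE_DESCRIPTION("Illustrative misc_register()/misc_deregister() user");
MODULE_LICENSE("GPL");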
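/*
 * A minimal usage sketch placed ahead of the i2c-core code reproduced below:
 * instantiating clients with i2c_new_client_device() and
 * i2c_new_dummy_device(), as described in their kerneldoc further down.
 * The adapter pointer, the "example-sensor" type string and the 0x48/0x49
 * addresses are illustrative assumptions, not taken from this file; a
 * hypothetical board-setup or probe path would call example_attach_clients()
 * with a real adapter.
 */
#include <linux/err.h>
#include <linux/i2c.h>

static int example_attach_clients(struct i2c_adapter *adap)
{
	struct i2c_board_info info = {
		I2C_BOARD_INFO("example-sensor", 0x48),	/* type + 7-bit address */
	};
	struct i2c_client *client, *second_page;

	/* Bind a driver through the normal driver-model probe path. */
	client = i2c_new_client_device(adap, &info);
	if (IS_ERR(client))
		return PTR_ERR(client);

	/*
	 * Reserve a second address the same chip responds on; the "dummy"
	 * driver keeps other drivers from claiming it.
	 */
	second_page = i2c_new_dummy_device(adap, 0x49);
	if (IS_ERR(second_page)) {
		i2c_unregister_device(client);
		return PTR_ERR(second_page);
	}

	/* Both handles would normally be kept and released on teardown. */
	i2c_unregister_device(second_page);
	i2c_unregister_device(client);
	return 0;
}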
// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux I2C core * * Copyright (C) 1995-99 Simon G. 
Vogl * With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi> * Mux support by Rodolfo Giometti <giometti@enneenne.com> and * Michael Lawnick <michael.lawnick.ext@nsn.com> * * Copyright (C) 2013-2017 Wolfram Sang <wsa@kernel.org> */ #define pr_fmt(fmt) "i2c-core: " fmt #include <dt-bindings/i2c/i2c.h> #include <linux/acpi.h> #include <linux/clk/clk-conf.h> #include <linux/completion.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/gpio/consumer.h> #include <linux/i2c.h> #include <linux/i2c-smbus.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/irqflags.h> #include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/of_device.h> #include <linux/of.h> #include <linux/of_irq.h> #include <linux/pinctrl/consumer.h> #include <linux/pinctrl/devinfo.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/pm_wakeirq.h> #include <linux/property.h> #include <linux/rwsem.h> #include <linux/slab.h> #include "i2c-core.h" #define CREATE_TRACE_POINTS #include <trace/events/i2c.h> #define I2C_ADDR_OFFSET_TEN_BIT 0xa000 #define I2C_ADDR_OFFSET_SLAVE 0x1000 #define I2C_ADDR_7BITS_MAX 0x77 #define I2C_ADDR_7BITS_COUNT (I2C_ADDR_7BITS_MAX + 1) #define I2C_ADDR_DEVICE_ID 0x7c /* * core_lock protects i2c_adapter_idr, and guarantees that device detection, * deletion of detected devices are serialized */ static DEFINE_MUTEX(core_lock); static DEFINE_IDR(i2c_adapter_idr); static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver); static DEFINE_STATIC_KEY_FALSE(i2c_trace_msg_key); static bool is_registered; static struct dentry *i2c_debugfs_root; int i2c_transfer_trace_reg(void) { static_branch_inc(&i2c_trace_msg_key); return 0; } void i2c_transfer_trace_unreg(void) { static_branch_dec(&i2c_trace_msg_key); } const char *i2c_freq_mode_string(u32 bus_freq_hz) { switch (bus_freq_hz) { case I2C_MAX_STANDARD_MODE_FREQ: return "Standard Mode (100 kHz)"; case I2C_MAX_FAST_MODE_FREQ: return "Fast Mode (400 kHz)"; case I2C_MAX_FAST_MODE_PLUS_FREQ: return "Fast Mode Plus (1.0 MHz)"; case I2C_MAX_TURBO_MODE_FREQ: return "Turbo Mode (1.4 MHz)"; case I2C_MAX_HIGH_SPEED_MODE_FREQ: return "High Speed Mode (3.4 MHz)"; case I2C_MAX_ULTRA_FAST_MODE_FREQ: return "Ultra Fast Mode (5.0 MHz)"; default: return "Unknown Mode"; } } EXPORT_SYMBOL_GPL(i2c_freq_mode_string); const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, const struct i2c_client *client) { if (!(id && client)) return NULL; while (id->name[0]) { if (strcmp(client->name, id->name) == 0) return id; id++; } return NULL; } EXPORT_SYMBOL_GPL(i2c_match_id); const void *i2c_get_match_data(const struct i2c_client *client) { struct i2c_driver *driver = to_i2c_driver(client->dev.driver); const struct i2c_device_id *match; const void *data; data = device_get_match_data(&client->dev); if (!data) { match = i2c_match_id(driver->id_table, client); if (!match) return NULL; data = (const void *)match->driver_data; } return data; } EXPORT_SYMBOL(i2c_get_match_data); static int i2c_device_match(struct device *dev, struct device_driver *drv) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_driver *driver; /* Attempt an OF style match */ if (i2c_of_match_device(drv->of_match_table, client)) return 1; /* Then ACPI style match */ if (acpi_driver_match_device(dev, drv)) return 1; driver = to_i2c_driver(drv); /* Finally an I2C match */ if 
(i2c_match_id(driver->id_table, client)) return 1; return 0; } static int i2c_device_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct i2c_client *client = to_i2c_client(dev); int rc; rc = of_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; rc = acpi_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; return add_uevent_var(env, "MODALIAS=%s%s", I2C_MODULE_PREFIX, client->name); } /* i2c bus recovery routines */ static int get_scl_gpio_value(struct i2c_adapter *adap) { return gpiod_get_value_cansleep(adap->bus_recovery_info->scl_gpiod); } static void set_scl_gpio_value(struct i2c_adapter *adap, int val) { gpiod_set_value_cansleep(adap->bus_recovery_info->scl_gpiod, val); } static int get_sda_gpio_value(struct i2c_adapter *adap) { return gpiod_get_value_cansleep(adap->bus_recovery_info->sda_gpiod); } static void set_sda_gpio_value(struct i2c_adapter *adap, int val) { gpiod_set_value_cansleep(adap->bus_recovery_info->sda_gpiod, val); } static int i2c_generic_bus_free(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; int ret = -EOPNOTSUPP; if (bri->get_bus_free) ret = bri->get_bus_free(adap); else if (bri->get_sda) ret = bri->get_sda(adap); if (ret < 0) return ret; return ret ? 0 : -EBUSY; } /* * We are generating clock pulses. ndelay() determines durating of clk pulses. * We will generate clock with rate 100 KHz and so duration of both clock levels * is: delay in ns = (10^6 / 100) / 2 */ #define RECOVERY_NDELAY 5000 #define RECOVERY_CLK_CNT 9 int i2c_generic_scl_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; int i = 0, scl = 1, ret = 0; if (bri->prepare_recovery) bri->prepare_recovery(adap); if (bri->pinctrl) pinctrl_select_state(bri->pinctrl, bri->pins_gpio); /* * If we can set SDA, we will always create a STOP to ensure additional * pulses will do no harm. This is achieved by letting SDA follow SCL * half a cycle later. Check the 'incomplete_write_byte' fault injector * for details. Note that we must honour tsu:sto, 4us, but lets use 5us * here for simplicity. 
*/ bri->set_scl(adap, scl); ndelay(RECOVERY_NDELAY); if (bri->set_sda) bri->set_sda(adap, scl); ndelay(RECOVERY_NDELAY / 2); /* * By this time SCL is high, as we need to give 9 falling-rising edges */ while (i++ < RECOVERY_CLK_CNT * 2) { if (scl) { /* SCL shouldn't be low here */ if (!bri->get_scl(adap)) { dev_err(&adap->dev, "SCL is stuck low, exit recovery\n"); ret = -EBUSY; break; } } scl = !scl; bri->set_scl(adap, scl); /* Creating STOP again, see above */ if (scl) { /* Honour minimum tsu:sto */ ndelay(RECOVERY_NDELAY); } else { /* Honour minimum tf and thd:dat */ ndelay(RECOVERY_NDELAY / 2); } if (bri->set_sda) bri->set_sda(adap, scl); ndelay(RECOVERY_NDELAY / 2); if (scl) { ret = i2c_generic_bus_free(adap); if (ret == 0) break; } } /* If we can't check bus status, assume recovery worked */ if (ret == -EOPNOTSUPP) ret = 0; if (bri->unprepare_recovery) bri->unprepare_recovery(adap); if (bri->pinctrl) pinctrl_select_state(bri->pinctrl, bri->pins_default); return ret; } EXPORT_SYMBOL_GPL(i2c_generic_scl_recovery); int i2c_recover_bus(struct i2c_adapter *adap) { if (!adap->bus_recovery_info) return -EBUSY; dev_dbg(&adap->dev, "Trying i2c bus recovery\n"); return adap->bus_recovery_info->recover_bus(adap); } EXPORT_SYMBOL_GPL(i2c_recover_bus); static void i2c_gpio_init_pinctrl_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; struct device *dev = &adap->dev; struct pinctrl *p = bri->pinctrl ?: dev_pinctrl(dev->parent); bri->pinctrl = p; /* * we can't change states without pinctrl, so remove the states if * populated */ if (!p) { bri->pins_default = NULL; bri->pins_gpio = NULL; return; } if (!bri->pins_default) { bri->pins_default = pinctrl_lookup_state(p, PINCTRL_STATE_DEFAULT); if (IS_ERR(bri->pins_default)) { dev_dbg(dev, PINCTRL_STATE_DEFAULT " state not found for GPIO recovery\n"); bri->pins_default = NULL; } } if (!bri->pins_gpio) { bri->pins_gpio = pinctrl_lookup_state(p, "gpio"); if (IS_ERR(bri->pins_gpio)) bri->pins_gpio = pinctrl_lookup_state(p, "recovery"); if (IS_ERR(bri->pins_gpio)) { dev_dbg(dev, "no gpio or recovery state found for GPIO recovery\n"); bri->pins_gpio = NULL; } } /* for pinctrl state changes, we need all the information */ if (bri->pins_default && bri->pins_gpio) { dev_info(dev, "using pinctrl states for GPIO recovery"); } else { bri->pinctrl = NULL; bri->pins_default = NULL; bri->pins_gpio = NULL; } } static int i2c_gpio_init_generic_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; struct device *dev = &adap->dev; struct gpio_desc *gpiod; int ret = 0; /* * don't touch the recovery information if the driver is not using * generic SCL recovery */ if (bri->recover_bus && bri->recover_bus != i2c_generic_scl_recovery) return 0; /* * pins might be taken as GPIO, so we should inform pinctrl about * this and move the state to GPIO */ if (bri->pinctrl) pinctrl_select_state(bri->pinctrl, bri->pins_gpio); /* * if there is incomplete or no recovery information, see if generic * GPIO recovery is available */ if (!bri->scl_gpiod) { gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN); if (PTR_ERR(gpiod) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; goto cleanup_pinctrl_state; } if (!IS_ERR(gpiod)) { bri->scl_gpiod = gpiod; bri->recover_bus = i2c_generic_scl_recovery; dev_info(dev, "using generic GPIOs for recovery\n"); } } /* SDA GPIOD line is optional, so we care about DEFER only */ if (!bri->sda_gpiod) { /* * We have SCL. 
Pull SCL low and wait a bit so that SDA glitches * have no effect. */ gpiod_direction_output(bri->scl_gpiod, 0); udelay(10); gpiod = devm_gpiod_get(dev, "sda", GPIOD_IN); /* Wait a bit in case of a SDA glitch, and then release SCL. */ udelay(10); gpiod_direction_output(bri->scl_gpiod, 1); if (PTR_ERR(gpiod) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; goto cleanup_pinctrl_state; } if (!IS_ERR(gpiod)) bri->sda_gpiod = gpiod; } cleanup_pinctrl_state: /* change the state of the pins back to their default state */ if (bri->pinctrl) pinctrl_select_state(bri->pinctrl, bri->pins_default); return ret; } static int i2c_gpio_init_recovery(struct i2c_adapter *adap) { i2c_gpio_init_pinctrl_recovery(adap); return i2c_gpio_init_generic_recovery(adap); } static int i2c_init_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; bool is_error_level = true; char *err_str; if (!bri) return 0; if (i2c_gpio_init_recovery(adap) == -EPROBE_DEFER) return -EPROBE_DEFER; if (!bri->recover_bus) { err_str = "no suitable method provided"; is_error_level = false; goto err; } if (bri->scl_gpiod && bri->recover_bus == i2c_generic_scl_recovery) { bri->get_scl = get_scl_gpio_value; bri->set_scl = set_scl_gpio_value; if (bri->sda_gpiod) { bri->get_sda = get_sda_gpio_value; /* FIXME: add proper flag instead of '0' once available */ if (gpiod_get_direction(bri->sda_gpiod) == 0) bri->set_sda = set_sda_gpio_value; } } else if (bri->recover_bus == i2c_generic_scl_recovery) { /* Generic SCL recovery */ if (!bri->set_scl || !bri->get_scl) { err_str = "no {get|set}_scl() found"; goto err; } if (!bri->set_sda && !bri->get_sda) { err_str = "either get_sda() or set_sda() needed"; goto err; } } return 0; err: if (is_error_level) dev_err(&adap->dev, "Not using recovery: %s\n", err_str); else dev_dbg(&adap->dev, "Not using recovery: %s\n", err_str); adap->bus_recovery_info = NULL; return -EINVAL; } static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client) { struct i2c_adapter *adap = client->adapter; unsigned int irq; if (!adap->host_notify_domain) return -ENXIO; if (client->flags & I2C_CLIENT_TEN) return -EINVAL; irq = irq_create_mapping(adap->host_notify_domain, client->addr); return irq > 0 ? irq : -ENXIO; } static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_driver *driver; bool do_power_on; int status; if (!client) return 0; client->irq = client->init_irq; if (!client->irq) { int irq = -ENOENT; if (client->flags & I2C_CLIENT_HOST_NOTIFY) { dev_dbg(dev, "Using Host Notify IRQ\n"); /* Keep adapter active when Host Notify is required */ pm_runtime_get_sync(&client->adapter->dev); irq = i2c_smbus_host_notify_to_irq(client); } else if (dev->of_node) { irq = of_irq_get_byname(dev->of_node, "irq"); if (irq == -EINVAL || irq == -ENODATA) irq = of_irq_get(dev->of_node, 0); } else if (ACPI_COMPANION(dev)) { bool wake_capable; irq = i2c_acpi_get_irq(client, &wake_capable); if (irq > 0 && wake_capable) client->flags |= I2C_CLIENT_WAKE; } if (irq == -EPROBE_DEFER) { status = irq; goto put_sync_adapter; } if (irq < 0) irq = 0; client->irq = irq; } driver = to_i2c_driver(dev->driver); /* * An I2C ID table is not mandatory, if and only if, a suitable OF * or ACPI ID table is supplied for the probing device. 
*/ if (!driver->id_table && !acpi_driver_match_device(dev, dev->driver) && !i2c_of_match_device(dev->driver->of_match_table, client)) { status = -ENODEV; goto put_sync_adapter; } if (client->flags & I2C_CLIENT_WAKE) { int wakeirq; wakeirq = of_irq_get_byname(dev->of_node, "wakeup"); if (wakeirq == -EPROBE_DEFER) { status = wakeirq; goto put_sync_adapter; } device_init_wakeup(&client->dev, true); if (wakeirq > 0 && wakeirq != client->irq) status = dev_pm_set_dedicated_wake_irq(dev, wakeirq); else if (client->irq > 0) status = dev_pm_set_wake_irq(dev, client->irq); else status = 0; if (status) dev_warn(&client->dev, "failed to set up wakeup irq\n"); } dev_dbg(dev, "probe\n"); status = of_clk_set_defaults(dev->of_node, false); if (status < 0) goto err_clear_wakeup_irq; do_power_on = !i2c_acpi_waive_d0_probe(dev); status = dev_pm_domain_attach(&client->dev, do_power_on); if (status) goto err_clear_wakeup_irq; client->devres_group_id = devres_open_group(&client->dev, NULL, GFP_KERNEL); if (!client->devres_group_id) { status = -ENOMEM; goto err_detach_pm_domain; } if (driver->probe) status = driver->probe(client); else status = -EINVAL; /* * Note that we are not closing the devres group opened above so * even resources that were attached to the device after probe is * run are released when i2c_device_remove() is executed. This is * needed as some drivers would allocate additional resources, * for example when updating firmware. */ if (status) goto err_release_driver_resources; return 0; err_release_driver_resources: devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: dev_pm_domain_detach(&client->dev, do_power_on); err_clear_wakeup_irq: dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); put_sync_adapter: if (client->flags & I2C_CLIENT_HOST_NOTIFY) pm_runtime_put_sync(&client->adapter->dev); return status; } static void i2c_device_remove(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct i2c_driver *driver; driver = to_i2c_driver(dev->driver); if (driver->remove) { dev_dbg(dev, "remove\n"); driver->remove(client); } devres_release_group(&client->dev, client->devres_group_id); dev_pm_domain_detach(&client->dev, true); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); client->irq = 0; if (client->flags & I2C_CLIENT_HOST_NOTIFY) pm_runtime_put(&client->adapter->dev); } static void i2c_device_shutdown(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_driver *driver; if (!client || !dev->driver) return; driver = to_i2c_driver(dev->driver); if (driver->shutdown) driver->shutdown(client); else if (client->irq > 0) disable_irq(client->irq); } static void i2c_client_dev_release(struct device *dev) { kfree(to_i2c_client(dev)); } static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", dev->type == &i2c_client_type ? 
to_i2c_client(dev)->name : to_i2c_adapter(dev)->name); } static DEVICE_ATTR_RO(name); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = to_i2c_client(dev); int len; len = of_device_modalias(dev, buf, PAGE_SIZE); if (len != -ENODEV) return len; len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; return sprintf(buf, "%s%s\n", I2C_MODULE_PREFIX, client->name); } static DEVICE_ATTR_RO(modalias); static struct attribute *i2c_dev_attrs[] = { &dev_attr_name.attr, /* modalias helps coldplug: modprobe $(cat .../modalias) */ &dev_attr_modalias.attr, NULL }; ATTRIBUTE_GROUPS(i2c_dev); const struct bus_type i2c_bus_type = { .name = "i2c", .match = i2c_device_match, .probe = i2c_device_probe, .remove = i2c_device_remove, .shutdown = i2c_device_shutdown, }; EXPORT_SYMBOL_GPL(i2c_bus_type); const struct device_type i2c_client_type = { .groups = i2c_dev_groups, .uevent = i2c_device_uevent, .release = i2c_client_dev_release, }; EXPORT_SYMBOL_GPL(i2c_client_type); /** * i2c_verify_client - return parameter as i2c_client, or NULL * @dev: device, probably from some driver model iterator * * When traversing the driver model tree, perhaps using driver model * iterators like @device_for_each_child(), you can't assume very much * about the nodes you find. Use this function to avoid oopses caused * by wrongly treating some non-I2C device as an i2c_client. */ struct i2c_client *i2c_verify_client(struct device *dev) { return (dev->type == &i2c_client_type) ? to_i2c_client(dev) : NULL; } EXPORT_SYMBOL(i2c_verify_client); /* Return a unique address which takes the flags of the client into account */ static unsigned short i2c_encode_flags_to_addr(struct i2c_client *client) { unsigned short addr = client->addr; /* For some client flags, add an arbitrary offset to avoid collisions */ if (client->flags & I2C_CLIENT_TEN) addr |= I2C_ADDR_OFFSET_TEN_BIT; if (client->flags & I2C_CLIENT_SLAVE) addr |= I2C_ADDR_OFFSET_SLAVE; return addr; } /* This is a permissive address validity check, I2C address map constraints * are purposely not enforced, except for the general call address. */ static int i2c_check_addr_validity(unsigned int addr, unsigned short flags) { if (flags & I2C_CLIENT_TEN) { /* 10-bit address, all values are valid */ if (addr > 0x3ff) return -EINVAL; } else { /* 7-bit address, reject the general call address */ if (addr == 0x00 || addr > 0x7f) return -EINVAL; } return 0; } /* And this is a strict address validity check, used when probing. If a * device uses a reserved address, then it shouldn't be probed. 7-bit * addressing is assumed, 10-bit address devices are rare and should be * explicitly enumerated. 
*/ int i2c_check_7bit_addr_validity_strict(unsigned short addr) { /* * Reserved addresses per I2C specification: * 0x00 General call address / START byte * 0x01 CBUS address * 0x02 Reserved for different bus format * 0x03 Reserved for future purposes * 0x04-0x07 Hs-mode master code * 0x78-0x7b 10-bit slave addressing * 0x7c-0x7f Reserved for future purposes */ if (addr < 0x08 || addr > 0x77) return -EINVAL; return 0; } static int __i2c_check_addr_busy(struct device *dev, void *addrp) { struct i2c_client *client = i2c_verify_client(dev); int addr = *(int *)addrp; if (client && i2c_encode_flags_to_addr(client) == addr) return -EBUSY; return 0; } /* walk up mux tree */ static int i2c_check_mux_parents(struct i2c_adapter *adapter, int addr) { struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter); int result; result = device_for_each_child(&adapter->dev, &addr, __i2c_check_addr_busy); if (!result && parent) result = i2c_check_mux_parents(parent, addr); return result; } /* recurse down mux tree */ static int i2c_check_mux_children(struct device *dev, void *addrp) { int result; if (dev->type == &i2c_adapter_type) result = device_for_each_child(dev, addrp, i2c_check_mux_children); else result = __i2c_check_addr_busy(dev, addrp); return result; } static int i2c_check_addr_busy(struct i2c_adapter *adapter, int addr) { struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter); int result = 0; if (parent) result = i2c_check_mux_parents(parent, addr); if (!result) result = device_for_each_child(&adapter->dev, &addr, i2c_check_mux_children); return result; } /** * i2c_adapter_lock_bus - Get exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT * locks only this branch in the adapter tree */ static void i2c_adapter_lock_bus(struct i2c_adapter *adapter, unsigned int flags) { rt_mutex_lock_nested(&adapter->bus_lock, i2c_adapter_depth(adapter)); } /** * i2c_adapter_trylock_bus - Try to get exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER trylocks the root i2c adapter, I2C_LOCK_SEGMENT * trylocks only this branch in the adapter tree */ static int i2c_adapter_trylock_bus(struct i2c_adapter *adapter, unsigned int flags) { return rt_mutex_trylock(&adapter->bus_lock); } /** * i2c_adapter_unlock_bus - Release exclusive access to an I2C bus segment * @adapter: Target I2C bus segment * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT * unlocks only this branch in the adapter tree */ static void i2c_adapter_unlock_bus(struct i2c_adapter *adapter, unsigned int flags) { rt_mutex_unlock(&adapter->bus_lock); } static void i2c_dev_set_name(struct i2c_adapter *adap, struct i2c_client *client, struct i2c_board_info const *info) { struct acpi_device *adev = ACPI_COMPANION(&client->dev); if (info && info->dev_name) { dev_set_name(&client->dev, "i2c-%s", info->dev_name); return; } if (adev) { dev_set_name(&client->dev, "i2c-%s", acpi_dev_name(adev)); return; } dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap), i2c_encode_flags_to_addr(client)); } int i2c_dev_irq_from_resources(const struct resource *resources, unsigned int num_resources) { struct irq_data *irqd; int i; for (i = 0; i < num_resources; i++) { const struct resource *r = &resources[i]; if (resource_type(r) != IORESOURCE_IRQ) continue; if (r->flags & IORESOURCE_BITS) { irqd = irq_get_irq_data(r->start); if (!irqd) break; irqd_set_trigger_type(irqd, r->flags & 
IORESOURCE_BITS); } return r->start; } return 0; } /** * i2c_new_client_device - instantiate an i2c device * @adap: the adapter managing the device * @info: describes one I2C device; bus_num is ignored * Context: can sleep * * Create an i2c device. Binding is handled through driver model * probe()/remove() methods. A driver may be bound to this device when we * return from this function, or any later moment (e.g. maybe hotplugging will * load the driver module). This call is not appropriate for use by mainboard * initialization logic, which usually runs during an arch_initcall() long * before any i2c_adapter could exist. * * This returns the new i2c client, which may be saved for later use with * i2c_unregister_device(); or an ERR_PTR to describe the error. */ struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info) { struct i2c_client *client; bool need_put = false; int status; client = kzalloc(sizeof *client, GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); client->adapter = adap; client->dev.platform_data = info->platform_data; client->flags = info->flags; client->addr = info->addr; client->init_irq = info->irq; if (!client->init_irq) client->init_irq = i2c_dev_irq_from_resources(info->resources, info->num_resources); strscpy(client->name, info->type, sizeof(client->name)); status = i2c_check_addr_validity(client->addr, client->flags); if (status) { dev_err(&adap->dev, "Invalid %d-bit I2C address 0x%02hx\n", client->flags & I2C_CLIENT_TEN ? 10 : 7, client->addr); goto out_err_silent; } /* Check for address business */ status = i2c_check_addr_busy(adap, i2c_encode_flags_to_addr(client)); if (status) goto out_err; client->dev.parent = &client->adapter->dev; client->dev.bus = &i2c_bus_type; client->dev.type = &i2c_client_type; client->dev.of_node = of_node_get(info->of_node); client->dev.fwnode = info->fwnode; device_enable_async_suspend(&client->dev); if (info->swnode) { status = device_add_software_node(&client->dev, info->swnode); if (status) { dev_err(&adap->dev, "Failed to add software node to client %s: %d\n", client->name, status); goto out_err_put_of_node; } } i2c_dev_set_name(adap, client, info); status = device_register(&client->dev); if (status) goto out_remove_swnode; dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", client->name, dev_name(&client->dev)); return client; out_remove_swnode: device_remove_software_node(&client->dev); need_put = true; out_err_put_of_node: of_node_put(info->of_node); out_err: dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x (%d)\n", client->name, client->addr, status); out_err_silent: if (need_put) put_device(&client->dev); else kfree(client); return ERR_PTR(status); } EXPORT_SYMBOL_GPL(i2c_new_client_device); /** * i2c_unregister_device - reverse effect of i2c_new_*_device() * @client: value returned from i2c_new_*_device() * Context: can sleep */ void i2c_unregister_device(struct i2c_client *client) { if (IS_ERR_OR_NULL(client)) return; if (client->dev.of_node) { of_node_clear_flag(client->dev.of_node, OF_POPULATED); of_node_put(client->dev.of_node); } if (ACPI_COMPANION(&client->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev)); device_remove_software_node(&client->dev); device_unregister(&client->dev); } EXPORT_SYMBOL_GPL(i2c_unregister_device); /** * i2c_find_device_by_fwnode() - find an i2c_client for the fwnode * @fwnode: &struct fwnode_handle corresponding to the &struct i2c_client * * Look up and return the &struct i2c_client corresponding to the 
@fwnode. * If no client can be found, or @fwnode is NULL, this returns NULL. * * The user must call put_device(&client->dev) once done with the i2c client. */ struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode) { struct i2c_client *client; struct device *dev; if (!fwnode) return NULL; dev = bus_find_device_by_fwnode(&i2c_bus_type, fwnode); if (!dev) return NULL; client = i2c_verify_client(dev); if (!client) put_device(dev); return client; } EXPORT_SYMBOL(i2c_find_device_by_fwnode); static const struct i2c_device_id dummy_id[] = { { "dummy", 0 }, { }, }; static int dummy_probe(struct i2c_client *client) { return 0; } static struct i2c_driver dummy_driver = { .driver.name = "dummy", .probe = dummy_probe, .id_table = dummy_id, }; /** * i2c_new_dummy_device - return a new i2c device bound to a dummy driver * @adapter: the adapter managing the device * @address: seven bit address to be used * Context: can sleep * * This returns an I2C client bound to the "dummy" driver, intended for use * with devices that consume multiple addresses. Examples of such chips * include various EEPROMS (like 24c04 and 24c08 models). * * These dummy devices have two main uses. First, most I2C and SMBus calls * except i2c_transfer() need a client handle; the dummy will be that handle. * And second, this prevents the specified address from being bound to a * different driver. * * This returns the new i2c client, which should be saved for later use with * i2c_unregister_device(); or an ERR_PTR to describe the error. */ struct i2c_client *i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address) { struct i2c_board_info info = { I2C_BOARD_INFO("dummy", address), }; return i2c_new_client_device(adapter, &info); } EXPORT_SYMBOL_GPL(i2c_new_dummy_device); static void devm_i2c_release_dummy(void *client) { i2c_unregister_device(client); } /** * devm_i2c_new_dummy_device - return a new i2c device bound to a dummy driver * @dev: device the managed resource is bound to * @adapter: the adapter managing the device * @address: seven bit address to be used * Context: can sleep * * This is the device-managed version of @i2c_new_dummy_device. It returns the * new i2c client or an ERR_PTR in case of an error. */ struct i2c_client *devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adapter, u16 address) { struct i2c_client *client; int ret; client = i2c_new_dummy_device(adapter, address); if (IS_ERR(client)) return client; ret = devm_add_action_or_reset(dev, devm_i2c_release_dummy, client); if (ret) return ERR_PTR(ret); return client; } EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device); /** * i2c_new_ancillary_device - Helper to get the instantiated secondary address * and create the associated device * @client: Handle to the primary client * @name: Handle to specify which secondary address to get * @default_addr: Used as a fallback if no secondary address was specified * Context: can sleep * * I2C clients can be composed of multiple I2C slaves bound together in a single * component. The I2C client driver then binds to the master I2C slave and needs * to create I2C dummy clients to communicate with all the other slaves. * * This function creates and returns an I2C dummy client whose I2C address is * retrieved from the platform firmware based on the given slave name. If no * address is specified by the firmware default_addr is used. * * On DT-based platforms the address is retrieved from the "reg" property entry * cell whose "reg-names" value matches the slave name. 
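 *
 * A minimal sketch (added for illustration; the "rtc" name, the 0x6f
 * fallback and the error handling are made-up examples, not taken from a
 * real driver): a driver bound to the primary client requests the address
 * published under reg-names = "rtc", falling back to 0x6f:
 *
 *	struct i2c_client *rtc;
 *
 *	rtc = i2c_new_ancillary_device(client, "rtc", 0x6f);
 *	if (IS_ERR(rtc))
 *		return PTR_ERR(rtc);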
* * This returns the new i2c client, which should be saved for later use with * i2c_unregister_device(); or an ERR_PTR to describe the error. */ struct i2c_client *i2c_new_ancillary_device(struct i2c_client *client, const char *name, u16 default_addr) { struct device_node *np = client->dev.of_node; u32 addr = default_addr; int i; if (np) { i = of_property_match_string(np, "reg-names", name); if (i >= 0) of_property_read_u32_index(np, "reg", i, &addr); } dev_dbg(&client->adapter->dev, "Address for %s : 0x%x\n", name, addr); return i2c_new_dummy_device(client->adapter, addr); } EXPORT_SYMBOL_GPL(i2c_new_ancillary_device); /* ------------------------------------------------------------------------- */ /* I2C bus adapters -- one roots each I2C or SMBUS segment */ static void i2c_adapter_dev_release(struct device *dev) { struct i2c_adapter *adap = to_i2c_adapter(dev); complete(&adap->dev_released); } unsigned int i2c_adapter_depth(struct i2c_adapter *adapter) { unsigned int depth = 0; struct device *parent; for (parent = adapter->dev.parent; parent; parent = parent->parent) if (parent->type == &i2c_adapter_type) depth++; WARN_ONCE(depth >= MAX_LOCKDEP_SUBCLASSES, "adapter depth exceeds lockdep subclass limit\n"); return depth; } EXPORT_SYMBOL_GPL(i2c_adapter_depth); /* * Let users instantiate I2C devices through sysfs. This can be used when * platform initialization code doesn't contain the proper data for * whatever reason. Also useful for drivers that do device detection and * detection fails, either because the device uses an unexpected address, * or this is a compatible device with different ID register values. * * Parameter checking may look overzealous, but we really don't want * the user to provide incorrect parameters. */ static ssize_t new_device_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct i2c_adapter *adap = to_i2c_adapter(dev); struct i2c_board_info info; struct i2c_client *client; char *blank, end; int res; memset(&info, 0, sizeof(struct i2c_board_info)); blank = strchr(buf, ' '); if (!blank) { dev_err(dev, "%s: Missing parameters\n", "new_device"); return -EINVAL; } if (blank - buf > I2C_NAME_SIZE - 1) { dev_err(dev, "%s: Invalid device name\n", "new_device"); return -EINVAL; } memcpy(info.type, buf, blank - buf); /* Parse remaining parameters, reject extra parameters */ res = sscanf(++blank, "%hi%c", &info.addr, &end); if (res < 1) { dev_err(dev, "%s: Can't parse I2C address\n", "new_device"); return -EINVAL; } if (res > 1 && end != '\n') { dev_err(dev, "%s: Extra parameters\n", "new_device"); return -EINVAL; } if ((info.addr & I2C_ADDR_OFFSET_TEN_BIT) == I2C_ADDR_OFFSET_TEN_BIT) { info.addr &= ~I2C_ADDR_OFFSET_TEN_BIT; info.flags |= I2C_CLIENT_TEN; } if (info.addr & I2C_ADDR_OFFSET_SLAVE) { info.addr &= ~I2C_ADDR_OFFSET_SLAVE; info.flags |= I2C_CLIENT_SLAVE; } client = i2c_new_client_device(adap, &info); if (IS_ERR(client)) return PTR_ERR(client); /* Keep track of the added device */ mutex_lock(&adap->userspace_clients_lock); list_add_tail(&client->detected, &adap->userspace_clients); mutex_unlock(&adap->userspace_clients_lock); dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device", info.type, info.addr); return count; } static DEVICE_ATTR_WO(new_device); /* * And of course let the users delete the devices they instantiated, if * they got it wrong. This interface can only be used to delete devices * instantiated by i2c_sysfs_new_device above. 
This guarantees that we * don't delete devices to which some kernel code still has references. * * Parameter checking may look overzealous, but we really don't want * the user to delete the wrong device. */ static ssize_t delete_device_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct i2c_adapter *adap = to_i2c_adapter(dev); struct i2c_client *client, *next; unsigned short addr; char end; int res; /* Parse parameters, reject extra parameters */ res = sscanf(buf, "%hi%c", &addr, &end); if (res < 1) { dev_err(dev, "%s: Can't parse I2C address\n", "delete_device"); return -EINVAL; } if (res > 1 && end != '\n') { dev_err(dev, "%s: Extra parameters\n", "delete_device"); return -EINVAL; } /* Make sure the device was added through sysfs */ res = -ENOENT; mutex_lock_nested(&adap->userspace_clients_lock, i2c_adapter_depth(adap)); list_for_each_entry_safe(client, next, &adap->userspace_clients, detected) { if (i2c_encode_flags_to_addr(client) == addr) { dev_info(dev, "%s: Deleting device %s at 0x%02hx\n", "delete_device", client->name, client->addr); list_del(&client->detected); i2c_unregister_device(client); res = count; break; } } mutex_unlock(&adap->userspace_clients_lock); if (res < 0) dev_err(dev, "%s: Can't find device in list\n", "delete_device"); return res; } static DEVICE_ATTR_IGNORE_LOCKDEP(delete_device, S_IWUSR, NULL, delete_device_store); static struct attribute *i2c_adapter_attrs[] = { &dev_attr_name.attr, &dev_attr_new_device.attr, &dev_attr_delete_device.attr, NULL }; ATTRIBUTE_GROUPS(i2c_adapter); const struct device_type i2c_adapter_type = { .groups = i2c_adapter_groups, .release = i2c_adapter_dev_release, }; EXPORT_SYMBOL_GPL(i2c_adapter_type); /** * i2c_verify_adapter - return parameter as i2c_adapter or NULL * @dev: device, probably from some driver model iterator * * When traversing the driver model tree, perhaps using driver model * iterators like @device_for_each_child(), you can't assume very much * about the nodes you find. Use this function to avoid oopses caused * by wrongly treating some non-I2C device as an i2c_adapter. */ struct i2c_adapter *i2c_verify_adapter(struct device *dev) { return (dev->type == &i2c_adapter_type) ? 
to_i2c_adapter(dev) : NULL; } EXPORT_SYMBOL(i2c_verify_adapter); #ifdef CONFIG_I2C_COMPAT static struct class_compat *i2c_adapter_compat_class; #endif static void i2c_scan_static_board_info(struct i2c_adapter *adapter) { struct i2c_devinfo *devinfo; down_read(&__i2c_board_lock); list_for_each_entry(devinfo, &__i2c_board_list, list) { if (devinfo->busnum == adapter->nr && IS_ERR(i2c_new_client_device(adapter, &devinfo->board_info))) dev_err(&adapter->dev, "Can't create device at 0x%02x\n", devinfo->board_info.addr); } up_read(&__i2c_board_lock); } static int i2c_do_add_adapter(struct i2c_driver *driver, struct i2c_adapter *adap) { /* Detect supported devices on that bus, and instantiate them */ i2c_detect(adap, driver); return 0; } static int __process_new_adapter(struct device_driver *d, void *data) { return i2c_do_add_adapter(to_i2c_driver(d), data); } static const struct i2c_lock_operations i2c_adapter_lock_ops = { .lock_bus = i2c_adapter_lock_bus, .trylock_bus = i2c_adapter_trylock_bus, .unlock_bus = i2c_adapter_unlock_bus, }; static void i2c_host_notify_irq_teardown(struct i2c_adapter *adap) { struct irq_domain *domain = adap->host_notify_domain; irq_hw_number_t hwirq; if (!domain) return; for (hwirq = 0 ; hwirq < I2C_ADDR_7BITS_COUNT ; hwirq++) irq_dispose_mapping(irq_find_mapping(domain, hwirq)); irq_domain_remove(domain); adap->host_notify_domain = NULL; } static int i2c_host_notify_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw_irq_num) { irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); return 0; } static const struct irq_domain_ops i2c_host_notify_irq_ops = { .map = i2c_host_notify_irq_map, }; static int i2c_setup_host_notify_irq_domain(struct i2c_adapter *adap) { struct irq_domain *domain; if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_HOST_NOTIFY)) return 0; domain = irq_domain_create_linear(adap->dev.parent->fwnode, I2C_ADDR_7BITS_COUNT, &i2c_host_notify_irq_ops, adap); if (!domain) return -ENOMEM; adap->host_notify_domain = domain; return 0; } /** * i2c_handle_smbus_host_notify - Forward a Host Notify event to the correct * I2C client. * @adap: the adapter * @addr: the I2C address of the notifying device * Context: can't sleep * * Helper function to be called from an I2C bus driver's interrupt * handler. It will schedule the Host Notify IRQ. 
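 *
 * A minimal sketch (the foo_* names are hypothetical; only the call into
 * this helper is real): a bus driver reads the notifying address from its
 * hardware and forwards the event from its interrupt handler:
 *
 *	static irqreturn_t foo_isr(int irq, void *dev_id)
 *	{
 *		struct foo_i2c *i2c = dev_id;
 *		unsigned short addr = foo_read_notify_addr(i2c);
 *
 *		i2c_handle_smbus_host_notify(&i2c->adap, addr);
 *		return IRQ_HANDLED;
 *	}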
*/ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr) { int irq; if (!adap) return -EINVAL; irq = irq_find_mapping(adap->host_notify_domain, addr); if (irq <= 0) return -ENXIO; generic_handle_irq_safe(irq); return 0; } EXPORT_SYMBOL_GPL(i2c_handle_smbus_host_notify); static int i2c_register_adapter(struct i2c_adapter *adap) { int res = -EINVAL; /* Can't register until after driver model init */ if (WARN_ON(!is_registered)) { res = -EAGAIN; goto out_list; } /* Sanity checks */ if (WARN(!adap->name[0], "i2c adapter has no name")) goto out_list; if (!adap->algo) { pr_err("adapter '%s': no algo supplied!\n", adap->name); goto out_list; } if (!adap->lock_ops) adap->lock_ops = &i2c_adapter_lock_ops; adap->locked_flags = 0; rt_mutex_init(&adap->bus_lock); rt_mutex_init(&adap->mux_lock); mutex_init(&adap->userspace_clients_lock); INIT_LIST_HEAD(&adap->userspace_clients); /* Set default timeout to 1 second if not already set */ if (adap->timeout == 0) adap->timeout = HZ; /* register soft irqs for Host Notify */ res = i2c_setup_host_notify_irq_domain(adap); if (res) { pr_err("adapter '%s': can't create Host Notify IRQs (%d)\n", adap->name, res); goto out_list; } dev_set_name(&adap->dev, "i2c-%d", adap->nr); adap->dev.bus = &i2c_bus_type; adap->dev.type = &i2c_adapter_type; res = device_register(&adap->dev); if (res) { pr_err("adapter '%s': can't register device (%d)\n", adap->name, res); goto out_list; } adap->debugfs = debugfs_create_dir(dev_name(&adap->dev), i2c_debugfs_root); res = i2c_setup_smbus_alert(adap); if (res) goto out_reg; device_enable_async_suspend(&adap->dev); pm_runtime_no_callbacks(&adap->dev); pm_suspend_ignore_children(&adap->dev, true); pm_runtime_enable(&adap->dev); res = i2c_init_recovery(adap); if (res == -EPROBE_DEFER) goto out_reg; dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); #ifdef CONFIG_I2C_COMPAT res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev, adap->dev.parent); if (res) dev_warn(&adap->dev, "Failed to create compatibility class link\n"); #endif /* create pre-declared device nodes */ of_i2c_register_devices(adap); i2c_acpi_install_space_handler(adap); i2c_acpi_register_devices(adap); if (adap->nr < __i2c_first_dynamic_bus_num) i2c_scan_static_board_info(adap); /* Notify drivers */ mutex_lock(&core_lock); bus_for_each_drv(&i2c_bus_type, NULL, adap, __process_new_adapter); mutex_unlock(&core_lock); return 0; out_reg: debugfs_remove_recursive(adap->debugfs); init_completion(&adap->dev_released); device_unregister(&adap->dev); wait_for_completion(&adap->dev_released); out_list: mutex_lock(&core_lock); idr_remove(&i2c_adapter_idr, adap->nr); mutex_unlock(&core_lock); return res; } /** * __i2c_add_numbered_adapter - i2c_add_numbered_adapter where nr is never -1 * @adap: the adapter to register (with adap->nr initialized) * Context: can sleep * * See i2c_add_numbered_adapter() for details. */ static int __i2c_add_numbered_adapter(struct i2c_adapter *adap) { int id; mutex_lock(&core_lock); id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1, GFP_KERNEL); mutex_unlock(&core_lock); if (WARN(id < 0, "couldn't get idr")) return id == -ENOSPC ? -EBUSY : id; return i2c_register_adapter(adap); } /** * i2c_add_adapter - declare i2c adapter, use dynamic bus number * @adapter: the adapter to add * Context: can sleep * * This routine is used to declare an I2C adapter when its bus number * doesn't matter or when its bus number is specified by an dt alias. 
* Examples of bases when the bus number doesn't matter: I2C adapters * dynamically added by USB links or PCI plugin cards. * * When this returns zero, a new bus number was allocated and stored * in adap->nr, and the specified adapter became available for clients. * Otherwise, a negative errno value is returned. */ int i2c_add_adapter(struct i2c_adapter *adapter) { struct device *dev = &adapter->dev; int id; if (dev->of_node) { id = of_alias_get_id(dev->of_node, "i2c"); if (id >= 0) { adapter->nr = id; return __i2c_add_numbered_adapter(adapter); } } mutex_lock(&core_lock); id = idr_alloc(&i2c_adapter_idr, adapter, __i2c_first_dynamic_bus_num, 0, GFP_KERNEL); mutex_unlock(&core_lock); if (WARN(id < 0, "couldn't get idr")) return id; adapter->nr = id; return i2c_register_adapter(adapter); } EXPORT_SYMBOL(i2c_add_adapter); /** * i2c_add_numbered_adapter - declare i2c adapter, use static bus number * @adap: the adapter to register (with adap->nr initialized) * Context: can sleep * * This routine is used to declare an I2C adapter when its bus number * matters. For example, use it for I2C adapters from system-on-chip CPUs, * or otherwise built in to the system's mainboard, and where i2c_board_info * is used to properly configure I2C devices. * * If the requested bus number is set to -1, then this function will behave * identically to i2c_add_adapter, and will dynamically assign a bus number. * * If no devices have pre-been declared for this bus, then be sure to * register the adapter before any dynamically allocated ones. Otherwise * the required bus ID may not be available. * * When this returns zero, the specified adapter became available for * clients using the bus number provided in adap->nr. Also, the table * of I2C devices pre-declared using i2c_register_board_info() is scanned, * and the appropriate driver model device nodes are created. Otherwise, a * negative errno value is returned. */ int i2c_add_numbered_adapter(struct i2c_adapter *adap) { if (adap->nr == -1) /* -1 means dynamically assign bus id */ return i2c_add_adapter(adap); return __i2c_add_numbered_adapter(adap); } EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter); static void i2c_do_del_adapter(struct i2c_driver *driver, struct i2c_adapter *adapter) { struct i2c_client *client, *_n; /* Remove the devices we created ourselves as the result of hardware * probing (using a driver's detect method) */ list_for_each_entry_safe(client, _n, &driver->clients, detected) { if (client->adapter == adapter) { dev_dbg(&adapter->dev, "Removing %s at 0x%x\n", client->name, client->addr); list_del(&client->detected); i2c_unregister_device(client); } } } static int __unregister_client(struct device *dev, void *dummy) { struct i2c_client *client = i2c_verify_client(dev); if (client && strcmp(client->name, "dummy")) i2c_unregister_device(client); return 0; } static int __unregister_dummy(struct device *dev, void *dummy) { struct i2c_client *client = i2c_verify_client(dev); i2c_unregister_device(client); return 0; } static int __process_removed_adapter(struct device_driver *d, void *data) { i2c_do_del_adapter(to_i2c_driver(d), data); return 0; } /** * i2c_del_adapter - unregister I2C adapter * @adap: the adapter being unregistered * Context: can sleep * * This unregisters an I2C adapter which was previously registered * by @i2c_add_adapter or @i2c_add_numbered_adapter. 
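 *
 * Typical pairing in a bus driver, sketched with hypothetical foo_* names
 * (the field assignments shown are the usual minimum, not an exhaustive
 * setup):
 *
 *	in foo_probe():
 *		foo->adap.owner = THIS_MODULE;
 *		foo->adap.algo = &foo_i2c_algo;
 *		foo->adap.dev.parent = dev;
 *		strscpy(foo->adap.name, "foo-i2c", sizeof(foo->adap.name));
 *		ret = i2c_add_adapter(&foo->adap);
 *
 *	in foo_remove():
 *		i2c_del_adapter(&foo->adap);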
*/ void i2c_del_adapter(struct i2c_adapter *adap) { struct i2c_adapter *found; struct i2c_client *client, *next; /* First make sure that this adapter was ever added */ mutex_lock(&core_lock); found = idr_find(&i2c_adapter_idr, adap->nr); mutex_unlock(&core_lock); if (found != adap) { pr_debug("attempting to delete unregistered adapter [%s]\n", adap->name); return; } i2c_acpi_remove_space_handler(adap); /* Tell drivers about this removal */ mutex_lock(&core_lock); bus_for_each_drv(&i2c_bus_type, NULL, adap, __process_removed_adapter); mutex_unlock(&core_lock); /* Remove devices instantiated from sysfs */ mutex_lock_nested(&adap->userspace_clients_lock, i2c_adapter_depth(adap)); list_for_each_entry_safe(client, next, &adap->userspace_clients, detected) { dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name, client->addr); list_del(&client->detected); i2c_unregister_device(client); } mutex_unlock(&adap->userspace_clients_lock); /* Detach any active clients. This can't fail, thus we do not * check the returned value. This is a two-pass process, because * we can't remove the dummy devices during the first pass: they * could have been instantiated by real devices wishing to clean * them up properly, so we give them a chance to do that first. */ device_for_each_child(&adap->dev, NULL, __unregister_client); device_for_each_child(&adap->dev, NULL, __unregister_dummy); #ifdef CONFIG_I2C_COMPAT class_compat_remove_link(i2c_adapter_compat_class, &adap->dev, adap->dev.parent); #endif /* device name is gone after device_unregister */ dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name); pm_runtime_disable(&adap->dev); i2c_host_notify_irq_teardown(adap); debugfs_remove_recursive(adap->debugfs); /* wait until all references to the device are gone * * FIXME: This is old code and should ideally be replaced by an * alternative which results in decoupling the lifetime of the struct * device from the i2c_adapter, like spi or netdev do. Any solution * should be thoroughly tested with DEBUG_KOBJECT_RELEASE enabled! */ init_completion(&adap->dev_released); device_unregister(&adap->dev); wait_for_completion(&adap->dev_released); /* free bus id */ mutex_lock(&core_lock); idr_remove(&i2c_adapter_idr, adap->nr); mutex_unlock(&core_lock); /* Clear the device structure in case this adapter is ever going to be added again */ memset(&adap->dev, 0, sizeof(adap->dev)); } EXPORT_SYMBOL(i2c_del_adapter); static void devm_i2c_del_adapter(void *adapter) { i2c_del_adapter(adapter); } /** * devm_i2c_add_adapter - device-managed variant of i2c_add_adapter() * @dev: managing device for adding this I2C adapter * @adapter: the adapter to add * Context: can sleep * * Add adapter with dynamic bus number, same with i2c_add_adapter() * but the adapter will be auto deleted on driver detach. */ int devm_i2c_add_adapter(struct device *dev, struct i2c_adapter *adapter) { int ret; ret = i2c_add_adapter(adapter); if (ret) return ret; return devm_add_action_or_reset(dev, devm_i2c_del_adapter, adapter); } EXPORT_SYMBOL_GPL(devm_i2c_add_adapter); static int i2c_dev_or_parent_fwnode_match(struct device *dev, const void *data) { if (dev_fwnode(dev) == data) return 1; if (dev->parent && dev_fwnode(dev->parent) == data) return 1; return 0; } /** * i2c_find_adapter_by_fwnode() - find an i2c_adapter for the fwnode * @fwnode: &struct fwnode_handle corresponding to the &struct i2c_adapter * * Look up and return the &struct i2c_adapter corresponding to the @fwnode. * If no adapter can be found, or @fwnode is NULL, this returns NULL. 
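 *
 * A brief sketch (the fwnode origin and the deferral policy are made-up
 * examples): a consumer resolves the adapter behind a firmware reference
 * and drops the device reference when done, as required below:
 *
 *	adapter = i2c_find_adapter_by_fwnode(fwnode);
 *	if (!adapter)
 *		return -EPROBE_DEFER;
 *	... use the adapter ...
 *	put_device(&adapter->dev);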
* * The user must call put_device(&adapter->dev) once done with the i2c adapter. */ struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode) { struct i2c_adapter *adapter; struct device *dev; if (!fwnode) return NULL; dev = bus_find_device(&i2c_bus_type, NULL, fwnode, i2c_dev_or_parent_fwnode_match); if (!dev) return NULL; adapter = i2c_verify_adapter(dev); if (!adapter) put_device(dev); return adapter; } EXPORT_SYMBOL(i2c_find_adapter_by_fwnode); /** * i2c_get_adapter_by_fwnode() - find an i2c_adapter for the fwnode * @fwnode: &struct fwnode_handle corresponding to the &struct i2c_adapter * * Look up and return the &struct i2c_adapter corresponding to the @fwnode, * and increment the adapter module's use count. If no adapter can be found, * or @fwnode is NULL, this returns NULL. * * The user must call i2c_put_adapter(adapter) once done with the i2c adapter. * Note that this is different from i2c_find_adapter_by_node(). */ struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode) { struct i2c_adapter *adapter; adapter = i2c_find_adapter_by_fwnode(fwnode); if (!adapter) return NULL; if (!try_module_get(adapter->owner)) { put_device(&adapter->dev); adapter = NULL; } return adapter; } EXPORT_SYMBOL(i2c_get_adapter_by_fwnode); static void i2c_parse_timing(struct device *dev, char *prop_name, u32 *cur_val_p, u32 def_val, bool use_def) { int ret; ret = device_property_read_u32(dev, prop_name, cur_val_p); if (ret && use_def) *cur_val_p = def_val; dev_dbg(dev, "%s: %u\n", prop_name, *cur_val_p); } /** * i2c_parse_fw_timings - get I2C related timing parameters from firmware * @dev: The device to scan for I2C timing properties * @t: the i2c_timings struct to be filled with values * @use_defaults: bool to use sane defaults derived from the I2C specification * when properties are not found, otherwise don't update * * Scan the device for the generic I2C properties describing timing parameters * for the signal and fill the given struct with the results. If a property was * not found and use_defaults was true, then maximum timings are assumed which * are derived from the I2C specification. If use_defaults is not used, the * results will be as before, so drivers can apply their own defaults before * calling this helper. The latter is mainly intended for avoiding regressions * of existing drivers which want to switch to this function. New drivers * almost always should use the defaults. */ void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_defaults) { bool u = use_defaults; u32 d; i2c_parse_timing(dev, "clock-frequency", &t->bus_freq_hz, I2C_MAX_STANDARD_MODE_FREQ, u); d = t->bus_freq_hz <= I2C_MAX_STANDARD_MODE_FREQ ? 1000 : t->bus_freq_hz <= I2C_MAX_FAST_MODE_FREQ ? 300 : 120; i2c_parse_timing(dev, "i2c-scl-rising-time-ns", &t->scl_rise_ns, d, u); d = t->bus_freq_hz <= I2C_MAX_FAST_MODE_FREQ ? 
300 : 120; i2c_parse_timing(dev, "i2c-scl-falling-time-ns", &t->scl_fall_ns, d, u); i2c_parse_timing(dev, "i2c-scl-internal-delay-ns", &t->scl_int_delay_ns, 0, u); i2c_parse_timing(dev, "i2c-sda-falling-time-ns", &t->sda_fall_ns, t->scl_fall_ns, u); i2c_parse_timing(dev, "i2c-sda-hold-time-ns", &t->sda_hold_ns, 0, u); i2c_parse_timing(dev, "i2c-digital-filter-width-ns", &t->digital_filter_width_ns, 0, u); i2c_parse_timing(dev, "i2c-analog-filter-cutoff-frequency", &t->analog_filter_cutoff_freq_hz, 0, u); } EXPORT_SYMBOL_GPL(i2c_parse_fw_timings); /* ------------------------------------------------------------------------- */ int i2c_for_each_dev(void *data, int (*fn)(struct device *dev, void *data)) { int res; mutex_lock(&core_lock); res = bus_for_each_dev(&i2c_bus_type, NULL, data, fn); mutex_unlock(&core_lock); return res; } EXPORT_SYMBOL_GPL(i2c_for_each_dev); static int __process_new_driver(struct device *dev, void *data) { if (dev->type != &i2c_adapter_type) return 0; return i2c_do_add_adapter(data, to_i2c_adapter(dev)); } /* * An i2c_driver is used with one or more i2c_client (device) nodes to access * i2c slave chips, on a bus instance associated with some i2c_adapter. */ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) { int res; /* Can't register until after driver model init */ if (WARN_ON(!is_registered)) return -EAGAIN; /* add the driver to the list of i2c drivers in the driver core */ driver->driver.owner = owner; driver->driver.bus = &i2c_bus_type; INIT_LIST_HEAD(&driver->clients); /* When registration returns, the driver core * will have called probe() for all matching-but-unbound devices. */ res = driver_register(&driver->driver); if (res) return res; pr_debug("driver [%s] registered\n", driver->driver.name); /* Walk the adapters that are already present */ i2c_for_each_dev(driver, __process_new_driver); return 0; } EXPORT_SYMBOL(i2c_register_driver); static int __process_removed_driver(struct device *dev, void *data) { if (dev->type == &i2c_adapter_type) i2c_do_del_adapter(data, to_i2c_adapter(dev)); return 0; } /** * i2c_del_driver - unregister I2C driver * @driver: the driver being unregistered * Context: can sleep */ void i2c_del_driver(struct i2c_driver *driver) { i2c_for_each_dev(driver, __process_removed_driver); driver_unregister(&driver->driver); pr_debug("driver [%s] unregistered\n", driver->driver.name); } EXPORT_SYMBOL(i2c_del_driver); /* ------------------------------------------------------------------------- */ struct i2c_cmd_arg { unsigned cmd; void *arg; }; static int i2c_cmd(struct device *dev, void *_arg) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_cmd_arg *arg = _arg; struct i2c_driver *driver; if (!client || !client->dev.driver) return 0; driver = to_i2c_driver(client->dev.driver); if (driver->command) driver->command(client, arg->cmd, arg->arg); return 0; } void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg) { struct i2c_cmd_arg cmd_arg; cmd_arg.cmd = cmd; cmd_arg.arg = arg; device_for_each_child(&adap->dev, &cmd_arg, i2c_cmd); } EXPORT_SYMBOL(i2c_clients_command); static int __init i2c_init(void) { int retval; retval = of_alias_get_highest_id("i2c"); down_write(&__i2c_board_lock); if (retval >= __i2c_first_dynamic_bus_num) __i2c_first_dynamic_bus_num = retval + 1; up_write(&__i2c_board_lock); retval = bus_register(&i2c_bus_type); if (retval) return retval; is_registered = true; i2c_debugfs_root = debugfs_create_dir("i2c", NULL); #ifdef CONFIG_I2C_COMPAT i2c_adapter_compat_class = 
class_compat_register("i2c-adapter"); if (!i2c_adapter_compat_class) { retval = -ENOMEM; goto bus_err; } #endif retval = i2c_add_driver(&dummy_driver); if (retval) goto class_err; if (IS_ENABLED(CONFIG_OF_DYNAMIC)) WARN_ON(of_reconfig_notifier_register(&i2c_of_notifier)); if (IS_ENABLED(CONFIG_ACPI)) WARN_ON(acpi_reconfig_notifier_register(&i2c_acpi_notifier)); return 0; class_err: #ifdef CONFIG_I2C_COMPAT class_compat_unregister(i2c_adapter_compat_class); bus_err: #endif is_registered = false; bus_unregister(&i2c_bus_type); return retval; } static void __exit i2c_exit(void) { if (IS_ENABLED(CONFIG_ACPI)) WARN_ON(acpi_reconfig_notifier_unregister(&i2c_acpi_notifier)); if (IS_ENABLED(CONFIG_OF_DYNAMIC)) WARN_ON(of_reconfig_notifier_unregister(&i2c_of_notifier)); i2c_del_driver(&dummy_driver); #ifdef CONFIG_I2C_COMPAT class_compat_unregister(i2c_adapter_compat_class); #endif debugfs_remove_recursive(i2c_debugfs_root); bus_unregister(&i2c_bus_type); tracepoint_synchronize_unregister(); } /* We must initialize early, because some subsystems register i2c drivers * in subsys_initcall() code, but are linked (and initialized) before i2c. */ postcore_initcall(i2c_init); module_exit(i2c_exit); /* ---------------------------------------------------- * the functional interface to the i2c busses. * ---------------------------------------------------- */ /* Check if val is exceeding the quirk IFF quirk is non 0 */ #define i2c_quirk_exceeded(val, quirk) ((quirk) && ((val) > (quirk))) static int i2c_quirk_error(struct i2c_adapter *adap, struct i2c_msg *msg, char *err_msg) { dev_err_ratelimited(&adap->dev, "adapter quirk: %s (addr 0x%04x, size %u, %s)\n", err_msg, msg->addr, msg->len, msg->flags & I2C_M_RD ? "read" : "write"); return -EOPNOTSUPP; } static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { const struct i2c_adapter_quirks *q = adap->quirks; int max_num = q->max_num_msgs, i; bool do_len_check = true; if (q->flags & I2C_AQ_COMB) { max_num = 2; /* special checks for combined messages */ if (num == 2) { if (q->flags & I2C_AQ_COMB_WRITE_FIRST && msgs[0].flags & I2C_M_RD) return i2c_quirk_error(adap, &msgs[0], "1st comb msg must be write"); if (q->flags & I2C_AQ_COMB_READ_SECOND && !(msgs[1].flags & I2C_M_RD)) return i2c_quirk_error(adap, &msgs[1], "2nd comb msg must be read"); if (q->flags & I2C_AQ_COMB_SAME_ADDR && msgs[0].addr != msgs[1].addr) return i2c_quirk_error(adap, &msgs[0], "comb msg only to same addr"); if (i2c_quirk_exceeded(msgs[0].len, q->max_comb_1st_msg_len)) return i2c_quirk_error(adap, &msgs[0], "msg too long"); if (i2c_quirk_exceeded(msgs[1].len, q->max_comb_2nd_msg_len)) return i2c_quirk_error(adap, &msgs[1], "msg too long"); do_len_check = false; } } if (i2c_quirk_exceeded(num, max_num)) return i2c_quirk_error(adap, &msgs[0], "too many messages"); for (i = 0; i < num; i++) { u16 len = msgs[i].len; if (msgs[i].flags & I2C_M_RD) { if (do_len_check && i2c_quirk_exceeded(len, q->max_read_len)) return i2c_quirk_error(adap, &msgs[i], "msg too long"); if (q->flags & I2C_AQ_NO_ZERO_LEN_READ && len == 0) return i2c_quirk_error(adap, &msgs[i], "no zero length"); } else { if (do_len_check && i2c_quirk_exceeded(len, q->max_write_len)) return i2c_quirk_error(adap, &msgs[i], "msg too long"); if (q->flags & I2C_AQ_NO_ZERO_LEN_WRITE && len == 0) return i2c_quirk_error(adap, &msgs[i], "no zero length"); } } return 0; } /** * __i2c_transfer - unlocked flavor of i2c_transfer * @adap: Handle to I2C bus * @msgs: One or more messages to execute before STOP is 
issued to * terminate the operation; each message begins with a START. * @num: Number of messages to be executed. * * Returns negative errno, else the number of messages executed. * * Adapter lock must be held when calling this function. No debug logging * takes place. */ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { unsigned long orig_jiffies; int ret, try; if (!adap->algo->master_xfer) { dev_dbg(&adap->dev, "I2C level transfers not supported\n"); return -EOPNOTSUPP; } if (WARN_ON(!msgs || num < 1)) return -EINVAL; ret = __i2c_check_suspended(adap); if (ret) return ret; if (adap->quirks && i2c_check_for_quirks(adap, msgs, num)) return -EOPNOTSUPP; /* * i2c_trace_msg_key gets enabled when tracepoint i2c_transfer gets * enabled. This is an efficient way of keeping the for-loop from * being executed when not needed. */ if (static_branch_unlikely(&i2c_trace_msg_key)) { int i; for (i = 0; i < num; i++) if (msgs[i].flags & I2C_M_RD) trace_i2c_read(adap, &msgs[i], i); else trace_i2c_write(adap, &msgs[i], i); } /* Retry automatically on arbitration loss */ orig_jiffies = jiffies; for (ret = 0, try = 0; try <= adap->retries; try++) { if (i2c_in_atomic_xfer_mode() && adap->algo->master_xfer_atomic) ret = adap->algo->master_xfer_atomic(adap, msgs, num); else ret = adap->algo->master_xfer(adap, msgs, num); if (ret != -EAGAIN) break; if (time_after(jiffies, orig_jiffies + adap->timeout)) break; } if (static_branch_unlikely(&i2c_trace_msg_key)) { int i; for (i = 0; i < ret; i++) if (msgs[i].flags & I2C_M_RD) trace_i2c_reply(adap, &msgs[i], i); trace_i2c_result(adap, num, ret); } return ret; } EXPORT_SYMBOL(__i2c_transfer); /** * i2c_transfer - execute a single or combined I2C message * @adap: Handle to I2C bus * @msgs: One or more messages to execute before STOP is issued to * terminate the operation; each message begins with a START. * @num: Number of messages to be executed. * * Returns negative errno, else the number of messages executed. * * Note that there is no requirement that each message be sent to * the same slave address, although that is the most common model. */ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { int ret; /* REVISIT the fault reporting model here is weak: * * - When we get an error after receiving N bytes from a slave, * there is no way to report "N". * * - When we get a NAK after transmitting N bytes to a slave, * there is no way to report "N" ... or to let the master * continue executing the rest of this combined message, if * that's the appropriate response. * * - When for example "num" is two and we successfully complete * the first message but get an error part way through the * second, it's unclear whether that should be reported as * one (discarding status on the second message) or errno * (discarding status on the first one). */ ret = __i2c_lock_bus_helper(adap); if (ret) return ret; ret = __i2c_transfer(adap, msgs, num); i2c_unlock_bus(adap, I2C_LOCK_SEGMENT); return ret; } EXPORT_SYMBOL(i2c_transfer); /** * i2c_transfer_buffer_flags - issue a single I2C message transferring data * to/from a buffer * @client: Handle to slave device * @buf: Where the data is stored * @count: How many bytes to transfer, must be less than 64k since msg.len is u16 * @flags: The flags to be used for the message, e.g. I2C_M_RD for reads * * Returns negative errno, or else the number of bytes transferred. 
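 *
 * A short sketch (the buffer size is an arbitrary example): reading four
 * bytes from a client; the i2c_master_recv() helper wraps the same call:
 *
 *	char buf[4];
 *	int ret;
 *
 *	ret = i2c_transfer_buffer_flags(client, buf, sizeof(buf), I2C_M_RD);
 *	if (ret < 0)
 *		return ret;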
*/ int i2c_transfer_buffer_flags(const struct i2c_client *client, char *buf, int count, u16 flags) { int ret; struct i2c_msg msg = { .addr = client->addr, .flags = flags | (client->flags & I2C_M_TEN), .len = count, .buf = buf, }; ret = i2c_transfer(client->adapter, &msg, 1); /* * If everything went ok (i.e. 1 msg transferred), return #bytes * transferred, else error code. */ return (ret == 1) ? count : ret; } EXPORT_SYMBOL(i2c_transfer_buffer_flags); /** * i2c_get_device_id - get manufacturer, part id and die revision of a device * @client: The device to query * @id: The queried information * * Returns negative errno on error, zero on success. */ int i2c_get_device_id(const struct i2c_client *client, struct i2c_device_identity *id) { struct i2c_adapter *adap = client->adapter; union i2c_smbus_data raw_id; int ret; if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) return -EOPNOTSUPP; raw_id.block[0] = 3; ret = i2c_smbus_xfer(adap, I2C_ADDR_DEVICE_ID, 0, I2C_SMBUS_READ, client->addr << 1, I2C_SMBUS_I2C_BLOCK_DATA, &raw_id); if (ret) return ret; id->manufacturer_id = (raw_id.block[1] << 4) | (raw_id.block[2] >> 4); id->part_id = ((raw_id.block[2] & 0xf) << 5) | (raw_id.block[3] >> 3); id->die_revision = raw_id.block[3] & 0x7; return 0; } EXPORT_SYMBOL_GPL(i2c_get_device_id); /** * i2c_client_get_device_id - get the driver match table entry of a device * @client: the device to query. The device must be bound to a driver * * Returns a pointer to the matching entry if found, NULL otherwise. */ const struct i2c_device_id *i2c_client_get_device_id(const struct i2c_client *client) { const struct i2c_driver *drv = to_i2c_driver(client->dev.driver); return i2c_match_id(drv->id_table, client); } EXPORT_SYMBOL_GPL(i2c_client_get_device_id); /* ---------------------------------------------------- * the i2c address scanning function * Will not work for 10-bit addresses! * ---------------------------------------------------- */ /* * Legacy default probe function, mostly relevant for SMBus. The default * probe method is a quick write, but it is known to corrupt the 24RF08 * EEPROMs due to a state machine bug, and could also irreversibly * write-protect some EEPROMs, so for address ranges 0x30-0x37 and 0x50-0x5f, * we use a short byte read instead. Also, some bus drivers don't implement * quick write, so we fallback to a byte read in that case too. * On x86, there is another special case for FSC hardware monitoring chips, * which want regular byte reads (address 0x73.) Fortunately, these are the * only known chips using this I2C address on PC hardware. * Returns 1 if probe succeeded, 0 if not. 
*/ static int i2c_default_probe(struct i2c_adapter *adap, unsigned short addr) { int err; union i2c_smbus_data dummy; #ifdef CONFIG_X86 if (addr == 0x73 && (adap->class & I2C_CLASS_HWMON) && i2c_check_functionality(adap, I2C_FUNC_SMBUS_READ_BYTE_DATA)) err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0, I2C_SMBUS_BYTE_DATA, &dummy); else #endif if (!((addr & ~0x07) == 0x30 || (addr & ~0x0f) == 0x50) && i2c_check_functionality(adap, I2C_FUNC_SMBUS_QUICK)) err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_WRITE, 0, I2C_SMBUS_QUICK, NULL); else if (i2c_check_functionality(adap, I2C_FUNC_SMBUS_READ_BYTE)) err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0, I2C_SMBUS_BYTE, &dummy); else { dev_warn(&adap->dev, "No suitable probing method supported for address 0x%02X\n", addr); err = -EOPNOTSUPP; } return err >= 0; } static int i2c_detect_address(struct i2c_client *temp_client, struct i2c_driver *driver) { struct i2c_board_info info; struct i2c_adapter *adapter = temp_client->adapter; int addr = temp_client->addr; int err; /* Make sure the address is valid */ err = i2c_check_7bit_addr_validity_strict(addr); if (err) { dev_warn(&adapter->dev, "Invalid probe address 0x%02x\n", addr); return err; } /* Skip if already in use (7 bit, no need to encode flags) */ if (i2c_check_addr_busy(adapter, addr)) return 0; /* Make sure there is something at this address */ if (!i2c_default_probe(adapter, addr)) return 0; /* Finally call the custom detection function */ memset(&info, 0, sizeof(struct i2c_board_info)); info.addr = addr; err = driver->detect(temp_client, &info); if (err) { /* -ENODEV is returned if the detection fails. We catch it here as this isn't an error. */ return err == -ENODEV ? 0 : err; } /* Consistency check */ if (info.type[0] == '\0') { dev_err(&adapter->dev, "%s detection function provided no name for 0x%x\n", driver->driver.name, addr); } else { struct i2c_client *client; /* Detection succeeded, instantiate the device */ if (adapter->class & I2C_CLASS_DEPRECATED) dev_warn(&adapter->dev, "This adapter will soon drop class based instantiation of devices. " "Please make sure client 0x%02x gets instantiated by other means. " "Check 'Documentation/i2c/instantiating-devices.rst' for details.\n", info.addr); dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n", info.type, info.addr); client = i2c_new_client_device(adapter, &info); if (!IS_ERR(client)) list_add_tail(&client->detected, &driver->clients); else dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n", info.type, info.addr); } return 0; } static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) { const unsigned short *address_list; struct i2c_client *temp_client; int i, err = 0; address_list = driver->address_list; if (!driver->detect || !address_list) return 0; /* Warn that the adapter lost class based instantiation */ if (adapter->class == I2C_CLASS_DEPRECATED) { dev_dbg(&adapter->dev, "This adapter dropped support for I2C classes and won't auto-detect %s devices anymore. 
" "If you need it, check 'Documentation/i2c/instantiating-devices.rst' for alternatives.\n", driver->driver.name); return 0; } /* Stop here if the classes do not match */ if (!(adapter->class & driver->class)) return 0; /* Set up a temporary client to help detect callback */ temp_client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL); if (!temp_client) return -ENOMEM; temp_client->adapter = adapter; for (i = 0; address_list[i] != I2C_CLIENT_END; i += 1) { dev_dbg(&adapter->dev, "found normal entry for adapter %d, addr 0x%02x\n", i2c_adapter_id(adapter), address_list[i]); temp_client->addr = address_list[i]; err = i2c_detect_address(temp_client, driver); if (unlikely(err)) break; } kfree(temp_client); return err; } int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr) { return i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0, I2C_SMBUS_QUICK, NULL) >= 0; } EXPORT_SYMBOL_GPL(i2c_probe_func_quick_read); struct i2c_client * i2c_new_scanned_device(struct i2c_adapter *adap, struct i2c_board_info *info, unsigned short const *addr_list, int (*probe)(struct i2c_adapter *adap, unsigned short addr)) { int i; if (!probe) probe = i2c_default_probe; for (i = 0; addr_list[i] != I2C_CLIENT_END; i++) { /* Check address validity */ if (i2c_check_7bit_addr_validity_strict(addr_list[i]) < 0) { dev_warn(&adap->dev, "Invalid 7-bit address 0x%02x\n", addr_list[i]); continue; } /* Check address availability (7 bit, no need to encode flags) */ if (i2c_check_addr_busy(adap, addr_list[i])) { dev_dbg(&adap->dev, "Address 0x%02x already in use, not probing\n", addr_list[i]); continue; } /* Test address responsiveness */ if (probe(adap, addr_list[i])) break; } if (addr_list[i] == I2C_CLIENT_END) { dev_dbg(&adap->dev, "Probing failed, no device found\n"); return ERR_PTR(-ENODEV); } info->addr = addr_list[i]; return i2c_new_client_device(adap, info); } EXPORT_SYMBOL_GPL(i2c_new_scanned_device); struct i2c_adapter *i2c_get_adapter(int nr) { struct i2c_adapter *adapter; mutex_lock(&core_lock); adapter = idr_find(&i2c_adapter_idr, nr); if (!adapter) goto exit; if (try_module_get(adapter->owner)) get_device(&adapter->dev); else adapter = NULL; exit: mutex_unlock(&core_lock); return adapter; } EXPORT_SYMBOL(i2c_get_adapter); void i2c_put_adapter(struct i2c_adapter *adap) { if (!adap) return; module_put(adap->owner); /* Should be last, otherwise we risk use-after-free with 'adap' */ put_device(&adap->dev); } EXPORT_SYMBOL(i2c_put_adapter); /** * i2c_get_dma_safe_msg_buf() - get a DMA safe buffer for the given i2c_msg * @msg: the message to be checked * @threshold: the minimum number of bytes for which using DMA makes sense. * Should at least be 1. * * Return: NULL if a DMA safe buffer was not obtained. Use msg->buf with PIO. * Or a valid pointer to be used with DMA. After use, release it by * calling i2c_put_dma_safe_msg_buf(). * * This function must only be called from process context! 
 */
u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold)
{
	/* also skip 0-length msgs for bogus thresholds of 0 */
	if (!threshold)
		pr_debug("DMA buffer for addr=0x%02x with length 0 is bogus\n",
			 msg->addr);

	if (msg->len < threshold || msg->len == 0)
		return NULL;

	if (msg->flags & I2C_M_DMA_SAFE)
		return msg->buf;

	pr_debug("using bounce buffer for addr=0x%02x, len=%d\n",
		 msg->addr, msg->len);

	if (msg->flags & I2C_M_RD)
		return kzalloc(msg->len, GFP_KERNEL);
	else
		return kmemdup(msg->buf, msg->len, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(i2c_get_dma_safe_msg_buf);

/**
 * i2c_put_dma_safe_msg_buf - release DMA safe buffer and sync with i2c_msg
 * @buf: the buffer obtained from i2c_get_dma_safe_msg_buf(). May be NULL.
 * @msg: the message which the buffer corresponds to
 * @xferred: bool saying if the message was transferred
 */
void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred)
{
	if (!buf || buf == msg->buf)
		return;

	if (xferred && msg->flags & I2C_M_RD)
		memcpy(msg->buf, buf, msg->len);

	kfree(buf);
}
EXPORT_SYMBOL_GPL(i2c_put_dma_safe_msg_buf);

MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
MODULE_DESCRIPTION("I2C-Bus main module");
MODULE_LICENSE("GPL");
3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MM_H
#define _LINUX_MM_H

#include <linux/errno.h>
#include <linux/mmdebug.h>
#include <linux/gfp.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/mmzone.h>
#include <linux/rbtree.h>
#include <linux/atomic.h>
#include <linux/debug_locks.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/range.h>
#include <linux/pfn.h>
#include <linux/percpu-refcount.h>
#include <linux/bit_spinlock.h>
#include <linux/shrinker.h>
#include <linux/resource.h>
#include <linux/page_ext.h>
#include <linux/err.h>
#include <linux/page-flags.h>
#include <linux/page_ref.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/sched.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <linux/memremap.h>
#include <linux/slab.h>

struct mempolicy;
struct anon_vma;
struct anon_vma_chain;
struct user_struct;
struct pt_regs;
struct folio_batch;

extern int sysctl_page_lock_unfairness;

void mm_core_init(void);
void init_mm_internals(void);

#ifndef CONFIG_NUMA		/* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;

static inline void set_max_mapnr(unsigned long limit)
{
	max_mapnr = limit;
}
#else
static
inline void set_max_mapnr(unsigned long limit) { } #endif extern atomic_long_t _totalram_pages; static inline unsigned long totalram_pages(void) { return (unsigned long)atomic_long_read(&_totalram_pages); } static inline void totalram_pages_inc(void) { atomic_long_inc(&_totalram_pages); } static inline void totalram_pages_dec(void) { atomic_long_dec(&_totalram_pages); } static inline void totalram_pages_add(long count) { atomic_long_add(count, &_totalram_pages); } extern void * high_memory; extern int page_cluster; extern const int page_cluster_max; #ifdef CONFIG_SYSCTL extern int sysctl_legacy_va_layout; #else #define sysctl_legacy_va_layout 0 #endif #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS extern const int mmap_rnd_bits_min; extern int mmap_rnd_bits_max __ro_after_init; extern int mmap_rnd_bits __read_mostly; #endif #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS extern const int mmap_rnd_compat_bits_min; extern const int mmap_rnd_compat_bits_max; extern int mmap_rnd_compat_bits __read_mostly; #endif #include <asm/page.h> #include <asm/processor.h> #ifndef __pa_symbol #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) #endif #ifndef page_to_virt #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) #endif #ifndef lm_alias #define lm_alias(x) __va(__pa_symbol(x)) #endif /* * To prevent common memory management code establishing * a zero page mapping on a read fault. * This macro should be defined within <asm/pgtable.h>. * s390 does this to prevent multiplexing of hardware bits * related to the physical page in case of virtualization. */ #ifndef mm_forbids_zeropage #define mm_forbids_zeropage(X) (0) #endif /* * On some architectures it is expensive to call memset() for small sizes. * If an architecture decides to implement their own version of * mm_zero_struct_page they should wrap the defines below in a #ifndef and * define their own version of this macro in <asm/pgtable.h> */ #if BITS_PER_LONG == 64 /* This function must be updated when the size of struct page grows above 96 * or reduces below 56. The idea that compiler optimizes out switch() * statement, and only leaves move/store instructions. Also the compiler can * combine write statements if they are both assignments and can be reordered, * this can result in several of the writes here being dropped. */ #define mm_zero_struct_page(pp) __mm_zero_struct_page(pp) static inline void __mm_zero_struct_page(struct page *page) { unsigned long *_pp = (void *)page; /* Check that struct page is either 56, 64, 72, 80, 88 or 96 bytes */ BUILD_BUG_ON(sizeof(struct page) & 7); BUILD_BUG_ON(sizeof(struct page) < 56); BUILD_BUG_ON(sizeof(struct page) > 96); switch (sizeof(struct page)) { case 96: _pp[11] = 0; fallthrough; case 88: _pp[10] = 0; fallthrough; case 80: _pp[9] = 0; fallthrough; case 72: _pp[8] = 0; fallthrough; case 64: _pp[7] = 0; fallthrough; case 56: _pp[6] = 0; _pp[5] = 0; _pp[4] = 0; _pp[3] = 0; _pp[2] = 0; _pp[1] = 0; _pp[0] = 0; } } #else #define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page))) #endif /* * Default maximum number of active map areas, this limits the number of vmas * per mm struct. Users can overwrite this number by sysctl but there is a * problem. * * When a program's coredump is generated as ELF format, a section is created * per a vma. In ELF, the number of sections is represented in unsigned short. * This means the number of sections should be smaller than 65535 at coredump. 
* Because the kernel adds some informative sections to a image of program at * generating coredump, we need some margin. The number of extra sections is * 1-3 now and depends on arch. We use "5" as safe margin, here. * * ELF extended numbering allows more than 65535 sections, so 16-bit bound is * not a hard limit any more. Although some userspace tools can be surprised by * that. */ #define MAPCOUNT_ELF_CORE_MARGIN (5) #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) extern int sysctl_max_map_count; extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, loff_t *); #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) #else #define nth_page(page,n) ((page) + (n)) #define folio_page_idx(folio, p) ((p) - &(folio)->page) #endif /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) /* to align the pointer to the (prev) page boundary */ #define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE) /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) static inline struct folio *lru_to_folio(struct list_head *head) { return list_entry((head)->prev, struct folio, lru); } void setup_initial_init_mm(void *start_code, void *end_code, void *end_data, void *brk); /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way * we have a virtual fs - giving a cleaner interface to the * mm details, and allowing different kinds of memory mappings * (from shared memory to executable loading to arbitrary * mmap() functions). */ struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); /* Use only if VMA has no other users */ void __vm_area_free(struct vm_area_struct *vma); #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; extern struct rw_semaphore nommu_region_sem; extern unsigned int kobjsize(const void *objp); #endif /* * vm_flags in vm_area_struct, see mm_types.h. * When changing, update also include/trace/events/mmflags.h */ #define VM_NONE 0x00000000 #define VM_READ 0x00000001 /* currently active flags */ #define VM_WRITE 0x00000002 #define VM_EXEC 0x00000004 #define VM_SHARED 0x00000008 /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. 
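 *
 * Worked example of that shift, using the flag values defined in this
 * file: VM_MAYREAD is 0x00000010, and 0x00000010 >> 4 == 0x00000001,
 * which is VM_READ; the same shift maps VM_MAYWRITE (0x20) and
 * VM_MAYEXEC (0x40) onto VM_WRITE (0x02) and VM_EXEC (0x04).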
*/ #define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ #define VM_MAYWRITE 0x00000020 #define VM_MAYEXEC 0x00000040 #define VM_MAYSHARE 0x00000080 #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ #ifdef CONFIG_MMU #define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */ #else /* CONFIG_MMU */ #define VM_MAYOVERLAY 0x00000200 /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ #define VM_UFFD_MISSING 0 #endif /* CONFIG_MMU */ #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */ #define VM_LOCKED 0x00002000 #define VM_IO 0x00004000 /* Memory mapped I/O or similar */ /* Used by sys_madvise() */ #define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ #define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ #define VM_LOCKONFAULT 0x00080000 /* Lock the pages covered when they are faulted in */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_SYNC 0x00800000 /* Synchronous page faults */ #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ #define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */ #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ #ifdef CONFIG_MEM_SOFT_DIRTY # define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */ #else # define VM_SOFTDIRTY 0 #endif #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_1 33 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS # define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 # define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */ # define VM_PKEY_BIT1 VM_HIGH_ARCH_1 /* on x86 and 5-bit value on ppc64 */ # define VM_PKEY_BIT2 VM_HIGH_ARCH_2 # define VM_PKEY_BIT3 VM_HIGH_ARCH_3 #ifdef CONFIG_PPC # define VM_PKEY_BIT4 VM_HIGH_ARCH_4 #else # define VM_PKEY_BIT4 0 #endif #endif /* CONFIG_ARCH_HAS_PKEYS */ #ifdef CONFIG_X86_USER_SHADOW_STACK /* * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of * support core mm. * * These VMAs will get a single end guard page. This helps userspace protect * itself from attacks. A single page is enough for current shadow stack archs * (x86). 
See the comments near alloc_shstk() in arch/x86/kernel/shstk.c * for more details on the guard size. */ # define VM_SHADOW_STACK VM_HIGH_ARCH_5 #else # define VM_SHADOW_STACK VM_NONE #endif #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ #elif defined(CONFIG_PPC) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI #elif defined(CONFIG_ARM64) # define VM_ARM64_BTI VM_ARCH_1 /* BTI guarded page, a.k.a. GP bit */ # define VM_ARCH_CLEAR VM_ARM64_BTI #elif !defined(CONFIG_MMU) # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ #endif #if defined(CONFIG_ARM64_MTE) # define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ # define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ #else # define VM_MTE VM_NONE # define VM_MTE_ALLOWED VM_NONE #endif #ifndef VM_GROWSUP # define VM_GROWSUP VM_NONE #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR # define VM_UFFD_MINOR_BIT 38 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ /* * This flag is used to connect VFIO to arch specific KVM code. It * indicates that the memory under this VMA is safe for use with any * non-cachable memory type inside KVM. Some VFIO devices, on some * platforms, are thought to be unsafe and can cause machine crashes * if KVM does not lock down the memory type. */ #ifdef CONFIG_64BIT #define VM_ALLOW_ANY_UNCACHED_BIT 39 #define VM_ALLOW_ANY_UNCACHED BIT(VM_ALLOW_ANY_UNCACHED_BIT) #else #define VM_ALLOW_ANY_UNCACHED VM_NONE #endif /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) /* Common data flag combinations */ #define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ VM_MAYWRITE | VM_MAYEXEC) #define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC #endif #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) #ifdef CONFIG_STACK_GROWSUP #define VM_STACK VM_GROWSUP #define VM_STACK_EARLY VM_GROWSDOWN #else #define VM_STACK VM_GROWSDOWN #define VM_STACK_EARLY 0 #endif #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) /* VMA basic access permission flags */ #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) /* * Special vmas that are non-mergable, non-mlock()able. 
*/ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) /* This mask prevents VMA from being scanned with khugepaged */ #define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB) /* This mask defines which mm->def_flags a process can inherit its parent */ #define VM_INIT_DEF_MASK VM_NOHUGEPAGE /* This mask represents all the VMA flag bits used by mlock */ #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) /* Arch-specific flags to clear when updating VM flags on protection change */ #ifndef VM_ARCH_CLEAR # define VM_ARCH_CLEAR VM_NONE #endif #define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR) /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. */ /* * The default fault flags that should be used by most of the * arch-specific page fault handlers. */ #define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \ FAULT_FLAG_KILLABLE | \ FAULT_FLAG_INTERRUPTIBLE) /** * fault_flag_allow_retry_first - check ALLOW_RETRY the first time * @flags: Fault flags. * * This is mostly used for places where we want to try to avoid taking * the mmap_lock for too long a time when waiting for another condition * to change, in which case we can try to be polite to release the * mmap_lock in the first round to avoid potential starvation of other * processes that would also want the mmap_lock. * * Return: true if the page fault allows retry and this is the first * attempt of the fault handling; false otherwise. */ static inline bool fault_flag_allow_retry_first(enum fault_flag flags) { return (flags & FAULT_FLAG_ALLOW_RETRY) && (!(flags & FAULT_FLAG_TRIED)); } #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ { FAULT_FLAG_ALLOW_RETRY, "ALLOW_RETRY" }, \ { FAULT_FLAG_RETRY_NOWAIT, "RETRY_NOWAIT" }, \ { FAULT_FLAG_KILLABLE, "KILLABLE" }, \ { FAULT_FLAG_TRIED, "TRIED" }, \ { FAULT_FLAG_USER, "USER" }, \ { FAULT_FLAG_REMOTE, "REMOTE" }, \ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \ { FAULT_FLAG_VMA_LOCK, "VMA_LOCK" } /* * vm_fault is filled by the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. * * MM layer fills up gfp_mask for page allocations but fault handler might * alter it if its implementation requires a different allocation context. * * pgoff should be used in favour of virtual_address, if possible. */ struct vm_fault { const struct { struct vm_area_struct *vma; /* Target VMA */ gfp_t gfp_mask; /* gfp mask to be used for allocations */ pgoff_t pgoff; /* Logical page offset based on vma */ unsigned long address; /* Faulting virtual address - masked */ unsigned long real_address; /* Faulting virtual address - unmasked */ }; enum fault_flag flags; /* FAULT_FLAG_xxx flags * XXX: should really be 'const' */ pmd_t *pmd; /* Pointer to pmd entry matching * the 'address' */ pud_t *pud; /* Pointer to pud entry matching * the 'address' */ union { pte_t orig_pte; /* Value of PTE at the time of fault */ pmd_t orig_pmd; /* Value of PMD at the time of fault, * used by PMD fault only. */ }; struct page *cow_page; /* Page handler may use for COW fault */ struct page *page; /* ->fault handlers should return a * page here, unless VM_FAULT_NOPAGE * is set (which is also implied by * VM_FAULT_ERROR). 
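 *
 * Illustrative sketch of the pattern described here (my_fault() and
 * my_lookup() are made-up names, not part of this header): a minimal
 * ->fault handler that returns a page it owns would take a reference
 * and hand the page back via this field:
 *
 *	static vm_fault_t my_fault(struct vm_fault *vmf)
 *	{
 *		struct page *page = my_lookup(vmf->pgoff);
 *
 *		if (!page)
 *			return VM_FAULT_SIGBUS;
 *		get_page(page);
 *		vmf->page = page;
 *		return 0;
 *	}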
*/ /* These three entries are valid only while holding ptl lock */ pte_t *pte; /* Pointer to pte entry matching * the 'address'. NULL if the page * table hasn't been allocated. */ spinlock_t *ptl; /* Page table lock. * Protects pte page table if 'pte' * is not NULL, otherwise pmd. */ pgtable_t prealloc_pte; /* Pre-allocated pte page table. * vm_ops->map_pages() sets up a page * table from atomic context. * do_fault_around() pre-allocates * page table to avoid allocation from * atomic context. */ }; /* * These are the virtual MM functions - opening of an area, closing and * unmapping it (needed to keep files on disk up-to-date etc), pointer * to the functions called when a no-page or a wp-page exception occurs. */ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); /** * @close: Called when the VMA is being removed from the MM. * Context: User context. May sleep. Caller holds mmap_lock. */ void (*close)(struct vm_area_struct * area); /* Called any time before splitting to check if it's allowed */ int (*may_split)(struct vm_area_struct *area, unsigned long addr); int (*mremap)(struct vm_area_struct *area); /* * Called by mprotect() to make driver-specific permission * checks before mprotect() is finalised. The VMA must not * be modified. Returns 0 if mprotect() can proceed. */ int (*mprotect)(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long newflags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); vm_fault_t (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); unsigned long (*pagesize)(struct vm_area_struct * area); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs. See also generic_access_phys() for a generic * implementation useful for any iomem mapping. */ int (*access)(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); /* Called by the /proc/PID/maps code to ask the vma whether it * has a special name. Returning non-NULL will also cause this * vma to be dumped unconditionally. */ const char *(*name)(struct vm_area_struct *vma); #ifdef CONFIG_NUMA /* * set_policy() op must add a reference to any non-NULL @new mempolicy * to hold the policy upon return. Caller should pass NULL @new to * remove a policy and fall back to surrounding context--i.e. do not * install a MPOL_DEFAULT policy, nor the task or system default * mempolicy. */ int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); /* * get_policy() op must add reference [mpol_get()] to any policy at * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure * in mm/mempolicy.c will do this automatically. * get_policy() must NOT add a ref if the policy at (vma,addr) is not * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. * If no [shared/vma] mempolicy exists at the addr, get_policy() op * must return NULL--i.e., do not "fallback" to task or system default * policy. */ struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); #endif /* * Called by vm_normal_page() for special PTEs to find the * page for @addr. 
This is useful if the default behavior * (using pte_page()) would not find the correct page. */ struct page *(*find_special_page)(struct vm_area_struct *vma, unsigned long addr); }; #ifdef CONFIG_NUMA_BALANCING static inline void vma_numab_state_init(struct vm_area_struct *vma) { vma->numab_state = NULL; } static inline void vma_numab_state_free(struct vm_area_struct *vma) { kfree(vma->numab_state); } #else static inline void vma_numab_state_init(struct vm_area_struct *vma) {} static inline void vma_numab_state_free(struct vm_area_struct *vma) {} #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_PER_VMA_LOCK /* * Try to read-lock a vma. The function is allowed to occasionally yield false * locked result to avoid performance overhead, in which case we fall back to * using mmap_lock. The function should never yield false unlocked result. */ static inline bool vma_start_read(struct vm_area_struct *vma) { /* * Check before locking. A race might cause false locked result. * We can use READ_ONCE() for the mm_lock_seq here, and don't need * ACQUIRE semantics, because this is just a lockless check whose result * we don't rely on for anything - the mm_lock_seq read against which we * need ordering is below. */ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq)) return false; if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) return false; /* * Overflow might produce false locked result. * False unlocked result is impossible because we modify and check * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq * modification invalidates all existing locks. * * We must use ACQUIRE semantics for the mm_lock_seq so that if we are * racing with vma_end_write_all(), we only start reading from the VMA * after it has been unlocked. * This pairs with RELEASE semantics in vma_end_write_all(). */ if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) { up_read(&vma->vm_lock->lock); return false; } return true; } static inline void vma_end_read(struct vm_area_struct *vma) { rcu_read_lock(); /* keeps vma alive till the end of up_read */ up_read(&vma->vm_lock->lock); rcu_read_unlock(); } /* WARNING! Can only be used if mmap_lock is expected to be write-locked */ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) { mmap_assert_write_locked(vma->vm_mm); /* * current task is holding mmap_write_lock, both vma->vm_lock_seq and * mm->mm_lock_seq can't be concurrently modified. */ *mm_lock_seq = vma->vm_mm->mm_lock_seq; return (vma->vm_lock_seq == *mm_lock_seq); } /* * Begin writing to a VMA. * Exclude concurrent readers under the per-VMA lock until the currently * write-locked mmap_lock is dropped or downgraded. */ static inline void vma_start_write(struct vm_area_struct *vma) { int mm_lock_seq; if (__is_vma_write_locked(vma, &mm_lock_seq)) return; down_write(&vma->vm_lock->lock); /* * We should use WRITE_ONCE() here because we can have concurrent reads * from the early lockless pessimistic check in vma_start_read(). * We don't really care about the correctness of that early check, but * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. 
*/ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); up_write(&vma->vm_lock->lock); } static inline void vma_assert_write_locked(struct vm_area_struct *vma) { int mm_lock_seq; VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); } static inline void vma_assert_locked(struct vm_area_struct *vma) { if (!rwsem_is_locked(&vma->vm_lock->lock)) vma_assert_write_locked(vma); } static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) { /* When detaching vma should be write-locked */ if (detached) vma_assert_write_locked(vma); vma->detached = detached; } static inline void release_fault_lock(struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) vma_end_read(vmf->vma); else mmap_read_unlock(vmf->vma->vm_mm); } static inline void assert_fault_locked(struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) vma_assert_locked(vmf->vma); else mmap_assert_locked(vmf->vma->vm_mm); } struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address); #else /* CONFIG_PER_VMA_LOCK */ static inline bool vma_start_read(struct vm_area_struct *vma) { return false; } static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} static inline void vma_assert_write_locked(struct vm_area_struct *vma) { mmap_assert_write_locked(vma->vm_mm); } static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) {} static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address) { return NULL; } static inline void vma_assert_locked(struct vm_area_struct *vma) { mmap_assert_locked(vma->vm_mm); } static inline void release_fault_lock(struct vm_fault *vmf) { mmap_read_unlock(vmf->vma->vm_mm); } static inline void assert_fault_locked(struct vm_fault *vmf) { mmap_assert_locked(vmf->vma->vm_mm); } #endif /* CONFIG_PER_VMA_LOCK */ extern const struct vm_operations_struct vma_dummy_vm_ops; /* * WARNING: vma_init does not initialize vma->vm_lock. * Use vm_area_alloc()/vm_area_free() if vma needs locking. */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); vma_mark_detached(vma, false); vma_numab_state_init(vma); } /* Use when VMA is not part of the VMA tree and needs no locking */ static inline void vm_flags_init(struct vm_area_struct *vma, vm_flags_t flags) { ACCESS_PRIVATE(vma, __vm_flags) = flags; } /* * Use when VMA is part of the VMA tree and modifications need coordination * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and * it should be locked explicitly beforehand. */ static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_t flags) { vma_assert_write_locked(vma); vm_flags_init(vma, flags); } static inline void vm_flags_reset_once(struct vm_area_struct *vma, vm_flags_t flags) { vma_assert_write_locked(vma); WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); } static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) |= flags; } static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) { vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; } /* * Use only if VMA is not part of the VMA tree or has no other users and * therefore needs no locking. 
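 *
 * Illustrative sketch (not taken from a specific caller): a VMA that is
 * already in the tree should use the locking helpers above instead. A
 * driver ->mmap() handler, for example, would typically do something like
 *
 *	vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
 *
 * which write-locks the VMA via vma_start_write() before touching the
 * flags.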
*/ static inline void __vm_flags_mod(struct vm_area_struct *vma, vm_flags_t set, vm_flags_t clear) { vm_flags_init(vma, (vma->vm_flags | set) & ~clear); } /* * Use only when the order of set/clear operations is unimportant, otherwise * use vm_flags_{set|clear} explicitly. */ static inline void vm_flags_mod(struct vm_area_struct *vma, vm_flags_t set, vm_flags_t clear) { vma_start_write(vma); __vm_flags_mod(vma, set, clear); } static inline void vma_set_anonymous(struct vm_area_struct *vma) { vma->vm_ops = NULL; } static inline bool vma_is_anonymous(struct vm_area_struct *vma) { return !vma->vm_ops; } /* * Indicate if the VMA is a heap for the given task; for * /proc/PID/maps that is the heap of the main task. */ static inline bool vma_is_initial_heap(const struct vm_area_struct *vma) { return vma->vm_start < vma->vm_mm->brk && vma->vm_end > vma->vm_mm->start_brk; } /* * Indicate if the VMA is a stack for the given task; for * /proc/PID/maps that is the stack of the main task. */ static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) { /* * We make no effort to guess what a given thread considers to be * its "stack". It's not even well-defined for programs written * languages like Go. */ return vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack; } static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) { int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); if (!maybe_stack) return false; if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == VM_STACK_INCOMPLETE_SETUP) return true; return false; } static inline bool vma_is_foreign(struct vm_area_struct *vma) { if (!current->mm) return true; if (current->mm != vma->vm_mm) return true; return false; } static inline bool vma_is_accessible(struct vm_area_struct *vma) { return vma->vm_flags & VM_ACCESS_FLAGS; } static inline bool is_shared_maywrite(vm_flags_t vm_flags) { return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == (VM_SHARED | VM_MAYWRITE); } static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) { return is_shared_maywrite(vma->vm_flags); } static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) { return mas_find(&vmi->mas, max - 1); } static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) { /* * Uses mas_find() to get the first VMA when the iterator starts. * Calling mas_next() could skip the first entry. 
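 *
 * Illustrative sketch of how this iterator is normally consumed (the
 * per-VMA work, handle_vma(), is a made-up name; VMA_ITERATOR() is
 * assumed to be the usual declaration helper available via the mm
 * headers). The caller is expected to hold at least mmap_read_lock(mm):
 *
 *	VMA_ITERATOR(vmi, mm, 0);
 *	struct vm_area_struct *vma;
 *
 *	for_each_vma(vmi, vma)
 *		handle_vma(vma);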
*/ return mas_find(&vmi->mas, ULONG_MAX); } static inline struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi) { return mas_next_range(&vmi->mas, ULONG_MAX); } static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) { return mas_prev(&vmi->mas, 0); } static inline struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) { return mas_prev_range(&vmi->mas, 0); } static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) { return vmi->mas.index; } static inline unsigned long vma_iter_end(struct vma_iterator *vmi) { return vmi->mas.last + 1; } static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, unsigned long count) { return mas_expected_entries(&vmi->mas, count); } static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, unsigned long start, unsigned long end, gfp_t gfp) { __mas_set_range(&vmi->mas, start, end - 1); mas_store_gfp(&vmi->mas, NULL, gfp); if (unlikely(mas_is_err(&vmi->mas))) return -ENOMEM; return 0; } /* Free any unused preallocations */ static inline void vma_iter_free(struct vma_iterator *vmi) { mas_destroy(&vmi->mas); } static inline int vma_iter_bulk_store(struct vma_iterator *vmi, struct vm_area_struct *vma) { vmi->mas.index = vma->vm_start; vmi->mas.last = vma->vm_end - 1; mas_store(&vmi->mas, vma); if (unlikely(mas_is_err(&vmi->mas))) return -ENOMEM; return 0; } static inline void vma_iter_invalidate(struct vma_iterator *vmi) { mas_pause(&vmi->mas); } static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) { mas_set(&vmi->mas, addr); } #define for_each_vma(__vmi, __vma) \ while (((__vma) = vma_next(&(__vmi))) != NULL) /* The MM code likes to work with exclusive end addresses */ #define for_each_vma_range(__vmi, __vma, __end) \ while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) #ifdef CONFIG_SHMEM /* * The vma_is_shmem is not inline because it is used only by slow * paths in userfault. */ bool vma_is_shmem(struct vm_area_struct *vma); bool vma_is_anon_shmem(struct vm_area_struct *vma); #else static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; } #endif int vma_is_stack_for_current(struct vm_area_struct *vma); /* flush_tlb_range() takes a vma, not a mm, and can care about flags */ #define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } struct mmu_gather; struct inode; /* * compound_order() can be called without holding a reference, which means * that niceties like page_folio() don't work. These callers should be * prepared to handle wild return values. For example, PG_head may be * set before the order is initialised, or this may be a tail page. * See compaction.c for some good examples. */ static inline unsigned int compound_order(struct page *page) { struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags)) return 0; return folio->_flags_1 & 0xff; } /** * folio_order - The allocation order of a folio. * @folio: The folio. * * A folio is composed of 2^order pages. See get_order() for the definition * of order. * * Return: The order of the folio. */ static inline unsigned int folio_order(struct folio *folio) { if (!folio_test_large(folio)) return 0; return folio->_flags_1 & 0xff; } #include <linux/huge_mm.h> /* * Methods to modify the page usage count. 
* * What counts for a page usage: * - cache mapping (page->mapping) * - private data (page->private) * - page mapped in a task's page tables, each mapping * is counted separately * * Also, many kernel routines increase the page count before a critical * routine so they can be sure the page doesn't go away from under them. */ /* * Drop a ref, return true if the refcount fell to zero (the page has no users) */ static inline int put_page_testzero(struct page *page) { VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); return page_ref_dec_and_test(page); } static inline int folio_put_testzero(struct folio *folio) { return put_page_testzero(&folio->page); } /* * Try to grab a ref unless the page has a refcount of zero, return false if * that is the case. * This can be called when MMU is off so it must not access * any of the virtual mappings. */ static inline bool get_page_unless_zero(struct page *page) { return page_ref_add_unless(page, 1, 0); } static inline struct folio *folio_get_nontail_page(struct page *page) { if (unlikely(!get_page_unless_zero(page))) return NULL; return (struct folio *)page; } extern int page_is_ram(unsigned long pfn); enum { REGION_INTERSECTS, REGION_DISJOINT, REGION_MIXED, }; int region_intersects(resource_size_t offset, size_t size, unsigned long flags, unsigned long desc); /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); unsigned long vmalloc_to_pfn(const void *addr); /* * Determine if an address is within the vmalloc range * * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ #ifdef CONFIG_MMU extern bool is_vmalloc_addr(const void *x); extern int is_vmalloc_or_module_addr(const void *x); #else static inline bool is_vmalloc_addr(const void *x) { return false; } static inline int is_vmalloc_or_module_addr(const void *x) { return 0; } #endif /* * How many times the entire folio is mapped as a single unit (eg by a * PMD or PUD entry). This is probably not what you want, except for * debugging purposes - it does not include PTE-mapped sub-pages; look * at folio_mapcount() or page_mapcount() instead. */ static inline int folio_entire_mapcount(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); return atomic_read(&folio->_entire_mapcount) + 1; } /* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test * and atomic_add_negative(-1). */ static inline void page_mapcount_reset(struct page *page) { atomic_set(&(page)->_mapcount, -1); } /** * page_mapcount() - Number of times this precise page is mapped. * @page: The page. * * The number of times this page is mapped. If this page is part of * a large folio, it includes the number of times this page is mapped * as part of that folio. * * Will report 0 for pages which cannot be mapped into userspace, eg * slab, page tables and similar. */ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; /* Handle page_has_type() pages */ if (mapcount < 0) mapcount = 0; if (unlikely(PageCompound(page))) mapcount += folio_entire_mapcount(page_folio(page)); return mapcount; } int folio_total_mapcount(struct folio *folio); /** * folio_mapcount() - Calculate the number of mappings of this folio. * @folio: The folio. * * A large folio tracks both how many times the entire folio is mapped, * and how many times each individual page in the folio is mapped. 
* This function calculates the total number of times the folio is * mapped. * * Return: The number of times this folio is mapped. */ static inline int folio_mapcount(struct folio *folio) { if (likely(!folio_test_large(folio))) return atomic_read(&folio->_mapcount) + 1; return folio_total_mapcount(folio); } static inline bool folio_large_is_mapped(struct folio *folio) { /* * Reading _entire_mapcount below could be omitted if hugetlb * participated in incrementing nr_pages_mapped when compound mapped. */ return atomic_read(&folio->_nr_pages_mapped) > 0 || atomic_read(&folio->_entire_mapcount) >= 0; } /** * folio_mapped - Is this folio mapped into userspace? * @folio: The folio. * * Return: True if any page in this folio is referenced by user page tables. */ static inline bool folio_mapped(struct folio *folio) { if (likely(!folio_test_large(folio))) return atomic_read(&folio->_mapcount) >= 0; return folio_large_is_mapped(folio); } /* * Return true if this page is mapped into pagetables. * For compound page it returns true if any sub-page of compound page is mapped, * even if this particular sub-page is not itself mapped by any PTE or PMD. */ static inline bool page_mapped(struct page *page) { if (likely(!PageCompound(page))) return atomic_read(&page->_mapcount) >= 0; return folio_large_is_mapped(page_folio(page)); } static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); return compound_head(page); } static inline struct folio *virt_to_folio(const void *x) { struct page *page = virt_to_page(x); return page_folio(page); } void __folio_put(struct folio *folio); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); void folio_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); void destroy_large_folio(struct folio *folio); /* Returns the number of bytes in this potentially compound page. */ static inline unsigned long page_size(struct page *page) { return PAGE_SIZE << compound_order(page); } /* Returns the number of bits needed for the number of bytes in a page */ static inline unsigned int page_shift(struct page *page) { return PAGE_SHIFT + compound_order(page); } /** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. */ static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); return compound_order(page); } /** * thp_size - Size of a transparent huge page. * @page: Head page of a transparent huge page. * * Return: Number of bytes in this page. */ static inline unsigned long thp_size(struct page *page) { return PAGE_SIZE << thp_order(page); } #ifdef CONFIG_MMU /* * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when * servicing faults for write access. In the normal case, do always want * pte_mkwrite. But get_user_pages can cause write faults for mappings * that do not have writing enabled, when used by access_process_vm. */ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) { if (likely(vma->vm_flags & VM_WRITE)) pte = pte_mkwrite(pte, vma); return pte; } vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); void set_pte_range(struct vm_fault *vmf, struct folio *folio, struct page *page, unsigned int nr, unsigned long addr); vm_fault_t finish_fault(struct vm_fault *vmf); #endif /* * Multiple processes may "see" the same page. E.g. 
for untouched * mappings of /dev/null, all processes see the same page full of * zeroes, and text pages of executables and shared libraries have * only one copy in memory, at most, normally. * * For the non-reserved pages, page_count(page) denotes a reference count. * page_count() == 0 means the page is free. page->lru is then used for * freelist management in the buddy allocator. * page_count() > 0 means the page has been allocated. * * Pages are allocated by the slab allocator in order to provide memory * to kmalloc and kmem_cache_alloc. In this case, the management of the * page, and the fields in 'struct page' are the responsibility of mm/slab.c * unless a particular usage is carefully commented. (the responsibility of * freeing the kmalloc memory is the caller's, of course). * * A page may be used by anyone else who does a __get_free_page(). * In this case, page_count still tracks the references, and should only * be used through the normal accessor functions. The top bits of page->flags * and page->virtual store page management information, but all other fields * are unused and could be used privately, carefully. The management of this * page is the responsibility of the one who allocated it, and those who have * subsequently been given references to it. * * The other pages (we may call them "pagecache pages") are completely * managed by the Linux memory manager: I/O, buffers, swapping etc. * The following discussion applies only to them. * * A pagecache page contains an opaque `private' member, which belongs to the * page's address_space. Usually, this is the address of a circular list of * the page's disk buffers. PG_private must be set to tell the VM to call * into the filesystem to release these pages. * * A page may belong to an inode's memory mapping. In this case, page->mapping * is the pointer to the inode, and page->index is the file offset of the page, * in units of PAGE_SIZE. * * If pagecache pages are not associated with an inode, they are said to be * anonymous pages. These may become associated with the swapcache, and in that * case PG_swapcache is set, and page->private is an offset into the swapcache. * * In either case (swapcache or inode backed), the pagecache itself holds one * reference to the page. Setting PG_private should also increment the * refcount. The each user mapping also has a reference to the page. * * The pagecache pages are stored in a per-mapping radix tree, which is * rooted at mapping->i_pages, and indexed by offset. * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space * lists, we instead now tag pages as dirty/writeback in the radix tree. * * All pagecache pages may be subject to I/O: * - inode pages may need to be read from disk, * - inode pages which have been modified and are MAP_SHARED may need * to be written back to the inode on disk, * - anonymous pages (including MAP_PRIVATE file mappings) which have been * modified may need to be swapped out to swap space and (later) to be read * back into memory. 
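 *
 * Illustrative sketch (not part of the original rules above, just their
 * usual consequence): for a page the caller has safely obtained a pointer
 * to, the reference counting boils down to the familiar pairing
 *
 *	get_page(page);
 *	... use the page ...
 *	put_page(page);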
*/ #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); bool __put_devmap_managed_page_refs(struct page *page, int refs); static inline bool put_devmap_managed_page_refs(struct page *page, int refs) { if (!static_branch_unlikely(&devmap_managed_key)) return false; if (!is_zone_device_page(page)) return false; return __put_devmap_managed_page_refs(page, refs); } #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ static inline bool put_devmap_managed_page_refs(struct page *page, int refs) { return false; } #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ static inline bool put_devmap_managed_page(struct page *page) { return put_devmap_managed_page_refs(page, 1); } /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) /** * folio_get - Increment the reference count on a folio. * @folio: The folio. * * Context: May be called in any context, as long as you know that * you have a refcount on the folio. If you do not already have one, * folio_try_get() may be the right interface for you to use. */ static inline void folio_get(struct folio *folio) { VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio); folio_ref_inc(folio); } static inline void get_page(struct page *page) { folio_get(page_folio(page)); } static inline __must_check bool try_get_page(struct page *page) { page = compound_head(page); if (WARN_ON_ONCE(page_ref_count(page) <= 0)) return false; page_ref_inc(page); return true; } /** * folio_put - Decrement the reference count on a folio. * @folio: The folio. * * If the folio's reference count reaches zero, the memory will be * released back to the page allocator and may be used by another * allocation immediately. Do not access the memory or the struct folio * after calling folio_put() unless you can be sure that it wasn't the * last reference. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ static inline void folio_put(struct folio *folio) { if (folio_put_testzero(folio)) __folio_put(folio); } /** * folio_put_refs - Reduce the reference count on a folio. * @folio: The folio. * @refs: The amount to subtract from the folio's reference count. * * If the folio's reference count reaches zero, the memory will be * released back to the page allocator and may be used by another * allocation immediately. Do not access the memory or the struct folio * after calling folio_put_refs() unless you can be sure that these weren't * the last references. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ static inline void folio_put_refs(struct folio *folio, int refs) { if (folio_ref_sub_and_test(folio, refs)) __folio_put(folio); } void folios_put_refs(struct folio_batch *folios, unsigned int *refs); /* * union release_pages_arg - an array of pages or folios * * release_pages() releases a simple array of multiple pages, and * accepts various different forms of said page array: either * a regular old boring array of pages, an array of folios, or * an array of encoded page pointers. * * The transparent union syntax for this kind of "any of these * argument types" is all kinds of ugly, so look away. 
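 *
 * Illustrative sketch (hypothetical caller, not part of this header):
 * thanks to the transparent union, a caller holding a plain array of
 * page pointers can pass it directly:
 *
 *	struct page *pages[16];
 *	... fill and use pages ...
 *	release_pages(pages, 16);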
*/ typedef union { struct page **pages; struct folio **folios; struct encoded_page **encoded_pages; } release_pages_arg __attribute__ ((__transparent_union__)); void release_pages(release_pages_arg, int nr); /** * folios_put - Decrement the reference count on an array of folios. * @folios: The folios. * * Like folio_put(), but for a batch of folios. This is more efficient * than writing the loop yourself as it will optimise the locks which need * to be taken if the folios are freed. The folios batch is returned * empty and ready to be reused for another batch; there is no need to * reinitialise it. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ static inline void folios_put(struct folio_batch *folios) { folios_put_refs(folios, NULL); } static inline void put_page(struct page *page) { struct folio *folio = page_folio(page); /* * For some devmap managed pages we need to catch refcount transition * from 2 to 1: */ if (put_devmap_managed_page(&folio->page)) return; folio_put(folio); } /* * GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload * the page's refcount so that two separate items are tracked: the original page * reference count, and also a new count of how many pin_user_pages() calls were * made against the page. ("gup-pinned" is another term for the latter). * * With this scheme, pin_user_pages() becomes special: such pages are marked as * distinct from normal pages. As such, the unpin_user_page() call (and its * variants) must be used in order to release gup-pinned pages. * * Choice of value: * * By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference * counts with respect to pin_user_pages() and unpin_user_page() becomes * simpler, due to the fact that adding an even power of two to the page * refcount has the effect of using only the upper N bits, for the code that * counts up using the bias value. This means that the lower bits are left for * the exclusive use of the original code that increments and decrements by one * (or at least, by much smaller values than the bias value). * * Of course, once the lower bits overflow into the upper bits (and this is * OK, because subtraction recovers the original values), then visual inspection * no longer suffices to directly view the separate counts. However, for normal * applications that don't have huge page reference counts, this won't be an * issue. * * Locking: the lockless algorithm described in folio_try_get_rcu() * provides safe operation for get_user_pages(), page_mkclean() and * other calls that race to set up page table entries. */ #define GUP_PIN_COUNTING_BIAS (1U << 10) void unpin_user_page(struct page *page); void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty); void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty); void unpin_user_pages(struct page **pages, unsigned long npages); static inline bool is_cow_mapping(vm_flags_t flags) { return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } #ifndef CONFIG_MMU static inline bool is_nommu_shared_mapping(vm_flags_t flags) { /* * NOMMU shared mappings are ordinary MAP_SHARED mappings and selected * R/O MAP_PRIVATE file mappings that are an effective R/O overlay of * a file mapping. R/O MAP_PRIVATE mappings might still modify * underlying memory if ptrace is active, so this is only possible if * ptrace does not apply. 
Note that there is no mprotect() to upgrade * write permissions later. */ return flags & (VM_MAYSHARE | VM_MAYOVERLAY); } #endif #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif /* * The identification function is mainly used by the buddy allocator for * determining if two pages could be buddies. We are not really identifying * the zone since we could be using the section number id if we do not have * node id available in page flags. * We only guarantee that it will return the same value for two combinable * pages in a zone. */ static inline int page_zone_id(struct page *page) { return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; } #ifdef NODE_NOT_IN_PAGE_FLAGS int page_to_nid(const struct page *page); #else static inline int page_to_nid(const struct page *page) { return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK; } #endif static inline int folio_nid(const struct folio *folio) { return page_to_nid(&folio->page); } #ifdef CONFIG_NUMA_BALANCING /* page access time bits needs to hold at least 4 seconds */ #define PAGE_ACCESS_TIME_MIN_BITS 12 #if LAST_CPUPID_SHIFT < PAGE_ACCESS_TIME_MIN_BITS #define PAGE_ACCESS_TIME_BUCKETS \ (PAGE_ACCESS_TIME_MIN_BITS - LAST_CPUPID_SHIFT) #else #define PAGE_ACCESS_TIME_BUCKETS 0 #endif #define PAGE_ACCESS_TIME_MASK \ (LAST_CPUPID_MASK << PAGE_ACCESS_TIME_BUCKETS) static inline int cpu_pid_to_cpupid(int cpu, int pid) { return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); } static inline int cpupid_to_pid(int cpupid) { return cpupid & LAST__PID_MASK; } static inline int cpupid_to_cpu(int cpupid) { return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK; } static inline int cpupid_to_nid(int cpupid) { return cpu_to_node(cpupid_to_cpu(cpupid)); } static inline bool cpupid_pid_unset(int cpupid) { return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK); } static inline bool cpupid_cpu_unset(int cpupid) { return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK); } static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) { return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid); } #define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid) #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK); } static inline int folio_last_cpupid(struct folio *folio) { return folio->_last_cpupid; } static inline void page_cpupid_reset_last(struct page *page) { page->_last_cpupid = -1 & LAST_CPUPID_MASK; } #else static inline int folio_last_cpupid(struct folio *folio) { return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } int folio_xchg_last_cpupid(struct folio *folio, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ static inline int folio_xchg_access_time(struct folio *folio, int time) { int last_time; last_time = folio_xchg_last_cpupid(folio, time >> PAGE_ACCESS_TIME_BUCKETS); return last_time << PAGE_ACCESS_TIME_BUCKETS; } static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) { unsigned int pid_bit; pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) { __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } #else /* !CONFIG_NUMA_BALANCING */ static inline int folio_xchg_last_cpupid(struct folio *folio, 
int cpupid) { return folio_nid(folio); /* XXX */ } static inline int folio_xchg_access_time(struct folio *folio, int time) { return 0; } static inline int folio_last_cpupid(struct folio *folio) { return folio_nid(folio); /* XXX */ } static inline int cpupid_to_nid(int cpupid) { return -1; } static inline int cpupid_to_pid(int cpupid) { return -1; } static inline int cpupid_to_cpu(int cpupid) { return -1; } static inline int cpu_pid_to_cpupid(int nid, int pid) { return -1; } static inline bool cpupid_pid_unset(int cpupid) { return true; } static inline void page_cpupid_reset_last(struct page *page) { } static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) { return false; } static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) { } #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) /* * KASAN per-page tags are stored xor'ed with 0xff. This allows to avoid * setting tags for all pages to native kernel tag value 0xff, as the default * value 0x00 maps to 0xff. */ static inline u8 page_kasan_tag(const struct page *page) { u8 tag = KASAN_TAG_KERNEL; if (kasan_enabled()) { tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; tag ^= 0xff; } return tag; } static inline void page_kasan_tag_set(struct page *page, u8 tag) { unsigned long old_flags, flags; if (!kasan_enabled()) return; tag ^= 0xff; old_flags = READ_ONCE(page->flags); do { flags = old_flags; flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); } static inline void page_kasan_tag_reset(struct page *page) { if (kasan_enabled()) page_kasan_tag_set(page, KASAN_TAG_KERNEL); } #else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline u8 page_kasan_tag(const struct page *page) { return 0xff; } static inline void page_kasan_tag_set(struct page *page, u8 tag) { } static inline void page_kasan_tag_reset(struct page *page) { } #endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */ static inline struct zone *page_zone(const struct page *page) { return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; } static inline pg_data_t *page_pgdat(const struct page *page) { return NODE_DATA(page_to_nid(page)); } static inline struct zone *folio_zone(const struct folio *folio) { return page_zone(&folio->page); } static inline pg_data_t *folio_pgdat(const struct folio *folio) { return page_pgdat(&folio->page); } #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } static inline unsigned long page_to_section(const struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } #endif /** * folio_pfn - Return the Page Frame Number of a folio. * @folio: The folio. * * A folio may contain multiple pages. The pages have consecutive * Page Frame Numbers. * * Return: The Page Frame Number of the first page in the folio. */ static inline unsigned long folio_pfn(struct folio *folio) { return page_to_pfn(&folio->page); } static inline struct folio *pfn_folio(unsigned long pfn) { return page_folio(pfn_to_page(pfn)); } /** * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. * @folio: The folio. * * This function checks if a folio has been pinned via a call to * a function in the pin_user_pages() family. 
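 *
 * A worked example (editor's illustration, for the small-folio case
 * described below): a folio holding three ordinary references that is then
 * pinned once via pin_user_pages() has a refcount of
 * 3 + GUP_PIN_COUNTING_BIAS = 3 + 1024 = 1027, so this function returns
 * true; unpin_user_page() later subtracts the bias again.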
 *
 * For small folios, the return value is partially fuzzy: false is not fuzzy,
 * because it means "definitely not pinned for DMA", but true means "probably
 * pinned for DMA, but possibly a false positive due to having at least
 * GUP_PIN_COUNTING_BIAS worth of normal folio references".
 *
 * False positives are OK, because: a) it's unlikely for a folio to
 * get that many refcounts, and b) all the callers of this routine are
 * expected to be able to deal gracefully with a false positive.
 *
 * For large folios, the result will be exactly correct. That's because
 * we have more tracking data available: the _pincount field is used
 * instead of the GUP_PIN_COUNTING_BIAS scheme.
 *
 * For more information, please see Documentation/core-api/pin_user_pages.rst.
 *
 * Return: True, if it is likely that the page has been "dma-pinned".
 * False, if the page is definitely not dma-pinned.
 */
static inline bool folio_maybe_dma_pinned(struct folio *folio)
{
	if (folio_test_large(folio))
		return atomic_read(&folio->_pincount) > 0;

	/*
	 * folio_ref_count() is signed. If that refcount overflows, then
	 * folio_ref_count() returns a negative value, and callers will avoid
	 * further incrementing the refcount.
	 *
	 * Here, for that overflow case, use the sign bit to count a little
	 * bit higher via unsigned math, and thus still get an accurate result.
	 */
	return ((unsigned int)folio_ref_count(folio)) >= GUP_PIN_COUNTING_BIAS;
}

static inline bool page_maybe_dma_pinned(struct page *page)
{
	return folio_maybe_dma_pinned(page_folio(page));
}

/*
 * This should most likely only be called during fork() to see whether we
 * should break the cow immediately for an anon page on the src mm.
 *
 * The caller has to hold the PT lock and the vma->vm_mm->write_protect_seq.
 */
static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma,
					   struct folio *folio)
{
	VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1));

	if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
		return false;

	return folio_maybe_dma_pinned(folio);
}

/**
 * is_zero_page - Query if a page is a zero page
 * @page: The page to query
 *
 * This returns true if @page is one of the permanent zero pages.
 */
static inline bool is_zero_page(const struct page *page)
{
	return is_zero_pfn(page_to_pfn(page));
}

/**
 * is_zero_folio - Query if a folio is a zero page
 * @folio: The folio to query
 *
 * This returns true if @folio is one of the permanent zero pages.
 */
static inline bool is_zero_folio(const struct folio *folio)
{
	return is_zero_page(&folio->page);
}

/* MIGRATE_CMA and ZONE_MOVABLE do not allow pinning folios */
#ifdef CONFIG_MIGRATION
static inline bool folio_is_longterm_pinnable(struct folio *folio)
{
#ifdef CONFIG_CMA
	int mt = folio_migratetype(folio);

	if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
		return false;
#endif
	/* The zero page can be "pinned" but gets special handling. */
	if (is_zero_folio(folio))
		return true;

	/* Coherent device memory must always allow eviction. */
	if (folio_is_device_coherent(folio))
		return false;

	/* Otherwise, non-movable zone folios can be pinned.
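	 * (Editor's note, hedged: this matters for FOLL_LONGTERM pins such as
	 * RDMA memory regions or io_uring registered buffers, which must not
	 * hold on to memory that the kernel still expects to migrate.)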
*/ return !folio_is_zone_movable(folio); } #else static inline bool folio_is_longterm_pinnable(struct folio *folio) { return true; } #endif static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; } static inline void set_page_node(struct page *page, unsigned long node) { page->flags &= ~(NODES_MASK << NODES_PGSHIFT); page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; } static inline void set_page_links(struct page *page, enum zone_type zone, unsigned long node, unsigned long pfn) { set_page_zone(page, zone); set_page_node(page, node); #ifdef SECTION_IN_PAGE_FLAGS set_page_section(page, pfn_to_section_nr(pfn)); #endif } /** * folio_nr_pages - The number of pages in the folio. * @folio: The folio. * * Return: A positive power of two. */ static inline long folio_nr_pages(struct folio *folio) { if (!folio_test_large(folio)) return 1; #ifdef CONFIG_64BIT return folio->_folio_nr_pages; #else return 1L << (folio->_flags_1 & 0xff); #endif } /* Only hugetlbfs can allocate folios larger than MAX_ORDER */ #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE #define MAX_FOLIO_NR_PAGES (1UL << PUD_ORDER) #else #define MAX_FOLIO_NR_PAGES MAX_ORDER_NR_PAGES #endif /* * compound_nr() returns the number of pages in this potentially compound * page. compound_nr() can be called on a tail page, and is defined to * return 1 in that case. */ static inline unsigned long compound_nr(struct page *page) { struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags)) return 1; #ifdef CONFIG_64BIT return folio->_folio_nr_pages; #else return 1L << (folio->_flags_1 & 0xff); #endif } /** * thp_nr_pages - The number of regular pages in this huge page. * @page: The head page of a huge page. */ static inline int thp_nr_pages(struct page *page) { return folio_nr_pages((struct folio *)page); } /** * folio_next - Move to the next physical folio. * @folio: The folio we're currently operating on. * * If you have physically contiguous memory which may span more than * one folio (eg a &struct bio_vec), use this function to move from one * folio to the next. Do not use it if the memory is only virtually * contiguous as the folios are almost certainly not adjacent to each * other. This is the folio equivalent to writing ``page++``. * * Context: We assume that the folios are refcounted and/or locked at a * higher level and do not adjust the reference counts. * Return: The next struct folio. */ static inline struct folio *folio_next(struct folio *folio) { return (struct folio *)folio_page(folio, folio_nr_pages(folio)); } /** * folio_shift - The size of the memory described by this folio. * @folio: The folio. * * A folio represents a number of bytes which is a power-of-two in size. * This function tells you which power-of-two the folio is. See also * folio_size() and folio_order(). * * Context: The caller should have a reference on the folio to prevent * it from being split. It is not necessary for the folio to be locked. * Return: The base-2 logarithm of the size of this folio. */ static inline unsigned int folio_shift(struct folio *folio) { return PAGE_SHIFT + folio_order(folio); } /** * folio_size - The number of bytes in a folio. * @folio: The folio. * * Context: The caller should have a reference on the folio to prevent * it from being split. It is not necessary for the folio to be locked. * Return: The number of bytes in this folio. 
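 *
 * For example (editor's illustration): with 4KiB base pages, an order-2
 * folio yields folio_size() == 4096 << 2 == 16384 bytes and
 * folio_shift() == 14.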
*/ static inline size_t folio_size(struct folio *folio) { return PAGE_SIZE << folio_order(folio); } /** * folio_estimated_sharers - Estimate the number of sharers of a folio. * @folio: The folio. * * folio_estimated_sharers() aims to serve as a function to efficiently * estimate the number of processes sharing a folio. This is done by * looking at the precise mapcount of the first subpage in the folio, and * assuming the other subpages are the same. This may not be true for large * folios. If you want exact mapcounts for exact calculations, look at * page_mapcount() or folio_total_mapcount(). * * Return: The estimated number of processes sharing a folio. */ static inline int folio_estimated_sharers(struct folio *folio) { return page_mapcount(folio_page(folio, 0)); } #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE static inline int arch_make_page_accessible(struct page *page) { return 0; } #endif #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE static inline int arch_make_folio_accessible(struct folio *folio) { int ret; long i, nr = folio_nr_pages(folio); for (i = 0; i < nr; i++) { ret = arch_make_page_accessible(folio_page(folio, i)); if (ret) break; } return ret; } #endif /* * Some inline functions in vmstat.h depend on page_zone() */ #include <linux/vmstat.h> #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) #define HASHED_PAGE_VIRTUAL #endif #if defined(WANT_PAGE_VIRTUAL) static inline void *page_address(const struct page *page) { return page->virtual; } static inline void set_page_address(struct page *page, void *address) { page->virtual = address; } #define page_address_init() do { } while(0) #endif #if defined(HASHED_PAGE_VIRTUAL) void *page_address(const struct page *page); void set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif static __always_inline void *lowmem_page_address(const struct page *page) { return page_to_virt(page); } #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) #define page_address(page) lowmem_page_address(page) #define set_page_address(page, address) do { } while(0) #define page_address_init() do { } while(0) #endif static inline void *folio_address(const struct folio *folio) { return page_address(&folio->page); } extern pgoff_t __page_file_index(struct page *page); /* * Return the pagecache index of the passed page. Regular pagecache pages * use ->index whereas swapcache pages use swp_offset(->private) */ static inline pgoff_t page_index(struct page *page) { if (unlikely(PageSwapCache(page))) return __page_file_index(page); return page->index; } /* * Return true only if the page has been allocated with * ALLOC_NO_WATERMARKS and the low watermark was not * met implying that the system is under some pressure. */ static inline bool page_is_pfmemalloc(const struct page *page) { /* * lru.next has bit 1 set if the page is allocated from the * pfmemalloc reserves. Callers may simply overwrite it if * they do not need to preserve that information. */ return (uintptr_t)page->lru.next & BIT(1); } /* * Return true only if the folio has been allocated with * ALLOC_NO_WATERMARKS and the low watermark was not * met implying that the system is under some pressure. */ static inline bool folio_is_pfmemalloc(const struct folio *folio) { /* * lru.next has bit 1 set if the page is allocated from the * pfmemalloc reserves. Callers may simply overwrite it if * they do not need to preserve that information. */ return (uintptr_t)folio->lru.next & BIT(1); } /* * Only to be called by the page allocator on a freshly allocated * page. 
 */
static inline void set_page_pfmemalloc(struct page *page)
{
	page->lru.next = (void *)BIT(1);
}

static inline void clear_page_pfmemalloc(struct page *page)
{
	page->lru.next = NULL;
}

/*
 * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
 */
extern void pagefault_out_of_memory(void);

#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
#define offset_in_thp(page, p)	((unsigned long)(p) & (thp_size(page) - 1))
#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))

/*
 * Parameter block passed down to zap_pte_range in exceptional cases.
 */
struct zap_details {
	struct folio *single_folio;	/* Locked folio to be unmapped */
	bool even_cows;			/* Zap COWed private pages too? */
	zap_flags_t zap_flags;		/* Extra flags for zapping */
};

/*
 * Whether to drop the pte markers, for example, the uffd-wp information for
 * file-backed memory. This should only be specified when we will completely
 * drop the page in the mm, either by truncation or unmapping of the vma. By
 * default, the flag is not set.
 */
#define ZAP_FLAG_DROP_MARKER	((__force zap_flags_t) BIT(0))
/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
#define ZAP_FLAG_UNMAP		((__force zap_flags_t) BIT(1))

#ifdef CONFIG_SCHED_MM_CID
void sched_mm_cid_before_execve(struct task_struct *t);
void sched_mm_cid_after_execve(struct task_struct *t);
void sched_mm_cid_fork(struct task_struct *t);
void sched_mm_cid_exit_signals(struct task_struct *t);
static inline int task_mm_cid(struct task_struct *t)
{
	return t->mm_cid;
}
#else
static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
static inline void sched_mm_cid_fork(struct task_struct *t) { }
static inline void sched_mm_cid_exit_signals(struct task_struct *t) { }
static inline int task_mm_cid(struct task_struct *t)
{
	/*
	 * Use the processor id as a fall-back when the mm cid feature is
	 * disabled. This provides functional per-cpu data structure accesses
	 * in user-space, although it won't provide the memory usage benefits.
*/ return raw_smp_processor_id(); } #endif #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else static inline bool can_do_mlock(void) { return false; } #endif extern int user_shm_lock(size_t, struct ucounts *); extern void user_shm_unlock(size_t, struct ucounts *); struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details); static inline void zap_vma_pages(struct vm_area_struct *vma) { zap_page_range_single(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL); } void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *start_vma, unsigned long start, unsigned long end, unsigned long tree_end, bool mm_wr_locked); struct mmu_notifier_range; void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); int follow_phys(struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned long *prot, resource_size_t *phys); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int generic_error_remove_folio(struct address_space *mapping, struct folio *folio); struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, unsigned long address, struct pt_regs *regs); #ifdef CONFIG_MMU extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs); extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); #else static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs) { /* should never happen if there's no MMU */ BUG(); return VM_FAULT_SIGBUS; } static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { /* should never happen if there's no MMU */ BUG(); return -EFAULT; } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows) { } #endif static inline void unmap_shared_mapping_range(struct address_space *mapping, loff_t 
const holebegin, loff_t const holelen) { unmap_mapping_range(mapping, holebegin, holelen, 0); } static inline struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); /* * Retrieves a single page alongside its VMA. Does not support FOLL_NOWAIT. */ static inline struct page *get_user_page_vma_remote(struct mm_struct *mm, unsigned long addr, int gup_flags, struct vm_area_struct **vmap) { struct page *page; struct vm_area_struct *vma; int got; if (WARN_ON_ONCE(unlikely(gup_flags & FOLL_NOWAIT))) return ERR_PTR(-EINVAL); got = get_user_pages_remote(mm, addr, 1, gup_flags, &page, NULL); if (got < 0) return ERR_PTR(got); vma = vma_lookup(mm, addr); if (WARN_ON_ONCE(!vma)) { put_page(page); return ERR_PTR(-EINVAL); } *vmap = vma; return page; } long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages); long pin_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); void folio_add_pin(struct folio *folio); int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, struct task_struct *task, bool bypass_rlim); struct kvec; struct page *get_dump_page(unsigned long addr); bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, bool need_rmap_locks, bool for_stack); /* * Flags used by change_protection(). For now we make it a bitmap so * that we can pass in multiple flags just like parameters. However * for now all the callers are only use one of the flags at the same * time. */ /* * Whether we should manually check if we can map individual PTEs writable, * because something (e.g., COW, uffd-wp) blocks that from happening for all * PTEs automatically in a writable mapping. 
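 *
 * Hedged example (editor's illustration): a MAP_SHARED mapping that needs
 * writenotify for dirty tracking keeps its PTEs write-protected, so
 * mprotect() passes MM_CP_TRY_CHANGE_WRITABLE and change_protection() then
 * consults can_change_pte_writable() before upgrading each individual PTE.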
*/ #define MM_CP_TRY_CHANGE_WRITABLE (1UL << 0) /* Whether this protection change is for NUMA hints */ #define MM_CP_PROT_NUMA (1UL << 1) /* Whether this change is for write protecting */ #define MM_CP_UFFD_WP (1UL << 2) /* do wp */ #define MM_CP_UFFD_WP_RESOLVE (1UL << 3) /* Resolve wp */ #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) bool vma_needs_dirty_tracking(struct vm_area_struct *vma); int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) { /* * We want to check manually if we can change individual PTEs writable * if we can't do that automatically for all PTEs in a mapping. For * private mappings, that's always the case when we have write * permissions as we properly have to handle COW. */ if (vma->vm_flags & VM_SHARED) return vma_wants_writenotify(vma, vma->vm_page_prot); return !!(vma->vm_flags & VM_WRITE); } bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); extern long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long cp_flags); extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); /* * doesn't attempt to fault and will return short. */ int get_user_pages_fast_only(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); static inline bool get_user_page_fast_only(unsigned long addr, unsigned int gup_flags, struct page **pagep) { return get_user_pages_fast_only(addr, 1, gup_flags, pagep) == 1; } /* * per-process(per-mm_struct) statistics. 
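 *
 * For example (editor's illustration): the anonymous part of a task's RSS,
 * in bytes, is get_mm_counter(mm, MM_ANONPAGES) << PAGE_SHIFT.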
*/ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) { return percpu_counter_read_positive(&mm->rss_stat[member]); } void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { percpu_counter_add(&mm->rss_stat[member], value); mm_trace_rss_stat(mm, member); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { percpu_counter_inc(&mm->rss_stat[member]); mm_trace_rss_stat(mm, member); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { percpu_counter_dec(&mm->rss_stat[member]); mm_trace_rss_stat(mm, member); } /* Optimized variant when folio is already known not to be anon */ static inline int mm_counter_file(struct folio *folio) { if (folio_test_swapbacked(folio)) return MM_SHMEMPAGES; return MM_FILEPAGES; } static inline int mm_counter(struct folio *folio) { if (folio_test_anon(folio)) return MM_ANONPAGES; return mm_counter_file(folio); } static inline unsigned long get_mm_rss(struct mm_struct *mm) { return get_mm_counter(mm, MM_FILEPAGES) + get_mm_counter(mm, MM_ANONPAGES) + get_mm_counter(mm, MM_SHMEMPAGES); } static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) { return max(mm->hiwater_rss, get_mm_rss(mm)); } static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) { return max(mm->hiwater_vm, mm->total_vm); } static inline void update_hiwater_rss(struct mm_struct *mm) { unsigned long _rss = get_mm_rss(mm); if ((mm)->hiwater_rss < _rss) (mm)->hiwater_rss = _rss; } static inline void update_hiwater_vm(struct mm_struct *mm) { if (mm->hiwater_vm < mm->total_vm) mm->hiwater_vm = mm->total_vm; } static inline void reset_mm_hiwater_rss(struct mm_struct *mm) { mm->hiwater_rss = get_mm_rss(mm); } static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, struct mm_struct *mm) { unsigned long hiwater_rss = get_mm_hiwater_rss(mm); if (*maxrss < hiwater_rss) *maxrss = hiwater_rss; } #ifndef CONFIG_ARCH_HAS_PTE_SPECIAL static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #endif #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { return 0; } #endif extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) { pte_t *ptep; __cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl)); return ptep; } #ifdef __PAGETABLE_P4D_FOLDED static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { return 0; } #else int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); #endif #if defined(__PAGETABLE_PUD_FOLDED) || !defined(CONFIG_MMU) static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } static inline void mm_inc_nr_puds(struct mm_struct *mm) {} static inline void mm_dec_nr_puds(struct mm_struct *mm) {} #else int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); static inline void mm_inc_nr_puds(struct mm_struct *mm) { if (mm_pud_folded(mm)) return; atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_puds(struct mm_struct *mm) { if (mm_pud_folded(mm)) return; atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); } #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address) { return 0; } static inline void mm_inc_nr_pmds(struct mm_struct *mm) {} static inline void mm_dec_nr_pmds(struct mm_struct *mm) {} #else int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); static inline void mm_inc_nr_pmds(struct mm_struct *mm) { if (mm_pmd_folded(mm)) return; atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_pmds(struct mm_struct *mm) { if (mm_pmd_folded(mm)) return; atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); } #endif #ifdef CONFIG_MMU static inline void mm_pgtables_bytes_init(struct mm_struct *mm) { atomic_long_set(&mm->pgtables_bytes, 0); } static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm) { return atomic_long_read(&mm->pgtables_bytes); } static inline void mm_inc_nr_ptes(struct mm_struct *mm) { atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_ptes(struct mm_struct *mm) { atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes); } #else static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {} static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm) { return 0; } static inline void mm_inc_nr_ptes(struct mm_struct *mm) {} static inline void mm_dec_nr_ptes(struct mm_struct *mm) {} #endif int __pte_alloc(struct mm_struct *mm, pmd_t *pmd); int __pte_alloc_kernel(pmd_t *pmd); #if defined(CONFIG_MMU) static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? NULL : p4d_offset(pgd, address); } static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? NULL : pud_offset(p4d, address); } static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? NULL: pmd_offset(pud, address); } #endif /* CONFIG_MMU */ static inline struct ptdesc *virt_to_ptdesc(const void *x) { return page_ptdesc(virt_to_page(x)); } static inline void *ptdesc_to_virt(const struct ptdesc *pt) { return page_to_virt(ptdesc_page(pt)); } static inline void *ptdesc_address(const struct ptdesc *pt) { return folio_address(ptdesc_folio(pt)); } static inline bool pagetable_is_reserved(struct ptdesc *pt) { return folio_test_reserved(ptdesc_folio(pt)); } /** * pagetable_alloc - Allocate pagetables * @gfp: GFP flags * @order: desired pagetable order * * pagetable_alloc allocates memory for page tables as well as a page table * descriptor to describe that memory. * * Return: The ptdesc describing the allocated page tables. */ static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order) { struct page *page = alloc_pages(gfp | __GFP_COMP, order); return page_ptdesc(page); } /** * pagetable_free - Free pagetables * @pt: The page table descriptor * * pagetable_free frees the memory of all page tables described by a page * table descriptor and the memory for the descriptor itself. 
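 *
 * A minimal pairing sketch (editor's illustration, assuming no further
 * constructor is required for the use case):
 *
 *	struct ptdesc *pt = pagetable_alloc(GFP_KERNEL | __GFP_ZERO, 0);
 *
 *	if (pt)
 *		pagetable_free(pt);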
*/ static inline void pagetable_free(struct ptdesc *pt) { struct page *page = ptdesc_page(pt); __free_pages(page, compound_order(page)); } #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); bool ptlock_alloc(struct ptdesc *ptdesc); void ptlock_free(struct ptdesc *ptdesc); static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { return ptdesc->ptl; } #else /* ALLOC_SPLIT_PTLOCKS */ static inline void ptlock_cache_init(void) { } static inline bool ptlock_alloc(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) { } static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { return &ptdesc->ptl; } #endif /* ALLOC_SPLIT_PTLOCKS */ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } static inline bool ptlock_init(struct ptdesc *ptdesc) { /* * prep_new_page() initialize page->private (and therefore page->ptl) * with 0. Make sure nobody took it in use in between. * * It can happen if arch try to use slab for page table allocation: * slab code uses page->slab_cache, which share storage with page->ptl. */ VM_BUG_ON_PAGE(*(unsigned long *)&ptdesc->ptl, ptdesc_page(ptdesc)); if (!ptlock_alloc(ptdesc)) return false; spin_lock_init(ptlock_ptr(ptdesc)); return true; } #else /* !USE_SPLIT_PTE_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); if (!ptlock_init(ptdesc)) return false; __folio_set_pgtable(folio); lruvec_stat_add_folio(folio, NR_PAGETABLE); return true; } static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); ptlock_free(ptdesc); __folio_clear_pgtable(folio); lruvec_stat_sub_folio(folio, NR_PAGETABLE); } pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr) { return __pte_offset_map(pmd, addr, NULL); } pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp); static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp) { pte_t *pte; __cond_lock(*ptlp, pte = __pte_offset_map_lock(mm, pmd, addr, ptlp)); return pte; } pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp); #define pte_unmap_unlock(pte, ptl) do { \ spin_unlock(ptl); \ pte_unmap(pte); \ } while (0) #define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd)) #define pte_alloc_map(mm, pmd, address) \ (pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address)) #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ (pte_alloc(mm, pmd) ? \ NULL : pte_offset_map_lock(mm, pmd, address, ptlp)) #define pte_alloc_kernel(pmd, address) \ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? 
\ NULL: pte_offset_kernel(pmd, address)) #if USE_SPLIT_PMD_PTLOCKS static inline struct page *pmd_pgtable_page(pmd_t *pmd) { unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); return virt_to_page((void *)((unsigned long) pmd & mask)); } static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) { return page_ptdesc(pmd_pgtable_page(pmd)); } static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { return ptlock_ptr(pmd_ptdesc(pmd)); } static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE ptdesc->pmd_huge_pte = NULL; #endif return ptlock_init(ptdesc); } static inline void pmd_ptlock_free(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); #endif ptlock_free(ptdesc); } #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) #else static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void pmd_ptlock_free(struct ptdesc *ptdesc) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) #endif static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) { spinlock_t *ptl = pmd_lockptr(mm, pmd); spin_lock(ptl); return ptl; } static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); if (!pmd_ptlock_init(ptdesc)) return false; __folio_set_pgtable(folio); lruvec_stat_add_folio(folio, NR_PAGETABLE); return true; } static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); pmd_ptlock_free(ptdesc); __folio_clear_pgtable(folio); lruvec_stat_sub_folio(folio, NR_PAGETABLE); } /* * No scalability reason to split PUD locks yet, but follow the same pattern * as the PMD locks to make it easier if we decide to. The VM should not be * considered ready to switch to split PUD locks yet; there may be places * which need to be converted from page_table_lock. */ static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud) { return &mm->page_table_lock; } static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) { spinlock_t *ptl = pud_lockptr(mm, pud); spin_lock(ptl); return ptl; } static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); __folio_set_pgtable(folio); lruvec_stat_add_folio(folio, NR_PAGETABLE); } static inline void pagetable_pud_dtor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); __folio_clear_pgtable(folio); lruvec_stat_sub_folio(folio, NR_PAGETABLE); } extern void __init pagecache_init(void); extern void free_initmem(void); /* * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK) * into the buddy system. The freed pages will be poisoned with pattern * "poison" if it's within range [0, UCHAR_MAX]. * Return pages freed into the buddy system. */ extern unsigned long free_reserved_area(void *start, void *end, int poison, const char *s); extern void adjust_managed_page_count(struct page *page, long count); extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); /* Free the reserved page into the buddy system, so it gets managed. 
*/ static inline void free_reserved_page(struct page *page) { ClearPageReserved(page); init_page_count(page); __free_page(page); adjust_managed_page_count(page, 1); } #define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) { SetPageReserved(page); adjust_managed_page_count(page, -1); } static inline void free_reserved_ptdesc(struct ptdesc *pt) { free_reserved_page(ptdesc_page(pt)); } /* * Default method to free all the __init memory into the buddy system. * The freed pages will be poisoned with pattern "poison" if it's within * range [0, UCHAR_MAX]. * Return pages freed into the buddy system. */ static inline unsigned long free_initmem_default(int poison) { extern char __init_begin[], __init_end[]; return free_reserved_area(&__init_begin, &__init_end, poison, "unused kernel image (initmem)"); } static inline unsigned long get_num_physpages(void) { int nid; unsigned long phys_pages = 0; for_each_online_node(nid) phys_pages += node_present_pages(nid); return phys_pages; } /* * Using memblock node mappings, an architecture may initialise its * zones, allocate the backing mem_map and account for memory holes in an * architecture independent manner. * * An architecture is expected to register range of page frames backed by * physical memory with memblock_add[_node]() before calling * free_area_init() passing in the PFN each zone ends at. At a basic * usage, an architecture is expected to do something like * * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, * max_highmem_pfn}; * for_each_valid_physical_page_range() * memblock_add_node(base, size, nid, MEMBLOCK_NONE) * free_area_init(max_zone_pfns); */ void free_area_init(unsigned long *max_zone_pfn); unsigned long node_map_pfn_alignment(void); unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, unsigned long end_pfn); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); #ifndef CONFIG_NUMA static inline int early_pfn_to_nid(unsigned long pfn) { return 0; } #else /* please see mm/page_alloc.c */ extern int __meminit early_pfn_to_nid(unsigned long pfn); #endif extern void set_dma_reserve(unsigned long new_dma_reserve); extern void mem_init(void); extern void __init mmap_init(void); extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); static inline void show_mem(void) { __show_mem(0, NULL, MAX_NR_ZONES - 1); } extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); #ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES extern unsigned long arch_reserved_kernel_pages(void); #endif extern __printf(3, 4) void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...); extern void setup_per_cpu_pageset(void); /* nommu.c */ extern atomic_long_t mmap_pages_allocated; extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* interval_tree.c */ void vma_interval_tree_insert(struct vm_area_struct *node, struct rb_root_cached *root); void vma_interval_tree_insert_after(struct vm_area_struct *node, struct vm_area_struct *prev, struct rb_root_cached *root); void vma_interval_tree_remove(struct vm_area_struct *node, struct rb_root_cached *root); struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); struct vm_area_struct 
*vma_interval_tree_iter_next(struct vm_area_struct *node, unsigned long start, unsigned long last); #define vma_interval_tree_foreach(vma, root, start, last) \ for (vma = vma_interval_tree_iter_first(root, start, last); \ vma; vma = vma_interval_tree_iter_next(vma, start, last)) void anon_vma_interval_tree_insert(struct anon_vma_chain *node, struct rb_root_cached *root); void anon_vma_interval_tree_remove(struct anon_vma_chain *node, struct rb_root_cached *root); struct anon_vma_chain * anon_vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); struct anon_vma_chain *anon_vma_interval_tree_iter_next( struct anon_vma_chain *node, unsigned long start, unsigned long last); #ifdef CONFIG_DEBUG_VM_RB void anon_vma_interval_tree_verify(struct anon_vma_chain *node); #endif #define anon_vma_interval_tree_foreach(avc, root, start, last) \ for (avc = anon_vma_interval_tree_iter_first(root, start, last); \ avc; avc = anon_vma_interval_tree_iter_next(avc, start, last)) /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *next); extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); struct vm_area_struct *vma_modify(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long vm_flags, struct mempolicy *policy, struct vm_userfaultfd_ctx uffd_ctx, struct anon_vma_name *anon_name); /* We are about to modify the VMA's flags. */ static inline struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags) { return vma_modify(vmi, prev, vma, start, end, new_flags, vma_policy(vma), vma->vm_userfaultfd_ctx, anon_vma_name(vma)); } /* We are about to modify the VMA's flags and/or anon_name. */ static inline struct vm_area_struct *vma_modify_flags_name(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, struct anon_vma_name *new_name) { return vma_modify(vmi, prev, vma, start, end, new_flags, vma_policy(vma), vma->vm_userfaultfd_ctx, new_name); } /* We are about to modify the VMA's memory policy. */ static inline struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct mempolicy *new_pol) { return vma_modify(vmi, prev, vma, start, end, vma->vm_flags, new_pol, vma->vm_userfaultfd_ctx, anon_vma_name(vma)); } /* We are about to modify the VMA's flags and/or uffd context. 
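 * (Editor's note, hedged: the userfaultfd register/unregister paths are the
 * expected callers, relying on vma_modify() to split or merge VMAs as
 * needed.)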
*/ static inline struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, struct vm_userfaultfd_ctx new_ctx) { return vma_modify(vmi, prev, vma, start, end, new_flags, vma_policy(vma), new_ctx, anon_vma_name(vma)); } static inline int check_data_rlimit(unsigned long rlim, unsigned long new, unsigned long start, unsigned long end_data, unsigned long start_data) { if (rlim < RLIM_INFINITY) { if (((new - start) + (end_data - start_data)) > rlim) return -ENOSPC; } return 0; } extern int mm_take_all_locks(struct mm_struct *mm); extern void mm_drop_all_locks(struct mm_struct *mm); extern int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern struct file *get_task_exe_file(struct task_struct *task); extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); extern bool vma_is_special_mapping(const struct vm_area_struct *vma, const struct vm_special_mapping *sm); extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long flags, const struct vm_special_mapping *spec); /* This is an obsolete alternative to _install_special_mapping. */ extern int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long flags, struct page **pages); unsigned long randomize_stack_top(unsigned long stack_top); unsigned long randomize_page(unsigned long start, unsigned long range); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf); extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); #ifdef CONFIG_MMU extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *uf, bool unlock); extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); static inline void mm_populate(unsigned long addr, unsigned long len) { /* Ignore errors */ (void) __mm_populate(addr, len, 1); } #else static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif /* This takes the mm semaphore itself */ extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); struct vm_unmapped_area_info { #define VM_UNMAPPED_AREA_TOPDOWN 1 unsigned long flags; unsigned long length; unsigned long low_limit; unsigned long high_limit; unsigned long align_mask; 
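	/*
	 * Editor's note (hedged): align_offset below is the offset applied
	 * within align_mask, typically derived from the mapping's pgoff so
	 * the chosen address ends up suitably aligned/coloured.
	 */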
unsigned long align_offset; }; extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info); /* truncate.c */ extern void truncate_inode_pages(struct address_space *, loff_t); extern void truncate_inode_pages_range(struct address_space *, loff_t lstart, loff_t lend); extern void truncate_inode_pages_final(struct address_space *); /* generic vm_area_ops exported for stackable file systems */ extern vm_fault_t filemap_fault(struct vm_fault *vmf); extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ int expand_stack_locked(struct vm_area_struct *vma, unsigned long address); struct vm_area_struct *expand_stack(struct mm_struct * mm, unsigned long addr); /* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ int expand_downwards(struct vm_area_struct *vma, unsigned long address); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, struct vm_area_struct **pprev); /* * Look up the first VMA which intersects the interval [start_addr, end_addr) * NULL if none. Assume start_addr < end_addr. */ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr); /** * vma_lookup() - Find a VMA at a specific address * @mm: The process address space. * @addr: The user address. * * Return: The vm_area_struct at the given address, %NULL otherwise. */ static inline struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) { return mtree_load(&mm->mm_mt, addr); } static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) { if (vma->vm_flags & VM_GROWSDOWN) return stack_guard_gap; /* See reasoning around the VM_SHADOW_STACK definition */ if (vma->vm_flags & VM_SHADOW_STACK) return PAGE_SIZE; return 0; } static inline unsigned long vm_start_gap(struct vm_area_struct *vma) { unsigned long gap = stack_guard_start_gap(vma); unsigned long vm_start = vma->vm_start; vm_start -= gap; if (vm_start > vma->vm_start) vm_start = 0; return vm_start; } static inline unsigned long vm_end_gap(struct vm_area_struct *vma) { unsigned long vm_end = vma->vm_end; if (vma->vm_flags & VM_GROWSUP) { vm_end += stack_guard_gap; if (vm_end < vma->vm_end) vm_end = -PAGE_SIZE; } return vm_end; } static inline unsigned long vma_pages(struct vm_area_struct *vma) { return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } /* Look up the first VMA which exactly match the interval vm_start ... 
vm_end */ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, unsigned long vm_start, unsigned long vm_end) { struct vm_area_struct *vma = vma_lookup(mm, vm_start); if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end)) vma = NULL; return vma; } static inline bool range_in_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { return (vma && vma->vm_start <= start && end <= vma->vm_end); } #ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); void vma_set_page_prot(struct vm_area_struct *vma); #else static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) { return __pgprot(0); } static inline void vma_set_page_prot(struct vm_area_struct *vma) { vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); } #endif void vma_set_file(struct vm_area_struct *vma, struct file *file); #ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); #endif struct vm_area_struct *find_extend_vma_locked(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, struct page **pages, unsigned long *num); int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, unsigned long num); vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page) { int err = vm_insert_page(vma, addr, page); if (err == -ENOMEM) return VM_FAULT_OOM; if (err < 0 && err != -EBUSY) return VM_FAULT_SIGBUS; return VM_FAULT_NOPAGE; } #ifndef io_remap_pfn_range static inline int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); } #endif static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) return VM_FAULT_OOM; else if (err == -EHWPOISON) return VM_FAULT_HWPOISON; return VM_FAULT_SIGBUS; } /* * Convert errno to return value for ->page_mkwrite() calls. * * This should eventually be merged with vmf_error() above, but will need a * careful audit of all vmf_error() callers. */ static inline vm_fault_t vmf_fs_error(int err) { if (err == 0) return VM_FAULT_LOCKED; if (err == -EFAULT || err == -EAGAIN) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; /* -ENOSPC, -EDQUOT, -EIO ... 
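 *
 * Hedged usage sketch (editor's illustration): a filesystem ->page_mkwrite()
 * handler that prepares the write with its own helper returning 0 or a
 * -errno value can simply finish with:
 *
 *	return vmf_fs_error(err);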
*/ return VM_FAULT_SIGBUS; } struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags); static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) { if (vm_fault & VM_FAULT_OOM) return -ENOMEM; if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT; if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) return -EFAULT; return 0; } /* * Indicates whether GUP can follow a PROT_NONE mapped page, or whether * a (NUMA hinting) fault is required. */ static inline bool gup_can_follow_protnone(struct vm_area_struct *vma, unsigned int flags) { /* * If callers don't want to honor NUMA hinting faults, no need to * determine if we would actually have to trigger a NUMA hinting fault. */ if (!(flags & FOLL_HONOR_NUMA_FAULT)) return true; /* * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs. * * Requiring a fault here even for inaccessible VMAs would mean that * FOLL_FORCE cannot make any progress, because handle_mm_fault() * refuses to process NUMA hinting faults in inaccessible VMAs. */ return !vma_is_accessible(vma); } typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); extern int apply_to_existing_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); #ifdef CONFIG_PAGE_POISONING extern void __kernel_poison_pages(struct page *page, int numpages); extern void __kernel_unpoison_pages(struct page *page, int numpages); extern bool _page_poisoning_enabled_early; DECLARE_STATIC_KEY_FALSE(_page_poisoning_enabled); static inline bool page_poisoning_enabled(void) { return _page_poisoning_enabled_early; } /* * For use in fast paths after init_mem_debugging() has run, or when a * false negative result is not harmful when called too early. 
*/ static inline bool page_poisoning_enabled_static(void) { return static_branch_unlikely(&_page_poisoning_enabled); } static inline void kernel_poison_pages(struct page *page, int numpages) { if (page_poisoning_enabled_static()) __kernel_poison_pages(page, numpages); } static inline void kernel_unpoison_pages(struct page *page, int numpages) { if (page_poisoning_enabled_static()) __kernel_unpoison_pages(page, numpages); } #else static inline bool page_poisoning_enabled(void) { return false; } static inline bool page_poisoning_enabled_static(void) { return false; } static inline void __kernel_poison_pages(struct page *page, int nunmpages) { } static inline void kernel_poison_pages(struct page *page, int numpages) { } static inline void kernel_unpoison_pages(struct page *page, int numpages) { } #endif DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc); static inline bool want_init_on_alloc(gfp_t flags) { if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, &init_on_alloc)) return true; return flags & __GFP_ZERO; } DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); static inline bool want_init_on_free(void) { return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, &init_on_free); } extern bool _debug_pagealloc_enabled_early; DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); static inline bool debug_pagealloc_enabled(void) { return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) && _debug_pagealloc_enabled_early; } /* * For use in fast paths after mem_debugging_and_hardening_init() has run, * or when a false negative result is not harmful when called too early. */ static inline bool debug_pagealloc_enabled_static(void) { if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) return false; return static_branch_unlikely(&_debug_pagealloc_enabled); } /* * To support DEBUG_PAGEALLOC architecture must ensure that * __kernel_map_pages() never fails */ extern void __kernel_map_pages(struct page *page, int numpages, int enable); #ifdef CONFIG_DEBUG_PAGEALLOC static inline void debug_pagealloc_map_pages(struct page *page, int numpages) { if (debug_pagealloc_enabled_static()) __kernel_map_pages(page, numpages, 1); } static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) { if (debug_pagealloc_enabled_static()) __kernel_map_pages(page, numpages, 0); } extern unsigned int _debug_guardpage_minorder; DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled); static inline unsigned int debug_guardpage_minorder(void) { return _debug_guardpage_minorder; } static inline bool debug_guardpage_enabled(void) { return static_branch_unlikely(&_debug_guardpage_enabled); } static inline bool page_is_guard(struct page *page) { if (!debug_guardpage_enabled()) return false; return PageGuard(page); } bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype); static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { if (!debug_guardpage_enabled()) return false; return __set_page_guard(zone, page, order, migratetype); } void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype); static inline void clear_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { if (!debug_guardpage_enabled()) return; __clear_page_guard(zone, page, order, migratetype); } #else /* CONFIG_DEBUG_PAGEALLOC */ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page 
*page, int numpages) {} static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool debug_guardpage_enabled(void) { return false; } static inline bool page_is_guard(struct page *page) { return false; } static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) {} #endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); extern int in_gate_area_no_mm(unsigned long addr); extern int in_gate_area(struct mm_struct *mm, unsigned long addr); #else static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } static inline int in_gate_area_no_mm(unsigned long addr) { return 0; } static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } #endif /* __HAVE_ARCH_GATE_AREA */ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); #ifdef CONFIG_SYSCTL extern int sysctl_drop_caches; int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); #endif void drop_slab(void); #ifndef CONFIG_MMU #define randomize_va_space 0 #else extern int randomize_va_space; #endif const char * arch_vma_name(struct vm_area_struct *vma); #ifdef CONFIG_MMU void print_vma_addr(char *prefix, unsigned long rip); #else static inline void print_vma_addr(char *prefix, unsigned long rip) { } #endif void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); void pmd_init(void *addr); void pud_init(void *addr); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, struct vmem_altmap *altmap, struct page *reuse); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, struct vmem_altmap *altmap); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); void vmemmap_set_pmd(pmd_t *pmd, void *p, int node, unsigned long addr, unsigned long next); int vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, unsigned long next); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); int vmemmap_populate_hugepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap); #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { /* number of pfns from base where pfn_to_page() is valid */ if (altmap) return altmap->reserve + altmap->free; return 0; } static inline void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns) { altmap->alloc -= nr_pfns; } #else static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { return 0; } static inline void 
vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns) { } #endif #define VMEMMAP_RESERVE_NR 2 #ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { unsigned long nr_pages; unsigned long nr_vmemmap_pages; if (!pgmap || !is_power_of_2(sizeof(struct page))) return false; nr_pages = pgmap_vmemmap_nr(pgmap); nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT); /* * For vmemmap optimization with DAX we need minimum 2 vmemmap * pages. See layout diagram in Documentation/mm/vmemmap_dedup.rst */ return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR); } /* * If we don't have an architecture override, use the generic rule */ #ifndef vmemmap_can_optimize #define vmemmap_can_optimize __vmemmap_can_optimize #endif #else static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { return false; } #endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long nr_pages); enum mf_flags { MF_COUNT_INCREASED = 1 << 0, MF_ACTION_REQUIRED = 1 << 1, MF_MUST_KILL = 1 << 2, MF_SOFT_OFFLINE = 1 << 3, MF_UNPOISON = 1 << 4, MF_SW_SIMULATED = 1 << 5, MF_NO_RETRY = 1 << 6, MF_MEM_PRE_REMOVE = 1 << 7, }; int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, unsigned long count, int mf_flags); extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); #ifdef CONFIG_MEMORY_FAILURE /* * Sysfs entries for memory failure handling statistics. */ extern const struct attribute_group memory_failure_attr_group; extern void memory_failure_queue(unsigned long pfn, int flags); extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void num_poisoned_pages_inc(unsigned long pfn); void num_poisoned_pages_sub(unsigned long pfn, long i); struct task_struct *task_early_kill(struct task_struct *tsk, int force_early); #else static inline void memory_failure_queue(unsigned long pfn, int flags) { } static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) { return 0; } static inline void num_poisoned_pages_inc(unsigned long pfn) { } static inline void num_poisoned_pages_sub(unsigned long pfn, long i) { } #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_KSM) void add_to_kill_ksm(struct task_struct *tsk, struct page *p, struct vm_area_struct *vma, struct list_head *to_kill, unsigned long ksm_addr); #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) extern void memblk_nr_poison_inc(unsigned long pfn); extern void memblk_nr_poison_sub(unsigned long pfn, long i); #else static inline void memblk_nr_poison_inc(unsigned long pfn) { } static inline void memblk_nr_poison_sub(unsigned long pfn, long i) { } #endif #ifndef arch_memory_failure static inline int arch_memory_failure(unsigned long pfn, int flags) { return -ENXIO; } #endif #ifndef arch_is_platform_page static inline bool arch_is_platform_page(u64 paddr) { return false; } #endif /* * Error handlers for various types of pages. 
*/ enum mf_result { MF_IGNORED, /* Error: cannot be handled */ MF_FAILED, /* Error: handling failed */ MF_DELAYED, /* Will be handled later */ MF_RECOVERED, /* Successfully recovered */ }; enum mf_action_page_type { MF_MSG_KERNEL, MF_MSG_KERNEL_HIGH_ORDER, MF_MSG_SLAB, MF_MSG_DIFFERENT_COMPOUND, MF_MSG_HUGE, MF_MSG_FREE_HUGE, MF_MSG_UNMAP_FAILED, MF_MSG_DIRTY_SWAPCACHE, MF_MSG_CLEAN_SWAPCACHE, MF_MSG_DIRTY_MLOCKED_LRU, MF_MSG_CLEAN_MLOCKED_LRU, MF_MSG_DIRTY_UNEVICTABLE_LRU, MF_MSG_CLEAN_UNEVICTABLE_LRU, MF_MSG_DIRTY_LRU, MF_MSG_CLEAN_LRU, MF_MSG_TRUNCATED_LRU, MF_MSG_BUDDY, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, MF_MSG_UNKNOWN, }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, unsigned long addr_hint, unsigned int pages_per_huge_page); int copy_user_large_folio(struct folio *dst, struct folio *src, unsigned long addr_hint, struct vm_area_struct *vma); long copy_folio_from_user(struct folio *dst_folio, const void __user *usr_src, bool allow_pagefault); /** * vma_is_special_huge - Are transhuge page-table entries considered special? * @vma: Pointer to the struct vm_area_struct to consider * * Whether transhuge page-table entries are considered "special" following * the definition in vm_normal_page(). * * Return: true if transhuge page-table entries should be considered special, * false otherwise. */ static inline bool vma_is_special_huge(const struct vm_area_struct *vma) { return vma_is_dax(vma) || (vma->vm_file && (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if MAX_NUMNODES > 1 void __init setup_nr_node_ids(void); #else static inline void setup_nr_node_ids(void) {} #endif extern int memcmp_pages(struct page *page1, struct page *page2); static inline int pages_identical(struct page *page1, struct page *page2) { return !memcmp_pages(page1, page2); } #ifdef CONFIG_MAPPING_DIRTY_HELPERS unsigned long clean_record_shared_mapping_range(struct address_space *mapping, pgoff_t first_index, pgoff_t nr, pgoff_t bitmap_pgoff, unsigned long *bitmap, pgoff_t *start, pgoff_t *end); unsigned long wp_shared_mapping_range(struct address_space *mapping, pgoff_t first_index, pgoff_t nr); #endif extern int sysctl_nr_trim_pages; #ifdef CONFIG_PRINTK void mem_dump_obj(void *object); #else static inline void mem_dump_obj(void *object) {} #endif /** * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and * handle them. * @seals: the seals to check * @vma: the vma to operate on * * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper * check/handling on the vma flags. Return 0 if check pass, or <0 for errors. */ static inline int seal_check_write(int seals, struct vm_area_struct *vma) { if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { /* * New PROT_WRITE and MAP_SHARED mmaps are not allowed when * write seals are active. */ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) return -EPERM; /* * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as * MAP_SHARED and read-only, take care to not allow mprotect to * revert protections on such mappings. Do this only for shared * mappings. For private mappings, don't need to mask * VM_MAYWRITE as we still want them to be COW-writable. 
*/ if (vma->vm_flags & VM_SHARED) vm_flags_clear(vma, VM_MAYWRITE); } return 0; } #ifdef CONFIG_ANON_VMA_NAME int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, unsigned long len_in, struct anon_vma_name *anon_name); #else static inline int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, unsigned long len_in, struct anon_vma_name *anon_name) { return 0; } #endif #ifdef CONFIG_UNACCEPTED_MEMORY bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end); void accept_memory(phys_addr_t start, phys_addr_t end); #else static inline bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end) { return false; } static inline void accept_memory(phys_addr_t start, phys_addr_t end) { } #endif static inline bool pfn_is_unaccepted_memory(unsigned long pfn) { phys_addr_t paddr = pfn << PAGE_SHIFT; return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); } #endif /* _LINUX_MM_H */
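/*
 * A minimal usage sketch (illustrative only, not part of this header),
 * assuming <linux/mmap_lock.h> provides mmap_read_lock()/mmap_read_unlock().
 * The function name example_range_is_mapped() is hypothetical; it merely
 * shows how the lookup helpers above (vma_lookup(), range_in_vma()) are
 * typically combined under the mmap read lock.
 */
#if 0
static bool example_range_is_mapped(struct mm_struct *mm,
				    unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma;
	bool mapped;

	mmap_read_lock(mm);
	/*
	 * Unlike find_vma(), vma_lookup() only returns a VMA that actually
	 * contains @start; otherwise it returns NULL.
	 */
	vma = vma_lookup(mm, start);
	/* range_in_vma() tolerates a NULL vma and checks full containment. */
	mapped = range_in_vma(vma, start, end);
	mmap_read_unlock(mm);

	return mapped;
}
#endif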
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* * Definitions for the 'struct sk_buff' memory handlers. * * Authors: * Alan Cox, <gw4pts@gw4pts.ampr.org> * Florian La Roche, <rzsfl@rz.uni-sb.de> */ #ifndef _LINUX_SKBUFF_H #define _LINUX_SKBUFF_H #include <linux/kernel.h> #include <linux/compiler.h> #include <linux/time.h> #include <linux/bug.h> #include <linux/bvec.h> #include <linux/cache.h> #include <linux/rbtree.h> #include <linux/socket.h> #include <linux/refcount.h> #include <linux/atomic.h> #include <asm/types.h> #include <linux/spinlock.h> #include <net/checksum.h> #include <linux/rcupdate.h> #include <linux/dma-mapping.h> #include <linux/netdev_features.h> #include <net/flow_dissector.h> #include <linux/in6.h> #include <linux/if_packet.h> #include <linux/llist.h> #include <net/flow.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> #endif #include <net/net_debug.h> #include <net/dropreason-core.h> #include <net/netmem.h> /** * DOC: skb checksums * * The interface for checksum offload between the stack and networking drivers * is as follows... * * IP checksum related features * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Drivers advertise checksum offload capabilities in the features of a device. * From the stack's point of view these are capabilities offered by the driver. * A driver typically only advertises features that it is capable of offloading * to its device. * * .. flat-table:: Checksum related device features * :widths: 1 10 * * * - %NETIF_F_HW_CSUM * - The driver (or its device) is able to compute one * IP (one's complement) checksum for any combination * of protocols or protocol layering. The checksum is * computed and set in a packet per the CHECKSUM_PARTIAL * interface (see below). * * * - %NETIF_F_IP_CSUM * - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv4. These are specifically * unencapsulated packets of the form IPv4|TCP or * IPv4|UDP where the Protocol field in the IPv4 header * is TCP or UDP. The IPv4 header may contain IP options. * This feature cannot be set in features for a device * with NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). * * * - %NETIF_F_IPV6_CSUM * - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv6. These are specifically * unencapsulated packets of the form IPv6|TCP or * IPv6|UDP where the Next Header field in the IPv6 * header is either TCP or UDP. IPv6 extension headers * are not supported with this feature. This feature * cannot be set in features for a device with * NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). * * * - %NETIF_F_RXCSUM * - Driver (device) performs receive checksum offload. * This flag is only used to disable the RX checksum * feature for a device. The stack will accept receive * checksum indication in packets received on a device * regardless of whether NETIF_F_RXCSUM is set. * * Checksumming of received packets by device * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Indication of checksum verification is set in &sk_buff.ip_summed. * Possible values are: * * - %CHECKSUM_NONE * * Device did not checksum this packet e.g. due to lack of capabilities. * The packet contains full (though not verified) checksum in packet but * not in skb->csum. Thus, skb->csum is undefined in this case. * * - %CHECKSUM_UNNECESSARY * * The hardware you're dealing with doesn't calculate the full checksum * (as in %CHECKSUM_COMPLETE), but it does parse headers and verify checksums * for specific protocols. 
For such packets it will set %CHECKSUM_UNNECESSARY * if their checksums are okay. &sk_buff.csum is still undefined in this case * though. A driver or device must never modify the checksum field in the * packet even if checksum is verified. * * %CHECKSUM_UNNECESSARY is applicable to following protocols: * * - TCP: IPv6 and IPv4. * - UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a * zero UDP checksum for either IPv4 or IPv6, the networking stack * may perform further validation in this case. * - GRE: only if the checksum is present in the header. * - SCTP: indicates the CRC in SCTP header has been validated. * - FCOE: indicates the CRC in FC frame has been validated. * * &sk_buff.csum_level indicates the number of consecutive checksums found in * the packet minus one that have been verified as %CHECKSUM_UNNECESSARY. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet * and a device is able to verify the checksums for UDP (possibly zero), * GRE (checksum flag is set) and TCP, &sk_buff.csum_level would be set to * two. If the device were only able to verify the UDP checksum and not * GRE, either because it doesn't support GRE checksum or because GRE * checksum is bad, skb->csum_level would be set to zero (TCP checksum is * not considered in this case). * * - %CHECKSUM_COMPLETE * * This is the most generic way. The device supplied checksum of the _whole_ * packet as seen by netif_rx() and fills in &sk_buff.csum. This means the * hardware doesn't need to parse L3/L4 headers to implement this. * * Notes: * * - Even if device supports only some protocols, but is able to produce * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. * - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols. * * - %CHECKSUM_PARTIAL * * A checksum is set up to be offloaded to a device as described in the * output description for CHECKSUM_PARTIAL. This may occur on a packet * received directly from another Linux OS, e.g., a virtualized Linux kernel * on the same host, or it may be set in the input path in GRO or remote * checksum offload. For the purposes of checksum verification, the checksum * referred to by skb->csum_start + skb->csum_offset and any preceding * checksums in the packet are considered verified. Any checksums in the * packet that are after the checksum being offloaded are not considered to * be verified. * * Checksumming on transmit for non-GSO * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * The stack requests checksum offload in the &sk_buff.ip_summed for a packet. * Values are: * * - %CHECKSUM_PARTIAL * * The driver is required to checksum the packet as seen by hard_start_xmit() * from &sk_buff.csum_start up to the end, and to record/write the checksum at * offset &sk_buff.csum_start + &sk_buff.csum_offset. * A driver may verify that the * csum_start and csum_offset values are valid values given the length and * offset of the packet, but it should not attempt to validate that the * checksum refers to a legitimate transport layer checksum -- it is the * purview of the stack to validate that csum_start and csum_offset are set * correctly. * * When the stack requests checksum offload for a packet, the driver MUST * ensure that the checksum is set correctly. A driver can either offload the * checksum calculation to the device, or call skb_checksum_help (in the case * that the device does not support offload for a particular checksum). * * %NETIF_F_IP_CSUM and %NETIF_F_IPV6_CSUM are being deprecated in favor of * %NETIF_F_HW_CSUM. 
New devices should use %NETIF_F_HW_CSUM to indicate * checksum offload capability. * skb_csum_hwoffload_help() can be called to resolve %CHECKSUM_PARTIAL based * on network device checksumming capabilities: if a packet does not match * them, skb_checksum_help() or skb_crc32c_help() (depending on the value of * &sk_buff.csum_not_inet, see :ref:`crc`) * is called to resolve the checksum. * * - %CHECKSUM_NONE * * The skb was already checksummed by the protocol, or a checksum is not * required. * * - %CHECKSUM_UNNECESSARY * * This has the same meaning as CHECKSUM_NONE for checksum offload on * output. * * - %CHECKSUM_COMPLETE * * Not used in checksum output. If a driver observes a packet with this value * set in skbuff, it should treat the packet as if %CHECKSUM_NONE were set. * * .. _crc: * * Non-IP checksum (CRC) offloads * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * .. flat-table:: * :widths: 1 10 * * * - %NETIF_F_SCTP_CRC * - This feature indicates that a device is capable of * offloading the SCTP CRC in a packet. To perform this offload the stack * will set csum_start and csum_offset accordingly, set ip_summed to * %CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication * in the skbuff that the %CHECKSUM_PARTIAL refers to CRC32c. * A driver that supports both IP checksum offload and SCTP CRC32c offload * must verify which offload is configured for a packet by testing the * value of &sk_buff.csum_not_inet; skb_crc32c_csum_help() is provided to * resolve %CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1. * * * - %NETIF_F_FCOE_CRC * - This feature indicates that a device is capable of offloading the FCOE * CRC in a packet. To perform this offload the stack will set ip_summed * to %CHECKSUM_PARTIAL and set csum_start and csum_offset * accordingly. Note that there is no indication in the skbuff that the * %CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports * both IP checksum offload and FCOE CRC offload must verify which offload * is configured for a packet, presumably by inspecting packet headers. * * Checksumming on output with GSO * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * In the case of a GSO packet (skb_is_gso() is true), checksum offload * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the * gso_type is %SKB_GSO_TCPV4 or %SKB_GSO_TCPV6, TCP checksum offload as * part of the GSO operation is implied. If a checksum is being offloaded * with GSO then ip_summed is %CHECKSUM_PARTIAL, and both csum_start and * csum_offset are set to refer to the outermost checksum being offloaded * (two offloaded checksums are possible with UDP encapsulation). */ /* Don't change this without changing skb_csum_unnecessary! */ #define CHECKSUM_NONE 0 #define CHECKSUM_UNNECESSARY 1 #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 /* Maximum value in skb->csum_level */ #define SKB_MAX_CSUM_LEVEL 3 #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) /* For X bytes available in skb->head, what is the minimal * allocation needed, knowing struct skb_shared_info needs * to be aligned. 
*/ #define SKB_HEAD_ALIGN(X) (SKB_DATA_ALIGN(X) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define SKB_MAX_ORDER(X, ORDER) \ SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X)) #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) /* return minimum truesize of one skb containing X bytes of data */ #define SKB_TRUESIZE(X) ((X) + \ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) struct ahash_request; struct net_device; struct scatterlist; struct pipe_inode_info; struct iov_iter; struct napi_struct; struct bpf_prog; union bpf_attr; struct skb_ext; struct ts_config; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { enum { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE } orig_proto:8; u8 pkt_otherhost:1; u8 in_prerouting:1; u8 bridged_dnat:1; u8 sabotage_in_done:1; __u16 frag_max_size; int physinif; /* always valid & non-NULL from FORWARD on, for physdev match */ struct net_device *physoutdev; union { /* prerouting: detect dnat in orig/reply direction */ __be32 ipv4_daddr; struct in6_addr ipv6_daddr; /* after prerouting + nat detected: store original source * mac since neigh resolution overwrites it, only used while * skb is out in neigh layer. */ char neigh_header[8]; }; }; #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) /* Chain in tc_skb_ext will be used to share the tc chain with * ovs recirc_id. It will be set to the current chain by tc * and read by ovs to recirc_id. */ struct tc_skb_ext { union { u64 act_miss_cookie; __u32 chain; }; __u16 mru; __u16 zone; u8 post_ct:1; u8 post_ct_snat:1; u8 post_ct_dnat:1; u8 act_miss:1; /* Set if act_miss_cookie is used */ u8 l2_miss:1; /* Set by bridge upon FDB or MDB miss */ }; #endif struct sk_buff_head { /* These two members must be first to match sk_buff. */ struct_group_tagged(sk_buff_list, list, struct sk_buff *next; struct sk_buff *prev; ); __u32 qlen; spinlock_t lock; }; struct sk_buff; #ifndef CONFIG_MAX_SKB_FRAGS # define CONFIG_MAX_SKB_FRAGS 17 #endif #define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS extern int sysctl_max_skb_frags; /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to * segment using its current segmentation instead. 
*/ #define GSO_BY_FRAGS 0xFFFF typedef struct skb_frag { netmem_ref netmem; unsigned int len; unsigned int offset; } skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment * @frag: skb fragment */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { return frag->len; } /** * skb_frag_size_set() - Sets the size of a skb fragment * @frag: skb fragment * @size: size of fragment */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { frag->len = size; } /** * skb_frag_size_add() - Increments the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to add */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { frag->len += delta; } /** * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to subtract */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { frag->len -= delta; } /** * skb_frag_must_loop - Test if %p is a high memory page * @p: fragment's page */ static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p)) return true; #endif return false; } /** * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, * non-zero only on first page. * @p_len: (temp var) length in current page, * < PAGE_SIZE only on first and last page. * @copied: (temp var) length so far, excluding current p_len. * * A fragment can hold a compound page, in which case per-page * operations, notably kmap_atomic, must be called for each * regular page. */ #define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \ for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \ p_off = (f_off) & (PAGE_SIZE - 1), \ p_len = skb_frag_must_loop(p) ? \ min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ copied = 0; \ copied < f_len; \ copied += p_len, p++, p_off = 0, \ p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ /** * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration * since arbitrary point in time * @netdev_data: address/cookie of network device driver used as * reference to actual hardware time stamp * * Software time stamps generated by ktime_get_real() are stored in * skb->tstamp. * * hwtstamps can only be compared against other hwtstamps from * the same device. * * This structure is attached to packets as part of the * &skb_shared_info. Use skb_hwtstamps() to get a pointer. 
*/ struct skb_shared_hwtstamps { union { ktime_t hwtstamp; void *netdev_data; }; }; /* Definitions for tx_flags in struct skb_shared_info */ enum { /* generate hardware time stamp */ SKBTX_HW_TSTAMP = 1 << 0, /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, /* generate hardware time stamp based on cycles if supported */ SKBTX_HW_TSTAMP_USE_CYCLES = 1 << 3, /* generate wifi status information (where possible) */ SKBTX_WIFI_STATUS = 1 << 4, /* determine hardware time stamp based on time or cycles */ SKBTX_HW_TSTAMP_NETDEV = 1 << 5, /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, }; #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \ SKBTX_HW_TSTAMP_USE_CYCLES | \ SKBTX_ANY_SW_TSTAMP) /* Definitions for flags in struct skb_shared_info */ enum { /* use zcopy routines */ SKBFL_ZEROCOPY_ENABLE = BIT(0), /* This indicates at least one fragment might be overwritten * (as in vmsplice(), sendfile() ...) * If we need to compute a TX checksum, we'll need to copy * all frags to avoid possible bad checksum */ SKBFL_SHARED_FRAG = BIT(1), /* segment contains only zerocopy data and should not be * charged to the kernel memory. */ SKBFL_PURE_ZEROCOPY = BIT(2), SKBFL_DONT_ORPHAN = BIT(3), /* page references are managed by the ubuf_info, so it's safe to * use frags only up until ubuf_info is released */ SKBFL_MANAGED_FRAG_REFS = BIT(4), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \ SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS) /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. * The zerocopy_success argument is true if zero copy transmit occurred, * false on data copy or out of memory error caused by data copy attempt. * The ctx field is used to track device context. * The desc field is used to track userspace buffer index. */ struct ubuf_info { void (*callback)(struct sk_buff *, struct ubuf_info *, bool zerocopy_success); refcount_t refcnt; u8 flags; }; struct ubuf_info_msgzc { struct ubuf_info ubuf; union { struct { unsigned long desc; void *ctx; }; struct { u32 id; u16 len; u16 zerocopy:1; u32 bytelen; }; }; struct mmpin { struct user_struct *user; unsigned int num_pg; } mmp; }; #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) #define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \ ubuf) int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); /* Preserve some data across TX submission and completion. * * Note, this state is stored in the driver. Extending the layout * might need some special care. */ struct xsk_tx_metadata_compl { __u64 *tx_timestamp; }; /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ struct skb_shared_info { __u8 flags; __u8 meta_len; __u8 nr_frags; __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! 
*/ unsigned short gso_segs; struct sk_buff *frag_list; union { struct skb_shared_hwtstamps hwtstamps; struct xsk_tx_metadata_compl xsk_meta; }; unsigned int gso_type; u32 tskey; /* * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; unsigned int xdp_frags_size; /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ void * destructor_arg; /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; }; /** * DOC: dataref and headerless skbs * * Transport layers send out clones of payload skbs they hold for * retransmissions. To allow lower layers of the stack to prepend their headers * we split &skb_shared_info.dataref into two halves. * The lower 16 bits count the overall number of references. * The higher 16 bits indicate how many of the references are payload-only. * skb_header_cloned() checks if skb is allowed to add / write the headers. * * The creator of the skb (e.g. TCP) marks its skb as &sk_buff.nohdr * (via __skb_header_release()). Any clone created from marked skb will get * &sk_buff.hdr_len populated with the available headroom. * If there's the only clone in existence it's able to modify the headroom * at will. The sequence of calls inside the transport layer is:: * * <alloc skb> * skb_reserve() * __skb_header_release() * skb_clone() * // send the clone down the stack * * This is not a very generic construct and it depends on the transport layers * doing the right thing. In practice there's usually only one payload-only skb. * Having multiple payload-only skbs with different lengths of hdr_len is not * possible. The payload-only skbs should never leave their owner. */ #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) enum { SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */ SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */ SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */ }; enum { SKB_GSO_TCPV4 = 1 << 0, /* This indicates the skb is from an untrusted source. */ SKB_GSO_DODGY = 1 << 1, /* This indicates the tcp segment has CWR set. */ SKB_GSO_TCP_ECN = 1 << 2, SKB_GSO_TCP_FIXEDID = 1 << 3, SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, SKB_GSO_GRE = 1 << 6, SKB_GSO_GRE_CSUM = 1 << 7, SKB_GSO_IPXIP4 = 1 << 8, SKB_GSO_IPXIP6 = 1 << 9, SKB_GSO_UDP_TUNNEL = 1 << 10, SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, SKB_GSO_PARTIAL = 1 << 12, SKB_GSO_TUNNEL_REMCSUM = 1 << 13, SKB_GSO_SCTP = 1 << 14, SKB_GSO_ESP = 1 << 15, SKB_GSO_UDP = 1 << 16, SKB_GSO_UDP_L4 = 1 << 17, SKB_GSO_FRAGLIST = 1 << 18, }; #if BITS_PER_LONG > 32 #define NET_SKBUFF_DATA_USES_OFFSET 1 #endif #ifdef NET_SKBUFF_DATA_USES_OFFSET typedef unsigned int sk_buff_data_t; #else typedef unsigned char *sk_buff_data_t; #endif /** * DOC: Basic sk_buff geometry * * struct sk_buff itself is a metadata structure and does not hold any packet * data. All the data is held in associated buffers. * * &sk_buff.head points to the main "head" buffer. The head buffer is divided * into two parts: * * - data buffer, containing headers and sometimes payload; * this is the part of the skb operated on by the common helpers * such as skb_put() or skb_pull(); * - shared info (struct skb_shared_info) which holds an array of pointers * to read-only data in the (page, offset, length) format. * * Optionally &skb_shared_info.frag_list may point to another skb. 
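 *
 * An editorial sketch (not in the original text) of walking the paged part
 * of a non-linear skb with the accessors above; "total" is a local variable
 * introduced only for illustration::
 *
 *	struct skb_shared_info *shinfo = skb_shinfo(skb);
 *	unsigned int i, total = skb_headlen(skb);
 *
 *	for (i = 0; i < shinfo->nr_frags; i++)
 *		total += skb_frag_size(&shinfo->frags[i]);
 *
 * When there is no frag_list, total ends up equal to skb->len.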
* * Basic diagram may look like this:: * * --------------- * | sk_buff | * --------------- * ,--------------------------- + head * / ,----------------- + data * / / ,----------- + tail * | | | , + end * | | | | * v v v v * ----------------------------------------------- * | headroom | data | tailroom | skb_shared_info | * ----------------------------------------------- * + [page frag] * + [page frag] * + [page frag] * + [page frag] --------- * + frag_list --> | sk_buff | * --------- * */ /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header * @hdr_len: writable header length of cloned skb * @csum: Checksum (must include start/offset pair) * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored * @priority: Packet queueing priority * @ignore_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs * @inner_protocol_type: whether the inner protocol is * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO * @remcsum_offload: remote checksum offload is enabled * @offload_fwd_mark: Packet was L2-forwarded in hardware * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress * @redirected: packet was redirected by packet classifier * @from_ingress: packet was redirected from the ingress path * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @head_frag: skb was allocated from page fragments, * not allocated by kmalloc() or vmalloc(). * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @pp_recycle: mark the packet for recycling instead of freeing (implies * page_pool support on driver) * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport * ports. 
* @sw_hash: indicates hash was computed in software stack * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @encapsulation: indicates the inner headers in the skbuff are valid * @encap_hdr_csum: software checksum is needed * @csum_valid: checksum is already valid * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL * @csum_complete_sw: checksum was completed by software * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @mono_delivery_time: When set, skb->tstamp has the * delivery_time in mono clock base (i.e. EDT). Otherwise, the * skb->tstamp has the (rcv) timestamp at ingress and * delivery_time at egress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. * @secmark: security marking * @mark: Generic packet mark * @reserved_tailroom: (aka @mark) number of bytes of free space available * at the tail of an sk_buff * @vlan_all: vlan fields (proto & tci) * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) * @inner_ipproto: (aka @inner_protocol) stores ipproto when * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO; * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) * @inner_mac_header: Link layer header (encapsulation) * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header * @kcov_handle: KCOV remote handle for remote coverage collection * @tail: Tail pointer * @end: End pointer * @head: Head of buffer * @data: Data head pointer * @truesize: Buffer size * @users: User count - see {datagram,tcp}.c * @extensions: allocated extensions, valid if active_extensions is nonzero */ struct sk_buff { union { struct { /* These two members must be first to match sk_buff_head. */ struct sk_buff *next; struct sk_buff *prev; union { struct net_device *dev; /* Some protocols might use this space to store information, * while device pointer would be NULL. * UDP receive path is one user. */ unsigned long dev_scratch; }; }; struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ struct list_head list; struct llist_node ll_node; }; struct sock *sk; union { ktime_t tstamp; u64 skb_mstamp_ns; /* earliest departure time */ }; /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you * want to keep them across layers you have to do a skb_clone() * first. This is owned by whoever has the skb queued ATM. */ char cb[48] __aligned(8); union { struct { unsigned long _skb_refdst; void (*destructor)(struct sk_buff *skb); }; struct list_head tcp_tsorted_anchor; #ifdef CONFIG_NET_SOCK_MSG unsigned long _sk_redir; #endif }; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) unsigned long _nfct; #endif unsigned int len, data_len; __u16 mac_len, hdr_len; /* Following fields are _not_ copied in __copy_skb_header() * Note that queue_mapping is here mostly to fill a hole. 
*/ __u16 queue_mapping; /* if you move cloned around you also must adapt those constants */ #ifdef __BIG_ENDIAN_BITFIELD #define CLONED_MASK (1 << 7) #else #define CLONED_MASK 1 #endif #define CLONED_OFFSET offsetof(struct sk_buff, __cloned_offset) /* private: */ __u8 __cloned_offset[0]; /* public: */ __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, pfmemalloc:1, pp_recycle:1; /* page_pool recycle indicator */ #ifdef CONFIG_SKB_EXTENSIONS __u8 active_extensions; #endif /* Fields enclosed in headers group are copied * using a single memcpy() in __copy_skb_header() */ struct_group(headers, /* private: */ __u8 __pkt_type_offset[0]; /* public: */ __u8 pkt_type:3; /* see PKT_TYPE_MAX */ __u8 ignore_df:1; __u8 dst_pending_confirm:1; __u8 ip_summed:2; __u8 ooo_okay:1; /* private: */ __u8 __mono_tc_offset[0]; /* public: */ __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; #endif __u8 remcsum_offload:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 inner_protocol_type:1; __u8 l4_hash:1; __u8 sw_hash:1; #ifdef CONFIG_WIRELESS __u8 wifi_acked_valid:1; __u8 wifi_acked:1; #endif __u8 no_fcs:1; /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif #if IS_ENABLED(CONFIG_IP_VS) __u8 ipvs_property:1; #endif #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) __u8 nf_trace:1; #endif #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; #endif __u8 redirected:1; #ifdef CONFIG_NET_REDIRECT __u8 from_ingress:1; #endif #ifdef CONFIG_NETFILTER_SKIP_EGRESS __u8 nf_skip_egress:1; #endif #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; #endif __u8 slow_gro:1; #if IS_ENABLED(CONFIG_IP_SCTP) __u8 csum_not_inet:1; #endif #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif u16 alloc_cpu; union { __wsum csum; struct { __u16 csum_start; __u16 csum_offset; }; }; __u32 priority; int skb_iif; __u32 hash; union { u32 vlan_all; struct { __be16 vlan_proto; __u16 vlan_tci; }; }; #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) union { unsigned int napi_id; unsigned int sender_cpu; }; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif union { __u32 mark; __u32 reserved_tailroom; }; union { __be16 inner_protocol; __u8 inner_ipproto; }; __u16 inner_transport_header; __u16 inner_network_header; __u16 inner_mac_header; __be16 protocol; __u16 transport_header; __u16 network_header; __u16 mac_header; #ifdef CONFIG_KCOV u64 kcov_handle; #endif ); /* end headers group */ /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; sk_buff_data_t end; unsigned char *head, *data; unsigned int truesize; refcount_t users; #ifdef CONFIG_SKB_EXTENSIONS /* only usable after checking ->active_extensions != 0 */ struct skb_ext *extensions; #endif }; /* if you move pkt_type around you also must adapt those constants */ #ifdef __BIG_ENDIAN_BITFIELD #define PKT_TYPE_MAX (7 << 5) #else #define PKT_TYPE_MAX 7 #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) /* if you move tc_at_ingress or mono_delivery_time * around, you also must adapt these constants. 
*/ #ifdef __BIG_ENDIAN_BITFIELD #define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) #define TC_AT_INGRESS_MASK (1 << 6) #else #define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) #define TC_AT_INGRESS_MASK (1 << 1) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) #ifdef __KERNEL__ /* * Handling routines are only of interest to the kernel */ #define SKB_ALLOC_FCLONE 0x01 #define SKB_ALLOC_RX 0x02 #define SKB_ALLOC_NAPI 0x04 /** * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves * @skb: buffer */ static inline bool skb_pfmemalloc(const struct sk_buff *skb) { return unlikely(skb->pfmemalloc); } /* * skb might have a dst pointer attached, refcounted or not. * _skb_refdst low order bit is set if refcount was _not_ taken */ #define SKB_DST_NOREF 1UL #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) /** * skb_dst - returns skb dst_entry * @skb: buffer * * Returns skb dst_entry, regardless of reference taken or not. */ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { /* If refdst was not refcounted, check we still are in a * rcu_read_lock section */ WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && !rcu_read_lock_held() && !rcu_read_lock_bh_held()); return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } /** * skb_dst_set - sets skb dst * @skb: buffer * @dst: dst entry * * Sets skb dst, assuming a reference was taken on dst and should * be released by skb_dst_drop() */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference * @skb: buffer * @dst: dst entry * * Sets skb dst, assuming a reference was not taken on dst. * If dst entry is cached, we do not take reference and dst_release * will be avoided by refdst_drop. If dst entry is not cached, we take * reference, so that last dst_release can destroy the dst immediately. */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } /** * skb_dst_is_noref - Test if skb dst isn't refcounted * @skb: buffer */ static inline bool skb_dst_is_noref(const struct sk_buff *skb) { return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); } /** * skb_rtable - Returns the skb &rtable * @skb: buffer */ static inline struct rtable *skb_rtable(const struct sk_buff *skb) { return (struct rtable *)skb_dst(skb); } /* For mangling skb->pkt_type from user space side from applications * such as nft, tc, etc, we only allow a conservative subset of * possible pkt_types to be set. */ static inline bool skb_pkt_type_ok(u32 ptype) { return ptype <= PACKET_OTHERHOST; } /** * skb_napi_id - Returns the skb's NAPI id * @skb: buffer */ static inline unsigned int skb_napi_id(const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL return skb->napi_id; #else return 0; #endif } static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) { #ifdef CONFIG_WIRELESS return skb->wifi_acked_valid; #else return 0; #endif } /** * skb_unref - decrement the skb's reference count * @skb: buffer * * Returns true if we can free the skb. 
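 *
 * A minimal editorial sketch of the intended calling pattern; the free
 * routines declared below do essentially this::
 *
 *	if (!skb_unref(skb))
 *		return;
 *	__kfree_skb(skb);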
*/ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; if (likely(refcount_read(&skb->users) == 1)) smp_rmb(); else if (likely(!refcount_dec_and_test(&skb->users))) return false; return true; } static inline bool skb_data_unref(const struct sk_buff *skb, struct skb_shared_info *shinfo) { int bias; if (!skb->cloned) return true; bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; if (atomic_read(&shinfo->dataref) == bias) smp_rmb(); else if (atomic_sub_return(bias, &shinfo->dataref)) return false; return true; } void __fix_address kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); /** * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason * @skb: buffer to free */ static inline void kfree_skb(struct sk_buff *skb) { kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } void skb_release_head_state(struct sk_buff *skb); void kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); static inline void kfree_skb_list(struct sk_buff *segs) { kfree_skb_list_reason(segs, SKB_DROP_REASON_NOT_SPECIFIED); } #ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff *skb); #else static inline void consume_skb(struct sk_buff *skb) { return kfree_skb(skb); } #endif void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize); struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); void skb_attempt_defer_free(struct sk_buff *skb); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); struct sk_buff *slab_build_skb(void *data); /** * alloc_skb - allocate a network buffer * @size: size to allocate * @priority: allocation mask * * This function is a convenient wrapper around __alloc_skb(). */ static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { return __alloc_skb(size, priority, 0, NUMA_NO_NODE); } struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long data_len, int max_page_order, int *errcode, gfp_t gfp_mask); struct sk_buff *alloc_skb_for_msg(struct sk_buff *first); /* Layout of fast clones : [skb1][skb2][fclone_ref] */ struct sk_buff_fclones { struct sk_buff skb1; struct sk_buff skb2; refcount_t fclone_ref; }; /** * skb_fclone_busy - check if fclone is busy * @sk: socket * @skb: buffer * * Returns true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), * so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) { const struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) > 1 && READ_ONCE(fclones->skb2.sk) == sk; } /** * alloc_skb_fclone - allocate a network buffer from fclone cache * @size: size to allocate * @priority: allocation mask * * This function is a convenient wrapper around __alloc_skb(). 
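 *
 * Editorial example (not part of the original header) using the plain
 * alloc_skb() declared above; the same shape applies to the fclone variant.
 * "hlen", "len" and "buf" are hypothetical locals::
 *
 *	skb = alloc_skb(hlen + len, GFP_ATOMIC);
 *	if (!skb)
 *		return -ENOMEM;
 *	skb_reserve(skb, hlen);
 *	skb_put_data(skb, buf, len);
 *
 * The headroom reserved here is what lower layers later consume with
 * skb_push().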
*/ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); void skb_headers_offset_update(struct sk_buff *skb, int off); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); void skb_copy_header(struct sk_buff *new, const struct sk_buff *old); struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, gfp_t gfp_mask, bool fclone); static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) { return __pskb_copy_fclone(skb, headroom, gfp_mask, false); } int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error); /** * skb_pad - zero pad the tail of an skb * @skb: buffer to pad * @pad: space to pad * * Ensure that a buffer is followed by a padding area that is zero * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * * May return error in out of memory cases. The skb is freed on error. */ static inline int skb_pad(struct sk_buff *skb, int pad) { return __skb_pad(skb, pad, true); } #define dev_kfree_skb(a) consume_skb(a) int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size, size_t max_frags); struct skb_seq_state { __u32 lower_offset; __u32 upper_offset; __u32 frag_idx; __u32 stepped_offset; struct sk_buff *root_skb; struct sk_buff *cur_skb; __u8 *frag_data; __u32 frag_off; }; void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int to, struct skb_seq_state *st); unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st); void skb_abort_seq_read(struct skb_seq_state *st); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config); /* * Packet hash types specify the type of hash in skb_set_hash. * * Hash types refer to the protocol layer addresses which are used to * construct a packet's hash. The hashes are used to differentiate or identify * flows of the protocol layer for the hash type. Hash types are either * layer-2 (L2), layer-3 (L3), or layer-4 (L4). * * Properties of hashes: * * 1) Two packets in different flows have different hash values * 2) Two packets in the same flow should have the same hash value * * A hash at a higher layer is considered to be more specific. A driver should * set the most specific hash possible. * * A driver cannot indicate a more specific hash than the layer at which a hash * was computed. For instance an L3 hash cannot be set as an L4 hash. * * A driver may indicate a hash level which is less specific than the * actual layer the hash was computed on. 
For instance, a hash computed * at L4 may be considered an L3 hash. This should only be done if the * driver can't unambiguously determine that the HW computed the hash at * the higher layer. Note that the "should" in the second property above * permits this. */ enum pkt_hash_types { PKT_HASH_TYPE_NONE, /* Undefined type */ PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */ PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */ PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */ }; static inline void skb_clear_hash(struct sk_buff *skb) { skb->hash = 0; skb->sw_hash = 0; skb->l4_hash = 0; } static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) { if (!skb->l4_hash) skb_clear_hash(skb); } static inline void __skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4) { skb->l4_hash = is_l4; skb->sw_hash = is_sw; skb->hash = hash; } static inline void skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) { /* Used by drivers to set hash from HW */ __skb_set_hash(skb, hash, false, type == PKT_HASH_TYPE_L4); } static inline void __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) { __skb_set_hash(skb, hash, true, is_l4); } void __skb_get_hash(struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, const void *data, int hlen_proto); static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) { return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); } void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); struct bpf_flow_dissector; u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, __be16 proto, int nhoff, int hlen, unsigned int flags); bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, __be16 proto, int nhoff, int hlen, unsigned int flags); static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, unsigned int flags) { return __skb_flow_dissect(NULL, skb, flow_dissector, target_container, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, struct flow_keys *flow, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, flow, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys_basic(const struct net *net, const struct sk_buff *skb, struct flow_keys_basic *flow, const void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, data, proto, nhoff, hlen, flags); } void skb_flow_dissect_meta(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); /* Gets a skb connection tracking info, ctinfo map should be a * map of mapsize to translate enum ip_conntrack_info states * to user states. 
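 *
 * (Editorial aside on the generic dissector declared above: a minimal
 * sketch extracting basic flow keys from an skb)::
 *
 *	struct flow_keys keys;
 *
 *	if (skb_flow_dissect_flow_keys(skb, &keys, 0))
 *		pr_debug("ip_proto %u thoff %u\n",
 *			 keys.basic.ip_proto, keys.control.thoff);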
*/ void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, bool post_ct, u16 zone); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) __skb_get_hash(skb); return skb->hash; } static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) { if (!skb->l4_hash && !skb->sw_hash) { struct flow_keys keys; __u32 hash = __get_hash_from_flowi6(fl6, &keys); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } return skb->hash; } __u32 skb_get_hash_perturb(const struct sk_buff *skb, const siphash_key_t *perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { return skb->hash; } static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) { to->hash = from->hash; to->sw_hash = from->sw_hash; to->l4_hash = from->l4_hash; }; static inline int skb_cmp_decrypted(const struct sk_buff *skb1, const struct sk_buff *skb2) { #ifdef CONFIG_TLS_DEVICE return skb2->decrypted - skb1->decrypted; #else return 0; #endif } static inline void skb_copy_decrypted(struct sk_buff *to, const struct sk_buff *from) { #ifdef CONFIG_TLS_DEVICE to->decrypted = from->decrypted; #endif } #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { return skb->head + skb->end; } static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) { skb->end = offset; } #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { return skb->end; } static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) { skb->end = skb->head + offset; } #endif struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, bool success); int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length); static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len); } int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg); /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) { return &skb_shinfo(skb)->hwtstamps; } static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) { bool is_zcopy = skb && skb_shinfo(skb)->flags & SKBFL_ZEROCOPY_ENABLE; return is_zcopy ? 
skb_uarg(skb) : NULL; } static inline bool skb_zcopy_pure(const struct sk_buff *skb) { return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY; } static inline bool skb_zcopy_managed(const struct sk_buff *skb) { return skb_shinfo(skb)->flags & SKBFL_MANAGED_FRAG_REFS; } static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1, const struct sk_buff *skb2) { return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2); } static inline void net_zcopy_get(struct ubuf_info *uarg) { refcount_inc(&uarg->refcnt); } static inline void skb_zcopy_init(struct sk_buff *skb, struct ubuf_info *uarg) { skb_shinfo(skb)->destructor_arg = uarg; skb_shinfo(skb)->flags |= uarg->flags; } static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, bool *have_ref) { if (skb && uarg && !skb_zcopy(skb)) { if (unlikely(have_ref && *have_ref)) *have_ref = false; else net_zcopy_get(uarg); skb_zcopy_init(skb, uarg); } } static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) { skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); skb_shinfo(skb)->flags |= SKBFL_ZEROCOPY_FRAG; } static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) { return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL; } static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) { return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); } static inline void net_zcopy_put(struct ubuf_info *uarg) { if (uarg) uarg->callback(NULL, uarg, true); } static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { if (uarg->callback == msg_zerocopy_callback) msg_zerocopy_put_abort(uarg, have_uref); else if (have_uref) net_zcopy_put(uarg); } } /* Release a reference on a zerocopy structure */ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) { struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { if (!skb_zcopy_is_nouarg(skb)) uarg->callback(skb, uarg, zerocopy_success); skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; } } void __skb_zcopy_downgrade_managed(struct sk_buff *skb); static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb) { if (unlikely(skb_zcopy_managed(skb))) __skb_zcopy_downgrade_managed(skb); } static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; } static inline void skb_poison_list(struct sk_buff *skb) { #ifdef CONFIG_DEBUG_NET skb->next = SKB_LIST_POISON_NEXT; #endif } /* Iterate through singly-linked GSO fragments of an skb. */ #define skb_list_walk_safe(first, skb, next_skb) \ for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL) static inline void skb_list_del_init(struct sk_buff *skb) { __list_del_entry(&skb->list); skb_mark_not_on_list(skb); } /** * skb_queue_empty - check if a queue is empty * @list: queue head * * Returns true if the queue is empty, false otherwise. */ static inline int skb_queue_empty(const struct sk_buff_head *list) { return list->next == (const struct sk_buff *) list; } /** * skb_queue_empty_lockless - check if a queue is empty * @list: queue head * * Returns true if the queue is empty, false otherwise. * This variant can be used in lockless contexts. */ static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list) { return READ_ONCE(list->next) == (const struct sk_buff *) list; } /** * skb_queue_is_last - check if skb is the last entry in the queue * @list: queue head * @skb: buffer * * Returns true if @skb is the last buffer on the list. 
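 *
 * (Editorial aside on skb_list_walk_safe() defined above: a typical sketch
 * for consuming a singly-linked GSO segment list, where "segs" is the
 * hypothetical head returned by a segmentation call)::
 *
 *	struct sk_buff *seg, *next;
 *
 *	skb_list_walk_safe(segs, seg, next) {
 *		skb_mark_not_on_list(seg);
 *		dev_queue_xmit(seg);
 *	}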
*/ static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { return skb->next == (const struct sk_buff *) list; } /** * skb_queue_is_first - check if skb is the first entry in the queue * @list: queue head * @skb: buffer * * Returns true if @skb is the first buffer on the list. */ static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { return skb->prev == (const struct sk_buff *) list; } /** * skb_queue_next - return the next packet in the queue * @list: queue head * @skb: current buffer * * Return the next packet in @list after @skb. It is only valid to * call this if skb_queue_is_last() evaluates to false. */ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, const struct sk_buff *skb) { /* This BUG_ON may seem severe, but if we just return then we * are going to dereference garbage. */ BUG_ON(skb_queue_is_last(list, skb)); return skb->next; } /** * skb_queue_prev - return the prev packet in the queue * @list: queue head * @skb: current buffer * * Return the prev packet in @list before @skb. It is only valid to * call this if skb_queue_is_first() evaluates to false. */ static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, const struct sk_buff *skb) { /* This BUG_ON may seem severe, but if we just return then we * are going to dereference garbage. */ BUG_ON(skb_queue_is_first(list, skb)); return skb->prev; } /** * skb_get - reference buffer * @skb: buffer to reference * * Makes another reference to a socket buffer and returns a pointer * to the buffer. */ static inline struct sk_buff *skb_get(struct sk_buff *skb) { refcount_inc(&skb->users); return skb; } /* * If users == 1, we are the only owner and can avoid redundant atomic changes. */ /** * skb_cloned - is the buffer a clone * @skb: buffer to check * * Returns true if the buffer was generated with skb_clone() and is * one of multiple shared copies of the buffer. Cloned buffers are * shared data so must not be written to under normal circumstances. */ static inline int skb_cloned(const struct sk_buff *skb) { return skb->cloned && (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; } static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); return 0; } /* This variant of skb_unclone() makes sure skb->truesize * and skb_end_offset() are not changed, whenever a new skb->head is needed. * * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) * when various debugging features are in place. */ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) return __skb_unclone_keeptruesize(skb, pri); return 0; } /** * skb_header_cloned - is the header a clone * @skb: buffer to check * * Returns true if modifying the header part of the buffer requires * the data to be copied. 
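 *
 * An editorial sketch of the usual copy-on-write pattern built on the
 * unclone helpers, before rewriting packet headers in place::
 *
 *	if (skb_header_unclone(skb, GFP_ATOMIC))
 *		return -ENOMEM;
 *
 * After this the header area may be modified without affecting other
 * clones of the data.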
*/ static inline int skb_header_cloned(const struct sk_buff *skb) { int dataref; if (!skb->cloned) return 0; dataref = atomic_read(&skb_shinfo(skb)->dataref); dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT); return dataref != 1; } static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_header_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); return 0; } /** * __skb_header_release() - allow clones to use the headroom * @skb: buffer to operate on * * See "DOC: dataref and headerless skbs". */ static inline void __skb_header_release(struct sk_buff *skb) { skb->nohdr = 1; atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT)); } /** * skb_shared - is the buffer shared * @skb: buffer to check * * Returns true if more than one person has a reference to this * buffer. */ static inline int skb_shared(const struct sk_buff *skb) { return refcount_read(&skb->users) != 1; } /** * skb_share_check - check if buffer is shared and if so clone it * @skb: buffer to check * @pri: priority for memory allocation * * If the buffer is shared the buffer is cloned and the old copy * drops a reference. A new clone with a single reference is returned. * If the buffer is not shared the original buffer is returned. When * being called from interrupt status or with spinlocks held pri must * be GFP_ATOMIC. * * NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, pri); if (likely(nskb)) consume_skb(skb); else kfree_skb(skb); skb = nskb; } return skb; } /* * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping * a packet that's being forwarded. */ /** * skb_unshare - make a copy of a shared buffer * @skb: buffer to check * @pri: priority for memory allocation * * If the socket buffer is a clone then this function creates a new * copy of the data, drops a reference count on the old copy and returns * the new copy with the reference count at 1. If the buffer is not a clone * the original buffer is returned. When called with a spinlock held or * from interrupt state @pri must be %GFP_ATOMIC * * %NULL is returned on a memory allocation failure. */ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); /* Free our shared copy */ if (likely(nskb)) consume_skb(skb); else kfree_skb(skb); skb = nskb; } return skb; } /** * skb_peek - peek at the head of an &sk_buff_head * @list_: list to peek at * * Peek an &sk_buff. Unlike most other operations you _MUST_ * be careful with this one. A peek leaves the buffer on the * list and someone else may run off with it. You must hold * the appropriate locks or have a private queue to do this. * * Returns %NULL for an empty list or a pointer to the head element. * The reference count is not incremented and the reference is therefore * volatile. Use with caution. 
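 *
 * An editorial usage sketch, holding the queue lock as required::
 *
 *	spin_lock_irqsave(&list->lock, flags);
 *	skb = skb_peek(list);
 *	if (skb)
 *		len = skb->len;
 *	spin_unlock_irqrestore(&list->lock, flags);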
*/ static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) { struct sk_buff *skb = list_->next; if (skb == (struct sk_buff *)list_) skb = NULL; return skb; } /** * __skb_peek - peek at the head of a non-empty &sk_buff_head * @list_: list to peek at * * Like skb_peek(), but the caller knows that the list is not empty. */ static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_) { return list_->next; } /** * skb_peek_next - peek skb following the given one from a queue * @skb: skb to start from * @list_: list to peek at * * Returns %NULL when the end of the list is met or a pointer to the * next element. The reference count is not incremented and the * reference is therefore volatile. Use with caution. */ static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, const struct sk_buff_head *list_) { struct sk_buff *next = skb->next; if (next == (struct sk_buff *)list_) next = NULL; return next; } /** * skb_peek_tail - peek at the tail of an &sk_buff_head * @list_: list to peek at * * Peek an &sk_buff. Unlike most other operations you _MUST_ * be careful with this one. A peek leaves the buffer on the * list and someone else may run off with it. You must hold * the appropriate locks or have a private queue to do this. * * Returns %NULL for an empty list or a pointer to the tail element. * The reference count is not incremented and the reference is therefore * volatile. Use with caution. */ static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) { struct sk_buff *skb = READ_ONCE(list_->prev); if (skb == (struct sk_buff *)list_) skb = NULL; return skb; } /** * skb_queue_len - get queue length * @list_: list to measure * * Return the length of an &sk_buff queue. */ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) { return list_->qlen; } /** * skb_queue_len_lockless - get queue length * @list_: list to measure * * Return the length of an &sk_buff queue. * This variant can be used in lockless contexts. */ static inline __u32 skb_queue_len_lockless(const struct sk_buff_head *list_) { return READ_ONCE(list_->qlen); } /** * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head * @list: queue to initialize * * This initializes only the list and queue length aspects of * an sk_buff_head object. This allows to initialize the list * aspects of an sk_buff_head without reinitializing things like * the spinlock. It can also be used for on-stack sk_buff_head * objects where the spinlock is known to not be used. */ static inline void __skb_queue_head_init(struct sk_buff_head *list) { list->prev = list->next = (struct sk_buff *)list; list->qlen = 0; } /* * This function creates a split out lock class for each invocation; * this is needed for now since a whole lot of users of the skb-queue * infrastructure in drivers have different locking usage (in hardirq) * than the networking core (in softirq only). In the long run either the * network layer or drivers should need annotation to consolidate the * main types of usage into 3 classes. */ static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); __skb_queue_head_init(list); } static inline void skb_queue_head_init_class(struct sk_buff_head *list, struct lock_class_key *class) { skb_queue_head_init(list); lockdep_set_class(&list->lock, class); } /* * Insert an sk_buff on a list. * * The "__skb_xxxx()" functions are the non-atomic ones that * can only be called with interrupts disabled. 
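 *
 * An editorial sketch: a purely private on-stack queue is never touched
 * concurrently, so the __skb_xxxx() variants below can be used directly::
 *
 *	struct sk_buff_head tmp;
 *
 *	__skb_queue_head_init(&tmp);
 *	__skb_queue_tail(&tmp, skb);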
*/ static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) { /* See skb_queue_empty_lockless() and skb_peek_tail() * for the opposite READ_ONCE() */ WRITE_ONCE(newsk->next, next); WRITE_ONCE(newsk->prev, prev); WRITE_ONCE(((struct sk_buff_list *)next)->prev, newsk); WRITE_ONCE(((struct sk_buff_list *)prev)->next, newsk); WRITE_ONCE(list->qlen, list->qlen + 1); } static inline void __skb_queue_splice(const struct sk_buff_head *list, struct sk_buff *prev, struct sk_buff *next) { struct sk_buff *first = list->next; struct sk_buff *last = list->prev; WRITE_ONCE(first->prev, prev); WRITE_ONCE(prev->next, first); WRITE_ONCE(last->next, next); WRITE_ONCE(next->prev, last); } /** * skb_queue_splice - join two skb lists, this is designed for stacks * @list: the new list to add * @head: the place to add it in the first list */ static inline void skb_queue_splice(const struct sk_buff_head *list, struct sk_buff_head *head) { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); head->qlen += list->qlen; } } /** * skb_queue_splice_init - join two skb lists and reinitialise the emptied list * @list: the new list to add * @head: the place to add it in the first list * * The list at @list is reinitialised */ static inline void skb_queue_splice_init(struct sk_buff_head *list, struct sk_buff_head *head) { if (!skb_queue_empty(list)) { __skb_queue_splice(list, (struct sk_buff *) head, head->next); head->qlen += list->qlen; __skb_queue_head_init(list); } } /** * skb_queue_splice_tail - join two skb lists, each list being a queue * @list: the new list to add * @head: the place to add it in the first list */ static inline void skb_queue_splice_tail(const struct sk_buff_head *list, struct sk_buff_head *head) { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); head->qlen += list->qlen; } } /** * skb_queue_splice_tail_init - join two skb lists and reinitialise the emptied list * @list: the new list to add * @head: the place to add it in the first list * * Each of the lists is a queue. * The list at @list is reinitialised */ static inline void skb_queue_splice_tail_init(struct sk_buff_head *list, struct sk_buff_head *head) { if (!skb_queue_empty(list)) { __skb_queue_splice(list, head->prev, (struct sk_buff *) head); head->qlen += list->qlen; __skb_queue_head_init(list); } } /** * __skb_queue_after - queue a buffer at the list head * @list: list to use * @prev: place after this buffer * @newsk: buffer to queue * * Queue a buffer int the middle of a list. This function takes no locks * and you must therefore hold required locks before calling it. * * A buffer cannot be placed on two lists at the same time. */ static inline void __skb_queue_after(struct sk_buff_head *list, struct sk_buff *prev, struct sk_buff *newsk) { __skb_insert(newsk, prev, ((struct sk_buff_list *)prev)->next, list); } void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); static inline void __skb_queue_before(struct sk_buff_head *list, struct sk_buff *next, struct sk_buff *newsk) { __skb_insert(newsk, ((struct sk_buff_list *)next)->prev, next, list); } /** * __skb_queue_head - queue a buffer at the list head * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the start of a list. This function takes no locks * and you must therefore hold required locks before calling it. * * A buffer cannot be placed on two lists at the same time. 
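 *
 * An editorial sketch of the locked pattern this helper expects; this is
 * essentially what skb_queue_head() does::
 *
 *	spin_lock_irqsave(&list->lock, flags);
 *	__skb_queue_head(list, skb);
 *	spin_unlock_irqrestore(&list->lock, flags);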
*/ static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_after(list, (struct sk_buff *)list, newsk); } void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); /** * __skb_queue_tail - queue a buffer at the list tail * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the end of a list. This function takes no locks * and you must therefore hold required locks before calling it. * * A buffer cannot be placed on two lists at the same time. */ static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_before(list, (struct sk_buff *)list, newsk); } void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); /* * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. */ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; WRITE_ONCE(list->qlen, list->qlen - 1); next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; WRITE_ONCE(next->prev, prev); WRITE_ONCE(prev->next, next); } /** * __skb_dequeue - remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. This function does not take any locks * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); if (skb) __skb_unlink(skb, list); return skb; } struct sk_buff *skb_dequeue(struct sk_buff_head *list); /** * __skb_dequeue_tail - remove from the tail of the queue * @list: list to dequeue from * * Remove the tail of the list. This function does not take any locks * so must be used with appropriate locks held only. The tail item is * returned or %NULL if the list is empty. 
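 *
 * Editorial sketch: draining a private queue with the non-atomic dequeue
 * helpers, assuming no other context can touch "tmp"::
 *
 *	while ((skb = __skb_dequeue(&tmp)) != NULL)
 *		kfree_skb(skb);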
*/ static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek_tail(list); if (skb) __skb_unlink(skb, list); return skb; } struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline bool skb_is_nonlinear(const struct sk_buff *skb) { return skb->data_len; } static inline unsigned int skb_headlen(const struct sk_buff *skb) { return skb->len - skb->data_len; } static inline unsigned int __skb_pagelen(const struct sk_buff *skb) { unsigned int i, len = 0; for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--) len += skb_frag_size(&skb_shinfo(skb)->frags[i]); return len; } static inline unsigned int skb_pagelen(const struct sk_buff *skb) { return skb_headlen(skb) + __skb_pagelen(skb); } static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, netmem_ref netmem, int off, int size) { frag->netmem = netmem; frag->offset = off; skb_frag_size_set(frag, size); } static inline void skb_frag_fill_page_desc(skb_frag_t *frag, struct page *page, int off, int size) { skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); } static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, int i, netmem_ref netmem, int off, int size) { skb_frag_t *frag = &shinfo->frags[i]; skb_frag_fill_netmem_desc(frag, netmem, off, size); } static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) { __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, size); } /** * skb_len_add - adds a number to len fields of skb * @skb: buffer to add len to * @delta: number of bytes to add */ static inline void skb_len_add(struct sk_buff *skb, int delta) { skb->len += delta; skb->data_len += delta; skb->truesize += delta; } /** * __skb_fill_netmem_desc - initialise a fragment in an skb * @skb: buffer containing fragment to be initialised * @i: fragment index to initialise * @netmem: the netmem to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * * Initialises the @i'th fragment of @skb to point to &size bytes at * offset @off within @page. * * Does not take any additional reference on the fragment. */ static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size) { struct page *page = netmem_to_page(netmem); __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). */ page = compound_head(page); if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size) { __skb_fill_netmem_desc(skb, i, netmem, off, size); skb_shinfo(skb)->nr_frags = i + 1; } /** * skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised * @i: paged fragment index to initialise * @page: the page to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * * As per __skb_fill_page_desc() -- initialises the @i'th fragment of * @skb to point to @size bytes at offset @off within @page. In * addition updates @skb such that @i is the last fragment. 
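 *
 * An editorial sketch of attaching one page fragment and accounting its
 * length; the truesize adjustment below assumes the fragment consumes
 * exactly @size bytes of the underlying buffer::
 *
 *	skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, off, size);
 *	skb_len_add(skb, size);
 *
 * Drivers that cannot make that assumption should use skb_add_rx_frag(),
 * declared below, which takes an explicit truesize.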
* * Does not take any additional reference on the fragment. */ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } /** * skb_fill_page_desc_noacc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised * @i: paged fragment index to initialise * @page: the page to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * * Variant of skb_fill_page_desc() which does not deal with * pfmemalloc, if page is not owned by us. */ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, struct page *page, int off, int size) { struct skb_shared_info *shinfo = skb_shinfo(skb); __skb_fill_page_desc_noacc(shinfo, i, page, off, size); shinfo->nr_frags = i + 1; } void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size, unsigned int truesize); static inline void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize) { skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, truesize); } void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) { return skb->head + skb->tail; } static inline void skb_reset_tail_pointer(struct sk_buff *skb) { skb->tail = skb->data - skb->head; } static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) { skb_reset_tail_pointer(skb); skb->tail += offset; } #else /* NET_SKBUFF_DATA_USES_OFFSET */ static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) { return skb->tail; } static inline void skb_reset_tail_pointer(struct sk_buff *skb) { skb->tail = skb->data; } static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) { skb->tail = skb->data + offset; } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ static inline void skb_assert_len(struct sk_buff *skb) { #ifdef CONFIG_DEBUG_NET if (WARN_ONCE(!skb->len, "%s\n", __func__)) DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); #endif /* CONFIG_DEBUG_NET */ } /* * Add data to an sk_buff */ void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); void *skb_put(struct sk_buff *skb, unsigned int len); static inline void *__skb_put(struct sk_buff *skb, unsigned int len) { void *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; return tmp; } static inline void *__skb_put_zero(struct sk_buff *skb, unsigned int len) { void *tmp = __skb_put(skb, len); memset(tmp, 0, len); return tmp; } static inline void *__skb_put_data(struct sk_buff *skb, const void *data, unsigned int len) { void *tmp = __skb_put(skb, len); memcpy(tmp, data, len); return tmp; } static inline void __skb_put_u8(struct sk_buff *skb, u8 val) { *(u8 *)__skb_put(skb, 1) = val; } static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len) { void *tmp = skb_put(skb, len); memset(tmp, 0, len); return tmp; } static inline void *skb_put_data(struct sk_buff *skb, const void *data, unsigned int len) { void *tmp = skb_put(skb, len); memcpy(tmp, data, len); return tmp; } static inline void skb_put_u8(struct sk_buff *skb, u8 val) { *(u8 *)skb_put(skb, 1) = val; } void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int 
len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); skb->data -= len; skb->len += len; return skb->data; } void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); skb->len -= len; if (unlikely(skb->len < skb->data_len)) { #if defined(CONFIG_DEBUG_NET) skb->len += len; pr_err("__skb_pull(len=%u)\n", len); skb_dump(KERN_ERR, skb, false); #endif BUG(); } return skb->data += len; } static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len) { return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); } void *skb_pull_data(struct sk_buff *skb, size_t len); void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; if (unlikely(len > skb->len)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (unlikely(!__pskb_pull_tail(skb, len - skb_headlen(skb)))) return SKB_DROP_REASON_NOMEM; return SKB_NOT_DROPPED_YET; } static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { return pskb_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) { if (!pskb_may_pull(skb, len)) return NULL; skb->len -= len; return skb->data += len; } void skb_condense(struct sk_buff *skb); /** * skb_headroom - bytes at buffer head * @skb: buffer to check * * Return the number of bytes of free space at the head of an &sk_buff. */ static inline unsigned int skb_headroom(const struct sk_buff *skb) { return skb->data - skb->head; } /** * skb_tailroom - bytes at buffer end * @skb: buffer to check * * Return the number of bytes of free space at the tail of an sk_buff */ static inline int skb_tailroom(const struct sk_buff *skb) { return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; } /** * skb_availroom - bytes at buffer end * @skb: buffer to check * * Return the number of bytes of free space at the tail of an sk_buff * allocated by sk_stream_alloc() */ static inline int skb_availroom(const struct sk_buff *skb) { if (skb_is_nonlinear(skb)) return 0; return skb->end - skb->tail - skb->reserved_tailroom; } /** * skb_reserve - adjust headroom * @skb: buffer to alter * @len: bytes to move * * Increase the headroom of an empty &sk_buff by reducing the tail * room. This is only allowed for an empty buffer. */ static inline void skb_reserve(struct sk_buff *skb, int len) { skb->data += len; skb->tail += len; } /** * skb_tailroom_reserve - adjust reserved_tailroom * @skb: buffer to alter * @mtu: maximum amount of headlen permitted * @needed_tailroom: minimum amount of reserved_tailroom * * Set reserved_tailroom so that headlen can be as large as possible but * not larger than mtu and tailroom cannot be smaller than * needed_tailroom. * The required headroom should already have been reserved before using * this function. 
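 *
 * (Editorial aside on the pull helpers defined earlier in this block: a
 * common header-parsing sketch, where "hdr_len" and "hdr" are hypothetical)::
 *
 *	if (!pskb_may_pull(skb, hdr_len))
 *		goto drop;
 *	hdr = skb->data;
 *	skb_pull(skb, hdr_len);
 *
 * The pointer is taken only after pskb_may_pull(), since pulling tail data
 * into the linear area may reallocate skb->head.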
*/ static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu, unsigned int needed_tailroom) { SKB_LINEAR_ASSERT(skb); if (mtu < skb_tailroom(skb) - needed_tailroom) /* use at most mtu */ skb->reserved_tailroom = skb_tailroom(skb) - mtu; else /* use up to all available space */ skb->reserved_tailroom = needed_tailroom; } #define ENCAP_TYPE_ETHER 0 #define ENCAP_TYPE_IPPROTO 1 static inline void skb_set_inner_protocol(struct sk_buff *skb, __be16 protocol) { skb->inner_protocol = protocol; skb->inner_protocol_type = ENCAP_TYPE_ETHER; } static inline void skb_set_inner_ipproto(struct sk_buff *skb, __u8 ipproto) { skb->inner_ipproto = ipproto; skb->inner_protocol_type = ENCAP_TYPE_IPPROTO; } static inline void skb_reset_inner_headers(struct sk_buff *skb) { skb->inner_mac_header = skb->mac_header; skb->inner_network_header = skb->network_header; skb->inner_transport_header = skb->transport_header; } static inline void skb_reset_mac_len(struct sk_buff *skb) { skb->mac_len = skb->network_header - skb->mac_header; } static inline unsigned char *skb_inner_transport_header(const struct sk_buff *skb) { return skb->head + skb->inner_transport_header; } static inline int skb_inner_transport_offset(const struct sk_buff *skb) { return skb_inner_transport_header(skb) - skb->data; } static inline void skb_reset_inner_transport_header(struct sk_buff *skb) { skb->inner_transport_header = skb->data - skb->head; } static inline void skb_set_inner_transport_header(struct sk_buff *skb, const int offset) { skb_reset_inner_transport_header(skb); skb->inner_transport_header += offset; } static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) { return skb->head + skb->inner_network_header; } static inline void skb_reset_inner_network_header(struct sk_buff *skb) { skb->inner_network_header = skb->data - skb->head; } static inline void skb_set_inner_network_header(struct sk_buff *skb, const int offset) { skb_reset_inner_network_header(skb); skb->inner_network_header += offset; } static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) { return skb->inner_network_header > 0; } static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; } static inline void skb_reset_inner_mac_header(struct sk_buff *skb) { skb->inner_mac_header = skb->data - skb->head; } static inline void skb_set_inner_mac_header(struct sk_buff *skb, const int offset) { skb_reset_inner_mac_header(skb); skb->inner_mac_header += offset; } static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { return skb->transport_header != (typeof(skb->transport_header))~0U; } static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->head + skb->transport_header; } static inline void skb_reset_transport_header(struct sk_buff *skb) { skb->transport_header = skb->data - skb->head; } static inline void skb_set_transport_header(struct sk_buff *skb, const int offset) { skb_reset_transport_header(skb); skb->transport_header += offset; } static inline unsigned char *skb_network_header(const struct sk_buff *skb) { return skb->head + skb->network_header; } static inline void skb_reset_network_header(struct sk_buff *skb) { skb->network_header = skb->data - skb->head; } static inline void skb_set_network_header(struct sk_buff *skb, const int offset) { skb_reset_network_header(skb); skb->network_header += offset; } static inline int 
skb_mac_header_was_set(const struct sk_buff *skb) { return skb->mac_header != (typeof(skb->mac_header))~0U; } static inline unsigned char *skb_mac_header(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->head + skb->mac_header; } static inline int skb_mac_offset(const struct sk_buff *skb) { return skb_mac_header(skb) - skb->data; } static inline u32 skb_mac_header_len(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->network_header - skb->mac_header; } static inline void skb_unset_mac_header(struct sk_buff *skb) { skb->mac_header = (typeof(skb->mac_header))~0U; } static inline void skb_reset_mac_header(struct sk_buff *skb) { skb->mac_header = skb->data - skb->head; } static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) { skb_reset_mac_header(skb); skb->mac_header += offset; } static inline void skb_pop_mac_header(struct sk_buff *skb) { skb->mac_header = skb->network_header; } static inline void skb_probe_transport_header(struct sk_buff *skb) { struct flow_keys_basic keys; if (skb_transport_header_was_set(skb)) return; if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); } static inline void skb_mac_header_rebuild(struct sk_buff *skb) { if (skb_mac_header_was_set(skb)) { const unsigned char *old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -skb->mac_len); memmove(skb_mac_header(skb), old_mac, skb->mac_len); } } static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); } static inline unsigned char *skb_checksum_start(const struct sk_buff *skb) { return skb->head + skb->csum_start; } static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; } static inline u32 skb_network_header_len(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } static inline u32 skb_inner_network_header_len(const struct sk_buff *skb) { return skb->inner_transport_header - skb->inner_network_header; } static inline int skb_network_offset(const struct sk_buff *skb) { return skb_network_header(skb) - skb->data; } static inline int skb_inner_network_offset(const struct sk_buff *skb) { return skb_inner_network_header(skb) - skb->data; } static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) { return pskb_may_pull(skb, skb_network_offset(skb) + len); } /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the * hardware handles it or large if we have to take an exception and fix it * in software. * * Since an ethernet header is 14 bytes network drivers often end up with * the IP header at an unaligned offset. The IP header can be aligned by * shifting the start of the packet by 2 bytes. Drivers should do this * with: * * skb_reserve(skb, NET_IP_ALIGN); * * The downside to this alignment of the IP header is that the DMA is now * unaligned. On some architectures the cost of an unaligned DMA is high * and this cost outweighs the gains made by aligning the IP header. * * Since this trade off varies between architectures, we allow NET_IP_ALIGN * to be overridden. */ #ifndef NET_IP_ALIGN #define NET_IP_ALIGN 2 #endif /* * The networking layer reserves some headroom in skb data (via * dev_alloc_skb). 
 * This is used to avoid having to reallocate skb data when
 * the header has to grow. In the default case, if the header has to grow
 * 32 bytes or less we avoid the reallocation.
 *
 * Unfortunately this headroom changes the DMA alignment of the resulting
 * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive
 * on some architectures. An architecture can override this value,
 * perhaps setting it to a cacheline in size (since that will maintain
 * cacheline alignment of the DMA). It must be a power of 2.
 *
 * Various parts of the networking layer expect at least 32 bytes of
 * headroom; you should not reduce this.
 *
 * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS)
 * to reduce the average number of cache lines per packet.
 * get_rps_cpu() for example only accesses one 64-byte-aligned block:
 * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
 */
#ifndef NET_SKB_PAD
#define NET_SKB_PAD	max(32, L1_CACHE_BYTES)
#endif

int ___pskb_trim(struct sk_buff *skb, unsigned int len);

static inline void __skb_set_length(struct sk_buff *skb, unsigned int len)
{
        if (WARN_ON(skb_is_nonlinear(skb)))
                return;
        skb->len = len;
        skb_set_tail_pointer(skb, len);
}

static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
{
        __skb_set_length(skb, len);
}

void skb_trim(struct sk_buff *skb, unsigned int len);

static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
{
        if (skb->data_len)
                return ___pskb_trim(skb, len);
        __skb_trim(skb, len);
        return 0;
}

static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
{
        return (len < skb->len) ? __pskb_trim(skb, len) : 0;
}

/**
 * pskb_trim_unique - remove end from a paged unique (not cloned) buffer
 * @skb: buffer to alter
 * @len: new length
 *
 * This is identical to pskb_trim except that the caller knows that
 * the skb is not cloned so we should never get an error due to
 * out-of-memory.
 */
static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
{
        int err = pskb_trim(skb, len);

        BUG_ON(err);
}

static inline int __skb_grow(struct sk_buff *skb, unsigned int len)
{
        unsigned int diff = len - skb->len;

        if (skb_tailroom(skb) < diff) {
                int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb),
                                           GFP_ATOMIC);
                if (ret)
                        return ret;
        }
        __skb_set_length(skb, len);
        return 0;
}

/**
 * skb_orphan - orphan a buffer
 * @skb: buffer to orphan
 *
 * If a buffer currently has an owner then we call the owner's
 * destructor function and make the @skb unowned. The buffer continues
 * to exist but is no longer charged to its former owner.
 */
static inline void skb_orphan(struct sk_buff *skb)
{
        if (skb->destructor) {
                skb->destructor(skb);
                skb->destructor = NULL;
                skb->sk = NULL;
        } else {
                BUG_ON(skb->sk);
        }
}

/**
 * skb_orphan_frags - orphan the frags contained in a buffer
 * @skb: buffer to orphan frags from
 * @gfp_mask: allocation mask for replacement pages
 *
 * For each frag in the SKB which needs a destructor (i.e. has an
 * owner) create a copy of that frag and release the original
 * page by calling the destructor.
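 *
 * Illustrative usage sketch (not part of the original kernel-doc): a
 * transmit path that may hand the skb to a local receiver typically bails
 * out if the copy fails, for example:
 *
 *    if (skb_orphan_frags(skb, GFP_ATOMIC))
 *            goto drop;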
*/ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) { if (likely(!skb_zcopy(skb))) return 0; if (skb_shinfo(skb)->flags & SKBFL_DONT_ORPHAN) return 0; return skb_copy_ubufs(skb, gfp_mask); } /* Frags must be orphaned, even if refcounted, if skb might loop to rx path */ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) { if (likely(!skb_zcopy(skb))) return 0; return skb_copy_ubufs(skb, gfp_mask); } /** * __skb_queue_purge_reason - empty a list * @list: list to empty * @reason: drop reason * * Delete all buffers on an &sk_buff list. Each buffer is removed from * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ static inline void __skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) kfree_skb_reason(skb, reason); } static inline void __skb_queue_purge(struct sk_buff_head *list) { __skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); } void skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason); static inline void skb_queue_purge(struct sk_buff_head *list) { skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); } unsigned int skb_rbtree_purge(struct rb_root *root); void skb_errqueue_purge(struct sk_buff_head *list); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); /** * netdev_alloc_frag - allocate a page fragment * @fragsz: fragment size * * Allocates a frag from a page for receive buffer. * Uses GFP_ATOMIC allocations. */ static inline void *netdev_alloc_frag(unsigned int fragsz) { return __netdev_alloc_frag_align(fragsz, ~0u); } static inline void *netdev_alloc_frag_align(unsigned int fragsz, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); return __netdev_alloc_frag_align(fragsz, -align); } struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask); /** * netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on * @length: length to allocate * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has unspecified headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned if there is no free memory. Although this function * allocates memory it can be called from an interrupt. 
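 *
 * Illustrative usage sketch (not part of the original kernel-doc): a
 * hypothetical driver receive path (rx_buf and pkt_len are made-up names;
 * eth_type_trans() comes from <linux/etherdevice.h>) could combine this
 * with the helpers defined earlier in this header:
 *
 *    skb = netdev_alloc_skb(dev, pkt_len + NET_IP_ALIGN);
 *    if (unlikely(!skb))
 *            return NULL;
 *    skb_reserve(skb, NET_IP_ALIGN);
 *    skb_put_data(skb, rx_buf, pkt_len);
 *    skb->protocol = eth_type_trans(skb, dev);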
*/ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, unsigned int length) { return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } /* legacy helper around __netdev_alloc_skb() */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask) { return __netdev_alloc_skb(NULL, length, gfp_mask); } /* legacy helper around netdev_alloc_skb() */ static inline struct sk_buff *dev_alloc_skb(unsigned int length) { return netdev_alloc_skb(NULL, length); } static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, unsigned int length, gfp_t gfp) { struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); if (NET_IP_ALIGN && skb) skb_reserve(skb, NET_IP_ALIGN); return skb; } static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, unsigned int length) { return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } static inline void skb_free_frag(void *addr) { page_frag_free(addr); } void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); static inline void *napi_alloc_frag(unsigned int fragsz) { return __napi_alloc_frag_align(fragsz, ~0u); } static inline void *napi_alloc_frag_align(unsigned int fragsz, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); return __napi_alloc_frag_align(fragsz, -align); } struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int length, gfp_t gfp_mask); static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length) { return __napi_alloc_skb(napi, length, GFP_ATOMIC); } void napi_consume_skb(struct sk_buff *skb, int budget); void napi_skb_free_stolen_head(struct sk_buff *skb); void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason); /** * __dev_alloc_pages - allocate page for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * @order: size of the allocation * * Allocate a new page. * * %NULL is returned if there is no free memory. */ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, unsigned int order) { /* This piece of code contains several assumptions. * 1. This is for device Rx, therefore a cold page is preferred. * 2. The expectation is the user wants a compound page. * 3. If requesting a order 0 page it will not be compound * due to the check to see if order has a value in prep_new_page * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to * code in gfp_to_alloc_flags that should be enforcing this. */ gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); } static inline struct page *dev_alloc_pages(unsigned int order) { return __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, order); } /** * __dev_alloc_page - allocate a page for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * * Allocate a new page. * * %NULL is returned if there is no free memory. */ static inline struct page *__dev_alloc_page(gfp_t gfp_mask) { return __dev_alloc_pages(gfp_mask, 0); } static inline struct page *dev_alloc_page(void) { return dev_alloc_pages(0); } /** * dev_page_is_reusable - check whether a page can be reused for network Rx * @page: the page to test * * A page shouldn't be considered for reusing/recycling if it was allocated * under memory pressure or at a distant memory node. * * Returns false if this page should be returned to page allocator, true * otherwise. 
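 *
 * Illustrative usage sketch (not part of the original kernel-doc): a driver
 * that keeps a per-ring page cache (rx_ring and its cached_page field are
 * made-up names) might decide whether to recycle a page like this:
 *
 *    if (dev_page_is_reusable(page))
 *            rx_ring->cached_page = page;
 *    else
 *            put_page(page);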
*/ static inline bool dev_page_is_reusable(const struct page *page) { return likely(page_to_nid(page) == numa_mem_id() && !page_is_pfmemalloc(page)); } /** * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page * @page: The page that was allocated from skb_alloc_page * @skb: The skb that may need pfmemalloc set */ static inline void skb_propagate_pfmemalloc(const struct page *page, struct sk_buff *skb) { if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } /** * skb_frag_off() - Returns the offset of a skb fragment * @frag: the paged fragment */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { return frag->offset; } /** * skb_frag_off_add() - Increments the offset of a skb fragment by @delta * @frag: skb fragment * @delta: value to add */ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) { frag->offset += delta; } /** * skb_frag_off_set() - Sets the offset of a skb fragment * @frag: skb fragment * @offset: offset of fragment */ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) { frag->offset = offset; } /** * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment * @fragto: skb fragment where offset is set * @fragfrom: skb fragment offset is copied from */ static inline void skb_frag_off_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { fragto->offset = fragfrom->offset; } /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * * Returns the &struct page associated with @frag. */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { return netmem_to_page(frag->netmem); } /** * __skb_frag_ref - take an addition reference on a paged fragment. * @frag: the paged fragment * * Takes an additional reference on the paged fragment @frag. */ static inline void __skb_frag_ref(skb_frag_t *frag) { get_page(skb_frag_page(frag)); } /** * skb_frag_ref - take an addition reference on a paged fragment of an skb. * @skb: the buffer * @f: the fragment offset. * * Takes an additional reference on the @f'th paged fragment of @skb. */ static inline void skb_frag_ref(struct sk_buff *skb, int f) { __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog); bool napi_pp_put_page(struct page *page, bool napi_safe); static inline void skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe) { #ifdef CONFIG_PAGE_POOL if (skb->pp_recycle && napi_pp_put_page(page, napi_safe)) return; #endif put_page(page); } static inline void napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) { struct page *page = skb_frag_page(frag); #ifdef CONFIG_PAGE_POOL if (recycle && napi_pp_put_page(page, napi_safe)) return; #endif put_page(page); } /** * __skb_frag_unref - release a reference on a paged fragment. * @frag: the paged fragment * @recycle: recycle the page if allocated via page_pool * * Releases a reference on the paged fragment @frag * or recycles the page via the page_pool API. */ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) { napi_frag_unref(frag, recycle, false); } /** * skb_frag_unref - release a reference on a paged fragment of an skb. * @skb: the buffer * @f: the fragment offset * * Releases a reference on the @f'th paged fragment of @skb. 
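 *
 * Illustrative usage sketch (not part of the original kernel-doc): code
 * that copies the frag descriptors of one skb into another (from, to and i
 * are made-up names) takes one reference per copied fragment; that
 * reference is dropped again, ultimately via __skb_frag_unref(), when the
 * owning skb releases its data:
 *
 *    skb_shinfo(to)->frags[i] = skb_shinfo(from)->frags[i];
 *    __skb_frag_ref(&skb_shinfo(to)->frags[i]);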
*/ static inline void skb_frag_unref(struct sk_buff *skb, int f) { struct skb_shared_info *shinfo = skb_shinfo(skb); if (!skb_zcopy_managed(skb)) __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); } /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * * Returns the address of the data within @frag. The page must already * be mapped. */ static inline void *skb_frag_address(const skb_frag_t *frag) { return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } /** * skb_frag_address_safe - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * * Returns the address of the data within @frag. Checks that the page * is mapped and returns %NULL otherwise. */ static inline void *skb_frag_address_safe(const skb_frag_t *frag) { void *ptr = page_address(skb_frag_page(frag)); if (unlikely(!ptr)) return NULL; return ptr + skb_frag_off(frag); } /** * skb_frag_page_copy() - sets the page in a fragment from another fragment * @fragto: skb fragment where page is set * @fragfrom: skb fragment page is copied from */ static inline void skb_frag_page_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { fragto->netmem = fragfrom->netmem; } bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** * skb_frag_dma_map - maps a paged fragment via the DMA API * @dev: the device to map the fragment to * @frag: the paged fragment to map * @offset: the offset within the fragment (starting at the * fragment's own offset) * @size: the number of bytes to map * @dir: the direction of the mapping (``PCI_DMA_*``) * * Maps the page associated with @frag to @device. */ static inline dma_addr_t skb_frag_dma_map(struct device *dev, const skb_frag_t *frag, size_t offset, size_t size, enum dma_data_direction dir) { return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag) + offset, size, dir); } static inline struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { return __pskb_copy(skb, skb_headroom(skb), gfp_mask); } static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb, gfp_t gfp_mask) { return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true); } /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check * @len: length up to which to write * * Returns true if modifying the header part of the cloned buffer * does not requires the data to be copied. */ static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len) { return !skb_header_cloned(skb) && skb_headroom(skb) + len <= skb->hdr_len; } static inline int skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { return skb_cloned(skb) && !skb_clone_writable(skb, write_len) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, int cloned) { int delta = 0; if (headroom > skb_headroom(skb)) delta = headroom - skb_headroom(skb); if (delta || cloned) return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, GFP_ATOMIC); return 0; } /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow * @headroom: needed headroom * * If the skb passed lacks sufficient headroom or its data part * is shared, data is reallocated. If reallocation fails, an error * is returned and original skb is not changed. * * The result is skb with writable area skb->head...skb->tail * and at least @headroom of space at head. 
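 *
 * Illustrative usage sketch (not part of the original kernel-doc): a tunnel
 * transmit path about to push an @encap_len byte outer header (the name is
 * made up) would typically do:
 *
 *    if (skb_cow(skb, encap_len))
 *            goto drop;
 *    outer = skb_push(skb, encap_len);
 *
 * When only the header area is going to be written, skb_cow_head() below
 * is the cheaper variant.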
*/ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { return __skb_cow(skb, headroom, skb_cloned(skb)); } /** * skb_cow_head - skb_cow but only making the head writable * @skb: buffer to cow * @headroom: needed headroom * * This function is identical to skb_cow except that we replace the * skb_cloned check by skb_header_cloned. It should be used when * you only need to push on some header and do not need to modify * the data. */ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) { return __skb_cow(skb, headroom, skb_header_cloned(skb)); } /** * skb_padto - pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ static inline int skb_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; if (likely(size >= len)) return 0; return skb_pad(skb, len - size); } /** * __skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * @free_on_error: free buffer on error * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error if @free_on_error is true. */ static inline int __must_check __skb_put_padto(struct sk_buff *skb, unsigned int len, bool free_on_error) { unsigned int size = skb->len; if (unlikely(size < len)) { len -= size; if (__skb_pad(skb, len, free_on_error)) return -ENOMEM; __skb_put(skb, len); } return 0; } /** * skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len) { return __skb_put_padto(skb, len, true); } bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) __must_check; static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { const int off = skb->len; if (skb->ip_summed == CHECKSUM_NONE) { __wsum csum = 0; if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy, &csum, from)) { skb->csum = csum_block_add(skb->csum, csum, off); return 0; } } else if (copy_from_iter_full(skb_put(skb, copy), copy, from)) return 0; __skb_trim(skb, off); return -EFAULT; } static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { if (skb_zcopy(skb)) return false; if (i) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; return page == skb_frag_page(frag) && off == skb_frag_off(frag) + skb_frag_size(frag); } return false; } static inline int __skb_linearize(struct sk_buff *skb) { return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; } /** * skb_linearize - convert paged skb to linear one * @skb: buffer to linarize * * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ static inline int skb_linearize(struct sk_buff *skb) { return skb_is_nonlinear(skb) ? 
__skb_linearize(skb) : 0; } /** * skb_has_shared_frag - can any frag be overwritten * @skb: buffer to test * * Return true if the skb has at least one frag that might be modified * by an external entity (as in vmsplice()/sendfile()) */ static inline bool skb_has_shared_frag(const struct sk_buff *skb) { return skb_is_nonlinear(skb) && skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; } /** * skb_linearize_cow - make sure skb is linear and writable * @skb: buffer to process * * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ static inline int skb_linearize_cow(struct sk_buff *skb) { return skb_is_nonlinear(skb) || skb_cloned(skb) ? __skb_linearize(skb) : 0; } static __always_inline void __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, unsigned int off) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_block_sub(skb->csum, csum_partial(start, len, 0), off); else if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } /** * skb_postpull_rcsum - update checksum for received skb after pull * @skb: buffer to update * @start: start of data before pull * @len: length of data pulled * * After doing a pull on a received packet, you need to call this to * update the CHECKSUM_COMPLETE checksum, or set ip_summed to * CHECKSUM_NONE so that it can be recomputed from scratch. */ static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = wsum_negate(csum_partial(start, len, wsum_negate(skb->csum))); else if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } static __always_inline void __skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len, unsigned int off) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_block_add(skb->csum, csum_partial(start, len, 0), off); } /** * skb_postpush_rcsum - update checksum for received skb after push * @skb: buffer to update * @start: start of data after push * @len: length of data pushed * * After doing a push on a received packet, you need to call this to * update the CHECKSUM_COMPLETE checksum. */ static inline void skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { __skb_postpush_rcsum(skb, start, len, 0); } void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); /** * skb_push_rcsum - push skb and update receive checksum * @skb: buffer to update * @len: length of data pulled * * This function performs an skb_push on the packet and updates * the CHECKSUM_COMPLETE checksum. It should be used on * receive path processing instead of skb_push unless you know * that the checksum difference is zero (e.g., a valid IP header) * or you are setting ip_summed to CHECKSUM_NONE. */ static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len) { skb_push(skb, len); skb_postpush_rcsum(skb, skb->data, len); return skb->data; } int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim * @len: new length * * This is exactly the same as pskb_trim except that it ensures the * checksum of received packets are still valid after the operation. * It can change skb pointers. 
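 *
 * Illustrative usage sketch (not part of the original kernel-doc): a
 * receive handler trimming trailing padding after parsing a length field
 * (ip_len is a made-up name) might do:
 *
 *    if (pskb_trim_rcsum(skb, ip_len))
 *            goto drop;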
*/ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; return pskb_trim_rcsum_slow(skb, len); } static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; __skb_trim(skb, len); return 0; } static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; return __skb_grow(skb, len); } #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) #define skb_rb_first(root) rb_to_skb(rb_first(root)) #define skb_rb_last(root) rb_to_skb(rb_last(root)) #define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) #define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ for (skb = (queue)->next, tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_rbtree_walk(skb, root) \ for (skb = skb_rb_first(root); skb != NULL; \ skb = skb_rb_next(skb)) #define skb_rbtree_walk_from(skb) \ for (; skb != NULL; \ skb = skb_rb_next(skb)) #define skb_rbtree_walk_from_safe(skb, tmp) \ for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \ skb = tmp) #define skb_queue_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ skb != (struct sk_buff *)(queue); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ for (skb = (queue)->prev, tmp = skb->prev; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->prev) #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->prev; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->prev) static inline bool skb_has_frag_list(const struct sk_buff *skb) { return skb_shinfo(skb)->frag_list != NULL; } static inline void skb_frag_list_init(struct sk_buff *skb) { skb_shinfo(skb)->frag_list = NULL; } #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, struct msghdr *msg, int size) { return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size); } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, 
struct iov_iter *to, int len, struct ahash_request *hash); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len); static inline void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) { __skb_free_datagram_locked(sk, skb, 0); } int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len); int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); int skb_eth_pop(struct sk_buff *skb); int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, const unsigned char *src); int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, int mac_len, bool ethernet); int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, bool ethernet); int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse); int skb_mpls_dec_ttl(struct sk_buff *skb); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, gfp_t gfp); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { return copy_from_iter_full(data, len, &msg->msg_iter) ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) { return copy_to_iter(data, len, &msg->msg_iter) == len ? 
0 : -EFAULT; } struct skb_checksum_ops { __wsum (*update)(const void *mem, int len, __wsum wsum); __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); }; extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly; __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum, const struct skb_checksum_ops *ops); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); static inline void * __must_check __skb_header_pointer(const struct sk_buff *skb, int offset, int len, const void *data, int hlen, void *buffer) { if (likely(hlen - offset >= len)) return (void *)data + offset; if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL; return buffer; } static inline void * __must_check skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { return __skb_header_pointer(skb, offset, len, skb->data, skb_headlen(skb), buffer); } static inline void * __must_check skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len) { if (likely(skb_headlen(skb) - offset >= len)) return skb->data + offset; return NULL; } /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. * @skb: socket buffer to check * @features: net device features * * Returns true if either: * 1. skb has frag_list and the device doesn't support FRAGLIST, or * 2. skb is fragmented and the device does not support SG. */ static inline bool skb_needs_linearize(struct sk_buff *skb, netdev_features_t features) { return skb_is_nonlinear(skb) && ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG))); } static inline void skb_copy_from_linear_data(const struct sk_buff *skb, void *to, const unsigned int len) { memcpy(to, skb->data, len); } static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb, const int offset, void *to, const unsigned int len) { memcpy(to, skb->data + offset, len); } static inline void skb_copy_to_linear_data(struct sk_buff *skb, const void *from, const unsigned int len) { memcpy(skb->data, from, len); } static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, const int offset, const void *from, const unsigned int len) { memcpy(skb->data + offset, from, len); } void skb_init(void); static inline ktime_t skb_get_ktime(const struct sk_buff *skb) { return skb->tstamp; } /** * skb_get_timestamp - get timestamp from a skb * @skb: skb to get stamp from * @stamp: pointer to struct __kernel_old_timeval to store stamp in * * Timestamps are stored in the skb as offsets to a base timestamp. * This function converts the offset back to a struct timeval and stores * it in stamp. 
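 *
 * Illustrative usage sketch (not part of the original kernel-doc, but
 * roughly what the socket layer does): a recvmsg() implementation filling
 * in the classic SO_TIMESTAMP control message could do:
 *
 *    struct __kernel_old_timeval tv;
 *
 *    skb_get_timestamp(skb, &tv);
 *    put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, sizeof(tv), &tv);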
*/ static inline void skb_get_timestamp(const struct sk_buff *skb, struct __kernel_old_timeval *stamp) { *stamp = ns_to_kernel_old_timeval(skb->tstamp); } static inline void skb_get_new_timestamp(const struct sk_buff *skb, struct __kernel_sock_timeval *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_usec = ts.tv_nsec / 1000; } static inline void skb_get_timestampns(const struct sk_buff *skb, struct __kernel_old_timespec *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_nsec = ts.tv_nsec; } static inline void skb_get_new_timestampns(const struct sk_buff *skb, struct __kernel_timespec *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_nsec = ts.tv_nsec; } static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); skb->mono_delivery_time = 0; } static inline ktime_t net_timedelta(ktime_t t) { return ktime_sub(ktime_get_real(), t); } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, bool mono) { skb->tstamp = kt; skb->mono_delivery_time = kt && mono; } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); /* It is used in the ingress path to clear the delivery_time. * If needed, set the skb->tstamp to the (rcv) timestamp. */ static inline void skb_clear_delivery_time(struct sk_buff *skb) { if (skb->mono_delivery_time) { skb->mono_delivery_time = 0; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); else skb->tstamp = 0; } } static inline void skb_clear_tstamp(struct sk_buff *skb) { if (skb->mono_delivery_time) return; skb->tstamp = 0; } static inline ktime_t skb_tstamp(const struct sk_buff *skb) { if (skb->mono_delivery_time) return 0; return skb->tstamp; } static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { if (!skb->mono_delivery_time && skb->tstamp) return skb->tstamp; if (static_branch_unlikely(&netstamp_needed_key) || cond) return ktime_get_real(); return 0; } static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; } static inline void *skb_metadata_end(const struct sk_buff *skb) { return skb_mac_header(skb); } static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, const struct sk_buff *skb_b, u8 meta_len) { const void *a = skb_metadata_end(skb_a); const void *b = skb_metadata_end(skb_b); u64 diffs = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || BITS_PER_LONG != 64) goto slow; /* Using more efficient variant than plain call to memcmp(). */ switch (meta_len) { #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) case 32: diffs |= __it_diff(a, b, 64); fallthrough; case 24: diffs |= __it_diff(a, b, 64); fallthrough; case 16: diffs |= __it_diff(a, b, 64); fallthrough; case 8: diffs |= __it_diff(a, b, 64); break; case 28: diffs |= __it_diff(a, b, 64); fallthrough; case 20: diffs |= __it_diff(a, b, 64); fallthrough; case 12: diffs |= __it_diff(a, b, 64); fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; default: slow: return memcmp(a - meta_len, b - meta_len, meta_len); } return diffs; } static inline bool skb_metadata_differs(const struct sk_buff *skb_a, const struct sk_buff *skb_b) { u8 len_a = skb_metadata_len(skb_a); u8 len_b = skb_metadata_len(skb_b); if (!(len_a | len_b)) return false; return len_a != len_b ? 
true : __skb_metadata_differs(skb_a, skb_b, len_a); } static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len) { skb_shinfo(skb)->meta_len = meta_len; } static inline void skb_metadata_clear(struct sk_buff *skb) { skb_metadata_set(skb, 0); } struct sk_buff *skb_clone_sk(struct sk_buff *skb); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); bool skb_defer_rx_timestamp(struct sk_buff *skb); #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */ static inline void skb_clone_tx_timestamp(struct sk_buff *skb) { } static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) { return false; } #endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */ /** * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps * * PHY drivers may accept clones of transmitted packets for * timestamping via their phy_driver.txtstamp method. These drivers * must call this function to return the skb back to the stack with a * timestamp. * * @skb: clone of the original outgoing packet * @hwtstamps: hardware time stamps * */ void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype); /** * skb_tstamp_tx - queue clone of skb with send time stamps * @orig_skb: the original outgoing packet * @hwtstamps: hardware time stamps, may be NULL if not available * * If the skb has a socket associated, then this function clones the * skb (thus sharing the actual data and optional structures), stores * the optional hardware time stamping information (if non NULL) or * generates a software time stamp (otherwise), then queues the clone * to the error queue of the socket. Errors are silently ignored. */ void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps); /** * skb_tx_timestamp() - Driver hook for transmit timestamping * * Ethernet MAC Drivers should call this function in their hard_xmit() * function immediately before giving the sk_buff to the MAC hardware. * * Specifically, one should make absolutely sure that this function is * called before TX completion of this packet can trigger. Otherwise * the packet could potentially already be freed. * * @skb: A socket buffer. */ static inline void skb_tx_timestamp(struct sk_buff *skb) { skb_clone_tx_timestamp(skb); if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) skb_tstamp_tx(skb, NULL); } /** * skb_complete_wifi_ack - deliver skb with wifi status * * @skb: the original outgoing packet * @acked: ack status * */ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked); __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); __sum16 __skb_checksum_complete(struct sk_buff *skb); static inline int skb_csum_unnecessary(const struct sk_buff *skb) { return ((skb->ip_summed == CHECKSUM_UNNECESSARY) || skb->csum_valid || (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) >= 0)); } /** * skb_checksum_complete - Calculate checksum of an entire packet * @skb: packet to process * * This function calculates the checksum over the entire packet plus * the value of skb->csum. The latter can be used to supply the * checksum of a pseudo header as used by TCP/UDP. It returns the * checksum. * * For protocols that contain complete checksums such as ICMP/TCP/UDP, * this function can be used to verify that checksum on received * packets. In that case the function should return zero if the * checksum is correct. 
In particular, this function will return zero * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the * hardware has already verified the correctness of the checksum. */ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) { return skb_csum_unnecessary(skb) ? 0 : __skb_checksum_complete(skb); } static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (skb->csum_level == 0) skb->ip_summed = CHECKSUM_NONE; else skb->csum_level--; } } static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (skb->csum_level < SKB_MAX_CSUM_LEVEL) skb->csum_level++; } else if (skb->ip_summed == CHECKSUM_NONE) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = 0; } } static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { skb->ip_summed = CHECKSUM_NONE; skb->csum_level = 0; } } /* Check if we need to perform checksum complete validation. * * Returns true if checksum complete is needed, false otherwise * (either checksum is unnecessary or zero checksum is allowed). */ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, bool zero_okay, __sum16 check) { if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { skb->csum_valid = 1; __skb_decr_checksum_unnecessary(skb); return false; } return true; } /* For small packets <= CHECKSUM_BREAK perform checksum complete directly * in checksum_init. */ #define CHECKSUM_BREAK 76 /* Unset checksum-complete * * Unset checksum complete can be done when packet is being modified * (uncompressed for instance) and checksum-complete value is * invalidated. */ static inline void skb_checksum_complete_unset(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; } /* Validate (init) checksum based on checksum complete. * * Return values: * 0: checksum is validated or try to in skb_checksum_complete. In the latter * case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo * checksum is stored in skb->csum for use in __skb_checksum_complete * non-zero: value of invalid checksum * */ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, bool complete, __wsum psum) { if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!csum_fold(csum_add(psum, skb->csum))) { skb->csum_valid = 1; return 0; } } skb->csum = psum; if (complete || skb->len <= CHECKSUM_BREAK) { __sum16 csum; csum = __skb_checksum_complete(skb); skb->csum_valid = !csum; return csum; } return 0; } static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) { return 0; } /* Perform checksum validate (init). Note that this is a macro since we only * want to calculate the pseudo header which is an input function if necessary. * First we try to validate without any computation (checksum unnecessary) and * then calculate based on checksum complete calling the function to compute * pseudo header. 
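 *
 * For example (sketch only, not the exact in-tree code; uh is the UDP
 * header pointer and inet_compute_pseudo() comes from the IPv4 code), a
 * UDP receive path can use the zero-check variant like this:
 *
 *    if (skb_checksum_init_zero_check(skb, IPPROTO_UDP, uh->check,
 *                                     inet_compute_pseudo))
 *            goto csum_error;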
* * Return values: * 0: checksum is validated or try to in skb_checksum_complete * non-zero: value of invalid checksum */ #define __skb_checksum_validate(skb, proto, complete, \ zero_okay, check, compute_pseudo) \ ({ \ __sum16 __ret = 0; \ skb->csum_valid = 0; \ if (__skb_checksum_validate_needed(skb, zero_okay, check)) \ __ret = __skb_checksum_validate_complete(skb, \ complete, compute_pseudo(skb, proto)); \ __ret; \ }) #define skb_checksum_init(skb, proto, compute_pseudo) \ __skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo) #define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo) \ __skb_checksum_validate(skb, proto, false, true, check, compute_pseudo) #define skb_checksum_validate(skb, proto, compute_pseudo) \ __skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo) #define skb_checksum_validate_zero_check(skb, proto, check, \ compute_pseudo) \ __skb_checksum_validate(skb, proto, true, true, check, compute_pseudo) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) static inline bool __skb_checksum_convert_check(struct sk_buff *skb) { return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid); } static inline void __skb_checksum_convert(struct sk_buff *skb, __wsum pseudo) { skb->csum = ~pseudo; skb->ip_summed = CHECKSUM_COMPLETE; } #define skb_checksum_try_convert(skb, proto, compute_pseudo) \ do { \ if (__skb_checksum_convert_check(skb)) \ __skb_checksum_convert(skb, compute_pseudo(skb, proto)); \ } while (0) static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr, u16 start, u16 offset) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = ((unsigned char *)ptr + start) - skb->head; skb->csum_offset = offset - start; } /* Update skbuf and packet to reflect the remote checksum offload operation. * When called, ptr indicates the starting point for skb->csum when * ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete * here, skb_postpull_rcsum is done so skb->csum start is ptr. 
*/ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, int start, int offset, bool nopartial) { __wsum delta; if (!nopartial) { skb_remcsum_adjust_partial(skb, ptr, start, offset); return; } if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { __skb_checksum_complete(skb); skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); } delta = remcsum_adjust(ptr, skb->csum, start, offset); /* Adjust skb->csum since we changed the packet */ skb->csum = csum_add(skb->csum, delta); } static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return (void *)(skb->_nfct & NFCT_PTRMASK); #else return NULL; #endif } static inline unsigned long skb_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return skb->_nfct; #else return 0UL; #endif } static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) skb->slow_gro |= !!nfct; skb->_nfct = nfct; #endif } #ifdef CONFIG_SKB_EXTENSIONS enum skb_ext_id { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) SKB_EXT_BRIDGE_NF, #endif #ifdef CONFIG_XFRM SKB_EXT_SEC_PATH, #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) TC_SKB_EXT, #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, #endif #if IS_ENABLED(CONFIG_MCTP_FLOWS) SKB_EXT_MCTP, #endif SKB_EXT_NUM, /* must be last */ }; /** * struct skb_ext - sk_buff extensions * @refcnt: 1 on allocation, deallocated on 0 * @offset: offset to add to @data to obtain extension address * @chunks: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units * @data: start of extension data, variable sized * * Note: offsets/lengths are stored in chunks of 8 bytes, this allows * to use 'u8' types while allowing up to 2kb worth of extension data. */ struct skb_ext { refcount_t refcnt; u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */ u8 chunks; /* same */ char data[] __aligned(8); }; struct skb_ext *__skb_ext_alloc(gfp_t flags); void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, struct skb_ext *ext); void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id); void __skb_ext_put(struct skb_ext *ext); static inline void skb_ext_put(struct sk_buff *skb) { if (skb->active_extensions) __skb_ext_put(skb->extensions); } static inline void __skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src) { dst->active_extensions = src->active_extensions; if (src->active_extensions) { struct skb_ext *ext = src->extensions; refcount_inc(&ext->refcnt); dst->extensions = ext; } } static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src) { skb_ext_put(dst); __skb_ext_copy(dst, src); } static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i) { return !!ext->offset[i]; } static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id) { return skb->active_extensions & (1 << id); } static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) { if (skb_ext_exist(skb, id)) __skb_ext_del(skb, id); } static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id) { if (skb_ext_exist(skb, id)) { struct skb_ext *ext = skb->extensions; return (void *)ext + (ext->offset[id] << 3); } return NULL; } static inline void skb_ext_reset(struct sk_buff *skb) { if (unlikely(skb->active_extensions)) { __skb_ext_put(skb->extensions); skb->active_extensions = 0; } } static inline bool skb_has_extensions(struct sk_buff *skb) { return unlikely(skb->active_extensions); } #else 
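/*
 * Illustrative usage sketch (not part of this header), assuming
 * CONFIG_SKB_EXTENSIONS=y and CONFIG_MPTCP=y: callers attach and look up
 * extension data via skb_ext_add()/skb_ext_find(), for example:
 *
 *    struct mptcp_ext *mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
 *
 *    if (!mpext)
 *            return -ENOMEM;
 *    ...
 *    mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
 */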
static inline void skb_ext_put(struct sk_buff *skb) {} static inline void skb_ext_reset(struct sk_buff *skb) {} static inline void skb_ext_del(struct sk_buff *skb, int unused) {} static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {} static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {} static inline bool skb_has_extensions(struct sk_buff *skb) { return false; } #endif /* CONFIG_SKB_EXTENSIONS */ static inline void nf_reset_ct(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(skb)); skb->_nfct = 0; #endif } static inline void nf_reset_trace(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) skb->nf_trace = 0; #endif } static inline void ipvs_reset(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IP_VS) skb->ipvs_property = 0; #endif } /* Note: This doesn't put any conntrack info in dst. */ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) dst->_nfct = src->_nfct; nf_conntrack_get(skb_nfct(src)); #endif #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) if (copy) dst->nf_trace = src->nf_trace; #endif } static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(dst)); #endif dst->slow_gro = src->slow_gro; __nf_copy(dst, src, true); } #ifdef CONFIG_NETWORK_SECMARK static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) { to->secmark = from->secmark; } static inline void skb_init_secmark(struct sk_buff *skb) { skb->secmark = 0; } #else static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) { } static inline void skb_init_secmark(struct sk_buff *skb) { } #endif static inline int secpath_exists(const struct sk_buff *skb) { #ifdef CONFIG_XFRM return skb_ext_exist(skb, SKB_EXT_SEC_PATH); #else return 0; #endif } static inline bool skb_irq_freeable(const struct sk_buff *skb) { return !skb->destructor && !secpath_exists(skb) && !skb_nfct(skb) && !skb->_skb_refdst && !skb_has_frag_list(skb); } static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) { skb->queue_mapping = queue_mapping; } static inline u16 skb_get_queue_mapping(const struct sk_buff *skb) { return skb->queue_mapping; } static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) { to->queue_mapping = from->queue_mapping; } static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue) { skb->queue_mapping = rx_queue + 1; } static inline u16 skb_get_rx_queue(const struct sk_buff *skb) { return skb->queue_mapping - 1; } static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) { return skb->queue_mapping != 0; } static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val) { skb->dst_pending_confirm = val; } static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb) { return skb->dst_pending_confirm != 0; } static inline struct sec_path *skb_sec_path(const struct sk_buff *skb) { #ifdef CONFIG_XFRM return skb_ext_find(skb, SKB_EXT_SEC_PATH); #else return NULL; #endif } static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; } /* Note: Should be called only if skb_is_gso(skb) is true */ static inline bool 
skb_is_gso_v6(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
}

/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
}

/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_tcp(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6);
}

static inline void skb_gso_reset(struct sk_buff *skb)
{
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_segs = 0;
	skb_shinfo(skb)->gso_type = 0;
}

static inline void skb_increase_gso_size(struct skb_shared_info *shinfo,
					 u16 increment)
{
	if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
		return;
	shinfo->gso_size += increment;
}

static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
					 u16 decrement)
{
	if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
		return;
	shinfo->gso_size -= decrement;
}

void __skb_warn_lro_forwarding(const struct sk_buff *skb);

static inline bool skb_warn_if_lro(const struct sk_buff *skb)
{
	/* LRO sets gso_size but not gso_type, whereas if GSO is really
	 * wanted then gso_type will be set.
	 */
	const struct skb_shared_info *shinfo = skb_shinfo(skb);

	if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 &&
	    unlikely(shinfo->gso_type == 0)) {
		__skb_warn_lro_forwarding(skb);
		return true;
	}
	return false;
}

static inline void skb_forward_csum(struct sk_buff *skb)
{
	/* Unfortunately we don't support this one.  Any brave souls? */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
}

/**
 * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE
 * @skb: skb to check
 *
 * fresh skbs have their ip_summed set to CHECKSUM_NONE.
 * Instead of forcing ip_summed to CHECKSUM_NONE, we can
 * use this helper, to document places where we make this assertion.
 */
static inline void skb_checksum_none_assert(const struct sk_buff *skb)
{
	DEBUG_NET_WARN_ON_ONCE(skb->ip_summed != CHECKSUM_NONE);
}

bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);

int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
				     unsigned int transport_len,
				     __sum16(*skb_chkf)(struct sk_buff *skb));

/**
 * skb_head_is_locked - Determine if the skb->head is locked down
 * @skb: skb to check
 *
 * The head on skbs build around a head frag can be removed if they are
 * not cloned. This function returns true if the skb head is locked down
 * due to either being allocated via kmalloc, or by being a clone with
 * multiple references to the head.
 */
static inline bool skb_head_is_locked(const struct sk_buff *skb)
{
	return !skb->head_frag || skb_cloned(skb);
}

/* Local Checksum Offload.
 * Compute outer checksum based on the assumption that the
 * inner checksum will be offloaded later.
 * See Documentation/networking/checksum-offloads.rst for
 * explanation of how this works.
 * Fill in outer checksum adjustment (e.g. with sum of outer
 * pseudo-header) before calling.
 * Also ensure that inner checksum is in linear data area.
 */
static inline __wsum lco_csum(struct sk_buff *skb)
{
	unsigned char *csum_start = skb_checksum_start(skb);
	unsigned char *l4_hdr = skb_transport_header(skb);
	__wsum partial;

	/* Start with complement of inner checksum adjustment */
	partial = ~csum_unfold(*(__force __sum16 *)(csum_start +
						    skb->csum_offset));

	/* Add in checksum of our headers (incl. outer checksum
	 * adjustment filled in by caller) and return result.
	 */
	return csum_partial(l4_hdr, csum_start - l4_hdr, partial);
}

static inline bool skb_is_redirected(const struct sk_buff *skb)
{
	return skb->redirected;
}

static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
{
	skb->redirected = 1;
#ifdef CONFIG_NET_REDIRECT
	skb->from_ingress = from_ingress;
	if (skb->from_ingress)
		skb_clear_tstamp(skb);
#endif
}

static inline void skb_reset_redirect(struct sk_buff *skb)
{
	skb->redirected = 0;
}

static inline void skb_set_redirected_noclear(struct sk_buff *skb,
					      bool from_ingress)
{
	skb->redirected = 1;
#ifdef CONFIG_NET_REDIRECT
	skb->from_ingress = from_ingress;
#endif
}

static inline bool skb_csum_is_sctp(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IP_SCTP)
	return skb->csum_not_inet;
#else
	return 0;
#endif
}

static inline void skb_reset_csum_not_inet(struct sk_buff *skb)
{
	skb->ip_summed = CHECKSUM_NONE;
#if IS_ENABLED(CONFIG_IP_SCTP)
	skb->csum_not_inet = 0;
#endif
}

static inline void skb_set_kcov_handle(struct sk_buff *skb,
				       const u64 kcov_handle)
{
#ifdef CONFIG_KCOV
	skb->kcov_handle = kcov_handle;
#endif
}

static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
{
#ifdef CONFIG_KCOV
	return skb->kcov_handle;
#else
	return 0;
#endif
}

static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
#ifdef CONFIG_PAGE_POOL
	skb->pp_recycle = 1;
#endif
}

ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
			     ssize_t maxsize, gfp_t gfp);

#endif	/* __KERNEL__ */
#endif	/* _LINUX_SKBUFF_H */
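
/*
 * Editor's illustrative sketch (not part of the original header): a minimal
 * example of how a transmit-path caller might consult the GSO and extension
 * helpers above. The function name example_xmit_prepare() and the policy it
 * implements are invented for illustration only, so the sketch is kept under
 * "#if 0" and never compiled.
 */
#if 0
static int example_xmit_prepare(struct sk_buff *skb)
{
	/* Refuse to forward LRO-merged packets that cannot be re-segmented. */
	if (skb_warn_if_lro(skb))
		return -EINVAL;

	/* gso_type checks are only meaningful once gso_size is non-zero. */
	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb) && !skb_is_gso_sctp(skb))
		return -EOPNOTSUPP;

	/* The IPsec secpath, when present, lives in the SKB_EXT_SEC_PATH
	 * extension and is looked up via skb_sec_path()/skb_ext_find().
	 */
	if (skb_sec_path(skb))
		return -EOPNOTSUPP;	/* hypothetical: no IPsec offload here */

	return 0;
}
#endif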
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM notifier

#if !defined(_TRACE_NOTIFIERS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NOTIFIERS_H

#include <linux/tracepoint.h>

DECLARE_EVENT_CLASS(notifier_info,

	TP_PROTO(void *cb),

	TP_ARGS(cb),

	TP_STRUCT__entry(
		__field(void *, cb)
	),

	TP_fast_assign(
		__entry->cb = cb;
	),

	TP_printk("%ps", __entry->cb)
);

/*
 * notifier_register - called upon notifier callback registration
 *
 * @cb: callback pointer
 *
 */
DEFINE_EVENT(notifier_info, notifier_register,

	TP_PROTO(void *cb),

	TP_ARGS(cb)
);

/*
 * notifier_unregister - called upon notifier callback unregistration
 *
 * @cb: callback pointer
 *
 */
DEFINE_EVENT(notifier_info, notifier_unregister,

	TP_PROTO(void *cb),

	TP_ARGS(cb)
);

/*
 * notifier_run - called upon notifier callback execution
 *
 * @cb: callback pointer
 *
 */
DEFINE_EVENT(notifier_info, notifier_run,

	TP_PROTO(void *cb),

	TP_ARGS(cb)
);

#endif /* _TRACE_NOTIFIERS_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
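
/*
 * Editor's note (illustrative, not part of the original header): all three
 * events share the notifier_info class above, so emitting one only requires
 * the callback pointer. Code that instantiates these tracepoints (for
 * example kernel/notifier.c) would do so roughly as:
 *
 *	#define CREATE_TRACE_POINTS
 *	#include <trace/events/notifier.h>
 *
 *	trace_notifier_register((void *)nb->notifier_call);
 *	trace_notifier_run((void *)nb->notifier_call);
 *	trace_notifier_unregister((void *)nb->notifier_call);
 *
 * TP_printk("%ps", ...) then renders each callback pointer as a symbol name
 * in the trace output.
 */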
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_H
#define _LINUX_SCHED_H

/*
 * Define 'struct task_struct' and provide the main scheduler
 * APIs (schedule(), wakeup variants, etc.)
*/ #include <uapi/linux/sched.h> #include <asm/current.h> #include <asm/processor.h> #include <linux/thread_info.h> #include <linux/preempt.h> #include <linux/cpumask.h> #include <linux/cache.h> #include <linux/irqflags_types.h> #include <linux/smp_types.h> #include <linux/pid_types.h> #include <linux/sem_types.h> #include <linux/shm.h> #include <linux/kmsan_types.h> #include <linux/mutex_types.h> #include <linux/plist_types.h> #include <linux/hrtimer_types.h> #include <linux/timer_types.h> #include <linux/seccomp_types.h> #include <linux/nodemask_types.h> #include <linux/refcount_types.h> #include <linux/resource.h> #include <linux/latencytop.h> #include <linux/sched/prio.h> #include <linux/sched/types.h> #include <linux/signal_types.h> #include <linux/syscall_user_dispatch_types.h> #include <linux/mm_types_task.h> #include <linux/task_io_accounting.h> #include <linux/posix-timers_types.h> #include <linux/restart_block.h> #include <uapi/linux/rseq.h> #include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> #include <linux/livepatch_sched.h> #include <linux/uidgid_types.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; struct bio_list; struct blk_plug; struct bpf_local_storage; struct bpf_run_ctx; struct capture_control; struct cfs_rq; struct fs_struct; struct futex_pi_state; struct io_context; struct io_uring_task; struct mempolicy; struct nameidata; struct nsproxy; struct perf_event_context; struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; struct robust_list_head; struct root_domain; struct rq; struct sched_attr; struct sched_dl_entity; struct seq_file; struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; struct task_struct; struct user_event_mm; /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). * * We have two separate sets of flags: task->__state * is about runnability, while task->exit_state are * about the task exiting. Confusing, but this way * modifying one set can't modify the other one by * mistake. */ /* Used in tsk->__state: */ #define TASK_RUNNING 0x00000000 #define TASK_INTERRUPTIBLE 0x00000001 #define TASK_UNINTERRUPTIBLE 0x00000002 #define __TASK_STOPPED 0x00000004 #define __TASK_TRACED 0x00000008 /* Used in tsk->exit_state: */ #define EXIT_DEAD 0x00000010 #define EXIT_ZOMBIE 0x00000020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* Used in tsk->__state again: */ #define TASK_PARKED 0x00000040 #define TASK_DEAD 0x00000080 #define TASK_WAKEKILL 0x00000100 #define TASK_WAKING 0x00000200 #define TASK_NOLOAD 0x00000400 #define TASK_NEW 0x00000800 #define TASK_RTLOCK_WAIT 0x00001000 #define TASK_FREEZABLE 0x00002000 #define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP)) #define TASK_FROZEN 0x00008000 #define TASK_STATE_MAX 0x00010000 #define TASK_ANY (TASK_STATE_MAX-1) /* * DO NOT ADD ANY NEW USERS ! 
*/ #define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE) /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED __TASK_TRACED #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) /* Convenience macros for the sake of wake_up(): */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) /* get_task_state(): */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) #define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) #define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) /* * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). */ #define is_special_task_state(state) \ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP # define debug_normal_state_change(state_value) \ do { \ WARN_ON_ONCE(is_special_task_state(state_value)); \ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_special_state_change(state_value) \ do { \ WARN_ON_ONCE(!is_special_task_state(state_value)); \ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_rtlock_wait_set_state() \ do { \ current->saved_state_change = current->task_state_change;\ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_rtlock_wait_restore_state() \ do { \ current->task_state_change = current->saved_state_change;\ } while (0) #else # define debug_normal_state_change(cond) do { } while (0) # define debug_special_state_change(cond) do { } while (0) # define debug_rtlock_wait_set_state() do { } while (0) # define debug_rtlock_wait_restore_state() do { } while (0) #endif /* * set_current_state() includes a barrier so that the write of current->__state * is correctly serialised wrt the caller's subsequent test of whether to * actually sleep: * * for (;;) { * set_current_state(TASK_UNINTERRUPTIBLE); * if (CONDITION) * break; * * schedule(); * } * __set_current_state(TASK_RUNNING); * * If the caller does not need such serialisation (because, for instance, the * CONDITION test and condition change and wakeup are under the same lock) then * use __set_current_state(). * * The above is typically ordered against the wakeup, which does: * * CONDITION = 1; * wake_up_state(p, TASK_UNINTERRUPTIBLE); * * where wake_up_state()/try_to_wake_up() executes a full memory barrier before * accessing p->__state. * * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is, * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * * However, with slightly different timing the wakeup TASK_RUNNING store can * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not * a problem either because that will result in one extra go around the loop * and our @cond test will save the day. * * Also see the comments of try_to_wake_up(). 
*/ #define __set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ WRITE_ONCE(current->__state, (state_value)); \ } while (0) #define set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ smp_store_mb(current->__state, (state_value)); \ } while (0) /* * set_special_state() should be used for those states when the blocking task * can not use the regular condition based wait-loop. In that case we must * serialize against wakeups such that any possible in-flight TASK_RUNNING * stores will not collide with our state change. */ #define set_special_state(state_value) \ do { \ unsigned long flags; /* may shadow */ \ \ raw_spin_lock_irqsave(&current->pi_lock, flags); \ debug_special_state_change((state_value)); \ WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ } while (0) /* * PREEMPT_RT specific variants for "sleeping" spin/rwlocks * * RT's spin/rwlock substitutions are state preserving. The state of the * task when blocking on the lock is saved in task_struct::saved_state and * restored after the lock has been acquired. These operations are * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT * lock related wakeups while the task is blocked on the lock are * redirected to operate on task_struct::saved_state to ensure that these * are not dropped. On restore task_struct::saved_state is set to * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail. * * The lock operation looks like this: * * current_save_and_set_rtlock_wait_state(); * for (;;) { * if (try_lock()) * break; * raw_spin_unlock_irq(&lock->wait_lock); * schedule_rtlock(); * raw_spin_lock_irq(&lock->wait_lock); * set_current_state(TASK_RTLOCK_WAIT); * } * current_restore_rtlock_saved_state(); */ #define current_save_and_set_rtlock_wait_state() \ do { \ lockdep_assert_irqs_disabled(); \ raw_spin_lock(&current->pi_lock); \ current->saved_state = current->__state; \ debug_rtlock_wait_set_state(); \ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ raw_spin_unlock(&current->pi_lock); \ } while (0); #define current_restore_rtlock_saved_state() \ do { \ lockdep_assert_irqs_disabled(); \ raw_spin_lock(&current->pi_lock); \ debug_rtlock_wait_restore_state(); \ WRITE_ONCE(current->__state, current->saved_state); \ current->saved_state = TASK_RUNNING; \ raw_spin_unlock(&current->pi_lock); \ } while (0); #define get_current_state() READ_ONCE(current->__state) /* * Define the task command name length as enum, then it can be visible to * BPF programs. 
*/ enum { TASK_COMM_LEN = 16, }; extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern long schedule_timeout(long timeout); extern long schedule_timeout_interruptible(long timeout); extern long schedule_timeout_killable(long timeout); extern long schedule_timeout_uninterruptible(long timeout); extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); asmlinkage void preempt_schedule_irq(void); #ifdef CONFIG_PREEMPT_RT extern void schedule_rtlock(void); #endif extern int __must_check io_schedule_prepare(void); extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); /** * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode * @stime: time spent in system mode * @lock: protects the above two fields * * Stores previous user/system time values such that we can guarantee * monotonicity. */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE u64 utime; u64 stime; raw_spinlock_t lock; #endif }; enum vtime_state { /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, /* Task is idle */ VTIME_IDLE, /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, /* Task runs as guests in a CPU with VTIME active: */ VTIME_GUEST, }; struct vtime { seqcount_t seqcount; unsigned long long starttime; enum vtime_state state; unsigned int cpu; u64 utime; u64 stime; u64 gtime; }; /* * Utilization clamp constraints. * @UCLAMP_MIN: Minimum utilization * @UCLAMP_MAX: Maximum utilization * @UCLAMP_CNT: Utilization clamp constraints count */ enum uclamp_id { UCLAMP_MIN = 0, UCLAMP_MAX, UCLAMP_CNT }; #ifdef CONFIG_SMP extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; #endif struct sched_param { int sched_priority; }; struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ /* # of times we have run on this CPU: */ unsigned long pcount; /* Time spent waiting on a runqueue: */ unsigned long long run_delay; /* Timestamps: */ /* When did we last run on a CPU? */ unsigned long long last_arrival; /* When were we last queued to run? */ unsigned long long last_queued; #endif /* CONFIG_SCHED_INFO */ }; /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. * * We define a basic fixed point arithmetic range, and then formalize * all these metrics based on that basic range. */ # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) /* Increase resolution of cpu_capacity calculations */ # define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT # define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) struct load_weight { unsigned long weight; u32 inv_weight; }; /* * The load/runnable/util_avg accumulates an infinite geometric series * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). * * [load_avg definition] * * load_avg = runnable% * scale_load_down(load) * * [runnable_avg definition] * * runnable_avg = runnable% * SCHED_CAPACITY_SCALE * * [util_avg definition] * * util_avg = running% * SCHED_CAPACITY_SCALE * * where runnable% is the time ratio that a sched_entity is runnable and * running% the time ratio that a sched_entity is running. * * For cfs_rq, they are the aggregated values of all runnable and blocked * sched_entities. 
* * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU * capacity scaling. The scaling is done through the rq_clock_pelt that is used * for computing those signals (see update_rq_clock_pelt()) * * N.B., the above ratios (runnable% and running%) themselves are in the * range of [0, 1]. To do fixed point arithmetics, we therefore scale them * to as large a range as necessary. This is for example reflected by * util_avg's SCHED_CAPACITY_SCALE. * * [Overflow issue] * * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities * with the highest load (=88761), always runnable on a single cfs_rq, * and should not overflow as the number already hits PID_MAX_LIMIT. * * For all other cases (including 32-bit kernels), struct load_weight's * weight will overflow first before we do, because: * * Max(load_avg) <= Max(load.weight) * * Then it is the load_weight's responsibility to consider overflow * issues. */ struct sched_avg { u64 last_update_time; u64 load_sum; u64 runnable_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; unsigned long runnable_avg; unsigned long util_avg; unsigned int util_est; } ____cacheline_aligned; /* * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg * updates. When a task is dequeued, its util_est should not be updated if its * util_avg has not been updated in the meantime. * This information is mapped into the MSB bit of util_est at dequeue time. * Since max value of util_est for a task is 1024 (PELT util_avg for a task) * it is safe to use MSB. */ #define UTIL_EST_WEIGHT_SHIFT 2 #define UTIL_AVG_UNCHANGED 0x80000000 struct sched_statistics { #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; u64 wait_count; u64 wait_sum; u64 iowait_count; u64 iowait_sum; u64 sleep_start; u64 sleep_max; s64 sum_sleep_runtime; u64 block_start; u64 block_max; s64 sum_block_runtime; s64 exec_max; u64 slice_max; u64 nr_migrations_cold; u64 nr_failed_migrations_affine; u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; u64 nr_wakeups; u64 nr_wakeups_sync; u64 nr_wakeups_migrate; u64 nr_wakeups_local; u64 nr_wakeups_remote; u64 nr_wakeups_affine; u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; #ifdef CONFIG_SCHED_CORE u64 core_forceidle_sum; #endif #endif /* CONFIG_SCHEDSTATS */ } ____cacheline_aligned; struct sched_entity { /* For load-balancing: */ struct load_weight load; struct rb_node run_node; u64 deadline; u64 min_vruntime; struct list_head group_node; unsigned int on_rq; u64 exec_start; u64 sum_exec_runtime; u64 prev_sum_exec_runtime; u64 vruntime; s64 vlag; u64 slice; u64 nr_migrations; #ifdef CONFIG_FAIR_GROUP_SCHED int depth; struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; /* cached value of my_q->h_nr_running */ unsigned long runnable_weight; #endif #ifdef CONFIG_SMP /* * Per entity load average tracking. * * Put into separate cache line so it does not * collide with read-mostly values above. 
*/ struct sched_avg avg; #endif }; struct sched_rt_entity { struct list_head run_list; unsigned long timeout; unsigned long watchdog_stamp; unsigned int time_slice; unsigned short on_rq; unsigned short on_list; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ struct rt_rq *rt_rq; /* rq "owned" by this entity/group: */ struct rt_rq *my_q; #endif } __randomize_layout; typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *); typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *); struct sched_dl_entity { struct rb_node rb_node; /* * Original scheduling parameters. Copied here from sched_attr * during sched_setattr(), they will remain the same until * the next sched_setattr(). */ u64 dl_runtime; /* Maximum runtime for each instance */ u64 dl_deadline; /* Relative deadline of each instance */ u64 dl_period; /* Separation of two instances (period) */ u64 dl_bw; /* dl_runtime / dl_period */ u64 dl_density; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, * they are continuously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. */ s64 runtime; /* Remaining runtime for this instance */ u64 deadline; /* Absolute deadline for this instance */ unsigned int flags; /* Specifying the scheduler behaviour */ /* * Some bool flags: * * @dl_throttled tells if we exhausted the runtime. If so, the * task has to wait for a replenishment to be performed at the * next firing of dl_timer. * * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. * * @dl_non_contending tells if the task is inactive while still * contributing to the active utilization. In other words, it * indicates if the inactive timer has been armed and its handler * has not been executed yet. This flag is useful to avoid race * conditions between the inactive timer handler and the wakeup * code. * * @dl_overrun tells if the task asked to be informed about runtime * overruns. */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ struct hrtimer dl_timer; /* * Inactive timer, responsible for decreasing the active utilization * at the "0-lag time". When a -deadline task blocks, it contributes * to GRUB's active utilization until the "0-lag time", hence a * timer is needed to decrease the active utilization at the correct * time. */ struct hrtimer inactive_timer; /* * Bits for DL-server functionality. Also see the comment near * dl_server_update(). * * @rq the runqueue this server is for * * @server_has_tasks() returns true if @server_pick return a * runnable task. */ struct rq *rq; dl_server_has_tasks_f server_has_tasks; dl_server_pick_f server_pick; #ifdef CONFIG_RT_MUTEXES /* * Priority Inheritance. When a DEADLINE scheduling entity is boosted * pi_se points to the donor, otherwise points to the dl_se it belongs * to (the original one/itself). 
*/ struct sched_dl_entity *pi_se; #endif }; #ifdef CONFIG_UCLAMP_TASK /* Number of utilization clamp buckets (shorter alias) */ #define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT /* * Utilization clamp for a scheduling entity * @value: clamp value "assigned" to a se * @bucket_id: bucket index corresponding to the "assigned" value * @active: the se is currently refcounted in a rq's bucket * @user_defined: the requested clamp value comes from user-space * * The bucket_id is the index of the clamp bucket matching the clamp value * which is pre-computed and stored to avoid expensive integer divisions from * the fast path. * * The active bit is set whenever a task has got an "effective" value assigned, * which can be different from the clamp value "requested" from user-space. * This allows to know a task is refcounted in the rq's bucket corresponding * to the "effective" bucket_id. * * The user_defined bit is set whenever a task has got a task-specific clamp * value requested from userspace, i.e. the system defaults apply to this task * just as a restriction. This allows to relax default clamps when a less * restrictive task-specific value has been requested, thus allowing to * implement a "nice" semantic. For example, a task running with a 20% * default boost can still drop its own boosting to 0%. */ struct uclamp_se { unsigned int value : bits_per(SCHED_CAPACITY_SCALE); unsigned int bucket_id : bits_per(UCLAMP_BUCKETS); unsigned int active : 1; unsigned int user_defined : 1; }; #endif /* CONFIG_UCLAMP_TASK */ union rcu_special { struct { u8 blocked; u8 need_qs; u8 exp_hint; /* Hint for performance. */ u8 need_mb; /* Readers need smp_mb(). */ } b; /* Bits. */ u32 s; /* Set of bits. */ }; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, perf_sw_context, perf_nr_task_contexts, }; struct wake_q_node { struct wake_q_node *next; }; struct kmap_ctrl { #ifdef CONFIG_KMAP_LOCAL int idx; pte_t pteval[KM_MAX_IDX]; #endif }; struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* * For reasons of header soup (see current_thread_info()), this * must be the first element of task_struct. */ struct thread_info thread_info; #endif unsigned int __state; /* saved state for "spinlock sleepers" */ unsigned int saved_state; /* * This begins the randomizable portion of task_struct. Only * scheduling-critical items should be added above here. */ randomized_struct_fields_start void *stack; refcount_t usage; /* Per task flags (PF_*), defined further below: */ unsigned int flags; unsigned int ptrace; #ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; /* * recent_used_cpu is initially set as the last CPU used by a task * that wakes affine another task. Waker/wakee relationships can * push tasks around a CPU where each wakeup moves to the next one. * Tracking a recently used CPU allows a quick search for a recently * used CPU that may be idle. */ int recent_used_cpu; int wake_cpu; #endif int on_rq; int prio; int static_prio; int normal_prio; unsigned int rt_priority; struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; struct sched_dl_entity *dl_server; const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE struct rb_node core_node; unsigned long core_cookie; unsigned int core_occupation; #endif #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif #ifdef CONFIG_UCLAMP_TASK /* * Clamp values requested for a scheduling entity. 
* Must be updated with task_rq_lock() held. */ struct uclamp_se uclamp_req[UCLAMP_CNT]; /* * Effective clamp values used for a scheduling entity. * Must be updated with task_rq_lock() held. */ struct uclamp_se uclamp[UCLAMP_CNT]; #endif struct sched_statistics stats; #ifdef CONFIG_PREEMPT_NOTIFIERS /* List of struct preempt_notifier: */ struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif unsigned int policy; int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t *user_cpus_ptr; cpumask_t cpus_mask; void *migration_pending; #ifdef CONFIG_SMP unsigned short migration_disabled; #endif unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TASKS_RCU unsigned long rcu_tasks_nvcsw; u8 rcu_tasks_holdout; u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; int rcu_tasks_exit_cpu; struct list_head rcu_tasks_exit_list; #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU int trc_reader_nesting; int trc_ipi_to_cpu; union rcu_special trc_reader_special; struct list_head trc_holdout_list; struct list_head trc_blkd_node; int trc_blkd_cpu; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ struct sched_info sched_info; struct list_head tasks; #ifdef CONFIG_SMP struct plist_node pushable_tasks; struct rb_node pushable_dl_tasks; #endif struct mm_struct *mm; struct mm_struct *active_mm; struct address_space *faults_disabled_mapping; int exit_state; int exit_code; int exit_signal; /* The signal sent when the parent dies: */ int pdeath_signal; /* JOBCTL_*, siglock protected: */ unsigned long jobctl; /* Used for emulating ABI behavior of previous Linux versions: */ unsigned int personality; /* Scheduler bits, serialized by scheduler locks: */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; /* Force alignment to the next boundary: */ unsigned :0; /* Unserialized, strictly 'current' */ /* * This field must not be in the scheduler word above due to wakelist * queueing no longer being serialized by p->on_cpu. However: * * p->XXX = X; ttwu() * schedule() if (p->on_rq && ..) // false * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true * deactivate_task() ttwu_queue_wakelist()) * p->on_rq = 0; p->sched_remote_wakeup = Y; * * guarantees all stores of 'current' are visible before * ->sched_remote_wakeup gets used, so it can be in this word. */ unsigned sched_remote_wakeup:1; #ifdef CONFIG_RT_MUTEXES unsigned sched_rt_mutex:1; #endif /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG unsigned in_user_fault:1; #endif #ifdef CONFIG_LRU_GEN /* whether the LRU algorithm may apply to this access */ unsigned in_lru_fault:1; #endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif #ifdef CONFIG_CGROUPS /* disallow userland-initiated cgroup migration */ unsigned no_cgroup_migration:1; /* task is frozen/stopped (used by the cgroup freezer) */ unsigned frozen:1; #endif #ifdef CONFIG_BLK_CGROUP unsigned use_memdelay:1; #endif #ifdef CONFIG_PSI /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif #ifdef CONFIG_PAGE_OWNER /* Used by page_owner=on to detect recursion in page tracking. 
*/ unsigned in_page_owner:1; #endif #ifdef CONFIG_EVENTFD /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd:1; #endif #ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif #ifdef CONFIG_CPU_SUP_INTEL unsigned reported_split_lock:1; #endif #ifdef CONFIG_TASK_DELAY_ACCT /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif unsigned long atomic_flags; /* Flags requiring atomic access. */ struct restart_block restart_block; pid_t pid; pid_t tgid; #ifdef CONFIG_STACKPROTECTOR /* Canary value for the -fstack-protector GCC feature: */ unsigned long stack_canary; #endif /* * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ /* Real parent process: */ struct task_struct __rcu *real_parent; /* Recipient of SIGCHLD, wait4() reports: */ struct task_struct __rcu *parent; /* * Children/sibling form the list of natural children: */ struct list_head children; struct list_head sibling; struct task_struct *group_leader; /* * 'ptraced' is the list of tasks this task is using ptrace() on. * * This includes both natural children and PTRACE_ATTACH targets. * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ struct list_head ptraced; struct list_head ptrace_entry; /* PID/PID hash table linkage. */ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; struct list_head thread_node; struct completion *vfork_done; /* CLONE_CHILD_SETTID: */ int __user *set_child_tid; /* CLONE_CHILD_CLEARTID: */ int __user *clear_child_tid; /* PF_KTHREAD | PF_IO_WORKER */ void *worker_private; u64 utime; u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME u64 utimescaled; u64 stimescaled; #endif u64 gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN struct vtime vtime; #endif #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif /* Context switch counts: */ unsigned long nvcsw; unsigned long nivcsw; /* Monotonic time in nsecs: */ u64 start_time; /* Boot based time in nsecs: */ u64 start_boottime; /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ unsigned long min_flt; unsigned long maj_flt; /* Empty if CONFIG_POSIX_CPUTIMERS=n */ struct posix_cputimers posix_cputimers; #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK struct posix_cputimers_work posix_cputimers_work; #endif /* Process credentials: */ /* Tracer's credentials at attach: */ const struct cred __rcu *ptracer_cred; /* Objective and real subjective task credentials (COW): */ const struct cred __rcu *real_cred; /* Effective (overridable) subjective task credentials (COW): */ const struct cred __rcu *cred; #ifdef CONFIG_KEYS /* Cached requested key. */ struct key *cached_requested_key; #endif /* * executable name, excluding path. 
* * - normally initialized setup_new_exec() * - access it with [gs]et_task_comm() * - lock it with task_lock() */ char comm[TASK_COMM_LEN]; struct nameidata *nameidata; #ifdef CONFIG_SYSVIPC struct sysv_sem sysvsem; struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; unsigned long last_switch_time; #endif /* Filesystem information: */ struct fs_struct *fs; /* Open file information: */ struct files_struct *files; #ifdef CONFIG_IO_URING struct io_uring_task *io_uring; #endif /* Namespaces: */ struct nsproxy *nsproxy; /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct __rcu *sighand; sigset_t blocked; sigset_t real_blocked; /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; struct callback_head *task_works; #ifdef CONFIG_AUDIT #ifdef CONFIG_AUDITSYSCALL struct audit_context *audit_context; #endif kuid_t loginuid; unsigned int sessionid; #endif struct seccomp seccomp; struct syscall_user_dispatch syscall_dispatch; /* Thread group tracking: */ u64 parent_exec_id; u64 self_exec_id; /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ spinlock_t alloc_lock; /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task: */ struct rb_root_cached pi_waiters; /* Updated under owner's pi_lock and rq lock */ struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES /* Mutex deadlock detection: */ struct mutex_waiter *blocked_on; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP int non_block_count; #endif #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events irqtrace; unsigned int hardirq_threaded; u64 hardirq_chain_key; int softirqs_enabled; int softirq_context; int irq_config; #endif #ifdef CONFIG_PREEMPT_RT int softirq_disable_cnt; #endif #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; unsigned int lockdep_recursion; struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif #if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP) unsigned int in_ubsan; #endif /* Journalling filesystem info: */ void *journal_info; /* Stacked block device info: */ struct bio_list *bio_list; /* Stack plugging: */ struct blk_plug *plug; /* VM state: */ struct reclaim_state *reclaim_state; struct io_context *io_context; #ifdef CONFIG_COMPACTION struct capture_control *capture_control; #endif /* Ptrace state: */ unsigned long ptrace_message; kernel_siginfo_t *last_siginfo; struct task_io_accounting ioac; #ifdef CONFIG_PSI /* Pressure stall state */ unsigned int psi_flags; #endif #ifdef CONFIG_TASK_XACCT /* Accumulated RSS usage: */ u64 acct_rss_mem1; /* Accumulated virtual memory usage: */ u64 acct_vm_mem1; /* stime + utime since last update: */ u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS /* Protected by ->alloc_lock: */ nodemask_t mems_allowed; /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock: */ struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif #ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; #endif #ifdef 
CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; struct mutex futex_exit_mutex; unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA /* Protected by alloc_lock: */ struct mempolicy *mempolicy; short il_prev; u8 il_weight; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING int numa_scan_seq; unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; unsigned long numa_migrate_retry; /* Migration stamp: */ u64 node_stamp; u64 last_task_numa_placement; u64 last_sum_exec_runtime; struct callback_head numa_work; /* * This pointer is only modified for current in syscall and * pagefault context (and for tasks being destroyed), so it can be read * from any of the following contexts: * - RCU read-side critical section * - current->numa_group from everywhere * - task's runqueue locked, task not running */ struct numa_group __rcu *numa_group; /* * numa_faults is an array split into four regions: * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer * in this precise order. * * faults_memory: Exponential decaying average of faults on a per-node * basis. Scheduling placement decisions are made based on these * counts. The values remain static for the duration of a PTE scan. * faults_cpu: Track the nodes the process was running on when a NUMA * hinting fault was incurred. * faults_memory_buffer and faults_cpu_buffer: Record faults per node * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ unsigned long *numa_faults; unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last * scan window were remote/local or failed to migrate. The task scan * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_RSEQ struct rseq __user *rseq; u32 rseq_len; u32 rseq_sig; /* * RmW on rseq_event_mask must be performed atomically * with respect to preemption. */ unsigned long rseq_event_mask; #endif #ifdef CONFIG_SCHED_MM_CID int mm_cid; /* Current cid in mm */ int last_mm_cid; /* Most recent cid in mm */ int migrate_from_cpu; int mm_cid_active; /* Whether cid bitmap is active */ struct callback_head cid_work; #endif struct tlbflush_unmap_batch tlb_ubc; /* Cache last used pipe for splice(): */ struct pipe_inode_info *splice_pipe; struct page_frag task_frag; #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif #ifdef CONFIG_FAULT_INJECTION int make_it_fail; unsigned int fail_nth; #endif /* * When (nr_dirtied >= nr_dirtied_pause), it's time to call * balance_dirty_pages() for a dirty throttling pause: */ int nr_dirtied; int nr_dirtied_pause; /* Start of a write-and-pause period: */ unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; #endif /* * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. 
*/ u64 timer_slack_ns; u64 default_timer_slack_ns; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) unsigned int kasan_depth; #endif #ifdef CONFIG_KCSAN struct kcsan_ctx kcsan_ctx; #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events kcsan_save_irqtrace; #endif #ifdef CONFIG_KCSAN_WEAK_MEMORY int kcsan_stack_depth; #endif #endif #ifdef CONFIG_KMSAN struct kmsan_ctx kmsan_ctx; #endif #if IS_ENABLED(CONFIG_KUNIT) struct kunit *kunit_test; #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ int curr_ret_stack; int curr_ret_depth; /* Stack of return addresses for return function tracing: */ struct ftrace_ret_stack *ret_stack; /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; /* * Number of functions that haven't been traced * because of depth overrun: */ atomic_t trace_overrun; /* Pause tracing: */ atomic_t tracing_graph_pause; #endif #ifdef CONFIG_TRACING /* Bitmask and counter of trace recursion: */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV /* See kernel/kcov.c for more details. */ /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; /* Size of the kcov_area: */ unsigned int kcov_size; /* Buffer for coverage collection: */ void *kcov_area; /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; /* KCOV common handle for remote coverage collection: */ u64 kcov_handle; /* KCOV sequence number: */ int kcov_sequence; /* Collect coverage from softirq context: */ unsigned int kcov_softirq; #endif #ifdef CONFIG_MEMCG struct mem_cgroup *memcg_in_oom; gfp_t memcg_oom_gfp_mask; int memcg_oom_order; /* Number of pages to reclaim on returning to userland: */ unsigned int memcg_nr_pages_over_high; /* Used by memcontrol for targeted memcg charge: */ struct mem_cgroup *active_memcg; #endif #ifdef CONFIG_MEMCG_KMEM struct obj_cgroup *objcg; #endif #ifdef CONFIG_BLK_CGROUP struct gendisk *throttle_disk; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) unsigned int sequential_io; unsigned int sequential_io_avg; #endif struct kmap_ctrl kmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; # ifdef CONFIG_PREEMPT_RT unsigned long saved_state_change; # endif #endif struct rcu_head rcu; refcount_t rcu_users; int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; struct timer_list oom_reaper_timer; #endif #ifdef CONFIG_VMAP_STACK struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ refcount_t stack_refcount; #endif #ifdef CONFIG_LIVEPATCH int patch_state; #endif #ifdef CONFIG_SECURITY /* Used by LSM modules for access restriction: */ void *security; #endif #ifdef CONFIG_BPF_SYSCALL /* Used by BPF task local storage */ struct bpf_local_storage __rcu *bpf_storage; /* Used for BPF run context */ struct bpf_run_ctx *bpf_ctx; #endif #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; unsigned long prev_lowest_stack; #endif #ifdef CONFIG_X86_MCE void __user *mce_vaddr; __u64 mce_kflags; u64 mce_addr; __u64 mce_ripv : 1, mce_whole_page : 1, __mce_reserved : 62; struct callback_head mce_kill_me; int mce_count; #endif #ifdef CONFIG_KRETPROBES struct llist_head kretprobe_instances; #endif #ifdef CONFIG_RETHOOK struct llist_head rethooks; #endif #ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH /* * If L1D flush is supported on mm context switch * then we use this callback 
head to queue kill work * to kill tasks that are not running on SMT disabled * cores */ struct callback_head l1d_flush_kill; #endif #ifdef CONFIG_RV /* * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS. * If we find justification for more monitors, we can think * about adding more or developing a dynamic method. So far, * none of these are justified. */ union rv_task_monitor rv[RV_PER_TASK_MONITORS]; #endif #ifdef CONFIG_USER_EVENTS struct user_event_mm *user_event_mm; #endif /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. */ randomized_struct_fields_end /* CPU-specific state of this task: */ struct thread_struct thread; /* * WARNING: on x86, 'thread_struct' contains a variable-sized * structure. It *MUST* be at the end of 'task_struct'. * * Do not put anything below here! */ }; #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) static inline unsigned int __task_state_index(unsigned int tsk_state, unsigned int tsk_exit_state) { unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT; BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); if ((tsk_state & TASK_IDLE) == TASK_IDLE) state = TASK_REPORT_IDLE; /* * We're lying here, but rather than expose a completely new task state * to userspace, we can make this appear as if the task has gone through * a regular rt_mutex_lock() call. */ if (tsk_state & TASK_RTLOCK_WAIT) state = TASK_UNINTERRUPTIBLE; return fls(state); } static inline unsigned int task_state_index(struct task_struct *tsk) { return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state); } static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); return state_char[state]; } static inline char task_state_to_char(struct task_struct *tsk) { return task_index_to_char(task_state_index(tsk)); } extern struct pid *cad_pid; /* * Per process flags */ #define PF_VCPU 0x00000001 /* I'm a virtual CPU */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ #define PF_POSTCOREDUMP 0x00000008 /* Coredumps should ignore this task */ #define PF_IO_WORKER 0x00000010 /* Task is an IO worker */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ #define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* Dumped core */ #define PF_SIGNALED 0x00000400 /* Killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory to free memory. See memalloc_noreclaim_save() */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF__HOLE__00010000 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ #define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to, * I am cleaning dirty pages from some other bdi. 
*/ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */ #define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */ #define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_PIN 0x10000000 /* Allocations constrained to zones which allow long term pinning. * See memalloc_pin_save() */ #define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */ #define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other * tasks can access tsk->flags in readonly mode for example * with tsk_used_math (like during threaded core dumping). * There is however an exception to this rule during ptrace * or during fork: the ptracer task is allowed to write to the * child->flags of its traced child (same goes for fork, the parent * can write to the child->flags), because we're guaranteed the * child is not running and in turn not changing child->flags * at the same time the parent does it. */ #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) #define clear_used_math() clear_stopped_child_used_math(current) #define set_used_math() set_stopped_child_used_math(current) #define conditional_stopped_child_used_math(condition, child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) #define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) #define copy_to_stopped_child_used_math(child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) static __always_inline bool is_percpu_thread(void) { #ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && (current->nr_cpus_allowed == 1); #else return true; #endif } /* Per-process atomic flags. */ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. 
*/ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ #define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ #define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ #define PFA_SPEC_SSB_NOEXEC 7 /* Speculative Store Bypass clear on execve() */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ { return test_bit(PFA_##name, &p->atomic_flags); } #define TASK_PFA_SET(name, func) \ static inline void task_set_##func(struct task_struct *p) \ { set_bit(PFA_##name, &p->atomic_flags); } #define TASK_PFA_CLEAR(name, func) \ static inline void task_clear_##func(struct task_struct *p) \ { clear_bit(PFA_##name, &p->atomic_flags); } TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) TASK_PFA_TEST(SPREAD_PAGE, spread_page) TASK_PFA_SET(SPREAD_PAGE, spread_page) TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { current->flags &= ~flags; current->flags |= orig_flags & flags; } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); extern int task_can_attach(struct task_struct *p); extern int dl_bw_alloc(int cpu, u64 dl_bw); extern void dl_bw_free(int cpu, u64 dl_bw); #ifdef CONFIG_SMP /* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); /** * set_cpus_allowed_ptr - set CPU affinity mask of a task * @p: the task * @new_mask: CPU affinity mask * * Return: zero if successful, or a negative error code */ extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node); extern void release_user_cpus_ptr(struct task_struct *p); extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask); extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p); #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { } static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; return 0; } static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { if (src->user_cpus_ptr) return 
-EINVAL; return 0; } static inline void release_user_cpus_ptr(struct task_struct *p) { WARN_ON(p->user_cpus_ptr); } static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) { return 0; } #endif extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); /** * task_nice - return the nice value of a given task. * @p: the task in question. * * Return: The nice value [ -20 ... 0 ... 19 ]. */ static inline int task_nice(const struct task_struct *p) { return PRIO_TO_NICE((p)->static_prio); } extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int available_idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern void sched_set_fifo(struct task_struct *p); extern void sched_set_fifo_low(struct task_struct *p); extern void sched_set_normal(struct task_struct *p, int nice); extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); /** * is_idle_task - is the specified task an idle task? * @p: the task in question. * * Return: 1 if @p is an idle task. 0 otherwise. */ static __always_inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { struct task_struct task; #ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; #endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; #ifndef CONFIG_THREAD_INFO_IN_TASK extern struct thread_info init_thread_info; #endif extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_THREAD_INFO_IN_TASK # define task_thread_info(task) (&(task)->thread_info) #elif !defined(__HAVE_THREAD_FUNCTIONS) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif /* * find a task by one of its numerical ids * * find_task_by_pid_ns(): * finds a task by its pid in the specified namespace * find_task_by_vpid(): * finds a task by its virtual pid * * see also find_vpid() etc in include/linux/pid.h */ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); /* * find a task by its virtual pid and get the task struct */ extern struct task_struct *find_get_task_by_vpid(pid_t nr); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) { __set_task_comm(tsk, from, false); } extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); #define get_task_comm(buf, tsk) ({ \ BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ __get_task_comm(buf, sizeof(buf), tsk); \ }) #ifdef CONFIG_SMP static __always_inline void 
scheduler_ipi(void) { /* * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting * TIF_NEED_RESCHED remotely (for the first time) will also send * this IPI. */ preempt_fold_need_resched(); } #else static inline void scheduler_ipi(void) { } #endif extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state); /* * Set thread flags in other task's structures. * See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { set_ti_thread_flag(task_thread_info(tsk), flag); } static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) { clear_ti_thread_flag(task_thread_info(tsk), flag); } static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, bool value) { update_ti_thread_flag(task_thread_info(tsk), flag, value); } static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); } static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); } static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_ti_thread_flag(task_thread_info(tsk), flag); } static inline void set_tsk_need_resched(struct task_struct *tsk) { set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } static inline void clear_tsk_need_resched(struct task_struct *tsk) { clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } static inline int test_tsk_need_resched(struct task_struct *tsk) { return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return * value indicates whether a reschedule was done in fact. * cond_resched_lock() will drop the spinlock before scheduling, */ #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) extern int __cond_resched(void); #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) void sched_dynamic_klp_enable(void); void sched_dynamic_klp_disable(void); DECLARE_STATIC_CALL(cond_resched, __cond_resched); static __always_inline int _cond_resched(void) { return static_call_mod(cond_resched)(); } #elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) extern int dynamic_cond_resched(void); static __always_inline int _cond_resched(void) { return dynamic_cond_resched(); } #else /* !CONFIG_PREEMPTION */ static inline int _cond_resched(void) { klp_sched_try_switch(); return __cond_resched(); } #endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ #else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */ static inline int _cond_resched(void) { klp_sched_try_switch(); return 0; } #endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */ #define cond_resched() ({ \ __might_resched(__FILE__, __LINE__, 0); \ _cond_resched(); \ }) extern int __cond_resched_lock(spinlock_t *lock); extern int __cond_resched_rwlock_read(rwlock_t *lock); extern int __cond_resched_rwlock_write(rwlock_t *lock); #define MIGHT_RESCHED_RCU_SHIFT 8 #define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) #ifndef CONFIG_PREEMPT_RT /* * Non RT kernels have an elevated preempt count due to the held lock, * but are not allowed to be inside a RCU read side critical section */ # define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET #else /* * spin/rw_lock() on RT implies rcu_read_lock(). 
The might_sleep() check in * cond_resched*lock() has to take that into account because it checks for * preempt_count() and rcu_preempt_depth(). */ # define PREEMPT_LOCK_RESCHED_OFFSETS \ (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT)) #endif #define cond_resched_lock(lock) ({ \ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ __cond_resched_lock(lock); \ }) #define cond_resched_rwlock_read(lock) ({ \ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ __cond_resched_rwlock_read(lock); \ }) #define cond_resched_rwlock_write(lock) ({ \ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ __cond_resched_rwlock_write(lock); \ }) #ifdef CONFIG_PREEMPT_DYNAMIC extern bool preempt_model_none(void); extern bool preempt_model_voluntary(void); extern bool preempt_model_full(void); #else static inline bool preempt_model_none(void) { return IS_ENABLED(CONFIG_PREEMPT_NONE); } static inline bool preempt_model_voluntary(void) { return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY); } static inline bool preempt_model_full(void) { return IS_ENABLED(CONFIG_PREEMPT); } #endif static inline bool preempt_model_rt(void) { return IS_ENABLED(CONFIG_PREEMPT_RT); } /* * Does the preemption model allow non-cooperative preemption? * * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the * PREEMPT_NONE model. */ static inline bool preempt_model_preemptible(void) { return preempt_model_full() || preempt_model_rt(); } static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); } /* * Wrappers for p->thread_info->cpu access. No-op on UP. */ #ifdef CONFIG_SMP static inline unsigned int task_cpu(const struct task_struct *p) { return READ_ONCE(task_thread_info(p)->cpu); } extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else static inline unsigned int task_cpu(const struct task_struct *p) { return 0; } static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) { } #endif /* CONFIG_SMP */ extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); #include <linux/spinlock.h> /* * In order to reduce various lock holder preemption latencies provide an * interface to see if a vCPU is currently running or not. * * This allows us to terminate optimistic spin loops and block, analogous to * the native optimistic spin heuristic of testing if the lock owner task is * running or not. 
*/ #ifndef vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) { return false; } #endif extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif #ifdef CONFIG_SMP static inline bool owner_on_cpu(struct task_struct *owner) { /* * As lock holder preemption issue, we both skip spinning if * task is not on cpu or its cpu is preempted */ return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner)); } /* Returns effective CPU energy utilization, as seen by the scheduler */ unsigned long sched_cpu_util(int cpu); #endif /* CONFIG_SMP */ #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); extern void sched_core_fork(struct task_struct *p); extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, unsigned long uaddr); extern int sched_core_idle_cpu(int cpu); #else static inline void sched_core_free(struct task_struct *tsk) { } static inline void sched_core_fork(struct task_struct *p) { } static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } #endif extern void sched_set_stop_task(int cpu, struct task_struct *stop); #endif
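/*
 * Editor's note: illustrative sketch, not part of <linux/sched.h>. It shows
 * the pattern the PF_MEMALLOC_* flags and current_restore_flags() above are
 * designed for: save the flag bits of interest, set them for a critical
 * region, then restore only those bits so nested sections compose. This is
 * roughly how the memalloc_nofs_save()/memalloc_nofs_restore() helpers
 * referenced in the PF_MEMALLOC_NOFS comment behave; the example_* names
 * below are hypothetical stand-ins.
 */
static inline unsigned int example_nofs_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOFS;

	/* From here on, allocations in this task implicitly drop __GFP_FS. */
	current->flags |= PF_MEMALLOC_NOFS;
	return flags;
}

static inline void example_nofs_restore(unsigned int flags)
{
	/* Restore only the PF_MEMALLOC_NOFS bit to its saved value. */
	current_restore_flags(flags, PF_MEMALLOC_NOFS);
}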
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ #ifndef _LINUX_RSEQ_H #define _LINUX_RSEQ_H #ifdef CONFIG_RSEQ #include <linux/preempt.h> #include <linux/sched.h> /* * Map the event mask on the user-space ABI enum rseq_cs_flags * for direct mask checks. */ enum rseq_event_mask_bits { RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, }; enum rseq_event_mask { RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), }; static inline void rseq_set_notify_resume(struct task_struct *t) { if (t->rseq) set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); } void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { if (current->rseq) __rseq_handle_notify_resume(ksig, regs); } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { preempt_disable(); __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask); preempt_enable(); rseq_handle_notify_resume(ksig, regs); } /* rseq_preempt() requires preemption to be disabled. */ static inline void rseq_preempt(struct task_struct *t) { __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); rseq_set_notify_resume(t); } /* rseq_migrate() requires preemption to be disabled. */ static inline void rseq_migrate(struct task_struct *t) { __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); rseq_set_notify_resume(t); } /* * If parent process has a registered restartable sequences area, the * child inherits. Unregister rseq for a clone with CLONE_VM set. */ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { if (clone_flags & CLONE_VM) { t->rseq = NULL; t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } else { t->rseq = current->rseq; t->rseq_len = current->rseq_len; t->rseq_sig = current->rseq_sig; t->rseq_event_mask = current->rseq_event_mask; } } static inline void rseq_execve(struct task_struct *t) { t->rseq = NULL; t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } #else static inline void rseq_set_notify_resume(struct task_struct *t) { } static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } static inline void rseq_preempt(struct task_struct *t) { } static inline void rseq_migrate(struct task_struct *t) { } static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { } static inline void rseq_execve(struct task_struct *t) { } #endif #ifdef CONFIG_DEBUG_RSEQ void rseq_syscall(struct pt_regs *regs); #else static inline void rseq_syscall(struct pt_regs *regs) { } #endif #endif /* _LINUX_RSEQ_H */
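/*
 * Editor's note: illustrative sketch, not part of <linux/rseq.h>. It only
 * demonstrates the calling convention documented above: rseq_preempt() and
 * rseq_migrate() expect the caller to have preemption disabled already
 * (the scheduler invokes them under the runqueue lock), whereas
 * rseq_signal_deliver() disables preemption around the event-bit update
 * itself. The function name below is hypothetical.
 */
static void example_note_migration(struct task_struct *t)
{
	preempt_disable();
	/* Record the migration event and arm TIF_NOTIFY_RESUME for @t. */
	rseq_migrate(t);
	preempt_enable();
}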
/* SPDX-License-Identifier: GPL-2.0-only */ /* * pm_runtime.h - Device run-time power management helper functions. * * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl> */ #ifndef _LINUX_PM_RUNTIME_H #define _LINUX_PM_RUNTIME_H #include <linux/device.h> #include <linux/notifier.h> #include <linux/pm.h> #include <linux/jiffies.h> /* Runtime PM flag argument bits */ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ /* * Use this for defining a set of PM operations to be used in all situations * (system suspend, hibernation or runtime PM). * * Note that the behaviour differs from the deprecated UNIVERSAL_DEV_PM_OPS() * macro, which uses the provided callbacks for both runtime PM and system * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend() * and pm_runtime_force_resume() for its system sleep callbacks. * * If the underlying dev_pm_ops struct symbol has to be exported, use * EXPORT_RUNTIME_DEV_PM_OPS() or EXPORT_GPL_RUNTIME_DEV_PM_OPS() instead. 
*/ #define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ _DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \ pm_runtime_force_resume, suspend_fn, \ resume_fn, idle_fn) #define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ EXPORT_DEV_PM_OPS(name) = { \ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } #define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ EXPORT_GPL_DEV_PM_OPS(name) = { \ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } #define EXPORT_NS_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ EXPORT_NS_DEV_PM_OPS(name, ns) = { \ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } #define EXPORT_NS_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; static inline bool queue_pm_work(struct work_struct *work) { return queue_work(pm_wq, work); } extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); extern int pm_runtime_get_if_active(struct device *dev); extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); extern void pm_runtime_allow(struct device *dev); extern void pm_runtime_forbid(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); extern int devm_pm_runtime_enable(struct device *dev); /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. * @dev: Target device. * @enable: Whether or not to ignore possible dependencies on children. * * The dependencies of @dev on its children will not be taken into account by * the runtime PM framework going forward if @enable is %true, or they will * be taken into account otherwise. */ static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { dev->power.ignore_children = enable; } /** * pm_runtime_get_noresume - Bump up runtime PM usage counter of a device. * @dev: Target device. */ static inline void pm_runtime_get_noresume(struct device *dev) { atomic_inc(&dev->power.usage_count); } /** * pm_runtime_put_noidle - Drop runtime PM usage counter of a device. * @dev: Target device. 
* * Decrement the runtime PM usage counter of @dev unless it is 0 already. */ static inline void pm_runtime_put_noidle(struct device *dev) { atomic_add_unless(&dev->power.usage_count, -1, 0); } /** * pm_runtime_suspended - Check whether or not a device is runtime-suspended. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev and its runtime PM status is * %RPM_SUSPENDED, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED && !dev->power.disable_depth; } /** * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * * Return %true if runtime PM is disabled for @dev or its runtime PM status is * %RPM_ACTIVE, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_active(struct device *dev) { return dev->power.runtime_status == RPM_ACTIVE || dev->power.disable_depth; } /** * pm_runtime_status_suspended - Check if runtime PM status is "suspended". * @dev: Target device. * * Return %true if the runtime PM status of @dev is %RPM_SUSPENDED, or %false * otherwise, regardless of whether or not runtime PM has been enabled for @dev. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which the * runtime PM status of @dev cannot change. */ static inline bool pm_runtime_status_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED; } /** * pm_runtime_enabled - Check if runtime PM is enabled. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev. */ static inline bool pm_runtime_enabled(struct device *dev) { return !dev->power.disable_depth; } /** * pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present. * @dev: Target device. * * Return %true if @dev is a special device without runtime PM callbacks or * %false otherwise. */ static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return dev->power.no_callbacks; } /** * pm_runtime_mark_last_busy - Update the last access time of a device. * @dev: Target device. * * Update the last access time of @dev used by the runtime PM autosuspend * mechanism to the current time as returned by ktime_get_mono_fast_ns(). */ static inline void pm_runtime_mark_last_busy(struct device *dev) { WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns()); } /** * pm_runtime_is_irq_safe - Check if runtime PM can work in interrupt context. * @dev: Target device. * * Return %true if @dev has been marked as an "IRQ-safe" device (with respect * to runtime PM), in which case its runtime PM callabcks can be expected to * work correctly when invoked from interrupt handlers. 
*/ static inline bool pm_runtime_is_irq_safe(struct device *dev) { return dev->power.irq_safe; } extern u64 pm_runtime_suspended_time(struct device *dev); #else /* !CONFIG_PM */ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } static inline int pm_runtime_force_resume(struct device *dev) { return 0; } static inline int __pm_runtime_idle(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_suspend(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_resume(struct device *dev, int rpmflags) { return 1; } static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } static inline int pm_runtime_get_if_active(struct device *dev) { return -EINVAL; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } static inline void pm_runtime_no_callbacks(struct device *dev) {} static inline void pm_runtime_irq_safe(struct device *dev) {} static inline bool pm_runtime_is_irq_safe(struct device *dev) { return false; } static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return false; } static inline void pm_runtime_mark_last_busy(struct device *dev) {} static inline void __pm_runtime_use_autosuspend(struct device *dev, bool use) {} static inline void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) {} static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} static inline void pm_runtime_release_supplier(struct device_link *link) {} #endif /* !CONFIG_PM */ /** * pm_runtime_idle - Conditionally set up autosuspend of a device or suspend it. * @dev: Target device. * * Invoke the "idle check" callback of @dev and, depending on its return value, * set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend has been enabled for it). 
*/ static inline int pm_runtime_idle(struct device *dev) { return __pm_runtime_idle(dev, 0); } /** * pm_runtime_suspend - Suspend a device synchronously. * @dev: Target device. */ static inline int pm_runtime_suspend(struct device *dev) { return __pm_runtime_suspend(dev, 0); } /** * pm_runtime_autosuspend - Set up autosuspend of a device or suspend it. * @dev: Target device. * * Set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend is enabled for it) without engaging its "idle check" callback. */ static inline int pm_runtime_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_AUTO); } /** * pm_runtime_resume - Resume a device synchronously. * @dev: Target device. */ static inline int pm_runtime_resume(struct device *dev) { return __pm_runtime_resume(dev, 0); } /** * pm_request_idle - Queue up "idle check" execution for a device. * @dev: Target device. * * Queue up a work item to run an equivalent of pm_runtime_idle() for @dev * asynchronously. */ static inline int pm_request_idle(struct device *dev) { return __pm_runtime_idle(dev, RPM_ASYNC); } /** * pm_request_resume - Queue up runtime-resume of a device. * @dev: Target device. */ static inline int pm_request_resume(struct device *dev) { return __pm_runtime_resume(dev, RPM_ASYNC); } /** * pm_request_autosuspend - Queue up autosuspend of a device. * @dev: Target device. * * Queue up a work item to run an equivalent pm_runtime_autosuspend() for @dev * asynchronously. */ static inline int pm_request_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_get - Bump up usage counter and queue up resume of a device. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and queue up a work item to * carry out runtime-resume of it. */ static inline int pm_runtime_get(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); } /** * pm_runtime_get_sync - Bump up usage counter of a device and resume it. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and carry out runtime-resume of * it synchronously. * * The possible return values of this function are the same as for * pm_runtime_resume() and the runtime PM usage counter of @dev remains * incremented in all cases, even if it returns an error code. * Consider using pm_runtime_resume_and_get() instead of it, especially * if its return value is checked by the caller, as this is likely to result * in cleaner code. */ static inline int pm_runtime_get_sync(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT); } /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device. * * Resume @dev synchronously and if that is successful, increment its runtime * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been * incremented or a negative error code otherwise. */ static inline int pm_runtime_resume_and_get(struct device *dev) { int ret; ret = __pm_runtime_resume(dev, RPM_GET_PUT); if (ret < 0) { pm_runtime_put_noidle(dev); return ret; } return 0; } /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). 
*/ static inline int pm_runtime_put(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } /** * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). */ static inline int __pm_runtime_put_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). */ static inline int pm_runtime_put_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, invoke the "idle check" callback of @dev and, depending on its * return value, set up autosuspend of @dev or suspend it (depending on whether * or not autosuspend has been enabled for it). * * The possible return values of this function are the same as for * pm_runtime_idle() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_suspend - Drop device usage counter and suspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, carry out runtime-suspend of @dev synchronously. * * The possible return values of this function are the same as for * pm_runtime_suspend() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync_suspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_autosuspend - Drop device usage counter and autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, set up autosuspend of @dev or suspend it synchronously (depending * on whether or not autosuspend has been enabled for it). * * The possible return values of this function are the same as for * pm_runtime_autosuspend() and the runtime PM usage counter of @dev remains * decremented in all cases, even if it returns an error code. */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); } /** * pm_runtime_set_active - Set runtime PM status to "active". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_ACTIVE and ensure that dependencies * of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_active(struct device *dev) { return __pm_runtime_set_status(dev, RPM_ACTIVE); } /** * pm_runtime_set_suspended - Set runtime PM status to "suspended". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_SUSPENDED and ensure that * dependencies of it will be taken into account. 
* * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_suspended(struct device *dev) { return __pm_runtime_set_status(dev, RPM_SUSPENDED); } /** * pm_runtime_disable - Disable runtime PM for a device. * @dev: Target device. * * Prevent the runtime PM framework from working with @dev (by incrementing its * "blocking" counter). * * For each invocation of this function for @dev there must be a matching * pm_runtime_enable() call in order for runtime PM to be enabled for it. */ static inline void pm_runtime_disable(struct device *dev) { __pm_runtime_disable(dev, true); } /** * pm_runtime_use_autosuspend - Allow autosuspend to be used for a device. * @dev: Target device. * * Allow the runtime PM autosuspend mechanism to be used for @dev whenever * requested (or "autosuspend" will be handled as direct runtime-suspend for * it). * * NOTE: It's important to undo this with pm_runtime_dont_use_autosuspend() * at driver exit time unless your driver initially enabled pm_runtime * with devm_pm_runtime_enable() (which handles it for you). */ static inline void pm_runtime_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, true); } /** * pm_runtime_dont_use_autosuspend - Prevent autosuspend from being used. * @dev: Target device. * * Prevent the runtime PM autosuspend mechanism from being used for @dev which * means that "autosuspend" will be handled as direct runtime-suspend for it * going forward. */ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, false); } #endif
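/*
 * Editor's note: illustrative sketch, not part of <linux/pm_runtime.h>. It
 * shows the usage-counter pattern the helpers above are built around:
 * resume-and-get before touching the hardware, then mark-last-busy plus an
 * autosuspend put when done, as recommended in the pm_runtime_resume_and_get()
 * and pm_runtime_put_autosuspend() comments. example_dev_do_io() and the
 * elided hardware access are hypothetical driver code.
 */
static int example_dev_do_io(struct device *dev)
{
	int ret;

	/* Bump the usage counter and resume @dev synchronously. */
	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;	/* usage counter already dropped on failure */

	/* ... access the hardware here ... */

	/* Refresh the autosuspend timer, then drop the usage counter. */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
	return 0;
}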
// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/dir.c - kernfs directory implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/sched.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/idr.h> #include <linux/slab.h> #include <linux/security.h> #include <linux/hash.h> #include "kernfs-internal.h" static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ /* * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to * call pr_cont() while holding rename_lock. Because sometimes pr_cont() * will perform wakeups when releasing console_sem. Holding rename_lock * will introduce deadlock if the scheduler reads the kernfs_name in the * wakeup path. */ static DEFINE_SPINLOCK(kernfs_pr_cont_lock); static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) static bool __kernfs_active(struct kernfs_node *kn) { return atomic_read(&kn->active) >= 0; } static bool kernfs_active(struct kernfs_node *kn) { lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem); return __kernfs_active(kn); } static bool kernfs_lockdep(struct kernfs_node *kn) { #ifdef CONFIG_DEBUG_LOCK_ALLOC return kn->flags & KERNFS_LOCKDEP; #else return false; #endif } static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) { if (!kn) return strscpy(buf, "(null)", buflen); return strscpy(buf, kn->parent ? 
kn->name : "/", buflen); } /* kernfs_node_depth - compute depth from @from to @to */ static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { size_t depth = 0; while (to->parent && to != from) { depth++; to = to->parent; } return depth; } static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, struct kernfs_node *b) { size_t da, db; struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b); if (ra != rb) return NULL; da = kernfs_depth(ra->kn, a); db = kernfs_depth(rb->kn, b); while (da > db) { a = a->parent; da--; } while (db > da) { b = b->parent; db--; } /* worst case b and a will be the same at root */ while (b != a) { b = b->parent; a = a->parent; } return a; } /** * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to, * where kn_from is treated as root of the path. * @kn_from: kernfs node which should be treated as root for the path * @kn_to: kernfs node to which path is needed * @buf: buffer to copy the path into * @buflen: size of @buf * * We need to handle couple of scenarios here: * [1] when @kn_from is an ancestor of @kn_to at some level * kn_from: /n1/n2/n3 * kn_to: /n1/n2/n3/n4/n5 * result: /n4/n5 * * [2] when @kn_from is on a different hierarchy and we need to find common * ancestor between @kn_from and @kn_to. * kn_from: /n1/n2/n3/n4 * kn_to: /n1/n2/n5 * result: /../../n5 * OR * kn_from: /n1/n2/n3/n4/n5 [depth=5] * kn_to: /n1/n2/n3 [depth=3] * result: /../.. * * [3] when @kn_to is %NULL result will be "(null)" * * Return: the length of the constructed path. If the path would have been * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, struct kernfs_node *kn_from, char *buf, size_t buflen) { struct kernfs_node *kn, *common; const char parent_str[] = "/.."; size_t depth_from, depth_to, len = 0; ssize_t copied; int i, j; if (!kn_to) return strscpy(buf, "(null)", buflen); if (!kn_from) kn_from = kernfs_root(kn_to)->kn; if (kn_from == kn_to) return strscpy(buf, "/", buflen); common = kernfs_common_ancestor(kn_from, kn_to); if (WARN_ON(!common)) return -EINVAL; depth_to = kernfs_depth(common, kn_to); depth_from = kernfs_depth(common, kn_from); buf[0] = '\0'; for (i = 0; i < depth_from; i++) { copied = strscpy(buf + len, parent_str, buflen - len); if (copied < 0) return copied; len += copied; } /* Calculate how many bytes we need for the rest */ for (i = depth_to - 1; i >= 0; i--) { for (kn = kn_to, j = 0; j < i; j++) kn = kn->parent; len += scnprintf(buf + len, buflen - len, "/%s", kn->name); } return len; } /** * kernfs_name - obtain the name of a given node * @kn: kernfs_node of interest * @buf: buffer to copy @kn's name into * @buflen: size of @buf * * Copies the name of @kn into @buf of @buflen bytes. The behavior is * similar to strscpy(). * * Fills buffer with "(null)" if @kn is %NULL. * * Return: the resulting length of @buf. If @buf isn't long enough, * it's filled up to @buflen-1 and nul terminated, and returns -E2BIG. * * This function can be called from any context. */ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { unsigned long flags; int ret; read_lock_irqsave(&kernfs_rename_lock, flags); ret = kernfs_name_locked(kn, buf, buflen); read_unlock_irqrestore(&kernfs_rename_lock, flags); return ret; } /** * kernfs_path_from_node - build path of node @to relative to @from. 
* @from: parent kernfs_node relative to which we need to build the path * @to: kernfs_node of interest * @buf: buffer to copy @to's path into * @buflen: size of @buf * * Builds @to's path relative to @from in @buf. @from and @to must * be on the same kernfs-root. If @from is not parent of @to, then a relative * path (which includes '..'s) as needed to reach from @from to @to is * returned. * * Return: the length of the constructed path. If the path would have been * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, char *buf, size_t buflen) { unsigned long flags; int ret; read_lock_irqsave(&kernfs_rename_lock, flags); ret = kernfs_path_from_node_locked(to, from, buf, buflen); read_unlock_irqrestore(&kernfs_rename_lock, flags); return ret; } EXPORT_SYMBOL_GPL(kernfs_path_from_node); /** * pr_cont_kernfs_name - pr_cont name of a kernfs_node * @kn: kernfs_node of interest * * This function can be called from any context. */ void pr_cont_kernfs_name(struct kernfs_node *kn) { unsigned long flags; spin_lock_irqsave(&kernfs_pr_cont_lock, flags); kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); pr_cont("%s", kernfs_pr_cont_buf); spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** * pr_cont_kernfs_path - pr_cont path of a kernfs_node * @kn: kernfs_node of interest * * This function can be called from any context. */ void pr_cont_kernfs_path(struct kernfs_node *kn) { unsigned long flags; int sz; spin_lock_irqsave(&kernfs_pr_cont_lock, flags); sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); if (sz < 0) { if (sz == -E2BIG) pr_cont("(name too long)"); else pr_cont("(error)"); goto out; } pr_cont("%s", kernfs_pr_cont_buf); out: spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** * kernfs_get_parent - determine the parent node and pin it * @kn: kernfs_node of interest * * Determines @kn's parent, pins and returns it. This function can be * called from any context. 
* * Return: parent node of @kn */ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { struct kernfs_node *parent; unsigned long flags; read_lock_irqsave(&kernfs_rename_lock, flags); parent = kn->parent; kernfs_get(parent); read_unlock_irqrestore(&kernfs_rename_lock, flags); return parent; } /** * kernfs_name_hash - calculate hash of @ns + @name * @name: Null terminated string to hash * @ns: Namespace tag to hash * * Return: 31-bit hash of ns + name (so it fits in an off_t) */ static unsigned int kernfs_name_hash(const char *name, const void *ns) { unsigned long hash = init_name_hash(ns); unsigned int len = strlen(name); while (len--) hash = partial_name_hash(*name++, hash); hash = end_name_hash(hash); hash &= 0x7fffffffU; /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ if (hash < 2) hash += 2; if (hash >= INT_MAX) hash = INT_MAX - 1; return hash; } static int kernfs_name_compare(unsigned int hash, const char *name, const void *ns, const struct kernfs_node *kn) { if (hash < kn->hash) return -1; if (hash > kn->hash) return 1; if (ns < kn->ns) return -1; if (ns > kn->ns) return 1; return strcmp(name, kn->name); } static int kernfs_sd_compare(const struct kernfs_node *left, const struct kernfs_node *right) { return kernfs_name_compare(left->hash, left->name, left->ns, right); } /** * kernfs_link_sibling - link kernfs_node into sibling rbtree * @kn: kernfs_node of interest * * Link @kn into its sibling rbtree which starts from * @kn->parent->dir.children. * * Locking: * kernfs_rwsem held exclusive * * Return: * %0 on success, -EEXIST on failure. */ static int kernfs_link_sibling(struct kernfs_node *kn) { struct rb_node **node = &kn->parent->dir.children.rb_node; struct rb_node *parent = NULL; while (*node) { struct kernfs_node *pos; int result; pos = rb_to_kn(*node); parent = *node; result = kernfs_sd_compare(kn, pos); if (result < 0) node = &pos->rb.rb_left; else if (result > 0) node = &pos->rb.rb_right; else return -EEXIST; } /* add new node and rebalance the tree */ rb_link_node(&kn->rb, parent, node); rb_insert_color(&kn->rb, &kn->parent->dir.children); /* successfully added, account subdir number */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) kn->parent->dir.subdirs++; kernfs_inc_rev(kn->parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); return 0; } /** * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree * @kn: kernfs_node of interest * * Try to unlink @kn from its sibling rbtree which starts from * kn->parent->dir.children. * * Return: %true if @kn was actually removed, * %false if @kn wasn't on the rbtree. * * Locking: * kernfs_rwsem held exclusive */ static bool kernfs_unlink_sibling(struct kernfs_node *kn) { if (RB_EMPTY_NODE(&kn->rb)) return false; down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) kn->parent->dir.subdirs--; kernfs_inc_rev(kn->parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); rb_erase(&kn->rb, &kn->parent->dir.children); RB_CLEAR_NODE(&kn->rb); return true; } /** * kernfs_get_active - get an active reference to kernfs_node * @kn: kernfs_node to get an active reference to * * Get an active reference of @kn. This function is noop if @kn * is %NULL. * * Return: * Pointer to @kn on success, %NULL on failure. 
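 *
 * Illustrative usage sketch (not part of this file; "do_something" is a
 * made-up helper): a typical caller pins the node, does its work, and then
 * drops the active reference, the same pattern the syscall helpers further
 * below follow:
 *
 *	if (!kernfs_get_active(kn))
 *		return -ENODEV;
 *	ret = do_something(kn);
 *	kernfs_put_active(kn);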
*/ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) { if (unlikely(!kn)) return NULL; if (!atomic_inc_unless_negative(&kn->active)) return NULL; if (kernfs_lockdep(kn)) rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); return kn; } /** * kernfs_put_active - put an active reference to kernfs_node * @kn: kernfs_node to put an active reference to * * Put an active reference to @kn. This function is noop if @kn * is %NULL. */ void kernfs_put_active(struct kernfs_node *kn) { int v; if (unlikely(!kn)) return; if (kernfs_lockdep(kn)) rwsem_release(&kn->dep_map, _RET_IP_); v = atomic_dec_return(&kn->active); if (likely(v != KN_DEACTIVATED_BIAS)) return; wake_up_all(&kernfs_root(kn)->deactivate_waitq); } /** * kernfs_drain - drain kernfs_node * @kn: kernfs_node to drain * * Drain existing usages and nuke all existing mmaps of @kn. Multiple * removers may invoke this function concurrently on @kn and all will * return after draining is complete. */ static void kernfs_drain(struct kernfs_node *kn) __releases(&kernfs_root(kn)->kernfs_rwsem) __acquires(&kernfs_root(kn)->kernfs_rwsem) { struct kernfs_root *root = kernfs_root(kn); lockdep_assert_held_write(&root->kernfs_rwsem); WARN_ON_ONCE(kernfs_active(kn)); /* * Skip draining if already fully drained. This avoids draining and its * lockdep annotations for nodes which have never been activated * allowing embedding kernfs_remove() in create error paths without * worrying about draining. */ if (atomic_read(&kn->active) == KN_DEACTIVATED_BIAS && !kernfs_should_drain_open_files(kn)) return; up_write(&root->kernfs_rwsem); if (kernfs_lockdep(kn)) { rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) lock_contended(&kn->dep_map, _RET_IP_); } wait_event(root->deactivate_waitq, atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); if (kernfs_lockdep(kn)) { lock_acquired(&kn->dep_map, _RET_IP_); rwsem_release(&kn->dep_map, _RET_IP_); } if (kernfs_should_drain_open_files(kn)) kernfs_drain_open_files(kn); down_write(&root->kernfs_rwsem); } /** * kernfs_get - get a reference count on a kernfs_node * @kn: the target kernfs_node */ void kernfs_get(struct kernfs_node *kn) { if (kn) { WARN_ON(!atomic_read(&kn->count)); atomic_inc(&kn->count); } } EXPORT_SYMBOL_GPL(kernfs_get); static void kernfs_free_rcu(struct rcu_head *rcu) { struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu); kfree_const(kn->name); if (kn->iattr) { simple_xattrs_free(&kn->iattr->xattrs, NULL); kmem_cache_free(kernfs_iattrs_cache, kn->iattr); } kmem_cache_free(kernfs_node_cache, kn); } /** * kernfs_put - put a reference count on a kernfs_node * @kn: the target kernfs_node * * Put a reference count of @kn and destroy it if it reached zero. */ void kernfs_put(struct kernfs_node *kn) { struct kernfs_node *parent; struct kernfs_root *root; if (!kn || !atomic_dec_and_test(&kn->count)) return; root = kernfs_root(kn); repeat: /* * Moving/renaming is always done while holding reference. * kn->parent won't change beneath us. */ parent = kn->parent; WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, "kernfs_put: %s/%s: released with incorrect active_ref %d\n", parent ? 
parent->name : "", kn->name, atomic_read(&kn->active)); if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); spin_lock(&kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); spin_unlock(&kernfs_idr_lock); call_rcu(&kn->rcu, kernfs_free_rcu); kn = parent; if (kn) { if (atomic_dec_and_test(&kn->count)) goto repeat; } else { /* just released the root kn, free @root too */ idr_destroy(&root->ino_idr); kfree_rcu(root, rcu); } } EXPORT_SYMBOL_GPL(kernfs_put); /** * kernfs_node_from_dentry - determine kernfs_node associated with a dentry * @dentry: the dentry in question * * Return: the kernfs_node associated with @dentry. If @dentry is not a * kernfs one, %NULL is returned. * * While the returned kernfs_node will stay accessible as long as @dentry * is accessible, the returned node can be in any state and the caller is * fully responsible for determining what's accessible. */ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) { if (dentry->d_sb->s_op == &kernfs_sops) return kernfs_dentry_node(dentry); return NULL; } static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, unsigned flags) { struct kernfs_node *kn; u32 id_highbits; int ret; name = kstrdup_const(name, GFP_KERNEL); if (!name) return NULL; kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); if (!kn) goto err_out1; idr_preload(GFP_KERNEL); spin_lock(&kernfs_idr_lock); ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); if (ret >= 0 && ret < root->last_id_lowbits) root->id_highbits++; id_highbits = root->id_highbits; root->last_id_lowbits = ret; spin_unlock(&kernfs_idr_lock); idr_preload_end(); if (ret < 0) goto err_out2; kn->id = (u64)id_highbits << 32 | ret; atomic_set(&kn->count, 1); atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); kn->name = name; kn->mode = mode; kn->flags = flags; if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) { struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, .ia_uid = uid, .ia_gid = gid, }; ret = __kernfs_setattr(kn, &iattr); if (ret < 0) goto err_out3; } if (parent) { ret = security_kernfs_init_security(parent, kn); if (ret) goto err_out3; } return kn; err_out3: spin_lock(&kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); spin_unlock(&kernfs_idr_lock); err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: kfree_const(name); return NULL; } struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, unsigned flags) { struct kernfs_node *kn; if (parent->mode & S_ISGID) { /* this code block imitates inode_init_owner() for * kernfs */ if (parent->iattr) gid = parent->iattr->ia_gid; if (flags & KERNFS_DIR) mode |= S_ISGID; } kn = __kernfs_new_node(kernfs_root(parent), parent, name, mode, uid, gid, flags); if (kn) { kernfs_get(parent); kn->parent = parent; } return kn; } /* * kernfs_find_and_get_node_by_id - get kernfs_node from node id * @root: the kernfs root * @id: the target node id * * @id's lower 32bits encode ino and upper gen. If the gen portion is * zero, all generations are matched. * * Return: %NULL on failure, * otherwise a kernfs node with reference counter incremented. 
*/ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id) { struct kernfs_node *kn; ino_t ino = kernfs_id_ino(id); u32 gen = kernfs_id_gen(id); rcu_read_lock(); kn = idr_find(&root->ino_idr, (u32)ino); if (!kn) goto err_unlock; if (sizeof(ino_t) >= sizeof(u64)) { /* we looked up with the low 32bits, compare the whole */ if (kernfs_ino(kn) != ino) goto err_unlock; } else { /* 0 matches all generations */ if (unlikely(gen && kernfs_gen(kn) != gen)) goto err_unlock; } /* * We should fail if @kn has never been activated and guarantee success * if the caller knows that @kn is active. Both can be achieved by * __kernfs_active() which tests @kn->active without kernfs_rwsem. */ if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) goto err_unlock; rcu_read_unlock(); return kn; err_unlock: rcu_read_unlock(); return NULL; } /** * kernfs_add_one - add kernfs_node to parent without warning * @kn: kernfs_node to be added * * The caller must already have initialized @kn->parent. This * function increments nlink of the parent's inode if @kn is a * directory and link into the children list of the parent. * * Return: * %0 on success, -EEXIST if entry with the given name already * exists. */ int kernfs_add_one(struct kernfs_node *kn) { struct kernfs_node *parent = kn->parent; struct kernfs_root *root = kernfs_root(parent); struct kernfs_iattrs *ps_iattr; bool has_ns; int ret; down_write(&root->kernfs_rwsem); ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", has_ns ? "required" : "invalid", parent->name, kn->name)) goto out_unlock; if (kernfs_type(parent) != KERNFS_DIR) goto out_unlock; ret = -ENOENT; if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR)) goto out_unlock; kn->hash = kernfs_name_hash(kn->name, kn->ns); ret = kernfs_link_sibling(kn); if (ret) goto out_unlock; /* Update timestamps on the parent */ down_write(&root->kernfs_iattr_rwsem); ps_iattr = parent->iattr; if (ps_iattr) { ktime_get_real_ts64(&ps_iattr->ia_ctime); ps_iattr->ia_mtime = ps_iattr->ia_ctime; } up_write(&root->kernfs_iattr_rwsem); up_write(&root->kernfs_rwsem); /* * Activate the new node unless CREATE_DEACTIVATED is requested. * If not activated here, the kernfs user is responsible for * activating the node with kernfs_activate(). A node which hasn't * been activated is not visible to userland and its removal won't * trigger deactivation. */ if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) kernfs_activate(kn); return 0; out_unlock: up_write(&root->kernfs_rwsem); return ret; } /** * kernfs_find_ns - find kernfs_node with the given name * @parent: kernfs_node to search under * @name: name to look for * @ns: the namespace tag to use * * Look for kernfs_node with name @name under @parent. * * Return: pointer to the found kernfs_node on success, %NULL on failure. */ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, const unsigned char *name, const void *ns) { struct rb_node *node = parent->dir.children.rb_node; bool has_ns = kernfs_ns_enabled(parent); unsigned int hash; lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem); if (has_ns != (bool)ns) { WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", has_ns ? 
"required" : "invalid", parent->name, name); return NULL; } hash = kernfs_name_hash(name, ns); while (node) { struct kernfs_node *kn; int result; kn = rb_to_kn(node); result = kernfs_name_compare(hash, name, ns, kn); if (result < 0) node = node->rb_left; else if (result > 0) node = node->rb_right; else return kn; } return NULL; } static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, const unsigned char *path, const void *ns) { ssize_t len; char *p, *name; lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem); spin_lock_irq(&kernfs_pr_cont_lock); len = strscpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); if (len < 0) { spin_unlock_irq(&kernfs_pr_cont_lock); return NULL; } p = kernfs_pr_cont_buf; while ((name = strsep(&p, "/")) && parent) { if (*name == '\0') continue; parent = kernfs_find_ns(parent, name, ns); } spin_unlock_irq(&kernfs_pr_cont_lock); return parent; } /** * kernfs_find_and_get_ns - find and get kernfs_node with the given name * @parent: kernfs_node to search under * @name: name to look for * @ns: the namespace tag to use * * Look for kernfs_node with name @name under @parent and get a reference * if found. This function may sleep. * * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); down_read(&root->kernfs_rwsem); kn = kernfs_find_ns(parent, name, ns); kernfs_get(kn); up_read(&root->kernfs_rwsem); return kn; } EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); /** * kernfs_walk_and_get_ns - find and get kernfs_node with the given path * @parent: kernfs_node to search under * @path: path to look for * @ns: the namespace tag to use * * Look for kernfs_node with path @path under @parent and get a reference * if found. This function may sleep. * * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); down_read(&root->kernfs_rwsem); kn = kernfs_walk_ns(parent, path, ns); kernfs_get(kn); up_read(&root->kernfs_rwsem); return kn; } /** * kernfs_create_root - create a new kernfs hierarchy * @scops: optional syscall operations for the hierarchy * @flags: KERNFS_ROOT_* flags * @priv: opaque data associated with the new directory * * Return: the root of the new hierarchy on success, ERR_PTR() value on * failure. */ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) { struct kernfs_root *root; struct kernfs_node *kn; root = kzalloc(sizeof(*root), GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); idr_init(&root->ino_idr); init_rwsem(&root->kernfs_rwsem); init_rwsem(&root->kernfs_iattr_rwsem); init_rwsem(&root->kernfs_supers_rwsem); INIT_LIST_HEAD(&root->supers); /* * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino. * High bits generation. The starting value for both ino and * genenration is 1. Initialize upper 32bit allocation * accordingly. 
*/ if (sizeof(ino_t) >= sizeof(u64)) root->id_highbits = 0; else root->id_highbits = 1; kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) { idr_destroy(&root->ino_idr); kfree(root); return ERR_PTR(-ENOMEM); } kn->priv = priv; kn->dir.root = root; root->syscall_ops = scops; root->flags = flags; root->kn = kn; init_waitqueue_head(&root->deactivate_waitq); if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) kernfs_activate(kn); return root; } /** * kernfs_destroy_root - destroy a kernfs hierarchy * @root: root of the hierarchy to destroy * * Destroy the hierarchy anchored at @root by removing all existing * directories and destroying @root. */ void kernfs_destroy_root(struct kernfs_root *root) { /* * kernfs_remove holds kernfs_rwsem from the root so the root * shouldn't be freed during the operation. */ kernfs_get(root->kn); kernfs_remove(root->kn); kernfs_put(root->kn); /* will also free @root */ } /** * kernfs_root_to_node - return the kernfs_node associated with a kernfs_root * @root: root to use to lookup * * Return: @root's kernfs_node */ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) { return root->kn; } /** * kernfs_create_dir_ns - create a directory * @parent: parent in which to create a new directory * @name: name of the new directory * @mode: mode of the new directory * @uid: uid of the new directory * @gid: gid of the new directory * @priv: opaque data associated with the new directory * @ns: optional namespace tag of the directory * * Return: the created node on success, ERR_PTR() value on failure. */ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns) { struct kernfs_node *kn; int rc; /* allocate */ kn = kernfs_new_node(parent, name, mode | S_IFDIR, uid, gid, KERNFS_DIR); if (!kn) return ERR_PTR(-ENOMEM); kn->dir.root = parent->dir.root; kn->ns = ns; kn->priv = priv; /* link in */ rc = kernfs_add_one(kn); if (!rc) return kn; kernfs_put(kn); return ERR_PTR(rc); } /** * kernfs_create_empty_dir - create an always empty directory * @parent: parent in which to create a new directory * @name: name of the new directory * * Return: the created node on success, ERR_PTR() value on failure. */ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name) { struct kernfs_node *kn; int rc; /* allocate */ kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) return ERR_PTR(-ENOMEM); kn->flags |= KERNFS_EMPTY_DIR; kn->dir.root = parent->dir.root; kn->ns = NULL; kn->priv = NULL; /* link in */ rc = kernfs_add_one(kn); if (!rc) return kn; kernfs_put(kn); return ERR_PTR(rc); } static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) { struct kernfs_node *kn; struct kernfs_root *root; if (flags & LOOKUP_RCU) return -ECHILD; /* Negative hashed dentry? */ if (d_really_is_negative(dentry)) { struct kernfs_node *parent; /* If the kernfs parent node has changed discard and * proceed to ->lookup. * * There's nothing special needed here when getting the * dentry parent, even if a concurrent rename is in * progress. That's because the dentry is negative so * it can only be the target of the rename and it will * be doing a d_move() not a replace. Consequently the * dentry d_parent won't change over the d_move(). 
* * Also kernfs negative dentries transitioning from * negative to positive during revalidate won't happen * because they are invalidated on containing directory * changes and the lookup re-done so that a new positive * dentry can be properly created. */ root = kernfs_root_from_sb(dentry->d_sb); down_read(&root->kernfs_rwsem); parent = kernfs_dentry_node(dentry->d_parent); if (parent) { if (kernfs_dir_changed(parent, dentry)) { up_read(&root->kernfs_rwsem); return 0; } } up_read(&root->kernfs_rwsem); /* The kernfs parent node hasn't changed, leave the * dentry negative and return success. */ return 1; } kn = kernfs_dentry_node(dentry); root = kernfs_root(kn); down_read(&root->kernfs_rwsem); /* The kernfs node has been deactivated */ if (!kernfs_active(kn)) goto out_bad; /* The kernfs node has been moved? */ if (kernfs_dentry_node(dentry->d_parent) != kn->parent) goto out_bad; /* The kernfs node has been renamed */ if (strcmp(dentry->d_name.name, kn->name) != 0) goto out_bad; /* The kernfs node has been moved to a different namespace */ if (kn->parent && kernfs_ns_enabled(kn->parent) && kernfs_info(dentry->d_sb)->ns != kn->ns) goto out_bad; up_read(&root->kernfs_rwsem); return 1; out_bad: up_read(&root->kernfs_rwsem); return 0; } const struct dentry_operations kernfs_dops = { .d_revalidate = kernfs_dop_revalidate, }; static struct dentry *kernfs_iop_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct kernfs_node *parent = dir->i_private; struct kernfs_node *kn; struct kernfs_root *root; struct inode *inode = NULL; const void *ns = NULL; root = kernfs_root(parent); down_read(&root->kernfs_rwsem); if (kernfs_ns_enabled(parent)) ns = kernfs_info(dir->i_sb)->ns; kn = kernfs_find_ns(parent, dentry->d_name.name, ns); /* attach dentry and inode */ if (kn) { /* Inactive nodes are invisible to the VFS so don't * create a negative. */ if (!kernfs_active(kn)) { up_read(&root->kernfs_rwsem); return NULL; } inode = kernfs_get_inode(dir->i_sb, kn); if (!inode) inode = ERR_PTR(-ENOMEM); } /* * Needed for negative dentry validation. * The negative dentry can be created in kernfs_iop_lookup() * or transforms from positive dentry in dentry_unlink_inode() * called from vfs_rmdir(). 
*/ if (!IS_ERR(inode)) kernfs_set_rev(parent, dentry); up_read(&root->kernfs_rwsem); /* instantiate and hash (possibly negative) dentry */ return d_splice_alias(inode, dentry); } static int kernfs_iop_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct kernfs_node *parent = dir->i_private; struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; int ret; if (!scops || !scops->mkdir) return -EPERM; if (!kernfs_get_active(parent)) return -ENODEV; ret = scops->mkdir(parent, dentry->d_name.name, mode); kernfs_put_active(parent); return ret; } static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) { struct kernfs_node *kn = kernfs_dentry_node(dentry); struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; int ret; if (!scops || !scops->rmdir) return -EPERM; if (!kernfs_get_active(kn)) return -ENODEV; ret = scops->rmdir(kn); kernfs_put_active(kn); return ret; } static int kernfs_iop_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct kernfs_node *kn = kernfs_dentry_node(old_dentry); struct kernfs_node *new_parent = new_dir->i_private; struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; int ret; if (flags) return -EINVAL; if (!scops || !scops->rename) return -EPERM; if (!kernfs_get_active(kn)) return -ENODEV; if (!kernfs_get_active(new_parent)) { kernfs_put_active(kn); return -ENODEV; } ret = scops->rename(kn, new_parent, new_dentry->d_name.name); kernfs_put_active(new_parent); kernfs_put_active(kn); return ret; } const struct inode_operations kernfs_dir_iops = { .lookup = kernfs_iop_lookup, .permission = kernfs_iop_permission, .setattr = kernfs_iop_setattr, .getattr = kernfs_iop_getattr, .listxattr = kernfs_iop_listxattr, .mkdir = kernfs_iop_mkdir, .rmdir = kernfs_iop_rmdir, .rename = kernfs_iop_rename, }; static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) { struct kernfs_node *last; while (true) { struct rb_node *rbn; last = pos; if (kernfs_type(pos) != KERNFS_DIR) break; rbn = rb_first(&pos->dir.children); if (!rbn) break; pos = rb_to_kn(rbn); } return last; } /** * kernfs_next_descendant_post - find the next descendant for post-order walk * @pos: the current position (%NULL to initiate traversal) * @root: kernfs_node whose descendants to walk * * Find the next descendant to visit for post-order traversal of @root's * descendants. @root is included in the iteration and the last node to be * visited. * * Return: the next descendant to visit or %NULL when done. 
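 *
 * Illustrative walk (sketch only; "visit" is a made-up callback):
 * kernfs_activate() and __kernfs_remove() below iterate a whole subtree
 * with exactly this pattern:
 *
 *	pos = NULL;
 *	while ((pos = kernfs_next_descendant_post(pos, root)))
 *		visit(pos);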
*/ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, struct kernfs_node *root) { struct rb_node *rbn; lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem); /* if first iteration, visit leftmost descendant which may be root */ if (!pos) return kernfs_leftmost_descendant(root); /* if we visited @root, we're done */ if (pos == root) return NULL; /* if there's an unvisited sibling, visit its leftmost descendant */ rbn = rb_next(&pos->rb); if (rbn) return kernfs_leftmost_descendant(rb_to_kn(rbn)); /* no sibling left, visit parent */ return pos->parent; } static void kernfs_activate_one(struct kernfs_node *kn) { lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); kn->flags |= KERNFS_ACTIVATED; if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING))) return; WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb)); WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); } /** * kernfs_activate - activate a node which started deactivated * @kn: kernfs_node whose subtree is to be activated * * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node * needs to be explicitly activated. A node which hasn't been activated * isn't visible to userland and deactivation is skipped during its * removal. This is useful to construct atomic init sequences where * creation of multiple nodes should either succeed or fail atomically. * * The caller is responsible for ensuring that this function is not called * after kernfs_remove*() is invoked on @kn. */ void kernfs_activate(struct kernfs_node *kn) { struct kernfs_node *pos; struct kernfs_root *root = kernfs_root(kn); down_write(&root->kernfs_rwsem); pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) kernfs_activate_one(pos); up_write(&root->kernfs_rwsem); } /** * kernfs_show - show or hide a node * @kn: kernfs_node to show or hide * @show: whether to show or hide * * If @show is %false, @kn is marked hidden and deactivated. A hidden node is * ignored in future activations. If %true, the mark is removed and activation * state is restored. This function won't implicitly activate a new node in a * %KERNFS_ROOT_CREATE_DEACTIVATED root which hasn't been activated yet. * * To avoid recursion complexities, directories aren't supported for now. */ void kernfs_show(struct kernfs_node *kn, bool show) { struct kernfs_root *root = kernfs_root(kn); if (WARN_ON_ONCE(kernfs_type(kn) == KERNFS_DIR)) return; down_write(&root->kernfs_rwsem); if (show) { kn->flags &= ~KERNFS_HIDDEN; if (kn->flags & KERNFS_ACTIVATED) kernfs_activate_one(kn); } else { kn->flags |= KERNFS_HIDDEN; if (kernfs_active(kn)) atomic_add(KN_DEACTIVATED_BIAS, &kn->active); kernfs_drain(kn); } up_write(&root->kernfs_rwsem); } static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos; /* Short-circuit if non-root @kn has already finished removal. */ if (!kn) return; lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); /* * This is for kernfs_remove_self() which plays with active ref * after removal.
*/ if (kn->parent && RB_EMPTY_NODE(&kn->rb)) return; pr_debug("kernfs %s: removing\n", kn->name); /* prevent new usage by marking all nodes removing and deactivating */ pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) { pos->flags |= KERNFS_REMOVING; if (kernfs_active(pos)) atomic_add(KN_DEACTIVATED_BIAS, &pos->active); } /* deactivate and unlink the subtree node-by-node */ do { pos = kernfs_leftmost_descendant(kn); /* * kernfs_drain() may drop kernfs_rwsem temporarily and @pos's * base ref could have been put by someone else by the time * the function returns. Make sure it doesn't go away * underneath us. */ kernfs_get(pos); kernfs_drain(pos); /* * kernfs_unlink_sibling() succeeds once per node. Use it * to decide who's responsible for cleanups. */ if (!pos->parent || kernfs_unlink_sibling(pos)) { struct kernfs_iattrs *ps_iattr = pos->parent ? pos->parent->iattr : NULL; /* update timestamps on the parent */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (ps_iattr) { ktime_get_real_ts64(&ps_iattr->ia_ctime); ps_iattr->ia_mtime = ps_iattr->ia_ctime; } up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); kernfs_put(pos); } kernfs_put(pos); } while (pos != kn); } /** * kernfs_remove - remove a kernfs_node recursively * @kn: the kernfs_node to remove * * Remove @kn along with all its subdirectories and files. */ void kernfs_remove(struct kernfs_node *kn) { struct kernfs_root *root; if (!kn) return; root = kernfs_root(kn); down_write(&root->kernfs_rwsem); __kernfs_remove(kn); up_write(&root->kernfs_rwsem); } /** * kernfs_break_active_protection - break out of active protection * @kn: the self kernfs_node * * The caller must be running off of a kernfs operation which is invoked * with an active reference - e.g. one of kernfs_ops. Each invocation of * this function must also be matched with an invocation of * kernfs_unbreak_active_protection(). * * This function releases the active reference of @kn the caller is * holding. Once this function is called, @kn may be removed at any point * and the caller is solely responsible for ensuring that the objects it * dereferences are accessible. */ void kernfs_break_active_protection(struct kernfs_node *kn) { /* * Take ourself out of the active ref dependency chain. If * we're called without an active ref, lockdep will complain. */ kernfs_put_active(kn); } /** * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() * @kn: the self kernfs_node * * If kernfs_break_active_protection() was called, this function must be * invoked before finishing the kernfs operation. Note that while this * function restores the active reference, it doesn't and can't actually * restore the active protection; @kn may already have been removed or be * in the process of being removed. Once kernfs_break_active_protection() * is invoked, that protection is irreversibly gone for the kernfs * operation instance. * * While this function may be called at any point after * kernfs_break_active_protection() is invoked, its most useful location * would be right before the enclosing kernfs operation returns. */ void kernfs_unbreak_active_protection(struct kernfs_node *kn) { /* * @kn->active could be in any state; however, the increment we do * here will be undone as soon as the enclosing kernfs operation * finishes and this temporary bump can't break anything. If @kn * is alive, nothing changes. If @kn is being deactivated, the * soon-to-follow put will either finish deactivation or restore * deactivated state.
If @kn is already removed, the temporary * bump is guaranteed to be gone before @kn is released. */ atomic_inc(&kn->active); if (kernfs_lockdep(kn)) rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); } /** * kernfs_remove_self - remove a kernfs_node from its own method * @kn: the self kernfs_node to remove * * The caller must be running off of a kernfs operation which is invoked * with an active reference - e.g. one of kernfs_ops. This can be used to * implement a file operation which deletes itself. * * For example, the "delete" file for a sysfs device directory can be * implemented by invoking kernfs_remove_self() on the "delete" file * itself. This function breaks the circular dependency of trying to * deactivate self while holding an active ref itself. It isn't necessary * to modify the usual removal path to use kernfs_remove_self(). The * "delete" implementation can simply invoke kernfs_remove_self() on self * before proceeding with the usual removal path. kernfs will ignore later * kernfs_remove() on self. * * kernfs_remove_self() can be called multiple times concurrently on the * same kernfs_node. Only the first one actually performs removal and * returns %true. All others will wait until the kernfs operation which * won self-removal finishes and return %false. Note that the losers wait * for the completion of not only the winning kernfs_remove_self() but also * the whole kernfs_ops which won the arbitration. This can be used to * guarantee, for example, that all concurrent writes to a "delete" file * finish only after the whole operation is complete. * * Return: %true if @kn is removed by this call, otherwise %false. */ bool kernfs_remove_self(struct kernfs_node *kn) { bool ret; struct kernfs_root *root = kernfs_root(kn); down_write(&root->kernfs_rwsem); kernfs_break_active_protection(kn); /* * SUICIDAL is used to arbitrate among competing invocations. Only * the first one will actually perform removal. When the removal * is complete, SUICIDED is set and the active ref is restored * while kernfs_rwsem is held exclusive. The ones which lost the * arbitration wait for SUICIDED && drained which can happen only * after the enclosing kernfs operation which executed the winning * instance of kernfs_remove_self() finished. */ if (!(kn->flags & KERNFS_SUICIDAL)) { kn->flags |= KERNFS_SUICIDAL; __kernfs_remove(kn); kn->flags |= KERNFS_SUICIDED; ret = true; } else { wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; DEFINE_WAIT(wait); while (true) { prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); if ((kn->flags & KERNFS_SUICIDED) && atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) break; up_write(&root->kernfs_rwsem); schedule(); down_write(&root->kernfs_rwsem); } finish_wait(waitq, &wait); WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); ret = false; } /* * This must be done while kernfs_rwsem held exclusive; otherwise, * waiting for SUICIDED && deactivated could finish prematurely. */ kernfs_unbreak_active_protection(kn); up_write(&root->kernfs_rwsem); return ret; } /** * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it * @parent: parent of the target * @name: name of the kernfs_node to remove * @ns: namespace tag of the kernfs_node to remove * * Look for the kernfs_node with @name and @ns under @parent and remove it. * * Return: %0 on success, -ENOENT if such entry doesn't exist.
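 *
 * Illustrative call (names are hypothetical): a kernfs user that earlier
 * created a child named "stats" under @parent can drop it again with:
 *
 *	err = kernfs_remove_by_name_ns(parent, "stats", NULL);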
*/ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns) { struct kernfs_node *kn; struct kernfs_root *root; if (!parent) { WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n", name); return -ENOENT; } root = kernfs_root(parent); down_write(&root->kernfs_rwsem); kn = kernfs_find_ns(parent, name, ns); if (kn) { kernfs_get(kn); __kernfs_remove(kn); kernfs_put(kn); } up_write(&root->kernfs_rwsem); if (kn) return 0; else return -ENOENT; } /** * kernfs_rename_ns - move and rename a kernfs_node * @kn: target node * @new_parent: new parent to put @sd under * @new_name: new name * @new_ns: new namespace tag * * Return: %0 on success, -errno on failure. */ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { struct kernfs_node *old_parent; struct kernfs_root *root; const char *old_name = NULL; int error; /* can't move or rename root */ if (!kn->parent) return -EINVAL; root = kernfs_root(kn); down_write(&root->kernfs_rwsem); error = -ENOENT; if (!kernfs_active(kn) || !kernfs_active(new_parent) || (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; error = 0; if ((kn->parent == new_parent) && (kn->ns == new_ns) && (strcmp(kn->name, new_name) == 0)) goto out; /* nothing to rename */ error = -EEXIST; if (kernfs_find_ns(new_parent, new_name, new_ns)) goto out; /* rename kernfs_node */ if (strcmp(kn->name, new_name) != 0) { error = -ENOMEM; new_name = kstrdup_const(new_name, GFP_KERNEL); if (!new_name) goto out; } else { new_name = NULL; } /* * Move to the appropriate place in the appropriate directories rbtree. */ kernfs_unlink_sibling(kn); kernfs_get(new_parent); /* rename_lock protects ->parent and ->name accessors */ write_lock_irq(&kernfs_rename_lock); old_parent = kn->parent; kn->parent = new_parent; kn->ns = new_ns; if (new_name) { old_name = kn->name; kn->name = new_name; } write_unlock_irq(&kernfs_rename_lock); kn->hash = kernfs_name_hash(kn->name, kn->ns); kernfs_link_sibling(kn); kernfs_put(old_parent); kfree_const(old_name); error = 0; out: up_write(&root->kernfs_rwsem); return error; } static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) { kernfs_put(filp->private_data); return 0; } static struct kernfs_node *kernfs_dir_pos(const void *ns, struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) { if (pos) { int valid = kernfs_active(pos) && pos->parent == parent && hash == pos->hash; kernfs_put(pos); if (!valid) pos = NULL; } if (!pos && (hash > 1) && (hash < INT_MAX)) { struct rb_node *node = parent->dir.children.rb_node; while (node) { pos = rb_to_kn(node); if (hash < pos->hash) node = node->rb_left; else if (hash > pos->hash) node = node->rb_right; else break; } } /* Skip over entries which are dying/dead or in the wrong namespace */ while (pos && (!kernfs_active(pos) || pos->ns != ns)) { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; else pos = rb_to_kn(node); } return pos; } static struct kernfs_node *kernfs_dir_next_pos(const void *ns, struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) { pos = kernfs_dir_pos(ns, parent, ino, pos); if (pos) { do { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; else pos = rb_to_kn(node); } while (pos && (!kernfs_active(pos) || pos->ns != ns)); } return pos; } static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct kernfs_node *parent = kernfs_dentry_node(dentry); struct kernfs_node *pos = 
file->private_data; struct kernfs_root *root; const void *ns = NULL; if (!dir_emit_dots(file, ctx)) return 0; root = kernfs_root(parent); down_read(&root->kernfs_rwsem); if (kernfs_ns_enabled(parent)) ns = kernfs_info(dentry->d_sb)->ns; for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); pos; pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { const char *name = pos->name; unsigned int type = fs_umode_to_dtype(pos->mode); int len = strlen(name); ino_t ino = kernfs_ino(pos); ctx->pos = pos->hash; file->private_data = pos; kernfs_get(pos); up_read(&root->kernfs_rwsem); if (!dir_emit(ctx, name, len, ino, type)) return 0; down_read(&root->kernfs_rwsem); } up_read(&root->kernfs_rwsem); file->private_data = NULL; ctx->pos = INT_MAX; return 0; } const struct file_operations kernfs_dir_fops = { .read = generic_read_dir, .iterate_shared = kernfs_fop_readdir, .release = kernfs_dir_fop_release, .llseek = generic_file_llseek, };
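/*
 * Illustrative sketch only, not part of kernfs itself. It shows how a
 * hypothetical kernfs user could combine the directory API above: create
 * a hierarchy whose nodes start deactivated, add a directory, and then
 * expose the whole subtree atomically with kernfs_activate().
 * "example_root", "example_kernfs_setup" and "example_kernfs_teardown"
 * are made-up names.
 */
static struct kernfs_root *example_root;

static int example_kernfs_setup(void)
{
	struct kernfs_node *root_kn, *dir;

	/* Nodes created under this root stay invisible until activated. */
	example_root = kernfs_create_root(NULL, KERNFS_ROOT_CREATE_DEACTIVATED,
					  NULL);
	if (IS_ERR(example_root))
		return PTR_ERR(example_root);

	root_kn = kernfs_root_to_node(example_root);

	/* A world-searchable directory, no namespace tag, no private data. */
	dir = kernfs_create_dir_ns(root_kn, "example", S_IRUGO | S_IXUGO,
				   GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
				   NULL, NULL);
	if (IS_ERR(dir)) {
		kernfs_destroy_root(example_root);
		return PTR_ERR(dir);
	}

	/* Everything was created successfully; make the subtree visible. */
	kernfs_activate(root_kn);
	return 0;
}

static void example_kernfs_teardown(void)
{
	/* Removes the directory recursively, then frees the root. */
	kernfs_remove_by_name_ns(kernfs_root_to_node(example_root),
				 "example", NULL);
	kernfs_destroy_root(example_root);
}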
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/kernel/signal.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-11-02  Modified for POSIX.1b signals by Richard Henderson
 *
 *  2003-06-02  Jim Houston - Concurrent Computer Corp.
 *		Changes to use preallocated sigqueue structures
 *		to allow signals to be sent reliably.
*/ #include <linux/slab.h> #include <linux/export.h> #include <linux/init.h> #include <linux/sched/mm.h> #include <linux/sched/user.h> #include <linux/sched/debug.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/sched/cputime.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/tty.h> #include <linux/binfmts.h> #include <linux/coredump.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/signalfd.h> #include <linux/ratelimit.h> #include <linux/task_work.h> #include <linux/capability.h> #include <linux/freezer.h> #include <linux/pid_namespace.h> #include <linux/nsproxy.h> #include <linux/user_namespace.h> #include <linux/uprobes.h> #include <linux/compat.h> #include <linux/cn_proc.h> #include <linux/compiler.h> #include <linux/posix-timers.h> #include <linux/cgroup.h> #include <linux/audit.h> #include <linux/sysctl.h> #include <uapi/linux/pidfd.h> #define CREATE_TRACE_POINTS #include <trace/events/signal.h> #include <asm/param.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/siginfo.h> #include <asm/cacheflush.h> #include <asm/syscall.h> /* for syscall_get_* */ /* * SLAB caches for signal bits. */ static struct kmem_cache *sigqueue_cachep; int print_fatal_signals __read_mostly; static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; } static inline bool sig_handler_ignored(void __user *handler, int sig) { /* Is it explicitly or implicitly ignored? */ return handler == SIG_IGN || (handler == SIG_DFL && sig_kernel_ignore(sig)); } static bool sig_task_ignored(struct task_struct *t, int sig, bool force) { void __user *handler; handler = sig_handler(t, sig); /* SIGKILL and SIGSTOP may not be sent to the global init */ if (unlikely(is_global_init(t) && sig_kernel_only(sig))) return true; if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && handler == SIG_DFL && !(force && sig_kernel_only(sig))) return true; /* Only allow kernel generated signals to this kthread */ if (unlikely((t->flags & PF_KTHREAD) && (handler == SIG_KTHREAD_KERNEL) && !force)) return true; return sig_handler_ignored(handler, sig); } static bool sig_ignored(struct task_struct *t, int sig, bool force) { /* * Blocked signals are never ignored, since the * signal handler may change by the time it is * unblocked. */ if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return false; /* * Tracers may want to know about even ignored signal unless it * is SIGKILL which can't be reported anyway but can be ignored * by SIGNAL_UNKILLABLE task. */ if (t->ptrace && sig != SIGKILL) return false; return sig_task_ignored(t, sig, force); } /* * Re-calculate pending state from the set of locally pending * signals, globally pending signals, and blocked signals. 
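 *
 * Illustrative only (not a quote of any particular caller): code that
 * changes current->blocked is expected to re-run this machinery under
 * siglock, roughly
 *
 *	spin_lock_irq(&current->sighand->siglock);
 *	current->blocked = newset;		/* hypothetical new mask */
 *	recalc_sigpending();
 *	spin_unlock_irq(&current->sighand->siglock);
 *
 * so that TIF_SIGPENDING stays consistent with the pending and blocked
 * sets computed below.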
*/ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) { unsigned long ready; long i; switch (_NSIG_WORDS) { default: for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;) ready |= signal->sig[i] &~ blocked->sig[i]; break; case 4: ready = signal->sig[3] &~ blocked->sig[3]; ready |= signal->sig[2] &~ blocked->sig[2]; ready |= signal->sig[1] &~ blocked->sig[1]; ready |= signal->sig[0] &~ blocked->sig[0]; break; case 2: ready = signal->sig[1] &~ blocked->sig[1]; ready |= signal->sig[0] &~ blocked->sig[0]; break; case 1: ready = signal->sig[0] &~ blocked->sig[0]; } return ready != 0; } #define PENDING(p,b) has_pending_signals(&(p)->signal, (b)) static bool recalc_sigpending_tsk(struct task_struct *t) { if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked) || cgroup_task_frozen(t)) { set_tsk_thread_flag(t, TIF_SIGPENDING); return true; } /* * We must never clear the flag in another thread, or in current * when it's possible the current syscall is returning -ERESTART*. * So we don't clear it here, and only callers who know they should do. */ return false; } void recalc_sigpending(void) { if (!recalc_sigpending_tsk(current) && !freezing(current)) clear_thread_flag(TIF_SIGPENDING); } EXPORT_SYMBOL(recalc_sigpending); void calculate_sigpending(void) { /* Have any signals or users of TIF_SIGPENDING been delayed * until after fork? */ spin_lock_irq(&current->sighand->siglock); set_tsk_thread_flag(current, TIF_SIGPENDING); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } /* Given the mask, find the first available signal that should be serviced. */ #define SYNCHRONOUS_MASK \ (sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \ sigmask(SIGTRAP) | sigmask(SIGFPE) | sigmask(SIGSYS)) int next_signal(struct sigpending *pending, sigset_t *mask) { unsigned long i, *s, *m, x; int sig = 0; s = pending->signal.sig; m = mask->sig; /* * Handle the first word specially: it contains the * synchronous signals that need to be dequeued first. */ x = *s &~ *m; if (x) { if (x & SYNCHRONOUS_MASK) x &= SYNCHRONOUS_MASK; sig = ffz(~x) + 1; return sig; } switch (_NSIG_WORDS) { default: for (i = 1; i < _NSIG_WORDS; ++i) { x = *++s &~ *++m; if (!x) continue; sig = ffz(~x) + i*_NSIG_BPW + 1; break; } break; case 2: x = s[1] &~ m[1]; if (!x) break; sig = ffz(~x) + _NSIG_BPW + 1; break; case 1: /* Nothing to do */ break; } return sig; } static inline void print_dropped_signal(int sig) { static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); if (!print_fatal_signals) return; if (!__ratelimit(&ratelimit_state)) return; pr_info("%s/%d: reached RLIMIT_SIGPENDING, dropped signal %d\n", current->comm, current->pid, sig); } /** * task_set_jobctl_pending - set jobctl pending bits * @task: target task * @mask: pending bits to set * * Clear @mask from @task->jobctl. @mask must be subset of * %JOBCTL_PENDING_MASK | %JOBCTL_STOP_CONSUME | %JOBCTL_STOP_SIGMASK | * %JOBCTL_TRAPPING. If stop signo is being set, the existing signo is * cleared. If @task is already being killed or exiting, this function * becomes noop. * * CONTEXT: * Must be called with @task->sighand->siglock held. * * RETURNS: * %true if @mask is set, %false if made noop because @task was dying. 
*/ bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask) { BUG_ON(mask & ~(JOBCTL_PENDING_MASK | JOBCTL_STOP_CONSUME | JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK)); if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING))) return false; if (mask & JOBCTL_STOP_SIGMASK) task->jobctl &= ~JOBCTL_STOP_SIGMASK; task->jobctl |= mask; return true; } /** * task_clear_jobctl_trapping - clear jobctl trapping bit * @task: target task * * If JOBCTL_TRAPPING is set, a ptracer is waiting for us to enter TRACED. * Clear it and wake up the ptracer. Note that we don't need any further * locking. @task->siglock guarantees that @task->parent points to the * ptracer. * * CONTEXT: * Must be called with @task->sighand->siglock held. */ void task_clear_jobctl_trapping(struct task_struct *task) { if (unlikely(task->jobctl & JOBCTL_TRAPPING)) { task->jobctl &= ~JOBCTL_TRAPPING; smp_mb(); /* advised by wake_up_bit() */ wake_up_bit(&task->jobctl, JOBCTL_TRAPPING_BIT); } } /** * task_clear_jobctl_pending - clear jobctl pending bits * @task: target task * @mask: pending bits to clear * * Clear @mask from @task->jobctl. @mask must be subset of * %JOBCTL_PENDING_MASK. If %JOBCTL_STOP_PENDING is being cleared, other * STOP bits are cleared together. * * If clearing of @mask leaves no stop or trap pending, this function calls * task_clear_jobctl_trapping(). * * CONTEXT: * Must be called with @task->sighand->siglock held. */ void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask) { BUG_ON(mask & ~JOBCTL_PENDING_MASK); if (mask & JOBCTL_STOP_PENDING) mask |= JOBCTL_STOP_CONSUME | JOBCTL_STOP_DEQUEUED; task->jobctl &= ~mask; if (!(task->jobctl & JOBCTL_PENDING_MASK)) task_clear_jobctl_trapping(task); } /** * task_participate_group_stop - participate in a group stop * @task: task participating in a group stop * * @task has %JOBCTL_STOP_PENDING set and is participating in a group stop. * Group stop states are cleared and the group stop count is consumed if * %JOBCTL_STOP_CONSUME was set. If the consumption completes the group * stop, the appropriate `SIGNAL_*` flags are set. * * CONTEXT: * Must be called with @task->sighand->siglock held. * * RETURNS: * %true if group stop completion should be notified to the parent, %false * otherwise. */ static bool task_participate_group_stop(struct task_struct *task) { struct signal_struct *sig = task->signal; bool consume = task->jobctl & JOBCTL_STOP_CONSUME; WARN_ON_ONCE(!(task->jobctl & JOBCTL_STOP_PENDING)); task_clear_jobctl_pending(task, JOBCTL_STOP_PENDING); if (!consume) return false; if (!WARN_ON_ONCE(sig->group_stop_count == 0)) sig->group_stop_count--; /* * Tell the caller to notify completion iff we are entering into a * fresh group stop. Read comment in do_signal_stop() for details. 
*/ if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED); return true; } return false; } void task_join_group_stop(struct task_struct *task) { unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK; struct signal_struct *sig = current->signal; if (sig->group_stop_count) { sig->group_stop_count++; mask |= JOBCTL_STOP_CONSUME; } else if (!(sig->flags & SIGNAL_STOP_STOPPED)) return; /* Have the new thread join an on-going signal group stop */ task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an * appropriate lock must be held to stop the target task from exiting */ static struct sigqueue * __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, int override_rlimit, const unsigned int sigqueue_flags) { struct sigqueue *q = NULL; struct ucounts *ucounts; long sigpending; /* * Protect access to @t credentials. This can go away when all * callers hold rcu read lock. * * NOTE! A pending signal will hold on to the user refcount, * and we get/put the refcount only when the sigpending count * changes from/to zero. */ rcu_read_lock(); ucounts = task_ucounts(t); sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); rcu_read_unlock(); if (!sigpending) return NULL; if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); } else { print_dropped_signal(sig); } if (unlikely(q == NULL)) { dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); } else { INIT_LIST_HEAD(&q->list); q->flags = sigqueue_flags; q->ucounts = ucounts; } return q; } static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; if (q->ucounts) { dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); q->ucounts = NULL; } kmem_cache_free(sigqueue_cachep, q); } void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; sigemptyset(&queue->signal); while (!list_empty(&queue->list)) { q = list_entry(queue->list.next, struct sigqueue , list); list_del_init(&q->list); __sigqueue_free(q); } } /* * Flush all pending signals for this kthread. */ void flush_signals(struct task_struct *t) { unsigned long flags; spin_lock_irqsave(&t->sighand->siglock, flags); clear_tsk_thread_flag(t, TIF_SIGPENDING); flush_sigqueue(&t->pending); flush_sigqueue(&t->signal->shared_pending); spin_unlock_irqrestore(&t->sighand->siglock, flags); } EXPORT_SYMBOL(flush_signals); #ifdef CONFIG_POSIX_TIMERS static void __flush_itimer_signals(struct sigpending *pending) { sigset_t signal, retain; struct sigqueue *q, *n; signal = pending->signal; sigemptyset(&retain); list_for_each_entry_safe(q, n, &pending->list, list) { int sig = q->info.si_signo; if (likely(q->info.si_code != SI_TIMER)) { sigaddset(&retain, sig); } else { sigdelset(&signal, sig); list_del_init(&q->list); __sigqueue_free(q); } } sigorsets(&pending->signal, &signal, &retain); } void flush_itimer_signals(void) { struct task_struct *tsk = current; unsigned long flags; spin_lock_irqsave(&tsk->sighand->siglock, flags); __flush_itimer_signals(&tsk->pending); __flush_itimer_signals(&tsk->signal->shared_pending); spin_unlock_irqrestore(&tsk->sighand->siglock, flags); } #endif void ignore_signals(struct task_struct *t) { int i; for (i = 0; i < _NSIG; ++i) t->sighand->action[i].sa.sa_handler = SIG_IGN; flush_signals(t); } /* * Flush all handlers for a task. 
*/ void flush_signal_handlers(struct task_struct *t, int force_default) { int i; struct k_sigaction *ka = &t->sighand->action[0]; for (i = _NSIG ; i != 0 ; i--) { if (force_default || ka->sa.sa_handler != SIG_IGN) ka->sa.sa_handler = SIG_DFL; ka->sa.sa_flags = 0; #ifdef __ARCH_HAS_SA_RESTORER ka->sa.sa_restorer = NULL; #endif sigemptyset(&ka->sa.sa_mask); ka++; } } bool unhandled_signal(struct task_struct *tsk, int sig) { void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler; if (is_global_init(tsk)) return true; if (handler != SIG_IGN && handler != SIG_DFL) return false; /* If dying, we handle all new signals by ignoring them */ if (fatal_signal_pending(tsk)) return false; /* if ptraced, let the tracer determine */ return !tsk->ptrace; } static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *info, bool *resched_timer) { struct sigqueue *q, *first = NULL; /* * Collect the siginfo appropriate to this signal. Check if * there is another siginfo for the same signal. */ list_for_each_entry(q, &list->list, list) { if (q->info.si_signo == sig) { if (first) goto still_pending; first = q; } } sigdelset(&list->signal, sig); if (first) { still_pending: list_del_init(&first->list); copy_siginfo(info, &first->info); *resched_timer = (first->flags & SIGQUEUE_PREALLOC) && (info->si_code == SI_TIMER) && (info->si_sys_private); __sigqueue_free(first); } else { /* * Ok, it wasn't in the queue. This must be * a fast-pathed signal or we must have been * out of queue space. So zero out the info. */ clear_siginfo(info); info->si_signo = sig; info->si_errno = 0; info->si_code = SI_USER; info->si_pid = 0; info->si_uid = 0; } } static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, kernel_siginfo_t *info, bool *resched_timer) { int sig = next_signal(pending, mask); if (sig) collect_signal(sig, pending, info, resched_timer); return sig; } /* * Dequeue a signal and return the element to the caller, which is * expected to free it. * * All callers have to hold the siglock. */ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type) { bool resched_timer = false; int signr; /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ *type = PIDTYPE_PID; signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); if (!signr) { *type = PIDTYPE_TGID; signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info, &resched_timer); #ifdef CONFIG_POSIX_TIMERS /* * itimer signal ? * * itimers are process shared and we restart periodic * itimers in the signal delivery path to prevent DoS * attacks in the high resolution timer case. This is * compliant with the old way of self-restarting * itimers, as the SIGALRM is a legacy signal and only * queued once. Changing the restart behaviour to * restart the timer in the signal dequeue path is * reducing the timer noise on heavy loaded !highres * systems too. */ if (unlikely(signr == SIGALRM)) { struct hrtimer *tmr = &tsk->signal->real_timer; if (!hrtimer_is_queued(tmr) && tsk->signal->it_real_incr != 0) { hrtimer_forward(tmr, tmr->base->get_time(), tsk->signal->it_real_incr); hrtimer_restart(tmr); } } #endif } recalc_sigpending(); if (!signr) return 0; if (unlikely(sig_kernel_stop(signr))) { /* * Set a marker that we have dequeued a stop signal. 
Our * caller might release the siglock and then the pending * stop signal it is about to process is no longer in the * pending bitmasks, but must still be cleared by a SIGCONT * (and overruled by a SIGKILL). So those cases clear this * shared flag after we've set it. Note that this flag may * remain set after the signal we return is ignored or * handled. That doesn't matter because its only purpose * is to alert stop-signal processing code when another * processor has come along and cleared the flag. */ current->jobctl |= JOBCTL_STOP_DEQUEUED; } #ifdef CONFIG_POSIX_TIMERS if (resched_timer) { /* * Release the siglock to ensure proper locking order * of timer locks outside of siglocks. Note, we leave * irqs disabled here, since the posix-timers code is * about to disable them again anyway. */ spin_unlock(&tsk->sighand->siglock); posixtimer_rearm(info); spin_lock(&tsk->sighand->siglock); /* Don't expose the si_sys_private value to userspace */ info->si_sys_private = 0; } #endif return signr; } EXPORT_SYMBOL_GPL(dequeue_signal); static int dequeue_synchronous_signal(kernel_siginfo_t *info) { struct task_struct *tsk = current; struct sigpending *pending = &tsk->pending; struct sigqueue *q, *sync = NULL; /* * Might a synchronous signal be in the queue? */ if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) return 0; /* * Return the first synchronous signal in the queue. */ list_for_each_entry(q, &pending->list, list) { /* Synchronous signals have a positive si_code */ if ((q->info.si_code > SI_USER) && (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { sync = q; goto next; } } return 0; next: /* * Check if there is another siginfo for the same signal. */ list_for_each_entry_continue(q, &pending->list, list) { if (q->info.si_signo == sync->info.si_signo) goto still_pending; } sigdelset(&pending->signal, sync->info.si_signo); recalc_sigpending(); still_pending: list_del_init(&sync->list); copy_siginfo(info, &sync->info); __sigqueue_free(sync); return info->si_signo; } /* * Tell a process that it has a new active signal.. * * NOTE! we rely on the previous spin_lock to * lock interrupts for us! We can only be called with * "siglock" held, and the local interrupt must * have been disabled when that got acquired! * * No need to set need_resched since signal event passing * goes through ->blocked */ void signal_wake_up_state(struct task_struct *t, unsigned int state) { lockdep_assert_held(&t->sighand->siglock); set_tsk_thread_flag(t, TIF_SIGPENDING); /* * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it * executing another processor and just now entering stopped state. * By using wake_up_state, we ensure the process will wake up and * handle its death signal. */ if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) kick_process(t); } /* * Remove signals in mask from the pending set and queue. * Returns 1 if any signals were found. * * All callers must be holding the siglock. 
*/ static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) { struct sigqueue *q, *n; sigset_t m; sigandsets(&m, mask, &s->signal); if (sigisemptyset(&m)) return; sigandnsets(&s->signal, &s->signal, mask); list_for_each_entry_safe(q, n, &s->list, list) { if (sigismember(mask, q->info.si_signo)) { list_del_init(&q->list); __sigqueue_free(q); } } } static inline int is_si_special(const struct kernel_siginfo *info) { return info <= SEND_SIG_PRIV; } static inline bool si_fromuser(const struct kernel_siginfo *info) { return info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)); } /* * called with RCU read lock from check_kill_permission() */ static bool kill_ok_by_cred(struct task_struct *t) { const struct cred *cred = current_cred(); const struct cred *tcred = __task_cred(t); return uid_eq(cred->euid, tcred->suid) || uid_eq(cred->euid, tcred->uid) || uid_eq(cred->uid, tcred->suid) || uid_eq(cred->uid, tcred->uid) || ns_capable(tcred->user_ns, CAP_KILL); } /* * Bad permissions for sending the signal * - the caller must hold the RCU read lock */ static int check_kill_permission(int sig, struct kernel_siginfo *info, struct task_struct *t) { struct pid *sid; int error; if (!valid_signal(sig)) return -EINVAL; if (!si_fromuser(info)) return 0; error = audit_signal_info(sig, t); /* Let audit system see the signal */ if (error) return error; if (!same_thread_group(current, t) && !kill_ok_by_cred(t)) { switch (sig) { case SIGCONT: sid = task_session(t); /* * We don't return the error if sid == NULL. The * task was unhashed, the caller must notice this. */ if (!sid || sid == task_session(current)) break; fallthrough; default: return -EPERM; } } return security_task_kill(t, info, sig, NULL); } /** * ptrace_trap_notify - schedule trap to notify ptracer * @t: tracee wanting to notify tracer * * This function schedules sticky ptrace trap which is cleared on the next * TRAP_STOP to notify ptracer of an event. @t must have been seized by * ptracer. * * If @t is running, STOP trap will be taken. If trapped for STOP and * ptracer is listening for events, tracee is woken up so that it can * re-trap for the new event. If trapped otherwise, STOP trap will be * eventually taken without returning to userland after the existing traps * are finished by PTRACE_CONT. * * CONTEXT: * Must be called with @task->sighand->siglock held. */ static void ptrace_trap_notify(struct task_struct *t) { WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); lockdep_assert_held(&t->sighand->siglock); task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); } /* * Handle magic process-wide effects of stop/continue signals. Unlike * the signal actions, these happen immediately at signal-generation * time regardless of blocking, ignoring, or handling. This does the * actual continuing for SIGCONT, but not the actual stopping for stop * signals. The process stop is done as a signal action for SIG_DFL. * * Returns true if the signal should be actually delivered, otherwise * it should be dropped. */ static bool prepare_signal(int sig, struct task_struct *p, bool force) { struct signal_struct *signal = p->signal; struct task_struct *t; sigset_t flush; if (signal->flags & SIGNAL_GROUP_EXIT) { if (signal->core_state) return sig == SIGKILL; /* * The process is in the middle of dying, drop the signal. */ return false; } else if (sig_kernel_stop(sig)) { /* * This is a stop signal. Remove SIGCONT from all queues. 
*/ siginitset(&flush, sigmask(SIGCONT)); flush_sigqueue_mask(&flush, &signal->shared_pending); for_each_thread(p, t) flush_sigqueue_mask(&flush, &t->pending); } else if (sig == SIGCONT) { unsigned int why; /* * Remove all stop signals from all queues, wake all threads. */ siginitset(&flush, SIG_KERNEL_STOP_MASK); flush_sigqueue_mask(&flush, &signal->shared_pending); for_each_thread(p, t) { flush_sigqueue_mask(&flush, &t->pending); task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); if (likely(!(t->ptrace & PT_SEIZED))) { t->jobctl &= ~JOBCTL_STOPPED; wake_up_state(t, __TASK_STOPPED); } else ptrace_trap_notify(t); } /* * Notify the parent with CLD_CONTINUED if we were stopped. * * If we were in the middle of a group stop, we pretend it * was already finished, and then continued. Since SIGCHLD * doesn't queue we report only CLD_STOPPED, as if the next * CLD_CONTINUED was dropped. */ why = 0; if (signal->flags & SIGNAL_STOP_STOPPED) why |= SIGNAL_CLD_CONTINUED; else if (signal->group_stop_count) why |= SIGNAL_CLD_STOPPED; if (why) { /* * The first thread which returns from do_signal_stop() * will take ->siglock, notice SIGNAL_CLD_MASK, and * notify its parent. See get_signal(). */ signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED); signal->group_stop_count = 0; signal->group_exit_code = 0; } } return !sig_ignored(p, sig, force); } /* * Test if P wants to take SIG. After we've checked all threads with this, * it's equivalent to finding no threads not blocking SIG. Any threads not * blocking SIG were ruled out because they are not running and already * have pending signals. Such threads will dequeue from the shared queue * as soon as they're available, so putting the signal on the shared queue * will be equivalent to sending it to one such thread. */ static inline bool wants_signal(int sig, struct task_struct *p) { if (sigismember(&p->blocked, sig)) return false; if (p->flags & PF_EXITING) return false; if (sig == SIGKILL) return true; if (task_is_stopped_or_traced(p)) return false; return task_curr(p) || !task_sigpending(p); } static void complete_signal(int sig, struct task_struct *p, enum pid_type type) { struct signal_struct *signal = p->signal; struct task_struct *t; /* * Now find a thread we can wake up to take the signal off the queue. * * Try the suggested task first (may or may not be the main thread). */ if (wants_signal(sig, p)) t = p; else if ((type == PIDTYPE_PID) || thread_group_empty(p)) /* * There is just one thread and it does not need to be woken. * It will dequeue unblocked signals before it runs again. */ return; else { /* * Otherwise try to find a suitable thread. */ t = signal->curr_target; while (!wants_signal(sig, t)) { t = next_thread(t); if (t == signal->curr_target) /* * No thread needs to be woken. * Any eligible threads will see * the signal in the queue soon. */ return; } signal->curr_target = t; } /* * Found a killable thread. If the signal will be fatal, * then start taking the whole group down immediately. */ if (sig_fatal(p, sig) && (signal->core_state || !(signal->flags & SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && (sig == SIGKILL || !p->ptrace)) { /* * This signal will be fatal to the whole group. */ if (!sig_kernel_coredump(sig)) { /* * Start a group exit and wake everybody up. * This way we don't have other threads * running and doing things after a slower * thread has the fatal signal pending. 
*/ signal->flags = SIGNAL_GROUP_EXIT; signal->group_exit_code = sig; signal->group_stop_count = 0; __for_each_thread(signal, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } return; } } /* * The signal is already in the shared-pending queue. * Tell the chosen thread to wake up and dequeue it. */ signal_wake_up(t, sig == SIGKILL); return; } static inline bool legacy_queue(struct sigpending *signals, int sig) { return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } static int __send_signal_locked(int sig, struct kernel_siginfo *info, struct task_struct *t, enum pid_type type, bool force) { struct sigpending *pending; struct sigqueue *q; int override_rlimit; int ret = 0, result; lockdep_assert_held(&t->sighand->siglock); result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, force)) goto ret; pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; /* * Short-circuit ignored signals and support queuing * exactly one non-rt signal, so that we can get more * detailed information about the cause of the signal. */ result = TRACE_SIGNAL_ALREADY_PENDING; if (legacy_queue(pending, sig)) goto ret; result = TRACE_SIGNAL_DELIVERED; /* * Skip useless siginfo allocation for SIGKILL and kernel threads. */ if ((sig == SIGKILL) || (t->flags & PF_KTHREAD)) goto out_set; /* * Real-time signals must be queued if sent by sigqueue, or * some other real-time mechanism. It is implementation * defined whether kill() does so. We attempt to do so, on * the principle of least surprise, but since kill is not * allowed to fail with EAGAIN when low on memory we just * make sure at least one signal gets delivered and don't * pass on the info struct. */ if (sig < SIGRTMIN) override_rlimit = (is_si_special(info) || info->si_code >= 0); else override_rlimit = 0; q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit, 0); if (q) { list_add_tail(&q->list, &pending->list); switch ((unsigned long) info) { case (unsigned long) SEND_SIG_NOINFO: clear_siginfo(&q->info); q->info.si_signo = sig; q->info.si_errno = 0; q->info.si_code = SI_USER; q->info.si_pid = task_tgid_nr_ns(current, task_active_pid_ns(t)); rcu_read_lock(); q->info.si_uid = from_kuid_munged(task_cred_xxx(t, user_ns), current_uid()); rcu_read_unlock(); break; case (unsigned long) SEND_SIG_PRIV: clear_siginfo(&q->info); q->info.si_signo = sig; q->info.si_errno = 0; q->info.si_code = SI_KERNEL; q->info.si_pid = 0; q->info.si_uid = 0; break; default: copy_siginfo(&q->info, info); break; } } else if (!is_si_special(info) && sig >= SIGRTMIN && info->si_code != SI_USER) { /* * Queue overflow, abort. We may abort if the * signal was rt and sent by user using something * other than kill(). */ result = TRACE_SIGNAL_OVERFLOW_FAIL; ret = -EAGAIN; goto ret; } else { /* * This is a silent loss of information. We still * send the signal, but the *info bits are lost. 
*/ result = TRACE_SIGNAL_LOSE_INFO; } out_set: signalfd_notify(t, sig); sigaddset(&pending->signal, sig); /* Let multiprocess signals appear after on-going forks */ if (type > PIDTYPE_TGID) { struct multiprocess_signals *delayed; hlist_for_each_entry(delayed, &t->signal->multiprocess, node) { sigset_t *signal = &delayed->signal; /* Can't queue both a stop and a continue signal */ if (sig == SIGCONT) sigdelsetmask(signal, SIG_KERNEL_STOP_MASK); else if (sig_kernel_stop(sig)) sigdelset(signal, SIGCONT); sigaddset(signal, sig); } } complete_signal(sig, t, type); ret: trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result); return ret; } static inline bool has_si_pid_and_uid(struct kernel_siginfo *info) { bool ret = false; switch (siginfo_layout(info->si_signo, info->si_code)) { case SIL_KILL: case SIL_CHLD: case SIL_RT: ret = true; break; case SIL_TIMER: case SIL_POLL: case SIL_FAULT: case SIL_FAULT_TRAPNO: case SIL_FAULT_MCEERR: case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: case SIL_FAULT_PERF_EVENT: case SIL_SYS: ret = false; break; } return ret; } int send_signal_locked(int sig, struct kernel_siginfo *info, struct task_struct *t, enum pid_type type) { /* Should SIGKILL or SIGSTOP be received by a pid namespace init? */ bool force = false; if (info == SEND_SIG_NOINFO) { /* Force if sent from an ancestor pid namespace */ force = !task_pid_nr_ns(current, task_active_pid_ns(t)); } else if (info == SEND_SIG_PRIV) { /* Don't ignore kernel generated signals */ force = true; } else if (has_si_pid_and_uid(info)) { /* SIGKILL and SIGSTOP is special or has ids */ struct user_namespace *t_user_ns; rcu_read_lock(); t_user_ns = task_cred_xxx(t, user_ns); if (current_user_ns() != t_user_ns) { kuid_t uid = make_kuid(current_user_ns(), info->si_uid); info->si_uid = from_kuid_munged(t_user_ns, uid); } rcu_read_unlock(); /* A kernel generated signal? */ force = (info->si_code == SI_KERNEL); /* From an ancestor pid namespace? */ if (!task_pid_nr_ns(current, task_active_pid_ns(t))) { info->si_pid = 0; force = true; } } return __send_signal_locked(sig, info, t, type, force); } static void print_fatal_signal(int signr) { struct pt_regs *regs = task_pt_regs(current); struct file *exe_file; exe_file = get_task_exe_file(current); if (exe_file) { pr_info("%pD: %s: potentially unexpected fatal signal %d.\n", exe_file, current->comm, signr); fput(exe_file); } else { pr_info("%s: potentially unexpected fatal signal %d.\n", current->comm, signr); } #if defined(__i386__) && !defined(__arch_um__) pr_info("code at %08lx: ", regs->ip); { int i; for (i = 0; i < 16; i++) { unsigned char insn; if (get_user(insn, (unsigned char *)(regs->ip + i))) break; pr_cont("%02x ", insn); } } pr_cont("\n"); #endif preempt_disable(); show_regs(regs); preempt_enable(); } static int __init setup_print_fatal_signals(char *str) { get_option (&str, &print_fatal_signals); return 1; } __setup("print-fatal-signals=", setup_print_fatal_signals); int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type) { unsigned long flags; int ret = -ESRCH; if (lock_task_sighand(p, &flags)) { ret = send_signal_locked(sig, info, p, type); unlock_task_sighand(p, &flags); } return ret; } enum sig_handler { HANDLER_CURRENT, /* If reachable use the current handler */ HANDLER_SIG_DFL, /* Always use SIG_DFL handler semantics */ HANDLER_EXIT, /* Only visible as the process exit code */ }; /* * Force a signal that the process can't ignore: if necessary * we unblock the signal and change any SIG_IGN to SIG_DFL. 
* * Note: If we unblock the signal, we always reset it to SIG_DFL, * since we do not want to have a signal handler that was blocked * be invoked when user space had explicitly blocked it. * * We don't want to have recursive SIGSEGV's etc, for example, * that is why we also clear SIGNAL_UNKILLABLE. */ static int force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, enum sig_handler handler) { unsigned long int flags; int ret, blocked, ignored; struct k_sigaction *action; int sig = info->si_signo; spin_lock_irqsave(&t->sighand->siglock, flags); action = &t->sighand->action[sig-1]; ignored = action->sa.sa_handler == SIG_IGN; blocked = sigismember(&t->blocked, sig); if (blocked || ignored || (handler != HANDLER_CURRENT)) { action->sa.sa_handler = SIG_DFL; if (handler == HANDLER_EXIT) action->sa.sa_flags |= SA_IMMUTABLE; if (blocked) sigdelset(&t->blocked, sig); } /* * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect * debugging to leave init killable. But HANDLER_EXIT is always fatal. */ if (action->sa.sa_handler == SIG_DFL && (!t->ptrace || (handler == HANDLER_EXIT))) t->signal->flags &= ~SIGNAL_UNKILLABLE; ret = send_signal_locked(sig, info, t, PIDTYPE_PID); /* This can happen if the signal was already pending and blocked */ if (!task_sigpending(t)) signal_wake_up(t, 0); spin_unlock_irqrestore(&t->sighand->siglock, flags); return ret; } int force_sig_info(struct kernel_siginfo *info) { return force_sig_info_to_task(info, current, HANDLER_CURRENT); } /* * Nuke all other threads in the group. */ int zap_other_threads(struct task_struct *p) { struct task_struct *t; int count = 0; p->signal->group_stop_count = 0; for_other_threads(p, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); /* Don't require de_thread to wait for the vhost_worker */ if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER) count++; /* Don't bother with already dead threads */ if (t->exit_state) continue; sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } return count; } struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags) { struct sighand_struct *sighand; rcu_read_lock(); for (;;) { sighand = rcu_dereference(tsk->sighand); if (unlikely(sighand == NULL)) break; /* * This sighand can be already freed and even reused, but * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which * initializes ->siglock: this slab can't go away, it has * the same object type, ->siglock can't be reinitialized. * * We need to ensure that tsk->sighand is still the same * after we take the lock, we can race with de_thread() or * __exit_signal(). In the latter case the next iteration * must see ->sighand == NULL. */ spin_lock_irqsave(&sighand->siglock, *flags); if (likely(sighand == rcu_access_pointer(tsk->sighand))) break; spin_unlock_irqrestore(&sighand->siglock, *flags); } rcu_read_unlock(); return sighand; } #ifdef CONFIG_LOCKDEP void lockdep_assert_task_sighand_held(struct task_struct *task) { struct sighand_struct *sighand; rcu_read_lock(); sighand = rcu_dereference(task->sighand); if (sighand) lockdep_assert_held(&sighand->siglock); else WARN_ON_ONCE(1); rcu_read_unlock(); } #endif /* * send signal info to all the members of a thread group or to the * individual thread if type == PIDTYPE_PID. 
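 *
 * A hedged illustration of the distinction (arguments are made up):
 *
 *	group_send_sig_info(SIGTERM, SEND_SIG_NOINFO, p, PIDTYPE_TGID);
 *		- queued on the shared pending list, any thread may take it
 *	group_send_sig_info(SIGTERM, SEND_SIG_NOINFO, p, PIDTYPE_PID);
 *		- queued on p's private pending list only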
*/ int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type) { int ret; rcu_read_lock(); ret = check_kill_permission(sig, info, p); rcu_read_unlock(); if (!ret && sig) ret = do_send_sig_info(sig, info, p, type); return ret; } /* * __kill_pgrp_info() sends a signal to a process group: this is what the tty * control characters do (^C, ^Z etc) * - the caller must hold at least a readlock on tasklist_lock */ int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp) { struct task_struct *p = NULL; int ret = -ESRCH; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID); /* * If group_send_sig_info() succeeds at least once ret * becomes 0 and after that the code below has no effect. * Otherwise we return the last err or -ESRCH if this * process group is empty. */ if (ret) ret = err; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); return ret; } static int kill_pid_info_type(int sig, struct kernel_siginfo *info, struct pid *pid, enum pid_type type) { int error = -ESRCH; struct task_struct *p; for (;;) { rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) error = group_send_sig_info(sig, info, p, type); rcu_read_unlock(); if (likely(!p || error != -ESRCH)) return error; /* * The task was unhashed in between, try again. If it * is dead, pid_task() will return NULL, if we race with * de_thread() it will find the new leader. */ } } int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid) { return kill_pid_info_type(sig, info, pid, PIDTYPE_TGID); } static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid) { int error; rcu_read_lock(); error = kill_pid_info(sig, info, find_vpid(pid)); rcu_read_unlock(); return error; } static inline bool kill_as_cred_perm(const struct cred *cred, struct task_struct *target) { const struct cred *pcred = __task_cred(target); return uid_eq(cred->euid, pcred->suid) || uid_eq(cred->euid, pcred->uid) || uid_eq(cred->uid, pcred->suid) || uid_eq(cred->uid, pcred->uid); } /* * The usb asyncio usage of siginfo is wrong. The glibc support * for asyncio which uses SI_ASYNCIO assumes the layout is SIL_RT. * AKA after the generic fields: * kernel_pid_t si_pid; * kernel_uid32_t si_uid; * sigval_t si_value; * * Unfortunately when usb generates SI_ASYNCIO it assumes the layout * after the generic fields is: * void __user *si_addr; * * This is a practical problem when there is a 64bit big endian kernel * and a 32bit userspace. As the 32bit address will encoded in the low * 32bits of the pointer. Those low 32bits will be stored at higher * address than appear in a 32 bit pointer. So userspace will not * see the address it was expecting for it's completions. * * There is nothing in the encoding that can allow * copy_siginfo_to_user32 to detect this confusion of formats, so * handle this by requiring the caller of kill_pid_usb_asyncio to * notice when this situration takes place and to store the 32bit * pointer in sival_int, instead of sival_addr of the sigval_t addr * parameter. 
*/ int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *pid, const struct cred *cred) { struct kernel_siginfo info; struct task_struct *p; unsigned long flags; int ret = -EINVAL; if (!valid_signal(sig)) return ret; clear_siginfo(&info); info.si_signo = sig; info.si_errno = errno; info.si_code = SI_ASYNCIO; *((sigval_t *)&info.si_pid) = addr; rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (!p) { ret = -ESRCH; goto out_unlock; } if (!kill_as_cred_perm(cred, p)) { ret = -EPERM; goto out_unlock; } ret = security_task_kill(p, &info, sig, cred); if (ret) goto out_unlock; if (sig) { if (lock_task_sighand(p, &flags)) { ret = __send_signal_locked(sig, &info, p, PIDTYPE_TGID, false); unlock_task_sighand(p, &flags); } else ret = -ESRCH; } out_unlock: rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(kill_pid_usb_asyncio); /* * kill_something_info() interprets pid in interesting ways just like kill(2). * * POSIX specifies that kill(-1,sig) is unspecified, but what we have * is probably wrong. Should make it like BSD or SYSV. */ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid) { int ret; if (pid > 0) return kill_proc_info(sig, info, pid); /* -INT_MIN is undefined. Exclude this case to avoid a UBSAN warning */ if (pid == INT_MIN) return -ESRCH; read_lock(&tasklist_lock); if (pid != -1) { ret = __kill_pgrp_info(sig, info, pid ? find_vpid(-pid) : task_pgrp(current)); } else { int retval = 0, count = 0; struct task_struct * p; for_each_process(p) { if (task_pid_vnr(p) > 1 && !same_thread_group(p, current)) { int err = group_send_sig_info(sig, info, p, PIDTYPE_MAX); ++count; if (err != -EPERM) retval = err; } } ret = count ? retval : -ESRCH; } read_unlock(&tasklist_lock); return ret; } /* * These are for backward compatibility with the rest of the kernel source. */ int send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) { /* * Make sure legacy kernel users don't send in bad values * (normal paths check this in check_kill_permission). */ if (!valid_signal(sig)) return -EINVAL; return do_send_sig_info(sig, info, p, PIDTYPE_PID); } EXPORT_SYMBOL(send_sig_info); #define __si_special(priv) \ ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO) int send_sig(int sig, struct task_struct *p, int priv) { return send_sig_info(sig, __si_special(priv), p); } EXPORT_SYMBOL(send_sig); void force_sig(int sig) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = SI_KERNEL; info.si_pid = 0; info.si_uid = 0; force_sig_info(&info); } EXPORT_SYMBOL(force_sig); void force_fatal_sig(int sig) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = SI_KERNEL; info.si_pid = 0; info.si_uid = 0; force_sig_info_to_task(&info, current, HANDLER_SIG_DFL); } void force_exit_sig(int sig) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = SI_KERNEL; info.si_pid = 0; info.si_uid = 0; force_sig_info_to_task(&info, current, HANDLER_EXIT); } /* * When things go south during signal handling, we * will force a SIGSEGV. And if the signal that caused * the problem was already a SIGSEGV, we'll want to * make sure we don't even try to deliver the signal.. 
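 *
 * The typical caller is signal-frame setup: if copying the frame to the
 * user stack fails, something like
 *
 *	force_sigsegv(ksig->sig);
 *
 * kills the task instead of looping forever on an undeliverable signal
 * (illustrative; see signal_setup_done() later in this file for the
 * generic call site).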
*/ void force_sigsegv(int sig) { if (sig == SIGSEGV) force_fatal_sig(SIGSEGV); else force_sig(SIGSEGV); } int force_sig_fault_to_task(int sig, int code, void __user *addr, struct task_struct *t) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = code; info.si_addr = addr; return force_sig_info_to_task(&info, t, HANDLER_CURRENT); } int force_sig_fault(int sig, int code, void __user *addr) { return force_sig_fault_to_task(sig, code, addr, current); } int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = code; info.si_addr = addr; return send_sig_info(info.si_signo, &info, t); } int force_sig_mceerr(int code, void __user *addr, short lsb) { struct kernel_siginfo info; WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR)); clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = code; info.si_addr = addr; info.si_addr_lsb = lsb; return force_sig_info(&info); } int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) { struct kernel_siginfo info; WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR)); clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = code; info.si_addr = addr; info.si_addr_lsb = lsb; return send_sig_info(info.si_signo, &info, t); } EXPORT_SYMBOL(send_sig_mceerr); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = SEGV_BNDERR; info.si_addr = addr; info.si_lower = lower; info.si_upper = upper; return force_sig_info(&info); } #ifdef SEGV_PKUERR int force_sig_pkuerr(void __user *addr, u32 pkey) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = SEGV_PKUERR; info.si_addr = addr; info.si_pkey = pkey; return force_sig_info(&info); } #endif int send_sig_perf(void __user *addr, u32 type, u64 sig_data) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_PERF; info.si_addr = addr; info.si_perf_data = sig_data; info.si_perf_type = type; /* * Signals generated by perf events should not terminate the whole * process if SIGTRAP is blocked, however, delivering the signal * asynchronously is better than not delivering at all. But tell user * space if the signal was asynchronous, so it can clearly be * distinguished from normal synchronous ones. */ info.si_perf_flags = sigismember(&current->blocked, info.si_signo) ? TRAP_PERF_FLAG_ASYNC : 0; return send_sig_info(info.si_signo, &info, current); } /** * force_sig_seccomp - signals the task to allow in-process syscall emulation * @syscall: syscall number to send to userland * @reason: filter-supplied reason code to send to userland (via si_errno) * @force_coredump: true to trigger a coredump * * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info. */ int force_sig_seccomp(int syscall, int reason, bool force_coredump) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGSYS; info.si_code = SYS_SECCOMP; info.si_call_addr = (void __user *)KSTK_EIP(current); info.si_errno = reason; info.si_arch = syscall_get_arch(current); info.si_syscall = syscall; return force_sig_info_to_task(&info, current, force_coredump ? 
HANDLER_EXIT : HANDLER_CURRENT); } /* For the crazy architectures that include trap information in * the errno field, instead of an actual errno value. */ int force_sig_ptrace_errno_trap(int errno, void __user *addr) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGTRAP; info.si_errno = errno; info.si_code = TRAP_HWBKPT; info.si_addr = addr; return force_sig_info(&info); } /* For the rare architectures that include trap information using * si_trapno. */ int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = code; info.si_addr = addr; info.si_trapno = trapno; return force_sig_info(&info); } /* For the rare architectures that include trap information using * si_trapno. */ int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno, struct task_struct *t) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = code; info.si_addr = addr; info.si_trapno = trapno; return send_sig_info(info.si_signo, &info, t); } static int kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp) { int ret; read_lock(&tasklist_lock); ret = __kill_pgrp_info(sig, info, pgrp); read_unlock(&tasklist_lock); return ret; } int kill_pgrp(struct pid *pid, int sig, int priv) { return kill_pgrp_info(sig, __si_special(priv), pid); } EXPORT_SYMBOL(kill_pgrp); int kill_pid(struct pid *pid, int sig, int priv) { return kill_pid_info(sig, __si_special(priv), pid); } EXPORT_SYMBOL(kill_pid); /* * These functions support sending signals using preallocated sigqueue * structures. This is needed "because realtime applications cannot * afford to lose notifications of asynchronous events, like timer * expirations or I/O completions". In the case of POSIX Timers * we allocate the sigqueue structure from the timer_create. If this * allocation fails we are able to report the failure to the application * with an EAGAIN error. */ struct sigqueue *sigqueue_alloc(void) { return __sigqueue_alloc(-1, current, GFP_KERNEL, 0, SIGQUEUE_PREALLOC); } void sigqueue_free(struct sigqueue *q) { unsigned long flags; spinlock_t *lock = &current->sighand->siglock; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); /* * We must hold ->siglock while testing q->list * to serialize with collect_signal() or with * __exit_signal()->flush_sigqueue(). */ spin_lock_irqsave(lock, flags); q->flags &= ~SIGQUEUE_PREALLOC; /* * If it is queued it will be freed when dequeued, * like the "regular" sigqueue. */ if (!list_empty(&q->list)) q = NULL; spin_unlock_irqrestore(lock, flags); if (q) __sigqueue_free(q); } int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) { int sig = q->info.si_signo; struct sigpending *pending; struct task_struct *t; unsigned long flags; int ret, result; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); ret = -1; rcu_read_lock(); /* * This function is used by POSIX timers to deliver a timer signal. * Where type is PIDTYPE_PID (such as for timers with SIGEV_THREAD_ID * set), the signal must be delivered to the specific thread (queues * into t->pending). * * Where type is not PIDTYPE_PID, signals must be delivered to the * process. In this case, prefer to deliver to current if it is in * the same thread group as the target process, which avoids * unnecessarily waking up a potentially idle task. 
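 *
 * (The queue entry itself was preallocated by sigqueue_alloc() at timer
 * creation time, which is why this delivery path never allocates and
 * cannot fail with -EAGAIN.)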
*/ t = pid_task(pid, type); if (!t) goto ret; if (type != PIDTYPE_PID && same_thread_group(t, current)) t = current; if (!likely(lock_task_sighand(t, &flags))) goto ret; ret = 1; /* the signal is ignored */ result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, false)) goto out; ret = 0; if (unlikely(!list_empty(&q->list))) { /* * If an SI_TIMER entry is already queue just increment * the overrun count. */ BUG_ON(q->info.si_code != SI_TIMER); q->info.si_overrun++; result = TRACE_SIGNAL_ALREADY_PENDING; goto out; } q->info.si_overrun = 0; signalfd_notify(t, sig); pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; list_add_tail(&q->list, &pending->list); sigaddset(&pending->signal, sig); complete_signal(sig, t, type); result = TRACE_SIGNAL_DELIVERED; out: trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result); unlock_task_sighand(t, &flags); ret: rcu_read_unlock(); return ret; } void do_notify_pidfd(struct task_struct *task) { struct pid *pid = task_pid(task); WARN_ON(task->exit_state == 0); __wake_up(&pid->wait_pidfd, TASK_NORMAL, 0, poll_to_key(EPOLLIN | EPOLLRDNORM)); } /* * Let a parent know about the death of a child. * For a stopped/continued status change, use do_notify_parent_cldstop instead. * * Returns true if our parent ignored us and so we've switched to * self-reaping. */ bool do_notify_parent(struct task_struct *tsk, int sig) { struct kernel_siginfo info; unsigned long flags; struct sighand_struct *psig; bool autoreap = false; u64 utime, stime; WARN_ON_ONCE(sig == -1); /* do_notify_parent_cldstop should have been called instead. */ WARN_ON_ONCE(task_is_stopped_or_traced(tsk)); WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); /* * tsk is a group leader and has no threads, wake up the * non-PIDFD_THREAD waiters. */ if (thread_group_empty(tsk)) do_notify_pidfd(tsk); if (sig != SIGCHLD) { /* * This is only possible if parent == real_parent. * Check if it has changed security domain. */ if (tsk->parent_exec_id != READ_ONCE(tsk->parent->self_exec_id)) sig = SIGCHLD; } clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; /* * We are under tasklist_lock here so our parent is tied to * us and cannot change. * * task_active_pid_ns will always return the same pid namespace * until a task passes through release_task. * * write_lock() currently calls preempt_disable() which is the * same as rcu_read_lock(), but according to Oleg, this is not * correct to rely on this */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent)); info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns), task_uid(tsk)); rcu_read_unlock(); task_cputime(tsk, &utime, &stime); info.si_utime = nsec_to_clock_t(utime + tsk->signal->utime); info.si_stime = nsec_to_clock_t(stime + tsk->signal->stime); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) info.si_code = CLD_DUMPED; else if (tsk->exit_code & 0x7f) info.si_code = CLD_KILLED; else { info.si_code = CLD_EXITED; info.si_status = tsk->exit_code >> 8; } psig = tsk->parent->sighand; spin_lock_irqsave(&psig->siglock, flags); if (!tsk->ptrace && sig == SIGCHLD && (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { /* * We are exiting and our parent doesn't care. POSIX.1 * defines special semantics for setting SIGCHLD to SIG_IGN * or setting the SA_NOCLDWAIT flag: we should be reaped * automatically and not left for our parent's wait4 call. 
* Rather than having the parent do it as a magic kind of * signal handler, we just set this to tell do_exit that we * can be cleaned up without becoming a zombie. Note that * we still call __wake_up_parent in this case, because a * blocked sys_wait4 might now return -ECHILD. * * Whether we send SIGCHLD or not for SA_NOCLDWAIT * is implementation-defined: we do (if you don't want * it, just use SIG_IGN instead). */ autoreap = true; if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) sig = 0; } /* * Send with __send_signal as si_pid and si_uid are in the * parent's namespaces. */ if (valid_signal(sig) && sig) __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); return autoreap; } /** * do_notify_parent_cldstop - notify parent of stopped/continued state change * @tsk: task reporting the state change * @for_ptracer: the notification is for ptracer * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report * * Notify @tsk's parent that the stopped/continued state has changed. If * @for_ptracer is %false, @tsk's group leader notifies to its real parent. * If %true, @tsk reports to @tsk->parent which should be the ptracer. * * CONTEXT: * Must be called with tasklist_lock at least read locked. */ static void do_notify_parent_cldstop(struct task_struct *tsk, bool for_ptracer, int why) { struct kernel_siginfo info; unsigned long flags; struct task_struct *parent; struct sighand_struct *sighand; u64 utime, stime; if (for_ptracer) { parent = tsk->parent; } else { tsk = tsk->group_leader; parent = tsk->real_parent; } clear_siginfo(&info); info.si_signo = SIGCHLD; info.si_errno = 0; /* * see comment in do_notify_parent() about the following 4 lines */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent)); info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); rcu_read_unlock(); task_cputime(tsk, &utime, &stime); info.si_utime = nsec_to_clock_t(utime); info.si_stime = nsec_to_clock_t(stime); info.si_code = why; switch (why) { case CLD_CONTINUED: info.si_status = SIGCONT; break; case CLD_STOPPED: info.si_status = tsk->signal->group_exit_code & 0x7f; break; case CLD_TRAPPED: info.si_status = tsk->exit_code & 0x7f; break; default: BUG(); } sighand = parent->sighand; spin_lock_irqsave(&sighand->siglock, flags); if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID); /* * Even if SIGCHLD is not generated, we must wake up wait4 calls. */ __wake_up_parent(tsk, parent); spin_unlock_irqrestore(&sighand->siglock, flags); } /* * This must be called with current->sighand->siglock held. * * This should be the path for all ptrace stops. * We always set current->last_siginfo while stopped here. * That makes it a way to test a stopped process for * being ptrace-stopped vs being job-control-stopped. * * Returns the signal the ptracer requested the code resume * with. If the code did not stop because the tracer is gone, * the stop signal remains unchanged unless clear_code. */ static int ptrace_stop(int exit_code, int why, unsigned long message, kernel_siginfo_t *info) __releases(&current->sighand->siglock) __acquires(&current->sighand->siglock) { bool gstop_done = false; if (arch_ptrace_stop_needed()) { /* * The arch code has something special to do before a * ptrace stop. This is allowed to block, e.g. for faults * on user stack pages. 
We can't keep the siglock while * calling arch_ptrace_stop, so we must release it now. * To preserve proper semantics, we must do this before * any signal bookkeeping like checking group_stop_count. */ spin_unlock_irq(&current->sighand->siglock); arch_ptrace_stop(); spin_lock_irq(&current->sighand->siglock); } /* * After this point ptrace_signal_wake_up or signal_wake_up * will clear TASK_TRACED if ptrace_unlink happens or a fatal * signal comes in. Handle previous ptrace_unlinks and fatal * signals here to prevent ptrace_stop sleeping in schedule. */ if (!current->ptrace || __fatal_signal_pending(current)) return exit_code; set_special_state(TASK_TRACED); current->jobctl |= JOBCTL_TRACED; /* * We're committing to trapping. TRACED should be visible before * TRAPPING is cleared; otherwise, the tracer might fail do_wait(). * Also, transition to TRACED and updates to ->jobctl should be * atomic with respect to siglock and should be done after the arch * hook as siglock is released and regrabbed across it. * * TRACER TRACEE * * ptrace_attach() * [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED) * do_wait() * set_current_state() smp_wmb(); * ptrace_do_wait() * wait_task_stopped() * task_stopped_code() * [L] task_is_traced() [S] task_clear_jobctl_trapping(); */ smp_wmb(); current->ptrace_message = message; current->last_siginfo = info; current->exit_code = exit_code; /* * If @why is CLD_STOPPED, we're trapping to participate in a group * stop. Do the bookkeeping. Note that if SIGCONT was delievered * across siglock relocks since INTERRUPT was scheduled, PENDING * could be clear now. We act as if SIGCONT is received after * TASK_TRACED is entered - ignore it. */ if (why == CLD_STOPPED && (current->jobctl & JOBCTL_STOP_PENDING)) gstop_done = task_participate_group_stop(current); /* any trap clears pending STOP trap, STOP trap clears NOTIFY */ task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP); if (info && info->si_code >> 8 == PTRACE_EVENT_STOP) task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY); /* entering a trap, clear TRAPPING */ task_clear_jobctl_trapping(current); spin_unlock_irq(&current->sighand->siglock); read_lock(&tasklist_lock); /* * Notify parents of the stop. * * While ptraced, there are two parents - the ptracer and * the real_parent of the group_leader. The ptracer should * know about every stop while the real parent is only * interested in the completion of group stop. The states * for the two don't interact with each other. Notify * separately unless they're gonna be duplicates. */ if (current->ptrace) do_notify_parent_cldstop(current, true, why); if (gstop_done && (!current->ptrace || ptrace_reparented(current))) do_notify_parent_cldstop(current, false, why); /* * The previous do_notify_parent_cldstop() invocation woke ptracer. * One a PREEMPTION kernel this can result in preemption requirement * which will be fulfilled after read_unlock() and the ptracer will be * put on the CPU. * The ptracer is in wait_task_inactive(, __TASK_TRACED) waiting for * this task wait in schedule(). If this task gets preempted then it * remains enqueued on the runqueue. The ptracer will observe this and * then sleep for a delay of one HZ tick. In the meantime this task * gets scheduled, enters schedule() and will wait for the ptracer. * * This preemption point is not bad from a correctness point of * view but extends the runtime by one HZ tick time due to the * ptracer's sleep. 
The preempt-disable section ensures that there * will be no preemption between unlock and schedule() and so * improving the performance since the ptracer will observe that * the tracee is scheduled out once it gets on the CPU. * * On PREEMPT_RT locking tasklist_lock does not disable preemption. * Therefore the task can be preempted after do_notify_parent_cldstop() * before unlocking tasklist_lock so there is no benefit in doing this. * * In fact disabling preemption is harmful on PREEMPT_RT because * the spinlock_t in cgroup_enter_frozen() must not be acquired * with preemption disabled due to the 'sleeping' spinlock * substitution of RT. */ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_disable(); read_unlock(&tasklist_lock); cgroup_enter_frozen(); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_enable_no_resched(); schedule(); cgroup_leave_frozen(true); /* * We are back. Now reacquire the siglock before touching * last_siginfo, so that we are sure to have synchronized with * any signal-sending on another CPU that wants to examine it. */ spin_lock_irq(&current->sighand->siglock); exit_code = current->exit_code; current->last_siginfo = NULL; current->ptrace_message = 0; current->exit_code = 0; /* LISTENING can be set only during STOP traps, clear it */ current->jobctl &= ~(JOBCTL_LISTENING | JOBCTL_PTRACE_FROZEN); /* * Queued signals ignored us while we were stopped for tracing. * So check for any that we should take before resuming user mode. * This sets TIF_SIGPENDING, but never clears it. */ recalc_sigpending_tsk(current); return exit_code; } static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message) { kernel_siginfo_t info; clear_siginfo(&info); info.si_signo = signr; info.si_code = exit_code; info.si_pid = task_pid_vnr(current); info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); /* Let the debugger run. */ return ptrace_stop(exit_code, why, message, &info); } int ptrace_notify(int exit_code, unsigned long message) { int signr; BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); if (unlikely(task_work_pending(current))) task_work_run(); spin_lock_irq(&current->sighand->siglock); signr = ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message); spin_unlock_irq(&current->sighand->siglock); return signr; } /** * do_signal_stop - handle group stop for SIGSTOP and other stop signals * @signr: signr causing group stop if initiating * * If %JOBCTL_STOP_PENDING is not set yet, initiate group stop with @signr * and participate in it. If already set, participate in the existing * group stop. If participated in a group stop (and thus slept), %true is * returned with siglock released. * * If ptraced, this function doesn't handle stop itself. Instead, * %JOBCTL_TRAP_STOP is scheduled and %false is returned with siglock * untouched. The caller must ensure that INTERRUPT trap handling takes * places afterwards. * * CONTEXT: * Must be called with @current->sighand->siglock held, which is released * on %true return. * * RETURNS: * %false if group stop is already cancelled or ptrace trap is scheduled. * %true if participated in group stop. 
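 *
 * For orientation, the user-visible behaviour implemented here is the
 * classic job-control stop.  A minimal userspace sketch (illustrative
 * only, not part of this file; "child" is the pid of a multithreaded
 * child) of what a shell-like parent observes:
 *
 *	int status;
 *
 *	kill(child, SIGTSTP);
 *	waitpid(child, &status, WUNTRACED);
 *	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTSTP)
 *		kill(child, SIGCONT);
 *
 * waitpid() reports the stop only once every thread has stopped, and
 * (unless SA_NOCLDSTOP is set) the parent also receives SIGCHLD with
 * si_code == CLD_STOPPED.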
*/ static bool do_signal_stop(int signr) __releases(&current->sighand->siglock) { struct signal_struct *sig = current->signal; if (!(current->jobctl & JOBCTL_STOP_PENDING)) { unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; struct task_struct *t; /* signr will be recorded in task->jobctl for retries */ WARN_ON_ONCE(signr & ~JOBCTL_STOP_SIGMASK); if (!likely(current->jobctl & JOBCTL_STOP_DEQUEUED) || unlikely(sig->flags & SIGNAL_GROUP_EXIT) || unlikely(sig->group_exec_task)) return false; /* * There is no group stop already in progress. We must * initiate one now. * * While ptraced, a task may be resumed while group stop is * still in effect and then receive a stop signal and * initiate another group stop. This deviates from the * usual behavior as two consecutive stop signals can't * cause two group stops when !ptraced. That is why we * also check !task_is_stopped(t) below. * * The condition can be distinguished by testing whether * SIGNAL_STOP_STOPPED is already set. Don't generate * group_exit_code in such case. * * This is not necessary for SIGNAL_STOP_CONTINUED because * an intervening stop signal is required to cause two * continued events regardless of ptrace. */ if (!(sig->flags & SIGNAL_STOP_STOPPED)) sig->group_exit_code = signr; sig->group_stop_count = 0; if (task_set_jobctl_pending(current, signr | gstop)) sig->group_stop_count++; for_other_threads(current, t) { /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, * so this check has no races. */ if (!task_is_stopped(t) && task_set_jobctl_pending(t, signr | gstop)) { sig->group_stop_count++; if (likely(!(t->ptrace & PT_SEIZED))) signal_wake_up(t, 0); else ptrace_trap_notify(t); } } } if (likely(!current->ptrace)) { int notify = 0; /* * If there are no other threads in the group, or if there * is a group stop in progress and we are the last to stop, * report to the parent. */ if (task_participate_group_stop(current)) notify = CLD_STOPPED; current->jobctl |= JOBCTL_STOPPED; set_special_state(TASK_STOPPED); spin_unlock_irq(&current->sighand->siglock); /* * Notify the parent of the group stop completion. Because * we're not holding either the siglock or tasklist_lock * here, ptracer may attach inbetween; however, this is for * group stop and should always be delivered to the real * parent of the group leader. The new ptracer will get * its notification when this task transitions into * TASK_TRACED. */ if (notify) { read_lock(&tasklist_lock); do_notify_parent_cldstop(current, false, notify); read_unlock(&tasklist_lock); } /* Now we don't run again until woken by SIGCONT or SIGKILL */ cgroup_enter_frozen(); schedule(); return true; } else { /* * While ptraced, group stop is handled by STOP trap. * Schedule it and let the caller deal with it. */ task_set_jobctl_pending(current, JOBCTL_TRAP_STOP); return false; } } /** * do_jobctl_trap - take care of ptrace jobctl traps * * When PT_SEIZED, it's used for both group stop and explicit * SEIZE/INTERRUPT traps. Both generate PTRACE_EVENT_STOP trap with * accompanying siginfo. If stopped, lower eight bits of exit_code contain * the stop signal; otherwise, %SIGTRAP. * * When !PT_SEIZED, it's used only for group stop trap with stop signal * number as exit_code and no siginfo. * * CONTEXT: * Must be called with @current->sighand->siglock held, which may be * released and re-acquired before returning with intervening sleep. 
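 *
 * For reference, a PTRACE_SEIZE-based tracer typically tells these traps
 * apart by the event number packed into the wait status (illustrative
 * userspace sketch, not part of this file; "tracee" was attached with
 * PTRACE_SEIZE):
 *
 *	int status;
 *
 *	waitpid(tracee, &status, 0);
 *	if (WIFSTOPPED(status) && (status >> 16) == PTRACE_EVENT_STOP) {
 *		int sig = WSTOPSIG(status);
 *		// sig is the stop signal for a group stop, or SIGTRAP
 *		// for a plain SEIZE/INTERRUPT trap
 *	}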
*/ static void do_jobctl_trap(void) { struct signal_struct *signal = current->signal; int signr = current->jobctl & JOBCTL_STOP_SIGMASK; if (current->ptrace & PT_SEIZED) { if (!signal->group_stop_count && !(signal->flags & SIGNAL_STOP_STOPPED)) signr = SIGTRAP; WARN_ON_ONCE(!signr); ptrace_do_notify(signr, signr | (PTRACE_EVENT_STOP << 8), CLD_STOPPED, 0); } else { WARN_ON_ONCE(!signr); ptrace_stop(signr, CLD_STOPPED, 0, NULL); } } /** * do_freezer_trap - handle the freezer jobctl trap * * Puts the task into frozen state, if only the task is not about to quit. * In this case it drops JOBCTL_TRAP_FREEZE. * * CONTEXT: * Must be called with @current->sighand->siglock held, * which is always released before returning. */ static void do_freezer_trap(void) __releases(&current->sighand->siglock) { /* * If there are other trap bits pending except JOBCTL_TRAP_FREEZE, * let's make another loop to give it a chance to be handled. * In any case, we'll return back. */ if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) != JOBCTL_TRAP_FREEZE) { spin_unlock_irq(&current->sighand->siglock); return; } /* * Now we're sure that there is no pending fatal signal and no * pending traps. Clear TIF_SIGPENDING to not get out of schedule() * immediately (if there is a non-fatal signal pending), and * put the task into sleep. */ __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); clear_thread_flag(TIF_SIGPENDING); spin_unlock_irq(&current->sighand->siglock); cgroup_enter_frozen(); schedule(); } static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) { /* * We do not check sig_kernel_stop(signr) but set this marker * unconditionally because we do not know whether debugger will * change signr. This flag has no meaning unless we are going * to stop after return from ptrace_stop(). In this case it will * be checked in do_signal_stop(), we should only stop if it was * not cleared by SIGCONT while we were sleeping. See also the * comment in dequeue_signal(). */ current->jobctl |= JOBCTL_STOP_DEQUEUED; signr = ptrace_stop(signr, CLD_TRAPPED, 0, info); /* We're back. Did the debugger cancel the sig? */ if (signr == 0) return signr; /* * Update the siginfo structure if the signal has * changed. If the debugger wanted something * specific in the siginfo structure then it should * have updated *info via PTRACE_SETSIGINFO. */ if (signr != info->si_signo) { clear_siginfo(info); info->si_signo = signr; info->si_errno = 0; info->si_code = SI_USER; rcu_read_lock(); info->si_pid = task_pid_vnr(current->parent); info->si_uid = from_kuid_munged(current_user_ns(), task_uid(current->parent)); rcu_read_unlock(); } /* If the (new) signal is now blocked, requeue it. 
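 * The debugger may have replaced the dequeued signal with one this task
 * currently blocks, or a fatal signal may have arrived while we were
 * stopped.  Re-queueing instead of delivering now preserves the blocked
 * mask semantics and lets a pending fatal signal take precedence.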
*/ if (sigismember(&current->blocked, signr) || fatal_signal_pending(current)) { send_signal_locked(signr, info, current, type); signr = 0; } return signr; } static void hide_si_addr_tag_bits(struct ksignal *ksig) { switch (siginfo_layout(ksig->sig, ksig->info.si_code)) { case SIL_FAULT: case SIL_FAULT_TRAPNO: case SIL_FAULT_MCEERR: case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: case SIL_FAULT_PERF_EVENT: ksig->info.si_addr = arch_untagged_si_addr( ksig->info.si_addr, ksig->sig, ksig->info.si_code); break; case SIL_KILL: case SIL_TIMER: case SIL_POLL: case SIL_CHLD: case SIL_RT: case SIL_SYS: break; } } bool get_signal(struct ksignal *ksig) { struct sighand_struct *sighand = current->sighand; struct signal_struct *signal = current->signal; int signr; clear_notify_signal(); if (unlikely(task_work_pending(current))) task_work_run(); if (!task_sigpending(current)) return false; if (unlikely(uprobe_deny_signal())) return false; /* * Do this once, we can't return to user-mode if freezing() == T. * do_signal_stop() and ptrace_stop() do freezable_schedule() and * thus do not need another check after return. */ try_to_freeze(); relock: spin_lock_irq(&sighand->siglock); /* * Every stopped thread goes here after wakeup. Check to see if * we should notify the parent, prepare_signal(SIGCONT) encodes * the CLD_ si_code into SIGNAL_CLD_MASK bits. */ if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { int why; if (signal->flags & SIGNAL_CLD_CONTINUED) why = CLD_CONTINUED; else why = CLD_STOPPED; signal->flags &= ~SIGNAL_CLD_MASK; spin_unlock_irq(&sighand->siglock); /* * Notify the parent that we're continuing. This event is * always per-process and doesn't make whole lot of sense * for ptracers, who shouldn't consume the state via * wait(2) either, but, for backward compatibility, notify * the ptracer of the group leader too unless it's gonna be * a duplicate. */ read_lock(&tasklist_lock); do_notify_parent_cldstop(current, false, why); if (ptrace_reparented(current->group_leader)) do_notify_parent_cldstop(current->group_leader, true, why); read_unlock(&tasklist_lock); goto relock; } for (;;) { struct k_sigaction *ka; enum pid_type type; /* Has this task already been marked for death? */ if ((signal->flags & SIGNAL_GROUP_EXIT) || signal->group_exec_task) { signr = SIGKILL; sigdelset(&current->pending.signal, SIGKILL); trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, &sighand->action[SIGKILL-1]); recalc_sigpending(); /* * implies do_group_exit() or return to PF_USER_WORKER, * no need to initialize ksig->info/etc. */ goto fatal; } if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) && do_signal_stop(0)) goto relock; if (unlikely(current->jobctl & (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) { if (current->jobctl & JOBCTL_TRAP_MASK) { do_jobctl_trap(); spin_unlock_irq(&sighand->siglock); } else if (current->jobctl & JOBCTL_TRAP_FREEZE) do_freezer_trap(); goto relock; } /* * If the task is leaving the frozen state, let's update * cgroup counters and reset the frozen bit. */ if (unlikely(cgroup_task_frozen(current))) { spin_unlock_irq(&sighand->siglock); cgroup_leave_frozen(false); goto relock; } /* * Signals generated by the execution of an instruction * need to be delivered before any other pending signals * so that the instruction pointer in the signal stack * frame points to the faulting instruction. 
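 *
 * For example, a handler installed with SA_SIGINFO for a fault signal
 * relies on exactly this ordering (illustrative userspace sketch, not
 * part of this file):
 *
 *	static void segv_handler(int sig, siginfo_t *si, void *ucontext)
 *	{
 *		// si->si_addr is the faulting address, and the program
 *		// counter saved in ucontext points at the instruction
 *		// that raised the fault rather than at unrelated code.
 *	}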
*/ type = PIDTYPE_PID; signr = dequeue_synchronous_signal(&ksig->info); if (!signr) signr = dequeue_signal(current, &current->blocked, &ksig->info, &type); if (!signr) break; /* will return 0 */ if (unlikely(current->ptrace) && (signr != SIGKILL) && !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) { signr = ptrace_signal(signr, &ksig->info, type); if (!signr) continue; } ka = &sighand->action[signr-1]; /* Trace actually delivered signals. */ trace_signal_deliver(signr, &ksig->info, ka); if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { /* Run the handler. */ ksig->ka = *ka; if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; break; /* will return non-zero "signr" value */ } /* * Now we are doing the default action for this signal. */ if (sig_kernel_ignore(signr)) /* Default is nothing. */ continue; /* * Global init gets no signals it doesn't want. * Container-init gets no signals it doesn't want from same * container. * * Note that if global/container-init sees a sig_kernel_only() * signal here, the signal must have been generated internally * or must have come from an ancestor namespace. In either * case, the signal cannot be dropped. */ if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && !sig_kernel_only(signr)) continue; if (sig_kernel_stop(signr)) { /* * The default action is to stop all threads in * the thread group. The job control signals * do nothing in an orphaned pgrp, but SIGSTOP * always works. Note that siglock needs to be * dropped during the call to is_orphaned_pgrp() * because of lock ordering with tasklist_lock. * This allows an intervening SIGCONT to be posted. * We need to check for that and bail out if necessary. */ if (signr != SIGSTOP) { spin_unlock_irq(&sighand->siglock); /* signals can be posted during this window */ if (is_current_pgrp_orphaned()) goto relock; spin_lock_irq(&sighand->siglock); } if (likely(do_signal_stop(signr))) { /* It released the siglock. */ goto relock; } /* * We didn't actually stop, due to a race * with SIGCONT or something like that. */ continue; } fatal: spin_unlock_irq(&sighand->siglock); if (unlikely(cgroup_task_frozen(current))) cgroup_leave_frozen(true); /* * Anything else is fatal, maybe with a core dump. */ current->flags |= PF_SIGNALED; if (sig_kernel_coredump(signr)) { if (print_fatal_signals) print_fatal_signal(signr); proc_coredump_connector(current); /* * If it was able to dump core, this kills all * other threads in the group and synchronizes with * their demise. If we lost the race with another * thread getting here, it set group_exit_code * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ do_coredump(&ksig->info); } /* * PF_USER_WORKER threads will catch and exit on fatal signals * themselves. They have cleanup that must be performed, so we * cannot call do_exit() on their behalf. Note that ksig won't * be properly initialized, PF_USER_WORKER's shouldn't use it. */ if (current->flags & PF_USER_WORKER) goto out; /* * Death signals, no core dump. */ do_group_exit(signr); /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); ksig->sig = signr; if (signr && !(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS)) hide_si_addr_tag_bits(ksig); out: return signr > 0; } /** * signal_delivered - called after signal delivery to update blocked signals * @ksig: kernel signal struct * @stepping: nonzero if debugger single-step or block-step in use * * This function should be called when a signal has successfully been * delivered. 
It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask * is always blocked), and the signal itself is blocked unless %SA_NODEFER * is set in @ksig->ka.sa.sa_flags. Tracing is notified. */ static void signal_delivered(struct ksignal *ksig, int stepping) { sigset_t blocked; /* A signal was successfully delivered, and the saved sigmask was stored on the signal frame, and will be restored by sigreturn. So we can simply clear the restore sigmask flag. */ clear_restore_sigmask(); sigorsets(&blocked, &current->blocked, &ksig->ka.sa.sa_mask); if (!(ksig->ka.sa.sa_flags & SA_NODEFER)) sigaddset(&blocked, ksig->sig); set_current_blocked(&blocked); if (current->sas_ss_flags & SS_AUTODISARM) sas_ss_reset(current); if (stepping) ptrace_notify(SIGTRAP, 0); } void signal_setup_done(int failed, struct ksignal *ksig, int stepping) { if (failed) force_sigsegv(ksig->sig); else signal_delivered(ksig, stepping); } /* * It could be that complete_signal() picked us to notify about the * group-wide signal. Other threads should be notified now to take * the shared signals in @which since we will not. */ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) { sigset_t retarget; struct task_struct *t; sigandsets(&retarget, &tsk->signal->shared_pending.signal, which); if (sigisemptyset(&retarget)) return; for_other_threads(tsk, t) { if (t->flags & PF_EXITING) continue; if (!has_pending_signals(&retarget, &t->blocked)) continue; /* Remove the signals this thread can handle. */ sigandsets(&retarget, &retarget, &t->blocked); if (!task_sigpending(t)) signal_wake_up(t, 0); if (sigisemptyset(&retarget)) break; } } void exit_signals(struct task_struct *tsk) { int group_stop = 0; sigset_t unblocked; /* * @tsk is about to have PF_EXITING set - lock out users which * expect stable threadgroup. */ cgroup_threadgroup_change_begin(tsk); if (thread_group_empty(tsk) || (tsk->signal->flags & SIGNAL_GROUP_EXIT)) { sched_mm_cid_exit_signals(tsk); tsk->flags |= PF_EXITING; cgroup_threadgroup_change_end(tsk); return; } spin_lock_irq(&tsk->sighand->siglock); /* * From now this task is not visible for group-wide signals, * see wants_signal(), do_signal_stop(). */ sched_mm_cid_exit_signals(tsk); tsk->flags |= PF_EXITING; cgroup_threadgroup_change_end(tsk); if (!task_sigpending(tsk)) goto out; unblocked = tsk->blocked; signotset(&unblocked); retarget_shared_pending(tsk, &unblocked); if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING) && task_participate_group_stop(tsk)) group_stop = CLD_STOPPED; out: spin_unlock_irq(&tsk->sighand->siglock); /* * If group stop has completed, deliver the notification. This * should always go to the real parent of the group leader. */ if (unlikely(group_stop)) { read_lock(&tasklist_lock); do_notify_parent_cldstop(tsk, false, group_stop); read_unlock(&tasklist_lock); } } /* * System call entry points. */ /** * sys_restart_syscall - restart a system call */ SYSCALL_DEFINE0(restart_syscall) { struct restart_block *restart = &current->restart_block; return restart->fn(restart); } long do_no_restart_syscall(struct restart_block *param) { return -EINTR; } static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) { if (task_sigpending(tsk) && !thread_group_empty(tsk)) { sigset_t newblocked; /* A set of now blocked but previously unblocked signals. 
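 * For example, if ->blocked was {SIGUSR1} and the new mask is
 * {SIGUSR1, SIGUSR2}, newblocked ends up as {SIGUSR2}: any shared
 * pending SIGUSR2 must now be retargeted to a thread that still has
 * it unblocked.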
*/ sigandnsets(&newblocked, newset, &current->blocked); retarget_shared_pending(tsk, &newblocked); } tsk->blocked = *newset; recalc_sigpending(); } /** * set_current_blocked - change current->blocked mask * @newset: new mask * * It is wrong to change ->blocked directly, this helper should be used * to ensure the process can't miss a shared signal we are going to block. */ void set_current_blocked(sigset_t *newset) { sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP)); __set_current_blocked(newset); } void __set_current_blocked(const sigset_t *newset) { struct task_struct *tsk = current; /* * In case the signal mask hasn't changed, there is nothing we need * to do. The current->blocked shouldn't be modified by other task. */ if (sigequalsets(&tsk->blocked, newset)) return; spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, newset); spin_unlock_irq(&tsk->sighand->siglock); } /* * This is also useful for kernel threads that want to temporarily * (or permanently) block certain signals. * * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel * interface happily blocks "unblockable" signals like SIGKILL * and friends. */ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) { struct task_struct *tsk = current; sigset_t newset; /* Lockless, only current can change ->blocked, never from irq */ if (oldset) *oldset = tsk->blocked; switch (how) { case SIG_BLOCK: sigorsets(&newset, &tsk->blocked, set); break; case SIG_UNBLOCK: sigandnsets(&newset, &tsk->blocked, set); break; case SIG_SETMASK: newset = *set; break; default: return -EINVAL; } __set_current_blocked(&newset); return 0; } EXPORT_SYMBOL(sigprocmask); /* * The api helps set app-provided sigmasks. * * This is useful for syscalls such as ppoll, pselect, io_pgetevents and * epoll_pwait where a new sigmask is passed from userland for the syscalls. * * Note that it does set_restore_sigmask() in advance, so it must be always * paired with restore_saved_sigmask_unless() before return from syscall. */ int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize) { sigset_t kmask; if (!umask) return 0; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&kmask, umask, sizeof(sigset_t))) return -EFAULT; set_restore_sigmask(); current->saved_sigmask = current->blocked; set_current_blocked(&kmask); return 0; } #ifdef CONFIG_COMPAT int set_compat_user_sigmask(const compat_sigset_t __user *umask, size_t sigsetsize) { sigset_t kmask; if (!umask) return 0; if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (get_compat_sigset(&kmask, umask)) return -EFAULT; set_restore_sigmask(); current->saved_sigmask = current->blocked; set_current_blocked(&kmask); return 0; } #endif /** * sys_rt_sigprocmask - change the list of currently blocked signals * @how: whether to add, remove, or set signals * @nset: stores pending signals * @oset: previous value of signal mask if non-null * @sigsetsize: size of sigset_t type */ SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, sigset_t __user *, oset, size_t, sigsetsize) { sigset_t old_set, new_set; int error; /* XXX: Don't preclude handling different sized sigset_t's. 
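 * In practice libc wrappers pass sigsetsize == _NSIG / 8, i.e. the size
 * of the kernel's sigset_t, so any other value is rejected below.
 * Typical use via the glibc wrapper (illustrative userspace sketch, not
 * part of this file):
 *
 *	sigset_t set;
 *
 *	sigemptyset(&set);
 *	sigaddset(&set, SIGINT);
 *	sigprocmask(SIG_BLOCK, &set, NULL);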
*/ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; old_set = current->blocked; if (nset) { if (copy_from_user(&new_set, nset, sizeof(sigset_t))) return -EFAULT; sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); error = sigprocmask(how, &new_set, NULL); if (error) return error; } if (oset) { if (copy_to_user(oset, &old_set, sizeof(sigset_t))) return -EFAULT; } return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, compat_sigset_t __user *, oset, compat_size_t, sigsetsize) { sigset_t old_set = current->blocked; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (nset) { sigset_t new_set; int error; if (get_compat_sigset(&new_set, nset)) return -EFAULT; sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); error = sigprocmask(how, &new_set, NULL); if (error) return error; } return oset ? put_compat_sigset(oset, &old_set, sizeof(*oset)) : 0; } #endif static void do_sigpending(sigset_t *set) { spin_lock_irq(&current->sighand->siglock); sigorsets(set, &current->pending.signal, &current->signal->shared_pending.signal); spin_unlock_irq(&current->sighand->siglock); /* Outside the lock because only this thread touches it. */ sigandsets(set, &current->blocked, set); } /** * sys_rt_sigpending - examine a pending signal that has been raised * while blocked * @uset: stores pending signals * @sigsetsize: size of sigset_t type or larger */ SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) { sigset_t set; if (sigsetsize > sizeof(*uset)) return -EINVAL; do_sigpending(&set); if (copy_to_user(uset, &set, sigsetsize)) return -EFAULT; return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, compat_size_t, sigsetsize) { sigset_t set; if (sigsetsize > sizeof(*uset)) return -EINVAL; do_sigpending(&set); return put_compat_sigset(uset, &set, sigsetsize); } #endif static const struct { unsigned char limit, layout; } sig_sicodes[] = { [SIGILL] = { NSIGILL, SIL_FAULT }, [SIGFPE] = { NSIGFPE, SIL_FAULT }, [SIGSEGV] = { NSIGSEGV, SIL_FAULT }, [SIGBUS] = { NSIGBUS, SIL_FAULT }, [SIGTRAP] = { NSIGTRAP, SIL_FAULT }, #if defined(SIGEMT) [SIGEMT] = { NSIGEMT, SIL_FAULT }, #endif [SIGCHLD] = { NSIGCHLD, SIL_CHLD }, [SIGPOLL] = { NSIGPOLL, SIL_POLL }, [SIGSYS] = { NSIGSYS, SIL_SYS }, }; static bool known_siginfo_layout(unsigned sig, int si_code) { if (si_code == SI_KERNEL) return true; else if ((si_code > SI_USER)) { if (sig_specific_sicodes(sig)) { if (si_code <= sig_sicodes[sig].limit) return true; } else if (si_code <= NSIGPOLL) return true; } else if (si_code >= SI_DETHREAD) return true; else if (si_code == SI_ASYNCNL) return true; return false; } enum siginfo_layout siginfo_layout(unsigned sig, int si_code) { enum siginfo_layout layout = SIL_KILL; if ((si_code > SI_USER) && (si_code < SI_KERNEL)) { if ((sig < ARRAY_SIZE(sig_sicodes)) && (si_code <= sig_sicodes[sig].limit)) { layout = sig_sicodes[sig].layout; /* Handle the exceptions */ if ((sig == SIGBUS) && (si_code >= BUS_MCEERR_AR) && (si_code <= BUS_MCEERR_AO)) layout = SIL_FAULT_MCEERR; else if ((sig == SIGSEGV) && (si_code == SEGV_BNDERR)) layout = SIL_FAULT_BNDERR; #ifdef SEGV_PKUERR else if ((sig == SIGSEGV) && (si_code == SEGV_PKUERR)) layout = SIL_FAULT_PKUERR; #endif else if ((sig == SIGTRAP) && (si_code == TRAP_PERF)) layout = SIL_FAULT_PERF_EVENT; else if (IS_ENABLED(CONFIG_SPARC) && (sig == SIGILL) && (si_code == ILL_ILLTRP)) layout = 
SIL_FAULT_TRAPNO; else if (IS_ENABLED(CONFIG_ALPHA) && ((sig == SIGFPE) || ((sig == SIGTRAP) && (si_code == TRAP_UNK)))) layout = SIL_FAULT_TRAPNO; } else if (si_code <= NSIGPOLL) layout = SIL_POLL; } else { if (si_code == SI_TIMER) layout = SIL_TIMER; else if (si_code == SI_SIGIO) layout = SIL_POLL; else if (si_code < 0) layout = SIL_RT; } return layout; } static inline char __user *si_expansion(const siginfo_t __user *info) { return ((char __user *)info) + sizeof(struct kernel_siginfo); } int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from) { char __user *expansion = si_expansion(to); if (copy_to_user(to, from , sizeof(struct kernel_siginfo))) return -EFAULT; if (clear_user(expansion, SI_EXPANSION_SIZE)) return -EFAULT; return 0; } static int post_copy_siginfo_from_user(kernel_siginfo_t *info, const siginfo_t __user *from) { if (unlikely(!known_siginfo_layout(info->si_signo, info->si_code))) { char __user *expansion = si_expansion(from); char buf[SI_EXPANSION_SIZE]; int i; /* * An unknown si_code might need more than * sizeof(struct kernel_siginfo) bytes. Verify all of the * extra bytes are 0. This guarantees copy_siginfo_to_user * will return this data to userspace exactly. */ if (copy_from_user(&buf, expansion, SI_EXPANSION_SIZE)) return -EFAULT; for (i = 0; i < SI_EXPANSION_SIZE; i++) { if (buf[i] != 0) return -E2BIG; } } return 0; } static int __copy_siginfo_from_user(int signo, kernel_siginfo_t *to, const siginfo_t __user *from) { if (copy_from_user(to, from, sizeof(struct kernel_siginfo))) return -EFAULT; to->si_signo = signo; return post_copy_siginfo_from_user(to, from); } int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from) { if (copy_from_user(to, from, sizeof(struct kernel_siginfo))) return -EFAULT; return post_copy_siginfo_from_user(to, from); } #ifdef CONFIG_COMPAT /** * copy_siginfo_to_external32 - copy a kernel siginfo into a compat user siginfo * @to: compat siginfo destination * @from: kernel siginfo source * * Note: This function does not work properly for the SIGCHLD on x32, but * fortunately it doesn't have to. The only valid callers for this function are * copy_siginfo_to_user32, which is overriden for x32 and the coredump code. * The latter does not care because SIGCHLD will never cause a coredump. 
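 *
 * Note that the destination is zeroed in full before the layout-specific
 * fields are filled in, so padding and unused union members never leak
 * stale kernel stack contents to userspace.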
*/ void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from) { memset(to, 0, sizeof(*to)); to->si_signo = from->si_signo; to->si_errno = from->si_errno; to->si_code = from->si_code; switch(siginfo_layout(from->si_signo, from->si_code)) { case SIL_KILL: to->si_pid = from->si_pid; to->si_uid = from->si_uid; break; case SIL_TIMER: to->si_tid = from->si_tid; to->si_overrun = from->si_overrun; to->si_int = from->si_int; break; case SIL_POLL: to->si_band = from->si_band; to->si_fd = from->si_fd; break; case SIL_FAULT: to->si_addr = ptr_to_compat(from->si_addr); break; case SIL_FAULT_TRAPNO: to->si_addr = ptr_to_compat(from->si_addr); to->si_trapno = from->si_trapno; break; case SIL_FAULT_MCEERR: to->si_addr = ptr_to_compat(from->si_addr); to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: to->si_addr = ptr_to_compat(from->si_addr); to->si_lower = ptr_to_compat(from->si_lower); to->si_upper = ptr_to_compat(from->si_upper); break; case SIL_FAULT_PKUERR: to->si_addr = ptr_to_compat(from->si_addr); to->si_pkey = from->si_pkey; break; case SIL_FAULT_PERF_EVENT: to->si_addr = ptr_to_compat(from->si_addr); to->si_perf_data = from->si_perf_data; to->si_perf_type = from->si_perf_type; to->si_perf_flags = from->si_perf_flags; break; case SIL_CHLD: to->si_pid = from->si_pid; to->si_uid = from->si_uid; to->si_status = from->si_status; to->si_utime = from->si_utime; to->si_stime = from->si_stime; break; case SIL_RT: to->si_pid = from->si_pid; to->si_uid = from->si_uid; to->si_int = from->si_int; break; case SIL_SYS: to->si_call_addr = ptr_to_compat(from->si_call_addr); to->si_syscall = from->si_syscall; to->si_arch = from->si_arch; break; } } int __copy_siginfo_to_user32(struct compat_siginfo __user *to, const struct kernel_siginfo *from) { struct compat_siginfo new; copy_siginfo_to_external32(&new, from); if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) return -EFAULT; return 0; } static int post_copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo *from) { clear_siginfo(to); to->si_signo = from->si_signo; to->si_errno = from->si_errno; to->si_code = from->si_code; switch(siginfo_layout(from->si_signo, from->si_code)) { case SIL_KILL: to->si_pid = from->si_pid; to->si_uid = from->si_uid; break; case SIL_TIMER: to->si_tid = from->si_tid; to->si_overrun = from->si_overrun; to->si_int = from->si_int; break; case SIL_POLL: to->si_band = from->si_band; to->si_fd = from->si_fd; break; case SIL_FAULT: to->si_addr = compat_ptr(from->si_addr); break; case SIL_FAULT_TRAPNO: to->si_addr = compat_ptr(from->si_addr); to->si_trapno = from->si_trapno; break; case SIL_FAULT_MCEERR: to->si_addr = compat_ptr(from->si_addr); to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: to->si_addr = compat_ptr(from->si_addr); to->si_lower = compat_ptr(from->si_lower); to->si_upper = compat_ptr(from->si_upper); break; case SIL_FAULT_PKUERR: to->si_addr = compat_ptr(from->si_addr); to->si_pkey = from->si_pkey; break; case SIL_FAULT_PERF_EVENT: to->si_addr = compat_ptr(from->si_addr); to->si_perf_data = from->si_perf_data; to->si_perf_type = from->si_perf_type; to->si_perf_flags = from->si_perf_flags; break; case SIL_CHLD: to->si_pid = from->si_pid; to->si_uid = from->si_uid; to->si_status = from->si_status; #ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) { to->si_utime = from->_sifields._sigchld_x32._utime; to->si_stime = from->_sifields._sigchld_x32._stime; } else #endif { to->si_utime = from->si_utime; to->si_stime = from->si_stime; } 
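		/*
		 * The x32 ABI is the odd one out: its _sigchld layout
		 * carries 64-bit _utime/_stime fields, unlike the regular
		 * 32-bit compat layout, hence the special-cased offsets
		 * above.
		 */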
break; case SIL_RT: to->si_pid = from->si_pid; to->si_uid = from->si_uid; to->si_int = from->si_int; break; case SIL_SYS: to->si_call_addr = compat_ptr(from->si_call_addr); to->si_syscall = from->si_syscall; to->si_arch = from->si_arch; break; } return 0; } static int __copy_siginfo_from_user32(int signo, struct kernel_siginfo *to, const struct compat_siginfo __user *ufrom) { struct compat_siginfo from; if (copy_from_user(&from, ufrom, sizeof(struct compat_siginfo))) return -EFAULT; from.si_signo = signo; return post_copy_siginfo_from_user32(to, &from); } int copy_siginfo_from_user32(struct kernel_siginfo *to, const struct compat_siginfo __user *ufrom) { struct compat_siginfo from; if (copy_from_user(&from, ufrom, sizeof(struct compat_siginfo))) return -EFAULT; return post_copy_siginfo_from_user32(to, &from); } #endif /* CONFIG_COMPAT */ /** * do_sigtimedwait - wait for queued signals specified in @which * @which: queued signals to wait for * @info: if non-null, the signal's siginfo is returned here * @ts: upper bound on process time suspension */ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info, const struct timespec64 *ts) { ktime_t *to = NULL, timeout = KTIME_MAX; struct task_struct *tsk = current; sigset_t mask = *which; enum pid_type type; int sig, ret = 0; if (ts) { if (!timespec64_valid(ts)) return -EINVAL; timeout = timespec64_to_ktime(*ts); to = &timeout; } /* * Invert the set of allowed signals to get those we want to block. */ sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); signotset(&mask); spin_lock_irq(&tsk->sighand->siglock); sig = dequeue_signal(tsk, &mask, info, &type); if (!sig && timeout) { /* * None ready, temporarily unblock those we're interested * while we are sleeping in so that we'll be awakened when * they arrive. Unblocking is always fine, we can avoid * set_current_blocked(). */ tsk->real_blocked = tsk->blocked; sigandsets(&tsk->blocked, &tsk->blocked, &mask); recalc_sigpending(); spin_unlock_irq(&tsk->sighand->siglock); __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); ret = schedule_hrtimeout_range(to, tsk->timer_slack_ns, HRTIMER_MODE_REL); spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, &tsk->real_blocked); sigemptyset(&tsk->real_blocked); sig = dequeue_signal(tsk, &mask, info, &type); } spin_unlock_irq(&tsk->sighand->siglock); if (sig) return sig; return ret ? -EINTR : -EAGAIN; } /** * sys_rt_sigtimedwait - synchronously wait for queued signals specified * in @uthese * @uthese: queued signals to wait for * @uinfo: if non-null, the signal's siginfo is returned here * @uts: upper bound on process time suspension * @sigsetsize: size of sigset_t type */ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, siginfo_t __user *, uinfo, const struct __kernel_timespec __user *, uts, size_t, sigsetsize) { sigset_t these; struct timespec64 ts; kernel_siginfo_t info; int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&these, uthese, sizeof(these))) return -EFAULT; if (uts) { if (get_timespec64(&ts, uts)) return -EFAULT; } ret = do_sigtimedwait(&these, &info, uts ? 
&ts : NULL); if (ret > 0 && uinfo) { if (copy_siginfo_to_user(uinfo, &info)) ret = -EFAULT; } return ret; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(rt_sigtimedwait_time32, const sigset_t __user *, uthese, siginfo_t __user *, uinfo, const struct old_timespec32 __user *, uts, size_t, sigsetsize) { sigset_t these; struct timespec64 ts; kernel_siginfo_t info; int ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&these, uthese, sizeof(these))) return -EFAULT; if (uts) { if (get_old_timespec32(&ts, uts)) return -EFAULT; } ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); if (ret > 0 && uinfo) { if (copy_siginfo_to_user(uinfo, &info)) ret = -EFAULT; } return ret; } #endif #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time64, compat_sigset_t __user *, uthese, struct compat_siginfo __user *, uinfo, struct __kernel_timespec __user *, uts, compat_size_t, sigsetsize) { sigset_t s; struct timespec64 t; kernel_siginfo_t info; long ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (get_compat_sigset(&s, uthese)) return -EFAULT; if (uts) { if (get_timespec64(&t, uts)) return -EFAULT; } ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); if (ret > 0 && uinfo) { if (copy_siginfo_to_user32(uinfo, &info)) ret = -EFAULT; } return ret; } #ifdef CONFIG_COMPAT_32BIT_TIME COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese, struct compat_siginfo __user *, uinfo, struct old_timespec32 __user *, uts, compat_size_t, sigsetsize) { sigset_t s; struct timespec64 t; kernel_siginfo_t info; long ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (get_compat_sigset(&s, uthese)) return -EFAULT; if (uts) { if (get_old_timespec32(&t, uts)) return -EFAULT; } ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); if (ret > 0 && uinfo) { if (copy_siginfo_to_user32(uinfo, &info)) ret = -EFAULT; } return ret; } #endif #endif static void prepare_kill_siginfo(int sig, struct kernel_siginfo *info, enum pid_type type) { clear_siginfo(info); info->si_signo = sig; info->si_errno = 0; info->si_code = (type == PIDTYPE_PID) ? SI_TKILL : SI_USER; info->si_pid = task_tgid_vnr(current); info->si_uid = from_kuid_munged(current_user_ns(), current_uid()); } /** * sys_kill - send a signal to a process * @pid: the PID of the process * @sig: signal to be sent */ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct kernel_siginfo info; prepare_kill_siginfo(sig, &info, PIDTYPE_TGID); return kill_something_info(sig, &info, pid); } /* * Verify that the signaler and signalee either are in the same pid namespace * or that the signaler's pid namespace is an ancestor of the signalee's pid * namespace. */ static bool access_pidfd_pidns(struct pid *pid) { struct pid_namespace *active = task_active_pid_ns(current); struct pid_namespace *p = ns_of_pid(pid); for (;;) { if (!p) return false; if (p == active) break; p = p->parent; } return true; } static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t __user *info) { #ifdef CONFIG_COMPAT /* * Avoid hooking up compat syscalls and instead handle necessary * conversions here. Note, this is a stop-gap measure and should not be * considered a generic solution. 
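 *
 * The main user of this helper is pidfd_send_signal() below.  From
 * userspace that path is typically reached as follows (illustrative
 * sketch, not part of this file; "pid" is the target process and no
 * dedicated libc wrapper is assumed):
 *
 *	int pidfd = pidfd_open(pid, 0);
 *
 *	syscall(SYS_pidfd_send_signal, pidfd, SIGTERM, NULL, 0);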
*/ if (in_compat_syscall()) return copy_siginfo_from_user32( kinfo, (struct compat_siginfo __user *)info); #endif return copy_siginfo_from_user(kinfo, info); } static struct pid *pidfd_to_pid(const struct file *file) { struct pid *pid; pid = pidfd_pid(file); if (!IS_ERR(pid)) return pid; return tgid_pidfd_to_pid(file); } #define PIDFD_SEND_SIGNAL_FLAGS \ (PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \ PIDFD_SIGNAL_PROCESS_GROUP) /** * sys_pidfd_send_signal - Signal a process through a pidfd * @pidfd: file descriptor of the process * @sig: signal to send * @info: signal info * @flags: future flags * * Send the signal to the thread group or to the individual thread depending * on PIDFD_THREAD. * In the future extension to @flags may be used to override the default scope * of @pidfd. * * Return: 0 on success, negative errno on failure */ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, siginfo_t __user *, info, unsigned int, flags) { int ret; struct fd f; struct pid *pid; kernel_siginfo_t kinfo; enum pid_type type; /* Enforce flags be set to 0 until we add an extension. */ if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) return -EINVAL; /* Ensure that only a single signal scope determining flag is set. */ if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1) return -EINVAL; f = fdget(pidfd); if (!f.file) return -EBADF; /* Is this a pidfd? */ pid = pidfd_to_pid(f.file); if (IS_ERR(pid)) { ret = PTR_ERR(pid); goto err; } ret = -EINVAL; if (!access_pidfd_pidns(pid)) goto err; switch (flags) { case 0: /* Infer scope from the type of pidfd. */ if (f.file->f_flags & PIDFD_THREAD) type = PIDTYPE_PID; else type = PIDTYPE_TGID; break; case PIDFD_SIGNAL_THREAD: type = PIDTYPE_PID; break; case PIDFD_SIGNAL_THREAD_GROUP: type = PIDTYPE_TGID; break; case PIDFD_SIGNAL_PROCESS_GROUP: type = PIDTYPE_PGID; break; } if (info) { ret = copy_siginfo_from_user_any(&kinfo, info); if (unlikely(ret)) goto err; ret = -EINVAL; if (unlikely(sig != kinfo.si_signo)) goto err; /* Only allow sending arbitrary signals to yourself. */ ret = -EPERM; if ((task_pid(current) != pid || type > PIDTYPE_TGID) && (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) goto err; } else { prepare_kill_siginfo(sig, &kinfo, type); } if (type == PIDTYPE_PGID) ret = kill_pgrp_info(sig, &kinfo, pid); else ret = kill_pid_info_type(sig, &kinfo, pid, type); err: fdput(f); return ret; } static int do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info) { struct task_struct *p; int error = -ESRCH; rcu_read_lock(); p = find_task_by_vpid(pid); if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { error = check_kill_permission(sig, info, p); /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. */ if (!error && sig) { error = do_send_sig_info(sig, info, p, PIDTYPE_PID); /* * If lock_task_sighand() failed we pretend the task * dies after receiving the signal. The window is tiny, * and the signal is private anyway. */ if (unlikely(error == -ESRCH)) error = 0; } } rcu_read_unlock(); return error; } static int do_tkill(pid_t tgid, pid_t pid, int sig) { struct kernel_siginfo info; prepare_kill_siginfo(sig, &info, PIDTYPE_PID); return do_send_specific(tgid, pid, sig, &info); } /** * sys_tgkill - send signal to one specific thread * @tgid: the thread group ID of the thread * @pid: the PID of the thread * @sig: signal to be sent * * This syscall also checks the @tgid and returns -ESRCH even if the PID * exists but it's not belonging to the target process anymore. 
This * method solves the problem of threads exiting and PIDs getting reused. */ SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0 || tgid <= 0) return -EINVAL; return do_tkill(tgid, pid, sig); } /** * sys_tkill - send signal to one specific task * @pid: the PID of the task * @sig: signal to be sent * * Send a signal to only one task, even if it's a CLONE_THREAD task. */ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0) return -EINVAL; return do_tkill(0, pid, sig); } static int do_rt_sigqueueinfo(pid_t pid, int sig, kernel_siginfo_t *info) { /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. */ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && (task_pid_vnr(current) != pid)) return -EPERM; /* POSIX.1b doesn't mention process groups. */ return kill_proc_info(sig, info, pid); } /** * sys_rt_sigqueueinfo - send signal information to a signal * @pid: the PID of the thread * @sig: signal to be sent * @uinfo: signal info to be sent */ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, siginfo_t __user *, uinfo) { kernel_siginfo_t info; int ret = __copy_siginfo_from_user(sig, &info, uinfo); if (unlikely(ret)) return ret; return do_rt_sigqueueinfo(pid, sig, &info); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, compat_pid_t, pid, int, sig, struct compat_siginfo __user *, uinfo) { kernel_siginfo_t info; int ret = __copy_siginfo_from_user32(sig, &info, uinfo); if (unlikely(ret)) return ret; return do_rt_sigqueueinfo(pid, sig, &info); } #endif static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, kernel_siginfo_t *info) { /* This is only valid for single tasks */ if (pid <= 0 || tgid <= 0) return -EINVAL; /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. 
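 *
 * Userspace typically reaches this through pthread_sigqueue(3)-style
 * interfaces, which use the negative SI_QUEUE code and attach a payload
 * (illustrative sketch, not part of this file; "thread" is a pthread_t
 * in the caller's process):
 *
 *	union sigval v = { .sival_int = 42 };
 *
 *	pthread_sigqueue(thread, SIGUSR1, v);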
*/ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && (task_pid_vnr(current) != pid)) return -EPERM; return do_send_specific(tgid, pid, sig, info); } SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, siginfo_t __user *, uinfo) { kernel_siginfo_t info; int ret = __copy_siginfo_from_user(sig, &info, uinfo); if (unlikely(ret)) return ret; return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, compat_pid_t, tgid, compat_pid_t, pid, int, sig, struct compat_siginfo __user *, uinfo) { kernel_siginfo_t info; int ret = __copy_siginfo_from_user32(sig, &info, uinfo); if (unlikely(ret)) return ret; return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); } #endif /* * For kthreads only, must not be used if cloned with CLONE_SIGHAND */ void kernel_sigaction(int sig, __sighandler_t action) { spin_lock_irq(&current->sighand->siglock); current->sighand->action[sig - 1].sa.sa_handler = action; if (action == SIG_IGN) { sigset_t mask; sigemptyset(&mask); sigaddset(&mask, sig); flush_sigqueue_mask(&mask, &current->signal->shared_pending); flush_sigqueue_mask(&mask, &current->pending); recalc_sigpending(); } spin_unlock_irq(&current->sighand->siglock); } EXPORT_SYMBOL(kernel_sigaction); void __weak sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) { } int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *p = current, *t; struct k_sigaction *k; sigset_t mask; if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) return -EINVAL; k = &p->sighand->action[sig-1]; spin_lock_irq(&p->sighand->siglock); if (k->sa.sa_flags & SA_IMMUTABLE) { spin_unlock_irq(&p->sighand->siglock); return -EINVAL; } if (oact) *oact = *k; /* * Make sure that we never accidentally claim to support SA_UNSUPPORTED, * e.g. by having an architecture use the bit in their uapi. */ BUILD_BUG_ON(UAPI_SA_FLAGS & SA_UNSUPPORTED); /* * Clear unknown flag bits in order to allow userspace to detect missing * support for flag bits and to allow the kernel to use non-uapi bits * internally. */ if (act) act->sa.sa_flags &= UAPI_SA_FLAGS; if (oact) oact->sa.sa_flags &= UAPI_SA_FLAGS; sigaction_compat_abi(act, oact); if (act) { sigdelsetmask(&act->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); *k = *act; /* * POSIX 3.3.1.3: * "Setting a signal action to SIG_IGN for a signal that is * pending shall cause the pending signal to be discarded, * whether or not it is blocked." 
* * "Setting a signal action to SIG_DFL for a signal that is * pending and whose default action is to ignore the signal * (for example, SIGCHLD), shall cause the pending signal to * be discarded, whether or not it is blocked" */ if (sig_handler_ignored(sig_handler(p, sig), sig)) { sigemptyset(&mask); sigaddset(&mask, sig); flush_sigqueue_mask(&mask, &p->signal->shared_pending); for_each_thread(p, t) flush_sigqueue_mask(&mask, &t->pending); } } spin_unlock_irq(&p->sighand->siglock); return 0; } #ifdef CONFIG_DYNAMIC_SIGFRAME static inline void sigaltstack_lock(void) __acquires(&current->sighand->siglock) { spin_lock_irq(&current->sighand->siglock); } static inline void sigaltstack_unlock(void) __releases(&current->sighand->siglock) { spin_unlock_irq(&current->sighand->siglock); } #else static inline void sigaltstack_lock(void) { } static inline void sigaltstack_unlock(void) { } #endif static int do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, size_t min_ss_size) { struct task_struct *t = current; int ret = 0; if (oss) { memset(oss, 0, sizeof(stack_t)); oss->ss_sp = (void __user *) t->sas_ss_sp; oss->ss_size = t->sas_ss_size; oss->ss_flags = sas_ss_flags(sp) | (current->sas_ss_flags & SS_FLAG_BITS); } if (ss) { void __user *ss_sp = ss->ss_sp; size_t ss_size = ss->ss_size; unsigned ss_flags = ss->ss_flags; int ss_mode; if (unlikely(on_sig_stack(sp))) return -EPERM; ss_mode = ss_flags & ~SS_FLAG_BITS; if (unlikely(ss_mode != SS_DISABLE && ss_mode != SS_ONSTACK && ss_mode != 0)) return -EINVAL; /* * Return before taking any locks if no actual * sigaltstack changes were requested. */ if (t->sas_ss_sp == (unsigned long)ss_sp && t->sas_ss_size == ss_size && t->sas_ss_flags == ss_flags) return 0; sigaltstack_lock(); if (ss_mode == SS_DISABLE) { ss_size = 0; ss_sp = NULL; } else { if (unlikely(ss_size < min_ss_size)) ret = -ENOMEM; if (!sigaltstack_size_valid(ss_size)) ret = -ENOMEM; } if (!ret) { t->sas_ss_sp = (unsigned long) ss_sp; t->sas_ss_size = ss_size; t->sas_ss_flags = ss_flags; } sigaltstack_unlock(); } return ret; } SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) { stack_t new, old; int err; if (uss && copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, current_user_stack_pointer(), MINSIGSTKSZ); if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) err = -EFAULT; return err; } int restore_altstack(const stack_t __user *uss) { stack_t new; if (copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), MINSIGSTKSZ); /* squash all but EFAULT for now */ return 0; } int __save_altstack(stack_t __user *uss, unsigned long sp) { struct task_struct *t = current; int err = __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) | __put_user(t->sas_ss_flags, &uss->ss_flags) | __put_user(t->sas_ss_size, &uss->ss_size); return err; } #ifdef CONFIG_COMPAT static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr) { stack_t uss, uoss; int ret; if (uss_ptr) { compat_stack_t uss32; if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t))) return -EFAULT; uss.ss_sp = compat_ptr(uss32.ss_sp); uss.ss_flags = uss32.ss_flags; uss.ss_size = uss32.ss_size; } ret = do_sigaltstack(uss_ptr ? 
&uss : NULL, &uoss, compat_user_stack_pointer(), COMPAT_MINSIGSTKSZ); if (ret >= 0 && uoss_ptr) { compat_stack_t old; memset(&old, 0, sizeof(old)); old.ss_sp = ptr_to_compat(uoss.ss_sp); old.ss_flags = uoss.ss_flags; old.ss_size = uoss.ss_size; if (copy_to_user(uoss_ptr, &old, sizeof(compat_stack_t))) ret = -EFAULT; } return ret; } COMPAT_SYSCALL_DEFINE2(sigaltstack, const compat_stack_t __user *, uss_ptr, compat_stack_t __user *, uoss_ptr) { return do_compat_sigaltstack(uss_ptr, uoss_ptr); } int compat_restore_altstack(const compat_stack_t __user *uss) { int err = do_compat_sigaltstack(uss, NULL); /* squash all but -EFAULT for now */ return err == -EFAULT ? err : 0; } int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) { int err; struct task_struct *t = current; err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) | __put_user(t->sas_ss_flags, &uss->ss_flags) | __put_user(t->sas_ss_size, &uss->ss_size); return err; } #endif #ifdef __ARCH_WANT_SYS_SIGPENDING /** * sys_sigpending - examine pending signals * @uset: where mask of pending signal is returned */ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset) { sigset_t set; if (sizeof(old_sigset_t) > sizeof(*uset)) return -EINVAL; do_sigpending(&set); if (copy_to_user(uset, &set, sizeof(old_sigset_t))) return -EFAULT; return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32) { sigset_t set; do_sigpending(&set); return put_user(set.sig[0], set32); } #endif #endif #ifdef __ARCH_WANT_SYS_SIGPROCMASK /** * sys_sigprocmask - examine and change blocked signals * @how: whether to add, remove, or set signals * @nset: signals to add or remove (if non-null) * @oset: previous value of signal mask if non-null * * Some platforms have their own version with special arguments; * others support only sys_rt_sigprocmask. */ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, old_sigset_t __user *, oset) { old_sigset_t old_set, new_set; sigset_t new_blocked; old_set = current->blocked.sig[0]; if (nset) { if (copy_from_user(&new_set, nset, sizeof(*nset))) return -EFAULT; new_blocked = current->blocked; switch (how) { case SIG_BLOCK: sigaddsetmask(&new_blocked, new_set); break; case SIG_UNBLOCK: sigdelsetmask(&new_blocked, new_set); break; case SIG_SETMASK: new_blocked.sig[0] = new_set; break; default: return -EINVAL; } set_current_blocked(&new_blocked); } if (oset) { if (copy_to_user(oset, &old_set, sizeof(*oset))) return -EFAULT; } return 0; } #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ #ifndef CONFIG_ODD_RT_SIGACTION /** * sys_rt_sigaction - alter an action taken by a process * @sig: signal to be sent * @act: new sigaction * @oact: used to save the previous sigaction * @sigsetsize: size of sigset_t type */ SYSCALL_DEFINE4(rt_sigaction, int, sig, const struct sigaction __user *, act, struct sigaction __user *, oact, size_t, sigsetsize) { struct k_sigaction new_sa, old_sa; int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) return -EFAULT; ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? 
&old_sa : NULL); if (ret) return ret; if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) return -EFAULT; return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, const struct compat_sigaction __user *, act, struct compat_sigaction __user *, oact, compat_size_t, sigsetsize) { struct k_sigaction new_ka, old_ka; #ifdef __ARCH_HAS_SA_RESTORER compat_uptr_t restorer; #endif int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (act) { compat_uptr_t handler; ret = get_user(handler, &act->sa_handler); new_ka.sa.sa_handler = compat_ptr(handler); #ifdef __ARCH_HAS_SA_RESTORER ret |= get_user(restorer, &act->sa_restorer); new_ka.sa.sa_restorer = compat_ptr(restorer); #endif ret |= get_compat_sigset(&new_ka.sa.sa_mask, &act->sa_mask); ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) return -EFAULT; } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); ret |= put_compat_sigset(&oact->sa_mask, &old_ka.sa.sa_mask, sizeof(oact->sa_mask)); ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); #ifdef __ARCH_HAS_SA_RESTORER ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); #endif } return ret; } #endif #endif /* !CONFIG_ODD_RT_SIGACTION */ #ifdef CONFIG_OLD_SIGACTION SYSCALL_DEFINE3(sigaction, int, sig, const struct old_sigaction __user *, act, struct old_sigaction __user *, oact) { struct k_sigaction new_ka, old_ka; int ret; if (act) { old_sigset_t mask; if (!access_ok(act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || __get_user(mask, &act->sa_mask)) return -EFAULT; #ifdef __ARCH_HAS_KA_RESTORER new_ka.ka_restorer = NULL; #endif siginitset(&new_ka.sa.sa_mask, mask); } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { if (!access_ok(oact, sizeof(*oact)) || __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; } return ret; } #endif #ifdef CONFIG_COMPAT_OLD_SIGACTION COMPAT_SYSCALL_DEFINE3(sigaction, int, sig, const struct compat_old_sigaction __user *, act, struct compat_old_sigaction __user *, oact) { struct k_sigaction new_ka, old_ka; int ret; compat_old_sigset_t mask; compat_uptr_t handler, restorer; if (act) { if (!access_ok(act, sizeof(*act)) || __get_user(handler, &act->sa_handler) || __get_user(restorer, &act->sa_restorer) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || __get_user(mask, &act->sa_mask)) return -EFAULT; #ifdef __ARCH_HAS_KA_RESTORER new_ka.ka_restorer = NULL; #endif new_ka.sa.sa_handler = compat_ptr(handler); new_ka.sa.sa_restorer = compat_ptr(restorer); siginitset(&new_ka.sa.sa_mask, mask); } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { if (!access_ok(oact, sizeof(*oact)) || __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; } return ret; } #endif #ifdef CONFIG_SGETMASK_SYSCALL /* * For backwards compatibility. 
Functionality superseded by sigprocmask. */ SYSCALL_DEFINE0(sgetmask) { /* SMP safe */ return current->blocked.sig[0]; } SYSCALL_DEFINE1(ssetmask, int, newmask) { int old = current->blocked.sig[0]; sigset_t newset; siginitset(&newset, newmask); set_current_blocked(&newset); return old; } #endif /* CONFIG_SGETMASK_SYSCALL */ #ifdef __ARCH_WANT_SYS_SIGNAL /* * For backwards compatibility. Functionality superseded by sigaction. */ SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) { struct k_sigaction new_sa, old_sa; int ret; new_sa.sa.sa_handler = handler; new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; sigemptyset(&new_sa.sa.sa_mask); ret = do_sigaction(sig, &new_sa, &old_sa); return ret ? ret : (unsigned long)old_sa.sa.sa_handler; } #endif /* __ARCH_WANT_SYS_SIGNAL */ #ifdef __ARCH_WANT_SYS_PAUSE SYSCALL_DEFINE0(pause) { while (!signal_pending(current)) { __set_current_state(TASK_INTERRUPTIBLE); schedule(); } return -ERESTARTNOHAND; } #endif static int sigsuspend(sigset_t *set) { current->saved_sigmask = current->blocked; set_current_blocked(set); while (!signal_pending(current)) { __set_current_state(TASK_INTERRUPTIBLE); schedule(); } set_restore_sigmask(); return -ERESTARTNOHAND; } /** * sys_rt_sigsuspend - replace the signal mask for a value with the * @unewset value until a signal is received * @unewset: new signal mask value * @sigsetsize: size of sigset_t type */ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) { sigset_t newset; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&newset, unewset, sizeof(newset))) return -EFAULT; return sigsuspend(&newset); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(rt_sigsuspend, compat_sigset_t __user *, unewset, compat_size_t, sigsetsize) { sigset_t newset; /* XXX: Don't preclude handling different sized sigset_t's. 
*/ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (get_compat_sigset(&newset, unewset)) return -EFAULT; return sigsuspend(&newset); } #endif #ifdef CONFIG_OLD_SIGSUSPEND SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) { sigset_t blocked; siginitset(&blocked, mask); return sigsuspend(&blocked); } #endif #ifdef CONFIG_OLD_SIGSUSPEND3 SYSCALL_DEFINE3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask) { sigset_t blocked; siginitset(&blocked, mask); return sigsuspend(&blocked); } #endif __weak const char *arch_vma_name(struct vm_area_struct *vma) { return NULL; } static inline void siginfo_buildtime_checks(void) { BUILD_BUG_ON(sizeof(struct siginfo) != SI_MAX_SIZE); /* Verify the offsets in the two siginfos match */ #define CHECK_OFFSET(field) \ BUILD_BUG_ON(offsetof(siginfo_t, field) != offsetof(kernel_siginfo_t, field)) /* kill */ CHECK_OFFSET(si_pid); CHECK_OFFSET(si_uid); /* timer */ CHECK_OFFSET(si_tid); CHECK_OFFSET(si_overrun); CHECK_OFFSET(si_value); /* rt */ CHECK_OFFSET(si_pid); CHECK_OFFSET(si_uid); CHECK_OFFSET(si_value); /* sigchld */ CHECK_OFFSET(si_pid); CHECK_OFFSET(si_uid); CHECK_OFFSET(si_status); CHECK_OFFSET(si_utime); CHECK_OFFSET(si_stime); /* sigfault */ CHECK_OFFSET(si_addr); CHECK_OFFSET(si_trapno); CHECK_OFFSET(si_addr_lsb); CHECK_OFFSET(si_lower); CHECK_OFFSET(si_upper); CHECK_OFFSET(si_pkey); CHECK_OFFSET(si_perf_data); CHECK_OFFSET(si_perf_type); CHECK_OFFSET(si_perf_flags); /* sigpoll */ CHECK_OFFSET(si_band); CHECK_OFFSET(si_fd); /* sigsys */ CHECK_OFFSET(si_call_addr); CHECK_OFFSET(si_syscall); CHECK_OFFSET(si_arch); #undef CHECK_OFFSET /* usb asyncio */ BUILD_BUG_ON(offsetof(struct siginfo, si_pid) != offsetof(struct siginfo, si_addr)); if (sizeof(int) == sizeof(void __user *)) { BUILD_BUG_ON(sizeof_field(struct siginfo, si_pid) != sizeof(void __user *)); } else { BUILD_BUG_ON((sizeof_field(struct siginfo, si_pid) + sizeof_field(struct siginfo, si_uid)) != sizeof(void __user *)); BUILD_BUG_ON(offsetofend(struct siginfo, si_pid) != offsetof(struct siginfo, si_uid)); } #ifdef CONFIG_COMPAT BUILD_BUG_ON(offsetof(struct compat_siginfo, si_pid) != offsetof(struct compat_siginfo, si_addr)); BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) != sizeof(compat_uptr_t)); BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) != sizeof_field(struct siginfo, si_pid)); #endif } #if defined(CONFIG_SYSCTL) static struct ctl_table signal_debug_table[] = { #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE { .procname = "exception-trace", .data = &show_unhandled_signals, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, #endif { } }; static int __init init_signal_sysctls(void) { register_sysctl_init("debug", signal_debug_table); return 0; } early_initcall(init_signal_sysctls); #endif /* CONFIG_SYSCTL */ void __init signals_init(void) { siginfo_buildtime_checks(); sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC | SLAB_ACCOUNT); } #ifdef CONFIG_KGDB_KDB #include <linux/kdb.h> /* * kdb_send_sig - Allows kdb to send signals without exposing * signal internals. This function checks if the required locks are * available before calling the main signal code, to avoid kdb * deadlocks. 
*/ void kdb_send_sig(struct task_struct *t, int sig) { static struct task_struct *kdb_prev_t; int new_t, ret; if (!spin_trylock(&t->sighand->siglock)) { kdb_printf("Can't do kill command now.\n" "The sigmask lock is held somewhere else in " "kernel, try again later\n"); return; } new_t = kdb_prev_t != t; kdb_prev_t = t; if (!task_is_running(t) && new_t) { spin_unlock(&t->sighand->siglock); kdb_printf("Process is not RUNNING, sending a signal from " "kdb risks deadlock\n" "on the run queue locks. " "The signal has _not_ been sent.\n" "Reissue the kill command if you want to risk " "the deadlock.\n"); return; } ret = send_signal_locked(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); spin_unlock(&t->sighand->siglock); if (ret) kdb_printf("Fail to deliver Signal %d to process %d.\n", sig, t->pid); else kdb_printf("Signal %d is sent to process %d.\n", sig, t->pid); } #endif /* CONFIG_KGDB_KDB */
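/*
 * Illustrative user-space sketch (not part of the kernel sources above):
 * it exercises the syscalls implemented in this file through their libc
 * wrappers -- sigaltstack() for an alternate signal stack, sigaction() to
 * install a handler that runs on it, sigprocmask() to block SIGUSR1, and
 * sigsuspend() to wait for delivery atomically. A minimal sketch assuming
 * a POSIX userland; all names below are local to the example.
 */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static volatile sig_atomic_t got_usr1;

static void usr1_handler(int sig, siginfo_t *info, void *ucontext)
{
	(void)sig; (void)info; (void)ucontext;
	got_usr1 = 1;			/* async-signal-safe: only set a flag */
}

int main(void)
{
	struct sigaction sa = { 0 };
	sigset_t block, old, wait_mask;
	stack_t ss = { 0 };

	/* Alternate stack so the handler can run even on stack overflow. */
	ss.ss_size = SIGSTKSZ;
	ss.ss_sp = malloc(ss.ss_size);
	if (!ss.ss_sp || sigaltstack(&ss, NULL))
		return 1;

	/* Install the handler; SA_ONSTACK selects the alternate stack. */
	sa.sa_sigaction = usr1_handler;
	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGUSR1, &sa, NULL))
		return 1;

	/* Block SIGUSR1 so delivery can only happen inside sigsuspend(). */
	sigemptyset(&block);
	sigaddset(&block, SIGUSR1);
	if (sigprocmask(SIG_BLOCK, &block, &old))
		return 1;

	kill(getpid(), SIGUSR1);	/* now pending, not yet delivered */

	/* Atomically swap in a mask without SIGUSR1 and wait for delivery. */
	wait_mask = old;
	sigdelset(&wait_mask, SIGUSR1);
	while (!got_usr1)
		sigsuspend(&wait_mask);	/* returns -1/EINTR after the handler */

	sigprocmask(SIG_SETMASK, &old, NULL);
	printf("SIGUSR1 handled on the alternate stack\n");
	return 0;
}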
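/*
 * Illustrative user-space sketch (not part of the kernel sources above):
 * the siginfo_buildtime_checks() offsets pin down the siginfo ABI that
 * SA_SIGINFO handlers depend on; here si_pid/si_uid identify the sender
 * of a kill()-style signal. Minimal demo code, names local to the example.
 */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void info_handler(int sig, siginfo_t *info, void *ucontext)
{
	(void)ucontext;
	/* printf() is not async-signal-safe; acceptable for a demo only. */
	printf("signal %d sent by pid %ld (uid %ld)\n",
	       sig, (long)info->si_pid, (long)info->si_uid);
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = info_handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGUSR2, &sa, NULL))
		return 1;

	kill(getpid(), SIGUSR2);	/* handler reports our own pid and uid */
	return 0;
}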
// SPDX-License-Identifier: GPL-2.0-only /* * ACPI device specific properties support. * * Copyright (C) 2014 - 2023, Intel Corporation * All rights reserved. * * Authors: Mika Westerberg <mika.westerberg@linux.intel.com> * Darren Hart <dvhart@linux.intel.com> * Rafael J. Wysocki <rafael.j.wysocki@intel.com> * Sakari Ailus <sakari.ailus@linux.intel.com> */ #define pr_fmt(fmt) "ACPI: " fmt #include <linux/acpi.h> #include <linux/device.h> #include <linux/export.h> #include "internal.h" static int acpi_data_get_property_array(const struct acpi_device_data *data, const char *name, acpi_object_type type, const union acpi_object **obj); /* * The GUIDs here are made equivalent to each other in order to avoid extra * complexity in the properties handling code, with the caveat that the * kernel will accept certain combinations of GUID and properties that are * not defined without a warning. For instance if any of the properties * from different GUID appear in a property list of another, it will be * accepted by the kernel. Firmware validation tools should catch these. */ static const guid_t prp_guids[] = { /* ACPI _DSD device properties GUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301 */ GUID_INIT(0xdaffd814, 0x6eba, 0x4d8c, 0x8a, 0x91, 0xbc, 0x9b, 0xbf, 0x4a, 0xa3, 0x01), /* Hotplug in D3 GUID: 6211e2c0-58a3-4af3-90e1-927a4e0c55a4 */ GUID_INIT(0x6211e2c0, 0x58a3, 0x4af3, 0x90, 0xe1, 0x92, 0x7a, 0x4e, 0x0c, 0x55, 0xa4), /* External facing port GUID: efcc06cc-73ac-4bc3-bff0-76143807c389 */ GUID_INIT(0xefcc06cc, 0x73ac, 0x4bc3, 0xbf, 0xf0, 0x76, 0x14, 0x38, 0x07, 0xc3, 0x89), /* Thunderbolt GUID for IMR_VALID: c44d002f-69f9-4e7d-a904-a7baabdf43f7 */ GUID_INIT(0xc44d002f, 0x69f9, 0x4e7d, 0xa9, 0x04, 0xa7, 0xba, 0xab, 0xdf, 0x43, 0xf7), /* Thunderbolt GUID for WAKE_SUPPORTED: 6c501103-c189-4296-ba72-9bf5a26ebe5d */ GUID_INIT(0x6c501103, 0xc189, 0x4296, 0xba, 0x72, 0x9b, 0xf5, 0xa2, 0x6e, 0xbe, 0x5d), /* Storage device needs D3 GUID: 5025030f-842f-4ab4-a561-99a5189762d0 */ GUID_INIT(0x5025030f, 0x842f, 0x4ab4, 0xa5, 0x61, 0x99, 0xa5, 0x18, 0x97, 0x62, 0xd0), }; /* ACPI _DSD data subnodes GUID: dbb8e3e6-5886-4ba6-8795-1319f52a966b */ static const guid_t ads_guid = GUID_INIT(0xdbb8e3e6, 0x5886, 0x4ba6, 0x87, 0x95, 0x13, 0x19, 0xf5, 0x2a, 0x96, 0x6b); /* ACPI _DSD data buffer GUID: edb12dd0-363d-4085-a3d2-49522ca160c4 */ static const guid_t buffer_prop_guid = GUID_INIT(0xedb12dd0, 0x363d, 0x4085, 0xa3, 0xd2, 0x49, 0x52, 0x2c, 0xa1, 0x60, 0xc4); static bool acpi_enumerate_nondev_subnodes(acpi_handle scope, union acpi_object *desc, struct acpi_device_data *data, struct fwnode_handle *parent); static bool acpi_extract_properties(acpi_handle handle, union acpi_object *desc, struct acpi_device_data *data); static bool acpi_nondev_subnode_extract(union acpi_object *desc, acpi_handle handle, const union acpi_object *link, struct list_head *list, struct fwnode_handle *parent) { struct acpi_data_node *dn; bool result; if (acpi_graph_ignore_port(handle)) return false; dn = kzalloc(sizeof(*dn), GFP_KERNEL); if (!dn) return false; dn->name = link->package.elements[0].string.pointer; fwnode_init(&dn->fwnode, &acpi_data_fwnode_ops); dn->parent = parent; INIT_LIST_HEAD(&dn->data.properties); INIT_LIST_HEAD(&dn->data.subnodes); result = acpi_extract_properties(handle, desc, &dn->data); if
(handle) { acpi_handle scope; acpi_status status; /* * The scope for the subnode object lookup is the one of the * namespace node (device) containing the object that has * returned the package. That is, it's the scope of that * object's parent. */ status = acpi_get_parent(handle, &scope); if (ACPI_SUCCESS(status) && acpi_enumerate_nondev_subnodes(scope, desc, &dn->data, &dn->fwnode)) result = true; } else if (acpi_enumerate_nondev_subnodes(NULL, desc, &dn->data, &dn->fwnode)) { result = true; } if (result) { dn->handle = handle; dn->data.pointer = desc; list_add_tail(&dn->sibling, list); return true; } kfree(dn); acpi_handle_debug(handle, "Invalid properties/subnodes data, skipping\n"); return false; } static bool acpi_nondev_subnode_data_ok(acpi_handle handle, const union acpi_object *link, struct list_head *list, struct fwnode_handle *parent) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; acpi_status status; status = acpi_evaluate_object_typed(handle, NULL, NULL, &buf, ACPI_TYPE_PACKAGE); if (ACPI_FAILURE(status)) return false; if (acpi_nondev_subnode_extract(buf.pointer, handle, link, list, parent)) return true; ACPI_FREE(buf.pointer); return false; } static bool acpi_nondev_subnode_ok(acpi_handle scope, const union acpi_object *link, struct list_head *list, struct fwnode_handle *parent) { acpi_handle handle; acpi_status status; if (!scope) return false; status = acpi_get_handle(scope, link->package.elements[1].string.pointer, &handle); if (ACPI_FAILURE(status)) return false; return acpi_nondev_subnode_data_ok(handle, link, list, parent); } static bool acpi_add_nondev_subnodes(acpi_handle scope, union acpi_object *links, struct list_head *list, struct fwnode_handle *parent) { bool ret = false; int i; for (i = 0; i < links->package.count; i++) { union acpi_object *link, *desc; acpi_handle handle; bool result; link = &links->package.elements[i]; /* Only two elements allowed. */ if (link->package.count != 2) continue; /* The first one must be a string. */ if (link->package.elements[0].type != ACPI_TYPE_STRING) continue; /* The second one may be a string, a reference or a package. */ switch (link->package.elements[1].type) { case ACPI_TYPE_STRING: result = acpi_nondev_subnode_ok(scope, link, list, parent); break; case ACPI_TYPE_LOCAL_REFERENCE: handle = link->package.elements[1].reference.handle; result = acpi_nondev_subnode_data_ok(handle, link, list, parent); break; case ACPI_TYPE_PACKAGE: desc = &link->package.elements[1]; result = acpi_nondev_subnode_extract(desc, NULL, link, list, parent); break; default: result = false; break; } ret = ret || result; } return ret; } static bool acpi_enumerate_nondev_subnodes(acpi_handle scope, union acpi_object *desc, struct acpi_device_data *data, struct fwnode_handle *parent) { int i; /* Look for the ACPI data subnodes GUID. */ for (i = 0; i < desc->package.count; i += 2) { const union acpi_object *guid; union acpi_object *links; guid = &desc->package.elements[i]; links = &desc->package.elements[i + 1]; /* * The first element must be a GUID and the second one must be * a package. 
*/ if (guid->type != ACPI_TYPE_BUFFER || guid->buffer.length != 16 || links->type != ACPI_TYPE_PACKAGE) break; if (!guid_equal((guid_t *)guid->buffer.pointer, &ads_guid)) continue; return acpi_add_nondev_subnodes(scope, links, &data->subnodes, parent); } return false; } static bool acpi_property_value_ok(const union acpi_object *value) { int j; /* * The value must be an integer, a string, a reference, or a package * whose every element must be an integer, a string, or a reference. */ switch (value->type) { case ACPI_TYPE_INTEGER: case ACPI_TYPE_STRING: case ACPI_TYPE_LOCAL_REFERENCE: return true; case ACPI_TYPE_PACKAGE: for (j = 0; j < value->package.count; j++) switch (value->package.elements[j].type) { case ACPI_TYPE_INTEGER: case ACPI_TYPE_STRING: case ACPI_TYPE_LOCAL_REFERENCE: continue; default: return false; } return true; } return false; } static bool acpi_properties_format_valid(const union acpi_object *properties) { int i; for (i = 0; i < properties->package.count; i++) { const union acpi_object *property; property = &properties->package.elements[i]; /* * Only two elements allowed, the first one must be a string and * the second one has to satisfy certain conditions. */ if (property->package.count != 2 || property->package.elements[0].type != ACPI_TYPE_STRING || !acpi_property_value_ok(&property->package.elements[1])) return false; } return true; } static void acpi_init_of_compatible(struct acpi_device *adev) { const union acpi_object *of_compatible; int ret; ret = acpi_data_get_property_array(&adev->data, "compatible", ACPI_TYPE_STRING, &of_compatible); if (ret) { ret = acpi_dev_get_property(adev, "compatible", ACPI_TYPE_STRING, &of_compatible); if (ret) { struct acpi_device *parent; parent = acpi_dev_parent(adev); if (parent && parent->flags.of_compatible_ok) goto out; return; } } adev->data.of_compatible = of_compatible; out: adev->flags.of_compatible_ok = 1; } static bool acpi_is_property_guid(const guid_t *guid) { int i; for (i = 0; i < ARRAY_SIZE(prp_guids); i++) { if (guid_equal(guid, &prp_guids[i])) return true; } return false; } struct acpi_device_properties * acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid, union acpi_object *properties) { struct acpi_device_properties *props; props = kzalloc(sizeof(*props), GFP_KERNEL); if (props) { INIT_LIST_HEAD(&props->list); props->guid = guid; props->properties = properties; list_add_tail(&props->list, &data->properties); } return props; } static void acpi_nondev_subnode_tag(acpi_handle handle, void *context) { } static void acpi_untie_nondev_subnodes(struct acpi_device_data *data) { struct acpi_data_node *dn; list_for_each_entry(dn, &data->subnodes, sibling) { acpi_detach_data(dn->handle, acpi_nondev_subnode_tag); acpi_untie_nondev_subnodes(&dn->data); } } static bool acpi_tie_nondev_subnodes(struct acpi_device_data *data) { struct acpi_data_node *dn; list_for_each_entry(dn, &data->subnodes, sibling) { acpi_status status; bool ret; status = acpi_attach_data(dn->handle, acpi_nondev_subnode_tag, dn); if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) { acpi_handle_err(dn->handle, "Can't tag data node\n"); return false; } ret = acpi_tie_nondev_subnodes(&dn->data); if (!ret) return ret; } return true; } static void acpi_data_add_buffer_props(acpi_handle handle, struct acpi_device_data *data, union acpi_object *properties) { struct acpi_device_properties *props; union acpi_object *package; size_t alloc_size; unsigned int i; u32 *count; if (check_mul_overflow((size_t)properties->package.count, 
sizeof(*package) + sizeof(void *), &alloc_size) || check_add_overflow(sizeof(*props) + sizeof(*package), alloc_size, &alloc_size)) { acpi_handle_warn(handle, "can't allocate memory for %u buffer props", properties->package.count); return; } props = kvzalloc(alloc_size, GFP_KERNEL); if (!props) return; props->guid = &buffer_prop_guid; props->bufs = (void *)(props + 1); props->properties = (void *)(props->bufs + properties->package.count); /* Outer package */ package = props->properties; package->type = ACPI_TYPE_PACKAGE; package->package.elements = package + 1; count = &package->package.count; *count = 0; /* Inner packages */ package++; for (i = 0; i < properties->package.count; i++) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; union acpi_object *property = &properties->package.elements[i]; union acpi_object *prop, *obj, *buf_obj; acpi_status status; if (property->type != ACPI_TYPE_PACKAGE || property->package.count != 2) { acpi_handle_warn(handle, "buffer property %u has %u entries\n", i, property->package.count); continue; } prop = &property->package.elements[0]; obj = &property->package.elements[1]; if (prop->type != ACPI_TYPE_STRING || obj->type != ACPI_TYPE_STRING) { acpi_handle_warn(handle, "wrong object types %u and %u\n", prop->type, obj->type); continue; } status = acpi_evaluate_object_typed(handle, obj->string.pointer, NULL, &buf, ACPI_TYPE_BUFFER); if (ACPI_FAILURE(status)) { acpi_handle_warn(handle, "can't evaluate \"%*pE\" as buffer\n", obj->string.length, obj->string.pointer); continue; } package->type = ACPI_TYPE_PACKAGE; package->package.elements = prop; package->package.count = 2; buf_obj = buf.pointer; /* Replace the string object with a buffer object */ obj->type = ACPI_TYPE_BUFFER; obj->buffer.length = buf_obj->buffer.length; obj->buffer.pointer = buf_obj->buffer.pointer; props->bufs[i] = buf.pointer; package++; (*count)++; } if (*count) list_add(&props->list, &data->properties); else kvfree(props); } static bool acpi_extract_properties(acpi_handle scope, union acpi_object *desc, struct acpi_device_data *data) { int i; if (desc->package.count % 2) return false; /* Look for the device properties GUID. */ for (i = 0; i < desc->package.count; i += 2) { const union acpi_object *guid; union acpi_object *properties; guid = &desc->package.elements[i]; properties = &desc->package.elements[i + 1]; /* * The first element must be a GUID and the second one must be * a package. */ if (guid->type != ACPI_TYPE_BUFFER || guid->buffer.length != 16 || properties->type != ACPI_TYPE_PACKAGE) break; if (guid_equal((guid_t *)guid->buffer.pointer, &buffer_prop_guid)) { acpi_data_add_buffer_props(scope, data, properties); continue; } if (!acpi_is_property_guid((guid_t *)guid->buffer.pointer)) continue; /* * We found the matching GUID. Now validate the format of the * package immediately following it. */ if (!acpi_properties_format_valid(properties)) continue; acpi_data_add_props(data, (const guid_t *)guid->buffer.pointer, properties); } return !list_empty(&data->properties); } void acpi_init_properties(struct acpi_device *adev) { struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; struct acpi_hardware_id *hwid; acpi_status status; bool acpi_of = false; INIT_LIST_HEAD(&adev->data.properties); INIT_LIST_HEAD(&adev->data.subnodes); if (!adev->handle) return; /* * Check if ACPI_DT_NAMESPACE_HID is present and inthat case we fill in * Device Tree compatible properties for this device. 
*/ list_for_each_entry(hwid, &adev->pnp.ids, list) { if (!strcmp(hwid->id, ACPI_DT_NAMESPACE_HID)) { acpi_of = true; break; } } status = acpi_evaluate_object_typed(adev->handle, "_DSD", NULL, &buf, ACPI_TYPE_PACKAGE); if (ACPI_FAILURE(status)) goto out; if (acpi_extract_properties(adev->handle, buf.pointer, &adev->data)) { adev->data.pointer = buf.pointer; if (acpi_of) acpi_init_of_compatible(adev); } if (acpi_enumerate_nondev_subnodes(adev->handle, buf.pointer, &adev->data, acpi_fwnode_handle(adev))) adev->data.pointer = buf.pointer; if (!adev->data.pointer) { acpi_handle_debug(adev->handle, "Invalid _DSD data, skipping\n"); ACPI_FREE(buf.pointer); } else { if (!acpi_tie_nondev_subnodes(&adev->data)) acpi_untie_nondev_subnodes(&adev->data); } out: if (acpi_of && !adev->flags.of_compatible_ok) acpi_handle_info(adev->handle, ACPI_DT_NAMESPACE_HID " requires 'compatible' property\n"); if (!adev->data.pointer) acpi_extract_apple_properties(adev); } static void acpi_free_device_properties(struct list_head *list) { struct acpi_device_properties *props, *tmp; list_for_each_entry_safe(props, tmp, list, list) { u32 i; list_del(&props->list); /* Buffer data properties were separately allocated */ if (props->bufs) for (i = 0; i < props->properties->package.count; i++) ACPI_FREE(props->bufs[i]); kvfree(props); } } static void acpi_destroy_nondev_subnodes(struct list_head *list) { struct acpi_data_node *dn, *next; if (list_empty(list)) return; list_for_each_entry_safe_reverse(dn, next, list, sibling) { acpi_destroy_nondev_subnodes(&dn->data.subnodes); wait_for_completion(&dn->kobj_done); list_del(&dn->sibling); ACPI_FREE((void *)dn->data.pointer); acpi_free_device_properties(&dn->data.properties); kfree(dn); } } void acpi_free_properties(struct acpi_device *adev) { acpi_untie_nondev_subnodes(&adev->data); acpi_destroy_nondev_subnodes(&adev->data.subnodes); ACPI_FREE((void *)adev->data.pointer); adev->data.of_compatible = NULL; adev->data.pointer = NULL; acpi_free_device_properties(&adev->data.properties); } /** * acpi_data_get_property - return an ACPI property with given name * @data: ACPI device deta object to get the property from * @name: Name of the property * @type: Expected property type * @obj: Location to store the property value (if not %NULL) * * Look up a property with @name and store a pointer to the resulting ACPI * object at the location pointed to by @obj if found. * * Callers must not attempt to free the returned objects. These objects will be * freed by the ACPI core automatically during the removal of @data. * * Return: %0 if property with @name has been found (success), * %-EINVAL if the arguments are invalid, * %-EINVAL if the property doesn't exist, * %-EPROTO if the property value type doesn't match @type. 
*/ static int acpi_data_get_property(const struct acpi_device_data *data, const char *name, acpi_object_type type, const union acpi_object **obj) { const struct acpi_device_properties *props; if (!data || !name) return -EINVAL; if (!data->pointer || list_empty(&data->properties)) return -EINVAL; list_for_each_entry(props, &data->properties, list) { const union acpi_object *properties; unsigned int i; properties = props->properties; for (i = 0; i < properties->package.count; i++) { const union acpi_object *propname, *propvalue; const union acpi_object *property; property = &properties->package.elements[i]; propname = &property->package.elements[0]; propvalue = &property->package.elements[1]; if (!strcmp(name, propname->string.pointer)) { if (type != ACPI_TYPE_ANY && propvalue->type != type) return -EPROTO; if (obj) *obj = propvalue; return 0; } } } return -EINVAL; } /** * acpi_dev_get_property - return an ACPI property with given name. * @adev: ACPI device to get the property from. * @name: Name of the property. * @type: Expected property type. * @obj: Location to store the property value (if not %NULL). */ int acpi_dev_get_property(const struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj) { return adev ? acpi_data_get_property(&adev->data, name, type, obj) : -EINVAL; } EXPORT_SYMBOL_GPL(acpi_dev_get_property); static const struct acpi_device_data * acpi_device_data_of_node(const struct fwnode_handle *fwnode) { if (is_acpi_device_node(fwnode)) { const struct acpi_device *adev = to_acpi_device_node(fwnode); return &adev->data; } if (is_acpi_data_node(fwnode)) { const struct acpi_data_node *dn = to_acpi_data_node(fwnode); return &dn->data; } return NULL; } /** * acpi_node_prop_get - return an ACPI property with given name. * @fwnode: Firmware node to get the property from. * @propname: Name of the property. * @valptr: Location to store a pointer to the property value (if not %NULL). */ int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr) { return acpi_data_get_property(acpi_device_data_of_node(fwnode), propname, ACPI_TYPE_ANY, (const union acpi_object **)valptr); } /** * acpi_data_get_property_array - return an ACPI array property with given name * @data: ACPI data object to get the property from * @name: Name of the property * @type: Expected type of array elements * @obj: Location to store a pointer to the property value (if not NULL) * * Look up an array property with @name and store a pointer to the resulting * ACPI object at the location pointed to by @obj if found. * * Callers must not attempt to free the returned objects. Those objects will be * freed by the ACPI core automatically during the removal of @data. * * Return: %0 if array property (package) with @name has been found (success), * %-EINVAL if the arguments are invalid, * %-EINVAL if the property doesn't exist, * %-EPROTO if the property is not a package or the type of its elements * doesn't match @type. */ static int acpi_data_get_property_array(const struct acpi_device_data *data, const char *name, acpi_object_type type, const union acpi_object **obj) { const union acpi_object *prop; int ret, i; ret = acpi_data_get_property(data, name, ACPI_TYPE_PACKAGE, &prop); if (ret) return ret; if (type != ACPI_TYPE_ANY) { /* Check that all elements are of correct type. 
*/ for (i = 0; i < prop->package.count; i++) if (prop->package.elements[i].type != type) return -EPROTO; } if (obj) *obj = prop; return 0; } static struct fwnode_handle * acpi_fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { struct fwnode_handle *child; fwnode_for_each_child_node(fwnode, child) { if (is_acpi_data_node(child)) { if (acpi_data_node_match(child, childname)) return child; continue; } if (!strncmp(acpi_device_bid(to_acpi_device_node(child)), childname, ACPI_NAMESEG_SIZE)) return child; } return NULL; } static int acpi_get_ref_args(struct fwnode_reference_args *args, struct fwnode_handle *ref_fwnode, const union acpi_object **element, const union acpi_object *end, size_t num_args) { u32 nargs = 0, i; /* * Assume the following integer elements are all args. Stop counting on * the first reference (possibly represented as a string) or end of the * package arguments. In case of neither reference, nor integer, return * an error, we can't parse it. */ for (i = 0; (*element) + i < end && i < num_args; i++) { acpi_object_type type = (*element)[i].type; if (type == ACPI_TYPE_LOCAL_REFERENCE || type == ACPI_TYPE_STRING) break; if (type == ACPI_TYPE_INTEGER) nargs++; else return -EINVAL; } if (nargs > NR_FWNODE_REFERENCE_ARGS) return -EINVAL; if (args) { args->fwnode = ref_fwnode; args->nargs = nargs; for (i = 0; i < nargs; i++) args->args[i] = (*element)[i].integer.value; } (*element) += nargs; return 0; } static struct fwnode_handle *acpi_parse_string_ref(const struct fwnode_handle *fwnode, const char *refstring) { acpi_handle scope, handle; struct acpi_data_node *dn; struct acpi_device *device; acpi_status status; if (is_acpi_device_node(fwnode)) { scope = to_acpi_device_node(fwnode)->handle; } else if (is_acpi_data_node(fwnode)) { scope = to_acpi_data_node(fwnode)->handle; } else { pr_debug("Bad node type for node %pfw\n", fwnode); return NULL; } status = acpi_get_handle(scope, refstring, &handle); if (ACPI_FAILURE(status)) { acpi_handle_debug(scope, "Unable to get an ACPI handle for %s\n", refstring); return NULL; } device = acpi_fetch_acpi_dev(handle); if (device) return acpi_fwnode_handle(device); status = acpi_get_data_full(handle, acpi_nondev_subnode_tag, (void **)&dn, NULL); if (ACPI_FAILURE(status) || !dn) { acpi_handle_debug(handle, "Subnode not found\n"); return NULL; } return &dn->fwnode; } /** * __acpi_node_get_property_reference - returns handle to the referenced object * @fwnode: Firmware node to get the property from * @propname: Name of the property * @index: Index of the reference to return * @num_args: Maximum number of arguments after each reference * @args: Location to store the returned reference with optional arguments * (may be NULL) * * Find property with @name, verifify that it is a package containing at least * one object reference and if so, store the ACPI device object pointer to the * target object in @args->adev. If the reference includes arguments, store * them in the @args->args[] array. * * If there's more than one reference in the property value package, @index is * used to select the one to return. * * It is possible to leave holes in the property value set like in the * example below: * * Package () { * "cs-gpios", * Package () { * ^GPIO, 19, 0, 0, * ^GPIO, 20, 0, 0, * 0, * ^GPIO, 21, 0, 0, * } * } * * Calling this function with index %2 or index %3 return %-ENOENT. If the * property does not contain any more values %-ENOENT is returned. The NULL * entry must be single integer and preferably contain value %0. 
* * Return: %0 on success, negative error code on failure. */ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *propname, size_t index, size_t num_args, struct fwnode_reference_args *args) { const union acpi_object *element, *end; const union acpi_object *obj; const struct acpi_device_data *data; struct fwnode_handle *ref_fwnode; struct acpi_device *device; int ret, idx = 0; data = acpi_device_data_of_node(fwnode); if (!data) return -ENOENT; ret = acpi_data_get_property(data, propname, ACPI_TYPE_ANY, &obj); if (ret) return ret == -EINVAL ? -ENOENT : -EINVAL; switch (obj->type) { case ACPI_TYPE_LOCAL_REFERENCE: /* Plain single reference without arguments. */ if (index) return -ENOENT; device = acpi_fetch_acpi_dev(obj->reference.handle); if (!device) return -EINVAL; if (!args) return 0; args->fwnode = acpi_fwnode_handle(device); args->nargs = 0; return 0; case ACPI_TYPE_STRING: if (index) return -ENOENT; ref_fwnode = acpi_parse_string_ref(fwnode, obj->string.pointer); if (!ref_fwnode) return -EINVAL; args->fwnode = ref_fwnode; args->nargs = 0; return 0; case ACPI_TYPE_PACKAGE: /* * If it is not a single reference, then it is a package of * references, followed by number of ints as follows: * * Package () { REF, INT, REF, INT, INT } * * Here, REF may be either a local reference or a string. The * index argument is then used to determine which reference the * caller wants (along with the arguments). */ break; default: return -EINVAL; } if (index >= obj->package.count) return -ENOENT; element = obj->package.elements; end = element + obj->package.count; while (element < end) { switch (element->type) { case ACPI_TYPE_LOCAL_REFERENCE: device = acpi_fetch_acpi_dev(element->reference.handle); if (!device) return -EINVAL; element++; ret = acpi_get_ref_args(idx == index ? args : NULL, acpi_fwnode_handle(device), &element, end, num_args); if (ret < 0) return ret; if (idx == index) return 0; break; case ACPI_TYPE_STRING: ref_fwnode = acpi_parse_string_ref(fwnode, element->string.pointer); if (!ref_fwnode) return -EINVAL; element++; ret = acpi_get_ref_args(idx == index ? args : NULL, ref_fwnode, &element, end, num_args); if (ret < 0) return ret; if (idx == index) return 0; break; case ACPI_TYPE_INTEGER: if (idx == index) return -ENOENT; element++; break; default: return -EINVAL; } idx++; } return -ENOENT; } EXPORT_SYMBOL_GPL(__acpi_node_get_property_reference); static int acpi_data_prop_read_single(const struct acpi_device_data *data, const char *propname, enum dev_prop_type proptype, void *val) { const union acpi_object *obj; int ret = 0; if (proptype >= DEV_PROP_U8 && proptype <= DEV_PROP_U64) ret = acpi_data_get_property(data, propname, ACPI_TYPE_INTEGER, &obj); else if (proptype == DEV_PROP_STRING) ret = acpi_data_get_property(data, propname, ACPI_TYPE_STRING, &obj); if (ret) return ret; switch (proptype) { case DEV_PROP_U8: if (obj->integer.value > U8_MAX) return -EOVERFLOW; if (val) *(u8 *)val = obj->integer.value; break; case DEV_PROP_U16: if (obj->integer.value > U16_MAX) return -EOVERFLOW; if (val) *(u16 *)val = obj->integer.value; break; case DEV_PROP_U32: if (obj->integer.value > U32_MAX) return -EOVERFLOW; if (val) *(u32 *)val = obj->integer.value; break; case DEV_PROP_U64: if (val) *(u64 *)val = obj->integer.value; break; case DEV_PROP_STRING: if (val) *(char **)val = obj->string.pointer; return 1; default: return -EINVAL; } /* When no storage provided return number of available values */ return val ? 
0 : 1; } #define acpi_copy_property_array_uint(items, val, nval) \ ({ \ typeof(items) __items = items; \ typeof(val) __val = val; \ typeof(nval) __nval = nval; \ size_t i; \ int ret = 0; \ \ for (i = 0; i < __nval; i++) { \ if (__items->type == ACPI_TYPE_BUFFER) { \ __val[i] = __items->buffer.pointer[i]; \ continue; \ } \ if (__items[i].type != ACPI_TYPE_INTEGER) { \ ret = -EPROTO; \ break; \ } \ if (__items[i].integer.value > _Generic(__val, \ u8 *: U8_MAX, \ u16 *: U16_MAX, \ u32 *: U32_MAX, \ u64 *: U64_MAX)) { \ ret = -EOVERFLOW; \ break; \ } \ \ __val[i] = __items[i].integer.value; \ } \ ret; \ }) static int acpi_copy_property_array_string(const union acpi_object *items, char **val, size_t nval) { int i; for (i = 0; i < nval; i++) { if (items[i].type != ACPI_TYPE_STRING) return -EPROTO; val[i] = items[i].string.pointer; } return nval; } static int acpi_data_prop_read(const struct acpi_device_data *data, const char *propname, enum dev_prop_type proptype, void *val, size_t nval) { const union acpi_object *obj; const union acpi_object *items; int ret; if (nval == 1 || !val) { ret = acpi_data_prop_read_single(data, propname, proptype, val); /* * The overflow error means that the property is there and it is * single-value, but its type does not match, so return. */ if (ret >= 0 || ret == -EOVERFLOW) return ret; /* * Reading this property as a single-value one failed, but its * value may still be represented as one-element array, so * continue. */ } ret = acpi_data_get_property_array(data, propname, ACPI_TYPE_ANY, &obj); if (ret && proptype >= DEV_PROP_U8 && proptype <= DEV_PROP_U64) ret = acpi_data_get_property(data, propname, ACPI_TYPE_BUFFER, &obj); if (ret) return ret; if (!val) { if (obj->type == ACPI_TYPE_BUFFER) return obj->buffer.length; return obj->package.count; } switch (proptype) { case DEV_PROP_STRING: break; default: if (obj->type == ACPI_TYPE_BUFFER) { if (nval > obj->buffer.length) return -EOVERFLOW; } else { if (nval > obj->package.count) return -EOVERFLOW; } break; } if (nval == 0) return -EINVAL; if (obj->type == ACPI_TYPE_BUFFER) { if (proptype != DEV_PROP_U8) return -EPROTO; items = obj; } else { items = obj->package.elements; } switch (proptype) { case DEV_PROP_U8: ret = acpi_copy_property_array_uint(items, (u8 *)val, nval); break; case DEV_PROP_U16: ret = acpi_copy_property_array_uint(items, (u16 *)val, nval); break; case DEV_PROP_U32: ret = acpi_copy_property_array_uint(items, (u32 *)val, nval); break; case DEV_PROP_U64: ret = acpi_copy_property_array_uint(items, (u64 *)val, nval); break; case DEV_PROP_STRING: ret = acpi_copy_property_array_string( items, (char **)val, min_t(u32, nval, obj->package.count)); break; default: ret = -EINVAL; break; } return ret; } /** * acpi_node_prop_read - retrieve the value of an ACPI property with given name. * @fwnode: Firmware node to get the property from. * @propname: Name of the property. * @proptype: Expected property type. * @val: Location to store the property value (if not %NULL). * @nval: Size of the array pointed to by @val. * * If @val is %NULL, return the number of array elements comprising the value * of the property. Otherwise, read at most @nval values to the array at the * location pointed to by @val. 
*/ static int acpi_node_prop_read(const struct fwnode_handle *fwnode, const char *propname, enum dev_prop_type proptype, void *val, size_t nval) { return acpi_data_prop_read(acpi_device_data_of_node(fwnode), propname, proptype, val, nval); } static int stop_on_next(struct acpi_device *adev, void *data) { struct acpi_device **ret_p = data; if (!*ret_p) { *ret_p = adev; return 1; } /* Skip until the "previous" object is found. */ if (*ret_p == adev) *ret_p = NULL; return 0; } /** * acpi_get_next_subnode - Return the next child node handle for a fwnode * @fwnode: Firmware node to find the next child node for. * @child: Handle to one of the device's child nodes or a null handle. */ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { struct acpi_device *adev = to_acpi_device_node(fwnode); if ((!child || is_acpi_device_node(child)) && adev) { struct acpi_device *child_adev = to_acpi_device_node(child); acpi_dev_for_each_child(adev, stop_on_next, &child_adev); if (child_adev) return acpi_fwnode_handle(child_adev); child = NULL; } if (!child || is_acpi_data_node(child)) { const struct acpi_data_node *data = to_acpi_data_node(fwnode); const struct list_head *head; struct list_head *next; struct acpi_data_node *dn; /* * We can have a combination of device and data nodes, e.g. with * hierarchical _DSD properties. Make sure the adev pointer is * restored before going through data nodes, otherwise we will * be looking for data_nodes below the last device found instead * of the common fwnode shared by device_nodes and data_nodes. */ adev = to_acpi_device_node(fwnode); if (adev) head = &adev->data.subnodes; else if (data) head = &data->data.subnodes; else return NULL; if (list_empty(head)) return NULL; if (child) { dn = to_acpi_data_node(child); next = dn->sibling.next; if (next == head) return NULL; dn = list_entry(next, struct acpi_data_node, sibling); } else { dn = list_first_entry(head, struct acpi_data_node, sibling); } return &dn->fwnode; } return NULL; } /** * acpi_node_get_parent - Return parent fwnode of this fwnode * @fwnode: Firmware node whose parent to get * * Returns parent node of an ACPI device or data firmware node or %NULL if * not available. */ static struct fwnode_handle * acpi_node_get_parent(const struct fwnode_handle *fwnode) { if (is_acpi_data_node(fwnode)) { /* All data nodes have parent pointer so just return that */ return to_acpi_data_node(fwnode)->parent; } if (is_acpi_device_node(fwnode)) { struct acpi_device *parent; parent = acpi_dev_parent(to_acpi_device_node(fwnode)); if (parent) return acpi_fwnode_handle(parent); } return NULL; } /* * Return true if the node is an ACPI graph node. Called on either ports * or endpoints. */ static bool is_acpi_graph_node(struct fwnode_handle *fwnode, const char *str) { unsigned int len = strlen(str); const char *name; if (!len || !is_acpi_data_node(fwnode)) return false; name = to_acpi_data_node(fwnode)->name; return (fwnode_property_present(fwnode, "reg") && !strncmp(name, str, len) && name[len] == '@') || fwnode_property_present(fwnode, str); } /** * acpi_graph_get_next_endpoint - Get next endpoint ACPI firmware node * @fwnode: Pointer to the parent firmware node * @prev: Previous endpoint node or %NULL to get the first * * Looks up next endpoint ACPI firmware node below a given @fwnode. Returns * %NULL if there is no next endpoint or in case of error. In case of success * the next endpoint is returned. 
*/ static struct fwnode_handle *acpi_graph_get_next_endpoint( const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { struct fwnode_handle *port = NULL; struct fwnode_handle *endpoint; if (!prev) { do { port = fwnode_get_next_child_node(fwnode, port); /* * The names of the port nodes begin with "port@" * followed by the number of the port node and they also * have a "reg" property that also has the number of the * port node. For compatibility reasons a node is also * recognised as a port node from the "port" property. */ if (is_acpi_graph_node(port, "port")) break; } while (port); } else { port = fwnode_get_parent(prev); } if (!port) return NULL; endpoint = fwnode_get_next_child_node(port, prev); while (!endpoint) { port = fwnode_get_next_child_node(fwnode, port); if (!port) break; if (is_acpi_graph_node(port, "port")) endpoint = fwnode_get_next_child_node(port, NULL); } /* * The names of the endpoint nodes begin with "endpoint@" followed by * the number of the endpoint node and they also have a "reg" property * that also has the number of the endpoint node. For compatibility * reasons a node is also recognised as an endpoint node from the * "endpoint" property. */ if (!is_acpi_graph_node(endpoint, "endpoint")) return NULL; return endpoint; } /** * acpi_graph_get_child_prop_value - Return a child with a given property value * @fwnode: device fwnode * @prop_name: The name of the property to look for * @val: the desired property value * * Return the port node corresponding to a given port number. Returns * the child node on success, NULL otherwise. */ static struct fwnode_handle *acpi_graph_get_child_prop_value( const struct fwnode_handle *fwnode, const char *prop_name, unsigned int val) { struct fwnode_handle *child; fwnode_for_each_child_node(fwnode, child) { u32 nr; if (fwnode_property_read_u32(child, prop_name, &nr)) continue; if (val == nr) return child; } return NULL; } /** * acpi_graph_get_remote_endpoint - Parses and returns remote end of an endpoint * @__fwnode: Endpoint firmware node pointing to a remote device * * Returns the remote endpoint corresponding to @__fwnode. NULL on error. */ static struct fwnode_handle * acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode) { struct fwnode_handle *fwnode; unsigned int port_nr, endpoint_nr; struct fwnode_reference_args args; int ret; memset(&args, 0, sizeof(args)); ret = acpi_node_get_property_reference(__fwnode, "remote-endpoint", 0, &args); if (ret) return NULL; /* Direct endpoint reference? */ if (!is_acpi_device_node(args.fwnode)) return args.nargs ? NULL : args.fwnode; /* * Always require two arguments with the reference: port and * endpoint indices. 
*/ if (args.nargs != 2) return NULL; fwnode = args.fwnode; port_nr = args.args[0]; endpoint_nr = args.args[1]; fwnode = acpi_graph_get_child_prop_value(fwnode, "port", port_nr); return acpi_graph_get_child_prop_value(fwnode, "endpoint", endpoint_nr); } static bool acpi_fwnode_device_is_available(const struct fwnode_handle *fwnode) { if (!is_acpi_device_node(fwnode)) return false; return acpi_device_is_present(to_acpi_device_node(fwnode)); } static const void * acpi_fwnode_device_get_match_data(const struct fwnode_handle *fwnode, const struct device *dev) { return acpi_device_get_match_data(dev); } static bool acpi_fwnode_device_dma_supported(const struct fwnode_handle *fwnode) { return acpi_dma_supported(to_acpi_device_node(fwnode)); } static enum dev_dma_attr acpi_fwnode_device_get_dma_attr(const struct fwnode_handle *fwnode) { return acpi_get_dma_attr(to_acpi_device_node(fwnode)); } static bool acpi_fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { return !acpi_node_prop_get(fwnode, propname, NULL); } static int acpi_fwnode_property_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { enum dev_prop_type type; switch (elem_size) { case sizeof(u8): type = DEV_PROP_U8; break; case sizeof(u16): type = DEV_PROP_U16; break; case sizeof(u32): type = DEV_PROP_U32; break; case sizeof(u64): type = DEV_PROP_U64; break; default: return -ENXIO; } return acpi_node_prop_read(fwnode, propname, type, val, nval); } static int acpi_fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING, val, nval); } static int acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int args_count, unsigned int index, struct fwnode_reference_args *args) { return __acpi_node_get_property_reference(fwnode, prop, index, args_count, args); } static const char *acpi_fwnode_get_name(const struct fwnode_handle *fwnode) { const struct acpi_device *adev; struct fwnode_handle *parent; /* Is this the root node? */ parent = fwnode_get_parent(fwnode); if (!parent) return "\\"; fwnode_handle_put(parent); if (is_acpi_data_node(fwnode)) { const struct acpi_data_node *dn = to_acpi_data_node(fwnode); return dn->name; } adev = to_acpi_device_node(fwnode); if (WARN_ON(!adev)) return NULL; return acpi_device_bid(adev); } static const char * acpi_fwnode_get_name_prefix(const struct fwnode_handle *fwnode) { struct fwnode_handle *parent; /* Is this the root node? */ parent = fwnode_get_parent(fwnode); if (!parent) return ""; /* Is this 2nd node from the root? */ parent = fwnode_get_next_parent(parent); if (!parent) return ""; fwnode_handle_put(parent); /* ACPI device or data node. 
*/ return "."; } static struct fwnode_handle * acpi_fwnode_get_parent(struct fwnode_handle *fwnode) { return acpi_node_get_parent(fwnode); } static int acpi_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint) { struct fwnode_handle *port_fwnode = fwnode_get_parent(fwnode); endpoint->local_fwnode = fwnode; if (fwnode_property_read_u32(port_fwnode, "reg", &endpoint->port)) fwnode_property_read_u32(port_fwnode, "port", &endpoint->port); if (fwnode_property_read_u32(fwnode, "reg", &endpoint->id)) fwnode_property_read_u32(fwnode, "endpoint", &endpoint->id); return 0; } static int acpi_fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { struct resource res; int ret; ret = acpi_irq_get(ACPI_HANDLE_FWNODE(fwnode), index, &res); if (ret) return ret; return res.start; } #define DECLARE_ACPI_FWNODE_OPS(ops) \ const struct fwnode_operations ops = { \ .device_is_available = acpi_fwnode_device_is_available, \ .device_get_match_data = acpi_fwnode_device_get_match_data, \ .device_dma_supported = \ acpi_fwnode_device_dma_supported, \ .device_get_dma_attr = acpi_fwnode_device_get_dma_attr, \ .property_present = acpi_fwnode_property_present, \ .property_read_int_array = \ acpi_fwnode_property_read_int_array, \ .property_read_string_array = \ acpi_fwnode_property_read_string_array, \ .get_parent = acpi_node_get_parent, \ .get_next_child_node = acpi_get_next_subnode, \ .get_named_child_node = acpi_fwnode_get_named_child_node, \ .get_name = acpi_fwnode_get_name, \ .get_name_prefix = acpi_fwnode_get_name_prefix, \ .get_reference_args = acpi_fwnode_get_reference_args, \ .graph_get_next_endpoint = \ acpi_graph_get_next_endpoint, \ .graph_get_remote_endpoint = \ acpi_graph_get_remote_endpoint, \ .graph_get_port_parent = acpi_fwnode_get_parent, \ .graph_parse_endpoint = acpi_fwnode_graph_parse_endpoint, \ .irq_get = acpi_fwnode_irq_get, \ }; \ EXPORT_SYMBOL_GPL(ops) DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops); DECLARE_ACPI_FWNODE_OPS(acpi_data_fwnode_ops); const struct fwnode_operations acpi_static_fwnode_ops; bool is_acpi_device_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_device_fwnode_ops; } EXPORT_SYMBOL(is_acpi_device_node); bool is_acpi_data_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops; } EXPORT_SYMBOL(is_acpi_data_node);
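/*
 * Illustrative consumer-side sketch (not part of the file above): a driver
 * probe path reading _DSD properties that acpi_init_properties() extracts,
 * through the generic device/fwnode property API backed here by
 * acpi_device_fwnode_ops and acpi_data_fwnode_ops. The property names
 * ("clock-frequency", "label", "io-channels") and the child node name
 * ("sensor") are examples only, as is example_probe_properties().
 */
#include <linux/device.h>
#include <linux/property.h>

static int example_probe_properties(struct device *dev)
{
	struct fwnode_reference_args args;
	struct fwnode_handle *child;
	const char *label;
	u32 freq, reg;
	int ret;

	/* Plain scalar and string properties from the device's _DSD. */
	ret = device_property_read_u32(dev, "clock-frequency", &freq);
	if (ret)
		return ret;

	if (device_property_read_string(dev, "label", &label))
		label = "unnamed";

	dev_info(dev, "%s runs at %u Hz\n", label, freq);

	/* Hierarchical _DSD data subnodes show up as child fwnodes. */
	child = device_get_named_child_node(dev, "sensor");
	if (child) {
		if (!fwnode_property_read_u32(child, "reg", &reg))
			dev_info(dev, "sensor at address %u\n", reg);
		fwnode_handle_put(child);
	}

	/* A reference property with one integer argument after the target. */
	if (!fwnode_property_get_reference_args(dev_fwnode(dev), "io-channels",
						NULL, 1, 0, &args)) {
		dev_info(dev, "channel index %llu\n",
			 (unsigned long long)args.args[0]);
		fwnode_handle_put(args.fwnode);
	}

	return 0;
}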
// SPDX-License-Identifier: GPL-2.0

#include <linux/kernel.h>
#include <linux/irqflags.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/bug.h>
#include "printk_ringbuffer.h"
#include "internal.h"

/**
 * DOC: printk_ringbuffer overview
 *
 * Data Structure
 * --------------
 * The printk_ringbuffer is made up of 2 internal ringbuffers:
 *
 * desc_ring
 *     A ring of descriptors and their meta data (such as sequence number,
 *     timestamp, loglevel, etc.) as well as internal state information about
 *     the record and logical positions specifying where in the other
 *     ringbuffer the text strings are located.
 *
 * text_data_ring
 *     A ring of data blocks. A data block consists of an unsigned long
 *     integer (ID) that maps to a desc_ring index followed by the text
 *     string of the record.
 *
 * The internal state information of a descriptor is the key element to allow
 * readers and writers to locklessly synchronize access to the data.
 *
 * Implementation
 * --------------
 *
 * Descriptor Ring
 * ~~~~~~~~~~~~~~~
 * The descriptor ring is an array of descriptors. A descriptor contains
 * essential meta data to track the data of a printk record using
 * blk_lpos structs pointing to associated text data blocks (see
 * "Data Ring" below). Each descriptor is assigned an ID that maps
 * directly to an index of the descriptor array, and each descriptor has a
 * state. The ID and the state are bitwise combined into a single descriptor
 * field named @state_var, allowing ID and state to be synchronously and
 * atomically updated.
 *
 * Descriptors have four states:
 *
 * reserved
 *     A writer is modifying the record.
 *
 * committed
 *     The record and all its data are written. A writer can reopen the
 *     descriptor (transitioning it back to reserved), but in the committed
 *     state the data is consistent.
 *
 * finalized
 *     The record and all its data are complete and available for reading. A
 *     writer cannot reopen the descriptor.
 *
 * reusable
 *     The record exists, but its text and/or meta data may no longer be
 *     available.
 *
 * Querying the @state_var of a record requires providing the ID of the
 * descriptor to query. This can yield a possible fifth (pseudo) state:
 *
 * miss
 *     The descriptor being queried has an unexpected ID.
 *
 * The descriptor ring has a @tail_id that contains the ID of the oldest
 * descriptor and @head_id that contains the ID of the newest descriptor.
 *
 * When a new descriptor should be created (and the ring is full), the tail
 * descriptor is invalidated by first transitioning to the reusable state and
 * then invalidating all tail data blocks up to and including the data blocks
 * associated with the tail descriptor (for the text ring). Then
 * @tail_id is advanced, followed by advancing @head_id. And finally the
 * @state_var of the new descriptor is initialized to the new ID and reserved
 * state.
 *
 * The @tail_id can only be advanced if the new @tail_id would be in the
 * committed or reusable queried state. This ensures that a valid
 * sequence number for the tail is always available.
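 *
 * The following is an illustrative sketch only (not part of the
 * implementation or API). Given a descriptor pointer @desc and the ID @id
 * being queried, it mirrors what get_desc_state() below does, using the
 * DESC_ID() and DESC_STATE() helpers from printk_ringbuffer.h::
 *
 *	unsigned long state_val = atomic_long_read(&desc->state_var);
 *
 *	if (DESC_ID(state_val) != id)
 *		return desc_miss; // slot now belongs to a different ID
 *
 *	// one of: reserved, committed, finalized, reusable
 *	return DESC_STATE(state_val);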
* * Descriptor Finalization * ~~~~~~~~~~~~~~~~~~~~~~~ * When a writer calls the commit function prb_commit(), record data is * fully stored and is consistent within the ringbuffer. However, a writer can * reopen that record, claiming exclusive access (as with prb_reserve()), and * modify that record. When finished, the writer must again commit the record. * * In order for a record to be made available to readers (and also become * recyclable for writers), it must be finalized. A finalized record cannot be * reopened and can never become "unfinalized". Record finalization can occur * in three different scenarios: * * 1) A writer can simultaneously commit and finalize its record by calling * prb_final_commit() instead of prb_commit(). * * 2) When a new record is reserved and the previous record has been * committed via prb_commit(), that previous record is automatically * finalized. * * 3) When a record is committed via prb_commit() and a newer record * already exists, the record being committed is automatically finalized. * * Data Ring * ~~~~~~~~~ * The text data ring is a byte array composed of data blocks. Data blocks are * referenced by blk_lpos structs that point to the logical position of the * beginning of a data block and the beginning of the next adjacent data * block. Logical positions are mapped directly to index values of the byte * array ringbuffer. * * Each data block consists of an ID followed by the writer data. The ID is * the identifier of a descriptor that is associated with the data block. A * given data block is considered valid if all of the following conditions * are met: * * 1) The descriptor associated with the data block is in the committed * or finalized queried state. * * 2) The blk_lpos struct within the descriptor associated with the data * block references back to the same data block. * * 3) The data block is within the head/tail logical position range. * * If the writer data of a data block would extend beyond the end of the * byte array, only the ID of the data block is stored at the logical * position and the full data block (ID and writer data) is stored at the * beginning of the byte array. The referencing blk_lpos will point to the * ID before the wrap and the next data block will be at the logical * position adjacent the full data block after the wrap. * * Data rings have a @tail_lpos that points to the beginning of the oldest * data block and a @head_lpos that points to the logical position of the * next (not yet existing) data block. * * When a new data block should be created (and the ring is full), tail data * blocks will first be invalidated by putting their associated descriptors * into the reusable state and then pushing the @tail_lpos forward beyond * them. Then the @head_lpos is pushed forward and is associated with a new * descriptor. If a data block is not valid, the @tail_lpos cannot be * advanced beyond it. * * Info Array * ~~~~~~~~~~ * The general meta data of printk records are stored in printk_info structs, * stored in an array with the same number of elements as the descriptor ring. * Each info corresponds to the descriptor of the same index in the * descriptor ring. Info validity is confirmed by evaluating the corresponding * descriptor before and after loading the info. * * Usage * ----- * Here are some simple examples demonstrating writers and readers. 
For the * examples a global ringbuffer (test_rb) is available (which is not the * actual ringbuffer used by printk):: * * DEFINE_PRINTKRB(test_rb, 15, 5); * * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of * 1 MiB (2 ^ (15 + 5)) for text data. * * Sample writer code:: * * const char *textstr = "message text"; * struct prb_reserved_entry e; * struct printk_record r; * * // specify how much to allocate * prb_rec_init_wr(&r, strlen(textstr) + 1); * * if (prb_reserve(&e, &test_rb, &r)) { * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); * * r.info->text_len = strlen(textstr); * r.info->ts_nsec = local_clock(); * r.info->caller_id = printk_caller_id(); * * // commit and finalize the record * prb_final_commit(&e); * } * * Note that additional writer functions are available to extend a record * after it has been committed but not yet finalized. This can be done as * long as no new records have been reserved and the caller is the same. * * Sample writer code (record extending):: * * // alternate rest of previous example * * r.info->text_len = strlen(textstr); * r.info->ts_nsec = local_clock(); * r.info->caller_id = printk_caller_id(); * * // commit the record (but do not finalize yet) * prb_commit(&e); * } * * ... * * // specify additional 5 bytes text space to extend * prb_rec_init_wr(&r, 5); * * // try to extend, but only if it does not exceed 32 bytes * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) { * snprintf(&r.text_buf[r.info->text_len], * r.text_buf_size - r.info->text_len, "hello"); * * r.info->text_len += 5; * * // commit and finalize the record * prb_final_commit(&e); * } * * Sample reader code:: * * struct printk_info info; * struct printk_record r; * char text_buf[32]; * u64 seq; * * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf)); * * prb_for_each_record(0, &test_rb, &seq, &r) { * if (info.seq != seq) * pr_warn("lost %llu records\n", info.seq - seq); * * if (info.text_len > r.text_buf_size) { * pr_warn("record %llu text truncated\n", info.seq); * text_buf[r.text_buf_size - 1] = 0; * } * * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec, * &text_buf[0]); * } * * Note that additional less convenient reader functions are available to * allow complex record access. * * ABA Issues * ~~~~~~~~~~ * To help avoid ABA issues, descriptors are referenced by IDs (array index * values combined with tagged bits counting array wraps) and data blocks are * referenced by logical positions (array index values combined with tagged * bits counting array wraps). However, on 32-bit systems the number of * tagged bits is relatively small such that an ABA incident is (at least * theoretically) possible. For example, if 4 million maximally sized (1KiB) * printk messages were to occur in NMI context on a 32-bit system, the * interrupted context would not be able to recognize that the 32-bit integer * completely wrapped and thus represents a different data block than the one * the interrupted context expects. * * To help combat this possibility, additional state checking is performed * (such as using cmpxchg() even though set() would suffice). These extra * checks are commented as such and will hopefully catch any ABA issue that * a 32-bit system might experience. * * Memory Barriers * ~~~~~~~~~~~~~~~ * Multiple memory barriers are used. 
To simplify proving correctness and * generating litmus tests, lines of code related to memory barriers * (loads, stores, and the associated memory barriers) are labeled:: * * LMM(function:letter) * * Comments reference the labels using only the "function:letter" part. * * The memory barrier pairs and their ordering are: * * desc_reserve:D / desc_reserve:B * push descriptor tail (id), then push descriptor head (id) * * desc_reserve:D / data_push_tail:B * push data tail (lpos), then set new descriptor reserved (state) * * desc_reserve:D / desc_push_tail:C * push descriptor tail (id), then set new descriptor reserved (state) * * desc_reserve:D / prb_first_seq:C * push descriptor tail (id), then set new descriptor reserved (state) * * desc_reserve:F / desc_read:D * set new descriptor id and reserved (state), then allow writer changes * * data_alloc:A (or data_realloc:A) / desc_read:D * set old descriptor reusable (state), then modify new data block area * * data_alloc:A (or data_realloc:A) / data_push_tail:B * push data tail (lpos), then modify new data block area * * _prb_commit:B / desc_read:B * store writer changes, then set new descriptor committed (state) * * desc_reopen_last:A / _prb_commit:B * set descriptor reserved (state), then read descriptor data * * _prb_commit:B / desc_reserve:D * set new descriptor committed (state), then check descriptor head (id) * * data_push_tail:D / data_push_tail:A * set descriptor reusable (state), then push data tail (lpos) * * desc_push_tail:B / desc_reserve:D * set descriptor reusable (state), then push descriptor tail (id) * * desc_update_last_finalized:A / desc_last_finalized_seq:A * store finalized record, then set new highest finalized sequence number */ #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) #define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1) #define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits) #define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1) /* Determine the data array index from a logical position. */ #define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring)) /* Determine the desc array index from an ID or sequence number. */ #define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring)) /* Determine how many times the data array has wrapped. */ #define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits) /* Determine if a logical position refers to a data-less block. */ #define LPOS_DATALESS(lpos) ((lpos) & 1UL) #define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \ LPOS_DATALESS((blk)->next)) /* Get the logical position at index 0 of the current wrap. */ #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ ((lpos) & ~DATA_SIZE_MASK(data_ring)) /* Get the ID for the same index of the previous wrap as the given ID. */ #define DESC_ID_PREV_WRAP(desc_ring, id) \ DESC_ID((id) - DESCS_COUNT(desc_ring)) /* * A data block: mapped directly to the beginning of the data block area * specified as a logical position within the data ring. * * @id: the ID of the associated descriptor * @data: the writer data * * Note that the size of a data block is only known by its associated * descriptor. */ struct prb_data_block { unsigned long id; char data[]; }; /* * Return the descriptor associated with @n. @n can be either a * descriptor ID or a sequence number. */ static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n) { return &desc_ring->descs[DESC_INDEX(desc_ring, n)]; } /* * Return the printk_info associated with @n. 
@n can be either a * descriptor ID or a sequence number. */ static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n) { return &desc_ring->infos[DESC_INDEX(desc_ring, n)]; } static struct prb_data_block *to_block(struct prb_data_ring *data_ring, unsigned long begin_lpos) { return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)]; } /* * Increase the data size to account for data block meta data plus any * padding so that the adjacent data block is aligned on the ID size. */ static unsigned int to_blk_size(unsigned int size) { struct prb_data_block *db = NULL; size += sizeof(*db); size = ALIGN(size, sizeof(db->id)); return size; } /* * Sanity checker for reserve size. The ringbuffer code assumes that a data * block does not exceed the maximum possible size that could fit within the * ringbuffer. This function provides that basic size check so that the * assumption is safe. */ static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) { struct prb_data_block *db = NULL; if (size == 0) return true; /* * Ensure the alignment padded size could possibly fit in the data * array. The largest possible data block must still leave room for * at least the ID of the next block. */ size = to_blk_size(size); if (size > DATA_SIZE(data_ring) - sizeof(db->id)) return false; return true; } /* Query the state of a descriptor. */ static enum desc_state get_desc_state(unsigned long id, unsigned long state_val) { if (id != DESC_ID(state_val)) return desc_miss; return DESC_STATE(state_val); } /* * Get a copy of a specified descriptor and return its queried state. If the * descriptor is in an inconsistent state (miss or reserved), the caller can * only expect the descriptor's @state_var field to be valid. * * The sequence number and caller_id can be optionally retrieved. Like all * non-state_var data, they are only valid if the descriptor is in a * consistent state. */ static enum desc_state desc_read(struct prb_desc_ring *desc_ring, unsigned long id, struct prb_desc *desc_out, u64 *seq_out, u32 *caller_id_out) { struct printk_info *info = to_info(desc_ring, id); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; enum desc_state d_state; unsigned long state_val; /* Check the descriptor state. */ state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */ d_state = get_desc_state(id, state_val); if (d_state == desc_miss || d_state == desc_reserved) { /* * The descriptor is in an inconsistent state. Set at least * @state_var so that the caller can see the details of * the inconsistent state. */ goto out; } /* * Guarantee the state is loaded before copying the descriptor * content. This avoids copying obsolete descriptor content that might * not apply to the descriptor state. This pairs with _prb_commit:B. * * Memory barrier involvement: * * If desc_read:A reads from _prb_commit:B, then desc_read:C reads * from _prb_commit:A. * * Relies on: * * WMB from _prb_commit:A to _prb_commit:B * matching * RMB from desc_read:A to desc_read:C */ smp_rmb(); /* LMM(desc_read:B) */ /* * Copy the descriptor data. The data is not valid until the * state has been re-checked. A memcpy() for all of @desc * cannot be used because of the atomic_t @state_var field. 
*/ if (desc_out) { memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */ } if (seq_out) *seq_out = info->seq; /* also part of desc_read:C */ if (caller_id_out) *caller_id_out = info->caller_id; /* also part of desc_read:C */ /* * 1. Guarantee the descriptor content is loaded before re-checking * the state. This avoids reading an obsolete descriptor state * that may not apply to the copied content. This pairs with * desc_reserve:F. * * Memory barrier involvement: * * If desc_read:C reads from desc_reserve:G, then desc_read:E * reads from desc_reserve:F. * * Relies on: * * WMB from desc_reserve:F to desc_reserve:G * matching * RMB from desc_read:C to desc_read:E * * 2. Guarantee the record data is loaded before re-checking the * state. This avoids reading an obsolete descriptor state that may * not apply to the copied data. This pairs with data_alloc:A and * data_realloc:A. * * Memory barrier involvement: * * If copy_data:A reads from data_alloc:B, then desc_read:E * reads from desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to data_alloc:B * matching * RMB from desc_read:C to desc_read:E * * Note: desc_make_reusable:A and data_alloc:B can be different * CPUs. However, the data_alloc:B CPU (which performs the * full memory barrier) must have previously seen * desc_make_reusable:A. */ smp_rmb(); /* LMM(desc_read:D) */ /* * The data has been copied. Return the current descriptor state, * which may have changed since the load above. */ state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */ d_state = get_desc_state(id, state_val); out: if (desc_out) atomic_long_set(&desc_out->state_var, state_val); return d_state; } /* * Take a specified descriptor out of the finalized state by attempting * the transition from finalized to reusable. Either this context or some * other context will have been successful. */ static void desc_make_reusable(struct prb_desc_ring *desc_ring, unsigned long id) { unsigned long val_finalized = DESC_SV(id, desc_finalized); unsigned long val_reusable = DESC_SV(id, desc_reusable); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; atomic_long_cmpxchg_relaxed(state_var, val_finalized, val_reusable); /* LMM(desc_make_reusable:A) */ } /* * Given the text data ring, put the associated descriptor of each * data block from @lpos_begin until @lpos_end into the reusable state. * * If there is any problem making the associated descriptor reusable, either * the descriptor has not yet been finalized or another writer context has * already pushed the tail lpos past the problematic data block. Regardless, * on error the caller can re-load the tail lpos to determine the situation. */ static bool data_make_reusable(struct printk_ringbuffer *rb, unsigned long lpos_begin, unsigned long lpos_end, unsigned long *lpos_out) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_desc_ring *desc_ring = &rb->desc_ring; struct prb_data_block *blk; enum desc_state d_state; struct prb_desc desc; struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos; unsigned long id; /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { blk = to_block(data_ring, lpos_begin); /* * Load the block ID from the data block. This is a data race * against a writer that may have newly reserved this data * area. 
If the loaded value matches a valid descriptor ID, * the blk_lpos of that descriptor will be checked to make * sure it points back to this data block. If the check fails, * the data area has been recycled by another writer. */ id = blk->id; /* LMM(data_make_reusable:A) */ d_state = desc_read(desc_ring, id, &desc, NULL, NULL); /* LMM(data_make_reusable:B) */ switch (d_state) { case desc_miss: case desc_reserved: case desc_committed: return false; case desc_finalized: /* * This data block is invalid if the descriptor * does not point back to it. */ if (blk_lpos->begin != lpos_begin) return false; desc_make_reusable(desc_ring, id); break; case desc_reusable: /* * This data block is invalid if the descriptor * does not point back to it. */ if (blk_lpos->begin != lpos_begin) return false; break; } /* Advance @lpos_begin to the next data block. */ lpos_begin = blk_lpos->next; } *lpos_out = lpos_begin; return true; } /* * Advance the data ring tail to at least @lpos. This function puts * descriptors into the reusable state if the tail is pushed beyond * their associated data block. */ static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos) { struct prb_data_ring *data_ring = &rb->text_data_ring; unsigned long tail_lpos_new; unsigned long tail_lpos; unsigned long next_lpos; /* If @lpos is from a data-less block, there is nothing to do. */ if (LPOS_DATALESS(lpos)) return true; /* * Any descriptor states that have transitioned to reusable due to the * data tail being pushed to this loaded value will be visible to this * CPU. This pairs with data_push_tail:D. * * Memory barrier involvement: * * If data_push_tail:A reads from data_push_tail:D, then this CPU can * see desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to data_push_tail:D * matches * READFROM from data_push_tail:D to data_push_tail:A * thus * READFROM from desc_make_reusable:A to this CPU */ tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */ /* * Loop until the tail lpos is at or beyond @lpos. This condition * may already be satisfied, resulting in no full memory barrier * from data_push_tail:D being performed. However, since this CPU * sees the new tail lpos, any descriptor states that transitioned to * the reusable state must already be visible. */ while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { /* * Make all descriptors reusable that are associated with * data blocks before @lpos. */ if (!data_make_reusable(rb, tail_lpos, lpos, &next_lpos)) { /* * 1. Guarantee the block ID loaded in * data_make_reusable() is performed before * reloading the tail lpos. The failed * data_make_reusable() may be due to a newly * recycled data area causing the tail lpos to * have been previously pushed. This pairs with * data_alloc:A and data_realloc:A. * * Memory barrier involvement: * * If data_make_reusable:A reads from data_alloc:B, * then data_push_tail:C reads from * data_push_tail:D. * * Relies on: * * MB from data_push_tail:D to data_alloc:B * matching * RMB from data_make_reusable:A to * data_push_tail:C * * Note: data_push_tail:D and data_alloc:B can be * different CPUs. However, the data_alloc:B * CPU (which performs the full memory * barrier) must have previously seen * data_push_tail:D. * * 2. Guarantee the descriptor state loaded in * data_make_reusable() is performed before * reloading the tail lpos. The failed * data_make_reusable() may be due to a newly * recycled descriptor causing the tail lpos to * have been previously pushed. 
This pairs with * desc_reserve:D. * * Memory barrier involvement: * * If data_make_reusable:B reads from * desc_reserve:F, then data_push_tail:C reads * from data_push_tail:D. * * Relies on: * * MB from data_push_tail:D to desc_reserve:F * matching * RMB from data_make_reusable:B to * data_push_tail:C * * Note: data_push_tail:D and desc_reserve:F can * be different CPUs. However, the * desc_reserve:F CPU (which performs the * full memory barrier) must have previously * seen data_push_tail:D. */ smp_rmb(); /* LMM(data_push_tail:B) */ tail_lpos_new = atomic_long_read(&data_ring->tail_lpos ); /* LMM(data_push_tail:C) */ if (tail_lpos_new == tail_lpos) return false; /* Another CPU pushed the tail. Try again. */ tail_lpos = tail_lpos_new; continue; } /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail lpos. A full * memory barrier is needed since other CPUs may have made * the descriptor states reusable. This pairs with * data_push_tail:A. */ if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos, next_lpos)) { /* LMM(data_push_tail:D) */ break; } } return true; } /* * Advance the desc ring tail. This function advances the tail by one * descriptor, thus invalidating the oldest descriptor. Before advancing * the tail, the tail descriptor is made reusable and all data blocks up to * and including the descriptor's data block are invalidated (i.e. the data * ring tail is pushed past the data block of the descriptor being made * reusable). */ static bool desc_push_tail(struct printk_ringbuffer *rb, unsigned long tail_id) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; struct prb_desc desc; d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL); switch (d_state) { case desc_miss: /* * If the ID is exactly 1 wrap behind the expected, it is * in the process of being reserved by another writer and * must be considered reserved. */ if (DESC_ID(atomic_long_read(&desc.state_var)) == DESC_ID_PREV_WRAP(desc_ring, tail_id)) { return false; } /* * The ID has changed. Another writer must have pushed the * tail and recycled the descriptor already. Success is * returned because the caller is only interested in the * specified tail being pushed, which it was. */ return true; case desc_reserved: case desc_committed: return false; case desc_finalized: desc_make_reusable(desc_ring, tail_id); break; case desc_reusable: break; } /* * Data blocks must be invalidated before their associated * descriptor can be made available for recycling. Invalidating * them later is not possible because there is no way to trust * data blocks once their associated descriptor is gone. */ if (!data_push_tail(rb, desc.text_blk_lpos.next)) return false; /* * Check the next descriptor after @tail_id before pushing the tail * to it because the tail must always be in a finalized or reusable * state. The implementation of prb_first_seq() relies on this. * * A successful read implies that the next descriptor is less than or * equal to @head_id so there is no risk of pushing the tail past the * head. */ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc, NULL, NULL); /* LMM(desc_push_tail:A) */ if (d_state == desc_finalized || d_state == desc_reusable) { /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail ID. This allows * verifying the recycled descriptor state. A full memory * barrier is needed since other CPUs may have made the * descriptor states reusable. 
This pairs with desc_reserve:D. */ atomic_long_cmpxchg(&desc_ring->tail_id, tail_id, DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */ } else { /* * Guarantee the last state load from desc_read() is before * reloading @tail_id in order to see a new tail ID in the * case that the descriptor has been recycled. This pairs * with desc_reserve:D. * * Memory barrier involvement: * * If desc_push_tail:A reads from desc_reserve:F, then * desc_push_tail:D reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:F * matching * RMB from desc_push_tail:A to desc_push_tail:D * * Note: desc_push_tail:B and desc_reserve:F can be different * CPUs. However, the desc_reserve:F CPU (which performs * the full memory barrier) must have previously seen * desc_push_tail:B. */ smp_rmb(); /* LMM(desc_push_tail:C) */ /* * Re-check the tail ID. The descriptor following @tail_id is * not in an allowed tail state. But if the tail has since * been moved by another CPU, then it does not matter. */ if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */ return false; } return true; } /* Reserve a new descriptor, invalidating the oldest if necessary. */ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long prev_state_val; unsigned long id_prev_wrap; struct prb_desc *desc; unsigned long head_id; unsigned long id; head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */ do { id = DESC_ID(head_id + 1); id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id); /* * Guarantee the head ID is read before reading the tail ID. * Since the tail ID is updated before the head ID, this * guarantees that @id_prev_wrap is never ahead of the tail * ID. This pairs with desc_reserve:D. * * Memory barrier involvement: * * If desc_reserve:A reads from desc_reserve:D, then * desc_reserve:C reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:D * matching * RMB from desc_reserve:A to desc_reserve:C * * Note: desc_push_tail:B and desc_reserve:D can be different * CPUs. However, the desc_reserve:D CPU (which performs * the full memory barrier) must have previously seen * desc_push_tail:B. */ smp_rmb(); /* LMM(desc_reserve:B) */ if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id )) { /* LMM(desc_reserve:C) */ /* * Make space for the new descriptor by * advancing the tail. */ if (!desc_push_tail(rb, id_prev_wrap)) return false; } /* * 1. Guarantee the tail ID is read before validating the * recycled descriptor state. A read memory barrier is * sufficient for this. This pairs with desc_push_tail:B. * * Memory barrier involvement: * * If desc_reserve:C reads from desc_push_tail:B, then * desc_reserve:E reads from desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to desc_push_tail:B * matching * RMB from desc_reserve:C to desc_reserve:E * * Note: desc_make_reusable:A and desc_push_tail:B can be * different CPUs. However, the desc_push_tail:B CPU * (which performs the full memory barrier) must have * previously seen desc_make_reusable:A. * * 2. Guarantee the tail ID is stored before storing the head * ID. This pairs with desc_reserve:B. * * 3. Guarantee any data ring tail changes are stored before * recycling the descriptor. Data ring tail changes can * happen via desc_push_tail()->data_push_tail(). A full * memory barrier is needed since another CPU may have * pushed the data ring tails. This pairs with * data_push_tail:B. * * 4. 
Guarantee a new tail ID is stored before recycling the * descriptor. A full memory barrier is needed since * another CPU may have pushed the tail ID. This pairs * with desc_push_tail:C and this also pairs with * prb_first_seq:C. * * 5. Guarantee the head ID is stored before trying to * finalize the previous descriptor. This pairs with * _prb_commit:B. */ } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, id)); /* LMM(desc_reserve:D) */ desc = to_desc(desc_ring, id); /* * If the descriptor has been recycled, verify the old state val. * See "ABA Issues" about why this verification is performed. */ prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ if (prev_state_val && get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) { WARN_ON_ONCE(1); return false; } /* * Assign the descriptor a new ID and set its state to reserved. * See "ABA Issues" about why cmpxchg() instead of set() is used. * * Guarantee the new descriptor ID and state is stored before making * any other changes. A write memory barrier is sufficient for this. * This pairs with desc_read:D. */ if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */ WARN_ON_ONCE(1); return false; } /* Now data in @desc can be modified: LMM(desc_reserve:G) */ *id_out = id; return true; } /* Determine the end of a data block. */ static unsigned long get_next_lpos(struct prb_data_ring *data_ring, unsigned long lpos, unsigned int size) { unsigned long begin_lpos; unsigned long next_lpos; begin_lpos = lpos; next_lpos = lpos + size; /* First check if the data block does not wrap. */ if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) return next_lpos; /* Wrapping data blocks store their data at the beginning. */ return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size); } /* * Allocate a new data block, invalidating the oldest data block(s) * if necessary. This function also associates the data block with * a specified descriptor. */ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, struct prb_data_blk_lpos *blk_lpos, unsigned long id) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_data_block *blk; unsigned long begin_lpos; unsigned long next_lpos; if (size == 0) { /* * Data blocks are not created for empty lines. Instead, the * reader will recognize these special lpos values and handle * it appropriately. */ blk_lpos->begin = EMPTY_LINE_LPOS; blk_lpos->next = EMPTY_LINE_LPOS; return NULL; } size = to_blk_size(size); begin_lpos = atomic_long_read(&data_ring->head_lpos); do { next_lpos = get_next_lpos(data_ring, begin_lpos, size); if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) { /* Failed to allocate, specify a data-less block. */ blk_lpos->begin = FAILED_LPOS; blk_lpos->next = FAILED_LPOS; return NULL; } /* * 1. Guarantee any descriptor states that have transitioned * to reusable are stored before modifying the newly * allocated data area. A full memory barrier is needed * since other CPUs may have made the descriptor states * reusable. See data_push_tail:A about why the reusable * states are visible. This pairs with desc_read:D. * * 2. Guarantee any updated tail lpos is stored before * modifying the newly allocated data area. Another CPU may * be in data_make_reusable() and is reading a block ID * from this area. data_make_reusable() can handle reading * a garbage block ID value, but then it must be able to * load a new tail lpos. 
A full memory barrier is needed * since other CPUs may have updated the tail lpos. This * pairs with data_push_tail:B. */ } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos, next_lpos)); /* LMM(data_alloc:A) */ blk = to_block(data_ring, begin_lpos); blk->id = id; /* LMM(data_alloc:B) */ if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); /* * Store the ID on the wrapped block for consistency. * The printk_ringbuffer does not actually use it. */ blk->id = id; } blk_lpos->begin = begin_lpos; blk_lpos->next = next_lpos; return &blk->data[0]; } /* * Try to resize an existing data block associated with the descriptor * specified by @id. If the resized data block should become wrapped, it * copies the old data to the new data block. If @size yields a data block * with the same or less size, the data block is left as is. * * Fail if this is not the last allocated data block or if there is not * enough space or it is not possible make enough space. * * Return a pointer to the beginning of the entire data buffer or NULL on * failure. */ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, struct prb_data_blk_lpos *blk_lpos, unsigned long id) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_data_block *blk; unsigned long head_lpos; unsigned long next_lpos; bool wrapped; /* Reallocation only works if @blk_lpos is the newest data block. */ head_lpos = atomic_long_read(&data_ring->head_lpos); if (head_lpos != blk_lpos->next) return NULL; /* Keep track if @blk_lpos was a wrapping data block. */ wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); size = to_blk_size(size); next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); /* If the data block does not increase, there is nothing to do. */ if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { if (wrapped) blk = to_block(data_ring, 0); else blk = to_block(data_ring, blk_lpos->begin); return &blk->data[0]; } if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) return NULL; /* The memory barrier involvement is the same as data_alloc:A. */ if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, next_lpos)) { /* LMM(data_realloc:A) */ return NULL; } blk = to_block(data_ring, blk_lpos->begin); if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { struct prb_data_block *old_blk = blk; /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); /* * Store the ID on the wrapped block for consistency. * The printk_ringbuffer does not actually use it. */ blk->id = id; if (!wrapped) { /* * Since the allocated space is now in the newly * created wrapping data block, copy the content * from the old data block. */ memcpy(&blk->data[0], &old_blk->data[0], (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); } } blk_lpos->next = next_lpos; return &blk->data[0]; } /* Return the number of bytes used by a data block. */ static unsigned int space_used(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos) { /* Data-less blocks take no space. */ if (BLK_DATALESS(blk_lpos)) return 0; if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { /* Data block does not wrap. */ return (DATA_INDEX(data_ring, blk_lpos->next) - DATA_INDEX(data_ring, blk_lpos->begin)); } /* * For wrapping data blocks, the trailing (wasted) space is * also counted. 
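 *
 * As an illustrative example (hypothetical numbers, not taken from the
 * code): with a 1 KiB data ring, a wrapped block whose begin index is
 * 1000 and whose next index is 100 is reported as using
 * 100 + 1024 - 1000 = 124 bytes, i.e. the 24 wasted trailing bytes plus
 * the 100 bytes of block space stored at the start of the array.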
*/ return (DATA_INDEX(data_ring, blk_lpos->next) + DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); } /* * Given @blk_lpos, return a pointer to the writer data from the data block * and calculate the size of the data part. A NULL pointer is returned if * @blk_lpos specifies values that could never be legal. * * This function (used by readers) performs strict validation on the lpos * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is * triggered if an internal error is detected. */ static const char *get_data(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos, unsigned int *data_size) { struct prb_data_block *db; /* Data-less data block description. */ if (BLK_DATALESS(blk_lpos)) { /* * Records that are just empty lines are also valid, even * though they do not have a data block. For such records * explicitly return empty string data to signify success. */ if (blk_lpos->begin == EMPTY_LINE_LPOS && blk_lpos->next == EMPTY_LINE_LPOS) { *data_size = 0; return ""; } /* Data lost, invalid, or otherwise unavailable. */ return NULL; } /* Regular data block: @begin less than @next and in same wrap. */ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && blk_lpos->begin < blk_lpos->next) { db = to_block(data_ring, blk_lpos->begin); *data_size = blk_lpos->next - blk_lpos->begin; /* Wrapping data block: @begin is one wrap behind @next. */ } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == DATA_WRAPS(data_ring, blk_lpos->next)) { db = to_block(data_ring, 0); *data_size = DATA_INDEX(data_ring, blk_lpos->next); /* Illegal block description. */ } else { WARN_ON_ONCE(1); return NULL; } /* A valid data block will always be aligned to the ID size. */ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { return NULL; } /* A valid data block will always have at least an ID. */ if (WARN_ON_ONCE(*data_size < sizeof(db->id))) return NULL; /* Subtract block ID space from size to reflect data size. */ *data_size -= sizeof(db->id); return &db->data[0]; } /* * Attempt to transition the newest descriptor from committed back to reserved * so that the record can be modified by a writer again. This is only possible * if the descriptor is not yet finalized and the provided @caller_id matches. */ static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring, u32 caller_id, unsigned long *id_out) { unsigned long prev_state_val; enum desc_state d_state; struct prb_desc desc; struct prb_desc *d; unsigned long id; u32 cid; id = atomic_long_read(&desc_ring->head_id); /* * To reduce unnecessarily reopening, first check if the descriptor * state and caller ID are correct. */ d_state = desc_read(desc_ring, id, &desc, NULL, &cid); if (d_state != desc_committed || cid != caller_id) return NULL; d = to_desc(desc_ring, id); prev_state_val = DESC_SV(id, desc_committed); /* * Guarantee the reserved state is stored before reading any * record data. A full memory barrier is needed because @state_var * modification is followed by reading. This pairs with _prb_commit:B. * * Memory barrier involvement: * * If desc_reopen_last:A reads from _prb_commit:B, then * prb_reserve_in_last:A reads from _prb_commit:A. 
* * Relies on: * * WMB from _prb_commit:A to _prb_commit:B * matching * MB If desc_reopen_last:A to prb_reserve_in_last:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */ return NULL; } *id_out = id; return d; } /** * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer * used by the newest record. * * @e: The entry structure to setup. * @rb: The ringbuffer to re-reserve and extend data in. * @r: The record structure to allocate buffers for. * @caller_id: The caller ID of the caller (reserving writer). * @max_size: Fail if the extended size would be greater than this. * * This is the public function available to writers to re-reserve and extend * data. * * The writer specifies the text size to extend (not the new total size) by * setting the @text_buf_size field of @r. To ensure proper initialization * of @r, prb_rec_init_wr() should be used. * * This function will fail if @caller_id does not match the caller ID of the * newest record. In that case the caller must reserve new data using * prb_reserve(). * * Context: Any context. Disables local interrupts on success. * Return: true if text data could be extended, otherwise false. * * On success: * * - @r->text_buf points to the beginning of the entire text buffer. * * - @r->text_buf_size is set to the new total size of the buffer. * * - @r->info is not touched so that @r->info->text_len could be used * to append the text. * * - prb_record_text_space() can be used on @e to query the new * actually used space. * * Important: All @r->info fields will already be set with the current values * for the record. I.e. @r->info->text_len will be less than * @text_buf_size. Writers can use @r->info->text_len to know * where concatenation begins and writers should update * @r->info->text_len after concatenating. */ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r, u32 caller_id, unsigned int max_size) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info; unsigned int data_size; struct prb_desc *d; unsigned long id; local_irq_save(e->irqflags); /* Transition the newest descriptor back to the reserved state. */ d = desc_reopen_last(desc_ring, caller_id, &id); if (!d) { local_irq_restore(e->irqflags); goto fail_reopen; } /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ info = to_info(desc_ring, id); /* * Set the @e fields here so that prb_commit() can be used if * anything fails from now on. */ e->rb = rb; e->id = id; /* * desc_reopen_last() checked the caller_id, but there was no * exclusive access at that point. The descriptor may have * changed since then. */ if (caller_id != info->caller_id) goto fail; if (BLK_DATALESS(&d->text_blk_lpos)) { if (WARN_ON_ONCE(info->text_len != 0)) { pr_warn_once("wrong text_len value (%hu, expecting 0)\n", info->text_len); info->text_len = 0; } if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; if (r->text_buf_size > max_size) goto fail; r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); } else { if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size)) goto fail; /* * Increase the buffer size to include the original size. If * the meta data (@text_len) is not sane, use the full data * block size. 
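 *
 * For example (hypothetical sizes): if the record already holds
 * 12 bytes of text (@info->text_len == 12) and the caller asked to
 * extend by 5 bytes (@r->text_buf_size == 5), the buffer is resized
 * below for a total of 17 bytes of text.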
*/ if (WARN_ON_ONCE(info->text_len > data_size)) { pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", info->text_len, data_size); info->text_len = data_size; } r->text_buf_size += info->text_len; if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; if (r->text_buf_size > max_size) goto fail; r->text_buf = data_realloc(rb, r->text_buf_size, &d->text_blk_lpos, id); } if (r->text_buf_size && !r->text_buf) goto fail; r->info = info; e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); return true; fail: prb_commit(e); /* prb_commit() re-enabled interrupts. */ fail_reopen: /* Make it clear to the caller that the re-reserve failed. */ memset(r, 0, sizeof(*r)); return false; } /* * @last_finalized_seq value guarantees that all records up to and including * this sequence number are finalized and can be read. The only exception are * too old records which have already been overwritten. * * It is also guaranteed that @last_finalized_seq only increases. * * Be aware that finalized records following non-finalized records are not * reported because they are not yet available to the reader. For example, * a new record stored via printk() will not be available to a printer if * it follows a record that has not been finalized yet. However, once that * non-finalized record becomes finalized, @last_finalized_seq will be * appropriately updated and the full set of finalized records will be * available to the printer. And since each printk() caller will either * directly print or trigger deferred printing of all available unprinted * records, all printk() messages will get printed. */ static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long ulseq; /* * Guarantee the sequence number is loaded before loading the * associated record in order to guarantee that the record can be * seen by this CPU. This pairs with desc_update_last_finalized:A. */ ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq ); /* LMM(desc_last_finalized_seq:A) */ return __ulseq_to_u64seq(rb, ulseq); } static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, struct printk_record *r, unsigned int *line_count); /* * Check if there are records directly following @last_finalized_seq that are * finalized. If so, update @last_finalized_seq to the latest of these * records. It is not allowed to skip over records that are not yet finalized. */ static void desc_update_last_finalized(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; u64 old_seq = desc_last_finalized_seq(rb); unsigned long oldval; unsigned long newval; u64 finalized_seq; u64 try_seq; try_again: finalized_seq = old_seq; try_seq = finalized_seq + 1; /* Try to find later finalized records. */ while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { finalized_seq = try_seq; try_seq++; } /* No update needed if no later finalized record was found. */ if (finalized_seq == old_seq) return; oldval = __u64seq_to_ulseq(old_seq); newval = __u64seq_to_ulseq(finalized_seq); /* * Set the sequence number of a later finalized record that has been * seen. * * Guarantee the record data is visible to other CPUs before storing * its sequence number. This pairs with desc_last_finalized_seq:A. * * Memory barrier involvement: * * If desc_last_finalized_seq:A reads from * desc_update_last_finalized:A, then desc_read:A reads from * _prb_commit:B. 
* * Relies on: * * RELEASE from _prb_commit:B to desc_update_last_finalized:A * matching * ACQUIRE from desc_last_finalized_seq:A to desc_read:A * * Note: _prb_commit:B and desc_update_last_finalized:A can be * different CPUs. However, the desc_update_last_finalized:A * CPU (which performs the release) must have previously seen * _prb_commit:B. */ if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ old_seq = __ulseq_to_u64seq(rb, oldval); goto try_again; } } /* * Attempt to finalize a specified descriptor. If this fails, the descriptor * is either already final or it will finalize itself when the writer commits. */ static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long prev_state_val = DESC_SV(id, desc_committed); struct prb_desc *d = to_desc(desc_ring, id); if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ desc_update_last_finalized(rb); } } /** * prb_reserve() - Reserve space in the ringbuffer. * * @e: The entry structure to setup. * @rb: The ringbuffer to reserve data in. * @r: The record structure to allocate buffers for. * * This is the public function available to writers to reserve data. * * The writer specifies the text size to reserve by setting the * @text_buf_size field of @r. To ensure proper initialization of @r, * prb_rec_init_wr() should be used. * * Context: Any context. Disables local interrupts on success. * Return: true if at least text data could be allocated, otherwise false. * * On success, the fields @info and @text_buf of @r will be set by this * function and should be filled in by the writer before committing. Also * on success, prb_record_text_space() can be used on @e to query the actual * space used for the text data block. * * Important: @info->text_len needs to be set correctly by the writer in * order for data to be readable and/or extended. Its value * is initialized to 0. */ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info; struct prb_desc *d; unsigned long id; u64 seq; if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; /* * Descriptors in the reserved state act as blockers to all further * reservations once the desc_ring has fully wrapped. Disable * interrupts during the reserve/commit window in order to minimize * the likelihood of this happening. */ local_irq_save(e->irqflags); if (!desc_reserve(rb, &id)) { /* Descriptor reservation failures are tracked. */ atomic_long_inc(&rb->fail); local_irq_restore(e->irqflags); goto fail; } d = to_desc(desc_ring, id); info = to_info(desc_ring, id); /* * All @info fields (except @seq) are cleared and must be filled in * by the writer. Save @seq before clearing because it is used to * determine the new sequence number. */ seq = info->seq; memset(info, 0, sizeof(*info)); /* * Set the @e fields here so that prb_commit() can be used if * text data allocation fails. */ e->rb = rb; e->id = id; /* * Initialize the sequence number if it has "never been set". * Otherwise just increment it by a full wrap. * * @seq is considered "never been set" if it has a value of 0, * _except_ for @infos[0], which was specially setup by the ringbuffer * initializer and therefore is always considered as set. 
* * See the "Bootstrap" comment block in printk_ringbuffer.h for * details about how the initializer bootstraps the descriptors. */ if (seq == 0 && DESC_INDEX(desc_ring, id) != 0) info->seq = DESC_INDEX(desc_ring, id); else info->seq = seq + DESCS_COUNT(desc_ring); /* * New data is about to be reserved. Once that happens, previous * descriptors are no longer able to be extended. Finalize the * previous descriptor now so that it can be made available to * readers. (For seq==0 there is no previous descriptor.) */ if (info->seq > 0) desc_make_final(rb, DESC_ID(id - 1)); r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); /* If text data allocation fails, a data-less record is committed. */ if (r->text_buf_size && !r->text_buf) { prb_commit(e); /* prb_commit() re-enabled interrupts. */ goto fail; } r->info = info; /* Record full text space used by record. */ e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); return true; fail: /* Make it clear to the caller that the reserve failed. */ memset(r, 0, sizeof(*r)); return false; } /* Commit the data (possibly finalizing it) and restore interrupts. */ static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; struct prb_desc *d = to_desc(desc_ring, e->id); unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); /* Now the writer has finished all writing: LMM(_prb_commit:A) */ /* * Set the descriptor as committed. See "ABA Issues" about why * cmpxchg() instead of set() is used. * * 1 Guarantee all record data is stored before the descriptor state * is stored as committed. A write memory barrier is sufficient * for this. This pairs with desc_read:B and desc_reopen_last:A. * * 2. Guarantee the descriptor state is stored as committed before * re-checking the head ID in order to possibly finalize this * descriptor. This pairs with desc_reserve:D. * * Memory barrier involvement: * * If prb_commit:A reads from desc_reserve:D, then * desc_make_final:A reads from _prb_commit:B. * * Relies on: * * MB _prb_commit:B to prb_commit:A * matching * MB desc_reserve:D to desc_make_final:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ WARN_ON_ONCE(1); } /* Restore interrupts, the reserve/commit window is finished. */ local_irq_restore(e->irqflags); } /** * prb_commit() - Commit (previously reserved) data to the ringbuffer. * * @e: The entry containing the reserved data information. * * This is the public function available to writers to commit data. * * Note that the data is not yet available to readers until it is finalized. * Finalizing happens automatically when space for the next record is * reserved. * * See prb_final_commit() for a version of this function that finalizes * immediately. * * Context: Any context. Enables local interrupts. */ void prb_commit(struct prb_reserved_entry *e) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; unsigned long head_id; _prb_commit(e, desc_committed); /* * If this descriptor is no longer the head (i.e. a new record has * been allocated), extending the data for this record is no longer * allowed and therefore it must be finalized. */ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ if (head_id != e->id) desc_make_final(e->rb, e->id); } /** * prb_final_commit() - Commit and finalize (previously reserved) data to * the ringbuffer. * * @e: The entry containing the reserved data information. 
* * This is the public function available to writers to commit+finalize data. * * By finalizing, the data is made immediately available to readers. * * This function should only be used if there are no intentions of extending * this data using prb_reserve_in_last(). * * Context: Any context. Enables local interrupts. */ void prb_final_commit(struct prb_reserved_entry *e) { _prb_commit(e, desc_finalized); desc_update_last_finalized(e->rb); } /* * Count the number of lines in provided text. All text has at least 1 line * (even if @text_size is 0). Each '\n' processed is counted as an additional * line. */ static unsigned int count_lines(const char *text, unsigned int text_size) { unsigned int next_size = text_size; unsigned int line_count = 1; const char *next = text; while (next_size) { next = memchr(next, '\n', next_size); if (!next) break; line_count++; next++; next_size = text_size - (next - text); } return line_count; } /* * Given @blk_lpos, copy an expected @len of data into the provided buffer. * If @line_count is provided, count the number of lines in the data. * * This function (used by readers) performs strict validation on the data * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is * triggered if an internal error is detected. */ static bool copy_data(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf, unsigned int buf_size, unsigned int *line_count) { unsigned int data_size; const char *data; /* Caller might not want any data. */ if ((!buf || !buf_size) && !line_count) return true; data = get_data(data_ring, blk_lpos, &data_size); if (!data) return false; /* * Actual cannot be less than expected. It can be more than expected * because of the trailing alignment padding. * * Note that invalid @len values can occur because the caller loads * the value during an allowed data race. */ if (data_size < (unsigned int)len) return false; /* Caller interested in the line count? */ if (line_count) *line_count = count_lines(data, len); /* Caller interested in the data content? */ if (!buf || !buf_size) return true; data_size = min_t(unsigned int, buf_size, len); memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ return true; } /* * This is an extended version of desc_read(). It gets a copy of a specified * descriptor. However, it also verifies that the record is finalized and has * the sequence number @seq. On success, 0 is returned. * * Error return values: * -EINVAL: A finalized record with sequence number @seq does not exist. * -ENOENT: A finalized record with sequence number @seq exists, but its data * is not available. This is a valid record, so readers should * continue with the next record. */ static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, unsigned long id, u64 seq, struct prb_desc *desc_out) { struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos; enum desc_state d_state; u64 s; d_state = desc_read(desc_ring, id, desc_out, &s, NULL); /* * An unexpected @id (desc_miss) or @seq mismatch means the record * does not exist. A descriptor in the reserved or committed state * means the record does not yet exist for the reader. */ if (d_state == desc_miss || d_state == desc_reserved || d_state == desc_committed || s != seq) { return -EINVAL; } /* * A descriptor in the reusable state may no longer have its data * available; report it as existing but with lost data. Or the record * may actually be a record with lost data. 
*/ if (d_state == desc_reusable || (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) { return -ENOENT; } return 0; } /* * Copy the ringbuffer data from the record with @seq to the provided * @r buffer. On success, 0 is returned. * * See desc_read_finalized_seq() for error return values. */ static int prb_read(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r, unsigned int *line_count) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info = to_info(desc_ring, seq); struct prb_desc *rdesc = to_desc(desc_ring, seq); atomic_long_t *state_var = &rdesc->state_var; struct prb_desc desc; unsigned long id; int err; /* Extract the ID, used to specify the descriptor to read. */ id = DESC_ID(atomic_long_read(state_var)); /* Get a local copy of the correct descriptor (if available). */ err = desc_read_finalized_seq(desc_ring, id, seq, &desc); /* * If @r is NULL, the caller is only interested in the availability * of the record. */ if (err || !r) return err; /* If requested, copy meta data. */ if (r->info) memcpy(r->info, info, sizeof(*(r->info))); /* Copy text data. If it fails, this is a data-less record. */ if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len, r->text_buf, r->text_buf_size, line_count)) { return -ENOENT; } /* Ensure the record is still finalized and has the same @seq. */ return desc_read_finalized_seq(desc_ring, id, seq, &desc); } /* Get the sequence number of the tail descriptor. */ u64 prb_first_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; struct prb_desc desc; unsigned long id; u64 seq; for (;;) { id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */ d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */ /* * This loop will not be infinite because the tail is * _always_ in the finalized or reusable state. */ if (d_state == desc_finalized || d_state == desc_reusable) break; /* * Guarantee the last state load from desc_read() is before * reloading @tail_id in order to see a new tail in the case * that the descriptor has been recycled. This pairs with * desc_reserve:D. * * Memory barrier involvement: * * If prb_first_seq:B reads from desc_reserve:F, then * prb_first_seq:A reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:F * matching * RMB prb_first_seq:B to prb_first_seq:A */ smp_rmb(); /* LMM(prb_first_seq:C) */ } return seq; } /** * prb_next_reserve_seq() - Get the sequence number after the most recently * reserved record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what sequence * number will be assigned to the next reserved record. * * Note that depending on the situation, this value can be equal to or * higher than the sequence number returned by prb_next_seq(). * * Context: Any context. * Return: The sequence number that will be assigned to the next record * reserved. */ u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long last_finalized_id; atomic_long_t *state_var; u64 last_finalized_seq; unsigned long head_id; struct prb_desc desc; unsigned long diff; struct prb_desc *d; int err; /* * It may not be possible to read a sequence number for @head_id. * So the ID of @last_finalized_seq is used to calculate what the * sequence number of @head_id will be.
*/ try_again: last_finalized_seq = desc_last_finalized_seq(rb); /* * @head_id is loaded after @last_finalized_seq to ensure that * it points to the record with @last_finalized_seq or newer. * * Memory barrier involvement: * * If desc_last_finalized_seq:A reads from * desc_update_last_finalized:A, then * prb_next_reserve_seq:A reads from desc_reserve:D. * * Relies on: * * RELEASE from desc_reserve:D to desc_update_last_finalized:A * matching * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A * * Note: desc_reserve:D and desc_update_last_finalized:A can be * different CPUs. However, the desc_update_last_finalized:A CPU * (which performs the release) must have previously seen * desc_read:C, which implies desc_reserve:D can be seen. */ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ d = to_desc(desc_ring, last_finalized_seq); state_var = &d->state_var; /* Extract the ID, used to specify the descriptor to read. */ last_finalized_id = DESC_ID(atomic_long_read(state_var)); /* Ensure @last_finalized_id is correct. */ err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); if (err == -EINVAL) { if (last_finalized_seq == 0) { /* * No record has been finalized or even reserved yet. * * The @head_id is initialized such that the first * increment will yield the first record (seq=0). * Handle it separately to avoid a negative @diff * below. */ if (head_id == DESC0_ID(desc_ring->count_bits)) return 0; /* * One or more descriptors are already reserved. Use * the descriptor ID of the first one (@seq=0) for * the @diff below. */ last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; } else { /* Record must have been overwritten. Try again. */ goto try_again; } } /* Diff of known descriptor IDs to compute related sequence numbers. */ diff = head_id - last_finalized_id; /* * @head_id points to the most recently reserved record, but this * function returns the sequence number that will be assigned to the * next (not yet reserved) record. Thus +1 is needed. */ return (last_finalized_seq + diff + 1); } /* * Non-blocking read of a record. * * On success @seq is updated to the record that was read and (if provided) * @r and @line_count will contain the read/calculated data. * * On failure @seq is updated to a record that is not yet available to the * reader, but it will be the next record available to the reader. * * Note: When the current CPU is in panic, this function will skip over any * non-existent/non-finalized records in order to allow the panic CPU * to print any and all records that have been finalized. */ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, struct printk_record *r, unsigned int *line_count) { u64 tail_seq; int err; while ((err = prb_read(rb, *seq, r, line_count))) { tail_seq = prb_first_seq(rb); if (*seq < tail_seq) { /* * Behind the tail. Catch up and try again. This * can happen for -ENOENT and -EINVAL cases. */ *seq = tail_seq; } else if (err == -ENOENT) { /* Record exists, but the data was lost. Skip. */ (*seq)++; } else { /* * Non-existent/non-finalized record. Must stop. * * For panic situations it cannot be expected that * non-finalized records will become finalized. But * there may be other finalized records beyond that * need to be printed for a panic situation. If this * is the panic CPU, skip this * non-existent/non-finalized record unless it is * at or beyond the head, in which case it is not * possible to continue. 
* * Note that new messages printed on panic CPU are * finalized when we are here. The only exception * might be the last message without trailing newline. * But it would have the sequence number returned * by "prb_next_reserve_seq() - 1". */ if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) (*seq)++; else return false; } } return true; } /** * prb_read_valid() - Non-blocking read of a requested record or (if gone) * the next available record. * * @rb: The ringbuffer to read from. * @seq: The sequence number of the record to read. * @r: A record data buffer to store the read record to. * * This is the public function available to readers to read a record. * * The reader provides the @info and @text_buf buffers of @r to be * filled in. Any of the buffer pointers can be set to NULL if the reader * is not interested in that data. To ensure proper initialization of @r, * prb_rec_init_rd() should be used. * * Context: Any context. * Return: true if a record was read, otherwise false. * * On success, the reader must check r->info.seq to see which record was * actually read. This allows the reader to detect dropped records. * * Failure means @seq refers to a record not yet available to the reader. */ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r) { return _prb_read_valid(rb, &seq, r, NULL); } /** * prb_read_valid_info() - Non-blocking read of meta data for a requested * record or (if gone) the next available record. * * @rb: The ringbuffer to read from. * @seq: The sequence number of the record to read. * @info: A buffer to store the read record meta data to. * @line_count: A buffer to store the number of lines in the record text. * * This is the public function available to readers to read only the * meta data of a record. * * The reader provides the @info, @line_count buffers to be filled in. * Either of the buffer pointers can be set to NULL if the reader is not * interested in that data. * * Context: Any context. * Return: true if a record's meta data was read, otherwise false. * * On success, the reader must check info->seq to see which record meta data * was actually read. This allows the reader to detect dropped records. * * Failure means @seq refers to a record not yet available to the reader. */ bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, struct printk_info *info, unsigned int *line_count) { struct printk_record r; prb_rec_init_rd(&r, info, NULL, 0); return _prb_read_valid(rb, &seq, &r, line_count); } /** * prb_first_valid_seq() - Get the sequence number of the oldest available * record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what the * first/oldest valid sequence number is. * * This provides readers a starting point to begin iterating the ringbuffer. * * Context: Any context. * Return: The sequence number of the first/oldest record or, if the * ringbuffer is empty, 0 is returned. */ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) { u64 seq = 0; if (!_prb_read_valid(rb, &seq, NULL, NULL)) return 0; return seq; } /** * prb_next_seq() - Get the sequence number after the last available record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what the next * newest sequence number available to readers will be. * * This provides readers a sequence number to jump to if all currently * available records should be skipped. 
It is guaranteed that all records * previous to the returned value have been finalized and are (or were) * available to the reader. * * Context: Any context. * Return: The sequence number of the next newest (not yet available) record * for readers. */ u64 prb_next_seq(struct printk_ringbuffer *rb) { u64 seq; seq = desc_last_finalized_seq(rb); /* * Begin searching after the last finalized record. * * On 0, the search must begin at 0 because of hack#2 * of the bootstrapping phase it is not known if a * record at index 0 exists. */ if (seq != 0) seq++; /* * The information about the last finalized @seq might be inaccurate. * Search forward to find the current one. */ while (_prb_read_valid(rb, &seq, NULL, NULL)) seq++; return seq; } /** * prb_init() - Initialize a ringbuffer to use provided external buffers. * * @rb: The ringbuffer to initialize. * @text_buf: The data buffer for text data. * @textbits: The size of @text_buf as a power-of-2 value. * @descs: The descriptor buffer for ringbuffer records. * @descbits: The count of @descs items as a power-of-2 value. * @infos: The printk_info buffer for ringbuffer records. * * This is the public function available to writers to setup a ringbuffer * during runtime using provided buffers. * * This must match the initialization of DEFINE_PRINTKRB(). * * Context: Any context. */ void prb_init(struct printk_ringbuffer *rb, char *text_buf, unsigned int textbits, struct prb_desc *descs, unsigned int descbits, struct printk_info *infos) { memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0])); memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0])); rb->desc_ring.count_bits = descbits; rb->desc_ring.descs = descs; rb->desc_ring.infos = infos; atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); rb->text_data_ring.size_bits = textbits; rb->text_data_ring.data = text_buf; atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits)); atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits)); atomic_long_set(&rb->fail, 0); atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; infos[0].seq = -(u64)_DESCS_COUNT(descbits); infos[_DESCS_COUNT(descbits) - 1].seq = 0; } /** * prb_record_text_space() - Query the full actual used ringbuffer space for * the text data of a reserved entry. * * @e: The successfully reserved entry to query. * * This is the public function available to writers to see how much actual * space is used in the ringbuffer to store the text data of the specified * entry. * * This function is only valid if @e has been successfully reserved using * prb_reserve(). * * Context: Any context. * Return: The size in bytes used by the text data of the associated record. */ unsigned int prb_record_text_space(struct prb_reserved_entry *e) { return e->text_space; }
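The writer and reader entry points above compose into a short reserve/commit and read loop. The following is a minimal usage sketch, not part of this file, assuming the declarations from printk_ringbuffer.h (DEFINE_PRINTKRB(), prb_rec_init_wr(), prb_rec_init_rd()); the ringbuffer name and helpers (example_rb, example_write(), example_read_all()) are made up for illustration.

/*
 * Illustrative sketch only: a writer reserves space, fills in the text
 * and @info->text_len, then commits; a reader iterates with
 * prb_read_valid(), using info.seq to detect dropped records.
 */
DEFINE_PRINTKRB(example_rb, 5, 6);

static void example_write(const char *text, u16 len)
{
	struct prb_reserved_entry e;
	struct printk_record r;

	prb_rec_init_wr(&r, len);
	if (!prb_reserve(&e, &example_rb, &r))
		return;			/* reservation failed */

	memcpy(r.text_buf, text, len);
	r.info->text_len = len;		/* must be set for readers */
	prb_final_commit(&e);		/* commit and finalize immediately */
}

static void example_read_all(void)
{
	struct printk_info info;
	struct printk_record r;
	char buf[128];
	u64 seq;

	prb_rec_init_rd(&r, &info, buf, sizeof(buf));

	for (seq = 0; prb_read_valid(&example_rb, seq, &r); seq = info.seq + 1) {
		int len = min_t(int, info.text_len, (int)sizeof(buf));

		/* info.seq may be newer than @seq if old records were overwritten */
		pr_info("%llu: %.*s\n", info.seq, len, buf);
	}
}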
// SPDX-License-Identifier: GPL-2.0-or-later /* * Apple USB Touchpad (for post-February 2005 PowerBooks and MacBooks) driver * * Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2005-2008 Johannes Berg (johannes@sipsolutions.net) * Copyright (C) 2005-2008 Stelian Pop (stelian@popies.net) * Copyright (C) 2005 Frank Arnold (frank@scirocco-5v-turbo.de) * Copyright (C) 2005 Peter Osterlund (petero2@telia.com) * Copyright (C) 2005 Michael Hanselmann (linux-kernel@hansmi.ch) * Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch) * Copyright (C) 2007-2008 Sven Anders (anders@anduras.de) * * Thanks to Alex Harper <basilisk@foobox.net> for his inputs. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb/input.h> /* * Note: We try to keep the touchpad aspect ratio while still doing only * simple arithmetics: * 0 <= x <= (xsensors - 1) * xfact * 0 <= y <= (ysensors - 1) * yfact */ struct atp_info { int xsensors; /* number of X sensors */ int xsensors_17; /* 17" models have more sensors */ int ysensors; /* number of Y sensors */ int xfact; /* X multiplication factor */ int yfact; /* Y multiplication factor */ int datalen; /* size of USB transfers */ void (*callback)(struct urb *); /* callback function */ int fuzz; /* fuzz touchpad generates */ }; static void atp_complete_geyser_1_2(struct urb *urb); static void atp_complete_geyser_3_4(struct urb *urb); static const struct atp_info fountain_info = { .xsensors = 16, .xsensors_17 = 26, .ysensors = 16, .xfact = 64, .yfact = 43, .datalen = 81, .callback = atp_complete_geyser_1_2, .fuzz = 16, }; static const struct atp_info geyser1_info = { .xsensors = 16, .xsensors_17 = 26, .ysensors = 16, .xfact = 64, .yfact = 43, .datalen = 81, .callback = atp_complete_geyser_1_2, .fuzz = 16, }; static const struct atp_info geyser2_info = { .xsensors = 15, .xsensors_17 = 20, .ysensors = 9, .xfact = 64, .yfact = 43, .datalen = 64, .callback = atp_complete_geyser_1_2, .fuzz = 0, }; static const struct atp_info geyser3_info = { .xsensors = 20, .ysensors = 10, .xfact = 64, .yfact = 64, .datalen = 64, .callback = atp_complete_geyser_3_4, .fuzz = 0, }; static const struct atp_info geyser4_info = { .xsensors = 20, .ysensors = 10, .xfact = 64, .yfact = 64, .datalen = 64, .callback = atp_complete_geyser_3_4, .fuzz = 0, }; #define ATP_DEVICE(prod, info) \ { \ .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ USB_DEVICE_ID_MATCH_INT_CLASS | \ USB_DEVICE_ID_MATCH_INT_PROTOCOL, \ .idVendor = 0x05ac, /* Apple */ \ .idProduct = (prod), \ .bInterfaceClass = 0x03, \ .bInterfaceProtocol = 0x02, \ .driver_info = (unsigned long) &info, \ } /* * Table of devices (Product IDs) that work with this driver. * (The names come from Info.plist in AppleUSBTrackpad.kext, * According to Info.plist Geyser IV is the same as Geyser III.)
*/ static const struct usb_device_id atp_table[] = { /* PowerBooks Feb 2005, iBooks G4 */ ATP_DEVICE(0x020e, fountain_info), /* FOUNTAIN ANSI */ ATP_DEVICE(0x020f, fountain_info), /* FOUNTAIN ISO */ ATP_DEVICE(0x030a, fountain_info), /* FOUNTAIN TP ONLY */ ATP_DEVICE(0x030b, geyser1_info), /* GEYSER 1 TP ONLY */ /* PowerBooks Oct 2005 */ ATP_DEVICE(0x0214, geyser2_info), /* GEYSER 2 ANSI */ ATP_DEVICE(0x0215, geyser2_info), /* GEYSER 2 ISO */ ATP_DEVICE(0x0216, geyser2_info), /* GEYSER 2 JIS */ /* Core Duo MacBook & MacBook Pro */ ATP_DEVICE(0x0217, geyser3_info), /* GEYSER 3 ANSI */ ATP_DEVICE(0x0218, geyser3_info), /* GEYSER 3 ISO */ ATP_DEVICE(0x0219, geyser3_info), /* GEYSER 3 JIS */ /* Core2 Duo MacBook & MacBook Pro */ ATP_DEVICE(0x021a, geyser4_info), /* GEYSER 4 ANSI */ ATP_DEVICE(0x021b, geyser4_info), /* GEYSER 4 ISO */ ATP_DEVICE(0x021c, geyser4_info), /* GEYSER 4 JIS */ /* Core2 Duo MacBook3,1 */ ATP_DEVICE(0x0229, geyser4_info), /* GEYSER 4 HF ANSI */ ATP_DEVICE(0x022a, geyser4_info), /* GEYSER 4 HF ISO */ ATP_DEVICE(0x022b, geyser4_info), /* GEYSER 4 HF JIS */ /* Terminating entry */ { } }; MODULE_DEVICE_TABLE(usb, atp_table); /* maximum number of sensors */ #define ATP_XSENSORS 26 #define ATP_YSENSORS 16 /* * The largest possible bank of sensors with additional buffer of 4 extra values * on either side, for an array of smoothed sensor values. */ #define ATP_SMOOTHSIZE 34 /* maximum pressure this driver will report */ #define ATP_PRESSURE 300 /* * Threshold for the touchpad sensors. Any change less than ATP_THRESHOLD is * ignored. */ #define ATP_THRESHOLD 5 /* * How far we'll bitshift our sensor values before averaging them. Mitigates * rounding errors. */ #define ATP_SCALE 12 /* Geyser initialization constants */ #define ATP_GEYSER_MODE_READ_REQUEST_ID 1 #define ATP_GEYSER_MODE_WRITE_REQUEST_ID 9 #define ATP_GEYSER_MODE_REQUEST_VALUE 0x300 #define ATP_GEYSER_MODE_REQUEST_INDEX 0 #define ATP_GEYSER_MODE_VENDOR_VALUE 0x04 /** * enum atp_status_bits - status bit meanings * * These constants represent the meaning of the status bits. * (only Geyser 3/4) * * @ATP_STATUS_BUTTON: The button was pressed * @ATP_STATUS_BASE_UPDATE: Update of the base values (untouched pad) * @ATP_STATUS_FROM_RESET: Reset previously performed */ enum atp_status_bits { ATP_STATUS_BUTTON = BIT(0), ATP_STATUS_BASE_UPDATE = BIT(2), ATP_STATUS_FROM_RESET = BIT(4), }; /* Structure to hold all of our device specific stuff */ struct atp { char phys[64]; struct usb_device *udev; /* usb device */ struct usb_interface *intf; /* usb interface */ struct urb *urb; /* usb request block */ u8 *data; /* transferred data */ struct input_dev *input; /* input dev */ const struct atp_info *info; /* touchpad model */ bool open; bool valid; /* are the samples valid? */ bool size_detect_done; bool overflow_warned; int fingers_old; /* last reported finger count */ int x_old; /* last reported x/y, */ int y_old; /* used for smoothing */ signed char xy_cur[ATP_XSENSORS + ATP_YSENSORS]; signed char xy_old[ATP_XSENSORS + ATP_YSENSORS]; int xy_acc[ATP_XSENSORS + ATP_YSENSORS]; int smooth[ATP_SMOOTHSIZE]; int smooth_tmp[ATP_SMOOTHSIZE]; int idlecount; /* number of empty packets */ struct work_struct work; }; #define dbg_dump(msg, tab) \ if (debug > 1) { \ int __i; \ printk(KERN_DEBUG "appletouch: %s", msg); \ for (__i = 0; __i < ATP_XSENSORS + ATP_YSENSORS; __i++) \ printk(" %02x", tab[__i]); \ printk("\n"); \ } #define dprintk(format, a...) 
\ do { \ if (debug) \ printk(KERN_DEBUG format, ##a); \ } while (0) MODULE_AUTHOR("Johannes Berg"); MODULE_AUTHOR("Stelian Pop"); MODULE_AUTHOR("Frank Arnold"); MODULE_AUTHOR("Michael Hanselmann"); MODULE_AUTHOR("Sven Anders"); MODULE_DESCRIPTION("Apple PowerBook and MacBook USB touchpad driver"); MODULE_LICENSE("GPL"); /* * Make the threshold a module parameter */ static int threshold = ATP_THRESHOLD; module_param(threshold, int, 0644); MODULE_PARM_DESC(threshold, "Discard any change in data from a sensor" " (the trackpad has many of these sensors)" " less than this value."); static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Activate debugging output"); /* * By default newer Geyser devices send standard USB HID mouse * packets (Report ID 2). This code changes device mode, so it * sends raw sensor reports (Report ID 5). */ static int atp_geyser_init(struct atp *dev) { struct usb_device *udev = dev->udev; char *data; int size; int i; int ret; data = kmalloc(8, GFP_KERNEL); if (!data) { dev_err(&dev->intf->dev, "Out of memory\n"); return -ENOMEM; } size = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), ATP_GEYSER_MODE_READ_REQUEST_ID, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, ATP_GEYSER_MODE_REQUEST_VALUE, ATP_GEYSER_MODE_REQUEST_INDEX, data, 8, 5000); if (size != 8) { dprintk("atp_geyser_init: read error\n"); for (i = 0; i < 8; i++) dprintk("appletouch[%d]: %d\n", i, data[i]); dev_err(&dev->intf->dev, "Failed to read mode from device.\n"); ret = -EIO; goto out_free; } /* Apply the mode switch */ data[0] = ATP_GEYSER_MODE_VENDOR_VALUE; size = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), ATP_GEYSER_MODE_WRITE_REQUEST_ID, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, ATP_GEYSER_MODE_REQUEST_VALUE, ATP_GEYSER_MODE_REQUEST_INDEX, data, 8, 5000); if (size != 8) { dprintk("atp_geyser_init: write error\n"); for (i = 0; i < 8; i++) dprintk("appletouch[%d]: %d\n", i, data[i]); dev_err(&dev->intf->dev, "Failed to request geyser raw mode\n"); ret = -EIO; goto out_free; } ret = 0; out_free: kfree(data); return ret; } /* * Reinitialise the device. This usually stops stream of empty packets * coming from it. */ static void atp_reinit(struct work_struct *work) { struct atp *dev = container_of(work, struct atp, work); int retval; dprintk("appletouch: putting appletouch to sleep (reinit)\n"); atp_geyser_init(dev); retval = usb_submit_urb(dev->urb, GFP_ATOMIC); if (retval) dev_err(&dev->intf->dev, "atp_reinit: usb_submit_urb failed with error %d\n", retval); } static int atp_calculate_abs(struct atp *dev, int offset, int nb_sensors, int fact, int *z, int *fingers) { int i, pass; /* * Use offset to point xy_sensors at the first value in dev->xy_acc * for whichever dimension we're looking at this particular go-round. */ int *xy_sensors = dev->xy_acc + offset; /* values to calculate mean */ int pcum = 0, psum = 0; int is_increasing = 0; *fingers = 0; for (i = 0; i < nb_sensors; i++) { if (xy_sensors[i] < threshold) { if (is_increasing) is_increasing = 0; /* * Makes the finger detection more versatile. For example, * two fingers with no gap will be detected. Also, my * tests show it less likely to have intermittent loss * of multiple finger readings while moving around (scrolling). 
* * Changes the multiple finger detection to counting humps on * sensors (transitions from nonincreasing to increasing) * instead of counting transitions from low sensors (no * finger reading) to high sensors (finger above * sensor) * * - Jason Parekh <jasonparekh@gmail.com> */ } else if (i < 1 || (!is_increasing && xy_sensors[i - 1] < xy_sensors[i])) { (*fingers)++; is_increasing = 1; } else if (i > 0 && (xy_sensors[i - 1] - xy_sensors[i] > threshold)) { is_increasing = 0; } } if (*fingers < 1) /* No need to continue if no fingers are found. */ return 0; /* * Use a smoothed version of sensor data for movement calculations, to * combat noise without needing to rely so heavily on a threshold. * This improves tracking. * * The smoothed array is bigger than the original so that the smoothing * doesn't result in edge values being truncated. */ memset(dev->smooth, 0, 4 * sizeof(dev->smooth[0])); /* Pull base values, scaled up to help avoid truncation errors. */ for (i = 0; i < nb_sensors; i++) dev->smooth[i + 4] = xy_sensors[i] << ATP_SCALE; memset(&dev->smooth[nb_sensors + 4], 0, 4 * sizeof(dev->smooth[0])); for (pass = 0; pass < 4; pass++) { /* Handle edge. */ dev->smooth_tmp[0] = (dev->smooth[0] + dev->smooth[1]) / 2; /* Average values with neighbors. */ for (i = 1; i < nb_sensors + 7; i++) dev->smooth_tmp[i] = (dev->smooth[i - 1] + dev->smooth[i] * 2 + dev->smooth[i + 1]) / 4; /* Handle other edge. */ dev->smooth_tmp[i] = (dev->smooth[i - 1] + dev->smooth[i]) / 2; memcpy(dev->smooth, dev->smooth_tmp, sizeof(dev->smooth)); } for (i = 0; i < nb_sensors + 8; i++) { /* * Skip values if they're small enough to be truncated to 0 * by scale. Mostly noise. */ if ((dev->smooth[i] >> ATP_SCALE) > 0) { pcum += dev->smooth[i] * i; psum += dev->smooth[i]; } } if (psum > 0) { *z = psum >> ATP_SCALE; /* Scale down pressure output. 
*/ return pcum * fact / psum; } return 0; } static inline void atp_report_fingers(struct input_dev *input, int fingers) { input_report_key(input, BTN_TOOL_FINGER, fingers == 1); input_report_key(input, BTN_TOOL_DOUBLETAP, fingers == 2); input_report_key(input, BTN_TOOL_TRIPLETAP, fingers > 2); } /* Check URB status and for correct length of data package */ #define ATP_URB_STATUS_SUCCESS 0 #define ATP_URB_STATUS_ERROR 1 #define ATP_URB_STATUS_ERROR_FATAL 2 static int atp_status_check(struct urb *urb) { struct atp *dev = urb->context; struct usb_interface *intf = dev->intf; switch (urb->status) { case 0: /* success */ break; case -EOVERFLOW: if (!dev->overflow_warned) { dev_warn(&intf->dev, "appletouch: OVERFLOW with data length %d, actual length is %d\n", dev->info->datalen, dev->urb->actual_length); dev->overflow_warned = true; } fallthrough; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* This urb is terminated, clean up */ dev_dbg(&intf->dev, "atp_complete: urb shutting down with status: %d\n", urb->status); return ATP_URB_STATUS_ERROR_FATAL; default: dev_dbg(&intf->dev, "atp_complete: nonzero urb status received: %d\n", urb->status); return ATP_URB_STATUS_ERROR; } /* drop incomplete datasets */ if (dev->urb->actual_length != dev->info->datalen) { dprintk("appletouch: incomplete data package" " (first byte: %d, length: %d).\n", dev->data[0], dev->urb->actual_length); return ATP_URB_STATUS_ERROR; } return ATP_URB_STATUS_SUCCESS; } static void atp_detect_size(struct atp *dev) { int i; /* 17" Powerbooks have extra X sensors */ for (i = dev->info->xsensors; i < ATP_XSENSORS; i++) { if (dev->xy_cur[i]) { dev_info(&dev->intf->dev, "appletouch: 17\" model detected.\n"); input_set_abs_params(dev->input, ABS_X, 0, (dev->info->xsensors_17 - 1) * dev->info->xfact - 1, dev->info->fuzz, 0); break; } } } /* * USB interrupt callback functions */ /* Interrupt function for older touchpads: FOUNTAIN/GEYSER1/GEYSER2 */ static void atp_complete_geyser_1_2(struct urb *urb) { int x, y, x_z, y_z, x_f, y_f; int retval, i, j; int key, fingers; struct atp *dev = urb->context; int status = atp_status_check(urb); if (status == ATP_URB_STATUS_ERROR_FATAL) return; else if (status == ATP_URB_STATUS_ERROR) goto exit; /* reorder the sensors values */ if (dev->info == &geyser2_info) { memset(dev->xy_cur, 0, sizeof(dev->xy_cur)); /* * The values are laid out like this: * Y1, Y2, -, Y3, Y4, -, ..., X1, X2, -, X3, X4, -, ... * '-' is an unused value. 
*/ /* read X values */ for (i = 0, j = 19; i < 20; i += 2, j += 3) { dev->xy_cur[i] = dev->data[j]; dev->xy_cur[i + 1] = dev->data[j + 1]; } /* read Y values */ for (i = 0, j = 1; i < 9; i += 2, j += 3) { dev->xy_cur[ATP_XSENSORS + i] = dev->data[j]; dev->xy_cur[ATP_XSENSORS + i + 1] = dev->data[j + 1]; } } else { for (i = 0; i < 8; i++) { /* X values */ dev->xy_cur[i + 0] = dev->data[5 * i + 2]; dev->xy_cur[i + 8] = dev->data[5 * i + 4]; dev->xy_cur[i + 16] = dev->data[5 * i + 42]; if (i < 2) dev->xy_cur[i + 24] = dev->data[5 * i + 44]; /* Y values */ dev->xy_cur[ATP_XSENSORS + i] = dev->data[5 * i + 1]; dev->xy_cur[ATP_XSENSORS + i + 8] = dev->data[5 * i + 3]; } } dbg_dump("sample", dev->xy_cur); if (!dev->valid) { /* first sample */ dev->valid = true; dev->x_old = dev->y_old = -1; /* Store first sample */ memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); /* Perform size detection, if not done already */ if (unlikely(!dev->size_detect_done)) { atp_detect_size(dev); dev->size_detect_done = true; goto exit; } } for (i = 0; i < ATP_XSENSORS + ATP_YSENSORS; i++) { /* accumulate the change */ signed char change = dev->xy_old[i] - dev->xy_cur[i]; dev->xy_acc[i] -= change; /* prevent down drifting */ if (dev->xy_acc[i] < 0) dev->xy_acc[i] = 0; } memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); dbg_dump("accumulator", dev->xy_acc); x = atp_calculate_abs(dev, 0, ATP_XSENSORS, dev->info->xfact, &x_z, &x_f); y = atp_calculate_abs(dev, ATP_XSENSORS, ATP_YSENSORS, dev->info->yfact, &y_z, &y_f); key = dev->data[dev->info->datalen - 1] & ATP_STATUS_BUTTON; fingers = max(x_f, y_f); if (x && y && fingers == dev->fingers_old) { if (dev->x_old != -1) { x = (dev->x_old * 7 + x) >> 3; y = (dev->y_old * 7 + y) >> 3; dev->x_old = x; dev->y_old = y; if (debug > 1) printk(KERN_DEBUG "appletouch: " "X: %3d Y: %3d Xz: %3d Yz: %3d\n", x, y, x_z, y_z); input_report_key(dev->input, BTN_TOUCH, 1); input_report_abs(dev->input, ABS_X, x); input_report_abs(dev->input, ABS_Y, y); input_report_abs(dev->input, ABS_PRESSURE, min(ATP_PRESSURE, x_z + y_z)); atp_report_fingers(dev->input, fingers); } dev->x_old = x; dev->y_old = y; } else if (!x && !y) { dev->x_old = dev->y_old = -1; dev->fingers_old = 0; input_report_key(dev->input, BTN_TOUCH, 0); input_report_abs(dev->input, ABS_PRESSURE, 0); atp_report_fingers(dev->input, 0); /* reset the accumulator on release */ memset(dev->xy_acc, 0, sizeof(dev->xy_acc)); } if (fingers != dev->fingers_old) dev->x_old = dev->y_old = -1; dev->fingers_old = fingers; input_report_key(dev->input, BTN_LEFT, key); input_sync(dev->input); exit: retval = usb_submit_urb(dev->urb, GFP_ATOMIC); if (retval) dev_err(&dev->intf->dev, "atp_complete: usb_submit_urb failed with result %d\n", retval); } /* Interrupt function for newer touchpads: GEYSER3/GEYSER4 */ static void atp_complete_geyser_3_4(struct urb *urb) { int x, y, x_z, y_z, x_f, y_f; int retval, i, j; int key, fingers; struct atp *dev = urb->context; int status = atp_status_check(urb); if (status == ATP_URB_STATUS_ERROR_FATAL) return; else if (status == ATP_URB_STATUS_ERROR) goto exit; /* Reorder the sensors values: * * The values are laid out like this: * -, Y1, Y2, -, Y3, Y4, -, ..., -, X1, X2, -, X3, X4, ... * '-' is an unused value.
*/ /* read X values */ for (i = 0, j = 19; i < 20; i += 2, j += 3) { dev->xy_cur[i] = dev->data[j + 1]; dev->xy_cur[i + 1] = dev->data[j + 2]; } /* read Y values */ for (i = 0, j = 1; i < 9; i += 2, j += 3) { dev->xy_cur[ATP_XSENSORS + i] = dev->data[j + 1]; dev->xy_cur[ATP_XSENSORS + i + 1] = dev->data[j + 2]; } dbg_dump("sample", dev->xy_cur); /* Just update the base values (i.e. touchpad in untouched state) */ if (dev->data[dev->info->datalen - 1] & ATP_STATUS_BASE_UPDATE) { dprintk("appletouch: updated base values\n"); memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); goto exit; } for (i = 0; i < ATP_XSENSORS + ATP_YSENSORS; i++) { /* calculate the change */ dev->xy_acc[i] = dev->xy_cur[i] - dev->xy_old[i]; /* this is a round-robin value, so couple with that */ if (dev->xy_acc[i] > 127) dev->xy_acc[i] -= 256; if (dev->xy_acc[i] < -127) dev->xy_acc[i] += 256; /* prevent down drifting */ if (dev->xy_acc[i] < 0) dev->xy_acc[i] = 0; } dbg_dump("accumulator", dev->xy_acc); x = atp_calculate_abs(dev, 0, ATP_XSENSORS, dev->info->xfact, &x_z, &x_f); y = atp_calculate_abs(dev, ATP_XSENSORS, ATP_YSENSORS, dev->info->yfact, &y_z, &y_f); key = dev->data[dev->info->datalen - 1] & ATP_STATUS_BUTTON; fingers = max(x_f, y_f); if (x && y && fingers == dev->fingers_old) { if (dev->x_old != -1) { x = (dev->x_old * 7 + x) >> 3; y = (dev->y_old * 7 + y) >> 3; dev->x_old = x; dev->y_old = y; if (debug > 1) printk(KERN_DEBUG "appletouch: X: %3d Y: %3d " "Xz: %3d Yz: %3d\n", x, y, x_z, y_z); input_report_key(dev->input, BTN_TOUCH, 1); input_report_abs(dev->input, ABS_X, x); input_report_abs(dev->input, ABS_Y, y); input_report_abs(dev->input, ABS_PRESSURE, min(ATP_PRESSURE, x_z + y_z)); atp_report_fingers(dev->input, fingers); } dev->x_old = x; dev->y_old = y; } else if (!x && !y) { dev->x_old = dev->y_old = -1; dev->fingers_old = 0; input_report_key(dev->input, BTN_TOUCH, 0); input_report_abs(dev->input, ABS_PRESSURE, 0); atp_report_fingers(dev->input, 0); /* reset the accumulator on release */ memset(dev->xy_acc, 0, sizeof(dev->xy_acc)); } if (fingers != dev->fingers_old) dev->x_old = dev->y_old = -1; dev->fingers_old = fingers; input_report_key(dev->input, BTN_LEFT, key); input_sync(dev->input); /* * Geysers 3/4 will continue to send packets continually after * the first touch unless reinitialised. Do so if it's been * idle for a while in order to avoid waking the kernel up * several hundred times a second. */ /* * Button must not be pressed when entering suspend, * otherwise we will never release the button. 
*/ if (!x && !y && !key) { dev->idlecount++; if (dev->idlecount == 10) { dev->x_old = dev->y_old = -1; dev->idlecount = 0; schedule_work(&dev->work); /* Don't resubmit urb here, wait for reinit */ return; } } else dev->idlecount = 0; exit: retval = usb_submit_urb(dev->urb, GFP_ATOMIC); if (retval) dev_err(&dev->intf->dev, "atp_complete: usb_submit_urb failed with result %d\n", retval); } static int atp_open(struct input_dev *input) { struct atp *dev = input_get_drvdata(input); if (usb_submit_urb(dev->urb, GFP_KERNEL)) return -EIO; dev->open = true; return 0; } static void atp_close(struct input_dev *input) { struct atp *dev = input_get_drvdata(input); usb_kill_urb(dev->urb); cancel_work_sync(&dev->work); dev->open = false; } static int atp_handle_geyser(struct atp *dev) { if (dev->info != &fountain_info) { /* switch to raw sensor mode */ if (atp_geyser_init(dev)) return -EIO; dev_info(&dev->intf->dev, "Geyser mode initialized.\n"); } return 0; } static int atp_probe(struct usb_interface *iface, const struct usb_device_id *id) { struct atp *dev; struct input_dev *input_dev; struct usb_device *udev = interface_to_usbdev(iface); struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int int_in_endpointAddr = 0; int i, error = -ENOMEM; const struct atp_info *info = (const struct atp_info *)id->driver_info; /* set up the endpoint information */ /* use only the first interrupt-in endpoint */ iface_desc = iface->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { endpoint = &iface_desc->endpoint[i].desc; if (!int_in_endpointAddr && usb_endpoint_is_int_in(endpoint)) { /* we found an interrupt in endpoint */ int_in_endpointAddr = endpoint->bEndpointAddress; break; } } if (!int_in_endpointAddr) { dev_err(&iface->dev, "Could not find int-in endpoint\n"); return -EIO; } /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct atp), GFP_KERNEL); input_dev = input_allocate_device(); if (!dev || !input_dev) { dev_err(&iface->dev, "Out of memory\n"); goto err_free_devs; } dev->udev = udev; dev->intf = iface; dev->input = input_dev; dev->info = info; dev->overflow_warned = false; dev->urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb) goto err_free_devs; dev->data = usb_alloc_coherent(dev->udev, dev->info->datalen, GFP_KERNEL, &dev->urb->transfer_dma); if (!dev->data) goto err_free_urb; usb_fill_int_urb(dev->urb, udev, usb_rcvintpipe(udev, int_in_endpointAddr), dev->data, dev->info->datalen, dev->info->callback, dev, 1); error = atp_handle_geyser(dev); if (error) goto err_free_buffer; usb_make_path(udev, dev->phys, sizeof(dev->phys)); strlcat(dev->phys, "/input0", sizeof(dev->phys)); input_dev->name = "appletouch"; input_dev->phys = dev->phys; usb_to_input_id(dev->udev, &input_dev->id); input_dev->dev.parent = &iface->dev; input_set_drvdata(input_dev, dev); input_dev->open = atp_open; input_dev->close = atp_close; set_bit(EV_ABS, input_dev->evbit); input_set_abs_params(input_dev, ABS_X, 0, (dev->info->xsensors - 1) * dev->info->xfact - 1, dev->info->fuzz, 0); input_set_abs_params(input_dev, ABS_Y, 0, (dev->info->ysensors - 1) * dev->info->yfact - 1, dev->info->fuzz, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, ATP_PRESSURE, 0, 0); set_bit(EV_KEY, input_dev->evbit); set_bit(BTN_TOUCH, input_dev->keybit); set_bit(BTN_TOOL_FINGER, input_dev->keybit); set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); set_bit(BTN_LEFT, input_dev->keybit); INIT_WORK(&dev->work, atp_reinit); error = 
input_register_device(dev->input); if (error) goto err_free_buffer; /* save our data pointer in this interface device */ usb_set_intfdata(iface, dev); return 0; err_free_buffer: usb_free_coherent(dev->udev, dev->info->datalen, dev->data, dev->urb->transfer_dma); err_free_urb: usb_free_urb(dev->urb); err_free_devs: usb_set_intfdata(iface, NULL); kfree(dev); input_free_device(input_dev); return error; } static void atp_disconnect(struct usb_interface *iface) { struct atp *dev = usb_get_intfdata(iface); usb_set_intfdata(iface, NULL); if (dev) { usb_kill_urb(dev->urb); input_unregister_device(dev->input); usb_free_coherent(dev->udev, dev->info->datalen, dev->data, dev->urb->transfer_dma); usb_free_urb(dev->urb); kfree(dev); } dev_info(&iface->dev, "input: appletouch disconnected\n"); } static int atp_recover(struct atp *dev) { int error; error = atp_handle_geyser(dev); if (error) return error; if (dev->open && usb_submit_urb(dev->urb, GFP_KERNEL)) return -EIO; return 0; } static int atp_suspend(struct usb_interface *iface, pm_message_t message) { struct atp *dev = usb_get_intfdata(iface); usb_kill_urb(dev->urb); return 0; } static int atp_resume(struct usb_interface *iface) { struct atp *dev = usb_get_intfdata(iface); if (dev->open && usb_submit_urb(dev->urb, GFP_KERNEL)) return -EIO; return 0; } static int atp_reset_resume(struct usb_interface *iface) { struct atp *dev = usb_get_intfdata(iface); return atp_recover(dev); } static struct usb_driver atp_driver = { .name = "appletouch", .probe = atp_probe, .disconnect = atp_disconnect, .suspend = atp_suspend, .resume = atp_resume, .reset_resume = atp_reset_resume, .id_table = atp_table, }; module_usb_driver(atp_driver);
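Worth noting about atp_calculate_abs() above: once the sensor humps have been counted as fingers, the reported coordinate is the value-weighted mean of the sensor indices, scaled by the per-axis xfact/yfact, and the summed sensor values double as the pressure estimate. A minimal standalone sketch of that arithmetic follows (plain C with made-up names; it omits the driver's ATP_SCALE pre-scaling and neighbor smoothing and is not code from the driver).

/*
 * Illustrative sketch only: weighted-mean position over one sensor axis,
 * as done (after smoothing and scaling) by atp_calculate_abs().
 */
#include <stdio.h>

static int weighted_position(const int *sensors, int nb_sensors, int fact,
			     int *pressure)
{
	int i, pcum = 0, psum = 0;

	for (i = 0; i < nb_sensors; i++) {
		pcum += sensors[i] * i;	/* value-weighted index sum */
		psum += sensors[i];	/* total signal, used as pressure */
	}

	*pressure = psum;
	if (!psum)
		return 0;		/* no touch on this axis */

	/* mean sensor index, stretched by the per-axis factor */
	return pcum * fact / psum;
}

int main(void)
{
	/* one finger roughly above sensor 5 of a 10-sensor axis */
	int sensors[10] = { 0, 0, 0, 10, 60, 80, 50, 5, 0, 0 };
	int z;
	int x = weighted_position(sensors, 10, 64, &z);

	printf("x=%d pressure=%d\n", x, z);	/* x comes out near 5 * 64 */
	return 0;
}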
/* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH * Copyright (C) 2018, 2021-2023 Intel Corporation */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS #include <linux/rtnetlink.h> #include <net/cfg80211.h> #include "core.h" #include "trace.h" static inline int rdev_suspend(struct cfg80211_registered_device *rdev, struct cfg80211_wowlan *wowlan) { int ret; trace_rdev_suspend(&rdev->wiphy, wowlan); ret =
rdev->ops->suspend(&rdev->wiphy, wowlan); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_resume(struct cfg80211_registered_device *rdev) { int ret; trace_rdev_resume(&rdev->wiphy); ret = rdev->ops->resume(&rdev->wiphy); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_set_wakeup(struct cfg80211_registered_device *rdev, bool enabled) { trace_rdev_set_wakeup(&rdev->wiphy, enabled); rdev->ops->set_wakeup(&rdev->wiphy, enabled); trace_rdev_return_void(&rdev->wiphy); } static inline struct wireless_dev *rdev_add_virtual_intf(struct cfg80211_registered_device *rdev, char *name, unsigned char name_assign_type, enum nl80211_iftype type, struct vif_params *params) { struct wireless_dev *ret; trace_rdev_add_virtual_intf(&rdev->wiphy, name, type); ret = rdev->ops->add_virtual_intf(&rdev->wiphy, name, name_assign_type, type, params); trace_rdev_return_wdev(&rdev->wiphy, ret); return ret; } static inline int rdev_del_virtual_intf(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { int ret; trace_rdev_del_virtual_intf(&rdev->wiphy, wdev); ret = rdev->ops->del_virtual_intf(&rdev->wiphy, wdev); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_change_virtual_intf(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype type, struct vif_params *params) { int ret; trace_rdev_change_virtual_intf(&rdev->wiphy, dev, type); ret = rdev->ops->change_virtual_intf(&rdev->wiphy, dev, type, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_add_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { int ret; trace_rdev_add_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, mac_addr, params->mode); ret = rdev->ops->add_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, mac_addr, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)) { int ret; trace_rdev_get_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, mac_addr); ret = rdev->ops->get_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, mac_addr, cookie, callback); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { int ret; trace_rdev_del_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, mac_addr); ret = rdev->ops->del_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, mac_addr); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_default_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast) { int ret; trace_rdev_set_default_key(&rdev->wiphy, netdev, link_id, key_index, unicast, multicast); ret = rdev->ops->set_default_key(&rdev->wiphy, netdev, link_id, key_index, unicast, multicast); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, int link_id, u8 key_index) { int ret; trace_rdev_set_default_mgmt_key(&rdev->wiphy, netdev, link_id, 
key_index); ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, netdev, link_id, key_index); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_default_beacon_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, int link_id, u8 key_index) { int ret; trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, link_id, key_index); ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev, link_id, key_index); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_start_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ap_settings *settings) { int ret; trace_rdev_start_ap(&rdev->wiphy, dev, settings); ret = rdev->ops->start_ap(&rdev->wiphy, dev, settings); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_change_beacon(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ap_update *info) { int ret; trace_rdev_change_beacon(&rdev->wiphy, dev, info); ret = rdev->ops->change_beacon(&rdev->wiphy, dev, info); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, unsigned int link_id) { int ret; trace_rdev_stop_ap(&rdev->wiphy, dev, link_id); ret = rdev->ops->stop_ap(&rdev->wiphy, dev, link_id); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_add_station(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *mac, struct station_parameters *params) { int ret; trace_rdev_add_station(&rdev->wiphy, dev, mac, params); ret = rdev->ops->add_station(&rdev->wiphy, dev, mac, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_station(struct cfg80211_registered_device *rdev, struct net_device *dev, struct station_del_parameters *params) { int ret; trace_rdev_del_station(&rdev->wiphy, dev, params); ret = rdev->ops->del_station(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_change_station(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *mac, struct station_parameters *params) { int ret; trace_rdev_change_station(&rdev->wiphy, dev, mac, params); ret = rdev->ops->change_station(&rdev->wiphy, dev, mac, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_station(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac, struct station_info *sinfo) { int ret; trace_rdev_get_station(&rdev->wiphy, dev, mac); ret = rdev->ops->get_station(&rdev->wiphy, dev, mac, sinfo); trace_rdev_return_int_station_info(&rdev->wiphy, ret, sinfo); return ret; } static inline int rdev_dump_station(struct cfg80211_registered_device *rdev, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { int ret; trace_rdev_dump_station(&rdev->wiphy, dev, idx, mac); ret = rdev->ops->dump_station(&rdev->wiphy, dev, idx, mac, sinfo); trace_rdev_return_int_station_info(&rdev->wiphy, ret, sinfo); return ret; } static inline int rdev_add_mpath(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *dst, u8 *next_hop) { int ret; trace_rdev_add_mpath(&rdev->wiphy, dev, dst, next_hop); ret = rdev->ops->add_mpath(&rdev->wiphy, dev, dst, next_hop); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_mpath(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *dst) { int ret; 
trace_rdev_del_mpath(&rdev->wiphy, dev, dst); ret = rdev->ops->del_mpath(&rdev->wiphy, dev, dst); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_change_mpath(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *dst, u8 *next_hop) { int ret; trace_rdev_change_mpath(&rdev->wiphy, dev, dst, next_hop); ret = rdev->ops->change_mpath(&rdev->wiphy, dev, dst, next_hop); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_mpath(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { int ret; trace_rdev_get_mpath(&rdev->wiphy, dev, dst, next_hop); ret = rdev->ops->get_mpath(&rdev->wiphy, dev, dst, next_hop, pinfo); trace_rdev_return_int_mpath_info(&rdev->wiphy, ret, pinfo); return ret; } static inline int rdev_get_mpp(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { int ret; trace_rdev_get_mpp(&rdev->wiphy, dev, dst, mpp); ret = rdev->ops->get_mpp(&rdev->wiphy, dev, dst, mpp, pinfo); trace_rdev_return_int_mpath_info(&rdev->wiphy, ret, pinfo); return ret; } static inline int rdev_dump_mpath(struct cfg80211_registered_device *rdev, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { int ret; trace_rdev_dump_mpath(&rdev->wiphy, dev, idx, dst, next_hop); ret = rdev->ops->dump_mpath(&rdev->wiphy, dev, idx, dst, next_hop, pinfo); trace_rdev_return_int_mpath_info(&rdev->wiphy, ret, pinfo); return ret; } static inline int rdev_dump_mpp(struct cfg80211_registered_device *rdev, struct net_device *dev, int idx, u8 *dst, u8 *mpp, struct mpath_info *pinfo) { int ret; trace_rdev_dump_mpp(&rdev->wiphy, dev, idx, dst, mpp); ret = rdev->ops->dump_mpp(&rdev->wiphy, dev, idx, dst, mpp, pinfo); trace_rdev_return_int_mpath_info(&rdev->wiphy, ret, pinfo); return ret; } static inline int rdev_get_mesh_config(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_config *conf) { int ret; trace_rdev_get_mesh_config(&rdev->wiphy, dev); ret = rdev->ops->get_mesh_config(&rdev->wiphy, dev, conf); trace_rdev_return_int_mesh_config(&rdev->wiphy, ret, conf); return ret; } static inline int rdev_update_mesh_config(struct cfg80211_registered_device *rdev, struct net_device *dev, u32 mask, const struct mesh_config *nconf) { int ret; trace_rdev_update_mesh_config(&rdev->wiphy, dev, mask, nconf); ret = rdev->ops->update_mesh_config(&rdev->wiphy, dev, mask, nconf); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, const struct mesh_config *conf, const struct mesh_setup *setup) { int ret; trace_rdev_join_mesh(&rdev->wiphy, dev, conf, setup); ret = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, setup); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev) { int ret; trace_rdev_leave_mesh(&rdev->wiphy, dev); ret = rdev->ops->leave_mesh(&rdev->wiphy, dev); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_join_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ocb_setup *setup) { int ret; trace_rdev_join_ocb(&rdev->wiphy, dev, setup); ret = rdev->ops->join_ocb(&rdev->wiphy, dev, setup); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_leave_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev) 
{ int ret; trace_rdev_leave_ocb(&rdev->wiphy, dev); ret = rdev->ops->leave_ocb(&rdev->wiphy, dev); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_change_bss(struct cfg80211_registered_device *rdev, struct net_device *dev, struct bss_parameters *params) { int ret; trace_rdev_change_bss(&rdev->wiphy, dev, params); ret = rdev->ops->change_bss(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_inform_bss(struct cfg80211_registered_device *rdev, struct cfg80211_bss *bss, const struct cfg80211_bss_ies *ies, void *drv_data) { trace_rdev_inform_bss(&rdev->wiphy, bss); if (rdev->ops->inform_bss) rdev->ops->inform_bss(&rdev->wiphy, bss, ies, drv_data); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_set_txq_params(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_txq_params *params) { int ret; trace_rdev_set_txq_params(&rdev->wiphy, dev, params); ret = rdev->ops->set_txq_params(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_libertas_set_mesh_channel(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan) { int ret; trace_rdev_libertas_set_mesh_channel(&rdev->wiphy, dev, chan); ret = rdev->ops->libertas_set_mesh_channel(&rdev->wiphy, dev, chan); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef) { int ret; trace_rdev_set_monitor_channel(&rdev->wiphy, chandef); ret = rdev->ops->set_monitor_channel(&rdev->wiphy, chandef); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_scan(struct cfg80211_registered_device *rdev, struct cfg80211_scan_request *request) { int ret; trace_rdev_scan(&rdev->wiphy, request); ret = rdev->ops->scan(&rdev->wiphy, request); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_abort_scan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { trace_rdev_abort_scan(&rdev->wiphy, wdev); rdev->ops->abort_scan(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_auth_request *req) { int ret; trace_rdev_auth(&rdev->wiphy, dev, req); ret = rdev->ops->auth(&rdev->wiphy, dev, req); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_assoc_request *req) { int ret; trace_rdev_assoc(&rdev->wiphy, dev, req); ret = rdev->ops->assoc(&rdev->wiphy, dev, req); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_deauth_request *req) { int ret; trace_rdev_deauth(&rdev->wiphy, dev, req); ret = rdev->ops->deauth(&rdev->wiphy, dev, req); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_disassoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_disassoc_request *req) { int ret; trace_rdev_disassoc(&rdev->wiphy, dev, req); ret = rdev->ops->disassoc(&rdev->wiphy, dev, req); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *sme) { int ret; 
trace_rdev_connect(&rdev->wiphy, dev, sme); ret = rdev->ops->connect(&rdev->wiphy, dev, sme); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_update_connect_params(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *sme, u32 changed) { int ret; trace_rdev_update_connect_params(&rdev->wiphy, dev, sme, changed); ret = rdev->ops->update_connect_params(&rdev->wiphy, dev, sme, changed); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason_code) { int ret; trace_rdev_disconnect(&rdev->wiphy, dev, reason_code); ret = rdev->ops->disconnect(&rdev->wiphy, dev, reason_code); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ibss_params *params) { int ret; trace_rdev_join_ibss(&rdev->wiphy, dev, params); ret = rdev->ops->join_ibss(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev) { int ret; trace_rdev_leave_ibss(&rdev->wiphy, dev); ret = rdev->ops->leave_ibss(&rdev->wiphy, dev); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed) { int ret; if (!rdev->ops->set_wiphy_params) return -EOPNOTSUPP; trace_rdev_set_wiphy_params(&rdev->wiphy, changed); ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_tx_power(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { int ret; trace_rdev_set_tx_power(&rdev->wiphy, wdev, type, mbm); ret = rdev->ops->set_tx_power(&rdev->wiphy, wdev, type, mbm); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, int *dbm) { int ret; trace_rdev_get_tx_power(&rdev->wiphy, wdev); ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, dbm); trace_rdev_return_int_int(&rdev->wiphy, ret, *dbm); return ret; } static inline int rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev, struct net_device *dev, const bool enabled) { int ret; trace_rdev_set_multicast_to_unicast(&rdev->wiphy, dev, enabled); ret = rdev->ops->set_multicast_to_unicast(&rdev->wiphy, dev, enabled); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_txq_stats(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_txq_stats *txqstats) { int ret; trace_rdev_get_txq_stats(&rdev->wiphy, wdev); ret = rdev->ops->get_txq_stats(&rdev->wiphy, wdev, txqstats); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev) { trace_rdev_rfkill_poll(&rdev->wiphy); rdev->ops->rfkill_poll(&rdev->wiphy); trace_rdev_return_void(&rdev->wiphy); } #ifdef CONFIG_NL80211_TESTMODE static inline int rdev_testmode_cmd(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, void *data, int len) { int ret; trace_rdev_testmode_cmd(&rdev->wiphy, wdev); ret = rdev->ops->testmode_cmd(&rdev->wiphy, wdev, data, len); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_testmode_dump(struct 
cfg80211_registered_device *rdev, struct sk_buff *skb, struct netlink_callback *cb, void *data, int len) { int ret; trace_rdev_testmode_dump(&rdev->wiphy); ret = rdev->ops->testmode_dump(&rdev->wiphy, skb, cb, data, len); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } #endif static inline int rdev_set_bitrate_mask(struct cfg80211_registered_device *rdev, struct net_device *dev, unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask) { int ret; trace_rdev_set_bitrate_mask(&rdev->wiphy, dev, link_id, peer, mask); ret = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, link_id, peer, mask); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_dump_survey(struct cfg80211_registered_device *rdev, struct net_device *netdev, int idx, struct survey_info *info) { int ret; trace_rdev_dump_survey(&rdev->wiphy, netdev, idx); ret = rdev->ops->dump_survey(&rdev->wiphy, netdev, idx, info); if (ret < 0) trace_rdev_return_int(&rdev->wiphy, ret); else trace_rdev_return_int_survey_info(&rdev->wiphy, ret, info); return ret; } static inline int rdev_set_pmksa(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_pmksa *pmksa) { int ret; trace_rdev_set_pmksa(&rdev->wiphy, netdev, pmksa); ret = rdev->ops->set_pmksa(&rdev->wiphy, netdev, pmksa); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_pmksa(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_pmksa *pmksa) { int ret; trace_rdev_del_pmksa(&rdev->wiphy, netdev, pmksa); ret = rdev->ops->del_pmksa(&rdev->wiphy, netdev, pmksa); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_flush_pmksa(struct cfg80211_registered_device *rdev, struct net_device *netdev) { int ret; trace_rdev_flush_pmksa(&rdev->wiphy, netdev); ret = rdev->ops->flush_pmksa(&rdev->wiphy, netdev); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_remain_on_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, unsigned int duration, u64 *cookie) { int ret; trace_rdev_remain_on_channel(&rdev->wiphy, wdev, chan, duration); ret = rdev->ops->remain_on_channel(&rdev->wiphy, wdev, chan, duration, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } static inline int rdev_cancel_remain_on_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie) { int ret; trace_rdev_cancel_remain_on_channel(&rdev->wiphy, wdev, cookie); ret = rdev->ops->cancel_remain_on_channel(&rdev->wiphy, wdev, cookie); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_mgmt_tx_params *params, u64 *cookie) { int ret; trace_rdev_mgmt_tx(&rdev->wiphy, wdev, params); ret = rdev->ops->mgmt_tx(&rdev->wiphy, wdev, params, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev, struct net_device *dev, const void *buf, size_t len, const u8 *dest, __be16 proto, const bool noencrypt, int link, u64 *cookie) { int ret; trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len, dest, proto, noencrypt, link); ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len, dest, proto, noencrypt, link, cookie); if (cookie) trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); else 
trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_mgmt_tx_cancel_wait(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie) { int ret; trace_rdev_mgmt_tx_cancel_wait(&rdev->wiphy, wdev, cookie); ret = rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, wdev, cookie); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_power_mgmt(struct cfg80211_registered_device *rdev, struct net_device *dev, bool enabled, int timeout) { int ret; trace_rdev_set_power_mgmt(&rdev->wiphy, dev, enabled, timeout); ret = rdev->ops->set_power_mgmt(&rdev->wiphy, dev, enabled, timeout); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_cqm_rssi_config(struct cfg80211_registered_device *rdev, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst) { int ret; trace_rdev_set_cqm_rssi_config(&rdev->wiphy, dev, rssi_thold, rssi_hyst); ret = rdev->ops->set_cqm_rssi_config(&rdev->wiphy, dev, rssi_thold, rssi_hyst); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_cqm_rssi_range_config(struct cfg80211_registered_device *rdev, struct net_device *dev, s32 low, s32 high) { int ret; trace_rdev_set_cqm_rssi_range_config(&rdev->wiphy, dev, low, high); ret = rdev->ops->set_cqm_rssi_range_config(&rdev->wiphy, dev, low, high); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_cqm_txe_config(struct cfg80211_registered_device *rdev, struct net_device *dev, u32 rate, u32 pkts, u32 intvl) { int ret; trace_rdev_set_cqm_txe_config(&rdev->wiphy, dev, rate, pkts, intvl); ret = rdev->ops->set_cqm_txe_config(&rdev->wiphy, dev, rate, pkts, intvl); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_update_mgmt_frame_registrations(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct mgmt_frame_regs *upd) { might_sleep(); trace_rdev_update_mgmt_frame_registrations(&rdev->wiphy, wdev, upd); if (rdev->ops->update_mgmt_frame_registrations) rdev->ops->update_mgmt_frame_registrations(&rdev->wiphy, wdev, upd); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_set_antenna(struct cfg80211_registered_device *rdev, u32 tx_ant, u32 rx_ant) { int ret; trace_rdev_set_antenna(&rdev->wiphy, tx_ant, rx_ant); ret = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_antenna(struct cfg80211_registered_device *rdev, u32 *tx_ant, u32 *rx_ant) { int ret; trace_rdev_get_antenna(&rdev->wiphy); ret = rdev->ops->get_antenna(&rdev->wiphy, tx_ant, rx_ant); if (ret) trace_rdev_return_int(&rdev->wiphy, ret); else trace_rdev_return_int_tx_rx(&rdev->wiphy, ret, *tx_ant, *rx_ant); return ret; } static inline int rdev_sched_scan_start(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_sched_scan_request *request) { int ret; trace_rdev_sched_scan_start(&rdev->wiphy, dev, request->reqid); ret = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_sched_scan_stop(struct cfg80211_registered_device *rdev, struct net_device *dev, u64 reqid) { int ret; trace_rdev_sched_scan_stop(&rdev->wiphy, dev, reqid); ret = rdev->ops->sched_scan_stop(&rdev->wiphy, dev, reqid); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_rekey_data(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_gtk_rekey_data 
*data) { int ret; trace_rdev_set_rekey_data(&rdev->wiphy, dev); ret = rdev->ops->set_rekey_data(&rdev->wiphy, dev, data); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *peer, int link_id, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, bool initiator, const u8 *buf, size_t len) { int ret; trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, buf, len); ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, buf, len); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_tdls_oper(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *peer, enum nl80211_tdls_operation oper) { int ret; trace_rdev_tdls_oper(&rdev->wiphy, dev, peer, oper); ret = rdev->ops->tdls_oper(&rdev->wiphy, dev, peer, oper); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_probe_client(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *peer, u64 *cookie) { int ret; trace_rdev_probe_client(&rdev->wiphy, dev, peer); ret = rdev->ops->probe_client(&rdev->wiphy, dev, peer, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } static inline int rdev_set_noack_map(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 noack_map) { int ret; trace_rdev_set_noack_map(&rdev->wiphy, dev, noack_map); ret = rdev->ops->set_noack_map(&rdev->wiphy, dev, noack_map); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, unsigned int link_id, struct cfg80211_chan_def *chandef) { int ret; trace_rdev_get_channel(&rdev->wiphy, wdev, link_id); ret = rdev->ops->get_channel(&rdev->wiphy, wdev, link_id, chandef); trace_rdev_return_chandef(&rdev->wiphy, ret, chandef); return ret; } static inline int rdev_start_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { int ret; trace_rdev_start_p2p_device(&rdev->wiphy, wdev); ret = rdev->ops->start_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { trace_rdev_stop_p2p_device(&rdev->wiphy, wdev); rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_start_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf) { int ret; trace_rdev_start_nan(&rdev->wiphy, wdev, conf); ret = rdev->ops->start_nan(&rdev->wiphy, wdev, conf); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { trace_rdev_stop_nan(&rdev->wiphy, wdev); rdev->ops->stop_nan(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_add_nan_func(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_nan_func *nan_func) { int ret; trace_rdev_add_nan_func(&rdev->wiphy, wdev, nan_func); ret = rdev->ops->add_nan_func(&rdev->wiphy, wdev, nan_func); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 
cookie) { trace_rdev_del_nan_func(&rdev->wiphy, wdev, cookie); rdev->ops->del_nan_func(&rdev->wiphy, wdev, cookie); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_nan_change_conf(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes) { int ret; trace_rdev_nan_change_conf(&rdev->wiphy, wdev, conf, changes); if (rdev->ops->nan_change_conf) ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf, changes); else ret = -EOPNOTSUPP; trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) { int ret; trace_rdev_set_mac_acl(&rdev->wiphy, dev, params); ret = rdev->ops->set_mac_acl(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie) { int ret; trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie); ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_crit_proto_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_crit_proto_id protocol, u16 duration) { int ret; trace_rdev_crit_proto_start(&rdev->wiphy, wdev, protocol, duration); ret = rdev->ops->crit_proto_start(&rdev->wiphy, wdev, protocol, duration); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { trace_rdev_crit_proto_stop(&rdev->wiphy, wdev); rdev->ops->crit_proto_stop(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_channel_switch(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_csa_settings *params) { int ret; trace_rdev_channel_switch(&rdev->wiphy, dev, params); ret = rdev->ops->channel_switch(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_qos_map *qos_map) { int ret = -EOPNOTSUPP; if (rdev->ops->set_qos_map) { trace_rdev_set_qos_map(&rdev->wiphy, dev, qos_map); ret = rdev->ops->set_qos_map(&rdev->wiphy, dev, qos_map); trace_rdev_return_int(&rdev->wiphy, ret); } return ret; } static inline int rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, struct net_device *dev, unsigned int link_id, struct cfg80211_chan_def *chandef) { int ret; trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, link_id, chandef); ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, link_id, chandef); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_add_tx_ts(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time) { int ret = -EOPNOTSUPP; trace_rdev_add_tx_ts(&rdev->wiphy, dev, tsid, peer, user_prio, admitted_time); if (rdev->ops->add_tx_ts) ret = rdev->ops->add_tx_ts(&rdev->wiphy, dev, tsid, peer, user_prio, admitted_time); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_tx_ts(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 tsid, const u8 *peer) { int ret = -EOPNOTSUPP; trace_rdev_del_tx_ts(&rdev->wiphy, dev, tsid, peer); if (rdev->ops->del_tx_ts) ret = rdev->ops->del_tx_ts(&rdev->wiphy, dev, 
tsid, peer); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_tdls_channel_switch(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *addr, u8 oper_class, struct cfg80211_chan_def *chandef) { int ret; trace_rdev_tdls_channel_switch(&rdev->wiphy, dev, addr, oper_class, chandef); ret = rdev->ops->tdls_channel_switch(&rdev->wiphy, dev, addr, oper_class, chandef); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_tdls_cancel_channel_switch(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *addr) { trace_rdev_tdls_cancel_channel_switch(&rdev->wiphy, dev, addr); rdev->ops->tdls_cancel_channel_switch(&rdev->wiphy, dev, addr); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_start_radar_detection(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms) { int ret = -EOPNOTSUPP; trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef, cac_time_ms); if (rdev->ops->start_radar_detection) ret = rdev->ops->start_radar_detection(&rdev->wiphy, dev, chandef, cac_time_ms); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_end_cac(struct cfg80211_registered_device *rdev, struct net_device *dev) { trace_rdev_end_cac(&rdev->wiphy, dev); if (rdev->ops->end_cac) rdev->ops->end_cac(&rdev->wiphy, dev); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, struct net_device *dev, int mcast_rate[NUM_NL80211_BANDS]) { int ret = -EOPNOTSUPP; trace_rdev_set_mcast_rate(&rdev->wiphy, dev, mcast_rate); if (rdev->ops->set_mcast_rate) ret = rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_coalesce(struct cfg80211_registered_device *rdev, struct cfg80211_coalesce *coalesce) { int ret = -EOPNOTSUPP; trace_rdev_set_coalesce(&rdev->wiphy, coalesce); if (rdev->ops->set_coalesce) ret = rdev->ops->set_coalesce(&rdev->wiphy, coalesce); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_pmk(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_pmk_conf *pmk_conf) { int ret = -EOPNOTSUPP; trace_rdev_set_pmk(&rdev->wiphy, dev, pmk_conf); if (rdev->ops->set_pmk) ret = rdev->ops->set_pmk(&rdev->wiphy, dev, pmk_conf); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *aa) { int ret = -EOPNOTSUPP; trace_rdev_del_pmk(&rdev->wiphy, dev, aa); if (rdev->ops->del_pmk) ret = rdev->ops->del_pmk(&rdev->wiphy, dev, aa); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_external_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_external_auth_params *params) { int ret = -EOPNOTSUPP; trace_rdev_external_auth(&rdev->wiphy, dev, params); if (rdev->ops->external_auth) ret = rdev->ops->external_auth(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_ftm_responder_stats(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ftm_responder_stats *ftm_stats) { int ret = -EOPNOTSUPP; trace_rdev_get_ftm_responder_stats(&rdev->wiphy, dev, ftm_stats); if (rdev->ops->get_ftm_responder_stats) ret = rdev->ops->get_ftm_responder_stats(&rdev->wiphy, dev, ftm_stats); 
trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_start_pmsr(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_pmsr_request *request) { int ret = -EOPNOTSUPP; trace_rdev_start_pmsr(&rdev->wiphy, wdev, request->cookie); if (rdev->ops->start_pmsr) ret = rdev->ops->start_pmsr(&rdev->wiphy, wdev, request); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_abort_pmsr(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct cfg80211_pmsr_request *request) { trace_rdev_abort_pmsr(&rdev->wiphy, wdev, request->cookie); if (rdev->ops->abort_pmsr) rdev->ops->abort_pmsr(&rdev->wiphy, wdev, request); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_update_owe_info(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_update_owe_info *oweinfo) { int ret = -EOPNOTSUPP; trace_rdev_update_owe_info(&rdev->wiphy, dev, oweinfo); if (rdev->ops->update_owe_info) ret = rdev->ops->update_owe_info(&rdev->wiphy, dev, oweinfo); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_probe_mesh_link(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *dest, const void *buf, size_t len) { int ret; trace_rdev_probe_mesh_link(&rdev->wiphy, dev, dest, buf, len); ret = rdev->ops->probe_mesh_link(&rdev->wiphy, dev, buf, len); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_tid_config(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_tid_config *tid_conf) { int ret; trace_rdev_set_tid_config(&rdev->wiphy, dev, tid_conf); ret = rdev->ops->set_tid_config(&rdev->wiphy, dev, tid_conf); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_reset_tid_config(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *peer, u8 tids) { int ret; trace_rdev_reset_tid_config(&rdev->wiphy, dev, peer, tids); ret = rdev->ops->reset_tid_config(&rdev->wiphy, dev, peer, tids); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_sar_specs(struct cfg80211_registered_device *rdev, struct cfg80211_sar_specs *sar) { int ret; trace_rdev_set_sar_specs(&rdev->wiphy, sar); ret = rdev->ops->set_sar_specs(&rdev->wiphy, sar); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_color_change(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_color_change_settings *params) { int ret; trace_rdev_color_change(&rdev->wiphy, dev, params); ret = rdev->ops->color_change(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_fils_aad(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_fils_aad *fils_aad) { int ret = -EOPNOTSUPP; trace_rdev_set_fils_aad(&rdev->wiphy, dev, fils_aad); if (rdev->ops->set_fils_aad) ret = rdev->ops->set_fils_aad(&rdev->wiphy, dev, fils_aad); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_radar_background(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef) { struct wiphy *wiphy = &rdev->wiphy; int ret; if (!rdev->ops->set_radar_background) return -EOPNOTSUPP; trace_rdev_set_radar_background(wiphy, chandef); ret = rdev->ops->set_radar_background(wiphy, chandef); trace_rdev_return_int(wiphy, ret); return ret; } static inline int rdev_add_intf_link(struct cfg80211_registered_device *rdev, struct wireless_dev 
*wdev, unsigned int link_id) { int ret = 0; trace_rdev_add_intf_link(&rdev->wiphy, wdev, link_id); if (rdev->ops->add_intf_link) ret = rdev->ops->add_intf_link(&rdev->wiphy, wdev, link_id); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline void rdev_del_intf_link(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, unsigned int link_id) { trace_rdev_del_intf_link(&rdev->wiphy, wdev, link_id); if (rdev->ops->del_intf_link) rdev->ops->del_intf_link(&rdev->wiphy, wdev, link_id); trace_rdev_return_void(&rdev->wiphy); } static inline int rdev_add_link_station(struct cfg80211_registered_device *rdev, struct net_device *dev, struct link_station_parameters *params) { int ret; if (!rdev->ops->add_link_station) return -EOPNOTSUPP; trace_rdev_add_link_station(&rdev->wiphy, dev, params); ret = rdev->ops->add_link_station(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_mod_link_station(struct cfg80211_registered_device *rdev, struct net_device *dev, struct link_station_parameters *params) { int ret; if (!rdev->ops->mod_link_station) return -EOPNOTSUPP; trace_rdev_mod_link_station(&rdev->wiphy, dev, params); ret = rdev->ops->mod_link_station(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_link_station(struct cfg80211_registered_device *rdev, struct net_device *dev, struct link_station_del_parameters *params) { int ret; if (!rdev->ops->del_link_station) return -EOPNOTSUPP; trace_rdev_del_link_station(&rdev->wiphy, dev, params); ret = rdev->ops->del_link_station(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts) { struct wiphy *wiphy = &rdev->wiphy; int ret; if (!rdev->ops->set_hw_timestamp) return -EOPNOTSUPP; trace_rdev_set_hw_timestamp(wiphy, dev, hwts); ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts); trace_rdev_return_int(wiphy, ret); return ret; } static inline int rdev_set_ttlm(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ttlm_params *params) { struct wiphy *wiphy = &rdev->wiphy; int ret; if (!rdev->ops->set_ttlm) return -EOPNOTSUPP; trace_rdev_set_ttlm(wiphy, dev, params); ret = rdev->ops->set_ttlm(wiphy, dev, params); trace_rdev_return_int(wiphy, ret); return ret; } #endif /* __CFG80211_RDEV_OPS */
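Every wrapper in this header follows the same shape: trace the call, dispatch to the matching member of struct cfg80211_ops, then trace the result, so all driver interaction is visible through the rdev_* trace events declared in trace.h; ops a driver may legitimately leave NULL are checked first and reported as -EOPNOTSUPP. The sketch below, appended after the header purely for illustration, shows how caller-side code is expected to consume one of these wrappers. example_trigger_scan() is a hypothetical helper and not part of cfg80211, and the locking remark reflects the usual wiphy-mutex convention rather than anything this header enforces.

/*
 * Hypothetical example (not part of cfg80211): callers NULL-check optional
 * ops and then go through the rdev_*() wrapper so entry and return values
 * are traced consistently.
 */
static inline int example_trigger_scan(struct cfg80211_registered_device *rdev,
				       struct cfg80211_scan_request *request)
{
	if (!rdev->ops->scan)
		return -EOPNOTSUPP;

	/* nl80211 normally reaches this point with the wiphy mutex held */
	return rdev_scan(rdev, request);
}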
// SPDX-License-Identifier: GPL-2.0-only /* * fs/libfs.c * Library for filesystems writers.
*/ #include <linux/blkdev.h> #include <linux/export.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/mount.h> #include <linux/vfs.h> #include <linux/quotaops.h> #include <linux/mutex.h> #include <linux/namei.h> #include <linux/exportfs.h> #include <linux/iversion.h> #include <linux/writeback.h> #include <linux/buffer_head.h> /* sync_mapping_buffers */ #include <linux/fs_context.h> #include <linux/pseudo_fs.h> #include <linux/fsnotify.h> #include <linux/unicode.h> #include <linux/fscrypt.h> #include <linux/pidfs.h> #include <linux/uaccess.h> #include "internal.h" int simple_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); return 0; } EXPORT_SYMBOL(simple_getattr); int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { u64 id = huge_encode_dev(dentry->d_sb->s_dev); buf->f_fsid = u64_to_fsid(id); buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = PAGE_SIZE; buf->f_namelen = NAME_MAX; return 0; } EXPORT_SYMBOL(simple_statfs); /* * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ int always_delete_dentry(const struct dentry *dentry) { return 1; } EXPORT_SYMBOL(always_delete_dentry); const struct dentry_operations simple_dentry_operations = { .d_delete = always_delete_dentry, }; EXPORT_SYMBOL(simple_dentry_operations); /* * Lookup the data. This is trivial - if the dentry didn't already * exist, we know it is negative. Set d_op to delete negative dentries. */ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (!dentry->d_sb->s_d_op) d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, NULL); return NULL; } EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { file->private_data = d_alloc_cursor(file->f_path.dentry); return file->private_data ? 0 : -ENOMEM; } EXPORT_SYMBOL(dcache_dir_open); int dcache_dir_close(struct inode *inode, struct file *file) { dput(file->private_data); return 0; } EXPORT_SYMBOL(dcache_dir_close); /* parent is locked at least shared */ /* * Returns an element of siblings' list. * We are looking for <count>th positive after <p>; if * found, dentry is grabbed and returned to caller. * If no such element exists, NULL is returned. 
*/ static struct dentry *scan_positives(struct dentry *cursor, struct hlist_node **p, loff_t count, struct dentry *last) { struct dentry *dentry = cursor->d_parent, *found = NULL; spin_lock(&dentry->d_lock); while (*p) { struct dentry *d = hlist_entry(*p, struct dentry, d_sib); p = &d->d_sib.next; // we must at least skip cursors, to avoid livelocks if (d->d_flags & DCACHE_DENTRY_CURSOR) continue; if (simple_positive(d) && !--count) { spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(d)) found = dget_dlock(d); spin_unlock(&d->d_lock); if (likely(found)) break; count = 1; } if (need_resched()) { if (!hlist_unhashed(&cursor->d_sib)) __hlist_del(&cursor->d_sib); hlist_add_behind(&cursor->d_sib, &d->d_sib); p = &cursor->d_sib.next; spin_unlock(&dentry->d_lock); cond_resched(); spin_lock(&dentry->d_lock); } } spin_unlock(&dentry->d_lock); dput(last); return found; } loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry *dentry = file->f_path.dentry; switch (whence) { case 1: offset += file->f_pos; fallthrough; case 0: if (offset >= 0) break; fallthrough; default: return -EINVAL; } if (offset != file->f_pos) { struct dentry *cursor = file->private_data; struct dentry *to = NULL; inode_lock_shared(dentry->d_inode); if (offset > 2) to = scan_positives(cursor, &dentry->d_children.first, offset - 2, NULL); spin_lock(&dentry->d_lock); hlist_del_init(&cursor->d_sib); if (to) hlist_add_behind(&cursor->d_sib, &to->d_sib); spin_unlock(&dentry->d_lock); dput(to); file->f_pos = offset; inode_unlock_shared(dentry->d_inode); } return offset; } EXPORT_SYMBOL(dcache_dir_lseek); /* * Directory is locked and all positive dentries in it are safe, since * for ramfs-type trees they can't go away without unlink() or rmdir(), * both impossible due to the lock on directory. 
*/ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; struct dentry *next = NULL; struct hlist_node **p; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) p = &dentry->d_children.first; else p = &cursor->d_sib.next; while ((next = scan_positives(cursor, p, 1, next)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, fs_umode_to_dtype(d_inode(next)->i_mode))) break; ctx->pos++; p = &next->d_sib.next; } spin_lock(&dentry->d_lock); hlist_del_init(&cursor->d_sib); if (next) hlist_add_before(&cursor->d_sib, &next->d_sib); spin_unlock(&dentry->d_lock); dput(next); return 0; } EXPORT_SYMBOL(dcache_readdir); ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; } EXPORT_SYMBOL(generic_read_dir); const struct file_operations simple_dir_operations = { .open = dcache_dir_open, .release = dcache_dir_close, .llseek = dcache_dir_lseek, .read = generic_read_dir, .iterate_shared = dcache_readdir, .fsync = noop_fsync, }; EXPORT_SYMBOL(simple_dir_operations); const struct inode_operations simple_dir_inode_operations = { .lookup = simple_lookup, }; EXPORT_SYMBOL(simple_dir_inode_operations); /* 0 is '.', 1 is '..', so always start with offset 2 or more */ enum { DIR_OFFSET_MIN = 2, }; static void offset_set(struct dentry *dentry, long offset) { dentry->d_fsdata = (void *)offset; } static long dentry2offset(struct dentry *dentry) { return (long)dentry->d_fsdata; } static struct lock_class_key simple_offset_lock_class; /** * simple_offset_init - initialize an offset_ctx * @octx: directory offset map to be initialized * */ void simple_offset_init(struct offset_ctx *octx) { mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE); lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class); octx->next_offset = DIR_OFFSET_MIN; } /** * simple_offset_add - Add an entry to a directory's offset map * @octx: directory offset ctx to be updated * @dentry: new dentry being added * * Returns zero on success. @octx and the dentry's offset are updated. * Otherwise, a negative errno value is returned. */ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) { unsigned long offset; int ret; if (dentry2offset(dentry) != 0) return -EBUSY; ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN, LONG_MAX, &octx->next_offset, GFP_KERNEL); if (ret < 0) return ret; offset_set(dentry, offset); return 0; } /** * simple_offset_remove - Remove an entry to a directory's offset map * @octx: directory offset ctx to be updated * @dentry: dentry being removed * */ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) { long offset; offset = dentry2offset(dentry); if (offset == 0) return; mtree_erase(&octx->mt, offset); offset_set(dentry, 0); } /** * simple_offset_empty - Check if a dentry can be unlinked * @dentry: dentry to be tested * * Returns 0 if @dentry is a non-empty directory; otherwise returns 1. 
*/ int simple_offset_empty(struct dentry *dentry) { struct inode *inode = d_inode(dentry); struct offset_ctx *octx; struct dentry *child; unsigned long index; int ret = 1; if (!inode || !S_ISDIR(inode->i_mode)) return ret; index = DIR_OFFSET_MIN; octx = inode->i_op->get_offset_ctx(inode); mt_for_each(&octx->mt, child, index, LONG_MAX) { spin_lock(&child->d_lock); if (simple_positive(child)) { spin_unlock(&child->d_lock); ret = 0; break; } spin_unlock(&child->d_lock); } return ret; } /** * simple_offset_rename_exchange - exchange rename with directory offsets * @old_dir: parent of dentry being moved * @old_dentry: dentry being moved * @new_dir: destination parent * @new_dentry: destination dentry * * Returns zero on success. Otherwise a negative errno is returned and the * rename is rolled back. */ int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); long old_index = dentry2offset(old_dentry); long new_index = dentry2offset(new_dentry); int ret; simple_offset_remove(old_ctx, old_dentry); simple_offset_remove(new_ctx, new_dentry); ret = simple_offset_add(new_ctx, old_dentry); if (ret) goto out_restore; ret = simple_offset_add(old_ctx, new_dentry); if (ret) { simple_offset_remove(new_ctx, old_dentry); goto out_restore; } ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); if (ret) { simple_offset_remove(new_ctx, old_dentry); simple_offset_remove(old_ctx, new_dentry); goto out_restore; } return 0; out_restore: offset_set(old_dentry, old_index); mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL); offset_set(new_dentry, new_index); mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL); return ret; } /** * simple_offset_destroy - Release offset map * @octx: directory offset ctx that is about to be destroyed * * During fs teardown (eg. umount), a directory's offset map might still * contain entries. xa_destroy() cleans out anything that remains. */ void simple_offset_destroy(struct offset_ctx *octx) { mtree_destroy(&octx->mt); } /** * offset_dir_llseek - Advance the read position of a directory descriptor * @file: an open directory whose position is to be updated * @offset: a byte offset * @whence: enumerator describing the starting position for this update * * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories. * * Returns the updated read position if successful; otherwise a * negative errno is returned and the read position remains unchanged. 
*/ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) { switch (whence) { case SEEK_CUR: offset += file->f_pos; fallthrough; case SEEK_SET: if (offset >= 0) break; fallthrough; default: return -EINVAL; } /* In this case, ->private_data is protected by f_pos_lock */ file->private_data = NULL; return vfs_setpos(file, offset, LONG_MAX); } static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset) { MA_STATE(mas, &octx->mt, offset, offset); struct dentry *child, *found = NULL; rcu_read_lock(); child = mas_find(&mas, LONG_MAX); if (!child) goto out; spin_lock(&child->d_lock); if (simple_positive(child)) found = dget_dlock(child); spin_unlock(&child->d_lock); out: rcu_read_unlock(); return found; } static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) { struct inode *inode = d_inode(dentry); long offset = dentry2offset(dentry); return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset, inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) { struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; while (true) { dentry = offset_find_next(octx, ctx->pos); if (!dentry) return ERR_PTR(-ENOENT); if (!offset_dir_emit(ctx, dentry)) { dput(dentry); break; } ctx->pos = dentry2offset(dentry) + 1; dput(dentry); } return NULL; } /** * offset_readdir - Emit entries starting at offset @ctx->pos * @file: an open directory to iterate over * @ctx: directory iteration context * * Caller must hold @file's i_rwsem to prevent insertion or removal of * entries during this call. * * On entry, @ctx->pos contains an offset that represents the first entry * to be read from the directory. * * The operation continues until there are no more entries to read, or * until the ctx->actor indicates there is no more space in the caller's * output buffer. * * On return, @ctx->pos contains an offset that will read the next entry * in this directory when offset_readdir() is called again with @ctx. * * Return values: * %0 - Complete */ static int offset_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dir = file->f_path.dentry; lockdep_assert_held(&d_inode(dir)->i_rwsem); if (!dir_emit_dots(file, ctx)) return 0; /* In this case, ->private_data is protected by f_pos_lock */ if (ctx->pos == DIR_OFFSET_MIN) file->private_data = NULL; else if (file->private_data == ERR_PTR(-ENOENT)) return 0; file->private_data = offset_iterate_dir(d_inode(dir), ctx); return 0; } const struct file_operations simple_offset_dir_operations = { .llseek = offset_dir_llseek, .iterate_shared = offset_readdir, .read = generic_read_dir, .fsync = noop_fsync, }; static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev) { struct dentry *child = NULL, *d; spin_lock(&parent->d_lock); d = prev ? 
d_next_sibling(prev) : d_first_child(parent); hlist_for_each_entry_from(d, d_sib) { if (simple_positive(d)) { spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(d)) child = dget_dlock(d); spin_unlock(&d->d_lock); if (likely(child)) break; } } spin_unlock(&parent->d_lock); dput(prev); return child; } void simple_recursive_removal(struct dentry *dentry, void (*callback)(struct dentry *)) { struct dentry *this = dget(dentry); while (true) { struct dentry *victim = NULL, *child; struct inode *inode = this->d_inode; inode_lock(inode); if (d_is_dir(this)) inode->i_flags |= S_DEAD; while ((child = find_next_child(this, victim)) == NULL) { // kill and ascend // update metadata while it's still locked inode_set_ctime_current(inode); clear_nlink(inode); inode_unlock(inode); victim = this; this = this->d_parent; inode = this->d_inode; inode_lock(inode); if (simple_positive(victim)) { d_invalidate(victim); // avoid lost mounts if (d_is_dir(victim)) fsnotify_rmdir(inode, victim); else fsnotify_unlink(inode, victim); if (callback) callback(victim); dput(victim); // unpin it } if (victim == dentry) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (d_is_dir(dentry)) drop_nlink(inode); inode_unlock(inode); dput(dentry); return; } } inode_unlock(inode); this = child; } } EXPORT_SYMBOL(simple_recursive_removal); static const struct super_operations simple_super_operations = { .statfs = simple_statfs, }; static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) { struct pseudo_fs_context *ctx = fc->fs_private; struct inode *root; s->s_maxbytes = MAX_LFS_FILESIZE; s->s_blocksize = PAGE_SIZE; s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = ctx->magic; s->s_op = ctx->ops ?: &simple_super_operations; s->s_xattr = ctx->xattr; s->s_time_gran = 1; root = new_inode(s); if (!root) return -ENOMEM; /* * since this is the first inode, make it number 1. New inodes created * after this must take care not to collide with it (by passing * max_reserved of 1 to iunique). 
*/ root->i_ino = 1; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; simple_inode_init_ts(root); s->s_root = d_make_root(root); if (!s->s_root) return -ENOMEM; s->s_d_op = ctx->dops; return 0; } static int pseudo_fs_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, pseudo_fs_fill_super); } static void pseudo_fs_free(struct fs_context *fc) { kfree(fc->fs_private); } static const struct fs_context_operations pseudo_fs_context_ops = { .free = pseudo_fs_free, .get_tree = pseudo_fs_get_tree, }; /* * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) */ struct pseudo_fs_context *init_pseudo(struct fs_context *fc, unsigned long magic) { struct pseudo_fs_context *ctx; ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL); if (likely(ctx)) { ctx->magic = magic; fc->fs_private = ctx; fc->ops = &pseudo_fs_context_ops; fc->sb_flags |= SB_NOUSER; fc->global = true; } return ctx; } EXPORT_SYMBOL(init_pseudo); int simple_open(struct inode *inode, struct file *file) { if (inode->i_private) file->private_data = inode->i_private; return 0; } EXPORT_SYMBOL(simple_open); int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inc_nlink(inode); ihold(inode); dget(dentry); d_instantiate(dentry, inode); return 0; } EXPORT_SYMBOL(simple_link); int simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; spin_lock(&dentry->d_lock); hlist_for_each_entry(child, &dentry->d_children, d_sib) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(child)) { spin_unlock(&child->d_lock); goto out; } spin_unlock(&child->d_lock); } ret = 1; out: spin_unlock(&dentry->d_lock); return ret; } EXPORT_SYMBOL(simple_empty); int simple_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); drop_nlink(inode); dput(dentry); return 0; } EXPORT_SYMBOL(simple_unlink); int simple_rmdir(struct inode *dir, struct dentry *dentry) { if (!simple_empty(dentry)) return -ENOTEMPTY; drop_nlink(d_inode(dentry)); simple_unlink(dir, dentry); drop_nlink(dir); return 0; } EXPORT_SYMBOL(simple_rmdir); /** * simple_rename_timestamp - update the various inode timestamps for rename * @old_dir: old parent directory * @old_dentry: dentry that is being renamed * @new_dir: new parent directory * @new_dentry: target for rename * * POSIX mandates that the old and new parent directories have their ctime and * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have * their ctime updated. 
*/ void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct inode *newino = d_inode(new_dentry); inode_set_mtime_to_ts(old_dir, inode_set_ctime_current(old_dir)); if (new_dir != old_dir) inode_set_mtime_to_ts(new_dir, inode_set_ctime_current(new_dir)); inode_set_ctime_current(d_inode(old_dentry)); if (newino) inode_set_ctime_current(newino); } EXPORT_SYMBOL_GPL(simple_rename_timestamp); int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { bool old_is_dir = d_is_dir(old_dentry); bool new_is_dir = d_is_dir(new_dentry); if (old_dir != new_dir && old_is_dir != new_is_dir) { if (old_is_dir) { drop_nlink(old_dir); inc_nlink(new_dir); } else { drop_nlink(new_dir); inc_nlink(old_dir); } } simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); return 0; } EXPORT_SYMBOL_GPL(simple_rename_exchange); int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { int they_are_dirs = d_is_dir(old_dentry); if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) return -EINVAL; if (flags & RENAME_EXCHANGE) return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); if (!simple_empty(new_dentry)) return -ENOTEMPTY; if (d_really_is_positive(new_dentry)) { simple_unlink(new_dir, new_dentry); if (they_are_dirs) { drop_nlink(d_inode(new_dentry)); drop_nlink(old_dir); } } else if (they_are_dirs) { drop_nlink(old_dir); inc_nlink(new_dir); } simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); return 0; } EXPORT_SYMBOL(simple_rename); /** * simple_setattr - setattr for simple filesystem * @idmap: idmap of the target mount * @dentry: dentry * @iattr: iattr structure * * Returns 0 on success, -error on failure. * * simple_setattr is a simple ->setattr implementation without a proper * implementation of size changes. * * It can either be used for in-memory filesystems or special files * on simple regular filesystems. Anything that needs to change on-disk * or wire state on size changes needs its own setattr method. 
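 *
 * For an in-memory filesystem the hook-up is usually direct (a sketch;
 * "examplefs_file_inode_operations" is an illustrative name):
 *
 *	static const struct inode_operations examplefs_file_inode_operations = {
 *		.setattr	= simple_setattr,
 *	};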
*/ int simple_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int error; error = setattr_prepare(idmap, dentry, iattr); if (error) return error; if (iattr->ia_valid & ATTR_SIZE) truncate_setsize(inode, iattr->ia_size); setattr_copy(idmap, inode, iattr); mark_inode_dirty(inode); return 0; } EXPORT_SYMBOL(simple_setattr); static int simple_read_folio(struct file *file, struct folio *folio) { folio_zero_range(folio, 0, folio_size(folio)); flush_dcache_folio(folio); folio_mark_uptodate(folio); folio_unlock(folio); return 0; } int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata) { struct folio *folio; folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return PTR_ERR(folio); *pagep = &folio->page; if (!folio_test_uptodate(folio) && (len != folio_size(folio))) { size_t from = offset_in_folio(folio, pos); folio_zero_segments(folio, 0, from, from + len, folio_size(folio)); } return 0; } EXPORT_SYMBOL(simple_write_begin); /** * simple_write_end - .write_end helper for non-block-device FSes * @file: See .write_end of address_space_operations * @mapping: " * @pos: " * @len: " * @copied: " * @page: " * @fsdata: " * * simple_write_end does the minimum needed for updating a page after writing is * done. It has the same API signature as the .write_end of * address_space_operations vector. So it can just be set onto .write_end for * FSes that don't need any other processing. i_mutex is assumed to be held. * Block based filesystems should use generic_write_end(). * NOTE: Even though i_size might get updated by this function, mark_inode_dirty * is not called, so a filesystem that actually does store data in .write_inode * should extend on what's done here with a call to mark_inode_dirty() in the * case that i_size has changed. * * Use *ONLY* with simple_read_folio() */ static int simple_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct folio *folio = page_folio(page); struct inode *inode = folio->mapping->host; loff_t last_pos = pos + copied; /* zero the stale part of the folio if we did a short copy */ if (!folio_test_uptodate(folio)) { if (copied < len) { size_t from = offset_in_folio(folio, pos); folio_zero_range(folio, from + copied, len - copied); } folio_mark_uptodate(folio); } /* * No need to use i_size_read() here, the i_size * cannot change under us because we hold the i_mutex. */ if (last_pos > inode->i_size) i_size_write(inode, last_pos); folio_mark_dirty(folio); folio_unlock(folio); folio_put(folio); return copied; } /* * Provides ramfs-style behavior: data in the pagecache, but no writeback. */ const struct address_space_operations ram_aops = { .read_folio = simple_read_folio, .write_begin = simple_write_begin, .write_end = simple_write_end, .dirty_folio = noop_dirty_folio, }; EXPORT_SYMBOL(ram_aops); /* * the inodes created here are not hashed. If you use iunique to generate * unique inode values later for this filesystem, then you must take care * to pass it an appropriate max_reserved value to avoid collisions. 
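 *
 * A typical caller builds a tree_descr table in which index 1 is left unused
 * (the root inode is number 1) and an empty name terminates the list, then
 * calls this from its fill_super callback (a sketch; the "examplefs_*" names
 * and EXAMPLEFS_MAGIC are illustrative):
 *
 *	static const struct tree_descr examplefs_files[] = {
 *		[2] = { "status", &examplefs_status_fops, 0444 },
 *		      { "" }	// empty name ends the table
 *	};
 *
 *	err = simple_fill_super(sb, EXAMPLEFS_MAGIC, examplefs_files);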
*/ int simple_fill_super(struct super_block *s, unsigned long magic, const struct tree_descr *files) { struct inode *inode; struct dentry *dentry; int i; s->s_blocksize = PAGE_SIZE; s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = magic; s->s_op = &simple_super_operations; s->s_time_gran = 1; inode = new_inode(s); if (!inode) return -ENOMEM; /* * because the root inode is 1, the files array must not contain an * entry at index 1 */ inode->i_ino = 1; inode->i_mode = S_IFDIR | 0755; simple_inode_init_ts(inode); inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; set_nlink(inode, 2); s->s_root = d_make_root(inode); if (!s->s_root) return -ENOMEM; for (i = 0; !files->name || files->name[0]; i++, files++) { if (!files->name) continue; /* warn if it tries to conflict with the root inode */ if (unlikely(i == 1)) printk(KERN_WARNING "%s: %s passed in a files array" "with an index of 1!\n", __func__, s->s_type->name); dentry = d_alloc_name(s->s_root, files->name); if (!dentry) return -ENOMEM; inode = new_inode(s); if (!inode) { dput(dentry); return -ENOMEM; } inode->i_mode = S_IFREG | files->mode; simple_inode_init_ts(inode); inode->i_fop = files->ops; inode->i_ino = i; d_add(dentry, inode); } return 0; } EXPORT_SYMBOL(simple_fill_super); static DEFINE_SPINLOCK(pin_fs_lock); int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) { struct vfsmount *mnt = NULL; spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); if (IS_ERR(mnt)) return PTR_ERR(mnt); spin_lock(&pin_fs_lock); if (!*mount) *mount = mnt; } mntget(*mount); ++*count; spin_unlock(&pin_fs_lock); mntput(mnt); return 0; } EXPORT_SYMBOL(simple_pin_fs); void simple_release_fs(struct vfsmount **mount, int *count) { struct vfsmount *mnt; spin_lock(&pin_fs_lock); mnt = *mount; if (!--*count) *mount = NULL; spin_unlock(&pin_fs_lock); mntput(mnt); } EXPORT_SYMBOL(simple_release_fs); /** * simple_read_from_buffer - copy data from the buffer to user space * @to: the user space buffer to read to * @count: the maximum number of bytes to read * @ppos: the current position in the buffer * @from: the buffer to read from * @available: the size of the buffer * * The simple_read_from_buffer() function reads up to @count bytes from the * buffer @from at offset @ppos into the user space address starting at @to. * * On success, the number of bytes read is returned and the offset @ppos is * advanced by this number, or negative value is returned on error. **/ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available) { loff_t pos = *ppos; size_t ret; if (pos < 0) return -EINVAL; if (pos >= available || !count) return 0; if (count > available - pos) count = available - pos; ret = copy_to_user(to, from + pos, count); if (ret == count) return -EFAULT; count -= ret; *ppos = pos + count; return count; } EXPORT_SYMBOL(simple_read_from_buffer); /** * simple_write_to_buffer - copy data from user space to the buffer * @to: the buffer to write to * @available: the size of the buffer * @ppos: the current position in the buffer * @from: the user space buffer to read from * @count: the maximum number of bytes to read * * The simple_write_to_buffer() function reads up to @count bytes from the user * space address starting at @from into the buffer @to at offset @ppos. 
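 *
 * A debugfs-style ->write() handler can often be reduced to a single call
 * (a sketch; "example_buf" stands in for the driver's kernel buffer):
 *
 *	static ssize_t example_write(struct file *file, const char __user *ubuf,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return simple_write_to_buffer(example_buf, sizeof(example_buf),
 *					      ppos, ubuf, count);
 *	}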
* * On success, the number of bytes written is returned and the offset @ppos is * advanced by this number, or negative value is returned on error. **/ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count) { loff_t pos = *ppos; size_t res; if (pos < 0) return -EINVAL; if (pos >= available || !count) return 0; if (count > available - pos) count = available - pos; res = copy_from_user(to + pos, from, count); if (res == count) return -EFAULT; count -= res; *ppos = pos + count; return count; } EXPORT_SYMBOL(simple_write_to_buffer); /** * memory_read_from_buffer - copy data from the buffer * @to: the kernel space buffer to read to * @count: the maximum number of bytes to read * @ppos: the current position in the buffer * @from: the buffer to read from * @available: the size of the buffer * * The memory_read_from_buffer() function reads up to @count bytes from the * buffer @from at offset @ppos into the kernel space address starting at @to. * * On success, the number of bytes read is returned and the offset @ppos is * advanced by this number, or negative value is returned on error. **/ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, const void *from, size_t available) { loff_t pos = *ppos; if (pos < 0) return -EINVAL; if (pos >= available) return 0; if (count > available - pos) count = available - pos; memcpy(to, from + pos, count); *ppos = pos + count; return count; } EXPORT_SYMBOL(memory_read_from_buffer); /* * Transaction based IO. * The file expects a single write which triggers the transaction, and then * possibly a read which collects the result - which is stored in a * file-local buffer. */ void simple_transaction_set(struct file *file, size_t n) { struct simple_transaction_argresp *ar = file->private_data; BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); /* * The barrier ensures that ar->size will really remain zero until * ar->data is ready for reading. 
*/ smp_mb(); ar->size = n; } EXPORT_SYMBOL(simple_transaction_set); char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) { struct simple_transaction_argresp *ar; static DEFINE_SPINLOCK(simple_transaction_lock); if (size > SIMPLE_TRANSACTION_LIMIT - 1) return ERR_PTR(-EFBIG); ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL); if (!ar) return ERR_PTR(-ENOMEM); spin_lock(&simple_transaction_lock); /* only one write allowed per open */ if (file->private_data) { spin_unlock(&simple_transaction_lock); free_page((unsigned long)ar); return ERR_PTR(-EBUSY); } file->private_data = ar; spin_unlock(&simple_transaction_lock); if (copy_from_user(ar->data, buf, size)) return ERR_PTR(-EFAULT); return ar->data; } EXPORT_SYMBOL(simple_transaction_get); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { struct simple_transaction_argresp *ar = file->private_data; if (!ar) return 0; return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); } EXPORT_SYMBOL(simple_transaction_read); int simple_transaction_release(struct inode *inode, struct file *file) { free_page((unsigned long)file->private_data); return 0; } EXPORT_SYMBOL(simple_transaction_release); /* Simple attribute files */ struct simple_attr { int (*get)(void *, u64 *); int (*set)(void *, u64); char get_buf[24]; /* enough to store a u64 and "\n\0" */ char set_buf[24]; void *data; const char *fmt; /* format for read operation */ struct mutex mutex; /* protects access to these buffers */ }; /* simple_attr_open is called by an actual attribute open file operation * to set the attribute specific access operations. */ int simple_attr_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt) { struct simple_attr *attr; attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; attr->get = get; attr->set = set; attr->data = inode->i_private; attr->fmt = fmt; mutex_init(&attr->mutex); file->private_data = attr; return nonseekable_open(inode, file); } EXPORT_SYMBOL_GPL(simple_attr_open); int simple_attr_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? 
*/ /* read from the buffer that is filled with the get function */ ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct simple_attr *attr; size_t size; ssize_t ret; attr = file->private_data; if (!attr->get) return -EACCES; ret = mutex_lock_interruptible(&attr->mutex); if (ret) return ret; if (*ppos && attr->get_buf[0]) { /* continued read */ size = strlen(attr->get_buf); } else { /* first read */ u64 val; ret = attr->get(attr->data, &val); if (ret) goto out; size = scnprintf(attr->get_buf, sizeof(attr->get_buf), attr->fmt, (unsigned long long)val); } ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); out: mutex_unlock(&attr->mutex); return ret; } EXPORT_SYMBOL_GPL(simple_attr_read); /* interpret the buffer as a number to call the set function with */ static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf, size_t len, loff_t *ppos, bool is_signed) { struct simple_attr *attr; unsigned long long val; size_t size; ssize_t ret; attr = file->private_data; if (!attr->set) return -EACCES; ret = mutex_lock_interruptible(&attr->mutex); if (ret) return ret; ret = -EFAULT; size = min(sizeof(attr->set_buf) - 1, len); if (copy_from_user(attr->set_buf, buf, size)) goto out; attr->set_buf[size] = '\0'; if (is_signed) ret = kstrtoll(attr->set_buf, 0, &val); else ret = kstrtoull(attr->set_buf, 0, &val); if (ret) goto out; ret = attr->set(attr->data, val); if (ret == 0) ret = len; /* on success, claim we got the whole input */ out: mutex_unlock(&attr->mutex); return ret; } ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return simple_attr_write_xsigned(file, buf, len, ppos, false); } EXPORT_SYMBOL_GPL(simple_attr_write); ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return simple_attr_write_xsigned(file, buf, len, ppos, true); } EXPORT_SYMBOL_GPL(simple_attr_write_signed); /** * generic_encode_ino32_fh - generic export_operations->encode_fh function * @inode: the object to encode * @fh: where to store the file handle fragment * @max_len: maximum length to store there (in 4 byte units) * @parent: parent directory inode, if wanted * * This generic encode_fh function assumes that the 32 inode number * is suitable for locating an inode, and that the generation number * can be used to check that it is still valid. It places them in the * filehandle fragment where export_decode_fh expects to find them. 
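 *
 * A filesystem whose inodes are fully identified by a 32-bit inode number
 * plus generation can wire this straight into its export_operations
 * (a sketch; "examplefs_export_ops" and the two fh_to_* callbacks, which
 * would be built on generic_fh_to_dentry()/generic_fh_to_parent() below,
 * are illustrative):
 *
 *	static const struct export_operations examplefs_export_ops = {
 *		.encode_fh	= generic_encode_ino32_fh,
 *		.fh_to_dentry	= examplefs_fh_to_dentry,
 *		.fh_to_parent	= examplefs_fh_to_parent,
 *	};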
*/ int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { struct fid *fid = (void *)fh; int len = *max_len; int type = FILEID_INO32_GEN; if (parent && (len < 4)) { *max_len = 4; return FILEID_INVALID; } else if (len < 2) { *max_len = 2; return FILEID_INVALID; } len = 2; fid->i32.ino = inode->i_ino; fid->i32.gen = inode->i_generation; if (parent) { fid->i32.parent_ino = parent->i_ino; fid->i32.parent_gen = parent->i_generation; len = 4; type = FILEID_INO32_GEN_PARENT; } *max_len = len; return type; } EXPORT_SYMBOL_GPL(generic_encode_ino32_fh); /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert * @fh_len: length of the file handle in bytes * @fh_type: type of file handle * @get_inode: filesystem callback to retrieve inode * * This function decodes @fid as long as it has one of the well-known * Linux filehandle types and calls @get_inode on it to retrieve the * inode for the object specified in the file handle. */ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)) { struct inode *inode = NULL; if (fh_len < 2) return NULL; switch (fh_type) { case FILEID_INO32_GEN: case FILEID_INO32_GEN_PARENT: inode = get_inode(sb, fid->i32.ino, fid->i32.gen); break; } return d_obtain_alias(inode); } EXPORT_SYMBOL_GPL(generic_fh_to_dentry); /** * generic_fh_to_parent - generic helper for the fh_to_parent export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert * @fh_len: length of the file handle in bytes * @fh_type: type of file handle * @get_inode: filesystem callback to retrieve inode * * This function decodes @fid as long as it has one of the well-known * Linux filehandle types and calls @get_inode on it to retrieve the * inode for the _parent_ object specified in the file handle if it * is specified in the file handle, or NULL otherwise. */ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)) { struct inode *inode = NULL; if (fh_len <= 2) return NULL; switch (fh_type) { case FILEID_INO32_GEN_PARENT: inode = get_inode(sb, fid->i32.parent_ino, (fh_len > 3 ? fid->i32.parent_gen : 0)); break; } return d_obtain_alias(inode); } EXPORT_SYMBOL_GPL(generic_fh_to_parent); /** * __generic_file_fsync - generic fsync implementation for simple filesystems * * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * * This is a generic implementation of the fsync method for simple * filesystems which track all non-inode metadata in the buffers list * hanging off the address_space structure. 
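 *
 * Callers normally reach this through generic_file_fsync() below, which adds
 * a block device cache flush, by plugging that into their file_operations
 * (a sketch; "examplefs_file_operations" is an illustrative name):
 *
 *	static const struct file_operations examplefs_file_operations = {
 *		.fsync		= generic_file_fsync,
 *		// ... remaining methods ...
 *	};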
*/ int __generic_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int err; int ret; err = file_write_and_wait_range(file, start, end); if (err) return err; inode_lock(inode); ret = sync_mapping_buffers(inode->i_mapping); if (!(inode->i_state & I_DIRTY_ALL)) goto out; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) goto out; err = sync_inode_metadata(inode, 1); if (ret == 0) ret = err; out: inode_unlock(inode); /* check and advance again to catch errors after syncing out buffers */ err = file_check_and_advance_wb_err(file); if (ret == 0) ret = err; return ret; } EXPORT_SYMBOL(__generic_file_fsync); /** * generic_file_fsync - generic fsync implementation for simple filesystems * with flush * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * */ int generic_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int err; err = __generic_file_fsync(file, start, end, datasync); if (err) return err; return blkdev_issue_flush(inode->i_sb->s_bdev); } EXPORT_SYMBOL(generic_file_fsync); /** * generic_check_addressable - Check addressability of file system * @blocksize_bits: log of file system block size * @num_blocks: number of blocks in file system * * Determine whether a file system with @num_blocks blocks (and a * block size of 2**@blocksize_bits) is addressable by the sector_t * and page cache of the system. Return 0 if so and -EFBIG otherwise. */ int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) { u64 last_fs_block = num_blocks - 1; u64 last_fs_page = last_fs_block >> (PAGE_SHIFT - blocksize_bits); if (unlikely(num_blocks == 0)) return 0; if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT)) return -EINVAL; if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || (last_fs_page > (pgoff_t)(~0ULL))) { return -EFBIG; } return 0; } EXPORT_SYMBOL(generic_check_addressable); /* * No-op implementation of ->fsync for in-memory filesystems. */ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) { return 0; } EXPORT_SYMBOL(noop_fsync); ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { /* * iomap based filesystems support direct I/O without need for * this callback. However, it still needs to be set in * inode->a_ops so that open/fcntl know that direct I/O is * generally supported. */ return -EINVAL; } EXPORT_SYMBOL_GPL(noop_direct_IO); /* Because kfree isn't assignment-compatible with void(void*) ;-/ */ void kfree_link(void *p) { kfree(p); } EXPORT_SYMBOL(kfree_link); struct inode *alloc_anon_inode(struct super_block *s) { static const struct address_space_operations anon_aops = { .dirty_folio = noop_dirty_folio, }; struct inode *inode = new_inode_pseudo(s); if (!inode) return ERR_PTR(-ENOMEM); inode->i_ino = get_next_ino(); inode->i_mapping->a_ops = &anon_aops; /* * Mark the inode dirty from the very beginning, * that way it will never be moved to the dirty * list because mark_inode_dirty() will think * that it already _is_ on the dirty list. 
*/ inode->i_state = I_DIRTY; inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE; simple_inode_init_ts(inode); return inode; } EXPORT_SYMBOL(alloc_anon_inode); /** * simple_nosetlease - generic helper for prohibiting leases * @filp: file pointer * @arg: type of lease to obtain * @flp: new lease supplied for insertion * @priv: private data for lm_setup operation * * Generic helper for filesystems that do not wish to allow leases to be set. * All arguments are ignored and it just returns -EINVAL. */ int simple_nosetlease(struct file *filp, int arg, struct file_lease **flp, void **priv) { return -EINVAL; } EXPORT_SYMBOL(simple_nosetlease); /** * simple_get_link - generic helper to get the target of "fast" symlinks * @dentry: not used here * @inode: the symlink inode * @done: not used here * * Generic helper for filesystems to use for symlink inodes where a pointer to * the symlink target is stored in ->i_link. NOTE: this isn't normally called, * since as an optimization the path lookup code uses any non-NULL ->i_link * directly, without calling ->get_link(). But ->get_link() still must be set, * to mark the inode_operations as being for a symlink. * * Return: the symlink target */ const char *simple_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { return inode->i_link; } EXPORT_SYMBOL(simple_get_link); const struct inode_operations simple_symlink_inode_operations = { .get_link = simple_get_link, }; EXPORT_SYMBOL(simple_symlink_inode_operations); /* * Operations for a permanently empty directory. */ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return ERR_PTR(-ENOENT); } static int empty_dir_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } static int empty_dir_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { return -EPERM; } static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) { return -EOPNOTSUPP; } static const struct inode_operations empty_dir_inode_operations = { .lookup = empty_dir_lookup, .permission = generic_permission, .setattr = empty_dir_setattr, .getattr = empty_dir_getattr, .listxattr = empty_dir_listxattr, }; static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) { /* An empty directory has two entries . and .. 
at offsets 0 and 1 */ return generic_file_llseek_size(file, offset, whence, 2, 2); } static int empty_dir_readdir(struct file *file, struct dir_context *ctx) { dir_emit_dots(file, ctx); return 0; } static const struct file_operations empty_dir_operations = { .llseek = empty_dir_llseek, .read = generic_read_dir, .iterate_shared = empty_dir_readdir, .fsync = noop_fsync, }; void make_empty_dir_inode(struct inode *inode) { set_nlink(inode, 2); inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_rdev = 0; inode->i_size = 0; inode->i_blkbits = PAGE_SHIFT; inode->i_blocks = 0; inode->i_op = &empty_dir_inode_operations; inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &empty_dir_operations; } bool is_empty_dir_inode(struct inode *inode) { return (inode->i_fop == &empty_dir_operations) && (inode->i_op == &empty_dir_inode_operations); } #if IS_ENABLED(CONFIG_UNICODE) /** * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems * @dentry: dentry whose name we are checking against * @len: len of name of dentry * @str: str pointer to name of dentry * @name: Name to compare against * * Return: 0 if names match, 1 if mismatch, or -ERRNO */ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { const struct dentry *parent; const struct inode *dir; char strbuf[DNAME_INLINE_LEN]; struct qstr qstr; /* * Attempt a case-sensitive match first. It is cheaper and * should cover most lookups, including all the sane * applications that expect a case-sensitive filesystem. * * This comparison is safe under RCU because the caller * guarantees the consistency between str and len. See * __d_lookup_rcu_op_compare() for details. */ if (len == name->len && !memcmp(str, name->name, len)) return 0; parent = READ_ONCE(dentry->d_parent); dir = READ_ONCE(parent->d_inode); if (!dir || !IS_CASEFOLDED(dir)) return 1; /* * If the dentry name is stored in-line, then it may be concurrently * modified by a rename. If this happens, the VFS will eventually retry * the lookup, so it doesn't matter what ->d_compare() returns. * However, it's unsafe to call utf8_strncasecmp() with an unstable * string. Therefore, we have to copy the name into a temporary buffer. 
*/ if (len <= DNAME_INLINE_LEN - 1) { memcpy(strbuf, str, len); strbuf[len] = 0; str = strbuf; /* prevent compiler from optimizing out the temporary buffer */ barrier(); } qstr.len = len; qstr.name = str; return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr); } /** * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems * @dentry: dentry of the parent directory * @str: qstr of name whose hash we should fill in * * Return: 0 if hash was successful or unchanged, and -EINVAL on error */ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) { const struct inode *dir = READ_ONCE(dentry->d_inode); struct super_block *sb = dentry->d_sb; const struct unicode_map *um = sb->s_encoding; int ret; if (!dir || !IS_CASEFOLDED(dir)) return 0; ret = utf8_casefold_hash(um, dentry, str); if (ret < 0 && sb_has_strict_encoding(sb)) return -EINVAL; return 0; } static const struct dentry_operations generic_ci_dentry_ops = { .d_hash = generic_ci_d_hash, .d_compare = generic_ci_d_compare, #ifdef CONFIG_FS_ENCRYPTION .d_revalidate = fscrypt_d_revalidate, #endif }; #endif #ifdef CONFIG_FS_ENCRYPTION static const struct dentry_operations generic_encrypted_dentry_ops = { .d_revalidate = fscrypt_d_revalidate, }; #endif /** * generic_set_sb_d_ops - helper for choosing the set of * filesystem-wide dentry operations for the enabled features * @sb: superblock to be configured * * Filesystems supporting casefolding and/or fscrypt can call this * helper at mount-time to configure sb->s_d_op to best set of dentry * operations required for the enabled features. The helper must be * called after these have been configured, but before the root dentry * is created. */ void generic_set_sb_d_ops(struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) if (sb->s_encoding) { sb->s_d_op = &generic_ci_dentry_ops; return; } #endif #ifdef CONFIG_FS_ENCRYPTION if (sb->s_cop) { sb->s_d_op = &generic_encrypted_dentry_ops; return; } #endif } EXPORT_SYMBOL(generic_set_sb_d_ops); /** * inode_maybe_inc_iversion - increments i_version * @inode: inode with the i_version that should be updated * @force: increment the counter even if it's not necessary? * * Every time the inode is modified, the i_version field must be seen to have * changed by any observer. * * If "force" is set or the QUERIED flag is set, then ensure that we increment * the value, and clear the queried flag. * * In the common case where neither is set, then we can return "false" without * updating i_version. * * If this function returns false, and no other metadata has changed, then we * can avoid logging the metadata. */ bool inode_maybe_inc_iversion(struct inode *inode, bool force) { u64 cur, new; /* * The i_version field is not strictly ordered with any other inode * information, but the legacy inode_inc_iversion code used a spinlock * to serialize increments. * * Here, we add full memory barriers to ensure that any de-facto * ordering with other info is preserved. 
* * This barrier pairs with the barrier in inode_query_iversion() */ smp_mb(); cur = inode_peek_iversion_raw(inode); do { /* If flag is clear then we needn't do anything */ if (!force && !(cur & I_VERSION_QUERIED)) return false; /* Since lowest bit is flag, add 2 to avoid it */ new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); return true; } EXPORT_SYMBOL(inode_maybe_inc_iversion); /** * inode_query_iversion - read i_version for later use * @inode: inode from which i_version should be read * * Read the inode i_version counter. This should be used by callers that wish * to store the returned i_version for later comparison. This will guarantee * that a later query of the i_version will result in a different value if * anything has changed. * * In this implementation, we fetch the current value, set the QUERIED flag and * then try to swap it into place with a cmpxchg, if it wasn't already set. If * that fails, we try again with the newly fetched value from the cmpxchg. */ u64 inode_query_iversion(struct inode *inode) { u64 cur, new; cur = inode_peek_iversion_raw(inode); do { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { /* * This barrier (and the implicit barrier in the * cmpxchg below) pairs with the barrier in * inode_maybe_inc_iversion(). */ smp_mb(); break; } new = cur | I_VERSION_QUERIED; } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); return cur >> I_VERSION_QUERIED_SHIFT; } EXPORT_SYMBOL(inode_query_iversion); ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, ssize_t direct_written, ssize_t buffered_written) { struct address_space *mapping = iocb->ki_filp->f_mapping; loff_t pos = iocb->ki_pos - buffered_written; loff_t end = iocb->ki_pos - 1; int err; /* * If the buffered write fallback returned an error, we want to return * the number of bytes which were written by direct I/O, or the error * code if that was zero. * * Note that this differs from normal direct-io semantics, which will * return -EFOO even if some bytes were written. */ if (unlikely(buffered_written < 0)) { if (direct_written) return direct_written; return buffered_written; } /* * We need to ensure that the page cache pages are written to disk and * invalidated to preserve the expected O_DIRECT semantics. */ err = filemap_write_and_wait_range(mapping, pos, end); if (err < 0) { /* * We don't know how much we wrote, so just return the number of * bytes which were direct-written */ iocb->ki_pos -= buffered_written; if (direct_written) return direct_written; return err; } invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT); return direct_written + buffered_written; } EXPORT_SYMBOL_GPL(direct_write_fallback); /** * simple_inode_init_ts - initialize the timestamps for a new inode * @inode: inode to be initialized * * When a new inode is created, most filesystems set the timestamps to the * current time. Add a helper to do this. 
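 *
 * Typical use when setting up a freshly allocated inode (a sketch):
 *
 *	inode->i_ino = get_next_ino();
 *	inode->i_mode = mode;
 *	simple_inode_init_ts(inode);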
*/ struct timespec64 simple_inode_init_ts(struct inode *inode) { struct timespec64 ts = inode_set_ctime_current(inode); inode_set_atime_to_ts(inode, ts); inode_set_mtime_to_ts(inode, ts); return ts; } EXPORT_SYMBOL(simple_inode_init_ts); static inline struct dentry *get_stashed_dentry(struct dentry *stashed) { struct dentry *dentry; guard(rcu)(); dentry = READ_ONCE(stashed); if (!dentry) return NULL; if (!lockref_get_not_dead(&dentry->d_lockref)) return NULL; return dentry; } static struct dentry *prepare_anon_dentry(struct dentry **stashed, struct super_block *sb, void *data) { struct dentry *dentry; struct inode *inode; const struct stashed_operations *sops = sb->s_fs_info; int ret; inode = new_inode_pseudo(sb); if (!inode) { sops->put_data(data); return ERR_PTR(-ENOMEM); } inode->i_flags |= S_IMMUTABLE; inode->i_mode = S_IFREG; simple_inode_init_ts(inode); ret = sops->init_inode(inode, data); if (ret < 0) { iput(inode); return ERR_PTR(ret); } /* Notice when this is changed. */ WARN_ON_ONCE(!S_ISREG(inode->i_mode)); WARN_ON_ONCE(!IS_IMMUTABLE(inode)); dentry = d_alloc_anon(sb); if (!dentry) { iput(inode); return ERR_PTR(-ENOMEM); } /* Store address of location where dentry's supposed to be stashed. */ dentry->d_fsdata = stashed; /* @data is now owned by the fs */ d_instantiate(dentry, inode); return dentry; } static struct dentry *stash_dentry(struct dentry **stashed, struct dentry *dentry) { guard(rcu)(); for (;;) { struct dentry *old; /* Assume any old dentry was cleared out. */ old = cmpxchg(stashed, NULL, dentry); if (likely(!old)) return dentry; /* Check if somebody else installed a reusable dentry. */ if (lockref_get_not_dead(&old->d_lockref)) return old; /* There's an old dead dentry there, try to take it over. */ if (likely(try_cmpxchg(stashed, &old, dentry))) return dentry; } } /** * path_from_stashed - create path from stashed or new dentry * @stashed: where to retrieve or stash dentry * @mnt: mnt of the filesystems to use * @data: data to store in inode->i_private * @path: path to create * * The function tries to retrieve a stashed dentry from @stashed. If the dentry * is still valid then it will be reused. If the dentry isn't able the function * will allocate a new dentry and inode. It will then check again whether it * can reuse an existing dentry in case one has been added in the meantime or * update @stashed with the newly added dentry. * * Special-purpose helper for nsfs and pidfs. * * Return: On success zero and on failure a negative error is returned. */ int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data, struct path *path) { struct dentry *dentry; const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info; /* See if dentry can be reused. */ path->dentry = get_stashed_dentry(*stashed); if (path->dentry) { sops->put_data(data); goto out_path; } /* Allocate a new dentry. */ dentry = prepare_anon_dentry(stashed, mnt->mnt_sb, data); if (IS_ERR(dentry)) return PTR_ERR(dentry); /* Added a new dentry. @data is now owned by the filesystem. 
*/ path->dentry = stash_dentry(stashed, dentry); if (path->dentry != dentry) dput(dentry); out_path: WARN_ON_ONCE(path->dentry->d_fsdata != stashed); WARN_ON_ONCE(d_inode(path->dentry)->i_private != data); path->mnt = mntget(mnt); return 0; } void stashed_dentry_prune(struct dentry *dentry) { struct dentry **stashed = dentry->d_fsdata; struct inode *inode = d_inode(dentry); if (WARN_ON_ONCE(!stashed)) return; if (!inode) return; /* * Only replace our own @dentry as someone else might've * already cleared out @dentry and stashed their own * dentry in there. */ cmpxchg(stashed, dentry, NULL); }
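/*
 * Example: roughly how a special-purpose filesystem such as nsfs or pidfs
 * hooks into the stashed-dentry helpers above. The "examplefs_*" names are
 * illustrative; the superblock's s_fs_info is expected to point at the
 * stashed_operations so path_from_stashed() can find them.
 */
static void examplefs_put_data(void *data)
{
	/* drop the reference that was handed to path_from_stashed() */
	kfree(data);
}

static int examplefs_init_inode(struct inode *inode, void *data)
{
	/*
	 * Stash the object in the new inode; path_from_stashed() checks
	 * that i_private ends up pointing at @data.
	 */
	inode->i_private = data;
	return 0;
}

static const struct stashed_operations examplefs_stashed_ops = {
	.init_inode	= examplefs_init_inode,
	.put_data	= examplefs_put_data,
};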
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __VDSO_MATH64_H
#define __VDSO_MATH64_H

static __always_inline u32
__iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
	u32 ret = 0;

	while (dividend >= divisor) {
		/* The following asm() prevents the compiler from
		   optimising this loop into a modulo operation. */
		asm("" : "+rm"(dividend));

		dividend -= divisor;
		ret++;
	}

	*remainder = dividend;

	return ret;
}

#endif /* __VDSO_MATH64_H */
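/*
 * Example use of __iter_div_u64_rem(): split a timekeeping delta given in
 * nanoseconds into whole seconds plus leftover nanoseconds. A sketch,
 * assuming NSEC_PER_SEC is available from the vDSO time headers; the helper
 * is only appropriate when the quotient is known to be small, so the
 * subtraction loop stays short.
 */
static __always_inline u32 example_ns_to_secs(u64 ns, u64 *rem_ns)
{
	return __iter_div_u64_rem(ns, NSEC_PER_SEC, rem_ns);
}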
// SPDX-License-Identifier: GPL-2.0-or-later /**************************************************************** * * kaweth.c - driver for KL5KUSB101 based USB->Ethernet * * (c) 2000 Interlan Communications * (c) 2000 Stephane Alnet * (C) 2001 Brad Hards * (C) 2002 Oliver Neukum * * Original author: The Zapman <zapman@interlan.net> * Inspired by, and much credit goes to Michael Rothwell * <rothwell@interlan.net> for the test equipment, help, and patience * Based off of (and with thanks to) Petko Manolov's pegasus.c driver. * Also many thanks to Joel Silverman and Ed Surprenant at Kawasaki * for providing the firmware and driver resources. * ****************************************************************/ /* TODO: * Develop test procedures for USB net interfaces * Run test procedures * Fix bugs from previous two steps * Snoop other OSs for any tricks we're not doing * Reduce arbitrary timeouts * Smart multicast support * Temporary MAC change support * Tunable SOFs parameter - ioctl()?
* Ethernet stats collection * Code formatting improvements */ #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/usb.h> #include <linux/types.h> #include <linux/ethtool.h> #include <linux/dma-mapping.h> #include <linux/wait.h> #include <linux/firmware.h> #include <linux/uaccess.h> #include <asm/byteorder.h> #undef DEBUG #define KAWETH_MTU 1514 #define KAWETH_BUF_SIZE 1664 #define KAWETH_TX_TIMEOUT (5 * HZ) #define KAWETH_SCRATCH_SIZE 32 #define KAWETH_FIRMWARE_BUF_SIZE 4096 #define KAWETH_CONTROL_TIMEOUT (30000) #define KAWETH_STATUS_BROKEN 0x0000001 #define KAWETH_STATUS_CLOSING 0x0000002 #define KAWETH_STATUS_SUSPENDING 0x0000004 #define KAWETH_STATUS_BLOCKED (KAWETH_STATUS_CLOSING | KAWETH_STATUS_SUSPENDING) #define KAWETH_PACKET_FILTER_PROMISCUOUS 0x01 #define KAWETH_PACKET_FILTER_ALL_MULTICAST 0x02 #define KAWETH_PACKET_FILTER_DIRECTED 0x04 #define KAWETH_PACKET_FILTER_BROADCAST 0x08 #define KAWETH_PACKET_FILTER_MULTICAST 0x10 /* Table 7 */ #define KAWETH_COMMAND_GET_ETHERNET_DESC 0x00 #define KAWETH_COMMAND_MULTICAST_FILTERS 0x01 #define KAWETH_COMMAND_SET_PACKET_FILTER 0x02 #define KAWETH_COMMAND_STATISTICS 0x03 #define KAWETH_COMMAND_SET_TEMP_MAC 0x06 #define KAWETH_COMMAND_GET_TEMP_MAC 0x07 #define KAWETH_COMMAND_SET_URB_SIZE 0x08 #define KAWETH_COMMAND_SET_SOFS_WAIT 0x09 #define KAWETH_COMMAND_SCAN 0xFF #define KAWETH_SOFS_TO_WAIT 0x05 #define INTBUFFERSIZE 4 #define STATE_OFFSET 0 #define STATE_MASK 0x40 #define STATE_SHIFT 5 #define IS_BLOCKED(s) (s & KAWETH_STATUS_BLOCKED) MODULE_AUTHOR("Michael Zappe <zapman@interlan.net>, Stephane Alnet <stephane@u-picardie.fr>, Brad Hards <bhards@bigpond.net.au> and Oliver Neukum <oliver@neukum.org>"); MODULE_DESCRIPTION("KL5USB101 USB Ethernet driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE("kaweth/new_code.bin"); MODULE_FIRMWARE("kaweth/new_code_fix.bin"); MODULE_FIRMWARE("kaweth/trigger_code.bin"); MODULE_FIRMWARE("kaweth/trigger_code_fix.bin"); static const char driver_name[] = "kaweth"; static int kaweth_probe( struct usb_interface *intf, const struct usb_device_id *id /* from id_table */ ); static void kaweth_disconnect(struct usb_interface *intf); static int kaweth_suspend(struct usb_interface *intf, pm_message_t message); static int kaweth_resume(struct usb_interface *intf); /**************************************************************** * usb_device_id ****************************************************************/ static const struct usb_device_id usb_klsi_table[] = { { USB_DEVICE(0x03e8, 0x0008) }, /* AOX Endpoints USB Ethernet */ { USB_DEVICE(0x04bb, 0x0901) }, /* I-O DATA USB-ET/T */ { USB_DEVICE(0x0506, 0x03e8) }, /* 3Com 3C19250 */ { USB_DEVICE(0x0506, 0x11f8) }, /* 3Com 3C460 */ { USB_DEVICE(0x0557, 0x2002) }, /* ATEN USB Ethernet */ { USB_DEVICE(0x0557, 0x4000) }, /* D-Link DSB-650C */ { USB_DEVICE(0x0565, 0x0002) }, /* Peracom Enet */ { USB_DEVICE(0x0565, 0x0003) }, /* Optus@Home UEP1045A */ { USB_DEVICE(0x0565, 0x0005) }, /* Peracom Enet2 */ { USB_DEVICE(0x05e9, 0x0008) }, /* KLSI KL5KUSB101B */ { USB_DEVICE(0x05e9, 0x0009) }, /* KLSI KL5KUSB101B (Board change) */ { USB_DEVICE(0x066b, 0x2202) }, /* Linksys USB10T */ { USB_DEVICE(0x06e1, 0x0008) }, /* ADS USB-10BT */ { USB_DEVICE(0x06e1, 0x0009) }, /* ADS USB-10BT */ { USB_DEVICE(0x0707, 0x0100) }, /* SMC 2202USB */ { USB_DEVICE(0x07aa, 0x0001) }, /* Correga K.K. 
*/ { USB_DEVICE(0x07b8, 0x4000) }, /* D-Link DU-E10 */ { USB_DEVICE(0x07c9, 0xb010) }, /* Allied Telesyn AT-USB10 USB Ethernet Adapter */ { USB_DEVICE(0x0846, 0x1001) }, /* NetGear EA-101 */ { USB_DEVICE(0x0846, 0x1002) }, /* NetGear EA-101 */ { USB_DEVICE(0x085a, 0x0008) }, /* PortGear Ethernet Adapter */ { USB_DEVICE(0x085a, 0x0009) }, /* PortGear Ethernet Adapter */ { USB_DEVICE(0x087d, 0x5704) }, /* Jaton USB Ethernet Device Adapter */ { USB_DEVICE(0x0951, 0x0008) }, /* Kingston Technology USB Ethernet Adapter */ { USB_DEVICE(0x095a, 0x3003) }, /* Portsmith Express Ethernet Adapter */ { USB_DEVICE(0x10bd, 0x1427) }, /* ASANTE USB To Ethernet Adapter */ { USB_DEVICE(0x1342, 0x0204) }, /* Mobility USB-Ethernet Adapter */ { USB_DEVICE(0x13d2, 0x0400) }, /* Shark Pocket Adapter */ { USB_DEVICE(0x1485, 0x0001) }, /* Silicom U2E */ { USB_DEVICE(0x1485, 0x0002) }, /* Psion Dacom Gold Port Ethernet */ { USB_DEVICE(0x1645, 0x0005) }, /* Entrega E45 */ { USB_DEVICE(0x1645, 0x0008) }, /* Entrega USB Ethernet Adapter */ { USB_DEVICE(0x1645, 0x8005) }, /* PortGear Ethernet Adapter */ { USB_DEVICE(0x1668, 0x0323) }, /* Actiontec USB Ethernet */ { USB_DEVICE(0x2001, 0x4000) }, /* D-link DSB-650C */ {} /* Null terminator */ }; MODULE_DEVICE_TABLE (usb, usb_klsi_table); /**************************************************************** * kaweth_driver ****************************************************************/ static struct usb_driver kaweth_driver = { .name = driver_name, .probe = kaweth_probe, .disconnect = kaweth_disconnect, .suspend = kaweth_suspend, .resume = kaweth_resume, .id_table = usb_klsi_table, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; typedef __u8 eth_addr_t[6]; /**************************************************************** * usb_eth_dev ****************************************************************/ struct usb_eth_dev { char *name; __u16 vendor; __u16 device; void *pdata; }; /**************************************************************** * kaweth_ethernet_configuration * Refer Table 8 ****************************************************************/ struct kaweth_ethernet_configuration { __u8 size; __u8 reserved1; __u8 reserved2; eth_addr_t hw_addr; __u32 statistics_mask; __le16 segment_size; __u16 max_multicast_filters; __u8 reserved3; } __packed; /**************************************************************** * kaweth_device ****************************************************************/ struct kaweth_device { spinlock_t device_lock; __u32 status; int end; int suspend_lowmem_rx; int suspend_lowmem_ctrl; int linkstate; int opened; struct delayed_work lowmem_work; struct usb_device *dev; struct usb_interface *intf; struct net_device *net; wait_queue_head_t term_wait; struct urb *rx_urb; struct urb *tx_urb; struct urb *irq_urb; dma_addr_t intbufferhandle; __u8 *intbuffer; dma_addr_t rxbufferhandle; __u8 *rx_buf; struct sk_buff *tx_skb; __u8 *firmware_buf; __u8 scratch[KAWETH_SCRATCH_SIZE]; __u16 packet_filter_bitmap; struct kaweth_ethernet_configuration configuration; }; /**************************************************************** * kaweth_read_configuration ****************************************************************/ static int kaweth_read_configuration(struct kaweth_device *kaweth) { return usb_control_msg(kaweth->dev, usb_rcvctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_GET_ETHERNET_DESC, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, &kaweth->configuration, sizeof(kaweth->configuration), KAWETH_CONTROL_TIMEOUT); } 
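/****************************************************************
 *     example: consuming the Ethernet descriptor
 *
 *     A sketch (not the actual probe code) of how the helper above is
 *     meant to be used: issue the vendor request, then pick the MAC
 *     address and segment size out of kaweth->configuration.
 ****************************************************************/
static int example_kaweth_get_config(struct kaweth_device *kaweth)
{
	int result;

	result = kaweth_read_configuration(kaweth);
	if (result < 0)
		return result;

	/* the descriptor fields are now valid */
	eth_hw_addr_set(kaweth->net, (const u8 *)&kaweth->configuration.hw_addr);
	netdev_dbg(kaweth->net, "segment size %u\n",
		   le16_to_cpu(kaweth->configuration.segment_size));
	return 0;
}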
/**************************************************************** * kaweth_set_urb_size ****************************************************************/ static int kaweth_set_urb_size(struct kaweth_device *kaweth, __u16 urb_size) { netdev_dbg(kaweth->net, "Setting URB size to %d\n", (unsigned)urb_size); return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SET_URB_SIZE, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, urb_size, 0, &kaweth->scratch, 0, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_set_sofs_wait ****************************************************************/ static int kaweth_set_sofs_wait(struct kaweth_device *kaweth, __u16 sofs_wait) { netdev_dbg(kaweth->net, "Set SOFS wait to %d\n", (unsigned)sofs_wait); return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SET_SOFS_WAIT, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, sofs_wait, 0, &kaweth->scratch, 0, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_set_receive_filter ****************************************************************/ static int kaweth_set_receive_filter(struct kaweth_device *kaweth, __u16 receive_filter) { netdev_dbg(kaweth->net, "Set receive filter to %d\n", (unsigned)receive_filter); return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SET_PACKET_FILTER, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, receive_filter, 0, &kaweth->scratch, 0, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_download_firmware ****************************************************************/ static int kaweth_download_firmware(struct kaweth_device *kaweth, const char *fwname, __u8 interrupt, __u8 type) { const struct firmware *fw; int data_len; int ret; ret = request_firmware(&fw, fwname, &kaweth->dev->dev); if (ret) { dev_err(&kaweth->intf->dev, "Firmware request failed\n"); return ret; } if (fw->size > KAWETH_FIRMWARE_BUF_SIZE) { dev_err(&kaweth->intf->dev, "Firmware too big: %zu\n", fw->size); release_firmware(fw); return -ENOSPC; } data_len = fw->size; memcpy(kaweth->firmware_buf, fw->data, fw->size); release_firmware(fw); kaweth->firmware_buf[2] = (data_len & 0xFF) - 7; kaweth->firmware_buf[3] = data_len >> 8; kaweth->firmware_buf[4] = type; kaweth->firmware_buf[5] = interrupt; netdev_dbg(kaweth->net, "High: %i, Low:%i\n", kaweth->firmware_buf[3], kaweth->firmware_buf[2]); netdev_dbg(kaweth->net, "Downloading firmware at %p to kaweth device at %p\n", kaweth->firmware_buf, kaweth); netdev_dbg(kaweth->net, "Firmware length: %d\n", data_len); return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SCAN, USB_TYPE_VENDOR | USB_DIR_OUT | USB_RECIP_DEVICE, 0, 0, kaweth->firmware_buf, data_len, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_trigger_firmware ****************************************************************/ static int kaweth_trigger_firmware(struct kaweth_device *kaweth, __u8 interrupt) { kaweth->firmware_buf[0] = 0xB6; kaweth->firmware_buf[1] = 0xC3; kaweth->firmware_buf[2] = 0x01; kaweth->firmware_buf[3] = 0x00; kaweth->firmware_buf[4] = 0x06; kaweth->firmware_buf[5] = interrupt; kaweth->firmware_buf[6] = 0x00; kaweth->firmware_buf[7] = 0x00; return usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SCAN, USB_TYPE_VENDOR | USB_DIR_OUT | 
USB_RECIP_DEVICE, 0, 0, (void *)kaweth->firmware_buf, 8, KAWETH_CONTROL_TIMEOUT); } /**************************************************************** * kaweth_reset ****************************************************************/ static int kaweth_reset(struct kaweth_device *kaweth) { int result; result = usb_reset_configuration(kaweth->dev); mdelay(10); netdev_dbg(kaweth->net, "kaweth_reset() returns %d.\n", result); return result; } static void kaweth_usb_receive(struct urb *); static int kaweth_resubmit_rx_urb(struct kaweth_device *, gfp_t); /**************************************************************** int_callback *****************************************************************/ static void kaweth_resubmit_int_urb(struct kaweth_device *kaweth, gfp_t mf) { int status; status = usb_submit_urb (kaweth->irq_urb, mf); if (unlikely(status == -ENOMEM)) { kaweth->suspend_lowmem_ctrl = 1; schedule_delayed_work(&kaweth->lowmem_work, HZ/4); } else { kaweth->suspend_lowmem_ctrl = 0; } if (status) dev_err(&kaweth->intf->dev, "can't resubmit intr, %s-%s, status %d\n", kaweth->dev->bus->bus_name, kaweth->dev->devpath, status); } static void int_callback(struct urb *u) { struct kaweth_device *kaweth = u->context; int act_state; int status = u->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: /* unlink */ case -ENOENT: case -ESHUTDOWN: return; /* -EPIPE: should clear the halt */ default: /* error */ goto resubmit; } /* we check the link state to report changes */ if (kaweth->linkstate != (act_state = ( kaweth->intbuffer[STATE_OFFSET] | STATE_MASK) >> STATE_SHIFT)) { if (act_state) netif_carrier_on(kaweth->net); else netif_carrier_off(kaweth->net); kaweth->linkstate = act_state; } resubmit: kaweth_resubmit_int_urb(kaweth, GFP_ATOMIC); } static void kaweth_resubmit_tl(struct work_struct *work) { struct kaweth_device *kaweth = container_of(work, struct kaweth_device, lowmem_work.work); if (IS_BLOCKED(kaweth->status)) return; if (kaweth->suspend_lowmem_rx) kaweth_resubmit_rx_urb(kaweth, GFP_NOIO); if (kaweth->suspend_lowmem_ctrl) kaweth_resubmit_int_urb(kaweth, GFP_NOIO); } /**************************************************************** * kaweth_resubmit_rx_urb ****************************************************************/ static int kaweth_resubmit_rx_urb(struct kaweth_device *kaweth, gfp_t mem_flags) { int result; usb_fill_bulk_urb(kaweth->rx_urb, kaweth->dev, usb_rcvbulkpipe(kaweth->dev, 1), kaweth->rx_buf, KAWETH_BUF_SIZE, kaweth_usb_receive, kaweth); kaweth->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; kaweth->rx_urb->transfer_dma = kaweth->rxbufferhandle; if((result = usb_submit_urb(kaweth->rx_urb, mem_flags))) { if (result == -ENOMEM) { kaweth->suspend_lowmem_rx = 1; schedule_delayed_work(&kaweth->lowmem_work, HZ/4); } dev_err(&kaweth->intf->dev, "resubmitting rx_urb %d failed\n", result); } else { kaweth->suspend_lowmem_rx = 0; } return result; } static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth, bool may_sleep); /**************************************************************** * kaweth_usb_receive ****************************************************************/ static void kaweth_usb_receive(struct urb *urb) { struct device *dev = &urb->dev->dev; struct kaweth_device *kaweth = urb->context; struct net_device *net = kaweth->net; int status = urb->status; unsigned long flags; int count = urb->actual_length; int count2 = urb->transfer_buffer_length; __u16 pkt_len = le16_to_cpup((__le16 *)kaweth->rx_buf); struct sk_buff *skb; if 
(unlikely(status == -EPIPE)) { net->stats.rx_errors++; kaweth->end = 1; wake_up(&kaweth->term_wait); dev_dbg(dev, "Status was -EPIPE.\n"); return; } if (unlikely(status == -ECONNRESET || status == -ESHUTDOWN)) { /* we are killed - set a flag and wake the disconnect handler */ kaweth->end = 1; wake_up(&kaweth->term_wait); dev_dbg(dev, "Status was -ECONNRESET or -ESHUTDOWN.\n"); return; } if (unlikely(status == -EPROTO || status == -ETIME || status == -EILSEQ)) { net->stats.rx_errors++; dev_dbg(dev, "Status was -EPROTO, -ETIME, or -EILSEQ.\n"); return; } if (unlikely(status == -EOVERFLOW)) { net->stats.rx_errors++; dev_dbg(dev, "Status was -EOVERFLOW.\n"); } spin_lock_irqsave(&kaweth->device_lock, flags); if (IS_BLOCKED(kaweth->status)) { spin_unlock_irqrestore(&kaweth->device_lock, flags); return; } spin_unlock_irqrestore(&kaweth->device_lock, flags); if(status && status != -EREMOTEIO && count != 1) { dev_err(&kaweth->intf->dev, "%s RX status: %d count: %d packet_len: %d\n", net->name, status, count, (int)pkt_len); kaweth_resubmit_rx_urb(kaweth, GFP_ATOMIC); return; } if(kaweth->net && (count > 2)) { if(pkt_len > (count - 2)) { dev_err(&kaweth->intf->dev, "Packet length too long for USB frame (pkt_len: %x, count: %x)\n", pkt_len, count); dev_err(&kaweth->intf->dev, "Packet len & 2047: %x\n", pkt_len & 2047); dev_err(&kaweth->intf->dev, "Count 2: %x\n", count2); kaweth_resubmit_rx_urb(kaweth, GFP_ATOMIC); return; } if(!(skb = dev_alloc_skb(pkt_len+2))) { kaweth_resubmit_rx_urb(kaweth, GFP_ATOMIC); return; } skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ skb_copy_to_linear_data(skb, kaweth->rx_buf + 2, pkt_len); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, net); netif_rx(skb); net->stats.rx_packets++; net->stats.rx_bytes += pkt_len; } kaweth_resubmit_rx_urb(kaweth, GFP_ATOMIC); } /**************************************************************** * kaweth_open ****************************************************************/ static int kaweth_open(struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); int res; res = usb_autopm_get_interface(kaweth->intf); if (res) { dev_err(&kaweth->intf->dev, "Interface cannot be resumed.\n"); return -EIO; } res = kaweth_resubmit_rx_urb(kaweth, GFP_KERNEL); if (res) goto err_out; usb_fill_int_urb( kaweth->irq_urb, kaweth->dev, usb_rcvintpipe(kaweth->dev, 3), kaweth->intbuffer, INTBUFFERSIZE, int_callback, kaweth, 250); /* overriding the descriptor */ kaweth->irq_urb->transfer_dma = kaweth->intbufferhandle; kaweth->irq_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; res = usb_submit_urb(kaweth->irq_urb, GFP_KERNEL); if (res) { usb_kill_urb(kaweth->rx_urb); goto err_out; } kaweth->opened = 1; netif_start_queue(net); kaweth_async_set_rx_mode(kaweth, true); return 0; err_out: usb_autopm_put_interface(kaweth->intf); return -EIO; } /**************************************************************** * kaweth_kill_urbs ****************************************************************/ static void kaweth_kill_urbs(struct kaweth_device *kaweth) { usb_kill_urb(kaweth->irq_urb); usb_kill_urb(kaweth->rx_urb); usb_kill_urb(kaweth->tx_urb); cancel_delayed_work_sync(&kaweth->lowmem_work); /* a scheduled work may have resubmitted, we hit them again */ usb_kill_urb(kaweth->irq_urb); usb_kill_urb(kaweth->rx_urb); } /**************************************************************** * kaweth_close ****************************************************************/ static int kaweth_close(struct net_device *net) { struct kaweth_device *kaweth 
= netdev_priv(net); netif_stop_queue(net); kaweth->opened = 0; kaweth->status |= KAWETH_STATUS_CLOSING; kaweth_kill_urbs(kaweth); kaweth->status &= ~KAWETH_STATUS_CLOSING; usb_autopm_put_interface(kaweth->intf); return 0; } static u32 kaweth_get_link(struct net_device *dev) { struct kaweth_device *kaweth = netdev_priv(dev); return kaweth->linkstate; } static const struct ethtool_ops ops = { .get_link = kaweth_get_link }; /**************************************************************** * kaweth_usb_transmit_complete ****************************************************************/ static void kaweth_usb_transmit_complete(struct urb *urb) { struct kaweth_device *kaweth = urb->context; struct sk_buff *skb = kaweth->tx_skb; int status = urb->status; if (unlikely(status != 0)) if (status != -ENOENT) dev_dbg(&urb->dev->dev, "%s: TX status %d.\n", kaweth->net->name, status); netif_wake_queue(kaweth->net); dev_kfree_skb_irq(skb); } /**************************************************************** * kaweth_start_xmit ****************************************************************/ static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); __le16 *private_header; int res; spin_lock_irq(&kaweth->device_lock); kaweth_async_set_rx_mode(kaweth, false); netif_stop_queue(net); if (IS_BLOCKED(kaweth->status)) { goto skip; } /* We now decide whether we can put our special header into the sk_buff */ if (skb_cow_head(skb, 2)) { net->stats.tx_errors++; netif_start_queue(net); spin_unlock_irq(&kaweth->device_lock); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } private_header = __skb_push(skb, 2); *private_header = cpu_to_le16(skb->len-2); kaweth->tx_skb = skb; usb_fill_bulk_urb(kaweth->tx_urb, kaweth->dev, usb_sndbulkpipe(kaweth->dev, 2), private_header, skb->len, kaweth_usb_transmit_complete, kaweth); kaweth->end = 0; if((res = usb_submit_urb(kaweth->tx_urb, GFP_ATOMIC))) { dev_warn(&net->dev, "kaweth failed tx_urb %d\n", res); skip: net->stats.tx_errors++; netif_start_queue(net); dev_kfree_skb_irq(skb); } else { net->stats.tx_packets++; net->stats.tx_bytes += skb->len; } spin_unlock_irq(&kaweth->device_lock); return NETDEV_TX_OK; } /**************************************************************** * kaweth_set_rx_mode ****************************************************************/ static void kaweth_set_rx_mode(struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); __u16 packet_filter_bitmap = KAWETH_PACKET_FILTER_DIRECTED | KAWETH_PACKET_FILTER_BROADCAST | KAWETH_PACKET_FILTER_MULTICAST; netdev_dbg(net, "Setting Rx mode to %d\n", packet_filter_bitmap); netif_stop_queue(net); if (net->flags & IFF_PROMISC) { packet_filter_bitmap |= KAWETH_PACKET_FILTER_PROMISCUOUS; } else if (!netdev_mc_empty(net) || (net->flags & IFF_ALLMULTI)) { packet_filter_bitmap |= KAWETH_PACKET_FILTER_ALL_MULTICAST; } kaweth->packet_filter_bitmap = packet_filter_bitmap; netif_wake_queue(net); } /**************************************************************** * kaweth_async_set_rx_mode ****************************************************************/ static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth, bool may_sleep) { int ret; __u16 packet_filter_bitmap = kaweth->packet_filter_bitmap; kaweth->packet_filter_bitmap = 0; if (packet_filter_bitmap == 0) return; if (!may_sleep) return; ret = usb_control_msg(kaweth->dev, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SET_PACKET_FILTER, USB_TYPE_VENDOR | USB_DIR_OUT | 
USB_RECIP_DEVICE, packet_filter_bitmap, 0, &kaweth->scratch, 0, KAWETH_CONTROL_TIMEOUT); if (ret < 0) dev_err(&kaweth->intf->dev, "Failed to set Rx mode: %d\n", ret); else netdev_dbg(kaweth->net, "Set Rx mode to %d\n", packet_filter_bitmap); } /**************************************************************** * kaweth_tx_timeout ****************************************************************/ static void kaweth_tx_timeout(struct net_device *net, unsigned int txqueue) { struct kaweth_device *kaweth = netdev_priv(net); dev_warn(&net->dev, "%s: Tx timed out. Resetting.\n", net->name); net->stats.tx_errors++; netif_trans_update(net); usb_unlink_urb(kaweth->tx_urb); } /**************************************************************** * kaweth_suspend ****************************************************************/ static int kaweth_suspend(struct usb_interface *intf, pm_message_t message) { struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status |= KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); kaweth_kill_urbs(kaweth); return 0; } /**************************************************************** * kaweth_resume ****************************************************************/ static int kaweth_resume(struct usb_interface *intf) { struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status &= ~KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); if (!kaweth->opened) return 0; kaweth_resubmit_rx_urb(kaweth, GFP_NOIO); kaweth_resubmit_int_urb(kaweth, GFP_NOIO); return 0; } /**************************************************************** * kaweth_probe ****************************************************************/ static const struct net_device_ops kaweth_netdev_ops = { .ndo_open = kaweth_open, .ndo_stop = kaweth_close, .ndo_start_xmit = kaweth_start_xmit, .ndo_tx_timeout = kaweth_tx_timeout, .ndo_set_rx_mode = kaweth_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; static int kaweth_probe( struct usb_interface *intf, const struct usb_device_id *id /* from id_table */ ) { struct device *dev = &intf->dev; struct usb_device *udev = interface_to_usbdev(intf); struct kaweth_device *kaweth; struct net_device *netdev; const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; int result = 0; int rv = -EIO; dev_dbg(dev, "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n", udev->devnum, le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), le16_to_cpu(udev->descriptor.bcdDevice)); dev_dbg(dev, "Device at %p\n", udev); dev_dbg(dev, "Descriptor length: %x type: %x\n", (int)udev->descriptor.bLength, (int)udev->descriptor.bDescriptorType); netdev = alloc_etherdev(sizeof(*kaweth)); if (!netdev) return -ENOMEM; kaweth = netdev_priv(netdev); kaweth->dev = udev; kaweth->net = netdev; kaweth->intf = intf; spin_lock_init(&kaweth->device_lock); init_waitqueue_head(&kaweth->term_wait); dev_dbg(dev, "Resetting.\n"); kaweth_reset(kaweth); /* * If high byte of bcdDevice is nonzero, firmware is already * downloaded. Don't try to do it again, or we'll hang the device. 
*/ if (le16_to_cpu(udev->descriptor.bcdDevice) >> 8) { dev_info(dev, "Firmware present in device.\n"); } else { /* Download the firmware */ dev_info(dev, "Downloading firmware...\n"); kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL); if (!kaweth->firmware_buf) { rv = -ENOMEM; goto err_free_netdev; } if ((result = kaweth_download_firmware(kaweth, "kaweth/new_code.bin", 100, 2)) < 0) { dev_err(dev, "Error downloading firmware (%d)\n", result); goto err_fw; } if ((result = kaweth_download_firmware(kaweth, "kaweth/new_code_fix.bin", 100, 3)) < 0) { dev_err(dev, "Error downloading firmware fix (%d)\n", result); goto err_fw; } if ((result = kaweth_download_firmware(kaweth, "kaweth/trigger_code.bin", 126, 2)) < 0) { dev_err(dev, "Error downloading trigger code (%d)\n", result); goto err_fw; } if ((result = kaweth_download_firmware(kaweth, "kaweth/trigger_code_fix.bin", 126, 3)) < 0) { dev_err(dev, "Error downloading trigger code fix (%d)\n", result); goto err_fw; } if ((result = kaweth_trigger_firmware(kaweth, 126)) < 0) { dev_err(dev, "Error triggering firmware (%d)\n", result); goto err_fw; } /* Device will now disappear for a moment... */ dev_info(dev, "Firmware loaded. I'll be back...\n"); err_fw: free_page((unsigned long)kaweth->firmware_buf); free_netdev(netdev); return -EIO; } result = kaweth_read_configuration(kaweth); if(result < 0) { dev_err(dev, "Error reading configuration (%d), no net device created\n", result); goto err_free_netdev; } dev_info(dev, "Statistics collection: %x\n", kaweth->configuration.statistics_mask); dev_info(dev, "Multicast filter limit: %x\n", kaweth->configuration.max_multicast_filters & ((1 << 15) - 1)); dev_info(dev, "MTU: %d\n", le16_to_cpu(kaweth->configuration.segment_size)); dev_info(dev, "Read MAC address %pM\n", kaweth->configuration.hw_addr); if(!memcmp(&kaweth->configuration.hw_addr, &bcast_addr, sizeof(bcast_addr))) { dev_err(dev, "Firmware not functioning properly, no net device created\n"); goto err_free_netdev; } if(kaweth_set_urb_size(kaweth, KAWETH_BUF_SIZE) < 0) { dev_dbg(dev, "Error setting URB size\n"); goto err_free_netdev; } if(kaweth_set_sofs_wait(kaweth, KAWETH_SOFS_TO_WAIT) < 0) { dev_err(dev, "Error setting SOFS wait\n"); goto err_free_netdev; } result = kaweth_set_receive_filter(kaweth, KAWETH_PACKET_FILTER_DIRECTED | KAWETH_PACKET_FILTER_BROADCAST | KAWETH_PACKET_FILTER_MULTICAST); if(result < 0) { dev_err(dev, "Error setting receive filter\n"); goto err_free_netdev; } dev_dbg(dev, "Initializing net device.\n"); kaweth->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->tx_urb) goto err_free_netdev; kaweth->rx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->rx_urb) goto err_only_tx; kaweth->irq_urb = usb_alloc_urb(0, GFP_KERNEL); if (!kaweth->irq_urb) goto err_tx_and_rx; kaweth->intbuffer = usb_alloc_coherent( kaweth->dev, INTBUFFERSIZE, GFP_KERNEL, &kaweth->intbufferhandle); if (!kaweth->intbuffer) goto err_tx_and_rx_and_irq; kaweth->rx_buf = usb_alloc_coherent( kaweth->dev, KAWETH_BUF_SIZE, GFP_KERNEL, &kaweth->rxbufferhandle); if (!kaweth->rx_buf) goto err_all_but_rxbuf; memcpy(netdev->broadcast, &bcast_addr, sizeof(bcast_addr)); eth_hw_addr_set(netdev, (u8 *)&kaweth->configuration.hw_addr); netdev->netdev_ops = &kaweth_netdev_ops; netdev->watchdog_timeo = KAWETH_TX_TIMEOUT; netdev->mtu = le16_to_cpu(kaweth->configuration.segment_size); netdev->ethtool_ops = &ops; /* kaweth is zeroed as part of alloc_netdev */ INIT_DELAYED_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl); usb_set_intfdata(intf, kaweth); 
SET_NETDEV_DEV(netdev, dev); if (register_netdev(netdev) != 0) { dev_err(dev, "Error registering netdev.\n"); goto err_intfdata; } dev_info(dev, "kaweth interface created at %s\n", kaweth->net->name); return 0; err_intfdata: usb_set_intfdata(intf, NULL); usb_free_coherent(kaweth->dev, KAWETH_BUF_SIZE, (void *)kaweth->rx_buf, kaweth->rxbufferhandle); err_all_but_rxbuf: usb_free_coherent(kaweth->dev, INTBUFFERSIZE, (void *)kaweth->intbuffer, kaweth->intbufferhandle); err_tx_and_rx_and_irq: usb_free_urb(kaweth->irq_urb); err_tx_and_rx: usb_free_urb(kaweth->rx_urb); err_only_tx: usb_free_urb(kaweth->tx_urb); err_free_netdev: free_netdev(netdev); return rv; } /**************************************************************** * kaweth_disconnect ****************************************************************/ static void kaweth_disconnect(struct usb_interface *intf) { struct kaweth_device *kaweth = usb_get_intfdata(intf); struct net_device *netdev; usb_set_intfdata(intf, NULL); if (!kaweth) { dev_warn(&intf->dev, "unregistering non-existent device\n"); return; } netdev = kaweth->net; netdev_dbg(kaweth->net, "Unregistering net device\n"); unregister_netdev(netdev); usb_free_urb(kaweth->rx_urb); usb_free_urb(kaweth->tx_urb); usb_free_urb(kaweth->irq_urb); usb_free_coherent(kaweth->dev, KAWETH_BUF_SIZE, (void *)kaweth->rx_buf, kaweth->rxbufferhandle); usb_free_coherent(kaweth->dev, INTBUFFERSIZE, (void *)kaweth->intbuffer, kaweth->intbufferhandle); free_netdev(netdev); } module_usb_driver(kaweth_driver);
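/*
 * Illustrative sketch, not part of kaweth.c: the minimal shape of the
 * registration pattern the driver above uses -- a match table, probe and
 * disconnect callbacks, and module_usb_driver().  The vendor/product IDs
 * and all "example_usb" names below are hypothetical placeholders.
 */
#include <linux/module.h>
#include <linux/usb.h>

static const struct usb_device_id example_usb_ids[] = {
	{ USB_DEVICE(0x1234, 0x5678) },	/* hypothetical VID:PID */
	{ }				/* terminating entry */
};
MODULE_DEVICE_TABLE(usb, example_usb_ids);

static int example_usb_probe(struct usb_interface *intf,
			     const struct usb_device_id *id)
{
	dev_info(&intf->dev, "example device bound\n");
	return 0;
}

static void example_usb_disconnect(struct usb_interface *intf)
{
	dev_info(&intf->dev, "example device unbound\n");
}

static struct usb_driver example_usb_driver = {
	.name		= "example_usb",
	.probe		= example_usb_probe,
	.disconnect	= example_usb_disconnect,
	.id_table	= example_usb_ids,
};
module_usb_driver(example_usb_driver);
MODULE_LICENSE("GPL");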
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 // SPDX-License-Identifier: GPL-2.0-only /* -*- linux-c -*- * sysctl_net.c: sysctl interface to net subsystem. * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net directories for each protocol family. [MS] * * Revision 1.2 1996/05/08 20:24:40 shaver * Added bits for NET_BRIDGE and the NET_IPV4_ARP stuff and * NET_IPV4_IP_FORWARD. * * */ #include <linux/mm.h> #include <linux/export.h> #include <linux/sysctl.h> #include <linux/nsproxy.h> #include <net/sock.h> #ifdef CONFIG_INET #include <net/ip.h> #endif #ifdef CONFIG_NET #include <linux/if_ether.h> #endif static struct ctl_table_set * net_ctl_header_lookup(struct ctl_table_root *root) { return &current->nsproxy->net_ns->sysctls; } static int is_seen(struct ctl_table_set *set) { return &current->nsproxy->net_ns->sysctls == set; } /* Return standard mode bits for table entry. */ static int net_ctl_permissions(struct ctl_table_header *head, struct ctl_table *table) { struct net *net = container_of(head->set, struct net, sysctls); /* Allow network administrator to have same access as root. */ if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } return table->mode; } static void net_ctl_set_ownership(struct ctl_table_header *head, struct ctl_table *table, kuid_t *uid, kgid_t *gid) { struct net *net = container_of(head->set, struct net, sysctls); kuid_t ns_root_uid; kgid_t ns_root_gid; ns_root_uid = make_kuid(net->user_ns, 0); if (uid_valid(ns_root_uid)) *uid = ns_root_uid; ns_root_gid = make_kgid(net->user_ns, 0); if (gid_valid(ns_root_gid)) *gid = ns_root_gid; } static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, .permissions = net_ctl_permissions, .set_ownership = net_ctl_set_ownership, }; static int __net_init sysctl_net_init(struct net *net) { setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen); return 0; } static void __net_exit sysctl_net_exit(struct net *net) { retire_sysctl_set(&net->sysctls); } static struct pernet_operations sysctl_pernet_ops = { .init = sysctl_net_init, .exit = sysctl_net_exit, }; static struct ctl_table_header *net_header; __init int net_sysctl_init(void) { static struct ctl_table empty[1]; int ret = -ENOMEM; /* Avoid limitations in the sysctl implementation by * registering "/proc/sys/net" as an empty directory not in a * network namespace. */ net_header = register_sysctl_sz("net", empty, 0); if (!net_header) goto out; ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out1; out: return ret; out1: unregister_sysctl_table(net_header); net_header = NULL; goto out; } /* Verify that sysctls for non-init netns are safe by either: * 1) being read-only, or * 2) having a data pointer which points outside of the global kernel/module * data segment, and rather into the heap where a per-net object was * allocated. 
*/ static void ensure_safe_net_sysctl(struct net *net, const char *path, struct ctl_table *table, size_t table_size) { struct ctl_table *ent; pr_debug("Registering net sysctl (net %p): %s\n", net, path); ent = table; for (size_t i = 0; i < table_size && ent->procname; ent++, i++) { unsigned long addr; const char *where; pr_debug(" procname=%s mode=%o proc_handler=%ps data=%p\n", ent->procname, ent->mode, ent->proc_handler, ent->data); /* If it's not writable inside the netns, then it can't hurt. */ if ((ent->mode & 0222) == 0) { pr_debug(" Not writable by anyone\n"); continue; } /* Where does data point? */ addr = (unsigned long)ent->data; if (is_module_address(addr)) where = "module"; else if (is_kernel_core_data(addr)) where = "kernel"; else continue; /* If it is writable and points to kernel/module global * data, then it's probably a netns leak. */ WARN(1, "sysctl %s/%s: data points to %s global data: %ps\n", path, ent->procname, where, ent->data); /* Make it "safe" by dropping writable perms */ ent->mode &= ~0222; } } struct ctl_table_header *register_net_sysctl_sz(struct net *net, const char *path, struct ctl_table *table, size_t table_size) { int count; struct ctl_table *entry; if (!net_eq(net, &init_net)) ensure_safe_net_sysctl(net, path, table, table_size); entry = table; for (count = 0 ; count < table_size && entry->procname; entry++, count++) ; return __register_sysctl_table(&net->sysctls, path, table, count); } EXPORT_SYMBOL_GPL(register_net_sysctl_sz); void unregister_net_sysctl_table(struct ctl_table_header *header) { unregister_sysctl_table(header); } EXPORT_SYMBOL_GPL(unregister_net_sysctl_table);
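/*
 * Illustrative sketch, not part of sysctl_net.c: how a per-netns subsystem
 * might register a table through register_net_sysctl_sz().  All "example"
 * names are hypothetical.  Note that ->data is pointed at per-net heap
 * memory rather than a kernel global, which is exactly the property
 * ensure_safe_net_sysctl() checks for non-init namespaces.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>

static struct ctl_table example_table[] = {
	{
		.procname	= "threshold",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};

static int __net_init example_net_init(struct net *net)
{
	struct ctl_table *tbl;
	int *val;

	val = kzalloc(sizeof(*val), GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	tbl = kmemdup(example_table, sizeof(example_table), GFP_KERNEL);
	if (!tbl) {
		kfree(val);
		return -ENOMEM;
	}
	tbl[0].data = val;	/* per-net heap data, not a kernel global */

	if (!register_net_sysctl_sz(net, "net/example", tbl,
				    ARRAY_SIZE(example_table))) {
		kfree(tbl);
		kfree(val);
		return -ENOMEM;
	}
	/*
	 * Real code would stash the returned header, tbl and val per net
	 * (e.g. via net_generic()) so the pernet exit path can call
	 * unregister_net_sysctl_table() and free them; that plumbing,
	 * and the register_pernet_subsys() hookup, is omitted here.
	 */
	return 0;
}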
// SPDX-License-Identifier: GPL-2.0-only
/*
 * proc/fs/generic.c --- generic routines for the proc-fs
 *
 * This file contains generic proc-fs routines for handling
 * directories and files.
 *
 * Copyright (C) 1991, 1992 Linus Torvalds.
* Copyright (C) 1997 Theodore Ts'o */ #include <linux/cache.h> #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/namei.h> #include <linux/slab.h> #include <linux/printk.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/idr.h> #include <linux/bitops.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/uaccess.h> #include <linux/seq_file.h> #include "internal.h" static DEFINE_RWLOCK(proc_subdir_lock); struct kmem_cache *proc_dir_entry_cache __ro_after_init; void pde_free(struct proc_dir_entry *pde) { if (S_ISLNK(pde->mode)) kfree(pde->data); if (pde->name != pde->inline_name) kfree(pde->name); kmem_cache_free(proc_dir_entry_cache, pde); } static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) { if (len < de->namelen) return -1; if (len > de->namelen) return 1; return memcmp(name, de->name, len); } static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) { return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, subdir_node); } static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) { return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry, subdir_node); } static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, const char *name, unsigned int len) { struct rb_node *node = dir->subdir.rb_node; while (node) { struct proc_dir_entry *de = rb_entry(node, struct proc_dir_entry, subdir_node); int result = proc_match(name, de, len); if (result < 0) node = node->rb_left; else if (result > 0) node = node->rb_right; else return de; } return NULL; } static bool pde_subdir_insert(struct proc_dir_entry *dir, struct proc_dir_entry *de) { struct rb_root *root = &dir->subdir; struct rb_node **new = &root->rb_node, *parent = NULL; /* Figure out where to put new node */ while (*new) { struct proc_dir_entry *this = rb_entry(*new, struct proc_dir_entry, subdir_node); int result = proc_match(de->name, this, de->namelen); parent = *new; if (result < 0) new = &(*new)->rb_left; else if (result > 0) new = &(*new)->rb_right; else return false; } /* Add new node and rebalance tree. */ rb_link_node(&de->subdir_node, parent, new); rb_insert_color(&de->subdir_node, root); return true; } static int proc_notify_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); struct proc_dir_entry *de = PDE(inode); int error; error = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (error) return error; setattr_copy(&nop_mnt_idmap, inode, iattr); proc_set_user(de, inode->i_uid, inode->i_gid); de->mode = inode->i_mode; return 0; } static int proc_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct proc_dir_entry *de = PDE(inode); if (de) { nlink_t nlink = READ_ONCE(de->nlink); if (nlink > 0) { set_nlink(inode, nlink); } } generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } static const struct inode_operations proc_file_inode_operations = { .setattr = proc_notify_change, }; /* * This function parses a name such as "tty/driver/serial", and * returns the struct proc_dir_entry for "/proc/tty/driver", and * returns "serial" in residual. 
*/ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, const char **residual) { const char *cp = name, *next; struct proc_dir_entry *de; de = *ret ?: &proc_root; while ((next = strchr(cp, '/')) != NULL) { de = pde_subdir_find(de, cp, next - cp); if (!de) { WARN(1, "name '%s'\n", name); return -ENOENT; } cp = next + 1; } *residual = cp; *ret = de; return 0; } static int xlate_proc_name(const char *name, struct proc_dir_entry **ret, const char **residual) { int rv; read_lock(&proc_subdir_lock); rv = __xlate_proc_name(name, ret, residual); read_unlock(&proc_subdir_lock); return rv; } static DEFINE_IDA(proc_inum_ida); #define PROC_DYNAMIC_FIRST 0xF0000000U /* * Return an inode number between PROC_DYNAMIC_FIRST and * 0xffffffff, or zero on failure. */ int proc_alloc_inum(unsigned int *inum) { int i; i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1, GFP_KERNEL); if (i < 0) return i; *inum = PROC_DYNAMIC_FIRST + (unsigned int)i; return 0; } void proc_free_inum(unsigned int inum) { ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); } static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; if (atomic_read(&PDE(d_inode(dentry))->in_use) < 0) return 0; /* revalidate */ return 1; } static int proc_misc_d_delete(const struct dentry *dentry) { return atomic_read(&PDE(d_inode(dentry))->in_use) < 0; } static const struct dentry_operations proc_misc_dentry_ops = { .d_revalidate = proc_misc_d_revalidate, .d_delete = proc_misc_d_delete, }; /* * Don't create negative dentries here, return -ENOENT by hand * instead. */ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, struct proc_dir_entry *de) { struct inode *inode; read_lock(&proc_subdir_lock); de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); if (de) { pde_get(de); read_unlock(&proc_subdir_lock); inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); d_set_d_op(dentry, de->proc_dops); return d_splice_alias(inode, dentry); } read_unlock(&proc_subdir_lock); return ERR_PTR(-ENOENT); } struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct proc_fs_info *fs_info = proc_sb_info(dir->i_sb); if (fs_info->pidonly == PROC_PIDONLY_ON) return ERR_PTR(-ENOENT); return proc_lookup_de(dir, dentry, PDE(dir)); } /* * This returns non-zero if at EOF, so that the /proc * root directory can use this and check if it should * continue with the <pid> entries.. * * Note that the VFS-layer doesn't care about the return * value of the readdir() call, as long as it's non-negative * for success.. 
*/ int proc_readdir_de(struct file *file, struct dir_context *ctx, struct proc_dir_entry *de) { int i; if (!dir_emit_dots(file, ctx)) return 0; i = ctx->pos - 2; read_lock(&proc_subdir_lock); de = pde_subdir_first(de); for (;;) { if (!de) { read_unlock(&proc_subdir_lock); return 0; } if (!i) break; de = pde_subdir_next(de); i--; } do { struct proc_dir_entry *next; pde_get(de); read_unlock(&proc_subdir_lock); if (!dir_emit(ctx, de->name, de->namelen, de->low_ino, de->mode >> 12)) { pde_put(de); return 0; } ctx->pos++; read_lock(&proc_subdir_lock); next = pde_subdir_next(de); pde_put(de); de = next; } while (de); read_unlock(&proc_subdir_lock); return 1; } int proc_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); if (fs_info->pidonly == PROC_PIDONLY_ON) return 1; return proc_readdir_de(file, ctx, PDE(inode)); } /* * These are the generic /proc directory operations. They * use the in-memory "struct proc_dir_entry" tree to parse * the /proc directory. */ static const struct file_operations proc_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = proc_readdir, }; static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) { return 0; } const struct dentry_operations proc_net_dentry_ops = { .d_revalidate = proc_net_d_revalidate, .d_delete = always_delete_dentry, }; /* * proc directories can do almost nothing.. */ static const struct inode_operations proc_dir_inode_operations = { .lookup = proc_lookup, .getattr = proc_getattr, .setattr = proc_notify_change, }; /* returns the registered entry, or frees dp and returns NULL on failure */ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, struct proc_dir_entry *dp) { if (proc_alloc_inum(&dp->low_ino)) goto out_free_entry; write_lock(&proc_subdir_lock); dp->parent = dir; if (pde_subdir_insert(dir, dp) == false) { WARN(1, "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); write_unlock(&proc_subdir_lock); goto out_free_inum; } dir->nlink++; write_unlock(&proc_subdir_lock); return dp; out_free_inum: proc_free_inum(dp->low_ino); out_free_entry: pde_free(dp); return NULL; } static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, const char *name, umode_t mode, nlink_t nlink) { struct proc_dir_entry *ent = NULL; const char *fn; struct qstr qstr; if (xlate_proc_name(name, parent, &fn) != 0) goto out; qstr.name = fn; qstr.len = strlen(fn); if (qstr.len == 0 || qstr.len >= 256) { WARN(1, "name len %u\n", qstr.len); return NULL; } if (qstr.len == 1 && fn[0] == '.') { WARN(1, "name '.'\n"); return NULL; } if (qstr.len == 2 && fn[0] == '.' 
&& fn[1] == '.') { WARN(1, "name '..'\n"); return NULL; } if (*parent == &proc_root && name_to_int(&qstr) != ~0U) { WARN(1, "create '/proc/%s' by hand\n", qstr.name); return NULL; } if (is_empty_pde(*parent)) { WARN(1, "attempt to add to permanently empty directory"); return NULL; } ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); if (!ent) goto out; if (qstr.len + 1 <= SIZEOF_PDE_INLINE_NAME) { ent->name = ent->inline_name; } else { ent->name = kmalloc(qstr.len + 1, GFP_KERNEL); if (!ent->name) { pde_free(ent); return NULL; } } memcpy(ent->name, fn, qstr.len + 1); ent->namelen = qstr.len; ent->mode = mode; ent->nlink = nlink; ent->subdir = RB_ROOT; refcount_set(&ent->refcnt, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); proc_set_user(ent, (*parent)->uid, (*parent)->gid); ent->proc_dops = &proc_misc_dentry_ops; /* Revalidate everything under /proc/${pid}/net */ if ((*parent)->proc_dops == &proc_net_dentry_ops) pde_force_lookup(ent); out: return ent; } struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent, const char *dest) { struct proc_dir_entry *ent; ent = __proc_create(&parent, name, (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); if (ent) { ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); if (ent->data) { strcpy((char*)ent->data,dest); ent->proc_iops = &proc_link_inode_operations; ent = proc_register(parent, ent); } else { pde_free(ent); ent = NULL; } } return ent; } EXPORT_SYMBOL(proc_symlink); struct proc_dir_entry *_proc_mkdir(const char *name, umode_t mode, struct proc_dir_entry *parent, void *data, bool force_lookup) { struct proc_dir_entry *ent; if (mode == 0) mode = S_IRUGO | S_IXUGO; ent = __proc_create(&parent, name, S_IFDIR | mode, 2); if (ent) { ent->data = data; ent->proc_dir_ops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; if (force_lookup) { pde_force_lookup(ent); } ent = proc_register(parent, ent); } return ent; } EXPORT_SYMBOL_GPL(_proc_mkdir); struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, struct proc_dir_entry *parent, void *data) { return _proc_mkdir(name, mode, parent, data, false); } EXPORT_SYMBOL_GPL(proc_mkdir_data); struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, struct proc_dir_entry *parent) { return proc_mkdir_data(name, mode, parent, NULL); } EXPORT_SYMBOL(proc_mkdir_mode); struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { return proc_mkdir_data(name, 0, parent, NULL); } EXPORT_SYMBOL(proc_mkdir); struct proc_dir_entry *proc_create_mount_point(const char *name) { umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; struct proc_dir_entry *ent, *parent = NULL; ent = __proc_create(&parent, name, mode, 2); if (ent) { ent->data = NULL; ent->proc_dir_ops = NULL; ent->proc_iops = NULL; ent = proc_register(parent, ent); } return ent; } EXPORT_SYMBOL(proc_create_mount_point); struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, struct proc_dir_entry **parent, void *data) { struct proc_dir_entry *p; if ((mode & S_IFMT) == 0) mode |= S_IFREG; if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO; if (WARN_ON_ONCE(!S_ISREG(mode))) return NULL; p = __proc_create(parent, name, mode, 1); if (p) { p->proc_iops = &proc_file_inode_operations; p->data = data; } return p; } static inline void pde_set_flags(struct proc_dir_entry *pde) { if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT) pde->flags |= PROC_ENTRY_PERMANENT; } struct proc_dir_entry *proc_create_data(const char *name, umode_t 
mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops, void *data) { struct proc_dir_entry *p; p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; p->proc_ops = proc_ops; pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_data); struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops) { return proc_create_data(name, mode, parent, proc_ops, NULL); } EXPORT_SYMBOL(proc_create); static int proc_seq_open(struct inode *inode, struct file *file) { struct proc_dir_entry *de = PDE(inode); if (de->state_size) return seq_open_private(file, de->seq_ops, de->state_size); return seq_open(file, de->seq_ops); } static int proc_seq_release(struct inode *inode, struct file *file) { struct proc_dir_entry *de = PDE(inode); if (de->state_size) return seq_release_private(inode, file); return seq_release(inode, file); } static const struct proc_ops proc_seq_ops = { /* not permanent -- can call into arbitrary seq_operations */ .proc_open = proc_seq_open, .proc_read_iter = seq_read_iter, .proc_lseek = seq_lseek, .proc_release = proc_seq_release, }; struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct seq_operations *ops, unsigned int state_size, void *data) { struct proc_dir_entry *p; p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; p->proc_ops = &proc_seq_ops; p->seq_ops = ops; p->state_size = state_size; return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_seq_private); static int proc_single_open(struct inode *inode, struct file *file) { struct proc_dir_entry *de = PDE(inode); return single_open(file, de->single_show, de->data); } static const struct proc_ops proc_single_ops = { /* not permanent -- can call into arbitrary ->single_show */ .proc_open = proc_single_open, .proc_read_iter = seq_read_iter, .proc_lseek = seq_lseek, .proc_release = single_release, }; struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, struct proc_dir_entry *parent, int (*show)(struct seq_file *, void *), void *data) { struct proc_dir_entry *p; p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; p->proc_ops = &proc_single_ops; p->single_show = show; return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_single_data); void proc_set_size(struct proc_dir_entry *de, loff_t size) { de->size = size; } EXPORT_SYMBOL(proc_set_size); void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) { de->uid = uid; de->gid = gid; } EXPORT_SYMBOL(proc_set_user); void pde_put(struct proc_dir_entry *pde) { if (refcount_dec_and_test(&pde->refcnt)) { proc_free_inum(pde->low_ino); pde_free(pde); } } /* * Remove a /proc entry and free it if it's not currently in use. 
*/ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) { struct proc_dir_entry *de = NULL; const char *fn = name; unsigned int len; write_lock(&proc_subdir_lock); if (__xlate_proc_name(name, &parent, &fn) != 0) { write_unlock(&proc_subdir_lock); return; } len = strlen(fn); de = pde_subdir_find(parent, fn, len); if (de) { if (unlikely(pde_is_permanent(de))) { WARN(1, "removing permanent /proc entry '%s'", de->name); de = NULL; } else { rb_erase(&de->subdir_node, &parent->subdir); if (S_ISDIR(de->mode)) parent->nlink--; } } write_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); return; } proc_entry_rundown(de); WARN(pde_subdir_first(de), "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", __func__, de->parent->name, de->name, pde_subdir_first(de)->name); pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { struct proc_dir_entry *root = NULL, *de, *next; const char *fn = name; unsigned int len; write_lock(&proc_subdir_lock); if (__xlate_proc_name(name, &parent, &fn) != 0) { write_unlock(&proc_subdir_lock); return -ENOENT; } len = strlen(fn); root = pde_subdir_find(parent, fn, len); if (!root) { write_unlock(&proc_subdir_lock); return -ENOENT; } if (unlikely(pde_is_permanent(root))) { write_unlock(&proc_subdir_lock); WARN(1, "removing permanent /proc entry '%s/%s'", root->parent->name, root->name); return -EINVAL; } rb_erase(&root->subdir_node, &parent->subdir); de = root; while (1) { next = pde_subdir_first(de); if (next) { if (unlikely(pde_is_permanent(next))) { write_unlock(&proc_subdir_lock); WARN(1, "removing permanent /proc entry '%s/%s'", next->parent->name, next->name); return -EINVAL; } rb_erase(&next->subdir_node, &de->subdir); de = next; continue; } next = de->parent; if (S_ISDIR(de->mode)) next->nlink--; write_unlock(&proc_subdir_lock); proc_entry_rundown(de); if (de == root) break; pde_put(de); write_lock(&proc_subdir_lock); de = next; } pde_put(root); return 0; } EXPORT_SYMBOL(remove_proc_subtree); void *proc_get_parent_data(const struct inode *inode) { struct proc_dir_entry *de = PDE(inode); return de->parent->data; } EXPORT_SYMBOL_GPL(proc_get_parent_data); void proc_remove(struct proc_dir_entry *de) { if (de) remove_proc_subtree(de->name, de->parent); } EXPORT_SYMBOL(proc_remove); /* * Pull a user buffer into memory and pass it to the file's write handler if * one is supplied. The ->write() method is permitted to modify the * kernel-side buffer. */ ssize_t proc_simple_write(struct file *f, const char __user *ubuf, size_t size, loff_t *_pos) { struct proc_dir_entry *pde = PDE(file_inode(f)); char *buf; int ret; if (!pde->write) return -EACCES; if (size == 0 || size > PAGE_SIZE - 1) return -EINVAL; buf = memdup_user_nul(ubuf, size); if (IS_ERR(buf)) return PTR_ERR(buf); ret = pde->write(f, buf, size); kfree(buf); return ret == 0 ? size : ret; }
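/*
 * Illustrative sketch, not part of generic.c: typical use of the
 * interfaces exported above -- create a directory, add a single_show
 * style file beneath it, and tear both down again.  All "example" names
 * are hypothetical.
 */
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static struct proc_dir_entry *example_dir;

static int example_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from /proc/example/status\n");
	return 0;
}

static int __init example_proc_init(void)
{
	example_dir = proc_mkdir("example", NULL);
	if (!example_dir)
		return -ENOMEM;

	if (!proc_create_single_data("status", 0444, example_dir,
				     example_show, NULL)) {
		proc_remove(example_dir);
		return -ENOMEM;
	}
	return 0;
}

static void __exit example_proc_exit(void)
{
	proc_remove(example_dir);	/* removes the whole subtree, "status" included */
}

module_init(example_proc_init);
module_exit(example_proc_exit);
MODULE_LICENSE("GPL");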
// SPDX-License-Identifier: GPL-2.0-only
/*
 * AT and PS/2 keyboard driver
 *
 * Copyright (c) 1999-2002 Vojtech Pavlik
 */

/*
 * This driver can handle standard AT keyboards and PS/2 keyboards in
 * Translated and Raw Set 2 and Set 3, as well as AT keyboards on dumb
 * input-only controllers and AT keyboards connected over a one way RS232
 * converter.
*/ #include <linux/delay.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/input.h> #include <linux/input/vivaldi-fmap.h> #include <linux/serio.h> #include <linux/workqueue.h> #include <linux/libps2.h> #include <linux/mutex.h> #include <linux/dmi.h> #include <linux/property.h> #define DRIVER_DESC "AT and PS/2 keyboard driver" MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>"); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static int atkbd_set = 2; module_param_named(set, atkbd_set, int, 0); MODULE_PARM_DESC(set, "Select keyboard code set (2 = default, 3 = PS/2 native)"); #if defined(__i386__) || defined(__x86_64__) || defined(__hppa__) static bool atkbd_reset; #else static bool atkbd_reset = true; #endif module_param_named(reset, atkbd_reset, bool, 0); MODULE_PARM_DESC(reset, "Reset keyboard during initialization"); static bool atkbd_softrepeat; module_param_named(softrepeat, atkbd_softrepeat, bool, 0); MODULE_PARM_DESC(softrepeat, "Use software keyboard repeat"); static bool atkbd_softraw = true; module_param_named(softraw, atkbd_softraw, bool, 0); MODULE_PARM_DESC(softraw, "Use software generated rawmode"); static bool atkbd_scroll; module_param_named(scroll, atkbd_scroll, bool, 0); MODULE_PARM_DESC(scroll, "Enable scroll-wheel on MS Office and similar keyboards"); static bool atkbd_extra; module_param_named(extra, atkbd_extra, bool, 0); MODULE_PARM_DESC(extra, "Enable extra LEDs and keys on IBM RapidAcces, EzKey and similar keyboards"); static bool atkbd_terminal; module_param_named(terminal, atkbd_terminal, bool, 0); MODULE_PARM_DESC(terminal, "Enable break codes on an IBM Terminal keyboard connected via AT/PS2"); #define SCANCODE(keymap) ((keymap >> 16) & 0xFFFF) #define KEYCODE(keymap) (keymap & 0xFFFF) /* * Scancode to keycode tables. These are just the default setting, and * are loadable via a userland utility. 
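 *
 * Illustrative example (not part of the original comment; the scancode and
 * keycode values are arbitrary): an unknown "e0 01" scancode reported by the
 * driver can be remapped from userspace to KEY_VOLUMEUP (keycode 115) with
 *
 *	setkeycodes e001 115
 *
 * or per event device via the EVIOCSKEYCODE ioctl.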
*/ #define ATKBD_KEYMAP_SIZE 512 static const unsigned short atkbd_set2_keycode[ATKBD_KEYMAP_SIZE] = { #ifdef CONFIG_KEYBOARD_ATKBD_HP_KEYCODES /* XXX: need a more general approach */ #include "hpps2atkbd.h" /* include the keyboard scancodes */ #else 0, 67, 65, 63, 61, 59, 60, 88, 0, 68, 66, 64, 62, 15, 41,117, 0, 56, 42, 93, 29, 16, 2, 0, 0, 0, 44, 31, 30, 17, 3, 0, 0, 46, 45, 32, 18, 5, 4, 95, 0, 57, 47, 33, 20, 19, 6,183, 0, 49, 48, 35, 34, 21, 7,184, 0, 0, 50, 36, 22, 8, 9,185, 0, 51, 37, 23, 24, 11, 10, 0, 0, 52, 53, 38, 39, 25, 12, 0, 0, 89, 40, 0, 26, 13, 0, 0, 58, 54, 28, 27, 0, 43, 0, 85, 0, 86, 91, 90, 92, 0, 14, 94, 0, 79,124, 75, 71,121, 0, 0, 82, 83, 80, 76, 77, 72, 1, 69, 87, 78, 81, 74, 55, 73, 70, 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 217,100,255, 0, 97,165, 0, 0,156, 0, 0, 0, 0, 0, 0,125, 173,114, 0,113, 0, 0, 0,126,128, 0, 0,140, 0, 0, 0,127, 159, 0,115, 0,164, 0, 0,116,158, 0,172,166, 0, 0, 0,142, 157, 0, 0, 0, 0, 0, 0, 0,155, 0, 98, 0, 0,163, 0, 0, 226, 0, 0, 0, 0, 0, 0, 0, 0,255, 96, 0, 0, 0,143, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,107, 0,105,102, 0, 0,112, 110,111,108,112,106,103, 0,119, 0,118,109, 0, 99,104,119, 0, 0, 0, 0, 65, 99, #endif }; static const unsigned short atkbd_set3_keycode[ATKBD_KEYMAP_SIZE] = { 0, 0, 0, 0, 0, 0, 0, 59, 1,138,128,129,130, 15, 41, 60, 131, 29, 42, 86, 58, 16, 2, 61,133, 56, 44, 31, 30, 17, 3, 62, 134, 46, 45, 32, 18, 5, 4, 63,135, 57, 47, 33, 20, 19, 6, 64, 136, 49, 48, 35, 34, 21, 7, 65,137,100, 50, 36, 22, 8, 9, 66, 125, 51, 37, 23, 24, 11, 10, 67,126, 52, 53, 38, 39, 25, 12, 68, 113,114, 40, 43, 26, 13, 87, 99, 97, 54, 28, 27, 43, 43, 88, 70, 108,105,119,103,111,107, 14,110, 0, 79,106, 75, 71,109,102,104, 82, 83, 80, 76, 77, 72, 69, 98, 0, 96, 81, 0, 78, 73, 55,183, 184,185,186,187, 74, 94, 92, 93, 0, 0, 0,125,126,127,112, 0, 0,139,172,163,165,115,152,172,166,140,160,154,113,114,167,168, 148,149,147,140 }; static const unsigned short atkbd_unxlate_table[128] = { 0,118, 22, 30, 38, 37, 46, 54, 61, 62, 70, 69, 78, 85,102, 13, 21, 29, 36, 45, 44, 53, 60, 67, 68, 77, 84, 91, 90, 20, 28, 27, 35, 43, 52, 51, 59, 66, 75, 76, 82, 14, 18, 93, 26, 34, 33, 42, 50, 49, 58, 65, 73, 74, 89,124, 17, 41, 88, 5, 6, 4, 12, 3, 11, 2, 10, 1, 9,119,126,108,117,125,123,107,115,116,121,105, 114,122,112,113,127, 96, 97,120, 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 86, 94, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 87,111, 19, 25, 57, 81, 83, 92, 95, 98, 99,100,101,103,104,106,109,110 }; #define ATKBD_CMD_SETLEDS 0x10ed #define ATKBD_CMD_GSCANSET 0x11f0 #define ATKBD_CMD_SSCANSET 0x10f0 #define ATKBD_CMD_GETID 0x02f2 #define ATKBD_CMD_SETREP 0x10f3 #define ATKBD_CMD_ENABLE 0x00f4 #define ATKBD_CMD_RESET_DIS 0x00f5 /* Reset to defaults and disable */ #define ATKBD_CMD_RESET_DEF 0x00f6 /* Reset to defaults */ #define ATKBD_CMD_SETALL_MB 0x00f8 /* Set all keys to give break codes */ #define ATKBD_CMD_SETALL_MBR 0x00fa /* ... 
and repeat */ #define ATKBD_CMD_RESET_BAT 0x02ff #define ATKBD_CMD_RESEND 0x00fe #define ATKBD_CMD_EX_ENABLE 0x10ea #define ATKBD_CMD_EX_SETLEDS 0x20eb #define ATKBD_CMD_OK_GETID 0x02e8 #define ATKBD_RET_ACK 0xfa #define ATKBD_RET_NAK 0xfe #define ATKBD_RET_BAT 0xaa #define ATKBD_RET_EMUL0 0xe0 #define ATKBD_RET_EMUL1 0xe1 #define ATKBD_RET_RELEASE 0xf0 #define ATKBD_RET_HANJA 0xf1 #define ATKBD_RET_HANGEUL 0xf2 #define ATKBD_RET_ERR 0xff #define ATKBD_KEY_UNKNOWN 0 #define ATKBD_KEY_NULL 255 #define ATKBD_SCR_1 0xfffe #define ATKBD_SCR_2 0xfffd #define ATKBD_SCR_4 0xfffc #define ATKBD_SCR_8 0xfffb #define ATKBD_SCR_CLICK 0xfffa #define ATKBD_SCR_LEFT 0xfff9 #define ATKBD_SCR_RIGHT 0xfff8 #define ATKBD_SPECIAL ATKBD_SCR_RIGHT #define ATKBD_LED_EVENT_BIT 0 #define ATKBD_REP_EVENT_BIT 1 #define ATKBD_XL_ERR 0x01 #define ATKBD_XL_BAT 0x02 #define ATKBD_XL_ACK 0x04 #define ATKBD_XL_NAK 0x08 #define ATKBD_XL_HANGEUL 0x10 #define ATKBD_XL_HANJA 0x20 static const struct { unsigned short keycode; unsigned char set2; } atkbd_scroll_keys[] = { { ATKBD_SCR_1, 0xc5 }, { ATKBD_SCR_2, 0x9d }, { ATKBD_SCR_4, 0xa4 }, { ATKBD_SCR_8, 0x9b }, { ATKBD_SCR_CLICK, 0xe0 }, { ATKBD_SCR_LEFT, 0xcb }, { ATKBD_SCR_RIGHT, 0xd2 }, }; /* * The atkbd control structure */ struct atkbd { struct ps2dev ps2dev; struct input_dev *dev; /* Written only during init */ char name[64]; char phys[32]; unsigned short id; unsigned short keycode[ATKBD_KEYMAP_SIZE]; DECLARE_BITMAP(force_release_mask, ATKBD_KEYMAP_SIZE); unsigned char set; bool translated; bool extra; bool write; bool softrepeat; bool softraw; bool scroll; bool enabled; /* Accessed only from interrupt */ unsigned char emul; bool resend; bool release; unsigned long xl_bit; unsigned int last; unsigned long time; unsigned long err_count; struct delayed_work event_work; unsigned long event_jiffies; unsigned long event_mask; /* Serializes reconnect(), attr->set() and event work */ struct mutex mutex; struct vivaldi_data vdata; }; /* * System-specific keymap fixup routine */ static void (*atkbd_platform_fixup)(struct atkbd *, const void *data); static void *atkbd_platform_fixup_data; static unsigned int (*atkbd_platform_scancode_fixup)(struct atkbd *, unsigned int); /* * Certain keyboards to not like ATKBD_CMD_RESET_DIS and stop responding * to many commands until full reset (ATKBD_CMD_RESET_BAT) is performed. 
*/ static bool atkbd_skip_deactivate; static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf, ssize_t (*handler)(struct atkbd *, char *)); static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count, ssize_t (*handler)(struct atkbd *, const char *, size_t)); #define ATKBD_DEFINE_ATTR(_name) \ static ssize_t atkbd_show_##_name(struct atkbd *, char *); \ static ssize_t atkbd_set_##_name(struct atkbd *, const char *, size_t); \ static ssize_t atkbd_do_show_##_name(struct device *d, \ struct device_attribute *attr, char *b) \ { \ return atkbd_attr_show_helper(d, b, atkbd_show_##_name); \ } \ static ssize_t atkbd_do_set_##_name(struct device *d, \ struct device_attribute *attr, const char *b, size_t s) \ { \ return atkbd_attr_set_helper(d, b, s, atkbd_set_##_name); \ } \ static struct device_attribute atkbd_attr_##_name = \ __ATTR(_name, S_IWUSR | S_IRUGO, atkbd_do_show_##_name, atkbd_do_set_##_name); ATKBD_DEFINE_ATTR(extra); ATKBD_DEFINE_ATTR(force_release); ATKBD_DEFINE_ATTR(scroll); ATKBD_DEFINE_ATTR(set); ATKBD_DEFINE_ATTR(softrepeat); ATKBD_DEFINE_ATTR(softraw); #define ATKBD_DEFINE_RO_ATTR(_name) \ static ssize_t atkbd_show_##_name(struct atkbd *, char *); \ static ssize_t atkbd_do_show_##_name(struct device *d, \ struct device_attribute *attr, char *b) \ { \ return atkbd_attr_show_helper(d, b, atkbd_show_##_name); \ } \ static struct device_attribute atkbd_attr_##_name = \ __ATTR(_name, S_IRUGO, atkbd_do_show_##_name, NULL); ATKBD_DEFINE_RO_ATTR(err_count); ATKBD_DEFINE_RO_ATTR(function_row_physmap); static struct attribute *atkbd_attributes[] = { &atkbd_attr_extra.attr, &atkbd_attr_force_release.attr, &atkbd_attr_scroll.attr, &atkbd_attr_set.attr, &atkbd_attr_softrepeat.attr, &atkbd_attr_softraw.attr, &atkbd_attr_err_count.attr, &atkbd_attr_function_row_physmap.attr, NULL }; static ssize_t atkbd_show_function_row_physmap(struct atkbd *atkbd, char *buf) { return vivaldi_function_row_physmap_show(&atkbd->vdata, buf); } static struct atkbd *atkbd_from_serio(struct serio *serio) { struct ps2dev *ps2dev = serio_get_drvdata(serio); return container_of(ps2dev, struct atkbd, ps2dev); } static umode_t atkbd_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = kobj_to_dev(kobj); struct serio *serio = to_serio_port(dev); struct atkbd *atkbd = atkbd_from_serio(serio); if (attr == &atkbd_attr_function_row_physmap.attr && !atkbd->vdata.num_function_row_keys) return 0; return attr->mode; } static const struct attribute_group atkbd_attribute_group = { .attrs = atkbd_attributes, .is_visible = atkbd_attr_is_visible, }; __ATTRIBUTE_GROUPS(atkbd_attribute); static const unsigned int xl_table[] = { ATKBD_RET_BAT, ATKBD_RET_ERR, ATKBD_RET_ACK, ATKBD_RET_NAK, ATKBD_RET_HANJA, ATKBD_RET_HANGEUL, }; /* * Checks if we should mangle the scancode to extract 'release' bit * in translated mode. */ static bool atkbd_need_xlate(unsigned long xl_bit, unsigned char code) { int i; if (code == ATKBD_RET_EMUL0 || code == ATKBD_RET_EMUL1) return false; for (i = 0; i < ARRAY_SIZE(xl_table); i++) if (code == xl_table[i]) return test_bit(i, &xl_bit); return true; } /* * Calculates new value of xl_bit so the driver can distinguish * between make/break pair of scancodes for select keys and PS/2 * protocol responses. 
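 *
 * Worked example, derived from the code below: in translated mode the break
 * code of Left Shift is 0xaa, which is also ATKBD_RET_BAT. Seeing the make
 * code 0x2a sets the matching bit in xl_bit, so a following 0xaa is handled
 * as a key release; without the preceding 0x2a it is handled as a BAT
 * completion instead.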
*/ static void atkbd_calculate_xl_bit(struct atkbd *atkbd, unsigned char code) { int i; for (i = 0; i < ARRAY_SIZE(xl_table); i++) { if (!((code ^ xl_table[i]) & 0x7f)) { if (code & 0x80) __clear_bit(i, &atkbd->xl_bit); else __set_bit(i, &atkbd->xl_bit); break; } } } /* * Encode the scancode, 0xe0 prefix, and high bit into a single integer, * keeping kernel 2.4 compatibility for set 2 */ static unsigned int atkbd_compat_scancode(struct atkbd *atkbd, unsigned int code) { if (atkbd->set == 3) { if (atkbd->emul == 1) code |= 0x100; } else { code = (code & 0x7f) | ((code & 0x80) << 1); if (atkbd->emul == 1) code |= 0x80; } return code; } /* * Tries to handle frame or parity error by requesting the keyboard controller * to resend the last byte. This historically not done on x86 as controllers * there typically do not implement this command. */ static bool __maybe_unused atkbd_handle_frame_error(struct ps2dev *ps2dev, u8 data, unsigned int flags) { struct atkbd *atkbd = container_of(ps2dev, struct atkbd, ps2dev); struct serio *serio = ps2dev->serio; if ((flags & (SERIO_FRAME | SERIO_PARITY)) && (~flags & SERIO_TIMEOUT) && !atkbd->resend && atkbd->write) { dev_warn(&serio->dev, "Frame/parity error: %02x\n", flags); serio_write(serio, ATKBD_CMD_RESEND); atkbd->resend = true; return true; } if (!flags && data == ATKBD_RET_ACK) atkbd->resend = false; return false; } static enum ps2_disposition atkbd_pre_receive_byte(struct ps2dev *ps2dev, u8 data, unsigned int flags) { struct serio *serio = ps2dev->serio; dev_dbg(&serio->dev, "Received %02x flags %02x\n", data, flags); #if !defined(__i386__) && !defined (__x86_64__) if (atkbd_handle_frame_error(ps2dev, data, flags)) return PS2_IGNORE; #endif return PS2_PROCESS; } static void atkbd_receive_byte(struct ps2dev *ps2dev, u8 data) { struct serio *serio = ps2dev->serio; struct atkbd *atkbd = container_of(ps2dev, struct atkbd, ps2dev); struct input_dev *dev = atkbd->dev; unsigned int code = data; int scroll = 0, hscroll = 0, click = -1; int value; unsigned short keycode; pm_wakeup_event(&serio->dev, 0); if (!atkbd->enabled) return; input_event(dev, EV_MSC, MSC_RAW, code); if (atkbd_platform_scancode_fixup) code = atkbd_platform_scancode_fixup(atkbd, code); if (atkbd->translated) { if (atkbd->emul || atkbd_need_xlate(atkbd->xl_bit, code)) { atkbd->release = code >> 7; code &= 0x7f; } if (!atkbd->emul) atkbd_calculate_xl_bit(atkbd, data); } switch (code) { case ATKBD_RET_BAT: atkbd->enabled = false; serio_reconnect(atkbd->ps2dev.serio); return; case ATKBD_RET_EMUL0: atkbd->emul = 1; return; case ATKBD_RET_EMUL1: atkbd->emul = 2; return; case ATKBD_RET_RELEASE: atkbd->release = true; return; case ATKBD_RET_ACK: case ATKBD_RET_NAK: if (printk_ratelimit()) dev_warn(&serio->dev, "Spurious %s on %s. " "Some program might be trying to access hardware directly.\n", data == ATKBD_RET_ACK ? "ACK" : "NAK", serio->phys); return; case ATKBD_RET_ERR: atkbd->err_count++; dev_dbg(&serio->dev, "Keyboard on %s reports too many keys pressed.\n", serio->phys); return; } code = atkbd_compat_scancode(atkbd, code); if (atkbd->emul && --atkbd->emul) return; keycode = atkbd->keycode[code]; if (!(atkbd->release && test_bit(code, atkbd->force_release_mask))) if (keycode != ATKBD_KEY_NULL) input_event(dev, EV_MSC, MSC_SCAN, code); switch (keycode) { case ATKBD_KEY_NULL: break; case ATKBD_KEY_UNKNOWN: dev_warn(&serio->dev, "Unknown key %s (%s set %d, code %#x on %s).\n", atkbd->release ? "released" : "pressed", atkbd->translated ? 
"translated" : "raw", atkbd->set, code, serio->phys); dev_warn(&serio->dev, "Use 'setkeycodes %s%02x <keycode>' to make it known.\n", code & 0x80 ? "e0" : "", code & 0x7f); input_sync(dev); break; case ATKBD_SCR_1: scroll = 1; break; case ATKBD_SCR_2: scroll = 2; break; case ATKBD_SCR_4: scroll = 4; break; case ATKBD_SCR_8: scroll = 8; break; case ATKBD_SCR_CLICK: click = !atkbd->release; break; case ATKBD_SCR_LEFT: hscroll = -1; break; case ATKBD_SCR_RIGHT: hscroll = 1; break; default: if (atkbd->release) { value = 0; atkbd->last = 0; } else if (!atkbd->softrepeat && test_bit(keycode, dev->key)) { /* Workaround Toshiba laptop multiple keypress */ value = time_before(jiffies, atkbd->time) && atkbd->last == code ? 1 : 2; } else { value = 1; atkbd->last = code; atkbd->time = jiffies + msecs_to_jiffies(dev->rep[REP_DELAY]) / 2; } input_event(dev, EV_KEY, keycode, value); input_sync(dev); if (value && test_bit(code, atkbd->force_release_mask)) { input_event(dev, EV_MSC, MSC_SCAN, code); input_report_key(dev, keycode, 0); input_sync(dev); } } if (atkbd->scroll) { if (click != -1) input_report_key(dev, BTN_MIDDLE, click); input_report_rel(dev, REL_WHEEL, atkbd->release ? -scroll : scroll); input_report_rel(dev, REL_HWHEEL, hscroll); input_sync(dev); } atkbd->release = false; } static int atkbd_set_repeat_rate(struct atkbd *atkbd) { const short period[32] = { 33, 37, 42, 46, 50, 54, 58, 63, 67, 75, 83, 92, 100, 109, 116, 125, 133, 149, 167, 182, 200, 217, 232, 250, 270, 303, 333, 370, 400, 435, 470, 500 }; const short delay[4] = { 250, 500, 750, 1000 }; struct input_dev *dev = atkbd->dev; unsigned char param; int i = 0, j = 0; while (i < ARRAY_SIZE(period) - 1 && period[i] < dev->rep[REP_PERIOD]) i++; dev->rep[REP_PERIOD] = period[i]; while (j < ARRAY_SIZE(delay) - 1 && delay[j] < dev->rep[REP_DELAY]) j++; dev->rep[REP_DELAY] = delay[j]; param = i | (j << 5); return ps2_command(&atkbd->ps2dev, &param, ATKBD_CMD_SETREP); } static int atkbd_set_leds(struct atkbd *atkbd) { struct input_dev *dev = atkbd->dev; unsigned char param[2]; param[0] = (test_bit(LED_SCROLLL, dev->led) ? 1 : 0) | (test_bit(LED_NUML, dev->led) ? 2 : 0) | (test_bit(LED_CAPSL, dev->led) ? 4 : 0); if (ps2_command(&atkbd->ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; if (atkbd->extra) { param[0] = 0; param[1] = (test_bit(LED_COMPOSE, dev->led) ? 0x01 : 0) | (test_bit(LED_SLEEP, dev->led) ? 0x02 : 0) | (test_bit(LED_SUSPEND, dev->led) ? 0x04 : 0) | (test_bit(LED_MISC, dev->led) ? 0x10 : 0) | (test_bit(LED_MUTE, dev->led) ? 0x20 : 0); if (ps2_command(&atkbd->ps2dev, param, ATKBD_CMD_EX_SETLEDS)) return -1; } return 0; } /* * atkbd_event_work() is used to complete processing of events that * can not be processed by input_event() which is often called from * interrupt context. */ static void atkbd_event_work(struct work_struct *work) { struct atkbd *atkbd = container_of(work, struct atkbd, event_work.work); mutex_lock(&atkbd->mutex); if (!atkbd->enabled) { /* * Serio ports are resumed asynchronously so while driver core * thinks that device is already fully operational in reality * it may not be ready yet. In this case we need to keep * rescheduling till reconnect completes. */ schedule_delayed_work(&atkbd->event_work, msecs_to_jiffies(100)); } else { if (test_and_clear_bit(ATKBD_LED_EVENT_BIT, &atkbd->event_mask)) atkbd_set_leds(atkbd); if (test_and_clear_bit(ATKBD_REP_EVENT_BIT, &atkbd->event_mask)) atkbd_set_repeat_rate(atkbd); } mutex_unlock(&atkbd->mutex); } /* * Schedule switch for execution. 
We need to throttle requests, * otherwise keyboard may become unresponsive. */ static void atkbd_schedule_event_work(struct atkbd *atkbd, int event_bit) { unsigned long delay = msecs_to_jiffies(50); if (time_after(jiffies, atkbd->event_jiffies + delay)) delay = 0; atkbd->event_jiffies = jiffies; set_bit(event_bit, &atkbd->event_mask); mb(); schedule_delayed_work(&atkbd->event_work, delay); } /* * Event callback from the input module. Events that change the state of * the hardware are processed here. If action can not be performed in * interrupt context it is offloaded to atkbd_event_work. */ static int atkbd_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct atkbd *atkbd = input_get_drvdata(dev); if (!atkbd->write) return -1; switch (type) { case EV_LED: atkbd_schedule_event_work(atkbd, ATKBD_LED_EVENT_BIT); return 0; case EV_REP: if (!atkbd->softrepeat) atkbd_schedule_event_work(atkbd, ATKBD_REP_EVENT_BIT); return 0; default: return -1; } } /* * atkbd_enable() signals that interrupt handler is allowed to * generate input events. */ static inline void atkbd_enable(struct atkbd *atkbd) { serio_pause_rx(atkbd->ps2dev.serio); atkbd->enabled = true; serio_continue_rx(atkbd->ps2dev.serio); } /* * atkbd_disable() tells input handler that all incoming data except * for ACKs and command response should be dropped. */ static inline void atkbd_disable(struct atkbd *atkbd) { serio_pause_rx(atkbd->ps2dev.serio); atkbd->enabled = false; serio_continue_rx(atkbd->ps2dev.serio); } static int atkbd_activate(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; /* * Enable the keyboard to receive keystrokes. */ if (ps2_command(ps2dev, NULL, ATKBD_CMD_ENABLE)) { dev_err(&ps2dev->serio->dev, "Failed to enable keyboard on %s\n", ps2dev->serio->phys); return -1; } return 0; } /* * atkbd_deactivate() resets and disables the keyboard from sending * keystrokes. */ static void atkbd_deactivate(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; if (ps2_command(ps2dev, NULL, ATKBD_CMD_RESET_DIS)) dev_err(&ps2dev->serio->dev, "Failed to deactivate keyboard on %s\n", ps2dev->serio->phys); } #ifdef CONFIG_X86 static bool atkbd_is_portable_device(void) { static const char * const chassis_types[] = { "8", /* Portable */ "9", /* Laptop */ "10", /* Notebook */ "14", /* Sub-Notebook */ "31", /* Convertible */ "32", /* Detachable */ }; int i; for (i = 0; i < ARRAY_SIZE(chassis_types); i++) if (dmi_match(DMI_CHASSIS_TYPE, chassis_types[i])) return true; return false; } /* * On many modern laptops ATKBD_CMD_GETID may cause problems, on these laptops * the controller is always in translated mode. In this mode mice/touchpads will * not work. So in this case simply assume a keyboard is connected to avoid * confusing some laptop keyboards. * * Skipping ATKBD_CMD_GETID ends up using a fake keyboard id. Using the standard * 0xab83 id is ok in translated mode, only atkbd_select_set() checks atkbd->id * and in translated mode that is a no-op. */ static bool atkbd_skip_getid(struct atkbd *atkbd) { return atkbd->translated && atkbd_is_portable_device(); } #else static inline bool atkbd_skip_getid(struct atkbd *atkbd) { return false; } #endif /* * atkbd_probe() probes for an AT keyboard on a serio port. */ static int atkbd_probe(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; /* * Some systems, where the bit-twiddling when testing the io-lines of the * controller may confuse the keyboard need a full reset of the keyboard. 
On * these systems the BIOS also usually doesn't do it for us. */ if (atkbd_reset) if (ps2_command(ps2dev, NULL, ATKBD_CMD_RESET_BAT)) dev_warn(&ps2dev->serio->dev, "keyboard reset failed on %s\n", ps2dev->serio->phys); if (atkbd_skip_getid(atkbd)) { atkbd->id = 0xab83; goto deactivate_kbd; } /* * Then we check the keyboard ID. We should get 0xab83 under normal conditions. * Some keyboards report different values, but the first byte is always 0xab or * 0xac. Some old AT keyboards don't report anything. If a mouse is connected, this * should make sure we don't try to set the LEDs on it. */ param[0] = param[1] = 0xa5; /* initialize with invalid values */ if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { /* * If the get ID command failed, we check if we can at least set * the LEDs on the keyboard. This should work on every keyboard out there. * It also turns the LEDs off, which we want anyway. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; atkbd->id = 0xabba; return 0; } if (!ps2_is_keyboard_id(param[0])) return -1; atkbd->id = (param[0] << 8) | param[1]; if (atkbd->id == 0xaca1 && atkbd->translated) { dev_err(&ps2dev->serio->dev, "NCD terminal keyboards are only supported on non-translating controllers. " "Use i8042.direct=1 to disable translation.\n"); return -1; } deactivate_kbd: /* * Make sure nothing is coming from the keyboard and disturbs our * internal state. */ if (!atkbd_skip_deactivate) atkbd_deactivate(atkbd); return 0; } /* * atkbd_select_set checks if a keyboard has a working Set 3 support, and * sets it into that. Unfortunately there are keyboards that can be switched * to Set 3, but don't work well in that (BTC Multimedia ...) */ static int atkbd_select_set(struct atkbd *atkbd, int target_set, int allow_extra) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; atkbd->extra = false; /* * For known special keyboards we can go ahead and set the correct set. * We check for NCD PS/2 Sun, NorthGate OmniKey 101 and * IBM RapidAccess / IBM EzButton / Chicony KBP-8993 keyboards. */ if (atkbd->translated) return 2; if (atkbd->id == 0xaca1) { param[0] = 3; ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET); return 3; } if (allow_extra) { param[0] = 0x71; if (!ps2_command(ps2dev, param, ATKBD_CMD_EX_ENABLE)) { atkbd->extra = true; return 2; } } if (atkbd_terminal) { ps2_command(ps2dev, param, ATKBD_CMD_SETALL_MB); return 3; } if (target_set != 3) return 2; if (!ps2_command(ps2dev, param, ATKBD_CMD_OK_GETID)) { atkbd->id = param[0] << 8 | param[1]; return 2; } param[0] = 3; if (ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET)) return 2; param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_GSCANSET)) return 2; if (param[0] != 3) { param[0] = 2; if (ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET)) return 2; } ps2_command(ps2dev, param, ATKBD_CMD_SETALL_MBR); return 3; } static int atkbd_reset_state(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[1]; /* * Set the LEDs to a predefined state (all off). */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; /* * Set autorepeat to fastest possible. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETREP)) return -1; return 0; } /* * atkbd_cleanup() restores the keyboard state so that BIOS is happy after a * reboot. */ static void atkbd_cleanup(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); atkbd_disable(atkbd); ps2_command(&atkbd->ps2dev, NULL, ATKBD_CMD_RESET_DEF); } /* * atkbd_disconnect() closes and frees. 
*/ static void atkbd_disconnect(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); atkbd_disable(atkbd); input_unregister_device(atkbd->dev); /* * Make sure we don't have a command in flight. * Note that since atkbd->enabled is false event work will keep * rescheduling itself until it gets canceled and will not try * accessing freed input device or serio port. */ cancel_delayed_work_sync(&atkbd->event_work); serio_close(serio); serio_set_drvdata(serio, NULL); kfree(atkbd); } /* * generate release events for the keycodes given in data */ static void atkbd_apply_forced_release_keylist(struct atkbd* atkbd, const void *data) { const unsigned int *keys = data; unsigned int i; if (atkbd->set == 2) for (i = 0; keys[i] != -1U; i++) __set_bit(keys[i], atkbd->force_release_mask); } /* * Most special keys (Fn+F?) on Dell laptops do not generate release * events so we have to do it ourselves. */ static unsigned int atkbd_dell_laptop_forced_release_keys[] = { 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8f, 0x93, -1U }; /* * Perform fixup for HP system that doesn't generate release * for its video switch */ static unsigned int atkbd_hp_forced_release_keys[] = { 0x94, -1U }; /* * Samsung NC10,NC20 with Fn+F? key release not working */ static unsigned int atkbd_samsung_forced_release_keys[] = { 0x82, 0x83, 0x84, 0x86, 0x88, 0x89, 0xb3, 0xf7, 0xf9, -1U }; /* * Amilo Pi 3525 key release for Fn+Volume keys not working */ static unsigned int atkbd_amilo_pi3525_forced_release_keys[] = { 0x20, 0xa0, 0x2e, 0xae, 0x30, 0xb0, -1U }; /* * Amilo Xi 3650 key release for light touch bar not working */ static unsigned int atkbd_amilo_xi3650_forced_release_keys[] = { 0x67, 0xed, 0x90, 0xa2, 0x99, 0xa4, 0xae, 0xb0, -1U }; /* * Soltech TA12 system with broken key release on volume keys and mute key */ static unsigned int atkdb_soltech_ta12_forced_release_keys[] = { 0xa0, 0xae, 0xb0, -1U }; /* * Many notebooks don't send key release event for volume up/down * keys, with key list below common among them */ static unsigned int atkbd_volume_forced_release_keys[] = { 0xae, 0xb0, -1U }; /* * OQO 01+ multimedia keys (64--66) generate e0 6x upon release whereas * they should be generating e4-e6 (0x80 | code). 
*/ static unsigned int atkbd_oqo_01plus_scancode_fixup(struct atkbd *atkbd, unsigned int code) { if (atkbd->translated && atkbd->emul == 1 && (code == 0x64 || code == 0x65 || code == 0x66)) { atkbd->emul = 0; code |= 0x80; } return code; } static int atkbd_get_keymap_from_fwnode(struct atkbd *atkbd) { struct device *dev = &atkbd->ps2dev.serio->dev; int i, n; u32 *ptr; u16 scancode, keycode; /* Parse "linux,keymap" property */ n = device_property_count_u32(dev, "linux,keymap"); if (n <= 0 || n > ATKBD_KEYMAP_SIZE) return -ENXIO; ptr = kcalloc(n, sizeof(u32), GFP_KERNEL); if (!ptr) return -ENOMEM; if (device_property_read_u32_array(dev, "linux,keymap", ptr, n)) { dev_err(dev, "problem parsing FW keymap property\n"); kfree(ptr); return -EINVAL; } memset(atkbd->keycode, 0, sizeof(atkbd->keycode)); for (i = 0; i < n; i++) { scancode = SCANCODE(ptr[i]); keycode = KEYCODE(ptr[i]); atkbd->keycode[scancode] = keycode; } kfree(ptr); return 0; } /* * atkbd_set_keycode_table() initializes keyboard's keycode table * according to the selected scancode set */ static void atkbd_set_keycode_table(struct atkbd *atkbd) { struct device *dev = &atkbd->ps2dev.serio->dev; unsigned int scancode; int i, j; memset(atkbd->keycode, 0, sizeof(atkbd->keycode)); bitmap_zero(atkbd->force_release_mask, ATKBD_KEYMAP_SIZE); if (!atkbd_get_keymap_from_fwnode(atkbd)) { dev_dbg(dev, "Using FW keymap\n"); } else if (atkbd->translated) { for (i = 0; i < 128; i++) { scancode = atkbd_unxlate_table[i]; atkbd->keycode[i] = atkbd_set2_keycode[scancode]; atkbd->keycode[i | 0x80] = atkbd_set2_keycode[scancode | 0x80]; if (atkbd->scroll) for (j = 0; j < ARRAY_SIZE(atkbd_scroll_keys); j++) if ((scancode | 0x80) == atkbd_scroll_keys[j].set2) atkbd->keycode[i | 0x80] = atkbd_scroll_keys[j].keycode; } } else if (atkbd->set == 3) { memcpy(atkbd->keycode, atkbd_set3_keycode, sizeof(atkbd->keycode)); } else { memcpy(atkbd->keycode, atkbd_set2_keycode, sizeof(atkbd->keycode)); if (atkbd->scroll) for (i = 0; i < ARRAY_SIZE(atkbd_scroll_keys); i++) { scancode = atkbd_scroll_keys[i].set2; atkbd->keycode[scancode] = atkbd_scroll_keys[i].keycode; } } /* * HANGEUL and HANJA keys do not send release events so we need to * generate such events ourselves */ scancode = atkbd_compat_scancode(atkbd, ATKBD_RET_HANGEUL); atkbd->keycode[scancode] = KEY_HANGEUL; __set_bit(scancode, atkbd->force_release_mask); scancode = atkbd_compat_scancode(atkbd, ATKBD_RET_HANJA); atkbd->keycode[scancode] = KEY_HANJA; __set_bit(scancode, atkbd->force_release_mask); /* * Perform additional fixups */ if (atkbd_platform_fixup) atkbd_platform_fixup(atkbd, atkbd_platform_fixup_data); } /* * atkbd_set_device_attrs() sets up keyboard's input device structure */ static void atkbd_set_device_attrs(struct atkbd *atkbd) { struct input_dev *input_dev = atkbd->dev; int i; if (atkbd->extra) snprintf(atkbd->name, sizeof(atkbd->name), "AT Set 2 Extra keyboard"); else snprintf(atkbd->name, sizeof(atkbd->name), "AT %s Set %d keyboard", atkbd->translated ? "Translated" : "Raw", atkbd->set); snprintf(atkbd->phys, sizeof(atkbd->phys), "%s/input0", atkbd->ps2dev.serio->phys); input_dev->name = atkbd->name; input_dev->phys = atkbd->phys; input_dev->id.bustype = BUS_I8042; input_dev->id.vendor = 0x0001; input_dev->id.product = atkbd->translated ? 
1 : atkbd->set; input_dev->id.version = atkbd->id; input_dev->event = atkbd_event; input_dev->dev.parent = &atkbd->ps2dev.serio->dev; input_set_drvdata(input_dev, atkbd); input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_MSC); if (atkbd->write) { input_dev->evbit[0] |= BIT_MASK(EV_LED); input_dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) | BIT_MASK(LED_SCROLLL); } if (atkbd->extra) input_dev->ledbit[0] |= BIT_MASK(LED_COMPOSE) | BIT_MASK(LED_SUSPEND) | BIT_MASK(LED_SLEEP) | BIT_MASK(LED_MUTE) | BIT_MASK(LED_MISC); if (!atkbd->softrepeat) { input_dev->rep[REP_DELAY] = 250; input_dev->rep[REP_PERIOD] = 33; } input_dev->mscbit[0] = atkbd->softraw ? BIT_MASK(MSC_SCAN) : BIT_MASK(MSC_RAW) | BIT_MASK(MSC_SCAN); if (atkbd->scroll) { input_dev->evbit[0] |= BIT_MASK(EV_REL); input_dev->relbit[0] = BIT_MASK(REL_WHEEL) | BIT_MASK(REL_HWHEEL); __set_bit(BTN_MIDDLE, input_dev->keybit); } input_dev->keycode = atkbd->keycode; input_dev->keycodesize = sizeof(unsigned short); input_dev->keycodemax = ARRAY_SIZE(atkbd_set2_keycode); for (i = 0; i < ATKBD_KEYMAP_SIZE; i++) { if (atkbd->keycode[i] != KEY_RESERVED && atkbd->keycode[i] != ATKBD_KEY_NULL && atkbd->keycode[i] < ATKBD_SPECIAL) { __set_bit(atkbd->keycode[i], input_dev->keybit); } } } static void atkbd_parse_fwnode_data(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); struct device *dev = &serio->dev; int n; /* Parse "function-row-physmap" property */ n = device_property_count_u32(dev, "function-row-physmap"); if (n > 0 && n <= VIVALDI_MAX_FUNCTION_ROW_KEYS && !device_property_read_u32_array(dev, "function-row-physmap", atkbd->vdata.function_row_physmap, n)) { atkbd->vdata.num_function_row_keys = n; dev_dbg(dev, "FW reported %d function-row key locations\n", n); } } /* * atkbd_connect() is called when the serio module finds an interface * that isn't handled yet by an appropriate device driver. We check if * there is an AT keyboard out there and if yes, we register ourselves * to the input module. */ static int atkbd_connect(struct serio *serio, struct serio_driver *drv) { struct atkbd *atkbd; struct input_dev *dev; int err = -ENOMEM; atkbd = kzalloc(sizeof(struct atkbd), GFP_KERNEL); dev = input_allocate_device(); if (!atkbd || !dev) goto fail1; atkbd->dev = dev; ps2_init(&atkbd->ps2dev, serio, atkbd_pre_receive_byte, atkbd_receive_byte); INIT_DELAYED_WORK(&atkbd->event_work, atkbd_event_work); mutex_init(&atkbd->mutex); switch (serio->id.type) { case SERIO_8042_XL: atkbd->translated = true; fallthrough; case SERIO_8042: if (serio->write) atkbd->write = true; break; } atkbd->softraw = atkbd_softraw; atkbd->softrepeat = atkbd_softrepeat; atkbd->scroll = atkbd_scroll; if (atkbd->softrepeat) atkbd->softraw = true; serio_set_drvdata(serio, atkbd); err = serio_open(serio, drv); if (err) goto fail2; if (atkbd->write) { if (atkbd_probe(atkbd)) { err = -ENODEV; goto fail3; } atkbd->set = atkbd_select_set(atkbd, atkbd_set, atkbd_extra); atkbd_reset_state(atkbd); } else { atkbd->set = 2; atkbd->id = 0xab00; } atkbd_parse_fwnode_data(serio); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); atkbd_enable(atkbd); if (serio->write) atkbd_activate(atkbd); err = input_register_device(atkbd->dev); if (err) goto fail3; return 0; fail3: serio_close(serio); fail2: serio_set_drvdata(serio, NULL); fail1: input_free_device(dev); kfree(atkbd); return err; } /* * atkbd_reconnect() tries to restore keyboard into a sane state and is * most likely called on resume. 
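 *
 * It can also be requested from userspace; on a typical i8042 setup
 * something like "echo -n reconnect > /sys/bus/serio/devices/serio0/drvctl"
 * is expected to exercise this path (the exact sysfs location and port
 * number are assumptions and vary between systems).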
*/ static int atkbd_reconnect(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); struct serio_driver *drv = serio->drv; int retval = -1; if (!atkbd || !drv) { dev_dbg(&serio->dev, "reconnect request, but serio is disconnected, ignoring...\n"); return -1; } mutex_lock(&atkbd->mutex); atkbd_disable(atkbd); if (atkbd->write) { if (atkbd_probe(atkbd)) goto out; if (atkbd->set != atkbd_select_set(atkbd, atkbd->set, atkbd->extra)) goto out; /* * Restore LED state and repeat rate. While input core * will do this for us at resume time reconnect may happen * because user requested it via sysfs or simply because * keyboard was unplugged and plugged in again so we need * to do it ourselves here. */ atkbd_set_leds(atkbd); if (!atkbd->softrepeat) atkbd_set_repeat_rate(atkbd); } /* * Reset our state machine in case reconnect happened in the middle * of multi-byte scancode. */ atkbd->xl_bit = 0; atkbd->emul = 0; atkbd_enable(atkbd); if (atkbd->write) atkbd_activate(atkbd); retval = 0; out: mutex_unlock(&atkbd->mutex); return retval; } static const struct serio_device_id atkbd_serio_ids[] = { { .type = SERIO_8042, .proto = SERIO_ANY, .id = SERIO_ANY, .extra = SERIO_ANY, }, { .type = SERIO_8042_XL, .proto = SERIO_ANY, .id = SERIO_ANY, .extra = SERIO_ANY, }, { .type = SERIO_RS232, .proto = SERIO_PS2SER, .id = SERIO_ANY, .extra = SERIO_ANY, }, { 0 } }; MODULE_DEVICE_TABLE(serio, atkbd_serio_ids); static struct serio_driver atkbd_drv = { .driver = { .name = "atkbd", .dev_groups = atkbd_attribute_groups, }, .description = DRIVER_DESC, .id_table = atkbd_serio_ids, .interrupt = ps2_interrupt, .connect = atkbd_connect, .reconnect = atkbd_reconnect, .disconnect = atkbd_disconnect, .cleanup = atkbd_cleanup, }; static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf, ssize_t (*handler)(struct atkbd *, char *)) { struct serio *serio = to_serio_port(dev); struct atkbd *atkbd = atkbd_from_serio(serio); return handler(atkbd, buf); } static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count, ssize_t (*handler)(struct atkbd *, const char *, size_t)) { struct serio *serio = to_serio_port(dev); struct atkbd *atkbd = atkbd_from_serio(serio); int retval; retval = mutex_lock_interruptible(&atkbd->mutex); if (retval) return retval; atkbd_disable(atkbd); retval = handler(atkbd, buf, count); atkbd_enable(atkbd); mutex_unlock(&atkbd->mutex); return retval; } static ssize_t atkbd_show_extra(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->extra ? 1 : 0); } static ssize_t atkbd_set_extra(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_extra; unsigned char old_set; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->extra != value) { /* * Since device's properties will change we need to * unregister old device. But allocate and register * new one first to make sure we have it. 
*/ old_dev = atkbd->dev; old_extra = atkbd->extra; old_set = atkbd->set; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->set = atkbd_select_set(atkbd, atkbd->set, value); atkbd_reset_state(atkbd); atkbd_activate(atkbd); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->set = atkbd_select_set(atkbd, old_set, old_extra); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_force_release(struct atkbd *atkbd, char *buf) { size_t len = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", ATKBD_KEYMAP_SIZE, atkbd->force_release_mask); buf[len++] = '\n'; buf[len] = '\0'; return len; } static ssize_t atkbd_set_force_release(struct atkbd *atkbd, const char *buf, size_t count) { /* 64 bytes on stack should be acceptable */ DECLARE_BITMAP(new_mask, ATKBD_KEYMAP_SIZE); int err; err = bitmap_parselist(buf, new_mask, ATKBD_KEYMAP_SIZE); if (err) return err; memcpy(atkbd->force_release_mask, new_mask, sizeof(atkbd->force_release_mask)); return count; } static ssize_t atkbd_show_scroll(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->scroll ? 1 : 0); } static ssize_t atkbd_set_scroll(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_scroll; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->scroll != value) { old_dev = atkbd->dev; old_scroll = atkbd->scroll; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->scroll = value; atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->scroll = old_scroll; atkbd->dev = old_dev; atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_set(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->set); } static ssize_t atkbd_set_set(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; unsigned char old_set; bool old_extra; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value != 2 && value != 3) return -EINVAL; if (atkbd->set != value) { old_dev = atkbd->dev; old_extra = atkbd->extra; old_set = atkbd->set; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->set = atkbd_select_set(atkbd, value, atkbd->extra); atkbd_reset_state(atkbd); atkbd_activate(atkbd); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->set = atkbd_select_set(atkbd, old_set, old_extra); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_softrepeat(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->softrepeat ? 
1 : 0); } static ssize_t atkbd_set_softrepeat(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_softrepeat, old_softraw; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->softrepeat != value) { old_dev = atkbd->dev; old_softrepeat = atkbd->softrepeat; old_softraw = atkbd->softraw; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->softrepeat = value; if (atkbd->softrepeat) atkbd->softraw = true; atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->softrepeat = old_softrepeat; atkbd->softraw = old_softraw; atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_softraw(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->softraw ? 1 : 0); } static ssize_t atkbd_set_softraw(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_softraw; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->softraw != value) { old_dev = atkbd->dev; old_softraw = atkbd->softraw; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->softraw = value; atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->softraw = old_softraw; atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_err_count(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%lu\n", atkbd->err_count); } static int __init atkbd_setup_forced_release(const struct dmi_system_id *id) { atkbd_platform_fixup = atkbd_apply_forced_release_keylist; atkbd_platform_fixup_data = id->driver_data; return 1; } static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id) { atkbd_platform_scancode_fixup = id->driver_data; return 1; } static int __init atkbd_deactivate_fixup(const struct dmi_system_id *id) { atkbd_skip_deactivate = true; return 1; } /* * NOTE: do not add any more "force release" quirks to this table. The * task of adjusting list of keys that should be "released" automatically * by the driver is now delegated to userspace tools, such as udev, so * submit such quirks there. 
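 *
 * For reference, the same effect can be achieved at runtime through the
 * force_release sysfs attribute defined above, which accepts a bitmap range
 * list, e.g. (the path is an assumption for a typical i8042 setup, and the
 * scancodes are arbitrary):
 *
 *	echo "133,135-139" > /sys/devices/platform/i8042/serio0/force_release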
*/ static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */ }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_dell_laptop_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */ }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_dell_laptop_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "HP 2133"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_hp_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "Pavilion ZV6100"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4000"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4100"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4200"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { /* Inventec Symphony */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "INVENTEC"), DMI_MATCH(DMI_PRODUCT_NAME, "SYMPHONY 6.0/7.0"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { /* Samsung NC10 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), DMI_MATCH(DMI_PRODUCT_NAME, "NC10"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_samsung_forced_release_keys, }, { /* Samsung NC20 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), DMI_MATCH(DMI_PRODUCT_NAME, "NC20"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_samsung_forced_release_keys, }, { /* Samsung SQ45S70S */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), DMI_MATCH(DMI_PRODUCT_NAME, "SQ45S70S"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_samsung_forced_release_keys, }, { /* Fujitsu Amilo PA 1510 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pa 1510"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { /* Fujitsu Amilo Pi 3525 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pi 3525"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_amilo_pi3525_forced_release_keys, }, { /* Fujitsu Amilo Xi 3650 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Xi 3650"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_amilo_xi3650_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Soltech Corporation"), DMI_MATCH(DMI_PRODUCT_NAME, "TA12"), }, .callback = atkbd_setup_forced_release, .driver_data = atkdb_soltech_ta12_forced_release_keys, }, { /* OQO Model 01+ */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "OQO"), DMI_MATCH(DMI_PRODUCT_NAME, "ZEPTO"), }, .callback = atkbd_setup_scancode_fixup, .driver_data = atkbd_oqo_01plus_scancode_fixup, }, { .matches = { 
DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"), }, .callback = atkbd_deactivate_fixup, }, { } }; static int __init atkbd_init(void) { dmi_check_system(atkbd_dmi_quirk_table); return serio_register_driver(&atkbd_drv); } static void __exit atkbd_exit(void) { serio_unregister_driver(&atkbd_drv); } module_init(atkbd_init); module_exit(atkbd_exit);
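/*
 * Usage sketch (not part of the driver proper): the module parameters above
 * are exposed as "atkbd.<name>" when the driver is built in, so for example
 *
 *	atkbd.set=3 atkbd.softraw=0
 *
 * on the kernel command line selects the PS/2 native set 3 and disables
 * software generated rawmode, while
 *
 *	modprobe atkbd reset=1 softrepeat=1
 *
 * does the equivalent for a modular build. Most of the same knobs are also
 * available per port through the sysfs attributes defined earlier.
 */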
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Initialization routines
 *  Copyright (c) by Jaroslav Kysela <perex@perex.cz>
 */

#include <linux/init.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/ctype.h>
#include <linux/pm.h>
#include <linux/debugfs.h>
#include <linux/completion.h>
#include <linux/interrupt.h>

#include <sound/core.h>
#include <sound/control.h>
#include <sound/info.h>

/* monitor files for graceful shutdown (hotplug) */
struct snd_monitor_file {
	struct file *file;
	const struct file_operations *disconnected_f_op;
	struct list_head shutdown_list;	/* still need to shutdown */
	struct list_head list;	/* link of monitor files */
};

static DEFINE_SPINLOCK(shutdown_lock);
static LIST_HEAD(shutdown_files);

static const struct file_operations snd_shutdown_f_ops;

/* locked for registering/using */
static DECLARE_BITMAP(snd_cards_lock, SNDRV_CARDS);
static struct snd_card *snd_cards[SNDRV_CARDS];

static DEFINE_MUTEX(snd_card_mutex);

static char *slots[SNDRV_CARDS];
module_param_array(slots, charp, NULL, 0444);
MODULE_PARM_DESC(slots, "Module names assigned to the slots.");

/* return non-zero if the given index is reserved for the given
 * module via slots option
 */
static int module_slot_match(struct module *module, int idx)
{
	int match = 1;
#ifdef MODULE
	const char *s1, *s2;

	if (!module || !*module->name || !slots[idx])
		return 0;

	s1 = module->name;
	s2 = slots[idx];
	if (*s2 == '!') {
		match = 0; /* negative match */
		s2++;
	}
	/* compare module name strings
	 * hyphens are handled as equivalent with underscore
	 */
	for (;;) {
		char c1 = *s1++;
		char c2 = *s2++;

		if (c1 == '-')
			c1 = '_';
		if (c2 == '-')
			c2 = '_';
		if (c1 != c2)
			return !match;
		if (!c1)
			break;
	}
#endif /* MODULE */
	return match;
}

#if IS_ENABLED(CONFIG_SND_MIXER_OSS)
int (*snd_mixer_oss_notify_callback)(struct snd_card *card, int free_flag);
EXPORT_SYMBOL(snd_mixer_oss_notify_callback);
#endif

static int check_empty_slot(struct module *module, int slot)
{
	return !slots[slot] || !*slots[slot];
}

/* return an empty slot number (>= 0) found in the given bitmask @mask.
 * @mask == -1 == 0xffffffff means: take any free slot up to 32
 * when no slot is available, return the original @mask as is.
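 *
 * Worked example, derived from the loop below: with @mask == 0xfffffffe
 * bit 0 is clear, so slot 0 is skipped and the first free slot from 1
 * upwards that passes @check is returned; slots >= 32 are not restricted
 * by @mask at all.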
*/ static int get_slot_from_bitmask(int mask, int (*check)(struct module *, int), struct module *module) { int slot; for (slot = 0; slot < SNDRV_CARDS; slot++) { if (slot < 32 && !(mask & (1U << slot))) continue; if (!test_bit(slot, snd_cards_lock)) { if (check(module, slot)) return slot; /* found */ } } return mask; /* unchanged */ } /* the default release callback set in snd_device_alloc() */ static void default_release_alloc(struct device *dev) { kfree(dev); } /** * snd_device_alloc - Allocate and initialize struct device for sound devices * @dev_p: pointer to store the allocated device * @card: card to assign, optional * * For releasing the allocated device, call put_device(). */ int snd_device_alloc(struct device **dev_p, struct snd_card *card) { struct device *dev; *dev_p = NULL; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; device_initialize(dev); if (card) dev->parent = &card->card_dev; dev->class = &sound_class; dev->release = default_release_alloc; *dev_p = dev; return 0; } EXPORT_SYMBOL_GPL(snd_device_alloc); static int snd_card_init(struct snd_card *card, struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size); static int snd_card_do_free(struct snd_card *card); static const struct attribute_group card_dev_attr_group; static void release_card_device(struct device *dev) { snd_card_do_free(dev_to_snd_card(dev)); } /** * snd_card_new - create and initialize a soundcard structure * @parent: the parent device object * @idx: card index (address) [0 ... (SNDRV_CARDS-1)] * @xid: card identification (ASCII string) * @module: top level module for locking * @extra_size: allocate this extra size after the main soundcard structure * @card_ret: the pointer to store the created card instance * * The function allocates snd_card instance via kzalloc with the given * space for the driver to use freely. The allocated struct is stored * in the given card_ret pointer. * * Return: Zero if successful or a negative error code. */ int snd_card_new(struct device *parent, int idx, const char *xid, struct module *module, int extra_size, struct snd_card **card_ret) { struct snd_card *card; int err; if (snd_BUG_ON(!card_ret)) return -EINVAL; *card_ret = NULL; if (extra_size < 0) extra_size = 0; card = kzalloc(sizeof(*card) + extra_size, GFP_KERNEL); if (!card) return -ENOMEM; err = snd_card_init(card, parent, idx, xid, module, extra_size); if (err < 0) return err; /* card is freed by error handler */ *card_ret = card; return 0; } EXPORT_SYMBOL(snd_card_new); static void __snd_card_release(struct device *dev, void *data) { snd_card_free(data); } /** * snd_devm_card_new - managed snd_card object creation * @parent: the parent device object * @idx: card index (address) [0 ... (SNDRV_CARDS-1)] * @xid: card identification (ASCII string) * @module: top level module for locking * @extra_size: allocate this extra size after the main soundcard structure * @card_ret: the pointer to store the created card instance * * This function works like snd_card_new() but manages the allocated resource * via devres, i.e. you don't need to free explicitly. * * When a snd_card object is created with this function and registered via * snd_card_register(), the very first devres action to call snd_card_free() * is added automatically. In that way, the resource disconnection is assured * at first, then released in the expected order. 
* * If an error happens at the probe before snd_card_register() is called and * there have been other devres resources, you'd need to free the card manually * via snd_card_free() call in the error; otherwise it may lead to UAF due to * devres call orders. You can use snd_card_free_on_error() helper for * handling it more easily. * * Return: zero if successful, or a negative error code */ int snd_devm_card_new(struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size, struct snd_card **card_ret) { struct snd_card *card; int err; *card_ret = NULL; card = devres_alloc(__snd_card_release, sizeof(*card) + extra_size, GFP_KERNEL); if (!card) return -ENOMEM; card->managed = true; err = snd_card_init(card, parent, idx, xid, module, extra_size); if (err < 0) { devres_free(card); /* in managed mode, we need to free manually */ return err; } devres_add(parent, card); *card_ret = card; return 0; } EXPORT_SYMBOL_GPL(snd_devm_card_new); /** * snd_card_free_on_error - a small helper for handling devm probe errors * @dev: the managed device object * @ret: the return code from the probe callback * * This function handles the explicit snd_card_free() call at the error from * the probe callback. It's just a small helper for simplifying the error * handling with the managed devices. * * Return: zero if successful, or a negative error code */ int snd_card_free_on_error(struct device *dev, int ret) { struct snd_card *card; if (!ret) return 0; card = devres_find(dev, __snd_card_release, NULL, NULL); if (card) snd_card_free(card); return ret; } EXPORT_SYMBOL_GPL(snd_card_free_on_error); static int snd_card_init(struct snd_card *card, struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size) { int err; if (extra_size > 0) card->private_data = (char *)card + sizeof(struct snd_card); if (xid) strscpy(card->id, xid, sizeof(card->id)); err = 0; scoped_guard(mutex, &snd_card_mutex) { if (idx < 0) /* first check the matching module-name slot */ idx = get_slot_from_bitmask(idx, module_slot_match, module); if (idx < 0) /* if not matched, assign an empty slot */ idx = get_slot_from_bitmask(idx, check_empty_slot, module); if (idx < 0) err = -ENODEV; else if (idx < snd_ecards_limit) { if (test_bit(idx, snd_cards_lock)) err = -EBUSY; /* invalid */ } else if (idx >= SNDRV_CARDS) err = -ENODEV; if (!err) { set_bit(idx, snd_cards_lock); /* lock it */ if (idx >= snd_ecards_limit) snd_ecards_limit = idx + 1; /* increase the limit */ } } if (err < 0) { dev_err(parent, "cannot find the slot for index %d (range 0-%i), error: %d\n", idx, snd_ecards_limit - 1, err); if (!card->managed) kfree(card); /* manually free here, as no destructor called */ return err; } card->dev = parent; card->number = idx; #ifdef MODULE WARN_ON(!module); card->module = module; #endif INIT_LIST_HEAD(&card->devices); init_rwsem(&card->controls_rwsem); rwlock_init(&card->ctl_files_rwlock); INIT_LIST_HEAD(&card->controls); INIT_LIST_HEAD(&card->ctl_files); #ifdef CONFIG_SND_CTL_FAST_LOOKUP xa_init(&card->ctl_numids); xa_init(&card->ctl_hash); #endif spin_lock_init(&card->files_lock); INIT_LIST_HEAD(&card->files_list); mutex_init(&card->memory_mutex); #ifdef CONFIG_PM init_waitqueue_head(&card->power_sleep); init_waitqueue_head(&card->power_ref_sleep); atomic_set(&card->power_ref, 0); #endif init_waitqueue_head(&card->remove_sleep); card->sync_irq = -1; device_initialize(&card->card_dev); card->card_dev.parent = parent; card->card_dev.class = &sound_class; card->card_dev.release = 
release_card_device; card->card_dev.groups = card->dev_groups; card->dev_groups[0] = &card_dev_attr_group; err = kobject_set_name(&card->card_dev.kobj, "card%d", idx); if (err < 0) goto __error; snprintf(card->irq_descr, sizeof(card->irq_descr), "%s:%s", dev_driver_string(card->dev), dev_name(&card->card_dev)); /* the control interface cannot be accessed from the user space until */ /* snd_cards_bitmask and snd_cards are set with snd_card_register */ err = snd_ctl_create(card); if (err < 0) { dev_err(parent, "unable to register control minors\n"); goto __error; } err = snd_info_card_create(card); if (err < 0) { dev_err(parent, "unable to create card info\n"); goto __error_ctl; } #ifdef CONFIG_SND_DEBUG card->debugfs_root = debugfs_create_dir(dev_name(&card->card_dev), sound_debugfs_root); #endif return 0; __error_ctl: snd_device_free_all(card); __error: put_device(&card->card_dev); return err; } /** * snd_card_ref - Get the card object from the index * @idx: the card index * * Returns a card object corresponding to the given index or NULL if not found. * Release the object via snd_card_unref(). * * Return: a card object or NULL */ struct snd_card *snd_card_ref(int idx) { struct snd_card *card; guard(mutex)(&snd_card_mutex); card = snd_cards[idx]; if (card) get_device(&card->card_dev); return card; } EXPORT_SYMBOL_GPL(snd_card_ref); /* return non-zero if a card is already locked */ int snd_card_locked(int card) { guard(mutex)(&snd_card_mutex); return test_bit(card, snd_cards_lock); } static loff_t snd_disconnect_llseek(struct file *file, loff_t offset, int orig) { return -ENODEV; } static ssize_t snd_disconnect_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { return -ENODEV; } static ssize_t snd_disconnect_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { return -ENODEV; } static int snd_disconnect_release(struct inode *inode, struct file *file) { struct snd_monitor_file *df = NULL, *_df; scoped_guard(spinlock, &shutdown_lock) { list_for_each_entry(_df, &shutdown_files, shutdown_list) { if (_df->file == file) { df = _df; list_del_init(&df->shutdown_list); break; } } } if (likely(df)) { if ((file->f_flags & FASYNC) && df->disconnected_f_op->fasync) df->disconnected_f_op->fasync(-1, file, 0); return df->disconnected_f_op->release(inode, file); } panic("%s(%p, %p) failed!", __func__, inode, file); } static __poll_t snd_disconnect_poll(struct file * file, poll_table * wait) { return EPOLLERR | EPOLLNVAL; } static long snd_disconnect_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return -ENODEV; } static int snd_disconnect_mmap(struct file *file, struct vm_area_struct *vma) { return -ENODEV; } static int snd_disconnect_fasync(int fd, struct file *file, int on) { return -ENODEV; } static const struct file_operations snd_shutdown_f_ops = { .owner = THIS_MODULE, .llseek = snd_disconnect_llseek, .read = snd_disconnect_read, .write = snd_disconnect_write, .release = snd_disconnect_release, .poll = snd_disconnect_poll, .unlocked_ioctl = snd_disconnect_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = snd_disconnect_ioctl, #endif .mmap = snd_disconnect_mmap, .fasync = snd_disconnect_fasync }; /** * snd_card_disconnect - disconnect all APIs from the file-operations (user space) * @card: soundcard structure * * Disconnects all APIs from the file-operations (user space). * * Return: Zero, otherwise a negative error code. 
* * Note: The current implementation replaces all active file->f_op with special * dummy file operations (they do nothing except release). */ void snd_card_disconnect(struct snd_card *card) { struct snd_monitor_file *mfile; if (!card) return; scoped_guard(spinlock, &card->files_lock) { if (card->shutdown) return; card->shutdown = 1; /* replace file->f_op with special dummy operations */ list_for_each_entry(mfile, &card->files_list, list) { /* it's critical part, use endless loop */ /* we have no room to fail */ mfile->disconnected_f_op = mfile->file->f_op; scoped_guard(spinlock, &shutdown_lock) list_add(&mfile->shutdown_list, &shutdown_files); mfile->file->f_op = &snd_shutdown_f_ops; fops_get(mfile->file->f_op); } } /* notify all connected devices about disconnection */ /* at this point, they cannot respond to any calls except release() */ #if IS_ENABLED(CONFIG_SND_MIXER_OSS) if (snd_mixer_oss_notify_callback) snd_mixer_oss_notify_callback(card, SND_MIXER_OSS_NOTIFY_DISCONNECT); #endif /* notify all devices that we are disconnected */ snd_device_disconnect_all(card); if (card->sync_irq > 0) synchronize_irq(card->sync_irq); snd_info_card_disconnect(card); if (card->registered) { device_del(&card->card_dev); card->registered = false; } /* disable fops (user space) operations for ALSA API */ scoped_guard(mutex, &snd_card_mutex) { snd_cards[card->number] = NULL; clear_bit(card->number, snd_cards_lock); } #ifdef CONFIG_PM wake_up(&card->power_sleep); snd_power_sync_ref(card); #endif } EXPORT_SYMBOL(snd_card_disconnect); /** * snd_card_disconnect_sync - disconnect card and wait until files get closed * @card: card object to disconnect * * This calls snd_card_disconnect() for disconnecting all belonging components * and waits until all pending files get closed. * It assures that all accesses from user-space finished so that the driver * can release its resources gracefully. */ void snd_card_disconnect_sync(struct snd_card *card) { snd_card_disconnect(card); guard(spinlock_irq)(&card->files_lock); wait_event_lock_irq(card->remove_sleep, list_empty(&card->files_list), card->files_lock); } EXPORT_SYMBOL_GPL(snd_card_disconnect_sync); static int snd_card_do_free(struct snd_card *card) { card->releasing = true; #if IS_ENABLED(CONFIG_SND_MIXER_OSS) if (snd_mixer_oss_notify_callback) snd_mixer_oss_notify_callback(card, SND_MIXER_OSS_NOTIFY_FREE); #endif snd_device_free_all(card); if (card->private_free) card->private_free(card); if (snd_info_card_free(card) < 0) { dev_warn(card->dev, "unable to free card info\n"); /* Not fatal error */ } #ifdef CONFIG_SND_DEBUG debugfs_remove(card->debugfs_root); card->debugfs_root = NULL; #endif if (card->release_completion) complete(card->release_completion); if (!card->managed) kfree(card); return 0; } /** * snd_card_free_when_closed - Disconnect the card, free it later eventually * @card: soundcard structure * * Unlike snd_card_free(), this function doesn't try to release the card * resource immediately, but tries to disconnect at first. When the card * is still in use, the function returns before freeing the resources. * The card resources will be freed when the refcount gets to zero. 
* * Return: zero if successful, or a negative error code */ void snd_card_free_when_closed(struct snd_card *card) { if (!card) return; snd_card_disconnect(card); put_device(&card->card_dev); return; } EXPORT_SYMBOL(snd_card_free_when_closed); /** * snd_card_free - frees given soundcard structure * @card: soundcard structure * * This function releases the soundcard structure and the all assigned * devices automatically. That is, you don't have to release the devices * by yourself. * * This function waits until the all resources are properly released. * * Return: Zero. Frees all associated devices and frees the control * interface associated to given soundcard. */ void snd_card_free(struct snd_card *card) { DECLARE_COMPLETION_ONSTACK(released); /* The call of snd_card_free() is allowed from various code paths; * a manual call from the driver and the call via devres_free, and * we need to avoid double-free. Moreover, the release via devres * may call snd_card_free() twice due to its nature, we need to have * the check here at the beginning. */ if (card->releasing) return; card->release_completion = &released; snd_card_free_when_closed(card); /* wait, until all devices are ready for the free operation */ wait_for_completion(&released); } EXPORT_SYMBOL(snd_card_free); /* retrieve the last word of shortname or longname */ static const char *retrieve_id_from_card_name(const char *name) { const char *spos = name; while (*name) { if (isspace(*name) && isalnum(name[1])) spos = name + 1; name++; } return spos; } /* return true if the given id string doesn't conflict any other card ids */ static bool card_id_ok(struct snd_card *card, const char *id) { int i; if (!snd_info_check_reserved_words(id)) return false; for (i = 0; i < snd_ecards_limit; i++) { if (snd_cards[i] && snd_cards[i] != card && !strcmp(snd_cards[i]->id, id)) return false; } return true; } /* copy to card->id only with valid letters from nid */ static void copy_valid_id_string(struct snd_card *card, const char *src, const char *nid) { char *id = card->id; while (*nid && !isalnum(*nid)) nid++; if (isdigit(*nid)) *id++ = isalpha(*src) ? *src : 'D'; while (*nid && (size_t)(id - card->id) < sizeof(card->id) - 1) { if (isalnum(*nid)) *id++ = *nid; nid++; } *id = 0; } /* Set card->id from the given string * If the string conflicts with other ids, add a suffix to make it unique. */ static void snd_card_set_id_no_lock(struct snd_card *card, const char *src, const char *nid) { int len, loops; bool is_default = false; char *id; copy_valid_id_string(card, src, nid); id = card->id; again: /* use "Default" for obviously invalid strings * ("card" conflicts with proc directories) */ if (!*id || !strncmp(id, "card", 4)) { strcpy(id, "Default"); is_default = true; } len = strlen(id); for (loops = 0; loops < SNDRV_CARDS; loops++) { char *spos; char sfxstr[5]; /* "_012" */ int sfxlen; if (card_id_ok(card, id)) return; /* OK */ /* Add _XYZ suffix */ sprintf(sfxstr, "_%X", loops + 1); sfxlen = strlen(sfxstr); if (len + sfxlen >= sizeof(card->id)) spos = id + sizeof(card->id) - sfxlen - 1; else spos = id + len; strcpy(spos, sfxstr); } /* fallback to the default id */ if (!is_default) { *id = 0; goto again; } /* last resort... 
*/ dev_err(card->dev, "unable to set card id (%s)\n", id); if (card->proc_root->name) strscpy(card->id, card->proc_root->name, sizeof(card->id)); } /** * snd_card_set_id - set card identification name * @card: soundcard structure * @nid: new identification string * * This function sets the card identification and checks for name * collisions. */ void snd_card_set_id(struct snd_card *card, const char *nid) { /* check if user specified own card->id */ if (card->id[0] != '\0') return; guard(mutex)(&snd_card_mutex); snd_card_set_id_no_lock(card, nid, nid); } EXPORT_SYMBOL(snd_card_set_id); static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_card *card = container_of(dev, struct snd_card, card_dev); return sysfs_emit(buf, "%s\n", card->id); } static ssize_t id_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_card *card = container_of(dev, struct snd_card, card_dev); char buf1[sizeof(card->id)]; size_t copy = count > sizeof(card->id) - 1 ? sizeof(card->id) - 1 : count; size_t idx; int c; for (idx = 0; idx < copy; idx++) { c = buf[idx]; if (!isalnum(c) && c != '_' && c != '-') return -EINVAL; } memcpy(buf1, buf, copy); buf1[copy] = '\0'; guard(mutex)(&snd_card_mutex); if (!card_id_ok(NULL, buf1)) return -EEXIST; strcpy(card->id, buf1); snd_info_card_id_change(card); return count; } static DEVICE_ATTR_RW(id); static ssize_t number_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_card *card = container_of(dev, struct snd_card, card_dev); return sysfs_emit(buf, "%i\n", card->number); } static DEVICE_ATTR_RO(number); static struct attribute *card_dev_attrs[] = { &dev_attr_id.attr, &dev_attr_number.attr, NULL }; static const struct attribute_group card_dev_attr_group = { .attrs = card_dev_attrs, }; /** * snd_card_add_dev_attr - Append a new sysfs attribute group to card * @card: card instance * @group: attribute group to append * * Return: zero if successful, or a negative error code */ int snd_card_add_dev_attr(struct snd_card *card, const struct attribute_group *group) { int i; /* loop for (arraysize-1) here to keep NULL at the last entry */ for (i = 0; i < ARRAY_SIZE(card->dev_groups) - 1; i++) { if (!card->dev_groups[i]) { card->dev_groups[i] = group; return 0; } } dev_err(card->dev, "Too many groups assigned\n"); return -ENOSPC; } EXPORT_SYMBOL_GPL(snd_card_add_dev_attr); static void trigger_card_free(void *data) { snd_card_free(data); } /** * snd_card_register - register the soundcard * @card: soundcard structure * * This function registers all the devices assigned to the soundcard. * Until calling this, the ALSA control interface is blocked from the * external accesses. Thus, you should call this function at the end * of the initialization of the card. * * Return: Zero otherwise a negative error code if the registration failed. 
*/ int snd_card_register(struct snd_card *card) { int err; if (snd_BUG_ON(!card)) return -EINVAL; if (!card->registered) { err = device_add(&card->card_dev); if (err < 0) return err; card->registered = true; } else { if (card->managed) devm_remove_action(card->dev, trigger_card_free, card); } if (card->managed) { err = devm_add_action(card->dev, trigger_card_free, card); if (err < 0) return err; } err = snd_device_register_all(card); if (err < 0) return err; scoped_guard(mutex, &snd_card_mutex) { if (snd_cards[card->number]) { /* already registered */ return snd_info_card_register(card); /* register pending info */ } if (*card->id) { /* make a unique id name from the given string */ char tmpid[sizeof(card->id)]; memcpy(tmpid, card->id, sizeof(card->id)); snd_card_set_id_no_lock(card, tmpid, tmpid); } else { /* create an id from either shortname or longname */ const char *src; src = *card->shortname ? card->shortname : card->longname; snd_card_set_id_no_lock(card, src, retrieve_id_from_card_name(src)); } snd_cards[card->number] = card; } err = snd_info_card_register(card); if (err < 0) return err; #if IS_ENABLED(CONFIG_SND_MIXER_OSS) if (snd_mixer_oss_notify_callback) snd_mixer_oss_notify_callback(card, SND_MIXER_OSS_NOTIFY_REGISTER); #endif return 0; } EXPORT_SYMBOL(snd_card_register); #ifdef CONFIG_SND_PROC_FS static void snd_card_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { int idx, count; struct snd_card *card; for (idx = count = 0; idx < SNDRV_CARDS; idx++) { guard(mutex)(&snd_card_mutex); card = snd_cards[idx]; if (card) { count++; snd_iprintf(buffer, "%2i [%-15s]: %s - %s\n", idx, card->id, card->driver, card->shortname); snd_iprintf(buffer, " %s\n", card->longname); } } if (!count) snd_iprintf(buffer, "--- no soundcards ---\n"); } #ifdef CONFIG_SND_OSSEMUL void snd_card_info_read_oss(struct snd_info_buffer *buffer) { int idx, count; struct snd_card *card; for (idx = count = 0; idx < SNDRV_CARDS; idx++) { guard(mutex)(&snd_card_mutex); card = snd_cards[idx]; if (card) { count++; snd_iprintf(buffer, "%s\n", card->longname); } } if (!count) { snd_iprintf(buffer, "--- no soundcards ---\n"); } } #endif #ifdef MODULE static void snd_card_module_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { int idx; struct snd_card *card; for (idx = 0; idx < SNDRV_CARDS; idx++) { guard(mutex)(&snd_card_mutex); card = snd_cards[idx]; if (card) snd_iprintf(buffer, "%2i %s\n", idx, card->module->name); } } #endif int __init snd_card_info_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "cards", NULL); if (! entry) return -ENOMEM; entry->c.text.read = snd_card_info_read; if (snd_info_register(entry) < 0) return -ENOMEM; /* freed in error path */ #ifdef MODULE entry = snd_info_create_module_entry(THIS_MODULE, "modules", NULL); if (!entry) return -ENOMEM; entry->c.text.read = snd_card_module_info_read; if (snd_info_register(entry) < 0) return -ENOMEM; /* freed in error path */ #endif return 0; } #endif /* CONFIG_SND_PROC_FS */ /** * snd_component_add - add a component string * @card: soundcard structure * @component: the component id string * * This function adds the component id string to the supported list. * The component can be referred from the alsa-lib. * * Return: Zero otherwise a negative error code. 
*/ int snd_component_add(struct snd_card *card, const char *component) { char *ptr; int len = strlen(component); ptr = strstr(card->components, component); if (ptr != NULL) { if (ptr[len] == '\0' || ptr[len] == ' ') /* already there */ return 1; } if (strlen(card->components) + 1 + len + 1 > sizeof(card->components)) { snd_BUG(); return -ENOMEM; } if (card->components[0] != '\0') strcat(card->components, " "); strcat(card->components, component); return 0; } EXPORT_SYMBOL(snd_component_add); /** * snd_card_file_add - add the file to the file list of the card * @card: soundcard structure * @file: file pointer * * This function adds the file to the file linked-list of the card. * This linked-list is used to keep tracking the connection state, * and to avoid the release of busy resources by hotplug. * * Return: zero or a negative error code. */ int snd_card_file_add(struct snd_card *card, struct file *file) { struct snd_monitor_file *mfile; mfile = kmalloc(sizeof(*mfile), GFP_KERNEL); if (mfile == NULL) return -ENOMEM; mfile->file = file; mfile->disconnected_f_op = NULL; INIT_LIST_HEAD(&mfile->shutdown_list); guard(spinlock)(&card->files_lock); if (card->shutdown) { kfree(mfile); return -ENODEV; } list_add(&mfile->list, &card->files_list); get_device(&card->card_dev); return 0; } EXPORT_SYMBOL(snd_card_file_add); /** * snd_card_file_remove - remove the file from the file list * @card: soundcard structure * @file: file pointer * * This function removes the file formerly added to the card via * snd_card_file_add() function. * If all files are removed and snd_card_free_when_closed() was * called beforehand, it processes the pending release of * resources. * * Return: Zero or a negative error code. */ int snd_card_file_remove(struct snd_card *card, struct file *file) { struct snd_monitor_file *mfile, *found = NULL; scoped_guard(spinlock, &card->files_lock) { list_for_each_entry(mfile, &card->files_list, list) { if (mfile->file == file) { list_del(&mfile->list); scoped_guard(spinlock, &shutdown_lock) list_del(&mfile->shutdown_list); if (mfile->disconnected_f_op) fops_put(mfile->disconnected_f_op); found = mfile; break; } } if (list_empty(&card->files_list)) wake_up_all(&card->remove_sleep); } if (!found) { dev_err(card->dev, "card file remove problem (%p)\n", file); return -ENOENT; } kfree(found); put_device(&card->card_dev); return 0; } EXPORT_SYMBOL(snd_card_file_remove); #ifdef CONFIG_PM /** * snd_power_ref_and_wait - wait until the card gets powered up * @card: soundcard structure * * Take the power_ref reference count of the given card, and * wait until the card gets powered up to SNDRV_CTL_POWER_D0 state. * The refcount is down again while sleeping until power-up, hence this * function can be used for syncing the floating control ops accesses, * typically around calling control ops. * * The caller needs to pull down the refcount via snd_power_unref() later * no matter whether the error is returned from this function or not. * * Return: Zero if successful, or a negative error code. */ int snd_power_ref_and_wait(struct snd_card *card) { snd_power_ref(card); if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0) return 0; wait_event_cmd(card->power_sleep, card->shutdown || snd_power_get_state(card) == SNDRV_CTL_POWER_D0, snd_power_unref(card), snd_power_ref(card)); return card->shutdown ? 
-ENODEV : 0; } EXPORT_SYMBOL_GPL(snd_power_ref_and_wait); /** * snd_power_wait - wait until the card gets powered up (old form) * @card: soundcard structure * * Wait until the card gets powered up to SNDRV_CTL_POWER_D0 state. * * Return: Zero if successful, or a negative error code. */ int snd_power_wait(struct snd_card *card) { int ret; ret = snd_power_ref_and_wait(card); snd_power_unref(card); return ret; } EXPORT_SYMBOL(snd_power_wait); #endif /* CONFIG_PM */
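/*
 * Illustrative sketch only (not part of init.c): how a typical driver
 * probe() chains the card API documented above -- snd_devm_card_new(),
 * filling in card->driver/shortname/longname, then snd_card_register(),
 * with snd_card_free_on_error() covering the error path.  The "mychip"
 * platform driver, its private struct and the literal name strings are
 * hypothetical placeholders.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <sound/core.h>

struct mychip {
        struct snd_card *card;
        /* ... driver-private state ... */
};

static int mychip_probe(struct platform_device *pdev)
{
        struct snd_card *card;
        struct mychip *chip;
        int err;

        /* devres-managed card: snd_card_free() runs automatically on detach */
        err = snd_devm_card_new(&pdev->dev, -1 /* first free slot */, NULL,
                                THIS_MODULE, sizeof(*chip), &card);
        if (err < 0)
                return err;
        chip = card->private_data;      /* the extra_size area after snd_card */
        chip->card = card;

        strscpy(card->driver, "mychip", sizeof(card->driver));
        strscpy(card->shortname, "My Chip", sizeof(card->shortname));
        strscpy(card->longname, "My Chip sound device", sizeof(card->longname));

        /* ... create PCM/mixer components here, e.g. via snd_device_new() ... */

        err = snd_card_register(card);
        if (err < 0)
                return snd_card_free_on_error(&pdev->dev, err);

        platform_set_drvdata(pdev, chip);
        return 0;
}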
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2008 IBM Corporation
 *
 * Author: Mimi Zohar <zohar@us.ibm.com>
 *
 * File: ima_api.c
 *	Implements must_appraise_or_measure, collect_measurement,
 *	appraise_measurement, store_measurement and store_template.
*/ #include <linux/slab.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/xattr.h> #include <linux/evm.h> #include <linux/fsverity.h> #include "ima.h" /* * ima_free_template_entry - free an existing template entry */ void ima_free_template_entry(struct ima_template_entry *entry) { int i; for (i = 0; i < entry->template_desc->num_fields; i++) kfree(entry->template_data[i].data); kfree(entry->digests); kfree(entry); } /* * ima_alloc_init_template - create and initialize a new template entry */ int ima_alloc_init_template(struct ima_event_data *event_data, struct ima_template_entry **entry, struct ima_template_desc *desc) { struct ima_template_desc *template_desc; struct tpm_digest *digests; int i, result = 0; if (desc) template_desc = desc; else template_desc = ima_template_desc_current(); *entry = kzalloc(struct_size(*entry, template_data, template_desc->num_fields), GFP_NOFS); if (!*entry) return -ENOMEM; digests = kcalloc(NR_BANKS(ima_tpm_chip) + ima_extra_slots, sizeof(*digests), GFP_NOFS); if (!digests) { kfree(*entry); *entry = NULL; return -ENOMEM; } (*entry)->digests = digests; (*entry)->template_desc = template_desc; for (i = 0; i < template_desc->num_fields; i++) { const struct ima_template_field *field = template_desc->fields[i]; u32 len; result = field->field_init(event_data, &((*entry)->template_data[i])); if (result != 0) goto out; len = (*entry)->template_data[i].len; (*entry)->template_data_len += sizeof(len); (*entry)->template_data_len += len; } return 0; out: ima_free_template_entry(*entry); *entry = NULL; return result; } /* * ima_store_template - store ima template measurements * * Calculate the hash of a template entry, add the template entry * to an ordered list of measurement entries maintained inside the kernel, * and also update the aggregate integrity value (maintained inside the * configured TPM PCR) over the hashes of the current list of measurement * entries. * * Applications retrieve the current kernel-held measurement list through * the securityfs entries in /sys/kernel/security/ima. The signed aggregate * TPM PCR (called quote) can be retrieved using a TPM user space library * and is used to validate the measurement list. * * Returns 0 on success, error code otherwise */ int ima_store_template(struct ima_template_entry *entry, int violation, struct inode *inode, const unsigned char *filename, int pcr) { static const char op[] = "add_template_measure"; static const char audit_cause[] = "hashing_error"; char *template_name = entry->template_desc->name; int result; if (!violation) { result = ima_calc_field_array_hash(&entry->template_data[0], entry); if (result < 0) { integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, template_name, op, audit_cause, result, 0); return result; } } entry->pcr = pcr; result = ima_add_template_entry(entry, violation, op, inode, filename); return result; } /* * ima_add_violation - add violation to measurement list. * * Violations are flagged in the measurement list with zero hash values. * By extending the PCR with 0xFF's instead of with zeroes, the PCR * value is invalidated. 
*/ void ima_add_violation(struct file *file, const unsigned char *filename, struct ima_iint_cache *iint, const char *op, const char *cause) { struct ima_template_entry *entry; struct inode *inode = file_inode(file); struct ima_event_data event_data = { .iint = iint, .file = file, .filename = filename, .violation = cause }; int violation = 1; int result; /* can overflow, only indicator */ atomic_long_inc(&ima_htable.violations); result = ima_alloc_init_template(&event_data, &entry, NULL); if (result < 0) { result = -ENOMEM; goto err_out; } result = ima_store_template(entry, violation, inode, filename, CONFIG_IMA_MEASURE_PCR_IDX); if (result < 0) ima_free_template_entry(entry); err_out: integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename, op, cause, result, 0); } /** * ima_get_action - appraise & measure decision based on policy. * @idmap: idmap of the mount the inode was found from * @inode: pointer to the inode associated with the object being validated * @cred: pointer to credentials structure to validate * @secid: secid of the task being validated * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXEC, * MAY_APPEND) * @func: caller identifier * @pcr: pointer filled in if matched measure policy sets pcr= * @template_desc: pointer filled in if matched measure policy sets template= * @func_data: func specific data, may be NULL * @allowed_algos: allowlist of hash algorithms for the IMA xattr * * The policy is defined in terms of keypairs: * subj=, obj=, type=, func=, mask=, fsmagic= * subj,obj, and type: are LSM specific. * func: FILE_CHECK | BPRM_CHECK | CREDS_CHECK | MMAP_CHECK | MODULE_CHECK * | KEXEC_CMDLINE | KEY_CHECK | CRITICAL_DATA | SETXATTR_CHECK * | MMAP_CHECK_REQPROT * mask: contains the permission mask * fsmagic: hex value * * Returns IMA_MEASURE, IMA_APPRAISE mask. * */ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode, const struct cred *cred, u32 secid, int mask, enum ima_hooks func, int *pcr, struct ima_template_desc **template_desc, const char *func_data, unsigned int *allowed_algos) { int flags = IMA_MEASURE | IMA_AUDIT | IMA_APPRAISE | IMA_HASH; flags &= ima_policy_flag; return ima_match_policy(idmap, inode, cred, secid, func, mask, flags, pcr, template_desc, func_data, allowed_algos); } static bool ima_get_verity_digest(struct ima_iint_cache *iint, struct inode *inode, struct ima_max_digest_data *hash) { enum hash_algo alg; int digest_len; /* * On failure, 'measure' policy rules will result in a file data * hash containing 0's. */ digest_len = fsverity_get_digest(inode, hash->digest, NULL, &alg); if (digest_len == 0) return false; /* * Unlike in the case of actually calculating the file hash, in * the fsverity case regardless of the hash algorithm, return * the verity digest to be included in the measurement list. A * mismatch between the verity algorithm and the xattr signature * algorithm, if one exists, will be detected later. */ hash->hdr.algo = alg; hash->hdr.length = digest_len; return true; } /* * ima_collect_measurement - collect file measurement * * Calculate the file hash, if it doesn't already exist, * storing the measurement and i_version in the iint. * * Must be called with iint->mutex held. 
* * Return 0 on success, error code otherwise */ int ima_collect_measurement(struct ima_iint_cache *iint, struct file *file, void *buf, loff_t size, enum hash_algo algo, struct modsig *modsig) { const char *audit_cause = "failed"; struct inode *inode = file_inode(file); struct inode *real_inode = d_real_inode(file_dentry(file)); const char *filename = file->f_path.dentry->d_name.name; struct ima_max_digest_data hash; struct kstat stat; int result = 0; int length; void *tmpbuf; u64 i_version = 0; /* * Always collect the modsig, because IMA might have already collected * the file digest without collecting the modsig in a previous * measurement rule. */ if (modsig) ima_collect_modsig(modsig, buf, size); if (iint->flags & IMA_COLLECTED) goto out; /* * Detecting file change is based on i_version. On filesystems * which do not support i_version, support was originally limited * to an initial measurement/appraisal/audit, but was modified to * assume the file changed. */ result = vfs_getattr_nosec(&file->f_path, &stat, STATX_CHANGE_COOKIE, AT_STATX_SYNC_AS_STAT); if (!result && (stat.result_mask & STATX_CHANGE_COOKIE)) i_version = stat.change_cookie; hash.hdr.algo = algo; hash.hdr.length = hash_digest_size[algo]; /* Initialize hash digest to 0's in case of failure */ memset(&hash.digest, 0, sizeof(hash.digest)); if (iint->flags & IMA_VERITY_REQUIRED) { if (!ima_get_verity_digest(iint, inode, &hash)) { audit_cause = "no-verity-digest"; result = -ENODATA; } } else if (buf) { result = ima_calc_buffer_hash(buf, size, &hash.hdr); } else { result = ima_calc_file_hash(file, &hash.hdr); } if (result && result != -EBADF && result != -EINVAL) goto out; length = sizeof(hash.hdr) + hash.hdr.length; tmpbuf = krealloc(iint->ima_hash, length, GFP_NOFS); if (!tmpbuf) { result = -ENOMEM; goto out; } iint->ima_hash = tmpbuf; memcpy(iint->ima_hash, &hash, length); iint->version = i_version; if (real_inode != inode) { iint->real_ino = real_inode->i_ino; iint->real_dev = real_inode->i_sb->s_dev; } /* Possibly temporary failure due to type of read (eg. O_DIRECT) */ if (!result) iint->flags |= IMA_COLLECTED; out: if (result) { if (file->f_flags & O_DIRECT) audit_cause = "failed(directio)"; integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename, "collect_data", audit_cause, result, 0); } return result; } /* * ima_store_measurement - store file measurement * * Create an "ima" template and then store the template by calling * ima_store_template. * * We only get here if the inode has not already been measured, * but the measurement could already exist: * - multiple copies of the same file on either the same or * different filesystems. * - the inode was previously flushed as well as the iint info, * containing the hashing info. * * Must be called with iint->mutex held. 
*/ void ima_store_measurement(struct ima_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, int xattr_len, const struct modsig *modsig, int pcr, struct ima_template_desc *template_desc) { static const char op[] = "add_template_measure"; static const char audit_cause[] = "ENOMEM"; int result = -ENOMEM; struct inode *inode = file_inode(file); struct ima_template_entry *entry; struct ima_event_data event_data = { .iint = iint, .file = file, .filename = filename, .xattr_value = xattr_value, .xattr_len = xattr_len, .modsig = modsig }; int violation = 0; /* * We still need to store the measurement in the case of MODSIG because * we only have its contents to put in the list at the time of * appraisal, but a file measurement from earlier might already exist in * the measurement list. */ if (iint->measured_pcrs & (0x1 << pcr) && !modsig) return; result = ima_alloc_init_template(&event_data, &entry, template_desc); if (result < 0) { integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename, op, audit_cause, result, 0); return; } result = ima_store_template(entry, violation, inode, filename, pcr); if ((!result || result == -EEXIST) && !(file->f_flags & O_DIRECT)) { iint->flags |= IMA_MEASURED; iint->measured_pcrs |= (0x1 << pcr); } if (result < 0) ima_free_template_entry(entry); } void ima_audit_measurement(struct ima_iint_cache *iint, const unsigned char *filename) { struct audit_buffer *ab; char *hash; const char *algo_name = hash_algo_name[iint->ima_hash->algo]; int i; if (iint->flags & IMA_AUDITED) return; hash = kzalloc((iint->ima_hash->length * 2) + 1, GFP_KERNEL); if (!hash) return; for (i = 0; i < iint->ima_hash->length; i++) hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]); hash[i * 2] = '\0'; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_INTEGRITY_RULE); if (!ab) goto out; audit_log_format(ab, "file="); audit_log_untrustedstring(ab, filename); audit_log_format(ab, " hash=\"%s:%s\"", algo_name, hash); audit_log_task_info(ab); audit_log_end(ab); iint->flags |= IMA_AUDITED; out: kfree(hash); return; } /* * ima_d_path - return a pointer to the full pathname * * Attempt to return a pointer to the full pathname for use in the * IMA measurement list, IMA audit records, and auditing logs. * * On failure, return a pointer to a copy of the filename, not dname. * Returning a pointer to dname, could result in using the pointer * after the memory has been freed. */ const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) { char *pathname = NULL; *pathbuf = __getname(); if (*pathbuf) { pathname = d_absolute_path(path, *pathbuf, PATH_MAX); if (IS_ERR(pathname)) { __putname(*pathbuf); *pathbuf = NULL; pathname = NULL; } } if (!pathname) { strscpy(namebuf, path->dentry->d_name.name, NAME_MAX); pathname = namebuf; } return pathname; }
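/*
 * Illustrative sketch only (not part of ima_api.c): how the helpers in
 * this file are typically chained by the measurement path, loosely
 * modeled on process_measurement() in ima_main.c.  Locking (iint->mutex),
 * appraisal/audit handling and most error paths are omitted;
 * measure_file_sketch() is a hypothetical wrapper, and the
 * ima_iint_find() lookup is assumed to return an existing cache entry.
 */
#include "ima.h"

static void measure_file_sketch(struct mnt_idmap *idmap, struct file *file,
                                const struct cred *cred, u32 secid, int mask,
                                enum ima_hooks func)
{
        struct inode *inode = file_inode(file);
        struct ima_template_desc *template_desc = NULL;
        struct ima_iint_cache *iint;
        char *pathbuf = NULL;
        char filename[NAME_MAX];
        const char *pathname;
        int action, pcr = CONFIG_IMA_MEASURE_PCR_IDX;

        /* 1. Ask the policy what (if anything) to do with this file. */
        action = ima_get_action(idmap, inode, cred, secid, mask, func,
                                &pcr, &template_desc, NULL, NULL);
        if (!(action & IMA_MEASURE))
                return;

        iint = ima_iint_find(inode);            /* assumed pre-existing entry */
        if (!iint)
                return;

        /* 2. Hash the file contents into the iint (needs iint->mutex). */
        if (ima_collect_measurement(iint, file, NULL, 0, ima_hash_algo, NULL))
                return;

        /* 3. Add a template entry to the measurement list / extend the PCR. */
        pathname = ima_d_path(&file->f_path, &pathbuf, filename);
        ima_store_measurement(iint, file, pathname, NULL, 0, NULL, pcr,
                              template_desc);
        if (pathbuf)
                __putname(pathbuf);
}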
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Supervisor Mode Access Prevention support
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: H. Peter Anvin <hpa@linux.intel.com>
 */

#ifndef _ASM_X86_SMAP_H
#define _ASM_X86_SMAP_H

#include <asm/nops.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

/* "Raw" instruction opcodes */
#define __ASM_CLAC	".byte 0x0f,0x01,0xca"
#define __ASM_STAC	".byte 0x0f,0x01,0xcb"

#ifdef __ASSEMBLY__

#define ASM_CLAC \
	ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
#define ASM_STAC \
	ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP

#else /* __ASSEMBLY__ */

static __always_inline void clac(void)
{
	/* Note: a barrier is implicit in alternative() */
	alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
}

static __always_inline void stac(void)
{
	/* Note: a barrier is implicit in alternative() */
	alternative("", __ASM_STAC, X86_FEATURE_SMAP);
}

static __always_inline unsigned long smap_save(void)
{
	unsigned long flags;

	asm volatile ("# smap_save\n\t"
		      ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t",
				  X86_FEATURE_SMAP)
		      : "=rm" (flags) : : "memory", "cc");

	return flags;
}

static __always_inline void smap_restore(unsigned long flags)
{
	asm volatile ("# smap_restore\n\t"
		      ALTERNATIVE("", "push %0; popf\n\t",
				  X86_FEATURE_SMAP)
		      : : "g" (flags) : "memory", "cc");
}

/* These macros can be used in asm() statements */
#define ASM_CLAC \
	ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP)
#define ASM_STAC \
	ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)

#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_SMAP_H */
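/*
 * Illustrative sketch only (not part of smap.h): the ordering that the
 * uaccess machinery implements around a supervisor access to user
 * memory.  Real code reaches this via get_user()/copy_from_user()/
 * user_access_begin(), which emit ASM_STAC/ASM_CLAC themselves;
 * sketch_read_user_byte() is hypothetical and deliberately omits the
 * exception-table fault handling a real accessor needs.
 */
static __always_inline u8 sketch_read_user_byte(const u8 __user *uaddr)
{
	u8 val;

	stac();				/* EFLAGS.AC = 1: user pages accessible */
	val = *(const u8 __force *)uaddr;	/* the actual user access */
	clac();				/* EFLAGS.AC = 0: SMAP enforced again */

	return val;
}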
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_VSYSCALL_H
#define _ASM_X86_VSYSCALL_H

#include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h>
#include <asm/page_types.h>

#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
extern void set_vsyscall_pgtable_user_bits(pgd_t *root);

/*
 * Called on instruction fetch fault in vsyscall page.
 * Returns true if handled.
 */
extern bool emulate_vsyscall(unsigned long error_code,
			     struct pt_regs *regs, unsigned long address);
#else
static inline void map_vsyscall(void) {}
static inline bool emulate_vsyscall(unsigned long error_code,
				    struct pt_regs *regs, unsigned long address)
{
	return false;
}
#endif

/*
 * The (legacy) vsyscall page is the long page in the kernel portion
 * of the address space that has user-accessible permissions.
 */
static inline bool is_vsyscall_vaddr(unsigned long vaddr)
{
	return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
}

#endif /* _ASM_X86_VSYSCALL_H */
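/*
 * Illustrative sketch only (not part of vsyscall.h): how a fault path is
 * expected to combine the two helpers above, loosely modeled on the x86
 * page-fault code.  handle_vsyscall_fault_sketch() is a hypothetical
 * wrapper.
 */
static bool handle_vsyscall_fault_sketch(struct pt_regs *regs,
					 unsigned long error_code,
					 unsigned long address)
{
	/* Only faults inside the legacy vsyscall page are of interest. */
	if (!is_vsyscall_vaddr(address))
		return false;

	/*
	 * With CONFIG_X86_VSYSCALL_EMULATION=n this is the inline stub that
	 * returns false, and the fault is handled the normal way.
	 */
	return emulate_vsyscall(error_code, regs, address);
}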
167 253 252 179 253 241 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM rpm #if !defined(_TRACE_RUNTIME_POWER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RUNTIME_POWER_H #include <linux/ktime.h> #include <linux/tracepoint.h> struct device; /* * The rpm_internal events are used for tracing some important * runtime pm internal functions. */ DECLARE_EVENT_CLASS(rpm_internal, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags), TP_STRUCT__entry( __string( name, dev_name(dev) ) __field( int, flags ) __field( int , usage_count ) __field( int , disable_depth ) __field( int , runtime_auto ) __field( int , request_pending ) __field( int , irq_safe ) __field( int , child_count ) ), TP_fast_assign( __assign_str(name, dev_name(dev)); __entry->flags = flags; __entry->usage_count = atomic_read( &dev->power.usage_count); __entry->disable_depth = dev->power.disable_depth; __entry->runtime_auto = dev->power.runtime_auto; __entry->request_pending = dev->power.request_pending; __entry->irq_safe = dev->power.irq_safe; __entry->child_count = atomic_read( &dev->power.child_count); ), TP_printk("%s flags-%x cnt-%-2d dep-%-2d auto-%-1d p-%-1d" " irq-%-1d child-%d", __get_str(name), __entry->flags, __entry->usage_count, __entry->disable_depth, __entry->runtime_auto, __entry->request_pending, __entry->irq_safe, __entry->child_count ) ); DEFINE_EVENT(rpm_internal, rpm_suspend, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_resume, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_idle, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); DEFINE_EVENT(rpm_internal, rpm_usage, TP_PROTO(struct device *dev, int flags), TP_ARGS(dev, flags) ); TRACE_EVENT(rpm_return_int, TP_PROTO(struct device *dev, unsigned long ip, int ret), TP_ARGS(dev, ip, ret), TP_STRUCT__entry( __string( name, dev_name(dev)) __field( unsigned long, ip ) __field( int, ret ) ), TP_fast_assign( __assign_str(name, dev_name(dev)); __entry->ip = ip; __entry->ret = ret; ), TP_printk("%pS:%s ret=%d", (void *)__entry->ip, __get_str(name), __entry->ret) ); #define RPM_STATUS_STRINGS \ EM(RPM_INVALID, "RPM_INVALID") \ EM(RPM_ACTIVE, "RPM_ACTIVE") \ EM(RPM_RESUMING, "RPM_RESUMING") \ EM(RPM_SUSPENDED, "RPM_SUSPENDED") \ EMe(RPM_SUSPENDING, "RPM_SUSPENDING") /* Enums require being exported to userspace, for user tool parsing. */ #undef EM #undef EMe #define EM(a, b) TRACE_DEFINE_ENUM(a); #define EMe(a, b) TRACE_DEFINE_ENUM(a); RPM_STATUS_STRINGS /* * Now redefine the EM() and EMe() macros to map the enums to the strings that * will be printed in the output. 
*/ #undef EM #undef EMe #define EM(a, b) { a, b }, #define EMe(a, b) { a, b } TRACE_EVENT(rpm_status, TP_PROTO(struct device *dev, enum rpm_status status), TP_ARGS(dev, status), TP_STRUCT__entry( __string(name, dev_name(dev)) __field(int, status) ), TP_fast_assign( __assign_str(name, dev_name(dev)); __entry->status = status; ), TP_printk("%s status=%s", __get_str(name), __print_symbolic(__entry->status, RPM_STATUS_STRINGS)) ); #endif /* _TRACE_RUNTIME_POWER_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
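/*
 * Illustrative sketch only (not part of this header): how the runtime-PM
 * core emits these events, loosely modeled on drivers/base/power/runtime.c.
 * rpm_suspend_sketch() and its do_suspend callback are hypothetical; only
 * the trace_rpm_suspend()/trace_rpm_status()/trace_rpm_return_int() calls
 * correspond to the tracepoints defined above.
 */
#include <linux/pm_runtime.h>
#include <trace/events/rpm.h>

static int rpm_suspend_sketch(struct device *dev, int rpmflags,
			      int (*do_suspend)(struct device *dev))
{
	int retval;

	trace_rpm_suspend(dev, rpmflags);	/* rpm_internal class event */

	retval = do_suspend(dev);
	if (!retval)
		trace_rpm_status(dev, RPM_SUSPENDED);

	/* _THIS_IP_ records the call site, as runtime.c does */
	trace_rpm_return_int(dev, _THIS_IP_, retval);
	return retval;
}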
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PAGEMAP_H
#define _LINUX_PAGEMAP_H

/*
 * Copyright 1995 Linus Torvalds
 */
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/compiler.h>
#include <linux/uaccess.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/hardirq.h> /* for in_interrupt() */
#include <linux/hugetlb_inline.h>

struct
folio_batch; unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); static inline void invalidate_remote_inode(struct inode *inode) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) invalidate_mapping_pages(inode->i_mapping, 0, -1); } int invalidate_inode_pages2(struct address_space *mapping); int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int kiocb_invalidate_pages(struct kiocb *iocb, size_t count); void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count); int write_inode_now(struct inode *, int sync); int filemap_fdatawrite(struct address_space *); int filemap_flush(struct address_space *); int filemap_fdatawait_keep_errors(struct address_space *mapping); int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); int filemap_fdatawait_range_keep_errors(struct address_space *mapping, loff_t start_byte, loff_t end_byte); static inline int filemap_fdatawait(struct address_space *mapping) { return filemap_fdatawait_range(mapping, 0, LLONG_MAX); } bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); int filemap_check_errors(struct address_space *mapping); void __filemap_set_wb_err(struct address_space *mapping, int err); int filemap_fdatawrite_wbc(struct address_space *mapping, struct writeback_control *wbc); int kiocb_write_and_wait(struct kiocb *iocb, size_t count); static inline int filemap_write_and_wait(struct address_space *mapping) { return filemap_write_and_wait_range(mapping, 0, LLONG_MAX); } /** * filemap_set_wb_err - set a writeback error on an address_space * @mapping: mapping in which to set writeback error * @err: error to be set in mapping * * When writeback fails in some way, we must record that error so that * userspace can be informed when fsync and the like are called. We endeavor * to report errors on any file that was open at the time of the error. Some * internal callers also need to know when writeback errors have occurred. * * When a writeback error occurs, most filesystems will want to call * filemap_set_wb_err to record the error in the mapping so that it will be * automatically reported whenever fsync is called on the file. */ static inline void filemap_set_wb_err(struct address_space *mapping, int err) { /* Fastpath for common case of no error */ if (unlikely(err)) __filemap_set_wb_err(mapping, err); } /** * filemap_check_wb_err - has an error occurred since the mark was sampled? * @mapping: mapping to check for writeback errors * @since: previously-sampled errseq_t * * Grab the errseq_t value from the mapping, and see if it has changed "since" * the given value was sampled. * * If it has then report the latest error set, otherwise return 0. */ static inline int filemap_check_wb_err(struct address_space *mapping, errseq_t since) { return errseq_check(&mapping->wb_err, since); } /** * filemap_sample_wb_err - sample the current errseq_t to test for later errors * @mapping: mapping to be sampled * * Writeback errors are always reported relative to a particular sample point * in the past. This function provides those sample points. 
*/ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) { return errseq_sample(&mapping->wb_err); } /** * file_sample_sb_err - sample the current errseq_t to test for later errors * @file: file pointer to be sampled * * Grab the most current superblock-level errseq_t value for the given * struct file. */ static inline errseq_t file_sample_sb_err(struct file *file) { return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); } /* * Flush file data before changing attributes. Caller must hold any locks * required to prevent further writes to this file until we're done setting * flags. */ static inline int inode_drain_writes(struct inode *inode) { inode_dio_wait(inode); return filemap_write_and_wait(inode->i_mapping); } static inline bool mapping_empty(struct address_space *mapping) { return xa_empty(&mapping->i_pages); } /* * mapping_shrinkable - test if page cache state allows inode reclaim * @mapping: the page cache mapping * * This checks the mapping's cache state for the pupose of inode * reclaim and LRU management. * * The caller is expected to hold the i_lock, but is not required to * hold the i_pages lock, which usually protects cache state. That's * because the i_lock and the list_lru lock that protect the inode and * its LRU state don't nest inside the irq-safe i_pages lock. * * Cache deletions are performed under the i_lock, which ensures that * when an inode goes empty, it will reliably get queued on the LRU. * * Cache additions do not acquire the i_lock and may race with this * check, in which case we'll report the inode as shrinkable when it * has cache pages. This is okay: the shrinker also checks the * refcount and the referenced bit, which will be elevated or set in * the process of adding new cache pages to an inode. */ static inline bool mapping_shrinkable(struct address_space *mapping) { void *head; /* * On highmem systems, there could be lowmem pressure from the * inodes before there is highmem pressure from the page * cache. Make inodes shrinkable regardless of cache state. */ if (IS_ENABLED(CONFIG_HIGHMEM)) return true; /* Cache completely empty? Shrink away. */ head = rcu_access_pointer(mapping->i_pages.xa_head); if (!head) return true; /* * The xarray stores single offset-0 entries directly in the * head pointer, which allows non-resident page cache entries * to escape the shadow shrinker's list of xarray nodes. The * inode shrinker needs to pick them up under memory pressure. */ if (!xa_is_node(head) && xa_is_value(head)) return true; return false; } /* * Bits in mapping->flags. */ enum mapping_flags { AS_EIO = 0, /* IO error on async write */ AS_ENOSPC = 1, /* ENOSPC on async write */ AS_MM_ALL_LOCKS = 2, /* under mm_take_all_locks() */ AS_UNEVICTABLE = 3, /* e.g., ramdisk, SHM_LOCK */ AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, AS_LARGE_FOLIO_SUPPORT = 6, AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ AS_STABLE_WRITES, /* must wait for writeback before modifying folio contents */ AS_UNMOVABLE, /* The mapping cannot be moved, ever */ }; /** * mapping_set_error - record a writeback error in the address_space * @mapping: the mapping in which an error should be set * @error: the error to set in the mapping * * When writeback fails in some way, we must record that error so that * userspace can be informed when fsync and the like are called. We endeavor * to report errors on any file that was open at the time of the error. 
Some * internal callers also need to know when writeback errors have occurred. * * When a writeback error occurs, most filesystems will want to call * mapping_set_error to record the error in the mapping so that it can be * reported when the application calls fsync(2). */ static inline void mapping_set_error(struct address_space *mapping, int error) { if (likely(!error)) return; /* Record in wb_err for checkers using errseq_t based tracking */ __filemap_set_wb_err(mapping, error); /* Record it in superblock */ if (mapping->host) errseq_set(&mapping->host->i_sb->s_wb_err, error); /* Record it in flags for now, for legacy callers */ if (error == -ENOSPC) set_bit(AS_ENOSPC, &mapping->flags); else set_bit(AS_EIO, &mapping->flags); } static inline void mapping_set_unevictable(struct address_space *mapping) { set_bit(AS_UNEVICTABLE, &mapping->flags); } static inline void mapping_clear_unevictable(struct address_space *mapping) { clear_bit(AS_UNEVICTABLE, &mapping->flags); } static inline bool mapping_unevictable(struct address_space *mapping) { return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags); } static inline void mapping_set_exiting(struct address_space *mapping) { set_bit(AS_EXITING, &mapping->flags); } static inline int mapping_exiting(struct address_space *mapping) { return test_bit(AS_EXITING, &mapping->flags); } static inline void mapping_set_no_writeback_tags(struct address_space *mapping) { set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } static inline int mapping_use_writeback_tags(struct address_space *mapping) { return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } static inline bool mapping_release_always(const struct address_space *mapping) { return test_bit(AS_RELEASE_ALWAYS, &mapping->flags); } static inline void mapping_set_release_always(struct address_space *mapping) { set_bit(AS_RELEASE_ALWAYS, &mapping->flags); } static inline void mapping_clear_release_always(struct address_space *mapping) { clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); } static inline bool mapping_stable_writes(const struct address_space *mapping) { return test_bit(AS_STABLE_WRITES, &mapping->flags); } static inline void mapping_set_stable_writes(struct address_space *mapping) { set_bit(AS_STABLE_WRITES, &mapping->flags); } static inline void mapping_clear_stable_writes(struct address_space *mapping) { clear_bit(AS_STABLE_WRITES, &mapping->flags); } static inline void mapping_set_unmovable(struct address_space *mapping) { /* * It's expected unmovable mappings are also unevictable. Compaction * migrate scanner (isolate_migratepages_block()) relies on this to * reduce page locking. */ set_bit(AS_UNEVICTABLE, &mapping->flags); set_bit(AS_UNMOVABLE, &mapping->flags); } static inline bool mapping_unmovable(struct address_space *mapping) { return test_bit(AS_UNMOVABLE, &mapping->flags); } static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; } /* Restricts the given gfp_mask to what the mapping allows. */ static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, gfp_t gfp_mask) { return mapping_gfp_mask(mapping) & gfp_mask; } /* * This is non-atomic. Only to be used before the mapping is activated. * Probably needs a barrier... */ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) { m->gfp_mask = mask; } /** * mapping_set_large_folios() - Indicate the file supports large folios. * @mapping: The file. 
* * The filesystem should call this function in its inode constructor to * indicate that the VFS can use large folios to cache the contents of * the file. * * Context: This should not be called while the inode is active as it * is non-atomic. */ static inline void mapping_set_large_folios(struct address_space *mapping) { __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } /* * Large folio support currently depends on THP. These dependencies are * being worked on but are not yet fixed. */ static inline bool mapping_large_folio_support(struct address_space *mapping) { return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } static inline int filemap_nr_thps(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS return atomic_read(&mapping->nr_thps); #else return 0; #endif } static inline void filemap_nr_thps_inc(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS if (!mapping_large_folio_support(mapping)) atomic_inc(&mapping->nr_thps); #else WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); #endif } static inline void filemap_nr_thps_dec(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS if (!mapping_large_folio_support(mapping)) atomic_dec(&mapping->nr_thps); #else WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); #endif } struct address_space *page_mapping(struct page *); struct address_space *folio_mapping(struct folio *); struct address_space *swapcache_mapping(struct folio *); /** * folio_file_mapping - Find the mapping this folio belongs to. * @folio: The folio. * * For folios which are in the page cache, return the mapping that this * page belongs to. Folios in the swap cache return the mapping of the * swap file or swap device where the data is stored. This is different * from the mapping returned by folio_mapping(). The only reason to * use it is if, like NFS, you return 0 from ->activate_swapfile. * * Do not call this for folios which aren't in the page cache or swap cache. */ static inline struct address_space *folio_file_mapping(struct folio *folio) { if (unlikely(folio_test_swapcache(folio))) return swapcache_mapping(folio); return folio->mapping; } /** * folio_flush_mapping - Find the file mapping this folio belongs to. * @folio: The folio. * * For folios which are in the page cache, return the mapping that this * page belongs to. Anonymous folios return NULL, even if they're in * the swap cache. Other kinds of folio also return NULL. * * This is ONLY used by architecture cache flushing code. If you aren't * writing cache flushing code, you want either folio_mapping() or * folio_file_mapping(). */ static inline struct address_space *folio_flush_mapping(struct folio *folio) { if (unlikely(folio_test_swapcache(folio))) return NULL; return folio_mapping(folio); } static inline struct address_space *page_file_mapping(struct page *page) { return folio_file_mapping(page_folio(page)); } /** * folio_inode - Get the host inode for this folio. * @folio: The folio. * * For folios which are in the page cache, return the inode that this folio * belongs to. * * Do not call this for folios which aren't in the page cache. */ static inline struct inode *folio_inode(struct folio *folio) { return folio->mapping->host; } /** * folio_attach_private - Attach private data to a folio. * @folio: Folio to attach data to. * @data: Data to attach to folio. * * Attaching private data to a folio increments the page's reference count. * The data must be detached before the folio will be freed. 
*/ static inline void folio_attach_private(struct folio *folio, void *data) { folio_get(folio); folio->private = data; folio_set_private(folio); } /** * folio_change_private - Change private data on a folio. * @folio: Folio to change the data on. * @data: Data to set on the folio. * * Change the private data attached to a folio and return the old * data. The page must previously have had data attached and the data * must be detached before the folio will be freed. * * Return: Data that was previously attached to the folio. */ static inline void *folio_change_private(struct folio *folio, void *data) { void *old = folio_get_private(folio); folio->private = data; return old; } /** * folio_detach_private - Detach private data from a folio. * @folio: Folio to detach data from. * * Removes the data that was previously attached to the folio and decrements * the refcount on the page. * * Return: Data that was attached to the folio. */ static inline void *folio_detach_private(struct folio *folio) { void *data = folio_get_private(folio); if (!folio_test_private(folio)) return NULL; folio_clear_private(folio); folio->private = NULL; folio_put(folio); return data; } static inline void attach_page_private(struct page *page, void *data) { folio_attach_private(page_folio(page), data); } static inline void *detach_page_private(struct page *page) { return folio_detach_private(page_folio(page)); } /* * There are some parts of the kernel which assume that PMD entries * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then, * limit the maximum allocation order to PMD size. I'm not aware of any * assumptions about maximum order if THP are disabled, but 8 seems like * a good order (that's 1MB if you're using 4kB pages) */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER #else #define MAX_PAGECACHE_ORDER 8 #endif #ifdef CONFIG_NUMA struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); #else static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { return folio_alloc(gfp, order); } #endif static inline struct page *__page_cache_alloc(gfp_t gfp) { return &filemap_alloc_folio(gfp, 0)->page; } static inline struct page *page_cache_alloc(struct address_space *x) { return __page_cache_alloc(mapping_gfp_mask(x)); } static inline gfp_t readahead_gfp_mask(struct address_space *x) { return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN; } typedef int filler_t(struct file *, struct folio *); pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); /** * typedef fgf_t - Flags for getting folios from the page cache. * * Most users of the page cache will not need to use these flags; * there are convenience functions such as filemap_get_folio() and * filemap_lock_folio(). For users which need more control over exactly * what is done with the folios, these flags to __filemap_get_folio() * are available. * * * %FGP_ACCESSED - The folio will be marked accessed. * * %FGP_LOCK - The folio is returned locked. * * %FGP_CREAT - If no folio is present then a new folio is allocated, * added to the page cache and the VM's LRU list. The folio is * returned locked. * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the * folio is already in cache. If the folio was allocated, unlock it * before returning so the caller can do the same dance. * * %FGP_WRITE - The folio will be written to by the caller. 
* * %FGP_NOFS - __GFP_FS will get cleared in gfp. * * %FGP_NOWAIT - Don't block on the folio lock. * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin() * implementation. */ typedef unsigned int __bitwise fgf_t; #define FGP_ACCESSED ((__force fgf_t)0x00000001) #define FGP_LOCK ((__force fgf_t)0x00000002) #define FGP_CREAT ((__force fgf_t)0x00000004) #define FGP_WRITE ((__force fgf_t)0x00000008) #define FGP_NOFS ((__force fgf_t)0x00000010) #define FGP_NOWAIT ((__force fgf_t)0x00000020) #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) #define FGP_STABLE ((__force fgf_t)0x00000080) #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) /** * fgf_set_order - Encode a length in the fgf_t flags. * @size: The suggested size of the folio to create. * * The caller of __filemap_get_folio() can use this to suggest a preferred * size for the folio that is created. If there is already a folio at * the index, it will be returned, no matter what its size. If a folio * is freshly created, it may be of a different size than requested * due to alignment constraints, memory pressure, or the presence of * other folios at nearby indices. */ static inline fgf_t fgf_set_order(size_t size) { unsigned int shift = ilog2(size); if (shift <= PAGE_SHIFT) return 0; return (__force fgf_t)((shift - PAGE_SHIFT) << 26); } void *filemap_get_entry(struct address_space *mapping, pgoff_t index); struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp); struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp); /** * filemap_get_folio - Find and get a folio. * @mapping: The address_space to search. * @index: The page index. * * Looks up the page cache entry at @mapping & @index. If a folio is * present, it is returned with an increased refcount. * * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for * this index. Will not return a shadow, swap or DAX entry. */ static inline struct folio *filemap_get_folio(struct address_space *mapping, pgoff_t index) { return __filemap_get_folio(mapping, index, 0, 0); } /** * filemap_lock_folio - Find and lock a folio. * @mapping: The address_space to search. * @index: The page index. * * Looks up the page cache entry at @mapping & @index. If a folio is * present, it is returned locked with an increased refcount. * * Context: May sleep. * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for * this index. Will not return a shadow, swap or DAX entry. */ static inline struct folio *filemap_lock_folio(struct address_space *mapping, pgoff_t index) { return __filemap_get_folio(mapping, index, FGP_LOCK, 0); } /** * filemap_grab_folio - grab a folio from the page cache * @mapping: The address space to search * @index: The page index * * Looks up the page cache entry at @mapping & @index. If no folio is found, * a new folio is created. The folio is locked, marked as accessed, and * returned. * * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found * and failed to create a folio. 
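 *
 * A minimal editorial usage sketch (the caller and its error handling are
 * hypothetical, not taken from any filesystem): grab a folio, modify it,
 * then unlock and release it.
 *
 *	folio = filemap_grab_folio(mapping, index);
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *	... fill or update the folio contents here ...
 *	folio_unlock(folio);
 *	folio_put(folio);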
*/ static inline struct folio *filemap_grab_folio(struct address_space *mapping, pgoff_t index) { return __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mapping_gfp_mask(mapping)); } /** * find_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index * * Looks up the page cache slot at @mapping & @offset. If there is a * page cache page, it is returned with an increased refcount. * * Otherwise, %NULL is returned. */ static inline struct page *find_get_page(struct address_space *mapping, pgoff_t offset) { return pagecache_get_page(mapping, offset, 0, 0); } static inline struct page *find_get_page_flags(struct address_space *mapping, pgoff_t offset, fgf_t fgp_flags) { return pagecache_get_page(mapping, offset, fgp_flags, 0); } /** * find_lock_page - locate, pin and lock a pagecache page * @mapping: the address_space to search * @index: the page index * * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, it is returned locked and with an increased * refcount. * * Context: May sleep. * Return: A struct page or %NULL if there is no page in the cache for this * index. */ static inline struct page *find_lock_page(struct address_space *mapping, pgoff_t index) { return pagecache_get_page(mapping, index, FGP_LOCK, 0); } /** * find_or_create_page - locate or add a pagecache page * @mapping: the page's address_space * @index: the page's index into the mapping * @gfp_mask: page allocation mode * * Looks up the page cache slot at @mapping & @offset. If there is a * page cache page, it is returned locked and with an increased * refcount. * * If the page is not present, a new page is allocated using @gfp_mask * and added to the page cache and the VM's LRU list. The page is * returned locked and with an increased refcount. * * On memory exhaustion, %NULL is returned. * * find_or_create_page() may sleep, even if @gfp_flags specifies an * atomic allocation! */ static inline struct page *find_or_create_page(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask) { return pagecache_get_page(mapping, index, FGP_LOCK|FGP_ACCESSED|FGP_CREAT, gfp_mask); } /** * grab_cache_page_nowait - returns locked page at given index in given cache * @mapping: target address_space * @index: the page index * * Same as grab_cache_page(), but do not wait if the page is unavailable. * This is intended for speculative data generators, where the data can * be regenerated if the page couldn't be grabbed. This routine should * be safe to call while holding the lock for another page. * * Clear __GFP_FS when allocating the page to avoid recursion into the fs * and deadlock against the caller's locked page. */ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) { return pagecache_get_page(mapping, index, FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, mapping_gfp_mask(mapping)); } #define swapcache_index(folio) __page_file_index(&(folio)->page) /** * folio_index - File index of a folio. * @folio: The folio. * * For a folio which is either in the page cache or the swap cache, * return its index within the address_space it belongs to. If you know * the page is definitely in the page cache, you can look at the folio's * index directly. * * Return: The index (offset in units of pages) of a folio in its file. 
*/ static inline pgoff_t folio_index(struct folio *folio) { if (unlikely(folio_test_swapcache(folio))) return swapcache_index(folio); return folio->index; } /** * folio_next_index - Get the index of the next folio. * @folio: The current folio. * * Return: The index of the folio which follows this folio in the file. */ static inline pgoff_t folio_next_index(struct folio *folio) { return folio->index + folio_nr_pages(folio); } /** * folio_file_page - The page for a particular index. * @folio: The folio which contains this index. * @index: The index we want to look up. * * Sometimes after looking up a folio in the page cache, we need to * obtain the specific page for an index (eg a page fault). * * Return: The page containing the file data for this index. */ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) { return folio_page(folio, index & (folio_nr_pages(folio) - 1)); } /** * folio_contains - Does this folio contain this index? * @folio: The folio. * @index: The page index within the file. * * Context: The caller should have the page locked in order to prevent * (eg) shmem from moving the page between the page cache and swap cache * and changing its index in the middle of the operation. * Return: true or false. */ static inline bool folio_contains(struct folio *folio, pgoff_t index) { return index - folio_index(folio) < folio_nr_pages(folio); } /* * Given the page we found in the page cache, return the page corresponding * to this index in the file */ static inline struct page *find_subpage(struct page *head, pgoff_t index) { /* HugeTLBfs wants the head page regardless */ if (PageHuge(head)) return head; return head + (index & (thp_nr_pages(head) - 1)); } unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_contig(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index); /* * Returns locked page at given index in given cache, creating it if needed. */ static inline struct page *grab_cache_page(struct address_space *mapping, pgoff_t index) { return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); } struct folio *read_cache_folio(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index, gfp_t flags); struct page *read_cache_page(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); extern struct page * read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); static inline struct page *read_mapping_page(struct address_space *mapping, pgoff_t index, struct file *file) { return read_cache_page(mapping, index, NULL, file); } static inline struct folio *read_mapping_folio(struct address_space *mapping, pgoff_t index, struct file *file) { return read_cache_folio(mapping, index, NULL, file); } /* * Get the offset in PAGE_SIZE (even for hugetlb pages). 
*/ static inline pgoff_t page_to_pgoff(struct page *page) { struct page *head; if (likely(!PageTransTail(page))) return page->index; head = compound_head(page); /* * We don't initialize ->index for tail pages: calculate based on * head page */ return head->index + page - head; } /* * Return byte-offset into filesystem object for page. */ static inline loff_t page_offset(struct page *page) { return ((loff_t)page->index) << PAGE_SHIFT; } static inline loff_t page_file_offset(struct page *page) { return ((loff_t)page_index(page)) << PAGE_SHIFT; } /** * folio_pos - Returns the byte position of this folio in its file. * @folio: The folio. */ static inline loff_t folio_pos(struct folio *folio) { return page_offset(&folio->page); } /** * folio_file_pos - Returns the byte position of this folio in its file. * @folio: The folio. * * This differs from folio_pos() for folios which belong to a swap file. * NFS is the only filesystem today which needs to use folio_file_pos(). */ static inline loff_t folio_file_pos(struct folio *folio) { return page_file_offset(&folio->page); } /* * Get the offset in PAGE_SIZE (even for hugetlb folios). */ static inline pgoff_t folio_pgoff(struct folio *folio) { return folio->index; } static inline pgoff_t linear_page_index(struct vm_area_struct *vma, unsigned long address) { pgoff_t pgoff; pgoff = (address - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; return pgoff; } struct wait_page_key { struct folio *folio; int bit_nr; int page_match; }; struct wait_page_queue { struct folio *folio; int bit_nr; wait_queue_entry_t wait; }; static inline bool wake_page_match(struct wait_page_queue *wait_page, struct wait_page_key *key) { if (wait_page->folio != key->folio) return false; key->page_match = 1; if (wait_page->bit_nr != key->bit_nr) return false; return true; } void __folio_lock(struct folio *folio); int __folio_lock_killable(struct folio *folio); vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf); void unlock_page(struct page *page); void folio_unlock(struct folio *folio); /** * folio_trylock() - Attempt to lock a folio. * @folio: The folio to attempt to lock. * * Sometimes it is undesirable to wait for a folio to be unlocked (eg * when the locks are being taken in the wrong order, or if making * progress through a batch of folios is more important than processing * them in order). Usually folio_lock() is the correct function to call. * * Context: Any context. * Return: Whether the lock was successfully acquired. */ static inline bool folio_trylock(struct folio *folio) { return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0))); } /* * Return true if the page was successfully locked */ static inline int trylock_page(struct page *page) { return folio_trylock(page_folio(page)); } /** * folio_lock() - Lock this folio. * @folio: The folio to lock. * * The folio lock protects against many things, probably more than it * should. It is primarily held while a folio is being brought uptodate, * either from its backing file or from swap. It is also held while a * folio is being truncated from its address_space, so holding the lock * is sufficient to keep folio->mapping stable. * * The folio lock is also held while write() is modifying the page to * provide POSIX atomicity guarantees (as long as the write does not * cross a page boundary). Other modifications to the data in the folio * do not hold the folio lock and can race with writes, eg DMA and stores * to mapped pages. * * Context: May sleep. 
If you need to acquire the locks of two or * more folios, they must be in order of ascending index, if they are * in the same address_space. If they are in different address_spaces, * acquire the lock of the folio which belongs to the address_space which * has the lowest address in memory first. */ static inline void folio_lock(struct folio *folio) { might_sleep(); if (!folio_trylock(folio)) __folio_lock(folio); } /** * lock_page() - Lock the folio containing this page. * @page: The page to lock. * * See folio_lock() for a description of what the lock protects. * This is a legacy function and new code should probably use folio_lock() * instead. * * Context: May sleep. Pages in the same folio share a lock, so do not * attempt to lock two pages which share a folio. */ static inline void lock_page(struct page *page) { struct folio *folio; might_sleep(); folio = page_folio(page); if (!folio_trylock(folio)) __folio_lock(folio); } /** * folio_lock_killable() - Lock this folio, interruptible by a fatal signal. * @folio: The folio to lock. * * Attempts to lock the folio, like folio_lock(), except that the sleep * to acquire the lock is interruptible by a fatal signal. * * Context: May sleep; see folio_lock(). * Return: 0 if the lock was acquired; -EINTR if a fatal signal was received. */ static inline int folio_lock_killable(struct folio *folio) { might_sleep(); if (!folio_trylock(folio)) return __folio_lock_killable(folio); return 0; } /* * folio_lock_or_retry - Lock the folio, unless this would block and the * caller indicated that it can handle a retry. * * Return value and mmap_lock implications depend on flags; see * __folio_lock_or_retry(). */ static inline vm_fault_t folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf) { might_sleep(); if (!folio_trylock(folio)) return __folio_lock_or_retry(folio, vmf); return 0; } /* * This is exported only for folio_wait_locked/folio_wait_writeback, etc., * and should not be used directly. */ void folio_wait_bit(struct folio *folio, int bit_nr); int folio_wait_bit_killable(struct folio *folio, int bit_nr); /* * Wait for a folio to be unlocked. * * This must be called with the caller "holding" the folio, * ie with increased folio reference count so that the folio won't * go away during the wait. */ static inline void folio_wait_locked(struct folio *folio) { if (folio_test_locked(folio)) folio_wait_bit(folio, PG_locked); } static inline int folio_wait_locked_killable(struct folio *folio) { if (!folio_test_locked(folio)) return 0; return folio_wait_bit_killable(folio, PG_locked); } static inline void wait_on_page_locked(struct page *page) { folio_wait_locked(page_folio(page)); } void folio_end_read(struct folio *folio, bool success); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); static inline void __set_page_dirty(struct page *page, struct address_space *mapping, int warn) { __folio_mark_dirty(page_folio(page), mapping, warn); } void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); void __folio_cancel_dirty(struct folio *folio); static inline void folio_cancel_dirty(struct folio *folio) { /* Avoid atomic ops, locking, etc. when not actually needed. 
*/ if (folio_test_dirty(folio)) __folio_cancel_dirty(folio); } bool folio_clear_dirty_for_io(struct folio *folio); bool clear_page_dirty_for_io(struct page *page); void folio_invalidate(struct folio *folio, size_t offset, size_t length); int __set_page_dirty_nobuffers(struct page *page); bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); #ifdef CONFIG_MIGRATION int filemap_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode); #else #define filemap_migrate_folio NULL #endif void folio_end_private_2(struct folio *folio); void folio_wait_private_2(struct folio *folio); int folio_wait_private_2_killable(struct folio *folio); /* * Add an arbitrary waiter to a page's wait queue */ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); /* * Fault in userspace address range. */ size_t fault_in_writeable(char __user *uaddr, size_t size); size_t fault_in_subpage_writeable(char __user *uaddr, size_t size); size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); size_t fault_in_readable(const char __user *uaddr, size_t size); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp); int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp); void filemap_remove_folio(struct folio *folio); void __filemap_remove_folio(struct folio *folio, void *shadow); void replace_page_cache_folio(struct folio *old, struct folio *new); void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch); bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); /* Must be non-static for BPF error injection */ int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp); bool filemap_range_has_writeback(struct address_space *mapping, loff_t start_byte, loff_t end_byte); /** * filemap_range_needs_writeback - check if range potentially needs writeback * @mapping: address space within which to check * @start_byte: offset in bytes where the range starts * @end_byte: offset in bytes where the range ends (inclusive) * * Find at least one page in the range supplied, usually used to check if * direct writing in this range will trigger a writeback. Used by O_DIRECT * read/write with IOCB_NOWAIT, to see if the caller needs to do * filemap_write_and_wait_range() before proceeding. * * Return: %true if the caller should do filemap_write_and_wait_range() before * doing O_DIRECT to a page in this range, %false otherwise. */ static inline bool filemap_range_needs_writeback(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { if (!mapping->nrpages) return false; if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) return false; return filemap_range_has_writeback(mapping, start_byte, end_byte); } /** * struct readahead_control - Describes a readahead request. * * A readahead request is for consecutive pages. Filesystems which * implement the ->readahead method should call readahead_page() or * readahead_page_batch() in a loop and attempt to start I/O against * each page in the request. * * Most of the fields in this struct are private and should be accessed * by the functions below. * * @file: The file, used primarily by network filesystems for authentication. 
* May be NULL if invoked internally by the filesystem. * @mapping: Readahead this filesystem object. * @ra: File readahead state. May be NULL. */ struct readahead_control { struct file *file; struct address_space *mapping; struct file_ra_state *ra; /* private: use the readahead_* accessors instead */ pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; bool _workingset; unsigned long _pflags; }; #define DEFINE_READAHEAD(ractl, f, r, m, i) \ struct readahead_control ractl = { \ .file = f, \ .mapping = m, \ .ra = r, \ ._index = i, \ } #define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); void page_cache_async_ra(struct readahead_control *, struct folio *, unsigned long req_count); void readahead_expand(struct readahead_control *ractl, loff_t new_start, size_t new_len); /** * page_cache_sync_readahead - generic file readahead * @mapping: address_space which holds the pagecache and I/O vectors * @ra: file_ra_state which holds the readahead state * @file: Used by the filesystem for authentication. * @index: Index of first page to be read. * @req_count: Total number of pages being read by the caller. * * page_cache_sync_readahead() should be called when a cache miss happened: * it will submit the read. The readahead logic may decide to piggyback more * pages onto the read request if access patterns suggest it will improve * performance. */ static inline void page_cache_sync_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t index, unsigned long req_count) { DEFINE_READAHEAD(ractl, file, ra, mapping, index); page_cache_sync_ra(&ractl, req_count); } /** * page_cache_async_readahead - file readahead for marked pages * @mapping: address_space which holds the pagecache and I/O vectors * @ra: file_ra_state which holds the readahead state * @file: Used by the filesystem for authentication. * @folio: The folio at @index which triggered the readahead call. * @index: Index of first page to be read. * @req_count: Total number of pages being read by the caller. * * page_cache_async_readahead() should be called when a page is used which * is marked as PageReadahead; this is a marker to suggest that the application * has used up enough of the readahead window that we should start pulling in * more pages. */ static inline void page_cache_async_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, struct folio *folio, pgoff_t index, unsigned long req_count) { DEFINE_READAHEAD(ractl, file, ra, mapping, index); page_cache_async_ra(&ractl, folio, req_count); } static inline struct folio *__readahead_folio(struct readahead_control *ractl) { struct folio *folio; BUG_ON(ractl->_batch_count > ractl->_nr_pages); ractl->_nr_pages -= ractl->_batch_count; ractl->_index += ractl->_batch_count; if (!ractl->_nr_pages) { ractl->_batch_count = 0; return NULL; } folio = xa_load(&ractl->mapping->i_pages, ractl->_index); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); ractl->_batch_count = folio_nr_pages(folio); return folio; } /** * readahead_page - Get the next page to read. * @ractl: The current readahead request. * * Context: The page is locked and has an elevated refcount. The caller * should decrease the refcount once the page has been submitted for I/O * and unlock the page once all I/O to that page has completed.
* Return: A pointer to the next page, or %NULL if we are done. */ static inline struct page *readahead_page(struct readahead_control *ractl) { struct folio *folio = __readahead_folio(ractl); return &folio->page; } /** * readahead_folio - Get the next folio to read. * @ractl: The current readahead request. * * Context: The folio is locked. The caller should unlock the folio once * all I/O to that folio has completed. * Return: A pointer to the next folio, or %NULL if we are done. */ static inline struct folio *readahead_folio(struct readahead_control *ractl) { struct folio *folio = __readahead_folio(ractl); if (folio) folio_put(folio); return folio; } static inline unsigned int __readahead_batch(struct readahead_control *rac, struct page **array, unsigned int array_sz) { unsigned int i = 0; XA_STATE(xas, &rac->mapping->i_pages, 0); struct page *page; BUG_ON(rac->_batch_count > rac->_nr_pages); rac->_nr_pages -= rac->_batch_count; rac->_index += rac->_batch_count; rac->_batch_count = 0; xas_set(&xas, rac->_index); rcu_read_lock(); xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { if (xas_retry(&xas, page)) continue; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; rac->_batch_count += thp_nr_pages(page); if (i == array_sz) break; } rcu_read_unlock(); return i; } /** * readahead_page_batch - Get a batch of pages to read. * @rac: The current readahead request. * @array: An array of pointers to struct page. * * Context: The pages are locked and have an elevated refcount. The caller * should decrease the refcount once the page has been submitted for I/O * and unlock the page once all I/O to that page has completed. * Return: The number of pages placed in the array. 0 indicates the request * is complete. */ #define readahead_page_batch(rac, array) \ __readahead_batch(rac, array, ARRAY_SIZE(array)) /** * readahead_pos - The byte offset into the file of this readahead request. * @rac: The readahead request. */ static inline loff_t readahead_pos(struct readahead_control *rac) { return (loff_t)rac->_index * PAGE_SIZE; } /** * readahead_length - The number of bytes in this readahead request. * @rac: The readahead request. */ static inline size_t readahead_length(struct readahead_control *rac) { return rac->_nr_pages * PAGE_SIZE; } /** * readahead_index - The index of the first page in this readahead request. * @rac: The readahead request. */ static inline pgoff_t readahead_index(struct readahead_control *rac) { return rac->_index; } /** * readahead_count - The number of pages in this readahead request. * @rac: The readahead request. */ static inline unsigned int readahead_count(struct readahead_control *rac) { return rac->_nr_pages; } /** * readahead_batch_length - The number of bytes in the current batch. * @rac: The readahead request. */ static inline size_t readahead_batch_length(struct readahead_control *rac) { return rac->_batch_count * PAGE_SIZE; } static inline unsigned long dir_pages(struct inode *inode) { return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; } /** * folio_mkwrite_check_truncate - check if folio was truncated * @folio: the folio to check * @inode: the inode to check the folio against * * Return: the number of bytes in the folio up to EOF, * or -EFAULT if the folio was truncated.
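 *
 * Editorial sketch of how a ->page_mkwrite() style handler might consume
 * this return value (the surrounding code is hypothetical); a negative
 * return means the folio was truncated while the fault was in flight:
 *
 *	folio_lock(folio);
 *	len = folio_mkwrite_check_truncate(folio, inode);
 *	if (len < 0) {
 *		folio_unlock(folio);
 *		return VM_FAULT_NOPAGE;
 *	}
 *	... mark the first len bytes of the folio dirty ...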
*/ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, struct inode *inode) { loff_t size = i_size_read(inode); pgoff_t index = size >> PAGE_SHIFT; size_t offset = offset_in_folio(folio, size); if (!folio->mapping) return -EFAULT; /* folio is wholly inside EOF */ if (folio_next_index(folio) - 1 < index) return folio_size(folio); /* folio is wholly past EOF */ if (folio->index > index || !offset) return -EFAULT; /* folio is partially inside EOF */ return offset; } /** * page_mkwrite_check_truncate - check if page was truncated * @page: the page to check * @inode: the inode to check the page against * * Returns the number of bytes in the page up to EOF, * or -EFAULT if the page was truncated. */ static inline int page_mkwrite_check_truncate(struct page *page, struct inode *inode) { loff_t size = i_size_read(inode); pgoff_t index = size >> PAGE_SHIFT; int offset = offset_in_page(size); if (page->mapping != inode->i_mapping) return -EFAULT; /* page is wholly inside EOF */ if (page->index < index) return PAGE_SIZE; /* page is wholly past EOF */ if (page->index > index || !offset) return -EFAULT; /* page is partially inside EOF */ return offset; } /** * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks. * @folio: The folio. * * If the block size is larger than the size of this folio, return zero. * * Context: The caller should hold a refcount on the folio to prevent it * from being split. * Return: The number of filesystem blocks covered by this folio. */ static inline unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) { return folio_size(folio) >> inode->i_blkbits; } static inline unsigned int i_blocks_per_page(struct inode *inode, struct page *page) { return i_blocks_per_folio(inode, page_folio(page)); } #endif /* _LINUX_PAGEMAP_H */
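/*
 * Editorial sketch, not part of pagemap.h: one plausible way a filesystem's
 * write-completion path could combine the error-tracking and writeback
 * helpers declared above. The my_fs_* name is hypothetical and "err" is
 * assumed to be the I/O status reported by the lower layer.
 */
static void my_fs_end_folio_writeback(struct folio *folio, int err)
{
	if (unlikely(err))
		mapping_set_error(folio->mapping, err);	/* remembered for fsync() */
	folio_end_writeback(folio);			/* wake writeback waiters */
}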
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PERCPU_COUNTER_H #define _LINUX_PERCPU_COUNTER_H /* * A simple "approximate counter" for use in ext2 and ext3 superblocks. * * WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4. */ #include <linux/spinlock.h> #include <linux/smp.h> #include <linux/list.h> #include <linux/threads.h> #include <linux/percpu.h> #include <linux/types.h> /* percpu_counter batch for local add or sub */ #define PERCPU_COUNTER_LOCAL_BATCH INT_MAX #ifdef CONFIG_SMP struct percpu_counter { raw_spinlock_t lock; s64 count; #ifdef CONFIG_HOTPLUG_CPU struct list_head list; /* All percpu_counters are on a list */ #endif s32 __percpu *counters; }; extern int percpu_counter_batch; int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, u32 nr_counters, struct lock_class_key *key); #define percpu_counter_init_many(fbc, value, gfp, nr_counters) \ ({ \ static struct lock_class_key __key; \ \ __percpu_counter_init_many(fbc, value, gfp, nr_counters,\ &__key); \ }) #define percpu_counter_init(fbc, value, gfp) \ percpu_counter_init_many(fbc, value, gfp, 1) void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters); static inline void percpu_counter_destroy(struct percpu_counter *fbc) { percpu_counter_destroy_many(fbc, 1); } void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) { return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); } static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } static inline bool percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) { return __percpu_counter_limited_add(fbc, limit, amount, percpu_counter_batch); } /* * With percpu_counter_add_local() and percpu_counter_sub_local(), counts * are accumulated in local per cpu counter and not in fbc->count until * local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter * write efficient.
* But percpu_counter_sum(), instead of percpu_counter_read(), needs to be * used to add up the counts from each CPU to account for all the local * counts. So percpu_counter_add_local() and percpu_counter_sub_local() * should be used when a counter is updated frequently and read rarely. */ static inline void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) { percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH); } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { s64 ret = __percpu_counter_sum(fbc); return ret < 0 ? 0 : ret; } static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { return __percpu_counter_sum(fbc); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) { return fbc->count; } /* * It is possible for the percpu_counter_read() to return a small negative * number for some counter which should never be negative. * */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { /* Prevent reloads of fbc->count */ s64 ret = READ_ONCE(fbc->count); if (ret >= 0) return ret; return 0; } static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return (fbc->counters != NULL); } #else /* !CONFIG_SMP */ struct percpu_counter { s64 count; }; static inline int percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, u32 nr_counters) { u32 i; for (i = 0; i < nr_counters; i++) fbc[i].count = amount; return 0; } static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp) { return percpu_counter_init_many(fbc, amount, gfp, 1); } static inline void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) { } static inline void percpu_counter_destroy(struct percpu_counter *fbc) { } static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { fbc->count = amount; } static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) { if (fbc->count > rhs) return 1; else if (fbc->count < rhs) return -1; else return 0; } static inline int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { return percpu_counter_compare(fbc, rhs); } static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { unsigned long flags; local_irq_save(flags); fbc->count += amount; local_irq_restore(flags); } static inline bool percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) { unsigned long flags; bool good = false; s64 count; if (amount == 0) return true; local_irq_save(flags); count = fbc->count + amount; if ((amount > 0 && count <= limit) || (amount < 0 && count >= limit)) { fbc->count = count; good = true; } local_irq_restore(flags); return good; } /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ static inline void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) { percpu_counter_add(fbc, amount); } static inline void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { percpu_counter_add(fbc, amount); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) { return fbc->count; } /* * percpu_counter is intended to track positive numbers. In the UP case the * number should never be negative. 
*/ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { return fbc->count; } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { return percpu_counter_read_positive(fbc); } static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { return percpu_counter_read(fbc); } static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; } static inline void percpu_counter_sync(struct percpu_counter *fbc) { } #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) { percpu_counter_add(fbc, 1); } static inline void percpu_counter_dec(struct percpu_counter *fbc) { percpu_counter_add(fbc, -1); } static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount) { percpu_counter_add(fbc, -amount); } static inline void percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount) { percpu_counter_add_local(fbc, -amount); } #endif /* _LINUX_PERCPU_COUNTER_H */
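/*
 * Editorial usage sketch, not part of this header: a hypothetical subsystem
 * tracking in-flight requests with a percpu_counter. The my_subsys_* names
 * are illustrative; handling of an allocation failure is left to the caller.
 */
struct my_subsys {
	struct percpu_counter nr_inflight;
};

static int my_subsys_init(struct my_subsys *s)
{
	/* Sets up the per-CPU counters; may fail under memory pressure. */
	return percpu_counter_init(&s->nr_inflight, 0, GFP_KERNEL);
}

static void my_subsys_start_req(struct my_subsys *s)
{
	percpu_counter_inc(&s->nr_inflight);
}

static void my_subsys_end_req(struct my_subsys *s)
{
	percpu_counter_dec(&s->nr_inflight);
}

static bool my_subsys_busy(struct my_subsys *s)
{
	/* percpu_counter_read() is approximate; _sum() folds in per-CPU deltas. */
	return percpu_counter_sum(&s->nr_inflight) > 0;
}

static void my_subsys_exit(struct my_subsys *s)
{
	percpu_counter_destroy(&s->nr_inflight);
}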
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar * Copyright (C) 2005-2006, Thomas Gleixner, Russell King * * This file contains the core interrupt handling code. Detailed * information is available in Documentation/core-api/genericirq.rst * */ #include <linux/irq.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <asm/irq_regs.h> #include <trace/events/irq.h> #include "internals.h" #ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; #endif /** * handle_bad_irq - handle spurious and unhandled irqs * @desc: description of the interrupt * * Handles spurious and unhandled IRQ's. It also prints a debug message. */ void handle_bad_irq(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); print_irq_desc(irq, desc); kstat_incr_irqs_this_cpu(desc); ack_bad_irq(irq); } EXPORT_SYMBOL_GPL(handle_bad_irq); /* * Special, empty irq handler: */ irqreturn_t no_action(int cpl, void *dev_id) { return IRQ_NONE; } EXPORT_SYMBOL_GPL(no_action); static void warn_no_thread(unsigned int irq, struct irqaction *action) { if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags)) return; printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD " "but no thread function available.", irq, action->name); } void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) { /* * In case the thread crashed and was killed we just pretend that * we handled the interrupt. The hardirq handler has disabled the * device interrupt, so no irq storm is lurking. */ if (action->thread->flags & PF_EXITING) return; /* * Wake up the handler thread for this action. If the * RUNTHREAD bit is already set, nothing to do. */ if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags)) return; /* * It's safe to OR the mask lockless here. We have only two * places which write to threads_oneshot: This code and the * irq thread. * * This code is the hard irq context and can never run on two * cpus in parallel. If it ever does we have more serious * problems than this bitmask. * * The irq threads of this irq which clear their "running" bit * in threads_oneshot are serialized via desc->lock against * each other and they are serialized against this code by * IRQS_INPROGRESS.
* * Hard irq handler: * * spin_lock(desc->lock); * desc->state |= IRQS_INPROGRESS; * spin_unlock(desc->lock); * set_bit(IRQTF_RUNTHREAD, &action->thread_flags); * desc->threads_oneshot |= mask; * spin_lock(desc->lock); * desc->state &= ~IRQS_INPROGRESS; * spin_unlock(desc->lock); * * irq thread: * * again: * spin_lock(desc->lock); * if (desc->state & IRQS_INPROGRESS) { * spin_unlock(desc->lock); * while(desc->state & IRQS_INPROGRESS) * cpu_relax(); * goto again; * } * if (!test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) * desc->threads_oneshot &= ~mask; * spin_unlock(desc->lock); * * So either the thread waits for us to clear IRQS_INPROGRESS * or we are waiting in the flow handler for desc->lock to be * released before we reach this point. The thread also checks * IRQTF_RUNTHREAD under desc->lock. If set it leaves * threads_oneshot untouched and runs the thread another time. */ desc->threads_oneshot |= action->thread_mask; /* * We increment the threads_active counter in case we wake up * the irq thread. The irq thread decrements the counter when * it returns from the handler or in the exit path and wakes * up waiters which are stuck in synchronize_irq() when the * active count becomes zero. synchronize_irq() is serialized * against this code (hard irq handler) via IRQS_INPROGRESS * like the finalize_oneshot() code. See comment above. */ atomic_inc(&desc->threads_active); wake_up_process(action->thread); } irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval = IRQ_NONE; unsigned int irq = desc->irq_data.irq; struct irqaction *action; record_irq_time(desc); for_each_action_of_desc(desc, action) { irqreturn_t res; /* * If this IRQ would be threaded under force_irqthreads, mark it so. */ if (irq_settings_can_thread(desc) && !(action->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))) lockdep_hardirq_threaded(); trace_irq_handler_entry(irq, action); res = action->handler(irq, action->dev_id); trace_irq_handler_exit(irq, action, res); if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", irq, action->handler)) local_irq_disable(); switch (res) { case IRQ_WAKE_THREAD: /* * Catch drivers which return WAKE_THREAD but * did not set up a thread function */ if (unlikely(!action->thread_fn)) { warn_no_thread(irq, action); break; } __irq_wake_thread(desc, action); break; default: break; } retval |= res; } return retval; } irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval; retval = __handle_irq_event_percpu(desc); add_interrupt_randomness(desc->irq_data.irq); if (!irq_settings_no_debug(desc)) note_interrupt(desc, retval); return retval; } irqreturn_t handle_irq_event(struct irq_desc *desc) { irqreturn_t ret; desc->istate &= ~IRQS_PENDING; irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); raw_spin_unlock(&desc->lock); ret = handle_irq_event_percpu(desc); raw_spin_lock(&desc->lock); irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); return ret; } #ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { if (handle_arch_irq) return -EBUSY; handle_arch_irq = handle_irq; return 0; } /** * generic_handle_arch_irq - root irq handler for architectures which do no * entry accounting themselves * @regs: Register file coming from the low-level handling code */ asmlinkage void noinstr generic_handle_arch_irq(struct pt_regs *regs) { struct pt_regs *old_regs; irq_enter(); old_regs = set_irq_regs(regs); handle_arch_irq(regs); set_irq_regs(old_regs); irq_exit(); } #endif
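/*
 * Editorial sketch, not part of this file: the wakeup path above is what a
 * driver exercises when its primary handler returns IRQ_WAKE_THREAD. The
 * my_dev_* names are hypothetical; request_threaded_irq() and the IRQ
 * constants come from <linux/interrupt.h>, already included above.
 */
static irqreturn_t my_dev_hardirq(int irq, void *dev_id)
{
	/* Quiesce the device quickly, then defer the heavy lifting. */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t my_dev_thread_fn(int irq, void *dev_id)
{
	/* Runs in the irq thread woken via __irq_wake_thread(). */
	return IRQ_HANDLED;
}

static int my_dev_setup_irq(unsigned int irq, void *dev_id)
{
	return request_threaded_irq(irq, my_dev_hardirq, my_dev_thread_fn,
				    IRQF_ONESHOT, "my_dev", dev_id);
}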
1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 
// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/fs-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains all the functions related to writing back and waiting
 * upon dirty inodes against superblocks, and writing back dirty
 * pages against inodes.  ie: data writeback.  Writeout of the
 * inode itself is not handled here.
 *
 * 10Apr2002	Andrew Morton
 *		Split out of fs/inode.c
 *		Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include <linux/device.h>
#include <linux/memcontrol.h>
#include "internal.h"

/*
 * 4MB minimal write chunk size
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))

/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	unsigned int auto_free:1;	/* free on completion */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct wb_completion *done;	/* set if the caller waits */
};

/*
 * If an inode is constantly having its pages dirtied, but then the
 * updates stop dirtytime_expire_interval seconds in the past, it's
 * possible for the worst case time between when an inode has its
 * timestamps updated and when they finally get written out to be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not have their timestamps updated for 24 hours.
 */
unsigned int dirtytime_expire_interval = 12 * 60 * 60;

static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_io_list);
}

/*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure and inline functions so that the definition
 * remains local to this file.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);

static bool wb_io_lists_populated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb)) {
		return false;
	} else {
		set_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(!wb->avg_write_bandwidth);
		atomic_long_add(wb->avg_write_bandwidth,
				&wb->bdi->tot_write_bandwidth);
		return true;
	}
}

static void wb_io_lists_depopulated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) &&
	    list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) {
		clear_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth,
					&wb->bdi->tot_write_bandwidth) < 0);
	}
}

/**
 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
 * @inode: inode to be moved
 * @wb: target bdi_writeback
 * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
 *
 * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
 * Returns %true if @inode is the first occupant of the !dirty_time IO
 * lists; otherwise, %false.
*/ static bool inode_io_list_move_locked(struct inode *inode, struct bdi_writeback *wb, struct list_head *head) { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); WARN_ON_ONCE(inode->i_state & I_FREEING); list_move(&inode->i_io_list, head); /* dirty_time doesn't count as dirty_io until expiration */ if (head != &wb->b_dirty_time) return wb_io_lists_populated(wb); wb_io_lists_depopulated(wb); return false; } static void wb_wakeup(struct bdi_writeback *wb) { spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) mod_delayed_work(bdi_wq, &wb->dwork, 0); spin_unlock_irq(&wb->work_lock); } /* * This function is used when the first inode for this wb is marked dirty. It * wakes-up the corresponding bdi thread which should then take care of the * periodic background write-out of dirty inodes. Since the write-out would * starts only 'dirty_writeback_interval' centisecs from now anyway, we just * set up a timer which wakes the bdi thread up later. * * Note, we wouldn't bother setting up the timer, but this function is on the * fast-path (used by '__mark_inode_dirty()'), so we save few context switches * by delaying the wake-up. * * We have to be careful not to postpone flush work if it is scheduled for * earlier. Thus we use queue_delayed_work(). */ static void wb_wakeup_delayed(struct bdi_writeback *wb) { unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) queue_delayed_work(bdi_wq, &wb->dwork, timeout); spin_unlock_irq(&wb->work_lock); } static void finish_writeback_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { struct wb_completion *done = work->done; if (work->auto_free) kfree(work); if (done) { wait_queue_head_t *waitq = done->waitq; /* @done can't be accessed after the following dec */ if (atomic_dec_and_test(&done->cnt)) wake_up_all(waitq); } } static void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { trace_writeback_queue(wb, work); if (work->done) atomic_inc(&work->done->cnt); spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) { list_add_tail(&work->list, &wb->work_list); mod_delayed_work(bdi_wq, &wb->dwork, 0); } else finish_writeback_work(wb, work); spin_unlock_irq(&wb->work_lock); } /** * wb_wait_for_completion - wait for completion of bdi_writeback_works * @done: target wb_completion * * Wait for one or more work items issued to @bdi with their ->done field * set to @done, which should have been initialized with * DEFINE_WB_COMPLETION(). This function returns after all such work items * are completed. Work items which are waited upon aren't freed * automatically on completion. */ void wb_wait_for_completion(struct wb_completion *done) { atomic_dec(&done->cnt); /* put down the initial count */ wait_event(*done->waitq, !atomic_read(&done->cnt)); } #ifdef CONFIG_CGROUP_WRITEBACK /* * Parameters for foreign inode detection, see wbc_detach_inode() to see * how they're used. * * These paramters are inherently heuristical as the detection target * itself is fuzzy. All we want to do is detaching an inode from the * current owner if it's being written to by some other cgroups too much. * * The current cgroup writeback is built on the assumption that multiple * cgroups writing to the same inode concurrently is very rare and a mode * of operation which isn't well supported. 
As such, the goal is not * taking too long when a different cgroup takes over an inode while * avoiding too aggressive flip-flops from occasional foreign writes. * * We record, very roughly, 2s worth of IO time history and if more than * half of that is foreign, trigger the switch. The recording is quantized * to 16 slots. To avoid tiny writes from swinging the decision too much, * writes smaller than 1/8 of avg size are ignored. */ #define WB_FRN_TIME_SHIFT 13 /* 1s = 2^13, upto 8 secs w/ 16bit */ #define WB_FRN_TIME_AVG_SHIFT 3 /* avg = avg * 7/8 + new * 1/8 */ #define WB_FRN_TIME_CUT_DIV 8 /* ignore rounds < avg / 8 */ #define WB_FRN_TIME_PERIOD (2 * (1 << WB_FRN_TIME_SHIFT)) /* 2s */ #define WB_FRN_HIST_SLOTS 16 /* inode->i_wb_frn_history is 16bit */ #define WB_FRN_HIST_UNIT (WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS) /* each slot's duration is 2s / 16 */ #define WB_FRN_HIST_THR_SLOTS (WB_FRN_HIST_SLOTS / 2) /* if foreign slots >= 8, switch */ #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) /* one round can affect upto 5 slots */ #define WB_FRN_MAX_IN_FLIGHT 1024 /* don't queue too many concurrently */ /* * Maximum inodes per isw. A specific value has been chosen to make * struct inode_switch_wbs_context fit into 1024 bytes kmalloc. */ #define WB_MAX_INODES_PER_ISW ((1024UL - sizeof(struct inode_switch_wbs_context)) \ / sizeof(struct inode *)) static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); static struct workqueue_struct *isw_wq; void __inode_attach_wb(struct inode *inode, struct folio *folio) { struct backing_dev_info *bdi = inode_to_bdi(inode); struct bdi_writeback *wb = NULL; if (inode_cgwb_enabled(inode)) { struct cgroup_subsys_state *memcg_css; if (folio) { memcg_css = mem_cgroup_css_from_folio(folio); wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); } else { /* must pin memcg_css, see wb_get_create() */ memcg_css = task_get_css(current, memory_cgrp_id); wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); css_put(memcg_css); } } if (!wb) wb = &bdi->wb; /* * There may be multiple instances of this function racing to * update the same inode. Use cmpxchg() to tell the winner. */ if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) wb_put(wb); } EXPORT_SYMBOL_GPL(__inode_attach_wb); /** * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list * @inode: inode of interest with i_lock held * @wb: target bdi_writeback * * Remove the inode from wb's io lists and if necessarily put onto b_attached * list. Only inodes attached to cgwb's are kept on this list. */ static void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; if (wb != &wb->bdi->wb) list_move(&inode->i_io_list, &wb->b_attached); else list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } /** * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it * @inode: inode of interest with i_lock held * * Returns @inode's wb with its list_lock held. @inode->i_lock must be * held on entry and is released on return. The returned wb is guaranteed * to stay @inode's associated wb until its list_lock is released. 
*/ static struct bdi_writeback * locked_inode_to_wb_and_lock_list(struct inode *inode) __releases(&inode->i_lock) __acquires(&wb->list_lock) { while (true) { struct bdi_writeback *wb = inode_to_wb(inode); /* * inode_to_wb() association is protected by both * @inode->i_lock and @wb->list_lock but list_lock nests * outside i_lock. Drop i_lock and verify that the * association hasn't changed after acquiring list_lock. */ wb_get(wb); spin_unlock(&inode->i_lock); spin_lock(&wb->list_lock); /* i_wb may have changed inbetween, can't use inode_to_wb() */ if (likely(wb == inode->i_wb)) { wb_put(wb); /* @inode already has ref */ return wb; } spin_unlock(&wb->list_lock); wb_put(wb); cpu_relax(); spin_lock(&inode->i_lock); } } /** * inode_to_wb_and_lock_list - determine an inode's wb and lock it * @inode: inode of interest * * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held * on entry. */ static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode) __acquires(&wb->list_lock) { spin_lock(&inode->i_lock); return locked_inode_to_wb_and_lock_list(inode); } struct inode_switch_wbs_context { struct rcu_work work; /* * Multiple inodes can be switched at once. The switching procedure * consists of two parts, separated by a RCU grace period. To make * sure that the second part is executed for each inode gone through * the first part, all inode pointers are placed into a NULL-terminated * array embedded into struct inode_switch_wbs_context. Otherwise * an inode could be left in a non-consistent state. */ struct bdi_writeback *new_wb; struct inode *inodes[]; }; static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { down_write(&bdi->wb_switch_rwsem); } static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { up_write(&bdi->wb_switch_rwsem); } static bool inode_do_switch_wbs(struct inode *inode, struct bdi_writeback *old_wb, struct bdi_writeback *new_wb) { struct address_space *mapping = inode->i_mapping; XA_STATE(xas, &mapping->i_pages, 0); struct folio *folio; bool switched = false; spin_lock(&inode->i_lock); xa_lock_irq(&mapping->i_pages); /* * Once I_FREEING or I_WILL_FREE are visible under i_lock, the eviction * path owns the inode and we shouldn't modify ->i_io_list. */ if (unlikely(inode->i_state & (I_FREEING | I_WILL_FREE))) goto skip_switch; trace_inode_switch_wbs(inode, old_wb, new_wb); /* * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points * to possibly dirty folios while PAGECACHE_TAG_WRITEBACK points to * folios actually under writeback. */ xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_DIRTY) { if (folio_test_dirty(folio)) { long nr = folio_nr_pages(folio); wb_stat_mod(old_wb, WB_RECLAIMABLE, -nr); wb_stat_mod(new_wb, WB_RECLAIMABLE, nr); } } xas_set(&xas, 0); xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) { long nr = folio_nr_pages(folio); WARN_ON_ONCE(!folio_test_writeback(folio)); wb_stat_mod(old_wb, WB_WRITEBACK, -nr); wb_stat_mod(new_wb, WB_WRITEBACK, nr); } if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { atomic_dec(&old_wb->writeback_inodes); atomic_inc(&new_wb->writeback_inodes); } wb_get(new_wb); /* * Transfer to @new_wb's IO list if necessary. If the @inode is dirty, * the specific list @inode was on is ignored and the @inode is put on * ->b_dirty which is always correct including from ->b_dirty_time. * The transfer preserves @inode->dirtied_when ordering. 
If the @inode * was clean, it means it was on the b_attached list, so move it onto * the b_attached list of @new_wb. */ if (!list_empty(&inode->i_io_list)) { inode->i_wb = new_wb; if (inode->i_state & I_DIRTY_ALL) { struct inode *pos; list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) if (time_after_eq(inode->dirtied_when, pos->dirtied_when)) break; inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev); } else { inode_cgwb_move_to_attached(inode, new_wb); } } else { inode->i_wb = new_wb; } /* ->i_wb_frn updates may race wbc_detach_inode() but doesn't matter */ inode->i_wb_frn_winner = 0; inode->i_wb_frn_avg_time = 0; inode->i_wb_frn_history = 0; switched = true; skip_switch: /* * Paired with load_acquire in unlocked_inode_to_wb_begin() and * ensures that the new wb is visible if they see !I_WB_SWITCH. */ smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH); xa_unlock_irq(&mapping->i_pages); spin_unlock(&inode->i_lock); return switched; } static void inode_switch_wbs_work_fn(struct work_struct *work) { struct inode_switch_wbs_context *isw = container_of(to_rcu_work(work), struct inode_switch_wbs_context, work); struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]); struct bdi_writeback *old_wb = isw->inodes[0]->i_wb; struct bdi_writeback *new_wb = isw->new_wb; unsigned long nr_switched = 0; struct inode **inodep; /* * If @inode switches cgwb membership while sync_inodes_sb() is * being issued, sync_inodes_sb() might miss it. Synchronize. */ down_read(&bdi->wb_switch_rwsem); /* * By the time control reaches here, RCU grace period has passed * since I_WB_SWITCH assertion and all wb stat update transactions * between unlocked_inode_to_wb_begin/end() are guaranteed to be * synchronizing against the i_pages lock. * * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock * gives us exclusion against all wb related operations on @inode * including IO list manipulations and stat updates. */ if (old_wb < new_wb) { spin_lock(&old_wb->list_lock); spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING); } else { spin_lock(&new_wb->list_lock); spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); } for (inodep = isw->inodes; *inodep; inodep++) { WARN_ON_ONCE((*inodep)->i_wb != old_wb); if (inode_do_switch_wbs(*inodep, old_wb, new_wb)) nr_switched++; } spin_unlock(&new_wb->list_lock); spin_unlock(&old_wb->list_lock); up_read(&bdi->wb_switch_rwsem); if (nr_switched) { wb_wakeup(new_wb); wb_put_many(old_wb, nr_switched); } for (inodep = isw->inodes; *inodep; inodep++) iput(*inodep); wb_put(new_wb); kfree(isw); atomic_dec(&isw_nr_in_flight); } static bool inode_prepare_wbs_switch(struct inode *inode, struct bdi_writeback *new_wb) { /* * Paired with smp_mb() in cgroup_writeback_umount(). * isw_nr_in_flight must be increased before checking SB_ACTIVE and * grabbing an inode, otherwise isw_nr_in_flight can be observed as 0 * in cgroup_writeback_umount() and the isw_wq will be not flushed. 
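 *
 * (Editorial illustration, not from the original source: assuming the
 *  barrier pairing described above, the two sides line up roughly as)
 *
 *	switching path				umount path
 *	--------------				-----------
 *	atomic_inc(&isw_nr_in_flight)		clear SB_ACTIVE
 *	smp_mb()				smp_mb()
 *	check SB_ACTIVE				read isw_nr_in_flight
 *
 * With both barriers in place it cannot happen that the switcher still
 * sees SB_ACTIVE set while cgroup_writeback_umount() reads
 * isw_nr_in_flight as 0: either the switch is aborted here, or the
 * umount path notices the in-flight switch and flushes isw_wq.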
*/ smp_mb(); if (IS_DAX(inode)) return false; /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); if (!(inode->i_sb->s_flags & SB_ACTIVE) || inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) || inode_to_wb(inode) == new_wb) { spin_unlock(&inode->i_lock); return false; } inode->i_state |= I_WB_SWITCH; __iget(inode); spin_unlock(&inode->i_lock); return true; } /** * inode_switch_wbs - change the wb association of an inode * @inode: target inode * @new_wb_id: ID of the new wb * * Switch @inode's wb association to the wb identified by @new_wb_id. The * switching is performed asynchronously and may fail silently. */ static void inode_switch_wbs(struct inode *inode, int new_wb_id) { struct backing_dev_info *bdi = inode_to_bdi(inode); struct cgroup_subsys_state *memcg_css; struct inode_switch_wbs_context *isw; /* noop if seems to be already in progress */ if (inode->i_state & I_WB_SWITCH) return; /* avoid queueing a new switch if too many are already in flight */ if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT) return; isw = kzalloc(struct_size(isw, inodes, 2), GFP_ATOMIC); if (!isw) return; atomic_inc(&isw_nr_in_flight); /* find and pin the new wb */ rcu_read_lock(); memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys); if (memcg_css && !css_tryget(memcg_css)) memcg_css = NULL; rcu_read_unlock(); if (!memcg_css) goto out_free; isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); css_put(memcg_css); if (!isw->new_wb) goto out_free; if (!inode_prepare_wbs_switch(inode, isw->new_wb)) goto out_free; isw->inodes[0] = inode; /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the i_page * lock so that stat transfer can synchronize against them. * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); queue_rcu_work(isw_wq, &isw->work); return; out_free: atomic_dec(&isw_nr_in_flight); if (isw->new_wb) wb_put(isw->new_wb); kfree(isw); } static bool isw_prepare_wbs_switch(struct inode_switch_wbs_context *isw, struct list_head *list, int *nr) { struct inode *inode; list_for_each_entry(inode, list, i_io_list) { if (!inode_prepare_wbs_switch(inode, isw->new_wb)) continue; isw->inodes[*nr] = inode; (*nr)++; if (*nr >= WB_MAX_INODES_PER_ISW - 1) return true; } return false; } /** * cleanup_offline_cgwb - detach associated inodes * @wb: target wb * * Switch all inodes attached to @wb to a nearest living ancestor's wb in order * to eventually release the dying @wb. Returns %true if not all inodes were * switched and the function has to be restarted. */ bool cleanup_offline_cgwb(struct bdi_writeback *wb) { struct cgroup_subsys_state *memcg_css; struct inode_switch_wbs_context *isw; int nr; bool restart = false; isw = kzalloc(struct_size(isw, inodes, WB_MAX_INODES_PER_ISW), GFP_KERNEL); if (!isw) return restart; atomic_inc(&isw_nr_in_flight); for (memcg_css = wb->memcg_css->parent; memcg_css; memcg_css = memcg_css->parent) { isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL); if (isw->new_wb) break; } if (unlikely(!isw->new_wb)) isw->new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */ nr = 0; spin_lock(&wb->list_lock); /* * In addition to the inodes that have completed writeback, also switch * cgwbs for those inodes only with dirty timestamps. Otherwise, those * inodes won't be written back for a long time when lazytime is * enabled, and thus pinning the dying cgwbs. 
It won't break the * bandwidth restrictions, as writeback of inode metadata is not * accounted for. */ restart = isw_prepare_wbs_switch(isw, &wb->b_attached, &nr); if (!restart) restart = isw_prepare_wbs_switch(isw, &wb->b_dirty_time, &nr); spin_unlock(&wb->list_lock); /* no attached inodes? bail out */ if (nr == 0) { atomic_dec(&isw_nr_in_flight); wb_put(isw->new_wb); kfree(isw); return restart; } /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the i_page * lock so that stat transfer can synchronize against them. * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); queue_rcu_work(isw_wq, &isw->work); return restart; } /** * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it * @wbc: writeback_control of interest * @inode: target inode * * @inode is locked and about to be written back under the control of @wbc. * Record @inode's writeback context into @wbc and unlock the i_lock. On * writeback completion, wbc_detach_inode() should be called. This is used * to track the cgroup writeback context. */ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) { if (!inode_cgwb_enabled(inode)) { spin_unlock(&inode->i_lock); return; } wbc->wb = inode_to_wb(inode); wbc->inode = inode; wbc->wb_id = wbc->wb->memcg_css->id; wbc->wb_lcand_id = inode->i_wb_frn_winner; wbc->wb_tcand_id = 0; wbc->wb_bytes = 0; wbc->wb_lcand_bytes = 0; wbc->wb_tcand_bytes = 0; wb_get(wbc->wb); spin_unlock(&inode->i_lock); /* * A dying wb indicates that either the blkcg associated with the * memcg changed or the associated memcg is dying. In the first * case, a replacement wb should already be available and we should * refresh the wb immediately. In the second case, trying to * refresh will keep failing. */ if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css))) inode_switch_wbs(inode, wbc->wb_id); } EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode); /** * wbc_detach_inode - disassociate wbc from inode and perform foreign detection * @wbc: writeback_control of the just finished writeback * * To be called after a writeback attempt of an inode finishes and undoes * wbc_attach_and_unlock_inode(). Can be called under any context. * * As concurrent write sharing of an inode is expected to be very rare and * memcg only tracks page ownership on first-use basis severely confining * the usefulness of such sharing, cgroup writeback tracks ownership * per-inode. While the support for concurrent write sharing of an inode * is deemed unnecessary, an inode being written to by different cgroups at * different points in time is a lot more common, and, more importantly, * charging only by first-use can too readily lead to grossly incorrect * behaviors (single foreign page can lead to gigabytes of writeback to be * incorrectly attributed). * * To resolve this issue, cgroup writeback detects the majority dirtier of * an inode and transfers the ownership to it. To avoid unnecessary * oscillation, the detection mechanism keeps track of history and gives * out the switch verdict only if the foreign usage pattern is stable over * a certain amount of time and/or writeback attempts. * * On each writeback attempt, @wbc tries to detect the majority writer * using Boyer-Moore majority vote algorithm. 
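 *
 * (Editorial walk-through with made-up sizes, following the bookkeeping
 * in wbc_account_cgroup_owner() below.)  Suppose the folios written in
 * one attempt belong to two foreign cgroups A and B in the order
 * A:64k, B:16k, A:64k, B:8k, A:32k.  The running candidate starts out
 * empty, so A becomes wb_tcand_id with wb_tcand_bytes = 64k; B's 16k
 * and 8k are subtracted, A's later 64k and 32k are added, and A ends
 * the attempt as the candidate with 136k.  wbc_detach_inode() then
 * picks the round's winner as the largest of wb_bytes, wb_lcand_bytes
 * and wb_tcand_bytes.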
In addition to the byte * count from the majority voting, it also counts the bytes written for the * current wb and the last round's winner wb (max of last round's current * wb, the winner from two rounds ago, and the last round's majority * candidate). Keeping track of the historical winner helps the algorithm * to semi-reliably detect the most active writer even when it's not the * absolute majority. * * Once the winner of the round is determined, whether the winner is * foreign or not and how much IO time the round consumed is recorded in * inode->i_wb_frn_history. If the amount of recorded foreign IO time is * over a certain threshold, the switch verdict is given. */ void wbc_detach_inode(struct writeback_control *wbc) { struct bdi_writeback *wb = wbc->wb; struct inode *inode = wbc->inode; unsigned long avg_time, max_bytes, max_time; u16 history; int max_id; if (!wb) return; history = inode->i_wb_frn_history; avg_time = inode->i_wb_frn_avg_time; /* pick the winner of this round */ if (wbc->wb_bytes >= wbc->wb_lcand_bytes && wbc->wb_bytes >= wbc->wb_tcand_bytes) { max_id = wbc->wb_id; max_bytes = wbc->wb_bytes; } else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) { max_id = wbc->wb_lcand_id; max_bytes = wbc->wb_lcand_bytes; } else { max_id = wbc->wb_tcand_id; max_bytes = wbc->wb_tcand_bytes; } /* * Calculate the amount of IO time the winner consumed and fold it * into the running average kept per inode. If the consumed IO * time is lower than avag / WB_FRN_TIME_CUT_DIV, ignore it for * deciding whether to switch or not. This is to prevent one-off * small dirtiers from skewing the verdict. */ max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT, wb->avg_write_bandwidth); if (avg_time) avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) - (avg_time >> WB_FRN_TIME_AVG_SHIFT); else avg_time = max_time; /* immediate catch up on first run */ if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) { int slots; /* * The switch verdict is reached if foreign wb's consume * more than a certain proportion of IO time in a * WB_FRN_TIME_PERIOD. This is loosely tracked by 16 slot * history mask where each bit represents one sixteenth of * the period. Determine the number of slots to shift into * history from @max_time. */ slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT), (unsigned long)WB_FRN_HIST_MAX_SLOTS); history <<= slots; if (wbc->wb_id != max_id) history |= (1U << slots) - 1; if (history) trace_inode_foreign_history(inode, wbc, history); /* * Switch if the current wb isn't the consistent winner. * If there are multiple closely competing dirtiers, the * inode may switch across them repeatedly over time, which * is okay. The main goal is avoiding keeping an inode on * the wrong wb for an extended period of time. */ if (hweight16(history) > WB_FRN_HIST_THR_SLOTS) inode_switch_wbs(inode, max_id); } /* * Multiple instances of this function may race to update the * following fields but we don't mind occassional inaccuracies. */ inode->i_wb_frn_winner = max_id; inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX); inode->i_wb_frn_history = history; wb_put(wbc->wb); wbc->wb = NULL; } EXPORT_SYMBOL_GPL(wbc_detach_inode); /** * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership * @wbc: writeback_control of the writeback in progress * @page: page being written out * @bytes: number of bytes being written out * * @bytes from @page are about to written out during the writeback * controlled by @wbc. Keep the book for foreign inode detection. 
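 *
 * (Editorial worked example of the history arithmetic in
 * wbc_detach_inode() above; the numbers follow the WB_FRN_* definitions
 * and are otherwise made up.)  WB_FRN_TIME_PERIOD is 2 * 2^13 = 16384
 * time units (~2s) and WB_FRN_HIST_UNIT is 16384 / 16 = 1024, so each
 * history bit covers roughly 1/8 of a second.  A round whose winner
 * consumed max_time = 3000 units (~0.37s) shifts the 16-bit history
 * left by DIV_ROUND_UP(3000, 1024) = 3 slots and, if that winner was
 * foreign, also sets the three freshly shifted-in bits.  Once more than
 * WB_FRN_HIST_THR_SLOTS = 8 of the 16 bits are set, i.e. foreign
 * writers cover more than half of the ~2s window, inode_switch_wbs()
 * is invoked.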
See * wbc_detach_inode(). */ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes) { struct folio *folio; struct cgroup_subsys_state *css; int id; /* * pageout() path doesn't attach @wbc to the inode being written * out. This is intentional as we don't want the function to block * behind a slow cgroup. Ultimately, we want pageout() to kick off * regular writeback instead of writing things out itself. */ if (!wbc->wb || wbc->no_cgroup_owner) return; folio = page_folio(page); css = mem_cgroup_css_from_folio(folio); /* dead cgroups shouldn't contribute to inode ownership arbitration */ if (!(css->flags & CSS_ONLINE)) return; id = css->id; if (id == wbc->wb_id) { wbc->wb_bytes += bytes; return; } if (id == wbc->wb_lcand_id) wbc->wb_lcand_bytes += bytes; /* Boyer-Moore majority vote algorithm */ if (!wbc->wb_tcand_bytes) wbc->wb_tcand_id = id; if (id == wbc->wb_tcand_id) wbc->wb_tcand_bytes += bytes; else wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); } EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner); /** * wb_split_bdi_pages - split nr_pages to write according to bandwidth * @wb: target bdi_writeback to split @nr_pages to * @nr_pages: number of pages to write for the whole bdi * * Split @wb's portion of @nr_pages according to @wb's write bandwidth in * relation to the total write bandwidth of all wb's w/ dirty inodes on * @wb->bdi. */ static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages) { unsigned long this_bw = wb->avg_write_bandwidth; unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); if (nr_pages == LONG_MAX) return LONG_MAX; /* * This may be called on clean wb's and proportional distribution * may not make sense, just use the original @nr_pages in those * cases. In general, we wanna err on the side of writing more. */ if (!tot_bw || this_bw >= tot_bw) return nr_pages; else return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw); } /** * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi * @bdi: target backing_dev_info * @base_work: wb_writeback_work to issue * @skip_if_busy: skip wb's which already have writeback in progress * * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which * have dirty inodes. If @base_work->nr_page isn't %LONG_MAX, it's * distributed to the busy wbs according to each wb's proportion in the * total active write bandwidth of @bdi. */ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, struct wb_writeback_work *base_work, bool skip_if_busy) { struct bdi_writeback *last_wb = NULL; struct bdi_writeback *wb = list_entry(&bdi->wb_list, struct bdi_writeback, bdi_node); might_sleep(); restart: rcu_read_lock(); list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) { DEFINE_WB_COMPLETION(fallback_work_done, bdi); struct wb_writeback_work fallback_work; struct wb_writeback_work *work; long nr_pages; if (last_wb) { wb_put(last_wb); last_wb = NULL; } /* SYNC_ALL writes out I_DIRTY_TIME too */ if (!wb_has_dirty_io(wb) && (base_work->sync_mode == WB_SYNC_NONE || list_empty(&wb->b_dirty_time))) continue; if (skip_if_busy && writeback_in_progress(wb)) continue; nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages); work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { *work = *base_work; work->nr_pages = nr_pages; work->auto_free = 1; wb_queue_work(wb, work); continue; } /* * If wb_tryget fails, the wb has been shutdown, skip it. * * Pin @wb so that it stays on @bdi->wb_list. 
This allows * continuing iteration from @wb after dropping and * regrabbing rcu read lock. */ if (!wb_tryget(wb)) continue; /* alloc failed, execute synchronously using on-stack fallback */ work = &fallback_work; *work = *base_work; work->nr_pages = nr_pages; work->auto_free = 0; work->done = &fallback_work_done; wb_queue_work(wb, work); last_wb = wb; rcu_read_unlock(); wb_wait_for_completion(&fallback_work_done); goto restart; } rcu_read_unlock(); if (last_wb) wb_put(last_wb); } /** * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs * @bdi_id: target bdi id * @memcg_id: target memcg css id * @reason: reason why some writeback work initiated * @done: target wb_completion * * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id * with the specified parameters. */ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done) { struct backing_dev_info *bdi; struct cgroup_subsys_state *memcg_css; struct bdi_writeback *wb; struct wb_writeback_work *work; unsigned long dirty; int ret; /* lookup bdi and memcg */ bdi = bdi_get_by_id(bdi_id); if (!bdi) return -ENOENT; rcu_read_lock(); memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys); if (memcg_css && !css_tryget(memcg_css)) memcg_css = NULL; rcu_read_unlock(); if (!memcg_css) { ret = -ENOENT; goto out_bdi_put; } /* * And find the associated wb. If the wb isn't there already * there's nothing to flush, don't create one. */ wb = wb_get_lookup(bdi, memcg_css); if (!wb) { ret = -ENOENT; goto out_css_put; } /* * The caller is attempting to write out most of * the currently dirty pages. Let's take the current dirty page * count and inflate it by 25% which should be large enough to * flush out most dirty pages while avoiding getting livelocked by * concurrent dirtiers. * * BTW the memcg stats are flushed periodically and this is best-effort * estimation, so some potential error is ok. */ dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY); dirty = dirty * 10 / 8; /* issue the writeback work */ work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN); if (work) { work->nr_pages = dirty; work->sync_mode = WB_SYNC_NONE; work->range_cyclic = 1; work->reason = reason; work->done = done; work->auto_free = 1; wb_queue_work(wb, work); ret = 0; } else { ret = -ENOMEM; } wb_put(wb); out_css_put: css_put(memcg_css); out_bdi_put: bdi_put(bdi); return ret; } /** * cgroup_writeback_umount - flush inode wb switches for umount * * This function is called when a super_block is about to be destroyed and * flushes in-flight inode wb switches. An inode wb switch goes through * RCU and then workqueue, so the two need to be flushed in order to ensure * that all previously scheduled switches are finished. As wb switches are * rare occurrences and synchronize_rcu() can take a while, perform * flushing iff wb switches are in flight. */ void cgroup_writeback_umount(void) { /* * SB_ACTIVE should be reliably cleared before checking * isw_nr_in_flight, see generic_shutdown_super(). */ smp_mb(); if (atomic_read(&isw_nr_in_flight)) { /* * Use rcu_barrier() to wait for all pending callbacks to * ensure that all in-flight wb switches are in the workqueue. 
*/ rcu_barrier(); flush_workqueue(isw_wq); } } static int __init cgroup_writeback_init(void) { isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0); if (!isw_wq) return -ENOMEM; return 0; } fs_initcall(cgroup_writeback_init); #else /* CONFIG_CGROUP_WRITEBACK */ static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } static void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); WARN_ON_ONCE(inode->i_state & I_FREEING); inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } static struct bdi_writeback * locked_inode_to_wb_and_lock_list(struct inode *inode) __releases(&inode->i_lock) __acquires(&wb->list_lock) { struct bdi_writeback *wb = inode_to_wb(inode); spin_unlock(&inode->i_lock); spin_lock(&wb->list_lock); return wb; } static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode) __acquires(&wb->list_lock) { struct bdi_writeback *wb = inode_to_wb(inode); spin_lock(&wb->list_lock); return wb; } static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages) { return nr_pages; } static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, struct wb_writeback_work *base_work, bool skip_if_busy) { might_sleep(); if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) { base_work->auto_free = 0; wb_queue_work(&bdi->wb, base_work); } } #endif /* CONFIG_CGROUP_WRITEBACK */ /* * Add in the number of potentially dirty inodes, because each inode * write can dirty pagecache in the underlying blockdev. */ static unsigned long get_nr_dirty_pages(void) { return global_node_page_state(NR_FILE_DIRTY) + get_nr_dirty_inodes(); } static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason) { if (!wb_has_dirty_io(wb)) return; /* * All callers of this function want to start writeback of all * dirty pages. Places like vmscan can call this at a very * high frequency, causing pointless allocations of tons of * work items and keeping the flusher threads busy retrieving * that work. Ensure that we only allow one of them pending and * inflight at the time. */ if (test_bit(WB_start_all, &wb->state) || test_and_set_bit(WB_start_all, &wb->state)) return; wb->start_all_reason = reason; wb_wakeup(wb); } /** * wb_start_background_writeback - start background writeback * @wb: bdi_writback to write from * * Description: * This makes sure WB_SYNC_NONE background writeback happens. When * this function returns, it is only guaranteed that for given wb * some IO is happening if we are over background dirty threshold. * Caller need not hold sb s_umount semaphore. */ void wb_start_background_writeback(struct bdi_writeback *wb) { /* * We just wake up the flusher thread. It will perform background * writeback as soon as there is no other work to do. */ trace_writeback_wake_background(wb); wb_wakeup(wb); } /* * Remove the inode from the writeback list it is on. 
*/ void inode_io_list_del(struct inode *inode) { struct bdi_writeback *wb; wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); spin_unlock(&inode->i_lock); spin_unlock(&wb->list_lock); } EXPORT_SYMBOL(inode_io_list_del); /* * mark an inode as under writeback on the sb */ void sb_mark_inode_writeback(struct inode *inode) { struct super_block *sb = inode->i_sb; unsigned long flags; if (list_empty(&inode->i_wb_list)) { spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); if (list_empty(&inode->i_wb_list)) { list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb); trace_sb_mark_inode_writeback(inode); } spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); } } /* * clear an inode as under writeback on the sb */ void sb_clear_inode_writeback(struct inode *inode) { struct super_block *sb = inode->i_sb; unsigned long flags; if (!list_empty(&inode->i_wb_list)) { spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); if (!list_empty(&inode->i_wb_list)) { list_del_init(&inode->i_wb_list); trace_sb_clear_inode_writeback(inode); } spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); } } /* * Redirty an inode: set its when-it-was dirtied timestamp and move it to the * furthest end of its superblock's dirty-inode list. * * Before stamping the inode's ->dirtied_when, we check to see whether it is * already the most-recently-dirtied inode on the b_dirty list. If that is * the case then the inode must have been redirtied while it was being written * out and we don't reset its dirtied_when. */ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) { assert_spin_locked(&inode->i_lock); inode->i_state &= ~I_SYNC_QUEUED; /* * When the inode is being freed just don't bother with dirty list * tracking. Flush worker will ignore this inode anyway and it will * trigger assertions in inode_io_list_move_locked(). */ if (inode->i_state & I_FREEING) { list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); return; } if (!list_empty(&wb->b_dirty)) { struct inode *tail; tail = wb_inode(wb->b_dirty.next); if (time_before(inode->dirtied_when, tail->dirtied_when)) inode->dirtied_when = jiffies; } inode_io_list_move_locked(inode, wb, &wb->b_dirty); } static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) { spin_lock(&inode->i_lock); redirty_tail_locked(inode, wb); spin_unlock(&inode->i_lock); } /* * requeue inode for re-scanning after bdi->b_io list is exhausted. */ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) { inode_io_list_move_locked(inode, wb, &wb->b_more_io); } static void inode_sync_complete(struct inode *inode) { inode->i_state &= ~I_SYNC; /* If inode is clean an unused, put it into LRU now... */ inode_add_lru(inode); /* Waiters must see I_SYNC cleared before being woken up */ smp_mb(); wake_up_bit(&inode->i_state, __I_SYNC); } static bool inode_dirtied_after(struct inode *inode, unsigned long t) { bool ret = time_after(inode->dirtied_when, t); #ifndef CONFIG_64BIT /* * For inodes being constantly redirtied, dirtied_when can get stuck. * It _appears_ to be in the future, but is actually in distant past. * This test is necessary to prevent such wrapped-around relative times * from permanently stopping the whole bdi writeback. */ ret = ret && time_before_eq(inode->dirtied_when, jiffies); #endif return ret; } /* * Move expired (dirtied before dirtied_before) dirty inodes from * @delaying_queue to @dispatch_queue. 
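 *
 * (Editorial example with made-up times.)  For kupdate-style writeback,
 * dirtied_before ends up roughly "jiffies - 30 * HZ" with the usual
 * 30 second expiry, so an inode whose dirtied_when is 45 seconds in the
 * past is moved to @dispatch_queue, while one dirtied 10 seconds ago,
 * and everything dirtied after it, stays on @delaying_queue for a later
 * pass.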
*/ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, unsigned long dirtied_before) { LIST_HEAD(tmp); struct list_head *pos, *node; struct super_block *sb = NULL; struct inode *inode; int do_sb_sort = 0; int moved = 0; while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); if (inode_dirtied_after(inode, dirtied_before)) break; spin_lock(&inode->i_lock); list_move(&inode->i_io_list, &tmp); moved++; inode->i_state |= I_SYNC_QUEUED; spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) continue; if (sb && sb != inode->i_sb) do_sb_sort = 1; sb = inode->i_sb; } /* just one sb in list, splice to dispatch_queue and we're done */ if (!do_sb_sort) { list_splice(&tmp, dispatch_queue); goto out; } /* * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue', * we don't take inode->i_lock here because it is just a pointless overhead. * Inode is already marked as I_SYNC_QUEUED so writeback list handling is * fully under our control. */ while (!list_empty(&tmp)) { sb = wb_inode(tmp.prev)->i_sb; list_for_each_prev_safe(pos, node, &tmp) { inode = wb_inode(pos); if (inode->i_sb == sb) list_move(&inode->i_io_list, dispatch_queue); } } out: return moved; } /* * Queue all expired dirty inodes for io, eldest first. * Before * newly dirtied b_dirty b_io b_more_io * =============> gf edc BA * After * newly dirtied b_dirty b_io b_more_io * =============> g fBAedc * | * +--> dequeue for IO */ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work, unsigned long dirtied_before) { int moved; unsigned long time_expire_jif = dirtied_before; assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before); if (!work->for_sync) time_expire_jif = jiffies - dirtytime_expire_interval * HZ; moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, time_expire_jif); if (moved) wb_io_lists_populated(wb); trace_writeback_queue_io(wb, work, dirtied_before, moved); } static int write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) { trace_writeback_write_inode_start(inode, wbc); ret = inode->i_sb->s_op->write_inode(inode, wbc); trace_writeback_write_inode(inode, wbc); return ret; } return 0; } /* * Wait for writeback on an inode to complete. Called with i_lock held. * Caller must make sure inode cannot go away when we drop i_lock. */ static void __inode_wait_for_writeback(struct inode *inode) __releases(inode->i_lock) __acquires(inode->i_lock) { DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); wait_queue_head_t *wqh; wqh = bit_waitqueue(&inode->i_state, __I_SYNC); while (inode->i_state & I_SYNC) { spin_unlock(&inode->i_lock); __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); spin_lock(&inode->i_lock); } } /* * Wait for writeback on an inode to complete. Caller must have inode pinned. */ void inode_wait_for_writeback(struct inode *inode) { spin_lock(&inode->i_lock); __inode_wait_for_writeback(inode); spin_unlock(&inode->i_lock); } /* * Sleep until I_SYNC is cleared. This function must be called with i_lock * held and drops it. It is aimed for callers not holding any inode reference * so once i_lock is dropped, inode can go away. 
*/ static void inode_sleep_on_writeback(struct inode *inode) __releases(inode->i_lock) { DEFINE_WAIT(wait); wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC); int sleep; prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); sleep = inode->i_state & I_SYNC; spin_unlock(&inode->i_lock); if (sleep) schedule(); finish_wait(wqh, &wait); } /* * Find proper writeback list for the inode depending on its current state and * possibly also change of its state while we were doing writeback. Here we * handle things such as livelock prevention or fairness of writeback among * inodes. This function can be called only by flusher thread - noone else * processes all inodes in writeback lists and requeueing inodes behind flusher * thread's back can have unexpected consequences. */ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, struct writeback_control *wbc) { if (inode->i_state & I_FREEING) return; /* * Sync livelock prevention. Each inode is tagged and synced in one * shot. If still dirty, it will be redirty_tail()'ed below. Update * the dirty time to prevent enqueue and sync it again. */ if ((inode->i_state & I_DIRTY) && (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) inode->dirtied_when = jiffies; if (wbc->pages_skipped) { /* * Writeback is not making progress due to locked buffers. * Skip this inode for now. Although having skipped pages * is odd for clean inodes, it can happen for some * filesystems so handle that gracefully. */ if (inode->i_state & I_DIRTY_ALL) redirty_tail_locked(inode, wb); else inode_cgwb_move_to_attached(inode, wb); return; } if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { /* * We didn't write back all the pages. nfs_writepages() * sometimes bales out without doing anything. */ if (wbc->nr_to_write <= 0) { /* Slice used up. Queue for next turn. */ requeue_io(inode, wb); } else { /* * Writeback blocked by something other than * congestion. Delay the inode for some time to * avoid spinning on the CPU (100% iowait) * retrying writeback of the dirty page/inode * that cannot be performed immediately. */ redirty_tail_locked(inode, wb); } } else if (inode->i_state & I_DIRTY) { /* * Filesystems can dirty the inode during writeback operations, * such as delayed allocation during submission or metadata * updates after data IO completion. */ redirty_tail_locked(inode, wb); } else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); inode->i_state &= ~I_SYNC_QUEUED; } else { /* The inode is clean. Remove from writeback lists. */ inode_cgwb_move_to_attached(inode, wb); } } /* * Write out an inode and its dirty pages (or some of its dirty pages, depending * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state. * * This doesn't remove the inode from the writeback list it is on, except * potentially to move it from b_dirty_time to b_dirty due to timestamp * expiration. The caller is otherwise responsible for writeback list handling. * * The caller is also responsible for setting the I_SYNC flag beforehand and * calling inode_sync_complete() to clear it afterwards. 
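 *
 * A condensed sketch of the expected calling pattern (editorial
 * illustration based on writeback_sb_inodes() below, not a verbatim
 * excerpt):
 *
 *	spin_lock(&inode->i_lock);
 *	inode->i_state |= I_SYNC;
 *	wbc_attach_and_unlock_inode(&wbc, inode);	(drops i_lock)
 *	__writeback_single_inode(inode, &wbc);
 *	wbc_detach_inode(&wbc);
 *	spin_lock(&inode->i_lock);
 *	... requeue the inode or drop it from the IO lists ...
 *	inode_sync_complete(inode);			(clears I_SYNC)
 *	spin_unlock(&inode->i_lock);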
*/ static int __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; long nr_to_write = wbc->nr_to_write; unsigned dirty; int ret; WARN_ON(!(inode->i_state & I_SYNC)); trace_writeback_single_inode_start(inode, wbc, nr_to_write); ret = do_writepages(mapping, wbc); /* * Make sure to wait on the data before writing out the metadata. * This is important for filesystems that modify metadata on data * I/O completion. We don't do it for sync(2) writeback because it has a * separate, external IO completion path and ->sync_fs for guaranteeing * inode metadata is written back correctly. */ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; } /* * If the inode has dirty timestamps and we need to write them, call * mark_inode_dirty_sync() to notify the filesystem about it and to * change I_DIRTY_TIME into I_DIRTY_SYNC. */ if ((inode->i_state & I_DIRTY_TIME) && (wbc->sync_mode == WB_SYNC_ALL || time_after(jiffies, inode->dirtied_time_when + dirtytime_expire_interval * HZ))) { trace_writeback_lazytime(inode); mark_inode_dirty_sync(inode); } /* * Get and clear the dirty flags from i_state. This needs to be done * after calling writepages because some filesystems may redirty the * inode during writepages due to delalloc. It also needs to be done * after handling timestamp expiration, as that may dirty the inode too. */ spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; inode->i_state &= ~dirty; /* * Paired with smp_mb() in __mark_inode_dirty(). This allows * __mark_inode_dirty() to test i_state without grabbing i_lock - * either they see the I_DIRTY bits cleared or we see the dirtied * inode. * * I_DIRTY_PAGES is always cleared together above even if @mapping * still has dirty pages. The flag is reinstated after smp_mb() if * necessary. This guarantees that either __mark_inode_dirty() * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY. */ smp_mb(); if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) inode->i_state |= I_DIRTY_PAGES; else if (unlikely(inode->i_state & I_PINNING_NETFS_WB)) { if (!(inode->i_state & I_DIRTY_PAGES)) { inode->i_state &= ~I_PINNING_NETFS_WB; wbc->unpinned_netfs_wb = true; dirty |= I_PINNING_NETFS_WB; /* Cause write_inode */ } } spin_unlock(&inode->i_lock); /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & ~I_DIRTY_PAGES) { int err = write_inode(inode, wbc); if (ret == 0) ret = err; } wbc->unpinned_netfs_wb = false; trace_writeback_single_inode(inode, wbc, nr_to_write); return ret; } /* * Write out an inode's dirty data and metadata on-demand, i.e. separately from * the regular batched writeback done by the flusher threads in * writeback_sb_inodes(). @wbc controls various aspects of the write, such as * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE). * * To prevent the inode from going away, either the caller must have a reference * to the inode, or the inode must have I_WILL_FREE or I_FREEING set. */ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct bdi_writeback *wb; int ret = 0; spin_lock(&inode->i_lock); if (!atomic_read(&inode->i_count)) WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); else WARN_ON(inode->i_state & I_WILL_FREE); if (inode->i_state & I_SYNC) { /* * Writeback is already running on the inode. For WB_SYNC_NONE, * that's enough and we can just return. 
For WB_SYNC_ALL, we * must wait for the existing writeback to complete, then do * writeback again if there's anything left. */ if (wbc->sync_mode != WB_SYNC_ALL) goto out; __inode_wait_for_writeback(inode); } WARN_ON(inode->i_state & I_SYNC); /* * If the inode is already fully clean, then there's nothing to do. * * For data-integrity syncs we also need to check whether any pages are * still under writeback, e.g. due to prior WB_SYNC_NONE writeback. If * there are any such pages, we'll need to wait for them. */ if (!(inode->i_state & I_DIRTY_ALL) && (wbc->sync_mode != WB_SYNC_ALL || !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) goto out; inode->i_state |= I_SYNC; wbc_attach_and_unlock_inode(wbc, inode); ret = __writeback_single_inode(inode, wbc); wbc_detach_inode(wbc); wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* * If the inode is freeing, its i_io_list shoudn't be updated * as it can be finally deleted at this moment. */ if (!(inode->i_state & I_FREEING)) { /* * If the inode is now fully clean, then it can be safely * removed from its writeback list (if any). Otherwise the * flusher threads are responsible for the writeback lists. */ if (!(inode->i_state & I_DIRTY_ALL)) inode_cgwb_move_to_attached(inode, wb); else if (!(inode->i_state & I_SYNC_QUEUED)) { if ((inode->i_state & I_DIRTY)) redirty_tail_locked(inode, wb); else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); } } } spin_unlock(&wb->list_lock); inode_sync_complete(inode); out: spin_unlock(&inode->i_lock); return ret; } static long writeback_chunk_size(struct bdi_writeback *wb, struct wb_writeback_work *work) { long pages; /* * WB_SYNC_ALL mode does livelock avoidance by syncing dirty * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX * here avoids calling into writeback_inodes_wb() more than once. * * The intended call sequence for WB_SYNC_ALL writeback is: * * wb_writeback() * writeback_sb_inodes() <== called only once * write_cache_pages() <== called once for each inode * (quickly) tag currently dirty pages * (maybe slowly) sync all tagged pages */ if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) pages = LONG_MAX; else { pages = min(wb->avg_write_bandwidth / 2, global_wb_domain.dirty_limit / DIRTY_SCOPE); pages = min(pages, work->nr_pages); pages = round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES); } return pages; } /* * Write a portion of b_io inodes which belong to @sb. * * Return the number of pages and/or inodes written. * * NOTE! This is called with wb->list_lock held, and will * unlock and relock that for each inode it ends up doing * IO for. */ static long writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, struct wb_writeback_work *work) { struct writeback_control wbc = { .sync_mode = work->sync_mode, .tagged_writepages = work->tagged_writepages, .for_kupdate = work->for_kupdate, .for_background = work->for_background, .for_sync = work->for_sync, .range_cyclic = work->range_cyclic, .range_start = 0, .range_end = LLONG_MAX, }; unsigned long start_time = jiffies; long write_chunk; long total_wrote = 0; /* count both pages and inodes */ while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct bdi_writeback *tmp_wb; long wrote; if (inode->i_sb != sb) { if (work->sb) { /* * We only want to write back data for this * superblock, move all inodes not belonging * to it back onto the dirty list. 
*/ redirty_tail(inode, wb); continue; } /* * The inode belongs to a different superblock. * Bounce back to the caller to unpin this and * pin the next superblock. */ break; } /* * Don't bother with new inodes or inodes being freed, first * kind does not need periodic writeout yet, and for the latter * kind writeout is handled by the freer. */ spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { redirty_tail_locked(inode, wb); spin_unlock(&inode->i_lock); continue; } if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) { /* * If this inode is locked for writeback and we are not * doing writeback-for-data-integrity, move it to * b_more_io so that writeback can proceed with the * other inodes on s_io. * * We'll have another go at writing back this inode * when we completed a full scan of b_io. */ requeue_io(inode, wb); spin_unlock(&inode->i_lock); trace_writeback_sb_inodes_requeue(inode); continue; } spin_unlock(&wb->list_lock); /* * We already requeued the inode if it had I_SYNC set and we * are doing WB_SYNC_NONE writeback. So this catches only the * WB_SYNC_ALL case. */ if (inode->i_state & I_SYNC) { /* Wait for I_SYNC. This function drops i_lock... */ inode_sleep_on_writeback(inode); /* Inode may be gone, start again */ spin_lock(&wb->list_lock); continue; } inode->i_state |= I_SYNC; wbc_attach_and_unlock_inode(&wbc, inode); write_chunk = writeback_chunk_size(wb, work); wbc.nr_to_write = write_chunk; wbc.pages_skipped = 0; /* * We use I_SYNC to pin the inode in memory. While it is set * evict_inode() will wait so the inode cannot be freed. */ __writeback_single_inode(inode, &wbc); wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; wrote = wrote < 0 ? 0 : wrote; total_wrote += wrote; if (need_resched()) { /* * We're trying to balance between building up a nice * long list of IOs to improve our merge rate, and * getting those IOs out quickly for anyone throttling * in balance_dirty_pages(). cond_resched() doesn't * unplug, so get our IOs out the door before we * give up the CPU. */ blk_flush_plug(current->plug, false); cond_resched(); } /* * Requeue @inode if still dirty. Be careful as @inode may * have been switched to another wb in the meantime. */ tmp_wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) total_wrote++; requeue_inode(inode, tmp_wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); if (unlikely(tmp_wb != wb)) { spin_unlock(&tmp_wb->list_lock); spin_lock(&wb->list_lock); } /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. */ if (total_wrote) { if (time_is_before_jiffies(start_time + HZ / 10UL)) break; if (work->nr_pages <= 0) break; } } return total_wrote; } static long __writeback_inodes_wb(struct bdi_writeback *wb, struct wb_writeback_work *work) { unsigned long start_time = jiffies; long wrote = 0; while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct super_block *sb = inode->i_sb; if (!super_trylock_shared(sb)) { /* * super_trylock_shared() may fail consistently due to * s_umount being grabbed by someone else. Don't use * requeue_io() to avoid busy retrying the inode/sb. 
*/ redirty_tail(inode, wb); continue; } wrote += writeback_sb_inodes(sb, wb, work); up_read(&sb->s_umount); /* refer to the same tests at the end of writeback_sb_inodes */ if (wrote) { if (time_is_before_jiffies(start_time + HZ / 10UL)) break; if (work->nr_pages <= 0) break; } } /* Leave any unwritten inodes on b_io */ return wrote; } static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason) { struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = reason, }; struct blk_plug plug; blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) queue_io(wb, &work, jiffies); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); blk_finish_plug(&plug); return nr_pages - work.nr_pages; } /* * Explicit flushing or periodic writeback of "old" data. * * Define "old": the first time one of an inode's pages is dirtied, we mark the * dirtying-time in the inode's address_space. So this periodic writeback code * just walks the superblock inode list, writing back any inodes which are * older than a specific point in time. * * Try to run once per dirty_writeback_interval. But if a writeback event * takes longer than a dirty_writeback_interval interval, then leave a * one-second gap. * * dirtied_before takes precedence over nr_to_write. So we'll only write back * all dirty pages if they are all attached to "old" mappings. */ static long wb_writeback(struct bdi_writeback *wb, struct wb_writeback_work *work) { long nr_pages = work->nr_pages; unsigned long dirtied_before = jiffies; struct inode *inode; long progress; struct blk_plug plug; blk_start_plug(&plug); for (;;) { /* * Stop writeback when nr_pages has been consumed */ if (work->nr_pages <= 0) break; /* * Background writeout and kupdate-style writeback may * run forever. Stop them if there is other work to do * so that e.g. sync can proceed. They'll be restarted * after the other works are all done. */ if ((work->for_background || work->for_kupdate) && !list_empty(&wb->work_list)) break; /* * For background writeout, stop when we are below the * background dirty threshold */ if (work->for_background && !wb_over_bg_thresh(wb)) break; spin_lock(&wb->list_lock); /* * Kupdate and background works are special and we want to * include all inodes that need writing. Livelock avoidance is * handled by these works yielding to any other work so we are * safe. */ if (work->for_kupdate) { dirtied_before = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) dirtied_before = jiffies; trace_writeback_start(wb, work); if (list_empty(&wb->b_io)) queue_io(wb, work, dirtied_before); if (work->sb) progress = writeback_sb_inodes(work->sb, wb, work); else progress = __writeback_inodes_wb(wb, work); trace_writeback_written(wb, work); /* * Did we write something? Try for more * * Dirty inodes are moved to b_io for writeback in batches. * The completion of the current batch does not necessarily * mean the overall work is done. So we keep looping as long * as made some progress on cleaning pages or inodes. */ if (progress) { spin_unlock(&wb->list_lock); continue; } /* * No more inodes for IO, bail */ if (list_empty(&wb->b_more_io)) { spin_unlock(&wb->list_lock); break; } /* * Nothing written. Wait for some inode to * become available for writeback. Otherwise * we'll just busyloop. 
*/ trace_writeback_wait(wb, work); inode = wb_inode(wb->b_more_io.prev); spin_lock(&inode->i_lock); spin_unlock(&wb->list_lock); /* This function drops i_lock... */ inode_sleep_on_writeback(inode); } blk_finish_plug(&plug); return nr_pages - work->nr_pages; } /* * Return the next wb_writeback_work struct that hasn't been processed yet. */ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) { struct wb_writeback_work *work = NULL; spin_lock_irq(&wb->work_lock); if (!list_empty(&wb->work_list)) { work = list_entry(wb->work_list.next, struct wb_writeback_work, list); list_del_init(&work->list); } spin_unlock_irq(&wb->work_lock); return work; } static long wb_check_background_flush(struct bdi_writeback *wb) { if (wb_over_bg_thresh(wb)) { struct wb_writeback_work work = { .nr_pages = LONG_MAX, .sync_mode = WB_SYNC_NONE, .for_background = 1, .range_cyclic = 1, .reason = WB_REASON_BACKGROUND, }; return wb_writeback(wb, &work); } return 0; } static long wb_check_old_data_flush(struct bdi_writeback *wb) { unsigned long expired; long nr_pages; /* * When set to zero, disable periodic writeback */ if (!dirty_writeback_interval) return 0; expired = wb->last_old_flush + msecs_to_jiffies(dirty_writeback_interval * 10); if (time_before(jiffies, expired)) return 0; wb->last_old_flush = jiffies; nr_pages = get_nr_dirty_pages(); if (nr_pages) { struct wb_writeback_work work = { .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, .for_kupdate = 1, .range_cyclic = 1, .reason = WB_REASON_PERIODIC, }; return wb_writeback(wb, &work); } return 0; } static long wb_check_start_all(struct bdi_writeback *wb) { long nr_pages; if (!test_bit(WB_start_all, &wb->state)) return 0; nr_pages = get_nr_dirty_pages(); if (nr_pages) { struct wb_writeback_work work = { .nr_pages = wb_split_bdi_pages(wb, nr_pages), .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = wb->start_all_reason, }; nr_pages = wb_writeback(wb, &work); } clear_bit(WB_start_all, &wb->state); return nr_pages; } /* * Retrieve work items and do the writeback they describe */ static long wb_do_writeback(struct bdi_writeback *wb) { struct wb_writeback_work *work; long wrote = 0; set_bit(WB_writeback_running, &wb->state); while ((work = get_next_work_item(wb)) != NULL) { trace_writeback_exec(wb, work); wrote += wb_writeback(wb, work); finish_writeback_work(wb, work); } /* * Check for a flush-everything request */ wrote += wb_check_start_all(wb); /* * Check for periodic writeback, kupdated() style */ wrote += wb_check_old_data_flush(wb); wrote += wb_check_background_flush(wb); clear_bit(WB_writeback_running, &wb->state); return wrote; } /* * Handle writeback of dirty data for the device backed by this bdi. Also * reschedules periodically and does kupdated style flushing. */ void wb_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(to_delayed_work(work), struct bdi_writeback, dwork); long pages_written; set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); if (likely(!current_is_workqueue_rescuer() || !test_bit(WB_registered, &wb->state))) { /* * The normal path. Keep writing back @wb until its * work_list is empty. Note that this path is also taken * if @wb is shutting down even when we're running off the * rescuer as work_list needs to be drained. */ do { pages_written = wb_do_writeback(wb); trace_writeback_pages_written(pages_written); } while (!list_empty(&wb->work_list)); } else { /* * bdi_wq can't get enough workers and we're running off * the emergency worker. Don't hog it. 
Hopefully, 1024 is * enough for efficient IO. */ pages_written = writeback_inodes_wb(wb, 1024, WB_REASON_FORKER_THREAD); trace_writeback_pages_written(pages_written); } if (!list_empty(&wb->work_list)) wb_wakeup(wb); else if (wb_has_dirty_io(wb) && dirty_writeback_interval) wb_wakeup_delayed(wb); } /* * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero, * write back the whole world. */ static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason) { struct bdi_writeback *wb; if (!bdi_has_dirty_io(bdi)) return; list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) wb_start_writeback(wb, reason); } void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason) { rcu_read_lock(); __wakeup_flusher_threads_bdi(bdi, reason); rcu_read_unlock(); } /* * Wakeup the flusher threads to start writeback of all currently dirty pages */ void wakeup_flusher_threads(enum wb_reason reason) { struct backing_dev_info *bdi; /* * If we are expecting writeback progress we must submit plugged IO. */ blk_flush_plug(current->plug, true); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) __wakeup_flusher_threads_bdi(bdi, reason); rcu_read_unlock(); } /* * Wake up bdi's periodically to make sure dirtytime inodes gets * written back periodically. We deliberately do *not* check the * b_dirtytime list in wb_has_dirty_io(), since this would cause the * kernel to be constantly waking up once there are any dirtytime * inodes on the system. So instead we define a separate delayed work * function which gets called much more rarely. (By default, only * once every 12 hours.) * * If there is any other write activity going on in the file system, * this function won't be necessary. But if the only thing that has * happened on the file system is a dirtytime inode caused by an atime * update, we need this infrastructure below to make sure that inode * eventually gets pushed out to disk. */ static void wakeup_dirtytime_writeback(struct work_struct *w); static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); static void wakeup_dirtytime_writeback(struct work_struct *w) { struct backing_dev_info *bdi; rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { struct bdi_writeback *wb; list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) if (!list_empty(&wb->b_dirty_time)) wb_wakeup(wb); } rcu_read_unlock(); schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); } static int __init start_dirtytime_writeback(void) { schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); return 0; } __initcall(start_dirtytime_writeback); int dirtytime_interval_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) mod_delayed_work(system_wq, &dirtytime_work, 0); return ret; } /** * __mark_inode_dirty - internal function to mark an inode dirty * * @inode: inode to mark * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined * with I_DIRTY_PAGES. * * Mark an inode as dirty. We notify the filesystem, then update the inode's * dirty flags. Then, if needed we add the inode to the appropriate dirty list. * * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync() * instead of calling this directly. * * CAREFUL! 
We only add the inode to the dirty list if it is hashed or if it * refers to a blockdev. Unhashed inodes will never be added to the dirty list * even if they are later hashed, as they will have been marked dirty already. * * In short, ensure you hash any inodes _before_ you start marking them dirty. * * Note that for blockdevs, inode->dirtied_when represents the dirtying time of * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of * the kernel-internal blockdev inode represents the dirtying time of the * blockdev's pages. This is why for I_DIRTY_PAGES we always use * page->mapping->host, so the page-dirtying time is recorded in the internal * blockdev inode. */ void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block *sb = inode->i_sb; int dirtytime = 0; struct bdi_writeback *wb = NULL; trace_writeback_mark_inode_dirty(inode, flags); if (flags & I_DIRTY_INODE) { /* * Inode timestamp update will piggback on this dirtying. * We tell ->dirty_inode callback that timestamps need to * be updated by setting I_DIRTY_TIME in flags. */ if (inode->i_state & I_DIRTY_TIME) { spin_lock(&inode->i_lock); if (inode->i_state & I_DIRTY_TIME) { inode->i_state &= ~I_DIRTY_TIME; flags |= I_DIRTY_TIME; } spin_unlock(&inode->i_lock); } /* * Notify the filesystem about the inode being dirtied, so that * (if needed) it can update on-disk fields and journal the * inode. This is only needed when the inode itself is being * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not * for just I_DIRTY_PAGES or I_DIRTY_TIME. */ trace_writeback_dirty_inode_start(inode, flags); if (sb->s_op->dirty_inode) sb->s_op->dirty_inode(inode, flags & (I_DIRTY_INODE | I_DIRTY_TIME)); trace_writeback_dirty_inode(inode, flags); /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */ flags &= ~I_DIRTY_TIME; } else { /* * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing. * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME * in one call to __mark_inode_dirty().) */ dirtytime = flags & I_DIRTY_TIME; WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME); } /* * Paired with smp_mb() in __writeback_single_inode() for the * following lockless i_state test. See there for details. */ smp_mb(); if ((inode->i_state & flags) == flags) return; spin_lock(&inode->i_lock); if ((inode->i_state & flags) != flags) { const int was_dirty = inode->i_state & I_DIRTY; inode_attach_wb(inode, NULL); inode->i_state |= flags; /* * Grab inode's wb early because it requires dropping i_lock and we * need to make sure following checks happen atomically with dirty * list handling so that we don't move inodes under flush worker's * hands. */ if (!was_dirty) { wb = locked_inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); } /* * If the inode is queued for writeback by flush worker, just * update its dirty state. Once the flush worker is done with * the inode it will place it on the appropriate superblock * list, based upon its state. */ if (inode->i_state & I_SYNC_QUEUED) goto out_unlock; /* * Only add valid (hashed) inodes to the superblock's * dirty list. Add blockdev inodes as well. */ if (!S_ISBLK(inode->i_mode)) { if (inode_unhashed(inode)) goto out_unlock; } if (inode->i_state & I_FREEING) goto out_unlock; /* * If the inode was already on b_dirty/b_io/b_more_io, don't * reposition it (that would break b_dirty time-ordering). 
*/ if (!was_dirty) { struct list_head *dirty_list; bool wakeup_bdi = false; inode->dirtied_when = jiffies; if (dirtytime) inode->dirtied_time_when = jiffies; if (inode->i_state & I_DIRTY) dirty_list = &wb->b_dirty; else dirty_list = &wb->b_dirty_time; wakeup_bdi = inode_io_list_move_locked(inode, wb, dirty_list); spin_unlock(&wb->list_lock); spin_unlock(&inode->i_lock); trace_writeback_dirty_inode_enqueue(inode); /* * If this is the first dirty inode for this bdi, * we have to wake-up the corresponding bdi thread * to make sure background write-back happens * later. */ if (wakeup_bdi && (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) wb_wakeup_delayed(wb); return; } } out_unlock: if (wb) spin_unlock(&wb->list_lock); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(__mark_inode_dirty); /* * The @s_sync_lock is used to serialise concurrent sync operations * to avoid lock contention problems with concurrent wait_sb_inodes() calls. * Concurrent callers will block on the s_sync_lock rather than doing contending * walks. The queueing maintains sync(2) required behaviour as all the IO that * has been issued up to the time this function is enter is guaranteed to be * completed by the time we have gained the lock and waited for all IO that is * in progress regardless of the order callers are granted the lock. */ static void wait_sb_inodes(struct super_block *sb) { LIST_HEAD(sync_list); /* * We need to be protected against the filesystem going from * r/o to r/w or vice versa. */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); mutex_lock(&sb->s_sync_lock); /* * Splice the writeback list onto a temporary list to avoid waiting on * inodes that have started writeback after this point. * * Use rcu_read_lock() to keep the inodes around until we have a * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as * the local list because inodes can be dropped from either by writeback * completion. */ rcu_read_lock(); spin_lock_irq(&sb->s_inode_wblist_lock); list_splice_init(&sb->s_inodes_wb, &sync_list); /* * Data integrity sync. Must wait for all pages under writeback, because * there may have been pages dirtied before our sync call, but which had * writeout started before we write it out. In which case, the inode * may not be on the dirty list, but we still have to wait for that * writeout. */ while (!list_empty(&sync_list)) { struct inode *inode = list_first_entry(&sync_list, struct inode, i_wb_list); struct address_space *mapping = inode->i_mapping; /* * Move each inode back to the wb list before we drop the lock * to preserve consistency between i_wb_list and the mapping * writeback tag. Writeback completion is responsible to remove * the inode from either list once the writeback tag is cleared. */ list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb); /* * The mapping can appear untagged while still on-list since we * do not have the mapping lock. Skip it here, wb completion * will remove it. */ if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) continue; spin_unlock_irq(&sb->s_inode_wblist_lock); spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { spin_unlock(&inode->i_lock); spin_lock_irq(&sb->s_inode_wblist_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); rcu_read_unlock(); /* * We keep the error status of individual mapping so that * applications can catch the writeback error using fsync(2). * See filemap_fdatawait_keep_errors() for details. 
*/ filemap_fdatawait_keep_errors(mapping); cond_resched(); iput(inode); rcu_read_lock(); spin_lock_irq(&sb->s_inode_wblist_lock); } spin_unlock_irq(&sb->s_inode_wblist_lock); rcu_read_unlock(); mutex_unlock(&sb->s_sync_lock); } static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr, enum wb_reason reason, bool skip_if_busy) { struct backing_dev_info *bdi = sb->s_bdi; DEFINE_WB_COMPLETION(done, bdi); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_NONE, .tagged_writepages = 1, .done = &done, .nr_pages = nr, .reason = reason, }; if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info) return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy); wb_wait_for_completion(&done); } /** * writeback_inodes_sb_nr - writeback dirty inodes from given super_block * @sb: the superblock * @nr: the number of pages to write * @reason: reason why some writeback work initiated * * Start writeback on some inodes on this super_block. No guarantees are made * on how many (if any) will be written, and this function does not wait * for IO completion of submitted IO. */ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr, enum wb_reason reason) { __writeback_inodes_sb_nr(sb, nr, reason, false); } EXPORT_SYMBOL(writeback_inodes_sb_nr); /** * writeback_inodes_sb - writeback dirty inodes from given super_block * @sb: the superblock * @reason: reason why some writeback work was initiated * * Start writeback on some inodes on this super_block. No guarantees are made * on how many (if any) will be written, and this function does not wait * for IO completion of submitted IO. */ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); } EXPORT_SYMBOL(writeback_inodes_sb); /** * try_to_writeback_inodes_sb - try to start writeback if none underway * @sb: the superblock * @reason: reason why some writeback work was initiated * * Invoke __writeback_inodes_sb_nr if no writeback is currently underway. */ void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { if (!down_read_trylock(&sb->s_umount)) return; __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true); up_read(&sb->s_umount); } EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock * * This function writes and waits on any dirty inode belonging to this * super_block. */ void sync_inodes_sb(struct super_block *sb) { struct backing_dev_info *bdi = sb->s_bdi; DEFINE_WB_COMPLETION(done, bdi); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, .for_sync = 1, }; /* * Can't skip on !bdi_has_dirty() because we should wait for !dirty * inodes under writeback and I_DIRTY_TIME inodes ignored by * bdi_has_dirty() need to be written out too. */ if (bdi == &noop_backing_dev_info) return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ bdi_down_write_wb_switch_rwsem(bdi); bdi_split_work_to_wbs(bdi, &work, false); wb_wait_for_completion(&done); bdi_up_write_wb_switch_rwsem(bdi); wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); /** * write_inode_now - write an inode to disk * @inode: inode to write to disk * @sync: whether the write should be synchronous or not * * This function commits an inode to disk immediately if it is dirty. 
This is * primarily needed by knfsd. * * The caller must either have a ref on the inode or must have set I_WILL_FREE. */ int write_inode_now(struct inode *inode, int sync) { struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, .range_start = 0, .range_end = LLONG_MAX, }; if (!mapping_can_writeback(inode->i_mapping)) wbc.nr_to_write = 0; might_sleep(); return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(write_inode_now); /** * sync_inode_metadata - write an inode to disk * @inode: the inode to sync * @wait: wait for I/O to complete. * * Write an inode to disk and adjust its dirty state after completion. * * Note: only writes the actual inode, no associated data or other metadata. */ int sync_inode_metadata(struct inode *inode, int wait) { struct writeback_control wbc = { .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, .nr_to_write = 0, /* metadata-only */ }; return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(sync_inode_metadata);
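/*
 * Editor's illustrative sketch (not part of fs-writeback.c): kicking off
 * opportunistic, non-blocking writeback for a super_block, e.g. when a
 * filesystem wants dirty data flushed so it can reclaim reserved space.
 * example_kick_writeback() is a hypothetical caller;
 * try_to_writeback_inodes_sb() is the real entry point documented above,
 * and WB_REASON_FS_FREE_SPACE is an existing wb_reason value used here
 * purely as an example.
 */
static void example_kick_writeback(struct super_block *sb)
{
	/*
	 * Ask the flusher threads to write back some dirty inodes of @sb,
	 * but only if s_umount can be taken for read right now. Does not
	 * wait for the submitted I/O to complete.
	 */
	try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
}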
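/*
 * Editor's illustrative sketch: basic use of the XArray API declared in
 * the header that follows (include/linux/xarray.h). The names example_xa
 * and example_xa_usage() are hypothetical; xa_store(), xa_load(),
 * xa_erase(), xa_for_each(), xa_is_err() and xa_err() are the documented
 * entry points.
 */
static DEFINE_XARRAY(example_xa);

static int example_xa_usage(unsigned long index, void *item)
{
	unsigned long i;
	void *entry;
	void *old;

	/* Store @item at @index; the return is the old entry or an error. */
	old = xa_store(&example_xa, index, item, GFP_KERNEL);
	if (xa_is_err(old))
		return xa_err(old);

	/* Look it back up; xa_load() takes the RCU read lock internally. */
	if (xa_load(&example_xa, index) != item)
		return -EINVAL;

	/* Walk every present entry. */
	xa_for_each(&example_xa, i, entry)
		pr_debug("index %lu -> %p\n", i, entry);

	/* Remove the entry again; subsequent loads return NULL. */
	xa_erase(&example_xa, index);
	return 0;
}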
/* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _LINUX_XARRAY_H #define _LINUX_XARRAY_H /* * eXtensible Arrays * Copyright (c) 2017 Microsoft Corporation * Author: Matthew Wilcox <willy@infradead.org> * * See Documentation/core-api/xarray.rst for how to use the XArray. */ #include <linux/bitmap.h> #include <linux/bug.h> #include <linux/compiler.h> #include <linux/gfp.h> #include <linux/kconfig.h> #include <linux/kernel.h> #include <linux/rcupdate.h> #include <linux/sched/mm.h> #include <linux/spinlock.h> #include <linux/types.h> /* * The bottom two bits of the entry determine how the XArray interprets * the contents: * * 00: Pointer entry * 10: Internal entry * x1: Value entry or tagged pointer * * Attempting to store internal entries in the XArray is a bug. * * Most internal entries are pointers to the next node in the tree. * The following internal entries have a special meaning: * * 0-62: Sibling entries * 256: Retry entry * 257: Zero entry * * Errors are also represented as internal entries, but use the negative * space (-4094 to -2). They're never stored in the slots array; only * returned by the normal API. */ #define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) /** * xa_mk_value() - Create an XArray entry from an integer. * @v: Value to store in XArray. * * Context: Any context. * Return: An entry suitable for storing in the XArray. */ static inline void *xa_mk_value(unsigned long v) { WARN_ON((long)v < 0); return (void *)((v << 1) | 1); } /** * xa_to_value() - Get value stored in an XArray entry. * @entry: XArray entry. * * Context: Any context. * Return: The value stored in the XArray entry. */ static inline unsigned long xa_to_value(const void *entry) { return (unsigned long)entry >> 1; } /** * xa_is_value() - Determine if an entry is a value. * @entry: XArray entry. * * Context: Any context. * Return: True if the entry is a value, false if it is a pointer. */ static inline bool xa_is_value(const void *entry) { return (unsigned long)entry & 1; } /** * xa_tag_pointer() - Create an XArray entry for a tagged pointer. * @p: Plain pointer. * @tag: Tag value (0, 1 or 3). * * If the user of the XArray prefers, they can tag their pointers instead * of storing value entries.
Three tags are available (0, 1 and 3). * These are distinct from the xa_mark_t as they are not replicated up * through the array and cannot be searched for. * * Context: Any context. * Return: An XArray entry. */ static inline void *xa_tag_pointer(void *p, unsigned long tag) { return (void *)((unsigned long)p | tag); } /** * xa_untag_pointer() - Turn an XArray entry into a plain pointer. * @entry: XArray entry. * * If you have stored a tagged pointer in the XArray, call this function * to get the untagged version of the pointer. * * Context: Any context. * Return: A pointer. */ static inline void *xa_untag_pointer(void *entry) { return (void *)((unsigned long)entry & ~3UL); } /** * xa_pointer_tag() - Get the tag stored in an XArray entry. * @entry: XArray entry. * * If you have stored a tagged pointer in the XArray, call this function * to get the tag of that pointer. * * Context: Any context. * Return: A tag. */ static inline unsigned int xa_pointer_tag(void *entry) { return (unsigned long)entry & 3UL; } /* * xa_mk_internal() - Create an internal entry. * @v: Value to turn into an internal entry. * * Internal entries are used for a number of purposes. Entries 0-255 are * used for sibling entries (only 0-62 are used by the current code). 256 * is used for the retry entry. 257 is used for the reserved / zero entry. * Negative internal entries are used to represent errnos. Node pointers * are also tagged as internal entries in some situations. * * Context: Any context. * Return: An XArray internal entry corresponding to this value. */ static inline void *xa_mk_internal(unsigned long v) { return (void *)((v << 2) | 2); } /* * xa_to_internal() - Extract the value from an internal entry. * @entry: XArray entry. * * Context: Any context. * Return: The value which was stored in the internal entry. */ static inline unsigned long xa_to_internal(const void *entry) { return (unsigned long)entry >> 2; } /* * xa_is_internal() - Is the entry an internal entry? * @entry: XArray entry. * * Context: Any context. * Return: %true if the entry is an internal entry. */ static inline bool xa_is_internal(const void *entry) { return ((unsigned long)entry & 3) == 2; } #define XA_ZERO_ENTRY xa_mk_internal(257) /** * xa_is_zero() - Is the entry a zero entry? * @entry: Entry retrieved from the XArray * * The normal API will return NULL as the contents of a slot containing * a zero entry. You can only see zero entries by using the advanced API. * * Return: %true if the entry is a zero entry. */ static inline bool xa_is_zero(const void *entry) { return unlikely(entry == XA_ZERO_ENTRY); } /** * xa_is_err() - Report whether an XArray operation returned an error * @entry: Result from calling an XArray function * * If an XArray operation cannot complete an operation, it will return * a special value indicating an error. This function tells you * whether an error occurred; xa_err() tells you which error occurred. * * Context: Any context. * Return: %true if the entry indicates an error. */ static inline bool xa_is_err(const void *entry) { return unlikely(xa_is_internal(entry) && entry >= xa_mk_internal(-MAX_ERRNO)); } /** * xa_err() - Turn an XArray result into an errno. * @entry: Result from calling an XArray function. * * If an XArray operation cannot complete an operation, it will return * a special pointer value which encodes an errno. This function extracts * the errno from the pointer value, or returns 0 if the pointer does not * represent an errno. * * Context: Any context. * Return: A negative errno or 0. 
*/ static inline int xa_err(void *entry) { /* xa_to_internal() would not do sign extension. */ if (xa_is_err(entry)) return (long)entry >> 2; return 0; } /** * struct xa_limit - Represents a range of IDs. * @min: The lowest ID to allocate (inclusive). * @max: The maximum ID to allocate (inclusive). * * This structure is used either directly or via the XA_LIMIT() macro * to communicate the range of IDs that are valid for allocation. * Three common ranges are predefined for you: * * xa_limit_32b - [0 - UINT_MAX] * * xa_limit_31b - [0 - INT_MAX] * * xa_limit_16b - [0 - USHRT_MAX] */ struct xa_limit { u32 max; u32 min; }; #define XA_LIMIT(_min, _max) (struct xa_limit) { .min = _min, .max = _max } #define xa_limit_32b XA_LIMIT(0, UINT_MAX) #define xa_limit_31b XA_LIMIT(0, INT_MAX) #define xa_limit_16b XA_LIMIT(0, USHRT_MAX) typedef unsigned __bitwise xa_mark_t; #define XA_MARK_0 ((__force xa_mark_t)0U) #define XA_MARK_1 ((__force xa_mark_t)1U) #define XA_MARK_2 ((__force xa_mark_t)2U) #define XA_PRESENT ((__force xa_mark_t)8U) #define XA_MARK_MAX XA_MARK_2 #define XA_FREE_MARK XA_MARK_0 enum xa_lock_type { XA_LOCK_IRQ = 1, XA_LOCK_BH = 2, }; /* * Values for xa_flags. The radix tree stores its GFP flags in the xa_flags, * and we remain compatible with that. */ #define XA_FLAGS_LOCK_IRQ ((__force gfp_t)XA_LOCK_IRQ) #define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) #define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) #define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U) #define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U) #define XA_FLAGS_ACCOUNT ((__force gfp_t)32U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) /* ALLOC is for a normal 0-based alloc. ALLOC1 is for an 1-based alloc */ #define XA_FLAGS_ALLOC (XA_FLAGS_TRACK_FREE | XA_FLAGS_MARK(XA_FREE_MARK)) #define XA_FLAGS_ALLOC1 (XA_FLAGS_TRACK_FREE | XA_FLAGS_ZERO_BUSY) /** * struct xarray - The anchor of the XArray. * @xa_lock: Lock that protects the contents of the XArray. * * To use the xarray, define it statically or embed it in your data structure. * It is a very small data structure, so it does not usually make sense to * allocate it separately and keep a pointer to it in your data structure. * * You may use the xa_lock to protect your own data structures as well. */ /* * If all of the entries in the array are NULL, @xa_head is a NULL pointer. * If the only non-NULL entry in the array is at index 0, @xa_head is that * entry. If any other entry in the array is non-NULL, @xa_head points * to an @xa_node. */ struct xarray { spinlock_t xa_lock; /* private: The rest of the data structure is not to be used directly. */ gfp_t xa_flags; void __rcu * xa_head; }; #define XARRAY_INIT(name, flags) { \ .xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock), \ .xa_flags = flags, \ .xa_head = NULL, \ } /** * DEFINE_XARRAY_FLAGS() - Define an XArray with custom flags. * @name: A string that names your XArray. * @flags: XA_FLAG values. * * This is intended for file scope definitions of XArrays. It declares * and initialises an empty XArray with the chosen name and flags. It is * equivalent to calling xa_init_flags() on the array, but it does the * initialisation at compiletime instead of runtime. */ #define DEFINE_XARRAY_FLAGS(name, flags) \ struct xarray name = XARRAY_INIT(name, flags) /** * DEFINE_XARRAY() - Define an XArray. * @name: A string that names your XArray. * * This is intended for file scope definitions of XArrays. It declares * and initialises an empty XArray with the chosen name. 
It is equivalent * to calling xa_init() on the array, but it does the initialisation at * compiletime instead of runtime. */ #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0) /** * DEFINE_XARRAY_ALLOC() - Define an XArray which allocates IDs starting at 0. * @name: A string that names your XArray. * * This is intended for file scope definitions of allocating XArrays. * See also DEFINE_XARRAY(). */ #define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC) /** * DEFINE_XARRAY_ALLOC1() - Define an XArray which allocates IDs starting at 1. * @name: A string that names your XArray. * * This is intended for file scope definitions of allocating XArrays. * See also DEFINE_XARRAY(). */ #define DEFINE_XARRAY_ALLOC1(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC1) void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *xa_erase(struct xarray *, unsigned long index); void *xa_store_range(struct xarray *, unsigned long first, unsigned long last, void *entry, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); void *xa_find(struct xarray *xa, unsigned long *index, unsigned long max, xa_mark_t) __attribute__((nonnull(2))); void *xa_find_after(struct xarray *xa, unsigned long *index, unsigned long max, xa_mark_t) __attribute__((nonnull(2))); unsigned int xa_extract(struct xarray *, void **dst, unsigned long start, unsigned long max, unsigned int n, xa_mark_t); void xa_destroy(struct xarray *); /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. * @flags: XA_FLAG values. * * If you need to initialise an XArray with special flags (eg you need * to take the lock from interrupt context), use this function instead * of xa_init(). * * Context: Any context. */ static inline void xa_init_flags(struct xarray *xa, gfp_t flags) { spin_lock_init(&xa->xa_lock); xa->xa_flags = flags; xa->xa_head = NULL; } /** * xa_init() - Initialise an empty XArray. * @xa: XArray. * * An empty XArray is full of NULL entries. * * Context: Any context. */ static inline void xa_init(struct xarray *xa) { xa_init_flags(xa, 0); } /** * xa_empty() - Determine if an array has any present entries. * @xa: XArray. * * Context: Any context. * Return: %true if the array contains only NULL pointers. */ static inline bool xa_empty(const struct xarray *xa) { return xa->xa_head == NULL; } /** * xa_marked() - Inquire whether any entry in this array has a mark set * @xa: Array * @mark: Mark value * * Context: Any context. * Return: %true if any entry has this mark set. */ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) { return xa->xa_flags & XA_FLAGS_MARK(mark); } /** * xa_for_each_range() - Iterate over a portion of an XArray. * @xa: XArray. * @index: Index of @entry. * @entry: Entry retrieved from array. * @start: First index to retrieve from array. * @last: Last index to retrieve from array. * * During the iteration, @entry will have the value of the entry stored * in @xa at @index. You may modify @index during the iteration if you * want to skip or reprocess indices. It is safe to modify the array * during the iteration. At the end of the iteration, @entry will be set * to NULL and @index will have a value less than or equal to max. * * xa_for_each_range() is O(n.log(n)) while xas_for_each() is O(n). 
You have * to handle your own locking with xas_for_each(), and if you have to unlock * after each iteration, it will also end up being O(n.log(n)). * xa_for_each_range() will spin if it hits a retry entry; if you intend to * see retry entries, you should use the xas_for_each() iterator instead. * The xas_for_each() iterator will expand into more inline code than * xa_for_each_range(). * * Context: Any context. Takes and releases the RCU lock. */ #define xa_for_each_range(xa, index, entry, start, last) \ for (index = start, \ entry = xa_find(xa, &index, last, XA_PRESENT); \ entry; \ entry = xa_find_after(xa, &index, last, XA_PRESENT)) /** * xa_for_each_start() - Iterate over a portion of an XArray. * @xa: XArray. * @index: Index of @entry. * @entry: Entry retrieved from array. * @start: First index to retrieve from array. * * During the iteration, @entry will have the value of the entry stored * in @xa at @index. You may modify @index during the iteration if you * want to skip or reprocess indices. It is safe to modify the array * during the iteration. At the end of the iteration, @entry will be set * to NULL and @index will have a value less than or equal to max. * * xa_for_each_start() is O(n.log(n)) while xas_for_each() is O(n). You have * to handle your own locking with xas_for_each(), and if you have to unlock * after each iteration, it will also end up being O(n.log(n)). * xa_for_each_start() will spin if it hits a retry entry; if you intend to * see retry entries, you should use the xas_for_each() iterator instead. * The xas_for_each() iterator will expand into more inline code than * xa_for_each_start(). * * Context: Any context. Takes and releases the RCU lock. */ #define xa_for_each_start(xa, index, entry, start) \ xa_for_each_range(xa, index, entry, start, ULONG_MAX) /** * xa_for_each() - Iterate over present entries in an XArray. * @xa: XArray. * @index: Index of @entry. * @entry: Entry retrieved from array. * * During the iteration, @entry will have the value of the entry stored * in @xa at @index. You may modify @index during the iteration if you want * to skip or reprocess indices. It is safe to modify the array during the * iteration. At the end of the iteration, @entry will be set to NULL and * @index will have a value less than or equal to max. * * xa_for_each() is O(n.log(n)) while xas_for_each() is O(n). You have * to handle your own locking with xas_for_each(), and if you have to unlock * after each iteration, it will also end up being O(n.log(n)). xa_for_each() * will spin if it hits a retry entry; if you intend to see retry entries, * you should use the xas_for_each() iterator instead. The xas_for_each() * iterator will expand into more inline code than xa_for_each(). * * Context: Any context. Takes and releases the RCU lock. */ #define xa_for_each(xa, index, entry) \ xa_for_each_start(xa, index, entry, 0) /** * xa_for_each_marked() - Iterate over marked entries in an XArray. * @xa: XArray. * @index: Index of @entry. * @entry: Entry retrieved from array. * @filter: Selection criterion. * * During the iteration, @entry will have the value of the entry stored * in @xa at @index. The iteration will skip all entries in the array * which do not match @filter. You may modify @index during the iteration * if you want to skip or reprocess indices. It is safe to modify the array * during the iteration. At the end of the iteration, @entry will be set to * NULL and @index will have a value less than or equal to max. 
* * xa_for_each_marked() is O(n.log(n)) while xas_for_each_marked() is O(n). * You have to handle your own locking with xas_for_each(), and if you have * to unlock after each iteration, it will also end up being O(n.log(n)). * xa_for_each_marked() will spin if it hits a retry entry; if you intend to * see retry entries, you should use the xas_for_each_marked() iterator * instead. The xas_for_each_marked() iterator will expand into more inline * code than xa_for_each_marked(). * * Context: Any context. Takes and releases the RCU lock. */ #define xa_for_each_marked(xa, index, entry, filter) \ for (index = 0, entry = xa_find(xa, &index, ULONG_MAX, filter); \ entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter)) #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) #define xa_lock_bh(xa) spin_lock_bh(&(xa)->xa_lock) #define xa_unlock_bh(xa) spin_unlock_bh(&(xa)->xa_lock) #define xa_lock_irq(xa) spin_lock_irq(&(xa)->xa_lock) #define xa_unlock_irq(xa) spin_unlock_irq(&(xa)->xa_lock) #define xa_lock_irqsave(xa, flags) \ spin_lock_irqsave(&(xa)->xa_lock, flags) #define xa_unlock_irqrestore(xa, flags) \ spin_unlock_irqrestore(&(xa)->xa_lock, flags) #define xa_lock_nested(xa, subclass) \ spin_lock_nested(&(xa)->xa_lock, subclass) #define xa_lock_bh_nested(xa, subclass) \ spin_lock_bh_nested(&(xa)->xa_lock, subclass) #define xa_lock_irq_nested(xa, subclass) \ spin_lock_irq_nested(&(xa)->xa_lock, subclass) #define xa_lock_irqsave_nested(xa, flags, subclass) \ spin_lock_irqsave_nested(&(xa)->xa_lock, flags, subclass) /* * Versions of the normal API which require the caller to hold the * xa_lock. If the GFP flags allow it, they will drop the lock to * allocate memory, then reacquire it afterwards. These functions * may also re-enable interrupts if the XArray flags indicate the * locking should be interrupt safe. */ void *__xa_erase(struct xarray *, unsigned long index); void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); int __must_check __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t); int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry, struct xa_limit, gfp_t); int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry, struct xa_limit, u32 *next, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); /** * xa_store_bh() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * This function is like calling xa_store() except it disables softirqs * while holding the array lock. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. * Return: The old entry at this index or xa_err() if an error happened. */ static inline void *xa_store_bh(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { void *curr; might_alloc(gfp); xa_lock_bh(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock_bh(xa); return curr; } /** * xa_store_irq() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * This function is like calling xa_store() except it disables interrupts * while holding the array lock. * * Context: Process context. 
Takes and releases the xa_lock while * disabling interrupts. * Return: The old entry at this index or xa_err() if an error happened. */ static inline void *xa_store_irq(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { void *curr; might_alloc(gfp); xa_lock_irq(xa); curr = __xa_store(xa, index, entry, gfp); xa_unlock_irq(xa); return curr; } /** * xa_erase_bh() - Erase this entry from the XArray. * @xa: XArray. * @index: Index of entry. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. * Return: The entry which used to be at this index. */ static inline void *xa_erase_bh(struct xarray *xa, unsigned long index) { void *entry; xa_lock_bh(xa); entry = __xa_erase(xa, index); xa_unlock_bh(xa); return entry; } /** * xa_erase_irq() - Erase this entry from the XArray. * @xa: XArray. * @index: Index of entry. * * After this function returns, loading from @index will return %NULL. * If the index is part of a multi-index entry, all indices will be erased * and none of the entries will be part of a multi-index entry. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. * Return: The entry which used to be at this index. */ static inline void *xa_erase_irq(struct xarray *xa, unsigned long index) { void *entry; xa_lock_irq(xa); entry = __xa_erase(xa, index); xa_unlock_irq(xa); return entry; } /** * xa_cmpxchg() - Conditionally replace an entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New value to place in array. * @gfp: Memory allocation flags. * * If the entry at @index is the same as @old, replace it with @entry. * If the return value is equal to @old, then the exchange was successful. * * Context: Any context. Takes and releases the xa_lock. May sleep * if the @gfp flags permit. * Return: The old value at this index or xa_err() if an error happened. */ static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { void *curr; might_alloc(gfp); xa_lock(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock(xa); return curr; } /** * xa_cmpxchg_bh() - Conditionally replace an entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New value to place in array. * @gfp: Memory allocation flags. * * This function is like calling xa_cmpxchg() except it disables softirqs * while holding the array lock. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: The old value at this index or xa_err() if an error happened. */ static inline void *xa_cmpxchg_bh(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { void *curr; might_alloc(gfp); xa_lock_bh(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock_bh(xa); return curr; } /** * xa_cmpxchg_irq() - Conditionally replace an entry in the XArray. * @xa: XArray. * @index: Index into array. * @old: Old value to test against. * @entry: New value to place in array. * @gfp: Memory allocation flags. * * This function is like calling xa_cmpxchg() except it disables interrupts * while holding the array lock. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. 
May sleep if the @gfp flags permit. * Return: The old value at this index or xa_err() if an error happened. */ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index, void *old, void *entry, gfp_t gfp) { void *curr; might_alloc(gfp); xa_lock_irq(xa); curr = __xa_cmpxchg(xa, index, old, entry, gfp); xa_unlock_irq(xa); return curr; } /** * xa_insert() - Store this entry in the XArray unless another entry is * already present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. Inserting will fail if a reserved entry is * present, even though loading from this index will return NULL. * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int __must_check xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { int err; might_alloc(gfp); xa_lock(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock(xa); return err; } /** * xa_insert_bh() - Store this entry in the XArray unless another entry is * already present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. Inserting will fail if a reserved entry is * present, even though loading from this index will return NULL. * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int __must_check xa_insert_bh(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { int err; might_alloc(gfp); xa_lock_bh(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock_bh(xa); return err; } /** * xa_insert_irq() - Store this entry in the XArray unless another entry is * already present. * @xa: XArray. * @index: Index into array. * @entry: New entry. * @gfp: Memory allocation flags. * * Inserting a NULL entry will store a reserved entry (like xa_reserve()) * if no entry is present. Inserting will fail if a reserved entry is * present, even though loading from this index will return NULL. * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int __must_check xa_insert_irq(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { int err; might_alloc(gfp); xa_lock_irq(xa); err = __xa_insert(xa, index, entry, gfp); xa_unlock_irq(xa); return err; } /** * xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. 
* Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { int err; might_alloc(gfp); xa_lock(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock(xa); return err; } /** * xa_alloc_bh() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { int err; might_alloc(gfp); xa_lock_bh(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_bh(xa); return err; } /** * xa_alloc_irq() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or * -EBUSY if there are no free entries in @limit. */ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) { int err; might_alloc(gfp); xa_lock_irq(xa); err = __xa_alloc(xa, id, entry, limit, gfp); xa_unlock_irq(xa); return err; } /** * xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. 
*/ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { int err; might_alloc(gfp); xa_lock(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock(xa); return err; } /** * xa_alloc_cyclic_bh() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { int err; might_alloc(gfp); xa_lock_bh(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_bh(xa); return err; } /** * xa_alloc_cyclic_irq() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. * @entry: New entry. * @limit: Range of allocated ID. * @next: Pointer to next ID to allocate. * @gfp: Memory allocation flags. * * Finds an empty entry in @xa between @limit.min and @limit.max, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * The search for an empty entry will start at @next and will wrap * around if necessary. * * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, u32 *next, gfp_t gfp) { int err; might_alloc(gfp); xa_lock_irq(xa); err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_irq(xa); return err; } /** * xa_reserve() - Reserve this index in the XArray. * @xa: XArray. * @index: Index into array. * @gfp: Memory allocation flags. * * Ensures there is somewhere to store an entry at @index in the array. * If there is already something stored at @index, this function does * nothing. If there was nothing there, the entry is marked as reserved. * Loading from a reserved entry returns a %NULL pointer. * * If you do not use the entry that you have reserved, call xa_release() * or xa_erase() to free any unnecessary memory. * * Context: Any context. Takes and releases the xa_lock. * May sleep if the @gfp flags permit. * Return: 0 if the reservation succeeded or -ENOMEM if it failed. 
*/ static inline __must_check int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) { return xa_err(xa_cmpxchg(xa, index, NULL, XA_ZERO_ENTRY, gfp)); } /** * xa_reserve_bh() - Reserve this index in the XArray. * @xa: XArray. * @index: Index into array. * @gfp: Memory allocation flags. * * A softirq-disabling version of xa_reserve(). * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. * Return: 0 if the reservation succeeded or -ENOMEM if it failed. */ static inline __must_check int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp) { return xa_err(xa_cmpxchg_bh(xa, index, NULL, XA_ZERO_ENTRY, gfp)); } /** * xa_reserve_irq() - Reserve this index in the XArray. * @xa: XArray. * @index: Index into array. * @gfp: Memory allocation flags. * * An interrupt-disabling version of xa_reserve(). * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. * Return: 0 if the reservation succeeded or -ENOMEM if it failed. */ static inline __must_check int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp) { return xa_err(xa_cmpxchg_irq(xa, index, NULL, XA_ZERO_ENTRY, gfp)); } /** * xa_release() - Release a reserved entry. * @xa: XArray. * @index: Index of entry. * * After calling xa_reserve(), you can call this function to release the * reservation. If the entry at @index has been stored to, this function * will do nothing. */ static inline void xa_release(struct xarray *xa, unsigned long index) { xa_cmpxchg(xa, index, XA_ZERO_ENTRY, NULL, 0); } /* Everything below here is the Advanced API. Proceed with caution. */ /* * The xarray is constructed out of a set of 'chunks' of pointers. Choosing * the best chunk size requires some tradeoffs. A power of two recommends * itself so that we can walk the tree based purely on shifts and masks. * Generally, the larger the better; as the number of slots per level of the * tree increases, the less tall the tree needs to be. But that needs to be * balanced against the memory consumption of each node. On a 64-bit system, * xa_node is currently 576 bytes, and we get 7 of them per 4kB page. If we * doubled the number of slots per node, we'd get only 3 nodes per 4kB page. */ #ifndef XA_CHUNK_SHIFT #define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) #endif #define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) #define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) #define XA_MAX_MARKS 3 #define XA_MARK_LONGS DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG) /* * @count is the count of every non-NULL element in the ->slots array * whether that is a value entry, a retry entry, a user pointer, * a sibling entry or a pointer to the next level of the tree. * @nr_values is the count of every element in ->slots which is * either a value entry or a sibling of a value entry. 
*/ struct xa_node { unsigned char shift; /* Bits remaining in each slot */ unsigned char offset; /* Slot offset in parent */ unsigned char count; /* Total entry count */ unsigned char nr_values; /* Value entry count */ struct xa_node __rcu *parent; /* NULL at top of tree */ struct xarray *array; /* The array we belong to */ union { struct list_head private_list; /* For tree user */ struct rcu_head rcu_head; /* Used when freeing node */ }; void __rcu *slots[XA_CHUNK_SIZE]; union { unsigned long tags[XA_MAX_MARKS][XA_MARK_LONGS]; unsigned long marks[XA_MAX_MARKS][XA_MARK_LONGS]; }; }; void xa_dump(const struct xarray *); void xa_dump_node(const struct xa_node *); #ifdef XA_DEBUG #define XA_BUG_ON(xa, x) do { \ if (x) { \ xa_dump(xa); \ BUG(); \ } \ } while (0) #define XA_NODE_BUG_ON(node, x) do { \ if (x) { \ if (node) xa_dump_node(node); \ BUG(); \ } \ } while (0) #else #define XA_BUG_ON(xa, x) do { } while (0) #define XA_NODE_BUG_ON(node, x) do { } while (0) #endif /* Private */ static inline void *xa_head(const struct xarray *xa) { return rcu_dereference_check(xa->xa_head, lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline void *xa_head_locked(const struct xarray *xa) { return rcu_dereference_protected(xa->xa_head, lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline void *xa_entry(const struct xarray *xa, const struct xa_node *node, unsigned int offset) { XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE); return rcu_dereference_check(node->slots[offset], lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline void *xa_entry_locked(const struct xarray *xa, const struct xa_node *node, unsigned int offset) { XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE); return rcu_dereference_protected(node->slots[offset], lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline struct xa_node *xa_parent(const struct xarray *xa, const struct xa_node *node) { return rcu_dereference_check(node->parent, lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline struct xa_node *xa_parent_locked(const struct xarray *xa, const struct xa_node *node) { return rcu_dereference_protected(node->parent, lockdep_is_held(&xa->xa_lock)); } /* Private */ static inline void *xa_mk_node(const struct xa_node *node) { return (void *)((unsigned long)node | 2); } /* Private */ static inline struct xa_node *xa_to_node(const void *entry) { return (struct xa_node *)((unsigned long)entry - 2); } /* Private */ static inline bool xa_is_node(const void *entry) { return xa_is_internal(entry) && (unsigned long)entry > 4096; } /* Private */ static inline void *xa_mk_sibling(unsigned int offset) { return xa_mk_internal(offset); } /* Private */ static inline unsigned long xa_to_sibling(const void *entry) { return xa_to_internal(entry); } /** * xa_is_sibling() - Is the entry a sibling entry? * @entry: Entry retrieved from the XArray * * Return: %true if the entry is a sibling entry. */ static inline bool xa_is_sibling(const void *entry) { return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) && (entry < xa_mk_sibling(XA_CHUNK_SIZE - 1)); } #define XA_RETRY_ENTRY xa_mk_internal(256) /** * xa_is_retry() - Is the entry a retry entry? * @entry: Entry retrieved from the XArray * * Return: %true if the entry is a retry entry. */ static inline bool xa_is_retry(const void *entry) { return unlikely(entry == XA_RETRY_ENTRY); } /** * xa_is_advanced() - Is the entry only permitted for the advanced API? * @entry: Entry to be stored in the XArray. * * Return: %true if the entry cannot be stored by the normal API. 
*/ static inline bool xa_is_advanced(const void *entry) { return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY); } /** * typedef xa_update_node_t - A callback function from the XArray. * @node: The node which is being processed * * This function is called every time the XArray updates the count of * present and value entries in a node. It allows advanced users to * maintain the private_list in the node. * * Context: The xa_lock is held and interrupts may be disabled. * Implementations should not drop the xa_lock, nor re-enable * interrupts. */ typedef void (*xa_update_node_t)(struct xa_node *node); void xa_delete_node(struct xa_node *, xa_update_node_t); /* * The xa_state is opaque to its users. It contains various different pieces * of state involved in the current operation on the XArray. It should be * declared on the stack and passed between the various internal routines. * The various elements in it should not be accessed directly, but only * through the provided accessor functions. The below documentation is for * the benefit of those working on the code, not for users of the XArray. * * @xa_node usually points to the xa_node containing the slot we're operating * on (and @xa_offset is the offset in the slots array). If there is a * single entry in the array at index 0, there are no allocated xa_nodes to * point to, and so we store %NULL in @xa_node. @xa_node is set to * the value %XAS_RESTART if the xa_state is not walked to the correct * position in the tree of nodes for this operation. If an error occurs * during an operation, it is set to an %XAS_ERROR value. If we run off the * end of the allocated nodes, it is set to %XAS_BOUNDS. */ struct xa_state { struct xarray *xa; unsigned long xa_index; unsigned char xa_shift; unsigned char xa_sibs; unsigned char xa_offset; unsigned char xa_pad; /* Helps gcc generate better code */ struct xa_node *xa_node; struct xa_node *xa_alloc; xa_update_node_t xa_update; struct list_lru *xa_lru; }; /* * We encode errnos in the xas->xa_node. If an error has happened, we need to * drop the lock to fix it, and once we've done so the xa_state is invalid. */ #define XA_ERROR(errno) ((struct xa_node *)(((unsigned long)errno << 2) | 2UL)) #define XAS_BOUNDS ((struct xa_node *)1UL) #define XAS_RESTART ((struct xa_node *)3UL) #define __XA_STATE(array, index, shift, sibs) { \ .xa = array, \ .xa_index = index, \ .xa_shift = shift, \ .xa_sibs = sibs, \ .xa_offset = 0, \ .xa_pad = 0, \ .xa_node = XAS_RESTART, \ .xa_alloc = NULL, \ .xa_update = NULL, \ .xa_lru = NULL, \ } /** * XA_STATE() - Declare an XArray operation state. * @name: Name of this operation state (usually xas). * @array: Array to operate on. * @index: Initial index of interest. * * Declare and initialise an xa_state on the stack. */ #define XA_STATE(name, array, index) \ struct xa_state name = __XA_STATE(array, index, 0, 0) /** * XA_STATE_ORDER() - Declare an XArray operation state. * @name: Name of this operation state (usually xas). * @array: Array to operate on. * @index: Initial index of interest. * @order: Order of entry. * * Declare and initialise an xa_state on the stack. 
This variant of * XA_STATE() allows you to specify the 'order' of the element you * want to operate on.` */ #define XA_STATE_ORDER(name, array, index, order) \ struct xa_state name = __XA_STATE(array, \ (index >> order) << order, \ order - (order % XA_CHUNK_SHIFT), \ (1U << (order % XA_CHUNK_SHIFT)) - 1) #define xas_marked(xas, mark) xa_marked((xas)->xa, (mark)) #define xas_trylock(xas) xa_trylock((xas)->xa) #define xas_lock(xas) xa_lock((xas)->xa) #define xas_unlock(xas) xa_unlock((xas)->xa) #define xas_lock_bh(xas) xa_lock_bh((xas)->xa) #define xas_unlock_bh(xas) xa_unlock_bh((xas)->xa) #define xas_lock_irq(xas) xa_lock_irq((xas)->xa) #define xas_unlock_irq(xas) xa_unlock_irq((xas)->xa) #define xas_lock_irqsave(xas, flags) \ xa_lock_irqsave((xas)->xa, flags) #define xas_unlock_irqrestore(xas, flags) \ xa_unlock_irqrestore((xas)->xa, flags) /** * xas_error() - Return an errno stored in the xa_state. * @xas: XArray operation state. * * Return: 0 if no error has been noted. A negative errno if one has. */ static inline int xas_error(const struct xa_state *xas) { return xa_err(xas->xa_node); } /** * xas_set_err() - Note an error in the xa_state. * @xas: XArray operation state. * @err: Negative error number. * * Only call this function with a negative @err; zero or positive errors * will probably not behave the way you think they should. If you want * to clear the error from an xa_state, use xas_reset(). */ static inline void xas_set_err(struct xa_state *xas, long err) { xas->xa_node = XA_ERROR(err); } /** * xas_invalid() - Is the xas in a retry or error state? * @xas: XArray operation state. * * Return: %true if the xas cannot be used for operations. */ static inline bool xas_invalid(const struct xa_state *xas) { return (unsigned long)xas->xa_node & 3; } /** * xas_valid() - Is the xas a valid cursor into the array? * @xas: XArray operation state. * * Return: %true if the xas can be used for operations. */ static inline bool xas_valid(const struct xa_state *xas) { return !xas_invalid(xas); } /** * xas_is_node() - Does the xas point to a node? * @xas: XArray operation state. * * Return: %true if the xas currently references a node. */ static inline bool xas_is_node(const struct xa_state *xas) { return xas_valid(xas) && xas->xa_node; } /* True if the pointer is something other than a node */ static inline bool xas_not_node(struct xa_node *node) { return ((unsigned long)node & 3) || !node; } /* True if the node represents RESTART or an error */ static inline bool xas_frozen(struct xa_node *node) { return (unsigned long)node & 2; } /* True if the node represents head-of-tree, RESTART or BOUNDS */ static inline bool xas_top(struct xa_node *node) { return node <= XAS_RESTART; } /** * xas_reset() - Reset an XArray operation state. * @xas: XArray operation state. * * Resets the error or walk state of the @xas so future walks of the * array will start from the root. Use this if you have dropped the * xarray lock and want to reuse the xa_state. * * Context: Any context. */ static inline void xas_reset(struct xa_state *xas) { xas->xa_node = XAS_RESTART; } /** * xas_retry() - Retry the operation if appropriate. * @xas: XArray operation state. * @entry: Entry from xarray. * * The advanced functions may sometimes return an internal entry, such as * a retry entry or a zero entry. This function sets up the @xas to restart * the walk from the head of the array if needed. * * Context: Any context. * Return: true if the operation needs to be retried. 
*/ static inline bool xas_retry(struct xa_state *xas, const void *entry) { if (xa_is_zero(entry)) return true; if (!xa_is_retry(entry)) return false; xas_reset(xas); return true; } void *xas_load(struct xa_state *); void *xas_store(struct xa_state *, void *entry); void *xas_find(struct xa_state *, unsigned long max); void *xas_find_conflict(struct xa_state *); bool xas_get_mark(const struct xa_state *, xa_mark_t); void xas_set_mark(const struct xa_state *, xa_mark_t); void xas_clear_mark(const struct xa_state *, xa_mark_t); void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t); void xas_init_marks(const struct xa_state *); bool xas_nomem(struct xa_state *, gfp_t); void xas_destroy(struct xa_state *); void xas_pause(struct xa_state *); void xas_create_range(struct xa_state *); #ifdef CONFIG_XARRAY_MULTI int xa_get_order(struct xarray *, unsigned long index); void xas_split(struct xa_state *, void *entry, unsigned int order); void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { return 0; } static inline void xas_split(struct xa_state *xas, void *entry, unsigned int order) { xas_store(xas, entry); } static inline void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { } #endif /** * xas_reload() - Refetch an entry from the xarray. * @xas: XArray operation state. * * Use this function to check that a previously loaded entry still has * the same value. This is useful for the lockless pagecache lookup where * we walk the array with only the RCU lock to protect us, lock the page, * then check that the page hasn't moved since we looked it up. * * The caller guarantees that @xas is still valid. If it may be in an * error or restart state, call xas_load() instead. * * Return: The entry at this location in the xarray. */ static inline void *xas_reload(struct xa_state *xas) { struct xa_node *node = xas->xa_node; void *entry; char offset; if (!node) return xa_head(xas->xa); if (IS_ENABLED(CONFIG_XARRAY_MULTI)) { offset = (xas->xa_index >> node->shift) & XA_CHUNK_MASK; entry = xa_entry(xas->xa, node, offset); if (!xa_is_sibling(entry)) return entry; offset = xa_to_sibling(entry); } else { offset = xas->xa_offset; } return xa_entry(xas->xa, node, offset); } /** * xas_set() - Set up XArray operation state for a different index. * @xas: XArray operation state. * @index: New index into the XArray. * * Move the operation state to refer to a different index. This will * have the effect of starting a walk from the top; see xas_next() * to move to an adjacent index. */ static inline void xas_set(struct xa_state *xas, unsigned long index) { xas->xa_index = index; xas->xa_node = XAS_RESTART; } /** * xas_advance() - Skip over sibling entries. * @xas: XArray operation state. * @index: Index of last sibling entry. * * Move the operation state to refer to the last sibling entry. * This is useful for loops that normally want to see sibling * entries but sometimes want to skip them. Use xas_set() if you * want to move to an index which is not part of this entry. */ static inline void xas_advance(struct xa_state *xas, unsigned long index) { unsigned char shift = xas_is_node(xas) ? xas->xa_node->shift : 0; xas->xa_index = index; xas->xa_offset = (index >> shift) & XA_CHUNK_MASK; } /** * xas_set_order() - Set up XArray operation state for a multislot entry. * @xas: XArray operation state. * @index: Target of the operation. * @order: Entry occupies 2^@order indices. 
*/ static inline void xas_set_order(struct xa_state *xas, unsigned long index, unsigned int order) { #ifdef CONFIG_XARRAY_MULTI xas->xa_index = order < BITS_PER_LONG ? (index >> order) << order : 0; xas->xa_shift = order - (order % XA_CHUNK_SHIFT); xas->xa_sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; xas->xa_node = XAS_RESTART; #else BUG_ON(order > 0); xas_set(xas, index); #endif } /** * xas_set_update() - Set up XArray operation state for a callback. * @xas: XArray operation state. * @update: Function to call when updating a node. * * The XArray can notify a caller after it has updated an xa_node. * This is advanced functionality and is only needed by the page * cache and swap cache. */ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) { xas->xa_update = update; } static inline void xas_set_lru(struct xa_state *xas, struct list_lru *lru) { xas->xa_lru = lru; } /** * xas_next_entry() - Advance iterator to next present entry. * @xas: XArray operation state. * @max: Highest index to return. * * xas_next_entry() is an inline function to optimise xarray traversal for * speed. It is equivalent to calling xas_find(), and will call xas_find() * for all the hard cases. * * Return: The next present entry after the one currently referred to by @xas. */ static inline void *xas_next_entry(struct xa_state *xas, unsigned long max) { struct xa_node *node = xas->xa_node; void *entry; if (unlikely(xas_not_node(node) || node->shift || xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK))) return xas_find(xas, max); do { if (unlikely(xas->xa_index >= max)) return xas_find(xas, max); if (unlikely(xas->xa_offset == XA_CHUNK_MASK)) return xas_find(xas, max); entry = xa_entry(xas->xa, node, xas->xa_offset + 1); if (unlikely(xa_is_internal(entry))) return xas_find(xas, max); xas->xa_offset++; xas->xa_index++; } while (!entry); return entry; } /* Private */ static inline unsigned int xas_find_chunk(struct xa_state *xas, bool advance, xa_mark_t mark) { unsigned long *addr = xas->xa_node->marks[(__force unsigned)mark]; unsigned int offset = xas->xa_offset; if (advance) offset++; if (XA_CHUNK_SIZE == BITS_PER_LONG) { if (offset < XA_CHUNK_SIZE) { unsigned long data = *addr & (~0UL << offset); if (data) return __ffs(data); } return XA_CHUNK_SIZE; } return find_next_bit(addr, XA_CHUNK_SIZE, offset); } /** * xas_next_marked() - Advance iterator to next marked entry. * @xas: XArray operation state. * @max: Highest index to return. * @mark: Mark to search for. * * xas_next_marked() is an inline function to optimise xarray traversal for * speed. It is equivalent to calling xas_find_marked(), and will call * xas_find_marked() for all the hard cases. * * Return: The next marked entry after the one currently referred to by @xas. */ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) { struct xa_node *node = xas->xa_node; void *entry; unsigned int offset; if (unlikely(xas_not_node(node) || node->shift)) return xas_find_marked(xas, max, mark); offset = xas_find_chunk(xas, true, mark); xas->xa_offset = offset; xas->xa_index = (xas->xa_index & ~XA_CHUNK_MASK) + offset; if (xas->xa_index > max) return NULL; if (offset == XA_CHUNK_SIZE) return xas_find_marked(xas, max, mark); entry = xa_entry(xas->xa, node, offset); if (!entry) return xas_find_marked(xas, max, mark); return entry; } /* * If iterating while holding a lock, drop the lock and reschedule * every %XA_CHECK_SCHED loops. 
*/ enum { XA_CHECK_SCHED = 4096, }; /** * xas_for_each() - Iterate over a range of an XArray. * @xas: XArray operation state. * @entry: Entry retrieved from the array. * @max: Maximum index to retrieve from array. * * The loop body will be executed for each entry present in the xarray * between the current xas position and @max. @entry will be set to * the entry retrieved from the xarray. It is safe to delete entries * from the array in the loop body. You should hold either the RCU lock * or the xa_lock while iterating. If you need to drop the lock, call * xas_pause() first. */ #define xas_for_each(xas, entry, max) \ for (entry = xas_find(xas, max); entry; \ entry = xas_next_entry(xas, max)) /** * xas_for_each_marked() - Iterate over a range of an XArray. * @xas: XArray operation state. * @entry: Entry retrieved from the array. * @max: Maximum index to retrieve from array. * @mark: Mark to search for. * * The loop body will be executed for each marked entry in the xarray * between the current xas position and @max. @entry will be set to * the entry retrieved from the xarray. It is safe to delete entries * from the array in the loop body. You should hold either the RCU lock * or the xa_lock while iterating. If you need to drop the lock, call * xas_pause() first. */ #define xas_for_each_marked(xas, entry, max, mark) \ for (entry = xas_find_marked(xas, max, mark); entry; \ entry = xas_next_marked(xas, max, mark)) /** * xas_for_each_conflict() - Iterate over a range of an XArray. * @xas: XArray operation state. * @entry: Entry retrieved from the array. * * The loop body will be executed for each entry in the XArray that * lies within the range specified by @xas. If the loop terminates * normally, @entry will be %NULL. The user may break out of the loop, * which will leave @entry set to the conflicting entry. The caller * may also call xa_set_err() to exit the loop while setting an error * to record the reason. */ #define xas_for_each_conflict(xas, entry) \ while ((entry = xas_find_conflict(xas))) void *__xas_next(struct xa_state *); void *__xas_prev(struct xa_state *); /** * xas_prev() - Move iterator to previous index. * @xas: XArray operation state. * * If the @xas was in an error state, it will remain in an error state * and this function will return %NULL. If the @xas has never been walked, * it will have the effect of calling xas_load(). Otherwise one will be * subtracted from the index and the state will be walked to the correct * location in the array for the next operation. * * If the iterator was referencing index 0, this function wraps * around to %ULONG_MAX. * * Return: The entry at the new index. This may be %NULL or an internal * entry. */ static inline void *xas_prev(struct xa_state *xas) { struct xa_node *node = xas->xa_node; if (unlikely(xas_not_node(node) || node->shift || xas->xa_offset == 0)) return __xas_prev(xas); xas->xa_index--; xas->xa_offset--; return xa_entry(xas->xa, node, xas->xa_offset); } /** * xas_next() - Move state to next index. * @xas: XArray operation state. * * If the @xas was in an error state, it will remain in an error state * and this function will return %NULL. If the @xas has never been walked, * it will have the effect of calling xas_load(). Otherwise one will be * added to the index and the state will be walked to the correct * location in the array for the next operation. * * If the iterator was referencing index %ULONG_MAX, this function wraps * around to 0. * * Return: The entry at the new index. This may be %NULL or an internal * entry. 
*/ static inline void *xas_next(struct xa_state *xas) { struct xa_node *node = xas->xa_node; if (unlikely(xas_not_node(node) || node->shift || xas->xa_offset == XA_CHUNK_MASK)) return __xas_next(xas); xas->xa_index++; xas->xa_offset++; return xa_entry(xas->xa, node, xas->xa_offset); } #endif /* _LINUX_XARRAY_H */
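/*
 * Illustrative sketch (not part of the original header): a minimal user of
 * the normal XArray API documented above. The names struct foo,
 * foo_register(), foo_lookup() and foo_unregister() are hypothetical and
 * exist only for this example; the xa_* calls are the ones documented above.
 * Assumes process context and an xarray created with XA_FLAGS_ALLOC so that
 * xa_alloc() may be used.
 */
#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(foo_array);	/* equivalent to xa_init_flags(&foo_array, XA_FLAGS_ALLOC) */

struct foo {
	u32 id;
	int value;
};

/* Allocate an unused 32-bit ID for @foo and store it at that index. */
static int foo_register(struct foo *foo)
{
	/* Returns 0, -ENOMEM, or -EBUSY if no free index exists in the limit. */
	return xa_alloc(&foo_array, &foo->id, foo, xa_limit_32b, GFP_KERNEL);
}

/* Look up a foo by ID; xa_load() never sleeps and handles its own RCU locking. */
static struct foo *foo_lookup(u32 id)
{
	return xa_load(&foo_array, id);
}

/* Remove a foo; xa_erase() returns the entry that used to be at the index. */
static void foo_unregister(struct foo *foo)
{
	xa_erase(&foo_array, foo->id);
}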
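/*
 * Illustrative sketch (not part of the original header): iterating with the
 * advanced API documented above. foo_array and struct foo are the
 * hypothetical names from the previous sketch. The caller holds only the
 * RCU read lock, so xas_retry() is used to restart the walk when an
 * internal entry is observed, as the xas_retry() documentation suggests.
 */
static int foo_count_entries(unsigned long max)
{
	XA_STATE(xas, &foo_array, 0);	/* start the walk at index 0 */
	struct foo *foo;
	int count = 0;

	rcu_read_lock();
	xas_for_each(&xas, foo, max) {
		if (xas_retry(&xas, foo))	/* skip retry/zero entries */
			continue;
		count++;
	}
	rcu_read_unlock();

	return count;
}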
// SPDX-License-Identifier: GPL-2.0-only /* * Generic pidhash and scalable, time-bounded PID allocator * * (C) 2002-2003 Nadia Yvette Chambers, IBM * (C) 2004 Nadia Yvette Chambers, Oracle * (C) 2002-2004 Ingo Molnar, Red Hat * * pid-structures are backing objects for tasks sharing a given ID to chain * against. There is very little to them aside from hashing them and * parking tasks using given ID's on a list.
* * The hash is always changed with the tasklist_lock write-acquired, * and the hash is only accessed with the tasklist_lock at least * read-acquired, so there's no additional SMP locking needed here. * * We have a list of bitmap pages, which bitmaps represent the PID space. * Allocating and freeing PIDs is completely lockless. The worst-case * allocation scenario when all but one out of 1 million PIDs possible are * allocated already: the scanning of 32 list entries and at most PAGE_SIZE * bytes. The typical fastpath is a single successful setbit. Freeing is O(1). * * Pid namespaces: * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM * Many thanks to Oleg Nesterov for comments and help * */ #include <linux/mm.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/rculist.h> #include <linux/memblock.h> #include <linux/pid_namespace.h> #include <linux/init_task.h> #include <linux/syscalls.h> #include <linux/proc_ns.h> #include <linux/refcount.h> #include <linux/anon_inodes.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/idr.h> #include <linux/pidfs.h> #include <net/sock.h> #include <uapi/linux/pidfd.h> struct pid init_struct_pid = { .count = REFCOUNT_INIT(1), .tasks = { { .first = NULL }, { .first = NULL }, { .first = NULL }, }, .level = 0, .numbers = { { .nr = 0, .ns = &init_pid_ns, }, } }; int pid_max = PID_MAX_DEFAULT; int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; /* * Pseudo filesystems start inode numbering after one. We use Reserved * PIDs as a natural offset. */ static u64 pidfs_ino = RESERVED_PIDS; /* * PID-map pages start out as NULL, they get allocated upon * first use and are never deallocated. This way a low pid_max * value does not cause lots of bitmaps to be allocated, but * the scheme scales to up to 4 million PIDs, runtime. */ struct pid_namespace init_pid_ns = { .ns.count = REFCOUNT_INIT(2), .idr = IDR_INIT(init_pid_ns.idr), .pid_allocated = PIDNS_ADDING, .level = 0, .child_reaper = &init_task, .user_ns = &init_user_ns, .ns.inum = PROC_PID_INIT_INO, #ifdef CONFIG_PID_NS .ns.ops = &pidns_operations, #endif #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC, #endif }; EXPORT_SYMBOL_GPL(init_pid_ns); /* * Note: disable interrupts while the pidmap_lock is held as an * interrupt might come in and do read_lock(&tasklist_lock). * * If we don't disable interrupts there is a nasty deadlock between * detach_pid()->free_pid() and another cpu that does * spin_lock(&pidmap_lock) followed by an interrupt routine that does * read_lock(&tasklist_lock); * * After we clean up the tasklist_lock and know there are no * irq handlers that take it we can leave the interrupts enabled. * For now it is easier to be safe than to prove it can't happen. 
*/ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); void put_pid(struct pid *pid) { struct pid_namespace *ns; if (!pid) return; ns = pid->numbers[pid->level].ns; if (refcount_dec_and_test(&pid->count)) { kmem_cache_free(ns->pid_cachep, pid); put_pid_ns(ns); } } EXPORT_SYMBOL_GPL(put_pid); static void delayed_put_pid(struct rcu_head *rhp) { struct pid *pid = container_of(rhp, struct pid, rcu); put_pid(pid); } void free_pid(struct pid *pid) { /* We can be called with write_lock_irq(&tasklist_lock) held */ int i; unsigned long flags; spin_lock_irqsave(&pidmap_lock, flags); for (i = 0; i <= pid->level; i++) { struct upid *upid = pid->numbers + i; struct pid_namespace *ns = upid->ns; switch (--ns->pid_allocated) { case 2: case 1: /* When all that is left in the pid namespace * is the reaper wake up the reaper. The reaper * may be sleeping in zap_pid_ns_processes(). */ wake_up_process(ns->child_reaper); break; case PIDNS_ADDING: /* Handle a fork failure of the first process */ WARN_ON(ns->child_reaper); ns->pid_allocated = 0; break; } idr_remove(&ns->idr, upid->nr); } spin_unlock_irqrestore(&pidmap_lock, flags); call_rcu(&pid->rcu, delayed_put_pid); } struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, size_t set_tid_size) { struct pid *pid; enum pid_type type; int i, nr; struct pid_namespace *tmp; struct upid *upid; int retval = -ENOMEM; /* * set_tid_size contains the size of the set_tid array. Starting at * the most nested currently active PID namespace it tells alloc_pid() * which PID to set for a process in that most nested PID namespace * up to set_tid_size PID namespaces. It does not have to set the PID * for a process in all nested PID namespaces but set_tid_size must * never be greater than the current ns->level + 1. */ if (set_tid_size > ns->level + 1) return ERR_PTR(-EINVAL); pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); if (!pid) return ERR_PTR(retval); tmp = ns; pid->level = ns->level; for (i = ns->level; i >= 0; i--) { int tid = 0; if (set_tid_size) { tid = set_tid[ns->level - i]; retval = -EINVAL; if (tid < 1 || tid >= pid_max) goto out_free; /* * Also fail if a PID != 1 is requested and * no PID 1 exists. */ if (tid != 1 && !tmp->child_reaper) goto out_free; retval = -EPERM; if (!checkpoint_restore_ns_capable(tmp->user_ns)) goto out_free; set_tid_size--; } idr_preload(GFP_KERNEL); spin_lock_irq(&pidmap_lock); if (tid) { nr = idr_alloc(&tmp->idr, NULL, tid, tid + 1, GFP_ATOMIC); /* * If ENOSPC is returned it means that the PID is * alreay in use. Return EEXIST in that case. */ if (nr == -ENOSPC) nr = -EEXIST; } else { int pid_min = 1; /* * init really needs pid 1, but after reaching the * maximum wrap back to RESERVED_PIDS */ if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS) pid_min = RESERVED_PIDS; /* * Store a null pointer so find_pid_ns does not find * a partially initialized PID (see below). */ nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, pid_max, GFP_ATOMIC); } spin_unlock_irq(&pidmap_lock); idr_preload_end(); if (nr < 0) { retval = (nr == -ENOSPC) ? -EAGAIN : nr; goto out_free; } pid->numbers[i].nr = nr; pid->numbers[i].ns = tmp; tmp = tmp->parent; } /* * ENOMEM is not the most obvious choice especially for the case * where the child subreaper has already exited and the pid * namespace denies the creation of any new processes. But ENOMEM * is what we have exposed to userspace for a long time and it is * documented behavior for pid namespaces. So we can't easily * change it even if there were an error code better suited. 
*/ retval = -ENOMEM; get_pid_ns(ns); refcount_set(&pid->count, 1); spin_lock_init(&pid->lock); for (type = 0; type < PIDTYPE_MAX; ++type) INIT_HLIST_HEAD(&pid->tasks[type]); init_waitqueue_head(&pid->wait_pidfd); INIT_HLIST_HEAD(&pid->inodes); upid = pid->numbers + ns->level; spin_lock_irq(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; pid->stashed = NULL; pid->ino = ++pidfs_ino; for ( ; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); upid->ns->pid_allocated++; } spin_unlock_irq(&pidmap_lock); return pid; out_unlock: spin_unlock_irq(&pidmap_lock); put_pid_ns(ns); out_free: spin_lock_irq(&pidmap_lock); while (++i <= ns->level) { upid = pid->numbers + i; idr_remove(&upid->ns->idr, upid->nr); } /* On failure to allocate the first pid, reset the state */ if (ns->pid_allocated == PIDNS_ADDING) idr_set_cursor(&ns->idr, 0); spin_unlock_irq(&pidmap_lock); kmem_cache_free(ns->pid_cachep, pid); return ERR_PTR(retval); } void disable_pid_allocation(struct pid_namespace *ns) { spin_lock_irq(&pidmap_lock); ns->pid_allocated &= ~PIDNS_ADDING; spin_unlock_irq(&pidmap_lock); } struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { return idr_find(&ns->idr, nr); } EXPORT_SYMBOL_GPL(find_pid_ns); struct pid *find_vpid(int nr) { return find_pid_ns(nr, task_active_pid_ns(current)); } EXPORT_SYMBOL_GPL(find_vpid); static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type) { return (type == PIDTYPE_PID) ? &task->thread_pid : &task->signal->pids[type]; } /* * attach_pid() must be called with the tasklist_lock write-held. */ void attach_pid(struct task_struct *task, enum pid_type type) { struct pid *pid = *task_pid_ptr(task, type); hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]); } static void __change_pid(struct task_struct *task, enum pid_type type, struct pid *new) { struct pid **pid_ptr = task_pid_ptr(task, type); struct pid *pid; int tmp; pid = *pid_ptr; hlist_del_rcu(&task->pid_links[type]); *pid_ptr = new; if (type == PIDTYPE_PID) { WARN_ON_ONCE(pid_has_task(pid, PIDTYPE_PID)); wake_up_all(&pid->wait_pidfd); } for (tmp = PIDTYPE_MAX; --tmp >= 0; ) if (pid_has_task(pid, tmp)) return; free_pid(pid); } void detach_pid(struct task_struct *task, enum pid_type type) { __change_pid(task, type, NULL); } void change_pid(struct task_struct *task, enum pid_type type, struct pid *pid) { __change_pid(task, type, pid); attach_pid(task, type); } void exchange_tids(struct task_struct *left, struct task_struct *right) { struct pid *pid1 = left->thread_pid; struct pid *pid2 = right->thread_pid; struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID]; struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID]; /* Swap the single entry tid lists */ hlists_swap_heads_rcu(head1, head2); /* Swap the per task_struct pid */ rcu_assign_pointer(left->thread_pid, pid2); rcu_assign_pointer(right->thread_pid, pid1); /* Swap the cached value */ WRITE_ONCE(left->pid, pid_nr(pid2)); WRITE_ONCE(right->pid, pid_nr(pid1)); } /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type type) { WARN_ON_ONCE(type == PIDTYPE_PID); hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]); } struct task_struct *pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result = NULL; if (pid) { struct hlist_node *first; first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), 
lockdep_tasklist_lock_is_held()); if (first) result = hlist_entry(first, struct task_struct, pid_links[(type)]); } return result; } EXPORT_SYMBOL(pid_task); /* * Must be called under rcu_read_lock(). */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "find_task_by_pid_ns() needs rcu_read_lock() protection"); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } struct task_struct *find_task_by_vpid(pid_t vnr) { return find_task_by_pid_ns(vnr, task_active_pid_ns(current)); } struct task_struct *find_get_task_by_vpid(pid_t nr) { struct task_struct *task; rcu_read_lock(); task = find_task_by_vpid(nr); if (task) get_task_struct(task); rcu_read_unlock(); return task; } struct pid *get_task_pid(struct task_struct *task, enum pid_type type) { struct pid *pid; rcu_read_lock(); pid = get_pid(rcu_dereference(*task_pid_ptr(task, type))); rcu_read_unlock(); return pid; } EXPORT_SYMBOL_GPL(get_task_pid); struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result; rcu_read_lock(); result = pid_task(pid, type); if (result) get_task_struct(result); rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(get_pid_task); struct pid *find_get_pid(pid_t nr) { struct pid *pid; rcu_read_lock(); pid = get_pid(find_vpid(nr)); rcu_read_unlock(); return pid; } EXPORT_SYMBOL_GPL(find_get_pid); pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) { struct upid *upid; pid_t nr = 0; if (pid && ns->level <= pid->level) { upid = &pid->numbers[ns->level]; if (upid->ns == ns) nr = upid->nr; } return nr; } EXPORT_SYMBOL_GPL(pid_nr_ns); pid_t pid_vnr(struct pid *pid) { return pid_nr_ns(pid, task_active_pid_ns(current)); } EXPORT_SYMBOL_GPL(pid_vnr); pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns) { pid_t nr = 0; rcu_read_lock(); if (!ns) ns = task_active_pid_ns(current); nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns); rcu_read_unlock(); return nr; } EXPORT_SYMBOL(__task_pid_nr_ns); struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) { return ns_of_pid(task_pid(tsk)); } EXPORT_SYMBOL_GPL(task_active_pid_ns); /* * Used by proc to find the first pid that is greater than or equal to nr. * * If there is a pid at nr this function is exactly the same as find_pid_ns. */ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) { return idr_get_next(&ns->idr, &nr); } EXPORT_SYMBOL_GPL(find_ge_pid); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) { struct fd f; struct pid *pid; f = fdget(fd); if (!f.file) return ERR_PTR(-EBADF); pid = pidfd_pid(f.file); if (!IS_ERR(pid)) { get_pid(pid); *flags = f.file->f_flags; } fdput(f); return pid; } /** * pidfd_get_task() - Get the task associated with a pidfd * * @pidfd: pidfd for which to get the task * @flags: flags associated with this pidfd * * Return the task associated with @pidfd. The function takes a reference on * the returned task. The caller is responsible for releasing that reference. * * Return: On success, the task_struct associated with the pidfd. * On error, a negative errno number will be returned. 
*/ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags) { unsigned int f_flags; struct pid *pid; struct task_struct *task; pid = pidfd_get_pid(pidfd, &f_flags); if (IS_ERR(pid)) return ERR_CAST(pid); task = get_pid_task(pid, PIDTYPE_TGID); put_pid(pid); if (!task) return ERR_PTR(-ESRCH); *flags = f_flags; return task; } /** * pidfd_create() - Create a new pid file descriptor. * * @pid: struct pid that the pidfd will reference * @flags: flags to pass * * This creates a new pid file descriptor with the O_CLOEXEC flag set. * * Note, that this function can only be called after the fd table has * been unshared to avoid leaking the pidfd to the new process. * * This symbol should not be explicitly exported to loadable modules. * * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ static int pidfd_create(struct pid *pid, unsigned int flags) { int pidfd; struct file *pidfd_file; pidfd = pidfd_prepare(pid, flags, &pidfd_file); if (pidfd < 0) return pidfd; fd_install(pidfd, pidfd_file); return pidfd; } /** * sys_pidfd_open() - Open new pid file descriptor. * * @pid: pid for which to retrieve a pidfd * @flags: flags to pass * * This creates a new pid file descriptor with the O_CLOEXEC flag set for * the task identified by @pid. Without PIDFD_THREAD flag the target task * must be a thread-group leader. * * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) { int fd; struct pid *p; if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD)) return -EINVAL; if (pid <= 0) return -EINVAL; p = find_get_pid(pid); if (!p) return -ESRCH; fd = pidfd_create(p, flags); put_pid(p); return fd; } void __init pid_idr_init(void) { /* Verify no one has done anything silly: */ BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING); /* bump default and minimum pid_max based on number of cpus */ pid_max = min(pid_max_max, max_t(int, pid_max, PIDS_PER_CPU_DEFAULT * num_possible_cpus())); pid_max_min = max_t(int, pid_max_min, PIDS_PER_CPU_MIN * num_possible_cpus()); pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min); idr_init(&init_pid_ns.idr); init_pid_ns.pid_cachep = kmem_cache_create("pid", struct_size_t(struct pid, numbers, 1), __alignof__(struct pid), SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); } static struct file *__pidfd_fget(struct task_struct *task, int fd) { struct file *file; int ret; ret = down_read_killable(&task->signal->exec_update_lock); if (ret) return ERR_PTR(ret); if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS)) file = fget_task(task, fd); else file = ERR_PTR(-EPERM); up_read(&task->signal->exec_update_lock); if (!file) { /* * It is possible that the target thread is exiting; it can be * either: * 1. before exit_signals(), which gives a real fd * 2. before exit_files() takes the task_lock() gives a real fd * 3. after exit_files() releases task_lock(), ->files is NULL; * this has PF_EXITING, since it was set in exit_signals(), * __pidfd_fget() returns EBADF. * In case 3 we get EBADF, but that really means ESRCH, since * the task is currently exiting and has freed its files * struct, so we fix it up. 
*/ if (task->flags & PF_EXITING) file = ERR_PTR(-ESRCH); else file = ERR_PTR(-EBADF); } return file; } static int pidfd_getfd(struct pid *pid, int fd) { struct task_struct *task; struct file *file; int ret; task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; file = __pidfd_fget(task, fd); put_task_struct(task); if (IS_ERR(file)) return PTR_ERR(file); ret = receive_fd(file, NULL, O_CLOEXEC); fput(file); return ret; } /** * sys_pidfd_getfd() - Get a file descriptor from another process * * @pidfd: the pidfd file descriptor of the process * @fd: the file descriptor number to get * @flags: flags on how to get the fd (reserved) * * This syscall gets a copy of a file descriptor from another process * based on the pidfd, and file descriptor number. It requires that * the calling process has the ability to ptrace the process represented * by the pidfd. The process which is having its file descriptor copied * is otherwise unaffected. * * Return: On success, a cloexec file descriptor is returned. * On error, a negative errno number will be returned. */ SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd, unsigned int, flags) { struct pid *pid; struct fd f; int ret; /* flags is currently unused - make sure it's unset */ if (flags) return -EINVAL; f = fdget(pidfd); if (!f.file) return -EBADF; pid = pidfd_pid(f.file); if (IS_ERR(pid)) ret = PTR_ERR(pid); else ret = pidfd_getfd(pid, fd); fdput(f); return ret; }
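/*
 * Illustrative kernel-side sketch (not part of kernel/pid.c): the usual
 * get/put reference discipline for the lookup helpers defined above.
 * foo_pid_has_task() is a hypothetical name, and the sketch relies on the
 * headers this file already includes.
 */
static bool foo_pid_has_task(pid_t nr)
{
	struct pid *pid;
	struct task_struct *task;

	pid = find_get_pid(nr);			/* takes a reference on the struct pid, or NULL */
	if (!pid)
		return false;

	task = get_pid_task(pid, PIDTYPE_PID);	/* takes a reference on the task, or NULL */
	put_pid(pid);				/* drop the struct pid reference */

	if (!task)
		return false;

	put_task_struct(task);			/* drop the task reference */
	return true;
}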
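/*
 * Illustrative userspace sketch (not part of kernel/pid.c): exercising the
 * pidfd_open() syscall defined above. A pidfd can be polled and becomes
 * readable once the target process exits. Assumes a libc that defines
 * SYS_pidfd_open in <sys/syscall.h>; error handling is kept minimal.
 */
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {		/* child: exit after a short sleep */
		sleep(1);
		_exit(0);
	}

	int pidfd = syscall(SYS_pidfd_open, child, 0);
	if (pidfd < 0) {
		perror("pidfd_open");
		return EXIT_FAILURE;
	}

	struct pollfd pfd = { .fd = pidfd, .events = POLLIN };
	poll(&pfd, 1, -1);		/* blocks until the child exits */
	printf("child %d exited\n", (int)child);

	close(pidfd);
	waitpid(child, NULL, 0);	/* reap the child */
	return 0;
}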
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NODEMASK_H #define __LINUX_NODEMASK_H /* * Nodemasks provide a bitmap suitable for representing the * set of Node's in a system, one bit position per Node number. * * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * * For details of nodemask_parse_user(), see bitmap_parse_user() in * lib/bitmap.c. For details of nodelist_parse(), see bitmap_parselist(), * also in bitmap.c. For details of node_remap(), see bitmap_bitremap in * lib/bitmap.c. For details of nodes_remap(), see bitmap_remap in * lib/bitmap.c. For details of nodes_onto(), see bitmap_onto in * lib/bitmap.c. For details of nodes_fold(), see bitmap_fold in * lib/bitmap.c.
* * The available nodemask operations are: * * void node_set(node, mask) turn on bit 'node' in mask * void node_clear(node, mask) turn off bit 'node' in mask * void nodes_setall(mask) set all bits * void nodes_clear(mask) clear all bits * int node_isset(node, mask) true iff bit 'node' set in mask * int node_test_and_set(node, mask) test and set bit 'node' in mask * * void nodes_and(dst, src1, src2) dst = src1 & src2 [intersection] * void nodes_or(dst, src1, src2) dst = src1 | src2 [union] * void nodes_xor(dst, src1, src2) dst = src1 ^ src2 * void nodes_andnot(dst, src1, src2) dst = src1 & ~src2 * void nodes_complement(dst, src) dst = ~src * * int nodes_equal(mask1, mask2) Does mask1 == mask2? * int nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect? * int nodes_subset(mask1, mask2) Is mask1 a subset of mask2? * int nodes_empty(mask) Is mask empty (no bits sets)? * int nodes_full(mask) Is mask full (all bits sets)? * int nodes_weight(mask) Hamming weight - number of set bits * * void nodes_shift_right(dst, src, n) Shift right * void nodes_shift_left(dst, src, n) Shift left * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES * unsigned int first_unset_node(mask) First node not set in mask, or * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set * NODE_MASK_ALL Initializer - all bits set * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) * void nodes_onto(dst, orig, relmap) *dst = orig relative to relmap * void nodes_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_node_mask(node, mask) for-loop node over mask * * int num_online_nodes() Number of online Nodes * int num_possible_nodes() Number of all possible Nodes * * int node_random(mask) Random node with set bit in mask * * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? * * node_set_online(node) set bit 'node' in node_online_map * node_set_offline(node) clear bit 'node' in node_online_map * * for_each_node(node) for-loop node over node_possible_map * for_each_online_node(node) for-loop node over node_online_map * * Subtlety: * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway) * to generate slightly worse code. So use a simple one-line #define * for node_isset(), instead of wrapping an inline inside a macro, the * way we do the other calls. * * NODEMASK_SCRATCH * When doing above logical AND, OR, XOR, Remap operations the callers tend to * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large, * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper * for such situations. See below and CPUMASK_ALLOC also. */ #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/minmax.h> #include <linux/nodemask_types.h> #include <linux/numa.h> #include <linux/random.h> extern nodemask_t _unused_nodemask_arg_; /** * nodemask_pr_args - printf args to output a nodemask * @maskp: nodemask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. 
*/ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? m->bits : NULL; } /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions * are called in both __init and non-__init functions, if they are not * inlined we will end up with a section mismatch error (of the type of * freeable items not being freed). So we must use __always_inline here * to fix the problem. If other functions in the future also end up in * this situation they will also need to be annotated as __always_inline */ #define node_set(node, dst) __node_set((node), &(dst)) static __always_inline void __node_set(int node, volatile nodemask_t *dstp) { set_bit(node, dstp->bits); } #define node_clear(node, dst) __node_clear((node), &(dst)) static inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } /* No static inline type checking - see Subtlety (1) above. */ #define node_isset(node, nodemask) test_bit((node), (nodemask).bits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) static inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); } #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); } #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int 
nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); } #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) static inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } #define nodes_shift_right(dst, src, n) \ __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) static inline void __nodes_shift_right(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); } #define nodes_shift_left(dst, src, n) \ __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) static inline void __nodes_shift_left(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } /* FIXME: better would be to fix all architectures to never return > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) static inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) static inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } /* * Find the next present node in src, starting after node n, wrapping around to * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); if (ret == MAX_NUMNODES) ret = __first_node(srcp); return ret; } static inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); } #define nodemask_of_node(node) \ ({ \ typeof(_unused_nodemask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL << (node); \ } else { \ init_nodemask_of_node(&m, (node)); \ } \ m; \ }) #define first_unset_node(mask) __first_unset_node(&(mask)) static inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); } #define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) #if MAX_NUMNODES <= BITS_PER_LONG #define NODE_MASK_ALL \ ((nodemask_t) { { \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #else #define NODE_MASK_ALL \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL, \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #endif #define NODE_MASK_NONE \ ((nodemask_t) { { \ [0 ... 
BITS_TO_LONGS(MAX_NUMNODES)-1] = 0UL \ } }) #define nodes_addr(src) ((src).bits) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) static inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); } #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); } #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); } #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); } #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \ (node) < MAX_NUMNODES; \ (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ #define for_each_node_mask(node, mask) \ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) #endif /* MAX_NUMNODES */ /* * Bitmasks that are kept for all the nodes. */ enum node_states { N_POSSIBLE, /* The node could become online at some point */ N_ONLINE, /* The node is online */ N_NORMAL_MEMORY, /* The node has regular memory */ #ifdef CONFIG_HIGHMEM N_HIGH_MEMORY, /* The node has regular or high memory */ #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */ NR_NODE_STATES }; /* * The following particular system nodemasks and operations * on them manage all possible and online nodes. 
*/ extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 static inline int node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } static inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } static inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } static inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } #define for_each_node_state(__node, __state) \ for_each_node_mask((__node), node_states[__state]) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) static inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } static inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; static inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } static inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } #else static inline int node_state(int node, enum node_states state) { return node == 0; } static inline void node_set_state(int node, enum node_states state) { } static inline void node_clear_state(int node, enum node_states state) { } static inline int num_node_state(enum node_states state) { return 1; } #define for_each_node_state(node, __state) \ for ( (node) = 0; (node) == 0; (node) = 1) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) #define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U #define node_set_online(node) node_set_state((node), N_ONLINE) #define node_set_offline(node) node_clear_state((node), N_ONLINE) #endif static inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int w, bit; w = nodes_weight(*maskp); switch (w) { case 0: bit = NUMA_NO_NODE; break; case 1: bit = first_node(*maskp); break; default: bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_u32_below(w)); break; } return bit; #else return 0; #endif } #define node_online_map node_states[N_ONLINE] #define node_possible_map node_states[N_POSSIBLE] #define num_online_nodes() num_node_state(N_ONLINE) #define num_possible_nodes() num_node_state(N_POSSIBLE) #define node_online(node) node_state((node), N_ONLINE) #define node_possible(node) node_state((node), N_POSSIBLE) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) /* * For nodemask scratch area. * NODEMASK_ALLOC(type, name) allocates an object with a specified type and * name. */ #if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */ #define NODEMASK_ALLOC(type, name, gfp_flags) \ type *name = kmalloc(sizeof(*name), gfp_flags) #define NODEMASK_FREE(m) kfree(m) #else #define NODEMASK_ALLOC(type, name, gfp_flags) type _##name, *name = &_##name #define NODEMASK_FREE(m) do {} while (0) #endif /* Example structure for using NODEMASK_ALLOC, used in mempolicy. */ struct nodemask_scratch { nodemask_t mask1; nodemask_t mask2; }; #define NODEMASK_SCRATCH(x) \ NODEMASK_ALLOC(struct nodemask_scratch, x, \ GFP_KERNEL | __GFP_NORETRY) #define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) #endif /* __LINUX_NODEMASK_H */
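/*
 * Example (not part of nodemask.h): a minimal usage sketch of the nodemask
 * API declared above, assuming a NUMA-enabled kernel (MAX_NUMNODES > 1).
 * The helper name example_pick_fallback_node() and its selection policy are
 * illustrative only.
 */
#include <linux/nodemask.h>

/* Pick an online node after @nid, skipping any node set in @excluded. */
static int example_pick_fallback_node(int nid, const nodemask_t *excluded)
{
	nodemask_t candidates;

	/* candidates = online nodes that are not excluded */
	nodes_andnot(candidates, node_states[N_ONLINE], *excluded);

	if (nodes_empty(candidates))
		return NUMA_NO_NODE;

	/* next_node_in() wraps around, so a set bit is always found here */
	return next_node_in(nid, candidates);
}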
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _INPUT_COMPAT_H
#define _INPUT_COMPAT_H

/*
 * 32bit compatibility wrappers for the input subsystem.
 *
 * Very heavily based on evdev.c - Copyright (c) 1999-2002 Vojtech Pavlik
 */

#include <linux/compiler.h>
#include <linux/compat.h>
#include <linux/input.h>

#ifdef CONFIG_COMPAT

struct input_event_compat {
	compat_ulong_t sec;
	compat_ulong_t usec;
	__u16 type;
	__u16 code;
	__s32 value;
};

struct ff_periodic_effect_compat {
	__u16 waveform;
	__u16 period;
	__s16 magnitude;
	__s16 offset;
	__u16 phase;

	struct ff_envelope envelope;

	__u32 custom_len;
	compat_uptr_t custom_data;
};

struct ff_effect_compat {
	__u16 type;
	__s16 id;
	__u16 direction;
	struct ff_trigger trigger;
	struct ff_replay replay;

	union {
		struct ff_constant_effect constant;
		struct ff_ramp_effect ramp;
		struct ff_periodic_effect_compat periodic;
		struct ff_condition_effect condition[2]; /* One for each axis */
		struct ff_rumble_effect rumble;
	} u;
};

static inline size_t input_event_size(void)
{
	return (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) ?
		sizeof(struct input_event_compat) : sizeof(struct input_event);
}

#else

static inline size_t input_event_size(void)
{
	return sizeof(struct input_event);
}

#endif /* CONFIG_COMPAT */

int input_event_from_user(const char __user *buffer,
			  struct input_event *event);

int input_event_to_user(char __user *buffer,
			const struct input_event *event);

int input_ff_effect_from_user(const char __user *buffer, size_t size,
			      struct ff_effect *effect);

#endif /* _INPUT_COMPAT_H */
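/*
 * Example (not part of input-compat.h): a minimal sketch of how a handler
 * could combine input_event_size() with input_event_to_user() when copying a
 * batch of events to user space, so that compat and 64-bit-time callers both
 * see the record size they expect. The helper name and the buffer handling
 * are illustrative assumptions, not the evdev implementation.
 */
static int example_copy_events_to_user(char __user *buffer, size_t count,
					const struct input_event *events,
					unsigned int nr_events)
{
	size_t evsize = input_event_size();	/* compat/time-ABI aware */
	unsigned int i;

	if (count < (size_t)nr_events * evsize)
		return -EINVAL;

	for (i = 0; i < nr_events; i++) {
		/* writes evsize bytes in the layout the caller expects */
		if (input_event_to_user(buffer + i * evsize, &events[i]))
			return -EFAULT;
	}

	return 0;
}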
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Roccat Kone[+] driver for Linux
 *
 * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net>
 */

/*
 */

/*
 * Roccat Kone[+] is an updated/improved version of the Kone with more memory
 * and functionality and without the non-standard behaviours the Kone had.
 * KoneXTD has same capabilities but updated sensor.
*/ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" #include "hid-roccat-koneplus.h" static uint profile_numbers[5] = {0, 1, 2, 3, 4}; static void koneplus_profile_activated(struct koneplus_device *koneplus, uint new_profile) { koneplus->actual_profile = new_profile; } static int koneplus_send_control(struct usb_device *usb_dev, uint value, enum koneplus_control_requests request) { struct roccat_common2_control control; if ((request == KONEPLUS_CONTROL_REQUEST_PROFILE_SETTINGS || request == KONEPLUS_CONTROL_REQUEST_PROFILE_BUTTONS) && value > 4) return -EINVAL; control.command = ROCCAT_COMMON_COMMAND_CONTROL; control.value = value; control.request = request; return roccat_common2_send_with_status(usb_dev, ROCCAT_COMMON_COMMAND_CONTROL, &control, sizeof(struct roccat_common2_control)); } /* retval is 0-4 on success, < 0 on error */ static int koneplus_get_actual_profile(struct usb_device *usb_dev) { struct koneplus_actual_profile buf; int retval; retval = roccat_common2_receive(usb_dev, KONEPLUS_COMMAND_ACTUAL_PROFILE, &buf, KONEPLUS_SIZE_ACTUAL_PROFILE); return retval ? retval : buf.actual_profile; } static int koneplus_set_actual_profile(struct usb_device *usb_dev, int new_profile) { struct koneplus_actual_profile buf; buf.command = KONEPLUS_COMMAND_ACTUAL_PROFILE; buf.size = KONEPLUS_SIZE_ACTUAL_PROFILE; buf.actual_profile = new_profile; return roccat_common2_send_with_status(usb_dev, KONEPLUS_COMMAND_ACTUAL_PROFILE, &buf, KONEPLUS_SIZE_ACTUAL_PROFILE); } static ssize_t koneplus_sysfs_read(struct file *fp, struct kobject *kobj, char *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct koneplus_device *koneplus = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off >= real_size) return 0; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&koneplus->koneplus_lock); retval = roccat_common2_receive(usb_dev, command, buf, real_size); mutex_unlock(&koneplus->koneplus_lock); if (retval) return retval; return real_size; } static ssize_t koneplus_sysfs_write(struct file *fp, struct kobject *kobj, void const *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct koneplus_device *koneplus = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&koneplus->koneplus_lock); retval = roccat_common2_send_with_status(usb_dev, command, buf, real_size); mutex_unlock(&koneplus->koneplus_lock); if (retval) return retval; return real_size; } #define KONEPLUS_SYSFS_W(thingy, THINGY) \ static ssize_t koneplus_sysfs_write_ ## thingy(struct file *fp, \ struct kobject *kobj, struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ return koneplus_sysfs_write(fp, kobj, buf, off, count, \ KONEPLUS_SIZE_ ## THINGY, KONEPLUS_COMMAND_ ## THINGY); \ } #define KONEPLUS_SYSFS_R(thingy, THINGY) \ static ssize_t koneplus_sysfs_read_ ## thingy(struct file *fp, \ struct kobject *kobj, struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ return koneplus_sysfs_read(fp, kobj, buf, off, count, \ KONEPLUS_SIZE_ ## THINGY, KONEPLUS_COMMAND_ ## THINGY); \ } #define 
KONEPLUS_SYSFS_RW(thingy, THINGY) \ KONEPLUS_SYSFS_W(thingy, THINGY) \ KONEPLUS_SYSFS_R(thingy, THINGY) #define KONEPLUS_BIN_ATTRIBUTE_RW(thingy, THINGY) \ KONEPLUS_SYSFS_RW(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0660 }, \ .size = KONEPLUS_SIZE_ ## THINGY, \ .read = koneplus_sysfs_read_ ## thingy, \ .write = koneplus_sysfs_write_ ## thingy \ } #define KONEPLUS_BIN_ATTRIBUTE_R(thingy, THINGY) \ KONEPLUS_SYSFS_R(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0440 }, \ .size = KONEPLUS_SIZE_ ## THINGY, \ .read = koneplus_sysfs_read_ ## thingy, \ } #define KONEPLUS_BIN_ATTRIBUTE_W(thingy, THINGY) \ KONEPLUS_SYSFS_W(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0220 }, \ .size = KONEPLUS_SIZE_ ## THINGY, \ .write = koneplus_sysfs_write_ ## thingy \ } KONEPLUS_BIN_ATTRIBUTE_W(control, CONTROL); KONEPLUS_BIN_ATTRIBUTE_W(talk, TALK); KONEPLUS_BIN_ATTRIBUTE_W(macro, MACRO); KONEPLUS_BIN_ATTRIBUTE_R(tcu_image, TCU_IMAGE); KONEPLUS_BIN_ATTRIBUTE_RW(info, INFO); KONEPLUS_BIN_ATTRIBUTE_RW(sensor, SENSOR); KONEPLUS_BIN_ATTRIBUTE_RW(tcu, TCU); KONEPLUS_BIN_ATTRIBUTE_RW(profile_settings, PROFILE_SETTINGS); KONEPLUS_BIN_ATTRIBUTE_RW(profile_buttons, PROFILE_BUTTONS); static ssize_t koneplus_sysfs_read_profilex_settings(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); ssize_t retval; retval = koneplus_send_control(usb_dev, *(uint *)(attr->private), KONEPLUS_CONTROL_REQUEST_PROFILE_SETTINGS); if (retval) return retval; return koneplus_sysfs_read(fp, kobj, buf, off, count, KONEPLUS_SIZE_PROFILE_SETTINGS, KONEPLUS_COMMAND_PROFILE_SETTINGS); } static ssize_t koneplus_sysfs_read_profilex_buttons(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); ssize_t retval; retval = koneplus_send_control(usb_dev, *(uint *)(attr->private), KONEPLUS_CONTROL_REQUEST_PROFILE_BUTTONS); if (retval) return retval; return koneplus_sysfs_read(fp, kobj, buf, off, count, KONEPLUS_SIZE_PROFILE_BUTTONS, KONEPLUS_COMMAND_PROFILE_BUTTONS); } #define PROFILE_ATTR(number) \ static struct bin_attribute bin_attr_profile##number##_settings = { \ .attr = { .name = "profile" #number "_settings", .mode = 0440 }, \ .size = KONEPLUS_SIZE_PROFILE_SETTINGS, \ .read = koneplus_sysfs_read_profilex_settings, \ .private = &profile_numbers[number-1], \ }; \ static struct bin_attribute bin_attr_profile##number##_buttons = { \ .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \ .size = KONEPLUS_SIZE_PROFILE_BUTTONS, \ .read = koneplus_sysfs_read_profilex_buttons, \ .private = &profile_numbers[number-1], \ }; PROFILE_ATTR(1); PROFILE_ATTR(2); PROFILE_ATTR(3); PROFILE_ATTR(4); PROFILE_ATTR(5); static ssize_t koneplus_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct koneplus_device *koneplus = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", koneplus->actual_profile); } static ssize_t koneplus_sysfs_set_actual_profile(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct 
koneplus_device *koneplus; struct usb_device *usb_dev; unsigned long profile; int retval; struct koneplus_roccat_report roccat_report; dev = dev->parent->parent; koneplus = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &profile); if (retval) return retval; if (profile > 4) return -EINVAL; mutex_lock(&koneplus->koneplus_lock); retval = koneplus_set_actual_profile(usb_dev, profile); if (retval) { mutex_unlock(&koneplus->koneplus_lock); return retval; } koneplus_profile_activated(koneplus, profile); roccat_report.type = KONEPLUS_MOUSE_REPORT_BUTTON_TYPE_PROFILE; roccat_report.data1 = profile + 1; roccat_report.data2 = 0; roccat_report.profile = profile + 1; roccat_report_event(koneplus->chrdev_minor, (uint8_t const *)&roccat_report); mutex_unlock(&koneplus->koneplus_lock); return size; } static DEVICE_ATTR(actual_profile, 0660, koneplus_sysfs_show_actual_profile, koneplus_sysfs_set_actual_profile); static DEVICE_ATTR(startup_profile, 0660, koneplus_sysfs_show_actual_profile, koneplus_sysfs_set_actual_profile); static ssize_t koneplus_sysfs_show_firmware_version(struct device *dev, struct device_attribute *attr, char *buf) { struct koneplus_device *koneplus; struct usb_device *usb_dev; struct koneplus_info info; dev = dev->parent->parent; koneplus = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); mutex_lock(&koneplus->koneplus_lock); roccat_common2_receive(usb_dev, KONEPLUS_COMMAND_INFO, &info, KONEPLUS_SIZE_INFO); mutex_unlock(&koneplus->koneplus_lock); return snprintf(buf, PAGE_SIZE, "%d\n", info.firmware_version); } static DEVICE_ATTR(firmware_version, 0440, koneplus_sysfs_show_firmware_version, NULL); static struct attribute *koneplus_attrs[] = { &dev_attr_actual_profile.attr, &dev_attr_startup_profile.attr, &dev_attr_firmware_version.attr, NULL, }; static struct bin_attribute *koneplus_bin_attributes[] = { &bin_attr_control, &bin_attr_talk, &bin_attr_macro, &bin_attr_tcu_image, &bin_attr_info, &bin_attr_sensor, &bin_attr_tcu, &bin_attr_profile_settings, &bin_attr_profile_buttons, &bin_attr_profile1_settings, &bin_attr_profile2_settings, &bin_attr_profile3_settings, &bin_attr_profile4_settings, &bin_attr_profile5_settings, &bin_attr_profile1_buttons, &bin_attr_profile2_buttons, &bin_attr_profile3_buttons, &bin_attr_profile4_buttons, &bin_attr_profile5_buttons, NULL, }; static const struct attribute_group koneplus_group = { .attrs = koneplus_attrs, .bin_attrs = koneplus_bin_attributes, }; static const struct attribute_group *koneplus_groups[] = { &koneplus_group, NULL, }; static const struct class koneplus_class = { .name = "koneplus", .dev_groups = koneplus_groups, }; static int koneplus_init_koneplus_device_struct(struct usb_device *usb_dev, struct koneplus_device *koneplus) { int retval; mutex_init(&koneplus->koneplus_lock); retval = koneplus_get_actual_profile(usb_dev); if (retval < 0) return retval; koneplus_profile_activated(koneplus, retval); return 0; } static int koneplus_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct koneplus_device *koneplus; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { koneplus = kzalloc(sizeof(*koneplus), GFP_KERNEL); if (!koneplus) { hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, koneplus); retval = 
koneplus_init_koneplus_device_struct(usb_dev, koneplus); if (retval) { hid_err(hdev, "couldn't init struct koneplus_device\n"); goto exit_free; } retval = roccat_connect(&koneplus_class, hdev, sizeof(struct koneplus_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { koneplus->chrdev_minor = retval; koneplus->roccat_claimed = 1; } } else { hid_set_drvdata(hdev, NULL); } return 0; exit_free: kfree(koneplus); return retval; } static void koneplus_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct koneplus_device *koneplus; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { koneplus = hid_get_drvdata(hdev); if (koneplus->roccat_claimed) roccat_disconnect(koneplus->chrdev_minor); kfree(koneplus); } } static int koneplus_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = koneplus_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void koneplus_remove(struct hid_device *hdev) { koneplus_remove_specials(hdev); hid_hw_stop(hdev); } static void koneplus_keep_values_up_to_date(struct koneplus_device *koneplus, u8 const *data) { struct koneplus_mouse_report_button const *button_report; switch (data[0]) { case KONEPLUS_MOUSE_REPORT_NUMBER_BUTTON: button_report = (struct koneplus_mouse_report_button const *)data; switch (button_report->type) { case KONEPLUS_MOUSE_REPORT_BUTTON_TYPE_PROFILE: koneplus_profile_activated(koneplus, button_report->data1 - 1); break; } break; } } static void koneplus_report_to_chrdev(struct koneplus_device const *koneplus, u8 const *data) { struct koneplus_roccat_report roccat_report; struct koneplus_mouse_report_button const *button_report; if (data[0] != KONEPLUS_MOUSE_REPORT_NUMBER_BUTTON) return; button_report = (struct koneplus_mouse_report_button const *)data; if ((button_report->type == KONEPLUS_MOUSE_REPORT_BUTTON_TYPE_QUICKLAUNCH || button_report->type == KONEPLUS_MOUSE_REPORT_BUTTON_TYPE_TIMER) && button_report->data2 != KONEPLUS_MOUSE_REPORT_BUTTON_ACTION_PRESS) return; roccat_report.type = button_report->type; roccat_report.data1 = button_report->data1; roccat_report.data2 = button_report->data2; roccat_report.profile = koneplus->actual_profile + 1; roccat_report_event(koneplus->chrdev_minor, (uint8_t const *)&roccat_report); } static int koneplus_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct koneplus_device *koneplus = hid_get_drvdata(hdev); if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) return 0; if (koneplus == NULL) return 0; koneplus_keep_values_up_to_date(koneplus, data); if (koneplus->roccat_claimed) koneplus_report_to_chrdev(koneplus, data); return 0; } static const struct hid_device_id koneplus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEPLUS) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEXTD) }, { } }; MODULE_DEVICE_TABLE(hid, koneplus_devices); static struct hid_driver koneplus_driver = { .name = "koneplus", .id_table = koneplus_devices, .probe 
= koneplus_probe, .remove = koneplus_remove, .raw_event = koneplus_raw_event }; static int __init koneplus_init(void) { int retval; /* class name has to be same as driver name */ retval = class_register(&koneplus_class); if (retval) return retval; retval = hid_register_driver(&koneplus_driver); if (retval) class_unregister(&koneplus_class); return retval; } static void __exit koneplus_exit(void) { hid_unregister_driver(&koneplus_driver); class_unregister(&koneplus_class); } module_init(koneplus_init); module_exit(koneplus_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Kone[+]/XTD driver"); MODULE_LICENSE("GPL v2");
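/*
 * Example (not part of the driver): a small user-space sketch showing how the
 * actual_profile attribute exported above could be used to switch profiles.
 * The sysfs directory passed in is an assumption and depends on how the HID
 * device was enumerated; the helper name is illustrative only.
 */
#include <stdio.h>

static int example_set_koneplus_profile(const char *sysfs_dir, int profile)
{
	char path[512];
	FILE *f;

	if (profile < 0 || profile > 4)	/* the driver accepts 0-4 only */
		return -1;

	snprintf(path, sizeof(path), "%s/actual_profile", sysfs_dir);
	f = fopen(path, "w");
	if (!f)
		return -1;

	fprintf(f, "%d\n", profile);
	return fclose(f);
}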
// SPDX-License-Identifier: GPL-2.0
/*
 * Fast batching percpu counters.
 */

#include <linux/percpu_counter.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/debugobjects.h>

#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(percpu_counters);
static DEFINE_SPINLOCK(percpu_counters_lock);
#endif

#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER

static const struct debug_obj_descr percpu_counter_debug_descr;

static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
{
	struct percpu_counter *fbc = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		percpu_counter_destroy(fbc);
		debug_object_free(fbc, &percpu_counter_debug_descr);
		return true;
	default:
		return false;
	}
}

static const struct debug_obj_descr percpu_counter_debug_descr = {
	.name		= "percpu_counter",
	.fixup_free	= percpu_counter_fixup_free,
};

static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
{
	debug_object_init(fbc, &percpu_counter_debug_descr);
	debug_object_activate(fbc, &percpu_counter_debug_descr);
}

static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
{
	debug_object_deactivate(fbc, &percpu_counter_debug_descr);
	debug_object_free(fbc, &percpu_counter_debug_descr);
}

#else	/* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
{ }
static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
{ }
#endif	/* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */

void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
	int cpu;
	unsigned long flags;

	raw_spin_lock_irqsave(&fbc->lock, flags);
	for_each_possible_cpu(cpu) {
		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
		*pcount = 0;
	}
	fbc->count = amount;
	raw_spin_unlock_irqrestore(&fbc->lock, flags);
}
EXPORT_SYMBOL(percpu_counter_set);

/*
 * local_irq_save() is needed to make the function irq safe:
 * - The slow path would be ok as protected by an irq-safe spinlock.
 * - this_cpu_add would be ok as it is irq-safe by definition.
* But: * The decision slow path/fast path and the actual update must be atomic, too. * Otherwise a call in process context could check the current values and * decide that the fast path can be used. If now an interrupt occurs before * the this_cpu_add(), and the interrupt updates this_cpu(*fbc->counters), * then the this_cpu_add() that is executed after the interrupt has completed * can produce values larger than "batch" or even overflows. */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; unsigned long flags; local_irq_save(flags); count = __this_cpu_read(*fbc->counters) + amount; if (abs(count) >= batch) { raw_spin_lock(&fbc->lock); fbc->count += count; __this_cpu_sub(*fbc->counters, count - amount); raw_spin_unlock(&fbc->lock); } else { this_cpu_add(*fbc->counters, amount); } local_irq_restore(flags); } EXPORT_SYMBOL(percpu_counter_add_batch); /* * For percpu_counter with a big batch, the devication of its count could * be big, and there is requirement to reduce the deviation, like when the * counter's batch could be runtime decreased to get a better accuracy, * which can be achieved by running this sync function on each CPU. */ void percpu_counter_sync(struct percpu_counter *fbc) { unsigned long flags; s64 count; raw_spin_lock_irqsave(&fbc->lock, flags); count = __this_cpu_read(*fbc->counters); fbc->count += count; __this_cpu_sub(*fbc->counters, count); raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_sync); /* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive(). * * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums * from CPUs that are in the process of being taken offline. Dying cpus have * been removed from the online mask, but may not have had the hotplug dead * notifier called to fold the percpu count back into the global counter sum. * By including dying CPUs in the iteration mask, we avoid this race condition * so __percpu_counter_sum() just does the right thing when CPUs are being taken * offline. 
*/ s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } EXPORT_SYMBOL(__percpu_counter_sum); int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, u32 nr_counters, struct lock_class_key *key) { unsigned long flags __maybe_unused; size_t counter_size; s32 __percpu *counters; u32 i; counter_size = ALIGN(sizeof(*counters), __alignof__(*counters)); counters = __alloc_percpu_gfp(nr_counters * counter_size, __alignof__(*counters), gfp); if (!counters) { fbc[0].counters = NULL; return -ENOMEM; } for (i = 0; i < nr_counters; i++) { raw_spin_lock_init(&fbc[i].lock); lockdep_set_class(&fbc[i].lock, key); #ifdef CONFIG_HOTPLUG_CPU INIT_LIST_HEAD(&fbc[i].list); #endif fbc[i].count = amount; fbc[i].counters = (void *)counters + (i * counter_size); debug_percpu_counter_activate(&fbc[i]); } #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); for (i = 0; i < nr_counters; i++) list_add(&fbc[i].list, &percpu_counters); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif return 0; } EXPORT_SYMBOL(__percpu_counter_init_many); void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) { unsigned long flags __maybe_unused; u32 i; if (WARN_ON_ONCE(!fbc)) return; if (!fbc[0].counters) return; for (i = 0; i < nr_counters; i++) debug_percpu_counter_deactivate(&fbc[i]); #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); for (i = 0; i < nr_counters; i++) list_del(&fbc[i].list); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif free_percpu(fbc[0].counters); for (i = 0; i < nr_counters; i++) fbc[i].counters = NULL; } EXPORT_SYMBOL(percpu_counter_destroy_many); int percpu_counter_batch __read_mostly = 32; EXPORT_SYMBOL(percpu_counter_batch); static int compute_batch_value(unsigned int cpu) { int nr = num_online_cpus(); percpu_counter_batch = max(32, nr*2); return 0; } static int percpu_counter_cpu_dead(unsigned int cpu) { #ifdef CONFIG_HOTPLUG_CPU struct percpu_counter *fbc; compute_batch_value(cpu); spin_lock_irq(&percpu_counters_lock); list_for_each_entry(fbc, &percpu_counters, list) { s32 *pcount; raw_spin_lock(&fbc->lock); pcount = per_cpu_ptr(fbc->counters, cpu); fbc->count += *pcount; *pcount = 0; raw_spin_unlock(&fbc->lock); } spin_unlock_irq(&percpu_counters_lock); #endif return 0; } /* * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else return -1; } /* Need to use precise count */ count = percpu_counter_sum(fbc); if (count > rhs) return 1; else if (count < rhs) return -1; else return 0; } EXPORT_SYMBOL(__percpu_counter_compare); /* * Compare counter, and add amount if total is: less than or equal to limit if * amount is positive, or greater than or equal to limit if amount is negative. * Return true if amount is added, or false if total would be beyond the limit. * * Negative limit is allowed, but unusual. 
* When negative amounts (subs) are given to percpu_counter_limited_add(), * the limit would most naturally be 0 - but other limits are also allowed. * * Overflow beyond S64_MAX is not allowed for: counter, limit and amount * are all assumed to be sane (far from S64_MIN and S64_MAX). */ bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount, s32 batch) { s64 count; s64 unknown; unsigned long flags; bool good = false; if (amount == 0) return true; local_irq_save(flags); unknown = batch * num_online_cpus(); count = __this_cpu_read(*fbc->counters); /* Skip taking the lock when safe */ if (abs(count + amount) <= batch && ((amount > 0 && fbc->count + unknown <= limit) || (amount < 0 && fbc->count - unknown >= limit))) { this_cpu_add(*fbc->counters, amount); local_irq_restore(flags); return true; } raw_spin_lock(&fbc->lock); count = fbc->count + amount; /* Skip percpu_counter_sum() when safe */ if (amount > 0) { if (count - unknown > limit) goto out; if (count + unknown <= limit) good = true; } else { if (count + unknown < limit) goto out; if (count - unknown >= limit) good = true; } if (!good) { s32 *pcount; int cpu; for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { pcount = per_cpu_ptr(fbc->counters, cpu); count += *pcount; } if (amount > 0) { if (count > limit) goto out; } else { if (count < limit) goto out; } good = true; } count = __this_cpu_read(*fbc->counters); fbc->count += count + amount; __this_cpu_sub(*fbc->counters, count); out: raw_spin_unlock(&fbc->lock); local_irq_restore(flags); return good; } static int __init percpu_counter_startup(void) { int ret; ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online", compute_batch_value, NULL); WARN_ON(ret < 0); ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD, "lib/percpu_cnt:dead", NULL, percpu_counter_cpu_dead); WARN_ON(ret < 0); return 0; } module_init(percpu_counter_startup);
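/*
 * Example (not part of percpu_counter.c): a minimal sketch of the typical
 * life cycle of a percpu_counter, assuming process context and GFP_KERNEL
 * allocation. The structure and helper names below are illustrative.
 */
#include <linux/percpu_counter.h>

struct example_stats {
	struct percpu_counter nr_items;
};

static int example_stats_init(struct example_stats *s)
{
	/* Allocates the per-CPU counters; updates are cheap from then on. */
	return percpu_counter_init(&s->nr_items, 0, GFP_KERNEL);
}

static void example_stats_update(struct example_stats *s, long delta)
{
	/*
	 * Per-CPU add; the local delta is only folded into the global count
	 * once it reaches the batch size (percpu_counter_batch by default).
	 */
	percpu_counter_add(&s->nr_items, delta);
}

static s64 example_stats_exact(struct example_stats *s)
{
	/* Slow but precise: sums all per-CPU deltas under the lock. */
	return percpu_counter_sum(&s->nr_items);
}

static void example_stats_destroy(struct example_stats *s)
{
	percpu_counter_destroy(&s->nr_items);
}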
2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 // SPDX-License-Identifier: GPL-2.0-or-later /* * Routines for driver control interface * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/threads.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/math64.h> #include <linux/sched/signal.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/info.h> #include <sound/control.h> // Max allocation size for user controls. static int max_user_ctl_alloc_size = 8 * 1024 * 1024; module_param_named(max_user_ctl_alloc_size, max_user_ctl_alloc_size, int, 0444); MODULE_PARM_DESC(max_user_ctl_alloc_size, "Max allocation size for user controls"); #define MAX_CONTROL_COUNT 1028 struct snd_kctl_ioctl { struct list_head list; /* list of all ioctls */ snd_kctl_ioctl_func_t fioctl; }; static DECLARE_RWSEM(snd_ioctl_rwsem); static DECLARE_RWSEM(snd_ctl_layer_rwsem); static LIST_HEAD(snd_control_ioctls); #ifdef CONFIG_COMPAT static LIST_HEAD(snd_control_compat_ioctls); #endif static struct snd_ctl_layer_ops *snd_ctl_layer; static int snd_ctl_remove_locked(struct snd_card *card, struct snd_kcontrol *kcontrol); static int snd_ctl_open(struct inode *inode, struct file *file) { struct snd_card *card; struct snd_ctl_file *ctl; int i, err; err = stream_open(inode, file); if (err < 0) return err; card = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_CONTROL); if (!card) { err = -ENODEV; goto __error1; } err = snd_card_file_add(card, file); if (err < 0) { err = -ENODEV; goto __error1; } if (!try_module_get(card->module)) { err = -EFAULT; goto __error2; } ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); if (ctl == NULL) { err = -ENOMEM; goto __error; } INIT_LIST_HEAD(&ctl->events); init_waitqueue_head(&ctl->change_sleep); spin_lock_init(&ctl->read_lock); ctl->card = card; for (i = 0; i < SND_CTL_SUBDEV_ITEMS; i++) ctl->preferred_subdevice[i] = -1; ctl->pid = get_pid(task_pid(current)); file->private_data = ctl; scoped_guard(write_lock_irqsave, &card->ctl_files_rwlock) list_add_tail(&ctl->list, &card->ctl_files); snd_card_unref(card); return 0; __error: module_put(card->module); __error2: snd_card_file_remove(card, file); __error1: if (card) snd_card_unref(card); return err; } static void snd_ctl_empty_read_queue(struct snd_ctl_file * ctl) { struct snd_kctl_event *cread; guard(spinlock_irqsave)(&ctl->read_lock); while (!list_empty(&ctl->events)) { cread = snd_kctl_event(ctl->events.next); list_del(&cread->list); kfree(cread); } } static int snd_ctl_release(struct inode *inode, struct file *file) { struct snd_card *card; struct snd_ctl_file *ctl; struct snd_kcontrol *control; unsigned int idx; ctl = file->private_data; file->private_data = NULL; card = ctl->card; scoped_guard(write_lock_irqsave, &card->ctl_files_rwlock) list_del(&ctl->list); 
scoped_guard(rwsem_write, &card->controls_rwsem) { list_for_each_entry(control, &card->controls, list) for (idx = 0; idx < control->count; idx++) if (control->vd[idx].owner == ctl) control->vd[idx].owner = NULL; } snd_fasync_free(ctl->fasync); snd_ctl_empty_read_queue(ctl); put_pid(ctl->pid); kfree(ctl); module_put(card->module); snd_card_file_remove(card, file); return 0; } /** * snd_ctl_notify - Send notification to user-space for a control change * @card: the card to send notification * @mask: the event mask, SNDRV_CTL_EVENT_* * @id: the ctl element id to send notification * * This function adds an event record with the given id and mask, appends * to the list and wakes up the user-space for notification. This can be * called in the atomic context. */ void snd_ctl_notify(struct snd_card *card, unsigned int mask, struct snd_ctl_elem_id *id) { struct snd_ctl_file *ctl; struct snd_kctl_event *ev; if (snd_BUG_ON(!card || !id)) return; if (card->shutdown) return; guard(read_lock_irqsave)(&card->ctl_files_rwlock); #if IS_ENABLED(CONFIG_SND_MIXER_OSS) card->mixer_oss_change_count++; #endif list_for_each_entry(ctl, &card->ctl_files, list) { if (!ctl->subscribed) continue; scoped_guard(spinlock, &ctl->read_lock) { list_for_each_entry(ev, &ctl->events, list) { if (ev->id.numid == id->numid) { ev->mask |= mask; goto _found; } } ev = kzalloc(sizeof(*ev), GFP_ATOMIC); if (ev) { ev->id = *id; ev->mask = mask; list_add_tail(&ev->list, &ctl->events); } else { dev_err(card->dev, "No memory available to allocate event\n"); } _found: wake_up(&ctl->change_sleep); } snd_kill_fasync(ctl->fasync, SIGIO, POLL_IN); } } EXPORT_SYMBOL(snd_ctl_notify); /** * snd_ctl_notify_one - Send notification to user-space for a control change * @card: the card to send notification * @mask: the event mask, SNDRV_CTL_EVENT_* * @kctl: the pointer with the control instance * @ioff: the additional offset to the control index * * This function calls snd_ctl_notify() and does additional jobs * like LED state changes. */ void snd_ctl_notify_one(struct snd_card *card, unsigned int mask, struct snd_kcontrol *kctl, unsigned int ioff) { struct snd_ctl_elem_id id = kctl->id; struct snd_ctl_layer_ops *lops; id.index += ioff; id.numid += ioff; snd_ctl_notify(card, mask, &id); guard(rwsem_read)(&snd_ctl_layer_rwsem); for (lops = snd_ctl_layer; lops; lops = lops->next) lops->lnotify(card, mask, kctl, ioff); } EXPORT_SYMBOL(snd_ctl_notify_one); /** * snd_ctl_new - create a new control instance with some elements * @kctl: the pointer to store new control instance * @count: the number of elements in this control * @access: the default access flags for elements in this control * @file: given when locking these elements * * Allocates a memory object for a new control instance. The instance has * elements as many as the given number (@count). Each element has given * access permissions (@access). Each element is locked when @file is given. 
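 *
 * For instance (illustrative call only), snd_ctl_new(&kctl, 2,
 * SNDRV_CTL_ELEM_ACCESS_READWRITE, NULL) would allocate a set of two
 * readable/writable elements that are not locked to any ctl file handle.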
* * Return: 0 on success, error code on failure */ static int snd_ctl_new(struct snd_kcontrol **kctl, unsigned int count, unsigned int access, struct snd_ctl_file *file) { unsigned int idx; if (count == 0 || count > MAX_CONTROL_COUNT) return -EINVAL; *kctl = kzalloc(struct_size(*kctl, vd, count), GFP_KERNEL); if (!*kctl) return -ENOMEM; for (idx = 0; idx < count; idx++) { (*kctl)->vd[idx].access = access; (*kctl)->vd[idx].owner = file; } (*kctl)->count = count; return 0; } /** * snd_ctl_new1 - create a control instance from the template * @ncontrol: the initialization record * @private_data: the private data to set * * Allocates a new struct snd_kcontrol instance and initialize from the given * template. When the access field of ncontrol is 0, it's assumed as * READWRITE access. When the count field is 0, it's assumes as one. * * Return: The pointer of the newly generated instance, or %NULL on failure. */ struct snd_kcontrol *snd_ctl_new1(const struct snd_kcontrol_new *ncontrol, void *private_data) { struct snd_kcontrol *kctl; unsigned int count; unsigned int access; int err; if (snd_BUG_ON(!ncontrol || !ncontrol->info)) return NULL; count = ncontrol->count; if (count == 0) count = 1; access = ncontrol->access; if (access == 0) access = SNDRV_CTL_ELEM_ACCESS_READWRITE; access &= (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_VOLATILE | SNDRV_CTL_ELEM_ACCESS_INACTIVE | SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND | SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK | SNDRV_CTL_ELEM_ACCESS_LED_MASK | SNDRV_CTL_ELEM_ACCESS_SKIP_CHECK); err = snd_ctl_new(&kctl, count, access, NULL); if (err < 0) return NULL; /* The 'numid' member is decided when calling snd_ctl_add(). */ kctl->id.iface = ncontrol->iface; kctl->id.device = ncontrol->device; kctl->id.subdevice = ncontrol->subdevice; if (ncontrol->name) { strscpy(kctl->id.name, ncontrol->name, sizeof(kctl->id.name)); if (strcmp(ncontrol->name, kctl->id.name) != 0) pr_warn("ALSA: Control name '%s' truncated to '%s'\n", ncontrol->name, kctl->id.name); } kctl->id.index = ncontrol->index; kctl->info = ncontrol->info; kctl->get = ncontrol->get; kctl->put = ncontrol->put; kctl->tlv.p = ncontrol->tlv.p; kctl->private_value = ncontrol->private_value; kctl->private_data = private_data; return kctl; } EXPORT_SYMBOL(snd_ctl_new1); /** * snd_ctl_free_one - release the control instance * @kcontrol: the control instance * * Releases the control instance created via snd_ctl_new() * or snd_ctl_new1(). * Don't call this after the control was added to the card. 
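 *
 * A minimal usage sketch (illustrative only; 'tmpl' and 'chip' are
 * hypothetical driver-side names):
 *
 *	kctl = snd_ctl_new1(&tmpl, chip);
 *	if (!kctl)
 *		return -ENOMEM;
 *	err = snd_ctl_add(card, kctl);	// frees kctl by itself on failure
 *	if (err < 0)
 *		return err;		// so no snd_ctl_free_one() here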
*/ void snd_ctl_free_one(struct snd_kcontrol *kcontrol) { if (kcontrol) { if (kcontrol->private_free) kcontrol->private_free(kcontrol); kfree(kcontrol); } } EXPORT_SYMBOL(snd_ctl_free_one); static bool snd_ctl_remove_numid_conflict(struct snd_card *card, unsigned int count) { struct snd_kcontrol *kctl; /* Make sure that the ids assigned to the control do not wrap around */ if (card->last_numid >= UINT_MAX - count) card->last_numid = 0; list_for_each_entry(kctl, &card->controls, list) { if (kctl->id.numid < card->last_numid + 1 + count && kctl->id.numid + kctl->count > card->last_numid + 1) { card->last_numid = kctl->id.numid + kctl->count - 1; return true; } } return false; } static int snd_ctl_find_hole(struct snd_card *card, unsigned int count) { unsigned int iter = 100000; while (snd_ctl_remove_numid_conflict(card, count)) { if (--iter == 0) { /* this situation is very unlikely */ dev_err(card->dev, "unable to allocate new control numid\n"); return -ENOMEM; } } return 0; } /* check whether the given id is contained in the given kctl */ static bool elem_id_matches(const struct snd_kcontrol *kctl, const struct snd_ctl_elem_id *id) { return kctl->id.iface == id->iface && kctl->id.device == id->device && kctl->id.subdevice == id->subdevice && !strncmp(kctl->id.name, id->name, sizeof(kctl->id.name)) && kctl->id.index <= id->index && kctl->id.index + kctl->count > id->index; } #ifdef CONFIG_SND_CTL_FAST_LOOKUP /* Compute a hash key for the corresponding ctl id * It's for the name lookup, hence the numid is excluded. * The hash key is bound in LONG_MAX to be used for Xarray key. */ #define MULTIPLIER 37 static unsigned long get_ctl_id_hash(const struct snd_ctl_elem_id *id) { int i; unsigned long h; h = id->iface; h = MULTIPLIER * h + id->device; h = MULTIPLIER * h + id->subdevice; for (i = 0; i < SNDRV_CTL_ELEM_ID_NAME_MAXLEN && id->name[i]; i++) h = MULTIPLIER * h + id->name[i]; h = MULTIPLIER * h + id->index; h &= LONG_MAX; return h; } /* add hash entries to numid and ctl xarray tables */ static void add_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { struct snd_ctl_elem_id id = kcontrol->id; int i; xa_store_range(&card->ctl_numids, kcontrol->id.numid, kcontrol->id.numid + kcontrol->count - 1, kcontrol, GFP_KERNEL); for (i = 0; i < kcontrol->count; i++) { id.index = kcontrol->id.index + i; if (xa_insert(&card->ctl_hash, get_ctl_id_hash(&id), kcontrol, GFP_KERNEL)) { /* skip hash for this entry, noting we had collision */ card->ctl_hash_collision = true; dev_dbg(card->dev, "ctl_hash collision %d:%s:%d\n", id.iface, id.name, id.index); } } } /* remove hash entries that have been added */ static void remove_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { struct snd_ctl_elem_id id = kcontrol->id; struct snd_kcontrol *matched; unsigned long h; int i; for (i = 0; i < kcontrol->count; i++) { xa_erase(&card->ctl_numids, id.numid); h = get_ctl_id_hash(&id); matched = xa_load(&card->ctl_hash, h); if (matched && (matched == kcontrol || elem_id_matches(matched, &id))) xa_erase(&card->ctl_hash, h); id.index++; id.numid++; } } #else /* CONFIG_SND_CTL_FAST_LOOKUP */ static inline void add_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { } static inline void remove_hash_entries(struct snd_card *card, struct snd_kcontrol *kcontrol) { } #endif /* CONFIG_SND_CTL_FAST_LOOKUP */ enum snd_ctl_add_mode { CTL_ADD_EXCLUSIVE, CTL_REPLACE, CTL_ADD_ON_REPLACE, }; /* add/replace a new kcontrol object; call with card->controls_rwsem locked */ static int 
__snd_ctl_add_replace(struct snd_card *card, struct snd_kcontrol *kcontrol, enum snd_ctl_add_mode mode) { struct snd_ctl_elem_id id; unsigned int idx; struct snd_kcontrol *old; int err; lockdep_assert_held_write(&card->controls_rwsem); id = kcontrol->id; if (id.index > UINT_MAX - kcontrol->count) return -EINVAL; old = snd_ctl_find_id_locked(card, &id); if (!old) { if (mode == CTL_REPLACE) return -EINVAL; } else { if (mode == CTL_ADD_EXCLUSIVE) { dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n", id.iface, id.device, id.subdevice, id.name, id.index); return -EBUSY; } err = snd_ctl_remove_locked(card, old); if (err < 0) return err; } if (snd_ctl_find_hole(card, kcontrol->count) < 0) return -ENOMEM; list_add_tail(&kcontrol->list, &card->controls); card->controls_count += kcontrol->count; kcontrol->id.numid = card->last_numid + 1; card->last_numid += kcontrol->count; add_hash_entries(card, kcontrol); for (idx = 0; idx < kcontrol->count; idx++) snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_ADD, kcontrol, idx); return 0; } static int snd_ctl_add_replace(struct snd_card *card, struct snd_kcontrol *kcontrol, enum snd_ctl_add_mode mode) { int err = -EINVAL; if (! kcontrol) return err; if (snd_BUG_ON(!card || !kcontrol->info)) goto error; scoped_guard(rwsem_write, &card->controls_rwsem) err = __snd_ctl_add_replace(card, kcontrol, mode); if (err < 0) goto error; return 0; error: snd_ctl_free_one(kcontrol); return err; } /** * snd_ctl_add - add the control instance to the card * @card: the card instance * @kcontrol: the control instance to add * * Adds the control instance created via snd_ctl_new() or * snd_ctl_new1() to the given card. Assigns also an unique * numid used for fast search. * * It frees automatically the control which cannot be added. * * Return: Zero if successful, or a negative error code on failure. * */ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) { return snd_ctl_add_replace(card, kcontrol, CTL_ADD_EXCLUSIVE); } EXPORT_SYMBOL(snd_ctl_add); /** * snd_ctl_replace - replace the control instance of the card * @card: the card instance * @kcontrol: the control instance to replace * @add_on_replace: add the control if not already added * * Replaces the given control. If the given control does not exist * and the add_on_replace flag is set, the control is added. If the * control exists, it is destroyed first. * * It frees automatically the control which cannot be added or replaced. * * Return: Zero if successful, or a negative error code on failure. */ int snd_ctl_replace(struct snd_card *card, struct snd_kcontrol *kcontrol, bool add_on_replace) { return snd_ctl_add_replace(card, kcontrol, add_on_replace ? 
CTL_ADD_ON_REPLACE : CTL_REPLACE); } EXPORT_SYMBOL(snd_ctl_replace); static int __snd_ctl_remove(struct snd_card *card, struct snd_kcontrol *kcontrol, bool remove_hash) { unsigned int idx; lockdep_assert_held_write(&card->controls_rwsem); if (snd_BUG_ON(!card || !kcontrol)) return -EINVAL; list_del(&kcontrol->list); if (remove_hash) remove_hash_entries(card, kcontrol); card->controls_count -= kcontrol->count; for (idx = 0; idx < kcontrol->count; idx++) snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_REMOVE, kcontrol, idx); snd_ctl_free_one(kcontrol); return 0; } static inline int snd_ctl_remove_locked(struct snd_card *card, struct snd_kcontrol *kcontrol) { return __snd_ctl_remove(card, kcontrol, true); } /** * snd_ctl_remove - remove the control from the card and release it * @card: the card instance * @kcontrol: the control instance to remove * * Removes the control from the card and then releases the instance. * You don't need to call snd_ctl_free_one(). * * Return: 0 if successful, or a negative error code on failure. * * Note that this function takes card->controls_rwsem lock internally. */ int snd_ctl_remove(struct snd_card *card, struct snd_kcontrol *kcontrol) { guard(rwsem_write)(&card->controls_rwsem); return snd_ctl_remove_locked(card, kcontrol); } EXPORT_SYMBOL(snd_ctl_remove); /** * snd_ctl_remove_id - remove the control of the given id and release it * @card: the card instance * @id: the control id to remove * * Finds the control instance with the given id, removes it from the * card list and releases it. * * Return: 0 if successful, or a negative error code on failure. */ int snd_ctl_remove_id(struct snd_card *card, struct snd_ctl_elem_id *id) { struct snd_kcontrol *kctl; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, id); if (kctl == NULL) return -ENOENT; return snd_ctl_remove_locked(card, kctl); } EXPORT_SYMBOL(snd_ctl_remove_id); /** * snd_ctl_remove_user_ctl - remove and release the unlocked user control * @file: active control handle * @id: the control id to remove * * Finds the control instance with the given id, removes it from the * card list and releases it. * * Return: 0 if successful, or a negative error code on failure. */ static int snd_ctl_remove_user_ctl(struct snd_ctl_file * file, struct snd_ctl_elem_id *id) { struct snd_card *card = file->card; struct snd_kcontrol *kctl; int idx; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, id); if (kctl == NULL) return -ENOENT; if (!(kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_USER)) return -EINVAL; for (idx = 0; idx < kctl->count; idx++) if (kctl->vd[idx].owner != NULL && kctl->vd[idx].owner != file) return -EBUSY; return snd_ctl_remove_locked(card, kctl); } /** * snd_ctl_activate_id - activate/inactivate the control of the given id * @card: the card instance * @id: the control id to activate/inactivate * @active: non-zero to activate * * Finds the control instance with the given id, and activate or * inactivate the control together with notification, if changed. * The given ID data is filled with full information. * * Return: 0 if unchanged, 1 if changed, or a negative error code on failure. 
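 *
 * For example (illustrative), snd_ctl_activate_id(card, &id, 0) marks the
 * element SNDRV_CTL_ELEM_ACCESS_INACTIVE and emits an INFO event so that
 * mixer applications can grey it out; a later call with a non-zero @active
 * makes it usable again.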
*/ int snd_ctl_activate_id(struct snd_card *card, struct snd_ctl_elem_id *id, int active) { struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int index_offset; int ret; down_write(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, id); if (kctl == NULL) { ret = -ENOENT; goto unlock; } index_offset = snd_ctl_get_ioff(kctl, id); vd = &kctl->vd[index_offset]; ret = 0; if (active) { if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_INACTIVE)) goto unlock; vd->access &= ~SNDRV_CTL_ELEM_ACCESS_INACTIVE; } else { if (vd->access & SNDRV_CTL_ELEM_ACCESS_INACTIVE) goto unlock; vd->access |= SNDRV_CTL_ELEM_ACCESS_INACTIVE; } snd_ctl_build_ioff(id, kctl, index_offset); downgrade_write(&card->controls_rwsem); snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_INFO, kctl, index_offset); up_read(&card->controls_rwsem); return 1; unlock: up_write(&card->controls_rwsem); return ret; } EXPORT_SYMBOL_GPL(snd_ctl_activate_id); /** * snd_ctl_rename_id - replace the id of a control on the card * @card: the card instance * @src_id: the old id * @dst_id: the new id * * Finds the control with the old id from the card, and replaces the * id with the new one. * * The function tries to keep the already assigned numid while replacing * the rest. * * Note that this function should be used only in the card initialization * phase. Calling after the card instantiation may cause issues with * user-space expecting persistent numids. * * Return: Zero if successful, or a negative error code on failure. */ int snd_ctl_rename_id(struct snd_card *card, struct snd_ctl_elem_id *src_id, struct snd_ctl_elem_id *dst_id) { struct snd_kcontrol *kctl; int saved_numid; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, src_id); if (kctl == NULL) return -ENOENT; saved_numid = kctl->id.numid; remove_hash_entries(card, kctl); kctl->id = *dst_id; kctl->id.numid = saved_numid; add_hash_entries(card, kctl); return 0; } EXPORT_SYMBOL(snd_ctl_rename_id); /** * snd_ctl_rename - rename the control on the card * @card: the card instance * @kctl: the control to rename * @name: the new name * * Renames the specified control on the card to the new name. * * Note that this function takes card->controls_rwsem lock internally. */ void snd_ctl_rename(struct snd_card *card, struct snd_kcontrol *kctl, const char *name) { guard(rwsem_write)(&card->controls_rwsem); remove_hash_entries(card, kctl); if (strscpy(kctl->id.name, name, sizeof(kctl->id.name)) < 0) pr_warn("ALSA: Renamed control new name '%s' truncated to '%s'\n", name, kctl->id.name); add_hash_entries(card, kctl); } EXPORT_SYMBOL(snd_ctl_rename); #ifndef CONFIG_SND_CTL_FAST_LOOKUP static struct snd_kcontrol * snd_ctl_find_numid_slow(struct snd_card *card, unsigned int numid) { struct snd_kcontrol *kctl; list_for_each_entry(kctl, &card->controls, list) { if (kctl->id.numid <= numid && kctl->id.numid + kctl->count > numid) return kctl; } return NULL; } #endif /* !CONFIG_SND_CTL_FAST_LOOKUP */ /** * snd_ctl_find_numid_locked - find the control instance with the given number-id * @card: the card instance * @numid: the number-id to search * * Finds the control instance with the given number-id from the card. * * The caller must down card->controls_rwsem before calling this function * (if the race condition can happen). * * Return: The pointer of the instance if found, or %NULL if not. 
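 *
 * A minimal usage sketch (illustrative):
 *
 *	guard(rwsem_read)(&card->controls_rwsem);
 *	kctl = snd_ctl_find_numid_locked(card, numid);
 *	if (kctl)
 *		...;	// use kctl while the rwsem is still held
 *
 * When no lock is held yet, use snd_ctl_find_numid() below, which takes the
 * rwsem internally.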
*/ struct snd_kcontrol * snd_ctl_find_numid_locked(struct snd_card *card, unsigned int numid) { if (snd_BUG_ON(!card || !numid)) return NULL; lockdep_assert_held(&card->controls_rwsem); #ifdef CONFIG_SND_CTL_FAST_LOOKUP return xa_load(&card->ctl_numids, numid); #else return snd_ctl_find_numid_slow(card, numid); #endif } EXPORT_SYMBOL(snd_ctl_find_numid_locked); /** * snd_ctl_find_numid - find the control instance with the given number-id * @card: the card instance * @numid: the number-id to search * * Finds the control instance with the given number-id from the card. * * Return: The pointer of the instance if found, or %NULL if not. * * Note that this function takes card->controls_rwsem lock internally. */ struct snd_kcontrol *snd_ctl_find_numid(struct snd_card *card, unsigned int numid) { guard(rwsem_read)(&card->controls_rwsem); return snd_ctl_find_numid_locked(card, numid); } EXPORT_SYMBOL(snd_ctl_find_numid); /** * snd_ctl_find_id_locked - find the control instance with the given id * @card: the card instance * @id: the id to search * * Finds the control instance with the given id from the card. * * The caller must down card->controls_rwsem before calling this function * (if the race condition can happen). * * Return: The pointer of the instance if found, or %NULL if not. */ struct snd_kcontrol *snd_ctl_find_id_locked(struct snd_card *card, const struct snd_ctl_elem_id *id) { struct snd_kcontrol *kctl; if (snd_BUG_ON(!card || !id)) return NULL; lockdep_assert_held(&card->controls_rwsem); if (id->numid != 0) return snd_ctl_find_numid_locked(card, id->numid); #ifdef CONFIG_SND_CTL_FAST_LOOKUP kctl = xa_load(&card->ctl_hash, get_ctl_id_hash(id)); if (kctl && elem_id_matches(kctl, id)) return kctl; if (!card->ctl_hash_collision) return NULL; /* we can rely on only hash table */ #endif /* no matching in hash table - try all as the last resort */ list_for_each_entry(kctl, &card->controls, list) if (elem_id_matches(kctl, id)) return kctl; return NULL; } EXPORT_SYMBOL(snd_ctl_find_id_locked); /** * snd_ctl_find_id - find the control instance with the given id * @card: the card instance * @id: the id to search * * Finds the control instance with the given id from the card. * * Return: The pointer of the instance if found, or %NULL if not. * * Note that this function takes card->controls_rwsem lock internally. */ struct snd_kcontrol *snd_ctl_find_id(struct snd_card *card, const struct snd_ctl_elem_id *id) { guard(rwsem_read)(&card->controls_rwsem); return snd_ctl_find_id_locked(card, id); } EXPORT_SYMBOL(snd_ctl_find_id); static int snd_ctl_card_info(struct snd_card *card, struct snd_ctl_file * ctl, unsigned int cmd, void __user *arg) { struct snd_ctl_card_info *info __free(kfree) = NULL; info = kzalloc(sizeof(*info), GFP_KERNEL); if (! 
info) return -ENOMEM; scoped_guard(rwsem_read, &snd_ioctl_rwsem) { info->card = card->number; strscpy(info->id, card->id, sizeof(info->id)); strscpy(info->driver, card->driver, sizeof(info->driver)); strscpy(info->name, card->shortname, sizeof(info->name)); strscpy(info->longname, card->longname, sizeof(info->longname)); strscpy(info->mixername, card->mixername, sizeof(info->mixername)); strscpy(info->components, card->components, sizeof(info->components)); } if (copy_to_user(arg, info, sizeof(struct snd_ctl_card_info))) return -EFAULT; return 0; } static int snd_ctl_elem_list(struct snd_card *card, struct snd_ctl_elem_list *list) { struct snd_kcontrol *kctl; struct snd_ctl_elem_id id; unsigned int offset, space, jidx; offset = list->offset; space = list->space; guard(rwsem_read)(&card->controls_rwsem); list->count = card->controls_count; list->used = 0; if (!space) return 0; list_for_each_entry(kctl, &card->controls, list) { if (offset >= kctl->count) { offset -= kctl->count; continue; } for (jidx = offset; jidx < kctl->count; jidx++) { snd_ctl_build_ioff(&id, kctl, jidx); if (copy_to_user(list->pids + list->used, &id, sizeof(id))) return -EFAULT; list->used++; if (!--space) return 0; } offset = 0; } return 0; } static int snd_ctl_elem_list_user(struct snd_card *card, struct snd_ctl_elem_list __user *_list) { struct snd_ctl_elem_list list; int err; if (copy_from_user(&list, _list, sizeof(list))) return -EFAULT; err = snd_ctl_elem_list(card, &list); if (err) return err; if (copy_to_user(_list, &list, sizeof(list))) return -EFAULT; return 0; } /* Check whether the given kctl info is valid */ static int snd_ctl_check_elem_info(struct snd_card *card, const struct snd_ctl_elem_info *info) { static const unsigned int max_value_counts[] = { [SNDRV_CTL_ELEM_TYPE_BOOLEAN] = 128, [SNDRV_CTL_ELEM_TYPE_INTEGER] = 128, [SNDRV_CTL_ELEM_TYPE_ENUMERATED] = 128, [SNDRV_CTL_ELEM_TYPE_BYTES] = 512, [SNDRV_CTL_ELEM_TYPE_IEC958] = 1, [SNDRV_CTL_ELEM_TYPE_INTEGER64] = 64, }; if (info->type < SNDRV_CTL_ELEM_TYPE_BOOLEAN || info->type > SNDRV_CTL_ELEM_TYPE_INTEGER64) { if (card) dev_err(card->dev, "control %i:%i:%i:%s:%i: invalid type %d\n", info->id.iface, info->id.device, info->id.subdevice, info->id.name, info->id.index, info->type); return -EINVAL; } if (info->type == SNDRV_CTL_ELEM_TYPE_ENUMERATED && info->value.enumerated.items == 0) { if (card) dev_err(card->dev, "control %i:%i:%i:%s:%i: zero enum items\n", info->id.iface, info->id.device, info->id.subdevice, info->id.name, info->id.index); return -EINVAL; } if (info->count > max_value_counts[info->type]) { if (card) dev_err(card->dev, "control %i:%i:%i:%s:%i: invalid count %d\n", info->id.iface, info->id.device, info->id.subdevice, info->id.name, info->id.index, info->count); return -EINVAL; } return 0; } /* The capacity of struct snd_ctl_elem_value.value.*/ static const unsigned int value_sizes[] = { [SNDRV_CTL_ELEM_TYPE_BOOLEAN] = sizeof(long), [SNDRV_CTL_ELEM_TYPE_INTEGER] = sizeof(long), [SNDRV_CTL_ELEM_TYPE_ENUMERATED] = sizeof(unsigned int), [SNDRV_CTL_ELEM_TYPE_BYTES] = sizeof(unsigned char), [SNDRV_CTL_ELEM_TYPE_IEC958] = sizeof(struct snd_aes_iec958), [SNDRV_CTL_ELEM_TYPE_INTEGER64] = sizeof(long long), }; /* fill the remaining snd_ctl_elem_value data with the given pattern */ static void fill_remaining_elem_value(struct snd_ctl_elem_value *control, struct snd_ctl_elem_info *info, u32 pattern) { size_t offset = value_sizes[info->type] * info->count; offset = DIV_ROUND_UP(offset, sizeof(u32)); memset32((u32 *)control->value.bytes.data + offset, 
pattern, sizeof(control->value) / sizeof(u32) - offset); } /* check whether the given integer ctl value is valid */ static int sanity_check_int_value(struct snd_card *card, const struct snd_ctl_elem_value *control, const struct snd_ctl_elem_info *info, int i, bool print_error) { long long lval, lmin, lmax, lstep; u64 rem; switch (info->type) { default: case SNDRV_CTL_ELEM_TYPE_BOOLEAN: lval = control->value.integer.value[i]; lmin = 0; lmax = 1; lstep = 0; break; case SNDRV_CTL_ELEM_TYPE_INTEGER: lval = control->value.integer.value[i]; lmin = info->value.integer.min; lmax = info->value.integer.max; lstep = info->value.integer.step; break; case SNDRV_CTL_ELEM_TYPE_INTEGER64: lval = control->value.integer64.value[i]; lmin = info->value.integer64.min; lmax = info->value.integer64.max; lstep = info->value.integer64.step; break; case SNDRV_CTL_ELEM_TYPE_ENUMERATED: lval = control->value.enumerated.item[i]; lmin = 0; lmax = info->value.enumerated.items - 1; lstep = 0; break; } if (lval < lmin || lval > lmax) { if (print_error) dev_err(card->dev, "control %i:%i:%i:%s:%i: value out of range %lld (%lld/%lld) at count %i\n", control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index, lval, lmin, lmax, i); return -EINVAL; } if (lstep) { div64_u64_rem(lval, lstep, &rem); if (rem) { if (print_error) dev_err(card->dev, "control %i:%i:%i:%s:%i: unaligned value %lld (step %lld) at count %i\n", control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index, lval, lstep, i); return -EINVAL; } } return 0; } /* check whether the all input values are valid for the given elem value */ static int sanity_check_input_values(struct snd_card *card, const struct snd_ctl_elem_value *control, const struct snd_ctl_elem_info *info, bool print_error) { int i, ret; switch (info->type) { case SNDRV_CTL_ELEM_TYPE_BOOLEAN: case SNDRV_CTL_ELEM_TYPE_INTEGER: case SNDRV_CTL_ELEM_TYPE_INTEGER64: case SNDRV_CTL_ELEM_TYPE_ENUMERATED: for (i = 0; i < info->count; i++) { ret = sanity_check_int_value(card, control, info, i, print_error); if (ret < 0) return ret; } break; default: break; } return 0; } /* perform sanity checks to the given snd_ctl_elem_value object */ static int sanity_check_elem_value(struct snd_card *card, const struct snd_ctl_elem_value *control, const struct snd_ctl_elem_info *info, u32 pattern) { size_t offset; int ret; u32 *p; ret = sanity_check_input_values(card, control, info, true); if (ret < 0) return ret; /* check whether the remaining area kept untouched */ offset = value_sizes[info->type] * info->count; offset = DIV_ROUND_UP(offset, sizeof(u32)); p = (u32 *)control->value.bytes.data + offset; for (; offset < sizeof(control->value) / sizeof(u32); offset++, p++) { if (*p != pattern) { ret = -EINVAL; break; } *p = 0; /* clear the checked area */ } return ret; } static int __snd_ctl_elem_info(struct snd_card *card, struct snd_kcontrol *kctl, struct snd_ctl_elem_info *info, struct snd_ctl_file *ctl) { struct snd_kcontrol_volatile *vd; unsigned int index_offset; int result; #ifdef CONFIG_SND_DEBUG info->access = 0; #endif result = snd_power_ref_and_wait(card); if (!result) result = kctl->info(kctl, info); snd_power_unref(card); if (result >= 0) { snd_BUG_ON(info->access); index_offset = snd_ctl_get_ioff(kctl, &info->id); vd = &kctl->vd[index_offset]; snd_ctl_build_ioff(&info->id, kctl, index_offset); info->access = vd->access; if (vd->owner) { info->access |= SNDRV_CTL_ELEM_ACCESS_LOCK; if (vd->owner == ctl) info->access |= 
SNDRV_CTL_ELEM_ACCESS_OWNER; info->owner = pid_vnr(vd->owner->pid); } else { info->owner = -1; } if (!snd_ctl_skip_validation(info) && snd_ctl_check_elem_info(card, info) < 0) result = -EINVAL; } return result; } static int snd_ctl_elem_info(struct snd_ctl_file *ctl, struct snd_ctl_elem_info *info) { struct snd_card *card = ctl->card; struct snd_kcontrol *kctl; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, &info->id); if (!kctl) return -ENOENT; return __snd_ctl_elem_info(card, kctl, info, ctl); } static int snd_ctl_elem_info_user(struct snd_ctl_file *ctl, struct snd_ctl_elem_info __user *_info) { struct snd_ctl_elem_info info; int result; if (copy_from_user(&info, _info, sizeof(info))) return -EFAULT; result = snd_ctl_elem_info(ctl, &info); if (result < 0) return result; /* drop internal access flags */ info.access &= ~(SNDRV_CTL_ELEM_ACCESS_SKIP_CHECK| SNDRV_CTL_ELEM_ACCESS_LED_MASK); if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return result; } static int snd_ctl_elem_read(struct snd_card *card, struct snd_ctl_elem_value *control) { struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int index_offset; struct snd_ctl_elem_info info; const u32 pattern = 0xdeadbeef; int ret; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, &control->id); if (!kctl) return -ENOENT; index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || !kctl->get) return -EPERM; snd_ctl_build_ioff(&control->id, kctl, index_offset); #ifdef CONFIG_SND_CTL_DEBUG /* info is needed only for validation */ memset(&info, 0, sizeof(info)); info.id = control->id; ret = __snd_ctl_elem_info(card, kctl, &info, NULL); if (ret < 0) return ret; #endif if (!snd_ctl_skip_validation(&info)) fill_remaining_elem_value(control, &info, pattern); ret = snd_power_ref_and_wait(card); if (!ret) ret = kctl->get(kctl, control); snd_power_unref(card); if (ret < 0) return ret; if (!snd_ctl_skip_validation(&info) && sanity_check_elem_value(card, control, &info, pattern) < 0) { dev_err(card->dev, "control %i:%i:%i:%s:%i: access overflow\n", control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index); return -EINVAL; } return 0; } static int snd_ctl_elem_read_user(struct snd_card *card, struct snd_ctl_elem_value __user *_control) { struct snd_ctl_elem_value *control __free(kfree) = NULL; int result; control = memdup_user(_control, sizeof(*control)); if (IS_ERR(control)) return PTR_ERR(no_free_ptr(control)); result = snd_ctl_elem_read(card, control); if (result < 0) return result; if (copy_to_user(_control, control, sizeof(*control))) return -EFAULT; return result; } static int snd_ctl_elem_write(struct snd_card *card, struct snd_ctl_file *file, struct snd_ctl_elem_value *control) { struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int index_offset; int result; down_write(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, &control->id); if (kctl == NULL) { up_write(&card->controls_rwsem); return -ENOENT; } index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_WRITE) || kctl->put == NULL || (file && vd->owner && vd->owner != file)) { up_write(&card->controls_rwsem); return -EPERM; } snd_ctl_build_ioff(&control->id, kctl, index_offset); result = snd_power_ref_and_wait(card); /* validate input values */ if (IS_ENABLED(CONFIG_SND_CTL_INPUT_VALIDATION) && 
!result) { struct snd_ctl_elem_info info; memset(&info, 0, sizeof(info)); info.id = control->id; result = __snd_ctl_elem_info(card, kctl, &info, NULL); if (!result) result = sanity_check_input_values(card, control, &info, false); } if (!result) result = kctl->put(kctl, control); snd_power_unref(card); if (result < 0) { up_write(&card->controls_rwsem); return result; } if (result > 0) { downgrade_write(&card->controls_rwsem); snd_ctl_notify_one(card, SNDRV_CTL_EVENT_MASK_VALUE, kctl, index_offset); up_read(&card->controls_rwsem); } else { up_write(&card->controls_rwsem); } return 0; } static int snd_ctl_elem_write_user(struct snd_ctl_file *file, struct snd_ctl_elem_value __user *_control) { struct snd_ctl_elem_value *control __free(kfree) = NULL; struct snd_card *card; int result; control = memdup_user(_control, sizeof(*control)); if (IS_ERR(control)) return PTR_ERR(no_free_ptr(control)); card = file->card; result = snd_ctl_elem_write(card, file, control); if (result < 0) return result; if (copy_to_user(_control, control, sizeof(*control))) return -EFAULT; return result; } static int snd_ctl_elem_lock(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_card *card = file->card; struct snd_ctl_elem_id id; struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, &id); if (!kctl) return -ENOENT; vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (vd->owner) return -EBUSY; vd->owner = file; return 0; } static int snd_ctl_elem_unlock(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_card *card = file->card; struct snd_ctl_elem_id id; struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id_locked(card, &id); if (!kctl) return -ENOENT; vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (!vd->owner) return -EINVAL; if (vd->owner != file) return -EPERM; vd->owner = NULL; return 0; } struct user_element { struct snd_ctl_elem_info info; struct snd_card *card; char *elem_data; /* element data */ unsigned long elem_data_size; /* size of element data in bytes */ void *tlv_data; /* TLV data */ unsigned long tlv_data_size; /* TLV data size */ void *priv_data; /* private data (like strings for enumerated type) */ }; // check whether the addition (in bytes) of user ctl element may overflow the limit. 
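// card->user_ctl_alloc_size accounts for everything allocated on behalf of
// user-space controls (the user_element bookkeeping, element value data,
// TLV blobs and enumerated name strings); it must stay below the
// max_user_ctl_alloc_size module parameter.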
static bool check_user_elem_overflow(struct snd_card *card, ssize_t add) { return (ssize_t)card->user_ctl_alloc_size + add > max_user_ctl_alloc_size; } static int snd_ctl_elem_user_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct user_element *ue = kcontrol->private_data; unsigned int offset; offset = snd_ctl_get_ioff(kcontrol, &uinfo->id); *uinfo = ue->info; snd_ctl_build_ioff(&uinfo->id, kcontrol, offset); return 0; } static int snd_ctl_elem_user_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct user_element *ue = kcontrol->private_data; const char *names; unsigned int item; unsigned int offset; item = uinfo->value.enumerated.item; offset = snd_ctl_get_ioff(kcontrol, &uinfo->id); *uinfo = ue->info; snd_ctl_build_ioff(&uinfo->id, kcontrol, offset); item = min(item, uinfo->value.enumerated.items - 1); uinfo->value.enumerated.item = item; names = ue->priv_data; for (; item > 0; --item) names += strlen(names) + 1; strcpy(uinfo->value.enumerated.name, names); return 0; } static int snd_ctl_elem_user_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct user_element *ue = kcontrol->private_data; unsigned int size = ue->elem_data_size; char *src = ue->elem_data + snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size; memcpy(&ucontrol->value, src, size); return 0; } static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { int change; struct user_element *ue = kcontrol->private_data; unsigned int size = ue->elem_data_size; char *dst = ue->elem_data + snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size; change = memcmp(&ucontrol->value, dst, size) != 0; if (change) memcpy(dst, &ucontrol->value, size); return change; } /* called in controls_rwsem write lock */ static int replace_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf, unsigned int size) { struct user_element *ue = kctl->private_data; unsigned int *container; unsigned int mask = 0; int i; int change; lockdep_assert_held_write(&ue->card->controls_rwsem); if (size > 1024 * 128) /* sane value */ return -EINVAL; // does the TLV size change cause overflow? if (check_user_elem_overflow(ue->card, (ssize_t)(size - ue->tlv_data_size))) return -ENOMEM; container = vmemdup_user(buf, size); if (IS_ERR(container)) return PTR_ERR(container); change = ue->tlv_data_size != size; if (!change) change = memcmp(ue->tlv_data, container, size) != 0; if (!change) { kvfree(container); return 0; } if (ue->tlv_data == NULL) { /* Now TLV data is available. */ for (i = 0; i < kctl->count; ++i) kctl->vd[i].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ; mask = SNDRV_CTL_EVENT_MASK_INFO; } else { ue->card->user_ctl_alloc_size -= ue->tlv_data_size; ue->tlv_data_size = 0; kvfree(ue->tlv_data); } ue->tlv_data = container; ue->tlv_data_size = size; // decremented at private_free. 
ue->card->user_ctl_alloc_size += size; mask |= SNDRV_CTL_EVENT_MASK_TLV; for (i = 0; i < kctl->count; ++i) snd_ctl_notify_one(ue->card, mask, kctl, i); return change; } static int read_user_tlv(struct snd_kcontrol *kctl, unsigned int __user *buf, unsigned int size) { struct user_element *ue = kctl->private_data; if (ue->tlv_data_size == 0 || ue->tlv_data == NULL) return -ENXIO; if (size < ue->tlv_data_size) return -ENOSPC; if (copy_to_user(buf, ue->tlv_data, ue->tlv_data_size)) return -EFAULT; return 0; } static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kctl, int op_flag, unsigned int size, unsigned int __user *buf) { if (op_flag == SNDRV_CTL_TLV_OP_WRITE) return replace_user_tlv(kctl, buf, size); else return read_user_tlv(kctl, buf, size); } /* called in controls_rwsem write lock */ static int snd_ctl_elem_init_enum_names(struct user_element *ue) { char *names, *p; size_t buf_len, name_len; unsigned int i; const uintptr_t user_ptrval = ue->info.value.enumerated.names_ptr; lockdep_assert_held_write(&ue->card->controls_rwsem); buf_len = ue->info.value.enumerated.names_length; if (buf_len > 64 * 1024) return -EINVAL; if (check_user_elem_overflow(ue->card, buf_len)) return -ENOMEM; names = vmemdup_user((const void __user *)user_ptrval, buf_len); if (IS_ERR(names)) return PTR_ERR(names); /* check that there are enough valid names */ p = names; for (i = 0; i < ue->info.value.enumerated.items; ++i) { name_len = strnlen(p, buf_len); if (name_len == 0 || name_len >= 64 || name_len == buf_len) { kvfree(names); return -EINVAL; } p += name_len + 1; buf_len -= name_len + 1; } ue->priv_data = names; ue->info.value.enumerated.names_ptr = 0; // increment the allocation size; decremented again at private_free. ue->card->user_ctl_alloc_size += ue->info.value.enumerated.names_length; return 0; } static size_t compute_user_elem_size(size_t size, unsigned int count) { return sizeof(struct user_element) + size * count; } static void snd_ctl_elem_user_free(struct snd_kcontrol *kcontrol) { struct user_element *ue = kcontrol->private_data; // decrement the allocation size. ue->card->user_ctl_alloc_size -= compute_user_elem_size(ue->elem_data_size, kcontrol->count); ue->card->user_ctl_alloc_size -= ue->tlv_data_size; if (ue->priv_data) ue->card->user_ctl_alloc_size -= ue->info.value.enumerated.names_length; kvfree(ue->tlv_data); kvfree(ue->priv_data); kfree(ue); } static int snd_ctl_elem_add(struct snd_ctl_file *file, struct snd_ctl_elem_info *info, int replace) { struct snd_card *card = file->card; struct snd_kcontrol *kctl; unsigned int count; unsigned int access; long private_size; size_t alloc_size; struct user_element *ue; unsigned int offset; int err; if (!*info->id.name) return -EINVAL; if (strnlen(info->id.name, sizeof(info->id.name)) >= sizeof(info->id.name)) return -EINVAL; /* Delete a control to replace them if needed. */ if (replace) { info->id.numid = 0; err = snd_ctl_remove_user_ctl(file, &info->id); if (err) return err; } /* Check the number of elements for this userspace control. */ count = info->owner; if (count == 0) count = 1; /* Arrange access permissions if needed. */ access = info->access; if (access == 0) access = SNDRV_CTL_ELEM_ACCESS_READWRITE; access &= (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE | SNDRV_CTL_ELEM_ACCESS_TLV_WRITE); /* In initial state, nothing is available as TLV container. 
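	 * The SNDRV_CTL_ELEM_ACCESS_TLV_READ bit is granted later, by
	 * replace_user_tlv(), once a first TLV block has been written.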
*/ if (access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) access |= SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK; access |= SNDRV_CTL_ELEM_ACCESS_USER; /* * Check information and calculate the size of data specific to * this userspace control. */ /* pass NULL to card for suppressing error messages */ err = snd_ctl_check_elem_info(NULL, info); if (err < 0) return err; /* user-space control doesn't allow zero-size data */ if (info->count < 1) return -EINVAL; private_size = value_sizes[info->type] * info->count; alloc_size = compute_user_elem_size(private_size, count); guard(rwsem_write)(&card->controls_rwsem); if (check_user_elem_overflow(card, alloc_size)) return -ENOMEM; /* * Keep memory object for this userspace control. After passing this * code block, the instance should be freed by snd_ctl_free_one(). * * Note that these elements in this control are locked. */ err = snd_ctl_new(&kctl, count, access, file); if (err < 0) return err; memcpy(&kctl->id, &info->id, sizeof(kctl->id)); ue = kzalloc(alloc_size, GFP_KERNEL); if (!ue) { kfree(kctl); return -ENOMEM; } kctl->private_data = ue; kctl->private_free = snd_ctl_elem_user_free; // increment the allocated size; decremented again at private_free. card->user_ctl_alloc_size += alloc_size; /* Set private data for this userspace control. */ ue->card = card; ue->info = *info; ue->info.access = 0; ue->elem_data = (char *)ue + sizeof(*ue); ue->elem_data_size = private_size; if (ue->info.type == SNDRV_CTL_ELEM_TYPE_ENUMERATED) { err = snd_ctl_elem_init_enum_names(ue); if (err < 0) { snd_ctl_free_one(kctl); return err; } } /* Set callback functions. */ if (info->type == SNDRV_CTL_ELEM_TYPE_ENUMERATED) kctl->info = snd_ctl_elem_user_enum_info; else kctl->info = snd_ctl_elem_user_info; if (access & SNDRV_CTL_ELEM_ACCESS_READ) kctl->get = snd_ctl_elem_user_get; if (access & SNDRV_CTL_ELEM_ACCESS_WRITE) kctl->put = snd_ctl_elem_user_put; if (access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) kctl->tlv.c = snd_ctl_elem_user_tlv; /* This function manage to free the instance on failure. */ err = __snd_ctl_add_replace(card, kctl, CTL_ADD_EXCLUSIVE); if (err < 0) { snd_ctl_free_one(kctl); return err; } offset = snd_ctl_get_ioff(kctl, &info->id); snd_ctl_build_ioff(&info->id, kctl, offset); /* * Here we cannot fill any field for the number of elements added by * this operation because there're no specific fields. The usage of * 'owner' field for this purpose may cause any bugs to userspace * applications because the field originally means PID of a process * which locks the element. 
*/ return 0; } static int snd_ctl_elem_add_user(struct snd_ctl_file *file, struct snd_ctl_elem_info __user *_info, int replace) { struct snd_ctl_elem_info info; int err; if (copy_from_user(&info, _info, sizeof(info))) return -EFAULT; err = snd_ctl_elem_add(file, &info, replace); if (err < 0) return err; if (copy_to_user(_info, &info, sizeof(info))) { snd_ctl_remove_user_ctl(file, &info.id); return -EFAULT; } return 0; } static int snd_ctl_elem_remove(struct snd_ctl_file *file, struct snd_ctl_elem_id __user *_id) { struct snd_ctl_elem_id id; if (copy_from_user(&id, _id, sizeof(id))) return -EFAULT; return snd_ctl_remove_user_ctl(file, &id); } static int snd_ctl_subscribe_events(struct snd_ctl_file *file, int __user *ptr) { int subscribe; if (get_user(subscribe, ptr)) return -EFAULT; if (subscribe < 0) { subscribe = file->subscribed; if (put_user(subscribe, ptr)) return -EFAULT; return 0; } if (subscribe) { file->subscribed = 1; return 0; } else if (file->subscribed) { snd_ctl_empty_read_queue(file); file->subscribed = 0; } return 0; } static int call_tlv_handler(struct snd_ctl_file *file, int op_flag, struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id, unsigned int __user *buf, unsigned int size) { static const struct { int op; int perm; } pairs[] = { {SNDRV_CTL_TLV_OP_READ, SNDRV_CTL_ELEM_ACCESS_TLV_READ}, {SNDRV_CTL_TLV_OP_WRITE, SNDRV_CTL_ELEM_ACCESS_TLV_WRITE}, {SNDRV_CTL_TLV_OP_CMD, SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND}, }; struct snd_kcontrol_volatile *vd = &kctl->vd[snd_ctl_get_ioff(kctl, id)]; int i, ret; /* Check support of the request for this element. */ for (i = 0; i < ARRAY_SIZE(pairs); ++i) { if (op_flag == pairs[i].op && (vd->access & pairs[i].perm)) break; } if (i == ARRAY_SIZE(pairs)) return -ENXIO; if (kctl->tlv.c == NULL) return -ENXIO; /* Write and command operations are not allowed for locked element. */ if (op_flag != SNDRV_CTL_TLV_OP_READ && vd->owner != NULL && vd->owner != file) return -EPERM; ret = snd_power_ref_and_wait(file->card); if (!ret) ret = kctl->tlv.c(kctl, op_flag, size, buf); snd_power_unref(file->card); return ret; } static int read_tlv_buf(struct snd_kcontrol *kctl, struct snd_ctl_elem_id *id, unsigned int __user *buf, unsigned int size) { struct snd_kcontrol_volatile *vd = &kctl->vd[snd_ctl_get_ioff(kctl, id)]; unsigned int len; if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ)) return -ENXIO; if (kctl->tlv.p == NULL) return -ENXIO; len = sizeof(unsigned int) * 2 + kctl->tlv.p[1]; if (size < len) return -ENOMEM; if (copy_to_user(buf, kctl->tlv.p, len)) return -EFAULT; return 0; } static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file, struct snd_ctl_tlv __user *buf, int op_flag) { struct snd_ctl_tlv header; unsigned int __user *container; unsigned int container_size; struct snd_kcontrol *kctl; struct snd_ctl_elem_id id; struct snd_kcontrol_volatile *vd; lockdep_assert_held(&file->card->controls_rwsem); if (copy_from_user(&header, buf, sizeof(header))) return -EFAULT; /* In design of control core, numerical ID starts at 1. */ if (header.numid == 0) return -EINVAL; /* At least, container should include type and length fields. */ if (header.length < sizeof(unsigned int) * 2) return -EINVAL; container_size = header.length; container = buf->tlv; kctl = snd_ctl_find_numid_locked(file->card, header.numid); if (kctl == NULL) return -ENOENT; /* Calculate index of the element in this set. 
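	 * (header.numid - kctl->id.numid is the offset of the addressed
	 * element within this control set.)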
*/ id = kctl->id; snd_ctl_build_ioff(&id, kctl, header.numid - id.numid); vd = &kctl->vd[snd_ctl_get_ioff(kctl, &id)]; if (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { return call_tlv_handler(file, op_flag, kctl, &id, container, container_size); } else { if (op_flag == SNDRV_CTL_TLV_OP_READ) { return read_tlv_buf(kctl, &id, container, container_size); } } /* Not supported. */ return -ENXIO; } static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_ctl_file *ctl; struct snd_card *card; struct snd_kctl_ioctl *p; void __user *argp = (void __user *)arg; int __user *ip = argp; int err; ctl = file->private_data; card = ctl->card; if (snd_BUG_ON(!card)) return -ENXIO; switch (cmd) { case SNDRV_CTL_IOCTL_PVERSION: return put_user(SNDRV_CTL_VERSION, ip) ? -EFAULT : 0; case SNDRV_CTL_IOCTL_CARD_INFO: return snd_ctl_card_info(card, ctl, cmd, argp); case SNDRV_CTL_IOCTL_ELEM_LIST: return snd_ctl_elem_list_user(card, argp); case SNDRV_CTL_IOCTL_ELEM_INFO: return snd_ctl_elem_info_user(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_READ: return snd_ctl_elem_read_user(card, argp); case SNDRV_CTL_IOCTL_ELEM_WRITE: return snd_ctl_elem_write_user(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_LOCK: return snd_ctl_elem_lock(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_UNLOCK: return snd_ctl_elem_unlock(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_ADD: return snd_ctl_elem_add_user(ctl, argp, 0); case SNDRV_CTL_IOCTL_ELEM_REPLACE: return snd_ctl_elem_add_user(ctl, argp, 1); case SNDRV_CTL_IOCTL_ELEM_REMOVE: return snd_ctl_elem_remove(ctl, argp); case SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS: return snd_ctl_subscribe_events(ctl, ip); case SNDRV_CTL_IOCTL_TLV_READ: scoped_guard(rwsem_read, &ctl->card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_READ); return err; case SNDRV_CTL_IOCTL_TLV_WRITE: scoped_guard(rwsem_write, &ctl->card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_WRITE); return err; case SNDRV_CTL_IOCTL_TLV_COMMAND: scoped_guard(rwsem_write, &ctl->card->controls_rwsem) err = snd_ctl_tlv_ioctl(ctl, argp, SNDRV_CTL_TLV_OP_CMD); return err; case SNDRV_CTL_IOCTL_POWER: return -ENOPROTOOPT; case SNDRV_CTL_IOCTL_POWER_STATE: return put_user(SNDRV_CTL_POWER_D0, ip) ? 
-EFAULT : 0; } guard(rwsem_read)(&snd_ioctl_rwsem); list_for_each_entry(p, &snd_control_ioctls, list) { err = p->fioctl(card, ctl, cmd, arg); if (err != -ENOIOCTLCMD) return err; } dev_dbg(card->dev, "unknown ioctl = 0x%x\n", cmd); return -ENOTTY; } static ssize_t snd_ctl_read(struct file *file, char __user *buffer, size_t count, loff_t * offset) { struct snd_ctl_file *ctl; int err = 0; ssize_t result = 0; ctl = file->private_data; if (snd_BUG_ON(!ctl || !ctl->card)) return -ENXIO; if (!ctl->subscribed) return -EBADFD; if (count < sizeof(struct snd_ctl_event)) return -EINVAL; spin_lock_irq(&ctl->read_lock); while (count >= sizeof(struct snd_ctl_event)) { struct snd_ctl_event ev; struct snd_kctl_event *kev; while (list_empty(&ctl->events)) { wait_queue_entry_t wait; if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) { err = -EAGAIN; goto __end_lock; } init_waitqueue_entry(&wait, current); add_wait_queue(&ctl->change_sleep, &wait); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&ctl->read_lock); schedule(); remove_wait_queue(&ctl->change_sleep, &wait); if (ctl->card->shutdown) return -ENODEV; if (signal_pending(current)) return -ERESTARTSYS; spin_lock_irq(&ctl->read_lock); } kev = snd_kctl_event(ctl->events.next); ev.type = SNDRV_CTL_EVENT_ELEM; ev.data.elem.mask = kev->mask; ev.data.elem.id = kev->id; list_del(&kev->list); spin_unlock_irq(&ctl->read_lock); kfree(kev); if (copy_to_user(buffer, &ev, sizeof(struct snd_ctl_event))) { err = -EFAULT; goto __end; } spin_lock_irq(&ctl->read_lock); buffer += sizeof(struct snd_ctl_event); count -= sizeof(struct snd_ctl_event); result += sizeof(struct snd_ctl_event); } __end_lock: spin_unlock_irq(&ctl->read_lock); __end: return result > 0 ? result : err; } static __poll_t snd_ctl_poll(struct file *file, poll_table * wait) { __poll_t mask; struct snd_ctl_file *ctl; ctl = file->private_data; if (!ctl->subscribed) return 0; poll_wait(file, &ctl->change_sleep, wait); mask = 0; if (!list_empty(&ctl->events)) mask |= EPOLLIN | EPOLLRDNORM; return mask; } /* * register the device-specific control-ioctls. * called from each device manager like pcm.c, hwdep.c, etc. */ static int _snd_ctl_register_ioctl(snd_kctl_ioctl_func_t fcn, struct list_head *lists) { struct snd_kctl_ioctl *pn; pn = kzalloc(sizeof(struct snd_kctl_ioctl), GFP_KERNEL); if (pn == NULL) return -ENOMEM; pn->fioctl = fcn; guard(rwsem_write)(&snd_ioctl_rwsem); list_add_tail(&pn->list, lists); return 0; } /** * snd_ctl_register_ioctl - register the device-specific control-ioctls * @fcn: ioctl callback function * * called from each device manager like pcm.c, hwdep.c, etc. * * Return: zero if successful, or a negative error code */ int snd_ctl_register_ioctl(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_register_ioctl(fcn, &snd_control_ioctls); } EXPORT_SYMBOL(snd_ctl_register_ioctl); #ifdef CONFIG_COMPAT /** * snd_ctl_register_ioctl_compat - register the device-specific 32bit compat * control-ioctls * @fcn: ioctl callback function * * Return: zero if successful, or a negative error code */ int snd_ctl_register_ioctl_compat(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_register_ioctl(fcn, &snd_control_compat_ioctls); } EXPORT_SYMBOL(snd_ctl_register_ioctl_compat); #endif /* * de-register the device-specific control-ioctls. 
*/ static int _snd_ctl_unregister_ioctl(snd_kctl_ioctl_func_t fcn, struct list_head *lists) { struct snd_kctl_ioctl *p; if (snd_BUG_ON(!fcn)) return -EINVAL; guard(rwsem_write)(&snd_ioctl_rwsem); list_for_each_entry(p, lists, list) { if (p->fioctl == fcn) { list_del(&p->list); kfree(p); return 0; } } snd_BUG(); return -EINVAL; } /** * snd_ctl_unregister_ioctl - de-register the device-specific control-ioctls * @fcn: ioctl callback function to unregister * * Return: zero if successful, or a negative error code */ int snd_ctl_unregister_ioctl(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_unregister_ioctl(fcn, &snd_control_ioctls); } EXPORT_SYMBOL(snd_ctl_unregister_ioctl); #ifdef CONFIG_COMPAT /** * snd_ctl_unregister_ioctl_compat - de-register the device-specific compat * 32bit control-ioctls * @fcn: ioctl callback function to unregister * * Return: zero if successful, or a negative error code */ int snd_ctl_unregister_ioctl_compat(snd_kctl_ioctl_func_t fcn) { return _snd_ctl_unregister_ioctl(fcn, &snd_control_compat_ioctls); } EXPORT_SYMBOL(snd_ctl_unregister_ioctl_compat); #endif static int snd_ctl_fasync(int fd, struct file * file, int on) { struct snd_ctl_file *ctl; ctl = file->private_data; return snd_fasync_helper(fd, file, on, &ctl->fasync); } /* return the preferred subdevice number if already assigned; * otherwise return -1 */ int snd_ctl_get_preferred_subdevice(struct snd_card *card, int type) { struct snd_ctl_file *kctl; int subdevice = -1; guard(read_lock_irqsave)(&card->ctl_files_rwlock); list_for_each_entry(kctl, &card->ctl_files, list) { if (kctl->pid == task_pid(current)) { subdevice = kctl->preferred_subdevice[type]; if (subdevice != -1) break; } } return subdevice; } EXPORT_SYMBOL_GPL(snd_ctl_get_preferred_subdevice); /* * ioctl32 compat */ #ifdef CONFIG_COMPAT #include "control_compat.c" #else #define snd_ctl_ioctl_compat NULL #endif /* * control layers (audio LED etc.) */ /** * snd_ctl_request_layer - request to use the layer * @module_name: Name of the kernel module (NULL == build-in) * * Return: zero if successful, or an error code when the module cannot be loaded */ int snd_ctl_request_layer(const char *module_name) { struct snd_ctl_layer_ops *lops; if (module_name == NULL) return 0; scoped_guard(rwsem_read, &snd_ctl_layer_rwsem) { for (lops = snd_ctl_layer; lops; lops = lops->next) if (strcmp(lops->module_name, module_name) == 0) return 0; } return request_module(module_name); } EXPORT_SYMBOL_GPL(snd_ctl_request_layer); /** * snd_ctl_register_layer - register new control layer * @lops: operation structure * * The new layer can track all control elements and do additional * operations on top (like audio LED handling). */ void snd_ctl_register_layer(struct snd_ctl_layer_ops *lops) { struct snd_card *card; int card_number; scoped_guard(rwsem_write, &snd_ctl_layer_rwsem) { lops->next = snd_ctl_layer; snd_ctl_layer = lops; } for (card_number = 0; card_number < SNDRV_CARDS; card_number++) { card = snd_card_ref(card_number); if (card) { scoped_guard(rwsem_read, &card->controls_rwsem) lops->lregister(card); snd_card_unref(card); } } } EXPORT_SYMBOL_GPL(snd_ctl_register_layer); /** * snd_ctl_disconnect_layer - disconnect control layer * @lops: operation structure * * It is expected that the information about tracked cards * is freed before this call (the disconnect callback is * not called here). 
*/ void snd_ctl_disconnect_layer(struct snd_ctl_layer_ops *lops) { struct snd_ctl_layer_ops *lops2, *prev_lops2; guard(rwsem_write)(&snd_ctl_layer_rwsem); for (lops2 = snd_ctl_layer, prev_lops2 = NULL; lops2; lops2 = lops2->next) { if (lops2 == lops) { if (!prev_lops2) snd_ctl_layer = lops->next; else prev_lops2->next = lops->next; break; } prev_lops2 = lops2; } } EXPORT_SYMBOL_GPL(snd_ctl_disconnect_layer); /* * INIT PART */ static const struct file_operations snd_ctl_f_ops = { .owner = THIS_MODULE, .read = snd_ctl_read, .open = snd_ctl_open, .release = snd_ctl_release, .llseek = no_llseek, .poll = snd_ctl_poll, .unlocked_ioctl = snd_ctl_ioctl, .compat_ioctl = snd_ctl_ioctl_compat, .fasync = snd_ctl_fasync, }; /* call lops under rwsems; called from snd_ctl_dev_*() below() */ #define call_snd_ctl_lops(_card, _op) \ do { \ struct snd_ctl_layer_ops *lops; \ guard(rwsem_read)(&(_card)->controls_rwsem); \ guard(rwsem_read)(&snd_ctl_layer_rwsem); \ for (lops = snd_ctl_layer; lops; lops = lops->next) \ lops->_op(_card); \ } while (0) /* * registration of the control device */ static int snd_ctl_dev_register(struct snd_device *device) { struct snd_card *card = device->device_data; int err; err = snd_register_device(SNDRV_DEVICE_TYPE_CONTROL, card, -1, &snd_ctl_f_ops, card, card->ctl_dev); if (err < 0) return err; call_snd_ctl_lops(card, lregister); return 0; } /* * disconnection of the control device */ static int snd_ctl_dev_disconnect(struct snd_device *device) { struct snd_card *card = device->device_data; struct snd_ctl_file *ctl; scoped_guard(read_lock_irqsave, &card->ctl_files_rwlock) { list_for_each_entry(ctl, &card->ctl_files, list) { wake_up(&ctl->change_sleep); snd_kill_fasync(ctl->fasync, SIGIO, POLL_ERR); } } call_snd_ctl_lops(card, ldisconnect); return snd_unregister_device(card->ctl_dev); } /* * free all controls */ static int snd_ctl_dev_free(struct snd_device *device) { struct snd_card *card = device->device_data; struct snd_kcontrol *control; scoped_guard(rwsem_write, &card->controls_rwsem) { while (!list_empty(&card->controls)) { control = snd_kcontrol(card->controls.next); __snd_ctl_remove(card, control, false); } #ifdef CONFIG_SND_CTL_FAST_LOOKUP xa_destroy(&card->ctl_numids); xa_destroy(&card->ctl_hash); #endif } put_device(card->ctl_dev); return 0; } /* * create control core: * called from init.c */ int snd_ctl_create(struct snd_card *card) { static const struct snd_device_ops ops = { .dev_free = snd_ctl_dev_free, .dev_register = snd_ctl_dev_register, .dev_disconnect = snd_ctl_dev_disconnect, }; int err; if (snd_BUG_ON(!card)) return -ENXIO; if (snd_BUG_ON(card->number < 0 || card->number >= SNDRV_CARDS)) return -ENXIO; err = snd_device_alloc(&card->ctl_dev, card); if (err < 0) return err; dev_set_name(card->ctl_dev, "controlC%d", card->number); err = snd_device_new(card, SNDRV_DEV_CONTROL, card, &ops); if (err < 0) put_device(card->ctl_dev); return err; } /* * Frequently used control callbacks/helpers */ /** * snd_ctl_boolean_mono_info - Helper function for a standard boolean info * callback with a mono channel * @kcontrol: the kcontrol instance * @uinfo: info to store * * This is a function that can be used as info callback for a standard * boolean control with a single mono channel. 
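 *
 * A typical use is as the .info callback of a snd_kcontrol_new template
 * (illustrative sketch; my_switch_get/my_switch_put are hypothetical):
 *
 *	static const struct snd_kcontrol_new my_switch = {
 *		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 *		.name = "My Playback Switch",
 *		.info = snd_ctl_boolean_mono_info,
 *		.get = my_switch_get,
 *		.put = my_switch_put,
 *	};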
* * Return: Zero (always successful) */ int snd_ctl_boolean_mono_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; uinfo->count = 1; uinfo->value.integer.min = 0; uinfo->value.integer.max = 1; return 0; } EXPORT_SYMBOL(snd_ctl_boolean_mono_info); /** * snd_ctl_boolean_stereo_info - Helper function for a standard boolean info * callback with stereo two channels * @kcontrol: the kcontrol instance * @uinfo: info to store * * This is a function that can be used as info callback for a standard * boolean control with stereo two channels. * * Return: Zero (always successful) */ int snd_ctl_boolean_stereo_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; uinfo->count = 2; uinfo->value.integer.min = 0; uinfo->value.integer.max = 1; return 0; } EXPORT_SYMBOL(snd_ctl_boolean_stereo_info); /** * snd_ctl_enum_info - fills the info structure for an enumerated control * @info: the structure to be filled * @channels: the number of the control's channels; often one * @items: the number of control values; also the size of @names * @names: an array containing the names of all control values * * Sets all required fields in @info to their appropriate values. * If the control's accessibility is not the default (readable and writable), * the caller has to fill @info->access. * * Return: Zero (always successful) */ int snd_ctl_enum_info(struct snd_ctl_elem_info *info, unsigned int channels, unsigned int items, const char *const names[]) { info->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; info->count = channels; info->value.enumerated.items = items; if (!items) return 0; if (info->value.enumerated.item >= items) info->value.enumerated.item = items - 1; WARN(strlen(names[info->value.enumerated.item]) >= sizeof(info->value.enumerated.name), "ALSA: too long item name '%s'\n", names[info->value.enumerated.item]); strscpy(info->value.enumerated.name, names[info->value.enumerated.item], sizeof(info->value.enumerated.name)); return 0; } EXPORT_SYMBOL(snd_ctl_enum_info);
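/*
 * Illustrative usage sketch (not part of control.c): how a driver might plug
 * the helpers above into its own mixer controls.  All "example_*" names and
 * the control name strings are hypothetical; snd_ctl_boolean_mono_info(),
 * snd_ctl_enum_info(), snd_ctl_new1() and snd_ctl_add() are the real APIs.
 */
static int example_switch_get(struct snd_kcontrol *kcontrol,
			      struct snd_ctl_elem_value *ucontrol)
{
	ucontrol->value.integer.value[0] = 1;	/* report the current state */
	return 0;
}

static int example_switch_put(struct snd_kcontrol *kcontrol,
			      struct snd_ctl_elem_value *ucontrol)
{
	/* apply ucontrol->value.integer.value[0] to the hardware here;
	 * return 1 instead of 0 when the value actually changed
	 */
	return 0;
}

static const struct snd_kcontrol_new example_switch = {
	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
	.name = "Example Playback Switch",
	.info = snd_ctl_boolean_mono_info,	/* boolean helper defined above */
	.get = example_switch_get,
	.put = example_switch_put,
};

/* an .info callback for an enumerated control built on snd_ctl_enum_info() */
static const char * const example_mode_texts[] = { "Off", "On" };

static int example_mode_info(struct snd_kcontrol *kcontrol,
			     struct snd_ctl_elem_info *uinfo)
{
	return snd_ctl_enum_info(uinfo, 1, ARRAY_SIZE(example_mode_texts),
				 example_mode_texts);
}

/* registration in a driver probe path would look like:
 * err = snd_ctl_add(card, snd_ctl_new1(&example_switch, chip));
 */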
// SPDX-License-Identifier: GPL-2.0-or-later /* * MIDI byte <-> sequencer event coder * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de>, * Jaroslav Kysela <perex@perex.cz> */ #include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/module.h> #include <sound/core.h> #include <sound/seq_kernel.h> #include <sound/seq_midi_event.h> #include <sound/asoundef.h> MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>, Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("MIDI byte <-> sequencer event coder"); MODULE_LICENSE("GPL"); /* event type, index into status_event[] */ /* from 0 to 6 are normal commands (note off, on, etc.) for 0x9?-0xe?
*/ #define ST_INVALID 7 #define ST_SPECIAL 8 #define ST_SYSEX ST_SPECIAL /* from 8 to 15 are events for 0xf0-0xf7 */ /* * prototypes */ static void note_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void one_param_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void pitchbend_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void two_param_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void one_param_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void songpos_event(struct snd_midi_event *dev, struct snd_seq_event *ev); static void note_decode(struct snd_seq_event *ev, unsigned char *buf); static void one_param_decode(struct snd_seq_event *ev, unsigned char *buf); static void pitchbend_decode(struct snd_seq_event *ev, unsigned char *buf); static void two_param_decode(struct snd_seq_event *ev, unsigned char *buf); static void songpos_decode(struct snd_seq_event *ev, unsigned char *buf); /* * event list */ static struct status_event_list { int event; int qlen; void (*encode)(struct snd_midi_event *dev, struct snd_seq_event *ev); void (*decode)(struct snd_seq_event *ev, unsigned char *buf); } status_event[] = { /* 0x80 - 0xef */ {SNDRV_SEQ_EVENT_NOTEOFF, 2, note_event, note_decode}, {SNDRV_SEQ_EVENT_NOTEON, 2, note_event, note_decode}, {SNDRV_SEQ_EVENT_KEYPRESS, 2, note_event, note_decode}, {SNDRV_SEQ_EVENT_CONTROLLER, 2, two_param_ctrl_event, two_param_decode}, {SNDRV_SEQ_EVENT_PGMCHANGE, 1, one_param_ctrl_event, one_param_decode}, {SNDRV_SEQ_EVENT_CHANPRESS, 1, one_param_ctrl_event, one_param_decode}, {SNDRV_SEQ_EVENT_PITCHBEND, 2, pitchbend_ctrl_event, pitchbend_decode}, /* invalid */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf0 - 0xff */ {SNDRV_SEQ_EVENT_SYSEX, 1, NULL, NULL}, /* sysex: 0xf0 */ {SNDRV_SEQ_EVENT_QFRAME, 1, one_param_event, one_param_decode}, /* 0xf1 */ {SNDRV_SEQ_EVENT_SONGPOS, 2, songpos_event, songpos_decode}, /* 0xf2 */ {SNDRV_SEQ_EVENT_SONGSEL, 1, one_param_event, one_param_decode}, /* 0xf3 */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf4 */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf5 */ {SNDRV_SEQ_EVENT_TUNE_REQUEST, 0, NULL, NULL}, /* 0xf6 */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf7 */ {SNDRV_SEQ_EVENT_CLOCK, 0, NULL, NULL}, /* 0xf8 */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xf9 */ {SNDRV_SEQ_EVENT_START, 0, NULL, NULL}, /* 0xfa */ {SNDRV_SEQ_EVENT_CONTINUE, 0, NULL, NULL}, /* 0xfb */ {SNDRV_SEQ_EVENT_STOP, 0, NULL, NULL}, /* 0xfc */ {SNDRV_SEQ_EVENT_NONE, -1, NULL, NULL}, /* 0xfd */ {SNDRV_SEQ_EVENT_SENSING, 0, NULL, NULL}, /* 0xfe */ {SNDRV_SEQ_EVENT_RESET, 0, NULL, NULL}, /* 0xff */ }; static int extra_decode_ctrl14(struct snd_midi_event *dev, unsigned char *buf, int len, struct snd_seq_event *ev); static int extra_decode_xrpn(struct snd_midi_event *dev, unsigned char *buf, int count, struct snd_seq_event *ev); static struct extra_event_list { int event; int (*decode)(struct snd_midi_event *dev, unsigned char *buf, int len, struct snd_seq_event *ev); } extra_event[] = { {SNDRV_SEQ_EVENT_CONTROL14, extra_decode_ctrl14}, {SNDRV_SEQ_EVENT_NONREGPARAM, extra_decode_xrpn}, {SNDRV_SEQ_EVENT_REGPARAM, extra_decode_xrpn}, }; /* * new/delete record */ int snd_midi_event_new(int bufsize, struct snd_midi_event **rdev) { struct snd_midi_event *dev; *rdev = NULL; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) return -ENOMEM; if (bufsize > 0) { dev->buf = kmalloc(bufsize, GFP_KERNEL); if (dev->buf == NULL) { kfree(dev); return 
-ENOMEM; } } dev->bufsize = bufsize; dev->lastcmd = 0xff; dev->type = ST_INVALID; spin_lock_init(&dev->lock); *rdev = dev; return 0; } EXPORT_SYMBOL(snd_midi_event_new); void snd_midi_event_free(struct snd_midi_event *dev) { if (dev != NULL) { kfree(dev->buf); kfree(dev); } } EXPORT_SYMBOL(snd_midi_event_free); /* * initialize record */ static inline void reset_encode(struct snd_midi_event *dev) { dev->read = 0; dev->qlen = 0; dev->type = ST_INVALID; } void snd_midi_event_reset_encode(struct snd_midi_event *dev) { guard(spinlock_irqsave)(&dev->lock); reset_encode(dev); } EXPORT_SYMBOL(snd_midi_event_reset_encode); void snd_midi_event_reset_decode(struct snd_midi_event *dev) { guard(spinlock_irqsave)(&dev->lock); dev->lastcmd = 0xff; } EXPORT_SYMBOL(snd_midi_event_reset_decode); void snd_midi_event_no_status(struct snd_midi_event *dev, int on) { dev->nostat = on ? 1 : 0; } EXPORT_SYMBOL(snd_midi_event_no_status); /* * read one byte and encode to sequencer event: * return true if MIDI bytes are encoded to an event * false data is not finished */ bool snd_midi_event_encode_byte(struct snd_midi_event *dev, unsigned char c, struct snd_seq_event *ev) { bool rc = false; if (c >= MIDI_CMD_COMMON_CLOCK) { /* real-time event */ ev->type = status_event[ST_SPECIAL + c - 0xf0].event; ev->flags &= ~SNDRV_SEQ_EVENT_LENGTH_MASK; ev->flags |= SNDRV_SEQ_EVENT_LENGTH_FIXED; return ev->type != SNDRV_SEQ_EVENT_NONE; } guard(spinlock_irqsave)(&dev->lock); if ((c & 0x80) && (c != MIDI_CMD_COMMON_SYSEX_END || dev->type != ST_SYSEX)) { /* new command */ dev->buf[0] = c; if ((c & 0xf0) == 0xf0) /* system messages */ dev->type = (c & 0x0f) + ST_SPECIAL; else dev->type = (c >> 4) & 0x07; dev->read = 1; dev->qlen = status_event[dev->type].qlen; } else { if (dev->qlen > 0) { /* rest of command */ dev->buf[dev->read++] = c; if (dev->type != ST_SYSEX) dev->qlen--; } else { /* running status */ dev->buf[1] = c; dev->qlen = status_event[dev->type].qlen - 1; dev->read = 2; } } if (dev->qlen == 0) { ev->type = status_event[dev->type].event; ev->flags &= ~SNDRV_SEQ_EVENT_LENGTH_MASK; ev->flags |= SNDRV_SEQ_EVENT_LENGTH_FIXED; if (status_event[dev->type].encode) /* set data values */ status_event[dev->type].encode(dev, ev); if (dev->type >= ST_SPECIAL) dev->type = ST_INVALID; rc = true; } else if (dev->type == ST_SYSEX) { if (c == MIDI_CMD_COMMON_SYSEX_END || dev->read >= dev->bufsize) { ev->flags &= ~SNDRV_SEQ_EVENT_LENGTH_MASK; ev->flags |= SNDRV_SEQ_EVENT_LENGTH_VARIABLE; ev->type = SNDRV_SEQ_EVENT_SYSEX; ev->data.ext.len = dev->read; ev->data.ext.ptr = dev->buf; if (c != MIDI_CMD_COMMON_SYSEX_END) dev->read = 0; /* continue to parse */ else reset_encode(dev); /* all parsed */ rc = true; } } return rc; } EXPORT_SYMBOL(snd_midi_event_encode_byte); /* encode note event */ static void note_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.note.channel = dev->buf[0] & 0x0f; ev->data.note.note = dev->buf[1]; ev->data.note.velocity = dev->buf[2]; } /* encode one parameter controls */ static void one_param_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.control.channel = dev->buf[0] & 0x0f; ev->data.control.value = dev->buf[1]; } /* encode pitch wheel change */ static void pitchbend_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.control.channel = dev->buf[0] & 0x0f; ev->data.control.value = (int)dev->buf[2] * 128 + (int)dev->buf[1] - 8192; } /* encode midi control change */ static void two_param_ctrl_event(struct snd_midi_event *dev, struct snd_seq_event 
*ev) { ev->data.control.channel = dev->buf[0] & 0x0f; ev->data.control.param = dev->buf[1]; ev->data.control.value = dev->buf[2]; } /* encode one parameter value*/ static void one_param_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.control.value = dev->buf[1]; } /* encode song position */ static void songpos_event(struct snd_midi_event *dev, struct snd_seq_event *ev) { ev->data.control.value = (int)dev->buf[2] * 128 + (int)dev->buf[1]; } /* * decode from a sequencer event to midi bytes * return the size of decoded midi events */ long snd_midi_event_decode(struct snd_midi_event *dev, unsigned char *buf, long count, struct snd_seq_event *ev) { unsigned int cmd, type; if (ev->type == SNDRV_SEQ_EVENT_NONE) return -ENOENT; for (type = 0; type < ARRAY_SIZE(status_event); type++) { if (ev->type == status_event[type].event) goto __found; } for (type = 0; type < ARRAY_SIZE(extra_event); type++) { if (ev->type == extra_event[type].event) return extra_event[type].decode(dev, buf, count, ev); } return -ENOENT; __found: if (type >= ST_SPECIAL) cmd = 0xf0 + (type - ST_SPECIAL); else /* data.note.channel and data.control.channel is identical */ cmd = 0x80 | (type << 4) | (ev->data.note.channel & 0x0f); if (cmd == MIDI_CMD_COMMON_SYSEX) { snd_midi_event_reset_decode(dev); return snd_seq_expand_var_event(ev, count, buf, 1, 0); } else { int qlen; unsigned char xbuf[4]; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if ((cmd & 0xf0) == 0xf0 || dev->lastcmd != cmd || dev->nostat) { dev->lastcmd = cmd; spin_unlock_irqrestore(&dev->lock, flags); xbuf[0] = cmd; if (status_event[type].decode) status_event[type].decode(ev, xbuf + 1); qlen = status_event[type].qlen + 1; } else { spin_unlock_irqrestore(&dev->lock, flags); if (status_event[type].decode) status_event[type].decode(ev, xbuf + 0); qlen = status_event[type].qlen; } if (count < qlen) return -ENOMEM; memcpy(buf, xbuf, qlen); return qlen; } } EXPORT_SYMBOL(snd_midi_event_decode); /* decode note event */ static void note_decode(struct snd_seq_event *ev, unsigned char *buf) { buf[0] = ev->data.note.note & 0x7f; buf[1] = ev->data.note.velocity & 0x7f; } /* decode one parameter controls */ static void one_param_decode(struct snd_seq_event *ev, unsigned char *buf) { buf[0] = ev->data.control.value & 0x7f; } /* decode pitch wheel change */ static void pitchbend_decode(struct snd_seq_event *ev, unsigned char *buf) { int value = ev->data.control.value + 8192; buf[0] = value & 0x7f; buf[1] = (value >> 7) & 0x7f; } /* decode midi control change */ static void two_param_decode(struct snd_seq_event *ev, unsigned char *buf) { buf[0] = ev->data.control.param & 0x7f; buf[1] = ev->data.control.value & 0x7f; } /* decode song position */ static void songpos_decode(struct snd_seq_event *ev, unsigned char *buf) { buf[0] = ev->data.control.value & 0x7f; buf[1] = (ev->data.control.value >> 7) & 0x7f; } /* decode 14bit control */ static int extra_decode_ctrl14(struct snd_midi_event *dev, unsigned char *buf, int count, struct snd_seq_event *ev) { unsigned char cmd; int idx = 0; cmd = MIDI_CMD_CONTROL|(ev->data.control.channel & 0x0f); if (ev->data.control.param < 0x20) { if (count < 4) return -ENOMEM; if (dev->nostat && count < 6) return -ENOMEM; if (cmd != dev->lastcmd || dev->nostat) { if (count < 5) return -ENOMEM; buf[idx++] = dev->lastcmd = cmd; } buf[idx++] = ev->data.control.param; buf[idx++] = (ev->data.control.value >> 7) & 0x7f; if (dev->nostat) buf[idx++] = cmd; buf[idx++] = ev->data.control.param + 0x20; buf[idx++] = 
ev->data.control.value & 0x7f; } else { if (count < 2) return -ENOMEM; if (cmd != dev->lastcmd || dev->nostat) { if (count < 3) return -ENOMEM; buf[idx++] = dev->lastcmd = cmd; } buf[idx++] = ev->data.control.param & 0x7f; buf[idx++] = ev->data.control.value & 0x7f; } return idx; } /* decode reg/nonreg param */ static int extra_decode_xrpn(struct snd_midi_event *dev, unsigned char *buf, int count, struct snd_seq_event *ev) { unsigned char cmd; const char *cbytes; static const char cbytes_nrpn[4] = { MIDI_CTL_NONREG_PARM_NUM_MSB, MIDI_CTL_NONREG_PARM_NUM_LSB, MIDI_CTL_MSB_DATA_ENTRY, MIDI_CTL_LSB_DATA_ENTRY }; static const char cbytes_rpn[4] = { MIDI_CTL_REGIST_PARM_NUM_MSB, MIDI_CTL_REGIST_PARM_NUM_LSB, MIDI_CTL_MSB_DATA_ENTRY, MIDI_CTL_LSB_DATA_ENTRY }; unsigned char bytes[4]; int idx = 0, i; if (count < 8) return -ENOMEM; if (dev->nostat && count < 12) return -ENOMEM; cmd = MIDI_CMD_CONTROL|(ev->data.control.channel & 0x0f); bytes[0] = (ev->data.control.param & 0x3f80) >> 7; bytes[1] = ev->data.control.param & 0x007f; bytes[2] = (ev->data.control.value & 0x3f80) >> 7; bytes[3] = ev->data.control.value & 0x007f; if (cmd != dev->lastcmd && !dev->nostat) { if (count < 9) return -ENOMEM; buf[idx++] = dev->lastcmd = cmd; } cbytes = ev->type == SNDRV_SEQ_EVENT_NONREGPARAM ? cbytes_nrpn : cbytes_rpn; for (i = 0; i < 4; i++) { if (dev->nostat) buf[idx++] = dev->lastcmd = cmd; buf[idx++] = cbytes[i]; buf[idx++] = bytes[i]; } return idx; }
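/*
 * Illustrative usage sketch (not part of this file): feeding raw MIDI bytes
 * into the encoder above.  A three-byte note-on message (0x90 0x40 0x7f)
 * yields one SNDRV_SEQ_EVENT_NOTEON event; the function name is hypothetical.
 */
static void example_encode_note_on(void)
{
	struct snd_midi_event *dev;
	struct snd_seq_event ev;
	static const unsigned char bytes[] = { 0x90, 0x40, 0x7f };
	int i;

	if (snd_midi_event_new(256, &dev) < 0)	/* 256-byte sysex buffer */
		return;
	memset(&ev, 0, sizeof(ev));
	for (i = 0; i < ARRAY_SIZE(bytes); i++) {
		if (snd_midi_event_encode_byte(dev, bytes[i], &ev)) {
			/* a complete event was assembled here: ev.type is
			 * SNDRV_SEQ_EVENT_NOTEON, channel 0, note 64,
			 * velocity 127 -- dispatch it to the sequencer
			 */
		}
	}
	snd_midi_event_free(dev);
}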
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BLK_CGROUP_PRIVATE_H #define _BLK_CGROUP_PRIVATE_H /* * block cgroup private header * * Based on ideas and code from CFQ, CFS and BFQ: * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> * * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> * Paolo Valente <paolo.valente@unimore.it> * * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> * Nauman Rafique <nauman@google.com> */ #include <linux/blk-cgroup.h> #include <linux/cgroup.h> #include <linux/kthread.h> #include <linux/blk-mq.h> #include <linux/llist.h> #include "blk.h" struct blkcg_gq; struct blkg_policy_data; /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) #ifdef CONFIG_BLK_CGROUP enum blkg_iostat_type { BLKG_IOSTAT_READ, BLKG_IOSTAT_WRITE, BLKG_IOSTAT_DISCARD, BLKG_IOSTAT_NR, }; struct blkg_iostat { u64 bytes[BLKG_IOSTAT_NR]; u64 ios[BLKG_IOSTAT_NR]; }; struct blkg_iostat_set { struct u64_stats_sync sync; struct blkcg_gq *blkg; struct llist_node lnode; int lqueued; /* queued in llist */ struct blkg_iostat cur; struct blkg_iostat last; }; /* association between a blk cgroup and a request queue */ struct blkcg_gq { /* Pointer to the associated request_queue */ struct request_queue *q; struct list_head q_node; struct hlist_node blkcg_node; struct blkcg *blkcg; /* all non-root blkcg_gq's are guaranteed to have access to parent */ struct blkcg_gq *parent; /* reference count */ struct percpu_ref refcnt; /* is this blkg online?
protected by both blkcg and q locks */ bool online; struct blkg_iostat_set __percpu *iostat_cpu; struct blkg_iostat_set iostat; struct blkg_policy_data *pd[BLKCG_MAX_POLS]; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO spinlock_t async_bio_lock; struct bio_list async_bios; #endif union { struct work_struct async_bio_work; struct work_struct free_work; }; atomic_t use_delay; atomic64_t delay_nsec; atomic64_t delay_start; u64 last_delay; int last_use; struct rcu_head rcu_head; }; struct blkcg { struct cgroup_subsys_state css; spinlock_t lock; refcount_t online_pin; struct radix_tree_root blkg_tree; struct blkcg_gq __rcu *blkg_hint; struct hlist_head blkg_list; struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; struct list_head all_blkcgs_node; /* * List of updated percpu blkg_iostat_set's since the last flush. */ struct llist_head __percpu *lhead; #ifdef CONFIG_BLK_CGROUP_FC_APPID char fc_app_id[FC_APPID_LEN]; #endif #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; #endif }; static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a * request_queue (q). This is used by blkcg policies which need to track * information per blkcg - q pair. * * There can be multiple active blkcg policies and each blkg:policy pair is * represented by a blkg_policy_data which is allocated and freed by each * policy's pd_alloc/free_fn() methods. A policy can allocate private data * area by allocating larger data structure which embeds blkg_policy_data * at the beginning. */ struct blkg_policy_data { /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; int plid; bool online; }; /* * Policies that need to keep per-blkcg data which is independent from any * request_queue associated to it should implement cpd_alloc/free_fn() * methods. A policy can allocate private data area by allocating larger * data structure which embeds blkcg_policy_data at the beginning. * cpd_init() is invoked to let each policy handle per-blkcg data. 
*/ struct blkcg_policy_data { /* the blkcg and policy id this per-policy data belongs to */ struct blkcg *blkcg; int plid; }; typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp); typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, struct seq_file *s); struct blkcg_policy { int plid; /* cgroup files for the policy */ struct cftype *dfl_cftypes; struct cftype *legacy_cftypes; /* operations */ blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; blkcg_pol_free_cpd_fn *cpd_free_fn; blkcg_pol_alloc_pd_fn *pd_alloc_fn; blkcg_pol_init_pd_fn *pd_init_fn; blkcg_pol_online_pd_fn *pd_online_fn; blkcg_pol_offline_pd_fn *pd_offline_fn; blkcg_pol_free_pd_fn *pd_free_fn; blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; blkcg_pol_stat_pd_fn *pd_stat_fn; }; extern struct blkcg blkcg_root; extern bool blkcg_debug_stats; void blkg_init_queue(struct request_queue *q); int blkcg_init_disk(struct gendisk *disk); void blkcg_exit_disk(struct gendisk *disk); /* Blkio controller policy registration */ int blkcg_policy_register(struct blkcg_policy *pol); void blkcg_policy_unregister(struct blkcg_policy *pol); int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol); void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol); const char *blkg_dev_name(struct blkcg_gq *blkg); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int), const struct blkcg_policy *pol, int data, bool show_total); u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); struct blkg_conf_ctx { char *input; char *body; struct block_device *bdev; struct blkcg_gq *blkg; }; void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input); int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkg_conf_ctx *ctx); void blkg_conf_exit(struct blkg_conf_ctx *ctx); /** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg * @return: true if this bio needs to be submitted with the root blkg context. * * In order to avoid priority inversions we sometimes need to issue a bio as if * it were attached to the root blkg, and then backcharge to the actual owning * blkg. The idea is we do bio_blkcg_css() to look up the actual context for * the bio and attach the appropriate blkg to the bio. Then we call this helper * and if it is true run with the root blkg for that queue and then do any * backcharging to the originating cgroup once the io is complete. */ static inline bool bio_issue_as_root_blkg(struct bio *bio) { return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; } /** * blkg_lookup - lookup blkg for the specified blkcg - q pair * @blkcg: blkcg of interest * @q: request_queue of interest * * Lookup blkg for the @blkcg - @q pair. 
* Must be called in a RCU critical section. */ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { struct blkcg_gq *blkg; if (blkcg == &blkcg_root) return q->root_blkg; blkg = rcu_dereference_check(blkcg->blkg_hint, lockdep_is_held(&q->queue_lock)); if (blkg && blkg->q == q) return blkg; blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); if (blkg && blkg->q != q) blkg = NULL; return blkg; } /** * blkg_to_pdata - get policy private data * @blkg: blkg of interest * @pol: policy of interest * * Return pointer to private data associated with the @blkg-@pol pair. */ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, struct blkcg_policy *pol) { return blkg ? blkg->pd[pol->plid] : NULL; } static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, struct blkcg_policy *pol) { return blkcg ? blkcg->cpd[pol->plid] : NULL; } /** * pdata_to_blkg - get blkg associated with policy private data * @pd: policy private data of interest * * @pd is policy private data. Determine the blkg it's associated with. */ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return pd ? pd->blkg : NULL; } static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) { return cpd ? cpd->blkcg : NULL; } /** * blkg_path - format cgroup path of blkg * @blkg: blkg of interest * @buf: target buffer * @buflen: target buffer length * * Format the path of the cgroup of @blkg into @buf. */ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) { return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); } /** * blkg_get - get a blkg reference * @blkg: blkg to get * * The caller should be holding an existing reference. */ static inline void blkg_get(struct blkcg_gq *blkg) { percpu_ref_get(&blkg->refcnt); } /** * blkg_tryget - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. */ static inline bool blkg_tryget(struct blkcg_gq *blkg) { return blkg && percpu_ref_tryget(&blkg->refcnt); } /** * blkg_put - put a blkg reference * @blkg: blkg to put */ static inline void blkg_put(struct blkcg_gq *blkg) { percpu_ref_put(&blkg->refcnt); } /** * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants * @d_blkg: loop cursor pointing to the current descendant * @pos_css: used for iteration * @p_blkg: target blkg to walk descendants of * * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU * read locked. If called under either blkcg or queue lock, the iteration * is guaranteed to include all and only online blkgs. The caller may * update @pos_css by calling css_rightmost_descendant() to skip subtree. * @p_blkg is included in the iteration and the first node to be visited. */ #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q))) /** * blkg_for_each_descendant_post - post-order walk of a blkg's descendants * @d_blkg: loop cursor pointing to the current descendant * @pos_css: used for iteration * @p_blkg: target blkg to walk descendants of * * Similar to blkg_for_each_descendant_pre() but performs post-order * traversal instead. Synchronization rules are the same. @p_blkg is * included in the iteration and the last node to be visited. 
*/ #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q))) static inline void blkcg_bio_issue_init(struct bio *bio) { bio_issue_init(&bio->bi_issue, bio_sectors(bio)); } static inline void blkcg_use_delay(struct blkcg_gq *blkg) { if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) return; if (atomic_add_return(1, &blkg->use_delay) == 1) atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); } static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); if (WARN_ON_ONCE(old < 0)) return 0; if (old == 0) return 0; /* * We do this song and dance because we can race with somebody else * adding or removing delay. If we just did an atomic_dec we'd end up * negative and we'd already be in trouble. We need to subtract 1 and * then check to see if we were the last delay so we can drop the * congestion count on the cgroup. */ while (old && !atomic_try_cmpxchg(&blkg->use_delay, &old, old - 1)) ; if (old == 0) return 0; if (old == 1) atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); return 1; } /** * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount * @blkg: target blkg * @delay: delay duration in nsecs * * When enabled with this function, the delay is not decayed and must be * explicitly cleared with blkcg_clear_delay(). Must not be mixed with * blkcg_[un]use_delay() and blkcg_add_delay() usages. */ static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) { int old = atomic_read(&blkg->use_delay); /* We only want 1 person setting the congestion count for this blkg. */ if (!old && atomic_try_cmpxchg(&blkg->use_delay, &old, -1)) atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); atomic64_set(&blkg->delay_nsec, delay); } /** * blkcg_clear_delay - Disable allocator delay mechanism * @blkg: target blkg * * Disable use_delay mechanism. See blkcg_set_delay(). */ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); /* We only want 1 person clearing the congestion count for this blkg. */ if (old && atomic_try_cmpxchg(&blkg->use_delay, &old, 0)) atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); } /** * blk_cgroup_mergeable - Determine whether to allow or disallow merges * @rq: request to merge into * @bio: bio to merge * * @bio and @rq should belong to the same cgroup and their issue_as_root should * match. The latter is necessary as we don't want to throttle e.g. a metadata * update because it happens to be next to a regular IO. 
*/ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return rq->bio->bi_blkg == bio->bi_blkg && bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio); } void blk_cgroup_bio_start(struct bio *bio); void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); #else /* CONFIG_BLK_CGROUP */ struct blkg_policy_data { }; struct blkcg_policy_data { }; struct blkcg_policy { }; struct blkcg { }; static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } static inline void blkg_init_queue(struct request_queue *q) { } static inline int blkcg_init_disk(struct gendisk *disk) { return 0; } static inline void blkcg_exit_disk(struct gendisk *disk) { } static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } static inline int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { return 0; } static inline void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, struct blkcg_policy *pol) { return NULL; } static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline void blk_cgroup_bio_start(struct bio *bio) { } static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) #endif /* CONFIG_BLK_CGROUP */ #endif /* _BLK_CGROUP_PRIVATE_H */
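/*
 * Illustrative usage sketch (not part of this header, assumes
 * CONFIG_BLK_CGROUP=y): a blkcg policy reading its per-(blkcg, request_queue)
 * data.  blkg_lookup() must run inside an RCU read-side critical section and
 * blkg_to_pd() is NULL-safe.  The name "example_pd_online" is hypothetical.
 */
static inline bool example_pd_online(struct blkcg *blkcg,
				     struct request_queue *q,
				     struct blkcg_policy *pol)
{
	struct blkcg_gq *blkg;
	struct blkg_policy_data *pd;
	bool online = false;

	rcu_read_lock();
	blkg = blkg_lookup(blkcg, q);	/* RCU-protected blkcg/queue lookup */
	pd = blkg_to_pd(blkg, pol);	/* NULL when no pd exists for this policy */
	if (pd)
		online = pd->online;
	rcu_read_unlock();
	return online;
}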
// SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/time.h> #include <linux/mutex.h> #include <linux/device.h> #include <linux/nospec.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/pcm.h> #include <sound/timer.h> #include <sound/control.h> #include <sound/info.h> #include "pcm_local.h" MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, Abramo Bagnara <abramo@alsa-project.org>"); MODULE_DESCRIPTION("Midlevel PCM code for ALSA."); MODULE_LICENSE("GPL"); static LIST_HEAD(snd_pcm_devices); static DEFINE_MUTEX(register_mutex); #if IS_ENABLED(CONFIG_SND_PCM_OSS) static LIST_HEAD(snd_pcm_notify_list); #endif static int snd_pcm_free(struct snd_pcm *pcm); static int snd_pcm_dev_free(struct snd_device *device); static int snd_pcm_dev_register(struct snd_device *device); static int snd_pcm_dev_disconnect(struct snd_device *device); static struct snd_pcm *snd_pcm_get(struct snd_card *card, int device) { struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { if (pcm->card == card && pcm->device == device) return pcm; } return NULL; } static int snd_pcm_next(struct snd_card *card, int device) { struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { if (pcm->card == card && pcm->device > device) return pcm->device; else if (pcm->card->number > card->number) return -1; } return -1; } static int snd_pcm_add(struct snd_pcm *newpcm) { struct snd_pcm *pcm; if (newpcm->internal) return 0; list_for_each_entry(pcm, &snd_pcm_devices, list) { if (pcm->card == newpcm->card && pcm->device == newpcm->device) return -EBUSY; if (pcm->card->number > newpcm->card->number || (pcm->card == newpcm->card && pcm->device > newpcm->device)) { list_add(&newpcm->list, pcm->list.prev); return 0; } } list_add_tail(&newpcm->list, &snd_pcm_devices); return 0; } static int snd_pcm_control_ioctl(struct
snd_card *card, struct snd_ctl_file *control, unsigned int cmd, unsigned long arg) { switch (cmd) { case SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE: { int device; if (get_user(device, (int __user *)arg)) return -EFAULT; scoped_guard(mutex, &register_mutex) device = snd_pcm_next(card, device); if (put_user(device, (int __user *)arg)) return -EFAULT; return 0; } case SNDRV_CTL_IOCTL_PCM_INFO: { struct snd_pcm_info __user *info; unsigned int device, subdevice; int stream; struct snd_pcm *pcm; struct snd_pcm_str *pstr; struct snd_pcm_substream *substream; info = (struct snd_pcm_info __user *)arg; if (get_user(device, &info->device)) return -EFAULT; if (get_user(stream, &info->stream)) return -EFAULT; if (stream < 0 || stream > 1) return -EINVAL; stream = array_index_nospec(stream, 2); if (get_user(subdevice, &info->subdevice)) return -EFAULT; guard(mutex)(&register_mutex); pcm = snd_pcm_get(card, device); if (pcm == NULL) return -ENXIO; pstr = &pcm->streams[stream]; if (pstr->substream_count == 0) return -ENOENT; if (subdevice >= pstr->substream_count) return -ENXIO; for (substream = pstr->substream; substream; substream = substream->next) if (substream->number == (int)subdevice) break; if (substream == NULL) return -ENXIO; guard(mutex)(&pcm->open_mutex); return snd_pcm_info_user(substream, info); } case SNDRV_CTL_IOCTL_PCM_PREFER_SUBDEVICE: { int val; if (get_user(val, (int __user *)arg)) return -EFAULT; control->preferred_subdevice[SND_CTL_SUBDEV_PCM] = val; return 0; } } return -ENOIOCTLCMD; } #define FORMAT(v) [SNDRV_PCM_FORMAT_##v] = #v static const char * const snd_pcm_format_names[] = { FORMAT(S8), FORMAT(U8), FORMAT(S16_LE), FORMAT(S16_BE), FORMAT(U16_LE), FORMAT(U16_BE), FORMAT(S24_LE), FORMAT(S24_BE), FORMAT(U24_LE), FORMAT(U24_BE), FORMAT(S32_LE), FORMAT(S32_BE), FORMAT(U32_LE), FORMAT(U32_BE), FORMAT(FLOAT_LE), FORMAT(FLOAT_BE), FORMAT(FLOAT64_LE), FORMAT(FLOAT64_BE), FORMAT(IEC958_SUBFRAME_LE), FORMAT(IEC958_SUBFRAME_BE), FORMAT(MU_LAW), FORMAT(A_LAW), FORMAT(IMA_ADPCM), FORMAT(MPEG), FORMAT(GSM), FORMAT(SPECIAL), FORMAT(S24_3LE), FORMAT(S24_3BE), FORMAT(U24_3LE), FORMAT(U24_3BE), FORMAT(S20_3LE), FORMAT(S20_3BE), FORMAT(U20_3LE), FORMAT(U20_3BE), FORMAT(S18_3LE), FORMAT(S18_3BE), FORMAT(U18_3LE), FORMAT(U18_3BE), FORMAT(G723_24), FORMAT(G723_24_1B), FORMAT(G723_40), FORMAT(G723_40_1B), FORMAT(DSD_U8), FORMAT(DSD_U16_LE), FORMAT(DSD_U32_LE), FORMAT(DSD_U16_BE), FORMAT(DSD_U32_BE), FORMAT(S20_LE), FORMAT(S20_BE), FORMAT(U20_LE), FORMAT(U20_BE), }; /** * snd_pcm_format_name - Return a name string for the given PCM format * @format: PCM format * * Return: the format name string */ const char *snd_pcm_format_name(snd_pcm_format_t format) { unsigned int format_num = (__force unsigned int)format; if (format_num >= ARRAY_SIZE(snd_pcm_format_names) || !snd_pcm_format_names[format_num]) return "Unknown"; return snd_pcm_format_names[format_num]; } EXPORT_SYMBOL_GPL(snd_pcm_format_name); #ifdef CONFIG_SND_VERBOSE_PROCFS #define STATE(v) [SNDRV_PCM_STATE_##v] = #v #define STREAM(v) [SNDRV_PCM_STREAM_##v] = #v #define READY(v) [SNDRV_PCM_READY_##v] = #v #define XRUN(v) [SNDRV_PCM_XRUN_##v] = #v #define SILENCE(v) [SNDRV_PCM_SILENCE_##v] = #v #define TSTAMP(v) [SNDRV_PCM_TSTAMP_##v] = #v #define ACCESS(v) [SNDRV_PCM_ACCESS_##v] = #v #define START(v) [SNDRV_PCM_START_##v] = #v #define SUBFORMAT(v) [SNDRV_PCM_SUBFORMAT_##v] = #v static const char * const snd_pcm_stream_names[] = { STREAM(PLAYBACK), STREAM(CAPTURE), }; static const char * const snd_pcm_state_names[] = { STATE(OPEN), STATE(SETUP), 
STATE(PREPARED), STATE(RUNNING), STATE(XRUN), STATE(DRAINING), STATE(PAUSED), STATE(SUSPENDED), STATE(DISCONNECTED), }; static const char * const snd_pcm_access_names[] = { ACCESS(MMAP_INTERLEAVED), ACCESS(MMAP_NONINTERLEAVED), ACCESS(MMAP_COMPLEX), ACCESS(RW_INTERLEAVED), ACCESS(RW_NONINTERLEAVED), }; static const char * const snd_pcm_subformat_names[] = { SUBFORMAT(STD), SUBFORMAT(MSBITS_MAX), SUBFORMAT(MSBITS_20), SUBFORMAT(MSBITS_24), }; static const char * const snd_pcm_tstamp_mode_names[] = { TSTAMP(NONE), TSTAMP(ENABLE), }; static const char *snd_pcm_stream_name(int stream) { return snd_pcm_stream_names[stream]; } static const char *snd_pcm_access_name(snd_pcm_access_t access) { return snd_pcm_access_names[(__force int)access]; } static const char *snd_pcm_subformat_name(snd_pcm_subformat_t subformat) { return snd_pcm_subformat_names[(__force int)subformat]; } static const char *snd_pcm_tstamp_mode_name(int mode) { return snd_pcm_tstamp_mode_names[mode]; } static const char *snd_pcm_state_name(snd_pcm_state_t state) { return snd_pcm_state_names[(__force int)state]; } #if IS_ENABLED(CONFIG_SND_PCM_OSS) #include <linux/soundcard.h> static const char *snd_pcm_oss_format_name(int format) { switch (format) { case AFMT_MU_LAW: return "MU_LAW"; case AFMT_A_LAW: return "A_LAW"; case AFMT_IMA_ADPCM: return "IMA_ADPCM"; case AFMT_U8: return "U8"; case AFMT_S16_LE: return "S16_LE"; case AFMT_S16_BE: return "S16_BE"; case AFMT_S8: return "S8"; case AFMT_U16_LE: return "U16_LE"; case AFMT_U16_BE: return "U16_BE"; case AFMT_MPEG: return "MPEG"; default: return "unknown"; } } #endif static void snd_pcm_proc_info_read(struct snd_pcm_substream *substream, struct snd_info_buffer *buffer) { struct snd_pcm_info *info __free(kfree) = NULL; int err; if (! substream) return; info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) return; err = snd_pcm_info(substream, info); if (err < 0) { snd_iprintf(buffer, "error %d\n", err); return; } snd_iprintf(buffer, "card: %d\n", info->card); snd_iprintf(buffer, "device: %d\n", info->device); snd_iprintf(buffer, "subdevice: %d\n", info->subdevice); snd_iprintf(buffer, "stream: %s\n", snd_pcm_stream_name(info->stream)); snd_iprintf(buffer, "id: %s\n", info->id); snd_iprintf(buffer, "name: %s\n", info->name); snd_iprintf(buffer, "subname: %s\n", info->subname); snd_iprintf(buffer, "class: %d\n", info->dev_class); snd_iprintf(buffer, "subclass: %d\n", info->dev_subclass); snd_iprintf(buffer, "subdevices_count: %d\n", info->subdevices_count); snd_iprintf(buffer, "subdevices_avail: %d\n", info->subdevices_avail); } static void snd_pcm_stream_proc_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { snd_pcm_proc_info_read(((struct snd_pcm_str *)entry->private_data)->substream, buffer); } static void snd_pcm_substream_proc_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { snd_pcm_proc_info_read(entry->private_data, buffer); } static void snd_pcm_substream_proc_hw_params_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; struct snd_pcm_runtime *runtime; guard(mutex)(&substream->pcm->open_mutex); runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); return; } if (runtime->state == SNDRV_PCM_STATE_OPEN) { snd_iprintf(buffer, "no setup\n"); return; } snd_iprintf(buffer, "access: %s\n", snd_pcm_access_name(runtime->access)); snd_iprintf(buffer, "format: %s\n", snd_pcm_format_name(runtime->format)); snd_iprintf(buffer, "subformat: 
%s\n", snd_pcm_subformat_name(runtime->subformat)); snd_iprintf(buffer, "channels: %u\n", runtime->channels); snd_iprintf(buffer, "rate: %u (%u/%u)\n", runtime->rate, runtime->rate_num, runtime->rate_den); snd_iprintf(buffer, "period_size: %lu\n", runtime->period_size); snd_iprintf(buffer, "buffer_size: %lu\n", runtime->buffer_size); #if IS_ENABLED(CONFIG_SND_PCM_OSS) if (substream->oss.oss) { snd_iprintf(buffer, "OSS format: %s\n", snd_pcm_oss_format_name(runtime->oss.format)); snd_iprintf(buffer, "OSS channels: %u\n", runtime->oss.channels); snd_iprintf(buffer, "OSS rate: %u\n", runtime->oss.rate); snd_iprintf(buffer, "OSS period bytes: %lu\n", (unsigned long)runtime->oss.period_bytes); snd_iprintf(buffer, "OSS periods: %u\n", runtime->oss.periods); snd_iprintf(buffer, "OSS period frames: %lu\n", (unsigned long)runtime->oss.period_frames); } #endif } static void snd_pcm_substream_proc_sw_params_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; struct snd_pcm_runtime *runtime; guard(mutex)(&substream->pcm->open_mutex); runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); return; } if (runtime->state == SNDRV_PCM_STATE_OPEN) { snd_iprintf(buffer, "no setup\n"); return; } snd_iprintf(buffer, "tstamp_mode: %s\n", snd_pcm_tstamp_mode_name(runtime->tstamp_mode)); snd_iprintf(buffer, "period_step: %u\n", runtime->period_step); snd_iprintf(buffer, "avail_min: %lu\n", runtime->control->avail_min); snd_iprintf(buffer, "start_threshold: %lu\n", runtime->start_threshold); snd_iprintf(buffer, "stop_threshold: %lu\n", runtime->stop_threshold); snd_iprintf(buffer, "silence_threshold: %lu\n", runtime->silence_threshold); snd_iprintf(buffer, "silence_size: %lu\n", runtime->silence_size); snd_iprintf(buffer, "boundary: %lu\n", runtime->boundary); } static void snd_pcm_substream_proc_status_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; struct snd_pcm_runtime *runtime; struct snd_pcm_status64 status; int err; guard(mutex)(&substream->pcm->open_mutex); runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); return; } memset(&status, 0, sizeof(status)); err = snd_pcm_status64(substream, &status); if (err < 0) { snd_iprintf(buffer, "error %d\n", err); return; } snd_iprintf(buffer, "state: %s\n", snd_pcm_state_name(status.state)); snd_iprintf(buffer, "owner_pid : %d\n", pid_vnr(substream->pid)); snd_iprintf(buffer, "trigger_time: %lld.%09lld\n", status.trigger_tstamp_sec, status.trigger_tstamp_nsec); snd_iprintf(buffer, "tstamp : %lld.%09lld\n", status.tstamp_sec, status.tstamp_nsec); snd_iprintf(buffer, "delay : %ld\n", status.delay); snd_iprintf(buffer, "avail : %ld\n", status.avail); snd_iprintf(buffer, "avail_max : %ld\n", status.avail_max); snd_iprintf(buffer, "-----\n"); snd_iprintf(buffer, "hw_ptr : %ld\n", runtime->status->hw_ptr); snd_iprintf(buffer, "appl_ptr : %ld\n", runtime->control->appl_ptr); } #ifdef CONFIG_SND_PCM_XRUN_DEBUG static void snd_pcm_xrun_injection_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; snd_pcm_stop_xrun(substream); } static void snd_pcm_xrun_debug_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; snd_iprintf(buffer, "%d\n", pstr->xrun_debug); } static void snd_pcm_xrun_debug_write(struct snd_info_entry *entry, struct 
snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; char line[64]; if (!snd_info_get_line(buffer, line, sizeof(line))) pstr->xrun_debug = simple_strtoul(line, NULL, 10); } #endif static int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr) { struct snd_pcm *pcm = pstr->pcm; struct snd_info_entry *entry; char name[16]; sprintf(name, "pcm%i%c", pcm->device, pstr->stream == SNDRV_PCM_STREAM_PLAYBACK ? 'p' : 'c'); entry = snd_info_create_card_entry(pcm->card, name, pcm->card->proc_root); if (!entry) return -ENOMEM; entry->mode = S_IFDIR | 0555; pstr->proc_root = entry; entry = snd_info_create_card_entry(pcm->card, "info", pstr->proc_root); if (entry) snd_info_set_text_ops(entry, pstr, snd_pcm_stream_proc_info_read); #ifdef CONFIG_SND_PCM_XRUN_DEBUG entry = snd_info_create_card_entry(pcm->card, "xrun_debug", pstr->proc_root); if (entry) { snd_info_set_text_ops(entry, pstr, snd_pcm_xrun_debug_read); entry->c.text.write = snd_pcm_xrun_debug_write; entry->mode |= 0200; } #endif return 0; } static int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr) { snd_info_free_entry(pstr->proc_root); pstr->proc_root = NULL; return 0; } static struct snd_info_entry * create_substream_info_entry(struct snd_pcm_substream *substream, const char *name, void (*read)(struct snd_info_entry *, struct snd_info_buffer *)) { struct snd_info_entry *entry; entry = snd_info_create_card_entry(substream->pcm->card, name, substream->proc_root); if (entry) snd_info_set_text_ops(entry, substream, read); return entry; } static int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream) { struct snd_info_entry *entry; struct snd_card *card; char name[16]; card = substream->pcm->card; sprintf(name, "sub%i", substream->number); entry = snd_info_create_card_entry(card, name, substream->pstr->proc_root); if (!entry) return -ENOMEM; entry->mode = S_IFDIR | 0555; substream->proc_root = entry; create_substream_info_entry(substream, "info", snd_pcm_substream_proc_info_read); create_substream_info_entry(substream, "hw_params", snd_pcm_substream_proc_hw_params_read); create_substream_info_entry(substream, "sw_params", snd_pcm_substream_proc_sw_params_read); create_substream_info_entry(substream, "status", snd_pcm_substream_proc_status_read); #ifdef CONFIG_SND_PCM_XRUN_DEBUG entry = create_substream_info_entry(substream, "xrun_injection", NULL); if (entry) { entry->c.text.write = snd_pcm_xrun_injection_write; entry->mode = S_IFREG | 0200; } #endif /* CONFIG_SND_PCM_XRUN_DEBUG */ return 0; } #else /* !CONFIG_SND_VERBOSE_PROCFS */ static inline int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr) { return 0; } static inline int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr) { return 0; } static inline int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream) { return 0; } #endif /* CONFIG_SND_VERBOSE_PROCFS */ static const struct attribute_group *pcm_dev_attr_groups[]; /* * PM callbacks: we need to deal only with suspend here, as the resume is * triggered either from user-space or the driver's resume callback */ #ifdef CONFIG_PM_SLEEP static int do_pcm_suspend(struct device *dev) { struct snd_pcm_str *pstr = dev_get_drvdata(dev); if (!pstr->pcm->no_device_suspend) snd_pcm_suspend_all(pstr->pcm); return 0; } #endif static const struct dev_pm_ops pcm_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(do_pcm_suspend, NULL) }; /* device type for PCM -- basically only for passing PM callbacks */ static const struct device_type pcm_dev_type = { .name = "pcm", .pm = &pcm_dev_pm_ops, }; /** * snd_pcm_new_stream - 
create a new PCM stream * @pcm: the pcm instance * @stream: the stream direction, SNDRV_PCM_STREAM_XXX * @substream_count: the number of substreams * * Creates a new stream for the pcm. * The corresponding stream on the pcm must have been empty before * calling this, i.e. zero must be given to the argument of * snd_pcm_new(). * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_new_stream(struct snd_pcm *pcm, int stream, int substream_count) { int idx, err; struct snd_pcm_str *pstr = &pcm->streams[stream]; struct snd_pcm_substream *substream, *prev; #if IS_ENABLED(CONFIG_SND_PCM_OSS) mutex_init(&pstr->oss.setup_mutex); #endif pstr->stream = stream; pstr->pcm = pcm; pstr->substream_count = substream_count; if (!substream_count) return 0; err = snd_device_alloc(&pstr->dev, pcm->card); if (err < 0) return err; dev_set_name(pstr->dev, "pcmC%iD%i%c", pcm->card->number, pcm->device, stream == SNDRV_PCM_STREAM_PLAYBACK ? 'p' : 'c'); pstr->dev->groups = pcm_dev_attr_groups; pstr->dev->type = &pcm_dev_type; dev_set_drvdata(pstr->dev, pstr); if (!pcm->internal) { err = snd_pcm_stream_proc_init(pstr); if (err < 0) { pcm_err(pcm, "Error in snd_pcm_stream_proc_init\n"); return err; } } prev = NULL; for (idx = 0, prev = NULL; idx < substream_count; idx++) { substream = kzalloc(sizeof(*substream), GFP_KERNEL); if (!substream) return -ENOMEM; substream->pcm = pcm; substream->pstr = pstr; substream->number = idx; substream->stream = stream; sprintf(substream->name, "subdevice #%i", idx); substream->buffer_bytes_max = UINT_MAX; if (prev == NULL) pstr->substream = substream; else prev->next = substream; if (!pcm->internal) { err = snd_pcm_substream_proc_init(substream); if (err < 0) { pcm_err(pcm, "Error in snd_pcm_stream_proc_init\n"); if (prev == NULL) pstr->substream = NULL; else prev->next = NULL; kfree(substream); return err; } } substream->group = &substream->self_group; snd_pcm_group_init(&substream->self_group); list_add_tail(&substream->link_list, &substream->self_group.substreams); atomic_set(&substream->mmap_count, 0); prev = substream; } return 0; } EXPORT_SYMBOL(snd_pcm_new_stream); static int _snd_pcm_new(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, bool internal, struct snd_pcm **rpcm) { struct snd_pcm *pcm; int err; static const struct snd_device_ops ops = { .dev_free = snd_pcm_dev_free, .dev_register = snd_pcm_dev_register, .dev_disconnect = snd_pcm_dev_disconnect, }; static const struct snd_device_ops internal_ops = { .dev_free = snd_pcm_dev_free, }; if (snd_BUG_ON(!card)) return -ENXIO; if (rpcm) *rpcm = NULL; pcm = kzalloc(sizeof(*pcm), GFP_KERNEL); if (!pcm) return -ENOMEM; pcm->card = card; pcm->device = device; pcm->internal = internal; mutex_init(&pcm->open_mutex); init_waitqueue_head(&pcm->open_wait); INIT_LIST_HEAD(&pcm->list); if (id) strscpy(pcm->id, id, sizeof(pcm->id)); err = snd_pcm_new_stream(pcm, SNDRV_PCM_STREAM_PLAYBACK, playback_count); if (err < 0) goto free_pcm; err = snd_pcm_new_stream(pcm, SNDRV_PCM_STREAM_CAPTURE, capture_count); if (err < 0) goto free_pcm; err = snd_device_new(card, SNDRV_DEV_PCM, pcm, internal ? 
&internal_ops : &ops); if (err < 0) goto free_pcm; if (rpcm) *rpcm = pcm; return 0; free_pcm: snd_pcm_free(pcm); return err; } /** * snd_pcm_new - create a new PCM instance * @card: the card instance * @id: the id string * @device: the device index (zero based) * @playback_count: the number of substreams for playback * @capture_count: the number of substreams for capture * @rpcm: the pointer to store the new pcm instance * * Creates a new PCM instance. * * The pcm operators have to be set afterwards to the new instance * via snd_pcm_set_ops(). * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_new(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, struct snd_pcm **rpcm) { return _snd_pcm_new(card, id, device, playback_count, capture_count, false, rpcm); } EXPORT_SYMBOL(snd_pcm_new); /** * snd_pcm_new_internal - create a new internal PCM instance * @card: the card instance * @id: the id string * @device: the device index (zero based - shared with normal PCMs) * @playback_count: the number of substreams for playback * @capture_count: the number of substreams for capture * @rpcm: the pointer to store the new pcm instance * * Creates a new internal PCM instance with no userspace device or procfs * entries. This is used by ASoC Back End PCMs in order to create a PCM that * will only be used internally by kernel drivers. i.e. it cannot be opened * by userspace. It provides existing ASoC components drivers with a substream * and access to any private data. * * The pcm operators have to be set afterwards to the new instance * via snd_pcm_set_ops(). * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_new_internal(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, struct snd_pcm **rpcm) { return _snd_pcm_new(card, id, device, playback_count, capture_count, true, rpcm); } EXPORT_SYMBOL(snd_pcm_new_internal); static void free_chmap(struct snd_pcm_str *pstr) { if (pstr->chmap_kctl) { struct snd_card *card = pstr->pcm->card; snd_ctl_remove(card, pstr->chmap_kctl); pstr->chmap_kctl = NULL; } } static void snd_pcm_free_stream(struct snd_pcm_str * pstr) { struct snd_pcm_substream *substream, *substream_next; #if IS_ENABLED(CONFIG_SND_PCM_OSS) struct snd_pcm_oss_setup *setup, *setupn; #endif /* free all proc files under the stream */ snd_pcm_stream_proc_done(pstr); substream = pstr->substream; while (substream) { substream_next = substream->next; snd_pcm_timer_done(substream); kfree(substream); substream = substream_next; } #if IS_ENABLED(CONFIG_SND_PCM_OSS) for (setup = pstr->oss.setup_list; setup; setup = setupn) { setupn = setup->next; kfree(setup->task_name); kfree(setup); } #endif free_chmap(pstr); if (pstr->substream_count) put_device(pstr->dev); } #if IS_ENABLED(CONFIG_SND_PCM_OSS) #define pcm_call_notify(pcm, call) \ do { \ struct snd_pcm_notify *_notify; \ list_for_each_entry(_notify, &snd_pcm_notify_list, list) \ _notify->call(pcm); \ } while (0) #else #define pcm_call_notify(pcm, call) do {} while (0) #endif static int snd_pcm_free(struct snd_pcm *pcm) { if (!pcm) return 0; if (!pcm->internal) pcm_call_notify(pcm, n_unregister); if (pcm->private_free) pcm->private_free(pcm); snd_pcm_lib_preallocate_free_for_all(pcm); snd_pcm_free_stream(&pcm->streams[SNDRV_PCM_STREAM_PLAYBACK]); snd_pcm_free_stream(&pcm->streams[SNDRV_PCM_STREAM_CAPTURE]); kfree(pcm); return 0; } static int snd_pcm_dev_free(struct snd_device *device) { struct snd_pcm *pcm = 
device->device_data; return snd_pcm_free(pcm); } int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, struct file *file, struct snd_pcm_substream **rsubstream) { struct snd_pcm_str * pstr; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; struct snd_card *card; int prefer_subdevice; size_t size; if (snd_BUG_ON(!pcm || !rsubstream)) return -ENXIO; if (snd_BUG_ON(stream != SNDRV_PCM_STREAM_PLAYBACK && stream != SNDRV_PCM_STREAM_CAPTURE)) return -EINVAL; *rsubstream = NULL; pstr = &pcm->streams[stream]; if (pstr->substream == NULL || pstr->substream_count == 0) return -ENODEV; card = pcm->card; prefer_subdevice = snd_ctl_get_preferred_subdevice(card, SND_CTL_SUBDEV_PCM); if (pcm->info_flags & SNDRV_PCM_INFO_HALF_DUPLEX) { int opposite = !stream; for (substream = pcm->streams[opposite].substream; substream; substream = substream->next) { if (SUBSTREAM_BUSY(substream)) return -EAGAIN; } } if (file->f_flags & O_APPEND) { if (prefer_subdevice < 0) { if (pstr->substream_count > 1) return -EINVAL; /* must be unique */ substream = pstr->substream; } else { for (substream = pstr->substream; substream; substream = substream->next) if (substream->number == prefer_subdevice) break; } if (! substream) return -ENODEV; if (! SUBSTREAM_BUSY(substream)) return -EBADFD; substream->ref_count++; *rsubstream = substream; return 0; } for (substream = pstr->substream; substream; substream = substream->next) { if (!SUBSTREAM_BUSY(substream) && (prefer_subdevice == -1 || substream->number == prefer_subdevice)) break; } if (substream == NULL) return -EAGAIN; runtime = kzalloc(sizeof(*runtime), GFP_KERNEL); if (runtime == NULL) return -ENOMEM; size = PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status)); runtime->status = alloc_pages_exact(size, GFP_KERNEL); if (runtime->status == NULL) { kfree(runtime); return -ENOMEM; } memset(runtime->status, 0, size); size = PAGE_ALIGN(sizeof(struct snd_pcm_mmap_control)); runtime->control = alloc_pages_exact(size, GFP_KERNEL); if (runtime->control == NULL) { free_pages_exact(runtime->status, PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status))); kfree(runtime); return -ENOMEM; } memset(runtime->control, 0, size); init_waitqueue_head(&runtime->sleep); init_waitqueue_head(&runtime->tsleep); __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_OPEN); mutex_init(&runtime->buffer_mutex); atomic_set(&runtime->buffer_accessing, 0); substream->runtime = runtime; substream->private_data = pcm->private_data; substream->ref_count = 1; substream->f_flags = file->f_flags; substream->pid = get_pid(task_pid(current)); pstr->substream_opened++; *rsubstream = substream; return 0; } void snd_pcm_detach_substream(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; if (PCM_RUNTIME_CHECK(substream)) return; runtime = substream->runtime; if (runtime->private_free != NULL) runtime->private_free(runtime); free_pages_exact(runtime->status, PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status))); free_pages_exact(runtime->control, PAGE_ALIGN(sizeof(struct snd_pcm_mmap_control))); kfree(runtime->hw_constraints.rules); /* Avoid concurrent access to runtime via PCM timer interface */ if (substream->timer) { scoped_guard(spinlock_irq, &substream->timer->lock) substream->runtime = NULL; } else { substream->runtime = NULL; } mutex_destroy(&runtime->buffer_mutex); snd_fasync_free(runtime->fasync); kfree(runtime); put_pid(substream->pid); substream->pid = NULL; substream->pstr->substream_opened--; } static ssize_t pcm_class_show(struct device *dev, struct device_attribute *attr, char *buf) { 
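/* Read-only sysfs attribute "pcm_class" on the per-stream PCM device: reports the owning PCM's dev_class as "generic", "multi", "modem" or "digitizer", or "none" when the value is out of range. */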
struct snd_pcm_str *pstr = dev_get_drvdata(dev); struct snd_pcm *pcm = pstr->pcm; const char *str; static const char *strs[SNDRV_PCM_CLASS_LAST + 1] = { [SNDRV_PCM_CLASS_GENERIC] = "generic", [SNDRV_PCM_CLASS_MULTI] = "multi", [SNDRV_PCM_CLASS_MODEM] = "modem", [SNDRV_PCM_CLASS_DIGITIZER] = "digitizer", }; if (pcm->dev_class > SNDRV_PCM_CLASS_LAST) str = "none"; else str = strs[pcm->dev_class]; return sysfs_emit(buf, "%s\n", str); } static DEVICE_ATTR_RO(pcm_class); static struct attribute *pcm_dev_attrs[] = { &dev_attr_pcm_class.attr, NULL }; static const struct attribute_group pcm_dev_attr_group = { .attrs = pcm_dev_attrs, }; static const struct attribute_group *pcm_dev_attr_groups[] = { &pcm_dev_attr_group, NULL }; static int snd_pcm_dev_register(struct snd_device *device) { int cidx, err; struct snd_pcm_substream *substream; struct snd_pcm *pcm; if (snd_BUG_ON(!device || !device->device_data)) return -ENXIO; pcm = device->device_data; guard(mutex)(&register_mutex); err = snd_pcm_add(pcm); if (err) return err; for (cidx = 0; cidx < 2; cidx++) { int devtype = -1; if (pcm->streams[cidx].substream == NULL) continue; switch (cidx) { case SNDRV_PCM_STREAM_PLAYBACK: devtype = SNDRV_DEVICE_TYPE_PCM_PLAYBACK; break; case SNDRV_PCM_STREAM_CAPTURE: devtype = SNDRV_DEVICE_TYPE_PCM_CAPTURE; break; } /* register pcm */ err = snd_register_device(devtype, pcm->card, pcm->device, &snd_pcm_f_ops[cidx], pcm, pcm->streams[cidx].dev); if (err < 0) { list_del_init(&pcm->list); return err; } for (substream = pcm->streams[cidx].substream; substream; substream = substream->next) snd_pcm_timer_init(substream); } pcm_call_notify(pcm, n_register); return err; } static int snd_pcm_dev_disconnect(struct snd_device *device) { struct snd_pcm *pcm = device->device_data; struct snd_pcm_substream *substream; int cidx; guard(mutex)(&register_mutex); guard(mutex)(&pcm->open_mutex); wake_up(&pcm->open_wait); list_del_init(&pcm->list); for_each_pcm_substream(pcm, cidx, substream) { snd_pcm_stream_lock_irq(substream); if (substream->runtime) { if (snd_pcm_running(substream)) snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED); /* to be sure, set the state unconditionally */ __snd_pcm_set_state(substream->runtime, SNDRV_PCM_STATE_DISCONNECTED); wake_up(&substream->runtime->sleep); wake_up(&substream->runtime->tsleep); } snd_pcm_stream_unlock_irq(substream); } for_each_pcm_substream(pcm, cidx, substream) snd_pcm_sync_stop(substream, false); pcm_call_notify(pcm, n_disconnect); for (cidx = 0; cidx < 2; cidx++) { if (pcm->streams[cidx].dev) snd_unregister_device(pcm->streams[cidx].dev); free_chmap(&pcm->streams[cidx]); } return 0; } #if IS_ENABLED(CONFIG_SND_PCM_OSS) /** * snd_pcm_notify - Add/remove the notify list * @notify: PCM notify list * @nfree: 0 = register, 1 = unregister * * This adds the given notifier to the global list so that the callback is * called for each registered PCM devices. This exists only for PCM OSS * emulation, so far. 
* * Return: zero if successful, or a negative error code */ int snd_pcm_notify(struct snd_pcm_notify *notify, int nfree) { struct snd_pcm *pcm; if (snd_BUG_ON(!notify || !notify->n_register || !notify->n_unregister || !notify->n_disconnect)) return -EINVAL; guard(mutex)(&register_mutex); if (nfree) { list_del(&notify->list); list_for_each_entry(pcm, &snd_pcm_devices, list) notify->n_unregister(pcm); } else { list_add_tail(&notify->list, &snd_pcm_notify_list); list_for_each_entry(pcm, &snd_pcm_devices, list) notify->n_register(pcm); } return 0; } EXPORT_SYMBOL(snd_pcm_notify); #endif /* CONFIG_SND_PCM_OSS */ #ifdef CONFIG_SND_PROC_FS /* * Info interface */ static void snd_pcm_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm *pcm; guard(mutex)(&register_mutex); list_for_each_entry(pcm, &snd_pcm_devices, list) { snd_iprintf(buffer, "%02i-%02i: %s : %s", pcm->card->number, pcm->device, pcm->id, pcm->name); if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) snd_iprintf(buffer, " : playback %i", pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream_count); if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) snd_iprintf(buffer, " : capture %i", pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream_count); snd_iprintf(buffer, "\n"); } } static struct snd_info_entry *snd_pcm_proc_entry; static void snd_pcm_proc_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "pcm", NULL); if (entry) { snd_info_set_text_ops(entry, NULL, snd_pcm_proc_read); if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } snd_pcm_proc_entry = entry; } static void snd_pcm_proc_done(void) { snd_info_free_entry(snd_pcm_proc_entry); } #else /* !CONFIG_SND_PROC_FS */ #define snd_pcm_proc_init() #define snd_pcm_proc_done() #endif /* CONFIG_SND_PROC_FS */ /* * ENTRY functions */ static int __init alsa_pcm_init(void) { snd_ctl_register_ioctl(snd_pcm_control_ioctl); snd_ctl_register_ioctl_compat(snd_pcm_control_ioctl); snd_pcm_proc_init(); return 0; } static void __exit alsa_pcm_exit(void) { snd_ctl_unregister_ioctl(snd_pcm_control_ioctl); snd_ctl_unregister_ioctl_compat(snd_pcm_control_ioctl); snd_pcm_proc_done(); } module_init(alsa_pcm_init) module_exit(alsa_pcm_exit)
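A minimal usage sketch of the creation API above, under stated assumptions: my_chip, my_pcm_ops and the stub callbacks are hypothetical driver-side names, not part of this file, and the callback bodies are placeholders where a real driver would program its hardware. The flow follows the kernel-doc above: allocate the PCM with snd_pcm_new(), then attach the operators with snd_pcm_set_ops() for each direction.

#include <linux/string.h>
#include <sound/core.h>
#include <sound/pcm.h>

/* Hypothetical driver context; illustrative only. */
struct my_chip {
	struct snd_card *card;
	struct snd_pcm *pcm;
};

/* Stub callbacks so the sketch is self-contained; a real driver fills these in. */
static int my_pcm_open(struct snd_pcm_substream *ss) { return 0; }
static int my_pcm_close(struct snd_pcm_substream *ss) { return 0; }
static int my_pcm_hw_params(struct snd_pcm_substream *ss,
			    struct snd_pcm_hw_params *hw) { return 0; }
static int my_pcm_hw_free(struct snd_pcm_substream *ss) { return 0; }
static int my_pcm_prepare(struct snd_pcm_substream *ss) { return 0; }
static int my_pcm_trigger(struct snd_pcm_substream *ss, int cmd) { return 0; }
static snd_pcm_uframes_t my_pcm_pointer(struct snd_pcm_substream *ss) { return 0; }

static const struct snd_pcm_ops my_pcm_ops = {
	.open      = my_pcm_open,
	.close     = my_pcm_close,
	.hw_params = my_pcm_hw_params,
	.hw_free   = my_pcm_hw_free,
	.prepare   = my_pcm_prepare,
	.trigger   = my_pcm_trigger,
	.pointer   = my_pcm_pointer,
};

static int my_create_pcm(struct my_chip *chip)
{
	struct snd_pcm *pcm;
	int err;

	/* device index 0, one playback and one capture substream */
	err = snd_pcm_new(chip->card, "My PCM", 0, 1, 1, &pcm);
	if (err < 0)
		return err;

	pcm->private_data = chip;	/* propagated to each substream on open */
	strscpy(pcm->name, "My PCM", sizeof(pcm->name));

	/* operators must be set after snd_pcm_new(), as the kernel-doc above notes */
	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &my_pcm_ops);
	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &my_pcm_ops);

	chip->pcm = pcm;
	return 0;
}

In a real driver the stubs would implement the actual hardware programming, and buffer preallocation (freed by the core via snd_pcm_lib_preallocate_free_for_all() above) would normally be set up before the card is registered.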
// SPDX-License-Identifier: GPL-2.0-or-later /* * NETLINK Kernel-user communication protocol. 
* * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * Patrick McHardy <kaber@trash.net> * * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith * added netlink_proto_exit * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> * use nlk_sk, as sk->protinfo is on a diet 8) * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> * - inc module use count of module that owns * the kernel socket in case userspace opens * socket of same protocol * - remove all module support, since netlink is * mandatory if CONFIG_NET=y these days */ #include <linux/module.h> #include <linux/bpf.h> #include <linux/capability.h> #include <linux/kernel.h> #include <linux/filter.h> #include <linux/init.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/socket.h> #include <linux/un.h> #include <linux/fcntl.h> #include <linux/termios.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/notifier.h> #include <linux/security.h> #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/random.h> #include <linux/bitops.h> #include <linux/mm.h> #include <linux/types.h> #include <linux/audit.h> #include <linux/mutex.h> #include <linux/vmalloc.h> #include <linux/if_arp.h> #include <linux/rhashtable.h> #include <asm/cacheflush.h> #include <linux/hash.h> #include <linux/genetlink.h> #include <linux/net_namespace.h> #include <linux/nospec.h> #include <linux/btf_ids.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> #include <net/scm.h> #include <net/netlink.h> #define CREATE_TRACE_POINTS #include <trace/events/netlink.h> #include "af_netlink.h" struct listeners { struct rcu_head rcu; unsigned long masks[]; }; /* state bits */ #define NETLINK_S_CONGESTED 0x0 static inline int netlink_is_kernel(struct sock *sk) { return nlk_test_bit(KERNEL_SOCKET, sk); } struct netlink_table *nl_table __read_mostly; EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS]; static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { "nlk_cb_mutex-ROUTE", "nlk_cb_mutex-1", "nlk_cb_mutex-USERSOCK", "nlk_cb_mutex-FIREWALL", "nlk_cb_mutex-SOCK_DIAG", "nlk_cb_mutex-NFLOG", "nlk_cb_mutex-XFRM", "nlk_cb_mutex-SELINUX", "nlk_cb_mutex-ISCSI", "nlk_cb_mutex-AUDIT", "nlk_cb_mutex-FIB_LOOKUP", "nlk_cb_mutex-CONNECTOR", "nlk_cb_mutex-NETFILTER", "nlk_cb_mutex-IP6_FW", "nlk_cb_mutex-DNRTMSG", "nlk_cb_mutex-KOBJECT_UEVENT", "nlk_cb_mutex-GENERIC", "nlk_cb_mutex-17", "nlk_cb_mutex-SCSITRANSPORT", "nlk_cb_mutex-ECRYPTFS", "nlk_cb_mutex-RDMA", "nlk_cb_mutex-CRYPTO", "nlk_cb_mutex-SMC", "nlk_cb_mutex-23", "nlk_cb_mutex-24", "nlk_cb_mutex-25", "nlk_cb_mutex-26", "nlk_cb_mutex-27", "nlk_cb_mutex-28", "nlk_cb_mutex-29", "nlk_cb_mutex-30", "nlk_cb_mutex-31", "nlk_cb_mutex-MAX_LINKS" }; static int netlink_dump(struct sock *sk, bool lock_taken); /* nl_table locking explained: * Lookup and traversal are protected with an RCU read-side lock. Insertion * and removal are protected with per bucket lock while using RCU list * modification primitives and may run in parallel to RCU protected lookups. 
* Destruction of the Netlink socket may only occur *after* nl_table_lock has * been acquired * either during or after the socket has been removed from * the list and after an RCU grace period. */ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); static BLOCKING_NOTIFIER_HEAD(netlink_chain); static const struct rhashtable_params netlink_rhashtable_params; void do_trace_netlink_extack(const char *msg) { trace_netlink_extack(msg); } EXPORT_SYMBOL(do_trace_netlink_extack); static inline u32 netlink_group_mask(u32 group) { if (group > 32) return 0; return group ? 1 << (group - 1) : 0; } static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, gfp_t gfp_mask) { unsigned int len = skb->len; struct sk_buff *new; new = alloc_skb(len, gfp_mask); if (new == NULL) return NULL; NETLINK_CB(new).portid = NETLINK_CB(skb).portid; NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; NETLINK_CB(new).creds = NETLINK_CB(skb).creds; skb_put_data(new, skb->data, len); return new; } static unsigned int netlink_tap_net_id; struct netlink_tap_net { struct list_head netlink_tap_all; struct mutex netlink_tap_lock; }; int netlink_add_tap(struct netlink_tap *nt) { struct net *net = dev_net(nt->dev); struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); if (unlikely(nt->dev->type != ARPHRD_NETLINK)) return -EINVAL; mutex_lock(&nn->netlink_tap_lock); list_add_rcu(&nt->list, &nn->netlink_tap_all); mutex_unlock(&nn->netlink_tap_lock); __module_get(nt->module); return 0; } EXPORT_SYMBOL_GPL(netlink_add_tap); static int __netlink_remove_tap(struct netlink_tap *nt) { struct net *net = dev_net(nt->dev); struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); bool found = false; struct netlink_tap *tmp; mutex_lock(&nn->netlink_tap_lock); list_for_each_entry(tmp, &nn->netlink_tap_all, list) { if (nt == tmp) { list_del_rcu(&nt->list); found = true; goto out; } } pr_warn("__netlink_remove_tap: %p not found\n", nt); out: mutex_unlock(&nn->netlink_tap_lock); if (found) module_put(nt->module); return found ? 0 : -ENODEV; } int netlink_remove_tap(struct netlink_tap *nt) { int ret; ret = __netlink_remove_tap(nt); synchronize_net(); return ret; } EXPORT_SYMBOL_GPL(netlink_remove_tap); static __net_init int netlink_tap_init_net(struct net *net) { struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); INIT_LIST_HEAD(&nn->netlink_tap_all); mutex_init(&nn->netlink_tap_lock); return 0; } static struct pernet_operations netlink_tap_net_ops = { .init = netlink_tap_init_net, .id = &netlink_tap_net_id, .size = sizeof(struct netlink_tap_net), }; static bool netlink_filter_tap(const struct sk_buff *skb) { struct sock *sk = skb->sk; /* We take the more conservative approach and * whitelist socket protocols that may pass. 
*/ switch (sk->sk_protocol) { case NETLINK_ROUTE: case NETLINK_USERSOCK: case NETLINK_SOCK_DIAG: case NETLINK_NFLOG: case NETLINK_XFRM: case NETLINK_FIB_LOOKUP: case NETLINK_NETFILTER: case NETLINK_GENERIC: return true; } return false; } static int __netlink_deliver_tap_skb(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *nskb; struct sock *sk = skb->sk; int ret = -ENOMEM; if (!net_eq(dev_net(dev), sock_net(sk))) return 0; dev_hold(dev); if (is_vmalloc_addr(skb->head)) nskb = netlink_to_full_skb(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); nskb->pkt_type = netlink_is_kernel(sk) ? PACKET_KERNEL : PACKET_USER; skb_reset_network_header(nskb); ret = dev_queue_xmit(nskb); if (unlikely(ret > 0)) ret = net_xmit_errno(ret); } dev_put(dev); return ret; } static void __netlink_deliver_tap(struct sk_buff *skb, struct netlink_tap_net *nn) { int ret; struct netlink_tap *tmp; if (!netlink_filter_tap(skb)) return; list_for_each_entry_rcu(tmp, &nn->netlink_tap_all, list) { ret = __netlink_deliver_tap_skb(skb, tmp->dev); if (unlikely(ret)) break; } } static void netlink_deliver_tap(struct net *net, struct sk_buff *skb) { struct netlink_tap_net *nn = net_generic(net, netlink_tap_net_id); rcu_read_lock(); if (unlikely(!list_empty(&nn->netlink_tap_all))) __netlink_deliver_tap(skb, nn); rcu_read_unlock(); } static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src, struct sk_buff *skb) { if (!(netlink_is_kernel(dst) && netlink_is_kernel(src))) netlink_deliver_tap(sock_net(dst), skb); } static void netlink_overrun(struct sock *sk) { if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) { if (!test_and_set_bit(NETLINK_S_CONGESTED, &nlk_sk(sk)->state)) { WRITE_ONCE(sk->sk_err, ENOBUFS); sk_error_report(sk); } } atomic_inc(&sk->sk_drops); } static void netlink_rcv_wake(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); if (skb_queue_empty_lockless(&sk->sk_receive_queue)) clear_bit(NETLINK_S_CONGESTED, &nlk->state); if (!test_bit(NETLINK_S_CONGESTED, &nlk->state)) wake_up_interruptible(&nlk->wait); } static void netlink_skb_destructor(struct sk_buff *skb) { if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) vfree_atomic(skb->head); skb->head = NULL; } if (skb->sk != NULL) sock_rfree(skb); } static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) { WARN_ON(skb->sk != NULL); skb->sk = sk; skb->destructor = netlink_skb_destructor; atomic_add(skb->truesize, &sk->sk_rmem_alloc); sk_mem_charge(sk, skb->truesize); } static void netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); if (nlk->cb_running) { if (nlk->cb.done) nlk->cb.done(&nlk->cb); module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); } skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); return; } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(nlk_sk(sk)->groups); } static void netlink_sock_destruct_work(struct work_struct *work) { struct netlink_sock *nlk = container_of(work, struct netlink_sock, work); sk_free(&nlk->sk); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves * this, _but_ remember, it adds useless work on UP machines. 
*/ void netlink_table_grab(void) __acquires(nl_table_lock) { might_sleep(); write_lock_irq(&nl_table_lock); if (atomic_read(&nl_table_users)) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&nl_table_wait, &wait); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (atomic_read(&nl_table_users) == 0) break; write_unlock_irq(&nl_table_lock); schedule(); write_lock_irq(&nl_table_lock); } __set_current_state(TASK_RUNNING); remove_wait_queue(&nl_table_wait, &wait); } } void netlink_table_ungrab(void) __releases(nl_table_lock) { write_unlock_irq(&nl_table_lock); wake_up(&nl_table_wait); } static inline void netlink_lock_table(void) { unsigned long flags; /* read_lock() synchronizes us to netlink_table_grab */ read_lock_irqsave(&nl_table_lock, flags); atomic_inc(&nl_table_users); read_unlock_irqrestore(&nl_table_lock, flags); } static inline void netlink_unlock_table(void) { if (atomic_dec_and_test(&nl_table_users)) wake_up(&nl_table_wait); } struct netlink_compare_arg { possible_net_t pnet; u32 portid; }; /* Doing sizeof directly may yield 4 extra bytes on 64-bit. */ #define netlink_compare_arg_len \ (offsetof(struct netlink_compare_arg, portid) + sizeof(u32)) static inline int netlink_compare(struct rhashtable_compare_arg *arg, const void *ptr) { const struct netlink_compare_arg *x = arg->key; const struct netlink_sock *nlk = ptr; return nlk->portid != x->portid || !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); } static void netlink_compare_arg_init(struct netlink_compare_arg *arg, struct net *net, u32 portid) { memset(arg, 0, sizeof(*arg)); write_pnet(&arg->pnet, net); arg->portid = portid; } static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, struct net *net) { struct netlink_compare_arg arg; netlink_compare_arg_init(&arg, net, portid); return rhashtable_lookup_fast(&table->hash, &arg, netlink_rhashtable_params); } static int __netlink_insert(struct netlink_table *table, struct sock *sk) { struct netlink_compare_arg arg; netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); return rhashtable_lookup_insert_key(&table->hash, &arg, &nlk_sk(sk)->node, netlink_rhashtable_params); } static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { struct netlink_table *table = &nl_table[protocol]; struct sock *sk; rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); return sk; } static const struct proto_ops netlink_ops; static void netlink_update_listeners(struct sock *sk) { struct netlink_table *tbl = &nl_table[sk->sk_protocol]; unsigned long mask; unsigned int i; struct listeners *listeners; listeners = nl_deref_protected(tbl->listeners); if (!listeners) return; for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { mask = 0; sk_for_each_bound(sk, &tbl->mc_list) { if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) mask |= nlk_sk(sk)->groups[i]; } listeners->masks[i] = mask; } /* this function is only called with the netlink table "grabbed", which * makes sure updates are visible before bind or setsockopt return. */ } static int netlink_insert(struct sock *sk, u32 portid) { struct netlink_table *table = &nl_table[sk->sk_protocol]; int err; lock_sock(sk); err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY; if (nlk_sk(sk)->bound) goto err; /* portid can be read locklessly from netlink_getname(). */ WRITE_ONCE(nlk_sk(sk)->portid, portid); sock_hold(sk); err = __netlink_insert(table, sk); if (err) { /* In case the hashtable backend returns with -EBUSY * from here, it must not escape to the caller. 
*/ if (unlikely(err == -EBUSY)) err = -EOVERFLOW; if (err == -EEXIST) err = -EADDRINUSE; sock_put(sk); goto err; } /* We need to ensure that the socket is hashed and visible. */ smp_wmb(); /* Paired with lockless reads from netlink_bind(), * netlink_connect() and netlink_sendmsg(). */ WRITE_ONCE(nlk_sk(sk)->bound, portid); err: release_sock(sk); return err; } static void netlink_remove(struct sock *sk) { struct netlink_table *table; table = &nl_table[sk->sk_protocol]; if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node, netlink_rhashtable_params)) { WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } netlink_table_grab(); if (nlk_sk(sk)->subscriptions) { __sk_del_bind_node(sk); netlink_update_listeners(sk); } if (sk->sk_protocol == NETLINK_GENERIC) atomic_inc(&genl_sk_destructing_cnt); netlink_table_ungrab(); } static struct proto netlink_proto = { .name = "NETLINK", .owner = THIS_MODULE, .obj_size = sizeof(struct netlink_sock), }; static int __netlink_create(struct net *net, struct socket *sock, struct mutex *dump_cb_mutex, int protocol, int kern) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); nlk = nlk_sk(sk); mutex_init(&nlk->nl_cb_mutex); lockdep_set_class_and_name(&nlk->nl_cb_mutex, nlk_cb_mutex_keys + protocol, nlk_cb_mutex_key_strings[protocol]); nlk->dump_cb_mutex = dump_cb_mutex; init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; return 0; } static int netlink_create(struct net *net, struct socket *sock, int protocol, int kern) { struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); void (*release)(struct sock *sock, unsigned long *groups); int err = 0; sock->state = SS_UNCONNECTED; if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; if (protocol < 0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; protocol = array_index_nospec(protocol, MAX_LINKS); netlink_lock_table(); #ifdef CONFIG_MODULES if (!nl_table[protocol].registered) { netlink_unlock_table(); request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); netlink_lock_table(); } #endif if (nl_table[protocol].registered && try_module_get(nl_table[protocol].module)) module = nl_table[protocol].module; else err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; unbind = nl_table[protocol].unbind; release = nl_table[protocol].release; netlink_unlock_table(); if (err < 0) goto out; err = __netlink_create(net, sock, cb_mutex, protocol, kern); if (err < 0) goto out_module; sock_prot_inuse_add(net, &netlink_proto, 1); nlk = nlk_sk(sock->sk); nlk->module = module; nlk->netlink_bind = bind; nlk->netlink_unbind = unbind; nlk->netlink_release = release; out: return err; out_module: module_put(module); goto out; } static void deferred_put_nlk_sk(struct rcu_head *head) { struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); struct sock *sk = &nlk->sk; kfree(nlk->groups); nlk->groups = NULL; if (!refcount_dec_and_test(&sk->sk_refcnt)) return; if (nlk->cb_running && nlk->cb.done) { INIT_WORK(&nlk->work, netlink_sock_destruct_work); schedule_work(&nlk->work); return; } sk_free(sk); } static int netlink_release(struct socket *sock) { struct sock *sk = sock->sk; struct netlink_sock *nlk; if (!sk) return 0; netlink_remove(sk); 
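/* The socket is now unhashed from nl_table. Orphan it, run the per-protocol release/unbind callbacks, notify NETLINK_URELEASE listeners if it was bound, and finally free it via call_rcu() below. */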
sock_orphan(sk); nlk = nlk_sk(sk); /* * OK. Socket is unlinked, any packets that arrive now * will be purged. */ if (nlk->netlink_release) nlk->netlink_release(sk, nlk->groups); /* must not acquire netlink_table_lock in any way again before unbind * and notifying genetlink is done as otherwise it might deadlock */ if (nlk->netlink_unbind) { int i; for (i = 0; i < nlk->ngroups; i++) if (test_bit(i, nlk->groups)) nlk->netlink_unbind(sock_net(sk), i + 1); } if (sk->sk_protocol == NETLINK_GENERIC && atomic_dec_return(&genl_sk_destructing_cnt) == 0) wake_up(&genl_sk_destructing_waitq); sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); skb_queue_purge(&sk->sk_write_queue); if (nlk->portid && nlk->bound) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, .portid = nlk->portid, }; blocking_notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); } module_put(nlk->module); if (netlink_is_kernel(sk)) { netlink_table_grab(); BUG_ON(nl_table[sk->sk_protocol].registered == 0); if (--nl_table[sk->sk_protocol].registered == 0) { struct listeners *old; old = nl_deref_protected(nl_table[sk->sk_protocol].listeners); RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL); kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; nl_table[sk->sk_protocol].unbind = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } netlink_table_ungrab(); } sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); /* Because struct net might disappear soon, do not keep a pointer. */ if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); /* Because of deferred_put_nlk_sk and use of work queue, * it is possible netns will be freed before this socket. */ sock_net_set(sk, &init_net); __netns_tracker_alloc(&init_net, &sk->ns_tracker, false, GFP_KERNEL); } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct netlink_table *table = &nl_table[sk->sk_protocol]; s32 portid = task_tgid_vnr(current); int err; s32 rover = -4096; bool ok; retry: cond_resched(); rcu_read_lock(); ok = !__netlink_lookup(table, portid, net); rcu_read_unlock(); if (!ok) { /* Bind collision, search negative portid values. */ if (rover == -4096) /* rover will be in range [S32_MIN, -4097] */ rover = S32_MIN + get_random_u32_below(-4096 - S32_MIN); else if (rover >= -4096) rover = -4097; portid = rover--; goto retry; } err = netlink_insert(sk, portid); if (err == -EADDRINUSE) goto retry; /* If 2 threads race to autobind, that is fine. */ if (err == -EBUSY) err = 0; return err; } /** * __netlink_ns_capable - General netlink message capability test * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. * @user_ns: The user namespace of the capability to use * @cap: The capability to use * * Test to see if the opener of the socket we received the message * from had when the netlink socket was created and the sender of the * message has the capability @cap in the user namespace @user_ns. 
*/ bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, struct user_namespace *user_ns, int cap) { return ((nsp->flags & NETLINK_SKB_DST) || file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) && ns_capable(user_ns, cap); } EXPORT_SYMBOL(__netlink_ns_capable); /** * netlink_ns_capable - General netlink message capability test * @skb: socket buffer holding a netlink command from userspace * @user_ns: The user namespace of the capability to use * @cap: The capability to use * * Test to see if the opener of the socket we received the message * from had when the netlink socket was created and the sender of the * message has the capability @cap in the user namespace @user_ns. */ bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *user_ns, int cap) { return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); } EXPORT_SYMBOL(netlink_ns_capable); /** * netlink_capable - Netlink global message capability test * @skb: socket buffer holding a netlink command from userspace * @cap: The capability to use * * Test to see if the opener of the socket we received the message * from had when the netlink socket was created and the sender of the * message has the capability @cap in all user namespaces. */ bool netlink_capable(const struct sk_buff *skb, int cap) { return netlink_ns_capable(skb, &init_user_ns, cap); } EXPORT_SYMBOL(netlink_capable); /** * netlink_net_capable - Netlink network namespace message capability test * @skb: socket buffer holding a netlink command from userspace * @cap: The capability to use * * Test to see if the opener of the socket we received the message * from had when the netlink socket was created and the sender of the * message has the capability @cap over the network namespace of * the socket we received the message from. 
*/ bool netlink_net_capable(const struct sk_buff *skb, int cap) { return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); } EXPORT_SYMBOL(netlink_net_capable); static inline int netlink_allowed(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].flags & flag) || ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); } static void netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) { struct netlink_sock *nlk = nlk_sk(sk); if (nlk->subscriptions && !subscriptions) __sk_del_bind_node(sk); else if (!nlk->subscriptions && subscriptions) sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); nlk->subscriptions = subscriptions; } static int netlink_realloc_groups(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); unsigned int groups; unsigned long *new_groups; int err = 0; netlink_table_grab(); groups = nl_table[sk->sk_protocol].groups; if (!nl_table[sk->sk_protocol].registered) { err = -ENOENT; goto out_unlock; } if (nlk->ngroups >= groups) goto out_unlock; new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC); if (new_groups == NULL) { err = -ENOMEM; goto out_unlock; } memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0, NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); nlk->groups = new_groups; nlk->ngroups = groups; out_unlock: netlink_table_ungrab(); return err; } static void netlink_undo_bind(int group, long unsigned int groups, struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); int undo; if (!nlk->netlink_unbind) return; for (undo = 0; undo < group; undo++) if (test_bit(undo, &groups)) nlk->netlink_unbind(sock_net(sk), undo + 1); } static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err = 0; unsigned long groups; bool bound; if (addr_len < sizeof(struct sockaddr_nl)) return -EINVAL; if (nladdr->nl_family != AF_NETLINK) return -EINVAL; groups = nladdr->nl_groups; /* Only superuser is allowed to listen multicasts */ if (groups) { if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) return err; } if (nlk->ngroups < BITS_PER_LONG) groups &= (1UL << nlk->ngroups) - 1; /* Paired with WRITE_ONCE() in netlink_insert() */ bound = READ_ONCE(nlk->bound); if (bound) { /* Ensure nlk->portid is up-to-date. */ smp_rmb(); if (nladdr->nl_pid != nlk->portid) return -EINVAL; } if (nlk->netlink_bind && groups) { int group; /* nl_groups is a u32, so cap the maximum groups we can bind */ for (group = 0; group < BITS_PER_TYPE(u32); group++) { if (!test_bit(group, &groups)) continue; err = nlk->netlink_bind(net, group + 1); if (!err) continue; netlink_undo_bind(group, groups, sk); return err; } } /* No need for barriers here as we return to user-space without * using any of the bound attributes. */ netlink_lock_table(); if (!bound) { err = nladdr->nl_pid ? 
netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); goto unlock; } } if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) goto unlock; netlink_unlock_table(); netlink_table_grab(); netlink_update_subscriptions(sk, nlk->subscriptions + hweight32(groups) - hweight32(nlk->groups[0])); nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups; netlink_update_listeners(sk); netlink_table_ungrab(); return 0; unlock: netlink_unlock_table(); return err; } static int netlink_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { int err = 0; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; if (alen < sizeof(addr->sa_family)) return -EINVAL; if (addr->sa_family == AF_UNSPEC) { /* paired with READ_ONCE() in netlink_getsockbyportid() */ WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED); /* dst_portid and dst_group can be read locklessly */ WRITE_ONCE(nlk->dst_portid, 0); WRITE_ONCE(nlk->dst_group, 0); return 0; } if (addr->sa_family != AF_NETLINK) return -EINVAL; if (alen < sizeof(struct sockaddr_nl)) return -EINVAL; if ((nladdr->nl_groups || nladdr->nl_pid) && !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; /* No need for barriers here as we return to user-space without * using any of the bound attributes. * Paired with WRITE_ONCE() in netlink_insert(). */ if (!READ_ONCE(nlk->bound)) err = netlink_autobind(sock); if (err == 0) { /* paired with READ_ONCE() in netlink_getsockbyportid() */ WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED); /* dst_portid and dst_group can be read locklessly */ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); } return err; } static int netlink_getname(struct socket *sock, struct sockaddr *addr, int peer) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr); nladdr->nl_family = AF_NETLINK; nladdr->nl_pad = 0; if (peer) { /* Paired with WRITE_ONCE() in netlink_connect() */ nladdr->nl_pid = READ_ONCE(nlk->dst_portid); nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group)); } else { /* Paired with WRITE_ONCE() in netlink_insert() */ nladdr->nl_pid = READ_ONCE(nlk->portid); netlink_lock_table(); nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; netlink_unlock_table(); } return sizeof(*nladdr); } static int netlink_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { /* try to hand this ioctl down to the NIC drivers. 
*/ return -ENOIOCTLCMD; } static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; struct netlink_sock *nlk; sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); /* dst_portid and sk_state can be changed in netlink_connect() */ if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED && READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } return sock; } struct sock *netlink_getsockbyfilp(struct file *filp) { struct inode *inode = file_inode(filp); struct sock *sock; if (!S_ISSOCK(inode->i_mode)) return ERR_PTR(-ENOTSOCK); sock = SOCKET_I(inode)->sk; if (sock->sk_family != AF_NETLINK) return ERR_PTR(-EINVAL); sock_hold(sock); return sock; } struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) { size_t head_size = SKB_HEAD_ALIGN(size); struct sk_buff *skb; void *data; if (head_size <= PAGE_SIZE || broadcast) return alloc_skb(size, GFP_KERNEL); data = kvmalloc(head_size, GFP_KERNEL); if (!data) return NULL; skb = __build_skb(data, head_size); if (!skb) kvfree(data); else if (is_vmalloc_addr(data)) skb->destructor = netlink_skb_destructor; return skb; } /* * Attach a skb to a netlink socket. * The caller must hold a reference to the destination socket. On error, the * reference is dropped. The skb is not sent to the destination, just all * error checks are performed and memory in the queue is reserved. * Return values: * < 0: error. skb freed, reference to sock dropped. * 0: continue * 1: repeat lookup - reference dropped while waiting for socket memory. */ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { struct netlink_sock *nlk; nlk = nlk_sk(sk); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(NETLINK_S_CONGESTED, &nlk->state))) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) netlink_overrun(sk); sock_put(sk); kfree_skb(skb); return -EAGAIN; } __set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&nlk->wait, &wait); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(NETLINK_S_CONGESTED, &nlk->state)) && !sock_flag(sk, SOCK_DEAD)) *timeo = schedule_timeout(*timeo); __set_current_state(TASK_RUNNING); remove_wait_queue(&nlk->wait, &wait); sock_put(sk); if (signal_pending(current)) { kfree_skb(skb); return sock_intr_errno(*timeo); } return 1; } netlink_skb_set_owner_r(skb, sk); return 0; } static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; netlink_deliver_tap(sock_net(sk), skb); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); return len; } int netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = __netlink_sendskb(sk, skb); sock_put(sk); return len; } void netlink_detachskb(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); sock_put(sk); } static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) { int delta; WARN_ON(skb->sk != NULL); delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, allocation); if (!nskb) return skb; consume_skb(skb); skb = nskb; } pskb_expand_head(skb, 0, -delta, (allocation & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN | __GFP_NORETRY); return skb; } static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, 
struct sock *ssk) { int ret; struct netlink_sock *nlk = nlk_sk(sk); ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { ret = skb->len; netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; netlink_deliver_tap_kernel(sk, ssk, skb); nlk->netlink_rcv(skb); consume_skb(skb); } else { kfree_skb(skb); } sock_put(sk); return ret; } int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 portid, int nonblock) { struct sock *sk; int err; long timeo; skb = netlink_trim(skb, gfp_any()); timeo = sock_sndtimeo(ssk, nonblock); retry: sk = netlink_getsockbyportid(ssk, portid); if (IS_ERR(sk)) { kfree_skb(skb); return PTR_ERR(sk); } if (netlink_is_kernel(sk)) return netlink_unicast_kernel(sk, skb, ssk); if (sk_filter(sk, skb)) { err = skb->len; kfree_skb(skb); sock_put(sk); return err; } err = netlink_attachskb(sk, skb, &timeo, ssk); if (err == 1) goto retry; if (err) return err; return netlink_sendskb(sk, skb); } EXPORT_SYMBOL(netlink_unicast); int netlink_has_listeners(struct sock *sk, unsigned int group) { int res = 0; struct listeners *listeners; BUG_ON(!netlink_is_kernel(sk)); rcu_read_lock(); listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); if (listeners && group - 1 < nl_table[sk->sk_protocol].groups) res = test_bit(group - 1, listeners->masks); rcu_read_unlock(); return res; } EXPORT_SYMBOL_GPL(netlink_has_listeners); bool netlink_strict_get_check(struct sk_buff *skb) { return nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk); } EXPORT_SYMBOL_GPL(netlink_strict_get_check); static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); } return -1; } struct netlink_broadcast_data { struct sock *exclude_sk; struct net *net; u32 portid; u32 group; int failure; int delivery_failure; int congested; int delivered; gfp_t allocation; struct sk_buff *skb, *skb2; int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); void *tx_data; }; static void do_one_broadcast(struct sock *sk, struct netlink_broadcast_data *p) { struct netlink_sock *nlk = nlk_sk(sk); int val; if (p->exclude_sk == sk) return; if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) return; if (!net_eq(sock_net(sk), p->net)) { if (!nlk_test_bit(LISTEN_ALL_NSID, sk)) return; if (!peernet_has_id(sock_net(sk), p->net)) return; if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, CAP_NET_BROADCAST)) return; } if (p->failure) { netlink_overrun(sk); return; } sock_hold(sk); if (p->skb2 == NULL) { if (skb_shared(p->skb)) { p->skb2 = skb_clone(p->skb, p->allocation); } else { p->skb2 = skb_get(p->skb); /* * skb ownership may have been set when * delivered to a previous socket. */ skb_orphan(p->skb2); } } if (p->skb2 == NULL) { netlink_overrun(sk); /* Clone failed. Notify ALL listeners. 
*/ p->failure = 1; if (nlk_test_bit(BROADCAST_SEND_ERROR, sk)) p->delivery_failure = 1; goto out; } if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { kfree_skb(p->skb2); p->skb2 = NULL; goto out; } if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; goto out; } NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) NETLINK_CB(p->skb2).nsid_is_set = true; val = netlink_broadcast_deliver(sk, p->skb2); if (val < 0) { netlink_overrun(sk); if (nlk_test_bit(BROADCAST_SEND_ERROR, sk)) p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; p->skb2 = NULL; } out: sock_put(sk); } int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, netlink_filter_fn filter, void *filter_data) { struct net *net = sock_net(ssk); struct netlink_broadcast_data info; struct sock *sk; skb = netlink_trim(skb, allocation); info.exclude_sk = ssk; info.net = net; info.portid = portid; info.group = group; info.failure = 0; info.delivery_failure = 0; info.congested = 0; info.delivered = 0; info.allocation = allocation; info.skb = skb; info.skb2 = NULL; info.tx_filter = filter; info.tx_data = filter_data; /* While we sleep in clone, do not allow to change socket list */ netlink_lock_table(); sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); consume_skb(skb); netlink_unlock_table(); if (info.delivery_failure) { kfree_skb(info.skb2); return -ENOBUFS; } consume_skb(info.skb2); if (info.delivered) { if (info.congested && gfpflags_allow_blocking(allocation)) yield(); return 0; } return -ESRCH; } EXPORT_SYMBOL(netlink_broadcast_filtered); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation) { return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, NULL, NULL); } EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { struct sock *exclude_sk; u32 portid; u32 group; int code; }; static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) { struct netlink_sock *nlk = nlk_sk(sk); int ret = 0; if (sk == p->exclude_sk) goto out; if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) goto out; if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; if (p->code == ENOBUFS && nlk_test_bit(RECV_NO_ENOBUFS, sk)) { ret = 1; goto out; } WRITE_ONCE(sk->sk_err, p->code); sk_error_report(sk); out: return ret; } /** * netlink_set_err - report error to broadcast listeners * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() * @portid: the PORTID of a process that we want to skip (if any) * @group: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the * NETLINK_NO_ENOBUFS socket option. 
*/ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; unsigned long flags; struct sock *sk; int ret = 0; info.exclude_sk = ssk; info.portid = portid; info.group = group; /* sk->sk_err wants a positive error value */ info.code = -code; read_lock_irqsave(&nl_table_lock, flags); sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) ret += do_one_set_err(sk, &info); read_unlock_irqrestore(&nl_table_lock, flags); return ret; } EXPORT_SYMBOL(netlink_set_err); /* must be called with netlink table grabbed */ static void netlink_update_socket_mc(struct netlink_sock *nlk, unsigned int group, int is_new) { int old, new = !!is_new, subscriptions; old = test_bit(group - 1, nlk->groups); subscriptions = nlk->subscriptions - old + new; __assign_bit(group - 1, nlk->groups, new); netlink_update_subscriptions(&nlk->sk, subscriptions); netlink_update_listeners(&nlk->sk); } static int netlink_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); unsigned int val = 0; int nr = -1; if (level != SOL_NETLINK) return -ENOPROTOOPT; if (optlen >= sizeof(int) && copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (optname) { case NETLINK_PKTINFO: nr = NETLINK_F_RECV_PKTINFO; break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { int err; if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) return err; if (!val || val - 1 >= nlk->ngroups) return -EINVAL; if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { err = nlk->netlink_bind(sock_net(sk), val); if (err) return err; } netlink_table_grab(); netlink_update_socket_mc(nlk, val, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) nlk->netlink_unbind(sock_net(sk), val); break; } case NETLINK_BROADCAST_ERROR: nr = NETLINK_F_BROADCAST_SEND_ERROR; break; case NETLINK_NO_ENOBUFS: assign_bit(NETLINK_F_RECV_NO_ENOBUFS, &nlk->flags, val); if (val) { clear_bit(NETLINK_S_CONGESTED, &nlk->state); wake_up_interruptible(&nlk->wait); } break; case NETLINK_LISTEN_ALL_NSID: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) return -EPERM; nr = NETLINK_F_LISTEN_ALL_NSID; break; case NETLINK_CAP_ACK: nr = NETLINK_F_CAP_ACK; break; case NETLINK_EXT_ACK: nr = NETLINK_F_EXT_ACK; break; case NETLINK_GET_STRICT_CHK: nr = NETLINK_F_STRICT_CHK; break; default: return -ENOPROTOOPT; } if (nr >= 0) assign_bit(nr, &nlk->flags, val); return 0; } static int netlink_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); unsigned int flag; int len, val; if (level != SOL_NETLINK) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; switch (optname) { case NETLINK_PKTINFO: flag = NETLINK_F_RECV_PKTINFO; break; case NETLINK_BROADCAST_ERROR: flag = NETLINK_F_BROADCAST_SEND_ERROR; break; case NETLINK_NO_ENOBUFS: flag = NETLINK_F_RECV_NO_ENOBUFS; break; case NETLINK_LIST_MEMBERSHIPS: { int pos, idx, shift, err = 0; netlink_lock_table(); for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { if (len - pos < sizeof(u32)) break; idx = pos / sizeof(unsigned long); shift = (pos % sizeof(unsigned long)) * 8; if (put_user((u32)(nlk->groups[idx] >> shift), (u32 __user *)(optval + pos))) { err = -EFAULT; break; } } if 
(put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen)) err = -EFAULT; netlink_unlock_table(); return err; } case NETLINK_LISTEN_ALL_NSID: flag = NETLINK_F_LISTEN_ALL_NSID; break; case NETLINK_CAP_ACK: flag = NETLINK_F_CAP_ACK; break; case NETLINK_EXT_ACK: flag = NETLINK_F_EXT_ACK; break; case NETLINK_GET_STRICT_CHK: flag = NETLINK_F_STRICT_CHK; break; default: return -ENOPROTOOPT; } if (len < sizeof(int)) return -EINVAL; len = sizeof(int); val = test_bit(flag, &nlk->flags); if (put_user(len, optlen) || copy_to_user(optval, &val, len)) return -EFAULT; return 0; } static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { struct nl_pktinfo info; info.group = NETLINK_CB(skb).dst_group; put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { if (!NETLINK_CB(skb).nsid_is_set) return; put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), &NETLINK_CB(skb).nsid); } static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); u32 dst_portid; u32 dst_group; struct sk_buff *skb; int err; struct scm_cookie scm; u32 netlink_skb_flags = 0; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; if (len == 0) { pr_warn_once("Zero length message leads to an empty skb\n"); return -ENODATA; } err = scm_send(sock, msg, &scm, true); if (err < 0) return err; if (msg->msg_namelen) { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_nl)) goto out; if (addr->nl_family != AF_NETLINK) goto out; dst_portid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; if ((dst_group || dst_portid) && !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) goto out; netlink_skb_flags |= NETLINK_SKB_DST; } else { /* Paired with WRITE_ONCE() in netlink_connect() */ dst_portid = READ_ONCE(nlk->dst_portid); dst_group = READ_ONCE(nlk->dst_group); } /* Paired with WRITE_ONCE() in netlink_insert() */ if (!READ_ONCE(nlk->bound)) { err = netlink_autobind(sock); if (err) goto out; } else { /* Ensure nlk is hashed and visible. 
*/ smp_rmb(); } err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; skb = netlink_alloc_large_skb(len, dst_group); if (skb == NULL) goto out; NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = scm.creds; NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; if (memcpy_from_msg(skb_put(skb, len), msg, len)) { kfree_skb(skb); goto out; } err = security_netlink_send(sk, skb); if (err) { kfree_skb(skb); goto out; } if (dst_group) { refcount_inc(&skb->users); netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); } err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags & MSG_DONTWAIT); out: scm_destroy(&scm); return err; } static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); size_t copied, max_recvmsg_len; struct sk_buff *skb, *data_skb; int err, ret; if (flags & MSG_OOB) return -EOPNOTSUPP; copied = 0; skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; data_skb = skb; #ifdef CONFIG_COMPAT_NETLINK_MESSAGES if (unlikely(skb_shinfo(skb)->frag_list)) { /* * If this skb has a frag_list, then here that means that we * will have to use the frag_list skb's data for compat tasks * and the regular skb's data for normal (non-compat) tasks. * * If we need to send the compat skb, assign it to the * 'data_skb' variable so that it will be used below for data * copying. We keep 'skb' for everything else, including * freeing both later. */ if (flags & MSG_CMSG_COMPAT) data_skb = skb_shinfo(skb)->frag_list; } #endif /* Record the max length of recvmsg() calls for future allocations */ max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len); max_recvmsg_len = min_t(size_t, max_recvmsg_len, SKB_WITH_OVERHEAD(32768)); WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len); copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } err = skb_copy_datagram_msg(data_skb, 0, msg, copied); if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); addr->nl_family = AF_NETLINK; addr->nl_pad = 0; addr->nl_pid = NETLINK_CB(skb).portid; addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } if (nlk_test_bit(RECV_PKTINFO, sk)) netlink_cmsg_recv_pktinfo(msg, skb); if (nlk_test_bit(LISTEN_ALL_NSID, sk)) netlink_cmsg_listen_all_nsid(sk, msg, skb); memset(&scm, 0, sizeof(scm)); scm.creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = data_skb->len; skb_free_datagram(sk, skb); if (READ_ONCE(nlk->cb_running) && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk, false); if (ret) { WRITE_ONCE(sk->sk_err, -ret); sk_error_report(sk); } } scm_recv(sock, msg, &scm, flags); out: netlink_rcv_wake(sk); return err ? : copied; } static void netlink_data_ready(struct sock *sk) { BUG(); } /* * We export these functions to other modules. They provide a * complete set of kernel non-blocking support for message * queueing. */ struct sock * __netlink_kernel_create(struct net *net, int unit, struct module *module, struct netlink_kernel_cfg *cfg) { struct socket *sock; struct sock *sk; struct netlink_sock *nlk; struct listeners *listeners = NULL; struct mutex *cb_mutex = cfg ? 
cfg->cb_mutex : NULL; unsigned int groups; BUG_ON(!nl_table); if (unit < 0 || unit >= MAX_LINKS) return NULL; if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) goto out_sock_release_nosk; sk = sock->sk; if (!cfg || cfg->groups < 32) groups = 32; else groups = cfg->groups; listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); if (!listeners) goto out_sock_release; sk->sk_data_ready = netlink_data_ready; if (cfg && cfg->input) nlk_sk(sk)->netlink_rcv = cfg->input; if (netlink_insert(sk, 0)) goto out_sock_release; nlk = nlk_sk(sk); set_bit(NETLINK_F_KERNEL_SOCKET, &nlk->flags); netlink_table_grab(); if (!nl_table[unit].registered) { nl_table[unit].groups = groups; rcu_assign_pointer(nl_table[unit].listeners, listeners); nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; nl_table[unit].unbind = cfg->unbind; nl_table[unit].release = cfg->release; nl_table[unit].flags = cfg->flags; } nl_table[unit].registered = 1; } else { kfree(listeners); nl_table[unit].registered++; } netlink_table_ungrab(); return sk; out_sock_release: kfree(listeners); netlink_kernel_release(sk); return NULL; out_sock_release_nosk: sock_release(sock); return NULL; } EXPORT_SYMBOL(__netlink_kernel_create); void netlink_kernel_release(struct sock *sk) { if (sk == NULL || sk->sk_socket == NULL) return; sock_release(sk->sk_socket); } EXPORT_SYMBOL(netlink_kernel_release); int __netlink_change_ngroups(struct sock *sk, unsigned int groups) { struct listeners *new, *old; struct netlink_table *tbl = &nl_table[sk->sk_protocol]; if (groups < 32) groups = 32; if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); if (!new) return -ENOMEM; old = nl_deref_protected(tbl->listeners); memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); rcu_assign_pointer(tbl->listeners, new); kfree_rcu(old, rcu); } tbl->groups = groups; return 0; } /** * netlink_change_ngroups - change number of multicast groups * * This changes the number of multicast groups that are available * on a certain netlink family. Note that it is not possible to * change the number of groups to below 32. Also note that it does * not implicitly call netlink_clear_multicast_users() when the * number of groups is reduced. * * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). * @groups: The new number of groups. */ int netlink_change_ngroups(struct sock *sk, unsigned int groups) { int err; netlink_table_grab(); err = __netlink_change_ngroups(sk, groups); netlink_table_ungrab(); return err; } void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) { struct sock *sk; struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; sk_for_each_bound(sk, &tbl->mc_list) netlink_update_socket_mc(nlk_sk(sk), group, 0); } struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; int size = nlmsg_msg_size(len); nlh = skb_put(skb, NLMSG_ALIGN(size)); nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size); return nlh; } EXPORT_SYMBOL(__nlmsg_put); /* * It looks a bit ugly. * It would be better to create kernel thread. 
*/ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb, struct netlink_callback *cb, struct netlink_ext_ack *extack) { struct nlmsghdr *nlh; nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(nlk->dump_done_errno), NLM_F_MULTI | cb->answer_flags); if (WARN_ON(!nlh)) return -ENOBUFS; nl_dump_check_consistent(cb, nlh); memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno)); if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) { nlh->nlmsg_flags |= NLM_F_ACK_TLVS; if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)) nlmsg_end(skb, nlh); } return 0; } static int netlink_dump(struct sock *sk, bool lock_taken) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; int alloc_min_size; int alloc_size; if (!lock_taken) mutex_lock(&nlk->nl_cb_mutex); if (!nlk->cb_running) { err = -EINVAL; goto errout_skb; } if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being * required, but it makes sense to _attempt_ a 16K bytes allocation * to reduce number of system calls on dump operations, if user * ever provided a big enough buffer. */ cb = &nlk->cb; alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len); if (alloc_min_size < max_recvmsg_len) { alloc_size = max_recvmsg_len; skb = alloc_skb(alloc_size, (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN | __GFP_NORETRY); } if (!skb) { alloc_size = alloc_min_size; skb = alloc_skb(alloc_size, GFP_KERNEL); } if (!skb) goto errout_skb; /* Trim skb to allocated size. User is expected to provide buffer as * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at * netlink_recvmsg())). dump will pack as many smaller messages as * could fit within the allocated skb. skb is typically allocated * with larger space than required (could be as much as near 2x the * requested size with align to next power of 2 approach). Allowing * dump to use the excess space makes it difficult for a user to have a * reasonable static buffer based on the expected largest dump of a * single netdev. The outcome is MSG_TRUNC error. */ skb_reserve(skb, skb_tailroom(skb) - alloc_size); /* Make sure malicious BPF programs can not read unitialized memory * from skb->head -> skb->data */ skb_reset_network_header(skb); skb_reset_mac_header(skb); netlink_skb_set_owner_r(skb, sk); if (nlk->dump_done_errno > 0) { struct mutex *extra_mutex = nlk->dump_cb_mutex; cb->extack = &extack; if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED) extra_mutex = NULL; if (extra_mutex) mutex_lock(extra_mutex); nlk->dump_done_errno = cb->dump(skb, cb); if (extra_mutex) mutex_unlock(extra_mutex); /* EMSGSIZE plus something already in the skb means * that there's more to dump but current skb has filled up. * If the callback really wants to return EMSGSIZE to user space * it needs to do so again, on the next cb->dump() call, * without putting data in the skb. 
*/ if (nlk->dump_done_errno == -EMSGSIZE && skb->len) nlk->dump_done_errno = skb->len; cb->extack = NULL; } if (nlk->dump_done_errno > 0 || skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { mutex_unlock(&nlk->nl_cb_mutex); if (sk_filter(sk, skb)) kfree_skb(skb); else __netlink_sendskb(sk, skb); return 0; } if (netlink_dump_done(nlk, skb, cb, &extack)) goto errout_skb; #ifdef CONFIG_COMPAT_NETLINK_MESSAGES /* frag_list skb's data is used for compat tasks * and the regular skb's data for normal (non-compat) tasks. * See netlink_recvmsg(). */ if (unlikely(skb_shinfo(skb)->frag_list)) { if (netlink_dump_done(nlk, skb_shinfo(skb)->frag_list, cb, &extack)) goto errout_skb; } #endif if (sk_filter(sk, skb)) kfree_skb(skb); else __netlink_sendskb(sk, skb); if (cb->done) cb->done(cb); WRITE_ONCE(nlk->cb_running, false); module = cb->module; skb = cb->skb; mutex_unlock(&nlk->nl_cb_mutex); module_put(module); consume_skb(skb); return 0; errout_skb: mutex_unlock(&nlk->nl_cb_mutex); kfree_skb(skb); return err; } int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *control) { struct netlink_callback *cb; struct netlink_sock *nlk; struct sock *sk; int ret; refcount_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { ret = -ECONNREFUSED; goto error_free; } nlk = nlk_sk(sk); mutex_lock(&nlk->nl_cb_mutex); /* A dump is in progress... */ if (nlk->cb_running) { ret = -EBUSY; goto error_unlock; } /* add reference of module which cb->dump belongs to */ if (!try_module_get(control->module)) { ret = -EPROTONOSUPPORT; goto error_unlock; } cb = &nlk->cb; memset(cb, 0, sizeof(*cb)); cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; cb->data = control->data; cb->module = control->module; cb->min_dump_alloc = control->min_dump_alloc; cb->flags = control->flags; cb->skb = skb; cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk); if (control->start) { cb->extack = control->extack; ret = control->start(cb); cb->extack = NULL; if (ret) goto error_put; } WRITE_ONCE(nlk->cb_running, true); nlk->dump_done_errno = INT_MAX; ret = netlink_dump(sk, true); sock_put(sk); if (ret) return ret; /* We successfully started a dump, by returning -EINTR we * signal not to send ACK even if it was requested. 
*/ return -EINTR; error_put: module_put(control->module); error_unlock: sock_put(sk); mutex_unlock(&nlk->nl_cb_mutex); error_free: kfree_skb(skb); return ret; } EXPORT_SYMBOL(__netlink_dump_start); static size_t netlink_ack_tlv_len(struct netlink_sock *nlk, int err, const struct netlink_ext_ack *extack) { size_t tlvlen; if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) return 0; tlvlen = 0; if (extack->_msg) tlvlen += nla_total_size(strlen(extack->_msg) + 1); if (extack->cookie_len) tlvlen += nla_total_size(extack->cookie_len); /* Following attributes are only reported as error (not warning) */ if (!err) return tlvlen; if (extack->bad_attr) tlvlen += nla_total_size(sizeof(u32)); if (extack->policy) tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy); if (extack->miss_type) tlvlen += nla_total_size(sizeof(u32)); if (extack->miss_nest) tlvlen += nla_total_size(sizeof(u32)); return tlvlen; } static void netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack) { if (extack->_msg) WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)); if (extack->cookie_len) WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, extack->cookie_len, extack->cookie)); if (!err) return; if (extack->bad_attr && !WARN_ON((u8 *)extack->bad_attr < in_skb->data || (u8 *)extack->bad_attr >= in_skb->data + in_skb->len)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, (u8 *)extack->bad_attr - (u8 *)nlh)); if (extack->policy) netlink_policy_dump_write_attr(skb, extack->policy, NLMSGERR_ATTR_POLICY); if (extack->miss_type) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE, extack->miss_type)); if (extack->miss_nest && !WARN_ON((u8 *)extack->miss_nest < in_skb->data || (u8 *)extack->miss_nest > in_skb->data + in_skb->len)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST, (u8 *)extack->miss_nest - (u8 *)nlh)); } void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack) { struct sk_buff *skb; struct nlmsghdr *rep; struct nlmsgerr *errmsg; size_t payload = sizeof(*errmsg); struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); unsigned int flags = 0; size_t tlvlen; /* Error messages get the original request appened, unless the user * requests to cap the error message, and get extra error data if * requested. 
*/ if (err && !test_bit(NETLINK_F_CAP_ACK, &nlk->flags)) payload += nlmsg_len(nlh); else flags |= NLM_F_CAPPED; tlvlen = netlink_ack_tlv_len(nlk, err, extack); if (tlvlen) flags |= NLM_F_ACK_TLVS; skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); if (!skb) goto err_skb; rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, sizeof(*errmsg), flags); if (!rep) goto err_bad_put; errmsg = nlmsg_data(rep); errmsg->error = err; errmsg->msg = *nlh; if (!(flags & NLM_F_CAPPED)) { if (!nlmsg_append(skb, nlmsg_len(nlh))) goto err_bad_put; memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh), nlmsg_len(nlh)); } if (tlvlen) netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack); nlmsg_end(skb, rep); nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid); return; err_bad_put: nlmsg_free(skb); err_skb: WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS); sk_error_report(NETLINK_CB(in_skb).sk); } EXPORT_SYMBOL(netlink_ack); int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *)) { struct netlink_ext_ack extack; struct nlmsghdr *nlh; int err; while (skb->len >= nlmsg_total_size(0)) { int msglen; memset(&extack, 0, sizeof(extack)); nlh = nlmsg_hdr(skb); err = 0; if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return 0; /* Only requests are handled by the kernel */ if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) goto ack; /* Skip control messages */ if (nlh->nlmsg_type < NLMSG_MIN_TYPE) goto ack; err = cb(skb, nlh, &extack); if (err == -EINTR) goto skip; ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) netlink_ack(skb, nlh, err, &extack); skip: msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; skb_pull(skb, msglen); } return 0; } EXPORT_SYMBOL(netlink_rcv_skb); /** * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use * @skb: notification message * @portid: destination netlink portid for reports or 0 * @group: destination multicast group or 0 * @report: 1 to report back, 0 to disable * @flags: allocation flags */ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags) { int err = 0; if (group) { int exclude_portid = 0; if (report) { refcount_inc(&skb->users); exclude_portid = portid; } /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); if (err == -ESRCH) err = 0; } if (report) { int err2; err2 = nlmsg_unicast(sk, skb, portid); if (!err) err = err2; } return err; } EXPORT_SYMBOL(nlmsg_notify); #ifdef CONFIG_PROC_FS struct nl_seq_iter { struct seq_net_private p; struct rhashtable_iter hti; int link; }; static void netlink_walk_start(struct nl_seq_iter *iter) { rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti); rhashtable_walk_start(&iter->hti); } static void netlink_walk_stop(struct nl_seq_iter *iter) { rhashtable_walk_stop(&iter->hti); rhashtable_walk_exit(&iter->hti); } static void *__netlink_seq_next(struct seq_file *seq) { struct nl_seq_iter *iter = seq->private; struct netlink_sock *nlk; do { for (;;) { nlk = rhashtable_walk_next(&iter->hti); if (IS_ERR(nlk)) { if (PTR_ERR(nlk) == -EAGAIN) continue; return nlk; } if (nlk) break; netlink_walk_stop(iter); if (++iter->link >= MAX_LINKS) return NULL; netlink_walk_start(iter); } } while (sock_net(&nlk->sk) != seq_file_net(seq)); return nlk; } static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) __acquires(RCU) { 
struct nl_seq_iter *iter = seq->private; void *obj = SEQ_START_TOKEN; loff_t pos; iter->link = 0; netlink_walk_start(iter); for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) obj = __netlink_seq_next(seq); return obj; } static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return __netlink_seq_next(seq); } static void netlink_native_seq_stop(struct seq_file *seq, void *v) { struct nl_seq_iter *iter = seq->private; if (iter->link >= MAX_LINKS) return; netlink_walk_stop(iter); } static int netlink_native_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, "sk Eth Pid Groups " "Rmem Wmem Dump Locks Drops Inode\n"); } else { struct sock *s = v; struct netlink_sock *nlk = nlk_sk(s); seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n", s, s->sk_protocol, nlk->portid, nlk->groups ? (u32)nlk->groups[0] : 0, sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), READ_ONCE(nlk->cb_running), refcount_read(&s->sk_refcnt), atomic_read(&s->sk_drops), sock_i_ino(s) ); } return 0; } #ifdef CONFIG_BPF_SYSCALL struct bpf_iter__netlink { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct netlink_sock *, sk); }; DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk) static int netlink_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, void *v) { struct bpf_iter__netlink ctx; meta->seq_num--; /* skip SEQ_START_TOKEN */ ctx.meta = meta; ctx.sk = nlk_sk((struct sock *)v); return bpf_iter_run_prog(prog, &ctx); } static int netlink_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; meta.seq = seq; prog = bpf_iter_get_info(&meta, false); if (!prog) return netlink_native_seq_show(seq, v); if (v != SEQ_START_TOKEN) return netlink_prog_seq_show(prog, &meta, v); return 0; } static void netlink_seq_stop(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; if (!v) { meta.seq = seq; prog = bpf_iter_get_info(&meta, true); if (prog) (void)netlink_prog_seq_show(prog, &meta, v); } netlink_native_seq_stop(seq, v); } #else static int netlink_seq_show(struct seq_file *seq, void *v) { return netlink_native_seq_show(seq, v); } static void netlink_seq_stop(struct seq_file *seq, void *v) { netlink_native_seq_stop(seq, v); } #endif static const struct seq_operations netlink_seq_ops = { .start = netlink_seq_start, .next = netlink_seq_next, .stop = netlink_seq_stop, .show = netlink_seq_show, }; #endif int netlink_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&netlink_chain, nb); } EXPORT_SYMBOL(netlink_register_notifier); int netlink_unregister_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&netlink_chain, nb); } EXPORT_SYMBOL(netlink_unregister_notifier); static const struct proto_ops netlink_ops = { .family = PF_NETLINK, .owner = THIS_MODULE, .release = netlink_release, .bind = netlink_bind, .connect = netlink_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, .poll = datagram_poll, .ioctl = netlink_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = netlink_setsockopt, .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, .mmap = sock_no_mmap, }; static const struct net_proto_family netlink_family_ops = { .family = PF_NETLINK, .create = netlink_create, .owner = THIS_MODULE, /* for consistency 8) */ }; static int __net_init netlink_net_init(struct net *net) { 
#ifdef CONFIG_PROC_FS if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops, sizeof(struct nl_seq_iter))) return -ENOMEM; #endif return 0; } static void __net_exit netlink_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS remove_proc_entry("netlink", net->proc_net); #endif } static void __init netlink_add_usersock_entry(void) { struct listeners *listeners; int groups = 32; listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL); if (!listeners) panic("netlink_add_usersock_entry: Cannot allocate listeners\n"); netlink_table_grab(); nl_table[NETLINK_USERSOCK].groups = groups; rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND; netlink_table_ungrab(); } static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, }; static inline u32 netlink_hash(const void *data, u32 len, u32 seed) { const struct netlink_sock *nlk = data; struct netlink_compare_arg arg; netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); } static const struct rhashtable_params netlink_rhashtable_params = { .head_offset = offsetof(struct netlink_sock, node), .key_len = netlink_compare_arg_len, .obj_hashfn = netlink_hash, .obj_cmpfn = netlink_compare, .automatic_shrinking = true, }; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) BTF_ID_LIST(btf_netlink_sock_id) BTF_ID(struct, netlink_sock) static const struct bpf_iter_seq_info netlink_seq_info = { .seq_ops = &netlink_seq_ops, .init_seq_private = bpf_iter_init_seq_net, .fini_seq_private = bpf_iter_fini_seq_net, .seq_priv_size = sizeof(struct nl_seq_iter), }; static struct bpf_iter_reg netlink_reg_info = { .target = "netlink", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__netlink, sk), PTR_TO_BTF_ID_OR_NULL }, }, .seq_info = &netlink_seq_info, }; static int __init bpf_iter_register(void) { netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id; return bpf_iter_reg_target(&netlink_reg_info); } #endif static int __init netlink_proto_init(void) { int i; int err = proto_register(&netlink_proto, 0); if (err != 0) goto out; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) err = bpf_iter_register(); if (err) goto out; #endif BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb)); nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); if (!nl_table) goto panic; for (i = 0; i < MAX_LINKS; i++) { if (rhashtable_init(&nl_table[i].hash, &netlink_rhashtable_params) < 0) { while (--i > 0) rhashtable_destroy(&nl_table[i].hash); kfree(nl_table); goto panic; } } netlink_add_usersock_entry(); sock_register(&netlink_family_ops); register_pernet_subsys(&netlink_net_ops); register_pernet_subsys(&netlink_tap_net_ops); /* The netlink device handler may be needed early. */ rtnetlink_init(); out: return err; panic: panic("netlink_init: Cannot allocate nl_table\n"); } core_initcall(netlink_proto_init);
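A minimal, hedged usage sketch of the kernel-side netlink API implemented above (netlink_kernel_create() reaching __netlink_kernel_create(), the cfg->input callback invoked from netlink_unicast_kernel(), and netlink_unicast() for the reply). The protocol number DEMO_NETLINK_PROTO and all demo_* names are illustrative assumptions, not part of af_netlink.c.

/*
 * Sketch only: a hypothetical module that registers a kernel netlink socket
 * and echoes a reply to the requesting portid. DEMO_NETLINK_PROTO and the
 * demo_* identifiers are assumptions for illustration.
 */
#include <linux/module.h>
#include <linux/string.h>
#include <net/netlink.h>
#include <net/net_namespace.h>

#define DEMO_NETLINK_PROTO	31	/* assumed-free netlink unit, < MAX_LINKS */

static struct sock *demo_nl_sk;

/* Runs from netlink_unicast_kernel() via cfg->input for each request skb. */
static void demo_nl_rcv(struct sk_buff *skb)
{
	static const char msg[] = "hello from the kernel";
	struct nlmsghdr *nlh = nlmsg_hdr(skb);
	u32 portid = NETLINK_CB(skb).portid;
	struct sk_buff *reply;
	struct nlmsghdr *rep;

	reply = nlmsg_new(sizeof(msg), GFP_KERNEL);
	if (!reply)
		return;

	rep = nlmsg_put(reply, 0, nlh->nlmsg_seq, NLMSG_DONE, sizeof(msg), 0);
	if (!rep) {
		nlmsg_free(reply);
		return;
	}
	memcpy(nlmsg_data(rep), msg, sizeof(msg));
	nlmsg_end(reply, rep);

	/* netlink_unicast() consumes the reply skb on both success and error. */
	netlink_unicast(demo_nl_sk, reply, portid, MSG_DONTWAIT);
}

static int __init demo_nl_init(void)
{
	struct netlink_kernel_cfg cfg = {
		.input = demo_nl_rcv,
	};

	demo_nl_sk = netlink_kernel_create(&init_net, DEMO_NETLINK_PROTO, &cfg);
	return demo_nl_sk ? 0 : -ENOMEM;
}

static void __exit demo_nl_exit(void)
{
	netlink_kernel_release(demo_nl_sk);
}

module_init(demo_nl_init);
module_exit(demo_nl_exit);
MODULE_LICENSE("GPL");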
// SPDX-License-Identifier: GPL-2.0 /* * hrtimers - High-resolution kernel timers * * Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar * * data type definitions, declarations, prototypes * * Started by: Thomas Gleixner and Ingo Molnar */ #ifndef _LINUX_HRTIMER_H #define _LINUX_HRTIMER_H #include <linux/hrtimer_defs.h> #include <linux/hrtimer_types.h> #include <linux/init.h> #include <linux/list.h> #include <linux/percpu-defs.h> #include <linux/rbtree.h> #include <linux/timer.h> /* * Mode arguments of xxx_hrtimer functions: * * HRTIMER_MODE_ABS - Time value is absolute * HRTIMER_MODE_REL - Time value is relative to now * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered * when starting the timer) * HRTIMER_MODE_SOFT - Timer callback function will be executed in * soft irq context * HRTIMER_MODE_HARD - Timer callback function will be executed in * hard irq context even on PREEMPT_RT. 
*/ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, HRTIMER_MODE_HARD = 0x08, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT, HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, }; /* * Values to track state of the timer * * Possible states: * * 0x00 inactive * 0x01 enqueued into rbtree * * The callback state is not part of the timer->state because clearing it would * mean touching the timer after the callback, this makes it impossible to free * the timer from the callback function. * * Therefore we track the callback state in: * * timer->base->cpu_base->running == timer * * On SMP it is possible to have a "callback function running and enqueued" * status. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer * and reacquiring the base lock of the hrtimer, another CPU can deliver the * signal and rearm the timer. * * All state transitions are protected by cpu_base->lock. */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 /** * struct hrtimer_sleeper - simple sleeper structure * @timer: embedded timer structure * @task: task to wake up * * task is set to NULL, when the timer expires. 
*/ struct hrtimer_sleeper { struct hrtimer timer; struct task_struct *task; }; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = time; timer->_softexpires = time; } static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) { timer->_softexpires = time; timer->node.expires = ktime_add_safe(time, delta); } static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, u64 delta) { timer->_softexpires = time; timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); } static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { timer->node.expires = tv64; timer->_softexpires = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = ktime_add_safe(timer->node.expires, time); timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { timer->node.expires = ktime_add_ns(timer->node.expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) { return timer->node.expires; } static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) { return timer->_softexpires; } static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { return timer->node.expires; } static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) { return timer->_softexpires; } static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { return ktime_to_ns(timer->node.expires); } static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { return ktime_sub(timer->node.expires, timer->base->get_time()); } static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) { return timer->base->get_time(); } static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? timer->base->cpu_base->hres_active : 0; } #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); extern unsigned int hrtimer_resolution; #else #define hrtimer_resolution (unsigned int)LOW_RES_NSEC #endif static inline ktime_t __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now) { ktime_t rem = ktime_sub(timer->node.expires, now); /* * Adjust relative timers for the extra we added in * hrtimer_start_range_ns() to prevent short timeouts. 
*/ if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel) rem -= hrtimer_resolution; return rem; } static inline ktime_t hrtimer_expires_remaining_adjusted(const struct hrtimer *timer) { return __hrtimer_expires_remaining_adjusted(timer, timer->base->get_time()); } #ifdef CONFIG_TIMERFD extern void timerfd_clock_was_set(void); extern void timerfd_resume(void); #else static inline void timerfd_clock_was_set(void) { } static inline void timerfd_resume(void) { } #endif DECLARE_PER_CPU(struct tick_device, tick_cpu_device); #ifdef CONFIG_PREEMPT_RT void hrtimer_cancel_wait_running(const struct hrtimer *timer); #else static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) { cpu_relax(); } #endif /* Exported timer functions: */ /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode); extern void destroy_hrtimer_on_stack(struct hrtimer *timer); #else static inline void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode) { hrtimer_init(timer, which_clock, mode); } static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { hrtimer_init_sleeper(sl, clock_id, mode); } static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif /* Basic timer operations: */ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 range_ns, const enum hrtimer_mode mode); /** * hrtimer_start - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); * softirq based mode is considered for debug purpose only! 
*/ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { hrtimer_start_range_ns(timer, tim, 0, mode); } extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); static inline void hrtimer_start_expires(struct hrtimer *timer, enum hrtimer_mode mode) { u64 delta; ktime_t soft, hard; soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); delta = ktime_to_ns(ktime_sub(hard, soft)); hrtimer_start_range_ns(timer, soft, delta, mode); } void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, enum hrtimer_mode mode); static inline void hrtimer_restart(struct hrtimer *timer) { hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* Query timers: */ extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); /** * hrtimer_get_remaining - get remaining time for the timer * @timer: the timer to read */ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) { return __hrtimer_get_remaining(timer, false); } extern u64 hrtimer_get_next_event(void); extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); /** * hrtimer_is_queued - check, whether the timer is on one of the queues * @timer: Timer to check * * Returns: True if the timer is queued, false otherwise * * The function can be used lockless, but it gives only a current snapshot. */ static inline bool hrtimer_is_queued(struct hrtimer *timer) { /* The READ_ONCE pairs with the update functions of timer->state */ return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED); } /* * Helper function to check, whether the timer is running the callback * function */ static inline int hrtimer_callback_running(struct hrtimer *timer) { return timer->base->running == timer; } /* Forward a hrtimer so it expires after now: */ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); /** * hrtimer_forward_now() - forward the timer expiry so it expires after now * @timer: hrtimer to forward * @interval: the interval to forward * * It is a variant of hrtimer_forward(). The timer will expire after the current * time of the hrtimer clock base. See hrtimer_forward() for details. */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { return hrtimer_forward(timer, timer->base->get_time(), interval); } /* Precise sleep: */ extern int nanosleep_copyout(struct restart_block *, struct timespec64 *); extern long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, const clockid_t clockid); extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode); extern int schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, clockid_t clock_id); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); /* Show pending timers: */ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int hrtimers_cpu_dying(unsigned int cpu); #else #define hrtimers_cpu_dying NULL #endif #endif
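A minimal, hedged usage sketch of the hrtimer API declared above (hrtimer_init(), hrtimer_start(), hrtimer_forward_now() in the callback, hrtimer_cancel() on teardown). The demo_* names and the 100 ms period are illustrative assumptions, not from the header.

/* Sketch only: a hypothetical periodic 100 ms timer built on the API above. */
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/module.h>

static struct hrtimer demo_timer;

static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
	/* Push the expiry forward past "now" and keep the timer running. */
	hrtimer_forward_now(t, ms_to_ktime(100));
	return HRTIMER_RESTART;
}

static int __init demo_hrtimer_init(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;
	hrtimer_start(&demo_timer, ms_to_ktime(100), HRTIMER_MODE_REL);
	return 0;
}

static void __exit demo_hrtimer_exit(void)
{
	/* Waits for a concurrently running callback before returning. */
	hrtimer_cancel(&demo_timer);
}

module_init(demo_hrtimer_init);
module_exit(demo_hrtimer_exit);
MODULE_LICENSE("GPL");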
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/minmax.h> #include <linux/sched.h> #include <linux/thread_info.h> #include <asm/uaccess.h> /* * Architectures that support memory tagging (assigning tags to memory regions, * embedding these tags into addresses that point to these memory regions, and * checking that the memory and the pointer tags match on memory accesses) * redefine this macro to strip tags from pointers. * * Passing down mm_struct allows to define untagging rules on per-process * basis. * * It's defined as noop for architectures that don't support memory tagging. */ #ifndef untagged_addr #define untagged_addr(addr) (addr) #endif #ifndef untagged_addr_remote #define untagged_addr_remote(mm, addr) ({ \ mmap_assert_locked(mm); \ untagged_addr(addr); \ }) #endif /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and * __copy_{to,from}_user{,_inatomic}(). * * raw_copy_{to,from}_user(to, from, size) should copy up to size bytes and * return the amount left to copy. They should assume that access_ok() has * already been checked (and succeeded); they should *not* zero-pad anything. * No KASAN or object size checks either - those belong here. * * Both of these functions should attempt to copy size bytes starting at from * into the area starting at to. They must not fetch or store anything * outside of those areas. Return value must be between 0 (everything * copied successfully) and size (nothing copied). * * If raw_copy_{to,from}_user(to, from, size) returns N, size - N bytes starting * at to must become equal to the bytes fetched from the corresponding area * starting at from. 
All data past to + size - N must be left unmodified. * * If copying succeeds, the return value must be 0. If some data cannot be * fetched, it is permitted to copy less than had been fetched; the only * hard requirement is that not storing anything at all (i.e. returning size) * should happen only when nothing could be copied. In other words, you don't * have to squeeze as much as possible - it is allowed, but not necessary. * * For raw_copy_from_user() to always points to kernel memory and no faults * on store should happen. Interpretation of from is affected by set_fs(). * For raw_copy_to_user() it's the other way round. * * Both can be inlined - it's up to architectures whether it wants to bother * with that. They should not be used directly; they are used to implement * the 6 functions (copy_{to,from}_user(), __copy_{to,from}_user_inatomic()) * that are used instead. Out of those, __... ones are inlined. Plain * copy_{to,from}_user() might or might not be inlined. If you want them * inlined, have asm/uaccess.h define INLINE_COPY_{TO,FROM}_USER. * * NOTE: only copy_from_user() zero-pads the destination in case of short copy. * Neither __copy_from_user() nor __copy_from_user_inatomic() zero anything * at all; their callers absolutely must check the return value. * * Biarch ones should also provide raw_copy_in_user() - similar to the above, * but both source and destination are __user pointers (affected by set_fs() * as usual) and both source and destination can trigger faults. */ static __always_inline __must_check unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { unsigned long res; instrument_copy_from_user_before(to, from, n); check_object_size(to, n, false); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); return res; } static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res; might_fault(); instrument_copy_from_user_before(to, from, n); if (should_fail_usercopy()) return n; check_object_size(to, n, false); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); return res; } /** * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. * @to: Destination address, in user space. * @from: Source address, in kernel space. * @n: Number of bytes to copy. * * Context: User context only. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. * The caller should also make sure he pins the user space address * so that we don't result in page fault and sleep. 
*/ static __always_inline __must_check unsigned long __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { if (should_fail_usercopy()) return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } static __always_inline __must_check unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } #ifdef INLINE_COPY_FROM_USER static inline __must_check unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); } if (unlikely(res)) memset(to + (n - res), 0, res); return res; } #else extern __must_check unsigned long _copy_from_user(void *, const void __user *, unsigned long); #endif #ifdef INLINE_COPY_TO_USER static inline __must_check unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); } return n; } #else extern __must_check unsigned long _copy_to_user(void __user *, const void *, unsigned long); #endif static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (check_copy_size(to, n, false)) n = _copy_from_user(to, from, n); return n; } static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { if (check_copy_size(from, n, true)) n = _copy_to_user(to, from, n); return n; } #ifndef copy_mc_to_kernel /* * Without arch opt-in this generic copy_mc_to_kernel() will not handle * #MC (or arch equivalent) during source read. */ static inline unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, size_t cnt) { memcpy(dst, src, cnt); return 0; } #endif static __always_inline void pagefault_disabled_inc(void) { current->pagefault_disabled++; } static __always_inline void pagefault_disabled_dec(void) { current->pagefault_disabled--; } /* * These routines enable/disable the pagefault handler. If disabled, it will * not take any locks and go straight to the fixup table. * * User access methods will not sleep when called from a pagefault_disabled() * environment. */ static inline void pagefault_disable(void) { pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault * can hit. */ barrier(); } static inline void pagefault_enable(void) { /* * make sure to issue those last loads/stores before enabling * the pagefault handler again. */ barrier(); pagefault_disabled_dec(); } /* * Is the pagefault handler disabled? If so, user access methods will not sleep. */ static inline bool pagefault_disabled(void) { return current->pagefault_disabled != 0; } /* * The pagefault handler is in general disabled by pagefault_disable() or * when in irq context (via in_atomic()). * * This function should only be used by the fault handlers. Other users should * stick to pagefault_disabled(). * Please NEVER use preempt_disable() to disable the fault handler. With * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled. 
* in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT. */ #define faulthandler_disabled() (pagefault_disabled() || in_atomic()) #ifndef CONFIG_ARCH_HAS_SUBPAGE_FAULTS /** * probe_subpage_writeable: probe the user range for write faults at sub-page * granularity (e.g. arm64 MTE) * @uaddr: start of address range * @size: size of address range * * Returns 0 on success, the number of bytes not probed on fault. * * It is expected that the caller checked for the write permission of each * page in the range either by put_user() or GUP. The architecture port can * implement a more efficient get_user() probing if the same sub-page faults * are triggered by either a read or a write. */ static inline size_t probe_subpage_writeable(char __user *uaddr, size_t size) { return 0; } #endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ #ifndef ARCH_HAS_NOCACHE_UACCESS static inline __must_check unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n) { return __copy_from_user_inatomic(to, from, n); } #endif /* ARCH_HAS_NOCACHE_UACCESS */ extern __must_check int check_zeroed_user(const void __user *from, size_t size); /** * copy_struct_from_user: copy a struct from userspace * @dst: Destination address, in kernel space. This buffer must be @ksize * bytes long. * @ksize: Size of @dst struct. * @src: Source address, in userspace. * @usize: (Alleged) size of @src struct. * * Copies a struct from userspace to kernel space, in a way that guarantees * backwards-compatibility for struct syscall arguments (as long as future * struct extensions are made such that all new fields are *appended* to the * old struct, and zeroed-out new fields have the same meaning as the old * struct). * * @ksize is just sizeof(*dst), and @usize should've been passed by userspace. * The recommended usage is something like the following: * * SYSCALL_DEFINE2(foobar, const struct foo __user *, uarg, size_t, usize) * { * int err; * struct foo karg = {}; * * if (usize > PAGE_SIZE) * return -E2BIG; * if (usize < FOO_SIZE_VER0) * return -EINVAL; * * err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); * if (err) * return err; * * // ... * } * * There are three cases to consider: * * If @usize == @ksize, then it's copied verbatim. * * If @usize < @ksize, then the userspace has passed an old struct to a * newer kernel. The rest of the trailing bytes in @dst (@ksize - @usize) * are to be zero-filled. * * If @usize > @ksize, then the userspace has passed a new struct to an * older kernel. The trailing bytes unknown to the kernel (@usize - @ksize) * are checked to ensure they are zeroed, otherwise -E2BIG is returned. * * Returns (in all cases, some data may have been copied): * * -E2BIG: (@usize > @ksize) and there are non-zero trailing bytes in @src. * * -EFAULT: access to userspace failed. */ static __always_inline __must_check int copy_struct_from_user(void *dst, size_t ksize, const void __user *src, size_t usize) { size_t size = min(ksize, usize); size_t rest = max(ksize, usize) - size; /* Double check if ksize is larger than a known object size. */ if (WARN_ON_ONCE(ksize > __builtin_object_size(dst, 1))) return -E2BIG; /* Deal with trailing bytes. */ if (usize < ksize) { memset(dst + size, 0, rest); } else if (usize > ksize) { int ret = check_zeroed_user(src + size, rest); if (ret <= 0) return ret ?: -E2BIG; } /* Copy the interoperable parts of the struct. 
*/ if (copy_from_user(dst, src, size)) return -EFAULT; return 0; } bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); long copy_from_user_nofault(void *dst, const void __user *src, size_t size); long notrace copy_to_user_nofault(void __user *dst, const void *src, size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count); long strnlen_user_nofault(const void __user *unsafe_addr, long count); #ifndef __get_kernel_nofault #define __get_kernel_nofault(dst, src, type, label) \ do { \ type __user *p = (type __force __user *)(src); \ type data; \ if (__get_user(data, p)) \ goto label; \ *(type *)dst = data; \ } while (0) #define __put_kernel_nofault(dst, src, type, label) \ do { \ type __user *p = (type __force __user *)(dst); \ type data = *(type *)src; \ if (__put_user(data, p)) \ goto label; \ } while (0) #endif /** * get_kernel_nofault(): safely attempt to read from a location * @val: read into this variable * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ #define get_kernel_nofault(val, ptr) ({ \ const typeof(val) *__gk_ptr = (ptr); \ copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ }) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) #define user_access_end() do { } while (0) #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) #define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif #ifndef user_write_access_begin #define user_write_access_begin user_access_begin #define user_write_access_end user_access_end #endif #ifndef user_read_access_begin #define user_read_access_begin user_access_begin #define user_read_access_end user_access_end #endif #ifdef CONFIG_HARDENED_USERCOPY void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); #endif #endif /* __LINUX_UACCESS_H__ */
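To make the calling convention above concrete, here is a minimal sketch (not part of the header; the struct and function names are hypothetical) of how a driver might use copy_from_user() and copy_to_user(). Both return the number of bytes that could NOT be copied, so any non-zero result is normally turned into -EFAULT; of the two, only copy_from_user() zero-pads the kernel destination on a short copy.

/* Illustrative sketch only; assumes <linux/uaccess.h> and <linux/types.h>. */
struct demo_args {			/* hypothetical ioctl argument struct */
	__u32 in;
	__u32 out;
};

static long demo_handle_ioctl(void __user *uarg)
{
	struct demo_args args;

	/* Non-zero return = bytes not copied; destination was zero-padded. */
	if (copy_from_user(&args, uarg, sizeof(args)))
		return -EFAULT;

	args.out = args.in * 2;		/* work on the private kernel copy */

	if (copy_to_user(uarg, &args, sizeof(args)))
		return -EFAULT;

	return 0;
}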
// SPDX-License-Identifier: GPL-2.0 /* * SafeSetID Linux Security Module * * Author: Micah Morton <mortonm@chromium.org> * * Copyright (C) 2018 The Chromium OS Authors. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, as * published by the Free Software Foundation. * */ #define pr_fmt(fmt) "SafeSetID: " fmt #include <linux/lsm_hooks.h> #include <linux/module.h> #include <linux/ptrace.h> #include <linux/sched/task_stack.h> #include <linux/security.h> #include <uapi/linux/lsm.h> #include "lsm.h" /* Flag indicating whether initialization completed */ int safesetid_initialized __initdata; struct setid_ruleset __rcu *safesetid_setuid_rules; struct setid_ruleset __rcu *safesetid_setgid_rules; /* Compute a decision for a transition from @src to @dst under @policy. */ enum sid_policy_type _setid_policy_lookup(struct setid_ruleset *policy, kid_t src, kid_t dst) { struct setid_rule *rule; enum sid_policy_type result = SIDPOL_DEFAULT; if (policy->type == UID) { hash_for_each_possible(policy->rules, rule, next, __kuid_val(src.uid)) { if (!uid_eq(rule->src_id.uid, src.uid)) continue; if (uid_eq(rule->dst_id.uid, dst.uid)) return SIDPOL_ALLOWED; result = SIDPOL_CONSTRAINED; } } else if (policy->type == GID) { hash_for_each_possible(policy->rules, rule, next, __kgid_val(src.gid)) { if (!gid_eq(rule->src_id.gid, src.gid)) continue; if (gid_eq(rule->dst_id.gid, dst.gid)) return SIDPOL_ALLOWED; result = SIDPOL_CONSTRAINED; } } else { /* Should not reach here, report the ID as constrained */ result = SIDPOL_CONSTRAINED; } return result; } /* * Compute a decision for a transition from @src to @dst under the active * policy. */ static enum sid_policy_type setid_policy_lookup(kid_t src, kid_t dst, enum setid_type new_type) { enum sid_policy_type result = SIDPOL_DEFAULT; struct setid_ruleset *pol; rcu_read_lock(); if (new_type == UID) pol = rcu_dereference(safesetid_setuid_rules); else if (new_type == GID) pol = rcu_dereference(safesetid_setgid_rules); else { /* Should not reach here */ result = SIDPOL_CONSTRAINED; rcu_read_unlock(); return result; } if (pol) { pol->type = new_type; result = _setid_policy_lookup(pol, src, dst); } rcu_read_unlock(); return result; } static int safesetid_security_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) { /* We're only interested in CAP_SETUID and CAP_SETGID.
*/ if (cap != CAP_SETUID && cap != CAP_SETGID) return 0; /* * If CAP_SET{U/G}ID is currently used for a setid or setgroups syscall, we * want to let it go through here; the real security check happens later, in * the task_fix_set{u/g}id or task_fix_setgroups hooks. */ if ((opts & CAP_OPT_INSETID) != 0) return 0; switch (cap) { case CAP_SETUID: /* * If no policy applies to this task, allow the use of CAP_SETUID for * other purposes. */ if (setid_policy_lookup((kid_t){.uid = cred->uid}, INVALID_ID, UID) == SIDPOL_DEFAULT) return 0; /* * Reject use of CAP_SETUID for functionality other than calling * set*uid() (e.g. setting up userns uid mappings). */ pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n", __kuid_val(cred->uid)); return -EPERM; case CAP_SETGID: /* * If no policy applies to this task, allow the use of CAP_SETGID for * other purposes. */ if (setid_policy_lookup((kid_t){.gid = cred->gid}, INVALID_ID, GID) == SIDPOL_DEFAULT) return 0; /* * Reject use of CAP_SETGID for functionality other than calling * set*gid() (e.g. setting up userns gid mappings). */ pr_warn("Operation requires CAP_SETGID, which is not available to GID %u for operations besides approved set*gid transitions\n", __kgid_val(cred->gid)); return -EPERM; default: /* Error, the only capabilities we're checking for are CAP_SETUID/CAP_SETGID */ return 0; } return 0; } /* * Check whether a caller with old credentials @old is allowed to switch to * credentials that contain @new_id. */ static bool id_permitted_for_cred(const struct cred *old, kid_t new_id, enum setid_type new_type) { bool permitted; /* If our old creds already had this ID in them, it's fine. */ if (new_type == UID) { if (uid_eq(new_id.uid, old->uid) || uid_eq(new_id.uid, old->euid) || uid_eq(new_id.uid, old->suid)) return true; } else if (new_type == GID) { if (gid_eq(new_id.gid, old->gid) || gid_eq(new_id.gid, old->egid) || gid_eq(new_id.gid, old->sgid)) return true; } else /* Error, new_type is an invalid type */ return false; /* * Transitions to new UIDs require a check against the policy of the old * RUID. */ permitted = setid_policy_lookup((kid_t){.uid = old->uid}, new_id, new_type) != SIDPOL_CONSTRAINED; if (!permitted) { if (new_type == UID) { pr_warn("UID transition ((%d,%d,%d) -> %d) blocked\n", __kuid_val(old->uid), __kuid_val(old->euid), __kuid_val(old->suid), __kuid_val(new_id.uid)); } else if (new_type == GID) { pr_warn("GID transition ((%d,%d,%d) -> %d) blocked\n", __kgid_val(old->gid), __kgid_val(old->egid), __kgid_val(old->sgid), __kgid_val(new_id.gid)); } else /* Error, new_type is an invalid type */ return false; } return permitted; } /* * Check whether there is either an exception for user under old cred struct to * set*uid to user under new cred struct, or the UID transition is allowed (by * Linux set*uid rules) even without CAP_SETUID. */ static int safesetid_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { /* Do nothing if there are no setuid restrictions for our old RUID.
*/ if (setid_policy_lookup((kid_t){.uid = old->uid}, INVALID_ID, UID) == SIDPOL_DEFAULT) return 0; if (id_permitted_for_cred(old, (kid_t){.uid = new->uid}, UID) && id_permitted_for_cred(old, (kid_t){.uid = new->euid}, UID) && id_permitted_for_cred(old, (kid_t){.uid = new->suid}, UID) && id_permitted_for_cred(old, (kid_t){.uid = new->fsuid}, UID)) return 0; /* * Kill this process to avoid potential security vulnerabilities * that could arise from a missing allowlist entry preventing a * privileged process from dropping to a lesser-privileged one. */ force_sig(SIGKILL); return -EACCES; } static int safesetid_task_fix_setgid(struct cred *new, const struct cred *old, int flags) { /* Do nothing if there are no setgid restrictions for our old RGID. */ if (setid_policy_lookup((kid_t){.gid = old->gid}, INVALID_ID, GID) == SIDPOL_DEFAULT) return 0; if (id_permitted_for_cred(old, (kid_t){.gid = new->gid}, GID) && id_permitted_for_cred(old, (kid_t){.gid = new->egid}, GID) && id_permitted_for_cred(old, (kid_t){.gid = new->sgid}, GID) && id_permitted_for_cred(old, (kid_t){.gid = new->fsgid}, GID)) return 0; /* * Kill this process to avoid potential security vulnerabilities * that could arise from a missing allowlist entry preventing a * privileged process from dropping to a lesser-privileged one. */ force_sig(SIGKILL); return -EACCES; } static int safesetid_task_fix_setgroups(struct cred *new, const struct cred *old) { int i; /* Do nothing if there are no setgid restrictions for our old RGID. */ if (setid_policy_lookup((kid_t){.gid = old->gid}, INVALID_ID, GID) == SIDPOL_DEFAULT) return 0; get_group_info(new->group_info); for (i = 0; i < new->group_info->ngroups; i++) { if (!id_permitted_for_cred(old, (kid_t){.gid = new->group_info->gid[i]}, GID)) { put_group_info(new->group_info); /* * Kill this process to avoid potential security vulnerabilities * that could arise from a missing allowlist entry preventing a * privileged process from dropping to a lesser-privileged one. */ force_sig(SIGKILL); return -EACCES; } } put_group_info(new->group_info); return 0; } static const struct lsm_id safesetid_lsmid = { .name = "safesetid", .id = LSM_ID_SAFESETID, }; static struct security_hook_list safesetid_security_hooks[] = { LSM_HOOK_INIT(task_fix_setuid, safesetid_task_fix_setuid), LSM_HOOK_INIT(task_fix_setgid, safesetid_task_fix_setgid), LSM_HOOK_INIT(task_fix_setgroups, safesetid_task_fix_setgroups), LSM_HOOK_INIT(capable, safesetid_security_capable) }; static int __init safesetid_security_init(void) { security_add_hooks(safesetid_security_hooks, ARRAY_SIZE(safesetid_security_hooks), &safesetid_lsmid); /* Report that SafeSetID successfully initialized */ safesetid_initialized = 1; return 0; } DEFINE_LSM(safesetid_security_init) = { .init = safesetid_security_init, .name = "safesetid", };
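The hooks above all reduce to the same three-way decision computed by _setid_policy_lookup(). The standalone userspace model below is illustrative only (a plain array stands in for the kernel hash table, and the UID values are made up): an exact src->dst rule yields SIDPOL_ALLOWED, a source ID that has rules but no matching destination yields SIDPOL_CONSTRAINED, and a source ID with no rules at all yields SIDPOL_DEFAULT, which is why users that never appear in the policy are unaffected by SafeSetID.

/* Userspace model of the SafeSetID decision logic; not kernel code. */
#include <stdio.h>

enum sid_policy_type { SIDPOL_DEFAULT, SIDPOL_ALLOWED, SIDPOL_CONSTRAINED };

struct uid_rule { unsigned int src; unsigned int dst; };

/* Made-up example policy: UID 1000 may only become UID 2000 or 2001. */
static const struct uid_rule rules[] = {
	{ 1000, 2000 },
	{ 1000, 2001 },
};

static enum sid_policy_type lookup(unsigned int src, unsigned int dst)
{
	enum sid_policy_type result = SIDPOL_DEFAULT;
	size_t i;

	for (i = 0; i < sizeof(rules) / sizeof(rules[0]); i++) {
		if (rules[i].src != src)
			continue;		/* rule is for another source UID */
		if (rules[i].dst == dst)
			return SIDPOL_ALLOWED;	/* exact src->dst match */
		result = SIDPOL_CONSTRAINED;	/* src is restricted, dst not listed */
	}
	return result;				/* DEFAULT: no rule mentions src */
}

int main(void)
{
	printf("1000 -> 2000: %d (expect ALLOWED=1)\n", lookup(1000, 2000));
	printf("1000 -> 3000: %d (expect CONSTRAINED=2)\n", lookup(1000, 3000));
	printf("1234 -> 2000: %d (expect DEFAULT=0)\n", lookup(1234, 2000));
	return 0;
}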
// SPDX-License-Identifier: GPL-2.0-or-later /* * V4L2 controls framework core implementation. * * Copyright (C) 2010-2021 Hans Verkuil <hverkuil-cisco@xs4all.nl> */ #include <linux/export.h> #include <linux/mm.h> #include <linux/slab.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> #include <media/v4l2-fwnode.h> #include "v4l2-ctrls-priv.h" static const union v4l2_ctrl_ptr ptr_null; static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl, u32 changes) { memset(ev, 0, sizeof(*ev)); ev->type = V4L2_EVENT_CTRL; ev->id = ctrl->id; ev->u.ctrl.changes = changes; ev->u.ctrl.type = ctrl->type; ev->u.ctrl.flags = user_flags(ctrl); if (ctrl->is_ptr) ev->u.ctrl.value64 = 0; else ev->u.ctrl.value64 = *ctrl->p_cur.p_s64; ev->u.ctrl.minimum = ctrl->minimum; ev->u.ctrl.maximum = ctrl->maximum; if (ctrl->type == V4L2_CTRL_TYPE_MENU || ctrl->type == V4L2_CTRL_TYPE_INTEGER_MENU) ev->u.ctrl.step = 1; else ev->u.ctrl.step = ctrl->step; ev->u.ctrl.default_value = ctrl->default_value; } void send_initial_event(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl) { struct v4l2_event ev; u32 changes = V4L2_EVENT_CTRL_CH_FLAGS; if (!(ctrl->flags & V4L2_CTRL_FLAG_WRITE_ONLY)) changes |= V4L2_EVENT_CTRL_CH_VALUE; fill_event(&ev, ctrl, changes); v4l2_event_queue_fh(fh, &ev); } void send_event(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 changes) { struct v4l2_event ev; struct v4l2_subscribed_event *sev; if (list_empty(&ctrl->ev_subs)) return; fill_event(&ev, ctrl, changes); list_for_each_entry(sev, &ctrl->ev_subs, node) if (sev->fh != fh || (sev->flags & V4L2_EVENT_SUB_FL_ALLOW_FEEDBACK)) v4l2_event_queue_fh(sev->fh, &ev); } bool v4l2_ctrl_type_op_equal(const struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr ptr1, union v4l2_ctrl_ptr ptr2) { unsigned int i; switch (ctrl->type) { case V4L2_CTRL_TYPE_BUTTON: return false; case V4L2_CTRL_TYPE_STRING: for (i = 0; i < ctrl->elems; i++) { unsigned int idx = i * ctrl->elem_size; /* strings are always 0-terminated */ if (strcmp(ptr1.p_char + idx, ptr2.p_char + idx)) return false; } return true; default: return !memcmp(ptr1.p_const, ptr2.p_const, ctrl->elems * ctrl->elem_size); } } EXPORT_SYMBOL(v4l2_ctrl_type_op_equal); /* Default intra MPEG-2 quantisation coefficients, from the specification.
*/ static const u8 mpeg2_intra_quant_matrix[64] = { 8, 16, 16, 19, 16, 19, 22, 22, 22, 22, 22, 22, 26, 24, 26, 27, 27, 27, 26, 26, 26, 26, 27, 27, 27, 29, 29, 29, 34, 34, 34, 29, 29, 29, 27, 27, 29, 29, 32, 32, 34, 34, 37, 38, 37, 35, 35, 34, 35, 38, 38, 40, 40, 40, 48, 48, 46, 46, 56, 56, 58, 69, 69, 83 }; static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx, union v4l2_ctrl_ptr ptr) { struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence; struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture; struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quant; struct v4l2_ctrl_vp8_frame *p_vp8_frame; struct v4l2_ctrl_vp9_frame *p_vp9_frame; struct v4l2_ctrl_fwht_params *p_fwht_params; struct v4l2_ctrl_h264_scaling_matrix *p_h264_scaling_matrix; struct v4l2_ctrl_av1_sequence *p_av1_sequence; void *p = ptr.p + idx * ctrl->elem_size; if (ctrl->p_def.p_const) memcpy(p, ctrl->p_def.p_const, ctrl->elem_size); else memset(p, 0, ctrl->elem_size); switch ((u32)ctrl->type) { case V4L2_CTRL_TYPE_MPEG2_SEQUENCE: p_mpeg2_sequence = p; /* 4:2:0 */ p_mpeg2_sequence->chroma_format = 1; break; case V4L2_CTRL_TYPE_MPEG2_PICTURE: p_mpeg2_picture = p; /* interlaced top field */ p_mpeg2_picture->picture_structure = V4L2_MPEG2_PIC_TOP_FIELD; p_mpeg2_picture->picture_coding_type = V4L2_MPEG2_PIC_CODING_TYPE_I; break; case V4L2_CTRL_TYPE_MPEG2_QUANTISATION: p_mpeg2_quant = p; memcpy(p_mpeg2_quant->intra_quantiser_matrix, mpeg2_intra_quant_matrix, ARRAY_SIZE(mpeg2_intra_quant_matrix)); /* * The default non-intra MPEG-2 quantisation * coefficients are all 16, as per the specification. */ memset(p_mpeg2_quant->non_intra_quantiser_matrix, 16, sizeof(p_mpeg2_quant->non_intra_quantiser_matrix)); break; case V4L2_CTRL_TYPE_VP8_FRAME: p_vp8_frame = p; p_vp8_frame->num_dct_parts = 1; break; case V4L2_CTRL_TYPE_VP9_FRAME: p_vp9_frame = p; p_vp9_frame->profile = 0; p_vp9_frame->bit_depth = 8; p_vp9_frame->flags |= V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING | V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING; break; case V4L2_CTRL_TYPE_AV1_SEQUENCE: p_av1_sequence = p; p_av1_sequence->bit_depth = 8; break; case V4L2_CTRL_TYPE_FWHT_PARAMS: p_fwht_params = p; p_fwht_params->version = V4L2_FWHT_VERSION; p_fwht_params->width = 1280; p_fwht_params->height = 720; p_fwht_params->flags = V4L2_FWHT_FL_PIXENC_YUV | (2 << V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET); break; case V4L2_CTRL_TYPE_H264_SCALING_MATRIX: p_h264_scaling_matrix = p; /* * The default (flat) H.264 scaling matrix when none are * specified in the bitstream, this is according to formulas * (7-8) and (7-9) of the specification. 
*/ memset(p_h264_scaling_matrix, 16, sizeof(*p_h264_scaling_matrix)); break; } } void v4l2_ctrl_type_op_init(const struct v4l2_ctrl *ctrl, u32 from_idx, union v4l2_ctrl_ptr ptr) { unsigned int i; u32 tot_elems = ctrl->elems; u32 elems = tot_elems - from_idx; if (from_idx >= tot_elems) return; switch (ctrl->type) { case V4L2_CTRL_TYPE_STRING: for (i = from_idx; i < tot_elems; i++) { unsigned int offset = i * ctrl->elem_size; memset(ptr.p_char + offset, ' ', ctrl->minimum); ptr.p_char[offset + ctrl->minimum] = '\0'; } break; case V4L2_CTRL_TYPE_INTEGER64: if (ctrl->default_value) { for (i = from_idx; i < tot_elems; i++) ptr.p_s64[i] = ctrl->default_value; } else { memset(ptr.p_s64 + from_idx, 0, elems * sizeof(s64)); } break; case V4L2_CTRL_TYPE_INTEGER: case V4L2_CTRL_TYPE_INTEGER_MENU: case V4L2_CTRL_TYPE_MENU: case V4L2_CTRL_TYPE_BITMASK: case V4L2_CTRL_TYPE_BOOLEAN: if (ctrl->default_value) { for (i = from_idx; i < tot_elems; i++) ptr.p_s32[i] = ctrl->default_value; } else { memset(ptr.p_s32 + from_idx, 0, elems * sizeof(s32)); } break; case V4L2_CTRL_TYPE_BUTTON: case V4L2_CTRL_TYPE_CTRL_CLASS: memset(ptr.p_s32 + from_idx, 0, elems * sizeof(s32)); break; case V4L2_CTRL_TYPE_U8: memset(ptr.p_u8 + from_idx, ctrl->default_value, elems); break; case V4L2_CTRL_TYPE_U16: if (ctrl->default_value) { for (i = from_idx; i < tot_elems; i++) ptr.p_u16[i] = ctrl->default_value; } else { memset(ptr.p_u16 + from_idx, 0, elems * sizeof(u16)); } break; case V4L2_CTRL_TYPE_U32: if (ctrl->default_value) { for (i = from_idx; i < tot_elems; i++) ptr.p_u32[i] = ctrl->default_value; } else { memset(ptr.p_u32 + from_idx, 0, elems * sizeof(u32)); } break; default: for (i = from_idx; i < tot_elems; i++) std_init_compound(ctrl, i, ptr); break; } } EXPORT_SYMBOL(v4l2_ctrl_type_op_init); void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl) { union v4l2_ctrl_ptr ptr = ctrl->p_cur; if (ctrl->is_array) { unsigned i; for (i = 0; i < ctrl->nr_of_dims; i++) pr_cont("[%u]", ctrl->dims[i]); pr_cont(" "); } switch (ctrl->type) { case V4L2_CTRL_TYPE_INTEGER: pr_cont("%d", *ptr.p_s32); break; case V4L2_CTRL_TYPE_BOOLEAN: pr_cont("%s", *ptr.p_s32 ? 
"true" : "false"); break; case V4L2_CTRL_TYPE_MENU: pr_cont("%s", ctrl->qmenu[*ptr.p_s32]); break; case V4L2_CTRL_TYPE_INTEGER_MENU: pr_cont("%lld", ctrl->qmenu_int[*ptr.p_s32]); break; case V4L2_CTRL_TYPE_BITMASK: pr_cont("0x%08x", *ptr.p_s32); break; case V4L2_CTRL_TYPE_INTEGER64: pr_cont("%lld", *ptr.p_s64); break; case V4L2_CTRL_TYPE_STRING: pr_cont("%s", ptr.p_char); break; case V4L2_CTRL_TYPE_U8: pr_cont("%u", (unsigned)*ptr.p_u8); break; case V4L2_CTRL_TYPE_U16: pr_cont("%u", (unsigned)*ptr.p_u16); break; case V4L2_CTRL_TYPE_U32: pr_cont("%u", (unsigned)*ptr.p_u32); break; case V4L2_CTRL_TYPE_H264_SPS: pr_cont("H264_SPS"); break; case V4L2_CTRL_TYPE_H264_PPS: pr_cont("H264_PPS"); break; case V4L2_CTRL_TYPE_H264_SCALING_MATRIX: pr_cont("H264_SCALING_MATRIX"); break; case V4L2_CTRL_TYPE_H264_SLICE_PARAMS: pr_cont("H264_SLICE_PARAMS"); break; case V4L2_CTRL_TYPE_H264_DECODE_PARAMS: pr_cont("H264_DECODE_PARAMS"); break; case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS: pr_cont("H264_PRED_WEIGHTS"); break; case V4L2_CTRL_TYPE_FWHT_PARAMS: pr_cont("FWHT_PARAMS"); break; case V4L2_CTRL_TYPE_VP8_FRAME: pr_cont("VP8_FRAME"); break; case V4L2_CTRL_TYPE_HDR10_CLL_INFO: pr_cont("HDR10_CLL_INFO"); break; case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY: pr_cont("HDR10_MASTERING_DISPLAY"); break; case V4L2_CTRL_TYPE_MPEG2_QUANTISATION: pr_cont("MPEG2_QUANTISATION"); break; case V4L2_CTRL_TYPE_MPEG2_SEQUENCE: pr_cont("MPEG2_SEQUENCE"); break; case V4L2_CTRL_TYPE_MPEG2_PICTURE: pr_cont("MPEG2_PICTURE"); break; case V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR: pr_cont("VP9_COMPRESSED_HDR"); break; case V4L2_CTRL_TYPE_VP9_FRAME: pr_cont("VP9_FRAME"); break; case V4L2_CTRL_TYPE_HEVC_SPS: pr_cont("HEVC_SPS"); break; case V4L2_CTRL_TYPE_HEVC_PPS: pr_cont("HEVC_PPS"); break; case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS: pr_cont("HEVC_SLICE_PARAMS"); break; case V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX: pr_cont("HEVC_SCALING_MATRIX"); break; case V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS: pr_cont("HEVC_DECODE_PARAMS"); break; case V4L2_CTRL_TYPE_AV1_SEQUENCE: pr_cont("AV1_SEQUENCE"); break; case V4L2_CTRL_TYPE_AV1_TILE_GROUP_ENTRY: pr_cont("AV1_TILE_GROUP_ENTRY"); break; case V4L2_CTRL_TYPE_AV1_FRAME: pr_cont("AV1_FRAME"); break; case V4L2_CTRL_TYPE_AV1_FILM_GRAIN: pr_cont("AV1_FILM_GRAIN"); break; default: pr_cont("unknown type %d", ctrl->type); break; } } EXPORT_SYMBOL(v4l2_ctrl_type_op_log); /* * Round towards the closest legal value. Be careful when we are * close to the maximum range of the control type to prevent * wrap-arounds. */ #define ROUND_TO_RANGE(val, offset_type, ctrl) \ ({ \ offset_type offset; \ if ((ctrl)->maximum >= 0 && \ val >= (ctrl)->maximum - (s32)((ctrl)->step / 2)) \ val = (ctrl)->maximum; \ else \ val += (s32)((ctrl)->step / 2); \ val = clamp_t(typeof(val), val, \ (ctrl)->minimum, (ctrl)->maximum); \ offset = (val) - (ctrl)->minimum; \ offset = (ctrl)->step * (offset / (u32)(ctrl)->step); \ val = (ctrl)->minimum + offset; \ 0; \ }) /* Validate a new control */ #define zero_padding(s) \ memset(&(s).padding, 0, sizeof((s).padding)) #define zero_reserved(s) \ memset(&(s).reserved, 0, sizeof((s).reserved)) static int validate_vp9_lf_params(struct v4l2_vp9_loop_filter *lf) { unsigned int i; if (lf->flags & ~(V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED | V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE)) return -EINVAL; /* That all values are in the accepted range. 
*/ if (lf->level > GENMASK(5, 0)) return -EINVAL; if (lf->sharpness > GENMASK(2, 0)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(lf->ref_deltas); i++) if (lf->ref_deltas[i] < -63 || lf->ref_deltas[i] > 63) return -EINVAL; for (i = 0; i < ARRAY_SIZE(lf->mode_deltas); i++) if (lf->mode_deltas[i] < -63 || lf->mode_deltas[i] > 63) return -EINVAL; zero_reserved(*lf); return 0; } static int validate_vp9_quant_params(struct v4l2_vp9_quantization *quant) { if (quant->delta_q_y_dc < -15 || quant->delta_q_y_dc > 15 || quant->delta_q_uv_dc < -15 || quant->delta_q_uv_dc > 15 || quant->delta_q_uv_ac < -15 || quant->delta_q_uv_ac > 15) return -EINVAL; zero_reserved(*quant); return 0; } static int validate_vp9_seg_params(struct v4l2_vp9_segmentation *seg) { unsigned int i, j; if (seg->flags & ~(V4L2_VP9_SEGMENTATION_FLAG_ENABLED | V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP | V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE | V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA | V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(seg->feature_enabled); i++) { if (seg->feature_enabled[i] & ~V4L2_VP9_SEGMENT_FEATURE_ENABLED_MASK) return -EINVAL; } for (i = 0; i < ARRAY_SIZE(seg->feature_data); i++) { static const int range[] = { 255, 63, 3, 0 }; for (j = 0; j < ARRAY_SIZE(seg->feature_data[j]); j++) { if (seg->feature_data[i][j] < -range[j] || seg->feature_data[i][j] > range[j]) return -EINVAL; } } zero_reserved(*seg); return 0; } static int validate_vp9_compressed_hdr(struct v4l2_ctrl_vp9_compressed_hdr *hdr) { if (hdr->tx_mode > V4L2_VP9_TX_MODE_SELECT) return -EINVAL; return 0; } static int validate_vp9_frame(struct v4l2_ctrl_vp9_frame *frame) { int ret; /* Make sure we're not passed invalid flags. */ if (frame->flags & ~(V4L2_VP9_FRAME_FLAG_KEY_FRAME | V4L2_VP9_FRAME_FLAG_SHOW_FRAME | V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT | V4L2_VP9_FRAME_FLAG_INTRA_ONLY | V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV | V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX | V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE | V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING | V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING | V4L2_VP9_FRAME_FLAG_COLOR_RANGE_FULL_SWING)) return -EINVAL; if (frame->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT && frame->flags & V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX) return -EINVAL; if (frame->profile > V4L2_VP9_PROFILE_MAX) return -EINVAL; if (frame->reset_frame_context > V4L2_VP9_RESET_FRAME_CTX_ALL) return -EINVAL; if (frame->frame_context_idx >= V4L2_VP9_NUM_FRAME_CTX) return -EINVAL; /* * Profiles 0 and 1 only support 8-bit depth, profiles 2 and 3 only 10 * and 12 bit depths. */ if ((frame->profile < 2 && frame->bit_depth != 8) || (frame->profile >= 2 && (frame->bit_depth != 10 && frame->bit_depth != 12))) return -EINVAL; /* Profile 0 and 2 only accept YUV 4:2:0. */ if ((frame->profile == 0 || frame->profile == 2) && (!(frame->flags & V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING) || !(frame->flags & V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING))) return -EINVAL; /* Profile 1 and 3 only accept YUV 4:2:2, 4:4:0 and 4:4:4. */ if ((frame->profile == 1 || frame->profile == 3) && ((frame->flags & V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING) && (frame->flags & V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING))) return -EINVAL; if (frame->interpolation_filter > V4L2_VP9_INTERP_FILTER_SWITCHABLE) return -EINVAL; /* * According to the spec, tile_cols_log2 shall be less than or equal * to 6. 
*/ if (frame->tile_cols_log2 > 6) return -EINVAL; if (frame->reference_mode > V4L2_VP9_REFERENCE_MODE_SELECT) return -EINVAL; ret = validate_vp9_lf_params(&frame->lf); if (ret) return ret; ret = validate_vp9_quant_params(&frame->quant); if (ret) return ret; ret = validate_vp9_seg_params(&frame->seg); if (ret) return ret; zero_reserved(*frame); return 0; } static int validate_av1_quantization(struct v4l2_av1_quantization *q) { if (q->flags > GENMASK(2, 0)) return -EINVAL; if (q->delta_q_y_dc < -64 || q->delta_q_y_dc > 63 || q->delta_q_u_dc < -64 || q->delta_q_u_dc > 63 || q->delta_q_v_dc < -64 || q->delta_q_v_dc > 63 || q->delta_q_u_ac < -64 || q->delta_q_u_ac > 63 || q->delta_q_v_ac < -64 || q->delta_q_v_ac > 63 || q->delta_q_res > GENMASK(1, 0)) return -EINVAL; if (q->qm_y > GENMASK(3, 0) || q->qm_u > GENMASK(3, 0) || q->qm_v > GENMASK(3, 0)) return -EINVAL; return 0; } static int validate_av1_segmentation(struct v4l2_av1_segmentation *s) { u32 i; u32 j; if (s->flags > GENMASK(4, 0)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(s->feature_data); i++) { static const int segmentation_feature_signed[] = { 1, 1, 1, 1, 1, 0, 0, 0 }; static const int segmentation_feature_max[] = { 255, 63, 63, 63, 63, 7, 0, 0}; for (j = 0; j < ARRAY_SIZE(s->feature_data[j]); j++) { s32 limit = segmentation_feature_max[j]; if (segmentation_feature_signed[j]) { if (s->feature_data[i][j] < -limit || s->feature_data[i][j] > limit) return -EINVAL; } else { if (s->feature_data[i][j] < 0 || s->feature_data[i][j] > limit) return -EINVAL; } } } return 0; } static int validate_av1_loop_filter(struct v4l2_av1_loop_filter *lf) { u32 i; if (lf->flags > GENMASK(3, 0)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(lf->level); i++) { if (lf->level[i] > GENMASK(5, 0)) return -EINVAL; } if (lf->sharpness > GENMASK(2, 0)) return -EINVAL; for (i = 0; i < ARRAY_SIZE(lf->ref_deltas); i++) { if (lf->ref_deltas[i] < -64 || lf->ref_deltas[i] > 63) return -EINVAL; } for (i = 0; i < ARRAY_SIZE(lf->mode_deltas); i++) { if (lf->mode_deltas[i] < -64 || lf->mode_deltas[i] > 63) return -EINVAL; } return 0; } static int validate_av1_cdef(struct v4l2_av1_cdef *cdef) { u32 i; if (cdef->damping_minus_3 > GENMASK(1, 0) || cdef->bits > GENMASK(1, 0)) return -EINVAL; for (i = 0; i < 1 << cdef->bits; i++) { if (cdef->y_pri_strength[i] > GENMASK(3, 0) || cdef->y_sec_strength[i] > 4 || cdef->uv_pri_strength[i] > GENMASK(3, 0) || cdef->uv_sec_strength[i] > 4) return -EINVAL; } return 0; } static int validate_av1_loop_restauration(struct v4l2_av1_loop_restoration *lr) { if (lr->lr_unit_shift > 3 || lr->lr_uv_shift > 1) return -EINVAL; return 0; } static int validate_av1_film_grain(struct v4l2_ctrl_av1_film_grain *fg) { u32 i; if (fg->flags > GENMASK(4, 0)) return -EINVAL; if (fg->film_grain_params_ref_idx > GENMASK(2, 0) || fg->num_y_points > 14 || fg->num_cb_points > 10 || fg->num_cr_points > GENMASK(3, 0) || fg->grain_scaling_minus_8 > GENMASK(1, 0) || fg->ar_coeff_lag > GENMASK(1, 0) || fg->ar_coeff_shift_minus_6 > GENMASK(1, 0) || fg->grain_scale_shift > GENMASK(1, 0)) return -EINVAL; if (!(fg->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) return 0; for (i = 1; i < fg->num_y_points; i++) if (fg->point_y_value[i] <= fg->point_y_value[i - 1]) return -EINVAL; for (i = 1; i < fg->num_cb_points; i++) if (fg->point_cb_value[i] <= fg->point_cb_value[i - 1]) return -EINVAL; for (i = 1; i < fg->num_cr_points; i++) if (fg->point_cr_value[i] <= fg->point_cr_value[i - 1]) return -EINVAL; return 0; } static int validate_av1_frame(struct v4l2_ctrl_av1_frame *f) { int 
ret = 0; ret = validate_av1_quantization(&f->quantization); if (ret) return ret; ret = validate_av1_segmentation(&f->segmentation); if (ret) return ret; ret = validate_av1_loop_filter(&f->loop_filter); if (ret) return ret; ret = validate_av1_cdef(&f->cdef); if (ret) return ret; ret = validate_av1_loop_restauration(&f->loop_restoration); if (ret) return ret; if (f->flags & ~(V4L2_AV1_FRAME_FLAG_SHOW_FRAME | V4L2_AV1_FRAME_FLAG_SHOWABLE_FRAME | V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE | V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE | V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS | V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV | V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC | V4L2_AV1_FRAME_FLAG_USE_SUPERRES | V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV | V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE | V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS | V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF | V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION | V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT | V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET | V4L2_AV1_FRAME_FLAG_SKIP_MODE_ALLOWED | V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT | V4L2_AV1_FRAME_FLAG_FRAME_SIZE_OVERRIDE | V4L2_AV1_FRAME_FLAG_BUFFER_REMOVAL_TIME_PRESENT | V4L2_AV1_FRAME_FLAG_FRAME_REFS_SHORT_SIGNALING)) return -EINVAL; if (f->superres_denom > GENMASK(2, 0) + 9) return -EINVAL; return 0; } static int validate_av1_sequence(struct v4l2_ctrl_av1_sequence *s) { if (s->flags & ~(V4L2_AV1_SEQUENCE_FLAG_STILL_PICTURE | V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK | V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA | V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER | V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND | V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND | V4L2_AV1_SEQUENCE_FLAG_ENABLE_WARPED_MOTION | V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER | V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT | V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP | V4L2_AV1_SEQUENCE_FLAG_ENABLE_REF_FRAME_MVS | V4L2_AV1_SEQUENCE_FLAG_ENABLE_SUPERRES | V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF | V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION | V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME | V4L2_AV1_SEQUENCE_FLAG_COLOR_RANGE | V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_X | V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_Y | V4L2_AV1_SEQUENCE_FLAG_FILM_GRAIN_PARAMS_PRESENT | V4L2_AV1_SEQUENCE_FLAG_SEPARATE_UV_DELTA_Q)) return -EINVAL; if (s->seq_profile == 1 && s->flags & V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME) return -EINVAL; /* reserved */ if (s->seq_profile > 2) return -EINVAL; /* TODO: PROFILES */ return 0; } /* * Compound controls validation requires setting unused fields/flags to zero * in order to properly detect unchanged controls with v4l2_ctrl_type_op_equal's * memcmp. 
*/ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx, union v4l2_ctrl_ptr ptr) { struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence; struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture; struct v4l2_ctrl_vp8_frame *p_vp8_frame; struct v4l2_ctrl_fwht_params *p_fwht_params; struct v4l2_ctrl_h264_sps *p_h264_sps; struct v4l2_ctrl_h264_pps *p_h264_pps; struct v4l2_ctrl_h264_pred_weights *p_h264_pred_weights; struct v4l2_ctrl_h264_slice_params *p_h264_slice_params; struct v4l2_ctrl_h264_decode_params *p_h264_dec_params; struct v4l2_ctrl_hevc_sps *p_hevc_sps; struct v4l2_ctrl_hevc_pps *p_hevc_pps; struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering; struct v4l2_ctrl_hevc_decode_params *p_hevc_decode_params; struct v4l2_area *area; void *p = ptr.p + idx * ctrl->elem_size; unsigned int i; switch ((u32)ctrl->type) { case V4L2_CTRL_TYPE_MPEG2_SEQUENCE: p_mpeg2_sequence = p; switch (p_mpeg2_sequence->chroma_format) { case 1: /* 4:2:0 */ case 2: /* 4:2:2 */ case 3: /* 4:4:4 */ break; default: return -EINVAL; } break; case V4L2_CTRL_TYPE_MPEG2_PICTURE: p_mpeg2_picture = p; switch (p_mpeg2_picture->intra_dc_precision) { case 0: /* 8 bits */ case 1: /* 9 bits */ case 2: /* 10 bits */ case 3: /* 11 bits */ break; default: return -EINVAL; } switch (p_mpeg2_picture->picture_structure) { case V4L2_MPEG2_PIC_TOP_FIELD: case V4L2_MPEG2_PIC_BOTTOM_FIELD: case V4L2_MPEG2_PIC_FRAME: break; default: return -EINVAL; } switch (p_mpeg2_picture->picture_coding_type) { case V4L2_MPEG2_PIC_CODING_TYPE_I: case V4L2_MPEG2_PIC_CODING_TYPE_P: case V4L2_MPEG2_PIC_CODING_TYPE_B: break; default: return -EINVAL; } zero_reserved(*p_mpeg2_picture); break; case V4L2_CTRL_TYPE_MPEG2_QUANTISATION: break; case V4L2_CTRL_TYPE_FWHT_PARAMS: p_fwht_params = p; if (p_fwht_params->version < V4L2_FWHT_VERSION) return -EINVAL; if (!p_fwht_params->width || !p_fwht_params->height) return -EINVAL; break; case V4L2_CTRL_TYPE_H264_SPS: p_h264_sps = p; /* Some syntax elements are only conditionally valid */ if (p_h264_sps->pic_order_cnt_type != 0) { p_h264_sps->log2_max_pic_order_cnt_lsb_minus4 = 0; } else if (p_h264_sps->pic_order_cnt_type != 1) { p_h264_sps->num_ref_frames_in_pic_order_cnt_cycle = 0; p_h264_sps->offset_for_non_ref_pic = 0; p_h264_sps->offset_for_top_to_bottom_field = 0; memset(&p_h264_sps->offset_for_ref_frame, 0, sizeof(p_h264_sps->offset_for_ref_frame)); } if (!V4L2_H264_SPS_HAS_CHROMA_FORMAT(p_h264_sps)) { p_h264_sps->chroma_format_idc = 1; p_h264_sps->bit_depth_luma_minus8 = 0; p_h264_sps->bit_depth_chroma_minus8 = 0; p_h264_sps->flags &= ~V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS; if (p_h264_sps->chroma_format_idc < 3) p_h264_sps->flags &= ~V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE; } if (p_h264_sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) p_h264_sps->flags &= ~V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD; /* * Chroma 4:2:2 format require at least High 4:2:2 profile. * * The H264 specification and well-known parser implementations * use profile-idc values directly, as that is clearer and * less ambiguous. We do the same here. 
*/ if (p_h264_sps->profile_idc < 122 && p_h264_sps->chroma_format_idc > 1) return -EINVAL; /* Chroma 4:4:4 format require at least High 4:2:2 profile */ if (p_h264_sps->profile_idc < 244 && p_h264_sps->chroma_format_idc > 2) return -EINVAL; if (p_h264_sps->chroma_format_idc > 3) return -EINVAL; if (p_h264_sps->bit_depth_luma_minus8 > 6) return -EINVAL; if (p_h264_sps->bit_depth_chroma_minus8 > 6) return -EINVAL; if (p_h264_sps->log2_max_frame_num_minus4 > 12) return -EINVAL; if (p_h264_sps->pic_order_cnt_type > 2) return -EINVAL; if (p_h264_sps->log2_max_pic_order_cnt_lsb_minus4 > 12) return -EINVAL; if (p_h264_sps->max_num_ref_frames > V4L2_H264_REF_LIST_LEN) return -EINVAL; break; case V4L2_CTRL_TYPE_H264_PPS: p_h264_pps = p; if (p_h264_pps->num_slice_groups_minus1 > 7) return -EINVAL; if (p_h264_pps->num_ref_idx_l0_default_active_minus1 > (V4L2_H264_REF_LIST_LEN - 1)) return -EINVAL; if (p_h264_pps->num_ref_idx_l1_default_active_minus1 > (V4L2_H264_REF_LIST_LEN - 1)) return -EINVAL; if (p_h264_pps->weighted_bipred_idc > 2) return -EINVAL; /* * pic_init_qp_minus26 shall be in the range of * -(26 + QpBdOffset_y) to +25, inclusive, * where QpBdOffset_y is 6 * bit_depth_luma_minus8 */ if (p_h264_pps->pic_init_qp_minus26 < -62 || p_h264_pps->pic_init_qp_minus26 > 25) return -EINVAL; if (p_h264_pps->pic_init_qs_minus26 < -26 || p_h264_pps->pic_init_qs_minus26 > 25) return -EINVAL; if (p_h264_pps->chroma_qp_index_offset < -12 || p_h264_pps->chroma_qp_index_offset > 12) return -EINVAL; if (p_h264_pps->second_chroma_qp_index_offset < -12 || p_h264_pps->second_chroma_qp_index_offset > 12) return -EINVAL; break; case V4L2_CTRL_TYPE_H264_SCALING_MATRIX: break; case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS: p_h264_pred_weights = p; if (p_h264_pred_weights->luma_log2_weight_denom > 7) return -EINVAL; if (p_h264_pred_weights->chroma_log2_weight_denom > 7) return -EINVAL; break; case V4L2_CTRL_TYPE_H264_SLICE_PARAMS: p_h264_slice_params = p; if (p_h264_slice_params->slice_type != V4L2_H264_SLICE_TYPE_B) p_h264_slice_params->flags &= ~V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED; if (p_h264_slice_params->colour_plane_id > 2) return -EINVAL; if (p_h264_slice_params->cabac_init_idc > 2) return -EINVAL; if (p_h264_slice_params->disable_deblocking_filter_idc > 2) return -EINVAL; if (p_h264_slice_params->slice_alpha_c0_offset_div2 < -6 || p_h264_slice_params->slice_alpha_c0_offset_div2 > 6) return -EINVAL; if (p_h264_slice_params->slice_beta_offset_div2 < -6 || p_h264_slice_params->slice_beta_offset_div2 > 6) return -EINVAL; if (p_h264_slice_params->slice_type == V4L2_H264_SLICE_TYPE_I || p_h264_slice_params->slice_type == V4L2_H264_SLICE_TYPE_SI) p_h264_slice_params->num_ref_idx_l0_active_minus1 = 0; if (p_h264_slice_params->slice_type != V4L2_H264_SLICE_TYPE_B) p_h264_slice_params->num_ref_idx_l1_active_minus1 = 0; if (p_h264_slice_params->num_ref_idx_l0_active_minus1 > (V4L2_H264_REF_LIST_LEN - 1)) return -EINVAL; if (p_h264_slice_params->num_ref_idx_l1_active_minus1 > (V4L2_H264_REF_LIST_LEN - 1)) return -EINVAL; zero_reserved(*p_h264_slice_params); break; case V4L2_CTRL_TYPE_H264_DECODE_PARAMS: p_h264_dec_params = p; if (p_h264_dec_params->nal_ref_idc > 3) return -EINVAL; for (i = 0; i < V4L2_H264_NUM_DPB_ENTRIES; i++) { struct v4l2_h264_dpb_entry *dpb_entry = &p_h264_dec_params->dpb[i]; zero_reserved(*dpb_entry); } zero_reserved(*p_h264_dec_params); break; case V4L2_CTRL_TYPE_VP8_FRAME: p_vp8_frame = p; switch (p_vp8_frame->num_dct_parts) { case 1: case 2: case 4: case 8: break; default: return -EINVAL; } 
zero_padding(p_vp8_frame->segment); zero_padding(p_vp8_frame->lf); zero_padding(p_vp8_frame->quant); zero_padding(p_vp8_frame->entropy); zero_padding(p_vp8_frame->coder_state); break; case V4L2_CTRL_TYPE_HEVC_SPS: p_hevc_sps = p; if (!(p_hevc_sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) { p_hevc_sps->pcm_sample_bit_depth_luma_minus1 = 0; p_hevc_sps->pcm_sample_bit_depth_chroma_minus1 = 0; p_hevc_sps->log2_min_pcm_luma_coding_block_size_minus3 = 0; p_hevc_sps->log2_diff_max_min_pcm_luma_coding_block_size = 0; } if (!(p_hevc_sps->flags & V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT)) p_hevc_sps->num_long_term_ref_pics_sps = 0; break; case V4L2_CTRL_TYPE_HEVC_PPS: p_hevc_pps = p; if (!(p_hevc_pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED)) p_hevc_pps->diff_cu_qp_delta_depth = 0; if (!(p_hevc_pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) { p_hevc_pps->num_tile_columns_minus1 = 0; p_hevc_pps->num_tile_rows_minus1 = 0; memset(&p_hevc_pps->column_width_minus1, 0, sizeof(p_hevc_pps->column_width_minus1)); memset(&p_hevc_pps->row_height_minus1, 0, sizeof(p_hevc_pps->row_height_minus1)); p_hevc_pps->flags &= ~V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED; } if (p_hevc_pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER) { p_hevc_pps->pps_beta_offset_div2 = 0; p_hevc_pps->pps_tc_offset_div2 = 0; } break; case V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS: p_hevc_decode_params = p; if (p_hevc_decode_params->num_active_dpb_entries > V4L2_HEVC_DPB_ENTRIES_NUM_MAX) return -EINVAL; break; case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS: break; case V4L2_CTRL_TYPE_HDR10_CLL_INFO: break; case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY: p_hdr10_mastering = p; for (i = 0; i < 3; ++i) { if (p_hdr10_mastering->display_primaries_x[i] < V4L2_HDR10_MASTERING_PRIMARIES_X_LOW || p_hdr10_mastering->display_primaries_x[i] > V4L2_HDR10_MASTERING_PRIMARIES_X_HIGH || p_hdr10_mastering->display_primaries_y[i] < V4L2_HDR10_MASTERING_PRIMARIES_Y_LOW || p_hdr10_mastering->display_primaries_y[i] > V4L2_HDR10_MASTERING_PRIMARIES_Y_HIGH) return -EINVAL; } if (p_hdr10_mastering->white_point_x < V4L2_HDR10_MASTERING_WHITE_POINT_X_LOW || p_hdr10_mastering->white_point_x > V4L2_HDR10_MASTERING_WHITE_POINT_X_HIGH || p_hdr10_mastering->white_point_y < V4L2_HDR10_MASTERING_WHITE_POINT_Y_LOW || p_hdr10_mastering->white_point_y > V4L2_HDR10_MASTERING_WHITE_POINT_Y_HIGH) return -EINVAL; if (p_hdr10_mastering->max_display_mastering_luminance < V4L2_HDR10_MASTERING_MAX_LUMA_LOW || p_hdr10_mastering->max_display_mastering_luminance > V4L2_HDR10_MASTERING_MAX_LUMA_HIGH || p_hdr10_mastering->min_display_mastering_luminance < V4L2_HDR10_MASTERING_MIN_LUMA_LOW || p_hdr10_mastering->min_display_mastering_luminance > V4L2_HDR10_MASTERING_MIN_LUMA_HIGH) return -EINVAL; /* The following restriction comes from ITU-T Rec. 
H.265 spec */ if (p_hdr10_mastering->max_display_mastering_luminance == V4L2_HDR10_MASTERING_MAX_LUMA_LOW && p_hdr10_mastering->min_display_mastering_luminance == V4L2_HDR10_MASTERING_MIN_LUMA_HIGH) return -EINVAL; break; case V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX: break; case V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR: return validate_vp9_compressed_hdr(p); case V4L2_CTRL_TYPE_VP9_FRAME: return validate_vp9_frame(p); case V4L2_CTRL_TYPE_AV1_FRAME: return validate_av1_frame(p); case V4L2_CTRL_TYPE_AV1_SEQUENCE: return validate_av1_sequence(p); case V4L2_CTRL_TYPE_AV1_TILE_GROUP_ENTRY: break; case V4L2_CTRL_TYPE_AV1_FILM_GRAIN: return validate_av1_film_grain(p); case V4L2_CTRL_TYPE_AREA: area = p; if (!area->width || !area->height) return -EINVAL; break; default: return -EINVAL; } return 0; } static int std_validate_elem(const struct v4l2_ctrl *ctrl, u32 idx, union v4l2_ctrl_ptr ptr) { size_t len; u64 offset; s64 val; switch ((u32)ctrl->type) { case V4L2_CTRL_TYPE_INTEGER: return ROUND_TO_RANGE(ptr.p_s32[idx], u32, ctrl); case V4L2_CTRL_TYPE_INTEGER64: /* * We can't use the ROUND_TO_RANGE define here due to * the u64 divide that needs special care. */ val = ptr.p_s64[idx]; if (ctrl->maximum >= 0 && val >= ctrl->maximum - (s64)(ctrl->step / 2)) val = ctrl->maximum; else val += (s64)(ctrl->step / 2); val = clamp_t(s64, val, ctrl->minimum, ctrl->maximum); offset = val - ctrl->minimum; do_div(offset, ctrl->step); ptr.p_s64[idx] = ctrl->minimum + offset * ctrl->step; return 0; case V4L2_CTRL_TYPE_U8: return ROUND_TO_RANGE(ptr.p_u8[idx], u8, ctrl); case V4L2_CTRL_TYPE_U16: return ROUND_TO_RANGE(ptr.p_u16[idx], u16, ctrl); case V4L2_CTRL_TYPE_U32: return ROUND_TO_RANGE(ptr.p_u32[idx], u32, ctrl); case V4L2_CTRL_TYPE_BOOLEAN: ptr.p_s32[idx] = !!ptr.p_s32[idx]; return 0; case V4L2_CTRL_TYPE_MENU: case V4L2_CTRL_TYPE_INTEGER_MENU: if (ptr.p_s32[idx] < ctrl->minimum || ptr.p_s32[idx] > ctrl->maximum) return -ERANGE; if (ptr.p_s32[idx] < BITS_PER_LONG_LONG && (ctrl->menu_skip_mask & BIT_ULL(ptr.p_s32[idx]))) return -EINVAL; if (ctrl->type == V4L2_CTRL_TYPE_MENU && ctrl->qmenu[ptr.p_s32[idx]][0] == '\0') return -EINVAL; return 0; case V4L2_CTRL_TYPE_BITMASK: ptr.p_s32[idx] &= ctrl->maximum; return 0; case V4L2_CTRL_TYPE_BUTTON: case V4L2_CTRL_TYPE_CTRL_CLASS: ptr.p_s32[idx] = 0; return 0; case V4L2_CTRL_TYPE_STRING: idx *= ctrl->elem_size; len = strlen(ptr.p_char + idx); if (len < ctrl->minimum) return -ERANGE; if ((len - (u32)ctrl->minimum) % (u32)ctrl->step) return -ERANGE; return 0; default: return std_validate_compound(ctrl, idx, ptr); } } int v4l2_ctrl_type_op_validate(const struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr ptr) { unsigned int i; int ret = 0; switch ((u32)ctrl->type) { case V4L2_CTRL_TYPE_U8: if (ctrl->maximum == 0xff && ctrl->minimum == 0 && ctrl->step == 1) return 0; break; case V4L2_CTRL_TYPE_U16: if (ctrl->maximum == 0xffff && ctrl->minimum == 0 && ctrl->step == 1) return 0; break; case V4L2_CTRL_TYPE_U32: if (ctrl->maximum == 0xffffffff && ctrl->minimum == 0 && ctrl->step == 1) return 0; break; case V4L2_CTRL_TYPE_BUTTON: case V4L2_CTRL_TYPE_CTRL_CLASS: memset(ptr.p_s32, 0, ctrl->new_elems * sizeof(s32)); return 0; } for (i = 0; !ret && i < ctrl->new_elems; i++) ret = std_validate_elem(ctrl, i, ptr); return ret; } EXPORT_SYMBOL(v4l2_ctrl_type_op_validate); static const struct v4l2_ctrl_type_ops std_type_ops = { .equal = v4l2_ctrl_type_op_equal, .init = v4l2_ctrl_type_op_init, .log = v4l2_ctrl_type_op_log, .validate = v4l2_ctrl_type_op_validate, }; void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl, 
v4l2_ctrl_notify_fnc notify, void *priv) { if (!ctrl) return; if (!notify) { ctrl->call_notify = 0; return; } if (WARN_ON(ctrl->handler->notify && ctrl->handler->notify != notify)) return; ctrl->handler->notify = notify; ctrl->handler->notify_priv = priv; ctrl->call_notify = 1; } EXPORT_SYMBOL(v4l2_ctrl_notify); /* Copy the one value to another. */ static void ptr_to_ptr(struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr from, union v4l2_ctrl_ptr to, unsigned int elems) { if (ctrl == NULL) return; memcpy(to.p, from.p_const, elems * ctrl->elem_size); } /* Copy the new value to the current value. */ void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 ch_flags) { bool changed; if (ctrl == NULL) return; /* has_changed is set by cluster_changed */ changed = ctrl->has_changed; if (changed) { if (ctrl->is_dyn_array) ctrl->elems = ctrl->new_elems; ptr_to_ptr(ctrl, ctrl->p_new, ctrl->p_cur, ctrl->elems); } if (ch_flags & V4L2_EVENT_CTRL_CH_FLAGS) { /* Note: CH_FLAGS is only set for auto clusters. */ ctrl->flags &= ~(V4L2_CTRL_FLAG_INACTIVE | V4L2_CTRL_FLAG_VOLATILE); if (!is_cur_manual(ctrl->cluster[0])) { ctrl->flags |= V4L2_CTRL_FLAG_INACTIVE; if (ctrl->cluster[0]->has_volatiles) ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE; } fh = NULL; } if (changed || ch_flags) { /* If a control was changed that was not one of the controls modified by the application, then send the event to all. */ if (!ctrl->is_new) fh = NULL; send_event(fh, ctrl, (changed ? V4L2_EVENT_CTRL_CH_VALUE : 0) | ch_flags); if (ctrl->call_notify && changed && ctrl->handler->notify) ctrl->handler->notify(ctrl, ctrl->handler->notify_priv); } } /* Copy the current value to the new value */ void cur_to_new(struct v4l2_ctrl *ctrl) { if (ctrl == NULL) return; if (ctrl->is_dyn_array) ctrl->new_elems = ctrl->elems; ptr_to_ptr(ctrl, ctrl->p_cur, ctrl->p_new, ctrl->new_elems); } static bool req_alloc_array(struct v4l2_ctrl_ref *ref, u32 elems) { void *tmp; if (elems == ref->p_req_array_alloc_elems) return true; if (ref->ctrl->is_dyn_array && elems < ref->p_req_array_alloc_elems) return true; tmp = kvmalloc(elems * ref->ctrl->elem_size, GFP_KERNEL); if (!tmp) { ref->p_req_array_enomem = true; return false; } ref->p_req_array_enomem = false; kvfree(ref->p_req.p); ref->p_req.p = tmp; ref->p_req_array_alloc_elems = elems; return true; } /* Copy the new value to the request value */ void new_to_req(struct v4l2_ctrl_ref *ref) { struct v4l2_ctrl *ctrl; if (!ref) return; ctrl = ref->ctrl; if (ctrl->is_array && !req_alloc_array(ref, ctrl->new_elems)) return; ref->p_req_elems = ctrl->new_elems; ptr_to_ptr(ctrl, ctrl->p_new, ref->p_req, ref->p_req_elems); ref->p_req_valid = true; } /* Copy the current value to the request value */ void cur_to_req(struct v4l2_ctrl_ref *ref) { struct v4l2_ctrl *ctrl; if (!ref) return; ctrl = ref->ctrl; if (ctrl->is_array && !req_alloc_array(ref, ctrl->elems)) return; ref->p_req_elems = ctrl->elems; ptr_to_ptr(ctrl, ctrl->p_cur, ref->p_req, ctrl->elems); ref->p_req_valid = true; } /* Copy the request value to the new value */ int req_to_new(struct v4l2_ctrl_ref *ref) { struct v4l2_ctrl *ctrl; if (!ref) return 0; ctrl = ref->ctrl; /* * This control was never set in the request, so just use the current * value. 
*/ if (!ref->p_req_valid) { if (ctrl->is_dyn_array) ctrl->new_elems = ctrl->elems; ptr_to_ptr(ctrl, ctrl->p_cur, ctrl->p_new, ctrl->new_elems); return 0; } /* Not an array, so just copy the request value */ if (!ctrl->is_array) { ptr_to_ptr(ctrl, ref->p_req, ctrl->p_new, ctrl->new_elems); return 0; } /* Sanity check, should never happen */ if (WARN_ON(!ref->p_req_array_alloc_elems)) return -ENOMEM; if (!ctrl->is_dyn_array && ref->p_req_elems != ctrl->p_array_alloc_elems) return -ENOMEM; /* * Check if the number of elements in the request is more than the * elements in ctrl->p_array. If so, attempt to realloc ctrl->p_array. * Note that p_array is allocated with twice the number of elements * in the dynamic array since it has to store both the current and * new value of such a control. */ if (ref->p_req_elems > ctrl->p_array_alloc_elems) { unsigned int sz = ref->p_req_elems * ctrl->elem_size; void *old = ctrl->p_array; void *tmp = kvzalloc(2 * sz, GFP_KERNEL); if (!tmp) return -ENOMEM; memcpy(tmp, ctrl->p_new.p, ctrl->elems * ctrl->elem_size); memcpy(tmp + sz, ctrl->p_cur.p, ctrl->elems * ctrl->elem_size); ctrl->p_new.p = tmp; ctrl->p_cur.p = tmp + sz; ctrl->p_array = tmp; ctrl->p_array_alloc_elems = ref->p_req_elems; kvfree(old); } ctrl->new_elems = ref->p_req_elems; ptr_to_ptr(ctrl, ref->p_req, ctrl->p_new, ctrl->new_elems); return 0; } /* Control range checking */ int check_range(enum v4l2_ctrl_type type, s64 min, s64 max, u64 step, s64 def) { switch (type) { case V4L2_CTRL_TYPE_BOOLEAN: if (step != 1 || max > 1 || min < 0) return -ERANGE; fallthrough; case V4L2_CTRL_TYPE_U8: case V4L2_CTRL_TYPE_U16: case V4L2_CTRL_TYPE_U32: case V4L2_CTRL_TYPE_INTEGER: case V4L2_CTRL_TYPE_INTEGER64: if (step == 0 || min > max || def < min || def > max) return -ERANGE; return 0; case V4L2_CTRL_TYPE_BITMASK: if (step || min || !max || (def & ~max)) return -ERANGE; return 0; case V4L2_CTRL_TYPE_MENU: case V4L2_CTRL_TYPE_INTEGER_MENU: if (min > max || def < min || def > max || min < 0 || (step && max >= BITS_PER_LONG_LONG)) return -ERANGE; /* Note: step == menu_skip_mask for menu controls. So here we check if the default value is masked out. */ if (def < BITS_PER_LONG_LONG && (step & BIT_ULL(def))) return -EINVAL; return 0; case V4L2_CTRL_TYPE_STRING: if (min > max || min < 0 || step < 1 || def) return -ERANGE; return 0; default: return 0; } } /* Set the handler's error code if it wasn't set earlier already */ static inline int handler_set_err(struct v4l2_ctrl_handler *hdl, int err) { if (hdl->error == 0) hdl->error = err; return err; } /* Initialize the handler */ int v4l2_ctrl_handler_init_class(struct v4l2_ctrl_handler *hdl, unsigned nr_of_controls_hint, struct lock_class_key *key, const char *name) { mutex_init(&hdl->_lock); hdl->lock = &hdl->_lock; lockdep_set_class_and_name(hdl->lock, key, name); INIT_LIST_HEAD(&hdl->ctrls); INIT_LIST_HEAD(&hdl->ctrl_refs); hdl->nr_of_buckets = 1 + nr_of_controls_hint / 8; hdl->buckets = kvcalloc(hdl->nr_of_buckets, sizeof(hdl->buckets[0]), GFP_KERNEL); hdl->error = hdl->buckets ? 
0 : -ENOMEM; v4l2_ctrl_handler_init_request(hdl); return hdl->error; } EXPORT_SYMBOL(v4l2_ctrl_handler_init_class); /* Free all controls and control refs */ void v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl) { struct v4l2_ctrl_ref *ref, *next_ref; struct v4l2_ctrl *ctrl, *next_ctrl; struct v4l2_subscribed_event *sev, *next_sev; if (hdl == NULL || hdl->buckets == NULL) return; v4l2_ctrl_handler_free_request(hdl); mutex_lock(hdl->lock); /* Free all nodes */ list_for_each_entry_safe(ref, next_ref, &hdl->ctrl_refs, node) { list_del(&ref->node); if (ref->p_req_array_alloc_elems) kvfree(ref->p_req.p); kfree(ref); } /* Free all controls owned by the handler */ list_for_each_entry_safe(ctrl, next_ctrl, &hdl->ctrls, node) { list_del(&ctrl->node); list_for_each_entry_safe(sev, next_sev, &ctrl->ev_subs, node) list_del(&sev->node); kvfree(ctrl->p_array); kvfree(ctrl); } kvfree(hdl->buckets); hdl->buckets = NULL; hdl->cached = NULL; hdl->error = 0; mutex_unlock(hdl->lock); mutex_destroy(&hdl->_lock); } EXPORT_SYMBOL(v4l2_ctrl_handler_free); /* For backwards compatibility: V4L2_CID_PRIVATE_BASE should no longer be used except in G_CTRL, S_CTRL, QUERYCTRL and QUERYMENU when dealing with applications that do not use the NEXT_CTRL flag. We just find the n-th private user control. It's O(N), but that should not be an issue in this particular case. */ static struct v4l2_ctrl_ref *find_private_ref( struct v4l2_ctrl_handler *hdl, u32 id) { struct v4l2_ctrl_ref *ref; id -= V4L2_CID_PRIVATE_BASE; list_for_each_entry(ref, &hdl->ctrl_refs, node) { /* Search for private user controls that are compatible with VIDIOC_G/S_CTRL. */ if (V4L2_CTRL_ID2WHICH(ref->ctrl->id) == V4L2_CTRL_CLASS_USER && V4L2_CTRL_DRIVER_PRIV(ref->ctrl->id)) { if (!ref->ctrl->is_int) continue; if (id == 0) return ref; id--; } } return NULL; } /* Find a control with the given ID. */ struct v4l2_ctrl_ref *find_ref(struct v4l2_ctrl_handler *hdl, u32 id) { struct v4l2_ctrl_ref *ref; int bucket; id &= V4L2_CTRL_ID_MASK; /* Old-style private controls need special handling */ if (id >= V4L2_CID_PRIVATE_BASE) return find_private_ref(hdl, id); bucket = id % hdl->nr_of_buckets; /* Simple optimization: cache the last control found */ if (hdl->cached && hdl->cached->ctrl->id == id) return hdl->cached; /* Not in cache, search the hash */ ref = hdl->buckets ? hdl->buckets[bucket] : NULL; while (ref && ref->ctrl->id != id) ref = ref->next; if (ref) hdl->cached = ref; /* cache it! */ return ref; } /* Find a control with the given ID. Take the handler's lock first. */ struct v4l2_ctrl_ref *find_ref_lock(struct v4l2_ctrl_handler *hdl, u32 id) { struct v4l2_ctrl_ref *ref = NULL; if (hdl) { mutex_lock(hdl->lock); ref = find_ref(hdl, id); mutex_unlock(hdl->lock); } return ref; } /* Find a control with the given ID. */ struct v4l2_ctrl *v4l2_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id) { struct v4l2_ctrl_ref *ref = find_ref_lock(hdl, id); return ref ? ref->ctrl : NULL; } EXPORT_SYMBOL(v4l2_ctrl_find); /* Allocate a new v4l2_ctrl_ref and hook it into the handler. 
*/ int handler_new_ref(struct v4l2_ctrl_handler *hdl, struct v4l2_ctrl *ctrl, struct v4l2_ctrl_ref **ctrl_ref, bool from_other_dev, bool allocate_req) { struct v4l2_ctrl_ref *ref; struct v4l2_ctrl_ref *new_ref; u32 id = ctrl->id; u32 class_ctrl = V4L2_CTRL_ID2WHICH(id) | 1; int bucket = id % hdl->nr_of_buckets; /* which bucket to use */ unsigned int size_extra_req = 0; if (ctrl_ref) *ctrl_ref = NULL; /* * Automatically add the control class if it is not yet present and * the new control is not a compound control. */ if (ctrl->type < V4L2_CTRL_COMPOUND_TYPES && id != class_ctrl && find_ref_lock(hdl, class_ctrl) == NULL) if (!v4l2_ctrl_new_std(hdl, NULL, class_ctrl, 0, 0, 0, 0)) return hdl->error; if (hdl->error) return hdl->error; if (allocate_req && !ctrl->is_array) size_extra_req = ctrl->elems * ctrl->elem_size; new_ref = kzalloc(sizeof(*new_ref) + size_extra_req, GFP_KERNEL); if (!new_ref) return handler_set_err(hdl, -ENOMEM); new_ref->ctrl = ctrl; new_ref->from_other_dev = from_other_dev; if (size_extra_req) new_ref->p_req.p = &new_ref[1]; INIT_LIST_HEAD(&new_ref->node); mutex_lock(hdl->lock); /* Add immediately at the end of the list if the list is empty, or if the last element in the list has a lower ID. This ensures that when elements are added in ascending order the insertion is an O(1) operation. */ if (list_empty(&hdl->ctrl_refs) || id > node2id(hdl->ctrl_refs.prev)) { list_add_tail(&new_ref->node, &hdl->ctrl_refs); goto insert_in_hash; } /* Find insert position in sorted list */ list_for_each_entry(ref, &hdl->ctrl_refs, node) { if (ref->ctrl->id < id) continue; /* Don't add duplicates */ if (ref->ctrl->id == id) { kfree(new_ref); goto unlock; } list_add(&new_ref->node, ref->node.prev); break; } insert_in_hash: /* Insert the control node in the hash */ new_ref->next = hdl->buckets[bucket]; hdl->buckets[bucket] = new_ref; if (ctrl_ref) *ctrl_ref = new_ref; if (ctrl->handler == hdl) { /* By default each control starts in a cluster of its own. * new_ref->ctrl is basically a cluster array with one * element, so that's perfect to use as the cluster pointer. * But only do this for the handler that owns the control. 
*/ ctrl->cluster = &new_ref->ctrl; ctrl->ncontrols = 1; } unlock: mutex_unlock(hdl->lock); return 0; } /* Add a new control */ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_ops *ops, const struct v4l2_ctrl_type_ops *type_ops, u32 id, const char *name, enum v4l2_ctrl_type type, s64 min, s64 max, u64 step, s64 def, const u32 dims[V4L2_CTRL_MAX_DIMS], u32 elem_size, u32 flags, const char * const *qmenu, const s64 *qmenu_int, const union v4l2_ctrl_ptr p_def, void *priv) { struct v4l2_ctrl *ctrl; unsigned sz_extra; unsigned nr_of_dims = 0; unsigned elems = 1; bool is_array; unsigned tot_ctrl_size; void *data; int err; if (hdl->error) return NULL; while (dims && dims[nr_of_dims]) { elems *= dims[nr_of_dims]; nr_of_dims++; if (nr_of_dims == V4L2_CTRL_MAX_DIMS) break; } is_array = nr_of_dims > 0; /* Prefill elem_size for all types handled by std_type_ops */ switch ((u32)type) { case V4L2_CTRL_TYPE_INTEGER64: elem_size = sizeof(s64); break; case V4L2_CTRL_TYPE_STRING: elem_size = max + 1; break; case V4L2_CTRL_TYPE_U8: elem_size = sizeof(u8); break; case V4L2_CTRL_TYPE_U16: elem_size = sizeof(u16); break; case V4L2_CTRL_TYPE_U32: elem_size = sizeof(u32); break; case V4L2_CTRL_TYPE_MPEG2_SEQUENCE: elem_size = sizeof(struct v4l2_ctrl_mpeg2_sequence); break; case V4L2_CTRL_TYPE_MPEG2_PICTURE: elem_size = sizeof(struct v4l2_ctrl_mpeg2_picture); break; case V4L2_CTRL_TYPE_MPEG2_QUANTISATION: elem_size = sizeof(struct v4l2_ctrl_mpeg2_quantisation); break; case V4L2_CTRL_TYPE_FWHT_PARAMS: elem_size = sizeof(struct v4l2_ctrl_fwht_params); break; case V4L2_CTRL_TYPE_H264_SPS: elem_size = sizeof(struct v4l2_ctrl_h264_sps); break; case V4L2_CTRL_TYPE_H264_PPS: elem_size = sizeof(struct v4l2_ctrl_h264_pps); break; case V4L2_CTRL_TYPE_H264_SCALING_MATRIX: elem_size = sizeof(struct v4l2_ctrl_h264_scaling_matrix); break; case V4L2_CTRL_TYPE_H264_SLICE_PARAMS: elem_size = sizeof(struct v4l2_ctrl_h264_slice_params); break; case V4L2_CTRL_TYPE_H264_DECODE_PARAMS: elem_size = sizeof(struct v4l2_ctrl_h264_decode_params); break; case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS: elem_size = sizeof(struct v4l2_ctrl_h264_pred_weights); break; case V4L2_CTRL_TYPE_VP8_FRAME: elem_size = sizeof(struct v4l2_ctrl_vp8_frame); break; case V4L2_CTRL_TYPE_HEVC_SPS: elem_size = sizeof(struct v4l2_ctrl_hevc_sps); break; case V4L2_CTRL_TYPE_HEVC_PPS: elem_size = sizeof(struct v4l2_ctrl_hevc_pps); break; case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS: elem_size = sizeof(struct v4l2_ctrl_hevc_slice_params); break; case V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX: elem_size = sizeof(struct v4l2_ctrl_hevc_scaling_matrix); break; case V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS: elem_size = sizeof(struct v4l2_ctrl_hevc_decode_params); break; case V4L2_CTRL_TYPE_HDR10_CLL_INFO: elem_size = sizeof(struct v4l2_ctrl_hdr10_cll_info); break; case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY: elem_size = sizeof(struct v4l2_ctrl_hdr10_mastering_display); break; case V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR: elem_size = sizeof(struct v4l2_ctrl_vp9_compressed_hdr); break; case V4L2_CTRL_TYPE_VP9_FRAME: elem_size = sizeof(struct v4l2_ctrl_vp9_frame); break; case V4L2_CTRL_TYPE_AV1_SEQUENCE: elem_size = sizeof(struct v4l2_ctrl_av1_sequence); break; case V4L2_CTRL_TYPE_AV1_TILE_GROUP_ENTRY: elem_size = sizeof(struct v4l2_ctrl_av1_tile_group_entry); break; case V4L2_CTRL_TYPE_AV1_FRAME: elem_size = sizeof(struct v4l2_ctrl_av1_frame); break; case V4L2_CTRL_TYPE_AV1_FILM_GRAIN: elem_size = sizeof(struct v4l2_ctrl_av1_film_grain); break; case 
V4L2_CTRL_TYPE_AREA: elem_size = sizeof(struct v4l2_area); break; default: if (type < V4L2_CTRL_COMPOUND_TYPES) elem_size = sizeof(s32); break; } /* Sanity checks */ if (id == 0 || name == NULL || !elem_size || id >= V4L2_CID_PRIVATE_BASE || (type == V4L2_CTRL_TYPE_MENU && qmenu == NULL) || (type == V4L2_CTRL_TYPE_INTEGER_MENU && qmenu_int == NULL)) { handler_set_err(hdl, -ERANGE); return NULL; } err = check_range(type, min, max, step, def); if (err) { handler_set_err(hdl, err); return NULL; } if (is_array && (type == V4L2_CTRL_TYPE_BUTTON || type == V4L2_CTRL_TYPE_CTRL_CLASS)) { handler_set_err(hdl, -EINVAL); return NULL; } if (flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) { /* * For now only support this for one-dimensional arrays only. * * This can be relaxed in the future, but this will * require more effort. */ if (nr_of_dims != 1) { handler_set_err(hdl, -EINVAL); return NULL; } /* Start with just 1 element */ elems = 1; } tot_ctrl_size = elem_size * elems; sz_extra = 0; if (type == V4L2_CTRL_TYPE_BUTTON) flags |= V4L2_CTRL_FLAG_WRITE_ONLY | V4L2_CTRL_FLAG_EXECUTE_ON_WRITE; else if (type == V4L2_CTRL_TYPE_CTRL_CLASS) flags |= V4L2_CTRL_FLAG_READ_ONLY; else if (!is_array && (type == V4L2_CTRL_TYPE_INTEGER64 || type == V4L2_CTRL_TYPE_STRING || type >= V4L2_CTRL_COMPOUND_TYPES)) sz_extra += 2 * tot_ctrl_size; if (type >= V4L2_CTRL_COMPOUND_TYPES && p_def.p_const) sz_extra += elem_size; ctrl = kvzalloc(sizeof(*ctrl) + sz_extra, GFP_KERNEL); if (ctrl == NULL) { handler_set_err(hdl, -ENOMEM); return NULL; } INIT_LIST_HEAD(&ctrl->node); INIT_LIST_HEAD(&ctrl->ev_subs); ctrl->handler = hdl; ctrl->ops = ops; ctrl->type_ops = type_ops ? type_ops : &std_type_ops; ctrl->id = id; ctrl->name = name; ctrl->type = type; ctrl->flags = flags; ctrl->minimum = min; ctrl->maximum = max; ctrl->step = step; ctrl->default_value = def; ctrl->is_string = !is_array && type == V4L2_CTRL_TYPE_STRING; ctrl->is_ptr = is_array || type >= V4L2_CTRL_COMPOUND_TYPES || ctrl->is_string; ctrl->is_int = !ctrl->is_ptr && type != V4L2_CTRL_TYPE_INTEGER64; ctrl->is_array = is_array; ctrl->is_dyn_array = !!(flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY); ctrl->elems = elems; ctrl->new_elems = elems; ctrl->nr_of_dims = nr_of_dims; if (nr_of_dims) memcpy(ctrl->dims, dims, nr_of_dims * sizeof(dims[0])); ctrl->elem_size = elem_size; if (type == V4L2_CTRL_TYPE_MENU) ctrl->qmenu = qmenu; else if (type == V4L2_CTRL_TYPE_INTEGER_MENU) ctrl->qmenu_int = qmenu_int; ctrl->priv = priv; ctrl->cur.val = ctrl->val = def; data = &ctrl[1]; if (ctrl->is_array) { ctrl->p_array_alloc_elems = elems; ctrl->p_array = kvzalloc(2 * elems * elem_size, GFP_KERNEL); if (!ctrl->p_array) { kvfree(ctrl); return NULL; } data = ctrl->p_array; } if (!ctrl->is_int) { ctrl->p_new.p = data; ctrl->p_cur.p = data + tot_ctrl_size; } else { ctrl->p_new.p = &ctrl->val; ctrl->p_cur.p = &ctrl->cur.val; } if (type >= V4L2_CTRL_COMPOUND_TYPES && p_def.p_const) { if (ctrl->is_array) ctrl->p_def.p = &ctrl[1]; else ctrl->p_def.p = ctrl->p_cur.p + tot_ctrl_size; memcpy(ctrl->p_def.p, p_def.p_const, elem_size); } ctrl->type_ops->init(ctrl, 0, ctrl->p_cur); cur_to_new(ctrl); if (handler_new_ref(hdl, ctrl, NULL, false, false)) { kvfree(ctrl->p_array); kvfree(ctrl); return NULL; } mutex_lock(hdl->lock); list_add_tail(&ctrl->node, &hdl->ctrls); mutex_unlock(hdl->lock); return ctrl; } struct v4l2_ctrl *v4l2_ctrl_new_custom(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_config *cfg, void *priv) { bool is_menu; struct v4l2_ctrl *ctrl; const char *name = cfg->name; const char * const *qmenu = 
cfg->qmenu; const s64 *qmenu_int = cfg->qmenu_int; enum v4l2_ctrl_type type = cfg->type; u32 flags = cfg->flags; s64 min = cfg->min; s64 max = cfg->max; u64 step = cfg->step; s64 def = cfg->def; if (name == NULL) v4l2_ctrl_fill(cfg->id, &name, &type, &min, &max, &step, &def, &flags); is_menu = (type == V4L2_CTRL_TYPE_MENU || type == V4L2_CTRL_TYPE_INTEGER_MENU); if (is_menu) WARN_ON(step); else WARN_ON(cfg->menu_skip_mask); if (type == V4L2_CTRL_TYPE_MENU && !qmenu) { qmenu = v4l2_ctrl_get_menu(cfg->id); } else if (type == V4L2_CTRL_TYPE_INTEGER_MENU && !qmenu_int) { handler_set_err(hdl, -EINVAL); return NULL; } ctrl = v4l2_ctrl_new(hdl, cfg->ops, cfg->type_ops, cfg->id, name, type, min, max, is_menu ? cfg->menu_skip_mask : step, def, cfg->dims, cfg->elem_size, flags, qmenu, qmenu_int, cfg->p_def, priv); if (ctrl) ctrl->is_private = cfg->is_private; return ctrl; } EXPORT_SYMBOL(v4l2_ctrl_new_custom); /* Helper function for standard non-menu controls */ struct v4l2_ctrl *v4l2_ctrl_new_std(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_ops *ops, u32 id, s64 min, s64 max, u64 step, s64 def) { const char *name; enum v4l2_ctrl_type type; u32 flags; v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags); if (type == V4L2_CTRL_TYPE_MENU || type == V4L2_CTRL_TYPE_INTEGER_MENU || type >= V4L2_CTRL_COMPOUND_TYPES) { handler_set_err(hdl, -EINVAL); return NULL; } return v4l2_ctrl_new(hdl, ops, NULL, id, name, type, min, max, step, def, NULL, 0, flags, NULL, NULL, ptr_null, NULL); } EXPORT_SYMBOL(v4l2_ctrl_new_std); /* Helper function for standard menu controls */ struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_ops *ops, u32 id, u8 _max, u64 mask, u8 _def) { const char * const *qmenu = NULL; const s64 *qmenu_int = NULL; unsigned int qmenu_int_len = 0; const char *name; enum v4l2_ctrl_type type; s64 min; s64 max = _max; s64 def = _def; u64 step; u32 flags; v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags); if (type == V4L2_CTRL_TYPE_MENU) qmenu = v4l2_ctrl_get_menu(id); else if (type == V4L2_CTRL_TYPE_INTEGER_MENU) qmenu_int = v4l2_ctrl_get_int_menu(id, &qmenu_int_len); if ((!qmenu && !qmenu_int) || (qmenu_int && max >= qmenu_int_len)) { handler_set_err(hdl, -EINVAL); return NULL; } return v4l2_ctrl_new(hdl, ops, NULL, id, name, type, 0, max, mask, def, NULL, 0, flags, qmenu, qmenu_int, ptr_null, NULL); } EXPORT_SYMBOL(v4l2_ctrl_new_std_menu); /* Helper function for standard menu controls with driver defined menu */ struct v4l2_ctrl *v4l2_ctrl_new_std_menu_items(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_ops *ops, u32 id, u8 _max, u64 mask, u8 _def, const char * const *qmenu) { enum v4l2_ctrl_type type; const char *name; u32 flags; u64 step; s64 min; s64 max = _max; s64 def = _def; /* v4l2_ctrl_new_std_menu_items() should only be called for * standard controls without a standard menu. 
*/ if (v4l2_ctrl_get_menu(id)) { handler_set_err(hdl, -EINVAL); return NULL; } v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags); if (type != V4L2_CTRL_TYPE_MENU || qmenu == NULL) { handler_set_err(hdl, -EINVAL); return NULL; } return v4l2_ctrl_new(hdl, ops, NULL, id, name, type, 0, max, mask, def, NULL, 0, flags, qmenu, NULL, ptr_null, NULL); } EXPORT_SYMBOL(v4l2_ctrl_new_std_menu_items); /* Helper function for standard compound controls */ struct v4l2_ctrl *v4l2_ctrl_new_std_compound(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_ops *ops, u32 id, const union v4l2_ctrl_ptr p_def) { const char *name; enum v4l2_ctrl_type type; u32 flags; s64 min, max, step, def; v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags); if (type < V4L2_CTRL_COMPOUND_TYPES) { handler_set_err(hdl, -EINVAL); return NULL; } return v4l2_ctrl_new(hdl, ops, NULL, id, name, type, min, max, step, def, NULL, 0, flags, NULL, NULL, p_def, NULL); } EXPORT_SYMBOL(v4l2_ctrl_new_std_compound); /* Helper function for standard integer menu controls */ struct v4l2_ctrl *v4l2_ctrl_new_int_menu(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_ops *ops, u32 id, u8 _max, u8 _def, const s64 *qmenu_int) { const char *name; enum v4l2_ctrl_type type; s64 min; u64 step; s64 max = _max; s64 def = _def; u32 flags; v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags); if (type != V4L2_CTRL_TYPE_INTEGER_MENU) { handler_set_err(hdl, -EINVAL); return NULL; } return v4l2_ctrl_new(hdl, ops, NULL, id, name, type, 0, max, 0, def, NULL, 0, flags, NULL, qmenu_int, ptr_null, NULL); } EXPORT_SYMBOL(v4l2_ctrl_new_int_menu); /* Add the controls from another handler to our own. */ int v4l2_ctrl_add_handler(struct v4l2_ctrl_handler *hdl, struct v4l2_ctrl_handler *add, bool (*filter)(const struct v4l2_ctrl *ctrl), bool from_other_dev) { struct v4l2_ctrl_ref *ref; int ret = 0; /* Do nothing if either handler is NULL or if they are the same */ if (!hdl || !add || hdl == add) return 0; if (hdl->error) return hdl->error; mutex_lock(add->lock); list_for_each_entry(ref, &add->ctrl_refs, node) { struct v4l2_ctrl *ctrl = ref->ctrl; /* Skip handler-private controls. 
*/ if (ctrl->is_private) continue; /* And control classes */ if (ctrl->type == V4L2_CTRL_TYPE_CTRL_CLASS) continue; /* Filter any unwanted controls */ if (filter && !filter(ctrl)) continue; ret = handler_new_ref(hdl, ctrl, NULL, from_other_dev, false); if (ret) break; } mutex_unlock(add->lock); return ret; } EXPORT_SYMBOL(v4l2_ctrl_add_handler); bool v4l2_ctrl_radio_filter(const struct v4l2_ctrl *ctrl) { if (V4L2_CTRL_ID2WHICH(ctrl->id) == V4L2_CTRL_CLASS_FM_TX) return true; if (V4L2_CTRL_ID2WHICH(ctrl->id) == V4L2_CTRL_CLASS_FM_RX) return true; switch (ctrl->id) { case V4L2_CID_AUDIO_MUTE: case V4L2_CID_AUDIO_VOLUME: case V4L2_CID_AUDIO_BALANCE: case V4L2_CID_AUDIO_BASS: case V4L2_CID_AUDIO_TREBLE: case V4L2_CID_AUDIO_LOUDNESS: return true; default: break; } return false; } EXPORT_SYMBOL(v4l2_ctrl_radio_filter); /* Cluster controls */ void v4l2_ctrl_cluster(unsigned ncontrols, struct v4l2_ctrl **controls) { bool has_volatiles = false; int i; /* The first control is the master control and it must not be NULL */ if (WARN_ON(ncontrols == 0 || controls[0] == NULL)) return; for (i = 0; i < ncontrols; i++) { if (controls[i]) { controls[i]->cluster = controls; controls[i]->ncontrols = ncontrols; if (controls[i]->flags & V4L2_CTRL_FLAG_VOLATILE) has_volatiles = true; } } controls[0]->has_volatiles = has_volatiles; } EXPORT_SYMBOL(v4l2_ctrl_cluster); void v4l2_ctrl_auto_cluster(unsigned ncontrols, struct v4l2_ctrl **controls, u8 manual_val, bool set_volatile) { struct v4l2_ctrl *master = controls[0]; u32 flag = 0; int i; v4l2_ctrl_cluster(ncontrols, controls); WARN_ON(ncontrols <= 1); WARN_ON(manual_val < master->minimum || manual_val > master->maximum); WARN_ON(set_volatile && !has_op(master, g_volatile_ctrl)); master->is_auto = true; master->has_volatiles = set_volatile; master->manual_mode_value = manual_val; master->flags |= V4L2_CTRL_FLAG_UPDATE; if (!is_cur_manual(master)) flag = V4L2_CTRL_FLAG_INACTIVE | (set_volatile ? V4L2_CTRL_FLAG_VOLATILE : 0); for (i = 1; i < ncontrols; i++) if (controls[i]) controls[i]->flags |= flag; } EXPORT_SYMBOL(v4l2_ctrl_auto_cluster); /* * Obtain the current volatile values of an autocluster and mark them * as new. */ void update_from_auto_cluster(struct v4l2_ctrl *master) { int i; for (i = 1; i < master->ncontrols; i++) cur_to_new(master->cluster[i]); if (!call_op(master, g_volatile_ctrl)) for (i = 1; i < master->ncontrols; i++) if (master->cluster[i]) master->cluster[i]->is_new = 1; } /* * Return non-zero if one or more of the controls in the cluster has a new * value that differs from the current value. */ static int cluster_changed(struct v4l2_ctrl *master) { bool changed = false; int i; for (i = 0; i < master->ncontrols; i++) { struct v4l2_ctrl *ctrl = master->cluster[i]; bool ctrl_changed = false; if (!ctrl) continue; if (ctrl->flags & V4L2_CTRL_FLAG_EXECUTE_ON_WRITE) { changed = true; ctrl_changed = true; } /* * Set has_changed to false to avoid generating * the event V4L2_EVENT_CTRL_CH_VALUE */ if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) { ctrl->has_changed = false; continue; } if (ctrl->elems != ctrl->new_elems) ctrl_changed = true; if (!ctrl_changed) ctrl_changed = !ctrl->type_ops->equal(ctrl, ctrl->p_cur, ctrl->p_new); ctrl->has_changed = ctrl_changed; changed |= ctrl->has_changed; } return changed; } /* * Core function that calls try/s_ctrl and ensures that the new value is * copied to the current value on a set. * Must be called with ctrl->handler->lock held. 
*/ int try_or_set_cluster(struct v4l2_fh *fh, struct v4l2_ctrl *master, bool set, u32 ch_flags) { bool update_flag; int ret; int i; /* * Go through the cluster and either validate the new value or * (if no new value was set), copy the current value to the new * value, ensuring a consistent view for the control ops when * called. */ for (i = 0; i < master->ncontrols; i++) { struct v4l2_ctrl *ctrl = master->cluster[i]; if (!ctrl) continue; if (!ctrl->is_new) { cur_to_new(ctrl); continue; } /* * Check again: it may have changed since the * previous check in try_or_set_ext_ctrls(). */ if (set && (ctrl->flags & V4L2_CTRL_FLAG_GRABBED)) return -EBUSY; } ret = call_op(master, try_ctrl); /* Don't set if there is no change */ if (ret || !set || !cluster_changed(master)) return ret; ret = call_op(master, s_ctrl); if (ret) return ret; /* If OK, then make the new values permanent. */ update_flag = is_cur_manual(master) != is_new_manual(master); for (i = 0; i < master->ncontrols; i++) { /* * If we switch from auto to manual mode, and this cluster * contains volatile controls, then all non-master controls * have to be marked as changed. The 'new' value contains * the volatile value (obtained by update_from_auto_cluster), * which now has to become the current value. */ if (i && update_flag && is_new_manual(master) && master->has_volatiles && master->cluster[i]) master->cluster[i]->has_changed = true; new_to_cur(fh, master->cluster[i], ch_flags | ((update_flag && i > 0) ? V4L2_EVENT_CTRL_CH_FLAGS : 0)); } return 0; } /* Activate/deactivate a control. */ void v4l2_ctrl_activate(struct v4l2_ctrl *ctrl, bool active) { /* invert since the actual flag is called 'inactive' */ bool inactive = !active; bool old; if (ctrl == NULL) return; if (inactive) /* set V4L2_CTRL_FLAG_INACTIVE */ old = test_and_set_bit(4, &ctrl->flags); else /* clear V4L2_CTRL_FLAG_INACTIVE */ old = test_and_clear_bit(4, &ctrl->flags); if (old != inactive) send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_FLAGS); } EXPORT_SYMBOL(v4l2_ctrl_activate); void __v4l2_ctrl_grab(struct v4l2_ctrl *ctrl, bool grabbed) { bool old; if (ctrl == NULL) return; lockdep_assert_held(ctrl->handler->lock); if (grabbed) /* set V4L2_CTRL_FLAG_GRABBED */ old = test_and_set_bit(1, &ctrl->flags); else /* clear V4L2_CTRL_FLAG_GRABBED */ old = test_and_clear_bit(1, &ctrl->flags); if (old != grabbed) send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_FLAGS); } EXPORT_SYMBOL(__v4l2_ctrl_grab); /* Call s_ctrl for all controls owned by the handler */ int __v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl) { struct v4l2_ctrl *ctrl; int ret = 0; if (hdl == NULL) return 0; lockdep_assert_held(hdl->lock); list_for_each_entry(ctrl, &hdl->ctrls, node) ctrl->done = false; list_for_each_entry(ctrl, &hdl->ctrls, node) { struct v4l2_ctrl *master = ctrl->cluster[0]; int i; /* Skip if this control was already handled by a cluster. */ /* Skip button controls and read-only controls. 
*/ if (ctrl->done || ctrl->type == V4L2_CTRL_TYPE_BUTTON || (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY)) continue; for (i = 0; i < master->ncontrols; i++) { if (master->cluster[i]) { cur_to_new(master->cluster[i]); master->cluster[i]->is_new = 1; master->cluster[i]->done = true; } } ret = call_op(master, s_ctrl); if (ret) break; } return ret; } EXPORT_SYMBOL_GPL(__v4l2_ctrl_handler_setup); int v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl) { int ret; if (hdl == NULL) return 0; mutex_lock(hdl->lock); ret = __v4l2_ctrl_handler_setup(hdl); mutex_unlock(hdl->lock); return ret; } EXPORT_SYMBOL(v4l2_ctrl_handler_setup); /* Log the control name and value */ static void log_ctrl(const struct v4l2_ctrl_handler *hdl, struct v4l2_ctrl *ctrl, const char *prefix, const char *colon) { if (ctrl->flags & (V4L2_CTRL_FLAG_DISABLED | V4L2_CTRL_FLAG_WRITE_ONLY)) return; if (ctrl->type == V4L2_CTRL_TYPE_CTRL_CLASS) return; pr_info("%s%s%s: ", prefix, colon, ctrl->name); if (ctrl->handler != hdl) v4l2_ctrl_lock(ctrl); ctrl->type_ops->log(ctrl); if (ctrl->handler != hdl) v4l2_ctrl_unlock(ctrl); if (ctrl->flags & (V4L2_CTRL_FLAG_INACTIVE | V4L2_CTRL_FLAG_GRABBED | V4L2_CTRL_FLAG_VOLATILE)) { if (ctrl->flags & V4L2_CTRL_FLAG_INACTIVE) pr_cont(" inactive"); if (ctrl->flags & V4L2_CTRL_FLAG_GRABBED) pr_cont(" grabbed"); if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) pr_cont(" volatile"); } pr_cont("\n"); } /* Log all controls owned by the handler */ void v4l2_ctrl_handler_log_status(struct v4l2_ctrl_handler *hdl, const char *prefix) { struct v4l2_ctrl_ref *ref; const char *colon = ""; int len; if (!hdl) return; if (!prefix) prefix = ""; len = strlen(prefix); if (len && prefix[len - 1] != ' ') colon = ": "; mutex_lock(hdl->lock); list_for_each_entry(ref, &hdl->ctrl_refs, node) { if (ref->from_other_dev || (ref->ctrl->flags & V4L2_CTRL_FLAG_DISABLED)) continue; log_ctrl(hdl, ref->ctrl, prefix, colon); } mutex_unlock(hdl->lock); } EXPORT_SYMBOL(v4l2_ctrl_handler_log_status); int v4l2_ctrl_new_fwnode_properties(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_ops *ctrl_ops, const struct v4l2_fwnode_device_properties *p) { if (p->orientation != V4L2_FWNODE_PROPERTY_UNSET) { u32 orientation_ctrl; switch (p->orientation) { case V4L2_FWNODE_ORIENTATION_FRONT: orientation_ctrl = V4L2_CAMERA_ORIENTATION_FRONT; break; case V4L2_FWNODE_ORIENTATION_BACK: orientation_ctrl = V4L2_CAMERA_ORIENTATION_BACK; break; case V4L2_FWNODE_ORIENTATION_EXTERNAL: orientation_ctrl = V4L2_CAMERA_ORIENTATION_EXTERNAL; break; default: return -EINVAL; } if (!v4l2_ctrl_new_std_menu(hdl, ctrl_ops, V4L2_CID_CAMERA_ORIENTATION, V4L2_CAMERA_ORIENTATION_EXTERNAL, 0, orientation_ctrl)) return hdl->error; } if (p->rotation != V4L2_FWNODE_PROPERTY_UNSET) { if (!v4l2_ctrl_new_std(hdl, ctrl_ops, V4L2_CID_CAMERA_SENSOR_ROTATION, p->rotation, p->rotation, 1, p->rotation)) return hdl->error; } return hdl->error; } EXPORT_SYMBOL(v4l2_ctrl_new_fwnode_properties);
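/*
 * Editor's note: the handler helpers above (v4l2_ctrl_handler_init_class(),
 * v4l2_ctrl_new_std(), v4l2_ctrl_new_std_menu(), v4l2_ctrl_auto_cluster(),
 * v4l2_ctrl_handler_setup() and v4l2_ctrl_handler_free()) are normally driven
 * from a driver's probe path. The sketch below shows that typical flow; it is
 * only an illustration, and the "foo" state structure, the control IDs picked
 * and the empty s_ctrl body are assumptions, not code taken from this file.
 */
#include <media/v4l2-ctrls.h>

struct foo_state {
	struct v4l2_ctrl_handler hdl;
	/* autogain and gain must stay adjacent: they form an auto cluster */
	struct v4l2_ctrl *autogain;
	struct v4l2_ctrl *gain;
};

static int foo_s_ctrl(struct v4l2_ctrl *ctrl)
{
	/* ctrl->val has already been clamped by the validate type op above. */
	return 0;
}

static const struct v4l2_ctrl_ops foo_ctrl_ops = {
	.s_ctrl = foo_s_ctrl,
};

static int foo_init_controls(struct foo_state *state)
{
	struct v4l2_ctrl_handler *hdl = &state->hdl;

	/* The hint of 4 only sizes the hash buckets; it is not a hard limit. */
	v4l2_ctrl_handler_init(hdl, 4);

	v4l2_ctrl_new_std(hdl, &foo_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128);
	v4l2_ctrl_new_std_menu(hdl, &foo_ctrl_ops,
			       V4L2_CID_POWER_LINE_FREQUENCY,
			       V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 0,
			       V4L2_CID_POWER_LINE_FREQUENCY_50HZ);
	state->autogain = v4l2_ctrl_new_std(hdl, &foo_ctrl_ops,
					    V4L2_CID_AUTOGAIN, 0, 1, 1, 1);
	state->gain = v4l2_ctrl_new_std(hdl, &foo_ctrl_ops,
					V4L2_CID_GAIN, 0, 1023, 1, 64);

	/* Any failure above was latched in hdl->error by handler_set_err(). */
	if (hdl->error) {
		int err = hdl->error;

		v4l2_ctrl_handler_free(hdl);
		return err;
	}

	/* Gain is flagged inactive while autogain is on (manual value is 0). */
	v4l2_ctrl_auto_cluster(2, &state->autogain, 0, false);

	/* Write the default values to the hardware through foo_s_ctrl(). */
	return v4l2_ctrl_handler_setup(hdl);
}

/*
 * On the remove path the driver would call v4l2_ctrl_handler_free(&state->hdl),
 * which tears down the refs, the hash buckets and any array storage seen above.
 */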
// SPDX-License-Identifier: GPL-2.0 /* * drivers/usb/core/generic.c - generic driver for USB devices (not interfaces) * * (C) Copyright 2005 Greg Kroah-Hartman <gregkh@suse.de> * * based on drivers/usb/usb.c which had the following copyrights: * (C) Copyright Linus Torvalds 1999 * (C) Copyright Johannes Erdfelt 1999-2001 * (C) Copyright Andreas Gal 1999 * (C) Copyright Gregory P. Smith 1999 * (C) Copyright Deti Fliegl 1999 (new USB architecture) * (C) Copyright Randy Dunlap 2000 * (C) Copyright David Brownell 2000-2004 * (C) Copyright Yggdrasil Computing, Inc. 2000 * (usb_device_id matching changes by Adam J. Richter) * (C) Copyright Greg Kroah-Hartman 2002-2003 * * Released under the GPLv2 only. */ #include <linux/usb.h> #include <linux/usb/hcd.h> #include <uapi/linux/usb/audio.h> #include "usb.h" static inline const char *plural(int n) { return (n == 1 ? "" : "s"); } static int is_rndis(struct usb_interface_descriptor *desc) { return desc->bInterfaceClass == USB_CLASS_COMM && desc->bInterfaceSubClass == 2 && desc->bInterfaceProtocol == 0xff; } static int is_activesync(struct usb_interface_descriptor *desc) { return desc->bInterfaceClass == USB_CLASS_MISC && desc->bInterfaceSubClass == 1 && desc->bInterfaceProtocol == 1; } static bool is_audio(struct usb_interface_descriptor *desc) { return desc->bInterfaceClass == USB_CLASS_AUDIO; } static bool is_uac3_config(struct usb_interface_descriptor *desc) { return desc->bInterfaceProtocol == UAC_VERSION_3; } int usb_choose_configuration(struct usb_device *udev) { int i; int num_configs; int insufficient_power = 0; struct usb_host_config *c, *best; struct usb_device_driver *udriver; /* * If a USB device (not an interface) doesn't have a driver then the * kernel has no business trying to select or install a configuration * for it.
*/ if (!udev->dev.driver) return -1; udriver = to_usb_device_driver(udev->dev.driver); if (usb_device_is_owned(udev)) return 0; if (udriver->choose_configuration) { i = udriver->choose_configuration(udev); if (i >= 0) return i; } best = NULL; c = udev->config; num_configs = udev->descriptor.bNumConfigurations; for (i = 0; i < num_configs; (i++, c++)) { struct usb_interface_descriptor *desc = NULL; /* It's possible that a config has no interfaces! */ if (c->desc.bNumInterfaces > 0) desc = &c->intf_cache[0]->altsetting->desc; /* * HP's USB bus-powered keyboard has only one configuration * and it claims to be self-powered; other devices may have * similar errors in their descriptors. If the next test * were allowed to execute, such configurations would always * be rejected and the devices would not work as expected. * In the meantime, we run the risk of selecting a config * that requires external power at a time when that power * isn't available. It seems to be the lesser of two evils. * * Bugzilla #6448 reports a device that appears to crash * when it receives a GET_DEVICE_STATUS request! We don't * have any other way to tell whether a device is self-powered, * but since we don't use that information anywhere but here, * the call has been removed. * * Maybe the GET_DEVICE_STATUS call and the test below can * be reinstated when device firmwares become more reliable. * Don't hold your breath. */ #if 0 /* Rule out self-powered configs for a bus-powered device */ if (bus_powered && (c->desc.bmAttributes & USB_CONFIG_ATT_SELFPOWER)) continue; #endif /* * The next test may not be as effective as it should be. * Some hubs have errors in their descriptor, claiming * to be self-powered when they are really bus-powered. * We will overestimate the amount of current such hubs * make available for each port. * * This is a fairly benign sort of failure. It won't * cause us to reject configurations that we should have * accepted. */ /* Rule out configs that draw too much bus current */ if (usb_get_max_power(udev, c) > udev->bus_mA) { insufficient_power++; continue; } /* * Select first configuration as default for audio so that * devices that don't comply with UAC3 protocol are supported. * But, still iterate through other configurations and * select UAC3 compliant config if present. */ if (desc && is_audio(desc)) { /* Always prefer the first found UAC3 config */ if (is_uac3_config(desc)) { best = c; break; } /* If there is no UAC3 config, prefer the first config */ else if (i == 0) best = c; /* Unconditional continue, because the rest of the code * in the loop is irrelevant for audio devices, and * because it can reassign best, which for audio devices * we don't want. */ continue; } /* When the first config's first interface is one of Microsoft's * pet nonstandard Ethernet-over-USB protocols, ignore it unless * this kernel has enabled the necessary host side driver. * But: Don't ignore it if it's the only config. */ if (i == 0 && num_configs > 1 && desc && (is_rndis(desc) || is_activesync(desc))) { #if !defined(CONFIG_USB_NET_RNDIS_HOST) && !defined(CONFIG_USB_NET_RNDIS_HOST_MODULE) continue; #else best = c; #endif } /* From the remaining configs, choose the first one whose * first interface is for a non-vendor-specific class. * Reason: Linux is more likely to have a class driver * than a vendor-specific driver. 
*/ else if (udev->descriptor.bDeviceClass != USB_CLASS_VENDOR_SPEC && (desc && desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC)) { best = c; break; } /* If all the remaining configs are vendor-specific, * choose the first one. */ else if (!best) best = c; } if (insufficient_power > 0) dev_info(&udev->dev, "rejected %d configuration%s " "due to insufficient available bus power\n", insufficient_power, plural(insufficient_power)); if (best) { i = best->desc.bConfigurationValue; dev_dbg(&udev->dev, "configuration #%d chosen from %d choice%s\n", i, num_configs, plural(num_configs)); } else { i = -1; dev_warn(&udev->dev, "no configuration chosen from %d choice%s\n", num_configs, plural(num_configs)); } return i; } EXPORT_SYMBOL_GPL(usb_choose_configuration); static int __check_for_non_generic_match(struct device_driver *drv, void *data) { struct usb_device *udev = data; struct usb_device_driver *udrv; if (!is_usb_device_driver(drv)) return 0; udrv = to_usb_device_driver(drv); if (udrv == &usb_generic_driver) return 0; return usb_driver_applicable(udev, udrv); } static bool usb_generic_driver_match(struct usb_device *udev) { if (udev->use_generic_driver) return true; /* * If any other driver wants the device, leave the device to this other * driver. */ if (bus_for_each_drv(&usb_bus_type, NULL, udev, __check_for_non_generic_match)) return false; return true; } int usb_generic_driver_probe(struct usb_device *udev) { int err, c; /* Choose and set the configuration. This registers the interfaces * with the driver core and lets interface drivers bind to them. */ if (udev->authorized == 0) dev_err(&udev->dev, "Device is not authorized for usage\n"); else { c = usb_choose_configuration(udev); if (c >= 0) { err = usb_set_configuration(udev, c); if (err && err != -ENODEV) { dev_err(&udev->dev, "can't set config #%d, error %d\n", c, err); /* This need not be fatal. The user can try to * set other configurations. */ } } } /* USB device state == configured ... usable */ usb_notify_add_device(udev); return 0; } void usb_generic_driver_disconnect(struct usb_device *udev) { usb_notify_remove_device(udev); /* if this is only an unbind, not a physical disconnect, then * unconfigure the device */ if (udev->actconfig) usb_set_configuration(udev, -1); } #ifdef CONFIG_PM int usb_generic_driver_suspend(struct usb_device *udev, pm_message_t msg) { int rc; /* Normal USB devices suspend through their upstream port. * Root hubs don't have upstream ports to suspend, * so we have to shut down their downstream HC-to-USB * interfaces manually by doing a bus (or "global") suspend. */ if (!udev->parent) rc = hcd_bus_suspend(udev, msg); /* * Non-root USB2 devices don't need to do anything for FREEZE * or PRETHAW. USB3 devices don't support global suspend and * needs to be selectively suspended. */ else if ((msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_PRETHAW) && (udev->speed < USB_SPEED_SUPER)) rc = 0; else rc = usb_port_suspend(udev, msg); if (rc == 0) usbfs_notify_suspend(udev); return rc; } int usb_generic_driver_resume(struct usb_device *udev, pm_message_t msg) { int rc; /* Normal USB devices resume/reset through their upstream port. * Root hubs don't have upstream ports to resume or reset, * so we have to start up their downstream HC-to-USB * interfaces manually by doing a bus (or "global") resume. 
*/ if (!udev->parent) rc = hcd_bus_resume(udev, msg); else rc = usb_port_resume(udev, msg); if (rc == 0) usbfs_notify_resume(udev); return rc; } #endif /* CONFIG_PM */ struct usb_device_driver usb_generic_driver = { .name = "usb", .match = usb_generic_driver_match, .probe = usb_generic_driver_probe, .disconnect = usb_generic_driver_disconnect, #ifdef CONFIG_PM .suspend = usb_generic_driver_suspend, .resume = usb_generic_driver_resume, #endif .supports_autosuspend = 1, };
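/*
 * Editor's note: usb_generic_driver above is bound through the same
 * struct usb_device_driver interface that any whole-device (as opposed to
 * per-interface) driver uses. A minimal, hypothetical sketch of such a driver
 * and its registration is shown below; the "foo" names, the vendor ID check
 * and the module boilerplate are illustrative assumptions, not part of
 * generic.c.
 */
#include <linux/module.h>
#include <linux/usb.h>

static bool foo_udev_match(struct usb_device *udev)
{
	/* Hypothetical policy: only claim devices from one vendor. */
	return le16_to_cpu(udev->descriptor.idVendor) == 0x1234;
}

static int foo_udev_probe(struct usb_device *udev)
{
	/*
	 * A non-generic device driver is responsible for configuration
	 * selection itself, unless it sets .generic_subclass = 1 so that
	 * usb_generic_driver_probe() above runs first.
	 */
	return 0;
}

static void foo_udev_disconnect(struct usb_device *udev)
{
}

static struct usb_device_driver foo_device_driver = {
	.name			= "foo-udev",
	.match			= foo_udev_match,
	.probe			= foo_udev_probe,
	.disconnect		= foo_udev_disconnect,
	.supports_autosuspend	= 1,
};

static int __init foo_udev_init(void)
{
	/* Whole-device drivers register with usb_register_device_driver(). */
	return usb_register_device_driver(&foo_device_driver, THIS_MODULE);
}
module_init(foo_udev_init);

static void __exit foo_udev_exit(void)
{
	usb_deregister_device_driver(&foo_device_driver);
}
module_exit(foo_udev_exit);

MODULE_LICENSE("GPL");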
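/*
 * Editor's note: the TOMOYO code that follows (tomoyo_update_policy() and
 * tomoyo_update_domain()) keeps repeating one pattern: take
 * tomoyo_policy_lock, scan an RCU-protected list for a duplicate entry, and
 * either flip its is_deleted flag or append a freshly committed copy. The
 * sketch below restates that pattern in isolation as a reading aid; the
 * "demo" names and the plain kzalloc() stand in for TOMOYO's own types and
 * tomoyo_commit_ok(), and are not part of domain.c.
 */
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/slab.h>

struct demo_entry {
	struct list_head list;
	bool is_deleted;
	int key;
};

static DEFINE_MUTEX(demo_lock);		/* plays the role of tomoyo_policy_lock */
static LIST_HEAD(demo_list);		/* plays the role of param->list */

/* Add (@is_delete false) or logically remove (@is_delete true) entry @key. */
static int demo_update(int key, bool is_delete)
{
	struct demo_entry *entry;
	int error = is_delete ? -ENOENT : -ENOMEM;

	if (mutex_lock_interruptible(&demo_lock))
		return -ENOMEM;
	/*
	 * domain.c walks the list with list_for_each_entry_rcu() and a
	 * srcu_read_lock_held() hint; under the writer mutex a plain
	 * traversal is sufficient for this sketch.
	 */
	list_for_each_entry(entry, &demo_list, list) {
		if (entry->key != key)		/* the check_duplicate() callback */
			continue;
		entry->is_deleted = is_delete;	/* toggle, never unlink here */
		error = 0;
		break;
	}
	if (error && !is_delete) {
		/* No duplicate found: commit a new entry at the tail. */
		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (entry) {
			entry->key = key;
			list_add_tail_rcu(&entry->list, &demo_list);
			error = 0;
		}
	}
	mutex_unlock(&demo_lock);
	return error;
}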
// SPDX-License-Identifier: GPL-2.0 /* * security/tomoyo/domain.c * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/binfmts.h> #include <linux/slab.h> #include <linux/rculist.h> /* Variables definitions.*/ /* The initial domain. */ struct tomoyo_domain_info tomoyo_kernel_domain; /** * tomoyo_update_policy - Update an entry for exception policy. * * @new_entry: Pointer to "struct tomoyo_acl_info". * @size: Size of @new_entry in bytes. * @param: Pointer to "struct tomoyo_acl_param". * @check_duplicate: Callback function to find duplicated entry. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size, struct tomoyo_acl_param *param, bool (*check_duplicate)(const struct tomoyo_acl_head *, const struct tomoyo_acl_head *)) { int error = param->is_delete ? -ENOENT : -ENOMEM; struct tomoyo_acl_head *entry; struct list_head *list = param->list; if (mutex_lock_interruptible(&tomoyo_policy_lock)) return -ENOMEM; list_for_each_entry_rcu(entry, list, list, srcu_read_lock_held(&tomoyo_ss)) { if (entry->is_deleted == TOMOYO_GC_IN_PROGRESS) continue; if (!check_duplicate(entry, new_entry)) continue; entry->is_deleted = param->is_delete; error = 0; break; } if (error && !param->is_delete) { entry = tomoyo_commit_ok(new_entry, size); if (entry) { list_add_tail_rcu(&entry->list, list); error = 0; } } mutex_unlock(&tomoyo_policy_lock); return error; } /** * tomoyo_same_acl_head - Check for duplicated "struct tomoyo_acl_info" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { return a->type == b->type && a->cond == b->cond; } /** * tomoyo_update_domain - Update an entry for domain policy. * * @new_entry: Pointer to "struct tomoyo_acl_info". * @size: Size of @new_entry in bytes. * @param: Pointer to "struct tomoyo_acl_param". * @check_duplicate: Callback function to find duplicated entry. * @merge_duplicate: Callback function to merge duplicated entry. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size, struct tomoyo_acl_param *param, bool (*check_duplicate)(const struct tomoyo_acl_info *, const struct tomoyo_acl_info *), bool (*merge_duplicate)(struct tomoyo_acl_info *, struct tomoyo_acl_info *, const bool)) { const bool is_delete = param->is_delete; int error = is_delete ? -ENOENT : -ENOMEM; struct tomoyo_acl_info *entry; struct list_head * const list = param->list; if (param->data[0]) { new_entry->cond = tomoyo_get_condition(param); if (!new_entry->cond) return -EINVAL; /* * Domain transition preference is allowed for only * "file execute" entries.
*/ if (new_entry->cond->transit && !(new_entry->type == TOMOYO_TYPE_PATH_ACL && container_of(new_entry, struct tomoyo_path_acl, head) ->perm == 1 << TOMOYO_TYPE_EXECUTE)) goto out; } if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; list_for_each_entry_rcu(entry, list, list, srcu_read_lock_held(&tomoyo_ss)) { if (entry->is_deleted == TOMOYO_GC_IN_PROGRESS) continue; if (!tomoyo_same_acl_head(entry, new_entry) || !check_duplicate(entry, new_entry)) continue; if (merge_duplicate) entry->is_deleted = merge_duplicate(entry, new_entry, is_delete); else entry->is_deleted = is_delete; error = 0; break; } if (error && !is_delete) { entry = tomoyo_commit_ok(new_entry, size); if (entry) { list_add_tail_rcu(&entry->list, list); error = 0; } } mutex_unlock(&tomoyo_policy_lock); out: tomoyo_put_condition(new_entry->cond); return error; } /** * tomoyo_check_acl - Do permission check. * * @r: Pointer to "struct tomoyo_request_info". * @check_entry: Callback function to check type specific parameters. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ void tomoyo_check_acl(struct tomoyo_request_info *r, bool (*check_entry)(struct tomoyo_request_info *, const struct tomoyo_acl_info *)) { const struct tomoyo_domain_info *domain = r->domain; struct tomoyo_acl_info *ptr; const struct list_head *list = &domain->acl_info_list; u16 i = 0; retry: list_for_each_entry_rcu(ptr, list, list, srcu_read_lock_held(&tomoyo_ss)) { if (ptr->is_deleted || ptr->type != r->param_type) continue; if (!check_entry(r, ptr)) continue; if (!tomoyo_condition(r, ptr->cond)) continue; r->matched_acl = ptr; r->granted = true; return; } for (; i < TOMOYO_MAX_ACL_GROUPS; i++) { if (!test_bit(i, domain->group)) continue; list = &domain->ns->acl_group[i++]; goto retry; } r->granted = false; } /* The list for "struct tomoyo_domain_info". */ LIST_HEAD(tomoyo_domain_list); /** * tomoyo_last_word - Get last component of a domainname. * * @name: Domainname to check. * * Returns the last word of @domainname. */ static const char *tomoyo_last_word(const char *name) { const char *cp = strrchr(name, ' '); if (cp) return cp + 1; return name; } /** * tomoyo_same_transition_control - Check for duplicated "struct tomoyo_transition_control" entry. * * @a: Pointer to "struct tomoyo_acl_head". * @b: Pointer to "struct tomoyo_acl_head". * * Returns true if @a == @b, false otherwise. */ static bool tomoyo_same_transition_control(const struct tomoyo_acl_head *a, const struct tomoyo_acl_head *b) { const struct tomoyo_transition_control *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_transition_control *p2 = container_of(b, typeof(*p2), head); return p1->type == p2->type && p1->is_last_name == p2->is_last_name && p1->domainname == p2->domainname && p1->program == p2->program; } /** * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list. * * @param: Pointer to "struct tomoyo_acl_param". * @type: Type of this entry. * * Returns 0 on success, negative value otherwise. */ int tomoyo_write_transition_control(struct tomoyo_acl_param *param, const u8 type) { struct tomoyo_transition_control e = { .type = type }; int error = param->is_delete ? 
-ENOENT : -ENOMEM; char *program = param->data; char *domainname = strstr(program, " from "); if (domainname) { *domainname = '\0'; domainname += 6; } else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP || type == TOMOYO_TRANSITION_CONTROL_KEEP) { domainname = program; program = NULL; } if (program && strcmp(program, "any")) { if (!tomoyo_correct_path(program)) return -EINVAL; e.program = tomoyo_get_name(program); if (!e.program) goto out; } if (domainname && strcmp(domainname, "any")) { if (!tomoyo_correct_domain(domainname)) { if (!tomoyo_correct_path(domainname)) goto out; e.is_last_name = true; } e.domainname = tomoyo_get_name(domainname); if (!e.domainname) goto out; } param->list = &param->ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL]; error = tomoyo_update_policy(&e.head, sizeof(e), param, tomoyo_same_transition_control); out: tomoyo_put_name(e.domainname); tomoyo_put_name(e.program); return error; } /** * tomoyo_scan_transition - Try to find specific domain transition type. * * @list: Pointer to "struct list_head". * @domainname: The name of current domain. * @program: The name of requested program. * @last_name: The last component of @domainname. * @type: One of values in "enum tomoyo_transition_type". * * Returns true if found one, false otherwise. * * Caller holds tomoyo_read_lock(). */ static inline bool tomoyo_scan_transition (const struct list_head *list, const struct tomoyo_path_info *domainname, const struct tomoyo_path_info *program, const char *last_name, const enum tomoyo_transition_type type) { const struct tomoyo_transition_control *ptr; list_for_each_entry_rcu(ptr, list, head.list, srcu_read_lock_held(&tomoyo_ss)) { if (ptr->head.is_deleted || ptr->type != type) continue; if (ptr->domainname) { if (!ptr->is_last_name) { if (ptr->domainname != domainname) continue; } else { /* * Use direct strcmp() since this is * unlikely used. */ if (strcmp(ptr->domainname->name, last_name)) continue; } } if (ptr->program && tomoyo_pathcmp(ptr->program, program)) continue; return true; } return false; } /** * tomoyo_transition_type - Get domain transition type. * * @ns: Pointer to "struct tomoyo_policy_namespace". * @domainname: The name of current domain. * @program: The name of requested program. * * Returns TOMOYO_TRANSITION_CONTROL_TRANSIT if executing @program causes * domain transition across namespaces, TOMOYO_TRANSITION_CONTROL_INITIALIZE if * executing @program reinitializes domain transition within that namespace, * TOMOYO_TRANSITION_CONTROL_KEEP if executing @program stays at @domainname , * others otherwise. * * Caller holds tomoyo_read_lock(). */ static enum tomoyo_transition_type tomoyo_transition_type (const struct tomoyo_policy_namespace *ns, const struct tomoyo_path_info *domainname, const struct tomoyo_path_info *program) { const char *last_name = tomoyo_last_word(domainname->name); enum tomoyo_transition_type type = TOMOYO_TRANSITION_CONTROL_NO_RESET; while (type < TOMOYO_MAX_TRANSITION_TYPE) { const struct list_head * const list = &ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL]; if (!tomoyo_scan_transition(list, domainname, program, last_name, type)) { type++; continue; } if (type != TOMOYO_TRANSITION_CONTROL_NO_RESET && type != TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE) break; /* * Do not check for reset_domain if no_reset_domain matched. * Do not check for initialize_domain if no_initialize_domain * matched. */ type++; type++; } return type; } /** * tomoyo_same_aggregator - Check for duplicated "struct tomoyo_aggregator" entry. 
* * @a: Pointer to "struct tomoyo_acl_head". * @b: Pointer to "struct tomoyo_acl_head". * * Returns true if @a == @b, false otherwise. */ static bool tomoyo_same_aggregator(const struct tomoyo_acl_head *a, const struct tomoyo_acl_head *b) { const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2), head); return p1->original_name == p2->original_name && p1->aggregated_name == p2->aggregated_name; } /** * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_write_aggregator(struct tomoyo_acl_param *param) { struct tomoyo_aggregator e = { }; int error = param->is_delete ? -ENOENT : -ENOMEM; const char *original_name = tomoyo_read_token(param); const char *aggregated_name = tomoyo_read_token(param); if (!tomoyo_correct_word(original_name) || !tomoyo_correct_path(aggregated_name)) return -EINVAL; e.original_name = tomoyo_get_name(original_name); e.aggregated_name = tomoyo_get_name(aggregated_name); if (!e.original_name || !e.aggregated_name || e.aggregated_name->is_patterned) /* No patterns allowed. */ goto out; param->list = &param->ns->policy_list[TOMOYO_ID_AGGREGATOR]; error = tomoyo_update_policy(&e.head, sizeof(e), param, tomoyo_same_aggregator); out: tomoyo_put_name(e.original_name); tomoyo_put_name(e.aggregated_name); return error; } /** * tomoyo_find_namespace - Find specified namespace. * * @name: Name of namespace to find. * @len: Length of @name. * * Returns pointer to "struct tomoyo_policy_namespace" if found, * NULL otherwise. * * Caller holds tomoyo_read_lock(). */ static struct tomoyo_policy_namespace *tomoyo_find_namespace (const char *name, const unsigned int len) { struct tomoyo_policy_namespace *ns; list_for_each_entry(ns, &tomoyo_namespace_list, namespace_list) { if (strncmp(name, ns->name, len) || (name[len] && name[len] != ' ')) continue; return ns; } return NULL; } /** * tomoyo_assign_namespace - Create a new namespace. * * @domainname: Name of namespace to create. * * Returns pointer to "struct tomoyo_policy_namespace" on success, * NULL otherwise. * * Caller holds tomoyo_read_lock(). */ struct tomoyo_policy_namespace *tomoyo_assign_namespace(const char *domainname) { struct tomoyo_policy_namespace *ptr; struct tomoyo_policy_namespace *entry; const char *cp = domainname; unsigned int len = 0; while (*cp && *cp++ != ' ') len++; ptr = tomoyo_find_namespace(domainname, len); if (ptr) return ptr; if (len >= TOMOYO_EXEC_TMPSIZE - 10 || !tomoyo_domain_def(domainname)) return NULL; entry = kzalloc(sizeof(*entry) + len + 1, GFP_NOFS | __GFP_NOWARN); if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; ptr = tomoyo_find_namespace(domainname, len); if (!ptr && tomoyo_memory_ok(entry)) { char *name = (char *) (entry + 1); ptr = entry; memmove(name, domainname, len); name[len] = '\0'; entry->name = name; tomoyo_init_policy_namespace(entry); entry = NULL; } mutex_unlock(&tomoyo_policy_lock); out: kfree(entry); return ptr; } /** * tomoyo_namespace_jump - Check for namespace jump. * * @domainname: Name of domain. * * Returns true if namespace differs, false otherwise. 
*/ static bool tomoyo_namespace_jump(const char *domainname) { const char *namespace = tomoyo_current_namespace()->name; const int len = strlen(namespace); return strncmp(domainname, namespace, len) || (domainname[len] && domainname[len] != ' '); } /** * tomoyo_assign_domain - Create a domain or a namespace. * * @domainname: The name of domain. * @transit: True if transit to domain found or created. * * Returns pointer to "struct tomoyo_domain_info" on success, NULL otherwise. * * Caller holds tomoyo_read_lock(). */ struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname, const bool transit) { struct tomoyo_domain_info e = { }; struct tomoyo_domain_info *entry = tomoyo_find_domain(domainname); bool created = false; if (entry) { if (transit) { /* * Since namespace is created at runtime, profiles may * not be created by the moment the process transits to * that domain. Do not perform domain transition if * profile for that domain is not yet created. */ if (tomoyo_policy_loaded && !entry->ns->profile_ptr[entry->profile]) return NULL; } return entry; } /* Requested domain does not exist. */ /* Don't create requested domain if domainname is invalid. */ if (strlen(domainname) >= TOMOYO_EXEC_TMPSIZE - 10 || !tomoyo_correct_domain(domainname)) return NULL; /* * Since definition of profiles and acl_groups may differ across * namespaces, do not inherit "use_profile" and "use_group" settings * by automatically creating requested domain upon domain transition. */ if (transit && tomoyo_namespace_jump(domainname)) return NULL; e.ns = tomoyo_assign_namespace(domainname); if (!e.ns) return NULL; /* * "use_profile" and "use_group" settings for automatically created * domains are inherited from current domain. These are 0 for manually * created domains. */ if (transit) { const struct tomoyo_domain_info *domain = tomoyo_domain(); e.profile = domain->profile; memcpy(e.group, domain->group, sizeof(e.group)); } e.domainname = tomoyo_get_name(domainname); if (!e.domainname) return NULL; if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; entry = tomoyo_find_domain(domainname); if (!entry) { entry = tomoyo_commit_ok(&e, sizeof(e)); if (entry) { INIT_LIST_HEAD(&entry->acl_info_list); list_add_tail_rcu(&entry->list, &tomoyo_domain_list); created = true; } } mutex_unlock(&tomoyo_policy_lock); out: tomoyo_put_name(e.domainname); if (entry && transit) { if (created) { struct tomoyo_request_info r; int i; tomoyo_init_request_info(&r, entry, TOMOYO_MAC_FILE_EXECUTE); r.granted = false; tomoyo_write_log(&r, "use_profile %u\n", entry->profile); for (i = 0; i < TOMOYO_MAX_ACL_GROUPS; i++) if (test_bit(i, entry->group)) tomoyo_write_log(&r, "use_group %u\n", i); tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES); } } return entry; } /** * tomoyo_environ - Check permission for environment variable names. * * @ee: Pointer to "struct tomoyo_execve". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_environ(struct tomoyo_execve *ee) { struct tomoyo_request_info *r = &ee->r; struct linux_binprm *bprm = ee->bprm; /* env_page.data is allocated by tomoyo_dump_page(). 
*/ struct tomoyo_page_dump env_page = { }; char *arg_ptr; /* Size is TOMOYO_EXEC_TMPSIZE bytes */ int arg_len = 0; unsigned long pos = bprm->p; int offset = pos % PAGE_SIZE; int argv_count = bprm->argc; int envp_count = bprm->envc; int error = -ENOMEM; ee->r.type = TOMOYO_MAC_ENVIRON; ee->r.profile = r->domain->profile; ee->r.mode = tomoyo_get_mode(r->domain->ns, ee->r.profile, TOMOYO_MAC_ENVIRON); if (!r->mode || !envp_count) return 0; arg_ptr = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS); if (!arg_ptr) goto out; while (error == -ENOMEM) { if (!tomoyo_dump_page(bprm, pos, &env_page)) goto out; pos += PAGE_SIZE - offset; /* Read. */ while (argv_count && offset < PAGE_SIZE) { if (!env_page.data[offset++]) argv_count--; } if (argv_count) { offset = 0; continue; } while (offset < PAGE_SIZE) { const unsigned char c = env_page.data[offset++]; if (c && arg_len < TOMOYO_EXEC_TMPSIZE - 10) { if (c == '=') { arg_ptr[arg_len++] = '\0'; } else if (c == '\\') { arg_ptr[arg_len++] = '\\'; arg_ptr[arg_len++] = '\\'; } else if (c > ' ' && c < 127) { arg_ptr[arg_len++] = c; } else { arg_ptr[arg_len++] = '\\'; arg_ptr[arg_len++] = (c >> 6) + '0'; arg_ptr[arg_len++] = ((c >> 3) & 7) + '0'; arg_ptr[arg_len++] = (c & 7) + '0'; } } else { arg_ptr[arg_len] = '\0'; } if (c) continue; if (tomoyo_env_perm(r, arg_ptr)) { error = -EPERM; break; } if (!--envp_count) { error = 0; break; } arg_len = 0; } offset = 0; } out: if (r->mode != TOMOYO_CONFIG_ENFORCING) error = 0; kfree(env_page.data); kfree(arg_ptr); return error; } /** * tomoyo_find_next_domain - Find a domain. * * @bprm: Pointer to "struct linux_binprm". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_find_next_domain(struct linux_binprm *bprm) { struct tomoyo_domain_info *old_domain = tomoyo_domain(); struct tomoyo_domain_info *domain = NULL; const char *original_name = bprm->filename; int retval = -ENOMEM; bool reject_on_transition_failure = false; const struct tomoyo_path_info *candidate; struct tomoyo_path_info exename; struct tomoyo_execve *ee = kzalloc(sizeof(*ee), GFP_NOFS); if (!ee) return -ENOMEM; ee->tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS); if (!ee->tmp) { kfree(ee); return -ENOMEM; } /* ee->dump->data is allocated by tomoyo_dump_page(). */ tomoyo_init_request_info(&ee->r, NULL, TOMOYO_MAC_FILE_EXECUTE); ee->r.ee = ee; ee->bprm = bprm; ee->r.obj = &ee->obj; ee->obj.path1 = bprm->file->f_path; /* Get symlink's pathname of program. */ retval = -ENOENT; exename.name = tomoyo_realpath_nofollow(original_name); if (!exename.name) goto out; tomoyo_fill_path_info(&exename); retry: /* Check 'aggregator' directive. */ { struct tomoyo_aggregator *ptr; struct list_head *list = &old_domain->ns->policy_list[TOMOYO_ID_AGGREGATOR]; /* Check 'aggregator' directive. */ candidate = &exename; list_for_each_entry_rcu(ptr, list, head.list, srcu_read_lock_held(&tomoyo_ss)) { if (ptr->head.is_deleted || !tomoyo_path_matches_pattern(&exename, ptr->original_name)) continue; candidate = ptr->aggregated_name; break; } } /* Check execute permission. */ retval = tomoyo_execute_permission(&ee->r, candidate); if (retval == TOMOYO_RETRY_REQUEST) goto retry; if (retval < 0) goto out; /* * To be able to specify domainnames with wildcards, use the * pathname specified in the policy (which may contain * wildcard) rather than the pathname passed to execve() * (which never contains wildcard). 
*/ if (ee->r.param.path.matched_path) candidate = ee->r.param.path.matched_path; /* * Check for domain transition preference if "file execute" matched. * If preference is given, make execve() fail if domain transition * has failed, for domain transition preference should be used with * destination domain defined. */ if (ee->transition) { const char *domainname = ee->transition->name; reject_on_transition_failure = true; if (!strcmp(domainname, "keep")) goto force_keep_domain; if (!strcmp(domainname, "child")) goto force_child_domain; if (!strcmp(domainname, "reset")) goto force_reset_domain; if (!strcmp(domainname, "initialize")) goto force_initialize_domain; if (!strcmp(domainname, "parent")) { char *cp; strscpy(ee->tmp, old_domain->domainname->name, TOMOYO_EXEC_TMPSIZE); cp = strrchr(ee->tmp, ' '); if (cp) *cp = '\0'; } else if (*domainname == '<') strscpy(ee->tmp, domainname, TOMOYO_EXEC_TMPSIZE); else snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s", old_domain->domainname->name, domainname); goto force_jump_domain; } /* * No domain transition preference specified. * Calculate domain to transit to. */ switch (tomoyo_transition_type(old_domain->ns, old_domain->domainname, candidate)) { case TOMOYO_TRANSITION_CONTROL_RESET: force_reset_domain: /* Transit to the root of specified namespace. */ snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "<%s>", candidate->name); /* * Make execve() fail if domain transition across namespaces * has failed. */ reject_on_transition_failure = true; break; case TOMOYO_TRANSITION_CONTROL_INITIALIZE: force_initialize_domain: /* Transit to the child of current namespace's root. */ snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s", old_domain->ns->name, candidate->name); break; case TOMOYO_TRANSITION_CONTROL_KEEP: force_keep_domain: /* Keep current domain. */ domain = old_domain; break; default: if (old_domain == &tomoyo_kernel_domain && !tomoyo_policy_loaded) { /* * Needn't to transit from kernel domain before * starting /sbin/init. But transit from kernel domain * if executing initializers because they might start * before /sbin/init. */ domain = old_domain; break; } force_child_domain: /* Normal domain transition. */ snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s", old_domain->domainname->name, candidate->name); break; } force_jump_domain: if (!domain) domain = tomoyo_assign_domain(ee->tmp, true); if (domain) retval = 0; else if (reject_on_transition_failure) { pr_warn("ERROR: Domain '%s' not ready.\n", ee->tmp); retval = -ENOMEM; } else if (ee->r.mode == TOMOYO_CONFIG_ENFORCING) retval = -ENOMEM; else { retval = 0; if (!old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED]) { old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED] = true; ee->r.granted = false; tomoyo_write_log(&ee->r, "%s", tomoyo_dif [TOMOYO_DIF_TRANSITION_FAILED]); pr_warn("ERROR: Domain '%s' not defined.\n", ee->tmp); } } out: if (!domain) domain = old_domain; /* Update reference count on "struct tomoyo_domain_info". */ { struct tomoyo_task *s = tomoyo_task(current); s->old_domain_info = s->domain_info; s->domain_info = domain; atomic_inc(&domain->users); } kfree(exename.name); if (!retval) { ee->r.domain = domain; retval = tomoyo_environ(ee); } kfree(ee->tmp); kfree(ee->dump.data); kfree(ee); return retval; } /** * tomoyo_dump_page - Dump a page to buffer. * * @bprm: Pointer to "struct linux_binprm". * @pos: Location to dump. * @dump: Pointer to "struct tomoyo_page_dump". * * Returns true on success, false otherwise. 
*/ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, struct tomoyo_page_dump *dump) { struct page *page; #ifdef CONFIG_MMU int ret; #endif /* dump->data is released by tomoyo_find_next_domain(). */ if (!dump->data) { dump->data = kzalloc(PAGE_SIZE, GFP_NOFS); if (!dump->data) return false; } /* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */ #ifdef CONFIG_MMU /* * This is called at execve() time in order to dig around * in the argv/environment of the new process * (represented by bprm). */ mmap_read_lock(bprm->mm); ret = get_user_pages_remote(bprm->mm, pos, 1, FOLL_FORCE, &page, NULL); mmap_read_unlock(bprm->mm); if (ret <= 0) return false; #else page = bprm->page[pos / PAGE_SIZE]; #endif if (page != dump->page) { const unsigned int offset = pos % PAGE_SIZE; /* * Maybe kmap()/kunmap() should be used here. * But remove_arg_zero() uses kmap_atomic()/kunmap_atomic(). * So do I. */ char *kaddr = kmap_atomic(page); dump->page = page; memcpy(dump->data + offset, kaddr + offset, PAGE_SIZE - offset); kunmap_atomic(kaddr); } /* Same with put_arg_page(page) in fs/exec.c */ #ifdef CONFIG_MMU put_page(page); #endif return true; }
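/*
 * A minimal user-space sketch (not part of the kernel sources above) of the
 * escaping rule that tomoyo_environ() applies when it copies an environment
 * variable name into arg_ptr: the name ends at '=', a backslash is doubled,
 * printable ASCII (0x21-0x7e) is copied verbatim, and every other byte is
 * rendered as a three-digit octal escape "\ooo". The function name
 * tomoyo_escape_env_name() is hypothetical and exists only for this example;
 * the kernel version additionally enforces the TOMOYO_EXEC_TMPSIZE bound,
 * which is omitted here for brevity.
 */
#include <stdio.h>

static void tomoyo_escape_env_name(const char *env, char *out)
{
	const unsigned char *p = (const unsigned char *)env;

	for (; *p && *p != '='; p++) {
		const unsigned char c = *p;

		if (c == '\\') {			/* backslash is doubled */
			*out++ = '\\';
			*out++ = '\\';
		} else if (c > ' ' && c < 127) {	/* printable, copied as-is */
			*out++ = c;
		} else {				/* everything else as "\ooo" */
			*out++ = '\\';
			*out++ = (c >> 6) + '0';
			*out++ = ((c >> 3) & 7) + '0';
			*out++ = (c & 7) + '0';
		}
	}
	*out = '\0';
}

int main(void)
{
	char buf[256];

	tomoyo_escape_env_name("LD PRELOAD=/tmp/x.so", buf);
	printf("%s\n", buf);	/* prints "LD\040PRELOAD": the space becomes \040 */
	return 0;
}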
/* SPDX-License-Identifier: GPL-2.0 */ /* * Variant of atomic_t specialized for reference counts. * * The interface matches the atomic_t interface (to aid in porting) but only * provides the few functions one should use for reference counting. * * Saturation semantics * ==================== * * refcount_t differs from atomic_t in that the counter saturates at * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the * counter and causing 'spurious' use-after-free issues. In order to avoid the * cost associated with introducing cmpxchg() loops into all of the saturating * operations, we temporarily allow the counter to take on an unchecked value * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow * or overflow has occurred. Although this is racy when multiple threads * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly * equidistant from 0 and INT_MAX we minimise the scope for error: * * INT_MAX REFCOUNT_SATURATED UINT_MAX * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff) * +--------------------------------+----------------+----------------+ * <---------- bad value! ----------> * * (in a signed view of the world, the "bad value" range corresponds to * a negative counter value). * * As an example, consider a refcount_inc() operation that causes the counter * to overflow: * * int old = atomic_fetch_add_relaxed(r); * // old is INT_MAX, refcount now INT_MIN (0x8000_0000) * if (old < 0) * atomic_set(r, REFCOUNT_SATURATED); * * If another thread also performs a refcount_inc() operation between the two * atomic operations, then the count will continue to edge closer to 0. If it * reaches a value of 1 before /any/ of the threads reset it to the saturated * value, then a concurrent refcount_dec_and_test() may erroneously free the * underlying object. * Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently * 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK).
* With the current PID limit, if no batched refcounting operations are used and * the attacker can't repeatedly trigger kernel oopses in the middle of refcount * operations, this makes it impossible for a saturated refcount to leave the * saturation range, even if it is possible for multiple uses of the same * refcount to nest in the context of a single task: * * (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT = * 0x40000000 / 0x400000 = 0x100 = 256 * * If hundreds of references are added/removed with a single refcounting * operation, it may potentially be possible to leave the saturation range; but * given the precise timing details involved with the round-robin scheduling of * each thread manipulating the refcount and the need to hit the race multiple * times in succession, there doesn't appear to be a practical avenue of attack * even if using refcount_add() operations with larger increments. * * Memory ordering * =============== * * Memory ordering rules are slightly relaxed wrt regular atomic_t functions * and provide only what is strictly required for refcounts. * * The increments are fully relaxed; these will not provide ordering. The * rationale is that whatever is used to obtain the object we're increasing the * reference count on will provide the ordering. For locked data structures, * its the lock acquire, for RCU/lockless data structures its the dependent * load. * * Do note that inc_not_zero() provides a control dependency which will order * future stores against the inc, this ensures we'll never modify the object * if we did not in fact acquire a reference. * * The decrements will provide release order, such that all the prior loads and * stores will be issued before, it also provides a control dependency, which * will order us against the subsequent free(). * * The control dependency is against the load of the cmpxchg (ll/sc) that * succeeded. This means the stores aren't fully ordered, but this is fine * because the 1->0 transition indicates no concurrency. * * Note that the allocator is responsible for ordering things between free() * and alloc(). * * The decrements dec_and_test() and sub_and_test() also provide acquire * ordering on success. 
* */ #ifndef _LINUX_REFCOUNT_H #define _LINUX_REFCOUNT_H #include <linux/atomic.h> #include <linux/bug.h> #include <linux/compiler.h> #include <linux/limits.h> #include <linux/refcount_types.h> #include <linux/spinlock_types.h> struct mutex; #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } #define REFCOUNT_MAX INT_MAX #define REFCOUNT_SATURATED (INT_MIN / 2) enum refcount_saturation_type { REFCOUNT_ADD_NOT_ZERO_OVF, REFCOUNT_ADD_OVF, REFCOUNT_ADD_UAF, REFCOUNT_SUB_UAF, REFCOUNT_DEC_LEAK, }; void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t); /** * refcount_set - set a refcount's value * @r: the refcount * @n: value to which the refcount will be set */ static inline void refcount_set(refcount_t *r, int n) { atomic_set(&r->refs, n); } /** * refcount_read - get a refcount's value * @r: the refcount * * Return: the refcount's value */ static inline unsigned int refcount_read(const refcount_t *r) { return atomic_read(&r->refs); } static inline __must_check __signed_wrap bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); do { if (!old) break; } while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i)); if (oldp) *oldp = old; if (unlikely(old < 0 || old + i < 0)) refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); return old; } /** * refcount_add_not_zero - add a value to a refcount unless it is 0 * @i: the value to add to the refcount * @r: the refcount * * Will saturate at REFCOUNT_SATURATED and WARN. * * Provides no memory ordering, it is assumed the caller has guaranteed the * object memory to be stable (RCU, etc.). It does provide a control dependency * and thereby orders future stores. See the comment on top. * * Use of this function is not recommended for the normal reference counting * use case in which references are taken and released one at a time. In these * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. * * Return: false if the passed refcount is 0, true otherwise */ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) { return __refcount_add_not_zero(i, r, NULL); } static inline __signed_wrap void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); if (oldp) *oldp = old; if (unlikely(!old)) refcount_warn_saturate(r, REFCOUNT_ADD_UAF); else if (unlikely(old < 0 || old + i < 0)) refcount_warn_saturate(r, REFCOUNT_ADD_OVF); } /** * refcount_add - add a value to a refcount * @i: the value to add to the refcount * @r: the refcount * * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. * * Provides no memory ordering, it is assumed the caller has guaranteed the * object memory to be stable (RCU, etc.). It does provide a control dependency * and thereby orders future stores. See the comment on top. * * Use of this function is not recommended for the normal reference counting * use case in which references are taken and released one at a time. In these * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. */ static inline void refcount_add(int i, refcount_t *r) { __refcount_add(i, r, NULL); } static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) { return __refcount_add_not_zero(1, r, oldp); } /** * refcount_inc_not_zero - increment a refcount unless it is 0 * @r: the refcount to increment * * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED * and WARN. 
* * Provides no memory ordering, it is assumed the caller has guaranteed the * object memory to be stable (RCU, etc.). It does provide a control dependency * and thereby orders future stores. See the comment on top. * * Return: true if the increment was successful, false otherwise */ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { return __refcount_inc_not_zero(r, NULL); } static inline void __refcount_inc(refcount_t *r, int *oldp) { __refcount_add(1, r, oldp); } /** * refcount_inc - increment a refcount * @r: the refcount to increment * * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN. * * Provides no memory ordering, it is assumed the caller already has a * reference on the object. * * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. */ static inline void refcount_inc(refcount_t *r) { __refcount_inc(r, NULL); } static inline __must_check __signed_wrap bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(i, &r->refs); if (oldp) *oldp = old; if (old == i) { smp_acquire__after_ctrl_dep(); return true; } if (unlikely(old < 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; } /** * refcount_sub_and_test - subtract from a refcount and test if it is 0 * @i: amount to subtract from the refcount * @r: the refcount * * Similar to atomic_dec_and_test(), but it will WARN, return false and * ultimately leak on underflow and will fail to decrement when saturated * at REFCOUNT_SATURATED. * * Provides release memory ordering, such that prior loads and stores are done * before, and provides an acquire ordering on success such that free() * must come after. * * Use of this function is not recommended for the normal reference counting * use case in which references are taken and released one at a time. In these * cases, refcount_dec(), or one of its variants, should instead be used to * decrement a reference count. * * Return: true if the resulting refcount is 0, false otherwise */ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { return __refcount_sub_and_test(i, r, NULL); } static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) { return __refcount_sub_and_test(1, r, oldp); } /** * refcount_dec_and_test - decrement a refcount and test if it is 0 * @r: the refcount * * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to * decrement when saturated at REFCOUNT_SATURATED. * * Provides release memory ordering, such that prior loads and stores are done * before, and provides an acquire ordering on success such that free() * must come after. * * Return: true if the resulting refcount is 0, false otherwise */ static inline __must_check bool refcount_dec_and_test(refcount_t *r) { return __refcount_dec_and_test(r, NULL); } static inline void __refcount_dec(refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(1, &r->refs); if (oldp) *oldp = old; if (unlikely(old <= 1)) refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } /** * refcount_dec - decrement a refcount * @r: the refcount * * Similar to atomic_dec(), it will WARN on underflow and fail to decrement * when saturated at REFCOUNT_SATURATED. * * Provides release memory ordering, such that prior loads and stores are done * before. 
*/ static inline void refcount_dec(refcount_t *r) { __refcount_dec(r, NULL); } extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock); extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock); extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, unsigned long *flags) __cond_acquires(lock); #endif /* _LINUX_REFCOUNT_H */
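/*
 * A minimal illustrative sketch (not part of this header) of the usual
 * refcount_t pattern the comments above describe: the object starts at a
 * reference count of 1, each additional user takes a reference with
 * refcount_inc(), and the final refcount_dec_and_test() (the 1 -> 0
 * transition, which provides the release/acquire ordering discussed above)
 * is the only place the object is freed. "struct foo", foo_alloc(), foo_get()
 * and foo_put() are made-up names used only for this example.
 */
#include <linux/refcount.h>
#include <linux/slab.h>

struct foo {
	refcount_t refcnt;
	int payload;
};

static struct foo *foo_alloc(void)
{
	struct foo *p = kzalloc(sizeof(*p), GFP_KERNEL);

	if (p)
		refcount_set(&p->refcnt, 1);	/* caller owns the first reference */
	return p;
}

static struct foo *foo_get(struct foo *p)
{
	refcount_inc(&p->refcnt);		/* WARNs and saturates on misuse */
	return p;
}

static void foo_put(struct foo *p)
{
	if (refcount_dec_and_test(&p->refcnt))	/* true only on the 1 -> 0 transition */
		kfree(p);
}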
// SPDX-License-Identifier: GPL-2.0-or-later /* * Directory notifications for Linux. * * Copyright (C) 2000,2001,2002 Stephen Rothwell * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * dnotify was largely rewritten to use the new fsnotify infrastructure */ #include <linux/fs.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/dnotify.h> #include <linux/init.h> #include <linux/security.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/fdtable.h> #include <linux/fsnotify_backend.h> static int dir_notify_enable __read_mostly = 1; #ifdef CONFIG_SYSCTL static struct ctl_table dnotify_sysctls[] = { { .procname = "dir-notify-enable", .data = &dir_notify_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, }; static void __init dnotify_sysctl_init(void) { register_sysctl_init("fs", dnotify_sysctls); } #else #define dnotify_sysctl_init() do { } while (0) #endif static struct kmem_cache *dnotify_struct_cache __ro_after_init; static struct kmem_cache *dnotify_mark_cache __ro_after_init; static struct fsnotify_group *dnotify_group __ro_after_init; /* * dnotify will attach one of these to each inode (i_fsnotify_marks) which * is being watched by dnotify. If multiple userspace applications are watching * the same directory with dnotify their information is chained in dn */ struct dnotify_mark { struct fsnotify_mark fsn_mark; struct dnotify_struct *dn; }; /* * When a process starts or stops watching an inode the set of events which * dnotify cares about for that inode may change. This function runs the * list of everything receiving dnotify events about this directory and calculates * the set of all those events. After it updates what dnotify is interested in * it calls the fsnotify function so it can update the set of all events relevant * to this inode.
*/ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) { __u32 new_mask = 0; struct dnotify_struct *dn; struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); assert_spin_locked(&fsn_mark->lock); for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next) new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); if (fsn_mark->mask == new_mask) return; fsn_mark->mask = new_mask; fsnotify_recalc_mask(fsn_mark->connector); } /* * Mains fsnotify call where events are delivered to dnotify. * Find the dnotify mark on the relevant inode, run the list of dnotify structs * on that mark and determine which of them has expressed interest in receiving * events of this type. When found send the correct process and signal and * destroy the dnotify struct if it was not registered to receive multiple * events. */ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; struct dnotify_struct **prev; struct fown_struct *fown; __u32 test_mask = mask & ~FS_EVENT_ON_CHILD; /* not a dir, dnotify doesn't care */ if (!dir && !(mask & FS_ISDIR)) return 0; dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); spin_lock(&inode_mark->lock); prev = &dn_mark->dn; while ((dn = *prev) != NULL) { if ((dn->dn_mask & test_mask) == 0) { prev = &dn->dn_next; continue; } fown = &dn->dn_filp->f_owner; send_sigio(fown, dn->dn_fd, POLL_MSG); if (dn->dn_mask & FS_DN_MULTISHOT) prev = &dn->dn_next; else { *prev = dn->dn_next; kmem_cache_free(dnotify_struct_cache, dn); dnotify_recalc_inode_mask(inode_mark); } } spin_unlock(&inode_mark->lock); return 0; } static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) { struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); BUG_ON(dn_mark->dn); kmem_cache_free(dnotify_mark_cache, dn_mark); } static const struct fsnotify_ops dnotify_fsnotify_ops = { .handle_inode_event = dnotify_handle_event, .free_mark = dnotify_free_mark, }; /* * Called every time a file is closed. Looks first for a dnotify mark on the * inode. If one is found run all of the ->dn structures attached to that * mark for one relevant to this process closing the file and remove that * dnotify_struct. If that was the last dnotify_struct also remove the * fsnotify_mark. 
*/ void dnotify_flush(struct file *filp, fl_owner_t id) { struct fsnotify_mark *fsn_mark; struct dnotify_mark *dn_mark; struct dnotify_struct *dn; struct dnotify_struct **prev; struct inode *inode; bool free = false; inode = file_inode(filp); if (!S_ISDIR(inode->i_mode)) return; fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); fsnotify_group_lock(dnotify_group); spin_lock(&fsn_mark->lock); prev = &dn_mark->dn; while ((dn = *prev) != NULL) { if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { *prev = dn->dn_next; kmem_cache_free(dnotify_struct_cache, dn); dnotify_recalc_inode_mask(fsn_mark); break; } prev = &dn->dn_next; } spin_unlock(&fsn_mark->lock); /* nothing else could have found us thanks to the dnotify_groups mark_mutex */ if (dn_mark->dn == NULL) { fsnotify_detach_mark(fsn_mark); free = true; } fsnotify_group_unlock(dnotify_group); if (free) fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); } /* this conversion is done only at watch creation */ static __u32 convert_arg(unsigned int arg) { __u32 new_mask = FS_EVENT_ON_CHILD; if (arg & DN_MULTISHOT) new_mask |= FS_DN_MULTISHOT; if (arg & DN_DELETE) new_mask |= (FS_DELETE | FS_MOVED_FROM); if (arg & DN_MODIFY) new_mask |= FS_MODIFY; if (arg & DN_ACCESS) new_mask |= FS_ACCESS; if (arg & DN_ATTRIB) new_mask |= FS_ATTRIB; if (arg & DN_RENAME) new_mask |= FS_RENAME; if (arg & DN_CREATE) new_mask |= (FS_CREATE | FS_MOVED_TO); return new_mask; } /* * If multiple processes watch the same inode with dnotify there is only one * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct * onto that mark. This function either attaches the new dnotify_struct onto * that list, or it |= the mask onto an existing dnofiy_struct. */ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark, fl_owner_t id, int fd, struct file *filp, __u32 mask) { struct dnotify_struct *odn; odn = dn_mark->dn; while (odn != NULL) { /* adding more events to existing dnofiy_struct? */ if ((odn->dn_owner == id) && (odn->dn_filp == filp)) { odn->dn_fd = fd; odn->dn_mask |= mask; return -EEXIST; } odn = odn->dn_next; } dn->dn_mask = mask; dn->dn_fd = fd; dn->dn_filp = filp; dn->dn_owner = id; dn->dn_next = dn_mark->dn; dn_mark->dn = dn; return 0; } /* * When a process calls fcntl to attach a dnotify watch to a directory it ends * up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be * attached to the fsnotify_mark. 
*/ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) { struct dnotify_mark *new_dn_mark, *dn_mark; struct fsnotify_mark *new_fsn_mark, *fsn_mark; struct dnotify_struct *dn; struct inode *inode; fl_owner_t id = current->files; struct file *f = NULL; int destroy = 0, error = 0; __u32 mask; /* we use these to tell if we need to kfree */ new_fsn_mark = NULL; dn = NULL; if (!dir_notify_enable) { error = -EINVAL; goto out_err; } /* a 0 mask means we are explicitly removing the watch */ if ((arg & ~DN_MULTISHOT) == 0) { dnotify_flush(filp, id); error = 0; goto out_err; } /* dnotify only works on directories */ inode = file_inode(filp); if (!S_ISDIR(inode->i_mode)) { error = -ENOTDIR; goto out_err; } /* * convert the userspace DN_* "arg" to the internal FS_* * defined in fsnotify */ mask = convert_arg(arg); error = security_path_notify(&filp->f_path, mask, FSNOTIFY_OBJ_TYPE_INODE); if (error) goto out_err; /* expect most fcntl to add new rather than augment old */ dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL); if (!dn) { error = -ENOMEM; goto out_err; } /* new fsnotify mark, we expect most fcntl calls to add a new mark */ new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL); if (!new_dn_mark) { error = -ENOMEM; goto out_err; } /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; fsnotify_init_mark(new_fsn_mark, dnotify_group); new_fsn_mark->mask = mask; new_dn_mark->dn = NULL; /* this is needed to prevent the fcntl/close race described below */ fsnotify_group_lock(dnotify_group); /* add the new_fsn_mark or find an old one. */ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (fsn_mark) { dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0); if (error) { fsnotify_group_unlock(dnotify_group); goto out_err; } spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; /* we used new_fsn_mark, so don't free it */ new_fsn_mark = NULL; } rcu_read_lock(); f = lookup_fdget_rcu(fd); rcu_read_unlock(); /* if (f != filp) means that we lost a race and another task/thread * actually closed the fd we are still playing with before we grabbed * the dnotify_groups mark_mutex and fsn_mark->lock. Since closing the * fd is the only time we clean up the marks we need to get our mark * off the list. */ if (f != filp) { /* if we added ourselves, shoot ourselves, it's possible that * the flush actually did shoot this fsn_mark. That's fine too * since multiple calls to destroy_mark is perfectly safe, if * we found a dn_mark already attached to the inode, just sod * off silently as the flush at close time dealt with it. */ if (dn_mark == new_dn_mark) destroy = 1; error = 0; goto out; } __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0); error = attach_dn(dn, dn_mark, id, fd, filp, mask); /* !error means that we attached the dn to the dn_mark, so don't free it */ if (!error) dn = NULL; /* -EEXIST means that we didn't add this new dn and used an old one. 
* that isn't an error (and the unused dn should be freed) */ else if (error == -EEXIST) error = 0; dnotify_recalc_inode_mask(fsn_mark); out: spin_unlock(&fsn_mark->lock); if (destroy) fsnotify_detach_mark(fsn_mark); fsnotify_group_unlock(dnotify_group); if (destroy) fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); out_err: if (new_fsn_mark) fsnotify_put_mark(new_fsn_mark); if (dn) kmem_cache_free(dnotify_struct_cache, dn); if (f) fput(f); return error; } static int __init dnotify_init(void) { dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC|SLAB_ACCOUNT); dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, FSNOTIFY_GROUP_NOFS); if (IS_ERR(dnotify_group)) panic("unable to allocate fsnotify group for dnotify\n"); dnotify_sysctl_init(); return 0; } module_init(dnotify_init)
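/*
 * A minimal user-space sketch (not part of the kernel sources above) of the
 * dnotify interface that fcntl_dirnotify() implements: a process opens a
 * directory, registers interest with fcntl(F_NOTIFY), and is then notified by
 * signal (SIGIO by default) whenever a matching event happens in that
 * directory. DN_MULTISHOT keeps the watch armed after the first event, which
 * corresponds to the FS_DN_MULTISHOT case handled in dnotify_handle_event().
 */
#define _GNU_SOURCE		/* for F_NOTIFY and the DN_* flags */
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t got_event;

static void on_sigio(int sig)
{
	(void)sig;
	got_event = 1;
}

int main(void)
{
	int dirfd = open(".", O_RDONLY);

	if (dirfd < 0)
		return 1;
	signal(SIGIO, on_sigio);
	/* Watch for creations and renames inside this directory, repeatedly. */
	if (fcntl(dirfd, F_NOTIFY, DN_CREATE | DN_RENAME | DN_MULTISHOT) < 0)
		return 1;
	for (;;) {
		pause();		/* wait for SIGIO from the kernel */
		if (got_event) {
			got_event = 0;
			printf("directory changed\n");
		}
	}
}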
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MATH64_H #define _LINUX_MATH64_H #include <linux/types.h> #include <linux/math.h> #include <vdso/math64.h> #include <asm/div64.h> #if BITS_PER_LONG == 64 #define div64_long(x, y) div64_s64((x), (y)) #define div64_ul(x, y) div64_u64((x), (y)) /** * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder * @dividend: unsigned 64bit dividend * @divisor: unsigned 32bit divisor * @remainder: pointer to unsigned 32bit remainder * * Return: sets ``*remainder``, then returns dividend / divisor * * This is commonly provided by 32bit archs to provide an optimized 64bit * divide.
*/ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) { *remainder = dividend % divisor; return dividend / divisor; } /** * div_s64_rem - signed 64bit divide with 32bit divisor with remainder * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor * @remainder: pointer to signed 32bit remainder * * Return: sets ``*remainder``, then returns dividend / divisor */ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) { *remainder = dividend % divisor; return dividend / divisor; } /** * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor * @remainder: pointer to unsigned 64bit remainder * * Return: sets ``*remainder``, then returns dividend / divisor */ static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) { *remainder = dividend % divisor; return dividend / divisor; } /** * div64_u64 - unsigned 64bit divide with 64bit divisor * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor * * Return: dividend / divisor */ static inline u64 div64_u64(u64 dividend, u64 divisor) { return dividend / divisor; } /** * div64_s64 - signed 64bit divide with 64bit divisor * @dividend: signed 64bit dividend * @divisor: signed 64bit divisor * * Return: dividend / divisor */ static inline s64 div64_s64(s64 dividend, s64 divisor) { return dividend / divisor; } #elif BITS_PER_LONG == 32 #define div64_long(x, y) div_s64((x), (y)) #define div64_ul(x, y) div_u64((x), (y)) #ifndef div_u64_rem static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) { *remainder = do_div(dividend, divisor); return dividend; } #endif #ifndef div_s64_rem extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder); #endif #ifndef div64_u64_rem extern u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder); #endif #ifndef div64_u64 extern u64 div64_u64(u64 dividend, u64 divisor); #endif #ifndef div64_s64 extern s64 div64_s64(s64 dividend, s64 divisor); #endif #endif /* BITS_PER_LONG */ /** * div_u64 - unsigned 64bit divide with 32bit divisor * @dividend: unsigned 64bit dividend * @divisor: unsigned 32bit divisor * * This is the most common 64bit divide and should be used if possible, * as many 32bit archs can optimize this variant better than a full 64bit * divide. 
* * Return: dividend / divisor */ #ifndef div_u64 static inline u64 div_u64(u64 dividend, u32 divisor) { u32 remainder; return div_u64_rem(dividend, divisor, &remainder); } #endif /** * div_s64 - signed 64bit divide with 32bit divisor * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor * * Return: dividend / divisor */ #ifndef div_s64 static inline s64 div_s64(s64 dividend, s32 divisor) { s32 remainder; return div_s64_rem(dividend, divisor, &remainder); } #endif u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); #ifndef mul_u32_u32 /* * Many a GCC version messes this up and generates a 64x64 mult :-( */ static inline u64 mul_u32_u32(u32 a, u32 b) { return (u64)a * b; } #endif #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) #ifndef mul_u64_u32_shr static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) { return (u64)(((unsigned __int128)a * mul) >> shift); } #endif /* mul_u64_u32_shr */ #ifndef mul_u64_u64_shr static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) { return (u64)(((unsigned __int128)a * mul) >> shift); } #endif /* mul_u64_u64_shr */ #else #ifndef mul_u64_u32_shr static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) { u32 ah, al; u64 ret; al = a; ah = a >> 32; ret = mul_u32_u32(al, mul) >> shift; if (ah) ret += mul_u32_u32(ah, mul) << (32 - shift); return ret; } #endif /* mul_u64_u32_shr */ #ifndef mul_u64_u64_shr static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) { union { u64 ll; struct { #ifdef __BIG_ENDIAN u32 high, low; #else u32 low, high; #endif } l; } rl, rm, rn, rh, a0, b0; u64 c; a0.ll = a; b0.ll = b; rl.ll = mul_u32_u32(a0.l.low, b0.l.low); rm.ll = mul_u32_u32(a0.l.low, b0.l.high); rn.ll = mul_u32_u32(a0.l.high, b0.l.low); rh.ll = mul_u32_u32(a0.l.high, b0.l.high); /* * Each of these lines computes a 64-bit intermediate result into "c", * starting at bits 32-95. The low 32-bits go into the result of the * multiplication, the high 32-bits are carried into the next step. */ rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low; rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low; rh.l.high = (c >> 32) + rh.l.high; /* * The 128-bit result of the multiplication is in rl.ll and rh.ll, * shift it right and throw away the high part of the result. */ if (shift == 0) return rl.ll; if (shift < 64) return (rl.ll >> shift) | (rh.ll << (64 - shift)); return rh.ll >> (shift & 63); } #endif /* mul_u64_u64_shr */ #endif #ifndef mul_s64_u64_shr static inline u64 mul_s64_u64_shr(s64 a, u64 b, unsigned int shift) { u64 ret; /* * Extract the sign before the multiplication and put it back * afterwards if needed. */ ret = mul_u64_u64_shr(abs(a), b, shift); if (a < 0) ret = -((s64) ret); return ret; } #endif /* mul_s64_u64_shr */ #ifndef mul_u64_u32_div static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) { union { u64 ll; struct { #ifdef __BIG_ENDIAN u32 high, low; #else u32 low, high; #endif } l; } u, rl, rh; u.ll = a; rl.ll = mul_u32_u32(u.l.low, mul); rh.ll = mul_u32_u32(u.l.high, mul) + rl.l.high; /* Bits 32-63 of the result will be in rh.l.low. */ rl.l.high = do_div(rh.ll, divisor); /* Bits 0-31 of the result will be in rl.l.low. 
*/ do_div(rl.ll, divisor); rl.l.high = rh.l.low; return rl.ll; } #endif /* mul_u64_u32_div */ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); /** * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up * @ll: unsigned 64bit dividend * @d: unsigned 64bit divisor * * Divide unsigned 64bit dividend by unsigned 64bit divisor * and round up. * * Return: dividend / divisor rounded up */ #define DIV64_U64_ROUND_UP(ll, d) \ ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) /** * DIV64_U64_ROUND_CLOSEST - unsigned 64bit divide with 64bit divisor rounded to nearest integer * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor * * Divide unsigned 64bit dividend by unsigned 64bit divisor * and round to closest integer. * * Return: dividend / divisor rounded to nearest integer */ #define DIV64_U64_ROUND_CLOSEST(dividend, divisor) \ ({ u64 _tmp = (divisor); div64_u64((dividend) + _tmp / 2, _tmp); }) /** * DIV_U64_ROUND_CLOSEST - unsigned 64bit divide with 32bit divisor rounded to nearest integer * @dividend: unsigned 64bit dividend * @divisor: unsigned 32bit divisor * * Divide unsigned 64bit dividend by unsigned 32bit divisor * and round to closest integer. * * Return: dividend / divisor rounded to nearest integer */ #define DIV_U64_ROUND_CLOSEST(dividend, divisor) \ ({ u32 _tmp = (divisor); div_u64((u64)(dividend) + _tmp / 2, _tmp); }) /** * DIV_S64_ROUND_CLOSEST - signed 64bit divide with 32bit divisor rounded to nearest integer * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor * * Divide signed 64bit dividend by signed 32bit divisor * and round to closest integer. * * Return: dividend / divisor rounded to nearest integer */ #define DIV_S64_ROUND_CLOSEST(dividend, divisor)( \ { \ s64 __x = (dividend); \ s32 __d = (divisor); \ ((__x > 0) == (__d > 0)) ? \ div_s64((__x + (__d / 2)), __d) : \ div_s64((__x - (__d / 2)), __d); \ } \ ) #endif /* _LINUX_MATH64_H */
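/*
 * A small user-space sketch (not part of this header) of the decomposition
 * that the non-__int128 variant of mul_u64_u64_shr() above relies on: a
 * 64x64->128 bit product is built from four 32x32->64 bit partial products,
 *
 *   a * b = ah*bh * 2^64 + (ah*bl + al*bh) * 2^32 + al*bl,
 *
 * and the 128-bit result is then shifted right. mul_shr_by_parts() is a
 * made-up name for this example; the result is checked against the
 * compiler's unsigned __int128 type, which is what the
 * CONFIG_ARCH_SUPPORTS_INT128 variant uses directly.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t mul_shr_by_parts(uint64_t a, uint64_t b, unsigned int shift)
{
	uint64_t al = (uint32_t)a, ah = a >> 32;
	uint64_t bl = (uint32_t)b, bh = b >> 32;
	uint64_t lo = al * bl;		/* bits   0..63 of the product */
	uint64_t mid1 = al * bh;	/* bits  32..95 */
	uint64_t mid2 = ah * bl;	/* bits  32..95 */
	uint64_t hi = ah * bh;		/* bits 64..127 */
	uint64_t carry;

	/* Accumulate the middle terms, propagating carries into the high word. */
	carry = (lo >> 32) + (uint32_t)mid1 + (uint32_t)mid2;
	hi += (mid1 >> 32) + (mid2 >> 32) + (carry >> 32);
	lo = (lo & 0xffffffffULL) | (carry << 32);

	if (shift == 0)
		return lo;
	if (shift < 64)
		return (lo >> shift) | (hi << (64 - shift));
	return hi >> (shift - 64);
}

int main(void)
{
	uint64_t a = 0x123456789abcdef0ULL, b = 0xfedcba9876543210ULL;
	unsigned int shift = 40;
	uint64_t want = (uint64_t)(((unsigned __int128)a * b) >> shift);

	assert(mul_shr_by_parts(a, b, shift) == want);
	printf("ok: %llu\n", (unsigned long long)want);
	return 0;
}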
// SPDX-License-Identifier: GPL-2.0 /* * Kernel timekeeping code and accessor functions. Based on code from * timer.c, moved in commit 8524070b7982. */ #include <linux/timekeeper_internal.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/nmi.h> #include <linux/sched.h> #include <linux/sched/loadavg.h> #include <linux/sched/clock.h> #include <linux/syscore_ops.h> #include <linux/clocksource.h> #include <linux/jiffies.h> #include <linux/time.h> #include <linux/timex.h> #include <linux/tick.h> #include <linux/stop_machine.h> #include <linux/pvclock_gtod.h> #include <linux/compiler.h> #include <linux/audit.h> #include <linux/random.h> #include "tick-internal.h" #include "ntp_internal.h" #include "timekeeping_internal.h" #define TK_CLEAR_NTP (1 << 0) #define TK_MIRROR (1 << 1) #define TK_CLOCK_WAS_SET (1 << 2) enum timekeeping_adv_mode { /* Update timekeeper when a tick has passed */ TK_ADV_TICK, /* Update timekeeper on a direct frequency change */ TK_ADV_FREQ }; DEFINE_RAW_SPINLOCK(timekeeper_lock); /* * The most important data for readout fits into a single 64 byte * cache line. */ static struct { seqcount_raw_spinlock_t seq; struct timekeeper timekeeper; } tk_core ____cacheline_aligned = { .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock), }; static struct timekeeper shadow_timekeeper; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; /** * struct tk_fast - NMI safe timekeeper * @seq: Sequence counter for protecting updates. The lowest bit * is the index for the tk_read_base array * @base: tk_read_base array. Access is indexed by the lowest bit of * @seq. * * See @update_fast_timekeeper() below. */ struct tk_fast { seqcount_latch_t seq; struct tk_read_base base[2]; }; /* Suspend-time cycles value for halted fast timekeeper. */ static u64 cycles_at_suspend; static u64 dummy_clock_read(struct clocksource *cs) { if (timekeeping_suspended) return cycles_at_suspend; return local_clock(); } static struct clocksource dummy_clock = { .read = dummy_clock_read, }; /* * Boot time initialization which allows local_clock() to be utilized * during early boot when clocksources are not available. local_clock() * returns nanoseconds already so no conversion is required, hence mult=1 * and shift=0. When the first proper clocksource is installed then * the fast time keepers are updated with the correct values.
*/ #define FAST_TK_INIT \ { \ .clock = &dummy_clock, \ .mask = CLOCKSOURCE_MASK(64), \ .mult = 1, \ .shift = 0, \ } static struct tk_fast tk_fast_mono ____cacheline_aligned = { .seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq), .base[0] = FAST_TK_INIT, .base[1] = FAST_TK_INIT, }; static struct tk_fast tk_fast_raw ____cacheline_aligned = { .seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq), .base[0] = FAST_TK_INIT, .base[1] = FAST_TK_INIT, }; static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec++; } while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) { tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift; tk->raw_sec++; } } static inline struct timespec64 tk_xtime(const struct timekeeper *tk) { struct timespec64 ts; ts.tv_sec = tk->xtime_sec; ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); return ts; } static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); } static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) { struct timespec64 tmp; /* * Verify consistency of: offset_real = -wall_to_monotonic * before modifying anything */ set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec, -tk->wall_to_monotonic.tv_nsec); WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp)); tk->wall_to_monotonic = wtm; set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec); tk->offs_real = timespec64_to_ktime(tmp); tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)); } static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { tk->offs_boot = ktime_add(tk->offs_boot, delta); /* * Timespec representation for VDSO update to avoid 64bit division * on every update. */ tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot); } /* * tk_clock_read - atomic clocksource read() helper * * This helper is necessary to use in the read paths because, while the * seqcount ensures we don't return a bad value while structures are updated, * it doesn't protect from potential crashes. There is the possibility that * the tkr's clocksource may change between the read reference, and the * clock reference passed to the read function. This can cause crashes if * the wrong clocksource is passed to the wrong read function. * This isn't necessary to use when holding the timekeeper_lock or doing * a read of the fast-timekeeper tkrs (which is protected by its own locking * and update logic). 
*/ static inline u64 tk_clock_read(const struct tk_read_base *tkr) { struct clocksource *clock = READ_ONCE(tkr->clock); return clock->read(clock); } #ifdef CONFIG_DEBUG_TIMEKEEPING #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ static void timekeeping_check_update(struct timekeeper *tk, u64 offset) { u64 max_cycles = tk->tkr_mono.clock->max_cycles; const char *name = tk->tkr_mono.clock->name; if (offset > max_cycles) { printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", offset, name, max_cycles); printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); } else { if (offset > (max_cycles >> 1)) { printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n", offset, name, max_cycles >> 1); printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); } } if (tk->underflow_seen) { if (jiffies - tk->last_warning > WARNING_FREQ) { printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); printk_deferred(" Your kernel is probably still fine.\n"); tk->last_warning = jiffies; } tk->underflow_seen = 0; } if (tk->overflow_seen) { if (jiffies - tk->last_warning > WARNING_FREQ) { printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); printk_deferred(" Your kernel is probably still fine.\n"); tk->last_warning = jiffies; } tk->overflow_seen = 0; } } static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr) { struct timekeeper *tk = &tk_core.timekeeper; u64 now, last, mask, max, delta; unsigned int seq; /* * Since we're called holding a seqcount, the data may shift * under us while we're doing the calculation. This can cause * false positives, since we'd note a problem but throw the * results away. So nest another seqcount here to atomically * grab the points we are checking with. */ do { seq = read_seqcount_begin(&tk_core.seq); now = tk_clock_read(tkr); last = tkr->cycle_last; mask = tkr->mask; max = tkr->clock->max_cycles; } while (read_seqcount_retry(&tk_core.seq, seq)); delta = clocksource_delta(now, last, mask); /* * Try to catch underflows by checking if we are seeing small * mask-relative negative values. */ if (unlikely((~delta & mask) < (mask >> 3))) { tk->underflow_seen = 1; delta = 0; } /* Cap delta value to the max_cycles values to avoid mult overflows */ if (unlikely(delta > max)) { tk->overflow_seen = 1; delta = tkr->clock->max_cycles; } return delta; } #else static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset) { } static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr) { u64 cycle_now, delta; /* read clocksource */ cycle_now = tk_clock_read(tkr); /* calculate the delta since the last update_wall_time */ delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); return delta; } #endif /** * tk_setup_internals - Set up internals to use clocksource clock. * * @tk: The target timekeeper to setup. * @clock: Pointer to clocksource. * * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment * pair and interval request. * * Unless you're the timekeeping code, you should not be using this! 
*/ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) { u64 interval; u64 tmp, ntpinterval; struct clocksource *old_clock; ++tk->cs_was_changed_seq; old_clock = tk->tkr_mono.clock; tk->tkr_mono.clock = clock; tk->tkr_mono.mask = clock->mask; tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono); tk->tkr_raw.clock = clock; tk->tkr_raw.mask = clock->mask; tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; ntpinterval = tmp; tmp += clock->mult/2; do_div(tmp, clock->mult); if (tmp == 0) tmp = 1; interval = (u64) tmp; tk->cycle_interval = interval; /* Go back from cycles -> shifted ns */ tk->xtime_interval = interval * clock->mult; tk->xtime_remainder = ntpinterval - tk->xtime_interval; tk->raw_interval = interval * clock->mult; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) { tk->tkr_mono.xtime_nsec >>= -shift_change; tk->tkr_raw.xtime_nsec >>= -shift_change; } else { tk->tkr_mono.xtime_nsec <<= shift_change; tk->tkr_raw.xtime_nsec <<= shift_change; } } tk->tkr_mono.shift = clock->shift; tk->tkr_raw.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; tk->ntp_tick = ntpinterval << tk->ntp_error_shift; /* * The timekeeper keeps its own mult values for the currently * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ tk->tkr_mono.mult = clock->mult; tk->tkr_raw.mult = clock->mult; tk->ntp_err_mult = 0; tk->skip_second_overflow = 0; } /* Timekeeper helper functions. */ static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta) { u64 nsec; nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; return nsec; } static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) { u64 delta; delta = timekeeping_get_delta(tkr); return timekeeping_delta_to_ns(tkr, delta); } static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles) { u64 delta; /* calculate the delta since the last update_wall_time */ delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); return timekeeping_delta_to_ns(tkr, delta); } /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. * @tkr: Timekeeping readout base from which we take the update * @tkf: Pointer to NMI safe timekeeper * * We want to use this from any context including NMI and tracing / * instrumenting the timekeeping code itself. * * Employ the latch technique; see @raw_write_seqcount_latch. * * So if a NMI hits the update of base[0] then it will use base[1] * which is still consistent. In the worst case this can result is a * slightly wrong timestamp (a few nanoseconds). See * @ktime_get_mono_fast_ns. 
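 *
 * Illustrative sketch of a matching latch reader (the in-tree reader is
 * __ktime_get_fast_ns() below; variable names here are examples only):
 *
 *	do {
 *		seq = raw_read_seqcount_latch(&tkf->seq);
 *		tkr = tkf->base + (seq & 0x01);
 *		ns  = ktime_to_ns(tkr->base) + fast_tk_get_delta_ns(tkr);
 *	} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));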
*/ static void update_fast_timekeeper(const struct tk_read_base *tkr, struct tk_fast *tkf) { struct tk_read_base *base = tkf->base; /* Force readers off to base[1] */ raw_write_seqcount_latch(&tkf->seq); /* Update base[0] */ memcpy(base, tkr, sizeof(*base)); /* Force readers back to base[0] */ raw_write_seqcount_latch(&tkf->seq); /* Update base[1] */ memcpy(base + 1, base, sizeof(*base)); } static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr) { u64 delta, cycles = tk_clock_read(tkr); delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask); return timekeeping_delta_to_ns(tkr, delta); } static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) { struct tk_read_base *tkr; unsigned int seq; u64 now; do { seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base); now += fast_tk_get_delta_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); return now; } /** * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic * * This timestamp is not guaranteed to be monotonic across an update. * The timestamp is calculated by: * * now = base_mono + clock_delta * slope * * So if the update lowers the slope, readers who are forced to the * not yet updated second array are still using the old steeper slope. * * tmono * ^ * | o n * | o n * | u * | o * |o * |12345678---> reader order * * o = old slope * u = update * n = new slope * * So reader 6 will observe time going backwards versus reader 5. * * While other CPUs are likely to be able to observe that, the only way * for a CPU local observation is when an NMI hits in the middle of * the update. Timestamps taken from that NMI context might be ahead * of the following timestamps. Callers need to be aware of that and * deal with it. */ u64 notrace ktime_get_mono_fast_ns(void) { return __ktime_get_fast_ns(&tk_fast_mono); } EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); /** * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw * * Contrary to ktime_get_mono_fast_ns() this is always correct because the * conversion factor is not affected by NTP/PTP correction. */ u64 notrace ktime_get_raw_fast_ns(void) { return __ktime_get_fast_ns(&tk_fast_raw); } EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); /** * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. * * To keep it NMI safe since we're accessing from tracing, we're not using a * separate timekeeper with updates to monotonic clock and boot offset * protected with seqcounts. This has the following minor side effects: * * (1) Its possible that a timestamp be taken after the boot offset is updated * but before the timekeeper is updated. If this happens, the new boot offset * is added to the old timekeeping making the clock appear to update slightly * earlier: * CPU 0 CPU 1 * timekeeping_inject_sleeptime64() * __timekeeping_inject_sleeptime(tk, delta); * timestamp(); * timekeeping_update(tk, TK_CLEAR_NTP...); * * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be * partially updated. Since the tk->offs_boot update is a rare event, this * should be a rare occurrence which postprocessing should be able to handle. * * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns() * apply as well. 
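 *
 * Illustrative use from NMI or tracing context (sketch only): both
 * accessors are lockless and return nanoseconds as u64:
 *
 *	u64 mono_ns = ktime_get_mono_fast_ns();
 *	u64 boot_ns = ktime_get_boot_fast_ns();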
*/ u64 notrace ktime_get_boot_fast_ns(void) { struct timekeeper *tk = &tk_core.timekeeper; return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot))); } EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); /** * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock. * * The same limitations as described for ktime_get_boot_fast_ns() apply. The * mono time and the TAI offset are not read atomically which may yield wrong * readouts. However, an update of the TAI offset is an rare event e.g., caused * by settime or adjtimex with an offset. The user of this function has to deal * with the possibility of wrong timestamps in post processing. */ u64 notrace ktime_get_tai_fast_ns(void) { struct timekeeper *tk = &tk_core.timekeeper; return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai))); } EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns); static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono) { struct tk_read_base *tkr; u64 basem, baser, delta; unsigned int seq; do { seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); basem = ktime_to_ns(tkr->base); baser = ktime_to_ns(tkr->base_real); delta = fast_tk_get_delta_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); if (mono) *mono = basem + delta; return baser + delta; } /** * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime. * * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering. */ u64 ktime_get_real_fast_ns(void) { return __ktime_get_real_fast(&tk_fast_mono, NULL); } EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns); /** * ktime_get_fast_timestamps: - NMI safe timestamps * @snapshot: Pointer to timestamp storage * * Stores clock monotonic, boottime and realtime timestamps. * * Boot time is a racy access on 32bit systems if the sleep time injection * happens late during resume and not in timekeeping_resume(). That could * be avoided by expanding struct tk_read_base with boot offset for 32bit * and adding more overhead to the update. As this is a hard to observe * once per resume event which can be filtered with reasonable effort using * the accurate mono/real timestamps, it's probably not worth the trouble. * * Aside of that it might be possible on 32 and 64 bit to observe the * following when the sleep time injection happens late: * * CPU 0 CPU 1 * timekeeping_resume() * ktime_get_fast_timestamps() * mono, real = __ktime_get_real_fast() * inject_sleep_time() * update boot offset * boot = mono + bootoffset; * * That means that boot time already has the sleep time adjustment, but * real time does not. On the next readout both are in sync again. * * Preventing this for 64bit is not really feasible without destroying the * careful cache layout of the timekeeper because the sequence count and * struct tk_read_base would then need two cache lines instead of one. * * Access to the time keeper clock source is disabled across the innermost * steps of suspend/resume. The accessors still work, but the timestamps * are frozen until time keeping is resumed which happens very early. * * For regular suspend/resume there is no observable difference vs. sched * clock, but it might affect some of the nasty low level debug printks. * * OTOH, access to sched clock is not guaranteed across suspend/resume on * all systems either so it depends on the hardware in use. * * If that turns out to be a real problem then this could be mitigated by * using sched clock in a similar way as during early boot. 
But it's not as * trivial as on early boot because it needs some careful protection * against the clock monotonic timestamp jumping backwards on resume. */ void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot) { struct timekeeper *tk = &tk_core.timekeeper; snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono); snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot)); } /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. * * It generally is unsafe to access the clocksource after timekeeping has been * suspended, so take a snapshot of the readout base of @tk and use it as the * fast timekeeper's readout base while suspended. It will return the same * number of cycles every time until timekeeping is resumed at which time the * proper readout base for the fast timekeeper will be restored automatically. */ static void halt_fast_timekeeper(const struct timekeeper *tk) { static struct tk_read_base tkr_dummy; const struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tk_clock_read(tkr); tkr_dummy.clock = &dummy_clock; tkr_dummy.base_real = tkr->base + tk->offs_real; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); tkr = &tk->tkr_raw; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); tkr_dummy.clock = &dummy_clock; update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) { raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk); } /** * pvclock_gtod_register_notifier - register a pvclock timedata update listener * @nb: Pointer to the notifier block to register */ int pvclock_gtod_register_notifier(struct notifier_block *nb) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; int ret; raw_spin_lock_irqsave(&timekeeper_lock, flags); ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb); update_pvclock_gtod(tk, true); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); return ret; } EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier); /** * pvclock_gtod_unregister_notifier - unregister a pvclock * timedata update listener * @nb: Pointer to the notifier block to unregister */ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) { unsigned long flags; int ret; raw_spin_lock_irqsave(&timekeeper_lock, flags); ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); return ret; } EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); /* * tk_update_leap_state - helper to update the next_leap_ktime */ static inline void tk_update_leap_state(struct timekeeper *tk) { tk->next_leap_ktime = ntp_get_next_leap(); if (tk->next_leap_ktime != KTIME_MAX) /* Convert to monotonic time */ tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real); } /* * Update the ktime_t based scalar nsec members of the timekeeper */ static inline void tk_update_ktime_data(struct timekeeper *tk) { u64 seconds; u32 nsec; /* * The xtime based monotonic readout is: * nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now(); * The ktime based monotonic readout is: * nsec = base_mono + now(); * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec */ seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec = (u32) tk->wall_to_monotonic.tv_nsec; tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* * The sum of the nanoseconds portions of xtime and * 
wall_to_monotonic can be greater/equal one second. Take * this into account before updating tk->ktime_sec. */ nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); if (nsec >= NSEC_PER_SEC) seconds++; tk->ktime_sec = seconds; /* Update the monotonic raw base */ tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC); } /* must hold timekeeper_lock */ static void timekeeping_update(struct timekeeper *tk, unsigned int action) { if (action & TK_CLEAR_NTP) { tk->ntp_error = 0; ntp_clear(); } tk_update_leap_state(tk); tk_update_ktime_data(tk); update_vsyscall(tk); update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); if (action & TK_CLOCK_WAS_SET) tk->clock_was_set_seq++; /* * The mirroring of the data to the shadow-timekeeper needs * to happen last here to ensure we don't over-write the * timekeeper structure on the next update with stale data */ if (action & TK_MIRROR) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); } /** * timekeeping_forward_now - update clock to the current time * @tk: Pointer to the timekeeper to update * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. */ static void timekeeping_forward_now(struct timekeeper *tk) { u64 cycle_now, delta; cycle_now = tk_clock_read(&tk->tkr_mono); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult; tk_normalize_xtime(tk); } /** * ktime_get_real_ts64 - Returns the time of day in a timespec64. * @ts: pointer to the timespec to be set * * Returns the time of day in a timespec64 (WARN if suspended). 
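 *
 * Illustrative call (sketch only), showing the seconds/nanoseconds split:
 *
 *	struct timespec64 ts;
 *
 *	ktime_get_real_ts64(&ts);
 *	pr_debug("wall time: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);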
*/ void ktime_get_real_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); } EXPORT_SYMBOL(ktime_get_real_ts64); ktime_t ktime_get(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base; u64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); base = tk->tkr_mono.base; nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get); u32 ktime_get_resolution_ns(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u32 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift; } while (read_seqcount_retry(&tk_core.seq, seq)); return nsecs; } EXPORT_SYMBOL_GPL(ktime_get_resolution_ns); static ktime_t *offsets[TK_OFFS_MAX] = { [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai, }; ktime_t ktime_get_with_offset(enum tk_offsets offs) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base, *offset = offsets[offs]; u64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); base = ktime_add(tk->tkr_mono.base, *offset); nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get_with_offset); ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base, *offset = offsets[offs]; u64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); base = ktime_add(tk->tkr_mono.base, *offset); nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset); /** * ktime_mono_to_any() - convert monotonic time to any other time * @tmono: time to convert. * @offs: which offset to use */ ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs) { ktime_t *offset = offsets[offs]; unsigned int seq; ktime_t tconv; do { seq = read_seqcount_begin(&tk_core.seq); tconv = ktime_add(tmono, *offset); } while (read_seqcount_retry(&tk_core.seq, seq)); return tconv; } EXPORT_SYMBOL_GPL(ktime_mono_to_any); /** * ktime_get_raw - Returns the raw monotonic time in ktime_t format */ ktime_t ktime_get_raw(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base; u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); base = tk->tkr_raw.base; nsecs = timekeeping_get_ns(&tk->tkr_raw); } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get_raw); /** * ktime_get_ts64 - get the monotonic clock in timespec64 format * @ts: pointer to timespec variable * * The function calculates the monotonic clock from the realtime * clock and the wall_to_monotonic offset and stores the result * in normalized timespec64 format in the variable pointed to by @ts. 
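 *
 * Illustrative comparison (sketch only): ktime_get() returns the same
 * monotonic clock as a scalar ktime_t, while this function fills in a
 * timespec64:
 *
 *	struct timespec64 ts;
 *	ktime_t now = ktime_get();
 *
 *	ktime_get_ts64(&ts);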
*/ void ktime_get_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tomono; unsigned int seq; u64 nsec; WARN_ON(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(&tk->tkr_mono); tomono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_sec += tomono.tv_sec; ts->tv_nsec = 0; timespec64_add_ns(ts, nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts64); /** * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC * * Returns the seconds portion of CLOCK_MONOTONIC with a single non * serialized read. tk->ktime_sec is of type 'unsigned long' so this * works on both 32 and 64 bit systems. On 32 bit systems the readout * covers ~136 years of uptime which should be enough to prevent * premature wrap arounds. */ time64_t ktime_get_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; WARN_ON(timekeeping_suspended); return tk->ktime_sec; } EXPORT_SYMBOL_GPL(ktime_get_seconds); /** * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME * * Returns the wall clock seconds since 1970. * * For 64bit systems the fast access to tk->xtime_sec is preserved. On * 32bit systems the access must be protected with the sequence * counter to provide "atomic" access to the 64bit tk->xtime_sec * value. */ time64_t ktime_get_real_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; time64_t seconds; unsigned int seq; if (IS_ENABLED(CONFIG_64BIT)) return tk->xtime_sec; do { seq = read_seqcount_begin(&tk_core.seq); seconds = tk->xtime_sec; } while (read_seqcount_retry(&tk_core.seq, seq)); return seconds; } EXPORT_SYMBOL_GPL(ktime_get_real_seconds); /** * __ktime_get_real_seconds - The same as ktime_get_real_seconds * but without the sequence counter protect. This internal function * is called just when timekeeping lock is already held. 
*/ noinstr time64_t __ktime_get_real_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; return tk->xtime_sec; } /** * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter * @systime_snapshot: pointer to struct receiving the system time snapshot */ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base_raw; ktime_t base_real; u64 nsec_raw; u64 nsec_real; u64 now; WARN_ON_ONCE(timekeeping_suspended); do { seq = read_seqcount_begin(&tk_core.seq); now = tk_clock_read(&tk->tkr_mono); systime_snapshot->cs_id = tk->tkr_mono.clock->id; systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; base_real = ktime_add(tk->tkr_mono.base, tk_core.timekeeper.offs_real); base_raw = tk->tkr_raw.base; nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now); nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now); } while (read_seqcount_retry(&tk_core.seq, seq)); systime_snapshot->cycles = now; systime_snapshot->real = ktime_add_ns(base_real, nsec_real); systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw); } EXPORT_SYMBOL_GPL(ktime_get_snapshot); /* Scale base by mult/div checking for overflow */ static int scale64_check_overflow(u64 mult, u64 div, u64 *base) { u64 tmp, rem; tmp = div64_u64_rem(*base, div, &rem); if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) || ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem))) return -EOVERFLOW; tmp *= mult; rem = div64_u64(rem * mult, div); *base = tmp + rem; return 0; } /** * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval * @history: Snapshot representing start of history * @partial_history_cycles: Cycle offset into history (fractional part) * @total_history_cycles: Total history length in cycles * @discontinuity: True indicates clock was set on history period * @ts: Cross timestamp that should be adjusted using * partial/total ratio * * Helper function used by get_device_system_crosststamp() to correct the * crosstimestamp corresponding to the start of the current interval to the * system counter value (timestamp point) provided by the driver. The * total_history_* quantities are the total history starting at the provided * reference point and ending at the start of the current interval. The cycle * count between the driver timestamp point and the start of the current * interval is partial_history_cycles. */ static int adjust_historical_crosststamp(struct system_time_snapshot *history, u64 partial_history_cycles, u64 total_history_cycles, bool discontinuity, struct system_device_crosststamp *ts) { struct timekeeper *tk = &tk_core.timekeeper; u64 corr_raw, corr_real; bool interp_forward; int ret; if (total_history_cycles == 0 || partial_history_cycles == 0) return 0; /* Interpolate shortest distance from beginning or end of history */ interp_forward = partial_history_cycles > total_history_cycles / 2; partial_history_cycles = interp_forward ? 
total_history_cycles - partial_history_cycles : partial_history_cycles; /* * Scale the monotonic raw time delta by: * partial_history_cycles / total_history_cycles */ corr_raw = (u64)ktime_to_ns( ktime_sub(ts->sys_monoraw, history->raw)); ret = scale64_check_overflow(partial_history_cycles, total_history_cycles, &corr_raw); if (ret) return ret; /* * If there is a discontinuity in the history, scale monotonic raw * correction by: * mult(real)/mult(raw) yielding the realtime correction * Otherwise, calculate the realtime correction similar to monotonic * raw calculation */ if (discontinuity) { corr_real = mul_u64_u32_div (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult); } else { corr_real = (u64)ktime_to_ns( ktime_sub(ts->sys_realtime, history->real)); ret = scale64_check_overflow(partial_history_cycles, total_history_cycles, &corr_real); if (ret) return ret; } /* Fixup monotonic raw and real time time values */ if (interp_forward) { ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw); ts->sys_realtime = ktime_add_ns(history->real, corr_real); } else { ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw); ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real); } return 0; } /* * timestamp_in_interval - true if ts is chronologically in [start, end] * * True if ts occurs chronologically at or after start, and before or at end. */ static bool timestamp_in_interval(u64 start, u64 end, u64 ts) { if (ts >= start && ts <= end) return true; if (start > end && (ts >= start || ts <= end)) return true; return false; } /** * get_device_system_crosststamp - Synchronously capture system/device timestamp * @get_time_fn: Callback to get simultaneous device time and * system counter from the device driver * @ctx: Context passed to get_time_fn() * @history_begin: Historical reference point used to interpolate system * time when counter provided by the driver is before the current interval * @xtstamp: Receives simultaneously captured system and device time * * Reads a timestamp from a device and correlates it to system time */ int get_device_system_crosststamp(int (*get_time_fn) (ktime_t *device_time, struct system_counterval_t *sys_counterval, void *ctx), void *ctx, struct system_time_snapshot *history_begin, struct system_device_crosststamp *xtstamp) { struct system_counterval_t system_counterval; struct timekeeper *tk = &tk_core.timekeeper; u64 cycles, now, interval_start; unsigned int clock_was_set_seq = 0; ktime_t base_real, base_raw; u64 nsec_real, nsec_raw; u8 cs_was_changed_seq; unsigned int seq; bool do_interp; int ret; do { seq = read_seqcount_begin(&tk_core.seq); /* * Try to synchronously capture device time and a system * counter value calling back into the device driver */ ret = get_time_fn(&xtstamp->device, &system_counterval, ctx); if (ret) return ret; /* * Verify that the clocksource ID associated with the captured * system counter value is the same as for the currently * installed timekeeper clocksource */ if (system_counterval.cs_id == CSID_GENERIC || tk->tkr_mono.clock->id != system_counterval.cs_id) return -ENODEV; cycles = system_counterval.cycles; /* * Check whether the system counter value provided by the * device driver is on the current timekeeping interval. 
*/ now = tk_clock_read(&tk->tkr_mono); interval_start = tk->tkr_mono.cycle_last; if (!timestamp_in_interval(interval_start, now, cycles)) { clock_was_set_seq = tk->clock_was_set_seq; cs_was_changed_seq = tk->cs_was_changed_seq; cycles = interval_start; do_interp = true; } else { do_interp = false; } base_real = ktime_add(tk->tkr_mono.base, tk_core.timekeeper.offs_real); base_raw = tk->tkr_raw.base; nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles); } while (read_seqcount_retry(&tk_core.seq, seq)); xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real); xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw); /* * Interpolate if necessary, adjusting back from the start of the * current interval */ if (do_interp) { u64 partial_history_cycles, total_history_cycles; bool discontinuity; /* * Check that the counter value is not before the provided * history reference and that the history doesn't cross a * clocksource change */ if (!history_begin || !timestamp_in_interval(history_begin->cycles, cycles, system_counterval.cycles) || history_begin->cs_was_changed_seq != cs_was_changed_seq) return -EINVAL; partial_history_cycles = cycles - system_counterval.cycles; total_history_cycles = cycles - history_begin->cycles; discontinuity = history_begin->clock_was_set_seq != clock_was_set_seq; ret = adjust_historical_crosststamp(history_begin, partial_history_cycles, total_history_cycles, discontinuity, xtstamp); if (ret) return ret; } return 0; } EXPORT_SYMBOL_GPL(get_device_system_crosststamp); /** * do_settimeofday64 - Sets the time of day. * @ts: pointer to the timespec64 variable containing the new time * * Sets the time of day to the new time and update NTP and notify hrtimers */ int do_settimeofday64(const struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts_delta, xt; unsigned long flags; int ret = 0; if (!timespec64_valid_settod(ts)) return -EINVAL; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); xt = tk_xtime(tk); ts_delta = timespec64_sub(*ts, xt); if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) { ret = -EINVAL; goto out; } tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta)); tk_set_xtime(tk, ts); out: timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* Signal hrtimers about time change */ clock_was_set(CLOCK_SET_WALL); if (!ret) { audit_tk_injoffset(ts_delta); add_device_randomness(ts, sizeof(*ts)); } return ret; } EXPORT_SYMBOL(do_settimeofday64); /** * timekeeping_inject_offset - Adds or subtracts from the current time. * @ts: Pointer to the timespec variable containing the offset * * Adds or subtracts an offset value from the current time. 
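 *
 * Illustrative offsets (examples only): the timespec64 must be normalized,
 * i.e. tv_nsec in [0, NSEC_PER_SEC). A +1s step and a -0.5s step would be
 * expressed as:
 *
 *	struct timespec64 fwd  = { .tv_sec =  1, .tv_nsec = 0 };
 *	struct timespec64 back = { .tv_sec = -1, .tv_nsec = 500000000 };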
*/ static int timekeeping_inject_offset(const struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; struct timespec64 tmp; int ret = 0; if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); /* Make sure the proposed value is valid */ tmp = timespec64_add(tk_xtime(tk), *ts); if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 || !timespec64_valid_settod(&tmp)) { ret = -EINVAL; goto error; } tk_xtime_add(tk, ts); tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts)); error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* Signal hrtimers about time change */ clock_was_set(CLOCK_SET_WALL); return ret; } /* * Indicates if there is an offset between the system clock and the hardware * clock/persistent clock/rtc. */ int persistent_clock_is_local; /* * Adjust the time obtained from the CMOS to be UTC time instead of * local time. * * This is ugly, but preferable to the alternatives. Otherwise we * would either need to write a program to do it in /etc/rc (and risk * confusion if the program gets run more than once; it would also be * hard to make the program warp the clock precisely n hours) or * compile in the timezone information into the kernel. Bad, bad.... * * - TYT, 1992-01-01 * * The best thing to do is to keep the CMOS clock in universal time (UTC) * as real UNIX machines always do it. This avoids all headaches about * daylight saving times and warping kernel clocks. */ void timekeeping_warp_clock(void) { if (sys_tz.tz_minuteswest != 0) { struct timespec64 adjust; persistent_clock_is_local = 1; adjust.tv_sec = sys_tz.tz_minuteswest * 60; adjust.tv_nsec = 0; timekeeping_inject_offset(&adjust); } } /* * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic */ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) { tk->tai_offset = tai_offset; tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0)); } /* * change_clocksource - Swaps clocksources if a new one is available * * Accumulates current time interval and initializes new clocksource */ static int change_clocksource(void *data) { struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *new, *old = NULL; unsigned long flags; bool change = false; new = (struct clocksource *) data; /* * If the cs is in module, get a module reference. Succeeds * for built-in code (owner == NULL) as well. */ if (try_module_get(new->owner)) { if (!new->enable || new->enable(new) == 0) change = true; else module_put(new->owner); } raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); if (change) { old = tk->tkr_mono.clock; tk_setup_internals(tk, new); } timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); if (old) { if (old->disable) old->disable(old); module_put(old->owner); } return 0; } /** * timekeeping_notify - Install a new clock source * @clock: pointer to the clock source * * This function is called from clocksource.c after a new, better clock * source has been registered. The caller holds the clocksource_mutex. 
*/ int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &tk_core.timekeeper; if (tk->tkr_mono.clock == clock) return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); return tk->tkr_mono.clock == clock ? 0 : -1; } /** * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec * @ts: pointer to the timespec64 to be set * * Returns the raw monotonic time (completely un-modified by ntp) */ void ktime_get_raw_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->raw_sec; nsecs = timekeeping_get_ns(&tk->tkr_raw); } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); } EXPORT_SYMBOL(ktime_get_raw_ts64); /** * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres */ int timekeeping_valid_for_hres(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; int ret; do { seq = read_seqcount_begin(&tk_core.seq); ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } /** * timekeeping_max_deferment - Returns max time the clocksource can be deferred */ u64 timekeeping_max_deferment(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; u64 ret; do { seq = read_seqcount_begin(&tk_core.seq); ret = tk->tkr_mono.clock->max_idle_ns; } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } /** * read_persistent_clock64 - Return time from the persistent clock. * @ts: Pointer to the storage for the readout value * * Weak dummy function for arches that do not yet support it. * Reads the time from the battery backed persistent clock. * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. * * XXX - Do be sure to remove it once all arches implement it. */ void __weak read_persistent_clock64(struct timespec64 *ts) { ts->tv_sec = 0; ts->tv_nsec = 0; } /** * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset * from the boot. * @wall_time: current time as returned by persistent clock * @boot_offset: offset that is defined as wall_time - boot_time * * Weak dummy function for arches that do not yet support it. * * The default function calculates offset based on the current value of * local_clock(). This way architectures that support sched_clock() but don't * support dedicated boot time clock will provide the best estimate of the * boot time. */ void __weak __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, struct timespec64 *boot_offset) { read_persistent_clock64(wall_time); *boot_offset = ns_to_timespec64(local_clock()); } /* * Flag reflecting whether timekeeping_resume() has injected sleeptime. * * The flag starts of false and is only set when a suspend reaches * timekeeping_suspend(), timekeeping_resume() sets it to false when the * timekeeper clocksource is not stopping across suspend and has been * used to update sleep time. If the timekeeper clocksource has stopped * then the flag stays true and is used by the RTC resume code to decide * whether sleeptime must be injected and if so the flag gets false then. * * If a suspend fails before reaching timekeeping_resume() then the flag * stays false and prevents erroneous sleeptime injection. 
*/ static bool suspend_timing_needed; /* Flag for if there is a persistent clock on this platform */ static bool persistent_clock_exists; /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ void __init timekeeping_init(void) { struct timespec64 wall_time, boot_offset, wall_to_mono; struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock; unsigned long flags; read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); if (timespec64_valid_settod(&wall_time) && timespec64_to_ns(&wall_time) > 0) { persistent_clock_exists = true; } else if (timespec64_to_ns(&wall_time) != 0) { pr_warn("Persistent clock returned invalid value"); wall_time = (struct timespec64){0}; } if (timespec64_compare(&wall_time, &boot_offset) < 0) boot_offset = (struct timespec64){0}; /* * We want set wall_to_mono, so the following is true: * wall time + wall_to_mono = boot time */ wall_to_mono = timespec64_sub(boot_offset, wall_time); raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); ntp_init(); clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); tk_setup_internals(tk, clock); tk_set_xtime(tk, &wall_time); tk->raw_sec = 0; tk_set_wall_to_mono(tk, wall_to_mono); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } /* time in seconds when suspend began for persistent clock */ static struct timespec64 timekeeping_suspend_time; /** * __timekeeping_inject_sleeptime - Internal function to add sleep interval * @tk: Pointer to the timekeeper to be updated * @delta: Pointer to the delta value in timespec64 format * * Takes a timespec offset measuring a suspend interval and properly * adds the sleep offset to the timekeeping variables. */ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, const struct timespec64 *delta) { if (!timespec64_valid_strict(delta)) { printk_deferred(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " "sleep delta value!\n"); return; } tk_xtime_add(tk, delta); tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); tk_debug_account_sleep_time(delta); } #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) /* * We have three kinds of time sources to use for sleep time * injection, the preference order is: * 1) non-stop clocksource * 2) persistent clock (ie: RTC accessible when irqs are off) * 3) RTC * * 1) and 2) are used by timekeeping, 3) by RTC subsystem. * If system has neither 1) nor 2), 3) will be used finally. * * * If timekeeping has injected sleeptime via either 1) or 2), * 3) becomes needless, so in this case we don't need to call * rtc_resume(), and this is what timekeeping_rtc_skipresume() * means. */ bool timekeeping_rtc_skipresume(void) { return !suspend_timing_needed; } /* * 1) can be determined whether to use or not only when doing * timekeeping_resume() which is invoked after rtc_suspend(), * so we can't skip rtc_suspend() surely if system has 1). * * But if system has 2), 2) will definitely be used, so in this * case we don't need to call rtc_suspend(), and this is what * timekeeping_rtc_skipsuspend() means. 
*/ bool timekeeping_rtc_skipsuspend(void) { return persistent_clock_exists; } /** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. * and also don't have an effective nonstop clocksource. * * This function should only be called by rtc_resume(), and allows * a suspend offset to be injected into the timekeeping values. */ void timekeeping_inject_sleeptime64(const struct timespec64 *delta) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); suspend_timing_needed = false; timekeeping_forward_now(tk); __timekeeping_inject_sleeptime(tk, delta); timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* Signal hrtimers about time change */ clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT); } #endif /** * timekeeping_resume - Resumes the generic timekeeping subsystem. */ void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; u64 cycle_now, nsec; bool inject_sleeptime = false; read_persistent_clock64(&ts_new); clockevents_resume(); clocksource_resume(); raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); /* * After system resumes, we need to calculate the suspended time and * compensate it for the OS time. There are 3 sources that could be * used: Nonstop clocksource during suspend, persistent clock and rtc * device. * * One specific platform may have 1 or 2 or all of them, and the * preference will be: * suspend-nonstop clocksource -> persistent clock -> rtc * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ cycle_now = tk_clock_read(&tk->tkr_mono); nsec = clocksource_stop_suspend_timing(clock, cycle_now); if (nsec > 0) { ts_delta = ns_to_timespec64(nsec); inject_sleeptime = true; } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); inject_sleeptime = true; } if (inject_sleeptime) { suspend_timing_needed = false; __timekeeping_inject_sleeptime(tk, &ts_delta); } /* Re-base the last cycle value */ tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); touch_softlockup_watchdog(); /* Resume the clockevent device(s) and hrtimers */ tick_resume(); /* Notify timerfd as resume is equivalent to clock_was_set() */ timerfd_resume(); } int timekeeping_suspend(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; struct timespec64 delta, delta_delta; static struct timespec64 old_delta; struct clocksource *curr_clock; u64 cycle_now; read_persistent_clock64(&timekeeping_suspend_time); /* * On some systems the persistent_clock can not be detected at * timekeeping_init by its return value, so if we see a valid * value returned, update the persistent_clock_exists flag. 
*/ if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) persistent_clock_exists = true; suspend_timing_needed = true; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); timekeeping_suspended = 1; /* * Since we've called forward_now, cycle_last stores the value * just read from the current clocksource. Save this to potentially * use in suspend timing. */ curr_clock = tk->tkr_mono.clock; cycle_now = tk->tkr_mono.cycle_last; clocksource_start_suspend_timing(curr_clock, cycle_now); if (persistent_clock_exists) { /* * To avoid drift caused by repeated suspend/resumes, * which each can add ~1 second drift error, * try to compensate so the difference in system time * and persistent_clock time stays close to constant. */ delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); delta_delta = timespec64_sub(delta, old_delta); if (abs(delta_delta.tv_sec) >= 2) { /* * if delta_delta is too large, assume time correction * has occurred and set old_delta to the current delta. */ old_delta = delta; } else { /* Otherwise try to adjust old_system to compensate */ timekeeping_suspend_time = timespec64_add(timekeeping_suspend_time, delta_delta); } } timekeeping_update(tk, TK_MIRROR); halt_fast_timekeeper(tk); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); tick_suspend(); clocksource_suspend(); clockevents_suspend(); return 0; } /* sysfs resume/suspend bits for timekeeping */ static struct syscore_ops timekeeping_syscore_ops = { .resume = timekeeping_resume, .suspend = timekeeping_suspend, }; static int __init timekeeping_init_ops(void) { register_syscore_ops(&timekeeping_syscore_ops); return 0; } device_initcall(timekeeping_init_ops); /* * Apply a multiplier adjustment to the timekeeper */ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, s64 offset, s32 mult_adj) { s64 interval = tk->cycle_interval; if (mult_adj == 0) { return; } else if (mult_adj == -1) { interval = -interval; offset = -offset; } else if (mult_adj != 1) { interval *= mult_adj; offset *= mult_adj; } /* * So the following can be confusing. * * To keep things simple, lets assume mult_adj == 1 for now. * * When mult_adj != 1, remember that the interval and offset values * have been appropriately scaled so the math is the same. * * The basic idea here is that we're increasing the multiplier * by one, this causes the xtime_interval to be incremented by * one cycle_interval. This is because: * xtime_interval = cycle_interval * mult * So if mult is being incremented by one: * xtime_interval = cycle_interval * (mult + 1) * Its the same as: * xtime_interval = (cycle_interval * mult) + cycle_interval * Which can be shortened to: * xtime_interval += cycle_interval * * So offset stores the non-accumulated cycles. Thus the current * time (in shifted nanoseconds) is: * now = (offset * adj) + xtime_nsec * Now, even though we're adjusting the clock frequency, we have * to keep time consistent. In other words, we can't jump back * in time, and we also want to avoid jumping forward in time. * * So given the same offset value, we need the time to be the same * both before and after the freq adjustment. 
* now = (offset * adj_1) + xtime_nsec_1 * now = (offset * adj_2) + xtime_nsec_2 * So: * (offset * adj_1) + xtime_nsec_1 = * (offset * adj_2) + xtime_nsec_2 * And we know: * adj_2 = adj_1 + 1 * So: * (offset * adj_1) + xtime_nsec_1 = * (offset * (adj_1+1)) + xtime_nsec_2 * (offset * adj_1) + xtime_nsec_1 = * (offset * adj_1) + offset + xtime_nsec_2 * Canceling the sides: * xtime_nsec_1 = offset + xtime_nsec_2 * Which gives us: * xtime_nsec_2 = xtime_nsec_1 - offset * Which simplifies to: * xtime_nsec -= offset */ if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { /* NTP adjustment caused clocksource mult overflow */ WARN_ON_ONCE(1); return; } tk->tkr_mono.mult += mult_adj; tk->xtime_interval += interval; tk->tkr_mono.xtime_nsec -= offset; } /* * Adjust the timekeeper's multiplier to the correct frequency * and also to reduce the accumulated error value. */ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { u32 mult; /* * Determine the multiplier from the current NTP tick length. * Avoid expensive division when the tick length doesn't change. */ if (likely(tk->ntp_tick == ntp_tick_length())) { mult = tk->tkr_mono.mult - tk->ntp_err_mult; } else { tk->ntp_tick = ntp_tick_length(); mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) - tk->xtime_remainder, tk->cycle_interval); } /* * If the clock is behind the NTP time, increase the multiplier by 1 * to catch up with it. If it's ahead and there was a remainder in the * tick division, the clock will slow down. Otherwise it will stay * ahead until the tick length changes to a non-divisible value. */ tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0; mult += tk->ntp_err_mult; timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult); if (unlikely(tk->tkr_mono.clock->maxadj && (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) > tk->tkr_mono.clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); } /* * It may be possible that when we entered this function, xtime_nsec * was very small. Further, if we're slightly speeding the clocksource * in the code above, its possible the required corrective factor to * xtime_nsec could cause it to underflow. * * Now, since we have already accumulated the second and the NTP * subsystem has been notified via second_overflow(), we need to skip * the next update. */ if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec--; tk->skip_second_overflow = 1; } } /* * accumulate_nsecs_to_secs - Accumulates nsecs into secs * * Helper function that accumulates the nsecs greater than a second * from the xtime_nsec field to the xtime_secs field. * It also calls into the NTP code to handle leapsecond processing. */ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; unsigned int clock_set = 0; while (tk->tkr_mono.xtime_nsec >= nsecps) { int leap; tk->tkr_mono.xtime_nsec -= nsecps; tk->xtime_sec++; /* * Skip NTP update if this second was accumulated before, * i.e. 
xtime_nsec underflowed in timekeeping_adjust() */ if (unlikely(tk->skip_second_overflow)) { tk->skip_second_overflow = 0; continue; } /* Figure out if its a leap sec and apply if needed */ leap = second_overflow(tk->xtime_sec); if (unlikely(leap)) { struct timespec64 ts; tk->xtime_sec += leap; ts.tv_sec = leap; ts.tv_nsec = 0; tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts)); __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); clock_set = TK_CLOCK_WAS_SET; } } return clock_set; } /* * logarithmic_accumulation - shifted accumulation of cycles * * This functions accumulates a shifted interval of cycles into * a shifted interval nanoseconds. Allows for O(log) accumulation * loop. * * Returns the unconsumed cycles. */ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, u32 shift, unsigned int *clock_set) { u64 interval = tk->cycle_interval << shift; u64 snsec_per_sec; /* If the offset is smaller than a shifted interval, do nothing */ if (offset < interval) return offset; /* Accumulate one shifted interval */ offset -= interval; tk->tkr_mono.cycle_last += interval; tk->tkr_raw.cycle_last += interval; tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ tk->tkr_raw.xtime_nsec += tk->raw_interval << shift; snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift; while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) { tk->tkr_raw.xtime_nsec -= snsec_per_sec; tk->raw_sec++; } /* Accumulate error between NTP and clock interval */ tk->ntp_error += tk->ntp_tick << shift; tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << (tk->ntp_error_shift + shift); return offset; } /* * timekeeping_advance - Updates the timekeeper to the current time and * current NTP tick length */ static bool timekeeping_advance(enum timekeeping_adv_mode mode) { struct timekeeper *real_tk = &tk_core.timekeeper; struct timekeeper *tk = &shadow_timekeeper; u64 offset; int shift = 0, maxshift; unsigned int clock_set = 0; unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) goto out; offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), tk->tkr_mono.cycle_last, tk->tkr_mono.mask); /* Check if there's really nothing to do */ if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) goto out; /* Do some additional sanity checking */ timekeeping_check_update(tk, offset); /* * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, * we calculate the largest doubling multiple of cycle_intervals * that is smaller than the offset. We then accumulate that * chunk in one go, and then try to consume the next smaller * doubled multiple. */ shift = ilog2(offset) - ilog2(tk->cycle_interval); shift = max(0, shift); /* Bound shift to one less than what overflows tick_length */ maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); while (offset >= tk->cycle_interval) { offset = logarithmic_accumulation(tk, offset, shift, &clock_set); if (offset < tk->cycle_interval<<shift) shift--; } /* Adjust the multiplier to correct NTP error */ timekeeping_adjust(tk, offset); /* * Finally, make sure that after the rounding * xtime_nsec isn't larger than NSEC_PER_SEC */ clock_set |= accumulate_nsecs_to_secs(tk); write_seqcount_begin(&tk_core.seq); /* * Update the real timekeeper. 
* * We could avoid this memcpy by switching pointers, but that * requires changes to all other timekeeper usage sites as * well, i.e. move the timekeeper pointer getter into the * spinlocked/seqcount protected sections. And we trade this * memcpy under the tk_core.seq against one before we start * updating. */ timekeeping_update(tk, clock_set); memcpy(real_tk, tk, sizeof(*tk)); /* The memcpy must come last. Do not put anything here! */ write_seqcount_end(&tk_core.seq); out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); return !!clock_set; } /** * update_wall_time - Uses the current clocksource to increment the wall time * */ void update_wall_time(void) { if (timekeeping_advance(TK_ADV_TICK)) clock_was_set_delayed(); } /** * getboottime64 - Return the real time of system boot. * @ts: pointer to the timespec64 to be set * * Returns the wall-time of boot in a timespec64. * * This is based on the wall_to_monotonic offset and the total suspend * time. Calls to settimeofday will affect the value returned (which * basically means that however wrong your real time clock is at boot time, * you get the right time here). */ void getboottime64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); *ts = ktime_to_timespec64(t); } EXPORT_SYMBOL_GPL(getboottime64); void ktime_get_coarse_real_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; do { seq = read_seqcount_begin(&tk_core.seq); *ts = tk_xtime(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); } EXPORT_SYMBOL(ktime_get_coarse_real_ts64); void ktime_get_coarse_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 now, mono; unsigned int seq; do { seq = read_seqcount_begin(&tk_core.seq); now = tk_xtime(tk); mono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec, now.tv_nsec + mono.tv_nsec); } EXPORT_SYMBOL(ktime_get_coarse_ts64); /* * Must hold jiffies_lock */ void do_timer(unsigned long ticks) { jiffies_64 += ticks; calc_global_load(); } /** * ktime_get_update_offsets_now - hrtimer helper * @cwsseq: pointer to check and store the clock was set sequence number * @offs_real: pointer to storage for monotonic -> realtime offset * @offs_boot: pointer to storage for monotonic -> boottime offset * @offs_tai: pointer to storage for monotonic -> clock tai offset * * Returns current monotonic time and updates the offsets if the * sequence number in @cwsseq and timekeeper.clock_was_set_seq are * different. 
* * Called from hrtimer_interrupt() or retrigger_next_event() */ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, ktime_t *offs_boot, ktime_t *offs_tai) { struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; ktime_t base; u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); base = tk->tkr_mono.base; nsecs = timekeeping_get_ns(&tk->tkr_mono); base = ktime_add_ns(base, nsecs); if (*cwsseq != tk->clock_was_set_seq) { *cwsseq = tk->clock_was_set_seq; *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; } /* Handle leapsecond insertion adjustments */ if (unlikely(base >= tk->next_leap_ktime)) *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0)); } while (read_seqcount_retry(&tk_core.seq, seq)); return base; } /* * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex */ static int timekeeping_validate_timex(const struct __kernel_timex *txc) { if (txc->modes & ADJ_ADJTIME) { /* singleshot must not be used with any other mode bits */ if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) return -EINVAL; if (!(txc->modes & ADJ_OFFSET_READONLY) && !capable(CAP_SYS_TIME)) return -EPERM; } else { /* In order to modify anything, you gotta be super-user! */ if (txc->modes && !capable(CAP_SYS_TIME)) return -EPERM; /* * if the quartz is off by more than 10% then * something is VERY wrong! */ if (txc->modes & ADJ_TICK && (txc->tick < 900000/USER_HZ || txc->tick > 1100000/USER_HZ)) return -EINVAL; } if (txc->modes & ADJ_SETOFFSET) { /* In order to inject time, you gotta be super-user! */ if (!capable(CAP_SYS_TIME)) return -EPERM; /* * Validate if a timespec/timeval used to inject a time * offset is valid. Offsets can be positive or negative, so * we don't check tv_sec. The value of the timeval/timespec * is the sum of its fields,but *NOTE*: * The field tv_usec/tv_nsec must always be non-negative and * we can't have more nanoseconds/microseconds than a second. */ if (txc->time.tv_usec < 0) return -EINVAL; if (txc->modes & ADJ_NANO) { if (txc->time.tv_usec >= NSEC_PER_SEC) return -EINVAL; } else { if (txc->time.tv_usec >= USEC_PER_SEC) return -EINVAL; } } /* * Check for potential multiplication overflows that can * only happen on 64-bit systems: */ if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { if (LLONG_MIN / PPM_SCALE > txc->freq) return -EINVAL; if (LLONG_MAX / PPM_SCALE < txc->freq) return -EINVAL; } return 0; } /** * random_get_entropy_fallback - Returns the raw clock source value, * used by random.c for platforms with no valid random_get_entropy(). 
*/ unsigned long random_get_entropy_fallback(void) { struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; struct clocksource *clock = READ_ONCE(tkr->clock); if (unlikely(timekeeping_suspended || !clock)) return 0; return clock->read(clock); } EXPORT_SYMBOL_GPL(random_get_entropy_fallback); /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function */ int do_adjtimex(struct __kernel_timex *txc) { struct timekeeper *tk = &tk_core.timekeeper; struct audit_ntp_data ad; bool clock_set = false; struct timespec64 ts; unsigned long flags; s32 orig_tai, tai; int ret; /* Validate the data before disabling interrupts */ ret = timekeeping_validate_timex(txc); if (ret) return ret; add_device_randomness(txc, sizeof(*txc)); if (txc->modes & ADJ_SETOFFSET) { struct timespec64 delta; delta.tv_sec = txc->time.tv_sec; delta.tv_nsec = txc->time.tv_usec; if (!(txc->modes & ADJ_NANO)) delta.tv_nsec *= 1000; ret = timekeeping_inject_offset(&delta); if (ret) return ret; audit_tk_injoffset(delta); } audit_ntp_init(&ad); ktime_get_real_ts64(&ts); add_device_randomness(&ts, sizeof(ts)); raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); orig_tai = tai = tk->tai_offset; ret = __do_adjtimex(txc, &ts, &tai, &ad); if (tai != orig_tai) { __timekeeping_set_tai_offset(tk, tai); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); clock_set = true; } tk_update_leap_state(tk); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); audit_ntp_log(&ad); /* Update the multiplier immediately if frequency was set directly */ if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) clock_set |= timekeeping_advance(TK_ADV_FREQ); if (clock_set) clock_was_set(CLOCK_REALTIME); ntp_notify_cmos_timer(); return ret; } #ifdef CONFIG_NTP_PPS /** * hardpps() - Accessor function to NTP __hardpps function */ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) { unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); __hardpps(phase_ts, raw_ts); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } EXPORT_SYMBOL(hardpps); #endif /* CONFIG_NTP_PPS */
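The ADJ_SETOFFSET path validated in timekeeping_validate_timex() and applied in do_adjtimex() above can be exercised from userspace through adjtimex(2). A minimal sketch, assuming a glibc system and a caller with CAP_SYS_TIME; the 500 ms offset is an arbitrary illustration value:

#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx = { 0 };

	/*
	 * Inject a +0.5 s step into CLOCK_REALTIME. With ADJ_NANO the
	 * time.tv_usec field carries nanoseconds; as checked in
	 * timekeeping_validate_timex(), it must be non-negative and
	 * smaller than NSEC_PER_SEC.
	 */
	tx.modes = ADJ_SETOFFSET | ADJ_NANO;
	tx.time.tv_sec = 0;
	tx.time.tv_usec = 500 * 1000 * 1000;

	if (adjtimex(&tx) == -1) {
		perror("adjtimex");	/* EPERM without CAP_SYS_TIME */
		return 1;
	}
	return 0;
}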
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_GFP_H #define __LINUX_GFP_H #include <linux/gfp_types.h> #include <linux/mmzone.h> #include <linux/topology.h> struct vm_area_struct; struct mempolicy; /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) #define GFP_MOVABLE_SHIFT 3 static inline int gfp_migratetype(const gfp_t gfp_flags) { VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE); BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >> GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; /* Group based on mobility */ return (__force unsigned long)(gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } #undef GFP_MOVABLE_MASK #undef GFP_MOVABLE_SHIFT static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else #define OPT_ZONE_HIGHMEM ZONE_NORMAL #endif #ifdef CONFIG_ZONE_DMA #define OPT_ZONE_DMA ZONE_DMA #else #define OPT_ZONE_DMA ZONE_NORMAL #endif #ifdef CONFIG_ZONE_DMA32 #define OPT_ZONE_DMA32 ZONE_DMA32 #else #define OPT_ZONE_DMA32 ZONE_NORMAL #endif /* * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT * bits long and there are 16 of them to cover all possible combinations of * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM. * * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA. * But GFP_MOVABLE is not only a zone specifier but also an allocation * policy. Therefore __GFP_MOVABLE plus another zone selector is valid. * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1".
* * bit result * ================= * 0x0 => NORMAL * 0x1 => DMA or NORMAL * 0x2 => HIGHMEM or NORMAL * 0x3 => BAD (DMA+HIGHMEM) * 0x4 => DMA32 or NORMAL * 0x5 => BAD (DMA+DMA32) * 0x6 => BAD (HIGHMEM+DMA32) * 0x7 => BAD (HIGHMEM+DMA32+DMA) * 0x8 => NORMAL (MOVABLE+0) * 0x9 => DMA or NORMAL (MOVABLE+DMA) * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) * 0xb => BAD (MOVABLE+HIGHMEM+DMA) * 0xc => DMA32 or NORMAL (MOVABLE+DMA32) * 0xd => BAD (MOVABLE+DMA32+DMA) * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) * * GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms. */ #if defined(CONFIG_ZONE_DEVICE) && (MAX_NR_ZONES-1) <= 4 /* ZONE_DEVICE is not a valid GFP zone specifier */ #define GFP_ZONES_SHIFT 2 #else #define GFP_ZONES_SHIFT ZONES_SHIFT #endif #if 16 * GFP_ZONES_SHIFT > BITS_PER_LONG #error GFP_ZONES_SHIFT too large to create GFP_ZONE_TABLE integer #endif #define GFP_ZONE_TABLE ( \ (ZONE_NORMAL << 0 * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA << ___GFP_DMA * GFP_ZONES_SHIFT) \ | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA32 << ___GFP_DMA32 * GFP_ZONES_SHIFT) \ | (ZONE_NORMAL << ___GFP_MOVABLE * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * GFP_ZONES_SHIFT) \ | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT)\ | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT)\ ) /* * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per * entry starting with bit 0. Bit is set if the combination is not * allowed. */ #define GFP_ZONE_BAD ( \ 1 << (___GFP_DMA | ___GFP_HIGHMEM) \ | 1 << (___GFP_DMA | ___GFP_DMA32) \ | 1 << (___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM) \ ) static inline enum zone_type gfp_zone(gfp_t flags) { enum zone_type z; int bit = (__force int) (flags & GFP_ZONEMASK); z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) & ((1 << GFP_ZONES_SHIFT) - 1); VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1); return z; } /* * There is only one page-allocator function, and two main namespaces to * it. The alloc_page*() variants return 'struct page *' and as such * can allocate highmem pages, the *get*page*() variants return * virtual kernel addresses to the allocated page(s). */ static inline int gfp_zonelist(gfp_t flags) { #ifdef CONFIG_NUMA if (unlikely(flags & __GFP_THISNODE)) return ZONELIST_NOFALLBACK; #endif return ZONELIST_FALLBACK; } /* * We get the zone list from the current node and the gfp_mask. * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones. * There are two zonelists per node, one for all zones with memory and * one containing just zones from the node the zonelist belongs to. * * For the case of non-NUMA systems the NODE_DATA() gets optimized to * &contig_page_data at compile-time. 
*/ static inline struct zonelist *node_zonelist(int nid, gfp_t flags) { return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); } #ifndef HAVE_ARCH_FREE_PAGE static inline void arch_free_page(struct page *page, int order) { } #endif #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, struct list_head *page_list, struct page **page_array); unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, unsigned long nr_pages, struct page **page_array); /* Bulk allocate order-0 pages */ static inline unsigned long alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list) { return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL); } static inline unsigned long alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array) { return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array); } static inline unsigned long alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) { gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) return; if (node_online(this_node)) return; pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); dump_stack(); } /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). */ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. Otherwise node must be valid and * online. 
*/ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); return __alloc_pages_node(nid, gfp_mask, order); } #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *folio_alloc(gfp_t gfp, unsigned int order); struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); #else static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid) { return alloc_pages(gfp, order); } static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); } #define vma_alloc_folio(gfp, order, vma, addr, hugepage) \ folio_alloc(gfp, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) static inline struct page *alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct folio *folio = vma_alloc_folio(gfp, 0, vma, addr, false); return &folio->page; } extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); __meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) #define __get_dma_pages(gfp_mask, order) \ __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; void page_frag_cache_drain(struct page_frag_cache *nc); extern void __page_frag_cache_drain(struct page *page, unsigned int count); void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align_mask); static inline void *page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); } static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } extern void page_frag_free(void *addr); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) void page_alloc_init_cpuhp(void); int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); void page_alloc_init_late(void); void setup_pcp_cacheinfo(unsigned int cpu); /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what * GFP flags are used before interrupts are enabled. Once interrupts are * enabled, it is set to __GFP_BITS_MASK while the system is running. During * hibernation, it is used by PM to avoid I/O during memory allocation while * devices are suspended. 
*/ extern gfp_t gfp_allowed_mask; /* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); static inline bool gfp_has_io_fs(gfp_t gfp) { return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS); } /* * Check if the gfp flags allow compaction - GFP_NOIO is a really * tricky context because the migration might require IO. */ static inline bool gfp_compaction_allowed(gfp_t gfp_mask) { return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); } extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC /* The below functions must be run on a range from a single zone. */ extern int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask); extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, int nid, nodemask_t *nodemask); #endif void free_contig_range(unsigned long pfn, unsigned long nr_pages); #endif /* __LINUX_GFP_H */
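As a side note on the GFP_ZONE_TABLE encoding above, the gfp_zone() lookup can be reproduced outside the kernel. A small userspace sketch: the zone numbering and the three-bit entry width are illustration choices (not taken from any particular configuration), while the low-bit assignments follow the bit/result table in the comment above, with all optional zones assumed present.

#include <stdio.h>

/* Zone numbering assumed for this sketch only */
enum zone { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_HIGHMEM, ZONE_MOVABLE };

/* Lowest four GFP bits, as laid out in the bit/result table above */
#define GFP_DMA_BIT      0x01u
#define GFP_HIGHMEM_BIT  0x02u
#define GFP_DMA32_BIT    0x04u
#define GFP_MOVABLE_BIT  0x08u

#define ZONES_SHIFT 3	/* three bits per entry is enough for five zones */

/* Same construction as GFP_ZONE_TABLE; unlisted ("BAD") combinations stay zero */
#define ZONE_TABLE ( \
	(unsigned long long)ZONE_NORMAL  << (0 * ZONES_SHIFT) | \
	(unsigned long long)ZONE_DMA     << (GFP_DMA_BIT * ZONES_SHIFT) | \
	(unsigned long long)ZONE_HIGHMEM << (GFP_HIGHMEM_BIT * ZONES_SHIFT) | \
	(unsigned long long)ZONE_DMA32   << (GFP_DMA32_BIT * ZONES_SHIFT) | \
	(unsigned long long)ZONE_NORMAL  << (GFP_MOVABLE_BIT * ZONES_SHIFT) | \
	(unsigned long long)ZONE_DMA     << ((GFP_MOVABLE_BIT | GFP_DMA_BIT) * ZONES_SHIFT) | \
	(unsigned long long)ZONE_MOVABLE << ((GFP_MOVABLE_BIT | GFP_HIGHMEM_BIT) * ZONES_SHIFT) | \
	(unsigned long long)ZONE_DMA32   << ((GFP_MOVABLE_BIT | GFP_DMA32_BIT) * ZONES_SHIFT))

static enum zone zone_of(unsigned int low_bits)
{
	/* Mirrors gfp_zone(): index the packed table by the low GFP bits */
	return (ZONE_TABLE >> (low_bits * ZONES_SHIFT)) & ((1u << ZONES_SHIFT) - 1);
}

int main(void)
{
	/* Matches the table in the comment: 0x0 -> NORMAL, 0x2 -> HIGHMEM, 0xa -> MOVABLE */
	printf("0x0 -> %d (expect ZONE_NORMAL=%d)\n",  zone_of(0x0), ZONE_NORMAL);
	printf("0x2 -> %d (expect ZONE_HIGHMEM=%d)\n", zone_of(0x2), ZONE_HIGHMEM);
	printf("0xa -> %d (expect ZONE_MOVABLE=%d)\n", zone_of(0xa), ZONE_MOVABLE);
	return 0;
}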
// SPDX-License-Identifier: GPL-2.0-or-later /* */ #include <linux/gfp.h> #include <linux/init.h> #include <linux/ratelimit.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <linux/slab.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include "usbaudio.h" #include "helper.h" #include "card.h" #include "endpoint.h" #include "pcm.h" #include "clock.h" #include "quirks.h" enum { EP_STATE_STOPPED, EP_STATE_RUNNING, EP_STATE_STOPPING, }; /* interface refcounting */ struct snd_usb_iface_ref { unsigned char iface; bool need_setup; int opened; int altset; struct list_head list; }; /* clock refcounting */ struct snd_usb_clock_ref { unsigned char clock; atomic_t locked; int opened; int rate; bool need_setup; struct list_head list; }; /* * snd_usb_endpoint is a model that abstracts everything related to an * USB endpoint and its streaming. * * There are functions to activate and deactivate the streaming URBs and * optional callbacks to let the pcm logic handle the actual content of the * packets for playback and record. Thus, the bus streaming and the audio * handlers are fully decoupled. * * There are two different types of endpoints in audio applications. * * SND_USB_ENDPOINT_TYPE_DATA handles full audio data payload for both * inbound and outbound traffic. * * SND_USB_ENDPOINT_TYPE_SYNC endpoints are for inbound traffic only and * expect the payload to carry Q10.14 / Q16.16 formatted sync information * (3 or 4 bytes). * * Each endpoint has to be configured prior to being used by calling * snd_usb_endpoint_set_params(). * * The model incorporates a reference counting, so that multiple users * can call snd_usb_endpoint_start() and snd_usb_endpoint_stop(), and * only the first user will effectively start the URBs, and only the last * one to stop it will tear the URBs down again.
*/ /* * convert a sampling rate into our full speed format (fs/1000 in Q16.16) * this will overflow at approx 524 kHz */ static inline unsigned get_usb_full_speed_rate(unsigned int rate) { return ((rate << 13) + 62) / 125; } /* * convert a sampling rate into USB high speed format (fs/8000 in Q16.16) * this will overflow at approx 4 MHz */ static inline unsigned get_usb_high_speed_rate(unsigned int rate) { return ((rate << 10) + 62) / 125; } /* * release a urb data */ static void release_urb_ctx(struct snd_urb_ctx *u) { if (u->urb && u->buffer_size) usb_free_coherent(u->ep->chip->dev, u->buffer_size, u->urb->transfer_buffer, u->urb->transfer_dma); usb_free_urb(u->urb); u->urb = NULL; u->buffer_size = 0; } static const char *usb_error_string(int err) { switch (err) { case -ENODEV: return "no device"; case -ENOENT: return "endpoint not enabled"; case -EPIPE: return "endpoint stalled"; case -ENOSPC: return "not enough bandwidth"; case -ESHUTDOWN: return "device disabled"; case -EHOSTUNREACH: return "device suspended"; case -EINVAL: case -EAGAIN: case -EFBIG: case -EMSGSIZE: return "internal error"; default: return "unknown error"; } } static inline bool ep_state_running(struct snd_usb_endpoint *ep) { return atomic_read(&ep->state) == EP_STATE_RUNNING; } static inline bool ep_state_update(struct snd_usb_endpoint *ep, int old, int new) { return atomic_try_cmpxchg(&ep->state, &old, new); } /** * snd_usb_endpoint_implicit_feedback_sink: Report endpoint usage type * * @ep: The snd_usb_endpoint * * Determine whether an endpoint is driven by an implicit feedback * data endpoint source. */ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep) { return ep->implicit_fb_sync && usb_pipeout(ep->pipe); } /* * Return the number of samples to be sent in the next packet * for streaming based on information derived from sync endpoints * * This won't be used for implicit feedback which takes the packet size * returned from the sync source */ static int slave_next_packet_size(struct snd_usb_endpoint *ep, unsigned int avail) { unsigned long flags; unsigned int phase; int ret; if (ep->fill_max) return ep->maxframesize; spin_lock_irqsave(&ep->lock, flags); phase = (ep->phase & 0xffff) + (ep->freqm << ep->datainterval); ret = min(phase >> 16, ep->maxframesize); if (avail && ret >= avail) ret = -EAGAIN; else ep->phase = phase; spin_unlock_irqrestore(&ep->lock, flags); return ret; } /* * Return the number of samples to be sent in the next packet * for adaptive and synchronous endpoints */ static int next_packet_size(struct snd_usb_endpoint *ep, unsigned int avail) { unsigned int sample_accum; int ret; if (ep->fill_max) return ep->maxframesize; sample_accum = ep->sample_accum + ep->sample_rem; if (sample_accum >= ep->pps) { sample_accum -= ep->pps; ret = ep->packsize[1]; } else { ret = ep->packsize[0]; } if (avail && ret >= avail) ret = -EAGAIN; else ep->sample_accum = sample_accum; return ret; } /* * snd_usb_endpoint_next_packet_size: Return the number of samples to be sent * in the next packet * * If the size is equal or exceeds @avail, don't proceed but return -EAGAIN * Exception: @avail = 0 for skipping the check. 
*/ int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx, int idx, unsigned int avail) { unsigned int packet; packet = ctx->packet_size[idx]; if (packet) { if (avail && packet >= avail) return -EAGAIN; return packet; } if (ep->sync_source) return slave_next_packet_size(ep, avail); else return next_packet_size(ep, avail); } static void call_retire_callback(struct snd_usb_endpoint *ep, struct urb *urb) { struct snd_usb_substream *data_subs; data_subs = READ_ONCE(ep->data_subs); if (data_subs && ep->retire_data_urb) ep->retire_data_urb(data_subs, urb); } static void retire_outbound_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *urb_ctx) { call_retire_callback(ep, urb_ctx->urb); } static void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep, struct snd_usb_endpoint *sender, const struct urb *urb); static void retire_inbound_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *urb_ctx) { struct urb *urb = urb_ctx->urb; struct snd_usb_endpoint *sync_sink; if (unlikely(ep->skip_packets > 0)) { ep->skip_packets--; return; } sync_sink = READ_ONCE(ep->sync_sink); if (sync_sink) snd_usb_handle_sync_urb(sync_sink, ep, urb); call_retire_callback(ep, urb); } static inline bool has_tx_length_quirk(struct snd_usb_audio *chip) { return chip->quirk_flags & QUIRK_FLAG_TX_LENGTH; } static void prepare_silent_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx) { struct urb *urb = ctx->urb; unsigned int offs = 0; unsigned int extra = 0; __le32 packet_length; int i; /* For tx_length_quirk, put packet length at start of packet */ if (has_tx_length_quirk(ep->chip)) extra = sizeof(packet_length); for (i = 0; i < ctx->packets; ++i) { unsigned int offset; unsigned int length; int counts; counts = snd_usb_endpoint_next_packet_size(ep, ctx, i, 0); length = counts * ep->stride; /* number of silent bytes */ offset = offs * ep->stride + extra * i; urb->iso_frame_desc[i].offset = offset; urb->iso_frame_desc[i].length = length + extra; if (extra) { packet_length = cpu_to_le32(length); memcpy(urb->transfer_buffer + offset, &packet_length, sizeof(packet_length)); } memset(urb->transfer_buffer + offset + extra, ep->silence_value, length); offs += counts; } urb->number_of_packets = ctx->packets; urb->transfer_buffer_length = offs * ep->stride + ctx->packets * extra; ctx->queued = 0; } /* * Prepare a PLAYBACK urb for submission to the bus. */ static int prepare_outbound_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx, bool in_stream_lock) { struct urb *urb = ctx->urb; unsigned char *cp = urb->transfer_buffer; struct snd_usb_substream *data_subs; urb->dev = ep->chip->dev; /* we need to set this at each time */ switch (ep->type) { case SND_USB_ENDPOINT_TYPE_DATA: data_subs = READ_ONCE(ep->data_subs); if (data_subs && ep->prepare_data_urb) return ep->prepare_data_urb(data_subs, urb, in_stream_lock); /* no data provider, so send silence */ prepare_silent_urb(ep, ctx); break; case SND_USB_ENDPOINT_TYPE_SYNC: if (snd_usb_get_speed(ep->chip->dev) >= USB_SPEED_HIGH) { /* * fill the length and offset of each urb descriptor. * the fixed 12.13 frequency is passed as 16.16 through the pipe. */ urb->iso_frame_desc[0].length = 4; urb->iso_frame_desc[0].offset = 0; cp[0] = ep->freqn; cp[1] = ep->freqn >> 8; cp[2] = ep->freqn >> 16; cp[3] = ep->freqn >> 24; } else { /* * fill the length and offset of each urb descriptor. * the fixed 10.14 frequency is passed through the pipe. 
*/ urb->iso_frame_desc[0].length = 3; urb->iso_frame_desc[0].offset = 0; cp[0] = ep->freqn >> 2; cp[1] = ep->freqn >> 10; cp[2] = ep->freqn >> 18; } break; } return 0; } /* * Prepare a CAPTURE or SYNC urb for submission to the bus. */ static int prepare_inbound_urb(struct snd_usb_endpoint *ep, struct snd_urb_ctx *urb_ctx) { int i, offs; struct urb *urb = urb_ctx->urb; urb->dev = ep->chip->dev; /* we need to set this at each time */ switch (ep->type) { case SND_USB_ENDPOINT_TYPE_DATA: offs = 0; for (i = 0; i < urb_ctx->packets; i++) { urb->iso_frame_desc[i].offset = offs; urb->iso_frame_desc[i].length = ep->curpacksize; offs += ep->curpacksize; } urb->transfer_buffer_length = offs; urb->number_of_packets = urb_ctx->packets; break; case SND_USB_ENDPOINT_TYPE_SYNC: urb->iso_frame_desc[0].length = min(4u, ep->syncmaxsize); urb->iso_frame_desc[0].offset = 0; break; } return 0; } /* notify an error as XRUN to the assigned PCM data substream */ static void notify_xrun(struct snd_usb_endpoint *ep) { struct snd_usb_substream *data_subs; data_subs = READ_ONCE(ep->data_subs); if (data_subs && data_subs->pcm_substream) snd_pcm_stop_xrun(data_subs->pcm_substream); } static struct snd_usb_packet_info * next_packet_fifo_enqueue(struct snd_usb_endpoint *ep) { struct snd_usb_packet_info *p; p = ep->next_packet + (ep->next_packet_head + ep->next_packet_queued) % ARRAY_SIZE(ep->next_packet); ep->next_packet_queued++; return p; } static struct snd_usb_packet_info * next_packet_fifo_dequeue(struct snd_usb_endpoint *ep) { struct snd_usb_packet_info *p; p = ep->next_packet + ep->next_packet_head; ep->next_packet_head++; ep->next_packet_head %= ARRAY_SIZE(ep->next_packet); ep->next_packet_queued--; return p; } static void push_back_to_ready_list(struct snd_usb_endpoint *ep, struct snd_urb_ctx *ctx) { unsigned long flags; spin_lock_irqsave(&ep->lock, flags); list_add_tail(&ctx->ready_list, &ep->ready_playback_urbs); spin_unlock_irqrestore(&ep->lock, flags); } /* * Send output urbs that have been prepared previously. URBs are dequeued * from ep->ready_playback_urbs and in case there aren't any available * or there are no packets that have been prepared, this function does * nothing. * * The reason why the functionality of sending and preparing URBs is separated * is that host controllers don't guarantee the order in which they return * inbound and outbound packets to their submitters. * * This function is used both for implicit feedback endpoints and in low- * latency playback mode. 
*/ int snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep, bool in_stream_lock) { bool implicit_fb = snd_usb_endpoint_implicit_feedback_sink(ep); while (ep_state_running(ep)) { unsigned long flags; struct snd_usb_packet_info *packet; struct snd_urb_ctx *ctx = NULL; int err, i; spin_lock_irqsave(&ep->lock, flags); if ((!implicit_fb || ep->next_packet_queued > 0) && !list_empty(&ep->ready_playback_urbs)) { /* take URB out of FIFO */ ctx = list_first_entry(&ep->ready_playback_urbs, struct snd_urb_ctx, ready_list); list_del_init(&ctx->ready_list); if (implicit_fb) packet = next_packet_fifo_dequeue(ep); } spin_unlock_irqrestore(&ep->lock, flags); if (ctx == NULL) break; /* copy over the length information */ if (implicit_fb) { for (i = 0; i < packet->packets; i++) ctx->packet_size[i] = packet->packet_size[i]; } /* call the data handler to fill in playback data */ err = prepare_outbound_urb(ep, ctx, in_stream_lock); /* can be stopped during prepare callback */ if (unlikely(!ep_state_running(ep))) break; if (err < 0) { /* push back to ready list again for -EAGAIN */ if (err == -EAGAIN) { push_back_to_ready_list(ep, ctx); break; } if (!in_stream_lock) notify_xrun(ep); return -EPIPE; } if (!atomic_read(&ep->chip->shutdown)) err = usb_submit_urb(ctx->urb, GFP_ATOMIC); else err = -ENODEV; if (err < 0) { if (!atomic_read(&ep->chip->shutdown)) { usb_audio_err(ep->chip, "Unable to submit urb #%d: %d at %s\n", ctx->index, err, __func__); if (!in_stream_lock) notify_xrun(ep); } return -EPIPE; } set_bit(ctx->index, &ep->active_mask); atomic_inc(&ep->submitted_urbs); } return 0; } /* * complete callback for urbs */ static void snd_complete_urb(struct urb *urb) { struct snd_urb_ctx *ctx = urb->context; struct snd_usb_endpoint *ep = ctx->ep; int err; if (unlikely(urb->status == -ENOENT || /* unlinked */ urb->status == -ENODEV || /* device removed */ urb->status == -ECONNRESET || /* unlinked */ urb->status == -ESHUTDOWN)) /* device disabled */ goto exit_clear; /* device disconnected */ if (unlikely(atomic_read(&ep->chip->shutdown))) goto exit_clear; if (unlikely(!ep_state_running(ep))) goto exit_clear; if (usb_pipeout(ep->pipe)) { retire_outbound_urb(ep, ctx); /* can be stopped during retire callback */ if (unlikely(!ep_state_running(ep))) goto exit_clear; /* in low-latency and implicit-feedback modes, push back the * URB to ready list at first, then process as much as possible */ if (ep->lowlatency_playback || snd_usb_endpoint_implicit_feedback_sink(ep)) { push_back_to_ready_list(ep, ctx); clear_bit(ctx->index, &ep->active_mask); snd_usb_queue_pending_output_urbs(ep, false); atomic_dec(&ep->submitted_urbs); /* decrement at last */ return; } /* in non-lowlatency mode, no error handling for prepare */ prepare_outbound_urb(ep, ctx, false); /* can be stopped during prepare callback */ if (unlikely(!ep_state_running(ep))) goto exit_clear; } else { retire_inbound_urb(ep, ctx); /* can be stopped during retire callback */ if (unlikely(!ep_state_running(ep))) goto exit_clear; prepare_inbound_urb(ep, ctx); } if (!atomic_read(&ep->chip->shutdown)) err = usb_submit_urb(urb, GFP_ATOMIC); else err = -ENODEV; if (err == 0) return; if (!atomic_read(&ep->chip->shutdown)) { usb_audio_err(ep->chip, "cannot submit urb (err = %d)\n", err); notify_xrun(ep); } exit_clear: clear_bit(ctx->index, &ep->active_mask); atomic_dec(&ep->submitted_urbs); } /* * Find or create a refcount object for the given interface * * The objects are released altogether in snd_usb_endpoint_free_all() */ static struct snd_usb_iface_ref * 
iface_ref_find(struct snd_usb_audio *chip, int iface) { struct snd_usb_iface_ref *ip; list_for_each_entry(ip, &chip->iface_ref_list, list) if (ip->iface == iface) return ip; ip = kzalloc(sizeof(*ip), GFP_KERNEL); if (!ip) return NULL; ip->iface = iface; list_add_tail(&ip->list, &chip->iface_ref_list); return ip; } /* Similarly, a refcount object for clock */ static struct snd_usb_clock_ref * clock_ref_find(struct snd_usb_audio *chip, int clock) { struct snd_usb_clock_ref *ref; list_for_each_entry(ref, &chip->clock_ref_list, list) if (ref->clock == clock) return ref; ref = kzalloc(sizeof(*ref), GFP_KERNEL); if (!ref) return NULL; ref->clock = clock; atomic_set(&ref->locked, 0); list_add_tail(&ref->list, &chip->clock_ref_list); return ref; } /* * Get the existing endpoint object corresponding EP * Returns NULL if not present. */ struct snd_usb_endpoint * snd_usb_get_endpoint(struct snd_usb_audio *chip, int ep_num) { struct snd_usb_endpoint *ep; list_for_each_entry(ep, &chip->ep_list, list) { if (ep->ep_num == ep_num) return ep; } return NULL; } #define ep_type_name(type) \ (type == SND_USB_ENDPOINT_TYPE_DATA ? "data" : "sync") /** * snd_usb_add_endpoint: Add an endpoint to an USB audio chip * * @chip: The chip * @ep_num: The number of the endpoint to use * @type: SND_USB_ENDPOINT_TYPE_DATA or SND_USB_ENDPOINT_TYPE_SYNC * * If the requested endpoint has not been added to the given chip before, * a new instance is created. * * Returns zero on success or a negative error code. * * New endpoints will be added to chip->ep_list and freed by * calling snd_usb_endpoint_free_all(). * * For SND_USB_ENDPOINT_TYPE_SYNC, the caller needs to guarantee that * bNumEndpoints > 1 beforehand. */ int snd_usb_add_endpoint(struct snd_usb_audio *chip, int ep_num, int type) { struct snd_usb_endpoint *ep; bool is_playback; ep = snd_usb_get_endpoint(chip, ep_num); if (ep) return 0; usb_audio_dbg(chip, "Creating new %s endpoint #%x\n", ep_type_name(type), ep_num); ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return -ENOMEM; ep->chip = chip; spin_lock_init(&ep->lock); ep->type = type; ep->ep_num = ep_num; INIT_LIST_HEAD(&ep->ready_playback_urbs); atomic_set(&ep->submitted_urbs, 0); is_playback = ((ep_num & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT); ep_num &= USB_ENDPOINT_NUMBER_MASK; if (is_playback) ep->pipe = usb_sndisocpipe(chip->dev, ep_num); else ep->pipe = usb_rcvisocpipe(chip->dev, ep_num); list_add_tail(&ep->list, &chip->ep_list); return 0; } /* Set up syncinterval and maxsyncsize for a sync EP */ static void endpoint_set_syncinterval(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { struct usb_host_interface *alts; struct usb_endpoint_descriptor *desc; alts = snd_usb_get_host_interface(chip, ep->iface, ep->altsetting); if (!alts) return; desc = get_endpoint(alts, ep->ep_idx); if (desc->bLength >= USB_DT_ENDPOINT_AUDIO_SIZE && desc->bRefresh >= 1 && desc->bRefresh <= 9) ep->syncinterval = desc->bRefresh; else if (snd_usb_get_speed(chip->dev) == USB_SPEED_FULL) ep->syncinterval = 1; else if (desc->bInterval >= 1 && desc->bInterval <= 16) ep->syncinterval = desc->bInterval - 1; else ep->syncinterval = 3; ep->syncmaxsize = le16_to_cpu(desc->wMaxPacketSize); } static bool endpoint_compatible(struct snd_usb_endpoint *ep, const struct audioformat *fp, const struct snd_pcm_hw_params *params) { if (!ep->opened) return false; if (ep->cur_audiofmt != fp) return false; if (ep->cur_rate != params_rate(params) || ep->cur_format != params_format(params) || ep->cur_period_frames != params_period_size(params) || 
ep->cur_buffer_periods != params_periods(params)) return false; return true; } /* * Check whether the given fp and hw params are compatible with the current * setup of the target EP for implicit feedback sync */ bool snd_usb_endpoint_compatible(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep, const struct audioformat *fp, const struct snd_pcm_hw_params *params) { bool ret; mutex_lock(&chip->mutex); ret = endpoint_compatible(ep, fp, params); mutex_unlock(&chip->mutex); return ret; } /* * snd_usb_endpoint_open: Open the endpoint * * Called from hw_params to assign the endpoint to the substream. * It's reference-counted, and only the first opener is allowed to set up * arbitrary parameters. The later opener must be compatible with the * former opened parameters. * The endpoint needs to be closed via snd_usb_endpoint_close() later. * * Note that this function doesn't configure the endpoint. The substream * needs to set it up later via snd_usb_endpoint_set_params() and * snd_usb_endpoint_prepare(). */ struct snd_usb_endpoint * snd_usb_endpoint_open(struct snd_usb_audio *chip, const struct audioformat *fp, const struct snd_pcm_hw_params *params, bool is_sync_ep, bool fixed_rate) { struct snd_usb_endpoint *ep; int ep_num = is_sync_ep ? fp->sync_ep : fp->endpoint; mutex_lock(&chip->mutex); ep = snd_usb_get_endpoint(chip, ep_num); if (!ep) { usb_audio_err(chip, "Cannot find EP 0x%x to open\n", ep_num); goto unlock; } if (!ep->opened) { if (is_sync_ep) { ep->iface = fp->sync_iface; ep->altsetting = fp->sync_altsetting; ep->ep_idx = fp->sync_ep_idx; } else { ep->iface = fp->iface; ep->altsetting = fp->altsetting; ep->ep_idx = fp->ep_idx; } usb_audio_dbg(chip, "Open EP 0x%x, iface=%d:%d, idx=%d\n", ep_num, ep->iface, ep->altsetting, ep->ep_idx); ep->iface_ref = iface_ref_find(chip, ep->iface); if (!ep->iface_ref) { ep = NULL; goto unlock; } if (fp->protocol != UAC_VERSION_1) { ep->clock_ref = clock_ref_find(chip, fp->clock); if (!ep->clock_ref) { ep = NULL; goto unlock; } ep->clock_ref->opened++; } ep->cur_audiofmt = fp; ep->cur_channels = fp->channels; ep->cur_rate = params_rate(params); ep->cur_format = params_format(params); ep->cur_frame_bytes = snd_pcm_format_physical_width(ep->cur_format) * ep->cur_channels / 8; ep->cur_period_frames = params_period_size(params); ep->cur_period_bytes = ep->cur_period_frames * ep->cur_frame_bytes; ep->cur_buffer_periods = params_periods(params); if (ep->type == SND_USB_ENDPOINT_TYPE_SYNC) endpoint_set_syncinterval(chip, ep); ep->implicit_fb_sync = fp->implicit_fb; ep->need_setup = true; ep->need_prepare = true; ep->fixed_rate = fixed_rate; usb_audio_dbg(chip, " channels=%d, rate=%d, format=%s, period_bytes=%d, periods=%d, implicit_fb=%d\n", ep->cur_channels, ep->cur_rate, snd_pcm_format_name(ep->cur_format), ep->cur_period_bytes, ep->cur_buffer_periods, ep->implicit_fb_sync); } else { if (WARN_ON(!ep->iface_ref)) { ep = NULL; goto unlock; } if (!endpoint_compatible(ep, fp, params)) { usb_audio_err(chip, "Incompatible EP setup for 0x%x\n", ep_num); ep = NULL; goto unlock; } usb_audio_dbg(chip, "Reopened EP 0x%x (count %d)\n", ep_num, ep->opened); } if (!ep->iface_ref->opened++) ep->iface_ref->need_setup = true; ep->opened++; unlock: mutex_unlock(&chip->mutex); return ep; } /* * snd_usb_endpoint_set_sync: Link data and sync endpoints * * Pass NULL to sync_ep to unlink again */ void snd_usb_endpoint_set_sync(struct snd_usb_audio *chip, struct snd_usb_endpoint *data_ep, struct snd_usb_endpoint *sync_ep) { data_ep->sync_source = sync_ep; } /* * Set data 
endpoint callbacks and the assigned data stream * * Called at PCM trigger and cleanups. * Pass NULL to deactivate each callback. */ void snd_usb_endpoint_set_callback(struct snd_usb_endpoint *ep, int (*prepare)(struct snd_usb_substream *subs, struct urb *urb, bool in_stream_lock), void (*retire)(struct snd_usb_substream *subs, struct urb *urb), struct snd_usb_substream *data_subs) { ep->prepare_data_urb = prepare; ep->retire_data_urb = retire; if (data_subs) ep->lowlatency_playback = data_subs->lowlatency_playback; else ep->lowlatency_playback = false; WRITE_ONCE(ep->data_subs, data_subs); } static int endpoint_set_interface(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep, bool set) { int altset = set ? ep->altsetting : 0; int err; if (ep->iface_ref->altset == altset) return 0; usb_audio_dbg(chip, "Setting usb interface %d:%d for EP 0x%x\n", ep->iface, altset, ep->ep_num); err = usb_set_interface(chip->dev, ep->iface, altset); if (err < 0) { usb_audio_err_ratelimited( chip, "%d:%d: usb_set_interface failed (%d)\n", ep->iface, altset, err); return err; } if (chip->quirk_flags & QUIRK_FLAG_IFACE_DELAY) msleep(50); ep->iface_ref->altset = altset; return 0; } /* * snd_usb_endpoint_close: Close the endpoint * * Unreference the already opened endpoint via snd_usb_endpoint_open(). */ void snd_usb_endpoint_close(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { mutex_lock(&chip->mutex); usb_audio_dbg(chip, "Closing EP 0x%x (count %d)\n", ep->ep_num, ep->opened); if (!--ep->iface_ref->opened && !(chip->quirk_flags & QUIRK_FLAG_IFACE_SKIP_CLOSE)) endpoint_set_interface(chip, ep, false); if (!--ep->opened) { if (ep->clock_ref) { if (!--ep->clock_ref->opened) ep->clock_ref->rate = 0; } ep->iface = 0; ep->altsetting = 0; ep->cur_audiofmt = NULL; ep->cur_rate = 0; ep->iface_ref = NULL; ep->clock_ref = NULL; usb_audio_dbg(chip, "EP 0x%x closed\n", ep->ep_num); } mutex_unlock(&chip->mutex); } /* Prepare for suspening EP, called from the main suspend handler */ void snd_usb_endpoint_suspend(struct snd_usb_endpoint *ep) { ep->need_prepare = true; if (ep->iface_ref) ep->iface_ref->need_setup = true; if (ep->clock_ref) ep->clock_ref->rate = 0; } /* * wait until all urbs are processed. */ static int wait_clear_urbs(struct snd_usb_endpoint *ep) { unsigned long end_time = jiffies + msecs_to_jiffies(1000); int alive; if (atomic_read(&ep->state) != EP_STATE_STOPPING) return 0; do { alive = atomic_read(&ep->submitted_urbs); if (!alive) break; schedule_timeout_uninterruptible(1); } while (time_before(jiffies, end_time)); if (alive) usb_audio_err(ep->chip, "timeout: still %d active urbs on EP #%x\n", alive, ep->ep_num); if (ep_state_update(ep, EP_STATE_STOPPING, EP_STATE_STOPPED)) { ep->sync_sink = NULL; snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL); } return 0; } /* sync the pending stop operation; * this function itself doesn't trigger the stop operation */ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep) { if (ep) wait_clear_urbs(ep); } /* * Stop active urbs * * This function moves the EP to STOPPING state if it's being RUNNING. 
*/ static int stop_urbs(struct snd_usb_endpoint *ep, bool force, bool keep_pending) { unsigned int i; unsigned long flags; if (!force && atomic_read(&ep->running)) return -EBUSY; if (!ep_state_update(ep, EP_STATE_RUNNING, EP_STATE_STOPPING)) return 0; spin_lock_irqsave(&ep->lock, flags); INIT_LIST_HEAD(&ep->ready_playback_urbs); ep->next_packet_head = 0; ep->next_packet_queued = 0; spin_unlock_irqrestore(&ep->lock, flags); if (keep_pending) return 0; for (i = 0; i < ep->nurbs; i++) { if (test_bit(i, &ep->active_mask)) { if (!test_and_set_bit(i, &ep->unlink_mask)) { struct urb *u = ep->urb[i].urb; usb_unlink_urb(u); } } } return 0; } /* * release an endpoint's urbs */ static int release_urbs(struct snd_usb_endpoint *ep, bool force) { int i, err; /* route incoming urbs to nirvana */ snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL); /* stop and unlink urbs */ err = stop_urbs(ep, force, false); if (err) return err; wait_clear_urbs(ep); for (i = 0; i < ep->nurbs; i++) release_urb_ctx(&ep->urb[i]); usb_free_coherent(ep->chip->dev, SYNC_URBS * 4, ep->syncbuf, ep->sync_dma); ep->syncbuf = NULL; ep->nurbs = 0; return 0; } /* * configure a data endpoint */ static int data_ep_set_params(struct snd_usb_endpoint *ep) { struct snd_usb_audio *chip = ep->chip; unsigned int maxsize, minsize, packs_per_ms, max_packs_per_urb; unsigned int max_packs_per_period, urbs_per_period, urb_packs; unsigned int max_urbs, i; const struct audioformat *fmt = ep->cur_audiofmt; int frame_bits = ep->cur_frame_bytes * 8; int tx_length_quirk = (has_tx_length_quirk(chip) && usb_pipeout(ep->pipe)); usb_audio_dbg(chip, "Setting params for data EP 0x%x, pipe 0x%x\n", ep->ep_num, ep->pipe); if (ep->cur_format == SNDRV_PCM_FORMAT_DSD_U16_LE && fmt->dsd_dop) { /* * When operating in DSD DOP mode, the size of a sample frame * in hardware differs from the actual physical format width * because we need to make room for the DOP markers. */ frame_bits += ep->cur_channels << 3; } ep->datainterval = fmt->datainterval; ep->stride = frame_bits >> 3; switch (ep->cur_format) { case SNDRV_PCM_FORMAT_U8: ep->silence_value = 0x80; break; case SNDRV_PCM_FORMAT_DSD_U8: case SNDRV_PCM_FORMAT_DSD_U16_LE: case SNDRV_PCM_FORMAT_DSD_U32_LE: case SNDRV_PCM_FORMAT_DSD_U16_BE: case SNDRV_PCM_FORMAT_DSD_U32_BE: ep->silence_value = 0x69; break; default: ep->silence_value = 0; } /* assume max. frequency is 50% higher than nominal */ ep->freqmax = ep->freqn + (ep->freqn >> 1); /* Round up freqmax to nearest integer in order to calculate maximum * packet size, which must represent a whole number of frames. * This is accomplished by adding 0x0.ffff before converting the * Q16.16 format into integer. * In order to accurately calculate the maximum packet size when * the data interval is more than 1 (i.e. ep->datainterval > 0), * multiply by the data interval prior to rounding. For instance, * a freqmax of 41 kHz will result in a max packet size of 6 (5.125) * frames with a data interval of 1, but 11 (10.25) frames with a * data interval of 2. * (ep->freqmax << ep->datainterval overflows at 8.192 MHz for the * maximum datainterval value of 3, at USB full speed, higher for * USB high speed, noting that ep->freqmax is in units of * frames per packet in Q16.16 format.) */ maxsize = (((ep->freqmax << ep->datainterval) + 0xffff) >> 16) * (frame_bits >> 3); if (tx_length_quirk) maxsize += sizeof(__le32); /* Space for length descriptor */ /* but wMaxPacketSize might reduce this */ if (ep->maxpacksize && ep->maxpacksize < maxsize) { /* whatever fits into a max. 
size packet */ unsigned int data_maxsize = maxsize = ep->maxpacksize; if (tx_length_quirk) /* Need to remove the length descriptor to calc freq */ data_maxsize -= sizeof(__le32); ep->freqmax = (data_maxsize / (frame_bits >> 3)) << (16 - ep->datainterval); } if (ep->fill_max) ep->curpacksize = ep->maxpacksize; else ep->curpacksize = maxsize; if (snd_usb_get_speed(chip->dev) != USB_SPEED_FULL) { packs_per_ms = 8 >> ep->datainterval; max_packs_per_urb = MAX_PACKS_HS; } else { packs_per_ms = 1; max_packs_per_urb = MAX_PACKS; } if (ep->sync_source && !ep->implicit_fb_sync) max_packs_per_urb = min(max_packs_per_urb, 1U << ep->sync_source->syncinterval); max_packs_per_urb = max(1u, max_packs_per_urb >> ep->datainterval); /* * Capture endpoints need to use small URBs because there's no way * to tell in advance where the next period will end, and we don't * want the next URB to complete much after the period ends. * * Playback endpoints with implicit sync much use the same parameters * as their corresponding capture endpoint. */ if (usb_pipein(ep->pipe) || ep->implicit_fb_sync) { /* make capture URBs <= 1 ms and smaller than a period */ urb_packs = min(max_packs_per_urb, packs_per_ms); while (urb_packs > 1 && urb_packs * maxsize >= ep->cur_period_bytes) urb_packs >>= 1; ep->nurbs = MAX_URBS; /* * Playback endpoints without implicit sync are adjusted so that * a period fits as evenly as possible in the smallest number of * URBs. The total number of URBs is adjusted to the size of the * ALSA buffer, subject to the MAX_URBS and MAX_QUEUE limits. */ } else { /* determine how small a packet can be */ minsize = (ep->freqn >> (16 - ep->datainterval)) * (frame_bits >> 3); /* with sync from device, assume it can be 12% lower */ if (ep->sync_source) minsize -= minsize >> 3; minsize = max(minsize, 1u); /* how many packets will contain an entire ALSA period? */ max_packs_per_period = DIV_ROUND_UP(ep->cur_period_bytes, minsize); /* how many URBs will contain a period? */ urbs_per_period = DIV_ROUND_UP(max_packs_per_period, max_packs_per_urb); /* how many packets are needed in each URB? 
*/ urb_packs = DIV_ROUND_UP(max_packs_per_period, urbs_per_period); /* limit the number of frames in a single URB */ ep->max_urb_frames = DIV_ROUND_UP(ep->cur_period_frames, urbs_per_period); /* try to use enough URBs to contain an entire ALSA buffer */ max_urbs = min((unsigned) MAX_URBS, MAX_QUEUE * packs_per_ms / urb_packs); ep->nurbs = min(max_urbs, urbs_per_period * ep->cur_buffer_periods); } /* allocate and initialize data urbs */ for (i = 0; i < ep->nurbs; i++) { struct snd_urb_ctx *u = &ep->urb[i]; u->index = i; u->ep = ep; u->packets = urb_packs; u->buffer_size = maxsize * u->packets; if (fmt->fmt_type == UAC_FORMAT_TYPE_II) u->packets++; /* for transfer delimiter */ u->urb = usb_alloc_urb(u->packets, GFP_KERNEL); if (!u->urb) goto out_of_memory; u->urb->transfer_buffer = usb_alloc_coherent(chip->dev, u->buffer_size, GFP_KERNEL, &u->urb->transfer_dma); if (!u->urb->transfer_buffer) goto out_of_memory; u->urb->pipe = ep->pipe; u->urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; u->urb->interval = 1 << ep->datainterval; u->urb->context = u; u->urb->complete = snd_complete_urb; INIT_LIST_HEAD(&u->ready_list); } return 0; out_of_memory: release_urbs(ep, false); return -ENOMEM; } /* * configure a sync endpoint */ static int sync_ep_set_params(struct snd_usb_endpoint *ep) { struct snd_usb_audio *chip = ep->chip; int i; usb_audio_dbg(chip, "Setting params for sync EP 0x%x, pipe 0x%x\n", ep->ep_num, ep->pipe); ep->syncbuf = usb_alloc_coherent(chip->dev, SYNC_URBS * 4, GFP_KERNEL, &ep->sync_dma); if (!ep->syncbuf) return -ENOMEM; ep->nurbs = SYNC_URBS; for (i = 0; i < SYNC_URBS; i++) { struct snd_urb_ctx *u = &ep->urb[i]; u->index = i; u->ep = ep; u->packets = 1; u->urb = usb_alloc_urb(1, GFP_KERNEL); if (!u->urb) goto out_of_memory; u->urb->transfer_buffer = ep->syncbuf + i * 4; u->urb->transfer_dma = ep->sync_dma + i * 4; u->urb->transfer_buffer_length = 4; u->urb->pipe = ep->pipe; u->urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; u->urb->number_of_packets = 1; u->urb->interval = 1 << ep->syncinterval; u->urb->context = u; u->urb->complete = snd_complete_urb; } return 0; out_of_memory: release_urbs(ep, false); return -ENOMEM; } /* update the rate of the referred clock; return the actual rate */ static int update_clock_ref_rate(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { struct snd_usb_clock_ref *clock = ep->clock_ref; int rate = ep->cur_rate; if (!clock || clock->rate == rate) return rate; if (clock->rate) { if (atomic_read(&clock->locked)) return clock->rate; if (clock->rate != rate) { usb_audio_err(chip, "Mismatched sample rate %d vs %d for EP 0x%x\n", clock->rate, rate, ep->ep_num); return clock->rate; } } clock->rate = rate; clock->need_setup = true; return rate; } /* * snd_usb_endpoint_set_params: configure an snd_usb_endpoint * * It's called either from hw_params callback. * Determine the number of URBs to be used on this endpoint. * An endpoint must be configured before it can be started. * An endpoint that is already running can not be reconfigured. 
*/ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { const struct audioformat *fmt = ep->cur_audiofmt; int err = 0; mutex_lock(&chip->mutex); if (!ep->need_setup) goto unlock; /* release old buffers, if any */ err = release_urbs(ep, false); if (err < 0) goto unlock; ep->datainterval = fmt->datainterval; ep->maxpacksize = fmt->maxpacksize; ep->fill_max = !!(fmt->attributes & UAC_EP_CS_ATTR_FILL_MAX); if (snd_usb_get_speed(chip->dev) == USB_SPEED_FULL) { ep->freqn = get_usb_full_speed_rate(ep->cur_rate); ep->pps = 1000 >> ep->datainterval; } else { ep->freqn = get_usb_high_speed_rate(ep->cur_rate); ep->pps = 8000 >> ep->datainterval; } ep->sample_rem = ep->cur_rate % ep->pps; ep->packsize[0] = ep->cur_rate / ep->pps; ep->packsize[1] = (ep->cur_rate + (ep->pps - 1)) / ep->pps; /* calculate the frequency in 16.16 format */ ep->freqm = ep->freqn; ep->freqshift = INT_MIN; ep->phase = 0; switch (ep->type) { case SND_USB_ENDPOINT_TYPE_DATA: err = data_ep_set_params(ep); break; case SND_USB_ENDPOINT_TYPE_SYNC: err = sync_ep_set_params(ep); break; default: err = -EINVAL; } usb_audio_dbg(chip, "Set up %d URBS, ret=%d\n", ep->nurbs, err); if (err < 0) goto unlock; /* some unit conversions in runtime */ ep->maxframesize = ep->maxpacksize / ep->cur_frame_bytes; ep->curframesize = ep->curpacksize / ep->cur_frame_bytes; err = update_clock_ref_rate(chip, ep); if (err >= 0) { ep->need_setup = false; err = 0; } unlock: mutex_unlock(&chip->mutex); return err; } static int init_sample_rate(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { struct snd_usb_clock_ref *clock = ep->clock_ref; int rate, err; rate = update_clock_ref_rate(chip, ep); if (rate < 0) return rate; if (clock && !clock->need_setup) return 0; if (!ep->fixed_rate) { err = snd_usb_init_sample_rate(chip, ep->cur_audiofmt, rate); if (err < 0) { if (clock) clock->rate = 0; /* reset rate */ return err; } } if (clock) clock->need_setup = false; return 0; } /* * snd_usb_endpoint_prepare: Prepare the endpoint * * This function sets up the EP to be fully usable state. * It's called either from prepare callback. * The function checks need_setup flag, and performs nothing unless needed, * so it's safe to call this multiple times. * * This returns zero if unchanged, 1 if the configuration has changed, * or a negative error code. */ int snd_usb_endpoint_prepare(struct snd_usb_audio *chip, struct snd_usb_endpoint *ep) { bool iface_first; int err = 0; mutex_lock(&chip->mutex); if (WARN_ON(!ep->iface_ref)) goto unlock; if (!ep->need_prepare) goto unlock; /* If the interface has been already set up, just set EP parameters */ if (!ep->iface_ref->need_setup) { /* sample rate setup of UAC1 is per endpoint, and we need * to update at each EP configuration */ if (ep->cur_audiofmt->protocol == UAC_VERSION_1) { err = init_sample_rate(chip, ep); if (err < 0) goto unlock; } goto done; } /* Need to deselect altsetting at first */ endpoint_set_interface(chip, ep, false); /* Some UAC1 devices (e.g. 
Yamaha THR10) need the host interface * to be set up before parameter setups */ iface_first = ep->cur_audiofmt->protocol == UAC_VERSION_1; /* Workaround for devices that require the interface setup at first like UAC1 */ if (chip->quirk_flags & QUIRK_FLAG_SET_IFACE_FIRST) iface_first = true; if (iface_first) { err = endpoint_set_interface(chip, ep, true); if (err < 0) goto unlock; } err = snd_usb_init_pitch(chip, ep->cur_audiofmt); if (err < 0) goto unlock; err = init_sample_rate(chip, ep); if (err < 0) goto unlock; err = snd_usb_select_mode_quirk(chip, ep->cur_audiofmt); if (err < 0) goto unlock; /* for UAC2/3, enable the interface altset here at last */ if (!iface_first) { err = endpoint_set_interface(chip, ep, true); if (err < 0) goto unlock; } ep->iface_ref->need_setup = false; done: ep->need_prepare = false; err = 1; unlock: mutex_unlock(&chip->mutex); return err; } /* get the current rate set to the given clock by any endpoint */ int snd_usb_endpoint_get_clock_rate(struct snd_usb_audio *chip, int clock) { struct snd_usb_clock_ref *ref; int rate = 0; if (!clock) return 0; mutex_lock(&chip->mutex); list_for_each_entry(ref, &chip->clock_ref_list, list) { if (ref->clock == clock) { rate = ref->rate; break; } } mutex_unlock(&chip->mutex); return rate; } /** * snd_usb_endpoint_start: start an snd_usb_endpoint * * @ep: the endpoint to start * * A call to this function will increment the running count of the endpoint. * In case it is not already running, the URBs for this endpoint will be * submitted. Otherwise, this function does nothing. * * Must be balanced to calls of snd_usb_endpoint_stop(). * * Returns an error if the URB submission failed, 0 in all other cases. */ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) { bool is_playback = usb_pipeout(ep->pipe); int err; unsigned int i; if (atomic_read(&ep->chip->shutdown)) return -EBADFD; if (ep->sync_source) WRITE_ONCE(ep->sync_source->sync_sink, ep); usb_audio_dbg(ep->chip, "Starting %s EP 0x%x (running %d)\n", ep_type_name(ep->type), ep->ep_num, atomic_read(&ep->running)); /* already running? */ if (atomic_inc_return(&ep->running) != 1) return 0; if (ep->clock_ref) atomic_inc(&ep->clock_ref->locked); ep->active_mask = 0; ep->unlink_mask = 0; ep->phase = 0; ep->sample_accum = 0; snd_usb_endpoint_start_quirk(ep); /* * If this endpoint has a data endpoint as implicit feedback source, * don't start the urbs here. Instead, mark them all as available, * wait for the record urbs to return and queue the playback urbs * from that context. 
*/ if (!ep_state_update(ep, EP_STATE_STOPPED, EP_STATE_RUNNING)) goto __error; if (snd_usb_endpoint_implicit_feedback_sink(ep) && !(ep->chip->quirk_flags & QUIRK_FLAG_PLAYBACK_FIRST)) { usb_audio_dbg(ep->chip, "No URB submission due to implicit fb sync\n"); i = 0; goto fill_rest; } for (i = 0; i < ep->nurbs; i++) { struct urb *urb = ep->urb[i].urb; if (snd_BUG_ON(!urb)) goto __error; if (is_playback) err = prepare_outbound_urb(ep, urb->context, true); else err = prepare_inbound_urb(ep, urb->context); if (err < 0) { /* stop filling at applptr */ if (err == -EAGAIN) break; usb_audio_dbg(ep->chip, "EP 0x%x: failed to prepare urb: %d\n", ep->ep_num, err); goto __error; } if (!atomic_read(&ep->chip->shutdown)) err = usb_submit_urb(urb, GFP_ATOMIC); else err = -ENODEV; if (err < 0) { if (!atomic_read(&ep->chip->shutdown)) usb_audio_err(ep->chip, "cannot submit urb %d, error %d: %s\n", i, err, usb_error_string(err)); goto __error; } set_bit(i, &ep->active_mask); atomic_inc(&ep->submitted_urbs); } if (!i) { usb_audio_dbg(ep->chip, "XRUN at starting EP 0x%x\n", ep->ep_num); goto __error; } usb_audio_dbg(ep->chip, "%d URBs submitted for EP 0x%x\n", i, ep->ep_num); fill_rest: /* put the remaining URBs to ready list */ if (is_playback) { for (; i < ep->nurbs; i++) push_back_to_ready_list(ep, ep->urb + i); } return 0; __error: snd_usb_endpoint_stop(ep, false); return -EPIPE; } /** * snd_usb_endpoint_stop: stop an snd_usb_endpoint * * @ep: the endpoint to stop (may be NULL) * @keep_pending: keep in-flight URBs * * A call to this function will decrement the running count of the endpoint. * In case the last user has requested the endpoint stop, the URBs will * actually be deactivated. * * Must be balanced to calls of snd_usb_endpoint_start(). * * The caller needs to synchronize the pending stop operation via * snd_usb_endpoint_sync_pending_stop(). */ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep, bool keep_pending) { if (!ep) return; usb_audio_dbg(ep->chip, "Stopping %s EP 0x%x (running %d)\n", ep_type_name(ep->type), ep->ep_num, atomic_read(&ep->running)); if (snd_BUG_ON(!atomic_read(&ep->running))) return; if (!atomic_dec_return(&ep->running)) { if (ep->sync_source) WRITE_ONCE(ep->sync_source->sync_sink, NULL); stop_urbs(ep, false, keep_pending); if (ep->clock_ref) atomic_dec(&ep->clock_ref->locked); if (ep->chip->quirk_flags & QUIRK_FLAG_FORCE_IFACE_RESET && usb_pipeout(ep->pipe)) { ep->need_prepare = true; if (ep->iface_ref) ep->iface_ref->need_setup = true; } } } /** * snd_usb_endpoint_release: Tear down an snd_usb_endpoint * * @ep: the endpoint to release * * This function does not care for the endpoint's running count but will tear * down all the streaming URBs immediately. 
*/
void snd_usb_endpoint_release(struct snd_usb_endpoint *ep)
{
	release_urbs(ep, true);
}

/**
 * snd_usb_endpoint_free_all: Free the resources of an snd_usb_endpoint
 * @chip: The chip
 *
 * This frees all endpoints and their resources
 */
void snd_usb_endpoint_free_all(struct snd_usb_audio *chip)
{
	struct snd_usb_endpoint *ep, *en;
	struct snd_usb_iface_ref *ip, *in;
	struct snd_usb_clock_ref *cp, *cn;

	list_for_each_entry_safe(ep, en, &chip->ep_list, list)
		kfree(ep);

	list_for_each_entry_safe(ip, in, &chip->iface_ref_list, list)
		kfree(ip);

	list_for_each_entry_safe(cp, cn, &chip->clock_ref_list, list)
		kfree(cp);
}

/*
 * snd_usb_handle_sync_urb: parse a USB sync packet
 *
 * @ep: the endpoint to handle the packet
 * @sender: the sending endpoint
 * @urb: the received packet
 *
 * This function is called from the context of an endpoint that received
 * the packet and is used to let another endpoint object handle the payload.
 */
static void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep,
				    struct snd_usb_endpoint *sender,
				    const struct urb *urb)
{
	int shift;
	unsigned int f;
	unsigned long flags;

	snd_BUG_ON(ep == sender);

	/*
	 * In case the endpoint is operating in implicit feedback mode, prepare
	 * a new outbound URB that has the same layout as the received packet
	 * and add it to the list of pending urbs. queue_pending_output_urbs()
	 * will take care of them later.
	 */
	if (snd_usb_endpoint_implicit_feedback_sink(ep) &&
	    atomic_read(&ep->running)) {

		/* implicit feedback case */
		int i, bytes = 0;
		struct snd_urb_ctx *in_ctx;
		struct snd_usb_packet_info *out_packet;

		in_ctx = urb->context;

		/* Count overall packet size */
		for (i = 0; i < in_ctx->packets; i++)
			if (urb->iso_frame_desc[i].status == 0)
				bytes += urb->iso_frame_desc[i].actual_length;

		/*
		 * skip empty packets. At least M-Audio's Fast Track Ultra stops
		 * streaming once it receives a 0-byte OUT URB
		 */
		if (bytes == 0)
			return;

		spin_lock_irqsave(&ep->lock, flags);
		if (ep->next_packet_queued >= ARRAY_SIZE(ep->next_packet)) {
			spin_unlock_irqrestore(&ep->lock, flags);
			usb_audio_err(ep->chip,
				      "next package FIFO overflow EP 0x%x\n",
				      ep->ep_num);
			notify_xrun(ep);
			return;
		}

		out_packet = next_packet_fifo_enqueue(ep);

		/*
		 * Iterate through the inbound packet and prepare the lengths
		 * for the output packet. The OUT packet we are about to send
		 * will have the same amount of payload bytes per stride as the
		 * IN packet we just received. Since the actual size is scaled
		 * by the stride, use the sender stride to calculate the length
		 * in case the number of channels differs between the implicitly
		 * fed-back endpoint and the synchronizing endpoint.
		 */
		out_packet->packets = in_ctx->packets;
		for (i = 0; i < in_ctx->packets; i++) {
			if (urb->iso_frame_desc[i].status == 0)
				out_packet->packet_size[i] =
					urb->iso_frame_desc[i].actual_length /
						sender->stride;
			else
				out_packet->packet_size[i] = 0;
		}

		spin_unlock_irqrestore(&ep->lock, flags);
		snd_usb_queue_pending_output_urbs(ep, false);

		return;
	}

	/*
	 * process after playback sync complete
	 *
	 * Full speed devices report feedback values in 10.14 format as samples
	 * per frame, high speed devices in 16.16 format as samples per
	 * microframe.
	 *
	 * Because the Audio Class 1 spec was written before USB 2.0, many high
	 * speed devices use a wrong interpretation, some others use an
	 * entirely different format.
	 *
	 * Therefore, we cannot predict what format any particular device uses
	 * and must detect it automatically.
*/ if (urb->iso_frame_desc[0].status != 0 || urb->iso_frame_desc[0].actual_length < 3) return; f = le32_to_cpup(urb->transfer_buffer); if (urb->iso_frame_desc[0].actual_length == 3) f &= 0x00ffffff; else f &= 0x0fffffff; if (f == 0) return; if (unlikely(sender->tenor_fb_quirk)) { /* * Devices based on Tenor 8802 chipsets (TEAC UD-H01 * and others) sometimes change the feedback value * by +/- 0x1.0000. */ if (f < ep->freqn - 0x8000) f += 0xf000; else if (f > ep->freqn + 0x8000) f -= 0xf000; } else if (unlikely(ep->freqshift == INT_MIN)) { /* * The first time we see a feedback value, determine its format * by shifting it left or right until it matches the nominal * frequency value. This assumes that the feedback does not * differ from the nominal value more than +50% or -25%. */ shift = 0; while (f < ep->freqn - ep->freqn / 4) { f <<= 1; shift++; } while (f > ep->freqn + ep->freqn / 2) { f >>= 1; shift--; } ep->freqshift = shift; } else if (ep->freqshift >= 0) f <<= ep->freqshift; else f >>= -ep->freqshift; if (likely(f >= ep->freqn - ep->freqn / 8 && f <= ep->freqmax)) { /* * If the frequency looks valid, set it. * This value is referred to in prepare_playback_urb(). */ spin_lock_irqsave(&ep->lock, flags); ep->freqm = f; spin_unlock_irqrestore(&ep->lock, flags); } else { /* * Out of range; maybe the shift value is wrong. * Reset it so that we autodetect again the next time. */ ep->freqshift = INT_MIN; } }
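/*
 * [Editor's illustrative sketch, not part of the driver.] The snippet below
 * mirrors the feedback-format autodetection performed in
 * snd_usb_handle_sync_urb() above, on hypothetical numbers: assuming a
 * nominal 48 kHz stream at USB full speed, ep->freqn would be 48 frames per
 * USB frame in Q16.16 format (48 << 16), while a full-speed device reports
 * its feedback in 10.14 format (48 << 14).  The detection loop then settles
 * on a shift of 2, i.e. "shift the reported value left by 2 to get Q16.16".
 */
#include <stdio.h>

static int detect_freqshift(unsigned int f, unsigned int freqn)
{
	int shift = 0;

	/* same bounds as the driver: at most +50% / -25% off nominal */
	while (f < freqn - freqn / 4) {
		f <<= 1;
		shift++;
	}
	while (f > freqn + freqn / 2) {
		f >>= 1;
		shift--;
	}
	return shift;
}

int main(void)
{
	unsigned int freqn = 48 << 16;	/* 48.0 frames/frame in Q16.16 */
	unsigned int fb = 48 << 14;	/* the same rate reported in 10.14 */

	printf("freqshift = %d\n", detect_freqshift(fb, freqn));	/* prints 2 */
	return 0;
}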
4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 /* SPDX-License-Identifier: GPL-2.0 */ /* * Internal header to deal with irq_desc->status which will be renamed * to irq_desc->settings. */ enum { _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS, _IRQ_PER_CPU = IRQ_PER_CPU, _IRQ_LEVEL = IRQ_LEVEL, _IRQ_NOPROBE = IRQ_NOPROBE, _IRQ_NOREQUEST = IRQ_NOREQUEST, _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, _IRQ_IS_POLLED = IRQ_IS_POLLED, _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, _IRQ_HIDDEN = IRQ_HIDDEN, _IRQ_NO_DEBUG = IRQ_NO_DEBUG, _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; #define IRQ_PER_CPU GOT_YOU_MORON #define IRQ_NO_BALANCING GOT_YOU_MORON #define IRQ_LEVEL GOT_YOU_MORON #define IRQ_NOPROBE GOT_YOU_MORON #define IRQ_NOREQUEST GOT_YOU_MORON #define IRQ_NOTHREAD GOT_YOU_MORON #define IRQ_NOAUTOEN GOT_YOU_MORON #define IRQ_NESTED_THREAD GOT_YOU_MORON #define IRQ_PER_CPU_DEVID GOT_YOU_MORON #define IRQ_IS_POLLED GOT_YOU_MORON #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON #define IRQ_HIDDEN GOT_YOU_MORON #define IRQ_NO_DEBUG GOT_YOU_MORON #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON static inline void irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) { desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK); desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); } static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_PER_CPU; } static inline bool irq_settings_is_per_cpu_devid(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_PER_CPU_DEVID; } static inline void irq_settings_set_per_cpu(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_PER_CPU; } static inline void irq_settings_set_no_balancing(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NO_BALANCING; } static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NO_BALANCING; } static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) { return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK; } static inline void irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) { desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK; desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK; } static inline bool irq_settings_is_level(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_LEVEL; } static inline void irq_settings_clr_level(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_LEVEL; } static inline void irq_settings_set_level(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_LEVEL; } static inline bool irq_settings_can_request(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOREQUEST); } static inline void irq_settings_clr_norequest(struct 
irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOREQUEST; } static inline void irq_settings_set_norequest(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOREQUEST; } static inline bool irq_settings_can_thread(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOTHREAD); } static inline void irq_settings_clr_nothread(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOTHREAD; } static inline void irq_settings_set_nothread(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOTHREAD; } static inline bool irq_settings_can_probe(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOPROBE); } static inline void irq_settings_clr_noprobe(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_NOPROBE; } static inline void irq_settings_set_noprobe(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NOPROBE; } static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_MOVE_PCNTXT; } static inline bool irq_settings_can_autoenable(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOAUTOEN); } static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NESTED_THREAD; } static inline bool irq_settings_is_polled(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_IS_POLLED; } static inline bool irq_settings_disable_unlazy(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_DISABLE_UNLAZY; } static inline void irq_settings_clr_disable_unlazy(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_DISABLE_UNLAZY; } static inline bool irq_settings_is_hidden(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_HIDDEN; } static inline void irq_settings_set_no_debug(struct irq_desc *desc) { desc->status_use_accessors |= _IRQ_NO_DEBUG; } static inline bool irq_settings_no_debug(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_NO_DEBUG; }
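/*
 * [Editor's illustrative sketch, not from the kernel.] The header above
 * copies each IRQ_* flag into an underscored enum and then redefines the
 * public name to an undeclared token ("GOT_YOU_MORON"), so any code that
 * bypasses the irq_settings_*() accessors and touches the flags directly
 * fails to compile.  The same trick, shown on a hypothetical FOO_LEVEL flag:
 */
#include <stdbool.h>

struct foo_desc {
	unsigned int status;
};

enum {
	_FOO_LEVEL = 1 << 0,	/* internal copy used only by the accessors */
};

#define FOO_LEVEL USE_THE_ACCESSORS_INSTEAD	/* poison the public name */

static inline bool foo_is_level(struct foo_desc *desc)
{
	return desc->status & _FOO_LEVEL;
}

static inline void foo_set_level(struct foo_desc *desc)
{
	desc->status |= _FOO_LEVEL;
}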
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2005-2010 IBM Corporation * * Author: * Mimi Zohar <zohar@us.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * * File: evm_main.c * implements evm_inode_setxattr, evm_inode_post_setxattr, * evm_inode_removexattr, evm_verifyxattr, and evm_inode_set_acl. */ #define pr_fmt(fmt) "EVM: "fmt #include <linux/init.h> #include <linux/audit.h> #include <linux/xattr.h> #include <linux/integrity.h> #include <linux/evm.h> #include <linux/magic.h> #include <linux/posix_acl_xattr.h> #include <linux/lsm_hooks.h> #include <crypto/hash.h> #include <crypto/hash_info.h> #include <crypto/utils.h> #include "evm.h" int evm_initialized; static const char * const integrity_status_msg[] = { "pass", "pass_immutable", "fail", "fail_immutable", "no_label", "no_xattrs", "unknown" }; int evm_hmac_attrs; static struct xattr_list evm_config_default_xattrnames[] = { { .name = XATTR_NAME_SELINUX, .enabled = IS_ENABLED(CONFIG_SECURITY_SELINUX) }, { .name = XATTR_NAME_SMACK, .enabled = IS_ENABLED(CONFIG_SECURITY_SMACK) }, { .name = XATTR_NAME_SMACKEXEC, .enabled = IS_ENABLED(CONFIG_EVM_EXTRA_SMACK_XATTRS) }, { .name = XATTR_NAME_SMACKTRANSMUTE, .enabled = IS_ENABLED(CONFIG_EVM_EXTRA_SMACK_XATTRS) }, { .name = XATTR_NAME_SMACKMMAP, .enabled = IS_ENABLED(CONFIG_EVM_EXTRA_SMACK_XATTRS) }, { .name = XATTR_NAME_APPARMOR, .enabled = IS_ENABLED(CONFIG_SECURITY_APPARMOR) }, { .name = XATTR_NAME_IMA, .enabled = IS_ENABLED(CONFIG_IMA_APPRAISE) }, { .name = XATTR_NAME_CAPS, .enabled = true }, }; LIST_HEAD(evm_config_xattrnames); static int evm_fixmode __ro_after_init; static int __init evm_set_fixmode(char *str) { if (strncmp(str, "fix", 3) == 0) evm_fixmode = 1; else pr_err("invalid \"%s\" mode", str); return 1; } __setup("evm=", evm_set_fixmode); static void __init evm_init_config(void) { int i, xattrs; xattrs = ARRAY_SIZE(evm_config_default_xattrnames); pr_info("Initialising EVM extended attributes:\n"); for (i = 0; i < xattrs; i++) { pr_info("%s%s\n", evm_config_default_xattrnames[i].name, !evm_config_default_xattrnames[i].enabled ? " (disabled)" : ""); list_add_tail(&evm_config_default_xattrnames[i].list, &evm_config_xattrnames); } #ifdef CONFIG_EVM_ATTR_FSUUID evm_hmac_attrs |= EVM_ATTR_FSUUID; #endif pr_info("HMAC attrs: 0x%x\n", evm_hmac_attrs); } static bool evm_key_loaded(void) { return (bool)(evm_initialized & EVM_KEY_MASK); } /* * This function determines whether or not it is safe to ignore verification * errors, based on the ability of EVM to calculate HMACs. 
If the HMAC key * is not loaded, and it cannot be loaded in the future due to the * EVM_SETUP_COMPLETE initialization flag, allowing an operation despite the * attrs/xattrs being found invalid will not make them valid. */ static bool evm_hmac_disabled(void) { if (evm_initialized & EVM_INIT_HMAC) return false; if (!(evm_initialized & EVM_SETUP_COMPLETE)) return false; return true; } static int evm_find_protected_xattrs(struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); struct xattr_list *xattr; int error; int count = 0; if (!(inode->i_opflags & IOP_XATTR)) return -EOPNOTSUPP; list_for_each_entry_lockless(xattr, &evm_config_xattrnames, list) { error = __vfs_getxattr(dentry, inode, xattr->name, NULL, 0); if (error < 0) { if (error == -ENODATA) continue; return error; } count++; } return count; } static int is_unsupported_fs(struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); if (inode->i_sb->s_iflags & SB_I_EVM_UNSUPPORTED) { pr_info_once("%s not supported\n", inode->i_sb->s_type->name); return 1; } return 0; } /* * evm_verify_hmac - calculate and compare the HMAC with the EVM xattr * * Compute the HMAC on the dentry's protected set of extended attributes * and compare it against the stored security.evm xattr. * * For performance: * - use the previoulsy retrieved xattr value and length to calculate the * HMAC.) * - cache the verification result in the iint, when available. * * Returns integrity status */ static enum integrity_status evm_verify_hmac(struct dentry *dentry, const char *xattr_name, char *xattr_value, size_t xattr_value_len) { struct evm_ima_xattr_data *xattr_data = NULL; struct signature_v2_hdr *hdr; enum integrity_status evm_status = INTEGRITY_PASS; struct evm_digest digest; struct inode *inode = d_backing_inode(dentry); struct evm_iint_cache *iint = evm_iint_inode(inode); int rc, xattr_len, evm_immutable = 0; if (iint && (iint->evm_status == INTEGRITY_PASS || iint->evm_status == INTEGRITY_PASS_IMMUTABLE)) return iint->evm_status; if (is_unsupported_fs(dentry)) return INTEGRITY_UNKNOWN; /* if status is not PASS, try to check again - against -ENOMEM */ /* first need to know the sig type */ rc = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, XATTR_NAME_EVM, (char **)&xattr_data, 0, GFP_NOFS); if (rc <= 0) { evm_status = INTEGRITY_FAIL; if (rc == -ENODATA) { rc = evm_find_protected_xattrs(dentry); if (rc > 0) evm_status = INTEGRITY_NOLABEL; else if (rc == 0) evm_status = INTEGRITY_NOXATTRS; /* new file */ } else if (rc == -EOPNOTSUPP) { evm_status = INTEGRITY_UNKNOWN; } goto out; } xattr_len = rc; /* check value type */ switch (xattr_data->type) { case EVM_XATTR_HMAC: if (xattr_len != sizeof(struct evm_xattr)) { evm_status = INTEGRITY_FAIL; goto out; } digest.hdr.algo = HASH_ALGO_SHA1; rc = evm_calc_hmac(dentry, xattr_name, xattr_value, xattr_value_len, &digest); if (rc) break; rc = crypto_memneq(xattr_data->data, digest.digest, SHA1_DIGEST_SIZE); if (rc) rc = -EINVAL; break; case EVM_XATTR_PORTABLE_DIGSIG: evm_immutable = 1; fallthrough; case EVM_IMA_XATTR_DIGSIG: /* accept xattr with non-empty signature field */ if (xattr_len <= sizeof(struct signature_v2_hdr)) { evm_status = INTEGRITY_FAIL; goto out; } hdr = (struct signature_v2_hdr *)xattr_data; digest.hdr.algo = hdr->hash_algo; rc = evm_calc_hash(dentry, xattr_name, xattr_value, xattr_value_len, xattr_data->type, &digest); if (rc) break; rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM, (const char *)xattr_data, xattr_len, digest.digest, digest.hdr.length); if (!rc) { if (xattr_data->type 
== EVM_XATTR_PORTABLE_DIGSIG) { if (iint) iint->flags |= EVM_IMMUTABLE_DIGSIG; evm_status = INTEGRITY_PASS_IMMUTABLE; } else if (!IS_RDONLY(inode) && !(inode->i_sb->s_readonly_remount) && !IS_IMMUTABLE(inode)) { evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len); } } break; default: rc = -EINVAL; break; } if (rc) { if (rc == -ENODATA) evm_status = INTEGRITY_NOXATTRS; else if (evm_immutable) evm_status = INTEGRITY_FAIL_IMMUTABLE; else evm_status = INTEGRITY_FAIL; } pr_debug("digest: (%d) [%*phN]\n", digest.hdr.length, digest.hdr.length, digest.digest); out: if (iint) iint->evm_status = evm_status; kfree(xattr_data); return evm_status; } static int evm_protected_xattr_common(const char *req_xattr_name, bool all_xattrs) { int namelen; int found = 0; struct xattr_list *xattr; namelen = strlen(req_xattr_name); list_for_each_entry_lockless(xattr, &evm_config_xattrnames, list) { if (!all_xattrs && !xattr->enabled) continue; if ((strlen(xattr->name) == namelen) && (strncmp(req_xattr_name, xattr->name, namelen) == 0)) { found = 1; break; } if (strncmp(req_xattr_name, xattr->name + XATTR_SECURITY_PREFIX_LEN, strlen(req_xattr_name)) == 0) { found = 1; break; } } return found; } int evm_protected_xattr(const char *req_xattr_name) { return evm_protected_xattr_common(req_xattr_name, false); } int evm_protected_xattr_if_enabled(const char *req_xattr_name) { return evm_protected_xattr_common(req_xattr_name, true); } /** * evm_read_protected_xattrs - read EVM protected xattr names, lengths, values * @dentry: dentry of the read xattrs * @buffer: buffer xattr names, lengths or values are copied to * @buffer_size: size of buffer * @type: n: names, l: lengths, v: values * @canonical_fmt: data format (true: little endian, false: native format) * * Read protected xattr names (separated by |), lengths (u32) or values for a * given dentry and return the total size of copied data. If buffer is NULL, * just return the total size. * * Returns the total size on success, a negative value on error. */ int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer, int buffer_size, char type, bool canonical_fmt) { struct xattr_list *xattr; int rc, size, total_size = 0; list_for_each_entry_lockless(xattr, &evm_config_xattrnames, list) { rc = __vfs_getxattr(dentry, d_backing_inode(dentry), xattr->name, NULL, 0); if (rc < 0 && rc == -ENODATA) continue; else if (rc < 0) return rc; switch (type) { case 'n': size = strlen(xattr->name) + 1; if (buffer) { if (total_size) *(buffer + total_size - 1) = '|'; memcpy(buffer + total_size, xattr->name, size); } break; case 'l': size = sizeof(u32); if (buffer) { if (canonical_fmt) rc = (__force int)cpu_to_le32(rc); *(u32 *)(buffer + total_size) = rc; } break; case 'v': size = rc; if (buffer) { rc = __vfs_getxattr(dentry, d_backing_inode(dentry), xattr->name, buffer + total_size, buffer_size - total_size); if (rc < 0) return rc; } break; default: return -EINVAL; } total_size += size; } return total_size; } /** * evm_verifyxattr - verify the integrity of the requested xattr * @dentry: object of the verify xattr * @xattr_name: requested xattr * @xattr_value: requested xattr value * @xattr_value_len: requested xattr value length * * Calculate the HMAC for the given dentry and verify it against the stored * security.evm xattr. For performance, use the xattr value and length * previously retrieved to calculate the HMAC. * * Returns the xattr integrity status. * * This function requires the caller to lock the inode's i_mutex before it * is executed. 
*/ enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, size_t xattr_value_len) { if (!evm_key_loaded() || !evm_protected_xattr(xattr_name)) return INTEGRITY_UNKNOWN; if (is_unsupported_fs(dentry)) return INTEGRITY_UNKNOWN; return evm_verify_hmac(dentry, xattr_name, xattr_value, xattr_value_len); } EXPORT_SYMBOL_GPL(evm_verifyxattr); /* * evm_verify_current_integrity - verify the dentry's metadata integrity * @dentry: pointer to the affected dentry * * Verify and return the dentry's metadata integrity. The exceptions are * before EVM is initialized or in 'fix' mode. */ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); if (!evm_key_loaded() || !S_ISREG(inode->i_mode) || evm_fixmode) return INTEGRITY_PASS; return evm_verify_hmac(dentry, NULL, NULL, 0); } /* * evm_xattr_change - check if passed xattr value differs from current value * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @xattr_name: requested xattr * @xattr_value: requested xattr value * @xattr_value_len: requested xattr value length * * Check if passed xattr value differs from current value. * * Returns 1 if passed xattr value differs from current value, 0 otherwise. */ static int evm_xattr_change(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) { char *xattr_data = NULL; int rc = 0; rc = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, xattr_name, &xattr_data, 0, GFP_NOFS); if (rc < 0) { rc = 1; goto out; } if (rc == xattr_value_len) rc = !!memcmp(xattr_value, xattr_data, rc); else rc = 1; out: kfree(xattr_data); return rc; } /* * evm_protect_xattr - protect the EVM extended attribute * * Prevent security.evm from being modified or removed without the * necessary permissions or when the existing value is invalid. * * The posix xattr acls are 'system' prefixed, which normally would not * affect security.evm. An interesting side affect of writing posix xattr * acls is their modifying of the i_mode, which is included in security.evm. * For posix xattr acls only, permit security.evm, even if it currently * doesn't exist, to be updated unless the EVM signature is immutable. */ static int evm_protect_xattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) { enum integrity_status evm_status; if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (is_unsupported_fs(dentry)) return -EPERM; } else if (!evm_protected_xattr(xattr_name)) { if (!posix_xattr_acl(xattr_name)) return 0; if (is_unsupported_fs(dentry)) return 0; evm_status = evm_verify_current_integrity(dentry); if ((evm_status == INTEGRITY_PASS) || (evm_status == INTEGRITY_NOXATTRS)) return 0; goto out; } else if (is_unsupported_fs(dentry)) return 0; evm_status = evm_verify_current_integrity(dentry); if (evm_status == INTEGRITY_NOXATTRS) { struct evm_iint_cache *iint; /* Exception if the HMAC is not going to be calculated. 
*/ if (evm_hmac_disabled()) return 0; iint = evm_iint_inode(d_backing_inode(dentry)); if (iint && (iint->flags & EVM_NEW_FILE)) return 0; /* exception for pseudo filesystems */ if (dentry->d_sb->s_magic == TMPFS_MAGIC || dentry->d_sb->s_magic == SYSFS_MAGIC) return 0; integrity_audit_msg(AUDIT_INTEGRITY_METADATA, dentry->d_inode, dentry->d_name.name, "update_metadata", integrity_status_msg[evm_status], -EPERM, 0); } out: /* Exception if the HMAC is not going to be calculated. */ if (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL || evm_status == INTEGRITY_UNKNOWN)) return 0; /* * Writing other xattrs is safe for portable signatures, as portable * signatures are immutable and can never be updated. */ if (evm_status == INTEGRITY_FAIL_IMMUTABLE) return 0; if (evm_status == INTEGRITY_PASS_IMMUTABLE && !evm_xattr_change(idmap, dentry, xattr_name, xattr_value, xattr_value_len)) return 0; if (evm_status != INTEGRITY_PASS && evm_status != INTEGRITY_PASS_IMMUTABLE) integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry), dentry->d_name.name, "appraise_metadata", integrity_status_msg[evm_status], -EPERM, 0); return evm_status == INTEGRITY_PASS ? 0 : -EPERM; } /** * evm_inode_setxattr - protect the EVM extended attribute * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @xattr_name: pointer to the affected extended attribute name * @xattr_value: pointer to the new extended attribute value * @xattr_value_len: pointer to the new extended attribute value length * @flags: flags to pass into filesystem operations * * Before allowing the 'security.evm' protected xattr to be updated, * verify the existing value is valid. As only the kernel should have * access to the EVM encrypted key needed to calculate the HMAC, prevent * userspace from writing HMAC value. Writing 'security.evm' requires * requires CAP_SYS_ADMIN privileges. */ static int evm_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len, int flags) { const struct evm_ima_xattr_data *xattr_data = xattr_value; /* Policy permits modification of the protected xattrs even though * there's no HMAC key loaded */ if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) { if (!xattr_value_len) return -EINVAL; if (xattr_data->type != EVM_IMA_XATTR_DIGSIG && xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG) return -EPERM; } return evm_protect_xattr(idmap, dentry, xattr_name, xattr_value, xattr_value_len); } /** * evm_inode_removexattr - protect the EVM extended attribute * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @xattr_name: pointer to the affected extended attribute name * * Removing 'security.evm' requires CAP_SYS_ADMIN privileges and that * the current value is valid. 
*/ static int evm_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name) { /* Policy permits modification of the protected xattrs even though * there's no HMAC key loaded */ if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; return evm_protect_xattr(idmap, dentry, xattr_name, NULL, 0); } #ifdef CONFIG_FS_POSIX_ACL static int evm_inode_set_acl_change(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, struct posix_acl *kacl) { int rc; umode_t mode; struct inode *inode = d_backing_inode(dentry); if (!kacl) return 1; rc = posix_acl_update_mode(idmap, inode, &mode, &kacl); if (rc || (inode->i_mode != mode)) return 1; return 0; } #else static inline int evm_inode_set_acl_change(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, struct posix_acl *kacl) { return 0; } #endif /** * evm_inode_set_acl - protect the EVM extended attribute from posix acls * @idmap: idmap of the idmapped mount * @dentry: pointer to the affected dentry * @acl_name: name of the posix acl * @kacl: pointer to the posix acls * * Prevent modifying posix acls causing the EVM HMAC to be re-calculated * and 'security.evm' xattr updated, unless the existing 'security.evm' is * valid. * * Return: zero on success, -EPERM on failure. */ static int evm_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { enum integrity_status evm_status; /* Policy permits modification of the protected xattrs even though * there's no HMAC key loaded */ if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; evm_status = evm_verify_current_integrity(dentry); if ((evm_status == INTEGRITY_PASS) || (evm_status == INTEGRITY_NOXATTRS)) return 0; /* Exception if the HMAC is not going to be calculated. */ if (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL || evm_status == INTEGRITY_UNKNOWN)) return 0; /* * Writing other xattrs is safe for portable signatures, as portable * signatures are immutable and can never be updated. */ if (evm_status == INTEGRITY_FAIL_IMMUTABLE) return 0; if (evm_status == INTEGRITY_PASS_IMMUTABLE && !evm_inode_set_acl_change(idmap, dentry, acl_name, kacl)) return 0; if (evm_status != INTEGRITY_PASS_IMMUTABLE) integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry), dentry->d_name.name, "appraise_metadata", integrity_status_msg[evm_status], -EPERM, 0); return -EPERM; } /** * evm_inode_remove_acl - Protect the EVM extended attribute from posix acls * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @acl_name: name of the posix acl * * Prevent removing posix acls causing the EVM HMAC to be re-calculated * and 'security.evm' xattr updated, unless the existing 'security.evm' is * valid. * * Return: zero on success, -EPERM on failure. */ static int evm_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { return evm_inode_set_acl(idmap, dentry, acl_name, NULL); } static void evm_reset_status(struct inode *inode) { struct evm_iint_cache *iint; iint = evm_iint_inode(inode); if (iint) iint->evm_status = INTEGRITY_UNKNOWN; } /** * evm_revalidate_status - report whether EVM status re-validation is necessary * @xattr_name: pointer to the affected extended attribute name * * Report whether callers of evm_verifyxattr() should re-validate the * EVM status. * * Return true if re-validation is necessary, false otherwise. 
*/ bool evm_revalidate_status(const char *xattr_name) { if (!evm_key_loaded()) return false; /* evm_inode_post_setattr() passes NULL */ if (!xattr_name) return true; if (!evm_protected_xattr(xattr_name) && !posix_xattr_acl(xattr_name) && strcmp(xattr_name, XATTR_NAME_EVM)) return false; return true; } /** * evm_inode_post_setxattr - update 'security.evm' to reflect the changes * @dentry: pointer to the affected dentry * @xattr_name: pointer to the affected extended attribute name * @xattr_value: pointer to the new extended attribute value * @xattr_value_len: pointer to the new extended attribute value length * @flags: flags to pass into filesystem operations * * Update the HMAC stored in 'security.evm' to reflect the change. * * No need to take the i_mutex lock here, as this function is called from * __vfs_setxattr_noperm(). The caller of which has taken the inode's * i_mutex lock. */ static void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len, int flags) { if (!evm_revalidate_status(xattr_name)) return; evm_reset_status(dentry->d_inode); if (!strcmp(xattr_name, XATTR_NAME_EVM)) return; if (!(evm_initialized & EVM_INIT_HMAC)) return; if (is_unsupported_fs(dentry)) return; evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len); } /** * evm_inode_post_set_acl - Update the EVM extended attribute from posix acls * @dentry: pointer to the affected dentry * @acl_name: name of the posix acl * @kacl: pointer to the posix acls * * Update the 'security.evm' xattr with the EVM HMAC re-calculated after setting * posix acls. */ static void evm_inode_post_set_acl(struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { return evm_inode_post_setxattr(dentry, acl_name, NULL, 0, 0); } /** * evm_inode_post_removexattr - update 'security.evm' after removing the xattr * @dentry: pointer to the affected dentry * @xattr_name: pointer to the affected extended attribute name * * Update the HMAC stored in 'security.evm' to reflect removal of the xattr. * * No need to take the i_mutex lock here, as this function is called from * vfs_removexattr() which takes the i_mutex. */ static void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name) { if (!evm_revalidate_status(xattr_name)) return; evm_reset_status(dentry->d_inode); if (!strcmp(xattr_name, XATTR_NAME_EVM)) return; if (!(evm_initialized & EVM_INIT_HMAC)) return; evm_update_evmxattr(dentry, xattr_name, NULL, 0); } /** * evm_inode_post_remove_acl - Update the EVM extended attribute from posix acls * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @acl_name: name of the posix acl * * Update the 'security.evm' xattr with the EVM HMAC re-calculated after * removing posix acls. 
*/ static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { evm_inode_post_removexattr(dentry, acl_name); } static int evm_attr_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_backing_inode(dentry); unsigned int ia_valid = attr->ia_valid; if (!i_uid_needs_update(idmap, attr, inode) && !i_gid_needs_update(idmap, attr, inode) && (!(ia_valid & ATTR_MODE) || attr->ia_mode == inode->i_mode)) return 0; return 1; } /** * evm_inode_setattr - prevent updating an invalid EVM extended attribute * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @attr: iattr structure containing the new file attributes * * Permit update of file attributes when files have a valid EVM signature, * except in the case of them having an immutable portable signature. */ static int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { unsigned int ia_valid = attr->ia_valid; enum integrity_status evm_status; /* Policy permits modification of the protected attrs even though * there's no HMAC key loaded */ if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; if (is_unsupported_fs(dentry)) return 0; if (!(ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))) return 0; evm_status = evm_verify_current_integrity(dentry); /* * Writing attrs is safe for portable signatures, as portable signatures * are immutable and can never be updated. */ if ((evm_status == INTEGRITY_PASS) || (evm_status == INTEGRITY_NOXATTRS) || (evm_status == INTEGRITY_FAIL_IMMUTABLE) || (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL || evm_status == INTEGRITY_UNKNOWN))) return 0; if (evm_status == INTEGRITY_PASS_IMMUTABLE && !evm_attr_change(idmap, dentry, attr)) return 0; integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry), dentry->d_name.name, "appraise_metadata", integrity_status_msg[evm_status], -EPERM, 0); return -EPERM; } /** * evm_inode_post_setattr - update 'security.evm' after modifying metadata * @idmap: idmap of the idmapped mount * @dentry: pointer to the affected dentry * @ia_valid: for the UID and GID status * * For now, update the HMAC stored in 'security.evm' to reflect UID/GID * changes. * * This function is called from notify_change(), which expects the caller * to lock the inode's i_mutex. */ static void evm_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int ia_valid) { if (!evm_revalidate_status(NULL)) return; evm_reset_status(dentry->d_inode); if (!(evm_initialized & EVM_INIT_HMAC)) return; if (is_unsupported_fs(dentry)) return; if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) evm_update_evmxattr(dentry, NULL, NULL, 0); } static int evm_inode_copy_up_xattr(const char *name) { if (strcmp(name, XATTR_NAME_EVM) == 0) return 1; /* Discard */ return -EOPNOTSUPP; } /* * evm_inode_init_security - initializes security.evm HMAC value */ int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count) { struct evm_xattr *xattr_data; struct xattr *xattr, *evm_xattr; bool evm_protected_xattrs = false; int rc; if (!(evm_initialized & EVM_INIT_HMAC) || !xattrs) return 0; /* * security_inode_init_security() makes sure that the xattrs array is * contiguous, there is enough space for security.evm, and that there is * a terminator at the end of the array. 
*/ for (xattr = xattrs; xattr->name; xattr++) { if (evm_protected_xattr(xattr->name)) evm_protected_xattrs = true; } /* EVM xattr not needed. */ if (!evm_protected_xattrs) return 0; evm_xattr = lsm_get_xattr_slot(xattrs, xattr_count); /* * Array terminator (xattr name = NULL) must be the first non-filled * xattr slot. */ WARN_ONCE(evm_xattr != xattr, "%s: xattrs terminator is not the first non-filled slot\n", __func__); xattr_data = kzalloc(sizeof(*xattr_data), GFP_NOFS); if (!xattr_data) return -ENOMEM; xattr_data->data.type = EVM_XATTR_HMAC; rc = evm_init_hmac(inode, xattrs, xattr_data->digest); if (rc < 0) goto out; evm_xattr->value = xattr_data; evm_xattr->value_len = sizeof(*xattr_data); evm_xattr->name = XATTR_EVM_SUFFIX; return 0; out: kfree(xattr_data); return rc; } EXPORT_SYMBOL_GPL(evm_inode_init_security); static int evm_inode_alloc_security(struct inode *inode) { struct evm_iint_cache *iint = evm_iint_inode(inode); /* Called by security_inode_alloc(), it cannot be NULL. */ iint->flags = 0UL; iint->evm_status = INTEGRITY_UNKNOWN; return 0; } static void evm_file_release(struct file *file) { struct inode *inode = file_inode(file); struct evm_iint_cache *iint = evm_iint_inode(inode); fmode_t mode = file->f_mode; if (!S_ISREG(inode->i_mode) || !(mode & FMODE_WRITE)) return; if (iint && atomic_read(&inode->i_writecount) == 1) iint->flags &= ~EVM_NEW_FILE; } static void evm_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); struct evm_iint_cache *iint = evm_iint_inode(inode); if (!S_ISREG(inode->i_mode)) return; if (iint) iint->flags |= EVM_NEW_FILE; } #ifdef CONFIG_EVM_LOAD_X509 void __init evm_load_x509(void) { int rc; rc = integrity_load_x509(INTEGRITY_KEYRING_EVM, CONFIG_EVM_X509_PATH); if (!rc) evm_initialized |= EVM_INIT_X509; } #endif static int __init init_evm(void) { int error; struct list_head *pos, *q; evm_init_config(); error = integrity_init_keyring(INTEGRITY_KEYRING_EVM); if (error) goto error; error = evm_init_secfs(); if (error < 0) { pr_info("Error registering secfs\n"); goto error; } error: if (error != 0) { if (!list_empty(&evm_config_xattrnames)) { list_for_each_safe(pos, q, &evm_config_xattrnames) list_del(pos); } } return error; } static struct security_hook_list evm_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_setattr, evm_inode_setattr), LSM_HOOK_INIT(inode_post_setattr, evm_inode_post_setattr), LSM_HOOK_INIT(inode_copy_up_xattr, evm_inode_copy_up_xattr), LSM_HOOK_INIT(inode_setxattr, evm_inode_setxattr), LSM_HOOK_INIT(inode_post_setxattr, evm_inode_post_setxattr), LSM_HOOK_INIT(inode_set_acl, evm_inode_set_acl), LSM_HOOK_INIT(inode_post_set_acl, evm_inode_post_set_acl), LSM_HOOK_INIT(inode_remove_acl, evm_inode_remove_acl), LSM_HOOK_INIT(inode_post_remove_acl, evm_inode_post_remove_acl), LSM_HOOK_INIT(inode_removexattr, evm_inode_removexattr), LSM_HOOK_INIT(inode_post_removexattr, evm_inode_post_removexattr), LSM_HOOK_INIT(inode_init_security, evm_inode_init_security), LSM_HOOK_INIT(inode_alloc_security, evm_inode_alloc_security), LSM_HOOK_INIT(file_release, evm_file_release), LSM_HOOK_INIT(path_post_mknod, evm_post_path_mknod), }; static const struct lsm_id evm_lsmid = { .name = "evm", .id = LSM_ID_EVM, }; static int __init init_evm_lsm(void) { security_add_hooks(evm_hooks, ARRAY_SIZE(evm_hooks), &evm_lsmid); return 0; } struct lsm_blob_sizes evm_blob_sizes __ro_after_init = { .lbs_inode = sizeof(struct evm_iint_cache), .lbs_xattr_count = 1, }; DEFINE_LSM(evm) = { .name = "evm", .init = 
init_evm_lsm, .order = LSM_ORDER_LAST, .blobs = &evm_blob_sizes, }; late_initcall(init_evm);
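/*
 * A minimal, self-contained sketch of the LSM registration pattern used
 * above, kept separate from the EVM code itself.  The names example_*,
 * the file_open hook choice and the LSM_ID_UNDEF id are illustrative
 * assumptions, not something EVM defines; the sketch only shows how
 * LSM_HOOK_INIT(), security_add_hooks() and DEFINE_LSM() fit together.
 */
#include <linux/lsm_hooks.h>

static int example_file_open(struct file *file)
{
	return 0;	/* allow everything; a real hook would apply policy here */
}

static struct security_hook_list example_hooks[] __ro_after_init = {
	LSM_HOOK_INIT(file_open, example_file_open),
};

static const struct lsm_id example_lsmid = {
	.name	= "example",
	.id	= LSM_ID_UNDEF,	/* a real LSM registers its own LSM_ID_* value */
};

static int __init example_lsm_init(void)
{
	security_add_hooks(example_hooks, ARRAY_SIZE(example_hooks),
			   &example_lsmid);
	return 0;
}

DEFINE_LSM(example) = {
	.name = "example",
	.init = example_lsm_init,
};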
// SPDX-License-Identifier: GPL-2.0-only
/*
 *	scsi_pm.c	Copyright (C) 2010 Alan Stern
 *
 *	SCSI dynamic Power Management
 *	Initial version: Alan Stern <stern@rowland.harvard.edu>
 */

#include <linux/pm_runtime.h>
#include <linux/export.h>
#include <linux/blk-pm.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_host.h>

#include "scsi_priv.h"

#ifdef CONFIG_PM_SLEEP

static int do_scsi_suspend(struct device *dev, const struct dev_pm_ops *pm)
{
	return pm && pm->suspend ? pm->suspend(dev) : 0;
}

static int do_scsi_freeze(struct device *dev, const struct dev_pm_ops *pm)
{
	return pm && pm->freeze ? pm->freeze(dev) : 0;
}

static int do_scsi_poweroff(struct device *dev, const struct dev_pm_ops *pm)
{
	return pm && pm->poweroff ? pm->poweroff(dev) : 0;
}

static int do_scsi_resume(struct device *dev, const struct dev_pm_ops *pm)
{
	return pm && pm->resume ? pm->resume(dev) : 0;
}

static int do_scsi_thaw(struct device *dev, const struct dev_pm_ops *pm)
{
	return pm && pm->thaw ? pm->thaw(dev) : 0;
}

static int do_scsi_restore(struct device *dev, const struct dev_pm_ops *pm)
{
	return pm && pm->restore ? pm->restore(dev) : 0;
}

static int scsi_dev_type_suspend(struct device *dev,
		int (*cb)(struct device *, const struct dev_pm_ops *))
{
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	int err;

	err = scsi_device_quiesce(to_scsi_device(dev));
	if (err == 0) {
		err = cb(dev, pm);
		if (err)
			scsi_device_resume(to_scsi_device(dev));
	}
	dev_dbg(dev, "scsi suspend: %d\n", err);
	return err;
}

static int scsi_bus_suspend_common(struct device *dev,
		int (*cb)(struct device *, const struct dev_pm_ops *))
{
	if (!scsi_is_sdev_device(dev))
		return 0;

	return scsi_dev_type_suspend(dev, cb);
}

static int scsi_bus_resume_common(struct device *dev,
		int (*cb)(struct device *, const struct dev_pm_ops *))
{
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	int err;

	if (!scsi_is_sdev_device(dev))
		return 0;

	err = cb(dev, pm);
	scsi_device_resume(to_scsi_device(dev));
	dev_dbg(dev, "scsi resume: %d\n", err);
	return err;
}

static int scsi_bus_prepare(struct device *dev)
{
	if (scsi_is_host_device(dev)) {
		/* Wait until async scanning is finished */
		scsi_complete_async_scans();
	}
	return 0;
}

static int scsi_bus_suspend(struct device *dev)
{
	return scsi_bus_suspend_common(dev, do_scsi_suspend);
}

static int scsi_bus_resume(struct device *dev)
{
	return scsi_bus_resume_common(dev, do_scsi_resume);
}

static int scsi_bus_freeze(struct device *dev)
{
	return scsi_bus_suspend_common(dev, do_scsi_freeze);
}

static int scsi_bus_thaw(struct device *dev)
{
	return scsi_bus_resume_common(dev, do_scsi_thaw);
}

static int scsi_bus_poweroff(struct device *dev)
{
	return scsi_bus_suspend_common(dev, do_scsi_poweroff);
}

static int scsi_bus_restore(struct device *dev)
{
	return scsi_bus_resume_common(dev, do_scsi_restore);
}

#else /* CONFIG_PM_SLEEP */

#define scsi_bus_prepare	NULL
#define scsi_bus_suspend	NULL
#define scsi_bus_resume		NULL
#define scsi_bus_freeze		NULL
#define scsi_bus_thaw		NULL
#define scsi_bus_poweroff	NULL
#define scsi_bus_restore	NULL

#endif /* CONFIG_PM_SLEEP */

static int sdev_runtime_suspend(struct device *dev)
{
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	struct scsi_device *sdev = to_scsi_device(dev);
	int err = 0;

	err = blk_pre_runtime_suspend(sdev->request_queue);
	if (err)
		return err;
	if (pm && pm->runtime_suspend)
		err = pm->runtime_suspend(dev);
	blk_post_runtime_suspend(sdev->request_queue, err);

	return err;
}

static int scsi_runtime_suspend(struct device *dev)
{
	int err = 0;

	dev_dbg(dev, "scsi_runtime_suspend\n");
	if (scsi_is_sdev_device(dev))
		err = sdev_runtime_suspend(dev);

	/* Insert hooks here for targets, hosts, and transport classes */

	return err;
}

static int sdev_runtime_resume(struct device *dev)
{
	struct scsi_device *sdev = to_scsi_device(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	int err = 0;

	blk_pre_runtime_resume(sdev->request_queue);
	if (pm && pm->runtime_resume)
		err = pm->runtime_resume(dev);
	blk_post_runtime_resume(sdev->request_queue);

	return err;
}

static int scsi_runtime_resume(struct device *dev)
{
	int err = 0;

	dev_dbg(dev, "scsi_runtime_resume\n");
	if (scsi_is_sdev_device(dev))
		err = sdev_runtime_resume(dev);

	/* Insert hooks here for targets, hosts, and transport classes */

	return err;
}

static int scsi_runtime_idle(struct device *dev)
{
	dev_dbg(dev, "scsi_runtime_idle\n");

	/* Insert hooks here for targets, hosts, and transport classes */

	if (scsi_is_sdev_device(dev)) {
		pm_runtime_mark_last_busy(dev);
		pm_runtime_autosuspend(dev);
		return -EBUSY;
	}

	return 0;
}

int scsi_autopm_get_device(struct scsi_device *sdev)
{
	int err;

	err = pm_runtime_get_sync(&sdev->sdev_gendev);
	if (err < 0 && err != -EACCES)
		pm_runtime_put_sync(&sdev->sdev_gendev);
	else
		err = 0;
	return err;
}
EXPORT_SYMBOL_GPL(scsi_autopm_get_device);

void scsi_autopm_put_device(struct scsi_device *sdev)
{
	pm_runtime_put_sync(&sdev->sdev_gendev);
}
EXPORT_SYMBOL_GPL(scsi_autopm_put_device);

void scsi_autopm_get_target(struct scsi_target *starget)
{
	pm_runtime_get_sync(&starget->dev);
}

void scsi_autopm_put_target(struct scsi_target *starget)
{
	pm_runtime_put_sync(&starget->dev);
}

int scsi_autopm_get_host(struct Scsi_Host *shost)
{
	int err;

	err = pm_runtime_get_sync(&shost->shost_gendev);
	if (err < 0 && err != -EACCES)
		pm_runtime_put_sync(&shost->shost_gendev);
	else
		err = 0;
	return err;
}

void scsi_autopm_put_host(struct Scsi_Host *shost)
{
	pm_runtime_put_sync(&shost->shost_gendev);
}

const struct dev_pm_ops scsi_bus_pm_ops = {
	.prepare =		scsi_bus_prepare,
	.suspend =		scsi_bus_suspend,
	.resume =		scsi_bus_resume,
	.freeze =		scsi_bus_freeze,
	.thaw =			scsi_bus_thaw,
	.poweroff =		scsi_bus_poweroff,
	.restore =		scsi_bus_restore,
	.runtime_suspend =	scsi_runtime_suspend,
	.runtime_resume =	scsi_runtime_resume,
	.runtime_idle =		scsi_runtime_idle,
};
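/*
 * A minimal usage sketch, not part of scsi_pm.c: callers bracket access to a
 * possibly runtime-suspended device with the exported autopm helpers above,
 * so the device is resumed before it is used and may autosuspend afterwards.
 * The function example_send_command() and its body are hypothetical; it
 * assumes the scsi_device declarations already included by this file.
 */
static int example_send_command(struct scsi_device *sdev)
{
	int ret;

	ret = scsi_autopm_get_device(sdev);	/* resumes the device if needed */
	if (ret)
		return ret;

	/* ... issue commands to sdev here ... */

	scsi_autopm_put_device(sdev);		/* allow autosuspend again */
	return 0;
}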
// SPDX-License-Identifier: GPL-2.0-or-later /* * (Tentative) USB Audio Driver for ALSA * * Mixer control part * * Copyright (c) 2002 by Takashi Iwai <tiwai@suse.de> * * Many codes borrowed from
audio.c by * Alan Cox (alan@lxorguk.ukuu.org.uk) * Thomas Sailer (sailer@ife.ee.ethz.ch) */ /* * TODOs, for both the mixer and the streaming interfaces: * * - support for UAC2 effect units * - support for graphical equalizers * - RANGE and MEM set commands (UAC2) * - RANGE and MEM interrupt dispatchers (UAC2) * - audio channel clustering (UAC2) * - audio sample rate converter units (UAC2) * - proper handling of clock multipliers (UAC2) * - dispatch clock change notifications (UAC2) * - stop PCM streams which use a clock that became invalid * - stop PCM streams which use a clock selector that has changed * - parse available sample rates again when clock sources changed */ #include <linux/bitops.h> #include <linux/init.h> #include <linux/list.h> #include <linux/log2.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <linux/usb/audio-v2.h> #include <linux/usb/audio-v3.h> #include <sound/core.h> #include <sound/control.h> #include <sound/hwdep.h> #include <sound/info.h> #include <sound/tlv.h> #include "usbaudio.h" #include "mixer.h" #include "helper.h" #include "mixer_quirks.h" #include "power.h" #define MAX_ID_ELEMS 256 struct usb_audio_term { int id; int type; int channels; unsigned int chconfig; int name; }; struct usbmix_name_map; struct mixer_build { struct snd_usb_audio *chip; struct usb_mixer_interface *mixer; unsigned char *buffer; unsigned int buflen; DECLARE_BITMAP(unitbitmap, MAX_ID_ELEMS); DECLARE_BITMAP(termbitmap, MAX_ID_ELEMS); struct usb_audio_term oterm; const struct usbmix_name_map *map; const struct usbmix_selector_map *selector_map; }; /*E-mu 0202/0404/0204 eXtension Unit(XU) control*/ enum { USB_XU_CLOCK_RATE = 0xe301, USB_XU_CLOCK_SOURCE = 0xe302, USB_XU_DIGITAL_IO_STATUS = 0xe303, USB_XU_DEVICE_OPTIONS = 0xe304, USB_XU_DIRECT_MONITORING = 0xe305, USB_XU_METERING = 0xe306 }; enum { USB_XU_CLOCK_SOURCE_SELECTOR = 0x02, /* clock source*/ USB_XU_CLOCK_RATE_SELECTOR = 0x03, /* clock rate */ USB_XU_DIGITAL_FORMAT_SELECTOR = 0x01, /* the spdif format */ USB_XU_SOFT_LIMIT_SELECTOR = 0x03 /* soft limiter */ }; /* * manual mapping of mixer names * if the mixer topology is too complicated and the parsed names are * ambiguous, add the entries in usbmixer_maps.c. */ #include "mixer_maps.c" static const struct usbmix_name_map * find_map(const struct usbmix_name_map *p, int unitid, int control) { if (!p) return NULL; for (; p->id; p++) { if (p->id == unitid && (!control || !p->control || control == p->control)) return p; } return NULL; } /* get the mapped name if the unit matches */ static int check_mapped_name(const struct usbmix_name_map *p, char *buf, int buflen) { int len; if (!p || !p->name) return 0; buflen--; len = strscpy(buf, p->name, buflen); return len < 0 ? buflen : len; } /* ignore the error value if ignore_ctl_error flag is set */ #define filter_error(cval, err) \ ((cval)->head.mixer->ignore_ctl_error ? 
0 : (err)) /* check whether the control should be ignored */ static inline int check_ignored_ctl(const struct usbmix_name_map *p) { if (!p || p->name || p->dB) return 0; return 1; } /* dB mapping */ static inline void check_mapped_dB(const struct usbmix_name_map *p, struct usb_mixer_elem_info *cval) { if (p && p->dB) { cval->dBmin = p->dB->min; cval->dBmax = p->dB->max; cval->min_mute = p->dB->min_mute; cval->initialized = 1; } } /* get the mapped selector source name */ static int check_mapped_selector_name(struct mixer_build *state, int unitid, int index, char *buf, int buflen) { const struct usbmix_selector_map *p; int len; if (!state->selector_map) return 0; for (p = state->selector_map; p->id; p++) { if (p->id == unitid && index < p->count) { len = strscpy(buf, p->names[index], buflen); return len < 0 ? buflen : len; } } return 0; } /* * find an audio control unit with the given unit id */ static void *find_audio_control_unit(struct mixer_build *state, unsigned char unit) { /* we just parse the header */ struct uac_feature_unit_descriptor *hdr = NULL; while ((hdr = snd_usb_find_desc(state->buffer, state->buflen, hdr, USB_DT_CS_INTERFACE)) != NULL) { if (hdr->bLength >= 4 && hdr->bDescriptorSubtype >= UAC_INPUT_TERMINAL && hdr->bDescriptorSubtype <= UAC3_SAMPLE_RATE_CONVERTER && hdr->bUnitID == unit) return hdr; } return NULL; } /* * copy a string with the given id */ static int snd_usb_copy_string_desc(struct snd_usb_audio *chip, int index, char *buf, int maxlen) { int len = usb_string(chip->dev, index, buf, maxlen - 1); if (len < 0) return 0; buf[len] = 0; return len; } /* * convert from the byte/word on usb descriptor to the zero-based integer */ static int convert_signed_value(struct usb_mixer_elem_info *cval, int val) { switch (cval->val_type) { case USB_MIXER_BOOLEAN: return !!val; case USB_MIXER_INV_BOOLEAN: return !val; case USB_MIXER_U8: val &= 0xff; break; case USB_MIXER_S8: val &= 0xff; if (val >= 0x80) val -= 0x100; break; case USB_MIXER_U16: val &= 0xffff; break; case USB_MIXER_S16: val &= 0xffff; if (val >= 0x8000) val -= 0x10000; break; } return val; } /* * convert from the zero-based int to the byte/word for usb descriptor */ static int convert_bytes_value(struct usb_mixer_elem_info *cval, int val) { switch (cval->val_type) { case USB_MIXER_BOOLEAN: return !!val; case USB_MIXER_INV_BOOLEAN: return !val; case USB_MIXER_S8: case USB_MIXER_U8: return val & 0xff; case USB_MIXER_S16: case USB_MIXER_U16: return val & 0xffff; } return 0; /* not reached */ } static int get_relative_value(struct usb_mixer_elem_info *cval, int val) { if (!cval->res) cval->res = 1; if (val < cval->min) return 0; else if (val >= cval->max) return DIV_ROUND_UP(cval->max - cval->min, cval->res); else return (val - cval->min) / cval->res; } static int get_abs_value(struct usb_mixer_elem_info *cval, int val) { if (val < 0) return cval->min; if (!cval->res) cval->res = 1; val *= cval->res; val += cval->min; if (val > cval->max) return cval->max; return val; } static int uac2_ctl_value_size(int val_type) { switch (val_type) { case USB_MIXER_S32: case USB_MIXER_U32: return 4; case USB_MIXER_S16: case USB_MIXER_U16: return 2; default: return 1; } return 0; /* unreachable */ } /* * retrieve a mixer value */ static inline int mixer_ctrl_intf(struct usb_mixer_interface *mixer) { return get_iface_desc(mixer->hostif)->bInterfaceNumber; } static int get_ctl_value_v1(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret) { struct snd_usb_audio *chip = cval->head.mixer->chip; unsigned char 
buf[2]; int val_len = cval->val_type >= USB_MIXER_S16 ? 2 : 1; int timeout = 10; int idx = 0, err; err = snd_usb_lock_shutdown(chip); if (err < 0) return -EIO; while (timeout-- > 0) { idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8); err = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), request, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, validx, idx, buf, val_len); if (err >= val_len) { *value_ret = convert_signed_value(cval, snd_usb_combine_bytes(buf, val_len)); err = 0; goto out; } else if (err == -ETIMEDOUT) { goto out; } } usb_audio_dbg(chip, "cannot get ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n", request, validx, idx, cval->val_type); err = -EINVAL; out: snd_usb_unlock_shutdown(chip); return err; } static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret) { struct snd_usb_audio *chip = cval->head.mixer->chip; /* enough space for one range */ unsigned char buf[sizeof(__u16) + 3 * sizeof(__u32)]; unsigned char *val; int idx = 0, ret, val_size, size; __u8 bRequest; val_size = uac2_ctl_value_size(cval->val_type); if (request == UAC_GET_CUR) { bRequest = UAC2_CS_CUR; size = val_size; } else { bRequest = UAC2_CS_RANGE; size = sizeof(__u16) + 3 * val_size; } memset(buf, 0, sizeof(buf)); if (snd_usb_lock_shutdown(chip)) return -EIO; idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8); ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), bRequest, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, validx, idx, buf, size); snd_usb_unlock_shutdown(chip); if (ret < 0) { usb_audio_dbg(chip, "cannot get ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n", request, validx, idx, cval->val_type); return ret; } /* FIXME: how should we handle multiple triplets here? */ switch (request) { case UAC_GET_CUR: val = buf; break; case UAC_GET_MIN: val = buf + sizeof(__u16); break; case UAC_GET_MAX: val = buf + sizeof(__u16) + val_size; break; case UAC_GET_RES: val = buf + sizeof(__u16) + val_size * 2; break; default: return -EINVAL; } *value_ret = convert_signed_value(cval, snd_usb_combine_bytes(val, val_size)); return 0; } static int get_ctl_value(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret) { validx += cval->idx_off; return (cval->head.mixer->protocol == UAC_VERSION_1) ? 
get_ctl_value_v1(cval, request, validx, value_ret) : get_ctl_value_v2(cval, request, validx, value_ret); } static int get_cur_ctl_value(struct usb_mixer_elem_info *cval, int validx, int *value) { return get_ctl_value(cval, UAC_GET_CUR, validx, value); } /* channel = 0: master, 1 = first channel */ static inline int get_cur_mix_raw(struct usb_mixer_elem_info *cval, int channel, int *value) { return get_ctl_value(cval, UAC_GET_CUR, (cval->control << 8) | channel, value); } int snd_usb_get_cur_mix_value(struct usb_mixer_elem_info *cval, int channel, int index, int *value) { int err; if (cval->cached & (1 << channel)) { *value = cval->cache_val[index]; return 0; } err = get_cur_mix_raw(cval, channel, value); if (err < 0) { if (!cval->head.mixer->ignore_ctl_error) usb_audio_dbg(cval->head.mixer->chip, "cannot get current value for control %d ch %d: err = %d\n", cval->control, channel, err); return err; } cval->cached |= 1 << channel; cval->cache_val[index] = *value; return 0; } /* * set a mixer value */ int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval, int request, int validx, int value_set) { struct snd_usb_audio *chip = cval->head.mixer->chip; unsigned char buf[4]; int idx = 0, val_len, err, timeout = 10; validx += cval->idx_off; if (cval->head.mixer->protocol == UAC_VERSION_1) { val_len = cval->val_type >= USB_MIXER_S16 ? 2 : 1; } else { /* UAC_VERSION_2/3 */ val_len = uac2_ctl_value_size(cval->val_type); /* FIXME */ if (request != UAC_SET_CUR) { usb_audio_dbg(chip, "RANGE setting not yet supported\n"); return -EINVAL; } request = UAC2_CS_CUR; } value_set = convert_bytes_value(cval, value_set); buf[0] = value_set & 0xff; buf[1] = (value_set >> 8) & 0xff; buf[2] = (value_set >> 16) & 0xff; buf[3] = (value_set >> 24) & 0xff; err = snd_usb_lock_shutdown(chip); if (err < 0) return -EIO; while (timeout-- > 0) { idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8); err = snd_usb_ctl_msg(chip->dev, usb_sndctrlpipe(chip->dev, 0), request, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT, validx, idx, buf, val_len); if (err >= 0) { err = 0; goto out; } else if (err == -ETIMEDOUT) { goto out; } } usb_audio_dbg(chip, "cannot set ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d, data = %#x/%#x\n", request, validx, idx, cval->val_type, buf[0], buf[1]); err = -EINVAL; out: snd_usb_unlock_shutdown(chip); return err; } static int set_cur_ctl_value(struct usb_mixer_elem_info *cval, int validx, int value) { return snd_usb_mixer_set_ctl_value(cval, UAC_SET_CUR, validx, value); } int snd_usb_set_cur_mix_value(struct usb_mixer_elem_info *cval, int channel, int index, int value) { int err; unsigned int read_only = (channel == 0) ? 
cval->master_readonly : cval->ch_readonly & (1 << (channel - 1)); if (read_only) { usb_audio_dbg(cval->head.mixer->chip, "%s(): channel %d of control %d is read_only\n", __func__, channel, cval->control); return 0; } err = snd_usb_mixer_set_ctl_value(cval, UAC_SET_CUR, (cval->control << 8) | channel, value); if (err < 0) return err; cval->cached |= 1 << channel; cval->cache_val[index] = value; return 0; } /* * TLV callback for mixer volume controls */ int snd_usb_mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *_tlv) { struct usb_mixer_elem_info *cval = kcontrol->private_data; DECLARE_TLV_DB_MINMAX(scale, 0, 0); if (size < sizeof(scale)) return -ENOMEM; if (cval->min_mute) scale[0] = SNDRV_CTL_TLVT_DB_MINMAX_MUTE; scale[2] = cval->dBmin; scale[3] = cval->dBmax; if (copy_to_user(_tlv, scale, sizeof(scale))) return -EFAULT; return 0; } /* * parser routines begin here... */ static int parse_audio_unit(struct mixer_build *state, int unitid); /* * check if the input/output channel routing is enabled on the given bitmap. * used for mixer unit parser */ static int check_matrix_bitmap(unsigned char *bmap, int ich, int och, int num_outs) { int idx = ich * num_outs + och; return bmap[idx >> 3] & (0x80 >> (idx & 7)); } /* * add an alsa control element * search and increment the index until an empty slot is found. * * if failed, give up and free the control instance. */ int snd_usb_mixer_add_list(struct usb_mixer_elem_list *list, struct snd_kcontrol *kctl, bool is_std_info) { struct usb_mixer_interface *mixer = list->mixer; int err; while (snd_ctl_find_id(mixer->chip->card, &kctl->id)) kctl->id.index++; err = snd_ctl_add(mixer->chip->card, kctl); if (err < 0) { usb_audio_dbg(mixer->chip, "cannot add control (err = %d)\n", err); return err; } list->kctl = kctl; list->is_std_info = is_std_info; list->next_id_elem = mixer->id_elems[list->id]; mixer->id_elems[list->id] = list; return 0; } /* * get a terminal name string */ static struct iterm_name_combo { int type; char *name; } iterm_names[] = { { 0x0300, "Output" }, { 0x0301, "Speaker" }, { 0x0302, "Headphone" }, { 0x0303, "HMD Audio" }, { 0x0304, "Desktop Speaker" }, { 0x0305, "Room Speaker" }, { 0x0306, "Com Speaker" }, { 0x0307, "LFE" }, { 0x0600, "External In" }, { 0x0601, "Analog In" }, { 0x0602, "Digital In" }, { 0x0603, "Line" }, { 0x0604, "Legacy In" }, { 0x0605, "IEC958 In" }, { 0x0606, "1394 DA Stream" }, { 0x0607, "1394 DV Stream" }, { 0x0700, "Embedded" }, { 0x0701, "Noise Source" }, { 0x0702, "Equalization Noise" }, { 0x0703, "CD" }, { 0x0704, "DAT" }, { 0x0705, "DCC" }, { 0x0706, "MiniDisk" }, { 0x0707, "Analog Tape" }, { 0x0708, "Phonograph" }, { 0x0709, "VCR Audio" }, { 0x070a, "Video Disk Audio" }, { 0x070b, "DVD Audio" }, { 0x070c, "TV Tuner Audio" }, { 0x070d, "Satellite Rec Audio" }, { 0x070e, "Cable Tuner Audio" }, { 0x070f, "DSS Audio" }, { 0x0710, "Radio Receiver" }, { 0x0711, "Radio Transmitter" }, { 0x0712, "Multi-Track Recorder" }, { 0x0713, "Synthesizer" }, { 0 }, }; static int get_term_name(struct snd_usb_audio *chip, struct usb_audio_term *iterm, unsigned char *name, int maxlen, int term_only) { struct iterm_name_combo *names; int len; if (iterm->name) { len = snd_usb_copy_string_desc(chip, iterm->name, name, maxlen); if (len) return len; } /* virtual type - not a real terminal */ if (iterm->type >> 16) { if (term_only) return 0; switch (iterm->type >> 16) { case UAC3_SELECTOR_UNIT: strcpy(name, "Selector"); return 8; case UAC3_PROCESSING_UNIT: strcpy(name, "Process Unit"); 
return 12; case UAC3_EXTENSION_UNIT: strcpy(name, "Ext Unit"); return 8; case UAC3_MIXER_UNIT: strcpy(name, "Mixer"); return 5; default: return sprintf(name, "Unit %d", iterm->id); } } switch (iterm->type & 0xff00) { case 0x0100: strcpy(name, "PCM"); return 3; case 0x0200: strcpy(name, "Mic"); return 3; case 0x0400: strcpy(name, "Headset"); return 7; case 0x0500: strcpy(name, "Phone"); return 5; } for (names = iterm_names; names->type; names++) { if (names->type == iterm->type) { strcpy(name, names->name); return strlen(names->name); } } return 0; } /* * Get logical cluster information for UAC3 devices. */ static int get_cluster_channels_v3(struct mixer_build *state, unsigned int cluster_id) { struct uac3_cluster_header_descriptor c_header; int err; err = snd_usb_ctl_msg(state->chip->dev, usb_rcvctrlpipe(state->chip->dev, 0), UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, cluster_id, snd_usb_ctrl_intf(state->chip), &c_header, sizeof(c_header)); if (err < 0) goto error; if (err != sizeof(c_header)) { err = -EIO; goto error; } return c_header.bNrChannels; error: usb_audio_err(state->chip, "cannot request logical cluster ID: %d (err: %d)\n", cluster_id, err); return err; } /* * Get number of channels for a Mixer Unit. */ static int uac_mixer_unit_get_channels(struct mixer_build *state, struct uac_mixer_unit_descriptor *desc) { int mu_channels; switch (state->mixer->protocol) { case UAC_VERSION_1: case UAC_VERSION_2: default: if (desc->bLength < sizeof(*desc) + desc->bNrInPins + 1) return 0; /* no bmControls -> skip */ mu_channels = uac_mixer_unit_bNrChannels(desc); break; case UAC_VERSION_3: mu_channels = get_cluster_channels_v3(state, uac3_mixer_unit_wClusterDescrID(desc)); break; } return mu_channels; } /* * Parse Input Terminal Unit */ static int __check_input_term(struct mixer_build *state, int id, struct usb_audio_term *term); static int parse_term_uac1_iterm_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac_input_terminal_descriptor *d = p1; term->type = le16_to_cpu(d->wTerminalType); term->channels = d->bNrChannels; term->chconfig = le16_to_cpu(d->wChannelConfig); term->name = d->iTerminal; return 0; } static int parse_term_uac2_iterm_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac2_input_terminal_descriptor *d = p1; int err; /* call recursively to verify the referenced clock entity */ err = __check_input_term(state, d->bCSourceID, term); if (err < 0) return err; /* save input term properties after recursion, * to ensure they are not overriden by the recursion calls */ term->id = id; term->type = le16_to_cpu(d->wTerminalType); term->channels = d->bNrChannels; term->chconfig = le32_to_cpu(d->bmChannelConfig); term->name = d->iTerminal; return 0; } static int parse_term_uac3_iterm_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac3_input_terminal_descriptor *d = p1; int err; /* call recursively to verify the referenced clock entity */ err = __check_input_term(state, d->bCSourceID, term); if (err < 0) return err; /* save input term properties after recursion, * to ensure they are not overriden by the recursion calls */ term->id = id; term->type = le16_to_cpu(d->wTerminalType); err = get_cluster_channels_v3(state, le16_to_cpu(d->wClusterDescrID)); if (err < 0) return err; term->channels = err; /* REVISIT: UAC3 IT doesn't have channels cfg */ term->chconfig = 0; term->name = le16_to_cpu(d->wTerminalDescrStr); return 0; } 
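/*
 * Note on the three input-terminal parsers above: UAC1 and UAC2 terminals
 * carry the channel count and channel configuration directly in their
 * descriptors, whereas a UAC3 terminal only references a cluster descriptor,
 * so parse_term_uac3_iterm_unit() fetches the channel count separately via
 * get_cluster_channels_v3() and leaves chconfig at 0.  The UAC2/UAC3 parsers
 * also recurse into the referenced clock entity (bCSourceID) first and only
 * then fill in the terminal's own properties, so the recursion cannot
 * overwrite them.
 */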
static int parse_term_mixer_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac_mixer_unit_descriptor *d = p1; int protocol = state->mixer->protocol; int err; err = uac_mixer_unit_get_channels(state, d); if (err <= 0) return err; term->type = UAC3_MIXER_UNIT << 16; /* virtual type */ term->channels = err; if (protocol != UAC_VERSION_3) { term->chconfig = uac_mixer_unit_wChannelConfig(d, protocol); term->name = uac_mixer_unit_iMixer(d); } return 0; } static int parse_term_selector_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac_selector_unit_descriptor *d = p1; int err; /* call recursively to retrieve the channel info */ err = __check_input_term(state, d->baSourceID[0], term); if (err < 0) return err; term->type = UAC3_SELECTOR_UNIT << 16; /* virtual type */ term->id = id; if (state->mixer->protocol != UAC_VERSION_3) term->name = uac_selector_unit_iSelector(d); return 0; } static int parse_term_proc_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id, int vtype) { struct uac_processing_unit_descriptor *d = p1; int protocol = state->mixer->protocol; int err; if (d->bNrInPins) { /* call recursively to retrieve the channel info */ err = __check_input_term(state, d->baSourceID[0], term); if (err < 0) return err; } term->type = vtype << 16; /* virtual type */ term->id = id; if (protocol == UAC_VERSION_3) return 0; if (!term->channels) { term->channels = uac_processing_unit_bNrChannels(d); term->chconfig = uac_processing_unit_wChannelConfig(d, protocol); } term->name = uac_processing_unit_iProcessing(d, protocol); return 0; } static int parse_term_effect_unit(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac2_effect_unit_descriptor *d = p1; int err; err = __check_input_term(state, d->bSourceID, term); if (err < 0) return err; term->type = UAC3_EFFECT_UNIT << 16; /* virtual type */ term->id = id; return 0; } static int parse_term_uac2_clock_source(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac_clock_source_descriptor *d = p1; term->type = UAC3_CLOCK_SOURCE << 16; /* virtual type */ term->id = id; term->name = d->iClockSource; return 0; } static int parse_term_uac3_clock_source(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) { struct uac3_clock_source_descriptor *d = p1; term->type = UAC3_CLOCK_SOURCE << 16; /* virtual type */ term->id = id; term->name = le16_to_cpu(d->wClockSourceStr); return 0; } #define PTYPE(a, b) ((a) << 8 | (b)) /* * parse the source unit recursively until it reaches to a terminal * or a branched unit. */ static int __check_input_term(struct mixer_build *state, int id, struct usb_audio_term *term) { int protocol = state->mixer->protocol; void *p1; unsigned char *hdr; for (;;) { /* a loop in the terminal chain? 
*/ if (test_and_set_bit(id, state->termbitmap)) return -EINVAL; p1 = find_audio_control_unit(state, id); if (!p1) break; if (!snd_usb_validate_audio_desc(p1, protocol)) break; /* bad descriptor */ hdr = p1; term->id = id; switch (PTYPE(protocol, hdr[2])) { case PTYPE(UAC_VERSION_1, UAC_FEATURE_UNIT): case PTYPE(UAC_VERSION_2, UAC_FEATURE_UNIT): case PTYPE(UAC_VERSION_3, UAC3_FEATURE_UNIT): { /* the header is the same for all versions */ struct uac_feature_unit_descriptor *d = p1; id = d->bSourceID; break; /* continue to parse */ } case PTYPE(UAC_VERSION_1, UAC_INPUT_TERMINAL): return parse_term_uac1_iterm_unit(state, term, p1, id); case PTYPE(UAC_VERSION_2, UAC_INPUT_TERMINAL): return parse_term_uac2_iterm_unit(state, term, p1, id); case PTYPE(UAC_VERSION_3, UAC_INPUT_TERMINAL): return parse_term_uac3_iterm_unit(state, term, p1, id); case PTYPE(UAC_VERSION_1, UAC_MIXER_UNIT): case PTYPE(UAC_VERSION_2, UAC_MIXER_UNIT): case PTYPE(UAC_VERSION_3, UAC3_MIXER_UNIT): return parse_term_mixer_unit(state, term, p1, id); case PTYPE(UAC_VERSION_1, UAC_SELECTOR_UNIT): case PTYPE(UAC_VERSION_2, UAC_SELECTOR_UNIT): case PTYPE(UAC_VERSION_2, UAC2_CLOCK_SELECTOR): case PTYPE(UAC_VERSION_3, UAC3_SELECTOR_UNIT): case PTYPE(UAC_VERSION_3, UAC3_CLOCK_SELECTOR): return parse_term_selector_unit(state, term, p1, id); case PTYPE(UAC_VERSION_1, UAC1_PROCESSING_UNIT): case PTYPE(UAC_VERSION_2, UAC2_PROCESSING_UNIT_V2): case PTYPE(UAC_VERSION_3, UAC3_PROCESSING_UNIT): return parse_term_proc_unit(state, term, p1, id, UAC3_PROCESSING_UNIT); case PTYPE(UAC_VERSION_2, UAC2_EFFECT_UNIT): case PTYPE(UAC_VERSION_3, UAC3_EFFECT_UNIT): return parse_term_effect_unit(state, term, p1, id); case PTYPE(UAC_VERSION_1, UAC1_EXTENSION_UNIT): case PTYPE(UAC_VERSION_2, UAC2_EXTENSION_UNIT_V2): case PTYPE(UAC_VERSION_3, UAC3_EXTENSION_UNIT): return parse_term_proc_unit(state, term, p1, id, UAC3_EXTENSION_UNIT); case PTYPE(UAC_VERSION_2, UAC2_CLOCK_SOURCE): return parse_term_uac2_clock_source(state, term, p1, id); case PTYPE(UAC_VERSION_3, UAC3_CLOCK_SOURCE): return parse_term_uac3_clock_source(state, term, p1, id); default: return -ENODEV; } } return -ENODEV; } static int check_input_term(struct mixer_build *state, int id, struct usb_audio_term *term) { memset(term, 0, sizeof(*term)); memset(state->termbitmap, 0, sizeof(state->termbitmap)); return __check_input_term(state, id, term); } /* * Feature Unit */ /* feature unit control information */ struct usb_feature_control_info { int control; const char *name; int type; /* data type for uac1 */ int type_uac2; /* data type for uac2 if different from uac1, else -1 */ }; static const struct usb_feature_control_info audio_feature_info[] = { { UAC_FU_MUTE, "Mute", USB_MIXER_INV_BOOLEAN, -1 }, { UAC_FU_VOLUME, "Volume", USB_MIXER_S16, -1 }, { UAC_FU_BASS, "Tone Control - Bass", USB_MIXER_S8, -1 }, { UAC_FU_MID, "Tone Control - Mid", USB_MIXER_S8, -1 }, { UAC_FU_TREBLE, "Tone Control - Treble", USB_MIXER_S8, -1 }, { UAC_FU_GRAPHIC_EQUALIZER, "Graphic Equalizer", USB_MIXER_S8, -1 }, /* FIXME: not implemented yet */ { UAC_FU_AUTOMATIC_GAIN, "Auto Gain Control", USB_MIXER_BOOLEAN, -1 }, { UAC_FU_DELAY, "Delay Control", USB_MIXER_U16, USB_MIXER_U32 }, { UAC_FU_BASS_BOOST, "Bass Boost", USB_MIXER_BOOLEAN, -1 }, { UAC_FU_LOUDNESS, "Loudness", USB_MIXER_BOOLEAN, -1 }, /* UAC2 specific */ { UAC2_FU_INPUT_GAIN, "Input Gain Control", USB_MIXER_S16, -1 }, { UAC2_FU_INPUT_GAIN_PAD, "Input Gain Pad Control", USB_MIXER_S16, -1 }, { UAC2_FU_PHASE_INVERTER, "Phase Inverter Control", USB_MIXER_BOOLEAN, -1 }, }; 
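/*
 * Note on audio_feature_info[] above: 'type' is the wire format of each
 * Feature Unit control under UAC1, and 'type_uac2' overrides it for the
 * UAC2 path (also used for UAC3 here) when the format differs; -1 means
 * "same as UAC1".  For example, UAC_FU_DELAY widens from an unsigned 16-bit
 * value to an unsigned 32-bit value under UAC2, while UAC_FU_VOLUME is a
 * signed 16-bit value in both cases.
 */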
static void usb_mixer_elem_info_free(struct usb_mixer_elem_info *cval) { kfree(cval); } /* private_free callback */ void snd_usb_mixer_elem_free(struct snd_kcontrol *kctl) { usb_mixer_elem_info_free(kctl->private_data); kctl->private_data = NULL; } /* * interface to ALSA control for feature/mixer units */ /* volume control quirks */ static void volume_control_quirks(struct usb_mixer_elem_info *cval, struct snd_kcontrol *kctl) { struct snd_usb_audio *chip = cval->head.mixer->chip; switch (chip->usb_id) { case USB_ID(0x0763, 0x2030): /* M-Audio Fast Track C400 */ case USB_ID(0x0763, 0x2031): /* M-Audio Fast Track C600 */ if (strcmp(kctl->id.name, "Effect Duration") == 0) { cval->min = 0x0000; cval->max = 0xffff; cval->res = 0x00e6; break; } if (strcmp(kctl->id.name, "Effect Volume") == 0 || strcmp(kctl->id.name, "Effect Feedback Volume") == 0) { cval->min = 0x00; cval->max = 0xff; break; } if (strstr(kctl->id.name, "Effect Return") != NULL) { cval->min = 0xb706; cval->max = 0xff7b; cval->res = 0x0073; break; } if ((strstr(kctl->id.name, "Playback Volume") != NULL) || (strstr(kctl->id.name, "Effect Send") != NULL)) { cval->min = 0xb5fb; /* -73 dB = 0xb6ff */ cval->max = 0xfcfe; cval->res = 0x0073; } break; case USB_ID(0x0763, 0x2081): /* M-Audio Fast Track Ultra 8R */ case USB_ID(0x0763, 0x2080): /* M-Audio Fast Track Ultra */ if (strcmp(kctl->id.name, "Effect Duration") == 0) { usb_audio_info(chip, "set quirk for FTU Effect Duration\n"); cval->min = 0x0000; cval->max = 0x7f00; cval->res = 0x0100; break; } if (strcmp(kctl->id.name, "Effect Volume") == 0 || strcmp(kctl->id.name, "Effect Feedback Volume") == 0) { usb_audio_info(chip, "set quirks for FTU Effect Feedback/Volume\n"); cval->min = 0x00; cval->max = 0x7f; break; } break; case USB_ID(0x0d8c, 0x0103): if (!strcmp(kctl->id.name, "PCM Playback Volume")) { usb_audio_info(chip, "set volume quirk for CM102-A+/102S+\n"); cval->min = -256; } break; case USB_ID(0x0471, 0x0101): case USB_ID(0x0471, 0x0104): case USB_ID(0x0471, 0x0105): case USB_ID(0x0672, 0x1041): /* quirk for UDA1321/N101. * note that detection between firmware 2.1.1.7 (N101) * and later 2.1.1.21 is not very clear from datasheets. * I hope that the min value is -15360 for newer firmware --jk */ if (!strcmp(kctl->id.name, "PCM Playback Volume") && cval->min == -15616) { usb_audio_info(chip, "set volume quirk for UDA1321/N101 chip\n"); cval->max = -256; } break; case USB_ID(0x046d, 0x09a4): if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, "set volume quirk for QuickCam E3500\n"); cval->min = 6080; cval->max = 8768; cval->res = 192; } break; case USB_ID(0x046d, 0x0807): /* Logitech Webcam C500 */ case USB_ID(0x046d, 0x0808): case USB_ID(0x046d, 0x0809): case USB_ID(0x046d, 0x0819): /* Logitech Webcam C210 */ case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */ case USB_ID(0x046d, 0x081d): /* HD Webcam c510 */ case USB_ID(0x046d, 0x0825): /* HD Webcam c270 */ case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */ case USB_ID(0x046d, 0x08ca): /* Logitech Quickcam Fusion */ case USB_ID(0x046d, 0x0991): case USB_ID(0x046d, 0x09a2): /* QuickCam Communicate Deluxe/S7500 */ /* Most audio usb devices lie about volume resolution. * Most Logitech webcams have res = 384. 
* Probably there is some logitech magic behind this number --fishor */ if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, "set resolution quirk: cval->res = 384\n"); cval->res = 384; } break; case USB_ID(0x0495, 0x3042): /* ESS Technology Asus USB DAC */ if ((strstr(kctl->id.name, "Playback Volume") != NULL) || strstr(kctl->id.name, "Capture Volume") != NULL) { cval->min >>= 8; cval->max = 0; cval->res = 1; } break; case USB_ID(0x1224, 0x2a25): /* Jieli Technology USB PHY 2.0 */ if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, "set resolution quirk: cval->res = 16\n"); cval->res = 16; } break; case USB_ID(0x1bcf, 0x2283): /* NexiGo N930AF FHD Webcam */ if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, "set resolution quirk: cval->res = 16\n"); cval->res = 16; } break; } } /* forcibly initialize the current mixer value; if GET_CUR fails, set to * the minimum as default */ static void init_cur_mix_raw(struct usb_mixer_elem_info *cval, int ch, int idx) { int val, err; err = snd_usb_get_cur_mix_value(cval, ch, idx, &val); if (!err) return; if (!cval->head.mixer->ignore_ctl_error) usb_audio_warn(cval->head.mixer->chip, "%d:%d: failed to get current value for ch %d (%d)\n", cval->head.id, mixer_ctrl_intf(cval->head.mixer), ch, err); snd_usb_set_cur_mix_value(cval, ch, idx, cval->min); } /* * retrieve the minimum and maximum values for the specified control */ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, int default_min, struct snd_kcontrol *kctl) { int i, idx; /* for failsafe */ cval->min = default_min; cval->max = cval->min + 1; cval->res = 1; cval->dBmin = cval->dBmax = 0; if (cval->val_type == USB_MIXER_BOOLEAN || cval->val_type == USB_MIXER_INV_BOOLEAN) { cval->initialized = 1; } else { int minchn = 0; if (cval->cmask) { for (i = 0; i < MAX_CHANNELS; i++) if (cval->cmask & (1 << i)) { minchn = i + 1; break; } } if (get_ctl_value(cval, UAC_GET_MAX, (cval->control << 8) | minchn, &cval->max) < 0 || get_ctl_value(cval, UAC_GET_MIN, (cval->control << 8) | minchn, &cval->min) < 0) { usb_audio_err(cval->head.mixer->chip, "%d:%d: cannot get min/max values for control %d (id %d)\n", cval->head.id, mixer_ctrl_intf(cval->head.mixer), cval->control, cval->head.id); return -EINVAL; } if (get_ctl_value(cval, UAC_GET_RES, (cval->control << 8) | minchn, &cval->res) < 0) { cval->res = 1; } else if (cval->head.mixer->protocol == UAC_VERSION_1) { int last_valid_res = cval->res; while (cval->res > 1) { if (snd_usb_mixer_set_ctl_value(cval, UAC_SET_RES, (cval->control << 8) | minchn, cval->res / 2) < 0) break; cval->res /= 2; } if (get_ctl_value(cval, UAC_GET_RES, (cval->control << 8) | minchn, &cval->res) < 0) cval->res = last_valid_res; } if (cval->res == 0) cval->res = 1; /* Additional checks for the proper resolution * * Some devices report smaller resolutions than actually * reacting. They don't return errors but simply clip * to the lower aligned value. 
		 */
		if (cval->min + cval->res < cval->max) {
			int last_valid_res = cval->res;
			int saved, test, check;
			if (get_cur_mix_raw(cval, minchn, &saved) < 0)
				goto no_res_check;
			for (;;) {
				test = saved;
				if (test < cval->max)
					test += cval->res;
				else
					test -= cval->res;
				if (test < cval->min || test > cval->max ||
				    snd_usb_set_cur_mix_value(cval, minchn, 0, test) ||
				    get_cur_mix_raw(cval, minchn, &check)) {
					cval->res = last_valid_res;
					break;
				}
				if (test == check)
					break;
				cval->res *= 2;
			}
			snd_usb_set_cur_mix_value(cval, minchn, 0, saved);
		}

no_res_check:
		cval->initialized = 1;
	}

	if (kctl)
		volume_control_quirks(cval, kctl);

	/* USB descriptions contain the dB scale in 1/256 dB unit
	 * while ALSA TLV contains in 1/100 dB unit
	 */
	cval->dBmin = (convert_signed_value(cval, cval->min) * 100) / 256;
	cval->dBmax = (convert_signed_value(cval, cval->max) * 100) / 256;
	if (cval->dBmin > cval->dBmax) {
		/* something is wrong; assume it's either from/to 0dB */
		if (cval->dBmin < 0)
			cval->dBmax = 0;
		else if (cval->dBmin > 0)
			cval->dBmin = 0;
		if (cval->dBmin > cval->dBmax) {
			/* totally crap, return an error */
			return -EINVAL;
		}
	} else {
		/* if the max volume is too low, it's likely a bogus range;
		 * here we use -96dB as the threshold
		 */
		if (cval->dBmax <= -9600) {
			usb_audio_info(cval->head.mixer->chip,
				       "%d:%d: bogus dB values (%d/%d), disabling dB reporting\n",
				       cval->head.id, mixer_ctrl_intf(cval->head.mixer),
				       cval->dBmin, cval->dBmax);
			cval->dBmin = cval->dBmax = 0;
		}
	}

	/* initialize all elements */
	if (!cval->cmask) {
		init_cur_mix_raw(cval, 0, 0);
	} else {
		idx = 0;
		for (i = 0; i < MAX_CHANNELS; i++) {
			if (cval->cmask & (1 << i)) {
				init_cur_mix_raw(cval, i + 1, idx);
				idx++;
			}
		}
	}

	return 0;
}

#define get_min_max(cval, def)	get_min_max_with_quirks(cval, def, NULL)

/* get a feature/mixer unit info */
static int mixer_ctl_feature_info(struct snd_kcontrol *kcontrol,
				  struct snd_ctl_elem_info *uinfo)
{
	struct usb_mixer_elem_info *cval = kcontrol->private_data;

	if (cval->val_type == USB_MIXER_BOOLEAN ||
	    cval->val_type == USB_MIXER_INV_BOOLEAN)
		uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
	else
		uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
	uinfo->count = cval->channels;
	if (cval->val_type == USB_MIXER_BOOLEAN ||
	    cval->val_type == USB_MIXER_INV_BOOLEAN) {
		uinfo->value.integer.min = 0;
		uinfo->value.integer.max = 1;
	} else {
		if (!cval->initialized) {
			get_min_max_with_quirks(cval, 0, kcontrol);
			if (cval->initialized && cval->dBmin >= cval->dBmax) {
				kcontrol->vd[0].access &=
					~(SNDRV_CTL_ELEM_ACCESS_TLV_READ |
					  SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK);
				snd_ctl_notify(cval->head.mixer->chip->card,
					       SNDRV_CTL_EVENT_MASK_INFO,
					       &kcontrol->id);
			}
		}
		uinfo->value.integer.min = 0;
		uinfo->value.integer.max =
			DIV_ROUND_UP(cval->max - cval->min, cval->res);
	}
	return 0;
}

/* get the current value from feature/mixer unit */
static int mixer_ctl_feature_get(struct snd_kcontrol *kcontrol,
				 struct snd_ctl_elem_value *ucontrol)
{
	struct usb_mixer_elem_info *cval = kcontrol->private_data;
	int c, cnt, val, err;

	ucontrol->value.integer.value[0] = cval->min;
	if (cval->cmask) {
		cnt = 0;
		for (c = 0; c < MAX_CHANNELS; c++) {
			if (!(cval->cmask & (1 << c)))
				continue;
			err = snd_usb_get_cur_mix_value(cval, c + 1, cnt, &val);
			if (err < 0)
				return filter_error(cval, err);
			val = get_relative_value(cval, val);
			ucontrol->value.integer.value[cnt] = val;
			cnt++;
		}
		return 0;
	} else {
		/* master channel */
		err = snd_usb_get_cur_mix_value(cval, 0, 0, &val);
		if (err < 0)
			return filter_error(cval, err);
		val = get_relative_value(cval, val);
		ucontrol->value.integer.value[0] = val;
	}
	return 0;
}

/* put the current value to feature/mixer unit */
static int mixer_ctl_feature_put(struct snd_kcontrol *kcontrol,
				 struct snd_ctl_elem_value *ucontrol)
{
	struct usb_mixer_elem_info *cval = kcontrol->private_data;
	int c, cnt, val, oval, err;
	int changed = 0;

	if (cval->cmask) {
		cnt = 0;
		for (c = 0; c < MAX_CHANNELS; c++) {
			if (!(cval->cmask & (1 << c)))
				continue;
			err = snd_usb_get_cur_mix_value(cval, c + 1, cnt, &oval);
			if (err < 0)
				return filter_error(cval, err);
			val = ucontrol->value.integer.value[cnt];
			val = get_abs_value(cval, val);
			if (oval != val) {
				snd_usb_set_cur_mix_value(cval, c + 1, cnt, val);
				changed = 1;
			}
			cnt++;
		}
	} else {
		/* master channel */
		err = snd_usb_get_cur_mix_value(cval, 0, 0, &oval);
		if (err < 0)
			return filter_error(cval, err);
		val = ucontrol->value.integer.value[0];
		val = get_abs_value(cval, val);
		if (val != oval) {
			snd_usb_set_cur_mix_value(cval, 0, 0, val);
			changed = 1;
		}
	}
	return changed;
}

/* get the boolean value from the master channel of a UAC control */
static int mixer_ctl_master_bool_get(struct snd_kcontrol *kcontrol,
				     struct snd_ctl_elem_value *ucontrol)
{
	struct usb_mixer_elem_info *cval = kcontrol->private_data;
	int val, err;

	err = snd_usb_get_cur_mix_value(cval, 0, 0, &val);
	if (err < 0)
		return filter_error(cval, err);
	val = (val != 0);
	ucontrol->value.integer.value[0] = val;
	return 0;
}

static int get_connector_value(struct usb_mixer_elem_info *cval,
			       char *name, int *val)
{
	struct snd_usb_audio *chip = cval->head.mixer->chip;
	int idx = 0, validx, ret;

	validx = cval->control << 8 | 0;

	ret = snd_usb_lock_shutdown(chip) ? -EIO : 0;
	if (ret)
		goto error;

	idx = mixer_ctrl_intf(cval->head.mixer) | (cval->head.id << 8);
	if (cval->head.mixer->protocol == UAC_VERSION_2) {
		struct uac2_connectors_ctl_blk uac2_conn;

		ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0),
				      UAC2_CS_CUR,
				      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
				      validx, idx, &uac2_conn, sizeof(uac2_conn));
		if (val)
			*val = !!uac2_conn.bNrChannels;
	} else { /* UAC_VERSION_3 */
		struct uac3_insertion_ctl_blk uac3_conn;

		ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0),
				      UAC2_CS_CUR,
				      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
				      validx, idx, &uac3_conn, sizeof(uac3_conn));
		if (val)
			*val = !!uac3_conn.bmConInserted;
	}

	snd_usb_unlock_shutdown(chip);

	if (ret < 0) {
		if (name && strstr(name, "Speaker")) {
			if (val)
				*val = 1;
			return 0;
		}
error:
		usb_audio_err(chip,
			      "cannot get connectors status: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n",
			      UAC_GET_CUR, validx, idx, cval->val_type);

		if (val)
			*val = 0;

		return filter_error(cval, ret);
	}

	return ret;
}

/* get the connectors status and report it as boolean type */
static int mixer_ctl_connector_get(struct snd_kcontrol *kcontrol,
				   struct snd_ctl_elem_value *ucontrol)
{
	struct usb_mixer_elem_info *cval = kcontrol->private_data;
	int ret, val;

	ret = get_connector_value(cval, kcontrol->id.name, &val);
	if (ret < 0)
		return ret;
	ucontrol->value.integer.value[0] = val;
	return 0;
}

static const struct snd_kcontrol_new usb_feature_unit_ctl = {
	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
	.name = "", /* will be filled later manually */
	.info = mixer_ctl_feature_info,
	.get = mixer_ctl_feature_get,
	.put = mixer_ctl_feature_put,
};

/* the read-only variant */
static const struct snd_kcontrol_new usb_feature_unit_ctl_ro = {
	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
	.name = "", /* will be filled later manually */
	.info = mixer_ctl_feature_info,
	.get = mixer_ctl_feature_get,
	.put = NULL,
};

/*
 * A control which shows the boolean value from reading a UAC control on
 * the master channel.
 */
static const struct snd_kcontrol_new usb_bool_master_control_ctl_ro = {
	.iface = SNDRV_CTL_ELEM_IFACE_CARD,
	.name = "", /* will be filled later manually */
	.access = SNDRV_CTL_ELEM_ACCESS_READ,
	.info = snd_ctl_boolean_mono_info,
	.get = mixer_ctl_master_bool_get,
	.put = NULL,
};

static const struct snd_kcontrol_new usb_connector_ctl_ro = {
	.iface = SNDRV_CTL_ELEM_IFACE_CARD,
	.name = "", /* will be filled later manually */
	.access = SNDRV_CTL_ELEM_ACCESS_READ,
	.info = snd_ctl_boolean_mono_info,
	.get = mixer_ctl_connector_get,
	.put = NULL,
};

/*
 * This symbol is exported in order to allow the mixer quirks to
 * hook up to the standard feature unit control mechanism
 */
const struct snd_kcontrol_new *snd_usb_feature_unit_ctl = &usb_feature_unit_ctl;

/*
 * build a feature control
 */
static size_t append_ctl_name(struct snd_kcontrol *kctl, const char *str)
{
	return strlcat(kctl->id.name, str, sizeof(kctl->id.name));
}

/*
 * A lot of headsets/headphones have a "Speaker" mixer. Make sure we
 * rename it to "Headphone". We determine if something is a headphone
 * similar to how udev determines form factor.
 */
static void check_no_speaker_on_headset(struct snd_kcontrol *kctl,
					struct snd_card *card)
{
	static const char * const names_to_check[] = {
		"Headset", "headset", "Headphone", "headphone", NULL};
	const char * const *s;
	bool found = false;

	if (strcmp("Speaker", kctl->id.name))
		return;

	for (s = names_to_check; *s; s++)
		if (strstr(card->shortname, *s)) {
			found = true;
			break;
		}

	if (!found)
		return;

	snd_ctl_rename(card, kctl, "Headphone");
}

static const struct usb_feature_control_info *get_feature_control_info(int control)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(audio_feature_info); ++i) {
		if (audio_feature_info[i].control == control)
			return &audio_feature_info[i];
	}
	return NULL;
}

static void __build_feature_ctl(struct usb_mixer_interface *mixer,
				const struct usbmix_name_map *imap,
				unsigned int ctl_mask, int control,
				struct usb_audio_term *iterm,
				struct usb_audio_term *oterm,
				int unitid, int nameid, int readonly_mask)
{
	const struct usb_feature_control_info *ctl_info;
	unsigned int len = 0;
	int mapped_name = 0;
	struct snd_kcontrol *kctl;
	struct usb_mixer_elem_info *cval;
	const struct usbmix_name_map *map;
	unsigned int range;

	if (control == UAC_FU_GRAPHIC_EQUALIZER) {
		/* FIXME: not supported yet */
		return;
	}

	map = find_map(imap, unitid, control);
	if (check_ignored_ctl(map))
		return;

	cval = kzalloc(sizeof(*cval), GFP_KERNEL);
	if (!cval)
		return;
	snd_usb_mixer_elem_init_std(&cval->head, mixer, unitid);
	cval->control = control;
	cval->cmask = ctl_mask;

	ctl_info = get_feature_control_info(control);
	if (!ctl_info) {
		usb_mixer_elem_info_free(cval);
		return;
	}
	if (mixer->protocol == UAC_VERSION_1)
		cval->val_type = ctl_info->type;
	else /* UAC_VERSION_2 */
		cval->val_type = ctl_info->type_uac2 >= 0 ?
			ctl_info->type_uac2 : ctl_info->type;

	if (ctl_mask == 0) {
		cval->channels = 1;	/* master channel */
		cval->master_readonly = readonly_mask;
	} else {
		int i, c = 0;
		for (i = 0; i < 16; i++)
			if (ctl_mask & (1 << i))
				c++;
		cval->channels = c;
		cval->ch_readonly = readonly_mask;
	}

	/*
	 * If all channels in the mask are marked read-only, make the control
	 * read-only. snd_usb_set_cur_mix_value() will check the mask again and won't
	 * issue write commands to read-only channels.
	 */
	if (cval->channels == readonly_mask)
		kctl = snd_ctl_new1(&usb_feature_unit_ctl_ro, cval);
	else
		kctl = snd_ctl_new1(&usb_feature_unit_ctl, cval);

	if (!kctl) {
		usb_audio_err(mixer->chip, "cannot malloc kcontrol\n");
		usb_mixer_elem_info_free(cval);
		return;
	}
	kctl->private_free = snd_usb_mixer_elem_free;

	len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
	mapped_name = len != 0;
	if (!len && nameid)
		len = snd_usb_copy_string_desc(mixer->chip, nameid,
				kctl->id.name, sizeof(kctl->id.name));

	switch (control) {
	case UAC_FU_MUTE:
	case UAC_FU_VOLUME:
		/*
		 * determine the control name. the rule is:
		 * - if a name id is given in descriptor, use it.
		 * - if the connected input can be determined, then use the name
		 *   of terminal type.
		 * - if the connected output can be determined, use it.
		 * - otherwise, anonymous name.
		 */
		if (!len) {
			if (iterm)
				len = get_term_name(mixer->chip, iterm,
						    kctl->id.name,
						    sizeof(kctl->id.name), 1);
			if (!len && oterm)
				len = get_term_name(mixer->chip, oterm,
						    kctl->id.name,
						    sizeof(kctl->id.name), 1);
			if (!len)
				snprintf(kctl->id.name, sizeof(kctl->id.name),
					 "Feature %d", unitid);
		}

		if (!mapped_name)
			check_no_speaker_on_headset(kctl, mixer->chip->card);

		/*
		 * determine the stream direction:
		 * if the connected output is USB stream, then it's likely a
		 * capture stream. otherwise it should be playback (hopefully :)
		 */
		if (!mapped_name && oterm && !(oterm->type >> 16)) {
			if ((oterm->type & 0xff00) == 0x0100)
				append_ctl_name(kctl, " Capture");
			else
				append_ctl_name(kctl, " Playback");
		}
		append_ctl_name(kctl, control == UAC_FU_MUTE ?
				" Switch" : " Volume");
		break;
	default:
		if (!len)
			strscpy(kctl->id.name, audio_feature_info[control-1].name,
				sizeof(kctl->id.name));
		break;
	}

	/* get min/max values */
	get_min_max_with_quirks(cval, 0, kctl);

	/* skip a bogus volume range */
	if (cval->max <= cval->min) {
		usb_audio_dbg(mixer->chip,
			      "[%d] FU [%s] skipped due to invalid volume\n",
			      cval->head.id, kctl->id.name);
		snd_ctl_free_one(kctl);
		return;
	}

	if (control == UAC_FU_VOLUME) {
		check_mapped_dB(map, cval);
		if (cval->dBmin < cval->dBmax || !cval->initialized) {
			kctl->tlv.c = snd_usb_mixer_vol_tlv;
			kctl->vd[0].access |=
				SNDRV_CTL_ELEM_ACCESS_TLV_READ |
				SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
		}
	}

	snd_usb_mixer_fu_apply_quirk(mixer, cval, unitid, kctl);

	range = (cval->max - cval->min) / cval->res;
	/*
	 * Are there devices with volume range more than 255? I use a bit more
	 * to be sure. 384 is a resolution magic number found on Logitech
	 * devices. It will definitively catch all buggy Logitech devices.
	 */
	if (range > 384) {
		usb_audio_warn(mixer->chip,
			       "Warning! Unlikely big volume range (=%u), cval->res is probably wrong.",
			       range);
		usb_audio_warn(mixer->chip,
			       "[%d] FU [%s] ch = %d, val = %d/%d/%d",
			       cval->head.id, kctl->id.name, cval->channels,
			       cval->min, cval->max, cval->res);
	}

	usb_audio_dbg(mixer->chip, "[%d] FU [%s] ch = %d, val = %d/%d/%d\n",
		      cval->head.id, kctl->id.name, cval->channels,
		      cval->min, cval->max, cval->res);
	snd_usb_mixer_add_control(&cval->head, kctl);
}

static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
			      unsigned int ctl_mask, int control,
			      struct usb_audio_term *iterm, int unitid,
			      int readonly_mask)
{
	struct uac_feature_unit_descriptor *desc = raw_desc;
	int nameid = uac_feature_unit_iFeature(desc);

	__build_feature_ctl(state->mixer, state->map, ctl_mask, control,
			iterm, &state->oterm, unitid, nameid, readonly_mask);
}

static void build_feature_ctl_badd(struct usb_mixer_interface *mixer,
				   unsigned int ctl_mask, int control,
				   int unitid,
				   const struct usbmix_name_map *badd_map)
{
	__build_feature_ctl(mixer, badd_map, ctl_mask, control,
			NULL, NULL, unitid, 0, 0);
}

static void get_connector_control_name(struct usb_mixer_interface *mixer,
				       struct usb_audio_term *term,
				       bool is_input, char *name, int name_size)
{
	int name_len = get_term_name(mixer->chip, term, name, name_size, 0);

	if (name_len == 0)
		strscpy(name, "Unknown", name_size);

	/*
	 * sound/core/ctljack.c has a convention of naming jack controls
	 * by ending in " Jack". Make it slightly more useful by
	 * indicating Input or Output after the terminal name.
	 */
	if (is_input)
		strlcat(name, " - Input Jack", name_size);
	else
		strlcat(name, " - Output Jack", name_size);
}

/* get connector value to "wake up" the USB audio */
static int connector_mixer_resume(struct usb_mixer_elem_list *list)
{
	struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);

	get_connector_value(cval, NULL, NULL);
	return 0;
}

/* Build a mixer control for a UAC connector control (jack-detect) */
static void build_connector_control(struct usb_mixer_interface *mixer,
				    const struct usbmix_name_map *imap,
				    struct usb_audio_term *term, bool is_input)
{
	struct snd_kcontrol *kctl;
	struct usb_mixer_elem_info *cval;
	const struct usbmix_name_map *map;

	map = find_map(imap, term->id, 0);
	if (check_ignored_ctl(map))
		return;

	cval = kzalloc(sizeof(*cval), GFP_KERNEL);
	if (!cval)
		return;
	snd_usb_mixer_elem_init_std(&cval->head, mixer, term->id);

	/* set up a specific resume callback */
	cval->head.resume = connector_mixer_resume;

	/*
	 * UAC2: The first byte from reading the UAC2_TE_CONNECTOR control returns the
	 * number of channels connected.
	 *
	 * UAC3: The first byte specifies size of bitmap for the inserted controls. The
	 * following byte(s) specifies which connectors are inserted.
	 *
	 * This boolean ctl will simply report if any channels are connected
	 * or not.
*/ if (mixer->protocol == UAC_VERSION_2) cval->control = UAC2_TE_CONNECTOR; else /* UAC_VERSION_3 */ cval->control = UAC3_TE_INSERTION; cval->val_type = USB_MIXER_BOOLEAN; cval->channels = 1; /* report true if any channel is connected */ cval->min = 0; cval->max = 1; kctl = snd_ctl_new1(&usb_connector_ctl_ro, cval); if (!kctl) { usb_audio_err(mixer->chip, "cannot malloc kcontrol\n"); usb_mixer_elem_info_free(cval); return; } if (check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name))) strlcat(kctl->id.name, " Jack", sizeof(kctl->id.name)); else get_connector_control_name(mixer, term, is_input, kctl->id.name, sizeof(kctl->id.name)); kctl->private_free = snd_usb_mixer_elem_free; snd_usb_mixer_add_control(&cval->head, kctl); } static int parse_clock_source_unit(struct mixer_build *state, int unitid, void *_ftr) { struct uac_clock_source_descriptor *hdr = _ftr; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; int ret; if (state->mixer->protocol != UAC_VERSION_2) return -EINVAL; /* * The only property of this unit we are interested in is the * clock source validity. If that isn't readable, just bail out. */ if (!uac_v2v3_control_is_readable(hdr->bmControls, UAC2_CS_CONTROL_CLOCK_VALID)) return 0; cval = kzalloc(sizeof(*cval), GFP_KERNEL); if (!cval) return -ENOMEM; snd_usb_mixer_elem_init_std(&cval->head, state->mixer, hdr->bClockID); cval->min = 0; cval->max = 1; cval->channels = 1; cval->val_type = USB_MIXER_BOOLEAN; cval->control = UAC2_CS_CONTROL_CLOCK_VALID; cval->master_readonly = 1; /* From UAC2 5.2.5.1.2 "Only the get request is supported." */ kctl = snd_ctl_new1(&usb_bool_master_control_ctl_ro, cval); if (!kctl) { usb_mixer_elem_info_free(cval); return -ENOMEM; } kctl->private_free = snd_usb_mixer_elem_free; ret = snd_usb_copy_string_desc(state->chip, hdr->iClockSource, kctl->id.name, sizeof(kctl->id.name)); if (ret > 0) append_ctl_name(kctl, " Validity"); else snprintf(kctl->id.name, sizeof(kctl->id.name), "Clock Source %d Validity", hdr->bClockID); return snd_usb_mixer_add_control(&cval->head, kctl); } /* * parse a feature unit * * most of controls are defined here. 
*/ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void *_ftr) { int channels, i, j; struct usb_audio_term iterm; unsigned int master_bits; int err, csize; struct uac_feature_unit_descriptor *hdr = _ftr; __u8 *bmaControls; if (state->mixer->protocol == UAC_VERSION_1) { csize = hdr->bControlSize; channels = (hdr->bLength - 7) / csize - 1; bmaControls = hdr->bmaControls; } else if (state->mixer->protocol == UAC_VERSION_2) { struct uac2_feature_unit_descriptor *ftr = _ftr; csize = 4; channels = (hdr->bLength - 6) / 4 - 1; bmaControls = ftr->bmaControls; } else { /* UAC_VERSION_3 */ struct uac3_feature_unit_descriptor *ftr = _ftr; csize = 4; channels = (ftr->bLength - 7) / 4 - 1; bmaControls = ftr->bmaControls; } /* parse the source unit */ err = parse_audio_unit(state, hdr->bSourceID); if (err < 0) return err; /* determine the input source type and name */ err = check_input_term(state, hdr->bSourceID, &iterm); if (err < 0) return err; master_bits = snd_usb_combine_bytes(bmaControls, csize); /* master configuration quirks */ switch (state->chip->usb_id) { case USB_ID(0x08bb, 0x2702): usb_audio_info(state->chip, "usbmixer: master volume quirk for PCM2702 chip\n"); /* disable non-functional volume control */ master_bits &= ~UAC_CONTROL_BIT(UAC_FU_VOLUME); break; case USB_ID(0x1130, 0xf211): usb_audio_info(state->chip, "usbmixer: volume control quirk for Tenx TP6911 Audio Headset\n"); /* disable non-functional volume control */ channels = 0; break; } if (state->mixer->protocol == UAC_VERSION_1) { /* check all control types */ for (i = 0; i < 10; i++) { unsigned int ch_bits = 0; int control = audio_feature_info[i].control; for (j = 0; j < channels; j++) { unsigned int mask; mask = snd_usb_combine_bytes(bmaControls + csize * (j+1), csize); if (mask & (1 << i)) ch_bits |= (1 << j); } /* audio class v1 controls are never read-only */ /* * The first channel must be set * (for ease of programming). */ if (ch_bits & 1) build_feature_ctl(state, _ftr, ch_bits, control, &iterm, unitid, 0); if (master_bits & (1 << i)) build_feature_ctl(state, _ftr, 0, control, &iterm, unitid, 0); } } else { /* UAC_VERSION_2/3 */ for (i = 0; i < ARRAY_SIZE(audio_feature_info); i++) { unsigned int ch_bits = 0; unsigned int ch_read_only = 0; int control = audio_feature_info[i].control; for (j = 0; j < channels; j++) { unsigned int mask; mask = snd_usb_combine_bytes(bmaControls + csize * (j+1), csize); if (uac_v2v3_control_is_readable(mask, control)) { ch_bits |= (1 << j); if (!uac_v2v3_control_is_writeable(mask, control)) ch_read_only |= (1 << j); } } /* * NOTE: build_feature_ctl() will mark the control * read-only if all channels are marked read-only in * the descriptors. Otherwise, the control will be * reported as writeable, but the driver will not * actually issue a write command for read-only * channels. */ /* * The first channel must be set * (for ease of programming). 
*/ if (ch_bits & 1) build_feature_ctl(state, _ftr, ch_bits, control, &iterm, unitid, ch_read_only); if (uac_v2v3_control_is_readable(master_bits, control)) build_feature_ctl(state, _ftr, 0, control, &iterm, unitid, !uac_v2v3_control_is_writeable(master_bits, control)); } } return 0; } /* * Mixer Unit */ /* check whether the given in/out overflows bmMixerControls matrix */ static bool mixer_bitmap_overflow(struct uac_mixer_unit_descriptor *desc, int protocol, int num_ins, int num_outs) { u8 *hdr = (u8 *)desc; u8 *c = uac_mixer_unit_bmControls(desc, protocol); size_t rest; /* remaining bytes after bmMixerControls */ switch (protocol) { case UAC_VERSION_1: default: rest = 1; /* iMixer */ break; case UAC_VERSION_2: rest = 2; /* bmControls + iMixer */ break; case UAC_VERSION_3: rest = 6; /* bmControls + wMixerDescrStr */ break; } /* overflow? */ return c + (num_ins * num_outs + 7) / 8 + rest > hdr + hdr[0]; } /* * build a mixer unit control * * the callbacks are identical with feature unit. * input channel number (zero based) is given in control field instead. */ static void build_mixer_unit_ctl(struct mixer_build *state, struct uac_mixer_unit_descriptor *desc, int in_pin, int in_ch, int num_outs, int unitid, struct usb_audio_term *iterm) { struct usb_mixer_elem_info *cval; unsigned int i, len; struct snd_kcontrol *kctl; const struct usbmix_name_map *map; map = find_map(state->map, unitid, 0); if (check_ignored_ctl(map)) return; cval = kzalloc(sizeof(*cval), GFP_KERNEL); if (!cval) return; snd_usb_mixer_elem_init_std(&cval->head, state->mixer, unitid); cval->control = in_ch + 1; /* based on 1 */ cval->val_type = USB_MIXER_S16; for (i = 0; i < num_outs; i++) { __u8 *c = uac_mixer_unit_bmControls(desc, state->mixer->protocol); if (check_matrix_bitmap(c, in_ch, i, num_outs)) { cval->cmask |= (1 << i); cval->channels++; } } /* get min/max values */ get_min_max(cval, 0); kctl = snd_ctl_new1(&usb_feature_unit_ctl, cval); if (!kctl) { usb_audio_err(state->chip, "cannot malloc kcontrol\n"); usb_mixer_elem_info_free(cval); return; } kctl->private_free = snd_usb_mixer_elem_free; len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); if (!len) len = get_term_name(state->chip, iterm, kctl->id.name, sizeof(kctl->id.name), 0); if (!len) len = sprintf(kctl->id.name, "Mixer Source %d", in_ch + 1); append_ctl_name(kctl, " Volume"); usb_audio_dbg(state->chip, "[%d] MU [%s] ch = %d, val = %d/%d\n", cval->head.id, kctl->id.name, cval->channels, cval->min, cval->max); snd_usb_mixer_add_control(&cval->head, kctl); } static int parse_audio_input_terminal(struct mixer_build *state, int unitid, void *raw_desc) { struct usb_audio_term iterm; unsigned int control, bmctls, term_id; if (state->mixer->protocol == UAC_VERSION_2) { struct uac2_input_terminal_descriptor *d_v2 = raw_desc; control = UAC2_TE_CONNECTOR; term_id = d_v2->bTerminalID; bmctls = le16_to_cpu(d_v2->bmControls); } else if (state->mixer->protocol == UAC_VERSION_3) { struct uac3_input_terminal_descriptor *d_v3 = raw_desc; control = UAC3_TE_INSERTION; term_id = d_v3->bTerminalID; bmctls = le32_to_cpu(d_v3->bmControls); } else { return 0; /* UAC1. No Insertion control */ } check_input_term(state, term_id, &iterm); /* Check for jack detection. 
*/ if ((iterm.type & 0xff00) != 0x0100 && uac_v2v3_control_is_readable(bmctls, control)) build_connector_control(state->mixer, state->map, &iterm, true); return 0; } /* * parse a mixer unit */ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid, void *raw_desc) { struct uac_mixer_unit_descriptor *desc = raw_desc; struct usb_audio_term iterm; int input_pins, num_ins, num_outs; int pin, ich, err; err = uac_mixer_unit_get_channels(state, desc); if (err < 0) { usb_audio_err(state->chip, "invalid MIXER UNIT descriptor %d\n", unitid); return err; } num_outs = err; input_pins = desc->bNrInPins; num_ins = 0; ich = 0; for (pin = 0; pin < input_pins; pin++) { err = parse_audio_unit(state, desc->baSourceID[pin]); if (err < 0) continue; /* no bmControls field (e.g. Maya44) -> ignore */ if (!num_outs) continue; err = check_input_term(state, desc->baSourceID[pin], &iterm); if (err < 0) return err; num_ins += iterm.channels; if (mixer_bitmap_overflow(desc, state->mixer->protocol, num_ins, num_outs)) break; for (; ich < num_ins; ich++) { int och, ich_has_controls = 0; for (och = 0; och < num_outs; och++) { __u8 *c = uac_mixer_unit_bmControls(desc, state->mixer->protocol); if (check_matrix_bitmap(c, ich, och, num_outs)) { ich_has_controls = 1; break; } } if (ich_has_controls) build_mixer_unit_ctl(state, desc, pin, ich, num_outs, unitid, &iterm); } } return 0; } /* * Processing Unit / Extension Unit */ /* get callback for processing/extension unit */ static int mixer_ctl_procunit_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int err, val; err = get_cur_ctl_value(cval, cval->control << 8, &val); if (err < 0) { ucontrol->value.integer.value[0] = cval->min; return filter_error(cval, err); } val = get_relative_value(cval, val); ucontrol->value.integer.value[0] = val; return 0; } /* put callback for processing/extension unit */ static int mixer_ctl_procunit_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int val, oval, err; err = get_cur_ctl_value(cval, cval->control << 8, &oval); if (err < 0) return filter_error(cval, err); val = ucontrol->value.integer.value[0]; val = get_abs_value(cval, val); if (val != oval) { set_cur_ctl_value(cval, cval->control << 8, val); return 1; } return 0; } /* alsa control interface for processing/extension unit */ static const struct snd_kcontrol_new mixer_procunit_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", /* will be filled later */ .info = mixer_ctl_feature_info, .get = mixer_ctl_procunit_get, .put = mixer_ctl_procunit_put, }; /* * predefined data for processing units */ struct procunit_value_info { int control; const char *suffix; int val_type; int min_value; }; struct procunit_info { int type; char *name; const struct procunit_value_info *values; }; static const struct procunit_value_info undefined_proc_info[] = { { 0x00, "Control Undefined", 0 }, { 0 } }; static const struct procunit_value_info updown_proc_info[] = { { UAC_UD_ENABLE, "Switch", USB_MIXER_BOOLEAN }, { UAC_UD_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 }, { 0 } }; static const struct procunit_value_info prologic_proc_info[] = { { UAC_DP_ENABLE, "Switch", USB_MIXER_BOOLEAN }, { UAC_DP_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 }, { 0 } }; static const struct procunit_value_info threed_enh_proc_info[] = { { UAC_3D_ENABLE, "Switch", USB_MIXER_BOOLEAN }, { UAC_3D_SPACE, "Spaciousness", USB_MIXER_U8 }, { 0 } }; 
static const struct procunit_value_info reverb_proc_info[] = { { UAC_REVERB_ENABLE, "Switch", USB_MIXER_BOOLEAN }, { UAC_REVERB_LEVEL, "Level", USB_MIXER_U8 }, { UAC_REVERB_TIME, "Time", USB_MIXER_U16 }, { UAC_REVERB_FEEDBACK, "Feedback", USB_MIXER_U8 }, { 0 } }; static const struct procunit_value_info chorus_proc_info[] = { { UAC_CHORUS_ENABLE, "Switch", USB_MIXER_BOOLEAN }, { UAC_CHORUS_LEVEL, "Level", USB_MIXER_U8 }, { UAC_CHORUS_RATE, "Rate", USB_MIXER_U16 }, { UAC_CHORUS_DEPTH, "Depth", USB_MIXER_U16 }, { 0 } }; static const struct procunit_value_info dcr_proc_info[] = { { UAC_DCR_ENABLE, "Switch", USB_MIXER_BOOLEAN }, { UAC_DCR_RATE, "Ratio", USB_MIXER_U16 }, { UAC_DCR_MAXAMPL, "Max Amp", USB_MIXER_S16 }, { UAC_DCR_THRESHOLD, "Threshold", USB_MIXER_S16 }, { UAC_DCR_ATTACK_TIME, "Attack Time", USB_MIXER_U16 }, { UAC_DCR_RELEASE_TIME, "Release Time", USB_MIXER_U16 }, { 0 } }; static const struct procunit_info procunits[] = { { UAC_PROCESS_UP_DOWNMIX, "Up Down", updown_proc_info }, { UAC_PROCESS_DOLBY_PROLOGIC, "Dolby Prologic", prologic_proc_info }, { UAC_PROCESS_STEREO_EXTENDER, "3D Stereo Extender", threed_enh_proc_info }, { UAC_PROCESS_REVERB, "Reverb", reverb_proc_info }, { UAC_PROCESS_CHORUS, "Chorus", chorus_proc_info }, { UAC_PROCESS_DYN_RANGE_COMP, "DCR", dcr_proc_info }, { 0 }, }; static const struct procunit_value_info uac3_updown_proc_info[] = { { UAC3_UD_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 }, { 0 } }; static const struct procunit_value_info uac3_stereo_ext_proc_info[] = { { UAC3_EXT_WIDTH_CONTROL, "Width Control", USB_MIXER_U8 }, { 0 } }; static const struct procunit_info uac3_procunits[] = { { UAC3_PROCESS_UP_DOWNMIX, "Up Down", uac3_updown_proc_info }, { UAC3_PROCESS_STEREO_EXTENDER, "3D Stereo Extender", uac3_stereo_ext_proc_info }, { UAC3_PROCESS_MULTI_FUNCTION, "Multi-Function", undefined_proc_info }, { 0 }, }; /* * predefined data for extension units */ static const struct procunit_value_info clock_rate_xu_info[] = { { USB_XU_CLOCK_RATE_SELECTOR, "Selector", USB_MIXER_U8, 0 }, { 0 } }; static const struct procunit_value_info clock_source_xu_info[] = { { USB_XU_CLOCK_SOURCE_SELECTOR, "External", USB_MIXER_BOOLEAN }, { 0 } }; static const struct procunit_value_info spdif_format_xu_info[] = { { USB_XU_DIGITAL_FORMAT_SELECTOR, "SPDIF/AC3", USB_MIXER_BOOLEAN }, { 0 } }; static const struct procunit_value_info soft_limit_xu_info[] = { { USB_XU_SOFT_LIMIT_SELECTOR, " ", USB_MIXER_BOOLEAN }, { 0 } }; static const struct procunit_info extunits[] = { { USB_XU_CLOCK_RATE, "Clock rate", clock_rate_xu_info }, { USB_XU_CLOCK_SOURCE, "DigitalIn CLK source", clock_source_xu_info }, { USB_XU_DIGITAL_IO_STATUS, "DigitalOut format:", spdif_format_xu_info }, { USB_XU_DEVICE_OPTIONS, "AnalogueIn Soft Limit", soft_limit_xu_info }, { 0 } }; /* * build a processing/extension unit */ static int build_audio_procunit(struct mixer_build *state, int unitid, void *raw_desc, const struct procunit_info *list, bool extension_unit) { struct uac_processing_unit_descriptor *desc = raw_desc; int num_ins; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; int i, err, nameid, type, len, val; const struct procunit_info *info; const struct procunit_value_info *valinfo; const struct usbmix_name_map *map; static const struct procunit_value_info default_value_info[] = { { 0x01, "Switch", USB_MIXER_BOOLEAN }, { 0 } }; static const struct procunit_info default_info = { 0, NULL, default_value_info }; const char *name = extension_unit ? 
"Extension Unit" : "Processing Unit"; num_ins = desc->bNrInPins; for (i = 0; i < num_ins; i++) { err = parse_audio_unit(state, desc->baSourceID[i]); if (err < 0) return err; } type = le16_to_cpu(desc->wProcessType); for (info = list; info && info->type; info++) if (info->type == type) break; if (!info || !info->type) info = &default_info; for (valinfo = info->values; valinfo->control; valinfo++) { __u8 *controls = uac_processing_unit_bmControls(desc, state->mixer->protocol); if (state->mixer->protocol == UAC_VERSION_1) { if (!(controls[valinfo->control / 8] & (1 << ((valinfo->control % 8) - 1)))) continue; } else { /* UAC_VERSION_2/3 */ if (!uac_v2v3_control_is_readable(controls[valinfo->control / 8], valinfo->control)) continue; } map = find_map(state->map, unitid, valinfo->control); if (check_ignored_ctl(map)) continue; cval = kzalloc(sizeof(*cval), GFP_KERNEL); if (!cval) return -ENOMEM; snd_usb_mixer_elem_init_std(&cval->head, state->mixer, unitid); cval->control = valinfo->control; cval->val_type = valinfo->val_type; cval->channels = 1; if (state->mixer->protocol > UAC_VERSION_1 && !uac_v2v3_control_is_writeable(controls[valinfo->control / 8], valinfo->control)) cval->master_readonly = 1; /* get min/max values */ switch (type) { case UAC_PROCESS_UP_DOWNMIX: { bool mode_sel = false; switch (state->mixer->protocol) { case UAC_VERSION_1: case UAC_VERSION_2: default: if (cval->control == UAC_UD_MODE_SELECT) mode_sel = true; break; case UAC_VERSION_3: if (cval->control == UAC3_UD_MODE_SELECT) mode_sel = true; break; } if (mode_sel) { __u8 *control_spec = uac_processing_unit_specific(desc, state->mixer->protocol); cval->min = 1; cval->max = control_spec[0]; cval->res = 1; cval->initialized = 1; break; } get_min_max(cval, valinfo->min_value); break; } case USB_XU_CLOCK_RATE: /* * E-Mu USB 0404/0202/TrackerPre/0204 * samplerate control quirk */ cval->min = 0; cval->max = 5; cval->res = 1; cval->initialized = 1; break; default: get_min_max(cval, valinfo->min_value); break; } err = get_cur_ctl_value(cval, cval->control << 8, &val); if (err < 0) { usb_mixer_elem_info_free(cval); return -EINVAL; } kctl = snd_ctl_new1(&mixer_procunit_ctl, cval); if (!kctl) { usb_mixer_elem_info_free(cval); return -ENOMEM; } kctl->private_free = snd_usb_mixer_elem_free; if (check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name))) { /* nothing */ ; } else if (info->name) { strscpy(kctl->id.name, info->name, sizeof(kctl->id.name)); } else { if (extension_unit) nameid = uac_extension_unit_iExtension(desc, state->mixer->protocol); else nameid = uac_processing_unit_iProcessing(desc, state->mixer->protocol); len = 0; if (nameid) len = snd_usb_copy_string_desc(state->chip, nameid, kctl->id.name, sizeof(kctl->id.name)); if (!len) strscpy(kctl->id.name, name, sizeof(kctl->id.name)); } append_ctl_name(kctl, " "); append_ctl_name(kctl, valinfo->suffix); usb_audio_dbg(state->chip, "[%d] PU [%s] ch = %d, val = %d/%d\n", cval->head.id, kctl->id.name, cval->channels, cval->min, cval->max); err = snd_usb_mixer_add_control(&cval->head, kctl); if (err < 0) return err; } return 0; } static int parse_audio_processing_unit(struct mixer_build *state, int unitid, void *raw_desc) { switch (state->mixer->protocol) { case UAC_VERSION_1: case UAC_VERSION_2: default: return build_audio_procunit(state, unitid, raw_desc, procunits, false); case UAC_VERSION_3: return build_audio_procunit(state, unitid, raw_desc, uac3_procunits, false); } } static int parse_audio_extension_unit(struct mixer_build *state, int unitid, void *raw_desc) { /* * 
Note that we parse extension units with processing unit descriptors. * That's ok as the layout is the same. */ return build_audio_procunit(state, unitid, raw_desc, extunits, true); } /* * Selector Unit */ /* * info callback for selector unit * use an enumerator type for routing */ static int mixer_ctl_selector_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct usb_mixer_elem_info *cval = kcontrol->private_data; const char **itemlist = (const char **)kcontrol->private_value; if (snd_BUG_ON(!itemlist)) return -EINVAL; return snd_ctl_enum_info(uinfo, 1, cval->max, itemlist); } /* get callback for selector unit */ static int mixer_ctl_selector_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int val, err; err = get_cur_ctl_value(cval, cval->control << 8, &val); if (err < 0) { ucontrol->value.enumerated.item[0] = 0; return filter_error(cval, err); } val = get_relative_value(cval, val); ucontrol->value.enumerated.item[0] = val; return 0; } /* put callback for selector unit */ static int mixer_ctl_selector_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *cval = kcontrol->private_data; int val, oval, err; err = get_cur_ctl_value(cval, cval->control << 8, &oval); if (err < 0) return filter_error(cval, err); val = ucontrol->value.enumerated.item[0]; val = get_abs_value(cval, val); if (val != oval) { set_cur_ctl_value(cval, cval->control << 8, val); return 1; } return 0; } /* alsa control interface for selector unit */ static const struct snd_kcontrol_new mixer_selectunit_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", /* will be filled later */ .info = mixer_ctl_selector_info, .get = mixer_ctl_selector_get, .put = mixer_ctl_selector_put, }; /* * private free callback. * free both private_data and private_value */ static void usb_mixer_selector_elem_free(struct snd_kcontrol *kctl) { int i, num_ins = 0; if (kctl->private_data) { struct usb_mixer_elem_info *cval = kctl->private_data; num_ins = cval->max; usb_mixer_elem_info_free(cval); kctl->private_data = NULL; } if (kctl->private_value) { char **itemlist = (char **)kctl->private_value; for (i = 0; i < num_ins; i++) kfree(itemlist[i]); kfree(itemlist); kctl->private_value = 0; } } /* * parse a selector unit */ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, void *raw_desc) { struct uac_selector_unit_descriptor *desc = raw_desc; unsigned int i, nameid, len; int err; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; const struct usbmix_name_map *map; char **namelist; for (i = 0; i < desc->bNrInPins; i++) { err = parse_audio_unit(state, desc->baSourceID[i]); if (err < 0) return err; } if (desc->bNrInPins == 1) /* only one ? nonsense! 
*/ return 0; map = find_map(state->map, unitid, 0); if (check_ignored_ctl(map)) return 0; cval = kzalloc(sizeof(*cval), GFP_KERNEL); if (!cval) return -ENOMEM; snd_usb_mixer_elem_init_std(&cval->head, state->mixer, unitid); cval->val_type = USB_MIXER_U8; cval->channels = 1; cval->min = 1; cval->max = desc->bNrInPins; cval->res = 1; cval->initialized = 1; switch (state->mixer->protocol) { case UAC_VERSION_1: default: cval->control = 0; break; case UAC_VERSION_2: case UAC_VERSION_3: if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR || desc->bDescriptorSubtype == UAC3_CLOCK_SELECTOR) cval->control = UAC2_CX_CLOCK_SELECTOR; else /* UAC2/3_SELECTOR_UNIT */ cval->control = UAC2_SU_SELECTOR; break; } namelist = kcalloc(desc->bNrInPins, sizeof(char *), GFP_KERNEL); if (!namelist) { err = -ENOMEM; goto error_cval; } #define MAX_ITEM_NAME_LEN 64 for (i = 0; i < desc->bNrInPins; i++) { struct usb_audio_term iterm; namelist[i] = kmalloc(MAX_ITEM_NAME_LEN, GFP_KERNEL); if (!namelist[i]) { err = -ENOMEM; goto error_name; } len = check_mapped_selector_name(state, unitid, i, namelist[i], MAX_ITEM_NAME_LEN); if (! len && check_input_term(state, desc->baSourceID[i], &iterm) >= 0) len = get_term_name(state->chip, &iterm, namelist[i], MAX_ITEM_NAME_LEN, 0); if (! len) sprintf(namelist[i], "Input %u", i); } kctl = snd_ctl_new1(&mixer_selectunit_ctl, cval); if (! kctl) { usb_audio_err(state->chip, "cannot malloc kcontrol\n"); err = -ENOMEM; goto error_name; } kctl->private_value = (unsigned long)namelist; kctl->private_free = usb_mixer_selector_elem_free; /* check the static mapping table at first */ len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); if (!len) { /* no mapping ? */ switch (state->mixer->protocol) { case UAC_VERSION_1: case UAC_VERSION_2: default: /* if iSelector is given, use it */ nameid = uac_selector_unit_iSelector(desc); if (nameid) len = snd_usb_copy_string_desc(state->chip, nameid, kctl->id.name, sizeof(kctl->id.name)); break; case UAC_VERSION_3: /* TODO: Class-Specific strings not yet supported */ break; } /* ... or pick up the terminal name at next */ if (!len) len = get_term_name(state->chip, &state->oterm, kctl->id.name, sizeof(kctl->id.name), 0); /* ... 
or use the fixed string "USB" as the last resort */ if (!len) strscpy(kctl->id.name, "USB", sizeof(kctl->id.name)); /* and add the proper suffix */ if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR || desc->bDescriptorSubtype == UAC3_CLOCK_SELECTOR) append_ctl_name(kctl, " Clock Source"); else if ((state->oterm.type & 0xff00) == 0x0100) append_ctl_name(kctl, " Capture Source"); else append_ctl_name(kctl, " Playback Source"); } usb_audio_dbg(state->chip, "[%d] SU [%s] items = %d\n", cval->head.id, kctl->id.name, desc->bNrInPins); return snd_usb_mixer_add_control(&cval->head, kctl); error_name: for (i = 0; i < desc->bNrInPins; i++) kfree(namelist[i]); kfree(namelist); error_cval: usb_mixer_elem_info_free(cval); return err; } /* * parse an audio unit recursively */ static int parse_audio_unit(struct mixer_build *state, int unitid) { unsigned char *p1; int protocol = state->mixer->protocol; if (test_and_set_bit(unitid, state->unitbitmap)) return 0; /* the unit already visited */ p1 = find_audio_control_unit(state, unitid); if (!p1) { usb_audio_err(state->chip, "unit %d not found!\n", unitid); return -EINVAL; } if (!snd_usb_validate_audio_desc(p1, protocol)) { usb_audio_dbg(state->chip, "invalid unit %d\n", unitid); return 0; /* skip invalid unit */ } switch (PTYPE(protocol, p1[2])) { case PTYPE(UAC_VERSION_1, UAC_INPUT_TERMINAL): case PTYPE(UAC_VERSION_2, UAC_INPUT_TERMINAL): case PTYPE(UAC_VERSION_3, UAC_INPUT_TERMINAL): return parse_audio_input_terminal(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC_MIXER_UNIT): case PTYPE(UAC_VERSION_2, UAC_MIXER_UNIT): case PTYPE(UAC_VERSION_3, UAC3_MIXER_UNIT): return parse_audio_mixer_unit(state, unitid, p1); case PTYPE(UAC_VERSION_2, UAC2_CLOCK_SOURCE): case PTYPE(UAC_VERSION_3, UAC3_CLOCK_SOURCE): return parse_clock_source_unit(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC_SELECTOR_UNIT): case PTYPE(UAC_VERSION_2, UAC_SELECTOR_UNIT): case PTYPE(UAC_VERSION_3, UAC3_SELECTOR_UNIT): case PTYPE(UAC_VERSION_2, UAC2_CLOCK_SELECTOR): case PTYPE(UAC_VERSION_3, UAC3_CLOCK_SELECTOR): return parse_audio_selector_unit(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC_FEATURE_UNIT): case PTYPE(UAC_VERSION_2, UAC_FEATURE_UNIT): case PTYPE(UAC_VERSION_3, UAC3_FEATURE_UNIT): return parse_audio_feature_unit(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC1_PROCESSING_UNIT): case PTYPE(UAC_VERSION_2, UAC2_PROCESSING_UNIT_V2): case PTYPE(UAC_VERSION_3, UAC3_PROCESSING_UNIT): return parse_audio_processing_unit(state, unitid, p1); case PTYPE(UAC_VERSION_1, UAC1_EXTENSION_UNIT): case PTYPE(UAC_VERSION_2, UAC2_EXTENSION_UNIT_V2): case PTYPE(UAC_VERSION_3, UAC3_EXTENSION_UNIT): return parse_audio_extension_unit(state, unitid, p1); case PTYPE(UAC_VERSION_2, UAC2_EFFECT_UNIT): case PTYPE(UAC_VERSION_3, UAC3_EFFECT_UNIT): return 0; /* FIXME - effect units not implemented yet */ default: usb_audio_err(state->chip, "unit %u: unexpected type 0x%02x\n", unitid, p1[2]); return -EINVAL; } } static void snd_usb_mixer_free(struct usb_mixer_interface *mixer) { /* kill pending URBs */ snd_usb_mixer_disconnect(mixer); kfree(mixer->id_elems); if (mixer->urb) { kfree(mixer->urb->transfer_buffer); usb_free_urb(mixer->urb); } usb_free_urb(mixer->rc_urb); kfree(mixer->rc_setup_packet); kfree(mixer); } static int snd_usb_mixer_dev_free(struct snd_device *device) { struct usb_mixer_interface *mixer = device->device_data; snd_usb_mixer_free(mixer); return 0; } /* UAC3 predefined channels configuration */ struct uac3_badd_profile { int subclass; const char *name; int c_chmask; /* capture 
channels mask */ int p_chmask; /* playback channels mask */ int st_chmask; /* side tone mixing channel mask */ }; static const struct uac3_badd_profile uac3_badd_profiles[] = { { /* * BAIF, BAOF or combination of both * IN: Mono or Stereo cfg, Mono alt possible * OUT: Mono or Stereo cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_GENERIC_IO, .name = "GENERIC IO", .c_chmask = -1, /* dynamic channels */ .p_chmask = -1, /* dynamic channels */ }, { /* BAOF; Stereo only cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_HEADPHONE, .name = "HEADPHONE", .p_chmask = 3, }, { /* BAOF; Mono or Stereo cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_SPEAKER, .name = "SPEAKER", .p_chmask = -1, /* dynamic channels */ }, { /* BAIF; Mono or Stereo cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_MICROPHONE, .name = "MICROPHONE", .c_chmask = -1, /* dynamic channels */ }, { /* * BAIOF topology * IN: Mono only * OUT: Mono or Stereo cfg, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_HEADSET, .name = "HEADSET", .c_chmask = 1, .p_chmask = -1, /* dynamic channels */ .st_chmask = 1, }, { /* BAIOF; IN: Mono only; OUT: Stereo only, Mono alt possible */ .subclass = UAC3_FUNCTION_SUBCLASS_HEADSET_ADAPTER, .name = "HEADSET ADAPTER", .c_chmask = 1, .p_chmask = 3, .st_chmask = 1, }, { /* BAIF + BAOF; IN: Mono only; OUT: Mono only */ .subclass = UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE, .name = "SPEAKERPHONE", .c_chmask = 1, .p_chmask = 1, }, { 0 } /* terminator */ }; static bool uac3_badd_func_has_valid_channels(struct usb_mixer_interface *mixer, const struct uac3_badd_profile *f, int c_chmask, int p_chmask) { /* * If both playback/capture channels are dynamic, make sure * at least one channel is present */ if (f->c_chmask < 0 && f->p_chmask < 0) { if (!c_chmask && !p_chmask) { usb_audio_warn(mixer->chip, "BAAD %s: no channels?", f->name); return false; } return true; } if ((f->c_chmask < 0 && !c_chmask) || (f->c_chmask >= 0 && f->c_chmask != c_chmask)) { usb_audio_warn(mixer->chip, "BAAD %s c_chmask mismatch", f->name); return false; } if ((f->p_chmask < 0 && !p_chmask) || (f->p_chmask >= 0 && f->p_chmask != p_chmask)) { usb_audio_warn(mixer->chip, "BAAD %s p_chmask mismatch", f->name); return false; } return true; } /* * create mixer controls for UAC3 BADD profiles * * UAC3 BADD device doesn't contain CS descriptors thus we will guess everything * * BADD device may contain Mixer Unit, which doesn't have any controls, skip it */ static int snd_usb_mixer_controls_badd(struct usb_mixer_interface *mixer, int ctrlif) { struct usb_device *dev = mixer->chip->dev; struct usb_interface_assoc_descriptor *assoc; int badd_profile = mixer->chip->badd_profile; const struct uac3_badd_profile *f; const struct usbmix_ctl_map *map; int p_chmask = 0, c_chmask = 0, st_chmask = 0; int i; assoc = usb_ifnum_to_if(dev, ctrlif)->intf_assoc; /* Detect BADD capture/playback channels from AS EP descriptors */ for (i = 0; i < assoc->bInterfaceCount; i++) { int intf = assoc->bFirstInterface + i; struct usb_interface *iface; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; unsigned int maxpacksize; char dir_in; int chmask, num; if (intf == ctrlif) continue; iface = usb_ifnum_to_if(dev, intf); if (!iface) continue; num = iface->num_altsetting; if (num < 2) return -EINVAL; /* * The number of Channels in an AudioStreaming interface * and the audio sample bit resolution (16 bits or 24 * bits) can be derived from the wMaxPacketSize field in * the Standard AS Audio Data Endpoint 
descriptor in * Alternate Setting 1 */ alts = &iface->altsetting[1]; altsd = get_iface_desc(alts); if (altsd->bNumEndpoints < 1) return -EINVAL; /* check direction */ dir_in = (get_endpoint(alts, 0)->bEndpointAddress & USB_DIR_IN); maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); switch (maxpacksize) { default: usb_audio_err(mixer->chip, "incorrect wMaxPacketSize 0x%x for BADD profile\n", maxpacksize); return -EINVAL; case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_16: case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_16: case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_24: case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_24: chmask = 1; break; case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_16: case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_16: case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_24: case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_24: chmask = 3; break; } if (dir_in) c_chmask = chmask; else p_chmask = chmask; } usb_audio_dbg(mixer->chip, "UAC3 BADD profile 0x%x: detected c_chmask=%d p_chmask=%d\n", badd_profile, c_chmask, p_chmask); /* check the mapping table */ for (map = uac3_badd_usbmix_ctl_maps; map->id; map++) { if (map->id == badd_profile) break; } if (!map->id) return -EINVAL; for (f = uac3_badd_profiles; f->name; f++) { if (badd_profile == f->subclass) break; } if (!f->name) return -EINVAL; if (!uac3_badd_func_has_valid_channels(mixer, f, c_chmask, p_chmask)) return -EINVAL; st_chmask = f->st_chmask; /* Playback */ if (p_chmask) { /* Master channel, always writable */ build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE, UAC3_BADD_FU_ID2, map->map); /* Mono/Stereo volume channels, always writable */ build_feature_ctl_badd(mixer, p_chmask, UAC_FU_VOLUME, UAC3_BADD_FU_ID2, map->map); } /* Capture */ if (c_chmask) { /* Master channel, always writable */ build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE, UAC3_BADD_FU_ID5, map->map); /* Mono/Stereo volume channels, always writable */ build_feature_ctl_badd(mixer, c_chmask, UAC_FU_VOLUME, UAC3_BADD_FU_ID5, map->map); } /* Side tone-mixing */ if (st_chmask) { /* Master channel, always writable */ build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE, UAC3_BADD_FU_ID7, map->map); /* Mono volume channel, always writable */ build_feature_ctl_badd(mixer, 1, UAC_FU_VOLUME, UAC3_BADD_FU_ID7, map->map); } /* Insertion Control */ if (f->subclass == UAC3_FUNCTION_SUBCLASS_HEADSET_ADAPTER) { struct usb_audio_term iterm, oterm; /* Input Term - Insertion control */ memset(&iterm, 0, sizeof(iterm)); iterm.id = UAC3_BADD_IT_ID4; iterm.type = UAC_BIDIR_TERMINAL_HEADSET; build_connector_control(mixer, map->map, &iterm, true); /* Output Term - Insertion control */ memset(&oterm, 0, sizeof(oterm)); oterm.id = UAC3_BADD_OT_ID3; oterm.type = UAC_BIDIR_TERMINAL_HEADSET; build_connector_control(mixer, map->map, &oterm, false); } return 0; } /* * create mixer controls * * walk through all UAC_OUTPUT_TERMINAL descriptors to search for mixers */ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer) { struct mixer_build state; int err; const struct usbmix_ctl_map *map; void *p; memset(&state, 0, sizeof(state)); state.chip = mixer->chip; state.mixer = mixer; state.buffer = mixer->hostif->extra; state.buflen = mixer->hostif->extralen; /* check the mapping table */ for (map = usbmix_ctl_maps; map->id; map++) { if (map->id == state.chip->usb_id) { state.map = map->map; state.selector_map = map->selector_map; mixer->connector_map = map->connector_map; break; } } p = NULL; while ((p = snd_usb_find_csint_desc(mixer->hostif->extra, mixer->hostif->extralen, p, UAC_OUTPUT_TERMINAL)) != NULL) { if (!snd_usb_validate_audio_desc(p, 
mixer->protocol)) continue; /* skip invalid descriptor */ if (mixer->protocol == UAC_VERSION_1) { struct uac1_output_terminal_descriptor *desc = p; /* mark terminal ID as visited */ set_bit(desc->bTerminalID, state.unitbitmap); state.oterm.id = desc->bTerminalID; state.oterm.type = le16_to_cpu(desc->wTerminalType); state.oterm.name = desc->iTerminal; err = parse_audio_unit(&state, desc->bSourceID); if (err < 0 && err != -EINVAL) return err; } else if (mixer->protocol == UAC_VERSION_2) { struct uac2_output_terminal_descriptor *desc = p; /* mark terminal ID as visited */ set_bit(desc->bTerminalID, state.unitbitmap); state.oterm.id = desc->bTerminalID; state.oterm.type = le16_to_cpu(desc->wTerminalType); state.oterm.name = desc->iTerminal; err = parse_audio_unit(&state, desc->bSourceID); if (err < 0 && err != -EINVAL) return err; /* * For UAC2, use the same approach to also add the * clock selectors */ err = parse_audio_unit(&state, desc->bCSourceID); if (err < 0 && err != -EINVAL) return err; if ((state.oterm.type & 0xff00) != 0x0100 && uac_v2v3_control_is_readable(le16_to_cpu(desc->bmControls), UAC2_TE_CONNECTOR)) { build_connector_control(state.mixer, state.map, &state.oterm, false); } } else { /* UAC_VERSION_3 */ struct uac3_output_terminal_descriptor *desc = p; /* mark terminal ID as visited */ set_bit(desc->bTerminalID, state.unitbitmap); state.oterm.id = desc->bTerminalID; state.oterm.type = le16_to_cpu(desc->wTerminalType); state.oterm.name = le16_to_cpu(desc->wTerminalDescrStr); err = parse_audio_unit(&state, desc->bSourceID); if (err < 0 && err != -EINVAL) return err; /* * For UAC3, use the same approach to also add the * clock selectors */ err = parse_audio_unit(&state, desc->bCSourceID); if (err < 0 && err != -EINVAL) return err; if ((state.oterm.type & 0xff00) != 0x0100 && uac_v2v3_control_is_readable(le32_to_cpu(desc->bmControls), UAC3_TE_INSERTION)) { build_connector_control(state.mixer, state.map, &state.oterm, false); } } } return 0; } static int delegate_notify(struct usb_mixer_interface *mixer, int unitid, u8 *control, u8 *channel) { const struct usbmix_connector_map *map = mixer->connector_map; if (!map) return unitid; for (; map->id; map++) { if (map->id == unitid) { if (control && map->control) *control = map->control; if (channel && map->channel) *channel = map->channel; return map->delegated_id; } } return unitid; } void snd_usb_mixer_notify_id(struct usb_mixer_interface *mixer, int unitid) { struct usb_mixer_elem_list *list; unitid = delegate_notify(mixer, unitid, NULL, NULL); for_each_mixer_elem(list, mixer, unitid) { struct usb_mixer_elem_info *info; if (!list->is_std_info) continue; info = mixer_elem_list_to_info(list); /* invalidate cache, so the value is read from the device */ info->cached = 0; snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE, &list->kctl->id); } } static void snd_usb_mixer_dump_cval(struct snd_info_buffer *buffer, struct usb_mixer_elem_list *list) { struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); static const char * const val_types[] = { [USB_MIXER_BOOLEAN] = "BOOLEAN", [USB_MIXER_INV_BOOLEAN] = "INV_BOOLEAN", [USB_MIXER_S8] = "S8", [USB_MIXER_U8] = "U8", [USB_MIXER_S16] = "S16", [USB_MIXER_U16] = "U16", [USB_MIXER_S32] = "S32", [USB_MIXER_U32] = "U32", [USB_MIXER_BESPOKEN] = "BESPOKEN", }; snd_iprintf(buffer, " Info: id=%i, control=%i, cmask=0x%x, " "channels=%i, type=\"%s\"\n", cval->head.id, cval->control, cval->cmask, cval->channels, val_types[cval->val_type]); snd_iprintf(buffer, " Volume: min=%i, max=%i, 
dBmin=%i, dBmax=%i\n", cval->min, cval->max, cval->dBmin, cval->dBmax); } static void snd_usb_mixer_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_usb_audio *chip = entry->private_data; struct usb_mixer_interface *mixer; struct usb_mixer_elem_list *list; int unitid; list_for_each_entry(mixer, &chip->mixer_list, list) { snd_iprintf(buffer, "USB Mixer: usb_id=0x%08x, ctrlif=%i, ctlerr=%i\n", chip->usb_id, mixer_ctrl_intf(mixer), mixer->ignore_ctl_error); snd_iprintf(buffer, "Card: %s\n", chip->card->longname); for (unitid = 0; unitid < MAX_ID_ELEMS; unitid++) { for_each_mixer_elem(list, mixer, unitid) { snd_iprintf(buffer, " Unit: %i\n", list->id); if (list->kctl) snd_iprintf(buffer, " Control: name=\"%s\", index=%i\n", list->kctl->id.name, list->kctl->id.index); if (list->dump) list->dump(buffer, list); } } } } static void snd_usb_mixer_interrupt_v2(struct usb_mixer_interface *mixer, int attribute, int value, int index) { struct usb_mixer_elem_list *list; __u8 unitid = (index >> 8) & 0xff; __u8 control = (value >> 8) & 0xff; __u8 channel = value & 0xff; unsigned int count = 0; if (channel >= MAX_CHANNELS) { usb_audio_dbg(mixer->chip, "%s(): bogus channel number %d\n", __func__, channel); return; } unitid = delegate_notify(mixer, unitid, &control, &channel); for_each_mixer_elem(list, mixer, unitid) count++; if (count == 0) return; for_each_mixer_elem(list, mixer, unitid) { struct usb_mixer_elem_info *info; if (!list->kctl) continue; if (!list->is_std_info) continue; info = mixer_elem_list_to_info(list); if (count > 1 && info->control != control) continue; switch (attribute) { case UAC2_CS_CUR: /* invalidate cache, so the value is read from the device */ if (channel) info->cached &= ~(1 << channel); else /* master channel */ info->cached = 0; snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE, &info->head.kctl->id); break; case UAC2_CS_RANGE: /* TODO */ break; case UAC2_CS_MEM: /* TODO */ break; default: usb_audio_dbg(mixer->chip, "unknown attribute %d in interrupt\n", attribute); break; } /* switch */ } } static void snd_usb_mixer_interrupt(struct urb *urb) { struct usb_mixer_interface *mixer = urb->context; int len = urb->actual_length; int ustatus = urb->status; if (ustatus != 0) goto requeue; if (mixer->protocol == UAC_VERSION_1) { struct uac1_status_word *status; for (status = urb->transfer_buffer; len >= sizeof(*status); len -= sizeof(*status), status++) { dev_dbg(&urb->dev->dev, "status interrupt: %02x %02x\n", status->bStatusType, status->bOriginator); /* ignore any notifications not from the control interface */ if ((status->bStatusType & UAC1_STATUS_TYPE_ORIG_MASK) != UAC1_STATUS_TYPE_ORIG_AUDIO_CONTROL_IF) continue; if (status->bStatusType & UAC1_STATUS_TYPE_MEM_CHANGED) snd_usb_mixer_rc_memory_change(mixer, status->bOriginator); else snd_usb_mixer_notify_id(mixer, status->bOriginator); } } else { /* UAC_VERSION_2 */ struct uac2_interrupt_data_msg *msg; for (msg = urb->transfer_buffer; len >= sizeof(*msg); len -= sizeof(*msg), msg++) { /* drop vendor specific and endpoint requests */ if ((msg->bInfo & UAC2_INTERRUPT_DATA_MSG_VENDOR) || (msg->bInfo & UAC2_INTERRUPT_DATA_MSG_EP)) continue; snd_usb_mixer_interrupt_v2(mixer, msg->bAttribute, le16_to_cpu(msg->wValue), le16_to_cpu(msg->wIndex)); } } requeue: if (ustatus != -ENOENT && ustatus != -ECONNRESET && ustatus != -ESHUTDOWN) { urb->dev = mixer->chip->dev; usb_submit_urb(urb, GFP_ATOMIC); } } /* create the handler for the optional status interrupt endpoint */ static int 
snd_usb_mixer_status_create(struct usb_mixer_interface *mixer) { struct usb_endpoint_descriptor *ep; void *transfer_buffer; int buffer_length; unsigned int epnum; /* we need one interrupt input endpoint */ if (get_iface_desc(mixer->hostif)->bNumEndpoints < 1) return 0; ep = get_endpoint(mixer->hostif, 0); if (!usb_endpoint_dir_in(ep) || !usb_endpoint_xfer_int(ep)) return 0; epnum = usb_endpoint_num(ep); buffer_length = le16_to_cpu(ep->wMaxPacketSize); transfer_buffer = kmalloc(buffer_length, GFP_KERNEL); if (!transfer_buffer) return -ENOMEM; mixer->urb = usb_alloc_urb(0, GFP_KERNEL); if (!mixer->urb) { kfree(transfer_buffer); return -ENOMEM; } usb_fill_int_urb(mixer->urb, mixer->chip->dev, usb_rcvintpipe(mixer->chip->dev, epnum), transfer_buffer, buffer_length, snd_usb_mixer_interrupt, mixer, ep->bInterval); usb_submit_urb(mixer->urb, GFP_KERNEL); return 0; } int snd_usb_create_mixer(struct snd_usb_audio *chip, int ctrlif) { static const struct snd_device_ops dev_ops = { .dev_free = snd_usb_mixer_dev_free }; struct usb_mixer_interface *mixer; int err; strcpy(chip->card->mixername, "USB Mixer"); mixer = kzalloc(sizeof(*mixer), GFP_KERNEL); if (!mixer) return -ENOMEM; mixer->chip = chip; mixer->ignore_ctl_error = !!(chip->quirk_flags & QUIRK_FLAG_IGNORE_CTL_ERROR); mixer->id_elems = kcalloc(MAX_ID_ELEMS, sizeof(*mixer->id_elems), GFP_KERNEL); if (!mixer->id_elems) { kfree(mixer); return -ENOMEM; } mixer->hostif = &usb_ifnum_to_if(chip->dev, ctrlif)->altsetting[0]; switch (get_iface_desc(mixer->hostif)->bInterfaceProtocol) { case UAC_VERSION_1: default: mixer->protocol = UAC_VERSION_1; break; case UAC_VERSION_2: mixer->protocol = UAC_VERSION_2; break; case UAC_VERSION_3: mixer->protocol = UAC_VERSION_3; break; } if (mixer->protocol == UAC_VERSION_3 && chip->badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) { err = snd_usb_mixer_controls_badd(mixer, ctrlif); if (err < 0) goto _error; } else { err = snd_usb_mixer_controls(mixer); if (err < 0) goto _error; } err = snd_usb_mixer_status_create(mixer); if (err < 0) goto _error; err = snd_usb_mixer_apply_create_quirk(mixer); if (err < 0) goto _error; err = snd_device_new(chip->card, SNDRV_DEV_CODEC, mixer, &dev_ops); if (err < 0) goto _error; if (list_empty(&chip->mixer_list)) snd_card_ro_proc_new(chip->card, "usbmixer", chip, snd_usb_mixer_proc_read); list_add(&mixer->list, &chip->mixer_list); return 0; _error: snd_usb_mixer_free(mixer); return err; } void snd_usb_mixer_disconnect(struct usb_mixer_interface *mixer) { if (mixer->disconnected) return; if (mixer->urb) usb_kill_urb(mixer->urb); if (mixer->rc_urb) usb_kill_urb(mixer->rc_urb); if (mixer->private_free) mixer->private_free(mixer); mixer->disconnected = true; } /* stop any bus activity of a mixer */ static void snd_usb_mixer_inactivate(struct usb_mixer_interface *mixer) { usb_kill_urb(mixer->urb); usb_kill_urb(mixer->rc_urb); } static int snd_usb_mixer_activate(struct usb_mixer_interface *mixer) { int err; if (mixer->urb) { err = usb_submit_urb(mixer->urb, GFP_NOIO); if (err < 0) return err; } return 0; } int snd_usb_mixer_suspend(struct usb_mixer_interface *mixer) { snd_usb_mixer_inactivate(mixer); if (mixer->private_suspend) mixer->private_suspend(mixer); return 0; } static int restore_mixer_value(struct usb_mixer_elem_list *list) { struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); int c, err, idx; if (cval->val_type == USB_MIXER_BESPOKEN) return 0; if (cval->cmask) { idx = 0; for (c = 0; c < MAX_CHANNELS; c++) { if (!(cval->cmask & (1 << c))) continue; if (cval->cached & 
(1 << (c + 1))) { err = snd_usb_set_cur_mix_value(cval, c + 1, idx, cval->cache_val[idx]); if (err < 0) break; } idx++; } } else { /* master */ if (cval->cached) snd_usb_set_cur_mix_value(cval, 0, 0, *cval->cache_val); } return 0; } int snd_usb_mixer_resume(struct usb_mixer_interface *mixer) { struct usb_mixer_elem_list *list; int id, err; /* restore cached mixer values */ for (id = 0; id < MAX_ID_ELEMS; id++) { for_each_mixer_elem(list, mixer, id) { if (list->resume) { err = list->resume(list); if (err < 0) return err; } } } snd_usb_mixer_resume_quirk(mixer); return snd_usb_mixer_activate(mixer); } void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, struct usb_mixer_interface *mixer, int unitid) { list->mixer = mixer; list->id = unitid; list->dump = snd_usb_mixer_dump_cval; list->resume = restore_mixer_value; }
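/*
 * The restore path above walks two bitmaps kept per mixer element:
 * cmask marks which channels the control spans (bit c => channel c + 1),
 * while cached is indexed by channel number (master = 0) and marks which
 * of those channels have a saved value.  The saved values themselves sit
 * densely in cache_val[], ordered by position within cmask rather than by
 * channel number.  The standalone sketch below illustrates only that
 * bookkeeping: demo_cval, DEMO_MAX_CHANNELS and the printf-based
 * set_cur_mix_value() are stand-ins for illustration, not the driver's
 * snd_usb_set_cur_mix_value(), and error handling is omitted.
 */
#include <stdio.h>

#define DEMO_MAX_CHANNELS 16

struct demo_cval {
	unsigned int cmask;			/* bit c set => channel c + 1 exists */
	unsigned int cached;			/* bit (c + 1) set => value saved for channel c + 1 */
	int cache_val[DEMO_MAX_CHANNELS];	/* packed in cmask bit order */
};

/* stand-in for snd_usb_set_cur_mix_value(); just reports what would be written */
static int set_cur_mix_value(int channel, int index, int value)
{
	printf("restore: channel %d (cache slot %d) = %d\n", channel, index, value);
	return 0;
}

static void restore_all(const struct demo_cval *cval)
{
	int c, idx = 0;

	for (c = 0; c < DEMO_MAX_CHANNELS; c++) {
		if (!(cval->cmask & (1u << c)))
			continue;			/* control has no such channel */
		if (cval->cached & (1u << (c + 1)))
			set_cur_mix_value(c + 1, idx, cval->cache_val[idx]);
		idx++;					/* slot advances for every existing channel */
	}
}

int main(void)
{
	/* channels 1 and 2 exist; only channel 2 has a cached value */
	struct demo_cval cval = {
		.cmask	   = 0x3,
		.cached	   = 1u << 2,
		.cache_val = { 10, -25 },
	};

	restore_all(&cval);	/* prints: restore: channel 2 (cache slot 1) = -25 */
	return 0;
}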
// SPDX-License-Identifier: GPL-2.0-only /* * Interface handling * * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/kcov.h> #include <net/mac80211.h> #include <net/ieee80211_radiotap.h> #include "ieee80211_i.h" #include "sta_info.h" #include "debugfs_netdev.h" #include "mesh.h" #include "led.h" #include "driver-ops.h" #include "wme.h" #include "rate.h" /** * DOC: Interface list locking * * The interface list in each struct ieee80211_local is protected * three-fold: * * (1) modifications may only be done under the RTNL *and* wiphy mutex * *and* iflist_mtx * (2) modifications are done in an RCU manner so atomic readers * can traverse the list in RCU-safe blocks. * * As a consequence, reads (traversals) of the list can be protected * by either the RTNL, the wiphy mutex, the iflist_mtx or RCU. */ static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) { struct ieee80211_chanctx_conf *chanctx_conf; int power; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); return false; } power = ieee80211_chandef_max_power(&chanctx_conf->def); rcu_read_unlock(); if (sdata->deflink.user_power_level != IEEE80211_UNSET_POWER_LEVEL) power = min(power, sdata->deflink.user_power_level); if (sdata->deflink.ap_power_level != IEEE80211_UNSET_POWER_LEVEL) power = min(power, sdata->deflink.ap_power_level); if (power != sdata->vif.bss_conf.txpower) { sdata->vif.bss_conf.txpower = power; ieee80211_hw_config(sdata->local, 0); return true; } return false; } void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, bool update_bss) { if (__ieee80211_recalc_txpower(sdata) || (update_bss && ieee80211_sdata_running(sdata))) ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_TXPOWER); } static u32 __ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; local->hw.conf.flags &= ~IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; } static u32 __ieee80211_idle_on(struct ieee80211_local *local) { if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; ieee80211_flush_queues(local, NULL, false); local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; } static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, bool force_active) { bool working, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; lockdep_assert_wiphy(local->hw.wiphy); active = force_active || !list_empty(&local->chanctx_list) || local->monitors; working = !local->ops->remain_on_channel && !list_empty(&local->roc_list); scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning); if (working || scanning) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; if (active) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED;
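	/* hand the accumulated start/stop masks to the throughput LED trigger,
	 * then flip the hardware idle flag: any offchannel work, scan or active
	 * channel context keeps the device out of IEEE80211_CONF_IDLE */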
ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); if (working || scanning || active) return __ieee80211_idle_off(local); return __ieee80211_idle_on(local); } u32 ieee80211_idle_off(struct ieee80211_local *local) { return __ieee80211_recalc_idle(local, true); } void ieee80211_recalc_idle(struct ieee80211_local *local) { u32 change = __ieee80211_recalc_idle(local, false); if (change) ieee80211_hw_config(local, change); } static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, bool check_dup) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *iter; u64 new, mask, tmp; u8 *m; int ret = 0; lockdep_assert_wiphy(local->hw.wiphy); if (is_zero_ether_addr(local->hw.wiphy->addr_mask)) return 0; m = addr; new = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); m = local->hw.wiphy->addr_mask; mask = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); if (!check_dup) return ret; list_for_each_entry(iter, &local->interfaces, list) { if (iter == sdata) continue; if (iter->vif.type == NL80211_IFTYPE_MONITOR && !(iter->u.mntr.flags & MONITOR_FLAG_ACTIVE)) continue; m = iter->vif.addr; tmp = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); if ((new & ~mask) != (tmp & ~mask)) { ret = -EINVAL; break; } } return ret; } static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata) { struct ieee80211_roc_work *roc; struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *scan_sdata; int ret = 0; lockdep_assert_wiphy(local->hw.wiphy); /* To be the most flexible here we want to only limit changing the * address if the specific interface is doing offchannel work or * scanning. */ if (netif_carrier_ok(sdata->dev)) return -EBUSY; /* First check no ROC work is happening on this iface */ list_for_each_entry(roc, &local->roc_list, list) { if (roc->sdata != sdata) continue; if (roc->started) { ret = -EBUSY; goto unlock; } } /* And if this iface is scanning */ if (local->scanning) { scan_sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->hw.wiphy->mtx)); if (sdata == scan_sdata) ret = -EBUSY; } switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: /* More interface types could be added here but changing the * address while powered makes the most sense in client modes. */ break; default: ret = -EOPNOTSUPP; } unlock: return ret; } static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata, void *addr) { struct ieee80211_local *local = sdata->local; struct sockaddr *sa = addr; bool check_dup = true; bool live = false; int ret; if (ieee80211_sdata_running(sdata)) { ret = ieee80211_can_powered_addr_change(sdata); if (ret) return ret; live = true; } if (sdata->vif.type == NL80211_IFTYPE_MONITOR && !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) check_dup = false; ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup); if (ret) return ret; if (live) drv_remove_interface(local, sdata); ret = eth_mac_addr(sdata->dev, sa); if (ret == 0) { memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN); ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); } /* Regardless of eth_mac_addr() return we still want to add the * interface back. This should not fail... 
*/ if (live) WARN_ON(drv_add_interface(local, sdata)); return ret; } static int ieee80211_change_mac(struct net_device *dev, void *addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; int ret; /* * This happens during unregistration if there's a bond device * active (maybe other cases?) and we must get removed from it. * But we really don't care anymore if it's not registered now. */ if (!dev->ieee80211_ptr->registered) return 0; wiphy_lock(local->hw.wiphy); ret = _ieee80211_change_mac(sdata, addr); wiphy_unlock(local->hw.wiphy); return ret; } static inline int identical_mac_addr_allowed(int type1, int type2) { return type1 == NL80211_IFTYPE_MONITOR || type2 == NL80211_IFTYPE_MONITOR || type1 == NL80211_IFTYPE_P2P_DEVICE || type2 == NL80211_IFTYPE_P2P_DEVICE || (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_AP_VLAN) || (type1 == NL80211_IFTYPE_AP_VLAN && (type2 == NL80211_IFTYPE_AP || type2 == NL80211_IFTYPE_AP_VLAN)); } static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype iftype) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *nsdata; ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(nsdata, &local->interfaces, list) { if (nsdata != sdata && ieee80211_sdata_running(nsdata)) { /* * Only OCB and monitor mode may coexist */ if ((sdata->vif.type == NL80211_IFTYPE_OCB && nsdata->vif.type != NL80211_IFTYPE_MONITOR) || (sdata->vif.type != NL80211_IFTYPE_MONITOR && nsdata->vif.type == NL80211_IFTYPE_OCB)) return -EBUSY; /* * Allow only a single IBSS interface to be up at any * time. This is restricted because beacon distribution * cannot work properly if both are in the same IBSS. * * To remove this restriction we'd have to disallow them * from setting the same SSID on different IBSS interfaces * belonging to the same hardware. Then, however, we're * faced with having to adopt two different TSF timers... */ if (iftype == NL80211_IFTYPE_ADHOC && nsdata->vif.type == NL80211_IFTYPE_ADHOC) return -EBUSY; /* * will not add another interface while any channel * switch is active. */ if (nsdata->vif.bss_conf.csa_active) return -EBUSY; /* * The remaining checks are only performed for interfaces * with the same MAC address. 
*/ if (!ether_addr_equal(sdata->vif.addr, nsdata->vif.addr)) continue; /* * check whether it may have the same address */ if (!identical_mac_addr_allowed(iftype, nsdata->vif.type)) return -ENOTUNIQ; /* No support for VLAN with MLO yet */ if (iftype == NL80211_IFTYPE_AP_VLAN && sdata->wdev.use_4addr && nsdata->vif.type == NL80211_IFTYPE_AP && nsdata->vif.valid_links) return -EOPNOTSUPP; /* * can only add VLANs to enabled APs */ if (iftype == NL80211_IFTYPE_AP_VLAN && nsdata->vif.type == NL80211_IFTYPE_AP) sdata->bss = &nsdata->u.ap; } } return ieee80211_check_combinations(sdata, NULL, 0, 0); } static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype iftype) { int n_queues = sdata->local->hw.queues; int i; if (iftype == NL80211_IFTYPE_NAN) return 0; if (iftype != NL80211_IFTYPE_P2P_DEVICE) { for (i = 0; i < IEEE80211_NUM_ACS; i++) { if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == IEEE80211_INVAL_HW_QUEUE)) return -EINVAL; if (WARN_ON_ONCE(sdata->vif.hw_queue[i] >= n_queues)) return -EINVAL; } } if ((iftype != NL80211_IFTYPE_AP && iftype != NL80211_IFTYPE_P2P_GO && iftype != NL80211_IFTYPE_MESH_POINT) || !ieee80211_hw_check(&sdata->local->hw, QUEUE_CONTROL)) { sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE; return 0; } if (WARN_ON_ONCE(sdata->vif.cab_queue == IEEE80211_INVAL_HW_QUEUE)) return -EINVAL; if (WARN_ON_ONCE(sdata->vif.cab_queue >= n_queues)) return -EINVAL; return 0; } static int ieee80211_open(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int err; /* fail early if user set an invalid address */ if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; wiphy_lock(sdata->local->hw.wiphy); err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); if (err) goto out; err = ieee80211_do_open(&sdata->wdev, true); out: wiphy_unlock(sdata->local->hw.wiphy); return err; } static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down) { struct ieee80211_local *local = sdata->local; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i, flushed; struct ps_data *ps; struct cfg80211_chan_def chandef; bool cancel_scan; struct cfg80211_nan_func *func; lockdep_assert_wiphy(local->hw.wiphy); clear_bit(SDATA_STATE_RUNNING, &sdata->state); synchronize_rcu(); /* flush _ieee80211_wake_txqs() */ cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata; if (cancel_scan) ieee80211_scan_cancel(local); ieee80211_roc_purge(local, sdata); switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_mgd_stop(sdata); break; case NL80211_IFTYPE_ADHOC: ieee80211_ibss_stop(sdata); break; case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) break; list_del_rcu(&sdata->u.mntr.list); break; default: break; } /* * Remove all stations associated with this interface. * * This must be done before calling ops->remove_interface() * because otherwise we can later invoke ops->sta_notify() * whenever the STAs are removed, and that invalidates driver * assumptions about always getting a vif pointer that is valid * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * * For AP_VLANs stations may exist since there's nothing else that * would have removed them, but in other modes there shouldn't * be any stations. 
*/ flushed = sta_info_flush(sdata, -1); WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && flushed > 0); /* don't count this interface for allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_dec(&local->iff_allmultis); if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll--; local->fif_probe_req--; } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { local->fif_probe_req--; } if (sdata->dev) { netif_addr_lock_bh(sdata->dev); spin_lock_bh(&local->filter_lock); __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, sdata->dev->addr_len); spin_unlock_bh(&local->filter_lock); netif_addr_unlock_bh(sdata->dev); } del_timer_sync(&local->dynamic_ps_timer); wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); WARN(ieee80211_vif_is_mld(&sdata->vif), "destroying interface with valid links 0x%04x\n", sdata->vif.valid_links); sdata->vif.bss_conf.csa_active = false; if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->deflink.u.mgd.csa_waiting_bcn = false; if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); sdata->csa_blocked_tx = false; } wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.color_change_finalize_work); wiphy_delayed_work_cancel(local->hw.wiphy, &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chanreq.oper; WARN_ON(local->suspended); ieee80211_link_release_channel(&sdata->deflink); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); } if (sdata->vif.type == NL80211_IFTYPE_AP) { WARN_ON(!list_empty(&sdata->u.ap.vlans)); } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { /* remove all packets in parent bc_buf pointing to this dev */ ps = &sdata->bss->ps; spin_lock_irqsave(&ps->bc_buf.lock, flags); skb_queue_walk_safe(&ps->bc_buf, skb, tmp) { if (skb->dev == sdata->dev) { __skb_unlink(skb, &ps->bc_buf); local->total_ps_buffered--; ieee80211_free_txskb(&local->hw, skb); } } spin_unlock_irqrestore(&ps->bc_buf.lock, flags); } if (going_down) local->open_count--; switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: list_del(&sdata->u.vlan.list); RCU_INIT_POINTER(sdata->vif.bss_conf.chanctx_conf, NULL); /* see comment in the default case below */ ieee80211_free_keys(sdata, true); /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs--; break; } local->monitors--; if (local->monitors == 0) { local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR; hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; } ieee80211_adjust_monitor_flags(sdata, -1); break; case NL80211_IFTYPE_NAN: /* clean all the functions */ spin_lock_bh(&sdata->u.nan.func_lock); idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) { idr_remove(&sdata->u.nan.function_inst_ids, i); cfg80211_free_nan_func(func); } idr_destroy(&sdata->u.nan.function_inst_ids); spin_unlock_bh(&sdata->u.nan.func_lock); break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ RCU_INIT_POINTER(local->p2p_sdata, NULL); fallthrough; default: wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->work); /* * When we get here, the interface is marked down. 
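 * (SDATA_STATE_RUNNING was cleared and synchronize_rcu() called near the
 * top of this function)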
* Free the remaining keys, if there are any * (which can happen in AP mode if userspace sets * keys before the interface is operating) * * Force the key freeing to always synchronize_net() * to wait for the RX path in case it is using this * interface enqueuing frames at this very time on * another CPU. */ ieee80211_free_keys(sdata, true); skb_queue_purge(&sdata->skb_queue); skb_queue_purge(&sdata->status_queue); } spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_walk_safe(&local->pending[i], skb, tmp) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (info->control.vif == &sdata->vif) { __skb_unlink(skb, &local->pending[i]); ieee80211_free_txskb(&local->hw, skb); } } } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ieee80211_txq_remove_vlan(local, sdata); sdata->bss = NULL; if (local->open_count == 0) ieee80211_clear_tx_pending(local); sdata->vif.bss_conf.beacon_int = 0; /* * If the interface goes down while suspended, presumably because * the device was unplugged and that happens before our resume, * then the driver is already unconfigured and the remainder of * this function isn't needed. * XXX: what about WoWLAN? If the device has software state, e.g. * memory allocated, it might expect teardown commands from * mac80211 here? */ if (local->suspended) { WARN_ON(local->wowlan); WARN_ON(rcu_access_pointer(local->monitor_sdata)); return; } switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: break; case NL80211_IFTYPE_MONITOR: if (local->monitors == 0) ieee80211_del_virtual_monitor(local); ieee80211_recalc_idle(local); if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; fallthrough; default: if (going_down) drv_remove_interface(local, sdata); } ieee80211_recalc_ps(local); if (cancel_scan) wiphy_delayed_work_flush(local->hw.wiphy, &local->scan_work); if (local->open_count == 0) { ieee80211_stop_device(local); /* no reconfiguring after stop! 
*/ return; } /* do after stop to avoid reconfiguring when we stop anyway */ ieee80211_configure_filter(local); ieee80211_hw_config(local, hw_reconf_flags); if (local->monitors == local->open_count) ieee80211_add_virtual_monitor(local); } static void ieee80211_stop_mbssid(struct ieee80211_sub_if_data *sdata) { struct ieee80211_sub_if_data *tx_sdata, *non_tx_sdata, *tmp_sdata; struct ieee80211_vif *tx_vif = sdata->vif.mbssid_tx_vif; if (!tx_vif) return; tx_sdata = vif_to_sdata(tx_vif); sdata->vif.mbssid_tx_vif = NULL; list_for_each_entry_safe(non_tx_sdata, tmp_sdata, &tx_sdata->local->interfaces, list) { if (non_tx_sdata != sdata && non_tx_sdata != tx_sdata && non_tx_sdata->vif.mbssid_tx_vif == tx_vif && ieee80211_sdata_running(non_tx_sdata)) { non_tx_sdata->vif.mbssid_tx_vif = NULL; dev_close(non_tx_sdata->wdev.netdev); } } if (sdata != tx_sdata && ieee80211_sdata_running(tx_sdata)) { tx_sdata->vif.mbssid_tx_vif = NULL; dev_close(tx_sdata->wdev.netdev); } } static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); /* close dependent VLAN and MBSSID interfaces before locking wiphy */ if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_sub_if_data *vlan, *tmpsdata; list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, u.vlan.list) dev_close(vlan->dev); ieee80211_stop_mbssid(sdata); } wiphy_lock(sdata->local->hw.wiphy); wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->activate_links_work); ieee80211_do_stop(sdata, true); wiphy_unlock(sdata->local->hw.wiphy); return 0; } static void ieee80211_set_multicast_list(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; int allmulti, sdata_allmulti; allmulti = !!(dev->flags & IFF_ALLMULTI); sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI); if (allmulti != sdata_allmulti) { if (dev->flags & IFF_ALLMULTI) atomic_inc(&local->iff_allmultis); else atomic_dec(&local->iff_allmultis); sdata->flags ^= IEEE80211_SDATA_ALLMULTI; } spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); wiphy_work_queue(local->hw.wiphy, &local->reconfig_filter); } /* * Called when the netdev is removed or, by the code below, before * the interface type changes. 
*/ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { /* free extra data */ ieee80211_free_keys(sdata, false); ieee80211_debugfs_remove_netdev(sdata); ieee80211_destroy_frag_cache(&sdata->frags); if (ieee80211_vif_is_mesh(&sdata->vif)) ieee80211_mesh_teardown_sdata(sdata); ieee80211_vif_clear_links(sdata); ieee80211_link_stop(&sdata->deflink); } static void ieee80211_uninit(struct net_device *dev) { ieee80211_teardown_sdata(IEEE80211_DEV_TO_SUB_IF(dev)); } static void ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { dev_fetch_sw_netstats(stats, dev->tstats); } static int ieee80211_netdev_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; return drv_net_setup_tc(local, sdata, dev, type, type_data); } static const struct net_device_ops ieee80211_dataif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_subif_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, .ndo_get_stats64 = ieee80211_get_stats64, .ndo_setup_tc = ieee80211_netdev_setup_tc, }; static u16 ieee80211_monitor_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr; int len_rthdr; if (local->hw.queues < IEEE80211_NUM_ACS) return 0; /* reset flags and info before parsing radiotap header */ memset(info, 0, sizeof(*info)); if (!ieee80211_parse_tx_radiotap(skb, dev)) return 0; /* doesn't matter, frame will be dropped */ len_rthdr = ieee80211_get_radiotap_len(skb->data); hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); if (skb->len < len_rthdr + 2 || skb->len < len_rthdr + ieee80211_hdrlen(hdr->frame_control)) return 0; /* doesn't matter, frame will be dropped */ return ieee80211_select_queue_80211(sdata, skb, hdr); } static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, }; static int ieee80211_netdev_fill_forward_path(struct net_device_path_ctx *ctx, struct net_device_path *path) { struct ieee80211_sub_if_data *sdata; struct ieee80211_local *local; struct sta_info *sta; int ret = -ENOENT; sdata = IEEE80211_DEV_TO_SUB_IF(ctx->dev); local = sdata->local; if (!local->ops->net_fill_forward_path) return -EOPNOTSUPP; rcu_read_lock(); switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: sta = rcu_dereference(sdata->u.vlan.sta); if (sta) break; if (sdata->wdev.use_4addr) goto out; if (is_multicast_ether_addr(ctx->daddr)) goto out; sta = sta_info_get_bss(sdata, ctx->daddr); break; case NL80211_IFTYPE_AP: if (is_multicast_ether_addr(ctx->daddr)) goto out; sta = sta_info_get(sdata, ctx->daddr); break; case NL80211_IFTYPE_STATION: if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { sta = sta_info_get(sdata, ctx->daddr); if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) goto out; break; } } sta = 
sta_info_get(sdata, sdata->deflink.u.mgd.bssid); break; default: goto out; } if (!sta) goto out; ret = drv_net_fill_forward_path(local, sdata, &sta->sta, ctx, path); out: rcu_read_unlock(); return ret; } static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_subif_start_xmit_8023, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, .ndo_get_stats64 = ieee80211_get_stats64, .ndo_fill_forward_path = ieee80211_netdev_fill_forward_path, .ndo_setup_tc = ieee80211_netdev_setup_tc, }; static bool ieee80211_iftype_supports_hdr_offload(enum nl80211_iftype iftype) { switch (iftype) { /* P2P GO and client are mapped to AP/STATION types */ case NL80211_IFTYPE_AP: case NL80211_IFTYPE_STATION: return true; default: return false; } } static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; u32 flags; flags = sdata->vif.offload_flags; if (ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD) && ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) { flags |= IEEE80211_OFFLOAD_ENCAP_ENABLED; if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG) && local->hw.wiphy->frag_threshold != (u32)-1) flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; if (local->monitors) flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; } else { flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; } if (ieee80211_hw_check(&local->hw, SUPPORTS_RX_DECAP_OFFLOAD) && ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) { flags |= IEEE80211_OFFLOAD_DECAP_ENABLED; if (local->monitors && !ieee80211_hw_check(&local->hw, SUPPORTS_CONC_MON_RX_DECAP)) flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED; } else { flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED; } if (sdata->vif.offload_flags == flags) return false; sdata->vif.offload_flags = flags; ieee80211_check_fast_rx_iface(sdata); return true; } static void ieee80211_set_vif_encap_ops(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *bss = sdata; bool enabled; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (!sdata->bss) return; bss = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); } if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD) || !ieee80211_iftype_supports_hdr_offload(bss->vif.type)) return; enabled = bss->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED; if (sdata->wdev.use_4addr && !(bss->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_4ADDR)) enabled = false; sdata->dev->netdev_ops = enabled ? 
&ieee80211_dataif_8023_ops : &ieee80211_dataif_ops; } static void ieee80211_recalc_sdata_offload(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *vsdata; if (ieee80211_set_sdata_offload_flags(sdata)) { drv_update_vif_offload(local, sdata); ieee80211_set_vif_encap_ops(sdata); } list_for_each_entry(vsdata, &local->interfaces, list) { if (vsdata->vif.type != NL80211_IFTYPE_AP_VLAN || vsdata->bss != &sdata->u.ap) continue; ieee80211_set_vif_encap_ops(vsdata); } } void ieee80211_recalc_offload(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD)) return; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; ieee80211_recalc_sdata_offload(sdata); } } void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset) { struct ieee80211_local *local = sdata->local; u32 flags = sdata->u.mntr.flags; #define ADJUST(_f, _s) do { \ if (flags & MONITOR_FLAG_##_f) \ local->fif_##_s += offset; \ } while (0) ADJUST(FCSFAIL, fcsfail); ADJUST(PLCPFAIL, plcpfail); ADJUST(CONTROL, control); ADJUST(CONTROL, pspoll); ADJUST(OTHER_BSS, other_bss); #undef ADJUST } static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; int i; for (i = 0; i < IEEE80211_NUM_ACS; i++) { if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) sdata->vif.hw_queue[i] = IEEE80211_INVAL_HW_QUEUE; else if (local->hw.queues >= IEEE80211_NUM_ACS) sdata->vif.hw_queue[i] = i; else sdata->vif.hw_queue[i] = 0; } sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE; } static void ieee80211_sdata_init(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { sdata->local = local; /* * Initialize the default link, so we can use link_id 0 for non-MLD, * and that continues to work for non-MLD-aware drivers that use just * vif.bss_conf instead of vif.link_conf. * * Note that we never change this, so if link ID 0 isn't used in an * MLD connection, we get a separate allocation for it. */ ieee80211_link_init(sdata, -1, &sdata->deflink, &sdata->vif.bss_conf); } int ieee80211_add_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int ret; if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return 0; ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); if (local->monitor_sdata) return 0; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); if (!sdata) return -ENOMEM; /* set up data */ sdata->vif.type = NL80211_IFTYPE_MONITOR; snprintf(sdata->name, IFNAMSIZ, "%s-monitor", wiphy_name(local->hw.wiphy)); sdata->wdev.iftype = NL80211_IFTYPE_MONITOR; sdata->wdev.wiphy = local->hw.wiphy; ieee80211_sdata_init(local, sdata); ieee80211_set_default_queues(sdata); ret = drv_add_interface(local, sdata); if (WARN_ON(ret)) { /* ok .. stupid driver, it asked for this! 
*/ kfree(sdata); return ret; } set_bit(SDATA_STATE_RUNNING, &sdata->state); ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR); if (ret) { kfree(sdata); return ret; } mutex_lock(&local->iflist_mtx); rcu_assign_pointer(local->monitor_sdata, sdata); mutex_unlock(&local->iflist_mtx); ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chanreq, IEEE80211_CHANCTX_EXCLUSIVE); if (ret) { mutex_lock(&local->iflist_mtx); RCU_INIT_POINTER(local->monitor_sdata, NULL); mutex_unlock(&local->iflist_mtx); synchronize_net(); drv_remove_interface(local, sdata); kfree(sdata); return ret; } skb_queue_head_init(&sdata->skb_queue); skb_queue_head_init(&sdata->status_queue); wiphy_work_init(&sdata->work, ieee80211_iface_work); return 0; } void ieee80211_del_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return; ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); mutex_lock(&local->iflist_mtx); sdata = rcu_dereference_protected(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx)); if (!sdata) { mutex_unlock(&local->iflist_mtx); return; } RCU_INIT_POINTER(local->monitor_sdata, NULL); mutex_unlock(&local->iflist_mtx); synchronize_net(); ieee80211_link_release_channel(&sdata->deflink); drv_remove_interface(local, sdata); kfree(sdata); } /* * NOTE: Be very careful when changing this function, it must NOT return * an error on interface type changes that have been pre-checked, so most * checks should be in ieee80211_check_concurrent_iface. */ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct net_device *dev = wdev->netdev; struct ieee80211_local *local = sdata->local; u64 changed = 0; int res; u32 hw_reconf_flags = 0; lockdep_assert_wiphy(local->hw.wiphy); switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: { struct ieee80211_sub_if_data *master; if (!sdata->bss) return -ENOLINK; list_add(&sdata->u.vlan.list, &sdata->bss->vlans); master = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); sdata->control_port_protocol = master->control_port_protocol; sdata->control_port_no_encrypt = master->control_port_no_encrypt; sdata->control_port_over_nl80211 = master->control_port_over_nl80211; sdata->control_port_no_preauth = master->control_port_no_preauth; sdata->vif.cab_queue = master->vif.cab_queue; memcpy(sdata->vif.hw_queue, master->vif.hw_queue, sizeof(sdata->vif.hw_queue)); sdata->vif.bss_conf.chanreq = master->vif.bss_conf.chanreq; sdata->crypto_tx_tailroom_needed_cnt += master->crypto_tx_tailroom_needed_cnt; break; } case NL80211_IFTYPE_AP: sdata->bss = &sdata->u.ap; break; case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_NAN: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_WDS: /* cannot happen */ WARN_ON(1); break; } if (local->open_count == 0) { /* here we can consider everything in good order (again) */ local->reconfig_failure = false; res = drv_start(local); if (res) goto err_del_bss; ieee80211_led_radio(local, true); ieee80211_mod_tpt_led_trig(local, IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); } /* * Copy the hopefully now-present MAC address to * this interface, if it has the special null one. 
*/ if (dev && is_zero_ether_addr(dev->dev_addr)) { eth_hw_addr_set(dev, local->hw.wiphy->perm_addr); memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN); if (!is_valid_ether_addr(dev->dev_addr)) { res = -EADDRNOTAVAIL; goto err_stop; } } switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: /* no need to tell driver, but set carrier and chanctx */ if (sdata->bss->active) { ieee80211_link_vlan_copy_chanctx(&sdata->deflink); netif_carrier_on(dev); ieee80211_set_vif_encap_ops(sdata); } else { netif_carrier_off(dev); } break; case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs++; break; } if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { res = drv_add_interface(local, sdata); if (res) goto err_stop; } else if (local->monitors == 0 && local->open_count == 0) { res = ieee80211_add_virtual_monitor(local); if (res) goto err_stop; } /* must be before the call to ieee80211_configure_filter */ local->monitors++; if (local->monitors == 1) { local->hw.conf.flags |= IEEE80211_CONF_MONITOR; hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; } ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); ieee80211_recalc_offload(local); ieee80211_recalc_idle(local); netif_carrier_on(dev); break; default: if (coming_up) { ieee80211_del_virtual_monitor(local); ieee80211_set_sdata_offload_flags(sdata); res = drv_add_interface(local, sdata); if (res) goto err_stop; ieee80211_set_vif_encap_ops(sdata); res = ieee80211_check_queues(sdata, ieee80211_vif_type_p2p(&sdata->vif)); if (res) goto err_del_interface; } if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; local->fif_probe_req++; ieee80211_configure_filter(local); } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { local->fif_probe_req++; } if (sdata->vif.probe_req_reg) drv_config_iface_filter(local, sdata, FIF_PROBE_REQ, FIF_PROBE_REQ); if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && sdata->vif.type != NL80211_IFTYPE_NAN) changed |= ieee80211_reset_erp_info(sdata); ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: netif_carrier_off(dev); break; case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: break; default: /* not reached */ WARN_ON(1); } /* * Set default queue parameters so drivers don't * need to initialise the hardware if the hardware * doesn't start up with sane defaults. * Enable QoS for anything but station interfaces. */ ieee80211_set_wmm_default(&sdata->deflink, true, sdata->vif.type != NL80211_IFTYPE_STATION); } switch (sdata->vif.type) { case NL80211_IFTYPE_P2P_DEVICE: rcu_assign_pointer(local->p2p_sdata, sdata); break; case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) break; list_add_tail_rcu(&sdata->u.mntr.list, &local->mon_list); break; default: break; } /* * set_multicast_list will be invoked by the networking core * which will check whether any increments here were done in * error and sync them down to the hardware as filter flags. 
*/ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_inc(&local->iff_allmultis); if (coming_up) local->open_count++; if (local->open_count == 1) ieee80211_hw_conf_init(local); else if (hw_reconf_flags) ieee80211_hw_config(local, hw_reconf_flags); ieee80211_recalc_ps(local); set_bit(SDATA_STATE_RUNNING, &sdata->state); return 0; err_del_interface: drv_remove_interface(local, sdata); err_stop: if (!local->open_count) drv_stop(local); err_del_bss: sdata->bss = NULL; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) list_del(&sdata->u.vlan.list); /* might already be clear but that doesn't matter */ clear_bit(SDATA_STATE_RUNNING, &sdata->state); return res; } static void ieee80211_if_free(struct net_device *dev) { free_percpu(dev->tstats); } static void ieee80211_if_setup(struct net_device *dev) { ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &ieee80211_dataif_ops; dev->needs_free_netdev = true; dev->priv_destructor = ieee80211_if_free; } static void ieee80211_iface_process_skb(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; lockdep_assert_wiphy(local->hw.wiphy); if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_BACK) { struct sta_info *sta; int len = skb->len; sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: ieee80211_process_addba_request(local, sta, mgmt, len); break; case WLAN_ACTION_ADDBA_RESP: ieee80211_process_addba_resp(local, sta, mgmt, len); break; case WLAN_ACTION_DELBA: ieee80211_process_delba(sdata, sta, mgmt, len); break; default: WARN_ON(1); break; } } } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_VHT) { switch (mgmt->u.action.u.vht_group_notif.action_code) { case WLAN_VHT_ACTION_OPMODE_NOTIF: { struct ieee80211_rx_status *status; enum nl80211_band band; struct sta_info *sta; u8 opmode; status = IEEE80211_SKB_RXCB(skb); band = status->band; opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) ieee80211_vht_handle_opmode(sdata, &sta->deflink, opmode, band); break; } case WLAN_VHT_ACTION_GROUPID_MGMT: ieee80211_process_mu_groups(sdata, &sdata->deflink, mgmt); break; default: WARN_ON(1); break; } } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_S1G) { switch (mgmt->u.action.u.s1g.action_code) { case WLAN_S1G_TWT_TEARDOWN: case WLAN_S1G_TWT_SETUP: ieee80211_s1g_rx_twt_action(sdata, skb); break; default: break; } } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_EHT) { if (sdata->vif.type == NL80211_IFTYPE_STATION) { switch (mgmt->u.action.u.ttlm_req.action_code) { case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: ieee80211_process_neg_ttlm_req(sdata, mgmt, skb->len); break; case WLAN_PROTECTED_EHT_ACTION_TTLM_RES: ieee80211_process_neg_ttlm_res(sdata, mgmt, skb->len); break; default: break; } } } else if (ieee80211_is_ext(mgmt->frame_control)) { if (sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_queued_ext(sdata, skb); else WARN_ON(1); } else if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *)mgmt; struct sta_info *sta; /* * So the frame isn't mgmt, but frame_control * is at the right place anyway, of course, so * the if statement is correct. 
* * Warn if we have other data frame types here, * they must not get here. */ WARN_ON(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC)); WARN_ON(!(hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG))); /* * This was a fragment of a frame, received while * a block-ack session was active. That cannot be * right, so terminate the session. */ sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { u16 tid = ieee80211_get_tid(hdr); __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_REQUIRE_SETUP, true); } } else switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_rx_queued_mgmt(sdata, skb); break; case NL80211_IFTYPE_ADHOC: ieee80211_ibss_rx_queued_mgmt(sdata, skb); break; case NL80211_IFTYPE_MESH_POINT: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; ieee80211_mesh_rx_queued_mgmt(sdata, skb); break; default: WARN(1, "frame for unexpected interface type"); break; } } static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_S1G) { switch (mgmt->u.action.u.s1g.action_code) { case WLAN_S1G_TWT_TEARDOWN: case WLAN_S1G_TWT_SETUP: ieee80211_s1g_status_twt_action(sdata, skb); break; default: break; } } } static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, work); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; if (!ieee80211_sdata_running(sdata)) return; if (test_bit(SCAN_SW_SCANNING, &local->scanning)) return; if (!ieee80211_can_run_worker(local)) return; /* first process frames */ while ((skb = skb_dequeue(&sdata->skb_queue))) { kcov_remote_start_common(skb_get_kcov_handle(skb)); if (skb->protocol == cpu_to_be16(ETH_P_TDLS)) ieee80211_process_tdls_channel_switch(sdata, skb); else ieee80211_iface_process_skb(local, sdata, skb); kfree_skb(skb); kcov_remote_stop(); } /* process status queue */ while ((skb = skb_dequeue(&sdata->status_queue))) { kcov_remote_start_common(skb_get_kcov_handle(skb)); ieee80211_iface_process_status(sdata, skb); kfree_skb(skb); kcov_remote_stop(); } /* then other type-dependent work */ switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_work(sdata); break; case NL80211_IFTYPE_ADHOC: ieee80211_ibss_work(sdata); break; case NL80211_IFTYPE_MESH_POINT: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; ieee80211_mesh_work(sdata); break; case NL80211_IFTYPE_OCB: ieee80211_ocb_work(sdata); break; default: break; } } static void ieee80211_activate_links_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, activate_links_work); ieee80211_set_active_links(&sdata->vif, sdata->desired_active_links); } /* * Helper function to initialise an interface to a specific type. 
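 * In the type-change paths below it runs after ieee80211_teardown_sdata()
 * has cleared the old per-type state.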
*/ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { static const u8 bssid_wildcard[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; /* clear type-dependent unions */ memset(&sdata->u, 0, sizeof(sdata->u)); memset(&sdata->deflink.u, 0, sizeof(sdata->deflink.u)); /* and set some type-dependent values */ sdata->vif.type = type; sdata->vif.p2p = false; sdata->wdev.iftype = type; sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); sdata->control_port_no_encrypt = false; sdata->control_port_over_nl80211 = false; sdata->control_port_no_preauth = false; sdata->vif.cfg.idle = true; sdata->vif.bss_conf.txpower = INT_MIN; /* unset */ sdata->noack_map = 0; /* only monitor/p2p-device differ */ if (sdata->dev) { sdata->dev->netdev_ops = &ieee80211_dataif_ops; sdata->dev->type = ARPHRD_ETHER; } skb_queue_head_init(&sdata->skb_queue); skb_queue_head_init(&sdata->status_queue); wiphy_work_init(&sdata->work, ieee80211_iface_work); wiphy_work_init(&sdata->activate_links_work, ieee80211_activate_links_work); switch (type) { case NL80211_IFTYPE_P2P_GO: type = NL80211_IFTYPE_AP; sdata->vif.type = type; sdata->vif.p2p = true; fallthrough; case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps.bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_P2P_CLIENT: type = NL80211_IFTYPE_STATION; sdata->vif.type = type; sdata->vif.p2p = true; fallthrough; case NL80211_IFTYPE_STATION: sdata->vif.bss_conf.bssid = sdata->deflink.u.mgd.bssid; ieee80211_sta_setup_sdata(sdata); break; case NL80211_IFTYPE_OCB: sdata->vif.bss_conf.bssid = bssid_wildcard; ieee80211_ocb_setup_sdata(sdata); break; case NL80211_IFTYPE_ADHOC: sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; ieee80211_ibss_setup_sdata(sdata); break; case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) ieee80211_mesh_init_sdata(sdata); break; case NL80211_IFTYPE_MONITOR: sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; sdata->dev->netdev_ops = &ieee80211_monitorif_ops; sdata->u.mntr.flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_NAN: idr_init(&sdata->u.nan.function_inst_ids); spin_lock_init(&sdata->u.nan.func_lock); sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: WARN_ON(1); break; } /* need to do this after the switch so vif.type is correct */ ieee80211_link_setup(&sdata->deflink); ieee80211_debugfs_recreate_netdev(sdata, false); } static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { struct ieee80211_local *local = sdata->local; int ret, err; enum nl80211_iftype internal_type = type; bool p2p = false; ASSERT_RTNL(); if (!local->ops->change_interface) return -EBUSY; /* for now, don't support changing while links exist */ if (ieee80211_vif_is_mld(&sdata->vif)) return -EBUSY; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: if (!list_empty(&sdata->u.ap.vlans)) return -EBUSY; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_OCB: /* * Could maybe also all others here? * Just not sure how that interacts * with the RX/config path e.g. for * mesh. 
*/ break; default: return -EBUSY; } switch (type) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_OCB: /* * Could probably support everything * but here. */ break; case NL80211_IFTYPE_P2P_CLIENT: p2p = true; internal_type = NL80211_IFTYPE_STATION; break; case NL80211_IFTYPE_P2P_GO: p2p = true; internal_type = NL80211_IFTYPE_AP; break; default: return -EBUSY; } ret = ieee80211_check_concurrent_iface(sdata, internal_type); if (ret) return ret; ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE); /* do_stop will synchronize_rcu() first thing */ ieee80211_do_stop(sdata, false); ieee80211_teardown_sdata(sdata); ieee80211_set_sdata_offload_flags(sdata); ret = drv_change_interface(local, sdata, internal_type, p2p); if (ret) type = ieee80211_vif_type_p2p(&sdata->vif); /* * Ignore return value here, there's not much we can do since * the driver changed the interface type internally already. * The warnings will hopefully make driver authors fix it :-) */ ieee80211_check_queues(sdata, type); ieee80211_setup_sdata(sdata, type); ieee80211_set_vif_encap_ops(sdata); err = ieee80211_do_open(&sdata->wdev, false); WARN(err, "type change: do_open returned %d", err); ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE); return ret; } int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { int ret; ASSERT_RTNL(); if (type == ieee80211_vif_type_p2p(&sdata->vif)) return 0; if (ieee80211_sdata_running(sdata)) { ret = ieee80211_runtime_change_iftype(sdata, type); if (ret) return ret; } else { /* Purge and reset type-dependent state. */ ieee80211_teardown_sdata(sdata); ieee80211_setup_sdata(sdata, type); } /* reset some values that shouldn't be kept across type changes */ if (type == NL80211_IFTYPE_STATION) sdata->u.mgd.use_4addr = false; return 0; } static void ieee80211_assign_perm_addr(struct ieee80211_local *local, u8 *perm_addr, enum nl80211_iftype type) { struct ieee80211_sub_if_data *sdata; u64 mask, start, addr, val, inc; u8 *m; u8 tmp_addr[ETH_ALEN]; int i; lockdep_assert_wiphy(local->hw.wiphy); /* default ... 
something at least */ memcpy(perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN); if (is_zero_ether_addr(local->hw.wiphy->addr_mask) && local->hw.wiphy->n_addresses <= 1) return; switch (type) { case NL80211_IFTYPE_MONITOR: /* doesn't matter */ break; case NL80211_IFTYPE_AP_VLAN: /* match up with an AP interface */ list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_AP) continue; memcpy(perm_addr, sdata->vif.addr, ETH_ALEN); break; } /* keep default if no AP interface present */ break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: if (ieee80211_hw_check(&local->hw, P2P_DEV_ADDR_FOR_INTF)) { list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) continue; if (!ieee80211_sdata_running(sdata)) continue; memcpy(perm_addr, sdata->vif.addr, ETH_ALEN); return; } } fallthrough; default: /* assign a new address if possible -- try n_addresses first */ for (i = 0; i < local->hw.wiphy->n_addresses; i++) { bool used = false; list_for_each_entry(sdata, &local->interfaces, list) { if (ether_addr_equal(local->hw.wiphy->addresses[i].addr, sdata->vif.addr)) { used = true; break; } } if (!used) { memcpy(perm_addr, local->hw.wiphy->addresses[i].addr, ETH_ALEN); break; } } /* try mask if available */ if (is_zero_ether_addr(local->hw.wiphy->addr_mask)) break; m = local->hw.wiphy->addr_mask; mask = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); if (__ffs64(mask) + hweight64(mask) != fls64(mask)) { /* not a contiguous mask ... not handled now! */ pr_info("not contiguous\n"); break; } /* * Pick address of existing interface in case user changed * MAC address manually, default to perm_addr. */ m = local->hw.wiphy->perm_addr; list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; m = sdata->vif.addr; break; } start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); inc = 1ULL<<__ffs64(mask); val = (start & mask); addr = (start & ~mask) | (val & mask); do { bool used = false; tmp_addr[5] = addr >> 0*8; tmp_addr[4] = addr >> 1*8; tmp_addr[3] = addr >> 2*8; tmp_addr[2] = addr >> 3*8; tmp_addr[1] = addr >> 4*8; tmp_addr[0] = addr >> 5*8; val += inc; list_for_each_entry(sdata, &local->interfaces, list) { if (ether_addr_equal(tmp_addr, sdata->vif.addr)) { used = true; break; } } if (!used) { memcpy(perm_addr, tmp_addr, ETH_ALEN); break; } addr = (start & ~mask) | (val & mask); } while (addr != start); break; } } int ieee80211_if_add(struct ieee80211_local *local, const char *name, unsigned char name_assign_type, struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) { struct net_device *ndev = NULL; struct ieee80211_sub_if_data *sdata = NULL; struct txq_info *txqi; int ret, i; ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { struct wireless_dev *wdev; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); if (!sdata) return -ENOMEM; wdev = &sdata->wdev; sdata->dev = NULL; strscpy(sdata->name, name, IFNAMSIZ); ieee80211_assign_perm_addr(local, wdev->address, type); memcpy(sdata->vif.addr, wdev->address, ETH_ALEN); ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); } else { int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size, sizeof(void *)); int txq_size = 0; if (type != 
NL80211_IFTYPE_AP_VLAN && (type != NL80211_IFTYPE_MONITOR || (params->flags & MONITOR_FLAG_ACTIVE))) txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; ndev = alloc_netdev_mqs(size + txq_size, name, name_assign_type, ieee80211_if_setup, 1, 1); if (!ndev) return -ENOMEM; dev_net_set(ndev, wiphy_net(local->hw.wiphy)); ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!ndev->tstats) { free_netdev(ndev); return -ENOMEM; } ndev->needed_headroom = local->tx_headroom + 4*6 /* four MAC addresses */ + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ + 6 /* mesh */ + 8 /* rfc1042/bridge tunnel */ - ETH_HLEN /* ethernet hard_header_len */ + IEEE80211_ENCRYPT_HEADROOM; ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { ieee80211_if_free(ndev); free_netdev(ndev); return ret; } ieee80211_assign_perm_addr(local, ndev->perm_addr, type); if (is_valid_ether_addr(params->macaddr)) eth_hw_addr_set(ndev, params->macaddr); else eth_hw_addr_set(ndev, ndev->perm_addr); SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); /* don't use IEEE80211_DEV_TO_SUB_IF -- it checks too much */ sdata = netdev_priv(ndev); ndev->ieee80211_ptr = &sdata->wdev; memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN); ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); memcpy(sdata->name, ndev->name, IFNAMSIZ); if (txq_size) { txqi = netdev_priv(ndev) + size; ieee80211_txq_init(sdata, NULL, txqi, 0); } sdata->dev = ndev; } /* initialise type-independent data */ sdata->wdev.wiphy = local->hw.wiphy; ieee80211_sdata_init(local, sdata); ieee80211_init_frag_cache(&sdata->frags); INIT_LIST_HEAD(&sdata->key_list); wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk, ieee80211_delayed_tailroom_dec); for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[i]; sdata->rc_rateidx_mask[i] = sband ? (1 << sband->n_bitrates) - 1 : 0; if (sband) { __le16 cap; u16 *vht_rate_mask; memcpy(sdata->rc_rateidx_mcs_mask[i], sband->ht_cap.mcs.rx_mask, sizeof(sdata->rc_rateidx_mcs_mask[i])); cap = sband->vht_cap.vht_mcs.rx_mcs_map; vht_rate_mask = sdata->rc_rateidx_vht_mcs_mask[i]; ieee80211_get_vht_mask_from_cap(cap, vht_rate_mask); } else { memset(sdata->rc_rateidx_mcs_mask[i], 0, sizeof(sdata->rc_rateidx_mcs_mask[i])); memset(sdata->rc_rateidx_vht_mcs_mask[i], 0, sizeof(sdata->rc_rateidx_vht_mcs_mask[i])); } } ieee80211_set_default_queues(sdata); sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL; sdata->deflink.user_power_level = local->user_power_level; /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); if (ndev) { ndev->ieee80211_ptr->use_4addr = params->use_4addr; if (type == NL80211_IFTYPE_STATION) sdata->u.mgd.use_4addr = params->use_4addr; ndev->features |= local->hw.netdev_features; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ndev->hw_features |= ndev->features & MAC80211_SUPPORTED_FEATURES_TX; sdata->vif.netdev_features = local->hw.netdev_features; netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops); /* MTU range is normally 256 - 2304, where the upper limit is * the maximum MSDU size. Monitor interfaces send and receive * MPDU and A-MSDU frames which may be much larger so we do * not impose an upper limit in that case. 
*/ ndev->min_mtu = 256; if (type == NL80211_IFTYPE_MONITOR) ndev->max_mtu = 0; else ndev->max_mtu = local->hw.max_mtu; ret = cfg80211_register_netdevice(ndev); if (ret) { free_netdev(ndev); return ret; } } mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); if (new_wdev) *new_wdev = &sdata->wdev; return 0; } void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) { ASSERT_RTNL(); lockdep_assert_wiphy(sdata->local->hw.wiphy); mutex_lock(&sdata->local->iflist_mtx); list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); if (sdata->vif.txq) ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); synchronize_rcu(); cfg80211_unregister_wdev(&sdata->wdev); if (!sdata->dev) { ieee80211_teardown_sdata(sdata); kfree(sdata); } } void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata) { if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state))) return; ieee80211_do_stop(sdata, true); } void ieee80211_remove_interfaces(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *tmp; LIST_HEAD(unreg_list); ASSERT_RTNL(); /* Before destroying the interfaces, make sure they're all stopped so * that the hardware is stopped. Otherwise, the driver might still be * iterating the interfaces during the shutdown, e.g. from a worker * or from RX processing or similar, and if it does so (using atomic * iteration) while we're manipulating the list, the iteration will * crash. * * After this, the hardware should be stopped and the driver should * have stopped all of its activities, so that we can do RCU-unaware * manipulations of the interface list below. */ cfg80211_shutdown_all_interfaces(local->hw.wiphy); wiphy_lock(local->hw.wiphy); WARN(local->open_count, "%s: open count remains %d\n", wiphy_name(local->hw.wiphy), local->open_count); mutex_lock(&local->iflist_mtx); list_splice_init(&local->interfaces, &unreg_list); mutex_unlock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &unreg_list, list) { bool netdev = sdata->dev; /* * Remove IP addresses explicitly, since the notifier will * skip the callbacks if wdev->registered is false, since * we can't acquire the wiphy_lock() again there if already * inside this locked section. 
*/ sdata->vif.cfg.arp_addr_cnt = 0; if (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.associated) ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_ARP_FILTER); list_del(&sdata->list); cfg80211_unregister_wdev(&sdata->wdev); if (!netdev) kfree(sdata); } wiphy_unlock(local->hw.wiphy); } static int netdev_notify(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ieee80211_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) return NOTIFY_DONE; if (!dev->ieee80211_ptr || !dev->ieee80211_ptr->wiphy) return NOTIFY_DONE; if (dev->ieee80211_ptr->wiphy->privid != mac80211_wiphy_privid) return NOTIFY_DONE; sdata = IEEE80211_DEV_TO_SUB_IF(dev); memcpy(sdata->name, dev->name, IFNAMSIZ); ieee80211_debugfs_rename_netdev(sdata); return NOTIFY_OK; } static struct notifier_block mac80211_netdev_notifier = { .notifier_call = netdev_notify, }; int ieee80211_iface_init(void) { return register_netdevice_notifier(&mac80211_netdev_notifier); } void ieee80211_iface_exit(void) { unregister_netdevice_notifier(&mac80211_netdev_notifier); } void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata) { if (sdata->vif.type == NL80211_IFTYPE_AP) atomic_inc(&sdata->u.ap.num_mcast_sta); else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) atomic_inc(&sdata->u.vlan.num_mcast_sta); } void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata) { if (sdata->vif.type == NL80211_IFTYPE_AP) atomic_dec(&sdata->u.ap.num_mcast_sta); else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) atomic_dec(&sdata->u.vlan.num_mcast_sta); }
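/*
 * Illustrative sketch (hypothetical helper, not part of mac80211): the
 * address-mask contiguity test used by ieee80211_assign_perm_addr() above.
 * A wiphy addr_mask is only usable there when its set bits form a single
 * contiguous run, which holds exactly when the index of the lowest set bit
 * plus the number of set bits equals one past the index of the highest set
 * bit.  __ffs64(), hweight64() and fls64() are the same kernel helpers the
 * code above uses.
 */
static inline bool example_addr_mask_is_contiguous(u64 mask)
{
	/* e.g. 0x0000ff00 -> true (one run, bits 8..15);
	 *      0x00000505 -> false (two separate runs)
	 */
	return mask && (__ffs64(mask) + hweight64(mask) == fls64(mask));
}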
// SPDX-License-Identifier: GPL-2.0-or-later /* * Generic MIDI synth driver for ALSA sequencer * Copyright (c) 1998 by Frank van de Pol <fvdpol@coil.demon.nl> * Jaroslav Kysela <perex@perex.cz> */ /* Possible options for midisynth module: - automatic opening of midi ports on first received event or subscription (close will be performed when client leaves) */ #include <linux/init.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/module.h> #include <linux/mutex.h> #include <sound/core.h> #include <sound/rawmidi.h> #include <sound/seq_kernel.h> #include <sound/seq_device.h> #include <sound/seq_midi_event.h> #include <sound/initval.h> MODULE_AUTHOR("Frank van de Pol <fvdpol@coil.demon.nl>, Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("Advanced Linux Sound Architecture sequencer MIDI synth."); MODULE_LICENSE("GPL"); static int output_buffer_size = PAGE_SIZE; module_param(output_buffer_size, int, 0644); MODULE_PARM_DESC(output_buffer_size, "Output buffer size in bytes."); static int input_buffer_size = PAGE_SIZE; module_param(input_buffer_size, int, 0644); MODULE_PARM_DESC(input_buffer_size, "Input buffer size in bytes."); /* data for this midi synth driver */ struct seq_midisynth { struct snd_card *card; struct snd_rawmidi *rmidi; int device; int subdevice; struct snd_rawmidi_file input_rfile; struct snd_rawmidi_file output_rfile; int seq_client; int seq_port; struct snd_midi_event *parser; }; struct seq_midisynth_client { int seq_client; int num_ports; int ports_per_device[SNDRV_RAWMIDI_DEVICES]; struct seq_midisynth *ports[SNDRV_RAWMIDI_DEVICES]; }; static struct seq_midisynth_client *synths[SNDRV_CARDS]; static DEFINE_MUTEX(register_mutex); /* handle rawmidi input event (MIDI v1.0 stream) */ static void snd_midi_input_event(struct
snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; struct seq_midisynth *msynth; struct snd_seq_event ev; char buf[16], *pbuf; long res; if (substream == NULL) return; runtime = substream->runtime; msynth = runtime->private_data; if (msynth == NULL) return; memset(&ev, 0, sizeof(ev)); while (runtime->avail > 0) { res = snd_rawmidi_kernel_read(substream, buf, sizeof(buf)); if (res <= 0) continue; if (msynth->parser == NULL) continue; pbuf = buf; while (res-- > 0) { if (!snd_midi_event_encode_byte(msynth->parser, *pbuf++, &ev)) continue; ev.source.port = msynth->seq_port; ev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; snd_seq_kernel_client_dispatch(msynth->seq_client, &ev, 1, 0); /* clear event and reset header */ memset(&ev, 0, sizeof(ev)); } } } static int dump_midi(struct snd_rawmidi_substream *substream, const char *buf, int count) { struct snd_rawmidi_runtime *runtime; int tmp; if (snd_BUG_ON(!substream || !buf)) return -EINVAL; runtime = substream->runtime; tmp = runtime->avail; if (tmp < count) { if (printk_ratelimit()) pr_err("ALSA: seq_midi: MIDI output buffer overrun\n"); return -ENOMEM; } if (snd_rawmidi_kernel_write(substream, buf, count) < count) return -EINVAL; return 0; } /* callback for snd_seq_dump_var_event(), bridging to dump_midi() */ static int __dump_midi(void *ptr, void *buf, int count) { return dump_midi(ptr, buf, count); } static int event_process_midi(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { struct seq_midisynth *msynth = private_data; unsigned char msg[10]; /* buffer for constructing midi messages */ struct snd_rawmidi_substream *substream; int len; if (snd_BUG_ON(!msynth)) return -EINVAL; substream = msynth->output_rfile.output; if (substream == NULL) return -ENODEV; if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { /* special case, to save space */ if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) { /* invalid event */ pr_debug("ALSA: seq_midi: invalid sysex event flags = 0x%x\n", ev->flags); return 0; } snd_seq_dump_var_event(ev, __dump_midi, substream); snd_midi_event_reset_decode(msynth->parser); } else { if (msynth->parser == NULL) return -EIO; len = snd_midi_event_decode(msynth->parser, msg, sizeof(msg), ev); if (len < 0) return 0; if (dump_midi(substream, msg, len) < 0) snd_midi_event_reset_decode(msynth->parser); } return 0; } static int snd_seq_midisynth_new(struct seq_midisynth *msynth, struct snd_card *card, int device, int subdevice) { if (snd_midi_event_new(MAX_MIDI_EVENT_BUF, &msynth->parser) < 0) return -ENOMEM; msynth->card = card; msynth->device = device; msynth->subdevice = subdevice; return 0; } /* open associated midi device for input */ static int midisynth_subscribe(void *private_data, struct snd_seq_port_subscribe *info) { int err; struct seq_midisynth *msynth = private_data; struct snd_rawmidi_runtime *runtime; struct snd_rawmidi_params params; /* open midi port */ err = snd_rawmidi_kernel_open(msynth->rmidi, msynth->subdevice, SNDRV_RAWMIDI_LFLG_INPUT, &msynth->input_rfile); if (err < 0) { pr_debug("ALSA: seq_midi: midi input open failed!!!\n"); return err; } runtime = msynth->input_rfile.input->runtime; memset(&params, 0, sizeof(params)); params.avail_min = 1; params.buffer_size = input_buffer_size; err = snd_rawmidi_input_params(msynth->input_rfile.input, &params); if (err < 0) { snd_rawmidi_kernel_release(&msynth->input_rfile); return err; } snd_midi_event_reset_encode(msynth->parser); runtime->event = snd_midi_input_event; runtime->private_data = 
msynth; snd_rawmidi_kernel_read(msynth->input_rfile.input, NULL, 0); return 0; } /* close associated midi device for input */ static int midisynth_unsubscribe(void *private_data, struct snd_seq_port_subscribe *info) { int err; struct seq_midisynth *msynth = private_data; if (snd_BUG_ON(!msynth->input_rfile.input)) return -EINVAL; err = snd_rawmidi_kernel_release(&msynth->input_rfile); return err; } /* open associated midi device for output */ static int midisynth_use(void *private_data, struct snd_seq_port_subscribe *info) { int err; struct seq_midisynth *msynth = private_data; struct snd_rawmidi_params params; /* open midi port */ err = snd_rawmidi_kernel_open(msynth->rmidi, msynth->subdevice, SNDRV_RAWMIDI_LFLG_OUTPUT, &msynth->output_rfile); if (err < 0) { pr_debug("ALSA: seq_midi: midi output open failed!!!\n"); return err; } memset(&params, 0, sizeof(params)); params.avail_min = 1; params.buffer_size = output_buffer_size; params.no_active_sensing = 1; err = snd_rawmidi_output_params(msynth->output_rfile.output, &params); if (err < 0) { snd_rawmidi_kernel_release(&msynth->output_rfile); return err; } snd_midi_event_reset_decode(msynth->parser); return 0; } /* close associated midi device for output */ static int midisynth_unuse(void *private_data, struct snd_seq_port_subscribe *info) { struct seq_midisynth *msynth = private_data; if (snd_BUG_ON(!msynth->output_rfile.output)) return -EINVAL; snd_rawmidi_drain_output(msynth->output_rfile.output); return snd_rawmidi_kernel_release(&msynth->output_rfile); } /* delete given midi synth port */ static void snd_seq_midisynth_delete(struct seq_midisynth *msynth) { if (msynth == NULL) return; if (msynth->seq_client > 0) { /* delete port */ snd_seq_event_port_detach(msynth->seq_client, msynth->seq_port); } snd_midi_event_free(msynth->parser); } /* register new midi synth port */ static int snd_seq_midisynth_probe(struct device *_dev) { struct snd_seq_device *dev = to_seq_dev(_dev); struct seq_midisynth_client *client; struct seq_midisynth *msynth, *ms; struct snd_seq_port_info *port __free(kfree) = NULL; struct snd_rawmidi_info *info __free(kfree) = NULL; struct snd_rawmidi *rmidi = dev->private_data; int newclient = 0; unsigned int p, ports; struct snd_seq_port_callback pcallbacks; struct snd_card *card = dev->card; int device = dev->device; unsigned int input_count = 0, output_count = 0; if (snd_BUG_ON(!card || device < 0 || device >= SNDRV_RAWMIDI_DEVICES)) return -EINVAL; info = kmalloc(sizeof(*info), GFP_KERNEL); if (! info) return -ENOMEM; info->device = device; info->stream = SNDRV_RAWMIDI_STREAM_OUTPUT; info->subdevice = 0; if (snd_rawmidi_info_select(card, info) >= 0) output_count = info->subdevices_count; info->stream = SNDRV_RAWMIDI_STREAM_INPUT; if (snd_rawmidi_info_select(card, info) >= 0) { input_count = info->subdevices_count; } ports = output_count; if (ports < input_count) ports = input_count; if (ports == 0) return -ENODEV; if (ports > (256 / SNDRV_RAWMIDI_DEVICES)) ports = 256 / SNDRV_RAWMIDI_DEVICES; guard(mutex)(&register_mutex); client = synths[card->number]; if (client == NULL) { newclient = 1; client = kzalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) return -ENOMEM; client->seq_client = snd_seq_create_kernel_client( card, 0, "%s", card->shortname[0] ? 
(const char *)card->shortname : "External MIDI"); if (client->seq_client < 0) { kfree(client); return -ENOMEM; } } msynth = kcalloc(ports, sizeof(struct seq_midisynth), GFP_KERNEL); port = kmalloc(sizeof(*port), GFP_KERNEL); if (msynth == NULL || port == NULL) goto __nomem; for (p = 0; p < ports; p++) { ms = &msynth[p]; ms->rmidi = rmidi; if (snd_seq_midisynth_new(ms, card, device, p) < 0) goto __nomem; /* declare port */ memset(port, 0, sizeof(*port)); port->addr.client = client->seq_client; port->addr.port = device * (256 / SNDRV_RAWMIDI_DEVICES) + p; port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; memset(info, 0, sizeof(*info)); info->device = device; if (p < output_count) info->stream = SNDRV_RAWMIDI_STREAM_OUTPUT; else info->stream = SNDRV_RAWMIDI_STREAM_INPUT; info->subdevice = p; if (snd_rawmidi_info_select(card, info) >= 0) strcpy(port->name, info->subname); if (! port->name[0]) { if (info->name[0]) { if (ports > 1) scnprintf(port->name, sizeof(port->name), "%s-%u", info->name, p); else scnprintf(port->name, sizeof(port->name), "%s", info->name); } else { /* last resort */ if (ports > 1) sprintf(port->name, "MIDI %d-%d-%u", card->number, device, p); else sprintf(port->name, "MIDI %d-%d", card->number, device); } } if ((info->flags & SNDRV_RAWMIDI_INFO_OUTPUT) && p < output_count) port->capability |= SNDRV_SEQ_PORT_CAP_WRITE | SNDRV_SEQ_PORT_CAP_SYNC_WRITE | SNDRV_SEQ_PORT_CAP_SUBS_WRITE; if ((info->flags & SNDRV_RAWMIDI_INFO_INPUT) && p < input_count) port->capability |= SNDRV_SEQ_PORT_CAP_READ | SNDRV_SEQ_PORT_CAP_SYNC_READ | SNDRV_SEQ_PORT_CAP_SUBS_READ; if ((port->capability & (SNDRV_SEQ_PORT_CAP_WRITE|SNDRV_SEQ_PORT_CAP_READ)) == (SNDRV_SEQ_PORT_CAP_WRITE|SNDRV_SEQ_PORT_CAP_READ) && info->flags & SNDRV_RAWMIDI_INFO_DUPLEX) port->capability |= SNDRV_SEQ_PORT_CAP_DUPLEX; if (port->capability & SNDRV_SEQ_PORT_CAP_READ) port->direction |= SNDRV_SEQ_PORT_DIR_INPUT; if (port->capability & SNDRV_SEQ_PORT_CAP_WRITE) port->direction |= SNDRV_SEQ_PORT_DIR_OUTPUT; port->type = SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_HARDWARE | SNDRV_SEQ_PORT_TYPE_PORT; port->midi_channels = 16; memset(&pcallbacks, 0, sizeof(pcallbacks)); pcallbacks.owner = THIS_MODULE; pcallbacks.private_data = ms; pcallbacks.subscribe = midisynth_subscribe; pcallbacks.unsubscribe = midisynth_unsubscribe; pcallbacks.use = midisynth_use; pcallbacks.unuse = midisynth_unuse; pcallbacks.event_input = event_process_midi; port->kernel = &pcallbacks; if (rmidi->ops && rmidi->ops->get_port_info) rmidi->ops->get_port_info(rmidi, p, port); if (snd_seq_kernel_client_ctl(client->seq_client, SNDRV_SEQ_IOCTL_CREATE_PORT, port)<0) goto __nomem; ms->seq_client = client->seq_client; ms->seq_port = port->addr.port; } client->ports_per_device[device] = ports; client->ports[device] = msynth; client->num_ports++; if (newclient) synths[card->number] = client; return 0; /* success */ __nomem: if (msynth != NULL) { for (p = 0; p < ports; p++) snd_seq_midisynth_delete(&msynth[p]); kfree(msynth); } if (newclient) { snd_seq_delete_kernel_client(client->seq_client); kfree(client); } return -ENOMEM; } /* release midi synth port */ static int snd_seq_midisynth_remove(struct device *_dev) { struct snd_seq_device *dev = to_seq_dev(_dev); struct seq_midisynth_client *client; struct seq_midisynth *msynth; struct snd_card *card = dev->card; int device = dev->device, p, ports; guard(mutex)(&register_mutex); client = synths[card->number]; if (client == NULL || client->ports[device] == NULL) return -ENODEV; ports = 
client->ports_per_device[device]; client->ports_per_device[device] = 0; msynth = client->ports[device]; client->ports[device] = NULL; for (p = 0; p < ports; p++) snd_seq_midisynth_delete(&msynth[p]); kfree(msynth); client->num_ports--; if (client->num_ports <= 0) { snd_seq_delete_kernel_client(client->seq_client); synths[card->number] = NULL; kfree(client); } return 0; } static struct snd_seq_driver seq_midisynth_driver = { .driver = { .name = KBUILD_MODNAME, .probe = snd_seq_midisynth_probe, .remove = snd_seq_midisynth_remove, }, .id = SNDRV_SEQ_DEV_ID_MIDISYNTH, .argsize = 0, }; module_snd_seq_driver(seq_midisynth_driver);
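/*
 * Illustrative sketch (hypothetical helper, not part of this driver):
 * the sequencer port numbering used by snd_seq_midisynth_probe() above.
 * Each rawmidi device gets a window of 256 / SNDRV_RAWMIDI_DEVICES port
 * numbers (32 per device if SNDRV_RAWMIDI_DEVICES is 8), and the number
 * of ports exposed per device is clamped to that window.
 */
static inline int example_midisynth_port(int device, int subdevice)
{
	/* e.g. device 1, subdevice 3 -> 1 * 32 + 3 = 35 (with 8 rawmidi devices) */
	return device * (256 / SNDRV_RAWMIDI_DEVICES) + subdevice;
}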
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM kmem #if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KMEM_H #include <linux/types.h> #include <linux/tracepoint.h> #include <trace/events/mmflags.h> TRACE_EVENT(kmem_cache_alloc, TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s, gfp_t gfp_flags, int node), TP_ARGS(call_site, ptr, s, gfp_flags, node), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) __field( size_t, bytes_req ) __field( size_t, bytes_alloc ) __field( unsigned long, gfp_flags ) __field( int, node ) __field( bool, accounted ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; __entry->bytes_req = s->object_size; __entry->bytes_alloc = s->size; __entry->gfp_flags = (__force unsigned long)gfp_flags; __entry->node = node; __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ? ((gfp_flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)) : false; ), TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s", (void *)__entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags), __entry->node, __entry->accounted ?
"true" : "false") ); TRACE_EVENT(kmalloc, TP_PROTO(unsigned long call_site, const void *ptr, size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, int node), TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) __field( size_t, bytes_req ) __field( size_t, bytes_alloc ) __field( unsigned long, gfp_flags ) __field( int, node ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; __entry->bytes_req = bytes_req; __entry->bytes_alloc = bytes_alloc; __entry->gfp_flags = (__force unsigned long)gfp_flags; __entry->node = node; ), TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s", (void *)__entry->call_site, __entry->ptr, __entry->bytes_req, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags), __entry->node, (IS_ENABLED(CONFIG_MEMCG_KMEM) && (__entry->gfp_flags & (__force unsigned long)__GFP_ACCOUNT)) ? "true" : "false") ); TRACE_EVENT(kfree, TP_PROTO(unsigned long call_site, const void *ptr), TP_ARGS(call_site, ptr), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; ), TP_printk("call_site=%pS ptr=%p", (void *)__entry->call_site, __entry->ptr) ); TRACE_EVENT(kmem_cache_free, TP_PROTO(unsigned long call_site, const void *ptr, const struct kmem_cache *s), TP_ARGS(call_site, ptr, s), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) __string( name, s->name ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; __assign_str(name, s->name); ), TP_printk("call_site=%pS ptr=%p name=%s", (void *)__entry->call_site, __entry->ptr, __get_str(name)) ); TRACE_EVENT(mm_page_free, TP_PROTO(struct page *page, unsigned int order), TP_ARGS(page, order), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->order = order; ), TP_printk("page=%p pfn=0x%lx order=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order) ); TRACE_EVENT(mm_page_free_batched, TP_PROTO(struct page *page), TP_ARGS(page), TP_STRUCT__entry( __field( unsigned long, pfn ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); ), TP_printk("page=%p pfn=0x%lx order=0", pfn_to_page(__entry->pfn), __entry->pfn) ); TRACE_EVENT(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype), TP_ARGS(page, order, gfp_flags, migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( unsigned long, gfp_flags ) __field( int, migratetype ) ), TP_fast_assign( __entry->pfn = page ? page_to_pfn(page) : -1UL; __entry->order = order; __entry->gfp_flags = (__force unsigned long)gfp_flags; __entry->migratetype = migratetype; ), TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s", __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, __entry->pfn != -1UL ? __entry->pfn : 0, __entry->order, __entry->migratetype, show_gfp_flags(__entry->gfp_flags)) ); DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype, int percpu_refill), TP_ARGS(page, order, migratetype, percpu_refill), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( int, migratetype ) __field( int, percpu_refill ) ), TP_fast_assign( __entry->pfn = page ? 
page_to_pfn(page) : -1UL; __entry->order = order; __entry->migratetype = migratetype; __entry->percpu_refill = percpu_refill; ), TP_printk("page=%p pfn=0x%lx order=%u migratetype=%d percpu_refill=%d", __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, __entry->pfn != -1UL ? __entry->pfn : 0, __entry->order, __entry->migratetype, __entry->percpu_refill) ); DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, TP_PROTO(struct page *page, unsigned int order, int migratetype, int percpu_refill), TP_ARGS(page, order, migratetype, percpu_refill) ); TRACE_EVENT(mm_page_pcpu_drain, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) __field( int, migratetype ) ), TP_fast_assign( __entry->pfn = page ? page_to_pfn(page) : -1UL; __entry->order = order; __entry->migratetype = migratetype; ), TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, int alloc_order, int fallback_order, int alloc_migratetype, int fallback_migratetype), TP_ARGS(page, alloc_order, fallback_order, alloc_migratetype, fallback_migratetype), TP_STRUCT__entry( __field( unsigned long, pfn ) __field( int, alloc_order ) __field( int, fallback_order ) __field( int, alloc_migratetype ) __field( int, fallback_migratetype ) __field( int, change_ownership ) ), TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->alloc_order = alloc_order; __entry->fallback_order = fallback_order; __entry->alloc_migratetype = alloc_migratetype; __entry->fallback_migratetype = fallback_migratetype; __entry->change_ownership = (alloc_migratetype == get_pageblock_migratetype(page)); ), TP_printk("page=%p pfn=0x%lx alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", pfn_to_page(__entry->pfn), __entry->pfn, __entry->alloc_order, __entry->fallback_order, pageblock_order, __entry->alloc_migratetype, __entry->fallback_migratetype, __entry->fallback_order < pageblock_order, __entry->change_ownership) ); TRACE_EVENT(mm_alloc_contig_migrate_range_info, TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_migrated, unsigned long nr_reclaimed, unsigned long nr_mapped, int migratetype), TP_ARGS(start, end, nr_migrated, nr_reclaimed, nr_mapped, migratetype), TP_STRUCT__entry( __field(unsigned long, start) __field(unsigned long, end) __field(unsigned long, nr_migrated) __field(unsigned long, nr_reclaimed) __field(unsigned long, nr_mapped) __field(int, migratetype) ), TP_fast_assign( __entry->start = start; __entry->end = end; __entry->nr_migrated = nr_migrated; __entry->nr_reclaimed = nr_reclaimed; __entry->nr_mapped = nr_mapped; __entry->migratetype = migratetype; ), TP_printk("start=0x%lx end=0x%lx migratetype=%d nr_migrated=%lu nr_reclaimed=%lu nr_mapped=%lu", __entry->start, __entry->end, __entry->migratetype, __entry->nr_migrated, __entry->nr_reclaimed, __entry->nr_mapped) ); /* * Required for uniquely and securely identifying mm in rss_stat tracepoint. 
*/ #ifndef __PTR_TO_HASHVAL static unsigned int __maybe_unused mm_ptr_to_hash(const void *ptr) { int ret; unsigned long hashval; ret = ptr_to_hashval(ptr, &hashval); if (ret) return 0; /* The hashed value is only 32-bit */ return (unsigned int)hashval; } #define __PTR_TO_HASHVAL #endif #define TRACE_MM_PAGES \ EM(MM_FILEPAGES) \ EM(MM_ANONPAGES) \ EM(MM_SWAPENTS) \ EMe(MM_SHMEMPAGES) #undef EM #undef EMe #define EM(a) TRACE_DEFINE_ENUM(a); #define EMe(a) TRACE_DEFINE_ENUM(a); TRACE_MM_PAGES #undef EM #undef EMe #define EM(a) { a, #a }, #define EMe(a) { a, #a } TRACE_EVENT(rss_stat, TP_PROTO(struct mm_struct *mm, int member), TP_ARGS(mm, member), TP_STRUCT__entry( __field(unsigned int, mm_id) __field(unsigned int, curr) __field(int, member) __field(long, size) ), TP_fast_assign( __entry->mm_id = mm_ptr_to_hash(mm); __entry->curr = !!(current->mm == mm); __entry->member = member; __entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member]) << PAGE_SHIFT); ), TP_printk("mm_id=%u curr=%d type=%s size=%ldB", __entry->mm_id, __entry->curr, __print_symbolic(__entry->member, TRACE_MM_PAGES), __entry->size) ); #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
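/*
 * Illustrative note (not part of the header above): the EM()/EMe() macros
 * expand TRACE_MM_PAGES twice.  With EM(a)/EMe(a) defined as
 * TRACE_DEFINE_ENUM(a);, the first expansion is roughly:
 *
 *	TRACE_DEFINE_ENUM(MM_FILEPAGES);
 *	TRACE_DEFINE_ENUM(MM_ANONPAGES);
 *	TRACE_DEFINE_ENUM(MM_SWAPENTS);
 *	TRACE_DEFINE_ENUM(MM_SHMEMPAGES);
 *
 * With EM(a) redefined as { a, #a }, and EMe(a) as { a, #a }, the
 * __print_symbolic() call in the rss_stat TP_printk() becomes roughly:
 *
 *	__print_symbolic(__entry->member,
 *			 { MM_FILEPAGES,  "MM_FILEPAGES"  },
 *			 { MM_ANONPAGES,  "MM_ANONPAGES"  },
 *			 { MM_SWAPENTS,   "MM_SWAPENTS"   },
 *			 { MM_SHMEMPAGES, "MM_SHMEMPAGES" })
 *
 * which is why the trace output shows type=MM_ANONPAGES and so on rather
 * than a raw enum value.
 */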
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_FIND_H_ #define __LINUX_FIND_H_ #ifndef __LINUX_BITMAP_H #error only <linux/bitmap.h> can be included directly #endif #include <linux/bitops.h> unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits, unsigned long start); unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start); unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start); unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start); unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); unsigned long __find_nth_bit(const unsigned long *addr,
unsigned long size, unsigned long n); unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n); unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n); unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, unsigned long size, unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); #ifdef __BIG_ENDIAN unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size); unsigned long _find_next_zero_bit_le(const unsigned long *addr, unsigned long size, unsigned long offset); unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long size, unsigned long offset); #endif #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region * @addr: The address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * * Returns the bit number for the next set bit * If no bits are set, returns @size. */ static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { unsigned long val; if (unlikely(offset >= size)) return size; val = *addr & GENMASK(size - 1, offset); return val ? __ffs(val) : size; } return _find_next_bit(addr, size, offset); } #endif #ifndef find_next_and_bit /** * find_next_and_bit - find the next set bit in both memory regions * @addr1: The first address to base the search on * @addr2: The second address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * * Returns the bit number for the next set bit * If no bits are set, returns @size. */ static inline unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { unsigned long val; if (unlikely(offset >= size)) return size; val = *addr1 & *addr2 & GENMASK(size - 1, offset); return val ? __ffs(val) : size; } return _find_next_and_bit(addr1, addr2, size, offset); } #endif #ifndef find_next_andnot_bit /** * find_next_andnot_bit - find the next set bit in *addr1 excluding all the bits * in *addr2 * @addr1: The first address to base the search on * @addr2: The second address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * * Returns the bit number for the next set bit * If no bits are set, returns @size. */ static inline unsigned long find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { unsigned long val; if (unlikely(offset >= size)) return size; val = *addr1 & ~*addr2 & GENMASK(size - 1, offset); return val ? 
__ffs(val) : size; } return _find_next_andnot_bit(addr1, addr2, size, offset); } #endif #ifndef find_next_or_bit /** * find_next_or_bit - find the next set bit in either memory regions * @addr1: The first address to base the search on * @addr2: The second address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * * Returns the bit number for the next set bit * If no bits are set, returns @size. */ static inline unsigned long find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { unsigned long val; if (unlikely(offset >= size)) return size; val = (*addr1 | *addr2) & GENMASK(size - 1, offset); return val ? __ffs(val) : size; } return _find_next_or_bit(addr1, addr2, size, offset); } #endif #ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region * @addr: The address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * * Returns the bit number of the next zero bit * If no bits are zero, returns @size. */ static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { unsigned long val; if (unlikely(offset >= size)) return size; val = *addr | ~GENMASK(size - 1, offset); return val == ~0UL ? size : ffz(val); } return _find_next_zero_bit(addr, size, offset); } #endif #ifndef find_first_bit /** * find_first_bit - find the first set bit in a memory region * @addr: The address to start the search at * @size: The maximum number of bits to search * * Returns the bit number of the first set bit. * If no bits are set, returns @size. */ static inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { unsigned long val = *addr & GENMASK(size - 1, 0); return val ? __ffs(val) : size; } return _find_first_bit(addr, size); } #endif /** * find_nth_bit - find N'th set bit in a memory region * @addr: The address to start the search at * @size: The maximum number of bits to search * @n: The number of set bit, which position is needed, counting from 0 * * The following is semantically equivalent: * idx = find_nth_bit(addr, size, 0); * idx = find_first_bit(addr, size); * * Returns the bit number of the N'th set bit. * If no such, returns @size. */ static inline unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) { if (n >= size) return size; if (small_const_nbits(size)) { unsigned long val = *addr & GENMASK(size - 1, 0); return val ? fns(val, n) : size; } return __find_nth_bit(addr, size, n); } /** * find_nth_and_bit - find N'th set bit in 2 memory regions * @addr1: The 1st address to start the search at * @addr2: The 2nd address to start the search at * @size: The maximum number of bits to search * @n: The number of set bit, which position is needed, counting from 0 * * Returns the bit number of the N'th set bit. * If no such, returns @size. */ static inline unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { if (n >= size) return size; if (small_const_nbits(size)) { unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); return val ? 
fns(val, n) : size; } return __find_nth_and_bit(addr1, addr2, size, n); } /** * find_nth_andnot_bit - find N'th set bit in 2 memory regions, * flipping bits in 2nd region * @addr1: The 1st address to start the search at * @addr2: The 2nd address to start the search at * @size: The maximum number of bits to search * @n: The number of set bit, which position is needed, counting from 0 * * Returns the bit number of the N'th set bit. * If no such, returns @size. */ static inline unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { if (n >= size) return size; if (small_const_nbits(size)) { unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); return val ? fns(val, n) : size; } return __find_nth_andnot_bit(addr1, addr2, size, n); } /** * find_nth_and_andnot_bit - find N'th set bit in 2 memory regions, * excluding those set in 3rd region * @addr1: The 1st address to start the search at * @addr2: The 2nd address to start the search at * @addr3: The 3rd address to start the search at * @size: The maximum number of bits to search * @n: The number of set bit, which position is needed, counting from 0 * * Returns the bit number of the N'th set bit. * If no such, returns @size. */ static __always_inline unsigned long find_nth_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, unsigned long size, unsigned long n) { if (n >= size) return size; if (small_const_nbits(size)) { unsigned long val = *addr1 & *addr2 & (~*addr3) & GENMASK(size - 1, 0); return val ? fns(val, n) : size; } return __find_nth_and_andnot_bit(addr1, addr2, addr3, size, n); } #ifndef find_first_and_bit /** * find_first_and_bit - find the first set bit in both memory regions * @addr1: The first address to base the search on * @addr2: The second address to base the search on * @size: The bitmap size in bits * * Returns the bit number for the next set bit * If no bits are set, returns @size. */ static inline unsigned long find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size) { if (small_const_nbits(size)) { unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); return val ? __ffs(val) : size; } return _find_first_and_bit(addr1, addr2, size); } #endif #ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region * @addr: The address to start the search at * @size: The maximum number of bits to search * * Returns the bit number of the first cleared bit. * If no bits are zero, returns @size. */ static inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { unsigned long val = *addr | ~GENMASK(size - 1, 0); return val == ~0UL ? size : ffz(val); } return _find_first_zero_bit(addr, size); } #endif #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at * @size: The number of bits to search * * Returns the bit number of the last set bit, or size. */ static inline unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { unsigned long val = *addr & GENMASK(size - 1, 0); return val ? 
__fls(val) : size; } return _find_last_bit(addr, size); } #endif /** * find_next_and_bit_wrap - find the next set bit in both memory regions * @addr1: The first address to base the search on * @addr2: The second address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. */ static inline unsigned long find_next_and_bit_wrap(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) { unsigned long bit = find_next_and_bit(addr1, addr2, size, offset); if (bit < size || offset == 0) return bit; bit = find_first_and_bit(addr1, addr2, offset); return bit < offset ? bit : size; } /** * find_next_bit_wrap - find the next set bit in a memory region * @addr: The address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. */ static inline unsigned long find_next_bit_wrap(const unsigned long *addr, unsigned long size, unsigned long offset) { unsigned long bit = find_next_bit(addr, size, offset); if (bit < size || offset == 0) return bit; bit = find_first_bit(addr, offset); return bit < offset ? bit : size; } /* * Helper for for_each_set_bit_wrap(). Make sure you're doing right thing * before using it alone. */ static inline unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size, unsigned long start, unsigned long n) { unsigned long bit; /* If not wrapped around */ if (n > start) { /* and have a bit, just return it. */ bit = find_next_bit(bitmap, size, n); if (bit < size) return bit; /* Otherwise, wrap around and ... */ n = 0; } /* Search the other part. */ bit = find_next_bit(bitmap, start, n); return bit < start ? bit : size; } /** * find_next_clump8 - find next 8-bit clump with set bits in a memory region * @clump: location to store copy of found clump * @addr: address to base the search on * @size: bitmap size in number of bits * @offset: bit offset at which to start searching * * Returns the bit offset for the next set clump; the found clump value is * copied to the location pointed by @clump. If no bits are set, returns @size. */ extern unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr, unsigned long size, unsigned long offset); #define find_first_clump8(clump, bits, size) \ find_next_clump8((clump), (bits), (size), 0) #if defined(__LITTLE_ENDIAN) static inline unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_zero_bit(addr, size, offset); } static inline unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_bit(addr, size, offset); } static inline unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { return find_first_zero_bit(addr, size); } #elif defined(__BIG_ENDIAN) #ifndef find_next_zero_bit_le static inline unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { unsigned long val = *(const unsigned long *)addr; if (unlikely(offset >= size)) return size; val = swab(val) | ~GENMASK(size - 1, offset); return val == ~0UL ? 
size : ffz(val); } return _find_next_zero_bit_le(addr, size, offset); } #endif #ifndef find_first_zero_bit_le static inline unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { if (small_const_nbits(size)) { unsigned long val = swab(*(const unsigned long *)addr) | ~GENMASK(size - 1, 0); return val == ~0UL ? size : ffz(val); } return _find_first_zero_bit_le(addr, size); } #endif #ifndef find_next_bit_le static inline unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { unsigned long val = *(const unsigned long *)addr; if (unlikely(offset >= size)) return size; val = swab(val) & GENMASK(size - 1, offset); return val ? __ffs(val) : size; } return _find_next_bit_le(addr, size, offset); } #endif #else #error "Please fix <asm/byteorder.h>" #endif #define for_each_set_bit(bit, addr, size) \ for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) #define for_each_and_bit(bit, addr1, addr2, size) \ for ((bit) = 0; \ (bit) = find_next_and_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ (bit)++) #define for_each_andnot_bit(bit, addr1, addr2, size) \ for ((bit) = 0; \ (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ (bit)++) #define for_each_or_bit(bit, addr1, addr2, size) \ for ((bit) = 0; \ (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ (bit)++) /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) #define for_each_clear_bit(bit, addr, size) \ for ((bit) = 0; \ (bit) = find_next_zero_bit((addr), (size), (bit)), (bit) < (size); \ (bit)++) /* same as for_each_clear_bit() but use bit as value to start with */ #define for_each_clear_bit_from(bit, addr, size) \ for (; (bit) = find_next_zero_bit((addr), (size), (bit)), (bit) < (size); (bit)++) /** * for_each_set_bitrange - iterate over all set bit ranges [b; e) * @b: bit offset of start of current bitrange (first set bit) * @e: bit offset of end of current bitrange (first unset bit) * @addr: bitmap address to base the search on * @size: bitmap size in number of bits */ #define for_each_set_bitrange(b, e, addr, size) \ for ((b) = 0; \ (b) = find_next_bit((addr), (size), b), \ (e) = find_next_zero_bit((addr), (size), (b) + 1), \ (b) < (size); \ (b) = (e) + 1) /** * for_each_set_bitrange_from - iterate over all set bit ranges [b; e) * @b: bit offset of start of current bitrange (first set bit); must be initialized * @e: bit offset of end of current bitrange (first unset bit) * @addr: bitmap address to base the search on * @size: bitmap size in number of bits */ #define for_each_set_bitrange_from(b, e, addr, size) \ for (; \ (b) = find_next_bit((addr), (size), (b)), \ (e) = find_next_zero_bit((addr), (size), (b) + 1), \ (b) < (size); \ (b) = (e) + 1) /** * for_each_clear_bitrange - iterate over all unset bit ranges [b; e) * @b: bit offset of start of current bitrange (first unset bit) * @e: bit offset of end of current bitrange (first set bit) * @addr: bitmap address to base the search on * @size: bitmap size in number of bits */ #define for_each_clear_bitrange(b, e, addr, size) \ for ((b) = 0; \ (b) = find_next_zero_bit((addr), (size), (b)), \ (e) = find_next_bit((addr), (size), (b) + 1), \ (b) < (size); \ (b) = (e) + 1) /** * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e) * @b: bit offset of start of 
current bitrange (first set bit); must be initialized * @e: bit offset of end of current bitrange (first unset bit) * @addr: bitmap address to base the search on * @size: bitmap size in number of bits */ #define for_each_clear_bitrange_from(b, e, addr, size) \ for (; \ (b) = find_next_zero_bit((addr), (size), (b)), \ (e) = find_next_bit((addr), (size), (b) + 1), \ (b) < (size); \ (b) = (e) + 1) /** * for_each_set_bit_wrap - iterate over all set bits starting from @start, and * wrapping around the end of bitmap. * @bit: offset for current iteration * @addr: bitmap address to base the search on * @size: bitmap size in number of bits * @start: Starting bit for bitmap traversing, wrapping around the bitmap end */ #define for_each_set_bit_wrap(bit, addr, size, start) \ for ((bit) = find_next_bit_wrap((addr), (size), (start)); \ (bit) < (size); \ (bit) = __for_each_wrap((addr), (size), (start), (bit) + 1)) /** * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits * @start: bit offset to start search and to store the current iteration offset * @clump: location to store copy of current 8-bit clump * @bits: bitmap address to base the search on * @size: bitmap size in number of bits */ #define for_each_set_clump8(start, clump, bits, size) \ for ((start) = find_first_clump8(&(clump), (bits), (size)); \ (start) < (size); \ (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) #endif /*__LINUX_FIND_H_ */
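/*
 * Usage sketch (not part of find.h): the iterators defined above are easiest
 * to see on a tiny bitmap. This is a minimal, illustrative kernel-context
 * example only; the function name demo_bitmap_iteration() and the bit pattern
 * are made up for the demonstration, and it assumes the usual case where
 * <linux/bitmap.h> pulls in this header.
 */
#if 0	/* example only, not compiled as part of this header */
#include <linux/bitmap.h>
#include <linux/printk.h>

static void demo_bitmap_iteration(void)
{
	DECLARE_BITMAP(map, 16);
	unsigned long bit, rs, re;

	bitmap_zero(map, 16);
	__set_bit(1, map);
	__set_bit(2, map);
	__set_bit(9, map);

	/* Plain walk: visits bits 1, 2, 9 in index order. */
	for_each_set_bit(bit, map, 16)
		pr_info("set bit %lu\n", bit);

	/* Wrapped walk starting at bit 8: visits 9, then wraps to 1 and 2. */
	for_each_set_bit_wrap(bit, map, 16, 8)
		pr_info("wrapped bit %lu\n", bit);

	/* Range walk: reports the ranges [1, 3) and [9, 10). */
	for_each_set_bitrange(rs, re, map, 16)
		pr_info("set range [%lu, %lu)\n", rs, re);
}
#endif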
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VMALLOC_H #define _LINUX_VMALLOC_H #include <linux/spinlock.h> #include <linux/init.h> #include <linux/list.h> #include <linux/llist.h> #include <asm/page.h> /* pgprot_t */ #include <linux/rbtree.h> #include <linux/overflow.h> #include <asm/vmalloc.h> struct vm_area_struct; /* vma defining user mapping in mm_types.h */ struct notifier_block; /* in notifier.h */ struct iov_iter; /* in uio.h */ /* bits in flags of vmalloc's vm_struct below */ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ #define VM_ALLOC 0x00000002 /* vmalloc() */ #define VM_MAP 0x00000004 /* vmap()ed pages */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ #define VM_ALLOW_HUGE_VMAP 0x00000400 /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) #define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ #else #define VM_DEFER_KMEMLEAK 0 #endif #define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */ /* bits [20..32] reserved for arch specific ioremap internals */ /* * Maximum alignment for ioremap() regions. * Can be overridden by arch-specific value.
*/ #ifndef IOREMAP_MAX_ORDER #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ #endif struct vm_struct { struct vm_struct *next; void *addr; unsigned long size; unsigned long flags; struct page **pages; #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC unsigned int page_order; #endif unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; }; struct vmap_area { unsigned long va_start; unsigned long va_end; struct rb_node rb_node; /* address sorted rbtree */ struct list_head list; /* address sorted list */ /* * The following two variables can be packed, because * a vmap_area object can be either: * 1) in "free" tree (root is free_vmap_area_root) * 2) or "busy" tree (root is vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ struct vm_struct *vm; /* in "busy" tree */ }; unsigned long flags; /* mark type of vm_map_ram area */ }; /* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */ #ifndef arch_vmap_p4d_supported static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pud_supported static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pmd_supported static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pte_range_map_size static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end, u64 pfn, unsigned int max_page_shift) { return PAGE_SIZE; } #endif #ifndef arch_vmap_pte_supported_shift static inline int arch_vmap_pte_supported_shift(unsigned long size) { return PAGE_SHIFT; } #endif #ifndef arch_vmap_pgprot_tagged static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) { return prot; } #endif /* * Highlevel APIs for driver use */ extern void vm_unmap_ram(const void *mem, unsigned int count); extern void *vm_map_ram(struct page **pages, unsigned int count, int node); extern void vm_unmap_aliases(void); #ifdef CONFIG_MMU extern unsigned long vmalloc_nr_pages(void); #else static inline unsigned long vmalloc_nr_pages(void) { return 0; } #endif extern void *vmalloc(unsigned long size) __alloc_size(1); extern void *vzalloc(unsigned long size) __alloc_size(1); extern void *vmalloc_user(unsigned long size) __alloc_size(1); extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1); extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1); extern void *vmalloc_32(unsigned long size) __alloc_size(1); extern void *vmalloc_32_user(unsigned long size) __alloc_size(1); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) __alloc_size(1); void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1); extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); void *vmap_pfn(unsigned long 
*pfns, unsigned int count, pgprot_t prot); extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, unsigned long pgoff, unsigned long size); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); /* * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() * needs to be called. */ #ifndef ARCH_PAGE_TABLE_SYNC_MASK #define ARCH_PAGE_TABLE_SYNC_MASK 0 #endif /* * There is no default implementation for arch_sync_kernel_mappings(). It is * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK * is 0. */ void arch_sync_kernel_mappings(unsigned long start, unsigned long end); /* * Lowlevel-APIs (not for driver use!) */ static inline size_t get_vm_area_size(const struct vm_struct *area) { if (!(area->flags & VM_NO_GUARD)) /* return actual size without guard page */ return area->size - PAGE_SIZE; else return area->size; } extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller); extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller); void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); struct vmap_area *find_vmap_area(unsigned long addr); static inline bool is_vm_area_hugepages(const void *addr) { /* * This may not 100% tell if the area is mapped with > PAGE_SIZE * page table entries, if for some reason the architecture indicates * larger sizes are available but decides not to use them, nothing * prevents that. This only indicates the size of the physical page * allocated in the vmalloc layer. */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC return find_vm_area(addr)->page_order > 0; #else return false; #endif } #ifdef CONFIG_MMU int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages); void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { struct vm_struct *vm = find_vm_area(addr); if (vm) vm->flags |= VM_FLUSH_RESET_PERMS; } #else static inline void set_vm_flush_reset_perms(void *addr) { } #endif /* for /proc/kcore */ extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count); /* * Internals. Don't use.. 
*/ extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #ifdef CONFIG_SMP # ifdef CONFIG_MMU struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); # else static inline struct vm_struct ** pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align) { return NULL; } static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) { } # endif #endif #ifdef CONFIG_MMU #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) #else #define VMALLOC_TOTAL 0UL #endif int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else static inline bool vmalloc_dump_obj(void *object) { return false; } #endif #endif /* _LINUX_VMALLOC_H */
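/*
 * Usage sketch (not part of vmalloc.h): a typical driver-style consumer of the
 * high-level API above, allocating a large, virtually contiguous table and
 * releasing it with vfree(). Illustrative only; struct demo_entry, demo_table
 * and the demo_* helpers are hypothetical names, not kernel symbols.
 */
#if 0	/* example only, not compiled as part of this header */
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>

struct demo_entry {
	u64 key;
	u64 value;
};

static struct demo_entry *demo_table;

static int demo_alloc_table(size_t nr_entries)
{
	/* Overflow-checked array allocation; contents are not zeroed. */
	demo_table = vmalloc_array(nr_entries, sizeof(*demo_table));
	if (!demo_table)
		return -ENOMEM;

	/*
	 * vcalloc(nr_entries, sizeof(*demo_table)) would be the zeroing
	 * equivalent, and vzalloc(size) the plain zeroed-buffer form.
	 */
	return 0;
}

static void demo_free_table(void)
{
	vfree(demo_table);	/* vfree(NULL) is a safe no-op */
	demo_table = NULL;
}
#endif
/*
 * Note that vmalloc memory is virtually contiguous but generally not
 * physically contiguous, so it suits large tables and buffers accessed
 * through the CPU, not buffers handed directly to DMA.
 */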
// SPDX-License-Identifier: GPL-2.0 /* * USB hub driver. * * (C) Copyright 1999 Linus Torvalds * (C) Copyright 1999 Johannes Erdfelt * (C) Copyright 1999 Gregory P. Smith * (C) Copyright 2001 Brad Hards (bhards@bigpond.net.au) * * Released under the GPLv2 only. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/completion.h> #include <linux/sched/mm.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/kcov.h> #include <linux/ioctl.h> #include <linux/usb.h> #include <linux/usbdevice_fs.h> #include <linux/usb/hcd.h> #include <linux/usb/onboard_dev.h> #include <linux/usb/otg.h> #include <linux/usb/quirks.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/random.h> #include <linux/pm_qos.h> #include <linux/kobject.h> #include <linux/bitfield.h> #include <linux/uaccess.h> #include <asm/byteorder.h> #include "hub.h" #include "phy.h" #include "otg_productlist.h" #define USB_VENDOR_GENESYS_LOGIC 0x05e3 #define USB_VENDOR_SMSC 0x0424 #define USB_PRODUCT_USB5534B 0x5534 #define USB_VENDOR_CYPRESS 0x04b4 #define USB_PRODUCT_CY7C65632 0x6570 #define USB_VENDOR_TEXAS_INSTRUMENTS 0x0451 #define USB_PRODUCT_TUSB8041_USB3 0x8140 #define USB_PRODUCT_TUSB8041_USB2 0x8142 #define USB_VENDOR_MICROCHIP 0x0424 #define USB_PRODUCT_USB4913 0x4913 #define USB_PRODUCT_USB4914 0x4914 #define USB_PRODUCT_USB4915 0x4915 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND BIT(0) #define HUB_QUIRK_DISABLE_AUTOSUSPEND BIT(1) #define HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL BIT(2) #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ #define USB_PING_RESPONSE_TIME 400 /* ns */ #define USB_REDUCE_FRAME_INTR_BINTERVAL 9 /* * The SET_ADDRESS request timeout will be 500 ms when * USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT quirk flag is set. */ #define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */ /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held.
*/ static DEFINE_SPINLOCK(device_state_lock); /* workqueue to process hub events */ static struct workqueue_struct *hub_wq; static void hub_event(struct work_struct *work); /* synchronize hub-port add/remove and peering operations */ DEFINE_MUTEX(usb_port_peer_mutex); /* cycle leds on hubs that aren't blinking for attention */ static bool blinkenlights; module_param(blinkenlights, bool, S_IRUGO); MODULE_PARM_DESC(blinkenlights, "true to cycle leds on hubs"); /* * Device SATA8000 FW1.0 from DATAST0R Technology Corp requires about * 10 seconds to send reply for the initial 64-byte descriptor request. */ /* define initial 64-byte descriptor request timeout in milliseconds */ static int initial_descriptor_timeout = USB_CTRL_GET_TIMEOUT; module_param(initial_descriptor_timeout, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(initial_descriptor_timeout, "initial 64-byte descriptor request timeout in milliseconds " "(default 5000 - 5.0 seconds)"); /* * As of 2.6.10 we introduce a new USB device initialization scheme which * closely resembles the way Windows works. Hopefully it will be compatible * with a wider range of devices than the old scheme. However some previously * working devices may start giving rise to "device not accepting address" * errors; if that happens the user can try the old scheme by adjusting the * following module parameters. * * For maximum flexibility there are two boolean parameters to control the * hub driver's behavior. On the first initialization attempt, if the * "old_scheme_first" parameter is set then the old scheme will be used, * otherwise the new scheme is used. If that fails and "use_both_schemes" * is set, then the driver will make another attempt, using the other scheme. */ static bool old_scheme_first; module_param(old_scheme_first, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(old_scheme_first, "start with the old device initialization scheme"); static bool use_both_schemes = true; module_param(use_both_schemes, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(use_both_schemes, "try the other device initialization scheme if the " "first one fails"); /* Mutual exclusion for EHCI CF initialization. This interferes with * port reset on some companion controllers. */ DECLARE_RWSEM(ehci_cf_port_reset_rwsem); EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_TIMEOUT 2000 #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, u16 portstatus); static inline char *portspeed(struct usb_hub *hub, int portstatus) { if (hub_is_superspeedplus(hub->hdev)) return "10.0 Gb/s"; if (hub_is_superspeed(hub->hdev)) return "5.0 Gb/s"; if (portstatus & USB_PORT_STAT_HIGH_SPEED) return "480 Mb/s"; else if (portstatus & USB_PORT_STAT_LOW_SPEED) return "1.5 Mb/s"; else return "12 Mb/s"; } /* Note that hdev or one of its children must be locked! */ struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev) { if (!hdev || !hdev->actconfig || !hdev->maxchild) return NULL; return usb_get_intfdata(hdev->actconfig->interface[0]); } int usb_device_supports_lpm(struct usb_device *udev) { /* Some devices have trouble with LPM */ if (udev->quirks & USB_QUIRK_NO_LPM) return 0; /* Skip if the device BOS descriptor couldn't be read */ if (!udev->bos) return 0; /* USB 2.1 (and greater) devices indicate LPM support through * their USB 2.0 Extended Capabilities BOS descriptor. 
*/ if (udev->speed == USB_SPEED_HIGH || udev->speed == USB_SPEED_FULL) { if (udev->bos->ext_cap && (USB_LPM_SUPPORT & le32_to_cpu(udev->bos->ext_cap->bmAttributes))) return 1; return 0; } /* * According to the USB 3.0 spec, all USB 3.0 devices must support LPM. * However, there are some that don't, and they set the U1/U2 exit * latencies to zero. */ if (!udev->bos->ss_cap) { dev_info(&udev->dev, "No LPM exit latency info found, disabling LPM.\n"); return 0; } if (udev->bos->ss_cap->bU1devExitLat == 0 && udev->bos->ss_cap->bU2DevExitLat == 0) { if (udev->parent) dev_info(&udev->dev, "LPM exit latency is zeroed, disabling LPM.\n"); else dev_info(&udev->dev, "We don't know the algorithms for LPM for this host, disabling LPM.\n"); return 0; } if (!udev->parent || udev->parent->lpm_capable) return 1; return 0; } /* * Set the Maximum Exit Latency (MEL) for the host to wakup up the path from * U1/U2, send a PING to the device and receive a PING_RESPONSE. * See USB 3.1 section C.1.5.2 */ static void usb_set_lpm_mel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params, unsigned int udev_exit_latency, struct usb_hub *hub, struct usb3_lpm_parameters *hub_lpm_params, unsigned int hub_exit_latency) { unsigned int total_mel; /* * tMEL1. time to transition path from host to device into U0. * MEL for parent already contains the delay up to parent, so only add * the exit latency for the last link (pick the slower exit latency), * and the hub header decode latency. See USB 3.1 section C 2.2.1 * Store MEL in nanoseconds */ total_mel = hub_lpm_params->mel + max(udev_exit_latency, hub_exit_latency) * 1000 + hub->descriptor->u.ss.bHubHdrDecLat * 100; /* * tMEL2. Time to submit PING packet. Sum of tTPTransmissionDelay for * each link + wHubDelay for each hub. Add only for last link. * tMEL4, the time for PING_RESPONSE to traverse upstream is similar. * Multiply by 2 to include it as well. */ total_mel += (__le16_to_cpu(hub->descriptor->u.ss.wHubDelay) + USB_TP_TRANSMISSION_DELAY) * 2; /* * tMEL3, tPingResponse. Time taken by device to generate PING_RESPONSE * after receiving PING. Also add 2100ns as stated in USB 3.1 C 1.5.2.4 * to cover the delay if the PING_RESPONSE is queued behind a Max Packet * Size DP. * Note these delays should be added only once for the entire path, so * add them to the MEL of the device connected to the roothub. */ if (!hub->hdev->parent) total_mel += USB_PING_RESPONSE_TIME + 2100; udev_lpm_params->mel = total_mel; } /* * Set the maximum Device to Host Exit Latency (PEL) for the device to initiate * a transition from either U1 or U2. */ static void usb_set_lpm_pel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params, unsigned int udev_exit_latency, struct usb_hub *hub, struct usb3_lpm_parameters *hub_lpm_params, unsigned int hub_exit_latency, unsigned int port_to_port_exit_latency) { unsigned int first_link_pel; unsigned int hub_pel; /* * First, the device sends an LFPS to transition the link between the * device and the parent hub into U0. The exit latency is the bigger of * the device exit latency or the hub exit latency. */ if (udev_exit_latency > hub_exit_latency) first_link_pel = udev_exit_latency * 1000; else first_link_pel = hub_exit_latency * 1000; /* * When the hub starts to receive the LFPS, there is a slight delay for * it to figure out that one of the ports is sending an LFPS. Then it * will forward the LFPS to its upstream link. The exit latency is the * delay, plus the PEL that we calculated for this hub. 
*/ hub_pel = port_to_port_exit_latency * 1000 + hub_lpm_params->pel; /* * According to figure C-7 in the USB 3.0 spec, the PEL for this device * is the greater of the two exit latencies. */ if (first_link_pel > hub_pel) udev_lpm_params->pel = first_link_pel; else udev_lpm_params->pel = hub_pel; } /* * Set the System Exit Latency (SEL) to indicate the total worst-case time from * when a device initiates a transition to U0, until when it will receive the * first packet from the host controller. * * Section C.1.5.1 describes the four components to this: * - t1: device PEL * - t2: time for the ERDY to make it from the device to the host. * - t3: a host-specific delay to process the ERDY. * - t4: time for the packet to make it from the host to the device. * * t3 is specific to both the xHCI host and the platform the host is integrated * into. The Intel HW folks have said it's negligible, FIXME if a different * vendor says otherwise. */ static void usb_set_lpm_sel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params) { struct usb_device *parent; unsigned int num_hubs; unsigned int total_sel; /* t1 = device PEL */ total_sel = udev_lpm_params->pel; /* How many external hubs are in between the device & the root port. */ for (parent = udev->parent, num_hubs = 0; parent->parent; parent = parent->parent) num_hubs++; /* t2 = 2.1us + 250ns * (num_hubs - 1) */ if (num_hubs > 0) total_sel += 2100 + 250 * (num_hubs - 1); /* t4 = 250ns * num_hubs */ total_sel += 250 * num_hubs; udev_lpm_params->sel = total_sel; } static void usb_set_lpm_parameters(struct usb_device *udev) { struct usb_hub *hub; unsigned int port_to_port_delay; unsigned int udev_u1_del; unsigned int udev_u2_del; unsigned int hub_u1_del; unsigned int hub_u2_del; if (!udev->lpm_capable || udev->speed < USB_SPEED_SUPER) return; /* Skip if the device BOS descriptor couldn't be read */ if (!udev->bos) return; hub = usb_hub_to_struct_hub(udev->parent); /* It doesn't take time to transition the roothub into U0, since it * doesn't have an upstream link. */ if (!hub) return; udev_u1_del = udev->bos->ss_cap->bU1devExitLat; udev_u2_del = le16_to_cpu(udev->bos->ss_cap->bU2DevExitLat); hub_u1_del = udev->parent->bos->ss_cap->bU1devExitLat; hub_u2_del = le16_to_cpu(udev->parent->bos->ss_cap->bU2DevExitLat); usb_set_lpm_mel(udev, &udev->u1_params, udev_u1_del, hub, &udev->parent->u1_params, hub_u1_del); usb_set_lpm_mel(udev, &udev->u2_params, udev_u2_del, hub, &udev->parent->u2_params, hub_u2_del); /* * Appendix C, section C.2.2.2, says that there is a slight delay from * when the parent hub notices the downstream port is trying to * transition to U0 to when the hub initiates a U0 transition on its * upstream port. The section says the delays are tPort2PortU1EL and * tPort2PortU2EL, but it doesn't define what they are. * * The hub chapter, sections 10.4.2.4 and 10.4.2.5 seem to be talking * about the same delays. Use the maximum delay calculations from those * sections. For U1, it's tHubPort2PortExitLat, which is 1us max. For * U2, it's tHubPort2PortExitLat + U2DevExitLat - U1DevExitLat. I * assume the device exit latencies they are talking about are the hub * exit latencies. * * What do we do if the U2 exit latency is less than the U1 exit * latency? It's possible, although not likely... 
*/ port_to_port_delay = 1; usb_set_lpm_pel(udev, &udev->u1_params, udev_u1_del, hub, &udev->parent->u1_params, hub_u1_del, port_to_port_delay); if (hub_u2_del > hub_u1_del) port_to_port_delay = 1 + hub_u2_del - hub_u1_del; else port_to_port_delay = 1 + hub_u1_del; usb_set_lpm_pel(udev, &udev->u2_params, udev_u2_del, hub, &udev->parent->u2_params, hub_u2_del, port_to_port_delay); /* Now that we've got PEL, calculate SEL. */ usb_set_lpm_sel(udev, &udev->u1_params); usb_set_lpm_sel(udev, &udev->u2_params); } /* USB 2.0 spec Section 11.24.4.5 */ static int get_hub_descriptor(struct usb_device *hdev, struct usb_hub_descriptor *desc) { int i, ret, size; unsigned dtype; if (hub_is_superspeed(hdev)) { dtype = USB_DT_SS_HUB; size = USB_DT_SS_HUB_SIZE; } else { dtype = USB_DT_HUB; size = sizeof(struct usb_hub_descriptor); } for (i = 0; i < 3; i++) { ret = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB, dtype << 8, 0, desc, size, USB_CTRL_GET_TIMEOUT); if (hub_is_superspeed(hdev)) { if (ret == size) return ret; } else if (ret >= USB_DT_HUB_NONVAR_SIZE + 2) { /* Make sure we have the DeviceRemovable field. */ size = USB_DT_HUB_NONVAR_SIZE + desc->bNbrPorts / 8 + 1; if (ret < size) return -EMSGSIZE; return ret; } } return -EINVAL; } /* * USB 2.0 spec Section 11.24.2.1 */ static int clear_hub_feature(struct usb_device *hdev, int feature) { return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), USB_REQ_CLEAR_FEATURE, USB_RT_HUB, feature, 0, NULL, 0, 1000); } /* * USB 2.0 spec Section 11.24.2.2 */ int usb_clear_port_feature(struct usb_device *hdev, int port1, int feature) { return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), USB_REQ_CLEAR_FEATURE, USB_RT_PORT, feature, port1, NULL, 0, 1000); } /* * USB 2.0 spec Section 11.24.2.13 */ static int set_port_feature(struct usb_device *hdev, int port1, int feature) { return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), USB_REQ_SET_FEATURE, USB_RT_PORT, feature, port1, NULL, 0, 1000); } static char *to_led_name(int selector) { switch (selector) { case HUB_LED_AMBER: return "amber"; case HUB_LED_GREEN: return "green"; case HUB_LED_OFF: return "off"; case HUB_LED_AUTO: return "auto"; default: return "??"; } } /* * USB 2.0 spec Section 11.24.2.7.1.10 and table 11-7 * for info about using port indicators */ static void set_port_led(struct usb_hub *hub, int port1, int selector) { struct usb_port *port_dev = hub->ports[port1 - 1]; int status; status = set_port_feature(hub->hdev, (selector << 8) | port1, USB_PORT_FEAT_INDICATOR); dev_dbg(&port_dev->dev, "indicator %s status %d\n", to_led_name(selector), status); } #define LED_CYCLE_PERIOD ((2*HZ)/3) static void led_work(struct work_struct *work) { struct usb_hub *hub = container_of(work, struct usb_hub, leds.work); struct usb_device *hdev = hub->hdev; unsigned i; unsigned changed = 0; int cursor = -1; if (hdev->state != USB_STATE_CONFIGURED || hub->quiescing) return; for (i = 0; i < hdev->maxchild; i++) { unsigned selector, mode; /* 30%-50% duty cycle */ switch (hub->indicator[i]) { /* cycle marker */ case INDICATOR_CYCLE: cursor = i; selector = HUB_LED_AUTO; mode = INDICATOR_AUTO; break; /* blinking green = sw attention */ case INDICATOR_GREEN_BLINK: selector = HUB_LED_GREEN; mode = INDICATOR_GREEN_BLINK_OFF; break; case INDICATOR_GREEN_BLINK_OFF: selector = HUB_LED_OFF; mode = INDICATOR_GREEN_BLINK; break; /* blinking amber = hw attention */ case INDICATOR_AMBER_BLINK: selector = HUB_LED_AMBER; mode = INDICATOR_AMBER_BLINK_OFF; break; case 
INDICATOR_AMBER_BLINK_OFF: selector = HUB_LED_OFF; mode = INDICATOR_AMBER_BLINK; break; /* blink green/amber = reserved */ case INDICATOR_ALT_BLINK: selector = HUB_LED_GREEN; mode = INDICATOR_ALT_BLINK_OFF; break; case INDICATOR_ALT_BLINK_OFF: selector = HUB_LED_AMBER; mode = INDICATOR_ALT_BLINK; break; default: continue; } if (selector != HUB_LED_AUTO) changed = 1; set_port_led(hub, i + 1, selector); hub->indicator[i] = mode; } if (!changed && blinkenlights) { cursor++; cursor %= hdev->maxchild; set_port_led(hub, cursor + 1, HUB_LED_GREEN); hub->indicator[cursor] = INDICATOR_CYCLE; changed++; } if (changed) queue_delayed_work(system_power_efficient_wq, &hub->leds, LED_CYCLE_PERIOD); } /* use a short timeout for hub/port status fetches */ #define USB_STS_TIMEOUT 1000 #define USB_STS_RETRIES 5 /* * USB 2.0 spec Section 11.24.2.6 */ static int get_hub_status(struct usb_device *hdev, struct usb_hub_status *data) { int i, status = -ETIMEDOUT; for (i = 0; i < USB_STS_RETRIES && (status == -ETIMEDOUT || status == -EPIPE); i++) { status = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | USB_RT_HUB, 0, 0, data, sizeof(*data), USB_STS_TIMEOUT); } return status; } /* * USB 2.0 spec Section 11.24.2.7 * USB 3.1 takes into use the wValue and wLength fields, spec Section 10.16.2.6 */ static int get_port_status(struct usb_device *hdev, int port1, void *data, u16 value, u16 length) { int i, status = -ETIMEDOUT; for (i = 0; i < USB_STS_RETRIES && (status == -ETIMEDOUT || status == -EPIPE); i++) { status = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | USB_RT_PORT, value, port1, data, length, USB_STS_TIMEOUT); } return status; } static int hub_ext_port_status(struct usb_hub *hub, int port1, int type, u16 *status, u16 *change, u32 *ext_status) { int ret; int len = 4; if (type != HUB_PORT_STATUS) len = 8; mutex_lock(&hub->status_mutex); ret = get_port_status(hub->hdev, port1, &hub->status->port, type, len); if (ret < len) { if (ret != -ENODEV) dev_err(hub->intfdev, "%s failed (err = %d)\n", __func__, ret); if (ret >= 0) ret = -EIO; } else { *status = le16_to_cpu(hub->status->port.wPortStatus); *change = le16_to_cpu(hub->status->port.wPortChange); if (type != HUB_PORT_STATUS && ext_status) *ext_status = le32_to_cpu( hub->status->port.dwExtPortStatus); ret = 0; } mutex_unlock(&hub->status_mutex); /* * There is no need to lock status_mutex here, because status_mutex * protects hub->status, and the phy driver only checks the port * status without changing the status. */ if (!ret) { struct usb_device *hdev = hub->hdev; /* * Only roothub will be notified of connection changes, * since the USB PHY only cares about changes at the next * level. 
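	 * For example, a C_CONNECTION change together with a set CONNECTION
	 * bit is reported via usb_phy_roothub_notify_connect() for that root
	 * port (zero-based, hence port1 - 1), while a connection change
	 * without the CONNECTION bit is reported via
	 * usb_phy_roothub_notify_disconnect().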
*/ if (is_root_hub(hdev)) { struct usb_hcd *hcd = bus_to_hcd(hdev->bus); bool connect; bool connect_change; connect_change = *change & USB_PORT_STAT_C_CONNECTION; connect = *status & USB_PORT_STAT_CONNECTION; if (connect_change && connect) usb_phy_roothub_notify_connect(hcd->phy_roothub, port1 - 1); else if (connect_change) usb_phy_roothub_notify_disconnect(hcd->phy_roothub, port1 - 1); } } return ret; } int usb_hub_port_status(struct usb_hub *hub, int port1, u16 *status, u16 *change) { return hub_ext_port_status(hub, port1, HUB_PORT_STATUS, status, change, NULL); } static void hub_resubmit_irq_urb(struct usb_hub *hub) { unsigned long flags; int status; spin_lock_irqsave(&hub->irq_urb_lock, flags); if (hub->quiescing) { spin_unlock_irqrestore(&hub->irq_urb_lock, flags); return; } status = usb_submit_urb(hub->urb, GFP_ATOMIC); if (status && status != -ENODEV && status != -EPERM && status != -ESHUTDOWN) { dev_err(hub->intfdev, "resubmit --> %d\n", status); mod_timer(&hub->irq_urb_retry, jiffies + HZ); } spin_unlock_irqrestore(&hub->irq_urb_lock, flags); } static void hub_retry_irq_urb(struct timer_list *t) { struct usb_hub *hub = from_timer(hub, t, irq_urb_retry); hub_resubmit_irq_urb(hub); } static void kick_hub_wq(struct usb_hub *hub) { struct usb_interface *intf; if (hub->disconnected || work_pending(&hub->events)) return; /* * Suppress autosuspend until the event is proceed. * * Be careful and make sure that the symmetric operation is * always called. We are here only when there is no pending * work for this hub. Therefore put the interface either when * the new work is called or when it is canceled. */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ usb_autopm_put_interface_async(intf); hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); if (hub) kick_hub_wq(hub); } /* * Let the USB core know that a USB 3.0 device has sent a Function Wake Device * Notification, which indicates it had initiated remote wakeup. * * USB 3.0 hubs do not report the port link state change from U3 to U0 when the * device initiates resume, so the USB core will not receive notice of the * resume through the normal hub interrupt URB. 
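 * Host controller drivers call this when they see such a notification; the
 * xHCI driver, for example, invokes it from its Device Notification event
 * handling.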
*/ void usb_wakeup_notification(struct usb_device *hdev, unsigned int portnum) { struct usb_hub *hub; struct usb_port *port_dev; if (!hdev) return; hub = usb_hub_to_struct_hub(hdev); if (hub) { port_dev = hub->ports[portnum - 1]; if (port_dev && port_dev->child) pm_wakeup_event(&port_dev->child->dev, 0); set_bit(portnum, hub->wakeup_bits); kick_hub_wq(hub); } } EXPORT_SYMBOL_GPL(usb_wakeup_notification); /* completion function, fires on port status changes and various faults */ static void hub_irq(struct urb *urb) { struct usb_hub *hub = urb->context; int status = urb->status; unsigned i; unsigned long bits; switch (status) { case -ENOENT: /* synchronous unlink */ case -ECONNRESET: /* async unlink */ case -ESHUTDOWN: /* hardware going away */ return; default: /* presumably an error */ /* Cause a hub reset after 10 consecutive errors */ dev_dbg(hub->intfdev, "transfer --> %d\n", status); if ((++hub->nerrors < 10) || hub->error) goto resubmit; hub->error = status; fallthrough; /* let hub_wq handle things */ case 0: /* we got data: port status changed */ bits = 0; for (i = 0; i < urb->actual_length; ++i) bits |= ((unsigned long) ((*hub->buffer)[i])) << (i*8); hub->event_bits[0] = bits; break; } hub->nerrors = 0; /* Something happened, let hub_wq figure it out */ kick_hub_wq(hub); resubmit: hub_resubmit_irq_urb(hub); } /* USB 2.0 spec Section 11.24.2.3 */ static inline int hub_clear_tt_buffer(struct usb_device *hdev, u16 devinfo, u16 tt) { /* Need to clear both directions for control ep */ if (((devinfo >> 11) & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_CONTROL) { int status = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo ^ 0x8000, tt, NULL, 0, 1000); if (status) return status; } return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); } /* * enumeration blocks hub_wq for a long time. we use keventd instead, since * long blocking there is the exception, not the rule. accordingly, HCDs * talking to TTs must queue control transfers (not just bulk and iso), so * both can talk to the same hub concurrently. */ static void hub_tt_work(struct work_struct *work) { struct usb_hub *hub = container_of(work, struct usb_hub, tt.clear_work); unsigned long flags; spin_lock_irqsave(&hub->tt.lock, flags); while (!list_empty(&hub->tt.clear_list)) { struct list_head *next; struct usb_tt_clear *clear; struct usb_device *hdev = hub->hdev; const struct hc_driver *drv; int status; next = hub->tt.clear_list.next; clear = list_entry(next, struct usb_tt_clear, clear_list); list_del(&clear->clear_list); /* drop lock so HCD can concurrently report other TT errors */ spin_unlock_irqrestore(&hub->tt.lock, flags); status = hub_clear_tt_buffer(hdev, clear->devinfo, clear->tt); if (status && status != -ENODEV) dev_err(&hdev->dev, "clear tt %d (%04x) error %d\n", clear->tt, clear->devinfo, status); /* Tell the HCD, even if the operation failed */ drv = clear->hcd->driver; if (drv->clear_tt_buffer_complete) (drv->clear_tt_buffer_complete)(clear->hcd, clear->ep); kfree(clear); spin_lock_irqsave(&hub->tt.lock, flags); } spin_unlock_irqrestore(&hub->tt.lock, flags); } /** * usb_hub_set_port_power - control hub port's power state * @hdev: USB device belonging to the usb hub * @hub: target hub * @port1: port index * @set: expected status * * call this function to control port's power via setting or * clearing the port's PORT_POWER feature. * * Return: 0 if successful. A negative error code otherwise. 
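 *
 * Minimal caller-side sketch (for illustration only; error handling elided):
 *
 *	usb_hub_set_port_power(hdev, hub, port1, false);	// power the port off
 *	usb_hub_set_port_power(hdev, hub, port1, true);		// power it back on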
*/ int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set) { int ret; if (set) ret = set_port_feature(hdev, port1, USB_PORT_FEAT_POWER); else ret = usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_POWER); if (ret) return ret; if (set) set_bit(port1, hub->power_bits); else clear_bit(port1, hub->power_bits); return 0; } /** * usb_hub_clear_tt_buffer - clear control/bulk TT state in high speed hub * @urb: an URB associated with the failed or incomplete split transaction * * High speed HCDs use this to tell the hub driver that some split control or * bulk transaction failed in a way that requires clearing internal state of * a transaction translator. This is normally detected (and reported) from * interrupt context. * * It may not be possible for that hub to handle additional full (or low) * speed transactions until that state is fully cleared out. * * Return: 0 if successful. A negative error code otherwise. */ int usb_hub_clear_tt_buffer(struct urb *urb) { struct usb_device *udev = urb->dev; int pipe = urb->pipe; struct usb_tt *tt = udev->tt; unsigned long flags; struct usb_tt_clear *clear; /* we've got to cope with an arbitrary number of pending TT clears, * since each TT has "at least two" buffers that can need it (and * there can be many TTs per hub). even if they're uncommon. */ clear = kmalloc(sizeof *clear, GFP_ATOMIC); if (clear == NULL) { dev_err(&udev->dev, "can't save CLEAR_TT_BUFFER state\n"); /* FIXME recover somehow ... RESET_TT? */ return -ENOMEM; } /* info that CLEAR_TT_BUFFER needs */ clear->tt = tt->multi ? udev->ttport : 1; clear->devinfo = usb_pipeendpoint (pipe); clear->devinfo |= ((u16)udev->devaddr) << 4; clear->devinfo |= usb_pipecontrol(pipe) ? (USB_ENDPOINT_XFER_CONTROL << 11) : (USB_ENDPOINT_XFER_BULK << 11); if (usb_pipein(pipe)) clear->devinfo |= 1 << 15; /* info for completion callback */ clear->hcd = bus_to_hcd(udev->bus); clear->ep = urb->ep; /* tell keventd to clear state for this TT */ spin_lock_irqsave(&tt->lock, flags); list_add_tail(&clear->clear_list, &tt->clear_list); schedule_work(&tt->clear_work); spin_unlock_irqrestore(&tt->lock, flags); return 0; } EXPORT_SYMBOL_GPL(usb_hub_clear_tt_buffer); static void hub_power_on(struct usb_hub *hub, bool do_delay) { int port1; /* Enable power on each port. Some hubs have reserved values * of LPSM (> 2) in their descriptors, even though they are * USB 2.0 hubs. Some hubs do not implement port-power switching * but only emulate it. In all cases, the ports won't work * unless we send these messages to the hub. 
*/ if (hub_is_port_power_switchable(hub)) dev_dbg(hub->intfdev, "enabling power on all ports\n"); else dev_dbg(hub->intfdev, "trying to enable port power on " "non-switchable hub\n"); for (port1 = 1; port1 <= hub->hdev->maxchild; port1++) if (test_bit(port1, hub->power_bits)) set_port_feature(hub->hdev, port1, USB_PORT_FEAT_POWER); else usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_POWER); if (do_delay) msleep(hub_power_on_good_delay(hub)); } static int hub_hub_status(struct usb_hub *hub, u16 *status, u16 *change) { int ret; mutex_lock(&hub->status_mutex); ret = get_hub_status(hub->hdev, &hub->status->hub); if (ret < 0) { if (ret != -ENODEV) dev_err(hub->intfdev, "%s failed (err = %d)\n", __func__, ret); } else { *status = le16_to_cpu(hub->status->hub.wHubStatus); *change = le16_to_cpu(hub->status->hub.wHubChange); ret = 0; } mutex_unlock(&hub->status_mutex); return ret; } static int hub_set_port_link_state(struct usb_hub *hub, int port1, unsigned int link_status) { return set_port_feature(hub->hdev, port1 | (link_status << 3), USB_PORT_FEAT_LINK_STATE); } /* * Disable a port and mark a logical connect-change event, so that some * time later hub_wq will disconnect() any existing usb_device on the port * and will re-enumerate if there actually is a device attached. */ static void hub_port_logical_disconnect(struct usb_hub *hub, int port1) { dev_dbg(&hub->ports[port1 - 1]->dev, "logical disconnect\n"); hub_port_disable(hub, port1, 1); /* FIXME let caller ask to power down the port: * - some devices won't enumerate without a VBUS power cycle * - SRP saves power that way * - ... new call, TBD ... * That's easy if this hub can switch power per-port, and * hub_wq reactivates the port later (timer, SRP, etc). * Powerdown must be optional, because of reset/DFU. */ set_bit(port1, hub->change_bits); kick_hub_wq(hub); } /** * usb_remove_device - disable a device's port on its parent hub * @udev: device to be disabled and removed * Context: @udev locked, must be able to sleep. * * After @udev's port has been disabled, hub_wq is notified and it will * see that the device has been disconnected. When the device is * physically unplugged and something is plugged in, the events will * be received and processed normally. * * Return: 0 if successful. A negative error code otherwise. */ int usb_remove_device(struct usb_device *udev) { struct usb_hub *hub; struct usb_interface *intf; int ret; if (!udev->parent) /* Can't remove a root hub */ return -EINVAL; hub = usb_hub_to_struct_hub(udev->parent); intf = to_usb_interface(hub->intfdev); ret = usb_autopm_get_interface(intf); if (ret < 0) return ret; set_bit(udev->portnum, hub->removed_bits); hub_port_logical_disconnect(hub, udev->portnum); usb_autopm_put_interface(intf); return 0; } enum hub_activation_type { HUB_INIT, HUB_INIT2, HUB_INIT3, /* INITs must come first */ HUB_POST_RESET, HUB_RESUME, HUB_RESET_RESUME, }; static void hub_init_func2(struct work_struct *ws); static void hub_init_func3(struct work_struct *ws); static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) { struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd; int ret; int port1; int status; bool need_debounce_delay = false; unsigned delay; /* Continue a partial initialization */ if (type == HUB_INIT2 || type == HUB_INIT3) { device_lock(&hdev->dev); /* Was the hub disconnected while we were waiting? 
*/ if (hub->disconnected) goto disconnected; if (type == HUB_INIT2) goto init2; goto init3; } hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits * it uses to determine the downstream port number. So hub driver * should send a set hub depth request to superspeed hub after * the superspeed hub is set configuration in initialization or * reset procedure. * * After a resume, port power should still be on. * For any other type of activation, turn it on. */ if (type != HUB_RESUME) { if (hdev->parent && hub_is_superspeed(hdev)) { ret = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_SET_DEPTH, USB_RT_HUB, hdev->level - 1, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret < 0) dev_err(hub->intfdev, "set hub depth failed\n"); } /* Speed up system boot by using a delayed_work for the * hub's initial power-up delays. This is pretty awkward * and the implementation looks like a home-brewed sort of * setjmp/longjmp, but it saves at least 100 ms for each * root hub (assuming usbcore is compiled into the kernel * rather than as a module). It adds up. * * This can't be done for HUB_RESUME or HUB_RESET_RESUME * because for those activation types the ports have to be * operational when we return. In theory this could be done * for HUB_POST_RESET, but it's easier not to. */ if (type == HUB_INIT) { delay = hub_power_on_good_delay(hub); hub_power_on(hub, false); INIT_DELAYED_WORK(&hub->init_work, hub_init_func2); queue_delayed_work(system_power_efficient_wq, &hub->init_work, msecs_to_jiffies(delay)); /* Suppress autosuspend until init is done */ usb_autopm_get_interface_no_resume( to_usb_interface(hub->intfdev)); return; /* Continues at init2: below */ } else if (type == HUB_RESET_RESUME) { /* The internal host controller state for the hub device * may be gone after a host power loss on system resume. * Update the device's info so the HW knows it's a hub. */ hcd = bus_to_hcd(hdev->bus); if (hcd->driver->update_hub_device) { ret = hcd->driver->update_hub_device(hcd, hdev, &hub->tt, GFP_NOIO); if (ret < 0) { dev_err(hub->intfdev, "Host not accepting hub info update\n"); dev_err(hub->intfdev, "LS/FS devices and hubs may not work under this hub\n"); } } hub_power_on(hub, true); } else { hub_power_on(hub, true); } /* Give some time on remote wakeup to let links to transit to U0 */ } else if (hub_is_superspeed(hub->hdev)) msleep(20); init2: /* * Check each port and set hub->change_bits to let hub_wq know * which ports need attention. */ for (port1 = 1; port1 <= hdev->maxchild; ++port1) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; u16 portstatus, portchange; portstatus = portchange = 0; status = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (status) goto abort; if (udev || (portstatus & USB_PORT_STAT_CONNECTION)) dev_dbg(&port_dev->dev, "status %04x change %04x\n", portstatus, portchange); /* * After anything other than HUB_RESUME (i.e., initialization * or any sort of reset), every port should be disabled. * Unconnected ports should likewise be disabled (paranoia), * and so should ports for which we have no usb_device. */ if ((portstatus & USB_PORT_STAT_ENABLE) && ( type != HUB_RESUME || !(portstatus & USB_PORT_STAT_CONNECTION) || !udev || udev->state == USB_STATE_NOTATTACHED)) { /* * USB3 protocol ports will automatically transition * to Enabled state when detect an USB3.0 device attach. 
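			 * (Note that only the non-SuperSpeed branch below
			 * issues a ClearPortFeature(PORT_ENABLE); for
			 * SuperSpeed hubs the ENABLE bit is only cleared in
			 * our local copy of the port status.)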
* Do not disable USB3 protocol ports, just pretend * power was lost */ portstatus &= ~USB_PORT_STAT_ENABLE; if (!hub_is_superspeed(hdev)) usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_ENABLE); } /* Make sure a warm-reset request is handled by port_event */ if (type == HUB_RESUME && hub_port_warm_reset_required(hub, port1, portstatus)) set_bit(port1, hub->event_bits); /* * Add debounce if USB3 link is in polling/link training state. * Link will automatically transition to Enabled state after * link training completes. */ if (hub_is_superspeed(hdev) && ((portstatus & USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_POLLING)) need_debounce_delay = true; /* Clear status-change flags; we'll debounce later */ if (portchange & USB_PORT_STAT_C_CONNECTION) { need_debounce_delay = true; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); } if (portchange & USB_PORT_STAT_C_ENABLE) { need_debounce_delay = true; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_ENABLE); } if (portchange & USB_PORT_STAT_C_RESET) { need_debounce_delay = true; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_RESET); } if ((portchange & USB_PORT_STAT_C_BH_RESET) && hub_is_superspeed(hub->hdev)) { need_debounce_delay = true; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_BH_PORT_RESET); } /* We can forget about a "removed" device when there's a * physical disconnect or the connect status changes. */ if (!(portstatus & USB_PORT_STAT_CONNECTION) || (portchange & USB_PORT_STAT_C_CONNECTION)) clear_bit(port1, hub->removed_bits); if (!udev || udev->state == USB_STATE_NOTATTACHED) { /* Tell hub_wq to disconnect the device or * check for a new connection or over current condition. * Based on USB2.0 Spec Section 11.12.5, * C_PORT_OVER_CURRENT could be set while * PORT_OVER_CURRENT is not. So check for any of them. */ if (udev || (portstatus & USB_PORT_STAT_CONNECTION) || (portchange & USB_PORT_STAT_C_CONNECTION) || (portstatus & USB_PORT_STAT_OVERCURRENT) || (portchange & USB_PORT_STAT_C_OVERCURRENT)) set_bit(port1, hub->change_bits); } else if (portstatus & USB_PORT_STAT_ENABLE) { bool port_resumed = (portstatus & USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_U0; /* The power session apparently survived the resume. * If there was an overcurrent or suspend change * (i.e., remote wakeup request), have hub_wq * take care of it. Look at the port link state * for USB 3.0 hubs, since they don't have a suspend * change bit, and they don't set the port link change * bit on device-initiated resume. */ if (portchange || (hub_is_superspeed(hub->hdev) && port_resumed)) set_bit(port1, hub->event_bits); } else if (udev->persist_enabled) { #ifdef CONFIG_PM udev->reset_resume = 1; #endif /* Don't set the change_bits when the device * was powered off. */ if (test_bit(port1, hub->power_bits)) set_bit(port1, hub->change_bits); } else { /* The power session is gone; tell hub_wq */ usb_set_device_state(udev, USB_STATE_NOTATTACHED); set_bit(port1, hub->change_bits); } } /* If no port-status-change flags were set, we don't need any * debouncing. If flags were set we can try to debounce the * ports all at once right now, instead of letting hub_wq do them * one at a time later on. * * If any port-status changes do occur during this delay, hub_wq * will see them later and handle them normally. 
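	 * A single HUB_DEBOUNCE_STABLE delay (100 ms at the time of writing)
	 * covers every port in one go.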
*/ if (need_debounce_delay) { delay = HUB_DEBOUNCE_STABLE; /* Don't do a long sleep inside a workqueue routine */ if (type == HUB_INIT2) { INIT_DELAYED_WORK(&hub->init_work, hub_init_func3); queue_delayed_work(system_power_efficient_wq, &hub->init_work, msecs_to_jiffies(delay)); device_unlock(&hdev->dev); return; /* Continues at init3: below */ } else { msleep(delay); } } init3: hub->quiescing = 0; status = usb_submit_urb(hub->urb, GFP_NOIO); if (status < 0) dev_err(hub->intfdev, "activate --> %d\n", status); if (hub->has_indicators && blinkenlights) queue_delayed_work(system_power_efficient_wq, &hub->leds, LED_CYCLE_PERIOD); /* Scan all ports that need attention */ kick_hub_wq(hub); abort: if (type == HUB_INIT2 || type == HUB_INIT3) { /* Allow autosuspend if it was suppressed */ disconnected: usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); device_unlock(&hdev->dev); } hub_put(hub); } /* Implement the continuations for the delays above */ static void hub_init_func2(struct work_struct *ws) { struct usb_hub *hub = container_of(ws, struct usb_hub, init_work.work); hub_activate(hub, HUB_INIT2); } static void hub_init_func3(struct work_struct *ws) { struct usb_hub *hub = container_of(ws, struct usb_hub, init_work.work); hub_activate(hub, HUB_INIT3); } enum hub_quiescing_type { HUB_DISCONNECT, HUB_PRE_RESET, HUB_SUSPEND }; static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) { struct usb_device *hdev = hub->hdev; unsigned long flags; int i; /* hub_wq and related activity won't re-trigger */ spin_lock_irqsave(&hub->irq_urb_lock, flags); hub->quiescing = 1; spin_unlock_irqrestore(&hub->irq_urb_lock, flags); if (type != HUB_SUSPEND) { /* Disconnect all the children */ for (i = 0; i < hdev->maxchild; ++i) { if (hub->ports[i]->child) usb_disconnect(&hub->ports[i]->child); } } /* Stop hub_wq and related activity */ del_timer_sync(&hub->irq_urb_retry); usb_kill_urb(hub->urb); if (hub->has_indicators) cancel_delayed_work_sync(&hub->leds); if (hub->tt.hub) flush_work(&hub->tt.clear_work); } static void hub_pm_barrier_for_all_ports(struct usb_hub *hub) { int i; for (i = 0; i < hub->hdev->maxchild; ++i) pm_runtime_barrier(&hub->ports[i]->dev); } /* caller has locked the hub device */ static int hub_pre_reset(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); hub_quiesce(hub, HUB_PRE_RESET); hub->in_reset = 1; hub_pm_barrier_for_all_ports(hub); return 0; } /* caller has locked the hub device */ static int hub_post_reset(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); hub->in_reset = 0; hub_pm_barrier_for_all_ports(hub); hub_activate(hub, HUB_POST_RESET); return 0; } static int hub_configure(struct usb_hub *hub, struct usb_endpoint_descriptor *endpoint) { struct usb_hcd *hcd; struct usb_device *hdev = hub->hdev; struct device *hub_dev = hub->intfdev; u16 hubstatus, hubchange; u16 wHubCharacteristics; unsigned int pipe; int maxp, ret, i; char *message = "out of memory"; unsigned unit_load; unsigned full_load; unsigned maxchild; hub->buffer = kmalloc(sizeof(*hub->buffer), GFP_KERNEL); if (!hub->buffer) { ret = -ENOMEM; goto fail; } hub->status = kmalloc(sizeof(*hub->status), GFP_KERNEL); if (!hub->status) { ret = -ENOMEM; goto fail; } mutex_init(&hub->status_mutex); hub->descriptor = kzalloc(sizeof(*hub->descriptor), GFP_KERNEL); if (!hub->descriptor) { ret = -ENOMEM; goto fail; } /* Request the entire hub descriptor. 
* hub->descriptor can handle USB_MAXCHILDREN ports, * but a (non-SS) hub can/will return fewer bytes here. */ ret = get_hub_descriptor(hdev, hub->descriptor); if (ret < 0) { message = "can't read hub descriptor"; goto fail; } maxchild = USB_MAXCHILDREN; if (hub_is_superspeed(hdev)) maxchild = min_t(unsigned, maxchild, USB_SS_MAXPORTS); if (hub->descriptor->bNbrPorts > maxchild) { message = "hub has too many ports!"; ret = -ENODEV; goto fail; } else if (hub->descriptor->bNbrPorts == 0) { message = "hub doesn't have any ports!"; ret = -ENODEV; goto fail; } /* * Accumulate wHubDelay + 40ns for every hub in the tree of devices. * The resulting value will be used for SetIsochDelay() request. */ if (hub_is_superspeed(hdev) || hub_is_superspeedplus(hdev)) { u32 delay = __le16_to_cpu(hub->descriptor->u.ss.wHubDelay); if (hdev->parent) delay += hdev->parent->hub_delay; delay += USB_TP_TRANSMISSION_DELAY; hdev->hub_delay = min_t(u32, delay, USB_TP_TRANSMISSION_DELAY_MAX); } maxchild = hub->descriptor->bNbrPorts; dev_info(hub_dev, "%d port%s detected\n", maxchild, (maxchild == 1) ? "" : "s"); hub->ports = kcalloc(maxchild, sizeof(struct usb_port *), GFP_KERNEL); if (!hub->ports) { ret = -ENOMEM; goto fail; } wHubCharacteristics = le16_to_cpu(hub->descriptor->wHubCharacteristics); if (hub_is_superspeed(hdev)) { unit_load = 150; full_load = 900; } else { unit_load = 100; full_load = 500; } /* FIXME for USB 3.0, skip for now */ if ((wHubCharacteristics & HUB_CHAR_COMPOUND) && !(hub_is_superspeed(hdev))) { char portstr[USB_MAXCHILDREN + 1]; for (i = 0; i < maxchild; i++) portstr[i] = hub->descriptor->u.hs.DeviceRemovable [((i + 1) / 8)] & (1 << ((i + 1) % 8)) ? 'F' : 'R'; portstr[maxchild] = 0; dev_dbg(hub_dev, "compound device; port removable status: %s\n", portstr); } else dev_dbg(hub_dev, "standalone hub\n"); switch (wHubCharacteristics & HUB_CHAR_LPSM) { case HUB_CHAR_COMMON_LPSM: dev_dbg(hub_dev, "ganged power switching\n"); break; case HUB_CHAR_INDV_PORT_LPSM: dev_dbg(hub_dev, "individual port power switching\n"); break; case HUB_CHAR_NO_LPSM: case HUB_CHAR_LPSM: dev_dbg(hub_dev, "no power switching (usb 1.0)\n"); break; } switch (wHubCharacteristics & HUB_CHAR_OCPM) { case HUB_CHAR_COMMON_OCPM: dev_dbg(hub_dev, "global over-current protection\n"); break; case HUB_CHAR_INDV_PORT_OCPM: dev_dbg(hub_dev, "individual port over-current protection\n"); break; case HUB_CHAR_NO_OCPM: case HUB_CHAR_OCPM: dev_dbg(hub_dev, "no over-current protection\n"); break; } spin_lock_init(&hub->tt.lock); INIT_LIST_HEAD(&hub->tt.clear_list); INIT_WORK(&hub->tt.clear_work, hub_tt_work); switch (hdev->descriptor.bDeviceProtocol) { case USB_HUB_PR_FS: break; case USB_HUB_PR_HS_SINGLE_TT: dev_dbg(hub_dev, "Single TT\n"); hub->tt.hub = hdev; break; case USB_HUB_PR_HS_MULTI_TT: ret = usb_set_interface(hdev, 0, 1); if (ret == 0) { dev_dbg(hub_dev, "TT per port\n"); hub->tt.multi = 1; } else dev_err(hub_dev, "Using single TT (err %d)\n", ret); hub->tt.hub = hdev; break; case USB_HUB_PR_SS: /* USB 3.0 hubs don't have a TT */ break; default: dev_dbg(hub_dev, "Unrecognized hub protocol %d\n", hdev->descriptor.bDeviceProtocol); break; } /* Note 8 FS bit times == (8 bits / 12000000 bps) ~= 666ns */ switch (wHubCharacteristics & HUB_CHAR_TTTT) { case HUB_TTTT_8_BITS: if (hdev->descriptor.bDeviceProtocol != 0) { hub->tt.think_time = 666; dev_dbg(hub_dev, "TT requires at most %d " "FS bit times (%d ns)\n", 8, hub->tt.think_time); } break; case HUB_TTTT_16_BITS: hub->tt.think_time = 666 * 2; dev_dbg(hub_dev, "TT requires at most %d " 
"FS bit times (%d ns)\n", 16, hub->tt.think_time); break; case HUB_TTTT_24_BITS: hub->tt.think_time = 666 * 3; dev_dbg(hub_dev, "TT requires at most %d " "FS bit times (%d ns)\n", 24, hub->tt.think_time); break; case HUB_TTTT_32_BITS: hub->tt.think_time = 666 * 4; dev_dbg(hub_dev, "TT requires at most %d " "FS bit times (%d ns)\n", 32, hub->tt.think_time); break; } /* probe() zeroes hub->indicator[] */ if (wHubCharacteristics & HUB_CHAR_PORTIND) { hub->has_indicators = 1; dev_dbg(hub_dev, "Port indicators are supported\n"); } dev_dbg(hub_dev, "power on to power good time: %dms\n", hub->descriptor->bPwrOn2PwrGood * 2); /* power budgeting mostly matters with bus-powered hubs, * and battery-powered root hubs (may provide just 8 mA). */ ret = usb_get_std_status(hdev, USB_RECIP_DEVICE, 0, &hubstatus); if (ret) { message = "can't get hub status"; goto fail; } hcd = bus_to_hcd(hdev->bus); if (hdev == hdev->bus->root_hub) { if (hcd->power_budget > 0) hdev->bus_mA = hcd->power_budget; else hdev->bus_mA = full_load * maxchild; if (hdev->bus_mA >= full_load) hub->mA_per_port = full_load; else { hub->mA_per_port = hdev->bus_mA; hub->limited_power = 1; } } else if ((hubstatus & (1 << USB_DEVICE_SELF_POWERED)) == 0) { int remaining = hdev->bus_mA - hub->descriptor->bHubContrCurrent; dev_dbg(hub_dev, "hub controller current requirement: %dmA\n", hub->descriptor->bHubContrCurrent); hub->limited_power = 1; if (remaining < maxchild * unit_load) dev_warn(hub_dev, "insufficient power available " "to use all downstream ports\n"); hub->mA_per_port = unit_load; /* 7.2.1 */ } else { /* Self-powered external hub */ /* FIXME: What about battery-powered external hubs that * provide less current per port? */ hub->mA_per_port = full_load; } if (hub->mA_per_port < full_load) dev_dbg(hub_dev, "%umA bus power budget for each child\n", hub->mA_per_port); ret = hub_hub_status(hub, &hubstatus, &hubchange); if (ret < 0) { message = "can't get hub status"; goto fail; } /* local power status reports aren't always correct */ if (hdev->actconfig->desc.bmAttributes & USB_CONFIG_ATT_SELFPOWER) dev_dbg(hub_dev, "local power source is %s\n", (hubstatus & HUB_STATUS_LOCAL_POWER) ? "lost (inactive)" : "good"); if ((wHubCharacteristics & HUB_CHAR_OCPM) == 0) dev_dbg(hub_dev, "%sover-current condition exists\n", (hubstatus & HUB_STATUS_OVERCURRENT) ? "" : "no "); /* set up the interrupt endpoint * We use the EP's maxpacket size instead of (PORTS+1+7)/8 * bytes as USB2.0[11.12.3] says because some hubs are known * to send more data (and thus cause overflow). For root hubs, * maxpktsize is defined in hcd.c's fake endpoint descriptors * to be big enough for at least USB_MAXCHILDREN ports. 
*/ pipe = usb_rcvintpipe(hdev, endpoint->bEndpointAddress); maxp = usb_maxpacket(hdev, pipe); if (maxp > sizeof(*hub->buffer)) maxp = sizeof(*hub->buffer); hub->urb = usb_alloc_urb(0, GFP_KERNEL); if (!hub->urb) { ret = -ENOMEM; goto fail; } usb_fill_int_urb(hub->urb, hdev, pipe, *hub->buffer, maxp, hub_irq, hub, endpoint->bInterval); /* maybe cycle the hub leds */ if (hub->has_indicators && blinkenlights) hub->indicator[0] = INDICATOR_CYCLE; mutex_lock(&usb_port_peer_mutex); for (i = 0; i < maxchild; i++) { ret = usb_hub_create_port_device(hub, i + 1); if (ret < 0) { dev_err(hub->intfdev, "couldn't create port%d device.\n", i + 1); break; } } hdev->maxchild = i; for (i = 0; i < hdev->maxchild; i++) { struct usb_port *port_dev = hub->ports[i]; pm_runtime_put(&port_dev->dev); } mutex_unlock(&usb_port_peer_mutex); if (ret < 0) goto fail; /* Update the HCD's internal representation of this hub before hub_wq * starts getting port status changes for devices under the hub. */ if (hcd->driver->update_hub_device) { ret = hcd->driver->update_hub_device(hcd, hdev, &hub->tt, GFP_KERNEL); if (ret < 0) { message = "can't update HCD hub info"; goto fail; } } usb_hub_adjust_deviceremovable(hdev, hub->descriptor); hub_activate(hub, HUB_INIT); return 0; fail: dev_err(hub_dev, "config failed, %s (err %d)\n", message, ret); /* hub_disconnect() frees urb and descriptor */ return ret; } static void hub_release(struct kref *kref) { struct usb_hub *hub = container_of(kref, struct usb_hub, kref); usb_put_dev(hub->hdev); usb_put_intf(to_usb_interface(hub->intfdev)); kfree(hub); } void hub_get(struct usb_hub *hub) { kref_get(&hub->kref); } void hub_put(struct usb_hub *hub) { kref_put(&hub->kref, hub_release); } static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); struct usb_device *hdev = interface_to_usbdev(intf); int port1; /* * Stop adding new hub events. We do not want to block here and thus * will not try to remove any pending work item. */ hub->disconnected = 1; /* Disconnect all children and quiesce the hub */ hub->error = 0; hub_quiesce(hub, HUB_DISCONNECT); mutex_lock(&usb_port_peer_mutex); /* Avoid races with recursively_mark_NOTATTACHED() */ spin_lock_irq(&device_state_lock); port1 = hdev->maxchild; hdev->maxchild = 0; usb_set_intfdata(intf, NULL); spin_unlock_irq(&device_state_lock); for (; port1 > 0; --port1) usb_hub_remove_port_device(hub, port1); mutex_unlock(&usb_port_peer_mutex); if (hub->hdev->speed == USB_SPEED_HIGH) highspeed_hubs--; usb_free_urb(hub->urb); kfree(hub->ports); kfree(hub->descriptor); kfree(hub->status); kfree(hub->buffer); pm_suspend_ignore_children(&intf->dev, false); if (hub->quirk_disable_autosuspend) usb_autopm_put_interface(intf); onboard_dev_destroy_pdevs(&hub->onboard_devs); hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) { /* Some hubs have a subclass of 1, which AFAICT according to the */ /* specs is not defined, but it works */ if (desc->desc.bInterfaceSubClass != 0 && desc->desc.bInterfaceSubClass != 1) return false; /* Multiple endpoints? What kind of mutant ninja-hub is this? */ if (desc->desc.bNumEndpoints != 1) return false; /* If the first endpoint is not interrupt IN, we'd better punt! 
 */
	if (!usb_endpoint_is_int_in(&desc->endpoint[0].desc))
		return false;

	return true;
}

static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
{
	struct usb_host_interface *desc;
	struct usb_device *hdev;
	struct usb_hub *hub;

	desc = intf->cur_altsetting;
	hdev = interface_to_usbdev(intf);

	/*
	 * Set the default autosuspend delay to 0 to speed up bus suspend,
	 * based on the following considerations:
	 *
	 * - Unlike other drivers, the hub driver does not rely on the
	 *   autosuspend delay to provide enough time to handle a wakeup
	 *   event, and the submitted status URB only checks for future
	 *   changes on the hub's downstream ports, so this is safe to do.
	 *
	 * - This might cause one or more extra autosuspend/resume cycles
	 *   for a few very rare devices when they are first plugged into
	 *   a hub:
	 *
	 *     devices that have trouble initializing, disconnect
	 *     themselves from the bus, and then reconnect a second
	 *     or so later
	 *
	 *     devices that exist only to download firmware and
	 *     disconnect themselves after completing it
	 *
	 *   For these quite rare devices, their drivers may change the
	 *   autosuspend delay of their parent hub in probe() to an
	 *   appropriate value to avoid this subtle problem, if anyone
	 *   cares about it.
	 *
	 * - This may cause one or more autosuspend/resume cycles on the
	 *   hub while running 'lsusb', but that is probably too
	 *   infrequent to worry about.
	 *
	 * - Changing the hub's autosuspend delay avoids an unnecessary
	 *   autosuspend timer for the hub and may decrease the power
	 *   consumption of the USB bus.
	 *
	 * - If the user has disabled autosuspend by passing
	 *   usbcore.autosuspend = -1, keep autosuspend disabled.
	 */
#ifdef CONFIG_PM
	if (hdev->dev.power.autosuspend_delay >= 0)
		pm_runtime_set_autosuspend_delay(&hdev->dev, 0);
#endif

	/*
	 * Hubs have proper suspend/resume support, except for root hubs
	 * where the controller driver doesn't have bus_suspend and
	 * bus_resume methods.
*/ if (hdev->parent) { /* normal device */ usb_enable_autosuspend(hdev); } else { /* root hub */ const struct hc_driver *drv = bus_to_hcd(hdev->bus)->driver; if (drv->bus_suspend && drv->bus_resume) usb_enable_autosuspend(hdev); } if (hdev->level == MAX_TOPO_LEVEL) { dev_err(&intf->dev, "Unsupported bus topology: hub nested too deep\n"); return -E2BIG; } #ifdef CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB if (hdev->parent) { dev_warn(&intf->dev, "ignoring external hub\n"); return -ENODEV; } #endif if (!hub_descriptor_is_sane(desc)) { dev_err(&intf->dev, "bad descriptor, ignoring hub\n"); return -EIO; } /* We found a hub */ dev_info(&intf->dev, "USB hub found\n"); hub = kzalloc(sizeof(*hub), GFP_KERNEL); if (!hub) return -ENOMEM; kref_init(&hub->kref); hub->intfdev = &intf->dev; hub->hdev = hdev; INIT_DELAYED_WORK(&hub->leds, led_work); INIT_DELAYED_WORK(&hub->init_work, NULL); INIT_WORK(&hub->events, hub_event); INIT_LIST_HEAD(&hub->onboard_devs); spin_lock_init(&hub->irq_urb_lock); timer_setup(&hub->irq_urb_retry, hub_retry_irq_urb, 0); usb_get_intf(intf); usb_get_dev(hdev); usb_set_intfdata(intf, hub); intf->needs_remote_wakeup = 1; pm_suspend_ignore_children(&intf->dev, true); if (hdev->speed == USB_SPEED_HIGH) highspeed_hubs++; if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND) hub->quirk_check_port_auto_suspend = 1; if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { hub->quirk_disable_autosuspend = 1; usb_autopm_get_interface_no_resume(intf); } if ((id->driver_info & HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL) && desc->endpoint[0].desc.bInterval > USB_REDUCE_FRAME_INTR_BINTERVAL) { desc->endpoint[0].desc.bInterval = USB_REDUCE_FRAME_INTR_BINTERVAL; /* Tell the HCD about the interrupt ep's new bInterval */ usb_set_interface(hdev, 0, 0); } if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) { onboard_dev_create_pdevs(hdev, &hub->onboard_devs); return 0; } hub_disconnect(intf); return -ENODEV; } static int hub_ioctl(struct usb_interface *intf, unsigned int code, void *user_data) { struct usb_device *hdev = interface_to_usbdev(intf); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); /* assert ifno == 0 (part of hub spec) */ switch (code) { case USBDEVFS_HUB_PORTINFO: { struct usbdevfs_hub_portinfo *info = user_data; int i; spin_lock_irq(&device_state_lock); if (hdev->devnum <= 0) info->nports = 0; else { info->nports = hdev->maxchild; for (i = 0; i < info->nports; i++) { if (hub->ports[i]->child == NULL) info->port[i] = 0; else info->port[i] = hub->ports[i]->child->devnum; } } spin_unlock_irq(&device_state_lock); return info->nports + 1; } default: return -ENOSYS; } } /* * Allow user programs to claim ports on a hub. When a device is attached * to one of these "claimed" ports, the program will "own" the device. */ static int find_port_owner(struct usb_device *hdev, unsigned port1, struct usb_dev_state ***ppowner) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); if (hdev->state == USB_STATE_NOTATTACHED) return -ENODEV; if (port1 == 0 || port1 > hdev->maxchild) return -EINVAL; /* Devices not managed by the hub driver * will always have maxchild equal to 0. 
*/ *ppowner = &(hub->ports[port1 - 1]->port_owner); return 0; } /* In the following three functions, the caller must hold hdev's lock */ int usb_hub_claim_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner) { int rc; struct usb_dev_state **powner; rc = find_port_owner(hdev, port1, &powner); if (rc) return rc; if (*powner) return -EBUSY; *powner = owner; return rc; } EXPORT_SYMBOL_GPL(usb_hub_claim_port); int usb_hub_release_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner) { int rc; struct usb_dev_state **powner; rc = find_port_owner(hdev, port1, &powner); if (rc) return rc; if (*powner != owner) return -ENOENT; *powner = NULL; return rc; } EXPORT_SYMBOL_GPL(usb_hub_release_port); void usb_hub_release_all_ports(struct usb_device *hdev, struct usb_dev_state *owner) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); int n; for (n = 0; n < hdev->maxchild; n++) { if (hub->ports[n]->port_owner == owner) hub->ports[n]->port_owner = NULL; } } /* The caller must hold udev's lock */ bool usb_device_is_owned(struct usb_device *udev) { struct usb_hub *hub; if (udev->state == USB_STATE_NOTATTACHED || !udev->parent) return false; hub = usb_hub_to_struct_hub(udev->parent); return !!hub->ports[udev->portnum - 1]->port_owner; } static void update_port_device_state(struct usb_device *udev) { struct usb_hub *hub; struct usb_port *port_dev; if (udev->parent) { hub = usb_hub_to_struct_hub(udev->parent); /* * The Link Layer Validation System Driver (lvstest) * has a test step to unbind the hub before running the * rest of the procedure. This triggers hub_disconnect * which will set the hub's maxchild to 0, further * resulting in usb_hub_to_struct_hub returning NULL. */ if (hub) { port_dev = hub->ports[udev->portnum - 1]; WRITE_ONCE(port_dev->state, udev->state); sysfs_notify_dirent(port_dev->state_kn); } } } static void recursively_mark_NOTATTACHED(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev); int i; for (i = 0; i < udev->maxchild; ++i) { if (hub->ports[i]->child) recursively_mark_NOTATTACHED(hub->ports[i]->child); } if (udev->state == USB_STATE_SUSPENDED) udev->active_duration -= jiffies; udev->state = USB_STATE_NOTATTACHED; update_port_device_state(udev); } /** * usb_set_device_state - change a device's current state (usbcore, hcds) * @udev: pointer to device whose state should be changed * @new_state: new state value to be stored * * udev->state is _not_ fully protected by the device lock. Although * most transitions are made only while holding the lock, the state can * can change to USB_STATE_NOTATTACHED at almost any time. This * is so that devices can be marked as disconnected as soon as possible, * without having to wait for any semaphores to be released. As a result, * all changes to any device's state must be protected by the * device_state_lock spinlock. * * Once a device has been added to the device tree, all changes to its state * should be made using this routine. The state should _not_ be set directly. * * If udev->state is already USB_STATE_NOTATTACHED then no change is made. * Otherwise udev->state is set to new_state, and if new_state is * USB_STATE_NOTATTACHED then all of udev's descendants' states are also set * to USB_STATE_NOTATTACHED. 
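 *
 * For example, usb_disconnect() routes a device through this function with
 * USB_STATE_NOTATTACHED before tearing it down, which also recursively marks
 * every child under a disconnected hub.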
*/ void usb_set_device_state(struct usb_device *udev, enum usb_device_state new_state) { unsigned long flags; int wakeup = -1; spin_lock_irqsave(&device_state_lock, flags); if (udev->state == USB_STATE_NOTATTACHED) ; /* do nothing */ else if (new_state != USB_STATE_NOTATTACHED) { /* root hub wakeup capabilities are managed out-of-band * and may involve silicon errata ... ignore them here. */ if (udev->parent) { if (udev->state == USB_STATE_SUSPENDED || new_state == USB_STATE_SUSPENDED) ; /* No change to wakeup settings */ else if (new_state == USB_STATE_CONFIGURED) wakeup = (udev->quirks & USB_QUIRK_IGNORE_REMOTE_WAKEUP) ? 0 : udev->actconfig->desc.bmAttributes & USB_CONFIG_ATT_WAKEUP; else wakeup = 0; } if (udev->state == USB_STATE_SUSPENDED && new_state != USB_STATE_SUSPENDED) udev->active_duration -= jiffies; else if (new_state == USB_STATE_SUSPENDED && udev->state != USB_STATE_SUSPENDED) udev->active_duration += jiffies; udev->state = new_state; update_port_device_state(udev); } else recursively_mark_NOTATTACHED(udev); spin_unlock_irqrestore(&device_state_lock, flags); if (wakeup >= 0) device_set_wakeup_capable(&udev->dev, wakeup); } EXPORT_SYMBOL_GPL(usb_set_device_state); /* * Choose a device number. * * Device numbers are used as filenames in usbfs. On USB-1.1 and * USB-2.0 buses they are also used as device addresses, however on * USB-3.0 buses the address is assigned by the controller hardware * and it usually is not the same as the device number. * * Devices connected under xHCI are not as simple. The host controller * supports virtualization, so the hardware assigns device addresses and * the HCD must setup data structures before issuing a set address * command to the hardware. */ static void choose_devnum(struct usb_device *udev) { int devnum; struct usb_bus *bus = udev->bus; /* be safe when more hub events are proceed in parallel */ mutex_lock(&bus->devnum_next_mutex); /* Try to allocate the next devnum beginning at bus->devnum_next. */ devnum = find_next_zero_bit(bus->devmap, 128, bus->devnum_next); if (devnum >= 128) devnum = find_next_zero_bit(bus->devmap, 128, 1); bus->devnum_next = (devnum >= 127 ? 1 : devnum + 1); if (devnum < 128) { set_bit(devnum, bus->devmap); udev->devnum = devnum; } mutex_unlock(&bus->devnum_next_mutex); } static void release_devnum(struct usb_device *udev) { if (udev->devnum > 0) { clear_bit(udev->devnum, udev->bus->devmap); udev->devnum = -1; } } static void update_devnum(struct usb_device *udev, int devnum) { udev->devnum = devnum; if (!udev->devaddr) udev->devaddr = (u8)devnum; } static void hub_free_dev(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); /* Root hubs aren't real devices, so don't free HCD resources */ if (hcd->driver->free_dev && udev->parent) hcd->driver->free_dev(hcd, udev); } static void hub_disconnect_children(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev); int i; /* Free up all the children before we remove this device */ for (i = 0; i < udev->maxchild; i++) { if (hub->ports[i]->child) usb_disconnect(&hub->ports[i]->child); } } /** * usb_disconnect - disconnect a device (usbcore-internal) * @pdev: pointer to device being disconnected * * Context: task context, might sleep * * Something got disconnected. Get rid of it and all of its children. * * If *pdev is a normal device then the parent hub must already be locked. * If *pdev is a root hub then the caller must hold the usb_bus_idr_lock, * which protects the set of root hubs as well as the list of buses. 
* * Only hub drivers (including virtual root hub drivers for host * controllers) should ever call this. * * This call is synchronous, and may not be used in an interrupt context. */ void usb_disconnect(struct usb_device **pdev) { struct usb_port *port_dev = NULL; struct usb_device *udev = *pdev; struct usb_hub *hub = NULL; int port1 = 1; /* mark the device as inactive, so any further urb submissions for * this device (and any of its children) will fail immediately. * this quiesces everything except pending urbs. */ usb_set_device_state(udev, USB_STATE_NOTATTACHED); dev_info(&udev->dev, "USB disconnect, device number %d\n", udev->devnum); /* * Ensure that the pm runtime code knows that the USB device * is in the process of being disconnected. */ pm_runtime_barrier(&udev->dev); usb_lock_device(udev); hub_disconnect_children(udev); /* deallocate hcd/hardware state ... nuking all pending urbs and * cleaning up all state associated with the current configuration * so that the hardware is now fully quiesced. */ dev_dbg(&udev->dev, "unregistering device\n"); usb_disable_device(udev, 0); usb_hcd_synchronize_unlinks(udev); if (udev->parent) { port1 = udev->portnum; hub = usb_hub_to_struct_hub(udev->parent); port_dev = hub->ports[port1 - 1]; sysfs_remove_link(&udev->dev.kobj, "port"); sysfs_remove_link(&port_dev->dev.kobj, "device"); /* * As usb_port_runtime_resume() de-references udev, make * sure no resumes occur during removal */ if (!test_and_set_bit(port1, hub->child_usage_bits)) pm_runtime_get_sync(&port_dev->dev); typec_deattach(port_dev->connector, &udev->dev); } usb_remove_ep_devs(&udev->ep0); usb_unlock_device(udev); /* Unregister the device. The device driver is responsible * for de-configuring the device and invoking the remove-device * notifier chain (used by usbfs and possibly others). */ device_del(&udev->dev); /* Free the device number and delete the parent's children[] * (or root_hub) pointer. */ release_devnum(udev); /* Avoid races with recursively_mark_NOTATTACHED() */ spin_lock_irq(&device_state_lock); *pdev = NULL; spin_unlock_irq(&device_state_lock); if (port_dev && test_and_clear_bit(port1, hub->child_usage_bits)) pm_runtime_put(&port_dev->dev); hub_free_dev(udev); put_device(&udev->dev); } #ifdef CONFIG_USB_ANNOUNCE_NEW_DEVICES static void show_string(struct usb_device *udev, char *id, char *string) { if (!string) return; dev_info(&udev->dev, "%s: %s\n", id, string); } static void announce_device(struct usb_device *udev) { u16 bcdDevice = le16_to_cpu(udev->descriptor.bcdDevice); dev_info(&udev->dev, "New USB device found, idVendor=%04x, idProduct=%04x, bcdDevice=%2x.%02x\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), bcdDevice >> 8, bcdDevice & 0xff); dev_info(&udev->dev, "New USB device strings: Mfr=%d, Product=%d, SerialNumber=%d\n", udev->descriptor.iManufacturer, udev->descriptor.iProduct, udev->descriptor.iSerialNumber); show_string(udev, "Product", udev->product); show_string(udev, "Manufacturer", udev->manufacturer); show_string(udev, "SerialNumber", udev->serial); } #else static inline void announce_device(struct usb_device *udev) { } #endif /** * usb_enumerate_device_otg - FIXME (usbcore-internal) * @udev: newly addressed device (in ADDRESS state) * * Finish enumeration for On-The-Go devices * * Return: 0 if successful. A negative error code otherwise. 
*/ static int usb_enumerate_device_otg(struct usb_device *udev) { int err = 0; #ifdef CONFIG_USB_OTG /* * OTG-aware devices on OTG-capable root hubs may be able to use SRP, * to wake us after we've powered off VBUS; and HNP, switching roles * "host" to "peripheral". The OTG descriptor helps figure this out. */ if (!udev->bus->is_b_host && udev->config && udev->parent == udev->bus->root_hub) { struct usb_otg_descriptor *desc = NULL; struct usb_bus *bus = udev->bus; unsigned port1 = udev->portnum; /* descriptor may appear anywhere in config */ err = __usb_get_extra_descriptor(udev->rawdescriptors[0], le16_to_cpu(udev->config[0].desc.wTotalLength), USB_DT_OTG, (void **) &desc, sizeof(*desc)); if (err || !(desc->bmAttributes & USB_OTG_HNP)) return 0; dev_info(&udev->dev, "Dual-Role OTG device on %sHNP port\n", (port1 == bus->otg_port) ? "" : "non-"); /* enable HNP before suspend, it's simpler */ if (port1 == bus->otg_port) { bus->b_hnp_enable = 1; err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, 0, USB_DEVICE_B_HNP_ENABLE, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (err < 0) { /* * OTG MESSAGE: report errors here, * customize to match your product. */ dev_err(&udev->dev, "can't set HNP mode: %d\n", err); bus->b_hnp_enable = 0; } } else if (desc->bLength == sizeof (struct usb_otg_descriptor)) { /* * We are operating on a legacy OTP device * These should be told that they are operating * on the wrong port if we have another port that does * support HNP */ if (bus->otg_port != 0) { /* Set a_alt_hnp_support for legacy otg device */ err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, 0, USB_DEVICE_A_ALT_HNP_SUPPORT, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (err < 0) dev_err(&udev->dev, "set a_alt_hnp_support failed: %d\n", err); } } } #endif return err; } /** * usb_enumerate_device - Read device configs/intfs/otg (usbcore-internal) * @udev: newly addressed device (in ADDRESS state) * * This is only called by usb_new_device() -- all comments that apply there * apply here wrt to environment. * * If the device is WUSB and not authorized, we don't attempt to read * the string descriptors, as they will be errored out by the device * until it has been authorized. * * Return: 0 if successful. A negative error code otherwise. */ static int usb_enumerate_device(struct usb_device *udev) { int err; struct usb_hcd *hcd = bus_to_hcd(udev->bus); if (udev->config == NULL) { err = usb_get_configuration(udev); if (err < 0) { if (err != -ENODEV) dev_err(&udev->dev, "can't read configurations, error %d\n", err); return err; } } /* read the standard strings and cache them if present */ udev->product = usb_cache_string(udev, udev->descriptor.iProduct); udev->manufacturer = usb_cache_string(udev, udev->descriptor.iManufacturer); udev->serial = usb_cache_string(udev, udev->descriptor.iSerialNumber); err = usb_enumerate_device_otg(udev); if (err < 0) return err; if (IS_ENABLED(CONFIG_USB_OTG_PRODUCTLIST) && hcd->tpl_support && !is_targeted(udev)) { /* Maybe it can talk to us, though we can't talk to it. * (Includes HNP test device.) 
*/ if (IS_ENABLED(CONFIG_USB_OTG) && (udev->bus->b_hnp_enable || udev->bus->is_b_host)) { err = usb_port_suspend(udev, PMSG_AUTO_SUSPEND); if (err < 0) dev_dbg(&udev->dev, "HNP fail, %d\n", err); } return -ENOTSUPP; } usb_detect_interface_quirks(udev); return 0; } static void set_usb_port_removable(struct usb_device *udev) { struct usb_device *hdev = udev->parent; struct usb_hub *hub; u8 port = udev->portnum; u16 wHubCharacteristics; bool removable = true; dev_set_removable(&udev->dev, DEVICE_REMOVABLE_UNKNOWN); if (!hdev) return; hub = usb_hub_to_struct_hub(udev->parent); /* * If the platform firmware has provided information about a port, * use that to determine whether it's removable. */ switch (hub->ports[udev->portnum - 1]->connect_type) { case USB_PORT_CONNECT_TYPE_HOT_PLUG: dev_set_removable(&udev->dev, DEVICE_REMOVABLE); return; case USB_PORT_CONNECT_TYPE_HARD_WIRED: case USB_PORT_NOT_USED: dev_set_removable(&udev->dev, DEVICE_FIXED); return; default: break; } /* * Otherwise, check whether the hub knows whether a port is removable * or not */ wHubCharacteristics = le16_to_cpu(hub->descriptor->wHubCharacteristics); if (!(wHubCharacteristics & HUB_CHAR_COMPOUND)) return; if (hub_is_superspeed(hdev)) { if (le16_to_cpu(hub->descriptor->u.ss.DeviceRemovable) & (1 << port)) removable = false; } else { if (hub->descriptor->u.hs.DeviceRemovable[port / 8] & (1 << (port % 8))) removable = false; } if (removable) dev_set_removable(&udev->dev, DEVICE_REMOVABLE); else dev_set_removable(&udev->dev, DEVICE_FIXED); } /** * usb_new_device - perform initial device setup (usbcore-internal) * @udev: newly addressed device (in ADDRESS state) * * This is called with devices which have been detected but not fully * enumerated. The device descriptor is available, but not descriptors * for any device configuration. The caller must have locked either * the parent hub (if udev is a normal device) or else the * usb_bus_idr_lock (if udev is a root hub). The parent's pointer to * udev has already been installed, but udev is not yet visible through * sysfs or other filesystem code. * * This call is synchronous, and may not be used in an interrupt context. * * Only the hub driver or root-hub registrar should ever call this. * * Return: Whether the device is configured properly or not. Zero if the * interface was registered with the driver core; else a negative errno * value. * */ int usb_new_device(struct usb_device *udev) { int err; if (udev->parent) { /* Initialize non-root-hub device wakeup to disabled; * device (un)configuration controls wakeup capable * sysfs power/wakeup controls wakeup enabled/disabled */ device_init_wakeup(&udev->dev, 0); } /* Tell the runtime-PM framework the device is active */ pm_runtime_set_active(&udev->dev); pm_runtime_get_noresume(&udev->dev); pm_runtime_use_autosuspend(&udev->dev); pm_runtime_enable(&udev->dev); /* By default, forbid autosuspend for all devices. It will be * allowed for hubs during binding. */ usb_disable_autosuspend(udev); err = usb_enumerate_device(udev); /* Read descriptors */ if (err < 0) goto fail; dev_dbg(&udev->dev, "udev %d, busnum %d, minor = %d\n", udev->devnum, udev->bus->busnum, (((udev->bus->busnum-1) * 128) + (udev->devnum-1))); /* export the usbdev device-node for libusb */ udev->dev.devt = MKDEV(USB_DEVICE_MAJOR, (((udev->bus->busnum-1) * 128) + (udev->devnum-1))); /* Tell the world! 
*/ announce_device(udev); if (udev->serial) add_device_randomness(udev->serial, strlen(udev->serial)); if (udev->product) add_device_randomness(udev->product, strlen(udev->product)); if (udev->manufacturer) add_device_randomness(udev->manufacturer, strlen(udev->manufacturer)); device_enable_async_suspend(&udev->dev); /* check whether the hub or firmware marks this port as non-removable */ set_usb_port_removable(udev); /* Register the device. The device driver is responsible * for configuring the device and invoking the add-device * notifier chain (used by usbfs and possibly others). */ err = device_add(&udev->dev); if (err) { dev_err(&udev->dev, "can't device_add, error %d\n", err); goto fail; } /* Create link files between child device and usb port device. */ if (udev->parent) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); int port1 = udev->portnum; struct usb_port *port_dev = hub->ports[port1 - 1]; err = sysfs_create_link(&udev->dev.kobj, &port_dev->dev.kobj, "port"); if (err) goto fail; err = sysfs_create_link(&port_dev->dev.kobj, &udev->dev.kobj, "device"); if (err) { sysfs_remove_link(&udev->dev.kobj, "port"); goto fail; } if (!test_and_set_bit(port1, hub->child_usage_bits)) pm_runtime_get_sync(&port_dev->dev); typec_attach(port_dev->connector, &udev->dev); } (void) usb_create_ep_devs(&udev->dev, &udev->ep0, udev); usb_mark_last_busy(udev); pm_runtime_put_sync_autosuspend(&udev->dev); return err; fail: usb_set_device_state(udev, USB_STATE_NOTATTACHED); pm_runtime_disable(&udev->dev); pm_runtime_set_suspended(&udev->dev); return err; } /** * usb_deauthorize_device - deauthorize a device (usbcore-internal) * @usb_dev: USB device * * Move the USB device to a very basic state where interfaces are disabled * and the device is in fact unconfigured and unusable. * * We share a lock (that we have) with device_del(), so we need to * defer its call. * * Return: 0. */ int usb_deauthorize_device(struct usb_device *usb_dev) { usb_lock_device(usb_dev); if (usb_dev->authorized == 0) goto out_unauthorized; usb_dev->authorized = 0; usb_set_configuration(usb_dev, -1); out_unauthorized: usb_unlock_device(usb_dev); return 0; } int usb_authorize_device(struct usb_device *usb_dev) { int result = 0, c; usb_lock_device(usb_dev); if (usb_dev->authorized == 1) goto out_authorized; result = usb_autoresume_device(usb_dev); if (result < 0) { dev_err(&usb_dev->dev, "can't autoresume for authorization: %d\n", result); goto error_autoresume; } usb_dev->authorized = 1; /* Choose and set the configuration. This registers the interfaces * with the driver core and lets interface drivers bind to them. */ c = usb_choose_configuration(usb_dev); if (c >= 0) { result = usb_set_configuration(usb_dev, c); if (result) { dev_err(&usb_dev->dev, "can't set config #%d, error %d\n", c, result); /* This need not be fatal. The user can try to * set other configurations. */ } } dev_info(&usb_dev->dev, "authorized to connect\n"); usb_autosuspend_device(usb_dev); error_autoresume: out_authorized: usb_unlock_device(usb_dev); /* complements locktree */ return result; } /** * get_port_ssp_rate - Match the extended port status to SSP rate * @hdev: The hub device * @ext_portstatus: extended port status * * Match the extended port status speed id to the SuperSpeed Plus sublink speed * capability attributes. Base on the number of connected lanes and speed, * return the corresponding enum usb_ssp_rate. 
*/ static enum usb_ssp_rate get_port_ssp_rate(struct usb_device *hdev, u32 ext_portstatus) { struct usb_ssp_cap_descriptor *ssp_cap; u32 attr; u8 speed_id; u8 ssac; u8 lanes; int i; if (!hdev->bos) goto out; ssp_cap = hdev->bos->ssp_cap; if (!ssp_cap) goto out; speed_id = ext_portstatus & USB_EXT_PORT_STAT_RX_SPEED_ID; lanes = USB_EXT_PORT_RX_LANES(ext_portstatus) + 1; ssac = le32_to_cpu(ssp_cap->bmAttributes) & USB_SSP_SUBLINK_SPEED_ATTRIBS; for (i = 0; i <= ssac; i++) { u8 ssid; attr = le32_to_cpu(ssp_cap->bmSublinkSpeedAttr[i]); ssid = FIELD_GET(USB_SSP_SUBLINK_SPEED_SSID, attr); if (speed_id == ssid) { u16 mantissa; u8 lse; u8 type; /* * Note: currently asymmetric lane types are only * applicable for SSIC operate in SuperSpeed protocol */ type = FIELD_GET(USB_SSP_SUBLINK_SPEED_ST, attr); if (type == USB_SSP_SUBLINK_SPEED_ST_ASYM_RX || type == USB_SSP_SUBLINK_SPEED_ST_ASYM_TX) goto out; if (FIELD_GET(USB_SSP_SUBLINK_SPEED_LP, attr) != USB_SSP_SUBLINK_SPEED_LP_SSP) goto out; lse = FIELD_GET(USB_SSP_SUBLINK_SPEED_LSE, attr); mantissa = FIELD_GET(USB_SSP_SUBLINK_SPEED_LSM, attr); /* Convert to Gbps */ for (; lse < USB_SSP_SUBLINK_SPEED_LSE_GBPS; lse++) mantissa /= 1000; if (mantissa >= 10 && lanes == 1) return USB_SSP_GEN_2x1; if (mantissa >= 10 && lanes == 2) return USB_SSP_GEN_2x2; if (mantissa >= 5 && lanes == 2) return USB_SSP_GEN_1x2; goto out; } } out: return USB_SSP_GEN_UNKNOWN; } #ifdef CONFIG_USB_FEW_INIT_RETRIES #define PORT_RESET_TRIES 2 #define SET_ADDRESS_TRIES 1 #define GET_DESCRIPTOR_TRIES 1 #define GET_MAXPACKET0_TRIES 1 #define PORT_INIT_TRIES 4 #else #define PORT_RESET_TRIES 5 #define SET_ADDRESS_TRIES 2 #define GET_DESCRIPTOR_TRIES 2 #define GET_MAXPACKET0_TRIES 3 #define PORT_INIT_TRIES 4 #endif /* CONFIG_USB_FEW_INIT_RETRIES */ #define DETECT_DISCONNECT_TRIES 5 #define HUB_ROOT_RESET_TIME 60 /* times are in msec */ #define HUB_SHORT_RESET_TIME 10 #define HUB_BH_RESET_TIME 50 #define HUB_LONG_RESET_TIME 200 #define HUB_RESET_TIMEOUT 800 static bool use_new_scheme(struct usb_device *udev, int retry, struct usb_port *port_dev) { int old_scheme_first_port = (port_dev->quirks & USB_PORT_QUIRK_OLD_SCHEME) || old_scheme_first; /* * "New scheme" enumeration causes an extra state transition to be * exposed to an xhci host and causes USB3 devices to receive control * commands in the default state. This has been seen to cause * enumeration failures, so disable this enumeration scheme for USB3 * devices. */ if (udev->speed >= USB_SPEED_SUPER) return false; /* * If use_both_schemes is set, use the first scheme (whichever * it is) for the larger half of the retries, then use the other * scheme. Otherwise, use the first scheme for all the retries. */ if (use_both_schemes && retry >= (PORT_INIT_TRIES + 1) / 2) return old_scheme_first_port; /* Second half */ return !old_scheme_first_port; /* First half or all */ } /* Is a USB 3.0 port in the Inactive or Compliance Mode state? 
* Port warm reset is required to recover */ static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, u16 portstatus) { u16 link_state; if (!hub_is_superspeed(hub->hdev)) return false; if (test_bit(port1, hub->warm_reset_bits)) return true; link_state = portstatus & USB_PORT_STAT_LINK_STATE; return link_state == USB_SS_PORT_LS_SS_INACTIVE || link_state == USB_SS_PORT_LS_COMP_MOD; } static int hub_port_wait_reset(struct usb_hub *hub, int port1, struct usb_device *udev, unsigned int delay, bool warm) { int delay_time, ret; u16 portstatus; u16 portchange; u32 ext_portstatus = 0; for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; delay_time += delay) { /* wait to give the device a chance to reset */ msleep(delay); /* read and decode port status */ if (hub_is_superspeedplus(hub->hdev)) ret = hub_ext_port_status(hub, port1, HUB_EXT_PORT_STATUS, &portstatus, &portchange, &ext_portstatus); else ret = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (ret < 0) return ret; /* * The port state is unknown until the reset completes. * * On top of that, some chips may require additional time * to re-establish a connection after the reset is complete, * so also wait for the connection to be re-established. */ if (!(portstatus & USB_PORT_STAT_RESET) && (portstatus & USB_PORT_STAT_CONNECTION)) break; /* switch to the long delay after two short delay failures */ if (delay_time >= 2 * HUB_SHORT_RESET_TIME) delay = HUB_LONG_RESET_TIME; dev_dbg(&hub->ports[port1 - 1]->dev, "not %sreset yet, waiting %dms\n", warm ? "warm " : "", delay); } if ((portstatus & USB_PORT_STAT_RESET)) return -EBUSY; if (hub_port_warm_reset_required(hub, port1, portstatus)) return -ENOTCONN; /* Device went away? */ if (!(portstatus & USB_PORT_STAT_CONNECTION)) return -ENOTCONN; /* Retry if connect change is set but status is still connected. * A USB 3.0 connection may bounce if multiple warm resets were issued, * but the device may have successfully re-connected. Ignore it. */ if (!hub_is_superspeed(hub->hdev) && (portchange & USB_PORT_STAT_C_CONNECTION)) { usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); return -EAGAIN; } if (!(portstatus & USB_PORT_STAT_ENABLE)) return -EBUSY; if (!udev) return 0; if (hub_is_superspeedplus(hub->hdev)) { /* extended portstatus Rx and Tx lane count are zero based */ udev->rx_lanes = USB_EXT_PORT_RX_LANES(ext_portstatus) + 1; udev->tx_lanes = USB_EXT_PORT_TX_LANES(ext_portstatus) + 1; udev->ssp_rate = get_port_ssp_rate(hub->hdev, ext_portstatus); } else { udev->rx_lanes = 1; udev->tx_lanes = 1; udev->ssp_rate = USB_SSP_GEN_UNKNOWN; } if (udev->ssp_rate != USB_SSP_GEN_UNKNOWN) udev->speed = USB_SPEED_SUPER_PLUS; else if (hub_is_superspeed(hub->hdev)) udev->speed = USB_SPEED_SUPER; else if (portstatus & USB_PORT_STAT_HIGH_SPEED) udev->speed = USB_SPEED_HIGH; else if (portstatus & USB_PORT_STAT_LOW_SPEED) udev->speed = USB_SPEED_LOW; else udev->speed = USB_SPEED_FULL; return 0; } /* Handle port reset and port warm(BH) reset (for USB3 protocol ports) */ static int hub_port_reset(struct usb_hub *hub, int port1, struct usb_device *udev, unsigned int delay, bool warm) { int i, status; u16 portchange, portstatus; struct usb_port *port_dev = hub->ports[port1 - 1]; int reset_recovery_time; if (!hub_is_superspeed(hub->hdev)) { if (warm) { dev_err(hub->intfdev, "only USB3 hub support " "warm reset\n"); return -EINVAL; } /* Block EHCI CF initialization during the port reset. * Some companion controllers don't like it when they mix. 
*/ down_read(&ehci_cf_port_reset_rwsem); } else if (!warm) { /* * If the caller hasn't explicitly requested a warm reset, * double check and see if one is needed. */ if (usb_hub_port_status(hub, port1, &portstatus, &portchange) == 0) if (hub_port_warm_reset_required(hub, port1, portstatus)) warm = true; } clear_bit(port1, hub->warm_reset_bits); /* Reset the port */ for (i = 0; i < PORT_RESET_TRIES; i++) { status = set_port_feature(hub->hdev, port1, (warm ? USB_PORT_FEAT_BH_PORT_RESET : USB_PORT_FEAT_RESET)); if (status == -ENODEV) { ; /* The hub is gone */ } else if (status) { dev_err(&port_dev->dev, "cannot %sreset (err = %d)\n", warm ? "warm " : "", status); } else { status = hub_port_wait_reset(hub, port1, udev, delay, warm); if (status && status != -ENOTCONN && status != -ENODEV) dev_dbg(hub->intfdev, "port_wait_reset: err = %d\n", status); } /* * Check for disconnect or reset, and bail out after several * reset attempts to avoid warm reset loop. */ if (status == 0 || status == -ENOTCONN || status == -ENODEV || (status == -EBUSY && i == PORT_RESET_TRIES - 1)) { usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_RESET); if (!hub_is_superspeed(hub->hdev)) goto done; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_BH_PORT_RESET); usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_PORT_LINK_STATE); if (udev) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); /* * If a USB 3.0 device migrates from reset to an error * state, re-issue the warm reset. */ if (usb_hub_port_status(hub, port1, &portstatus, &portchange) < 0) goto done; if (!hub_port_warm_reset_required(hub, port1, portstatus)) goto done; /* * If the port is in SS.Inactive or Compliance Mode, the * hot or warm reset failed. Try another warm reset. */ if (!warm) { dev_dbg(&port_dev->dev, "hot reset failed, warm reset\n"); warm = true; } } dev_dbg(&port_dev->dev, "not enabled, trying %sreset again...\n", warm ? "warm " : ""); delay = HUB_LONG_RESET_TIME; } dev_err(&port_dev->dev, "Cannot enable. Maybe the USB cable is bad?\n"); done: if (status == 0) { if (port_dev->quirks & USB_PORT_QUIRK_FAST_ENUM) usleep_range(10000, 12000); else { /* TRSTRCY = 10 ms; plus some extra */ reset_recovery_time = 10 + 40; /* Hub needs extra delay after resetting its port. */ if (hub->hdev->quirks & USB_QUIRK_HUB_SLOW_RESET) reset_recovery_time += 100; msleep(reset_recovery_time); } if (udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); update_devnum(udev, 0); /* The xHC may think the device is already reset, * so ignore the status. */ if (hcd->driver->reset_device) hcd->driver->reset_device(hcd, udev); usb_set_device_state(udev, USB_STATE_DEFAULT); } } else { if (udev) usb_set_device_state(udev, USB_STATE_NOTATTACHED); } if (!hub_is_superspeed(hub->hdev)) up_read(&ehci_cf_port_reset_rwsem); return status; } /* * hub_port_stop_enumerate - stop USB enumeration or ignore port events * @hub: target hub * @port1: port num of the port * @retries: port retries number of hub_port_init() * * Return: * true: ignore port actions/events or give up connection attempts. * false: keep original behavior. * * This function will be based on retries to check whether the port which is * marked with early_stop attribute would stop enumeration or ignore events. * * Note: * This function didn't change anything if early_stop is not set, and it will * prevent all connection attempts when early_stop is set and the attempts of * the port are more than 1. 
*/ static bool hub_port_stop_enumerate(struct usb_hub *hub, int port1, int retries) { struct usb_port *port_dev = hub->ports[port1 - 1]; if (port_dev->early_stop) { if (port_dev->ignore_event) return true; /* * We want unsuccessful attempts to fail quickly. * Since some devices may need one failure during * port initialization, we allow two tries but no * more. */ if (retries < 2) return false; port_dev->ignore_event = 1; } else port_dev->ignore_event = 0; return port_dev->ignore_event; } /* Check if a port is power on */ int usb_port_is_power_on(struct usb_hub *hub, unsigned int portstatus) { int ret = 0; if (hub_is_superspeed(hub->hdev)) { if (portstatus & USB_SS_PORT_STAT_POWER) ret = 1; } else { if (portstatus & USB_PORT_STAT_POWER) ret = 1; } return ret; } static void usb_lock_port(struct usb_port *port_dev) __acquires(&port_dev->status_lock) { mutex_lock(&port_dev->status_lock); __acquire(&port_dev->status_lock); } static void usb_unlock_port(struct usb_port *port_dev) __releases(&port_dev->status_lock) { mutex_unlock(&port_dev->status_lock); __release(&port_dev->status_lock); } #ifdef CONFIG_PM /* Check if a port is suspended(USB2.0 port) or in U3 state(USB3.0 port) */ static int port_is_suspended(struct usb_hub *hub, unsigned portstatus) { int ret = 0; if (hub_is_superspeed(hub->hdev)) { if ((portstatus & USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_U3) ret = 1; } else { if (portstatus & USB_PORT_STAT_SUSPEND) ret = 1; } return ret; } /* Determine whether the device on a port is ready for a normal resume, * is ready for a reset-resume, or should be disconnected. */ static int check_port_resume_type(struct usb_device *udev, struct usb_hub *hub, int port1, int status, u16 portchange, u16 portstatus) { struct usb_port *port_dev = hub->ports[port1 - 1]; int retries = 3; retry: /* Is a warm reset needed to recover the connection? */ if (status == 0 && udev->reset_resume && hub_port_warm_reset_required(hub, port1, portstatus)) { /* pass */; } /* Is the device still present? */ else if (status || port_is_suspended(hub, portstatus) || !usb_port_is_power_on(hub, portstatus)) { if (status >= 0) status = -ENODEV; } else if (!(portstatus & USB_PORT_STAT_CONNECTION)) { if (retries--) { usleep_range(200, 300); status = usb_hub_port_status(hub, port1, &portstatus, &portchange); goto retry; } status = -ENODEV; } /* Can't do a normal resume if the port isn't enabled, * so try a reset-resume instead. */ else if (!(portstatus & USB_PORT_STAT_ENABLE) && !udev->reset_resume) { if (udev->persist_enabled) udev->reset_resume = 1; else status = -ENODEV; } if (status) { dev_dbg(&port_dev->dev, "status %04x.%04x after resume, %d\n", portchange, portstatus, status); } else if (udev->reset_resume) { /* Late port handoff can set status-change bits */ if (portchange & USB_PORT_STAT_C_CONNECTION) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (portchange & USB_PORT_STAT_C_ENABLE) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_ENABLE); /* * Whatever made this reset-resume necessary may have * turned on the port1 bit in hub->change_bits. But after * a successful reset-resume we want the bit to be clear; * if it was on it would indicate that something happened * following the reset-resume. */ clear_bit(port1, hub->change_bits); } return status; } int usb_disable_ltm(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); /* Check if the roothub and device supports LTM. 
*/ if (!usb_device_supports_ltm(hcd->self.root_hub) || !usb_device_supports_ltm(udev)) return 0; /* Clear Feature LTM Enable can only be sent if the device is * configured. */ if (!udev->actconfig) return 0; return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_LTM_ENABLE, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } EXPORT_SYMBOL_GPL(usb_disable_ltm); void usb_enable_ltm(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); /* Check if the roothub and device supports LTM. */ if (!usb_device_supports_ltm(hcd->self.root_hub) || !usb_device_supports_ltm(udev)) return; /* Set Feature LTM Enable can only be sent if the device is * configured. */ if (!udev->actconfig) return; usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_LTM_ENABLE, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } EXPORT_SYMBOL_GPL(usb_enable_ltm); /* * usb_enable_remote_wakeup - enable remote wakeup for a device * @udev: target device * * For USB-2 devices: Set the device's remote wakeup feature. * * For USB-3 devices: Assume there's only one function on the device and * enable remote wake for the first interface. FIXME if the interface * association descriptor shows there's more than one function. */ static int usb_enable_remote_wakeup(struct usb_device *udev) { if (udev->speed < USB_SPEED_SUPER) return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_REMOTE_WAKEUP, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); else return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_INTERFACE, USB_INTRF_FUNC_SUSPEND, USB_INTRF_FUNC_SUSPEND_RW | USB_INTRF_FUNC_SUSPEND_LP, NULL, 0, USB_CTRL_SET_TIMEOUT); } /* * usb_disable_remote_wakeup - disable remote wakeup for a device * @udev: target device * * For USB-2 devices: Clear the device's remote wakeup feature. * * For USB-3 devices: Assume there's only one function on the device and * disable remote wake for the first interface. FIXME if the interface * association descriptor shows there's more than one function. */ static int usb_disable_remote_wakeup(struct usb_device *udev) { if (udev->speed < USB_SPEED_SUPER) return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_REMOTE_WAKEUP, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); else return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_INTERFACE, USB_INTRF_FUNC_SUSPEND, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } /* Count of wakeup-enabled devices at or below udev */ unsigned usb_wakeup_enabled_descendants(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev); return udev->do_remote_wakeup + (hub ? hub->wakeup_enabled_descendants : 0); } EXPORT_SYMBOL_GPL(usb_wakeup_enabled_descendants); /* * usb_port_suspend - suspend a usb device's upstream port * @udev: device that's no longer in active use, not a root hub * Context: must be able to sleep; device not locked; pm locks held * * Suspends a USB device that isn't in active use, conserving power. * Devices may wake out of a suspend, if anything important happens, * using the remote wakeup mechanism. They may also be taken out of * suspend by the host, using usb_port_resume(). It's also routine * to disconnect devices while they are suspended. * * This only affects the USB hardware for a device; its interfaces * (and, for hubs, child devices) must already have been suspended. 
* * Selective port suspend reduces power; most suspended devices draw * less than 500 uA. It's also used in OTG, along with remote wakeup. * All devices below the suspended port are also suspended. * * Devices leave suspend state when the host wakes them up. Some devices * also support "remote wakeup", where the device can activate the USB * tree above them to deliver data, such as a keypress or packet. In * some cases, this wakes the USB host. * * Suspending OTG devices may trigger HNP, if that's been enabled * between a pair of dual-role devices. That will change roles, such * as from A-Host to A-Peripheral or from B-Host back to B-Peripheral. * * Devices on USB hub ports have only one "suspend" state, corresponding * to ACPI D2, "may cause the device to lose some context". * State transitions include: * * - suspend, resume ... when the VBUS power link stays live * - suspend, disconnect ... VBUS lost * * Once VBUS drop breaks the circuit, the port it's using has to go through * normal re-enumeration procedures, starting with enabling VBUS power. * Other than re-initializing the hub (plug/unplug, except for root hubs), * Linux (2.6) currently has NO mechanisms to initiate that: no hub_wq * timer, no SRP, no requests through sysfs. * * If Runtime PM isn't enabled or used, non-SuperSpeed devices may not get * suspended until their bus goes into global suspend (i.e., the root * hub is suspended). Nevertheless, we change @udev->state to * USB_STATE_SUSPENDED as this is the device's "logical" state. The actual * upstream port setting is stored in @udev->port_is_suspended. * * Returns 0 on success, else negative errno. */ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); struct usb_port *port_dev = hub->ports[udev->portnum - 1]; int port1 = udev->portnum; int status; bool really_suspend = true; usb_lock_port(port_dev); /* enable remote wakeup when appropriate; this lets the device * wake up the upstream hub (including maybe the root hub). * * NOTE: OTG devices may issue remote wakeup (or SRP) even when * we don't explicitly enable it here. */ if (udev->do_remote_wakeup) { status = usb_enable_remote_wakeup(udev); if (status) { dev_dbg(&udev->dev, "won't remote wakeup, status %d\n", status); /* bail if autosuspend is requested */ if (PMSG_IS_AUTO(msg)) goto err_wakeup; } } /* disable USB2 hardware LPM */ usb_disable_usb2_hardware_lpm(udev); if (usb_disable_ltm(udev)) { dev_err(&udev->dev, "Failed to disable LTM before suspend\n"); status = -ENOMEM; if (PMSG_IS_AUTO(msg)) goto err_ltm; } /* see 7.1.7.6 */ if (hub_is_superspeed(hub->hdev)) status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3); /* * For system suspend, we do not need to enable the suspend feature * on individual USB-2 ports. The devices will automatically go * into suspend a few ms after the root hub stops sending packets. * The USB 2.0 spec calls this "global suspend". * * However, many USB hubs have a bug: They don't relay wakeup requests * from a downstream port if the port's suspend feature isn't on. * Therefore we will turn on the suspend feature if udev or any of its * descendants is enabled for remote wakeup. */ else if (PMSG_IS_AUTO(msg) || usb_wakeup_enabled_descendants(udev) > 0) status = set_port_feature(hub->hdev, port1, USB_PORT_FEAT_SUSPEND); else { really_suspend = false; status = 0; } if (status) { /* Check if the port has been suspended for the timeout case * to prevent the suspended port from incorrect handling. 
*/ if (status == -ETIMEDOUT) { int ret; u16 portstatus, portchange; portstatus = portchange = 0; ret = usb_hub_port_status(hub, port1, &portstatus, &portchange); dev_dbg(&port_dev->dev, "suspend timeout, status %04x\n", portstatus); if (ret == 0 && port_is_suspended(hub, portstatus)) { status = 0; goto suspend_done; } } dev_dbg(&port_dev->dev, "can't suspend, status %d\n", status); /* Try to enable USB3 LTM again */ usb_enable_ltm(udev); err_ltm: /* Try to enable USB2 hardware LPM again */ usb_enable_usb2_hardware_lpm(udev); if (udev->do_remote_wakeup) (void) usb_disable_remote_wakeup(udev); err_wakeup: /* System sleep transitions should never fail */ if (!PMSG_IS_AUTO(msg)) status = 0; } else { suspend_done: dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n", (PMSG_IS_AUTO(msg) ? "auto-" : ""), udev->do_remote_wakeup); if (really_suspend) { udev->port_is_suspended = 1; /* device has up to 10 msec to fully suspend */ msleep(10); } usb_set_device_state(udev, USB_STATE_SUSPENDED); } if (status == 0 && !udev->do_remote_wakeup && udev->persist_enabled && test_and_clear_bit(port1, hub->child_usage_bits)) pm_runtime_put_sync(&port_dev->dev); usb_mark_last_busy(hub->hdev); usb_unlock_port(port_dev); return status; } /* * If the USB "suspend" state is in use (rather than "global suspend"), * many devices will be individually taken out of suspend state using * special "resume" signaling. This routine kicks in shortly after * hardware resume signaling is finished, either because of selective * resume (by host) or remote wakeup (by device) ... now see what changed * in the tree that's rooted at this device. * * If @udev->reset_resume is set then the device is reset before the * status check is done. */ static int finish_port_resume(struct usb_device *udev) { int status = 0; u16 devstatus = 0; /* caller owns the udev device lock */ dev_dbg(&udev->dev, "%s\n", udev->reset_resume ? "finish reset-resume" : "finish resume"); /* usb ch9 identifies four variants of SUSPENDED, based on what * state the device resumes to. Linux currently won't see the * first two on the host side; they'd be inside hub_port_init() * during many timeouts, but hub_wq can't suspend until later. */ usb_set_device_state(udev, udev->actconfig ? USB_STATE_CONFIGURED : USB_STATE_ADDRESS); /* 10.5.4.5 says not to reset a suspended port if the attached * device is enabled for remote wakeup. Hence the reset * operation is carried out here, after the port has been * resumed. */ if (udev->reset_resume) { /* * If the device morphs or switches modes when it is reset, * we don't want to perform a reset-resume. We'll fail the * resume, which will cause a logical disconnect, and then * the device will be rediscovered. */ retry_reset_resume: if (udev->quirks & USB_QUIRK_RESET) status = -ENODEV; else status = usb_reset_and_verify_device(udev); } /* 10.5.4.5 says be sure devices in the tree are still there. * For now let's assume the device didn't go crazy on resume, * and device drivers will know about any resume quirks. */ if (status == 0) { devstatus = 0; status = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, &devstatus); /* If a normal resume failed, try doing a reset-resume */ if (status && !udev->reset_resume && udev->persist_enabled) { dev_dbg(&udev->dev, "retry with reset-resume\n"); udev->reset_resume = 1; goto retry_reset_resume; } } if (status) { dev_dbg(&udev->dev, "gone after usb resume? 
status %d\n", status); /* * There are a few quirky devices which violate the standard * by claiming to have remote wakeup enabled after a reset, * which crash if the feature is cleared, hence check for * udev->reset_resume */ } else if (udev->actconfig && !udev->reset_resume) { if (udev->speed < USB_SPEED_SUPER) { if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)) status = usb_disable_remote_wakeup(udev); } else { status = usb_get_std_status(udev, USB_RECIP_INTERFACE, 0, &devstatus); if (!status && devstatus & (USB_INTRF_STAT_FUNC_RW_CAP | USB_INTRF_STAT_FUNC_RW)) status = usb_disable_remote_wakeup(udev); } if (status) dev_dbg(&udev->dev, "disable remote wakeup, status %d\n", status); status = 0; } return status; } /* * There are some SS USB devices which take longer time for link training. * XHCI specs 4.19.4 says that when Link training is successful, port * sets CCS bit to 1. So if SW reads port status before successful link * training, then it will not find device to be present. * USB Analyzer log with such buggy devices show that in some cases * device switch on the RX termination after long delay of host enabling * the VBUS. In few other cases it has been seen that device fails to * negotiate link training in first attempt. It has been * reported till now that few devices take as long as 2000 ms to train * the link after host enabling its VBUS and termination. Following * routine implements a 2000 ms timeout for link training. If in a case * link trains before timeout, loop will exit earlier. * * There are also some 2.0 hard drive based devices and 3.0 thumb * drives that, when plugged into a 2.0 only port, take a long * time to set CCS after VBUS enable. * * FIXME: If a device was connected before suspend, but was removed * while system was asleep, then the loop in the following routine will * only exit at timeout. * * This routine should only be called when persist is enabled. */ static int wait_for_connected(struct usb_device *udev, struct usb_hub *hub, int port1, u16 *portchange, u16 *portstatus) { int status = 0, delay_ms = 0; while (delay_ms < 2000) { if (status || *portstatus & USB_PORT_STAT_CONNECTION) break; if (!usb_port_is_power_on(hub, *portstatus)) { status = -ENODEV; break; } msleep(20); delay_ms += 20; status = usb_hub_port_status(hub, port1, portstatus, portchange); } dev_dbg(&udev->dev, "Waited %dms for CONNECT\n", delay_ms); return status; } /* * usb_port_resume - re-activate a suspended usb device's upstream port * @udev: device to re-activate, not a root hub * Context: must be able to sleep; device not locked; pm locks held * * This will re-activate the suspended device, increasing power usage * while letting drivers communicate again with its endpoints. * USB resume explicitly guarantees that the power session between * the host and the device is the same as it was when the device * suspended. * * If @udev->reset_resume is set then this routine won't check that the * port is still enabled. Furthermore, finish_port_resume() above will * reset @udev. The end result is that a broken power session can be * recovered and @udev will appear to persist across a loss of VBUS power. * * For example, if a host controller doesn't maintain VBUS suspend current * during a system sleep or is reset when the system wakes up, all the USB * power sessions below it will be broken. This is especially troublesome * for mass-storage devices containing mounted filesystems, since the * device will appear to have disconnected and all the memory mappings * to it will be lost. 
Using the USB_PERSIST facility, the device can be * made to appear as if it had not disconnected. * * This facility can be dangerous. Although usb_reset_and_verify_device() makes * every effort to insure that the same device is present after the * reset as before, it cannot provide a 100% guarantee. Furthermore it's * quite possible for a device to remain unaltered but its media to be * changed. If the user replaces a flash memory card while the system is * asleep, he will have only himself to blame when the filesystem on the * new card is corrupted and the system crashes. * * Returns 0 on success, else negative errno. */ int usb_port_resume(struct usb_device *udev, pm_message_t msg) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); struct usb_port *port_dev = hub->ports[udev->portnum - 1]; int port1 = udev->portnum; int status; u16 portchange, portstatus; if (!test_and_set_bit(port1, hub->child_usage_bits)) { status = pm_runtime_resume_and_get(&port_dev->dev); if (status < 0) { dev_dbg(&udev->dev, "can't resume usb port, status %d\n", status); return status; } } usb_lock_port(port_dev); /* Skip the initial Clear-Suspend step for a remote wakeup */ status = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (status == 0 && !port_is_suspended(hub, portstatus)) { if (portchange & USB_PORT_STAT_C_SUSPEND) pm_wakeup_event(&udev->dev, 0); goto SuspendCleared; } /* see 7.1.7.7; affects power usage, but not budgeting */ if (hub_is_superspeed(hub->hdev)) status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U0); else status = usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_SUSPEND); if (status) { dev_dbg(&port_dev->dev, "can't resume, status %d\n", status); } else { /* drive resume for USB_RESUME_TIMEOUT msec */ dev_dbg(&udev->dev, "usb %sresume\n", (PMSG_IS_AUTO(msg) ? "auto-" : "")); msleep(USB_RESUME_TIMEOUT); /* Virtual root hubs can trigger on GET_PORT_STATUS to * stop resume signaling. Then finish the resume * sequence. */ status = usb_hub_port_status(hub, port1, &portstatus, &portchange); } SuspendCleared: if (status == 0) { udev->port_is_suspended = 0; if (hub_is_superspeed(hub->hdev)) { if (portchange & USB_PORT_STAT_C_LINK_STATE) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_PORT_LINK_STATE); } else { if (portchange & USB_PORT_STAT_C_SUSPEND) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_SUSPEND); } /* TRSMRCY = 10 msec */ msleep(10); } if (udev->persist_enabled) status = wait_for_connected(udev, hub, port1, &portchange, &portstatus); status = check_port_resume_type(udev, hub, port1, status, portchange, portstatus); if (status == 0) status = finish_port_resume(udev); if (status < 0) { dev_dbg(&udev->dev, "can't resume, status %d\n", status); hub_port_logical_disconnect(hub, port1); } else { /* Try to enable USB2 hardware LPM */ usb_enable_usb2_hardware_lpm(udev); /* Try to enable USB3 LTM */ usb_enable_ltm(udev); } usb_unlock_port(port_dev); return status; } int usb_remote_wakeup(struct usb_device *udev) { int status = 0; usb_lock_device(udev); if (udev->state == USB_STATE_SUSPENDED) { dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-"); status = usb_autoresume_device(udev); if (status == 0) { /* Let the drivers do their thing, then... */ usb_autosuspend_device(udev); } } usb_unlock_device(udev); return status; } /* Returns 1 if there was a remote wakeup and a connect status change. 
*/ static int hub_handle_remote_wakeup(struct usb_hub *hub, unsigned int port, u16 portstatus, u16 portchange) __must_hold(&port_dev->status_lock) { struct usb_port *port_dev = hub->ports[port - 1]; struct usb_device *hdev; struct usb_device *udev; int connect_change = 0; u16 link_state; int ret; hdev = hub->hdev; udev = port_dev->child; if (!hub_is_superspeed(hdev)) { if (!(portchange & USB_PORT_STAT_C_SUSPEND)) return 0; usb_clear_port_feature(hdev, port, USB_PORT_FEAT_C_SUSPEND); } else { link_state = portstatus & USB_PORT_STAT_LINK_STATE; if (!udev || udev->state != USB_STATE_SUSPENDED || (link_state != USB_SS_PORT_LS_U0 && link_state != USB_SS_PORT_LS_U1 && link_state != USB_SS_PORT_LS_U2)) return 0; } if (udev) { /* TRSMRCY = 10 msec */ msleep(10); usb_unlock_port(port_dev); ret = usb_remote_wakeup(udev); usb_lock_port(port_dev); if (ret < 0) connect_change = 1; } else { ret = -ENODEV; hub_port_disable(hub, port, 1); } dev_dbg(&port_dev->dev, "resume, status %d\n", ret); return connect_change; } static int check_ports_changed(struct usb_hub *hub) { int port1; for (port1 = 1; port1 <= hub->hdev->maxchild; ++port1) { u16 portstatus, portchange; int status; status = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (!status && portchange) return 1; } return 0; } static int hub_suspend(struct usb_interface *intf, pm_message_t msg) { struct usb_hub *hub = usb_get_intfdata(intf); struct usb_device *hdev = hub->hdev; unsigned port1; /* * Warn if children aren't already suspended. * Also, add up the number of wakeup-enabled descendants. */ hub->wakeup_enabled_descendants = 0; for (port1 = 1; port1 <= hdev->maxchild; port1++) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; if (udev && udev->can_submit) { dev_warn(&port_dev->dev, "device %s not suspended yet\n", dev_name(&udev->dev)); if (PMSG_IS_AUTO(msg)) return -EBUSY; } if (udev) hub->wakeup_enabled_descendants += usb_wakeup_enabled_descendants(udev); } if (hdev->do_remote_wakeup && hub->quirk_check_port_auto_suspend) { /* check if there are changes pending on hub ports */ if (check_ports_changed(hub)) { if (PMSG_IS_AUTO(msg)) return -EBUSY; pm_wakeup_event(&hdev->dev, 2000); } } if (hub_is_superspeed(hdev) && hdev->do_remote_wakeup) { /* Enable hub to send remote wakeup for all ports. */ for (port1 = 1; port1 <= hdev->maxchild; port1++) { set_port_feature(hdev, port1 | USB_PORT_FEAT_REMOTE_WAKE_CONNECT | USB_PORT_FEAT_REMOTE_WAKE_DISCONNECT | USB_PORT_FEAT_REMOTE_WAKE_OVER_CURRENT, USB_PORT_FEAT_REMOTE_WAKE_MASK); } } dev_dbg(&intf->dev, "%s\n", __func__); /* stop hub_wq and related activity */ hub_quiesce(hub, HUB_SUSPEND); return 0; } /* Report wakeup requests from the ports of a resuming root hub */ static void report_wakeup_requests(struct usb_hub *hub) { struct usb_device *hdev = hub->hdev; struct usb_device *udev; struct usb_hcd *hcd; unsigned long resuming_ports; int i; if (hdev->parent) return; /* Not a root hub */ hcd = bus_to_hcd(hdev->bus); if (hcd->driver->get_resuming_ports) { /* * The get_resuming_ports() method returns a bitmap (origin 0) * of ports which have started wakeup signaling but have not * yet finished resuming. During system resume we will * resume all the enabled ports, regardless of any wakeup * signals, which means the wakeup requests would be lost. * To prevent this, report them to the PM core here. 
*/ resuming_ports = hcd->driver->get_resuming_ports(hcd); for (i = 0; i < hdev->maxchild; ++i) { if (test_bit(i, &resuming_ports)) { udev = hub->ports[i]->child; if (udev) pm_wakeup_event(&udev->dev, 0); } } } } static int hub_resume(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); dev_dbg(&intf->dev, "%s\n", __func__); hub_activate(hub, HUB_RESUME); /* * This should be called only for system resume, not runtime resume. * We can't tell the difference here, so some wakeup requests will be * reported at the wrong time or more than once. This shouldn't * matter much, so long as they do get reported. */ report_wakeup_requests(hub); return 0; } static int hub_reset_resume(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); dev_dbg(&intf->dev, "%s\n", __func__); hub_activate(hub, HUB_RESET_RESUME); return 0; } /** * usb_root_hub_lost_power - called by HCD if the root hub lost Vbus power * @rhdev: struct usb_device for the root hub * * The USB host controller driver calls this function when its root hub * is resumed and Vbus power has been interrupted or the controller * has been reset. The routine marks @rhdev as having lost power. * When the hub driver is resumed it will take notice and carry out * power-session recovery for all the "USB-PERSIST"-enabled child devices; * the others will be disconnected. */ void usb_root_hub_lost_power(struct usb_device *rhdev) { dev_notice(&rhdev->dev, "root hub lost power or was reset\n"); rhdev->reset_resume = 1; } EXPORT_SYMBOL_GPL(usb_root_hub_lost_power); static const char * const usb3_lpm_names[] = { "U0", "U1", "U2", "U3", }; /* * Send a Set SEL control transfer to the device, prior to enabling * device-initiated U1 or U2. This lets the device know the exit latencies from * the time the device initiates a U1 or U2 exit, to the time it will receive a * packet from the host. * * This function will fail if the SEL or PEL values for udev are greater than * the maximum allowed values for the link state to be enabled. */ static int usb_req_set_sel(struct usb_device *udev) { struct usb_set_sel_req *sel_values; unsigned long long u1_sel; unsigned long long u1_pel; unsigned long long u2_sel; unsigned long long u2_pel; int ret; if (!udev->parent || udev->speed < USB_SPEED_SUPER || !udev->lpm_capable) return 0; /* Convert SEL and PEL stored in ns to us */ u1_sel = DIV_ROUND_UP(udev->u1_params.sel, 1000); u1_pel = DIV_ROUND_UP(udev->u1_params.pel, 1000); u2_sel = DIV_ROUND_UP(udev->u2_params.sel, 1000); u2_pel = DIV_ROUND_UP(udev->u2_params.pel, 1000); /* * Make sure that the calculated SEL and PEL values for the link * state we're enabling aren't bigger than the max SEL/PEL * value that will fit in the SET SEL control transfer. * Otherwise the device would get an incorrect idea of the exit * latency for the link state, and could start a device-initiated * U1/U2 when the exit latencies are too high. */ if (u1_sel > USB3_LPM_MAX_U1_SEL_PEL || u1_pel > USB3_LPM_MAX_U1_SEL_PEL || u2_sel > USB3_LPM_MAX_U2_SEL_PEL || u2_pel > USB3_LPM_MAX_U2_SEL_PEL) { dev_dbg(&udev->dev, "Device-initiated U1/U2 disabled due to long SEL or PEL\n"); return -EINVAL; } /* * usb_enable_lpm() can be called as part of a failed device reset, * which may be initiated by an error path of a mass storage driver. * Therefore, use GFP_NOIO. 
*/ sel_values = kmalloc(sizeof *(sel_values), GFP_NOIO); if (!sel_values) return -ENOMEM; sel_values->u1_sel = u1_sel; sel_values->u1_pel = u1_pel; sel_values->u2_sel = cpu_to_le16(u2_sel); sel_values->u2_pel = cpu_to_le16(u2_pel); ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_SEL, USB_RECIP_DEVICE, 0, 0, sel_values, sizeof *(sel_values), USB_CTRL_SET_TIMEOUT); kfree(sel_values); if (ret > 0) udev->lpm_devinit_allow = 1; return ret; } /* * Enable or disable device-initiated U1 or U2 transitions. */ static int usb_set_device_initiated_lpm(struct usb_device *udev, enum usb3_link_state state, bool enable) { int ret; int feature; switch (state) { case USB3_LPM_U1: feature = USB_DEVICE_U1_ENABLE; break; case USB3_LPM_U2: feature = USB_DEVICE_U2_ENABLE; break; default: dev_warn(&udev->dev, "%s: Can't %s non-U1 or U2 state.\n", __func__, enable ? "enable" : "disable"); return -EINVAL; } if (udev->state != USB_STATE_CONFIGURED) { dev_dbg(&udev->dev, "%s: Can't %s %s state " "for unconfigured device.\n", __func__, enable ? "enable" : "disable", usb3_lpm_names[state]); return 0; } if (enable) { /* * Now send the control transfer to enable device-initiated LPM * for either U1 or U2. */ ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } else { ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } if (ret < 0) { dev_warn(&udev->dev, "%s of device-initiated %s failed.\n", enable ? "Enable" : "Disable", usb3_lpm_names[state]); return -EBUSY; } return 0; } static int usb_set_lpm_timeout(struct usb_device *udev, enum usb3_link_state state, int timeout) { int ret; int feature; switch (state) { case USB3_LPM_U1: feature = USB_PORT_FEAT_U1_TIMEOUT; break; case USB3_LPM_U2: feature = USB_PORT_FEAT_U2_TIMEOUT; break; default: dev_warn(&udev->dev, "%s: Can't set timeout for non-U1 or U2 state.\n", __func__); return -EINVAL; } if (state == USB3_LPM_U1 && timeout > USB3_LPM_U1_MAX_TIMEOUT && timeout != USB3_LPM_DEVICE_INITIATED) { dev_warn(&udev->dev, "Failed to set %s timeout to 0x%x, " "which is a reserved value.\n", usb3_lpm_names[state], timeout); return -EINVAL; } ret = set_port_feature(udev->parent, USB_PORT_LPM_TIMEOUT(timeout) | udev->portnum, feature); if (ret < 0) { dev_warn(&udev->dev, "Failed to set %s timeout to 0x%x," "error code %i\n", usb3_lpm_names[state], timeout, ret); return -EBUSY; } if (state == USB3_LPM_U1) udev->u1_params.timeout = timeout; else udev->u2_params.timeout = timeout; return 0; } /* * Don't allow device intiated U1/U2 if the system exit latency + one bus * interval is greater than the minimum service interval of any active * periodic endpoint. 
See USB 3.2 section 9.4.9 */ static bool usb_device_may_initiate_lpm(struct usb_device *udev, enum usb3_link_state state) { unsigned int sel; /* us */ int i, j; if (!udev->lpm_devinit_allow) return false; if (state == USB3_LPM_U1) sel = DIV_ROUND_UP(udev->u1_params.sel, 1000); else if (state == USB3_LPM_U2) sel = DIV_ROUND_UP(udev->u2_params.sel, 1000); else return false; for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { struct usb_interface *intf; struct usb_endpoint_descriptor *desc; unsigned int interval; intf = udev->actconfig->interface[i]; if (!intf) continue; for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) { desc = &intf->cur_altsetting->endpoint[j].desc; if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { interval = (1 << (desc->bInterval - 1)) * 125; if (sel + 125 > interval) return false; } } } return true; } /* * Enable the hub-initiated U1/U2 idle timeouts, and enable device-initiated * U1/U2 entry. * * We will attempt to enable U1 or U2, but there are no guarantees that the * control transfers to set the hub timeout or enable device-initiated U1/U2 * will be successful. * * If the control transfer to enable device-initiated U1/U2 entry fails, then * hub-initiated U1/U2 will be disabled. * * If we cannot set the parent hub U1/U2 timeout, we attempt to let the xHCI * driver know about it. If that call fails, it should be harmless, and just * take up more slightly more bus bandwidth for unnecessary U1/U2 exit latency. */ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { int timeout; __u8 u1_mel; __le16 u2_mel; /* Skip if the device BOS descriptor couldn't be read */ if (!udev->bos) return; u1_mel = udev->bos->ss_cap->bU1devExitLat; u2_mel = udev->bos->ss_cap->bU2DevExitLat; /* If the device says it doesn't have *any* exit latency to come out of * U1 or U2, it's probably lying. Assume it doesn't implement that link * state. */ if ((state == USB3_LPM_U1 && u1_mel == 0) || (state == USB3_LPM_U2 && u2_mel == 0)) return; /* We allow the host controller to set the U1/U2 timeout internally * first, so that it can change its schedule to account for the * additional latency to send data to a device in a lower power * link state. */ timeout = hcd->driver->enable_usb3_lpm_timeout(hcd, udev, state); /* xHCI host controller doesn't want to enable this LPM state. */ if (timeout == 0) return; if (timeout < 0) { dev_warn(&udev->dev, "Could not enable %s link state, " "xHCI error %i.\n", usb3_lpm_names[state], timeout); return; } if (usb_set_lpm_timeout(udev, state, timeout)) { /* If we can't set the parent hub U1/U2 timeout, * device-initiated LPM won't be allowed either, so let the xHCI * host know that this link state won't be enabled. */ hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); return; } /* Only a configured device will accept the Set Feature * U1/U2_ENABLE */ if (udev->actconfig && usb_device_may_initiate_lpm(udev, state)) { if (usb_set_device_initiated_lpm(udev, state, true)) { /* * Request to enable device initiated U1/U2 failed, * better to turn off lpm in this case. */ usb_set_lpm_timeout(udev, state, 0); hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); return; } } if (state == USB3_LPM_U1) udev->usb3_lpm_u1_enabled = 1; else if (state == USB3_LPM_U2) udev->usb3_lpm_u2_enabled = 1; } /* * Disable the hub-initiated U1/U2 idle timeouts, and disable device-initiated * U1/U2 entry. * * If this function returns -EBUSY, the parent hub will still allow U1/U2 entry. 
* If zero is returned, the parent will not allow the link to go into U1/U2. * * If zero is returned, device-initiated U1/U2 entry may still be enabled, but * it won't have an effect on the bus link state because the parent hub will * still disallow device-initiated U1/U2 entry. * * If zero is returned, the xHCI host controller may still think U1/U2 entry is * possible. The result will be slightly more bus bandwidth will be taken up * (to account for U1/U2 exit latency), but it should be harmless. */ static int usb_disable_link_state(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { switch (state) { case USB3_LPM_U1: case USB3_LPM_U2: break; default: dev_warn(&udev->dev, "%s: Can't disable non-U1 or U2 state.\n", __func__); return -EINVAL; } if (usb_set_lpm_timeout(udev, state, 0)) return -EBUSY; usb_set_device_initiated_lpm(udev, state, false); if (hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state)) dev_warn(&udev->dev, "Could not disable xHCI %s timeout, " "bus schedule bandwidth may be impacted.\n", usb3_lpm_names[state]); /* As soon as usb_set_lpm_timeout(0) return 0, hub initiated LPM * is disabled. Hub will disallows link to enter U1/U2 as well, * even device is initiating LPM. Hence LPM is disabled if hub LPM * timeout set to 0, no matter device-initiated LPM is disabled or * not. */ if (state == USB3_LPM_U1) udev->usb3_lpm_u1_enabled = 0; else if (state == USB3_LPM_U2) udev->usb3_lpm_u2_enabled = 0; return 0; } /* * Disable hub-initiated and device-initiated U1 and U2 entry. * Caller must own the bandwidth_mutex. * * This will call usb_enable_lpm() on failure, which will decrement * lpm_disable_count, and will re-enable LPM if lpm_disable_count reaches zero. */ int usb_disable_lpm(struct usb_device *udev) { struct usb_hcd *hcd; if (!udev || !udev->parent || udev->speed < USB_SPEED_SUPER || !udev->lpm_capable || udev->state < USB_STATE_CONFIGURED) return 0; hcd = bus_to_hcd(udev->bus); if (!hcd || !hcd->driver->disable_usb3_lpm_timeout) return 0; udev->lpm_disable_count++; if ((udev->u1_params.timeout == 0 && udev->u2_params.timeout == 0)) return 0; /* If LPM is enabled, attempt to disable it. */ if (usb_disable_link_state(hcd, udev, USB3_LPM_U1)) goto enable_lpm; if (usb_disable_link_state(hcd, udev, USB3_LPM_U2)) goto enable_lpm; return 0; enable_lpm: usb_enable_lpm(udev); return -EBUSY; } EXPORT_SYMBOL_GPL(usb_disable_lpm); /* Grab the bandwidth_mutex before calling usb_disable_lpm() */ int usb_unlocked_disable_lpm(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); int ret; if (!hcd) return -EINVAL; mutex_lock(hcd->bandwidth_mutex); ret = usb_disable_lpm(udev); mutex_unlock(hcd->bandwidth_mutex); return ret; } EXPORT_SYMBOL_GPL(usb_unlocked_disable_lpm); /* * Attempt to enable device-initiated and hub-initiated U1 and U2 entry. The * xHCI host policy may prevent U1 or U2 from being enabled. * * Other callers may have disabled link PM, so U1 and U2 entry will be disabled * until the lpm_disable_count drops to zero. Caller must own the * bandwidth_mutex. */ void usb_enable_lpm(struct usb_device *udev) { struct usb_hcd *hcd; struct usb_hub *hub; struct usb_port *port_dev; if (!udev || !udev->parent || udev->speed < USB_SPEED_SUPER || !udev->lpm_capable || udev->state < USB_STATE_CONFIGURED) return; udev->lpm_disable_count--; hcd = bus_to_hcd(udev->bus); /* Double check that we can both enable and disable LPM. * Device must be configured to accept set feature U1/U2 timeout. 
*/ if (!hcd || !hcd->driver->enable_usb3_lpm_timeout || !hcd->driver->disable_usb3_lpm_timeout) return; if (udev->lpm_disable_count > 0) return; hub = usb_hub_to_struct_hub(udev->parent); if (!hub) return; port_dev = hub->ports[udev->portnum - 1]; if (port_dev->usb3_lpm_u1_permit) usb_enable_link_state(hcd, udev, USB3_LPM_U1); if (port_dev->usb3_lpm_u2_permit) usb_enable_link_state(hcd, udev, USB3_LPM_U2); } EXPORT_SYMBOL_GPL(usb_enable_lpm); /* Grab the bandwidth_mutex before calling usb_enable_lpm() */ void usb_unlocked_enable_lpm(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); if (!hcd) return; mutex_lock(hcd->bandwidth_mutex); usb_enable_lpm(udev); mutex_unlock(hcd->bandwidth_mutex); } EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); /* usb3 devices use U3 for disabled, make sure remote wakeup is disabled */ static void hub_usb3_port_prepare_disable(struct usb_hub *hub, struct usb_port *port_dev) { struct usb_device *udev = port_dev->child; int ret; if (udev && udev->port_is_suspended && udev->do_remote_wakeup) { ret = hub_set_port_link_state(hub, port_dev->portnum, USB_SS_PORT_LS_U0); if (!ret) { msleep(USB_RESUME_TIMEOUT); ret = usb_disable_remote_wakeup(udev); } if (ret) dev_warn(&udev->dev, "Port disable: can't disable remote wake\n"); udev->do_remote_wakeup = 0; } } #else /* CONFIG_PM */ #define hub_suspend NULL #define hub_resume NULL #define hub_reset_resume NULL static inline void hub_usb3_port_prepare_disable(struct usb_hub *hub, struct usb_port *port_dev) { } int usb_disable_lpm(struct usb_device *udev) { return 0; } EXPORT_SYMBOL_GPL(usb_disable_lpm); void usb_enable_lpm(struct usb_device *udev) { } EXPORT_SYMBOL_GPL(usb_enable_lpm); int usb_unlocked_disable_lpm(struct usb_device *udev) { return 0; } EXPORT_SYMBOL_GPL(usb_unlocked_disable_lpm); void usb_unlocked_enable_lpm(struct usb_device *udev) { } EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); int usb_disable_ltm(struct usb_device *udev) { return 0; } EXPORT_SYMBOL_GPL(usb_disable_ltm); void usb_enable_ltm(struct usb_device *udev) { } EXPORT_SYMBOL_GPL(usb_enable_ltm); static int hub_handle_remote_wakeup(struct usb_hub *hub, unsigned int port, u16 portstatus, u16 portchange) { return 0; } static int usb_req_set_sel(struct usb_device *udev) { return 0; } #endif /* CONFIG_PM */ /* * USB-3 does not have a similar link state as USB-2 that will avoid negotiating * a connection with a plugged-in cable but will signal the host when the cable * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices */ static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *hdev = hub->hdev; int ret = 0; if (!hub->error) { if (hub_is_superspeed(hub->hdev)) { hub_usb3_port_prepare_disable(hub, port_dev); ret = hub_set_port_link_state(hub, port_dev->portnum, USB_SS_PORT_LS_U3); } else { ret = usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_ENABLE); } } if (port_dev->child && set_state) usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); if (ret && ret != -ENODEV) dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); return ret; } /* * usb_port_disable - disable a usb device's upstream port * @udev: device to disable * Context: @udev locked, must be able to sleep. * * Disables a USB device that isn't in active use. 
*/ int usb_port_disable(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); return hub_port_disable(hub, udev->portnum, 0); } /* USB 2.0 spec, 7.1.7.3 / fig 7-29: * * Between connect detection and reset signaling there must be a delay * of 100ms at least for debounce and power-settling. The corresponding * timer shall restart whenever the downstream port detects a disconnect. * * Apparently there are some bluetooth and irda-dongles and a number of * low-speed devices for which this debounce period may last over a second. * Not covered by the spec - but easy to deal with. * * This implementation uses a 1500ms total debounce timeout; if the * connection isn't stable by then it returns -ETIMEDOUT. It checks * every 25ms for transient disconnects. When the port status has been * unchanged for 100ms it returns the port status. */ int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected) { int ret; u16 portchange, portstatus; unsigned connection = 0xffff; int total_time, stable_time = 0; struct usb_port *port_dev = hub->ports[port1 - 1]; for (total_time = 0; ; total_time += HUB_DEBOUNCE_STEP) { ret = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (ret < 0) return ret; if (!(portchange & USB_PORT_STAT_C_CONNECTION) && (portstatus & USB_PORT_STAT_CONNECTION) == connection) { if (!must_be_connected || (connection == USB_PORT_STAT_CONNECTION)) stable_time += HUB_DEBOUNCE_STEP; if (stable_time >= HUB_DEBOUNCE_STABLE) break; } else { stable_time = 0; connection = portstatus & USB_PORT_STAT_CONNECTION; } if (portchange & USB_PORT_STAT_C_CONNECTION) { usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); } if (total_time >= HUB_DEBOUNCE_TIMEOUT) break; msleep(HUB_DEBOUNCE_STEP); } dev_dbg(&port_dev->dev, "debounce total %dms stable %dms status 0x%x\n", total_time, stable_time, portstatus); if (stable_time < HUB_DEBOUNCE_STABLE) return -ETIMEDOUT; return portstatus; } void usb_ep0_reinit(struct usb_device *udev) { usb_disable_endpoint(udev, 0 + USB_DIR_IN, true); usb_disable_endpoint(udev, 0 + USB_DIR_OUT, true); usb_enable_endpoint(udev, &udev->ep0, true); } EXPORT_SYMBOL_GPL(usb_ep0_reinit); #define usb_sndaddr0pipe() (PIPE_CONTROL << 30) #define usb_rcvaddr0pipe() ((PIPE_CONTROL << 30) | USB_DIR_IN) static int hub_set_address(struct usb_device *udev, int devnum) { int retval; unsigned int timeout_ms = USB_CTRL_SET_TIMEOUT; struct usb_hcd *hcd = bus_to_hcd(udev->bus); struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); if (hub->hdev->quirks & USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT) timeout_ms = USB_SHORT_SET_ADDRESS_REQ_TIMEOUT; /* * The host controller will choose the device address, * instead of the core having chosen it earlier */ if (!hcd->driver->address_device && devnum <= 1) return -EINVAL; if (udev->state == USB_STATE_ADDRESS) return 0; if (udev->state != USB_STATE_DEFAULT) return -EINVAL; if (hcd->driver->address_device) retval = hcd->driver->address_device(hcd, udev, timeout_ms); else retval = usb_control_msg(udev, usb_sndaddr0pipe(), USB_REQ_SET_ADDRESS, 0, devnum, 0, NULL, 0, timeout_ms); if (retval == 0) { update_devnum(udev, devnum); /* Device now using proper address. */ usb_set_device_state(udev, USB_STATE_ADDRESS); usb_ep0_reinit(udev); } return retval; } /* * There are reports of USB 3.0 devices that say they support USB 2.0 Link PM * when they're plugged into a USB 2.0 port, but they don't work when LPM is * enabled. 
* * Only enable USB 2.0 Link PM if the port is internal (hardwired), or the * device says it supports the new USB 2.0 Link PM errata by setting the BESL * support bit in the BOS descriptor. */ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); int connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; if (!udev->usb2_hw_lpm_capable || !udev->bos) return; if (hub) connect_type = hub->ports[udev->portnum - 1]->connect_type; if ((udev->bos->ext_cap->bmAttributes & cpu_to_le32(USB_BESL_SUPPORT)) || connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) { udev->usb2_hw_lpm_allowed = 1; usb_enable_usb2_hardware_lpm(udev); } } static int hub_enable_device(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); if (!hcd->driver->enable_device) return 0; if (udev->state == USB_STATE_ADDRESS) return 0; if (udev->state != USB_STATE_DEFAULT) return -EINVAL; return hcd->driver->enable_device(hcd, udev); } /* * Get the bMaxPacketSize0 value during initialization by reading the * device's device descriptor. Since we don't already know this value, * the transfer is unsafe and it ignores I/O errors, only testing for * reasonable received values. * * For "old scheme" initialization, size will be 8 so we read just the * start of the device descriptor, which should work okay regardless of * the actual bMaxPacketSize0 value. For "new scheme" initialization, * size will be 64 (and buf will point to a sufficiently large buffer), * which might not be kosher according to the USB spec but it's what * Windows does and what many devices expect. * * Returns: bMaxPacketSize0 or a negative error code. */ static int get_bMaxPacketSize0(struct usb_device *udev, struct usb_device_descriptor *buf, int size, bool first_time) { int i, rc; /* * Retry on all errors; some devices are flakey. * 255 is for WUSB devices, we actually need to use * 512 (WUSB1.0[4.8.1]). */ for (i = 0; i < GET_MAXPACKET0_TRIES; ++i) { /* Start with invalid values in case the transfer fails */ buf->bDescriptorType = buf->bMaxPacketSize0 = 0; rc = usb_control_msg(udev, usb_rcvaddr0pipe(), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, USB_DT_DEVICE << 8, 0, buf, size, initial_descriptor_timeout); switch (buf->bMaxPacketSize0) { case 8: case 16: case 32: case 64: case 9: if (buf->bDescriptorType == USB_DT_DEVICE) { rc = buf->bMaxPacketSize0; break; } fallthrough; default: if (rc >= 0) rc = -EPROTO; break; } /* * Some devices time out if they are powered on * when already connected. They need a second * reset, so return early. But only on the first * attempt, lest we get into a time-out/reset loop. */ if (rc > 0 || (rc == -ETIMEDOUT && first_time && udev->speed > USB_SPEED_FULL)) break; } return rc; } #define GET_DESCRIPTOR_BUFSIZE 64 /* Reset device, (re)assign address, get device descriptor. * Device connection must be stable, no more debouncing needed. * Returns device in USB_STATE_ADDRESS, except on error. * * If this is called for an already-existing device (as part of * usb_reset_and_verify_device), the caller must own the device lock and * the port lock. For a newly detected device that is not accessible * through any global pointers, it's not necessary to lock the device, * but it is still necessary to lock the port. * * For a newly detected device, @dev_descr must be NULL. The device * descriptor retrieved from the device will then be stored in * @udev->descriptor. For an already existing device, @dev_descr * must be non-NULL. 
The device descriptor will be stored there, * not in @udev->descriptor, because descriptors for registered * devices are meant to be immutable. */ static int hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, int retry_counter, struct usb_device_descriptor *dev_descr) { struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); struct usb_port *port_dev = hub->ports[port1 - 1]; int retries, operations, retval, i; unsigned delay = HUB_SHORT_RESET_TIME; enum usb_device_speed oldspeed = udev->speed; const char *speed; int devnum = udev->devnum; const char *driver_name; bool do_new_scheme; const bool initial = !dev_descr; int maxp0; struct usb_device_descriptor *buf, *descr; buf = kmalloc(GET_DESCRIPTOR_BUFSIZE, GFP_NOIO); if (!buf) return -ENOMEM; /* root hub ports have a slightly longer reset period * (from USB 2.0 spec, section 7.1.7.5) */ if (!hdev->parent) { delay = HUB_ROOT_RESET_TIME; if (port1 == hdev->bus->otg_port) hdev->bus->b_hnp_enable = 0; } /* Some low speed devices have problems with the quick delay, so */ /* be a bit pessimistic with those devices. RHbug #23670 */ if (oldspeed == USB_SPEED_LOW) delay = HUB_LONG_RESET_TIME; /* Reset the device; full speed may morph to high speed */ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */ retval = hub_port_reset(hub, port1, udev, delay, false); if (retval < 0) /* error or disconnect */ goto fail; /* success, speed is known */ retval = -ENODEV; /* Don't allow speed changes at reset, except usb 3.0 to faster */ if (oldspeed != USB_SPEED_UNKNOWN && oldspeed != udev->speed && !(oldspeed == USB_SPEED_SUPER && udev->speed > oldspeed)) { dev_dbg(&udev->dev, "device reset changed speed!\n"); goto fail; } oldspeed = udev->speed; if (initial) { /* USB 2.0 section 5.5.3 talks about ep0 maxpacket ... * it's fixed size except for full speed devices. */ switch (udev->speed) { case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512); break; case USB_SPEED_HIGH: /* fixed at 64 */ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); break; case USB_SPEED_FULL: /* 8, 16, 32, or 64 */ /* to determine the ep0 maxpacket size, try to read * the device descriptor to get bMaxPacketSize0 and * then correct our initial guess. */ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); break; case USB_SPEED_LOW: /* fixed at 8 */ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(8); break; default: goto fail; } } speed = usb_speed_string(udev->speed); /* * The controller driver may be NULL if the controller device * is the middle device between platform device and roothub. * This middle device may not need a device driver due to * all hardware control can be at platform device driver, this * platform device is usually a dual-role USB controller device. */ if (udev->bus->controller->driver) driver_name = udev->bus->controller->driver->name; else driver_name = udev->bus->sysdev->driver->name; if (udev->speed < USB_SPEED_SUPER) dev_info(&udev->dev, "%s %s USB device number %d using %s\n", (initial ? "new" : "reset"), speed, devnum, driver_name); if (initial) { /* Set up TT records, if needed */ if (hdev->tt) { udev->tt = hdev->tt; udev->ttport = hdev->ttport; } else if (udev->speed != USB_SPEED_HIGH && hdev->speed == USB_SPEED_HIGH) { if (!hub->tt.hub) { dev_err(&udev->dev, "parent hub has no TT\n"); retval = -EINVAL; goto fail; } udev->tt = &hub->tt; udev->ttport = port1; } } /* Why interleave GET_DESCRIPTOR and SET_ADDRESS this way? 
* Because device hardware and firmware is sometimes buggy in * this area, and this is how Linux has done it for ages. * Change it cautiously. * * NOTE: If use_new_scheme() is true we will start by issuing * a 64-byte GET_DESCRIPTOR request. This is what Windows does, * so it may help with some non-standards-compliant devices. * Otherwise we start with SET_ADDRESS and then try to read the * first 8 bytes of the device descriptor to get the ep0 maxpacket * value. */ do_new_scheme = use_new_scheme(udev, retry_counter, port_dev); for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) { if (hub_port_stop_enumerate(hub, port1, retries)) { retval = -ENODEV; break; } if (do_new_scheme) { retval = hub_enable_device(udev); if (retval < 0) { dev_err(&udev->dev, "hub failed to enable device, error %d\n", retval); goto fail; } maxp0 = get_bMaxPacketSize0(udev, buf, GET_DESCRIPTOR_BUFSIZE, retries == 0); if (maxp0 > 0 && !initial && maxp0 != udev->descriptor.bMaxPacketSize0) { dev_err(&udev->dev, "device reset changed ep0 maxpacket size!\n"); retval = -ENODEV; goto fail; } retval = hub_port_reset(hub, port1, udev, delay, false); if (retval < 0) /* error or disconnect */ goto fail; if (oldspeed != udev->speed) { dev_dbg(&udev->dev, "device reset changed speed!\n"); retval = -ENODEV; goto fail; } if (maxp0 < 0) { if (maxp0 != -ENODEV) dev_err(&udev->dev, "device descriptor read/64, error %d\n", maxp0); retval = maxp0; continue; } } for (operations = 0; operations < SET_ADDRESS_TRIES; ++operations) { retval = hub_set_address(udev, devnum); if (retval >= 0) break; msleep(200); } if (retval < 0) { if (retval != -ENODEV) dev_err(&udev->dev, "device not accepting address %d, error %d\n", devnum, retval); goto fail; } if (udev->speed >= USB_SPEED_SUPER) { devnum = udev->devnum; dev_info(&udev->dev, "%s SuperSpeed%s%s USB device number %d using %s\n", (udev->config) ? "reset" : "new", (udev->speed == USB_SPEED_SUPER_PLUS) ? " Plus" : "", (udev->ssp_rate == USB_SSP_GEN_2x2) ? " Gen 2x2" : (udev->ssp_rate == USB_SSP_GEN_2x1) ? " Gen 2x1" : (udev->ssp_rate == USB_SSP_GEN_1x2) ? " Gen 1x2" : "", devnum, driver_name); } /* * cope with hardware quirkiness: * - let SET_ADDRESS settle, some device hardware wants it * - read ep0 maxpacket even for high and low speed, */ msleep(10); if (do_new_scheme) break; maxp0 = get_bMaxPacketSize0(udev, buf, 8, retries == 0); if (maxp0 < 0) { retval = maxp0; if (retval != -ENODEV) dev_err(&udev->dev, "device descriptor read/8, error %d\n", retval); } else { u32 delay; if (!initial && maxp0 != udev->descriptor.bMaxPacketSize0) { dev_err(&udev->dev, "device reset changed ep0 maxpacket size!\n"); retval = -ENODEV; goto fail; } delay = udev->parent->hub_delay; udev->hub_delay = min_t(u32, delay, USB_TP_TRANSMISSION_DELAY_MAX); retval = usb_set_isoch_delay(udev); if (retval) { dev_dbg(&udev->dev, "Failed set isoch delay, error %d\n", retval); retval = 0; } break; } } if (retval) goto fail; /* * Check the ep0 maxpacket guess and correct it if necessary. * maxp0 is the value stored in the device descriptor; * i is the value it encodes (logarithmic for SuperSpeed or greater). 
*/ i = maxp0; if (udev->speed >= USB_SPEED_SUPER) { if (maxp0 <= 16) i = 1 << maxp0; else i = 0; /* Invalid */ } if (usb_endpoint_maxp(&udev->ep0.desc) == i) { ; /* Initial ep0 maxpacket guess is right */ } else if (((udev->speed == USB_SPEED_FULL || udev->speed == USB_SPEED_HIGH) && (i == 8 || i == 16 || i == 32 || i == 64)) || (udev->speed >= USB_SPEED_SUPER && i > 0)) { /* Initial guess is wrong; use the descriptor's value */ if (udev->speed == USB_SPEED_FULL) dev_dbg(&udev->dev, "ep0 maxpacket = %d\n", i); else dev_warn(&udev->dev, "Using ep0 maxpacket: %d\n", i); udev->ep0.desc.wMaxPacketSize = cpu_to_le16(i); usb_ep0_reinit(udev); } else { /* Initial guess is wrong and descriptor's value is invalid */ dev_err(&udev->dev, "Invalid ep0 maxpacket: %d\n", maxp0); retval = -EMSGSIZE; goto fail; } descr = usb_get_device_descriptor(udev); if (IS_ERR(descr)) { retval = PTR_ERR(descr); if (retval != -ENODEV) dev_err(&udev->dev, "device descriptor read/all, error %d\n", retval); goto fail; } if (initial) udev->descriptor = *descr; else *dev_descr = *descr; kfree(descr); /* * Some superspeed devices have finished the link training process * and attached to a superspeed hub port, but the device descriptor * got from those devices show they aren't superspeed devices. Warm * reset the port attached by the devices can fix them. */ if ((udev->speed >= USB_SPEED_SUPER) && (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0300)) { dev_err(&udev->dev, "got a wrong device descriptor, warm reset device\n"); hub_port_reset(hub, port1, udev, HUB_BH_RESET_TIME, true); retval = -EINVAL; goto fail; } usb_detect_quirks(udev); if (le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0201) { retval = usb_get_bos_descriptor(udev); if (!retval) { udev->lpm_capable = usb_device_supports_lpm(udev); udev->lpm_disable_count = 1; usb_set_lpm_parameters(udev); usb_req_set_sel(udev); } } retval = 0; /* notify HCD that we have a device connected and addressed */ if (hcd->driver->update_device) hcd->driver->update_device(hcd, udev); hub_set_initial_usb2_lpm_policy(udev); fail: if (retval) { hub_port_disable(hub, port1, 0); update_devnum(udev, devnum); /* for disconnect processing */ } kfree(buf); return retval; } static void check_highspeed(struct usb_hub *hub, struct usb_device *udev, int port1) { struct usb_qualifier_descriptor *qual; int status; if (udev->quirks & USB_QUIRK_DEVICE_QUALIFIER) return; qual = kmalloc(sizeof *qual, GFP_KERNEL); if (qual == NULL) return; status = usb_get_descriptor(udev, USB_DT_DEVICE_QUALIFIER, 0, qual, sizeof *qual); if (status == sizeof *qual) { dev_info(&udev->dev, "not running at top speed; " "connect to a high speed hub\n"); /* hub LEDs are probably harder to miss than syslog */ if (hub->has_indicators) { hub->indicator[port1-1] = INDICATOR_GREEN_BLINK; queue_delayed_work(system_power_efficient_wq, &hub->leds, 0); } } kfree(qual); } static unsigned hub_power_remaining(struct usb_hub *hub) { struct usb_device *hdev = hub->hdev; int remaining; int port1; if (!hub->limited_power) return 0; remaining = hdev->bus_mA - hub->descriptor->bHubContrCurrent; for (port1 = 1; port1 <= hdev->maxchild; ++port1) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; unsigned unit_load; int delta; if (!udev) continue; if (hub_is_superspeed(udev)) unit_load = 150; else unit_load = 100; /* * Unconfigured devices may not use more than one unit load, * or 8mA for OTG ports */ if (udev->actconfig) delta = usb_get_max_power(udev, udev->actconfig); else if (port1 != udev->bus->otg_port 
|| hdev->parent) delta = unit_load; else delta = 8; if (delta > hub->mA_per_port) dev_warn(&port_dev->dev, "%dmA is over %umA budget!\n", delta, hub->mA_per_port); remaining -= delta; } if (remaining < 0) { dev_warn(hub->intfdev, "%dmA over power budget!\n", -remaining); remaining = 0; } return remaining; } static int descriptors_changed(struct usb_device *udev, struct usb_device_descriptor *new_device_descriptor, struct usb_host_bos *old_bos) { int changed = 0; unsigned index; unsigned serial_len = 0; unsigned len; unsigned old_length; int length; char *buf; if (memcmp(&udev->descriptor, new_device_descriptor, sizeof(*new_device_descriptor)) != 0) return 1; if ((old_bos && !udev->bos) || (!old_bos && udev->bos)) return 1; if (udev->bos) { len = le16_to_cpu(udev->bos->desc->wTotalLength); if (len != le16_to_cpu(old_bos->desc->wTotalLength)) return 1; if (memcmp(udev->bos->desc, old_bos->desc, len)) return 1; } /* Since the idVendor, idProduct, and bcdDevice values in the * device descriptor haven't changed, we will assume the * Manufacturer and Product strings haven't changed either. * But the SerialNumber string could be different (e.g., a * different flash card of the same brand). */ if (udev->serial) serial_len = strlen(udev->serial) + 1; len = serial_len; for (index = 0; index < udev->descriptor.bNumConfigurations; index++) { old_length = le16_to_cpu(udev->config[index].desc.wTotalLength); len = max(len, old_length); } buf = kmalloc(len, GFP_NOIO); if (!buf) /* assume the worst */ return 1; for (index = 0; index < udev->descriptor.bNumConfigurations; index++) { old_length = le16_to_cpu(udev->config[index].desc.wTotalLength); length = usb_get_descriptor(udev, USB_DT_CONFIG, index, buf, old_length); if (length != old_length) { dev_dbg(&udev->dev, "config index %d, error %d\n", index, length); changed = 1; break; } if (memcmp(buf, udev->rawdescriptors[index], old_length) != 0) { dev_dbg(&udev->dev, "config index %d changed (#%d)\n", index, ((struct usb_config_descriptor *) buf)-> bConfigurationValue); changed = 1; break; } } if (!changed && serial_len) { length = usb_string(udev, udev->descriptor.iSerialNumber, buf, serial_len); if (length + 1 != serial_len) { dev_dbg(&udev->dev, "serial string error %d\n", length); changed = 1; } else if (memcmp(buf, udev->serial, length) != 0) { dev_dbg(&udev->dev, "serial string changed\n"); changed = 1; } } kfree(buf); return changed; } static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) { int status = -ENODEV; int i; unsigned unit_load; struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; static int unreliable_port = -1; bool retry_locked; /* Disconnect any existing devices under this port */ if (udev) { if (hcd->usb_phy && !hdev->parent) usb_phy_notify_disconnect(hcd->usb_phy, udev->speed); usb_disconnect(&port_dev->child); } /* We can forget about a "removed" device when there's a physical * disconnect or the connect status changes. 
*/ if (!(portstatus & USB_PORT_STAT_CONNECTION) || (portchange & USB_PORT_STAT_C_CONNECTION)) clear_bit(port1, hub->removed_bits); if (portchange & (USB_PORT_STAT_C_CONNECTION | USB_PORT_STAT_C_ENABLE)) { status = hub_port_debounce_be_stable(hub, port1); if (status < 0) { if (status != -ENODEV && port1 != unreliable_port && printk_ratelimit()) dev_err(&port_dev->dev, "connect-debounce failed\n"); portstatus &= ~USB_PORT_STAT_CONNECTION; unreliable_port = port1; } else { portstatus = status; } } /* Return now if debouncing failed or nothing is connected or * the device was "removed". */ if (!(portstatus & USB_PORT_STAT_CONNECTION) || test_bit(port1, hub->removed_bits)) { /* * maybe switch power back on (e.g. root hub was reset) * but only if the port isn't owned by someone else. */ if (hub_is_port_power_switchable(hub) && !usb_port_is_power_on(hub, portstatus) && !port_dev->port_owner) set_port_feature(hdev, port1, USB_PORT_FEAT_POWER); if (portstatus & USB_PORT_STAT_ENABLE) goto done; return; } if (hub_is_superspeed(hub->hdev)) unit_load = 150; else unit_load = 100; status = 0; for (i = 0; i < PORT_INIT_TRIES; i++) { if (hub_port_stop_enumerate(hub, port1, i)) { status = -ENODEV; break; } usb_lock_port(port_dev); mutex_lock(hcd->address0_mutex); retry_locked = true; /* reallocate for each attempt, since references * to the previous one can escape in various ways */ udev = usb_alloc_dev(hdev, hdev->bus, port1); if (!udev) { dev_err(&port_dev->dev, "couldn't allocate usb_device\n"); mutex_unlock(hcd->address0_mutex); usb_unlock_port(port_dev); goto done; } usb_set_device_state(udev, USB_STATE_POWERED); udev->bus_mA = hub->mA_per_port; udev->level = hdev->level + 1; /* Devices connected to SuperSpeed hubs are USB 3.0 or later */ if (hub_is_superspeed(hub->hdev)) udev->speed = USB_SPEED_SUPER; else udev->speed = USB_SPEED_UNKNOWN; choose_devnum(udev); if (udev->devnum <= 0) { status = -ENOTCONN; /* Don't retry */ goto loop; } /* reset (non-USB 3.0 devices) and get descriptor */ status = hub_port_init(hub, udev, port1, i, NULL); if (status < 0) goto loop; mutex_unlock(hcd->address0_mutex); usb_unlock_port(port_dev); retry_locked = false; if (udev->quirks & USB_QUIRK_DELAY_INIT) msleep(2000); /* consecutive bus-powered hubs aren't reliable; they can * violate the voltage drop budget. if the new child has * a "powered" LED, users should notice we didn't enable it * (without reading syslog), even without per-port LEDs * on the parent. */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB && udev->bus_mA <= unit_load) { u16 devstat; status = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, &devstat); if (status) { dev_dbg(&udev->dev, "get status %d ?\n", status); goto loop_disable; } if ((devstat & (1 << USB_DEVICE_SELF_POWERED)) == 0) { dev_err(&udev->dev, "can't connect bus-powered hub " "to this port\n"); if (hub->has_indicators) { hub->indicator[port1-1] = INDICATOR_AMBER_BLINK; queue_delayed_work( system_power_efficient_wq, &hub->leds, 0); } status = -ENOTCONN; /* Don't retry */ goto loop_disable; } } /* check for devices running slower than they could */ if (le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0200 && udev->speed == USB_SPEED_FULL && highspeed_hubs != 0) check_highspeed(hub, udev, port1); /* Store the parent's children[] pointer. At this point * udev becomes globally accessible, although presumably * no one will look at it until hdev is unlocked. 
*/ status = 0; mutex_lock(&usb_port_peer_mutex); /* We mustn't add new devices if the parent hub has * been disconnected; we would race with the * recursively_mark_NOTATTACHED() routine. */ spin_lock_irq(&device_state_lock); if (hdev->state == USB_STATE_NOTATTACHED) status = -ENOTCONN; else port_dev->child = udev; spin_unlock_irq(&device_state_lock); mutex_unlock(&usb_port_peer_mutex); /* Run it through the hoops (find a driver, etc) */ if (!status) { status = usb_new_device(udev); if (status) { mutex_lock(&usb_port_peer_mutex); spin_lock_irq(&device_state_lock); port_dev->child = NULL; spin_unlock_irq(&device_state_lock); mutex_unlock(&usb_port_peer_mutex); } else { if (hcd->usb_phy && !hdev->parent) usb_phy_notify_connect(hcd->usb_phy, udev->speed); } } if (status) goto loop_disable; status = hub_power_remaining(hub); if (status) dev_dbg(hub->intfdev, "%dmA power budget left\n", status); return; loop_disable: hub_port_disable(hub, port1, 1); loop: usb_ep0_reinit(udev); release_devnum(udev); hub_free_dev(udev); if (retry_locked) { mutex_unlock(hcd->address0_mutex); usb_unlock_port(port_dev); } usb_put_dev(udev); if ((status == -ENOTCONN) || (status == -ENOTSUPP)) break; /* When halfway through our retry count, power-cycle the port */ if (i == (PORT_INIT_TRIES - 1) / 2) { dev_info(&port_dev->dev, "attempt power cycle\n"); usb_hub_set_port_power(hdev, hub, port1, false); msleep(2 * hub_power_on_good_delay(hub)); usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); } } if (hub->hdev->parent || !hcd->driver->port_handed_over || !(hcd->driver->port_handed_over)(hcd, port1)) { if (status != -ENOTCONN && status != -ENODEV) dev_err(&port_dev->dev, "unable to enumerate USB device\n"); } done: hub_port_disable(hub, port1, 1); if (hcd->driver->relinquish_port && !hub->hdev->parent) { if (status != -ENOTCONN && status != -ENODEV) hcd->driver->relinquish_port(hcd, port1); } } /* Handle physical or logical connection change events. * This routine is called when: * a port connection-change occurs; * a port enable-change occurs (often caused by EMI); * usb_reset_and_verify_device() encounters changed descriptors (as from * a firmware download) * caller already locked the hub */ static void hub_port_connect_change(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) __must_hold(&port_dev->status_lock) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; struct usb_device_descriptor *descr; int status = -ENODEV; dev_dbg(&port_dev->dev, "status %04x, change %04x, %s\n", portstatus, portchange, portspeed(hub, portstatus)); if (hub->has_indicators) { set_port_led(hub, port1, HUB_LED_AUTO); hub->indicator[port1-1] = INDICATOR_AUTO; } #ifdef CONFIG_USB_OTG /* during HNP, don't repeat the debounce */ if (hub->hdev->bus->is_b_host) portchange &= ~(USB_PORT_STAT_C_CONNECTION | USB_PORT_STAT_C_ENABLE); #endif /* Try to resuscitate an existing device */ if ((portstatus & USB_PORT_STAT_CONNECTION) && udev && udev->state != USB_STATE_NOTATTACHED) { if (portstatus & USB_PORT_STAT_ENABLE) { /* * USB-3 connections are initialized automatically by * the hostcontroller hardware. Therefore check for * changed device descriptors before resuscitating the * device. 
*/ descr = usb_get_device_descriptor(udev); if (IS_ERR(descr)) { dev_dbg(&udev->dev, "can't read device descriptor %ld\n", PTR_ERR(descr)); } else { if (descriptors_changed(udev, descr, udev->bos)) { dev_dbg(&udev->dev, "device descriptor has changed\n"); } else { status = 0; /* Nothing to do */ } kfree(descr); } #ifdef CONFIG_PM } else if (udev->state == USB_STATE_SUSPENDED && udev->persist_enabled) { /* For a suspended device, treat this as a * remote wakeup event. */ usb_unlock_port(port_dev); status = usb_remote_wakeup(udev); usb_lock_port(port_dev); #endif } else { /* Don't resuscitate */; } } clear_bit(port1, hub->change_bits); /* successfully revalidated the connection */ if (status == 0) return; usb_unlock_port(port_dev); hub_port_connect(hub, port1, portstatus, portchange); usb_lock_port(port_dev); } /* Handle notifying userspace about hub over-current events */ static void port_over_current_notify(struct usb_port *port_dev) { char *envp[3] = { NULL, NULL, NULL }; struct device *hub_dev; char *port_dev_path; sysfs_notify(&port_dev->dev.kobj, NULL, "over_current_count"); hub_dev = port_dev->dev.parent; if (!hub_dev) return; port_dev_path = kobject_get_path(&port_dev->dev.kobj, GFP_KERNEL); if (!port_dev_path) return; envp[0] = kasprintf(GFP_KERNEL, "OVER_CURRENT_PORT=%s", port_dev_path); if (!envp[0]) goto exit; envp[1] = kasprintf(GFP_KERNEL, "OVER_CURRENT_COUNT=%u", port_dev->over_current_count); if (!envp[1]) goto exit; kobject_uevent_env(&hub_dev->kobj, KOBJ_CHANGE, envp); exit: kfree(envp[1]); kfree(envp[0]); kfree(port_dev_path); } static void port_event(struct usb_hub *hub, int port1) __must_hold(&port_dev->status_lock) { int connect_change; struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; struct usb_device *hdev = hub->hdev; u16 portstatus, portchange; int i = 0; connect_change = test_bit(port1, hub->change_bits); clear_bit(port1, hub->event_bits); clear_bit(port1, hub->wakeup_bits); if (usb_hub_port_status(hub, port1, &portstatus, &portchange) < 0) return; if (portchange & USB_PORT_STAT_C_CONNECTION) { usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); connect_change = 1; } if (portchange & USB_PORT_STAT_C_ENABLE) { if (!connect_change) dev_dbg(&port_dev->dev, "enable change, status %08x\n", portstatus); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); /* * EM interference sometimes causes badly shielded USB devices * to be shutdown by the hub, this hack enables them again. * Works at least with mouse driver. 
*/ if (!(portstatus & USB_PORT_STAT_ENABLE) && !connect_change && udev) { dev_err(&port_dev->dev, "disabled by hub (EMI?), re-enabling...\n"); connect_change = 1; } } if (portchange & USB_PORT_STAT_C_OVERCURRENT) { u16 status = 0, unused; port_dev->over_current_count++; port_over_current_notify(port_dev); dev_dbg(&port_dev->dev, "over-current change #%u\n", port_dev->over_current_count); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_OVER_CURRENT); msleep(100); /* Cool down */ hub_power_on(hub, true); usb_hub_port_status(hub, port1, &status, &unused); if (status & USB_PORT_STAT_OVERCURRENT) dev_err(&port_dev->dev, "over-current condition\n"); } if (portchange & USB_PORT_STAT_C_RESET) { dev_dbg(&port_dev->dev, "reset change\n"); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_RESET); } if ((portchange & USB_PORT_STAT_C_BH_RESET) && hub_is_superspeed(hdev)) { dev_dbg(&port_dev->dev, "warm reset change\n"); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_BH_PORT_RESET); } if (portchange & USB_PORT_STAT_C_LINK_STATE) { dev_dbg(&port_dev->dev, "link state change\n"); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_PORT_LINK_STATE); } if (portchange & USB_PORT_STAT_C_CONFIG_ERROR) { dev_warn(&port_dev->dev, "config error\n"); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_PORT_CONFIG_ERROR); } /* skip port actions that require the port to be powered on */ if (!pm_runtime_active(&port_dev->dev)) return; /* skip port actions if ignore_event and early_stop are true */ if (port_dev->ignore_event && port_dev->early_stop) return; if (hub_handle_remote_wakeup(hub, port1, portstatus, portchange)) connect_change = 1; /* * Avoid trying to recover a USB3 SS.Inactive port with a warm reset if * the device was disconnected. A 12ms disconnect detect timer in * SS.Inactive state transitions the port to RxDetect automatically. * SS.Inactive link error state is common during device disconnect. */ while (hub_port_warm_reset_required(hub, port1, portstatus)) { if ((i++ < DETECT_DISCONNECT_TRIES) && udev) { u16 unused; msleep(20); usb_hub_port_status(hub, port1, &portstatus, &unused); dev_dbg(&port_dev->dev, "Wait for inactive link disconnect detect\n"); continue; } else if (!udev || !(portstatus & USB_PORT_STAT_CONNECTION) || udev->state == USB_STATE_NOTATTACHED) { dev_dbg(&port_dev->dev, "do warm reset, port only\n"); if (hub_port_reset(hub, port1, NULL, HUB_BH_RESET_TIME, true) < 0) hub_port_disable(hub, port1, 1); } else { dev_dbg(&port_dev->dev, "do warm reset, full device\n"); usb_unlock_port(port_dev); usb_lock_device(udev); usb_reset_device(udev); usb_unlock_device(udev); usb_lock_port(port_dev); connect_change = 0; } break; } if (connect_change) hub_port_connect_change(hub, port1, portstatus, portchange); } static void hub_event(struct work_struct *work) { struct usb_device *hdev; struct usb_interface *intf; struct usb_hub *hub; struct device *hub_dev; u16 hubstatus; u16 hubchange; int i, ret; hub = container_of(work, struct usb_hub, events); hdev = hub->hdev; hub_dev = hub->intfdev; intf = to_usb_interface(hub_dev); kcov_remote_start_usb((u64)hdev->bus->busnum); dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n", hdev->state, hdev->maxchild, /* NOTE: expects max 15 ports... */ (u16) hub->change_bits[0], (u16) hub->event_bits[0]); /* Lock the device, then check to see if we were * disconnected while waiting for the lock to succeed. 
*/ usb_lock_device(hdev); if (unlikely(hub->disconnected)) goto out_hdev_lock; /* If the hub has died, clean up after it */ if (hdev->state == USB_STATE_NOTATTACHED) { hub->error = -ENODEV; hub_quiesce(hub, HUB_DISCONNECT); goto out_hdev_lock; } /* Autoresume */ ret = usb_autopm_get_interface(intf); if (ret) { dev_dbg(hub_dev, "Can't autoresume: %d\n", ret); goto out_hdev_lock; } /* If this is an inactive hub, do nothing */ if (hub->quiescing) goto out_autopm; if (hub->error) { dev_dbg(hub_dev, "resetting for error %d\n", hub->error); ret = usb_reset_device(hdev); if (ret) { dev_dbg(hub_dev, "error resetting hub: %d\n", ret); goto out_autopm; } hub->nerrors = 0; hub->error = 0; } /* deal with port status changes */ for (i = 1; i <= hdev->maxchild; i++) { struct usb_port *port_dev = hub->ports[i - 1]; if (test_bit(i, hub->event_bits) || test_bit(i, hub->change_bits) || test_bit(i, hub->wakeup_bits)) { /* * The get_noresume and barrier ensure that if * the port was in the process of resuming, we * flush that work and keep the port active for * the duration of the port_event(). However, * if the port is runtime pm suspended * (powered-off), we leave it in that state, run * an abbreviated port_event(), and move on. */ pm_runtime_get_noresume(&port_dev->dev); pm_runtime_barrier(&port_dev->dev); usb_lock_port(port_dev); port_event(hub, i); usb_unlock_port(port_dev); pm_runtime_put_sync(&port_dev->dev); } } /* deal with hub status changes */ if (test_and_clear_bit(0, hub->event_bits) == 0) ; /* do nothing */ else if (hub_hub_status(hub, &hubstatus, &hubchange) < 0) dev_err(hub_dev, "get_hub_status failed\n"); else { if (hubchange & HUB_CHANGE_LOCAL_POWER) { dev_dbg(hub_dev, "power change\n"); clear_hub_feature(hdev, C_HUB_LOCAL_POWER); if (hubstatus & HUB_STATUS_LOCAL_POWER) /* FIXME: Is this always true? 
*/ hub->limited_power = 1; else hub->limited_power = 0; } if (hubchange & HUB_CHANGE_OVERCURRENT) { u16 status = 0; u16 unused; dev_dbg(hub_dev, "over-current change\n"); clear_hub_feature(hdev, C_HUB_OVER_CURRENT); msleep(500); /* Cool down */ hub_power_on(hub, true); hub_hub_status(hub, &status, &unused); if (status & HUB_STATUS_OVERCURRENT) dev_err(hub_dev, "over-current condition\n"); } } out_autopm: /* Balance the usb_autopm_get_interface() above */ usb_autopm_put_interface_no_suspend(intf); out_hdev_lock: usb_unlock_device(hdev); /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); hub_put(hub); kcov_remote_stop(); } static const struct usb_device_id hub_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_SMSC, .idProduct = USB_PRODUCT_USB5534B, .bInterfaceClass = USB_CLASS_HUB, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_CYPRESS, .idProduct = USB_PRODUCT_CY7C65632, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_GENESYS_LOGIC, .bInterfaceClass = USB_CLASS_HUB, .driver_info = HUB_QUIRK_CHECK_PORT_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, .idProduct = USB_PRODUCT_TUSB8041_USB2, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, .idProduct = USB_PRODUCT_TUSB8041_USB3, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_MICROCHIP, .idProduct = USB_PRODUCT_USB4913, .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_MICROCHIP, .idProduct = USB_PRODUCT_USB4914, .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_MICROCHIP, .idProduct = USB_PRODUCT_USB4915, .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, { .match_flags = USB_DEVICE_ID_MATCH_DEV_CLASS, .bDeviceClass = USB_CLASS_HUB}, { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, .bInterfaceClass = USB_CLASS_HUB}, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, hub_id_table); static struct usb_driver hub_driver = { .name = "hub", .probe = hub_probe, .disconnect = hub_disconnect, .suspend = hub_suspend, .resume = hub_resume, .reset_resume = hub_reset_resume, .pre_reset = hub_pre_reset, .post_reset = hub_post_reset, .unlocked_ioctl = hub_ioctl, .id_table = hub_id_table, .supports_autosuspend = 1, }; int usb_hub_init(void) { if (usb_register(&hub_driver) < 0) { printk(KERN_ERR "%s: can't register hub driver\n", usbcore_name); return -1; } /* * The workqueue needs to be freezable to avoid interfering with * USB-PERSIST port handover. Otherwise it might see that a full-speed * device was gone before the EHCI controller had handed its port * over to the companion full-speed controller. 
*/ hub_wq = alloc_workqueue("usb_hub_wq", WQ_FREEZABLE, 0); if (hub_wq) return 0; /* Fall through if kernel_thread failed */ usb_deregister(&hub_driver); pr_err("%s: can't allocate workqueue for usb hub\n", usbcore_name); return -1; } void usb_hub_cleanup(void) { destroy_workqueue(hub_wq); /* * Hub resources are freed for us by usb_deregister. It calls * usb_driver_purge on every device which in turn calls that * devices disconnect function if it is using this driver. * The hub_disconnect function takes care of releasing the * individual hub resources. -greg */ usb_deregister(&hub_driver); } /* usb_hub_cleanup() */ /** * usb_reset_and_verify_device - perform a USB port reset to reinitialize a device * @udev: device to reset (not in SUSPENDED or NOTATTACHED state) * * WARNING - don't use this routine to reset a composite device * (one with multiple interfaces owned by separate drivers)! * Use usb_reset_device() instead. * * Do a port reset, reassign the device's address, and establish its * former operating configuration. If the reset fails, or the device's * descriptors change from their values before the reset, or the original * configuration and altsettings cannot be restored, a flag will be set * telling hub_wq to pretend the device has been disconnected and then * re-connected. All drivers will be unbound, and the device will be * re-enumerated and probed all over again. * * Return: 0 if the reset succeeded, -ENODEV if the device has been * flagged for logical disconnection, or some other negative error code * if the reset wasn't even attempted. * * Note: * The caller must own the device lock and the port lock, the latter is * taken by usb_reset_device(). For example, it's safe to use * usb_reset_device() from a driver probe() routine after downloading * new firmware. For calls that might not occur during probe(), drivers * should lock the device using usb_lock_device_for_reset(). * * Locking exception: This routine may also be called from within an * autoresume handler. Such usage won't conflict with other tasks * holding the device lock because these tasks should always call * usb_autopm_resume_device(), thereby preventing any unwanted * autoresume. The autoresume handler is expected to have already * acquired the port lock before calling this routine. */ static int usb_reset_and_verify_device(struct usb_device *udev) { struct usb_device *parent_hdev = udev->parent; struct usb_hub *parent_hub; struct usb_hcd *hcd = bus_to_hcd(udev->bus); struct usb_device_descriptor descriptor; struct usb_host_bos *bos; int i, j, ret = 0; int port1 = udev->portnum; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) { dev_dbg(&udev->dev, "device reset not allowed in state %d\n", udev->state); return -EINVAL; } if (!parent_hdev) return -EISDIR; parent_hub = usb_hub_to_struct_hub(parent_hdev); /* Disable USB2 hardware LPM. * It will be re-enabled by the enumeration process. */ usb_disable_usb2_hardware_lpm(udev); bos = udev->bos; udev->bos = NULL; mutex_lock(hcd->address0_mutex); for (i = 0; i < PORT_INIT_TRIES; ++i) { if (hub_port_stop_enumerate(parent_hub, port1, i)) { ret = -ENODEV; break; } /* ep0 maxpacket size may change; let the HCD know about it. * Other endpoints will be handled by re-enumeration. 
*/ usb_ep0_reinit(udev); ret = hub_port_init(parent_hub, udev, port1, i, &descriptor); if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV) break; } mutex_unlock(hcd->address0_mutex); if (ret < 0) goto re_enumerate; /* Device might have changed firmware (DFU or similar) */ if (descriptors_changed(udev, &descriptor, bos)) { dev_info(&udev->dev, "device firmware changed\n"); goto re_enumerate; } /* Restore the device's previous configuration */ if (!udev->actconfig) goto done; mutex_lock(hcd->bandwidth_mutex); ret = usb_hcd_alloc_bandwidth(udev, udev->actconfig, NULL, NULL); if (ret < 0) { dev_warn(&udev->dev, "Busted HC? Not enough HCD resources for " "old configuration.\n"); mutex_unlock(hcd->bandwidth_mutex); goto re_enumerate; } ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_CONFIGURATION, 0, udev->actconfig->desc.bConfigurationValue, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret < 0) { dev_err(&udev->dev, "can't restore configuration #%d (error=%d)\n", udev->actconfig->desc.bConfigurationValue, ret); mutex_unlock(hcd->bandwidth_mutex); goto re_enumerate; } mutex_unlock(hcd->bandwidth_mutex); usb_set_device_state(udev, USB_STATE_CONFIGURED); /* Put interfaces back into the same altsettings as before. * Don't bother to send the Set-Interface request for interfaces * that were already in altsetting 0; besides being unnecessary, * many devices can't handle it. Instead just reset the host-side * endpoint state. */ for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { struct usb_host_config *config = udev->actconfig; struct usb_interface *intf = config->interface[i]; struct usb_interface_descriptor *desc; desc = &intf->cur_altsetting->desc; if (desc->bAlternateSetting == 0) { usb_disable_interface(udev, intf, true); usb_enable_interface(udev, intf, true); ret = 0; } else { /* Let the bandwidth allocation function know that this * device has been reset, and it will have to use * alternate setting 0 as the current alternate setting. */ intf->resetting_device = 1; ret = usb_set_interface(udev, desc->bInterfaceNumber, desc->bAlternateSetting); intf->resetting_device = 0; } if (ret < 0) { dev_err(&udev->dev, "failed to restore interface %d " "altsetting %d (error=%d)\n", desc->bInterfaceNumber, desc->bAlternateSetting, ret); goto re_enumerate; } /* Resetting also frees any allocated streams */ for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) intf->cur_altsetting->endpoint[j].streams = 0; } done: /* Now that the alt settings are re-installed, enable LTM and LPM. */ usb_enable_usb2_hardware_lpm(udev); usb_unlocked_enable_lpm(udev); usb_enable_ltm(udev); usb_release_bos_descriptor(udev); udev->bos = bos; return 0; re_enumerate: usb_release_bos_descriptor(udev); udev->bos = bos; hub_port_logical_disconnect(parent_hub, port1); return -ENODEV; } /** * usb_reset_device - warn interface drivers and perform a USB port reset * @udev: device to reset (not in NOTATTACHED state) * * Warns all drivers bound to registered interfaces (using their pre_reset * method), performs the port reset, and then lets the drivers know that * the reset is over (using their post_reset method). * * Return: The same as for usb_reset_and_verify_device(). * However, if a reset is already in progress (for instance, if a * driver doesn't have pre_reset() or post_reset() callbacks, and while * being unbound or re-bound during the ongoing reset its disconnect() * or probe() routine tries to perform a second, nested reset), the * routine returns -EINPROGRESS. 
* * Note: * The caller must own the device lock. For example, it's safe to use * this from a driver probe() routine after downloading new firmware. * For calls that might not occur during probe(), drivers should lock * the device using usb_lock_device_for_reset(). * * If an interface is currently being probed or disconnected, we assume * its driver knows how to handle resets. For all other interfaces, * if the driver doesn't have pre_reset and post_reset methods then * we attempt to unbind it and rebind afterward. */ int usb_reset_device(struct usb_device *udev) { int ret; int i; unsigned int noio_flag; struct usb_port *port_dev; struct usb_host_config *config = udev->actconfig; struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); if (udev->state == USB_STATE_NOTATTACHED) { dev_dbg(&udev->dev, "device reset not allowed in state %d\n", udev->state); return -EINVAL; } if (!udev->parent) { /* this requires hcd-specific logic; see ohci_restart() */ dev_dbg(&udev->dev, "%s for root hub!\n", __func__); return -EISDIR; } if (udev->reset_in_progress) return -EINPROGRESS; udev->reset_in_progress = 1; port_dev = hub->ports[udev->portnum - 1]; /* * Don't allocate memory with GFP_KERNEL in the current * context, to avoid a possible deadlock if a usb mass * storage interface or usbnet interface (iSCSI case) * is included in the current configuration. The easiest * approach is to do it for every device reset, * because the device's 'memalloc_noio' flag may not have * been set before resetting the USB device. */ noio_flag = memalloc_noio_save(); /* Prevent autosuspend during the reset */ usb_autoresume_device(udev); if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { struct usb_interface *cintf = config->interface[i]; struct usb_driver *drv; int unbind = 0; if (cintf->dev.driver) { drv = to_usb_driver(cintf->dev.driver); if (drv->pre_reset && drv->post_reset) unbind = (drv->pre_reset)(cintf); else if (cintf->condition == USB_INTERFACE_BOUND) unbind = 1; if (unbind) usb_forced_unbind_intf(cintf); } } } usb_lock_port(port_dev); ret = usb_reset_and_verify_device(udev); usb_unlock_port(port_dev); if (config) { for (i = config->desc.bNumInterfaces - 1; i >= 0; --i) { struct usb_interface *cintf = config->interface[i]; struct usb_driver *drv; int rebind = cintf->needs_binding; if (!rebind && cintf->dev.driver) { drv = to_usb_driver(cintf->dev.driver); if (drv->post_reset) rebind = (drv->post_reset)(cintf); else if (cintf->condition == USB_INTERFACE_BOUND) rebind = 1; if (rebind) cintf->needs_binding = 1; } } /* If the reset failed, hub_wq will unbind drivers later */ if (ret == 0) usb_unbind_and_rebind_marked_interfaces(udev); } usb_autosuspend_device(udev); memalloc_noio_restore(noio_flag); udev->reset_in_progress = 0; return ret; } EXPORT_SYMBOL_GPL(usb_reset_device); /** * usb_queue_reset_device - Reset a USB device from an atomic context * @iface: USB interface belonging to the device to reset * * This function can be used to reset a USB device from an atomic * context, where usb_reset_device() won't work (as it blocks). * * Doing a reset via this method is functionally equivalent to calling * usb_reset_device(), except for the fact that it is delayed to a * workqueue. This means that any drivers bound to other interfaces * might be unbound, as well as users from usbfs in user space.
* * Corner cases: * * - Scheduling two resets at the same time from two different drivers * attached to two different interfaces of the same device is * possible; depending on how the driver attached to each interface * handles ->pre_reset(), the second reset might happen or not. * * - If the reset is delayed so long that the interface is unbound from * its driver, the reset will be skipped. * * - This function can be called during .probe(). It can also be called * during .disconnect(), but doing so is pointless because the reset * will not occur. If you really want to reset the device during * .disconnect(), call usb_reset_device() directly -- but watch out * for nested unbinding issues! */ void usb_queue_reset_device(struct usb_interface *iface) { if (schedule_work(&iface->reset_ws)) usb_get_intf(iface); } EXPORT_SYMBOL_GPL(usb_queue_reset_device); /** * usb_hub_find_child - Get the pointer of child device * attached to the port which is specified by @port1. * @hdev: USB device belonging to the usb hub * @port1: port num to indicate which port the child device * is attached to. * * USB drivers call this function to get hub's child device * pointer. * * Return: %NULL if input param is invalid and * child's usb_device pointer if non-NULL. */ struct usb_device *usb_hub_find_child(struct usb_device *hdev, int port1) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); if (port1 < 1 || port1 > hdev->maxchild) return NULL; return hub->ports[port1 - 1]->child; } EXPORT_SYMBOL_GPL(usb_hub_find_child); void usb_hub_adjust_deviceremovable(struct usb_device *hdev, struct usb_hub_descriptor *desc) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); enum usb_port_connect_type connect_type; int i; if (!hub) return; if (!hub_is_superspeed(hdev)) { for (i = 1; i <= hdev->maxchild; i++) { struct usb_port *port_dev = hub->ports[i - 1]; connect_type = port_dev->connect_type; if (connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) { u8 mask = 1 << (i%8); if (!(desc->u.hs.DeviceRemovable[i/8] & mask)) { dev_dbg(&port_dev->dev, "DeviceRemovable is changed to 1 according to platform information.\n"); desc->u.hs.DeviceRemovable[i/8] |= mask; } } } } else { u16 port_removable = le16_to_cpu(desc->u.ss.DeviceRemovable); for (i = 1; i <= hdev->maxchild; i++) { struct usb_port *port_dev = hub->ports[i - 1]; connect_type = port_dev->connect_type; if (connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) { u16 mask = 1 << i; if (!(port_removable & mask)) { dev_dbg(&port_dev->dev, "DeviceRemovable is changed to 1 according to platform information.\n"); port_removable |= mask; } } } desc->u.ss.DeviceRemovable = cpu_to_le16(port_removable); } } #ifdef CONFIG_ACPI /** * usb_get_hub_port_acpi_handle - Get the usb port's acpi handle * @hdev: USB device belonging to the usb hub * @port1: port num of the port * * Return: Port's acpi handle if successful, %NULL if params are * invalid. */ acpi_handle usb_get_hub_port_acpi_handle(struct usb_device *hdev, int port1) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); if (!hub) return NULL; return ACPI_HANDLE(&hub->ports[port1 - 1]->dev); } #endif
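/*
 * Illustrative usage sketch (not part of hub.c): how an interface driver
 * might trigger a reset outside of probe(), following the locking rules
 * described in the usb_reset_device() kernel-doc above.  The helper name
 * my_driver_recover() and its caller context are hypothetical;
 * usb_lock_device_for_reset(), usb_reset_device(), usb_unlock_device() and
 * interface_to_usbdev() are the existing core APIs from <linux/usb.h>.
 */
#if 0	/* example only, never compiled */
static int my_driver_recover(struct usb_interface *intf)
{
	struct usb_device *udev = interface_to_usbdev(intf);
	int ret;

	/* Outside probe()/disconnect() the device lock must be taken first. */
	ret = usb_lock_device_for_reset(udev, intf);
	if (ret < 0)
		return ret;

	/* May unbind and rebind drivers on the device's other interfaces. */
	ret = usb_reset_device(udev);
	usb_unlock_device(udev);
	return ret;
}
#endif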
// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * * NOHZ implementation for low and high resolution timers * * Started by: Thomas Gleixner and Ingo Molnar */ #include <linux/compiler.h> #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/percpu.h> #include <linux/nmi.h> #include <linux/profile.h> #include <linux/sched/signal.h> #include <linux/sched/clock.h> #include <linux/sched/stat.h> #include <linux/sched/nohz.h> #include <linux/sched/loadavg.h> #include <linux/module.h> #include <linux/irq_work.h> #include <linux/posix-timers.h> #include <linux/context_tracking.h> #include <linux/mm.h> #include <asm/irq_regs.h> #include "tick-internal.h" #include <trace/events/timer.h> /* * Per-CPU nohz control structure */ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); struct tick_sched *tick_get_tick_sched(int cpu) { return &per_cpu(tick_cpu_sched, cpu); } /* * The time when the last jiffy update happened. Write access must hold * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a * consistent view of jiffies and last_jiffies_update. */ static ktime_t last_jiffies_update; /* * Must be called with interrupts disabled ! */ static void tick_do_update_jiffies64(ktime_t now) { unsigned long ticks = 1; ktime_t delta, nextp; /* * 64-bit can do a quick check without holding the jiffies lock and * without looking at the sequence count. The smp_load_acquire() * pairs with the update done later in this function. * * 32-bit cannot do that because the store of 'tick_next_period' * consists of two 32-bit stores, and the first store could be * moved by the CPU to a random point in the future. */ if (IS_ENABLED(CONFIG_64BIT)) { if (ktime_before(now, smp_load_acquire(&tick_next_period))) return; } else { unsigned int seq; /* * Avoid contention on 'jiffies_lock' and protect the quick * check with the sequence count. */ do { seq = read_seqcount_begin(&jiffies_seq); nextp = tick_next_period; } while (read_seqcount_retry(&jiffies_seq, seq)); if (ktime_before(now, nextp)) return; } /* Quick check failed, i.e. update is required. */ raw_spin_lock(&jiffies_lock); /* * Re-evaluate with the lock held. Another CPU might have done the * update already.
*/ if (ktime_before(now, tick_next_period)) { raw_spin_unlock(&jiffies_lock); return; } write_seqcount_begin(&jiffies_seq); delta = ktime_sub(now, tick_next_period); if (unlikely(delta >= TICK_NSEC)) { /* Slow path for long idle sleep times */ s64 incr = TICK_NSEC; ticks += ktime_divns(delta, incr); last_jiffies_update = ktime_add_ns(last_jiffies_update, incr * ticks); } else { last_jiffies_update = ktime_add_ns(last_jiffies_update, TICK_NSEC); } /* Advance jiffies to complete the 'jiffies_seq' protected job */ jiffies_64 += ticks; /* Keep the tick_next_period variable up to date */ nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC); if (IS_ENABLED(CONFIG_64BIT)) { /* * Pairs with smp_load_acquire() in the lockless quick * check above, and ensures that the update to 'jiffies_64' is * not reordered vs. the store to 'tick_next_period', neither * by the compiler nor by the CPU. */ smp_store_release(&tick_next_period, nextp); } else { /* * A plain store is good enough on 32-bit, as the quick check * above is protected by the sequence count. */ tick_next_period = nextp; } /* * Release the sequence count. calc_global_load() below is not * protected by it, but 'jiffies_lock' needs to be held to prevent * concurrent invocations. */ write_seqcount_end(&jiffies_seq); calc_global_load(); raw_spin_unlock(&jiffies_lock); update_wall_time(); } /* * Initialize and return retrieve the jiffies update. */ static ktime_t tick_init_jiffy_update(void) { ktime_t period; raw_spin_lock(&jiffies_lock); write_seqcount_begin(&jiffies_seq); /* Have we started the jiffies update yet ? */ if (last_jiffies_update == 0) { u32 rem; /* * Ensure that the tick is aligned to a multiple of * TICK_NSEC. */ div_u64_rem(tick_next_period, TICK_NSEC, &rem); if (rem) tick_next_period += TICK_NSEC - rem; last_jiffies_update = tick_next_period; } period = last_jiffies_update; write_seqcount_end(&jiffies_seq); raw_spin_unlock(&jiffies_lock); return period; } static inline int tick_sched_flag_test(struct tick_sched *ts, unsigned long flag) { return !!(ts->flags & flag); } static inline void tick_sched_flag_set(struct tick_sched *ts, unsigned long flag) { lockdep_assert_irqs_disabled(); ts->flags |= flag; } static inline void tick_sched_flag_clear(struct tick_sched *ts, unsigned long flag) { lockdep_assert_irqs_disabled(); ts->flags &= ~flag; } #define MAX_STALLED_JIFFIES 5 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int tick_cpu, cpu = smp_processor_id(); /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the CPU in charge went * into a long sleep. If two CPUs happen to assign themselves to * this duty, then the jiffies update is still serialized by * 'jiffies_lock'. * * If nohz_full is enabled, this should not happen because the * 'tick_do_timer_cpu' CPU never relinquishes. */ tick_cpu = READ_ONCE(tick_do_timer_cpu); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL WARN_ON_ONCE(tick_nohz_full_running); #endif WRITE_ONCE(tick_do_timer_cpu, cpu); tick_cpu = cpu; } /* Check if jiffies need an update */ if (tick_cpu == cpu) tick_do_update_jiffies64(now); /* * If the jiffies update stalled for too long (timekeeper in stop_machine() * or VMEXIT'ed for several msecs), force an update. 
*/ if (ts->last_tick_jiffies != jiffies) { ts->stalled_jiffies = 0; ts->last_tick_jiffies = READ_ONCE(jiffies); } else { if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { tick_do_update_jiffies64(now); ts->stalled_jiffies = 0; ts->last_tick_jiffies = READ_ONCE(jiffies); } } if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) ts->got_idle_tick = 1; } static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long * time. This happens on completely idle SMP systems while * waiting on the login prompt. We also increment the "start of * idle" jiffy stamp so the idle accounting adjustment we do * when we go busy again does not account too many ticks. */ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { touch_softlockup_watchdog_sched(); if (is_idle_task(current)) ts->idle_jiffies++; /* * In case the current tick fired too early past its expected * expiration, make sure we don't bypass the next clock reprogramming * to the same deadline. */ ts->next_tick = 0; } update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); } /* * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled. */ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer) { struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer); struct pt_regs *regs = get_irq_regs(); ktime_t now = ktime_get(); tick_sched_do_timer(ts, now); /* * Do not call when we are not in IRQ context and have * no valid 'regs' pointer */ if (regs) tick_sched_handle(ts, regs); else ts->next_tick = 0; /* * In dynticks mode, tick reprogram is deferred: * - to the idle task if in dynticks-idle * - to IRQ exit if in full-dynticks. 
*/ if (unlikely(tick_sched_flag_test(ts, TS_FLAG_STOPPED))) return HRTIMER_NORESTART; hrtimer_forward(timer, now, TICK_NSEC); return HRTIMER_RESTART; } static void tick_sched_timer_cancel(struct tick_sched *ts) { if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) hrtimer_cancel(&ts->sched_timer); else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) tick_program_event(KTIME_MAX, 1); } #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; EXPORT_SYMBOL_GPL(tick_nohz_full_mask); bool tick_nohz_full_running; EXPORT_SYMBOL_GPL(tick_nohz_full_running); static atomic_t tick_dep_mask; static bool check_tick_dependency(atomic_t *dep) { int val = atomic_read(dep); if (val & TICK_DEP_MASK_POSIX_TIMER) { trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER); return true; } if (val & TICK_DEP_MASK_PERF_EVENTS) { trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS); return true; } if (val & TICK_DEP_MASK_SCHED) { trace_tick_stop(0, TICK_DEP_MASK_SCHED); return true; } if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) { trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE); return true; } if (val & TICK_DEP_MASK_RCU) { trace_tick_stop(0, TICK_DEP_MASK_RCU); return true; } if (val & TICK_DEP_MASK_RCU_EXP) { trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP); return true; } return false; } static bool can_stop_full_tick(int cpu, struct tick_sched *ts) { lockdep_assert_irqs_disabled(); if (unlikely(!cpu_online(cpu))) return false; if (check_tick_dependency(&tick_dep_mask)) return false; if (check_tick_dependency(&ts->tick_dep_mask)) return false; if (check_tick_dependency(&current->tick_dep_mask)) return false; if (check_tick_dependency(&current->signal->tick_dep_mask)) return false; return true; } static void nohz_full_kick_func(struct irq_work *work) { /* Empty, the tick restart happens on tick_nohz_irq_exit() */ } static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = IRQ_WORK_INIT_HARD(nohz_full_kick_func); /* * Kick this CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(), * is NMI safe. */ static void tick_nohz_full_kick(void) { if (!tick_nohz_full_cpu(smp_processor_id())) return; irq_work_queue(this_cpu_ptr(&nohz_full_kick_work)); } /* * Kick the CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. */ void tick_nohz_full_kick_cpu(int cpu) { if (!tick_nohz_full_cpu(cpu)) return; irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); } static void tick_nohz_kick_task(struct task_struct *tsk) { int cpu; /* * If the task is not running, run_posix_cpu_timers() * has nothing to elapse, and an IPI can then be optimized out. * * activate_task() STORE p->tick_dep_mask * STORE p->on_rq * __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or()) * LOCK rq->lock LOAD p->on_rq * smp_mb__after_spin_lock() * tick_nohz_task_switch() * LOAD p->tick_dep_mask */ if (!sched_task_on_rq(tsk)) return; /* * If the task concurrently migrates to another CPU, * we guarantee it sees the new tick dependency upon * schedule. 
* * set_task_cpu(p, cpu); * STORE p->cpu = @cpu * __schedule() (switch to task 'p') * LOCK rq->lock * smp_mb__after_spin_lock() STORE p->tick_dep_mask * tick_nohz_task_switch() smp_mb() (atomic_fetch_or()) * LOAD p->tick_dep_mask LOAD p->cpu */ cpu = task_cpu(tsk); preempt_disable(); if (cpu_online(cpu)) tick_nohz_full_kick_cpu(cpu); preempt_enable(); } /* * Kick all full dynticks CPUs in order to force these to re-evaluate * their dependency on the tick and restart it if necessary. */ static void tick_nohz_full_kick_all(void) { int cpu; if (!tick_nohz_full_running) return; preempt_disable(); for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask) tick_nohz_full_kick_cpu(cpu); preempt_enable(); } static void tick_nohz_dep_set_all(atomic_t *dep, enum tick_dep_bits bit) { int prev; prev = atomic_fetch_or(BIT(bit), dep); if (!prev) tick_nohz_full_kick_all(); } /* * Set a global tick dependency. Used by perf events that rely on freq and * unstable clocks. */ void tick_nohz_dep_set(enum tick_dep_bits bit) { tick_nohz_dep_set_all(&tick_dep_mask, bit); } void tick_nohz_dep_clear(enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &tick_dep_mask); } /* * Set per-CPU tick dependency. Used by scheduler and perf events in order to * manage event-throttling. */ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { int prev; struct tick_sched *ts; ts = per_cpu_ptr(&tick_cpu_sched, cpu); prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask); if (!prev) { preempt_disable(); /* Perf needs local kick that is NMI safe */ if (cpu == smp_processor_id()) { tick_nohz_full_kick(); } else { /* Remote IRQ work not NMI-safe */ if (!WARN_ON_ONCE(in_nmi())) tick_nohz_full_kick_cpu(cpu); } preempt_enable(); } } EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu); void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); atomic_andnot(BIT(bit), &ts->tick_dep_mask); } EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu); /* * Set a per-task tick dependency. RCU needs this. Also posix CPU timers * in order to elapse per task timers. */ void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) { if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) tick_nohz_kick_task(tsk); } EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task); void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &tsk->tick_dep_mask); } EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task); /* * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse * per process timers. */ void tick_nohz_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { int prev; struct signal_struct *sig = tsk->signal; prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask); if (!prev) { struct task_struct *t; lockdep_assert_held(&tsk->sighand->siglock); __for_each_thread(sig, t) tick_nohz_kick_task(t); } } void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit) { atomic_andnot(BIT(bit), &sig->tick_dep_mask); } /* * Re-evaluate the need for the tick as we switch the current task. * It might need the tick due to per task/process properties: * perf events, posix CPU timers, ... 
*/ void __tick_nohz_task_switch(void) { struct tick_sched *ts; if (!tick_nohz_full_cpu(smp_processor_id())) return; ts = this_cpu_ptr(&tick_cpu_sched); if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { if (atomic_read(&current->tick_dep_mask) || atomic_read(&current->signal->tick_dep_mask)) tick_nohz_full_kick(); } } /* Get the boot-time nohz CPU list from the kernel parameters. */ void __init tick_nohz_full_setup(cpumask_var_t cpumask) { alloc_bootmem_cpumask_var(&tick_nohz_full_mask); cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; } bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { /* * The 'tick_do_timer_cpu' CPU handles housekeeping duty (unbound * timers, workqueues, timekeeping, ...) on behalf of full dynticks * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu) return false; return true; } static int tick_nohz_cpu_down(unsigned int cpu) { return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY; } void __init tick_nohz_init(void) { int cpu, ret; if (!tick_nohz_full_running) return; /* * Full dynticks uses IRQ work to drive the tick rescheduling on safe * locking contexts. But then we need IRQ work to raise its own * interrupts to avoid circular dependency on the tick. */ if (!arch_irq_work_has_interrupt()) { pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support IRQ work self-IPIs\n"); cpumask_clear(tick_nohz_full_mask); tick_nohz_full_running = false; return; } if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) && !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) { cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { pr_warn("NO_HZ: Clearing %d from nohz_full range " "for timekeeping\n", cpu); cpumask_clear_cpu(cpu, tick_nohz_full_mask); } } for_each_cpu(cpu, tick_nohz_full_mask) ct_cpu_track_user(cpu); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "kernel/nohz:predown", NULL, tick_nohz_cpu_down); WARN_ON(ret < 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); } #endif /* #ifdef CONFIG_NO_HZ_FULL */ /* * NOHZ - aka dynamic tick functionality */ #ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ bool tick_nohz_enabled __read_mostly = true; unsigned long tick_nohz_active __read_mostly; /* * Enable / Disable tickless mode */ static int __init setup_tick_nohz(char *str) { return (kstrtobool(str, &tick_nohz_enabled) == 0); } __setup("nohz=", setup_tick_nohz); bool tick_nohz_tick_stopped(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); return tick_sched_flag_test(ts, TS_FLAG_STOPPED); } bool tick_nohz_tick_stopped_cpu(int cpu) { struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu); return tick_sched_flag_test(ts, TS_FLAG_STOPPED); } /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted * @now: current ktime_t * * Called from interrupt entry when the CPU was idle * * In case the sched_tick was stopped on this CPU, we have to check if jiffies * must be updated. Otherwise an interrupt handler could use a stale jiffy * value. We do this unconditionally on any CPU, as we don't know whether the * CPU, which has the update task assigned, is in a long sleep. 
*/ static void tick_nohz_update_jiffies(ktime_t now) { unsigned long flags; __this_cpu_write(tick_cpu_sched.idle_waketime, now); local_irq_save(flags); tick_do_update_jiffies64(now); local_irq_restore(flags); touch_softlockup_watchdog_sched(); } static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) { ktime_t delta; if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))) return; delta = ktime_sub(now, ts->idle_entrytime); write_seqcount_begin(&ts->idle_sleeptime_seq); if (nr_iowait_cpu(smp_processor_id()) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); else ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE); write_seqcount_end(&ts->idle_sleeptime_seq); sched_clock_idle_wakeup_event(); } static void tick_nohz_start_idle(struct tick_sched *ts) { write_seqcount_begin(&ts->idle_sleeptime_seq); ts->idle_entrytime = ktime_get(); tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE); write_seqcount_end(&ts->idle_sleeptime_seq); sched_clock_idle_sleep_event(); } static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime, bool compute_delta, u64 *last_update_time) { ktime_t now, idle; unsigned int seq; if (!tick_nohz_active) return -1; now = ktime_get(); if (last_update_time) *last_update_time = ktime_to_us(now); do { seq = read_seqcount_begin(&ts->idle_sleeptime_seq); if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) { ktime_t delta = ktime_sub(now, ts->idle_entrytime); idle = ktime_add(*sleeptime, delta); } else { idle = *sleeptime; } } while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq)); return ktime_to_us(idle); } /** * get_cpu_idle_time_us - get the total idle time of a CPU * @cpu: CPU number to query * @last_update_time: variable to store update time in. Do not update * counters if NULL. * * Return the cumulative idle time (since boot) for a given * CPU, in microseconds. Note that this is partially broken due to * the counter of iowait tasks that can be remotely updated without * any synchronization. Therefore it is possible to observe backward * values within two consecutive reads. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. * * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu */ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); return get_cpu_sleep_time_us(ts, &ts->idle_sleeptime, !nr_iowait_cpu(cpu), last_update_time); } EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); /** * get_cpu_iowait_time_us - get the total iowait time of a CPU * @cpu: CPU number to query * @last_update_time: variable to store update time in. Do not update * counters if NULL. * * Return the cumulative iowait time (since boot) for a given * CPU, in microseconds. Note this is partially broken due to * the counter of iowait tasks that can be remotely updated without * any synchronization. Therefore it is possible to observe backward * values within two consecutive reads. * * This time is measured via accounting rather than sampling, * and is as accurate as ktime_get() is. 
* * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu */ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); return get_cpu_sleep_time_us(ts, &ts->iowait_sleeptime, nr_iowait_cpu(cpu), last_update_time); } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) { hrtimer_cancel(&ts->sched_timer); hrtimer_set_expires(&ts->sched_timer, ts->last_tick); /* Forward the time to expire in the future */ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) { hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); } else { tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } /* * Reset to make sure the next tick stop doesn't get fooled by past * cached clock deadline. */ ts->next_tick = 0; } static inline bool local_timer_softirq_pending(void) { return local_softirq_pending() & BIT(TIMER_SOFTIRQ); } /* * Read jiffies and the time when jiffies were updated last */ u64 get_jiffies_update(unsigned long *basej) { unsigned long basejiff; unsigned int seq; u64 basemono; do { seq = read_seqcount_begin(&jiffies_seq); basemono = last_jiffies_update; basejiff = jiffies; } while (read_seqcount_retry(&jiffies_seq, seq)); *basej = basejiff; return basemono; } /** * tick_nohz_next_event() - return the clock monotonic based next event * @ts: pointer to tick_sched struct * @cpu: CPU number * * Return: * *%0 - When the next event is a maximum of TICK_NSEC in the future * and the tick is not stopped yet * *%next_event - Next event based on clock monotonic */ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { u64 basemono, next_tick, delta, expires; unsigned long basejiff; int tick_cpu; basemono = get_jiffies_update(&basejiff); ts->last_jiffies = basejiff; ts->timer_expires_base = basemono; /* * Keep the periodic tick, when RCU, architecture or irq_work * requests it. * Aside of that, check whether the local timer softirq is * pending. If so, its a bad idea to call get_next_timer_interrupt(), * because there is an already expired timer, so it will request * immediate expiry, which rearms the hardware timer with a * minimal delta, which brings us back to this place * immediately. Lather, rinse and repeat... */ if (rcu_needs_cpu() || arch_needs_cpu() || irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { /* * Get the next pending timer. If high resolution * timers are enabled this only takes the timer wheel * timers into account. If high resolution timers are * disabled this also looks at the next expiring * hrtimer. */ next_tick = get_next_timer_interrupt(basejiff, basemono); ts->next_timer = next_tick; } /* Make sure next_tick is never before basemono! */ if (WARN_ON_ONCE(basemono > next_tick)) next_tick = basemono; /* * If the tick is due in the next period, keep it ticking or * force prod the timer. */ delta = next_tick - basemono; if (delta <= (u64)TICK_NSEC) { /* * We've not stopped the tick yet, and there's a timer in the * next period, so no point in stopping it either, bail. */ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ts->timer_expires = 0; goto out; } } /* * If this CPU is the one which had the do_timer() duty last, we limit * the sleep time to the timekeeping 'max_deferment' value. * Otherwise we can sleep as long as we want. 
*/ delta = timekeeping_max_deferment(); tick_cpu = READ_ONCE(tick_do_timer_cpu); if (tick_cpu != cpu && (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST))) delta = KTIME_MAX; /* Calculate the next expiry time */ if (delta < (KTIME_MAX - basemono)) expires = basemono + delta; else expires = KTIME_MAX; ts->timer_expires = min_t(u64, expires, next_tick); out: return ts->timer_expires; } static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); unsigned long basejiff = ts->last_jiffies; u64 basemono = ts->timer_expires_base; bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED); int tick_cpu; u64 expires; /* Make sure we won't be trying to stop it twice in a row. */ ts->timer_expires_base = 0; /* * Now the tick should be stopped definitely - so the timer base needs * to be marked idle as well to not miss a newly queued timer. */ expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle); if (expires > ts->timer_expires) { /* * This path could only happen when the first timer was removed * between calculating the possible sleep length and now (when * high resolution mode is not active, timer could also be a * hrtimer). * * We have to stick to the original calculated expiry value to * not stop the tick for too long with a shallow C-state (which * was programmed by cpuidle because of an early next expiration * value). */ expires = ts->timer_expires; } /* If the timer base is not idle, retain the not yet stopped tick. */ if (!timer_idle) return; /* * If this CPU is the one which updates jiffies, then give up * the assignment and let it be taken by the CPU which runs * the tick timer next, which might be this CPU as well. If we * don't drop this here, the jiffies might be stale and * do_timer() never gets invoked. Keep track of the fact that it * was the one which had the do_timer() duty last. */ tick_cpu = READ_ONCE(tick_do_timer_cpu); if (tick_cpu == cpu) { WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE); tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST); } else if (tick_cpu != TICK_DO_TIMER_NONE) { tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST); } /* Skip reprogram of event if it's not changed */ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED) && (expires == ts->next_tick)) { /* Sanity check: make sure clockevent is actually programmed */ if (expires == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) return; WARN_ON_ONCE(1); printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n", basemono, ts->next_tick, dev->next_event, hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer)); } /* * tick_nohz_stop_tick() can be called several times before * tick_nohz_restart_sched_tick() is called. This happens when * interrupts arrive which do not cause a reschedule. In the first * call we save the current tick time, so we can restart the * scheduler tick in tick_nohz_restart_sched_tick(). */ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { calc_load_nohz_start(); quiet_vmstat(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); tick_sched_flag_set(ts, TS_FLAG_STOPPED); trace_tick_stop(1, TICK_DEP_MASK_NONE); } ts->next_tick = expires; /* * If the expiration time == KTIME_MAX, then we simply stop * the tick timer. 
*/ if (unlikely(expires == KTIME_MAX)) { tick_sched_timer_cancel(ts); return; } if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED_HARD); } else { hrtimer_set_expires(&ts->sched_timer, expires); tick_program_event(expires, 1); } } static void tick_nohz_retain_tick(struct tick_sched *ts) { ts->timer_expires_base = 0; } #ifdef CONFIG_NO_HZ_FULL static void tick_nohz_full_stop_tick(struct tick_sched *ts, int cpu) { if (tick_nohz_next_event(ts, cpu)) tick_nohz_stop_tick(ts, cpu); else tick_nohz_retain_tick(ts); } #endif /* CONFIG_NO_HZ_FULL */ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { /* Update jiffies first */ tick_do_update_jiffies64(now); /* * Clear the timer idle flag, so we avoid IPIs on remote queueing and * the clock forward checks in the enqueue path: */ timer_clear_idle(); calc_load_nohz_stop(); touch_softlockup_watchdog_sched(); /* Cancel the scheduled timer and restore the tick: */ tick_sched_flag_clear(ts, TS_FLAG_STOPPED); tick_nohz_restart(ts, now); } static void __tick_nohz_full_update_tick(struct tick_sched *ts, ktime_t now) { #ifdef CONFIG_NO_HZ_FULL int cpu = smp_processor_id(); if (can_stop_full_tick(cpu, ts)) tick_nohz_full_stop_tick(ts, cpu); else if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) tick_nohz_restart_sched_tick(ts, now); #endif } static void tick_nohz_full_update_tick(struct tick_sched *ts) { if (!tick_nohz_full_cpu(smp_processor_id())) return; if (!tick_sched_flag_test(ts, TS_FLAG_NOHZ)) return; __tick_nohz_full_update_tick(ts, ktime_get()); } /* * A pending softirq outside an IRQ (or softirq disabled section) context * should be waiting for ksoftirqd to handle it. Therefore we shouldn't * reach this code due to the need_resched() early check in can_stop_idle_tick(). * * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, * triggering the code below, since wakep_softirqd() is ignored. 
* */ static bool report_idle_softirq(void) { static int ratelimit; unsigned int pending = local_softirq_pending(); if (likely(!pending)) return false; /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ if (!cpu_active(smp_processor_id())) { pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; if (!pending) return false; } if (ratelimit >= 10) return false; /* On RT, softirq handling may be waiting on some lock */ if (local_bh_blocked()) return false; pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", pending); ratelimit++; return true; } static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { WARN_ON_ONCE(cpu_is_offline(cpu)); if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ))) return false; if (need_resched()) return false; if (unlikely(report_idle_softirq())) return false; if (tick_nohz_full_enabled()) { int tick_cpu = READ_ONCE(tick_do_timer_cpu); /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around */ if (tick_cpu == cpu) return false; /* Should not happen for nohz-full */ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE)) return false; } return true; } /** * tick_nohz_idle_stop_tick - stop the idle tick from the idle task * * When the next event is more than a tick into the future, stop the idle tick */ void tick_nohz_idle_stop_tick(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); int cpu = smp_processor_id(); ktime_t expires; /* * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the * tick timer expiration time is known already. */ if (ts->timer_expires_base) expires = ts->timer_expires; else if (can_stop_idle_tick(cpu, ts)) expires = tick_nohz_next_event(ts, cpu); else return; ts->idle_calls++; if (expires > 0LL) { int was_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED); tick_nohz_stop_tick(ts, cpu); ts->idle_sleeps++; ts->idle_expires = expires; if (!was_stopped && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ts->idle_jiffies = ts->last_jiffies; nohz_balance_enter_idle(cpu); } } else { tick_nohz_retain_tick(ts); } } void tick_nohz_idle_retain_tick(void) { tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched)); } /** * tick_nohz_idle_enter - prepare for entering idle on the current CPU * * Called when we start the idle loop. */ void tick_nohz_idle_enter(void) { struct tick_sched *ts; lockdep_assert_irqs_enabled(); local_irq_disable(); ts = this_cpu_ptr(&tick_cpu_sched); WARN_ON_ONCE(ts->timer_expires_base); tick_sched_flag_set(ts, TS_FLAG_INIDLE); tick_nohz_start_idle(ts); local_irq_enable(); } /** * tick_nohz_irq_exit - Notify the tick about IRQ exit * * A timer may have been added/modified/deleted either by the current IRQ, * or by another place using this IRQ as a notification. This IRQ may have * also updated the RCU callback list. These events may require a * re-evaluation of the next tick. Depending on the context: * * 1) If the CPU is idle and no resched is pending, just proceed with idle * time accounting. The next tick will be re-evaluated on the next idle * loop iteration. * * 2) If the CPU is nohz_full: * * 2.1) If there is any tick dependency, restart the tick if stopped. * * 2.2) If there is no tick dependency, (re-)evaluate the next tick and * stop/update it accordingly. 
*/ void tick_nohz_irq_exit(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) tick_nohz_start_idle(ts); else tick_nohz_full_update_tick(ts); } /** * tick_nohz_idle_got_tick - Check whether or not the tick handler has run * * Return: %true if the tick handler has run, otherwise %false */ bool tick_nohz_idle_got_tick(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (ts->got_idle_tick) { ts->got_idle_tick = 0; return true; } return false; } /** * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer * or the tick, whichever expires first. Note that, if the tick has been * stopped, it returns the next hrtimer. * * Called from power state control code with interrupts disabled * * Return: the next expiration time */ ktime_t tick_nohz_get_next_hrtimer(void) { return __this_cpu_read(tick_cpu_device.evtdev)->next_event; } /** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped * * Called from power state control code with interrupts disabled. * * The return value of this function and/or the value returned by it through the * @delta_next pointer can be negative which must be taken into account by its * callers. * * Return: the expected length of the current sleep */ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); int cpu = smp_processor_id(); /* * The idle entry time is expected to be a sufficient approximation of * the current time at this point. */ ktime_t now = ts->idle_entrytime; ktime_t next_event; WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE)); *delta_next = ktime_sub(dev->next_event, now); if (!can_stop_idle_tick(cpu, ts)) return *delta_next; next_event = tick_nohz_next_event(ts, cpu); if (!next_event) return *delta_next; /* * If the next highres timer to expire is earlier than 'next_event', the * idle governor needs to know that. */ next_event = min_t(u64, next_event, hrtimer_next_event_without(&ts->sched_timer)); return ktime_sub(next_event, now); } /** * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value * for a particular CPU. * @cpu: target CPU number * * Called from the schedutil frequency scaling governor in scheduler context. * * Return: the current idle calls counter value for @cpu */ unsigned long tick_nohz_get_idle_calls_cpu(int cpu) { struct tick_sched *ts = tick_get_tick_sched(cpu); return ts->idle_calls; } /** * tick_nohz_get_idle_calls - return the current idle calls counter value * * Called from the schedutil frequency scaling governor in scheduler context. * * Return: the current idle calls counter value for the current CPU */ unsigned long tick_nohz_get_idle_calls(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); return ts->idle_calls; } static void tick_nohz_account_idle_time(struct tick_sched *ts, ktime_t now) { unsigned long ticks; ts->idle_exittime = now; if (vtime_accounting_enabled_this_cpu()) return; /* * We stopped the tick in idle. update_process_times() would miss the * time we slept, as it does only a 1 tick accounting. * Enforce that this is accounted to idle ! */ ticks = jiffies - ts->idle_jiffies; /* * We might be one off. Do not randomly account a huge number of ticks! 
*/ if (ticks && ticks < LONG_MAX) account_idle_ticks(ticks); } void tick_nohz_idle_restart_tick(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) { ktime_t now = ktime_get(); tick_nohz_restart_sched_tick(ts, now); tick_nohz_account_idle_time(ts, now); } } static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now) { if (tick_nohz_full_cpu(smp_processor_id())) __tick_nohz_full_update_tick(ts, now); else tick_nohz_restart_sched_tick(ts, now); tick_nohz_account_idle_time(ts, now); } /** * tick_nohz_idle_exit - Update the tick upon idle task exit * * When the idle task exits, update the tick depending on the * following situations: * * 1) If the CPU is not in nohz_full mode (most cases), then * restart the tick. * * 2) If the CPU is in nohz_full mode (corner case): * 2.1) If the tick can be kept stopped (no tick dependencies) * then re-evaluate the next tick and try to keep it stopped * as long as possible. * 2.2) If the tick has dependencies, restart the tick. * */ void tick_nohz_idle_exit(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); bool idle_active, tick_stopped; ktime_t now; local_irq_disable(); WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE)); WARN_ON_ONCE(ts->timer_expires_base); tick_sched_flag_clear(ts, TS_FLAG_INIDLE); idle_active = tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE); tick_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED); if (idle_active || tick_stopped) now = ktime_get(); if (idle_active) tick_nohz_stop_idle(ts, now); if (tick_stopped) tick_nohz_idle_update_tick(ts, now); local_irq_enable(); } /* * In low-resolution mode, the tick handler must be implemented directly * at the clockevent level. hrtimer can't be used instead, because its * infrastructure actually relies on the tick itself as a backend in * low-resolution mode (see hrtimer_run_queues()). */ static void tick_nohz_lowres_handler(struct clock_event_device *dev) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); dev->next_event = KTIME_MAX; if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART)) tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } static inline void tick_nohz_activate(struct tick_sched *ts) { if (!tick_nohz_enabled) return; tick_sched_flag_set(ts, TS_FLAG_NOHZ); /* One update is enough */ if (!test_and_set_bit(0, &tick_nohz_active)) timers_update_nohz(); } /** * tick_nohz_switch_to_nohz - switch to NOHZ mode */ static void tick_nohz_switch_to_nohz(void) { if (!tick_nohz_enabled) return; if (tick_switch_to_oneshot(tick_nohz_lowres_handler)) return; /* * Recycle the hrtimer in 'ts', so we can share the * highres code. */ tick_setup_sched_timer(false); } static inline void tick_nohz_irq_enter(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED | TS_FLAG_IDLE_ACTIVE)) return; now = ktime_get(); if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)) tick_nohz_stop_idle(ts, now); /* * If all CPUs are idle we may need to update a stale jiffies value. * Note nohz_full is a special case: a timekeeper is guaranteed to stay * alive but it might be busy looping with interrupts disabled in some * rare case (typically stop machine). So we must make sure we have a * last resort. 
*/ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) tick_nohz_update_jiffies(now); } #else static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_nohz_irq_enter(void) { } static inline void tick_nohz_activate(struct tick_sched *ts) { } #endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter() to notify about the possible interruption of idle() */ void tick_irq_enter(void) { tick_check_oneshot_broadcast_this_cpu(); tick_nohz_irq_enter(); } static int sched_skew_tick; static int __init skew_tick(char *str) { get_option(&str, &sched_skew_tick); return 0; } early_param("skew_tick", skew_tick); /** * tick_setup_sched_timer - setup the tick emulation timer * @hrtimer: whether to use the hrtimer or not */ void tick_setup_sched_timer(bool hrtimer) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); /* Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) { tick_sched_flag_set(ts, TS_FLAG_HIGHRES); ts->sched_timer.function = tick_nohz_handler; } /* Get the next period (per-CPU) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); /* Offset the tick to avert 'jiffies_lock' contention. */ if (sched_skew_tick) { u64 offset = TICK_NSEC >> 1; do_div(offset, num_possible_cpus()); offset *= smp_processor_id(); hrtimer_add_expires_ns(&ts->sched_timer, offset); } hrtimer_forward_now(&ts->sched_timer, TICK_NSEC); if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD); else tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); tick_nohz_activate(ts); } /* * Shut down the tick and make sure the CPU won't try to retake the timekeeping * duty before disabling IRQs in idle for the last time. */ void tick_sched_timer_dying(int cpu) { struct tick_device *td = &per_cpu(tick_cpu_device, cpu); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); struct clock_event_device *dev = td->evtdev; ktime_t idle_sleeptime, iowait_sleeptime; unsigned long idle_calls, idle_sleeps; /* This must happen before hrtimers are migrated! */ tick_sched_timer_cancel(ts); /* * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED, * make sure not to call low-res tick handler. */ if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) dev->event_handler = clockevents_handle_noop; idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; idle_calls = ts->idle_calls; idle_sleeps = ts->idle_sleeps; memset(ts, 0, sizeof(*ts)); ts->idle_sleeptime = idle_sleeptime; ts->iowait_sleeptime = iowait_sleeptime; ts->idle_calls = idle_calls; ts->idle_sleeps = idle_sleeps; } /* * Async notification about clocksource changes */ void tick_clock_notify(void) { int cpu; for_each_possible_cpu(cpu) set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); } /* * Async notification about clock event changes */ void tick_oneshot_notify(void) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); set_bit(0, &ts->check_clocks); } /* * Check if a change happened, which makes oneshot possible. * * Called cyclically from the hrtimer softirq (driven by the timer * softirq). 'allow_nohz' signals that we can switch into low-res NOHZ * mode, because high resolution timers are disabled (either compile * or runtime). Called with interrupts disabled. 
*/ int tick_check_oneshot_change(int allow_nohz) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (!test_and_clear_bit(0, &ts->check_clocks)) return 0; if (tick_sched_flag_test(ts, TS_FLAG_NOHZ)) return 0; if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available()) return 0; if (!allow_nohz) return 1; tick_nohz_switch_to_nohz(); return 0; }
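/*
 * Editor's illustrative sketch (not part of tick-sched.c): a minimal,
 * hypothetical out-of-tree module showing how a consumer might read the
 * per-CPU idle and iowait accounting maintained above through the two
 * GPL-exported helpers get_cpu_idle_time_us() and get_cpu_iowait_time_us().
 * The module name is made up, and the declarations are assumed to be
 * available from <linux/tick.h>; both helpers return (u64)-1 when the NOHZ
 * accounting is not active, as documented above.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/tick.h>

static int __init nohz_idle_demo_init(void)
{
	int cpu;

	/* Keep the online mask stable while walking it. */
	cpus_read_lock();
	for_each_online_cpu(cpu) {
		u64 last = 0;
		u64 idle_us   = get_cpu_idle_time_us(cpu, &last);
		u64 iowait_us = get_cpu_iowait_time_us(cpu, NULL);

		pr_info("cpu%d: idle=%llu us, iowait=%llu us (sampled at %llu us)\n",
			cpu, idle_us, iowait_us, last);
	}
	cpus_read_unlock();

	return 0;
}

static void __exit nohz_idle_demo_exit(void)
{
}

module_init(nohz_idle_demo_init);
module_exit(nohz_idle_demo_exit);
MODULE_DESCRIPTION("Illustration only: dump NOHZ idle/iowait accounting");
MODULE_LICENSE("GPL");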
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_INETDEVICE_H #define _LINUX_INETDEVICE_H #ifdef __KERNEL__ #include <linux/bitmap.h> #include <linux/if.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/rcupdate.h> #include <linux/timer.h> #include <linux/sysctl.h> #include <linux/rtnetlink.h> #include <linux/refcount.h> struct ipv4_devconf { void *sysctl; int data[IPV4_DEVCONF_MAX]; DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); }; #define MC_HASH_SZ_LOG 9 struct in_device { struct net_device *dev; netdevice_tracker dev_tracker; refcount_t refcnt; int dead; struct in_ifaddr __rcu *ifa_list;/* IP ifaddr chain */ struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ struct ip_mc_list __rcu * __rcu *mc_hash; int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; unsigned long mr_v2_seen; unsigned long mr_maxdelay; unsigned long mr_qi; /* Query Interval */ unsigned long mr_qri; /* Query Response Interval */ unsigned char mr_qrv; /* Query Robustness Variable */ unsigned char mr_gq_running; u32 mr_ifc_count; struct timer_list mr_gq_timer; /* general query timer */ struct timer_list mr_ifc_timer; /* interface change timer */ struct neigh_parms *arp_parms; struct ipv4_devconf cnf; struct rcu_head rcu_head; }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1]) #define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr)) #define IPV4_DEVCONF_ALL(net, attr) \ IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr) #define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr)) static inline int ipv4_devconf_get(const struct in_device *in_dev, int index) { index--; return READ_ONCE(in_dev->cnf.data[index]); } static inline void ipv4_devconf_set(struct in_device *in_dev, int index, int val) { index--; set_bit(index, in_dev->cnf.state); WRITE_ONCE(in_dev->cnf.data[index], val); } static inline void ipv4_devconf_setall(struct in_device *in_dev) { bitmap_fill(in_dev->cnf.state, IPV4_DEVCONF_MAX); } #define IN_DEV_CONF_GET(in_dev, attr) \ ipv4_devconf_get((in_dev), IPV4_DEVCONF_ ## attr) #define IN_DEV_CONF_SET(in_dev, attr, val) \ ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_NET_ORCONF(in_dev, net, attr) \
(IPV4_DEVCONF_ALL_RO(net, attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr) #define IN_DEV_MAXCONF(in_dev, attr) \ (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_BFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), BC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) #define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) #define IN_DEV_BOOTP_RELAY(in_dev) IN_DEV_ANDCONF((in_dev), BOOTP_RELAY) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) #define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ SECURE_REDIRECTS) #define IN_DEV_IDTAG(in_dev) IN_DEV_CONF_GET(in_dev, TAG) #define IN_DEV_MEDIUM_ID(in_dev) IN_DEV_CONF_GET(in_dev, MEDIUM_ID) #define IN_DEV_PROMOTE_SECONDARIES(in_dev) \ IN_DEV_ORCONF((in_dev), \ PROMOTE_SECONDARIES) #define IN_DEV_ROUTE_LOCALNET(in_dev) IN_DEV_ORCONF(in_dev, ROUTE_LOCALNET) #define IN_DEV_NET_ROUTE_LOCALNET(in_dev, net) \ IN_DEV_NET_ORCONF(in_dev, net, ROUTE_LOCALNET) #define IN_DEV_RX_REDIRECTS(in_dev) \ ((IN_DEV_FORWARD(in_dev) && \ IN_DEV_ANDCONF((in_dev), ACCEPT_REDIRECTS)) \ || (!IN_DEV_FORWARD(in_dev) && \ IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) #define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ IN_DEV_ORCONF((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) #define IN_DEV_ARP_EVICT_NOCARRIER(in_dev) IN_DEV_ANDCONF((in_dev), \ ARP_EVICT_NOCARRIER) struct in_ifaddr { struct hlist_node hash; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; __be32 ifa_local; __be32 ifa_address; __be32 ifa_mask; __u32 ifa_rt_priority; __be32 ifa_broadcast; unsigned char ifa_scope; unsigned char ifa_prefixlen; unsigned char ifa_proto; __u32 ifa_flags; char ifa_label[IFNAMSIZ]; /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. 
*/ __u32 ifa_valid_lft; __u32 ifa_preferred_lft; unsigned long ifa_cstamp; /* created timestamp */ unsigned long ifa_tstamp; /* updated timestamp */ }; struct in_validator_info { __be32 ivi_addr; struct in_device *ivi_dev; struct netlink_ext_ack *extack; }; int register_inetaddr_notifier(struct notifier_block *nb); int unregister_inetaddr_notifier(struct notifier_block *nb); int register_inetaddr_validator_notifier(struct notifier_block *nb); int unregister_inetaddr_validator_notifier(struct notifier_block *nb); void inet_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv4_devconf *devconf); struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) { return __ip_dev_find(net, addr, true); } int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *); #ifdef CONFIG_INET int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size); #else static inline int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { return 0; } #endif void devinet_init(void); struct in_device *inetdev_by_index(struct net *, int); __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr); static inline bool inet_ifa_match(__be32 addr, const struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); } /* * Check if a mask is acceptable. */ static __inline__ bool bad_mask(__be32 mask, __be32 addr) { __u32 hmask; if (addr & (mask = ~mask)) return true; hmask = ntohl(mask); if (hmask & (hmask+1)) return true; return false; } #define in_dev_for_each_ifa_rtnl(ifa, in_dev) \ for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa; \ ifa = rtnl_dereference(ifa->ifa_next)) #define in_dev_for_each_ifa_rcu(ifa, in_dev) \ for (ifa = rcu_dereference((in_dev)->ifa_list); ifa; \ ifa = rcu_dereference(ifa->ifa_next)) static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->ip_ptr); } static inline struct in_device *in_dev_get(const struct net_device *dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) refcount_inc(&in_dev->refcnt); rcu_read_unlock(); return in_dev; } static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) { return rtnl_dereference(dev->ip_ptr); } /* called with rcu_read_lock or rtnl held */ static inline bool ip_ignore_linkdown(const struct net_device *dev) { struct in_device *in_dev; bool rc = false; in_dev = rcu_dereference_rtnl(dev->ip_ptr); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rc = true; return rc; } static inline struct neigh_parms *__in_dev_arp_parms_get_rcu(const struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); return in_dev ? 
in_dev->arp_parms : NULL; } void in_dev_finish_destroy(struct in_device *idev); static inline void in_dev_put(struct in_device *idev) { if (refcount_dec_and_test(&idev->refcnt)) in_dev_finish_destroy(idev); } #define __in_dev_put(idev) refcount_dec(&(idev)->refcnt) #define in_dev_hold(idev) refcount_inc(&(idev)->refcnt) #endif /* __KERNEL__ */ static __inline__ __be32 inet_make_mask(int logmask) { if (logmask) return htonl(~((1U<<(32-logmask))-1)); return 0; } static __inline__ int inet_mask_len(__be32 mask) { __u32 hmask = ntohl(mask); if (!hmask) return 0; return 32 - ffz(~hmask); } #endif /* _LINUX_INETDEVICE_H */
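/*
 * Editor's illustrative sketch (not part of inetdevice.h): a hypothetical
 * self-test exercising the pure mask helpers defined above, namely
 * inet_make_mask(), inet_mask_len() and bad_mask(). It only demonstrates
 * the invariants visible in the code: a /n prefix round-trips through
 * make/len, an address with host bits set is rejected by bad_mask(), and
 * a non-contiguous mask fails the contiguity check.
 */
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/inetdevice.h>

static void __maybe_unused inet_mask_helpers_demo(void)
{
	__be32 mask24 = inet_make_mask(24);	/* 255.255.255.0 */
	__be32 net    = htonl(0xc0a80100);	/* 192.168.1.0 */
	__be32 host   = htonl(0xc0a80101);	/* 192.168.1.1 */
	__be32 holey  = htonl(0xffff00ff);	/* non-contiguous mask */

	/* Prefix length -> mask -> prefix length round trip. */
	WARN_ON(inet_mask_len(mask24) != 24);

	/* A network address with no host bits passes the check... */
	WARN_ON(bad_mask(mask24, net));

	/* ...while an address with bits outside the mask is "bad". */
	WARN_ON(!bad_mask(mask24, host));

	/* A mask with holes fails the contiguity test. */
	WARN_ON(!bad_mask(holey, 0));
}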
1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 // SPDX-License-Identifier: GPL-2.0 /* * /proc/sys support */ #include <linux/init.h> #include <linux/sysctl.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/printk.h> #include <linux/security.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/namei.h> #include <linux/mm.h> #include <linux/uio.h> #include <linux/module.h> #include <linux/bpf-cgroup.h> #include <linux/mount.h> #include <linux/kmemleak.h> #include "internal.h" #define list_for_each_table_entry(entry, header) \ entry = header->ctl_table; \ for (size_t i = 0 ; i < header->ctl_table_size && entry->procname; ++i, entry++) static const struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; /* Support for permanently empty directories */ static struct ctl_table sysctl_mount_point[] = { {.type = SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY } }; /** * register_sysctl_mount_point() - registers a sysctl mount point * @path: path for the mount point * * Used to create a permanently empty directory to serve as mount point. * There are some subtle but important permission checks this allows in the * case of unprivileged mounts. 
*/ struct ctl_table_header *register_sysctl_mount_point(const char *path) { return register_sysctl(path, sysctl_mount_point); } EXPORT_SYMBOL(register_sysctl_mount_point); #define sysctl_is_perm_empty_ctl_table(tptr) \ (tptr[0].type == SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY) #define sysctl_is_perm_empty_ctl_header(hptr) \ (sysctl_is_perm_empty_ctl_table(hptr->ctl_table)) #define sysctl_set_perm_empty_ctl_header(hptr) \ (hptr->ctl_table[0].type = SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY) #define sysctl_clear_perm_empty_ctl_header(hptr) \ (hptr->ctl_table[0].type = SYSCTL_TABLE_TYPE_DEFAULT) void proc_sys_poll_notify(struct ctl_table_poll *poll) { if (!poll) return; atomic_inc(&poll->event); wake_up_interruptible(&poll->wait); } static struct ctl_table root_table[] = { { .procname = "", .mode = S_IFDIR|S_IRUGO|S_IXUGO, }, }; static struct ctl_table_root sysctl_table_root = { .default_set.dir.header = { {{.count = 1, .nreg = 1, .ctl_table = root_table }}, .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }, }; static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_table **pentry); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); static void sysctl_print_dir(struct ctl_dir *dir) { if (dir->header.parent) sysctl_print_dir(dir->header.parent); pr_cont("%s/", dir->header.ctl_table[0].procname); } static int namecmp(const char *name1, int len1, const char *name2, int len2) { int cmp; cmp = memcmp(name1, name2, min(len1, len2)); if (cmp == 0) cmp = len1 - len2; return cmp; } /* Called under sysctl_lock */ static struct ctl_table *find_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; struct rb_node *node = dir->root.rb_node; while (node) { struct ctl_node *ctl_node; const char *procname; int cmp; ctl_node = rb_entry(node, struct ctl_node, node); head = ctl_node->header; entry = &head->ctl_table[ctl_node - head->node]; procname = entry->procname; cmp = namecmp(name, namelen, procname, strlen(procname)); if (cmp < 0) node = node->rb_left; else if (cmp > 0) node = node->rb_right; else { *phead = head; return entry; } } return NULL; } static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) { struct rb_node *node = &head->node[entry - head->ctl_table].node; struct rb_node **p = &head->parent->root.rb_node; struct rb_node *parent = NULL; const char *name = entry->procname; int namelen = strlen(name); while (*p) { struct ctl_table_header *parent_head; struct ctl_table *parent_entry; struct ctl_node *parent_node; const char *parent_name; int cmp; parent = *p; parent_node = rb_entry(parent, struct ctl_node, node); parent_head = parent_node->header; parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; parent_name = parent_entry->procname; cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); if (cmp < 0) p = &(*p)->rb_left; else if (cmp > 0) p = &(*p)->rb_right; else { pr_err("sysctl duplicate entry: "); sysctl_print_dir(head->parent); pr_cont("%s\n", entry->procname); return -EEXIST; } } rb_link_node(node, parent, p); rb_insert_color(node, &head->parent->root); return 0; } static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) { struct rb_node *node = &head->node[entry - head->ctl_table].node; rb_erase(node, 
&head->parent->root); } static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, struct ctl_node *node, struct ctl_table *table, size_t table_size) { head->ctl_table = table; head->ctl_table_size = table_size; head->ctl_table_arg = table; head->used = 0; head->count = 1; head->nreg = 1; head->unregistering = NULL; head->root = root; head->set = set; head->parent = NULL; head->node = node; INIT_HLIST_HEAD(&head->inodes); if (node) { struct ctl_table *entry; list_for_each_table_entry(entry, head) { node->header = head; node++; } } } static void erase_header(struct ctl_table_header *head) { struct ctl_table *entry; list_for_each_table_entry(entry, head) erase_entry(head, entry); } static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { struct ctl_table *entry; struct ctl_table_header *dir_h = &dir->header; int err; /* Is this a permanently empty directory? */ if (sysctl_is_perm_empty_ctl_header(dir_h)) return -EROFS; /* Am I creating a permanently empty directory? */ if (header->ctl_table_size > 0 && sysctl_is_perm_empty_ctl_table(header->ctl_table)) { if (!RB_EMPTY_ROOT(&dir->root)) return -EINVAL; sysctl_set_perm_empty_ctl_header(dir_h); } dir_h->nreg++; header->parent = dir; err = insert_links(header); if (err) goto fail_links; list_for_each_table_entry(entry, header) { err = insert_entry(header, entry); if (err) goto fail; } return 0; fail: erase_header(header); put_links(header); fail_links: if (header->ctl_table == sysctl_mount_point) sysctl_clear_perm_empty_ctl_header(dir_h); header->parent = NULL; drop_sysctl_table(dir_h); return err; } /* called under sysctl_lock */ static int use_table(struct ctl_table_header *p) { if (unlikely(p->unregistering)) return 0; p->used++; return 1; } /* called under sysctl_lock */ static void unuse_table(struct ctl_table_header *p) { if (!--p->used) if (unlikely(p->unregistering)) complete(p->unregistering); } static void proc_sys_invalidate_dcache(struct ctl_table_header *head) { proc_invalidate_siblings_dcache(&head->inodes, &sysctl_lock); } /* called under sysctl_lock, will reacquire if has to wait */ static void start_unregistering(struct ctl_table_header *p) { /* * if p->used is 0, nobody will ever touch that entry again; * we'll eliminate all paths to it before dropping sysctl_lock */ if (unlikely(p->used)) { struct completion wait; init_completion(&wait); p->unregistering = &wait; spin_unlock(&sysctl_lock); wait_for_completion(&wait); } else { /* anything non-NULL; we'll never dereference it */ p->unregistering = ERR_PTR(-EINVAL); spin_unlock(&sysctl_lock); } /* * Invalidate dentries for unregistered sysctls: namespaced sysctls * can have duplicate names and contaminate dcache very badly. */ proc_sys_invalidate_dcache(p); /* * do not remove from the list until nobody holds it; walking the * list in do_sysctl() relies on that. 
*/ spin_lock(&sysctl_lock); erase_header(p); } static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) { BUG_ON(!head); spin_lock(&sysctl_lock); if (!use_table(head)) head = ERR_PTR(-ENOENT); spin_unlock(&sysctl_lock); return head; } static void sysctl_head_finish(struct ctl_table_header *head) { if (!head) return; spin_lock(&sysctl_lock); unuse_table(head); spin_unlock(&sysctl_lock); } static struct ctl_table_set * lookup_header_set(struct ctl_table_root *root) { struct ctl_table_set *set = &root->default_set; if (root->lookup) set = root->lookup(root); return set; } static struct ctl_table *lookup_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; spin_lock(&sysctl_lock); entry = find_entry(&head, dir, name, namelen); if (entry && use_table(head)) *phead = head; else entry = NULL; spin_unlock(&sysctl_lock); return entry; } static struct ctl_node *first_usable_entry(struct rb_node *node) { struct ctl_node *ctl_node; for (;node; node = rb_next(node)) { ctl_node = rb_entry(node, struct ctl_node, node); if (use_table(ctl_node->header)) return ctl_node; } return NULL; } static void first_entry(struct ctl_dir *dir, struct ctl_table_header **phead, struct ctl_table **pentry) { struct ctl_table_header *head = NULL; struct ctl_table *entry = NULL; struct ctl_node *ctl_node; spin_lock(&sysctl_lock); ctl_node = first_usable_entry(rb_first(&dir->root)); spin_unlock(&sysctl_lock); if (ctl_node) { head = ctl_node->header; entry = &head->ctl_table[ctl_node - head->node]; } *phead = head; *pentry = entry; } static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) { struct ctl_table_header *head = *phead; struct ctl_table *entry = *pentry; struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; spin_lock(&sysctl_lock); unuse_table(head); ctl_node = first_usable_entry(rb_next(&ctl_node->node)); spin_unlock(&sysctl_lock); head = NULL; if (ctl_node) { head = ctl_node->header; entry = &head->ctl_table[ctl_node - head->node]; } *phead = head; *pentry = entry; } /* * sysctl_perm does NOT grant the superuser all rights automatically, because * some sysctl variables are readonly even to root. 
*/ static int test_perm(int mode, int op) { if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) mode >>= 6; else if (in_egroup_p(GLOBAL_ROOT_GID)) mode >>= 3; if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) return 0; return -EACCES; } static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op) { struct ctl_table_root *root = head->root; int mode; if (root->permissions) mode = root->permissions(head, table); else mode = table->mode; return test_perm(mode, op); } static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { struct ctl_table_root *root = head->root; struct inode *inode; struct proc_inode *ei; inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); inode->i_ino = get_next_ino(); ei = PROC_I(inode); spin_lock(&sysctl_lock); if (unlikely(head->unregistering)) { spin_unlock(&sysctl_lock); iput(inode); return ERR_PTR(-ENOENT); } ei->sysctl = head; ei->sysctl_entry = table; hlist_add_head_rcu(&ei->sibling_inodes, &head->inodes); head->count++; spin_unlock(&sysctl_lock); simple_inode_init_ts(inode); inode->i_mode = table->mode; if (!S_ISDIR(table->mode)) { inode->i_mode |= S_IFREG; inode->i_op = &proc_sys_inode_operations; inode->i_fop = &proc_sys_file_operations; } else { inode->i_mode |= S_IFDIR; inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; if (sysctl_is_perm_empty_ctl_header(head)) make_empty_dir_inode(inode); } if (root->set_ownership) root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); else { inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; } return inode; } void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) { spin_lock(&sysctl_lock); hlist_del_init_rcu(&PROC_I(inode)->sibling_inodes); if (!--head->count) kfree_rcu(head, rcu); spin_unlock(&sysctl_lock); } static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; if (!head) head = &sysctl_table_root.default_set.dir.header; return sysctl_head_grab(head); } static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct ctl_table_header *head = grab_header(dir); struct ctl_table_header *h = NULL; const struct qstr *name = &dentry->d_name; struct ctl_table *p; struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); struct ctl_dir *ctl_dir; int ret; if (IS_ERR(head)) return ERR_CAST(head); ctl_dir = container_of(head, struct ctl_dir, header); p = lookup_entry(&h, ctl_dir, name->name, name->len); if (!p) goto out; if (S_ISLNK(p->mode)) { ret = sysctl_follow_link(&h, &p); err = ERR_PTR(ret); if (ret) goto out; } d_set_d_op(dentry, &proc_sys_dentry_operations); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); err = d_splice_alias(inode, dentry); out: if (h) sysctl_head_finish(h); sysctl_head_finish(head); return err; } static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, int write) { struct inode *inode = file_inode(iocb->ki_filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; size_t count = iov_iter_count(iter); char *kbuf; ssize_t error; if (IS_ERR(head)) return PTR_ERR(head); /* * At this point we know that the sysctl was not unregistered * and won't be until we finish. */ error = -EPERM; if (sysctl_perm(head, table, write ? 
MAY_WRITE : MAY_READ)) goto out; /* if that can happen at all, it should be -EINVAL, not -EISDIR */ error = -EINVAL; if (!table->proc_handler) goto out; /* don't even try if the size is too large */ error = -ENOMEM; if (count >= KMALLOC_MAX_SIZE) goto out; kbuf = kvzalloc(count + 1, GFP_KERNEL); if (!kbuf) goto out; if (write) { error = -EFAULT; if (!copy_from_iter_full(kbuf, count, iter)) goto out_free_buf; kbuf[count] = '\0'; } error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count, &iocb->ki_pos); if (error) goto out_free_buf; /* careful: calling conventions are nasty here */ error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos); if (error) goto out_free_buf; if (!write) { error = -EFAULT; if (copy_to_iter(kbuf, count, iter) < count) goto out_free_buf; } error = count; out_free_buf: kvfree(kbuf); out: sysctl_head_finish(head); return error; } static ssize_t proc_sys_read(struct kiocb *iocb, struct iov_iter *iter) { return proc_sys_call_handler(iocb, iter, 0); } static ssize_t proc_sys_write(struct kiocb *iocb, struct iov_iter *iter) { return proc_sys_call_handler(iocb, iter, 1); } static int proc_sys_open(struct inode *inode, struct file *filp) { struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; /* sysctl was unregistered */ if (IS_ERR(head)) return PTR_ERR(head); if (table->poll) filp->private_data = proc_sys_poll_event(table->poll); sysctl_head_finish(head); return 0; } static __poll_t proc_sys_poll(struct file *filp, poll_table *wait) { struct inode *inode = file_inode(filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; __poll_t ret = DEFAULT_POLLMASK; unsigned long event; /* sysctl was unregistered */ if (IS_ERR(head)) return EPOLLERR | EPOLLHUP; if (!table->proc_handler) goto out; if (!table->poll) goto out; event = (unsigned long)filp->private_data; poll_wait(filp, &table->poll->wait, wait); if (event != atomic_read(&table->poll->event)) { filp->private_data = proc_sys_poll_event(table->poll); ret = EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; } out: sysctl_head_finish(head); return ret; } static bool proc_sys_fill_cache(struct file *file, struct dir_context *ctx, struct ctl_table_header *head, struct ctl_table *table) { struct dentry *child, *dir = file->f_path.dentry; struct inode *inode; struct qstr qname; ino_t ino = 0; unsigned type = DT_UNKNOWN; qname.name = table->procname; qname.len = strlen(table->procname); qname.hash = full_name_hash(dir, qname.name, qname.len); child = d_lookup(dir, &qname); if (!child) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); child = d_alloc_parallel(dir, &qname, &wq); if (IS_ERR(child)) return false; if (d_in_lookup(child)) { struct dentry *res; d_set_d_op(child, &proc_sys_dentry_operations); inode = proc_sys_make_inode(dir->d_sb, head, table); res = d_splice_alias(inode, child); d_lookup_done(child); if (unlikely(res)) { if (IS_ERR(res)) { dput(child); return false; } dput(child); child = res; } } } inode = d_inode(child); ino = inode->i_ino; type = inode->i_mode >> 12; dput(child); return dir_emit(ctx, qname.name, qname.len, ino, type); } static bool proc_sys_link_fill_cache(struct file *file, struct dir_context *ctx, struct ctl_table_header *head, struct ctl_table *table) { bool ret = true; head = sysctl_head_grab(head); if (IS_ERR(head)) return false; /* It is not an error if we can not follow the link ignore it */ if (sysctl_follow_link(&head, &table)) goto out; ret = proc_sys_fill_cache(file, ctx, 
head, table); out: sysctl_head_finish(head); return ret; } static int scan(struct ctl_table_header *head, struct ctl_table *table, unsigned long *pos, struct file *file, struct dir_context *ctx) { bool res; if ((*pos)++ < ctx->pos) return true; if (unlikely(S_ISLNK(table->mode))) res = proc_sys_link_fill_cache(file, ctx, head, table); else res = proc_sys_fill_cache(file, ctx, head, table); if (res) ctx->pos = *pos; return res; } static int proc_sys_readdir(struct file *file, struct dir_context *ctx) { struct ctl_table_header *head = grab_header(file_inode(file)); struct ctl_table_header *h = NULL; struct ctl_table *entry; struct ctl_dir *ctl_dir; unsigned long pos; if (IS_ERR(head)) return PTR_ERR(head); ctl_dir = container_of(head, struct ctl_dir, header); if (!dir_emit_dots(file, ctx)) goto out; pos = 2; for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { if (!scan(h, entry, &pos, file, ctx)) { sysctl_head_finish(h); break; } } out: sysctl_head_finish(head); return 0; } static int proc_sys_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { /* * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. */ struct ctl_table_header *head; struct ctl_table *table; int error; /* Executable files are not allowed under /proc/sys/ */ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) return -EACCES; head = grab_header(inode); if (IS_ERR(head)) return PTR_ERR(head); table = PROC_I(inode)->sysctl_entry; if (!table) /* global root - r-xr-xr-x */ error = mask & MAY_WRITE ? -EACCES : 0; else /* Use the permissions on the sysctl table entry */ error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK); sysctl_head_finish(head); return error; } static int proc_sys_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int error; if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) return -EPERM; error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; setattr_copy(&nop_mnt_idmap, inode, attr); return 0; } static int proc_sys_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; if (IS_ERR(head)) return PTR_ERR(head); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (table) stat->mode = (stat->mode & S_IFMT) | table->mode; sysctl_head_finish(head); return 0; } static const struct file_operations proc_sys_file_operations = { .open = proc_sys_open, .poll = proc_sys_poll, .read_iter = proc_sys_read, .write_iter = proc_sys_write, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, .llseek = default_llseek, }; static const struct file_operations proc_sys_dir_file_operations = { .read = generic_read_dir, .iterate_shared = proc_sys_readdir, .llseek = generic_file_llseek, }; static const struct inode_operations proc_sys_inode_operations = { .permission = proc_sys_permission, .setattr = proc_sys_setattr, .getattr = proc_sys_getattr, }; static const struct inode_operations proc_sys_dir_operations = { .lookup = proc_sys_lookup, .permission = proc_sys_permission, .setattr = proc_sys_setattr, .getattr = proc_sys_getattr, }; static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; return !PROC_I(d_inode(dentry))->sysctl->unregistering; } static int 
proc_sys_delete(const struct dentry *dentry) { return !!PROC_I(d_inode(dentry))->sysctl->unregistering; } static int sysctl_is_seen(struct ctl_table_header *p) { struct ctl_table_set *set = p->set; int res; spin_lock(&sysctl_lock); if (p->unregistering) res = 0; else if (!set->is_seen) res = 1; else res = set->is_seen(set); spin_unlock(&sysctl_lock); return res; } static int proc_sys_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct ctl_table_header *head; struct inode *inode; /* Although proc doesn't have negative dentries, rcu-walk means * that inode here can be NULL */ /* AV: can it, indeed? */ inode = d_inode_rcu(dentry); if (!inode) return 1; if (name->len != len) return 1; if (memcmp(name->name, str, len)) return 1; head = rcu_dereference(PROC_I(inode)->sysctl); return !head || !sysctl_is_seen(head); } static const struct dentry_operations proc_sys_dentry_operations = { .d_revalidate = proc_sys_revalidate, .d_delete = proc_sys_delete, .d_compare = proc_sys_compare, }; static struct ctl_dir *find_subdir(struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; entry = find_entry(&head, dir, name, namelen); if (!entry) return ERR_PTR(-ENOENT); if (!S_ISDIR(entry->mode)) return ERR_PTR(-ENOTDIR); return container_of(head, struct ctl_dir, header); } static struct ctl_dir *new_dir(struct ctl_table_set *set, const char *name, int namelen) { struct ctl_table *table; struct ctl_dir *new; struct ctl_node *node; char *new_name; new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + sizeof(struct ctl_table)*2 + namelen + 1, GFP_KERNEL); if (!new) return NULL; node = (struct ctl_node *)(new + 1); table = (struct ctl_table *)(node + 1); new_name = (char *)(table + 2); memcpy(new_name, name, namelen); table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; init_header(&new->header, set->dir.header.root, set, node, table, 1); return new; } /** * get_subdir - find or create a subdir with the specified name. * @dir: Directory to create the subdirectory in * @name: The name of the subdirectory to find or create * @namelen: The length of name * * Takes a directory with an elevated reference count so we know that * if we drop the lock the directory will not go away. Upon success * the reference is moved from @dir to the returned subdirectory. * Upon error an error code is returned and the reference on @dir is * simply dropped. */ static struct ctl_dir *get_subdir(struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_set *set = dir->header.set; struct ctl_dir *subdir, *new = NULL; int err; spin_lock(&sysctl_lock); subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) goto failed; spin_unlock(&sysctl_lock); new = new_dir(set, name, namelen); spin_lock(&sysctl_lock); subdir = ERR_PTR(-ENOMEM); if (!new) goto failed; /* Was the subdir added while we dropped the lock? */ subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) goto failed; /* Nope. Use the our freshly made directory entry. 
*/ err = insert_header(dir, &new->header); subdir = ERR_PTR(err); if (err) goto failed; subdir = new; found: subdir->header.nreg++; failed: if (IS_ERR(subdir)) { pr_err("sysctl could not get directory: "); sysctl_print_dir(dir); pr_cont("%*.*s %ld\n", namelen, namelen, name, PTR_ERR(subdir)); } drop_sysctl_table(&dir->header); if (new) drop_sysctl_table(&new->header); spin_unlock(&sysctl_lock); return subdir; } static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) { struct ctl_dir *parent; const char *procname; if (!dir->header.parent) return &set->dir; parent = xlate_dir(set, dir->header.parent); if (IS_ERR(parent)) return parent; procname = dir->header.ctl_table[0].procname; return find_subdir(parent, procname, strlen(procname)); } static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_table **pentry) { struct ctl_table_header *head; struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_table *entry; struct ctl_dir *dir; int ret; spin_lock(&sysctl_lock); root = (*pentry)->data; set = lookup_header_set(root); dir = xlate_dir(set, (*phead)->parent); if (IS_ERR(dir)) ret = PTR_ERR(dir); else { const char *procname = (*pentry)->procname; head = NULL; entry = find_entry(&head, dir, procname, strlen(procname)); ret = -ENOENT; if (entry && use_table(head)) { unuse_table(*phead); *phead = head; *pentry = entry; ret = 0; } } spin_unlock(&sysctl_lock); return ret; } static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_err("sysctl table check failed: %s/%s %pV\n", path, table->procname, &vaf); va_end(args); return -EINVAL; } static int sysctl_check_table_array(const char *path, struct ctl_table *table) { int err = 0; if ((table->proc_handler == proc_douintvec) || (table->proc_handler == proc_douintvec_minmax)) { if (table->maxlen != sizeof(unsigned int)) err |= sysctl_err(path, table, "array not allowed"); } if (table->proc_handler == proc_dou8vec_minmax) { if (table->maxlen != sizeof(u8)) err |= sysctl_err(path, table, "array not allowed"); } if (table->proc_handler == proc_dobool) { if (table->maxlen != sizeof(bool)) err |= sysctl_err(path, table, "array not allowed"); } return err; } static int sysctl_check_table(const char *path, struct ctl_table_header *header) { struct ctl_table *entry; int err = 0; list_for_each_table_entry(entry, header) { if ((entry->proc_handler == proc_dostring) || (entry->proc_handler == proc_dobool) || (entry->proc_handler == proc_dointvec) || (entry->proc_handler == proc_douintvec) || (entry->proc_handler == proc_douintvec_minmax) || (entry->proc_handler == proc_dointvec_minmax) || (entry->proc_handler == proc_dou8vec_minmax) || (entry->proc_handler == proc_dointvec_jiffies) || (entry->proc_handler == proc_dointvec_userhz_jiffies) || (entry->proc_handler == proc_dointvec_ms_jiffies) || (entry->proc_handler == proc_doulongvec_minmax) || (entry->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { if (!entry->data) err |= sysctl_err(path, entry, "No data"); if (!entry->maxlen) err |= sysctl_err(path, entry, "No maxlen"); else err |= sysctl_check_table_array(path, entry); } if (!entry->proc_handler) err |= sysctl_err(path, entry, "No proc_handler"); if ((entry->mode & (S_IRUGO|S_IWUGO)) != entry->mode) err |= sysctl_err(path, entry, "bogus .mode 0%o", entry->mode); } return err; } static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table_header *head) { struct ctl_table 
*link_table, *entry, *link; struct ctl_table_header *links; struct ctl_node *node; char *link_name; int nr_entries, name_bytes; name_bytes = 0; nr_entries = 0; list_for_each_table_entry(entry, head) { nr_entries++; name_bytes += strlen(entry->procname) + 1; } links = kzalloc(sizeof(struct ctl_table_header) + sizeof(struct ctl_node)*nr_entries + sizeof(struct ctl_table)*(nr_entries + 1) + name_bytes, GFP_KERNEL); if (!links) return NULL; node = (struct ctl_node *)(links + 1); link_table = (struct ctl_table *)(node + nr_entries); link_name = (char *)&link_table[nr_entries + 1]; link = link_table; list_for_each_table_entry(entry, head) { int len = strlen(entry->procname) + 1; memcpy(link_name, entry->procname, len); link->procname = link_name; link->mode = S_IFLNK|S_IRWXUGO; link->data = head->root; link_name += len; link++; } init_header(links, dir->header.root, dir->header.set, node, link_table, head->ctl_table_size); links->nreg = nr_entries; return links; } static bool get_links(struct ctl_dir *dir, struct ctl_table_header *header, struct ctl_table_root *link_root) { struct ctl_table_header *tmp_head; struct ctl_table *entry, *link; if (header->ctl_table_size == 0 || sysctl_is_perm_empty_ctl_table(header->ctl_table)) return true; /* Are there links available for every entry in table? */ list_for_each_table_entry(entry, header) { const char *procname = entry->procname; link = find_entry(&tmp_head, dir, procname, strlen(procname)); if (!link) return false; if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) continue; if (S_ISLNK(link->mode) && (link->data == link_root)) continue; return false; } /* The checks passed. Increase the registration count on the links */ list_for_each_table_entry(entry, header) { const char *procname = entry->procname; link = find_entry(&tmp_head, dir, procname, strlen(procname)); tmp_head->nreg++; } return true; } static int insert_links(struct ctl_table_header *head) { struct ctl_table_set *root_set = &sysctl_table_root.default_set; struct ctl_dir *core_parent; struct ctl_table_header *links; int err; if (head->set == root_set) return 0; core_parent = xlate_dir(root_set, head->parent); if (IS_ERR(core_parent)) return 0; if (get_links(core_parent, head, head->root)) return 0; core_parent->header.nreg++; spin_unlock(&sysctl_lock); links = new_links(core_parent, head); spin_lock(&sysctl_lock); err = -ENOMEM; if (!links) goto out; err = 0; if (get_links(core_parent, head, head->root)) { kfree(links); goto out; } err = insert_header(core_parent, links); if (err) kfree(links); out: drop_sysctl_table(&core_parent->header); return err; } /* Find the directory for the ctl_table. If one is not found create it. */ static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path) { const char *name, *nextname; for (name = path; name; name = nextname) { int namelen; nextname = strchr(name, '/'); if (nextname) { namelen = nextname - name; nextname++; } else { namelen = strlen(name); } if (namelen == 0) continue; /* * namelen ensures if name is "foo/bar/yay" only foo is * registered first. We traverse as if using mkdir -p and * return a ctl_dir for the last directory entry. */ dir = get_subdir(dir, name, namelen); if (IS_ERR(dir)) break; } return dir; } /** * __register_sysctl_table - register a leaf sysctl table * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure without any child. This table * should not be free'd after registration. So it should not be * used on stack. 
It can either be a global or dynamically allocated * by the caller and free'd later after sysctl unregistration. * @table_size : The number of elements in table * * Register a sysctl table hierarchy. @table should be a filled in ctl_table * array. A completely 0 filled entry terminates the table. * * The members of the &struct ctl_table structure are used as follows: * * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not * enter a sysctl file * * data - a pointer to data for use by proc_handler * * maxlen - the maximum size in bytes of the data * * mode - the file permissions for the /proc/sys file * * child - must be %NULL. * * proc_handler - the text handler routine (described below) * * extra1, extra2 - extra pointers usable by the proc handler routines * XXX: we should eventually modify these to use long min / max [0] * [0] https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes (where child is not NULL) are not allowed, * sysctl_check_table() verifies this. * * There must be a proc_handler routine for any terminal nodes. * Several default handlers are available to cover common cases - * * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() * * It is the handler's job to read the input buffer from user memory * and process it. The handler should return 0 on success. * * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. */ struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, const char *path, struct ctl_table *table, size_t table_size) { struct ctl_table_root *root = set->dir.header.root; struct ctl_table_header *header; struct ctl_dir *dir; struct ctl_node *node; header = kzalloc(sizeof(struct ctl_table_header) + sizeof(struct ctl_node)*table_size, GFP_KERNEL_ACCOUNT); if (!header) return NULL; node = (struct ctl_node *)(header + 1); init_header(header, root, set, node, table, table_size); if (sysctl_check_table(path, header)) goto fail; spin_lock(&sysctl_lock); dir = &set->dir; /* Reference moved down the directory tree get_subdir */ dir->header.nreg++; spin_unlock(&sysctl_lock); dir = sysctl_mkdir_p(dir, path); if (IS_ERR(dir)) goto fail; spin_lock(&sysctl_lock); if (insert_header(dir, header)) goto fail_put_dir_locked; drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); return header; fail_put_dir_locked: drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: kfree(header); return NULL; } /** * register_sysctl_sz - register a sysctl table * @path: The path to the directory the sysctl table is in. If the path * doesn't exist we will create it for you. * @table: the table structure. The calller must ensure the life of the @table * will be kept during the lifetime use of the syctl. It must not be freed * until unregister_sysctl_table() is called with the given returned table * with this registration. If your code is non modular then you don't need * to call unregister_sysctl_table() and can instead use something like * register_sysctl_init() which does not care for the result of the syctl * registration. * @table_size: The number of elements in table. * * Register a sysctl table. @table should be a filled in ctl_table * array. A completely 0 filled entry terminates the table. * * See __register_sysctl_table for more details. 
*/ struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table, size_t table_size) { return __register_sysctl_table(&sysctl_table_root.default_set, path, table, table_size); } EXPORT_SYMBOL(register_sysctl_sz); /** * __register_sysctl_init() - register sysctl table to path * @path: path name for sysctl base. If that path doesn't exist we will create * it for you. * @table: This is the sysctl table that needs to be registered to the path. * The caller must ensure the life of the @table will be kept during the * lifetime use of the sysctl. * @table_name: The name of sysctl table, only used for log printing when * registration fails * @table_size: The number of elements in table * * The sysctl interface is used by userspace to query or modify at runtime * a predefined value set on a variable. These variables however have default * values pre-set. Code which depends on these variables will always work even * if register_sysctl() fails. If register_sysctl() fails you'd just loose the * ability to query or modify the sysctls dynamically at run time. Chances of * register_sysctl() failing on init are extremely low, and so for both reasons * this function does not return any error as it is used by initialization code. * * Context: if your base directory does not exist it will be created for you. */ void __init __register_sysctl_init(const char *path, struct ctl_table *table, const char *table_name, size_t table_size) { struct ctl_table_header *hdr = register_sysctl_sz(path, table, table_size); if (unlikely(!hdr)) { pr_err("failed when register_sysctl_sz %s to %s\n", table_name, path); return; } kmemleak_not_leak(hdr); } static void put_links(struct ctl_table_header *header) { struct ctl_table_set *root_set = &sysctl_table_root.default_set; struct ctl_table_root *root = header->root; struct ctl_dir *parent = header->parent; struct ctl_dir *core_parent; struct ctl_table *entry; if (header->set == root_set) return; core_parent = xlate_dir(root_set, parent); if (IS_ERR(core_parent)) return; list_for_each_table_entry(entry, header) { struct ctl_table_header *link_head; struct ctl_table *link; const char *name = entry->procname; link = find_entry(&link_head, core_parent, name, strlen(name)); if (link && ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || (S_ISLNK(link->mode) && (link->data == root)))) { drop_sysctl_table(link_head); } else { pr_err("sysctl link missing during unregister: "); sysctl_print_dir(parent); pr_cont("%s\n", name); } } } static void drop_sysctl_table(struct ctl_table_header *header) { struct ctl_dir *parent = header->parent; if (--header->nreg) return; if (parent) { put_links(header); start_unregistering(header); } if (!--header->count) kfree_rcu(header, rcu); if (parent) drop_sysctl_table(&parent->header); } /** * unregister_sysctl_table - unregister a sysctl table hierarchy * @header: the header returned from register_sysctl or __register_sysctl_table * * Unregisters the sysctl table and all children. proc entries may not * actually be removed until they are no longer used by anyone. 
*/ void unregister_sysctl_table(struct ctl_table_header * header) { might_sleep(); if (header == NULL) return; spin_lock(&sysctl_lock); drop_sysctl_table(header); spin_unlock(&sysctl_lock); } EXPORT_SYMBOL(unregister_sysctl_table); void setup_sysctl_set(struct ctl_table_set *set, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { memset(set, 0, sizeof(*set)); set->is_seen = is_seen; init_header(&set->dir.header, root, set, NULL, root_table, 1); } void retire_sysctl_set(struct ctl_table_set *set) { WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); } int __init proc_sys_init(void) { struct proc_dir_entry *proc_sys_root; proc_sys_root = proc_mkdir("sys", NULL); proc_sys_root->proc_iops = &proc_sys_dir_operations; proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; return sysctl_init_bases(); } struct sysctl_alias { const char *kernel_param; const char *sysctl_param; }; /* * Historically some settings had both sysctl and a command line parameter. * With the generic sysctl. parameter support, we can handle them at a single * place and only keep the historical name for compatibility. This is not meant * to add brand new aliases. When adding existing aliases, consider whether * the possibly different moment of changing the value (e.g. from early_param * to the moment do_sysctl_args() is called) is an issue for the specific * parameter. */ static const struct sysctl_alias sysctl_aliases[] = { {"hardlockup_all_cpu_backtrace", "kernel.hardlockup_all_cpu_backtrace" }, {"hung_task_panic", "kernel.hung_task_panic" }, {"numa_zonelist_order", "vm.numa_zonelist_order" }, {"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" }, { } }; static const char *sysctl_find_alias(char *param) { const struct sysctl_alias *alias; for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) { if (strcmp(alias->kernel_param, param) == 0) return alias->sysctl_param; } return NULL; } bool sysctl_is_alias(char *param) { const char *alias = sysctl_find_alias(param); return alias != NULL; } /* Set sysctl value passed on kernel command line. */ static int process_sysctl_arg(char *param, char *val, const char *unused, void *arg) { char *path; struct vfsmount **proc_mnt = arg; struct file_system_type *proc_fs_type; struct file *file; int len; int err; loff_t pos = 0; ssize_t wret; if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) { param += sizeof("sysctl") - 1; if (param[0] != '/' && param[0] != '.') return 0; param++; } else { param = (char *) sysctl_find_alias(param); if (!param) return 0; } if (!val) return -EINVAL; len = strlen(val); if (len == 0) return -EINVAL; /* * To set sysctl options, we use a temporary mount of proc, look up the * respective sys/ file and write to it. To avoid mounting it when no * options were given, we mount it only when the first sysctl option is * found. Why not a persistent mount? There are problems with a * persistent mount of proc in that it forces userspace not to use any * proc mount options. 
*/ if (!*proc_mnt) { proc_fs_type = get_fs_type("proc"); if (!proc_fs_type) { pr_err("Failed to find procfs to set sysctl from command line\n"); return 0; } *proc_mnt = kern_mount(proc_fs_type); put_filesystem(proc_fs_type); if (IS_ERR(*proc_mnt)) { pr_err("Failed to mount procfs to set sysctl from command line\n"); return 0; } } path = kasprintf(GFP_KERNEL, "sys/%s", param); if (!path) panic("%s: Failed to allocate path for %s\n", __func__, param); strreplace(path, '.', '/'); file = file_open_root_mnt(*proc_mnt, path, O_WRONLY, 0); if (IS_ERR(file)) { err = PTR_ERR(file); if (err == -ENOENT) pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n", param, val); else if (err == -EACCES) pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n", param, val); else pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n", file, param, val); goto out; } wret = kernel_write(file, val, len, &pos); if (wret < 0) { err = wret; if (err == -EINVAL) pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n", param, val); else pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n", ERR_PTR(err), param, val); } else if (wret != len) { pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s\n", wret, len, path, param, val); } err = filp_close(file, NULL); if (err) pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s\n", ERR_PTR(err), param, val); out: kfree(path); return 0; } void do_sysctl_args(void) { char *command_line; struct vfsmount *proc_mnt = NULL; command_line = kstrdup(saved_command_line, GFP_KERNEL); if (!command_line) panic("%s: Failed to allocate copy of command line\n", __func__); parse_args("Setting sysctl args", command_line, NULL, 0, -1, -1, &proc_mnt, process_sysctl_arg); if (proc_mnt) kern_unmount(proc_mnt); kfree(command_line); }
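/*
 * Editor's illustrative sketch (not part of the kernel source above): the
 * registration API documented at __register_sysctl_table()/register_sysctl_sz()
 * is easiest to see end to end with a tiny, hypothetical user. Every name below
 * ("example_level", the "dev/example" path, and so on) is invented for
 * illustration, and the sketch assumes a kernel recent enough to provide
 * register_sysctl_sz() and the SYSCTL_ZERO/SYSCTL_ONE_HUNDRED bounds helpers.
 * A boot parameter such as "sysctl.dev.example.level=75" would reach the same
 * proc_handler through do_sysctl_args()/process_sysctl_arg() shown above.
 */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sysctl.h>

static int example_level = 50;	/* backing storage read/written via /proc/sys */

/* No zero-filled sentinel entry is needed when the size is passed explicitly. */
static struct ctl_table example_table[] = {
	{
		.procname	= "level",		/* /proc/sys/dev/example/level */
		.data		= &example_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,		/* writes outside 0..100 are rejected */
		.extra2		= SYSCTL_ONE_HUNDRED,
	},
};

static struct ctl_table_header *example_header;

static int __init example_sysctl_init(void)
{
	/* Missing intermediate directories ("dev/example") are created on demand. */
	example_header = register_sysctl_sz("dev/example", example_table,
					    ARRAY_SIZE(example_table));
	return example_header ? 0 : -ENOMEM;
}

static void __exit example_sysctl_exit(void)
{
	/* NULL-safe; only modular users need to unregister at all. */
	unregister_sysctl_table(example_header);
}

module_init(example_sysctl_init);
module_exit(example_sysctl_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Hypothetical sysctl registration example");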
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2008 ioogle, Inc. All rights reserved. * * Libata transport class. 
* * The ATA transport class contains common code to deal with ATA HBAs, * an approximated representation of ATA topologies in the driver model, * and various sysfs attributes to expose these topologies and management * interfaces to user-space. * * There are 3 objects defined in this class: * - ata_port * - ata_link * - ata_device * Each port has a link object. Each link can have up to two devices for PATA * and generally one for SATA. * If there is SATA port multiplier [PMP], 15 additional ata_link object are * created. * * These objects are created when the ata host is initialized and when a PMP is * found. They are removed only when the HBA is removed, cleaned before the * error handler runs. */ #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <scsi/scsi_transport.h> #include <linux/libata.h> #include <linux/hdreg.h> #include <linux/uaccess.h> #include <linux/pm_runtime.h> #include "libata.h" #include "libata-transport.h" #define ATA_PORT_ATTRS 3 #define ATA_LINK_ATTRS 3 #define ATA_DEV_ATTRS 9 struct scsi_transport_template; struct scsi_transport_template *ata_scsi_transport_template; struct ata_internal { struct scsi_transport_template t; struct device_attribute private_port_attrs[ATA_PORT_ATTRS]; struct device_attribute private_link_attrs[ATA_LINK_ATTRS]; struct device_attribute private_dev_attrs[ATA_DEV_ATTRS]; struct transport_container link_attr_cont; struct transport_container dev_attr_cont; /* * The array of null terminated pointers to attributes * needed by scsi_sysfs.c */ struct device_attribute *link_attrs[ATA_LINK_ATTRS + 1]; struct device_attribute *port_attrs[ATA_PORT_ATTRS + 1]; struct device_attribute *dev_attrs[ATA_DEV_ATTRS + 1]; }; #define to_ata_internal(tmpl) container_of(tmpl, struct ata_internal, t) #define tdev_to_device(d) \ container_of((d), struct ata_device, tdev) #define transport_class_to_dev(dev) \ tdev_to_device((dev)->parent) #define tdev_to_link(d) \ container_of((d), struct ata_link, tdev) #define transport_class_to_link(dev) \ tdev_to_link((dev)->parent) #define tdev_to_port(d) \ container_of((d), struct ata_port, tdev) #define transport_class_to_port(dev) \ tdev_to_port((dev)->parent) /* Device objects are always created whit link objects */ static int ata_tdev_add(struct ata_device *dev); static void ata_tdev_delete(struct ata_device *dev); /* * Hack to allow attributes of the same name in different objects. 
*/ #define ATA_DEVICE_ATTR(_prefix,_name,_mode,_show,_store) \ struct device_attribute device_attr_##_prefix##_##_name = \ __ATTR(_name,_mode,_show,_store) #define ata_bitfield_name_match(title, table) \ static ssize_t \ get_ata_##title##_names(u32 table_key, char *buf) \ { \ char *prefix = ""; \ ssize_t len = 0; \ int i; \ \ for (i = 0; i < ARRAY_SIZE(table); i++) { \ if (table[i].value & table_key) { \ len += sprintf(buf + len, "%s%s", \ prefix, table[i].name); \ prefix = ", "; \ } \ } \ len += sprintf(buf + len, "\n"); \ return len; \ } #define ata_bitfield_name_search(title, table) \ static ssize_t \ get_ata_##title##_names(u32 table_key, char *buf) \ { \ ssize_t len = 0; \ int i; \ \ for (i = 0; i < ARRAY_SIZE(table); i++) { \ if (table[i].value == table_key) { \ len += sprintf(buf + len, "%s", \ table[i].name); \ break; \ } \ } \ len += sprintf(buf + len, "\n"); \ return len; \ } static struct { u32 value; char *name; } ata_class_names[] = { { ATA_DEV_UNKNOWN, "unknown" }, { ATA_DEV_ATA, "ata" }, { ATA_DEV_ATA_UNSUP, "ata" }, { ATA_DEV_ATAPI, "atapi" }, { ATA_DEV_ATAPI_UNSUP, "atapi" }, { ATA_DEV_PMP, "pmp" }, { ATA_DEV_PMP_UNSUP, "pmp" }, { ATA_DEV_SEMB, "semb" }, { ATA_DEV_SEMB_UNSUP, "semb" }, { ATA_DEV_ZAC, "zac" }, { ATA_DEV_NONE, "none" } }; ata_bitfield_name_search(class, ata_class_names) static struct { u32 value; char *name; } ata_err_names[] = { { AC_ERR_DEV, "DeviceError" }, { AC_ERR_HSM, "HostStateMachineError" }, { AC_ERR_TIMEOUT, "Timeout" }, { AC_ERR_MEDIA, "MediaError" }, { AC_ERR_ATA_BUS, "BusError" }, { AC_ERR_HOST_BUS, "HostBusError" }, { AC_ERR_SYSTEM, "SystemError" }, { AC_ERR_INVALID, "InvalidArg" }, { AC_ERR_OTHER, "Unknown" }, { AC_ERR_NODEV_HINT, "NoDeviceHint" }, { AC_ERR_NCQ, "NCQError" } }; ata_bitfield_name_match(err, ata_err_names) static struct { u32 value; char *name; } ata_xfer_names[] = { { XFER_UDMA_7, "XFER_UDMA_7" }, { XFER_UDMA_6, "XFER_UDMA_6" }, { XFER_UDMA_5, "XFER_UDMA_5" }, { XFER_UDMA_4, "XFER_UDMA_4" }, { XFER_UDMA_3, "XFER_UDMA_3" }, { XFER_UDMA_2, "XFER_UDMA_2" }, { XFER_UDMA_1, "XFER_UDMA_1" }, { XFER_UDMA_0, "XFER_UDMA_0" }, { XFER_MW_DMA_4, "XFER_MW_DMA_4" }, { XFER_MW_DMA_3, "XFER_MW_DMA_3" }, { XFER_MW_DMA_2, "XFER_MW_DMA_2" }, { XFER_MW_DMA_1, "XFER_MW_DMA_1" }, { XFER_MW_DMA_0, "XFER_MW_DMA_0" }, { XFER_SW_DMA_2, "XFER_SW_DMA_2" }, { XFER_SW_DMA_1, "XFER_SW_DMA_1" }, { XFER_SW_DMA_0, "XFER_SW_DMA_0" }, { XFER_PIO_6, "XFER_PIO_6" }, { XFER_PIO_5, "XFER_PIO_5" }, { XFER_PIO_4, "XFER_PIO_4" }, { XFER_PIO_3, "XFER_PIO_3" }, { XFER_PIO_2, "XFER_PIO_2" }, { XFER_PIO_1, "XFER_PIO_1" }, { XFER_PIO_0, "XFER_PIO_0" }, { XFER_PIO_SLOW, "XFER_PIO_SLOW" } }; ata_bitfield_name_search(xfer, ata_xfer_names) /* * ATA Port attributes */ #define ata_port_show_simple(field, name, format_string, cast) \ static ssize_t \ show_ata_port_##name(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct ata_port *ap = transport_class_to_port(dev); \ \ return scnprintf(buf, 20, format_string, cast ap->field); \ } #define ata_port_simple_attr(field, name, format_string, type) \ ata_port_show_simple(field, name, format_string, (type)) \ static DEVICE_ATTR(name, S_IRUGO, show_ata_port_##name, NULL) ata_port_simple_attr(nr_pmp_links, nr_pmp_links, "%d\n", int); ata_port_simple_attr(stats.idle_irq, idle_irq, "%ld\n", unsigned long); ata_port_simple_attr(local_port_no, port_no, "%u\n", unsigned int); static DECLARE_TRANSPORT_CLASS(ata_port_class, "ata_port", NULL, NULL, NULL); static void ata_tport_release(struct device *dev) { struct ata_port 
*ap = tdev_to_port(dev); ata_host_put(ap->host); } /** * ata_is_port -- check if a struct device represents a ATA port * @dev: device to check * * Returns: * %1 if the device represents a ATA Port, %0 else */ static int ata_is_port(const struct device *dev) { return dev->release == ata_tport_release; } static int ata_tport_match(struct attribute_container *cont, struct device *dev) { if (!ata_is_port(dev)) return 0; return &ata_scsi_transport_template->host_attrs.ac == cont; } /** * ata_tport_delete -- remove ATA PORT * @ap: ATA PORT to remove * * Removes the specified ATA PORT. Remove the associated link as well. */ void ata_tport_delete(struct ata_port *ap) { struct device *dev = &ap->tdev; ata_tlink_delete(&ap->link); transport_remove_device(dev); device_del(dev); transport_destroy_device(dev); put_device(dev); } static const struct device_type ata_port_sas_type = { .name = ATA_PORT_TYPE_NAME, }; /** ata_tport_add - initialize a transport ATA port structure * * @parent: parent device * @ap: existing ata_port structure * * Initialize a ATA port structure for sysfs. It will be added to the device * tree below the device specified by @parent which could be a PCI device. * * Returns %0 on success */ int ata_tport_add(struct device *parent, struct ata_port *ap) { int error; struct device *dev = &ap->tdev; device_initialize(dev); if (ap->flags & ATA_FLAG_SAS_HOST) dev->type = &ata_port_sas_type; else dev->type = &ata_port_type; dev->parent = parent; ata_host_get(ap->host); dev->release = ata_tport_release; dev_set_name(dev, "ata%d", ap->print_id); transport_setup_device(dev); ata_acpi_bind_port(ap); error = device_add(dev); if (error) { goto tport_err; } device_enable_async_suspend(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); pm_runtime_forbid(dev); error = transport_add_device(dev); if (error) goto tport_transport_add_err; transport_configure_device(dev); error = ata_tlink_add(&ap->link); if (error) { goto tport_link_err; } return 0; tport_link_err: transport_remove_device(dev); tport_transport_add_err: device_del(dev); tport_err: transport_destroy_device(dev); put_device(dev); return error; } /** * ata_port_classify - determine device type based on ATA-spec signature * @ap: ATA port device on which the classification should be run * @tf: ATA taskfile register set for device to be identified * * A wrapper around ata_dev_classify() to provide additional logging * * RETURNS: * Device type, %ATA_DEV_ATA, %ATA_DEV_ATAPI, %ATA_DEV_PMP, * %ATA_DEV_ZAC, or %ATA_DEV_UNKNOWN the event of failure. 
*/ unsigned int ata_port_classify(struct ata_port *ap, const struct ata_taskfile *tf) { int i; unsigned int class = ata_dev_classify(tf); /* Start with index '1' to skip the 'unknown' entry */ for (i = 1; i < ARRAY_SIZE(ata_class_names); i++) { if (ata_class_names[i].value == class) { ata_port_dbg(ap, "found %s device by sig\n", ata_class_names[i].name); return class; } } ata_port_info(ap, "found unknown device (class %u)\n", class); return class; } EXPORT_SYMBOL_GPL(ata_port_classify); /* * ATA link attributes */ static int noop(int x) { return x; } #define ata_link_show_linkspeed(field, format) \ static ssize_t \ show_ata_link_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct ata_link *link = transport_class_to_link(dev); \ \ return sprintf(buf, "%s\n", sata_spd_string(format(link->field))); \ } #define ata_link_linkspeed_attr(field, format) \ ata_link_show_linkspeed(field, format) \ static DEVICE_ATTR(field, S_IRUGO, show_ata_link_##field, NULL) ata_link_linkspeed_attr(hw_sata_spd_limit, fls); ata_link_linkspeed_attr(sata_spd_limit, fls); ata_link_linkspeed_attr(sata_spd, noop); static DECLARE_TRANSPORT_CLASS(ata_link_class, "ata_link", NULL, NULL, NULL); static void ata_tlink_release(struct device *dev) { } /** * ata_is_link -- check if a struct device represents a ATA link * @dev: device to check * * Returns: * %1 if the device represents a ATA link, %0 else */ static int ata_is_link(const struct device *dev) { return dev->release == ata_tlink_release; } static int ata_tlink_match(struct attribute_container *cont, struct device *dev) { struct ata_internal* i = to_ata_internal(ata_scsi_transport_template); if (!ata_is_link(dev)) return 0; return &i->link_attr_cont.ac == cont; } /** * ata_tlink_delete -- remove ATA LINK * @link: ATA LINK to remove * * Removes the specified ATA LINK. remove associated ATA device(s) as well. */ void ata_tlink_delete(struct ata_link *link) { struct device *dev = &link->tdev; struct ata_device *ata_dev; ata_for_each_dev(ata_dev, link, ALL) { ata_tdev_delete(ata_dev); } transport_remove_device(dev); device_del(dev); transport_destroy_device(dev); put_device(dev); } /** * ata_tlink_add -- initialize a transport ATA link structure * @link: allocated ata_link structure. * * Initialize an ATA LINK structure for sysfs. It will be added in the * device tree below the ATA PORT it belongs to. 
* * Returns %0 on success */ int ata_tlink_add(struct ata_link *link) { struct device *dev = &link->tdev; struct ata_port *ap = link->ap; struct ata_device *ata_dev; int error; device_initialize(dev); dev->parent = &ap->tdev; dev->release = ata_tlink_release; if (ata_is_host_link(link)) dev_set_name(dev, "link%d", ap->print_id); else dev_set_name(dev, "link%d.%d", ap->print_id, link->pmp); transport_setup_device(dev); error = device_add(dev); if (error) { goto tlink_err; } error = transport_add_device(dev); if (error) goto tlink_transport_err; transport_configure_device(dev); ata_for_each_dev(ata_dev, link, ALL) { error = ata_tdev_add(ata_dev); if (error) { goto tlink_dev_err; } } return 0; tlink_dev_err: while (--ata_dev >= link->device) { ata_tdev_delete(ata_dev); } transport_remove_device(dev); tlink_transport_err: device_del(dev); tlink_err: transport_destroy_device(dev); put_device(dev); return error; } /* * ATA device attributes */ #define ata_dev_show_class(title, field) \ static ssize_t \ show_ata_dev_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct ata_device *ata_dev = transport_class_to_dev(dev); \ \ return get_ata_##title##_names(ata_dev->field, buf); \ } #define ata_dev_attr(title, field) \ ata_dev_show_class(title, field) \ static DEVICE_ATTR(field, S_IRUGO, show_ata_dev_##field, NULL) ata_dev_attr(class, class); ata_dev_attr(xfer, pio_mode); ata_dev_attr(xfer, dma_mode); ata_dev_attr(xfer, xfer_mode); #define ata_dev_show_simple(field, format_string, cast) \ static ssize_t \ show_ata_dev_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct ata_device *ata_dev = transport_class_to_dev(dev); \ \ return scnprintf(buf, 20, format_string, cast ata_dev->field); \ } #define ata_dev_simple_attr(field, format_string, type) \ ata_dev_show_simple(field, format_string, (type)) \ static DEVICE_ATTR(field, S_IRUGO, \ show_ata_dev_##field, NULL) ata_dev_simple_attr(spdn_cnt, "%d\n", int); struct ata_show_ering_arg { char* buf; int written; }; static int ata_show_ering(struct ata_ering_entry *ent, void *void_arg) { struct ata_show_ering_arg* arg = void_arg; u64 seconds; u32 rem; seconds = div_u64_rem(ent->timestamp, HZ, &rem); arg->written += sprintf(arg->buf + arg->written, "[%5llu.%09lu]", seconds, rem * NSEC_PER_SEC / HZ); arg->written += get_ata_err_names(ent->err_mask, arg->buf + arg->written); return 0; } static ssize_t show_ata_dev_ering(struct device *dev, struct device_attribute *attr, char *buf) { struct ata_device *ata_dev = transport_class_to_dev(dev); struct ata_show_ering_arg arg = { buf, 0 }; ata_ering_map(&ata_dev->ering, ata_show_ering, &arg); return arg.written; } static DEVICE_ATTR(ering, S_IRUGO, show_ata_dev_ering, NULL); static ssize_t show_ata_dev_id(struct device *dev, struct device_attribute *attr, char *buf) { struct ata_device *ata_dev = transport_class_to_dev(dev); int written = 0, i = 0; if (ata_dev->class == ATA_DEV_PMP) return 0; for(i=0;i<ATA_ID_WORDS;i++) { written += scnprintf(buf+written, 20, "%04x%c", ata_dev->id[i], ((i+1) & 7) ? ' ' : '\n'); } return written; } static DEVICE_ATTR(id, S_IRUGO, show_ata_dev_id, NULL); static ssize_t show_ata_dev_gscr(struct device *dev, struct device_attribute *attr, char *buf) { struct ata_device *ata_dev = transport_class_to_dev(dev); int written = 0, i = 0; if (ata_dev->class != ATA_DEV_PMP) return 0; for(i=0;i<SATA_PMP_GSCR_DWORDS;i++) { written += scnprintf(buf+written, 20, "%08x%c", ata_dev->gscr[i], ((i+1) & 3) ? 
' ' : '\n'); } if (SATA_PMP_GSCR_DWORDS & 3) buf[written-1] = '\n'; return written; } static DEVICE_ATTR(gscr, S_IRUGO, show_ata_dev_gscr, NULL); static ssize_t show_ata_dev_trim(struct device *dev, struct device_attribute *attr, char *buf) { struct ata_device *ata_dev = transport_class_to_dev(dev); unsigned char *mode; if (!ata_id_has_trim(ata_dev->id)) mode = "unsupported"; else if (ata_dev->horkage & ATA_HORKAGE_NOTRIM) mode = "forced_unsupported"; else if (ata_dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) mode = "forced_unqueued"; else if (ata_fpdma_dsm_supported(ata_dev)) mode = "queued"; else mode = "unqueued"; return scnprintf(buf, 20, "%s\n", mode); } static DEVICE_ATTR(trim, S_IRUGO, show_ata_dev_trim, NULL); static DECLARE_TRANSPORT_CLASS(ata_dev_class, "ata_device", NULL, NULL, NULL); static void ata_tdev_release(struct device *dev) { } /** * ata_is_ata_dev -- check if a struct device represents a ATA device * @dev: device to check * * Returns: * %1 if the device represents a ATA device, %0 else */ static int ata_is_ata_dev(const struct device *dev) { return dev->release == ata_tdev_release; } static int ata_tdev_match(struct attribute_container *cont, struct device *dev) { struct ata_internal* i = to_ata_internal(ata_scsi_transport_template); if (!ata_is_ata_dev(dev)) return 0; return &i->dev_attr_cont.ac == cont; } /** * ata_tdev_free -- free a ATA LINK * @dev: ATA PHY to free * * Frees the specified ATA PHY. * * Note: * This function must only be called on a PHY that has not * successfully been added using ata_tdev_add(). */ static void ata_tdev_free(struct ata_device *dev) { transport_destroy_device(&dev->tdev); put_device(&dev->tdev); } /** * ata_tdev_delete -- remove ATA device * @ata_dev: ATA device to remove * * Removes the specified ATA device. */ static void ata_tdev_delete(struct ata_device *ata_dev) { struct device *dev = &ata_dev->tdev; transport_remove_device(dev); device_del(dev); ata_tdev_free(ata_dev); } /** * ata_tdev_add -- initialize a transport ATA device structure. * @ata_dev: ata_dev structure. * * Initialize an ATA device structure for sysfs. It will be added in the * device tree below the ATA LINK device it belongs to. 
* * Returns %0 on success */ static int ata_tdev_add(struct ata_device *ata_dev) { struct device *dev = &ata_dev->tdev; struct ata_link *link = ata_dev->link; struct ata_port *ap = link->ap; int error; device_initialize(dev); dev->parent = &link->tdev; dev->release = ata_tdev_release; if (ata_is_host_link(link)) dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno); else dev_set_name(dev, "dev%d.%d.0", ap->print_id, link->pmp); transport_setup_device(dev); ata_acpi_bind_dev(ata_dev); error = device_add(dev); if (error) { ata_tdev_free(ata_dev); return error; } error = transport_add_device(dev); if (error) { device_del(dev); ata_tdev_free(ata_dev); return error; } transport_configure_device(dev); return 0; } /* * Setup / Teardown code */ #define SETUP_TEMPLATE(attrb, field, perm, test) \ i->private_##attrb[count] = dev_attr_##field; \ i->private_##attrb[count].attr.mode = perm; \ i->attrb[count] = &i->private_##attrb[count]; \ if (test) \ count++ #define SETUP_LINK_ATTRIBUTE(field) \ SETUP_TEMPLATE(link_attrs, field, S_IRUGO, 1) #define SETUP_PORT_ATTRIBUTE(field) \ SETUP_TEMPLATE(port_attrs, field, S_IRUGO, 1) #define SETUP_DEV_ATTRIBUTE(field) \ SETUP_TEMPLATE(dev_attrs, field, S_IRUGO, 1) /** * ata_attach_transport -- instantiate ATA transport template */ struct scsi_transport_template *ata_attach_transport(void) { struct ata_internal *i; int count; i = kzalloc(sizeof(struct ata_internal), GFP_KERNEL); if (!i) return NULL; i->t.eh_strategy_handler = ata_scsi_error; i->t.user_scan = ata_scsi_user_scan; i->t.host_attrs.ac.attrs = &i->port_attrs[0]; i->t.host_attrs.ac.class = &ata_port_class.class; i->t.host_attrs.ac.match = ata_tport_match; transport_container_register(&i->t.host_attrs); i->link_attr_cont.ac.class = &ata_link_class.class; i->link_attr_cont.ac.attrs = &i->link_attrs[0]; i->link_attr_cont.ac.match = ata_tlink_match; transport_container_register(&i->link_attr_cont); i->dev_attr_cont.ac.class = &ata_dev_class.class; i->dev_attr_cont.ac.attrs = &i->dev_attrs[0]; i->dev_attr_cont.ac.match = ata_tdev_match; transport_container_register(&i->dev_attr_cont); count = 0; SETUP_PORT_ATTRIBUTE(nr_pmp_links); SETUP_PORT_ATTRIBUTE(idle_irq); SETUP_PORT_ATTRIBUTE(port_no); BUG_ON(count > ATA_PORT_ATTRS); i->port_attrs[count] = NULL; count = 0; SETUP_LINK_ATTRIBUTE(hw_sata_spd_limit); SETUP_LINK_ATTRIBUTE(sata_spd_limit); SETUP_LINK_ATTRIBUTE(sata_spd); BUG_ON(count > ATA_LINK_ATTRS); i->link_attrs[count] = NULL; count = 0; SETUP_DEV_ATTRIBUTE(class); SETUP_DEV_ATTRIBUTE(pio_mode); SETUP_DEV_ATTRIBUTE(dma_mode); SETUP_DEV_ATTRIBUTE(xfer_mode); SETUP_DEV_ATTRIBUTE(spdn_cnt); SETUP_DEV_ATTRIBUTE(ering); SETUP_DEV_ATTRIBUTE(id); SETUP_DEV_ATTRIBUTE(gscr); SETUP_DEV_ATTRIBUTE(trim); BUG_ON(count > ATA_DEV_ATTRS); i->dev_attrs[count] = NULL; return &i->t; } /** * ata_release_transport -- release ATA transport template instance * @t: transport template instance */ void ata_release_transport(struct scsi_transport_template *t) { struct ata_internal *i = to_ata_internal(t); transport_container_unregister(&i->t.host_attrs); transport_container_unregister(&i->link_attr_cont); transport_container_unregister(&i->dev_attr_cont); kfree(i); } __init int libata_transport_init(void) { int error; error = transport_class_register(&ata_link_class); if (error) goto out_unregister_transport; error = transport_class_register(&ata_port_class); if (error) goto out_unregister_link; error = transport_class_register(&ata_dev_class); if (error) goto out_unregister_port; return 0; out_unregister_port: 
transport_class_unregister(&ata_port_class); out_unregister_link: transport_class_unregister(&ata_link_class); out_unregister_transport: return error; } void __exit libata_transport_exit(void) { transport_class_unregister(&ata_link_class); transport_class_unregister(&ata_port_class); transport_class_unregister(&ata_dev_class); }
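/*
 * Editor's note: illustrative sketch only, not part of libata-transport.c.
 * It shows the intended pairing of ata_tport_add()/ata_tport_delete() as
 * implemented above: ata_tport_add() creates the "ataN" object under the
 * given parent and then adds the host link ("linkN") and its devices
 * ("devN.D") below it; ata_tport_delete() tears the tree down in reverse.
 * The "example_" names are hypothetical, and the sketch assumes the
 * drivers/ata internal headers.
 */
#include "libata.h"
#include "libata-transport.h"

static int __maybe_unused example_bring_up_transport(struct device *example_parent,
                                                     struct ata_port *example_ap)
{
        int rc;

        rc = ata_tport_add(example_parent, example_ap); /* ataN, linkN, devN.D */
        if (rc)
                return rc;

        /* ... the port and its attributes are now visible in sysfs ... */

        ata_tport_delete(example_ap);           /* devices, link, then port */
        return 0;
}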
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ #include <linux/static_key.h> #include <linux/objtool.h> #include <linux/linkage.h> #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/msr-index.h> #include <asm/unwind_hints.h> #include <asm/percpu.h> #include <asm/current.h> /* * Call depth tracking for Intel SKL CPUs to address the RSB underflow * issue in software. * * The tracking does not use a counter. It uses arithmetic shift * right on call entry and logical shift left on return. * * The depth tracking variable is initialized to 0x8000.... when the call * depth is zero. The arithmetic shift right sign extends the MSB and * saturates after the 12th call. The shift count is 5 for both directions * so the tracking covers 12 nested calls. * * Call * 0: 0x8000000000000000 0x0000000000000000 * 1: 0xfc00000000000000 0xf000000000000000 * ... * 11: 0xfffffffffffffff8 0xfffffffffffffc00 * 12: 0xffffffffffffffff 0xffffffffffffffe0 * * After a return buffer fill the depth is credited 12 calls before the * next stuffing has to take place. 
* * There is a inaccuracy for situations like this: * * 10 calls * 5 returns * 3 calls * 4 returns * 3 calls * .... * * The shift count might cause this to be off by one in either direction, * but there is still a cushion vs. the RSB depth. The algorithm does not * claim to be perfect and it can be speculated around by the CPU, but it * is considered that it obfuscates the problem enough to make exploitation * extremely difficult. */ #define RET_DEPTH_SHIFT 5 #define RSB_RET_STUFF_LOOPS 16 #define RET_DEPTH_INIT 0x8000000000000000ULL #define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL #define RET_DEPTH_CREDIT 0xffffffffffffffffULL #ifdef CONFIG_CALL_THUNKS_DEBUG # define CALL_THUNKS_DEBUG_INC_CALLS \ incq PER_CPU_VAR(__x86_call_count); # define CALL_THUNKS_DEBUG_INC_RETS \ incq PER_CPU_VAR(__x86_ret_count); # define CALL_THUNKS_DEBUG_INC_STUFFS \ incq PER_CPU_VAR(__x86_stuffs_count); # define CALL_THUNKS_DEBUG_INC_CTXSW \ incq PER_CPU_VAR(__x86_ctxsw_count); #else # define CALL_THUNKS_DEBUG_INC_CALLS # define CALL_THUNKS_DEBUG_INC_RETS # define CALL_THUNKS_DEBUG_INC_STUFFS # define CALL_THUNKS_DEBUG_INC_CTXSW #endif #if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) #include <asm/asm-offsets.h> #define CREDIT_CALL_DEPTH \ movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); #define RESET_CALL_DEPTH \ xor %eax, %eax; \ bts $63, %rax; \ movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); #define RESET_CALL_DEPTH_FROM_CALL \ movb $0xfc, %al; \ shl $56, %rax; \ movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #define INCREMENT_CALL_DEPTH \ sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS #else #define CREDIT_CALL_DEPTH #define RESET_CALL_DEPTH #define RESET_CALL_DEPTH_FROM_CALL #define INCREMENT_CALL_DEPTH #endif /* * Fill the CPU return stack buffer. * * Each entry in the RSB, if used for a speculative 'ret', contains an * infinite 'pause; lfence; jmp' loop to capture speculative execution. * * This is required in various cases for retpoline and IBRS-based * mitigations for the Spectre variant 2 vulnerability. Sometimes to * eliminate potentially bogus entries from the RSB, and sometimes * purely to ensure that it doesn't get empty, which on some CPUs would * allow predictions from other (unwanted!) sources to be used. * * We define a CPP macro such that it can be used from both .S files and * inline assembly. It's possible to do a .macro and then include that * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. */ #define RETPOLINE_THUNK_SIZE 32 #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ /* * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. */ #define __FILL_RETURN_SLOT \ ANNOTATE_INTRA_FUNCTION_CALL; \ call 772f; \ int3; \ 772: /* * Stuff the entire RSB. * * Google experimented with loop-unrolling and this turned out to be * the optimal version - two calls, each with their own speculation * trap should their return address end up getting used, in a loop. */ #ifdef CONFIG_X86_64 #define __FILL_RETURN_BUFFER(reg, nr) \ mov $(nr/2), reg; \ 771: \ __FILL_RETURN_SLOT \ __FILL_RETURN_SLOT \ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ dec reg; \ jnz 771b; \ /* barrier for jnz misprediction */ \ lfence; \ CREDIT_CALL_DEPTH \ CALL_THUNKS_DEBUG_INC_CTXSW #else /* * i386 doesn't unconditionally have LFENCE, as such it can't * do a loop. 
*/ #define __FILL_RETURN_BUFFER(reg, nr) \ .rept nr; \ __FILL_RETURN_SLOT; \ .endr; \ add $(BITS_PER_LONG/8) * nr, %_ASM_SP; #endif /* * Stuff a single RSB slot. * * To mitigate Post-Barrier RSB speculation, one CALL instruction must be * forced to retire before letting a RET instruction execute. * * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed * before this point. */ #define __FILL_ONE_RETURN \ __FILL_RETURN_SLOT \ add $(BITS_PER_LONG/8), %_ASM_SP; \ lfence; #ifdef __ASSEMBLY__ /* * This should be used immediately before an indirect jump/call. It tells * objtool the subsequent indirect jump/call is vouched safe for retpoline * builds. */ .macro ANNOTATE_RETPOLINE_SAFE .Lhere_\@: .pushsection .discard.retpoline_safe .long .Lhere_\@ .popsection .endm /* * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions * vs RETBleed validation. */ #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE /* * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should * eventually turn into its own annotation. */ .macro VALIDATE_UNRET_END #if defined(CONFIG_NOINSTR_VALIDATION) && \ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) ANNOTATE_RETPOLINE_SAFE nop #endif .endm /* * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call * to the retpoline thunk with a CS prefix when the register requires * a RAX prefix byte to encode. Also see apply_retpolines(). */ .macro __CS_PREFIX reg:req .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 .ifc \reg,\rs .byte 0x2e .endif .endr .endm /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. * * NOTE: these do not take kCFI into account and are thus not comparable to C * indirect calls, take care when using. The target of these should be an ENDBR * instruction irrespective of kCFI. */ .macro JMP_NOSPEC reg:req #ifdef CONFIG_MITIGATION_RETPOLINE __CS_PREFIX \reg jmp __x86_indirect_thunk_\reg #else jmp *%\reg int3 #endif .endm .macro CALL_NOSPEC reg:req #ifdef CONFIG_MITIGATION_RETPOLINE __CS_PREFIX \reg call __x86_indirect_thunk_\reg #else call *%\reg #endif .endm /* * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2 .Lskip_rsb_\@: .endm /* * The CALL to srso_alias_untrain_ret() must be patched in directly at * the spot where untraining must be done, ie., srso_alias_untrain_ret() * must be the target of a CALL instruction instead of indirectly * jumping to a wrapper which then calls it. Therefore, this macro is * called outside of __UNTRAIN_RET below, for the time being, before the * kernel can support nested alternatives with arbitrary nesting. */ .macro CALL_UNTRAIN_RET #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif .endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the * return thunk isn't mapped into the userspace tables (then again, AMD * typically has NO_MELTDOWN). * * While retbleed_untrain_ret() doesn't clobber anything but requires stack, * entry_ibpb() will clobber AX, CX, DX. 
* * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. */ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns #if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) VALIDATE_UNRET_END CALL_UNTRAIN_RET ALTERNATIVE_2 "", \ "call entry_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif .endm #define UNTRAIN_RET \ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) #define UNTRAIN_RET_VM \ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) #define UNTRAIN_RET_FROM_CALL \ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) .macro CALL_DEPTH_ACCOUNT #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING ALTERNATIVE "", \ __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif .endm /* * Macro to execute VERW instruction that mitigate transient data sampling * attacks such as MDS. On affected systems a microcode update overloaded VERW * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. * * Note: Only the memory operand variant of VERW clears the CPU buffers. */ .macro CLEAR_CPU_BUFFERS ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF .endm #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP .endm .macro CLEAR_BRANCH_HISTORY_VMEXIT ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT .endm #else #define CLEAR_BRANCH_HISTORY #define CLEAR_BRANCH_HISTORY_VMEXIT #endif #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ "999:\n\t" \ ".pushsection .discard.retpoline_safe\n\t" \ ".long 999b\n\t" \ ".popsection\n\t" typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; #ifdef CONFIG_MITIGATION_RETHUNK extern void __x86_return_thunk(void); #else static inline void __x86_return_thunk(void) {} #endif #ifdef CONFIG_MITIGATION_UNRET_ENTRY extern void retbleed_return_thunk(void); #else static inline void retbleed_return_thunk(void) {} #endif extern void srso_alias_untrain_ret(void); #ifdef CONFIG_MITIGATION_SRSO extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); #else static inline void srso_return_thunk(void) {} static inline void srso_alias_return_thunk(void) {} #endif extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); #ifdef CONFIG_X86_64 extern void clear_bhb_loop(void); #endif extern void (*x86_return_thunk)(void); extern void __warn_thunk(void); #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING extern void call_depth_return_thunk(void); #define CALL_DEPTH_ACCOUNT \ ALTERNATIVE("", \ __stringify(INCREMENT_CALL_DEPTH), \ X86_FEATURE_CALL_DEPTH) #ifdef CONFIG_CALL_THUNKS_DEBUG DECLARE_PER_CPU(u64, __x86_call_count); DECLARE_PER_CPU(u64, __x86_ret_count); DECLARE_PER_CPU(u64, __x86_stuffs_count); DECLARE_PER_CPU(u64, __x86_ctxsw_count); #endif #else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ static inline void call_depth_return_thunk(void) {} #define CALL_DEPTH_ACCOUNT "" #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ #ifdef CONFIG_MITIGATION_RETPOLINE #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; #include 
<asm/GEN-for-each-reg.h> #undef GEN #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; #include <asm/GEN-for-each-reg.h> #undef GEN #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; #include <asm/GEN-for-each-reg.h> #undef GEN #ifdef CONFIG_X86_64 /* * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. */ # define CALL_NOSPEC \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ "call __x86_indirect_thunk_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ /* * For i386 we use the original ret-equivalent retpoline, because * otherwise we'll run out of registers. We don't care about CET * here, anyway. */ # define CALL_NOSPEC \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ " jmp 904f;\n" \ " .align 16\n" \ "901: call 903f;\n" \ "902: pause;\n" \ " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ "903: lea 4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n" \ "904: call 901b;\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif #else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, SPECTRE_V2_RETPOLINE, SPECTRE_V2_LFENCE, SPECTRE_V2_EIBRS, SPECTRE_V2_EIBRS_RETPOLINE, SPECTRE_V2_EIBRS_LFENCE, SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, SPECTRE_V2_USER_STRICT_PREFERRED, SPECTRE_V2_USER_PRCTL, SPECTRE_V2_USER_SECCOMP, }; /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, SPEC_STORE_BYPASS_DISABLE, SPEC_STORE_BYPASS_PRCTL, SPEC_STORE_BYPASS_SECCOMP, }; static __always_inline void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)), [feature] "i" (feature) : "memory"); } extern u64 x86_pred_cmd; static inline void indirect_branch_prediction_barrier(void) { alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB); } /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. * * (Implemented as CPP macros due to header hell.) 
*/ #define firmware_restrict_branch_speculation_start() \ do { \ preempt_disable(); \ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ X86_FEATURE_USE_IBPB_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); extern u16 mds_verw_sel; #include <asm/segment.h> /** * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ static __always_inline void mds_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; /* * Has to be the memory-operand variant because only that * guarantees the CPU buffer flush functionality according to * documentation. The register-operand variant does not. * Works with any segment selector, but a valid writable * data segment is the fastest variant. * * "cc" clobber is required because VERW modifies ZF. */ asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } /** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * * Clear CPU buffers if the corresponding static key is enabled */ static __always_inline void mds_idle_clear_cpu_buffers(void) { if (static_branch_likely(&mds_idle_clear)) mds_clear_cpu_buffers(); } #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
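/*
 * Editor's note: a standalone user-space sketch (not kernel code) of the
 * call-depth arithmetic described at the top of this header. One tracked
 * call is an arithmetic shift right by RET_DEPTH_SHIFT (5), so a single
 * call from the depth-0 value RET_DEPTH_INIT yields RET_DEPTH_INIT_FROM_CALL,
 * and a return-buffer fill credits the variable to RET_DEPTH_CREDIT (-1),
 * mirroring INCREMENT_CALL_DEPTH, RESET_CALL_DEPTH_FROM_CALL and
 * CREDIT_CALL_DEPTH. It assumes '>>' on a signed value is an arithmetic
 * shift, as sarq is in the real assembly.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
        int64_t depth = (int64_t)0x8000000000000000ULL; /* RET_DEPTH_INIT, depth 0 */

        depth >>= 5;                                    /* one tracked call */
        assert((uint64_t)depth == 0xfc00000000000000ULL); /* RET_DEPTH_INIT_FROM_CALL */

        depth = -1;                                     /* RET_DEPTH_CREDIT after RSB stuffing */
        assert((uint64_t)depth == 0xffffffffffffffffULL);
        return 0;
}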
/* SPDX-License-Identifier: GPL-2.0 */ /* * generic net pointers */ #ifndef __NET_GENERIC_H__ #define __NET_GENERIC_H__ #include <linux/bug.h> #include <linux/rcupdate.h> #include <net/net_namespace.h> /* * Generic net pointers are to be used by modules to put some private * stuff on the struct net without explicit struct net modification * * The rules are simple: * 1. set pernet_operations->id. After register_pernet_device you * will have the id of your private pointer. * 2. set pernet_operations->size to have the code allocate and free * a private structure pointed to from struct net. * 3. do not change this pointer while the net is alive; * 4. do not try to have any private reference on the net_generic object. * * After accomplishing all of the above, the private pointer can be * accessed with the net_generic() call. */ struct net_generic { union { struct { unsigned int len; struct rcu_head rcu; } s; DECLARE_FLEX_ARRAY(void *, ptr); }; }; static inline void *net_generic(const struct net *net, unsigned int id) { struct net_generic *ng; void *ptr; rcu_read_lock(); ng = rcu_dereference(net->gen); ptr = ng->ptr[id]; rcu_read_unlock(); return ptr; } #endif
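/*
 * Editor's note: illustrative sketch (not part of this header) of the usage
 * pattern documented in the comment above. All "example_" names are
 * hypothetical; a real module would also provide .init/.exit hooks and call
 * unregister_pernet_device() on module exit.
 */
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct example_pernet {
        int counter;                            /* private per-namespace state */
};

static unsigned int example_pernet_id;          /* rule 1: filled in on register */

static struct pernet_operations example_pernet_ops = {
        .id     = &example_pernet_id,
        .size   = sizeof(struct example_pernet), /* rule 2: allocated per net */
};

static struct example_pernet *example_pernet(const struct net *net)
{
        /* rules 3/4: only ever reach the private data through net_generic() */
        return net_generic(net, example_pernet_id);
}

static int __init example_pernet_init(void)
{
        return register_pernet_device(&example_pernet_ops);
}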
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs * * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/kernel.h> #include <linux/kstrtox.h> #include <linux/errno.h> #include <linux/wait.h> #include <linux/unistd.h> #include <linux/stddef.h> #include <linux/personality.h> #include <linux/uaccess.h> #include <linux/user-return-notifier.h> #include <linux/uprobes.h> #include <linux/context_tracking.h> #include <linux/entry-common.h> #include <linux/syscalls.h> #include <linux/rseq.h> #include <asm/processor.h> #include <asm/ucontext.h> #include <asm/fpu/signal.h> #include <asm/fpu/xstate.h> #include <asm/vdso.h> #include <asm/mce.h> #include <asm/sighandling.h> #include <asm/vm86.h> #include <asm/syscall.h> #include <asm/sigframe.h> #include <asm/signal.h> #include <asm/shstk.h> static inline int is_ia32_compat_frame(struct ksignal *ksig) { return IS_ENABLED(CONFIG_IA32_EMULATION) && ksig->ka.sa.sa_flags & SA_IA32_ABI; } static inline int is_ia32_frame(struct ksignal *ksig) { return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig); } static inline int is_x32_frame(struct ksignal *ksig) { return IS_ENABLED(CONFIG_X86_X32_ABI) && ksig->ka.sa.sa_flags & SA_X32_ABI; } /* * Set up a signal frame. */ /* x86 ABI requires 16-byte alignment */ #define FRAME_ALIGNMENT 16UL #define MAX_FRAME_PADDING (FRAME_ALIGNMENT - 1) /* * Determine which stack to use.. 
*/ void __user * get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { struct k_sigaction *ka = &ksig->ka; int ia32_frame = is_ia32_frame(ksig); /* Default to using normal stack */ bool nested_altstack = on_sig_stack(regs->sp); bool entering_altstack = false; unsigned long math_size = 0; unsigned long sp = regs->sp; unsigned long buf_fx = 0; /* redzone */ if (!ia32_frame) sp -= 128; /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { /* * This checks nested_altstack via sas_ss_flags(). Sensible * programs use SS_AUTODISARM, which disables that check, and * programs that don't use SS_AUTODISARM get compatible. */ if (sas_ss_flags(sp) == 0) { sp = current->sas_ss_sp + current->sas_ss_size; entering_altstack = true; } } else if (ia32_frame && !nested_altstack && regs->ss != __USER_DS && !(ka->sa.sa_flags & SA_RESTORER) && ka->sa.sa_restorer) { /* This is the legacy signal stack switching. */ sp = (unsigned long) ka->sa.sa_restorer; entering_altstack = true; } sp = fpu__alloc_mathframe(sp, ia32_frame, &buf_fx, &math_size); *fpstate = (void __user *)sp; sp -= frame_size; if (ia32_frame) /* * Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4; else sp = round_down(sp, FRAME_ALIGNMENT) - 8; /* * If we are on the alternate signal stack and would overflow it, don't. * Return an always-bogus address instead so we will die with SIGSEGV. */ if (unlikely((nested_altstack || entering_altstack) && !__on_sig_stack(sp))) { if (show_unhandled_signals && printk_ratelimit()) pr_info("%s[%d] overflowed sigaltstack\n", current->comm, task_pid_nr(current)); return (void __user *)-1L; } /* save i387 and extended state */ if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size)) return (void __user *)-1L; return (void __user *)sp; } /* * There are four different struct types for signal frame: sigframe_ia32, * rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case * -- the largest size. It means the size for 64-bit apps is a bit more * than needed, but this keeps the code simple. */ #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) # define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct sigframe_ia32) #else # define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct rt_sigframe) #endif /* * The FP state frame contains an XSAVE buffer which must be 64-byte aligned. * If a signal frame starts at an unaligned address, extra space is required. * This is the max alignment padding, conservatively. */ #define MAX_XSAVE_PADDING 63UL /* * The frame data is composed of the following areas and laid out as: * * ------------------------- * | alignment padding | * ------------------------- * | (f)xsave frame | * ------------------------- * | fsave header | * ------------------------- * | alignment padding | * ------------------------- * | siginfo + ucontext | * ------------------------- */ /* max_frame_size tells userspace the worst case signal stack size. */ static unsigned long __ro_after_init max_frame_size; static unsigned int __ro_after_init fpu_default_state_size; static int __init init_sigframe_size(void) { fpu_default_state_size = fpu__get_fpstate_size(); max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING; max_frame_size += fpu_default_state_size + MAX_XSAVE_PADDING; /* Userspace expects an aligned size. 
*/ max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT); pr_info("max sigframe size: %lu\n", max_frame_size); return 0; } early_initcall(init_sigframe_size); unsigned long get_sigframe_size(void) { return max_frame_size; } static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) { /* Perform fixup for the pre-signal frame. */ rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ if (is_ia32_frame(ksig)) { if (ksig->ka.sa.sa_flags & SA_SIGINFO) return ia32_setup_rt_frame(ksig, regs); else return ia32_setup_frame(ksig, regs); } else if (is_x32_frame(ksig)) { return x32_setup_rt_frame(ksig, regs); } else { return x64_setup_rt_frame(ksig, regs); } } static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { bool stepping, failed; struct fpu *fpu = &current->thread.fpu; if (v8086_mode(regs)) save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); /* Are we from a system call? */ if (syscall_get_nr(current, regs) != -1) { /* If so, check system call restarting.. */ switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; break; case -ERESTARTSYS: if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { regs->ax = -EINTR; break; } fallthrough; case -ERESTARTNOINTR: regs->ax = regs->orig_ax; regs->ip -= 2; break; } } /* * If TF is set due to a debugger (TIF_FORCED_TF), clear TF now * so that register information in the sigcontext is correct and * then notify the tracer before entering the signal handler. */ stepping = test_thread_flag(TIF_SINGLESTEP); if (stepping) user_disable_single_step(current); failed = (setup_rt_frame(ksig, regs) < 0); if (!failed) { /* * Clear the direction flag as per the ABI for function entry. * * Clear RF when entering the signal handler, because * it might disable possible debug exception from the * signal handler. * * Clear TF for the case when it wasn't set by debugger to * avoid the recursive send_sigtrap() in SIGTRAP handler. */ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); /* * Ensure the signal handler starts with the new fpu state. */ fpu__clear_user_states(fpu); } signal_setup_done(failed, ksig, stepping); } static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { #ifdef CONFIG_IA32_EMULATION if (current->restart_block.arch_data & TS_COMPAT) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); #else return __NR_restart_syscall; #endif } /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ handle_signal(&ksig, regs); return; } /* Did we come from a system call? */ if (syscall_get_nr(current, regs) != -1) { /* Restart the system call - no handlers present */ switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: regs->ax = regs->orig_ax; regs->ip -= 2; break; case -ERESTART_RESTARTBLOCK: regs->ax = get_nr_restart_syscall(regs); regs->ip -= 2; break; } } /* * If there's no signal to deliver, we just put the saved sigmask * back. 
*/ restore_saved_sigmask(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; if (show_unhandled_signals && printk_ratelimit()) { printk("%s" "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, me->comm, me->pid, where, frame, regs->ip, regs->sp, regs->orig_ax); print_vma_addr(KERN_CONT " in ", regs->ip); pr_cont("\n"); } force_sig(SIGSEGV); } #ifdef CONFIG_DYNAMIC_SIGFRAME #ifdef CONFIG_STRICT_SIGALTSTACK_SIZE static bool strict_sigaltstack_size __ro_after_init = true; #else static bool strict_sigaltstack_size __ro_after_init = false; #endif static int __init strict_sas_size(char *arg) { return kstrtobool(arg, &strict_sigaltstack_size) == 0; } __setup("strict_sas_size", strict_sas_size); /* * MINSIGSTKSZ is 2048 and can't be changed despite the fact that AVX512 * exceeds that size already. As such programs might never use the * sigaltstack they just continued to work. While always checking against * the real size would be correct, this might be considered a regression. * * Therefore avoid the sanity check, unless enforced by kernel * configuration or command line option. * * When dynamic FPU features are supported, the check is also enforced when * the task has permissions to use dynamic features. Tasks which have no * permission are checked against the size of the non-dynamic feature set * if strict checking is enabled. This avoids forcing all tasks on the * system to allocate large sigaltstacks even if they are never going * to use a dynamic feature. As this is serialized via sighand::siglock * any permission request for a dynamic feature either happened already * or will see the newly install sigaltstack size in the permission checks. */ bool sigaltstack_size_valid(size_t ss_size) { unsigned long fsize = max_frame_size - fpu_default_state_size; u64 mask; lockdep_assert_held(&current->sighand->siglock); if (!fpu_state_size_dynamic() && !strict_sigaltstack_size) return true; fsize += current->group_leader->thread.fpu.perm.__user_state_size; if (likely(ss_size > fsize)) return true; if (strict_sigaltstack_size) return ss_size > fsize; mask = current->group_leader->thread.fpu.perm.__state_perm; if (mask & XFEATURE_MASK_USER_DYNAMIC) return ss_size > fsize; return true; } #endif /* CONFIG_DYNAMIC_SIGFRAME */
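/*
 * Editor's note: user-space sketch (not kernel code) of how a program is
 * expected to size its sigaltstack so the frame-size logic above is
 * satisfied even with large XSAVE states: the kernel reports its worst-case
 * signal frame size via the AT_MINSIGSTKSZ auxiliary vector entry, and
 * sigaltstack_size_valid() checks new stacks against that kind of bound.
 * The fallback define and the 64 KiB headroom are illustrative choices.
 */
#include <elf.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/auxv.h>

#ifndef AT_MINSIGSTKSZ
#define AT_MINSIGSTKSZ  51      /* from the uapi auxvec definitions */
#endif

static int example_setup_sigaltstack(void)
{
        unsigned long minframe = getauxval(AT_MINSIGSTKSZ);
        stack_t ss = { 0 };

        /* Leave generous headroom above the reported worst-case frame size. */
        ss.ss_size = (minframe ? minframe : MINSIGSTKSZ) + 64 * 1024;
        ss.ss_sp = malloc(ss.ss_size);
        if (!ss.ss_sp)
                return -1;

        return sigaltstack(&ss, NULL);
}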
// SPDX-License-Identifier: GPL-2.0 /* * chaoskey - driver for ChaosKey device from Altus Metrum. * * This device provides true random numbers using a noise source based * on a reverse-biased p-n junction in avalanche breakdown. More * details can be found at http://chaoskey.org * * The driver connects to the kernel hardware RNG interface to provide * entropy for /dev/random and other kernel activities. It also offers * a separate /dev/ entry to allow for direct access to the random * bit stream. * * Copyright © 2015 Keith Packard <keithp@keithp.com> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/wait.h> #include <linux/hw_random.h> #include <linux/mutex.h> #include <linux/uaccess.h> static struct usb_driver chaoskey_driver; static struct usb_class_driver chaoskey_class; static int chaoskey_rng_read(struct hwrng *rng, void *data, size_t max, bool wait); #define usb_dbg(usb_if, format, arg...) \ dev_dbg(&(usb_if)->dev, format, ## arg) #define usb_err(usb_if, format, arg...) 
\ dev_err(&(usb_if)->dev, format, ## arg) /* Version Information */ #define DRIVER_AUTHOR "Keith Packard, keithp@keithp.com" #define DRIVER_DESC "Altus Metrum ChaosKey driver" #define DRIVER_SHORT "chaoskey" MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); #define CHAOSKEY_VENDOR_ID 0x1d50 /* OpenMoko */ #define CHAOSKEY_PRODUCT_ID 0x60c6 /* ChaosKey */ #define ALEA_VENDOR_ID 0x12d8 /* Araneus */ #define ALEA_PRODUCT_ID 0x0001 /* Alea I */ #define CHAOSKEY_BUF_LEN 64 /* max size of USB full speed packet */ #define NAK_TIMEOUT (HZ) /* normal stall/wait timeout */ #define ALEA_FIRST_TIMEOUT (HZ*3) /* first stall/wait timeout for Alea */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define USB_CHAOSKEY_MINOR_BASE 0 #else /* IOWARRIOR_MINOR_BASE + 16, not official yet */ #define USB_CHAOSKEY_MINOR_BASE 224 #endif static const struct usb_device_id chaoskey_table[] = { { USB_DEVICE(CHAOSKEY_VENDOR_ID, CHAOSKEY_PRODUCT_ID) }, { USB_DEVICE(ALEA_VENDOR_ID, ALEA_PRODUCT_ID) }, { }, }; MODULE_DEVICE_TABLE(usb, chaoskey_table); static void chaos_read_callback(struct urb *urb); /* Driver-local specific stuff */ struct chaoskey { struct usb_interface *interface; char in_ep; struct mutex lock; struct mutex rng_lock; int open; /* open count */ bool present; /* device not disconnected */ bool reading; /* ongoing IO */ bool reads_started; /* track first read for Alea */ int size; /* size of buf */ int valid; /* bytes of buf read */ int used; /* bytes of buf consumed */ char *name; /* product + serial */ struct hwrng hwrng; /* Embedded struct for hwrng */ int hwrng_registered; /* registered with hwrng API */ wait_queue_head_t wait_q; /* for timeouts */ struct urb *urb; /* for performing IO */ char *buf; }; static void chaoskey_free(struct chaoskey *dev) { if (dev) { usb_dbg(dev->interface, "free"); usb_free_urb(dev->urb); kfree(dev->name); kfree(dev->buf); usb_put_intf(dev->interface); kfree(dev); } } static int chaoskey_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_host_interface *altsetting = interface->cur_altsetting; struct usb_endpoint_descriptor *epd; int in_ep; struct chaoskey *dev; int result = -ENOMEM; int size; int res; usb_dbg(interface, "probe %s-%s", udev->product, udev->serial); /* Find the first bulk IN endpoint and its packet size */ res = usb_find_bulk_in_endpoint(altsetting, &epd); if (res) { usb_dbg(interface, "no IN endpoint found"); return res; } in_ep = usb_endpoint_num(epd); size = usb_endpoint_maxp(epd); /* Validate endpoint and size */ if (size <= 0) { usb_dbg(interface, "invalid size (%d)", size); return -ENODEV; } if (size > CHAOSKEY_BUF_LEN) { usb_dbg(interface, "size reduced from %d to %d\n", size, CHAOSKEY_BUF_LEN); size = CHAOSKEY_BUF_LEN; } /* Looks good, allocate and initialize */ dev = kzalloc(sizeof(struct chaoskey), GFP_KERNEL); if (dev == NULL) goto out; dev->interface = usb_get_intf(interface); dev->buf = kmalloc(size, GFP_KERNEL); if (dev->buf == NULL) goto out; dev->urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb) goto out; usb_fill_bulk_urb(dev->urb, udev, usb_rcvbulkpipe(udev, in_ep), dev->buf, size, chaos_read_callback, dev); /* Construct a name using the product and serial values. 
Each * device needs a unique name for the hwrng code */ if (udev->product && udev->serial) { dev->name = kasprintf(GFP_KERNEL, "%s-%s", udev->product, udev->serial); if (dev->name == NULL) goto out; } dev->in_ep = in_ep; if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID) dev->reads_started = true; dev->size = size; dev->present = true; init_waitqueue_head(&dev->wait_q); mutex_init(&dev->lock); mutex_init(&dev->rng_lock); usb_set_intfdata(interface, dev); result = usb_register_dev(interface, &chaoskey_class); if (result) { usb_err(interface, "Unable to allocate minor number."); goto out; } dev->hwrng.name = dev->name ? dev->name : chaoskey_driver.name; dev->hwrng.read = chaoskey_rng_read; dev->hwrng_registered = (hwrng_register(&dev->hwrng) == 0); if (!dev->hwrng_registered) usb_err(interface, "Unable to register with hwrng"); usb_enable_autosuspend(udev); usb_dbg(interface, "chaoskey probe success, size %d", dev->size); return 0; out: usb_set_intfdata(interface, NULL); chaoskey_free(dev); return result; } static void chaoskey_disconnect(struct usb_interface *interface) { struct chaoskey *dev; usb_dbg(interface, "disconnect"); dev = usb_get_intfdata(interface); if (!dev) { usb_dbg(interface, "disconnect failed - no dev"); return; } if (dev->hwrng_registered) hwrng_unregister(&dev->hwrng); usb_deregister_dev(interface, &chaoskey_class); usb_set_intfdata(interface, NULL); mutex_lock(&dev->lock); dev->present = false; usb_poison_urb(dev->urb); if (!dev->open) { mutex_unlock(&dev->lock); chaoskey_free(dev); } else mutex_unlock(&dev->lock); usb_dbg(interface, "disconnect done"); } static int chaoskey_open(struct inode *inode, struct file *file) { struct chaoskey *dev; struct usb_interface *interface; /* get the interface from minor number and driver information */ interface = usb_find_interface(&chaoskey_driver, iminor(inode)); if (!interface) return -ENODEV; usb_dbg(interface, "open"); dev = usb_get_intfdata(interface); if (!dev) { usb_dbg(interface, "open (dev)"); return -ENODEV; } file->private_data = dev; mutex_lock(&dev->lock); ++dev->open; mutex_unlock(&dev->lock); usb_dbg(interface, "open success"); return 0; } static int chaoskey_release(struct inode *inode, struct file *file) { struct chaoskey *dev = file->private_data; struct usb_interface *interface; if (dev == NULL) return -ENODEV; interface = dev->interface; usb_dbg(interface, "release"); mutex_lock(&dev->lock); usb_dbg(interface, "open count at release is %d", dev->open); if (dev->open <= 0) { usb_dbg(interface, "invalid open count (%d)", dev->open); mutex_unlock(&dev->lock); return -ENODEV; } --dev->open; if (!dev->present) { if (dev->open == 0) { mutex_unlock(&dev->lock); chaoskey_free(dev); } else mutex_unlock(&dev->lock); } else mutex_unlock(&dev->lock); usb_dbg(interface, "release success"); return 0; } static void chaos_read_callback(struct urb *urb) { struct chaoskey *dev = urb->context; int status = urb->status; usb_dbg(dev->interface, "callback status (%d)", status); if (status == 0) dev->valid = urb->actual_length; else dev->valid = 0; dev->used = 0; /* must be seen first before validity is announced */ smp_wmb(); dev->reading = false; wake_up(&dev->wait_q); } /* Fill the buffer. 
Called with dev->lock held */ static int _chaoskey_fill(struct chaoskey *dev) { DEFINE_WAIT(wait); int result; bool started; usb_dbg(dev->interface, "fill"); /* Return immediately if someone called before the buffer was * empty */ if (dev->valid != dev->used) { usb_dbg(dev->interface, "not empty yet (valid %d used %d)", dev->valid, dev->used); return 0; } /* Bail if the device has been removed */ if (!dev->present) { usb_dbg(dev->interface, "device not present"); return -ENODEV; } /* Make sure the device is awake */ result = usb_autopm_get_interface(dev->interface); if (result) { usb_dbg(dev->interface, "wakeup failed (result %d)", result); return result; } dev->reading = true; result = usb_submit_urb(dev->urb, GFP_KERNEL); if (result < 0) { result = usb_translate_errors(result); dev->reading = false; goto out; } /* The first read on the Alea takes a little under 2 seconds. * Reads after the first read take only a few microseconds * though. Presumably the entropy-generating circuit needs * time to ramp up. So, we wait longer on the first read. */ started = dev->reads_started; dev->reads_started = true; result = wait_event_interruptible_timeout( dev->wait_q, !dev->reading, (started ? NAK_TIMEOUT : ALEA_FIRST_TIMEOUT) ); if (result < 0) { usb_kill_urb(dev->urb); goto out; } if (result == 0) { result = -ETIMEDOUT; usb_kill_urb(dev->urb); } else { result = dev->valid; } out: /* Let the device go back to sleep eventually */ usb_autopm_put_interface(dev->interface); usb_dbg(dev->interface, "read %d bytes", dev->valid); return result; } static ssize_t chaoskey_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct chaoskey *dev; ssize_t read_count = 0; int this_time; int result = 0; unsigned long remain; dev = file->private_data; if (dev == NULL || !dev->present) return -ENODEV; usb_dbg(dev->interface, "read %zu", count); while (count > 0) { /* Grab the rng_lock briefly to ensure that the hwrng interface * gets priority over other user access */ result = mutex_lock_interruptible(&dev->rng_lock); if (result) goto bail; mutex_unlock(&dev->rng_lock); result = mutex_lock_interruptible(&dev->lock); if (result) goto bail; if (dev->valid == dev->used) { result = _chaoskey_fill(dev); if (result < 0) { mutex_unlock(&dev->lock); goto bail; } } this_time = dev->valid - dev->used; if (this_time > count) this_time = count; remain = copy_to_user(buffer, dev->buf + dev->used, this_time); if (remain) { result = -EFAULT; /* Consume the bytes that were copied so we don't leak * data to user space */ dev->used += this_time - remain; mutex_unlock(&dev->lock); goto bail; } count -= this_time; read_count += this_time; buffer += this_time; dev->used += this_time; mutex_unlock(&dev->lock); } bail: if (read_count) { usb_dbg(dev->interface, "read %zu bytes", read_count); return read_count; } usb_dbg(dev->interface, "empty read, result %d", result); if (result == -ETIMEDOUT) result = -EAGAIN; return result; } static int chaoskey_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) { struct chaoskey *dev = container_of(rng, struct chaoskey, hwrng); int this_time; usb_dbg(dev->interface, "rng_read max %zu wait %d", max, wait); if (!dev->present) { usb_dbg(dev->interface, "device not present"); return 0; } /* Hold the rng_lock until we acquire the device lock so that * this operation gets priority over other user access to the * device */ mutex_lock(&dev->rng_lock); mutex_lock(&dev->lock); mutex_unlock(&dev->rng_lock); /* Try to fill the buffer if empty. 
It doesn't actually matter * if _chaoskey_fill works; we'll just return zero bytes as * the buffer will still be empty */ if (dev->valid == dev->used) (void) _chaoskey_fill(dev); this_time = dev->valid - dev->used; if (this_time > max) this_time = max; memcpy(data, dev->buf + dev->used, this_time); dev->used += this_time; mutex_unlock(&dev->lock); usb_dbg(dev->interface, "rng_read this_time %d\n", this_time); return this_time; } #ifdef CONFIG_PM static int chaoskey_suspend(struct usb_interface *interface, pm_message_t message) { usb_dbg(interface, "suspend"); return 0; } static int chaoskey_resume(struct usb_interface *interface) { struct chaoskey *dev; struct usb_device *udev = interface_to_usbdev(interface); usb_dbg(interface, "resume"); dev = usb_get_intfdata(interface); /* * We may have lost power. * In that case the device that needs a long time * for the first requests needs an extended timeout * again */ if (le16_to_cpu(udev->descriptor.idVendor) == ALEA_VENDOR_ID) dev->reads_started = false; return 0; } #else #define chaoskey_suspend NULL #define chaoskey_resume NULL #endif /* file operation pointers */ static const struct file_operations chaoskey_fops = { .owner = THIS_MODULE, .read = chaoskey_read, .open = chaoskey_open, .release = chaoskey_release, .llseek = default_llseek, }; /* class driver information */ static struct usb_class_driver chaoskey_class = { .name = "chaoskey%d", .fops = &chaoskey_fops, .minor_base = USB_CHAOSKEY_MINOR_BASE, }; /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver chaoskey_driver = { .name = DRIVER_SHORT, .probe = chaoskey_probe, .disconnect = chaoskey_disconnect, .suspend = chaoskey_suspend, .resume = chaoskey_resume, .reset_resume = chaoskey_resume, .id_table = chaoskey_table, .supports_autosuspend = 1, }; module_usb_driver(chaoskey_driver);
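/*
 * Editor's note: user-space sketch (not part of the driver) showing direct
 * reads from the character device registered above. The node name follows
 * chaoskey_class.name ("chaoskey%d"); the exact /dev path depends on udev.
 * As in chaoskey_read(), a timed-out read surfaces as errno EAGAIN.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        unsigned char buf[64];          /* one CHAOSKEY_BUF_LEN-sized request */
        ssize_t n;
        int fd = open("/dev/chaoskey0", O_RDONLY);

        if (fd < 0) {
                perror("open /dev/chaoskey0");
                return 1;
        }

        n = read(fd, buf, sizeof(buf));
        if (n < 0)
                perror("read");
        else
                printf("got %zd random bytes\n", n);

        close(fd);
        return 0;
}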
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * acpi.h - ACPI Interface * * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> */ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H #include <linux/errno.h> #include <linux/ioport.h> /* for struct resource */ #include <linux/resource_ext.h> #include <linux/device.h> #include
<linux/mod_devicetable.h> #include <linux/property.h> #include <linux/uuid.h> #include <linux/node.h> struct irq_domain; struct irq_domain_ops; #ifndef _LINUX #define _LINUX #endif #include <acpi/acpi.h> #ifdef CONFIG_ACPI #include <linux/list.h> #include <linux/dynamic_debug.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/fw_table.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> #include <acpi/acpi_numa.h> #include <acpi/acpi_io.h> #include <asm/acpi.h> #ifdef CONFIG_ACPI_TABLE_LIB #define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) #define __init_or_acpilib #define __initdata_or_acpilib #else #define EXPORT_SYMBOL_ACPI_LIB(x) #define __init_or_acpilib __init #define __initdata_or_acpilib __initdata #endif static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return adev ? adev->handle : NULL; } #define ACPI_COMPANION(dev) to_acpi_device_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) #define ACPI_HANDLE_FWNODE(fwnode) \ acpi_device_handle(to_acpi_device_node(fwnode)) static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { struct fwnode_handle *fwnode; fwnode = kzalloc(sizeof(struct fwnode_handle), GFP_KERNEL); if (!fwnode) return NULL; fwnode_init(fwnode, &acpi_static_fwnode_ops); return fwnode; } static inline void acpi_free_fwnode_static(struct fwnode_handle *fwnode) { if (WARN_ON(!is_acpi_static_node(fwnode))) return; kfree(fwnode); } static inline bool has_acpi_companion(struct device *dev) { return is_acpi_device_node(dev->fwnode); } static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, false)); } static inline const char *acpi_dev_name(struct acpi_device *adev) { return dev_name(&adev->dev); } struct device *acpi_get_first_physical_node(struct acpi_device *adev); enum acpi_irq_model_id { ACPI_IRQ_MODEL_PIC = 0, ACPI_IRQ_MODEL_IOAPIC, ACPI_IRQ_MODEL_IOSAPIC, ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, ACPI_IRQ_MODEL_LPIC, ACPI_IRQ_MODEL_COUNT }; extern enum acpi_irq_model_id acpi_irq_model; enum acpi_interrupt_id { ACPI_INTERRUPT_PMI = 1, ACPI_INTERRUPT_INIT, ACPI_INTERRUPT_CPEI, ACPI_INTERRUPT_COUNT }; #define ACPI_SPACE_MEM 0 enum acpi_address_range_id { ACPI_ADDRESS_RANGE_MEMORY = 1, ACPI_ADDRESS_RANGE_RESERVED = 2, ACPI_ADDRESS_RANGE_ACPI = 3, ACPI_ADDRESS_RANGE_NVS = 4, ACPI_ADDRESS_RANGE_COUNT }; /* Table Handlers */ typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); /* Debugger support */ struct acpi_debugger_ops { int (*create_thread)(acpi_osd_exec_callback function, void *context); ssize_t (*write_log)(const char *msg); ssize_t (*read_cmd)(char *buffer, size_t length); int (*wait_command_ready)(bool single_step, char *buffer, size_t length); int (*notify_command_complete)(void); }; struct acpi_debugger { const struct acpi_debugger_ops *ops; struct module *owner; struct mutex lock; }; #ifdef CONFIG_ACPI_DEBUGGER int __init acpi_debugger_init(void); int acpi_register_debugger(struct module *owner, const struct acpi_debugger_ops *ops); void acpi_unregister_debugger(const struct acpi_debugger_ops *ops); int acpi_debugger_create_thread(acpi_osd_exec_callback function, void *context); ssize_t acpi_debugger_write_log(const char *msg); ssize_t acpi_debugger_read_cmd(char *buffer, size_t buffer_length); int 
acpi_debugger_wait_command_ready(void); int acpi_debugger_notify_command_complete(void); #else static inline int acpi_debugger_init(void) { return -ENODEV; } static inline int acpi_register_debugger(struct module *owner, const struct acpi_debugger_ops *ops) { return -ENODEV; } static inline void acpi_unregister_debugger(const struct acpi_debugger_ops *ops) { } static inline int acpi_debugger_create_thread(acpi_osd_exec_callback function, void *context) { return -ENODEV; } static inline int acpi_debugger_write_log(const char *msg) { return -ENODEV; } static inline int acpi_debugger_read_cmd(char *buffer, u32 buffer_length) { return -ENODEV; } static inline int acpi_debugger_wait_command_ready(void) { return -ENODEV; } static inline int acpi_debugger_notify_command_complete(void) { return -ENODEV; } #endif #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); void acpi_boot_table_prepare (void); void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); int acpi_locate_initial_tables (void); void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, acpi_tbl_entry_handler handler, unsigned int max_entries); int __init_or_acpilib acpi_table_parse_entries_array(char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries); int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); int __init_or_acpilib acpi_table_parse_cedt(enum acpi_cedt_type id, acpi_tbl_entry_handler_arg handler_arg, void *arg); int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) { return gicc->flags & ACPI_MADT_ENABLED; } /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { } #endif void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #if defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) void acpi_arch_dma_setup(struct device *dev); #else static inline void acpi_arch_dma_setup(struct device *dev) { } #endif #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); #ifndef PHYS_CPUID_INVALID typedef u32 phys_cpuid_t; #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) #endif static inline bool invalid_logical_cpuid(u32 cpuid) { return (int)cpuid < 0; } static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) { return phys_id == PHYS_CPUID_INVALID; } /* Validate the processor object's proc_id */ bool acpi_duplicate_processor_id(int 
proc_id); /* Processor _CTS control */ struct acpi_processor_power; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE bool acpi_processor_claim_cst_control(void); int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_processor_power *info); #else static inline bool acpi_processor_claim_cst_control(void) { return false; } static inline int acpi_processor_evaluate_cst(acpi_handle handle, u32 cpu, struct acpi_processor_power *info) { return -ENODEV; } #endif #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; extern unsigned int acpi_sci_irq; extern bool acpi_no_s5; #define INVALID_ACPI_IRQ ((unsigned)-1) static inline bool acpi_sci_irq_valid(void) { return acpi_sci_irq != INVALID_ACPI_IRQ; } extern int sbf_port; extern unsigned long acpi_realmode_flags; int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); void acpi_set_irq_model(enum acpi_irq_model_id model, struct fwnode_handle *(*)(u32)); void acpi_set_gsi_to_irq_fallback(u32 (*)(u32)); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, unsigned int size, struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data); #ifdef CONFIG_X86_IO_APIC extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #else static inline int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) { return -1; } #endif /* * This function undoes the effect of one call to acpi_register_gsi(). * If this matches the last registration, any IRQ resources for gsi * are freed. 
*/ void acpi_unregister_gsi (u32 gsi); struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); #ifdef CONFIG_PCI void acpi_penalize_sci_irq(int irq, int trigger, int polarity); #else static inline void acpi_penalize_sci_irq(int irq, int trigger, int polarity) { } #endif void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, u8 *rdata, unsigned rdata_len); extern acpi_handle ec_get_handle(void); extern bool acpi_is_pnp_device(struct acpi_device *); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) typedef void (*wmi_notify_handler) (u32 value, void *context); int wmi_instance_count(const char *guid); extern acpi_status wmi_evaluate_method(const char *guid, u8 instance, u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out); extern acpi_status wmi_query_block(const char *guid, u8 instance, struct acpi_buffer *out); extern acpi_status wmi_set_block(const char *guid, u8 instance, const struct acpi_buffer *in); extern acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data); extern acpi_status wmi_remove_notify_handler(const char *guid); extern acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out); extern bool wmi_has_guid(const char *guid); extern char *wmi_get_acpi_device_uid(const char *guid); #endif /* CONFIG_ACPI_WMI */ #define ACPI_VIDEO_OUTPUT_SWITCHING 0x0001 #define ACPI_VIDEO_DEVICE_POSTING 0x0002 #define ACPI_VIDEO_ROM_AVAILABLE 0x0004 #define ACPI_VIDEO_BACKLIGHT 0x0008 #define ACPI_VIDEO_BACKLIGHT_FORCE_VENDOR 0x0010 #define ACPI_VIDEO_BACKLIGHT_FORCE_VIDEO 0x0020 #define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VENDOR 0x0040 #define ACPI_VIDEO_OUTPUT_SWITCHING_FORCE_VIDEO 0x0080 #define ACPI_VIDEO_BACKLIGHT_DMI_VENDOR 0x0100 #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); extern int acpi_blacklisted(void); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_THERMAL_LIB int thermal_acpi_active_trip_temp(struct acpi_device *adev, int id, int *ret_temp); int thermal_acpi_passive_trip_temp(struct acpi_device *adev, int *ret_temp); int thermal_acpi_hot_trip_temp(struct acpi_device *adev, int *ret_temp); int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_ACPI_HMAT int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord); #else static inline int acpi_get_genport_coordinates(u32 uid, struct access_coordinate *coord) { return -EOPNOTSUPP; } #endif #ifdef CONFIG_ACPI_NUMA int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); /** * pxm_to_online_node - Map proximity ID to online node * @pxm: ACPI proximity ID * * This is similar to pxm_to_node(), but always returns an online * node. When the mapped node from a given proximity ID is offline, it * looks up the node distance table and returns the nearest online node. * * ACPI device drivers, which are called after the NUMA initialization has * completed in the kernel, can call this interface to obtain their device * NUMA topology from ACPI tables. Such drivers do not have to deal with * offline nodes. 
A node may be offline when SRAT memory entry does not exist, * or NUMA is disabled, ex. "numa=off" on x86. */ static inline int pxm_to_online_node(int pxm) { int node = pxm_to_node(pxm); return numa_map_to_online_node(node); } #else static inline int pxm_to_online_node(int pxm) { return 0; } static inline int acpi_map_pxm_to_node(int pxm) { return 0; } static inline int acpi_get_node(acpi_handle handle) { return 0; } #endif extern int pnpacpi_disabled; #define PXM_INVAL (-1) bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res); bool acpi_dev_resource_address_space(struct acpi_resource *ares, struct resource_win *win); bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, struct resource_win *win); unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable, u8 wake_capable); unsigned int acpi_dev_get_irq_type(int triggering, int polarity); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); void acpi_dev_free_resource_list(struct list_head *list); int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, int (*preproc)(struct acpi_resource *, void *), void *preproc_data); int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_filter_resource_type(struct acpi_resource *ares, unsigned long types); static inline int acpi_dev_filter_resource_type_cb(struct acpi_resource *ares, void *arg) { return acpi_dev_filter_resource_type(ares, (unsigned long)arg); } struct acpi_device *acpi_resource_consumer(struct resource *res); int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION extern int acpi_check_s4_hw_signature; #endif #ifdef CONFIG_PM_SLEEP void __init acpi_old_suspend_ordering(void); void __init acpi_nvs_nosave(void); void __init acpi_nvs_nosave_s3(void); void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */ int acpi_register_wakeup_handler( int wake_irq, bool (*wakeup)(void *context), void *context); void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context); struct acpi_osc_context { char *uuid_str; /* UUID string */ int rev; struct acpi_buffer cap; /* list of DWORD capabilities */ struct acpi_buffer ret; /* free by caller if success */ }; acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); /* Number of _OSC capability DWORDS depends on bridge type */ #define OSC_PCI_CAPABILITY_DWORDS 3 #define OSC_CXL_CAPABILITY_DWORDS 5 /* Indexes into _OSC Capabilities Buffer (DWORDs 2 to 5 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ #define OSC_EXT_SUPPORT_DWORD 3 /* DWORD 4 */ #define OSC_EXT_CONTROL_DWORD 4 /* DWORD 5 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ #define OSC_REQUEST_ERROR 0x00000002 /* return */ #define OSC_INVALID_UUID_ERROR 0x00000004 /* return */ #define OSC_INVALID_REVISION_ERROR 0x00000008 /* return */ #define OSC_CAPABILITIES_MASK_ERROR 0x00000010 /* return */ /* Platform-Wide Capabilities _OSC: Capabilities DWORD 2: Support Field */ #define OSC_SB_PAD_SUPPORT 
0x00000001 #define OSC_SB_PPC_OST_SUPPORT 0x00000002 #define OSC_SB_PR3_SUPPORT 0x00000004 #define OSC_SB_HOTPLUG_OST_SUPPORT 0x00000008 #define OSC_SB_APEI_SUPPORT 0x00000010 #define OSC_SB_CPC_SUPPORT 0x00000020 #define OSC_SB_CPCV2_SUPPORT 0x00000040 #define OSC_SB_PCLPI_SUPPORT 0x00000080 #define OSC_SB_OSLPI_SUPPORT 0x00000100 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000 #define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00002000 #define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000 #define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000 #define OSC_SB_PRM_SUPPORT 0x00200000 #define OSC_SB_FFH_OPR_SUPPORT 0x00400000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; extern bool osc_sb_native_usb4_support_confirmed; extern bool osc_sb_cppc2_support_acked; extern bool osc_cpc_flexible_adr_space_confirmed; /* USB4 Capabilities */ #define OSC_USB_USB3_TUNNELING 0x00000001 #define OSC_USB_DP_TUNNELING 0x00000002 #define OSC_USB_PCIE_TUNNELING 0x00000004 #define OSC_USB_XDOMAIN 0x00000008 extern u32 osc_sb_native_usb4_control; /* PCI Host Bridge _OSC: Capabilities DWORD 2: Support Field */ #define OSC_PCI_EXT_CONFIG_SUPPORT 0x00000001 #define OSC_PCI_ASPM_SUPPORT 0x00000002 #define OSC_PCI_CLOCK_PM_SUPPORT 0x00000004 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT 0x00000008 #define OSC_PCI_MSI_SUPPORT 0x00000010 #define OSC_PCI_EDR_SUPPORT 0x00000080 #define OSC_PCI_HPX_TYPE_3_SUPPORT 0x00000100 /* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */ #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 0x00000001 #define OSC_PCI_SHPC_NATIVE_HP_CONTROL 0x00000002 #define OSC_PCI_EXPRESS_PME_CONTROL 0x00000004 #define OSC_PCI_EXPRESS_AER_CONTROL 0x00000008 #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL 0x00000010 #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 /* CXL _OSC: Capabilities DWORD 4: Support Field */ #define OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT 0x00000001 #define OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT 0x00000002 #define OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT 0x00000004 #define OSC_CXL_NATIVE_HP_SUPPORT 0x00000008 /* CXL _OSC: Capabilities DWORD 5: Control Field */ #define OSC_CXL_ERROR_REPORTING_CONTROL 0x00000001 static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) { u32 *ret = context->ret.pointer; return ret[OSC_CONTROL_DWORD]; } static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) { u32 *ret = context->ret.pointer; return ret[OSC_EXT_CONTROL_DWORD]; } #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 #define ACPI_GSB_ACCESS_ATTRIB_WORD 0x00000008 #define ACPI_GSB_ACCESS_ATTRIB_BLOCK 0x0000000A #define ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE 0x0000000B #define ACPI_GSB_ACCESS_ATTRIB_WORD_CALL 0x0000000C #define ACPI_GSB_ACCESS_ATTRIB_BLOCK_CALL 0x0000000D #define ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES 0x0000000E #define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS 0x0000000F /* Enable _OST when all relevant hotplug operations are enabled */ #if defined(CONFIG_ACPI_HOTPLUG_CPU) && \ defined(CONFIG_ACPI_HOTPLUG_MEMORY) && \ defined(CONFIG_ACPI_CONTAINER) #define ACPI_HOTPLUG_OST #endif /* _OST Source Event Code (OSPM Action) */ #define ACPI_OST_EC_OSPM_SHUTDOWN 0x100 #define ACPI_OST_EC_OSPM_EJECT 0x103 #define ACPI_OST_EC_OSPM_INSERTION 0x200 /* _OST General Processing Status Code */ #define ACPI_OST_SC_SUCCESS 0x0 #define ACPI_OST_SC_NON_SPECIFIC_FAILURE 0x1 #define ACPI_OST_SC_UNRECOGNIZED_NOTIFY 0x2 /* _OST 
OS Shutdown Processing (0x100) Status Code */ #define ACPI_OST_SC_OS_SHUTDOWN_DENIED 0x80 #define ACPI_OST_SC_OS_SHUTDOWN_IN_PROGRESS 0x81 #define ACPI_OST_SC_OS_SHUTDOWN_COMPLETED 0x82 #define ACPI_OST_SC_OS_SHUTDOWN_NOT_SUPPORTED 0x83 /* _OST Ejection Request (0x3, 0x103) Status Code */ #define ACPI_OST_SC_EJECT_NOT_SUPPORTED 0x80 #define ACPI_OST_SC_DEVICE_IN_USE 0x81 #define ACPI_OST_SC_DEVICE_BUSY 0x82 #define ACPI_OST_SC_EJECT_DEPENDENCY_BUSY 0x83 #define ACPI_OST_SC_EJECT_IN_PROGRESS 0x84 /* _OST Insertion Request (0x200) Status Code */ #define ACPI_OST_SC_INSERT_IN_PROGRESS 0x80 #define ACPI_OST_SC_DRIVER_LOAD_FAILURE 0x81 #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 enum acpi_predicate { all_versions, less_than_or_equal, equal, greater_than_or_equal, }; /* Table must be terminted by a NULL entry */ struct acpi_platform_list { char oem_id[ACPI_OEM_ID_SIZE+1]; char oem_table_id[ACPI_OEM_TABLE_ID_SIZE+1]; u32 oem_revision; char *table; enum acpi_predicate pred; char *reason; u32 data; }; int acpi_match_platform_list(const struct acpi_platform_list *plat); extern void acpi_early_init(void); extern void acpi_subsystem_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data); const struct acpi_device_id *acpi_match_acpi_device(const struct acpi_device_id *ids, const struct acpi_device *adev); const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const struct device *dev); const void *acpi_device_get_match_data(const struct device *dev); extern bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv); int acpi_device_uevent_modalias(const struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); struct platform_device *acpi_create_platform_device(struct acpi_device *, const struct property_entry *); #define ACPI_PTR(_ptr) (_ptr) static inline void acpi_device_set_enumerated(struct acpi_device *adev) { adev->flags.visited = true; } static inline void acpi_device_clear_enumerated(struct acpi_device *adev) { adev->flags.visited = false; } enum acpi_reconfig_event { ACPI_RECONFIG_DEVICE_ADD = 0, ACPI_RECONFIG_DEVICE_REMOVE, }; int acpi_reconfig_notifier_register(struct notifier_block *nb); int acpi_reconfig_notifier_unregister(struct notifier_block *nb); #ifdef CONFIG_ACPI_GTDT int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count); int acpi_gtdt_map_ppi(int type); bool acpi_gtdt_c3stop(int type); int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); #endif #ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER static inline void acpi_arch_set_root_pointer(u64 addr) { } #endif #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER static inline u64 acpi_arch_get_root_pointer(void) { return 0; } #endif int acpi_get_local_address(acpi_handle handle, u32 *addr); const char *acpi_get_subsystem_id(acpi_handle handle); #else /* !CONFIG_ACPI */ #define acpi_disabled 1 #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) #define ACPI_HANDLE_FWNODE(fwnode) (NULL) /* Get rid of the -Wunused-variable for adev */ #define acpi_dev_uid_match(adev, uid2) (adev && false) #define acpi_dev_hid_uid_match(adev, hid2, uid2) (adev && false) #include <acpi/acpi_numa.h> struct fwnode_handle; static inline bool acpi_dev_found(const char *hid) { return false; } static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) { 
return false; } struct acpi_device; static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer) { return -ENODEV; } static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { return NULL; } static inline bool acpi_reduced_hardware(void) { return false; } static inline void acpi_dev_put(struct acpi_device *adev) {} static inline bool is_acpi_node(const struct fwnode_handle *fwnode) { return false; } static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode) { return false; } static inline struct acpi_device *to_acpi_device_node(const struct fwnode_handle *fwnode) { return NULL; } static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode) { return false; } static inline struct acpi_data_node *to_acpi_data_node(const struct fwnode_handle *fwnode) { return NULL; } static inline bool acpi_data_node_match(const struct fwnode_handle *fwnode, const char *name) { return false; } static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) { return NULL; } static inline bool has_acpi_companion(struct device *dev) { return false; } static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { } static inline const char *acpi_dev_name(struct acpi_device *adev) { return NULL; } static inline struct device *acpi_get_first_physical_node(struct acpi_device *adev) { return NULL; } static inline void acpi_early_init(void) { } static inline void acpi_subsystem_init(void) { } static inline int early_acpi_boot_init(void) { return 0; } static inline int acpi_boot_init(void) { return 0; } static inline void acpi_boot_table_prepare(void) { } static inline void acpi_boot_table_init(void) { } static inline int acpi_mps_check(void) { return 0; } static inline int acpi_check_resource_conflict(struct resource *res) { return 0; } static inline int acpi_check_region(resource_size_t start, resource_size_t n, const char *name) { return 0; } struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) { return -ENODEV; } static inline int acpi_nvs_register(__u64 start, __u64 size) { return 0; } static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data) { return 0; } struct acpi_device_id; static inline const struct acpi_device_id *acpi_match_acpi_device( const struct acpi_device_id *ids, const struct acpi_device *adev) { return NULL; } static inline const struct acpi_device_id *acpi_match_device( const struct acpi_device_id *ids, const struct device *dev) { return NULL; } static inline const void *acpi_device_get_match_data(const struct device *dev) { return NULL; } static inline bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv) { return false; } static inline bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs) { return false; } static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { return NULL; } static inline union acpi_object *acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4, acpi_object_type type) { return NULL; } static inline int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return -ENODEV; } static inline int acpi_device_modalias(struct device *dev, char *buf, int size) { return -ENODEV; } static inline struct platform_device * 
acpi_create_platform_device(struct acpi_device *adev, const struct property_entry *properties) { return NULL; } static inline bool acpi_dma_supported(const struct acpi_device *adev) { return false; } static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) { return DEV_DMA_NOT_SUPPORTED; } static inline int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) { return -ENODEV; } static inline int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { return 0; } static inline int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id) { return 0; } #define ACPI_PTR(_ptr) (NULL) static inline void acpi_device_set_enumerated(struct acpi_device *adev) { } static inline void acpi_device_clear_enumerated(struct acpi_device *adev) { } static inline int acpi_reconfig_notifier_register(struct notifier_block *nb) { return -EINVAL; } static inline int acpi_reconfig_notifier_unregister(struct notifier_block *nb) { return -EINVAL; } static inline struct acpi_device *acpi_resource_consumer(struct resource *res) { return NULL; } static inline int acpi_get_local_address(acpi_handle handle, u32 *addr) { return -ENODEV; } static inline const char *acpi_get_subsystem_id(acpi_handle handle) { return ERR_PTR(-ENODEV); } static inline int acpi_register_wakeup_handler(int wake_irq, bool (*wakeup)(void *context), void *context) { return -ENXIO; } static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } struct acpi_osc_context; static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) { return 0; } static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) { return 0; } static inline bool acpi_sleep_state_supported(u8 sleep_state) { return false; } #endif /* !CONFIG_ACPI */ extern void arch_post_acpi_subsys_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_ioapic_add(acpi_handle root); #else static inline int acpi_ioapic_add(acpi_handle root) { return 0; } #endif #ifdef CONFIG_ACPI void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, u32 pm1a_ctrl, u32 pm1b_ctrl)); acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, u32 val_a, u32 val_b)); acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); #if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); void (*check)(void); void (*restore)(void); }; int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); int acpi_get_lps0_constraint(struct acpi_device *adev); #else /* CONFIG_SUSPEND && CONFIG_X86 */ static inline int acpi_get_lps0_constraint(struct device *dev) { return ACPI_STATE_UNKNOWN; } #endif /* CONFIG_SUSPEND && CONFIG_X86 */ void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM) int acpi_dev_suspend(struct device *dev, bool wakeup); int acpi_dev_resume(struct device *dev); int acpi_subsys_runtime_suspend(struct device *dev); int acpi_subsys_runtime_resume(struct device *dev); int acpi_dev_pm_attach(struct device *dev, bool power_on); bool acpi_storage_d3(struct device *dev); bool acpi_dev_state_d0(struct device *dev); #else static inline int 
acpi_subsys_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) { return 0; } static inline bool acpi_storage_d3(struct device *dev) { return false; } static inline bool acpi_dev_state_d0(struct device *dev) { return true; } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP) int acpi_subsys_prepare(struct device *dev); void acpi_subsys_complete(struct device *dev); int acpi_subsys_suspend_late(struct device *dev); int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); void acpi_ec_mark_gpe_for_wake(void); void acpi_ec_set_gpe_wake_mask(u8 action); int acpi_subsys_restore_early(struct device *dev); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } static inline int acpi_subsys_restore_early(struct device *dev) { return 0; } static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif #ifdef CONFIG_ACPI char *acpi_handle_path(acpi_handle handle); __printf(3, 4) void acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...); void acpi_evaluation_failure_warn(acpi_handle handle, const char *name, acpi_status status); #else /* !CONFIG_ACPI */ static inline __printf(3, 4) void acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {} static inline void acpi_evaluation_failure_warn(acpi_handle handle, const char *name, acpi_status status) {} #endif /* !CONFIG_ACPI */ #if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG) __printf(3, 4) void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const char *fmt, ...); #endif /* * acpi_handle_<level>: Print message with ACPI prefix and object path * * These interfaces acquire the global namespace mutex to obtain an object * path. In interrupt context, it shows the object path as <n/a>. */ #define acpi_handle_emerg(handle, fmt, ...) \ acpi_handle_printk(KERN_EMERG, handle, fmt, ##__VA_ARGS__) #define acpi_handle_alert(handle, fmt, ...) \ acpi_handle_printk(KERN_ALERT, handle, fmt, ##__VA_ARGS__) #define acpi_handle_crit(handle, fmt, ...) \ acpi_handle_printk(KERN_CRIT, handle, fmt, ##__VA_ARGS__) #define acpi_handle_err(handle, fmt, ...) \ acpi_handle_printk(KERN_ERR, handle, fmt, ##__VA_ARGS__) #define acpi_handle_warn(handle, fmt, ...) \ acpi_handle_printk(KERN_WARNING, handle, fmt, ##__VA_ARGS__) #define acpi_handle_notice(handle, fmt, ...) \ acpi_handle_printk(KERN_NOTICE, handle, fmt, ##__VA_ARGS__) #define acpi_handle_info(handle, fmt, ...) \ acpi_handle_printk(KERN_INFO, handle, fmt, ##__VA_ARGS__) #if defined(DEBUG) #define acpi_handle_debug(handle, fmt, ...) \ acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__) #else #if defined(CONFIG_DYNAMIC_DEBUG) #define acpi_handle_debug(handle, fmt, ...) 
\ _dynamic_func_call(fmt, __acpi_handle_debug, \ handle, pr_fmt(fmt), ##__VA_ARGS__) #else #define acpi_handle_debug(handle, fmt, ...) \ ({ \ if (0) \ acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__); \ 0; \ }) #endif #endif #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB) bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index, bool *wake_capable); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } static inline bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index, bool *wake_capable) { return -ENXIO; } #endif static inline int acpi_dev_gpio_irq_wake_get(struct acpi_device *adev, int index, bool *wake_capable) { return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, wake_capable); } static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index) { return acpi_dev_gpio_irq_wake_get_by(adev, name, index, NULL); } static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, NULL); } /* Device properties */ #ifdef CONFIG_ACPI int acpi_dev_get_property(const struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct fwnode_reference_args *args); static inline int acpi_node_get_property_reference( const struct fwnode_handle *fwnode, const char *name, size_t index, struct fwnode_reference_args *args) { return __acpi_node_get_property_reference(fwnode, name, index, NR_FWNODE_REFERENCE_ARGS, args); } static inline bool acpi_dev_has_props(const struct acpi_device *adev) { return !list_empty(&adev->data.properties); } struct acpi_device_properties * acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid, union acpi_object *properties); int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr); struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, struct acpi_probe_entry *); #define ACPI_TABLE_ID_LEN 5 /** * struct acpi_probe_entry - boot-time probing entry * @id: ACPI table name * @type: Optional subtable type to match * (if @id contains subtables) * @subtable_valid: Optional callback to check the validity of * the subtable * @probe_table: Callback to the driver being probed when table * match is successful * @probe_subtbl: Callback to the driver being probed when table and * subtable match (and optional callback is successful) * @driver_data: Sideband data provided back to the driver */ struct acpi_probe_entry { __u8 id[ACPI_TABLE_ID_LEN]; __u8 type; acpi_probe_entry_validate_subtbl subtable_valid; union { acpi_tbl_table_handler probe_table; acpi_tbl_entry_handler probe_subtbl; }; kernel_ulong_t driver_data; }; #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry 
__acpi_probe_##name \ __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ .probe_table = fn, \ .driver_data = data, \ } #define ACPI_DECLARE_SUBTABLE_PROBE_ENTRY(table, name, table_id, \ subtable, valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ __used __section("__" #table "_acpi_probe_table") = { \ .id = table_id, \ .type = subtable, \ .subtable_valid = valid, \ .probe_subtbl = fn, \ .driver_data = data, \ } #define ACPI_PROBE_TABLE(name) __##name##_acpi_probe_table #define ACPI_PROBE_TABLE_END(name) __##name##_acpi_probe_table_end int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr); #define acpi_probe_device_table(t) \ ({ \ extern struct acpi_probe_entry ACPI_PROBE_TABLE(t), \ ACPI_PROBE_TABLE_END(t); \ __acpi_probe_device_table(&ACPI_PROBE_TABLE(t), \ (&ACPI_PROBE_TABLE_END(t) - \ &ACPI_PROBE_TABLE(t))); \ }) #else static inline int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj) { return -ENXIO; } static inline int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct fwnode_reference_args *args) { return -ENXIO; } static inline int acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, struct fwnode_reference_args *args) { return -ENXIO; } static inline int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr) { return -ENXIO; } static inline struct fwnode_handle * acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return NULL; } static inline struct fwnode_handle * acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return ERR_PTR(-ENXIO); } static inline int acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle **remote, struct fwnode_handle **port, struct fwnode_handle **endpoint) { return -ENXIO; } #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ = { (void *) table_id, \ (void *) subtable, \ (void *) valid, \ (void *) fn, \ (void *) data } #define acpi_probe_device_table(t) ({ int __r = 0; __r;}) #endif #ifdef CONFIG_ACPI_TABLE_UPGRADE void acpi_table_upgrade(void); #else static inline void acpi_table_upgrade(void) { } #endif #if defined(CONFIG_ACPI) && defined(CONFIG_ACPI_WATCHDOG) extern bool acpi_has_watchdog(void); #else static inline bool acpi_has_watchdog(void) { return false; } #endif #ifdef CONFIG_ACPI_SPCR_TABLE extern bool qdf2400_e44_present; int acpi_parse_spcr(bool enable_earlycon, bool enable_console); #else static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console) { return 0; } #endif #if IS_ENABLED(CONFIG_ACPI_GENERIC_GSI) int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res); #else static inline int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res) { return -EINVAL; } #endif #ifdef CONFIG_ACPI_LPIT int lpit_read_residency_count_address(u64 *address); #else static inline int lpit_read_residency_count_address(u64 *address) { return -EINVAL; } #endif #ifdef CONFIG_ACPI_PROCESSOR_IDLE #ifndef arch_get_idle_state_flags static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) { return 0; } #endif #endif /* CONFIG_ACPI_PROCESSOR_IDLE 
*/ #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology(unsigned int cpu, int level) { return -EINVAL; } static inline int find_acpi_cpu_topology_cluster(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology_package(unsigned int cpu) { return -EINVAL; } static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } #endif #ifdef CONFIG_ARM64 void acpi_arm_init(void); #else static inline void acpi_arm_init(void) { } #endif #ifdef CONFIG_ACPI_PCC void acpi_init_pcc(void); #else static inline void acpi_init_pcc(void) { } #endif #ifdef CONFIG_ACPI_FFH void acpi_init_ffh(void); extern int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt); extern int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context); #else static inline void acpi_init_ffh(void) { } #endif #ifdef CONFIG_ACPI extern void acpi_device_notify(struct device *dev); extern void acpi_device_notify_remove(struct device *dev); #else static inline void acpi_device_notify(struct device *dev) { } static inline void acpi_device_notify_remove(struct device *dev) { } #endif static inline void acpi_use_parent_companion(struct device *dev) { ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } #ifdef CONFIG_ACPI_HMAT int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, enum access_coordinate_class access); #else static inline int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, enum access_coordinate_class access) { return -EOPNOTSUPP; } #endif #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else static inline bool acpi_node_backed_by_real_pxm(int nid) { return false; } #endif #endif /*_LINUX_ACPI_H*/
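/*
 * Usage sketch, not taken from this header: how a platform driver's probe
 * routine might consult its ACPI companion with the helpers declared above.
 * The driver and function names are hypothetical; only has_acpi_companion(),
 * ACPI_COMPANION(), ACPI_HANDLE(), acpi_dev_name() and acpi_handle_info()
 * come from <linux/acpi.h>.
 */
#include <linux/acpi.h>
#include <linux/platform_device.h>

static int example_acpi_probe(struct platform_device *pdev)     /* hypothetical */
{
        struct device *dev = &pdev->dev;
        struct acpi_device *adev;

        if (!has_acpi_companion(dev))
                return -ENODEV;

        adev = ACPI_COMPANION(dev);
        acpi_handle_info(ACPI_HANDLE(dev), "probing ACPI device %s\n",
                         acpi_dev_name(adev));

        /* Firmware-provided properties can now be read through adev, e.g.
         * via acpi_dev_get_property() declared above. */
        return 0;
}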
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ioctl.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/syscalls.h> #include <linux/mm.h> #include <linux/capability.h> #include <linux/compat.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/security.h> #include <linux/export.h> #include <linux/uaccess.h> #include <linux/writeback.h> #include <linux/buffer_head.h> #include <linux/falloc.h> #include <linux/sched/signal.h> #include <linux/fiemap.h> #include <linux/mount.h> #include <linux/fscrypt.h> #include <linux/fileattr.h> #include "internal.h" #include <asm/ioctls.h> /* So that the fiemap access checks can't overflow on 32 bit machines. */ #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) /** * vfs_ioctl - call filesystem specific ioctl methods * @filp: open file to invoke ioctl method on * @cmd: ioctl command to execute * @arg: command-specific argument for ioctl * * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise * returns -ENOTTY. * * Returns 0 on success, -errno on error. */ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int error = -ENOTTY; if (!filp->f_op->unlocked_ioctl) goto out; error = filp->f_op->unlocked_ioctl(filp, cmd, arg); if (error == -ENOIOCTLCMD) error = -ENOTTY; out: return error; } EXPORT_SYMBOL(vfs_ioctl); static int ioctl_fibmap(struct file *filp, int __user *p) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; int error, ur_block; sector_t block; if (!capable(CAP_SYS_RAWIO)) return -EPERM; error = get_user(ur_block, p); if (error) return error; if (ur_block < 0) return -EINVAL; block = ur_block; error = bmap(inode, &block); if (block > INT_MAX) { error = -ERANGE; pr_warn_ratelimited("[%s/%d] FS: %s File: %pD4 would truncate fibmap result\n", current->comm, task_pid_nr(current), sb->s_id, filp); } if (error) ur_block = 0; else ur_block = block; if (put_user(ur_block, p)) error = -EFAULT; return error; } /** * fiemap_fill_next_extent - Fiemap helper function * @fieinfo: Fiemap context passed into ->fiemap * @logical: Extent logical start offset, in bytes * @phys: Extent physical start offset, in bytes * @len: Extent length, in bytes * @flags: FIEMAP_EXTENT flags that describe this extent * * Called from file system ->fiemap callback. Will populate extent * info as passed in via arguments and copy to user memory. On * success, extent count on fieinfo is incremented. * * Returns 0 on success, -errno on error, 1 if this was the last * extent that will fit in user array.
*/ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, u64 phys, u64 len, u32 flags) { struct fiemap_extent extent; struct fiemap_extent __user *dest = fieinfo->fi_extents_start; /* only count the extents */ if (fieinfo->fi_extents_max == 0) { fieinfo->fi_extents_mapped++; return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; } if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) return 1; #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) if (flags & SET_UNKNOWN_FLAGS) flags |= FIEMAP_EXTENT_UNKNOWN; if (flags & SET_NO_UNMOUNTED_IO_FLAGS) flags |= FIEMAP_EXTENT_ENCODED; if (flags & SET_NOT_ALIGNED_FLAGS) flags |= FIEMAP_EXTENT_NOT_ALIGNED; memset(&extent, 0, sizeof(extent)); extent.fe_logical = logical; extent.fe_physical = phys; extent.fe_length = len; extent.fe_flags = flags; dest += fieinfo->fi_extents_mapped; if (copy_to_user(dest, &extent, sizeof(extent))) return -EFAULT; fieinfo->fi_extents_mapped++; if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) return 1; return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; } EXPORT_SYMBOL(fiemap_fill_next_extent); /** * fiemap_prep - check validity of requested flags for fiemap * @inode: Inode to operate on * @fieinfo: Fiemap context passed into ->fiemap * @start: Start of the mapped range * @len: Length of the mapped range, can be truncated by this function. * @supported_flags: Set of fiemap flags that the file system understands * * This function must be called from each ->fiemap instance to validate the * fiemap request against the file system parameters. * * Returns 0 on success, or a negative error on failure. */ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 *len, u32 supported_flags) { u64 maxbytes = inode->i_sb->s_maxbytes; u32 incompat_flags; int ret = 0; if (*len == 0) return -EINVAL; if (start >= maxbytes) return -EFBIG; /* * Shrink request scope to what the fs can actually handle. 
*/ if (*len > maxbytes || (maxbytes - *len) < start) *len = maxbytes - start; supported_flags |= FIEMAP_FLAG_SYNC; supported_flags &= FIEMAP_FLAGS_COMPAT; incompat_flags = fieinfo->fi_flags & ~supported_flags; if (incompat_flags) { fieinfo->fi_flags = incompat_flags; return -EBADR; } if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) ret = filemap_write_and_wait(inode->i_mapping); return ret; } EXPORT_SYMBOL(fiemap_prep); static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap) { struct fiemap fiemap; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = file_inode(filp); int error; if (!inode->i_op->fiemap) return -EOPNOTSUPP; if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) return -EFAULT; if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) return -EINVAL; fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; fieinfo.fi_extents_start = ufiemap->fm_extents; error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, fiemap.fm_length); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) error = -EFAULT; return error; } static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { struct fd src_file = fdget(srcfd); loff_t cloned; int ret; if (!src_file.file) return -EBADF; cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen, 0); if (cloned < 0) ret = cloned; else if (olen && cloned != olen) ret = -EINVAL; else ret = 0; fdput(src_file); return ret; } static long ioctl_file_clone_range(struct file *file, struct file_clone_range __user *argp) { struct file_clone_range args; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; return ioctl_file_clone(file, args.src_fd, args.src_offset, args.src_length, args.dest_offset); } /* * This provides compatibility with legacy XFS pre-allocation ioctls * which predate the fallocate syscall. * * Only the l_start, l_len and l_whence fields of the 'struct space_resv' * are used here, rest are ignored. 
*/ static int ioctl_preallocate(struct file *filp, int mode, void __user *argp) { struct inode *inode = file_inode(filp); struct space_resv sr; if (copy_from_user(&sr, argp, sizeof(sr))) return -EFAULT; switch (sr.l_whence) { case SEEK_SET: break; case SEEK_CUR: sr.l_start += filp->f_pos; break; case SEEK_END: sr.l_start += i_size_read(inode); break; default: return -EINVAL; } return vfs_fallocate(filp, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); } /* on ia32 l_start is on a 32-bit boundary */ #if defined CONFIG_COMPAT && defined(CONFIG_X86_64) /* just account for different alignment */ static int compat_ioctl_preallocate(struct file *file, int mode, struct space_resv_32 __user *argp) { struct inode *inode = file_inode(file); struct space_resv_32 sr; if (copy_from_user(&sr, argp, sizeof(sr))) return -EFAULT; switch (sr.l_whence) { case SEEK_SET: break; case SEEK_CUR: sr.l_start += file->f_pos; break; case SEEK_END: sr.l_start += i_size_read(inode); break; default: return -EINVAL; } return vfs_fallocate(file, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); } #endif static int file_ioctl(struct file *filp, unsigned int cmd, int __user *p) { switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); case FS_IOC_RESVSP: case FS_IOC_RESVSP64: return ioctl_preallocate(filp, 0, p); case FS_IOC_UNRESVSP: case FS_IOC_UNRESVSP64: return ioctl_preallocate(filp, FALLOC_FL_PUNCH_HOLE, p); case FS_IOC_ZERO_RANGE: return ioctl_preallocate(filp, FALLOC_FL_ZERO_RANGE, p); } return -ENOIOCTLCMD; } static int ioctl_fionbio(struct file *filp, int __user *argp) { unsigned int flag; int on, error; error = get_user(on, argp); if (error) return error; flag = O_NONBLOCK; #ifdef __sparc__ /* SunOS compatibility item. */ if (O_NONBLOCK != O_NDELAY) flag |= O_NDELAY; #endif spin_lock(&filp->f_lock); if (on) filp->f_flags |= flag; else filp->f_flags &= ~flag; spin_unlock(&filp->f_lock); return error; } static int ioctl_fioasync(unsigned int fd, struct file *filp, int __user *argp) { unsigned int flag; int on, error; error = get_user(on, argp); if (error) return error; flag = on ? FASYNC : 0; /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { if (filp->f_op->fasync) /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else error = -ENOTTY; } return error < 0 ? error : 0; } static int ioctl_fsfreeze(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* If filesystem doesn't support freeze feature, return. 
*/ if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL) return -EOPNOTSUPP; /* Freeze */ if (sb->s_op->freeze_super) return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE); return freeze_super(sb, FREEZE_HOLDER_USERSPACE); } static int ioctl_fsthaw(struct file *filp) { struct super_block *sb = file_inode(filp)->i_sb; if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; /* Thaw */ if (sb->s_op->thaw_super) return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE); return thaw_super(sb, FREEZE_HOLDER_USERSPACE); } static int ioctl_file_dedupe_range(struct file *file, struct file_dedupe_range __user *argp) { struct file_dedupe_range *same = NULL; int ret; unsigned long size; u16 count; if (get_user(count, &argp->dest_count)) { ret = -EFAULT; goto out; } size = offsetof(struct file_dedupe_range, info[count]); if (size > PAGE_SIZE) { ret = -ENOMEM; goto out; } same = memdup_user(argp, size); if (IS_ERR(same)) { ret = PTR_ERR(same); same = NULL; goto out; } same->dest_count = count; ret = vfs_dedupe_file_range(file, same); if (ret) goto out; ret = copy_to_user(argp, same, size); if (ret) ret = -EFAULT; out: kfree(same); return ret; } /** * fileattr_fill_xflags - initialize fileattr with xflags * @fa: fileattr pointer * @xflags: FS_XFLAG_* flags * * Set ->fsx_xflags, ->fsx_valid and ->flags (translated xflags). All * other fields are zeroed. */ void fileattr_fill_xflags(struct fileattr *fa, u32 xflags) { memset(fa, 0, sizeof(*fa)); fa->fsx_valid = true; fa->fsx_xflags = xflags; if (fa->fsx_xflags & FS_XFLAG_IMMUTABLE) fa->flags |= FS_IMMUTABLE_FL; if (fa->fsx_xflags & FS_XFLAG_APPEND) fa->flags |= FS_APPEND_FL; if (fa->fsx_xflags & FS_XFLAG_SYNC) fa->flags |= FS_SYNC_FL; if (fa->fsx_xflags & FS_XFLAG_NOATIME) fa->flags |= FS_NOATIME_FL; if (fa->fsx_xflags & FS_XFLAG_NODUMP) fa->flags |= FS_NODUMP_FL; if (fa->fsx_xflags & FS_XFLAG_DAX) fa->flags |= FS_DAX_FL; if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) fa->flags |= FS_PROJINHERIT_FL; } EXPORT_SYMBOL(fileattr_fill_xflags); /** * fileattr_fill_flags - initialize fileattr with flags * @fa: fileattr pointer * @flags: FS_*_FL flags * * Set ->flags, ->flags_valid and ->fsx_xflags (translated flags). * All other fields are zeroed. */ void fileattr_fill_flags(struct fileattr *fa, u32 flags) { memset(fa, 0, sizeof(*fa)); fa->flags_valid = true; fa->flags = flags; if (fa->flags & FS_SYNC_FL) fa->fsx_xflags |= FS_XFLAG_SYNC; if (fa->flags & FS_IMMUTABLE_FL) fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; if (fa->flags & FS_APPEND_FL) fa->fsx_xflags |= FS_XFLAG_APPEND; if (fa->flags & FS_NODUMP_FL) fa->fsx_xflags |= FS_XFLAG_NODUMP; if (fa->flags & FS_NOATIME_FL) fa->fsx_xflags |= FS_XFLAG_NOATIME; if (fa->flags & FS_DAX_FL) fa->fsx_xflags |= FS_XFLAG_DAX; if (fa->flags & FS_PROJINHERIT_FL) fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; } EXPORT_SYMBOL(fileattr_fill_flags); /** * vfs_fileattr_get - retrieve miscellaneous file attributes * @dentry: the object to retrieve from * @fa: fileattr pointer * * Call i_op->fileattr_get() callback, if exists. * * Return: 0 on success, or a negative error on failure. */ int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); if (!inode->i_op->fileattr_get) return -ENOIOCTLCMD; return inode->i_op->fileattr_get(dentry, fa); } EXPORT_SYMBOL(vfs_fileattr_get); /** * copy_fsxattr_to_user - copy fsxattr to userspace. * @fa: fileattr pointer * @ufa: fsxattr user pointer * * Return: 0 on success, or -EFAULT on failure. 
*/ int copy_fsxattr_to_user(const struct fileattr *fa, struct fsxattr __user *ufa) { struct fsxattr xfa; memset(&xfa, 0, sizeof(xfa)); xfa.fsx_xflags = fa->fsx_xflags; xfa.fsx_extsize = fa->fsx_extsize; xfa.fsx_nextents = fa->fsx_nextents; xfa.fsx_projid = fa->fsx_projid; xfa.fsx_cowextsize = fa->fsx_cowextsize; if (copy_to_user(ufa, &xfa, sizeof(xfa))) return -EFAULT; return 0; } EXPORT_SYMBOL(copy_fsxattr_to_user); static int copy_fsxattr_from_user(struct fileattr *fa, struct fsxattr __user *ufa) { struct fsxattr xfa; if (copy_from_user(&xfa, ufa, sizeof(xfa))) return -EFAULT; fileattr_fill_xflags(fa, xfa.fsx_xflags); fa->fsx_extsize = xfa.fsx_extsize; fa->fsx_nextents = xfa.fsx_nextents; fa->fsx_projid = xfa.fsx_projid; fa->fsx_cowextsize = xfa.fsx_cowextsize; return 0; } /* * Generic function to check FS_IOC_FSSETXATTR/FS_IOC_SETFLAGS values and reject * any invalid configurations. * * Note: must be called with inode lock held. */ static int fileattr_set_prepare(struct inode *inode, const struct fileattr *old_ma, struct fileattr *fa) { int err; /* * The IMMUTABLE and APPEND_ONLY flags can only be changed by * the relevant capability. */ if ((fa->flags ^ old_ma->flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) && !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; err = fscrypt_prepare_setflags(inode, old_ma->flags, fa->flags); if (err) return err; /* * Project Quota ID state is only allowed to change from within the init * namespace. Enforce that restriction only if we are trying to change * the quota ID state. Everything else is allowed in user namespaces. */ if (current_user_ns() != &init_user_ns) { if (old_ma->fsx_projid != fa->fsx_projid) return -EINVAL; if ((old_ma->fsx_xflags ^ fa->fsx_xflags) & FS_XFLAG_PROJINHERIT) return -EINVAL; } else { /* * Caller is allowed to change the project ID. If it is being * changed, make sure that the new value is valid. */ if (old_ma->fsx_projid != fa->fsx_projid && !projid_valid(make_kprojid(&init_user_ns, fa->fsx_projid))) return -EINVAL; } /* Check extent size hints. */ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode)) return -EINVAL; if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) && !S_ISDIR(inode->i_mode)) return -EINVAL; if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) && !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) return -EINVAL; /* * It is only valid to set the DAX flag on regular files and * directories on filesystems. */ if ((fa->fsx_xflags & FS_XFLAG_DAX) && !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) return -EINVAL; /* Extent size hints of zero turn off the flags. */ if (fa->fsx_extsize == 0) fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); if (fa->fsx_cowextsize == 0) fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; return 0; } /** * vfs_fileattr_set - change miscellaneous file attributes * @idmap: idmap of the mount * @dentry: the object to change * @fa: fileattr pointer * * After verifying permissions, call i_op->fileattr_set() callback, if * exists. * * Verifying attributes involves retrieving current attributes with * i_op->fileattr_get(), this also allows initializing attributes that have * not been set by the caller to current values. Inode lock is held * thoughout to prevent racing with another instance. * * Return: 0 on success, or a negative error on failure. 
*/ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct fileattr old_ma = {}; int err; if (!inode->i_op->fileattr_set) return -ENOIOCTLCMD; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; inode_lock(inode); err = vfs_fileattr_get(dentry, &old_ma); if (!err) { /* initialize missing bits from old_ma */ if (fa->flags_valid) { fa->fsx_xflags |= old_ma.fsx_xflags & ~FS_XFLAG_COMMON; fa->fsx_extsize = old_ma.fsx_extsize; fa->fsx_nextents = old_ma.fsx_nextents; fa->fsx_projid = old_ma.fsx_projid; fa->fsx_cowextsize = old_ma.fsx_cowextsize; } else { fa->flags |= old_ma.flags & ~FS_COMMON_FL; } err = fileattr_set_prepare(inode, &old_ma, fa); if (!err) err = inode->i_op->fileattr_set(idmap, dentry, fa); } inode_unlock(inode); return err; } EXPORT_SYMBOL(vfs_fileattr_set); static int ioctl_getflags(struct file *file, unsigned int __user *argp) { struct fileattr fa = { .flags_valid = true }; /* hint only */ int err; err = vfs_fileattr_get(file->f_path.dentry, &fa); if (!err) err = put_user(fa.flags, argp); return err; } static int ioctl_setflags(struct file *file, unsigned int __user *argp) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct dentry *dentry = file->f_path.dentry; struct fileattr fa; unsigned int flags; int err; err = get_user(flags, argp); if (!err) { err = mnt_want_write_file(file); if (!err) { fileattr_fill_flags(&fa, flags); err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); } } return err; } static int ioctl_fsgetxattr(struct file *file, void __user *argp) { struct fileattr fa = { .fsx_valid = true }; /* hint only */ int err; err = vfs_fileattr_get(file->f_path.dentry, &fa); if (!err) err = copy_fsxattr_to_user(&fa, argp); return err; } static int ioctl_fssetxattr(struct file *file, void __user *argp) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct dentry *dentry = file->f_path.dentry; struct fileattr fa; int err; err = copy_fsxattr_from_user(&fa, argp); if (!err) { err = mnt_want_write_file(file); if (!err) { err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); } } return err; } static int ioctl_getfsuuid(struct file *file, void __user *argp) { struct super_block *sb = file_inode(file)->i_sb; struct fsuuid2 u = { .len = sb->s_uuid_len, }; if (!sb->s_uuid_len) return -ENOTTY; memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len); return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0; } static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp) { struct super_block *sb = file_inode(file)->i_sb; if (!strlen(sb->s_sysfs_name)) return -ENOTTY; struct fs_sysfs_path u = {}; u.len = scnprintf(u.name, sizeof(u.name), "%s/%s", sb->s_type->name, sb->s_sysfs_name); return copy_to_user(argp, &u, sizeof(u)) ? -EFAULT : 0; } /* * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. * It's just a simple helper for sys_ioctl and compat_sys_ioctl. * * When you add any new common ioctls to the switches above and below, * please ensure they have compatible arguments in compat mode. 
*/ static int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct inode *inode = file_inode(filp); switch (cmd) { case FIOCLEX: set_close_on_exec(fd, 1); return 0; case FIONCLEX: set_close_on_exec(fd, 0); return 0; case FIONBIO: return ioctl_fionbio(filp, argp); case FIOASYNC: return ioctl_fioasync(fd, filp, argp); case FIOQSIZE: if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { loff_t res = inode_get_bytes(inode); return copy_to_user(argp, &res, sizeof(res)) ? -EFAULT : 0; } return -ENOTTY; case FIFREEZE: return ioctl_fsfreeze(filp); case FITHAW: return ioctl_fsthaw(filp); case FS_IOC_FIEMAP: return ioctl_fiemap(filp, argp); case FIGETBSZ: /* anon_bdev filesystems may not have a block size */ if (!inode->i_sb->s_blocksize) return -EINVAL; return put_user(inode->i_sb->s_blocksize, (int __user *)argp); case FICLONE: return ioctl_file_clone(filp, arg, 0, 0, 0); case FICLONERANGE: return ioctl_file_clone_range(filp, argp); case FIDEDUPERANGE: return ioctl_file_dedupe_range(filp, argp); case FIONREAD: if (!S_ISREG(inode->i_mode)) return vfs_ioctl(filp, cmd, arg); return put_user(i_size_read(inode) - filp->f_pos, (int __user *)argp); case FS_IOC_GETFLAGS: return ioctl_getflags(filp, argp); case FS_IOC_SETFLAGS: return ioctl_setflags(filp, argp); case FS_IOC_FSGETXATTR: return ioctl_fsgetxattr(filp, argp); case FS_IOC_FSSETXATTR: return ioctl_fssetxattr(filp, argp); case FS_IOC_GETFSUUID: return ioctl_getfsuuid(filp, argp); case FS_IOC_GETFSSYSFSPATH: return ioctl_get_fs_sysfs_path(filp, argp); default: if (S_ISREG(inode->i_mode)) return file_ioctl(filp, cmd, argp); break; } return -ENOIOCTLCMD; } SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct fd f = fdget(fd); int error; if (!f.file) return -EBADF; error = security_file_ioctl(f.file, cmd, arg); if (error) goto out; error = do_vfs_ioctl(f.file, fd, cmd, arg); if (error == -ENOIOCTLCMD) error = vfs_ioctl(f.file, cmd, arg); out: fdput(f); return error; } #ifdef CONFIG_COMPAT /** * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation * @file: The file to operate on. * @cmd: The ioctl command number. * @arg: The argument to the ioctl. * * This is not normally called as a function, but instead set in struct * file_operations as * * .compat_ioctl = compat_ptr_ioctl, * * On most architectures, the compat_ptr_ioctl() just passes all arguments * to the corresponding ->ioctl handler. The exception is arch/s390, where * compat_ptr() clears the top bit of a 32-bit pointer value, so user space * pointers to the second 2GB alias the first 2GB, as is the case for * native 32-bit s390 user space. * * The compat_ptr_ioctl() function must therefore be used only with ioctl * functions that either ignore the argument or pass a pointer to a * compatible data type. * * If any ioctl command handled by fops->unlocked_ioctl passes a plain * integer instead of a pointer, or any of the passed data types * is incompatible between 32-bit and 64-bit architectures, a proper * handler is required instead of compat_ptr_ioctl. 
*/ long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { if (!file->f_op->unlocked_ioctl) return -ENOIOCTLCMD; return file->f_op->unlocked_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); } EXPORT_SYMBOL(compat_ptr_ioctl); COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { struct fd f = fdget(fd); int error; if (!f.file) return -EBADF; error = security_file_ioctl_compat(f.file, cmd, arg); if (error) goto out; switch (cmd) { /* FICLONE takes an int argument, so don't use compat_ptr() */ case FICLONE: error = ioctl_file_clone(f.file, arg, 0, 0, 0); break; #if defined(CONFIG_X86_64) /* these get messy on amd64 due to alignment differences */ case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: error = compat_ioctl_preallocate(f.file, 0, compat_ptr(arg)); break; case FS_IOC_UNRESVSP_32: case FS_IOC_UNRESVSP64_32: error = compat_ioctl_preallocate(f.file, FALLOC_FL_PUNCH_HOLE, compat_ptr(arg)); break; case FS_IOC_ZERO_RANGE_32: error = compat_ioctl_preallocate(f.file, FALLOC_FL_ZERO_RANGE, compat_ptr(arg)); break; #endif /* * These access 32-bit values anyway so no further handling is * necessary. */ case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: cmd = (cmd == FS_IOC32_GETFLAGS) ? FS_IOC_GETFLAGS : FS_IOC_SETFLAGS; fallthrough; /* * everything else in do_vfs_ioctl() takes either a compatible * pointer argument or no argument -- call it with a modified * argument. */ default: error = do_vfs_ioctl(f.file, fd, cmd, (unsigned long)compat_ptr(arg)); if (error != -ENOIOCTLCMD) break; if (f.file->f_op->compat_ioctl) error = f.file->f_op->compat_ioctl(f.file, cmd, arg); if (error == -ENOIOCTLCMD) error = -ENOTTY; break; } out: fdput(f); return error; } #endif
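/*
 * Editor's note -- illustrative example, not part of fs/ioctl.c above: a
 * minimal user-space sketch of the FS_IOC_FIEMAP path handled by
 * ioctl_fiemap()/fiemap_prep()/fiemap_fill_next_extent(). It asks for up to
 * 32 extents of a file and prints them; error handling is abbreviated and
 * the extent capacity of 32 is an arbitrary choice.
 */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	if (argc < 2)
		return 1;

	int fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* Room for up to 32 extents after the fixed struct fiemap header. */
	size_t sz = sizeof(struct fiemap) + 32 * sizeof(struct fiemap_extent);
	struct fiemap *fm = calloc(1, sz);

	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* whole file; fiemap_prep() trims it */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* request writeback before mapping */
	fm->fm_extent_count = 32;		/* capacity of fm_extents[] */

	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0) {
		for (unsigned int i = 0; i < fm->fm_mapped_extents; i++)
			printf("logical %llu physical %llu length %llu flags 0x%x\n",
			       (unsigned long long)fm->fm_extents[i].fe_logical,
			       (unsigned long long)fm->fm_extents[i].fe_physical,
			       (unsigned long long)fm->fm_extents[i].fe_length,
			       fm->fm_extents[i].fe_flags);
	}

	free(fm);
	close(fd);
	return 0;
}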
// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 *  Copyright (C) 2005 Mike Isely <isely@pobox.com>
 */

#include "pvrusb2-context.h"
#include "pvrusb2-io.h"
#include "pvrusb2-ioread.h"
#include "pvrusb2-hdw.h"
#include "pvrusb2-debug.h"
#include <linux/wait.h>
#include <linux/kthread.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/slab.h>

static struct pvr2_context *pvr2_context_exist_first;
static struct pvr2_context *pvr2_context_exist_last;
static struct pvr2_context *pvr2_context_notify_first;
static struct pvr2_context *pvr2_context_notify_last;
static DEFINE_MUTEX(pvr2_context_mutex);
static DECLARE_WAIT_QUEUE_HEAD(pvr2_context_sync_data);
static DECLARE_WAIT_QUEUE_HEAD(pvr2_context_cleanup_data);
static int pvr2_context_cleanup_flag;
static int pvr2_context_cleaned_flag;
static struct task_struct *pvr2_context_thread_ptr;

static void pvr2_context_set_notify(struct pvr2_context *mp, int fl)
{
	int signal_flag = 0;

	mutex_lock(&pvr2_context_mutex);
	if (fl) {
		if (!mp->notify_flag) {
			signal_flag = (pvr2_context_notify_first == NULL);
			mp->notify_prev = pvr2_context_notify_last;
			mp->notify_next = NULL;
			pvr2_context_notify_last = mp;
			if (mp->notify_prev) {
				mp->notify_prev->notify_next = mp;
			} else {
				pvr2_context_notify_first = mp;
			}
			mp->notify_flag = !0;
		}
	} else {
		if (mp->notify_flag) {
			mp->notify_flag = 0;
			if (mp->notify_next) {
				mp->notify_next->notify_prev = mp->notify_prev;
			} else {
				pvr2_context_notify_last = mp->notify_prev;
			}
			if (mp->notify_prev) {
				mp->notify_prev->notify_next = mp->notify_next;
			} else {
				pvr2_context_notify_first = mp->notify_next;
			}
		}
	}
	mutex_unlock(&pvr2_context_mutex);
	if (signal_flag)
		wake_up(&pvr2_context_sync_data);
}

static void pvr2_context_destroy(struct pvr2_context *mp)
{
	pvr2_trace(PVR2_TRACE_CTXT,"pvr2_context %p (destroy)",mp);
	pvr2_hdw_destroy(mp->hdw);
	pvr2_context_set_notify(mp, 0);
	mutex_lock(&pvr2_context_mutex);
	if (mp->exist_next) {
		mp->exist_next->exist_prev = mp->exist_prev;
	} else {
		pvr2_context_exist_last =
mp->exist_prev; } if (mp->exist_prev) { mp->exist_prev->exist_next = mp->exist_next; } else { pvr2_context_exist_first = mp->exist_next; } if (!pvr2_context_exist_first) { /* Trigger wakeup on control thread in case it is waiting for an exit condition. */ wake_up(&pvr2_context_sync_data); } mutex_unlock(&pvr2_context_mutex); kfree(mp); } static void pvr2_context_notify(void *ptr) { struct pvr2_context *mp = ptr; pvr2_context_set_notify(mp,!0); } static void pvr2_context_check(struct pvr2_context *mp) { struct pvr2_channel *ch1, *ch2; pvr2_trace(PVR2_TRACE_CTXT, "pvr2_context %p (notify)", mp); if (!mp->initialized_flag && !mp->disconnect_flag) { mp->initialized_flag = !0; pvr2_trace(PVR2_TRACE_CTXT, "pvr2_context %p (initialize)", mp); /* Finish hardware initialization */ if (pvr2_hdw_initialize(mp->hdw, pvr2_context_notify, mp)) { mp->video_stream.stream = pvr2_hdw_get_video_stream(mp->hdw); /* Trigger interface initialization. By doing this here initialization runs in our own safe and cozy thread context. */ if (mp->setup_func) mp->setup_func(mp); } else { pvr2_trace(PVR2_TRACE_CTXT, "pvr2_context %p (thread skipping setup)", mp); /* Even though initialization did not succeed, we're still going to continue anyway. We need to do this in order to await the expected disconnect (which we will detect in the normal course of operation). */ } } for (ch1 = mp->mc_first; ch1; ch1 = ch2) { ch2 = ch1->mc_next; if (ch1->check_func) ch1->check_func(ch1); } if (mp->disconnect_flag && !mp->mc_first) { /* Go away... */ pvr2_context_destroy(mp); return; } } static int pvr2_context_shutok(void) { return pvr2_context_cleanup_flag && (pvr2_context_exist_first == NULL); } static int pvr2_context_thread_func(void *foo) { struct pvr2_context *mp; pvr2_trace(PVR2_TRACE_CTXT,"pvr2_context thread start"); do { while ((mp = pvr2_context_notify_first) != NULL) { pvr2_context_set_notify(mp, 0); pvr2_context_check(mp); } wait_event_interruptible( pvr2_context_sync_data, ((pvr2_context_notify_first != NULL) || pvr2_context_shutok())); } while (!pvr2_context_shutok()); pvr2_context_cleaned_flag = !0; wake_up(&pvr2_context_cleanup_data); pvr2_trace(PVR2_TRACE_CTXT,"pvr2_context thread cleaned up"); wait_event_interruptible( pvr2_context_sync_data, kthread_should_stop()); pvr2_trace(PVR2_TRACE_CTXT,"pvr2_context thread end"); return 0; } int pvr2_context_global_init(void) { pvr2_context_thread_ptr = kthread_run(pvr2_context_thread_func, NULL, "pvrusb2-context"); return IS_ERR(pvr2_context_thread_ptr) ? 
-ENOMEM : 0; } void pvr2_context_global_done(void) { pvr2_context_cleanup_flag = !0; wake_up(&pvr2_context_sync_data); wait_event_interruptible( pvr2_context_cleanup_data, pvr2_context_cleaned_flag); kthread_stop(pvr2_context_thread_ptr); } struct pvr2_context *pvr2_context_create( struct usb_interface *intf, const struct usb_device_id *devid, void (*setup_func)(struct pvr2_context *)) { struct pvr2_context *mp = NULL; mp = kzalloc(sizeof(*mp),GFP_KERNEL); if (!mp) goto done; pvr2_trace(PVR2_TRACE_CTXT,"pvr2_context %p (create)",mp); mp->setup_func = setup_func; mutex_init(&mp->mutex); mutex_lock(&pvr2_context_mutex); mp->exist_prev = pvr2_context_exist_last; mp->exist_next = NULL; pvr2_context_exist_last = mp; if (mp->exist_prev) { mp->exist_prev->exist_next = mp; } else { pvr2_context_exist_first = mp; } mutex_unlock(&pvr2_context_mutex); mp->hdw = pvr2_hdw_create(intf,devid); if (!mp->hdw) { pvr2_context_destroy(mp); mp = NULL; goto done; } pvr2_context_set_notify(mp, !0); done: return mp; } static void pvr2_context_reset_input_limits(struct pvr2_context *mp) { unsigned int tmsk,mmsk; struct pvr2_channel *cp; struct pvr2_hdw *hdw = mp->hdw; mmsk = pvr2_hdw_get_input_available(hdw); tmsk = mmsk; for (cp = mp->mc_first; cp; cp = cp->mc_next) { if (!cp->input_mask) continue; tmsk &= cp->input_mask; } pvr2_hdw_set_input_allowed(hdw,mmsk,tmsk); pvr2_hdw_commit_ctl(hdw); } static void pvr2_context_enter(struct pvr2_context *mp) { mutex_lock(&mp->mutex); } static void pvr2_context_exit(struct pvr2_context *mp) { int destroy_flag = 0; if (!(mp->mc_first || !mp->disconnect_flag)) { destroy_flag = !0; } mutex_unlock(&mp->mutex); if (destroy_flag) pvr2_context_notify(mp); } void pvr2_context_disconnect(struct pvr2_context *mp) { pvr2_hdw_disconnect(mp->hdw); if (!pvr2_context_shutok()) pvr2_context_notify(mp); mp->disconnect_flag = !0; } void pvr2_channel_init(struct pvr2_channel *cp,struct pvr2_context *mp) { pvr2_context_enter(mp); cp->hdw = mp->hdw; cp->mc_head = mp; cp->mc_next = NULL; cp->mc_prev = mp->mc_last; if (mp->mc_last) { mp->mc_last->mc_next = cp; } else { mp->mc_first = cp; } mp->mc_last = cp; pvr2_context_exit(mp); } static void pvr2_channel_disclaim_stream(struct pvr2_channel *cp) { if (!cp->stream) return; pvr2_stream_kill(cp->stream->stream); cp->stream->user = NULL; cp->stream = NULL; } void pvr2_channel_done(struct pvr2_channel *cp) { struct pvr2_context *mp = cp->mc_head; pvr2_context_enter(mp); cp->input_mask = 0; pvr2_channel_disclaim_stream(cp); pvr2_context_reset_input_limits(mp); if (cp->mc_next) { cp->mc_next->mc_prev = cp->mc_prev; } else { mp->mc_last = cp->mc_prev; } if (cp->mc_prev) { cp->mc_prev->mc_next = cp->mc_next; } else { mp->mc_first = cp->mc_next; } cp->hdw = NULL; pvr2_context_exit(mp); } int pvr2_channel_limit_inputs(struct pvr2_channel *cp,unsigned int cmsk) { unsigned int tmsk,mmsk; int ret = 0; struct pvr2_channel *p2; struct pvr2_hdw *hdw = cp->hdw; mmsk = pvr2_hdw_get_input_available(hdw); cmsk &= mmsk; if (cmsk == cp->input_mask) { /* No change; nothing to do */ return 0; } pvr2_context_enter(cp->mc_head); do { if (!cmsk) { cp->input_mask = 0; pvr2_context_reset_input_limits(cp->mc_head); break; } tmsk = mmsk; for (p2 = cp->mc_head->mc_first; p2; p2 = p2->mc_next) { if (p2 == cp) continue; if (!p2->input_mask) continue; tmsk &= p2->input_mask; } if (!(tmsk & cmsk)) { ret = -EPERM; break; } tmsk &= cmsk; if ((ret = pvr2_hdw_set_input_allowed(hdw,mmsk,tmsk)) != 0) { /* Internal failure changing allowed list; probably should not happen, but react if it 
does. */ break; } cp->input_mask = cmsk; pvr2_hdw_commit_ctl(hdw); } while (0); pvr2_context_exit(cp->mc_head); return ret; } unsigned int pvr2_channel_get_limited_inputs(struct pvr2_channel *cp) { return cp->input_mask; } int pvr2_channel_claim_stream(struct pvr2_channel *cp, struct pvr2_context_stream *sp) { int code = 0; pvr2_context_enter(cp->mc_head); do { if (sp == cp->stream) break; if (sp && sp->user) { code = -EBUSY; break; } pvr2_channel_disclaim_stream(cp); if (!sp) break; sp->user = cp; cp->stream = sp; } while (0); pvr2_context_exit(cp->mc_head); return code; } // This is the marker for the real beginning of a legitimate mpeg2 stream. static char stream_sync_key[] = { 0x00, 0x00, 0x01, 0xba, }; struct pvr2_ioread *pvr2_channel_create_mpeg_stream( struct pvr2_context_stream *sp) { struct pvr2_ioread *cp; cp = pvr2_ioread_create(); if (!cp) return NULL; pvr2_ioread_setup(cp,sp->stream); pvr2_ioread_set_sync_key(cp,stream_sync_key,sizeof(stream_sync_key)); return cp; }
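/*
 * Editor's note -- illustrative sketch, not part of pvrusb2-context.c: how an
 * interface layer is expected to drive the channel API defined above. The
 * struct my_interface wrapper and its two functions are hypothetical; only
 * the pvr2_channel_*() and pvr2_channel_create_mpeg_stream() calls come from
 * this file, and teardown of the pvr2_ioread object is omitted.
 */
#include <linux/slab.h>
#include <linux/errno.h>
#include "pvrusb2-context.h"
#include "pvrusb2-ioread.h"

struct my_interface {
	struct pvr2_channel channel;	/* embedded, as the real interfaces do */
	struct pvr2_ioread *reader;
};

static int my_interface_create(struct pvr2_context *mp,
			       struct pvr2_context_stream *sp)
{
	struct my_interface *ip = kzalloc(sizeof(*ip), GFP_KERNEL);

	if (!ip)
		return -ENOMEM;

	/* Link this channel into the context's channel list. */
	pvr2_channel_init(&ip->channel, mp);

	/* Claim the stream for exclusive use, then wrap it for reading. */
	if (pvr2_channel_claim_stream(&ip->channel, sp) == 0)
		ip->reader = pvr2_channel_create_mpeg_stream(sp);

	return 0;
}

static void my_interface_destroy(struct my_interface *ip)
{
	/* pvr2_channel_done() also disclaims any stream still held. */
	pvr2_channel_done(&ip->channel);
	kfree(ip);
}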
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Roccat KonePure driver for Linux
 *
 * Copyright (c) 2012 Stefan Achatz <erazor_de@users.sourceforge.net>
 */

/*
 */

/*
 * Roccat KonePure is a smaller version of KoneXTD with less buttons and lights.
 */

#include <linux/types.h>
#include <linux/device.h>
#include <linux/input.h>
#include <linux/hid.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/hid-roccat.h>
#include "hid-ids.h"
#include "hid-roccat-common.h"

enum {
	KONEPURE_MOUSE_REPORT_NUMBER_BUTTON = 3,
};

struct konepure_mouse_report_button {
	uint8_t report_number; /* always KONEPURE_MOUSE_REPORT_NUMBER_BUTTON */
	uint8_t zero;
	uint8_t type;
	uint8_t data1;
	uint8_t data2;
	uint8_t zero2;
	uint8_t unknown[2];
} __packed;

ROCCAT_COMMON2_BIN_ATTRIBUTE_W(control, 0x04, 0x03);
ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(actual_profile, 0x05, 0x03);
ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(profile_settings, 0x06, 0x1f);
ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(profile_buttons, 0x07, 0x3b);
ROCCAT_COMMON2_BIN_ATTRIBUTE_W(macro, 0x08, 0x0822);
ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(info, 0x09, 0x06);
ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(tcu, 0x0c, 0x04);
ROCCAT_COMMON2_BIN_ATTRIBUTE_R(tcu_image, 0x0c, 0x0404);
ROCCAT_COMMON2_BIN_ATTRIBUTE_RW(sensor, 0x0f, 0x06);
ROCCAT_COMMON2_BIN_ATTRIBUTE_W(talk, 0x10, 0x10);

static struct bin_attribute *konepure_bin_attrs[] = {
	&bin_attr_actual_profile,
	&bin_attr_control,
	&bin_attr_info,
	&bin_attr_talk,
	&bin_attr_macro,
	&bin_attr_sensor,
	&bin_attr_tcu,
	&bin_attr_tcu_image,
	&bin_attr_profile_settings,
	&bin_attr_profile_buttons,
	NULL,
};

static const struct attribute_group konepure_group = {
	.bin_attrs = konepure_bin_attrs,
};

static const struct attribute_group *konepure_groups[] = {
	&konepure_group,
	NULL,
};

static const struct class konepure_class = {
	.name = "konepure",
	.dev_groups = konepure_groups,
};

static int konepure_init_specials(struct hid_device *hdev)
{
	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
	struct usb_device *usb_dev = interface_to_usbdev(intf);
	struct roccat_common2_device *konepure;
	int retval;

	if (intf->cur_altsetting->desc.bInterfaceProtocol
			!= USB_INTERFACE_PROTOCOL_MOUSE) {
		hid_set_drvdata(hdev, NULL);
		return 0;
	}

	konepure = kzalloc(sizeof(*konepure), GFP_KERNEL);
	if (!konepure) {
		hid_err(hdev, "can't alloc device descriptor\n");
		return -ENOMEM;
	}
	hid_set_drvdata(hdev, konepure);

	retval = roccat_common2_device_init_struct(usb_dev, konepure);
	if (retval) {
		hid_err(hdev, "couldn't init KonePure device\n");
		goto exit_free;
	}

	retval = roccat_connect(&konepure_class, hdev,
			sizeof(struct konepure_mouse_report_button));
	if (retval < 0) {
		hid_err(hdev, "couldn't init char dev\n");
	} else {
		konepure->chrdev_minor = retval;
konepure->roccat_claimed = 1; } return 0; exit_free: kfree(konepure); return retval; } static void konepure_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct roccat_common2_device *konepure; if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) return; konepure = hid_get_drvdata(hdev); if (konepure->roccat_claimed) roccat_disconnect(konepure->chrdev_minor); kfree(konepure); } static int konepure_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = konepure_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void konepure_remove(struct hid_device *hdev) { konepure_remove_specials(hdev); hid_hw_stop(hdev); } static int konepure_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct roccat_common2_device *konepure = hid_get_drvdata(hdev); if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) return 0; if (data[0] != KONEPURE_MOUSE_REPORT_NUMBER_BUTTON) return 0; if (konepure != NULL && konepure->roccat_claimed) roccat_report_event(konepure->chrdev_minor, data); return 0; } static const struct hid_device_id konepure_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEPURE) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEPURE_OPTICAL) }, { } }; MODULE_DEVICE_TABLE(hid, konepure_devices); static struct hid_driver konepure_driver = { .name = "konepure", .id_table = konepure_devices, .probe = konepure_probe, .remove = konepure_remove, .raw_event = konepure_raw_event }; static int __init konepure_init(void) { int retval; retval = class_register(&konepure_class); if (retval) return retval; retval = hid_register_driver(&konepure_driver); if (retval) class_unregister(&konepure_class); return retval; } static void __exit konepure_exit(void) { hid_unregister_driver(&konepure_driver); class_unregister(&konepure_class); } module_init(konepure_init); module_exit(konepure_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat KonePure/Optical driver"); MODULE_LICENSE("GPL v2");
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Roccat Kova[+] driver for Linux
 *
 * Copyright (c) 2011 Stefan Achatz <erazor_de@users.sourceforge.net>
 */

/*
 */

/*
 * Roccat Kova[+] is a bigger version of the Pyra with two more side buttons.
 */

#include <linux/device.h>
#include <linux/input.h>
#include <linux/hid.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/hid-roccat.h>
#include "hid-ids.h"
#include "hid-roccat-common.h"
#include "hid-roccat-kovaplus.h"

static uint profile_numbers[5] = {0, 1, 2, 3, 4};

static uint kovaplus_convert_event_cpi(uint value)
{
	return (value == 7 ? 4 : (value == 4 ?
3 : value)); } static void kovaplus_profile_activated(struct kovaplus_device *kovaplus, uint new_profile_index) { if (new_profile_index >= ARRAY_SIZE(kovaplus->profile_settings)) return; kovaplus->actual_profile = new_profile_index; kovaplus->actual_cpi = kovaplus->profile_settings[new_profile_index].cpi_startup_level; kovaplus->actual_x_sensitivity = kovaplus->profile_settings[new_profile_index].sensitivity_x; kovaplus->actual_y_sensitivity = kovaplus->profile_settings[new_profile_index].sensitivity_y; } static int kovaplus_send_control(struct usb_device *usb_dev, uint value, enum kovaplus_control_requests request) { int retval; struct roccat_common2_control control; if ((request == KOVAPLUS_CONTROL_REQUEST_PROFILE_SETTINGS || request == KOVAPLUS_CONTROL_REQUEST_PROFILE_BUTTONS) && value > 4) return -EINVAL; control.command = ROCCAT_COMMON_COMMAND_CONTROL; control.value = value; control.request = request; retval = roccat_common2_send(usb_dev, ROCCAT_COMMON_COMMAND_CONTROL, &control, sizeof(struct roccat_common2_control)); return retval; } static int kovaplus_select_profile(struct usb_device *usb_dev, uint number, enum kovaplus_control_requests request) { return kovaplus_send_control(usb_dev, number, request); } static int kovaplus_get_profile_settings(struct usb_device *usb_dev, struct kovaplus_profile_settings *buf, uint number) { int retval; retval = kovaplus_select_profile(usb_dev, number, KOVAPLUS_CONTROL_REQUEST_PROFILE_SETTINGS); if (retval) return retval; return roccat_common2_receive(usb_dev, KOVAPLUS_COMMAND_PROFILE_SETTINGS, buf, KOVAPLUS_SIZE_PROFILE_SETTINGS); } static int kovaplus_get_profile_buttons(struct usb_device *usb_dev, struct kovaplus_profile_buttons *buf, int number) { int retval; retval = kovaplus_select_profile(usb_dev, number, KOVAPLUS_CONTROL_REQUEST_PROFILE_BUTTONS); if (retval) return retval; return roccat_common2_receive(usb_dev, KOVAPLUS_COMMAND_PROFILE_BUTTONS, buf, KOVAPLUS_SIZE_PROFILE_BUTTONS); } /* retval is 0-4 on success, < 0 on error */ static int kovaplus_get_actual_profile(struct usb_device *usb_dev) { struct kovaplus_actual_profile buf; int retval; retval = roccat_common2_receive(usb_dev, KOVAPLUS_COMMAND_ACTUAL_PROFILE, &buf, sizeof(struct kovaplus_actual_profile)); return retval ? 
retval : buf.actual_profile; } static int kovaplus_set_actual_profile(struct usb_device *usb_dev, int new_profile) { struct kovaplus_actual_profile buf; buf.command = KOVAPLUS_COMMAND_ACTUAL_PROFILE; buf.size = sizeof(struct kovaplus_actual_profile); buf.actual_profile = new_profile; return roccat_common2_send_with_status(usb_dev, KOVAPLUS_COMMAND_ACTUAL_PROFILE, &buf, sizeof(struct kovaplus_actual_profile)); } static ssize_t kovaplus_sysfs_read(struct file *fp, struct kobject *kobj, char *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kovaplus_device *kovaplus = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off >= real_size) return 0; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&kovaplus->kovaplus_lock); retval = roccat_common2_receive(usb_dev, command, buf, real_size); mutex_unlock(&kovaplus->kovaplus_lock); if (retval) return retval; return real_size; } static ssize_t kovaplus_sysfs_write(struct file *fp, struct kobject *kobj, void const *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kovaplus_device *kovaplus = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&kovaplus->kovaplus_lock); retval = roccat_common2_send_with_status(usb_dev, command, buf, real_size); mutex_unlock(&kovaplus->kovaplus_lock); if (retval) return retval; return real_size; } #define KOVAPLUS_SYSFS_W(thingy, THINGY) \ static ssize_t kovaplus_sysfs_write_ ## thingy(struct file *fp, \ struct kobject *kobj, struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ return kovaplus_sysfs_write(fp, kobj, buf, off, count, \ KOVAPLUS_SIZE_ ## THINGY, KOVAPLUS_COMMAND_ ## THINGY); \ } #define KOVAPLUS_SYSFS_R(thingy, THINGY) \ static ssize_t kovaplus_sysfs_read_ ## thingy(struct file *fp, \ struct kobject *kobj, struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ return kovaplus_sysfs_read(fp, kobj, buf, off, count, \ KOVAPLUS_SIZE_ ## THINGY, KOVAPLUS_COMMAND_ ## THINGY); \ } #define KOVAPLUS_SYSFS_RW(thingy, THINGY) \ KOVAPLUS_SYSFS_W(thingy, THINGY) \ KOVAPLUS_SYSFS_R(thingy, THINGY) #define KOVAPLUS_BIN_ATTRIBUTE_RW(thingy, THINGY) \ KOVAPLUS_SYSFS_RW(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0660 }, \ .size = KOVAPLUS_SIZE_ ## THINGY, \ .read = kovaplus_sysfs_read_ ## thingy, \ .write = kovaplus_sysfs_write_ ## thingy \ } #define KOVAPLUS_BIN_ATTRIBUTE_W(thingy, THINGY) \ KOVAPLUS_SYSFS_W(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0220 }, \ .size = KOVAPLUS_SIZE_ ## THINGY, \ .write = kovaplus_sysfs_write_ ## thingy \ } KOVAPLUS_BIN_ATTRIBUTE_W(control, CONTROL); KOVAPLUS_BIN_ATTRIBUTE_RW(info, INFO); KOVAPLUS_BIN_ATTRIBUTE_RW(profile_settings, PROFILE_SETTINGS); KOVAPLUS_BIN_ATTRIBUTE_RW(profile_buttons, PROFILE_BUTTONS); static ssize_t kovaplus_sysfs_read_profilex_settings(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); ssize_t retval; retval = kovaplus_select_profile(usb_dev, 
*(uint *)(attr->private), KOVAPLUS_CONTROL_REQUEST_PROFILE_SETTINGS); if (retval) return retval; return kovaplus_sysfs_read(fp, kobj, buf, off, count, KOVAPLUS_SIZE_PROFILE_SETTINGS, KOVAPLUS_COMMAND_PROFILE_SETTINGS); } static ssize_t kovaplus_sysfs_read_profilex_buttons(struct file *fp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); ssize_t retval; retval = kovaplus_select_profile(usb_dev, *(uint *)(attr->private), KOVAPLUS_CONTROL_REQUEST_PROFILE_BUTTONS); if (retval) return retval; return kovaplus_sysfs_read(fp, kobj, buf, off, count, KOVAPLUS_SIZE_PROFILE_BUTTONS, KOVAPLUS_COMMAND_PROFILE_BUTTONS); } #define PROFILE_ATTR(number) \ static struct bin_attribute bin_attr_profile##number##_settings = { \ .attr = { .name = "profile" #number "_settings", .mode = 0440 }, \ .size = KOVAPLUS_SIZE_PROFILE_SETTINGS, \ .read = kovaplus_sysfs_read_profilex_settings, \ .private = &profile_numbers[number-1], \ }; \ static struct bin_attribute bin_attr_profile##number##_buttons = { \ .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \ .size = KOVAPLUS_SIZE_PROFILE_BUTTONS, \ .read = kovaplus_sysfs_read_profilex_buttons, \ .private = &profile_numbers[number-1], \ }; PROFILE_ATTR(1); PROFILE_ATTR(2); PROFILE_ATTR(3); PROFILE_ATTR(4); PROFILE_ATTR(5); static ssize_t kovaplus_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct kovaplus_device *kovaplus = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kovaplus->actual_profile); } static ssize_t kovaplus_sysfs_set_actual_profile(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct kovaplus_device *kovaplus; struct usb_device *usb_dev; unsigned long profile; int retval; struct kovaplus_roccat_report roccat_report; dev = dev->parent->parent; kovaplus = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &profile); if (retval) return retval; if (profile >= 5) return -EINVAL; mutex_lock(&kovaplus->kovaplus_lock); retval = kovaplus_set_actual_profile(usb_dev, profile); if (retval) { mutex_unlock(&kovaplus->kovaplus_lock); return retval; } kovaplus_profile_activated(kovaplus, profile); roccat_report.type = KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_PROFILE_1; roccat_report.profile = profile + 1; roccat_report.button = 0; roccat_report.data1 = profile + 1; roccat_report.data2 = 0; roccat_report_event(kovaplus->chrdev_minor, (uint8_t const *)&roccat_report); mutex_unlock(&kovaplus->kovaplus_lock); return size; } static DEVICE_ATTR(actual_profile, 0660, kovaplus_sysfs_show_actual_profile, kovaplus_sysfs_set_actual_profile); static ssize_t kovaplus_sysfs_show_actual_cpi(struct device *dev, struct device_attribute *attr, char *buf) { struct kovaplus_device *kovaplus = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kovaplus->actual_cpi); } static DEVICE_ATTR(actual_cpi, 0440, kovaplus_sysfs_show_actual_cpi, NULL); static ssize_t kovaplus_sysfs_show_actual_sensitivity_x(struct device *dev, struct device_attribute *attr, char *buf) { struct kovaplus_device *kovaplus = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kovaplus->actual_x_sensitivity); } static DEVICE_ATTR(actual_sensitivity_x, 0440, 
kovaplus_sysfs_show_actual_sensitivity_x, NULL); static ssize_t kovaplus_sysfs_show_actual_sensitivity_y(struct device *dev, struct device_attribute *attr, char *buf) { struct kovaplus_device *kovaplus = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", kovaplus->actual_y_sensitivity); } static DEVICE_ATTR(actual_sensitivity_y, 0440, kovaplus_sysfs_show_actual_sensitivity_y, NULL); static ssize_t kovaplus_sysfs_show_firmware_version(struct device *dev, struct device_attribute *attr, char *buf) { struct kovaplus_device *kovaplus; struct usb_device *usb_dev; struct kovaplus_info info; dev = dev->parent->parent; kovaplus = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); mutex_lock(&kovaplus->kovaplus_lock); roccat_common2_receive(usb_dev, KOVAPLUS_COMMAND_INFO, &info, KOVAPLUS_SIZE_INFO); mutex_unlock(&kovaplus->kovaplus_lock); return snprintf(buf, PAGE_SIZE, "%d\n", info.firmware_version); } static DEVICE_ATTR(firmware_version, 0440, kovaplus_sysfs_show_firmware_version, NULL); static struct attribute *kovaplus_attrs[] = { &dev_attr_actual_cpi.attr, &dev_attr_firmware_version.attr, &dev_attr_actual_profile.attr, &dev_attr_actual_sensitivity_x.attr, &dev_attr_actual_sensitivity_y.attr, NULL, }; static struct bin_attribute *kovaplus_bin_attributes[] = { &bin_attr_control, &bin_attr_info, &bin_attr_profile_settings, &bin_attr_profile_buttons, &bin_attr_profile1_settings, &bin_attr_profile2_settings, &bin_attr_profile3_settings, &bin_attr_profile4_settings, &bin_attr_profile5_settings, &bin_attr_profile1_buttons, &bin_attr_profile2_buttons, &bin_attr_profile3_buttons, &bin_attr_profile4_buttons, &bin_attr_profile5_buttons, NULL, }; static const struct attribute_group kovaplus_group = { .attrs = kovaplus_attrs, .bin_attrs = kovaplus_bin_attributes, }; static const struct attribute_group *kovaplus_groups[] = { &kovaplus_group, NULL, }; static const struct class kovaplus_class = { .name = "kovaplus", .dev_groups = kovaplus_groups, }; static int kovaplus_init_kovaplus_device_struct(struct usb_device *usb_dev, struct kovaplus_device *kovaplus) { int retval, i; static uint wait = 70; /* device will freeze with just 60 */ mutex_init(&kovaplus->kovaplus_lock); for (i = 0; i < 5; ++i) { msleep(wait); retval = kovaplus_get_profile_settings(usb_dev, &kovaplus->profile_settings[i], i); if (retval) return retval; msleep(wait); retval = kovaplus_get_profile_buttons(usb_dev, &kovaplus->profile_buttons[i], i); if (retval) return retval; } msleep(wait); retval = kovaplus_get_actual_profile(usb_dev); if (retval < 0) return retval; kovaplus_profile_activated(kovaplus, retval); return 0; } static int kovaplus_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct kovaplus_device *kovaplus; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { kovaplus = kzalloc(sizeof(*kovaplus), GFP_KERNEL); if (!kovaplus) { hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, kovaplus); retval = kovaplus_init_kovaplus_device_struct(usb_dev, kovaplus); if (retval) { hid_err(hdev, "couldn't init struct kovaplus_device\n"); goto exit_free; } retval = roccat_connect(&kovaplus_class, hdev, sizeof(struct kovaplus_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { kovaplus->chrdev_minor = retval; kovaplus->roccat_claimed = 
1; } } else { hid_set_drvdata(hdev, NULL); } return 0; exit_free: kfree(kovaplus); return retval; } static void kovaplus_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct kovaplus_device *kovaplus; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { kovaplus = hid_get_drvdata(hdev); if (kovaplus->roccat_claimed) roccat_disconnect(kovaplus->chrdev_minor); kfree(kovaplus); } } static int kovaplus_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = kovaplus_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void kovaplus_remove(struct hid_device *hdev) { kovaplus_remove_specials(hdev); hid_hw_stop(hdev); } static void kovaplus_keep_values_up_to_date(struct kovaplus_device *kovaplus, u8 const *data) { struct kovaplus_mouse_report_button const *button_report; if (data[0] != KOVAPLUS_MOUSE_REPORT_NUMBER_BUTTON) return; button_report = (struct kovaplus_mouse_report_button const *)data; switch (button_report->type) { case KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_PROFILE_1: kovaplus_profile_activated(kovaplus, button_report->data1 - 1); break; case KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_CPI: kovaplus->actual_cpi = kovaplus_convert_event_cpi(button_report->data1); break; case KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_SENSITIVITY: kovaplus->actual_x_sensitivity = button_report->data1; kovaplus->actual_y_sensitivity = button_report->data2; break; default: break; } } static void kovaplus_report_to_chrdev(struct kovaplus_device const *kovaplus, u8 const *data) { struct kovaplus_roccat_report roccat_report; struct kovaplus_mouse_report_button const *button_report; if (data[0] != KOVAPLUS_MOUSE_REPORT_NUMBER_BUTTON) return; button_report = (struct kovaplus_mouse_report_button const *)data; if (button_report->type == KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_PROFILE_2) return; roccat_report.type = button_report->type; roccat_report.profile = kovaplus->actual_profile + 1; if (roccat_report.type == KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_MACRO || roccat_report.type == KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_SHORTCUT || roccat_report.type == KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_QUICKLAUNCH || roccat_report.type == KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_TIMER) roccat_report.button = button_report->data1; else roccat_report.button = 0; if (roccat_report.type == KOVAPLUS_MOUSE_REPORT_BUTTON_TYPE_CPI) roccat_report.data1 = kovaplus_convert_event_cpi(button_report->data1); else roccat_report.data1 = button_report->data1; roccat_report.data2 = button_report->data2; roccat_report_event(kovaplus->chrdev_minor, (uint8_t const *)&roccat_report); } static int kovaplus_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct kovaplus_device *kovaplus = hid_get_drvdata(hdev); if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) return 0; if (kovaplus == NULL) return 0; kovaplus_keep_values_up_to_date(kovaplus, data); if (kovaplus->roccat_claimed) kovaplus_report_to_chrdev(kovaplus, data); return 0; } static const struct hid_device_id kovaplus_devices[] = { { 
HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KOVAPLUS) }, { } }; MODULE_DEVICE_TABLE(hid, kovaplus_devices); static struct hid_driver kovaplus_driver = { .name = "kovaplus", .id_table = kovaplus_devices, .probe = kovaplus_probe, .remove = kovaplus_remove, .raw_event = kovaplus_raw_event }; static int __init kovaplus_init(void) { int retval; retval = class_register(&kovaplus_class); if (retval) return retval; retval = hid_register_driver(&kovaplus_driver); if (retval) class_unregister(&kovaplus_class); return retval; } static void __exit kovaplus_exit(void) { hid_unregister_driver(&kovaplus_driver); class_unregister(&kovaplus_class); } module_init(kovaplus_init); module_exit(kovaplus_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Kova[+] driver"); MODULE_LICENSE("GPL v2");
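/*
 * Example (a user-space sketch, not part of the driver above): reading the
 * read-only sysfs attributes exported by kovaplus_attrs[].  No particular
 * sysfs path is assumed; the directory that contains the attributes is
 * passed on the command line because its location depends on the USB/HID
 * topology and on how the kovaplus class device is laid out on the system.
 */
#include <stdio.h>
#include <string.h>

static int read_attr(const char *dir, const char *name, char *buf, int len)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/%s", dir, name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, len, f)) {
		fclose(f);
		return -1;
	}
	buf[strcspn(buf, "\n")] = '\0';	/* each attribute ends with '\n' */
	fclose(f);
	return 0;
}

int main(int argc, char **argv)
{
	static const char *attrs[] = {
		"actual_cpi", "actual_profile", "firmware_version",
		"actual_sensitivity_x", "actual_sensitivity_y",
	};
	char value[64];
	unsigned int i;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <directory containing the kovaplus attributes>\n",
			argv[0]);
		return 1;
	}

	/* Each attribute holds a single decimal value written by the driver. */
	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++) {
		if (read_attr(argv[1], attrs[i], value, sizeof(value)))
			fprintf(stderr, "%s: unreadable\n", attrs[i]);
		else
			printf("%s: %s\n", attrs[i], value);
	}
	return 0;
}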
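/*
 * Example (a stand-alone sketch, not part of the kernel source that
 * follows): re-deriving the timer wheel level geometry from the LVL_*
 * macro arithmetic defined in kernel/time/timer.c below.  The values
 * printed are the raw thresholds used by calc_wheel_index(); the table in
 * the wheel's header comment is the authoritative, human-readable form.
 * At HZ == 1000 one jiffy corresponds to one millisecond.
 */
#include <stdio.h>

#define LVL_CLK_SHIFT	3
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))
#define LVL_OFFS(n)	((n) * LVL_SIZE)
/* First expiry delta (in jiffies) that is enqueued into level n (n >= 1). */
#define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
#define LVL_DEPTH	9	/* the HZ > 100 case */

int main(void)
{
	unsigned long lvl;

	printf("level  offset  granularity  first delta (jiffies)\n");
	for (lvl = 0; lvl < LVL_DEPTH; lvl++)
		printf("%5lu  %6lu  %11lu  %lu\n",
		       lvl, LVL_OFFS(lvl), LVL_GRAN(lvl),
		       lvl ? LVL_START(lvl) : 0);

	/* Deltas at or beyond this value are clamped to the last level. */
	printf("wheel timeout cutoff: %lu jiffies\n", LVL_START(LVL_DEPTH));
	return 0;
}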
// SPDX-License-Identifier: GPL-2.0 /* * Kernel internal timers * * Copyright (C) 1991, 1992 Linus Torvalds * * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. * * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 * "A Kernel Model for Precision Timekeeping" by Dave Mills * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to * serialize accesses to xtime/lost_ticks). * Copyright (C) 1998 Andrea Arcangeli * 1999-03-10 Improved NTP compatibility by Ulrich Windl * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love * 2000-10-05 Implemented scalable SMP per-CPU timer handling. * Copyright (C) 2000, 2001, 2002 Ingo Molnar * Designed by David S.
Miller, Alexey Kuznetsov and Ingo Molnar */ #include <linux/kernel_stat.h> #include <linux/export.h> #include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/pid_namespace.h> #include <linux/notifier.h> #include <linux/thread_info.h> #include <linux/time.h> #include <linux/jiffies.h> #include <linux/posix-timers.h> #include <linux/cpu.h> #include <linux/syscalls.h> #include <linux/delay.h> #include <linux/tick.h> #include <linux/kallsyms.h> #include <linux/irq_work.h> #include <linux/sched/signal.h> #include <linux/sched/sysctl.h> #include <linux/sched/nohz.h> #include <linux/sched/debug.h> #include <linux/slab.h> #include <linux/compat.h> #include <linux/random.h> #include <linux/sysctl.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/div64.h> #include <asm/timex.h> #include <asm/io.h> #include "tick-internal.h" #include "timer_migration.h" #define CREATE_TRACE_POINTS #include <trace/events/timer.h> __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); /* * The timer wheel has LVL_DEPTH array levels. Each level provides an array of * LVL_SIZE buckets. Each level is driven by its own clock and therefore each * level has a different granularity. * * The level granularity is: LVL_CLK_DIV ^ level * The level clock frequency is: HZ / (LVL_CLK_DIV ^ level) * * The array level of a newly armed timer depends on the relative expiry * time. The farther the expiry time is away the higher the array level and * therefore the granularity becomes. * * Contrary to the original timer wheel implementation, which aims for 'exact' * expiry of the timers, this implementation removes the need for recascading * the timers into the lower array levels. The previous 'classic' timer wheel * implementation of the kernel already violated the 'exact' expiry by adding * slack to the expiry time to provide batched expiration. The granularity * levels provide implicit batching. * * This is an optimization of the original timer wheel implementation for the * majority of the timer wheel use cases: timeouts. The vast majority of * timeout timers (networking, disk I/O ...) are canceled before expiry. If * the timeout expires it indicates that normal operation is disturbed, so it * does not matter much whether the timeout comes with a slight delay. * * The only exception to this are networking timers with a small expiry * time. They rely on the granularity. Those fit into the first wheel level, * which has HZ granularity. * * We don't have cascading anymore. timers with a expiry time above the * capacity of the last wheel level are force expired at the maximum timeout * value of the last wheel level. From data sampling we know that the maximum * value observed is 5 days (network connection tracking), so this should not * be an issue. * * The currently chosen array constants values are a good compromise between * array size and granularity. 
* * This results in the following granularity and range levels: * * HZ 1000 steps * Level Offset Granularity Range * 0 0 1 ms 0 ms - 63 ms * 1 64 8 ms 64 ms - 511 ms * 2 128 64 ms 512 ms - 4095 ms (512ms - ~4s) * 3 192 512 ms 4096 ms - 32767 ms (~4s - ~32s) * 4 256 4096 ms (~4s) 32768 ms - 262143 ms (~32s - ~4m) * 5 320 32768 ms (~32s) 262144 ms - 2097151 ms (~4m - ~34m) * 6 384 262144 ms (~4m) 2097152 ms - 16777215 ms (~34m - ~4h) * 7 448 2097152 ms (~34m) 16777216 ms - 134217727 ms (~4h - ~1d) * 8 512 16777216 ms (~4h) 134217728 ms - 1073741822 ms (~1d - ~12d) * * HZ 300 * Level Offset Granularity Range * 0 0 3 ms 0 ms - 210 ms * 1 64 26 ms 213 ms - 1703 ms (213ms - ~1s) * 2 128 213 ms 1706 ms - 13650 ms (~1s - ~13s) * 3 192 1706 ms (~1s) 13653 ms - 109223 ms (~13s - ~1m) * 4 256 13653 ms (~13s) 109226 ms - 873810 ms (~1m - ~14m) * 5 320 109226 ms (~1m) 873813 ms - 6990503 ms (~14m - ~1h) * 6 384 873813 ms (~14m) 6990506 ms - 55924050 ms (~1h - ~15h) * 7 448 6990506 ms (~1h) 55924053 ms - 447392423 ms (~15h - ~5d) * 8 512 55924053 ms (~15h) 447392426 ms - 3579139406 ms (~5d - ~41d) * * HZ 250 * Level Offset Granularity Range * 0 0 4 ms 0 ms - 255 ms * 1 64 32 ms 256 ms - 2047 ms (256ms - ~2s) * 2 128 256 ms 2048 ms - 16383 ms (~2s - ~16s) * 3 192 2048 ms (~2s) 16384 ms - 131071 ms (~16s - ~2m) * 4 256 16384 ms (~16s) 131072 ms - 1048575 ms (~2m - ~17m) * 5 320 131072 ms (~2m) 1048576 ms - 8388607 ms (~17m - ~2h) * 6 384 1048576 ms (~17m) 8388608 ms - 67108863 ms (~2h - ~18h) * 7 448 8388608 ms (~2h) 67108864 ms - 536870911 ms (~18h - ~6d) * 8 512 67108864 ms (~18h) 536870912 ms - 4294967288 ms (~6d - ~49d) * * HZ 100 * Level Offset Granularity Range * 0 0 10 ms 0 ms - 630 ms * 1 64 80 ms 640 ms - 5110 ms (640ms - ~5s) * 2 128 640 ms 5120 ms - 40950 ms (~5s - ~40s) * 3 192 5120 ms (~5s) 40960 ms - 327670 ms (~40s - ~5m) * 4 256 40960 ms (~40s) 327680 ms - 2621430 ms (~5m - ~43m) * 5 320 327680 ms (~5m) 2621440 ms - 20971510 ms (~43m - ~5h) * 6 384 2621440 ms (~43m) 20971520 ms - 167772150 ms (~5h - ~1d) * 7 448 20971520 ms (~5h) 167772160 ms - 1342177270 ms (~1d - ~15d) */ /* Clock divisor for the next level */ #define LVL_CLK_SHIFT 3 #define LVL_CLK_DIV (1UL << LVL_CLK_SHIFT) #define LVL_CLK_MASK (LVL_CLK_DIV - 1) #define LVL_SHIFT(n) ((n) * LVL_CLK_SHIFT) #define LVL_GRAN(n) (1UL << LVL_SHIFT(n)) /* * The time start value for each level to select the bucket at enqueue * time. We start from the last possible delta of the previous level * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()). */ #define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT)) /* Size of each clock level */ #define LVL_BITS 6 #define LVL_SIZE (1UL << LVL_BITS) #define LVL_MASK (LVL_SIZE - 1) #define LVL_OFFS(n) ((n) * LVL_SIZE) /* Level depth */ #if HZ > 100 # define LVL_DEPTH 9 # else # define LVL_DEPTH 8 #endif /* The cutoff (max. capacity of the wheel) */ #define WHEEL_TIMEOUT_CUTOFF (LVL_START(LVL_DEPTH)) #define WHEEL_TIMEOUT_MAX (WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1)) /* * The resulting wheel size. If NOHZ is configured we allocate two * wheels so we have a separate storage for the deferrable timers. */ #define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH) #ifdef CONFIG_NO_HZ_COMMON /* * If multiple bases need to be locked, use the base ordering for lock * nesting, i.e. lowest number first. 
*/ # define NR_BASES 3 # define BASE_LOCAL 0 # define BASE_GLOBAL 1 # define BASE_DEF 2 #else # define NR_BASES 1 # define BASE_LOCAL 0 # define BASE_GLOBAL 0 # define BASE_DEF 0 #endif /** * struct timer_base - Per CPU timer base (number of base depends on config) * @lock: Lock protecting the timer_base * @running_timer: When expiring timers, the lock is dropped. To make * sure not to race against deleting/modifying a * currently running timer, the pointer is set to the * timer, which expires at the moment. If no timer is * running, the pointer is NULL. * @expiry_lock: PREEMPT_RT only: Lock is taken in softirq around * timer expiry callback execution and when trying to * delete a running timer and it wasn't successful in * the first glance. It prevents priority inversion * when callback was preempted on a remote CPU and a * caller tries to delete the running timer. It also * prevents a life lock, when the task which tries to * delete a timer preempted the softirq thread which * is running the timer callback function. * @timer_waiters: PREEMPT_RT only: Tells, if there is a waiter * waiting for the end of the timer callback function * execution. * @clk: clock of the timer base; is updated before enqueue * of a timer; during expiry, it is 1 offset ahead of * jiffies to avoid endless requeuing to current * jiffies * @next_expiry: expiry value of the first timer; it is updated when * finding the next timer and during enqueue; the * value is not valid, when next_expiry_recalc is set * @cpu: Number of CPU the timer base belongs to * @next_expiry_recalc: States, whether a recalculation of next_expiry is * required. Value is set true, when a timer was * deleted. * @is_idle: Is set, when timer_base is idle. It is triggered by NOHZ * code. This state is only used in standard * base. Deferrable timers, which are enqueued remotely * never wake up an idle CPU. So no matter of supporting it * for this base. * @timers_pending: Is set, when a timer is pending in the base. It is only * reliable when next_expiry_recalc is not set. * @pending_map: bitmap of the timer wheel; each bit reflects a * bucket of the wheel. When a bit is set, at least a * single timer is enqueued in the related bucket. * @vectors: Array of lists; Each array member reflects a bucket * of the timer wheel. The list contains all timers * which are enqueued into a specific bucket. 
*/ struct timer_base { raw_spinlock_t lock; struct timer_list *running_timer; #ifdef CONFIG_PREEMPT_RT spinlock_t expiry_lock; atomic_t timer_waiters; #endif unsigned long clk; unsigned long next_expiry; unsigned int cpu; bool next_expiry_recalc; bool is_idle; bool timers_pending; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); #ifdef CONFIG_NO_HZ_COMMON static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); static DEFINE_MUTEX(timer_keys_mutex); static void timer_update_keys(struct work_struct *work); static DECLARE_WORK(timer_update_work, timer_update_keys); #ifdef CONFIG_SMP static unsigned int sysctl_timer_migration = 1; DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); static void timers_update_migration(void) { if (sysctl_timer_migration && tick_nohz_active) static_branch_enable(&timers_migration_enabled); else static_branch_disable(&timers_migration_enabled); } #ifdef CONFIG_SYSCTL static int timer_migration_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; mutex_lock(&timer_keys_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) timers_update_migration(); mutex_unlock(&timer_keys_mutex); return ret; } static struct ctl_table timer_sysctl[] = { { .procname = "timer_migration", .data = &sysctl_timer_migration, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = timer_migration_handler, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, {} }; static int __init timer_sysctl_init(void) { register_sysctl("kernel", timer_sysctl); return 0; } device_initcall(timer_sysctl_init); #endif /* CONFIG_SYSCTL */ #else /* CONFIG_SMP */ static inline void timers_update_migration(void) { } #endif /* !CONFIG_SMP */ static void timer_update_keys(struct work_struct *work) { mutex_lock(&timer_keys_mutex); timers_update_migration(); static_branch_enable(&timers_nohz_active); mutex_unlock(&timer_keys_mutex); } void timers_update_nohz(void) { schedule_work(&timer_update_work); } static inline bool is_timers_nohz_active(void) { return static_branch_unlikely(&timers_nohz_active); } #else static inline bool is_timers_nohz_active(void) { return false; } #endif /* NO_HZ_COMMON */ static unsigned long round_jiffies_common(unsigned long j, int cpu, bool force_up) { int rem; unsigned long original = j; /* * We don't want all cpus firing their timers at once hitting the * same lock or cachelines, so we skew each extra cpu with an extra * 3 jiffies. This 3 jiffies came originally from the mm/ code which * already did this. * The skew is done by adding 3*cpunr, then round, then subtract this * extra offset again. */ j += cpu * 3; rem = j % HZ; /* * If the target jiffie is just after a whole second (which can happen * due to delays of the timer irq, long irq off times etc etc) then * we should round down to the whole second, not up. Use 1/4th second * as cutoff for this rounding as an extreme upper bound for this. * But never round down if @force_up is set. */ if (rem < HZ/4 && !force_up) /* round down */ j = j - rem; else /* round up */ j = j - rem + HZ; /* now that we have rounded, subtract the extra skew again */ j -= cpu * 3; /* * Make sure j is still in the future. Otherwise return the * unmodified value. */ return time_is_after_jiffies(j) ? 
j : original; } /** * __round_jiffies - function to round jiffies to a full second * @j: the time in (absolute) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * * __round_jiffies() rounds an absolute time in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. * * By rounding these timers to whole seconds, all such timers will fire * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * * The exact rounding is skewed for each processor to avoid all * processors firing at the exact same time, which could lead * to lock contention or spurious cache line bouncing. * * The return value is the rounded version of the @j parameter. */ unsigned long __round_jiffies(unsigned long j, int cpu) { return round_jiffies_common(j, cpu, false); } EXPORT_SYMBOL_GPL(__round_jiffies); /** * __round_jiffies_relative - function to round jiffies to a full second * @j: the time in (relative) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * * __round_jiffies_relative() rounds a time delta in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. * * By rounding these timers to whole seconds, all such timers will fire * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * * The exact rounding is skewed for each processor to avoid all * processors firing at the exact same time, which could lead * to lock contention or spurious cache line bouncing. * * The return value is the rounded version of the @j parameter. */ unsigned long __round_jiffies_relative(unsigned long j, int cpu) { unsigned long j0 = jiffies; /* Use j0 because jiffies might change while we run */ return round_jiffies_common(j + j0, cpu, false) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_relative); /** * round_jiffies - function to round jiffies to a full second * @j: the time in (absolute) jiffies that should be rounded * * round_jiffies() rounds an absolute time in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. * * By rounding these timers to whole seconds, all such timers will fire * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * * The return value is the rounded version of the @j parameter. */ unsigned long round_jiffies(unsigned long j) { return round_jiffies_common(j, raw_smp_processor_id(), false); } EXPORT_SYMBOL_GPL(round_jiffies); /** * round_jiffies_relative - function to round jiffies to a full second * @j: the time in (relative) jiffies that should be rounded * * round_jiffies_relative() rounds a time delta in the future (in jiffies) * up or down to (approximately) full seconds. This is useful for timers * for which the exact time they fire does not matter too much, as long as * they fire approximately every X seconds. 
* * By rounding these timers to whole seconds, all such timers will fire * at the same time, rather than at various times spread out. The goal * of this is to have the CPU wake up less, which saves power. * * The return value is the rounded version of the @j parameter. */ unsigned long round_jiffies_relative(unsigned long j) { return __round_jiffies_relative(j, raw_smp_processor_id()); } EXPORT_SYMBOL_GPL(round_jiffies_relative); /** * __round_jiffies_up - function to round jiffies up to a full second * @j: the time in (absolute) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * * This is the same as __round_jiffies() except that it will never * round down. This is useful for timeouts for which the exact time * of firing does not matter too much, as long as they don't fire too * early. */ unsigned long __round_jiffies_up(unsigned long j, int cpu) { return round_jiffies_common(j, cpu, true); } EXPORT_SYMBOL_GPL(__round_jiffies_up); /** * __round_jiffies_up_relative - function to round jiffies up to a full second * @j: the time in (relative) jiffies that should be rounded * @cpu: the processor number on which the timeout will happen * * This is the same as __round_jiffies_relative() except that it will never * round down. This is useful for timeouts for which the exact time * of firing does not matter too much, as long as they don't fire too * early. */ unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) { unsigned long j0 = jiffies; /* Use j0 because jiffies might change while we run */ return round_jiffies_common(j + j0, cpu, true) - j0; } EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); /** * round_jiffies_up - function to round jiffies up to a full second * @j: the time in (absolute) jiffies that should be rounded * * This is the same as round_jiffies() except that it will never * round down. This is useful for timeouts for which the exact time * of firing does not matter too much, as long as they don't fire too * early. */ unsigned long round_jiffies_up(unsigned long j) { return round_jiffies_common(j, raw_smp_processor_id(), true); } EXPORT_SYMBOL_GPL(round_jiffies_up); /** * round_jiffies_up_relative - function to round jiffies up to a full second * @j: the time in (relative) jiffies that should be rounded * * This is the same as round_jiffies_relative() except that it will never * round down. This is useful for timeouts for which the exact time * of firing does not matter too much, as long as they don't fire too * early. */ unsigned long round_jiffies_up_relative(unsigned long j) { return __round_jiffies_up_relative(j, raw_smp_processor_id()); } EXPORT_SYMBOL_GPL(round_jiffies_up_relative); static inline unsigned int timer_get_idx(struct timer_list *timer) { return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT; } static inline void timer_set_idx(struct timer_list *timer, unsigned int idx) { timer->flags = (timer->flags & ~TIMER_ARRAYMASK) | idx << TIMER_ARRAYSHIFT; } /* * Helper function to calculate the array index for a given expiry * time. */ static inline unsigned calc_index(unsigned long expires, unsigned lvl, unsigned long *bucket_expiry) { /* * The timer wheel has to guarantee that a timer does not fire * early. Early expiry can happen due to: * - Timer is armed at the edge of a tick * - Truncation of the expiry time in the outer wheel levels * * Round up with level granularity to prevent this. 
*/ expires = (expires >> LVL_SHIFT(lvl)) + 1; *bucket_expiry = expires << LVL_SHIFT(lvl); return LVL_OFFS(lvl) + (expires & LVL_MASK); } static int calc_wheel_index(unsigned long expires, unsigned long clk, unsigned long *bucket_expiry) { unsigned long delta = expires - clk; unsigned int idx; if (delta < LVL_START(1)) { idx = calc_index(expires, 0, bucket_expiry); } else if (delta < LVL_START(2)) { idx = calc_index(expires, 1, bucket_expiry); } else if (delta < LVL_START(3)) { idx = calc_index(expires, 2, bucket_expiry); } else if (delta < LVL_START(4)) { idx = calc_index(expires, 3, bucket_expiry); } else if (delta < LVL_START(5)) { idx = calc_index(expires, 4, bucket_expiry); } else if (delta < LVL_START(6)) { idx = calc_index(expires, 5, bucket_expiry); } else if (delta < LVL_START(7)) { idx = calc_index(expires, 6, bucket_expiry); } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { idx = calc_index(expires, 7, bucket_expiry); } else if ((long) delta < 0) { idx = clk & LVL_MASK; *bucket_expiry = clk; } else { /* * Force expire obscene large timeouts to expire at the * capacity limit of the wheel. */ if (delta >= WHEEL_TIMEOUT_CUTOFF) expires = clk + WHEEL_TIMEOUT_MAX; idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry); } return idx; } static void trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) { /* * Deferrable timers do not prevent the CPU from entering dynticks and * are not taken into account on the idle/nohz_full path. An IPI when a * new deferrable timer is enqueued will wake up the remote CPU but * nothing will be done with the deferrable timer base. Therefore skip * the remote IPI for deferrable timers completely. */ if (!is_timers_nohz_active() || timer->flags & TIMER_DEFERRABLE) return; /* * We might have to IPI the remote CPU if the base is idle and the * timer is pinned. If it is a non pinned timer, it is only queued * on the remote CPU, when timer was running during queueing. Then * everything is handled by remote CPU anyway. If the other CPU is * on the way to idle then it can't set base->is_idle as we hold * the base lock: */ if (base->is_idle) { WARN_ON_ONCE(!(timer->flags & TIMER_PINNED || tick_nohz_full_cpu(base->cpu))); wake_up_nohz_cpu(base->cpu); } } /* * Enqueue the timer into the hash bucket, mark it pending in * the bitmap, store the index in the timer flags then wake up * the target CPU if needed. */ static void enqueue_timer(struct timer_base *base, struct timer_list *timer, unsigned int idx, unsigned long bucket_expiry) { hlist_add_head(&timer->entry, base->vectors + idx); __set_bit(idx, base->pending_map); timer_set_idx(timer, idx); trace_timer_start(timer, bucket_expiry); /* * Check whether this is the new first expiring timer. The * effective expiry time of the timer is required here * (bucket_expiry) instead of timer->expires. 
*/ if (time_before(bucket_expiry, base->next_expiry)) { /* * Set the next expiry time and kick the CPU so it * can reevaluate the wheel: */ base->next_expiry = bucket_expiry; base->timers_pending = true; base->next_expiry_recalc = false; trigger_dyntick_cpu(base, timer); } } static void internal_add_timer(struct timer_base *base, struct timer_list *timer) { unsigned long bucket_expiry; unsigned int idx; idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry); enqueue_timer(base, timer, idx, bucket_expiry); } #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static const struct debug_obj_descr timer_debug_descr; struct timer_hint { void (*function)(struct timer_list *t); long offset; }; #define TIMER_HINT(fn, container, timr, hintfn) \ { \ .function = fn, \ .offset = offsetof(container, hintfn) - \ offsetof(container, timr) \ } static const struct timer_hint timer_hints[] = { TIMER_HINT(delayed_work_timer_fn, struct delayed_work, timer, work.func), TIMER_HINT(kthread_delayed_work_timer_fn, struct kthread_delayed_work, timer, work.func), }; static void *timer_debug_hint(void *addr) { struct timer_list *timer = addr; int i; for (i = 0; i < ARRAY_SIZE(timer_hints); i++) { if (timer_hints[i].function == timer->function) { void (**fn)(void) = addr + timer_hints[i].offset; return *fn; } } return timer->function; } static bool timer_is_static_object(void *addr) { struct timer_list *timer = addr; return (timer->entry.pprev == NULL && timer->entry.next == TIMER_ENTRY_STATIC); } /* * timer_fixup_init is called when: * - an active object is initialized */ static bool timer_fixup_init(void *addr, enum debug_obj_state state) { struct timer_list *timer = addr; switch (state) { case ODEBUG_STATE_ACTIVE: del_timer_sync(timer); debug_object_init(timer, &timer_debug_descr); return true; default: return false; } } /* Stub timer callback for improperly used timers. 
*/ static void stub_timer(struct timer_list *unused) { WARN_ON(1); } /* * timer_fixup_activate is called when: * - an active object is activated * - an unknown non-static object is activated */ static bool timer_fixup_activate(void *addr, enum debug_obj_state state) { struct timer_list *timer = addr; switch (state) { case ODEBUG_STATE_NOTAVAILABLE: timer_setup(timer, stub_timer, 0); return true; case ODEBUG_STATE_ACTIVE: WARN_ON(1); fallthrough; default: return false; } } /* * timer_fixup_free is called when: * - an active object is freed */ static bool timer_fixup_free(void *addr, enum debug_obj_state state) { struct timer_list *timer = addr; switch (state) { case ODEBUG_STATE_ACTIVE: del_timer_sync(timer); debug_object_free(timer, &timer_debug_descr); return true; default: return false; } } /* * timer_fixup_assert_init is called when: * - an untracked/uninit-ed object is found */ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) { struct timer_list *timer = addr; switch (state) { case ODEBUG_STATE_NOTAVAILABLE: timer_setup(timer, stub_timer, 0); return true; default: return false; } } static const struct debug_obj_descr timer_debug_descr = { .name = "timer_list", .debug_hint = timer_debug_hint, .is_static_object = timer_is_static_object, .fixup_init = timer_fixup_init, .fixup_activate = timer_fixup_activate, .fixup_free = timer_fixup_free, .fixup_assert_init = timer_fixup_assert_init, }; static inline void debug_timer_init(struct timer_list *timer) { debug_object_init(timer, &timer_debug_descr); } static inline void debug_timer_activate(struct timer_list *timer) { debug_object_activate(timer, &timer_debug_descr); } static inline void debug_timer_deactivate(struct timer_list *timer) { debug_object_deactivate(timer, &timer_debug_descr); } static inline void debug_timer_assert_init(struct timer_list *timer) { debug_object_assert_init(timer, &timer_debug_descr); } static void do_init_timer(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); void init_timer_on_stack_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { debug_object_init_on_stack(timer, &timer_debug_descr); do_init_timer(timer, func, flags, name, key); } EXPORT_SYMBOL_GPL(init_timer_on_stack_key); void destroy_timer_on_stack(struct timer_list *timer) { debug_object_free(timer, &timer_debug_descr); } EXPORT_SYMBOL_GPL(destroy_timer_on_stack); #else static inline void debug_timer_init(struct timer_list *timer) { } static inline void debug_timer_activate(struct timer_list *timer) { } static inline void debug_timer_deactivate(struct timer_list *timer) { } static inline void debug_timer_assert_init(struct timer_list *timer) { } #endif static inline void debug_init(struct timer_list *timer) { debug_timer_init(timer); trace_timer_init(timer); } static inline void debug_deactivate(struct timer_list *timer) { debug_timer_deactivate(timer); trace_timer_cancel(timer); } static inline void debug_assert_init(struct timer_list *timer) { debug_timer_assert_init(timer); } static void do_init_timer(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { timer->entry.pprev = NULL; timer->function = func; if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS)) flags &= TIMER_INIT_FLAGS; timer->flags = flags | raw_smp_processor_id(); lockdep_init_map(&timer->lockdep_map, name, key, 0); } /** * init_timer_key 
- initialize a timer * @timer: the timer to be initialized * @func: timer callback function * @flags: timer flags * @name: name of the timer * @key: lockdep class key of the fake lock used for tracking timer * sync lock dependencies * * init_timer_key() must be done to a timer prior to calling *any* of the * other timer functions. */ void init_timer_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { debug_init(timer); do_init_timer(timer, func, flags, name, key); } EXPORT_SYMBOL(init_timer_key); static inline void detach_timer(struct timer_list *timer, bool clear_pending) { struct hlist_node *entry = &timer->entry; debug_deactivate(timer); __hlist_del(entry); if (clear_pending) entry->pprev = NULL; entry->next = LIST_POISON2; } static int detach_if_pending(struct timer_list *timer, struct timer_base *base, bool clear_pending) { unsigned idx = timer_get_idx(timer); if (!timer_pending(timer)) return 0; if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) { __clear_bit(idx, base->pending_map); base->next_expiry_recalc = true; } detach_timer(timer, clear_pending); return 1; } static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) { int index = tflags & TIMER_PINNED ? BASE_LOCAL : BASE_GLOBAL; struct timer_base *base; base = per_cpu_ptr(&timer_bases[index], cpu); /* * If the timer is deferrable and NO_HZ_COMMON is set then we need * to use the deferrable base. */ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); return base; } static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) { int index = tflags & TIMER_PINNED ? BASE_LOCAL : BASE_GLOBAL; struct timer_base *base; base = this_cpu_ptr(&timer_bases[index]); /* * If the timer is deferrable and NO_HZ_COMMON is set then we need * to use the deferrable base. */ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = this_cpu_ptr(&timer_bases[BASE_DEF]); return base; } static inline struct timer_base *get_timer_base(u32 tflags) { return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); } static inline void __forward_timer_base(struct timer_base *base, unsigned long basej) { /* * Check whether we can forward the base. We can only do that when * @basej is past base->clk otherwise we might rewind base->clk. */ if (time_before_eq(basej, base->clk)) return; /* * If the next expiry value is > jiffies, then we fast forward to * jiffies otherwise we forward to the next expiry value. */ if (time_after(base->next_expiry, basej)) { base->clk = basej; } else { if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk))) return; base->clk = base->next_expiry; } } static inline void forward_timer_base(struct timer_base *base) { __forward_timer_base(base, READ_ONCE(jiffies)); } /* * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means * that all timers which are tied to this base are locked, and the base itself * is locked too. * * So __run_timers/migrate_timers can safely modify all timers which could * be found in the base->vectors array. * * When a timer is migrating then the TIMER_MIGRATING flag is set and we need * to wait until the migration is done. 
*/ static struct timer_base *lock_timer_base(struct timer_list *timer, unsigned long *flags) __acquires(timer->base->lock) { for (;;) { struct timer_base *base; u32 tf; /* * We need to use READ_ONCE() here, otherwise the compiler * might re-read @tf between the check for TIMER_MIGRATING * and spin_lock(). */ tf = READ_ONCE(timer->flags); if (!(tf & TIMER_MIGRATING)) { base = get_timer_base(tf); raw_spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; raw_spin_unlock_irqrestore(&base->lock, *flags); } cpu_relax(); } } #define MOD_TIMER_PENDING_ONLY 0x01 #define MOD_TIMER_REDUCE 0x02 #define MOD_TIMER_NOTPENDING 0x04 static inline int __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) { unsigned long clk = 0, flags, bucket_expiry; struct timer_base *base, *new_base; unsigned int idx = UINT_MAX; int ret = 0; debug_assert_init(timer); /* * This is a common optimization triggered by the networking code - if * the timer is re-modified to have the same timeout or ends up in the * same array bucket then just return: */ if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) { /* * The downside of this optimization is that it can result in * larger granularity than you would get from adding a new * timer with this expiry. */ long diff = timer->expires - expires; if (!diff) return 1; if (options & MOD_TIMER_REDUCE && diff <= 0) return 1; /* * We lock timer base and calculate the bucket index right * here. If the timer ends up in the same bucket, then we * just update the expiry time and avoid the whole * dequeue/enqueue dance. */ base = lock_timer_base(timer, &flags); /* * Has @timer been shutdown? This needs to be evaluated * while holding base lock to prevent a race against the * shutdown code. */ if (!timer->function) goto out_unlock; forward_timer_base(base); if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) && time_before_eq(timer->expires, expires)) { ret = 1; goto out_unlock; } clk = base->clk; idx = calc_wheel_index(expires, clk, &bucket_expiry); /* * Retrieve and compare the array index of the pending * timer. If it matches set the expiry to the new value so a * subsequent call will exit in the expires check above. */ if (idx == timer_get_idx(timer)) { if (!(options & MOD_TIMER_REDUCE)) timer->expires = expires; else if (time_after(timer->expires, expires)) timer->expires = expires; ret = 1; goto out_unlock; } } else { base = lock_timer_base(timer, &flags); /* * Has @timer been shutdown? This needs to be evaluated * while holding base lock to prevent a race against the * shutdown code. */ if (!timer->function) goto out_unlock; forward_timer_base(base); } ret = detach_if_pending(timer, base, false); if (!ret && (options & MOD_TIMER_PENDING_ONLY)) goto out_unlock; new_base = get_timer_this_cpu_base(timer->flags); if (base != new_base) { /* * We are trying to schedule the timer on the new base. * However we can't change timer's base while it is running, * otherwise timer_delete_sync() can't detect that the timer's * handler yet has not finished. This also guarantees that the * timer is serialized wrt itself. 
*/ if (likely(base->running_timer != timer)) { /* See the comment in lock_timer_base() */ timer->flags |= TIMER_MIGRATING; raw_spin_unlock(&base->lock); base = new_base; raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); forward_timer_base(base); } } debug_timer_activate(timer); timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance * between calculating 'idx' and possibly switching the base, only * enqueue_timer() is required. Otherwise we need to (re)calculate * the wheel index via internal_add_timer(). */ if (idx != UINT_MAX && clk == base->clk) enqueue_timer(base, timer, idx, bucket_expiry); else internal_add_timer(base, timer); out_unlock: raw_spin_unlock_irqrestore(&base->lock, flags); return ret; } /** * mod_timer_pending - Modify a pending timer's timeout * @timer: The pending timer to be modified * @expires: New absolute timeout in jiffies * * mod_timer_pending() is the same for pending timers as mod_timer(), but * will not activate inactive timers. * * If @timer->function == NULL then the start operation is silently * discarded. * * Return: * * %0 - The timer was inactive and not modified or was in * shutdown state and the operation was discarded * * %1 - The timer was active and requeued to expire at @expires */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY); } EXPORT_SYMBOL(mod_timer_pending); /** * mod_timer - Modify a timer's timeout * @timer: The timer to be modified * @expires: New absolute timeout in jiffies * * mod_timer(timer, expires) is equivalent to: * * del_timer(timer); timer->expires = expires; add_timer(timer); * * mod_timer() is more efficient than the above open coded sequence. In * case that the timer is inactive, the del_timer() part is a NOP. The * timer is in any case activated with the new expiry time @expires. * * Note that if there are multiple unserialized concurrent users of the * same timer, then mod_timer() is the only safe way to modify the timeout, * since add_timer() cannot modify an already running timer. * * If @timer->function == NULL then the start operation is silently * discarded. In this case the return value is 0 and meaningless. * * Return: * * %0 - The timer was inactive and started or was in shutdown * state and the operation was discarded * * %1 - The timer was active and requeued to expire at @expires or * the timer was active and not modified because @expires did * not change the effective expiry time */ int mod_timer(struct timer_list *timer, unsigned long expires) { return __mod_timer(timer, expires, 0); } EXPORT_SYMBOL(mod_timer); /** * timer_reduce - Modify a timer's timeout if it would reduce the timeout * @timer: The timer to be modified * @expires: New absolute timeout in jiffies * * timer_reduce() is very similar to mod_timer(), except that it will only * modify an enqueued timer if that would reduce the expiration time. If * @timer is not enqueued it starts the timer. * * If @timer->function == NULL then the start operation is silently * discarded. 
* * Return: * * %0 - The timer was inactive and started or was in shutdown * state and the operation was discarded * * %1 - The timer was active and requeued to expire at @expires or * the timer was active and not modified because @expires * did not change the effective expiry time such that the * timer would expire earlier than already scheduled */ int timer_reduce(struct timer_list *timer, unsigned long expires) { return __mod_timer(timer, expires, MOD_TIMER_REDUCE); } EXPORT_SYMBOL(timer_reduce); /** * add_timer - Start a timer * @timer: The timer to be started * * Start @timer to expire at @timer->expires in the future. @timer->expires * is the absolute expiry time measured in 'jiffies'. When the timer expires * timer->function(timer) will be invoked from soft interrupt context. * * The @timer->expires and @timer->function fields must be set prior * to calling this function. * * If @timer->function == NULL then the start operation is silently * discarded. * * If @timer->expires is already in the past @timer will be queued to * expire at the next timer tick. * * This can only operate on an inactive timer. Attempts to invoke this on * an active timer are rejected with a warning. */ void add_timer(struct timer_list *timer) { if (WARN_ON_ONCE(timer_pending(timer))) return; __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING); } EXPORT_SYMBOL(add_timer); /** * add_timer_local() - Start a timer on the local CPU * @timer: The timer to be started * * Same as add_timer() except that the timer flag TIMER_PINNED is set. * * See add_timer() for further details. */ void add_timer_local(struct timer_list *timer) { if (WARN_ON_ONCE(timer_pending(timer))) return; timer->flags |= TIMER_PINNED; __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING); } EXPORT_SYMBOL(add_timer_local); /** * add_timer_global() - Start a timer without TIMER_PINNED flag set * @timer: The timer to be started * * Same as add_timer() except that the timer flag TIMER_PINNED is unset. * * See add_timer() for further details. */ void add_timer_global(struct timer_list *timer) { if (WARN_ON_ONCE(timer_pending(timer))) return; timer->flags &= ~TIMER_PINNED; __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING); } EXPORT_SYMBOL(add_timer_global); /** * add_timer_on - Start a timer on a particular CPU * @timer: The timer to be started * @cpu: The CPU to start it on * * Same as add_timer() except that it starts the timer on the given CPU and * the TIMER_PINNED flag is set. When timer shouldn't be a pinned timer in * the next round, add_timer_global() should be used instead as it unsets * the TIMER_PINNED flag. * * See add_timer() for further details. */ void add_timer_on(struct timer_list *timer, int cpu) { struct timer_base *new_base, *base; unsigned long flags; debug_assert_init(timer); if (WARN_ON_ONCE(timer_pending(timer))) return; /* Make sure timer flags have TIMER_PINNED flag set */ timer->flags |= TIMER_PINNED; new_base = get_timer_cpu_base(timer->flags, cpu); /* * If @timer was on a different CPU, it should be migrated with the * old base locked to prevent other operations proceeding with the * wrong base locked. See lock_timer_base(). */ base = lock_timer_base(timer, &flags); /* * Has @timer been shutdown? This needs to be evaluated while * holding base lock to prevent a race against the shutdown code. 
*/ if (!timer->function) goto out_unlock; if (base != new_base) { timer->flags |= TIMER_MIGRATING; raw_spin_unlock(&base->lock); base = new_base; raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | cpu); } forward_timer_base(base); debug_timer_activate(timer); internal_add_timer(base, timer); out_unlock: raw_spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); /** * __timer_delete - Internal function: Deactivate a timer * @timer: The timer to be deactivated * @shutdown: If true, this indicates that the timer is about to be * shutdown permanently. * * If @shutdown is true then @timer->function is set to NULL under the * timer base lock which prevents further rearming of the time. In that * case any attempt to rearm @timer after this function returns will be * silently ignored. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ static int __timer_delete(struct timer_list *timer, bool shutdown) { struct timer_base *base; unsigned long flags; int ret = 0; debug_assert_init(timer); /* * If @shutdown is set then the lock has to be taken whether the * timer is pending or not to protect against a concurrent rearm * which might hit between the lockless pending check and the lock * acquisition. By taking the lock it is ensured that such a newly * enqueued timer is dequeued and cannot end up with * timer->function == NULL in the expiry code. * * If timer->function is currently executed, then this makes sure * that the callback cannot requeue the timer. */ if (timer_pending(timer) || shutdown) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); if (shutdown) timer->function = NULL; raw_spin_unlock_irqrestore(&base->lock, flags); } return ret; } /** * timer_delete - Deactivate a timer * @timer: The timer to be deactivated * * The function only deactivates a pending timer, but contrary to * timer_delete_sync() it does not take into account whether the timer's * callback function is concurrently executed on a different CPU or not. * It neither prevents rearming of the timer. If @timer can be rearmed * concurrently then the return value of this function is meaningless. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ int timer_delete(struct timer_list *timer) { return __timer_delete(timer, false); } EXPORT_SYMBOL(timer_delete); /** * timer_shutdown - Deactivate a timer and prevent rearming * @timer: The timer to be deactivated * * The function does not wait for an eventually running timer callback on a * different CPU but it prevents rearming of the timer. Any attempt to arm * @timer after this function returns will be silently ignored. * * This function is useful for teardown code and should only be used when * timer_shutdown_sync() cannot be invoked due to locking or context constraints. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending */ int timer_shutdown(struct timer_list *timer) { return __timer_delete(timer, true); } EXPORT_SYMBOL_GPL(timer_shutdown); /** * __try_to_del_timer_sync - Internal function: Try to deactivate a timer * @timer: Timer to deactivate * @shutdown: If true, this indicates that the timer is about to be * shutdown permanently. * * If @shutdown is true then @timer->function is set to NULL under the * timer base lock which prevents further rearming of the timer. Any * attempt to rearm @timer after this function returns will be silently * ignored. 
* * This function cannot guarantee that the timer cannot be rearmed * right after dropping the base lock if @shutdown is false. That * needs to be prevented by the calling code if necessary. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated * * %-1 - The timer callback function is running on a different CPU */ static int __try_to_del_timer_sync(struct timer_list *timer, bool shutdown) { struct timer_base *base; unsigned long flags; int ret = -1; debug_assert_init(timer); base = lock_timer_base(timer, &flags); if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); if (shutdown) timer->function = NULL; raw_spin_unlock_irqrestore(&base->lock, flags); return ret; } /** * try_to_del_timer_sync - Try to deactivate a timer * @timer: Timer to deactivate * * This function tries to deactivate a timer. On success the timer is not * queued and the timer callback function is not running on any CPU. * * This function does not guarantee that the timer cannot be rearmed right * after dropping the base lock. That needs to be prevented by the calling * code if necessary. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated * * %-1 - The timer callback function is running on a different CPU */ int try_to_del_timer_sync(struct timer_list *timer) { return __try_to_del_timer_sync(timer, false); } EXPORT_SYMBOL(try_to_del_timer_sync); #ifdef CONFIG_PREEMPT_RT static __init void timer_base_init_expiry_lock(struct timer_base *base) { spin_lock_init(&base->expiry_lock); } static inline void timer_base_lock_expiry(struct timer_base *base) { spin_lock(&base->expiry_lock); } static inline void timer_base_unlock_expiry(struct timer_base *base) { spin_unlock(&base->expiry_lock); } /* * The counterpart to del_timer_wait_running(). * * If there is a waiter for base->expiry_lock, then it was waiting for the * timer callback to finish. Drop expiry_lock and reacquire it. That allows * the waiter to acquire the lock and make progress. */ static void timer_sync_wait_running(struct timer_base *base) { if (atomic_read(&base->timer_waiters)) { raw_spin_unlock_irq(&base->lock); spin_unlock(&base->expiry_lock); spin_lock(&base->expiry_lock); raw_spin_lock_irq(&base->lock); } } /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was * running. * * This prevents priority inversion, if the softirq thread on a remote CPU * got preempted, and it prevents a life lock when the task which tries to * delete a timer preempted the softirq thread running the timer callback * function. */ static void del_timer_wait_running(struct timer_list *timer) { u32 tf; tf = READ_ONCE(timer->flags); if (!(tf & (TIMER_MIGRATING | TIMER_IRQSAFE))) { struct timer_base *base = get_timer_base(tf); /* * Mark the base as contended and grab the expiry lock, * which is held by the softirq across the timer * callback. Drop the lock immediately so the softirq can * expire the next timer. In theory the timer could already * be running again, but that's more than unlikely and just * causes another wait loop. 
*/ atomic_inc(&base->timer_waiters); spin_lock_bh(&base->expiry_lock); atomic_dec(&base->timer_waiters); spin_unlock_bh(&base->expiry_lock); } } #else static inline void timer_base_init_expiry_lock(struct timer_base *base) { } static inline void timer_base_lock_expiry(struct timer_base *base) { } static inline void timer_base_unlock_expiry(struct timer_base *base) { } static inline void timer_sync_wait_running(struct timer_base *base) { } static inline void del_timer_wait_running(struct timer_list *timer) { } #endif /** * __timer_delete_sync - Internal function: Deactivate a timer and wait * for the handler to finish. * @timer: The timer to be deactivated * @shutdown: If true, @timer->function will be set to NULL under the * timer base lock which prevents rearming of @timer * * If @shutdown is not set the timer can be rearmed later. If the timer can * be rearmed concurrently, i.e. after dropping the base lock then the * return value is meaningless. * * If @shutdown is set then @timer->function is set to NULL under timer * base lock which prevents rearming of the timer. Any attempt to rearm * a shutdown timer is silently ignored. * * If the timer should be reused after shutdown it has to be initialized * again. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ static int __timer_delete_sync(struct timer_list *timer, bool shutdown) { int ret; #ifdef CONFIG_LOCKDEP unsigned long flags; /* * If lockdep gives a backtrace here, please reference * the synchronization rules above. */ local_irq_save(flags); lock_map_acquire(&timer->lockdep_map); lock_map_release(&timer->lockdep_map); local_irq_restore(flags); #endif /* * don't use it in hardirq context, because it * could lead to deadlock. */ WARN_ON(in_hardirq() && !(timer->flags & TIMER_IRQSAFE)); /* * Must be able to sleep on PREEMPT_RT because of the slowpath in * del_timer_wait_running(). */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(timer->flags & TIMER_IRQSAFE)) lockdep_assert_preemption_enabled(); do { ret = __try_to_del_timer_sync(timer, shutdown); if (unlikely(ret < 0)) { del_timer_wait_running(timer); cpu_relax(); } } while (ret < 0); return ret; } /** * timer_delete_sync - Deactivate a timer and wait for the handler to finish. * @timer: The timer to be deactivated * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from * interrupt contexts unless the timer is an irqsafe one. The caller must * not hold locks which would prevent completion of the timer's callback * function. The timer's handler must not call add_timer_on(). Upon exit * the timer is not queued and the handler is not running on any CPU. * * For !irqsafe timers, the caller must not hold locks that are held in * interrupt context. Even if the lock has nothing to do with the timer in * question. Here's why:: * * CPU0 CPU1 * ---- ---- * <SOFTIRQ> * call_timer_fn(); * base->running_timer = mytimer; * spin_lock_irq(somelock); * <IRQ> * spin_lock(somelock); * timer_delete_sync(mytimer); * while (base->running_timer == mytimer); * * Now timer_delete_sync() will never return and never release somelock. * The interrupt on the other CPU is waiting to grab somelock but it has * interrupted the softirq that CPU0 is waiting to finish. * * This function cannot guarantee that the timer is not rearmed again by * some concurrent or preempting code, right after it dropped the base * lock. 
If there is the possibility of a concurrent rearm then the return * value of the function is meaningless. * * If such a guarantee is needed, e.g. for teardown situations then use * timer_shutdown_sync() instead. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending and deactivated */ int timer_delete_sync(struct timer_list *timer) { return __timer_delete_sync(timer, false); } EXPORT_SYMBOL(timer_delete_sync); /** * timer_shutdown_sync - Shutdown a timer and prevent rearming * @timer: The timer to be shutdown * * When the function returns it is guaranteed that: * - @timer is not queued * - The callback function of @timer is not running * - @timer cannot be enqueued again. Any attempt to rearm * @timer is silently ignored. * * See timer_delete_sync() for synchronization rules. * * This function is useful for final teardown of an infrastructure where * the timer is subject to a circular dependency problem. * * A common pattern for this is a timer and a workqueue where the timer can * schedule work and work can arm the timer. On shutdown the workqueue must * be destroyed and the timer must be prevented from rearming. Unless the * code has conditionals like 'if (mything->in_shutdown)' to prevent that * there is no way to get this correct with timer_delete_sync(). * * timer_shutdown_sync() is solving the problem. The correct ordering of * calls in this case is: * * timer_shutdown_sync(&mything->timer); * workqueue_destroy(&mything->workqueue); * * After this 'mything' can be safely freed. * * This obviously implies that the timer is not required to be functional * for the rest of the shutdown operation. * * Return: * * %0 - The timer was not pending * * %1 - The timer was pending */ int timer_shutdown_sync(struct timer_list *timer) { return __timer_delete_sync(timer, true); } EXPORT_SYMBOL_GPL(timer_shutdown_sync); static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *), unsigned long baseclk) { int count = preempt_count(); #ifdef CONFIG_LOCKDEP /* * It is permissible to free the timer from inside the * function that is called from it, this we need to take into * account for lockdep too. To avoid bogus "held lock freed" * warnings as well as problems when looking into * timer->lockdep_map, make a copy and use that here. */ struct lockdep_map lockdep_map; lockdep_copy_map(&lockdep_map, &timer->lockdep_map); #endif /* * Couple the lock chain with the lock chain at * timer_delete_sync() by acquiring the lock_map around the fn() * call here and in timer_delete_sync(). */ lock_map_acquire(&lockdep_map); trace_timer_expire_entry(timer, baseclk); fn(timer); trace_timer_expire_exit(timer); lock_map_release(&lockdep_map); if (count != preempt_count()) { WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n", fn, count, preempt_count()); /* * Restore the preempt count. That gives us a decent * chance to survive and extract information. If the * callback kept a lock held, bad luck, but not worse * than the BUG() we had. */ preempt_count_set(count); } } static void expire_timers(struct timer_base *base, struct hlist_head *head) { /* * This value is required only for tracing. base->clk was * incremented directly before expire_timers was called. But expiry * is related to the old base->clk value. 
*/ unsigned long baseclk = base->clk - 1; while (!hlist_empty(head)) { struct timer_list *timer; void (*fn)(struct timer_list *); timer = hlist_entry(head->first, struct timer_list, entry); base->running_timer = timer; detach_timer(timer, true); fn = timer->function; if (WARN_ON_ONCE(!fn)) { /* Should never happen. Emphasis on should! */ base->running_timer = NULL; continue; } if (timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); call_timer_fn(timer, fn, baseclk); raw_spin_lock(&base->lock); base->running_timer = NULL; } else { raw_spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, baseclk); raw_spin_lock_irq(&base->lock); base->running_timer = NULL; timer_sync_wait_running(base); } } } static int collect_expired_timers(struct timer_base *base, struct hlist_head *heads) { unsigned long clk = base->clk = base->next_expiry; struct hlist_head *vec; int i, levels = 0; unsigned int idx; for (i = 0; i < LVL_DEPTH; i++) { idx = (clk & LVL_MASK) + i * LVL_SIZE; if (__test_and_clear_bit(idx, base->pending_map)) { vec = base->vectors + idx; hlist_move_list(vec, heads++); levels++; } /* Is it time to look at the next level? */ if (clk & LVL_CLK_MASK) break; /* Shift clock for the next level granularity */ clk >>= LVL_CLK_SHIFT; } return levels; } /* * Find the next pending bucket of a level. Search from level start (@offset) * + @clk upwards and if nothing there, search from start of the level * (@offset) up to @offset + clk. */ static int next_pending_bucket(struct timer_base *base, unsigned offset, unsigned clk) { unsigned pos, start = offset + clk; unsigned end = offset + LVL_SIZE; pos = find_next_bit(base->pending_map, end, start); if (pos < end) return pos - start; pos = find_next_bit(base->pending_map, start, offset); return pos < start ? pos + LVL_SIZE - start : -1; } /* * Search the first expiring timer in the various clock levels. Caller must * hold base->lock. * * Store next expiry time in base->next_expiry. */ static void next_expiry_recalc(struct timer_base *base) { unsigned long clk, next, adj; unsigned lvl, offset = 0; next = base->clk + NEXT_TIMER_MAX_DELTA; clk = base->clk; for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { int pos = next_pending_bucket(base, offset, clk & LVL_MASK); unsigned long lvl_clk = clk & LVL_CLK_MASK; if (pos >= 0) { unsigned long tmp = clk + (unsigned long) pos; tmp <<= LVL_SHIFT(lvl); if (time_before(tmp, next)) next = tmp; /* * If the next expiration happens before we reach * the next level, no need to check further. */ if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK)) break; } /* * Clock for the next level. If the current level clock lower * bits are zero, we look at the next level as is. If not we * need to advance it by one because that's going to be the * next expiring bucket in that level. base->clk is the next * expiring jiffie. So in case of: * * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 * 0 0 0 0 0 0 * * we have to look at all levels @index 0. With * * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 * 0 0 0 0 0 2 * * LVL0 has the next expiring bucket @index 2. The upper * levels have the next expiring bucket @index 1. * * In case that the propagation wraps the next level the same * rules apply: * * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 * 0 0 0 0 F 2 * * So after looking at LVL0 we get: * * LVL5 LVL4 LVL3 LVL2 LVL1 * 0 0 0 1 0 * * So no propagation from LVL1 to LVL2 because that happened * with the add already, but then we need to propagate further * from LVL2 to LVL3. 
* * So the simple check whether the lower bits of the current * level are 0 or not is sufficient for all cases. */ adj = lvl_clk ? 1 : 0; clk >>= LVL_CLK_SHIFT; clk += adj; } base->next_expiry = next; base->next_expiry_recalc = false; base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA); } #ifdef CONFIG_NO_HZ_COMMON /* * Check, if the next hrtimer event is before the next timer wheel * event: */ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) { u64 nextevt = hrtimer_get_next_event(); /* * If high resolution timers are enabled * hrtimer_get_next_event() returns KTIME_MAX. */ if (expires <= nextevt) return expires; /* * If the next timer is already expired, return the tick base * time so the tick is fired immediately. */ if (nextevt <= basem) return basem; /* * Round up to the next jiffie. High resolution timers are * off, so the hrtimers are expired in the tick and we need to * make sure that this tick really expires the timer to avoid * a ping pong of the nohz stop code. * * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3 */ return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; } static unsigned long next_timer_interrupt(struct timer_base *base, unsigned long basej) { if (base->next_expiry_recalc) next_expiry_recalc(base); /* * Move next_expiry for the empty base into the future to prevent an * unnecessary raise of the timer softirq when the next_expiry value * will be reached even if there is no timer pending. * * This update is also required to make timer_base::next_expiry values * easy comparable to find out which base holds the first pending timer. */ if (!base->timers_pending) base->next_expiry = basej + NEXT_TIMER_MAX_DELTA; return base->next_expiry; } static unsigned long fetch_next_timer_interrupt(unsigned long basej, u64 basem, struct timer_base *base_local, struct timer_base *base_global, struct timer_events *tevt) { unsigned long nextevt, nextevt_local, nextevt_global; bool local_first; nextevt_local = next_timer_interrupt(base_local, basej); nextevt_global = next_timer_interrupt(base_global, basej); local_first = time_before_eq(nextevt_local, nextevt_global); nextevt = local_first ? nextevt_local : nextevt_global; /* * If the @nextevt is at max. one tick away, use @nextevt and store * it in the local expiry value. The next global event is irrelevant in * this case and can be left as KTIME_MAX. */ if (time_before_eq(nextevt, basej + 1)) { /* If we missed a tick already, force 0 delta */ if (time_before(nextevt, basej)) nextevt = basej; tevt->local = basem + (u64)(nextevt - basej) * TICK_NSEC; /* * This is required for the remote check only but it doesn't * hurt, when it is done for both call sites: * * * The remote callers will only take care of the global timers * as local timers will be handled by CPU itself. When not * updating tevt->global with the already missed first global * timer, it is possible that it will be missed completely. * * * The local callers will ignore the tevt->global anyway, when * nextevt is max. one tick away. */ if (!local_first) tevt->global = tevt->local; return nextevt; } /* * Update tevt.* values: * * If the local queue expires first, then the global event can be * ignored. If the global queue is empty, nothing to do either. 
*/ if (!local_first && base_global->timers_pending) tevt->global = basem + (u64)(nextevt_global - basej) * TICK_NSEC; if (base_local->timers_pending) tevt->local = basem + (u64)(nextevt_local - basej) * TICK_NSEC; return nextevt; } # ifdef CONFIG_SMP /** * fetch_next_timer_interrupt_remote() - Store next timers into @tevt * @basej: base time jiffies * @basem: base time clock monotonic * @tevt: Pointer to the storage for the expiry values * @cpu: Remote CPU * * Stores the next pending local and global timer expiry values in the * struct pointed to by @tevt. If a queue is empty the corresponding * field is set to KTIME_MAX. If local event expires before global * event, global event is set to KTIME_MAX as well. * * Caller needs to make sure timer base locks are held (use * timer_lock_remote_bases() for this purpose). */ void fetch_next_timer_interrupt_remote(unsigned long basej, u64 basem, struct timer_events *tevt, unsigned int cpu) { struct timer_base *base_local, *base_global; /* Preset local / global events */ tevt->local = tevt->global = KTIME_MAX; base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu); base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu); lockdep_assert_held(&base_local->lock); lockdep_assert_held(&base_global->lock); fetch_next_timer_interrupt(basej, basem, base_local, base_global, tevt); } /** * timer_unlock_remote_bases - unlock timer bases of cpu * @cpu: Remote CPU * * Unlocks the remote timer bases. */ void timer_unlock_remote_bases(unsigned int cpu) __releases(timer_bases[BASE_LOCAL]->lock) __releases(timer_bases[BASE_GLOBAL]->lock) { struct timer_base *base_local, *base_global; base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu); base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu); raw_spin_unlock(&base_global->lock); raw_spin_unlock(&base_local->lock); } /** * timer_lock_remote_bases - lock timer bases of cpu * @cpu: Remote CPU * * Locks the remote timer bases. */ void timer_lock_remote_bases(unsigned int cpu) __acquires(timer_bases[BASE_LOCAL]->lock) __acquires(timer_bases[BASE_GLOBAL]->lock) { struct timer_base *base_local, *base_global; base_local = per_cpu_ptr(&timer_bases[BASE_LOCAL], cpu); base_global = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu); lockdep_assert_irqs_disabled(); raw_spin_lock(&base_local->lock); raw_spin_lock_nested(&base_global->lock, SINGLE_DEPTH_NESTING); } /** * timer_base_is_idle() - Return whether timer base is set idle * * Returns value of local timer base is_idle value. */ bool timer_base_is_idle(void) { return __this_cpu_read(timer_bases[BASE_LOCAL].is_idle); } static void __run_timer_base(struct timer_base *base); /** * timer_expire_remote() - expire global timers of cpu * @cpu: Remote CPU * * Expire timers of global base of remote CPU. */ void timer_expire_remote(unsigned int cpu) { struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_GLOBAL], cpu); __run_timer_base(base); } static void timer_use_tmigr(unsigned long basej, u64 basem, unsigned long *nextevt, bool *tick_stop_path, bool timer_base_idle, struct timer_events *tevt) { u64 next_tmigr; if (timer_base_idle) next_tmigr = tmigr_cpu_new_timer(tevt->global); else if (tick_stop_path) next_tmigr = tmigr_cpu_deactivate(tevt->global); else next_tmigr = tmigr_quick_check(tevt->global); /* * If the CPU is the last going idle in timer migration hierarchy, make * sure the CPU will wake up in time to handle remote timers. * next_tmigr == KTIME_MAX if other CPUs are still active. 
*/ if (next_tmigr < tevt->local) { u64 tmp; /* If we missed a tick already, force 0 delta */ if (next_tmigr < basem) next_tmigr = basem; tmp = div_u64(next_tmigr - basem, TICK_NSEC); *nextevt = basej + (unsigned long)tmp; tevt->local = next_tmigr; } } # else static void timer_use_tmigr(unsigned long basej, u64 basem, unsigned long *nextevt, bool *tick_stop_path, bool timer_base_idle, struct timer_events *tevt) { /* * Make sure first event is written into tevt->local to not miss a * timer on !SMP systems. */ tevt->local = min_t(u64, tevt->local, tevt->global); } # endif /* CONFIG_SMP */ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem, bool *idle) { struct timer_events tevt = { .local = KTIME_MAX, .global = KTIME_MAX }; struct timer_base *base_local, *base_global; unsigned long nextevt; bool idle_is_possible; /* * When the CPU is offline, the tick is cancelled and nothing is supposed * to try to stop it. */ if (WARN_ON_ONCE(cpu_is_offline(smp_processor_id()))) { if (idle) *idle = true; return tevt.local; } base_local = this_cpu_ptr(&timer_bases[BASE_LOCAL]); base_global = this_cpu_ptr(&timer_bases[BASE_GLOBAL]); raw_spin_lock(&base_local->lock); raw_spin_lock_nested(&base_global->lock, SINGLE_DEPTH_NESTING); nextevt = fetch_next_timer_interrupt(basej, basem, base_local, base_global, &tevt); /* * If the next event is only one jiffie ahead there is no need to call * timer migration hierarchy related functions. The value for the next * global timer in @tevt struct equals then KTIME_MAX. This is also * true, when the timer base is idle. * * The proper timer migration hierarchy function depends on the callsite * and whether timer base is idle or not. @nextevt will be updated when * this CPU needs to handle the first timer migration hierarchy * event. See timer_use_tmigr() for detailed information. */ idle_is_possible = time_after(nextevt, basej + 1); if (idle_is_possible) timer_use_tmigr(basej, basem, &nextevt, idle, base_local->is_idle, &tevt); /* * We have a fresh next event. Check whether we can forward the * base. */ __forward_timer_base(base_local, basej); __forward_timer_base(base_global, basej); /* * Set base->is_idle only when caller is timer_base_try_to_set_idle() */ if (idle) { /* * Bases are idle if the next event is more than a tick * away. Caution: @nextevt could have changed by enqueueing a * global timer into timer migration hierarchy. Therefore a new * check is required here. * * If the base is marked idle then any timer add operation must * forward the base clk itself to keep granularity small. This * idle logic is only maintained for the BASE_LOCAL and * BASE_GLOBAL base, deferrable timers may still see large * granularity skew (by design). */ if (!base_local->is_idle && time_after(nextevt, basej + 1)) { base_local->is_idle = true; /* * Global timers queued locally while running in a task * in nohz_full mode need a self-IPI to kick reprogramming * in IRQ tail. */ if (tick_nohz_full_cpu(base_local->cpu)) base_global->is_idle = true; trace_timer_base_idle(true, base_local->cpu); } *idle = base_local->is_idle; /* * When timer base is not set idle, undo the effect of * tmigr_cpu_deactivate() to prevent inconsistent states - active * timer base but inactive timer migration hierarchy. * * When timer base was already marked idle, nothing will be * changed here. 
*/ if (!base_local->is_idle && idle_is_possible) tmigr_cpu_activate(); } raw_spin_unlock(&base_global->lock); raw_spin_unlock(&base_local->lock); return cmp_next_hrtimer_event(basem, tevt.local); } /** * get_next_timer_interrupt() - return the time (clock mono) of the next timer * @basej: base time jiffies * @basem: base time clock monotonic * * Returns the tick aligned clock monotonic time of the next pending timer or * KTIME_MAX if no timer is pending. If timer of global base was queued into * timer migration hierarchy, first global timer is not taken into account. If * it was the last CPU of timer migration hierarchy going idle, first global * event is taken into account. */ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) { return __get_next_timer_interrupt(basej, basem, NULL); } /** * timer_base_try_to_set_idle() - Try to set the idle state of the timer bases * @basej: base time jiffies * @basem: base time clock monotonic * @idle: pointer to store the value of timer_base->is_idle on return; * *idle contains the information whether tick was already stopped * * Returns the tick aligned clock monotonic time of the next pending timer or * KTIME_MAX if no timer is pending. When tick was already stopped KTIME_MAX is * returned as well. */ u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle) { if (*idle) return KTIME_MAX; return __get_next_timer_interrupt(basej, basem, idle); } /** * timer_clear_idle - Clear the idle state of the timer base * * Called with interrupts disabled */ void timer_clear_idle(void) { /* * We do this unlocked. The worst outcome is a remote pinned timer * enqueue sending a pointless IPI, but taking the lock would just * make the window for sending the IPI a few instructions smaller * for the cost of taking the lock in the exit from idle * path. Required for BASE_LOCAL only. */ __this_cpu_write(timer_bases[BASE_LOCAL].is_idle, false); if (tick_nohz_full_cpu(smp_processor_id())) __this_cpu_write(timer_bases[BASE_GLOBAL].is_idle, false); trace_timer_base_idle(false, smp_processor_id()); /* Activate without holding the timer_base->lock */ tmigr_cpu_activate(); } #endif /** * __run_timers - run all expired timers (if any) on this CPU. * @base: the timer vector to be processed. */ static inline void __run_timers(struct timer_base *base) { struct hlist_head heads[LVL_DEPTH]; int levels; lockdep_assert_held(&base->lock); if (base->running_timer) return; while (time_after_eq(jiffies, base->clk) && time_after_eq(jiffies, base->next_expiry)) { levels = collect_expired_timers(base, heads); /* * The two possible reasons for not finding any expired * timer at this clk are that all matching timers have been * dequeued or no timer has been queued since * base::next_expiry was set to base::clk + * NEXT_TIMER_MAX_DELTA. */ WARN_ON_ONCE(!levels && !base->next_expiry_recalc && base->timers_pending); /* * While executing timers, base->clk is set 1 offset ahead of * jiffies to avoid endless requeuing to current jiffies. 
*/ base->clk++; next_expiry_recalc(base); while (levels--) expire_timers(base, heads + levels); } } static void __run_timer_base(struct timer_base *base) { if (time_before(jiffies, base->next_expiry)) return; timer_base_lock_expiry(base); raw_spin_lock_irq(&base->lock); __run_timers(base); raw_spin_unlock_irq(&base->lock); timer_base_unlock_expiry(base); } static void run_timer_base(int index) { struct timer_base *base = this_cpu_ptr(&timer_bases[index]); __run_timer_base(base); } /* * This function runs timers and the timer-tq in bottom half context. */ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { run_timer_base(BASE_LOCAL); if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) { run_timer_base(BASE_GLOBAL); run_timer_base(BASE_DEF); if (is_timers_nohz_active()) tmigr_handle_remote(); } } /* * Called by the local, per-CPU timer interrupt on SMP. */ static void run_local_timers(void) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_LOCAL]); hrtimer_run_queues(); for (int i = 0; i < NR_BASES; i++, base++) { /* Raise the softirq only if required. */ if (time_after_eq(jiffies, base->next_expiry) || (i == BASE_DEF && tmigr_requires_handle_remote())) { raise_softirq(TIMER_SOFTIRQ); return; } } } /* * Called from the timer interrupt handler to charge one tick to the current * process. user_tick is 1 if the tick is user time, 0 for system. */ void update_process_times(int user_tick) { struct task_struct *p = current; /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); run_local_timers(); rcu_sched_clock_irq(user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) irq_work_tick(); #endif scheduler_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(); } /* * Since schedule_timeout()'s timer is defined on the stack, it must store * the target task on the stack as well. */ struct process_timer { struct timer_list timer; struct task_struct *task; }; static void process_timeout(struct timer_list *t) { struct process_timer *timeout = from_timer(timeout, t, timer); wake_up_process(timeout->task); } /** * schedule_timeout - sleep until timeout * @timeout: timeout value in jiffies * * Make the current task sleep until @timeout jiffies have elapsed. * The function behavior depends on the current task state * (see also set_current_state() description): * * %TASK_RUNNING - the scheduler is called, but the task does not sleep * at all. That happens because sched_submit_work() does nothing for * tasks in %TASK_RUNNING state. * * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to * pass before the routine returns unless the current task is explicitly * woken up, (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is * delivered to the current task or the current task is explicitly woken * up. * * The current task state is guaranteed to be %TASK_RUNNING when this * routine returns. * * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule * the CPU away without a bound on the timeout. In this case the return * value will be %MAX_SCHEDULE_TIMEOUT. * * Returns 0 when the timer has expired otherwise the remaining time in * jiffies will be returned. In all cases the return value is guaranteed * to be non-negative. */ signed long __sched schedule_timeout(signed long timeout) { struct process_timer timer; unsigned long expire; switch (timeout) { case MAX_SCHEDULE_TIMEOUT: /* * These two special cases are useful to be comfortable * in the caller. Nothing more. 
We could take * MAX_SCHEDULE_TIMEOUT from one of the negative value * but I' d like to return a valid offset (>=0) to allow * the caller to do everything it want with the retval. */ schedule(); goto out; default: /* * Another bit of PARANOID. Note that the retval will be * 0 since no piece of kernel is supposed to do a check * for a negative retval of schedule_timeout() (since it * should never happens anyway). You just have the printk() * that will tell you if something is gone wrong and where. */ if (timeout < 0) { printk(KERN_ERR "schedule_timeout: wrong timeout " "value %lx\n", timeout); dump_stack(); __set_current_state(TASK_RUNNING); goto out; } } expire = timeout + jiffies; timer.task = current; timer_setup_on_stack(&timer.timer, process_timeout, 0); __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); schedule(); del_timer_sync(&timer.timer); /* Remove the timer from the object tracker */ destroy_timer_on_stack(&timer.timer); timeout = expire - jiffies; out: return timeout < 0 ? 0 : timeout; } EXPORT_SYMBOL(schedule_timeout); /* * We can use __set_current_state() here because schedule_timeout() calls * schedule() unconditionally. */ signed long __sched schedule_timeout_interruptible(signed long timeout) { __set_current_state(TASK_INTERRUPTIBLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_interruptible); signed long __sched schedule_timeout_killable(signed long timeout) { __set_current_state(TASK_KILLABLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_killable); signed long __sched schedule_timeout_uninterruptible(signed long timeout) { __set_current_state(TASK_UNINTERRUPTIBLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_uninterruptible); /* * Like schedule_timeout_uninterruptible(), except this task will not contribute * to load average. */ signed long __sched schedule_timeout_idle(signed long timeout) { __set_current_state(TASK_IDLE); return schedule_timeout(timeout); } EXPORT_SYMBOL(schedule_timeout_idle); #ifdef CONFIG_HOTPLUG_CPU static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head) { struct timer_list *timer; int cpu = new_base->cpu; while (!hlist_empty(head)) { timer = hlist_entry(head->first, struct timer_list, entry); detach_timer(timer, false); timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; internal_add_timer(new_base, timer); } } int timers_prepare_cpu(unsigned int cpu) { struct timer_base *base; int b; for (b = 0; b < NR_BASES; b++) { base = per_cpu_ptr(&timer_bases[b], cpu); base->clk = jiffies; base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; base->next_expiry_recalc = false; base->timers_pending = false; base->is_idle = false; } return 0; } int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; struct timer_base *new_base; int b, i; for (b = 0; b < NR_BASES; b++) { old_base = per_cpu_ptr(&timer_bases[b], cpu); new_base = get_cpu_ptr(&timer_bases[b]); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ raw_spin_lock_irq(&new_base->lock); raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); /* * The current CPUs base clock might be stale. Update it * before moving the timers over. 
*/ forward_timer_base(new_base); WARN_ON_ONCE(old_base->running_timer); old_base->running_timer = NULL; for (i = 0; i < WHEEL_SIZE; i++) migrate_timer_list(new_base, old_base->vectors + i); raw_spin_unlock(&old_base->lock); raw_spin_unlock_irq(&new_base->lock); put_cpu_ptr(&timer_bases); } return 0; } #endif /* CONFIG_HOTPLUG_CPU */ static void __init init_timer_cpu(int cpu) { struct timer_base *base; int i; for (i = 0; i < NR_BASES; i++) { base = per_cpu_ptr(&timer_bases[i], cpu); base->cpu = cpu; raw_spin_lock_init(&base->lock); base->clk = jiffies; base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; timer_base_init_expiry_lock(base); } } static void __init init_timer_cpus(void) { int cpu; for_each_possible_cpu(cpu) init_timer_cpu(cpu); } void __init init_timers(void) { init_timer_cpus(); posix_cputimers_init_work(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } /** * msleep - sleep safely even with waitqueue interruptions * @msecs: Time in milliseconds to sleep for */ void msleep(unsigned int msecs) { unsigned long timeout = msecs_to_jiffies(msecs) + 1; while (timeout) timeout = schedule_timeout_uninterruptible(timeout); } EXPORT_SYMBOL(msleep); /** * msleep_interruptible - sleep waiting for signals * @msecs: Time in milliseconds to sleep for */ unsigned long msleep_interruptible(unsigned int msecs) { unsigned long timeout = msecs_to_jiffies(msecs) + 1; while (timeout && !signal_pending(current)) timeout = schedule_timeout_interruptible(timeout); return jiffies_to_msecs(timeout); } EXPORT_SYMBOL(msleep_interruptible); /** * usleep_range_state - Sleep for an approximate time in a given state * @min: Minimum time in usecs to sleep * @max: Maximum time in usecs to sleep * @state: State of the current task that will be while sleeping * * In non-atomic context where the exact wakeup time is flexible, use * usleep_range_state() instead of udelay(). The sleep improves responsiveness * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces * power usage by allowing hrtimers to take advantage of an already- * scheduled interrupt instead of scheduling a new one just for this sleep. */ void __sched usleep_range_state(unsigned long min, unsigned long max, unsigned int state) { ktime_t exp = ktime_add_us(ktime_get(), min); u64 delta = (u64)(max - min) * NSEC_PER_USEC; for (;;) { __set_current_state(state); /* Do not return before the requested sleep time has elapsed */ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) break; } } EXPORT_SYMBOL(usleep_range_state);
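/*
 * Example (not part of timer.c above): a minimal sketch of the timer API
 * documented above, assuming a hypothetical driver object "struct mydrv".
 * It shows timer_setup(), arming with mod_timer(), the timer/workqueue
 * circular dependency described in the timer_shutdown_sync() kernel-doc,
 * and the teardown ordering that resolves it. Everything except the timer
 * and workqueue APIs themselves is made up for illustration.
 */
#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

struct mydrv {
	struct timer_list poll_timer;	/* fires every 100ms */
	struct work_struct poll_work;	/* does the sleeping work */
	struct workqueue_struct *wq;
};

/* Softirq context: hand the real work off and do nothing else. */
static void mydrv_timer_fn(struct timer_list *t)
{
	struct mydrv *drv = from_timer(drv, t, poll_timer);

	queue_work(drv->wq, &drv->poll_work);
}

/* Process context: poll the (imaginary) hardware, then rearm the timer. */
static void mydrv_work_fn(struct work_struct *work)
{
	struct mydrv *drv = container_of(work, struct mydrv, poll_work);

	/* ... talk to hardware ... */

	/* Silently ignored once timer_shutdown_sync() has run. */
	mod_timer(&drv->poll_timer, jiffies + msecs_to_jiffies(100));
}

static int mydrv_start(struct mydrv *drv)
{
	drv->wq = alloc_workqueue("mydrv", 0, 0);
	if (!drv->wq)
		return -ENOMEM;

	INIT_WORK(&drv->poll_work, mydrv_work_fn);
	timer_setup(&drv->poll_timer, mydrv_timer_fn, 0);
	mod_timer(&drv->poll_timer, jiffies + msecs_to_jiffies(100));
	return 0;
}

static void mydrv_stop(struct mydrv *drv)
{
	/*
	 * Ordering from the timer_shutdown_sync() kernel-doc: first make
	 * sure the timer can neither fire nor be rearmed again, then tear
	 * down the workqueue that would otherwise rearm it.
	 */
	timer_shutdown_sync(&drv->poll_timer);
	destroy_workqueue(drv->wq);
}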
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_FLIP_H #define _LINUX_TTY_FLIP_H #include <linux/tty_buffer.h> #include <linux/tty_port.h> struct tty_ldisc; int tty_buffer_set_limit(struct tty_port *port, int limit); unsigned int tty_buffer_space_avail(struct tty_port *port); int tty_buffer_request_room(struct tty_port *port, size_t size); size_t __tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, const u8 *flags, bool mutable_flags, size_t size); size_t tty_prepare_flip_string(struct tty_port *port, u8 **chars, size_t size); void tty_flip_buffer_push(struct tty_port *port); /** * tty_insert_flip_string_fixed_flag - add characters to the tty buffer * @port: tty port * @chars: characters * @flag: flag value for each character * @size: size * * Queue a series of bytes to the tty buffering. All the characters passed are * marked with the supplied flag. * * Returns: the number added. */ static inline size_t tty_insert_flip_string_fixed_flag(struct tty_port *port, const u8 *chars, u8 flag, size_t size) { return __tty_insert_flip_string_flags(port, chars, &flag, false, size); } /** * tty_insert_flip_string_flags - add characters to the tty buffer * @port: tty port * @chars: characters * @flags: flag bytes * @size: size * * Queue a series of bytes to the tty buffering. For each character the flags * array indicates the status of the character. * * Returns: the number added. */ static inline size_t tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, const u8 *flags, size_t size) { return __tty_insert_flip_string_flags(port, chars, flags, true, size); } /** * tty_insert_flip_char - add one character to the tty buffer * @port: tty port * @ch: character * @flag: flag byte * * Queue a single byte @ch to the tty buffering, with an optional flag. */ static inline size_t tty_insert_flip_char(struct tty_port *port, u8 ch, u8 flag) { struct tty_buffer *tb = port->buf.tail; int change; change = !tb->flags && (flag != TTY_NORMAL); if (!change && tb->used < tb->size) { if (tb->flags) *flag_buf_ptr(tb, tb->used) = flag; *char_buf_ptr(tb, tb->used++) = ch; return 1; } return __tty_insert_flip_string_flags(port, &ch, &flag, false, 1); } static inline size_t tty_insert_flip_string(struct tty_port *port, const u8 *chars, size_t size) { return tty_insert_flip_string_fixed_flag(port, chars, TTY_NORMAL, size); } size_t tty_ldisc_receive_buf(struct tty_ldisc *ld, const u8 *p, const u8 *f, size_t count); void tty_buffer_lock_exclusive(struct tty_port *port); void tty_buffer_unlock_exclusive(struct tty_port *port); #endif /* _LINUX_TTY_FLIP_H */
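/*
 * Example (not part of tty_flip.h above): a minimal sketch of how a serial
 * driver's receive path can feed received bytes through the flip-buffer
 * helpers declared above. The uart_port plumbing follows the usual
 * serial_core layout, but the surrounding driver is hypothetical; only the
 * tty_insert_flip_*() and tty_flip_buffer_push() calls are the API being
 * illustrated.
 */
#include <linux/serial_core.h>
#include <linux/tty.h>
#include <linux/tty_flip.h>

/* Push one received chunk into the line discipline via the tty buffer. */
static void example_uart_rx(struct uart_port *port, const u8 *buf, size_t len,
			    bool framing_error)
{
	struct tty_port *tport = &port->state->port;

	if (framing_error) {
		/* Mark every byte of the chunk with TTY_FRAME. */
		tty_insert_flip_string_fixed_flag(tport, buf, TTY_FRAME, len);
	} else {
		/* Normal data: all bytes get TTY_NORMAL. */
		tty_insert_flip_string(tport, buf, len);
	}

	/* Hand the buffered bytes to the line discipline. */
	tty_flip_buffer_push(tport);
}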
// SPDX-License-Identifier: GPL-2.0-or-later /* * printk_safe.c - Safe printk for printk-deadlock-prone contexts */ #include <linux/preempt.h> #include <linux/kdb.h> #include <linux/smp.h> #include <linux/cpumask.h> #include <linux/printk.h> #include <linux/kprobes.h> #include "internal.h" static DEFINE_PER_CPU(int, printk_context); /* Can be preempted by NMI. */ void __printk_safe_enter(void) { this_cpu_inc(printk_context); } /* Can be preempted by NMI. */ void __printk_safe_exit(void) { this_cpu_dec(printk_context); } asmlinkage int vprintk(const char *fmt, va_list args) { #ifdef CONFIG_KGDB_KDB /* Allow to pass printk() to kdb but avoid a recursion. */ if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); #endif /* * Use the main logbuf even in NMI. But avoid calling console * drivers that might have their own locks. */ if (this_cpu_read(printk_context) || in_nmi()) return vprintk_deferred(fmt, args); /* No obstacles. */ return vprintk_default(fmt, args); } EXPORT_SYMBOL(vprintk);
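/*
 * Example (not part of printk_safe.c above): a minimal sketch of the kind of
 * context the routing in vprintk() protects against. Code that logs while
 * holding a lock that console drivers might also depend on (or while in NMI)
 * must not re-enter the console path; printk_deferred() only stores the
 * message and lets it be flushed later, which is the same path vprintk()
 * picks when printk_context is non-zero or in_nmi() is true. The lock and
 * the statistics counter are made up for illustration.
 */
#include <linux/printk.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_stats_lock);
static unsigned long example_overruns;

/* Called from hard interrupt context with a console-sensitive lock held. */
static void example_account_overrun(void)
{
	raw_spin_lock(&example_stats_lock);
	example_overruns++;
	/*
	 * A plain printk() here could deadlock if a console driver ever
	 * needed example_stats_lock (directly or via a lock dependency),
	 * so the message is only queued and deferred.
	 */
	printk_deferred(KERN_WARNING "example: %lu overruns\n",
			example_overruns);
	raw_spin_unlock(&example_stats_lock);
}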
// SPDX-License-Identifier: GPL-2.0-or-later /* * xfrm_device.c - IPsec device offloading code.
* * Copyright (c) 2015 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> */ #include <linux/errno.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> #include <net/gso.h> #include <net/xfrm.h> #include <linux/notifier.h> #ifdef CONFIG_XFRM_OFFLOAD static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); skb_reset_mac_len(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header -= x->props.header_len; pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len); } static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); pskb_pull(skb, skb->mac_len + x->props.header_len); } static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, unsigned int hsize) { struct xfrm_offload *xo = xfrm_offload(skb); int phlen = 0; if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); if (x->sel.family != AF_INET6) { phlen = IPV4_BEET_PHMAXLEN; if (x->outer_mode.family == AF_INET6) phlen += sizeof(struct ipv6hdr) - sizeof(struct iphdr); } pskb_pull(skb, skb->mac_len + hsize + (x->props.header_len - phlen)); } /* Adjust pointers into the packet when IPsec is done at layer2 */ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) { switch (x->outer_mode.encap) { case XFRM_MODE_TUNNEL: if (x->outer_mode.family == AF_INET) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_tunnel_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_TRANSPORT: if (x->outer_mode.family == AF_INET) return __xfrm_transport_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_transport_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_BEET: if (x->outer_mode.family == AF_INET) return __xfrm_mode_beet_prep(x, skb, sizeof(struct iphdr)); if (x->outer_mode.family == AF_INET6) return __xfrm_mode_beet_prep(x, skb, sizeof(struct ipv6hdr)); break; case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: break; } } static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) { struct xfrm_offload *xo = xfrm_offload(skb); __u32 seq = xo->seq.low; seq += skb_shinfo(skb)->gso_segs; if (unlikely(seq < xo->seq.low)) return true; return false; } struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; unsigned long flags; struct xfrm_state *x; struct softnet_data *sd; struct sk_buff *skb2, *nskb, *pskb = NULL; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); struct net_device *dev = skb->dev; struct sec_path *sp; if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN) return skb; /* The packet was sent to HW IPsec packet offload engine, * but to wrong device. Drop the packet, so it won't skip * XFRM stack. 
*/ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } /* This skb was already validated on the upper/virtual dev */ if ((x->xso.dev != dev) && (x->xso.real_dev == dev)) return skb; local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); err = !skb_queue_empty(&sd->xfrm_backlog); local_irq_restore(flags); if (err) { *again = true; return skb; } if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP); segs = skb_gso_segment(skb, esp_features); if (IS_ERR(segs)) { kfree_skb(skb); dev_core_stats_tx_dropped_inc(dev); return NULL; } else { consume_skb(skb); skb = segs; } } if (!skb->next) { esp_features |= skb->dev->gso_partial_features; xfrm_outer_mode_prep(x, skb); xo->flags |= XFRM_DEV_RESUME; err = x->type_offload->xmit(x, skb, esp_features); if (err) { if (err == -EINPROGRESS) return NULL; XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); kfree_skb(skb); return NULL; } skb_push(skb, skb->data - skb_mac_header(skb)); return skb; } skb_list_walk_safe(skb, skb2, nskb) { esp_features |= skb->dev->gso_partial_features; skb_mark_not_on_list(skb2); xo = xfrm_offload(skb2); xo->flags |= XFRM_DEV_RESUME; xfrm_outer_mode_prep(x, skb2); err = x->type_offload->xmit(x, skb2, esp_features); if (!err) { skb2->next = nskb; } else if (err != -EINPROGRESS) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); skb2->next = nskb; kfree_skb_list(skb2); return NULL; } else { if (skb == skb2) skb = nskb; else pskb->next = nskb; continue; } skb_push(skb2, skb2->data - skb_mac_header(skb2)); pskb = skb2; } return skb; } EXPORT_SYMBOL_GPL(validate_xmit_xfrm); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack) { int err; struct dst_entry *dst; struct net_device *dev; struct xfrm_dev_offload *xso = &x->xso; xfrm_address_t *saddr; xfrm_address_t *daddr; bool is_packet_offload; if (!x->type_offload) { NL_SET_ERR_MSG(extack, "Type doesn't support offload"); return -EINVAL; } if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) { NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; /* We don't yet support UDP encapsulation and TFC padding. */ if ((!is_packet_offload && x->encap) || x->tfcpad) { NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { saddr = &x->props.saddr; daddr = &x->id.daddr; } else { saddr = &x->id.daddr; daddr = &x->props.saddr; } dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family, xfrm_smark_get(0, x)); if (IS_ERR(dst)) return (is_packet_offload) ? -EINVAL : 0; dev = dst->dev; dev_hold(dev); dst_release(dst); } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { xso->dev = NULL; dev_put(dev); return (is_packet_offload) ? 
-EINVAL : 0; } if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; dev_put(dev); return -EINVAL; } xso->dev = dev; netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); xso->real_dev = dev; if (xuo->flags & XFRM_OFFLOAD_INBOUND) xso->dir = XFRM_DEV_OFFLOAD_IN; else xso->dir = XFRM_DEV_OFFLOAD_OUT; if (is_packet_offload) xso->type = XFRM_DEV_OFFLOAD_PACKET; else xso->type = XFRM_DEV_OFFLOAD_CRYPTO; err = dev->xfrmdev_ops->xdo_dev_state_add(x, extack); if (err) { xso->dev = NULL; xso->dir = 0; xso->real_dev = NULL; netdev_put(dev, &xso->dev_tracker); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; /* User explicitly requested packet offload mode and configured * policy in addition to the XFRM state. So be civil to users, * and return an error instead of taking fallback path. * * This WARN_ON() can be seen as a documentation for driver * authors to do not return -EOPNOTSUPP in packet offload mode. */ WARN_ON(err == -EOPNOTSUPP && is_packet_offload); if (err != -EOPNOTSUPP || is_packet_offload) { NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state"); return err; } } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_state_add); int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack) { struct xfrm_dev_offload *xdo = &xp->xdo; struct net_device *dev; int err; if (!xuo->flags || xuo->flags & ~XFRM_OFFLOAD_PACKET) { /* We support only packet offload mode and it means * that user must set XFRM_OFFLOAD_PACKET bit. */ NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) return -EINVAL; if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_policy_add) { xdo->dev = NULL; dev_put(dev); NL_SET_ERR_MSG(extack, "Policy offload is not supported"); return -EINVAL; } xdo->dev = dev; netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC); xdo->real_dev = dev; xdo->type = XFRM_DEV_OFFLOAD_PACKET; switch (dir) { case XFRM_POLICY_IN: xdo->dir = XFRM_DEV_OFFLOAD_IN; break; case XFRM_POLICY_OUT: xdo->dir = XFRM_DEV_OFFLOAD_OUT; break; case XFRM_POLICY_FWD: xdo->dir = XFRM_DEV_OFFLOAD_FWD; break; default: xdo->dev = NULL; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG(extack, "Unrecognized offload direction"); return -EINVAL; } err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack); if (err) { xdo->dev = NULL; xdo->real_dev = NULL; xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xdo->dir = 0; netdev_put(dev, &xdo->dev_tracker); NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this policy"); return err; } return 0; } EXPORT_SYMBOL_GPL(xfrm_dev_policy_add); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) { int mtu; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct net_device *dev = x->xso.dev; if (!x->type_offload || (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED && x->encap)) return false; if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET || ((!dev || (dev == xfrm_dst_path(dst)->dev)) && !xdst->child->xfrm)) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) goto ok; } return false; ok: if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok) return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); return true; } EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); void 
xfrm_dev_resume(struct sk_buff *skb) { struct net_device *dev = skb->dev; int ret = NETDEV_TX_BUSY; struct netdev_queue *txq; struct softnet_data *sd; unsigned long flags; rcu_read_lock(); txq = netdev_core_pick_tx(dev, skb, NULL); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); if (!dev_xmit_complete(ret)) { local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); skb_queue_tail(&sd->xfrm_backlog, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xfrm_dev_resume); void xfrm_dev_backlog(struct softnet_data *sd) { struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog; struct sk_buff_head list; struct sk_buff *skb; if (skb_queue_empty(xfrm_backlog)) return; __skb_queue_head_init(&list); spin_lock(&xfrm_backlog->lock); skb_queue_splice_init(xfrm_backlog, &list); spin_unlock(&xfrm_backlog->lock); while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); xfrm_dev_resume(skb); } } #endif static int xfrm_api_check(struct net_device *dev) { #ifdef CONFIG_XFRM_OFFLOAD if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && !(dev->features & NETIF_F_HW_ESP)) return NOTIFY_BAD; if ((dev->features & NETIF_F_HW_ESP) && (!(dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_add && dev->xfrmdev_ops->xdo_dev_state_delete))) return NOTIFY_BAD; #else if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM)) return NOTIFY_BAD; #endif return NOTIFY_DONE; } static int xfrm_dev_down(struct net_device *dev) { if (dev->features & NETIF_F_HW_ESP) { xfrm_dev_state_flush(dev_net(dev), dev, true); xfrm_dev_policy_flush(dev_net(dev), dev, true); } return NOTIFY_DONE; } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REGISTER: return xfrm_api_check(dev); case NETDEV_FEAT_CHANGE: return xfrm_api_check(dev); case NETDEV_DOWN: case NETDEV_UNREGISTER: return xfrm_dev_down(dev); } return NOTIFY_DONE; } static struct notifier_block xfrm_dev_notifier = { .notifier_call = xfrm_dev_event, }; void __init xfrm_dev_init(void) { register_netdevice_notifier(&xfrm_dev_notifier); }
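/*
 * Example (not part of xfrm_device.c above): a minimal sketch of what a
 * hypothetical NIC driver has to provide so that xfrm_api_check() and
 * xfrm_dev_state_add() above accept it for ESP crypto offload: the
 * NETIF_F_HW_ESP feature plus xdo_dev_state_add()/xdo_dev_state_delete()
 * callbacks (NETIF_F_HW_ESP_TX_CSUM is only legal together with
 * NETIF_F_HW_ESP). The driver internals are made up, and the callback
 * signatures are assumed to match struct xfrmdev_ops for the kernel version
 * of the code above (CONFIG_XFRM_OFFLOAD enabled).
 */
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <net/xfrm.h>

static int example_xdo_dev_state_add(struct xfrm_state *x,
				     struct netlink_ext_ack *extack)
{
	/* Reject modes the (imaginary) hardware cannot handle. */
	if (x->props.mode != XFRM_MODE_TRANSPORT &&
	    x->props.mode != XFRM_MODE_TUNNEL) {
		NL_SET_ERR_MSG(extack, "Unsupported mode for HW offload");
		return -EINVAL;
	}
	/* ... program keys and SPI into the hardware SA table ... */
	return 0;
}

static void example_xdo_dev_state_delete(struct xfrm_state *x)
{
	/* ... invalidate the hardware SA entry ... */
}

static bool example_xdo_dev_offload_ok(struct sk_buff *skb,
				       struct xfrm_state *x)
{
	/* e.g. the hardware cannot offload encapsulated packets */
	return !skb->encapsulation;
}

static const struct xfrmdev_ops example_xfrmdev_ops = {
	.xdo_dev_state_add	= example_xdo_dev_state_add,
	.xdo_dev_state_delete	= example_xdo_dev_state_delete,
	.xdo_dev_offload_ok	= example_xdo_dev_offload_ok,
};

/* Called from the driver's netdev setup path. */
static void example_setup_esp_offload(struct net_device *dev)
{
	dev->xfrmdev_ops = &example_xfrmdev_ops;
	dev->features |= NETIF_F_HW_ESP;
	dev->hw_features |= NETIF_F_HW_ESP;
	/* Only valid because NETIF_F_HW_ESP is set as well. */
	dev->features |= NETIF_F_HW_ESP_TX_CSUM;
	dev->hw_features |= NETIF_F_HW_ESP_TX_CSUM;
}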
// SPDX-License-Identifier: GPL-2.0-or-later /* * Generic Timer-queue * * Manages a simple queue of timers, ordered by expiration time. * Uses rbtrees for quick list adds and expiration. * * NOTE: All of the following functions need to be serialized * to avoid races. No locking is done by this library code. */ #include <linux/bug.h> #include <linux/timerqueue.h> #include <linux/rbtree.h> #include <linux/export.h> #define __node_2_tq(_n) \ rb_entry((_n), struct timerqueue_node, node) static inline bool __timerqueue_less(struct rb_node *a, const struct rb_node *b) { return __node_2_tq(a)->expires < __node_2_tq(b)->expires; } /** * timerqueue_add - Adds timer to timerqueue. * * @head: head of timerqueue * @node: timer node to be added * * Adds the timer node to the timerqueue, sorted by the node's expires * value. Returns true if the newly added timer is the first expiring timer in * the queue. */ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) { /* Make sure we don't add nodes that are already added */ WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); return rb_add_cached(&node->node, &head->rb_root, __timerqueue_less); } EXPORT_SYMBOL_GPL(timerqueue_add); /** * timerqueue_del - Removes a timer from the timerqueue. * * @head: head of timerqueue * @node: timer node to be removed * * Removes the timer node from the timerqueue. Returns true if the queue is * not empty after the remove. */ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) { WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); rb_erase_cached(&node->node, &head->rb_root); RB_CLEAR_NODE(&node->node); return !RB_EMPTY_ROOT(&head->rb_root.rb_root); } EXPORT_SYMBOL_GPL(timerqueue_del); /** * timerqueue_iterate_next - Returns the timer after the provided timer * * @node: Pointer to a timer. * * Provides the timer that is after the given node. This is used, when * necessary, to iterate through the list of timers in a timer list * without modifying the list. */ struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node) { struct rb_node *next; if (!node) return NULL; next = rb_next(&node->node); if (!next) return NULL; return container_of(next, struct timerqueue_node, node); } EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
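/*
 * Example (not part of timerqueue.c above): a minimal sketch of using the
 * timerqueue API, assuming the usual inline helpers from
 * <linux/timerqueue.h> (timerqueue_init_head(), timerqueue_init(),
 * timerqueue_getnext()). As the NOTE above says, the caller provides all
 * serialization; here a spinlock owned by the made-up "example_sched"
 * structure does that.
 */
#include <linux/container_of.h>
#include <linux/ktime.h>
#include <linux/spinlock.h>
#include <linux/timerqueue.h>

struct example_event {
	struct timerqueue_node node;
	void (*handler)(struct example_event *ev);
};

struct example_sched {
	struct timerqueue_head queue;
	spinlock_t lock;
};

static void example_sched_init(struct example_sched *s)
{
	timerqueue_init_head(&s->queue);
	spin_lock_init(&s->lock);
}

/* Queue an event; returns true if it became the earliest expiring one. */
static bool example_sched_queue(struct example_sched *s,
				struct example_event *ev, ktime_t expires)
{
	bool first;

	timerqueue_init(&ev->node);
	ev->node.expires = expires;

	spin_lock(&s->lock);
	first = timerqueue_add(&s->queue, &ev->node);
	spin_unlock(&s->lock);

	/* If @first, the caller would reprogram its hardware timer here. */
	return first;
}

/* Pop all events that expire at or before @now and run their handlers. */
static void example_sched_run(struct example_sched *s, ktime_t now)
{
	struct timerqueue_node *node;

	spin_lock(&s->lock);
	while ((node = timerqueue_getnext(&s->queue)) &&
	       node->expires <= now) {
		struct example_event *ev =
			container_of(node, struct example_event, node);

		timerqueue_del(&s->queue, node);
		/* Drop the lock while the handler runs. */
		spin_unlock(&s->lock);
		ev->handler(ev);
		spin_lock(&s->lock);
	}
	spin_unlock(&s->lock);
}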
/* SPDX-License-Identifier: GPL-2.0 */
/*
 *	Routines to manage notifier chains for passing status changes to any
 *	interested routines. We need this instead of hard coded call lists so
 *	that modules can poke their nose into the innards. The network devices
 *	needed them so here they are for the rest of you.
 *
 *				Alan Cox <Alan.Cox@linux.org>
 */

#ifndef _LINUX_NOTIFIER_H
#define _LINUX_NOTIFIER_H
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/srcu.h>

/*
 * Notifier chains are of four types:
 *
 *	Atomic notifier chains: Chain callbacks run in interrupt/atomic
 *		context. Callouts are not allowed to block.
 *	Blocking notifier chains: Chain callbacks run in process context.
 *		Callouts are allowed to block.
 *	Raw notifier chains: There are no restrictions on callbacks,
 *		registration, or unregistration. All locking and protection
 *		must be provided by the caller.
 *	SRCU notifier chains: A variant of blocking notifier chains, with
 *		the same restrictions.
 *
 * atomic_notifier_chain_register() may be called from an atomic context,
 * but blocking_notifier_chain_register() and srcu_notifier_chain_register()
 * must be called from a process context. Ditto for the corresponding
 * _unregister() routines.
 *
 * atomic_notifier_chain_unregister(), blocking_notifier_chain_unregister(),
 * and srcu_notifier_chain_unregister() _must not_ be called from within
 * the call chain.
 *
 * SRCU notifier chains are an alternative form of blocking notifier chains.
 * They use SRCU (Sleepable Read-Copy Update) instead of rw-semaphores for
 * protection of the chain links. This means there is _very_ low overhead
 * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
 * As compensation, srcu_notifier_chain_unregister() is rather expensive.
 * SRCU notifier chains should be used when the chain will be called very
 * often but notifier_blocks will seldom be removed.
*/ struct notifier_block; typedef int (*notifier_fn_t)(struct notifier_block *nb, unsigned long action, void *data); struct notifier_block { notifier_fn_t notifier_call; struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; struct notifier_block __rcu *head; }; struct raw_notifier_head { struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_usage srcuu; struct srcu_struct srcu; struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ spin_lock_init(&(name)->lock); \ (name)->head = NULL; \ } while (0) #define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ init_rwsem(&(name)->rwsem); \ (name)->head = NULL; \ } while (0) #define RAW_INIT_NOTIFIER_HEAD(name) do { \ (name)->head = NULL; \ } while (0) /* srcu_notifier_heads must be cleaned up dynamically */ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define srcu_cleanup_notifier_head(name) \ cleanup_srcu_struct(&(name)->srcu); #define ATOMIC_NOTIFIER_INIT(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .head = NULL } #define BLOCKING_NOTIFIER_INIT(name) { \ .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ .head = NULL } #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } #define SRCU_NOTIFIER_INIT(name, pcpu) \ { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .head = NULL, \ .srcuu = __SRCU_USAGE_INIT(name.srcuu), \ .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ } #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ ATOMIC_NOTIFIER_INIT(name) #define BLOCKING_NOTIFIER_HEAD(name) \ struct blocking_notifier_head name = \ BLOCKING_NOTIFIER_INIT(name) #define RAW_NOTIFIER_HEAD(name) \ struct raw_notifier_head name = \ RAW_NOTIFIER_INIT(name) #ifdef CONFIG_TREE_SRCU #define _SRCU_NOTIFIER_HEAD(name, mod) \ static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \ mod struct srcu_notifier_head name = \ SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) #else #define _SRCU_NOTIFIER_HEAD(name, mod) \ mod struct srcu_notifier_head name = \ SRCU_NOTIFIER_INIT(name, name) #endif #define SRCU_NOTIFIER_HEAD(name) \ _SRCU_NOTIFIER_HEAD(name, /* not static */) #define SRCU_NOTIFIER_HEAD_STATIC(name) \ _SRCU_NOTIFIER_HEAD(name, static) #ifdef __KERNEL__ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_register(struct blocking_notifier_head *nh, struct notifier_block *nb); extern int raw_notifier_chain_register(struct raw_notifier_head *nh, struct notifier_block *nb); extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); extern int atomic_notifier_chain_register_unique_prio( struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_register_unique_prio( struct blocking_notifier_head *nh, struct notifier_block *nb); extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, struct notifier_block *nb); extern int raw_notifier_chain_unregister(struct raw_notifier_head *nh, struct notifier_block *nb); extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, struct notifier_block *nb); extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, unsigned long val, void *v); extern 
int blocking_notifier_call_chain(struct blocking_notifier_head *nh, unsigned long val, void *v); extern int raw_notifier_call_chain(struct raw_notifier_head *nh, unsigned long val, void *v); extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, unsigned long val, void *v); extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v); extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, unsigned long val_up, unsigned long val_down, void *v); extern bool atomic_notifier_call_chain_is_empty(struct atomic_notifier_head *nh); #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ #define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */ /* * Clean way to return from the notifier and stop further calls. */ #define NOTIFY_STOP (NOTIFY_OK|NOTIFY_STOP_MASK) /* Encapsulate (negative) errno value (in particular, NOTIFY_BAD <=> EPERM). */ static inline int notifier_from_errno(int err) { if (err) return NOTIFY_STOP_MASK | (NOTIFY_OK - err); return NOTIFY_OK; } /* Restore (negative) errno value from notify return value. */ static inline int notifier_to_errno(int ret) { ret &= ~NOTIFY_STOP_MASK; return ret > NOTIFY_OK ? NOTIFY_OK - ret : 0; } /* * Declared notifiers so far. I can imagine quite a few more chains * over time (eg laptop power reset chains, reboot chain (to clean * device units up), device [un]mount chain, module load/unload chain, * low memory chain, screenblank chain (for plug in modular screenblankers) * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... */ /* CPU notfiers are defined in include/linux/cpu.h. */ /* netdevice notifiers are defined in include/linux/netdevice.h */ /* reboot notifiers are defined in include/linux/reboot.h. */ /* Hibernation and suspend events are defined in include/linux/suspend.h. */ /* Virtual Terminal events are defined in include/linux/vt.h. */ #define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ /* Console keyboard events. * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and * KBD_KEYSYM. */ #define KBD_KEYCODE 0x0001 /* Keyboard keycode, called before any other */ #define KBD_UNBOUND_KEYCODE 0x0002 /* Keyboard keycode which is not bound to any other */ #define KBD_UNICODE 0x0003 /* Keyboard unicode */ #define KBD_KEYSYM 0x0004 /* Keyboard keysym */ #define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */ extern struct blocking_notifier_head reboot_notifier_list; #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */
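/*
 * Hedged usage sketch, not part of notifier.h: the typical life cycle of a
 * blocking notifier chain using only the declarations above.  The chain,
 * event code and callback names are made up for illustration.
 */
#include <linux/errno.h>
#include <linux/notifier.h>

#define SAMPLE_EVENT_READY	1	/* hypothetical event code */

static BLOCKING_NOTIFIER_HEAD(sample_chain);

static int sample_listener(struct notifier_block *nb, unsigned long action,
			   void *data)
{
	if (action != SAMPLE_EVENT_READY)
		return NOTIFY_DONE;		/* not interested */

	/* veto the event by encoding an errno for the caller */
	if (!data)
		return notifier_from_errno(-EINVAL);

	return NOTIFY_OK;
}

static struct notifier_block sample_nb = {
	.notifier_call	= sample_listener,
	.priority	= 0,			/* higher priority runs earlier */
};

static int sample_init(void)
{
	/* process context required for blocking chains */
	return blocking_notifier_chain_register(&sample_chain, &sample_nb);
}

static int sample_notify(void *payload)
{
	int ret;

	ret = blocking_notifier_call_chain(&sample_chain, SAMPLE_EVENT_READY,
					   payload);
	return notifier_to_errno(ret);	/* 0, or the listener's -errno */
}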
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2002-2005, Instant802 Networks, Inc.
 * Copyright 2005-2006, Devicescape Software, Inc.
 * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
 * Copyright 2013-2014  Intel Mobile Communications GmbH
 * Copyright (C) 2017     Intel Deutschland GmbH
 * Copyright (C) 2018-2023 Intel Corporation
 */

#include <net/mac80211.h>
#include <linux/module.h>
#include <linux/fips.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>
#include <linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/bitmap.h>
#include <linux/inetdevice.h>
#include <net/net_namespace.h>
#include <net/dropreason.h>
#include <net/cfg80211.h>
#include <net/addrconf.h>

#include "ieee80211_i.h"
#include "driver-ops.h"
#include "rate.h"
#include "mesh.h"
#include "wep.h"
#include "led.h"
#include "debugfs.h"

void ieee80211_configure_filter(struct ieee80211_local *local)
{
	u64 mc;
	unsigned int changed_flags;
	unsigned int new_flags = 0;

	if (atomic_read(&local->iff_allmultis))
		new_flags |= FIF_ALLMULTI;

	if (local->monitors || test_bit(SCAN_SW_SCANNING, &local->scanning) ||
	    test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning))
		new_flags |= FIF_BCN_PRBRESP_PROMISC;

	if (local->fif_probe_req || local->probe_req_reg)
		new_flags |= FIF_PROBE_REQ;

	if (local->fif_fcsfail)
		new_flags |= FIF_FCSFAIL;

	if (local->fif_plcpfail)
		new_flags |= FIF_PLCPFAIL;

	if (local->fif_control)
		new_flags |= FIF_CONTROL;

	if (local->fif_other_bss)
		new_flags |= FIF_OTHER_BSS;

	if (local->fif_pspoll)
		new_flags |= FIF_PSPOLL;

	if (local->rx_mcast_action_reg)
		new_flags |= FIF_MCAST_ACTION;

	spin_lock_bh(&local->filter_lock);
	changed_flags = local->filter_flags ^ new_flags;

	mc = drv_prepare_multicast(local, &local->mc_list);
	spin_unlock_bh(&local->filter_lock);

	/* be a bit nasty */
	new_flags |= (1<<31);

	drv_configure_filter(local, changed_flags, &new_flags, mc);

	WARN_ON(new_flags & (1<<31));

	local->filter_flags = new_flags & ~(1<<31);
}

static void ieee80211_reconfig_filter(struct wiphy *wiphy,
				      struct wiphy_work *work)
{
	struct ieee80211_local *local =
		container_of(work, struct ieee80211_local, reconfig_filter);

	ieee80211_configure_filter(local);
}

static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local,
				       struct ieee80211_chanctx_conf *ctx)
{
	struct ieee80211_sub_if_data *sdata;
	struct cfg80211_chan_def chandef = {};
	struct cfg80211_chan_def *oper = NULL;
	enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_STATIC;
	u32 changed = 0;
	int power;
	u32 offchannel_flag;

	if (!local->emulate_chanctx)
		return 0;

	offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;

	if (ctx && !WARN_ON(!ctx->def.chan)) {
		oper = &ctx->def;
		if (ctx->rx_chains_static > 1)
			smps_mode = IEEE80211_SMPS_OFF;
else if (ctx->rx_chains_dynamic > 1) smps_mode = IEEE80211_SMPS_DYNAMIC; else smps_mode = IEEE80211_SMPS_STATIC; } if (local->scan_chandef.chan) { chandef = local->scan_chandef; } else if (local->tmp_channel) { chandef.chan = local->tmp_channel; chandef.width = NL80211_CHAN_WIDTH_20_NOHT; chandef.center_freq1 = chandef.chan->center_freq; chandef.freq1_offset = chandef.chan->freq_offset; } else if (oper) { chandef = *oper; } else { chandef = local->dflt_chandef; } if (WARN(!cfg80211_chandef_valid(&chandef), "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz", chandef.chan ? chandef.chan->center_freq : -1, chandef.chan ? chandef.chan->freq_offset : 0, chandef.width, chandef.center_freq1, chandef.freq1_offset, chandef.center_freq2)) return 0; if (!oper || !cfg80211_chandef_identical(&chandef, oper)) local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; else local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; /* force it also for scanning, since drivers might config differently */ if (offchannel_flag || local->scanning || !cfg80211_chandef_identical(&local->hw.conf.chandef, &chandef)) { local->hw.conf.chandef = chandef; changed |= IEEE80211_CONF_CHANGE_CHANNEL; } if (!conf_is_ht(&local->hw.conf)) { /* * mac80211.h documents that this is only valid * when the channel is set to an HT type, and * that otherwise STATIC is used. */ local->hw.conf.smps_mode = IEEE80211_SMPS_STATIC; } else if (local->hw.conf.smps_mode != smps_mode) { local->hw.conf.smps_mode = smps_mode; changed |= IEEE80211_CONF_CHANGE_SMPS; } power = ieee80211_chandef_max_power(&chandef); rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf)) continue; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; if (sdata->vif.bss_conf.txpower == INT_MIN) continue; power = min(power, sdata->vif.bss_conf.txpower); } rcu_read_unlock(); if (local->hw.conf.power_level != power) { changed |= IEEE80211_CONF_CHANGE_POWER; local->hw.conf.power_level = power; } return changed; } int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { int ret = 0; might_sleep(); WARN_ON(changed & (IEEE80211_CONF_CHANGE_CHANNEL | IEEE80211_CONF_CHANGE_POWER | IEEE80211_CONF_CHANGE_SMPS)); if (changed && local->open_count) { ret = drv_config(local, changed); /* * Goal: * HW reconfiguration should never fail, the driver has told * us what it can support so it should live up to that promise. * * Current status: * rfkill is not integrated with mac80211 and a * configuration command can thus fail if hardware rfkill * is enabled * * FIXME: integrate rfkill with mac80211 and then add this * WARN_ON() back * */ /* WARN_ON(ret); */ } return ret; } /* for scanning, offchannel and chanctx emulation only */ static int _ieee80211_hw_conf_chan(struct ieee80211_local *local, struct ieee80211_chanctx_conf *ctx) { u32 changed; if (!local->open_count) return 0; changed = ieee80211_calc_hw_conf_chan(local, ctx); if (!changed) return 0; return drv_config(local, changed); } int ieee80211_hw_conf_chan(struct ieee80211_local *local) { struct ieee80211_chanctx *ctx; ctx = list_first_entry_or_null(&local->chanctx_list, struct ieee80211_chanctx, list); return _ieee80211_hw_conf_chan(local, ctx ? 
&ctx->conf : NULL); } void ieee80211_hw_conf_init(struct ieee80211_local *local) { u32 changed = ~(IEEE80211_CONF_CHANGE_CHANNEL | IEEE80211_CONF_CHANGE_POWER | IEEE80211_CONF_CHANGE_SMPS); if (WARN_ON(!local->open_count)) return; if (local->emulate_chanctx) { struct ieee80211_chanctx *ctx; ctx = list_first_entry_or_null(&local->chanctx_list, struct ieee80211_chanctx, list); changed |= ieee80211_calc_hw_conf_chan(local, ctx ? &ctx->conf : NULL); } WARN_ON(drv_config(local, changed)); } int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { struct ieee80211_local *local = hw_to_local(hw); local->hw.conf.radar_enabled = ctx->radar_enabled; return _ieee80211_hw_conf_chan(local, ctx); } EXPORT_SYMBOL(ieee80211_emulate_add_chanctx); void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { struct ieee80211_local *local = hw_to_local(hw); local->hw.conf.radar_enabled = false; _ieee80211_hw_conf_chan(local, NULL); } EXPORT_SYMBOL(ieee80211_emulate_remove_chanctx); void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx, u32 changed) { struct ieee80211_local *local = hw_to_local(hw); local->hw.conf.radar_enabled = ctx->radar_enabled; _ieee80211_hw_conf_chan(local, ctx); } EXPORT_SYMBOL(ieee80211_emulate_change_chanctx); int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs, enum ieee80211_chanctx_switch_mode mode) { struct ieee80211_local *local = hw_to_local(hw); if (n_vifs <= 0) return -EINVAL; local->hw.conf.radar_enabled = vifs[0].new_ctx->radar_enabled; _ieee80211_hw_conf_chan(local, vifs[0].new_ctx); return 0; } EXPORT_SYMBOL(ieee80211_emulate_switch_vif_chanctx); #define BSS_CHANGED_VIF_CFG_FLAGS (BSS_CHANGED_ASSOC |\ BSS_CHANGED_IDLE |\ BSS_CHANGED_PS |\ BSS_CHANGED_IBSS |\ BSS_CHANGED_ARP_FILTER |\ BSS_CHANGED_SSID |\ BSS_CHANGED_MLD_VALID_LINKS |\ BSS_CHANGED_MLD_TTLM) void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed) { struct ieee80211_local *local = sdata->local; might_sleep(); if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return; if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED) && sdata->vif.type != NL80211_IFTYPE_AP && sdata->vif.type != NL80211_IFTYPE_ADHOC && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && sdata->vif.type != NL80211_IFTYPE_OCB)) return; if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || sdata->vif.type == NL80211_IFTYPE_NAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !sdata->vif.bss_conf.mu_mimo_owner && !(changed & BSS_CHANGED_TXPOWER)))) return; if (!check_sdata_in_driver(sdata)) return; if (changed & BSS_CHANGED_VIF_CFG_FLAGS) { u64 ch = changed & BSS_CHANGED_VIF_CFG_FLAGS; trace_drv_vif_cfg_changed(local, sdata, changed); if (local->ops->vif_cfg_changed) local->ops->vif_cfg_changed(&local->hw, &sdata->vif, ch); } if (changed & ~BSS_CHANGED_VIF_CFG_FLAGS) { u64 ch = changed & ~BSS_CHANGED_VIF_CFG_FLAGS; /* FIXME: should be for each link */ trace_drv_link_info_changed(local, sdata, &sdata->vif.bss_conf, changed); if (local->ops->link_info_changed) local->ops->link_info_changed(&local->hw, &sdata->vif, &sdata->vif.bss_conf, ch); } if (local->ops->bss_info_changed) local->ops->bss_info_changed(&local->hw, &sdata->vif, &sdata->vif.bss_conf, changed); trace_drv_return_void(local); } void ieee80211_vif_cfg_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed) { struct 
ieee80211_local *local = sdata->local; WARN_ON_ONCE(changed & ~BSS_CHANGED_VIF_CFG_FLAGS); if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return; drv_vif_cfg_changed(local, sdata, changed); } void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, u64 changed) { struct ieee80211_local *local = sdata->local; WARN_ON_ONCE(changed & BSS_CHANGED_VIF_CFG_FLAGS); if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return; if (!check_sdata_in_driver(sdata)) return; drv_link_info_changed(local, sdata, link->conf, link->link_id, changed); } u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) { sdata->vif.bss_conf.use_cts_prot = false; sdata->vif.bss_conf.use_short_preamble = false; sdata->vif.bss_conf.use_short_slot = false; return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE | BSS_CHANGED_ERP_SLOT; } static void ieee80211_tasklet_handler(struct tasklet_struct *t) { struct ieee80211_local *local = from_tasklet(local, t, tasklet); struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || (skb = skb_dequeue(&local->skb_queue_unreliable))) { switch (skb->pkt_type) { case IEEE80211_RX_MSG: /* Clear skb->pkt_type in order to not confuse kernel * netstack. */ skb->pkt_type = 0; ieee80211_rx(&local->hw, skb); break; case IEEE80211_TX_STATUS_MSG: skb->pkt_type = 0; ieee80211_tx_status_skb(&local->hw, skb); break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); dev_kfree_skb(skb); break; } } } static void ieee80211_restart_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, restart_work); struct ieee80211_sub_if_data *sdata; int ret; flush_workqueue(local->workqueue); rtnl_lock(); /* we might do interface manipulations, so need both */ wiphy_lock(local->hw.wiphy); wiphy_work_flush(local->hw.wiphy, NULL); WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), "%s called with hardware scan in progress\n", __func__); list_for_each_entry(sdata, &local->interfaces, list) { /* * XXX: there may be more work for other vif types and even * for station mode: a good thing would be to run most of * the iface type's dependent _stop (ieee80211_mg_stop, * ieee80211_ibss_stop) etc... * For now, fix only the specific bug that was seen: race * between csa_connection_drop_work and us. */ if (sdata->vif.type == NL80211_IFTYPE_STATION) { /* * This worker is scheduled from the iface worker that * runs on mac80211's workqueue, so we can't be * scheduling this worker after the cancel right here. * The exception is ieee80211_chswitch_done. * Then we can have a race... 
*/ wiphy_work_cancel(local->hw.wiphy, &sdata->u.mgd.csa_connection_drop_work); if (sdata->vif.bss_conf.csa_active) ieee80211_sta_connection_lost(sdata, WLAN_REASON_UNSPECIFIED, false); } wiphy_delayed_work_flush(local->hw.wiphy, &sdata->dec_tailroom_needed_wk); } ieee80211_scan_cancel(local); /* make sure any new ROC will consider local->in_reconfig */ wiphy_delayed_work_flush(local->hw.wiphy, &local->roc_work); wiphy_work_flush(local->hw.wiphy, &local->hw_roc_done); /* wait for all packet processing to be done */ synchronize_net(); ret = ieee80211_reconfig(local); wiphy_unlock(local->hw.wiphy); if (ret) cfg80211_shutdown_all_interfaces(local->hw.wiphy); rtnl_unlock(); } void ieee80211_restart_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); trace_api_restart_hw(local); wiphy_info(hw->wiphy, "Hardware restart was requested\n"); /* use this reason, ieee80211_reconfig will unblock it */ ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND, false); /* * Stop all Rx during the reconfig. We don't want state changes * or driver callbacks while this is in progress. */ local->in_reconfig = true; barrier(); queue_work(system_freezable_wq, &local->restart_work); } EXPORT_SYMBOL(ieee80211_restart_hw); #ifdef CONFIG_INET static int ieee80211_ifa_changed(struct notifier_block *nb, unsigned long data, void *arg) { struct in_ifaddr *ifa = arg; struct ieee80211_local *local = container_of(nb, struct ieee80211_local, ifa_notifier); struct net_device *ndev = ifa->ifa_dev->dev; struct wireless_dev *wdev = ndev->ieee80211_ptr; struct in_device *idev; struct ieee80211_sub_if_data *sdata; struct ieee80211_vif_cfg *vif_cfg; struct ieee80211_if_managed *ifmgd; int c = 0; /* Make sure it's our interface that got changed */ if (!wdev) return NOTIFY_DONE; if (wdev->wiphy != local->hw.wiphy || !wdev->registered) return NOTIFY_DONE; sdata = IEEE80211_DEV_TO_SUB_IF(ndev); vif_cfg = &sdata->vif.cfg; /* ARP filtering is only supported in managed mode */ if (sdata->vif.type != NL80211_IFTYPE_STATION) return NOTIFY_DONE; idev = __in_dev_get_rtnl(sdata->dev); if (!idev) return NOTIFY_DONE; ifmgd = &sdata->u.mgd; /* * The nested here is needed to convince lockdep that this is * all OK. Yes, we lock the wiphy mutex here while we already * hold the notifier rwsem, that's the normal case. And yes, * we also acquire the notifier rwsem again when unregistering * a netdev while we already hold the wiphy mutex, so it does * look like a typical ABBA deadlock. * * However, both of these things happen with the RTNL held * already. Therefore, they can't actually happen, since the * lock orders really are ABC and ACB, which is fine due to * the RTNL (A). * * We still need to prevent recursion, which is accomplished * by the !wdev->registered check above. 
*/ mutex_lock_nested(&local->hw.wiphy->mtx, 1); __acquire(&local->hw.wiphy->mtx); /* Copy the addresses to the vif config list */ ifa = rtnl_dereference(idev->ifa_list); while (ifa) { if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN) vif_cfg->arp_addr_list[c] = ifa->ifa_address; ifa = rtnl_dereference(ifa->ifa_next); c++; } vif_cfg->arp_addr_cnt = c; /* Configure driver only if associated (which also implies it is up) */ if (ifmgd->associated) ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_ARP_FILTER); wiphy_unlock(local->hw.wiphy); return NOTIFY_OK; } #endif #if IS_ENABLED(CONFIG_IPV6) static int ieee80211_ifa6_changed(struct notifier_block *nb, unsigned long data, void *arg) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)arg; struct inet6_dev *idev = ifa->idev; struct net_device *ndev = ifa->idev->dev; struct ieee80211_local *local = container_of(nb, struct ieee80211_local, ifa6_notifier); struct wireless_dev *wdev = ndev->ieee80211_ptr; struct ieee80211_sub_if_data *sdata; /* Make sure it's our interface that got changed */ if (!wdev || wdev->wiphy != local->hw.wiphy) return NOTIFY_DONE; sdata = IEEE80211_DEV_TO_SUB_IF(ndev); /* * For now only support station mode. This is mostly because * doing AP would have to handle AP_VLAN in some way ... */ if (sdata->vif.type != NL80211_IFTYPE_STATION) return NOTIFY_DONE; drv_ipv6_addr_change(local, sdata, idev); return NOTIFY_OK; } #endif /* There isn't a lot of sense in it, but you can transmit anything you like */ static const struct ieee80211_txrx_stypes ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { [NL80211_IFTYPE_ADHOC] = { .tx = 0xffff, .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_DEAUTH >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4), }, [NL80211_IFTYPE_STATION] = { .tx = 0xffff, /* * To support Pre Association Security Negotiation (PASN) while * already associated to one AP, allow user space to register to * Rx authentication frames, so that the user space logic would * be able to receive/handle authentication frames from a * different AP as part of PASN. * It is expected that user space would intelligently register * for Rx authentication frames, i.e., only when PASN is used * and configure a match filter only for PASN authentication * algorithm, as otherwise the MLME functionality of mac80211 * would be broken. 
*/ .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4), }, [NL80211_IFTYPE_AP] = { .tx = 0xffff, .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | BIT(IEEE80211_STYPE_DISASSOC >> 4) | BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_DEAUTH >> 4) | BIT(IEEE80211_STYPE_ACTION >> 4), }, [NL80211_IFTYPE_AP_VLAN] = { /* copy AP */ .tx = 0xffff, .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | BIT(IEEE80211_STYPE_DISASSOC >> 4) | BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_DEAUTH >> 4) | BIT(IEEE80211_STYPE_ACTION >> 4), }, [NL80211_IFTYPE_P2P_CLIENT] = { .tx = 0xffff, .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4), }, [NL80211_IFTYPE_P2P_GO] = { .tx = 0xffff, .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | BIT(IEEE80211_STYPE_DISASSOC >> 4) | BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_DEAUTH >> 4) | BIT(IEEE80211_STYPE_ACTION >> 4), }, [NL80211_IFTYPE_MESH_POINT] = { .tx = 0xffff, .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_AUTH >> 4) | BIT(IEEE80211_STYPE_DEAUTH >> 4), }, [NL80211_IFTYPE_P2P_DEVICE] = { .tx = 0xffff, .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | BIT(IEEE80211_STYPE_PROBE_REQ >> 4), }, }; static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { .ampdu_params_info = IEEE80211_HT_AMPDU_PARM_FACTOR | IEEE80211_HT_AMPDU_PARM_DENSITY, .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_MAX_AMSDU | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_TX_STBC | IEEE80211_HT_CAP_RX_STBC | IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_40MHZ_INTOLERANT), .mcs = { .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, }, }; static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = { .vht_cap_info = cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160 | IEEE80211_VHT_CAP_RXSTBC_MASK | IEEE80211_VHT_CAP_TXSTBC | IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN | IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK), .supp_mcs = { .rx_mcs_map = cpu_to_le16(~0), .tx_mcs_map = cpu_to_le16(~0), }, }; struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, const struct ieee80211_ops *ops, const char *requested_name) { struct ieee80211_local *local; int priv_size, i; struct wiphy *wiphy; bool emulate_chanctx; if (WARN_ON(!ops->tx || !ops->start || !ops->stop || !ops->config || !ops->add_interface || !ops->remove_interface || !ops->configure_filter || !ops->wake_tx_queue)) return NULL; if (WARN_ON(ops->sta_state && (ops->sta_add || ops->sta_remove))) return NULL; if (WARN_ON(!!ops->link_info_changed != !!ops->vif_cfg_changed || (ops->link_info_changed && ops->bss_info_changed))) return NULL; /* check all or no channel context operations exist */ if (ops->add_chanctx == ieee80211_emulate_add_chanctx && ops->remove_chanctx == ieee80211_emulate_remove_chanctx && ops->change_chanctx == ieee80211_emulate_change_chanctx) { if (WARN_ON(ops->assign_vif_chanctx || ops->unassign_vif_chanctx)) return NULL; emulate_chanctx = true; } else { if (WARN_ON(ops->add_chanctx == ieee80211_emulate_add_chanctx || ops->remove_chanctx == 
ieee80211_emulate_remove_chanctx || ops->change_chanctx == ieee80211_emulate_change_chanctx)) return NULL; if (WARN_ON(!ops->add_chanctx || !ops->remove_chanctx || !ops->change_chanctx || !ops->assign_vif_chanctx || !ops->unassign_vif_chanctx)) return NULL; emulate_chanctx = false; } /* Ensure 32-byte alignment of our private data and hw private data. * We use the wiphy priv data for both our ieee80211_local and for * the driver's private data * * In memory it'll be like this: * * +-------------------------+ * | struct wiphy | * +-------------------------+ * | struct ieee80211_local | * +-------------------------+ * | driver's private data | * +-------------------------+ * */ priv_size = ALIGN(sizeof(*local), NETDEV_ALIGN) + priv_data_len; wiphy = wiphy_new_nm(&mac80211_config_ops, priv_size, requested_name); if (!wiphy) return NULL; wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes; wiphy->privid = mac80211_wiphy_privid; wiphy->flags |= WIPHY_FLAG_NETNS_OK | WIPHY_FLAG_4ADDR_AP | WIPHY_FLAG_4ADDR_STATION | WIPHY_FLAG_REPORTS_OBSS | WIPHY_FLAG_OFFCHAN_TX; if (emulate_chanctx || ops->remain_on_channel) wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_VIF_TXPOWER | NL80211_FEATURE_MAC_ON_CREATE | NL80211_FEATURE_USERSPACE_MPM | NL80211_FEATURE_FULL_AP_CLIENT_STATE; wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SCAN_FREQ_KHZ); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE); if (!ops->hw_scan) { wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | NL80211_FEATURE_AP_SCAN; /* * if the driver behaves correctly using the probe request * (template) from mac80211, then both of these should be * supported even with hw scan - but let drivers opt in. */ wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SCAN_RANDOM_SN); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT); } if (!ops->set_key) { wiphy->flags |= WIPHY_FLAG_IBSS_RSN; wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT); } wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_TXQS); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM); wiphy->bss_priv_size = sizeof(struct ieee80211_bss); local = wiphy_priv(wiphy); if (sta_info_init(local)) goto err_free; local->hw.wiphy = wiphy; local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); local->ops = ops; local->emulate_chanctx = emulate_chanctx; if (emulate_chanctx) ieee80211_hw_set(&local->hw, CHANCTX_STA_CSA); /* * We need a bit of data queued to build aggregates properly, so * instruct the TCP stack to allow more than a single ms of data * to be queued in the stack. The value is a bit-shift of 1 * second, so 7 is ~8ms of queued data. Only affects local TCP * sockets. * This is the default, anyhow - drivers may need to override it * for local reasons (longer buffers, longer completion time, or * similar). 
*/ local->hw.tx_sk_pacing_shift = 7; /* set up some defaults */ local->hw.queues = 1; local->hw.max_rates = 1; local->hw.max_report_rates = 0; local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT; local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT; local->hw.offchannel_tx_hw_queue = IEEE80211_INVAL_HW_QUEUE; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->hw.radiotap_mcs_details = IEEE80211_RADIOTAP_MCS_HAVE_MCS | IEEE80211_RADIOTAP_MCS_HAVE_GI | IEEE80211_RADIOTAP_MCS_HAVE_BW; local->hw.radiotap_vht_details = IEEE80211_RADIOTAP_VHT_KNOWN_GI | IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; local->hw.max_mtu = IEEE80211_MAX_DATA_LEN; local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask; local->ext_capa[7] = WLAN_EXT_CAPA8_OPMODE_NOTIF; wiphy->extended_capabilities = local->ext_capa; wiphy->extended_capabilities_mask = local->ext_capa; wiphy->extended_capabilities_len = ARRAY_SIZE(local->ext_capa); INIT_LIST_HEAD(&local->interfaces); INIT_LIST_HEAD(&local->mon_list); __hw_addr_init(&local->mc_list); mutex_init(&local->iflist_mtx); spin_lock_init(&local->filter_lock); spin_lock_init(&local->rx_path_lock); spin_lock_init(&local->queue_stop_reason_lock); for (i = 0; i < IEEE80211_NUM_ACS; i++) { INIT_LIST_HEAD(&local->active_txqs[i]); spin_lock_init(&local->active_txq_lock[i]); local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L; local->aql_txq_limit_high[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H; atomic_set(&local->aql_ac_pending_airtime[i], 0); } local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX; local->aql_threshold = IEEE80211_AQL_THRESHOLD; atomic_set(&local->aql_total_pending_airtime, 0); spin_lock_init(&local->handle_wake_tx_queue_lock); INIT_LIST_HEAD(&local->chanctx_list); wiphy_delayed_work_init(&local->scan_work, ieee80211_scan_work); INIT_WORK(&local->restart_work, ieee80211_restart_work); wiphy_work_init(&local->radar_detected_work, ieee80211_dfs_radar_detected_work); wiphy_work_init(&local->reconfig_filter, ieee80211_reconfig_filter); wiphy_work_init(&local->dynamic_ps_enable_work, ieee80211_dynamic_ps_enable_work); wiphy_work_init(&local->dynamic_ps_disable_work, ieee80211_dynamic_ps_disable_work); timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0); wiphy_work_init(&local->sched_scan_stopped_work, ieee80211_sched_scan_stopped_work); spin_lock_init(&local->ack_status_lock); idr_init(&local->ack_status_frames); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_head_init(&local->pending[i]); atomic_set(&local->agg_queue_stop[i], 0); } tasklet_setup(&local->tx_pending_tasklet, ieee80211_tx_pending); tasklet_setup(&local->wake_txqs_tasklet, ieee80211_wake_txqs); tasklet_setup(&local->tasklet, ieee80211_tasklet_handler); skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); ieee80211_alloc_led_names(local); ieee80211_roc_setup(local); local->hw.radiotap_timestamp.units_pos = -1; local->hw.radiotap_timestamp.accuracy = -1; return &local->hw; err_free: wiphy_free(wiphy); return NULL; } EXPORT_SYMBOL(ieee80211_alloc_hw_nm); static int ieee80211_init_cipher_suites(struct ieee80211_local *local) { bool have_wep = !fips_enabled; /* FIPS does not permit the use of RC4 */ bool have_mfp = 
ieee80211_hw_check(&local->hw, MFP_CAPABLE); int r = 0, w = 0; u32 *suites; static const u32 cipher_suites[] = { /* keep WEP first, it may be removed below */ WLAN_CIPHER_SUITE_WEP40, WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, WLAN_CIPHER_SUITE_CCMP_256, WLAN_CIPHER_SUITE_GCMP, WLAN_CIPHER_SUITE_GCMP_256, /* keep last -- depends on hw flags! */ WLAN_CIPHER_SUITE_AES_CMAC, WLAN_CIPHER_SUITE_BIP_CMAC_256, WLAN_CIPHER_SUITE_BIP_GMAC_128, WLAN_CIPHER_SUITE_BIP_GMAC_256, }; if (ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL) || local->hw.wiphy->cipher_suites) { /* If the driver advertises, or doesn't support SW crypto, * we only need to remove WEP if necessary. */ if (have_wep) return 0; /* well if it has _no_ ciphers ... fine */ if (!local->hw.wiphy->n_cipher_suites) return 0; /* Driver provides cipher suites, but we need to exclude WEP */ suites = kmemdup(local->hw.wiphy->cipher_suites, sizeof(u32) * local->hw.wiphy->n_cipher_suites, GFP_KERNEL); if (!suites) return -ENOMEM; for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { u32 suite = local->hw.wiphy->cipher_suites[r]; if (suite == WLAN_CIPHER_SUITE_WEP40 || suite == WLAN_CIPHER_SUITE_WEP104) continue; suites[w++] = suite; } } else { /* assign the (software supported and perhaps offloaded) * cipher suites */ local->hw.wiphy->cipher_suites = cipher_suites; local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); if (!have_mfp) local->hw.wiphy->n_cipher_suites -= 4; if (!have_wep) { local->hw.wiphy->cipher_suites += 2; local->hw.wiphy->n_cipher_suites -= 2; } /* not dynamically allocated, so just return */ return 0; } local->hw.wiphy->cipher_suites = suites; local->hw.wiphy->n_cipher_suites = w; local->wiphy_ciphers_allocated = true; return 0; } int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); int result, i; enum nl80211_band band; int channels, max_bitrates; bool supp_ht, supp_vht, supp_he, supp_eht; struct cfg80211_chan_def dflt_chandef = {}; if (ieee80211_hw_check(hw, QUEUE_CONTROL) && (local->hw.offchannel_tx_hw_queue == IEEE80211_INVAL_HW_QUEUE || local->hw.offchannel_tx_hw_queue >= local->hw.queues)) return -EINVAL; if ((hw->wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH) && (!local->ops->tdls_channel_switch || !local->ops->tdls_cancel_channel_switch || !local->ops->tdls_recv_channel_switch)) return -EOPNOTSUPP; if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_TX_FRAG) && !local->ops->set_frag_threshold)) return -EINVAL; if (WARN_ON(local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN) && (!local->ops->start_nan || !local->ops->stop_nan))) return -EINVAL; if (hw->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) { /* * For drivers capable of doing MLO, assume modern driver * or firmware facilities, so software doesn't have to do * as much, e.g. monitoring beacons would be hard if we * might not even know which link is active at which time. 
*/ if (WARN_ON(local->emulate_chanctx)) return -EINVAL; if (WARN_ON(!local->ops->link_info_changed)) return -EINVAL; if (WARN_ON(!ieee80211_hw_check(hw, HAS_RATE_CONTROL))) return -EINVAL; if (WARN_ON(!ieee80211_hw_check(hw, AMPDU_AGGREGATION))) return -EINVAL; if (WARN_ON(ieee80211_hw_check(hw, HOST_BROADCAST_PS_BUFFERING))) return -EINVAL; if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_PS) && (!ieee80211_hw_check(hw, SUPPORTS_DYNAMIC_PS) || ieee80211_hw_check(hw, PS_NULLFUNC_STACK)))) return -EINVAL; if (WARN_ON(!ieee80211_hw_check(hw, MFP_CAPABLE))) return -EINVAL; if (WARN_ON(!ieee80211_hw_check(hw, CONNECTION_MONITOR))) return -EINVAL; if (WARN_ON(ieee80211_hw_check(hw, NEED_DTIM_BEFORE_ASSOC))) return -EINVAL; if (WARN_ON(ieee80211_hw_check(hw, TIMING_BEACON_ONLY))) return -EINVAL; if (WARN_ON(!ieee80211_hw_check(hw, AP_LINK_PS))) return -EINVAL; if (WARN_ON(ieee80211_hw_check(hw, DEAUTH_NEED_MGD_TX_PREP))) return -EINVAL; } #ifdef CONFIG_PM if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume)) return -EINVAL; #endif if (local->emulate_chanctx) { for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *comb; comb = &local->hw.wiphy->iface_combinations[i]; if (comb->num_different_channels > 1) return -EINVAL; } } else { /* DFS is not supported with multi-channel combinations yet */ for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *comb; comb = &local->hw.wiphy->iface_combinations[i]; if (comb->radar_detect_widths && comb->num_different_channels > 1) return -EINVAL; } } /* Only HW csum features are currently compatible with mac80211 */ if (WARN_ON(hw->netdev_features & ~MAC80211_SUPPORTED_FEATURES)) return -EINVAL; if (hw->max_report_rates == 0) hw->max_report_rates = hw->max_rates; local->rx_chains = 1; /* * generic code guarantees at least one band, * set this very early because much code assumes * that hw.conf.channel is assigned */ channels = 0; max_bitrates = 0; supp_ht = false; supp_vht = false; supp_he = false; supp_eht = false; for (band = 0; band < NUM_NL80211_BANDS; band++) { const struct ieee80211_sband_iftype_data *iftd; struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[band]; if (!sband) continue; if (!dflt_chandef.chan) { /* * Assign the first enabled channel to dflt_chandef * from the list of channels */ for (i = 0; i < sband->n_channels; i++) if (!(sband->channels[i].flags & IEEE80211_CHAN_DISABLED)) break; /* if none found then use the first anyway */ if (i == sband->n_channels) i = 0; cfg80211_chandef_create(&dflt_chandef, &sband->channels[i], NL80211_CHAN_NO_HT); /* init channel we're on */ local->monitor_chanreq.oper = dflt_chandef; if (local->emulate_chanctx) { local->dflt_chandef = dflt_chandef; local->hw.conf.chandef = dflt_chandef; } } channels += sband->n_channels; /* * Due to the way the aggregation code handles this and it * being an HT capability, we can't really support delayed * BA in MLO (yet). 
*/ if (WARN_ON(sband->ht_cap.ht_supported && (sband->ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA) && hw->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) return -EINVAL; if (max_bitrates < sband->n_bitrates) max_bitrates = sband->n_bitrates; supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; for_each_sband_iftype_data(sband, i, iftd) { u8 he_40_mhz_cap; supp_he = supp_he || iftd->he_cap.has_he; supp_eht = supp_eht || iftd->eht_cap.has_eht; if (band == NL80211_BAND_2GHZ) he_40_mhz_cap = IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G; else he_40_mhz_cap = IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G; /* currently no support for HE client where HT has 40 MHz but not HT */ if (iftd->he_cap.has_he && iftd->types_mask & (BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_P2P_CLIENT)) && sband->ht_cap.ht_supported && sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && !(iftd->he_cap.he_cap_elem.phy_cap_info[0] & he_40_mhz_cap)) return -EINVAL; } /* HT, VHT, HE require QoS, thus >= 4 queues */ if (WARN_ON(local->hw.queues < IEEE80211_NUM_ACS && (supp_ht || supp_vht || supp_he))) return -EINVAL; /* EHT requires HE support */ if (WARN_ON(supp_eht && !supp_he)) return -EINVAL; if (!sband->ht_cap.ht_supported) continue; /* TODO: consider VHT for RX chains, hopefully it's the same */ local->rx_chains = max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs), local->rx_chains); /* no need to mask, SM_PS_DISABLED has all bits set */ sband->ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED << IEEE80211_HT_CAP_SM_PS_SHIFT; } /* if low-level driver supports AP, we also support VLAN. * drivers advertising SW_CRYPTO_CONTROL should enable AP_VLAN * based on their support to transmit SW encrypted packets. */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP) && !ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL)) { hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN); } /* mac80211 always supports monitor */ hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR); /* mac80211 doesn't support more than one IBSS interface right now */ for (i = 0; i < hw->wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *c; int j; c = &hw->wiphy->iface_combinations[i]; for (j = 0; j < c->n_limits; j++) if ((c->limits[j].types & BIT(NL80211_IFTYPE_ADHOC)) && c->limits[j].max > 1) return -EINVAL; } local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) + sizeof(void *) * channels, GFP_KERNEL); if (!local->int_scan_req) return -ENOMEM; eth_broadcast_addr(local->int_scan_req->bssid); for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!local->hw.wiphy->bands[band]) continue; local->int_scan_req->rates[band] = (u32) -1; } #ifndef CONFIG_MAC80211_MESH /* mesh depends on Kconfig, but drivers should set it if they want */ local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT); #endif /* if the underlying driver supports mesh, mac80211 will (at least) * provide routing of mesh authentication frames to userspace */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_MESH_POINT)) local->hw.wiphy->flags |= WIPHY_FLAG_MESH_AUTH; /* mac80211 supports control port protocol changing */ local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL; if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) { local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; } else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) { 
local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; if (hw->max_signal <= 0) { result = -EINVAL; goto fail_workqueue; } } /* Mac80211 and therefore all drivers using SW crypto only * are able to handle PTK rekeys and Extended Key ID. */ if (!local->ops->set_key) { wiphy_ext_feature_set(local->hw.wiphy, NL80211_EXT_FEATURE_CAN_REPLACE_PTK0); wiphy_ext_feature_set(local->hw.wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID); } if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_ADHOC)) wiphy_ext_feature_set(local->hw.wiphy, NL80211_EXT_FEATURE_DEL_IBSS_STA); /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. It * includes the DS Params, (extended) supported rates, and HT * information -- SSID is the driver's responsibility. */ local->scan_ies_len = 4 + max_bitrates /* (ext) supp rates */ + 3 /* DS Params */; if (supp_ht) local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap); if (supp_vht) local->scan_ies_len += 2 + sizeof(struct ieee80211_vht_cap); /* * HE cap element is variable in size - set len to allow max size */ if (supp_he) { local->scan_ies_len += 3 + sizeof(struct ieee80211_he_cap_elem) + sizeof(struct ieee80211_he_mcs_nss_supp) + IEEE80211_HE_PPE_THRES_MAX_LEN; if (supp_eht) local->scan_ies_len += 3 + sizeof(struct ieee80211_eht_cap_elem) + sizeof(struct ieee80211_eht_mcs_nss_supp) + IEEE80211_EHT_PPE_THRES_MAX_LEN; } if (!local->ops->hw_scan) { /* For hw_scan, driver needs to set these up. */ local->hw.wiphy->max_scan_ssids = 4; local->hw.wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN; } /* * If the driver supports any scan IEs, then assume the * limit includes the IEs mac80211 will add, otherwise * leave it at zero and let the driver sort it out; we * still pass our IEs to the driver but userspace will * not be allowed to in that case. */ if (local->hw.wiphy->max_scan_ie_len) local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; result = ieee80211_init_cipher_suites(local); if (result < 0) goto fail_workqueue; if (!local->ops->remain_on_channel) local->hw.wiphy->max_remain_on_channel_duration = 5000; /* mac80211 based drivers don't support internal TDLS setup */ if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; /* mac80211 supports eCSA, if the driver supports STA CSA at all */ if (ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) local->ext_capa[0] |= WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING; /* mac80211 supports multi BSSID, if the driver supports it */ if (ieee80211_hw_check(&local->hw, SUPPORTS_MULTI_BSSID)) { local->hw.wiphy->support_mbssid = true; if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID)) local->hw.wiphy->support_only_he_mbssid = true; else local->ext_capa[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT; } local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CNTDWN_COUNTERS_NUM; /* * We use the number of queues for feature tests (QoS, HT) internally * so restrict them appropriately. */ if (hw->queues > IEEE80211_MAX_QUEUES) hw->queues = IEEE80211_MAX_QUEUES; local->workqueue = alloc_ordered_workqueue("%s", 0, wiphy_name(local->hw.wiphy)); if (!local->workqueue) { result = -ENOMEM; goto fail_workqueue; } /* * The hardware needs headroom for sending the frame, * and we need some headroom for passing the frame to monitor * interfaces, but never both at the same time. 
*/ local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, IEEE80211_TX_STATUS_HEADROOM); /* * if the driver doesn't specify a max listen interval we * use 5 which should be a safe default */ if (local->hw.max_listen_interval == 0) local->hw.max_listen_interval = 5; local->hw.conf.listen_interval = local->hw.max_listen_interval; local->dynamic_ps_forced_timeout = -1; if (!local->hw.max_nan_de_entries) local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID; if (!local->hw.weight_multiplier) local->hw.weight_multiplier = 1; ieee80211_wep_init(local); local->hw.conf.flags = IEEE80211_CONF_IDLE; ieee80211_led_init(local); result = ieee80211_txq_setup_flows(local); if (result) goto fail_flows; rtnl_lock(); result = ieee80211_init_rate_ctrl_alg(local, hw->rate_control_algorithm); rtnl_unlock(); if (result < 0) { wiphy_debug(local->hw.wiphy, "Failed to initialize rate control algorithm\n"); goto fail_rate; } if (local->rate_ctrl) { clear_bit(IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, hw->flags); if (local->rate_ctrl->ops->capa & RATE_CTRL_CAPA_VHT_EXT_NSS_BW) ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW); } /* * If the VHT capabilities don't have IEEE80211_VHT_EXT_NSS_BW_CAPABLE, * or have it when we don't, copy the sband structure and set/clear it. * This is necessary because rate scaling algorithms could be switched * and have different support values. * Print a message so that in the common case the reallocation can be * avoided. */ BUILD_BUG_ON(NUM_NL80211_BANDS > 8 * sizeof(local->sband_allocated)); for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; bool local_cap, ie_cap; local_cap = ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW); sband = local->hw.wiphy->bands[band]; if (!sband || !sband->vht_cap.vht_supported) continue; ie_cap = !!(sband->vht_cap.vht_mcs.tx_highest & cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)); if (local_cap == ie_cap) continue; sband = kmemdup(sband, sizeof(*sband), GFP_KERNEL); if (!sband) { result = -ENOMEM; goto fail_rate; } wiphy_dbg(hw->wiphy, "copying sband (band %d) due to VHT EXT NSS BW flag\n", band); sband->vht_cap.vht_mcs.tx_highest ^= cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE); local->hw.wiphy->bands[band] = sband; local->sband_allocated |= BIT(band); } result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; debugfs_hw_add(local); rate_control_add_debugfs(local); ieee80211_check_wbrf_support(local); rtnl_lock(); wiphy_lock(hw->wiphy); /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) && !ieee80211_hw_check(hw, NO_AUTO_VIF)) { struct vif_params params = {0}; result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL, NL80211_IFTYPE_STATION, &params); if (result) wiphy_warn(local->hw.wiphy, "Failed to add default virtual iface\n"); } wiphy_unlock(hw->wiphy); rtnl_unlock(); #ifdef CONFIG_INET local->ifa_notifier.notifier_call = ieee80211_ifa_changed; result = register_inetaddr_notifier(&local->ifa_notifier); if (result) goto fail_ifa; #endif #if IS_ENABLED(CONFIG_IPV6) local->ifa6_notifier.notifier_call = ieee80211_ifa6_changed; result = register_inet6addr_notifier(&local->ifa6_notifier); if (result) goto fail_ifa6; #endif return 0; #if IS_ENABLED(CONFIG_IPV6) fail_ifa6: #ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); #endif #endif #if defined(CONFIG_INET) || defined(CONFIG_IPV6) fail_ifa: #endif wiphy_unregister(local->hw.wiphy); fail_wiphy_register: rtnl_lock(); 
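	/*
	 * Error unwind: undo initialization in reverse order (rate control
	 * and interfaces under RTNL, then TX queues, LED, workqueue and the
	 * allocated cipher-suite/scan-request memory).
	 */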
rate_control_deinitialize(local); ieee80211_remove_interfaces(local); rtnl_unlock(); fail_rate: ieee80211_txq_teardown_flows(local); fail_flows: ieee80211_led_exit(local); destroy_workqueue(local->workqueue); fail_workqueue: if (local->wiphy_ciphers_allocated) { kfree(local->hw.wiphy->cipher_suites); local->wiphy_ciphers_allocated = false; } kfree(local->int_scan_req); return result; } EXPORT_SYMBOL(ieee80211_register_hw); void ieee80211_unregister_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); tasklet_kill(&local->tx_pending_tasklet); tasklet_kill(&local->tasklet); #ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); #endif #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&local->ifa6_notifier); #endif rtnl_lock(); /* * At this point, interface list manipulations are fine * because the driver cannot be handing us frames any * more and the tasklet is killed. */ ieee80211_remove_interfaces(local); ieee80211_txq_teardown_flows(local); wiphy_lock(local->hw.wiphy); wiphy_delayed_work_cancel(local->hw.wiphy, &local->roc_work); wiphy_work_cancel(local->hw.wiphy, &local->reconfig_filter); wiphy_work_cancel(local->hw.wiphy, &local->sched_scan_stopped_work); wiphy_work_cancel(local->hw.wiphy, &local->radar_detected_work); wiphy_unlock(local->hw.wiphy); rtnl_unlock(); cancel_work_sync(&local->restart_work); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); if (skb_queue_len(&local->skb_queue) || skb_queue_len(&local->skb_queue_unreliable)) wiphy_warn(local->hw.wiphy, "skb_queue not empty\n"); skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); wiphy_unregister(local->hw.wiphy); destroy_workqueue(local->workqueue); ieee80211_led_exit(local); kfree(local->int_scan_req); } EXPORT_SYMBOL(ieee80211_unregister_hw); static int ieee80211_free_ack_frame(int id, void *p, void *data) { WARN_ONCE(1, "Have pending ack frames!\n"); kfree_skb(p); return 0; } void ieee80211_free_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); enum nl80211_band band; mutex_destroy(&local->iflist_mtx); if (local->wiphy_ciphers_allocated) { kfree(local->hw.wiphy->cipher_suites); local->wiphy_ciphers_allocated = false; } idr_for_each(&local->ack_status_frames, ieee80211_free_ack_frame, NULL); idr_destroy(&local->ack_status_frames); sta_info_stop(local); ieee80211_free_led_names(local); for (band = 0; band < NUM_NL80211_BANDS; band++) { if (!(local->sband_allocated & BIT(band))) continue; kfree(local->hw.wiphy->bands[band]); } wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); static const char * const drop_reasons_monitor[] = { #define V(x) #x, [0] = "RX_DROP_MONITOR", MAC80211_DROP_REASONS_MONITOR(V) }; static struct drop_reason_list drop_reason_list_monitor = { .reasons = drop_reasons_monitor, .n_reasons = ARRAY_SIZE(drop_reasons_monitor), }; static const char * const drop_reasons_unusable[] = { [0] = "RX_DROP_UNUSABLE", MAC80211_DROP_REASONS_UNUSABLE(V) #undef V }; static struct drop_reason_list drop_reason_list_unusable = { .reasons = drop_reasons_unusable, .n_reasons = ARRAY_SIZE(drop_reasons_unusable), }; static int __init ieee80211_init(void) { struct sk_buff *skb; int ret; BUILD_BUG_ON(sizeof(struct ieee80211_tx_info) > sizeof(skb->cb)); BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, driver_data) + IEEE80211_TX_INFO_DRIVER_DATA_SIZE > sizeof(skb->cb)); ret = rc80211_minstrel_init(); if (ret) return ret; ret = ieee80211_iface_init(); if (ret) goto err_netdev; 
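	/*
	 * Expose mac80211's private SKB drop reason lists (monitor/unusable)
	 * to the core drop-reason infrastructure.
	 */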
	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR,
				     &drop_reason_list_monitor);
	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE,
				     &drop_reason_list_unusable);

	return 0;
err_netdev:
	rc80211_minstrel_exit();
	return ret;
}

static void __exit ieee80211_exit(void)
{
	rc80211_minstrel_exit();

	ieee80211s_stop();

	ieee80211_iface_exit();

	drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR);
	drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE);

	rcu_barrier();
}

subsys_initcall(ieee80211_init);
module_exit(ieee80211_exit);

MODULE_DESCRIPTION("IEEE 802.11 subsystem");
MODULE_LICENSE("GPL");
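/*
 * Illustrative sketch (not part of mac80211 itself): the typical driver-side
 * use of the registration API implemented above. The names my_ops, my_priv,
 * my_probe and my_remove are hypothetical; only the ieee80211_*() calls are
 * real mac80211 exports (ieee80211_alloc_hw() being the usual allocation
 * counterpart of ieee80211_register_hw()/ieee80211_free_hw()).
 */
#include <net/mac80211.h>

static const struct ieee80211_ops my_ops = {
	/* a real driver fills in .tx, .start, .stop, .add_interface, ... */
};

struct my_priv {
	int dummy;	/* driver-private state would live here */
};

static int my_probe(void)
{
	struct ieee80211_hw *hw;
	int err;

	/* Allocate the shared hw structure plus a driver-private area. */
	hw = ieee80211_alloc_hw(sizeof(struct my_priv), &my_ops);
	if (!hw)
		return -ENOMEM;

	/*
	 * Bands, supported interface modes, hw flags, queue count etc. must
	 * be filled in here; ieee80211_register_hw() above validates them
	 * and then registers the wiphy with cfg80211.
	 */
	err = ieee80211_register_hw(hw);
	if (err) {
		ieee80211_free_hw(hw);
		return err;
	}
	return 0;
}

static void my_remove(struct ieee80211_hw *hw)
{
	/* Reverse order of my_probe(): unregister first, then free. */
	ieee80211_unregister_hw(hw);
	ieee80211_free_hw(hw);
}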
// SPDX-License-Identifier: GPL-2.0
/*
 * security/tomoyo/file.c
 *
 * Copyright (C) 2005-2011 NTT DATA CORPORATION
 */

#include "common.h"
#include <linux/slab.h>

/*
 * Mapping table from "enum tomoyo_path_acl_index" to "enum tomoyo_mac_index".
 */
static const u8 tomoyo_p2mac[TOMOYO_MAX_PATH_OPERATION] = {
	[TOMOYO_TYPE_EXECUTE]  = TOMOYO_MAC_FILE_EXECUTE,
	[TOMOYO_TYPE_READ]     = TOMOYO_MAC_FILE_OPEN,
	[TOMOYO_TYPE_WRITE]    = TOMOYO_MAC_FILE_OPEN,
	[TOMOYO_TYPE_APPEND]   = TOMOYO_MAC_FILE_OPEN,
	[TOMOYO_TYPE_UNLINK]   = TOMOYO_MAC_FILE_UNLINK,
	[TOMOYO_TYPE_GETATTR]  = TOMOYO_MAC_FILE_GETATTR,
	[TOMOYO_TYPE_RMDIR]    = TOMOYO_MAC_FILE_RMDIR,
	[TOMOYO_TYPE_TRUNCATE] = TOMOYO_MAC_FILE_TRUNCATE,
	[TOMOYO_TYPE_SYMLINK]  = TOMOYO_MAC_FILE_SYMLINK,
	[TOMOYO_TYPE_CHROOT]   = TOMOYO_MAC_FILE_CHROOT,
	[TOMOYO_TYPE_UMOUNT]   = TOMOYO_MAC_FILE_UMOUNT,
};

/*
 * Mapping table from "enum tomoyo_mkdev_acl_index" to "enum tomoyo_mac_index".
 */
const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = {
	[TOMOYO_TYPE_MKBLOCK] = TOMOYO_MAC_FILE_MKBLOCK,
	[TOMOYO_TYPE_MKCHAR]  = TOMOYO_MAC_FILE_MKCHAR,
};

/*
 * Mapping table from "enum tomoyo_path2_acl_index" to "enum tomoyo_mac_index".
 */
const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = {
	[TOMOYO_TYPE_LINK]       = TOMOYO_MAC_FILE_LINK,
	[TOMOYO_TYPE_RENAME]     = TOMOYO_MAC_FILE_RENAME,
	[TOMOYO_TYPE_PIVOT_ROOT] = TOMOYO_MAC_FILE_PIVOT_ROOT,
};

/*
 * Mapping table from "enum tomoyo_path_number_acl_index" to
 * "enum tomoyo_mac_index".
 */
const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
	[TOMOYO_TYPE_CREATE] = TOMOYO_MAC_FILE_CREATE,
	[TOMOYO_TYPE_MKDIR]  = TOMOYO_MAC_FILE_MKDIR,
	[TOMOYO_TYPE_MKFIFO] = TOMOYO_MAC_FILE_MKFIFO,
	[TOMOYO_TYPE_MKSOCK] = TOMOYO_MAC_FILE_MKSOCK,
	[TOMOYO_TYPE_IOCTL]  = TOMOYO_MAC_FILE_IOCTL,
	[TOMOYO_TYPE_CHMOD]  = TOMOYO_MAC_FILE_CHMOD,
	[TOMOYO_TYPE_CHOWN]  = TOMOYO_MAC_FILE_CHOWN,
	[TOMOYO_TYPE_CHGRP]  = TOMOYO_MAC_FILE_CHGRP,
};

/**
 * tomoyo_put_name_union - Drop reference on "struct tomoyo_name_union".
 *
 * @ptr: Pointer to "struct tomoyo_name_union".
 *
 * Returns nothing.
 */
void tomoyo_put_name_union(struct tomoyo_name_union *ptr)
{
	tomoyo_put_group(ptr->group);
	tomoyo_put_name(ptr->filename);
}

/**
 * tomoyo_compare_name_union - Check whether a name matches "struct tomoyo_name_union" or not.
 *
 * @name: Pointer to "struct tomoyo_path_info".
 * @ptr:  Pointer to "struct tomoyo_name_union".
 *
 * Returns "struct tomoyo_path_info" if @name matches @ptr, NULL otherwise.
 */
const struct tomoyo_path_info *
tomoyo_compare_name_union(const struct tomoyo_path_info *name,
			  const struct tomoyo_name_union *ptr)
{
	if (ptr->group)
		return tomoyo_path_matches_group(name, ptr->group);
	if (tomoyo_path_matches_pattern(name, ptr->filename))
		return ptr->filename;
	return NULL;
}

/**
 * tomoyo_put_number_union - Drop reference on "struct tomoyo_number_union".
 *
 * @ptr: Pointer to "struct tomoyo_number_union".
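 * Unlike tomoyo_put_name_union(), only the group reference needs to be
 * dropped; the numeric range itself is stored inline.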
* * Returns nothing. */ void tomoyo_put_number_union(struct tomoyo_number_union *ptr) { tomoyo_put_group(ptr->group); } /** * tomoyo_compare_number_union - Check whether a value matches "struct tomoyo_number_union" or not. * * @value: Number to check. * @ptr: Pointer to "struct tomoyo_number_union". * * Returns true if @value matches @ptr, false otherwise. */ bool tomoyo_compare_number_union(const unsigned long value, const struct tomoyo_number_union *ptr) { if (ptr->group) return tomoyo_number_matches_group(value, value, ptr->group); return value >= ptr->values[0] && value <= ptr->values[1]; } /** * tomoyo_add_slash - Add trailing '/' if needed. * * @buf: Pointer to "struct tomoyo_path_info". * * Returns nothing. * * @buf must be generated by tomoyo_encode() because this function does not * allocate memory for adding '/'. */ static void tomoyo_add_slash(struct tomoyo_path_info *buf) { if (buf->is_dir) return; /* * This is OK because tomoyo_encode() reserves space for appending "/". */ strcat((char *) buf->name, "/"); tomoyo_fill_path_info(buf); } /** * tomoyo_get_realpath - Get realpath. * * @buf: Pointer to "struct tomoyo_path_info". * @path: Pointer to "struct path". * * Returns true on success, false otherwise. */ static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, const struct path *path) { buf->name = tomoyo_realpath_from_path(path); if (buf->name) { tomoyo_fill_path_info(buf); return true; } return false; } /** * tomoyo_audit_path_log - Audit path request log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_path_log(struct tomoyo_request_info *r) { return tomoyo_supervisor(r, "file %s %s\n", tomoyo_path_keyword [r->param.path.operation], r->param.path.filename->name); } /** * tomoyo_audit_path2_log - Audit path/path request log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_path2_log(struct tomoyo_request_info *r) { return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords [tomoyo_pp2mac[r->param.path2.operation]], r->param.path2.filename1->name, r->param.path2.filename2->name); } /** * tomoyo_audit_mkdev_log - Audit path/number/number/number request log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_mkdev_log(struct tomoyo_request_info *r) { return tomoyo_supervisor(r, "file %s %s 0%o %u %u\n", tomoyo_mac_keywords [tomoyo_pnnn2mac[r->param.mkdev.operation]], r->param.mkdev.filename->name, r->param.mkdev.mode, r->param.mkdev.major, r->param.mkdev.minor); } /** * tomoyo_audit_path_number_log - Audit path/number request log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. 
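 * The number is logged in octal for create/mkdir/mkfifo/mksock/chmod,
 * in hexadecimal for ioctl, and in decimal otherwise (chown/chgrp).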
*/ static int tomoyo_audit_path_number_log(struct tomoyo_request_info *r) { const u8 type = r->param.path_number.operation; u8 radix; char buffer[64]; switch (type) { case TOMOYO_TYPE_CREATE: case TOMOYO_TYPE_MKDIR: case TOMOYO_TYPE_MKFIFO: case TOMOYO_TYPE_MKSOCK: case TOMOYO_TYPE_CHMOD: radix = TOMOYO_VALUE_TYPE_OCTAL; break; case TOMOYO_TYPE_IOCTL: radix = TOMOYO_VALUE_TYPE_HEXADECIMAL; break; default: radix = TOMOYO_VALUE_TYPE_DECIMAL; break; } tomoyo_print_ulong(buffer, sizeof(buffer), r->param.path_number.number, radix); return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords [tomoyo_pn2mac[type]], r->param.path_number.filename->name, buffer); } /** * tomoyo_check_path_acl - Check permission for path operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. * * To be able to use wildcard for domain transition, this function sets * matching entry on success. Since the caller holds tomoyo_read_lock(), * it is safe to set matching entry. */ static bool tomoyo_check_path_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_path_acl *acl = container_of(ptr, typeof(*acl), head); if (acl->perm & (1 << r->param.path.operation)) { r->param.path.matched_path = tomoyo_compare_name_union(r->param.path.filename, &acl->name); return r->param.path.matched_path != NULL; } return false; } /** * tomoyo_check_path_number_acl - Check permission for path number operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_path_number_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_path_number_acl *acl = container_of(ptr, typeof(*acl), head); return (acl->perm & (1 << r->param.path_number.operation)) && tomoyo_compare_number_union(r->param.path_number.number, &acl->number) && tomoyo_compare_name_union(r->param.path_number.filename, &acl->name); } /** * tomoyo_check_path2_acl - Check permission for path path operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_path2_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_path2_acl *acl = container_of(ptr, typeof(*acl), head); return (acl->perm & (1 << r->param.path2.operation)) && tomoyo_compare_name_union(r->param.path2.filename1, &acl->name1) && tomoyo_compare_name_union(r->param.path2.filename2, &acl->name2); } /** * tomoyo_check_mkdev_acl - Check permission for path number number number operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_mkdev_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_mkdev_acl *acl = container_of(ptr, typeof(*acl), head); return (acl->perm & (1 << r->param.mkdev.operation)) && tomoyo_compare_number_union(r->param.mkdev.mode, &acl->mode) && tomoyo_compare_number_union(r->param.mkdev.major, &acl->major) && tomoyo_compare_number_union(r->param.mkdev.minor, &acl->minor) && tomoyo_compare_name_union(r->param.mkdev.filename, &acl->name); } /** * tomoyo_same_path_acl - Check for duplicated "struct tomoyo_path_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". 
* * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_path_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_path_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_path_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->name, &p2->name); } /** * tomoyo_merge_path_acl - Merge duplicated "struct tomoyo_path_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u16 * const a_perm = &container_of(a, struct tomoyo_path_acl, head) ->perm; u16 perm = READ_ONCE(*a_perm); const u16 b_perm = container_of(b, struct tomoyo_path_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_update_path_acl - Update "struct tomoyo_path_acl" list. * * @perm: Permission. * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_update_path_acl(const u16 perm, struct tomoyo_acl_param *param) { struct tomoyo_path_acl e = { .head.type = TOMOYO_TYPE_PATH_ACL, .perm = perm }; int error; if (!tomoyo_parse_name_union(param, &e.name)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_path_acl, tomoyo_merge_path_acl); tomoyo_put_name_union(&e.name); return error; } /** * tomoyo_same_mkdev_acl - Check for duplicated "struct tomoyo_mkdev_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_mkdev_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->name, &p2->name) && tomoyo_same_number_union(&p1->mode, &p2->mode) && tomoyo_same_number_union(&p1->major, &p2->major) && tomoyo_same_number_union(&p1->minor, &p2->minor); } /** * tomoyo_merge_mkdev_acl - Merge duplicated "struct tomoyo_mkdev_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 *const a_perm = &container_of(a, struct tomoyo_mkdev_acl, head)->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_mkdev_acl, head) ->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_update_mkdev_acl - Update "struct tomoyo_mkdev_acl" list. * * @perm: Permission. * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). 
*/ static int tomoyo_update_mkdev_acl(const u8 perm, struct tomoyo_acl_param *param) { struct tomoyo_mkdev_acl e = { .head.type = TOMOYO_TYPE_MKDEV_ACL, .perm = perm }; int error; if (!tomoyo_parse_name_union(param, &e.name) || !tomoyo_parse_number_union(param, &e.mode) || !tomoyo_parse_number_union(param, &e.major) || !tomoyo_parse_number_union(param, &e.minor)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_mkdev_acl, tomoyo_merge_mkdev_acl); tomoyo_put_name_union(&e.name); tomoyo_put_number_union(&e.mode); tomoyo_put_number_union(&e.major); tomoyo_put_number_union(&e.minor); return error; } /** * tomoyo_same_path2_acl - Check for duplicated "struct tomoyo_path2_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_path2_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_path2_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_path2_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->name1, &p2->name1) && tomoyo_same_name_union(&p1->name2, &p2->name2); } /** * tomoyo_merge_path2_acl - Merge duplicated "struct tomoyo_path2_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 * const a_perm = &container_of(a, struct tomoyo_path2_acl, head) ->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_path2_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_update_path2_acl - Update "struct tomoyo_path2_acl" list. * * @perm: Permission. * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_update_path2_acl(const u8 perm, struct tomoyo_acl_param *param) { struct tomoyo_path2_acl e = { .head.type = TOMOYO_TYPE_PATH2_ACL, .perm = perm }; int error; if (!tomoyo_parse_name_union(param, &e.name1) || !tomoyo_parse_name_union(param, &e.name2)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_path2_acl, tomoyo_merge_path2_acl); tomoyo_put_name_union(&e.name1); tomoyo_put_name_union(&e.name2); return error; } /** * tomoyo_path_permission - Check permission for single path operation. * * @r: Pointer to "struct tomoyo_request_info". * @operation: Type of operation. * @filename: Filename to check. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation, const struct tomoyo_path_info *filename) { int error; r->type = tomoyo_p2mac[operation]; r->mode = tomoyo_get_mode(r->domain->ns, r->profile, r->type); if (r->mode == TOMOYO_CONFIG_DISABLED) return 0; r->param_type = TOMOYO_TYPE_PATH_ACL; r->param.path.filename = filename; r->param.path.operation = operation; do { tomoyo_check_acl(r, tomoyo_check_path_acl); error = tomoyo_audit_path_log(r); } while (error == TOMOYO_RETRY_REQUEST); return error; } /** * tomoyo_execute_permission - Check permission for execute operation. * * @r: Pointer to "struct tomoyo_request_info". 
* @filename: Filename to check. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_execute_permission(struct tomoyo_request_info *r, const struct tomoyo_path_info *filename) { /* * Unlike other permission checks, this check is done regardless of * profile mode settings in order to check for domain transition * preference. */ r->type = TOMOYO_MAC_FILE_EXECUTE; r->mode = tomoyo_get_mode(r->domain->ns, r->profile, r->type); r->param_type = TOMOYO_TYPE_PATH_ACL; r->param.path.filename = filename; r->param.path.operation = TOMOYO_TYPE_EXECUTE; tomoyo_check_acl(r, tomoyo_check_path_acl); r->ee->transition = r->matched_acl && r->matched_acl->cond ? r->matched_acl->cond->transit : NULL; if (r->mode != TOMOYO_CONFIG_DISABLED) return tomoyo_audit_path_log(r); return 0; } /** * tomoyo_same_path_number_acl - Check for duplicated "struct tomoyo_path_number_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_path_number_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_path_number_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_path_number_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->name, &p2->name) && tomoyo_same_number_union(&p1->number, &p2->number); } /** * tomoyo_merge_path_number_acl - Merge duplicated "struct tomoyo_path_number_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 * const a_perm = &container_of(a, struct tomoyo_path_number_acl, head)->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_path_number_acl, head) ->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_update_path_number_acl - Update ioctl/chmod/chown/chgrp ACL. * * @perm: Permission. * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_update_path_number_acl(const u8 perm, struct tomoyo_acl_param *param) { struct tomoyo_path_number_acl e = { .head.type = TOMOYO_TYPE_PATH_NUMBER_ACL, .perm = perm }; int error; if (!tomoyo_parse_name_union(param, &e.name) || !tomoyo_parse_number_union(param, &e.number)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_path_number_acl, tomoyo_merge_path_number_acl); tomoyo_put_name_union(&e.name); tomoyo_put_number_union(&e.number); return error; } /** * tomoyo_path_number_perm - Check permission for "create", "mkdir", "mkfifo", "mksock", "ioctl", "chmod", "chown", "chgrp". * * @type: Type of operation. * @path: Pointer to "struct path". * @number: Number. * * Returns 0 on success, negative value otherwise. 
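 * Depending on @type, @number carries the create/mkdir/mkfifo/mksock or
 * chmod mode, the chown uid, the chgrp gid, or the ioctl command number.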
*/ int tomoyo_path_number_perm(const u8 type, const struct path *path, unsigned long number) { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error = -ENOMEM; struct tomoyo_path_info buf; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_pn2mac[type]) == TOMOYO_CONFIG_DISABLED) return 0; idx = tomoyo_read_lock(); if (!tomoyo_get_realpath(&buf, path)) goto out; r.obj = &obj; if (type == TOMOYO_TYPE_MKDIR) tomoyo_add_slash(&buf); r.param_type = TOMOYO_TYPE_PATH_NUMBER_ACL; r.param.path_number.operation = type; r.param.path_number.filename = &buf; r.param.path_number.number = number; do { tomoyo_check_acl(&r, tomoyo_check_path_number_acl); error = tomoyo_audit_path_number_log(&r); } while (error == TOMOYO_RETRY_REQUEST); kfree(buf.name); out: tomoyo_read_unlock(idx); if (r.mode != TOMOYO_CONFIG_ENFORCING) error = 0; return error; } /** * tomoyo_check_open_permission - Check permission for "read" and "write". * * @domain: Pointer to "struct tomoyo_domain_info". * @path: Pointer to "struct path". * @flag: Flags for open(). * * Returns 0 on success, negative value otherwise. */ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, const struct path *path, const int flag) { const u8 acc_mode = ACC_MODE(flag); int error = 0; struct tomoyo_path_info buf; struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int idx; buf.name = NULL; r.mode = TOMOYO_CONFIG_DISABLED; idx = tomoyo_read_lock(); if (acc_mode && tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN) != TOMOYO_CONFIG_DISABLED) { if (!tomoyo_get_realpath(&buf, path)) { error = -ENOMEM; goto out; } r.obj = &obj; if (acc_mode & MAY_READ) error = tomoyo_path_permission(&r, TOMOYO_TYPE_READ, &buf); if (!error && (acc_mode & MAY_WRITE)) error = tomoyo_path_permission(&r, (flag & O_APPEND) ? TOMOYO_TYPE_APPEND : TOMOYO_TYPE_WRITE, &buf); } out: kfree(buf.name); tomoyo_read_unlock(idx); if (r.mode != TOMOYO_CONFIG_ENFORCING) error = 0; return error; } /** * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "append", "chroot" and "unmount". * * @operation: Type of operation. * @path: Pointer to "struct path". * @target: Symlink's target if @operation is TOMOYO_TYPE_SYMLINK, * NULL otherwise. * * Returns 0 on success, negative value otherwise. 
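 * For rmdir and chroot a trailing '/' is appended to the pathname before
 * matching; for symlink the encoded link target is attached to the request.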
*/ int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target) { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error; struct tomoyo_path_info buf; bool is_enforce; struct tomoyo_path_info symlink_target; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_p2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; is_enforce = (r.mode == TOMOYO_CONFIG_ENFORCING); error = -ENOMEM; buf.name = NULL; idx = tomoyo_read_lock(); if (!tomoyo_get_realpath(&buf, path)) goto out; r.obj = &obj; switch (operation) { case TOMOYO_TYPE_RMDIR: case TOMOYO_TYPE_CHROOT: tomoyo_add_slash(&buf); break; case TOMOYO_TYPE_SYMLINK: symlink_target.name = tomoyo_encode(target); if (!symlink_target.name) goto out; tomoyo_fill_path_info(&symlink_target); obj.symlink_target = &symlink_target; break; } error = tomoyo_path_permission(&r, operation, &buf); if (operation == TOMOYO_TYPE_SYMLINK) kfree(symlink_target.name); out: kfree(buf.name); tomoyo_read_unlock(idx); if (!is_enforce) error = 0; return error; } /** * tomoyo_mkdev_perm - Check permission for "mkblock" and "mkchar". * * @operation: Type of operation. (TOMOYO_TYPE_MKCHAR or TOMOYO_TYPE_MKBLOCK) * @path: Pointer to "struct path". * @mode: Create mode. * @dev: Device number. * * Returns 0 on success, negative value otherwise. */ int tomoyo_mkdev_perm(const u8 operation, const struct path *path, const unsigned int mode, unsigned int dev) { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error = -ENOMEM; struct tomoyo_path_info buf; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; idx = tomoyo_read_lock(); error = -ENOMEM; if (tomoyo_get_realpath(&buf, path)) { r.obj = &obj; dev = new_decode_dev(dev); r.param_type = TOMOYO_TYPE_MKDEV_ACL; r.param.mkdev.filename = &buf; r.param.mkdev.operation = operation; r.param.mkdev.mode = mode; r.param.mkdev.major = MAJOR(dev); r.param.mkdev.minor = MINOR(dev); tomoyo_check_acl(&r, tomoyo_check_mkdev_acl); error = tomoyo_audit_mkdev_log(&r); kfree(buf.name); } tomoyo_read_unlock(idx); if (r.mode != TOMOYO_CONFIG_ENFORCING) error = 0; return error; } /** * tomoyo_path2_perm - Check permission for "rename", "link" and "pivot_root". * * @operation: Type of operation. * @path1: Pointer to "struct path". * @path2: Pointer to "struct path". * * Returns 0 on success, negative value otherwise. 
*/ int tomoyo_path2_perm(const u8 operation, const struct path *path1, const struct path *path2) { int error = -ENOMEM; struct tomoyo_path_info buf1; struct tomoyo_path_info buf2; struct tomoyo_request_info r; struct tomoyo_obj_info obj = { .path1 = { .mnt = path1->mnt, .dentry = path1->dentry }, .path2 = { .mnt = path2->mnt, .dentry = path2->dentry } }; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; buf1.name = NULL; buf2.name = NULL; idx = tomoyo_read_lock(); if (!tomoyo_get_realpath(&buf1, path1) || !tomoyo_get_realpath(&buf2, path2)) goto out; switch (operation) { case TOMOYO_TYPE_RENAME: case TOMOYO_TYPE_LINK: if (!d_is_dir(path1->dentry)) break; fallthrough; case TOMOYO_TYPE_PIVOT_ROOT: tomoyo_add_slash(&buf1); tomoyo_add_slash(&buf2); break; } r.obj = &obj; r.param_type = TOMOYO_TYPE_PATH2_ACL; r.param.path2.operation = operation; r.param.path2.filename1 = &buf1; r.param.path2.filename2 = &buf2; do { tomoyo_check_acl(&r, tomoyo_check_path2_acl); error = tomoyo_audit_path2_log(&r); } while (error == TOMOYO_RETRY_REQUEST); out: kfree(buf1.name); kfree(buf2.name); tomoyo_read_unlock(idx); if (r.mode != TOMOYO_CONFIG_ENFORCING) error = 0; return error; } /** * tomoyo_same_mount_acl - Check for duplicated "struct tomoyo_mount_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b, false otherwise. */ static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head); return tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) && tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) && tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) && tomoyo_same_number_union(&p1->flags, &p2->flags); } /** * tomoyo_update_mount_acl - Write "struct tomoyo_mount_acl" list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_update_mount_acl(struct tomoyo_acl_param *param) { struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL }; int error; if (!tomoyo_parse_name_union(param, &e.dev_name) || !tomoyo_parse_name_union(param, &e.dir_name) || !tomoyo_parse_name_union(param, &e.fs_type) || !tomoyo_parse_number_union(param, &e.flags)) error = -EINVAL; else error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_mount_acl, NULL); tomoyo_put_name_union(&e.dev_name); tomoyo_put_name_union(&e.dir_name); tomoyo_put_name_union(&e.fs_type); tomoyo_put_number_union(&e.flags); return error; } /** * tomoyo_write_file - Update file related list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). 
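 * The leading token selects one or more operation keywords (combined with
 * '/'); each recognized keyword sets the corresponding permission bit before
 * the matching ACL list is updated. See the examples after the function below.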
 */
int tomoyo_write_file(struct tomoyo_acl_param *param)
{
	u16 perm = 0;
	u8 type;
	const char *operation = tomoyo_read_token(param);

	for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++)
		if (tomoyo_permstr(operation, tomoyo_path_keyword[type]))
			perm |= 1 << type;
	if (perm)
		return tomoyo_update_path_acl(perm, param);
	for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++)
		if (tomoyo_permstr(operation,
				   tomoyo_mac_keywords[tomoyo_pp2mac[type]]))
			perm |= 1 << type;
	if (perm)
		return tomoyo_update_path2_acl(perm, param);
	for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++)
		if (tomoyo_permstr(operation,
				   tomoyo_mac_keywords[tomoyo_pn2mac[type]]))
			perm |= 1 << type;
	if (perm)
		return tomoyo_update_path_number_acl(perm, param);
	for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++)
		if (tomoyo_permstr(operation,
				   tomoyo_mac_keywords[tomoyo_pnnn2mac[type]]))
			perm |= 1 << type;
	if (perm)
		return tomoyo_update_mkdev_acl(perm, param);
	if (tomoyo_permstr(operation,
			   tomoyo_mac_keywords[TOMOYO_MAC_FILE_MOUNT]))
		return tomoyo_update_mount_acl(param);
	return -EINVAL;
}
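/*
 * Illustrative examples (assumed TOMOYO domain-policy syntax; the line forms
 * mirror the "file %s %s ..." audit formats used above). Each of these would
 * be handled by tomoyo_write_file() once the leading "file " directive has
 * been consumed:
 *
 *   file read /etc/fstab
 *   file create /tmp/new_file 0644
 *   file mkblock /dev/null 0666 1 3
 *   file link /etc/fstab /etc/fstab.bak
 *   file mount devpts /dev/pts/ devpts 0x0
 */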
// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/usb/core/sysfs.c
 *
 * (C) Copyright 2002 David Brownell
 * (C) Copyright 2002,2004 Greg Kroah-Hartman
 * (C) Copyright 2002,2004 IBM Corp.
 *
 * All of the sysfs file attributes for usb devices and interfaces.
 *
 * Released under the GPLv2 only.
*/ #include <linux/kernel.h> #include <linux/kstrtox.h> #include <linux/string.h> #include <linux/usb.h> #include <linux/usb/hcd.h> #include <linux/usb/quirks.h> #include <linux/of.h> #include "usb.h" /* Active configuration fields */ #define usb_actconfig_show(field, format_string) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_device *udev; \ struct usb_host_config *actconfig; \ ssize_t rc; \ \ udev = to_usb_device(dev); \ rc = usb_lock_device_interruptible(udev); \ if (rc < 0) \ return -EINTR; \ actconfig = udev->actconfig; \ if (actconfig) \ rc = sysfs_emit(buf, format_string, \ actconfig->desc.field); \ usb_unlock_device(udev); \ return rc; \ } \ #define usb_actconfig_attr(field, format_string) \ usb_actconfig_show(field, format_string) \ static DEVICE_ATTR_RO(field) usb_actconfig_attr(bNumInterfaces, "%2d\n"); usb_actconfig_attr(bmAttributes, "%2x\n"); static ssize_t bMaxPower_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; struct usb_host_config *actconfig; ssize_t rc; udev = to_usb_device(dev); rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; actconfig = udev->actconfig; if (actconfig) rc = sysfs_emit(buf, "%dmA\n", usb_get_max_power(udev, actconfig)); usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RO(bMaxPower); static ssize_t configuration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; struct usb_host_config *actconfig; ssize_t rc; udev = to_usb_device(dev); rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; actconfig = udev->actconfig; if (actconfig && actconfig->string) rc = sysfs_emit(buf, "%s\n", actconfig->string); usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RO(configuration); /* configuration value is always present, and r/w */ usb_actconfig_show(bConfigurationValue, "%u\n"); static ssize_t bConfigurationValue_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int config, value, rc; if (sscanf(buf, "%d", &config) != 1 || config < -1 || config > 255) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; value = usb_set_configuration(udev, config); usb_unlock_device(udev); return (value < 0) ? 
value : count; } static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); #ifdef CONFIG_OF static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_node *of_node = dev->of_node; return sysfs_emit(buf, "%pOF\n", of_node); } static DEVICE_ATTR_RO(devspec); #endif /* String fields */ #define usb_string_attr(name) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_device *udev; \ int retval; \ \ udev = to_usb_device(dev); \ retval = usb_lock_device_interruptible(udev); \ if (retval < 0) \ return -EINTR; \ retval = sysfs_emit(buf, "%s\n", udev->name); \ usb_unlock_device(udev); \ return retval; \ } \ static DEVICE_ATTR_RO(name) usb_string_attr(product); usb_string_attr(manufacturer); usb_string_attr(serial); static ssize_t speed_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; char *speed; udev = to_usb_device(dev); switch (udev->speed) { case USB_SPEED_LOW: speed = "1.5"; break; case USB_SPEED_UNKNOWN: case USB_SPEED_FULL: speed = "12"; break; case USB_SPEED_HIGH: speed = "480"; break; case USB_SPEED_SUPER: speed = "5000"; break; case USB_SPEED_SUPER_PLUS: if (udev->ssp_rate == USB_SSP_GEN_2x2) speed = "20000"; else speed = "10000"; break; default: speed = "unknown"; } return sysfs_emit(buf, "%s\n", speed); } static DEVICE_ATTR_RO(speed); static ssize_t rx_lanes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->rx_lanes); } static DEVICE_ATTR_RO(rx_lanes); static ssize_t tx_lanes_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->tx_lanes); } static DEVICE_ATTR_RO(tx_lanes); static ssize_t busnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->bus->busnum); } static DEVICE_ATTR_RO(busnum); static ssize_t devnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->devnum); } static DEVICE_ATTR_RO(devnum); static ssize_t devpath_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%s\n", udev->devpath); } static DEVICE_ATTR_RO(devpath); static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; u16 bcdUSB; udev = to_usb_device(dev); bcdUSB = le16_to_cpu(udev->descriptor.bcdUSB); return sysfs_emit(buf, "%2x.%02x\n", bcdUSB >> 8, bcdUSB & 0xff); } static DEVICE_ATTR_RO(version); static ssize_t maxchild_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->maxchild); } static DEVICE_ATTR_RO(maxchild); static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "0x%x\n", udev->quirks); } static DEVICE_ATTR_RO(quirks); static ssize_t avoid_reset_quirk_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", 
!!(udev->quirks & USB_QUIRK_RESET)); } static ssize_t avoid_reset_quirk_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); bool val; int rc; if (kstrtobool(buf, &val) != 0) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (val) udev->quirks |= USB_QUIRK_RESET; else udev->quirks &= ~USB_QUIRK_RESET; usb_unlock_device(udev); return count; } static DEVICE_ATTR_RW(avoid_reset_quirk); static ssize_t urbnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev; udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", atomic_read(&udev->urbnum)); } static DEVICE_ATTR_RO(urbnum); static ssize_t ltm_capable_show(struct device *dev, struct device_attribute *attr, char *buf) { if (usb_device_supports_ltm(to_usb_device(dev))) return sysfs_emit(buf, "%s\n", "yes"); return sysfs_emit(buf, "%s\n", "no"); } static DEVICE_ATTR_RO(ltm_capable); #ifdef CONFIG_PM static ssize_t persist_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->persist_enabled); } static ssize_t persist_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); bool value; int rc; /* Hubs are always enabled for USB_PERSIST */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) return -EPERM; if (kstrtobool(buf, &value) != 0) return -EINVAL; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; udev->persist_enabled = !!value; usb_unlock_device(udev); return count; } static DEVICE_ATTR_RW(persist); static int add_persist_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); /* Hubs are automatically enabled for USB_PERSIST, * no point in creating the attribute file. */ if (udev->descriptor.bDeviceClass != USB_CLASS_HUB) rc = sysfs_add_file_to_group(&dev->kobj, &dev_attr_persist.attr, power_group_name); } return rc; } static void remove_persist_attributes(struct device *dev) { sysfs_remove_file_from_group(&dev->kobj, &dev_attr_persist.attr, power_group_name); } static ssize_t connected_duration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%u\n", jiffies_to_msecs(jiffies - udev->connect_time)); } static DEVICE_ATTR_RO(connected_duration); /* * If the device is resumed, the last time the device was suspended has * been pre-subtracted from active_duration. We add the current time to * get the duration that the device was actually active. * * If the device is suspended, the active_duration is up-to-date. 
*/ static ssize_t active_duration_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); int duration; if (udev->state != USB_STATE_SUSPENDED) duration = jiffies_to_msecs(jiffies + udev->active_duration); else duration = jiffies_to_msecs(udev->active_duration); return sysfs_emit(buf, "%u\n", duration); } static DEVICE_ATTR_RO(active_duration); static ssize_t autosuspend_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay / 1000); } static ssize_t autosuspend_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int value; if (sscanf(buf, "%d", &value) != 1 || value >= INT_MAX/1000 || value <= -INT_MAX/1000) return -EINVAL; pm_runtime_set_autosuspend_delay(dev, value * 1000); return count; } static DEVICE_ATTR_RW(autosuspend); static const char on_string[] = "on"; static const char auto_string[] = "auto"; static void warn_level(void) { static int level_warned; if (!level_warned) { level_warned = 1; printk(KERN_WARNING "WARNING! power/level is deprecated; " "use power/control instead\n"); } } static ssize_t level_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p = auto_string; warn_level(); if (udev->state != USB_STATE_SUSPENDED && !udev->dev.power.runtime_auto) p = on_string; return sysfs_emit(buf, "%s\n", p); } static ssize_t level_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int len = count; char *cp; int rc = count; int rv; warn_level(); cp = memchr(buf, '\n', count); if (cp) len = cp - buf; rv = usb_lock_device_interruptible(udev); if (rv < 0) return -EINTR; if (len == sizeof on_string - 1 && strncmp(buf, on_string, len) == 0) usb_disable_autosuspend(udev); else if (len == sizeof auto_string - 1 && strncmp(buf, auto_string, len) == 0) usb_enable_autosuspend(udev); else rc = -EINVAL; usb_unlock_device(udev); return rc; } static DEVICE_ATTR_RW(level); static ssize_t usb2_hardware_lpm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; if (udev->usb2_hw_lpm_allowed == 1) p = "enabled"; else p = "disabled"; return sysfs_emit(buf, "%s\n", p); } static ssize_t usb2_hardware_lpm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); bool value; int ret; ret = usb_lock_device_interruptible(udev); if (ret < 0) return -EINTR; ret = kstrtobool(buf, &value); if (!ret) { udev->usb2_hw_lpm_allowed = value; if (value) ret = usb_enable_usb2_hardware_lpm(udev); else ret = usb_disable_usb2_hardware_lpm(udev); } usb_unlock_device(udev); if (!ret) return count; return ret; } static DEVICE_ATTR_RW(usb2_hardware_lpm); static ssize_t usb2_lpm_l1_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->l1_params.timeout); } static ssize_t usb2_lpm_l1_timeout_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); u16 timeout; if (kstrtou16(buf, 0, &timeout)) return -EINVAL; udev->l1_params.timeout = timeout; return count; } static DEVICE_ATTR_RW(usb2_lpm_l1_timeout); static ssize_t usb2_lpm_besl_show(struct device *dev, struct 
device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); return sysfs_emit(buf, "%d\n", udev->l1_params.besl); } static ssize_t usb2_lpm_besl_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); u8 besl; if (kstrtou8(buf, 0, &besl) || besl > 15) return -EINVAL; udev->l1_params.besl = besl; return count; } static DEVICE_ATTR_RW(usb2_lpm_besl); static ssize_t usb3_hardware_lpm_u1_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; int rc; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (udev->usb3_lpm_u1_enabled) p = "enabled"; else p = "disabled"; usb_unlock_device(udev); return sysfs_emit(buf, "%s\n", p); } static DEVICE_ATTR_RO(usb3_hardware_lpm_u1); static ssize_t usb3_hardware_lpm_u2_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *udev = to_usb_device(dev); const char *p; int rc; rc = usb_lock_device_interruptible(udev); if (rc < 0) return -EINTR; if (udev->usb3_lpm_u2_enabled) p = "enabled"; else p = "disabled"; usb_unlock_device(udev); return sysfs_emit(buf, "%s\n", p); } static DEVICE_ATTR_RO(usb3_hardware_lpm_u2); static struct attribute *usb2_hardware_lpm_attr[] = { &dev_attr_usb2_hardware_lpm.attr, &dev_attr_usb2_lpm_l1_timeout.attr, &dev_attr_usb2_lpm_besl.attr, NULL, }; static const struct attribute_group usb2_hardware_lpm_attr_group = { .name = power_group_name, .attrs = usb2_hardware_lpm_attr, }; static struct attribute *usb3_hardware_lpm_attr[] = { &dev_attr_usb3_hardware_lpm_u1.attr, &dev_attr_usb3_hardware_lpm_u2.attr, NULL, }; static const struct attribute_group usb3_hardware_lpm_attr_group = { .name = power_group_name, .attrs = usb3_hardware_lpm_attr, }; static struct attribute *power_attrs[] = { &dev_attr_autosuspend.attr, &dev_attr_level.attr, &dev_attr_connected_duration.attr, &dev_attr_active_duration.attr, NULL, }; static const struct attribute_group power_attr_group = { .name = power_group_name, .attrs = power_attrs, }; static int add_power_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) { struct usb_device *udev = to_usb_device(dev); rc = sysfs_merge_group(&dev->kobj, &power_attr_group); if (udev->usb2_hw_lpm_capable == 1) rc = sysfs_merge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); if ((udev->speed == USB_SPEED_SUPER || udev->speed == USB_SPEED_SUPER_PLUS) && udev->lpm_capable == 1) rc = sysfs_merge_group(&dev->kobj, &usb3_hardware_lpm_attr_group); } return rc; } static void remove_power_attributes(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &usb2_hardware_lpm_attr_group); sysfs_unmerge_group(&dev->kobj, &power_attr_group); } #else #define add_persist_attributes(dev) 0 #define remove_persist_attributes(dev) do {} while (0) #define add_power_attributes(dev) 0 #define remove_power_attributes(dev) do {} while (0) #endif /* CONFIG_PM */ /* Descriptor fields */ #define usb_descriptor_attr_le16(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_device *udev; \ \ udev = to_usb_device(dev); \ return sysfs_emit(buf, format_string, \ le16_to_cpu(udev->descriptor.field)); \ } \ static DEVICE_ATTR_RO(field) usb_descriptor_attr_le16(idVendor, "%04x\n"); usb_descriptor_attr_le16(idProduct, "%04x\n"); usb_descriptor_attr_le16(bcdDevice, "%04x\n"); #define usb_descriptor_attr(field, format_string) \ static ssize_t 
\ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_device *udev; \ \ udev = to_usb_device(dev); \ return sysfs_emit(buf, format_string, udev->descriptor.field); \ } \ static DEVICE_ATTR_RO(field) usb_descriptor_attr(bDeviceClass, "%02x\n"); usb_descriptor_attr(bDeviceSubClass, "%02x\n"); usb_descriptor_attr(bDeviceProtocol, "%02x\n"); usb_descriptor_attr(bNumConfigurations, "%d\n"); usb_descriptor_attr(bMaxPacketSize0, "%d\n"); /* show if the device is authorized (1) or not (0) */ static ssize_t authorized_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *usb_dev = to_usb_device(dev); return sysfs_emit(buf, "%u\n", usb_dev->authorized); } /* * Authorize a device to be used in the system * * Writing a 0 deauthorizes the device, writing a 1 authorizes it. */ static ssize_t authorized_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { ssize_t result; struct usb_device *usb_dev = to_usb_device(dev); bool val; if (kstrtobool(buf, &val) != 0) result = -EINVAL; else if (val) result = usb_authorize_device(usb_dev); else result = usb_deauthorize_device(usb_dev); return result < 0 ? result : size; } static DEVICE_ATTR_IGNORE_LOCKDEP(authorized, S_IRUGO | S_IWUSR, authorized_show, authorized_store); /* "Safely remove a device" */ static ssize_t remove_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *udev = to_usb_device(dev); int rc = 0; usb_lock_device(udev); if (udev->state != USB_STATE_NOTATTACHED) { /* To avoid races, first unconfigure and then remove */ usb_set_configuration(udev, -1); rc = usb_remove_device(udev); } if (rc == 0) rc = count; usb_unlock_device(udev); return rc; } static DEVICE_ATTR_IGNORE_LOCKDEP(remove, S_IWUSR, NULL, remove_store); static struct attribute *dev_attrs[] = { /* current configuration's attributes */ &dev_attr_configuration.attr, &dev_attr_bNumInterfaces.attr, &dev_attr_bConfigurationValue.attr, &dev_attr_bmAttributes.attr, &dev_attr_bMaxPower.attr, /* device attributes */ &dev_attr_urbnum.attr, &dev_attr_idVendor.attr, &dev_attr_idProduct.attr, &dev_attr_bcdDevice.attr, &dev_attr_bDeviceClass.attr, &dev_attr_bDeviceSubClass.attr, &dev_attr_bDeviceProtocol.attr, &dev_attr_bNumConfigurations.attr, &dev_attr_bMaxPacketSize0.attr, &dev_attr_speed.attr, &dev_attr_rx_lanes.attr, &dev_attr_tx_lanes.attr, &dev_attr_busnum.attr, &dev_attr_devnum.attr, &dev_attr_devpath.attr, &dev_attr_version.attr, &dev_attr_maxchild.attr, &dev_attr_quirks.attr, &dev_attr_avoid_reset_quirk.attr, &dev_attr_authorized.attr, &dev_attr_remove.attr, &dev_attr_ltm_capable.attr, #ifdef CONFIG_OF &dev_attr_devspec.attr, #endif NULL, }; static const struct attribute_group dev_attr_grp = { .attrs = dev_attrs, }; /* When modifying this list, be sure to modify dev_string_attrs_are_visible() * accordingly. 
*/ static struct attribute *dev_string_attrs[] = { &dev_attr_manufacturer.attr, &dev_attr_product.attr, &dev_attr_serial.attr, NULL }; static umode_t dev_string_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); if (a == &dev_attr_manufacturer.attr) { if (udev->manufacturer == NULL) return 0; } else if (a == &dev_attr_product.attr) { if (udev->product == NULL) return 0; } else if (a == &dev_attr_serial.attr) { if (udev->serial == NULL) return 0; } return a->mode; } static const struct attribute_group dev_string_attr_grp = { .attrs = dev_string_attrs, .is_visible = dev_string_attrs_are_visible, }; /* Binary descriptors */ static ssize_t descriptors_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); size_t nleft = count; size_t srclen, n; int cfgno; void *src; /* The binary attribute begins with the device descriptor. * Following that are the raw descriptor entries for all the * configurations (config plus subsidiary descriptors). */ for (cfgno = -1; cfgno < udev->descriptor.bNumConfigurations && nleft > 0; ++cfgno) { if (cfgno < 0) { src = &udev->descriptor; srclen = sizeof(struct usb_device_descriptor); } else { src = udev->rawdescriptors[cfgno]; srclen = le16_to_cpu(udev->config[cfgno].desc. wTotalLength); } if (off < srclen) { n = min(nleft, srclen - (size_t) off); memcpy(buf, src + off, n); nleft -= n; buf += n; off = 0; } else { off -= srclen; } } return count - nleft; } static BIN_ATTR_RO(descriptors, 18 + 65535); /* dev descr + max-size raw descriptor */ static ssize_t bos_descriptors_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); struct usb_host_bos *bos = udev->bos; struct usb_bos_descriptor *desc; size_t desclen, n = 0; if (bos) { desc = bos->desc; desclen = le16_to_cpu(desc->wTotalLength); if (off < desclen) { n = min(count, desclen - (size_t) off); memcpy(buf, (void *) desc + off, n); } } return n; } static BIN_ATTR_RO(bos_descriptors, 65535); /* max-size BOS */ /* When modifying this list, be sure to modify dev_bin_attrs_are_visible() * accordingly. */ static struct bin_attribute *dev_bin_attrs[] = { &bin_attr_descriptors, &bin_attr_bos_descriptors, NULL }; static umode_t dev_bin_attrs_are_visible(struct kobject *kobj, struct bin_attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct usb_device *udev = to_usb_device(dev); /* * There's no need to check if the descriptors attribute should * be visible because all devices have a device descriptor. The * bos_descriptors attribute should be visible if and only if * the device has a BOS, so check if it exists here. 
*/ if (a == &bin_attr_bos_descriptors) { if (udev->bos == NULL) return 0; } return a->attr.mode; } static const struct attribute_group dev_bin_attr_grp = { .bin_attrs = dev_bin_attrs, .is_bin_visible = dev_bin_attrs_are_visible, }; const struct attribute_group *usb_device_groups[] = { &dev_attr_grp, &dev_string_attr_grp, &dev_bin_attr_grp, NULL }; /* * Show & store the current value of authorized_default */ static ssize_t authorized_default_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *rh_usb_dev = to_usb_device(dev); struct usb_bus *usb_bus = rh_usb_dev->bus; struct usb_hcd *hcd; hcd = bus_to_hcd(usb_bus); return sysfs_emit(buf, "%u\n", hcd->dev_policy); } static ssize_t authorized_default_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { ssize_t result; unsigned int val; struct usb_device *rh_usb_dev = to_usb_device(dev); struct usb_bus *usb_bus = rh_usb_dev->bus; struct usb_hcd *hcd; hcd = bus_to_hcd(usb_bus); result = sscanf(buf, "%u\n", &val); if (result == 1) { hcd->dev_policy = val <= USB_DEVICE_AUTHORIZE_INTERNAL ? val : USB_DEVICE_AUTHORIZE_ALL; result = size; } else { result = -EINVAL; } return result; } static DEVICE_ATTR_RW(authorized_default); /* * interface_authorized_default_show - show default authorization status * for USB interfaces * * note: interface_authorized_default is the default value * for initializing the authorized attribute of interfaces */ static ssize_t interface_authorized_default_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_device *usb_dev = to_usb_device(dev); struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); return sysfs_emit(buf, "%u\n", !!HCD_INTF_AUTHORIZED(hcd)); } /* * interface_authorized_default_store - store default authorization status * for USB interfaces * * note: interface_authorized_default is the default value * for initializing the authorized attribute of interfaces */ static ssize_t interface_authorized_default_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_device *usb_dev = to_usb_device(dev); struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); int rc = count; bool val; if (kstrtobool(buf, &val) != 0) return -EINVAL; if (val) set_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); else clear_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); return rc; } static DEVICE_ATTR_RW(interface_authorized_default); /* Group all the USB bus attributes */ static struct attribute *usb_bus_attrs[] = { &dev_attr_authorized_default.attr, &dev_attr_interface_authorized_default.attr, NULL, }; static const struct attribute_group usb_bus_attr_group = { .name = NULL, /* we want them in the same directory */ .attrs = usb_bus_attrs, }; static int add_default_authorized_attributes(struct device *dev) { int rc = 0; if (is_usb_device(dev)) rc = sysfs_create_group(&dev->kobj, &usb_bus_attr_group); return rc; } static void remove_default_authorized_attributes(struct device *dev) { if (is_usb_device(dev)) { sysfs_remove_group(&dev->kobj, &usb_bus_attr_group); } } int usb_create_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; int retval; retval = add_persist_attributes(dev); if (retval) goto error; retval = add_power_attributes(dev); if (retval) goto error; if (is_root_hub(udev)) { retval = add_default_authorized_attributes(dev); if (retval) goto error; } return retval; error: usb_remove_sysfs_dev_files(udev); return retval; } void usb_remove_sysfs_dev_files(struct usb_device *udev) { struct 
device *dev = &udev->dev; if (is_root_hub(udev)) remove_default_authorized_attributes(dev); remove_power_attributes(dev); remove_persist_attributes(dev); } /* Interface Association Descriptor fields */ #define usb_intf_assoc_attr(field, format_string) \ static ssize_t \ iad_##field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ \ return sysfs_emit(buf, format_string, \ intf->intf_assoc->field); \ } \ static DEVICE_ATTR_RO(iad_##field) usb_intf_assoc_attr(bFirstInterface, "%02x\n"); usb_intf_assoc_attr(bInterfaceCount, "%02d\n"); usb_intf_assoc_attr(bFunctionClass, "%02x\n"); usb_intf_assoc_attr(bFunctionSubClass, "%02x\n"); usb_intf_assoc_attr(bFunctionProtocol, "%02x\n"); /* Interface fields */ #define usb_intf_attr(field, format_string) \ static ssize_t \ field##_show(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ \ return sysfs_emit(buf, format_string, \ intf->cur_altsetting->desc.field); \ } \ static DEVICE_ATTR_RO(field) usb_intf_attr(bInterfaceNumber, "%02x\n"); usb_intf_attr(bAlternateSetting, "%2d\n"); usb_intf_attr(bNumEndpoints, "%02x\n"); usb_intf_attr(bInterfaceClass, "%02x\n"); usb_intf_attr(bInterfaceSubClass, "%02x\n"); usb_intf_attr(bInterfaceProtocol, "%02x\n"); static ssize_t interface_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf; char *string; intf = to_usb_interface(dev); string = READ_ONCE(intf->cur_altsetting->string); if (!string) return 0; return sysfs_emit(buf, "%s\n", string); } static DEVICE_ATTR_RO(interface); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf; struct usb_device *udev; struct usb_host_interface *alt; intf = to_usb_interface(dev); udev = interface_to_usbdev(intf); alt = READ_ONCE(intf->cur_altsetting); return sysfs_emit(buf, "usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02X" "ic%02Xisc%02Xip%02Xin%02X\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), le16_to_cpu(udev->descriptor.bcdDevice), udev->descriptor.bDeviceClass, udev->descriptor.bDeviceSubClass, udev->descriptor.bDeviceProtocol, alt->desc.bInterfaceClass, alt->desc.bInterfaceSubClass, alt->desc.bInterfaceProtocol, alt->desc.bInterfaceNumber); } static DEVICE_ATTR_RO(modalias); static ssize_t supports_autosuspend_show(struct device *dev, struct device_attribute *attr, char *buf) { int s; s = device_lock_interruptible(dev); if (s < 0) return -EINTR; /* Devices will be autosuspended even when an interface isn't claimed */ s = (!dev->driver || to_usb_driver(dev->driver)->supports_autosuspend); device_unlock(dev); return sysfs_emit(buf, "%u\n", s); } static DEVICE_ATTR_RO(supports_autosuspend); /* * interface_authorized_show - show authorization status of an USB interface * 1 is authorized, 0 is deauthorized */ static ssize_t interface_authorized_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); return sysfs_emit(buf, "%u\n", intf->authorized); } /* * interface_authorized_store - authorize or deauthorize an USB interface */ static ssize_t interface_authorized_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); bool val; struct kernfs_node *kn; if (kstrtobool(buf, &val) != 0) return -EINVAL; if (val) { usb_authorize_interface(intf); } 
else { /* * Prevent deadlock if another process is concurrently * trying to unregister intf. */ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); if (kn) { usb_deauthorize_interface(intf); sysfs_unbreak_active_protection(kn); } } return count; } static struct device_attribute dev_attr_interface_authorized = __ATTR(authorized, S_IRUGO | S_IWUSR, interface_authorized_show, interface_authorized_store); static struct attribute *intf_attrs[] = { &dev_attr_bInterfaceNumber.attr, &dev_attr_bAlternateSetting.attr, &dev_attr_bNumEndpoints.attr, &dev_attr_bInterfaceClass.attr, &dev_attr_bInterfaceSubClass.attr, &dev_attr_bInterfaceProtocol.attr, &dev_attr_modalias.attr, &dev_attr_supports_autosuspend.attr, &dev_attr_interface_authorized.attr, NULL, }; static const struct attribute_group intf_attr_grp = { .attrs = intf_attrs, }; static struct attribute *intf_assoc_attrs[] = { &dev_attr_iad_bFirstInterface.attr, &dev_attr_iad_bInterfaceCount.attr, &dev_attr_iad_bFunctionClass.attr, &dev_attr_iad_bFunctionSubClass.attr, &dev_attr_iad_bFunctionProtocol.attr, NULL, }; static umode_t intf_assoc_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct usb_interface *intf = to_usb_interface(dev); if (intf->intf_assoc == NULL) return 0; return a->mode; } static const struct attribute_group intf_assoc_attr_grp = { .attrs = intf_assoc_attrs, .is_visible = intf_assoc_attrs_are_visible, }; static ssize_t wireless_status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf; intf = to_usb_interface(dev); if (intf->wireless_status == USB_WIRELESS_STATUS_DISCONNECTED) return sysfs_emit(buf, "%s\n", "disconnected"); return sysfs_emit(buf, "%s\n", "connected"); } static DEVICE_ATTR_RO(wireless_status); static struct attribute *intf_wireless_status_attrs[] = { &dev_attr_wireless_status.attr, NULL }; static umode_t intf_wireless_status_attr_is_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); struct usb_interface *intf = to_usb_interface(dev); if (a != &dev_attr_wireless_status.attr || intf->wireless_status != USB_WIRELESS_STATUS_NA) return a->mode; return 0; } static const struct attribute_group intf_wireless_status_attr_grp = { .attrs = intf_wireless_status_attrs, .is_visible = intf_wireless_status_attr_is_visible, }; int usb_update_wireless_status_attr(struct usb_interface *intf) { struct device *dev = &intf->dev; int ret; ret = sysfs_update_group(&dev->kobj, &intf_wireless_status_attr_grp); if (ret < 0) return ret; sysfs_notify(&dev->kobj, NULL, "wireless_status"); kobject_uevent(&dev->kobj, KOBJ_CHANGE); return 0; } const struct attribute_group *usb_interface_groups[] = { &intf_attr_grp, &intf_assoc_attr_grp, &intf_wireless_status_attr_grp, NULL }; void usb_create_sysfs_intf_files(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_host_interface *alt = intf->cur_altsetting; if (intf->sysfs_files_created || intf->unregistering) return; if (!alt->string && !(udev->quirks & USB_QUIRK_CONFIG_INTF_STRINGS)) alt->string = usb_cache_string(udev, alt->desc.iInterface); if (alt->string && device_create_file(&intf->dev, &dev_attr_interface)) { /* This is not a serious error */ dev_dbg(&intf->dev, "interface string descriptor file not created\n"); } intf->sysfs_files_created = 1; } void usb_remove_sysfs_intf_files(struct usb_interface *intf) { if (!intf->sysfs_files_created) return; device_remove_file(&intf->dev, 
&dev_attr_interface); intf->sysfs_files_created = 0; }
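/*
 * Illustrative userspace sketch (not part of the kernel code above): the
 * attributes registered by the functions above surface as small text files
 * under /sys/bus/usb/devices/.  The snippet below simply reads one of them;
 * the device name "usb1" and the choice of the idVendor attribute are only
 * examples, real systems enumerate their own device names.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Example path; adjust to a device that actually exists. */
	const char *path = "/sys/bus/usb/devices/usb1/idVendor";
	char buf[16];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("idVendor: %s", buf);	/* four hex digits, as emitted above */
	fclose(f);
	return EXIT_SUCCESS;
}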
// SPDX-License-Identifier: GPL-2.0-only /* Kernel thread helper functions. * Copyright (C) 2004 IBM Corporation, Rusty Russell. * Copyright (C) 2009 Red Hat, Inc. * * Creation is done via kthreadd, so that we get a clean environment * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.).
*/ #include <uapi/linux/sched/types.h> #include <linux/mm.h> #include <linux/mmu_context.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/kthread.h> #include <linux/completion.h> #include <linux/err.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/unistd.h> #include <linux/file.h> #include <linux/export.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/freezer.h> #include <linux/ptrace.h> #include <linux/uaccess.h> #include <linux/numa.h> #include <linux/sched/isolation.h> #include <trace/events/sched.h> static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ char *full_name; int (*threadfn)(void *data); void *data; int node; /* Result passed back to kthread_create() from kthreadd. */ struct task_struct *result; struct completion *done; struct list_head list; }; struct kthread { unsigned long flags; unsigned int cpu; int result; int (*threadfn)(void *); void *data; struct completion parked; struct completion exited; #ifdef CONFIG_BLK_CGROUP struct cgroup_subsys_state *blkcg_css; #endif /* To store the full name if task comm is truncated. */ char *full_name; }; enum KTHREAD_BITS { KTHREAD_IS_PER_CPU = 0, KTHREAD_SHOULD_STOP, KTHREAD_SHOULD_PARK, }; static inline struct kthread *to_kthread(struct task_struct *k) { WARN_ON(!(k->flags & PF_KTHREAD)); return k->worker_private; } /* * Variant of to_kthread() that doesn't assume @p is a kthread. * * Per construction; when: * * (p->flags & PF_KTHREAD) && p->worker_private * * the task is both a kthread and struct kthread is persistent. However * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and * begin_new_exec()). */ static inline struct kthread *__to_kthread(struct task_struct *p) { void *kthread = p->worker_private; if (kthread && !(p->flags & PF_KTHREAD)) kthread = NULL; return kthread; } void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk) { struct kthread *kthread = to_kthread(tsk); if (!kthread || !kthread->full_name) { __get_task_comm(buf, buf_size, tsk); return; } strscpy_pad(buf, kthread->full_name, buf_size); } bool set_kthread_struct(struct task_struct *p) { struct kthread *kthread; if (WARN_ON_ONCE(to_kthread(p))) return false; kthread = kzalloc(sizeof(*kthread), GFP_KERNEL); if (!kthread) return false; init_completion(&kthread->exited); init_completion(&kthread->parked); p->vfork_done = &kthread->exited; p->worker_private = kthread; return true; } void free_kthread_struct(struct task_struct *k) { struct kthread *kthread; /* * Can be NULL if kmalloc() in set_kthread_struct() failed. */ kthread = to_kthread(k); if (!kthread) return; #ifdef CONFIG_BLK_CGROUP WARN_ON_ONCE(kthread->blkcg_css); #endif k->worker_private = NULL; kfree(kthread->full_name); kfree(kthread); } /** * kthread_should_stop - should this kthread return now? * * When someone calls kthread_stop() on your kthread, it will be woken * and this will return true. You should then return, and your return * value will be passed through to kthread_stop(). */ bool kthread_should_stop(void) { return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags); } EXPORT_SYMBOL(kthread_should_stop); static bool __kthread_should_park(struct task_struct *k) { return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(k)->flags); } /** * kthread_should_park - should this kthread park now? 
* * When someone calls kthread_park() on your kthread, it will be woken * and this will return true. You should then do the necessary * cleanup and call kthread_parkme() * * Similar to kthread_should_stop(), but this keeps the thread alive * and in a park position. kthread_unpark() "restarts" the thread and * calls the thread function again. */ bool kthread_should_park(void) { return __kthread_should_park(current); } EXPORT_SYMBOL_GPL(kthread_should_park); bool kthread_should_stop_or_park(void) { struct kthread *kthread = __to_kthread(current); if (!kthread) return false; return kthread->flags & (BIT(KTHREAD_SHOULD_STOP) | BIT(KTHREAD_SHOULD_PARK)); } /** * kthread_freezable_should_stop - should this freezable kthread return now? * @was_frozen: optional out parameter, indicates whether %current was frozen * * kthread_should_stop() for freezable kthreads, which will enter * refrigerator if necessary. This function is safe from kthread_stop() / * freezer deadlock and freezable kthreads should use this function instead * of calling try_to_freeze() directly. */ bool kthread_freezable_should_stop(bool *was_frozen) { bool frozen = false; might_sleep(); if (unlikely(freezing(current))) frozen = __refrigerator(true); if (was_frozen) *was_frozen = frozen; return kthread_should_stop(); } EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); /** * kthread_func - return the function specified on kthread creation * @task: kthread task in question * * Returns NULL if the task is not a kthread. */ void *kthread_func(struct task_struct *task) { struct kthread *kthread = __to_kthread(task); if (kthread) return kthread->threadfn; return NULL; } EXPORT_SYMBOL_GPL(kthread_func); /** * kthread_data - return data value specified on kthread creation * @task: kthread task in question * * Return the data value specified when kthread @task was created. * The caller is responsible for ensuring the validity of @task when * calling this function. */ void *kthread_data(struct task_struct *task) { return to_kthread(task)->data; } EXPORT_SYMBOL_GPL(kthread_data); /** * kthread_probe_data - speculative version of kthread_data() * @task: possible kthread task in question * * @task could be a kthread task. Return the data value specified when it * was created if accessible. If @task isn't a kthread task or its data is * inaccessible for any reason, %NULL is returned. This function requires * that @task itself is safe to dereference. */ void *kthread_probe_data(struct task_struct *task) { struct kthread *kthread = __to_kthread(task); void *data = NULL; if (kthread) copy_from_kernel_nofault(&data, &kthread->data, sizeof(data)); return data; } static void __kthread_parkme(struct kthread *self) { for (;;) { /* * TASK_PARKED is a special state; we must serialize against * possible pending wakeups to avoid store-store collisions on * task->state. * * Such a collision might possibly result in the task state * changin from TASK_PARKED and us failing the * wait_task_inactive() in kthread_park(). */ set_special_state(TASK_PARKED); if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) break; /* * Thread is going to call schedule(), do not preempt it, * or the caller of kthread_park() may spend more time in * wait_task_inactive(). */ preempt_disable(); complete(&self->parked); schedule_preempt_disabled(); preempt_enable(); } __set_current_state(TASK_RUNNING); } void kthread_parkme(void) { __kthread_parkme(to_kthread(current)); } EXPORT_SYMBOL_GPL(kthread_parkme); /** * kthread_exit - Cause the current kthread return @result to kthread_stop(). 
* @result: The integer value to return to kthread_stop(). * * While kthread_exit can be called directly, it exists so that * functions which do some additional work in non-modular code such as * module_put_and_kthread_exit can be implemented. * * Does not return. */ void __noreturn kthread_exit(long result) { struct kthread *kthread = to_kthread(current); kthread->result = result; do_exit(0); } /** * kthread_complete_and_exit - Exit the current kthread. * @comp: Completion to complete * @code: The integer value to return to kthread_stop(). * * If present, complete @comp and then return code to kthread_stop(). * * A kernel thread whose module may be removed after the completion of * @comp can use this function to exit safely. * * Does not return. */ void __noreturn kthread_complete_and_exit(struct completion *comp, long code) { if (comp) complete(comp); kthread_exit(code); } EXPORT_SYMBOL(kthread_complete_and_exit); static int kthread(void *_create) { static const struct sched_param param = { .sched_priority = 0 }; /* Copy data: it's on kthread's stack */ struct kthread_create_info *create = _create; int (*threadfn)(void *data) = create->threadfn; void *data = create->data; struct completion *done; struct kthread *self; int ret; self = to_kthread(current); /* Release the structure when caller killed by a fatal signal. */ done = xchg(&create->done, NULL); if (!done) { kfree(create->full_name); kfree(create); kthread_exit(-EINTR); } self->full_name = create->full_name; self->threadfn = threadfn; self->data = data; /* * The new thread inherited kthreadd's priority and CPU mask. Reset * back to default in case they have been changed. */ sched_setscheduler_nocheck(current, SCHED_NORMAL, &param); set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD)); /* OK, tell user we're spawned, wait for stop or wakeup */ __set_current_state(TASK_UNINTERRUPTIBLE); create->result = current; /* * Thread is going to call schedule(), do not preempt it, * or the creator may spend more time in wait_task_inactive(). */ preempt_disable(); complete(done); schedule_preempt_disabled(); preempt_enable(); ret = -EINTR; if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) { cgroup_kthread_ready(); __kthread_parkme(self); ret = threadfn(data); } kthread_exit(ret); } /* called from kernel_clone() to get node information for about to be created task */ int tsk_fork_get_node(struct task_struct *tsk) { #ifdef CONFIG_NUMA if (tsk == kthreadd_task) return tsk->pref_node_fork; #endif return NUMA_NO_NODE; } static void create_kthread(struct kthread_create_info *create) { int pid; #ifdef CONFIG_NUMA current->pref_node_fork = create->node; #endif /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, create->full_name, CLONE_FS | CLONE_FILES | SIGCHLD); if (pid < 0) { /* Release the structure when caller killed by a fatal signal. 
*/ struct completion *done = xchg(&create->done, NULL); kfree(create->full_name); if (!done) { kfree(create); return; } create->result = ERR_PTR(pid); complete(done); } } static __printf(4, 0) struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), void *data, int node, const char namefmt[], va_list args) { DECLARE_COMPLETION_ONSTACK(done); struct task_struct *task; struct kthread_create_info *create = kmalloc(sizeof(*create), GFP_KERNEL); if (!create) return ERR_PTR(-ENOMEM); create->threadfn = threadfn; create->data = data; create->node = node; create->done = &done; create->full_name = kvasprintf(GFP_KERNEL, namefmt, args); if (!create->full_name) { task = ERR_PTR(-ENOMEM); goto free_create; } spin_lock(&kthread_create_lock); list_add_tail(&create->list, &kthread_create_list); spin_unlock(&kthread_create_lock); wake_up_process(kthreadd_task); /* * Wait for completion in killable state, for I might be chosen by * the OOM killer while kthreadd is trying to allocate memory for * new kernel thread. */ if (unlikely(wait_for_completion_killable(&done))) { /* * If I was killed by a fatal signal before kthreadd (or new * kernel thread) calls complete(), leave the cleanup of this * structure to that thread. */ if (xchg(&create->done, NULL)) return ERR_PTR(-EINTR); /* * kthreadd (or new kernel thread) will call complete() * shortly. */ wait_for_completion(&done); } task = create->result; free_create: kfree(create); return task; } /** * kthread_create_on_node - create a kthread. * @threadfn: the function to run until signal_pending(current). * @data: data ptr for @threadfn. * @node: task and thread structures for the thread are allocated on this node * @namefmt: printf-style name for the thread. * * Description: This helper function creates and names a kernel * thread. The thread will be stopped: use wake_up_process() to start * it. See also kthread_run(). The new thread has SCHED_NORMAL policy and * is affine to all CPUs. * * If thread is going to be bound on a particular cpu, give its node * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE. * When woken, the thread will run @threadfn() with @data as its * argument. @threadfn() can either return directly if it is a * standalone thread for which no one will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero * or a negative error number; it will be passed to kthread_stop(). * * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR). */ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, int node, const char namefmt[], ...) { struct task_struct *task; va_list args; va_start(args, namefmt); task = __kthread_create_on_node(threadfn, data, node, namefmt, args); va_end(args); return task; } EXPORT_SYMBOL(kthread_create_on_node); static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state) { unsigned long flags; if (!wait_task_inactive(p, state)) { WARN_ON(1); return; } /* It's safe because the task is inactive. 
*/ raw_spin_lock_irqsave(&p->pi_lock, flags); do_set_cpus_allowed(p, mask); p->flags |= PF_NO_SETAFFINITY; raw_spin_unlock_irqrestore(&p->pi_lock, flags); } static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state) { __kthread_bind_mask(p, cpumask_of(cpu), state); } void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask) { __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE); } /** * kthread_bind - bind a just-created kthread to a cpu. * @p: thread created by kthread_create(). * @cpu: cpu (might not be online, must be possible) for @k to run on. * * Description: This function is equivalent to set_cpus_allowed(), * except that @cpu doesn't need to be online, and the thread must be * stopped (i.e., just returned from kthread_create()). */ void kthread_bind(struct task_struct *p, unsigned int cpu) { __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(kthread_bind); /** * kthread_create_on_cpu - Create a cpu bound kthread * @threadfn: the function to run until signal_pending(current). * @data: data ptr for @threadfn. * @cpu: The cpu on which the thread should be bound, * @namefmt: printf-style name for the thread. Format is restricted * to "name.*%u". Code fills in cpu number. * * Description: This helper function creates and names a kernel thread */ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), void *data, unsigned int cpu, const char *namefmt) { struct task_struct *p; p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt, cpu); if (IS_ERR(p)) return p; kthread_bind(p, cpu); /* CPU hotplug need to bind once again when unparking the thread. */ to_kthread(p)->cpu = cpu; return p; } EXPORT_SYMBOL(kthread_create_on_cpu); void kthread_set_per_cpu(struct task_struct *k, int cpu) { struct kthread *kthread = to_kthread(k); if (!kthread) return; WARN_ON_ONCE(!(k->flags & PF_NO_SETAFFINITY)); if (cpu < 0) { clear_bit(KTHREAD_IS_PER_CPU, &kthread->flags); return; } kthread->cpu = cpu; set_bit(KTHREAD_IS_PER_CPU, &kthread->flags); } bool kthread_is_per_cpu(struct task_struct *p) { struct kthread *kthread = __to_kthread(p); if (!kthread) return false; return test_bit(KTHREAD_IS_PER_CPU, &kthread->flags); } /** * kthread_unpark - unpark a thread created by kthread_create(). * @k: thread created by kthread_create(). * * Sets kthread_should_park() for @k to return false, wakes it, and * waits for it to return. If the thread is marked percpu then its * bound to the cpu again. */ void kthread_unpark(struct task_struct *k) { struct kthread *kthread = to_kthread(k); /* * Newly created kthread was parked when the CPU was offline. * The binding was lost and we need to set it again. */ if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) __kthread_bind(k, kthread->cpu, TASK_PARKED); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); /* * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup. */ wake_up_state(k, TASK_PARKED); } EXPORT_SYMBOL_GPL(kthread_unpark); /** * kthread_park - park a thread created by kthread_create(). * @k: thread created by kthread_create(). * * Sets kthread_should_park() for @k to return true, wakes it, and * waits for it to return. This can also be called after kthread_create() * instead of calling wake_up_process(): the thread will park without * calling threadfn(). * * Returns 0 if the thread is parked, -ENOSYS if the thread exited. * If called by the kthread itself just the park bit is set. 
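 *
 * A minimal park/unpark sketch (illustrative only, not taken from this
 * file); my_threadfn is a made-up thread function that is assumed to call
 * kthread_parkme() whenever kthread_should_park() returns true:
 *
 *	struct task_struct *t = kthread_run(my_threadfn, NULL, "example");
 *
 *	if (!IS_ERR(t)) {
 *		kthread_park(t);
 *		kthread_unpark(t);
 *		kthread_stop(t);
 *	}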
*/ int kthread_park(struct task_struct *k) { struct kthread *kthread = to_kthread(k); if (WARN_ON(k->flags & PF_EXITING)) return -ENOSYS; if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))) return -EBUSY; set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); if (k != current) { wake_up_process(k); /* * Wait for __kthread_parkme() to complete(), this means we * _will_ have TASK_PARKED and are about to call schedule(). */ wait_for_completion(&kthread->parked); /* * Now wait for that schedule() to complete and the task to * get scheduled out. */ WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED)); } return 0; } EXPORT_SYMBOL_GPL(kthread_park); /** * kthread_stop - stop a thread created by kthread_create(). * @k: thread created by kthread_create(). * * Sets kthread_should_stop() for @k to return true, wakes it, and * waits for it to exit. This can also be called after kthread_create() * instead of calling wake_up_process(): the thread will exit without * calling threadfn(). * * If threadfn() may call kthread_exit() itself, the caller must ensure * task_struct can't go away. * * Returns the result of threadfn(), or %-EINTR if wake_up_process() * was never called. */ int kthread_stop(struct task_struct *k) { struct kthread *kthread; int ret; trace_sched_kthread_stop(k); get_task_struct(k); kthread = to_kthread(k); set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); kthread_unpark(k); set_tsk_thread_flag(k, TIF_NOTIFY_SIGNAL); wake_up_process(k); wait_for_completion(&kthread->exited); ret = kthread->result; put_task_struct(k); trace_sched_kthread_stop_ret(ret); return ret; } EXPORT_SYMBOL(kthread_stop); /** * kthread_stop_put - stop a thread and put its task struct * @k: thread created by kthread_create(). * * Stops a thread created by kthread_create() and put its task_struct. * Only use when holding an extra task struct reference obtained by * calling get_task_struct(). */ int kthread_stop_put(struct task_struct *k) { int ret; ret = kthread_stop(k); put_task_struct(k); return ret; } EXPORT_SYMBOL(kthread_stop_put); int kthreadd(void *unused) { struct task_struct *tsk = current; /* Setup a clean context for our children to inherit. */ set_task_comm(tsk, "kthreadd"); ignore_signals(tsk); set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD)); set_mems_allowed(node_states[N_MEMORY]); current->flags |= PF_NOFREEZE; cgroup_init_kthreadd(); for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (list_empty(&kthread_create_list)) schedule(); __set_current_state(TASK_RUNNING); spin_lock(&kthread_create_lock); while (!list_empty(&kthread_create_list)) { struct kthread_create_info *create; create = list_entry(kthread_create_list.next, struct kthread_create_info, list); list_del_init(&create->list); spin_unlock(&kthread_create_lock); create_kthread(create); spin_lock(&kthread_create_lock); } spin_unlock(&kthread_create_lock); } return 0; } void __kthread_init_worker(struct kthread_worker *worker, const char *name, struct lock_class_key *key) { memset(worker, 0, sizeof(struct kthread_worker)); raw_spin_lock_init(&worker->lock); lockdep_set_class_and_name(&worker->lock, key, name); INIT_LIST_HEAD(&worker->work_list); INIT_LIST_HEAD(&worker->delayed_work_list); } EXPORT_SYMBOL_GPL(__kthread_init_worker); /** * kthread_worker_fn - kthread function to process kthread_worker * @worker_ptr: pointer to initialized kthread_worker * * This function implements the main cycle of kthread worker. It processes * work_list until it is stopped with kthread_stop(). It sleeps when the queue * is empty. 
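 *
 * For orientation, a rough usage sketch of the worker API (illustrative,
 * not from this file; my_work_fn is a made-up kthread_work_func_t):
 *
 *	struct kthread_worker *w = kthread_create_worker(0, "example");
 *	struct kthread_work work;
 *
 *	if (!IS_ERR(w)) {
 *		kthread_init_work(&work, my_work_fn);
 *		kthread_queue_work(w, &work);
 *		kthread_flush_work(&work);
 *		kthread_destroy_worker(w);
 *	}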
* * The works are not allowed to keep any locks, disable preemption or interrupts * when they finish. There is defined a safe point for freezing when one work * finishes and before a new one is started. * * Also the works must not be handled by more than one worker at the same time, * see also kthread_queue_work(). */ int kthread_worker_fn(void *worker_ptr) { struct kthread_worker *worker = worker_ptr; struct kthread_work *work; /* * FIXME: Update the check and remove the assignment when all kthread * worker users are created using kthread_create_worker*() functions. */ WARN_ON(worker->task && worker->task != current); worker->task = current; if (worker->flags & KTW_FREEZABLE) set_freezable(); repeat: set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */ if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); raw_spin_lock_irq(&worker->lock); worker->task = NULL; raw_spin_unlock_irq(&worker->lock); return 0; } work = NULL; raw_spin_lock_irq(&worker->lock); if (!list_empty(&worker->work_list)) { work = list_first_entry(&worker->work_list, struct kthread_work, node); list_del_init(&work->node); } worker->current_work = work; raw_spin_unlock_irq(&worker->lock); if (work) { kthread_work_func_t func = work->func; __set_current_state(TASK_RUNNING); trace_sched_kthread_work_execute_start(work); work->func(work); /* * Avoid dereferencing work after this point. The trace * event only cares about the address. */ trace_sched_kthread_work_execute_end(work, func); } else if (!freezing(current)) schedule(); try_to_freeze(); cond_resched(); goto repeat; } EXPORT_SYMBOL_GPL(kthread_worker_fn); static __printf(3, 0) struct kthread_worker * __kthread_create_worker(int cpu, unsigned int flags, const char namefmt[], va_list args) { struct kthread_worker *worker; struct task_struct *task; int node = NUMA_NO_NODE; worker = kzalloc(sizeof(*worker), GFP_KERNEL); if (!worker) return ERR_PTR(-ENOMEM); kthread_init_worker(worker); if (cpu >= 0) node = cpu_to_node(cpu); task = __kthread_create_on_node(kthread_worker_fn, worker, node, namefmt, args); if (IS_ERR(task)) goto fail_task; if (cpu >= 0) kthread_bind(task, cpu); worker->flags = flags; worker->task = task; wake_up_process(task); return worker; fail_task: kfree(worker); return ERR_CAST(task); } /** * kthread_create_worker - create a kthread worker * @flags: flags modifying the default behavior of the worker * @namefmt: printf-style name for the kthread worker (task). * * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM) * when the needed structures could not get allocated, and ERR_PTR(-EINTR) * when the caller was killed by a fatal signal. */ struct kthread_worker * kthread_create_worker(unsigned int flags, const char namefmt[], ...) { struct kthread_worker *worker; va_list args; va_start(args, namefmt); worker = __kthread_create_worker(-1, flags, namefmt, args); va_end(args); return worker; } EXPORT_SYMBOL(kthread_create_worker); /** * kthread_create_worker_on_cpu - create a kthread worker and bind it * to a given CPU and the associated NUMA node. * @cpu: CPU number * @flags: flags modifying the default behavior of the worker * @namefmt: printf-style name for the kthread worker (task). * * Use a valid CPU number if you want to bind the kthread worker * to the given CPU and the associated NUMA node. * * A good practice is to add the cpu number also into the worker name. * For example, use kthread_create_worker_on_cpu(cpu, "helper/%d", cpu). * * CPU hotplug: * The kthread worker API is simple and generic. 
It just provides a way * to create, use, and destroy workers. * * It is up to the API user how to handle CPU hotplug. They have to decide * how to handle pending work items, prevent queuing new ones, and * restore the functionality when the CPU goes off and on. There are a * few catches: * * - CPU affinity gets lost when it is scheduled on an offline CPU. * * - The worker might not exist when the CPU was off when the user * created the workers. * * Good practice is to implement two CPU hotplug callbacks and to * destroy/create the worker when the CPU goes down/up. * * Return: * The pointer to the allocated worker on success, ERR_PTR(-ENOMEM) * when the needed structures could not get allocated, and ERR_PTR(-EINTR) * when the caller was killed by a fatal signal. */ struct kthread_worker * kthread_create_worker_on_cpu(int cpu, unsigned int flags, const char namefmt[], ...) { struct kthread_worker *worker; va_list args; va_start(args, namefmt); worker = __kthread_create_worker(cpu, flags, namefmt, args); va_end(args); return worker; } EXPORT_SYMBOL(kthread_create_worker_on_cpu); /* * Returns true when the work could not be queued at the moment. * It happens when it is already pending in a worker list * or when it is being cancelled. */ static inline bool queuing_blocked(struct kthread_worker *worker, struct kthread_work *work) { lockdep_assert_held(&worker->lock); return !list_empty(&work->node) || work->canceling; } static void kthread_insert_work_sanity_check(struct kthread_worker *worker, struct kthread_work *work) { lockdep_assert_held(&worker->lock); WARN_ON_ONCE(!list_empty(&work->node)); /* Do not use a work with >1 worker, see kthread_queue_work() */ WARN_ON_ONCE(work->worker && work->worker != worker); } /* insert @work before @pos in @worker */ static void kthread_insert_work(struct kthread_worker *worker, struct kthread_work *work, struct list_head *pos) { kthread_insert_work_sanity_check(worker, work); trace_sched_kthread_work_queue_work(worker, work); list_add_tail(&work->node, pos); work->worker = worker; if (!worker->current_work && likely(worker->task)) wake_up_process(worker->task); } /** * kthread_queue_work - queue a kthread_work * @worker: target kthread_worker * @work: kthread_work to queue * * Queue @work to work processor @task for async execution. @task * must have been created with kthread_worker_create(). Returns %true * if @work was successfully queued, %false if it was already pending. * * Reinitialize the work if it needs to be used by another worker. * For example, when the worker was stopped and started again. */ bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work) { bool ret = false; unsigned long flags; raw_spin_lock_irqsave(&worker->lock, flags); if (!queuing_blocked(worker, work)) { kthread_insert_work(worker, work, &worker->work_list); ret = true; } raw_spin_unlock_irqrestore(&worker->lock, flags); return ret; } EXPORT_SYMBOL_GPL(kthread_queue_work); /** * kthread_delayed_work_timer_fn - callback that queues the associated kthread * delayed work when the timer expires. * @t: pointer to the expired timer * * The format of the function is defined by struct timer_list. * It should have been called from irqsafe timer with irq already off. */ void kthread_delayed_work_timer_fn(struct timer_list *t) { struct kthread_delayed_work *dwork = from_timer(dwork, t, timer); struct kthread_work *work = &dwork->work; struct kthread_worker *worker = work->worker; unsigned long flags; /* * This might happen when a pending work is reinitialized. 
* It means that it is used a wrong way. */ if (WARN_ON_ONCE(!worker)) return; raw_spin_lock_irqsave(&worker->lock, flags); /* Work must not be used with >1 worker, see kthread_queue_work(). */ WARN_ON_ONCE(work->worker != worker); /* Move the work from worker->delayed_work_list. */ WARN_ON_ONCE(list_empty(&work->node)); list_del_init(&work->node); if (!work->canceling) kthread_insert_work(worker, work, &worker->work_list); raw_spin_unlock_irqrestore(&worker->lock, flags); } EXPORT_SYMBOL(kthread_delayed_work_timer_fn); static void __kthread_queue_delayed_work(struct kthread_worker *worker, struct kthread_delayed_work *dwork, unsigned long delay) { struct timer_list *timer = &dwork->timer; struct kthread_work *work = &dwork->work; WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn); /* * If @delay is 0, queue @dwork->work immediately. This is for * both optimization and correctness. The earliest @timer can * expire is on the closest next tick and delayed_work users depend * on that there's no such delay when @delay is 0. */ if (!delay) { kthread_insert_work(worker, work, &worker->work_list); return; } /* Be paranoid and try to detect possible races already now. */ kthread_insert_work_sanity_check(worker, work); list_add(&work->node, &worker->delayed_work_list); work->worker = worker; timer->expires = jiffies + delay; add_timer(timer); } /** * kthread_queue_delayed_work - queue the associated kthread work * after a delay. * @worker: target kthread_worker * @dwork: kthread_delayed_work to queue * @delay: number of jiffies to wait before queuing * * If the work has not been pending it starts a timer that will queue * the work after the given @delay. If @delay is zero, it queues the * work immediately. * * Return: %false if the @work has already been pending. It means that * either the timer was running or the work was queued. It returns %true * otherwise. */ bool kthread_queue_delayed_work(struct kthread_worker *worker, struct kthread_delayed_work *dwork, unsigned long delay) { struct kthread_work *work = &dwork->work; unsigned long flags; bool ret = false; raw_spin_lock_irqsave(&worker->lock, flags); if (!queuing_blocked(worker, work)) { __kthread_queue_delayed_work(worker, dwork, delay); ret = true; } raw_spin_unlock_irqrestore(&worker->lock, flags); return ret; } EXPORT_SYMBOL_GPL(kthread_queue_delayed_work); struct kthread_flush_work { struct kthread_work work; struct completion done; }; static void kthread_flush_work_fn(struct kthread_work *work) { struct kthread_flush_work *fwork = container_of(work, struct kthread_flush_work, work); complete(&fwork->done); } /** * kthread_flush_work - flush a kthread_work * @work: work to flush * * If @work is queued or executing, wait for it to finish execution. */ void kthread_flush_work(struct kthread_work *work) { struct kthread_flush_work fwork = { KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), COMPLETION_INITIALIZER_ONSTACK(fwork.done), }; struct kthread_worker *worker; bool noop = false; worker = work->worker; if (!worker) return; raw_spin_lock_irq(&worker->lock); /* Work must not be used with >1 worker, see kthread_queue_work(). 
*/ WARN_ON_ONCE(work->worker != worker); if (!list_empty(&work->node)) kthread_insert_work(worker, &fwork.work, work->node.next); else if (worker->current_work == work) kthread_insert_work(worker, &fwork.work, worker->work_list.next); else noop = true; raw_spin_unlock_irq(&worker->lock); if (!noop) wait_for_completion(&fwork.done); } EXPORT_SYMBOL_GPL(kthread_flush_work); /* * Make sure that the timer is neither set nor running and could * not manipulate the work list_head any longer. * * The function is called under worker->lock. The lock is temporary * released but the timer can't be set again in the meantime. */ static void kthread_cancel_delayed_work_timer(struct kthread_work *work, unsigned long *flags) { struct kthread_delayed_work *dwork = container_of(work, struct kthread_delayed_work, work); struct kthread_worker *worker = work->worker; /* * del_timer_sync() must be called to make sure that the timer * callback is not running. The lock must be temporary released * to avoid a deadlock with the callback. In the meantime, * any queuing is blocked by setting the canceling counter. */ work->canceling++; raw_spin_unlock_irqrestore(&worker->lock, *flags); del_timer_sync(&dwork->timer); raw_spin_lock_irqsave(&worker->lock, *flags); work->canceling--; } /* * This function removes the work from the worker queue. * * It is called under worker->lock. The caller must make sure that * the timer used by delayed work is not running, e.g. by calling * kthread_cancel_delayed_work_timer(). * * The work might still be in use when this function finishes. See the * current_work proceed by the worker. * * Return: %true if @work was pending and successfully canceled, * %false if @work was not pending */ static bool __kthread_cancel_work(struct kthread_work *work) { /* * Try to remove the work from a worker list. It might either * be from worker->work_list or from worker->delayed_work_list. */ if (!list_empty(&work->node)) { list_del_init(&work->node); return true; } return false; } /** * kthread_mod_delayed_work - modify delay of or queue a kthread delayed work * @worker: kthread worker to use * @dwork: kthread delayed work to queue * @delay: number of jiffies to wait before queuing * * If @dwork is idle, equivalent to kthread_queue_delayed_work(). Otherwise, * modify @dwork's timer so that it expires after @delay. If @delay is zero, * @work is guaranteed to be queued immediately. * * Return: %false if @dwork was idle and queued, %true otherwise. * * A special case is when the work is being canceled in parallel. * It might be caused either by the real kthread_cancel_delayed_work_sync() * or yet another kthread_mod_delayed_work() call. We let the other command * win and return %true here. The return value can be used for reference * counting and the number of queued works stays the same. Anyway, the caller * is supposed to synchronize these operations a reasonable way. * * This function is safe to call from any context including IRQ handler. * See __kthread_cancel_work() and kthread_delayed_work_timer_fn() * for details. */ bool kthread_mod_delayed_work(struct kthread_worker *worker, struct kthread_delayed_work *dwork, unsigned long delay) { struct kthread_work *work = &dwork->work; unsigned long flags; int ret; raw_spin_lock_irqsave(&worker->lock, flags); /* Do not bother with canceling when never queued. 
*/ if (!work->worker) { ret = false; goto fast_queue; } /* Work must not be used with >1 worker, see kthread_queue_work() */ WARN_ON_ONCE(work->worker != worker); /* * Temporary cancel the work but do not fight with another command * that is canceling the work as well. * * It is a bit tricky because of possible races with another * mod_delayed_work() and cancel_delayed_work() callers. * * The timer must be canceled first because worker->lock is released * when doing so. But the work can be removed from the queue (list) * only when it can be queued again so that the return value can * be used for reference counting. */ kthread_cancel_delayed_work_timer(work, &flags); if (work->canceling) { /* The number of works in the queue does not change. */ ret = true; goto out; } ret = __kthread_cancel_work(work); fast_queue: __kthread_queue_delayed_work(worker, dwork, delay); out: raw_spin_unlock_irqrestore(&worker->lock, flags); return ret; } EXPORT_SYMBOL_GPL(kthread_mod_delayed_work); static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork) { struct kthread_worker *worker = work->worker; unsigned long flags; int ret = false; if (!worker) goto out; raw_spin_lock_irqsave(&worker->lock, flags); /* Work must not be used with >1 worker, see kthread_queue_work(). */ WARN_ON_ONCE(work->worker != worker); if (is_dwork) kthread_cancel_delayed_work_timer(work, &flags); ret = __kthread_cancel_work(work); if (worker->current_work != work) goto out_fast; /* * The work is in progress and we need to wait with the lock released. * In the meantime, block any queuing by setting the canceling counter. */ work->canceling++; raw_spin_unlock_irqrestore(&worker->lock, flags); kthread_flush_work(work); raw_spin_lock_irqsave(&worker->lock, flags); work->canceling--; out_fast: raw_spin_unlock_irqrestore(&worker->lock, flags); out: return ret; } /** * kthread_cancel_work_sync - cancel a kthread work and wait for it to finish * @work: the kthread work to cancel * * Cancel @work and wait for its execution to finish. This function * can be used even if the work re-queues itself. On return from this * function, @work is guaranteed to be not pending or executing on any CPU. * * kthread_cancel_work_sync(&delayed_work->work) must not be used for * delayed_work's. Use kthread_cancel_delayed_work_sync() instead. * * The caller must ensure that the worker on which @work was last * queued can't be destroyed before this function returns. * * Return: %true if @work was pending, %false otherwise. */ bool kthread_cancel_work_sync(struct kthread_work *work) { return __kthread_cancel_work_sync(work, false); } EXPORT_SYMBOL_GPL(kthread_cancel_work_sync); /** * kthread_cancel_delayed_work_sync - cancel a kthread delayed work and * wait for it to finish. * @dwork: the kthread delayed work to cancel * * This is kthread_cancel_work_sync() for delayed works. * * Return: %true if @dwork was pending, %false otherwise. */ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *dwork) { return __kthread_cancel_work_sync(&dwork->work, true); } EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync); /** * kthread_flush_worker - flush all current works on a kthread_worker * @worker: worker to flush * * Wait until all currently executing or pending works on @worker are * finished. 
*/ void kthread_flush_worker(struct kthread_worker *worker) { struct kthread_flush_work fwork = { KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn), COMPLETION_INITIALIZER_ONSTACK(fwork.done), }; kthread_queue_work(worker, &fwork.work); wait_for_completion(&fwork.done); } EXPORT_SYMBOL_GPL(kthread_flush_worker); /** * kthread_destroy_worker - destroy a kthread worker * @worker: worker to be destroyed * * Flush and destroy @worker. The simple flush is enough because the kthread * worker API is used only in trivial scenarios. There are no multi-step state * machines needed. * * Note that this function is not responsible for handling delayed work, so * caller should be responsible for queuing or canceling all delayed work items * before invoke this function. */ void kthread_destroy_worker(struct kthread_worker *worker) { struct task_struct *task; task = worker->task; if (WARN_ON(!task)) return; kthread_flush_worker(worker); kthread_stop(task); WARN_ON(!list_empty(&worker->delayed_work_list)); WARN_ON(!list_empty(&worker->work_list)); kfree(worker); } EXPORT_SYMBOL(kthread_destroy_worker); /** * kthread_use_mm - make the calling kthread operate on an address space * @mm: address space to operate on */ void kthread_use_mm(struct mm_struct *mm) { struct mm_struct *active_mm; struct task_struct *tsk = current; WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(tsk->mm); /* * It is possible for mm to be the same as tsk->active_mm, but * we must still mmgrab(mm) and mmdrop_lazy_tlb(active_mm), * because these references are not equivalent. */ mmgrab(mm); task_lock(tsk); /* Hold off tlb flush IPIs while switching mm's */ local_irq_disable(); active_mm = tsk->active_mm; tsk->active_mm = mm; tsk->mm = mm; membarrier_update_current_mm(mm); switch_mm_irqs_off(active_mm, mm, tsk); local_irq_enable(); task_unlock(tsk); #ifdef finish_arch_post_lock_switch finish_arch_post_lock_switch(); #endif /* * When a kthread starts operating on an address space, the loop * in membarrier_{private,global}_expedited() may not observe * that tsk->mm, and not issue an IPI. Membarrier requires a * memory barrier after storing to tsk->mm, before accessing * user-space memory. A full memory barrier for membarrier * {PRIVATE,GLOBAL}_EXPEDITED is implicitly provided by * mmdrop_lazy_tlb(). */ mmdrop_lazy_tlb(active_mm); } EXPORT_SYMBOL_GPL(kthread_use_mm); /** * kthread_unuse_mm - reverse the effect of kthread_use_mm() * @mm: address space to operate on */ void kthread_unuse_mm(struct mm_struct *mm) { struct task_struct *tsk = current; WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(!tsk->mm); task_lock(tsk); /* * When a kthread stops operating on an address space, the loop * in membarrier_{private,global}_expedited() may not observe * that tsk->mm, and not issue an IPI. Membarrier requires a * memory barrier after accessing user-space memory, before * clearing tsk->mm. */ smp_mb__after_spinlock(); local_irq_disable(); tsk->mm = NULL; membarrier_update_current_mm(NULL); mmgrab_lazy_tlb(mm); /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); local_irq_enable(); task_unlock(tsk); mmdrop(mm); } EXPORT_SYMBOL_GPL(kthread_unuse_mm); #ifdef CONFIG_BLK_CGROUP /** * kthread_associate_blkcg - associate blkcg to current kthread * @css: the cgroup info * * Current thread must be a kthread. The thread is running jobs on behalf of * other threads. In some cases, we expect the jobs attach cgroup info of * original threads instead of that of current thread. 
This function stores * original thread's cgroup info in current kthread context for later * retrieval. */ void kthread_associate_blkcg(struct cgroup_subsys_state *css) { struct kthread *kthread; if (!(current->flags & PF_KTHREAD)) return; kthread = to_kthread(current); if (!kthread) return; if (kthread->blkcg_css) { css_put(kthread->blkcg_css); kthread->blkcg_css = NULL; } if (css) { css_get(css); kthread->blkcg_css = css; } } EXPORT_SYMBOL(kthread_associate_blkcg); /** * kthread_blkcg - get associated blkcg css of current kthread * * Current thread must be a kthread. */ struct cgroup_subsys_state *kthread_blkcg(void) { struct kthread *kthread; if (current->flags & PF_KTHREAD) { kthread = to_kthread(current); if (kthread) return kthread->blkcg_css; } return NULL; } #endif
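/*
 * Illustrative sketch (not part of kthread.c): the minimal lifecycle a
 * driver-style user of the kthread worker API above typically follows.
 * The names my_work_fn(), example_worker_lifecycle() and the "example"
 * worker name are assumptions made purely for illustration.
 */
#include <linux/kthread.h>
#include <linux/jiffies.h>

static void my_work_fn(struct kthread_work *work)
{
	/* Runs in the worker kthread's context. */
}

static int example_worker_lifecycle(void)
{
	struct kthread_worker *worker;
	struct kthread_work work;
	struct kthread_delayed_work dwork;

	worker = kthread_create_worker(0, "example");
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	kthread_init_work(&work, my_work_fn);
	kthread_init_delayed_work(&dwork, my_work_fn);

	kthread_queue_work(worker, &work);
	kthread_queue_delayed_work(worker, &dwork, msecs_to_jiffies(100));

	/* Wait for everything already queued, then cancel the delayed item. */
	kthread_flush_worker(worker);
	kthread_cancel_delayed_work_sync(&dwork);

	/* Delayed work must be idle before the worker is destroyed. */
	kthread_destroy_worker(worker);
	return 0;
}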
// SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains AppArmor mediation of files * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd.
*/ #include <linux/tty.h> #include <linux/fdtable.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/mount.h> #include "include/apparmor.h" #include "include/audit.h" #include "include/cred.h" #include "include/file.h" #include "include/match.h" #include "include/net.h" #include "include/path.h" #include "include/policy.h" #include "include/label.h" static u32 map_mask_to_chr_mask(u32 mask) { u32 m = mask & PERMS_CHRS_MASK; if (mask & AA_MAY_GETATTR) m |= MAY_READ; if (mask & (AA_MAY_SETATTR | AA_MAY_CHMOD | AA_MAY_CHOWN)) m |= MAY_WRITE; return m; } /** * file_audit_cb - call back for file specific audit fields * @ab: audit_buffer (NOT NULL) * @va: audit struct to audit values of (NOT NULL) */ static void file_audit_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; struct apparmor_audit_data *ad = aad(sa); kuid_t fsuid = ad->subj_cred ? ad->subj_cred->fsuid : current_fsuid(); char str[10]; if (ad->request & AA_AUDIT_FILE_MASK) { aa_perm_mask_to_str(str, sizeof(str), aa_file_perm_chrs, map_mask_to_chr_mask(ad->request)); audit_log_format(ab, " requested_mask=\"%s\"", str); } if (ad->denied & AA_AUDIT_FILE_MASK) { aa_perm_mask_to_str(str, sizeof(str), aa_file_perm_chrs, map_mask_to_chr_mask(ad->denied)); audit_log_format(ab, " denied_mask=\"%s\"", str); } if (ad->request & AA_AUDIT_FILE_MASK) { audit_log_format(ab, " fsuid=%d", from_kuid(&init_user_ns, fsuid)); audit_log_format(ab, " ouid=%d", from_kuid(&init_user_ns, ad->fs.ouid)); } if (ad->peer) { audit_log_format(ab, " target="); aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAG_VIEW_SUBNS, GFP_KERNEL); } else if (ad->fs.target) { audit_log_format(ab, " target="); audit_log_untrustedstring(ab, ad->fs.target); } } /** * aa_audit_file - handle the auditing of file operations * @subj_cred: cred of the subject * @profile: the profile being enforced (NOT NULL) * @perms: the permissions computed for the request (NOT NULL) * @op: operation being mediated * @request: permissions requested * @name: name of object being mediated (MAYBE NULL) * @target: name of target (MAYBE NULL) * @tlabel: target label (MAY BE NULL) * @ouid: object uid * @info: extra information message (MAYBE NULL) * @error: 0 if operation allowed else failure error code * * Returns: %0 or error on failure */ int aa_audit_file(const struct cred *subj_cred, struct aa_profile *profile, struct aa_perms *perms, const char *op, u32 request, const char *name, const char *target, struct aa_label *tlabel, kuid_t ouid, const char *info, int error) { int type = AUDIT_APPARMOR_AUTO; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_TASK, AA_CLASS_FILE, op); ad.subj_cred = subj_cred; ad.request = request; ad.name = name; ad.fs.target = target; ad.peer = tlabel; ad.fs.ouid = ouid; ad.info = info; ad.error = error; ad.common.u.tsk = NULL; if (likely(!ad.error)) { u32 mask = perms->audit; if (unlikely(AUDIT_MODE(profile) == AUDIT_ALL)) mask = 0xffff; /* mask off perms that are not being force audited */ ad.request &= mask; if (likely(!ad.request)) return 0; type = AUDIT_APPARMOR_AUDIT; } else { /* only report permissions that were denied */ ad.request = ad.request & ~perms->allow; AA_BUG(!ad.request); if (ad.request & perms->kill) type = AUDIT_APPARMOR_KILL; /* quiet known rejects, assumes quiet and kill do not overlap */ if ((ad.request & perms->quiet) && AUDIT_MODE(profile) != AUDIT_NOQUIET && AUDIT_MODE(profile) != AUDIT_ALL) ad.request &= ~perms->quiet; if (!ad.request) return ad.error; } ad.denied = ad.request & ~perms->allow; return aa_audit(type, 
profile, &ad, file_audit_cb); } /** * is_deleted - test if a file has been completely unlinked * @dentry: dentry of file to test for deletion (NOT NULL) * * Returns: true if deleted else false */ static inline bool is_deleted(struct dentry *dentry) { if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0) return true; return false; } static int path_name(const char *op, const struct cred *subj_cred, struct aa_label *label, const struct path *path, int flags, char *buffer, const char **name, struct path_cond *cond, u32 request) { struct aa_profile *profile; const char *info = NULL; int error; error = aa_path_name(path, flags, buffer, name, &info, labels_profile(label)->disconnected); if (error) { fn_for_each_confined(label, profile, aa_audit_file(subj_cred, profile, &nullperms, op, request, *name, NULL, NULL, cond->uid, info, error)); return error; } return 0; } struct aa_perms default_perms = {}; /** * aa_lookup_fperms - convert dfa compressed perms to internal perms * @file_rules: the aa_policydb to lookup perms for (NOT NULL) * @state: state in dfa * @cond: conditions to consider (NOT NULL) * * TODO: convert from dfa + state to permission entry * * Returns: a pointer to a file permission set */ struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules, aa_state_t state, struct path_cond *cond) { unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state]; if (!(file_rules->perms)) return &default_perms; if (uid_eq(current_fsuid(), cond->uid)) return &(file_rules->perms[index]); return &(file_rules->perms[index + 1]); } /** * aa_str_perms - find permission that match @name * @file_rules: the aa_policydb to match against (NOT NULL) * @start: state to start matching in * @name: string to match against dfa (NOT NULL) * @cond: conditions to consider for permission set computation (NOT NULL) * @perms: Returns - the permissions found when matching @name * * Returns: the final state in @dfa when beginning @start and walking @name */ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, const char *name, struct path_cond *cond, struct aa_perms *perms) { aa_state_t state; state = aa_dfa_match(file_rules->dfa, start, name); *perms = *(aa_lookup_fperms(file_rules, state, cond)); return state; } static int __aa_path_perm(const char *op, const struct cred *subj_cred, struct aa_profile *profile, const char *name, u32 request, struct path_cond *cond, int flags, struct aa_perms *perms) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); int e = 0; if (profile_unconfined(profile)) return 0; aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE], name, cond, perms); if (request & ~perms->allow) e = -EACCES; return aa_audit_file(subj_cred, profile, perms, op, request, name, NULL, NULL, cond->uid, NULL, e); } static int profile_path_perm(const char *op, const struct cred *subj_cred, struct aa_profile *profile, const struct path *path, char *buffer, u32 request, struct path_cond *cond, int flags, struct aa_perms *perms) { const char *name; int error; if (profile_unconfined(profile)) return 0; error = path_name(op, subj_cred, &profile->label, path, flags | profile->path_flags, buffer, &name, cond, request); if (error) return error; return __aa_path_perm(op, subj_cred, profile, name, request, cond, flags, perms); } /** * aa_path_perm - do permissions check & audit for @path * @op: operation being checked * @subj_cred: subject cred * @label: profile being enforced (NOT NULL) * @path: path to check permissions of (NOT NULL) * @flags: any 
additional path flags beyond what the profile specifies * @request: requested permissions * @cond: conditional info for this request (NOT NULL) * * Returns: %0 else error if access denied or other error */ int aa_path_perm(const char *op, const struct cred *subj_cred, struct aa_label *label, const struct path *path, int flags, u32 request, struct path_cond *cond) { struct aa_perms perms = {}; struct aa_profile *profile; char *buffer = NULL; int error; flags |= PATH_DELEGATE_DELETED | (S_ISDIR(cond->mode) ? PATH_IS_DIR : 0); buffer = aa_get_buffer(false); if (!buffer) return -ENOMEM; error = fn_for_each_confined(label, profile, profile_path_perm(op, subj_cred, profile, path, buffer, request, cond, flags, &perms)); aa_put_buffer(buffer); return error; } /** * xindex_is_subset - helper for aa_path_link * @link: link permission set * @target: target permission set * * test target x permissions are equal OR a subset of link x permissions * this is done as part of the subset test, where a hardlink must have * a subset of permissions that the target has. * * Returns: true if subset else false */ static inline bool xindex_is_subset(u32 link, u32 target) { if (((link & ~AA_X_UNSAFE) != (target & ~AA_X_UNSAFE)) || ((link & AA_X_UNSAFE) && !(target & AA_X_UNSAFE))) return false; return true; } static int profile_path_link(const struct cred *subj_cred, struct aa_profile *profile, const struct path *link, char *buffer, const struct path *target, char *buffer2, struct path_cond *cond) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); const char *lname, *tname = NULL; struct aa_perms lperms = {}, perms; const char *info = NULL; u32 request = AA_MAY_LINK; aa_state_t state; int error; error = path_name(OP_LINK, subj_cred, &profile->label, link, profile->path_flags, buffer, &lname, cond, AA_MAY_LINK); if (error) goto audit; /* buffer2 freed below, tname is pointer in buffer2 */ error = path_name(OP_LINK, subj_cred, &profile->label, target, profile->path_flags, buffer2, &tname, cond, AA_MAY_LINK); if (error) goto audit; error = -EACCES; /* aa_str_perms - handles the case of the dfa being NULL */ state = aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE], lname, cond, &lperms); if (!(lperms.allow & AA_MAY_LINK)) goto audit; /* test to see if target can be paired with link */ state = aa_dfa_null_transition(rules->file->dfa, state); aa_str_perms(rules->file, state, tname, cond, &perms); /* force audit/quiet masks for link are stored in the second entry * in the link pair. */ lperms.audit = perms.audit; lperms.quiet = perms.quiet; lperms.kill = perms.kill; if (!(perms.allow & AA_MAY_LINK)) { info = "target restricted"; lperms = perms; goto audit; } /* done if link subset test is not required */ if (!(perms.allow & AA_LINK_SUBSET)) goto done_tests; /* Do link perm subset test requiring allowed permission on link are * a subset of the allowed permissions on target. 
*/ aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE], tname, cond, &perms); /* AA_MAY_LINK is not considered in the subset test */ request = lperms.allow & ~AA_MAY_LINK; lperms.allow &= perms.allow | AA_MAY_LINK; request |= AA_AUDIT_FILE_MASK & (lperms.allow & ~perms.allow); if (request & ~lperms.allow) { goto audit; } else if ((lperms.allow & MAY_EXEC) && !xindex_is_subset(lperms.xindex, perms.xindex)) { lperms.allow &= ~MAY_EXEC; request |= MAY_EXEC; info = "link not subset of target"; goto audit; } done_tests: error = 0; audit: return aa_audit_file(subj_cred, profile, &lperms, OP_LINK, request, lname, tname, NULL, cond->uid, info, error); } /** * aa_path_link - Handle hard link permission check * @subj_cred: subject cred * @label: the label being enforced (NOT NULL) * @old_dentry: the target dentry (NOT NULL) * @new_dir: directory the new link will be created in (NOT NULL) * @new_dentry: the link being created (NOT NULL) * * Handle the permission test for a link & target pair. Permission * is encoded as a pair where the link permission is determined * first, and if allowed, the target is tested. The target test * is done from the point of the link match (not start of DFA) * making the target permission dependent on the link permission match. * * The subset test if required forces that permissions granted * on link are a subset of the permission granted to target. * * Returns: %0 if allowed else error */ int aa_path_link(const struct cred *subj_cred, struct aa_label *label, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { struct path link = { .mnt = new_dir->mnt, .dentry = new_dentry }; struct path target = { .mnt = new_dir->mnt, .dentry = old_dentry }; struct path_cond cond = { d_backing_inode(old_dentry)->i_uid, d_backing_inode(old_dentry)->i_mode }; char *buffer = NULL, *buffer2 = NULL; struct aa_profile *profile; int error; /* buffer freed below, lname is pointer in buffer */ buffer = aa_get_buffer(false); buffer2 = aa_get_buffer(false); error = -ENOMEM; if (!buffer || !buffer2) goto out; error = fn_for_each_confined(label, profile, profile_path_link(subj_cred, profile, &link, buffer, &target, buffer2, &cond)); out: aa_put_buffer(buffer); aa_put_buffer(buffer2); return error; } static void update_file_ctx(struct aa_file_ctx *fctx, struct aa_label *label, u32 request) { struct aa_label *l, *old; /* update caching of label on file_ctx */ spin_lock(&fctx->lock); old = rcu_dereference_protected(fctx->label, lockdep_is_held(&fctx->lock)); l = aa_label_merge(old, label, GFP_ATOMIC); if (l) { if (l != old) { rcu_assign_pointer(fctx->label, l); aa_put_label(old); } else aa_put_label(l); fctx->allow |= request; } spin_unlock(&fctx->lock); } static int __file_path_perm(const char *op, const struct cred *subj_cred, struct aa_label *label, struct aa_label *flabel, struct file *file, u32 request, u32 denied, bool in_atomic) { struct aa_profile *profile; struct aa_perms perms = {}; vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(file), file_inode(file)); struct path_cond cond = { .uid = vfsuid_into_kuid(vfsuid), .mode = file_inode(file)->i_mode }; char *buffer; int flags, error; /* revalidation due to label out of date. No revocation at this time */ if (!denied && aa_label_is_subset(flabel, label)) /* TODO: check for revocation on stale profiles */ return 0; flags = PATH_DELEGATE_DELETED | (S_ISDIR(cond.mode) ? 
PATH_IS_DIR : 0); buffer = aa_get_buffer(in_atomic); if (!buffer) return -ENOMEM; /* check every profile in task label not in current cache */ error = fn_for_each_not_in_set(flabel, label, profile, profile_path_perm(op, subj_cred, profile, &file->f_path, buffer, request, &cond, flags, &perms)); if (denied && !error) { /* * check every profile in file label that was not tested * in the initial check above. * * TODO: cache full perms so this only happens because of * conditionals * TODO: don't audit here */ if (label == flabel) error = fn_for_each(label, profile, profile_path_perm(op, subj_cred, profile, &file->f_path, buffer, request, &cond, flags, &perms)); else error = fn_for_each_not_in_set(label, flabel, profile, profile_path_perm(op, subj_cred, profile, &file->f_path, buffer, request, &cond, flags, &perms)); } if (!error) update_file_ctx(file_ctx(file), label, request); aa_put_buffer(buffer); return error; } static int __file_sock_perm(const char *op, const struct cred *subj_cred, struct aa_label *label, struct aa_label *flabel, struct file *file, u32 request, u32 denied) { struct socket *sock = (struct socket *) file->private_data; int error; AA_BUG(!sock); /* revalidation due to label out of date. No revocation at this time */ if (!denied && aa_label_is_subset(flabel, label)) return 0; /* TODO: improve to skip profiles cached in flabel */ error = aa_sock_file_perm(subj_cred, label, op, request, sock); if (denied) { /* TODO: improve to skip profiles checked above */ /* check every profile in file label to is cached */ last_error(error, aa_sock_file_perm(subj_cred, flabel, op, request, sock)); } if (!error) update_file_ctx(file_ctx(file), label, request); return error; } /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked * @subj_cred: subject cred * @label: label being enforced (NOT NULL) * @file: file to revalidate access permissions on (NOT NULL) * @request: requested permissions * @in_atomic: whether allocations need to be done in atomic context * * Returns: %0 if access allowed else error */ int aa_file_perm(const char *op, const struct cred *subj_cred, struct aa_label *label, struct file *file, u32 request, bool in_atomic) { struct aa_file_ctx *fctx; struct aa_label *flabel; u32 denied; int error = 0; AA_BUG(!label); AA_BUG(!file); fctx = file_ctx(file); rcu_read_lock(); flabel = rcu_dereference(fctx->label); AA_BUG(!flabel); /* revalidate access, if task is unconfined, or the cached cred * doesn't match or if the request is for more permissions than * was granted. 
* * Note: the test for !unconfined(flabel) is to handle file * delegation from unconfined tasks */ denied = request & ~fctx->allow; if (unconfined(label) || unconfined(flabel) || (!denied && aa_label_is_subset(flabel, label))) { rcu_read_unlock(); goto done; } flabel = aa_get_newest_label(flabel); rcu_read_unlock(); /* TODO: label cross check */ if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry)) error = __file_path_perm(op, subj_cred, label, flabel, file, request, denied, in_atomic); else if (S_ISSOCK(file_inode(file)->i_mode)) error = __file_sock_perm(op, subj_cred, label, flabel, file, request, denied); aa_put_label(flabel); done: return error; } static void revalidate_tty(const struct cred *subj_cred, struct aa_label *label) { struct tty_struct *tty; int drop_tty = 0; tty = get_current_tty(); if (!tty) return; spin_lock(&tty->files_lock); if (!list_empty(&tty->tty_files)) { struct tty_file_private *file_priv; struct file *file; /* TODO: Revalidate access to controlling tty. */ file_priv = list_first_entry(&tty->tty_files, struct tty_file_private, list); file = file_priv->file; if (aa_file_perm(OP_INHERIT, subj_cred, label, file, MAY_READ | MAY_WRITE, IN_ATOMIC)) drop_tty = 1; } spin_unlock(&tty->files_lock); tty_kref_put(tty); if (drop_tty) no_tty(); } struct cred_label { const struct cred *cred; struct aa_label *label; }; static int match_file(const void *p, struct file *file, unsigned int fd) { struct cred_label *cl = (struct cred_label *)p; if (aa_file_perm(OP_INHERIT, cl->cred, cl->label, file, aa_map_file_to_perms(file), IN_ATOMIC)) return fd + 1; return 0; } /* based on selinux's flush_unauthorized_files */ void aa_inherit_files(const struct cred *cred, struct files_struct *files) { struct aa_label *label = aa_get_newest_cred_label(cred); struct cred_label cl = { .cred = cred, .label = label, }; struct file *devnull = NULL; unsigned int n; revalidate_tty(cred, label); /* Revalidate access to inherited open files. */ n = iterate_fd(files, 0, match_file, &cl); if (!n) /* none found? */ goto out; devnull = dentry_open(&aa_null, O_RDWR, cred); if (IS_ERR(devnull)) devnull = NULL; /* replace all the matching ones with this */ do { replace_fd(n - 1, devnull, 0); } while ((n = iterate_fd(files, n, match_file, &cl)) != 0); if (devnull) fput(devnull); out: aa_put_label(label); }
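/*
 * Illustrative sketch (not part of this file): how a caller such as an LSM
 * hook could drive aa_path_perm() for a read access on a path, using the
 * signature documented above. example_path_read_perm() and the use of
 * OP_OPEN here are assumptions made only for illustration.
 */
static int example_path_read_perm(const struct cred *cred,
				  const struct path *path)
{
	struct aa_label *label = aa_get_newest_cred_label(cred);
	struct path_cond cond = {
		d_backing_inode(path->dentry)->i_uid,
		d_backing_inode(path->dentry)->i_mode
	};
	int error;

	/* aa_path_perm() iterates over every confined profile in @label. */
	error = aa_path_perm(OP_OPEN, cred, label, path, 0, MAY_READ, &cond);
	aa_put_label(label);
	return error;
}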
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PAGE_REF_H #define _LINUX_PAGE_REF_H #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/tracepoint-defs.h> DECLARE_TRACEPOINT(page_ref_set); DECLARE_TRACEPOINT(page_ref_mod); DECLARE_TRACEPOINT(page_ref_mod_and_test); DECLARE_TRACEPOINT(page_ref_mod_and_return); DECLARE_TRACEPOINT(page_ref_mod_unless); DECLARE_TRACEPOINT(page_ref_freeze); DECLARE_TRACEPOINT(page_ref_unfreeze); #ifdef CONFIG_DEBUG_PAGE_REF /* * Ideally we would want to use the trace_<tracepoint>_enabled() helper * functions. But due to include header file issues, that is not * feasible. Instead we have to open code the static key functions. * * See trace_##name##_enabled(void) in include/linux/tracepoint.h */ #define page_ref_tracepoint_active(t) tracepoint_enabled(t) extern void __page_ref_set(struct page *page, int v); extern void __page_ref_mod(struct page *page, int v); extern void __page_ref_mod_and_test(struct page *page, int v, int ret); extern void __page_ref_mod_and_return(struct page *page, int v, int ret); extern void __page_ref_mod_unless(struct page *page, int v, int u); extern void __page_ref_freeze(struct page *page, int v, int ret); extern void __page_ref_unfreeze(struct page *page, int v); #else #define page_ref_tracepoint_active(t) false static inline void __page_ref_set(struct page *page, int v) { } static inline void __page_ref_mod(struct page *page, int v) { } static inline void __page_ref_mod_and_test(struct page *page, int v, int ret) { } static inline void __page_ref_mod_and_return(struct page *page, int v, int ret) { } static inline void __page_ref_mod_unless(struct page *page, int v, int u) { } static inline void __page_ref_freeze(struct page *page, int v, int ret) { } static inline void __page_ref_unfreeze(struct page *page, int v) { } #endif static inline int page_ref_count(const struct page *page) { return atomic_read(&page->_refcount); } /** * folio_ref_count - The reference count on this folio. * @folio: The folio. * * The refcount is usually incremented by calls to folio_get() and * decremented by calls to folio_put().
Some typical users of the * folio refcount: * * - Each reference from a page table * - The page cache * - Filesystem private data * - The LRU list * - Pipes * - Direct IO which references this page in the process address space * * Return: The number of references to this folio. */ static inline int folio_ref_count(const struct folio *folio) { return page_ref_count(&folio->page); } static inline int page_count(const struct page *page) { return folio_ref_count(page_folio(page)); } static inline void set_page_count(struct page *page, int v) { atomic_set(&page->_refcount, v); if (page_ref_tracepoint_active(page_ref_set)) __page_ref_set(page, v); } static inline void folio_set_count(struct folio *folio, int v) { set_page_count(&folio->page, v); } /* * Setup the page count before being freed into the page allocator for * the first time (boot or memory hotplug) */ static inline void init_page_count(struct page *page) { set_page_count(page, 1); } static inline void page_ref_add(struct page *page, int nr) { atomic_add(nr, &page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, nr); } static inline void folio_ref_add(struct folio *folio, int nr) { page_ref_add(&folio->page, nr); } static inline void page_ref_sub(struct page *page, int nr) { atomic_sub(nr, &page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, -nr); } static inline void folio_ref_sub(struct folio *folio, int nr) { page_ref_sub(&folio->page, nr); } static inline int page_ref_sub_return(struct page *page, int nr) { int ret = atomic_sub_return(nr, &page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(page, -nr, ret); return ret; } static inline int folio_ref_sub_return(struct folio *folio, int nr) { return page_ref_sub_return(&folio->page, nr); } static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, 1); } static inline void folio_ref_inc(struct folio *folio) { page_ref_inc(&folio->page); } static inline void page_ref_dec(struct page *page) { atomic_dec(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, -1); } static inline void folio_ref_dec(struct folio *folio) { page_ref_dec(&folio->page); } static inline int page_ref_sub_and_test(struct page *page, int nr) { int ret = atomic_sub_and_test(nr, &page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_test)) __page_ref_mod_and_test(page, -nr, ret); return ret; } static inline int folio_ref_sub_and_test(struct folio *folio, int nr) { return page_ref_sub_and_test(&folio->page, nr); } static inline int page_ref_inc_return(struct page *page) { int ret = atomic_inc_return(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(page, 1, ret); return ret; } static inline int folio_ref_inc_return(struct folio *folio) { return page_ref_inc_return(&folio->page); } static inline int page_ref_dec_and_test(struct page *page) { int ret = atomic_dec_and_test(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_test)) __page_ref_mod_and_test(page, -1, ret); return ret; } static inline int folio_ref_dec_and_test(struct folio *folio) { return page_ref_dec_and_test(&folio->page); } static inline int page_ref_dec_return(struct page *page) { int ret = atomic_dec_return(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod_and_return)) __page_ref_mod_and_return(page, -1, ret); return ret; } 
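/*
 * Illustrative sketch (not part of this header): the usual pairing of the
 * helpers above into a get/put style interface. example_get_page() and
 * example_put_page() are hypothetical names, and the call to __free_pages()
 * is only an assumed stand-in for whatever release path a real user has.
 */
static inline void example_get_page(struct page *page)
{
	/* Callers must already hold a reference, so the count is non-zero. */
	VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
	page_ref_inc(page);
}

static inline void example_put_page(struct page *page)
{
	/* Whoever drops the last reference is responsible for freeing. */
	if (page_ref_dec_and_test(page))
		__free_pages(page, 0);	/* assumed release path, for illustration */
}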
static inline int folio_ref_dec_return(struct folio *folio) { return page_ref_dec_return(&folio->page); } static inline bool page_ref_add_unless(struct page *page, int nr, int u) { bool ret = atomic_add_unless(&page->_refcount, nr, u); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); return ret; } static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) { return page_ref_add_unless(&folio->page, nr, u); } /** * folio_try_get - Attempt to increase the refcount on a folio. * @folio: The folio. * * If you do not already have a reference to a folio, you can attempt to * get one using this function. It may fail if, for example, the folio * has been freed since you found a pointer to it, or it is frozen for * the purposes of splitting or migration. * * Return: True if the reference count was successfully incremented. */ static inline bool folio_try_get(struct folio *folio) { return folio_ref_add_unless(folio, 1, 0); } static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) { #ifdef CONFIG_TINY_RCU /* * The caller guarantees the folio will not be freed from interrupt * context, so (on !SMP) we only need preemption to be disabled * and TINY_RCU does that for us. */ # ifdef CONFIG_PREEMPT_COUNT VM_BUG_ON(!in_atomic() && !irqs_disabled()); # endif VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); folio_ref_add(folio, count); #else if (unlikely(!folio_ref_add_unless(folio, count, 0))) { /* Either the folio has been freed, or will be freed. */ return false; } #endif return true; } /** * folio_try_get_rcu - Attempt to increase the refcount on a folio. * @folio: The folio. * * This is a version of folio_try_get() optimised for non-SMP kernels. * If you are still holding the rcu_read_lock() after looking up the * page and know that the page cannot have its refcount decreased to * zero in interrupt context, you can use this instead of folio_try_get(). * * Example users include get_user_pages_fast() (as pages are not unmapped * from interrupt context) and the page cache lookups (as pages are not * truncated from interrupt context). We also know that pages are not * frozen in interrupt context for the purposes of splitting or migration. * * You can also use this function if you're holding a lock that prevents * pages being frozen & removed; eg the i_pages lock for the page cache * or the mmap_lock or page table lock for page tables. In this case, * it will always succeed, and you could have used a plain folio_get(), * but it's sometimes more convenient to have a common function called * from both locked and RCU-protected contexts. * * Return: True if the reference count was successfully incremented. 
*/ static inline bool folio_try_get_rcu(struct folio *folio) { return folio_ref_try_add_rcu(folio, 1); } static inline int page_ref_freeze(struct page *page, int count) { int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); if (page_ref_tracepoint_active(page_ref_freeze)) __page_ref_freeze(page, count, ret); return ret; } static inline int folio_ref_freeze(struct folio *folio, int count) { return page_ref_freeze(&folio->page, count); } static inline void page_ref_unfreeze(struct page *page, int count) { VM_BUG_ON_PAGE(page_count(page) != 0, page); VM_BUG_ON(count == 0); atomic_set_release(&page->_refcount, count); if (page_ref_tracepoint_active(page_ref_unfreeze)) __page_ref_unfreeze(page, count); } static inline void folio_ref_unfreeze(struct folio *folio, int count) { page_ref_unfreeze(&folio->page, count); } #endif
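/*
 * Illustrative sketch (not part of this header): lockless-lookup style use of
 * folio_try_get() and the freeze/unfreeze pair documented above. The function
 * names are hypothetical.
 */
static inline bool example_grab_folio(struct folio *folio)
{
	/* May fail if the folio has been freed or is frozen for splitting. */
	return folio_try_get(folio);
}

static inline bool example_exclusive_access(struct folio *folio, int expected)
{
	/* Succeeds only when we hold all @expected references ourselves. */
	if (!folio_ref_freeze(folio, expected))
		return false;

	/* ... no new references can be taken while the count is frozen ... */

	folio_ref_unfreeze(folio, expected);
	return true;
}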
// SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/fs.h> #include <linux/path.h> #include <linux/slab.h> #include <linux/fs_struct.h> #include "internal.h" /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. */ void set_fs_root(struct fs_struct *fs, const struct path *path) { struct path old_root; path_get(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } /* * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. * It can block. */ void set_fs_pwd(struct fs_struct *fs, const struct path *path) { struct path old_pwd; path_get(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); } static inline int replace_path(struct path *p, const struct path *old, const struct path *new) { if (likely(p->dentry != old->dentry || p->mnt != old->mnt)) return 0; *p = *new; return 1; } void chroot_fs_refs(const struct path *old_root, const struct path *new_root) { struct task_struct *g, *p; struct fs_struct *fs; int count = 0; read_lock(&tasklist_lock); for_each_process_thread(g, p) { task_lock(p); fs = p->fs; if (fs) { int hits = 0; spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); hits += replace_path(&fs->root, old_root, new_root); hits += replace_path(&fs->pwd, old_root, new_root); write_seqcount_end(&fs->seq); while (hits--) { count++; path_get(new_root); } spin_unlock(&fs->lock); } task_unlock(p); } read_unlock(&tasklist_lock); while (count--) path_put(old_root); } void free_fs_struct(struct fs_struct *fs) { path_put(&fs->root); path_put(&fs->pwd); kmem_cache_free(fs_cachep, fs); } void exit_fs(struct task_struct *tsk) { struct fs_struct *fs = tsk->fs; if (fs) { int kill; task_lock(tsk); spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); } } struct fs_struct *copy_fs_struct(struct fs_struct *old) { struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); /* We don't need to lock fs - think why ;-) */ if (fs) { fs->users = 1; fs->in_exec = 0; spin_lock_init(&fs->lock); seqcount_spinlock_init(&fs->seq, &fs->lock); fs->umask = old->umask; spin_lock(&old->lock); fs->root = old->root; path_get(&fs->root); fs->pwd = old->pwd; path_get(&fs->pwd); spin_unlock(&old->lock); } return fs; } int unshare_fs_struct(void) { struct fs_struct *fs = current->fs; struct fs_struct *new_fs = copy_fs_struct(fs); int kill; if (!new_fs) return -ENOMEM; task_lock(current); spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; spin_unlock(&fs->lock); task_unlock(current); if (kill) free_fs_struct(fs); return 0; } EXPORT_SYMBOL_GPL(unshare_fs_struct);
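/*
 * Illustrative sketch (not part of fs_struct.c): how a lockless reader can
 * sample fs->root consistently against the write_seqcount_begin()/end()
 * sections above. example_sample_root() is a hypothetical name; a real user
 * must also take a reference (e.g. path_get() under fs->lock) before using
 * the result beyond the retry loop.
 */
static struct path example_sample_root(struct fs_struct *fs)
{
	struct path root;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&fs->seq);
		root = fs->root;
	} while (read_seqcount_retry(&fs->seq, seq));

	return root;
}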
int current_umask(void) { return current->fs->umask; } EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .seq = SEQCNT_SPINLOCK_ZERO(init_fs.seq, &init_fs.lock), .umask = 0022, };
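/*
 * Illustrative sketch (not part of fs_struct.c): a chroot-style writer that
 * uses the helpers above. example_change_root_and_pwd() is a hypothetical
 * wrapper; a real chroot path performs permission and mount checks first.
 */
static void example_change_root_and_pwd(const struct path *new_root)
{
	/* Both helpers take their own reference and put the old path. */
	set_fs_pwd(current->fs, new_root);
	set_fs_root(current->fs, new_root);
}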
// SPDX-License-Identifier: GPL-2.0 /* * drivers/usb/core/usb.c * * (C) Copyright Linus Torvalds 1999 * (C) Copyright Johannes Erdfelt 1999-2001 * (C) Copyright Andreas Gal 1999 * (C) Copyright Gregory P. Smith 1999 * (C) Copyright Deti Fliegl 1999 (new USB architecture) * (C) Copyright Randy Dunlap 2000 * (C) Copyright David Brownell 2000-2004 * (C) Copyright Yggdrasil Computing, Inc. 2000 * (usb_device_id matching changes by Adam J. Richter) * (C) Copyright Greg Kroah-Hartman 2002-2003 * * Released under the GPLv2 only. * * NOTE! This is not actually a driver at all, rather this is * just a collection of helper routines that implement the * generic USB things that the real drivers can use.. * * Think of this as a "USB library" rather than anything else, * with no callbacks. Callbacks are evil.
*/ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/of.h> #include <linux/string.h> #include <linux/bitops.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/errno.h> #include <linux/usb.h> #include <linux/usb/hcd.h> #include <linux/mutex.h> #include <linux/workqueue.h> #include <linux/debugfs.h> #include <linux/usb/of.h> #include <asm/io.h> #include <linux/scatterlist.h> #include <linux/mm.h> #include <linux/dma-mapping.h> #include "hub.h" const char *usbcore_name = "usbcore"; static bool nousb; /* Disable USB when built into kernel image */ module_param(nousb, bool, 0444); /* * for external read access to <nousb> */ int usb_disabled(void) { return nousb; } EXPORT_SYMBOL_GPL(usb_disabled); #ifdef CONFIG_PM /* Default delay value, in seconds */ static int usb_autosuspend_delay = CONFIG_USB_AUTOSUSPEND_DELAY; module_param_named(autosuspend, usb_autosuspend_delay, int, 0644); MODULE_PARM_DESC(autosuspend, "default autosuspend delay"); #else #define usb_autosuspend_delay 0 #endif static bool match_endpoint(struct usb_endpoint_descriptor *epd, struct usb_endpoint_descriptor **bulk_in, struct usb_endpoint_descriptor **bulk_out, struct usb_endpoint_descriptor **int_in, struct usb_endpoint_descriptor **int_out) { switch (usb_endpoint_type(epd)) { case USB_ENDPOINT_XFER_BULK: if (usb_endpoint_dir_in(epd)) { if (bulk_in && !*bulk_in) { *bulk_in = epd; break; } } else { if (bulk_out && !*bulk_out) { *bulk_out = epd; break; } } return false; case USB_ENDPOINT_XFER_INT: if (usb_endpoint_dir_in(epd)) { if (int_in && !*int_in) { *int_in = epd; break; } } else { if (int_out && !*int_out) { *int_out = epd; break; } } return false; default: return false; } return (!bulk_in || *bulk_in) && (!bulk_out || *bulk_out) && (!int_in || *int_in) && (!int_out || *int_out); } /** * usb_find_common_endpoints() -- look up common endpoint descriptors * @alt: alternate setting to search * @bulk_in: pointer to descriptor pointer, or NULL * @bulk_out: pointer to descriptor pointer, or NULL * @int_in: pointer to descriptor pointer, or NULL * @int_out: pointer to descriptor pointer, or NULL * * Search the alternate setting's endpoint descriptors for the first bulk-in, * bulk-out, interrupt-in and interrupt-out endpoints and return them in the * provided pointers (unless they are NULL). * * If a requested endpoint is not found, the corresponding pointer is set to * NULL. * * Return: Zero if all requested descriptors were found, or -ENXIO otherwise. 
*/ int usb_find_common_endpoints(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_in, struct usb_endpoint_descriptor **bulk_out, struct usb_endpoint_descriptor **int_in, struct usb_endpoint_descriptor **int_out) { struct usb_endpoint_descriptor *epd; int i; if (bulk_in) *bulk_in = NULL; if (bulk_out) *bulk_out = NULL; if (int_in) *int_in = NULL; if (int_out) *int_out = NULL; for (i = 0; i < alt->desc.bNumEndpoints; ++i) { epd = &alt->endpoint[i].desc; if (match_endpoint(epd, bulk_in, bulk_out, int_in, int_out)) return 0; } return -ENXIO; } EXPORT_SYMBOL_GPL(usb_find_common_endpoints); /** * usb_find_common_endpoints_reverse() -- look up common endpoint descriptors * @alt: alternate setting to search * @bulk_in: pointer to descriptor pointer, or NULL * @bulk_out: pointer to descriptor pointer, or NULL * @int_in: pointer to descriptor pointer, or NULL * @int_out: pointer to descriptor pointer, or NULL * * Search the alternate setting's endpoint descriptors for the last bulk-in, * bulk-out, interrupt-in and interrupt-out endpoints and return them in the * provided pointers (unless they are NULL). * * If a requested endpoint is not found, the corresponding pointer is set to * NULL. * * Return: Zero if all requested descriptors were found, or -ENXIO otherwise. */ int usb_find_common_endpoints_reverse(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_in, struct usb_endpoint_descriptor **bulk_out, struct usb_endpoint_descriptor **int_in, struct usb_endpoint_descriptor **int_out) { struct usb_endpoint_descriptor *epd; int i; if (bulk_in) *bulk_in = NULL; if (bulk_out) *bulk_out = NULL; if (int_in) *int_in = NULL; if (int_out) *int_out = NULL; for (i = alt->desc.bNumEndpoints - 1; i >= 0; --i) { epd = &alt->endpoint[i].desc; if (match_endpoint(epd, bulk_in, bulk_out, int_in, int_out)) return 0; } return -ENXIO; } EXPORT_SYMBOL_GPL(usb_find_common_endpoints_reverse); /** * usb_find_endpoint() - Given an endpoint address, search for the endpoint's * usb_host_endpoint structure in an interface's current altsetting. * @intf: the interface whose current altsetting should be searched * @ep_addr: the endpoint address (number and direction) to find * * Search the altsetting's list of endpoints for one with the specified address. * * Return: Pointer to the usb_host_endpoint if found, %NULL otherwise. */ static const struct usb_host_endpoint *usb_find_endpoint( const struct usb_interface *intf, unsigned int ep_addr) { int n; const struct usb_host_endpoint *ep; n = intf->cur_altsetting->desc.bNumEndpoints; ep = intf->cur_altsetting->endpoint; for (; n > 0; (--n, ++ep)) { if (ep->desc.bEndpointAddress == ep_addr) return ep; } return NULL; } /** * usb_check_bulk_endpoints - Check whether an interface's current altsetting * contains a set of bulk endpoints with the given addresses. * @intf: the interface whose current altsetting should be searched * @ep_addrs: 0-terminated array of the endpoint addresses (number and * direction) to look for * * Search for endpoints with the specified addresses and check their types. * * Return: %true if all the endpoints are found and are bulk, %false otherwise. 
*/ bool usb_check_bulk_endpoints( const struct usb_interface *intf, const u8 *ep_addrs) { const struct usb_host_endpoint *ep; for (; *ep_addrs; ++ep_addrs) { ep = usb_find_endpoint(intf, *ep_addrs); if (!ep || !usb_endpoint_xfer_bulk(&ep->desc)) return false; } return true; } EXPORT_SYMBOL_GPL(usb_check_bulk_endpoints); /** * usb_check_int_endpoints - Check whether an interface's current altsetting * contains a set of interrupt endpoints with the given addresses. * @intf: the interface whose current altsetting should be searched * @ep_addrs: 0-terminated array of the endpoint addresses (number and * direction) to look for * * Search for endpoints with the specified addresses and check their types. * * Return: %true if all the endpoints are found and are interrupt, * %false otherwise. */ bool usb_check_int_endpoints( const struct usb_interface *intf, const u8 *ep_addrs) { const struct usb_host_endpoint *ep; for (; *ep_addrs; ++ep_addrs) { ep = usb_find_endpoint(intf, *ep_addrs); if (!ep || !usb_endpoint_xfer_int(&ep->desc)) return false; } return true; } EXPORT_SYMBOL_GPL(usb_check_int_endpoints); /** * usb_find_alt_setting() - Given a configuration, find the alternate setting * for the given interface. * @config: the configuration to search (not necessarily the current config). * @iface_num: interface number to search in * @alt_num: alternate interface setting number to search for. * * Search the configuration's interface cache for the given alt setting. * * Return: The alternate setting, if found. %NULL otherwise. */ struct usb_host_interface *usb_find_alt_setting( struct usb_host_config *config, unsigned int iface_num, unsigned int alt_num) { struct usb_interface_cache *intf_cache = NULL; int i; if (!config) return NULL; for (i = 0; i < config->desc.bNumInterfaces; i++) { if (config->intf_cache[i]->altsetting[0].desc.bInterfaceNumber == iface_num) { intf_cache = config->intf_cache[i]; break; } } if (!intf_cache) return NULL; for (i = 0; i < intf_cache->num_altsetting; i++) if (intf_cache->altsetting[i].desc.bAlternateSetting == alt_num) return &intf_cache->altsetting[i]; printk(KERN_DEBUG "Did not find alt setting %u for intf %u, " "config %u\n", alt_num, iface_num, config->desc.bConfigurationValue); return NULL; } EXPORT_SYMBOL_GPL(usb_find_alt_setting); /** * usb_ifnum_to_if - get the interface object with a given interface number * @dev: the device whose current configuration is considered * @ifnum: the desired interface * * This walks the device descriptor for the currently active configuration * to find the interface object with the particular interface number. * * Note that configuration descriptors are not required to assign interface * numbers sequentially, so that it would be incorrect to assume that * the first interface in that descriptor corresponds to interface zero. * This routine helps device drivers avoid such mistakes. * However, you should make sure that you do the right thing with any * alternate settings available for this interfaces. * * Don't call this function unless you are bound to one of the interfaces * on this device or you have locked the device! * * Return: A pointer to the interface that has @ifnum as interface number, * if found. %NULL otherwise. 
*/ struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev, unsigned ifnum) { struct usb_host_config *config = dev->actconfig; int i; if (!config) return NULL; for (i = 0; i < config->desc.bNumInterfaces; i++) if (config->interface[i]->altsetting[0] .desc.bInterfaceNumber == ifnum) return config->interface[i]; return NULL; } EXPORT_SYMBOL_GPL(usb_ifnum_to_if); /** * usb_altnum_to_altsetting - get the altsetting structure with a given alternate setting number. * @intf: the interface containing the altsetting in question * @altnum: the desired alternate setting number * * This searches the altsetting array of the specified interface for * an entry with the correct bAlternateSetting value. * * Note that altsettings need not be stored sequentially by number, so * it would be incorrect to assume that the first altsetting entry in * the array corresponds to altsetting zero. This routine helps device * drivers avoid such mistakes. * * Don't call this function unless you are bound to the intf interface * or you have locked the device! * * Return: A pointer to the entry of the altsetting array of @intf that * has @altnum as the alternate setting number. %NULL if not found. */ struct usb_host_interface *usb_altnum_to_altsetting( const struct usb_interface *intf, unsigned int altnum) { int i; for (i = 0; i < intf->num_altsetting; i++) { if (intf->altsetting[i].desc.bAlternateSetting == altnum) return &intf->altsetting[i]; } return NULL; } EXPORT_SYMBOL_GPL(usb_altnum_to_altsetting); struct find_interface_arg { int minor; struct device_driver *drv; }; static int __find_interface(struct device *dev, const void *data) { const struct find_interface_arg *arg = data; struct usb_interface *intf; if (!is_usb_interface(dev)) return 0; if (dev->driver != arg->drv) return 0; intf = to_usb_interface(dev); return intf->minor == arg->minor; } /** * usb_find_interface - find usb_interface pointer for driver and device * @drv: the driver whose current configuration is considered * @minor: the minor number of the desired device * * This walks the bus device list and returns a pointer to the interface * with the matching minor and driver. Note, this only works for devices * that share the USB major number. * * Return: A pointer to the interface with the matching major and @minor. */ struct usb_interface *usb_find_interface(struct usb_driver *drv, int minor) { struct find_interface_arg argb; struct device *dev; argb.minor = minor; argb.drv = &drv->driver; dev = bus_find_device(&usb_bus_type, NULL, &argb, __find_interface); /* Drop reference count from bus_find_device */ put_device(dev); return dev ? to_usb_interface(dev) : NULL; } EXPORT_SYMBOL_GPL(usb_find_interface); struct each_dev_arg { void *data; int (*fn)(struct usb_device *, void *); }; static int __each_dev(struct device *dev, void *data) { struct each_dev_arg *arg = (struct each_dev_arg *)data; /* There are struct usb_interface on the same bus, filter them out */ if (!is_usb_device(dev)) return 0; return arg->fn(to_usb_device(dev), arg->data); } /** * usb_for_each_dev - iterate over all USB devices in the system * @data: data pointer that will be handed to the callback function * @fn: callback function to be called for each USB device * * Iterate over all USB devices and call @fn for each, passing it @data. If it * returns anything other than 0, we break the iteration prematurely and return * that value. 
*/ int usb_for_each_dev(void *data, int (*fn)(struct usb_device *, void *)) { struct each_dev_arg arg = {data, fn}; return bus_for_each_dev(&usb_bus_type, NULL, &arg, __each_dev); } EXPORT_SYMBOL_GPL(usb_for_each_dev); /** * usb_release_dev - free a usb device structure when all users of it are finished. * @dev: device that's been disconnected * * Will be called only by the device core when all users of this usb device are * done. */ static void usb_release_dev(struct device *dev) { struct usb_device *udev; struct usb_hcd *hcd; udev = to_usb_device(dev); hcd = bus_to_hcd(udev->bus); usb_destroy_configuration(udev); usb_release_bos_descriptor(udev); of_node_put(dev->of_node); usb_put_hcd(hcd); kfree(udev->product); kfree(udev->manufacturer); kfree(udev->serial); kfree(udev); } static int usb_dev_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct usb_device *usb_dev; usb_dev = to_usb_device(dev); if (add_uevent_var(env, "BUSNUM=%03d", usb_dev->bus->busnum)) return -ENOMEM; if (add_uevent_var(env, "DEVNUM=%03d", usb_dev->devnum)) return -ENOMEM; return 0; } #ifdef CONFIG_PM /* USB device Power-Management thunks. * There's no need to distinguish here between quiescing a USB device * and powering it down; the generic_suspend() routine takes care of * it by skipping the usb_port_suspend() call for a quiesce. And for * USB interfaces there's no difference at all. */ static int usb_dev_prepare(struct device *dev) { return 0; /* Implement eventually? */ } static void usb_dev_complete(struct device *dev) { /* Currently used only for rebinding interfaces */ usb_resume_complete(dev); } static int usb_dev_suspend(struct device *dev) { return usb_suspend(dev, PMSG_SUSPEND); } static int usb_dev_resume(struct device *dev) { return usb_resume(dev, PMSG_RESUME); } static int usb_dev_freeze(struct device *dev) { return usb_suspend(dev, PMSG_FREEZE); } static int usb_dev_thaw(struct device *dev) { return usb_resume(dev, PMSG_THAW); } static int usb_dev_poweroff(struct device *dev) { return usb_suspend(dev, PMSG_HIBERNATE); } static int usb_dev_restore(struct device *dev) { return usb_resume(dev, PMSG_RESTORE); } static const struct dev_pm_ops usb_device_pm_ops = { .prepare = usb_dev_prepare, .complete = usb_dev_complete, .suspend = usb_dev_suspend, .resume = usb_dev_resume, .freeze = usb_dev_freeze, .thaw = usb_dev_thaw, .poweroff = usb_dev_poweroff, .restore = usb_dev_restore, .runtime_suspend = usb_runtime_suspend, .runtime_resume = usb_runtime_resume, .runtime_idle = usb_runtime_idle, }; #endif /* CONFIG_PM */ static char *usb_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid) { const struct usb_device *usb_dev; usb_dev = to_usb_device(dev); return kasprintf(GFP_KERNEL, "bus/usb/%03d/%03d", usb_dev->bus->busnum, usb_dev->devnum); } const struct device_type usb_device_type = { .name = "usb_device", .release = usb_release_dev, .uevent = usb_dev_uevent, .devnode = usb_devnode, #ifdef CONFIG_PM .pm = &usb_device_pm_ops, #endif }; static bool usb_dev_authorized(struct usb_device *dev, struct usb_hcd *hcd) { struct usb_hub *hub; if (!dev->parent) return true; /* Root hub always ok [and always wired] */ switch (hcd->dev_policy) { case USB_DEVICE_AUTHORIZE_NONE: default: return false; case USB_DEVICE_AUTHORIZE_ALL: return true; case USB_DEVICE_AUTHORIZE_INTERNAL: hub = usb_hub_to_struct_hub(dev->parent); return hub->ports[dev->portnum - 1]->connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED; } } /** * usb_alloc_dev - usb device constructor (usbcore-internal) * 
@parent: hub to which device is connected; null to allocate a root hub * @bus: bus used to access the device * @port1: one-based index of port; ignored for root hubs * * Context: task context, might sleep. * * Only hub drivers (including virtual root hub drivers for host * controllers) should ever call this. * * This call may not be used in a non-sleeping context. * * Return: On success, a pointer to the allocated usb device. %NULL on * failure. */ struct usb_device *usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1) { struct usb_device *dev; struct usb_hcd *usb_hcd = bus_to_hcd(bus); unsigned raw_port = port1; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; if (!usb_get_hcd(usb_hcd)) { kfree(dev); return NULL; } /* Root hubs aren't true devices, so don't allocate HCD resources */ if (usb_hcd->driver->alloc_dev && parent && !usb_hcd->driver->alloc_dev(usb_hcd, dev)) { usb_put_hcd(bus_to_hcd(bus)); kfree(dev); return NULL; } device_initialize(&dev->dev); dev->dev.bus = &usb_bus_type; dev->dev.type = &usb_device_type; dev->dev.groups = usb_device_groups; set_dev_node(&dev->dev, dev_to_node(bus->sysdev)); dev->state = USB_STATE_ATTACHED; dev->lpm_disable_count = 1; atomic_set(&dev->urbnum, 0); INIT_LIST_HEAD(&dev->ep0.urb_list); dev->ep0.desc.bLength = USB_DT_ENDPOINT_SIZE; dev->ep0.desc.bDescriptorType = USB_DT_ENDPOINT; /* ep0 maxpacket comes later, from device descriptor */ usb_enable_endpoint(dev, &dev->ep0, false); dev->can_submit = 1; /* Save readable and stable topology id, distinguishing devices * by location for diagnostics, tools, driver model, etc. The * string is a path along hub ports, from the root. Each device's * dev->devpath will be stable until USB is re-cabled, and hubs * are often labeled with these port numbers. The name isn't * as stable: bus->busnum changes easily from modprobe order, * cardbus or pci hotplugging, and so on. */ if (unlikely(!parent)) { dev->devpath[0] = '0'; dev->route = 0; dev->dev.parent = bus->controller; device_set_of_node_from_dev(&dev->dev, bus->sysdev); dev_set_name(&dev->dev, "usb%d", bus->busnum); } else { /* match any labeling on the hubs; it's one-based */ if (parent->devpath[0] == '0') { snprintf(dev->devpath, sizeof dev->devpath, "%d", port1); /* Root ports are not counted in route string */ dev->route = 0; } else { snprintf(dev->devpath, sizeof dev->devpath, "%s.%d", parent->devpath, port1); /* Route string assumes hubs have less than 16 ports */ if (port1 < 15) dev->route = parent->route + (port1 << ((parent->level - 1)*4)); else dev->route = parent->route + (15 << ((parent->level - 1)*4)); } dev->dev.parent = &parent->dev; dev_set_name(&dev->dev, "%d-%s", bus->busnum, dev->devpath); if (!parent->parent) { /* device under root hub's port */ raw_port = usb_hcd_find_raw_port_number(usb_hcd, port1); } dev->dev.of_node = usb_of_get_device_node(parent, raw_port); /* hub driver sets up TT records */ } dev->portnum = port1; dev->bus = bus; dev->parent = parent; INIT_LIST_HEAD(&dev->filelist); #ifdef CONFIG_PM pm_runtime_set_autosuspend_delay(&dev->dev, usb_autosuspend_delay * 1000); dev->connect_time = jiffies; dev->active_duration = -jiffies; #endif dev->authorized = usb_dev_authorized(dev, usb_hcd); return dev; } EXPORT_SYMBOL_GPL(usb_alloc_dev); /** * usb_get_dev - increments the reference count of the usb device structure * @dev: the device being referenced * * Each live reference to a device should be refcounted. 
* * Drivers for USB interfaces should normally record such references in * their probe() methods, when they bind to an interface, and release * them by calling usb_put_dev(), in their disconnect() methods. * However, if a driver does not access the usb_device structure after * its disconnect() method returns then refcounting is not necessary, * because the USB core guarantees that a usb_device will not be * deallocated until after all of its interface drivers have been unbound. * * Return: A pointer to the device with the incremented reference counter. */ struct usb_device *usb_get_dev(struct usb_device *dev) { if (dev) get_device(&dev->dev); return dev; } EXPORT_SYMBOL_GPL(usb_get_dev); /** * usb_put_dev - release a use of the usb device structure * @dev: device that's been disconnected * * Must be called when a user of a device is finished with it. When the last * user of the device calls this function, the memory of the device is freed. */ void usb_put_dev(struct usb_device *dev) { if (dev) put_device(&dev->dev); } EXPORT_SYMBOL_GPL(usb_put_dev); /** * usb_get_intf - increments the reference count of the usb interface structure * @intf: the interface being referenced * * Each live reference to a interface must be refcounted. * * Drivers for USB interfaces should normally record such references in * their probe() methods, when they bind to an interface, and release * them by calling usb_put_intf(), in their disconnect() methods. * However, if a driver does not access the usb_interface structure after * its disconnect() method returns then refcounting is not necessary, * because the USB core guarantees that a usb_interface will not be * deallocated until after its driver has been unbound. * * Return: A pointer to the interface with the incremented reference counter. */ struct usb_interface *usb_get_intf(struct usb_interface *intf) { if (intf) get_device(&intf->dev); return intf; } EXPORT_SYMBOL_GPL(usb_get_intf); /** * usb_put_intf - release a use of the usb interface structure * @intf: interface that's been decremented * * Must be called when a user of an interface is finished with it. When the * last user of the interface calls this function, the memory of the interface * is freed. */ void usb_put_intf(struct usb_interface *intf) { if (intf) put_device(&intf->dev); } EXPORT_SYMBOL_GPL(usb_put_intf); /** * usb_intf_get_dma_device - acquire a reference on the usb interface's DMA endpoint * @intf: the usb interface * * While a USB device cannot perform DMA operations by itself, many USB * controllers can. A call to usb_intf_get_dma_device() returns the DMA endpoint * for the given USB interface, if any. The returned device structure must be * released with put_device(). * * See also usb_get_dma_device(). * * Returns: A reference to the usb interface's DMA endpoint; or NULL if none * exists. */ struct device *usb_intf_get_dma_device(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); struct device *dmadev; if (!udev->bus) return NULL; dmadev = get_device(udev->bus->sysdev); if (!dmadev || !dmadev->dma_mask) { put_device(dmadev); return NULL; } return dmadev; } EXPORT_SYMBOL_GPL(usb_intf_get_dma_device); /* USB device locking * * USB devices and interfaces are locked using the semaphore in their * embedded struct device. The hub driver guarantees that whenever a * device is connected or disconnected, drivers are called with the * USB device locked as well as their particular interface. 
* * Complications arise when several devices are to be locked at the same * time. Only hub-aware drivers that are part of usbcore ever have to * do this; nobody else needs to worry about it. The rule for locking * is simple: * * When locking both a device and its parent, always lock the * parent first. */ /** * usb_lock_device_for_reset - cautiously acquire the lock for a usb device structure * @udev: device that's being locked * @iface: interface bound to the driver making the request (optional) * * Attempts to acquire the device lock, but fails if the device is * NOTATTACHED or SUSPENDED, or if iface is specified and the interface * is neither BINDING nor BOUND. Rather than sleeping to wait for the * lock, the routine polls repeatedly. This is to prevent deadlock with * disconnect; in some drivers (such as usb-storage) the disconnect() * or suspend() method will block waiting for a device reset to complete. * * Return: A negative error code for failure, otherwise 0. */ int usb_lock_device_for_reset(struct usb_device *udev, const struct usb_interface *iface) { unsigned long jiffies_expire = jiffies + HZ; if (udev->state == USB_STATE_NOTATTACHED) return -ENODEV; if (udev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; if (iface && (iface->condition == USB_INTERFACE_UNBINDING || iface->condition == USB_INTERFACE_UNBOUND)) return -EINTR; while (!usb_trylock_device(udev)) { /* If we can't acquire the lock after waiting one second, * we're probably deadlocked */ if (time_after(jiffies, jiffies_expire)) return -EBUSY; msleep(15); if (udev->state == USB_STATE_NOTATTACHED) return -ENODEV; if (udev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; if (iface && (iface->condition == USB_INTERFACE_UNBINDING || iface->condition == USB_INTERFACE_UNBOUND)) return -EINTR; } return 0; } EXPORT_SYMBOL_GPL(usb_lock_device_for_reset); /** * usb_get_current_frame_number - return current bus frame number * @dev: the device whose bus is being queried * * Return: The current frame number for the USB host controller used * with the given USB device. This can be used when scheduling * isochronous requests. * * Note: Different kinds of host controller have different "scheduling * horizons". While one type might support scheduling only 32 frames * into the future, others could support scheduling up to 1024 frames * into the future. * */ int usb_get_current_frame_number(struct usb_device *dev) { return usb_hcd_get_frame_number(dev); } EXPORT_SYMBOL_GPL(usb_get_current_frame_number); /*-------------------------------------------------------------------*/ /* * __usb_get_extra_descriptor() finds a descriptor of specific type in the * extra field of the interface and endpoint descriptor structs. 
*/ int __usb_get_extra_descriptor(char *buffer, unsigned size, unsigned char type, void **ptr, size_t minsize) { struct usb_descriptor_header *header; while (size >= sizeof(struct usb_descriptor_header)) { header = (struct usb_descriptor_header *)buffer; if (header->bLength < 2 || header->bLength > size) { printk(KERN_ERR "%s: bogus descriptor, type %d length %d\n", usbcore_name, header->bDescriptorType, header->bLength); return -1; } if (header->bDescriptorType == type && header->bLength >= minsize) { *ptr = header; return 0; } buffer += header->bLength; size -= header->bLength; } return -1; } EXPORT_SYMBOL_GPL(__usb_get_extra_descriptor); /** * usb_alloc_coherent - allocate dma-consistent buffer for URB_NO_xxx_DMA_MAP * @dev: device the buffer will be used with * @size: requested buffer size * @mem_flags: affect whether allocation may block * @dma: used to return DMA address of buffer * * Return: Either null (indicating no buffer could be allocated), or the * cpu-space pointer to a buffer that may be used to perform DMA to the * specified device. Such cpu-space buffers are returned along with the DMA * address (through the pointer provided). * * Note: * These buffers are used with URB_NO_xxx_DMA_MAP set in urb->transfer_flags * to avoid behaviors like using "DMA bounce buffers", or thrashing IOMMU * hardware during URB completion/resubmit. The implementation varies between * platforms, depending on details of how DMA will work to this device. * Using these buffers also eliminates cacheline sharing problems on * architectures where CPU caches are not DMA-coherent. On systems without * bus-snooping caches, these buffers are uncached. * * When the buffer is no longer used, free it with usb_free_coherent(). */ void *usb_alloc_coherent(struct usb_device *dev, size_t size, gfp_t mem_flags, dma_addr_t *dma) { if (!dev || !dev->bus) return NULL; return hcd_buffer_alloc(dev->bus, size, mem_flags, dma); } EXPORT_SYMBOL_GPL(usb_alloc_coherent); /** * usb_free_coherent - free memory allocated with usb_alloc_coherent() * @dev: device the buffer was used with * @size: requested buffer size * @addr: CPU address of buffer * @dma: DMA address of buffer * * This reclaims an I/O buffer, letting it be reused. The memory must have * been allocated using usb_alloc_coherent(), and the parameters must match * those provided in that allocation request. 
*/ void usb_free_coherent(struct usb_device *dev, size_t size, void *addr, dma_addr_t dma) { if (!dev || !dev->bus) return; if (!addr) return; hcd_buffer_free(dev->bus, size, addr, dma); } EXPORT_SYMBOL_GPL(usb_free_coherent); /* * Notifications of device and interface registration */ static int usb_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; switch (action) { case BUS_NOTIFY_ADD_DEVICE: if (dev->type == &usb_device_type) (void) usb_create_sysfs_dev_files(to_usb_device(dev)); else if (dev->type == &usb_if_device_type) usb_create_sysfs_intf_files(to_usb_interface(dev)); break; case BUS_NOTIFY_DEL_DEVICE: if (dev->type == &usb_device_type) usb_remove_sysfs_dev_files(to_usb_device(dev)); else if (dev->type == &usb_if_device_type) usb_remove_sysfs_intf_files(to_usb_interface(dev)); break; } return 0; } static struct notifier_block usb_bus_nb = { .notifier_call = usb_bus_notify, }; static void usb_debugfs_init(void) { debugfs_create_file("devices", 0444, usb_debug_root, NULL, &usbfs_devices_fops); } static void usb_debugfs_cleanup(void) { debugfs_lookup_and_remove("devices", usb_debug_root); } /* * Init */ static int __init usb_init(void) { int retval; if (usb_disabled()) { pr_info("%s: USB support disabled\n", usbcore_name); return 0; } usb_init_pool_max(); usb_debugfs_init(); usb_acpi_register(); retval = bus_register(&usb_bus_type); if (retval) goto bus_register_failed; retval = bus_register_notifier(&usb_bus_type, &usb_bus_nb); if (retval) goto bus_notifier_failed; retval = usb_major_init(); if (retval) goto major_init_failed; retval = class_register(&usbmisc_class); if (retval) goto class_register_failed; retval = usb_register(&usbfs_driver); if (retval) goto driver_register_failed; retval = usb_devio_init(); if (retval) goto usb_devio_init_failed; retval = usb_hub_init(); if (retval) goto hub_init_failed; retval = usb_register_device_driver(&usb_generic_driver, THIS_MODULE); if (!retval) goto out; usb_hub_cleanup(); hub_init_failed: usb_devio_cleanup(); usb_devio_init_failed: usb_deregister(&usbfs_driver); driver_register_failed: class_unregister(&usbmisc_class); class_register_failed: usb_major_cleanup(); major_init_failed: bus_unregister_notifier(&usb_bus_type, &usb_bus_nb); bus_notifier_failed: bus_unregister(&usb_bus_type); bus_register_failed: usb_acpi_unregister(); usb_debugfs_cleanup(); out: return retval; } /* * Cleanup */ static void __exit usb_exit(void) { /* This will matter if shutdown/reboot does exitcalls. */ if (usb_disabled()) return; usb_release_quirk_list(); usb_deregister_device_driver(&usb_generic_driver); usb_major_cleanup(); usb_deregister(&usbfs_driver); usb_devio_cleanup(); usb_hub_cleanup(); class_unregister(&usbmisc_class); bus_unregister_notifier(&usb_bus_type, &usb_bus_nb); bus_unregister(&usb_bus_type); usb_acpi_unregister(); usb_debugfs_cleanup(); idr_destroy(&usb_bus_idr); } subsys_initcall(usb_init); module_exit(usb_exit); MODULE_LICENSE("GPL");
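/*
 * Example (illustrative sketch only, not part of usbcore): how an interface
 * driver might use usb_find_common_endpoints() together with the
 * usb_get_dev()/usb_put_dev() refcounting described above.  All names here
 * (demo_*, DEMO_VID, DEMO_PID) are hypothetical placeholders.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/usb.h>

#define DEMO_VID	0x0000	/* hypothetical vendor id */
#define DEMO_PID	0x0000	/* hypothetical product id */

struct demo_priv {
	struct usb_device *udev;	/* refcounted via usb_get_dev() */
	struct usb_endpoint_descriptor *bulk_in;
	struct usb_endpoint_descriptor *bulk_out;
};

static int demo_probe(struct usb_interface *intf,
		      const struct usb_device_id *id)
{
	struct demo_priv *priv;
	int ret;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	/* Look up the first bulk-in/bulk-out pair of the current altsetting. */
	ret = usb_find_common_endpoints(intf->cur_altsetting,
					&priv->bulk_in, &priv->bulk_out,
					NULL, NULL);
	if (ret) {
		kfree(priv);
		return ret;	/* -ENXIO: required endpoints missing */
	}

	/* Take a reference because we keep the usb_device pointer around. */
	priv->udev = usb_get_dev(interface_to_usbdev(intf));
	usb_set_intfdata(intf, priv);
	return 0;
}

static void demo_disconnect(struct usb_interface *intf)
{
	struct demo_priv *priv = usb_get_intfdata(intf);

	usb_set_intfdata(intf, NULL);
	usb_put_dev(priv->udev);	/* drop the reference taken in probe() */
	kfree(priv);
}

static const struct usb_device_id demo_id_table[] = {
	{ USB_DEVICE(DEMO_VID, DEMO_PID) },
	{ }
};
MODULE_DEVICE_TABLE(usb, demo_id_table);

static struct usb_driver demo_driver = {
	.name		= "demo",
	.probe		= demo_probe,
	.disconnect	= demo_disconnect,
	.id_table	= demo_id_table,
};
module_usb_driver(demo_driver);

MODULE_DESCRIPTION("Illustrative sketch of usbcore endpoint lookup and refcounting");
MODULE_LICENSE("GPL");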
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* cx25840 internal API header * * Copyright (C) 2003-2004 Chris Kennedy */ #ifndef _CX25840_CORE_H_ #define _CX25840_CORE_H_ #include <linux/videodev2.h> #include <media/v4l2-device.h> #include <media/v4l2-ctrls.h> #include <linux/i2c.h> struct cx25840_ir_state; enum cx25840_model { CX23885_AV, CX23887_AV, CX23888_AV, CX2310X_AV, CX25840, CX25841, CX25842, CX25843, CX25836, CX25837, }; enum cx25840_media_pads { CX25840_PAD_INPUT, CX25840_PAD_VID_OUT, CX25840_NUM_PADS }; /** * struct cx25840_state - a device instance private data * @c: i2c_client struct representing this device * @sd: our V4L2 sub-device * @hdl: our V4L2 control handler * @volume: audio volume V4L2 control (non-cx2583x devices only) * @mute: audio mute V4L2 control (non-cx2583x devices only) * @pvr150_workaround: whether we enable workaround for Hauppauge PVR150 * hardware bug (audio dropping out) * @generic_mode: whether we disable ivtv-specific hacks * this mode gets turned on when the bridge driver calls * cx25840 subdevice init core op * @radio: set if we are currently in the radio mode, otherwise * the current mode is non-radio (that is, video) * @std: currently set video standard * @vid_input: currently set video input * @vid_config: currently set video output configuration * only used in the generic mode * @aud_input: currently set audio input * @audclk_freq: currently set audio sample rate * @audmode: currently set audio mode (when in non-radio mode) * @vbi_line_offset: vbi line number offset * @id: exact device model * @rev: raw device id read from the chip * @is_initialized: whether we have already loaded firmware into the chip * and initialized it * @vbi_regs_offset: offset of vbi regs * @fw_wait: wait queue to wake an initialization function up when * firmware loading (on a separate workqueue) finishes * @fw_work: a work that actually loads the firmware on a separate * workqueue * @ir_state: a pointer to chip IR controller private data * @pads: array of supported chip pads (currently only a stub) */ struct cx25840_state { struct i2c_client *c; struct v4l2_subdev sd; struct v4l2_ctrl_handler hdl; struct { /* volume cluster */ struct v4l2_ctrl *volume; struct v4l2_ctrl *mute; }; int pvr150_workaround; bool generic_mode; int radio; v4l2_std_id std; enum cx25840_video_input vid_input; u32 vid_config; enum cx25840_audio_input aud_input; u32 audclk_freq; int audmode; int vbi_line_offset; enum cx25840_model id; u32 rev; int is_initialized; unsigned int vbi_regs_offset; wait_queue_head_t fw_wait; struct work_struct fw_work; struct cx25840_ir_state *ir_state; #if defined(CONFIG_MEDIA_CONTROLLER) struct media_pad pads[CX25840_NUM_PADS]; #endif }; static inline struct cx25840_state *to_state(struct v4l2_subdev *sd) { return container_of(sd, struct cx25840_state, sd); } static inline struct v4l2_subdev *to_sd(struct
v4l2_ctrl *ctrl) { return &container_of(ctrl->handler, struct cx25840_state, hdl)->sd; } static inline bool is_cx2583x(struct cx25840_state *state) { return state->id == CX25836 || state->id == CX25837; } static inline bool is_cx2584x(struct cx25840_state *state) { return state->id == CX25840 || state->id == CX25841 || state->id == CX25842 || state->id == CX25843; } static inline bool is_cx231xx(struct cx25840_state *state) { return state->id == CX2310X_AV; } static inline bool is_cx2388x(struct cx25840_state *state) { return state->id == CX23885_AV || state->id == CX23887_AV || state->id == CX23888_AV; } static inline bool is_cx23885(struct cx25840_state *state) { return state->id == CX23885_AV; } static inline bool is_cx23887(struct cx25840_state *state) { return state->id == CX23887_AV; } static inline bool is_cx23888(struct cx25840_state *state) { return state->id == CX23888_AV; } /* ----------------------------------------------------------------------- */ /* cx25840-core.c */ int cx25840_write(struct i2c_client *client, u16 addr, u8 value); int cx25840_write4(struct i2c_client *client, u16 addr, u32 value); u8 cx25840_read(struct i2c_client *client, u16 addr); u32 cx25840_read4(struct i2c_client *client, u16 addr); int cx25840_and_or(struct i2c_client *client, u16 addr, unsigned int mask, u8 value); int cx25840_and_or4(struct i2c_client *client, u16 addr, u32 and_mask, u32 or_value); void cx25840_std_setup(struct i2c_client *client); /* ----------------------------------------------------------------------- */ /* cx25840-firmware.c */ int cx25840_loadfw(struct i2c_client *client); /* ----------------------------------------------------------------------- */ /* cx25840-audio.c */ void cx25840_audio_set_path(struct i2c_client *client); int cx25840_s_clock_freq(struct v4l2_subdev *sd, u32 freq); extern const struct v4l2_ctrl_ops cx25840_audio_ctrl_ops; /* ----------------------------------------------------------------------- */ /* cx25840-vbi.c */ int cx25840_s_raw_fmt(struct v4l2_subdev *sd, struct v4l2_vbi_format *fmt); int cx25840_s_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_format *fmt); int cx25840_g_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_format *fmt); int cx25840_decode_vbi_line(struct v4l2_subdev *sd, struct v4l2_decode_vbi_line *vbi); /* ----------------------------------------------------------------------- */ /* cx25840-ir.c */ extern const struct v4l2_subdev_ir_ops cx25840_ir_ops; int cx25840_ir_log_status(struct v4l2_subdev *sd); int cx25840_ir_irq_handler(struct v4l2_subdev *sd, u32 status, bool *handled); int cx25840_ir_probe(struct v4l2_subdev *sd); int cx25840_ir_remove(struct v4l2_subdev *sd); #endif
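/*
 * Example (illustrative sketch, not part of the driver): how a V4L2 control
 * handler can recover the driver state from a struct v4l2_ctrl using the
 * to_sd()/to_state() helpers declared above.  The function and ops names
 * (example_*) are hypothetical, and the register address written below is a
 * placeholder, not taken from the real cx25840 register map.
 */
#include "cx25840-core.h"	/* the header above */

static int example_s_ctrl(struct v4l2_ctrl *ctrl)
{
	struct v4l2_subdev *sd = to_sd(ctrl);		/* ctrl -> subdev */
	struct cx25840_state *state = to_state(sd);	/* subdev -> state */
	struct i2c_client *client = state->c;

	switch (ctrl->id) {
	case V4L2_CID_BRIGHTNESS:
		/* 0x0000 is a placeholder register address */
		return cx25840_write(client, 0x0000, ctrl->val);
	default:
		return -EINVAL;
	}
}

static const struct v4l2_ctrl_ops example_ctrl_ops = {
	.s_ctrl = example_s_ctrl,
};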
// SPDX-License-Identifier: GPL-2.0+ /* * Infinity Unlimited USB Phoenix driver * * Copyright (C) 2010 James Courtier-Dutton (James@superbug.co.uk) * Copyright (C) 2007 Alain Degreffe (eczema@ecze.com) * * Original code taken from iuutool (Copyright (C) 2006 Juan Carlos Borrás) * * And tested with help of WB Electronics */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include "iuu_phoenix.h" #include <linux/random.h> #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver" static const struct usb_device_id id_table[] = { {USB_DEVICE(IUU_USB_VENDOR_ID, IUU_USB_PRODUCT_ID)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); /* turbo parameter */ static int boost = 100; static int clockmode = 1; static int cdmode = 1; static int iuu_cardin; static int iuu_cardout; static bool xmas; static int vcc_default = 5; static int iuu_create_sysfs_attrs(struct usb_serial_port *port); static int iuu_remove_sysfs_attrs(struct usb_serial_port *port); static void read_rxcmd_callback(struct urb *urb); struct iuu_private { spinlock_t lock; /* store irq state */ u8 line_status; int tiostatus; /* store IUART SIGNAL for tiocmget call */ u8 reset; /* if 1 reset is needed */ int poll; /* number of poll */ u8 *writebuf; /* buffer for writing to device */ int writelen; /* num of byte to write to device */ u8 *buf; /* used for initialize speed */ u8 len; int vcc; /* vcc (either 3 or 5 V) */ u32 boost; u32 clk; }; static int iuu_port_probe(struct usb_serial_port *port) { struct iuu_private *priv; int ret; priv = kzalloc(sizeof(struct iuu_private), GFP_KERNEL); if (!priv) return -ENOMEM; priv->buf = kzalloc(256, GFP_KERNEL); if (!priv->buf) { kfree(priv); return -ENOMEM; } priv->writebuf = kzalloc(256, GFP_KERNEL); if (!priv->writebuf) { kfree(priv->buf); kfree(priv); return -ENOMEM; } priv->vcc = vcc_default; spin_lock_init(&priv->lock);
usb_set_serial_port_data(port, priv); ret = iuu_create_sysfs_attrs(port); if (ret) { kfree(priv->writebuf); kfree(priv->buf); kfree(priv); return ret; } return 0; } static void iuu_port_remove(struct usb_serial_port *port) { struct iuu_private *priv = usb_get_serial_port_data(port); iuu_remove_sysfs_attrs(port); kfree(priv->writebuf); kfree(priv->buf); kfree(priv); } static int iuu_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; /* FIXME: locking on tiomstatus */ dev_dbg(&port->dev, "%s msg : SET = 0x%04x, CLEAR = 0x%04x\n", __func__, set, clear); spin_lock_irqsave(&priv->lock, flags); if ((set & TIOCM_RTS) && !(priv->tiostatus == TIOCM_RTS)) { dev_dbg(&port->dev, "%s TIOCMSET RESET called !!!\n", __func__); priv->reset = 1; } if (set & TIOCM_RTS) priv->tiostatus = TIOCM_RTS; spin_unlock_irqrestore(&priv->lock, flags); return 0; } /* This is used to provide a carrier detect mechanism * When a card is present, the response is 0x00 * When no card , the reader respond with TIOCM_CD * This is known as CD autodetect mechanism */ static int iuu_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; int rc; spin_lock_irqsave(&priv->lock, flags); rc = priv->tiostatus; spin_unlock_irqrestore(&priv->lock, flags); return rc; } static void iuu_rxcmd(struct urb *urb) { struct usb_serial_port *port = urb->context; int status = urb->status; if (status) { dev_dbg(&port->dev, "%s - status = %d\n", __func__, status); /* error stop all */ return; } memset(port->write_urb->transfer_buffer, IUU_UART_RX, 1); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 1, read_rxcmd_callback, port); usb_submit_urb(port->write_urb, GFP_ATOMIC); } static int iuu_reset(struct usb_serial_port *port, u8 wt) { struct iuu_private *priv = usb_get_serial_port_data(port); int result; char *buf_ptr = port->write_urb->transfer_buffer; /* Prepare the reset sequence */ *buf_ptr++ = IUU_RST_SET; *buf_ptr++ = IUU_DELAY_MS; *buf_ptr++ = wt; *buf_ptr = IUU_RST_CLEAR; /* send the sequence */ usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 4, iuu_rxcmd, port); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); priv->reset = 0; return result; } /* Status Function * Return value is * 0x00 = no card * 0x01 = smartcard * 0x02 = sim card */ static void iuu_update_status_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct iuu_private *priv = usb_get_serial_port_data(port); u8 *st; int status = urb->status; if (status) { dev_dbg(&port->dev, "%s - status = %d\n", __func__, status); /* error stop all */ return; } st = urb->transfer_buffer; dev_dbg(&port->dev, "%s - enter\n", __func__); if (urb->actual_length == 1) { switch (st[0]) { case 0x1: priv->tiostatus = iuu_cardout; break; case 0x0: priv->tiostatus = iuu_cardin; break; default: priv->tiostatus = iuu_cardin; } } iuu_rxcmd(urb); } static void iuu_status_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; int status = urb->status; dev_dbg(&port->dev, "%s - status = %d\n", __func__, status); usb_fill_bulk_urb(port->read_urb, port->serial->dev, usb_rcvbulkpipe(port->serial->dev, 
port->bulk_in_endpointAddress), port->read_urb->transfer_buffer, 256, iuu_update_status_callback, port); usb_submit_urb(port->read_urb, GFP_ATOMIC); } static int iuu_status(struct usb_serial_port *port) { int result; memset(port->write_urb->transfer_buffer, IUU_GET_STATE_REGISTER, 1); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 1, iuu_status_callback, port); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); return result; } static int bulk_immediate(struct usb_serial_port *port, u8 *buf, u8 count) { int status; struct usb_serial *serial = port->serial; int actual = 0; /* send the data out the bulk port */ status = usb_bulk_msg(serial->dev, usb_sndbulkpipe(serial->dev, port->bulk_out_endpointAddress), buf, count, &actual, 1000); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - error = %2x\n", __func__, status); else dev_dbg(&port->dev, "%s - write OK !\n", __func__); return status; } static int read_immediate(struct usb_serial_port *port, u8 *buf, u8 count) { int status; struct usb_serial *serial = port->serial; int actual = 0; /* send the data out the bulk port */ status = usb_bulk_msg(serial->dev, usb_rcvbulkpipe(serial->dev, port->bulk_in_endpointAddress), buf, count, &actual, 1000); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - error = %2x\n", __func__, status); else dev_dbg(&port->dev, "%s - read OK !\n", __func__); return status; } static int iuu_led(struct usb_serial_port *port, unsigned int R, unsigned int G, unsigned int B, u8 f) { int status; u8 *buf; buf = kmalloc(8, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = IUU_SET_LED; buf[1] = R & 0xFF; buf[2] = (R >> 8) & 0xFF; buf[3] = G & 0xFF; buf[4] = (G >> 8) & 0xFF; buf[5] = B & 0xFF; buf[6] = (B >> 8) & 0xFF; buf[7] = f; status = bulk_immediate(port, buf, 8); kfree(buf); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - led error status = %2x\n", __func__, status); else dev_dbg(&port->dev, "%s - led OK !\n", __func__); return IUU_OPERATION_OK; } static void iuu_rgbf_fill_buffer(u8 *buf, u8 r1, u8 r2, u8 g1, u8 g2, u8 b1, u8 b2, u8 freq) { *buf++ = IUU_SET_LED; *buf++ = r1; *buf++ = r2; *buf++ = g1; *buf++ = g2; *buf++ = b1; *buf++ = b2; *buf = freq; } static void iuu_led_activity_on(struct urb *urb) { struct usb_serial_port *port = urb->context; char *buf_ptr = port->write_urb->transfer_buffer; if (xmas) { buf_ptr[0] = IUU_SET_LED; get_random_bytes(buf_ptr + 1, 6); buf_ptr[7] = 1; } else { iuu_rgbf_fill_buffer(buf_ptr, 255, 255, 0, 0, 0, 0, 255); } usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 8 , iuu_rxcmd, port); usb_submit_urb(port->write_urb, GFP_ATOMIC); } static void iuu_led_activity_off(struct urb *urb) { struct usb_serial_port *port = urb->context; char *buf_ptr = port->write_urb->transfer_buffer; if (xmas) { iuu_rxcmd(urb); return; } iuu_rgbf_fill_buffer(buf_ptr, 0, 0, 255, 255, 0, 0, 255); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 8 , iuu_rxcmd, port); usb_submit_urb(port->write_urb, GFP_ATOMIC); } static int iuu_clk(struct usb_serial_port *port, int dwFrq) { int status; struct iuu_private *priv = usb_get_serial_port_data(port); int Count = 0; u8 FrqGenAdr = 0x69; u8 DIV = 0; /* 8bit */ u8 XDRV = 0; /* 8bit */ u8 PUMP = 0; /* 3bit */ u8 PBmsb = 0; /* 2bit */ 
u8 PBlsb = 0; /* 8bit */ u8 PO = 0; /* 1bit */ u8 Q = 0; /* 7bit */ /* 24bit = 3bytes */ unsigned int P = 0; unsigned int P2 = 0; int frq = (int)dwFrq; if (frq == 0) { priv->buf[Count++] = IUU_UART_WRITE_I2C; priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x09; priv->buf[Count++] = 0x00; status = bulk_immediate(port, (u8 *) priv->buf, Count); if (status != 0) { dev_dbg(&port->dev, "%s - write error\n", __func__); return status; } } else if (frq == 3579000) { DIV = 100; P = 1193; Q = 40; XDRV = 0; } else if (frq == 3680000) { DIV = 105; P = 161; Q = 5; XDRV = 0; } else if (frq == 6000000) { DIV = 66; P = 66; Q = 2; XDRV = 0x28; } else { unsigned int result = 0; unsigned int tmp = 0; unsigned int check; unsigned int check2; char found = 0x00; unsigned int lQ = 2; unsigned int lP = 2055; unsigned int lDiv = 4; for (lQ = 2; lQ <= 47 && !found; lQ++) for (lP = 2055; lP >= 8 && !found; lP--) for (lDiv = 4; lDiv <= 127 && !found; lDiv++) { tmp = (12000000 / lDiv) * (lP / lQ); if (abs((int)(tmp - frq)) < abs((int)(frq - result))) { check2 = (12000000 / lQ); if (check2 < 250000) continue; check = (12000000 / lQ) * lP; if (check > 400000000) continue; if (check < 100000000) continue; if (lDiv < 4 || lDiv > 127) continue; result = tmp; P = lP; DIV = lDiv; Q = lQ; if (result == frq) found = 0x01; } } } P2 = ((P - PO) / 2) - 4; PUMP = 0x04; PBmsb = (P2 >> 8 & 0x03); PBlsb = P2 & 0xFF; PO = (P >> 10) & 0x01; Q = Q - 2; priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x09; priv->buf[Count++] = 0x20; /* Adr = 0x09 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x0C; priv->buf[Count++] = DIV; /* Adr = 0x0C */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x12; priv->buf[Count++] = XDRV; /* Adr = 0x12 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x13; priv->buf[Count++] = 0x6B; /* Adr = 0x13 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x40; priv->buf[Count++] = (0xC0 | ((PUMP & 0x07) << 2)) | (PBmsb & 0x03); /* Adr = 0x40 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x41; priv->buf[Count++] = PBlsb; /* Adr = 0x41 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x42; priv->buf[Count++] = Q | (((PO & 0x01) << 7)); /* Adr = 0x42 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x44; priv->buf[Count++] = (char)0xFF; /* Adr = 0x44 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x45; priv->buf[Count++] = (char)0xFE; /* Adr = 0x45 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x46; priv->buf[Count++] = 0x7F; /* Adr = 0x46 */ priv->buf[Count++] = IUU_UART_WRITE_I2C; /* 0x4C */ priv->buf[Count++] = FrqGenAdr << 1; priv->buf[Count++] = 0x47; priv->buf[Count++] = (char)0x84; /* Adr = 0x47 */ status = bulk_immediate(port, (u8 *) priv->buf, Count); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - write error\n", __func__); return status; } static int iuu_uart_flush(struct usb_serial_port *port) { struct device *dev = &port->dev; int i; int status; u8 
*rxcmd; struct iuu_private *priv = usb_get_serial_port_data(port); if (iuu_led(port, 0xF000, 0, 0, 0xFF) < 0) return -EIO; rxcmd = kmalloc(1, GFP_KERNEL); if (!rxcmd) return -ENOMEM; rxcmd[0] = IUU_UART_RX; for (i = 0; i < 2; i++) { status = bulk_immediate(port, rxcmd, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_write error\n", __func__); goto out_free; } status = read_immediate(port, &priv->len, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); goto out_free; } if (priv->len > 0) { dev_dbg(dev, "%s - uart_flush datalen is : %i\n", __func__, priv->len); status = read_immediate(port, priv->buf, priv->len); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); goto out_free; } } } dev_dbg(dev, "%s - uart_flush_read OK!\n", __func__); iuu_led(port, 0, 0xF000, 0, 0xFF); out_free: kfree(rxcmd); return status; } static void read_buf_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; int status = urb->status; if (status) { if (status == -EPROTO) { /* reschedule needed */ } return; } dev_dbg(&port->dev, "%s - %i chars to write\n", __func__, urb->actual_length); if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } iuu_led_activity_on(urb); } static int iuu_bulk_write(struct usb_serial_port *port) { struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; int result; int buf_len; char *buf_ptr = port->write_urb->transfer_buffer; spin_lock_irqsave(&priv->lock, flags); *buf_ptr++ = IUU_UART_ESC; *buf_ptr++ = IUU_UART_TX; *buf_ptr++ = priv->writelen; memcpy(buf_ptr, priv->writebuf, priv->writelen); buf_len = priv->writelen; priv->writelen = 0; spin_unlock_irqrestore(&priv->lock, flags); dev_dbg(&port->dev, "%s - writing %i chars : %*ph\n", __func__, buf_len, buf_len, buf_ptr); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, buf_len + 3, iuu_rxcmd, port); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); usb_serial_port_softint(port); return result; } static int iuu_read_buf(struct usb_serial_port *port, int len) { int result; usb_fill_bulk_urb(port->read_urb, port->serial->dev, usb_rcvbulkpipe(port->serial->dev, port->bulk_in_endpointAddress), port->read_urb->transfer_buffer, len, read_buf_callback, port); result = usb_submit_urb(port->read_urb, GFP_ATOMIC); return result; } static void iuu_uart_read_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; int status = urb->status; int len = 0; unsigned char *data = urb->transfer_buffer; priv->poll++; if (status) { dev_dbg(&port->dev, "%s - status = %d\n", __func__, status); /* error stop all */ return; } if (urb->actual_length == 1) len = (int) data[0]; if (urb->actual_length > 1) { dev_dbg(&port->dev, "%s - urb->actual_length = %i\n", __func__, urb->actual_length); return; } /* if len > 0 call readbuf */ if (len > 0) { dev_dbg(&port->dev, "%s - call read buf - len to read is %i\n", __func__, len); status = iuu_read_buf(port, len); return; } /* need to update status ? */ if (priv->poll > 99) { status = iuu_status(port); priv->poll = 0; return; } /* reset waiting ? 
*/ if (priv->reset == 1) { status = iuu_reset(port, 0xC); return; } /* Writebuf is waiting */ spin_lock_irqsave(&priv->lock, flags); if (priv->writelen > 0) { spin_unlock_irqrestore(&priv->lock, flags); status = iuu_bulk_write(port); return; } spin_unlock_irqrestore(&priv->lock, flags); /* if nothing to write call again rxcmd */ dev_dbg(&port->dev, "%s - rxcmd recall\n", __func__); iuu_led_activity_off(urb); } static int iuu_uart_write(struct tty_struct *tty, struct usb_serial_port *port, const u8 *buf, int count) { struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long flags; spin_lock_irqsave(&priv->lock, flags); count = min(count, 256 - priv->writelen); if (count == 0) goto out; /* fill the buffer */ memcpy(priv->writebuf + priv->writelen, buf, count); priv->writelen += count; out: spin_unlock_irqrestore(&priv->lock, flags); return count; } static void read_rxcmd_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; int result; int status = urb->status; if (status) { /* error stop all */ return; } usb_fill_bulk_urb(port->read_urb, port->serial->dev, usb_rcvbulkpipe(port->serial->dev, port->bulk_in_endpointAddress), port->read_urb->transfer_buffer, 256, iuu_uart_read_callback, port); result = usb_submit_urb(port->read_urb, GFP_ATOMIC); dev_dbg(&port->dev, "%s - submit result = %d\n", __func__, result); } static int iuu_uart_on(struct usb_serial_port *port) { int status; u8 *buf; buf = kmalloc(4, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = IUU_UART_ENABLE; buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF); buf[2] = (u8) (0x00FF & IUU_BAUD_9600); buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN); status = bulk_immediate(port, buf, 4); if (status != IUU_OPERATION_OK) { dev_dbg(&port->dev, "%s - uart_on error\n", __func__); goto uart_enable_failed; } /* iuu_reset() the card after iuu_uart_on() */ status = iuu_uart_flush(port); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - uart_flush error\n", __func__); uart_enable_failed: kfree(buf); return status; } /* Disables the IUU UART (a.k.a. 
the Phoenix voiderface) */ static int iuu_uart_off(struct usb_serial_port *port) { int status; u8 *buf; buf = kmalloc(1, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = IUU_UART_DISABLE; status = bulk_immediate(port, buf, 1); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - uart_off error\n", __func__); kfree(buf); return status; } static int iuu_uart_baud(struct usb_serial_port *port, u32 baud_base, u32 *actual, u8 parity) { int status; u32 baud; u8 *dataout; u8 DataCount = 0; u8 T1Frekvens = 0; u8 T1reload = 0; unsigned int T1FrekvensHZ = 0; dev_dbg(&port->dev, "%s - enter baud_base=%d\n", __func__, baud_base); dataout = kmalloc(5, GFP_KERNEL); if (!dataout) return -ENOMEM; /*baud = (((priv->clk / 35) * baud_base) / 100000); */ baud = baud_base; if (baud < 1200 || baud > 230400) { kfree(dataout); return IUU_INVALID_PARAMETER; } if (baud > 977) { T1Frekvens = 3; T1FrekvensHZ = 500000; } if (baud > 3906) { T1Frekvens = 2; T1FrekvensHZ = 2000000; } if (baud > 11718) { T1Frekvens = 1; T1FrekvensHZ = 6000000; } if (baud > 46875) { T1Frekvens = 0; T1FrekvensHZ = 24000000; } T1reload = 256 - (u8) (T1FrekvensHZ / (baud * 2)); /* magic number here: ENTER_FIRMWARE_UPDATE; */ dataout[DataCount++] = IUU_UART_ESC; /* magic number here: CHANGE_BAUD; */ dataout[DataCount++] = IUU_UART_CHANGE; dataout[DataCount++] = T1Frekvens; dataout[DataCount++] = T1reload; *actual = (T1FrekvensHZ / (256 - T1reload)) / 2; switch (parity & 0x0F) { case IUU_PARITY_NONE: dataout[DataCount++] = 0x00; break; case IUU_PARITY_EVEN: dataout[DataCount++] = 0x01; break; case IUU_PARITY_ODD: dataout[DataCount++] = 0x02; break; case IUU_PARITY_MARK: dataout[DataCount++] = 0x03; break; case IUU_PARITY_SPACE: dataout[DataCount++] = 0x04; break; default: kfree(dataout); return IUU_INVALID_PARAMETER; } switch (parity & 0xF0) { case IUU_ONE_STOP_BIT: dataout[DataCount - 1] |= IUU_ONE_STOP_BIT; break; case IUU_TWO_STOP_BITS: dataout[DataCount - 1] |= IUU_TWO_STOP_BITS; break; default: kfree(dataout); return IUU_INVALID_PARAMETER; } status = bulk_immediate(port, dataout, DataCount); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - uart_off error\n", __func__); kfree(dataout); return status; } static void iuu_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { const u32 supported_mask = CMSPAR|PARENB|PARODD; struct iuu_private *priv = usb_get_serial_port_data(port); unsigned int cflag = tty->termios.c_cflag; int status; u32 actual; u32 parity; int csize = CS7; int baud; u32 newval = cflag & supported_mask; /* Just use the ospeed. ispeed should be the same. */ baud = tty->termios.c_ospeed; dev_dbg(&port->dev, "%s - enter c_ospeed or baud=%d\n", __func__, baud); /* compute the parity parameter */ parity = 0; if (cflag & CMSPAR) { /* Using mark space */ if (cflag & PARODD) parity |= IUU_PARITY_SPACE; else parity |= IUU_PARITY_MARK; } else if (!(cflag & PARENB)) { parity |= IUU_PARITY_NONE; csize = CS8; } else if (cflag & PARODD) parity |= IUU_PARITY_ODD; else parity |= IUU_PARITY_EVEN; parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT); /* set it */ status = iuu_uart_baud(port, baud * priv->boost / 100, &actual, parity); /* set the termios value to the real one, so the user now what has * changed. 
We support few fields so its easies to copy the old hw * settings back over and then adjust them */ if (old_termios) tty_termios_copy_hw(&tty->termios, old_termios); if (status != 0) /* Set failed - return old bits */ return; /* Re-encode speed, parity and csize */ tty_encode_baud_rate(tty, baud, baud); tty->termios.c_cflag &= ~(supported_mask|CSIZE); tty->termios.c_cflag |= newval | csize; } static void iuu_close(struct usb_serial_port *port) { /* iuu_led (port,255,0,0,0); */ iuu_uart_off(port); usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); iuu_led(port, 0, 0, 0xF000, 0xFF); } static void iuu_init_termios(struct tty_struct *tty) { tty->termios.c_cflag = B9600 | CS8 | CSTOPB | CREAD | PARENB | CLOCAL; tty->termios.c_ispeed = 9600; tty->termios.c_ospeed = 9600; tty->termios.c_lflag = 0; tty->termios.c_oflag = 0; tty->termios.c_iflag = 0; } static int iuu_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct device *dev = &port->dev; int result; int baud; u32 actual; struct iuu_private *priv = usb_get_serial_port_data(port); baud = tty->termios.c_ospeed; dev_dbg(dev, "%s - baud %d\n", __func__, baud); usb_clear_halt(serial->dev, port->write_urb->pipe); usb_clear_halt(serial->dev, port->read_urb->pipe); priv->poll = 0; #define SOUP(a, b, c, d) do { \ result = usb_control_msg(port->serial->dev, \ usb_sndctrlpipe(port->serial->dev, 0), \ b, a, c, d, NULL, 0, 1000); \ dev_dbg(dev, "0x%x:0x%x:0x%x:0x%x %d\n", a, b, c, d, result); } while (0) /* This is not UART related but IUU USB driver related or something */ /* like that. Basically no IUU will accept any commands from the USB */ /* host unless it has received the following message */ /* sprintf(buf ,"%c%c%c%c",0x03,0x02,0x02,0x0); */ SOUP(0x03, 0x02, 0x02, 0x0); iuu_led(port, 0xF000, 0xF000, 0, 0xFF); iuu_uart_on(port); if (boost < 100) boost = 100; priv->boost = boost; switch (clockmode) { case 2: /* 3.680 Mhz */ priv->clk = IUU_CLK_3680000; iuu_clk(port, IUU_CLK_3680000 * boost / 100); result = iuu_uart_baud(port, baud * boost / 100, &actual, IUU_PARITY_EVEN); break; case 3: /* 6.00 Mhz */ iuu_clk(port, IUU_CLK_6000000 * boost / 100); priv->clk = IUU_CLK_6000000; /* Ratio of 6000000 to 3500000 for baud 9600 */ result = iuu_uart_baud(port, 16457 * boost / 100, &actual, IUU_PARITY_EVEN); break; default: /* 3.579 Mhz */ iuu_clk(port, IUU_CLK_3579000 * boost / 100); priv->clk = IUU_CLK_3579000; result = iuu_uart_baud(port, baud * boost / 100, &actual, IUU_PARITY_EVEN); } /* set the cardin cardout signals */ switch (cdmode) { case 0: iuu_cardin = 0; iuu_cardout = 0; break; case 1: iuu_cardin = TIOCM_CD; iuu_cardout = 0; break; case 2: iuu_cardin = 0; iuu_cardout = TIOCM_CD; break; case 3: iuu_cardin = TIOCM_DSR; iuu_cardout = 0; break; case 4: iuu_cardin = 0; iuu_cardout = TIOCM_DSR; break; case 5: iuu_cardin = TIOCM_CTS; iuu_cardout = 0; break; case 6: iuu_cardin = 0; iuu_cardout = TIOCM_CTS; break; case 7: iuu_cardin = TIOCM_RNG; iuu_cardout = 0; break; case 8: iuu_cardin = 0; iuu_cardout = TIOCM_RNG; } iuu_uart_flush(port); dev_dbg(dev, "%s - initialization done\n", __func__); memset(port->write_urb->transfer_buffer, IUU_UART_RX, 1); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, 1, read_rxcmd_callback, port); result = usb_submit_urb(port->write_urb, GFP_KERNEL); if (result) { dev_err(dev, "%s - failed submitting read urb, error %d\n", __func__, result); 
iuu_close(port); } else { dev_dbg(dev, "%s - rxcmd OK\n", __func__); } return result; } /* how to change VCC */ static int iuu_vcc_set(struct usb_serial_port *port, unsigned int vcc) { int status; u8 *buf; buf = kmalloc(5, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = IUU_SET_VCC; buf[1] = vcc & 0xFF; buf[2] = (vcc >> 8) & 0xFF; buf[3] = (vcc >> 16) & 0xFF; buf[4] = (vcc >> 24) & 0xFF; status = bulk_immediate(port, buf, 5); kfree(buf); if (status != IUU_OPERATION_OK) dev_dbg(&port->dev, "%s - vcc error status = %2x\n", __func__, status); else dev_dbg(&port->dev, "%s - vcc OK !\n", __func__); return status; } /* * Sysfs Attributes */ static ssize_t vcc_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_serial_port *port = to_usb_serial_port(dev); struct iuu_private *priv = usb_get_serial_port_data(port); return sprintf(buf, "%d\n", priv->vcc); } static ssize_t vcc_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_serial_port *port = to_usb_serial_port(dev); struct iuu_private *priv = usb_get_serial_port_data(port); unsigned long v; if (kstrtoul(buf, 10, &v)) { dev_err(dev, "%s - vcc_mode: %s is not a unsigned long\n", __func__, buf); goto fail_store_vcc_mode; } dev_dbg(dev, "%s: setting vcc_mode = %ld\n", __func__, v); if ((v != 3) && (v != 5)) { dev_err(dev, "%s - vcc_mode %ld is invalid\n", __func__, v); } else { iuu_vcc_set(port, v); priv->vcc = v; } fail_store_vcc_mode: return count; } static DEVICE_ATTR_RW(vcc_mode); static int iuu_create_sysfs_attrs(struct usb_serial_port *port) { return device_create_file(&port->dev, &dev_attr_vcc_mode); } static int iuu_remove_sysfs_attrs(struct usb_serial_port *port) { device_remove_file(&port->dev, &dev_attr_vcc_mode); return 0; } /* * End Sysfs Attributes */ static struct usb_serial_driver iuu_device = { .driver = { .owner = THIS_MODULE, .name = "iuu_phoenix", }, .id_table = id_table, .num_ports = 1, .num_bulk_in = 1, .num_bulk_out = 1, .bulk_in_size = 512, .bulk_out_size = 512, .open = iuu_open, .close = iuu_close, .write = iuu_uart_write, .read_bulk_callback = iuu_uart_read_callback, .tiocmget = iuu_tiocmget, .tiocmset = iuu_tiocmset, .set_termios = iuu_set_termios, .init_termios = iuu_init_termios, .port_probe = iuu_port_probe, .port_remove = iuu_port_remove, }; static struct usb_serial_driver * const serial_drivers[] = { &iuu_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR("Alain Degreffe eczema@ecze.com"); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); module_param(xmas, bool, 0644); MODULE_PARM_DESC(xmas, "Xmas colors enabled or not"); module_param(boost, int, 0644); MODULE_PARM_DESC(boost, "Card overclock boost (in percent 100-500)"); module_param(clockmode, int, 0644); MODULE_PARM_DESC(clockmode, "Card clock mode (1=3.579 MHz, 2=3.680 MHz, " "3=6 Mhz)"); module_param(cdmode, int, 0644); MODULE_PARM_DESC(cdmode, "Card detect mode (0=none, 1=CD, 2=!CD, 3=DSR, " "4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING)"); module_param(vcc_default, int, 0644); MODULE_PARM_DESC(vcc_default, "Set default VCC (either 3 for 3.3V or 5 " "for 5V). Default to 5.");
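For illustration only: the vcc_mode attribute created above accepts the values 3 or 5 (3.3 V or 5 V). Below is a minimal user-space sketch that sets it, assuming the adapter enumerates as ttyUSB0 and that the attribute is exposed at the usual usb-serial sysfs location; the exact path is an assumption, not something stated by this driver.

#include <stdio.h>

int main(void)
{
	/* Hypothetical sysfs path for the port's vcc_mode attribute. */
	const char *path = "/sys/bus/usb-serial/devices/ttyUSB0/vcc_mode";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* vcc_mode_store() accepts only 3 or 5; other values are rejected. */
	fputs("5\n", f);
	fclose(f);
	return 0;
}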
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __FS_NOTIFY_FSNOTIFY_H_
#define __FS_NOTIFY_FSNOTIFY_H_

#include <linux/list.h>
#include <linux/fsnotify.h>
#include <linux/srcu.h>
#include <linux/types.h>

#include "../mount.h"

static inline struct inode *fsnotify_conn_inode(
				struct fsnotify_mark_connector *conn)
{
	return container_of(conn->obj, struct inode, i_fsnotify_marks);
}

static inline struct mount *fsnotify_conn_mount(
				struct fsnotify_mark_connector *conn)
{
	return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
}

static inline struct super_block *fsnotify_conn_sb(
				struct fsnotify_mark_connector *conn)
{
	return container_of(conn->obj, struct super_block, s_fsnotify_marks);
}

static inline struct super_block *fsnotify_connector_sb(
				struct fsnotify_mark_connector *conn)
{
	switch (conn->type) {
	case FSNOTIFY_OBJ_TYPE_INODE:
		return fsnotify_conn_inode(conn)->i_sb;
	case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
		return fsnotify_conn_mount(conn)->mnt.mnt_sb;
	case FSNOTIFY_OBJ_TYPE_SB:
		return fsnotify_conn_sb(conn);
	default:
		return NULL;
	}
}

/* destroy all events sitting in this group's notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);

/* protects reads of inode and vfsmount marks list */
extern struct srcu_struct fsnotify_mark_srcu;

/* compare two groups for sorting of marks lists */
extern int fsnotify_compare_groups(struct fsnotify_group *a,
				   struct fsnotify_group *b);

/* Destroy all marks attached to an object via connector */
extern void fsnotify_destroy_marks(fsnotify_connp_t *connp);
/* run the list of all marks associated with inode and destroy them */
static inline void fsnotify_clear_marks_by_inode(struct inode *inode)
{
	fsnotify_destroy_marks(&inode->i_fsnotify_marks);
}
/* run the list of all marks associated with vfsmount and destroy them */
static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
{
	fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks);
}
/* run the list of all marks associated with sb and destroy them */
static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
{
	fsnotify_destroy_marks(&sb->s_fsnotify_marks);
}

/*
 * update the dentry->d_flags of all of inode's children to indicate if inode
 * cares about events that happen to its children.
 */
extern void __fsnotify_update_child_dentry_flags(struct inode *inode);

extern struct kmem_cache *fsnotify_mark_connector_cachep;

#endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
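The fsnotify_conn_*() helpers above all rely on container_of() to recover the object that embeds the mark connector pointer. As a standalone sketch of the same pattern, using a local macro and an illustrative structure rather than the kernel's types:

#include <stdio.h>
#include <stddef.h>

/*
 * Same idea as the kernel's container_of(): step back from a pointer to a
 * member to the structure that embeds it.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_inode {
	int ino;
	int fsnotify_marks;	/* stands in for i_fsnotify_marks */
};

static struct demo_inode *demo_conn_inode(int *obj)
{
	return container_of(obj, struct demo_inode, fsnotify_marks);
}

int main(void)
{
	struct demo_inode inode = { .ino = 42 };

	printf("ino = %d\n", demo_conn_inode(&inode.fsnotify_marks)->ino);
	return 0;
}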
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * acpi_bus.c - ACPI Bus Driver ($Revision: 80 $)
 *
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 */

#define pr_fmt(fmt) "ACPI: " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/proc_fs.h>
#include <linux/acpi.h>
#include <linux/slab.h>
#include <linux/regulator/machine.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#ifdef CONFIG_X86
#include <asm/mpspec.h>
#include <linux/dmi.h>
#endif
#include <linux/acpi_viot.h>
#include <linux/pci.h>
#include <acpi/apei.h>
#include <linux/suspend.h>
#include <linux/prmt.h>

#include "internal.h"

struct acpi_device *acpi_root;
struct proc_dir_entry *acpi_root_dir;
EXPORT_SYMBOL(acpi_root_dir);

#ifdef CONFIG_X86
#ifdef CONFIG_ACPI_CUSTOM_DSDT static inline int set_copy_dsdt(const struct dmi_system_id *id) { return 0; } #else static int set_copy_dsdt(const struct dmi_system_id *id) { pr_notice("%s detected - force copy of DSDT to local memory\n", id->ident); acpi_gbl_copy_dsdt_locally = 1; return 0; } #endif static const struct dmi_system_id dsdt_dmi_table[] __initconst = { /* * Invoke DSDT corruption work-around on all Toshiba Satellite. * https://bugzilla.kernel.org/show_bug.cgi?id=14679 */ { .callback = set_copy_dsdt, .ident = "TOSHIBA Satellite", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), DMI_MATCH(DMI_PRODUCT_NAME, "Satellite"), }, }, {} }; #endif /* -------------------------------------------------------------------------- Device Management -------------------------------------------------------------------------- */ acpi_status acpi_bus_get_status_handle(acpi_handle handle, unsigned long long *sta) { acpi_status status; status = acpi_evaluate_integer(handle, "_STA", NULL, sta); if (ACPI_SUCCESS(status)) return AE_OK; if (status == AE_NOT_FOUND) { *sta = ACPI_STA_DEVICE_PRESENT | ACPI_STA_DEVICE_ENABLED | ACPI_STA_DEVICE_UI | ACPI_STA_DEVICE_FUNCTIONING; return AE_OK; } return status; } EXPORT_SYMBOL_GPL(acpi_bus_get_status_handle); int acpi_bus_get_status(struct acpi_device *device) { acpi_status status; unsigned long long sta; if (acpi_device_override_status(device, &sta)) { acpi_set_device_status(device, sta); return 0; } /* Battery devices must have their deps met before calling _STA */ if (acpi_device_is_battery(device) && device->dep_unmet) { acpi_set_device_status(device, 0); return 0; } status = acpi_bus_get_status_handle(device->handle, &sta); if (ACPI_FAILURE(status)) return -ENODEV; acpi_set_device_status(device, sta); if (device->status.functional && !device->status.present) { pr_debug("Device [%s] status [%08x]: functional but not present\n", device->pnp.bus_id, (u32)sta); } pr_debug("Device [%s] status [%08x]\n", device->pnp.bus_id, (u32)sta); return 0; } EXPORT_SYMBOL(acpi_bus_get_status); void acpi_bus_private_data_handler(acpi_handle handle, void *context) { return; } EXPORT_SYMBOL(acpi_bus_private_data_handler); int acpi_bus_attach_private_data(acpi_handle handle, void *data) { acpi_status status; status = acpi_attach_data(handle, acpi_bus_private_data_handler, data); if (ACPI_FAILURE(status)) { acpi_handle_debug(handle, "Error attaching device data\n"); return -ENODEV; } return 0; } EXPORT_SYMBOL_GPL(acpi_bus_attach_private_data); int acpi_bus_get_private_data(acpi_handle handle, void **data) { acpi_status status; if (!data) return -EINVAL; status = acpi_get_data(handle, acpi_bus_private_data_handler, data); if (ACPI_FAILURE(status)) { acpi_handle_debug(handle, "No context for object\n"); return -ENODEV; } return 0; } EXPORT_SYMBOL_GPL(acpi_bus_get_private_data); void acpi_bus_detach_private_data(acpi_handle handle) { acpi_detach_data(handle, acpi_bus_private_data_handler); } EXPORT_SYMBOL_GPL(acpi_bus_detach_private_data); static void acpi_print_osc_error(acpi_handle handle, struct acpi_osc_context *context, char *error) { int i; acpi_handle_debug(handle, "(%s): %s\n", context->uuid_str, error); pr_debug("_OSC request data:"); for (i = 0; i < context->cap.length; i += sizeof(u32)) pr_debug(" %x", *((u32 *)(context->cap.pointer + i))); pr_debug("\n"); } acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) { acpi_status status; struct acpi_object_list input; union acpi_object in_params[4]; union acpi_object *out_obj; guid_t guid; u32 errors; 
struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; if (!context) return AE_ERROR; if (guid_parse(context->uuid_str, &guid)) return AE_ERROR; context->ret.length = ACPI_ALLOCATE_BUFFER; context->ret.pointer = NULL; /* Setting up input parameters */ input.count = 4; input.pointer = in_params; in_params[0].type = ACPI_TYPE_BUFFER; in_params[0].buffer.length = 16; in_params[0].buffer.pointer = (u8 *)&guid; in_params[1].type = ACPI_TYPE_INTEGER; in_params[1].integer.value = context->rev; in_params[2].type = ACPI_TYPE_INTEGER; in_params[2].integer.value = context->cap.length/sizeof(u32); in_params[3].type = ACPI_TYPE_BUFFER; in_params[3].buffer.length = context->cap.length; in_params[3].buffer.pointer = context->cap.pointer; status = acpi_evaluate_object(handle, "_OSC", &input, &output); if (ACPI_FAILURE(status)) return status; if (!output.length) return AE_NULL_OBJECT; out_obj = output.pointer; if (out_obj->type != ACPI_TYPE_BUFFER || out_obj->buffer.length != context->cap.length) { acpi_print_osc_error(handle, context, "_OSC evaluation returned wrong type"); status = AE_TYPE; goto out_kfree; } /* Need to ignore the bit0 in result code */ errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); if (errors) { if (errors & OSC_REQUEST_ERROR) acpi_print_osc_error(handle, context, "_OSC request failed"); if (errors & OSC_INVALID_UUID_ERROR) acpi_print_osc_error(handle, context, "_OSC invalid UUID"); if (errors & OSC_INVALID_REVISION_ERROR) acpi_print_osc_error(handle, context, "_OSC invalid revision"); if (errors & OSC_CAPABILITIES_MASK_ERROR) { if (((u32 *)context->cap.pointer)[OSC_QUERY_DWORD] & OSC_QUERY_ENABLE) goto out_success; status = AE_SUPPORT; goto out_kfree; } status = AE_ERROR; goto out_kfree; } out_success: context->ret.length = out_obj->buffer.length; context->ret.pointer = kmemdup(out_obj->buffer.pointer, context->ret.length, GFP_KERNEL); if (!context->ret.pointer) { status = AE_NO_MEMORY; goto out_kfree; } status = AE_OK; out_kfree: kfree(output.pointer); return status; } EXPORT_SYMBOL(acpi_run_osc); bool osc_sb_apei_support_acked; /* * ACPI 6.0 Section 8.4.4.2 Idle State Coordination * OSPM supports platform coordinated low power idle(LPI) states */ bool osc_pc_lpi_support_confirmed; EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed); /* * ACPI 6.2 Section 6.2.11.2 'Platform-Wide OSPM Capabilities': * Starting with ACPI Specification 6.2, all _CPC registers can be in * PCC, System Memory, System IO, or Functional Fixed Hardware address * spaces. OSPM support for this more flexible register space scheme is * indicated by the “Flexible Address Space for CPPC Registers” _OSC bit. * * Otherwise (cf ACPI 6.1, s8.4.7.1.1.X), _CPC registers must be in: * - PCC or Functional Fixed Hardware address space if defined * - SystemMemory address space (NULL register) if not defined */ bool osc_cpc_flexible_adr_space_confirmed; EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed); /* * ACPI 6.4 Operating System Capabilities for USB. 
*/ bool osc_sb_native_usb4_support_confirmed; EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed); bool osc_sb_cppc2_support_acked; static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_negotiate_platform_control(void) { u32 capbuf[2], *capbuf_ret; struct acpi_osc_context context = { .uuid_str = sb_uuid_str, .rev = 1, .cap.length = 8, .cap.pointer = capbuf, }; acpi_handle handle; capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE; capbuf[OSC_SUPPORT_DWORD] = OSC_SB_PR3_SUPPORT; /* _PR3 is in use */ if (IS_ENABLED(CONFIG_ACPI_PROCESSOR_AGGREGATOR)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PAD_SUPPORT; if (IS_ENABLED(CONFIG_ACPI_PROCESSOR)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PPC_OST_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_OST_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PCLPI_SUPPORT; if (IS_ENABLED(CONFIG_ACPI_PRMT)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PRM_SUPPORT; if (IS_ENABLED(CONFIG_ACPI_FFH)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_FFH_OPR_SUPPORT; #ifdef CONFIG_ARM64 capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT; #endif #ifdef CONFIG_X86 capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT; #endif #ifdef CONFIG_ACPI_CPPC_LIB capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT; capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT; #endif capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_FLEXIBLE_ADR_SPACE; if (IS_ENABLED(CONFIG_SCHED_MC_PRIO)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT; if (IS_ENABLED(CONFIG_USB4)) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_NATIVE_USB4_SUPPORT; if (!ghes_disable) capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_APEI_SUPPORT; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; if (ACPI_FAILURE(acpi_run_osc(handle, &context))) return; capbuf_ret = context.ret.pointer; if (context.ret.length <= OSC_SUPPORT_DWORD) { kfree(context.ret.pointer); return; } /* * Now run _OSC again with query flag clear and with the caps * supported by both the OS and the platform. */ capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD]; kfree(context.ret.pointer); if (ACPI_FAILURE(acpi_run_osc(handle, &context))) return; capbuf_ret = context.ret.pointer; if (context.ret.length > OSC_SUPPORT_DWORD) { #ifdef CONFIG_ACPI_CPPC_LIB osc_sb_cppc2_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPCV2_SUPPORT; #endif osc_sb_apei_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; osc_pc_lpi_support_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; osc_sb_native_usb4_support_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; osc_cpc_flexible_adr_space_confirmed = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE; } kfree(context.ret.pointer); } /* * Native control of USB4 capabilities. If any of the tunneling bits is * set it means OS is in control and we use software based connection * manager. */ u32 osc_sb_native_usb4_control; EXPORT_SYMBOL_GPL(osc_sb_native_usb4_control); static void acpi_bus_decode_usb_osc(const char *msg, u32 bits) { pr_info("%s USB3%c DisplayPort%c PCIe%c XDomain%c\n", msg, (bits & OSC_USB_USB3_TUNNELING) ? '+' : '-', (bits & OSC_USB_DP_TUNNELING) ? '+' : '-', (bits & OSC_USB_PCIE_TUNNELING) ? '+' : '-', (bits & OSC_USB_XDOMAIN) ? 
'+' : '-'); } static u8 sb_usb_uuid_str[] = "23A0D13A-26AB-486C-9C5F-0FFA525A575A"; static void acpi_bus_osc_negotiate_usb_control(void) { u32 capbuf[3], *capbuf_ret; struct acpi_osc_context context = { .uuid_str = sb_usb_uuid_str, .rev = 1, .cap.length = sizeof(capbuf), .cap.pointer = capbuf, }; acpi_handle handle; acpi_status status; u32 control; if (!osc_sb_native_usb4_support_confirmed) return; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; control = OSC_USB_USB3_TUNNELING | OSC_USB_DP_TUNNELING | OSC_USB_PCIE_TUNNELING | OSC_USB_XDOMAIN; /* * Run _OSC first with query bit set, trying to get control over * all tunneling. The platform can then clear out bits in the * control dword that it does not want to grant to the OS. */ capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE; capbuf[OSC_SUPPORT_DWORD] = 0; capbuf[OSC_CONTROL_DWORD] = control; status = acpi_run_osc(handle, &context); if (ACPI_FAILURE(status)) return; if (context.ret.length != sizeof(capbuf)) { pr_info("USB4 _OSC: returned invalid length buffer\n"); goto out_free; } /* * Run _OSC again now with query bit clear and the control dword * matching what the platform granted (which may not have all * the control bits set). */ capbuf_ret = context.ret.pointer; capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_CONTROL_DWORD] = capbuf_ret[OSC_CONTROL_DWORD]; kfree(context.ret.pointer); status = acpi_run_osc(handle, &context); if (ACPI_FAILURE(status)) return; if (context.ret.length != sizeof(capbuf)) { pr_info("USB4 _OSC: returned invalid length buffer\n"); goto out_free; } osc_sb_native_usb4_control = control & acpi_osc_ctx_get_pci_control(&context); acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control); acpi_bus_decode_usb_osc("USB4 _OSC: OS controls", osc_sb_native_usb4_control); out_free: kfree(context.ret.pointer); } /* -------------------------------------------------------------------------- Notification Handling -------------------------------------------------------------------------- */ /** * acpi_bus_notify - Global system-level (0x00-0x7F) notifications handler * @handle: Target ACPI object. * @type: Notification type. * @data: Ignored. * * This only handles notifications related to device hotplug. */ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) { struct acpi_device *adev; switch (type) { case ACPI_NOTIFY_BUS_CHECK: acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n"); break; case ACPI_NOTIFY_DEVICE_CHECK: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n"); break; case ACPI_NOTIFY_DEVICE_WAKE: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n"); return; case ACPI_NOTIFY_EJECT_REQUEST: acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n"); break; case ACPI_NOTIFY_DEVICE_CHECK_LIGHT: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n"); /* TBD: Exactly what does 'light' mean? 
*/ return; case ACPI_NOTIFY_FREQUENCY_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a frequency mismatch\n"); return; case ACPI_NOTIFY_BUS_MODE_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a bus mode mismatch\n"); return; case ACPI_NOTIFY_POWER_FAULT: acpi_handle_err(handle, "Device has suffered a power fault\n"); return; default: acpi_handle_debug(handle, "Unknown event type 0x%x\n", type); return; } adev = acpi_get_acpi_dev(handle); if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) return; acpi_put_acpi_dev(adev); acpi_evaluate_ost(handle, type, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); } static void acpi_notify_device(acpi_handle handle, u32 event, void *data) { struct acpi_device *device = data; struct acpi_driver *acpi_drv = to_acpi_driver(device->dev.driver); acpi_drv->ops.notify(device, event); } static int acpi_device_install_notify_handler(struct acpi_device *device, struct acpi_driver *acpi_drv) { u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; acpi_status status; status = acpi_install_notify_handler(device->handle, type, acpi_notify_device, device); if (ACPI_FAILURE(status)) return -EINVAL; return 0; } static void acpi_device_remove_notify_handler(struct acpi_device *device, struct acpi_driver *acpi_drv) { u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; acpi_remove_notify_handler(device->handle, type, acpi_notify_device); acpi_os_wait_events_complete(); } int acpi_dev_install_notify_handler(struct acpi_device *adev, u32 handler_type, acpi_notify_handler handler, void *context) { acpi_status status; status = acpi_install_notify_handler(adev->handle, handler_type, handler, context); if (ACPI_FAILURE(status)) return -ENODEV; return 0; } EXPORT_SYMBOL_GPL(acpi_dev_install_notify_handler); void acpi_dev_remove_notify_handler(struct acpi_device *adev, u32 handler_type, acpi_notify_handler handler) { acpi_remove_notify_handler(adev->handle, handler_type, handler); acpi_os_wait_events_complete(); } EXPORT_SYMBOL_GPL(acpi_dev_remove_notify_handler); /* Handle events targeting \_SB device (at present only graceful shutdown) */ #define ACPI_SB_NOTIFY_SHUTDOWN_REQUEST 0x81 #define ACPI_SB_INDICATE_INTERVAL 10000 static void sb_notify_work(struct work_struct *dummy) { acpi_handle sb_handle; orderly_poweroff(true); /* * After initiating graceful shutdown, the ACPI spec requires OSPM * to evaluate _OST method once every 10seconds to indicate that * the shutdown is in progress */ acpi_get_handle(NULL, "\\_SB", &sb_handle); while (1) { pr_info("Graceful shutdown in progress.\n"); acpi_evaluate_ost(sb_handle, ACPI_OST_EC_OSPM_SHUTDOWN, ACPI_OST_SC_OS_SHUTDOWN_IN_PROGRESS, NULL); msleep(ACPI_SB_INDICATE_INTERVAL); } } static void acpi_sb_notify(acpi_handle handle, u32 event, void *data) { static DECLARE_WORK(acpi_sb_work, sb_notify_work); if (event == ACPI_SB_NOTIFY_SHUTDOWN_REQUEST) { if (!work_busy(&acpi_sb_work)) schedule_work(&acpi_sb_work); } else { pr_warn("event %x is not supported by \\_SB device\n", event); } } static int __init acpi_setup_sb_notify_handler(void) { acpi_handle sb_handle; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &sb_handle))) return -ENXIO; if (ACPI_FAILURE(acpi_install_notify_handler(sb_handle, ACPI_DEVICE_NOTIFY, acpi_sb_notify, NULL))) return -EINVAL; return 0; } /* -------------------------------------------------------------------------- Device Matching 
-------------------------------------------------------------------------- */ /** * acpi_get_first_physical_node - Get first physical node of an ACPI device * @adev: ACPI device in question * * Return: First physical node of ACPI device @adev */ struct device *acpi_get_first_physical_node(struct acpi_device *adev) { struct mutex *physical_node_lock = &adev->physical_node_lock; struct device *phys_dev; mutex_lock(physical_node_lock); if (list_empty(&adev->physical_node_list)) { phys_dev = NULL; } else { const struct acpi_device_physical_node *node; node = list_first_entry(&adev->physical_node_list, struct acpi_device_physical_node, node); phys_dev = node->dev; } mutex_unlock(physical_node_lock); return phys_dev; } EXPORT_SYMBOL_GPL(acpi_get_first_physical_node); static struct acpi_device *acpi_primary_dev_companion(struct acpi_device *adev, const struct device *dev) { const struct device *phys_dev = acpi_get_first_physical_node(adev); return phys_dev && phys_dev == dev ? adev : NULL; } /** * acpi_device_is_first_physical_node - Is given dev first physical node * @adev: ACPI companion device * @dev: Physical device to check * * Function checks if given @dev is the first physical devices attached to * the ACPI companion device. This distinction is needed in some cases * where the same companion device is shared between many physical devices. * * Note that the caller have to provide valid @adev pointer. */ bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev) { return !!acpi_primary_dev_companion(adev, dev); } /* * acpi_companion_match() - Can we match via ACPI companion device * @dev: Device in question * * Check if the given device has an ACPI companion and if that companion has * a valid list of PNP IDs, and if the device is the first (primary) physical * device associated with it. Return the companion pointer if that's the case * or NULL otherwise. * * If multiple physical devices are attached to a single ACPI companion, we need * to be careful. The usage scenario for this kind of relationship is that all * of the physical devices in question use resources provided by the ACPI * companion. A typical case is an MFD device where all the sub-devices share * the parent's ACPI companion. In such cases we can only allow the primary * (first) physical device to be matched with the help of the companion's PNP * IDs. * * Additional physical devices sharing the ACPI companion can still use * resources available from it but they will be matched normally using functions * provided by their bus types (and analogously for their modalias). */ const struct acpi_device *acpi_companion_match(const struct device *dev) { struct acpi_device *adev; adev = ACPI_COMPANION(dev); if (!adev) return NULL; if (list_empty(&adev->pnp.ids)) return NULL; return acpi_primary_dev_companion(adev, dev); } /** * acpi_of_match_device - Match device object using the "compatible" property. * @adev: ACPI device object to match. * @of_match_table: List of device IDs to match against. * @of_id: OF ID if matched * * If @dev has an ACPI companion which has ACPI_DT_NAMESPACE_HID in its list of * identifiers and a _DSD object with the "compatible" property, use that * property to match against the given list of identifiers. 
*/ static bool acpi_of_match_device(const struct acpi_device *adev, const struct of_device_id *of_match_table, const struct of_device_id **of_id) { const union acpi_object *of_compatible, *obj; int i, nval; if (!adev) return false; of_compatible = adev->data.of_compatible; if (!of_match_table || !of_compatible) return false; if (of_compatible->type == ACPI_TYPE_PACKAGE) { nval = of_compatible->package.count; obj = of_compatible->package.elements; } else { /* Must be ACPI_TYPE_STRING. */ nval = 1; obj = of_compatible; } /* Now we can look for the driver DT compatible strings */ for (i = 0; i < nval; i++, obj++) { const struct of_device_id *id; for (id = of_match_table; id->compatible[0]; id++) if (!strcasecmp(obj->string.pointer, id->compatible)) { if (of_id) *of_id = id; return true; } } return false; } static bool acpi_of_modalias(struct acpi_device *adev, char *modalias, size_t len) { const union acpi_object *of_compatible; const union acpi_object *obj; const char *str, *chr; of_compatible = adev->data.of_compatible; if (!of_compatible) return false; if (of_compatible->type == ACPI_TYPE_PACKAGE) obj = of_compatible->package.elements; else /* Must be ACPI_TYPE_STRING. */ obj = of_compatible; str = obj->string.pointer; chr = strchr(str, ','); strscpy(modalias, chr ? chr + 1 : str, len); return true; } /** * acpi_set_modalias - Set modalias using "compatible" property or supplied ID * @adev: ACPI device object to match * @default_id: ID string to use as default if no compatible string found * @modalias: Pointer to buffer that modalias value will be copied into * @len: Length of modalias buffer * * This is a counterpart of of_alias_from_compatible() for struct acpi_device * objects. If there is a compatible string for @adev, it will be copied to * @modalias with the vendor prefix stripped; otherwise, @default_id will be * used. */ void acpi_set_modalias(struct acpi_device *adev, const char *default_id, char *modalias, size_t len) { if (!acpi_of_modalias(adev, modalias, len)) strscpy(modalias, default_id, len); } EXPORT_SYMBOL_GPL(acpi_set_modalias); static bool __acpi_match_device_cls(const struct acpi_device_id *id, struct acpi_hardware_id *hwid) { int i, msk, byte_shift; char buf[3]; if (!id->cls) return false; /* Apply class-code bitmask, before checking each class-code byte */ for (i = 1; i <= 3; i++) { byte_shift = 8 * (3 - i); msk = (id->cls_msk >> byte_shift) & 0xFF; if (!msk) continue; sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) return false; } return true; } static bool __acpi_match_device(const struct acpi_device *device, const struct acpi_device_id *acpi_ids, const struct of_device_id *of_ids, const struct acpi_device_id **acpi_id, const struct of_device_id **of_id) { const struct acpi_device_id *id; struct acpi_hardware_id *hwid; /* * If the device is not present, it is unnecessary to load device * driver for it. */ if (!device || !device->status.present) return false; list_for_each_entry(hwid, &device->pnp.ids, list) { /* First, check the ACPI/PNP IDs provided by the caller. */ if (acpi_ids) { for (id = acpi_ids; id->id[0] || id->cls; id++) { if (id->id[0] && !strcmp((char *)id->id, hwid->id)) goto out_acpi_match; if (id->cls && __acpi_match_device_cls(id, hwid)) goto out_acpi_match; } } /* * Next, check ACPI_DT_NAMESPACE_HID and try to match the * "compatible" property if found. 
*/ if (!strcmp(ACPI_DT_NAMESPACE_HID, hwid->id)) return acpi_of_match_device(device, of_ids, of_id); } return false; out_acpi_match: if (acpi_id) *acpi_id = id; return true; } /** * acpi_match_acpi_device - Match an ACPI device against a given list of ACPI IDs * @ids: Array of struct acpi_device_id objects to match against. * @adev: The ACPI device pointer to match. * * Match the ACPI device @adev against a given list of ACPI IDs @ids. * * Return: * a pointer to the first matching ACPI ID on success or %NULL on failure. */ const struct acpi_device_id *acpi_match_acpi_device(const struct acpi_device_id *ids, const struct acpi_device *adev) { const struct acpi_device_id *id = NULL; __acpi_match_device(adev, ids, NULL, &id, NULL); return id; } EXPORT_SYMBOL_GPL(acpi_match_acpi_device); /** * acpi_match_device - Match a struct device against a given list of ACPI IDs * @ids: Array of struct acpi_device_id object to match against. * @dev: The device structure to match. * * Check if @dev has a valid ACPI handle and if there is a struct acpi_device * object for that handle and use that object to match against a given list of * device IDs. * * Return a pointer to the first matching ID on success or %NULL on failure. */ const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const struct device *dev) { return acpi_match_acpi_device(ids, acpi_companion_match(dev)); } EXPORT_SYMBOL_GPL(acpi_match_device); static const void *acpi_of_device_get_match_data(const struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); const struct of_device_id *match = NULL; if (!acpi_of_match_device(adev, dev->driver->of_match_table, &match)) return NULL; return match->data; } const void *acpi_device_get_match_data(const struct device *dev) { const struct acpi_device_id *acpi_ids = dev->driver->acpi_match_table; const struct acpi_device_id *match; if (!acpi_ids) return acpi_of_device_get_match_data(dev); match = acpi_match_device(acpi_ids, dev); if (!match) return NULL; return (const void *)match->driver_data; } EXPORT_SYMBOL_GPL(acpi_device_get_match_data); int acpi_match_device_ids(struct acpi_device *device, const struct acpi_device_id *ids) { return __acpi_match_device(device, ids, NULL, NULL, NULL) ? 0 : -ENOENT; } EXPORT_SYMBOL(acpi_match_device_ids); bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv) { const struct acpi_device_id *acpi_ids = drv->acpi_match_table; const struct of_device_id *of_ids = drv->of_match_table; if (!acpi_ids) return acpi_of_match_device(ACPI_COMPANION(dev), of_ids, NULL); return __acpi_match_device(acpi_companion_match(dev), acpi_ids, of_ids, NULL, NULL); } EXPORT_SYMBOL_GPL(acpi_driver_match_device); /* -------------------------------------------------------------------------- ACPI Driver Management -------------------------------------------------------------------------- */ /** * acpi_bus_register_driver - register a driver with the ACPI bus * @driver: driver being registered * * Registers a driver with the ACPI bus. Searches the namespace for all * devices that match the driver's criteria and binds. Returns zero for * success or a negative error status for failure. 
*/ int acpi_bus_register_driver(struct acpi_driver *driver) { if (acpi_disabled) return -ENODEV; driver->drv.name = driver->name; driver->drv.bus = &acpi_bus_type; driver->drv.owner = driver->owner; return driver_register(&driver->drv); } EXPORT_SYMBOL(acpi_bus_register_driver); /** * acpi_bus_unregister_driver - unregisters a driver with the ACPI bus * @driver: driver to unregister * * Unregisters a driver with the ACPI bus. Searches the namespace for all * devices that match the driver's criteria and unbinds. */ void acpi_bus_unregister_driver(struct acpi_driver *driver) { driver_unregister(&driver->drv); } EXPORT_SYMBOL(acpi_bus_unregister_driver); /* -------------------------------------------------------------------------- ACPI Bus operations -------------------------------------------------------------------------- */ static int acpi_bus_match(struct device *dev, struct device_driver *drv) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_driver *acpi_drv = to_acpi_driver(drv); return acpi_dev->flags.match_driver && !acpi_match_device_ids(acpi_dev, acpi_drv->ids); } static int acpi_device_uevent(const struct device *dev, struct kobj_uevent_env *env) { return __acpi_device_uevent_modalias(to_acpi_device(dev), env); } static int acpi_device_probe(struct device *dev) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); int ret; if (acpi_dev->handler && !acpi_is_pnp_device(acpi_dev)) return -EINVAL; if (!acpi_drv->ops.add) return -ENOSYS; ret = acpi_drv->ops.add(acpi_dev); if (ret) { acpi_dev->driver_data = NULL; return ret; } pr_debug("Driver [%s] successfully bound to device [%s]\n", acpi_drv->name, acpi_dev->pnp.bus_id); if (acpi_drv->ops.notify) { ret = acpi_device_install_notify_handler(acpi_dev, acpi_drv); if (ret) { if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev); acpi_dev->driver_data = NULL; return ret; } } pr_debug("Found driver [%s] for device [%s]\n", acpi_drv->name, acpi_dev->pnp.bus_id); get_device(dev); return 0; } static void acpi_device_remove(struct device *dev) { struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); if (acpi_drv->ops.notify) acpi_device_remove_notify_handler(acpi_dev, acpi_drv); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev); acpi_dev->driver_data = NULL; put_device(dev); } const struct bus_type acpi_bus_type = { .name = "acpi", .match = acpi_bus_match, .probe = acpi_device_probe, .remove = acpi_device_remove, .uevent = acpi_device_uevent, }; int acpi_bus_for_each_dev(int (*fn)(struct device *, void *), void *data) { return bus_for_each_dev(&acpi_bus_type, NULL, data, fn); } EXPORT_SYMBOL_GPL(acpi_bus_for_each_dev); struct acpi_dev_walk_context { int (*fn)(struct acpi_device *, void *); void *data; }; static int acpi_dev_for_one_check(struct device *dev, void *context) { struct acpi_dev_walk_context *adwc = context; if (dev->bus != &acpi_bus_type) return 0; return adwc->fn(to_acpi_device(dev), adwc->data); } EXPORT_SYMBOL_GPL(acpi_dev_for_each_child); int acpi_dev_for_each_child(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data) { struct acpi_dev_walk_context adwc = { .fn = fn, .data = data, }; return device_for_each_child(&adev->dev, &adwc, acpi_dev_for_one_check); } int acpi_dev_for_each_child_reverse(struct acpi_device *adev, int (*fn)(struct acpi_device *, void *), void *data) { struct acpi_dev_walk_context adwc = { .fn = fn, .data = data, }; return 
device_for_each_child_reverse(&adev->dev, &adwc, acpi_dev_for_one_check); } /* -------------------------------------------------------------------------- Initialization/Cleanup -------------------------------------------------------------------------- */ static int __init acpi_bus_init_irq(void) { acpi_status status; char *message = NULL; /* * Let the system know what interrupt model we are using by * evaluating the \_PIC object, if exists. */ switch (acpi_irq_model) { case ACPI_IRQ_MODEL_PIC: message = "PIC"; break; case ACPI_IRQ_MODEL_IOAPIC: message = "IOAPIC"; break; case ACPI_IRQ_MODEL_IOSAPIC: message = "IOSAPIC"; break; case ACPI_IRQ_MODEL_GIC: message = "GIC"; break; case ACPI_IRQ_MODEL_PLATFORM: message = "platform specific model"; break; case ACPI_IRQ_MODEL_LPIC: message = "LPIC"; break; default: pr_info("Unknown interrupt routing model\n"); return -ENODEV; } pr_info("Using %s for interrupt routing\n", message); status = acpi_execute_simple_method(NULL, "\\_PIC", acpi_irq_model); if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { pr_info("_PIC evaluation failed: %s\n", acpi_format_exception(status)); return -ENODEV; } return 0; } /** * acpi_early_init - Initialize ACPICA and populate the ACPI namespace. * * The ACPI tables are accessible after this, but the handling of events has not * been initialized and the global lock is not available yet, so AML should not * be executed at this point. * * Doing this before switching the EFI runtime services to virtual mode allows * the EfiBootServices memory to be freed slightly earlier on boot. */ void __init acpi_early_init(void) { acpi_status status; if (acpi_disabled) return; pr_info("Core revision %08x\n", ACPI_CA_VERSION); /* enable workarounds, unless strict ACPI spec. compliance */ if (!acpi_strict) acpi_gbl_enable_interpreter_slack = TRUE; acpi_permanent_mmap = true; #ifdef CONFIG_X86 /* * If the machine falls into the DMI check table, * DSDT will be copied to memory. * Note that calling dmi_check_system() here on other architectures * would not be OK because only x86 initializes dmi early enough. * Thankfully only x86 systems need such quirks for now. */ dmi_check_system(dsdt_dmi_table); #endif status = acpi_reallocate_root_table(); if (ACPI_FAILURE(status)) { pr_err("Unable to reallocate ACPI tables\n"); goto error0; } status = acpi_initialize_subsystem(); if (ACPI_FAILURE(status)) { pr_err("Unable to initialize the ACPI Interpreter\n"); goto error0; } #ifdef CONFIG_X86 if (!acpi_ioapic) { /* compatible (0) means level (3) */ if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) { acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK; acpi_sci_flags |= ACPI_MADT_TRIGGER_LEVEL; } /* Set PIC-mode SCI trigger type */ acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt, (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2); } else { /* * now that acpi_gbl_FADT is initialized, * update it with result from INT_SRC_OVR parsing */ acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi; } #endif return; error0: disable_acpi(); } /** * acpi_subsystem_init - Finalize the early initialization of ACPI. * * Switch over the platform to the ACPI mode (if possible). * * Doing this too early is generally unsafe, but at the same time it needs to be * done before all things that really depend on ACPI. The right spot appears to * be before finalizing the EFI initialization. 
*/ void __init acpi_subsystem_init(void) { acpi_status status; if (acpi_disabled) return; status = acpi_enable_subsystem(~ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { pr_err("Unable to enable ACPI\n"); disable_acpi(); } else { /* * If the system is using ACPI then we can be reasonably * confident that any regulators are managed by the firmware * so tell the regulator core it has everything it needs to * know. */ regulator_has_full_constraints(); } } static acpi_status acpi_bus_table_handler(u32 event, void *table, void *context) { if (event == ACPI_TABLE_EVENT_LOAD) acpi_scan_table_notify(); return acpi_sysfs_table_handler(event, table, context); } static int __init acpi_bus_init(void) { int result; acpi_status status; acpi_os_initialize1(); status = acpi_load_tables(); if (ACPI_FAILURE(status)) { pr_err("Unable to load the System Description Tables\n"); goto error1; } /* * ACPI 2.0 requires the EC driver to be loaded and work before the EC * device is found in the namespace. * * This is accomplished by looking for the ECDT table and getting the EC * parameters out of that. * * Do that before calling acpi_initialize_objects() which may trigger EC * address space accesses. */ acpi_ec_ecdt_probe(); status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { pr_err("Unable to start the ACPI Interpreter\n"); goto error1; } status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION); if (ACPI_FAILURE(status)) { pr_err("Unable to initialize ACPI objects\n"); goto error1; } /* * _OSC method may exist in module level code, * so it must be run after ACPI_FULL_INITIALIZATION */ acpi_bus_osc_negotiate_platform_control(); acpi_bus_osc_negotiate_usb_control(); /* * _PDC control method may load dynamic SSDT tables, * and we need to install the table handler before that. */ status = acpi_install_table_handler(acpi_bus_table_handler, NULL); acpi_sysfs_init(); acpi_early_processor_control_setup(); /* * Maybe EC region is required at bus_scan/acpi_get_devices. So it * is necessary to enable it as early as possible. */ acpi_ec_dsdt_probe(); pr_info("Interpreter enabled\n"); /* Initialize sleep structures */ acpi_sleep_init(); /* * Get the system interrupt model and evaluate \_PIC. */ result = acpi_bus_init_irq(); if (result) goto error1; /* * Register the for all standard device notifications. */ status = acpi_install_notify_handler(ACPI_ROOT_OBJECT, ACPI_SYSTEM_NOTIFY, &acpi_bus_notify, NULL); if (ACPI_FAILURE(status)) { pr_err("Unable to register for system notifications\n"); goto error1; } /* * Create the top ACPI proc directory */ acpi_root_dir = proc_mkdir(ACPI_BUS_FILE_ROOT, NULL); result = bus_register(&acpi_bus_type); if (!result) return 0; /* Mimic structured exception handling */ error1: acpi_terminate(); return -ENODEV; } struct kobject *acpi_kobj; EXPORT_SYMBOL_GPL(acpi_kobj); static int __init acpi_init(void) { int result; if (acpi_disabled) { pr_info("Interpreter disabled.\n"); return -ENODEV; } acpi_kobj = kobject_create_and_add("acpi", firmware_kobj); if (!acpi_kobj) pr_debug("%s: kset create error\n", __func__); init_prmt(); acpi_init_pcc(); result = acpi_bus_init(); if (result) { kobject_put(acpi_kobj); disable_acpi(); return result; } acpi_init_ffh(); pci_mmcfg_late_init(); acpi_viot_early_init(); acpi_hest_init(); acpi_ghes_init(); acpi_arm_init(); acpi_scan_init(); acpi_ec_init(); acpi_debugfs_init(); acpi_sleep_proc_init(); acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); acpi_viot_init(); return 0; } subsys_initcall(acpi_init);
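As a sketch of the driver registration and matching paths above, here is a minimal ACPI driver module. The "TEST0001" _HID and all demo_* names are hypothetical, and the callback signatures assume the same kernel version as this file (where .remove returns void and struct acpi_driver still carries an .owner field):

// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/acpi.h>

static int demo_add(struct acpi_device *adev)
{
	dev_info(&adev->dev, "demo device bound\n");
	return 0;
}

static void demo_remove(struct acpi_device *adev)
{
	dev_info(&adev->dev, "demo device removed\n");
}

static const struct acpi_device_id demo_ids[] = {
	{ "TEST0001", 0 },	/* hypothetical _HID */
	{ }
};
MODULE_DEVICE_TABLE(acpi, demo_ids);

static struct acpi_driver demo_driver = {
	.name	= "acpi_demo",
	.ids	= demo_ids,
	.owner	= THIS_MODULE,
	.ops	= {
		.add	= demo_add,
		.remove	= demo_remove,
	},
};
module_acpi_driver(demo_driver);

MODULE_DESCRIPTION("Illustrative ACPI driver skeleton");
MODULE_LICENSE("GPL");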
// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	Andrew Morton
 *		Initial version.
*/ #include <linux/kernel.h> #include <linux/backing-dev.h> #include <linux/dax.h> #include <linux/gfp.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/export.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/pagevec.h> #include <linux/task_io_accounting_ops.h> #include <linux/shmem_fs.h> #include <linux/rmap.h> #include "internal.h" /* * Regular page slots are stabilized by the page lock even without the tree * itself locked. These unlocked entries need verification under the tree * lock. */ static inline void __clear_shadow_entry(struct address_space *mapping, pgoff_t index, void *entry) { XA_STATE(xas, &mapping->i_pages, index); xas_set_update(&xas, workingset_update_node); if (xas_load(&xas) != entry) return; xas_store(&xas, NULL); } static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, void *entry) { spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); __clear_shadow_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); } /* * Unconditionally remove exceptional entries. Usually called from truncate * path. Note that the folio_batch may be altered by this function by removing * exceptional entries similar to what folio_batch_remove_exceptionals() does. */ static void truncate_folio_batch_exceptionals(struct address_space *mapping, struct folio_batch *fbatch, pgoff_t *indices) { int i, j; bool dax; /* Handled by shmem itself */ if (shmem_mapping(mapping)) return; for (j = 0; j < folio_batch_count(fbatch); j++) if (xa_is_value(fbatch->folios[j])) break; if (j == folio_batch_count(fbatch)) return; dax = dax_mapping(mapping); if (!dax) { spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); } for (i = j; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; pgoff_t index = indices[i]; if (!xa_is_value(folio)) { fbatch->folios[j++] = folio; continue; } if (unlikely(dax)) { dax_delete_mapping_entry(mapping, index); continue; } __clear_shadow_entry(mapping, index, folio); } if (!dax) { xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); } fbatch->nr = j; } /* * Invalidate exceptional entry if easily possible. This handles exceptional * entries for invalidate_inode_pages(). */ static int invalidate_exceptional_entry(struct address_space *mapping, pgoff_t index, void *entry) { /* Handled by shmem itself, or for DAX we do nothing. */ if (shmem_mapping(mapping) || dax_mapping(mapping)) return 1; clear_shadow_entry(mapping, index, entry); return 1; } /* * Invalidate exceptional entry if clean. This handles exceptional entries for * invalidate_inode_pages2() so for DAX it evicts only clean entries. */ static int invalidate_exceptional_entry2(struct address_space *mapping, pgoff_t index, void *entry) { /* Handled by shmem itself */ if (shmem_mapping(mapping)) return 1; if (dax_mapping(mapping)) return dax_invalidate_mapping_entry_sync(mapping, index); clear_shadow_entry(mapping, index, entry); return 1; } /** * folio_invalidate - Invalidate part or all of a folio. * @folio: The folio which is affected. * @offset: start of the range to invalidate * @length: length of the range to invalidate * * folio_invalidate() is called when all or part of the folio has become * invalidated by a truncate operation. 
* * folio_invalidate() does not have to release all buffers, but it must * ensure that no dirty buffer is left outside @offset and that no I/O * is underway against any of the blocks which are outside the truncation * point. Because the caller is about to free (and possibly reuse) those * blocks on-disk. */ void folio_invalidate(struct folio *folio, size_t offset, size_t length) { const struct address_space_operations *aops = folio->mapping->a_ops; if (aops->invalidate_folio) aops->invalidate_folio(folio, offset, length); } EXPORT_SYMBOL_GPL(folio_invalidate); /* * If truncate cannot remove the fs-private metadata from the page, the page * becomes orphaned. It will be left on the LRU and may even be mapped into * user pagetables if we're racing with filemap_fault(). * * We need to bail out if page->mapping is no longer equal to the original * mapping. This happens a) when the VM reclaimed the page while we waited on * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ static void truncate_cleanup_folio(struct folio *folio) { if (folio_mapped(folio)) unmap_mapping_folio(folio); if (folio_has_private(folio)) folio_invalidate(folio, 0, folio_size(folio)); /* * Some filesystems seem to re-dirty the page even after * the VM has canceled the dirty bit (eg ext3 journaling). * Hence dirty accounting check is placed after invalidation. */ folio_cancel_dirty(folio); folio_clear_mappedtodisk(folio); } int truncate_inode_folio(struct address_space *mapping, struct folio *folio) { if (folio->mapping != mapping) return -EIO; truncate_cleanup_folio(folio); filemap_remove_folio(folio); return 0; } /* * Handle partial folios. The folio may be entirely within the * range if a split has raced with us. If not, we zero the part of the * folio that's within the [start, end] range, and then split the folio if * it's large. split_page_range() will discard pages which now lie beyond * i_size, and we rely on the caller to discard pages which lie within a * newly created hole. * * Returns false if splitting failed so the caller can avoid * discarding the entire folio which is stubbornly unsplit. */ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) { loff_t pos = folio_pos(folio); unsigned int offset, length; if (pos < start) offset = start - pos; else offset = 0; length = folio_size(folio); if (pos + length <= (u64)end) length = length - offset; else length = end + 1 - pos - offset; folio_wait_writeback(folio); if (length == folio_size(folio)) { truncate_inode_folio(folio->mapping, folio); return true; } /* * We may be zeroing pages we're about to discard, but it avoids * doing a complex calculation here, and then doing the zeroing * anyway if the page split fails. */ folio_zero_range(folio, offset, length); if (folio_has_private(folio)) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) return true; if (split_folio(folio) == 0) return true; if (folio_test_dirty(folio)) return false; truncate_inode_folio(folio->mapping, folio); return true; } /* * Used to get rid of pages on hardware memory corruption. */ int generic_error_remove_folio(struct address_space *mapping, struct folio *folio) { if (!mapping) return -EINVAL; /* * Only punch for normal data pages for now. * Handling other types like directories would need more auditing. 
*/ if (!S_ISREG(mapping->host->i_mode)) return -EIO; return truncate_inode_folio(mapping, folio); } EXPORT_SYMBOL(generic_error_remove_folio); /** * mapping_evict_folio() - Remove an unused folio from the page-cache. * @mapping: The mapping this folio belongs to. * @folio: The folio to remove. * * Safely remove one folio from the page cache. * It only drops clean, unused folios. * * Context: Folio must be locked. * Return: The number of pages successfully removed. */ long mapping_evict_folio(struct address_space *mapping, struct folio *folio) { /* The page may have been truncated before it was locked */ if (!mapping) return 0; if (folio_test_dirty(folio) || folio_test_writeback(folio)) return 0; /* The refcount will be elevated if any page in the folio is mapped */ if (folio_ref_count(folio) > folio_nr_pages(folio) + folio_has_private(folio) + 1) return 0; if (!filemap_release_folio(folio, 0)) return 0; return remove_mapping(mapping, folio); } /** * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets * @mapping: mapping to truncate * @lstart: offset from which to truncate * @lend: offset to which to truncate (inclusive) * * Truncate the page cache, removing the pages that are between * specified offsets (and zeroing out partial pages * if lstart or lend + 1 is not page aligned). * * Truncate takes two passes - the first pass is nonblocking. It will not * block on page locks and it will not block on writeback. The second pass * will wait. This is to prevent as much IO as possible in the affected region. * The first pass will remove most pages, so the search cost of the second pass * is low. * * We pass down the cache-hot hint to the page freeing code. Even if the * mapping is large, it is probably the case that the final pages are the most * recently touched, and freeing happens in ascending file offset order. * * Note that since ->invalidate_folio() accepts range to invalidate * truncate_inode_pages_range is able to handle cases where lend + 1 is not * page aligned properly. */ void truncate_inode_pages_range(struct address_space *mapping, loff_t lstart, loff_t lend) { pgoff_t start; /* inclusive */ pgoff_t end; /* exclusive */ struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; pgoff_t index; int i; struct folio *folio; bool same_folio; if (mapping_empty(mapping)) return; /* * 'start' and 'end' always covers the range of pages to be fully * truncated. Partial pages are covered with 'partial_start' at the * start of the range and 'partial_end' at the end of the range. * Note that 'end' is exclusive while 'lend' is inclusive. */ start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; if (lend == -1) /* * lend == -1 indicates end-of-file so we have to set 'end' * to the highest possible pgoff_t and since the type is * unsigned we're using -1. 
*/ end = -1; else end = (lend + 1) >> PAGE_SHIFT; folio_batch_init(&fbatch); index = start; while (index < end && find_lock_entries(mapping, &index, end - 1, &fbatch, indices)) { truncate_folio_batch_exceptionals(mapping, &fbatch, indices); for (i = 0; i < folio_batch_count(&fbatch); i++) truncate_cleanup_folio(fbatch.folios[i]); delete_from_page_cache_batch(mapping, &fbatch); for (i = 0; i < folio_batch_count(&fbatch); i++) folio_unlock(fbatch.folios[i]); folio_batch_release(&fbatch); cond_resched(); } same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0); if (!IS_ERR(folio)) { same_folio = lend < folio_pos(folio) + folio_size(folio); if (!truncate_inode_partial_folio(folio, lstart, lend)) { start = folio_next_index(folio); if (same_folio) end = folio->index; } folio_unlock(folio); folio_put(folio); folio = NULL; } if (!same_folio) { folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT, FGP_LOCK, 0); if (!IS_ERR(folio)) { if (!truncate_inode_partial_folio(folio, lstart, lend)) end = folio->index; folio_unlock(folio); folio_put(folio); } } index = start; while (index < end) { cond_resched(); if (!find_get_entries(mapping, &index, end - 1, &fbatch, indices)) { /* If all gone from start onwards, we're done */ if (index == start) break; /* Otherwise restart to make sure all gone */ index = start; continue; } for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing page->index */ if (xa_is_value(folio)) continue; folio_lock(folio); VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); folio_wait_writeback(folio); truncate_inode_folio(mapping, folio); folio_unlock(folio); } truncate_folio_batch_exceptionals(mapping, &fbatch, indices); folio_batch_release(&fbatch); } } EXPORT_SYMBOL(truncate_inode_pages_range); /** * truncate_inode_pages - truncate *all* the pages from an offset * @mapping: mapping to truncate * @lstart: offset from which to truncate * * Called under (and serialised by) inode->i_rwsem and * mapping->invalidate_lock. * * Note: When this function returns, there can be a page in the process of * deletion (inside __filemap_remove_folio()) in the specified range. Thus * mapping->nrpages can be non-zero when this function returns even after * truncation of the whole mapping. */ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) { truncate_inode_pages_range(mapping, lstart, (loff_t)-1); } EXPORT_SYMBOL(truncate_inode_pages); /** * truncate_inode_pages_final - truncate *all* pages before inode dies * @mapping: mapping to truncate * * Called under (and serialized by) inode->i_rwsem. * * Filesystems have to use this in the .evict_inode path to inform the * VM that this is the final truncate and the inode is going away. */ void truncate_inode_pages_final(struct address_space *mapping) { /* * Page reclaim can not participate in regular inode lifetime * management (can't call iput()) and thus can race with the * inode teardown. Tell it when the address space is exiting, * so that it does not install eviction information after the * final truncate has begun. */ mapping_set_exiting(mapping); if (!mapping_empty(mapping)) { /* * As truncation uses a lockless tree lookup, cycle * the tree lock to make sure any ongoing tree * modification that does not see AS_EXITING is * completed before starting the final truncate. 
*/ xa_lock_irq(&mapping->i_pages); xa_unlock_irq(&mapping->i_pages); } truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); /** * mapping_try_invalidate - Invalidate all the evictable folios of one inode * @mapping: the address_space which holds the folios to invalidate * @start: the offset 'from' which to invalidate * @end: the offset 'to' which to invalidate (inclusive) * @nr_failed: How many folio invalidations failed * * This function is similar to invalidate_mapping_pages(), except that it * returns the number of folios which could not be evicted in @nr_failed. */ unsigned long mapping_try_invalidate(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long *nr_failed) { pgoff_t indices[PAGEVEC_SIZE]; struct folio_batch fbatch; pgoff_t index = start; unsigned long ret; unsigned long count = 0; int i; folio_batch_init(&fbatch); while (find_lock_entries(mapping, &index, end, &fbatch, indices)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ if (xa_is_value(folio)) { count += invalidate_exceptional_entry(mapping, indices[i], folio); continue; } ret = mapping_evict_folio(mapping, folio); folio_unlock(folio); /* * Invalidation is a hint that the folio is no longer * of interest and try to speed up its reclaim. */ if (!ret) { deactivate_file_folio(folio); /* Likely in the lru cache of a remote CPU */ if (nr_failed) (*nr_failed)++; } count += ret; } folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); } return count; } /** * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode * @mapping: the address_space which holds the cache to invalidate * @start: the offset 'from' which to invalidate * @end: the offset 'to' which to invalidate (inclusive) * * This function removes pages that are clean, unmapped and unlocked, * as well as shadow entries. It will not block on IO activity. * * If you want to remove all the pages of one inode, regardless of * their use and writeback state, use truncate_inode_pages(). * * Return: The number of indices that had their contents invalidated */ unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end) { return mapping_try_invalidate(mapping, start, end, NULL); } EXPORT_SYMBOL(invalidate_mapping_pages); /* * This is like mapping_evict_folio(), except it ignores the folio's * refcount. We do this because invalidate_inode_pages2() needs stronger * invalidation guarantees, and cannot afford to leave folios behind because * shrink_page_list() has a temp ref on them, or because they're transiently * sitting in the folio_add_lru() caches. 
*/ static int invalidate_complete_folio2(struct address_space *mapping, struct folio *folio) { if (folio->mapping != mapping) return 0; if (!filemap_release_folio(folio, GFP_KERNEL)) return 0; spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); if (folio_test_dirty(folio)) goto failed; BUG_ON(folio_has_private(folio)); __filemap_remove_folio(folio, NULL); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); filemap_free_folio(mapping, folio); return 1; failed: xa_unlock_irq(&mapping->i_pages); spin_unlock(&mapping->host->i_lock); return 0; } static int folio_launder(struct address_space *mapping, struct folio *folio) { if (!folio_test_dirty(folio)) return 0; if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL) return 0; return mapping->a_ops->launder_folio(folio); } /** * invalidate_inode_pages2_range - remove range of pages from an address_space * @mapping: the address_space * @start: the page offset 'from' which to invalidate * @end: the page offset 'to' which to invalidate (inclusive) * * Any pages which are found to be mapped into pagetables are unmapped prior to * invalidation. * * Return: -EBUSY if any pages could not be invalidated. */ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end) { pgoff_t indices[PAGEVEC_SIZE]; struct folio_batch fbatch; pgoff_t index; int i; int ret = 0; int ret2 = 0; int did_range_unmap = 0; if (mapping_empty(mapping)) return 0; folio_batch_init(&fbatch); index = start; while (find_get_entries(mapping, &index, end, &fbatch, indices)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ if (xa_is_value(folio)) { if (!invalidate_exceptional_entry2(mapping, indices[i], folio)) ret = -EBUSY; continue; } if (!did_range_unmap && folio_mapped(folio)) { /* * If folio is mapped, before taking its lock, * zap the rest of the file in one hit. */ unmap_mapping_pages(mapping, indices[i], (1 + end - indices[i]), false); did_range_unmap = 1; } folio_lock(folio); if (unlikely(folio->mapping != mapping)) { folio_unlock(folio); continue; } VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); folio_wait_writeback(folio); if (folio_mapped(folio)) unmap_mapping_folio(folio); BUG_ON(folio_mapped(folio)); ret2 = folio_launder(mapping, folio); if (ret2 == 0) { if (!invalidate_complete_folio2(mapping, folio)) ret2 = -EBUSY; } if (ret2 < 0) ret = ret2; folio_unlock(folio); } folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); } /* * For DAX we invalidate page tables after invalidating page cache. We * could invalidate page tables while invalidating each entry however * that would be expensive. And doing range unmapping before doesn't * work as we have no cheap way to find whether page cache entry didn't * get remapped later. */ if (dax_mapping(mapping)) { unmap_mapping_pages(mapping, start, end - start + 1, false); } return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); /** * invalidate_inode_pages2 - remove all pages from an address_space * @mapping: the address_space * * Any pages which are found to be mapped into pagetables are unmapped prior to * invalidation. * * Return: -EBUSY if any pages could not be invalidated. 
*/ int invalidate_inode_pages2(struct address_space *mapping) { return invalidate_inode_pages2_range(mapping, 0, -1); } EXPORT_SYMBOL_GPL(invalidate_inode_pages2); /** * truncate_pagecache - unmap and remove pagecache that has been truncated * @inode: inode * @newsize: new file size * * inode's new i_size must already be written before truncate_pagecache * is called. * * This function should typically be called before the filesystem * releases resources associated with the freed range (eg. deallocates * blocks). This way, pagecache will always stay logically coherent * with on-disk format, and the filesystem would not have to deal with * situations such as writepage being called for a page that has already * had its underlying blocks deallocated. */ void truncate_pagecache(struct inode *inode, loff_t newsize) { struct address_space *mapping = inode->i_mapping; loff_t holebegin = round_up(newsize, PAGE_SIZE); /* * unmap_mapping_range is called twice, first simply for * efficiency so that truncate_inode_pages does fewer * single-page unmaps. However after this first call, and * before truncate_inode_pages finishes, it is possible for * private pages to be COWed, which remain after * truncate_inode_pages finishes, hence the second * unmap_mapping_range call must be made for correctness. */ unmap_mapping_range(mapping, holebegin, 0, 1); truncate_inode_pages(mapping, newsize); unmap_mapping_range(mapping, holebegin, 0, 1); } EXPORT_SYMBOL(truncate_pagecache); /** * truncate_setsize - update inode and pagecache for a new file size * @inode: inode * @newsize: new file size * * truncate_setsize updates i_size and performs pagecache truncation (if * necessary) to @newsize. It will be typically be called from the filesystem's * setattr function when ATTR_SIZE is passed in. * * Must be called with a lock serializing truncates and writes (generally * i_rwsem but e.g. xfs uses a different lock) and before all filesystem * specific block truncation has been performed. */ void truncate_setsize(struct inode *inode, loff_t newsize) { loff_t oldsize = inode->i_size; i_size_write(inode, newsize); if (newsize > oldsize) pagecache_isize_extended(inode, oldsize, newsize); truncate_pagecache(inode, newsize); } EXPORT_SYMBOL(truncate_setsize); /** * pagecache_isize_extended - update pagecache after extension of i_size * @inode: inode for which i_size was extended * @from: original inode size * @to: new inode size * * Handle extension of inode size either caused by extending truncate or by * write starting after current i_size. We mark the page straddling current * i_size RO so that page_mkwrite() is called on the nearest write access to * the page. This way filesystem can be sure that page_mkwrite() is called on * the page before user writes to the page via mmap after the i_size has been * changed. * * The function must be called after i_size is updated so that page fault * coming after we unlock the page will already see the new i_size. * The function must be called while we still hold i_rwsem - this not only * makes sure i_size is stable but also that userspace cannot observe new * i_size value before we are prepared to store mmap writes at new inode size. */ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) { int bsize = i_blocksize(inode); loff_t rounded_from; struct page *page; pgoff_t index; WARN_ON(to > inode->i_size); if (from >= to || bsize == PAGE_SIZE) return; /* Page straddling @from will not have any hole block created? 
*/ rounded_from = round_up(from, bsize); if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1))) return; index = from >> PAGE_SHIFT; page = find_lock_page(inode->i_mapping, index); /* Page not cached? Nothing to do */ if (!page) return; /* * See clear_page_dirty_for_io() for details why set_page_dirty() * is needed. */ if (page_mkclean(page)) set_page_dirty(page); unlock_page(page); put_page(page); } EXPORT_SYMBOL(pagecache_isize_extended); /** * truncate_pagecache_range - unmap and remove pagecache that is hole-punched * @inode: inode * @lstart: offset of beginning of hole * @lend: offset of last byte of hole * * This function should typically be called before the filesystem * releases resources associated with the freed range (eg. deallocates * blocks). This way, pagecache will always stay logically coherent * with on-disk format, and the filesystem would not have to deal with * situations such as writepage being called for a page that has already * had its underlying blocks deallocated. */ void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend) { struct address_space *mapping = inode->i_mapping; loff_t unmap_start = round_up(lstart, PAGE_SIZE); loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1; /* * This rounding is currently just for example: unmap_mapping_range * expands its hole outwards, whereas we want it to contract the hole * inwards. However, existing callers of truncate_pagecache_range are * doing their own page rounding first. Note that unmap_mapping_range * allows holelen 0 for all, and we allow lend -1 for end of file. */ /* * Unlike in truncate_pagecache, unmap_mapping_range is called only * once (before truncating pagecache), and without "even_cows" flag: * hole-punching should not remove private COWed pages from the hole. */ if ((u64)unmap_end > (u64)unmap_start) unmap_mapping_range(mapping, unmap_start, 1 + unmap_end - unmap_start, 0); truncate_inode_pages_range(mapping, lstart, lend); } EXPORT_SYMBOL(truncate_pagecache_range);
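/*
 * Illustrative sketch (not part of mm/truncate.c): how a filesystem's
 * hole-punch path is expected to order its work around
 * truncate_pagecache_range(), per the kernel-doc above.  example_punch_hole()
 * is a hypothetical function for illustration only; a real filesystem would
 * also handle partial blocks and its own journaling and locking.
 */
static int example_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct address_space *mapping = inode->i_mapping;

	/* Block page faults from re-instantiating pages while we work. */
	filemap_invalidate_lock(mapping);

	/* Drop and unmap cached pages covering the hole first... */
	truncate_pagecache_range(inode, offset, offset + len - 1);

	/* ...and only then would the filesystem free the on-disk blocks. */

	filemap_invalidate_unlock(mapping);
	return 0;
}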
/*
 *  linux/include/linux/console.h
 *
 *  Copyright (C) 1993	Hamish Macdonald
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details.
 *
 * Changed:
 * 10-Mar-94: Arno Griffioen: Conversion for vt100 emulator port from PC LINUX
 */

#ifndef _LINUX_CONSOLE_H_
#define _LINUX_CONSOLE_H_ 1

#include <linux/atomic.h>
#include <linux/bits.h>
#include <linux/rculist.h>
#include <linux/types.h>
#include <linux/vesa.h>

struct vc_data;
struct console_font_op;
struct console_font;
struct module;
struct tty_struct;
struct notifier_block;

enum con_scroll {
	SM_UP,
	SM_DOWN,
};

enum vc_intensity;

/**
 * struct consw - callbacks for consoles
 *
 * @owner: the module to get references of when this console is used
 * @con_startup: set up the console and return its name (like VGA, EGA, ...)
 * @con_init: initialize the console on @vc. @init is true for the very first
 *	call on this @vc.
 * @con_deinit: deinitialize the console from @vc.
 * @con_clear: erase @count characters at [@x, @y] on @vc. @count >= 1.
 * @con_putc: emit one character with attributes @ca to [@x, @y] on @vc.
 *	(optional -- @con_putcs would be called instead)
 * @con_putcs: emit @count characters with attributes @s to [@x, @y] on @vc.
 * @con_cursor: enable/disable cursor depending on @enable
 * @con_scroll: move lines from @top to @bottom in direction @dir by @lines.
* Return true if no generic handling should be done. * Invoked by csi_M and printing to the console. * @con_switch: notifier about the console switch; it is supposed to return * true if a redraw is needed. * @con_blank: blank/unblank the console. The target mode is passed in @blank. * @mode_switch is set if changing from/to text/graphics. The hook * is supposed to return true if a redraw is needed. * @con_font_set: set console @vc font to @font with height @vpitch. @flags can * be %KD_FONT_FLAG_DONT_RECALC. (optional) * @con_font_get: fetch the current font on @vc of height @vpitch into @font. * (optional) * @con_font_default: set default font on @vc. @name can be %NULL or font name * to search for. @font can be filled back. (optional) * @con_resize: resize the @vc console to @width x @height. @from_user is true * when this change comes from the user space. * @con_set_palette: sets the palette of the console @vc to @table (optional) * @con_scrolldelta: the contents of the console should be scrolled by @lines. * Invoked by user. (optional) * @con_set_origin: set origin (see &vc_data::vc_origin) of the @vc. If not * provided or returns false, the origin is set to * @vc->vc_screenbuf. (optional) * @con_save_screen: save screen content into @vc->vc_screenbuf. Called e.g. * upon entering graphics. (optional) * @con_build_attr: build attributes based on @color, @intensity and other * parameters. The result is used for both normal and erase * characters. (optional) * @con_invert_region: invert a region of length @count on @vc starting at @p. * (optional) * @con_debug_enter: prepare the console for the debugger. This includes, but * is not limited to, unblanking the console, loading an * appropriate palette, and allowing debugger generated output. * (optional) * @con_debug_leave: restore the console to its pre-debug state as closely as * possible. 
(optional) */ struct consw { struct module *owner; const char *(*con_startup)(void); void (*con_init)(struct vc_data *vc, bool init); void (*con_deinit)(struct vc_data *vc); void (*con_clear)(struct vc_data *vc, unsigned int y, unsigned int x, unsigned int count); void (*con_putc)(struct vc_data *vc, u16 ca, unsigned int y, unsigned int x); void (*con_putcs)(struct vc_data *vc, const u16 *s, unsigned int count, unsigned int ypos, unsigned int xpos); void (*con_cursor)(struct vc_data *vc, bool enable); bool (*con_scroll)(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int lines); bool (*con_switch)(struct vc_data *vc); bool (*con_blank)(struct vc_data *vc, enum vesa_blank_mode blank, bool mode_switch); int (*con_font_set)(struct vc_data *vc, const struct console_font *font, unsigned int vpitch, unsigned int flags); int (*con_font_get)(struct vc_data *vc, struct console_font *font, unsigned int vpitch); int (*con_font_default)(struct vc_data *vc, struct console_font *font, const char *name); int (*con_resize)(struct vc_data *vc, unsigned int width, unsigned int height, bool from_user); void (*con_set_palette)(struct vc_data *vc, const unsigned char *table); void (*con_scrolldelta)(struct vc_data *vc, int lines); bool (*con_set_origin)(struct vc_data *vc); void (*con_save_screen)(struct vc_data *vc); u8 (*con_build_attr)(struct vc_data *vc, u8 color, enum vc_intensity intensity, bool blink, bool underline, bool reverse, bool italic); void (*con_invert_region)(struct vc_data *vc, u16 *p, int count); void (*con_debug_enter)(struct vc_data *vc); void (*con_debug_leave)(struct vc_data *vc); }; extern const struct consw *conswitchp; extern const struct consw dummy_con; /* dummy console buffer */ extern const struct consw vga_con; /* VGA text console */ extern const struct consw newport_con; /* SGI Newport console */ struct screen_info; #ifdef CONFIG_VGA_CONSOLE void vgacon_register_screen(struct screen_info *si); #else static inline void vgacon_register_screen(struct screen_info *si) { } #endif int con_is_bound(const struct consw *csw); int do_unregister_con_driver(const struct consw *csw); int do_take_over_console(const struct consw *sw, int first, int last, int deflt); void give_up_console(const struct consw *sw); #ifdef CONFIG_VT void con_debug_enter(struct vc_data *vc); void con_debug_leave(void); #else static inline void con_debug_enter(struct vc_data *vc) { } static inline void con_debug_leave(void) { } #endif /* * The interface for a console, or any other device that wants to capture * console messages (printer driver?) */ /** * enum cons_flags - General console flags * @CON_PRINTBUFFER: Used by newly registered consoles to avoid duplicate * output of messages that were already shown by boot * consoles or read by userspace via syslog() syscall. * @CON_CONSDEV: Indicates that the console driver is backing * /dev/console. * @CON_ENABLED: Indicates if a console is allowed to print records. If * false, the console also will not advance to later * records. * @CON_BOOT: Marks the console driver as early console driver which * is used during boot before the real driver becomes * available. It will be automatically unregistered * when the real console driver is registered unless * "keep_bootcon" parameter is used. * @CON_ANYTIME: A misnomed historical flag which tells the core code * that the legacy @console::write callback can be invoked * on a CPU which is marked OFFLINE. 
That is misleading as * it suggests that there is no contextual limit for * invoking the callback. The original motivation was * readiness of the per-CPU areas. * @CON_BRL: Indicates a braille device which is exempt from * receiving the printk spam for obvious reasons. * @CON_EXTENDED: The console supports the extended output format of * /dev/kmesg which requires a larger output buffer. * @CON_SUSPENDED: Indicates if a console is suspended. If true, the * printing callbacks must not be called. * @CON_NBCON: Console can operate outside of the legacy style console_lock * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), CON_CONSDEV = BIT(1), CON_ENABLED = BIT(2), CON_BOOT = BIT(3), CON_ANYTIME = BIT(4), CON_BRL = BIT(5), CON_EXTENDED = BIT(6), CON_SUSPENDED = BIT(7), CON_NBCON = BIT(8), }; /** * struct nbcon_state - console state for nbcon consoles * @atom: Compound of the state fields for atomic operations * * @req_prio: The priority of a handover request * @prio: The priority of the current owner * @unsafe: Console is busy in a non takeover region * @unsafe_takeover: A hostile takeover in an unsafe state happened in the * past. The console cannot be safe until re-initialized. * @cpu: The CPU on which the owner runs * * To be used for reading and preparing of the value stored in the nbcon * state variable @console::nbcon_state. * * The @prio and @req_prio fields are particularly important to allow * spin-waiting to timeout and give up without the risk of a waiter being * assigned the lock after giving up. */ struct nbcon_state { union { unsigned int atom; struct { unsigned int prio : 2; unsigned int req_prio : 2; unsigned int unsafe : 1; unsigned int unsafe_takeover : 1; unsigned int cpu : 24; }; }; }; /* * The nbcon_state struct is used to easily create and interpret values that * are stored in the @console::nbcon_state variable. Ensure this struct stays * within the size boundaries of the atomic variable's underlying type in * order to avoid any accidental truncation. */ static_assert(sizeof(struct nbcon_state) <= sizeof(int)); /** * enum nbcon_prio - console owner priority for nbcon consoles * @NBCON_PRIO_NONE: Unused * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) * @NBCON_PRIO_PANIC: Panic output * @NBCON_PRIO_MAX: The number of priority levels * * A higher priority context can takeover the console when it is * in the safe state. The final attempt to flush consoles in panic() * can be allowed to do so even in an unsafe state (Hope and pray). */ enum nbcon_prio { NBCON_PRIO_NONE = 0, NBCON_PRIO_NORMAL, NBCON_PRIO_EMERGENCY, NBCON_PRIO_PANIC, NBCON_PRIO_MAX, }; struct console; struct printk_buffers; /** * struct nbcon_context - Context for console acquire/release * @console: The associated console * @spinwait_max_us: Limit for spin-wait acquire * @prio: Priority of the context * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can * be used only with NBCON_PRIO_PANIC @prio. It * might cause a system freeze when the console * is used later. 
* @backlog: Ringbuffer has pending records * @pbufs: Pointer to the text buffer for this context * @seq: The sequence number to print for this context */ struct nbcon_context { /* members set by caller */ struct console *console; unsigned int spinwait_max_us; enum nbcon_prio prio; unsigned int allow_unsafe_takeover : 1; /* members set by emit */ unsigned int backlog : 1; /* members set by acquire */ struct printk_buffers *pbufs; u64 seq; }; /** * struct nbcon_write_context - Context handed to the nbcon write callbacks * @ctxt: The core console context * @outbuf: Pointer to the text buffer for output * @len: Length to write * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred */ struct nbcon_write_context { struct nbcon_context __private ctxt; char *outbuf; unsigned int len; bool unsafe_takeover; }; /** * struct console - The console descriptor structure * @name: The name of the console driver * @write: Write callback to output messages (Optional) * @read: Read callback for console input (Optional) * @device: The underlying TTY device driver (Optional) * @unblank: Callback to unblank the console (Optional) * @setup: Callback for initializing the console (Optional) * @exit: Callback for teardown of the console (Optional) * @match: Callback for matching a console (Optional) * @flags: Console flags. See enum cons_flags * @index: Console index, e.g. port number * @cflag: TTY control mode flags * @ispeed: TTY input speed * @ospeed: TTY output speed * @seq: Sequence number of the next ringbuffer record to print * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list * * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer */ struct console { char name[16]; void (*write)(struct console *co, const char *s, unsigned int count); int (*read)(struct console *co, char *s, unsigned int count); struct tty_driver *(*device)(struct console *co, int *index); void (*unblank)(void); int (*setup)(struct console *co, char *options); int (*exit)(struct console *co); int (*match)(struct console *co, char *name, int idx, char *options); short flags; short index; int cflag; uint ispeed; uint ospeed; u64 seq; unsigned long dropped; void *data; struct hlist_node node; /* nbcon console specific members */ bool (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; }; #ifdef CONFIG_LOCKDEP extern void lockdep_assert_console_list_lock_held(void); #else static inline void lockdep_assert_console_list_lock_held(void) { } #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC extern bool console_srcu_read_lock_is_held(void); #else static inline bool console_srcu_read_lock_is_held(void) { return 1; } #endif extern int console_srcu_read_lock(void); extern void console_srcu_read_unlock(int cookie); extern void console_list_lock(void) __acquires(console_mutex); extern void console_list_unlock(void) __releases(console_mutex); extern struct hlist_head console_list; /** * console_srcu_read_flags - Locklessly read the console flags * @con: struct console pointer of console to read flags from * * This function provides the necessary READ_ONCE() and data_race() * notation for locklessly reading the console flags. 
The READ_ONCE() * in this function matches the WRITE_ONCE() when @flags are modified * for registered consoles with console_srcu_write_flags(). * * Only use this function to read console flags when locklessly * iterating the console list via srcu. * * Context: Any context. */ static inline short console_srcu_read_flags(const struct console *con) { WARN_ON_ONCE(!console_srcu_read_lock_is_held()); /* * Locklessly reading console->flags provides a consistent * read value because there is at most one CPU modifying * console->flags and that CPU is using only read-modify-write * operations to do so. */ return data_race(READ_ONCE(con->flags)); } /** * console_srcu_write_flags - Write flags for a registered console * @con: struct console pointer of console to write flags to * @flags: new flags value to write * * Only use this function to write flags for registered consoles. It * requires holding the console_list_lock. * * Context: Any context. */ static inline void console_srcu_write_flags(struct console *con, short flags) { lockdep_assert_console_list_lock_held(); /* This matches the READ_ONCE() in console_srcu_read_flags(). */ WRITE_ONCE(con->flags, flags); } /* Variant of console_is_registered() when the console_list_lock is held. */ static inline bool console_is_registered_locked(const struct console *con) { lockdep_assert_console_list_lock_held(); return !hlist_unhashed(&con->node); } /* * console_is_registered - Check if the console is registered * @con: struct console pointer of console to check * * Context: Process context. May sleep while acquiring console list lock. * Return: true if the console is in the console list, otherwise false. * * If false is returned for a console that was previously registered, it * can be assumed that the console's unregistration is fully completed, * including the exit() callback after console list removal. */ static inline bool console_is_registered(const struct console *con) { bool ret; console_list_lock(); ret = console_is_registered_locked(con); console_list_unlock(); return ret; } /** * for_each_console_srcu() - Iterator over registered consoles * @con: struct console pointer used as loop cursor * * Although SRCU guarantees the console list will be consistent, the * struct console fields may be updated by other CPUs while iterating. * * Requires console_srcu_read_lock to be held. Can be invoked from * any context. */ #define for_each_console_srcu(con) \ hlist_for_each_entry_srcu(con, &console_list, node, \ console_srcu_read_lock_is_held()) /** * for_each_console() - Iterator over registered consoles * @con: struct console pointer used as loop cursor * * The console list and the &console.flags are immutable while iterating. * * Requires console_list_lock to be held. 
*/ #define for_each_console(con) \ lockdep_assert_console_list_lock_held(); \ hlist_for_each_entry(con, &console_list, node) #ifdef CONFIG_PRINTK extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); #else static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } #endif extern int console_set_on_cmdline; extern struct console *early_console; enum con_flush_mode { CONSOLE_FLUSH_PENDING, CONSOLE_REPLAY_ALL, }; extern int add_preferred_console(const char *name, const short idx, char *options); extern void console_force_preferred_locked(struct console *con); extern void register_console(struct console *); extern int unregister_console(struct console *); extern void console_lock(void); extern int console_trylock(void); extern void console_unlock(void); extern void console_conditional_schedule(void); extern void console_unblank(void); extern void console_flush_on_panic(enum con_flush_mode mode); extern struct tty_driver *console_device(int *); extern void console_stop(struct console *); extern void console_start(struct console *); extern int is_console_locked(void); extern int braille_register_console(struct console *, int index, char *console_options, char *braille_options); extern int braille_unregister_console(struct console *); #ifdef CONFIG_TTY extern void console_sysfs_notify(void); #else static inline void console_sysfs_notify(void) { } #endif extern bool console_suspend_enabled; /* Suspend and resume console messages over PM events */ extern void suspend_console(void); extern void resume_console(void); int mda_console_init(void); void vcs_make_sysfs(int index); void vcs_remove_sysfs(int index); /* Some debug stub to catch some of the obvious races in the VT code */ #define WARN_CONSOLE_UNLOCKED() \ WARN_ON(!atomic_read(&ignore_console_lock_warning) && \ !is_console_locked() && !oops_in_progress) /* * Increment ignore_console_lock_warning if you need to quiet * WARN_CONSOLE_UNLOCKED() for debugging purposes. */ extern atomic_t ignore_console_lock_warning; extern void console_init(void); /* For deferred console takeover */ void dummycon_register_output_notifier(struct notifier_block *nb); void dummycon_unregister_output_notifier(struct notifier_block *nb); #endif /* _LINUX_CONSOLE_H */
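/*
 * Illustrative sketch (not part of console.h): lockless iteration over the
 * registered consoles using the SRCU helpers documented above.
 * example_count_enabled_consoles() is a hypothetical function for
 * illustration only.
 */
static int example_count_enabled_consoles(void)
{
	struct console *con;
	int cookie, enabled = 0;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		/* Flags of registered consoles must be read locklessly. */
		if (console_srcu_read_flags(con) & CON_ENABLED)
			enabled++;
	}
	console_srcu_read_unlock(cookie);

	return enabled;
}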
// SPDX-License-Identifier: GPL-2.0
/*
 *  power_supply_hwmon.c - power supply hwmon support.
 */

#include <linux/err.h>
#include <linux/hwmon.h>
#include <linux/power_supply.h>
#include <linux/slab.h>

struct power_supply_hwmon {
	struct power_supply *psy;
	unsigned long *props;
};

static const char *const ps_temp_label[] = {
	"temp",
	"ambient temp",
};

static int power_supply_hwmon_in_to_property(u32 attr)
{
	switch (attr) {
	case hwmon_in_average:
		return POWER_SUPPLY_PROP_VOLTAGE_AVG;
	case hwmon_in_min:
		return POWER_SUPPLY_PROP_VOLTAGE_MIN;
	case hwmon_in_max:
		return POWER_SUPPLY_PROP_VOLTAGE_MAX;
	case hwmon_in_input:
		return POWER_SUPPLY_PROP_VOLTAGE_NOW;
	default:
		return -EINVAL;
	}
}

static int power_supply_hwmon_curr_to_property(u32 attr)
{
	switch (attr) {
	case hwmon_curr_average:
		return POWER_SUPPLY_PROP_CURRENT_AVG;
	case hwmon_curr_max:
		return POWER_SUPPLY_PROP_CURRENT_MAX;
	case hwmon_curr_input:
		return POWER_SUPPLY_PROP_CURRENT_NOW;
	default:
		return -EINVAL;
	}
}

static int power_supply_hwmon_temp_to_property(u32 attr, int channel)
{
	if (channel) {
		switch (attr) {
		case hwmon_temp_input:
			return POWER_SUPPLY_PROP_TEMP_AMBIENT;
		case hwmon_temp_min_alarm:
			return POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN;
		case hwmon_temp_max_alarm:
			return POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX;
		default:
			break;
		}
	} else {
		switch (attr) {
		case hwmon_temp_input:
			return POWER_SUPPLY_PROP_TEMP;
		case hwmon_temp_max:
			return POWER_SUPPLY_PROP_TEMP_MAX;
		case hwmon_temp_min:
			return POWER_SUPPLY_PROP_TEMP_MIN;
		case hwmon_temp_min_alarm:
			return POWER_SUPPLY_PROP_TEMP_ALERT_MIN;
		case hwmon_temp_max_alarm:
			return POWER_SUPPLY_PROP_TEMP_ALERT_MAX;
		default:
			break;
		}
	}

	return -EINVAL;
}

static int power_supply_hwmon_to_property(enum hwmon_sensor_types type,
					  u32 attr, int channel)
{
	switch (type) {
	case hwmon_in:
		return power_supply_hwmon_in_to_property(attr);
	case hwmon_curr:
		return power_supply_hwmon_curr_to_property(attr);
	case hwmon_temp:
		return power_supply_hwmon_temp_to_property(attr, channel);
	default:
		return -EINVAL;
} } static bool power_supply_hwmon_is_a_label(enum hwmon_sensor_types type, u32 attr) { return type == hwmon_temp && attr == hwmon_temp_label; } struct hwmon_type_attr_list { const u32 *attrs; size_t n_attrs; }; static const u32 ps_temp_attrs[] = { hwmon_temp_input, hwmon_temp_min, hwmon_temp_max, hwmon_temp_min_alarm, hwmon_temp_max_alarm, }; static const struct hwmon_type_attr_list ps_type_attrs[hwmon_max] = { [hwmon_temp] = { ps_temp_attrs, ARRAY_SIZE(ps_temp_attrs) }, }; static bool power_supply_hwmon_has_input( const struct power_supply_hwmon *psyhw, enum hwmon_sensor_types type, int channel) { const struct hwmon_type_attr_list *attr_list = &ps_type_attrs[type]; size_t i; for (i = 0; i < attr_list->n_attrs; ++i) { int prop = power_supply_hwmon_to_property(type, attr_list->attrs[i], channel); if (prop >= 0 && test_bit(prop, psyhw->props)) return true; } return false; } static bool power_supply_hwmon_is_writable(enum hwmon_sensor_types type, u32 attr) { switch (type) { case hwmon_in: return attr == hwmon_in_min || attr == hwmon_in_max; case hwmon_curr: return attr == hwmon_curr_max; case hwmon_temp: return attr == hwmon_temp_max || attr == hwmon_temp_min || attr == hwmon_temp_min_alarm || attr == hwmon_temp_max_alarm; default: return false; } } static umode_t power_supply_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) { const struct power_supply_hwmon *psyhw = data; int prop; if (power_supply_hwmon_is_a_label(type, attr)) { if (power_supply_hwmon_has_input(psyhw, type, channel)) return 0444; else return 0; } prop = power_supply_hwmon_to_property(type, attr, channel); if (prop < 0 || !test_bit(prop, psyhw->props)) return 0; if (power_supply_property_is_writeable(psyhw->psy, prop) > 0 && power_supply_hwmon_is_writable(type, attr)) return 0644; return 0444; } static int power_supply_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) { switch (type) { case hwmon_temp: *str = ps_temp_label[channel]; break; default: /* unreachable, but see: * gcc bug #51513 [1] and clang bug #978 [2] * * [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51513 * [2] https://github.com/ClangBuiltLinux/linux/issues/978 */ break; } return 0; } static int power_supply_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct power_supply_hwmon *psyhw = dev_get_drvdata(dev); struct power_supply *psy = psyhw->psy; union power_supply_propval pspval; int ret, prop; prop = power_supply_hwmon_to_property(type, attr, channel); if (prop < 0) return prop; ret = power_supply_get_property(psy, prop, &pspval); if (ret) return ret; switch (type) { /* * Both voltage and current is reported in units of * microvolts/microamps, so we need to adjust it to * milliamps(volts) */ case hwmon_curr: case hwmon_in: pspval.intval = DIV_ROUND_CLOSEST(pspval.intval, 1000); break; /* * Temp needs to be converted from 1/10 C to milli-C */ case hwmon_temp: if (check_mul_overflow(pspval.intval, 100, &pspval.intval)) return -EOVERFLOW; break; default: return -EINVAL; } *val = pspval.intval; return 0; } static int power_supply_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long val) { struct power_supply_hwmon *psyhw = dev_get_drvdata(dev); struct power_supply *psy = psyhw->psy; union power_supply_propval pspval; int prop; prop = power_supply_hwmon_to_property(type, attr, channel); if (prop < 0) return prop; pspval.intval = val; switch (type) { /* * Both voltage and 
current is reported in units of * microvolts/microamps, so we need to adjust it to * milliamps(volts) */ case hwmon_curr: case hwmon_in: if (check_mul_overflow(pspval.intval, 1000, &pspval.intval)) return -EOVERFLOW; break; /* * Temp needs to be converted from 1/10 C to milli-C */ case hwmon_temp: pspval.intval = DIV_ROUND_CLOSEST(pspval.intval, 100); break; default: return -EINVAL; } return power_supply_set_property(psy, prop, &pspval); } static const struct hwmon_ops power_supply_hwmon_ops = { .is_visible = power_supply_hwmon_is_visible, .read = power_supply_hwmon_read, .write = power_supply_hwmon_write, .read_string = power_supply_hwmon_read_string, }; static const struct hwmon_channel_info * const power_supply_hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | HWMON_T_MIN_ALARM, HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM), HWMON_CHANNEL_INFO(curr, HWMON_C_AVERAGE | HWMON_C_MAX | HWMON_C_INPUT), HWMON_CHANNEL_INFO(in, HWMON_I_AVERAGE | HWMON_I_MIN | HWMON_I_MAX | HWMON_I_INPUT), NULL }; static const struct hwmon_chip_info power_supply_hwmon_chip_info = { .ops = &power_supply_hwmon_ops, .info = power_supply_hwmon_info, }; int power_supply_add_hwmon_sysfs(struct power_supply *psy) { const struct power_supply_desc *desc = psy->desc; struct power_supply_hwmon *psyhw; struct device *dev = &psy->dev; struct device *hwmon; int ret, i; const char *name; if (!devres_open_group(dev, power_supply_add_hwmon_sysfs, GFP_KERNEL)) return -ENOMEM; psyhw = devm_kzalloc(dev, sizeof(*psyhw), GFP_KERNEL); if (!psyhw) { ret = -ENOMEM; goto error; } psyhw->psy = psy; psyhw->props = devm_bitmap_zalloc(dev, POWER_SUPPLY_PROP_TIME_TO_FULL_AVG + 1, GFP_KERNEL); if (!psyhw->props) { ret = -ENOMEM; goto error; } for (i = 0; i < desc->num_properties; i++) { const enum power_supply_property prop = desc->properties[i]; switch (prop) { case POWER_SUPPLY_PROP_CURRENT_AVG: case POWER_SUPPLY_PROP_CURRENT_MAX: case POWER_SUPPLY_PROP_CURRENT_NOW: case POWER_SUPPLY_PROP_TEMP: case POWER_SUPPLY_PROP_TEMP_MAX: case POWER_SUPPLY_PROP_TEMP_MIN: case POWER_SUPPLY_PROP_TEMP_ALERT_MIN: case POWER_SUPPLY_PROP_TEMP_ALERT_MAX: case POWER_SUPPLY_PROP_TEMP_AMBIENT: case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN: case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX: case POWER_SUPPLY_PROP_VOLTAGE_AVG: case POWER_SUPPLY_PROP_VOLTAGE_MIN: case POWER_SUPPLY_PROP_VOLTAGE_MAX: case POWER_SUPPLY_PROP_VOLTAGE_NOW: set_bit(prop, psyhw->props); break; default: break; } } name = psy->desc->name; if (strchr(name, '-')) { char *new_name; new_name = devm_kstrdup(dev, name, GFP_KERNEL); if (!new_name) { ret = -ENOMEM; goto error; } strreplace(new_name, '-', '_'); name = new_name; } hwmon = devm_hwmon_device_register_with_info(dev, name, psyhw, &power_supply_hwmon_chip_info, NULL); ret = PTR_ERR_OR_ZERO(hwmon); if (ret) goto error; devres_close_group(dev, power_supply_add_hwmon_sysfs); return 0; error: devres_release_group(dev, NULL); return ret; } void power_supply_remove_hwmon_sysfs(struct power_supply *psy) { devres_release_group(&psy->dev, power_supply_add_hwmon_sysfs); }
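/*
 * Illustrative sketch (not part of power_supply_hwmon.c): the unit
 * conversions performed by the read/write callbacks above, written out as
 * standalone helpers.  Power-supply voltage/current properties are reported
 * in microvolts/microamps while hwmon expects millivolts/milliamps, and
 * temperatures are tenths of a degree Celsius on the power-supply side
 * versus millidegrees on the hwmon side.  The example_* names are
 * hypothetical.
 */
static long example_psy_uv_to_hwmon_mv(int microvolts)
{
	/* e.g. 3700000 uV is reported to hwmon as 3700 mV */
	return DIV_ROUND_CLOSEST(microvolts, 1000);
}

static long example_psy_decidegc_to_hwmon_mdegc(int tenths_of_degc)
{
	/* e.g. 251 (25.1 degC) is reported to hwmon as 25100 mdegC */
	return (long)tenths_of_degc * 100;
}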
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_BITOPS_H
#define _LINUX_BITOPS_H

#include <asm/types.h>
#include <linux/bits.h>
#include <linux/typecheck.h>

#include <uapi/linux/kernel.h>

/* Set bits in the first 'n' bytes when loaded from memory */
#ifdef __LITTLE_ENDIAN
#  define aligned_byte_mask(n) ((1UL << 8*(n))-1)
#else
#  define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n)))
#endif

#define BITS_PER_TYPE(type)	(sizeof(type) * BITS_PER_BYTE)
#define BITS_TO_LONGS(nr)	__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
#define BITS_TO_U64(nr)		__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64))
#define BITS_TO_U32(nr)		__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32))
#define BITS_TO_BYTES(nr)	__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(char))

extern unsigned int __sw_hweight8(unsigned int w);
extern unsigned int __sw_hweight16(unsigned int w);
extern unsigned int __sw_hweight32(unsigned int w);
extern unsigned long __sw_hweight64(__u64 w);

/*
 * Defined here because those may be needed by architecture-specific static
 * inlines.
 */

#include <asm-generic/bitops/generic-non-atomic.h>

/*
 * Many architecture-specific non-atomic bitops contain inline asm code and due
 * to that the compiler can't optimize them to compile-time expressions or
 * constants. In contrast, generic_*() helpers are defined in pure C and
 * compilers optimize them just well.
 * Therefore, to make `unsigned long foo = 0; __set_bit(BAR, &foo)` effectively
 * equal to `unsigned long foo = BIT(BAR)`, pick the generic C alternative when
 * the arguments can be resolved at compile time. That expression itself is a
 * constant and doesn't bring any functional changes to the rest of cases.
 * The casts to `uintptr_t` are needed to mitigate `-Waddress` warnings when
 * passing a bitmap from .bss or .data (-> `!!addr` is always true).
 */
#define bitop(op, nr, addr)						\
	((__builtin_constant_p(nr) &&					\
	  __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) &&	\
	  (uintptr_t)(addr) != (uintptr_t)NULL &&			\
	  __builtin_constant_p(*(const unsigned long *)(addr))) ?
\ const##op(nr, addr) : op(nr, addr)) #define __set_bit(nr, addr) bitop(___set_bit, nr, addr) #define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) #define __change_bit(nr, addr) bitop(___change_bit, nr, addr) #define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) #define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) /* * Include this here because some architectures need generic_ffs/fls in * scope */ #include <asm/bitops.h> /* Check that the bitops prototypes are sane */ #define __check_bitop_pr(name) \ static_assert(__same_type(arch_##name, generic_##name) && \ __same_type(const_##name, generic_##name) && \ __same_type(_##name, generic_##name)) __check_bitop_pr(__set_bit); __check_bitop_pr(__clear_bit); __check_bitop_pr(__change_bit); __check_bitop_pr(__test_and_set_bit); __check_bitop_pr(__test_and_clear_bit); __check_bitop_pr(__test_and_change_bit); __check_bitop_pr(test_bit); #undef __check_bitop_pr static inline int get_bitmask_order(unsigned int count) { int order; order = fls(count); return order; /* We could be slightly more clever with -1 here... */ } static __always_inline unsigned long hweight_long(unsigned long w) { return sizeof(w) == 4 ? hweight32(w) : hweight64((__u64)w); } /** * rol64 - rotate a 64-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u64 rol64(__u64 word, unsigned int shift) { return (word << (shift & 63)) | (word >> ((-shift) & 63)); } /** * ror64 - rotate a 64-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u64 ror64(__u64 word, unsigned int shift) { return (word >> (shift & 63)) | (word << ((-shift) & 63)); } /** * rol32 - rotate a 32-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u32 rol32(__u32 word, unsigned int shift) { return (word << (shift & 31)) | (word >> ((-shift) & 31)); } /** * ror32 - rotate a 32-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u32 ror32(__u32 word, unsigned int shift) { return (word >> (shift & 31)) | (word << ((-shift) & 31)); } /** * rol16 - rotate a 16-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u16 rol16(__u16 word, unsigned int shift) { return (word << (shift & 15)) | (word >> ((-shift) & 15)); } /** * ror16 - rotate a 16-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u16 ror16(__u16 word, unsigned int shift) { return (word >> (shift & 15)) | (word << ((-shift) & 15)); } /** * rol8 - rotate an 8-bit value left * @word: value to rotate * @shift: bits to roll */ static inline __u8 rol8(__u8 word, unsigned int shift) { return (word << (shift & 7)) | (word >> ((-shift) & 7)); } /** * ror8 - rotate an 8-bit value right * @word: value to rotate * @shift: bits to roll */ static inline __u8 ror8(__u8 word, unsigned int shift) { return (word >> (shift & 7)) | (word << ((-shift) & 7)); } /** * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<32) to sign bit * * This is safe to use for 16- and 8-bit types as well. 
*/ static __always_inline __s32 sign_extend32(__u32 value, int index) { __u8 shift = 31 - index; return (__s32)(value << shift) >> shift; } /** * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<64) to sign bit */ static __always_inline __s64 sign_extend64(__u64 value, int index) { __u8 shift = 63 - index; return (__s64)(value << shift) >> shift; } static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); return fls64(l); } static inline int get_count_order(unsigned int count) { if (count == 0) return -1; return fls(--count); } /** * get_count_order_long - get order after rounding @l up to power of 2 * @l: parameter * * it is same as get_count_order() but with long type parameter */ static inline int get_count_order_long(unsigned long l) { if (l == 0UL) return -1; return (int)fls_long(--l); } /** * __ffs64 - find first set bit in a 64 bit word * @word: The 64 bit word * * On 64 bit arches this is a synonym for __ffs * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. */ static inline unsigned long __ffs64(u64 word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) return __ffs((u32)(word >> 32)) + 32; #elif BITS_PER_LONG != 64 #error BITS_PER_LONG not 32 or 64 #endif return __ffs((unsigned long)word); } /** * fns - find N'th set bit in a word * @word: The word to search * @n: Bit to find */ static inline unsigned long fns(unsigned long word, unsigned int n) { unsigned int bit; while (word) { bit = __ffs(word); if (n-- == 0) return bit; __clear_bit(bit, &word); } return BITS_PER_LONG; } /** * assign_bit - Assign value to a bit in memory * @nr: the bit to set * @addr: the address to start counting from * @value: the value to assign */ static __always_inline void assign_bit(long nr, volatile unsigned long *addr, bool value) { if (value) set_bit(nr, addr); else clear_bit(nr, addr); } static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, bool value) { if (value) __set_bit(nr, addr); else __clear_bit(nr, addr); } /** * __ptr_set_bit - Set bit in a pointer's value * @nr: the bit to set * @addr: the address of the pointer variable * * Example: * void *p = foo(); * __ptr_set_bit(bit, &p); */ #define __ptr_set_bit(nr, addr) \ ({ \ typecheck_pointer(*(addr)); \ __set_bit(nr, (unsigned long *)(addr)); \ }) /** * __ptr_clear_bit - Clear bit in a pointer's value * @nr: the bit to clear * @addr: the address of the pointer variable * * Example: * void *p = foo(); * __ptr_clear_bit(bit, &p); */ #define __ptr_clear_bit(nr, addr) \ ({ \ typecheck_pointer(*(addr)); \ __clear_bit(nr, (unsigned long *)(addr)); \ }) /** * __ptr_test_bit - Test bit in a pointer's value * @nr: the bit to test * @addr: the address of the pointer variable * * Example: * void *p = foo(); * if (__ptr_test_bit(bit, &p)) { * ... * } else { * ... 
* } */ #define __ptr_test_bit(nr, addr) \ ({ \ typecheck_pointer(*(addr)); \ test_bit(nr, (unsigned long *)(addr)); \ }) #ifdef __KERNEL__ #ifndef set_mask_bits #define set_mask_bits(ptr, mask, bits) \ ({ \ const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ typeof(*(ptr)) old__, new__; \ \ old__ = READ_ONCE(*(ptr)); \ do { \ new__ = (old__ & ~mask__) | bits__; \ } while (!try_cmpxchg(ptr, &old__, new__)); \ \ old__; \ }) #endif #ifndef bit_clear_unless #define bit_clear_unless(ptr, clear, test) \ ({ \ const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ typeof(*(ptr)) old__, new__; \ \ old__ = READ_ONCE(*(ptr)); \ do { \ if (old__ & test__) \ break; \ new__ = old__ & ~clear__; \ } while (!try_cmpxchg(ptr, &old__, new__)); \ \ !(old__ & test__); \ }) #endif #endif /* __KERNEL__ */ #endif
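/*
 * A minimal, self-contained userspace sketch of the semantics documented
 * above for sign_extend32(), rol32() and fns().  The demo_*() functions are
 * plain-C re-implementations invented purely for this illustration; they are
 * not the kernel code, and the names and example values are made up.
 */
#include <stdint.h>
#include <stdio.h>

/* Same idea as sign_extend32(): move the chosen sign bit up to bit 31,
 * then rely on the arithmetic right shift to copy it back down. */
static int32_t demo_sign_extend32(uint32_t value, int index)
{
        uint8_t shift = 31 - index;

        return (int32_t)(value << shift) >> shift;
}

/* Same idea as rol32(): the (-shift) & 31 trick avoids an undefined
 * shift by 32 when shift == 0. */
static uint32_t demo_rol32(uint32_t word, unsigned int shift)
{
        return (word << (shift & 31)) | (word >> ((-shift) & 31));
}

/* Same idea as fns(): walk the set bits from the LSB side until the
 * n'th one is reached; 64 here plays the role of BITS_PER_LONG for
 * "not found". */
static unsigned int demo_fns(unsigned long word, unsigned int n)
{
        while (word) {
                unsigned int bit = __builtin_ctzl(word);

                if (n-- == 0)
                        return bit;
                word &= word - 1;       /* clear the lowest set bit */
        }
        return 64;
}

int main(void)
{
        /* 0x80 sign-extended from bit 7 is -128 (0xffffff80). */
        printf("sign_extend32(0x80, 7) = %d\n", demo_sign_extend32(0x80, 7));
        /* Rotating 0x80000001 left by one wraps the top bit around: 0x3. */
        printf("rol32(0x80000001, 1)   = 0x%x\n", demo_rol32(0x80000001u, 1));
        /* Bits 1, 4 and 9 are set; the 2nd (0-based) set bit is bit 9. */
        printf("fns(0x212, 2)          = %u\n", demo_fns(0x212, 2));
        return 0;
}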
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * drivers/usb/input/yealink.c
 *
 * Copyright (c) 2005 Henk Vergonet <Henk.Vergonet@gmail.com>
 */
/*
 * Description:
 *   Driver for the USB-P1K voip usb phone.
 *   This device is produced by Yealink Network Technology Co Ltd
 *   but may be branded under several names:
 *	- Yealink usb-p1k
 *	- Tiptel 115
 *	- ...
 *
 * This driver is based on:
 *   - the usbb2k-api	http://savannah.nongnu.org/projects/usbb2k-api/
 *   - information from	http://memeteau.free.fr/usbb2k
 *   - the xpad-driver	drivers/input/joystick/xpad.c
 *
 * Thanks to:
 *   - Olivier Vandorpe, for providing the usbb2k-api.
 *   - Martin Diehl, for spotting my memory allocation bug.
 *
 * History:
 *   20050527 henk	First version, functional keyboard. Keyboard events
 *			will pop-up on the ../input/eventX bus.
 *   20050531 henk	Added led, LCD, dialtone and sysfs interface.
 *   20050610 henk	Cleanups, make it ready for public consumption.
 *   20050630 henk	Cleanups, fixes in response to comments.
 *   20050701 henk	sysfs write serialisation, fix potential unload races
 *   20050801 henk	Added ringtone, restructure USB
 *   20050816 henk	Merge 2.6.13-rc6
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/rwsem.h>
#include <linux/usb/input.h>
#include <linux/map_to_7segment.h>

#include "yealink.h"

#define DRIVER_VERSION "yld-20051230"

#define YEALINK_POLLING_FREQUENCY	10	/* in [Hz] */

struct yld_status {
	u8	lcd[24];
	u8	led;
	u8	dialtone;
	u8	ringtone;
	u8	keynum;
} __attribute__ ((packed));

/*
 * Register the LCD segment and icon map
 */
#define _LOC(k,l)	{ .a = (k), .m = (l) }
#define _SEG(t, a, am, b, bm, c, cm, d, dm, e, em, f, fm, g, gm)	\
	{ .type = (t),							\
	  .u = { .s = { _LOC(a, am), _LOC(b, bm), _LOC(c, cm),		\
			_LOC(d, dm), _LOC(e, em), _LOC(g, gm),		\
			_LOC(f, fm) } } }
#define _PIC(t, h, hm, n)						\
	{ .type = (t),							\
	  .u = { .p = { .name = (n), .a = (h), .m = (hm) } } }

static const struct lcd_segment_map {
	char	type;
	union {
		struct pictogram_map {
			u8	a,m;
			char	name[10];
		}	p;
		struct segment_map {
			u8	a,m;
		}	s[7];
	} u;
} lcdMap[] = {
#include "yealink.h"
};

struct yealink_dev {
	struct input_dev *idev;		/* input device */
	struct usb_device *udev;	/* usb device */
	struct usb_interface *intf;	/* usb interface */

	/* irq input channel */
	struct yld_ctl_packet	*irq_data;
	dma_addr_t		irq_dma;
	struct urb		*urb_irq;

	/* control output channel */
	struct yld_ctl_packet	*ctl_data;
	dma_addr_t		ctl_dma;
	struct usb_ctrlrequest	*ctl_req;
	struct urb		*urb_ctl;

	char phys[64];			/* physical device path */

	u8 lcdMap[ARRAY_SIZE(lcdMap)];	/* state of LCD, LED ... */
	int key_code;			/* last reported key */

	unsigned int shutdown:1;

	int	stat_ix;
	union {
		struct yld_status s;
		u8		  b[sizeof(struct yld_status)];
	} master, copy;
};

/*******************************************************************************
 * Yealink lcd interface
 ******************************************************************************/

/*
 * Register a default 7 segment character set
 */
static SEG7_DEFAULT_MAP(map_seg7);

/* Display a char,
 * char '\t' and '\n' are placeholders and do not overwrite the original text.
 * A space will always hide an icon.
*/ static int setChar(struct yealink_dev *yld, int el, int chr) { int i, a, m, val; if (el >= ARRAY_SIZE(lcdMap)) return -EINVAL; if (chr == '\t' || chr == '\n') return 0; yld->lcdMap[el] = chr; if (lcdMap[el].type == '.') { a = lcdMap[el].u.p.a; m = lcdMap[el].u.p.m; if (chr != ' ') yld->master.b[a] |= m; else yld->master.b[a] &= ~m; return 0; } val = map_to_seg7(&map_seg7, chr); for (i = 0; i < ARRAY_SIZE(lcdMap[0].u.s); i++) { m = lcdMap[el].u.s[i].m; if (m == 0) continue; a = lcdMap[el].u.s[i].a; if (val & 1) yld->master.b[a] |= m; else yld->master.b[a] &= ~m; val = val >> 1; } return 0; }; /******************************************************************************* * Yealink key interface ******************************************************************************/ /* Map device buttons to internal key events. * * USB-P1K button layout: * * up * IN OUT * down * * pickup C hangup * 1 2 3 * 4 5 6 * 7 8 9 * * 0 # * * The "up" and "down" keys, are symbolised by arrows on the button. * The "pickup" and "hangup" keys are symbolised by a green and red phone * on the button. */ static int map_p1k_to_key(int scancode) { switch(scancode) { /* phone key: */ case 0x23: return KEY_LEFT; /* IN */ case 0x33: return KEY_UP; /* up */ case 0x04: return KEY_RIGHT; /* OUT */ case 0x24: return KEY_DOWN; /* down */ case 0x03: return KEY_ENTER; /* pickup */ case 0x14: return KEY_BACKSPACE; /* C */ case 0x13: return KEY_ESC; /* hangup */ case 0x00: return KEY_1; /* 1 */ case 0x01: return KEY_2; /* 2 */ case 0x02: return KEY_3; /* 3 */ case 0x10: return KEY_4; /* 4 */ case 0x11: return KEY_5; /* 5 */ case 0x12: return KEY_6; /* 6 */ case 0x20: return KEY_7; /* 7 */ case 0x21: return KEY_8; /* 8 */ case 0x22: return KEY_9; /* 9 */ case 0x30: return KEY_KPASTERISK; /* * */ case 0x31: return KEY_0; /* 0 */ case 0x32: return KEY_LEFTSHIFT | KEY_3 << 8; /* # */ } return -EINVAL; } /* Completes a request by converting the data into events for the * input subsystem. 
* * The key parameter can be cascaded: key2 << 8 | key1 */ static void report_key(struct yealink_dev *yld, int key) { struct input_dev *idev = yld->idev; if (yld->key_code >= 0) { /* old key up */ input_report_key(idev, yld->key_code & 0xff, 0); if (yld->key_code >> 8) input_report_key(idev, yld->key_code >> 8, 0); } yld->key_code = key; if (key >= 0) { /* new valid key */ input_report_key(idev, key & 0xff, 1); if (key >> 8) input_report_key(idev, key >> 8, 1); } input_sync(idev); } /******************************************************************************* * Yealink usb communication interface ******************************************************************************/ static int yealink_cmd(struct yealink_dev *yld, struct yld_ctl_packet *p) { u8 *buf = (u8 *)p; int i; u8 sum = 0; for(i=0; i<USB_PKT_LEN-1; i++) sum -= buf[i]; p->sum = sum; return usb_control_msg(yld->udev, usb_sndctrlpipe(yld->udev, 0), USB_REQ_SET_CONFIGURATION, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, 0x200, 3, p, sizeof(*p), USB_CTRL_SET_TIMEOUT); } static u8 default_ringtone[] = { 0xEF, /* volume [0-255] */ 0xFB, 0x1E, 0x00, 0x0C, /* 1250 [hz], 12/100 [s] */ 0xFC, 0x18, 0x00, 0x0C, /* 1000 [hz], 12/100 [s] */ 0xFB, 0x1E, 0x00, 0x0C, 0xFC, 0x18, 0x00, 0x0C, 0xFB, 0x1E, 0x00, 0x0C, 0xFC, 0x18, 0x00, 0x0C, 0xFB, 0x1E, 0x00, 0x0C, 0xFC, 0x18, 0x00, 0x0C, 0xFF, 0xFF, 0x01, 0x90, /* silent, 400/100 [s] */ 0x00, 0x00 /* end of sequence */ }; static int yealink_set_ringtone(struct yealink_dev *yld, u8 *buf, size_t size) { struct yld_ctl_packet *p = yld->ctl_data; int ix, len; if (size <= 0) return -EINVAL; /* Set the ringtone volume */ memset(yld->ctl_data, 0, sizeof(*(yld->ctl_data))); yld->ctl_data->cmd = CMD_RING_VOLUME; yld->ctl_data->size = 1; yld->ctl_data->data[0] = buf[0]; yealink_cmd(yld, p); buf++; size--; p->cmd = CMD_RING_NOTE; ix = 0; while (size != ix) { len = size - ix; if (len > sizeof(p->data)) len = sizeof(p->data); p->size = len; p->offset = cpu_to_be16(ix); memcpy(p->data, &buf[ix], len); yealink_cmd(yld, p); ix += len; } return 0; } /* keep stat_master & stat_copy in sync. */ static int yealink_do_idle_tasks(struct yealink_dev *yld) { u8 val; int i, ix, len; ix = yld->stat_ix; memset(yld->ctl_data, 0, sizeof(*(yld->ctl_data))); yld->ctl_data->cmd = CMD_KEYPRESS; yld->ctl_data->size = 1; yld->ctl_data->sum = 0xff - CMD_KEYPRESS; /* If state update pointer wraps do a KEYPRESS first. */ if (ix >= sizeof(yld->master)) { yld->stat_ix = 0; return 0; } /* find update candidates: copy != master */ do { val = yld->master.b[ix]; if (val != yld->copy.b[ix]) goto send_update; } while (++ix < sizeof(yld->master)); /* nothing todo, wait a bit and poll for a KEYPRESS */ yld->stat_ix = 0; /* TODO how can we wait abit. ?? 
* msleep_interruptible(1000 / YEALINK_POLLING_FREQUENCY); */ return 0; send_update: /* Setup an appropriate update request */ yld->copy.b[ix] = val; yld->ctl_data->data[0] = val; switch(ix) { case offsetof(struct yld_status, led): yld->ctl_data->cmd = CMD_LED; yld->ctl_data->sum = -1 - CMD_LED - val; break; case offsetof(struct yld_status, dialtone): yld->ctl_data->cmd = CMD_DIALTONE; yld->ctl_data->sum = -1 - CMD_DIALTONE - val; break; case offsetof(struct yld_status, ringtone): yld->ctl_data->cmd = CMD_RINGTONE; yld->ctl_data->sum = -1 - CMD_RINGTONE - val; break; case offsetof(struct yld_status, keynum): val--; val &= 0x1f; yld->ctl_data->cmd = CMD_SCANCODE; yld->ctl_data->offset = cpu_to_be16(val); yld->ctl_data->data[0] = 0; yld->ctl_data->sum = -1 - CMD_SCANCODE - val; break; default: len = sizeof(yld->master.s.lcd) - ix; if (len > sizeof(yld->ctl_data->data)) len = sizeof(yld->ctl_data->data); /* Combine up to <len> consecutive LCD bytes in a singe request */ yld->ctl_data->cmd = CMD_LCD; yld->ctl_data->offset = cpu_to_be16(ix); yld->ctl_data->size = len; yld->ctl_data->sum = -CMD_LCD - ix - val - len; for(i=1; i<len; i++) { ix++; val = yld->master.b[ix]; yld->copy.b[ix] = val; yld->ctl_data->data[i] = val; yld->ctl_data->sum -= val; } } yld->stat_ix = ix + 1; return 1; } /* Decide on how to handle responses * * The state transition diagram is somethhing like: * * syncState<--+ * | | * | idle * \|/ | * init --ok--> waitForKey --ok--> getKey * ^ ^ | * | +-------ok-------+ * error,start * */ static void urb_irq_callback(struct urb *urb) { struct yealink_dev *yld = urb->context; int ret, status = urb->status; if (status) dev_err(&yld->intf->dev, "%s - urb status %d\n", __func__, status); switch (yld->irq_data->cmd) { case CMD_KEYPRESS: yld->master.s.keynum = yld->irq_data->data[0]; break; case CMD_SCANCODE: dev_dbg(&yld->intf->dev, "get scancode %x\n", yld->irq_data->data[0]); report_key(yld, map_p1k_to_key(yld->irq_data->data[0])); break; default: dev_err(&yld->intf->dev, "unexpected response %x\n", yld->irq_data->cmd); } yealink_do_idle_tasks(yld); if (!yld->shutdown) { ret = usb_submit_urb(yld->urb_ctl, GFP_ATOMIC); if (ret && ret != -EPERM) dev_err(&yld->intf->dev, "%s - usb_submit_urb failed %d\n", __func__, ret); } } static void urb_ctl_callback(struct urb *urb) { struct yealink_dev *yld = urb->context; int ret = 0, status = urb->status; if (status) dev_err(&yld->intf->dev, "%s - urb status %d\n", __func__, status); switch (yld->ctl_data->cmd) { case CMD_KEYPRESS: case CMD_SCANCODE: /* ask for a response */ if (!yld->shutdown) ret = usb_submit_urb(yld->urb_irq, GFP_ATOMIC); break; default: /* send new command */ yealink_do_idle_tasks(yld); if (!yld->shutdown) ret = usb_submit_urb(yld->urb_ctl, GFP_ATOMIC); break; } if (ret && ret != -EPERM) dev_err(&yld->intf->dev, "%s - usb_submit_urb failed %d\n", __func__, ret); } /******************************************************************************* * input event interface ******************************************************************************/ /* TODO should we issue a ringtone on a SND_BELL event? 
static int input_ev(struct input_dev *dev, unsigned int type, unsigned int code, int value) { if (type != EV_SND) return -EINVAL; switch (code) { case SND_BELL: case SND_TONE: break; default: return -EINVAL; } return 0; } */ static int input_open(struct input_dev *dev) { struct yealink_dev *yld = input_get_drvdata(dev); int i, ret; dev_dbg(&yld->intf->dev, "%s\n", __func__); /* force updates to device */ for (i = 0; i<sizeof(yld->master); i++) yld->copy.b[i] = ~yld->master.b[i]; yld->key_code = -1; /* no keys pressed */ yealink_set_ringtone(yld, default_ringtone, sizeof(default_ringtone)); /* issue INIT */ memset(yld->ctl_data, 0, sizeof(*(yld->ctl_data))); yld->ctl_data->cmd = CMD_INIT; yld->ctl_data->size = 10; yld->ctl_data->sum = 0x100-CMD_INIT-10; if ((ret = usb_submit_urb(yld->urb_ctl, GFP_KERNEL)) != 0) { dev_dbg(&yld->intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, ret); return ret; } return 0; } static void input_close(struct input_dev *dev) { struct yealink_dev *yld = input_get_drvdata(dev); yld->shutdown = 1; /* * Make sure the flag is seen by other CPUs before we start * killing URBs so new URBs won't be submitted */ smp_wmb(); usb_kill_urb(yld->urb_ctl); usb_kill_urb(yld->urb_irq); yld->shutdown = 0; smp_wmb(); } /******************************************************************************* * sysfs interface ******************************************************************************/ static DECLARE_RWSEM(sysfs_rwsema); /* Interface to the 7-segments translation table aka. char set. */ static ssize_t show_map(struct device *dev, struct device_attribute *attr, char *buf) { memcpy(buf, &map_seg7, sizeof(map_seg7)); return sizeof(map_seg7); } static ssize_t store_map(struct device *dev, struct device_attribute *attr, const char *buf, size_t cnt) { if (cnt != sizeof(map_seg7)) return -EINVAL; memcpy(&map_seg7, buf, sizeof(map_seg7)); return sizeof(map_seg7); } /* Interface to the LCD. */ /* Reading /sys/../lineX will return the format string with its settings: * * Example: * cat ./line3 * 888888888888 * Linux Rocks! */ static ssize_t show_line(struct device *dev, char *buf, int a, int b) { struct yealink_dev *yld; int i; down_read(&sysfs_rwsema); yld = dev_get_drvdata(dev); if (yld == NULL) { up_read(&sysfs_rwsema); return -ENODEV; } for (i = a; i < b; i++) *buf++ = lcdMap[i].type; *buf++ = '\n'; for (i = a; i < b; i++) *buf++ = yld->lcdMap[i]; *buf++ = '\n'; *buf = 0; up_read(&sysfs_rwsema); return 3 + ((b - a) << 1); } static ssize_t show_line1(struct device *dev, struct device_attribute *attr, char *buf) { return show_line(dev, buf, LCD_LINE1_OFFSET, LCD_LINE2_OFFSET); } static ssize_t show_line2(struct device *dev, struct device_attribute *attr, char *buf) { return show_line(dev, buf, LCD_LINE2_OFFSET, LCD_LINE3_OFFSET); } static ssize_t show_line3(struct device *dev, struct device_attribute *attr, char *buf) { return show_line(dev, buf, LCD_LINE3_OFFSET, LCD_LINE4_OFFSET); } /* Writing to /sys/../lineX will set the coresponding LCD line. * - Excess characters are ignored. * - If less characters are written than allowed, the remaining digits are * unchanged. * - The '\n' or '\t' char is a placeholder, it does not overwrite the * original content. 
*/ static ssize_t store_line(struct device *dev, const char *buf, size_t count, int el, size_t len) { struct yealink_dev *yld; int i; down_write(&sysfs_rwsema); yld = dev_get_drvdata(dev); if (yld == NULL) { up_write(&sysfs_rwsema); return -ENODEV; } if (len > count) len = count; for (i = 0; i < len; i++) setChar(yld, el++, buf[i]); up_write(&sysfs_rwsema); return count; } static ssize_t store_line1(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return store_line(dev, buf, count, LCD_LINE1_OFFSET, LCD_LINE1_SIZE); } static ssize_t store_line2(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return store_line(dev, buf, count, LCD_LINE2_OFFSET, LCD_LINE2_SIZE); } static ssize_t store_line3(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return store_line(dev, buf, count, LCD_LINE3_OFFSET, LCD_LINE3_SIZE); } /* Interface to visible and audible "icons", these include: * pictures on the LCD, the LED, and the dialtone signal. */ /* Get a list of "switchable elements" with their current state. */ static ssize_t get_icons(struct device *dev, struct device_attribute *attr, char *buf) { struct yealink_dev *yld; int i, ret = 1; down_read(&sysfs_rwsema); yld = dev_get_drvdata(dev); if (yld == NULL) { up_read(&sysfs_rwsema); return -ENODEV; } for (i = 0; i < ARRAY_SIZE(lcdMap); i++) { if (lcdMap[i].type != '.') continue; ret += sprintf(&buf[ret], "%s %s\n", yld->lcdMap[i] == ' ' ? " " : "on", lcdMap[i].u.p.name); } up_read(&sysfs_rwsema); return ret; } /* Change the visibility of a particular element. */ static ssize_t set_icon(struct device *dev, const char *buf, size_t count, int chr) { struct yealink_dev *yld; int i; down_write(&sysfs_rwsema); yld = dev_get_drvdata(dev); if (yld == NULL) { up_write(&sysfs_rwsema); return -ENODEV; } for (i = 0; i < ARRAY_SIZE(lcdMap); i++) { if (lcdMap[i].type != '.') continue; if (strncmp(buf, lcdMap[i].u.p.name, count) == 0) { setChar(yld, i, chr); break; } } up_write(&sysfs_rwsema); return count; } static ssize_t show_icon(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return set_icon(dev, buf, count, buf[0]); } static ssize_t hide_icon(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return set_icon(dev, buf, count, ' '); } /* Upload a ringtone to the device. */ /* Stores raw ringtone data in the phone */ static ssize_t store_ringtone(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct yealink_dev *yld; down_write(&sysfs_rwsema); yld = dev_get_drvdata(dev); if (yld == NULL) { up_write(&sysfs_rwsema); return -ENODEV; } /* TODO locking with async usb control interface??? 
*/ yealink_set_ringtone(yld, (char *)buf, count); up_write(&sysfs_rwsema); return count; } #define _M444 S_IRUGO #define _M664 S_IRUGO|S_IWUSR|S_IWGRP #define _M220 S_IWUSR|S_IWGRP static DEVICE_ATTR(map_seg7 , _M664, show_map , store_map ); static DEVICE_ATTR(line1 , _M664, show_line1 , store_line1 ); static DEVICE_ATTR(line2 , _M664, show_line2 , store_line2 ); static DEVICE_ATTR(line3 , _M664, show_line3 , store_line3 ); static DEVICE_ATTR(get_icons , _M444, get_icons , NULL ); static DEVICE_ATTR(show_icon , _M220, NULL , show_icon ); static DEVICE_ATTR(hide_icon , _M220, NULL , hide_icon ); static DEVICE_ATTR(ringtone , _M220, NULL , store_ringtone); static struct attribute *yld_attributes[] = { &dev_attr_line1.attr, &dev_attr_line2.attr, &dev_attr_line3.attr, &dev_attr_get_icons.attr, &dev_attr_show_icon.attr, &dev_attr_hide_icon.attr, &dev_attr_map_seg7.attr, &dev_attr_ringtone.attr, NULL }; static const struct attribute_group yld_attr_group = { .attrs = yld_attributes }; /******************************************************************************* * Linux interface and usb initialisation ******************************************************************************/ struct driver_info { char *name; }; static const struct driver_info info_P1K = { .name = "Yealink usb-p1k", }; static const struct usb_device_id usb_table [] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x6993, .idProduct = 0xb001, .bInterfaceClass = USB_CLASS_HID, .bInterfaceSubClass = 0, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&info_P1K }, { } }; static int usb_cleanup(struct yealink_dev *yld, int err) { if (yld == NULL) return err; if (yld->idev) { if (err) input_free_device(yld->idev); else input_unregister_device(yld->idev); } usb_free_urb(yld->urb_irq); usb_free_urb(yld->urb_ctl); kfree(yld->ctl_req); usb_free_coherent(yld->udev, USB_PKT_LEN, yld->ctl_data, yld->ctl_dma); usb_free_coherent(yld->udev, USB_PKT_LEN, yld->irq_data, yld->irq_dma); kfree(yld); return err; } static void usb_disconnect(struct usb_interface *intf) { struct yealink_dev *yld; down_write(&sysfs_rwsema); yld = usb_get_intfdata(intf); sysfs_remove_group(&intf->dev.kobj, &yld_attr_group); usb_set_intfdata(intf, NULL); up_write(&sysfs_rwsema); usb_cleanup(yld, 0); } static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev (intf); struct driver_info *nfo = (struct driver_info *)id->driver_info; struct usb_host_interface *interface; struct usb_endpoint_descriptor *endpoint; struct yealink_dev *yld; struct input_dev *input_dev; int ret, pipe, i; interface = intf->cur_altsetting; if (interface->desc.bNumEndpoints < 1) return -ENODEV; endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -ENODEV; yld = kzalloc(sizeof(struct yealink_dev), GFP_KERNEL); if (!yld) return -ENOMEM; yld->udev = udev; yld->intf = intf; yld->idev = input_dev = input_allocate_device(); if (!input_dev) return usb_cleanup(yld, -ENOMEM); /* allocate usb buffers */ yld->irq_data = usb_alloc_coherent(udev, USB_PKT_LEN, GFP_KERNEL, &yld->irq_dma); if (yld->irq_data == NULL) return usb_cleanup(yld, -ENOMEM); yld->ctl_data = usb_alloc_coherent(udev, USB_PKT_LEN, GFP_KERNEL, &yld->ctl_dma); if (!yld->ctl_data) return usb_cleanup(yld, -ENOMEM); yld->ctl_req = kmalloc(sizeof(*(yld->ctl_req)), GFP_KERNEL); if (yld->ctl_req == NULL) return usb_cleanup(yld, -ENOMEM); /* allocate urb structures */ yld->urb_irq = 
usb_alloc_urb(0, GFP_KERNEL); if (yld->urb_irq == NULL) return usb_cleanup(yld, -ENOMEM); yld->urb_ctl = usb_alloc_urb(0, GFP_KERNEL); if (yld->urb_ctl == NULL) return usb_cleanup(yld, -ENOMEM); /* get a handle to the interrupt data pipe */ pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress); ret = usb_maxpacket(udev, pipe); if (ret != USB_PKT_LEN) dev_err(&intf->dev, "invalid payload size %d, expected %zd\n", ret, USB_PKT_LEN); /* initialise irq urb */ usb_fill_int_urb(yld->urb_irq, udev, pipe, yld->irq_data, USB_PKT_LEN, urb_irq_callback, yld, endpoint->bInterval); yld->urb_irq->transfer_dma = yld->irq_dma; yld->urb_irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; yld->urb_irq->dev = udev; /* initialise ctl urb */ yld->ctl_req->bRequestType = USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT; yld->ctl_req->bRequest = USB_REQ_SET_CONFIGURATION; yld->ctl_req->wValue = cpu_to_le16(0x200); yld->ctl_req->wIndex = cpu_to_le16(interface->desc.bInterfaceNumber); yld->ctl_req->wLength = cpu_to_le16(USB_PKT_LEN); usb_fill_control_urb(yld->urb_ctl, udev, usb_sndctrlpipe(udev, 0), (void *)yld->ctl_req, yld->ctl_data, USB_PKT_LEN, urb_ctl_callback, yld); yld->urb_ctl->transfer_dma = yld->ctl_dma; yld->urb_ctl->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; yld->urb_ctl->dev = udev; /* find out the physical bus location */ usb_make_path(udev, yld->phys, sizeof(yld->phys)); strlcat(yld->phys, "/input0", sizeof(yld->phys)); /* register settings for the input device */ input_dev->name = nfo->name; input_dev->phys = yld->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &intf->dev; input_set_drvdata(input_dev, yld); input_dev->open = input_open; input_dev->close = input_close; /* input_dev->event = input_ev; TODO */ /* register available key events */ input_dev->evbit[0] = BIT_MASK(EV_KEY); for (i = 0; i < 256; i++) { int k = map_p1k_to_key(i); if (k >= 0) { set_bit(k & 0xff, input_dev->keybit); if (k >> 8) set_bit(k >> 8, input_dev->keybit); } } ret = input_register_device(yld->idev); if (ret) return usb_cleanup(yld, ret); usb_set_intfdata(intf, yld); /* clear visible elements */ for (i = 0; i < ARRAY_SIZE(lcdMap); i++) setChar(yld, i, ' '); /* display driver version on LCD line 3 */ store_line3(&intf->dev, NULL, DRIVER_VERSION, sizeof(DRIVER_VERSION)); /* Register sysfs hooks (don't care about failure) */ ret = sysfs_create_group(&intf->dev.kobj, &yld_attr_group); return 0; } static struct usb_driver yealink_driver = { .name = "yealink", .probe = usb_probe, .disconnect = usb_disconnect, .id_table = usb_table, }; module_usb_driver(yealink_driver); MODULE_DEVICE_TABLE (usb, usb_table); MODULE_AUTHOR("Henk Vergonet"); MODULE_DESCRIPTION("Yealink phone driver"); MODULE_LICENSE("GPL");
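/*
 * yealink_cmd() above fills in p->sum so that all USB_PKT_LEN bytes of a
 * control packet add up to zero modulo 256, which is also why short forms
 * such as "0xff - CMD_KEYPRESS" or "-1 - CMD_LED - val" work for the
 * mostly-zero packets built in yealink_do_idle_tasks().  The standalone
 * sketch below only demonstrates that invariant on a plain byte buffer:
 * PKT_LEN and the 0x30 "command" byte are made-up stand-ins (USB_PKT_LEN
 * and the CMD_* values live in yealink.h, which is not shown here), and
 * none of this is part of the driver.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PKT_LEN 16	/* placeholder for USB_PKT_LEN */

/* Mirror of the loop in yealink_cmd(): the last byte is the negated sum
 * (mod 256) of everything before it. */
static void fill_checksum(uint8_t *buf, size_t len)
{
	uint8_t sum = 0;
	size_t i;

	for (i = 0; i < len - 1; i++)
		sum -= buf[i];
	buf[len - 1] = sum;
}

/* The matching receiver-side check: a valid packet sums to 0 (mod 256). */
static int checksum_ok(const uint8_t *buf, size_t len)
{
	uint8_t sum = 0;

	while (len--)
		sum += *buf++;
	return sum == 0;
}

int main(void)
{
	uint8_t pkt[PKT_LEN] = { 0x30 /* pretend command */, 0x00, 0x01, 0x42 };

	fill_checksum(pkt, sizeof(pkt));
	printf("sum byte = 0x%02x, packet %s\n", pkt[PKT_LEN - 1],
	       checksum_ok(pkt, sizeof(pkt)) ? "valid" : "corrupt");
	return 0;
}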
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Roccat Isku driver for Linux
 *
 * Copyright (c) 2011 Stefan Achatz <erazor_de@users.sourceforge.net>
 */

/*
 */

/*
 * Roccat Isku is a gamer keyboard with macro keys that can be configured in
 * 5 profiles.
 */

#include <linux/device.h>
#include <linux/input.h>
#include <linux/hid.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/hid-roccat.h>
#include "hid-ids.h"
#include "hid-roccat-common.h"
#include "hid-roccat-isku.h"

static void isku_profile_activated(struct isku_device *isku, uint new_profile)
{
	isku->actual_profile = new_profile;
}

static int isku_receive(struct usb_device *usb_dev, uint command,
		void *buf, uint size)
{
	return roccat_common2_receive(usb_dev, command, buf, size);
}

static int isku_get_actual_profile(struct usb_device *usb_dev)
{
	struct isku_actual_profile buf;
	int retval;

	retval = isku_receive(usb_dev, ISKU_COMMAND_ACTUAL_PROFILE,
			&buf, sizeof(struct isku_actual_profile));
	return retval ?
retval : buf.actual_profile; } static int isku_set_actual_profile(struct usb_device *usb_dev, int new_profile) { struct isku_actual_profile buf; buf.command = ISKU_COMMAND_ACTUAL_PROFILE; buf.size = sizeof(struct isku_actual_profile); buf.actual_profile = new_profile; return roccat_common2_send_with_status(usb_dev, ISKU_COMMAND_ACTUAL_PROFILE, &buf, sizeof(struct isku_actual_profile)); } static ssize_t isku_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct isku_device *isku = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return snprintf(buf, PAGE_SIZE, "%d\n", isku->actual_profile); } static ssize_t isku_sysfs_set_actual_profile(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct isku_device *isku; struct usb_device *usb_dev; unsigned long profile; int retval; struct isku_roccat_report roccat_report; dev = dev->parent->parent; isku = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &profile); if (retval) return retval; if (profile > 4) return -EINVAL; mutex_lock(&isku->isku_lock); retval = isku_set_actual_profile(usb_dev, profile); if (retval) { mutex_unlock(&isku->isku_lock); return retval; } isku_profile_activated(isku, profile); roccat_report.event = ISKU_REPORT_BUTTON_EVENT_PROFILE; roccat_report.data1 = profile + 1; roccat_report.data2 = 0; roccat_report.profile = profile + 1; roccat_report_event(isku->chrdev_minor, (uint8_t const *)&roccat_report); mutex_unlock(&isku->isku_lock); return size; } static DEVICE_ATTR(actual_profile, 0660, isku_sysfs_show_actual_profile, isku_sysfs_set_actual_profile); static struct attribute *isku_attrs[] = { &dev_attr_actual_profile.attr, NULL, }; static ssize_t isku_sysfs_read(struct file *fp, struct kobject *kobj, char *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct isku_device *isku = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off >= real_size) return 0; if (off != 0 || count > real_size) return -EINVAL; mutex_lock(&isku->isku_lock); retval = isku_receive(usb_dev, command, buf, count); mutex_unlock(&isku->isku_lock); return retval ? retval : count; } static ssize_t isku_sysfs_write(struct file *fp, struct kobject *kobj, void const *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct isku_device *isku = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off != 0 || count > real_size) return -EINVAL; mutex_lock(&isku->isku_lock); retval = roccat_common2_send_with_status(usb_dev, command, (void *)buf, count); mutex_unlock(&isku->isku_lock); return retval ? 
retval : count; } #define ISKU_SYSFS_W(thingy, THINGY) \ static ssize_t isku_sysfs_write_ ## thingy(struct file *fp, struct kobject *kobj, \ struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ return isku_sysfs_write(fp, kobj, buf, off, count, \ ISKU_SIZE_ ## THINGY, ISKU_COMMAND_ ## THINGY); \ } #define ISKU_SYSFS_R(thingy, THINGY) \ static ssize_t isku_sysfs_read_ ## thingy(struct file *fp, struct kobject *kobj, \ struct bin_attribute *attr, char *buf, \ loff_t off, size_t count) \ { \ return isku_sysfs_read(fp, kobj, buf, off, count, \ ISKU_SIZE_ ## THINGY, ISKU_COMMAND_ ## THINGY); \ } #define ISKU_SYSFS_RW(thingy, THINGY) \ ISKU_SYSFS_R(thingy, THINGY) \ ISKU_SYSFS_W(thingy, THINGY) #define ISKU_BIN_ATTR_RW(thingy, THINGY) \ ISKU_SYSFS_RW(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0660 }, \ .size = ISKU_SIZE_ ## THINGY, \ .read = isku_sysfs_read_ ## thingy, \ .write = isku_sysfs_write_ ## thingy \ } #define ISKU_BIN_ATTR_R(thingy, THINGY) \ ISKU_SYSFS_R(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0440 }, \ .size = ISKU_SIZE_ ## THINGY, \ .read = isku_sysfs_read_ ## thingy, \ } #define ISKU_BIN_ATTR_W(thingy, THINGY) \ ISKU_SYSFS_W(thingy, THINGY); \ static struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0220 }, \ .size = ISKU_SIZE_ ## THINGY, \ .write = isku_sysfs_write_ ## thingy \ } ISKU_BIN_ATTR_RW(macro, MACRO); ISKU_BIN_ATTR_RW(keys_function, KEYS_FUNCTION); ISKU_BIN_ATTR_RW(keys_easyzone, KEYS_EASYZONE); ISKU_BIN_ATTR_RW(keys_media, KEYS_MEDIA); ISKU_BIN_ATTR_RW(keys_thumbster, KEYS_THUMBSTER); ISKU_BIN_ATTR_RW(keys_macro, KEYS_MACRO); ISKU_BIN_ATTR_RW(keys_capslock, KEYS_CAPSLOCK); ISKU_BIN_ATTR_RW(light, LIGHT); ISKU_BIN_ATTR_RW(key_mask, KEY_MASK); ISKU_BIN_ATTR_RW(last_set, LAST_SET); ISKU_BIN_ATTR_W(talk, TALK); ISKU_BIN_ATTR_W(talkfx, TALKFX); ISKU_BIN_ATTR_W(control, CONTROL); ISKU_BIN_ATTR_W(reset, RESET); ISKU_BIN_ATTR_R(info, INFO); static struct bin_attribute *isku_bin_attributes[] = { &bin_attr_macro, &bin_attr_keys_function, &bin_attr_keys_easyzone, &bin_attr_keys_media, &bin_attr_keys_thumbster, &bin_attr_keys_macro, &bin_attr_keys_capslock, &bin_attr_light, &bin_attr_key_mask, &bin_attr_last_set, &bin_attr_talk, &bin_attr_talkfx, &bin_attr_control, &bin_attr_reset, &bin_attr_info, NULL, }; static const struct attribute_group isku_group = { .attrs = isku_attrs, .bin_attrs = isku_bin_attributes, }; static const struct attribute_group *isku_groups[] = { &isku_group, NULL, }; static const struct class isku_class = { .name = "isku", .dev_groups = isku_groups, }; static int isku_init_isku_device_struct(struct usb_device *usb_dev, struct isku_device *isku) { int retval; mutex_init(&isku->isku_lock); retval = isku_get_actual_profile(usb_dev); if (retval < 0) return retval; isku_profile_activated(isku, retval); return 0; } static int isku_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct isku_device *isku; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol != ISKU_USB_INTERFACE_PROTOCOL) { hid_set_drvdata(hdev, NULL); return 0; } isku = kzalloc(sizeof(*isku), GFP_KERNEL); if (!isku) { hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } hid_set_drvdata(hdev, isku); retval = isku_init_isku_device_struct(usb_dev, isku); if (retval) { hid_err(hdev, "couldn't 
init struct isku_device\n"); goto exit_free; } retval = roccat_connect(&isku_class, hdev, sizeof(struct isku_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { isku->chrdev_minor = retval; isku->roccat_claimed = 1; } return 0; exit_free: kfree(isku); return retval; } static void isku_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct isku_device *isku; if (intf->cur_altsetting->desc.bInterfaceProtocol != ISKU_USB_INTERFACE_PROTOCOL) return; isku = hid_get_drvdata(hdev); if (isku->roccat_claimed) roccat_disconnect(isku->chrdev_minor); kfree(isku); } static int isku_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = isku_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install keyboard\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void isku_remove(struct hid_device *hdev) { isku_remove_specials(hdev); hid_hw_stop(hdev); } static void isku_keep_values_up_to_date(struct isku_device *isku, u8 const *data) { struct isku_report_button const *button_report; switch (data[0]) { case ISKU_REPORT_NUMBER_BUTTON: button_report = (struct isku_report_button const *)data; switch (button_report->event) { case ISKU_REPORT_BUTTON_EVENT_PROFILE: isku_profile_activated(isku, button_report->data1 - 1); break; } break; } } static void isku_report_to_chrdev(struct isku_device const *isku, u8 const *data) { struct isku_roccat_report roccat_report; struct isku_report_button const *button_report; if (data[0] != ISKU_REPORT_NUMBER_BUTTON) return; button_report = (struct isku_report_button const *)data; roccat_report.event = button_report->event; roccat_report.data1 = button_report->data1; roccat_report.data2 = button_report->data2; roccat_report.profile = isku->actual_profile + 1; roccat_report_event(isku->chrdev_minor, (uint8_t const *)&roccat_report); } static int isku_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct isku_device *isku = hid_get_drvdata(hdev); if (intf->cur_altsetting->desc.bInterfaceProtocol != ISKU_USB_INTERFACE_PROTOCOL) return 0; if (isku == NULL) return 0; isku_keep_values_up_to_date(isku, data); if (isku->roccat_claimed) isku_report_to_chrdev(isku, data); return 0; } static const struct hid_device_id isku_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKU) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKUFX) }, { } }; MODULE_DEVICE_TABLE(hid, isku_devices); static struct hid_driver isku_driver = { .name = "isku", .id_table = isku_devices, .probe = isku_probe, .remove = isku_remove, .raw_event = isku_raw_event }; static int __init isku_init(void) { int retval; retval = class_register(&isku_class); if (retval) return retval; retval = hid_register_driver(&isku_driver); if (retval) class_unregister(&isku_class); return retval; } static void __exit isku_exit(void) { hid_unregister_driver(&isku_driver); class_unregister(&isku_class); } module_init(isku_init); module_exit(isku_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Isku/FX driver"); MODULE_LICENSE("GPL v2");
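/*
 * isku_sysfs_read()/isku_sysfs_write() above accept only whole-buffer
 * transfers: a read at or past the attribute size returns EOF, and any
 * other access with a non-zero offset or an oversized count is rejected
 * with -EINVAL.  The sketch below models just that policy as a pure
 * function with a tiny test harness; the demo_* names, the 16-byte size
 * and the DEMO_EINVAL encoding are local to this example and are not
 * part of the driver.
 */
#include <stdio.h>

#define DEMO_EINVAL	22	/* stand-in for the kernel's -EINVAL */

/*
 * Returns the number of bytes the driver would transfer, 0 for EOF on a
 * read past the end, or -DEMO_EINVAL for a rejected request.
 */
static long demo_check_xfer(long off, unsigned long count,
			    unsigned long real_size, int is_read)
{
	if (is_read && off >= (long)real_size)
		return 0;			/* read past the end: EOF */
	if (off != 0 || count > real_size)
		return -DEMO_EINVAL;		/* partial access refused */
	return (long)count;			/* whole-buffer transfer */
}

int main(void)
{
	const unsigned long size = 16;	/* made-up attribute size */

	printf("read  off=0  count=16 -> %ld\n", demo_check_xfer(0, 16, size, 1));
	printf("read  off=16 count=4  -> %ld\n", demo_check_xfer(16, 4, size, 1));
	printf("read  off=4  count=4  -> %ld\n", demo_check_xfer(4, 4, size, 1));
	printf("write off=0  count=32 -> %ld\n", demo_check_xfer(0, 32, size, 0));
	return 0;
}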
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VMSTAT_H
#define _LINUX_VMSTAT_H

#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/mmzone.h>
#include <linux/vm_event_item.h>
#include <linux/atomic.h>
#include <linux/static_key.h>
#include <linux/mmdebug.h>

extern int sysctl_stat_interval;

#ifdef CONFIG_NUMA
#define ENABLE_NUMA_STAT   1
#define DISABLE_NUMA_STAT   0
extern int sysctl_vm_numa_stat;
DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key);
int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos);
#endif

struct reclaim_stat {
	unsigned nr_dirty;
	unsigned nr_unqueued_dirty;
	unsigned nr_congested;
	unsigned nr_writeback;
	unsigned nr_immediate;
	unsigned nr_pageout;
	unsigned nr_activate[ANON_AND_FILE];
	unsigned nr_ref_keep;
	unsigned nr_unmap_fail;
	unsigned nr_lazyfree_fail;
};

enum writeback_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_VM_WRITEBACK_STAT_ITEMS,
};

#ifdef CONFIG_VM_EVENT_COUNTERS
/*
 * Light weight per cpu counter implementation.
* * Counters should only be incremented and no critical kernel component * should rely on the counter values. * * Counters are handled completely inline. On many platforms the code * generated will simply be the increment of a global address. */ struct vm_event_state { unsigned long event[NR_VM_EVENT_ITEMS]; }; DECLARE_PER_CPU(struct vm_event_state, vm_event_states); /* * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the * local_irq_disable overhead. */ static inline void __count_vm_event(enum vm_event_item item) { raw_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) { this_cpu_inc(vm_event_states.event[item]); } static inline void __count_vm_events(enum vm_event_item item, long delta) { raw_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) { this_cpu_add(vm_event_states.event[item], delta); } extern void all_vm_events(unsigned long *); extern void vm_events_fold_cpu(int cpu); #else /* Disable counters */ static inline void count_vm_event(enum vm_event_item item) { } static inline void count_vm_events(enum vm_event_item item, long delta) { } static inline void __count_vm_event(enum vm_event_item item) { } static inline void __count_vm_events(enum vm_event_item item, long delta) { } static inline void all_vm_events(unsigned long *ret) { } static inline void vm_events_fold_cpu(int cpu) { } #endif /* CONFIG_VM_EVENT_COUNTERS */ #ifdef CONFIG_NUMA_BALANCING #define count_vm_numa_event(x) count_vm_event(x) #define count_vm_numa_events(x, y) count_vm_events(x, y) #else #define count_vm_numa_event(x) do {} while (0) #define count_vm_numa_events(x, y) do { (void)(y); } while (0) #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_DEBUG_TLBFLUSH #define count_vm_tlb_event(x) count_vm_event(x) #define count_vm_tlb_events(x, y) count_vm_events(x, y) #else #define count_vm_tlb_event(x) do {} while (0) #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif #ifdef CONFIG_PER_VMA_LOCK_STATS #define count_vm_vma_lock_event(x) count_vm_event(x) #else #define count_vm_vma_lock_event(x) do {} while (0) #endif #define __count_zid_vm_events(item, zid, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) /* * Zone and node-based page accounting with per cpu differentials. 
*/ extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; #ifdef CONFIG_NUMA static inline void zone_numa_event_add(long x, struct zone *zone, enum numa_stat_item item) { atomic_long_add(x, &zone->vm_numa_event[item]); atomic_long_add(x, &vm_numa_event[item]); } static inline unsigned long zone_numa_event_state(struct zone *zone, enum numa_stat_item item) { return atomic_long_read(&zone->vm_numa_event[item]); } static inline unsigned long global_numa_event_state(enum numa_stat_item item) { return atomic_long_read(&vm_numa_event[item]); } #endif /* CONFIG_NUMA */ static inline void zone_page_state_add(long x, struct zone *zone, enum zone_stat_item item) { atomic_long_add(x, &zone->vm_stat[item]); atomic_long_add(x, &vm_zone_stat[item]); } static inline void node_page_state_add(long x, struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_add(x, &pgdat->vm_stat[item]); atomic_long_add(x, &vm_node_stat[item]); } static inline unsigned long global_zone_page_state(enum zone_stat_item item) { long x = atomic_long_read(&vm_zone_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } static inline unsigned long global_node_page_state_pages(enum node_stat_item item) { long x = atomic_long_read(&vm_node_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } static inline unsigned long global_node_page_state(enum node_stat_item item) { VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); return global_node_page_state_pages(item); } static inline unsigned long zone_page_state(struct zone *zone, enum zone_stat_item item) { long x = atomic_long_read(&zone->vm_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } /* * More accurate version that also considers the currently pending * deltas. For that we need to loop over all cpus to find the current * deltas. There is no synchronization so the result cannot be * exactly accurate either. */ static inline unsigned long zone_page_state_snapshot(struct zone *zone, enum zone_stat_item item) { long x = atomic_long_read(&zone->vm_stat[item]); #ifdef CONFIG_SMP int cpu; for_each_online_cpu(cpu) x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_stat_diff[item]; if (x < 0) x = 0; #endif return x; } #ifdef CONFIG_NUMA /* See __count_vm_event comment on why raw_cpu_inc is used. 
*/ static inline void __count_numa_event(struct zone *zone, enum numa_stat_item item) { struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; raw_cpu_inc(pzstats->vm_numa_event[item]); } static inline void __count_numa_events(struct zone *zone, enum numa_stat_item item, long delta) { struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; raw_cpu_add(pzstats->vm_numa_event[item], delta); } extern unsigned long sum_zone_node_page_state(int node, enum zone_stat_item item); extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item); extern unsigned long node_page_state(struct pglist_data *pgdat, enum node_stat_item item); extern unsigned long node_page_state_pages(struct pglist_data *pgdat, enum node_stat_item item); extern void fold_vm_numa_events(void); #else #define sum_zone_node_page_state(node, item) global_zone_page_state(item) #define node_page_state(node, item) global_node_page_state(item) #define node_page_state_pages(node, item) global_node_page_state_pages(item) static inline void fold_vm_numa_events(void) { } #endif /* CONFIG_NUMA */ #ifdef CONFIG_SMP void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long); void __inc_zone_page_state(struct page *, enum zone_stat_item); void __dec_zone_page_state(struct page *, enum zone_stat_item); void __mod_node_page_state(struct pglist_data *, enum node_stat_item item, long); void __inc_node_page_state(struct page *, enum node_stat_item); void __dec_node_page_state(struct page *, enum node_stat_item); void mod_zone_page_state(struct zone *, enum zone_stat_item, long); void inc_zone_page_state(struct page *, enum zone_stat_item); void dec_zone_page_state(struct page *, enum zone_stat_item); void mod_node_page_state(struct pglist_data *, enum node_stat_item, long); void inc_node_page_state(struct page *, enum node_stat_item); void dec_node_page_state(struct page *, enum node_stat_item); extern void inc_node_state(struct pglist_data *, enum node_stat_item); extern void __inc_zone_state(struct zone *, enum zone_stat_item); extern void __inc_node_state(struct pglist_data *, enum node_stat_item); extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_node_state(struct pglist_data *, enum node_stat_item); void quiet_vmstat(void); void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); struct ctl_table; int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp, loff_t *ppos); void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *); int calculate_pressure_threshold(struct zone *zone); int calculate_normal_threshold(struct zone *zone); void set_pgdat_percpu_threshold(pg_data_t *pgdat, int (*calculate_pressure)(struct zone *)); #else /* CONFIG_SMP */ /* * We do not maintain differentials in a single processor configuration. * The functions directly modify the zone and global counters. */ static inline void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, long delta) { zone_page_state_add(delta, zone, item); } static inline void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, int delta) { if (vmstat_item_in_bytes(item)) { /* * Only cgroups use subpage accounting right now; at * the global level, these items still change in * multiples of whole pages. Store them as pages * internally to keep the per-cpu counters compact. 
*/ VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); delta >>= PAGE_SHIFT; } node_page_state_add(delta, pgdat, item); } static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { atomic_long_inc(&zone->vm_stat[item]); atomic_long_inc(&vm_zone_stat[item]); } static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_inc(&pgdat->vm_stat[item]); atomic_long_inc(&vm_node_stat[item]); } static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { atomic_long_dec(&zone->vm_stat[item]); atomic_long_dec(&vm_zone_stat[item]); } static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_dec(&pgdat->vm_stat[item]); atomic_long_dec(&vm_node_stat[item]); } static inline void __inc_zone_page_state(struct page *page, enum zone_stat_item item) { __inc_zone_state(page_zone(page), item); } static inline void __inc_node_page_state(struct page *page, enum node_stat_item item) { __inc_node_state(page_pgdat(page), item); } static inline void __dec_zone_page_state(struct page *page, enum zone_stat_item item) { __dec_zone_state(page_zone(page), item); } static inline void __dec_node_page_state(struct page *page, enum node_stat_item item) { __dec_node_state(page_pgdat(page), item); } /* * We only use atomic operations to update counters. So there is no need to * disable interrupts. */ #define inc_zone_page_state __inc_zone_page_state #define dec_zone_page_state __dec_zone_page_state #define mod_zone_page_state __mod_zone_page_state #define inc_node_page_state __inc_node_page_state #define dec_node_page_state __dec_node_page_state #define mod_node_page_state __mod_node_page_state #define inc_zone_state __inc_zone_state #define inc_node_state __inc_node_state #define dec_zone_state __dec_zone_state #define set_pgdat_percpu_threshold(pgdat, callback) { } static inline void refresh_zone_stat_thresholds(void) { } static inline void cpu_vm_stats_fold(int cpu) { } static inline void quiet_vmstat(void) { } static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } #endif /* CONFIG_SMP */ static inline void __zone_stat_mod_folio(struct folio *folio, enum zone_stat_item item, long nr) { __mod_zone_page_state(folio_zone(folio), item, nr); } static inline void __zone_stat_add_folio(struct folio *folio, enum zone_stat_item item) { __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); } static inline void __zone_stat_sub_folio(struct folio *folio, enum zone_stat_item item) { __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); } static inline void zone_stat_mod_folio(struct folio *folio, enum zone_stat_item item, long nr) { mod_zone_page_state(folio_zone(folio), item, nr); } static inline void zone_stat_add_folio(struct folio *folio, enum zone_stat_item item) { mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); } static inline void zone_stat_sub_folio(struct folio *folio, enum zone_stat_item item) { mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); } static inline void __node_stat_mod_folio(struct folio *folio, enum node_stat_item item, long nr) { __mod_node_page_state(folio_pgdat(folio), item, nr); } static inline void __node_stat_add_folio(struct folio *folio, enum node_stat_item item) { __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); } static inline void __node_stat_sub_folio(struct folio *folio, enum node_stat_item item) { __mod_node_page_state(folio_pgdat(folio), item, 
-folio_nr_pages(folio)); } static inline void node_stat_mod_folio(struct folio *folio, enum node_stat_item item, long nr) { mod_node_page_state(folio_pgdat(folio), item, nr); } static inline void node_stat_add_folio(struct folio *folio, enum node_stat_item item) { mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); } static inline void node_stat_sub_folio(struct folio *folio, enum node_stat_item item) { mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); } static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, int migratetype) { __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages); if (is_migrate_cma(migratetype)) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); } extern const char * const vmstat_text[]; static inline const char *zone_stat_name(enum zone_stat_item item) { return vmstat_text[item]; } #ifdef CONFIG_NUMA static inline const char *numa_stat_name(enum numa_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + item]; } #endif /* CONFIG_NUMA */ static inline const char *node_stat_name(enum node_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + item]; } static inline const char *lru_list_name(enum lru_list lru) { return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } static inline const char *writeback_stat_name(enum writeback_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + item]; } #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + NR_VM_WRITEBACK_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ #ifdef CONFIG_MEMCG void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); __mod_lruvec_state(lruvec, idx, val); local_irq_restore(flags); } void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val); static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); __lruvec_stat_mod_folio(folio, idx, val); local_irq_restore(flags); } static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { lruvec_stat_mod_folio(page_folio(page), idx, val); } #else static inline void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } static inline void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { __mod_node_page_state(folio_pgdat(folio), idx, val); } static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { mod_node_page_state(folio_pgdat(folio), idx, val); } static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { mod_node_page_state(page_pgdat(page), idx, val); } #endif /* CONFIG_MEMCG */ static inline void __lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); } static inline void __lruvec_stat_sub_folio(struct 
folio *folio, enum node_stat_item idx) { __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } static inline void lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); } static inline void lruvec_stat_sub_folio(struct folio *folio, enum node_stat_item idx) { lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } #endif /* _LINUX_VMSTAT_H */
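Before moving on, a brief usage sketch of the counters above (the caller below is hypothetical and not part of the header; it assumes <linux/mm.h> for the folio accessors): a path that has just dirtied a folio bumps the node-level counter with the folio-sized helper and the zone-level counter with the explicit-delta form, so the same code works with both the CONFIG_SMP differential implementation and the UP atomic one.

#include <linux/mm.h>
#include <linux/vmstat.h>

/*
 * Hypothetical sketch: charge a freshly dirtied folio to the vmstat
 * counters. node_stat_add_folio() picks the pgdat from the folio and
 * scales by folio_nr_pages(); mod_zone_page_state() takes an explicit
 * delta against the folio's zone.
 */
static void example_account_dirtied(struct folio *folio)
{
	node_stat_add_folio(folio, NR_FILE_DIRTY);
	mod_zone_page_state(folio_zone(folio), NR_ZONE_WRITE_PENDING,
			    folio_nr_pages(folio));
}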
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FILELOCK_H #define _LINUX_FILELOCK_H #include <linux/fs.h> #define FL_POSIX 1 #define FL_FLOCK 2 #define FL_DELEG 4 /* NFSv4 delegation */ #define FL_ACCESS 8 /* not trying to lock, just looking */ #define FL_EXISTS 16 /* when unlocking, test for existence */ #define FL_LEASE 32 /* lease held on this file */ #define FL_CLOSE 64 /* unlock on close */ #define FL_SLEEP 128 /* A blocking lock */ #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ #define FL_LAYOUT 2048 /* outstanding pNFS layout */ #define FL_RECLAIM 4096 /* reclaiming from a reboot server */ #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) /* * Special return value from posix_lock_file() and vfs_lock_file() for * asynchronous locking.
*/ #define FILE_LOCK_DEFERRED 1 struct file_lock; struct file_lease; struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); }; struct lock_manager_operations { void *lm_mod_owner; fl_owner_t (*lm_get_owner)(fl_owner_t); void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); bool (*lm_lock_expirable)(struct file_lock *cfl); void (*lm_expire_lock)(void); }; struct lease_manager_operations { bool (*lm_break)(struct file_lease *); int (*lm_change)(struct file_lease *, int, struct list_head *); void (*lm_setup)(struct file_lease *, void **); bool (*lm_breaker_owns_lease)(struct file_lease *); }; struct lock_manager { struct list_head list; /* * NFSv4 and up also want opens blocked during the grace period; * NLM doesn't care: */ bool block_opens; }; struct net; void locks_start_grace(struct net *, struct lock_manager *); void locks_end_grace(struct lock_manager *); bool locks_in_grace(struct net *); bool opens_in_grace(struct net *); /* * struct file_lock has a union that some filesystems use to track * their own private info. The NFS side of things is defined here: */ #include <linux/nfs_fs_i.h> /* * struct file_lock represents a generic "file lock". It's used to represent * POSIX byte range locks, BSD (flock) locks, and leases. It's important to * note that the same struct is used to represent both a request for a lock and * the lock itself, but the same object is never used for both. * * FIXME: should we create a separate "struct lock_request" to help distinguish * these two uses? * * The varous i_flctx lists are ordered by: * * 1) lock owner * 2) lock range start * 3) lock range end * * Obviously, the last two criteria only matter for POSIX locks. */ struct file_lock_core { struct file_lock_core *flc_blocker; /* The lock that is blocking us */ struct list_head flc_list; /* link into file_lock_context */ struct hlist_node flc_link; /* node in global lists */ struct list_head flc_blocked_requests; /* list of requests with * ->fl_blocker pointing here */ struct list_head flc_blocked_member; /* node in * ->fl_blocker->fl_blocked_requests */ fl_owner_t flc_owner; unsigned int flc_flags; unsigned char flc_type; pid_t flc_pid; int flc_link_cpu; /* what cpu's list is this on? 
*/ wait_queue_head_t flc_wait; struct file *flc_file; }; struct file_lock { struct file_lock_core c; loff_t fl_start; loff_t fl_end; const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ union { struct nfs_lock_info nfs_fl; struct nfs4_lock_info nfs4_fl; struct { struct list_head link; /* link in AFS vnode's pending_locks list */ int state; /* state of grant or error if -ve */ unsigned int debug_id; } afs; struct { struct inode *inode; } ceph; } fl_u; } __randomize_layout; struct file_lease { struct file_lock_core c; struct fasync_struct * fl_fasync; /* for lease break notifications */ /* for lease breaks: */ unsigned long fl_break_time; unsigned long fl_downgrade_time; const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */ } __randomize_layout; struct file_lock_context { spinlock_t flc_lock; struct list_head flc_flock; struct list_head flc_posix; struct list_head flc_lease; }; #ifdef CONFIG_FILE_LOCKING int fcntl_getlk(struct file *, unsigned int, struct flock *); int fcntl_setlk(unsigned int, struct file *, unsigned int, struct flock *); #if BITS_PER_LONG == 32 int fcntl_getlk64(struct file *, unsigned int, struct flock64 *); int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 *); #endif int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); static inline bool lock_is_unlock(struct file_lock *fl) { return fl->c.flc_type == F_UNLCK; } static inline bool lock_is_read(struct file_lock *fl) { return fl->c.flc_type == F_RDLCK; } static inline bool lock_is_write(struct file_lock *fl) { return fl->c.flc_type == F_WRLCK; } static inline void locks_wake_up(struct file_lock *fl) { wake_up(&fl->c.flc_wait); } /* fs/locks.c */ void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); void locks_init_lock(struct file_lock *); struct file_lock *locks_alloc_lock(void); void locks_copy_lock(struct file_lock *, struct file_lock *); void locks_copy_conflock(struct file_lock *, struct file_lock *); void locks_remove_posix(struct file *, fl_owner_t); void locks_remove_file(struct file *); void locks_release_private(struct file_lock *); void posix_test_lock(struct file *, struct file_lock *); int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); int locks_delete_block(struct file_lock *); int vfs_test_lock(struct file *, struct file_lock *); int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); int vfs_cancel_lock(struct file *filp, struct file_lock *fl); bool vfs_inode_has_locks(struct inode *inode); int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); void locks_init_lease(struct file_lease *); void locks_free_lease(struct file_lease *fl); struct file_lease *locks_alloc_lease(void); int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); void lease_get_mtime(struct inode *, struct timespec64 *time); int generic_setlease(struct file *, int, struct file_lease **, void **priv); int kernel_setlease(struct file *, int, struct file_lease **, void **); int vfs_setlease(struct file *, int, struct file_lease **, void **); int lease_modify(struct file_lease *, int, struct list_head *); struct notifier_block; int lease_register_notifier(struct notifier_block *); void lease_unregister_notifier(struct notifier_block *); struct files_struct; void show_fd_locks(struct seq_file *f, struct 
file *filp, struct files_struct *files); bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner); static inline struct file_lock_context * locks_inode_context(const struct inode *inode) { return smp_load_acquire(&inode->i_flctx); } #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) { return -EINVAL; } static inline int fcntl_setlk(unsigned int fd, struct file *file, unsigned int cmd, struct flock __user *user) { return -EACCES; } #if BITS_PER_LONG == 32 static inline int fcntl_getlk64(struct file *file, unsigned int cmd, struct flock64 *user) { return -EINVAL; } static inline int fcntl_setlk64(unsigned int fd, struct file *file, unsigned int cmd, struct flock64 *user) { return -EACCES; } #endif static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { return -EINVAL; } static inline int fcntl_getlease(struct file *filp) { return F_UNLCK; } static inline bool lock_is_unlock(struct file_lock *fl) { return false; } static inline bool lock_is_read(struct file_lock *fl) { return false; } static inline bool lock_is_write(struct file_lock *fl) { return false; } static inline void locks_wake_up(struct file_lock *fl) { } static inline void locks_free_lock_context(struct inode *inode) { } static inline void locks_init_lock(struct file_lock *fl) { return; } static inline void locks_init_lease(struct file_lease *fl) { return; } static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; } static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { return; } static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) { return; } static inline void locks_remove_file(struct file *filp) { return; } static inline void posix_test_lock(struct file *filp, struct file_lock *fl) { return; } static inline int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { return -ENOLCK; } static inline int locks_delete_block(struct file_lock *waiter) { return -ENOENT; } static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) { return 0; } static inline int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { return -ENOLCK; } static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { return 0; } static inline bool vfs_inode_has_locks(struct inode *inode) { return false; } static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { return -ENOLCK; } static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; } static inline void lease_get_mtime(struct inode *inode, struct timespec64 *time) { return; } static inline int generic_setlease(struct file *filp, int arg, struct file_lease **flp, void **priv) { return -EINVAL; } static inline int kernel_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { return -EINVAL; } static inline int vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { return -EINVAL; } static inline int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { return -EINVAL; } struct files_struct; static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner) { return false; } static inline struct file_lock_context * 
locks_inode_context(const struct inode *inode) { return NULL; } #endif /* !CONFIG_FILE_LOCKING */ /* for walking lists of file_locks linked by fl_list */ #define for_each_file_lock(_fl, _head) list_for_each_entry(_fl, _head, c.flc_list) static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); } #ifdef CONFIG_FILE_LOCKING static inline int break_lease(struct inode *inode, unsigned int mode) { /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ smp_mb(); if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) return __break_lease(inode, mode, FL_LEASE); return 0; } static inline int break_deleg(struct inode *inode, unsigned int mode) { /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ smp_mb(); if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) return __break_lease(inode, mode, FL_DELEG); return 0; } static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) { int ret; ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); if (ret == -EWOULDBLOCK && delegated_inode) { *delegated_inode = inode; ihold(inode); } return ret; } static inline int break_deleg_wait(struct inode **delegated_inode) { int ret; ret = break_deleg(*delegated_inode, O_WRONLY); iput(*delegated_inode); *delegated_inode = NULL; return ret; } static inline int break_layout(struct inode *inode, bool wait) { smp_mb(); if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) return __break_lease(inode, wait ? O_WRONLY : O_WRONLY | O_NONBLOCK, FL_LAYOUT); return 0; } #else /* !CONFIG_FILE_LOCKING */ static inline int break_lease(struct inode *inode, unsigned int mode) { return 0; } static inline int break_deleg(struct inode *inode, unsigned int mode) { return 0; } static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) { return 0; } static inline int break_deleg_wait(struct inode **delegated_inode) { BUG(); return 0; } static inline int break_layout(struct inode *inode, bool wait) { return 0; } #endif /* CONFIG_FILE_LOCKING */ #endif /* _LINUX_FILELOCK_H */
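As a hedged illustration of the lease-breaking helpers above (the caller below is hypothetical): a path that is about to modify an inode calls break_lease() before touching data; the check is a cheap no-op unless i_flctx has leases attached, and passing O_NONBLOCK asks __break_lease() to return -EWOULDBLOCK instead of waiting for the lease holder, mirroring what try_break_deleg() does for delegations.

#include <linux/filelock.h>
#include <linux/fs.h>

/*
 * Hypothetical sketch: break any outstanding lease before writing.
 * With O_NONBLOCK the caller is told to retry later (-EWOULDBLOCK)
 * rather than sleeping until the lease holder gives the lease up.
 */
static int example_prepare_to_write(struct inode *inode, bool nonblock)
{
	unsigned int mode = O_WRONLY | (nonblock ? O_NONBLOCK : 0);

	return break_lease(inode, mode);
}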
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Credentials management - see Documentation/security/credentials.rst * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_CRED_H #define _LINUX_CRED_H #include <linux/capability.h> #include <linux/init.h> #include <linux/key.h> #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/uidgid.h> #include <linux/sched.h> #include <linux/sched/user.h> struct cred; struct inode; /* * COW Supplementary groups list */ struct group_info { refcount_t usage; int ngroups; kgid_t gid[]; } __randomize_layout; /** * get_group_info - Get a reference to a group info structure * @group_info: The group info to reference * * This gets a reference to a set of supplementary groups. * * If the caller is accessing a task's credentials, they must hold the RCU read * lock when reading.
*/ static inline struct group_info *get_group_info(struct group_info *gi) { refcount_inc(&gi->usage); return gi; } /** * put_group_info - Release a reference to a group info structure * @group_info: The group info to release */ #define put_group_info(group_info) \ do { \ if (refcount_dec_and_test(&(group_info)->usage)) \ groups_free(group_info); \ } while (0) #ifdef CONFIG_MULTIUSER extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); extern int in_group_p(kgid_t); extern int in_egroup_p(kgid_t); extern int groups_search(const struct group_info *, kgid_t); extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern bool may_setgroups(void); extern void groups_sort(struct group_info *); #else static inline void groups_free(struct group_info *group_info) { } static inline int in_group_p(kgid_t grp) { return 1; } static inline int in_egroup_p(kgid_t grp) { return 1; } static inline int groups_search(const struct group_info *group_info, kgid_t grp) { return 1; } #endif /* * The security context of a task * * The parts of the context break down into two categories: * * (1) The objective context of a task. These parts are used when some other * task is attempting to affect this one. * * (2) The subjective context. These details are used when the task is acting * upon another object, be that a file, a task, a key or whatever. * * Note that some members of this structure belong to both categories - the * LSM security pointer for instance. * * A task has two security pointers. task->real_cred points to the objective * context that defines that task's actual details. The objective part of this * context is used whenever that task is acted upon. * * task->cred points to the subjective context that defines the details of how * that task is going to act upon another object. This may be overridden * temporarily to point to another security context, but normally points to the * same context as task->real_cred. */ struct cred { atomic_long_t usage; kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ kgid_t sgid; /* saved GID of the task */ kuid_t euid; /* effective UID of the task */ kgid_t egid; /* effective GID of the task */ kuid_t fsuid; /* UID for VFS ops */ kgid_t fsgid; /* GID for VFS ops */ unsigned securebits; /* SUID-less security management */ kernel_cap_t cap_inheritable; /* caps our children can inherit */ kernel_cap_t cap_permitted; /* caps we're permitted */ kernel_cap_t cap_effective; /* caps we can actually use */ kernel_cap_t cap_bset; /* capability bounding set */ kernel_cap_t cap_ambient; /* Ambient capability set */ #ifdef CONFIG_KEYS unsigned char jit_keyring; /* default keyring to attach requested * keys to */ struct key *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ #endif #ifdef CONFIG_SECURITY void *security; /* LSM security */ #endif struct user_struct *user; /* real user ID subscription */ struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ struct ucounts *ucounts; struct group_info *group_info; /* supplementary groups for euid/fsgid */ /* RCU deletion */ union { int non_rcu; /* Can we skip RCU deletion? 
*/ struct rcu_head rcu; /* RCU deletion hook */ }; } __randomize_layout; extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); extern int copy_creds(struct task_struct *, unsigned long); extern const struct cred *get_task_cred(struct task_struct *); extern struct cred *cred_alloc_blank(void); extern struct cred *prepare_creds(void); extern struct cred *prepare_exec_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); extern const struct cred *override_creds(const struct cred *); extern void revert_creds(const struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); extern int set_create_files_as(struct cred *, struct inode *); extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, cap_intersect(cred->cap_permitted, cred->cap_inheritable)); } /** * get_new_cred_many - Get references on a new set of credentials * @cred: The new credentials to reference * @nr: Number of references to acquire * * Get references on the specified set of new credentials. The caller must * release all acquired references. */ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) { atomic_long_add(nr, &cred->usage); return cred; } /** * get_new_cred - Get a reference on a new set of credentials * @cred: The new credentials to reference * * Get a reference on the specified set of new credentials. The caller must * release the reference. */ static inline struct cred *get_new_cred(struct cred *cred) { return get_new_cred_many(cred, 1); } /** * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference * @nr: Number of references to acquire * * Get references on the specified set of credentials. The caller must release * all acquired reference. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials. Although the * pointer is const, this will temporarily discard the const and increment the * usage count. The purpose of this is to attempt to catch at compile time the * accidental alteration of a set of credentials that should be considered * immutable. */ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; nonconst_cred->non_rcu = 0; return get_new_cred_many(nonconst_cred, nr); } /* * get_cred - Get a reference on a set of credentials * @cred: The credentials to reference * * Get a reference on the specified set of credentials. The caller must * release the reference. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials. 
*/ static inline const struct cred *get_cred(const struct cred *cred) { return get_cred_many(cred, 1); } static inline const struct cred *get_cred_rcu(const struct cred *cred) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; nonconst_cred->non_rcu = 0; return cred; } /** * put_cred - Release a reference to a set of credentials * @cred: The credentials to release * @nr: Number of references to release * * Release a reference to a set of credentials, deleting them when the last ref * is released. If %NULL is passed, nothing is done. * * This takes a const pointer to a set of credentials because the credentials * on task_struct are attached by const pointers to prevent accidental * alteration of otherwise immutable credential sets. */ static inline void put_cred_many(const struct cred *_cred, int nr) { struct cred *cred = (struct cred *) _cred; if (cred) { if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } } /* * put_cred - Release a reference to a set of credentials * @cred: The credentials to release * * Release a reference to a set of credentials, deleting them when the last ref * is released. If %NULL is passed, nothing is done. */ static inline void put_cred(const struct cred *cred) { put_cred_many(cred, 1); } /** * current_cred - Access the current task's subjective credentials * * Access the subjective credentials of the current task. RCU-safe, * since nobody else can modify it. */ #define current_cred() \ rcu_dereference_protected(current->cred, 1) /** * current_real_cred - Access the current task's objective credentials * * Access the objective credentials of the current task. RCU-safe, * since nobody else can modify it. */ #define current_real_cred() \ rcu_dereference_protected(current->real_cred, 1) /** * __task_cred - Access a task's objective credentials * @task: The task to query * * Access the objective credentials of a task. The caller must hold the RCU * readlock. * * The result of this function should not be passed directly to get_cred(); * rather get_task_cred() should be used instead. */ #define __task_cred(task) \ rcu_dereference((task)->real_cred) /** * get_current_cred - Get the current task's subjective credentials * * Get the subjective credentials of the current task, pinning them so that * they can't go away. Accessing the current task's credentials directly is * not permitted. */ #define get_current_cred() \ (get_cred(current_cred())) /** * get_current_user - Get the current task's user_struct * * Get the user record of the current task, pinning it so that it can't go * away. */ #define get_current_user() \ ({ \ struct user_struct *__u; \ const struct cred *__cred; \ __cred = current_cred(); \ __u = get_uid(__cred->user); \ __u; \ }) /** * get_current_groups - Get the current task's supplementary group list * * Get the supplementary group list of the current task, pinning it so that it * can't go away. 
*/ #define get_current_groups() \ ({ \ struct group_info *__groups; \ const struct cred *__cred; \ __cred = current_cred(); \ __groups = get_group_info(__cred->group_info); \ __groups; \ }) #define task_cred_xxx(task, xxx) \ ({ \ __typeof__(((struct cred *)NULL)->xxx) ___val; \ rcu_read_lock(); \ ___val = __task_cred((task))->xxx; \ rcu_read_unlock(); \ ___val; \ }) #define task_uid(task) (task_cred_xxx((task), uid)) #define task_euid(task) (task_cred_xxx((task), euid)) #define task_ucounts(task) (task_cred_xxx((task), ucounts)) #define current_cred_xxx(xxx) \ ({ \ current_cred()->xxx; \ }) #define current_uid() (current_cred_xxx(uid)) #define current_gid() (current_cred_xxx(gid)) #define current_euid() (current_cred_xxx(euid)) #define current_egid() (current_cred_xxx(egid)) #define current_suid() (current_cred_xxx(suid)) #define current_sgid() (current_cred_xxx(sgid)) #define current_fsuid() (current_cred_xxx(fsuid)) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) #define current_ucounts() (current_cred_xxx(ucounts)) extern struct user_namespace init_user_ns; #ifdef CONFIG_USER_NS #define current_user_ns() (current_cred_xxx(user_ns)) #else static inline struct user_namespace *current_user_ns(void) { return &init_user_ns; } #endif #define current_uid_gid(_uid, _gid) \ do { \ const struct cred *__cred; \ __cred = current_cred(); \ *(_uid) = __cred->uid; \ *(_gid) = __cred->gid; \ } while(0) #define current_euid_egid(_euid, _egid) \ do { \ const struct cred *__cred; \ __cred = current_cred(); \ *(_euid) = __cred->euid; \ *(_egid) = __cred->egid; \ } while(0) #define current_fsuid_fsgid(_fsuid, _fsgid) \ do { \ const struct cred *__cred; \ __cred = current_cred(); \ *(_fsuid) = __cred->fsuid; \ *(_fsgid) = __cred->fsgid; \ } while(0) #endif /* _LINUX_CRED_H */
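To make the subjective-credentials machinery above concrete, here is a minimal sketch of the common prepare/override/revert pattern (the function and its use of fsuid are hypothetical, not taken from this header): prepare_creds() copies current->cred with one reference held, override_creds() installs the new set as the subjective context and returns the old pointer, and the caller reverts and drops its reference when done.

#include <linux/cred.h>
#include <linux/errno.h>

/*
 * Hypothetical sketch: run one operation with a different filesystem UID.
 * The prepared set is private to us until committed or overridden, so it
 * may be modified freely before override_creds() installs it.
 */
static int example_run_with_fsuid(kuid_t fsuid, int (*work)(void))
{
	struct cred *new;
	const struct cred *old;
	int ret;

	new = prepare_creds();		/* copy of current->cred, one ref */
	if (!new)
		return -ENOMEM;
	new->fsuid = fsuid;		/* tweak the subjective identity */

	old = override_creds(new);	/* act as 'new' for VFS operations */
	ret = work();
	revert_creds(old);		/* restore the original credentials */

	put_cred(new);			/* drop the preparation reference */
	return ret;
}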
// SPDX-License-Identifier: GPL-2.0-only #include <linux/uaccess.h> #include <linux/kernel.h> #include <asm/vsyscall.h> #ifdef CONFIG_X86_64 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { unsigned long vaddr = (unsigned long)unsafe_src; /* * Do not allow userspace addresses. This disallows * normal userspace and the userspace guard page: */ if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) return false; /* * Reading from the vsyscall page may cause an unhandled fault in * certain cases. Though it is at an address above TASK_SIZE_MAX, it is * usually considered as a user space address. */ if (is_vsyscall_vaddr(vaddr)) return false; /* * Allow everything during early boot before 'x86_virt_bits' * is initialized. Needed for instruction decoding in early * exception handlers. */ if (!boot_cpu_data.x86_virt_bits) return true; return __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits); } #else bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return (unsigned long)unsafe_src >= TASK_SIZE_MAX; } #endif
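For context, a short hedged sketch of how this arch hook is consumed (the wrapper below is hypothetical): generic mm/maccess.c calls copy_from_kernel_nofault_allowed() from copy_from_kernel_nofault() before attempting the access, so a debugger-style reader simply uses the generic helper and treats a non-zero return as "address not readable".

#include <linux/uaccess.h>

/*
 * Hypothetical sketch: safely peek at a possibly-bogus kernel address.
 * copy_from_kernel_nofault() consults the allowed() hook above and
 * returns -ERANGE for disallowed addresses (userspace, the vsyscall
 * page, non-canonical) or -EFAULT if the access itself faults.
 */
static long example_peek_kernel_word(const void *addr, unsigned long *out)
{
	return copy_from_kernel_nofault(out, addr, sizeof(*out));
}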
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef IOPRIO_H #define IOPRIO_H #include <linux/sched.h> #include <linux/sched/rt.h> #include <linux/iocontext.h> #include <uapi/linux/ioprio.h> /* * Default IO priority. */ #define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) /* * Check that a priority value has a valid class. */ static inline bool ioprio_valid(unsigned short ioprio) { unsigned short class = IOPRIO_PRIO_CLASS(ioprio); return class > IOPRIO_CLASS_NONE && class <= IOPRIO_CLASS_IDLE; } /* * if process has set io priority explicitly, use that. if not, convert * the cpu scheduler nice value to an io priority */ static inline int task_nice_ioprio(struct task_struct *task) { return (task_nice(task) + 20) / 5; } /* * This is for the case where the task hasn't asked for a specific IO class. * Check for idle and rt task process, and return appropriate IO class. */ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; else if (task_is_realtime(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; } #ifdef CONFIG_BLOCK /* * If the task has set an I/O priority, use that. Otherwise, return * the default I/O priority. * * Expected to be called for current task or with task_lock() held to keep * io_context stable. */ static inline int __get_task_ioprio(struct task_struct *p) { struct io_context *ioc = p->io_context; int prio; if (!ioc) return IOPRIO_DEFAULT; if (p != current) lockdep_assert_held(&p->alloc_lock); prio = ioc->ioprio; if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), task_nice_ioprio(p)); return prio; } #else static inline int __get_task_ioprio(struct task_struct *p) { return IOPRIO_DEFAULT; } #endif /* CONFIG_BLOCK */ static inline int get_current_ioprio(void) { return __get_task_ioprio(current); } extern int set_task_ioprio(struct task_struct *task, int ioprio); #ifdef CONFIG_BLOCK extern int ioprio_check_cap(int ioprio); #else static inline int ioprio_check_cap(int ioprio) { return -ENOTBLK; } #endif /* CONFIG_BLOCK */ #endif
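Finally, a small usage sketch for the priority helpers above (the submitter function is hypothetical): an explicit best-effort priority is packed from class and level with IOPRIO_PRIO_VALUE() from the uapi header, and callers with nothing explicit fall back to get_current_ioprio(), which derives the level from the nice value via task_nice_ioprio(); nice 0, for example, maps to level (0 + 20) / 5 = 4.

#include <linux/ioprio.h>

/*
 * Hypothetical sketch: choose an I/O priority for a request. The
 * ioprio_valid() check only verifies the class; levels within
 * IOPRIO_CLASS_BE conventionally run 0..7, lower being more urgent.
 */
static int example_pick_ioprio(int be_level)
{
	int ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, be_level);

	if (!ioprio_valid(ioprio))
		return get_current_ioprio();	/* nice-derived fallback */

	return ioprio;
}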
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* * Copyright (C) 2017-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005 * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved. * * This driver produces cryptographically secure pseudorandom data. It is divided * into roughly six sections, each with a section header: * * - Initialization and readiness waiting. * - Fast key erasure RNG, the "crng". * - Entropy accumulation and extraction routines. * - Entropy collection routines. * - Userspace reader/writer interfaces. * - Sysctl interface. * * The high level overview is that there is one input pool, into which * various pieces of data are hashed. Prior to initialization, some of that * data is then "credited" as having a certain number of bits of entropy. * When enough bits of entropy are available, the hash is finalized and * handed as a key to a stream cipher that expands it indefinitely for * various consumers. This key is periodically refreshed as the various * entropy collectors, described below, add data to the input pool. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/utsname.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/string.h> #include <linux/fcntl.h> #include <linux/slab.h> #include <linux/random.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/interrupt.h> #include <linux/mm.h> #include <linux/nodemask.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/percpu.h> #include <linux/ptrace.h> #include <linux/workqueue.h> #include <linux/irq.h> #include <linux/ratelimit.h> #include <linux/syscalls.h> #include <linux/completion.h> #include <linux/uuid.h> #include <linux/uaccess.h> #include <linux/suspend.h> #include <linux/siphash.h> #include <linux/sched/isolation.h> #include <crypto/chacha.h> #include <crypto/blake2s.h> #include <asm/archrandom.h> #include <asm/processor.h> #include <asm/irq.h> #include <asm/irq_regs.h> #include <asm/io.h> /********************************************************************* * * Initialization and readiness waiting. * * Much of the RNG infrastructure is devoted to various dependencies * being able to wait until the RNG has collected enough entropy and * is ready for safe consumption. * *********************************************************************/ /* * crng_init is protected by base_crng->lock, and only increases * its value (from empty->early->ready). */ static enum { CRNG_EMPTY = 0, /* Little to no entropy collected */ CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */ CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */ } crng_init __read_mostly = CRNG_EMPTY; static DEFINE_STATIC_KEY_FALSE(crng_is_ready); #define crng_ready() (static_branch_likely(&crng_is_ready) || crng_init >= CRNG_READY) /* Various types of waiters for crng_init->CRNG_READY transition. */ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static struct fasync_struct *fasync; static ATOMIC_NOTIFIER_HEAD(random_ready_notifier); /* Control how we warn userspace.
*/ static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); static int ratelimit_disable __read_mostly = IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); /* * Returns whether or not the input pool has been seeded and thus guaranteed * to supply cryptographically secure random numbers. This applies to: the * /dev/urandom device, the get_random_bytes function, and the get_random_{u8, * u16,u32,u64,long} family of functions. * * Returns: true if the input pool has been seeded. * false if the input pool has not been seeded. */ bool rng_is_initialized(void) { return crng_ready(); } EXPORT_SYMBOL(rng_is_initialized); static void __cold crng_set_ready(struct work_struct *work) { static_branch_enable(&crng_is_ready); } /* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ static void try_to_generate_entropy(void); /* * Wait for the input pool to be seeded and thus guaranteed to supply * cryptographically secure random numbers. This applies to: the /dev/urandom * device, the get_random_bytes function, and the get_random_{u8,u16,u32,u64, * long} family of functions. Using any of these functions without first * calling this function forfeits the guarantee of security. * * Returns: 0 if the input pool has been seeded. * -ERESTARTSYS if the function was interrupted by a signal. */ int wait_for_random_bytes(void) { while (!crng_ready()) { int ret; try_to_generate_entropy(); ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); if (ret) return ret > 0 ? 0 : ret; } return 0; } EXPORT_SYMBOL(wait_for_random_bytes); /* * Add a callback function that will be invoked when the crng is initialised, * or immediately if it already has been. Only use this is you are absolutely * sure it is required. Most users should instead be able to test * `rng_is_initialized()` on demand, or make use of `get_random_bytes_wait()`. */ int __cold execute_with_initialized_rng(struct notifier_block *nb) { unsigned long flags; int ret = 0; spin_lock_irqsave(&random_ready_notifier.lock, flags); if (crng_ready()) nb->notifier_call(nb, 0, NULL); else ret = raw_notifier_chain_register((struct raw_notifier_head *)&random_ready_notifier.head, nb); spin_unlock_irqrestore(&random_ready_notifier.lock, flags); return ret; } #define warn_unseeded_randomness() \ if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ __func__, (void *)_RET_IP_, crng_init) /********************************************************************* * * Fast key erasure RNG, the "crng". * * These functions expand entropy from the entropy extractor into * long streams for external consumption using the "fast key erasure" * RNG described at <https://blog.cr.yp.to/20170723-random.html>. * * There are a few exported interfaces for use by other drivers: * * void get_random_bytes(void *buf, size_t len) * u8 get_random_u8() * u16 get_random_u16() * u32 get_random_u32() * u32 get_random_u32_below(u32 ceil) * u32 get_random_u32_above(u32 floor) * u32 get_random_u32_inclusive(u32 floor, u32 ceil) * u64 get_random_u64() * unsigned long get_random_long() * * These interfaces will return the requested number of random bytes * into the given buffer or as a return value. This is equivalent to * a read from /dev/urandom. 
The u8, u16, u32, u64, long family of * functions may be higher performance for one-off random integers, * because they do a bit of buffering and do not invoke reseeding * until the buffer is emptied. * *********************************************************************/ enum { CRNG_RESEED_START_INTERVAL = HZ, CRNG_RESEED_INTERVAL = 60 * HZ }; static struct { u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); unsigned long generation; spinlock_t lock; } base_crng = { .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock) }; struct crng { u8 key[CHACHA_KEY_SIZE]; unsigned long generation; local_lock_t lock; }; static DEFINE_PER_CPU(struct crng, crngs) = { .generation = ULONG_MAX, .lock = INIT_LOCAL_LOCK(crngs.lock), }; /* * Return the interval until the next reseeding, which is normally * CRNG_RESEED_INTERVAL, but during early boot, it is at an interval * proportional to the uptime. */ static unsigned int crng_reseed_interval(void) { static bool early_boot = true; if (unlikely(READ_ONCE(early_boot))) { time64_t uptime = ktime_get_seconds(); if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) WRITE_ONCE(early_boot, false); else return max_t(unsigned int, CRNG_RESEED_START_INTERVAL, (unsigned int)uptime / 2 * HZ); } return CRNG_RESEED_INTERVAL; } /* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */ static void extract_entropy(void *buf, size_t len); /* This extracts a new crng key from the input pool. */ static void crng_reseed(struct work_struct *work) { static DECLARE_DELAYED_WORK(next_reseed, crng_reseed); unsigned long flags; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; /* Immediately schedule the next reseeding, so that it fires sooner rather than later. */ if (likely(system_unbound_wq)) queue_delayed_work(system_unbound_wq, &next_reseed, crng_reseed_interval()); extract_entropy(key, sizeof(key)); /* * We copy the new key into the base_crng, overwriting the old one, * and update the generation counter. We avoid hitting ULONG_MAX, * because the per-cpu crngs are initialized to ULONG_MAX, so this * forces new CPUs that come online to always initialize. */ spin_lock_irqsave(&base_crng.lock, flags); memcpy(base_crng.key, key, sizeof(base_crng.key)); next_gen = base_crng.generation + 1; if (next_gen == ULONG_MAX) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); if (!static_branch_likely(&crng_is_ready)) crng_init = CRNG_READY; spin_unlock_irqrestore(&base_crng.lock, flags); memzero_explicit(key, sizeof(key)); } /* * This generates a ChaCha block using the provided key, and then * immediately overwrites that key with half the block. It returns * the resultant ChaCha state to the user, along with the second * half of the block containing 32 bytes of random data that may * be used; random_data_len may not be greater than 32. * * The returned ChaCha state contains within it a copy of the old * key value, at index 4, so the state should always be zeroed out * immediately after using in order to maintain forward secrecy. * If the state cannot be erased in a timely manner, then it is * safer to set the random_data parameter to &chacha_state[4] so * that this function overwrites it before returning. 
*/ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], u8 *random_data, size_t random_data_len) { u8 first_block[CHACHA_BLOCK_SIZE]; BUG_ON(random_data_len > 32); chacha_init_consts(chacha_state); memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); memset(&chacha_state[12], 0, sizeof(u32) * 4); chacha20_block(chacha_state, first_block); memcpy(key, first_block, CHACHA_KEY_SIZE); memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); memzero_explicit(first_block, sizeof(first_block)); } /* * This function returns a ChaCha state that you may use for generating * random data. It also returns up to 32 bytes on its own of random data * that may be used; random_data_len may not be greater than 32. */ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], u8 *random_data, size_t random_data_len) { unsigned long flags; struct crng *crng; BUG_ON(random_data_len > 32); /* * For the fast path, we check whether we're ready, unlocked first, and * then re-check once locked later. In the case where we're really not * ready, we do fast key erasure with the base_crng directly, extracting * when crng_init is CRNG_EMPTY. */ if (!crng_ready()) { bool ready; spin_lock_irqsave(&base_crng.lock, flags); ready = crng_ready(); if (!ready) { if (crng_init == CRNG_EMPTY) extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_fast_key_erasure(base_crng.key, chacha_state, random_data, random_data_len); } spin_unlock_irqrestore(&base_crng.lock, flags); if (!ready) return; } local_lock_irqsave(&crngs.lock, flags); crng = raw_cpu_ptr(&crngs); /* * If our per-cpu crng is older than the base_crng, then it means * somebody reseeded the base_crng. In that case, we do fast key * erasure on the base_crng, and use its output as the new key * for our per-cpu crng. This brings us up to date with base_crng. */ if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { spin_lock(&base_crng.lock); crng_fast_key_erasure(base_crng.key, chacha_state, crng->key, sizeof(crng->key)); crng->generation = base_crng.generation; spin_unlock(&base_crng.lock); } /* * Finally, when we've made it this far, our per-cpu crng has an up * to date key, and we can do fast key erasure with it to produce * some random data and a ChaCha state for the caller. All other * branches of this function are "unlikely", so most of the time we * should wind up here immediately. */ crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); local_unlock_irqrestore(&crngs.lock, flags); } static void _get_random_bytes(void *buf, size_t len) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 tmp[CHACHA_BLOCK_SIZE]; size_t first_block_len; if (!len) return; first_block_len = min_t(size_t, 32, len); crng_make_state(chacha_state, buf, first_block_len); len -= first_block_len; buf += first_block_len; while (len) { if (len < CHACHA_BLOCK_SIZE) { chacha20_block(chacha_state, tmp); memcpy(buf, tmp, len); memzero_explicit(tmp, sizeof(tmp)); break; } chacha20_block(chacha_state, buf); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; len -= CHACHA_BLOCK_SIZE; buf += CHACHA_BLOCK_SIZE; } memzero_explicit(chacha_state, sizeof(chacha_state)); } /* * This returns random bytes in arbitrary quantities. The quality of the * random bytes is good as /dev/urandom. In order to ensure that the * randomness provided by this function is okay, the function * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. 
*/ void get_random_bytes(void *buf, size_t len) { warn_unseeded_randomness(); _get_random_bytes(buf, len); } EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(struct iov_iter *iter) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 block[CHACHA_BLOCK_SIZE]; size_t ret = 0, copied; if (unlikely(!iov_iter_count(iter))) return 0; /* * Immediately overwrite the ChaCha key at index 4 with random * bytes, in case userspace causes copy_to_iter() below to sleep * forever, so that we still retain forward secrecy in that case. */ crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); /* * However, if we're doing a read of len <= 32, we don't need to * use chacha_state after, so we can simply return those bytes to * the user directly. */ if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) { ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter); goto out_zero_chacha; } for (;;) { chacha20_block(chacha_state, block); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; copied = copy_to_iter(block, sizeof(block), iter); ret += copied; if (!iov_iter_count(iter) || copied != sizeof(block)) break; BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } } memzero_explicit(block, sizeof(block)); out_zero_chacha: memzero_explicit(chacha_state, sizeof(chacha_state)); return ret ? ret : -EFAULT; } /* * Batched entropy returns random integers. The quality of the random * number is good as /dev/urandom. In order to ensure that the randomness * provided by this function is okay, the function wait_for_random_bytes() * should be called and return 0 at least once at any point prior. */ #define DEFINE_BATCHED_ENTROPY(type) \ struct batch_ ##type { \ /* \ * We make this 1.5x a ChaCha block, so that we get the \ * remaining 32 bytes from fast key erasure, plus one full \ * block from the detached ChaCha state. We can increase \ * the size of this later if needed so long as we keep the \ * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. \ */ \ type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \ local_lock_t lock; \ unsigned long generation; \ unsigned int position; \ }; \ \ static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \ .lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock), \ .position = UINT_MAX \ }; \ \ type get_random_ ##type(void) \ { \ type ret; \ unsigned long flags; \ struct batch_ ##type *batch; \ unsigned long next_gen; \ \ warn_unseeded_randomness(); \ \ if (!crng_ready()) { \ _get_random_bytes(&ret, sizeof(ret)); \ return ret; \ } \ \ local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \ batch = raw_cpu_ptr(&batched_entropy_##type); \ \ next_gen = READ_ONCE(base_crng.generation); \ if (batch->position >= ARRAY_SIZE(batch->entropy) || \ next_gen != batch->generation) { \ _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \ batch->position = 0; \ batch->generation = next_gen; \ } \ \ ret = batch->entropy[batch->position]; \ batch->entropy[batch->position] = 0; \ ++batch->position; \ local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \ return ret; \ } \ EXPORT_SYMBOL(get_random_ ##type); DEFINE_BATCHED_ENTROPY(u8) DEFINE_BATCHED_ENTROPY(u16) DEFINE_BATCHED_ENTROPY(u32) DEFINE_BATCHED_ENTROPY(u64) u32 __get_random_u32_below(u32 ceil) { /* * This is the slow path for variable ceil. 
It is still fast, most of * the time, by doing traditional reciprocal multiplication and * opportunistically comparing the lower half to ceil itself, before * falling back to computing a larger bound, and then rejecting samples * whose lower half would indicate a range indivisible by ceil. The use * of `-ceil % ceil` is analogous to `2^32 % ceil`, but is computable * in 32-bits. */ u32 rand = get_random_u32(); u64 mult; /* * This function is technically undefined for ceil == 0, and in fact * for the non-underscored constant version in the header, we build bug * on that. But for the non-constant case, it's convenient to have that * evaluate to being a straight call to get_random_u32(), so that * get_random_u32_inclusive() can work over its whole range without * undefined behavior. */ if (unlikely(!ceil)) return rand; mult = (u64)ceil * rand; if (unlikely((u32)mult < ceil)) { u32 bound = -ceil % ceil; while (unlikely((u32)mult < bound)) mult = (u64)ceil * get_random_u32(); } return mult >> 32; } EXPORT_SYMBOL(__get_random_u32_below); #ifdef CONFIG_SMP /* * This function is called when the CPU is coming up, with entry * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. */ int __cold random_prepare_cpu(unsigned int cpu) { /* * When the cpu comes back online, immediately invalidate both * the per-cpu crng and all batches, so that we serve fresh * randomness. */ per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX; per_cpu_ptr(&batched_entropy_u8, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u16, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX; return 0; } #endif /********************************************************************** * * Entropy accumulation and extraction routines. * * Callers may add entropy via: * * static void mix_pool_bytes(const void *buf, size_t len) * * After which, if added entropy should be credited: * * static void credit_init_bits(size_t bits) * * Finally, extract entropy via: * * static void extract_entropy(void *buf, size_t len) * **********************************************************************/ enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */ POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */ }; static struct { struct blake2s_state hash; spinlock_t lock; unsigned int init_bits; } input_pool = { .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, .hash.outlen = BLAKE2S_HASH_SIZE, .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; static void _mix_pool_bytes(const void *buf, size_t len) { blake2s_update(&input_pool.hash, buf, len); } /* * This function adds bytes into the input pool. It does not * update the initialization bit counter; the caller should call * credit_init_bits if this is appropriate. */ static void mix_pool_bytes(const void *buf, size_t len) { unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } /* * This is an HKDF-like construction for using the hashed collected entropy * as a PRF key, that's then expanded block-by-block. 
*/ static void extract_entropy(void *buf, size_t len) { unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; struct { unsigned long rdseed[32 / sizeof(long)]; size_t counter; } block; size_t i, longs; for (i = 0; i < ARRAY_SIZE(block.rdseed);) { longs = arch_get_random_seed_longs(&block.rdseed[i], ARRAY_SIZE(block.rdseed) - i); if (longs) { i += longs; continue; } longs = arch_get_random_longs(&block.rdseed[i], ARRAY_SIZE(block.rdseed) - i); if (longs) { i += longs; continue; } block.rdseed[i++] = random_get_entropy(); } spin_lock_irqsave(&input_pool.lock, flags); /* seed = HASHPRF(last_key, entropy_input) */ blake2s_final(&input_pool.hash, seed); /* next_key = HASHPRF(seed, RDSEED || 0) */ block.counter = 0; blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); spin_unlock_irqrestore(&input_pool.lock, flags); memzero_explicit(next_key, sizeof(next_key)); while (len) { i = min_t(size_t, len, BLAKE2S_HASH_SIZE); /* output = HASHPRF(seed, RDSEED || ++counter) */ ++block.counter; blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); len -= i; buf += i; } memzero_explicit(seed, sizeof(seed)); memzero_explicit(&block, sizeof(block)); } #define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits) static void __cold _credit_init_bits(size_t bits) { static DECLARE_WORK(set_ready, crng_set_ready); unsigned int new, orig, add; unsigned long flags; if (!bits) return; add = min_t(size_t, bits, POOL_BITS); orig = READ_ONCE(input_pool.init_bits); do { new = min_t(unsigned int, POOL_BITS, orig + add); } while (!try_cmpxchg(&input_pool.init_bits, &orig, new)); if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */ if (static_key_initialized && system_unbound_wq) queue_work(system_unbound_wq, &set_ready); atomic_notifier_call_chain(&random_ready_notifier, 0, NULL); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); pr_notice("crng init done\n"); if (urandom_warning.missed) pr_notice("%d urandom warning(s) missed due to ratelimiting\n", urandom_warning.missed); } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { spin_lock_irqsave(&base_crng.lock, flags); /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */ if (crng_init == CRNG_EMPTY) { extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_init = CRNG_EARLY; } spin_unlock_irqrestore(&base_crng.lock, flags); } } /********************************************************************** * * Entropy collection routines. * * The following exported functions are used for pushing entropy into * the above entropy accumulation routines: * * void add_device_randomness(const void *buf, size_t len); * void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after); * void add_bootloader_randomness(const void *buf, size_t len); * void add_vmfork_randomness(const void *unique_vm_id, size_t len); * void add_interrupt_randomness(int irq); * void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); * void add_disk_randomness(struct gendisk *disk); * * add_device_randomness() adds data to the input pool that * is likely to differ between two devices (or possibly even per boot). * This would be things like MAC addresses or serial numbers, or the * read-out of the RTC. 
This does *not* credit any actual entropy to * the pool, but it initializes the pool to different values for devices * that might otherwise be identical and have very little entropy * available to them (particularly common in the embedded world). * * add_hwgenerator_randomness() is for true hardware RNGs, and will credit * entropy as specified by the caller. If the entropy pool is full it will * block until more entropy is needed. * * add_bootloader_randomness() is called by bootloader drivers, such as EFI * and device tree, and credits its input depending on whether or not the * command line option 'random.trust_bootloader'. * * add_vmfork_randomness() adds a unique (but not necessarily secret) ID * representing the current instance of a VM to the pool, without crediting, * and then force-reseeds the crng so that it takes effect immediately. * * add_interrupt_randomness() uses the interrupt timing as random * inputs to the entropy pool. Using the cycle counters and the irq source * as inputs, it feeds the input pool roughly once a second or after 64 * interrupts, crediting 1 bit of entropy for whichever comes first. * * add_input_randomness() uses the input layer interrupt timing, as well * as the event type information from the hardware. * * add_disk_randomness() uses what amounts to the seek time of block * layer request events, on a per-disk_devt basis, as input to the * entropy pool. Note that high-speed solid state drives with very low * seek times do not make for good sources of entropy, as their seek * times are usually fairly consistent. * * The last two routines try to estimate how many bits of entropy * to credit. They do this by keeping track of the first and second * order deltas of the event timings. * **********************************************************************/ static bool trust_cpu __initdata = true; static bool trust_bootloader __initdata = true; static int __init parse_trust_cpu(char *arg) { return kstrtobool(arg, &trust_cpu); } static int __init parse_trust_bootloader(char *arg) { return kstrtobool(arg, &trust_bootloader); } early_param("random.trust_cpu", parse_trust_cpu); early_param("random.trust_bootloader", parse_trust_bootloader); static int random_pm_notification(struct notifier_block *nb, unsigned long action, void *data) { unsigned long flags, entropy = random_get_entropy(); /* * Encode a representation of how long the system has been suspended, * in a way that is distinct from prior system suspends. */ ktime_t stamps[] = { ktime_get(), ktime_get_boottime(), ktime_get_real() }; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&action, sizeof(action)); _mix_pool_bytes(stamps, sizeof(stamps)); _mix_pool_bytes(&entropy, sizeof(entropy)); spin_unlock_irqrestore(&input_pool.lock, flags); if (crng_ready() && (action == PM_RESTORE_PREPARE || (action == PM_POST_SUSPEND && !IS_ENABLED(CONFIG_PM_AUTOSLEEP) && !IS_ENABLED(CONFIG_PM_USERSPACE_AUTOSLEEP)))) { crng_reseed(NULL); pr_notice("crng reseeded on system resumption\n"); } return 0; } static struct notifier_block pm_notifier = { .notifier_call = random_pm_notification }; /* * This is called extremely early, before time keeping functionality is * available, but arch randomness is. Interrupts are not yet enabled. 
*/ void __init random_init_early(const char *command_line) { unsigned long entropy[BLAKE2S_BLOCK_SIZE / sizeof(long)]; size_t i, longs, arch_bits; #if defined(LATENT_ENTROPY_PLUGIN) static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy; _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); #endif for (i = 0, arch_bits = sizeof(entropy) * 8; i < ARRAY_SIZE(entropy);) { longs = arch_get_random_seed_longs(entropy, ARRAY_SIZE(entropy) - i); if (longs) { _mix_pool_bytes(entropy, sizeof(*entropy) * longs); i += longs; continue; } longs = arch_get_random_longs(entropy, ARRAY_SIZE(entropy) - i); if (longs) { _mix_pool_bytes(entropy, sizeof(*entropy) * longs); i += longs; continue; } arch_bits -= sizeof(*entropy) * 8; ++i; } _mix_pool_bytes(init_utsname(), sizeof(*(init_utsname()))); _mix_pool_bytes(command_line, strlen(command_line)); /* Reseed if already seeded by earlier phases. */ if (crng_ready()) crng_reseed(NULL); else if (trust_cpu) _credit_init_bits(arch_bits); } /* * This is called a little bit after the prior function, and now there is * access to timestamps counters. Interrupts are not yet enabled. */ void __init random_init(void) { unsigned long entropy = random_get_entropy(); ktime_t now = ktime_get_real(); _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(&entropy, sizeof(entropy)); add_latent_entropy(); /* * If we were initialized by the cpu or bootloader before jump labels * or workqueues are initialized, then we should enable the static * branch here, where it's guaranteed that these have been initialized. */ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY) crng_set_ready(NULL); /* Reseed if already seeded by earlier phases. */ if (crng_ready()) crng_reseed(NULL); WARN_ON(register_pm_notifier(&pm_notifier)); WARN(!entropy, "Missing cycle counter and fallback timer; RNG " "entropy collection will consequently suffer."); } /* * Add device- or boot-specific data to the input pool to help * initialize it. * * None of this adds any entropy; it is meant to avoid the problem of * the entropy pool having similar initial state across largely * identical devices. */ void add_device_randomness(const void *buf, size_t len) { unsigned long entropy = random_get_entropy(); unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); /* * Interface for in-kernel drivers of true hardware RNGs. Those devices * may produce endless random bits, so this function will sleep for * some amount of time after, if the sleep_after parameter is true. */ void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after) { mix_pool_bytes(buf, len); credit_init_bits(entropy); /* * Throttle writing to once every reseed interval, unless we're not yet * initialized or no entropy is credited. */ if (sleep_after && !kthread_should_stop() && (crng_ready() || !entropy)) schedule_timeout_interruptible(crng_reseed_interval()); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); /* * Handle random seed passed by bootloader, and credit it depending * on the command line option 'random.trust_bootloader'. 
*/ void __init add_bootloader_randomness(const void *buf, size_t len) { mix_pool_bytes(buf, len); if (trust_bootloader) credit_init_bits(len * 8); } #if IS_ENABLED(CONFIG_VMGENID) static BLOCKING_NOTIFIER_HEAD(vmfork_chain); /* * Handle a new unique VM ID, which is unique, not secret, so we * don't credit it, but we do immediately force a reseed after so * that it's used by the crng posthaste. */ void __cold add_vmfork_randomness(const void *unique_vm_id, size_t len) { add_device_randomness(unique_vm_id, len); if (crng_ready()) { crng_reseed(NULL); pr_notice("crng reseeded due to virtual machine fork\n"); } blocking_notifier_call_chain(&vmfork_chain, 0, NULL); } #if IS_MODULE(CONFIG_VMGENID) EXPORT_SYMBOL_GPL(add_vmfork_randomness); #endif int __cold register_random_vmfork_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&vmfork_chain, nb); } EXPORT_SYMBOL_GPL(register_random_vmfork_notifier); int __cold unregister_random_vmfork_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&vmfork_chain, nb); } EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier); #endif struct fast_pool { unsigned long pool[4]; unsigned long last; unsigned int count; struct timer_list mix; }; static void mix_interrupt_randomness(struct timer_list *work); static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { #ifdef CONFIG_64BIT #define FASTMIX_PERM SIPHASH_PERMUTATION .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 }, #else #define FASTMIX_PERM HSIPHASH_PERMUTATION .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 }, #endif .mix = __TIMER_INITIALIZER(mix_interrupt_randomness, 0) }; /* * This is [Half]SipHash-1-x, starting from an empty key. Because * the key is fixed, it assumes that its inputs are non-malicious, * and therefore this has no security on its own. s represents the * four-word SipHash state, while v represents a two-word input. */ static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) { s[3] ^= v1; FASTMIX_PERM(s[0], s[1], s[2], s[3]); s[0] ^= v1; s[3] ^= v2; FASTMIX_PERM(s[0], s[1], s[2], s[3]); s[0] ^= v2; } #ifdef CONFIG_SMP /* * This function is called when the CPU has just come online, with * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. */ int __cold random_online_cpu(unsigned int cpu) { /* * During CPU shutdown and before CPU onlining, add_interrupt_ * randomness() may schedule mix_interrupt_randomness(), and * set the MIX_INFLIGHT flag. However, because the worker can * be scheduled on a different CPU during this period, that * flag will never be cleared. For that reason, we zero out * the flag here, which runs just after workqueues are onlined * for the CPU again. This also has the effect of setting the * irq randomness count to zero so that new accumulated irqs * are fresh. */ per_cpu_ptr(&irq_randomness, cpu)->count = 0; return 0; } #endif static void mix_interrupt_randomness(struct timer_list *work) { struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); /* * The size of the copied stack pool is explicitly 2 longs so that we * only ever ingest half of the siphash output each time, retaining * the other half as the next "key" that carries over. The entropy is * supposed to be sufficiently dispersed between bits so on average * we don't wind up "losing" some. */ unsigned long pool[2]; unsigned int count; /* Check to see if we're running on the wrong CPU due to hotplug. 
*/ local_irq_disable(); if (fast_pool != this_cpu_ptr(&irq_randomness)) { local_irq_enable(); return; } /* * Copy the pool to the stack so that the mixer always has a * consistent view, before we reenable irqs again. */ memcpy(pool, fast_pool->pool, sizeof(pool)); count = fast_pool->count; fast_pool->count = 0; fast_pool->last = jiffies; local_irq_enable(); mix_pool_bytes(pool, sizeof(pool)); credit_init_bits(clamp_t(unsigned int, (count & U16_MAX) / 64, 1, sizeof(pool) * 8)); memzero_explicit(pool, sizeof(pool)); } void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; unsigned long entropy = random_get_entropy(); struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; fast_mix(fast_pool->pool, entropy, (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq)); new_count = ++fast_pool->count; if (new_count & MIX_INFLIGHT) return; if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ)) return; fast_pool->count |= MIX_INFLIGHT; if (!timer_pending(&fast_pool->mix)) { fast_pool->mix.expires = jiffies; add_timer_on(&fast_pool->mix, raw_smp_processor_id()); } } EXPORT_SYMBOL_GPL(add_interrupt_randomness); /* There is one of these per entropy source */ struct timer_rand_state { unsigned long last_time; long last_delta, last_delta2; }; /* * This function adds entropy to the entropy "pool" by using timing * delays. It uses the timer_rand_state structure to make an estimate * of how many bits of entropy this call has added to the pool. The * value "num" is also added to the pool; it should somehow describe * the type of event that just happened. */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { unsigned long entropy = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; unsigned int bits; /* * If we're in a hard IRQ, add_interrupt_randomness() will be called * sometime after, so mix into the fast pool. */ if (in_hardirq()) { fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num); } else { spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(&num, sizeof(num)); spin_unlock_irqrestore(&input_pool.lock, flags); } if (crng_ready()) return; /* * Calculate number of bits of randomness we probably added. * We take into account the first, second and third-order deltas * in order to make our estimate. */ delta = now - READ_ONCE(state->last_time); WRITE_ONCE(state->last_time, now); delta2 = delta - READ_ONCE(state->last_delta); WRITE_ONCE(state->last_delta, delta); delta3 = delta2 - READ_ONCE(state->last_delta2); WRITE_ONCE(state->last_delta2, delta2); if (delta < 0) delta = -delta; if (delta2 < 0) delta2 = -delta2; if (delta3 < 0) delta3 = -delta3; if (delta > delta2) delta = delta2; if (delta > delta3) delta = delta3; /* * delta is now minimum absolute delta. Round down by 1 bit * on general principles, and limit entropy estimate to 11 bits. */ bits = min(fls(delta >> 1), 11); /* * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness() * will run after this, which uses a different crediting scheme of 1 bit * per every 64 interrupts. In order to let that function do accounting * close to the one in this function, we credit a full 64/64 bit per bit, * and then subtract one to account for the extra one added. 
*/ if (in_hardirq()) this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1; else _credit_init_bits(bits); } void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) { static unsigned char last_value; static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; /* Ignore autorepeat and the like. */ if (value == last_value) return; last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); } EXPORT_SYMBOL_GPL(add_input_randomness); #ifdef CONFIG_BLOCK void add_disk_randomness(struct gendisk *disk) { if (!disk || !disk->random) return; /* First major is 1, so we get >= 0x200 here. */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } EXPORT_SYMBOL_GPL(add_disk_randomness); void __cold rand_initialize_disk(struct gendisk *disk) { struct timer_rand_state *state; /* * If kzalloc returns null, we just won't use that entropy * source. */ state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); if (state) { state->last_time = INITIAL_JIFFIES; disk->random = state; } } #endif struct entropy_timer_state { unsigned long entropy; struct timer_list timer; atomic_t samples; unsigned int samples_per_bit; }; /* * Each time the timer fires, we expect that we got an unpredictable jump in * the cycle counter. Even if the timer is running on another CPU, the timer * activity will be touching the stack of the CPU that is generating entropy. * * Note that we don't re-arm the timer in the timer itself - we are happy to be * scheduled away, since that just makes the load more complex, but we do not * want the timer to keep ticking unless the entropy loop is running. * * So the re-arming always happens in the entropy loop itself. */ static void __cold entropy_timer(struct timer_list *timer) { struct entropy_timer_state *state = container_of(timer, struct entropy_timer_state, timer); unsigned long entropy = random_get_entropy(); mix_pool_bytes(&entropy, sizeof(entropy)); if (atomic_inc_return(&state->samples) % state->samples_per_bit == 0) credit_init_bits(1); } /* * If we have an actual cycle counter, see if we can generate enough entropy * with timing noise. */ static void __cold try_to_generate_entropy(void) { enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = HZ / 15 }; u8 stack_bytes[sizeof(struct entropy_timer_state) + SMP_CACHE_BYTES - 1]; struct entropy_timer_state *stack = PTR_ALIGN((void *)stack_bytes, SMP_CACHE_BYTES); unsigned int i, num_different = 0; unsigned long last = random_get_entropy(); int cpu = -1; for (i = 0; i < NUM_TRIAL_SAMPLES - 1; ++i) { stack->entropy = random_get_entropy(); if (stack->entropy != last) ++num_different; last = stack->entropy; } stack->samples_per_bit = DIV_ROUND_UP(NUM_TRIAL_SAMPLES, num_different + 1); if (stack->samples_per_bit > MAX_SAMPLES_PER_BIT) return; atomic_set(&stack->samples, 0); timer_setup_on_stack(&stack->timer, entropy_timer, 0); while (!crng_ready() && !signal_pending(current)) { /* * Check !timer_pending() and then ensure that any previous callback has finished * executing by checking try_to_del_timer_sync(), before queueing the next one. */ if (!timer_pending(&stack->timer) && try_to_del_timer_sync(&stack->timer) >= 0) { struct cpumask timer_cpus; unsigned int num_cpus; /* * Preemption must be disabled here, both to read the current CPU number * and to avoid scheduling a timer on a dead CPU. */ preempt_disable(); /* Only schedule callbacks on timer CPUs that are online. 
*/ cpumask_and(&timer_cpus, housekeeping_cpumask(HK_TYPE_TIMER), cpu_online_mask); num_cpus = cpumask_weight(&timer_cpus); /* In very bizarre case of misconfiguration, fallback to all online. */ if (unlikely(num_cpus == 0)) { timer_cpus = *cpu_online_mask; num_cpus = cpumask_weight(&timer_cpus); } /* Basic CPU round-robin, which avoids the current CPU. */ do { cpu = cpumask_next(cpu, &timer_cpus); if (cpu >= nr_cpu_ids) cpu = cpumask_first(&timer_cpus); } while (cpu == smp_processor_id() && num_cpus > 1); /* Expiring the timer at `jiffies` means it's the next tick. */ stack->timer.expires = jiffies; add_timer_on(&stack->timer, cpu); preempt_enable(); } mix_pool_bytes(&stack->entropy, sizeof(stack->entropy)); schedule(); stack->entropy = random_get_entropy(); } mix_pool_bytes(&stack->entropy, sizeof(stack->entropy)); del_timer_sync(&stack->timer); destroy_timer_on_stack(&stack->timer); } /********************************************************************** * * Userspace reader/writer interfaces. * * getrandom(2) is the primary modern interface into the RNG and should * be used in preference to anything else. * * Reading from /dev/random has the same functionality as calling * getrandom(2) with flags=0. In earlier versions, however, it had * vastly different semantics and should therefore be avoided, to * prevent backwards compatibility issues. * * Reading from /dev/urandom has the same functionality as calling * getrandom(2) with flags=GRND_INSECURE. Because it does not block * waiting for the RNG to be ready, it should not be used. * * Writing to either /dev/random or /dev/urandom adds entropy to * the input pool but does not credit it. * * Polling on /dev/random indicates when the RNG is initialized, on * the read side, and when it wants new entropy, on the write side. * * Both /dev/random and /dev/urandom have the same set of ioctls for * adding entropy, getting the entropy count, zeroing the count, and * reseeding the crng. * **********************************************************************/ SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { struct iov_iter iter; int ret; if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; /* * Requesting insecure and blocking randomness at the same time makes * no sense. */ if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) return -EINVAL; if (!crng_ready() && !(flags & GRND_INSECURE)) { if (flags & GRND_NONBLOCK) return -EAGAIN; ret = wait_for_random_bytes(); if (unlikely(ret)) return ret; } ret = import_ubuf(ITER_DEST, ubuf, len, &iter); if (unlikely(ret)) return ret; return get_random_bytes_user(&iter); } static __poll_t random_poll(struct file *file, poll_table *wait) { poll_wait(file, &crng_init_wait, wait); return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } static ssize_t write_pool_user(struct iov_iter *iter) { u8 block[BLAKE2S_BLOCK_SIZE]; ssize_t ret = 0; size_t copied; if (unlikely(!iov_iter_count(iter))) return 0; for (;;) { copied = copy_from_iter(block, sizeof(block), iter); ret += copied; mix_pool_bytes(block, copied); if (!iov_iter_count(iter) || copied != sizeof(block)) break; BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } } memzero_explicit(block, sizeof(block)); return ret ? 
ret : -EFAULT; } static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter) { return write_pool_user(iter); } static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { static int maxwarn = 10; /* * Opportunistically attempt to initialize the RNG on platforms that * have fast cycle counters, but don't (for now) require it to succeed. */ if (!crng_ready()) try_to_generate_entropy(); if (!crng_ready()) { if (!ratelimit_disable && maxwarn <= 0) ++urandom_warning.missed; else if (ratelimit_disable || __ratelimit(&urandom_warning)) { --maxwarn; pr_notice("%s: uninitialized urandom read (%zu bytes read)\n", current->comm, iov_iter_count(iter)); } } return get_random_bytes_user(iter); } static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { int ret; if (!crng_ready() && ((kiocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) || (kiocb->ki_filp->f_flags & O_NONBLOCK))) return -EAGAIN; ret = wait_for_random_bytes(); if (ret != 0) return ret; return get_random_bytes_user(iter); } static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { int __user *p = (int __user *)arg; int ent_count; switch (cmd) { case RNDGETENTCNT: /* Inherently racy, no point locking. */ if (put_user(input_pool.init_bits, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; if (ent_count < 0) return -EINVAL; credit_init_bits(ent_count); return 0; case RNDADDENTROPY: { struct iov_iter iter; ssize_t ret; int len; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p++)) return -EFAULT; if (ent_count < 0) return -EINVAL; if (get_user(len, p++)) return -EFAULT; ret = import_ubuf(ITER_SOURCE, p, len, &iter); if (unlikely(ret)) return ret; ret = write_pool_user(&iter); if (unlikely(ret < 0)) return ret; /* Since we're crediting, enforce that it was all written into the pool. */ if (unlikely(ret != len)) return -EFAULT; credit_init_bits(ent_count); return 0; } case RNDZAPENTCNT: case RNDCLEARPOOL: /* No longer has any effect. */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!crng_ready()) return -ENODATA; crng_reseed(NULL); return 0; default: return -EINVAL; } } static int random_fasync(int fd, struct file *filp, int on) { return fasync_helper(fd, filp, on, &fasync); } const struct file_operations random_fops = { .read_iter = random_read_iter, .write_iter = random_write_iter, .poll = random_poll, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; const struct file_operations urandom_fops = { .read_iter = urandom_read_iter, .write_iter = random_write_iter, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; /******************************************************************** * * Sysctl interface. * * These are partly unused legacy knobs with dummy values to not break * userspace and partly still useful things. They are usually accessible * in /proc/sys/kernel/random/ and are as follows: * * - boot_id - a UUID representing the current boot. * * - uuid - a random UUID, different each time the file is read. * * - poolsize - the number of bits of entropy that the input pool can * hold, tied to the POOL_BITS constant. 
* * - entropy_avail - the number of bits of entropy currently in the * input pool. Always <= poolsize. * * - write_wakeup_threshold - the amount of entropy in the input pool * below which write polls to /dev/random will unblock, requesting * more entropy, tied to the POOL_READY_BITS constant. It is writable * to avoid breaking old userspaces, but writing to it does not * change any behavior of the RNG. * * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL. * It is writable to avoid breaking old userspaces, but writing * to it does not change any behavior of the RNG. * ********************************************************************/ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; static int sysctl_random_write_wakeup_bits = POOL_READY_BITS; static int sysctl_poolsize = POOL_BITS; static u8 sysctl_bootid[UUID_SIZE]; /* * This function is used to return both the bootid UUID, and random * UUID. The difference is in whether table->data is NULL; if it is, * then a new UUID is generated and returned to the user. */ static int proc_do_uuid(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { u8 tmp_uuid[UUID_SIZE], *uuid; char uuid_string[UUID_STRING_LEN + 1]; struct ctl_table fake_table = { .data = uuid_string, .maxlen = UUID_STRING_LEN }; if (write) return -EPERM; uuid = table->data; if (!uuid) { uuid = tmp_uuid; generate_random_uuid(uuid); } else { static DEFINE_SPINLOCK(bootid_spinlock); spin_lock(&bootid_spinlock); if (!uuid[8]) generate_random_uuid(uuid); spin_unlock(&bootid_spinlock); } snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); return proc_dostring(&fake_table, 0, buf, lenp, ppos); } /* The same as proc_dointvec, but writes don't change anything. */ static int proc_do_rointvec(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos); } static struct ctl_table random_table[] = { { .procname = "poolsize", .data = &sysctl_poolsize, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, { .procname = "entropy_avail", .data = &input_pool.init_bits, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, { .procname = "write_wakeup_threshold", .data = &sysctl_random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_rointvec, }, { .procname = "urandom_min_reseed_secs", .data = &sysctl_random_min_urandom_seed, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_rointvec, }, { .procname = "boot_id", .data = &sysctl_bootid, .mode = 0444, .proc_handler = proc_do_uuid, }, { .procname = "uuid", .mode = 0444, .proc_handler = proc_do_uuid, }, }; /* * random_init() is called before sysctl_init(), * so we cannot call register_sysctl_init() in random_init() */ static int __init random_sysctls_init(void) { register_sysctl_init("kernel/random", random_table); return 0; } device_initcall(random_sysctls_init); #endif
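
To make the exported interfaces above concrete, here is a minimal, hypothetical in-kernel consumer. It is not part of random.c; the function name demo_rng_init and its parameters are invented for illustration, while the calls it makes (add_device_randomness(), wait_for_random_bytes(), get_random_bytes(), get_random_u32_below(), memzero_explicit()) are the interfaces documented in the comments above.

#include <linux/random.h>
#include <linux/string.h>
#include <linux/printk.h>
#include <linux/types.h>

/* Hypothetical driver init step that needs seeded randomness. */
static int demo_rng_init(const u8 *serial, size_t serial_len)
{
	u8 demo_key[32];
	u32 demo_backoff_ms;
	int ret;

	/* Device-specific but low-entropy data: mixed in, never credited. */
	add_device_randomness(serial, serial_len);

	/* Sleep until the crng is ready so the bytes below are well seeded. */
	ret = wait_for_random_bytes();
	if (ret)
		return ret;

	/* Arbitrary-length output; quality equivalent to /dev/urandom. */
	get_random_bytes(demo_key, sizeof(demo_key));

	/* Batched integer path; uniform in [0, 1000) without modulo bias. */
	demo_backoff_ms = get_random_u32_below(1000);
	pr_info("demo: key generated, backoff %u ms\n", demo_backoff_ms);

	/* Mirror the hygiene used throughout random.c for key material. */
	memzero_explicit(demo_key, sizeof(demo_key));
	return 0;
}
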
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/sched/xacct.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/overflow.h>
#include "internal.h"

#include <linux/uaccess.h>
#include <asm/unistd.h>

/*
 * Performs necessary checks before doing a clone.
 *
 * Can adjust amount of bytes to clone via @req_count argument.
 * Returns appropriate error code that caller should return or
 * zero in case the clone should be allowed.
 */
static int generic_remap_checks(struct file *file_in, loff_t pos_in,
				struct file *file_out, loff_t pos_out,
				loff_t *req_count, unsigned int remap_flags)
{
	struct inode *inode_in = file_in->f_mapping->host;
	struct inode *inode_out = file_out->f_mapping->host;
	uint64_t count = *req_count;
	uint64_t bcount;
	loff_t size_in, size_out;
	loff_t bs = inode_out->i_sb->s_blocksize;
	int ret;

	/* The start of both ranges must be aligned to an fs block. */
	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
		return -EINVAL;

	/* Ensure offsets don't wrap.
*/ if (pos_in + count < pos_in || pos_out + count < pos_out) return -EINVAL; size_in = i_size_read(inode_in); size_out = i_size_read(inode_out); /* Dedupe requires both ranges to be within EOF. */ if ((remap_flags & REMAP_FILE_DEDUP) && (pos_in >= size_in || pos_in + count > size_in || pos_out >= size_out || pos_out + count > size_out)) return -EINVAL; /* Ensure the infile range is within the infile. */ if (pos_in >= size_in) return -EINVAL; count = min(count, size_in - (uint64_t)pos_in); ret = generic_write_check_limits(file_out, pos_out, &count); if (ret) return ret; /* * If the user wanted us to link to the infile's EOF, round up to the * next block boundary for this check. * * Otherwise, make sure the count is also block-aligned, having * already confirmed the starting offsets' block alignment. */ if (pos_in + count == size_in && (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { bcount = ALIGN(size_in, bs) - pos_in; } else { if (!IS_ALIGNED(count, bs)) count = ALIGN_DOWN(count, bs); bcount = count; } /* Don't allow overlapped cloning within the same file. */ if (inode_in == inode_out && pos_out + bcount > pos_in && pos_out < pos_in + bcount) return -EINVAL; /* * We shortened the request but the caller can't deal with that, so * bounce the request back to userspace. */ if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) return -EINVAL; *req_count = count; return 0; } static int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write) { int mask = write ? MAY_WRITE : MAY_READ; loff_t tmp; int ret; if (unlikely(pos < 0 || len < 0)) return -EINVAL; if (unlikely(check_add_overflow(pos, len, &tmp))) return -EINVAL; ret = security_file_permission(file, mask); if (ret) return ret; return fsnotify_file_area_perm(file, mask, &pos, len); } /* * Ensure that we don't remap a partial EOF block in the middle of something * else. Assume that the offsets have already been checked for block * alignment. * * For clone we only link a partial EOF block above or at the destination file's * EOF. For deduplication we accept a partial EOF block only if it ends at the * destination file's EOF (can not link it into the middle of a file). * * Shorten the request if possible. */ static int generic_remap_check_len(struct inode *inode_in, struct inode *inode_out, loff_t pos_out, loff_t *len, unsigned int remap_flags) { u64 blkmask = i_blocksize(inode_in) - 1; loff_t new_len = *len; if ((*len & blkmask) == 0) return 0; if (pos_out + *len < i_size_read(inode_out)) new_len &= ~blkmask; if (new_len == *len) return 0; if (remap_flags & REMAP_FILE_CAN_SHORTEN) { *len = new_len; return 0; } return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; } /* Read a page's worth of file data into the page cache. */ static struct folio *vfs_dedupe_get_folio(struct file *file, loff_t pos) { return read_mapping_folio(file->f_mapping, pos >> PAGE_SHIFT, file); } /* * Lock two folios, ensuring that we lock in offset order if the folios * are from the same file. */ static void vfs_lock_two_folios(struct folio *folio1, struct folio *folio2) { /* Always lock in order of increasing index. */ if (folio1->index > folio2->index) swap(folio1, folio2); folio_lock(folio1); if (folio1 != folio2) folio_lock(folio2); } /* Unlock two folios, being careful not to unlock the same folio twice. 
*/ static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2) { folio_unlock(folio1); if (folio1 != folio2) folio_unlock(folio2); } /* * Compare extents of two files to see if they are the same. * Caller must have locked both inodes to prevent write races. */ static int vfs_dedupe_file_range_compare(struct file *src, loff_t srcoff, struct file *dest, loff_t dstoff, loff_t len, bool *is_same) { bool same = true; int error = -EINVAL; while (len) { struct folio *src_folio, *dst_folio; void *src_addr, *dst_addr; loff_t cmp_len = min(PAGE_SIZE - offset_in_page(srcoff), PAGE_SIZE - offset_in_page(dstoff)); cmp_len = min(cmp_len, len); if (cmp_len <= 0) goto out_error; src_folio = vfs_dedupe_get_folio(src, srcoff); if (IS_ERR(src_folio)) { error = PTR_ERR(src_folio); goto out_error; } dst_folio = vfs_dedupe_get_folio(dest, dstoff); if (IS_ERR(dst_folio)) { error = PTR_ERR(dst_folio); folio_put(src_folio); goto out_error; } vfs_lock_two_folios(src_folio, dst_folio); /* * Now that we've locked both folios, make sure they're still * mapped to the file data we're interested in. If not, * someone is invalidating pages on us and we lose. */ if (!folio_test_uptodate(src_folio) || !folio_test_uptodate(dst_folio) || src_folio->mapping != src->f_mapping || dst_folio->mapping != dest->f_mapping) { same = false; goto unlock; } src_addr = kmap_local_folio(src_folio, offset_in_folio(src_folio, srcoff)); dst_addr = kmap_local_folio(dst_folio, offset_in_folio(dst_folio, dstoff)); flush_dcache_folio(src_folio); flush_dcache_folio(dst_folio); if (memcmp(src_addr, dst_addr, cmp_len)) same = false; kunmap_local(dst_addr); kunmap_local(src_addr); unlock: vfs_unlock_two_folios(src_folio, dst_folio); folio_put(dst_folio); folio_put(src_folio); if (!same) break; srcoff += cmp_len; dstoff += cmp_len; len -= cmp_len; } *is_same = same; return 0; out_error: return error; } /* * Check that the two inodes are eligible for cloning, the ranges make * sense, and then flush all dirty data. Caller must ensure that the * inodes have been locked against any other modifications. * * If there's an error, then the usual negative error code is returned. * Otherwise returns 0 with *len set to the request length. */ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, const struct iomap_ops *dax_read_ops) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); bool same_inode = (inode_in == inode_out); int ret; /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode_out)) return -EPERM; if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) return -ETXTBSY; /* Don't reflink dirs, pipes, sockets... */ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) return -EINVAL; /* Zero length dedupe exits immediately; reflink goes to EOF. */ if (*len == 0) { loff_t isize = i_size_read(inode_in); if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize) return 0; if (pos_in > isize) return -EINVAL; *len = isize - pos_in; if (*len == 0) return 0; } /* Check that we don't violate system file offset limits. 
*/ ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, remap_flags); if (ret || *len == 0) return ret; /* Wait for the completion of any pending IOs on both files */ inode_dio_wait(inode_in); if (!same_inode) inode_dio_wait(inode_out); ret = filemap_write_and_wait_range(inode_in->i_mapping, pos_in, pos_in + *len - 1); if (ret) return ret; ret = filemap_write_and_wait_range(inode_out->i_mapping, pos_out, pos_out + *len - 1); if (ret) return ret; /* * Check that the extents are the same. */ if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; if (!IS_DAX(inode_in)) ret = vfs_dedupe_file_range_compare(file_in, pos_in, file_out, pos_out, *len, &is_same); else if (dax_read_ops) ret = dax_dedupe_file_range_compare(inode_in, pos_in, inode_out, pos_out, *len, &is_same, dax_read_ops); else return -EINVAL; if (ret) return ret; if (!is_same) return -EBADE; } ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, remap_flags); if (ret || *len == 0) return ret; /* If can't alter the file contents, we're done. */ if (!(remap_flags & REMAP_FILE_DEDUP)) ret = file_modified(file_out); return ret; } int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags) { return __generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, remap_flags, NULL); } EXPORT_SYMBOL(generic_remap_file_range_prep); loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags) { loff_t ret; WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP); if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; ret = generic_file_rw_checks(file_in, file_out); if (ret < 0) return ret; if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; ret = remap_verify_area(file_in, pos_in, len, false); if (ret) return ret; ret = remap_verify_area(file_out, pos_out, len, true); if (ret) return ret; file_start_write(file_out); ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, len, remap_flags); file_end_write(file_out); if (ret < 0) return ret; fsnotify_access(file_in); fsnotify_modify(file_out); return ret; } EXPORT_SYMBOL(vfs_clone_file_range); /* Check whether we are allowed to dedupe the destination file */ static bool may_dedupe_file(struct file *file) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file); if (capable(CAP_SYS_ADMIN)) return true; if (file->f_mode & FMODE_WRITE) return true; if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) return true; if (!inode_permission(idmap, inode, MAY_WRITE)) return true; return false; } loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags) { loff_t ret; WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN)); /* * This is redundant if called from vfs_dedupe_file_range(), but other * callers need it and it's not performance sesitive... */ ret = remap_verify_area(src_file, src_pos, len, false); if (ret) return ret; ret = remap_verify_area(dst_file, dst_pos, len, true); if (ret) return ret; /* * This needs to be called after remap_verify_area() because of * sb_start_write() and before may_dedupe_file() because the mount's * MAY_WRITE need to be checked with mnt_get_write_access_file() held. 
*/ ret = mnt_want_write_file(dst_file); if (ret) return ret; ret = -EPERM; if (!may_dedupe_file(dst_file)) goto out_drop_write; ret = -EXDEV; if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb) goto out_drop_write; ret = -EISDIR; if (S_ISDIR(file_inode(dst_file)->i_mode)) goto out_drop_write; ret = -EINVAL; if (!dst_file->f_op->remap_file_range) goto out_drop_write; if (len == 0) { ret = 0; goto out_drop_write; } ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file, dst_pos, len, remap_flags | REMAP_FILE_DEDUP); out_drop_write: mnt_drop_write_file(dst_file); return ret; } EXPORT_SYMBOL(vfs_dedupe_file_range_one); int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) { struct file_dedupe_range_info *info; struct inode *src = file_inode(file); u64 off; u64 len; int i; int ret; u16 count = same->dest_count; loff_t deduped; if (!(file->f_mode & FMODE_READ)) return -EINVAL; if (same->reserved1 || same->reserved2) return -EINVAL; off = same->src_offset; len = same->src_length; if (S_ISDIR(src->i_mode)) return -EISDIR; if (!S_ISREG(src->i_mode)) return -EINVAL; if (!file->f_op->remap_file_range) return -EOPNOTSUPP; ret = remap_verify_area(file, off, len, false); if (ret < 0) return ret; ret = 0; if (off + len > i_size_read(src)) return -EINVAL; /* Arbitrary 1G limit on a single dedupe request, can be raised. */ len = min_t(u64, len, 1 << 30); /* pre-format output fields to sane values */ for (i = 0; i < count; i++) { same->info[i].bytes_deduped = 0ULL; same->info[i].status = FILE_DEDUPE_RANGE_SAME; } for (i = 0, info = same->info; i < count; i++, info++) { struct fd dst_fd = fdget(info->dest_fd); struct file *dst_file = dst_fd.file; if (!dst_file) { info->status = -EBADF; goto next_loop; } if (info->reserved) { info->status = -EINVAL; goto next_fdput; } deduped = vfs_dedupe_file_range_one(file, off, dst_file, info->dest_offset, len, REMAP_FILE_CAN_SHORTEN); if (deduped == -EBADE) info->status = FILE_DEDUPE_RANGE_DIFFERS; else if (deduped < 0) info->status = deduped; else info->bytes_deduped = len; next_fdput: fdput(dst_fd); next_loop: if (fatal_signal_pending(current)) break; } return ret; } EXPORT_SYMBOL(vfs_dedupe_file_range);
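
For orientation only, the sketch below shows how the clone and dedupe entry points above are typically reached from user space, via the FICLONERANGE and FIDEDUPERANGE ioctls declared in <linux/fs.h>. It is a hypothetical example, not part of this file; the file names are placeholders and error handling is abbreviated.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
	int src = open("src.img", O_RDONLY);
	int dst = open("dst.img", O_RDWR | O_CREAT, 0644);

	if (src < 0 || dst < 0)
		return 1;

	/* Reflink all of src into dst; src_length == 0 means "to EOF". */
	struct file_clone_range clone = {
		.src_fd = src,
		.src_offset = 0,
		.src_length = 0,
		.dest_offset = 0,
	};
	if (ioctl(dst, FICLONERANGE, &clone))
		perror("FICLONERANGE");

	/* Ask the fs to dedupe the first 64 KiB if the contents match. */
	struct file_dedupe_range *dedupe =
		calloc(1, sizeof(*dedupe) + sizeof(struct file_dedupe_range_info));
	if (!dedupe)
		return 1;
	dedupe->src_offset = 0;
	dedupe->src_length = 64 * 1024;
	dedupe->dest_count = 1;
	dedupe->info[0].dest_fd = dst;
	dedupe->info[0].dest_offset = 0;

	if (ioctl(src, FIDEDUPERANGE, dedupe))
		perror("FIDEDUPERANGE");
	else if (dedupe->info[0].status == FILE_DEDUPE_RANGE_DIFFERS)
		fprintf(stderr, "ranges differ, nothing deduped\n");

	free(dedupe);
	return 0;
}
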
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PID_H
#define _LINUX_PID_H

#include <linux/pid_types.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/wait.h>

/*
 * What is struct pid?
 *
 * A struct pid is the kernel's internal notion of a process identifier.
 * It refers to individual tasks, process groups, and sessions. While
 * there are processes attached to it the struct pid lives in a hash
 * table, so it and then the processes that it refers to can be found
 * quickly from the numeric pid value. The attached processes may be
 * quickly accessed by following pointers from struct pid.
 *
 * Storing pid_t values in the kernel and referring to them later has a
 * problem. The process originally with that pid may have exited and the
 * pid allocator wrapped, and another process could have come along
 * and been assigned that pid.
 *
 * Referring to user space processes by holding a reference to struct
 * task_struct has a problem. When the user space process exits
 * the now useless task_struct is still kept. A task_struct plus a
 * stack consumes around 10K of low kernel memory. More precisely
 * this is THREAD_SIZE + sizeof(struct task_struct). By comparison
 * a struct pid is about 64 bytes.
 *
 * Holding a reference to struct pid solves both of these problems.
 * It is small so holding a reference does not consume a lot of
 * resources, and since a new struct pid is allocated when the numeric pid
 * value is reused (when pids wrap around) we don't mistakenly refer to new
 * processes.
 */

/*
 * struct upid is used to get the id of the struct pid, as it is
 * seen in particular namespace. Later the struct pid is found with
 * find_pid_ns() using the int nr and struct pid_namespace *ns.
*/ #define RESERVED_PIDS 300 struct upid { int nr; struct pid_namespace *ns; }; struct pid { refcount_t count; unsigned int level; spinlock_t lock; struct dentry *stashed; u64 ino; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; /* wait queue for pidfd notifications */ wait_queue_head_t wait_pidfd; struct rcu_head rcu; struct upid numbers[]; }; extern struct pid init_struct_pid; struct file; struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) { if (pid) refcount_inc(&pid->count); return pid; } extern void put_pid(struct pid *pid); extern struct task_struct *pid_task(struct pid *pid, enum pid_type); static inline bool pid_has_task(struct pid *pid, enum pid_type type) { return !hlist_empty(&pid->tasks[type]); } extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); /* * these helpers must be called with the tasklist_lock write-held. */ extern void attach_pid(struct task_struct *task, enum pid_type); extern void detach_pid(struct task_struct *task, enum pid_type); extern void change_pid(struct task_struct *task, enum pid_type, struct pid *pid); extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); extern int pid_max; extern int pid_max_min, pid_max_max; /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. * * find_pid_ns() finds the pid in the namespace specified * find_vpid() finds the pid by its virtual id, i.e. in the current namespace * * see also find_task_by_vpid() set in include/linux/sched.h */ extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); extern struct pid *find_vpid(int nr); /* * Lookup a PID in the hash table, and return with it's count elevated. */ extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, size_t set_tid_size); extern void free_pid(struct pid *pid); extern void disable_pid_allocation(struct pid_namespace *ns); /* * ns_of_pid() returns the pid namespace in which the specified pid was * allocated. * * NOTE: * ns_of_pid() is expected to be called for a process (task) that has * an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid * is expected to be non-NULL. If @pid is NULL, caller should handle * the resulting NULL pid-ns. */ static inline struct pid_namespace *ns_of_pid(struct pid *pid) { struct pid_namespace *ns = NULL; if (pid) ns = pid->numbers[pid->level].ns; return ns; } /* * is_child_reaper returns true if the pid is the init process * of the current namespace. As this one could be checked before * pid_ns->child_reaper is assigned in copy_process, we check * with the pid number. */ static inline bool is_child_reaper(struct pid *pid) { return pid->numbers[pid->level].nr == 1; } /* * the helpers to get the pid's id seen from different namespaces * * pid_nr() : global id, i.e. the id seen from the init namespace; * pid_vnr() : virtual id, i.e. the id seen from the pid namespace of * current. 
* pid_nr_ns() : id seen from the ns specified. * * see also task_xid_nr() etc in include/linux/sched.h */ static inline pid_t pid_nr(struct pid *pid) { pid_t nr = 0; if (pid) nr = pid->numbers[0].nr; return nr; } pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns); pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ if ((pid) != NULL) \ hlist_for_each_entry_rcu((task), \ &(pid)->tasks[type], pid_links[type]) { /* * Both old and new leaders may be attached to * the same pid in the middle of de_thread(). */ #define while_each_pid_task(pid, type, task) \ if (type == PIDTYPE_PID) \ break; \ } \ } while (0) #define do_each_pid_thread(pid, type, task) \ do_each_pid_task(pid, type, task) { \ struct task_struct *tg___ = task; \ for_each_thread(tg___, task) { #define while_each_pid_thread(pid, type, task) \ } \ task = tg___; \ } while_each_pid_task(pid, type, task) static inline struct pid *task_pid(struct task_struct *task) { return task->thread_pid; } /* * the helpers to get the task's different pids as they are seen * from various namespaces * * task_xid_nr() : global id, i.e. the id seen from the init namespace; * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of * current. * task_xid_nr_ns() : id seen from the ns specified; * * see also pid_nr() etc in include/linux/pid.h */ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); static inline pid_t task_pid_nr(struct task_struct *tsk) { return tsk->pid; } static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); } static inline pid_t task_pid_vnr(struct task_struct *tsk) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); } static inline pid_t task_tgid_nr(struct task_struct *tsk) { return tsk->tgid; } /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. * * Test if a process is not yet dead (at most zombie state) * If pid_alive fails, then pointers within the task structure * can be stale and must not be dereferenced. * * Return: 1 if the process is alive. 0 otherwise. 
*/ static inline int pid_alive(const struct task_struct *p) { return p->thread_pid != NULL; } static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } static inline pid_t task_pgrp_vnr(struct task_struct *tsk) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); } static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } static inline pid_t task_session_vnr(struct task_struct *tsk) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); } static inline pid_t task_tgid_vnr(struct task_struct *tsk) { return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); } static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; rcu_read_lock(); if (pid_alive(tsk)) pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); rcu_read_unlock(); return pid; } static inline pid_t task_ppid_nr(const struct task_struct *tsk) { return task_ppid_nr_ns(tsk, &init_pid_ns); } /* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { return task_pgrp_nr_ns(tsk, &init_pid_ns); } /** * is_global_init - check if a task structure is init. Since init * is free to have sub-threads we need to check tgid. * @tsk: Task structure to be checked. * * Check if a task structure is the first user space task the kernel created. * * Return: 1 if the task structure is init. 0 otherwise. */ static inline int is_global_init(struct task_struct *tsk) { return task_tgid_nr(tsk) == 1; } #endif /* _LINUX_PID_H */
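/*
 * Illustrative sketch (not part of the header above): the reference-holding
 * pattern its introductory comment describes. Instead of caching a pid_t or
 * a task_struct pointer, a subsystem pins the struct pid and resolves it
 * back to a task only when needed. The example_* names and the surrounding
 * context are assumptions for illustration only.
 */
#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/sched/task.h>

static struct pid *example_watched;

/* Remember a task by its struct pid; safe even if the task later exits. */
static void example_watch(struct task_struct *task)
{
	example_watched = get_task_pid(task, PIDTYPE_PID);
}

/* Later: resolve the struct pid back to a live task, if there still is one. */
static void example_poke(void)
{
	struct task_struct *task;

	task = get_pid_task(example_watched, PIDTYPE_PID);
	if (task) {
		pr_info("task %d is still alive\n", pid_vnr(example_watched));
		put_task_struct(task);
	}

	put_pid(example_watched);	/* drop our reference to the struct pid */
	example_watched = NULL;
}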
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H #include <linux/neighbour.h> /* * Generic neighbour manipulation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * Changes: * * Harald Welte: <laforge@gnumonks.org> * - Add neighbour cache statistics like rtstat */ #include <linux/atomic.h> #include <linux/refcount.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/bitmap.h> #include <linux/err.h> #include <linux/sysctl.h> #include <linux/workqueue.h> #include <net/rtnetlink.h> /* * NUD stands for "neighbor unreachability detection" */ #define NUD_IN_TIMER (NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE) #define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY) #define NUD_CONNECTED (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE) struct neighbour; enum { NEIGH_VAR_MCAST_PROBES, NEIGH_VAR_UCAST_PROBES, NEIGH_VAR_APP_PROBES, NEIGH_VAR_MCAST_REPROBES, NEIGH_VAR_RETRANS_TIME, NEIGH_VAR_BASE_REACHABLE_TIME, NEIGH_VAR_DELAY_PROBE_TIME, NEIGH_VAR_INTERVAL_PROBE_TIME_MS, NEIGH_VAR_GC_STALETIME, NEIGH_VAR_QUEUE_LEN_BYTES, NEIGH_VAR_PROXY_QLEN,
NEIGH_VAR_ANYCAST_DELAY, NEIGH_VAR_PROXY_DELAY, NEIGH_VAR_LOCKTIME, #define NEIGH_VAR_DATA_MAX (NEIGH_VAR_LOCKTIME + 1) /* Following are used as a second way to access one of the above */ NEIGH_VAR_QUEUE_LEN, /* same data as NEIGH_VAR_QUEUE_LEN_BYTES */ NEIGH_VAR_RETRANS_TIME_MS, /* same data as NEIGH_VAR_RETRANS_TIME */ NEIGH_VAR_BASE_REACHABLE_TIME_MS, /* same data as NEIGH_VAR_BASE_REACHABLE_TIME */ /* Following are used by "default" only */ NEIGH_VAR_GC_INTERVAL, NEIGH_VAR_GC_THRESH1, NEIGH_VAR_GC_THRESH2, NEIGH_VAR_GC_THRESH3, NEIGH_VAR_MAX }; struct neigh_parms { possible_net_t net; struct net_device *dev; netdevice_tracker dev_tracker; struct list_head list; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; void *sysctl_table; int dead; refcount_t refcnt; struct rcu_head rcu_head; int reachable_time; u32 qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; static inline void neigh_var_set(struct neigh_parms *p, int index, int val) { set_bit(index, p->data_state); p->data[index] = val; } #define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) /* In ndo_neigh_setup, NEIGH_VAR_INIT should be used. * In other cases, NEIGH_VAR_SET should be used. */ #define NEIGH_VAR_INIT(p, attr, val) (NEIGH_VAR(p, attr) = val) #define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) static inline void neigh_parms_data_state_setall(struct neigh_parms *p) { bitmap_fill(p->data_state, NEIGH_VAR_DATA_MAX); } static inline void neigh_parms_data_state_cleanall(struct neigh_parms *p) { bitmap_zero(p->data_state, NEIGH_VAR_DATA_MAX); } struct neigh_statistics { unsigned long allocs; /* number of allocated neighs */ unsigned long destroys; /* number of destroyed neighs */ unsigned long hash_grows; /* number of hash resizes */ unsigned long res_failed; /* number of failed resolutions */ unsigned long lookups; /* number of lookups */ unsigned long hits; /* number of hits (among lookups) */ unsigned long rcv_probes_mcast; /* number of received mcast ipv6 */ unsigned long rcv_probes_ucast; /* number of received ucast ipv6 */ unsigned long periodic_gc_runs; /* number of periodic GC runs */ unsigned long forced_gc_runs; /* number of forced GC runs */ unsigned long unres_discards; /* number of unresolved drops */ unsigned long table_fulls; /* times even gc couldn't help */ }; #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; unsigned long updated; rwlock_t lock; refcount_t refcnt; unsigned int arp_queue_len_bytes; struct sk_buff_head arp_queue; struct timer_list timer; unsigned long used; atomic_t probes; u8 nud_state; u8 type; u8 dead; u8 protocol; u32 flags; seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))] __aligned(8); struct hh_cache hh; int (*output)(struct neighbour *, struct sk_buff *); const struct neigh_ops *ops; struct list_head gc_list; struct list_head managed_list; struct rcu_head rcu; struct net_device *dev; netdevice_tracker dev_tracker; u8 primary_key[]; } __randomize_layout; struct neigh_ops { int family; void (*solicit)(struct neighbour *, struct sk_buff *); void (*error_report)(struct neighbour *, struct sk_buff *); int (*output)(struct neighbour *, struct sk_buff *); int (*connected_output)(struct neighbour *, struct sk_buff *); }; struct pneigh_entry { struct pneigh_entry *next; possible_net_t net; struct net_device *dev; netdevice_tracker 
dev_tracker; u32 flags; u8 protocol; u32 key[]; }; /* * neighbour table manipulation */ #define NEIGH_NUM_HASH_RND 4 struct neigh_hash_table { struct neighbour __rcu **hash_buckets; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; struct rcu_head rcu; }; struct neigh_table { int family; unsigned int entry_size; unsigned int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); bool (*key_eq)(const struct neighbour *, const void *pkey); int (*constructor)(struct neighbour *); int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); int (*is_multicast)(const void *pkey); bool (*allow_add)(const struct net_device *dev, struct netlink_ext_ack *extack); char *id; struct neigh_parms parms; struct list_head parms_list; int gc_interval; int gc_thresh1; int gc_thresh2; int gc_thresh3; unsigned long last_flush; struct delayed_work gc_work; struct delayed_work managed_work; struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; atomic_t gc_entries; struct list_head gc_list; struct list_head managed_list; rwlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; struct neigh_hash_table __rcu *nht; struct pneigh_entry **phash_buckets; }; enum { NEIGH_ARP_TABLE = 0, NEIGH_ND_TABLE = 1, NEIGH_DN_TABLE = 2, NEIGH_NR_TABLES, NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ }; static inline int neigh_parms_family(struct neigh_parms *p) { return p->tbl->family; } #define NEIGH_PRIV_ALIGN sizeof(long long) #define NEIGH_ENTRY_SIZE(size) ALIGN((size), NEIGH_PRIV_ALIGN) static inline void *neighbour_priv(const struct neighbour *n) { return (char *)n + n->tbl->entry_size; } /* flags for neigh_update() */ #define NEIGH_UPDATE_F_OVERRIDE BIT(0) #define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1) #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2) #define NEIGH_UPDATE_F_USE BIT(3) #define NEIGH_UPDATE_F_MANAGED BIT(4) #define NEIGH_UPDATE_F_EXT_LEARNED BIT(5) #define NEIGH_UPDATE_F_ISROUTER BIT(6) #define NEIGH_UPDATE_F_ADMIN BIT(7) /* In-kernel representation for NDA_FLAGS_EXT flags: */ #define NTF_OLD_MASK 0xff #define NTF_EXT_SHIFT 8 #define NTF_EXT_MASK (NTF_EXT_MANAGED) #define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT) extern const struct nla_policy nda_policy[]; static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey) { return *(const u32 *)n->primary_key == *(const u32 *)pkey; } static inline bool neigh_key_eq128(const struct neighbour *n, const void *pkey) { const u32 *n32 = (const u32 *)n->primary_key; const u32 *p32 = pkey; return ((n32[0] ^ p32[0]) | (n32[1] ^ p32[1]) | (n32[2] ^ p32[2]) | (n32[3] ^ p32[3])) == 0; } static inline struct neighbour *___neigh_lookup_noref( struct neigh_table *tbl, bool (*key_eq)(const struct neighbour *n, const void *pkey), __u32 (*hash)(const void *pkey, const struct net_device *dev, __u32 *hash_rnd), const void *pkey, struct net_device *dev) { struct neigh_hash_table *nht = rcu_dereference(tbl->nht); struct neighbour *n; u32 hash_val; hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); for (n = rcu_dereference(nht->hash_buckets[hash_val]); n != NULL; n = rcu_dereference(n->next)) { if (n->dev == dev && key_eq(n, pkey)) return n; } return NULL; } static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev); } 
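/*
 * Illustrative sketch (not part of the original header): how the lockless
 * lookup above is typically consumed. The caller must hold rcu_read_lock()
 * and, to keep the entry past the read-side section, must take a reference
 * first; a refcount that already dropped to zero means the entry is being
 * destroyed. The example_ name is an assumption for illustration only; an
 * IPv4 caller would pass &arp_tbl (declared in <net/arp.h>) as @tbl.
 */
static inline struct neighbour *example_lookup_hold(struct neigh_table *tbl,
						    const void *pkey,
						    struct net_device *dev)
{
	struct neighbour *n;

	rcu_read_lock();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n && !refcount_inc_not_zero(&n->refcnt))
		n = NULL;		/* lost the race with destruction */
	rcu_read_unlock();

	return n;			/* caller drops it with neigh_release() */
}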
static inline void neigh_confirm(struct neighbour *n) { if (n) { unsigned long now = jiffies; /* avoid dirtying neighbour */ if (READ_ONCE(n->confirmed) != now) WRITE_ONCE(n->confirmed, now); } } void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev); struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref); static inline struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, const bool immediate_ok); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb); struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev); struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl); void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms); static inline struct net *neigh_parms_net(const struct neigh_parms *parms) { return read_pnet(&parms->net); } unsigned long neigh_rand_reach_time(unsigned long base); void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat); struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); static inline struct net *pneigh_net(const struct pneigh_entry *pneigh) { return read_pnet(&pneigh->net); } void neigh_app_ns(struct neighbour *n); void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *); struct neigh_seq_state { struct seq_net_private p; struct neigh_table *tbl; struct neigh_hash_table *nht; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); unsigned int bucket; unsigned int flags; #define NEIGH_SEQ_NEIGH_ONLY 0x00000001 #define NEIGH_SEQ_IS_PNEIGH 0x00000002 #define NEIGH_SEQ_SKIP_NOARP 0x00000004 }; void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *, unsigned int); void *neigh_seq_next(struct seq_file *, void *, loff_t *); void neigh_seq_stop(struct seq_file *, void *); int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int 
neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, proc_handler *proc_handler); void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) { refcount_dec(&parms->refcnt); } static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) { refcount_inc(&parms->refcnt); return parms; } /* * Neighbour references */ static inline void neigh_release(struct neighbour *neigh) { if (refcount_dec_and_test(&neigh->refcnt)) neigh_destroy(neigh); } static inline struct neighbour * neigh_clone(struct neighbour *neigh) { if (neigh) refcount_inc(&neigh->refcnt); return neigh; } #define neigh_hold(n) refcount_inc(&(n)->refcnt) static __always_inline int neigh_event_send_probe(struct neighbour *neigh, struct sk_buff *skb, const bool immediate_ok) { unsigned long now = jiffies; if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, now); if (!(READ_ONCE(neigh->nud_state) & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) return __neigh_event_send(neigh, skb, immediate_ok); return 0; } static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { return neigh_event_send_probe(neigh, skb, true); } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq, hh_alen; do { seq = read_seqbegin(&hh->hh_lock); hh_alen = HH_DATA_ALIGN(ETH_HLEN); memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN); } while (read_seqretry(&hh->hh_lock, seq)); return 0; } #endif static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { unsigned int hh_alen = 0; unsigned int seq; unsigned int hh_len; do { seq = read_seqbegin(&hh->hh_lock); hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { hh_alen = HH_DATA_MOD; /* skb_push() would proceed silently if we have room for * the unaligned size but not for the aligned size: * check headroom explicitly. */ if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { /* this is inlined by gcc */ memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); } } else { hh_alen = HH_DATA_ALIGN(hh_len); if (likely(skb_headroom(skb) >= hh_alen)) { memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); } } } while (read_seqretry(&hh->hh_lock, seq)); if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { kfree_skb(skb); return NET_XMIT_DROP; } __skb_push(skb, hh_len); return dev_queue_xmit(skb); } static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, bool skip_cache) { const struct hh_cache *hh = &n->hh; /* n->nud_state and hh->hh_len could be changed under us. * neigh_hh_output() is taking care of the race later. */ if (!skip_cache && (READ_ONCE(n->nud_state) & NUD_CONNECTED) && READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); return READ_ONCE(n->output)(n, skb); } static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat) { struct neighbour *n = neigh_lookup(tbl, pkey, dev); if (n || !creat) return n; n = neigh_create(tbl, pkey, dev); return IS_ERR(n) ? 
NULL : n; } static inline struct neighbour * __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n = neigh_lookup(tbl, pkey, dev); if (n) return n; return neigh_create(tbl, pkey, dev); } struct neighbour_cb { unsigned long sched_next; unsigned int flags; }; #define LOCALLY_ENQUEUED 0x1 #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, const struct net_device *dev) { unsigned int seq; do { seq = read_seqbegin(&n->ha_lock); memcpy(dst, n->ha, dev->addr_len); } while (read_seqretry(&n->ha_lock, seq)); } static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags, int *notify) { u8 ndm_flags = 0; ndm_flags |= (flags & NEIGH_UPDATE_F_ISROUTER) ? NTF_ROUTER : 0; if ((neigh->flags ^ ndm_flags) & NTF_ROUTER) { if (ndm_flags & NTF_ROUTER) neigh->flags |= NTF_ROUTER; else neigh->flags &= ~NTF_ROUTER; *notify = 1; } } #endif
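/*
 * Illustrative sketch (not part of the header above): the transmit-side
 * pattern the resolution helpers are built for, loosely modelled on the
 * IPv4 output path. Under rcu_read_lock() the next hop is looked up (or
 * created without taking an extra reference), and neigh_output() then
 * either uses the cached hardware header for NUD_CONNECTED entries or falls
 * back to the entry's ->output() callback (e.g. neigh_resolve_output()),
 * which queues the skb and kicks neighbour resolution.
 * example_neigh_xmit() and its arguments are assumptions for illustration.
 */
#include <net/neighbour.h>

static int example_neigh_xmit(struct neigh_table *tbl, const void *nexthop,
			      struct net_device *dev, struct sk_buff *skb)
{
	struct neighbour *n;
	int ret;

	rcu_read_lock();
	n = __neigh_lookup_noref(tbl, nexthop, dev);
	if (!n)
		n = __neigh_create(tbl, nexthop, dev, false);
	if (IS_ERR(n)) {
		rcu_read_unlock();
		kfree_skb(skb);
		return PTR_ERR(n);
	}
	ret = neigh_output(n, skb, false);
	rcu_read_unlock();

	return ret;
}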
// SPDX-License-Identifier: GPL-2.0 /* * platform.c - platform 'pseudo' bus for legacy devices * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * * Please see Documentation/driver-api/driver-model/platform.rst for more * information.
*/ #include <linux/string.h> #include <linux/platform_device.h> #include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/module.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/dma-mapping.h> #include <linux/memblock.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/pm_runtime.h> #include <linux/pm_domain.h> #include <linux/idr.h> #include <linux/acpi.h> #include <linux/clk/clk-conf.h> #include <linux/limits.h> #include <linux/property.h> #include <linux/kmemleak.h> #include <linux/types.h> #include <linux/iommu.h> #include <linux/dma-map-ops.h> #include "base.h" #include "power/power.h" /* For automatically allocated device IDs */ static DEFINE_IDA(platform_devid_ida); struct device platform_bus = { .init_name = "platform", }; EXPORT_SYMBOL_GPL(platform_bus); /** * platform_get_resource - get a resource for a device * @dev: platform device * @type: resource type * @num: resource index * * Return: a pointer to the resource or NULL on failure. */ struct resource *platform_get_resource(struct platform_device *dev, unsigned int type, unsigned int num) { u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; if (type == resource_type(r) && num-- == 0) return r; } return NULL; } EXPORT_SYMBOL_GPL(platform_get_resource); struct resource *platform_get_mem_or_io(struct platform_device *dev, unsigned int num) { u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; if ((resource_type(r) & (IORESOURCE_MEM|IORESOURCE_IO)) && num-- == 0) return r; } return NULL; } EXPORT_SYMBOL_GPL(platform_get_mem_or_io); #ifdef CONFIG_HAS_IOMEM /** * devm_platform_get_and_ioremap_resource - call devm_ioremap_resource() for a * platform device and get resource * * @pdev: platform device to use both for memory resource lookup as well as * resource management * @index: resource index * @res: optional output parameter to store a pointer to the obtained resource. * * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code * on failure. */ void __iomem * devm_platform_get_and_ioremap_resource(struct platform_device *pdev, unsigned int index, struct resource **res) { struct resource *r; r = platform_get_resource(pdev, IORESOURCE_MEM, index); if (res) *res = r; return devm_ioremap_resource(&pdev->dev, r); } EXPORT_SYMBOL_GPL(devm_platform_get_and_ioremap_resource); /** * devm_platform_ioremap_resource - call devm_ioremap_resource() for a platform * device * * @pdev: platform device to use both for memory resource lookup as well as * resource management * @index: resource index * * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code * on failure. */ void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev, unsigned int index) { return devm_platform_get_and_ioremap_resource(pdev, index, NULL); } EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource); /** * devm_platform_ioremap_resource_byname - call devm_ioremap_resource for * a platform device, retrieve the * resource by name * * @pdev: platform device to use both for memory resource lookup as well as * resource management * @name: name of the resource * * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code * on failure. 
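 *
 * For example (an illustrative sketch; the "ctrl" resource name and the
 * surrounding probe() context are assumptions)::
 *
 *	void __iomem *base;
 *
 *	base = devm_platform_ioremap_resource_byname(pdev, "ctrl");
 *	if (IS_ERR(base))
 *		return PTR_ERR(base);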
*/ void __iomem * devm_platform_ioremap_resource_byname(struct platform_device *pdev, const char *name) { struct resource *res; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); return devm_ioremap_resource(&pdev->dev, res); } EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource_byname); #endif /* CONFIG_HAS_IOMEM */ /** * platform_get_irq_optional - get an optional IRQ for a device * @dev: platform device * @num: IRQ number index * * Gets an IRQ for a platform device. Device drivers should check the return * value for errors so as to not pass a negative integer value to the * request_irq() APIs. This is the same as platform_get_irq(), except that it * does not print an error message if an IRQ can not be obtained. * * For example:: * * int irq = platform_get_irq_optional(pdev, 0); * if (irq < 0) * return irq; * * Return: non-zero IRQ number on success, negative error number on failure. */ int platform_get_irq_optional(struct platform_device *dev, unsigned int num) { int ret; #ifdef CONFIG_SPARC /* sparc does not have irqs represented as IORESOURCE_IRQ resources */ if (!dev || num >= dev->archdata.num_irqs) goto out_not_found; ret = dev->archdata.irqs[num]; goto out; #else struct fwnode_handle *fwnode = dev_fwnode(&dev->dev); struct resource *r; if (is_of_node(fwnode)) { ret = of_irq_get(to_of_node(fwnode), num); if (ret > 0 || ret == -EPROBE_DEFER) goto out; } r = platform_get_resource(dev, IORESOURCE_IRQ, num); if (is_acpi_device_node(fwnode)) { if (r && r->flags & IORESOURCE_DISABLED) { ret = acpi_irq_get(ACPI_HANDLE_FWNODE(fwnode), num, r); if (ret) goto out; } } /* * The resources may pass trigger flags to the irqs that need * to be set up. It so happens that the trigger flags for * IORESOURCE_BITS correspond 1-to-1 to the IRQF_TRIGGER* * settings. */ if (r && r->flags & IORESOURCE_BITS) { struct irq_data *irqd; irqd = irq_get_irq_data(r->start); if (!irqd) goto out_not_found; irqd_set_trigger_type(irqd, r->flags & IORESOURCE_BITS); } if (r) { ret = r->start; goto out; } /* * For the index 0 interrupt, allow falling back to GpioInt * resources. While a device could have both Interrupt and GpioInt * resources, making this fallback ambiguous, in many common cases * the device will only expose one IRQ, and this fallback * allows a common code path across either kind of resource. */ if (num == 0 && is_acpi_device_node(fwnode)) { ret = acpi_dev_gpio_irq_get(to_acpi_device_node(fwnode), num); /* Our callers expect -ENXIO for missing IRQs. */ if (ret >= 0 || ret == -EPROBE_DEFER) goto out; } #endif out_not_found: ret = -ENXIO; out: if (WARN(!ret, "0 is an invalid IRQ number\n")) return -EINVAL; return ret; } EXPORT_SYMBOL_GPL(platform_get_irq_optional); /** * platform_get_irq - get an IRQ for a device * @dev: platform device * @num: IRQ number index * * Gets an IRQ for a platform device and prints an error message if finding the * IRQ fails. Device drivers should check the return value for errors so as to * not pass a negative integer value to the request_irq() APIs. * * For example:: * * int irq = platform_get_irq(pdev, 0); * if (irq < 0) * return irq; * * Return: non-zero IRQ number on success, negative error number on failure. 
*/ int platform_get_irq(struct platform_device *dev, unsigned int num) { int ret; ret = platform_get_irq_optional(dev, num); if (ret < 0) return dev_err_probe(&dev->dev, ret, "IRQ index %u not found\n", num); return ret; } EXPORT_SYMBOL_GPL(platform_get_irq); /** * platform_irq_count - Count the number of IRQs a platform device uses * @dev: platform device * * Return: Number of IRQs a platform device uses or EPROBE_DEFER */ int platform_irq_count(struct platform_device *dev) { int ret, nr = 0; while ((ret = platform_get_irq_optional(dev, nr)) >= 0) nr++; if (ret == -EPROBE_DEFER) return ret; return nr; } EXPORT_SYMBOL_GPL(platform_irq_count); struct irq_affinity_devres { unsigned int count; unsigned int irq[] __counted_by(count); }; static void platform_disable_acpi_irq(struct platform_device *pdev, int index) { struct resource *r; r = platform_get_resource(pdev, IORESOURCE_IRQ, index); if (r) irqresource_disabled(r, 0); } static void devm_platform_get_irqs_affinity_release(struct device *dev, void *res) { struct irq_affinity_devres *ptr = res; int i; for (i = 0; i < ptr->count; i++) { irq_dispose_mapping(ptr->irq[i]); if (is_acpi_device_node(dev_fwnode(dev))) platform_disable_acpi_irq(to_platform_device(dev), i); } } /** * devm_platform_get_irqs_affinity - devm method to get a set of IRQs for a * device using an interrupt affinity descriptor * @dev: platform device pointer * @affd: affinity descriptor * @minvec: minimum count of interrupt vectors * @maxvec: maximum count of interrupt vectors * @irqs: pointer holder for IRQ numbers * * Gets a set of IRQs for a platform device, and updates IRQ affinity according * to the passed affinity descriptor * * Return: Number of vectors on success, negative error number on failure. */ int devm_platform_get_irqs_affinity(struct platform_device *dev, struct irq_affinity *affd, unsigned int minvec, unsigned int maxvec, int **irqs) { struct irq_affinity_devres *ptr; struct irq_affinity_desc *desc; size_t size; int i, ret, nvec; if (!affd) return -EPERM; if (maxvec < minvec) return -ERANGE; nvec = platform_irq_count(dev); if (nvec < 0) return nvec; if (nvec < minvec) return -ENOSPC; nvec = irq_calc_affinity_vectors(minvec, nvec, affd); if (nvec < minvec) return -ENOSPC; if (nvec > maxvec) nvec = maxvec; size = sizeof(*ptr) + sizeof(unsigned int) * nvec; ptr = devres_alloc(devm_platform_get_irqs_affinity_release, size, GFP_KERNEL); if (!ptr) return -ENOMEM; ptr->count = nvec; for (i = 0; i < nvec; i++) { int irq = platform_get_irq(dev, i); if (irq < 0) { ret = irq; goto err_free_devres; } ptr->irq[i] = irq; } desc = irq_create_affinity_masks(nvec, affd); if (!desc) { ret = -ENOMEM; goto err_free_devres; } for (i = 0; i < nvec; i++) { ret = irq_update_affinity_desc(ptr->irq[i], &desc[i]); if (ret) { dev_err(&dev->dev, "failed to update irq%d affinity descriptor (%d)\n", ptr->irq[i], ret); goto err_free_desc; } } devres_add(&dev->dev, ptr); kfree(desc); *irqs = ptr->irq; return nvec; err_free_desc: kfree(desc); err_free_devres: devres_free(ptr); return ret; } EXPORT_SYMBOL_GPL(devm_platform_get_irqs_affinity); /** * platform_get_resource_byname - get a resource for a device by name * @dev: platform device * @type: resource type * @name: resource name */ struct resource *platform_get_resource_byname(struct platform_device *dev, unsigned int type, const char *name) { u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; if (unlikely(!r->name)) continue; if (type == resource_type(r) && !strcmp(r->name, name)) return r; }
return NULL; } EXPORT_SYMBOL_GPL(platform_get_resource_byname); static int __platform_get_irq_byname(struct platform_device *dev, const char *name) { struct resource *r; int ret; ret = fwnode_irq_get_byname(dev_fwnode(&dev->dev), name); if (ret > 0 || ret == -EPROBE_DEFER) return ret; r = platform_get_resource_byname(dev, IORESOURCE_IRQ, name); if (r) { if (WARN(!r->start, "0 is an invalid IRQ number\n")) return -EINVAL; return r->start; } return -ENXIO; } /** * platform_get_irq_byname - get an IRQ for a device by name * @dev: platform device * @name: IRQ name * * Get an IRQ like platform_get_irq(), but by name rather than by index. * * Return: non-zero IRQ number on success, negative error number on failure. */ int platform_get_irq_byname(struct platform_device *dev, const char *name) { int ret; ret = __platform_get_irq_byname(dev, name); if (ret < 0) return dev_err_probe(&dev->dev, ret, "IRQ %s not found\n", name); return ret; } EXPORT_SYMBOL_GPL(platform_get_irq_byname); /** * platform_get_irq_byname_optional - get an optional IRQ for a device by name * @dev: platform device * @name: IRQ name * * Get an optional IRQ by name like platform_get_irq_byname(), except that it * does not print an error message if an IRQ cannot be obtained. * * Return: non-zero IRQ number on success, negative error number on failure. */ int platform_get_irq_byname_optional(struct platform_device *dev, const char *name) { return __platform_get_irq_byname(dev, name); } EXPORT_SYMBOL_GPL(platform_get_irq_byname_optional); /** * platform_add_devices - add a number of platform devices * @devs: array of platform devices to add * @num: number of platform devices in array * * Return: 0 on success, negative error number on failure. */ int platform_add_devices(struct platform_device **devs, int num) { int i, ret = 0; for (i = 0; i < num; i++) { ret = platform_device_register(devs[i]); if (ret) { while (--i >= 0) platform_device_unregister(devs[i]); break; } } return ret; } EXPORT_SYMBOL_GPL(platform_add_devices); struct platform_object { struct platform_device pdev; char name[]; }; /* * Set up default DMA mask for platform devices if they weren't * previously set by the architecture / DT. */ static void setup_pdev_dma_masks(struct platform_device *pdev) { pdev->dev.dma_parms = &pdev->dma_parms; if (!pdev->dev.coherent_dma_mask) pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); if (!pdev->dev.dma_mask) { pdev->platform_dma_mask = DMA_BIT_MASK(32); pdev->dev.dma_mask = &pdev->platform_dma_mask; } }; /** * platform_device_put - destroy a platform device * @pdev: platform device to free * * Free all memory associated with a platform device. This function must * _only_ be externally called in error cases. All other usage is a bug. */ void platform_device_put(struct platform_device *pdev) { if (!IS_ERR_OR_NULL(pdev)) put_device(&pdev->dev); } EXPORT_SYMBOL_GPL(platform_device_put); static void platform_device_release(struct device *dev) { struct platform_object *pa = container_of(dev, struct platform_object, pdev.dev); of_node_put(pa->pdev.dev.of_node); kfree(pa->pdev.dev.platform_data); kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); kfree(pa->pdev.driver_override); kfree(pa); } /** * platform_device_alloc - create a platform device * @name: base name of the device we're adding * @id: instance id * * Create a platform device object which can have other objects attached * to it, and which will have attached objects freed when it is released.
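 *
 * For example (an illustrative sketch; the device name and error handling are
 * assumptions, and registration is completed by platform_device_add())::
 *
 *	struct platform_device *pdev;
 *	int ret;
 *
 *	pdev = platform_device_alloc("example-dev", PLATFORM_DEVID_NONE);
 *	if (!pdev)
 *		return -ENOMEM;
 *	ret = platform_device_add(pdev);
 *	if (ret) {
 *		platform_device_put(pdev);
 *		return ret;
 *	}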
*/ struct platform_device *platform_device_alloc(const char *name, int id) { struct platform_object *pa; pa = kzalloc(sizeof(*pa) + strlen(name) + 1, GFP_KERNEL); if (pa) { strcpy(pa->name, name); pa->pdev.name = pa->name; pa->pdev.id = id; device_initialize(&pa->pdev.dev); pa->pdev.dev.release = platform_device_release; setup_pdev_dma_masks(&pa->pdev); } return pa ? &pa->pdev : NULL; } EXPORT_SYMBOL_GPL(platform_device_alloc); /** * platform_device_add_resources - add resources to a platform device * @pdev: platform device allocated by platform_device_alloc to add resources to * @res: set of resources that needs to be allocated for the device * @num: number of resources * * Add a copy of the resources to the platform device. The memory * associated with the resources will be freed when the platform device is * released. */ int platform_device_add_resources(struct platform_device *pdev, const struct resource *res, unsigned int num) { struct resource *r = NULL; if (res) { r = kmemdup(res, sizeof(struct resource) * num, GFP_KERNEL); if (!r) return -ENOMEM; } kfree(pdev->resource); pdev->resource = r; pdev->num_resources = num; return 0; } EXPORT_SYMBOL_GPL(platform_device_add_resources); /** * platform_device_add_data - add platform-specific data to a platform device * @pdev: platform device allocated by platform_device_alloc to add resources to * @data: platform specific data for this platform device * @size: size of platform specific data * * Add a copy of platform specific data to the platform device's * platform_data pointer. The memory associated with the platform data * will be freed when the platform device is released. */ int platform_device_add_data(struct platform_device *pdev, const void *data, size_t size) { void *d = NULL; if (data) { d = kmemdup(data, size, GFP_KERNEL); if (!d) return -ENOMEM; } kfree(pdev->dev.platform_data); pdev->dev.platform_data = d; return 0; } EXPORT_SYMBOL_GPL(platform_device_add_data); /** * platform_device_add - add a platform device to device hierarchy * @pdev: platform device we're adding * * This is part 2 of platform_device_register(), though may be called * separately _iff_ pdev was allocated by platform_device_alloc(). */ int platform_device_add(struct platform_device *pdev) { struct device *dev = &pdev->dev; u32 i; int ret; if (!dev->parent) dev->parent = &platform_bus; dev->bus = &platform_bus_type; switch (pdev->id) { default: dev_set_name(dev, "%s.%d", pdev->name, pdev->id); break; case PLATFORM_DEVID_NONE: dev_set_name(dev, "%s", pdev->name); break; case PLATFORM_DEVID_AUTO: /* * Automatically allocated device ID. We mark it as such so * that we remember it must be freed, and we append a suffix * to avoid namespace collision with explicit IDs. */ ret = ida_alloc(&platform_devid_ida, GFP_KERNEL); if (ret < 0) return ret; pdev->id = ret; pdev->id_auto = true; dev_set_name(dev, "%s.%d.auto", pdev->name, pdev->id); break; } for (i = 0; i < pdev->num_resources; i++) { struct resource *p, *r = &pdev->resource[i]; if (r->name == NULL) r->name = dev_name(dev); p = r->parent; if (!p) { if (resource_type(r) == IORESOURCE_MEM) p = &iomem_resource; else if (resource_type(r) == IORESOURCE_IO) p = &ioport_resource; } if (p) { ret = insert_resource(p, r); if (ret) { dev_err(dev, "failed to claim resource %d: %pR\n", i, r); goto failed; } } } pr_debug("Registering platform device '%s'. 
Parent at %s\n", dev_name(dev), dev_name(dev->parent)); ret = device_add(dev); if (ret) goto failed; return 0; failed: if (pdev->id_auto) { ida_free(&platform_devid_ida, pdev->id); pdev->id = PLATFORM_DEVID_AUTO; } while (i--) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); } return ret; } EXPORT_SYMBOL_GPL(platform_device_add); /** * platform_device_del - remove a platform-level device * @pdev: platform device we're removing * * Note that this function will also release all memory- and port-based * resources owned by the device (@dev->resource). This function must * _only_ be externally called in error cases. All other usage is a bug. */ void platform_device_del(struct platform_device *pdev) { u32 i; if (!IS_ERR_OR_NULL(pdev)) { device_del(&pdev->dev); if (pdev->id_auto) { ida_free(&platform_devid_ida, pdev->id); pdev->id = PLATFORM_DEVID_AUTO; } for (i = 0; i < pdev->num_resources; i++) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); } } } EXPORT_SYMBOL_GPL(platform_device_del); /** * platform_device_register - add a platform-level device * @pdev: platform device we're adding * * NOTE: _Never_ directly free @pdev after calling this function, even if it * returned an error! Always use platform_device_put() to give up the * reference initialised in this function instead. */ int platform_device_register(struct platform_device *pdev) { device_initialize(&pdev->dev); setup_pdev_dma_masks(pdev); return platform_device_add(pdev); } EXPORT_SYMBOL_GPL(platform_device_register); /** * platform_device_unregister - unregister a platform-level device * @pdev: platform device we're unregistering * * Unregistration is done in 2 steps. First we release all resources * and remove it from the subsystem, then we drop reference count by * calling platform_device_put(). */ void platform_device_unregister(struct platform_device *pdev) { platform_device_del(pdev); platform_device_put(pdev); } EXPORT_SYMBOL_GPL(platform_device_unregister); /** * platform_device_register_full - add a platform-level device with * resources and platform-specific data * * @pdevinfo: data used to create device * * Returns &struct platform_device pointer on success, or ERR_PTR() on error. 
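 *
 * For example (an illustrative sketch; the name, parent and platform data are
 * assumptions)::
 *
 *	struct platform_device_info pdevinfo = {
 *		.parent = parent_dev,
 *		.name = "example-dev",
 *		.id = PLATFORM_DEVID_AUTO,
 *		.data = &pdata,
 *		.size_data = sizeof(pdata),
 *	};
 *	struct platform_device *pdev;
 *
 *	pdev = platform_device_register_full(&pdevinfo);
 *	if (IS_ERR(pdev))
 *		return PTR_ERR(pdev);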
*/ struct platform_device *platform_device_register_full( const struct platform_device_info *pdevinfo) { int ret; struct platform_device *pdev; pdev = platform_device_alloc(pdevinfo->name, pdevinfo->id); if (!pdev) return ERR_PTR(-ENOMEM); pdev->dev.parent = pdevinfo->parent; pdev->dev.fwnode = pdevinfo->fwnode; pdev->dev.of_node = of_node_get(to_of_node(pdev->dev.fwnode)); pdev->dev.of_node_reused = pdevinfo->of_node_reused; if (pdevinfo->dma_mask) { pdev->platform_dma_mask = pdevinfo->dma_mask; pdev->dev.dma_mask = &pdev->platform_dma_mask; pdev->dev.coherent_dma_mask = pdevinfo->dma_mask; } ret = platform_device_add_resources(pdev, pdevinfo->res, pdevinfo->num_res); if (ret) goto err; ret = platform_device_add_data(pdev, pdevinfo->data, pdevinfo->size_data); if (ret) goto err; if (pdevinfo->properties) { ret = device_create_managed_software_node(&pdev->dev, pdevinfo->properties, NULL); if (ret) goto err; } ret = platform_device_add(pdev); if (ret) { err: ACPI_COMPANION_SET(&pdev->dev, NULL); platform_device_put(pdev); return ERR_PTR(ret); } return pdev; } EXPORT_SYMBOL_GPL(platform_device_register_full); /** * __platform_driver_register - register a driver for platform-level devices * @drv: platform driver structure * @owner: owning module/driver */ int __platform_driver_register(struct platform_driver *drv, struct module *owner) { drv->driver.owner = owner; drv->driver.bus = &platform_bus_type; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(__platform_driver_register); /** * platform_driver_unregister - unregister a driver for platform-level devices * @drv: platform driver structure */ void platform_driver_unregister(struct platform_driver *drv) { driver_unregister(&drv->driver); } EXPORT_SYMBOL_GPL(platform_driver_unregister); static int platform_probe_fail(struct platform_device *pdev) { return -ENXIO; } static int is_bound_to_driver(struct device *dev, void *driver) { if (dev->driver == driver) return 1; return 0; } /** * __platform_driver_probe - register driver for non-hotpluggable device * @drv: platform driver structure * @probe: the driver probe routine, probably from an __init section * @module: module which will be the owner of the driver * * Use this instead of platform_driver_register() when you know the device * is not hotpluggable and has already been registered, and you want to * remove its run-once probe() infrastructure from memory after the driver * has bound to the device. * * One typical use for this would be with drivers for controllers integrated * into system-on-chip processors, where the controller devices have been * configured as part of board setup. * * Note that this is incompatible with deferred probing. * * Returns zero if the driver registered and bound to a device, else returns * a negative error code and with the driver not registered. */ int __init_or_module __platform_driver_probe(struct platform_driver *drv, int (*probe)(struct platform_device *), struct module *module) { int retval; if (drv->driver.probe_type == PROBE_PREFER_ASYNCHRONOUS) { pr_err("%s: drivers registered with %s can not be probed asynchronously\n", drv->driver.name, __func__); return -EINVAL; } /* * We have to run our probes synchronously because we check if * we find any devices to bind to and exit with error if there * are any. */ drv->driver.probe_type = PROBE_FORCE_SYNCHRONOUS; /* * Prevent driver from requesting probe deferral to avoid further * futile probe attempts. 
*/ drv->prevent_deferred_probe = true; /* make sure driver won't have bind/unbind attributes */ drv->driver.suppress_bind_attrs = true; /* temporary section violation during probe() */ drv->probe = probe; retval = __platform_driver_register(drv, module); if (retval) return retval; /* Force all new probes of this driver to fail */ drv->probe = platform_probe_fail; /* Walk all platform devices and see if any actually bound to this driver. * If not, return an error as the device should have done so by now. */ if (!bus_for_each_dev(&platform_bus_type, NULL, &drv->driver, is_bound_to_driver)) { retval = -ENODEV; platform_driver_unregister(drv); } return retval; } EXPORT_SYMBOL_GPL(__platform_driver_probe); /** * __platform_create_bundle - register driver and create corresponding device * @driver: platform driver structure * @probe: the driver probe routine, probably from an __init section * @res: set of resources that needs to be allocated for the device * @n_res: number of resources * @data: platform specific data for this platform device * @size: size of platform specific data * @module: module which will be the owner of the driver * * Use this in legacy-style modules that probe hardware directly and * register a single platform device and corresponding platform driver. * * Returns &struct platform_device pointer on success, or ERR_PTR() on error. */ struct platform_device * __init_or_module __platform_create_bundle( struct platform_driver *driver, int (*probe)(struct platform_device *), struct resource *res, unsigned int n_res, const void *data, size_t size, struct module *module) { struct platform_device *pdev; int error; pdev = platform_device_alloc(driver->driver.name, -1); if (!pdev) { error = -ENOMEM; goto err_out; } error = platform_device_add_resources(pdev, res, n_res); if (error) goto err_pdev_put; error = platform_device_add_data(pdev, data, size); if (error) goto err_pdev_put; error = platform_device_add(pdev); if (error) goto err_pdev_put; error = __platform_driver_probe(driver, probe, module); if (error) goto err_pdev_del; return pdev; err_pdev_del: platform_device_del(pdev); err_pdev_put: platform_device_put(pdev); err_out: return ERR_PTR(error); } EXPORT_SYMBOL_GPL(__platform_create_bundle); /** * __platform_register_drivers - register an array of platform drivers * @drivers: an array of drivers to register * @count: the number of drivers to register * @owner: module owning the drivers * * Registers platform drivers specified by an array. On failure to register a * driver, all previously registered drivers will be unregistered. Callers of * this API should use platform_unregister_drivers() to unregister drivers in * the reverse order. * * Returns: 0 on success or a negative error code on failure. 
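 *
 * For example (an illustrative sketch; foo_driver and bar_driver are
 * assumptions, registered via the platform_register_drivers() wrapper that
 * supplies THIS_MODULE)::
 *
 *	static struct platform_driver * const drivers[] = {
 *		&foo_driver,
 *		&bar_driver,
 *	};
 *
 *	return platform_register_drivers(drivers, ARRAY_SIZE(drivers));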
*/ int __platform_register_drivers(struct platform_driver * const *drivers, unsigned int count, struct module *owner) { unsigned int i; int err; for (i = 0; i < count; i++) { pr_debug("registering platform driver %ps\n", drivers[i]); err = __platform_driver_register(drivers[i], owner); if (err < 0) { pr_err("failed to register platform driver %ps: %d\n", drivers[i], err); goto error; } } return 0; error: while (i--) { pr_debug("unregistering platform driver %ps\n", drivers[i]); platform_driver_unregister(drivers[i]); } return err; } EXPORT_SYMBOL_GPL(__platform_register_drivers); /** * platform_unregister_drivers - unregister an array of platform drivers * @drivers: an array of drivers to unregister * @count: the number of drivers to unregister * * Unregisters platform drivers specified by an array. This is typically used * to complement an earlier call to platform_register_drivers(). Drivers are * unregistered in the reverse order in which they were registered. */ void platform_unregister_drivers(struct platform_driver * const *drivers, unsigned int count) { while (count--) { pr_debug("unregistering platform driver %ps\n", drivers[count]); platform_driver_unregister(drivers[count]); } } EXPORT_SYMBOL_GPL(platform_unregister_drivers); static const struct platform_device_id *platform_match_id( const struct platform_device_id *id, struct platform_device *pdev) { while (id->name[0]) { if (strcmp(pdev->name, id->name) == 0) { pdev->id_entry = id; return id; } id++; } return NULL; } #ifdef CONFIG_PM_SLEEP static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) { struct platform_driver *pdrv = to_platform_driver(dev->driver); struct platform_device *pdev = to_platform_device(dev); int ret = 0; if (dev->driver && pdrv->suspend) ret = pdrv->suspend(pdev, mesg); return ret; } static int platform_legacy_resume(struct device *dev) { struct platform_driver *pdrv = to_platform_driver(dev->driver); struct platform_device *pdev = to_platform_device(dev); int ret = 0; if (dev->driver && pdrv->resume) ret = pdrv->resume(pdev); return ret; } #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND int platform_pm_suspend(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->suspend) ret = drv->pm->suspend(dev); } else { ret = platform_legacy_suspend(dev, PMSG_SUSPEND); } return ret; } int platform_pm_resume(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->resume) ret = drv->pm->resume(dev); } else { ret = platform_legacy_resume(dev); } return ret; } #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS int platform_pm_freeze(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->freeze) ret = drv->pm->freeze(dev); } else { ret = platform_legacy_suspend(dev, PMSG_FREEZE); } return ret; } int platform_pm_thaw(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->thaw) ret = drv->pm->thaw(dev); } else { ret = platform_legacy_resume(dev); } return ret; } int platform_pm_poweroff(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; if (!drv) return 0; if (drv->pm) { if (drv->pm->poweroff) ret = drv->pm->poweroff(dev); } else { ret = platform_legacy_suspend(dev, PMSG_HIBERNATE); } return ret; } int platform_pm_restore(struct device *dev) { struct device_driver *drv = dev->driver; int ret = 0; 
if (!drv) return 0; if (drv->pm) { if (drv->pm->restore) ret = drv->pm->restore(dev); } else { ret = platform_legacy_resume(dev); } return ret; } #endif /* CONFIG_HIBERNATE_CALLBACKS */ /* modalias support enables more hands-off userspace setup: * (a) environment variable lets new-style hotplug events work once system is * fully running: "modprobe $MODALIAS" * (b) sysfs attribute lets new-style coldplug recover from hotplug events * mishandled before system is fully running: "modprobe $(cat modalias)" */ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct platform_device *pdev = to_platform_device(dev); int len; len = of_device_modalias(dev, buf, PAGE_SIZE); if (len != -ENODEV) return len; len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; return sysfs_emit(buf, "platform:%s\n", pdev->name); } static DEVICE_ATTR_RO(modalias); static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev_to_node(dev)); } static DEVICE_ATTR_RO(numa_node); static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct platform_device *pdev = to_platform_device(dev); ssize_t len; device_lock(dev); len = sysfs_emit(buf, "%s\n", pdev->driver_override); device_unlock(dev); return len; } static ssize_t driver_override_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct platform_device *pdev = to_platform_device(dev); int ret; ret = driver_set_override(dev, &pdev->driver_override, buf, count); if (ret) return ret; return count; } static DEVICE_ATTR_RW(driver_override); static struct attribute *platform_dev_attrs[] = { &dev_attr_modalias.attr, &dev_attr_numa_node.attr, &dev_attr_driver_override.attr, NULL, }; static umode_t platform_dev_attrs_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, typeof(*dev), kobj); if (a == &dev_attr_numa_node.attr && dev_to_node(dev) == NUMA_NO_NODE) return 0; return a->mode; } static const struct attribute_group platform_dev_group = { .attrs = platform_dev_attrs, .is_visible = platform_dev_attrs_visible, }; __ATTRIBUTE_GROUPS(platform_dev); /** * platform_match - bind platform device to platform driver. * @dev: device. * @drv: driver. * * Platform device IDs are assumed to be encoded like this: * "<name><instance>", where <name> is a short description of the type of * device, like "pci" or "floppy", and <instance> is the enumerated * instance of the device, like '0' or '42'. Driver IDs are simply * "<name>". So, extract the <name> from the platform_device structure, * and compare it against the name of the driver. Return whether they match * or not. 
*/ static int platform_match(struct device *dev, struct device_driver *drv) { struct platform_device *pdev = to_platform_device(dev); struct platform_driver *pdrv = to_platform_driver(drv); /* When driver_override is set, only bind to the matching driver */ if (pdev->driver_override) return !strcmp(pdev->driver_override, drv->name); /* Attempt an OF style match first */ if (of_driver_match_device(dev, drv)) return 1; /* Then try ACPI style match */ if (acpi_driver_match_device(dev, drv)) return 1; /* Then try to match against the id table */ if (pdrv->id_table) return platform_match_id(pdrv->id_table, pdev) != NULL; /* fall-back to driver name match */ return (strcmp(pdev->name, drv->name) == 0); } static int platform_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct platform_device *pdev = to_platform_device(dev); int rc; /* Some devices have extra OF data and an OF-style MODALIAS */ rc = of_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; rc = acpi_device_uevent_modalias(dev, env); if (rc != -ENODEV) return rc; add_uevent_var(env, "MODALIAS=%s%s", PLATFORM_MODULE_PREFIX, pdev->name); return 0; } static int platform_probe(struct device *_dev) { struct platform_driver *drv = to_platform_driver(_dev->driver); struct platform_device *dev = to_platform_device(_dev); int ret; /* * A driver registered using platform_driver_probe() cannot be bound * again later because the probe function usually lives in __init code * and so is gone. For these drivers .probe is set to * platform_probe_fail in __platform_driver_probe(). Don't even prepare * clocks and PM domains for these to match the traditional behaviour. */ if (unlikely(drv->probe == platform_probe_fail)) return -ENXIO; ret = of_clk_set_defaults(_dev->of_node, false); if (ret < 0) return ret; ret = dev_pm_domain_attach(_dev, true); if (ret) goto out; if (drv->probe) { ret = drv->probe(dev); if (ret) dev_pm_domain_detach(_dev, true); } out: if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) { dev_warn(_dev, "probe deferral not supported\n"); ret = -ENXIO; } return ret; } static void platform_remove(struct device *_dev) { struct platform_driver *drv = to_platform_driver(_dev->driver); struct platform_device *dev = to_platform_device(_dev); if (drv->remove_new) { drv->remove_new(dev); } else if (drv->remove) { int ret = drv->remove(dev); if (ret) dev_warn(_dev, "remove callback returned a non-zero value. 
This will be ignored.\n"); } dev_pm_domain_detach(_dev, true); } static void platform_shutdown(struct device *_dev) { struct platform_device *dev = to_platform_device(_dev); struct platform_driver *drv; if (!_dev->driver) return; drv = to_platform_driver(_dev->driver); if (drv->shutdown) drv->shutdown(dev); } static int platform_dma_configure(struct device *dev) { struct platform_driver *drv = to_platform_driver(dev->driver); struct fwnode_handle *fwnode = dev_fwnode(dev); enum dev_dma_attr attr; int ret = 0; if (is_of_node(fwnode)) { ret = of_dma_configure(dev, to_of_node(fwnode), true); } else if (is_acpi_device_node(fwnode)) { attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); ret = acpi_dma_configure(dev, attr); } if (ret || drv->driver_managed_dma) return ret; ret = iommu_device_use_default_domain(dev); if (ret) arch_teardown_dma_ops(dev); return ret; } static void platform_dma_cleanup(struct device *dev) { struct platform_driver *drv = to_platform_driver(dev->driver); if (!drv->driver_managed_dma) iommu_device_unuse_default_domain(dev); } static const struct dev_pm_ops platform_dev_pm_ops = { SET_RUNTIME_PM_OPS(pm_generic_runtime_suspend, pm_generic_runtime_resume, NULL) USE_PLATFORM_PM_SLEEP_OPS }; struct bus_type platform_bus_type = { .name = "platform", .dev_groups = platform_dev_groups, .match = platform_match, .uevent = platform_uevent, .probe = platform_probe, .remove = platform_remove, .shutdown = platform_shutdown, .dma_configure = platform_dma_configure, .dma_cleanup = platform_dma_cleanup, .pm = &platform_dev_pm_ops, }; EXPORT_SYMBOL_GPL(platform_bus_type); static inline int __platform_match(struct device *dev, const void *drv) { return platform_match(dev, (struct device_driver *)drv); } /** * platform_find_device_by_driver - Find a platform device with a given * driver. * @start: The device to start the search from. * @drv: The device driver to look for. */ struct device *platform_find_device_by_driver(struct device *start, const struct device_driver *drv) { return bus_find_device(&platform_bus_type, start, drv, __platform_match); } EXPORT_SYMBOL_GPL(platform_find_device_by_driver); void __weak __init early_platform_cleanup(void) { } int __init platform_bus_init(void) { int error; early_platform_cleanup(); error = device_register(&platform_bus); if (error) { put_device(&platform_bus); return error; } error = bus_register(&platform_bus_type); if (error) device_unregister(&platform_bus); return error; }
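/*
 * A minimal usage sketch (not part of platform.c above): a hypothetical module
 * that registers two platform drivers in one call with
 * platform_register_drivers() and drops them again with
 * platform_unregister_drivers(). The "demo-foo"/"demo-bar" names and the
 * trivial probe functions are illustrative assumptions; unwinding after a
 * partial registration failure is handled inside __platform_register_drivers()
 * as shown above.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int demo_foo_probe(struct platform_device *pdev)
{
	dev_info(&pdev->dev, "demo-foo bound\n");
	return 0;
}

static int demo_bar_probe(struct platform_device *pdev)
{
	dev_info(&pdev->dev, "demo-bar bound\n");
	return 0;
}

static struct platform_driver demo_foo_driver = {
	.probe = demo_foo_probe,
	.driver = { .name = "demo-foo" },
};

static struct platform_driver demo_bar_driver = {
	.probe = demo_bar_probe,
	.driver = { .name = "demo-bar" },
};

/* Registration follows array order; unregistration runs in reverse order. */
static struct platform_driver * const demo_drivers[] = {
	&demo_foo_driver,
	&demo_bar_driver,
};

static int __init demo_init(void)
{
	return platform_register_drivers(demo_drivers, ARRAY_SIZE(demo_drivers));
}
module_init(demo_init);

static void __exit demo_exit(void)
{
	platform_unregister_drivers(demo_drivers, ARRAY_SIZE(demo_drivers));
}
module_exit(demo_exit);

MODULE_LICENSE("GPL");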
// SPDX-License-Identifier: GPL-2.0-or-later
/* A driver for the D-Link DSB-R100 USB radio and Gemtek USB Radio 21.
 * The device plugs into both the USB and an analog audio input, so this thing
 * only deals with initialisation and frequency setting, the
 * audio data has to be handled by a sound driver.
 *
 * Major issue: I can't find out where the device reports the signal
 * strength, and indeed the windows software apparently just looks
 * at the stereo indicator as well. So, scanning will only find
 * stereo stations. Sad, but I can't help it.
 *
 * Also, the windows program sends oodles of messages over to the
 * device, and I couldn't figure out their meaning. My suspicion
 * is that they don't have any:-)
 *
 * You might find some interesting stuff about this module at
 * http://unimut.fsk.uni-heidelberg.de/unimut/demi/dsbr
 *
 * Fully tested with the Keene USB FM Transmitter and the v4l2-compliance tool.
 *
 * Copyright (c) 2000 Markus Demleitner <msdemlei@cl.uni-heidelberg.de>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/input.h>
#include <linux/videodev2.h>
#include <linux/usb.h>
#include <media/v4l2-device.h>
#include <media/v4l2-ioctl.h>
#include <media/v4l2-ctrls.h>
#include <media/v4l2-event.h>

/*
 * Version Information
 */
MODULE_AUTHOR("Markus Demleitner <msdemlei@tucana.harvard.edu>");
MODULE_DESCRIPTION("D-Link DSB-R100 USB FM radio driver");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.1.0");

#define DSB100_VENDOR 0x04b4
#define DSB100_PRODUCT 0x1002

/* Commands the device appears to understand */
#define DSB100_TUNE 1
#define DSB100_ONOFF 2

#define TB_LEN 16

/* Frequency limits in MHz -- these are European values. For Japanese
 * devices, that would be 76 and 91.
*/ #define FREQ_MIN 87.5 #define FREQ_MAX 108.0 #define FREQ_MUL 16000 #define v4l2_dev_to_radio(d) container_of(d, struct dsbr100_device, v4l2_dev) static int radio_nr = -1; module_param(radio_nr, int, 0); /* Data for one (physical) device */ struct dsbr100_device { struct usb_device *usbdev; struct video_device videodev; struct v4l2_device v4l2_dev; struct v4l2_ctrl_handler hdl; u8 *transfer_buffer; struct mutex v4l2_lock; int curfreq; bool stereo; bool muted; }; /* Low-level device interface begins here */ /* set a frequency, freq is defined by v4l's TUNER_LOW, i.e. 1/16th kHz */ static int dsbr100_setfreq(struct dsbr100_device *radio, unsigned freq) { unsigned f = (freq / 16 * 80) / 1000 + 856; int retval = 0; if (!radio->muted) { retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), DSB100_TUNE, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, (f >> 8) & 0x00ff, f & 0xff, radio->transfer_buffer, 8, 300); if (retval >= 0) mdelay(1); } if (retval >= 0) { radio->curfreq = freq; return 0; } dev_err(&radio->usbdev->dev, "%s - usb_control_msg returned %i, request %i\n", __func__, retval, DSB100_TUNE); return retval; } /* switch on radio */ static int dsbr100_start(struct dsbr100_device *radio) { int retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), DSB100_ONOFF, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x01, 0x00, radio->transfer_buffer, 8, 300); if (retval >= 0) return dsbr100_setfreq(radio, radio->curfreq); dev_err(&radio->usbdev->dev, "%s - usb_control_msg returned %i, request %i\n", __func__, retval, DSB100_ONOFF); return retval; } /* switch off radio */ static int dsbr100_stop(struct dsbr100_device *radio) { int retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), DSB100_ONOFF, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x00, 0x00, radio->transfer_buffer, 8, 300); if (retval >= 0) return 0; dev_err(&radio->usbdev->dev, "%s - usb_control_msg returned %i, request %i\n", __func__, retval, DSB100_ONOFF); return retval; } /* return the device status. This is, in effect, just whether it sees a stereo signal or not. Pity. */ static void dsbr100_getstat(struct dsbr100_device *radio) { int retval = usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), USB_REQ_GET_STATUS, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x00, 0x24, radio->transfer_buffer, 8, 300); if (retval < 0) { radio->stereo = false; dev_err(&radio->usbdev->dev, "%s - usb_control_msg returned %i, request %i\n", __func__, retval, USB_REQ_GET_STATUS); } else { radio->stereo = !(radio->transfer_buffer[0] & 0x01); } } static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *v) { struct dsbr100_device *radio = video_drvdata(file); strscpy(v->driver, "dsbr100", sizeof(v->driver)); strscpy(v->card, "D-Link R-100 USB FM Radio", sizeof(v->card)); usb_make_path(radio->usbdev, v->bus_info, sizeof(v->bus_info)); return 0; } static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct dsbr100_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; dsbr100_getstat(radio); strscpy(v->name, "FM", sizeof(v->name)); v->type = V4L2_TUNER_RADIO; v->rangelow = FREQ_MIN * FREQ_MUL; v->rangehigh = FREQ_MAX * FREQ_MUL; v->rxsubchans = radio->stereo ? V4L2_TUNER_SUB_STEREO : V4L2_TUNER_SUB_MONO; v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO; v->audmode = V4L2_TUNER_MODE_STEREO; v->signal = radio->stereo ? 
0xffff : 0; /* We can't get the signal strength */ return 0; } static int vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { return v->index ? -EINVAL : 0; } static int vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct dsbr100_device *radio = video_drvdata(file); if (f->tuner != 0 || f->type != V4L2_TUNER_RADIO) return -EINVAL; return dsbr100_setfreq(radio, clamp_t(unsigned, f->frequency, FREQ_MIN * FREQ_MUL, FREQ_MAX * FREQ_MUL)); } static int vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct dsbr100_device *radio = video_drvdata(file); if (f->tuner) return -EINVAL; f->type = V4L2_TUNER_RADIO; f->frequency = radio->curfreq; return 0; } static int usb_dsbr100_s_ctrl(struct v4l2_ctrl *ctrl) { struct dsbr100_device *radio = container_of(ctrl->handler, struct dsbr100_device, hdl); switch (ctrl->id) { case V4L2_CID_AUDIO_MUTE: radio->muted = ctrl->val; return radio->muted ? dsbr100_stop(radio) : dsbr100_start(radio); } return -EINVAL; } /* USB subsystem interface begins here */ /* * Handle unplugging of the device. * We call video_unregister_device in any case. * The last function called in this procedure is * usb_dsbr100_video_device_release */ static void usb_dsbr100_disconnect(struct usb_interface *intf) { struct dsbr100_device *radio = usb_get_intfdata(intf); mutex_lock(&radio->v4l2_lock); /* * Disconnect is also called on unload, and in that case we need to * mute the device. This call will silently fail if it is called * after a physical disconnect. */ usb_control_msg(radio->usbdev, usb_rcvctrlpipe(radio->usbdev, 0), DSB100_ONOFF, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x00, 0x00, radio->transfer_buffer, 8, 300); usb_set_intfdata(intf, NULL); video_unregister_device(&radio->videodev); v4l2_device_disconnect(&radio->v4l2_dev); mutex_unlock(&radio->v4l2_lock); v4l2_device_put(&radio->v4l2_dev); } /* Suspend device - stop device. */ static int usb_dsbr100_suspend(struct usb_interface *intf, pm_message_t message) { struct dsbr100_device *radio = usb_get_intfdata(intf); mutex_lock(&radio->v4l2_lock); if (!radio->muted && dsbr100_stop(radio) < 0) dev_warn(&intf->dev, "dsbr100_stop failed\n"); mutex_unlock(&radio->v4l2_lock); dev_info(&intf->dev, "going into suspend..\n"); return 0; } /* Resume device - start device. 
*/ static int usb_dsbr100_resume(struct usb_interface *intf) { struct dsbr100_device *radio = usb_get_intfdata(intf); mutex_lock(&radio->v4l2_lock); if (!radio->muted && dsbr100_start(radio) < 0) dev_warn(&intf->dev, "dsbr100_start failed\n"); mutex_unlock(&radio->v4l2_lock); dev_info(&intf->dev, "coming out of suspend..\n"); return 0; } /* free data structures */ static void usb_dsbr100_release(struct v4l2_device *v4l2_dev) { struct dsbr100_device *radio = v4l2_dev_to_radio(v4l2_dev); v4l2_ctrl_handler_free(&radio->hdl); v4l2_device_unregister(&radio->v4l2_dev); kfree(radio->transfer_buffer); kfree(radio); } static const struct v4l2_ctrl_ops usb_dsbr100_ctrl_ops = { .s_ctrl = usb_dsbr100_s_ctrl, }; /* File system interface */ static const struct v4l2_file_operations usb_dsbr100_fops = { .owner = THIS_MODULE, .unlocked_ioctl = video_ioctl2, .open = v4l2_fh_open, .release = v4l2_fh_release, .poll = v4l2_ctrl_poll, }; static const struct v4l2_ioctl_ops usb_dsbr100_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_g_tuner = vidioc_g_tuner, .vidioc_s_tuner = vidioc_s_tuner, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, .vidioc_log_status = v4l2_ctrl_log_status, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; /* check if the device is present and register with v4l and usb if it is */ static int usb_dsbr100_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct dsbr100_device *radio; struct v4l2_device *v4l2_dev; int retval; radio = kzalloc(sizeof(struct dsbr100_device), GFP_KERNEL); if (!radio) return -ENOMEM; radio->transfer_buffer = kmalloc(TB_LEN, GFP_KERNEL); if (!(radio->transfer_buffer)) { kfree(radio); return -ENOMEM; } v4l2_dev = &radio->v4l2_dev; v4l2_dev->release = usb_dsbr100_release; retval = v4l2_device_register(&intf->dev, v4l2_dev); if (retval < 0) { v4l2_err(v4l2_dev, "couldn't register v4l2_device\n"); goto err_reg_dev; } v4l2_ctrl_handler_init(&radio->hdl, 1); v4l2_ctrl_new_std(&radio->hdl, &usb_dsbr100_ctrl_ops, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 1); if (radio->hdl.error) { retval = radio->hdl.error; v4l2_err(v4l2_dev, "couldn't register control\n"); goto err_reg_ctrl; } mutex_init(&radio->v4l2_lock); strscpy(radio->videodev.name, v4l2_dev->name, sizeof(radio->videodev.name)); radio->videodev.v4l2_dev = v4l2_dev; radio->videodev.fops = &usb_dsbr100_fops; radio->videodev.ioctl_ops = &usb_dsbr100_ioctl_ops; radio->videodev.release = video_device_release_empty; radio->videodev.lock = &radio->v4l2_lock; radio->videodev.ctrl_handler = &radio->hdl; radio->videodev.device_caps = V4L2_CAP_RADIO | V4L2_CAP_TUNER; radio->usbdev = interface_to_usbdev(intf); radio->curfreq = FREQ_MIN * FREQ_MUL; radio->muted = true; video_set_drvdata(&radio->videodev, radio); usb_set_intfdata(intf, radio); retval = video_register_device(&radio->videodev, VFL_TYPE_RADIO, radio_nr); if (retval == 0) return 0; v4l2_err(v4l2_dev, "couldn't register video device\n"); err_reg_ctrl: v4l2_ctrl_handler_free(&radio->hdl); v4l2_device_unregister(v4l2_dev); err_reg_dev: kfree(radio->transfer_buffer); kfree(radio); return retval; } static const struct usb_device_id usb_dsbr100_device_table[] = { { USB_DEVICE(DSB100_VENDOR, DSB100_PRODUCT) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_dsbr100_device_table); /* USB subsystem interface */ static struct usb_driver usb_dsbr100_driver = { .name = "dsbr100", .probe = usb_dsbr100_probe, .disconnect = usb_dsbr100_disconnect, .id_table = 
usb_dsbr100_device_table, .suspend = usb_dsbr100_suspend, .resume = usb_dsbr100_resume, .reset_resume = usb_dsbr100_resume, }; module_usb_driver(usb_dsbr100_driver);
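/*
 * A minimal userspace sketch (not part of the driver above): tuning the radio
 * through the standard V4L2 API. Because the tuner advertises
 * V4L2_TUNER_CAP_LOW, frequencies are expressed in 1/16 kHz units, i.e.
 * MHz * FREQ_MUL (16000); 99.8 MHz becomes 1596800. The "/dev/radio0" device
 * node and the helper name are assumptions for illustration only.
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

static int tune_radio(const char *dev, unsigned int mhz_times_10)
{
	struct v4l2_frequency f = {
		.tuner = 0,
		.type = V4L2_TUNER_RADIO,
		/* 1/16 kHz units: (MHz * 10) * 1600 == MHz * 16000 */
		.frequency = mhz_times_10 * 1600,
	};
	int fd = open(dev, O_RDWR);
	int ret;

	if (fd < 0)
		return -1;
	ret = ioctl(fd, VIDIOC_S_FREQUENCY, &f);
	close(fd);
	return ret;
}

/* Example: tune_radio("/dev/radio0", 998) selects 99.8 MHz. */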
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_MM_H
#define _LINUX_SCHED_MM_H

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>

/*
 * Routines for handling mm_structs
 */
extern struct mm_struct *mm_alloc(void);

/**
 * mmgrab() - Pin a &struct mm_struct.
 * @mm: The &struct mm_struct to pin.
 *
 * Make sure that @mm will not get freed even after the owning task
 * exits. This doesn't guarantee that the associated address space
 * will still exist later on and mmget_not_zero() has to be used before
 * accessing it.
 *
 * This is a preferred way to pin @mm for a longer/unbounded amount
 * of time.
 *
 * Use mmdrop() to release the reference acquired by mmgrab().
 *
 * See also <Documentation/mm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmgrab(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_count);
}

static inline void smp_mb__after_mmgrab(void)
{
	smp_mb__after_atomic();
}

extern void __mmdrop(struct mm_struct *mm);

static inline void mmdrop(struct mm_struct *mm)
{
	/*
	 * The implicit full barrier implied by atomic_dec_and_test() is
	 * required by the membarrier system call before returning to
	 * user-space, after storing to rq->curr.
*/ if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } #ifdef CONFIG_PREEMPT_RT /* * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is * by far the least expensive way to do that. */ static inline void __mmdrop_delayed(struct rcu_head *rhp) { struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); __mmdrop(mm); } /* * Invoked from finish_task_switch(). Delegates the heavy lifting on RT * kernels via RCU. */ static inline void mmdrop_sched(struct mm_struct *mm) { /* Provides a full memory barrier. See mmdrop() */ if (atomic_dec_and_test(&mm->mm_count)) call_rcu(&mm->delayed_drop, __mmdrop_delayed); } #else static inline void mmdrop_sched(struct mm_struct *mm) { mmdrop(mm); } #endif /* Helpers for lazy TLB mm refcounting */ static inline void mmgrab_lazy_tlb(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) mmgrab(mm); } static inline void mmdrop_lazy_tlb(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) { mmdrop(mm); } else { /* * mmdrop_lazy_tlb must provide a full memory barrier, see the * membarrier comment finish_task_switch which relies on this. */ smp_mb(); } } static inline void mmdrop_lazy_tlb_sched(struct mm_struct *mm) { if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) mmdrop_sched(mm); else smp_mb(); /* see mmdrop_lazy_tlb() above */ } /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. * * Make sure that the address space of the given &struct mm_struct doesn't * go away. This does not protect against parts of the address space being * modified or freed, however. * * Never use this function to pin this address space for an * unbounded/indefinite amount of time. * * Use mmput() to release the reference acquired by mmget(). * * See also <Documentation/mm/active_mm.rst> for an in-depth explanation * of &mm_struct.mm_count vs &mm_struct.mm_users. */ static inline void mmget(struct mm_struct *mm) { atomic_inc(&mm->mm_users); } static inline bool mmget_not_zero(struct mm_struct *mm) { return atomic_inc_not_zero(&mm->mm_users); } /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); #ifdef CONFIG_MMU /* same as above but performs the slow path from the async context. Can * be called from the atomic context as well */ void mmput_async(struct mm_struct *); #endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); /* * Grab a reference to a task's mm, if it is not already going away * and ptrace_may_access with the mode parameter passed to it * succeeds. 
*/ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct on exit() */ extern void exit_mm_release(struct task_struct *, struct mm_struct *); /* Remove the current tasks stale references to the old mm_struct on exec() */ extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); #else static inline void mm_update_next_owner(struct mm_struct *mm) { } #endif /* CONFIG_MEMCG */ #ifdef CONFIG_MMU #ifndef arch_get_mmap_end #define arch_get_mmap_end(addr, len, flags) (TASK_SIZE) #endif #ifndef arch_get_mmap_base #define arch_get_mmap_base(addr, base) (base) #endif extern void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); extern unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); unsigned long generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) {} #endif static inline bool in_vfork(struct task_struct *tsk) { bool ret; /* * need RCU to access ->real_parent if CLONE_VM was used along with * CLONE_PARENT. * * We check real_parent->mm == tsk->mm because CLONE_VFORK does not * imply CLONE_VM * * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus * ->real_parent is not necessarily the task doing vfork(), so in * theory we can't rely on task_lock() if we want to dereference it. * * And in this case we can't trust the real_parent->mm == tsk->mm * check, it can be false negative. But we do not care, if init or * another oom-unkillable task does this it should blame itself. */ rcu_read_lock(); ret = tsk->vfork_done && rcu_dereference(tsk->real_parent)->mm == tsk->mm; rcu_read_unlock(); return ret; } /* * Applies per-task gfp context to the given allocation flags. 
* PF_MEMALLOC_NOIO implies GFP_NOIO * PF_MEMALLOC_NOFS implies GFP_NOFS * PF_MEMALLOC_PIN implies !GFP_MOVABLE */ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_NORECLAIM | PF_MEMALLOC_NOWARN | PF_MEMALLOC_PIN))) { /* * Stronger flags before weaker flags: * NORECLAIM implies NOIO, which in turn implies NOFS */ if (pflags & PF_MEMALLOC_NORECLAIM) flags &= ~__GFP_DIRECT_RECLAIM; else if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; if (pflags & PF_MEMALLOC_NOWARN) flags |= __GFP_NOWARN; if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } return flags; } #ifdef CONFIG_LOCKDEP extern void __fs_reclaim_acquire(unsigned long ip); extern void __fs_reclaim_release(unsigned long ip); extern void fs_reclaim_acquire(gfp_t gfp_mask); extern void fs_reclaim_release(gfp_t gfp_mask); #else static inline void __fs_reclaim_acquire(unsigned long ip) { } static inline void __fs_reclaim_release(unsigned long ip) { } static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif /* Any memory-allocation retry loop should use * memalloc_retry_wait(), and pass the flags for the most * constrained allocation attempt that might have failed. * This provides useful documentation of where loops are, * and a central place to fine tune the waiting as the MM * implementation changes. */ static inline void memalloc_retry_wait(gfp_t gfp_flags) { /* We use io_schedule_timeout because waiting for memory * typically included waiting for dirty pages to be * written out, which requires IO. */ __set_current_state(TASK_UNINTERRUPTIBLE); gfp_flags = current_gfp_context(gfp_flags); if (gfpflags_allow_blocking(gfp_flags) && !(gfp_flags & __GFP_NORETRY)) /* Probably waited already, no need for much more */ io_schedule_timeout(1); else /* Probably didn't wait, and has now released a lock, * so now is a good time to wait */ io_schedule_timeout(HZ/50); } /** * might_alloc - Mark possible allocation sites * @gfp_mask: gfp_t flags that would be used to allocate * * Similar to might_sleep() and other annotations, this can be used in functions * that might allocate, but often don't. Compiles to nothing without * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp allows blocking. */ static inline void might_alloc(gfp_t gfp_mask) { fs_reclaim_acquire(gfp_mask); fs_reclaim_release(gfp_mask); might_sleep_if(gfpflags_allow_blocking(gfp_mask)); } /** * memalloc_flags_save - Add a PF_* flag to current->flags, save old value * * This allows PF_* flags to be conveniently added, irrespective of current * value, and then the old version restored with memalloc_flags_restore(). */ static inline unsigned memalloc_flags_save(unsigned flags) { unsigned oldflags = ~current->flags & flags; current->flags |= flags; return oldflags; } static inline void memalloc_flags_restore(unsigned flags) { current->flags &= ~flags; } /** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * * This functions marks the beginning of the GFP_NOIO allocation scope. * All further allocations will implicitly drop __GFP_IO flag and so * they are safe for the IO critical section from the allocation recursion * point of view. Use memalloc_noio_restore to end the scope with flags * returned by this function. * * Context: This function is safe to be used from any context. 
* Return: The saved flags to be passed to memalloc_noio_restore. */ static inline unsigned int memalloc_noio_save(void) { return memalloc_flags_save(PF_MEMALLOC_NOIO); } /** * memalloc_noio_restore - Ends the implicit GFP_NOIO scope. * @flags: Flags to restore. * * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function. * Always make sure that the given flags is the return value from the * pairing memalloc_noio_save call. */ static inline void memalloc_noio_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope. * * This functions marks the beginning of the GFP_NOFS allocation scope. * All further allocations will implicitly drop __GFP_FS flag and so * they are safe for the FS critical section from the allocation recursion * point of view. Use memalloc_nofs_restore to end the scope with flags * returned by this function. * * Context: This function is safe to be used from any context. * Return: The saved flags to be passed to memalloc_nofs_restore. */ static inline unsigned int memalloc_nofs_save(void) { return memalloc_flags_save(PF_MEMALLOC_NOFS); } /** * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope. * @flags: Flags to restore. * * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function. * Always make sure that the given flags is the return value from the * pairing memalloc_nofs_save call. */ static inline void memalloc_nofs_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_noreclaim_save - Marks implicit __GFP_MEMALLOC scope. * * This function marks the beginning of the __GFP_MEMALLOC allocation scope. * All further allocations will implicitly add the __GFP_MEMALLOC flag, which * prevents entering reclaim and allows access to all memory reserves. This * should only be used when the caller guarantees the allocation will allow more * memory to be freed very shortly, i.e. it needs to allocate some memory in * the process of freeing memory, and cannot reclaim due to potential recursion. * * Users of this scope have to be extremely careful to not deplete the reserves * completely and implement a throttling mechanism which controls the * consumption of the reserve based on the amount of freed memory. Usage of a * pre-allocated pool (e.g. mempool) should be always considered before using * this scope. * * Individual allocations under the scope can opt out using __GFP_NOMEMALLOC * * Context: This function should not be used in an interrupt context as that one * does not give PF_MEMALLOC access to reserves. * See __gfp_pfmemalloc_flags(). * Return: The saved flags to be passed to memalloc_noreclaim_restore. */ static inline unsigned int memalloc_noreclaim_save(void) { return memalloc_flags_save(PF_MEMALLOC); } /** * memalloc_noreclaim_restore - Ends the implicit __GFP_MEMALLOC scope. * @flags: Flags to restore. * * Ends the implicit __GFP_MEMALLOC scope started by memalloc_noreclaim_save * function. Always make sure that the given flags is the return value from the * pairing memalloc_noreclaim_save call. */ static inline void memalloc_noreclaim_restore(unsigned int flags) { memalloc_flags_restore(flags); } /** * memalloc_pin_save - Marks implicit ~__GFP_MOVABLE scope. * * This function marks the beginning of the ~__GFP_MOVABLE allocation scope. * All further allocations will implicitly remove the __GFP_MOVABLE flag, which * will constraint the allocations to zones that allow long term pinning, i.e. * not ZONE_MOVABLE zones. 
* * Return: The saved flags to be passed to memalloc_pin_restore. */ static inline unsigned int memalloc_pin_save(void) { return memalloc_flags_save(PF_MEMALLOC_PIN); } /** * memalloc_pin_restore - Ends the implicit ~__GFP_MOVABLE scope. * @flags: Flags to restore. * * Ends the implicit ~__GFP_MOVABLE scope started by memalloc_pin_save function. * Always make sure that the given flags is the return value from the pairing * memalloc_pin_save call. */ static inline void memalloc_pin_restore(unsigned int flags) { memalloc_flags_restore(flags); } #ifdef CONFIG_MEMCG DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); /** * set_active_memcg - Starts the remote memcg charging scope. * @memcg: memcg to charge. * * This function marks the beginning of the remote memcg charging scope. All the * __GFP_ACCOUNT allocations till the end of the scope will be charged to the * given memcg. * * Please, make sure that caller has a reference to the passed memcg structure, * so its lifetime is guaranteed to exceed the scope between two * set_active_memcg() calls. * * NOTE: This function can nest. Users must save the return value and * reset the previous value after their own charging scope is over. */ static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { struct mem_cgroup *old; if (!in_task()) { old = this_cpu_read(int_active_memcg); this_cpu_write(int_active_memcg, memcg); } else { old = current->active_memcg; current->active_memcg = memcg; } return old; } #else static inline struct mem_cgroup * set_active_memcg(struct mem_cgroup *memcg) { return NULL; } #endif #ifdef CONFIG_MEMBARRIER enum { MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1), MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY = (1U << 2), MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 4), MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE = (1U << 5), MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY = (1U << 6), MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ = (1U << 7), }; enum { MEMBARRIER_FLAG_SYNC_CORE = (1U << 0), MEMBARRIER_FLAG_RSEQ = (1U << 1), }; #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS #include <asm/membarrier.h> #endif static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { if (current->mm != mm) return; if (likely(!(atomic_read(&mm->membarrier_state) & MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE))) return; sync_core_before_usermode(); } extern void membarrier_exec_mmap(struct mm_struct *mm); extern void membarrier_update_current_mm(struct mm_struct *next_mm); #else #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS static inline void membarrier_arch_switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { } #endif static inline void membarrier_exec_mmap(struct mm_struct *mm) { } static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { } static inline void membarrier_update_current_mm(struct mm_struct *next_mm) { } #endif #endif /* _LINUX_SCHED_MM_H */
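/*
 * A minimal usage sketch (not part of this header): a hypothetical filesystem
 * helper that allocates memory while a transaction is open. Wrapping the
 * allocation in memalloc_nofs_save()/memalloc_nofs_restore() implicitly strips
 * __GFP_FS from every allocation in the scope, so reclaim cannot recurse back
 * into the filesystem. fs_alloc_in_transaction() is an illustrative name only.
 */
#include <linux/sched/mm.h>
#include <linux/slab.h>

static void *fs_alloc_in_transaction(size_t size)
{
	unsigned int nofs_flags = memalloc_nofs_save();
	void *p;

	/* Behaves like a GFP_NOFS allocation because of the scope above. */
	p = kmalloc(size, GFP_KERNEL);

	memalloc_nofs_restore(nofs_flags);
	return p;
}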
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_reserved_mem.h>
#include <linux/dma-direct.h> /* for bus_dma_region */
#include <linux/dma-map-ops.h>
#include <linux/init.h>
#include <linux/mod_devicetable.h>
#include <linux/slab.h>
#include <linux/platform_device.h>

#include <asm/errno.h>
#include "of_private.h"

/**
 * of_match_device - Tell if a struct device matches an of_device_id list
 * @matches: array of OF device match structures to search in
 * @dev: the OF device structure to match against
 *
 * Used by a driver to check whether a platform_device present in the
 * system is in its list of supported devices.
 */
const struct of_device_id *of_match_device(const struct of_device_id *matches,
					   const struct device *dev)
{
	if (!matches || !dev->of_node || dev->of_node_reused)
		return NULL;
	return of_match_node(matches, dev->of_node);
}
EXPORT_SYMBOL(of_match_device);

static void
of_dma_set_restricted_buffer(struct device *dev, struct device_node *np)
{
	struct device_node *node, *of_node = dev->of_node;
	int count, i;

	if (!IS_ENABLED(CONFIG_DMA_RESTRICTED_POOL))
		return;

	count = of_property_count_elems_of_size(of_node, "memory-region",
						sizeof(u32));
	/*
	 * If dev->of_node doesn't exist or doesn't contain memory-region, try
	 * the OF node having DMA configuration.
	 */
	if (count <= 0) {
		of_node = np;
		count = of_property_count_elems_of_size(
			of_node, "memory-region", sizeof(u32));
	}

	for (i = 0; i < count; i++) {
		node = of_parse_phandle(of_node, "memory-region", i);
		/*
		 * There might be multiple memory regions, but only one
		 * restricted-dma-pool region is allowed.
		 */
		if (of_device_is_compatible(node, "restricted-dma-pool") &&
		    of_device_is_available(node)) {
			of_node_put(node);
			break;
		}
		of_node_put(node);
	}

	/*
	 * Attempt to initialize a restricted-dma-pool region if one was found.
	 * Note that count can hold a negative error code.
*/ if (i < count && of_reserved_mem_device_init_by_idx(dev, of_node, i)) dev_warn(dev, "failed to initialise \"restricted-dma-pool\" memory node\n"); } /** * of_dma_configure_id - Setup DMA configuration * @dev: Device to apply DMA configuration * @np: Pointer to OF node having DMA configuration * @force_dma: Whether device is to be set up by of_dma_configure() even if * DMA capability is not explicitly described by firmware. * @id: Optional const pointer value input id * * Try to get devices's DMA configuration from DT and update it * accordingly. * * If platform code needs to use its own special DMA configuration, it * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events * to fix up DMA configuration. */ int of_dma_configure_id(struct device *dev, struct device_node *np, bool force_dma, const u32 *id) { const struct bus_dma_region *map = NULL; struct device_node *bus_np; u64 dma_start = 0; u64 mask, end, size = 0; bool coherent; int iommu_ret; int ret; if (np == dev->of_node) bus_np = __of_get_dma_parent(np); else bus_np = of_node_get(np); ret = of_dma_get_range(bus_np, &map); of_node_put(bus_np); if (ret < 0) { /* * For legacy reasons, we have to assume some devices need * DMA configuration regardless of whether "dma-ranges" is * correctly specified or not. */ if (!force_dma) return ret == -ENODEV ? 0 : ret; } else { const struct bus_dma_region *r = map; u64 dma_end = 0; /* Determine the overall bounds of all DMA regions */ for (dma_start = ~0; r->size; r++) { /* Take lower and upper limits */ if (r->dma_start < dma_start) dma_start = r->dma_start; if (r->dma_start + r->size > dma_end) dma_end = r->dma_start + r->size; } size = dma_end - dma_start; /* * Add a work around to treat the size as mask + 1 in case * it is defined in DT as a mask. */ if (size & 1) { dev_warn(dev, "Invalid size 0x%llx for dma-range(s)\n", size); size = size + 1; } if (!size) { dev_err(dev, "Adjusted size 0x%llx invalid\n", size); kfree(map); return -EINVAL; } } /* * If @dev is expected to be DMA-capable then the bus code that created * it should have initialised its dma_mask pointer by this point. For * now, we'll continue the legacy behaviour of coercing it to the * coherent mask if not, but we'll no longer do so quietly. */ if (!dev->dma_mask) { dev_warn(dev, "DMA mask not set\n"); dev->dma_mask = &dev->coherent_dma_mask; } if (!size && dev->coherent_dma_mask) size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); else if (!size) size = 1ULL << 32; /* * Limit coherent and dma mask based on size and default mask * set by the driver. */ end = dma_start + size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; /* ...but only set bus limit and range map if we found valid dma-ranges earlier */ if (!ret) { dev->bus_dma_limit = end; dev->dma_range_map = map; } coherent = of_dma_is_coherent(np); dev_dbg(dev, "device is%sdma coherent\n", coherent ? 
" " : " not "); iommu_ret = of_iommu_configure(dev, np, id); if (iommu_ret == -EPROBE_DEFER) { /* Don't touch range map if it wasn't set from a valid dma-ranges */ if (!ret) dev->dma_range_map = NULL; kfree(map); return -EPROBE_DEFER; } else if (iommu_ret == -ENODEV) { dev_dbg(dev, "device is not behind an iommu\n"); } else if (iommu_ret) { dev_err(dev, "iommu configuration for device failed with %pe\n", ERR_PTR(iommu_ret)); /* * Historically this routine doesn't fail driver probing * due to errors in of_iommu_configure() */ } else dev_dbg(dev, "device is behind an iommu\n"); arch_setup_dma_ops(dev, dma_start, size, coherent); if (iommu_ret) of_dma_set_restricted_buffer(dev, np); return 0; } EXPORT_SYMBOL_GPL(of_dma_configure_id); const void *of_device_get_match_data(const struct device *dev) { const struct of_device_id *match; match = of_match_device(dev->driver->of_match_table, dev); if (!match) return NULL; return match->data; } EXPORT_SYMBOL(of_device_get_match_data); /** * of_device_modalias - Fill buffer with newline terminated modalias string * @dev: Calling device * @str: Modalias string * @len: Size of @str */ ssize_t of_device_modalias(struct device *dev, char *str, ssize_t len) { ssize_t sl; if (!dev || !dev->of_node || dev->of_node_reused) return -ENODEV; sl = of_modalias(dev->of_node, str, len - 2); if (sl < 0) return sl; if (sl > len - 2) return -ENOMEM; str[sl++] = '\n'; str[sl] = 0; return sl; } EXPORT_SYMBOL_GPL(of_device_modalias); /** * of_device_uevent - Display OF related uevent information * @dev: Device to display the uevent information for * @env: Kernel object's userspace event reference to fill up */ void of_device_uevent(const struct device *dev, struct kobj_uevent_env *env) { const char *compat, *type; struct alias_prop *app; struct property *p; int seen = 0; if ((!dev) || (!dev->of_node)) return; add_uevent_var(env, "OF_NAME=%pOFn", dev->of_node); add_uevent_var(env, "OF_FULLNAME=%pOF", dev->of_node); type = of_node_get_device_type(dev->of_node); if (type) add_uevent_var(env, "OF_TYPE=%s", type); /* Since the compatible field can contain pretty much anything * it's not really legal to split it out with commas. We split it * up using a number of environment variables instead. */ of_property_for_each_string(dev->of_node, "compatible", p, compat) { add_uevent_var(env, "OF_COMPATIBLE_%d=%s", seen, compat); seen++; } add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen); seen = 0; mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (dev->of_node == app->np) { add_uevent_var(env, "OF_ALIAS_%d=%s", seen, app->alias); seen++; } } mutex_unlock(&of_mutex); } EXPORT_SYMBOL_GPL(of_device_uevent); int of_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { int sl; if ((!dev) || (!dev->of_node) || dev->of_node_reused) return -ENODEV; /* Devicetree modalias is tricky, we add it in 2 steps */ if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; sl = of_modalias(dev->of_node, &env->buf[env->buflen-1], sizeof(env->buf) - env->buflen); if (sl < 0) return sl; if (sl >= (sizeof(env->buf) - env->buflen)) return -ENOMEM; env->buflen += sl; return 0; } EXPORT_SYMBOL_GPL(of_device_uevent_modalias); /** * of_device_make_bus_id - Use the device node data to assign a unique name * @dev: pointer to device structure that is linked to a device tree node * * This routine will first try using the translated bus address to * derive a unique name. If it cannot, then it will prepend names from * parent nodes until a unique name can be derived. 
*/ void of_device_make_bus_id(struct device *dev) { struct device_node *node = dev->of_node; const __be32 *reg; u64 addr; u32 mask; /* Construct the name, using parent nodes if necessary to ensure uniqueness */ while (node->parent) { /* * If the address can be translated, then that is as much * uniqueness as we need. Make it the first component and return */ reg = of_get_property(node, "reg", NULL); if (reg && (addr = of_translate_address(node, reg)) != OF_BAD_ADDR) { if (!of_property_read_u32(node, "mask", &mask)) dev_set_name(dev, dev_name(dev) ? "%llx.%x.%pOFn:%s" : "%llx.%x.%pOFn", addr, ffs(mask) - 1, node, dev_name(dev)); else dev_set_name(dev, dev_name(dev) ? "%llx.%pOFn:%s" : "%llx.%pOFn", addr, node, dev_name(dev)); return; } /* format arguments only used if dev_name() resolves to NULL */ dev_set_name(dev, dev_name(dev) ? "%s:%s" : "%s", kbasename(node->full_name), dev_name(dev)); node = node->parent; } } EXPORT_SYMBOL_GPL(of_device_make_bus_id);
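/*
 * A minimal usage sketch (not part of the file above): a hypothetical platform
 * driver picking up per-compatible configuration via
 * of_device_get_match_data(). The "vendor,foo"/"vendor,foov2" compatibles,
 * struct foo_cfg and foo_probe() are illustrative assumptions only.
 */
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>

struct foo_cfg {
	unsigned int max_channels;
};

static const struct foo_cfg foo_v1_cfg = { .max_channels = 4 };
static const struct foo_cfg foo_v2_cfg = { .max_channels = 8 };

static const struct of_device_id foo_of_match[] = {
	{ .compatible = "vendor,foo",   .data = &foo_v1_cfg },
	{ .compatible = "vendor,foov2", .data = &foo_v2_cfg },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, foo_of_match);

static int foo_probe(struct platform_device *pdev)
{
	/* Returns the .data of the matching of_device_id entry, or NULL. */
	const struct foo_cfg *cfg = of_device_get_match_data(&pdev->dev);

	if (!cfg)
		return -EINVAL;
	dev_info(&pdev->dev, "configured for %u channels\n", cfg->max_channels);
	return 0;
}

static struct platform_driver foo_driver = {
	.probe = foo_probe,
	.driver = {
		.name = "foo",
		.of_match_table = foo_of_match,
	},
};
module_platform_driver(foo_driver);

MODULE_LICENSE("GPL");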
// SPDX-License-Identifier: GPL-2.0
/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to
 * support fairer distribution of tags between multiple submitters when a
 * shared tag map is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"

/*
 * Recalculate wakeup batch when tag is shared by hctx.
 */
static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
				     unsigned int users)
{
	if (!users)
		return;

	sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags, users);
	sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags, users);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail * to get tag when first time, the other shared-tag users could reserve * budget for it. */ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { unsigned int users; struct blk_mq_tags *tags = hctx->tags; /* * calling test_bit() prior to test_and_set_bit() is intentional, * it avoids dirtying the cacheline if the queue is already active. */ if (blk_mq_is_shared_tags(hctx->flags)) { struct request_queue *q = hctx->queue; if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) || test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return; } else { if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) || test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return; } spin_lock_irq(&tags->lock); users = tags->active_queues + 1; WRITE_ONCE(tags->active_queues, users); blk_mq_update_wake_batch(tags, users); spin_unlock_irq(&tags->lock); } /* * Wakeup all potentially sleeping on tags */ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) { sbitmap_queue_wake_all(&tags->bitmap_tags); if (include_reserve) sbitmap_queue_wake_all(&tags->breserved_tags); } /* * If a previously busy queue goes inactive, potential waiters could now * be allowed to queue. Wake them up and check. */ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { struct blk_mq_tags *tags = hctx->tags; unsigned int users; if (blk_mq_is_shared_tags(hctx->flags)) { struct request_queue *q = hctx->queue; if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return; } else { if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return; } spin_lock_irq(&tags->lock); users = tags->active_queues - 1; WRITE_ONCE(tags->active_queues, users); blk_mq_update_wake_batch(tags, users); spin_unlock_irq(&tags->lock); blk_mq_tag_wakeup_all(tags, false); } static int __blk_mq_get_tag(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt) { if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) && !hctx_may_queue(data->hctx, bt)) return BLK_MQ_NO_TAG; if (data->shallow_depth) return sbitmap_queue_get_shallow(bt, data->shallow_depth); else return __sbitmap_queue_get(bt); } unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags, unsigned int *offset) { struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct sbitmap_queue *bt = &tags->bitmap_tags; unsigned long ret; if (data->shallow_depth ||data->flags & BLK_MQ_REQ_RESERVED || data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) return 0; ret = __sbitmap_queue_get_batch(bt, nr_tags, offset); *offset += tags->nr_reserved_tags; return ret; } unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) { struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct sbitmap_queue *bt; struct sbq_wait_state *ws; DEFINE_SBQ_WAIT(wait); unsigned int tag_offset; int tag; if (data->flags & BLK_MQ_REQ_RESERVED) { if (unlikely(!tags->nr_reserved_tags)) { WARN_ON_ONCE(1); return BLK_MQ_NO_TAG; } bt = &tags->breserved_tags; tag_offset = 0; } else { bt = &tags->bitmap_tags; tag_offset = tags->nr_reserved_tags; } tag = __blk_mq_get_tag(data, bt); if (tag != BLK_MQ_NO_TAG) goto found_tag; if (data->flags & BLK_MQ_REQ_NOWAIT) return BLK_MQ_NO_TAG; ws = bt_wait_ptr(bt, data->hctx); do { struct sbitmap_queue *bt_prev; /* * We're out of tags on this hardware queue, kick any * pending IO submits before going to sleep waiting for * some to complete. 
*/ blk_mq_run_hw_queue(data->hctx, false); /* * Retry tag allocation after running the hardware queue, * as running the queue may also have found completions. */ tag = __blk_mq_get_tag(data, bt); if (tag != BLK_MQ_NO_TAG) break; sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE); tag = __blk_mq_get_tag(data, bt); if (tag != BLK_MQ_NO_TAG) break; bt_prev = bt; io_schedule(); sbitmap_finish_wait(bt, ws, &wait); data->ctx = blk_mq_get_ctx(data->q); data->hctx = blk_mq_map_queue(data->q, data->cmd_flags, data->ctx); tags = blk_mq_tags_from_data(data); if (data->flags & BLK_MQ_REQ_RESERVED) bt = &tags->breserved_tags; else bt = &tags->bitmap_tags; /* * If destination hw queue is changed, fake wake up on * previous queue for compensating the wake up miss, so * other allocations on previous queue won't be starved. */ if (bt != bt_prev) sbitmap_queue_wake_up(bt_prev, 1); ws = bt_wait_ptr(bt, data->hctx); } while (1); sbitmap_finish_wait(bt, ws, &wait); found_tag: /* * Give up this allocation if the hctx is inactive. The caller will * retry on an active hctx. */ if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) { blk_mq_put_tag(tags, data->ctx, tag + tag_offset); return BLK_MQ_NO_TAG; } return tag + tag_offset; } void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag) { if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; BUG_ON(real_tag >= tags->nr_tags); sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu); } else { sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu); } } void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags) { sbitmap_queue_clear_batch(&tags->bitmap_tags, tags->nr_reserved_tags, tag_array, nr_tags); } struct bt_iter_data { struct blk_mq_hw_ctx *hctx; struct request_queue *q; busy_tag_iter_fn *fn; void *data; bool reserved; }; static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags, unsigned int bitnr) { struct request *rq; unsigned long flags; spin_lock_irqsave(&tags->lock, flags); rq = tags->rqs[bitnr]; if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq)) rq = NULL; spin_unlock_irqrestore(&tags->lock, flags); return rq; } static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) { struct bt_iter_data *iter_data = data; struct blk_mq_hw_ctx *hctx = iter_data->hctx; struct request_queue *q = iter_data->q; struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_tags *tags; struct request *rq; bool ret = true; if (blk_mq_is_shared_tags(set->flags)) tags = set->shared_tags; else tags = hctx->tags; if (!iter_data->reserved) bitnr += tags->nr_reserved_tags; /* * We can hit rq == NULL here, because the tagging functions * test and set the bit before assigning ->rqs[]. */ rq = blk_mq_find_and_get_req(tags, bitnr); if (!rq) return true; if (rq->q == q && (!hctx || rq->mq_hctx == hctx)) ret = iter_data->fn(rq, iter_data->data); blk_mq_put_rq_ref(rq); return ret; } /** * bt_for_each - iterate over the requests associated with a hardware queue * @hctx: Hardware queue to examine. * @q: Request queue to examine. * @bt: sbitmap to examine. This is either the breserved_tags member * or the bitmap_tags member of struct blk_mq_tags. * @fn: Pointer to the function that will be called for each request * associated with @hctx that has been assigned a driver tag. * @fn will be called as follows: @fn(@hctx, rq, @data, @reserved) * where rq is a pointer to a request. Return true to continue * iterating tags, false to stop. 
* @data: Will be passed as third argument to @fn. * @reserved: Indicates whether @bt is the breserved_tags member or the * bitmap_tags member of struct blk_mq_tags. */ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q, struct sbitmap_queue *bt, busy_tag_iter_fn *fn, void *data, bool reserved) { struct bt_iter_data iter_data = { .hctx = hctx, .fn = fn, .data = data, .reserved = reserved, .q = q, }; sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data); } struct bt_tags_iter_data { struct blk_mq_tags *tags; busy_tag_iter_fn *fn; void *data; unsigned int flags; }; #define BT_TAG_ITER_RESERVED (1 << 0) #define BT_TAG_ITER_STARTED (1 << 1) #define BT_TAG_ITER_STATIC_RQS (1 << 2) static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) { struct bt_tags_iter_data *iter_data = data; struct blk_mq_tags *tags = iter_data->tags; struct request *rq; bool ret = true; bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS); if (!(iter_data->flags & BT_TAG_ITER_RESERVED)) bitnr += tags->nr_reserved_tags; /* * We can hit rq == NULL here, because the tagging functions * test and set the bit before assigning ->rqs[]. */ if (iter_static_rqs) rq = tags->static_rqs[bitnr]; else rq = blk_mq_find_and_get_req(tags, bitnr); if (!rq) return true; if (!(iter_data->flags & BT_TAG_ITER_STARTED) || blk_mq_request_started(rq)) ret = iter_data->fn(rq, iter_data->data); if (!iter_static_rqs) blk_mq_put_rq_ref(rq); return ret; } /** * bt_tags_for_each - iterate over the requests in a tag map * @tags: Tag map to iterate over. * @bt: sbitmap to examine. This is either the breserved_tags member * or the bitmap_tags member of struct blk_mq_tags. * @fn: Pointer to the function that will be called for each started * request. @fn will be called as follows: @fn(rq, @data, * @reserved) where rq is a pointer to a request. Return true * to continue iterating tags, false to stop. * @data: Will be passed as second argument to @fn. * @flags: BT_TAG_ITER_* */ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt, busy_tag_iter_fn *fn, void *data, unsigned int flags) { struct bt_tags_iter_data iter_data = { .tags = tags, .fn = fn, .data = data, .flags = flags, }; if (tags->rqs) sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data); } static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv, unsigned int flags) { WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED); if (tags->nr_reserved_tags) bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, flags | BT_TAG_ITER_RESERVED); bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags); } /** * blk_mq_all_tag_iter - iterate over all requests in a tag map * @tags: Tag map to iterate over. * @fn: Pointer to the function that will be called for each * request. @fn will be called as follows: @fn(rq, @priv, * reserved) where rq is a pointer to a request. 'reserved' * indicates whether or not @rq is a reserved request. Return * true to continue iterating tags, false to stop. * @priv: Will be passed as second argument to @fn. * * Caller has to pass the tag map from which requests are allocated. */ void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS); } /** * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set * @tagset: Tag set to iterate over. * @fn: Pointer to the function that will be called for each started * request. 
@fn will be called as follows: @fn(rq, @priv, * reserved) where rq is a pointer to a request. 'reserved' * indicates whether or not @rq is a reserved request. Return * true to continue iterating tags, false to stop. * @priv: Will be passed as second argument to @fn. * * We grab one request reference before calling @fn and release it after * @fn returns. */ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv) { unsigned int flags = tagset->flags; int i, nr_tags; nr_tags = blk_mq_is_shared_tags(flags) ? 1 : tagset->nr_hw_queues; for (i = 0; i < nr_tags; i++) { if (tagset->tags && tagset->tags[i]) __blk_mq_all_tag_iter(tagset->tags[i], fn, priv, BT_TAG_ITER_STARTED); } } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); static bool blk_mq_tagset_count_completed_rqs(struct request *rq, void *data) { unsigned *count = data; if (blk_mq_request_completed(rq)) (*count)++; return true; } /** * blk_mq_tagset_wait_completed_request - Wait until all scheduled request * completions have finished. * @tagset: Tag set to drain completed request * * Note: This function has to be run after all IO queues are shutdown */ void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset) { while (true) { unsigned count = 0; blk_mq_tagset_busy_iter(tagset, blk_mq_tagset_count_completed_rqs, &count); if (!count) break; msleep(5); } } EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request); /** * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag * @q: Request queue to examine. * @fn: Pointer to the function that will be called for each request * on @q. @fn will be called as follows: @fn(hctx, rq, @priv, * reserved) where rq is a pointer to a request and hctx points * to the hardware queue associated with the request. 'reserved' * indicates whether or not @rq is a reserved request. * @priv: Will be passed as third argument to @fn. * * Note: if @q->tag_set is shared with other request queues then @fn will be * called for all requests on all queues that share that tag set and not only * for requests associated with @q. */ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, void *priv) { /* * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and hctx_table * while the queue is frozen. So we can use q_usage_counter to avoid * racing with it. 
*/ if (!percpu_ref_tryget(&q->q_usage_counter)) return; if (blk_mq_is_shared_tags(q->tag_set->flags)) { struct blk_mq_tags *tags = q->tag_set->shared_tags; struct sbitmap_queue *bresv = &tags->breserved_tags; struct sbitmap_queue *btags = &tags->bitmap_tags; if (tags->nr_reserved_tags) bt_for_each(NULL, q, bresv, fn, priv, true); bt_for_each(NULL, q, btags, fn, priv, false); } else { struct blk_mq_hw_ctx *hctx; unsigned long i; queue_for_each_hw_ctx(q, hctx, i) { struct blk_mq_tags *tags = hctx->tags; struct sbitmap_queue *bresv = &tags->breserved_tags; struct sbitmap_queue *btags = &tags->bitmap_tags; /* * If no software queues are currently mapped to this * hardware queue, there's nothing to check */ if (!blk_mq_hw_queue_mapped(hctx)) continue; if (tags->nr_reserved_tags) bt_for_each(hctx, q, bresv, fn, priv, true); bt_for_each(hctx, q, btags, fn, priv, false); } } blk_queue_exit(q); } static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, bool round_robin, int node) { return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL, node); } int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags, struct sbitmap_queue *breserved_tags, unsigned int queue_depth, unsigned int reserved, int node, int alloc_policy) { unsigned int depth = queue_depth - reserved; bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; if (bt_alloc(bitmap_tags, depth, round_robin, node)) return -ENOMEM; if (bt_alloc(breserved_tags, reserved, round_robin, node)) goto free_bitmap_tags; return 0; free_bitmap_tags: sbitmap_queue_free(bitmap_tags); return -ENOMEM; } struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, unsigned int reserved_tags, int node, int alloc_policy) { struct blk_mq_tags *tags; if (total_tags > BLK_MQ_TAG_MAX) { pr_err("blk-mq: tag depth too large\n"); return NULL; } tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node); if (!tags) return NULL; tags->nr_tags = total_tags; tags->nr_reserved_tags = reserved_tags; spin_lock_init(&tags->lock); if (blk_mq_init_bitmaps(&tags->bitmap_tags, &tags->breserved_tags, total_tags, reserved_tags, node, alloc_policy) < 0) { kfree(tags); return NULL; } return tags; } void blk_mq_free_tags(struct blk_mq_tags *tags) { sbitmap_queue_free(&tags->bitmap_tags); sbitmap_queue_free(&tags->breserved_tags); kfree(tags); } int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tagsptr, unsigned int tdepth, bool can_grow) { struct blk_mq_tags *tags = *tagsptr; if (tdepth <= tags->nr_reserved_tags) return -EINVAL; /* * If we are allowed to grow beyond the original size, allocate * a new set of tags before freeing the old one. */ if (tdepth > tags->nr_tags) { struct blk_mq_tag_set *set = hctx->queue->tag_set; struct blk_mq_tags *new; if (!can_grow) return -EINVAL; /* * We need some sort of upper limit, set it high enough that * no valid use cases should require more. */ if (tdepth > MAX_SCHED_RQ) return -EINVAL; /* * Only the sbitmap needs resizing since we allocated the max * initially. */ if (blk_mq_is_shared_tags(set->flags)) return 0; new = blk_mq_alloc_map_and_rqs(set, hctx->queue_num, tdepth); if (!new) return -ENOMEM; blk_mq_free_map_and_rqs(set, *tagsptr, hctx->queue_num); *tagsptr = new; } else { /* * Don't need (or can't) update reserved tags here, they * remain static and should never need resizing. 
*/ sbitmap_queue_resize(&tags->bitmap_tags, tdepth - tags->nr_reserved_tags); } return 0; } void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size) { struct blk_mq_tags *tags = set->shared_tags; sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags); } void blk_mq_tag_update_sched_shared_tags(struct request_queue *q) { sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags, q->nr_requests - q->tag_set->reserved_tags); } /** * blk_mq_unique_tag() - return a tag that is unique queue-wide * @rq: request for which to compute a unique tag * * The tag field in struct request is unique per hardware queue but not over * all hardware queues. Hence this function that returns a tag with the * hardware context index in the upper bits and the per hardware queue tag in * the lower bits. * * Note: When called for a request that is queued on a non-multiqueue request * queue, the hardware context index is set to zero. */ u32 blk_mq_unique_tag(struct request *rq) { return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) | (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); } EXPORT_SYMBOL(blk_mq_unique_tag);
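/*
 * Illustrative sketch (not part of blk-mq-tag.c): how a caller might split
 * the queue-wide value produced by blk_mq_unique_tag() above back into a
 * hardware queue index and a per-queue tag. The helper name
 * demo_decode_unique_tag() is hypothetical; the kernel's own decoding
 * helpers live in include/linux/blk-mq.h.
 */
#include <linux/blk-mq.h>

static inline void demo_decode_unique_tag(u32 unique_tag, u16 *hwq, u16 *tag)
{
	/* Upper bits carry the hardware queue index... */
	*hwq = unique_tag >> BLK_MQ_UNIQUE_TAG_BITS;
	/* ...lower bits carry the per-hardware-queue tag. */
	*tag = unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}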
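/*
 * Illustrative sketch (not part of either file above): a minimal
 * busy_tag_iter_fn callback in the same style as
 * blk_mq_tagset_count_completed_rqs(), counting every started request in a
 * tag set. demo_count_rq() and demo_nr_started() are hypothetical names.
 */
#include <linux/blk-mq.h>

static bool demo_count_rq(struct request *rq, void *data)
{
	unsigned int *count = data;

	(*count)++;
	return true;	/* keep iterating over the remaining tags */
}

static unsigned int demo_nr_started(struct blk_mq_tag_set *set)
{
	unsigned int count = 0;

	/* Visits only started requests (BT_TAG_ITER_STARTED). */
	blk_mq_tagset_busy_iter(set, demo_count_rq, &count);
	return count;
}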
// SPDX-License-Identifier: GPL-2.0 // Generated by scripts/atomic/gen-atomic-instrumented.sh // DO NOT MODIFY THIS FILE DIRECTLY /* * This file provides atomic operations with explicit instrumentation (e.g. * KASAN, KCSAN), which should be used unless it is necessary to avoid * instrumentation. Where it is necessary to avoid instrumentation, the * raw_atomic*() operations should be used.
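 *
 * For example (an editor's illustration, not generated output), the
 * atomic_inc() wrapper below boils down to:
 *
 *	instrument_atomic_read_write(v, sizeof(*v));
 *	raw_atomic_inc(v);
 *
 * so KASAN/KCSAN observe the access before the architecture's raw operation
 * runs.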
*/ #ifndef _LINUX_ATOMIC_INSTRUMENTED_H #define _LINUX_ATOMIC_INSTRUMENTED_H #include <linux/build_bug.h> #include <linux/compiler.h> #include <linux/instrumented.h> /** * atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t * * Atomically loads the value of @v with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_read() there. * * Return: The value loaded from @v. */ static __always_inline int atomic_read(const atomic_t *v) { instrument_atomic_read(v, sizeof(*v)); return raw_atomic_read(v); } /** * atomic_read_acquire() - atomic load with acquire ordering * @v: pointer to atomic_t * * Atomically loads the value of @v with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_read_acquire() there. * * Return: The value loaded from @v. */ static __always_inline int atomic_read_acquire(const atomic_t *v) { instrument_atomic_read(v, sizeof(*v)); return raw_atomic_read_acquire(v); } /** * atomic_set() - atomic set with relaxed ordering * @v: pointer to atomic_t * @i: int value to assign * * Atomically sets @v to @i with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_set() there. * * Return: Nothing. */ static __always_inline void atomic_set(atomic_t *v, int i) { instrument_atomic_write(v, sizeof(*v)); raw_atomic_set(v, i); } /** * atomic_set_release() - atomic set with release ordering * @v: pointer to atomic_t * @i: int value to assign * * Atomically sets @v to @i with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_set_release() there. * * Return: Nothing. */ static __always_inline void atomic_set_release(atomic_t *v, int i) { kcsan_release(); instrument_atomic_write(v, sizeof(*v)); raw_atomic_set_release(v, i); } /** * atomic_add() - atomic add with relaxed ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_add() there. * * Return: Nothing. */ static __always_inline void atomic_add(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_add(i, v); } /** * atomic_add_return() - atomic add with full ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_add_return() there. * * Return: The updated value of @v. */ static __always_inline int atomic_add_return(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_add_return(i, v); } /** * atomic_add_return_acquire() - atomic add with acquire ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_add_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline int atomic_add_return_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_add_return_acquire(i, v); } /** * atomic_add_return_release() - atomic add with release ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_add_return_release() there. * * Return: The updated value of @v. 
*/ static __always_inline int atomic_add_return_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_add_return_release(i, v); } /** * atomic_add_return_relaxed() - atomic add with relaxed ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_add_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline int atomic_add_return_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_add_return_relaxed(i, v); } /** * atomic_fetch_add() - atomic add with full ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_add() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_add(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_add(i, v); } /** * atomic_fetch_add_acquire() - atomic add with acquire ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_add_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_add_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_add_acquire(i, v); } /** * atomic_fetch_add_release() - atomic add with release ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_add_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_add_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_add_release(i, v); } /** * atomic_fetch_add_relaxed() - atomic add with relaxed ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_add_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_add_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_add_relaxed(i, v); } /** * atomic_sub() - atomic subtract with relaxed ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_sub() there. * * Return: Nothing. */ static __always_inline void atomic_sub(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_sub(i, v); } /** * atomic_sub_return() - atomic subtract with full ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_sub_return() there. * * Return: The updated value of @v. */ static __always_inline int atomic_sub_return(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_sub_return(i, v); } /** * atomic_sub_return_acquire() - atomic subtract with acquire ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with acquire ordering. 
* * Unsafe to use in noinstr code; use raw_atomic_sub_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline int atomic_sub_return_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_sub_return_acquire(i, v); } /** * atomic_sub_return_release() - atomic subtract with release ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_sub_return_release() there. * * Return: The updated value of @v. */ static __always_inline int atomic_sub_return_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_sub_return_release(i, v); } /** * atomic_sub_return_relaxed() - atomic subtract with relaxed ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_sub_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline int atomic_sub_return_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_sub_return_relaxed(i, v); } /** * atomic_fetch_sub() - atomic subtract with full ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_sub() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_sub(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_sub(i, v); } /** * atomic_fetch_sub_acquire() - atomic subtract with acquire ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_sub_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_sub_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_sub_acquire(i, v); } /** * atomic_fetch_sub_release() - atomic subtract with release ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_sub_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_sub_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_sub_release(i, v); } /** * atomic_fetch_sub_relaxed() - atomic subtract with relaxed ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_sub_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_sub_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_sub_relaxed(i, v); } /** * atomic_inc() - atomic increment with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_inc() there. * * Return: Nothing. 
*/ static __always_inline void atomic_inc(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_inc(v); } /** * atomic_inc_return() - atomic increment with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_inc_return() there. * * Return: The updated value of @v. */ static __always_inline int atomic_inc_return(atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_inc_return(v); } /** * atomic_inc_return_acquire() - atomic increment with acquire ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_inc_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline int atomic_inc_return_acquire(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_inc_return_acquire(v); } /** * atomic_inc_return_release() - atomic increment with release ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_inc_return_release() there. * * Return: The updated value of @v. */ static __always_inline int atomic_inc_return_release(atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_inc_return_release(v); } /** * atomic_inc_return_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_inc_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline int atomic_inc_return_relaxed(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_inc_return_relaxed(v); } /** * atomic_fetch_inc() - atomic increment with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_inc() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_inc(atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_inc(v); } /** * atomic_fetch_inc_acquire() - atomic increment with acquire ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_inc_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_inc_acquire(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_inc_acquire(v); } /** * atomic_fetch_inc_release() - atomic increment with release ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_inc_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_inc_release(atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_inc_release(v); } /** * atomic_fetch_inc_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_inc_relaxed() there. * * Return: The original value of @v. 
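 *
 * Illustrative usage (editor's sketch, not generated output): handing out
 * sequence numbers where only atomicity, not ordering, matters:
 *
 *	int id = atomic_fetch_inc_relaxed(&next_id);
 *
 * (next_id is a hypothetical atomic_t counter.)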
*/ static __always_inline int atomic_fetch_inc_relaxed(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_inc_relaxed(v); } /** * atomic_dec() - atomic decrement with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_dec() there. * * Return: Nothing. */ static __always_inline void atomic_dec(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_dec(v); } /** * atomic_dec_return() - atomic decrement with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_dec_return() there. * * Return: The updated value of @v. */ static __always_inline int atomic_dec_return(atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_dec_return(v); } /** * atomic_dec_return_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_dec_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline int atomic_dec_return_acquire(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_dec_return_acquire(v); } /** * atomic_dec_return_release() - atomic decrement with release ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_dec_return_release() there. * * Return: The updated value of @v. */ static __always_inline int atomic_dec_return_release(atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_dec_return_release(v); } /** * atomic_dec_return_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_dec_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline int atomic_dec_return_relaxed(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_dec_return_relaxed(v); } /** * atomic_fetch_dec() - atomic decrement with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_dec() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_dec(atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_dec(v); } /** * atomic_fetch_dec_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_dec_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_dec_acquire(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_dec_acquire(v); } /** * atomic_fetch_dec_release() - atomic decrement with release ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_dec_release() there. * * Return: The original value of @v. 
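 *
 * Illustrative usage (editor's sketch, not generated output): a
 * reference-count style put, where release ordering publishes earlier stores
 * before the count can reach zero and the old value identifies the final
 * reference:
 *
 *	if (atomic_fetch_dec_release(&obj->refs) == 1)
 *		obj_free(obj);
 *
 * (obj and obj_free() are hypothetical; the freeing path would typically
 * pair this with an acquire before touching the object's memory.)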
*/ static __always_inline int atomic_fetch_dec_release(atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_dec_release(v); } /** * atomic_fetch_dec_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_dec_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_dec_relaxed(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_dec_relaxed(v); } /** * atomic_and() - atomic bitwise AND with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_and() there. * * Return: Nothing. */ static __always_inline void atomic_and(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_and(i, v); } /** * atomic_fetch_and() - atomic bitwise AND with full ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_and() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_and(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_and(i, v); } /** * atomic_fetch_and_acquire() - atomic bitwise AND with acquire ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_and_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_and_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_and_acquire(i, v); } /** * atomic_fetch_and_release() - atomic bitwise AND with release ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_and_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_and_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_and_release(i, v); } /** * atomic_fetch_and_relaxed() - atomic bitwise AND with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_and_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_and_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_and_relaxed(i, v); } /** * atomic_andnot() - atomic bitwise AND NOT with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_andnot() there. * * Return: Nothing. */ static __always_inline void atomic_andnot(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_andnot(i, v); } /** * atomic_fetch_andnot() - atomic bitwise AND NOT with full ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_andnot() there. * * Return: The original value of @v. 
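 *
 * Illustrative usage (editor's sketch, not generated output): atomically
 * clearing a flag bit while learning whether it had been set:
 *
 *	if (atomic_fetch_andnot(DEMO_FLAG, &state) & DEMO_FLAG)
 *		... this caller observed the flag set and cleared it ...
 *
 * (DEMO_FLAG and state are hypothetical.)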
*/ static __always_inline int atomic_fetch_andnot(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_andnot(i, v); } /** * atomic_fetch_andnot_acquire() - atomic bitwise AND NOT with acquire ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_andnot_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_andnot_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_andnot_acquire(i, v); } /** * atomic_fetch_andnot_release() - atomic bitwise AND NOT with release ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_andnot_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_andnot_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_andnot_release(i, v); } /** * atomic_fetch_andnot_relaxed() - atomic bitwise AND NOT with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_andnot_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_andnot_relaxed(i, v); } /** * atomic_or() - atomic bitwise OR with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_or() there. * * Return: Nothing. */ static __always_inline void atomic_or(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_or(i, v); } /** * atomic_fetch_or() - atomic bitwise OR with full ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_or() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_or(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_or(i, v); } /** * atomic_fetch_or_acquire() - atomic bitwise OR with acquire ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_or_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_or_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_or_acquire(i, v); } /** * atomic_fetch_or_release() - atomic bitwise OR with release ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_or_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_or_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_or_release(i, v); } /** * atomic_fetch_or_relaxed() - atomic bitwise OR with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v | @i) with relaxed ordering. 
* * Unsafe to use in noinstr code; use raw_atomic_fetch_or_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_or_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_or_relaxed(i, v); } /** * atomic_xor() - atomic bitwise XOR with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_xor() there. * * Return: Nothing. */ static __always_inline void atomic_xor(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_xor(i, v); } /** * atomic_fetch_xor() - atomic bitwise XOR with full ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_xor() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_xor(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_xor(i, v); } /** * atomic_fetch_xor_acquire() - atomic bitwise XOR with acquire ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_xor_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_xor_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_xor_acquire(i, v); } /** * atomic_fetch_xor_release() - atomic bitwise XOR with release ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_xor_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_xor_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_xor_release(i, v); } /** * atomic_fetch_xor_relaxed() - atomic bitwise XOR with relaxed ordering * @i: int value * @v: pointer to atomic_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_fetch_xor_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_fetch_xor_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_fetch_xor_relaxed(i, v); } /** * atomic_xchg() - atomic exchange with full ordering * @v: pointer to atomic_t * @new: int value to assign * * Atomically updates @v to @new with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_xchg() there. * * Return: The original value of @v. */ static __always_inline int atomic_xchg(atomic_t *v, int new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_xchg(v, new); } /** * atomic_xchg_acquire() - atomic exchange with acquire ordering * @v: pointer to atomic_t * @new: int value to assign * * Atomically updates @v to @new with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_xchg_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_xchg_acquire(atomic_t *v, int new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_xchg_acquire(v, new); } /** * atomic_xchg_release() - atomic exchange with release ordering * @v: pointer to atomic_t * @new: int value to assign * * Atomically updates @v to @new with release ordering. 
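 *
 * Illustrative sketch (not from the kernel sources; @slot and new_idx are
 * hypothetical): a producer can publish a value with release ordering so
 * that a consumer pairing with an acquire load observes the writes made
 * before the publish, while still learning which value was replaced:
 *
 *	prev = atomic_xchg_release(&slot, new_idx);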
* * Unsafe to use in noinstr code; use raw_atomic_xchg_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_xchg_release(atomic_t *v, int new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_xchg_release(v, new); } /** * atomic_xchg_relaxed() - atomic exchange with relaxed ordering * @v: pointer to atomic_t * @new: int value to assign * * Atomically updates @v to @new with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_xchg_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_xchg_relaxed(atomic_t *v, int new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_xchg_relaxed(v, new); } /** * atomic_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic_t * @old: int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg() there. * * Return: The original value of @v. */ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_cmpxchg(v, old, new); } /** * atomic_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic_t * @old: int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_acquire() there. * * Return: The original value of @v. */ static __always_inline int atomic_cmpxchg_acquire(atomic_t *v, int old, int new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_cmpxchg_acquire(v, old, new); } /** * atomic_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic_t * @old: int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_release() there. * * Return: The original value of @v. */ static __always_inline int atomic_cmpxchg_release(atomic_t *v, int old, int new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_cmpxchg_release(v, old, new); } /** * atomic_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic_t * @old: int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_relaxed() there. * * Return: The original value of @v. */ static __always_inline int atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_cmpxchg_relaxed(v, old, new); } /** * atomic_try_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic_t * @old: pointer to int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. 
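 *
 * Because a failed attempt updates @old, try_cmpxchg() fits naturally in a
 * read-modify-write loop. Illustrative sketch (not from the kernel sources;
 * compute_new() is hypothetical):
 *
 *	int old = atomic_read(v), new;
 *
 *	do {
 *		new = compute_new(old);	/* hypothetical transform */
 *	} while (!atomic_try_cmpxchg(v, &old, new));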
 *
 * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
atomic_try_cmpxchg(atomic_t *v, int *old, int new)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	instrument_atomic_read_write(old, sizeof(*old));
	return raw_atomic_try_cmpxchg(v, old, new);
}

/**
 * atomic_try_cmpxchg_acquire() - atomic compare and exchange with acquire ordering
 * @v: pointer to atomic_t
 * @old: pointer to int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with acquire ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_acquire() there.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
{
	instrument_atomic_read_write(v, sizeof(*v));
	instrument_atomic_read_write(old, sizeof(*old));
	return raw_atomic_try_cmpxchg_acquire(v, old, new);
}

/**
 * atomic_try_cmpxchg_release() - atomic compare and exchange with release ordering
 * @v: pointer to atomic_t
 * @old: pointer to int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with release ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_release() there.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
{
	kcsan_release();
	instrument_atomic_read_write(v, sizeof(*v));
	instrument_atomic_read_write(old, sizeof(*old));
	return raw_atomic_try_cmpxchg_release(v, old, new);
}

/**
 * atomic_try_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering
 * @v: pointer to atomic_t
 * @old: pointer to int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with relaxed ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_relaxed() there.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
{
	instrument_atomic_read_write(v, sizeof(*v));
	instrument_atomic_read_write(old, sizeof(*old));
	return raw_atomic_try_cmpxchg_relaxed(v, old, new);
}

/**
 * atomic_sub_and_test() - atomic subtract and test if zero with full ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with full ordering.
 *
 * Unsafe to use in noinstr code; use raw_atomic_sub_and_test() there.
 *
 * Return: @true if the resulting value of @v is zero, @false otherwise.
 */
static __always_inline bool
atomic_sub_and_test(int i, atomic_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic_sub_and_test(i, v);
}

/**
 * atomic_dec_and_test() - atomic decrement and test if zero with full ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with full ordering.
 *
 * Unsafe to use in noinstr code; use raw_atomic_dec_and_test() there.
 *
 * Return: @true if the resulting value of @v is zero, @false otherwise.
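 *
 * Illustrative sketch (not from the kernel sources; @obj and its refcnt
 * field are hypothetical): the classic "drop a reference, free on the last
 * put" pattern, although new code should normally prefer refcount_t for
 * reference counts:
 *
 *	if (atomic_dec_and_test(&obj->refcnt))
 *		kfree(obj);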
*/ static __always_inline bool atomic_dec_and_test(atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_dec_and_test(v); } /** * atomic_inc_and_test() - atomic increment and test if zero with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_inc_and_test() there. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool atomic_inc_and_test(atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_inc_and_test(v); } /** * atomic_add_negative() - atomic add and test if negative with full ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_add_negative() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic_add_negative(int i, atomic_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_add_negative(i, v); } /** * atomic_add_negative_acquire() - atomic add and test if negative with acquire ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_add_negative_acquire() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic_add_negative_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_add_negative_acquire(i, v); } /** * atomic_add_negative_release() - atomic add and test if negative with release ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_add_negative_release() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic_add_negative_release(int i, atomic_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_add_negative_release(i, v); } /** * atomic_add_negative_relaxed() - atomic add and test if negative with relaxed ordering * @i: int value to add * @v: pointer to atomic_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_add_negative_relaxed() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic_add_negative_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_add_negative_relaxed(i, v); } /** * atomic_fetch_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic_t * @a: int value to add * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_fetch_add_unless() there. * * Return: The original value of @v. 
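 *
 * Illustrative sketch (not from the kernel sources; @obj and its refcnt
 * field are hypothetical): take a reference only if the object is not
 * already on its way to being freed, using the returned original value to
 * detect the zero case:
 *
 *	if (atomic_fetch_add_unless(&obj->refcnt, 1, 0) == 0)
 *		return NULL;	/* already at zero; no reference taken */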
 */
static __always_inline int
atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic_fetch_add_unless(v, a, u);
}

/**
 * atomic_add_unless() - atomic add unless value with full ordering
 * @v: pointer to atomic_t
 * @a: int value to add
 * @u: int value to compare with
 *
 * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic_add_unless() there.
 *
 * Return: @true if @v was updated, @false otherwise.
 */
static __always_inline bool
atomic_add_unless(atomic_t *v, int a, int u)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic_add_unless(v, a, u);
}

/**
 * atomic_inc_not_zero() - atomic increment unless zero with full ordering
 * @v: pointer to atomic_t
 *
 * If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic_inc_not_zero() there.
 *
 * Return: @true if @v was updated, @false otherwise.
 */
static __always_inline bool
atomic_inc_not_zero(atomic_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic_inc_not_zero(v);
}

/**
 * atomic_inc_unless_negative() - atomic increment unless negative with full ordering
 * @v: pointer to atomic_t
 *
 * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic_inc_unless_negative() there.
 *
 * Return: @true if @v was updated, @false otherwise.
 */
static __always_inline bool
atomic_inc_unless_negative(atomic_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic_inc_unless_negative(v);
}

/**
 * atomic_dec_unless_positive() - atomic decrement unless positive with full ordering
 * @v: pointer to atomic_t
 *
 * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic_dec_unless_positive() there.
 *
 * Return: @true if @v was updated, @false otherwise.
 */
static __always_inline bool
atomic_dec_unless_positive(atomic_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic_dec_unless_positive(v);
}

/**
 * atomic_dec_if_positive() - atomic decrement if positive with full ordering
 * @v: pointer to atomic_t
 *
 * If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic_dec_if_positive() there.
 *
 * Return: The original value of @v minus one, regardless of whether @v was
 * updated.
 */
static __always_inline int
atomic_dec_if_positive(atomic_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic_dec_if_positive(v);
}

/**
 * atomic64_read() - atomic load with relaxed ordering
 * @v: pointer to atomic64_t
 *
 * Atomically loads the value of @v with relaxed ordering.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_read() there.
 *
 * Return: The value loaded from @v.
 */
static __always_inline s64
atomic64_read(const atomic64_t *v)
{
	instrument_atomic_read(v, sizeof(*v));
	return raw_atomic64_read(v);
}

/**
 * atomic64_read_acquire() - atomic load with acquire ordering
 * @v: pointer to atomic64_t
 *
 * Atomically loads the value of @v with acquire ordering.
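 *
 * Illustrative sketch (not from the kernel sources; @seq, @data and use()
 * are hypothetical): an acquire load pairs with a release store so that a
 * reader observing the updated counter also observes the writes published
 * before it:
 *
 *	writer:	data = fill();  atomic64_set_release(&seq, n);
 *	reader:	if (atomic64_read_acquire(&seq) == n) use(data);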
* * Unsafe to use in noinstr code; use raw_atomic64_read_acquire() there. * * Return: The value loaded from @v. */ static __always_inline s64 atomic64_read_acquire(const atomic64_t *v) { instrument_atomic_read(v, sizeof(*v)); return raw_atomic64_read_acquire(v); } /** * atomic64_set() - atomic set with relaxed ordering * @v: pointer to atomic64_t * @i: s64 value to assign * * Atomically sets @v to @i with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_set() there. * * Return: Nothing. */ static __always_inline void atomic64_set(atomic64_t *v, s64 i) { instrument_atomic_write(v, sizeof(*v)); raw_atomic64_set(v, i); } /** * atomic64_set_release() - atomic set with release ordering * @v: pointer to atomic64_t * @i: s64 value to assign * * Atomically sets @v to @i with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_set_release() there. * * Return: Nothing. */ static __always_inline void atomic64_set_release(atomic64_t *v, s64 i) { kcsan_release(); instrument_atomic_write(v, sizeof(*v)); raw_atomic64_set_release(v, i); } /** * atomic64_add() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add() there. * * Return: Nothing. */ static __always_inline void atomic64_add(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic64_add(i, v); } /** * atomic64_add_return() - atomic add with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add_return() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_add_return(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_add_return(i, v); } /** * atomic64_add_return_acquire() - atomic add with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_add_return_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_add_return_acquire(i, v); } /** * atomic64_add_return_release() - atomic add with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add_return_release() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_add_return_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_add_return_release(i, v); } /** * atomic64_add_return_relaxed() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_add_return_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_add_return_relaxed(i, v); } /** * atomic64_fetch_add() - atomic add with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. 
* * Unsafe to use in noinstr code; use raw_atomic64_fetch_add() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_add(i, v); } /** * atomic64_fetch_add_acquire() - atomic add with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_add_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_add_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_add_acquire(i, v); } /** * atomic64_fetch_add_release() - atomic add with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_add_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_add_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_add_release(i, v); } /** * atomic64_fetch_add_relaxed() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_add_relaxed() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_add_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_add_relaxed(i, v); } /** * atomic64_sub() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_sub() there. * * Return: Nothing. */ static __always_inline void atomic64_sub(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic64_sub(i, v); } /** * atomic64_sub_return() - atomic subtract with full ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_sub_return() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_sub_return(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_sub_return(i, v); } /** * atomic64_sub_return_acquire() - atomic subtract with acquire ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_sub_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_sub_return_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_sub_return_acquire(i, v); } /** * atomic64_sub_return_release() - atomic subtract with release ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_sub_return_release() there. * * Return: The updated value of @v. 
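 *
 * Illustrative sketch (not from the kernel sources; @pool, its fields and
 * nbytes are hypothetical): the returned updated value can be tested
 * directly, for example when returning capacity to a budget and reacting to
 * the new level:
 *
 *	if (atomic64_sub_return_release(nbytes, &pool->inflight) == 0)
 *		wake_up(&pool->wait);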
*/ static __always_inline s64 atomic64_sub_return_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_sub_return_release(i, v); } /** * atomic64_sub_return_relaxed() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_sub_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_sub_return_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_sub_return_relaxed(i, v); } /** * atomic64_fetch_sub() - atomic subtract with full ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_sub() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_sub(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_sub(i, v); } /** * atomic64_fetch_sub_acquire() - atomic subtract with acquire ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_sub_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_sub_acquire(i, v); } /** * atomic64_fetch_sub_release() - atomic subtract with release ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_sub_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_sub_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_sub_release(i, v); } /** * atomic64_fetch_sub_relaxed() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_sub_relaxed() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_sub_relaxed(i, v); } /** * atomic64_inc() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_inc() there. * * Return: Nothing. */ static __always_inline void atomic64_inc(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic64_inc(v); } /** * atomic64_inc_return() - atomic increment with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_inc_return() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_inc_return(atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_inc_return(v); } /** * atomic64_inc_return_acquire() - atomic increment with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with acquire ordering. 
* * Unsafe to use in noinstr code; use raw_atomic64_inc_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_inc_return_acquire(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_inc_return_acquire(v); } /** * atomic64_inc_return_release() - atomic increment with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_inc_return_release() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_inc_return_release(atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_inc_return_release(v); } /** * atomic64_inc_return_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_inc_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_inc_return_relaxed(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_inc_return_relaxed(v); } /** * atomic64_fetch_inc() - atomic increment with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_inc() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_inc(atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_inc(v); } /** * atomic64_fetch_inc_acquire() - atomic increment with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_inc_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_inc_acquire(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_inc_acquire(v); } /** * atomic64_fetch_inc_release() - atomic increment with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_inc_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_inc_release(atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_inc_release(v); } /** * atomic64_fetch_inc_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_inc_relaxed() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_inc_relaxed(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_inc_relaxed(v); } /** * atomic64_dec() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_dec() there. * * Return: Nothing. */ static __always_inline void atomic64_dec(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic64_dec(v); } /** * atomic64_dec_return() - atomic decrement with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with full ordering. 
* * Unsafe to use in noinstr code; use raw_atomic64_dec_return() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_dec_return(atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_dec_return(v); } /** * atomic64_dec_return_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_dec_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_dec_return_acquire(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_dec_return_acquire(v); } /** * atomic64_dec_return_release() - atomic decrement with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_dec_return_release() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_dec_return_release(atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_dec_return_release(v); } /** * atomic64_dec_return_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_dec_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline s64 atomic64_dec_return_relaxed(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_dec_return_relaxed(v); } /** * atomic64_fetch_dec() - atomic decrement with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_dec() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_dec(atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_dec(v); } /** * atomic64_fetch_dec_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_dec_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_dec_acquire(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_dec_acquire(v); } /** * atomic64_fetch_dec_release() - atomic decrement with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_dec_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_dec_release(atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_dec_release(v); } /** * atomic64_fetch_dec_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_dec_relaxed() there. * * Return: The original value of @v. 
*/ static __always_inline s64 atomic64_fetch_dec_relaxed(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_dec_relaxed(v); } /** * atomic64_and() - atomic bitwise AND with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_and() there. * * Return: Nothing. */ static __always_inline void atomic64_and(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic64_and(i, v); } /** * atomic64_fetch_and() - atomic bitwise AND with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_and() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_and(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_and(i, v); } /** * atomic64_fetch_and_acquire() - atomic bitwise AND with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_and_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_and_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_and_acquire(i, v); } /** * atomic64_fetch_and_release() - atomic bitwise AND with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_and_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_and_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_and_release(i, v); } /** * atomic64_fetch_and_relaxed() - atomic bitwise AND with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_and_relaxed() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_and_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_and_relaxed(i, v); } /** * atomic64_andnot() - atomic bitwise AND NOT with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_andnot() there. * * Return: Nothing. */ static __always_inline void atomic64_andnot(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic64_andnot(i, v); } /** * atomic64_fetch_andnot() - atomic bitwise AND NOT with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_andnot() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_andnot(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_andnot(i, v); } /** * atomic64_fetch_andnot_acquire() - atomic bitwise AND NOT with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with acquire ordering. 
* * Unsafe to use in noinstr code; use raw_atomic64_fetch_andnot_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_andnot_acquire(i, v); } /** * atomic64_fetch_andnot_release() - atomic bitwise AND NOT with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_andnot_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_andnot_release(i, v); } /** * atomic64_fetch_andnot_relaxed() - atomic bitwise AND NOT with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_andnot_relaxed() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_andnot_relaxed(i, v); } /** * atomic64_or() - atomic bitwise OR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_or() there. * * Return: Nothing. */ static __always_inline void atomic64_or(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic64_or(i, v); } /** * atomic64_fetch_or() - atomic bitwise OR with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_or() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_or(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_or(i, v); } /** * atomic64_fetch_or_acquire() - atomic bitwise OR with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_or_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_or_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_or_acquire(i, v); } /** * atomic64_fetch_or_release() - atomic bitwise OR with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_or_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_or_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_or_release(i, v); } /** * atomic64_fetch_or_relaxed() - atomic bitwise OR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_or_relaxed() there. * * Return: The original value of @v. 
*/ static __always_inline s64 atomic64_fetch_or_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_or_relaxed(i, v); } /** * atomic64_xor() - atomic bitwise XOR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_xor() there. * * Return: Nothing. */ static __always_inline void atomic64_xor(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic64_xor(i, v); } /** * atomic64_fetch_xor() - atomic bitwise XOR with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_xor() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_xor(i, v); } /** * atomic64_fetch_xor_acquire() - atomic bitwise XOR with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_xor_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_xor_acquire(i, v); } /** * atomic64_fetch_xor_release() - atomic bitwise XOR with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_xor_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_xor_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_xor_release(i, v); } /** * atomic64_fetch_xor_relaxed() - atomic bitwise XOR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_xor_relaxed() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_fetch_xor_relaxed(i, v); } /** * atomic64_xchg() - atomic exchange with full ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_xchg() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_xchg(atomic64_t *v, s64 new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_xchg(v, new); } /** * atomic64_xchg_acquire() - atomic exchange with acquire ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_xchg_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_xchg_acquire(atomic64_t *v, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_xchg_acquire(v, new); } /** * atomic64_xchg_release() - atomic exchange with release ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with release ordering. 
* * Unsafe to use in noinstr code; use raw_atomic64_xchg_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_xchg_release(atomic64_t *v, s64 new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_xchg_release(v, new); } /** * atomic64_xchg_relaxed() - atomic exchange with relaxed ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_xchg_relaxed() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_xchg_relaxed(atomic64_t *v, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_xchg_relaxed(v, new); } /** * atomic64_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_cmpxchg(v, old, new); } /** * atomic64_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_acquire() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_cmpxchg_acquire(v, old, new); } /** * atomic64_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_release() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_cmpxchg_release(v, old, new); } /** * atomic64_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_relaxed() there. * * Return: The original value of @v. */ static __always_inline s64 atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_cmpxchg_relaxed(v, old, new); } /** * atomic64_try_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. 
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg() there.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	instrument_atomic_read_write(old, sizeof(*old));
	return raw_atomic64_try_cmpxchg(v, old, new);
}

/**
 * atomic64_try_cmpxchg_acquire() - atomic compare and exchange with acquire ordering
 * @v: pointer to atomic64_t
 * @old: pointer to s64 value to compare with
 * @new: s64 value to assign
 *
 * If (@v == @old), atomically updates @v to @new with acquire ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_acquire() there.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
{
	instrument_atomic_read_write(v, sizeof(*v));
	instrument_atomic_read_write(old, sizeof(*old));
	return raw_atomic64_try_cmpxchg_acquire(v, old, new);
}

/**
 * atomic64_try_cmpxchg_release() - atomic compare and exchange with release ordering
 * @v: pointer to atomic64_t
 * @old: pointer to s64 value to compare with
 * @new: s64 value to assign
 *
 * If (@v == @old), atomically updates @v to @new with release ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_release() there.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
{
	kcsan_release();
	instrument_atomic_read_write(v, sizeof(*v));
	instrument_atomic_read_write(old, sizeof(*old));
	return raw_atomic64_try_cmpxchg_release(v, old, new);
}

/**
 * atomic64_try_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering
 * @v: pointer to atomic64_t
 * @old: pointer to s64 value to compare with
 * @new: s64 value to assign
 *
 * If (@v == @old), atomically updates @v to @new with relaxed ordering.
 * Otherwise, @v is not modified, @old is updated to the current value of @v,
 * and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_relaxed() there.
 *
 * Return: @true if the exchange occurred, @false otherwise.
 */
static __always_inline bool
atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
{
	instrument_atomic_read_write(v, sizeof(*v));
	instrument_atomic_read_write(old, sizeof(*old));
	return raw_atomic64_try_cmpxchg_relaxed(v, old, new);
}

/**
 * atomic64_sub_and_test() - atomic subtract and test if zero with full ordering
 * @i: s64 value to subtract
 * @v: pointer to atomic64_t
 *
 * Atomically updates @v to (@v - @i) with full ordering.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_sub_and_test() there.
 *
 * Return: @true if the resulting value of @v is zero, @false otherwise.
 */
static __always_inline bool
atomic64_sub_and_test(s64 i, atomic64_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic64_sub_and_test(i, v);
}

/**
 * atomic64_dec_and_test() - atomic decrement and test if zero with full ordering
 * @v: pointer to atomic64_t
 *
 * Atomically updates @v to (@v - 1) with full ordering.
* * Unsafe to use in noinstr code; use raw_atomic64_dec_and_test() there. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool atomic64_dec_and_test(atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_dec_and_test(v); } /** * atomic64_inc_and_test() - atomic increment and test if zero with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_inc_and_test() there. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool atomic64_inc_and_test(atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_inc_and_test(v); } /** * atomic64_add_negative() - atomic add and test if negative with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add_negative() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_add_negative(i, v); } /** * atomic64_add_negative_acquire() - atomic add and test if negative with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add_negative_acquire() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic64_add_negative_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_add_negative_acquire(i, v); } /** * atomic64_add_negative_release() - atomic add and test if negative with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add_negative_release() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic64_add_negative_release(s64 i, atomic64_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_add_negative_release(i, v); } /** * atomic64_add_negative_relaxed() - atomic add and test if negative with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic64_add_negative_relaxed() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic64_add_negative_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic64_add_negative_relaxed(i, v); } /** * atomic64_fetch_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic64_t * @a: s64 value to add * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic64_fetch_add_unless() there. * * Return: The original value of @v. 
 */
static __always_inline s64
atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic64_fetch_add_unless(v, a, u);
}

/**
 * atomic64_add_unless() - atomic add unless value with full ordering
 * @v: pointer to atomic64_t
 * @a: s64 value to add
 * @u: s64 value to compare with
 *
 * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_add_unless() there.
 *
 * Return: @true if @v was updated, @false otherwise.
 */
static __always_inline bool
atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic64_add_unless(v, a, u);
}

/**
 * atomic64_inc_not_zero() - atomic increment unless zero with full ordering
 * @v: pointer to atomic64_t
 *
 * If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_inc_not_zero() there.
 *
 * Return: @true if @v was updated, @false otherwise.
 */
static __always_inline bool
atomic64_inc_not_zero(atomic64_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic64_inc_not_zero(v);
}

/**
 * atomic64_inc_unless_negative() - atomic increment unless negative with full ordering
 * @v: pointer to atomic64_t
 *
 * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_inc_unless_negative() there.
 *
 * Return: @true if @v was updated, @false otherwise.
 */
static __always_inline bool
atomic64_inc_unless_negative(atomic64_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic64_inc_unless_negative(v);
}

/**
 * atomic64_dec_unless_positive() - atomic decrement unless positive with full ordering
 * @v: pointer to atomic64_t
 *
 * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_dec_unless_positive() there.
 *
 * Return: @true if @v was updated, @false otherwise.
 */
static __always_inline bool
atomic64_dec_unless_positive(atomic64_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic64_dec_unless_positive(v);
}

/**
 * atomic64_dec_if_positive() - atomic decrement if positive with full ordering
 * @v: pointer to atomic64_t
 *
 * If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Unsafe to use in noinstr code; use raw_atomic64_dec_if_positive() there.
 *
 * Return: The original value of @v minus one, regardless of whether @v was
 * updated.
 */
static __always_inline s64
atomic64_dec_if_positive(atomic64_t *v)
{
	kcsan_mb();
	instrument_atomic_read_write(v, sizeof(*v));
	return raw_atomic64_dec_if_positive(v);
}

/**
 * atomic_long_read() - atomic load with relaxed ordering
 * @v: pointer to atomic_long_t
 *
 * Atomically loads the value of @v with relaxed ordering.
 *
 * Unsafe to use in noinstr code; use raw_atomic_long_read() there.
 *
 * Return: The value loaded from @v.
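 *
 * atomic_long_t follows the machine word size, so the atomic_long_*() ops
 * map to the atomic_*() ops on 32-bit kernels and to the atomic64_*() ops on
 * 64-bit kernels. Illustrative sketch (not from the kernel sources; @stats
 * and its nr_items field are hypothetical):
 *
 *	long nr = atomic_long_read(&stats->nr_items);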
*/ static __always_inline long atomic_long_read(const atomic_long_t *v) { instrument_atomic_read(v, sizeof(*v)); return raw_atomic_long_read(v); } /** * atomic_long_read_acquire() - atomic load with acquire ordering * @v: pointer to atomic_long_t * * Atomically loads the value of @v with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_read_acquire() there. * * Return: The value loaded from @v. */ static __always_inline long atomic_long_read_acquire(const atomic_long_t *v) { instrument_atomic_read(v, sizeof(*v)); return raw_atomic_long_read_acquire(v); } /** * atomic_long_set() - atomic set with relaxed ordering * @v: pointer to atomic_long_t * @i: long value to assign * * Atomically sets @v to @i with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_set() there. * * Return: Nothing. */ static __always_inline void atomic_long_set(atomic_long_t *v, long i) { instrument_atomic_write(v, sizeof(*v)); raw_atomic_long_set(v, i); } /** * atomic_long_set_release() - atomic set with release ordering * @v: pointer to atomic_long_t * @i: long value to assign * * Atomically sets @v to @i with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_set_release() there. * * Return: Nothing. */ static __always_inline void atomic_long_set_release(atomic_long_t *v, long i) { kcsan_release(); instrument_atomic_write(v, sizeof(*v)); raw_atomic_long_set_release(v, i); } /** * atomic_long_add() - atomic add with relaxed ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_add() there. * * Return: Nothing. */ static __always_inline void atomic_long_add(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_long_add(i, v); } /** * atomic_long_add_return() - atomic add with full ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_add_return() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_add_return(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_return(i, v); } /** * atomic_long_add_return_acquire() - atomic add with acquire ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_add_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_add_return_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_return_acquire(i, v); } /** * atomic_long_add_return_release() - atomic add with release ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_add_return_release() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_add_return_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_return_release(i, v); } /** * atomic_long_add_return_relaxed() - atomic add with relaxed ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with relaxed ordering. 
* * Unsafe to use in noinstr code; use raw_atomic_long_add_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_add_return_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_return_relaxed(i, v); } /** * atomic_long_fetch_add() - atomic add with full ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_add(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_add(i, v); } /** * atomic_long_fetch_add_acquire() - atomic add with acquire ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_add_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_add_acquire(i, v); } /** * atomic_long_fetch_add_release() - atomic add with release ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_add_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_add_release(i, v); } /** * atomic_long_fetch_add_relaxed() - atomic add with relaxed ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_add_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_add_relaxed(i, v); } /** * atomic_long_sub() - atomic subtract with relaxed ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_sub() there. * * Return: Nothing. */ static __always_inline void atomic_long_sub(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_long_sub(i, v); } /** * atomic_long_sub_return() - atomic subtract with full ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_sub_return() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_sub_return(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_sub_return(i, v); } /** * atomic_long_sub_return_acquire() - atomic subtract with acquire ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_sub_return_acquire() there. * * Return: The updated value of @v. 
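 *
 * Example (editorial sketch; names are hypothetical): consume some units
 * from a shared budget and act on the remainder. The acquire ordering is
 * only needed if the caller must observe writes published by whoever
 * refilled the budget with a release operation; a real implementation
 * would also return the units on failure.
 *
 *   static atomic_long_t example_budget = ATOMIC_LONG_INIT(1024);
 *
 *   static bool example_take(long units)
 *   {
 *           return atomic_long_sub_return_acquire(units, &example_budget) >= 0;
 *   }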
*/ static __always_inline long atomic_long_sub_return_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_sub_return_acquire(i, v); } /** * atomic_long_sub_return_release() - atomic subtract with release ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_sub_return_release() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_sub_return_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_sub_return_release(i, v); } /** * atomic_long_sub_return_relaxed() - atomic subtract with relaxed ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_sub_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_sub_return_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_sub_return_relaxed(i, v); } /** * atomic_long_fetch_sub() - atomic subtract with full ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_sub() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_sub(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_sub(i, v); } /** * atomic_long_fetch_sub_acquire() - atomic subtract with acquire ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_sub_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_sub_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_sub_acquire(i, v); } /** * atomic_long_fetch_sub_release() - atomic subtract with release ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_sub_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_sub_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_sub_release(i, v); } /** * atomic_long_fetch_sub_relaxed() - atomic subtract with relaxed ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_sub_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_sub_relaxed(i, v); } /** * atomic_long_inc() - atomic increment with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_inc() there. * * Return: Nothing. 
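 *
 * Example (editorial sketch; names are hypothetical): bump a counter where
 * neither the new value nor any ordering is needed.
 *
 *   static atomic_long_t example_alloc_count = ATOMIC_LONG_INIT(0);
 *
 *   static void example_note_alloc(void)
 *   {
 *           atomic_long_inc(&example_alloc_count);
 *   }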
*/ static __always_inline void atomic_long_inc(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_long_inc(v); } /** * atomic_long_inc_return() - atomic increment with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_return() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_inc_return(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_inc_return(v); } /** * atomic_long_inc_return_acquire() - atomic increment with acquire ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_inc_return_acquire(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_inc_return_acquire(v); } /** * atomic_long_inc_return_release() - atomic increment with release ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_return_release() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_inc_return_release(atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_inc_return_release(v); } /** * atomic_long_inc_return_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_inc_return_relaxed(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_inc_return_relaxed(v); } /** * atomic_long_fetch_inc() - atomic increment with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_inc() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_inc(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_inc(v); } /** * atomic_long_fetch_inc_acquire() - atomic increment with acquire ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_inc_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_inc_acquire(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_inc_acquire(v); } /** * atomic_long_fetch_inc_release() - atomic increment with release ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_inc_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_inc_release(atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_inc_release(v); } /** * atomic_long_fetch_inc_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with relaxed ordering. 
* * Unsafe to use in noinstr code; use raw_atomic_long_fetch_inc_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_inc_relaxed(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_inc_relaxed(v); } /** * atomic_long_dec() - atomic decrement with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_dec() there. * * Return: Nothing. */ static __always_inline void atomic_long_dec(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_long_dec(v); } /** * atomic_long_dec_return() - atomic decrement with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_return() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_dec_return(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_dec_return(v); } /** * atomic_long_dec_return_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_return_acquire() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_dec_return_acquire(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_dec_return_acquire(v); } /** * atomic_long_dec_return_release() - atomic decrement with release ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_return_release() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_dec_return_release(atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_dec_return_release(v); } /** * atomic_long_dec_return_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_return_relaxed() there. * * Return: The updated value of @v. */ static __always_inline long atomic_long_dec_return_relaxed(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_dec_return_relaxed(v); } /** * atomic_long_fetch_dec() - atomic decrement with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_dec() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_dec(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_dec(v); } /** * atomic_long_fetch_dec_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_dec_acquire() there. * * Return: The original value of @v. 
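 *
 * Example (editorial sketch; names are hypothetical): decrement a counter
 * and inspect the value it had beforehand, e.g. to detect that this caller
 * consumed the last remaining item (underflow handling omitted).
 *
 *   static atomic_long_t example_items = ATOMIC_LONG_INIT(16);
 *
 *   static bool example_took_last_item(void)
 *   {
 *           /* fetch_dec returns the value before the decrement. */
 *           return atomic_long_fetch_dec_acquire(&example_items) == 1;
 *   }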
*/ static __always_inline long atomic_long_fetch_dec_acquire(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_dec_acquire(v); } /** * atomic_long_fetch_dec_release() - atomic decrement with release ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_dec_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_dec_release(atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_dec_release(v); } /** * atomic_long_fetch_dec_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_dec_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_dec_relaxed(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_dec_relaxed(v); } /** * atomic_long_and() - atomic bitwise AND with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_and() there. * * Return: Nothing. */ static __always_inline void atomic_long_and(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_long_and(i, v); } /** * atomic_long_fetch_and() - atomic bitwise AND with full ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_and() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_and(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_and(i, v); } /** * atomic_long_fetch_and_acquire() - atomic bitwise AND with acquire ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_and_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_and_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_and_acquire(i, v); } /** * atomic_long_fetch_and_release() - atomic bitwise AND with release ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_and_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_and_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_and_release(i, v); } /** * atomic_long_fetch_and_relaxed() - atomic bitwise AND with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_and_relaxed() there. * * Return: The original value of @v. 
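 *
 * Example (editorial sketch; the flag mask and names are hypothetical):
 * atomically clear a flag bit and learn whether it was set beforehand.
 *
 *   #define EXAMPLE_FLAG_DIRTY 0x1L
 *   static atomic_long_t example_flags = ATOMIC_LONG_INIT(0);
 *
 *   static bool example_test_and_clear_dirty(void)
 *   {
 *           long old = atomic_long_fetch_and_relaxed(~EXAMPLE_FLAG_DIRTY, &example_flags);
 *
 *           return old & EXAMPLE_FLAG_DIRTY;
 *   }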
*/ static __always_inline long atomic_long_fetch_and_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_and_relaxed(i, v); } /** * atomic_long_andnot() - atomic bitwise AND NOT with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_andnot() there. * * Return: Nothing. */ static __always_inline void atomic_long_andnot(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_long_andnot(i, v); } /** * atomic_long_fetch_andnot() - atomic bitwise AND NOT with full ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_andnot() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_andnot(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_andnot(i, v); } /** * atomic_long_fetch_andnot_acquire() - atomic bitwise AND NOT with acquire ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_andnot_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_andnot_acquire(i, v); } /** * atomic_long_fetch_andnot_release() - atomic bitwise AND NOT with release ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_andnot_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_andnot_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_andnot_release(i, v); } /** * atomic_long_fetch_andnot_relaxed() - atomic bitwise AND NOT with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_andnot_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_andnot_relaxed(i, v); } /** * atomic_long_or() - atomic bitwise OR with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_or() there. * * Return: Nothing. */ static __always_inline void atomic_long_or(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_long_or(i, v); } /** * atomic_long_fetch_or() - atomic bitwise OR with full ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_or() there. * * Return: The original value of @v. 
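 *
 * Example (editorial sketch; names are hypothetical): set a flag bit and
 * use the returned old value to find out whether this caller was the first
 * to set it.
 *
 *   #define EXAMPLE_FLAG_INIT 0x1L
 *   static atomic_long_t example_state = ATOMIC_LONG_INIT(0);
 *
 *   static bool example_mark_initialised(void)
 *   {
 *           return !(atomic_long_fetch_or(EXAMPLE_FLAG_INIT, &example_state) & EXAMPLE_FLAG_INIT);
 *   }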
*/ static __always_inline long atomic_long_fetch_or(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_or(i, v); } /** * atomic_long_fetch_or_acquire() - atomic bitwise OR with acquire ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_or_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_or_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_or_acquire(i, v); } /** * atomic_long_fetch_or_release() - atomic bitwise OR with release ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_or_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_or_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_or_release(i, v); } /** * atomic_long_fetch_or_relaxed() - atomic bitwise OR with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_or_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_or_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_or_relaxed(i, v); } /** * atomic_long_xor() - atomic bitwise XOR with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_xor() there. * * Return: Nothing. */ static __always_inline void atomic_long_xor(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); raw_atomic_long_xor(i, v); } /** * atomic_long_fetch_xor() - atomic bitwise XOR with full ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_xor() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_xor(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_xor(i, v); } /** * atomic_long_fetch_xor_acquire() - atomic bitwise XOR with acquire ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_xor_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_xor_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_xor_acquire(i, v); } /** * atomic_long_fetch_xor_release() - atomic bitwise XOR with release ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_xor_release() there. * * Return: The original value of @v. 
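 *
 * Example (editorial sketch; names are hypothetical): toggle a single bit
 * and report its previous state. The release ordering matters only if the
 * toggle publishes earlier writes to a reader using an acquire load.
 *
 *   #define EXAMPLE_BIT_ACTIVE 0x1L
 *   static atomic_long_t example_toggle = ATOMIC_LONG_INIT(0);
 *
 *   static bool example_flip_active(void)
 *   {
 *           return atomic_long_fetch_xor_release(EXAMPLE_BIT_ACTIVE, &example_toggle) & EXAMPLE_BIT_ACTIVE;
 *   }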
*/ static __always_inline long atomic_long_fetch_xor_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_xor_release(i, v); } /** * atomic_long_fetch_xor_relaxed() - atomic bitwise XOR with relaxed ordering * @i: long value * @v: pointer to atomic_long_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_xor_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_xor_relaxed(i, v); } /** * atomic_long_xchg() - atomic exchange with full ordering * @v: pointer to atomic_long_t * @new: long value to assign * * Atomically updates @v to @new with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_xchg() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_xchg(atomic_long_t *v, long new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_xchg(v, new); } /** * atomic_long_xchg_acquire() - atomic exchange with acquire ordering * @v: pointer to atomic_long_t * @new: long value to assign * * Atomically updates @v to @new with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_xchg_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_xchg_acquire(atomic_long_t *v, long new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_xchg_acquire(v, new); } /** * atomic_long_xchg_release() - atomic exchange with release ordering * @v: pointer to atomic_long_t * @new: long value to assign * * Atomically updates @v to @new with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_xchg_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_xchg_release(atomic_long_t *v, long new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_xchg_release(v, new); } /** * atomic_long_xchg_relaxed() - atomic exchange with relaxed ordering * @v: pointer to atomic_long_t * @new: long value to assign * * Atomically updates @v to @new with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_xchg_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_xchg_relaxed(atomic_long_t *v, long new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_xchg_relaxed(v, new); } /** * atomic_long_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic_long_t * @old: long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_cmpxchg(atomic_long_t *v, long old, long new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_cmpxchg(v, old, new); } /** * atomic_long_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic_long_t * @old: long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. 
* Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_acquire() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_cmpxchg_acquire(v, old, new); } /** * atomic_long_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic_long_t * @old: long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_release() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_cmpxchg_release(v, old, new); } /** * atomic_long_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic_long_t * @old: long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_relaxed() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_cmpxchg_relaxed(v, old, new); } /** * atomic_long_try_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic_long_t * @old: pointer to long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg() there. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg(v, old, new); } /** * atomic_long_try_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic_long_t * @old: pointer to long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_acquire() there. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg_acquire(v, old, new); } /** * atomic_long_try_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic_long_t * @old: pointer to long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with release ordering.
* Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_release() there. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg_release(v, old, new); } /** * atomic_long_try_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic_long_t * @old: pointer to long value to compare with * @new: long value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_relaxed() there. * * Return: @true if the exchange occurred, @false otherwise. */ static __always_inline bool atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg_relaxed(v, old, new); } /** * atomic_long_sub_and_test() - atomic subtract and test if zero with full ordering * @i: long value to subtract * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_sub_and_test() there. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool atomic_long_sub_and_test(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_sub_and_test(i, v); } /** * atomic_long_dec_and_test() - atomic decrement and test if zero with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v - 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_and_test() there. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool atomic_long_dec_and_test(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_dec_and_test(v); } /** * atomic_long_inc_and_test() - atomic increment and test if zero with full ordering * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + 1) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_and_test() there. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool atomic_long_inc_and_test(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_inc_and_test(v); } /** * atomic_long_add_negative() - atomic add and test if negative with full ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with full ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_add_negative() there. * * Return: @true if the resulting value of @v is negative, @false otherwise.
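 *
 * Example (editorial sketch; names are hypothetical): account a signed
 * delta against a balance and detect when it drops below zero.
 *
 *   static atomic_long_t example_balance = ATOMIC_LONG_INIT(0);
 *
 *   static bool example_charge(long delta)
 *   {
 *           /* True when the balance became negative after adding delta. */
 *           return atomic_long_add_negative(delta, &example_balance);
 *   }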
*/ static __always_inline bool atomic_long_add_negative(long i, atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_negative(i, v); } /** * atomic_long_add_negative_acquire() - atomic add and test if negative with acquire ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_add_negative_acquire() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic_long_add_negative_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_negative_acquire(i, v); } /** * atomic_long_add_negative_release() - atomic add and test if negative with release ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with release ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_add_negative_release() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic_long_add_negative_release(long i, atomic_long_t *v) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_negative_release(i, v); } /** * atomic_long_add_negative_relaxed() - atomic add and test if negative with relaxed ordering * @i: long value to add * @v: pointer to atomic_long_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Unsafe to use in noinstr code; use raw_atomic_long_add_negative_relaxed() there. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool atomic_long_add_negative_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_negative_relaxed(i, v); } /** * atomic_long_fetch_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic_long_t * @a: long value to add * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_unless() there. * * Return: The original value of @v. */ static __always_inline long atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_fetch_add_unless(v, a, u); } /** * atomic_long_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic_long_t * @a: long value to add * @u: long value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_add_unless() there. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool atomic_long_add_unless(atomic_long_t *v, long a, long u) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_add_unless(v, a, u); } /** * atomic_long_inc_not_zero() - atomic increment unless zero with full ordering * @v: pointer to atomic_long_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_not_zero() there. * * Return: @true if @v was updated, @false otherwise. 
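 *
 * Example (editorial sketch; the object layout is hypothetical): take a
 * reference to an object only if it is still live, the classic
 * inc-not-zero lookup pattern (refcount_t is normally preferred for real
 * reference counts).
 *
 *   struct example_obj {
 *           atomic_long_t refs;
 *   };
 *
 *   static bool example_obj_tryget(struct example_obj *obj)
 *   {
 *           return atomic_long_inc_not_zero(&obj->refs);
 *   }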
*/ static __always_inline bool atomic_long_inc_not_zero(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_inc_not_zero(v); } /** * atomic_long_inc_unless_negative() - atomic increment unless negative with full ordering * @v: pointer to atomic_long_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_inc_unless_negative() there. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool atomic_long_inc_unless_negative(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_inc_unless_negative(v); } /** * atomic_long_dec_unless_positive() - atomic decrement unless positive with full ordering * @v: pointer to atomic_long_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_unless_positive() there. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool atomic_long_dec_unless_positive(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_dec_unless_positive(v); } /** * atomic_long_dec_if_positive() - atomic decrement if positive with full ordering * @v: pointer to atomic_long_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Unsafe to use in noinstr code; use raw_atomic_long_dec_if_positive() there. * * Return: The old value of (@v - 1), regardless of whether @v was updated. */ static __always_inline long atomic_long_dec_if_positive(atomic_long_t *v) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); return raw_atomic_long_dec_if_positive(v); } #define xchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_xchg(__ai_ptr, __VA_ARGS__); \ }) #define xchg_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_xchg_acquire(__ai_ptr, __VA_ARGS__); \ }) #define xchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_release(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_xchg_release(__ai_ptr, __VA_ARGS__); \ }) #define xchg_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_xchg_relaxed(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg_acquire(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_release(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg_release(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg_relaxed(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64(ptr, ...) 
\ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg64(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg64_acquire(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_release(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg64_release(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg64_relaxed(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg128(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg128(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg128_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg128_acquire(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg128_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_release(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg128_release(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg128_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg128_relaxed(__ai_ptr, __VA_ARGS__); \ }) #define try_cmpxchg(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg_acquire(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg_release(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ kcsan_release(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg_relaxed(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg64(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg64(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg64_acquire(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg64_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg64_release(ptr, oldp, ...) 
\ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ kcsan_release(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg64_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg64_relaxed(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg64_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg128(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg128(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg128_acquire(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg128_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg128_release(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ kcsan_release(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg128_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg128_relaxed(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg128_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define cmpxchg_local(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg_local(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64_local(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg64_local(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg128_local(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_cmpxchg128_local(__ai_ptr, __VA_ARGS__); \ }) #define sync_cmpxchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_sync_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) #define try_cmpxchg_local(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg64_local(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg64_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg128_local(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_read_write(__ai_oldp, sizeof(*__ai_oldp)); \ raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define sync_try_cmpxchg(ptr, ...) 
\ ({ \ typeof(ptr) __ai_ptr = (ptr); \ kcsan_mb(); \ instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ // ce5b65e0f1f8a276268b667194581d24bed219d4
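/*
 * Example (editorial sketch, outside the generated header proper; names are
 * hypothetical): the instrumented try_cmpxchg() macro above is typically
 * used in a retry loop to update a plain scalar, with @old tracking the
 * current value whenever the exchange fails.
 *
 *	static unsigned long example_max_seen;
 *
 *	static void example_record_max(unsigned long val)
 *	{
 *		unsigned long old = READ_ONCE(example_max_seen);
 *
 *		do {
 *			if (old >= val)
 *				return;
 *		} while (!try_cmpxchg(&example_max_seen, &old, val));
 *	}
 */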
2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 
// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002	Andrew Morton
 *		Initial version
 */

#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time to max_pause when falls below it.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 */
static int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory
 */
static unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */
static int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 */
static int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory
 */
static unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a full sync is triggered after this time elapses without any disk activity.
*/ int laptop_mode; EXPORT_SYMBOL(laptop_mode); /* End of sysctl-exported parameters */ struct wb_domain global_wb_domain; /* consolidated parameters for balance_dirty_pages() and its subroutines */ struct dirty_throttle_control { #ifdef CONFIG_CGROUP_WRITEBACK struct wb_domain *dom; struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */ #endif struct bdi_writeback *wb; struct fprop_local_percpu *wb_completions; unsigned long avail; /* dirtyable */ unsigned long dirty; /* file_dirty + write + nfs */ unsigned long thresh; /* dirty threshold */ unsigned long bg_thresh; /* dirty background threshold */ unsigned long wb_dirty; /* per-wb counterparts */ unsigned long wb_thresh; unsigned long wb_bg_thresh; unsigned long pos_ratio; }; /* * Length of period for aging writeout fractions of bdis. This is an * arbitrarily chosen number. The longer the period, the slower fractions will * reflect changes in current writeout rate. */ #define VM_COMPLETIONS_PERIOD_LEN (3*HZ) #ifdef CONFIG_CGROUP_WRITEBACK #define GDTC_INIT(__wb) .wb = (__wb), \ .dom = &global_wb_domain, \ .wb_completions = &(__wb)->completions #define GDTC_INIT_NO_WB .dom = &global_wb_domain #define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \ .dom = mem_cgroup_wb_domain(__wb), \ .wb_completions = &(__wb)->memcg_completions, \ .gdtc = __gdtc static bool mdtc_valid(struct dirty_throttle_control *dtc) { return dtc->dom; } static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc) { return dtc->dom; } static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc) { return mdtc->gdtc; } static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb) { return &wb->memcg_completions; } static void wb_min_max_ratio(struct bdi_writeback *wb, unsigned long *minp, unsigned long *maxp) { unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth); unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); unsigned long long min = wb->bdi->min_ratio; unsigned long long max = wb->bdi->max_ratio; /* * @wb may already be clean by the time control reaches here and * the total may not include its bw. */ if (this_bw < tot_bw) { if (min) { min *= this_bw; min = div64_ul(min, tot_bw); } if (max < 100 * BDI_RATIO_SCALE) { max *= this_bw; max = div64_ul(max, tot_bw); } } *minp = min; *maxp = max; } #else /* CONFIG_CGROUP_WRITEBACK */ #define GDTC_INIT(__wb) .wb = (__wb), \ .wb_completions = &(__wb)->completions #define GDTC_INIT_NO_WB #define MDTC_INIT(__wb, __gdtc) static bool mdtc_valid(struct dirty_throttle_control *dtc) { return false; } static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc) { return &global_wb_domain; } static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc) { return NULL; } static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb) { return NULL; } static void wb_min_max_ratio(struct bdi_writeback *wb, unsigned long *minp, unsigned long *maxp) { *minp = wb->bdi->min_ratio; *maxp = wb->bdi->max_ratio; } #endif /* CONFIG_CGROUP_WRITEBACK */ /* * In a memory zone, there is a certain amount of pages we consider * available for the page cache, which is essentially the number of * free and reclaimable pages, minus some zone reserves to protect * lowmem and the ability to uphold the zone's watermarks without * requiring writeback. * * This number of dirtyable pages is the base value of which the * user-configurable dirty ratio is the effective number of pages that * are allowed to be actually dirtied. 
Per individual zone, or * globally by using the sum of dirtyable pages over all zones. * * Because the user is allowed to specify the dirty limit globally as * absolute number of bytes, calculating the per-zone dirty limit can * require translating the configured limit into a percentage of * global dirtyable memory first. */ /** * node_dirtyable_memory - number of dirtyable pages in a node * @pgdat: the node * * Return: the node's number of pages potentially available for dirty * page cache. This is the base value for the per-node dirty limits. */ static unsigned long node_dirtyable_memory(struct pglist_data *pgdat) { unsigned long nr_pages = 0; int z; for (z = 0; z < MAX_NR_ZONES; z++) { struct zone *zone = pgdat->node_zones + z; if (!populated_zone(zone)) continue; nr_pages += zone_page_state(zone, NR_FREE_PAGES); } /* * Pages reserved for the kernel should not be considered * dirtyable, to prevent a situation where reclaim has to * clean pages in order to balance the zones. */ nr_pages -= min(nr_pages, pgdat->totalreserve_pages); nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE); nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE); return nr_pages; } static unsigned long highmem_dirtyable_memory(unsigned long total) { #ifdef CONFIG_HIGHMEM int node; unsigned long x = 0; int i; for_each_node_state(node, N_HIGH_MEMORY) { for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) { struct zone *z; unsigned long nr_pages; if (!is_highmem_idx(i)) continue; z = &NODE_DATA(node)->node_zones[i]; if (!populated_zone(z)) continue; nr_pages = zone_page_state(z, NR_FREE_PAGES); /* watch for underflows */ nr_pages -= min(nr_pages, high_wmark_pages(z)); nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE); nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE); x += nr_pages; } } /* * Make sure that the number of highmem pages is never larger * than the number of the total dirtyable memory. This can only * occur in very strange VM situations but we want to make sure * that this does not occur. */ return min(x, total); #else return 0; #endif } /** * global_dirtyable_memory - number of globally dirtyable pages * * Return: the global number of pages potentially available for dirty * page cache. This is the base value for the global dirty limits. */ static unsigned long global_dirtyable_memory(void) { unsigned long x; x = global_zone_page_state(NR_FREE_PAGES); /* * Pages reserved for the kernel should not be considered * dirtyable, to prevent a situation where reclaim has to * clean pages in order to balance the zones. */ x -= min(x, totalreserve_pages); x += global_node_page_state(NR_INACTIVE_FILE); x += global_node_page_state(NR_ACTIVE_FILE); if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); return x + 1; /* Ensure that we never return 0 */ } /** * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain * @dtc: dirty_throttle_control of interest * * Calculate @dtc->thresh and ->bg_thresh considering * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}. The caller * must ensure that @dtc->avail is set before calling this function. The * dirty limits will be lifted by 1/4 for real-time tasks. 
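 *
 * For example (illustrative numbers, assuming 4KB pages and the default
 * ratios): with vm_dirty_ratio = 20, dirty_background_ratio = 10 and
 * @dtc->avail at roughly 1,000,000 dirtyable pages, this works out to a
 * thresh of about 200,000 pages (~780MB) and a bg_thresh of about
 * 100,000 pages.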
*/ static void domain_dirty_limits(struct dirty_throttle_control *dtc) { const unsigned long available_memory = dtc->avail; struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc); unsigned long bytes = vm_dirty_bytes; unsigned long bg_bytes = dirty_background_bytes; /* convert ratios to per-PAGE_SIZE for higher precision */ unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100; unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100; unsigned long thresh; unsigned long bg_thresh; struct task_struct *tsk; /* gdtc is !NULL iff @dtc is for memcg domain */ if (gdtc) { unsigned long global_avail = gdtc->avail; /* * The byte settings can't be applied directly to memcg * domains. Convert them to ratios by scaling against * globally available memory. As the ratios are in * per-PAGE_SIZE, they can be obtained by dividing bytes by * number of pages. */ if (bytes) ratio = min(DIV_ROUND_UP(bytes, global_avail), PAGE_SIZE); if (bg_bytes) bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail), PAGE_SIZE); bytes = bg_bytes = 0; } if (bytes) thresh = DIV_ROUND_UP(bytes, PAGE_SIZE); else thresh = (ratio * available_memory) / PAGE_SIZE; if (bg_bytes) bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE); else bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; if (bg_thresh >= thresh) bg_thresh = thresh / 2; tsk = current; if (rt_task(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; } dtc->thresh = thresh; dtc->bg_thresh = bg_thresh; /* we should eventually report the domain in the TP */ if (!gdtc) trace_global_dirty_state(bg_thresh, thresh); } /** * global_dirty_limits - background-writeback and dirty-throttling thresholds * @pbackground: out parameter for bg_thresh * @pdirty: out parameter for thresh * * Calculate bg_thresh and thresh for global_wb_domain. See * domain_dirty_limits() for details. */ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) { struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB }; gdtc.avail = global_dirtyable_memory(); domain_dirty_limits(&gdtc); *pbackground = gdtc.bg_thresh; *pdirty = gdtc.thresh; } /** * node_dirty_limit - maximum number of dirty pages allowed in a node * @pgdat: the node * * Return: the maximum number of dirty pages allowed in a node, based * on the node's dirtyable memory. */ static unsigned long node_dirty_limit(struct pglist_data *pgdat) { unsigned long node_memory = node_dirtyable_memory(pgdat); struct task_struct *tsk = current; unsigned long dirty; if (vm_dirty_bytes) dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) * node_memory / global_dirtyable_memory(); else dirty = vm_dirty_ratio * node_memory / 100; if (rt_task(tsk)) dirty += dirty / 4; return dirty; } /** * node_dirty_ok - tells whether a node is within its dirty limits * @pgdat: the node to check * * Return: %true when the dirty pages in @pgdat are within the node's * dirty limit, %false if the limit is exceeded. 
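 *
 * For example (illustrative numbers, assuming vm_dirty_bytes is not set):
 * with vm_dirty_ratio = 20, a node with roughly 250,000 dirtyable pages is
 * allowed about 50,000 dirty + writeback pages; see node_dirty_limit()
 * above.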
*/ bool node_dirty_ok(struct pglist_data *pgdat) { unsigned long limit = node_dirty_limit(pgdat); unsigned long nr_pages = 0; nr_pages += node_page_state(pgdat, NR_FILE_DIRTY); nr_pages += node_page_state(pgdat, NR_WRITEBACK); return nr_pages <= limit; } #ifdef CONFIG_SYSCTL static int dirty_background_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_bytes = 0; return ret; } static int dirty_background_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_ratio = 0; return ret; } static int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { writeback_set_ratelimit(); vm_dirty_bytes = 0; } return ret; } static int dirty_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; int ret; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { writeback_set_ratelimit(); vm_dirty_ratio = 0; } return ret; } #endif static unsigned long wp_next_time(unsigned long cur_time) { cur_time += VM_COMPLETIONS_PERIOD_LEN; /* 0 has a special meaning... */ if (!cur_time) return 1; return cur_time; } static void wb_domain_writeout_add(struct wb_domain *dom, struct fprop_local_percpu *completions, unsigned int max_prop_frac, long nr) { __fprop_add_percpu_max(&dom->completions, completions, max_prop_frac, nr); /* First event after period switching was turned off? */ if (unlikely(!dom->period_time)) { /* * We can race with other __bdi_writeout_inc calls here but * it does not cause any harm since the resulting time when * timer will fire and what is in writeout_period_time will be * roughly the same. */ dom->period_time = wp_next_time(jiffies); mod_timer(&dom->period_timer, dom->period_time); } } /* * Increment @wb's writeout completion count and the global writeout * completion count. Called from __folio_end_writeback(). */ static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr) { struct wb_domain *cgdom; wb_stat_mod(wb, WB_WRITTEN, nr); wb_domain_writeout_add(&global_wb_domain, &wb->completions, wb->bdi->max_prop_frac, nr); cgdom = mem_cgroup_wb_domain(wb); if (cgdom) wb_domain_writeout_add(cgdom, wb_memcg_completions(wb), wb->bdi->max_prop_frac, nr); } void wb_writeout_inc(struct bdi_writeback *wb) { unsigned long flags; local_irq_save(flags); __wb_writeout_add(wb, 1); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(wb_writeout_inc); /* * On idle system, we can be called long after we scheduled because we use * deferred timers so count with missed periods. */ static void writeout_period(struct timer_list *t) { struct wb_domain *dom = from_timer(dom, t, period_timer); int miss_periods = (jiffies - dom->period_time) / VM_COMPLETIONS_PERIOD_LEN; if (fprop_new_period(&dom->completions, miss_periods + 1)) { dom->period_time = wp_next_time(dom->period_time + miss_periods * VM_COMPLETIONS_PERIOD_LEN); mod_timer(&dom->period_timer, dom->period_time); } else { /* * Aging has zeroed all fractions. Stop wasting CPU on period * updates. 
*/ dom->period_time = 0; } } int wb_domain_init(struct wb_domain *dom, gfp_t gfp) { memset(dom, 0, sizeof(*dom)); spin_lock_init(&dom->lock); timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE); dom->dirty_limit_tstamp = jiffies; return fprop_global_init(&dom->completions, gfp); } #ifdef CONFIG_CGROUP_WRITEBACK void wb_domain_exit(struct wb_domain *dom) { del_timer_sync(&dom->period_timer); fprop_global_destroy(&dom->completions); } #endif /* * bdi_min_ratio keeps the sum of the minimum dirty shares of all * registered backing devices, which, for obvious reasons, can not * exceed 100%. */ static unsigned int bdi_min_ratio; static int bdi_check_pages_limit(unsigned long pages) { unsigned long max_dirty_pages = global_dirtyable_memory(); if (pages > max_dirty_pages) return -EINVAL; return 0; } static unsigned long bdi_ratio_from_pages(unsigned long pages) { unsigned long background_thresh; unsigned long dirty_thresh; unsigned long ratio; global_dirty_limits(&background_thresh, &dirty_thresh); ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh); return ratio; } static u64 bdi_get_bytes(unsigned int ratio) { unsigned long background_thresh; unsigned long dirty_thresh; u64 bytes; global_dirty_limits(&background_thresh, &dirty_thresh); bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100; return bytes; } static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; int ret = 0; if (min_ratio > 100 * BDI_RATIO_SCALE) return -EINVAL; spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { if (min_ratio < bdi->min_ratio) { delta = bdi->min_ratio - min_ratio; bdi_min_ratio -= delta; bdi->min_ratio = min_ratio; } else { delta = min_ratio - bdi->min_ratio; if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) { bdi_min_ratio += delta; bdi->min_ratio = min_ratio; } else { ret = -EINVAL; } } } spin_unlock_bh(&bdi_lock); return ret; } static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { int ret = 0; if (max_ratio > 100 * BDI_RATIO_SCALE) return -EINVAL; spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; } else { bdi->max_ratio = max_ratio; bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / (100 * BDI_RATIO_SCALE); } spin_unlock_bh(&bdi_lock); return ret; } int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio) { return __bdi_set_min_ratio(bdi, min_ratio); } int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) { return __bdi_set_max_ratio(bdi, max_ratio); } int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); } int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); } EXPORT_SYMBOL(bdi_set_max_ratio); u64 bdi_get_min_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->min_ratio); } int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes) { int ret; unsigned long pages = min_bytes >> PAGE_SHIFT; unsigned long min_ratio; ret = bdi_check_pages_limit(pages); if (ret) return ret; min_ratio = bdi_ratio_from_pages(pages); return __bdi_set_min_ratio(bdi, min_ratio); } u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); } int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes) { int ret; unsigned long pages = max_bytes >> PAGE_SHIFT; unsigned 
long max_ratio; ret = bdi_check_pages_limit(pages); if (ret) return ret; max_ratio = bdi_ratio_from_pages(pages); return __bdi_set_max_ratio(bdi, max_ratio); } int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) return -EINVAL; spin_lock_bh(&bdi_lock); if (strict_limit) bdi->capabilities |= BDI_CAP_STRICTLIMIT; else bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; spin_unlock_bh(&bdi_lock); return 0; } static unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { return (thresh + bg_thresh) / 2; } static unsigned long hard_dirty_limit(struct wb_domain *dom, unsigned long thresh) { return max(thresh, dom->dirty_limit); } /* * Memory which can be further allocated to a memcg domain is capped by * system-wide clean memory excluding the amount being used in the domain. */ static void mdtc_calc_avail(struct dirty_throttle_control *mdtc, unsigned long filepages, unsigned long headroom) { struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc); unsigned long clean = filepages - min(filepages, mdtc->dirty); unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); unsigned long other_clean = global_clean - min(global_clean, clean); mdtc->avail = filepages + min(headroom, other_clean); } /** * __wb_calc_thresh - @wb's share of dirty throttling threshold * @dtc: dirty_throttle_context of interest * * Note that balance_dirty_pages() will only seriously take it as a hard limit * when sleeping max_pause per page is not enough to keep the dirty pages under * control. For example, when the device is completely stalled due to some error * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key. * In the other normal situations, it acts more gently by throttling the tasks * more (rather than completely block them) when the wb dirty pages go high. * * It allocates high/low dirty limits to fast/slow devices, in order to prevent * - starving fast devices * - piling up dirty pages (that will take long time to sync) on slow devices * * The wb's share of dirty limit will be adapting to its throughput and * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set. * * Return: @wb's dirty limit in pages. The term "dirty" in the context of * dirty balancing includes all PG_dirty and PG_writeback pages. */ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) { struct wb_domain *dom = dtc_dom(dtc); unsigned long thresh = dtc->thresh; u64 wb_thresh; unsigned long numerator, denominator; unsigned long wb_min_ratio, wb_max_ratio; /* * Calculate this BDI's share of the thresh ratio. 
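	 * For example, a wb responsible for roughly 30% of the recent
	 * writeout completions is given roughly 30% of @thresh (less any
	 * min_ratio reserved by other bdis), before the min/max ratio
	 * adjustment below (illustrative figure only).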
*/ fprop_fraction_percpu(&dom->completions, dtc->wb_completions, &numerator, &denominator); wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE); wb_thresh *= numerator; wb_thresh = div64_ul(wb_thresh, denominator); wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE); if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE)) wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE); return wb_thresh; } unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh) { struct dirty_throttle_control gdtc = { GDTC_INIT(wb), .thresh = thresh }; return __wb_calc_thresh(&gdtc); } /* * setpoint - dirty 3 * f(dirty) := 1.0 + (----------------) * limit - setpoint * * it's a 3rd order polynomial that subjects to * * (1) f(freerun) = 2.0 => rampup dirty_ratelimit reasonably fast * (2) f(setpoint) = 1.0 => the balance point * (3) f(limit) = 0 => the hard limit * (4) df/dx <= 0 => negative feedback control * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) * => fast response on large errors; small oscillation near setpoint */ static long long pos_ratio_polynom(unsigned long setpoint, unsigned long dirty, unsigned long limit) { long long pos_ratio; long x; x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, (limit - setpoint) | 1); pos_ratio = x; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; pos_ratio += 1 << RATELIMIT_CALC_SHIFT; return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT); } /* * Dirty position control. * * (o) global/bdi setpoints * * We want the dirty pages be balanced around the global/wb setpoints. * When the number of dirty pages is higher/lower than the setpoint, the * dirty position control ratio (and hence task dirty ratelimit) will be * decreased/increased to bring the dirty pages back to the setpoint. * * pos_ratio = 1 << RATELIMIT_CALC_SHIFT * * if (dirty < setpoint) scale up pos_ratio * if (dirty > setpoint) scale down pos_ratio * * if (wb_dirty < wb_setpoint) scale up pos_ratio * if (wb_dirty > wb_setpoint) scale down pos_ratio * * task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT * * (o) global control line * * ^ pos_ratio * | * | |<===== global dirty control scope ======>| * 2.0 * * * * * * * * | .* * | . * * | . * * | . * * | . * * | . * * 1.0 ................................* * | . . * * | . . * * | . . * * | . . * * | . . * * 0 +------------.------------------.----------------------*-------------> * freerun^ setpoint^ limit^ dirty pages * * (o) wb control line * * ^ pos_ratio * | * | * * | * * | * * | * * | * |<=========== span ============>| * 1.0 .......................* * | . * * | . * * | . * * | . * * | . * * | . * * | . * * | . * * | . * * | . * * | . * * 1/4 ...............................................* * * * * * * * * * * * * | . . * | . . * | . . * 0 +----------------------.-------------------------------.-------------> * wb_setpoint^ x_intercept^ * * The wb control line won't drop below pos_ratio=1/4, so that wb_dirty can * be smoothly throttled down to normal if it starts high in situations like * - start writing to a slow SD card and a fast disk at the same time. The SD * card's wb_dirty may rush to many times higher than wb_setpoint. 
* - the wb dirty thresh drops quickly due to change of JBOD workload */ static void wb_position_ratio(struct dirty_throttle_control *dtc) { struct bdi_writeback *wb = dtc->wb; unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth); unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh); unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); unsigned long wb_thresh = dtc->wb_thresh; unsigned long x_intercept; unsigned long setpoint; /* dirty pages' target balance point */ unsigned long wb_setpoint; unsigned long span; long long pos_ratio; /* for scaling up/down the rate limit */ long x; dtc->pos_ratio = 0; if (unlikely(dtc->dirty >= limit)) return; /* * global setpoint * * See comment for pos_ratio_polynom(). */ setpoint = (freerun + limit) / 2; pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit); /* * The strictlimit feature is a tool preventing mistrusted filesystems * from growing a large number of dirty pages before throttling. For * such filesystems balance_dirty_pages always checks wb counters * against wb limits. Even if global "nr_dirty" is under "freerun". * This is especially important for fuse which sets bdi->max_ratio to * 1% by default. Without strictlimit feature, fuse writeback may * consume arbitrary amount of RAM because it is accounted in * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty". * * Here, in wb_position_ratio(), we calculate pos_ratio based on * two values: wb_dirty and wb_thresh. Let's consider an example: * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global * limits are set by default to 10% and 20% (background and throttle). * Then wb_thresh is 1% of 20% of 16GB. This amounts to ~8K pages. * wb_calc_thresh(wb, bg_thresh) is about ~4K pages. wb_setpoint is * about ~6K pages (as the average of background and throttle wb * limits). The 3rd order polynomial will provide positive feedback if * wb_dirty is under wb_setpoint and vice versa. * * Note, that we cannot use global counters in these calculations * because we want to throttle process writing to a strictlimit wb * much earlier than global "freerun" is reached (~23MB vs. ~2.3GB * in the example above). */ if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) { long long wb_pos_ratio; if (dtc->wb_dirty < 8) { dtc->pos_ratio = min_t(long long, pos_ratio * 2, 2 << RATELIMIT_CALC_SHIFT); return; } if (dtc->wb_dirty >= wb_thresh) return; wb_setpoint = dirty_freerun_ceiling(wb_thresh, dtc->wb_bg_thresh); if (wb_setpoint == 0 || wb_setpoint == wb_thresh) return; wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty, wb_thresh); /* * Typically, for strictlimit case, wb_setpoint << setpoint * and pos_ratio >> wb_pos_ratio. In the other words global * state ("dirty") is not limiting factor and we have to * make decision based on wb counters. But there is an * important case when global pos_ratio should get precedence: * global limits are exceeded (e.g. due to activities on other * wb's) while given strictlimit wb is below limit. * * "pos_ratio * wb_pos_ratio" would work for the case above, * but it would look too non-natural for the case of all * activity in the system coming from a single strictlimit wb * with bdi->max_ratio == 100%. * * Note that min() below somewhat changes the dynamics of the * control system. Normally, pos_ratio value can be well over 3 * (when globally we are at freerun and wb is well below wb * setpoint). Now the maximum pos_ratio in the same situation * is 2. 
We might want to tweak this if we observe the control * system is too slow to adapt. */ dtc->pos_ratio = min(pos_ratio, wb_pos_ratio); return; } /* * We have computed basic pos_ratio above based on global situation. If * the wb is over/under its share of dirty pages, we want to scale * pos_ratio further down/up. That is done by the following mechanism. */ /* * wb setpoint * * f(wb_dirty) := 1.0 + k * (wb_dirty - wb_setpoint) * * x_intercept - wb_dirty * := -------------------------- * x_intercept - wb_setpoint * * The main wb control line is a linear function that subjects to * * (1) f(wb_setpoint) = 1.0 * (2) k = - 1 / (8 * write_bw) (in single wb case) * or equally: x_intercept = wb_setpoint + 8 * write_bw * * For single wb case, the dirty pages are observed to fluctuate * regularly within range * [wb_setpoint - write_bw/2, wb_setpoint + write_bw/2] * for various filesystems, where (2) can yield in a reasonable 12.5% * fluctuation range for pos_ratio. * * For JBOD case, wb_thresh (not wb_dirty!) could fluctuate up to its * own size, so move the slope over accordingly and choose a slope that * yields 100% pos_ratio fluctuation on suddenly doubled wb_thresh. */ if (unlikely(wb_thresh > dtc->thresh)) wb_thresh = dtc->thresh; /* * It's very possible that wb_thresh is close to 0 not because the * device is slow, but that it has remained inactive for long time. * Honour such devices a reasonable good (hopefully IO efficient) * threshold, so that the occasional writes won't be blocked and active * writes can rampup the threshold quickly. */ wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8); /* * scale global setpoint to wb's: * wb_setpoint = setpoint * wb_thresh / thresh */ x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1); wb_setpoint = setpoint * (u64)x >> 16; /* * Use span=(8*write_bw) in single wb case as indicated by * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case. * * wb_thresh thresh - wb_thresh * span = --------- * (8 * write_bw) + ------------------ * wb_thresh * thresh thresh */ span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16; x_intercept = wb_setpoint + span; if (dtc->wb_dirty < x_intercept - span / 4) { pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty), (x_intercept - wb_setpoint) | 1); } else pos_ratio /= 4; /* * wb reserve area, safeguard against dirty pool underrun and disk idle * It may push the desired control point of global dirty pages higher * than setpoint. */ x_intercept = wb_thresh / 2; if (dtc->wb_dirty < x_intercept) { if (dtc->wb_dirty > x_intercept / 8) pos_ratio = div_u64(pos_ratio * x_intercept, dtc->wb_dirty); else pos_ratio *= 8; } dtc->pos_ratio = pos_ratio; } static void wb_update_write_bandwidth(struct bdi_writeback *wb, unsigned long elapsed, unsigned long written) { const unsigned long period = roundup_pow_of_two(3 * HZ); unsigned long avg = wb->avg_write_bandwidth; unsigned long old = wb->write_bandwidth; u64 bw; /* * bw = written * HZ / elapsed * * bw * elapsed + write_bandwidth * (period - elapsed) * write_bandwidth = --------------------------------------------------- * period * * @written may have decreased due to folio_redirty_for_writepage(). * Avoid underflowing @bw calculation. 
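	 *
	 * For example, with HZ=1000 the period rounds up to 4096 jiffies,
	 * so a 200ms sample moves write_bandwidth only about 200/4096 ~= 5%
	 * of the way towards the instantaneous rate (illustrative numbers
	 * only).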
*/ bw = written - min(written, wb->written_stamp); bw *= HZ; if (unlikely(elapsed > period)) { bw = div64_ul(bw, elapsed); avg = bw; goto out; } bw += (u64)wb->write_bandwidth * (period - elapsed); bw >>= ilog2(period); /* * one more level of smoothing, for filtering out sudden spikes */ if (avg > old && old >= (unsigned long)bw) avg -= (avg - old) >> 3; if (avg < old && old <= (unsigned long)bw) avg += (old - avg) >> 3; out: /* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */ avg = max(avg, 1LU); if (wb_has_dirty_io(wb)) { long delta = avg - wb->avg_write_bandwidth; WARN_ON_ONCE(atomic_long_add_return(delta, &wb->bdi->tot_write_bandwidth) <= 0); } wb->write_bandwidth = bw; WRITE_ONCE(wb->avg_write_bandwidth, avg); } static void update_dirty_limit(struct dirty_throttle_control *dtc) { struct wb_domain *dom = dtc_dom(dtc); unsigned long thresh = dtc->thresh; unsigned long limit = dom->dirty_limit; /* * Follow up in one step. */ if (limit < thresh) { limit = thresh; goto update; } /* * Follow down slowly. Use the higher one as the target, because thresh * may drop below dirty. This is exactly the reason to introduce * dom->dirty_limit which is guaranteed to lie above the dirty pages. */ thresh = max(thresh, dtc->dirty); if (limit > thresh) { limit -= (limit - thresh) >> 5; goto update; } return; update: dom->dirty_limit = limit; } static void domain_update_dirty_limit(struct dirty_throttle_control *dtc, unsigned long now) { struct wb_domain *dom = dtc_dom(dtc); /* * check locklessly first to optimize away locking for the most time */ if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) return; spin_lock(&dom->lock); if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) { update_dirty_limit(dtc); dom->dirty_limit_tstamp = now; } spin_unlock(&dom->lock); } /* * Maintain wb->dirty_ratelimit, the base dirty throttle rate. * * Normal wb tasks will be curbed at or below it in long term. * Obviously it should be around (write_bw / N) when there are N dd tasks. */ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, unsigned long dirtied, unsigned long elapsed) { struct bdi_writeback *wb = dtc->wb; unsigned long dirty = dtc->dirty; unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh); unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); unsigned long setpoint = (freerun + limit) / 2; unsigned long write_bw = wb->avg_write_bandwidth; unsigned long dirty_ratelimit = wb->dirty_ratelimit; unsigned long dirty_rate; unsigned long task_ratelimit; unsigned long balanced_dirty_ratelimit; unsigned long step; unsigned long x; unsigned long shift; /* * The dirty rate will match the writeout rate in long term, except * when dirty pages are truncated by userspace or re-dirtied by FS. */ dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed; /* * task_ratelimit reflects each dd's dirty rate for the past 200ms. */ task_ratelimit = (u64)dirty_ratelimit * dtc->pos_ratio >> RATELIMIT_CALC_SHIFT; task_ratelimit++; /* it helps rampup dirty_ratelimit from tiny values */ /* * A linear estimation of the "balanced" throttle rate. The theory is, * if there are N dd tasks, each throttled at task_ratelimit, the wb's * dirty_rate will be measured to be (N * task_ratelimit). So the below * formula will yield the balanced rate limit (write_bw / N). 
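	 *
	 * For example (illustrative numbers): with write_bw at 25,000
	 * pages/s and 4 tasks each dirtying at task_ratelimit = 10,000
	 * pages/s, dirty_rate is measured at about 40,000 pages/s, so the
	 * formula below yields 10,000 * 25,000 / 40,000 = 6,250 pages/s,
	 * i.e. roughly write_bw / 4.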
* * Note that the expanded form is not a pure rate feedback: * rate_(i+1) = rate_(i) * (write_bw / dirty_rate) (1) * but also takes pos_ratio into account: * rate_(i+1) = rate_(i) * (write_bw / dirty_rate) * pos_ratio (2) * * (1) is not realistic because pos_ratio also takes part in balancing * the dirty rate. Consider the state * pos_ratio = 0.5 (3) * rate = 2 * (write_bw / N) (4) * If (1) is used, it will stuck in that state! Because each dd will * be throttled at * task_ratelimit = pos_ratio * rate = (write_bw / N) (5) * yielding * dirty_rate = N * task_ratelimit = write_bw (6) * put (6) into (1) we get * rate_(i+1) = rate_(i) (7) * * So we end up using (2) to always keep * rate_(i+1) ~= (write_bw / N) (8) * regardless of the value of pos_ratio. As long as (8) is satisfied, * pos_ratio is able to drive itself to 1.0, which is not only where * the dirty count meet the setpoint, but also where the slope of * pos_ratio is most flat and hence task_ratelimit is least fluctuated. */ balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw, dirty_rate | 1); /* * balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw */ if (unlikely(balanced_dirty_ratelimit > write_bw)) balanced_dirty_ratelimit = write_bw; /* * We could safely do this and return immediately: * * wb->dirty_ratelimit = balanced_dirty_ratelimit; * * However to get a more stable dirty_ratelimit, the below elaborated * code makes use of task_ratelimit to filter out singular points and * limit the step size. * * The below code essentially only uses the relative value of * * task_ratelimit - dirty_ratelimit * = (pos_ratio - 1) * dirty_ratelimit * * which reflects the direction and size of dirty position error. */ /* * dirty_ratelimit will follow balanced_dirty_ratelimit iff * task_ratelimit is on the same side of dirty_ratelimit, too. * For example, when * - dirty_ratelimit > balanced_dirty_ratelimit * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint) * lowering dirty_ratelimit will help meet both the position and rate * control targets. Otherwise, don't update dirty_ratelimit if it will * only help meet the rate target. After all, what the users ultimately * feel and care are stable dirty rate and small position error. * * |task_ratelimit - dirty_ratelimit| is used to limit the step size * and filter out the singular points of balanced_dirty_ratelimit. Which * keeps jumping around randomly and can even leap far away at times * due to the small 200ms estimation period of dirty_rate (we want to * keep that period small to reduce time lags). */ step = 0; /* * For strictlimit case, calculations above were based on wb counters * and limits (starting from pos_ratio = wb_position_ratio() and up to * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate). * Hence, to calculate "step" properly, we have to use wb_dirty as * "dirty" and wb_setpoint as "setpoint". * * We rampup dirty_ratelimit forcibly if wb_dirty is low because * it's possible that wb_thresh is close to zero due to inactivity * of backing device. 
*/ if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) { dirty = dtc->wb_dirty; if (dtc->wb_dirty < 8) setpoint = dtc->wb_dirty + 1; else setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2; } if (dirty < setpoint) { x = min3(wb->balanced_dirty_ratelimit, balanced_dirty_ratelimit, task_ratelimit); if (dirty_ratelimit < x) step = x - dirty_ratelimit; } else { x = max3(wb->balanced_dirty_ratelimit, balanced_dirty_ratelimit, task_ratelimit); if (dirty_ratelimit > x) step = dirty_ratelimit - x; } /* * Don't pursue 100% rate matching. It's impossible since the balanced * rate itself is constantly fluctuating. So decrease the track speed * when it gets close to the target. Helps eliminate pointless tremors. */ shift = dirty_ratelimit / (2 * step + 1); if (shift < BITS_PER_LONG) step = DIV_ROUND_UP(step >> shift, 8); else step = 0; if (dirty_ratelimit < balanced_dirty_ratelimit) dirty_ratelimit += step; else dirty_ratelimit -= step; WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL)); wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit; trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit); } static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, struct dirty_throttle_control *mdtc, bool update_ratelimit) { struct bdi_writeback *wb = gdtc->wb; unsigned long now = jiffies; unsigned long elapsed; unsigned long dirtied; unsigned long written; spin_lock(&wb->list_lock); /* * Lockless checks for elapsed time are racy and delayed update after * IO completion doesn't do it at all (to make sure written pages are * accounted reasonably quickly). Make sure elapsed >= 1 to avoid * division errors. */ elapsed = max(now - wb->bw_time_stamp, 1UL); dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]); written = percpu_counter_read(&wb->stat[WB_WRITTEN]); if (update_ratelimit) { domain_update_dirty_limit(gdtc, now); wb_update_dirty_ratelimit(gdtc, dirtied, elapsed); /* * @mdtc is always NULL if !CGROUP_WRITEBACK but the * compiler has no way to figure that out. Help it. */ if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) { domain_update_dirty_limit(mdtc, now); wb_update_dirty_ratelimit(mdtc, dirtied, elapsed); } } wb_update_write_bandwidth(wb, elapsed, written); wb->dirtied_stamp = dirtied; wb->written_stamp = written; WRITE_ONCE(wb->bw_time_stamp, now); spin_unlock(&wb->list_lock); } void wb_update_bandwidth(struct bdi_writeback *wb) { struct dirty_throttle_control gdtc = { GDTC_INIT(wb) }; __wb_update_bandwidth(&gdtc, NULL, false); } /* Interval after which we consider wb idle and don't estimate bandwidth */ #define WB_BANDWIDTH_IDLE_JIF (HZ) static void wb_bandwidth_estimate_start(struct bdi_writeback *wb) { unsigned long now = jiffies; unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp); if (elapsed > WB_BANDWIDTH_IDLE_JIF && !atomic_read(&wb->writeback_inodes)) { spin_lock(&wb->list_lock); wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED); wb->written_stamp = wb_stat(wb, WB_WRITTEN); WRITE_ONCE(wb->bw_time_stamp, now); spin_unlock(&wb->list_lock); } } /* * After a task dirtied this many pages, balance_dirty_pages_ratelimited() * will look to see if it needs to start dirty throttling. * * If dirty_poll_interval is too low, big NUMA machines will call the expensive * global_zone_page_state() too often. So scale it near-sqrt to the safety margin * (the number of pages we may dirty without exceeding the dirty limits). 
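 *
 * For example, a safety margin of 65,536 pages gives a poll interval of
 * 1 << (ilog2(65536) >> 1) = 256 pages, i.e. roughly the square root of
 * the margin (illustrative numbers only).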
*/ static unsigned long dirty_poll_interval(unsigned long dirty, unsigned long thresh) { if (thresh > dirty) return 1UL << (ilog2(thresh - dirty) >> 1); return 1; } static unsigned long wb_max_pause(struct bdi_writeback *wb, unsigned long wb_dirty) { unsigned long bw = READ_ONCE(wb->avg_write_bandwidth); unsigned long t; /* * Limit pause time for small memory systems. If sleeping for too long * time, a small pool of dirty/writeback pages may go empty and disk go * idle. * * 8 serves as the safety ratio. */ t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8)); t++; return min_t(unsigned long, t, MAX_PAUSE); } static long wb_min_pause(struct bdi_writeback *wb, long max_pause, unsigned long task_ratelimit, unsigned long dirty_ratelimit, int *nr_dirtied_pause) { long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth)); long lo = ilog2(READ_ONCE(wb->dirty_ratelimit)); long t; /* target pause */ long pause; /* estimated next pause */ int pages; /* target nr_dirtied_pause */ /* target for 10ms pause on 1-dd case */ t = max(1, HZ / 100); /* * Scale up pause time for concurrent dirtiers in order to reduce CPU * overheads. * * (N * 10ms) on 2^N concurrent tasks. */ if (hi > lo) t += (hi - lo) * (10 * HZ) / 1024; /* * This is a bit convoluted. We try to base the next nr_dirtied_pause * on the much more stable dirty_ratelimit. However the next pause time * will be computed based on task_ratelimit and the two rate limits may * depart considerably at some time. Especially if task_ratelimit goes * below dirty_ratelimit/2 and the target pause is max_pause, the next * pause time will be max_pause*2 _trimmed down_ to max_pause. As a * result task_ratelimit won't be executed faithfully, which could * eventually bring down dirty_ratelimit. * * We apply two rules to fix it up: * 1) try to estimate the next pause time and if necessary, use a lower * nr_dirtied_pause so as not to exceed max_pause. When this happens, * nr_dirtied_pause will be "dancing" with task_ratelimit. * 2) limit the target pause time to max_pause/2, so that the normal * small fluctuations of task_ratelimit won't trigger rule (1) and * nr_dirtied_pause will remain as stable as dirty_ratelimit. */ t = min(t, 1 + max_pause / 2); pages = dirty_ratelimit * t / roundup_pow_of_two(HZ); /* * Tiny nr_dirtied_pause is found to hurt I/O performance in the test * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}. * When the 16 consecutive reads are often interrupted by some dirty * throttling pause during the async writes, cfq will go into idles * (deadline is fine). So push nr_dirtied_pause as high as possible * until reaches DIRTY_POLL_THRESH=32 pages. */ if (pages < DIRTY_POLL_THRESH) { t = max_pause; pages = dirty_ratelimit * t / roundup_pow_of_two(HZ); if (pages > DIRTY_POLL_THRESH) { pages = DIRTY_POLL_THRESH; t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit; } } pause = HZ * pages / (task_ratelimit + 1); if (pause > max_pause) { t = max_pause; pages = task_ratelimit * t / roundup_pow_of_two(HZ); } *nr_dirtied_pause = pages; /* * The minimal pause time will normally be half the target pause time. */ return pages >= DIRTY_POLL_THRESH ? 
1 + t / 2 : t; } static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) { struct bdi_writeback *wb = dtc->wb; unsigned long wb_reclaimable; /* * wb_thresh is not treated as some limiting factor as * dirty_thresh, due to reasons * - in JBOD setup, wb_thresh can fluctuate a lot * - in a system with HDD and USB key, the USB key may somehow * go into state (wb_dirty >> wb_thresh) either because * wb_dirty starts high, or because wb_thresh drops low. * In this case we don't want to hard throttle the USB key * dirtiers for 100 seconds until wb_dirty drops under * wb_thresh. Instead the auxiliary wb control line in * wb_position_ratio() will let the dirtier task progress * at some rate <= (write_bw / 2) for bringing down wb_dirty. */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need * to ensure we accurately count the 'dirty' pages when * the threshold is low. * * Otherwise it would be possible to get thresh+n pages * reported dirty, even though there are thresh-m pages * actually dirty; with m+n sitting in the percpu * deltas. */ if (dtc->wb_thresh < 2 * wb_stat_error()) { wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK); } else { wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE); dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK); } } /* * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2. * If we're over `background_thresh' then the writeback threads are woken to * perform some writeout. */ static int balance_dirty_pages(struct bdi_writeback *wb, unsigned long pages_dirtied, unsigned int flags) { struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) }; struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) }; struct dirty_throttle_control * const gdtc = &gdtc_stor; struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ? &mdtc_stor : NULL; struct dirty_throttle_control *sdtc; unsigned long nr_reclaimable; /* = file_dirty */ long period; long pause; long max_pause; long min_pause; int nr_dirtied_pause; bool dirty_exceeded = false; unsigned long task_ratelimit; unsigned long dirty_ratelimit; struct backing_dev_info *bdi = wb->bdi; bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT; unsigned long start_time = jiffies; int ret = 0; for (;;) { unsigned long now = jiffies; unsigned long dirty, thresh, bg_thresh; unsigned long m_dirty = 0; /* stop bogus uninit warnings */ unsigned long m_thresh = 0; unsigned long m_bg_thresh = 0; nr_reclaimable = global_node_page_state(NR_FILE_DIRTY); gdtc->avail = global_dirtyable_memory(); gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK); domain_dirty_limits(gdtc); if (unlikely(strictlimit)) { wb_dirty_limits(gdtc); dirty = gdtc->wb_dirty; thresh = gdtc->wb_thresh; bg_thresh = gdtc->wb_bg_thresh; } else { dirty = gdtc->dirty; thresh = gdtc->thresh; bg_thresh = gdtc->bg_thresh; } if (mdtc) { unsigned long filepages, headroom, writeback; /* * If @wb belongs to !root memcg, repeat the same * basic calculations for the memcg domain. 
*/ mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty, &writeback); mdtc->dirty += writeback; mdtc_calc_avail(mdtc, filepages, headroom); domain_dirty_limits(mdtc); if (unlikely(strictlimit)) { wb_dirty_limits(mdtc); m_dirty = mdtc->wb_dirty; m_thresh = mdtc->wb_thresh; m_bg_thresh = mdtc->wb_bg_thresh; } else { m_dirty = mdtc->dirty; m_thresh = mdtc->thresh; m_bg_thresh = mdtc->bg_thresh; } } /* * In laptop mode, we wait until hitting the higher threshold * before starting background writeout, and then write out all * the way down to the lower threshold. So slow writers cause * minimal disk activity. * * In normal mode, we start background writeout at the lower * background_thresh, to keep the amount of dirty memory low. */ if (!laptop_mode && nr_reclaimable > gdtc->bg_thresh && !writeback_in_progress(wb)) wb_start_background_writeback(wb); /* * Throttle it only when the background writeback cannot * catch-up. This avoids (excessively) small writeouts * when the wb limits are ramping up in case of !strictlimit. * * In strictlimit case make decision based on the wb counters * and limits. Small writeouts when the wb limits are ramping * up are the price we consciously pay for strictlimit-ing. * * If memcg domain is in effect, @dirty should be under * both global and memcg freerun ceilings. */ if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) && (!mdtc || m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) { unsigned long intv; unsigned long m_intv; free_running: intv = dirty_poll_interval(dirty, thresh); m_intv = ULONG_MAX; current->dirty_paused_when = now; current->nr_dirtied = 0; if (mdtc) m_intv = dirty_poll_interval(m_dirty, m_thresh); current->nr_dirtied_pause = min(intv, m_intv); break; } /* Start writeback even when in laptop mode */ if (unlikely(!writeback_in_progress(wb))) wb_start_background_writeback(wb); mem_cgroup_flush_foreign(wb); /* * Calculate global domain's pos_ratio and select the * global dtc by default. */ if (!strictlimit) { wb_dirty_limits(gdtc); if ((current->flags & PF_LOCAL_THROTTLE) && gdtc->wb_dirty < dirty_freerun_ceiling(gdtc->wb_thresh, gdtc->wb_bg_thresh)) /* * LOCAL_THROTTLE tasks must not be throttled * when below the per-wb freerun ceiling. */ goto free_running; } dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) && ((gdtc->dirty > gdtc->thresh) || strictlimit); wb_position_ratio(gdtc); sdtc = gdtc; if (mdtc) { /* * If memcg domain is in effect, calculate its * pos_ratio. @wb should satisfy constraints from * both global and memcg domains. Choose the one * w/ lower pos_ratio. */ if (!strictlimit) { wb_dirty_limits(mdtc); if ((current->flags & PF_LOCAL_THROTTLE) && mdtc->wb_dirty < dirty_freerun_ceiling(mdtc->wb_thresh, mdtc->wb_bg_thresh)) /* * LOCAL_THROTTLE tasks must not be * throttled when below the per-wb * freerun ceiling. 
*/ goto free_running; } dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) && ((mdtc->dirty > mdtc->thresh) || strictlimit); wb_position_ratio(mdtc); if (mdtc->pos_ratio < gdtc->pos_ratio) sdtc = mdtc; } if (dirty_exceeded != wb->dirty_exceeded) wb->dirty_exceeded = dirty_exceeded; if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + BANDWIDTH_INTERVAL)) __wb_update_bandwidth(gdtc, mdtc, true); /* throttle according to the chosen dtc */ dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit); task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >> RATELIMIT_CALC_SHIFT; max_pause = wb_max_pause(wb, sdtc->wb_dirty); min_pause = wb_min_pause(wb, max_pause, task_ratelimit, dirty_ratelimit, &nr_dirtied_pause); if (unlikely(task_ratelimit == 0)) { period = max_pause; pause = max_pause; goto pause; } period = HZ * pages_dirtied / task_ratelimit; pause = period; if (current->dirty_paused_when) pause -= now - current->dirty_paused_when; /* * For less than 1s think time (ext3/4 may block the dirtier * for up to 800ms from time to time on 1-HDD; so does xfs, * however at much less frequency), try to compensate it in * future periods by updating the virtual time; otherwise just * do a reset, as it may be a light dirtier. */ if (pause < min_pause) { trace_balance_dirty_pages(wb, sdtc->thresh, sdtc->bg_thresh, sdtc->dirty, sdtc->wb_thresh, sdtc->wb_dirty, dirty_ratelimit, task_ratelimit, pages_dirtied, period, min(pause, 0L), start_time); if (pause < -HZ) { current->dirty_paused_when = now; current->nr_dirtied = 0; } else if (period) { current->dirty_paused_when += period; current->nr_dirtied = 0; } else if (current->nr_dirtied_pause <= pages_dirtied) current->nr_dirtied_pause += pages_dirtied; break; } if (unlikely(pause > max_pause)) { /* for occasional dropped task_ratelimit */ now += min(pause - max_pause, max_pause); pause = max_pause; } pause: trace_balance_dirty_pages(wb, sdtc->thresh, sdtc->bg_thresh, sdtc->dirty, sdtc->wb_thresh, sdtc->wb_dirty, dirty_ratelimit, task_ratelimit, pages_dirtied, period, pause, start_time); if (flags & BDP_ASYNC) { ret = -EAGAIN; break; } __set_current_state(TASK_KILLABLE); bdi->last_bdp_sleep = jiffies; io_schedule_timeout(pause); current->dirty_paused_when = now + pause; current->nr_dirtied = 0; current->nr_dirtied_pause = nr_dirtied_pause; /* * This is typically equal to (dirty < thresh) and can also * keep "1000+ dd on a slow USB stick" under control. */ if (task_ratelimit) break; /* * In the case of an unresponsive NFS server and the NFS dirty * pages exceeds dirty_thresh, give the other good wb's a pipe * to go through, so that tasks on them still remain responsive. * * In theory 1 page is enough to keep the consumer-producer * pipe going: the flusher cleans 1 page => the task dirties 1 * more page. However wb_dirty has accounting errors. So use * the larger and more IO friendly wb_stat_error. */ if (sdtc->wb_dirty <= wb_stat_error()) break; if (fatal_signal_pending(current)) break; } return ret; } static DEFINE_PER_CPU(int, bdp_ratelimits); /* * Normal tasks are throttled by * loop { * dirty tsk->nr_dirtied_pause pages; * take a snap in balance_dirty_pages(); * } * However there is a worst case. If every task exit immediately when dirtied * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be * called to throttle the page dirties. The solution is to save the not yet * throttled page dirties in dirty_throttle_leaks on task exit and charge them * randomly into the running tasks. 
This works well for the above worst case, * as the new task will pick up and accumulate the old task's leaked dirty * count and eventually get throttled. */ DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0; /** * balance_dirty_pages_ratelimited_flags - Balance dirty memory state. * @mapping: address_space which was dirtied. * @flags: BDP flags. * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's * dirty state and will initiate writeback if needed. * * See balance_dirty_pages_ratelimited() for details. * * Return: If @flags contains BDP_ASYNC, it may return -EAGAIN to * indicate that memory is out of balance and the caller must wait * for I/O to complete. Otherwise, it will return 0 to indicate * that either memory was already in balance, or it was able to sleep * until the amount of dirty memory returned to balance. */ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, unsigned int flags) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); struct bdi_writeback *wb = NULL; int ratelimit; int ret = 0; int *p; if (!(bdi->capabilities & BDI_CAP_WRITEBACK)) return ret; if (inode_cgwb_enabled(inode)) wb = wb_get_create_current(bdi, GFP_KERNEL); if (!wb) wb = &bdi->wb; ratelimit = current->nr_dirtied_pause; if (wb->dirty_exceeded) ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10)); preempt_disable(); /* * This prevents one CPU to accumulate too many dirtied pages without * calling into balance_dirty_pages(), which can happen when there are * 1000+ tasks, all of them start dirtying pages at exactly the same * time, hence all honoured too large initial task->nr_dirtied_pause. */ p = this_cpu_ptr(&bdp_ratelimits); if (unlikely(current->nr_dirtied >= ratelimit)) *p = 0; else if (unlikely(*p >= ratelimit_pages)) { *p = 0; ratelimit = 0; } /* * Pick up the dirtied pages by the exited tasks. This avoids lots of * short-lived tasks (eg. gcc invocations in a kernel build) escaping * the dirty throttling and livelock other long-run dirtiers. */ p = this_cpu_ptr(&dirty_throttle_leaks); if (*p > 0 && current->nr_dirtied < ratelimit) { unsigned long nr_pages_dirtied; nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied); *p -= nr_pages_dirtied; current->nr_dirtied += nr_pages_dirtied; } preempt_enable(); if (unlikely(current->nr_dirtied >= ratelimit)) ret = balance_dirty_pages(wb, current->nr_dirtied, flags); wb_put(wb); return ret; } EXPORT_SYMBOL_GPL(balance_dirty_pages_ratelimited_flags); /** * balance_dirty_pages_ratelimited - balance dirty memory state. * @mapping: address_space which was dirtied. * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's * dirty state and will initiate writeback if needed. * * Once we're over the dirty memory limit we decrease the ratelimiting * by a lot, to prevent individual processes from overshooting the limit * by (ratelimit_pages) each. */ void balance_dirty_pages_ratelimited(struct address_space *mapping) { balance_dirty_pages_ratelimited_flags(mapping, 0); } EXPORT_SYMBOL(balance_dirty_pages_ratelimited); /** * wb_over_bg_thresh - does @wb need to be written back? * @wb: bdi_writeback of interest * * Determines whether background writeback should keep writing @wb or it's * clean enough. * * Return: %true if writeback should continue. 
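 *
 * A sketch of the expected calling pattern (example_write_some() is a
 * hypothetical stand-in for the flusher's writeout step):
 *
 *	while (wb_over_bg_thresh(wb))
 *		example_write_some(wb);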
*/ bool wb_over_bg_thresh(struct bdi_writeback *wb) { struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) }; struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) }; struct dirty_throttle_control * const gdtc = &gdtc_stor; struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ? &mdtc_stor : NULL; unsigned long reclaimable; unsigned long thresh; /* * Similar to balance_dirty_pages() but ignores pages being written * as we're trying to decide whether to put more under writeback. */ gdtc->avail = global_dirtyable_memory(); gdtc->dirty = global_node_page_state(NR_FILE_DIRTY); domain_dirty_limits(gdtc); if (gdtc->dirty > gdtc->bg_thresh) return true; thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh); if (thresh < 2 * wb_stat_error()) reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); else reclaimable = wb_stat(wb, WB_RECLAIMABLE); if (reclaimable > thresh) return true; if (mdtc) { unsigned long filepages, headroom, writeback; mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty, &writeback); mdtc_calc_avail(mdtc, filepages, headroom); domain_dirty_limits(mdtc); /* ditto, ignore writeback */ if (mdtc->dirty > mdtc->bg_thresh) return true; thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh); if (thresh < 2 * wb_stat_error()) reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); else reclaimable = wb_stat(wb, WB_RECLAIMABLE); if (reclaimable > thresh) return true; } return false; } #ifdef CONFIG_SYSCTL /* * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ static int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { unsigned int old_interval = dirty_writeback_interval; int ret; ret = proc_dointvec(table, write, buffer, length, ppos); /* * Writing 0 to dirty_writeback_interval will disable periodic writeback * and a different non-zero value will wakeup the writeback threads. * wb_wakeup_delayed() would be more appropriate, but it's a pain to * iterate over all bdis and wbs. * The reason we do this is to make the change take effect immediately. */ if (!ret && write && dirty_writeback_interval && dirty_writeback_interval != old_interval) wakeup_flusher_threads(WB_REASON_PERIODIC); return ret; } #endif void laptop_mode_timer_fn(struct timer_list *t) { struct backing_dev_info *backing_dev_info = from_timer(backing_dev_info, t, laptop_mode_wb_timer); wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER); } /* * We've spun up the disk and we're in laptop mode: schedule writeback * of all dirty data a few seconds from now. If the flush is already scheduled * then push it back - the user is still using the disk. */ void laptop_io_completion(struct backing_dev_info *info) { mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode); } /* * We're in laptop mode and we've just synced. The sync's writes will have * caused another writeback to be scheduled by laptop_io_completion. * Nothing needs to be written back anymore, so we unschedule the writeback. */ void laptop_sync_completion(void) { struct backing_dev_info *bdi; rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) del_timer(&bdi->laptop_mode_wb_timer); rcu_read_unlock(); } /* * If ratelimit_pages is too high then we can get into dirty-data overload * if a large number of processes all perform writes at the same time. * * Here we set ratelimit_pages to a level which ensures that when all CPUs are * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory * thresholds. 
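 *
 * Worked example, using the formula in writeback_set_ratelimit() below:
 * with a dirty threshold of 1,048,576 pages (4 GiB of 4 KiB pages) and
 * 8 online CPUs, ratelimit_pages = 1048576 / (8 * 32) = 4096 pages, i.e.
 * each CPU may dirty roughly 16 MiB between balance_dirty_pages() calls
 * (never less than the 16-page floor applied below).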
*/ void writeback_set_ratelimit(void) { struct wb_domain *dom = &global_wb_domain; unsigned long background_thresh; unsigned long dirty_thresh; global_dirty_limits(&background_thresh, &dirty_thresh); dom->dirty_limit = dirty_thresh; ratelimit_pages = dirty_thresh / (num_online_cpus() * 32); if (ratelimit_pages < 16) ratelimit_pages = 16; } static int page_writeback_cpu_online(unsigned int cpu) { writeback_set_ratelimit(); return 0; } #ifdef CONFIG_SYSCTL /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; static struct ctl_table vm_page_writeback_sysctls[] = { { .procname = "dirty_background_ratio", .data = &dirty_background_ratio, .maxlen = sizeof(dirty_background_ratio), .mode = 0644, .proc_handler = dirty_background_ratio_handler, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "dirty_background_bytes", .data = &dirty_background_bytes, .maxlen = sizeof(dirty_background_bytes), .mode = 0644, .proc_handler = dirty_background_bytes_handler, .extra1 = SYSCTL_LONG_ONE, }, { .procname = "dirty_ratio", .data = &vm_dirty_ratio, .maxlen = sizeof(vm_dirty_ratio), .mode = 0644, .proc_handler = dirty_ratio_handler, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "dirty_bytes", .data = &vm_dirty_bytes, .maxlen = sizeof(vm_dirty_bytes), .mode = 0644, .proc_handler = dirty_bytes_handler, .extra1 = (void *)&dirty_bytes_min, }, { .procname = "dirty_writeback_centisecs", .data = &dirty_writeback_interval, .maxlen = sizeof(dirty_writeback_interval), .mode = 0644, .proc_handler = dirty_writeback_centisecs_handler, }, { .procname = "dirty_expire_centisecs", .data = &dirty_expire_interval, .maxlen = sizeof(dirty_expire_interval), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, #ifdef CONFIG_HIGHMEM { .procname = "highmem_is_dirtyable", .data = &vm_highmem_is_dirtyable, .maxlen = sizeof(vm_highmem_is_dirtyable), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #endif { .procname = "laptop_mode", .data = &laptop_mode, .maxlen = sizeof(laptop_mode), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, {} }; #endif /* * Called early on to tune the page writeback dirty limits. * * We used to scale dirty pages according to how total memory * related to pages that could be allocated for buffers. * * However, that was when we used "dirty_ratio" to scale with * all memory, and we don't do that any more. "dirty_ratio" * is now applied to total non-HIGHPAGE memory, and as such we can't * get into the old insane situation any more where we had * large amounts of dirty pages compared to a small amount of * non-HIGHMEM memory. * * But we might still want to scale the dirty_ratio by how * much memory the box has.. */ void __init page_writeback_init(void) { BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL)); cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online", page_writeback_cpu_online, NULL); cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL, page_writeback_cpu_online); #ifdef CONFIG_SYSCTL register_sysctl_init("vm", vm_page_writeback_sysctls); #endif } /** * tag_pages_for_writeback - tag pages to be written by writeback * @mapping: address space structure to write * @start: starting page index * @end: ending page index (inclusive) * * This function scans the page range from @start to @end (inclusive) and tags * all pages that have DIRTY tag set with a special TOWRITE tag. 
The caller * can then use the TOWRITE tag to identify pages eligible for writeback. * This mechanism is used to avoid livelocking of writeback by a process * steadily creating new dirty pages in the file (thus it is important for this * function to be quick so that it can tag pages faster than a dirtying process * can create them). */ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end) { XA_STATE(xas, &mapping->i_pages, start); unsigned int tagged = 0; void *page; xas_lock_irq(&xas); xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) { xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE); if (++tagged % XA_CHECK_SCHED) continue; xas_pause(&xas); xas_unlock_irq(&xas); cond_resched(); xas_lock_irq(&xas); } xas_unlock_irq(&xas); } EXPORT_SYMBOL(tag_pages_for_writeback); static bool folio_prepare_writeback(struct address_space *mapping, struct writeback_control *wbc, struct folio *folio) { /* * Folio truncated or invalidated. We can freely skip it then, * even for data integrity operations: the folio has disappeared * concurrently, so there could be no real expectation of this * data integrity operation even if there is now a new, dirty * folio at the same pagecache index. */ if (unlikely(folio->mapping != mapping)) return false; /* * Did somebody else write it for us? */ if (!folio_test_dirty(folio)) return false; if (folio_test_writeback(folio)) { if (wbc->sync_mode == WB_SYNC_NONE) return false; folio_wait_writeback(folio); } BUG_ON(folio_test_writeback(folio)); if (!folio_clear_dirty_for_io(folio)) return false; return true; } static xa_mark_t wbc_to_tag(struct writeback_control *wbc) { if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) return PAGECACHE_TAG_TOWRITE; return PAGECACHE_TAG_DIRTY; } static pgoff_t wbc_end(struct writeback_control *wbc) { if (wbc->range_cyclic) return -1; return wbc->range_end >> PAGE_SHIFT; } static struct folio *writeback_get_folio(struct address_space *mapping, struct writeback_control *wbc) { struct folio *folio; retry: folio = folio_batch_next(&wbc->fbatch); if (!folio) { folio_batch_release(&wbc->fbatch); cond_resched(); filemap_get_folios_tag(mapping, &wbc->index, wbc_end(wbc), wbc_to_tag(wbc), &wbc->fbatch); folio = folio_batch_next(&wbc->fbatch); if (!folio) return NULL; } folio_lock(folio); if (unlikely(!folio_prepare_writeback(mapping, wbc, folio))) { folio_unlock(folio); goto retry; } trace_wbc_writepage(wbc, inode_to_bdi(mapping->host)); return folio; } /** * writeback_iter - iterate folio of a mapping for writeback * @mapping: address space structure to write * @wbc: writeback context * @folio: previously iterated folio (%NULL to start) * @error: in-out pointer for writeback errors (see below) * * This function returns the next folio for the writeback operation described by * @wbc on @mapping and should be called in a while loop in the ->writepages * implementation. * * To start the writeback operation, %NULL is passed in the @folio argument, and * for every subsequent iteration the folio returned previously should be passed * back in. * * If there was an error in the per-folio writeback inside the writeback_iter() * loop, @error should be set to the error value. * * Once the writeback described in @wbc has finished, this function will return * %NULL and if there was an error in any iteration restore it to @error. * * Note: callers should not manually break out of the loop using break or goto * but must keep calling writeback_iter() until it returns %NULL. 
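 *
 * A sketch of the intended shape of a ->writepages implementation built on
 * this helper (example_write_folio() is a hypothetical filesystem hook that
 * writes back and unlocks the folio; compare write_cache_pages() below):
 *
 *	struct folio *folio = NULL;
 *	int error = 0;
 *
 *	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
 *		error = example_write_folio(folio, wbc);
 *	return error;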
* * Return: the folio to write or %NULL if the loop is done. */ struct folio *writeback_iter(struct address_space *mapping, struct writeback_control *wbc, struct folio *folio, int *error) { if (!folio) { folio_batch_init(&wbc->fbatch); wbc->saved_err = *error = 0; /* * For range cyclic writeback we remember where we stopped so * that we can continue where we stopped. * * For non-cyclic writeback we always start at the beginning of * the passed in range. */ if (wbc->range_cyclic) wbc->index = mapping->writeback_index; else wbc->index = wbc->range_start >> PAGE_SHIFT; /* * To avoid livelocks when other processes dirty new pages, we * first tag pages which should be written back and only then * start writing them. * * For data-integrity writeback we have to be careful so that we * do not miss some pages (e.g., because some other process has * cleared the TOWRITE tag we set). The rule we follow is that * TOWRITE tag can be cleared only by the process clearing the * DIRTY tag (and submitting the page for I/O). */ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, wbc->index, wbc_end(wbc)); } else { wbc->nr_to_write -= folio_nr_pages(folio); WARN_ON_ONCE(*error > 0); /* * For integrity writeback we have to keep going until we have * written all the folios we tagged for writeback above, even if * we run past wbc->nr_to_write or encounter errors. * We stash away the first error we encounter in wbc->saved_err * so that it can be retrieved when we're done. This is because * the file system may still have state to clear for each folio. * * For background writeback we exit as soon as we run past * wbc->nr_to_write or encounter the first error. */ if (wbc->sync_mode == WB_SYNC_ALL) { if (*error && !wbc->saved_err) wbc->saved_err = *error; } else { if (*error || wbc->nr_to_write <= 0) goto done; } } folio = writeback_get_folio(mapping, wbc); if (!folio) { /* * To avoid deadlocks between range_cyclic writeback and callers * that hold pages in PageWriteback to aggregate I/O until * the writeback iteration finishes, we do not loop back to the * start of the file. Doing so causes a page lock/page * writeback access order inversion - we should only ever lock * multiple pages in ascending page->index order, and looping * back to the start of the file violates that rule and causes * deadlocks. */ if (wbc->range_cyclic) mapping->writeback_index = 0; /* * Return the first error we encountered (if there was any) to * the caller. */ *error = wbc->saved_err; } return folio; done: if (wbc->range_cyclic) mapping->writeback_index = folio->index + folio_nr_pages(folio); folio_batch_release(&wbc->fbatch); return NULL; } /** * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @writepage: function called for each page * @data: data passed to writepage function * * Return: %0 on success, negative error code otherwise * * Note: please use writeback_iter() instead. 
*/ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data) { struct folio *folio = NULL; int error; while ((folio = writeback_iter(mapping, wbc, folio, &error))) { error = writepage(folio, wbc, data); if (error == AOP_WRITEPAGE_ACTIVATE) { folio_unlock(folio); error = 0; } } return error; } EXPORT_SYMBOL(write_cache_pages); static int writeback_use_writepage(struct address_space *mapping, struct writeback_control *wbc) { struct folio *folio = NULL; struct blk_plug plug; int err; blk_start_plug(&plug); while ((folio = writeback_iter(mapping, wbc, folio, &err))) { err = mapping->a_ops->writepage(&folio->page, wbc); if (err == AOP_WRITEPAGE_ACTIVATE) { folio_unlock(folio); err = 0; } mapping_set_error(mapping, err); } blk_finish_plug(&plug); return err; } int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; struct bdi_writeback *wb; if (wbc->nr_to_write <= 0) return 0; wb = inode_to_wb_wbc(mapping->host, wbc); wb_bandwidth_estimate_start(wb); while (1) { if (mapping->a_ops->writepages) { ret = mapping->a_ops->writepages(mapping, wbc); } else if (mapping->a_ops->writepage) { ret = writeback_use_writepage(mapping, wbc); } else { /* deal with chardevs and other special files */ ret = 0; } if (ret != -ENOMEM || wbc->sync_mode != WB_SYNC_ALL) break; /* * Lacking an allocation context or the locality or writeback * state of any of the inode's pages, throttle based on * writeback activity on the local node. It's as good a * guess as any. */ reclaim_throttle(NODE_DATA(numa_node_id()), VMSCAN_THROTTLE_WRITEBACK); } /* * Usually few pages are written by now from those we've just submitted * but if there's constant writeback being submitted, this makes sure * writeback bandwidth is updated once in a while. */ if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + BANDWIDTH_INTERVAL)) wb_update_bandwidth(wb); return ret; } /* * For address_spaces which do not use buffers nor write back. */ bool noop_dirty_folio(struct address_space *mapping, struct folio *folio) { if (!folio_test_dirty(folio)) return !folio_test_set_dirty(folio); return false; } EXPORT_SYMBOL(noop_dirty_folio); /* * Helper function for set_page_dirty family. * * Caller must hold folio_memcg_lock(). * * NOTE: This relies on being atomic wrt interrupts. */ static void folio_account_dirtied(struct folio *folio, struct address_space *mapping) { struct inode *inode = mapping->host; trace_writeback_dirty_folio(folio, mapping); if (mapping_can_writeback(mapping)) { struct bdi_writeback *wb; long nr = folio_nr_pages(folio); inode_attach_wb(inode, folio); wb = inode_to_wb(inode); __lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); __zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); __node_stat_mod_folio(folio, NR_DIRTIED, nr); wb_stat_mod(wb, WB_RECLAIMABLE, nr); wb_stat_mod(wb, WB_DIRTIED, nr); task_io_account_write(nr * PAGE_SIZE); current->nr_dirtied += nr; __this_cpu_add(bdp_ratelimits, nr); mem_cgroup_track_foreign_dirty(folio, wb); } } /* * Helper function for deaccounting dirty page without writeback. * * Caller must hold folio_memcg_lock(). */ void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb) { long nr = folio_nr_pages(folio); lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); wb_stat_mod(wb, WB_RECLAIMABLE, -nr); task_io_account_cancelled_write(nr * PAGE_SIZE); } /* * Mark the folio dirty, and set it dirty in the page cache, and mark * the inode dirty. 
* * If warn is true, then emit a warning if the folio is not uptodate and has * not been truncated. * * The caller must hold folio_memcg_lock(). Most callers have the folio * locked. A few have the folio blocked from truncation through other * means (eg zap_vma_pages() has it mapped and is holding the page table * lock). This can also be called from mark_buffer_dirty(), which I * cannot prove is always protected against truncate. */ void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, int warn) { unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); if (folio->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !folio_test_uptodate(folio)); folio_account_dirtied(folio, mapping); __xa_set_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_DIRTY); } xa_unlock_irqrestore(&mapping->i_pages, flags); } /** * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads. * @mapping: Address space this folio belongs to. * @folio: Folio to be marked as dirty. * * Filesystems which do not use buffer heads should call this function * from their dirty_folio address space operation. It ignores the * contents of folio_get_private(), so if the filesystem marks individual * blocks as dirty, the filesystem should handle that itself. * * This is also sometimes used by filesystems which use buffer_heads when * a single buffer is being dirtied: we want to set the folio dirty in * that case, but not all the buffers. This is a "bottom-up" dirtying, * whereas block_dirty_folio() is a "top-down" dirtying. * * The caller must ensure this doesn't race with truncation. Most will * simply hold the folio lock, but e.g. zap_pte_range() calls with the * folio mapped and the pte lock held, which also locks out truncation. */ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) { folio_memcg_lock(folio); if (folio_test_set_dirty(folio)) { folio_memcg_unlock(folio); return false; } __folio_mark_dirty(folio, mapping, !folio_test_private(folio)); folio_memcg_unlock(folio); if (mapping->host) { /* !PageAnon && !swapper_space */ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } return true; } EXPORT_SYMBOL(filemap_dirty_folio); /** * folio_redirty_for_writepage - Decline to write a dirty folio. * @wbc: The writeback control. * @folio: The folio. * * When a writepage implementation decides that it doesn't want to write * @folio for some reason, it should call this function, unlock @folio and * return 0. * * Return: True if we redirtied the folio. False if someone else dirtied * it first. */ bool folio_redirty_for_writepage(struct writeback_control *wbc, struct folio *folio) { struct address_space *mapping = folio->mapping; long nr = folio_nr_pages(folio); bool ret; wbc->pages_skipped += nr; ret = filemap_dirty_folio(mapping, folio); if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; wb = unlocked_inode_to_wb_begin(inode, &cookie); current->nr_dirtied -= nr; node_stat_mod_folio(folio, NR_DIRTIED, -nr); wb_stat_mod(wb, WB_DIRTIED, -nr); unlocked_inode_to_wb_end(inode, &cookie); } return ret; } EXPORT_SYMBOL(folio_redirty_for_writepage); /** * folio_mark_dirty - Mark a folio as being modified. * @folio: The folio. * * The folio may not be truncated while this function is running. * Holding the folio lock is sufficient to prevent truncation, but some * callers cannot acquire a sleeping lock. 
These callers instead hold * the page table lock for a page table which contains at least one page * in this folio. Truncation will block on the page table lock as it * unmaps pages before removing the folio from its mapping. * * Return: True if the folio was newly dirtied, false if it was already dirty. */ bool folio_mark_dirty(struct folio *folio) { struct address_space *mapping = folio_mapping(folio); if (likely(mapping)) { /* * readahead/folio_deactivate could remain * PG_readahead/PG_reclaim due to race with folio_end_writeback * About readahead, if the folio is written, the flags would be * reset. So no problem. * About folio_deactivate, if the folio is redirtied, * the flag will be reset. So no problem. but if the * folio is used by readahead it will confuse readahead * and make it restart the size rampup process. But it's * a trivial problem. */ if (folio_test_reclaim(folio)) folio_clear_reclaim(folio); return mapping->a_ops->dirty_folio(mapping, folio); } return noop_dirty_folio(mapping, folio); } EXPORT_SYMBOL(folio_mark_dirty); /* * set_page_dirty() is racy if the caller has no reference against * page->mapping->host, and if the page is unlocked. This is because another * CPU could truncate the page off the mapping and then free the mapping. * * Usually, the page _is_ locked, or the caller is a user-space process which * holds a reference on the inode by having an open file. * * In other cases, the page should be locked before running set_page_dirty(). */ int set_page_dirty_lock(struct page *page) { int ret; lock_page(page); ret = set_page_dirty(page); unlock_page(page); return ret; } EXPORT_SYMBOL(set_page_dirty_lock); /* * This cancels just the dirty bit on the kernel page itself, it does NOT * actually remove dirty bits on any mmap's that may be around. It also * leaves the page tagged dirty, so any sync activity will still find it on * the dirty lists, and in particular, clear_page_dirty_for_io() will still * look at the dirty bits in the VM. * * Doing this should *normally* only ever be done when a page is truncated, * and is not actually mapped anywhere at all. However, fs/buffer.c does * this when it notices that somebody has cleaned out all the buffers on a * page without actually doing it through the VM. Can you say "ext3 is * horribly ugly"? Thought you could. */ void __folio_cancel_dirty(struct folio *folio) { struct address_space *mapping = folio_mapping(folio); if (mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; folio_memcg_lock(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); if (folio_test_clear_dirty(folio)) folio_account_cleaned(folio, wb); unlocked_inode_to_wb_end(inode, &cookie); folio_memcg_unlock(folio); } else { folio_clear_dirty(folio); } } EXPORT_SYMBOL(__folio_cancel_dirty); /* * Clear a folio's dirty flag, while caring for dirty memory accounting. * Returns true if the folio was previously dirty. * * This is for preparing to put the folio under writeout. We leave * the folio tagged as dirty in the xarray so that a concurrent * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk. * The ->writepage implementation will run either folio_start_writeback() * or folio_mark_dirty(), at which stage we bring the folio's dirty flag * and xarray dirty tag back into sync. * * This incoherency between the folio's dirty flag and xarray tag is * unfortunate, but it only exists while the folio is locked. 
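 *
 * The expected calling pattern, roughly (a sketch only; compare
 * folio_prepare_writeback() above, example_submit_folio() is hypothetical):
 *
 *	folio_lock(folio);
 *	if (folio_clear_dirty_for_io(folio)) {
 *		folio_start_writeback(folio);
 *		example_submit_folio(folio);
 *	}
 *	folio_unlock(folio);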
*/ bool folio_clear_dirty_for_io(struct folio *folio) { struct address_space *mapping = folio_mapping(folio); bool ret = false; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; /* * Yes, Virginia, this is indeed insane. * * We use this sequence to make sure that * (a) we account for dirty stats properly * (b) we tell the low-level filesystem to * mark the whole folio dirty if it was * dirty in a pagetable. Only to then * (c) clean the folio again and return 1 to * cause the writeback. * * This way we avoid all nasty races with the * dirty bit in multiple places and clearing * them concurrently from different threads. * * Note! Normally the "folio_mark_dirty(folio)" * has no effect on the actual dirty bit - since * that will already usually be set. But we * need the side effects, and it can help us * avoid races. * * We basically use the folio "master dirty bit" * as a serialization point for all the different * threads doing their things. */ if (folio_mkclean(folio)) folio_mark_dirty(folio); /* * We carefully synchronise fault handlers against * installing a dirty pte and marking the folio dirty * at this point. We do this by having them hold the * page lock while dirtying the folio, and folios are * always locked coming in here, so we get the desired * exclusion. */ wb = unlocked_inode_to_wb_begin(inode, &cookie); if (folio_test_clear_dirty(folio)) { long nr = folio_nr_pages(folio); lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); wb_stat_mod(wb, WB_RECLAIMABLE, -nr); ret = true; } unlocked_inode_to_wb_end(inode, &cookie); return ret; } return folio_test_clear_dirty(folio); } EXPORT_SYMBOL(folio_clear_dirty_for_io); static void wb_inode_writeback_start(struct bdi_writeback *wb) { atomic_inc(&wb->writeback_inodes); } static void wb_inode_writeback_end(struct bdi_writeback *wb) { unsigned long flags; atomic_dec(&wb->writeback_inodes); /* * Make sure estimate of writeback throughput gets updated after * writeback completed. We delay the update by BANDWIDTH_INTERVAL * (which is the interval other bandwidth updates use for batching) so * that if multiple inodes end writeback at a similar time, they get * batched into one bandwidth update. 
*/ spin_lock_irqsave(&wb->work_lock, flags); if (test_bit(WB_registered, &wb->state)) queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); spin_unlock_irqrestore(&wb->work_lock, flags); } bool __folio_end_writeback(struct folio *folio) { long nr = folio_nr_pages(folio); struct address_space *mapping = folio_mapping(folio); bool ret; folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback); __xa_clear_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_WRITEBACK); if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); wb_stat_mod(wb, WB_WRITEBACK, -nr); __wb_writeout_add(wb, nr); if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) wb_inode_writeback_end(wb); } if (mapping->host && !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) sb_clear_inode_writeback(mapping->host); xa_unlock_irqrestore(&mapping->i_pages, flags); } else { ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback); } lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); node_stat_mod_folio(folio, NR_WRITTEN, nr); folio_memcg_unlock(folio); return ret; } void __folio_start_writeback(struct folio *folio, bool keep_write) { long nr = folio_nr_pages(folio); struct address_space *mapping = folio_mapping(folio); int access_ret; VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; bool on_wblist; xas_lock_irqsave(&xas, flags); xas_load(&xas); folio_test_set_writeback(folio); on_wblist = mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); wb_stat_mod(wb, WB_WRITEBACK, nr); if (!on_wblist) wb_inode_writeback_start(wb); } /* * We can come through here when swapping anonymous * folios, so we don't necessarily have an inode to * track for sync. */ if (mapping->host && !on_wblist) sb_mark_inode_writeback(mapping->host); if (!folio_test_dirty(folio)) xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); if (!keep_write) xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irqrestore(&xas, flags); } else { folio_test_set_writeback(folio); } lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr); zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); folio_memcg_unlock(folio); access_ret = arch_make_folio_accessible(folio); /* * If writeback has been triggered on a page that cannot be made * accessible, it is too late to recover here. */ VM_BUG_ON_FOLIO(access_ret != 0, folio); } EXPORT_SYMBOL(__folio_start_writeback); /** * folio_wait_writeback - Wait for a folio to finish writeback. * @folio: The folio to wait for. * * If the folio is currently being written back to storage, wait for the * I/O to complete. * * Context: Sleeps. Must be called in process context and with * no spinlocks held. Caller should hold a reference on the folio. * If the folio is not locked, writeback may start again after writeback * has finished. 
*/ void folio_wait_writeback(struct folio *folio) { while (folio_test_writeback(folio)) { trace_folio_wait_writeback(folio, folio_mapping(folio)); folio_wait_bit(folio, PG_writeback); } } EXPORT_SYMBOL_GPL(folio_wait_writeback); /** * folio_wait_writeback_killable - Wait for a folio to finish writeback. * @folio: The folio to wait for. * * If the folio is currently being written back to storage, wait for the * I/O to complete or a fatal signal to arrive. * * Context: Sleeps. Must be called in process context and with * no spinlocks held. Caller should hold a reference on the folio. * If the folio is not locked, writeback may start again after writeback * has finished. * Return: 0 on success, -EINTR if we get a fatal signal while waiting. */ int folio_wait_writeback_killable(struct folio *folio) { while (folio_test_writeback(folio)) { trace_folio_wait_writeback(folio, folio_mapping(folio)); if (folio_wait_bit_killable(folio, PG_writeback)) return -EINTR; } return 0; } EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); /** * folio_wait_stable() - wait for writeback to finish, if necessary. * @folio: The folio to wait on. * * This function determines if the given folio is related to a backing * device that requires folio contents to be held stable during writeback. * If so, then it will wait for any pending writeback to complete. * * Context: Sleeps. Must be called in process context and with * no spinlocks held. Caller should hold a reference on the folio. * If the folio is not locked, writeback may start again after writeback * has finished. */ void folio_wait_stable(struct folio *folio) { if (mapping_stable_writes(folio_mapping(folio))) folio_wait_writeback(folio); } EXPORT_SYMBOL_GPL(folio_wait_stable);
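/*
 * Example (not part of the original file): a minimal sketch of how a write
 * path is expected to drive the dirty accounting and throttling implemented
 * above. example_dirty_folio() is a hypothetical helper; the two calls it
 * makes are the documented entry points from this file.
 */
static int example_dirty_folio(struct address_space *mapping,
			       struct folio *folio)
{
	/* Mark the folio dirty via the mapping's ->dirty_folio handler. */
	if (!folio_mark_dirty(folio))
		return 0;	/* was already dirty, nothing new to account */

	/*
	 * Let the dirty throttling code pace this task; it periodically
	 * checks the global/memcg dirty state and may sleep in
	 * balance_dirty_pages().
	 */
	balance_dirty_pages_ratelimited(mapping);
	return 1;
}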
// SPDX-License-Identifier: GPL-2.0+ /* * HID driver for UC-Logic devices not fully compliant with HID standard * - tablet initialization and parameter retrieval * * Copyright (c) 2018 Nikolai Kondrashov */ /* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. */ #include "hid-uclogic-params.h" #include "hid-uclogic-rdesc.h" #include "usbhid/usbhid.h" #include "hid-ids.h" #include <linux/ctype.h> #include <linux/string.h> #include <asm/unaligned.h> /** * uclogic_params_pen_inrange_to_str() - Convert a pen in-range reporting type * to a string. * @inrange: The in-range reporting type to convert. * * Return: * * The string representing the type, or * * %NULL if the type is unknown. */ static const char *uclogic_params_pen_inrange_to_str( enum uclogic_params_pen_inrange inrange) { switch (inrange) { case UCLOGIC_PARAMS_PEN_INRANGE_NORMAL: return "normal"; case UCLOGIC_PARAMS_PEN_INRANGE_INVERTED: return "inverted"; case UCLOGIC_PARAMS_PEN_INRANGE_NONE: return "none"; default: return NULL; } } /** * uclogic_params_pen_hid_dbg() - Dump tablet interface pen parameters * @hdev: The HID device the pen parameters describe. * @pen: The pen parameters to dump. * * Dump tablet interface pen parameters with hid_dbg(). The dump is indented * with a tab. */ static void uclogic_params_pen_hid_dbg(const struct hid_device *hdev, const struct uclogic_params_pen *pen) { size_t i; hid_dbg(hdev, "\t.usage_invalid = %s\n", (pen->usage_invalid ? "true" : "false")); hid_dbg(hdev, "\t.desc_ptr = %p\n", pen->desc_ptr); hid_dbg(hdev, "\t.desc_size = %u\n", pen->desc_size); hid_dbg(hdev, "\t.id = %u\n", pen->id); hid_dbg(hdev, "\t.subreport_list = {\n"); for (i = 0; i < ARRAY_SIZE(pen->subreport_list); i++) { hid_dbg(hdev, "\t\t{0x%02hhx, %hhu}%s\n", pen->subreport_list[i].value, pen->subreport_list[i].id, i < (ARRAY_SIZE(pen->subreport_list) - 1) ? "," : ""); } hid_dbg(hdev, "\t}\n"); hid_dbg(hdev, "\t.inrange = %s\n", uclogic_params_pen_inrange_to_str(pen->inrange)); hid_dbg(hdev, "\t.fragmented_hires = %s\n", (pen->fragmented_hires ? "true" : "false")); hid_dbg(hdev, "\t.tilt_y_flipped = %s\n", (pen->tilt_y_flipped ?
"true" : "false")); } /** * uclogic_params_frame_hid_dbg() - Dump tablet interface frame parameters * @hdev: The HID device the pen parameters describe. * @frame: The frame parameters to dump. * * Dump tablet interface frame parameters with hid_dbg(). The dump is * indented with two tabs. */ static void uclogic_params_frame_hid_dbg( const struct hid_device *hdev, const struct uclogic_params_frame *frame) { hid_dbg(hdev, "\t\t.desc_ptr = %p\n", frame->desc_ptr); hid_dbg(hdev, "\t\t.desc_size = %u\n", frame->desc_size); hid_dbg(hdev, "\t\t.id = %u\n", frame->id); hid_dbg(hdev, "\t\t.suffix = %s\n", frame->suffix); hid_dbg(hdev, "\t\t.re_lsb = %u\n", frame->re_lsb); hid_dbg(hdev, "\t\t.dev_id_byte = %u\n", frame->dev_id_byte); hid_dbg(hdev, "\t\t.touch_byte = %u\n", frame->touch_byte); hid_dbg(hdev, "\t\t.touch_max = %hhd\n", frame->touch_max); hid_dbg(hdev, "\t\t.touch_flip_at = %hhd\n", frame->touch_flip_at); hid_dbg(hdev, "\t\t.bitmap_dial_byte = %u\n", frame->bitmap_dial_byte); } /** * uclogic_params_hid_dbg() - Dump tablet interface parameters * @hdev: The HID device the parameters describe. * @params: The parameters to dump. * * Dump tablet interface parameters with hid_dbg(). */ void uclogic_params_hid_dbg(const struct hid_device *hdev, const struct uclogic_params *params) { size_t i; hid_dbg(hdev, ".invalid = %s\n", params->invalid ? "true" : "false"); hid_dbg(hdev, ".desc_ptr = %p\n", params->desc_ptr); hid_dbg(hdev, ".desc_size = %u\n", params->desc_size); hid_dbg(hdev, ".pen = {\n"); uclogic_params_pen_hid_dbg(hdev, &params->pen); hid_dbg(hdev, "\t}\n"); hid_dbg(hdev, ".frame_list = {\n"); for (i = 0; i < ARRAY_SIZE(params->frame_list); i++) { hid_dbg(hdev, "\t{\n"); uclogic_params_frame_hid_dbg(hdev, &params->frame_list[i]); hid_dbg(hdev, "\t}%s\n", i < (ARRAY_SIZE(params->frame_list) - 1) ? "," : ""); } hid_dbg(hdev, "}\n"); } /** * uclogic_params_get_str_desc - retrieve a string descriptor from a HID * device interface, putting it into a kmalloc-allocated buffer as is, without * character encoding conversion. * * @pbuf: Location for the kmalloc-allocated buffer pointer containing * the retrieved descriptor. Not modified in case of error. * Can be NULL to have retrieved descriptor discarded. * @hdev: The HID device of the tablet interface to retrieve the string * descriptor from. Cannot be NULL. * @idx: Index of the string descriptor to request from the device. * @len: Length of the buffer to allocate and the data to retrieve. * * Returns: * number of bytes retrieved (<= len), * -EPIPE, if the descriptor was not found, or * another negative errno code in case of other error. */ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev, __u8 idx, size_t len) { int rc; struct usb_device *udev; __u8 *buf = NULL; /* Check arguments */ if (hdev == NULL) { rc = -EINVAL; goto cleanup; } udev = hid_to_usb_dev(hdev); buf = kmalloc(len, GFP_KERNEL); if (buf == NULL) { rc = -ENOMEM; goto cleanup; } rc = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, (USB_DT_STRING << 8) + idx, 0x0409, buf, len, USB_CTRL_GET_TIMEOUT); if (rc == -EPIPE) { hid_dbg(hdev, "string descriptor #%hhu not found\n", idx); goto cleanup; } else if (rc < 0) { hid_err(hdev, "failed retrieving string descriptor #%u: %d\n", idx, rc); goto cleanup; } if (pbuf != NULL) { *pbuf = buf; buf = NULL; } cleanup: kfree(buf); return rc; } /** * uclogic_params_pen_cleanup - free resources used by struct * uclogic_params_pen (tablet interface's pen input parameters). 
* Can be called repeatedly. * * @pen: Pen input parameters to cleanup. Cannot be NULL. */ static void uclogic_params_pen_cleanup(struct uclogic_params_pen *pen) { kfree(pen->desc_ptr); memset(pen, 0, sizeof(*pen)); } /** * uclogic_params_pen_init_v1() - initialize tablet interface pen * input and retrieve its parameters from the device, using v1 protocol. * * @pen: Pointer to the pen parameters to initialize (to be * cleaned up with uclogic_params_pen_cleanup()). Not modified in * case of error, or if parameters are not found. Cannot be NULL. * @pfound: Location for a flag which is set to true if the parameters * were found, and to false if not (e.g. device was * incompatible). Not modified in case of error. Cannot be NULL. * @hdev: The HID device of the tablet interface to initialize and get * parameters from. Cannot be NULL. * * Returns: * Zero, if successful. A negative errno code on error. */ static int uclogic_params_pen_init_v1(struct uclogic_params_pen *pen, bool *pfound, struct hid_device *hdev) { int rc; bool found = false; /* Buffer for (part of) the string descriptor */ __u8 *buf = NULL; /* Minimum descriptor length required, maximum seen so far is 18 */ const int len = 12; s32 resolution; /* Pen report descriptor template parameters */ s32 desc_params[UCLOGIC_RDESC_PH_ID_NUM]; __u8 *desc_ptr = NULL; /* Check arguments */ if (pen == NULL || pfound == NULL || hdev == NULL) { rc = -EINVAL; goto cleanup; } /* * Read string descriptor containing pen input parameters. * The specific string descriptor and data were discovered by sniffing * the Windows driver traffic. * NOTE: This enables fully-functional tablet mode. */ rc = uclogic_params_get_str_desc(&buf, hdev, 100, len); if (rc == -EPIPE) { hid_dbg(hdev, "string descriptor with pen parameters not found, assuming not compatible\n"); goto finish; } else if (rc < 0) { hid_err(hdev, "failed retrieving pen parameters: %d\n", rc); goto cleanup; } else if (rc != len) { hid_dbg(hdev, "string descriptor with pen parameters has invalid length (got %d, expected %d), assuming not compatible\n", rc, len); goto finish; } /* * Fill report descriptor parameters from the string descriptor */ desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_LM] = get_unaligned_le16(buf + 2); desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_LM] = get_unaligned_le16(buf + 4); desc_params[UCLOGIC_RDESC_PEN_PH_ID_PRESSURE_LM] = get_unaligned_le16(buf + 8); resolution = get_unaligned_le16(buf + 10); if (resolution == 0) { desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_PM] = 0; desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_PM] = 0; } else { desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_PM] = desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_LM] * 1000 / resolution; desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_PM] = desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_LM] * 1000 / resolution; } kfree(buf); buf = NULL; /* * Generate pen report descriptor */ desc_ptr = uclogic_rdesc_template_apply( uclogic_rdesc_v1_pen_template_arr, uclogic_rdesc_v1_pen_template_size, desc_params, ARRAY_SIZE(desc_params)); if (desc_ptr == NULL) { rc = -ENOMEM; goto cleanup; } /* * Fill-in the parameters */ memset(pen, 0, sizeof(*pen)); pen->desc_ptr = desc_ptr; desc_ptr = NULL; pen->desc_size = uclogic_rdesc_v1_pen_template_size; pen->id = UCLOGIC_RDESC_V1_PEN_ID; pen->inrange = UCLOGIC_PARAMS_PEN_INRANGE_INVERTED; found = true; finish: *pfound = found; rc = 0; cleanup: kfree(desc_ptr); kfree(buf); return rc; } /** * uclogic_params_get_le24() - get a 24-bit little-endian number from a * buffer. * * @p: The pointer to the number buffer. 
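 *
 * For example, the bytes {0x10, 0x27, 0x00} decode to 0x002710 == 10000.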
* * Returns: * The retrieved number */ static s32 uclogic_params_get_le24(const void *p) { const __u8 *b = p; return b[0] | (b[1] << 8UL) | (b[2] << 16UL); } /** * uclogic_params_pen_init_v2() - initialize tablet interface pen * input and retrieve its parameters from the device, using v2 protocol. * * @pen: Pointer to the pen parameters to initialize (to be * cleaned up with uclogic_params_pen_cleanup()). Not * modified in case of error, or if parameters are not * found. Cannot be NULL. * @pfound: Location for a flag which is set to true if the * parameters were found, and to false if not (e.g. * device was incompatible). Not modified in case of * error. Cannot be NULL. * @pparams_ptr: Location for a kmalloc'ed pointer to the retrieved raw * parameters, which could be used to identify the tablet * to some extent. Should be freed with kfree after use. * NULL, if not needed. Not modified in case of error. * Only set if *pfound is set to true. * @pparams_len: Location for the length of the retrieved raw * parameters. NULL, if not needed. Not modified in case * of error. Only set if *pfound is set to true. * @hdev: The HID device of the tablet interface to initialize * and get parameters from. Cannot be NULL. * * Returns: * Zero, if successful. A negative errno code on error. */ static int uclogic_params_pen_init_v2(struct uclogic_params_pen *pen, bool *pfound, __u8 **pparams_ptr, size_t *pparams_len, struct hid_device *hdev) { int rc; bool found = false; /* Buffer for (part of) the parameter string descriptor */ __u8 *buf = NULL; /* Parameter string descriptor required length */ const int params_len_min = 18; /* Parameter string descriptor accepted length */ const int params_len_max = 32; /* Parameter string descriptor received length */ int params_len; size_t i; s32 resolution; /* Pen report descriptor template parameters */ s32 desc_params[UCLOGIC_RDESC_PH_ID_NUM]; __u8 *desc_ptr = NULL; /* Check arguments */ if (pen == NULL || pfound == NULL || hdev == NULL) { rc = -EINVAL; goto cleanup; } /* * Read string descriptor containing pen input parameters. * The specific string descriptor and data were discovered by sniffing * the Windows driver traffic. * NOTE: This enables fully-functional tablet mode. */ rc = uclogic_params_get_str_desc(&buf, hdev, 200, params_len_max); if (rc == -EPIPE) { hid_dbg(hdev, "string descriptor with pen parameters not found, assuming not compatible\n"); goto finish; } else if (rc < 0) { hid_err(hdev, "failed retrieving pen parameters: %d\n", rc); goto cleanup; } else if (rc < params_len_min) { hid_dbg(hdev, "string descriptor with pen parameters is too short (got %d, expected at least %d), assuming not compatible\n", rc, params_len_min); goto finish; } params_len = rc; /* * Check it's not just a catch-all UTF-16LE-encoded ASCII * string (such as the model name) some tablets put into all * unknown string descriptors. 
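 * For example, a descriptor whose payload is just the text "TABLET" in
 * UTF-16LE (54 00 41 00 42 00 4c 00 45 00 54 00) consists entirely of
 * printable ASCII code units with zero high bytes and is rejected by the
 * scan below.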
*/ for (i = 2; i < params_len && (buf[i] >= 0x20 && buf[i] < 0x7f && buf[i + 1] == 0); i += 2); if (i >= params_len) { hid_dbg(hdev, "string descriptor with pen parameters seems to contain only text, assuming not compatible\n"); goto finish; } /* * Fill report descriptor parameters from the string descriptor */ desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_LM] = uclogic_params_get_le24(buf + 2); desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_LM] = uclogic_params_get_le24(buf + 5); desc_params[UCLOGIC_RDESC_PEN_PH_ID_PRESSURE_LM] = get_unaligned_le16(buf + 8); resolution = get_unaligned_le16(buf + 10); if (resolution == 0) { desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_PM] = 0; desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_PM] = 0; } else { desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_PM] = desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_LM] * 1000 / resolution; desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_PM] = desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_LM] * 1000 / resolution; } /* * Generate pen report descriptor */ desc_ptr = uclogic_rdesc_template_apply( uclogic_rdesc_v2_pen_template_arr, uclogic_rdesc_v2_pen_template_size, desc_params, ARRAY_SIZE(desc_params)); if (desc_ptr == NULL) { rc = -ENOMEM; goto cleanup; } /* * Fill-in the parameters */ memset(pen, 0, sizeof(*pen)); pen->desc_ptr = desc_ptr; desc_ptr = NULL; pen->desc_size = uclogic_rdesc_v2_pen_template_size; pen->id = UCLOGIC_RDESC_V2_PEN_ID; pen->inrange = UCLOGIC_PARAMS_PEN_INRANGE_NONE; pen->fragmented_hires = true; pen->tilt_y_flipped = true; found = true; if (pparams_ptr != NULL) { *pparams_ptr = buf; buf = NULL; } if (pparams_len != NULL) *pparams_len = params_len; finish: *pfound = found; rc = 0; cleanup: kfree(desc_ptr); kfree(buf); return rc; } /** * uclogic_params_frame_cleanup - free resources used by struct * uclogic_params_frame (tablet interface's frame controls input parameters). * Can be called repeatedly. * * @frame: Frame controls input parameters to cleanup. Cannot be NULL. */ static void uclogic_params_frame_cleanup(struct uclogic_params_frame *frame) { kfree(frame->desc_ptr); memset(frame, 0, sizeof(*frame)); } /** * uclogic_params_frame_init_with_desc() - initialize tablet's frame control * parameters with a static report descriptor. * * @frame: Pointer to the frame parameters to initialize (to be cleaned * up with uclogic_params_frame_cleanup()). Not modified in case * of error. Cannot be NULL. * @desc_ptr: Report descriptor pointer. Can be NULL, if desc_size is zero. * @desc_size: Report descriptor size. * @id: Report ID used for frame reports, if they should be tweaked, * zero if not. * * Returns: * Zero, if successful. A negative errno code on error. */ static int uclogic_params_frame_init_with_desc( struct uclogic_params_frame *frame, const __u8 *desc_ptr, size_t desc_size, unsigned int id) { __u8 *copy_desc_ptr; if (frame == NULL || (desc_ptr == NULL && desc_size != 0)) return -EINVAL; copy_desc_ptr = kmemdup(desc_ptr, desc_size, GFP_KERNEL); if (copy_desc_ptr == NULL) return -ENOMEM; memset(frame, 0, sizeof(*frame)); frame->desc_ptr = copy_desc_ptr; frame->desc_size = desc_size; frame->id = id; return 0; } /** * uclogic_params_frame_init_v1() - initialize v1 tablet interface frame * controls. * * @frame: Pointer to the frame parameters to initialize (to be cleaned * up with uclogic_params_frame_cleanup()). Not modified in case * of error, or if parameters are not found. Cannot be NULL. * @pfound: Location for a flag which is set to true if the parameters * were found, and to false if not (e.g. device was * incompatible). Not modified in case of error. 
 *		Cannot be NULL.
 * @hdev:	The HID device of the tablet interface to initialize and get
 *		parameters from. Cannot be NULL.
 *
 * Returns:
 *	Zero, if successful. A negative errno code on error.
 */
static int uclogic_params_frame_init_v1(struct uclogic_params_frame *frame,
					bool *pfound,
					struct hid_device *hdev)
{
	int rc;
	bool found = false;
	struct usb_device *usb_dev;
	char *str_buf = NULL;
	const size_t str_len = 16;

	/* Check arguments */
	if (frame == NULL || pfound == NULL || hdev == NULL) {
		rc = -EINVAL;
		goto cleanup;
	}

	usb_dev = hid_to_usb_dev(hdev);

	/*
	 * Enable generic button mode
	 */
	str_buf = kzalloc(str_len, GFP_KERNEL);
	if (str_buf == NULL) {
		rc = -ENOMEM;
		goto cleanup;
	}

	rc = usb_string(usb_dev, 123, str_buf, str_len);
	if (rc == -EPIPE) {
		hid_dbg(hdev,
			"generic button-enabling string descriptor not found\n");
	} else if (rc < 0) {
		goto cleanup;
	} else if (strncmp(str_buf, "HK On", rc) != 0) {
		hid_dbg(hdev,
			"invalid response to enabling generic buttons: \"%s\"\n",
			str_buf);
	} else {
		hid_dbg(hdev, "generic buttons enabled\n");
		rc = uclogic_params_frame_init_with_desc(
				frame,
				uclogic_rdesc_v1_frame_arr,
				uclogic_rdesc_v1_frame_size,
				UCLOGIC_RDESC_V1_FRAME_ID);
		if (rc != 0)
			goto cleanup;
		found = true;
	}

	*pfound = found;
	rc = 0;
cleanup:
	kfree(str_buf);
	return rc;
}

/**
 * uclogic_params_cleanup_event_hooks - free resources used by the list of raw
 * event hooks.
 * Can be called repeatedly.
 *
 * @params: Input parameters to cleanup. Cannot be NULL.
 */
static void uclogic_params_cleanup_event_hooks(struct uclogic_params *params)
{
	struct uclogic_raw_event_hook *curr, *n;

	if (!params || !params->event_hooks)
		return;

	list_for_each_entry_safe(curr, n, &params->event_hooks->list, list) {
		cancel_work_sync(&curr->work);
		list_del(&curr->list);
		kfree(curr->event);
		kfree(curr);
	}

	kfree(params->event_hooks);
	params->event_hooks = NULL;
}

/**
 * uclogic_params_cleanup - free resources used by struct uclogic_params
 * (tablet interface's parameters).
 * Can be called repeatedly.
 *
 * @params: Input parameters to cleanup. Cannot be NULL.
 */
void uclogic_params_cleanup(struct uclogic_params *params)
{
	if (!params->invalid) {
		size_t i;

		kfree(params->desc_ptr);
		uclogic_params_pen_cleanup(&params->pen);
		for (i = 0; i < ARRAY_SIZE(params->frame_list); i++)
			uclogic_params_frame_cleanup(&params->frame_list[i]);

		uclogic_params_cleanup_event_hooks(params);
		memset(params, 0, sizeof(*params));
	}
}

/**
 * uclogic_params_get_desc() - Get a replacement report descriptor for a
 * tablet's interface.
 *
 * @params:	The parameters of a tablet interface to get report
 *		descriptor for. Cannot be NULL.
 * @pdesc:	Location for the resulting, kmalloc-allocated report
 *		descriptor pointer, or for NULL, if there's no replacement
 *		report descriptor. Not modified in case of error. Cannot be
 *		NULL.
 * @psize:	Location for the resulting report descriptor size, not set if
 *		there's no replacement report descriptor. Not modified in case
 *		of error. Cannot be NULL.
 *
 * Returns:
 *	Zero, if successful.
 *	-EINVAL, if invalid arguments are supplied.
 *	-ENOMEM, if failed to allocate memory.
*/ int uclogic_params_get_desc(const struct uclogic_params *params, __u8 **pdesc, unsigned int *psize) { int rc = -ENOMEM; bool present = false; unsigned int size = 0; __u8 *desc = NULL; size_t i; /* Check arguments */ if (params == NULL || pdesc == NULL || psize == NULL) return -EINVAL; /* Concatenate descriptors */ #define ADD_DESC(_desc_ptr, _desc_size) \ do { \ unsigned int new_size; \ __u8 *new_desc; \ if ((_desc_ptr) == NULL) { \ break; \ } \ new_size = size + (_desc_size); \ new_desc = krealloc(desc, new_size, GFP_KERNEL); \ if (new_desc == NULL) { \ goto cleanup; \ } \ memcpy(new_desc + size, (_desc_ptr), (_desc_size)); \ desc = new_desc; \ size = new_size; \ present = true; \ } while (0) ADD_DESC(params->desc_ptr, params->desc_size); ADD_DESC(params->pen.desc_ptr, params->pen.desc_size); for (i = 0; i < ARRAY_SIZE(params->frame_list); i++) { ADD_DESC(params->frame_list[i].desc_ptr, params->frame_list[i].desc_size); } #undef ADD_DESC if (present) { *pdesc = desc; *psize = size; desc = NULL; } rc = 0; cleanup: kfree(desc); return rc; } /** * uclogic_params_init_invalid() - initialize tablet interface parameters, * specifying the interface is invalid. * * @params: Parameters to initialize (to be cleaned with * uclogic_params_cleanup()). Cannot be NULL. */ static void uclogic_params_init_invalid(struct uclogic_params *params) { params->invalid = true; } /** * uclogic_params_init_with_opt_desc() - initialize tablet interface * parameters with an optional replacement report descriptor. Only modify * report descriptor, if the original report descriptor matches the expected * size. * * @params: Parameters to initialize (to be cleaned with * uclogic_params_cleanup()). Not modified in case of * error. Cannot be NULL. * @hdev: The HID device of the tablet interface create the * parameters for. Cannot be NULL. * @orig_desc_size: Expected size of the original report descriptor to * be replaced. * @desc_ptr: Pointer to the replacement report descriptor. * Can be NULL, if desc_size is zero. * @desc_size: Size of the replacement report descriptor. * * Returns: * Zero, if successful. -EINVAL if an invalid argument was passed. * -ENOMEM, if failed to allocate memory. */ static int uclogic_params_init_with_opt_desc(struct uclogic_params *params, struct hid_device *hdev, unsigned int orig_desc_size, __u8 *desc_ptr, unsigned int desc_size) { __u8 *desc_copy_ptr = NULL; unsigned int desc_copy_size; int rc; /* Check arguments */ if (params == NULL || hdev == NULL || (desc_ptr == NULL && desc_size != 0)) { rc = -EINVAL; goto cleanup; } /* Replace report descriptor, if it matches */ if (hdev->dev_rsize == orig_desc_size) { hid_dbg(hdev, "device report descriptor matches the expected size, replacing\n"); desc_copy_ptr = kmemdup(desc_ptr, desc_size, GFP_KERNEL); if (desc_copy_ptr == NULL) { rc = -ENOMEM; goto cleanup; } desc_copy_size = desc_size; } else { hid_dbg(hdev, "device report descriptor doesn't match the expected size (%u != %u), preserving\n", hdev->dev_rsize, orig_desc_size); desc_copy_ptr = NULL; desc_copy_size = 0; } /* Output parameters */ memset(params, 0, sizeof(*params)); params->desc_ptr = desc_copy_ptr; desc_copy_ptr = NULL; params->desc_size = desc_copy_size; rc = 0; cleanup: kfree(desc_copy_ptr); return rc; } /** * uclogic_params_huion_init() - initialize a Huion tablet interface and discover * its parameters. * * @params: Parameters to fill in (to be cleaned with * uclogic_params_cleanup()). Not modified in case of error. * Cannot be NULL. 
* @hdev: The HID device of the tablet interface to initialize and get * parameters from. Cannot be NULL. * * Returns: * Zero, if successful. A negative errno code on error. */ static int uclogic_params_huion_init(struct uclogic_params *params, struct hid_device *hdev) { int rc; struct usb_device *udev; struct usb_interface *iface; __u8 bInterfaceNumber; bool found; /* The resulting parameters (noop) */ struct uclogic_params p = {0, }; static const char transition_ver[] = "HUION_T153_160607"; char *ver_ptr = NULL; const size_t ver_len = sizeof(transition_ver) + 1; __u8 *params_ptr = NULL; size_t params_len = 0; /* Parameters string descriptor of a model with touch ring (HS610) */ const __u8 touch_ring_model_params_buf[] = { 0x13, 0x03, 0x70, 0xC6, 0x00, 0x06, 0x7C, 0x00, 0xFF, 0x1F, 0xD8, 0x13, 0x03, 0x0D, 0x10, 0x01, 0x04, 0x3C, 0x3E }; /* Check arguments */ if (params == NULL || hdev == NULL) { rc = -EINVAL; goto cleanup; } udev = hid_to_usb_dev(hdev); iface = to_usb_interface(hdev->dev.parent); bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; /* If it's a custom keyboard interface */ if (bInterfaceNumber == 1) { /* Keep everything intact, but mark pen usage invalid */ p.pen.usage_invalid = true; goto output; /* Else, if it's not a pen interface */ } else if (bInterfaceNumber != 0) { uclogic_params_init_invalid(&p); goto output; } /* Try to get firmware version */ ver_ptr = kzalloc(ver_len, GFP_KERNEL); if (ver_ptr == NULL) { rc = -ENOMEM; goto cleanup; } rc = usb_string(udev, 201, ver_ptr, ver_len); if (rc == -EPIPE) { *ver_ptr = '\0'; } else if (rc < 0) { hid_err(hdev, "failed retrieving Huion firmware version: %d\n", rc); goto cleanup; } /* If this is a transition firmware */ if (strcmp(ver_ptr, transition_ver) == 0) { hid_dbg(hdev, "transition firmware detected, not probing pen v2 parameters\n"); } else { /* Try to probe v2 pen parameters */ rc = uclogic_params_pen_init_v2(&p.pen, &found, &params_ptr, &params_len, hdev); if (rc != 0) { hid_err(hdev, "failed probing pen v2 parameters: %d\n", rc); goto cleanup; } else if (found) { hid_dbg(hdev, "pen v2 parameters found\n"); /* Create v2 frame button parameters */ rc = uclogic_params_frame_init_with_desc( &p.frame_list[0], uclogic_rdesc_v2_frame_buttons_arr, uclogic_rdesc_v2_frame_buttons_size, UCLOGIC_RDESC_V2_FRAME_BUTTONS_ID); if (rc != 0) { hid_err(hdev, "failed creating v2 frame button parameters: %d\n", rc); goto cleanup; } /* Link from pen sub-report */ p.pen.subreport_list[0].value = 0xe0; p.pen.subreport_list[0].id = UCLOGIC_RDESC_V2_FRAME_BUTTONS_ID; /* If this is the model with touch ring */ if (params_ptr != NULL && params_len == sizeof(touch_ring_model_params_buf) && memcmp(params_ptr, touch_ring_model_params_buf, params_len) == 0) { /* Create touch ring parameters */ rc = uclogic_params_frame_init_with_desc( &p.frame_list[1], uclogic_rdesc_v2_frame_touch_ring_arr, uclogic_rdesc_v2_frame_touch_ring_size, UCLOGIC_RDESC_V2_FRAME_TOUCH_ID); if (rc != 0) { hid_err(hdev, "failed creating v2 frame touch ring parameters: %d\n", rc); goto cleanup; } p.frame_list[1].suffix = "Touch Ring"; p.frame_list[1].dev_id_byte = UCLOGIC_RDESC_V2_FRAME_TOUCH_DEV_ID_BYTE; p.frame_list[1].touch_byte = 5; p.frame_list[1].touch_max = 12; p.frame_list[1].touch_flip_at = 7; } else { /* Create touch strip parameters */ rc = uclogic_params_frame_init_with_desc( &p.frame_list[1], uclogic_rdesc_v2_frame_touch_strip_arr, uclogic_rdesc_v2_frame_touch_strip_size, UCLOGIC_RDESC_V2_FRAME_TOUCH_ID); if (rc != 0) { hid_err(hdev, "failed creating 
v2 frame touch strip parameters: %d\n", rc); goto cleanup; } p.frame_list[1].suffix = "Touch Strip"; p.frame_list[1].dev_id_byte = UCLOGIC_RDESC_V2_FRAME_TOUCH_DEV_ID_BYTE; p.frame_list[1].touch_byte = 5; p.frame_list[1].touch_max = 8; } /* Link from pen sub-report */ p.pen.subreport_list[1].value = 0xf0; p.pen.subreport_list[1].id = UCLOGIC_RDESC_V2_FRAME_TOUCH_ID; /* Create v2 frame dial parameters */ rc = uclogic_params_frame_init_with_desc( &p.frame_list[2], uclogic_rdesc_v2_frame_dial_arr, uclogic_rdesc_v2_frame_dial_size, UCLOGIC_RDESC_V2_FRAME_DIAL_ID); if (rc != 0) { hid_err(hdev, "failed creating v2 frame dial parameters: %d\n", rc); goto cleanup; } p.frame_list[2].suffix = "Dial"; p.frame_list[2].dev_id_byte = UCLOGIC_RDESC_V2_FRAME_DIAL_DEV_ID_BYTE; p.frame_list[2].bitmap_dial_byte = 5; /* Link from pen sub-report */ p.pen.subreport_list[2].value = 0xf1; p.pen.subreport_list[2].id = UCLOGIC_RDESC_V2_FRAME_DIAL_ID; goto output; } hid_dbg(hdev, "pen v2 parameters not found\n"); } /* Try to probe v1 pen parameters */ rc = uclogic_params_pen_init_v1(&p.pen, &found, hdev); if (rc != 0) { hid_err(hdev, "failed probing pen v1 parameters: %d\n", rc); goto cleanup; } else if (found) { hid_dbg(hdev, "pen v1 parameters found\n"); /* Try to probe v1 frame */ rc = uclogic_params_frame_init_v1(&p.frame_list[0], &found, hdev); if (rc != 0) { hid_err(hdev, "v1 frame probing failed: %d\n", rc); goto cleanup; } hid_dbg(hdev, "frame v1 parameters%s found\n", (found ? "" : " not")); if (found) { /* Link frame button subreports from pen reports */ p.pen.subreport_list[0].value = 0xe0; p.pen.subreport_list[0].id = UCLOGIC_RDESC_V1_FRAME_ID; } goto output; } hid_dbg(hdev, "pen v1 parameters not found\n"); uclogic_params_init_invalid(&p); output: /* Output parameters */ memcpy(params, &p, sizeof(*params)); memset(&p, 0, sizeof(p)); rc = 0; cleanup: kfree(params_ptr); kfree(ver_ptr); uclogic_params_cleanup(&p); return rc; } /** * uclogic_probe_interface() - some tablets, like the Parblo A610 PLUS V2 or * the XP-PEN Deco Mini 7, need to be initialized by sending them magic data. * * @hdev: The HID device of the tablet interface to initialize and get * parameters from. Cannot be NULL. * @magic_arr: The magic data that should be sent to probe the interface. * Cannot be NULL. * @magic_size: Size of the magic data. * @endpoint: Endpoint where the magic data should be sent. * * Returns: * Zero, if successful. A negative errno code on error. */ static int uclogic_probe_interface(struct hid_device *hdev, const u8 *magic_arr, size_t magic_size, int endpoint) { struct usb_device *udev; unsigned int pipe = 0; int sent; u8 *buf = NULL; int rc = 0; if (!hdev || !magic_arr) { rc = -EINVAL; goto cleanup; } buf = kmemdup(magic_arr, magic_size, GFP_KERNEL); if (!buf) { rc = -ENOMEM; goto cleanup; } udev = hid_to_usb_dev(hdev); pipe = usb_sndintpipe(udev, endpoint); rc = usb_interrupt_msg(udev, pipe, buf, magic_size, &sent, 1000); if (rc || sent != magic_size) { hid_err(hdev, "Interface probing failed: %d\n", rc); rc = -1; goto cleanup; } rc = 0; cleanup: kfree(buf); return rc; } /** * uclogic_params_parse_ugee_v2_desc - parse the string descriptor containing * pen and frame parameters returned by UGEE v2 devices. * * @str_desc: String descriptor, cannot be NULL. * @str_desc_size: Size of the string descriptor. * @desc_params: Output description params list. * @desc_params_size: Size of the output description params list. * @frame_type: Output frame type. * * Returns: * Zero, if successful. 
A negative errno code on error. */ static int uclogic_params_parse_ugee_v2_desc(const __u8 *str_desc, size_t str_desc_size, s32 *desc_params, size_t desc_params_size, enum uclogic_params_frame_type *frame_type) { s32 pen_x_lm, pen_y_lm; s32 pen_x_pm, pen_y_pm; s32 pen_pressure_lm; s32 frame_num_buttons; s32 resolution; /* Minimum descriptor length required, maximum seen so far is 14 */ const int min_str_desc_size = 12; if (!str_desc || str_desc_size < min_str_desc_size) return -EINVAL; if (desc_params_size != UCLOGIC_RDESC_PH_ID_NUM) return -EINVAL; pen_x_lm = get_unaligned_le16(str_desc + 2); pen_y_lm = get_unaligned_le16(str_desc + 4); frame_num_buttons = str_desc[6]; *frame_type = str_desc[7]; pen_pressure_lm = get_unaligned_le16(str_desc + 8); resolution = get_unaligned_le16(str_desc + 10); if (resolution == 0) { pen_x_pm = 0; pen_y_pm = 0; } else { pen_x_pm = pen_x_lm * 1000 / resolution; pen_y_pm = pen_y_lm * 1000 / resolution; } desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_LM] = pen_x_lm; desc_params[UCLOGIC_RDESC_PEN_PH_ID_X_PM] = pen_x_pm; desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_LM] = pen_y_lm; desc_params[UCLOGIC_RDESC_PEN_PH_ID_Y_PM] = pen_y_pm; desc_params[UCLOGIC_RDESC_PEN_PH_ID_PRESSURE_LM] = pen_pressure_lm; desc_params[UCLOGIC_RDESC_FRAME_PH_ID_UM] = frame_num_buttons; return 0; } /** * uclogic_params_ugee_v2_init_frame_buttons() - initialize a UGEE v2 frame with * buttons. * @p: Parameters to fill in, cannot be NULL. * @desc_params: Device description params list. * @desc_params_size: Size of the description params list. * * Returns: * Zero, if successful. A negative errno code on error. */ static int uclogic_params_ugee_v2_init_frame_buttons(struct uclogic_params *p, const s32 *desc_params, size_t desc_params_size) { __u8 *rdesc_frame = NULL; int rc = 0; if (!p || desc_params_size != UCLOGIC_RDESC_PH_ID_NUM) return -EINVAL; rdesc_frame = uclogic_rdesc_template_apply( uclogic_rdesc_ugee_v2_frame_btn_template_arr, uclogic_rdesc_ugee_v2_frame_btn_template_size, desc_params, UCLOGIC_RDESC_PH_ID_NUM); if (!rdesc_frame) return -ENOMEM; rc = uclogic_params_frame_init_with_desc(&p->frame_list[0], rdesc_frame, uclogic_rdesc_ugee_v2_frame_btn_template_size, UCLOGIC_RDESC_V1_FRAME_ID); kfree(rdesc_frame); return rc; } /** * uclogic_params_ugee_v2_init_frame_dial() - initialize a UGEE v2 frame with a * bitmap dial. * @p: Parameters to fill in, cannot be NULL. * @desc_params: Device description params list. * @desc_params_size: Size of the description params list. * * Returns: * Zero, if successful. A negative errno code on error. */ static int uclogic_params_ugee_v2_init_frame_dial(struct uclogic_params *p, const s32 *desc_params, size_t desc_params_size) { __u8 *rdesc_frame = NULL; int rc = 0; if (!p || desc_params_size != UCLOGIC_RDESC_PH_ID_NUM) return -EINVAL; rdesc_frame = uclogic_rdesc_template_apply( uclogic_rdesc_ugee_v2_frame_dial_template_arr, uclogic_rdesc_ugee_v2_frame_dial_template_size, desc_params, UCLOGIC_RDESC_PH_ID_NUM); if (!rdesc_frame) return -ENOMEM; rc = uclogic_params_frame_init_with_desc(&p->frame_list[0], rdesc_frame, uclogic_rdesc_ugee_v2_frame_dial_template_size, UCLOGIC_RDESC_V1_FRAME_ID); kfree(rdesc_frame); if (rc) return rc; p->frame_list[0].bitmap_dial_byte = 7; return 0; } /** * uclogic_params_ugee_v2_init_frame_mouse() - initialize a UGEE v2 frame with a * mouse. * @p: Parameters to fill in, cannot be NULL. * * Returns: * Zero, if successful. A negative errno code on error. 
 */
static int uclogic_params_ugee_v2_init_frame_mouse(struct uclogic_params *p)
{
	int rc = 0;

	if (!p)
		return -EINVAL;

	rc = uclogic_params_frame_init_with_desc(&p->frame_list[1],
						 uclogic_rdesc_ugee_v2_frame_mouse_template_arr,
						 uclogic_rdesc_ugee_v2_frame_mouse_template_size,
						 UCLOGIC_RDESC_V1_FRAME_ID);
	return rc;
}

/**
 * uclogic_params_ugee_v2_has_battery() - check whether a UGEE v2 device has
 * battery or not.
 * @hdev:	The HID device of the tablet interface.
 *
 * Returns:
 *	True if the device has battery, false otherwise.
 */
static bool uclogic_params_ugee_v2_has_battery(struct hid_device *hdev)
{
	struct uclogic_drvdata *drvdata = hid_get_drvdata(hdev);

	if (drvdata->quirks & UCLOGIC_BATTERY_QUIRK)
		return true;

	/* The XP-PEN Deco LW vendor, product and version are identical to the
	 * Deco L. The only difference reported by their firmware is the
	 * product name. Add a quirk to support battery reporting on the
	 * wireless version.
	 */
	if (hdev->vendor == USB_VENDOR_ID_UGEE &&
	    hdev->product == USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L) {
		struct usb_device *udev = hid_to_usb_dev(hdev);

		if (strstarts(udev->product, "Deco LW"))
			return true;
	}

	return false;
}

/**
 * uclogic_params_ugee_v2_init_battery() - initialize UGEE v2 battery reporting.
 * @hdev:	The HID device of the tablet interface, cannot be NULL.
 * @p:		Parameters to fill in, cannot be NULL.
 *
 * Returns:
 *	Zero, if successful. A negative errno code on error.
 */
static int uclogic_params_ugee_v2_init_battery(struct hid_device *hdev,
					       struct uclogic_params *p)
{
	int rc = 0;

	if (!hdev || !p)
		return -EINVAL;

	/* Some tablets contain invalid characters in hdev->uniq, throwing a
	 * "hwmon: '<name>' is not a valid name attribute, please fix" error.
	 * Use the device vendor and product IDs instead.
	 */
	snprintf(hdev->uniq, sizeof(hdev->uniq), "%x-%x", hdev->vendor,
		 hdev->product);

	rc = uclogic_params_frame_init_with_desc(&p->frame_list[1],
						 uclogic_rdesc_ugee_v2_battery_template_arr,
						 uclogic_rdesc_ugee_v2_battery_template_size,
						 UCLOGIC_RDESC_UGEE_V2_BATTERY_ID);
	if (rc)
		return rc;

	p->frame_list[1].suffix = "Battery";
	p->pen.subreport_list[1].value = 0xf2;
	p->pen.subreport_list[1].id = UCLOGIC_RDESC_UGEE_V2_BATTERY_ID;

	return rc;
}

/**
 * uclogic_params_ugee_v2_reconnect_work() - When a wireless tablet loses
 * connection to the USB dongle and reconnects, either because of its physical
 * distance or because it was switched off and on using the frame's switch,
 * uclogic_probe_interface() needs to be called again to enable the tablet.
 *
 * @work: The work that triggered this function.
 */
static void uclogic_params_ugee_v2_reconnect_work(struct work_struct *work)
{
	struct uclogic_raw_event_hook *event_hook;

	event_hook = container_of(work, struct uclogic_raw_event_hook, work);
	uclogic_probe_interface(event_hook->hdev, uclogic_ugee_v2_probe_arr,
				uclogic_ugee_v2_probe_size,
				uclogic_ugee_v2_probe_endpoint);
}
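/*
 * Illustrative sketch (not part of the driver): one way a raw-event handler
 * could consume the hook list built by the function below — compare each
 * incoming report against the stored event bytes and, on a match, defer the
 * re-probing to process context via the hook's work item.  The function name
 * uclogic_params_sketch_match_event is hypothetical.
 */
static bool __maybe_unused
uclogic_params_sketch_match_event(struct uclogic_params *params,
				  const __u8 *data, int size)
{
	struct uclogic_raw_event_hook *hook;

	if (!params->event_hooks)
		return false;

	list_for_each_entry(hook, &params->event_hooks->list, list) {
		if (hook->size == size && !memcmp(hook->event, data, size)) {
			/* USB traffic cannot be done from IRQ context */
			schedule_work(&hook->work);
			return true;
		}
	}

	return false;
}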
/**
 * uclogic_params_ugee_v2_init_event_hooks() - initialize the list of events
 * to be hooked for UGEE v2 devices.
 * @hdev:	The HID device of the tablet interface to initialize and get
 *		parameters from.
 * @p:		Parameters to fill in, cannot be NULL.
 *
 * Returns:
 *	Zero, if successful. A negative errno code on error.
 */
static int uclogic_params_ugee_v2_init_event_hooks(struct hid_device *hdev,
						   struct uclogic_params *p)
{
	struct uclogic_raw_event_hook *event_hook;
	__u8 reconnect_event[] = {
		/* Event received on wireless tablet reconnection */
		0x02, 0xF8, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
	};

	if (!p)
		return -EINVAL;

	/* The reconnection event is only received if the tablet has battery */
	if (!uclogic_params_ugee_v2_has_battery(hdev))
		return 0;

	p->event_hooks = kzalloc(sizeof(*p->event_hooks), GFP_KERNEL);
	if (!p->event_hooks)
		return -ENOMEM;
	INIT_LIST_HEAD(&p->event_hooks->list);

	event_hook = kzalloc(sizeof(*event_hook), GFP_KERNEL);
	if (!event_hook)
		return -ENOMEM;

	INIT_WORK(&event_hook->work, uclogic_params_ugee_v2_reconnect_work);
	event_hook->hdev = hdev;
	event_hook->size = ARRAY_SIZE(reconnect_event);
	event_hook->event = kmemdup(reconnect_event, event_hook->size, GFP_KERNEL);
	if (!event_hook->event)
		return -ENOMEM;

	list_add_tail(&event_hook->list, &p->event_hooks->list);

	return 0;
}

/**
 * uclogic_params_ugee_v2_init() - initialize a UGEE v2 graphics tablet and
 * discover its parameters.
 *
 * These tablets, internally designated as v2 to differentiate them from older
 * models, expect a payload of magic data in order to be switched to the fully
 * functional mode, and expose their parameters in a similar way to the
 * information present in uclogic_params_pen_init_v1(), but with some
 * differences.
 *
 * @params:	Parameters to fill in (to be cleaned with
 *		uclogic_params_cleanup()). Not modified in case of error.
 *		Cannot be NULL.
 * @hdev:	The HID device of the tablet interface to initialize and get
 *		parameters from. Cannot be NULL.
 *
 * Returns:
 *	Zero, if successful. A negative errno code on error.
 */
static int uclogic_params_ugee_v2_init(struct uclogic_params *params,
				       struct hid_device *hdev)
{
	int rc = 0;
	struct uclogic_drvdata *drvdata;
	struct usb_interface *iface;
	__u8 bInterfaceNumber;
	const int str_desc_len = 12;
	__u8 *str_desc = NULL;
	__u8 *rdesc_pen = NULL;
	s32 desc_params[UCLOGIC_RDESC_PH_ID_NUM];
	enum uclogic_params_frame_type frame_type;
	/* The resulting parameters (noop) */
	struct uclogic_params p = {0, };

	if (!params || !hdev) {
		rc = -EINVAL;
		goto cleanup;
	}

	drvdata = hid_get_drvdata(hdev);
	iface = to_usb_interface(hdev->dev.parent);
	bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber;

	if (bInterfaceNumber == 0) {
		rc = uclogic_params_ugee_v2_init_frame_mouse(&p);
		if (rc)
			goto cleanup;

		goto output;
	}

	if (bInterfaceNumber != 2) {
		uclogic_params_init_invalid(&p);
		goto output;
	}

	/*
	 * Initialize the interface by sending magic data.
	 * The specific data was discovered by sniffing the Windows driver
	 * traffic.
	 */
	rc = uclogic_probe_interface(hdev, uclogic_ugee_v2_probe_arr,
				     uclogic_ugee_v2_probe_size,
				     uclogic_ugee_v2_probe_endpoint);
	if (rc) {
		uclogic_params_init_invalid(&p);
		goto output;
	}

	/*
	 * Read the string descriptor containing pen and frame parameters.
	 * The specific string descriptor and data were discovered by sniffing
	 * the Windows driver traffic.
*/ rc = uclogic_params_get_str_desc(&str_desc, hdev, 100, str_desc_len); if (rc != str_desc_len) { hid_err(hdev, "failed retrieving pen and frame parameters: %d\n", rc); uclogic_params_init_invalid(&p); goto output; } rc = uclogic_params_parse_ugee_v2_desc(str_desc, str_desc_len, desc_params, ARRAY_SIZE(desc_params), &frame_type); if (rc) goto cleanup; kfree(str_desc); str_desc = NULL; /* Initialize the pen interface */ rdesc_pen = uclogic_rdesc_template_apply( uclogic_rdesc_ugee_v2_pen_template_arr, uclogic_rdesc_ugee_v2_pen_template_size, desc_params, ARRAY_SIZE(desc_params)); if (!rdesc_pen) { rc = -ENOMEM; goto cleanup; } p.pen.desc_ptr = rdesc_pen; p.pen.desc_size = uclogic_rdesc_ugee_v2_pen_template_size; p.pen.id = 0x02; p.pen.subreport_list[0].value = 0xf0; p.pen.subreport_list[0].id = UCLOGIC_RDESC_V1_FRAME_ID; /* Initialize the frame interface */ if (drvdata->quirks & UCLOGIC_MOUSE_FRAME_QUIRK) frame_type = UCLOGIC_PARAMS_FRAME_MOUSE; switch (frame_type) { case UCLOGIC_PARAMS_FRAME_DIAL: case UCLOGIC_PARAMS_FRAME_MOUSE: rc = uclogic_params_ugee_v2_init_frame_dial(&p, desc_params, ARRAY_SIZE(desc_params)); break; case UCLOGIC_PARAMS_FRAME_BUTTONS: default: rc = uclogic_params_ugee_v2_init_frame_buttons(&p, desc_params, ARRAY_SIZE(desc_params)); break; } if (rc) goto cleanup; /* Initialize the battery interface*/ if (uclogic_params_ugee_v2_has_battery(hdev)) { rc = uclogic_params_ugee_v2_init_battery(hdev, &p); if (rc) { hid_err(hdev, "error initializing battery: %d\n", rc); goto cleanup; } } /* Create a list of raw events to be ignored */ rc = uclogic_params_ugee_v2_init_event_hooks(hdev, &p); if (rc) { hid_err(hdev, "error initializing event hook list: %d\n", rc); goto cleanup; } output: /* Output parameters */ memcpy(params, &p, sizeof(*params)); memset(&p, 0, sizeof(p)); rc = 0; cleanup: kfree(str_desc); uclogic_params_cleanup(&p); return rc; } /** * uclogic_params_init() - initialize a tablet interface and discover its * parameters. * * @params: Parameters to fill in (to be cleaned with * uclogic_params_cleanup()). Not modified in case of error. * Cannot be NULL. * @hdev: The HID device of the tablet interface to initialize and get * parameters from. Cannot be NULL. Must be using the USB low-level * driver, i.e. be an actual USB tablet. * * Returns: * Zero, if successful. A negative errno code on error. */ int uclogic_params_init(struct uclogic_params *params, struct hid_device *hdev) { int rc; struct usb_device *udev; __u8 bNumInterfaces; struct usb_interface *iface; __u8 bInterfaceNumber; bool found; /* The resulting parameters (noop) */ struct uclogic_params p = {0, }; /* Check arguments */ if (params == NULL || hdev == NULL || !hid_is_usb(hdev)) { rc = -EINVAL; goto cleanup; } udev = hid_to_usb_dev(hdev); bNumInterfaces = udev->config->desc.bNumInterfaces; iface = to_usb_interface(hdev->dev.parent); bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; /* * Set replacement report descriptor if the original matches the * specified size. Otherwise keep interface unchanged. */ #define WITH_OPT_DESC(_orig_desc_token, _new_desc_token) \ uclogic_params_init_with_opt_desc( \ &p, hdev, \ UCLOGIC_RDESC_##_orig_desc_token##_SIZE, \ uclogic_rdesc_##_new_desc_token##_arr, \ uclogic_rdesc_##_new_desc_token##_size) #define VID_PID(_vid, _pid) \ (((__u32)(_vid) << 16) | ((__u32)(_pid) & U16_MAX)) /* * Handle specific interfaces for specific tablets. * * Observe the following logic: * * If the interface is recognized as producing certain useful input: * Mark interface as valid. 
* Output interface parameters. * Else, if the interface is recognized as *not* producing any useful * input: * Mark interface as invalid. * Else: * Mark interface as valid. * Output noop parameters. * * Rule of thumb: it is better to disable a broken interface than let * it spew garbage input. */ switch (VID_PID(hdev->vendor, hdev->product)) { case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209): rc = WITH_OPT_DESC(PF1209_ORIG, pf1209_fixed); if (rc != 0) goto cleanup; break; case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U): rc = WITH_OPT_DESC(WPXXXXU_ORIG, wp4030u_fixed); if (rc != 0) goto cleanup; break; case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U): if (hdev->dev_rsize == UCLOGIC_RDESC_WP5540U_V2_ORIG_SIZE) { if (bInterfaceNumber == 0) { /* Try to probe v1 pen parameters */ rc = uclogic_params_pen_init_v1(&p.pen, &found, hdev); if (rc != 0) { hid_err(hdev, "pen probing failed: %d\n", rc); goto cleanup; } if (!found) { hid_warn(hdev, "pen parameters not found"); } } else { uclogic_params_init_invalid(&p); } } else { rc = WITH_OPT_DESC(WPXXXXU_ORIG, wp5540u_fixed); if (rc != 0) goto cleanup; } break; case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U): rc = WITH_OPT_DESC(WPXXXXU_ORIG, wp8060u_fixed); if (rc != 0) goto cleanup; break; case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP1062): rc = WITH_OPT_DESC(WP1062_ORIG, wp1062_fixed); if (rc != 0) goto cleanup; break; case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850): switch (bInterfaceNumber) { case 0: rc = WITH_OPT_DESC(TWHL850_ORIG0, twhl850_fixed0); if (rc != 0) goto cleanup; break; case 1: rc = WITH_OPT_DESC(TWHL850_ORIG1, twhl850_fixed1); if (rc != 0) goto cleanup; break; case 2: rc = WITH_OPT_DESC(TWHL850_ORIG2, twhl850_fixed2); if (rc != 0) goto cleanup; break; } break; case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60): /* * If it is not a three-interface version, which is known to * respond to initialization. 
*/ if (bNumInterfaces != 3) { switch (bInterfaceNumber) { case 0: rc = WITH_OPT_DESC(TWHA60_ORIG0, twha60_fixed0); if (rc != 0) goto cleanup; break; case 1: rc = WITH_OPT_DESC(TWHA60_ORIG1, twha60_fixed1); if (rc != 0) goto cleanup; break; } break; } fallthrough; case VID_PID(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET): case VID_PID(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET2): case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET): case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_YIYNOVA_TABLET): case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_81): case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3): case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_45): case VID_PID(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_UGEE_TABLET_47): rc = uclogic_params_huion_init(&p, hdev); if (rc != 0) goto cleanup; break; case VID_PID(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610): case VID_PID(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GT5040): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_G540): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_G640): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_STAR06): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_RAINBOW_CV720): /* If this is the pen interface */ if (bInterfaceNumber == 1) { /* Probe v1 pen parameters */ rc = uclogic_params_pen_init_v1(&p.pen, &found, hdev); if (rc != 0) { hid_err(hdev, "pen probing failed: %d\n", rc); goto cleanup; } if (!found) { hid_warn(hdev, "pen parameters not found"); uclogic_params_init_invalid(&p); } } else { uclogic_params_init_invalid(&p); } break; case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01): /* If this is the pen and frame interface */ if (bInterfaceNumber == 1) { /* Probe v1 pen parameters */ rc = uclogic_params_pen_init_v1(&p.pen, &found, hdev); if (rc != 0) { hid_err(hdev, "pen probing failed: %d\n", rc); goto cleanup; } /* Initialize frame parameters */ rc = uclogic_params_frame_init_with_desc( &p.frame_list[0], uclogic_rdesc_xppen_deco01_frame_arr, uclogic_rdesc_xppen_deco01_frame_size, 0); if (rc != 0) goto cleanup; } else { uclogic_params_init_invalid(&p); } break; case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_PARBLO_A610_PRO): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO01_V2): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_L): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_MW): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_S): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_XPPEN_TABLET_DECO_PRO_SW): rc = uclogic_params_ugee_v2_init(&p, hdev); if (rc != 0) goto cleanup; break; case VID_PID(USB_VENDOR_ID_TRUST, USB_DEVICE_ID_TRUST_PANORA_TABLET): case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_G5): /* Ignore non-pen interfaces */ if (bInterfaceNumber != 1) { uclogic_params_init_invalid(&p); break; } rc = uclogic_params_pen_init_v1(&p.pen, &found, hdev); if (rc != 0) { hid_err(hdev, "pen probing failed: %d\n", rc); goto cleanup; } else if (found) { rc = uclogic_params_frame_init_with_desc( &p.frame_list[0], uclogic_rdesc_ugee_g5_frame_arr, uclogic_rdesc_ugee_g5_frame_size, UCLOGIC_RDESC_UGEE_G5_FRAME_ID); if (rc != 0) { hid_err(hdev, "failed creating frame parameters: %d\n", rc); goto cleanup; } p.frame_list[0].re_lsb = UCLOGIC_RDESC_UGEE_G5_FRAME_RE_LSB; p.frame_list[0].dev_id_byte = 
UCLOGIC_RDESC_UGEE_G5_FRAME_DEV_ID_BYTE; } else { hid_warn(hdev, "pen parameters not found"); uclogic_params_init_invalid(&p); } break; case VID_PID(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S): /* Ignore non-pen interfaces */ if (bInterfaceNumber != 1) { uclogic_params_init_invalid(&p); break; } rc = uclogic_params_pen_init_v1(&p.pen, &found, hdev); if (rc != 0) { hid_err(hdev, "pen probing failed: %d\n", rc); goto cleanup; } else if (found) { rc = uclogic_params_frame_init_with_desc( &p.frame_list[0], uclogic_rdesc_ugee_ex07_frame_arr, uclogic_rdesc_ugee_ex07_frame_size, 0); if (rc != 0) { hid_err(hdev, "failed creating frame parameters: %d\n", rc); goto cleanup; } } else { hid_warn(hdev, "pen parameters not found"); uclogic_params_init_invalid(&p); } break; } #undef VID_PID #undef WITH_OPT_DESC /* Output parameters */ memcpy(params, &p, sizeof(*params)); memset(&p, 0, sizeof(p)); rc = 0; cleanup: uclogic_params_cleanup(&p); return rc; } #ifdef CONFIG_HID_KUNIT_TEST #include "hid-uclogic-params-test.c" #endif
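/*
 * Illustrative sketch (not part of the driver above): the calling sequence
 * that the kernel-doc comments in this file describe, roughly as a probe
 * path would use it.  The function name uclogic_params_sketch_probe is
 * hypothetical; only the uclogic_params_*() calls and the "invalid" flag
 * come from the code above.
 */
static int __maybe_unused uclogic_params_sketch_probe(struct hid_device *hdev)
{
	struct uclogic_params params = {0, };
	__u8 *desc_ptr = NULL;
	unsigned int desc_size = 0;
	int rc;

	/* Discover the interface parameters (hdev must be USB-backed) */
	rc = uclogic_params_init(&params, hdev);
	if (rc != 0)
		return rc;

	/* An interface marked invalid produces no useful input */
	if (params.invalid)
		goto cleanup;

	/* Fetch the replacement report descriptor, if one was generated */
	rc = uclogic_params_get_desc(&params, &desc_ptr, &desc_size);
	if (rc != 0)
		goto cleanup;

	/* ... hand desc_ptr/desc_size to the HID core here ... */

	kfree(desc_ptr);
cleanup:
	uclogic_params_cleanup(&params);
	return rc;
}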
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/pagevec.h
 *
 * In many places it is efficient to batch an operation up against multiple
 * folios.  A folio_batch is a container which is used for that.
 */

#ifndef _LINUX_PAGEVEC_H
#define _LINUX_PAGEVEC_H

#include <linux/types.h>

/* 31 pointers + header align the folio_batch structure to a power of two */
#define PAGEVEC_SIZE	31

struct folio;

/**
 * struct folio_batch - A collection of folios.
 *
 * The folio_batch is used to amortise the cost of retrieving and
 * operating on a set of folios.  The order of folios in the batch may be
 * significant (eg delete_from_page_cache_batch()).  Some users of the
 * folio_batch store "exceptional" entries in it which can be removed
 * by calling folio_batch_remove_exceptionals().
 */
struct folio_batch {
	unsigned char nr;
	unsigned char i;
	bool percpu_pvec_drained;
	struct folio *folios[PAGEVEC_SIZE];
};

/**
 * folio_batch_init() - Initialise a batch of folios
 * @fbatch: The folio batch.
 *
 * A freshly initialised folio_batch contains zero folios.
 */
static inline void folio_batch_init(struct folio_batch *fbatch)
{
	fbatch->nr = 0;
	fbatch->i = 0;
	fbatch->percpu_pvec_drained = false;
}

static inline void folio_batch_reinit(struct folio_batch *fbatch)
{
	fbatch->nr = 0;
	fbatch->i = 0;
}

static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
{
	return fbatch->nr;
}

static inline unsigned int folio_batch_space(struct folio_batch *fbatch)
{
	return PAGEVEC_SIZE - fbatch->nr;
}

/**
 * folio_batch_add() - Add a folio to a batch.
 * @fbatch: The folio batch.
 * @folio: The folio to add.
 *
 * The folio is added to the end of the batch.
 * The batch must have previously been initialised using folio_batch_init().
 *
 * Return: The number of slots still available.
 */
static inline unsigned folio_batch_add(struct folio_batch *fbatch,
		struct folio *folio)
{
	fbatch->folios[fbatch->nr++] = folio;
	return folio_batch_space(fbatch);
}

/**
 * folio_batch_next - Return the next folio to process.
 * @fbatch: The folio batch being processed.
 *
 * Use this function to implement a queue of folios.
 *
 * Return: The next folio in the queue, or NULL if the queue is empty.
 */
static inline struct folio *folio_batch_next(struct folio_batch *fbatch)
{
	if (fbatch->i == fbatch->nr)
		return NULL;
	return fbatch->folios[fbatch->i++];
}

void __folio_batch_release(struct folio_batch *pvec);

static inline void folio_batch_release(struct folio_batch *fbatch)
{
	if (folio_batch_count(fbatch))
		__folio_batch_release(fbatch);
}

void folio_batch_remove_exceptionals(struct folio_batch *fbatch);

#endif /* _LINUX_PAGEVEC_H */
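/*
 * Illustrative sketch (not part of the header above): typical folio_batch
 * usage — gather folios into the fixed-size batch, walk them in order with
 * folio_batch_next(), then drop the gathered references in one call.  The
 * function name folio_batch_sketch_process and the caller-supplied folio
 * array are hypothetical.
 */
static inline void folio_batch_sketch_process(struct folio **folios,
					      unsigned int n)
{
	struct folio_batch fbatch;
	struct folio *folio;
	unsigned int i;

	folio_batch_init(&fbatch);

	/* Queue folios until the input or the PAGEVEC_SIZE slots run out */
	for (i = 0; i < n && folio_batch_space(&fbatch); i++)
		folio_batch_add(&fbatch, folios[i]);

	/* Process the batch in insertion order */
	while ((folio = folio_batch_next(&fbatch)) != NULL) {
		/* ... operate on the folio here ... */
	}

	/* Drop the references held on the batched folios */
	folio_batch_release(&fbatch);
}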
4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 
4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 
5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 
6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 
// SPDX-License-Identifier: GPL-2.0+
/*
 * Maple Tree implementation
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 * Copyright (c) 2023 ByteDance
 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
 */

/*
 * DOC: Interesting implementation details of the Maple Tree
 *
 * Each node type has a number of slots for entries and a number of slots for
 * pivots.  In the case of dense nodes, the pivots are implied by the position
 * and are simply the slot index + the minimum of the node.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges.  Pivots may appear in the
 * subtree with an entry attached to the value whereas keys are unique to a
 * specific position of a B-tree.
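 *
 * As a minimal illustration of storing ranges through the public API (a
 * hedged sketch; it only uses DEFINE_MTREE(), mtree_store_range(),
 * mtree_load() and xa_mk_value() from the headers included below):
 *
 *	DEFINE_MTREE(mt);
 *
 *	mtree_store_range(&mt, 10, 20, xa_mk_value(1), GFP_KERNEL);
 *	mtree_store_range(&mt, 21, 30, xa_mk_value(2), GFP_KERNEL);
 *
 *	mtree_load(&mt, 15);	// returns xa_mk_value(1)
 *	mtree_load(&mt, 25);	// returns xa_mk_value(2)
 *	mtree_load(&mt, 35);	// returns NULL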
 * Pivot values are inclusive of the slot with the same index.
 *
 *
 * The following illustrates the layout of a range64 node's slots and pivots.
 *
 *
 *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
 *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
 *           │   │   │   │     │    │    │    │    └─ Implied maximum
 *           │   │   │   │     │    │    │    └─ Pivot 14
 *           │   │   │   │     │    │    └─ Pivot 13
 *           │   │   │   │     │    └─ Pivot 12
 *           │   │   │   │     └─ Pivot 11
 *           │   │   │   └─ Pivot 2
 *           │   │   └─ Pivot 1
 *           │   └─ Pivot 0
 *           └─ Implied minimum
 *
 * Slot contents:
 *  Internal (non-leaf) nodes contain pointers to other nodes.
 *  Leaf nodes contain entries.
 *
 * The location of interest is often referred to as an offset.  All offsets
 * have a slot, but the last offset has an implied pivot from the node above
 * (or UINT_MAX for the root node).
 *
 * Ranges complicate certain write activities.  When modifying any of
 * the B-tree variants, it is known that one entry will either be added or
 * deleted.  When modifying the Maple Tree, one store operation may overwrite
 * the entire data set, or one half of the tree, or the middle half of the tree.
 *
 */

#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

#define MA_ROOT_PARENT 1

/*
 * Maple state flags
 * * MA_STATE_BULK		- Bulk insert mode
 * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
 * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
 */
#define MA_STATE_BULK		1
#define MA_STATE_REBALANCE	2
#define MA_STATE_PREALLOC	4

#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
#define ma_mnode_ptr(x) ((struct maple_node *)(x))
#define ma_enode_ptr(x) ((struct maple_enode *)(x))
static struct kmem_cache *maple_node_cache;

#ifdef CONFIG_DEBUG_MAPLE_TREE
static const unsigned long mt_max[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= ULONG_MAX,
	[maple_range_64]	= ULONG_MAX,
	[maple_arange_64]	= ULONG_MAX,
};
#define mt_node_max(x) mt_max[mte_node_type(x)]
#endif

static const unsigned char mt_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
};
#define mt_slot_count(x) mt_slots[mte_node_type(x)]

static const unsigned char mt_pivots[] = {
	[maple_dense]		= 0,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
};
#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]

static const unsigned char mt_min_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
};
#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]

#define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
#define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)

struct maple_big_node {
	struct maple_pnode *parent;
	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
	union {
		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
		struct {
			unsigned long padding[MAPLE_BIG_NODE_GAPS];
			unsigned long gap[MAPLE_BIG_NODE_GAPS];
		};
	};
	unsigned char b_end;
	enum maple_type type;
};

/*
 * The maple_subtree_state is used to build a tree to replace a segment of an
 * existing tree in a more atomic way.  Any walkers of the older tree will hit a
 * dead node and restart on updates.
*/ struct maple_subtree_state { struct ma_state *orig_l; /* Original left side of subtree */ struct ma_state *orig_r; /* Original right side of subtree */ struct ma_state *l; /* New left side of subtree */ struct ma_state *m; /* New middle of subtree (rare) */ struct ma_state *r; /* New right side of subtree */ struct ma_topiary *free; /* nodes to be freed */ struct ma_topiary *destroy; /* Nodes to be destroyed (walked and freed) */ struct maple_big_node *bn; }; #ifdef CONFIG_KASAN_STACK /* Prevent mas_wr_bnode() from exceeding the stack frame limit */ #define noinline_for_kasan noinline_for_stack #else #define noinline_for_kasan inline #endif /* Functions */ static inline struct maple_node *mt_alloc_one(gfp_t gfp) { return kmem_cache_alloc(maple_node_cache, gfp); } static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) { return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); } static inline void mt_free_one(struct maple_node *node) { kmem_cache_free(maple_node_cache, node); } static inline void mt_free_bulk(size_t size, void __rcu **nodes) { kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); } static void mt_free_rcu(struct rcu_head *head) { struct maple_node *node = container_of(head, struct maple_node, rcu); kmem_cache_free(maple_node_cache, node); } /* * ma_free_rcu() - Use rcu callback to free a maple node * @node: The node to free * * The maple tree uses the parent pointer to indicate this node is no longer in * use and will be freed. */ static void ma_free_rcu(struct maple_node *node) { WARN_ON(node->parent != ma_parent_ptr(node)); call_rcu(&node->rcu, mt_free_rcu); } static void mas_set_height(struct ma_state *mas) { unsigned int new_flags = mas->tree->ma_flags; new_flags &= ~MT_FLAGS_HEIGHT_MASK; MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX); new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET; mas->tree->ma_flags = new_flags; } static unsigned int mas_mt_height(struct ma_state *mas) { return mt_height(mas->tree); } static inline unsigned int mt_attr(struct maple_tree *mt) { return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK; } static __always_inline enum maple_type mte_node_type( const struct maple_enode *entry) { return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & MAPLE_NODE_TYPE_MASK; } static __always_inline bool ma_is_dense(const enum maple_type type) { return type < maple_leaf_64; } static __always_inline bool ma_is_leaf(const enum maple_type type) { return type < maple_range_64; } static __always_inline bool mte_is_leaf(const struct maple_enode *entry) { return ma_is_leaf(mte_node_type(entry)); } /* * We also reserve values with the bottom two bits set to '10' which are * below 4096 */ static __always_inline bool mt_is_reserved(const void *entry) { return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && xa_is_internal(entry); } static __always_inline void mas_set_err(struct ma_state *mas, long err) { mas->node = MA_ERROR(err); mas->status = ma_error; } static __always_inline bool mas_is_ptr(const struct ma_state *mas) { return mas->status == ma_root; } static __always_inline bool mas_is_start(const struct ma_state *mas) { return mas->status == ma_start; } static __always_inline bool mas_is_none(const struct ma_state *mas) { return mas->status == ma_none; } static __always_inline bool mas_is_paused(const struct ma_state *mas) { return mas->status == ma_pause; } static __always_inline bool mas_is_overflow(struct ma_state *mas) { return mas->status == ma_overflow; } static inline bool mas_is_underflow(struct ma_state *mas) { return mas->status 
== ma_underflow; } static __always_inline struct maple_node *mte_to_node( const struct maple_enode *entry) { return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); } /* * mte_to_mat() - Convert a maple encoded node to a maple topiary node. * @entry: The maple encoded node * * Return: a maple topiary pointer */ static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) { return (struct maple_topiary *) ((unsigned long)entry & ~MAPLE_NODE_MASK); } /* * mas_mn() - Get the maple state node. * @mas: The maple state * * Return: the maple node (not encoded - bare pointer). */ static inline struct maple_node *mas_mn(const struct ma_state *mas) { return mte_to_node(mas->node); } /* * mte_set_node_dead() - Set a maple encoded node as dead. * @mn: The maple encoded node. */ static inline void mte_set_node_dead(struct maple_enode *mn) { mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); smp_wmb(); /* Needed for RCU */ } /* Bit 1 indicates the root is a node */ #define MAPLE_ROOT_NODE 0x02 /* maple_type stored bit 3-6 */ #define MAPLE_ENODE_TYPE_SHIFT 0x03 /* Bit 2 means a NULL somewhere below */ #define MAPLE_ENODE_NULL 0x04 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, enum maple_type type) { return (void *)((unsigned long)node | (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); } static inline void *mte_mk_root(const struct maple_enode *node) { return (void *)((unsigned long)node | MAPLE_ROOT_NODE); } static inline void *mte_safe_root(const struct maple_enode *node) { return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); } static inline void *mte_set_full(const struct maple_enode *node) { return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); } static inline void *mte_clear_full(const struct maple_enode *node) { return (void *)((unsigned long)node | MAPLE_ENODE_NULL); } static inline bool mte_has_null(const struct maple_enode *node) { return (unsigned long)node & MAPLE_ENODE_NULL; } static __always_inline bool ma_is_root(struct maple_node *node) { return ((unsigned long)node->parent & MA_ROOT_PARENT); } static __always_inline bool mte_is_root(const struct maple_enode *node) { return ma_is_root(mte_to_node(node)); } static inline bool mas_is_root_limits(const struct ma_state *mas) { return !mas->min && mas->max == ULONG_MAX; } static __always_inline bool mt_is_alloc(struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); } /* * The Parent Pointer * Excluding root, the parent pointer is 256B aligned like all other tree nodes. * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 * bit values need an extra bit to store the offset. This extra bit comes from * a reuse of the last bit in the node type. This is possible by using bit 1 to * indicate if bit 2 is part of the type or the slot. * * Note types: * 0x??1 = Root * 0x?00 = 16 bit nodes * 0x010 = 32 bit nodes * 0x110 = 64 bit nodes * * Slot size and alignment * 0b??1 : Root * 0b?00 : 16 bit values, type in 0-1, slot in 2-7 * 0b010 : 32 bit values, type in 0-2, slot in 3-7 * 0b110 : 64 bit values, type in 0-2, slot in 3-7 */ #define MAPLE_PARENT_ROOT 0x01 #define MAPLE_PARENT_SLOT_SHIFT 0x03 #define MAPLE_PARENT_SLOT_MASK 0xF8 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC #define MAPLE_PARENT_RANGE64 0x06 #define MAPLE_PARENT_RANGE32 0x04 #define MAPLE_PARENT_NOT_RANGE16 0x02 /* * mte_parent_shift() - Get the parent shift for the slot storage. 
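 *
 * As a worked example of the layout above (a hedged sketch, not a new
 * encoding): mas_set_parent() below stores slot 3 of a 64 bit range parent as
 *
 *	(unsigned long)parent | (3 << MAPLE_PARENT_SLOT_SHIFT) | MAPLE_PARENT_RANGE64
 *
 * so the low byte is 0b00011110.  MAPLE_PARENT_NOT_RANGE16 is set, and this
 * function therefore returns MAPLE_PARENT_SLOT_SHIFT (3) for such a parent.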
* @parent: The parent pointer cast as an unsigned long * Return: The shift into that pointer to the star to of the slot */ static inline unsigned long mte_parent_shift(unsigned long parent) { /* Note bit 1 == 0 means 16B */ if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) return MAPLE_PARENT_SLOT_SHIFT; return MAPLE_PARENT_16B_SLOT_SHIFT; } /* * mte_parent_slot_mask() - Get the slot mask for the parent. * @parent: The parent pointer cast as an unsigned long. * Return: The slot mask for that parent. */ static inline unsigned long mte_parent_slot_mask(unsigned long parent) { /* Note bit 1 == 0 means 16B */ if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) return MAPLE_PARENT_SLOT_MASK; return MAPLE_PARENT_16B_SLOT_MASK; } /* * mas_parent_type() - Return the maple_type of the parent from the stored * parent type. * @mas: The maple state * @enode: The maple_enode to extract the parent's enum * Return: The node->parent maple_type */ static inline enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) { unsigned long p_type; p_type = (unsigned long)mte_to_node(enode)->parent; if (WARN_ON(p_type & MAPLE_PARENT_ROOT)) return 0; p_type &= MAPLE_NODE_MASK; p_type &= ~mte_parent_slot_mask(p_type); switch (p_type) { case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ if (mt_is_alloc(mas->tree)) return maple_arange_64; return maple_range_64; } return 0; } /* * mas_set_parent() - Set the parent node and encode the slot * @enode: The encoded maple node. * @parent: The encoded maple node that is the parent of @enode. * @slot: The slot that @enode resides in @parent. * * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the * parent type. */ static inline void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, const struct maple_enode *parent, unsigned char slot) { unsigned long val = (unsigned long)parent; unsigned long shift; unsigned long type; enum maple_type p_type = mte_node_type(parent); MAS_BUG_ON(mas, p_type == maple_dense); MAS_BUG_ON(mas, p_type == maple_leaf_64); switch (p_type) { case maple_range_64: case maple_arange_64: shift = MAPLE_PARENT_SLOT_SHIFT; type = MAPLE_PARENT_RANGE64; break; default: case maple_dense: case maple_leaf_64: shift = type = 0; break; } val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ val |= (slot << shift) | type; mte_to_node(enode)->parent = ma_parent_ptr(val); } /* * mte_parent_slot() - get the parent slot of @enode. * @enode: The encoded maple node. * * Return: The slot in the parent node where @enode resides. */ static __always_inline unsigned int mte_parent_slot(const struct maple_enode *enode) { unsigned long val = (unsigned long)mte_to_node(enode)->parent; if (unlikely(val & MA_ROOT_PARENT)) return 0; /* * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT */ return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); } /* * mte_parent() - Get the parent of @node. * @node: The encoded maple node. * * Return: The parent maple node. */ static __always_inline struct maple_node *mte_parent(const struct maple_enode *enode) { return (void *)((unsigned long) (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); } /* * ma_dead_node() - check if the @enode is dead. * @enode: The encoded maple node * * Return: true if dead, false otherwise. 
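 *
 * A node is marked dead by mte_set_node_dead() above, which points
 * node->parent back at the node itself.  A lockless walker revalidates
 * roughly as follows (hedged sketch of the retry pattern used e.g. by
 * mas_start() below):
 *
 *	node = mte_to_node(enode);
 *	// ... read pivots and slots ...
 *	if (ma_dead_node(node))
 *		goto retry;	// restart the walk from a safe point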
*/ static __always_inline bool ma_dead_node(const struct maple_node *node) { struct maple_node *parent; /* Do not reorder reads from the node prior to the parent check */ smp_rmb(); parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); return (parent == node); } /* * mte_dead_node() - check if the @enode is dead. * @enode: The encoded maple node * * Return: true if dead, false otherwise. */ static __always_inline bool mte_dead_node(const struct maple_enode *enode) { struct maple_node *parent, *node; node = mte_to_node(enode); /* Do not reorder reads from the node prior to the parent check */ smp_rmb(); parent = mte_parent(enode); return (parent == node); } /* * mas_allocated() - Get the number of nodes allocated in a maple state. * @mas: The maple state * * The ma_state alloc member is overloaded to hold a pointer to the first * allocated node or to the number of requested nodes to allocate. If bit 0 is * set, then the alloc contains the number of requested nodes. If there is an * allocated node, then the total allocated nodes is in that node. * * Return: The total number of nodes allocated */ static inline unsigned long mas_allocated(const struct ma_state *mas) { if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) return 0; return mas->alloc->total; } /* * mas_set_alloc_req() - Set the requested number of allocations. * @mas: the maple state * @count: the number of allocations. * * The requested number of allocations is either in the first allocated node, * located in @mas->alloc->request_count, or directly in @mas->alloc if there is * no allocated node. Set the request either in the node or do the necessary * encoding to store in @mas->alloc directly. */ static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) { if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { if (!count) mas->alloc = NULL; else mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); return; } mas->alloc->request_count = count; } /* * mas_alloc_req() - get the requested number of allocations. * @mas: The maple state * * The alloc count is either stored directly in @mas, or in * @mas->alloc->request_count if there is at least one node allocated. Decode * the request count if it's stored directly in @mas->alloc. * * Return: The allocation request count. */ static inline unsigned int mas_alloc_req(const struct ma_state *mas) { if ((unsigned long)mas->alloc & 0x1) return (unsigned long)(mas->alloc) >> 1; else if (mas->alloc) return mas->alloc->request_count; return 0; } /* * ma_pivots() - Get a pointer to the maple node pivots. * @node - the maple node * @type - the node type * * In the event of a dead node, this array may be %NULL * * Return: A pointer to the maple node pivots */ static inline unsigned long *ma_pivots(struct maple_node *node, enum maple_type type) { switch (type) { case maple_arange_64: return node->ma64.pivot; case maple_range_64: case maple_leaf_64: return node->mr64.pivot; case maple_dense: return NULL; } return NULL; } /* * ma_gaps() - Get a pointer to the maple node gaps. * @node - the maple node * @type - the node type * * Return: A pointer to the maple node gaps */ static inline unsigned long *ma_gaps(struct maple_node *node, enum maple_type type) { switch (type) { case maple_arange_64: return node->ma64.gap; case maple_range_64: case maple_leaf_64: case maple_dense: return NULL; } return NULL; } /* * mas_safe_pivot() - get the pivot at @piv or mas->max. 
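 *
 * For example (an illustrative sketch): a maple_range_64 node has only
 * mt_pivots[maple_range_64] == MAPLE_RANGE64_SLOTS - 1 pivots, so
 *
 *	mas_safe_pivot(mas, pivots, MAPLE_RANGE64_SLOTS - 1, maple_range_64)
 *
 * returns mas->max (the implied maximum) instead of reading past the array.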
* @mas: The maple state * @pivots: The pointer to the maple node pivots * @piv: The pivot to fetch * @type: The maple node type * * Return: The pivot at @piv within the limit of the @pivots array, @mas->max * otherwise. */ static __always_inline unsigned long mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, unsigned char piv, enum maple_type type) { if (piv >= mt_pivots[type]) return mas->max; return pivots[piv]; } /* * mas_safe_min() - Return the minimum for a given offset. * @mas: The maple state * @pivots: The pointer to the maple node pivots * @offset: The offset into the pivot array * * Return: The minimum range value that is contained in @offset. */ static inline unsigned long mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset) { if (likely(offset)) return pivots[offset - 1] + 1; return mas->min; } /* * mte_set_pivot() - Set a pivot to a value in an encoded maple node. * @mn: The encoded maple node * @piv: The pivot offset * @val: The value of the pivot */ static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv, unsigned long val) { struct maple_node *node = mte_to_node(mn); enum maple_type type = mte_node_type(mn); BUG_ON(piv >= mt_pivots[type]); switch (type) { case maple_range_64: case maple_leaf_64: node->mr64.pivot[piv] = val; break; case maple_arange_64: node->ma64.pivot[piv] = val; break; case maple_dense: break; } } /* * ma_slots() - Get a pointer to the maple node slots. * @mn: The maple node * @mt: The maple node type * * Return: A pointer to the maple node slots */ static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt) { switch (mt) { case maple_arange_64: return mn->ma64.slot; case maple_range_64: case maple_leaf_64: return mn->mr64.slot; case maple_dense: return mn->slot; } return NULL; } static inline bool mt_write_locked(const struct maple_tree *mt) { return mt_external_lock(mt) ? mt_write_lock_is_held(mt) : lockdep_is_held(&mt->ma_lock); } static __always_inline bool mt_locked(const struct maple_tree *mt) { return mt_external_lock(mt) ? mt_lock_is_held(mt) : lockdep_is_held(&mt->ma_lock); } static __always_inline void *mt_slot(const struct maple_tree *mt, void __rcu **slots, unsigned char offset) { return rcu_dereference_check(slots[offset], mt_locked(mt)); } static __always_inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, unsigned char offset) { return rcu_dereference_protected(slots[offset], mt_write_locked(mt)); } /* * mas_slot_locked() - Get the slot value when holding the maple tree lock. * @mas: The maple state * @slots: The pointer to the slots * @offset: The offset into the slots array to fetch * * Return: The entry stored in @slots at the @offset. */ static __always_inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, unsigned char offset) { return mt_slot_locked(mas->tree, slots, offset); } /* * mas_slot() - Get the slot value when not holding the maple tree lock. * @mas: The maple state * @slots: The pointer to the slots * @offset: The offset into the slots array to fetch * * Return: The entry stored in @slots at the @offset */ static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots, unsigned char offset) { return mt_slot(mas->tree, slots, offset); } /* * mas_root() - Get the maple tree root. * @mas: The maple state. 
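 *
 * The root is encoded: an empty tree stores NULL, a single-entry tree stores
 * the entry itself, and a larger tree stores an xa_is_node() pointer.
 * mas_start() below separates the cases roughly like this (illustrative
 * sketch, not a verbatim copy):
 *
 *	root = mas_root(mas);
 *	if (xa_is_node(root))		// tree with nodes
 *		mas->node = mte_safe_root(root);
 *	else if (!root)			// empty tree
 *		mas->status = ma_none;
 *	else				// single entry at index 0
 *		mas->status = ma_root;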
* * Return: The pointer to the root of the tree */ static __always_inline void *mas_root(struct ma_state *mas) { return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree)); } static inline void *mt_root_locked(struct maple_tree *mt) { return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt)); } /* * mas_root_locked() - Get the maple tree root when holding the maple tree lock. * @mas: The maple state. * * Return: The pointer to the root of the tree */ static inline void *mas_root_locked(struct ma_state *mas) { return mt_root_locked(mas->tree); } static inline struct maple_metadata *ma_meta(struct maple_node *mn, enum maple_type mt) { switch (mt) { case maple_arange_64: return &mn->ma64.meta; default: return &mn->mr64.meta; } } /* * ma_set_meta() - Set the metadata information of a node. * @mn: The maple node * @mt: The maple node type * @offset: The offset of the highest sub-gap in this node. * @end: The end of the data in this node. */ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, unsigned char offset, unsigned char end) { struct maple_metadata *meta = ma_meta(mn, mt); meta->gap = offset; meta->end = end; } /* * mt_clear_meta() - clear the metadata information of a node, if it exists * @mt: The maple tree * @mn: The maple node * @type: The maple node type * @offset: The offset of the highest sub-gap in this node. * @end: The end of the data in this node. */ static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, enum maple_type type) { struct maple_metadata *meta; unsigned long *pivots; void __rcu **slots; void *next; switch (type) { case maple_range_64: pivots = mn->mr64.pivot; if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { slots = mn->mr64.slot; next = mt_slot_locked(mt, slots, MAPLE_RANGE64_SLOTS - 1); if (unlikely((mte_to_node(next) && mte_node_type(next)))) return; /* no metadata, could be node */ } fallthrough; case maple_arange_64: meta = ma_meta(mn, type); break; default: return; } meta->gap = 0; meta->end = 0; } /* * ma_meta_end() - Get the data end of a node from the metadata * @mn: The maple node * @mt: The maple node type */ static inline unsigned char ma_meta_end(struct maple_node *mn, enum maple_type mt) { struct maple_metadata *meta = ma_meta(mn, mt); return meta->end; } /* * ma_meta_gap() - Get the largest gap location of a node from the metadata * @mn: The maple node */ static inline unsigned char ma_meta_gap(struct maple_node *mn) { return mn->ma64.meta.gap; } /* * ma_set_meta_gap() - Set the largest gap location in a nodes metadata * @mn: The maple node * @mn: The maple node type * @offset: The location of the largest gap. */ static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt, unsigned char offset) { struct maple_metadata *meta = ma_meta(mn, mt); meta->gap = offset; } /* * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes. * @mat - the ma_topiary, a linked list of dead nodes. * @dead_enode - the node to be marked as dead and added to the tail of the list * * Add the @dead_enode to the linked list in @mat. 
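 *
 * Illustrative use together with mas_mat_destroy() below (a hedged sketch;
 * old_enode and other_enode stand for nodes that have just been replaced and
 * the tree is assumed to be write locked):
 *
 *	struct ma_topiary mat = { };	// head == tail == NULL
 *
 *	mat_add(&mat, old_enode);	// marked dead, queued on the list
 *	mat_add(&mat, other_enode);
 *	mas_mat_destroy(mas, &mat);	// free (or RCU free) the whole list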
*/ static inline void mat_add(struct ma_topiary *mat, struct maple_enode *dead_enode) { mte_set_node_dead(dead_enode); mte_to_mat(dead_enode)->next = NULL; if (!mat->tail) { mat->tail = mat->head = dead_enode; return; } mte_to_mat(mat->tail)->next = dead_enode; mat->tail = dead_enode; } static void mt_free_walk(struct rcu_head *head); static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, bool free); /* * mas_mat_destroy() - Free all nodes and subtrees in a dead list. * @mas - the maple state * @mat - the ma_topiary linked list of dead nodes to free. * * Destroy walk a dead list. */ static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat) { struct maple_enode *next; struct maple_node *node; bool in_rcu = mt_in_rcu(mas->tree); while (mat->head) { next = mte_to_mat(mat->head)->next; node = mte_to_node(mat->head); mt_destroy_walk(mat->head, mas->tree, !in_rcu); if (in_rcu) call_rcu(&node->rcu, mt_free_walk); mat->head = next; } } /* * mas_descend() - Descend into the slot stored in the ma_state. * @mas - the maple state. * * Note: Not RCU safe, only use in write side or debug code. */ static inline void mas_descend(struct ma_state *mas) { enum maple_type type; unsigned long *pivots; struct maple_node *node; void __rcu **slots; node = mas_mn(mas); type = mte_node_type(mas->node); pivots = ma_pivots(node, type); slots = ma_slots(node, type); if (mas->offset) mas->min = pivots[mas->offset - 1] + 1; mas->max = mas_safe_pivot(mas, pivots, mas->offset, type); mas->node = mas_slot(mas, slots, mas->offset); } /* * mte_set_gap() - Set a maple node gap. * @mn: The encoded maple node * @gap: The offset of the gap to set * @val: The gap value */ static inline void mte_set_gap(const struct maple_enode *mn, unsigned char gap, unsigned long val) { switch (mte_node_type(mn)) { default: break; case maple_arange_64: mte_to_node(mn)->ma64.gap[gap] = val; break; } } /* * mas_ascend() - Walk up a level of the tree. * @mas: The maple state * * Sets the @mas->max and @mas->min to the correct values when walking up. This * may cause several levels of walking up to find the correct min and max. * May find a dead node which will cause a premature return. * Return: 1 on dead node, 0 otherwise */ static int mas_ascend(struct ma_state *mas) { struct maple_enode *p_enode; /* parent enode. */ struct maple_enode *a_enode; /* ancestor enode. */ struct maple_node *a_node; /* ancestor node. */ struct maple_node *p_node; /* parent node. 
*/ unsigned char a_slot; enum maple_type a_type; unsigned long min, max; unsigned long *pivots; bool set_max = false, set_min = false; a_node = mas_mn(mas); if (ma_is_root(a_node)) { mas->offset = 0; return 0; } p_node = mte_parent(mas->node); if (unlikely(a_node == p_node)) return 1; a_type = mas_parent_type(mas, mas->node); mas->offset = mte_parent_slot(mas->node); a_enode = mt_mk_node(p_node, a_type); /* Check to make sure all parent information is still accurate */ if (p_node != mte_parent(mas->node)) return 1; mas->node = a_enode; if (mte_is_root(a_enode)) { mas->max = ULONG_MAX; mas->min = 0; return 0; } min = 0; max = ULONG_MAX; if (!mas->offset) { min = mas->min; set_min = true; } if (mas->max == ULONG_MAX) set_max = true; do { p_enode = a_enode; a_type = mas_parent_type(mas, p_enode); a_node = mte_parent(p_enode); a_slot = mte_parent_slot(p_enode); a_enode = mt_mk_node(a_node, a_type); pivots = ma_pivots(a_node, a_type); if (unlikely(ma_dead_node(a_node))) return 1; if (!set_min && a_slot) { set_min = true; min = pivots[a_slot - 1] + 1; } if (!set_max && a_slot < mt_pivots[a_type]) { set_max = true; max = pivots[a_slot]; } if (unlikely(ma_dead_node(a_node))) return 1; if (unlikely(ma_is_root(a_node))) break; } while (!set_min || !set_max); mas->max = max; mas->min = min; return 0; } /* * mas_pop_node() - Get a previously allocated maple node from the maple state. * @mas: The maple state * * Return: A pointer to a maple node. */ static inline struct maple_node *mas_pop_node(struct ma_state *mas) { struct maple_alloc *ret, *node = mas->alloc; unsigned long total = mas_allocated(mas); unsigned int req = mas_alloc_req(mas); /* nothing or a request pending. */ if (WARN_ON(!total)) return NULL; if (total == 1) { /* single allocation in this ma_state */ mas->alloc = NULL; ret = node; goto single_node; } if (node->node_count == 1) { /* Single allocation in this node. */ mas->alloc = node->slot[0]; mas->alloc->total = node->total - 1; ret = node; goto new_head; } node->total--; ret = node->slot[--node->node_count]; node->slot[node->node_count] = NULL; single_node: new_head: if (req) { req++; mas_set_alloc_req(mas, req); } memset(ret, 0, sizeof(*ret)); return (struct maple_node *)ret; } /* * mas_push_node() - Push a node back on the maple state allocation. * @mas: The maple state * @used: The used maple node * * Stores the maple node back into @mas->alloc for reuse. Updates allocated and * requested node count as necessary. 
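 *
 * Illustrative pairing with mas_node_count() and mas_pop_node() (a hedged
 * sketch of the usual pattern in this file; new_node is just a local
 * struct maple_node pointer):
 *
 *	mas_node_count(mas, 1);		// request one node
 *	if (mas_is_err(mas))
 *		return;			// allocation failed, -ENOMEM recorded
 *	new_node = mas_pop_node(mas);
 *	// ... use new_node, or ...
 *	mas_push_node(mas, new_node);	// give it back if it went unused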
*/ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) { struct maple_alloc *reuse = (struct maple_alloc *)used; struct maple_alloc *head = mas->alloc; unsigned long count; unsigned int requested = mas_alloc_req(mas); count = mas_allocated(mas); reuse->request_count = 0; reuse->node_count = 0; if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { head->slot[head->node_count++] = reuse; head->total++; goto done; } reuse->total = 1; if ((head) && !((unsigned long)head & 0x1)) { reuse->slot[0] = head; reuse->node_count = 1; reuse->total += head->total; } mas->alloc = reuse; done: if (requested > 1) mas_set_alloc_req(mas, requested - 1); } /* * mas_alloc_nodes() - Allocate nodes into a maple state * @mas: The maple state * @gfp: The GFP Flags */ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) { struct maple_alloc *node; unsigned long allocated = mas_allocated(mas); unsigned int requested = mas_alloc_req(mas); unsigned int count; void **slots = NULL; unsigned int max_req = 0; if (!requested) return; mas_set_alloc_req(mas, 0); if (mas->mas_flags & MA_STATE_PREALLOC) { if (allocated) return; BUG_ON(!allocated); WARN_ON(!allocated); } if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { node = (struct maple_alloc *)mt_alloc_one(gfp); if (!node) goto nomem_one; if (allocated) { node->slot[0] = mas->alloc; node->node_count = 1; } else { node->node_count = 0; } mas->alloc = node; node->total = ++allocated; requested--; } node = mas->alloc; node->request_count = 0; while (requested) { max_req = MAPLE_ALLOC_SLOTS - node->node_count; slots = (void **)&node->slot[node->node_count]; max_req = min(requested, max_req); count = mt_alloc_bulk(gfp, max_req, slots); if (!count) goto nomem_bulk; if (node->node_count == 0) { node->slot[0]->node_count = 0; node->slot[0]->request_count = 0; } node->node_count += count; allocated += count; node = node->slot[0]; requested -= count; } mas->alloc->total = allocated; return; nomem_bulk: /* Clean up potential freed allocations on bulk failure */ memset(slots, 0, max_req * sizeof(unsigned long)); nomem_one: mas_set_alloc_req(mas, requested); if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) mas->alloc->total = allocated; mas_set_err(mas, -ENOMEM); } /* * mas_free() - Free an encoded maple node * @mas: The maple state * @used: The encoded maple node to free. * * Uses rcu free if necessary, pushes @used back on the maple state allocations * otherwise. */ static inline void mas_free(struct ma_state *mas, struct maple_enode *used) { struct maple_node *tmp = mte_to_node(used); if (mt_in_rcu(mas->tree)) ma_free_rcu(tmp); else mas_push_node(mas, tmp); } /* * mas_node_count_gfp() - Check if enough nodes are allocated and request more * if there is not enough nodes. * @mas: The maple state * @count: The number of nodes needed * @gfp: the gfp flags */ static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) { unsigned long allocated = mas_allocated(mas); if (allocated < count) { mas_set_alloc_req(mas, count - allocated); mas_alloc_nodes(mas, gfp); } } /* * mas_node_count() - Check if enough nodes are allocated and request more if * there is not enough nodes. * @mas: The maple state * @count: The number of nodes needed * * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. */ static void mas_node_count(struct ma_state *mas, int count) { return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); } /* * mas_start() - Sets up maple state for operations. * @mas: The maple state. 
* * If mas->status == mas_start, then set the min, max and depth to * defaults. * * Return: * - If mas->node is an error or not mas_start, return NULL. * - If it's an empty tree: NULL & mas->status == ma_none * - If it's a single entry: The entry & mas->status == mas_root * - If it's a tree: NULL & mas->status == safe root node. */ static inline struct maple_enode *mas_start(struct ma_state *mas) { if (likely(mas_is_start(mas))) { struct maple_enode *root; mas->min = 0; mas->max = ULONG_MAX; retry: mas->depth = 0; root = mas_root(mas); /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth = 1; mas->status = ma_active; mas->node = mte_safe_root(root); mas->offset = 0; if (mte_dead_node(mas->node)) goto retry; return NULL; } /* empty tree */ if (unlikely(!root)) { mas->node = NULL; mas->status = ma_none; mas->offset = MAPLE_NODE_SLOTS; return NULL; } /* Single entry tree */ mas->status = ma_root; mas->offset = MAPLE_NODE_SLOTS; /* Single entry tree. */ if (mas->index > 0) return NULL; return root; } return NULL; } /* * ma_data_end() - Find the end of the data in a node. * @node: The maple node * @type: The maple node type * @pivots: The array of pivots in the node * @max: The maximum value in the node * * Uses metadata to find the end of the data when possible. * Return: The zero indexed last slot with data (may be null). */ static __always_inline unsigned char ma_data_end(struct maple_node *node, enum maple_type type, unsigned long *pivots, unsigned long max) { unsigned char offset; if (!pivots) return 0; if (type == maple_arange_64) return ma_meta_end(node, type); offset = mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); if (likely(pivots[offset] == max)) return offset; return mt_pivots[type]; } /* * mas_data_end() - Find the end of the data (slot). * @mas: the maple state * * This method is optimized to check the metadata of a node if the node type * supports data end metadata. * * Return: The zero indexed last slot with data (may be null). */ static inline unsigned char mas_data_end(struct ma_state *mas) { enum maple_type type; struct maple_node *node; unsigned char offset; unsigned long *pivots; type = mte_node_type(mas->node); node = mas_mn(mas); if (type == maple_arange_64) return ma_meta_end(node, type); pivots = ma_pivots(node, type); if (unlikely(ma_dead_node(node))) return 0; offset = mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); if (likely(pivots[offset] == mas->max)) return offset; return mt_pivots[type]; } /* * mas_leaf_max_gap() - Returns the largest gap in a leaf node * @mas - the maple state * * Return: The maximum gap in the leaf. */ static unsigned long mas_leaf_max_gap(struct ma_state *mas) { enum maple_type mt; unsigned long pstart, gap, max_gap; struct maple_node *mn; unsigned long *pivots; void __rcu **slots; unsigned char i; unsigned char max_piv; mt = mte_node_type(mas->node); mn = mas_mn(mas); slots = ma_slots(mn, mt); max_gap = 0; if (unlikely(ma_is_dense(mt))) { gap = 0; for (i = 0; i < mt_slots[mt]; i++) { if (slots[i]) { if (gap > max_gap) max_gap = gap; gap = 0; } else { gap++; } } if (gap > max_gap) max_gap = gap; return max_gap; } /* * Check the first implied pivot optimizes the loop below and slot 1 may * be skipped if there is a gap in slot 0. 
*/ pivots = ma_pivots(mn, mt); if (likely(!slots[0])) { max_gap = pivots[0] - mas->min + 1; i = 2; } else { i = 1; } /* reduce max_piv as the special case is checked before the loop */ max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; /* * Check end implied pivot which can only be a gap on the right most * node. */ if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { gap = ULONG_MAX - pivots[max_piv]; if (gap > max_gap) max_gap = gap; if (max_gap > pivots[max_piv] - mas->min) return max_gap; } for (; i <= max_piv; i++) { /* data == no gap. */ if (likely(slots[i])) continue; pstart = pivots[i - 1]; gap = pivots[i] - pstart; if (gap > max_gap) max_gap = gap; /* There cannot be two gaps in a row. */ i++; } return max_gap; } /* * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) * @node: The maple node * @gaps: The pointer to the gaps * @mt: The maple node type * @*off: Pointer to store the offset location of the gap. * * Uses the metadata data end to scan backwards across set gaps. * * Return: The maximum gap value */ static inline unsigned long ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, unsigned char *off) { unsigned char offset, i; unsigned long max_gap = 0; i = offset = ma_meta_end(node, mt); do { if (gaps[i] > max_gap) { max_gap = gaps[i]; offset = i; } } while (i--); *off = offset; return max_gap; } /* * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. * @mas: The maple state. * * Return: The gap value. */ static inline unsigned long mas_max_gap(struct ma_state *mas) { unsigned long *gaps; unsigned char offset; enum maple_type mt; struct maple_node *node; mt = mte_node_type(mas->node); if (ma_is_leaf(mt)) return mas_leaf_max_gap(mas); node = mas_mn(mas); MAS_BUG_ON(mas, mt != maple_arange_64); offset = ma_meta_gap(node); gaps = ma_gaps(node, mt); return gaps[offset]; } /* * mas_parent_gap() - Set the parent gap and any gaps above, as needed * @mas: The maple state * @offset: The gap offset in the parent to set * @new: The new gap value. * * Set the parent gap then continue to set the gap upwards, using the metadata * of the parent to see if it is necessary to check the node above. */ static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, unsigned long new) { unsigned long meta_gap = 0; struct maple_node *pnode; struct maple_enode *penode; unsigned long *pgaps; unsigned char meta_offset; enum maple_type pmt; pnode = mte_parent(mas->node); pmt = mas_parent_type(mas, mas->node); penode = mt_mk_node(pnode, pmt); pgaps = ma_gaps(pnode, pmt); ascend: MAS_BUG_ON(mas, pmt != maple_arange_64); meta_offset = ma_meta_gap(pnode); meta_gap = pgaps[meta_offset]; pgaps[offset] = new; if (meta_gap == new) return; if (offset != meta_offset) { if (meta_gap > new) return; ma_set_meta_gap(pnode, pmt, offset); } else if (new < meta_gap) { new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); ma_set_meta_gap(pnode, pmt, meta_offset); } if (ma_is_root(pnode)) return; /* Go to the parent node. */ pnode = mte_parent(penode); pmt = mas_parent_type(mas, penode); pgaps = ma_gaps(pnode, pmt); offset = mte_parent_slot(penode); penode = mt_mk_node(pnode, pmt); goto ascend; } /* * mas_update_gap() - Update a nodes gaps and propagate up if necessary. * @mas - the maple state. 
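 *
 * The propagated gaps are what allow an allocation tree (one created with
 * MT_FLAGS_ALLOC_RANGE) to answer "find a free span of a given size" without
 * scanning every leaf.  A minimal, illustrative caller-side sketch, assuming
 * an already initialised and locked allocation tree named "tree" (the name
 * and values are for illustration only):
 *
 *      MA_STATE(mas, &tree, 0, 0);
 *
 *      if (!mas_empty_area(&mas, 0, ULONG_MAX, 16))
 *              pr_debug("gap found: %lu-%lu\n", mas.index, mas.last);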
*/ static inline void mas_update_gap(struct ma_state *mas) { unsigned char pslot; unsigned long p_gap; unsigned long max_gap; if (!mt_is_alloc(mas->tree)) return; if (mte_is_root(mas->node)) return; max_gap = mas_max_gap(mas); pslot = mte_parent_slot(mas->node); p_gap = ma_gaps(mte_parent(mas->node), mas_parent_type(mas, mas->node))[pslot]; if (p_gap != max_gap) mas_parent_gap(mas, pslot, max_gap); } /* * mas_adopt_children() - Set the parent pointer of all nodes in @parent to * @parent with the slot encoded. * @mas - the maple state (for the tree) * @parent - the maple encoded node containing the children. */ static inline void mas_adopt_children(struct ma_state *mas, struct maple_enode *parent) { enum maple_type type = mte_node_type(parent); struct maple_node *node = mte_to_node(parent); void __rcu **slots = ma_slots(node, type); unsigned long *pivots = ma_pivots(node, type); struct maple_enode *child; unsigned char offset; offset = ma_data_end(node, type, pivots, mas->max); do { child = mas_slot_locked(mas, slots, offset); mas_set_parent(mas, child, parent, offset); } while (offset--); } /* * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old * node as dead. * @mas - the maple state with the new node * @old_enode - The old maple encoded node to replace. */ static inline void mas_put_in_tree(struct ma_state *mas, struct maple_enode *old_enode) __must_hold(mas->tree->ma_lock) { unsigned char offset; void __rcu **slots; if (mte_is_root(mas->node)) { mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas)); rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); mas_set_height(mas); } else { offset = mte_parent_slot(mas->node); slots = ma_slots(mte_parent(mas->node), mas_parent_type(mas, mas->node)); rcu_assign_pointer(slots[offset], mas->node); } mte_set_node_dead(old_enode); } /* * mas_replace_node() - Replace a node by putting it in the tree, marking it * dead, and freeing it. * the parent encoding to locate the maple node in the tree. * @mas - the ma_state with @mas->node pointing to the new node. * @old_enode - The old maple encoded node. */ static inline void mas_replace_node(struct ma_state *mas, struct maple_enode *old_enode) __must_hold(mas->tree->ma_lock) { mas_put_in_tree(mas, old_enode); mas_free(mas, old_enode); } /* * mas_find_child() - Find a child who has the parent @mas->node. * @mas: the maple state with the parent. * @child: the maple state to store the child. */ static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) __must_hold(mas->tree->ma_lock) { enum maple_type mt; unsigned char offset; unsigned char end; unsigned long *pivots; struct maple_enode *entry; struct maple_node *node; void __rcu **slots; mt = mte_node_type(mas->node); node = mas_mn(mas); slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); end = ma_data_end(node, mt, pivots, mas->max); for (offset = mas->offset; offset <= end; offset++) { entry = mas_slot_locked(mas, slots, offset); if (mte_parent(entry) == node) { *child = *mas; mas->offset = offset + 1; child->offset = offset; mas_descend(child); child->offset = 0; return true; } } return false; } /* * mab_shift_right() - Shift the data in mab right. Note, does not clean out the * old data or set b_node->b_end. 
* @b_node: the maple_big_node * @shift: the shift count */ static inline void mab_shift_right(struct maple_big_node *b_node, unsigned char shift) { unsigned long size = b_node->b_end * sizeof(unsigned long); memmove(b_node->pivot + shift, b_node->pivot, size); memmove(b_node->slot + shift, b_node->slot, size); if (b_node->type == maple_arange_64) memmove(b_node->gap + shift, b_node->gap, size); } /* * mab_middle_node() - Check if a middle node is needed (unlikely) * @b_node: the maple_big_node that contains the data. * @size: the amount of data in the b_node * @split: the potential split location * @slot_count: the size that can be stored in a single node being considered. * * Return: true if a middle node is required. */ static inline bool mab_middle_node(struct maple_big_node *b_node, int split, unsigned char slot_count) { unsigned char size = b_node->b_end; if (size >= 2 * slot_count) return true; if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) return true; return false; } /* * mab_no_null_split() - ensure the split doesn't fall on a NULL * @b_node: the maple_big_node with the data * @split: the suggested split location * @slot_count: the number of slots in the node being considered. * * Return: the split location. */ static inline int mab_no_null_split(struct maple_big_node *b_node, unsigned char split, unsigned char slot_count) { if (!b_node->slot[split]) { /* * If the split is less than the max slot && the right side will * still be sufficient, then increment the split on NULL. */ if ((split < slot_count - 1) && (b_node->b_end - split) > (mt_min_slots[b_node->type])) split++; else split--; } return split; } /* * mab_calc_split() - Calculate the split location and if there needs to be two * splits. * @bn: The maple_big_node with the data * @mid_split: The second split, if required. 0 otherwise. * * Return: The first split location. The middle split is set in @mid_split. */ static inline int mab_calc_split(struct ma_state *mas, struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) { unsigned char b_end = bn->b_end; int split = b_end / 2; /* Assume equal split. */ unsigned char slot_min, slot_count = mt_slots[bn->type]; /* * To support gap tracking, all NULL entries are kept together and a node cannot * end on a NULL entry, with the exception of the left-most leaf. The * limitation means that the split of a node must be checked for this condition * and be able to put more data in one direction or the other. */ if (unlikely((mas->mas_flags & MA_STATE_BULK))) { *mid_split = 0; split = b_end - mt_min_slots[bn->type]; if (!ma_is_leaf(bn->type)) return split; mas->mas_flags |= MA_STATE_REBALANCE; if (!bn->slot[split]) split--; return split; } /* * Although extremely rare, it is possible to enter what is known as the 3-way * split scenario. The 3-way split comes about by means of a store of a range * that overwrites the end and beginning of two full nodes. The result is a set * of entries that cannot be stored in 2 nodes. Sometimes, these two nodes can * also be located in different parent nodes which are also full. This can * carry upwards all the way to the root in the worst case. */ if (unlikely(mab_middle_node(bn, split, slot_count))) { split = b_end / 3; *mid_split = split * 2; } else { slot_min = mt_min_slots[bn->type]; *mid_split = 0; /* * Avoid having a range less than the slot count unless it * causes one node to be deficient. * NOTE: mt_min_slots is 1 based, b_end and split are zero. 
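 *
 * In other words, the loop below keeps advancing the split point while the
 * left node would span fewer than slot_count - 1 indices, but only as long
 * as the right node is still left with more than slot_min entries.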
*/ while ((split < slot_count - 1) && ((bn->pivot[split] - min) < slot_count - 1) && (b_end - split > slot_min)) split++; } /* Avoid ending a node on a NULL entry */ split = mab_no_null_split(bn, split, slot_count); if (unlikely(*mid_split)) *mid_split = mab_no_null_split(bn, *mid_split, slot_count); return split; } /* * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node * and set @b_node->b_end to the next free slot. * @mas: The maple state * @mas_start: The starting slot to copy * @mas_end: The end slot to copy (inclusively) * @b_node: The maple_big_node to place the data * @mab_start: The starting location in maple_big_node to store the data. */ static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start, unsigned char mas_end, struct maple_big_node *b_node, unsigned char mab_start) { enum maple_type mt; struct maple_node *node; void __rcu **slots; unsigned long *pivots, *gaps; int i = mas_start, j = mab_start; unsigned char piv_end; node = mas_mn(mas); mt = mte_node_type(mas->node); pivots = ma_pivots(node, mt); if (!i) { b_node->pivot[j] = pivots[i++]; if (unlikely(i > mas_end)) goto complete; j++; } piv_end = min(mas_end, mt_pivots[mt]); for (; i < piv_end; i++, j++) { b_node->pivot[j] = pivots[i]; if (unlikely(!b_node->pivot[j])) break; if (unlikely(mas->max == b_node->pivot[j])) goto complete; } if (likely(i <= mas_end)) b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt); complete: b_node->b_end = ++j; j -= mab_start; slots = ma_slots(node, mt); memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j); if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) { gaps = ma_gaps(node, mt); memcpy(b_node->gap + mab_start, gaps + mas_start, sizeof(unsigned long) * j); } } /* * mas_leaf_set_meta() - Set the metadata of a leaf if possible. * @node: The maple node * @mt: The maple type * @end: The node end */ static inline void mas_leaf_set_meta(struct maple_node *node, enum maple_type mt, unsigned char end) { if (end < mt_slots[mt] - 1) ma_set_meta(node, mt, 0, end); } /* * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node. * @b_node: the maple_big_node that has the data * @mab_start: the start location in @b_node. * @mab_end: The end location in @b_node (inclusively) * @mas: The maple state with the maple encoded node. */ static inline void mab_mas_cp(struct maple_big_node *b_node, unsigned char mab_start, unsigned char mab_end, struct ma_state *mas, bool new_max) { int i, j = 0; enum maple_type mt = mte_node_type(mas->node); struct maple_node *node = mte_to_node(mas->node); void __rcu **slots = ma_slots(node, mt); unsigned long *pivots = ma_pivots(node, mt); unsigned long *gaps = NULL; unsigned char end; if (mab_end - mab_start > mt_pivots[mt]) mab_end--; if (!pivots[mt_pivots[mt] - 1]) slots[mt_pivots[mt]] = NULL; i = mab_start; do { pivots[j++] = b_node->pivot[i++]; } while (i <= mab_end && likely(b_node->pivot[i])); memcpy(slots, b_node->slot + mab_start, sizeof(void *) * (i - mab_start)); if (new_max) mas->max = b_node->pivot[i - 1]; end = j - 1; if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) { unsigned long max_gap = 0; unsigned char offset = 0; gaps = ma_gaps(node, mt); do { gaps[--j] = b_node->gap[--i]; if (gaps[j] > max_gap) { offset = j; max_gap = gaps[j]; } } while (j); ma_set_meta(node, mt, offset, end); } else { mas_leaf_set_meta(node, mt, end); } } /* * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. 
* @mas: The maple state * @end: The maple node end * @mt: The maple node type */ static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, enum maple_type mt) { if (!(mas->mas_flags & MA_STATE_BULK)) return; if (mte_is_root(mas->node)) return; if (end > mt_min_slots[mt]) { mas->mas_flags &= ~MA_STATE_REBALANCE; return; } } /* * mas_store_b_node() - Store an @entry into the b_node while also copying the * data from a maple encoded node. * @wr_mas: the maple write state * @b_node: the maple_big_node to fill with data * @offset_end: the offset to end copying * * Return: The actual end of the data stored in @b_node */ static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, struct maple_big_node *b_node, unsigned char offset_end) { unsigned char slot; unsigned char b_end; /* Possible underflow of piv will wrap back to 0 before use. */ unsigned long piv; struct ma_state *mas = wr_mas->mas; b_node->type = wr_mas->type; b_end = 0; slot = mas->offset; if (slot) { /* Copy start data up to insert. */ mas_mab_cp(mas, 0, slot - 1, b_node, 0); b_end = b_node->b_end; piv = b_node->pivot[b_end - 1]; } else piv = mas->min - 1; if (piv + 1 < mas->index) { /* Handle range starting after old range */ b_node->slot[b_end] = wr_mas->content; if (!wr_mas->content) b_node->gap[b_end] = mas->index - 1 - piv; b_node->pivot[b_end++] = mas->index - 1; } /* Store the new entry. */ mas->offset = b_end; b_node->slot[b_end] = wr_mas->entry; b_node->pivot[b_end] = mas->last; /* Appended. */ if (mas->last >= mas->max) goto b_end; /* Handle new range ending before old range ends */ piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); if (piv > mas->last) { if (piv == ULONG_MAX) mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); if (offset_end != slot) wr_mas->content = mas_slot_locked(mas, wr_mas->slots, offset_end); b_node->slot[++b_end] = wr_mas->content; if (!wr_mas->content) b_node->gap[b_end] = piv - mas->last + 1; b_node->pivot[b_end] = piv; } slot = offset_end + 1; if (slot > mas->end) goto b_end; /* Copy end data to the end of the node. */ mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end); b_node->b_end--; return; b_end: b_node->b_end = b_end; } /* * mas_prev_sibling() - Find the previous node with the same parent. * @mas: the maple state * * Return: True if there is a previous sibling, false otherwise. */ static inline bool mas_prev_sibling(struct ma_state *mas) { unsigned int p_slot = mte_parent_slot(mas->node); if (mte_is_root(mas->node)) return false; if (!p_slot) return false; mas_ascend(mas); mas->offset = p_slot - 1; mas_descend(mas); return true; } /* * mas_next_sibling() - Find the next node with the same parent. * @mas: the maple state * * Return: true if there is a next sibling, false otherwise. */ static inline bool mas_next_sibling(struct ma_state *mas) { MA_STATE(parent, mas->tree, mas->index, mas->last); if (mte_is_root(mas->node)) return false; parent = *mas; mas_ascend(&parent); parent.offset = mte_parent_slot(mas->node) + 1; if (parent.offset > mas_data_end(&parent)) return false; *mas = parent; mas_descend(mas); return true; } /* * mte_node_or_none() - Set the enode and state. * @enode: The encoded maple node. * * Set the node to the enode and the status. */ static inline void mas_node_or_none(struct ma_state *mas, struct maple_enode *enode) { if (enode) { mas->node = enode; mas->status = ma_active; } else { mas->node = NULL; mas->status = ma_none; } } /* * mas_wr_node_walk() - Find the correct offset for the index in the @mas. 
* @wr_mas: The maple write state * * Uses mas_slot_locked() and does not need to worry about dead nodes. */ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char count, offset; if (unlikely(ma_is_dense(wr_mas->type))) { wr_mas->r_max = wr_mas->r_min = mas->index; mas->offset = mas->index = mas->min; return; } wr_mas->node = mas_mn(wr_mas->mas); wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots, mas->max); offset = mas->offset; while (offset < count && mas->index > wr_mas->pivots[offset]) offset++; wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max; wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); wr_mas->offset_end = mas->offset = offset; } /* * mast_rebalance_next() - Rebalance against the next node * @mast: The maple subtree state * @old_r: The encoded maple node to the right (next node). */ static inline void mast_rebalance_next(struct maple_subtree_state *mast) { unsigned char b_end = mast->bn->b_end; mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), mast->bn, b_end); mast->orig_r->last = mast->orig_r->max; } /* * mast_rebalance_prev() - Rebalance against the previous node * @mast: The maple subtree state * @old_l: The encoded maple node to the left (previous node) */ static inline void mast_rebalance_prev(struct maple_subtree_state *mast) { unsigned char end = mas_data_end(mast->orig_l) + 1; unsigned char b_end = mast->bn->b_end; mab_shift_right(mast->bn, end); mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); mast->l->min = mast->orig_l->min; mast->orig_l->index = mast->orig_l->min; mast->bn->b_end = end + b_end; mast->l->offset += end; } /* * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring * the node to the right. Checking the nodes to the right then the left at each * level upwards until root is reached. * Data is copied into the @mast->bn. * @mast: The maple_subtree_state. */ static inline bool mast_spanning_rebalance(struct maple_subtree_state *mast) { struct ma_state r_tmp = *mast->orig_r; struct ma_state l_tmp = *mast->orig_l; unsigned char depth = 0; do { mas_ascend(mast->orig_r); mas_ascend(mast->orig_l); depth++; if (mast->orig_r->offset < mas_data_end(mast->orig_r)) { mast->orig_r->offset++; do { mas_descend(mast->orig_r); mast->orig_r->offset = 0; } while (--depth); mast_rebalance_next(mast); *mast->orig_l = l_tmp; return true; } else if (mast->orig_l->offset != 0) { mast->orig_l->offset--; do { mas_descend(mast->orig_l); mast->orig_l->offset = mas_data_end(mast->orig_l); } while (--depth); mast_rebalance_prev(mast); *mast->orig_r = r_tmp; return true; } } while (!mte_is_root(mast->orig_r->node)); *mast->orig_r = r_tmp; *mast->orig_l = l_tmp; return false; } /* * mast_ascend() - Ascend the original left and right maple states. * @mast: the maple subtree state. * * Ascend the original left and right sides. Set the offsets to point to the * data already in the new tree (@mast->l and @mast->r). 
*/ static inline void mast_ascend(struct maple_subtree_state *mast) { MA_WR_STATE(wr_mas, mast->orig_r, NULL); mas_ascend(mast->orig_l); mas_ascend(mast->orig_r); mast->orig_r->offset = 0; mast->orig_r->index = mast->r->max; /* last should be larger than or equal to index */ if (mast->orig_r->last < mast->orig_r->index) mast->orig_r->last = mast->orig_r->index; wr_mas.type = mte_node_type(mast->orig_r->node); mas_wr_node_walk(&wr_mas); /* Set up the left side of things */ mast->orig_l->offset = 0; mast->orig_l->index = mast->l->min; wr_mas.mas = mast->orig_l; wr_mas.type = mte_node_type(mast->orig_l->node); mas_wr_node_walk(&wr_mas); mast->bn->type = wr_mas.type; } /* * mas_new_ma_node() - Create and return a new maple node. Helper function. * @mas: the maple state with the allocations. * @b_node: the maple_big_node with the type encoding. * * Use the node type from the maple_big_node to allocate a new node from the * ma_state. This function exists mainly for code readability. * * Return: A new maple encoded node */ static inline struct maple_enode *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) { return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); } /* * mas_mab_to_node() - Set up right and middle nodes * * @mas: the maple state that contains the allocations. * @b_node: the node which contains the data. * @left: The pointer which will have the left node * @right: The pointer which may have the right node * @middle: the pointer which may have the middle node (rare) * @mid_split: the split location for the middle node * * Return: the split of left. */ static inline unsigned char mas_mab_to_node(struct ma_state *mas, struct maple_big_node *b_node, struct maple_enode **left, struct maple_enode **right, struct maple_enode **middle, unsigned char *mid_split, unsigned long min) { unsigned char split = 0; unsigned char slot_count = mt_slots[b_node->type]; *left = mas_new_ma_node(mas, b_node); *right = NULL; *middle = NULL; *mid_split = 0; if (b_node->b_end < slot_count) { split = b_node->b_end; } else { split = mab_calc_split(mas, b_node, mid_split, min); *right = mas_new_ma_node(mas, b_node); } if (*mid_split) *middle = mas_new_ma_node(mas, b_node); return split; } /* * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end * pointer. * @b_node - the big node to add the entry * @mas - the maple state to get the pivot (mas->max) * @entry - the entry to add, if NULL nothing happens. */ static inline void mab_set_b_end(struct maple_big_node *b_node, struct ma_state *mas, void *entry) { if (!entry) return; b_node->slot[b_node->b_end] = entry; if (mt_is_alloc(mas->tree)) b_node->gap[b_node->b_end] = mas_max_gap(mas); b_node->pivot[b_node->b_end++] = mas->max; } /* * mas_set_split_parent() - combine_then_separate helper function. 
Sets the parent * of @mas->node to either @left or @right, depending on @slot and @split * * @mas - the maple state with the node that needs a parent * @left - possible parent 1 * @right - possible parent 2 * @slot - the slot the mas->node was placed * @split - the split location between @left and @right */ static inline void mas_set_split_parent(struct ma_state *mas, struct maple_enode *left, struct maple_enode *right, unsigned char *slot, unsigned char split) { if (mas_is_none(mas)) return; if ((*slot) <= split) mas_set_parent(mas, mas->node, left, *slot); else if (right) mas_set_parent(mas, mas->node, right, (*slot) - split - 1); (*slot)++; } /* * mte_mid_split_check() - Check if the next node passes the mid-split * @**l: Pointer to left encoded maple node. * @**m: Pointer to middle encoded maple node. * @**r: Pointer to right encoded maple node. * @slot: The offset * @*split: The split location. * @mid_split: The middle split. */ static inline void mte_mid_split_check(struct maple_enode **l, struct maple_enode **r, struct maple_enode *right, unsigned char slot, unsigned char *split, unsigned char mid_split) { if (*r == right) return; if (slot < mid_split) return; *l = *r; *r = right; *split = mid_split; } /* * mast_set_split_parents() - Helper function to set three nodes parents. Slot * is taken from @mast->l. * @mast - the maple subtree state * @left - the left node * @right - the right node * @split - the split location. */ static inline void mast_set_split_parents(struct maple_subtree_state *mast, struct maple_enode *left, struct maple_enode *middle, struct maple_enode *right, unsigned char split, unsigned char mid_split) { unsigned char slot; struct maple_enode *l = left; struct maple_enode *r = right; if (mas_is_none(mast->l)) return; if (middle) r = middle; slot = mast->l->offset; mte_mid_split_check(&l, &r, right, slot, &split, mid_split); mas_set_split_parent(mast->l, l, r, &slot, split); mte_mid_split_check(&l, &r, right, slot, &split, mid_split); mas_set_split_parent(mast->m, l, r, &slot, split); mte_mid_split_check(&l, &r, right, slot, &split, mid_split); mas_set_split_parent(mast->r, l, r, &slot, split); } /* * mas_topiary_node() - Dispose of a single node * @mas: The maple state for pushing nodes * @enode: The encoded maple node * @in_rcu: If the tree is in rcu mode * * The node will either be RCU freed or pushed back on the maple state. */ static inline void mas_topiary_node(struct ma_state *mas, struct ma_state *tmp_mas, bool in_rcu) { struct maple_node *tmp; struct maple_enode *enode; if (mas_is_none(tmp_mas)) return; enode = tmp_mas->node; tmp = mte_to_node(enode); mte_set_node_dead(enode); if (in_rcu) ma_free_rcu(tmp); else mas_push_node(mas, tmp); } /* * mas_topiary_replace() - Replace the data with new data, then repair the * parent links within the new tree. Iterate over the dead sub-tree and collect * the dead subtrees and topiary the nodes that are no longer of use. * * The new tree will have up to three children with the correct parent. Keep * track of the new entries as they need to be followed to find the next level * of new entries. * * The old tree will have up to three children with the old parent. Keep track * of the old entries as they may have more nodes below replaced. Nodes within * [index, last] are dead subtrees, others need to be freed and followed. 
* * @mas: The maple state pointing at the new data * @old_enode: The maple encoded node being replaced * */ static inline void mas_topiary_replace(struct ma_state *mas, struct maple_enode *old_enode) { struct ma_state tmp[3], tmp_next[3]; MA_TOPIARY(subtrees, mas->tree); bool in_rcu; int i, n; /* Place data in tree & then mark node as old */ mas_put_in_tree(mas, old_enode); /* Update the parent pointers in the tree */ tmp[0] = *mas; tmp[0].offset = 0; tmp[1].status = ma_none; tmp[2].status = ma_none; while (!mte_is_leaf(tmp[0].node)) { n = 0; for (i = 0; i < 3; i++) { if (mas_is_none(&tmp[i])) continue; while (n < 3) { if (!mas_find_child(&tmp[i], &tmp_next[n])) break; n++; } mas_adopt_children(&tmp[i], tmp[i].node); } if (MAS_WARN_ON(mas, n == 0)) break; while (n < 3) tmp_next[n++].status = ma_none; for (i = 0; i < 3; i++) tmp[i] = tmp_next[i]; } /* Collect the old nodes that need to be discarded */ if (mte_is_leaf(old_enode)) return mas_free(mas, old_enode); tmp[0] = *mas; tmp[0].offset = 0; tmp[0].node = old_enode; tmp[1].status = ma_none; tmp[2].status = ma_none; in_rcu = mt_in_rcu(mas->tree); do { n = 0; for (i = 0; i < 3; i++) { if (mas_is_none(&tmp[i])) continue; while (n < 3) { if (!mas_find_child(&tmp[i], &tmp_next[n])) break; if ((tmp_next[n].min >= tmp_next->index) && (tmp_next[n].max <= tmp_next->last)) { mat_add(&subtrees, tmp_next[n].node); tmp_next[n].status = ma_none; } else { n++; } } } if (MAS_WARN_ON(mas, n == 0)) break; while (n < 3) tmp_next[n++].status = ma_none; for (i = 0; i < 3; i++) { mas_topiary_node(mas, &tmp[i], in_rcu); tmp[i] = tmp_next[i]; } } while (!mte_is_leaf(tmp[0].node)); for (i = 0; i < 3; i++) mas_topiary_node(mas, &tmp[i], in_rcu); mas_mat_destroy(mas, &subtrees); } /* * mas_wmb_replace() - Write memory barrier and replace * @mas: The maple state * @old: The old maple encoded node that is being replaced. * * Updates gap as necessary. */ static inline void mas_wmb_replace(struct ma_state *mas, struct maple_enode *old_enode) { /* Insert the new data in the tree */ mas_topiary_replace(mas, old_enode); if (mte_is_leaf(mas->node)) return; mas_update_gap(mas); } /* * mast_cp_to_nodes() - Copy data out to nodes. * @mast: The maple subtree state * @left: The left encoded maple node * @middle: The middle encoded maple node * @right: The right encoded maple node * @split: The location to split between left and (middle ? middle : right) * @mid_split: The location to split between middle and right. */ static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, struct maple_enode *left, struct maple_enode *middle, struct maple_enode *right, unsigned char split, unsigned char mid_split) { bool new_lmax = true; mas_node_or_none(mast->l, left); mas_node_or_none(mast->m, middle); mas_node_or_none(mast->r, right); mast->l->min = mast->orig_l->min; if (split == mast->bn->b_end) { mast->l->max = mast->orig_r->max; new_lmax = false; } mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax); if (middle) { mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true); mast->m->min = mast->bn->pivot[split] + 1; split = mid_split; } mast->r->max = mast->orig_r->max; if (right) { mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false); mast->r->min = mast->bn->pivot[split] + 1; } } /* * mast_combine_cp_left - Copy in the original left side of the tree into the * combined data set in the maple subtree state big node. 
* @mast: The maple subtree state */ static inline void mast_combine_cp_left(struct maple_subtree_state *mast) { unsigned char l_slot = mast->orig_l->offset; if (!l_slot) return; mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); } /* * mast_combine_cp_right: Copy in the original right side of the tree into the * combined data set in the maple subtree state big node. * @mast: The maple subtree state */ static inline void mast_combine_cp_right(struct maple_subtree_state *mast) { if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) return; mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, mt_slot_count(mast->orig_r->node), mast->bn, mast->bn->b_end); mast->orig_r->last = mast->orig_r->max; } /* * mast_sufficient: Check if the maple subtree state has enough data in the big * node to create at least one sufficient node * @mast: the maple subtree state */ static inline bool mast_sufficient(struct maple_subtree_state *mast) { if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) return true; return false; } /* * mast_overflow: Check if there is too much data in the subtree state for a * single node. * @mast: The maple subtree state */ static inline bool mast_overflow(struct maple_subtree_state *mast) { if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node)) return true; return false; } static inline void *mtree_range_walk(struct ma_state *mas) { unsigned long *pivots; unsigned char offset; struct maple_node *node; struct maple_enode *next, *last; enum maple_type type; void __rcu **slots; unsigned char end; unsigned long max, min; unsigned long prev_max, prev_min; next = mas->node; min = mas->min; max = mas->max; do { last = next; node = mte_to_node(next); type = mte_node_type(next); pivots = ma_pivots(node, type); end = ma_data_end(node, type, pivots, max); prev_min = min; prev_max = max; if (pivots[0] >= mas->index) { offset = 0; max = pivots[0]; goto next; } offset = 1; while (offset < end) { if (pivots[offset] >= mas->index) { max = pivots[offset]; break; } offset++; } min = pivots[offset - 1] + 1; next: slots = ma_slots(node, type); next = mt_slot(mas->tree, slots, offset); if (unlikely(ma_dead_node(node))) goto dead_node; } while (!ma_is_leaf(type)); mas->end = end; mas->offset = offset; mas->index = min; mas->last = max; mas->min = prev_min; mas->max = prev_max; mas->node = last; return (void *)next; dead_node: mas_reset(mas); return NULL; } /* * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. * @mas: The starting maple state * @mast: The maple_subtree_state, keeps track of 4 maple states. * @count: The estimated count of iterations needed. * * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root * is hit. First @b_node is split into two entries which are inserted into the * next iteration of the loop. @b_node is returned populated with the final * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last * to account of what has been copied into the new sub-tree. The update of * orig_l_mas->last is used in mas_consume to find the slots that will need to * be either freed or destroyed. orig_l_mas->depth keeps track of the height of * the new sub-tree in case the sub-tree becomes the full tree. * * Return: the number of elements in b_node during the last loop. 
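 *
 * For context, this path is ultimately reached through ordinary range stores
 * that happen to cross node boundaries.  An illustrative sketch of such a
 * caller follows; the tree, range and entry are made-up examples and not part
 * of this implementation:
 *
 *      MA_STATE(mas, &tree, 5, 1000);
 *
 *      mtree_lock(&tree);
 *      mas_store_gfp(&mas, entry, GFP_KERNEL);
 *      mtree_unlock(&tree);
 *
 * If [5, 1000] starts in one leaf and ends in another, the write is detected
 * as spanning and funnelled through mas_wr_spanning_store(), which builds the
 * subtree state consumed here.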
*/ static int mas_spanning_rebalance(struct ma_state *mas, struct maple_subtree_state *mast, unsigned char count) { unsigned char split, mid_split; unsigned char slot = 0; struct maple_enode *left = NULL, *middle = NULL, *right = NULL; struct maple_enode *old_enode; MA_STATE(l_mas, mas->tree, mas->index, mas->index); MA_STATE(r_mas, mas->tree, mas->index, mas->last); MA_STATE(m_mas, mas->tree, mas->index, mas->index); /* * The tree needs to be rebalanced and leaves need to be kept at the same level. * Rebalancing is done by use of the ``struct maple_topiary``. */ mast->l = &l_mas; mast->m = &m_mas; mast->r = &r_mas; l_mas.status = r_mas.status = m_mas.status = ma_none; /* Check if this is not root and has sufficient data. */ if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) mast_spanning_rebalance(mast); l_mas.depth = 0; /* * Each level of the tree is examined and balanced, pushing data to the left or * right, or rebalancing against left or right nodes is employed to avoid * rippling up the tree to limit the amount of churn. Once a new sub-section of * the tree is created, there may be a mix of new and old nodes. The old nodes * will have the incorrect parent pointers and currently be in two trees: the * original tree and the partially new tree. To remedy the parent pointers in * the old tree, the new data is swapped into the active tree and a walk down * the tree is performed and the parent pointers are updated. * See mas_topiary_replace() for more information. */ while (count--) { mast->bn->b_end--; mast->bn->type = mte_node_type(mast->orig_l->node); split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, &mid_split, mast->orig_l->min); mast_set_split_parents(mast, left, middle, right, split, mid_split); mast_cp_to_nodes(mast, left, middle, right, split, mid_split); /* * Copy data from next level in the tree to mast->bn from next * iteration */ memset(mast->bn, 0, sizeof(struct maple_big_node)); mast->bn->type = mte_node_type(left); l_mas.depth++; /* Root already stored in l->node. */ if (mas_is_root_limits(mast->l)) goto new_root; mast_ascend(mast); mast_combine_cp_left(mast); l_mas.offset = mast->bn->b_end; mab_set_b_end(mast->bn, &l_mas, left); mab_set_b_end(mast->bn, &m_mas, middle); mab_set_b_end(mast->bn, &r_mas, right); /* Copy anything necessary out of the right node. */ mast_combine_cp_right(mast); mast->orig_l->last = mast->orig_l->max; if (mast_sufficient(mast)) continue; if (mast_overflow(mast)) continue; /* May be a new root stored in mast->bn */ if (mas_is_root_limits(mast->orig_l)) break; mast_spanning_rebalance(mast); /* rebalancing from other nodes may require another loop. 
*/ if (!count) count++; } l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), mte_node_type(mast->orig_l->node)); l_mas.depth++; mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true); mas_set_parent(mas, left, l_mas.node, slot); if (middle) mas_set_parent(mas, middle, l_mas.node, ++slot); if (right) mas_set_parent(mas, right, l_mas.node, ++slot); if (mas_is_root_limits(mast->l)) { new_root: mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas)); while (!mte_is_root(mast->orig_l->node)) mast_ascend(mast); } else { mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent; } old_enode = mast->orig_l->node; mas->depth = l_mas.depth; mas->node = l_mas.node; mas->min = l_mas.min; mas->max = l_mas.max; mas->offset = l_mas.offset; mas_wmb_replace(mas, old_enode); mtree_range_walk(mas); return mast->bn->b_end; } /* * mas_rebalance() - Rebalance a given node. * @mas: The maple state * @b_node: The big maple node. * * Rebalance two nodes into a single node or two new nodes that are sufficient. * Continue upwards until tree is sufficient. * * Return: the number of elements in b_node during the last loop. */ static inline int mas_rebalance(struct ma_state *mas, struct maple_big_node *b_node) { char empty_count = mas_mt_height(mas); struct maple_subtree_state mast; unsigned char shift, b_end = ++b_node->b_end; MA_STATE(l_mas, mas->tree, mas->index, mas->last); MA_STATE(r_mas, mas->tree, mas->index, mas->last); trace_ma_op(__func__, mas); /* * Rebalancing occurs if a node is insufficient. Data is rebalanced * against the node to the right if it exists, otherwise the node to the * left of this node is rebalanced against this node. If rebalancing * causes just one node to be produced instead of two, then the parent * is also examined and rebalanced if it is insufficient. Every level * tries to combine the data in the same way. If one node contains the * entire range of the tree, then that node is used as a new root node. */ mas_node_count(mas, empty_count * 2 - 1); if (mas_is_err(mas)) return 0; mast.orig_l = &l_mas; mast.orig_r = &r_mas; mast.bn = b_node; mast.bn->type = mte_node_type(mas->node); l_mas = r_mas = *mas; if (mas_next_sibling(&r_mas)) { mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end); r_mas.last = r_mas.index = r_mas.max; } else { mas_prev_sibling(&l_mas); shift = mas_data_end(&l_mas) + 1; mab_shift_right(b_node, shift); mas->offset += shift; mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0); b_node->b_end = shift + b_end; l_mas.index = l_mas.last = l_mas.min; } return mas_spanning_rebalance(mas, &mast, empty_count); } /* * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple * state. * @mas: The maple state * @end: The end of the left-most node. * * During a mass-insert event (such as forking), it may be necessary to * rebalance the left-most node when it is not sufficient. */ static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) { enum maple_type mt = mte_node_type(mas->node); struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; struct maple_enode *eparent, *old_eparent; unsigned char offset, tmp, split = mt_slots[mt] / 2; void __rcu **l_slots, **slots; unsigned long *l_pivs, *pivs, gap; bool in_rcu = mt_in_rcu(mas->tree); MA_STATE(l_mas, mas->tree, mas->index, mas->last); l_mas = *mas; mas_prev_sibling(&l_mas); /* set up node. */ if (in_rcu) { /* Allocate for both left and right as well as parent. 
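 *
 * mas_node_count() below ensures at least three preallocated nodes are
 * available and latches -ENOMEM in the maple state if allocation fails,
 * which is why a single mas_is_err() check is enough before popping nodes.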
*/ mas_node_count(mas, 3); if (mas_is_err(mas)) return; newnode = mas_pop_node(mas); } else { newnode = &reuse; } node = mas_mn(mas); newnode->parent = node->parent; slots = ma_slots(newnode, mt); pivs = ma_pivots(newnode, mt); left = mas_mn(&l_mas); l_slots = ma_slots(left, mt); l_pivs = ma_pivots(left, mt); if (!l_slots[split]) split++; tmp = mas_data_end(&l_mas) - split; memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); pivs[tmp] = l_mas.max; memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); l_mas.max = l_pivs[split]; mas->min = l_mas.max + 1; old_eparent = mt_mk_node(mte_parent(l_mas.node), mas_parent_type(&l_mas, l_mas.node)); tmp += end; if (!in_rcu) { unsigned char max_p = mt_pivots[mt]; unsigned char max_s = mt_slots[mt]; if (tmp < max_p) memset(pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); if (tmp < mt_slots[mt]) memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); memcpy(node, newnode, sizeof(struct maple_node)); ma_set_meta(node, mt, 0, tmp - 1); mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), l_pivs[split]); /* Remove data from l_pivs. */ tmp = split + 1; memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); ma_set_meta(left, mt, 0, split); eparent = old_eparent; goto done; } /* RCU requires replacing both l_mas, mas, and parent. */ mas->node = mt_mk_node(newnode, mt); ma_set_meta(newnode, mt, 0, tmp); new_left = mas_pop_node(mas); new_left->parent = left->parent; mt = mte_node_type(l_mas.node); slots = ma_slots(new_left, mt); pivs = ma_pivots(new_left, mt); memcpy(slots, l_slots, sizeof(void *) * split); memcpy(pivs, l_pivs, sizeof(unsigned long) * split); ma_set_meta(new_left, mt, 0, split); l_mas.node = mt_mk_node(new_left, mt); /* replace parent. */ offset = mte_parent_slot(mas->node); mt = mas_parent_type(&l_mas, l_mas.node); parent = mas_pop_node(mas); slots = ma_slots(parent, mt); pivs = ma_pivots(parent, mt); memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); rcu_assign_pointer(slots[offset], mas->node); rcu_assign_pointer(slots[offset - 1], l_mas.node); pivs[offset - 1] = l_mas.max; eparent = mt_mk_node(parent, mt); done: gap = mas_leaf_max_gap(mas); mte_set_gap(eparent, mte_parent_slot(mas->node), gap); gap = mas_leaf_max_gap(&l_mas); mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); mas_ascend(mas); if (in_rcu) { mas_replace_node(mas, old_eparent); mas_adopt_children(mas, mas->node); } mas_update_gap(mas); } /* * mas_split_final_node() - Split the final node in a subtree operation. * @mast: the maple subtree state * @mas: The maple state * @height: The height of the tree in case it's a new root. */ static inline void mas_split_final_node(struct maple_subtree_state *mast, struct ma_state *mas, int height) { struct maple_enode *ancestor; if (mte_is_root(mas->node)) { if (mt_is_alloc(mas->tree)) mast->bn->type = maple_arange_64; else mast->bn->type = maple_range_64; mas->depth = height; } /* * Only a single node is used here, could be root. * The Big_node data should just fit in a single node. 
*/ ancestor = mas_new_ma_node(mas, mast->bn); mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); mte_to_node(ancestor)->parent = mas_mn(mas)->parent; mast->l->node = ancestor; mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); mas->offset = mast->bn->b_end - 1; } /* * mast_fill_bnode() - Copy data into the big node in the subtree state * @mast: The maple subtree state * @mas: the maple state * @skip: The number of entries to skip for new nodes insertion. */ static inline void mast_fill_bnode(struct maple_subtree_state *mast, struct ma_state *mas, unsigned char skip) { bool cp = true; unsigned char split; memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap)); memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot)); memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot)); mast->bn->b_end = 0; if (mte_is_root(mas->node)) { cp = false; } else { mas_ascend(mas); mas->offset = mte_parent_slot(mas->node); } if (cp && mast->l->offset) mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); split = mast->bn->b_end; mab_set_b_end(mast->bn, mast->l, mast->l->node); mast->r->offset = mast->bn->b_end; mab_set_b_end(mast->bn, mast->r, mast->r->node); if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) cp = false; if (cp) mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, mast->bn, mast->bn->b_end); mast->bn->b_end--; mast->bn->type = mte_node_type(mas->node); } /* * mast_split_data() - Split the data in the subtree state big node into regular * nodes. * @mast: The maple subtree state * @mas: The maple state * @split: The location to split the big node */ static inline void mast_split_data(struct maple_subtree_state *mast, struct ma_state *mas, unsigned char split) { unsigned char p_slot; mab_mas_cp(mast->bn, 0, split, mast->l, true); mte_set_pivot(mast->r->node, 0, mast->r->max); mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); mast->l->offset = mte_parent_slot(mas->node); mast->l->max = mast->bn->pivot[split]; mast->r->min = mast->l->max + 1; if (mte_is_leaf(mas->node)) return; p_slot = mast->orig_l->offset; mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, &p_slot, split); mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, &p_slot, split); } /* * mas_push_data() - Instead of splitting a node, it is beneficial to push the * data to the right or left node if there is room. * @mas: The maple state * @height: The current height of the maple state * @mast: The maple subtree state * @left: Push left or not. * * Keeping the height of the tree low means faster lookups. * * Return: True if pushed, false otherwise. 
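 *
 * As an illustration, if a full leaf overflows by one entry while its right
 * sibling is nearly empty, the combined data easily fits in the space of the
 * two existing nodes, so the surplus is pushed sideways and the split (and
 * the ripple of new parents it would create) is avoided entirely.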
*/ static inline bool mas_push_data(struct ma_state *mas, int height, struct maple_subtree_state *mast, bool left) { unsigned char slot_total = mast->bn->b_end; unsigned char end, space, split; MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); tmp_mas = *mas; tmp_mas.depth = mast->l->depth; if (left && !mas_prev_sibling(&tmp_mas)) return false; else if (!left && !mas_next_sibling(&tmp_mas)) return false; end = mas_data_end(&tmp_mas); slot_total += end; space = 2 * mt_slot_count(mas->node) - 2; /* -2 instead of -1 to ensure there isn't a triple split */ if (ma_is_leaf(mast->bn->type)) space--; if (mas->max == ULONG_MAX) space--; if (slot_total >= space) return false; /* Get the data; Fill mast->bn */ mast->bn->b_end++; if (left) { mab_shift_right(mast->bn, end + 1); mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); mast->bn->b_end = slot_total + 1; } else { mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); } /* Configure mast for splitting of mast->bn */ split = mt_slots[mast->bn->type] - 2; if (left) { /* Switch mas to prev node */ *mas = tmp_mas; /* Start using mast->l for the left side. */ tmp_mas.node = mast->l->node; *mast->l = tmp_mas; } else { tmp_mas.node = mast->r->node; *mast->r = tmp_mas; split = slot_total - split; } split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); /* Update parent slot for split calculation. */ if (left) mast->orig_l->offset += end + 1; mast_split_data(mast, mas, split); mast_fill_bnode(mast, mas, 2); mas_split_final_node(mast, mas, height + 1); return true; } /* * mas_split() - Split data that is too big for one node into two. * @mas: The maple state * @b_node: The maple big node * Return: 1 on success, 0 on failure. */ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) { struct maple_subtree_state mast; int height = 0; unsigned char mid_split, split = 0; struct maple_enode *old; /* * Splitting is handled differently from any other B-tree; the Maple * Tree splits upwards. Splitting up means that the split operation * occurs when the walk of the tree hits the leaves and not on the way * down. The reason for splitting up is that it is impossible to know * how much space will be needed until the leaf is (or leaves are) * reached. Since overwriting data is allowed and a range could * overwrite more than one range or result in changing one entry into 3 * entries, it is impossible to know if a split is required until the * data is examined. * * Splitting is a balancing act between keeping allocations to a minimum * and avoiding a 'jitter' event where a tree is expanded to make room * for an entry followed by a contraction when the entry is removed. To * accomplish the balance, there are empty slots remaining in both left * and right nodes after a split. */ MA_STATE(l_mas, mas->tree, mas->index, mas->last); MA_STATE(r_mas, mas->tree, mas->index, mas->last); MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); trace_ma_op(__func__, mas); mas->depth = mas_mt_height(mas); /* Allocation failures will happen early. 
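 *
 * The worst case needs two new nodes per level plus one for a possible new
 * root, hence the 1 + depth * 2 reservation below; failing that reservation
 * aborts the split before any node has been modified.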
*/ mas_node_count(mas, 1 + mas->depth * 2); if (mas_is_err(mas)) return 0; mast.l = &l_mas; mast.r = &r_mas; mast.orig_l = &prev_l_mas; mast.orig_r = &prev_r_mas; mast.bn = b_node; while (height++ <= mas->depth) { if (mt_slots[b_node->type] > b_node->b_end) { mas_split_final_node(&mast, mas, height); break; } l_mas = r_mas = *mas; l_mas.node = mas_new_ma_node(mas, b_node); r_mas.node = mas_new_ma_node(mas, b_node); /* * Another way that 'jitter' is avoided is to terminate a split up early if the * left or right node has space to spare. This is referred to as "pushing left" * or "pushing right" and is similar to the B* tree, except the nodes left or * right can rarely be reused due to RCU, but the ripple upwards is halted which * is a significant savings. */ /* Try to push left. */ if (mas_push_data(mas, height, &mast, true)) break; /* Try to push right. */ if (mas_push_data(mas, height, &mast, false)) break; split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); mast_split_data(&mast, mas, split); /* * Usually correct, mab_mas_cp in the above call overwrites * r->max. */ mast.r->max = mas->max; mast_fill_bnode(&mast, mas, 1); prev_l_mas = *mast.l; prev_r_mas = *mast.r; } /* Set the original node as dead */ old = mas->node; mas->node = l_mas.node; mas_wmb_replace(mas, old); mtree_range_walk(mas); return 1; } /* * mas_reuse_node() - Reuse the node to store the data. * @wr_mas: The maple write state * @bn: The maple big node * @end: The end of the data. * * Will always return false in RCU mode. * * Return: True if node was reused, false otherwise. */ static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, struct maple_big_node *bn, unsigned char end) { /* Need to be rcu safe. */ if (mt_in_rcu(wr_mas->mas->tree)) return false; if (end > bn->b_end) { int clear = mt_slots[wr_mas->type] - bn->b_end; memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--); memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear); } mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false); return true; } /* * mas_commit_b_node() - Commit the big node into the tree. * @wr_mas: The maple write state * @b_node: The maple big node * @end: The end of the data. 
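 *
 * Depending on how full the big node is, the data is either rebalanced with
 * a sibling (too few entries), split across new nodes (too many), or written
 * back into a reused or replacement node.
 *
 * Return: 0 on allocation failure, non-zero otherwise.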
*/ static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, struct maple_big_node *b_node, unsigned char end) { struct maple_node *node; struct maple_enode *old_enode; unsigned char b_end = b_node->b_end; enum maple_type b_type = b_node->type; old_enode = wr_mas->mas->node; if ((b_end < mt_min_slots[b_type]) && (!mte_is_root(old_enode)) && (mas_mt_height(wr_mas->mas) > 1)) return mas_rebalance(wr_mas->mas, b_node); if (b_end >= mt_slots[b_type]) return mas_split(wr_mas->mas, b_node); if (mas_reuse_node(wr_mas, b_node, end)) goto reuse_node; mas_node_count(wr_mas->mas, 1); if (mas_is_err(wr_mas->mas)) return 0; node = mas_pop_node(wr_mas->mas); node->parent = mas_mn(wr_mas->mas)->parent; wr_mas->mas->node = mt_mk_node(node, b_type); mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false); mas_replace_node(wr_mas->mas, old_enode); reuse_node: mas_update_gap(wr_mas->mas); wr_mas->mas->end = b_end; return 1; } /* * mas_root_expand() - Expand a root to a node * @mas: The maple state * @entry: The entry to store into the tree */ static inline int mas_root_expand(struct ma_state *mas, void *entry) { void *contents = mas_root_locked(mas); enum maple_type type = maple_leaf_64; struct maple_node *node; void __rcu **slots; unsigned long *pivots; int slot = 0; mas_node_count(mas, 1); if (unlikely(mas_is_err(mas))) return 0; node = mas_pop_node(mas); pivots = ma_pivots(node, type); slots = ma_slots(node, type); node->parent = ma_parent_ptr(mas_tree_parent(mas)); mas->node = mt_mk_node(node, type); mas->status = ma_active; if (mas->index) { if (contents) { rcu_assign_pointer(slots[slot], contents); if (likely(mas->index > 1)) slot++; } pivots[slot++] = mas->index - 1; } rcu_assign_pointer(slots[slot], entry); mas->offset = slot; pivots[slot] = mas->last; if (mas->last != ULONG_MAX) pivots[++slot] = ULONG_MAX; mas->depth = 1; mas_set_height(mas); ma_set_meta(node, maple_leaf_64, 0, slot); /* swap the new root into the tree */ rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); return slot; } static inline void mas_store_root(struct ma_state *mas, void *entry) { if (likely((mas->last != 0) || (mas->index != 0))) mas_root_expand(mas, entry); else if (((unsigned long) (entry) & 3) == 2) mas_root_expand(mas, entry); else { rcu_assign_pointer(mas->tree->ma_root, entry); mas->status = ma_start; } } /* * mas_is_span_wr() - Check if the write needs to be treated as a write that * spans the node. * @mas: The maple state * @piv: The pivot value being written * @type: The maple node type * @entry: The data to write * * Spanning writes are writes that start in one node and end in another OR if * the write of a %NULL will cause the node to end with a %NULL. * * Return: True if this is a spanning write, false otherwise. */ static bool mas_is_span_wr(struct ma_wr_state *wr_mas) { unsigned long max = wr_mas->r_max; unsigned long last = wr_mas->mas->last; enum maple_type type = wr_mas->type; void *entry = wr_mas->entry; /* Contained in this pivot, fast path */ if (last < max) return false; if (ma_is_leaf(type)) { max = wr_mas->mas->max; if (last < max) return false; } if (last == max) { /* * The last entry of leaf node cannot be NULL unless it is the * rightmost node (writing ULONG_MAX), otherwise it spans slots. 
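 *
 * For example, writing a non-NULL entry whose last index lands exactly on
 * this node's maximum stays contained, but storing NULL up to that same
 * maximum in any node other than the right-most one must be treated as
 * spanning so the trailing NULL can be merged with the slots that follow.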
*/ if (entry || last == ULONG_MAX) return false; } trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); return true; } static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) { wr_mas->type = mte_node_type(wr_mas->mas->node); mas_wr_node_walk(wr_mas); wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); } static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) { wr_mas->mas->max = wr_mas->r_max; wr_mas->mas->min = wr_mas->r_min; wr_mas->mas->node = wr_mas->content; wr_mas->mas->offset = 0; wr_mas->mas->depth++; } /* * mas_wr_walk() - Walk the tree for a write. * @wr_mas: The maple write state * * Uses mas_slot_locked() and does not need to worry about dead nodes. * * Return: True if it's contained in a node, false on spanning write. */ static bool mas_wr_walk(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; while (true) { mas_wr_walk_descend(wr_mas); if (unlikely(mas_is_span_wr(wr_mas))) return false; wr_mas->content = mas_slot_locked(mas, wr_mas->slots, mas->offset); if (ma_is_leaf(wr_mas->type)) return true; mas_wr_walk_traverse(wr_mas); } return true; } static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; while (true) { mas_wr_walk_descend(wr_mas); wr_mas->content = mas_slot_locked(mas, wr_mas->slots, mas->offset); if (ma_is_leaf(wr_mas->type)) return true; mas_wr_walk_traverse(wr_mas); } return true; } /* * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. * @l_wr_mas: The left maple write state * @r_wr_mas: The right maple write state */ static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas, struct ma_wr_state *r_wr_mas) { struct ma_state *r_mas = r_wr_mas->mas; struct ma_state *l_mas = l_wr_mas->mas; unsigned char l_slot; l_slot = l_mas->offset; if (!l_wr_mas->content) l_mas->index = l_wr_mas->r_min; if ((l_mas->index == l_wr_mas->r_min) && (l_slot && !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) { if (l_slot > 1) l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1; else l_mas->index = l_mas->min; l_mas->offset = l_slot - 1; } if (!r_wr_mas->content) { if (r_mas->last < r_wr_mas->r_max) r_mas->last = r_wr_mas->r_max; r_mas->offset++; } else if ((r_mas->last == r_wr_mas->r_max) && (r_mas->last < r_mas->max) && !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) { r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots, r_wr_mas->type, r_mas->offset + 1); r_mas->offset++; } } static inline void *mas_state_walk(struct ma_state *mas) { void *entry; entry = mas_start(mas); if (mas_is_none(mas)) return NULL; if (mas_is_ptr(mas)) return entry; return mtree_range_walk(mas); } /* * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up * to date. * * @mas: The maple state. * * Note: Leaves mas in undesirable state. * Return: The entry for @mas->index or %NULL on dead node. 
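 *
 * This walk backs the public lookup fast path.  A minimal, illustrative
 * reader sketch (the tree, index and do_something() below are made-up
 * examples):
 *
 *      void *entry;
 *
 *      entry = mtree_load(&tree, 42);
 *      if (entry)
 *              do_something(entry);
 *
 * mtree_load() takes the RCU read lock itself and retries the walk if a
 * dead node is encountered.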
*/ static inline void *mtree_lookup_walk(struct ma_state *mas) { unsigned long *pivots; unsigned char offset; struct maple_node *node; struct maple_enode *next; enum maple_type type; void __rcu **slots; unsigned char end; next = mas->node; do { node = mte_to_node(next); type = mte_node_type(next); pivots = ma_pivots(node, type); end = mt_pivots[type]; offset = 0; do { if (pivots[offset] >= mas->index) break; } while (++offset < end); slots = ma_slots(node, type); next = mt_slot(mas->tree, slots, offset); if (unlikely(ma_dead_node(node))) goto dead_node; } while (!ma_is_leaf(type)); return (void *)next; dead_node: mas_reset(mas); return NULL; } static void mte_destroy_walk(struct maple_enode *, struct maple_tree *); /* * mas_new_root() - Create a new root node that only contains the entry passed * in. * @mas: The maple state * @entry: The entry to store. * * Only valid when the index == 0 and the last == ULONG_MAX * * Return 0 on error, 1 on success. */ static inline int mas_new_root(struct ma_state *mas, void *entry) { struct maple_enode *root = mas_root_locked(mas); enum maple_type type = maple_leaf_64; struct maple_node *node; void __rcu **slots; unsigned long *pivots; if (!entry && !mas->index && mas->last == ULONG_MAX) { mas->depth = 0; mas_set_height(mas); rcu_assign_pointer(mas->tree->ma_root, entry); mas->status = ma_start; goto done; } mas_node_count(mas, 1); if (mas_is_err(mas)) return 0; node = mas_pop_node(mas); pivots = ma_pivots(node, type); slots = ma_slots(node, type); node->parent = ma_parent_ptr(mas_tree_parent(mas)); mas->node = mt_mk_node(node, type); mas->status = ma_active; rcu_assign_pointer(slots[0], entry); pivots[0] = mas->last; mas->depth = 1; mas_set_height(mas); rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); done: if (xa_is_node(root)) mte_destroy_walk(root, mas->tree); return 1; } /* * mas_wr_spanning_store() - Create a subtree with the store operation completed * and new nodes where necessary, then place the sub-tree in the actual tree. * Note that mas is expected to point to the node which caused the store to * span. * @wr_mas: The maple write state * * Return: 0 on error, positive on success. */ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas) { struct maple_subtree_state mast; struct maple_big_node b_node; struct ma_state *mas; unsigned char height; /* Left and Right side of spanning store */ MA_STATE(l_mas, NULL, 0, 0); MA_STATE(r_mas, NULL, 0, 0); MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry); /* * A store operation that spans multiple nodes is called a spanning * store and is handled early in the store call stack by the function * mas_is_span_wr(). When a spanning store is identified, the maple * state is duplicated. The first maple state walks the left tree path * to ``index``, the duplicate walks the right tree path to ``last``. * The data in the two nodes are combined into a single node, two nodes, * or possibly three nodes (see the 3-way split above). A ``NULL`` * written to the last entry of a node is considered a spanning store as * a rebalance is required for the operation to complete and an overflow * of data may happen. */ mas = wr_mas->mas; trace_ma_op(__func__, mas); if (unlikely(!mas->index && mas->last == ULONG_MAX)) return mas_new_root(mas, wr_mas->entry); /* * Node rebalancing may occur due to this store, so there may be three new * entries per level plus a new root. 
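 *
 * For a tree of height 3 that means reserving 1 + 3 * 3 = 10 nodes up
 * front; as elsewhere, mas_node_count() latches -ENOMEM in the maple state
 * if the reservation cannot be met.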
*/ height = mas_mt_height(mas); mas_node_count(mas, 1 + height * 3); if (mas_is_err(mas)) return 0; /* * Set up right side. Need to get to the next offset after the spanning * store to ensure it's not NULL and to combine both the next node and * the node with the start together. */ r_mas = *mas; /* Avoid overflow, walk to next slot in the tree. */ if (r_mas.last + 1) r_mas.last++; r_mas.index = r_mas.last; mas_wr_walk_index(&r_wr_mas); r_mas.last = r_mas.index = mas->last; /* Set up left side. */ l_mas = *mas; mas_wr_walk_index(&l_wr_mas); if (!wr_mas->entry) { mas_extend_spanning_null(&l_wr_mas, &r_wr_mas); mas->offset = l_mas.offset; mas->index = l_mas.index; mas->last = l_mas.last = r_mas.last; } /* expanding NULLs may make this cover the entire range */ if (!l_mas.index && r_mas.last == ULONG_MAX) { mas_set_range(mas, 0, ULONG_MAX); return mas_new_root(mas, wr_mas->entry); } memset(&b_node, 0, sizeof(struct maple_big_node)); /* Copy l_mas and store the value in b_node. */ mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); /* Copy r_mas into b_node. */ if (r_mas.offset <= r_mas.end) mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, &b_node, b_node.b_end + 1); else b_node.b_end++; /* Stop spanning searches by searching for just index. */ l_mas.index = l_mas.last = mas->index; mast.bn = &b_node; mast.orig_l = &l_mas; mast.orig_r = &r_mas; /* Combine l_mas and r_mas and split them up evenly again. */ return mas_spanning_rebalance(mas, &mast, height + 1); } /* * mas_wr_node_store() - Attempt to store the value in a node * @wr_mas: The maple write state * * Attempts to reuse the node, but may allocate. * * Return: True if stored, false otherwise */ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas, unsigned char new_end) { struct ma_state *mas = wr_mas->mas; void __rcu **dst_slots; unsigned long *dst_pivots; unsigned char dst_offset, offset_end = wr_mas->offset_end; struct maple_node reuse, *newnode; unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type]; bool in_rcu = mt_in_rcu(mas->tree); /* Check if there is enough data. The room is enough. */ if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) && !(mas->mas_flags & MA_STATE_BULK)) return false; if (mas->last == wr_mas->end_piv) offset_end++; /* don't copy this offset */ else if (unlikely(wr_mas->r_max == ULONG_MAX)) mas_bulk_rebalance(mas, mas->end, wr_mas->type); /* set up node. */ if (in_rcu) { mas_node_count(mas, 1); if (mas_is_err(mas)) return false; newnode = mas_pop_node(mas); } else { memset(&reuse, 0, sizeof(struct maple_node)); newnode = &reuse; } newnode->parent = mas_mn(mas)->parent; dst_pivots = ma_pivots(newnode, wr_mas->type); dst_slots = ma_slots(newnode, wr_mas->type); /* Copy from start to insert point */ memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset); memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset); /* Handle insert of new range starting after old range */ if (wr_mas->r_min < mas->index) { rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content); dst_pivots[mas->offset++] = mas->index - 1; } /* Store the new entry and range end. */ if (mas->offset < node_pivots) dst_pivots[mas->offset] = mas->last; rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry); /* * this range wrote to the end of the node or it overwrote the rest of * the data */ if (offset_end > mas->end) goto done; dst_offset = mas->offset + 1; /* Copy to the end of node if necessary. 
*/ copy_size = mas->end - offset_end + 1; memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end, sizeof(void *) * copy_size); memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end, sizeof(unsigned long) * (copy_size - 1)); if (new_end < node_pivots) dst_pivots[new_end] = mas->max; done: mas_leaf_set_meta(newnode, maple_leaf_64, new_end); if (in_rcu) { struct maple_enode *old_enode = mas->node; mas->node = mt_mk_node(newnode, wr_mas->type); mas_replace_node(mas, old_enode); } else { memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); } trace_ma_write(__func__, mas, 0, wr_mas->entry); mas_update_gap(mas); mas->end = new_end; return true; } /* * mas_wr_slot_store: Attempt to store a value in a slot. * @wr_mas: the maple write state * * Return: True if stored, false otherwise */ static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char offset = mas->offset; void __rcu **slots = wr_mas->slots; bool gap = false; gap |= !mt_slot_locked(mas->tree, slots, offset); gap |= !mt_slot_locked(mas->tree, slots, offset + 1); if (wr_mas->offset_end - offset == 1) { if (mas->index == wr_mas->r_min) { /* Overwriting the range and a part of the next one */ rcu_assign_pointer(slots[offset], wr_mas->entry); wr_mas->pivots[offset] = mas->last; } else { /* Overwriting a part of the range and the next one */ rcu_assign_pointer(slots[offset + 1], wr_mas->entry); wr_mas->pivots[offset] = mas->index - 1; mas->offset++; /* Keep mas accurate. */ } } else if (!mt_in_rcu(mas->tree)) { /* * Expand the range, only partially overwriting the previous and * next ranges */ gap |= !mt_slot_locked(mas->tree, slots, offset + 2); rcu_assign_pointer(slots[offset + 1], wr_mas->entry); wr_mas->pivots[offset] = mas->index - 1; wr_mas->pivots[offset + 1] = mas->last; mas->offset++; /* Keep mas accurate. */ } else { return false; } trace_ma_write(__func__, mas, 0, wr_mas->entry); /* * Only update gap when the new entry is empty or there is an empty * entry in the original two ranges. 
*/ if (!wr_mas->entry || gap) mas_update_gap(mas); return true; } static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; if (!wr_mas->slots[wr_mas->offset_end]) { /* If this one is null, the next and prev are not */ mas->last = wr_mas->end_piv; } else { /* Check next slot(s) if we are overwriting the end */ if ((mas->last == wr_mas->end_piv) && (mas->end != wr_mas->offset_end) && !wr_mas->slots[wr_mas->offset_end + 1]) { wr_mas->offset_end++; if (wr_mas->offset_end == mas->end) mas->last = mas->max; else mas->last = wr_mas->pivots[wr_mas->offset_end]; wr_mas->end_piv = mas->last; } } if (!wr_mas->content) { /* If this one is null, the next and prev are not */ mas->index = wr_mas->r_min; } else { /* Check prev slot if we are overwriting the start */ if (mas->index == wr_mas->r_min && mas->offset && !wr_mas->slots[mas->offset - 1]) { mas->offset--; wr_mas->r_min = mas->index = mas_safe_min(mas, wr_mas->pivots, mas->offset); wr_mas->r_max = wr_mas->pivots[mas->offset]; } } } static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) { while ((wr_mas->offset_end < wr_mas->mas->end) && (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end])) wr_mas->offset_end++; if (wr_mas->offset_end < wr_mas->mas->end) wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end]; else wr_mas->end_piv = wr_mas->mas->max; if (!wr_mas->entry) mas_wr_extend_null(wr_mas); } static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char new_end = mas->end + 2; new_end -= wr_mas->offset_end - mas->offset; if (wr_mas->r_min == mas->index) new_end--; if (wr_mas->end_piv == mas->last) new_end--; return new_end; } /* * mas_wr_append: Attempt to append * @wr_mas: the maple write state * @new_end: The end of the node after the modification * * This is currently unsafe in rcu mode since the end of the node may be cached * by readers while the node contents may be updated which could result in * inaccurate information. * * Return: True if appended, false otherwise */ static inline bool mas_wr_append(struct ma_wr_state *wr_mas, unsigned char new_end) { struct ma_state *mas; void __rcu **slots; unsigned char end; mas = wr_mas->mas; if (mt_in_rcu(mas->tree)) return false; end = mas->end; if (mas->offset != end) return false; if (new_end < mt_pivots[wr_mas->type]) { wr_mas->pivots[new_end] = wr_mas->pivots[end]; ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); } slots = wr_mas->slots; if (new_end == end + 1) { if (mas->last == wr_mas->r_max) { /* Append to end of range */ rcu_assign_pointer(slots[new_end], wr_mas->entry); wr_mas->pivots[end] = mas->index - 1; mas->offset = new_end; } else { /* Append to start of range */ rcu_assign_pointer(slots[new_end], wr_mas->content); wr_mas->pivots[end] = mas->last; rcu_assign_pointer(slots[end], wr_mas->entry); } } else { /* Append to the range without touching any boundaries. */ rcu_assign_pointer(slots[new_end], wr_mas->content); wr_mas->pivots[end + 1] = mas->last; rcu_assign_pointer(slots[end + 1], wr_mas->entry); wr_mas->pivots[end] = mas->index - 1; mas->offset = end + 1; } if (!wr_mas->content || !wr_mas->entry) mas_update_gap(mas); mas->end = new_end; trace_ma_write(__func__, mas, new_end, wr_mas->entry); return true; } /* * mas_wr_bnode() - Slow path for a modification. * @wr_mas: The write maple state * * This is where split, rebalance end up. 
*/ static void mas_wr_bnode(struct ma_wr_state *wr_mas) { struct maple_big_node b_node; trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); memset(&b_node, 0, sizeof(struct maple_big_node)); mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); mas_commit_b_node(wr_mas, &b_node, wr_mas->mas->end); } static inline void mas_wr_modify(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char new_end; /* Direct replacement */ if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) { rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry); if (!!wr_mas->entry ^ !!wr_mas->content) mas_update_gap(mas); return; } /* * new_end exceeds the size of the maple node and cannot enter the fast * path. */ new_end = mas_wr_new_end(wr_mas); if (new_end >= mt_slots[wr_mas->type]) goto slow_path; /* Attempt to append */ if (mas_wr_append(wr_mas, new_end)) return; if (new_end == mas->end && mas_wr_slot_store(wr_mas)) return; if (mas_wr_node_store(wr_mas, new_end)) return; if (mas_is_err(mas)) return; slow_path: mas_wr_bnode(wr_mas); } /* * mas_wr_store_entry() - Internal call to store a value * @mas: The maple state * @entry: The entry to store. * * Return: The contents that was stored at the index. */ static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; wr_mas->content = mas_start(mas); if (mas_is_none(mas) || mas_is_ptr(mas)) { mas_store_root(mas, wr_mas->entry); return wr_mas->content; } if (unlikely(!mas_wr_walk(wr_mas))) { mas_wr_spanning_store(wr_mas); return wr_mas->content; } /* At this point, we are at the leaf node that needs to be altered. */ mas_wr_end_piv(wr_mas); /* New root for a single pointer */ if (unlikely(!mas->index && mas->last == ULONG_MAX)) { mas_new_root(mas, wr_mas->entry); return wr_mas->content; } mas_wr_modify(wr_mas); return wr_mas->content; } /** * mas_insert() - Internal call to insert a value * @mas: The maple state * @entry: The entry to store * * Return: %NULL or the contents that already exists at the requested index * otherwise. The maple state needs to be checked for error conditions. */ static inline void *mas_insert(struct ma_state *mas, void *entry) { MA_WR_STATE(wr_mas, mas, entry); /* * Inserting a new range inserts either 0, 1, or 2 pivots within the * tree. If the insert fits exactly into an existing gap with a value * of NULL, then the slot only needs to be written with the new value. * If the range being inserted is adjacent to another range, then only a * single pivot needs to be inserted (as well as writing the entry). If * the new range is within a gap but does not touch any other ranges, * then two pivots need to be inserted: the start - 1, and the end. As * usual, the entry must be written. Most operations require a new node * to be allocated and replace an existing node to ensure RCU safety, * when in RCU mode. The exception to requiring a newly allocated node * is when inserting at the end of a node (appending). When done * carefully, appending can reuse the node in place. */ wr_mas.content = mas_start(mas); if (wr_mas.content) goto exists; if (mas_is_none(mas) || mas_is_ptr(mas)) { mas_store_root(mas, entry); return NULL; } /* spanning writes always overwrite something */ if (!mas_wr_walk(&wr_mas)) goto exists; /* At this point, we are at the leaf node that needs to be altered. 
*/ wr_mas.offset_end = mas->offset; wr_mas.end_piv = wr_mas.r_max; if (wr_mas.content || (mas->last > wr_mas.r_max)) goto exists; if (!entry) return NULL; mas_wr_modify(&wr_mas); return wr_mas.content; exists: mas_set_err(mas, -EEXIST); return wr_mas.content; } /** * mas_alloc_cyclic() - Internal call to find somewhere to store an entry * @mas: The maple state. * @startp: Pointer to ID. * @range_lo: Lower bound of range to search. * @range_hi: Upper bound of range to search. * @entry: The entry to store. * @next: Pointer to next ID to allocate. * @gfp: The GFP_FLAGS to use for allocations. * * Return: 0 if the allocation succeeded without wrapping, 1 if the * allocation succeeded after wrapping, or -EBUSY if there are no * free entries. */ int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp) { unsigned long min = range_lo; int ret = 0; range_lo = max(min, *next); ret = mas_empty_area(mas, range_lo, range_hi, 1); if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) { mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED; ret = 1; } if (ret < 0 && range_lo > min) { ret = mas_empty_area(mas, min, range_hi, 1); if (ret == 0) ret = 1; } if (ret < 0) return ret; do { mas_insert(mas, entry); } while (mas_nomem(mas, gfp)); if (mas_is_err(mas)) return xa_err(mas->node); *startp = mas->index; *next = *startp + 1; if (*next == 0) mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED; return ret; } EXPORT_SYMBOL(mas_alloc_cyclic); static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index) { retry: mas_set(mas, index); mas_state_walk(mas); if (mas_is_start(mas)) goto retry; } static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, struct maple_node *node, const unsigned long index) { if (unlikely(ma_dead_node(node))) { mas_rewalk(mas, index); return true; } return false; } /* * mas_prev_node() - Find the prev non-null entry at the same level in the * tree. The prev value will be mas->node[mas->offset] or the status will be * ma_none. * @mas: The maple state * @min: The lower limit to search * * The prev node value will be mas->node[mas->offset] or the status will be * ma_none. * Return: 1 if the node is dead, 0 otherwise. */ static int mas_prev_node(struct ma_state *mas, unsigned long min) { enum maple_type mt; int offset, level; void __rcu **slots; struct maple_node *node; unsigned long *pivots; unsigned long max; node = mas_mn(mas); if (!mas->min) goto no_entry; max = mas->min - 1; if (max < min) goto no_entry; level = 0; do { if (ma_is_root(node)) goto no_entry; /* Walk up. 
*/ if (unlikely(mas_ascend(mas))) return 1; offset = mas->offset; level++; node = mas_mn(mas); } while (!offset); offset--; mt = mte_node_type(mas->node); while (level > 1) { level--; slots = ma_slots(node, mt); mas->node = mas_slot(mas, slots, offset); if (unlikely(ma_dead_node(node))) return 1; mt = mte_node_type(mas->node); node = mas_mn(mas); pivots = ma_pivots(node, mt); offset = ma_data_end(node, mt, pivots, max); if (unlikely(ma_dead_node(node))) return 1; } slots = ma_slots(node, mt); mas->node = mas_slot(mas, slots, offset); pivots = ma_pivots(node, mt); if (unlikely(ma_dead_node(node))) return 1; if (likely(offset)) mas->min = pivots[offset - 1] + 1; mas->max = max; mas->offset = mas_data_end(mas); if (unlikely(mte_dead_node(mas->node))) return 1; mas->end = mas->offset; return 0; no_entry: if (unlikely(ma_dead_node(node))) return 1; mas->status = ma_underflow; return 0; } /* * mas_prev_slot() - Get the entry in the previous slot * * @mas: The maple state * @max: The minimum starting range * @empty: Can be empty * @set_underflow: Set the @mas->node to underflow state on limit. * * Return: The entry in the previous slot which is possibly NULL */ static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) { void *entry; void __rcu **slots; unsigned long pivot; enum maple_type type; unsigned long *pivots; struct maple_node *node; unsigned long save_point = mas->index; retry: node = mas_mn(mas); type = mte_node_type(mas->node); pivots = ma_pivots(node, type); if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (mas->min <= min) { pivot = mas_safe_min(mas, pivots, mas->offset); if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (pivot <= min) goto underflow; } again: if (likely(mas->offset)) { mas->offset--; mas->last = mas->index - 1; mas->index = mas_safe_min(mas, pivots, mas->offset); } else { if (mas->index <= min) goto underflow; if (mas_prev_node(mas, min)) { mas_rewalk(mas, save_point); goto retry; } if (WARN_ON_ONCE(mas_is_underflow(mas))) return NULL; mas->last = mas->max; node = mas_mn(mas); type = mte_node_type(mas->node); pivots = ma_pivots(node, type); mas->index = pivots[mas->offset - 1] + 1; } slots = ma_slots(node, type); entry = mas_slot(mas, slots, mas->offset); if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (likely(entry)) return entry; if (!empty) { if (mas->index <= min) { mas->status = ma_underflow; return NULL; } goto again; } return entry; underflow: mas->status = ma_underflow; return NULL; } /* * mas_next_node() - Get the next node at the same level in the tree. * @mas: The maple state * @max: The maximum pivot value to check. * * The next value will be mas->node[mas->offset] or the status will have * overflowed. * Return: 1 on dead node, 0 otherwise. */ static int mas_next_node(struct ma_state *mas, struct maple_node *node, unsigned long max) { unsigned long min; unsigned long *pivots; struct maple_enode *enode; struct maple_node *tmp; int level = 0; unsigned char node_end; enum maple_type mt; void __rcu **slots; if (mas->max >= max) goto overflow; min = mas->max + 1; level = 0; do { if (ma_is_root(node)) goto overflow; /* Walk up. 
*/ if (unlikely(mas_ascend(mas))) return 1; level++; node = mas_mn(mas); mt = mte_node_type(mas->node); pivots = ma_pivots(node, mt); node_end = ma_data_end(node, mt, pivots, mas->max); if (unlikely(ma_dead_node(node))) return 1; } while (unlikely(mas->offset == node_end)); slots = ma_slots(node, mt); mas->offset++; enode = mas_slot(mas, slots, mas->offset); if (unlikely(ma_dead_node(node))) return 1; if (level > 1) mas->offset = 0; while (unlikely(level > 1)) { level--; mas->node = enode; node = mas_mn(mas); mt = mte_node_type(mas->node); slots = ma_slots(node, mt); enode = mas_slot(mas, slots, 0); if (unlikely(ma_dead_node(node))) return 1; } if (!mas->offset) pivots = ma_pivots(node, mt); mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt); tmp = mte_to_node(enode); mt = mte_node_type(enode); pivots = ma_pivots(tmp, mt); mas->end = ma_data_end(tmp, mt, pivots, mas->max); if (unlikely(ma_dead_node(node))) return 1; mas->node = enode; mas->min = min; return 0; overflow: if (unlikely(ma_dead_node(node))) return 1; mas->status = ma_overflow; return 0; } /* * mas_next_slot() - Get the entry in the next slot * * @mas: The maple state * @max: The maximum starting range * @empty: Can be empty * @set_overflow: Should @mas->node be set to overflow when the limit is * reached. * * Return: The entry in the next slot which is possibly NULL */ static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) { void __rcu **slots; unsigned long *pivots; unsigned long pivot; enum maple_type type; struct maple_node *node; unsigned long save_point = mas->last; void *entry; retry: node = mas_mn(mas); type = mte_node_type(mas->node); pivots = ma_pivots(node, type); if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (mas->max >= max) { if (likely(mas->offset < mas->end)) pivot = pivots[mas->offset]; else pivot = mas->max; if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (pivot >= max) { /* Was at the limit, next will extend beyond */ mas->status = ma_overflow; return NULL; } } if (likely(mas->offset < mas->end)) { mas->index = pivots[mas->offset] + 1; again: mas->offset++; if (likely(mas->offset < mas->end)) mas->last = pivots[mas->offset]; else mas->last = mas->max; } else { if (mas->last >= max) { mas->status = ma_overflow; return NULL; } if (mas_next_node(mas, node, max)) { mas_rewalk(mas, save_point); goto retry; } if (WARN_ON_ONCE(mas_is_overflow(mas))) return NULL; mas->offset = 0; mas->index = mas->min; node = mas_mn(mas); type = mte_node_type(mas->node); pivots = ma_pivots(node, type); mas->last = pivots[0]; } slots = ma_slots(node, type); entry = mt_slot(mas->tree, slots, mas->offset); if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (entry) return entry; if (!empty) { if (mas->last >= max) { mas->status = ma_overflow; return NULL; } mas->index = mas->last + 1; goto again; } return entry; } /* * mas_next_entry() - Internal function to get the next entry. * @mas: The maple state * @limit: The maximum range start. * * Set the @mas->node to the next entry and the range_start to * the beginning value for the entry. Does not check beyond @limit. * Sets @mas->index and @mas->last to the range, Does not update @mas->index and * @mas->last on overflow. * Restarts on dead nodes. * * Return: the next entry or %NULL. 
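 *
 * A minimal sketch of the calling pattern, assuming @mas has already been
 * walked to a valid starting position (illustrative only; nr is a
 * hypothetical counter):
 *
 *	while ((entry = mas_next_entry(&mas, limit)) != NULL)
 *		nr++;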
*/ static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) { if (mas->last >= limit) { mas->status = ma_overflow; return NULL; } return mas_next_slot(mas, limit, false); } /* * mas_rev_awalk() - Internal function. Reverse allocation walk. Find the * highest gap address of a given size in a given node and descend. * @mas: The maple state * @size: The needed size. * * Return: True if found in a leaf, false otherwise. * */ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size, unsigned long *gap_min, unsigned long *gap_max) { enum maple_type type = mte_node_type(mas->node); struct maple_node *node = mas_mn(mas); unsigned long *pivots, *gaps; void __rcu **slots; unsigned long gap = 0; unsigned long max, min; unsigned char offset; if (unlikely(mas_is_err(mas))) return true; if (ma_is_dense(type)) { /* dense nodes. */ mas->offset = (unsigned char)(mas->index - mas->min); return true; } pivots = ma_pivots(node, type); slots = ma_slots(node, type); gaps = ma_gaps(node, type); offset = mas->offset; min = mas_safe_min(mas, pivots, offset); /* Skip out of bounds. */ while (mas->last < min) min = mas_safe_min(mas, pivots, --offset); max = mas_safe_pivot(mas, pivots, offset, type); while (mas->index <= max) { gap = 0; if (gaps) gap = gaps[offset]; else if (!mas_slot(mas, slots, offset)) gap = max - min + 1; if (gap) { if ((size <= gap) && (size <= mas->last - min + 1)) break; if (!gaps) { /* Skip the next slot, it cannot be a gap. */ if (offset < 2) goto ascend; offset -= 2; max = pivots[offset]; min = mas_safe_min(mas, pivots, offset); continue; } } if (!offset) goto ascend; offset--; max = min - 1; min = mas_safe_min(mas, pivots, offset); } if (unlikely((mas->index > max) || (size - 1 > max - mas->index))) goto no_space; if (unlikely(ma_is_leaf(type))) { mas->offset = offset; *gap_min = min; *gap_max = min + gap - 1; return true; } /* descend, only happens under lock. */ mas->node = mas_slot(mas, slots, offset); mas->min = min; mas->max = max; mas->offset = mas_data_end(mas); return false; ascend: if (!mte_is_root(mas->node)) return false; no_space: mas_set_err(mas, -EBUSY); return false; } static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size) { enum maple_type type = mte_node_type(mas->node); unsigned long pivot, min, gap = 0; unsigned char offset, data_end; unsigned long *gaps, *pivots; void __rcu **slots; struct maple_node *node; bool found = false; if (ma_is_dense(type)) { mas->offset = (unsigned char)(mas->index - mas->min); return true; } node = mas_mn(mas); pivots = ma_pivots(node, type); slots = ma_slots(node, type); gaps = ma_gaps(node, type); offset = mas->offset; min = mas_safe_min(mas, pivots, offset); data_end = ma_data_end(node, type, pivots, mas->max); for (; offset <= data_end; offset++) { pivot = mas_safe_pivot(mas, pivots, offset, type); /* Not within lower bounds */ if (mas->index > pivot) goto next_slot; if (gaps) gap = gaps[offset]; else if (!mas_slot(mas, slots, offset)) gap = min(pivot, mas->last) - max(mas->index, min) + 1; else goto next_slot; if (gap >= size) { if (ma_is_leaf(type)) { found = true; goto done; } if (mas->index <= pivot) { mas->node = mas_slot(mas, slots, offset); mas->min = min; mas->max = pivot; offset = 0; break; } } next_slot: min = pivot + 1; if (mas->last <= pivot) { mas_set_err(mas, -EBUSY); return true; } } if (mte_is_root(mas->node)) found = true; done: mas->offset = offset; return found; } /** * mas_walk() - Search for @mas->index in the tree. * @mas: The maple state. 
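 *
 * A minimal lookup sketch (hypothetical tree and index), assuming the caller
 * holds rcu_read_lock() or the write lock:
 *
 *	MA_STATE(mas, &tree, index, index);
 *
 *	entry = mas_walk(&mas);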
* * mas->index and mas->last will be set to the range if there is a value. If * mas->status is ma_none, reset to ma_start * * Return: the entry at the location or %NULL. */ void *mas_walk(struct ma_state *mas) { void *entry; if (!mas_is_active(mas) || !mas_is_start(mas)) mas->status = ma_start; retry: entry = mas_state_walk(mas); if (mas_is_start(mas)) { goto retry; } else if (mas_is_none(mas)) { mas->index = 0; mas->last = ULONG_MAX; } else if (mas_is_ptr(mas)) { if (!mas->index) { mas->last = 0; return entry; } mas->index = 1; mas->last = ULONG_MAX; mas->status = ma_none; return NULL; } return entry; } EXPORT_SYMBOL_GPL(mas_walk); static inline bool mas_rewind_node(struct ma_state *mas) { unsigned char slot; do { if (mte_is_root(mas->node)) { slot = mas->offset; if (!slot) return false; } else { mas_ascend(mas); slot = mas->offset; } } while (!slot); mas->offset = --slot; return true; } /* * mas_skip_node() - Internal function. Skip over a node. * @mas: The maple state. * * Return: true if there is another node, false otherwise. */ static inline bool mas_skip_node(struct ma_state *mas) { if (mas_is_err(mas)) return false; do { if (mte_is_root(mas->node)) { if (mas->offset >= mas_data_end(mas)) { mas_set_err(mas, -EBUSY); return false; } } else { mas_ascend(mas); } } while (mas->offset >= mas_data_end(mas)); mas->offset++; return true; } /* * mas_awalk() - Allocation walk. Search from low address to high, for a gap of * @size * @mas: The maple state * @size: The size of the gap required * * Search between @mas->index and @mas->last for a gap of @size. */ static inline void mas_awalk(struct ma_state *mas, unsigned long size) { struct maple_enode *last = NULL; /* * There are 4 options: * go to child (descend) * go back to parent (ascend) * no gap found. (return, slot == MAPLE_NODE_SLOTS) * found the gap. (return, slot != MAPLE_NODE_SLOTS) */ while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) { if (last == mas->node) mas_skip_node(mas); else last = mas->node; } } /* * mas_sparse_area() - Internal function. Return upper or lower limit when * searching for a gap in an empty tree. * @mas: The maple state * @min: the minimum range * @max: The maximum range * @size: The size of the gap * @fwd: Searching forward or back */ static inline int mas_sparse_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size, bool fwd) { if (!unlikely(mas_is_none(mas)) && min == 0) { min++; /* * At this time, min is increased, we need to recheck whether * the size is satisfied. */ if (min > max || max - min + 1 < size) return -EBUSY; } /* mas_is_ptr */ if (fwd) { mas->index = min; mas->last = min + size - 1; } else { mas->last = max; mas->index = max - size + 1; } return 0; } /* * mas_empty_area() - Get the lowest address within the range that is * sufficient for the size requested. 
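 *
 * A sketch of the usual pattern (hypothetical identifiers), mirroring
 * mtree_alloc_range() below: find the gap and store into it under the same
 * lock; on success @mas->index and @mas->last describe the found range.
 * Real callers such as mtree_alloc_range() also retry the search if the
 * allocation in the store drops the lock:
 *
 *	mas_lock(&mas);
 *	ret = mas_empty_area(&mas, min, max, size);
 *	if (!ret)
 *		ret = mas_store_gfp(&mas, ptr, GFP_KERNEL);
 *	mas_unlock(&mas);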
* @mas: The maple state * @min: The lowest value of the range * @max: The highest value of the range * @size: The size needed */ int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size) { unsigned char offset; unsigned long *pivots; enum maple_type mt; struct maple_node *node; if (min > max) return -EINVAL; if (size == 0 || max - min < size - 1) return -EINVAL; if (mas_is_start(mas)) mas_start(mas); else if (mas->offset >= 2) mas->offset -= 2; else if (!mas_skip_node(mas)) return -EBUSY; /* Empty set */ if (mas_is_none(mas) || mas_is_ptr(mas)) return mas_sparse_area(mas, min, max, size, true); /* The start of the window can only be within these values */ mas->index = min; mas->last = max; mas_awalk(mas, size); if (unlikely(mas_is_err(mas))) return xa_err(mas->node); offset = mas->offset; if (unlikely(offset == MAPLE_NODE_SLOTS)) return -EBUSY; node = mas_mn(mas); mt = mte_node_type(mas->node); pivots = ma_pivots(node, mt); min = mas_safe_min(mas, pivots, offset); if (mas->index < min) mas->index = min; mas->last = mas->index + size - 1; mas->end = ma_data_end(node, mt, pivots, mas->max); return 0; } EXPORT_SYMBOL_GPL(mas_empty_area); /* * mas_empty_area_rev() - Get the highest address within the range that is * sufficient for the size requested. * @mas: The maple state * @min: The lowest value of the range * @max: The highest value of the range * @size: The size needed */ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size) { struct maple_enode *last = mas->node; if (min > max) return -EINVAL; if (size == 0 || max - min < size - 1) return -EINVAL; if (mas_is_start(mas)) { mas_start(mas); mas->offset = mas_data_end(mas); } else if (mas->offset >= 2) { mas->offset -= 2; } else if (!mas_rewind_node(mas)) { return -EBUSY; } /* Empty set. */ if (mas_is_none(mas) || mas_is_ptr(mas)) return mas_sparse_area(mas, min, max, size, false); /* The start of the window can only be within these values. */ mas->index = min; mas->last = max; while (!mas_rev_awalk(mas, size, &min, &max)) { if (last == mas->node) { if (!mas_rewind_node(mas)) return -EBUSY; } else { last = mas->node; } } if (mas_is_err(mas)) return xa_err(mas->node); if (unlikely(mas->offset == MAPLE_NODE_SLOTS)) return -EBUSY; /* Trim the upper limit to the max. */ if (max < mas->last) mas->last = max; mas->index = mas->last - size + 1; mas->end = mas_data_end(mas); return 0; } EXPORT_SYMBOL_GPL(mas_empty_area_rev); /* * mte_dead_leaves() - Mark all leaves of a node as dead. * @mas: The maple state * @slots: Pointer to the slot array * @type: The maple node type * * Must hold the write lock. * * Return: The number of leaves marked as dead. */ static inline unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, void __rcu **slots) { struct maple_node *node; enum maple_type type; void *entry; int offset; for (offset = 0; offset < mt_slot_count(enode); offset++) { entry = mt_slot(mt, slots, offset); type = mte_node_type(entry); node = mte_to_node(entry); /* Use both node and type to catch LE & BE metadata */ if (!node || !type) break; mte_set_node_dead(entry); node->type = type; rcu_assign_pointer(slots[offset], node); } return offset; } /** * mte_dead_walk() - Walk down a dead tree to just before the leaves * @enode: The maple encoded node * @offset: The starting offset * * Note: This can only be used from the RCU callback context. 
*/ static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset) { struct maple_node *node, *next; void __rcu **slots = NULL; next = mte_to_node(*enode); do { *enode = ma_enode_ptr(next); node = mte_to_node(*enode); slots = ma_slots(node, node->type); next = rcu_dereference_protected(slots[offset], lock_is_held(&rcu_callback_map)); offset = 0; } while (!ma_is_leaf(next->type)); return slots; } /** * mt_free_walk() - Walk & free a tree in the RCU callback context * @head: The RCU head that's within the node. * * Note: This can only be used from the RCU callback context. */ static void mt_free_walk(struct rcu_head *head) { void __rcu **slots; struct maple_node *node, *start; struct maple_enode *enode; unsigned char offset; enum maple_type type; node = container_of(head, struct maple_node, rcu); if (ma_is_leaf(node->type)) goto free_leaf; start = node; enode = mt_mk_node(node, node->type); slots = mte_dead_walk(&enode, 0); node = mte_to_node(enode); do { mt_free_bulk(node->slot_len, slots); offset = node->parent_slot + 1; enode = node->piv_parent; if (mte_to_node(enode) == node) goto free_leaf; type = mte_node_type(enode); slots = ma_slots(mte_to_node(enode), type); if ((offset < mt_slots[type]) && rcu_dereference_protected(slots[offset], lock_is_held(&rcu_callback_map))) slots = mte_dead_walk(&enode, offset); node = mte_to_node(enode); } while ((node != start) || (node->slot_len < offset)); slots = ma_slots(node, node->type); mt_free_bulk(node->slot_len, slots); free_leaf: mt_free_rcu(&node->rcu); } static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) { struct maple_node *node; struct maple_enode *next = *enode; void __rcu **slots = NULL; enum maple_type type; unsigned char next_offset = 0; do { *enode = next; node = mte_to_node(*enode); type = mte_node_type(*enode); slots = ma_slots(node, type); next = mt_slot_locked(mt, slots, next_offset); if ((mte_dead_node(next))) next = mt_slot_locked(mt, slots, ++next_offset); mte_set_node_dead(*enode); node->type = type; node->piv_parent = prev; node->parent_slot = offset; offset = next_offset; next_offset = 0; prev = *enode; } while (!mte_is_leaf(next)); return slots; } static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, bool free) { void __rcu **slots; struct maple_node *node = mte_to_node(enode); struct maple_enode *start; if (mte_is_leaf(enode)) { node->type = mte_node_type(enode); goto free_leaf; } start = enode; slots = mte_destroy_descend(&enode, mt, start, 0); node = mte_to_node(enode); // Updated in the above call. do { enum maple_type type; unsigned char offset; struct maple_enode *parent, *tmp; node->slot_len = mte_dead_leaves(enode, mt, slots); if (free) mt_free_bulk(node->slot_len, slots); offset = node->parent_slot + 1; enode = node->piv_parent; if (mte_to_node(enode) == node) goto free_leaf; type = mte_node_type(enode); slots = ma_slots(mte_to_node(enode), type); if (offset >= mt_slots[type]) goto next; tmp = mt_slot_locked(mt, slots, offset); if (mte_node_type(tmp) && mte_to_node(tmp)) { parent = enode; enode = tmp; slots = mte_destroy_descend(&enode, mt, parent, offset); } next: node = mte_to_node(enode); } while (start != enode); node = mte_to_node(enode); node->slot_len = mte_dead_leaves(enode, mt, slots); if (free) mt_free_bulk(node->slot_len, slots); free_leaf: if (free) mt_free_rcu(&node->rcu); else mt_clear_meta(mt, node, node->type); } /* * mte_destroy_walk() - Free a tree or sub-tree. 
* @enode: the encoded maple node (maple_enode) to start * @mt: the tree to free - needed for node types. * * Must hold the write lock. */ static inline void mte_destroy_walk(struct maple_enode *enode, struct maple_tree *mt) { struct maple_node *node = mte_to_node(enode); if (mt_in_rcu(mt)) { mt_destroy_walk(enode, mt, false); call_rcu(&node->rcu, mt_free_walk); } else { mt_destroy_walk(enode, mt, true); } } static void mas_wr_store_setup(struct ma_wr_state *wr_mas) { if (!mas_is_active(wr_mas->mas)) { if (mas_is_start(wr_mas->mas)) return; if (unlikely(mas_is_paused(wr_mas->mas))) goto reset; if (unlikely(mas_is_none(wr_mas->mas))) goto reset; if (unlikely(mas_is_overflow(wr_mas->mas))) goto reset; if (unlikely(mas_is_underflow(wr_mas->mas))) goto reset; } /* * A less strict version of mas_is_span_wr() where we allow spanning * writes within this node. This is to stop partial walks in * mas_prealloc() from being reset. */ if (wr_mas->mas->last > wr_mas->mas->max) goto reset; if (wr_mas->entry) return; if (mte_is_leaf(wr_mas->mas->node) && wr_mas->mas->last == wr_mas->mas->max) goto reset; return; reset: mas_reset(wr_mas->mas); } /* Interface */ /** * mas_store() - Store an @entry. * @mas: The maple state. * @entry: The entry to store. * * The @mas->index and @mas->last is used to set the range for the @entry. * Note: The @mas should have pre-allocated entries to ensure there is memory to * store the entry. Please see mas_expected_entries()/mas_destroy() for more details. * * Return: the first entry between mas->index and mas->last or %NULL. */ void *mas_store(struct ma_state *mas, void *entry) { MA_WR_STATE(wr_mas, mas, entry); trace_ma_write(__func__, mas, 0, entry); #ifdef CONFIG_DEBUG_MAPLE_TREE if (MAS_WARN_ON(mas, mas->index > mas->last)) pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry); if (mas->index > mas->last) { mas_set_err(mas, -EINVAL); return NULL; } #endif /* * Storing is the same operation as insert with the added caveat that it * can overwrite entries. Although this seems simple enough, one may * want to examine what happens if a single store operation was to * overwrite multiple entries within a self-balancing B-Tree. */ mas_wr_store_setup(&wr_mas); mas_wr_store_entry(&wr_mas); return wr_mas.content; } EXPORT_SYMBOL_GPL(mas_store); /** * mas_store_gfp() - Store a value into the tree. * @mas: The maple state * @entry: The entry to store * @gfp: The GFP_FLAGS to use for allocations if necessary. * * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not * be allocated. */ int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp) { MA_WR_STATE(wr_mas, mas, entry); mas_wr_store_setup(&wr_mas); trace_ma_write(__func__, mas, 0, entry); retry: mas_wr_store_entry(&wr_mas); if (unlikely(mas_nomem(mas, gfp))) goto retry; if (unlikely(mas_is_err(mas))) return xa_err(mas->node); return 0; } EXPORT_SYMBOL_GPL(mas_store_gfp); /** * mas_store_prealloc() - Store a value into the tree using memory * preallocated in the maple state. * @mas: The maple state * @entry: The entry to store. 
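 *
 * A minimal sketch of the pairing with mas_preallocate() (hypothetical
 * identifiers, locking elided); once preallocation succeeds the store
 * itself cannot fail:
 *
 *	ret = mas_preallocate(&mas, ptr, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	mas_store_prealloc(&mas, ptr);
 *
 * A caller that preallocates but then decides not to store should call
 * mas_destroy() to drop the unused nodes.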
*/ void mas_store_prealloc(struct ma_state *mas, void *entry) { MA_WR_STATE(wr_mas, mas, entry); mas_wr_store_setup(&wr_mas); trace_ma_write(__func__, mas, 0, entry); mas_wr_store_entry(&wr_mas); MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); mas_destroy(mas); } EXPORT_SYMBOL_GPL(mas_store_prealloc); /** * mas_preallocate() - Preallocate enough nodes for a store operation * @mas: The maple state * @entry: The entry that will be stored * @gfp: The GFP_FLAGS to use for allocations. * * Return: 0 on success, -ENOMEM if memory could not be allocated. */ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) { MA_WR_STATE(wr_mas, mas, entry); unsigned char node_size; int request = 1; int ret; if (unlikely(!mas->index && mas->last == ULONG_MAX)) goto ask_now; mas_wr_store_setup(&wr_mas); wr_mas.content = mas_start(mas); /* Root expand */ if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) goto ask_now; if (unlikely(!mas_wr_walk(&wr_mas))) { /* Spanning store, use worst case for now */ request = 1 + mas_mt_height(mas) * 3; goto ask_now; } /* At this point, we are at the leaf node that needs to be altered. */ /* Exact fit, no nodes needed. */ if (wr_mas.r_min == mas->index && wr_mas.r_max == mas->last) return 0; mas_wr_end_piv(&wr_mas); node_size = mas_wr_new_end(&wr_mas); /* Slot store, does not require additional nodes */ if (node_size == mas->end) { /* reuse node */ if (!mt_in_rcu(mas->tree)) return 0; /* shifting boundary */ if (wr_mas.offset_end - mas->offset == 1) return 0; } if (node_size >= mt_slots[wr_mas.type]) { /* Split, worst case for now. */ request = 1 + mas_mt_height(mas) * 2; goto ask_now; } /* New root needs a single node */ if (unlikely(mte_is_root(mas->node))) goto ask_now; /* Potential spanning rebalance collapsing a node, use worst-case */ if (node_size - 1 <= mt_min_slots[wr_mas.type]) request = mas_mt_height(mas) * 2 - 1; /* node store, slot store needs one node */ ask_now: mas_node_count_gfp(mas, request, gfp); mas->mas_flags |= MA_STATE_PREALLOC; if (likely(!mas_is_err(mas))) return 0; mas_set_alloc_req(mas, 0); ret = xa_err(mas->node); mas_reset(mas); mas_destroy(mas); mas_reset(mas); return ret; } EXPORT_SYMBOL_GPL(mas_preallocate); /* * mas_destroy() - destroy a maple state. * @mas: The maple state * * Upon completion, check the left-most node and rebalance against the node to * the right if necessary. Frees any allocated nodes associated with this maple * state. */ void mas_destroy(struct ma_state *mas) { struct maple_alloc *node; unsigned long total; /* * When using mas_for_each() to insert an expected number of elements, * it is possible that the number inserted is less than the expected * number. To fix an invalid final node, a check is performed here to * rebalance the previous node with the final node. */ if (mas->mas_flags & MA_STATE_REBALANCE) { unsigned char end; mas_start(mas); mtree_range_walk(mas); end = mas->end + 1; if (end < mt_min_slot_count(mas->node) - 1) mas_destroy_rebalance(mas, end); mas->mas_flags &= ~MA_STATE_REBALANCE; } mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); total = mas_allocated(mas); while (total) { node = mas->alloc; mas->alloc = node->slot[0]; if (node->node_count > 1) { size_t count = node->node_count - 1; mt_free_bulk(count, (void __rcu **)&node->slot[1]); total -= count; } mt_free_one(ma_mnode_ptr(node)); total--; } mas->alloc = NULL; } EXPORT_SYMBOL_GPL(mas_destroy); /* * mas_expected_entries() - Set the expected number of entries that will be inserted. 
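 *
 * A minimal bulk-insert sketch (hypothetical identifiers, locking elided),
 * with mas_destroy() releasing whatever was not consumed:
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *
 *	ret = mas_expected_entries(&mas, nr);
 *	if (ret)
 *		return ret;
 *	for (i = 0; i < nr; i++) {
 *		mas_set_range(&mas, first[i], last[i]);
 *		mas_store(&mas, item[i]);
 *	}
 *	mas_destroy(&mas);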
* @mas: The maple state * @nr_entries: The number of expected entries. * * This will attempt to pre-allocate enough nodes to store the expected number * of entries. The allocations will occur using the bulk allocator interface * for speed. Please call mas_destroy() on the @mas after inserting the entries * to ensure any unused nodes are freed. * * Return: 0 on success, -ENOMEM if memory could not be allocated. */ int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) { int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; struct maple_enode *enode = mas->node; int nr_nodes; int ret; /* * Sometimes it is necessary to duplicate a tree to a new tree, such as * forking a process and duplicating the VMAs from one tree to a new * tree. When such a situation arises, it is known that the new tree is * not going to be used until the entire tree is populated. For * performance reasons, it is best to use a bulk load with RCU disabled. * This allows for optimistic splitting that favours the left and reuse * of nodes during the operation. */ /* Optimize splitting for bulk insert in-order */ mas->mas_flags |= MA_STATE_BULK; /* * Avoid overflow, assume a gap between each entry and a trailing null. * If this is wrong, it just means allocation can happen during * insertion of entries. */ nr_nodes = max(nr_entries, nr_entries * 2 + 1); if (!mt_is_alloc(mas->tree)) nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; /* Leaves; reduce slots to keep space for expansion */ nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); /* Internal nodes */ nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); /* Add working room for split (2 nodes) + new parents */ mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); /* Detect if allocations run out */ mas->mas_flags |= MA_STATE_PREALLOC; if (!mas_is_err(mas)) return 0; ret = xa_err(mas->node); mas->node = enode; mas_destroy(mas); return ret; } EXPORT_SYMBOL_GPL(mas_expected_entries); static bool mas_next_setup(struct ma_state *mas, unsigned long max, void **entry) { bool was_none = mas_is_none(mas); if (unlikely(mas->last >= max)) { mas->status = ma_overflow; return true; } switch (mas->status) { case ma_active: return false; case ma_none: fallthrough; case ma_pause: mas->status = ma_start; fallthrough; case ma_start: mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ break; case ma_overflow: /* Overflowed before, but the max changed */ mas->status = ma_active; break; case ma_underflow: /* The user expects the mas to be one before where it is */ mas->status = ma_active; *entry = mas_walk(mas); if (*entry) return true; break; case ma_root: break; case ma_error: return true; } if (likely(mas_is_active(mas))) /* Fast path */ return false; if (mas_is_ptr(mas)) { *entry = NULL; if (was_none && mas->index == 0) { mas->index = mas->last = 0; return true; } mas->index = 1; mas->last = ULONG_MAX; mas->status = ma_none; return true; } if (mas_is_none(mas)) return true; return false; } /** * mas_next() - Get the next entry. * @mas: The maple state * @max: The maximum index to check. * * Returns the next entry after @mas->index. * Must hold rcu_read_lock or the write lock. * Can return the zero entry. 
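 *
 * A minimal forward-iteration sketch (hypothetical identifiers), assuming
 * rcu_read_lock() is held across the walk:
 *
 *	MA_STATE(mas, &tree, index, index);
 *
 *	entry = mas_walk(&mas);
 *	while (entry) {
 *		nr++;
 *		entry = mas_next(&mas, ULONG_MAX);
 *	}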
* * Return: The next entry or %NULL */ void *mas_next(struct ma_state *mas, unsigned long max) { void *entry = NULL; if (mas_next_setup(mas, max, &entry)) return entry; /* Retries on dead nodes handled by mas_next_slot */ return mas_next_slot(mas, max, false); } EXPORT_SYMBOL_GPL(mas_next); /** * mas_next_range() - Advance the maple state to the next range * @mas: The maple state * @max: The maximum index to check. * * Sets @mas->index and @mas->last to the range. * Must hold rcu_read_lock or the write lock. * Can return the zero entry. * * Return: The next entry or %NULL */ void *mas_next_range(struct ma_state *mas, unsigned long max) { void *entry = NULL; if (mas_next_setup(mas, max, &entry)) return entry; /* Retries on dead nodes handled by mas_next_slot */ return mas_next_slot(mas, max, true); } EXPORT_SYMBOL_GPL(mas_next_range); /** * mt_next() - get the next value in the maple tree * @mt: The maple tree * @index: The start index * @max: The maximum index to check * * Takes RCU read lock internally to protect the search, which does not * protect the returned pointer after dropping RCU read lock. * See also: Documentation/core-api/maple_tree.rst * * Return: The entry higher than @index or %NULL if nothing is found. */ void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max) { void *entry = NULL; MA_STATE(mas, mt, index, index); rcu_read_lock(); entry = mas_next(&mas, max); rcu_read_unlock(); return entry; } EXPORT_SYMBOL_GPL(mt_next); static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) { if (unlikely(mas->index <= min)) { mas->status = ma_underflow; return true; } switch (mas->status) { case ma_active: return false; case ma_start: break; case ma_none: fallthrough; case ma_pause: mas->status = ma_start; break; case ma_underflow: /* underflowed before but the min changed */ mas->status = ma_active; break; case ma_overflow: /* User expects mas to be one after where it is */ mas->status = ma_active; *entry = mas_walk(mas); if (*entry) return true; break; case ma_root: break; case ma_error: return true; } if (mas_is_start(mas)) mas_walk(mas); if (unlikely(mas_is_ptr(mas))) { if (!mas->index) { mas->status = ma_none; return true; } mas->index = mas->last = 0; *entry = mas_root(mas); return true; } if (mas_is_none(mas)) { if (mas->index) { /* Walked to out-of-range pointer? */ mas->index = mas->last = 0; mas->status = ma_root; *entry = mas_root(mas); return true; } return true; } return false; } /** * mas_prev() - Get the previous entry * @mas: The maple state * @min: The minimum value to check. * * Must hold rcu_read_lock or the write lock. * Will reset mas to ma_start if the status is ma_none. Will stop on not * searchable nodes. * * Return: the previous value or %NULL. */ void *mas_prev(struct ma_state *mas, unsigned long min) { void *entry = NULL; if (mas_prev_setup(mas, min, &entry)) return entry; return mas_prev_slot(mas, min, false); } EXPORT_SYMBOL_GPL(mas_prev); /** * mas_prev_range() - Advance to the previous range * @mas: The maple state * @min: The minimum value to check. * * Sets @mas->index and @mas->last to the range. * Must hold rcu_read_lock or the write lock. * Will reset mas to ma_start if the node is ma_none. Will stop on not * searchable nodes. * * Return: the previous value or %NULL. 
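 *
 * A minimal reverse-walk sketch using the non-range variant (hypothetical
 * identifiers), assuming rcu_read_lock() is held; mas_prev_range() walks the
 * same way but also stops at empty ranges, returning %NULL with @mas->index
 * and @mas->last set to that range:
 *
 *	MA_STATE(mas, &tree, ULONG_MAX, ULONG_MAX);
 *
 *	while ((entry = mas_prev(&mas, 0)) != NULL)
 *		nr++;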
*/ void *mas_prev_range(struct ma_state *mas, unsigned long min) { void *entry = NULL; if (mas_prev_setup(mas, min, &entry)) return entry; return mas_prev_slot(mas, min, true); } EXPORT_SYMBOL_GPL(mas_prev_range); /** * mt_prev() - get the previous value in the maple tree * @mt: The maple tree * @index: The start index * @min: The minimum index to check * * Takes RCU read lock internally to protect the search, which does not * protect the returned pointer after dropping RCU read lock. * See also: Documentation/core-api/maple_tree.rst * * Return: The entry before @index or %NULL if nothing is found. */ void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min) { void *entry = NULL; MA_STATE(mas, mt, index, index); rcu_read_lock(); entry = mas_prev(&mas, min); rcu_read_unlock(); return entry; } EXPORT_SYMBOL_GPL(mt_prev); /** * mas_pause() - Pause a mas_find/mas_for_each to drop the lock. * @mas: The maple state to pause * * Some users need to pause a walk and drop the lock they're holding in * order to yield to a higher priority thread or carry out an operation * on an entry. Those users should call this function before they drop * the lock. It resets the @mas to be suitable for the next iteration * of the loop after the user has reacquired the lock. If most entries * found during a walk require you to call mas_pause(), the mt_for_each() * iterator may be more appropriate. * */ void mas_pause(struct ma_state *mas) { mas->status = ma_pause; mas->node = NULL; } EXPORT_SYMBOL_GPL(mas_pause); /** * mas_find_setup() - Internal function to set up mas_find*(). * @mas: The maple state * @max: The maximum index * @entry: Pointer to the entry * * Returns: True if entry is the answer, false otherwise. */ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) { switch (mas->status) { case ma_active: if (mas->last < max) return false; return true; case ma_start: break; case ma_pause: if (unlikely(mas->last >= max)) return true; mas->index = ++mas->last; mas->status = ma_start; break; case ma_none: if (unlikely(mas->last >= max)) return true; mas->index = mas->last; mas->status = ma_start; break; case ma_underflow: /* mas is pointing at entry before unable to go lower */ if (unlikely(mas->index >= max)) { mas->status = ma_overflow; return true; } mas->status = ma_active; *entry = mas_walk(mas); if (*entry) return true; break; case ma_overflow: if (unlikely(mas->last >= max)) return true; mas->status = ma_active; *entry = mas_walk(mas); if (*entry) return true; break; case ma_root: break; case ma_error: return true; } if (mas_is_start(mas)) { /* First run or continue */ if (mas->index > max) return true; *entry = mas_walk(mas); if (*entry) return true; } if (unlikely(mas_is_ptr(mas))) goto ptr_out_of_range; if (unlikely(mas_is_none(mas))) return true; if (mas->index == max) return true; return false; ptr_out_of_range: mas->status = ma_none; mas->index = 1; mas->last = ULONG_MAX; return true; } /** * mas_find() - On the first call, find the entry at or after mas->index up to * %max. Otherwise, find the entry after mas->index. * @mas: The maple state * @max: The maximum value to check. * * Must hold rcu_read_lock or the write lock. * If an entry exists, last and index are updated accordingly. * May set @mas->status to ma_overflow. * * Return: The entry or %NULL. 
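 *
 * mas_find() is the workhorse behind the mas_for_each() iterator; a minimal
 * sketch (hypothetical identifiers), assuming rcu_read_lock() or the write
 * lock is held:
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *
 *	mas_for_each(&mas, entry, ULONG_MAX)
 *		nr++;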
*/ void *mas_find(struct ma_state *mas, unsigned long max) { void *entry = NULL; if (mas_find_setup(mas, max, &entry)) return entry; /* Retries on dead nodes handled by mas_next_slot */ entry = mas_next_slot(mas, max, false); /* Ignore overflow */ mas->status = ma_active; return entry; } EXPORT_SYMBOL_GPL(mas_find); /** * mas_find_range() - On the first call, find the entry at or after * mas->index up to %max. Otherwise, advance to the next slot mas->index. * @mas: The maple state * @max: The maximum value to check. * * Must hold rcu_read_lock or the write lock. * If an entry exists, last and index are updated accordingly. * May set @mas->status to ma_overflow. * * Return: The entry or %NULL. */ void *mas_find_range(struct ma_state *mas, unsigned long max) { void *entry = NULL; if (mas_find_setup(mas, max, &entry)) return entry; /* Retries on dead nodes handled by mas_next_slot */ return mas_next_slot(mas, max, true); } EXPORT_SYMBOL_GPL(mas_find_range); /** * mas_find_rev_setup() - Internal function to set up mas_find_*_rev() * @mas: The maple state * @min: The minimum index * @entry: Pointer to the entry * * Returns: True if entry is the answer, false otherwise. */ static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, void **entry) { switch (mas->status) { case ma_active: goto active; case ma_start: break; case ma_pause: if (unlikely(mas->index <= min)) { mas->status = ma_underflow; return true; } mas->last = --mas->index; mas->status = ma_start; break; case ma_none: if (mas->index <= min) goto none; mas->last = mas->index; mas->status = ma_start; break; case ma_overflow: /* user expects the mas to be one after where it is */ if (unlikely(mas->index <= min)) { mas->status = ma_underflow; return true; } mas->status = ma_active; break; case ma_underflow: /* user expects the mas to be one before where it is */ if (unlikely(mas->index <= min)) return true; mas->status = ma_active; break; case ma_root: break; case ma_error: return true; } if (mas_is_start(mas)) { /* First run or continue */ if (mas->index < min) return true; *entry = mas_walk(mas); if (*entry) return true; } if (unlikely(mas_is_ptr(mas))) goto none; if (unlikely(mas_is_none(mas))) { /* * Walked to the location, and there was nothing so the previous * location is 0. */ mas->last = mas->index = 0; mas->status = ma_root; *entry = mas_root(mas); return true; } active: if (mas->index < min) return true; return false; none: mas->status = ma_none; return true; } /** * mas_find_rev: On the first call, find the first non-null entry at or below * mas->index down to %min. Otherwise find the first non-null entry below * mas->index down to %min. * @mas: The maple state * @min: The minimum value to check. * * Must hold rcu_read_lock or the write lock. * If an entry exists, last and index are updated accordingly. * May set @mas->status to ma_underflow. * * Return: The entry or %NULL. */ void *mas_find_rev(struct ma_state *mas, unsigned long min) { void *entry = NULL; if (mas_find_rev_setup(mas, min, &entry)) return entry; /* Retries on dead nodes handled by mas_prev_slot */ return mas_prev_slot(mas, min, false); } EXPORT_SYMBOL_GPL(mas_find_rev); /** * mas_find_range_rev: On the first call, find the first non-null entry at or * below mas->index down to %min. Otherwise advance to the previous slot after * mas->index down to %min. * @mas: The maple state * @min: The minimum value to check. * * Must hold rcu_read_lock or the write lock. * If an entry exists, last and index are updated accordingly. 
* May set @mas->status to ma_underflow. * * Return: The entry or %NULL. */ void *mas_find_range_rev(struct ma_state *mas, unsigned long min) { void *entry = NULL; if (mas_find_rev_setup(mas, min, &entry)) return entry; /* Retries on dead nodes handled by mas_prev_slot */ return mas_prev_slot(mas, min, true); } EXPORT_SYMBOL_GPL(mas_find_range_rev); /** * mas_erase() - Find the range in which index resides and erase the entire * range. * @mas: The maple state * * Must hold the write lock. * Searches for @mas->index, sets @mas->index and @mas->last to the range and * erases that range. * * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated. */ void *mas_erase(struct ma_state *mas) { void *entry; MA_WR_STATE(wr_mas, mas, NULL); if (!mas_is_active(mas) || !mas_is_start(mas)) mas->status = ma_start; /* Retry unnecessary when holding the write lock. */ entry = mas_state_walk(mas); if (!entry) return NULL; write_retry: /* Must reset to ensure spanning writes of last slot are detected */ mas_reset(mas); mas_wr_store_setup(&wr_mas); mas_wr_store_entry(&wr_mas); if (mas_nomem(mas, GFP_KERNEL)) goto write_retry; return entry; } EXPORT_SYMBOL_GPL(mas_erase); /** * mas_nomem() - Check if there was an error allocating and do the allocation * if necessary If there are allocations, then free them. * @mas: The maple state * @gfp: The GFP_FLAGS to use for allocations * Return: true on allocation, false otherwise. */ bool mas_nomem(struct ma_state *mas, gfp_t gfp) __must_hold(mas->tree->ma_lock) { if (likely(mas->node != MA_ERROR(-ENOMEM))) { mas_destroy(mas); return false; } if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) { mtree_unlock(mas->tree); mas_alloc_nodes(mas, gfp); mtree_lock(mas->tree); } else { mas_alloc_nodes(mas, gfp); } if (!mas_allocated(mas)) return false; mas->status = ma_start; return true; } void __init maple_tree_init(void) { maple_node_cache = kmem_cache_create("maple_node", sizeof(struct maple_node), sizeof(struct maple_node), SLAB_PANIC, NULL); } /** * mtree_load() - Load a value stored in a maple tree * @mt: The maple tree * @index: The index to load * * Return: the entry or %NULL */ void *mtree_load(struct maple_tree *mt, unsigned long index) { MA_STATE(mas, mt, index, index); void *entry; trace_ma_read(__func__, &mas); rcu_read_lock(); retry: entry = mas_start(&mas); if (unlikely(mas_is_none(&mas))) goto unlock; if (unlikely(mas_is_ptr(&mas))) { if (index) entry = NULL; goto unlock; } entry = mtree_lookup_walk(&mas); if (!entry && unlikely(mas_is_start(&mas))) goto retry; unlock: rcu_read_unlock(); if (xa_is_zero(entry)) return NULL; return entry; } EXPORT_SYMBOL(mtree_load); /** * mtree_store_range() - Store an entry at a given range. * @mt: The maple tree * @index: The start of the range * @last: The end of the range * @entry: The entry to store * @gfp: The GFP_FLAGS to use for allocations * * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not * be allocated. 
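 *
 * A minimal sketch (hypothetical payload pointer); afterwards
 * mtree_load(&tree, n) returns ptr for any n between 16 and 31 inclusive:
 *
 *	DEFINE_MTREE(tree);
 *
 *	ret = mtree_store_range(&tree, 16, 31, ptr, GFP_KERNEL);
 *	if (ret)
 *		return ret;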
*/ int mtree_store_range(struct maple_tree *mt, unsigned long index, unsigned long last, void *entry, gfp_t gfp) { MA_STATE(mas, mt, index, last); MA_WR_STATE(wr_mas, &mas, entry); trace_ma_write(__func__, &mas, 0, entry); if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (index > last) return -EINVAL; mtree_lock(mt); retry: mas_wr_store_entry(&wr_mas); if (mas_nomem(&mas, gfp)) goto retry; mtree_unlock(mt); if (mas_is_err(&mas)) return xa_err(mas.node); return 0; } EXPORT_SYMBOL(mtree_store_range); /** * mtree_store() - Store an entry at a given index. * @mt: The maple tree * @index: The index to store the value * @entry: The entry to store * @gfp: The GFP_FLAGS to use for allocations * * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not * be allocated. */ int mtree_store(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp) { return mtree_store_range(mt, index, index, entry, gfp); } EXPORT_SYMBOL(mtree_store); /** * mtree_insert_range() - Insert an entry at a given range if there is no value. * @mt: The maple tree * @first: The start of the range * @last: The end of the range * @entry: The entry to store * @gfp: The GFP_FLAGS to use for allocations. * * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid * request, -ENOMEM if memory could not be allocated. */ int mtree_insert_range(struct maple_tree *mt, unsigned long first, unsigned long last, void *entry, gfp_t gfp) { MA_STATE(ms, mt, first, last); if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (first > last) return -EINVAL; mtree_lock(mt); retry: mas_insert(&ms, entry); if (mas_nomem(&ms, gfp)) goto retry; mtree_unlock(mt); if (mas_is_err(&ms)) return xa_err(ms.node); return 0; } EXPORT_SYMBOL(mtree_insert_range); /** * mtree_insert() - Insert an entry at a given index if there is no value. * @mt: The maple tree * @index : The index to store the value * @entry: The entry to store * @gfp: The GFP_FLAGS to use for allocations. * * Return: 0 on success, -EEXISTS if the range is occupied, -EINVAL on invalid * request, -ENOMEM if memory could not be allocated. */ int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry, gfp_t gfp) { return mtree_insert_range(mt, index, index, entry, gfp); } EXPORT_SYMBOL(mtree_insert); int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp) { int ret = 0; MA_STATE(mas, mt, 0, 0); if (!mt_is_alloc(mt)) return -EINVAL; if (WARN_ON_ONCE(mt_is_reserved(entry))) return -EINVAL; mtree_lock(mt); retry: ret = mas_empty_area(&mas, min, max, size); if (ret) goto unlock; mas_insert(&mas, entry); /* * mas_nomem() may release the lock, causing the allocated area * to be unavailable, so try to allocate a free area again. */ if (mas_nomem(&mas, gfp)) goto retry; if (mas_is_err(&mas)) ret = xa_err(mas.node); else *startp = mas.index; unlock: mtree_unlock(mt); return ret; } EXPORT_SYMBOL(mtree_alloc_range); /** * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree. * @mt: The maple tree. * @startp: Pointer to ID. * @range_lo: Lower bound of range to search. * @range_hi: Upper bound of range to search. * @entry: The entry to store. * @next: Pointer to next ID to allocate. * @gfp: The GFP_FLAGS to use for allocations. * * Finds an empty entry in @mt after @next, stores the new index into * the @id pointer, stores the entry at that index, then updates @next. 
* * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag. * * Context: Any context. Takes and releases the mt.lock. May sleep if * the @gfp flags permit. * * Return: 0 if the allocation succeeded without wrapping, 1 if the * allocation succeeded after wrapping, -ENOMEM if memory could not be * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no * free entries. */ int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long range_lo, unsigned long range_hi, unsigned long *next, gfp_t gfp) { int ret; MA_STATE(mas, mt, 0, 0); if (!mt_is_alloc(mt)) return -EINVAL; if (WARN_ON_ONCE(mt_is_reserved(entry))) return -EINVAL; mtree_lock(mt); ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi, next, gfp); mtree_unlock(mt); return ret; } EXPORT_SYMBOL(mtree_alloc_cyclic); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp) { int ret = 0; MA_STATE(mas, mt, 0, 0); if (!mt_is_alloc(mt)) return -EINVAL; if (WARN_ON_ONCE(mt_is_reserved(entry))) return -EINVAL; mtree_lock(mt); retry: ret = mas_empty_area_rev(&mas, min, max, size); if (ret) goto unlock; mas_insert(&mas, entry); /* * mas_nomem() may release the lock, causing the allocated area * to be unavailable, so try to allocate a free area again. */ if (mas_nomem(&mas, gfp)) goto retry; if (mas_is_err(&mas)) ret = xa_err(mas.node); else *startp = mas.index; unlock: mtree_unlock(mt); return ret; } EXPORT_SYMBOL(mtree_alloc_rrange); /** * mtree_erase() - Find an index and erase the entire range. * @mt: The maple tree * @index: The index to erase * * Erasing is the same as a walk to an entry then a store of a NULL to that * ENTIRE range. In fact, it is implemented as such using the advanced API. * * Return: The entry stored at the @index or %NULL */ void *mtree_erase(struct maple_tree *mt, unsigned long index) { void *entry = NULL; MA_STATE(mas, mt, index, index); trace_ma_op(__func__, &mas); mtree_lock(mt); entry = mas_erase(&mas); mtree_unlock(mt); return entry; } EXPORT_SYMBOL(mtree_erase); /* * mas_dup_free() - Free an incomplete duplication of a tree. * @mas: The maple state of a incomplete tree. * * The parameter @mas->node passed in indicates that the allocation failed on * this node. This function frees all nodes starting from @mas->node in the * reverse order of mas_dup_build(). There is no need to hold the source tree * lock at this time. */ static void mas_dup_free(struct ma_state *mas) { struct maple_node *node; enum maple_type type; void __rcu **slots; unsigned char count, i; /* Maybe the first node allocation failed. */ if (mas_is_none(mas)) return; while (!mte_is_root(mas->node)) { mas_ascend(mas); if (mas->offset) { mas->offset--; do { mas_descend(mas); mas->offset = mas_data_end(mas); } while (!mte_is_leaf(mas->node)); mas_ascend(mas); } node = mte_to_node(mas->node); type = mte_node_type(mas->node); slots = ma_slots(node, type); count = mas_data_end(mas) + 1; for (i = 0; i < count; i++) ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK; mt_free_bulk(count, slots); } node = mte_to_node(mas->node); mt_free_one(node); } /* * mas_copy_node() - Copy a maple node and replace the parent. * @mas: The maple state of source tree. * @new_mas: The maple state of new tree. * @parent: The parent of the new node. * * Copy @mas->node to @new_mas->node, set @parent to be the parent of * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM. 
*/ static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas, struct maple_pnode *parent) { struct maple_node *node = mte_to_node(mas->node); struct maple_node *new_node = mte_to_node(new_mas->node); unsigned long val; /* Copy the node completely. */ memcpy(new_node, node, sizeof(struct maple_node)); /* Update the parent node pointer. */ val = (unsigned long)node->parent & MAPLE_NODE_MASK; new_node->parent = ma_parent_ptr(val | (unsigned long)parent); } /* * mas_dup_alloc() - Allocate child nodes for a maple node. * @mas: The maple state of source tree. * @new_mas: The maple state of new tree. * @gfp: The GFP_FLAGS to use for allocations. * * This function allocates child nodes for @new_mas->node during the duplication * process. If memory allocation fails, @mas is set to -ENOMEM. */ static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, gfp_t gfp) { struct maple_node *node = mte_to_node(mas->node); struct maple_node *new_node = mte_to_node(new_mas->node); enum maple_type type; unsigned char request, count, i; void __rcu **slots; void __rcu **new_slots; unsigned long val; /* Allocate memory for child nodes. */ type = mte_node_type(mas->node); new_slots = ma_slots(new_node, type); request = mas_data_end(mas) + 1; count = mt_alloc_bulk(gfp, request, (void **)new_slots); if (unlikely(count < request)) { memset(new_slots, 0, request * sizeof(void *)); mas_set_err(mas, -ENOMEM); return; } /* Restore node type information in slots. */ slots = ma_slots(node, type); for (i = 0; i < count; i++) { val = (unsigned long)mt_slot_locked(mas->tree, slots, i); val &= MAPLE_NODE_MASK; ((unsigned long *)new_slots)[i] |= val; } } /* * mas_dup_build() - Build a new maple tree from a source tree * @mas: The maple state of source tree, need to be in MAS_START state. * @new_mas: The maple state of new tree, need to be in MAS_START state. * @gfp: The GFP_FLAGS to use for allocations. * * This function builds a new tree in DFS preorder. If the memory allocation * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the * last node. mas_dup_free() will free the incomplete duplication of a tree. * * Note that the attributes of the two trees need to be exactly the same, and the * new tree needs to be empty, otherwise -EINVAL will be set in @mas. */ static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, gfp_t gfp) { struct maple_node *node; struct maple_pnode *parent = NULL; struct maple_enode *root; enum maple_type type; if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) || unlikely(!mtree_empty(new_mas->tree))) { mas_set_err(mas, -EINVAL); return; } root = mas_start(mas); if (mas_is_ptr(mas) || mas_is_none(mas)) goto set_new_tree; node = mt_alloc_one(gfp); if (!node) { new_mas->status = ma_none; mas_set_err(mas, -ENOMEM); return; } type = mte_node_type(mas->node); root = mt_mk_node(node, type); new_mas->node = root; new_mas->min = 0; new_mas->max = ULONG_MAX; root = mte_mk_root(root); while (1) { mas_copy_node(mas, new_mas, parent); if (!mte_is_leaf(mas->node)) { /* Only allocate child nodes for non-leaf nodes. */ mas_dup_alloc(mas, new_mas, gfp); if (unlikely(mas_is_err(mas))) return; } else { /* * This is the last leaf node and duplication is * completed. */ if (mas->max == ULONG_MAX) goto done; /* This is not the last leaf node and needs to go up. */ do { mas_ascend(mas); mas_ascend(new_mas); } while (mas->offset == mas_data_end(mas)); /* Move to the next subtree. 
*/ mas->offset++; new_mas->offset++; } mas_descend(mas); parent = ma_parent_ptr(mte_to_node(new_mas->node)); mas_descend(new_mas); mas->offset = 0; new_mas->offset = 0; } done: /* Specially handle the parent of the root node. */ mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas)); set_new_tree: /* Make them the same height */ new_mas->tree->ma_flags = mas->tree->ma_flags; rcu_assign_pointer(new_mas->tree->ma_root, root); } /** * __mt_dup(): Duplicate an entire maple tree * @mt: The source maple tree * @new: The new maple tree * @gfp: The GFP_FLAGS to use for allocations * * This function duplicates a maple tree in Depth-First Search (DFS) pre-order * traversal. It uses memcpy() to copy nodes in the source tree and allocate * new child nodes in non-leaf nodes. The new node is exactly the same as the * source node except for all the addresses stored in it. It will be faster than * traversing all elements in the source tree and inserting them one by one into * the new tree. * The user needs to ensure that the attributes of the source tree and the new * tree are the same, and the new tree needs to be an empty tree, otherwise * -EINVAL will be returned. * Note that the user needs to manually lock the source tree and the new tree. * * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If * the attributes of the two trees are different or the new tree is not an empty * tree. */ int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) { int ret = 0; MA_STATE(mas, mt, 0, 0); MA_STATE(new_mas, new, 0, 0); mas_dup_build(&mas, &new_mas, gfp); if (unlikely(mas_is_err(&mas))) { ret = xa_err(mas.node); if (ret == -ENOMEM) mas_dup_free(&new_mas); } return ret; } EXPORT_SYMBOL(__mt_dup); /** * mtree_dup(): Duplicate an entire maple tree * @mt: The source maple tree * @new: The new maple tree * @gfp: The GFP_FLAGS to use for allocations * * This function duplicates a maple tree in Depth-First Search (DFS) pre-order * traversal. It uses memcpy() to copy nodes in the source tree and allocate * new child nodes in non-leaf nodes. The new node is exactly the same as the * source node except for all the addresses stored in it. It will be faster than * traversing all elements in the source tree and inserting them one by one into * the new tree. * The user needs to ensure that the attributes of the source tree and the new * tree are the same, and the new tree needs to be an empty tree, otherwise * -EINVAL will be returned. * * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL If * the attributes of the two trees are different or the new tree is not an empty * tree. */ int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp) { int ret = 0; MA_STATE(mas, mt, 0, 0); MA_STATE(new_mas, new, 0, 0); mas_lock(&new_mas); mas_lock_nested(&mas, SINGLE_DEPTH_NESTING); mas_dup_build(&mas, &new_mas, gfp); mas_unlock(&mas); if (unlikely(mas_is_err(&mas))) { ret = xa_err(mas.node); if (ret == -ENOMEM) mas_dup_free(&new_mas); } mas_unlock(&new_mas); return ret; } EXPORT_SYMBOL(mtree_dup); /** * __mt_destroy() - Walk and free all nodes of a locked maple tree. * @mt: The maple tree * * Note: Does not handle locking. 
*/ void __mt_destroy(struct maple_tree *mt) { void *root = mt_root_locked(mt); rcu_assign_pointer(mt->ma_root, NULL); if (xa_is_node(root)) mte_destroy_walk(root, mt); mt->ma_flags = mt_attr(mt); } EXPORT_SYMBOL_GPL(__mt_destroy); /** * mtree_destroy() - Destroy a maple tree * @mt: The maple tree * * Frees all resources used by the tree. Handles locking. */ void mtree_destroy(struct maple_tree *mt) { mtree_lock(mt); __mt_destroy(mt); mtree_unlock(mt); } EXPORT_SYMBOL(mtree_destroy); /** * mt_find() - Search from the start up until an entry is found. * @mt: The maple tree * @index: Pointer which contains the start location of the search * @max: The maximum value of the search range * * Takes RCU read lock internally to protect the search, which does not * protect the returned pointer after dropping RCU read lock. * See also: Documentation/core-api/maple_tree.rst * * In case that an entry is found @index is updated to point to the next * possible entry independent whether the found entry is occupying a * single index or a range if indices. * * Return: The entry at or after the @index or %NULL */ void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) { MA_STATE(mas, mt, *index, *index); void *entry; #ifdef CONFIG_DEBUG_MAPLE_TREE unsigned long copy = *index; #endif trace_ma_read(__func__, &mas); if ((*index) > max) return NULL; rcu_read_lock(); retry: entry = mas_state_walk(&mas); if (mas_is_start(&mas)) goto retry; if (unlikely(xa_is_zero(entry))) entry = NULL; if (entry) goto unlock; while (mas_is_active(&mas) && (mas.last < max)) { entry = mas_next_entry(&mas, max); if (likely(entry && !xa_is_zero(entry))) break; } if (unlikely(xa_is_zero(entry))) entry = NULL; unlock: rcu_read_unlock(); if (likely(entry)) { *index = mas.last + 1; #ifdef CONFIG_DEBUG_MAPLE_TREE if (MT_WARN_ON(mt, (*index) && ((*index) <= copy))) pr_err("index not increased! %lx <= %lx\n", *index, copy); #endif } return entry; } EXPORT_SYMBOL(mt_find); /** * mt_find_after() - Search from the start up until an entry is found. * @mt: The maple tree * @index: Pointer which contains the start location of the search * @max: The maximum value to check * * Same as mt_find() except that it checks @index for 0 before * searching. If @index == 0, the search is aborted. This covers a wrap * around of @index to 0 in an iterator loop. 
* * Return: The entry at or after the @index or %NULL */ void *mt_find_after(struct maple_tree *mt, unsigned long *index, unsigned long max) { if (!(*index)) return NULL; return mt_find(mt, index, max); } EXPORT_SYMBOL(mt_find_after); #ifdef CONFIG_DEBUG_MAPLE_TREE atomic_t maple_tree_tests_run; EXPORT_SYMBOL_GPL(maple_tree_tests_run); atomic_t maple_tree_tests_passed; EXPORT_SYMBOL_GPL(maple_tree_tests_passed); #ifndef __KERNEL__ extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int); void mt_set_non_kernel(unsigned int val) { kmem_cache_set_non_kernel(maple_node_cache, val); } extern unsigned long kmem_cache_get_alloc(struct kmem_cache *); unsigned long mt_get_alloc_size(void) { return kmem_cache_get_alloc(maple_node_cache); } extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *); void mt_zero_nr_tallocated(void) { kmem_cache_zero_nr_tallocated(maple_node_cache); } extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *); unsigned int mt_nr_tallocated(void) { return kmem_cache_nr_tallocated(maple_node_cache); } extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *); unsigned int mt_nr_allocated(void) { return kmem_cache_nr_allocated(maple_node_cache); } void mt_cache_shrink(void) { } #else /* * mt_cache_shrink() - For testing, don't use this. * * Certain testcases can trigger an OOM when combined with other memory * debugging configuration options. This function is used to reduce the * possibility of an out of memory even due to kmem_cache objects remaining * around for longer than usual. */ void mt_cache_shrink(void) { kmem_cache_shrink(maple_node_cache); } EXPORT_SYMBOL_GPL(mt_cache_shrink); #endif /* not defined __KERNEL__ */ /* * mas_get_slot() - Get the entry in the maple state node stored at @offset. * @mas: The maple state * @offset: The offset into the slot array to fetch. * * Return: The entry stored at @offset. 
*/ static inline struct maple_enode *mas_get_slot(struct ma_state *mas, unsigned char offset) { return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)), offset); } /* Depth first search, post-order */ static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) { struct maple_enode *p, *mn = mas->node; unsigned long p_min, p_max; mas_next_node(mas, mas_mn(mas), max); if (!mas_is_overflow(mas)) return; if (mte_is_root(mn)) return; mas->node = mn; mas_ascend(mas); do { p = mas->node; p_min = mas->min; p_max = mas->max; mas_prev_node(mas, 0); } while (!mas_is_underflow(mas)); mas->node = p; mas->max = p_max; mas->min = p_min; } /* Tree validations */ static void mt_dump_node(const struct maple_tree *mt, void *entry, unsigned long min, unsigned long max, unsigned int depth, enum mt_dump_format format); static void mt_dump_range(unsigned long min, unsigned long max, unsigned int depth, enum mt_dump_format format) { static const char spaces[] = " "; switch(format) { case mt_dump_hex: if (min == max) pr_info("%.*s%lx: ", depth * 2, spaces, min); else pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max); break; case mt_dump_dec: if (min == max) pr_info("%.*s%lu: ", depth * 2, spaces, min); else pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max); } } static void mt_dump_entry(void *entry, unsigned long min, unsigned long max, unsigned int depth, enum mt_dump_format format) { mt_dump_range(min, max, depth, format); if (xa_is_value(entry)) pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry), xa_to_value(entry), entry); else if (xa_is_zero(entry)) pr_cont("zero (%ld)\n", xa_to_internal(entry)); else if (mt_is_reserved(entry)) pr_cont("UNKNOWN ENTRY (%p)\n", entry); else pr_cont("%p\n", entry); } static void mt_dump_range64(const struct maple_tree *mt, void *entry, unsigned long min, unsigned long max, unsigned int depth, enum mt_dump_format format) { struct maple_range_64 *node = &mte_to_node(entry)->mr64; bool leaf = mte_is_leaf(entry); unsigned long first = min; int i; pr_cont(" contents: "); for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) { switch(format) { case mt_dump_hex: pr_cont("%p %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: pr_cont("%p %lu ", node->slot[i], node->pivot[i]); } } pr_cont("%p\n", node->slot[i]); for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) { unsigned long last = max; if (i < (MAPLE_RANGE64_SLOTS - 1)) last = node->pivot[i]; else if (!node->slot[i] && max != mt_node_max(entry)) break; if (last == 0 && i > 0) break; if (leaf) mt_dump_entry(mt_slot(mt, node->slot, i), first, last, depth + 1, format); else if (node->slot[i]) mt_dump_node(mt, mt_slot(mt, node->slot, i), first, last, depth + 1, format); if (last == max) break; if (last > max) { switch(format) { case mt_dump_hex: pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n", node, last, max, i); break; case mt_dump_dec: pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } } first = last + 1; } } static void mt_dump_arange64(const struct maple_tree *mt, void *entry, unsigned long min, unsigned long max, unsigned int depth, enum mt_dump_format format) { struct maple_arange_64 *node = &mte_to_node(entry)->ma64; bool leaf = mte_is_leaf(entry); unsigned long first = min; int i; pr_cont(" contents: "); for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { switch (format) { case mt_dump_hex: pr_cont("%lx ", node->gap[i]); break; case mt_dump_dec: pr_cont("%lu ", node->gap[i]); } } pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap); for (i = 0; i < 
MAPLE_ARANGE64_SLOTS - 1; i++) { switch (format) { case mt_dump_hex: pr_cont("%p %lX ", node->slot[i], node->pivot[i]); break; case mt_dump_dec: pr_cont("%p %lu ", node->slot[i], node->pivot[i]); } } pr_cont("%p\n", node->slot[i]); for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) { unsigned long last = max; if (i < (MAPLE_ARANGE64_SLOTS - 1)) last = node->pivot[i]; else if (!node->slot[i]) break; if (last == 0 && i > 0) break; if (leaf) mt_dump_entry(mt_slot(mt, node->slot, i), first, last, depth + 1, format); else if (node->slot[i]) mt_dump_node(mt, mt_slot(mt, node->slot, i), first, last, depth + 1, format); if (last == max) break; if (last > max) { pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); break; } first = last + 1; } } static void mt_dump_node(const struct maple_tree *mt, void *entry, unsigned long min, unsigned long max, unsigned int depth, enum mt_dump_format format) { struct maple_node *node = mte_to_node(entry); unsigned int type = mte_node_type(entry); unsigned int i; mt_dump_range(min, max, depth, format); pr_cont("node %p depth %d type %d parent %p", node, depth, type, node ? node->parent : NULL); switch (type) { case maple_dense: pr_cont("\n"); for (i = 0; i < MAPLE_NODE_SLOTS; i++) { if (min + i > max) pr_cont("OUT OF RANGE: "); mt_dump_entry(mt_slot(mt, node->slot, i), min + i, min + i, depth, format); } break; case maple_leaf_64: case maple_range_64: mt_dump_range64(mt, entry, min, max, depth, format); break; case maple_arange_64: mt_dump_arange64(mt, entry, min, max, depth, format); break; default: pr_cont(" UNKNOWN TYPE\n"); } } void mt_dump(const struct maple_tree *mt, enum mt_dump_format format) { void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt)); pr_info("maple_tree(%p) flags %X, height %u root %p\n", mt, mt->ma_flags, mt_height(mt), entry); if (!xa_is_node(entry)) mt_dump_entry(entry, 0, 0, 0, format); else if (entry) mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format); } EXPORT_SYMBOL_GPL(mt_dump); /* * Calculate the maximum gap in a node and check if that's what is reported in * the parent (unless root). 
*/ static void mas_validate_gaps(struct ma_state *mas) { struct maple_enode *mte = mas->node; struct maple_node *p_mn, *node = mte_to_node(mte); enum maple_type mt = mte_node_type(mas->node); unsigned long gap = 0, max_gap = 0; unsigned long p_end, p_start = mas->min; unsigned char p_slot, offset; unsigned long *gaps = NULL; unsigned long *pivots = ma_pivots(node, mt); unsigned int i; if (ma_is_dense(mt)) { for (i = 0; i < mt_slot_count(mte); i++) { if (mas_get_slot(mas, i)) { if (gap > max_gap) max_gap = gap; gap = 0; continue; } gap++; } goto counted; } gaps = ma_gaps(node, mt); for (i = 0; i < mt_slot_count(mte); i++) { p_end = mas_safe_pivot(mas, pivots, i, mt); if (!gaps) { if (!mas_get_slot(mas, i)) gap = p_end - p_start + 1; } else { void *entry = mas_get_slot(mas, i); gap = gaps[i]; MT_BUG_ON(mas->tree, !entry); if (gap > p_end - p_start + 1) { pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n", mas_mn(mas), i, gap, p_end, p_start, p_end - p_start + 1); MT_BUG_ON(mas->tree, gap > p_end - p_start + 1); } } if (gap > max_gap) max_gap = gap; p_start = p_end + 1; if (p_end >= mas->max) break; } counted: if (mt == maple_arange_64) { MT_BUG_ON(mas->tree, !gaps); offset = ma_meta_gap(node); if (offset > i) { pr_err("gap offset %p[%u] is invalid\n", node, offset); MT_BUG_ON(mas->tree, 1); } if (gaps[offset] != max_gap) { pr_err("gap %p[%u] is not the largest gap %lu\n", node, offset, max_gap); MT_BUG_ON(mas->tree, 1); } for (i++ ; i < mt_slot_count(mte); i++) { if (gaps[i] != 0) { pr_err("gap %p[%u] beyond node limit != 0\n", node, i); MT_BUG_ON(mas->tree, 1); } } } if (mte_is_root(mte)) return; p_slot = mte_parent_slot(mas->node); p_mn = mte_parent(mte); MT_BUG_ON(mas->tree, max_gap > mas->max); if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) { pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap); mt_dump(mas->tree, mt_dump_hex); MT_BUG_ON(mas->tree, 1); } } static void mas_validate_parent_slot(struct ma_state *mas) { struct maple_node *parent; struct maple_enode *node; enum maple_type p_type; unsigned char p_slot; void __rcu **slots; int i; if (mte_is_root(mas->node)) return; p_slot = mte_parent_slot(mas->node); p_type = mas_parent_type(mas, mas->node); parent = mte_parent(mas->node); slots = ma_slots(parent, p_type); MT_BUG_ON(mas->tree, mas_mn(mas) == parent); /* Check prev/next parent slot for duplicate node entry */ for (i = 0; i < mt_slots[p_type]; i++) { node = mas_slot(mas, slots, i); if (i == p_slot) { if (node != mas->node) pr_err("parent %p[%u] does not have %p\n", parent, i, mas_mn(mas)); MT_BUG_ON(mas->tree, node != mas->node); } else if (node == mas->node) { pr_err("Invalid child %p at parent %p[%u] p_slot %u\n", mas_mn(mas), parent, i, p_slot); MT_BUG_ON(mas->tree, node == mas->node); } } } static void mas_validate_child_slot(struct ma_state *mas) { enum maple_type type = mte_node_type(mas->node); void __rcu **slots = ma_slots(mte_to_node(mas->node), type); unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type); struct maple_enode *child; unsigned char i; if (mte_is_leaf(mas->node)) return; for (i = 0; i < mt_slots[type]; i++) { child = mas_slot(mas, slots, i); if (!child) { pr_err("Non-leaf node lacks child at %p[%u]\n", mas_mn(mas), i); MT_BUG_ON(mas->tree, 1); } if (mte_parent_slot(child) != i) { pr_err("Slot error at %p[%u]: child %p has pslot %u\n", mas_mn(mas), i, mte_to_node(child), mte_parent_slot(child)); MT_BUG_ON(mas->tree, 1); } if (mte_parent(child) != mte_to_node(mas->node)) { pr_err("child %p has parent %p not %p\n", mte_to_node(child), 
mte_parent(child), mte_to_node(mas->node)); MT_BUG_ON(mas->tree, 1); } if (i < mt_pivots[type] && pivots[i] == mas->max) break; } } /* * Validate all pivots are within mas->min and mas->max, check metadata ends * where the maximum ends and ensure there is no slots or pivots set outside of * the end of the data. */ static void mas_validate_limits(struct ma_state *mas) { int i; unsigned long prev_piv = 0; enum maple_type type = mte_node_type(mas->node); void __rcu **slots = ma_slots(mte_to_node(mas->node), type); unsigned long *pivots = ma_pivots(mas_mn(mas), type); for (i = 0; i < mt_slots[type]; i++) { unsigned long piv; piv = mas_safe_pivot(mas, pivots, i, type); if (!piv && (i != 0)) { pr_err("Missing node limit pivot at %p[%u]", mas_mn(mas), i); MAS_WARN_ON(mas, 1); } if (prev_piv > piv) { pr_err("%p[%u] piv %lu < prev_piv %lu\n", mas_mn(mas), i, piv, prev_piv); MAS_WARN_ON(mas, piv < prev_piv); } if (piv < mas->min) { pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i, piv, mas->min); MAS_WARN_ON(mas, piv < mas->min); } if (piv > mas->max) { pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i, piv, mas->max); MAS_WARN_ON(mas, piv > mas->max); } prev_piv = piv; if (piv == mas->max) break; } if (mas_data_end(mas) != i) { pr_err("node%p: data_end %u != the last slot offset %u\n", mas_mn(mas), mas_data_end(mas), i); MT_BUG_ON(mas->tree, 1); } for (i += 1; i < mt_slots[type]; i++) { void *entry = mas_slot(mas, slots, i); if (entry && (i != mt_slots[type] - 1)) { pr_err("%p[%u] should not have entry %p\n", mas_mn(mas), i, entry); MT_BUG_ON(mas->tree, entry != NULL); } if (i < mt_pivots[type]) { unsigned long piv = pivots[i]; if (!piv) continue; pr_err("%p[%u] should not have piv %lu\n", mas_mn(mas), i, piv); MAS_WARN_ON(mas, i < mt_pivots[type] - 1); } } } static void mt_validate_nulls(struct maple_tree *mt) { void *entry, *last = (void *)1; unsigned char offset = 0; void __rcu **slots; MA_STATE(mas, mt, 0, 0); mas_start(&mas); if (mas_is_none(&mas) || (mas_is_ptr(&mas))) return; while (!mte_is_leaf(mas.node)) mas_descend(&mas); slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); do { entry = mas_slot(&mas, slots, offset); if (!last && !entry) { pr_err("Sequential nulls end at %p[%u]\n", mas_mn(&mas), offset); } MT_BUG_ON(mt, !last && !entry); last = entry; if (offset == mas_data_end(&mas)) { mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); if (mas_is_overflow(&mas)) return; offset = 0; slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node)); } else { offset++; } } while (!mas_is_overflow(&mas)); } /* * validate a maple tree by checking: * 1. The limits (pivots are within mas->min to mas->max) * 2. 
The gap is correctly set in the parents */ void mt_validate(struct maple_tree *mt) { unsigned char end; MA_STATE(mas, mt, 0, 0); rcu_read_lock(); mas_start(&mas); if (!mas_is_active(&mas)) goto done; while (!mte_is_leaf(mas.node)) mas_descend(&mas); while (!mas_is_overflow(&mas)) { MAS_WARN_ON(&mas, mte_dead_node(mas.node)); end = mas_data_end(&mas); if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && (mas.max != ULONG_MAX))) { pr_err("Invalid size %u of %p\n", end, mas_mn(&mas)); } mas_validate_parent_slot(&mas); mas_validate_limits(&mas); mas_validate_child_slot(&mas); if (mt_is_alloc(mt)) mas_validate_gaps(&mas); mas_dfs_postorder(&mas, ULONG_MAX); } mt_validate_nulls(mt); done: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(mt_validate); void mas_dump(const struct ma_state *mas) { pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); switch (mas->status) { case ma_active: pr_err("(ma_active)"); break; case ma_none: pr_err("(ma_none)"); break; case ma_root: pr_err("(ma_root)"); break; case ma_start: pr_err("(ma_start) "); break; case ma_pause: pr_err("(ma_pause) "); break; case ma_overflow: pr_err("(ma_overflow) "); break; case ma_underflow: pr_err("(ma_underflow) "); break; case ma_error: pr_err("(ma_error) "); break; } pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, mas->index, mas->last); pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); if (mas->index > mas->last) pr_err("Check index & last\n"); } EXPORT_SYMBOL_GPL(mas_dump); void mas_wr_dump(const struct ma_wr_state *wr_mas) { pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n", wr_mas->node, wr_mas->r_min, wr_mas->r_max); pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n", wr_mas->type, wr_mas->offset_end, wr_mas->mas->end, wr_mas->end_piv); } EXPORT_SYMBOL_GPL(mas_wr_dump); #endif /* CONFIG_DEBUG_MAPLE_TREE */
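/*
 * Illustrative usage sketch, not part of maple_tree.c above: it exercises the
 * external mtree_* API documented in that file (mtree_store_range(),
 * mtree_load(), mt_find(), mtree_erase(), mtree_destroy()). The tree name,
 * the function name, and the use of DEFINE_MTREE() and xa_mk_value() from
 * <linux/maple_tree.h> and <linux/xarray.h> are assumptions made for the
 * example, not definitions taken from the file above.
 */
#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/errno.h>

static DEFINE_MTREE(demo_tree);		/* hypothetical tree for the example */

static int demo_maple_tree_usage(void)
{
	unsigned long index = 0;
	void *entry;
	int ret;

	/* Store one entry covering the whole index range [10, 19]. */
	ret = mtree_store_range(&demo_tree, 10, 19, xa_mk_value(0xdead),
				GFP_KERNEL);
	if (ret)
		return ret;

	/* Any index inside the stored range returns the same entry. */
	entry = mtree_load(&demo_tree, 15);
	if (entry != xa_mk_value(0xdead))
		return -EINVAL;

	/* mt_find() returns the entry and advances *index past its range. */
	entry = mt_find(&demo_tree, &index, ULONG_MAX);
	if (entry && index != 20)
		return -EINVAL;

	/* Erase the entire range containing index 12, then free the tree. */
	entry = mtree_erase(&demo_tree, 12);
	mtree_destroy(&demo_tree);

	return entry ? 0 : -ENOENT;
}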
// SPDX-License-Identifier: GPL-2.0-only
/*
 * A generic implementation of binary search for the Linux kernel
 *
 * Copyright (C) 2008-2009 Ksplice, Inc.
 * Author: Tim Abbott <tabbott@ksplice.com>
 */

#include <linux/export.h>
#include <linux/bsearch.h>
#include <linux/kprobes.h>

/*
 * bsearch - binary search an array of elements
 * @key: pointer to item being searched for
 * @base: pointer to first element to search
 * @num: number of elements
 * @size: size of each element
 * @cmp: pointer to comparison function
 *
 * This function does a binary search on the given array. The
 * contents of the array should already be in ascending sorted order
 * under the provided comparison function.
 *
 * Note that the key need not have the same type as the elements in
 * the array, e.g. key could be a string and the comparison function
 * could compare the string with the struct's name field. However, if
 * the key and elements in the array are of the same type, you can use
 * the same comparison function for both sort() and bsearch().
 */
void *bsearch(const void *key, const void *base, size_t num, size_t size, cmp_func_t cmp)
{
	return __inline_bsearch(key, base, num, size, cmp);
}
EXPORT_SYMBOL(bsearch);
NOKPROBE_SYMBOL(bsearch);
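/*
 * Illustrative usage sketch, not part of bsearch.c above: looking up an
 * integer in a sorted array with bsearch(). The cmp_int() and
 * demo_contains() helpers and the array are made up for the example; only
 * bsearch() itself and the cmp_func_t prototype come from the code above
 * and <linux/bsearch.h>.
 */
#include <linux/bsearch.h>
#include <linux/types.h>

static int cmp_int(const void *key, const void *elt)
{
	int k = *(const int *)key;
	int e = *(const int *)elt;

	if (k < e)
		return -1;
	if (k > e)
		return 1;
	return 0;
}

static bool demo_contains(const int *sorted, size_t num, int needle)
{
	/* The array must already be sorted under the same comparison. */
	return bsearch(&needle, sorted, num, sizeof(*sorted), cmp_int) != NULL;
}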
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat */ #include <linux/module.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_generic.h> #include <drm/drm_file.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_ioctl.h> #include <drm/drm_probe_helper.h> #include <drm/drm_print.h> #include "udl_drv.h" static int udl_usb_suspend(struct usb_interface *interface, pm_message_t message) { struct drm_device *dev = usb_get_intfdata(interface); int ret; ret = drm_mode_config_helper_suspend(dev); if (ret) return ret; udl_sync_pending_urbs(dev); return 0; } static int udl_usb_resume(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); return drm_mode_config_helper_resume(dev); } static int udl_usb_reset_resume(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); struct udl_device *udl = to_udl(dev); udl_select_std_channel(udl); return drm_mode_config_helper_resume(dev); } /* * FIXME: Dma-buf sharing requires DMA support by the importing device. * This function is a workaround to make USB devices work as well. * See todo.rst for how to fix the issue in the dma-buf framework. */ static struct drm_gem_object *udl_driver_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { struct udl_device *udl = to_udl(dev); if (!udl->dmadev) return ERR_PTR(-ENODEV); return drm_gem_prime_import_dev(dev, dma_buf, udl->dmadev); } DEFINE_DRM_GEM_FOPS(udl_driver_fops); static const struct drm_driver driver = { .driver_features = DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET, /* GEM hooks */ .fops = &udl_driver_fops, DRM_GEM_SHMEM_DRIVER_OPS, .gem_prime_import = udl_driver_gem_prime_import, .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, }; static struct udl_device *udl_driver_create(struct usb_interface *interface) { struct udl_device *udl; int r; udl = devm_drm_dev_alloc(&interface->dev, &driver, struct udl_device, drm); if (IS_ERR(udl)) return udl; r = udl_init(udl); if (r) return ERR_PTR(r); usb_set_intfdata(interface, udl); return udl; } static int udl_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { int r; struct udl_device *udl; udl = udl_driver_create(interface); if (IS_ERR(udl)) return PTR_ERR(udl); r = drm_dev_register(&udl->drm, 0); if (r) return r; DRM_INFO("Initialized udl on minor %d\n", udl->drm.primary->index); drm_fbdev_generic_setup(&udl->drm, 0); return 0; } static void udl_usb_disconnect(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); drm_kms_helper_poll_fini(dev); udl_drop_usb(dev); drm_dev_unplug(dev); } /* * There are many DisplayLink-based graphics products, all with unique PIDs. 
* So we match on DisplayLink's VID + Vendor-Defined Interface Class (0xff) * We also require a match on SubClass (0x00) and Protocol (0x00), * which is compatible with all known USB 2.0 era graphics chips and firmware, * but allows DisplayLink to increment those for any future incompatible chips */ static const struct usb_device_id id_table[] = { {.idVendor = 0x17e9, .bInterfaceClass = 0xff, .bInterfaceSubClass = 0x00, .bInterfaceProtocol = 0x00, .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS | USB_DEVICE_ID_MATCH_INT_PROTOCOL,}, {}, }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver udl_driver = { .name = "udl", .probe = udl_usb_probe, .disconnect = udl_usb_disconnect, .suspend = udl_usb_suspend, .resume = udl_usb_resume, .reset_resume = udl_usb_reset_resume, .id_table = id_table, }; module_usb_driver(udl_driver); MODULE_LICENSE("GPL");
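/*
 * Illustrative sketch, not part of udl_drv.c above: the open-coded match
 * entry in id_table[] can also be written with the
 * USB_VENDOR_AND_INTERFACE_INFO() helper from <linux/usb.h>, which is
 * assumed here to set the same vendor + interface class/subclass/protocol
 * match flags. Shown only as an equivalent form under that assumption,
 * not as a proposed change to the driver.
 */
#include <linux/usb.h>

static const struct usb_device_id demo_id_table[] = {
	/* DisplayLink VID 0x17e9, vendor-defined interface class ff/00/00 */
	{ USB_VENDOR_AND_INTERFACE_INFO(0x17e9, 0xff, 0x00, 0x00) },
	{ },
};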
// SPDX-License-Identifier: GPL-2.0-only /* * fs/dcache.c * * Complete reimplementation * (C) 1997 Thomas Schoebel-Theuer, * with heavy changes by Linus Torvalds */ /* * Notes on the allocation strategy: * * The dcache is a master of the icache - whenever a dcache entry * exists, the inode will always exist. "iput()" is done either when * the dcache entry is deleted or garbage collected. */ #include <linux/ratelimit.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/fs.h> #include <linux/fscrypt.h> #include <linux/fsnotify.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/cache.h> #include <linux/export.h> #include <linux/security.h> #include <linux/seqlock.h> #include <linux/memblock.h> #include <linux/bit_spinlock.h> #include <linux/rculist_bl.h> #include <linux/list_lru.h> #include "internal.h" #include "mount.h" /* * Usage: * dcache->d_inode->i_lock protects: * - i_dentry, d_u.d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_roots bl list spinlock protects: * - the s_roots list (see __d_drop) * dentry->d_sb->s_dentry_lru_lock protects: * - the dcache lru lists and counters * d_lock protects: * - d_flags * - d_name * - d_lru * - d_count * - d_unhashed() * - d_parent and d_children * - children's d_sib and d_parent * - d_u.d_alias, d_inode * * Ordering: * dentry->d_inode->i_lock * dentry->d_lock * dentry->d_sb->s_dentry_lru_lock * dcache_hash_bucket lock * s_roots lock * * If there is an ancestor relationship: * dentry->d_parent->...->d_parent->d_lock * ... * dentry->d_parent->d_lock * dentry->d_lock * * If no ancestor relationship: * arbitrary, since it's serialized on rename_lock */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __ro_after_init; const struct qstr empty_name = QSTR_INIT("", 0); EXPORT_SYMBOL(empty_name); const struct qstr slash_name = QSTR_INIT("/", 1); EXPORT_SYMBOL(slash_name); const struct qstr dotdot_name = QSTR_INIT("..", 2); EXPORT_SYMBOL(dotdot_name); /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups.
Somebody should try * to make this good - I've just made it work. * * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ static unsigned int d_hash_shift __ro_after_init; static struct hlist_bl_head *dentry_hashtable __ro_after_init; static inline struct hlist_bl_head *d_hash(unsigned int hash) { return dentry_hashtable + (hash >> d_hash_shift); } #define IN_LOOKUP_SHIFT 10 static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT]; static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT); } struct dentry_stat_t { long nr_dentry; long nr_unused; long age_limit; /* age in seconds */ long want_pages; /* pages requested by system */ long nr_negative; /* # of unused negative dentries */ long dummy; /* Reserved for future use */ }; static DEFINE_PER_CPU(long, nr_dentry); static DEFINE_PER_CPU(long, nr_dentry_unused); static DEFINE_PER_CPU(long, nr_dentry_negative); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) /* Statistics gathering. */ static struct dentry_stat_t dentry_stat = { .age_limit = 45, }; /* * Here we resort to our own counters instead of using generic per-cpu counters * for consistency with what the vfs inode code does. We are expected to harvest * better code and performance by having our own specialized counters. * * Please note that the loop is done over all possible CPUs, not over all online * CPUs. The reason for this is that we don't want to play games with CPUs going * on and off. If one of them goes off, we will just keep their counters. * * glommer: See cffbc8a for details, and if you ever intend to change this, * please update all vfs counters to match. */ static long get_nr_dentry(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry, i); return sum < 0 ? 0 : sum; } static long get_nr_dentry_unused(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry_unused, i); return sum < 0 ? 0 : sum; } static long get_nr_dentry_negative(void) { int i; long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry_negative, i); return sum < 0 ? 0 : sum; } static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); dentry_stat.nr_unused = get_nr_dentry_unused(); dentry_stat.nr_negative = get_nr_dentry_negative(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static struct ctl_table fs_dcache_sysctls[] = { { .procname = "dentry-state", .data = &dentry_stat, .maxlen = 6*sizeof(long), .mode = 0444, .proc_handler = proc_nr_dentry, }, }; static int __init init_fs_dcache_sysctls(void) { register_sysctl_init("fs", fs_dcache_sysctls); return 0; } fs_initcall(init_fs_dcache_sysctls); #endif /* * Compare 2 name strings, return 0 if they match, otherwise non-zero. * The strings are both count bytes long, and count is non-zero. */ #ifdef CONFIG_DCACHE_WORD_ACCESS #include <asm/word-at-a-time.h> /* * NOTE! 'cs' and 'scount' come from a dentry, so it has a * aligned allocation for this particular component. We don't * strictly need the load_unaligned_zeropad() safety, but it * doesn't hurt either. * * In contrast, 'ct' and 'tcount' can be from a pathname, and do * need the careful unaligned handling. 
*/ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { unsigned long a,b,mask; for (;;) { a = read_word_at_a_time(cs); b = load_unaligned_zeropad(ct); if (tcount < sizeof(unsigned long)) break; if (unlikely(a != b)) return 1; cs += sizeof(unsigned long); ct += sizeof(unsigned long); tcount -= sizeof(unsigned long); if (!tcount) return 0; } mask = bytemask_from_count(tcount); return unlikely(!!((a ^ b) & mask)); } #else static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) { do { if (*cs != *ct) return 1; cs++; ct++; tcount--; } while (tcount); return 0; } #endif static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) { /* * Be careful about RCU walk racing with rename: * use 'READ_ONCE' to fetch the name pointer. * * NOTE! Even if a rename will mean that the length * was not loaded atomically, we don't care. The * RCU walk will check the sequence count eventually, * and catch it. And we won't overrun the buffer, * because we're reading the name pointer atomically, * and a dentry name is guaranteed to be properly * terminated with a NUL byte. * * End result: even if 'len' is wrong, we'll exit * early because the data cannot match (there can * be no NUL in the ct/tcount data) */ const unsigned char *cs = READ_ONCE(dentry->d_name.name); return dentry_string_cmp(cs, ct, tcount); } struct external_name { union { atomic_t count; struct rcu_head head; } u; unsigned char name[]; }; static inline struct external_name *external_name(struct dentry *dentry) { return container_of(dentry->d_name.name, struct external_name, name[0]); } static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); kmem_cache_free(dentry_cache, dentry); } static void __d_free_external(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); } static inline int dname_external(const struct dentry *dentry) { return dentry->d_name.name != dentry->d_iname; } void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) { spin_lock(&dentry->d_lock); name->name = dentry->d_name; if (unlikely(dname_external(dentry))) { atomic_inc(&external_name(dentry)->u.count); } else { memcpy(name->inline_name, dentry->d_iname, dentry->d_name.len + 1); name->name.name = name->inline_name; } spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(take_dentry_name_snapshot); void release_dentry_name_snapshot(struct name_snapshot *name) { if (unlikely(name->name.name != name->inline_name)) { struct external_name *p; p = container_of(name->name.name, struct external_name, name[0]); if (unlikely(atomic_dec_and_test(&p->u.count))) kfree_rcu(p, u.head); } } EXPORT_SYMBOL(release_dentry_name_snapshot); static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) { unsigned flags; dentry->d_inode = inode; flags = READ_ONCE(dentry->d_flags); flags &= ~DCACHE_ENTRY_TYPE; flags |= type_flags; smp_store_release(&dentry->d_flags, flags); } static inline void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); flags &= ~DCACHE_ENTRY_TYPE; WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; if (dentry->d_flags & DCACHE_LRU_LIST) this_cpu_inc(nr_dentry_negative); } static void dentry_free(struct dentry *dentry) { 
WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); if (likely(atomic_dec_and_test(&p->u.count))) { call_rcu(&dentry->d_u.d_rcu, __d_free_external); return; } } /* if dentry was never visible to RCU, immediate free is OK */ if (dentry->d_flags & DCACHE_NORCU) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); } /* * Release the dentry's inode, using the filesystem * d_iput() operation if defined. */ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_lock) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else iput(inode); } /* * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry * is in use - which includes both the "real" per-superblock * LRU list _and_ the DCACHE_SHRINK_LIST use. * * The DCACHE_SHRINK_LIST bit is set whenever the dentry is * on the shrink list (ie not on the superblock LRU list). * * The per-cpu "nr_dentry_unused" counters are updated with * the DCACHE_LRU_LIST bit. * * The per-cpu "nr_dentry_negative" counters are only updated * when deleted from or added to the per-superblock LRU list, not * from/to the shrink list. That is to avoid an unneeded dec/inc * pair when moving from LRU to shrink list in select_collect(). * * These helper functions make sure we always follow the * rules. d_lock must be held by the caller. */ #define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x)) static void d_lru_add(struct dentry *dentry) { D_FLAG_VERIFY(dentry, 0); dentry->d_flags |= DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); if (d_is_negative(dentry)) this_cpu_inc(nr_dentry_negative); WARN_ON_ONCE(!list_lru_add_obj( &dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } static void d_lru_del(struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); if (d_is_negative(dentry)) this_cpu_dec(nr_dentry_negative); WARN_ON_ONCE(!list_lru_del_obj( &dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } static void d_shrink_del(struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); list_del_init(&dentry->d_lru); dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); this_cpu_dec(nr_dentry_unused); } static void d_shrink_add(struct dentry *dentry, struct list_head *list) { D_FLAG_VERIFY(dentry, 0); list_add(&dentry->d_lru, list); dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); } /* * These can only be called under the global LRU lock, ie during the * callback for freeing the LRU list. "isolate" removes it from the * LRU lists entirely, while shrink_move moves it to the indicated * private list. 
*/ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); if (d_is_negative(dentry)) this_cpu_dec(nr_dentry_negative); list_lru_isolate(lru, &dentry->d_lru); } static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, struct list_head *list) { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags |= DCACHE_SHRINK_LIST; if (d_is_negative(dentry)) this_cpu_dec(nr_dentry_negative); list_lru_isolate_move(lru, &dentry->d_lru, list); } static void ___d_drop(struct dentry *dentry) { struct hlist_bl_head *b; /* * Hashed dentries are normally on the dentry hashtable, * with the exception of those newly allocated by * d_obtain_root, which are always IS_ROOT: */ if (unlikely(IS_ROOT(dentry))) b = &dentry->d_sb->s_roots; else b = d_hash(dentry->d_name.hash); hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); hlist_bl_unlock(b); } void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { ___d_drop(dentry); dentry->d_hash.pprev = NULL; write_seqcount_invalidate(&dentry->d_seq); } } EXPORT_SYMBOL(__d_drop); /** * d_drop - drop a dentry * @dentry: dentry to drop * * d_drop() unhashes the entry from the parent dentry hashes, so that it won't * be found through a VFS lookup any more. Note that this is different from * deleting the dentry - d_delete will try to mark the dentry negative if * possible, giving a successful _negative_ lookup, while d_drop will * just make the cache lookup fail. * * d_drop() is used mainly for stuff that wants to invalidate a dentry for some * reason (NFS timeouts or autofs deletes). * * __d_drop requires dentry->d_lock * * ___d_drop doesn't mark dentry as "unhashed" * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). */ void d_drop(struct dentry *dentry) { spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(d_drop); static inline void dentry_unlist(struct dentry *dentry) { struct dentry *next; /* * Inform d_walk() and shrink_dentry_list() that we are no longer * attached to the dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; if (unlikely(hlist_unhashed(&dentry->d_sib))) return; __hlist_del(&dentry->d_sib); /* * Cursors can move around the list of children. While we'd been * a normal list member, it didn't matter - ->d_sib.next would've * been updated. However, from now on it won't be and for the * things like d_walk() it might end up with a nasty surprise. * Normally d_walk() doesn't care about cursors moving around - * ->d_lock on parent prevents that and since a cursor has no children * of its own, we get through it without ever unlocking the parent. * There is one exception, though - if we ascend from a child that * gets killed as soon as we unlock it, the next sibling is found * using the value left in its ->d_sib.next. And if _that_ * pointed to a cursor, and cursor got moved (e.g. by lseek()) * before d_walk() regains parent->d_lock, we'll end up skipping * everything the cursor had been moved past. * * Solution: make sure that the pointer left behind in ->d_sib.next * points to something that won't be moving around. I.e. skip the * cursors. 
*/ while (dentry->d_sib.next) { next = hlist_entry(dentry->d_sib.next, struct dentry, d_sib); if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) break; dentry->d_sib.next = next->d_sib.next; } } static struct dentry *__dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; bool can_free = true; /* * The dentry is now unrecoverably dead to the world. */ lockref_mark_dead(&dentry->d_lockref); /* * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. */ if (dentry->d_flags & DCACHE_OP_PRUNE) dentry->d_op->d_prune(dentry); if (dentry->d_flags & DCACHE_LRU_LIST) { if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) d_lru_del(dentry); } /* if it was on the hash then remove it */ __d_drop(dentry); if (dentry->d_inode) dentry_unlink_inode(dentry); else spin_unlock(&dentry->d_lock); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); cond_resched(); /* now that it's negative, ->d_parent is stable */ if (!IS_ROOT(dentry)) { parent = dentry->d_parent; spin_lock(&parent->d_lock); } spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry_unlist(dentry); if (dentry->d_flags & DCACHE_SHRINK_LIST) can_free = false; spin_unlock(&dentry->d_lock); if (likely(can_free)) dentry_free(dentry); if (parent && --parent->d_lockref.count) { spin_unlock(&parent->d_lock); return NULL; } return parent; } /* * Lock a dentry for feeding it to __dentry_kill(). * Called under rcu_read_lock() and dentry->d_lock; the former * guarantees that nothing we access will be freed under us. * Note that dentry is *not* protected from concurrent dentry_kill(), * d_delete(), etc. * * Return false if dentry is busy. Otherwise, return true and have * that dentry's inode locked. */ static bool lock_for_kill(struct dentry *dentry) { struct inode *inode = dentry->d_inode; if (unlikely(dentry->d_lockref.count)) return false; if (!inode || likely(spin_trylock(&inode->i_lock))) return true; do { spin_unlock(&dentry->d_lock); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); if (likely(inode == dentry->d_inode)) break; spin_unlock(&inode->i_lock); inode = dentry->d_inode; } while (inode); if (likely(!dentry->d_lockref.count)) return true; if (inode) spin_unlock(&inode->i_lock); return false; } /* * Decide if dentry is worth retaining. Usually this is called with dentry * locked; if not locked, we are more limited and might not be able to tell * without a lock. False in this case means "punt to locked path and recheck". * * In case we aren't locked, these predicates are not "stable". However, it is * sufficient that at some point after we dropped the reference the dentry was * hashed and the flags had the proper value. Other dentry users may have * re-gotten a reference to the dentry and change that, but our work is done - * we can leave the dentry around with a zero refcount. */ static inline bool retain_dentry(struct dentry *dentry, bool locked) { unsigned int d_flags; smp_rmb(); d_flags = READ_ONCE(dentry->d_flags); // Unreachable? 
Nobody would be able to look it up, no point retaining if (unlikely(d_unhashed(dentry))) return false; // Same if it's disconnected if (unlikely(d_flags & DCACHE_DISCONNECTED)) return false; // ->d_delete() might tell us not to bother, but that requires // ->d_lock; can't decide without it if (unlikely(d_flags & DCACHE_OP_DELETE)) { if (!locked || dentry->d_op->d_delete(dentry)) return false; } // Explicitly told not to bother if (unlikely(d_flags & DCACHE_DONTCACHE)) return false; // At this point it looks like we ought to keep it. We also might // need to do something - put it on LRU if it wasn't there already // and mark it referenced if it was on LRU, but not marked yet. // Unfortunately, both actions require ->d_lock, so in lockless // case we'd have to punt rather than doing those. if (unlikely(!(d_flags & DCACHE_LRU_LIST))) { if (!locked) return false; d_lru_add(dentry); } else if (unlikely(!(d_flags & DCACHE_REFERENCED))) { if (!locked) return false; dentry->d_flags |= DCACHE_REFERENCED; } return true; } void d_mark_dontcache(struct inode *inode) { struct dentry *de; spin_lock(&inode->i_lock); hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) { spin_lock(&de->d_lock); de->d_flags |= DCACHE_DONTCACHE; spin_unlock(&de->d_lock); } inode->i_state |= I_DONTCACHE; spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_mark_dontcache); /* * Try to do a lockless dput(), and return whether that was successful. * * If unsuccessful, we return false, having already taken the dentry lock. * In that case refcount is guaranteed to be zero and we have already * decided that it's not worth keeping around. * * The caller needs to hold the RCU read lock, so that the dentry is * guaranteed to stay around even if the refcount goes down to zero! */ static inline bool fast_dput(struct dentry *dentry) { int ret; /* * try to decrement the lockref optimistically. */ ret = lockref_put_return(&dentry->d_lockref); /* * If the lockref_put_return() failed due to the lock being held * by somebody else, the fast path has failed. We will need to * get the lock, and then check the count again. */ if (unlikely(ret < 0)) { spin_lock(&dentry->d_lock); if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) { spin_unlock(&dentry->d_lock); return true; } dentry->d_lockref.count--; goto locked; } /* * If we weren't the last ref, we're done. */ if (ret) return true; /* * Can we decide that decrement of refcount is all we needed without * taking the lock? There's a very common case when it's all we need - * dentry looks like it ought to be retained and there's nothing else * to do. */ if (retain_dentry(dentry, false)) return true; /* * Either not worth retaining or we can't tell without the lock. * Get the lock, then. We've already decremented the refcount to 0, * but we'll need to re-check the situation after getting the lock. */ spin_lock(&dentry->d_lock); /* * Did somebody else grab a reference to it in the meantime, and * we're no longer the last user after all? Alternatively, somebody * else could have killed it and marked it dead. Either way, we * don't need to do anything else. */ locked: if (dentry->d_lockref.count || retain_dentry(dentry, true)) { spin_unlock(&dentry->d_lock); return true; } return false; } /* * This is dput * * This is complicated by the fact that we do not want to put * dentries that are no longer on any hash chain on the unused * list: we'd much rather just get rid of them immediately. 
* * However, that implies that we have to traverse the dentry * tree upwards to the parents which might _also_ now be * scheduled for deletion (it may have been only waiting for * its last child to go away). * * This tail recursion is done by hand as we don't want to depend * on the compiler to always get this right (gcc generally doesn't). * Real recursion would eat up our stack space. */ /* * dput - release a dentry * @dentry: dentry to release * * Release a dentry. This will drop the usage count and if appropriate * call the dentry unlink method as well as removing it from the queues and * releasing its resources. If the parent dentries were scheduled for release * they too may now get deleted. */ void dput(struct dentry *dentry) { if (!dentry) return; might_sleep(); rcu_read_lock(); if (likely(fast_dput(dentry))) { rcu_read_unlock(); return; } while (lock_for_kill(dentry)) { rcu_read_unlock(); dentry = __dentry_kill(dentry); if (!dentry) return; if (retain_dentry(dentry, true)) { spin_unlock(&dentry->d_lock); return; } rcu_read_lock(); } rcu_read_unlock(); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(dput); static void to_shrink_list(struct dentry *dentry, struct list_head *list) __must_hold(&dentry->d_lock) { if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { if (dentry->d_flags & DCACHE_LRU_LIST) d_lru_del(dentry); d_shrink_add(dentry, list); } } void dput_to_list(struct dentry *dentry, struct list_head *list) { rcu_read_lock(); if (likely(fast_dput(dentry))) { rcu_read_unlock(); return; } rcu_read_unlock(); to_shrink_list(dentry, list); spin_unlock(&dentry->d_lock); } struct dentry *dget_parent(struct dentry *dentry) { int gotref; struct dentry *ret; unsigned seq; /* * Do optimistic parent lookup without any * locking. */ rcu_read_lock(); seq = raw_seqcount_begin(&dentry->d_seq); ret = READ_ONCE(dentry->d_parent); gotref = lockref_get_not_zero(&ret->d_lockref); rcu_read_unlock(); if (likely(gotref)) { if (!read_seqcount_retry(&dentry->d_seq, seq)) return ret; dput(ret); } repeat: /* * Don't need rcu_dereference because we re-check it was correct under * the lock. */ rcu_read_lock(); ret = dentry->d_parent; spin_lock(&ret->d_lock); if (unlikely(ret != dentry->d_parent)) { spin_unlock(&ret->d_lock); rcu_read_unlock(); goto repeat; } rcu_read_unlock(); BUG_ON(!ret->d_lockref.count); ret->d_lockref.count++; spin_unlock(&ret->d_lock); return ret; } EXPORT_SYMBOL(dget_parent); static struct dentry * __d_find_any_alias(struct inode *inode) { struct dentry *alias; if (hlist_empty(&inode->i_dentry)) return NULL; alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); lockref_get(&alias->d_lockref); return alias; } /** * d_find_any_alias - find any alias for a given inode * @inode: inode to find an alias for * * If any aliases exist for the given inode, take and return a * reference for one of them. If no aliases exist, return %NULL. 
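 *
 * A minimal usage sketch (hypothetical caller; the reference taken here
 * must always be dropped with dput()):
 *
 *        struct dentry *alias = d_find_any_alias(inode);
 *
 *        if (alias) {
 *                pr_info("inode %lu has alias %pd\n", inode->i_ino, alias);
 *                dput(alias);
 *        }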
*/ struct dentry *d_find_any_alias(struct inode *inode) { struct dentry *de; spin_lock(&inode->i_lock); de = __d_find_any_alias(inode); spin_unlock(&inode->i_lock); return de; } EXPORT_SYMBOL(d_find_any_alias); static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias; if (S_ISDIR(inode->i_mode)) return __d_find_any_alias(inode); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); if (!d_unhashed(alias)) { dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; } spin_unlock(&alias->d_lock); } return NULL; } /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. * Notice that if inode is a directory there can be only one alias and * it can be unhashed only if it has no children, or if it is the root * of a filesystem, or if the directory was renamed and d_revalidate * was the first vfs operation to notice. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer * any other hashed alias over that one. */ struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; if (!hlist_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); de = __d_find_alias(inode); spin_unlock(&inode->i_lock); } return de; } EXPORT_SYMBOL(d_find_alias); /* * Caller MUST be holding rcu_read_lock() and be guaranteed * that inode won't get freed until rcu_read_unlock(). */ struct dentry *d_find_alias_rcu(struct inode *inode) { struct hlist_head *l = &inode->i_dentry; struct dentry *de = NULL; spin_lock(&inode->i_lock); // ->i_dentry and ->i_rcu are colocated, but the latter won't be // used without having I_FREEING set, which means no aliases left if (likely(!(inode->i_state & I_FREEING) && !hlist_empty(l))) { if (S_ISDIR(inode->i_mode)) { de = hlist_entry(l->first, struct dentry, d_u.d_alias); } else { hlist_for_each_entry(de, l, d_u.d_alias) if (!d_unhashed(de)) break; } } spin_unlock(&inode->i_lock); return de; } /* * Try to kill dentries associated with this inode. * WARNING: you must own a reference to inode. 
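 *
 * Rough caller sketch (hypothetical): the inode reference is held across
 * the call and only dropped afterwards:
 *
 *        d_prune_aliases(inode);
 *        iput(inode);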
*/ void d_prune_aliases(struct inode *inode) { LIST_HEAD(dispose); struct dentry *dentry; spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_lockref.count) to_shrink_list(dentry, &dispose); spin_unlock(&dentry->d_lock); } spin_unlock(&inode->i_lock); shrink_dentry_list(&dispose); } EXPORT_SYMBOL(d_prune_aliases); static inline void shrink_kill(struct dentry *victim) { do { rcu_read_unlock(); victim = __dentry_kill(victim); rcu_read_lock(); } while (victim && lock_for_kill(victim)); rcu_read_unlock(); if (victim) spin_unlock(&victim->d_lock); } void shrink_dentry_list(struct list_head *list) { while (!list_empty(list)) { struct dentry *dentry; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); rcu_read_lock(); if (!lock_for_kill(dentry)) { bool can_free; rcu_read_unlock(); d_shrink_del(dentry); can_free = dentry->d_flags & DCACHE_DENTRY_KILLED; spin_unlock(&dentry->d_lock); if (can_free) dentry_free(dentry); continue; } d_shrink_del(dentry); shrink_kill(dentry); } } static enum lru_status dentry_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) { struct list_head *freeable = arg; struct dentry *dentry = container_of(item, struct dentry, d_lru); /* * we are inverting the lru lock/dentry->d_lock here, * so use a trylock. If we fail to get the lock, just skip * it */ if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; /* * Referenced dentries are still in use. If they have active * counts, just remove them from the LRU. Otherwise give them * another pass through the LRU. */ if (dentry->d_lockref.count) { d_lru_isolate(lru, dentry); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } if (dentry->d_flags & DCACHE_REFERENCED) { dentry->d_flags &= ~DCACHE_REFERENCED; spin_unlock(&dentry->d_lock); /* * The list move itself will be made by the common LRU code. At * this point, we've dropped the dentry->d_lock but keep the * lru lock. This is safe to do, since every list movement is * protected by the lru lock even if both locks are held. * * This is guaranteed by the fact that all LRU management * functions are intermediated by the LRU API calls like * list_lru_add_obj and list_lru_del_obj. List movement in this file * only ever occur through this functions or through callbacks * like this one, that are called from the LRU API. * * The only exceptions to this are functions like * shrink_dentry_list, and code that first checks for the * DCACHE_SHRINK_LIST flag. Those are guaranteed to be * operating only with stack provided lists after they are * properly isolated from the main list. It is thus, always a * local access. */ return LRU_ROTATE; } d_lru_shrink_move(lru, dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } /** * prune_dcache_sb - shrink the dcache * @sb: superblock * @sc: shrink control, passed to list_lru_shrink_walk() * * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This * is done when we need more memory and called from the superblock shrinker * function. * * This function may fail to free any resources if all the dentries are in * use. 
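 *
 * A simplified sketch of how the superblock shrinker is expected to drive
 * this (not the literal fs/super.c code; the values are illustrative):
 *
 *        struct shrink_control sc = {
 *                .gfp_mask   = GFP_KERNEL,
 *                .nr_to_scan = 1024,
 *        };
 *        long freed = prune_dcache_sb(sb, &sc);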
*/ long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc) { LIST_HEAD(dispose); long freed; freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc, dentry_lru_isolate, &dispose); shrink_dentry_list(&dispose); return freed; } static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) { struct list_head *freeable = arg; struct dentry *dentry = container_of(item, struct dentry, d_lru); /* * we are inverting the lru lock/dentry->d_lock here, * so use a trylock. If we fail to get the lock, just skip * it */ if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; d_lru_shrink_move(lru, dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } /** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock * * Shrink the dcache for the specified super block. This is used to free * the dcache before unmounting a file system. */ void shrink_dcache_sb(struct super_block *sb) { do { LIST_HEAD(dispose); list_lru_walk(&sb->s_dentry_lru, dentry_lru_isolate_shrink, &dispose, 1024); shrink_dentry_list(&dispose); } while (list_lru_count(&sb->s_dentry_lru) > 0); } EXPORT_SYMBOL(shrink_dcache_sb); /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk * @D_WALK_QUIT: quit walk * @D_WALK_NORETRY: quit when retry is needed * @D_WALK_SKIP: skip this dentry and its children */ enum d_walk_ret { D_WALK_CONTINUE, D_WALK_QUIT, D_WALK_NORETRY, D_WALK_SKIP, }; /** * d_walk - walk the dentry tree * @parent: start of walk * @data: data passed to @enter() and @finish() * @enter: callback when first entering the dentry * * The @enter() callbacks are called with d_lock held. */ static void d_walk(struct dentry *parent, void *data, enum d_walk_ret (*enter)(void *, struct dentry *)) { struct dentry *this_parent, *dentry; unsigned seq = 0; enum d_walk_ret ret; bool retry = true; again: read_seqbegin_or_lock(&rename_lock, &seq); this_parent = parent; spin_lock(&this_parent->d_lock); ret = enter(data, this_parent); switch (ret) { case D_WALK_CONTINUE: break; case D_WALK_QUIT: case D_WALK_SKIP: goto out_unlock; case D_WALK_NORETRY: retry = false; break; } repeat: dentry = d_first_child(this_parent); resume: hlist_for_each_entry_from(dentry, d_sib) { if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) continue; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); switch (ret) { case D_WALK_CONTINUE: break; case D_WALK_QUIT: spin_unlock(&dentry->d_lock); goto out_unlock; case D_WALK_NORETRY: retry = false; break; case D_WALK_SKIP: spin_unlock(&dentry->d_lock); continue; } if (!hlist_empty(&dentry->d_children)) { spin_unlock(&this_parent->d_lock); spin_release(&dentry->d_lock.dep_map, _RET_IP_); this_parent = dentry; spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ rcu_read_lock(); ascend: if (this_parent != parent) { dentry = this_parent; this_parent = dentry->d_parent; spin_unlock(&dentry->d_lock); spin_lock(&this_parent->d_lock); /* might go back up the wrong parent if we have had a rename. 
*/ if (need_seqretry(&rename_lock, seq)) goto rename_retry; /* go into the first sibling still alive */ hlist_for_each_entry_continue(dentry, d_sib) { if (likely(!(dentry->d_flags & DCACHE_DENTRY_KILLED))) { rcu_read_unlock(); goto resume; } } goto ascend; } if (need_seqretry(&rename_lock, seq)) goto rename_retry; rcu_read_unlock(); out_unlock: spin_unlock(&this_parent->d_lock); done_seqretry(&rename_lock, seq); return; rename_retry: spin_unlock(&this_parent->d_lock); rcu_read_unlock(); BUG_ON(seq & 1); if (!retry) return; seq = 1; goto again; } struct check_mount { struct vfsmount *mnt; unsigned int mounted; }; static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry) { struct check_mount *info = data; struct path path = { .mnt = info->mnt, .dentry = dentry }; if (likely(!d_mountpoint(dentry))) return D_WALK_CONTINUE; if (__path_is_mountpoint(&path)) { info->mounted = 1; return D_WALK_QUIT; } return D_WALK_CONTINUE; } /** * path_has_submounts - check for mounts over a dentry in the * current namespace. * @parent: path to check. * * Return true if the parent or its subdirectories contain * a mount point in the current namespace. */ int path_has_submounts(const struct path *parent) { struct check_mount data = { .mnt = parent->mnt, .mounted = 0 }; read_seqlock_excl(&mount_lock); d_walk(parent->dentry, &data, path_check_mount); read_sequnlock_excl(&mount_lock); return data.mounted; } EXPORT_SYMBOL(path_has_submounts); /* * Called by mount code to set a mountpoint and check if the mountpoint is * reachable (e.g. NFS can unhash a directory dentry and then the complete * subtree can become unreachable). * * Only one of d_invalidate() and d_set_mounted() must succeed. For * this reason take rename_lock and d_lock on dentry and ancestors. */ int d_set_mounted(struct dentry *dentry) { struct dentry *p; int ret = -ENOENT; write_seqlock(&rename_lock); for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { /* Need exclusion wrt. d_invalidate() */ spin_lock(&p->d_lock); if (unlikely(d_unhashed(p))) { spin_unlock(&p->d_lock); goto out; } spin_unlock(&p->d_lock); } spin_lock(&dentry->d_lock); if (!d_unlinked(dentry)) { ret = -EBUSY; if (!d_mountpoint(dentry)) { dentry->d_flags |= DCACHE_MOUNTED; ret = 0; } } spin_unlock(&dentry->d_lock); out: write_sequnlock(&rename_lock); return ret; } /* * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level * whenever the d_children list is non-empty and continue * searching. * * It returns zero iff there are no unused children, * otherwise it returns the number of children moved to * the end of the unused list. This may not be the total * number of unused children, because select_parent can * drop the lock and return early due to latency * constraints. */ struct select_data { struct dentry *start; union { long found; struct dentry *victim; }; struct list_head dispose; }; static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) { struct select_data *data = _data; enum d_walk_ret ret = D_WALK_CONTINUE; if (data->start == dentry) goto out; if (dentry->d_flags & DCACHE_SHRINK_LIST) { data->found++; } else if (!dentry->d_lockref.count) { to_shrink_list(dentry, &data->dispose); data->found++; } else if (dentry->d_lockref.count < 0) { data->found++; } /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. 
*/ if (!list_empty(&data->dispose)) ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; out: return ret; } static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) { struct select_data *data = _data; enum d_walk_ret ret = D_WALK_CONTINUE; if (data->start == dentry) goto out; if (!dentry->d_lockref.count) { if (dentry->d_flags & DCACHE_SHRINK_LIST) { rcu_read_lock(); data->victim = dentry; return D_WALK_QUIT; } to_shrink_list(dentry, &data->dispose); } /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ if (!list_empty(&data->dispose)) ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; out: return ret; } /** * shrink_dcache_parent - prune dcache * @parent: parent of entries to prune * * Prune the dcache to remove unused children of the parent dentry. */ void shrink_dcache_parent(struct dentry *parent) { for (;;) { struct select_data data = {.start = parent}; INIT_LIST_HEAD(&data.dispose); d_walk(parent, &data, select_collect); if (!list_empty(&data.dispose)) { shrink_dentry_list(&data.dispose); continue; } cond_resched(); if (!data.found) break; data.victim = NULL; d_walk(parent, &data, select_collect2); if (data.victim) { spin_lock(&data.victim->d_lock); if (!lock_for_kill(data.victim)) { spin_unlock(&data.victim->d_lock); rcu_read_unlock(); } else { shrink_kill(data.victim); } } if (!list_empty(&data.dispose)) shrink_dentry_list(&data.dispose); } } EXPORT_SYMBOL(shrink_dcache_parent); static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) { /* it has busy descendents; complain about those instead */ if (!hlist_empty(&dentry->d_children)) return D_WALK_CONTINUE; /* root with refcount 1 is fine */ if (dentry == _data && dentry->d_lockref.count == 1) return D_WALK_CONTINUE; WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} " " still in use (%d) [unmount of %s %s]\n", dentry, dentry->d_inode ? 
dentry->d_inode->i_ino : 0UL, dentry, dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); return D_WALK_CONTINUE; } static void do_one_tree(struct dentry *dentry) { shrink_dcache_parent(dentry); d_walk(dentry, dentry, umount_check); d_drop(dentry); dput(dentry); } /* * destroy the dentries attached to a superblock on unmounting */ void shrink_dcache_for_umount(struct super_block *sb) { struct dentry *dentry; WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); dentry = sb->s_root; sb->s_root = NULL; do_one_tree(dentry); while (!hlist_bl_empty(&sb->s_roots)) { dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash)); do_one_tree(dentry); } } static enum d_walk_ret find_submount(void *_data, struct dentry *dentry) { struct dentry **victim = _data; if (d_mountpoint(dentry)) { *victim = dget_dlock(dentry); return D_WALK_QUIT; } return D_WALK_CONTINUE; } /** * d_invalidate - detach submounts, prune dcache, and drop * @dentry: dentry to invalidate (aka detach, prune and drop) */ void d_invalidate(struct dentry *dentry) { bool had_submounts = false; spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); return; } __d_drop(dentry); spin_unlock(&dentry->d_lock); /* Negative dentries can be dropped without further checks */ if (!dentry->d_inode) return; shrink_dcache_parent(dentry); for (;;) { struct dentry *victim = NULL; d_walk(dentry, &victim, find_submount); if (!victim) { if (had_submounts) shrink_dcache_parent(dentry); return; } had_submounts = true; detach_mounts(victim); dput(victim); } } EXPORT_SYMBOL(d_invalidate); /** * __d_alloc - allocate a dcache entry * @sb: filesystem it will belong to * @name: qstr of the name * * Allocates a dentry. It returns %NULL if there is insufficient memory * available. On a success the dentry is returned. The name passed in is * copied and the copy passed in may be reused after this call. */ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { struct dentry *dentry; char *dname; int err; dentry = kmem_cache_alloc_lru(dentry_cache, &sb->s_dentry_lru, GFP_KERNEL); if (!dentry) return NULL; /* * We guarantee that the inline name is always NUL-terminated. 
* This way the memcpy() done by the name switching in rename * will still always have a NUL at the end, even if we might * be overwriting an internal NUL character */ dentry->d_iname[DNAME_INLINE_LEN-1] = 0; if (unlikely(!name)) { name = &slash_name; dname = dentry->d_iname; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); struct external_name *p = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); if (!p) { kmem_cache_free(dentry_cache, dentry); return NULL; } atomic_set(&p->u.count, 1); dname = p->name; } else { dname = dentry->d_iname; } dentry->d_name.len = name->len; dentry->d_name.hash = name->hash; memcpy(dname, name->name, name->len); dname[name->len] = 0; /* Make sure we always see the terminating NUL character */ smp_store_release(&dentry->d_name.name, dname); /* ^^^ */ dentry->d_lockref.count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); dentry->d_inode = NULL; dentry->d_parent = dentry; dentry->d_sb = sb; dentry->d_op = NULL; dentry->d_fsdata = NULL; INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_HLIST_HEAD(&dentry->d_children); INIT_HLIST_NODE(&dentry->d_u.d_alias); INIT_HLIST_NODE(&dentry->d_sib); d_set_d_op(dentry, dentry->d_sb->s_d_op); if (dentry->d_op && dentry->d_op->d_init) { err = dentry->d_op->d_init(dentry); if (err) { if (dname_external(dentry)) kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); return NULL; } } this_cpu_inc(nr_dentry); return dentry; } /** * d_alloc - allocate a dcache entry * @parent: parent of entry to allocate * @name: qstr of the name * * Allocates a dentry. It returns %NULL if there is insufficient memory * available. On a success the dentry is returned. The name passed in is * copied and the copy passed in may be reused after this call. */ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) { struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject * to concurrency here */ dentry->d_parent = dget_dlock(parent); hlist_add_head(&dentry->d_sib, &parent->d_children); spin_unlock(&parent->d_lock); return dentry; } EXPORT_SYMBOL(d_alloc); struct dentry *d_alloc_anon(struct super_block *sb) { return __d_alloc(sb, NULL); } EXPORT_SYMBOL(d_alloc_anon); struct dentry *d_alloc_cursor(struct dentry * parent) { struct dentry *dentry = d_alloc_anon(parent->d_sb); if (dentry) { dentry->d_flags |= DCACHE_DENTRY_CURSOR; dentry->d_parent = dget(parent); } return dentry; } /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock * @name: qstr of the name * * For a filesystem that just pins its dentries in memory and never * performs lookups at all, return an unhashed IS_ROOT dentry. * This is used for pipes, sockets et.al. - the stuff that should * never be anyone's children or parents. Unlike all other * dentries, these will not have RCU delay between dropping the * last reference and freeing them. * * The only user is alloc_file_pseudo() and that's what should * be considered a public interface. Don't use directly. 
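 *
 * Rough sketch of the intended path through alloc_file_pseudo() (the
 * my_pseudo_mnt and my_pseudo_fops names are hypothetical placeholders):
 *
 *        file = alloc_file_pseudo(inode, my_pseudo_mnt, "my-pseudo",
 *                                 O_RDWR, &my_pseudo_fops);
 *        if (IS_ERR(file))
 *                return PTR_ERR(file);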
*/ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) { static const struct dentry_operations anon_ops = { .d_dname = simple_dname }; struct dentry *dentry = __d_alloc(sb, name); if (likely(dentry)) { dentry->d_flags |= DCACHE_NORCU; if (!sb->s_d_op) d_set_d_op(dentry, &anon_ops); } return dentry; } struct dentry *d_alloc_name(struct dentry *parent, const char *name) { struct qstr q; q.name = name; q.hash_len = hashlen_string(parent, name); return d_alloc(parent, &q); } EXPORT_SYMBOL(d_alloc_name); void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) { WARN_ON_ONCE(dentry->d_op); WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | DCACHE_OP_REAL)); dentry->d_op = op; if (!op) return; if (op->d_hash) dentry->d_flags |= DCACHE_OP_HASH; if (op->d_compare) dentry->d_flags |= DCACHE_OP_COMPARE; if (op->d_revalidate) dentry->d_flags |= DCACHE_OP_REVALIDATE; if (op->d_weak_revalidate) dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE; if (op->d_delete) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) dentry->d_flags |= DCACHE_OP_PRUNE; if (op->d_real) dentry->d_flags |= DCACHE_OP_REAL; } EXPORT_SYMBOL(d_set_d_op); static unsigned d_flags_for_inode(struct inode *inode) { unsigned add_flags = DCACHE_REGULAR_TYPE; if (!inode) return DCACHE_MISS_TYPE; if (S_ISDIR(inode->i_mode)) { add_flags = DCACHE_DIRECTORY_TYPE; if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { if (unlikely(!inode->i_op->lookup)) add_flags = DCACHE_AUTODIR_TYPE; else inode->i_opflags |= IOP_LOOKUP; } goto type_determined; } if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { if (unlikely(inode->i_op->get_link)) { add_flags = DCACHE_SYMLINK_TYPE; goto type_determined; } inode->i_opflags |= IOP_NOFOLLOW; } if (unlikely(!S_ISREG(inode->i_mode))) add_flags = DCACHE_SPECIAL_TYPE; type_determined: if (unlikely(IS_AUTOMOUNT(inode))) add_flags |= DCACHE_NEED_AUTOMOUNT; return add_flags; } static void __d_instantiate(struct dentry *dentry, struct inode *inode) { unsigned add_flags = d_flags_for_inode(inode); WARN_ON(d_in_lookup(dentry)); spin_lock(&dentry->d_lock); /* * Decrement negative dentry count if it was in the LRU list. */ if (dentry->d_flags & DCACHE_LRU_LIST) this_cpu_dec(nr_dentry_negative); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); spin_unlock(&dentry->d_lock); } /** * d_instantiate - fill in inode information for a dentry * @entry: dentry to complete * @inode: inode to attach to this dentry * * Fill in inode information in the entry. * * This turns negative dentries into productive full members * of society. * * NOTE! This assumes that the inode count has been incremented * (or otherwise set) by the caller to indicate that it is now * in use by the dcache. */ void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); __d_instantiate(entry, inode); spin_unlock(&inode->i_lock); } } EXPORT_SYMBOL(d_instantiate); /* * This should be equivalent to d_instantiate() + unlock_new_inode(), * with lockdep-related part of unlock_new_inode() done before * anything else. Use that instead of open-coding d_instantiate()/ * unlock_new_inode() combinations. 
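 *
 * Sketch of the usual pattern in a ->create() method (foo_new_inode() is a
 * hypothetical helper returning a new in-core inode or an ERR_PTR):
 *
 *        inode = foo_new_inode(dir, mode);
 *        if (IS_ERR(inode))
 *                return PTR_ERR(inode);
 *        d_instantiate_new(dentry, inode);
 *        return 0;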
*/ void d_instantiate_new(struct dentry *entry, struct inode *inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); BUG_ON(!inode); lockdep_annotate_inode_mutex_key(inode); security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); __d_instantiate(entry, inode); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_instantiate_new); struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; if (root_inode) { res = d_alloc_anon(root_inode->i_sb); if (res) d_instantiate(res, root_inode); else iput(root_inode); } return res; } EXPORT_SYMBOL(d_make_root); static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) { struct super_block *sb; struct dentry *new, *res; if (!inode) return ERR_PTR(-ESTALE); if (IS_ERR(inode)) return ERR_CAST(inode); sb = inode->i_sb; res = d_find_any_alias(inode); /* existing alias? */ if (res) goto out; new = d_alloc_anon(sb); if (!new) { res = ERR_PTR(-ENOMEM); goto out; } security_d_instantiate(new, inode); spin_lock(&inode->i_lock); res = __d_find_any_alias(inode); /* recheck under lock */ if (likely(!res)) { /* still no alias, attach a disconnected dentry */ unsigned add_flags = d_flags_for_inode(inode); if (disconnected) add_flags |= DCACHE_DISCONNECTED; spin_lock(&new->d_lock); __d_set_inode_and_type(new, inode, add_flags); hlist_add_head(&new->d_u.d_alias, &inode->i_dentry); if (!disconnected) { hlist_bl_lock(&sb->s_roots); hlist_bl_add_head(&new->d_hash, &sb->s_roots); hlist_bl_unlock(&sb->s_roots); } spin_unlock(&new->d_lock); spin_unlock(&inode->i_lock); inode = NULL; /* consumed by new->d_inode */ res = new; } else { spin_unlock(&inode->i_lock); dput(new); } out: iput(inode); return res; } /** * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode * @inode: inode to allocate the dentry for * * Obtain a dentry for an inode resulting from NFS filehandle conversion or * similar open by handle operations. The returned dentry may be anonymous, * or may have a full name (if the inode was already in the cache). * * When called on a directory inode, we must ensure that the inode only ever * has one dentry. If a dentry is found, that is returned instead of * allocating a new one. * * On successful return, the reference to the inode has been transferred * to the dentry. In case of an error the reference on the inode is released. * To make it easier to use in export operations a %NULL or IS_ERR inode may * be passed in and the error will be propagated to the return value, * with a %NULL @inode replaced by ERR_PTR(-ESTALE). */ struct dentry *d_obtain_alias(struct inode *inode) { return __d_obtain_alias(inode, true); } EXPORT_SYMBOL(d_obtain_alias); /** * d_obtain_root - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for * * Obtain an IS_ROOT dentry for the root of a filesystem. * * We must ensure that directory inodes only ever have one dentry. If a * dentry is found, that is returned instead of allocating a new one. * * On successful return, the reference to the inode has been transferred * to the dentry. In case of an error the reference on the inode is * released. A %NULL or IS_ERR inode may be passed in and will be the * error will be propagate to the return value, with a %NULL @inode * replaced by ERR_PTR(-ESTALE). 
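 *
 * Simplified usage sketch (my_fs_root_inode() is a hypothetical helper that
 * returns a referenced inode or an ERR_PTR):
 *
 *        root = d_obtain_root(my_fs_root_inode(sb));
 *        if (IS_ERR(root))
 *                return PTR_ERR(root);
 *        sb->s_root = root;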
*/ struct dentry *d_obtain_root(struct inode *inode) { return __d_obtain_alias(inode, false); } EXPORT_SYMBOL(d_obtain_root); /** * d_add_ci - lookup or allocate new dentry with case-exact name * @inode: the inode case-insensitive lookup has found * @dentry: the negative dentry that was passed to the parent's lookup func * @name: the case-exact name to be associated with the returned dentry * * This is to avoid filling the dcache with case-insensitive names to the * same inode, only the actual correct case is stored in the dcache for * case-insensitive filesystems. * * For a case-insensitive lookup match and if the case-exact dentry * already exists in the dcache, use it and return it. * * If no entry exists with the exact case name, allocate new dentry with * the exact case, and return the spliced entry. */ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, struct qstr *name) { struct dentry *found, *res; /* * First check if a dentry matching the name already exists, * if not go ahead and create it now. */ found = d_hash_and_lookup(dentry->d_parent, name); if (found) { iput(inode); return found; } if (d_in_lookup(dentry)) { found = d_alloc_parallel(dentry->d_parent, name, dentry->d_wait); if (IS_ERR(found) || !d_in_lookup(found)) { iput(inode); return found; } } else { found = d_alloc(dentry->d_parent, name); if (!found) { iput(inode); return ERR_PTR(-ENOMEM); } } res = d_splice_alias(inode, found); if (res) { d_lookup_done(found); dput(found); return res; } return found; } EXPORT_SYMBOL(d_add_ci); /** * d_same_name - compare dentry name with case-exact name * @parent: parent dentry * @dentry: the negative dentry that was passed to the parent's lookup func * @name: the case-exact name to be associated with the returned dentry * * Return: true if names are same, or false */ bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name) { if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) { if (dentry->d_name.len != name->len) return false; return dentry_cmp(dentry, name->name, name->len) == 0; } return parent->d_op->d_compare(dentry, dentry->d_name.len, dentry->d_name.name, name) == 0; } EXPORT_SYMBOL_GPL(d_same_name); /* * This is __d_lookup_rcu() when the parent dentry has * DCACHE_OP_COMPARE, which makes things much nastier. 
*/ static noinline struct dentry *__d_lookup_rcu_op_compare( const struct dentry *parent, const struct qstr *name, unsigned *seqp) { u64 hashlen = name->hash_len; struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen)); struct hlist_bl_node *node; struct dentry *dentry; hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { int tlen; const char *tname; unsigned seq; seqretry: seq = raw_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) continue; if (dentry->d_name.hash != hashlen_hash(hashlen)) continue; tlen = dentry->d_name.len; tname = dentry->d_name.name; /* we want a consistent (name,len) pair */ if (read_seqcount_retry(&dentry->d_seq, seq)) { cpu_relax(); goto seqretry; } if (parent->d_op->d_compare(dentry, tlen, tname, name) != 0) continue; *seqp = seq; return dentry; } return NULL; } /** * __d_lookup_rcu - search for a dentry (racy, store-free) * @parent: parent dentry * @name: qstr of name we wish to find * @seqp: returns d_seq value at the point where the dentry was found * Returns: dentry, or NULL * * __d_lookup_rcu is the dcache lookup function for rcu-walk name * resolution (store-free path walking) design described in * Documentation/filesystems/path-lookup.txt. * * This is not to be used outside core vfs. * * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock * held, and rcu_read_lock held. The returned dentry must not be stored into * without taking d_lock and checking d_seq sequence count against @seq * returned here. * * A refcount may be taken on the found dentry with the d_rcu_to_refcount * function. * * Alternatively, __d_lookup_rcu may be called again to look up the child of * the returned dentry, so long as its parent's seqlock is checked after the * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. * * NOTE! The caller *has* to check the resulting dentry against the sequence * number we've returned before using any of the resulting dentry state! */ struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seqp) { u64 hashlen = name->hash_len; const unsigned char *str = name->name; struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen)); struct hlist_bl_node *node; struct dentry *dentry; /* * Note: There is significant duplication with __d_lookup_rcu which is * required to prevent single threaded performance regressions * especially on architectures where smp_rmb (in seqcounts) are costly. * Keep the two functions in sync. */ if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) return __d_lookup_rcu_op_compare(parent, name, seqp); /* * The hash list is protected using RCU. * * Carefully use d_seq when comparing a candidate dentry, to avoid * races with d_move(). * * It is possible that concurrent renames can mess up our list * walk here and result in missing our dentry, resulting in the * false-negative result. d_lookup() protects against concurrent * renames using rename_lock seqlock. * * See Documentation/filesystems/path-lookup.txt for more details. */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { unsigned seq; /* * The dentry sequence count protects us from concurrent * renames, and thus protects parent and name fields. * * The caller must perform a seqcount check in order * to do anything useful with the returned dentry. * * NOTE! We do a "raw" seqcount_begin here. That means that * we don't wait for the sequence count to stabilize if it * is in the middle of a sequence change. 
If we do the slow * dentry compare, we will do seqretries until it is stable, * and if we end up with a successful lookup, we actually * want to exit RCU lookup anyway. * * Note that raw_seqcount_begin still *does* smp_rmb(), so * we are still guaranteed NUL-termination of ->d_name.name. */ seq = raw_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) continue; if (dentry->d_name.hash_len != hashlen) continue; if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0) continue; *seqp = seq; return dentry; } return NULL; } /** * d_lookup - search for a dentry * @parent: parent dentry * @name: qstr of name we wish to find * Returns: dentry, or NULL * * d_lookup searches the children of the parent dentry for the name in * question. If the dentry is found its reference count is incremented and the * dentry is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned if the dentry does not exist. */ struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) { struct dentry *dentry; unsigned seq; do { seq = read_seqbegin(&rename_lock); dentry = __d_lookup(parent, name); if (dentry) break; } while (read_seqretry(&rename_lock, seq)); return dentry; } EXPORT_SYMBOL(d_lookup); /** * __d_lookup - search for a dentry (racy) * @parent: parent dentry * @name: qstr of name we wish to find * Returns: dentry, or NULL * * __d_lookup is like d_lookup, however it may (rarely) return a * false-negative result due to unrelated rename activity. * * __d_lookup is slightly faster by avoiding rename_lock read seqlock, * however it must be used carefully, eg. with a following d_lookup in * the case of failure. * * __d_lookup callers must be commented. */ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) { unsigned int hash = name->hash; struct hlist_bl_head *b = d_hash(hash); struct hlist_bl_node *node; struct dentry *found = NULL; struct dentry *dentry; /* * Note: There is significant duplication with __d_lookup_rcu which is * required to prevent single threaded performance regressions * especially on architectures where smp_rmb (in seqcounts) are costly. * Keep the two functions in sync. */ /* * The hash list is protected using RCU. * * Take d_lock when comparing a candidate dentry, to avoid races * with d_move(). * * It is possible that concurrent renames can mess up our list * walk here and result in missing our dentry, resulting in the * false-negative result. d_lookup() protects against concurrent * renames using rename_lock seqlock. * * See Documentation/filesystems/path-lookup.txt for more details. */ rcu_read_lock(); hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { if (dentry->d_name.hash != hash) continue; spin_lock(&dentry->d_lock); if (dentry->d_parent != parent) goto next; if (d_unhashed(dentry)) goto next; if (!d_same_name(dentry, parent, name)) goto next; dentry->d_lockref.count++; found = dentry; spin_unlock(&dentry->d_lock); break; next: spin_unlock(&dentry->d_lock); } rcu_read_unlock(); return found; } /** * d_hash_and_lookup - hash the qstr then search for a dentry * @dir: Directory to search in * @name: qstr of name we wish to find * * On lookup failure NULL is returned; on bad name - ERR_PTR(-error) */ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) { /* * Check for a fs-specific hash function. Note that we must * calculate the standard hash first, as the d_op->d_hash() * routine may choose to leave the hash value unchanged. 
*/ name->hash = full_name_hash(dir, name->name, name->len); if (dir->d_flags & DCACHE_OP_HASH) { int err = dir->d_op->d_hash(dir, name); if (unlikely(err < 0)) return ERR_PTR(err); } return d_lookup(dir, name); } EXPORT_SYMBOL(d_hash_and_lookup); /* * When a file is deleted, we have two options: * - turn this dentry into a negative dentry * - unhash this dentry and free it. * * Usually, we want to just turn this into * a negative dentry, but if anybody else is * currently using the dentry or the inode * we can't do that and we fall back on removing * it from the hash queues and waiting for * it to be deleted later when it has no users */ /** * d_delete - delete a dentry * @dentry: The dentry to delete * * Turn the dentry into a negative dentry if possible, otherwise * remove it from the hash queues so it can be deleted later */ void d_delete(struct dentry * dentry) { struct inode *inode = dentry->d_inode; spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); /* * Are we the only user? */ if (dentry->d_lockref.count == 1) { dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_unlink_inode(dentry); } else { __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); } } EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry *entry) { struct hlist_bl_head *b = d_hash(entry->d_name.hash); hlist_bl_lock(b); hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } /** * d_rehash - add an entry back to the hash * @entry: dentry to add to the hash * * Adds a dentry to the hash according to its name. */ void d_rehash(struct dentry * entry) { spin_lock(&entry->d_lock); __d_rehash(entry); spin_unlock(&entry->d_lock); } EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { preempt_disable_nested(); for (;;) { unsigned n = dir->i_dir_seq; if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) return n; cpu_relax(); } } static inline void end_dir_add(struct inode *dir, unsigned int n, wait_queue_head_t *d_wait) { smp_store_release(&dir->i_dir_seq, n + 2); preempt_enable_nested(); wake_up_all(d_wait); } static void d_wait_lookup(struct dentry *dentry) { if (d_in_lookup(dentry)) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(dentry->d_wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(&dentry->d_lock); schedule(); spin_lock(&dentry->d_lock); } while (d_in_lookup(dentry)); } } struct dentry *d_alloc_parallel(struct dentry *parent, const struct qstr *name, wait_queue_head_t *wq) { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); struct hlist_bl_node *node; struct dentry *new = d_alloc(parent, name); struct dentry *dentry; unsigned seq, r_seq, d_seq; if (unlikely(!new)) return ERR_PTR(-ENOMEM); retry: rcu_read_lock(); seq = smp_load_acquire(&parent->d_inode->i_dir_seq); r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } if (read_seqcount_retry(&dentry->d_seq, d_seq)) { rcu_read_unlock(); dput(dentry); goto retry; } rcu_read_unlock(); dput(new); return dentry; } if (unlikely(read_seqretry(&rename_lock, r_seq))) { rcu_read_unlock(); goto retry; } if (unlikely(seq & 1)) { rcu_read_unlock(); goto retry; } hlist_bl_lock(b); if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { hlist_bl_unlock(b); rcu_read_unlock(); goto retry; } /* * No changes for the parent since the beginning of d_lookup(). 
* Since all removals from the chain happen with hlist_bl_lock(), * any potential in-lookup matches are going to stay here until * we unlock the chain. All fields are stable in everything * we encounter. */ hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) { if (dentry->d_name.hash != hash) continue; if (dentry->d_parent != parent) continue; if (!d_same_name(dentry, parent, name)) continue; hlist_bl_unlock(b); /* now we can try to grab a reference */ if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } rcu_read_unlock(); /* * somebody is likely to be still doing lookup for it; * wait for them to finish */ spin_lock(&dentry->d_lock); d_wait_lookup(dentry); /* * it's not in-lookup anymore; in principle we should repeat * everything from dcache lookup, but it's likely to be what * d_lookup() would've found anyway. If it is, just return it; * otherwise we really have to repeat the whole thing. */ if (unlikely(dentry->d_name.hash != hash)) goto mismatch; if (unlikely(dentry->d_parent != parent)) goto mismatch; if (unlikely(d_unhashed(dentry))) goto mismatch; if (unlikely(!d_same_name(dentry, parent, name))) goto mismatch; /* OK, it *is* a hashed match; return it */ spin_unlock(&dentry->d_lock); dput(new); return dentry; } rcu_read_unlock(); /* we can't take ->d_lock here; it's OK, though. */ new->d_flags |= DCACHE_PAR_LOOKUP; new->d_wait = wq; hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b); hlist_bl_unlock(b); return new; mismatch: spin_unlock(&dentry->d_lock); dput(dentry); goto retry; } EXPORT_SYMBOL(d_alloc_parallel); /* * - Unhash the dentry * - Retrieve and clear the waitqueue head in dentry * - Return the waitqueue head */ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) { wait_queue_head_t *d_wait; struct hlist_bl_head *b; lockdep_assert_held(&dentry->d_lock); b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); d_wait = dentry->d_wait; dentry->d_wait = NULL; hlist_bl_unlock(b); INIT_HLIST_NODE(&dentry->d_u.d_alias); INIT_LIST_HEAD(&dentry->d_lru); return d_wait; } void __d_lookup_unhash_wake(struct dentry *dentry) { spin_lock(&dentry->d_lock); wake_up_all(__d_lookup_unhash(dentry)); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(__d_lookup_unhash_wake); /* inode->i_lock held if inode is non-NULL */ static inline void __d_add(struct dentry *dentry, struct inode *inode) { wait_queue_head_t *d_wait; struct inode *dir = NULL; unsigned n; spin_lock(&dentry->d_lock); if (unlikely(d_in_lookup(dentry))) { dir = dentry->d_parent->d_inode; n = start_dir_add(dir); d_wait = __d_lookup_unhash(dentry); } if (inode) { unsigned add_flags = d_flags_for_inode(inode); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); } __d_rehash(dentry); if (dir) end_dir_add(dir, n, d_wait); spin_unlock(&dentry->d_lock); if (inode) spin_unlock(&inode->i_lock); } /** * d_add - add dentry to hash queues * @entry: dentry to add * @inode: The inode to attach to this dentry * * This adds the entry to the hash queues and initializes @inode. * The entry was actually filled in earlier during d_alloc(). 
*/ void d_add(struct dentry *entry, struct inode *inode) { if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); } __d_add(entry, inode); } EXPORT_SYMBOL(d_add); /** * d_exact_alias - find and hash an exact unhashed alias * @entry: dentry to add * @inode: The inode to go with this dentry * * If an unhashed dentry with the same name/parent and desired * inode already exists, hash and return it. Otherwise, return * NULL. * * Parent directory should be locked. */ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) { struct dentry *alias; unsigned int hash = entry->d_name.hash; spin_lock(&inode->i_lock); hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or * parent changes, because the parent inode i_mutex is held. */ if (alias->d_name.hash != hash) continue; if (alias->d_parent != entry->d_parent) continue; if (!d_same_name(alias, entry->d_parent, &entry->d_name)) continue; spin_lock(&alias->d_lock); if (!d_unhashed(alias)) { spin_unlock(&alias->d_lock); alias = NULL; } else { dget_dlock(alias); __d_rehash(alias); spin_unlock(&alias->d_lock); } spin_unlock(&inode->i_lock); return alias; } spin_unlock(&inode->i_lock); return NULL; } EXPORT_SYMBOL(d_exact_alias); static void swap_names(struct dentry *dentry, struct dentry *target) { if (unlikely(dname_external(target))) { if (unlikely(dname_external(dentry))) { /* * Both external: swap the pointers */ swap(target->d_name.name, dentry->d_name.name); } else { /* * dentry:internal, target:external. Steal target's * storage and make target internal. */ memcpy(target->d_iname, dentry->d_name.name, dentry->d_name.len + 1); dentry->d_name.name = target->d_name.name; target->d_name.name = target->d_iname; } } else { if (unlikely(dname_external(dentry))) { /* * dentry:external, target:internal. Give dentry's * storage to target and make dentry internal */ memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); target->d_name.name = dentry->d_name.name; dentry->d_name.name = dentry->d_iname; } else { /* * Both are internal. */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); } } } swap(dentry->d_name.hash_len, target->d_name.hash_len); } static void copy_name(struct dentry *dentry, struct dentry *target) { struct external_name *old_name = NULL; if (unlikely(dname_external(dentry))) old_name = external_name(dentry); if (unlikely(dname_external(target))) { atomic_inc(&external_name(target)->u.count); dentry->d_name = target->d_name; } else { memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); dentry->d_name.name = dentry->d_iname; dentry->d_name.hash_len = target->d_name.hash_len; } if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) kfree_rcu(old_name, u.head); } /* * __d_move - move a dentry * @dentry: entry to move * @target: new dentry * @exchange: exchange the two dentries * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. Caller must hold * rename_lock, the i_mutex of the source and target directories, * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). 
*/ static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { struct dentry *old_parent, *p; wait_queue_head_t *d_wait; struct inode *dir = NULL; unsigned n; WARN_ON(!dentry->d_inode); if (WARN_ON(dentry == target)) return; BUG_ON(d_ancestor(target, dentry)); old_parent = dentry->d_parent; p = d_ancestor(old_parent, target); if (IS_ROOT(dentry)) { BUG_ON(p); spin_lock(&target->d_parent->d_lock); } else if (!p) { /* target is not a descendent of dentry->d_parent */ spin_lock(&target->d_parent->d_lock); spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED); } else { BUG_ON(p == dentry); spin_lock(&old_parent->d_lock); if (p != target) spin_lock_nested(&target->d_parent->d_lock, DENTRY_D_LOCK_NESTED); } spin_lock_nested(&dentry->d_lock, 2); spin_lock_nested(&target->d_lock, 3); if (unlikely(d_in_lookup(target))) { dir = target->d_parent->d_inode; n = start_dir_add(dir); d_wait = __d_lookup_unhash(target); } write_seqcount_begin(&dentry->d_seq); write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* unhash both */ if (!d_unhashed(dentry)) ___d_drop(dentry); if (!d_unhashed(target)) ___d_drop(target); /* ... and switch them in the tree */ dentry->d_parent = target->d_parent; if (!exchange) { copy_name(dentry, target); target->d_hash.pprev = NULL; dentry->d_parent->d_lockref.count++; if (dentry != old_parent) /* wasn't IS_ROOT */ WARN_ON(!--old_parent->d_lockref.count); } else { target->d_parent = old_parent; swap_names(dentry, target); if (!hlist_unhashed(&target->d_sib)) __hlist_del(&target->d_sib); hlist_add_head(&target->d_sib, &target->d_parent->d_children); __d_rehash(target); fsnotify_update_flags(target); } if (!hlist_unhashed(&dentry->d_sib)) __hlist_del(&dentry->d_sib); hlist_add_head(&dentry->d_sib, &dentry->d_parent->d_children); __d_rehash(dentry); fsnotify_update_flags(dentry); fscrypt_handle_d_move(dentry); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); if (dir) end_dir_add(dir, n, d_wait); if (dentry->d_parent != old_parent) spin_unlock(&dentry->d_parent->d_lock); if (dentry != old_parent) spin_unlock(&old_parent->d_lock); spin_unlock(&target->d_lock); spin_unlock(&dentry->d_lock); } /* * d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. See the locking * requirements for __d_move. */ void d_move(struct dentry *dentry, struct dentry *target) { write_seqlock(&rename_lock); __d_move(dentry, target, false); write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); /* * d_exchange - exchange two dentries * @dentry1: first dentry * @dentry2: second dentry */ void d_exchange(struct dentry *dentry1, struct dentry *dentry2) { write_seqlock(&rename_lock); WARN_ON(!dentry1->d_inode); WARN_ON(!dentry2->d_inode); WARN_ON(IS_ROOT(dentry1)); WARN_ON(IS_ROOT(dentry2)); __d_move(dentry1, dentry2, true); write_sequnlock(&rename_lock); } /** * d_ancestor - search for an ancestor * @p1: ancestor dentry * @p2: child dentry * * Returns the ancestor dentry of p2 which is a child of p1, if p1 is * an ancestor of p2, else NULL. 
*/ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) { struct dentry *p; for (p = p2; !IS_ROOT(p); p = p->d_parent) { if (p->d_parent == p1) return p; } return NULL; } /* * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding * dentry->d_parent->d_inode->i_mutex, and rename_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ static int __d_unalias(struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL; struct rw_semaphore *m2 = NULL; int ret = -ESTALE; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) goto out_unalias; /* See lock_rename() */ if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; if (!inode_trylock_shared(alias->d_parent->d_inode)) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: __d_move(alias, dentry, false); ret = 0; out_err: if (m2) up_read(m2); if (m1) mutex_unlock(m1); return ret; } /** * d_splice_alias - splice a disconnected dentry into the tree if one exists * @inode: the inode which may have a disconnected dentry * @dentry: a negative dentry which we want to point to the inode. * * If inode is a directory and has an IS_ROOT alias, then d_move that in * place of the given dentry and return it, else simply d_add the inode * to the dentry and return NULL. * * If a non-IS_ROOT directory is found, the filesystem is corrupt, and * we should error out: directories can't have multiple aliases. * * This is needed in the lookup routine of any filesystem that is exportable * (via knfsd) so that we can build dcache paths to directories effectively. * * If a dentry was found and moved, then it is returned. Otherwise NULL * is returned. This matches the expected return value of ->lookup. * * Cluster filesystems may call this function with a negative, hashed dentry. * In that case, we know that the inode will be a regular file, and also this * will only occur during atomic_open. So we need to check for the dentry * being already hashed only in the final case. */ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) { if (IS_ERR(inode)) return ERR_CAST(inode); BUG_ON(!d_unhashed(dentry)); if (!inode) goto out; security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode)) { struct dentry *new = __d_find_any_alias(inode); if (unlikely(new)) { /* The reference to new ensures it remains an alias */ spin_unlock(&inode->i_lock); write_seqlock(&rename_lock); if (unlikely(d_ancestor(new, dentry))) { write_sequnlock(&rename_lock); dput(new); new = ERR_PTR(-ELOOP); pr_warn_ratelimited( "VFS: Lookup of '%s' in %s %s" " would have caused loop\n", dentry->d_name.name, inode->i_sb->s_type->name, inode->i_sb->s_id); } else if (!IS_ROOT(new)) { struct dentry *old_parent = dget(new->d_parent); int err = __d_unalias(dentry, new); write_sequnlock(&rename_lock); if (err) { dput(new); new = ERR_PTR(err); } dput(old_parent); } else { __d_move(new, dentry, false); write_sequnlock(&rename_lock); } iput(inode); return new; } } out: __d_add(dentry, inode); return NULL; } EXPORT_SYMBOL(d_splice_alias); /* * Test whether new_dentry is a subdirectory of old_dentry. 
* * Trivially implemented using the dcache structure */ /** * is_subdir - is new dentry a subdirectory of old_dentry * @new_dentry: new dentry * @old_dentry: old dentry * * Returns true if new_dentry is a subdirectory of the parent (at any depth). * Returns false otherwise. * Caller must ensure that "new_dentry" is pinned before calling is_subdir() */ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { bool result; unsigned seq; if (new_dentry == old_dentry) return true; do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); /* * Need rcu_readlock to protect against the d_parent trashing * due to d_move */ rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = true; else result = false; rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); return result; } EXPORT_SYMBOL(is_subdir); static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) { struct dentry *root = data; if (dentry != root) { if (d_unhashed(dentry) || !dentry->d_inode) return D_WALK_SKIP; if (!(dentry->d_flags & DCACHE_GENOCIDE)) { dentry->d_flags |= DCACHE_GENOCIDE; dentry->d_lockref.count--; } } return D_WALK_CONTINUE; } void d_genocide(struct dentry *parent) { d_walk(parent, parent, d_genocide_kill); } void d_mark_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; BUG_ON(dentry->d_name.name != dentry->d_iname || !hlist_unhashed(&dentry->d_u.d_alias) || !d_unlinked(dentry)); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", (unsigned long long)inode->i_ino); spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_parent->d_lock); } EXPORT_SYMBOL(d_mark_tmpfile); void d_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; inode_dec_link_count(inode); d_mark_tmpfile(file, inode); d_instantiate(dentry, inode); } EXPORT_SYMBOL(d_tmpfile); static __initdata unsigned long dhash_entries; static int __init set_dhash_entries(char *str) { if (!str) return 0; dhash_entries = simple_strtoul(str, &str, 0); return 1; } __setup("dhash_entries=", set_dhash_entries); static void __init dcache_init_early(void) { /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ if (hashdist) return; dentry_hashtable = alloc_large_system_hash("Dentry cache", sizeof(struct hlist_bl_head), dhash_entries, 13, HASH_EARLY | HASH_ZERO, &d_hash_shift, NULL, 0, 0); d_hash_shift = 32 - d_hash_shift; } static void __init dcache_init(void) { /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature * of the dcache. 
 */
	dentry_cache = KMEM_CACHE_USERCOPY(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT,
		d_iname);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_bl_head),
					dhash_entries,
					13,
					HASH_ZERO,
					&d_hash_shift,
					NULL,
					0,
					0);
	d_hash_shift = 32 - d_hash_shift;
}

/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __ro_after_init;
EXPORT_SYMBOL(names_cachep);

void __init vfs_caches_init_early(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++)
		INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]);

	dcache_init_early();
	inode_init_early();
}

void __init vfs_caches_init(void)
{
	names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL);

	dcache_init();
	inode_init();
	files_init();
	files_maxfiles_init();
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}
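
/*
 * A minimal usage sketch, not taken from the dcache code above: it
 * illustrates the reference-counting contract documented for d_lookup()
 * and d_hash_and_lookup(), namely that a successful lookup returns a
 * pinned dentry which the caller must release with dput(). The function
 * name, the "example" component name and the surrounding context are
 * assumptions made purely for illustration.
 */
#include <linux/dcache.h>
#include <linux/err.h>

static bool example_child_is_positive(struct dentry *parent)
{
	struct qstr name = QSTR_INIT("example", 7);
	struct dentry *child;
	bool positive;

	/* Hashes the name (honouring ->d_hash) and searches the dcache. */
	child = d_hash_and_lookup(parent, &name);
	if (IS_ERR_OR_NULL(child))
		return false;	/* no such entry, or the name was rejected */

	positive = d_is_positive(child);
	dput(child);		/* drop the reference the lookup took */
	return positive;
}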

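/*
 * A second illustrative sketch, again not part of the source above: a
 * hypothetical filesystem ->lookup() method built on d_splice_alias(),
 * which, as its comment above describes, covers the error, negative and
 * alias-splicing cases and returns exactly what ->lookup() is expected
 * to return. example_find_inode() is a placeholder, not a real API.
 */
#include <linux/fs.h>
#include <linux/dcache.h>

/*
 * Placeholder for a filesystem-specific directory search: a real
 * implementation would return the child's inode, NULL if the name does
 * not exist, or an ERR_PTR() on failure.
 */
static struct inode *example_find_inode(struct inode *dir,
					const struct qstr *name)
{
	return NULL;	/* treat every name as missing in this sketch */
}

/* Matches the ->lookup prototype in struct inode_operations. */
static struct dentry *example_lookup(struct inode *dir,
				     struct dentry *dentry,
				     unsigned int flags)
{
	struct inode *inode = example_find_inode(dir, &dentry->d_name);

	/*
	 * d_splice_alias() does the rest: an ERR_PTR inode is handed back
	 * as an error, a NULL inode leaves a hashed negative dentry, a
	 * directory with an existing IS_ROOT alias is moved into place and
	 * returned, and otherwise the dentry is bound to the inode and
	 * NULL is returned.
	 */
	return d_splice_alias(inode, dentry);
}
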
r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000003005740ed0b0011c3ec000000010902120001000000000904"], 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000140), 0x0, 0x0)
ioctl$EVIOCGREP(r1, 0x400445a0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f00000010c0)={0x84, &(0x7f0000000080)=ANY=[@ANYBLOB="00000100000008"], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r2 = syz_open_dev$evdev(&(0x7f0000000280), 0x0, 0x0)
ioctl$EVIOCSMASK(r2, 0x40104593, &(0x7f0000002d40)={0x17, 0x0, 0x0})
syz_usb_control_io$printer(r0, 0x0, 0x0)
r3 = syz_open_dev$evdev(&(0x7f00000000c0), 0x100000002, 0x120b40)
ioctl$EVIOCSKEYCODE(r3, 0x40086602, &(0x7f0000000340)=[0x7fffffff])
syz_usb_control_io(r0, 0x0, &(0x7f0000000c40)={0x84, &(0x7f00000007c0)=ANY=[@ANYBLOB="000049000000f8679de84fd69a3b38d55760292225d9730c0ca582c70a0af319c77c9931c0c4622ad7f49f8fe4a60c3557f440abe9e4c247058376cb34cf4a6ab5ffb6dc47ee000000000000000000"], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})


syz_usb_connect$printer(0x0, 0x2d, &(0x7f0000000000)={{0x12, 0x1, 0x250, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x1, 0x0, 0x10, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x7, 0x1, 0x3}}]}}]}}, &(0x7f0000000080)={0x0, 0x0, 0x5, &(0x7f0000000040)=ANY=[@ANYBLOB]})


r0 = syz_open_dev$evdev(&(0x7f0000000b80), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f0000000140)={0x0, 0x0, 0x0})
ioctl$EVIOCSKEYCODE_V2(r0, 0x80104592, &(0x7f0000000040)={0x0, 0x0, 0x0, 0xfffffffe, "207d3d00000000201b08700c1e0ac74f000000001200000000000900"})


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x80, 0x2}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="12010000000000407d1e9c3100000000000109022400010000000009040000010300800009210000000122070009058103"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000080)={0x2c, &(0x7f0000000000)={0x0, 0x0, 0x7, {0x7, 0x0, "53b9246588"}}, 0x0, 0x0, 0x0, 0x0}, 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="120100002189dc40081915130000010203010902240000000000000904"], 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMTSLOTS(r0, 0x8040450a, &(0x7f0000001140)=""/80)
ioctl$EVIOCGID(r0, 0x80084502, &(0x7f00000011c0)=""/16)


r0 = syz_open_dev$evdev(&(0x7f00000004c0), 0x0, 0x0)
ioctl$EVIOCGKEYCODE_V2(r0, 0x801c581f, 0x0)


syz_usb_connect$cdc_ncm(0x0, 0x6e, &(0x7f0000002cc0)={{0x12, 0x1, 0x340, 0x2, 0x0, 0x0, 0x20, 0x525, 0xa4a1, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x5c, 0x2, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x1, 0x2, 0xd, 0x0, 0x0, {{0x5}, {0x5}, {0xd}, {0x6}}}}}}]}}, &(0x7f0000003340)={0x0, 0x0, 0xc, &(0x7f0000002ec0)=ANY=[@ANYBLOB="050f0c000307"]})
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[], 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000140)=ANY=[@ANYBLOB="1201000000000040ac054382408b0b00000109022400010000002009040000fd0301000009210000000122010009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000003c0)={0x24, 0x0, 0x0, &(0x7f0000000080)={0x0, 0x22, 0x1, {[@main]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000000), 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
ioctl$HIDIOCSREPORT(r1, 0x81044804, &(0x7f0000000040)={0x1})


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)=ANY=[@ANYBLOB="120100004d0f7b08cd0c390087b0000000010902120001000000000904"], 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x6cb, 0x81a7, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000540)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x5, {[@main=@item_4={0x3, 0x0, 0x0, "96cf62ab"}]}}, 0x0}, 0x0)


syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r0, 0x5b04, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x100842)
write$hidraw(r0, &(0x7f0000003540)="bd80894ec947fb776e30504e0e68261fdf25dfe55bb5efd20ce7e076e58d9d8d842134cc424c9c73b052e2702867207a1e8cc518b5f197af219609399167ad15c3eb8f7a4ff2b16172109580ce58cea188626308990d55757ee4d0929d1fc1e9b29047867ad9cfb20011839739d9d1877e748d501c07d98ec6376fe2da6ff5aedb6002cbb98bfa3aebbdd4b2dd7147668bd3481224b8371019ebcc551acb9f121f5b20df101e244736741d5f04f962d80a8a7590d99b07d214a50e04510d869bcebe5bf330cb52cf717edfa8bd4847971cf46b7a4b12346082c2c7e0ada0965d0ece6c657cb0cb6cf181471bde249501a05d79a1fa0b23039b9f8d25c74a45caabd07e0766110fc1faf3a5b40b0066fe24d364547d17e0a84b0b0d95057efb6a3f6689fea2b51d9a89a63ce98182c177305546d4ab3e424629a4660a68d500192b0e073ddd75577723cf68d1128ad7574ffb38dbef031363e82067d845037f216af9e1037d3476c38d883a962516e1d1e46863176f8f362f7ae3a012a5bf0c13c769f716c3dc52f96d98ade19bc3d5cb8d15e4f25fd7be73d26bcccb270545261b777b7b70857928e3f1b4045a3ddda103bb1f9c39367e7e9c70872fdb94b2c31be2e62eefe16312b6683d4b8ae92a2b73a55153990974a90e85c87c8e8b0382b48ef8573a611f3ee91b4d76f86e94201e722826a05e04da9c9bff1022e36074342e20c156a431009aba0c02cf9eb7693fc67831e7068faa9a4708eac736558ce105800be052af88223d58de03b03a1ebda21cd900d867b93cf28d7f354435db3945d1267c668a38fd30d4cc1f96acad792612edf149091cb07690654b4f6294d8d84901e3eb7b8194396f76416c235f4649de3cb96d58250cf98a160467c216b6b292f2610783ab6dfe1f23c3a365f31934a5630b02e6825c54c97ac5392aa08c82ac085a8b739076d23f4948d999b71d8f33e51039318367eb9b23f6a7cd897e801489c6fbf70cb38451231b268f314a429a7609e97f8f1f08c108ac1c3137a8320236c8702a4a254b18ba7f97068c76710ade94435f67e337a5eed6068f55fda01d0f7271392ab57068419880b37c3f513844c5a63a4ebafa030985bc6bfea9befa5d5d7646a6a8be5d0f563a86279bbaa9fc3ad601287aefc5330fce72804c7fcb1e15b3314ae91e067f990e48ed29079889cb44593a840d6ba4cb45d638b2e6cc31a06926d88a559c176560587ef82e79ff000a040f552a9c66b24e87683c2dc719c394c78ca1ce74f43dd762c5d04881f1d608912ae75780a2e43ad4bb39ede0e3a6522b8be866bc9f7adf6439a7ca5f2160d2af43f847e5ca20a02894efb87f37f8742251eb40319ea31711c66a82fe49ff8a887f4749a96704ed0bf89562b129ac3863623032ad8015c0e39b2ebff5de88c2ddc3f707e6dc904b251c7168d0e29d0716cb33ae5eab33fc0e06d07911a2899a542adebacaa51f3a846e9cdc8b6fc34342abd6e9d698bc48d74de5d7addcc06f2daee4f348463a81ad2b79ee53b271e4e486f1f67e513791820079a4ba2c3d539c4c9860177413ba2cbb154fa8f68da6e1f43994304f043f790459743d7063a103e0e48d253acfcea01ef72a167f8eaaa54b1840f2d2bb3e2f8ce9480d1ac84bb605e580d1252f5c9404e1aa495a59904483b7678cc4d8bf731a8c5f6a476ede4f5c5fb585534b28588b4c95bba92b7c8226d35e93342cb57800def09c6bb37a085bc061829ec2025e5ba5a63c6b508236f67e676c6c0b69e0242c3ac6bd2b93bbbfb6f9ac73ba4061701ddac7506afc53539637502fb5290242aca54ee11bcc8b7db7ced2545300858d86e502d89e80a708ae2fdde6ccae6cceb13652945942d262b397f283b4ee7f7f80e80d764cebb953f9fcaec8de410e239c84cde7f6d04bc8bd97e55444c5e680a196b42c9e767d52bd00a2aff5c7366c0c4e946d9cc8f49bef3d9eb50b6f3e23c353d19ae6a948ac9f419c7b7555f63bd8a02650558fd3203bde7401a12dbf7c0bb56623cef996a023ebff6b337849be0c7a34159fb3cac890220b570aaef7260159910ab195b6d9b1fbcdebb4a3a651b9c595bbb4308cd8506fc44583b41eb52da60a5fff7b07a9293556bf7524c72ea326981373e1ad1e4dff0950a7df2419110d1e6c0f571ea426739827531f7a083a7dfc28221a3f4c9b7b0171cfddbef1fb2bdc47553e2076bee86d766b64c6ef0f842e01632600ee515897e2d3d7a701316260ffccbca5029461180743978aad3f75c796392aafb15ae0d93e14035d7f8c6c666aee0c9b7e771c421806000000f9b2393dc7f2a15b0c88a5c1ea7936d34997be57b1df73cb84c6d55bfee2bc23fa7030920914572c9fe888d7fd22806f2a2c5398343916c417b930ab93232e3821c6e2a3766a462e003a60132660335f0eb570c2bb80a7045dc1720f9783b2519b3ec9f4abb5769b67d7369cb955c29
e7bcf442365597ba51943427f84b24007fd3bc1d2abb580ec2afb5a851b0929a9d9ed80f1a3b8646485dac4e1101a5966ef65e9fab1797c548d4b5eb37e59acc7e44fabaac69ef52627a4a54013aa2eb02e2748de9de2989790c15e99e1d740872f78cac06c964bf07e84991d7f55bec5a34cb4fbd33cf7208b92887c5bfc41d5d347a507a78806c632928c7e45e8ab0f8e6716460924db4e82397a8a524d4a709518d6e0529cfb8bd7bd6ffa508bea6e72705648e70a9b2c179a834127637649894a7d623c0c08e77d7dc36751294db166180b461df3f71349687f02d35bfc0d747179a4cfcfd5f2801af4bc032ea2ea56da5dcc40ada2220d8ab01c9741fc767d7ed5a776a56cd8b09df36115dd0452acf1af909b3ea2209ab33879b7838e45e3ad442f4cbd635e8fcc4156a4d829678dfd65c835230d0220a46e81f5879031ce900741fb0a0a8605220238647a26977b88f6fd638c33b0da57307e91836e7433e345fd0c44ecf7970cc808c2162d00fc543ff864922e5f645f31b76a7e5f0e6c8371ce0de15a642fe2b12ca3cf956828f2c1ad4125842fe94dd1d819c1ea1cdf6c250842350122a3c059f0493bbac0dcfadfc6f0d6cafdeefebe5d4ca823cc42491758794a6a9af9d43f86d6887c7e8d21d42ba1b2d39f21a72d74c231028bc56a758bd38a5a62025604fe79f09e145ec83c309d820913ba73bb4fc98bd464d4cf103015d86cfad4ecfe7ba15d175774603bf9a1e7d5ffe2c4f52000fd65db50cf721fbbc7913a54d4c9fd9170b339d807db7d50129f07d5fa126f56df393d8dee30c4d6eb94cfbda04c72deb6f99fc8524359dd39d4ff32ba989da03bb1c351d04a947833d1571634d3d598d5c5edf8e6244dfa4805bfa24c8db532be71b31c067de9d2511320021cc4963c4442411f877d23478936e7b08e9530864f9679b8d49c884c374bacd38f8de48248711943274ac3eadbcc92e3015a3ac789c3ea38a6bb4b5d3cc64881871b3db30a6ec618f644eb94716af52d81509b83175e4fc632287cb4a8790631ad25725d6d4090f3dbb17fafaed8dc9b7ce1574bc360342d35a88fcc980ee0d1296d0cdbb3fa46d42b17f83ab1048394572e26de18a6a0a2f2c27705aace537da52c47494eaeccf45c158f12b2dbdba2d3d69d598d7e28aa51abab4c26051f22c9b7169ef9c98f18eaba7172780ea15a6ebde3b4da2d761e9df9b9fe4912849c261cad7aa48e5db40161e6d21b536b0b4eae8a59261aa99b3d15700a5da6948be4a3318d5c25ffb081ceb30cdf5433c5709db91dcc7611d127c9782e52103c09b2c7faabace71101aed1001580e72604040e1f67f1bca5fc89245607d7d2639eac091c51f58abf3e789f1fde8b9b2aa8b33e86bb1219da498ac452cceed599d8deadf36a187f864187e06aa139d7635303863f34478a20fc19e4f4b4b1d07c9df56c2a9206833d2f0dfb98659695fcdf8f87bab4fe7f3d90eb849142a44cfd57d3de04ae5a9634c2761e642ad949effaaa1aee784cda74376ae7e81a6c15de32a7d14f8df7a2dcf0e782482e5639c092dd1980a2735e9e5f2b1bd049de72e28e59e95e41cb32a2d65a979ba0d448f44992461cabe080df2c27201cae6455a83fd44c3ffb0c60f3b077ba6583a9657c8f9d54fa11c799b2ede146a59ea0af12d412e37c70176c3e631480a6b2025142f6a24f67365c7038e24c434a34e6d8fd5bd7daa6f42ddfb58b20d61e9e0c9675b02b613cdbc2a9a6c6bc617271f2ac9d476c4a7bcafe543862c7ad0cdb02d685276e0e34430092385ade102da1ede5abf2e1e80887a561cd4747dccfcf8043b47df72f34b455f0bce4ea5958d52cdf1784fbcc3bc523c49834d4d61802ac9a797ea86e611c0caa5d66ebf2d20da3df7b9d201d775bb9ddcd50e9e99796f249b05a13fe1d12ee2bd01298774ee13c370aea00eaf4339cba32ca772156824d610739bd7fc877e76cd494d9d29c466f27c69a75d9906b5aac743b42b732cf8940f87c1572a5cd79340b455c22078b89c9874546b3f2f3da56b19da853b81dac42098092f697579d8d8bdc5dce7ef0c487ee681350ca0bc5ba2539433316658852e34c0a6233cd0fd77af2a091e07c45a1215a535def9800c25a2795706d51c525720ee4266748400f255f2b3fdb5feb3fe038ee68b9e12a1294e72cde25ad8914e9af010050fa444a5d566b9ee3a6a2e4e1608714812a3e152f34840938e4705787cbc381884ddefd0e22f927a307d84572572c9bd87a9c93ebe44755930be506337e235f51bfe6d9dae734de9d48676ee6a6c73e9023962c5bdd0d31dce57739169972bb9ef2b3da4a1e4e0863fbce4bb9da18c22e230e1c2e4bb3fefc75905f2170b56452dd45cee4df232bd19fe468729f4d65ff9c6f5625f3c7a5505dc5495c655ec655665279b6dab9799c97908ed1636af13763894064e573f86e5339b9d081af2ffbaa8ba4e74c595fe17bbb3091b899495d402d0288f1
ac06158ae2c230bfae4d6eb632b3af3b6d3a0e0e01d7fa84f160967eb47f733ae0d1f1ddb02e368288559f165162f4e12a76130c86876fc189a8f57dd9f9d67d2d8a3df03d18950dc2c4ef43d3e4dcec96f67b2a910d9ef4153e4c86a2175fd5cf0502a6dd5e6de06660dfd90dd4edc9f574f780f090f914662ce6f620b42121c303ae1e8b1becddd5fcf2e61452104537660609c48fbd1166ad80c1b74c0e1ff4670b109547c9614979de3bcd6b82193d850ca0550bfc428d2cc00cba131fcdc8bbc1c4e895c3859d6b78edc83f68be6a16878d94cf345d4f53faf22084432dfc0ffbe9f998c88fe233681fcf5ed5feb8103edd7aff54465a409b565700e02a038c2e6bbbb4aca72a3e16d6753847e414b3929e5369077673a041490de450588f2658bb510b588c7a88b7340c405cd0085ec1b6659e9749d3526165d1769b381440dc9b5e6ad47a23bd75045453ec760f0d0682915f72e96bb15cef052d0c715c04484050031b831b7d868c96c7768a15ab64eee5be965c49204109eca1bb1aa81b228195da40fb15b6262b417e0b8cbfa7aa50036d7b74f2562730fc4486954f6f4b76604c5b8193eefb5ebfc66e0746986ee356089ff6fd22f47aceb950ca6fd190f1a7888920a1deeca6e694fa53ab66042833cf3b8bc42e8e0e19231d76ef46fbfec6aa69a618ee46adcc6aa292355f0ac934b272e1511056763bfeaa88fdb486ce3d9c6f09aaad1de0253bdf0455d52c13d32d53fa0d2adb862d9a2b0f8a651157e8afce663dc80793c2c3be", 0x1000)
syz_open_dev$hidraw(&(0x7f0000008e80), 0x5, 0x200)
read$hidraw(r0, 0x0, 0x0)


write$hidraw(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
r1 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r1, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
ioctl$HIDIOCGRDESC(r0, 0x40305829, &(0x7f00000002c0)={0xd, "3a82000000130000000000009d"})


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
r1 = syz_open_dev$evdev(&(0x7f0000001640), 0x0, 0x80801)
ioctl$EVIOCSKEYCODE_V2(r1, 0xc0189436, 0x0)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
syz_open_dev$hidraw(&(0x7f0000000000), 0x5, 0x200)


syz_open_dev$hidraw(0x0, 0x0, 0x0)
r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000700)=ANY=[@ANYBLOB="12010000000000408c0d220000000000000109022400010000000009040000010300000009210000000122050009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001000000090090"], 0x0, 0x0, 0x0, 0x0}, 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000280)=ANY=[@ANYBLOB="12010000eafa7240936901b0293df400100109021b000124a800800904000001030000000905", @ANYRES16], 0x0)
syz_usb_connect(0x0, 0x0, 0x0, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGBITKEY(r0, 0x80404521, &(0x7f0000000340)=""/4096)


syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x6, 0x105141)
ioctl$EVIOCGMASK(r0, 0x4020940d, &(0x7f00000003c0)={0x4, 0x0, 0x0})


r0 = syz_usb_connect(0x0, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82300000904000002ca744d00090503034d00ff99090805848faa"], &(0x7f0000000000)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r1, &(0x7f0000000200)=""/128, 0x80)
write$char_usb(r1, &(0x7f0000000280)="d7", 0x1)
syz_usb_disconnect(r0)


syz_usb_connect$uac1(0x1, 0x71, &(0x7f00000001c0)={{0x12, 0x1, 0x201, 0x0, 0x0, 0x0, 0x8, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x5f, 0x3, 0x1, 0x0, 0x0, 0x0, {{}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x82, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}}}}]}}, &(0x7f0000001ac0)={0x0, 0x0, 0xf, &(0x7f0000001840)={0x5, 0xf, 0xf, 0x1, [@ss_cap={0xa, 0x10, 0x3, 0x0, 0x0, 0x0, 0x0, 0x8}]}, 0x3, [{0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}]})


r0 = syz_open_dev$evdev(&(0x7f0000000200), 0x200, 0x0)
ioctl$EVIOCSCLOCKID(r0, 0x40084504, &(0x7f0000ffcffc))


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f00000001c0)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x10, 0xb05, 0x1807, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x1, 0x0, 0x0, {0x9}, {{{0x9, 0x5, 0x81, 0x3, 0x40}}}}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x4004550e, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x80086601, &(0x7f0000000040))


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0xeef, 0x72d0, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000140)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x5, {[@global=@item_4={0x3, 0x1, 0xa, "6652f890"}]}}, 0x0}, 0x0)


r0 = syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x12, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x200}}, {{0x9, 0x5, 0x3, 0x2, 0x200}}}}}]}}]}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, &(0x7f0000000080)={0x14, 0x0, &(0x7f0000000040)={0x0, 0x3, 0x1a, {0x1a}}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_ep_write(r0, 0x82, 0x0, 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f0000000100)=ANY=[@ANYBLOB="1201ec00a84122104f100400af8f0102030109022400010000000009040000025597dc000905083a000000000009058702"], 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000100)=ANY=[@ANYBLOB="12010000d5e9bd40eb030200c0ba050000010902115c01000000000904000001b504b100090581"], 0x0)


syz_usb_connect(0x0, 0x66f, &(0x7f00000006c0)=ANY=[@ANYBLOB="12010000a797e620cd061a01f8320102030109025d0601000000000904"], 0x0)
r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB, @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r0, 0x80104592, &(0x7f0000000140)={0x17, 0x0, 0x0})


syz_usb_connect$printer(0x0, 0x2d, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1}}]}}, &(0x7f0000000180)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x2, 0x0}]})


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCSCLOCKID(r0, 0x2, &(0x7f00000002c0))


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000d1d7a440041601801f44010203010902120001000000000904"], 0x0)


syz_usb_connect(0x0, 0x27, &(0x7f0000000100)={{0x12, 0x1, 0x0, 0x6b, 0x53, 0xd3, 0x20, 0xf98, 0x201, 0x7b1b, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x15, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xea, 0xea, 0x1c, 0x0, [@generic={0x3, 0x9, "f0"}]}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000700)=ANY=[@ANYBLOB="12010000000000408c0d220000000000000109022400010000000009040000010300000009210000000122050009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001000000090090"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCSKEYCODE(r1, 0x40084504, &(0x7f0000000240))
r2 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCGKEYCODE(r2, 0x40084504, &(0x7f0000000140)=""/148)


r0 = syz_open_dev$evdev(&(0x7f0000001640), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0xc0189436, &(0x7f00000001c0)={0x0, 0x0, 0x0, 0x0, "c889190700000000000000f36926832bc7464cde460ba989075e2f211c806179"})


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x4f, 0x2}, @cond})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


r0 = syz_open_dev$evdev(&(0x7f0000001900), 0x0, 0x6002)
ioctl$EVIOCRMFF(r0, 0x40044581, &(0x7f0000000000)=0x8001)
ioctl$EVIOCGMTSLOTS(r0, 0x8040450a, &(0x7f0000000040))


syz_usb_connect(0x0, 0x36, &(0x7f0000000780)={{0x12, 0x1, 0x0, 0xf6, 0xb3, 0xe0, 0x40, 0xdf6, 0x4b, 0x56d7, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x3f, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x66, 0x87, 0xca, 0x0, [@uac_control], [{{0x9, 0x5, 0x5}}]}}]}}]}}, 0x0)


syz_usb_connect(0x0, 0x10b, &(0x7f0000000100)=ANY=[@ANYBLOB="12010000b24b4a10e60407007501000005010902240001010000000904000002ccb8280009050b02000000000009058a02"], 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x10, 0x8, 0x5c, 0x40, 0x4b4, 0x1002, 0x6ab1, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xf3, 0x97, 0xa}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$uac1(0x6, 0x71, &(0x7f0000000180)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0x5509, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x48, 0x2}, @period={0x0, 0x0, 0x0, 0x0, 0x0, {}, 0x0, 0x0}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


syz_usb_connect$printer(0x0, 0x2d, &(0x7f0000000000)={{0x12, 0x1, 0x250, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1}}]}}, &(0x7f0000000140)={0x0, 0x0, 0x5, &(0x7f0000000040)=ANY=[@ANYBLOB="020f"]})


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000200)={{0x12, 0x1, 0x0, 0xfb, 0xf8, 0xe, 0x10, 0x2304, 0x21f, 0x5faf, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa7, 0x13, 0x85}}]}}]}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCSKEYCODE(r1, 0x40084504, &(0x7f0000000240)=[0xffffffff])


r0 = syz_open_dev$evdev(&(0x7f0000000140), 0x0, 0x0)
ioctl$EVIOCSREP(r0, 0x80086601, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000002c0), 0x1, 0x0)
ioctl$EVIOCGMASK(r0, 0x80104592, &(0x7f0000000300)={0x0, 0x0, &(0x7f0000000200)})
r1 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r1, 0x80104592, &(0x7f00000001c0)={0xff, 0x0, 0x0, 0x8, "99f9ffffdf7f00000020000000000900000000001000"})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000240)={0x24, 0x0, 0x0, &(0x7f0000000540)={0x0, 0x22, 0xf, {[@local=@item_4={0x3, 0x2, 0x0, "b6954b00"}, @main=@item_4={0x3, 0x0, 0x0, "7d223e18"}, @local=@item_4]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGPHYS(r1, 0x541b, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGPROP(r0, 0x80404509, &(0x7f0000000100)=""/176)


r0 = syz_open_dev$evdev(&(0x7f0000001900), 0x0, 0x0)
read$hidraw(r0, &(0x7f00000000c0)=""/222, 0xde)
ioctl$EVIOCREVOKE(r0, 0x40044591, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x41e, 0x2801, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000140)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x5, {[@global=@item_4={0x3, 0x1, 0x0, "6652f890"}]}}, 0x0}, 0x0)


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0x1017, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x46d, 0x1017, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x5509, &(0x7f0000000400))


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b02, 0x0)
syz_usb_control_io$printer(r0, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x3f, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x20, 0x56a, 0xb0, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x2d, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x25, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}, {{}, [{}]}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000d40)={0x2c, &(0x7f0000000780)=ANY=[@ANYBLOB='\x00\x00\a'], 0x0, 0x0, 0x0, 0x0}, 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="120100008010bd40820514009dbb0000000109022400011b00000009040000022a3e740009058bff7f0000100109050b"], 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x80104592, &(0x7f00000001c0)={0x0, 0x0, 0x8cc4, 0x24e9, "00f8ffffffffffffff0000f62386f0dfdf293700"})


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="12010000000000406d04171000000000000109022400010000000009040000000e"], 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x0)
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x49, 0x2}})
r1 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x2778)
ioctl$EVIOCGSW(r0, 0x8040451b, &(0x7f0000000100)=""/4096)


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000700000040"], 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
write$hidraw(r0, 0x0, 0x0)
write$hidraw(r0, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x3b, 0xf, 0x33, 0x8, 0x10b8, 0xbb9, 0x2f2a, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x20, 0xb6, 0x3}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001900), 0x0, 0x0)
read$hidraw(r0, &(0x7f00000000c0)=""/222, 0xde)
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x74, 0x2}})
r1 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x822b01)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x1068)
r2 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r2, 0x5460, 0x0)
r3 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
ioctl$EVIOCRMFF(r3, 0x41015500, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x56a, 0x331, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x7}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000300)={0x24, 0x0, 0x0, &(0x7f0000000100)={0x0, 0x22, 0x7, {[@global=@item_012={0x1, 0x1, 0x0, "ed"}, @local=@item_4={0x3, 0x2, 0x0, "884649c0"}]}}, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
ioctl$EVIOCGSW(r0, 0x80404531, 0x0)


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000020ac050f0222000182830109022400010100000009040000020301020009210005000122000009058103"], 0x0)
syz_open_dev$evdev(&(0x7f0000001180), 0x75c, 0x0)
ioctl$EVIOCGEFFECTS(0xffffffffffffffff, 0x80044584, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCGBITSND(0xffffffffffffffff, 0x80404532, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)


syz_usb_connect(0x2, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82200000904000002ca744d00090503034d00ff99090805848f"], &(0x7f0000000200)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, &(0x7f0000000080)="1a", 0x1)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCREVOKE(r0, 0x40044591, &(0x7f0000000040)=0x1000)
ioctl$EVIOCGSND(r0, 0x8040451a, &(0x7f0000000100)=""/182)
syz_usb_connect(0x0, 0x2d, &(0x7f0000000100)=ANY=[@ANYBLOB="120100001ddf8208c007121522300000000109021b0001000000010904010001faf40d0009058203"], 0x0)
r1 = syz_open_dev$evdev(0x0, 0x2, 0x0)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x0)
ioctl$EVIOCSKEYCODE(r0, 0x40084504, &(0x7f0000000000)=[0x5, 0x5])


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGKEYCODE(r0, 0x2, 0xfffffffffffffffc)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1e7d, 0x2d51, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x2, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x7}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000380)={0x2c, &(0x7f0000000080)={0x0, 0x0, 0x7, {0x7, 0x0, "c827831962"}}, 0x0, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000200)={{0x12, 0x1, 0x0, 0xfb, 0xf8, 0xe, 0x10, 0x2304, 0x21f, 0x5faf, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa7, 0x13, 0x85}}]}}]}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCSKEYCODE(r1, 0x40084504, &(0x7f0000000240))


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000440)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f000905", @ANYRES16], 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xa, {[@global=@item_4={0x3, 0x1, 0x0, "9b4d3948"}, @local=@item_4={0x3, 0x2, 0x0, "6d011fe4"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000d40), 0x0, 0x0)
ioctl$HIDIOCGREPORT(r1, 0x400c4802, &(0x7f00000004c0)={0x3})


syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
ioctl$EVIOCGMASK(r0, 0x4020940d, &(0x7f0000000040)={0x4, 0x0, 0x0})


r0 = syz_usb_connect$hid(0x0, 0x36, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000003c0)={0x24, 0x0, 0x0, &(0x7f0000000080)={0x0, 0x22, 0x1, {[@main]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGREPORT(r1, 0x400c4807, &(0x7f00000000c0)={0x2, 0x2, 0x8})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1e7d, 0x2db4, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x2, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000b80)={0x24, 0x0, 0x0, &(0x7f0000000000)={0x0, 0x22, 0x5, {[@main=@item_4]}}, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffff36, &(0x7f0000000200)="952bb3e006ae9a4c3a"})
syz_usb_disconnect(r0)


r0 = syz_open_dev$evdev(&(0x7f0000001500), 0x361, 0x1c9962)
ioctl$EVIOCGEFFECTS(r0, 0xc0189436, &(0x7f00000039c0)=""/4096)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
r1 = syz_open_dev$hidraw(&(0x7f0000000100), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r1, 0x40086602, &(0x7f0000000040)={0x70, "667f40bd32eb094bd8c9871638606d393b1508374a664d0e50edce2b854e1aa2eaa352c2dc13503b7a33bfcf29ff7f00005bceb207052b537147d90f6b20615f67a62d63237444c3a3b4"})
ioctl$EVIOCGREP(0xffffffffffffffff, 0x40044591, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x4030582a, &(0x7f0000000280)={0xd, "50ef33490271c1de6f2522669d"})


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, 0x0, 0x39)


r0 = syz_open_dev$evdev(&(0x7f0000000180), 0x0, 0x0)
ioctl$HIDIOCINITREPORT(r0, 0x40004580, 0x0)


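# USB audio control interface carrying mixer, processing, input-terminal and feature unit descriptors in a single configuration.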
syz_usb_connect(0x0, 0x53, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0x1f, 0x1d, 0x64, 0x20, 0x13e5, 0x1, 0xa1f5, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x41, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x8, 0xc8, 0x67, 0x0, [@uac_control={{}, [@mixer_unit={0x5, 0x24, 0x4, 0x6}, @processing_unit={0x7}, @mixer_unit={0x5}, @input_terminal={0xc}, @feature_unit={0x9, 0x24, 0x6, 0x0, 0x0, 0x1, [0x0]}]}]}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x44f, 0xb65d, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000300)={0x24, 0x0, 0x0, &(0x7f0000000280)={0x0, 0x22, 0x5, {[@local=@item_4={0x3, 0x2, 0x0, "f234d324"}]}}, 0x0}, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
ioctl$HIDIOCGRDESC(r0, 0x40305828, &(0x7f0000000040)={0xe, "3a82000000010000000000009d4b"})


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x4d, 0x2}, @rumble})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


syz_usb_connect(0x0, 0x24, &(0x7f0000001040)={{0x12, 0x1, 0x0, 0x5e, 0xa5, 0xd1, 0x20, 0x545, 0x8002, 0x30a, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xd0, 0xdb, 0x94}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x17ef, 0x6047, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000b80)={0x24, 0x0, 0x0, &(0x7f0000000000)={0x0, 0x22, 0x5, {[@main=@item_4]}}, 0x0}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCSFLAG(r1, 0x4004480f, &(0x7f0000000000)=0x1)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x20, 0x5ac, 0x269, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x4, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x7}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000300)={0x2c, &(0x7f0000000000)=ANY=[@ANYBLOB='\x00\x00\f'], 0x0, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000280), 0x0, 0x800)
read$hiddev(r0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x4, 0x24, &(0x7f0000000100)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0xc0085504, &(0x7f0000000400))


syz_usb_connect$uac1(0x0, 0x9e, &(0x7f00000004c0)=ANY=[@ANYBLOB="12010000000000406b1d010140000102030109028c0003010000000904000000010100000a24010000000201021124060000050000000000000000000000072408000000000b240700000000a5cfe3b00a240804"], 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0xff, 0xf6, 0xaf, 0x20, 0x2639, 0x301, 0xe153, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0xe, 0x39, 0x97, 0x0, [], [{{0x9, 0x5, 0x8}}]}}]}}]}}, 0x0)


syz_open_dev$evdev(0x0, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x40104593, &(0x7f00000001c0)={0x0, 0x0, 0x0, 0x18000, "ffffd130be30000fb700000000be6f3380000000000003000000000000008000"})


syz_open_dev$evdev(0x0, 0x0, 0x0)
r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="05010000000000107d1e222e00000000000109022400010000000009040000020300020009210000000122020009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000300)={0x2c, &(0x7f00000004c0)=ANY=[@ANYBLOB="000002"], 0x0, 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000740)={0x84, &(0x7f00000002c0)=ANY=[@ANYBLOB='@\x008'], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
ioctl$EVIOCSMASK(0xffffffffffffffff, 0x40104593, 0x0)
ioctl$EVIOCSKEYCODE_V2(0xffffffffffffffff, 0x80104592, 0x0)
syz_usb_connect$cdc_ecm(0x0, 0x0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001780), 0x0, 0x0)
ioctl$EVIOCGKEY(r0, 0x80404518, 0xfffffffffffffffe)


r0 = syz_open_dev$evdev(&(0x7f0000000b80), 0x0, 0x0)
ioctl$EVIOCGBITSND(r0, 0x80404532, &(0x7f0000000100)=""/142)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902240001000000000904000001030100000921000000012201000905810308"], 0x0)
ioctl$EVIOCRMFF(r0, 0x4004550d, 0x0)


syz_usb_connect$hid(0x0, 0x36, 0x0, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, &(0x7f00000001c0)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x2, {[@main=@item_012={0x1, 0x0, 0x0, "9f"}]}}, 0x0}, 0x0)
r0 = syz_open_dev$hiddev(&(0x7f0000000d40), 0x0, 0x0)
ioctl$HIDIOCGREPORT(r0, 0x4802, &(0x7f00000004c0)={0x3})


syz_usb_connect(0x0, 0x24, &(0x7f0000000440)={{0x12, 0x1, 0x0, 0xab, 0xbe, 0x67, 0x40, 0x17e9, 0x8b4e, 0x9c08, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xff}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f000905", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000b00)={0xffffffffffffff9a, &(0x7f0000000040)=ANY=[@ANYRES16=0x0], 0x0, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000580)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES16], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000000)={0x24, 0x0, 0x0, &(0x7f0000000040)=ANY=[@ANYBLOB="00220f0000000b"], 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0xc038480a, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x80086601, 0x0)


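# ath9k WiFi dongle emulation (VID 0x0cf3, PID 0x9271) followed by an endpoint read and a control transfer.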
r0 = syz_usb_connect_ath9k(0x3, 0x5a, &(0x7f0000000000)={{0x12, 0x1, 0x200, 0xff, 0xff, 0xff, 0x40, 0xcf3, 0x9271, 0x108, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x48}}]}}, 0x0)
syz_usb_ep_read(r0, 0x4, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)


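# CDC ECM (USB Ethernet) gadget with an interrupt notification endpoint and a bulk in/out pair.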
syz_usb_connect$cdc_ecm(0x0, 0x56, &(0x7f0000000140)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x10, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x44, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x16, 0x2, 0x2, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}, {[{{0x9, 0x5, 0x81, 0x3, 0x10}}], {{0x9, 0x5, 0x82, 0x2, 0x200}}, {{0x9, 0x5, 0x3, 0x2, 0x200}}}}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x3, 0x862b01)
ioctl$EVIOCGMASK(0xffffffffffffffff, 0x80104592, &(0x7f0000000200)={0x2, 0x1, &(0x7f0000000180)="8a"})
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x16, 0xca, 0xe1, 0x8, 0x5ac, 0x24c, 0x9dc3, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x99, 0x2}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000580)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES16], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000000)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xf, {[@local=@item_4={0x3, 0x2, 0x0, "f896e404"}, @local=@item_012={0x1, 0x2, 0x0, 'e'}, @main=@item_012={0x2, 0x0, 0x9, "f792"}, @main=@item_4={0x3, 0x0, 0x0, "9ef12d19"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0xc038480a, &(0x7f00000000c0)={0x2})


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
ioctl$HIDIOCGRDESC(r0, 0x4030582b, &(0x7f0000000040)={0xd, "3a82000000010000000000009d"})


ioctl$EVIOCSKEYCODE_V2(0xffffffffffffffff, 0xc0189436, &(0x7f00000001c0)={0x0, 0x0, 0x0, 0x0, "c889190700000000000000f36926832bc7464cde460ba989075e2f211c806179"})
r0 = syz_open_dev$hidraw(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x401c5820, &(0x7f00000001c0))


r0 = syz_open_dev$evdev(&(0x7f0000001d80), 0x0, 0x0)
ioctl$EVIOCGVERSION(r0, 0x80044501, &(0x7f0000000040)=""/107)


r0 = syz_open_dev$evdev(&(0x7f0000000140), 0x0, 0x0)
ioctl$EVIOCGREP(r0, 0x40044591, 0x0)
ioctl$EVIOCGBITSND(r0, 0x80404532, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x10, 0x17ef, 0x6009, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000001440)={0x24, 0x0, 0x0, &(0x7f00000013c0)={0x0, 0x22, 0x5, {[@local=@item_4={0x3, 0x2, 0x0, "741cb976"}]}}, 0x0}, 0x0)
syz_usb_connect(0x0, 0x0, 0x0, 0x0)
r1 = syz_open_dev$hidraw(&(0x7f0000000040), 0x0, 0x14a442)
r2 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r2, 0x541b, 0x0)
write$hidraw(r1, &(0x7f0000003000)=')', 0x1)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x53, 0x2}, @rumble})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f000905", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000b00)={0x2c, &(0x7f0000000040)=ANY=[@ANYBLOB="00000f00000009003d140f"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000300), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0x40184810, &(0x7f00000000c0)={0x2, 0xffffffff, 0x0, 0x0, 0x0, 0x2})


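# Printer-class gadget connected together with what appears to be a BOS descriptor holding wireless and extension capability entries.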
syz_usb_connect$printer(0x0, 0x2d, &(0x7f0000000100)={{0x12, 0x1, 0x310, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x7, 0x1, 0x0, 0x0, "", {{}, [{}]}}}]}}]}}, &(0x7f0000000300)={0x0, 0x0, 0x5, &(0x7f0000000040)={0x5, 0xf, 0xff2a, 0x0, [@wireless, @ext_cap]}, 0x1, [{0x0, 0x0}]})


r0 = syz_open_dev$evdev(&(0x7f0000001240), 0x0, 0x0)
ioctl$EVIOCSKEYCODE(r0, 0x40084503, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x80811501, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x10, 0x1e7d, 0x31ce, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x2, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0xb}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000200)={0x24, 0x0, 0x0, &(0x7f0000000240)=ANY=[@ANYBLOB='\x00\"\v'], 0x0}, 0x0)


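# Bulk hidraw traffic: large fuzzer-generated payloads written to /dev/hidraw nodes, followed by a read.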
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
write$hidraw(r0, &(0x7f0000000180)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3fc6", 0x1000)
r1 = syz_open_dev$hidraw(&(0x7f0000000000), 0x5, 0x200)
write$hidraw(r0, &(0x7f0000000140)="d5", 0x1)
read$hidraw(r1, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000140)=ANY=[@ANYBLOB="1201000000000040ac054382408b0b00000109022400010000002009040000fd0301000009210000000122010009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_connect(0x0, 0x0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000003c0)={0x24, 0x0, 0x0, &(0x7f0000000080)={0x0, 0x22, 0x1, {[@main]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCSREPORT(r1, 0x81044804, &(0x7f0000000040)={0x1})


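# Longer sequence: UAC1 gadget with BOS and string descriptors, ath9k endpoint writes carrying bulk payloads, a batch of control transfers, and assorted evdev ioctls/writes.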
syz_usb_connect$uac1(0x5, 0xd8, 0x0, &(0x7f0000000940)={0xa, &(0x7f0000000600)={0xa, 0x6, 0x200, 0x80, 0x4, 0x20, 0x20, 0x7}, 0x5e, &(0x7f0000000640)={0x5, 0xf, 0x5e, 0x5, [@ssp_cap={0x20, 0x10, 0xa, 0x0, 0x5, 0x40, 0xf00, 0x100, [0x6000, 0xff000f, 0x0, 0x0, 0x30]}, @ss_cap={0xa, 0x10, 0x3, 0x2, 0x0, 0x7, 0x9, 0x7}, @ssp_cap={0x14, 0x10, 0xa, 0x1f, 0x2, 0x5, 0xf, 0xfff, [0x0, 0xff08]}, @ss_container_id={0x14, 0x10, 0x4, 0x0, "2662cf94070ee8989bd86cc44b8f0f70"}, @ext_cap={0x7, 0x10, 0x2, 0x2, 0x8, 0x3, 0x8000}]}, 0x3, [{0x4, &(0x7f00000006c0)=@lang_id={0x4, 0x3, 0x444}}, {0x8b, &(0x7f0000000700)=@string={0x8b, 0x3, "3f22eafd5545058a2be0a0a724480050b0fabceade48c0ecf0045c8e3e47e78f50fab97429179d5f63fbd3b00a8b27ff4f38355db4d2ea6127be28441310723ab6bda3eb6adbbbd8ca19f359c3a21cc25ad28a0297ff8cf703d34fa0af5d65ddadda7f44f8e23b4876ea9f4d3552ac9e5b1925eda2f72b0de4309e026a9318200c2094c49b9c775d92"}}, {0xc1, &(0x7f00000007c0)=@string={0xc1, 0x3, "b0c9e189cc46bdddd9cb5fd2b24db6130bb01cf5ebdd56fac5212ff34529202bce5ee345e1da90e37cf84ada3cf315c92129c867c39c0045ed94423902dfc2d005e762921ae954b247a37df8a916a2a01180708537bfa1cfb2bc6e8c9581584602f951bf348459734022e6a06f48f1c9282dc0e0b26cf2de6685fa177413e2316e69e8a7fe021ff38015cf01c4d4c3906aeb08b96657bcdd85b5c31c9367da73a7265024d3bbe155510b8ec27f90eb29879a7d6af887bc81889b8650329826"}}]})
syz_usb_disconnect(0xffffffffffffffff)
syz_usb_connect(0x0, 0x24, &(0x7f0000000200)={{0x12, 0x1, 0x0, 0xfb, 0xf8, 0xe, 0x10, 0x2304, 0x21f, 0x5faf, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa7, 0x13, 0x85}}]}}]}}, 0x0)
syz_usb_disconnect(0xffffffffffffffff)
r0 = syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCSFF(r0, 0x40304580, 0x0)
r1 = syz_open_dev$evdev(0x0, 0x0, 0x0)
syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
syz_usb_ep_write$ath9k_ep2(0xffffffffffffffff, 0x83, 0x0, 0x0)
syz_usb_ep_write$ath9k_ep1(0xffffffffffffffff, 0x82, 0xec0, &(0x7f0000003640)={[{0xdfc, 0x4e00, "628dee589b0c94ce9ab4f2db0afa3a8007e2297bd32b98a2051122e3999ed8649c3e921f061f43ae373694f972a28324d59204c76671fa430c61711081b17a0abd9b8f7f0fa193962fc892b809d5f5a1d293acc03fd35276bad9ebaf1cac155e32f99ffa937fb1e0f096cd451596b678c421e72a535f4cd63bd30fe1a3594caa95f01af791ea9dd60f29dd699f824b32927e5aad3cc62579c78aa0dc117f16bc30489e94ba0ca1a52213e844932e218b112403aa018c94c7349cb0e3668d5a43c769b7e11ca38c864749c8e83b97725d815a648c87676cec6f07f9570723e7a45054d09c79d128469695f0141f0ba493dd99a4e55ad1434b525d327a952179124eb4d6231219aa59fe1beae993f39b70dbd88bcfc8977700e23e6dff786c1fdc8940d81d7a2aae70d7e3124ddaaf7dded7ee38851e71487408e931f0d15ebce16b677993d3b6ebf32b022f5e71ad76a81167585a04df67223df6f125f3d9efcb3b77566de84dbe636c38d5e2517cd68aba2dc70759aa988adfed537146bbb3cbbb90b028cd3ddaad0b79db808ea5950a445071bdc43a6f9befddc39ddb8fa30bf95d959f2e47d4c6bc3add674002b0e4acb3d8f02d6d65098567b88dc20c574122bc533b137644ed76e7f242e488e96a03d16dc9e0a0078d4c546815b7114dc94595c9c9cab8b1c08d9f457ed40bdfb6ec4ddb1c42307426d5d226d88f9708631f20aba12fd6f63d93a6ee81b16bc3b223ae8488d964dd22fd6da1e60040fb5f6993b2e04ce7b35cc7b2db5f445be029f92562346be712b06fafb0571ba06cd1a5f56b5282cc6efd1079af990dde07d1c19748e0491af3ba7f1e4557941bfbd83500c0f36699ad02d62fd2fb202e59f7ad64f32a9880efc4748e1b6e8129e073c84c4ccd279d58cfc074e7c514cae0885ec057179ac781e70e7d895c3108af53a6d4d0805e58d09f648295cd73d130759ea61ed57281be8f9fa7eaf9145561e2dd4cae6b6fde4e559f52accd2479841b6209e7a79b604c7fc085c9959c31327e5a1dc8feb47e7617b62abd07970344e0479f42068fef32794b2e7b57feeac70fff8be19529e3d59639bd17b906b022198723caac2dc39d2bda4389540e0cae8b1cdb90a02c66eb246bc556dd7e739d7cbacd203947b8d42363c00ae2822bff307ec9c1db06942da84560c3d063c033496a49dac623c517c05a43f345bb87f1558697ba5bfaf474f2d66908d84a2f8bd6b1c1d0888bde998f41442f4a68400be33885826d5447f1c7a787dbf51efe08b86201d26a3f6f1261b8f265b923d0ab4bf0d7542bcd86bbacea3ddcfb0aa7b1c1bbff2ed560c67a0d9555a889a6639c6bedf513062230e42ac7748a3b76ebe01b0fa453071507682d43f2c4f388d74440ac1331b5e5750d58f89c05573b323c77ceccda8c38241966f47ac899963a0e3d3ddd3f714d64b5eeb32896bb5aafce74c3d77c0e038fec5afef24bd9c55ef244d6db5fde2272b11412cc866eddf777b15c89a4714134f43778d4da0deecb3ce3118497ebf2f97861c2a93fdeee62e2ec7d59e6030c7aba037a6b3853676b23536916ea0a9c1a03aa6749c4a4b07e3a14040a1f94bfd716715f78500819468ff56a63a11c703f3b60421da473a5486904089ab9ee5466f7617757dff7968acfadadcd535536ad6d22da688b72982ba16de339b89d45c1403acb2f957016067fe7c17bf2956e0ec9513d6751c8b7005b49d0e01fac8712f459b9e2e30b65565ea9685b727aa56dd729a22bb8d8be6960a300032bcc7c665c1cd682f9375fea86e34c9f4c39e409c37ee989473c6ac6b0ade45bb3b1f20bf355969d21ab11bb91acc07243ebed51e48badabbf6f524a7a544ebe82fa3f6ff27299db62229f0f75d2cf258759efb84bab6235db721132ab8482548a26da2472c4903e003dc8b572e2775fef10e6e110f4c091253f048a85aa7634c318cd3a45deeef19ed2b15e0a56ef94ab4cfaaafc8237b655ce7bb7c249c38b9de8d544629f204c00279ea479e7ceca76c082820f3dc13a2742b7c9d33af78894771a4e6ec765cdad47fa898128049d94b292bab417a3d307e0a0a2dd31a892047e73079d0b966cb2b908c3e394ce534d74f479d99ce095d801096d0fc1aa3bc6b1eab7ce8f1c07e4dca96ab4507a542ff9523713465ab27d04de11ab41e27a74ad005f1758d39113ed9f06329ea3f762d5e2d636baca20d99896f1d6a151137a58f3567f287e729f4846978b26688073686f4cc8eb0e9e329d55dc87449eaab2c07d091f3f252acf72ad2070e962c2c6d78a901325ec26fa5bec972936baf36df29fd33c0fe60b6bea9df6cf5b86d512e58a7f4e57aa745f7bca291066df9405ec80eee2745611c34499b89ad4ce5a7690e24ea625485f2aee898191f741b5ce823a3d8589a9f98e8fadda1c3d51
e19230911ef9d61129725dc6349e85322432548aa82c148ff4dc21263098489449ae75bdaf68e0283adf570272c7eb2f8841a2daebda50f3bf8c6e8cd91bbc8efd90fd5f57d9a9f8a87a7c074bd7978a33f275ba9372df75ef23940a5ef86dbcd510e3bbc9b154c0255acbedfb50037ec2a4eae8d72fef9dad2b3eeef59a3ffdc48e1fc6ca7f2153364b5db1a63a8b2a944cf0cecc699e6d08d69288dbbfd3e4ebe397c9408709c96c6739395ecd54441573b8e27ee5aac30ed10c167e44eaa8fddf5b44845ee9bba67abe35174b1d1a09cf52557cf1e7320d97ff8a40fbe08c67edf5a5cb9945aa3284d576453d1d29588151b454123664499df3a3fc63085630c5b5644b31461bce3992193385937114fa5f6ad6fce7c7affc23bcd08a8e1ade702cff862a63cae4c258d8e87a5bd3323284b9204e892c28617efc7b5a96b6a548c425ea299c8b1b0243385f1312442374aa4bce1f3f76a5ef74068973db351fe6842f6eaa16b27dcfc437e790a100c15384ea59d447c173df638ed08f1e57cc369864c273108476af35acfeb6457ac37678d7955a9b283813f4c766f7b5e0598d9f28e213c351eb0cb4a3341da14e5998e4f6e0995a9adc5a06406cfc704ca7eb568f1839afca9ca668feed2096fe2256d3b1c3acdc5b5aefc155c03eb49e8d713f9e1d67008665abecdb12849a8a3e286bfec40abf6f79a431f5e72e9fdfef2f0be1c87afe5c5ea48a8acd321efbc4226588a05a185187b9d689e88e0b6884fa84dfb82da75b262ac64dcd376a2320cbf01be2fa80758e295c1072bd48b649c214d2604696e95716eb008d729b4b84288302ab73d2961aaaaf1e549d65f4d75305c945a43069e803b5a9a40ea6006774702d5b833a0a8f9ff5f4cf3024ebae3611335339fc82a0cbe1d555bcbea7996e11843e1367d011e43b0ec7ba45420ac1d5cfb03e5344a3b41b3f9efe82e9d3cf47339ac189b2747f16139ca2d9775e3c53ada7355a80f5a74702906f7a33f092d32dbf08d65df6db955d992c1d5a8c421c9e30e50013a7b8a168904c6848eb7e400c08bcd5085cf834c89be569856ed5f1ab85cdd42bd6b3de8fc33c73b452a8128ac7723d0d2e6bbb08cfb8da1bdb1f66e9e68915bfcc45eeeed406a81438d64f01d0a060b4f226b50648adc3d1111570bcb527a5b3f9639fa24e90e6a0379b7768e497aae0864abae76496c9cc10398cce34f18fa1cfadc4e2a4e80e79ee37bedcc52c6bdf650cbf8633332884709473f657bf4d1ecde777de6e2b03571d33f03181dc6624e41fc75d30eb7364ef5d94fcaddb9f82d2551d9b22d9ff68b7b50da4eff0b4f83317baa4b27a33a1cf57ffb6ddb074068239b6fea33ea29acae227ad82eff6cd5f58361892afc7466a89f695a0d08d4c0d2c879314dea7e1d00519b1f9cc0e38279b7f0760462cc2d58aa6221fcb0dc014119b68e53c33c21d52ccd244e068b185ddf44a8b34891b9fa8382bc3773ddab39669a82865b6fd71aed02a96c3e570ae8e9bae2d88a5368678756d69d127c660df01c78be23702f97c1435c774971f572a5987d1ed6fd6d0437c40e3e3f3098511d5881982928dee76c22c294c43165aa636ad71b0509273731677096d8892092c598cbd9e56484f61b055f1d834db472af8ea9cb6e14d193205507718c6bd3bcf1f55cfcb37f78d72603f6f8071b92d8274723887cdd7bcbd7f52481f599dfbe9c15fb3597a9b153f855b3917ba03a5782d862ec82951d0db157ae67e719798402e8bd5505660b57c0d35a808a09e31483a4e4e5921b2a2a6d5caf04c11b1ed42da1e05e22f00921dbd40e4a87d82402b25380bbfb492fe710074079e605e59cde085ba056779ad72d6f73399d434b549f302d546bf42408e4d010f96d78c284ff62cfa96d090b4c6847d2a3fd983be94a89cacf468a6d7eb2799039ba06ed336b91cc344a1c27ee3cd44b220f022744019a440f49d6dac555fa9b63f0b0f5c48950ab31ac58969c374e7186a227bb8be33a23a8070675ec01d657d978b2d3ee7cc467a825619e8919beae721f84d07f5b683ec999747311a419431ff214166f165185a4700bb0e7b99457fb4ffbfa9bb6ff492dbddeb2f8936dd1a873688ea1d569bb132204784c10d97574ca355b6609d5e7748b546be4a78ba10488a73f44ee45c284ec2d867af4ac4baa54444c9a8e78d993984fe5791d49bf158af00bbf62ac976ad5a7e75251e8263fe6bbfb68fe67234814cd203a30877c8c1e6e1603ddc08e07c2790485a3f46ce1dcefe27a85656722758a61b5ae9a44838ffee9559fd8dd9e0b685f8a4be6c7a6c4c264cc78144504c60416bb60e90bbf0fba4b8dde94209f6f20f1c7c7ff66c9cafba9df7f7c29359514b02de31deb83f30825aba86bf85728269e472f6e4da14b6e357976f9e33e959275d65d17ecedb2b7d5edc7dd0057e2686bdf4466ae074167be79c9ed6c8abda732e24f2a3b2074709ec03482c93e9242d4dd
fdf28b54e627eb2e208f35293803a798a1184af1d09de25d5b66d2aae4042b91e89e83a8201b8f568c01de4f5257e96e60293e41442402e8a079ae614d59a3eddec0eb3ab5b5578e87d"}, {}, {0xa6, 0x4e00, "9c16a9b4d267727a1b7b52597b9b665097610b9f99d8da8b6d38e063289c217bf416c3697a909eb10d412a7c17845346bd2338419ccddefcca776b4e5d9914da1cddbf86206a0a17d98362317396fa9dd7a0bc4551c9583c5ce5f5994c50635d664354edea4f72cceba04ec232b0b68ead989e9548ebbd7dc49f550d2b6ee0118193170d8bc24100ddd09bf3003fe3ed08013118a6db3478213201d371e4416f0f71010dff24"}, {0x7, 0x4e00, "c7819fac9e1bfb"}, {}]})
syz_usb_ep_write$ath9k_ep2(0xffffffffffffffff, 0x83, 0x0, 0x0)
syz_usb_control_io(0xffffffffffffffff, &(0x7f0000001640)={0x2c, &(0x7f00000014c0)={0x0, 0x0, 0x2, {0x2, 0x11}}, &(0x7f0000001540)={0x0, 0x3, 0x4, @lang_id={0x4}}, &(0x7f0000001580)={0x0, 0xf, 0x18, {0x5, 0xf, 0x18, 0x2, [@ssp_cap={0xc, 0x10, 0xa, 0x61, 0x0, 0x0, 0x0, 0x4}, @ext_cap={0x7, 0x10, 0x2, 0x2, 0x0, 0x9, 0x8}]}}, 0x0, &(0x7f0000001600)={0x20, 0x2a, 0xc, {0xc, 0x2a, 0x0, 0x0, 0x1, 0x6e, 0xd1, 0x1f, 0x8000}}}, &(0x7f0000001ac0)={0x84, &(0x7f0000001680)={0x0, 0xc, 0x40, "4e55cf056b9334fd208a30aa330482ed104de62e90cc4a2f765582d844146352b6bdfa3a9e185263e8be1b0798e476238426f2bfb034ca27b1e3209c9a0c092a"}, &(0x7f0000001700)={0x0, 0xa, 0x1, 0x1}, &(0x7f0000001740)={0x0, 0x8, 0x1}, &(0x7f0000001780)={0x20, 0x0, 0x4, {0x1}}, 0x0, &(0x7f0000001800)={0x40, 0x7, 0x2, 0x4255}, &(0x7f0000001840)={0x40, 0x9, 0x1, 0x1f}, &(0x7f0000001880)={0x40, 0xb, 0x2, "9499"}, 0x0, &(0x7f0000001900)={0x40, 0x13, 0x6, @random="2f261936003c"}, &(0x7f0000001940)={0x40, 0x17, 0x6}, &(0x7f0000001980)={0x40, 0x19, 0x2, "6f83"}, &(0x7f00000019c0)={0x40, 0x1a, 0x2, 0x8}, &(0x7f0000001a00)={0x40, 0x1c, 0x1, 0x5e}, 0x0, &(0x7f0000001a80)={0x40, 0x21, 0x1, 0x5}})
ioctl$EVIOCGPHYS(r1, 0x80404507, &(0x7f00000046c0)=""/4096)
r2 = syz_open_dev$evdev(&(0x7f0000000200), 0x200, 0x0)
ioctl$EVIOCSCLOCKID(r2, 0x40084503, &(0x7f0000ffcffc))
ioctl$EVIOCSREP(r2, 0x40084503, &(0x7f00000001c0))
r3 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x822b01)
write$char_usb(r3, &(0x7f0000000040)="e2", 0x1b18)


syz_usb_connect(0x0, 0x24, &(0x7f0000000140)={{0x12, 0x1, 0x0, 0xc, 0xed, 0x97, 0x8, 0xa2c, 0x8, 0x5858, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xb1, 0x76, 0x1c}}]}}]}}, 0x0)


ioctl$EVIOCSKEYCODE_V2(0xffffffffffffffff, 0x40284504, &(0x7f0000000000)={0x0, 0x2, 0x0, 0x0, "00001182062b650900eb4e00fb6a82714456862f9916ebff0005000000408000"})
r0 = syz_open_dev$evdev(&(0x7f0000001140), 0x2, 0x0)
ioctl$EVIOCGKEYCODE_V2(r0, 0x80284504, &(0x7f0000000000)=""/7)


syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x40305829, &(0x7f00000001c0))


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="1201000000000040701700ff0000000000010902010001"], 0x0)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000740)=ANY=[@ANYBLOB="120100005ab05740450c088085e10000000109021200b8dc0000000904"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f00000007c0)={0x2c, &(0x7f0000000000)={0x0, 0x0, 0x1, "f6"}, 0x0, 0x0, 0x0, 0x0})


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x5450, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMTSLOTS(r0, 0x8040450a, &(0x7f0000000100)=""/160)


r0 = syz_open_dev$evdev(&(0x7f0000001300), 0x0, 0x0)
ioctl$EVIOCGKEYCODE(r0, 0x80084504, 0x0)


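# USB device from a raw descriptor blob; after it binds, the matching USB character device is opened and a zero-length write is issued.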
syz_usb_disconnect(0xffffffffffffffff)
syz_usb_connect(0x0, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82300000904000002ca744d00090503034d00ff99090805848f"], &(0x7f0000000000)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0xff, 0x1e, 0xc6, 0x8, 0x2040, 0xd300, 0x32a3, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xc5, 0xab, 0x39}}]}}]}}, 0x0)
write$char_usb(0xffffffffffffffff, 0x0, 0x0)
syz_usb_disconnect(0xffffffffffffffff)


r0 = syz_open_dev$evdev(&(0x7f0000000280), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f0000002d40)={0x17, 0x0, 0x0})


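# UAC1 audio gadget with control, streaming and format-type descriptors plus isochronous endpoints; the surrounding hiddev/evdev calls use null paths or invalid fds.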
syz_open_dev$hiddev(0x0, 0x0, 0x0)
syz_usb_connect$uac1(0x0, 0x93, &(0x7f0000000480)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x10, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x81, 0x3, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0, 0x0, {{}, [@mixer_unit={0x5, 0x24, 0x4, 0x2, 0x9}]}}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {[@format_type_i_continuous={0xc, 0x24, 0x2, 0x1, 0x0, 0x0, 0x0, 0x0, 'b', "af08de"}, @format_type_i_continuous={0xa, 0x24, 0x2, 0x1, 0x6, 0x0, 0x0, 0x0, "ee61"}, @as_header={0x7}]}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7, 0x25, 0x1, 0x80}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x82, 0x9, 0x400, 0x0, 0x0, 0xff, {0x7, 0x25, 0x1, 0x81}}}}}}}]}}, 0x0)
ioctl$HIDIOCGFLAG(0xffffffffffffffff, 0x8004480e, 0x0)
syz_open_dev$evdev(0x0, 0xffff, 0x0)
write$char_usb(0xffffffffffffffff, 0x0, 0x0)


syz_usb_connect$hid(0x1, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x1477, 0x100e, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}, {{{0x9, 0x5, 0x81, 0x3, 0x8}}}}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="12010800000000401c1b341b000000000001090224000100000000090400000103000000092100000001220700090581030000"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000005c0)={0x24, 0x0, 0x0, &(0x7f0000000080)={0x0, 0x22, 0x7, {[@global=@item_4={0x3, 0x1, 0x0, "00ea8e00"}, @local=@item_012={0x1, 0x2, 0x0, '\x00'}]}}, 0x0}, 0x0)


r0 = syz_usb_connect(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82200000904000002ca744d00090503034d00ff99090805848f"], &(0x7f00000007c0)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_disconnect(r0)
read$char_usb(r1, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000001340)={{0x12, 0x1, 0x310, 0x94, 0xc5, 0xdd, 0x8, 0x4e6, 0x101, 0x200, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xc6, 0xa5, 0xc7}}]}}]}}, &(0x7f0000001c80)={0x0, 0x0, 0x25, &(0x7f0000000000)=ANY=[@ANYBLOB="050f250014101e00edeaec2afcb2a5ff2ff5a421bc51"]})


syz_usb_connect(0x0, 0x24, &(0x7f0000000680)={{0x12, 0x1, 0x0, 0x7e, 0x14, 0x84, 0x10, 0xfc5, 0x1227, 0xd910, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x43, 0x4f, 0x6c}}]}}]}}, 0x0)


syz_usb_connect$printer(0x2, 0x2d, &(0x7f0000000000)={{0x12, 0x1, 0x250, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1}}]}}, &(0x7f0000000140)={0x0, 0x0, 0x0, 0x0})
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x22, 0x2}, @cond})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


syz_usb_connect(0x0, 0x35, &(0x7f00000009c0)={{0x12, 0x1, 0x0, 0x87, 0xa0, 0x4f, 0x40, 0x403, 0xcff8, 0xee3e, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x23, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x43, 0x5d, 0x20, 0x0, [], [{{0x9, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, [@generic={0x8, 0xb, "00729d5c5ba1"}]}}]}}]}}]}}, 0x0)


r0 = syz_usb_connect(0x0, 0x10b, &(0x7f0000000000)=ANY=[@ANYBLOB="05010900b24b6a10e6040300770100000001090224000b010000000904000302ccd4280009050b02000000040009058a02"], 0x0)
syz_open_dev$hidraw(0x0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000b00)={0x2c, &(0x7f0000000040)=ANY=[@ANYBLOB="00000f00000009003d140f3c369197d09647190890"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0xc038480a, &(0x7f00000000c0)={0x2, 0x0, 0xfffffffe})


syz_usb_connect(0x0, 0x2d, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000ec31f8104c1302007eec0102030109021b0001000000000904000001018b750009040016"], 0x0)


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="12010000000018105e04da070000000000010902"], 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000001140), 0x2, 0x0)
ioctl$EVIOCGKEYCODE_V2(r0, 0x80284504, &(0x7f0000000040))


r0 = syz_usb_connect$hid(0x2, 0x3f, &(0x7f0000000100)=ANY=[@ANYBLOB="1201000000000520961b0a0000000000000109022d000100000000090400000503000000092100000001220500090581030000000000090502"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000d40)={0x2c, &(0x7f0000000780)=ANY=[@ANYBLOB='\x00\x00\a'], 0x0, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGKEYCODE(r0, 0x80084504, &(0x7f00000002c0)=""/145)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x0)
ioctl$EVIOCGRAB(r0, 0x40044590, &(0x7f0000000040))
ioctl$EVIOCGRAB(r0, 0x40044590, 0x0)
write$hidraw(0xffffffffffffffff, 0x0, 0x0)
r1 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000580)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES16], 0x0)
syz_usb_control_io(r1, 0x0, 0x0)
syz_usb_control_io$hid(r1, &(0x7f0000000000)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xf, {[@local=@item_4={0x3, 0x2, 0x0, "f896e404"}, @local=@item_012={0x1, 0x2, 0x0, 'e'}, @main=@item_012={0x2, 0x0, 0x0, "f792"}, @main=@item_4={0x3, 0x0, 0x0, "9ef12d19"}]}}, 0x0}, 0x0)
r2 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r2, 0x4004480f, &(0x7f00000000c0)={0x2})
r3 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
r4 = syz_open_dev$hidraw(0x0, 0x0, 0x14a042)
ioctl$HIDIOCGRAWINFO(r3, 0x80084803, &(0x7f0000000080)=""/105)
syz_usb_control_io$cdc_ecm(0xffffffffffffffff, 0x0, 0x0)
write$hidraw(r4, &(0x7f0000003000), 0x0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[], 0x0)


syz_usb_connect$printer(0x0, 0x0, 0x0, &(0x7f0000000440)={0x0, 0x0, 0x0, 0x0, 0x4, [{0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}, {0x0, 0x0}]})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


syz_usb_connect$printer(0x2, 0x2d, &(0x7f0000000000)={{0x12, 0x1, 0x250, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x1, 0x0, 0x20, 0x6}}]}}, &(0x7f0000000140)={0x0, 0x0, 0x8, &(0x7f0000000080)={0x5, 0xf, 0x8, 0x1, [@generic={0x3}]}})
syz_usb_control_io$printer(0xffffffffffffffff, &(0x7f0000000780)={0x14, 0x0, &(0x7f00000006c0)={0x0, 0x3, 0x2, @string={0x2}}}, 0x0)
syz_open_dev$hiddev(0x0, 0x0, 0x0)
syz_usb_connect$printer(0x3, 0x0, 0x0, 0x0)
syz_usb_ep_read(0xffffffffffffffff, 0x6, 0x0, 0x0)
ioctl$EVIOCGSND(0xffffffffffffffff, 0x8040451a, &(0x7f0000001240)=""/217)
ioctl$HIDIOCGVERSION(0xffffffffffffffff, 0x80044801, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_open_dev$hiddev(&(0x7f0000004b80), 0x0, 0x383002)
ioctl$EVIOCGKEYCODE_V2(0xffffffffffffffff, 0x80284504, 0x0)
ioctl$EVIOCGSND(0xffffffffffffffff, 0x8040451a, &(0x7f0000005e40)=""/60)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0xc534, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_disconnect(0xffffffffffffffff)
r1 = syz_open_dev$hidraw(0x0, 0x243c2917, 0x14a042)
write$hidraw(r1, &(0x7f0000003000), 0x0)
read$hidraw(r1, 0x0, 0x0)
write$hidraw(r1, 0x0, 0x0)
syz_usb_connect(0x0, 0x0, 0x0, 0x0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000580)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503"], 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
ioctl$HIDIOCGUSAGE(0xffffffffffffffff, 0xc038480a, &(0x7f00000000c0)={0x2})
syz_open_dev$evdev(0x0, 0x0, 0x0)
r2 = syz_open_dev$evdev(&(0x7f0000000b80), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r2, 0x80104592, &(0x7f0000000040)={0x0, 0x0, 0x0, 0xfffffffe, "207d3d00000000201b08700c1e0ac74f000000001200000000000900"})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000280)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x5, {[@main=@item_4={0x3, 0x0, 0x8, "a8f46877"}]}}, 0x0}, 0x0)


syz_usb_connect$printer(0x1, 0x2d, &(0x7f0000000000)={{0x12, 0x1, 0x310, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1}}]}}, &(0x7f0000000300)={0x0, 0x0, 0x5, &(0x7f0000000080)={0x5, 0xf, 0x5}, 0x1, [{0x0, 0x0}]})


r0 = syz_usb_connect$cdc_ecm(0x0, 0x56, &(0x7f0000000000)=ANY=[@ANYBLOB="120100000900000801000080000000000101090244000101000000090400001202060000052406000005240000000d240f01000000000000000000090581030002ee0009090582021000000000090503020002"], 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, &(0x7f00000003c0)={0x14, 0x0, &(0x7f0000000380)={0x0, 0x3, 0x1a, {0x1a}}}, 0x0)
syz_usb_ep_read(r0, 0x3, 0xcf, &(0x7f0000000080)=""/207)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000080)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000f80)={0x2c, &(0x7f0000000e80)=ANY=[], 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b04, 0x0)
syz_usb_control_io$cdc_ncm(r0, 0x0, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
r2 = syz_open_dev$evdev(&(0x7f00000004c0), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r2, 0x40284504, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000004bc0), 0xc636, 0x0)
ioctl$EVIOCGREP(r0, 0x80084503, &(0x7f0000004c00)=""/233)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="12010000090024206d041cc340000000000109022400010000a00009040000010301010009210008000122010009058103"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000240)={0x24, &(0x7f00000002c0)=ANY=[@ANYBLOB="00000c000000070001"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000180)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f0000000280)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000001200)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000040)={0x20, 0x0, 0x4, {0x1}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000140)={0xfffffffffffffd81, &(0x7f0000000700)={0x0, 0x0, 0x4, "56800000"}, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r0, 0x80104592, &(0x7f00000000c0)={0x0, 0x9, &(0x7f0000000000)="bf963ff3e317ba275f"})


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x20, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x40284504, &(0x7f0000000080)={0x0, 0x1, 0x0, 0x0, "0d69e73627848b6df85ca9295474259012147e1bf544270ed84f34cfc66288f7"})


r0 = syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x12, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd, 0x24, 0xf, 0x1, 0x0, 0x21}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x200}}, {{0x9, 0x5, 0x3, 0x2, 0x20}}}}}]}}]}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, &(0x7f0000000080)={0x14, 0x0, &(0x7f0000000040)={0x0, 0x3, 0x1a, {0x1a}}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)


syz_open_dev$evdev(&(0x7f0000000240), 0x0, 0x0)
syz_usb_connect$cdc_ncm(0x0, 0x72, &(0x7f0000000200)=ANY=[@ANYBLOB="1201000002000040257d15a4400001040001090260004201000000090400000102090000052406000105240000000d240f01000004eaffffff1e0006031a00000804800200090581", @ANYBLOB='s'], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000700)=ANY=[@ANYBLOB="12010000000000408c0d220000000000000109022400010000000009040000010300000009210000000122050009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001000000090090"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCGKEYCODE(r1, 0x40084504, &(0x7f0000000140)=""/148)


syz_usb_connect(0x0, 0x24, &(0x7f0000000200)={{0x12, 0x1, 0x0, 0xfb, 0xf8, 0xe, 0x10, 0x2304, 0x21f, 0x5faf, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa7, 0x13, 0x85}}]}}]}}, 0x0)
syz_usb_disconnect(0xffffffffffffffff)
r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCGKEYCODE(r0, 0x80084504, &(0x7f0000000140)=""/148)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x45, 0x2}, @period={0x0, 0x0, 0x0, 0x0, 0x0, {}, 0x0, 0x0}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1e7d, 0x2d5a, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001"], 0x0, 0x0, 0x0, 0x0}, 0x0)


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="12010800000000401c1b341b0000000000010902"], 0x0)
syz_usb_connect$cdc_ncm(0x0, 0x96, &(0x7f0000000700)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x0, 0x525, 0xa4a1, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x84, 0x2, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x1, 0x2, 0xd, 0x0, 0x0, {{0x9, 0x24, 0x6, 0x0, 0x1, "19b2c447"}, {0x5}, {0xd}, {0x6}, [@acm={0x4}, @country_functional={0x12, 0x24, 0x7, 0x0, 0x0, [0x0, 0x0, 0x0, 0x0, 0x0, 0x0]}, @country_functional={0xe, 0x24, 0x7, 0x0, 0x0, [0x0, 0x0, 0x0, 0x0]}]}}, {}, {0x9, 0x4, 0x1, 0x1, 0x2, 0x2, 0xd, 0x0, 0x0, "", {{{0x9, 0x5, 0x82, 0x2, 0x0, 0x1}}}}}}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x903d01)
r1 = syz_open_dev$evdev(&(0x7f0000000800), 0x0, 0x0)
ioctl$EVIOCGMASK(r1, 0x5452, &(0x7f0000000100)={0x3, 0x0, 0x0})
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x4030582b, &(0x7f0000000000)={0xc, "3e6aaec34ef9eab95f1e4ba6"})


r0 = syz_usb_connect$printer(0x0, 0x36, &(0x7f0000000640)=ANY=[@ANYBLOB="0d01000009000008250592d20700006a3b010902241700fa0074980904e4ff11070103000905010200ffe0"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000000)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f00000005c0)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x0)
ioctl$EVIOCGSW(r0, 0x40084503, &(0x7f00000010c0)=""/4096)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000050cb5340450c10108e492940a80909021b00090000000009040002010035040009058dff86"], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, 0x0, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x401c5820, &(0x7f00000001c0)={0xfef5})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000700)=ANY=[@ANYBLOB="12010000000000408c0d220000000000000109022400010000000009040000010300000009210000000122050009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001"], 0x0, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x20, 0x5ac, 0x24f, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x5, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000d40)={0x2c, &(0x7f0000000780)=ANY=[@ANYBLOB='\x00\x00\a'], 0x0, 0x0, 0x0, 0x0}, 0x0)
syz_open_dev$hidraw(&(0x7f0000000280), 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x69, 0x12, 0x83, 0x8, 0x20b7, 0x1540, 0x2a2, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xff}}]}}]}}, 0x0)
syz_open_dev$hidraw(0x0, 0x0, 0x0)
r0 = syz_usb_connect$cdc_ecm(0x0, 0x56, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x10, 0x1e7d, 0x31ce, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x2, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0xb}}}}]}}]}}, 0x0)
syz_usb_connect_ath9k(0x3, 0x0, 0x0, 0x7fe782539b0b)
syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCREVOKE(0xffffffffffffffff, 0x40044591, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000200)={0x24, 0x0, 0x0, &(0x7f0000000240)={0x0, 0x22, 0xb, {[@main=@item_012={0x2, 0x0, 0x0, '\x00<'}, @main=@item_4={0x3, 0x0, 0xa, "225aa4c5"}, @main=@item_012={0x2, 0x0, 0xc, "a914"}]}}, 0x0}, 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="120100007fa42810d112955a2721010203010902240001000000000904000002ff04730009050a3a2804000000090587020002"], 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_connect(0x0, 0x2d, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[@ANYBLOB="00000100000001"], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b02, 0x0)
syz_usb_disconnect(r0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x76, 0xef, 0x4, 0x20, 0x6cd, 0x107, 0xef52, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xb8, 0xd3, 0xec}}]}}]}}, 0x0)


syz_open_dev$evdev(&(0x7f0000000240), 0x0, 0x0)
syz_usb_connect$cdc_ncm(0x0, 0x72, &(0x7f0000000200)=ANY=[@ANYBLOB="1201000002000040257d15a4400001040001090260004201000000090400000102090000052406000105240000000d240f01000004eaffffff1e0006031a00000804800200090581", @ANYBLOB='s'], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, &(0x7f0000000000)="ff", 0x1)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGKEY(r0, 0x40044582, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f00000000c0)={{0x12, 0x1, 0x310, 0x78, 0x56, 0xbb, 0x40, 0xbda, 0x8153, 0xe23d, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x6}}]}}]}}, &(0x7f0000000900)={0x0, 0x0, 0x5, &(0x7f00000007c0)={0x5, 0xf, 0x5}})


syz_usb_connect(0x0, 0x3f, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0x1017, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x41015500, &(0x7f0000000400))


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x5a, 0x38, 0x31, 0x20, 0xbc3, 0x1, 0x58d6, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xbf, 0xeb, 0xd8}}]}}]}}, 0x0)


r0 = syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x200}}}}}]}}]}}, 0x0)
syz_usb_ep_write(r0, 0x82, 0x0, 0x0)
syz_usb_disconnect(r0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1e7d, 0x2ced, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x3, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000200)={0x24, 0x0, 0x0, &(0x7f0000000180)={0x0, 0x22, 0x5, {[@local=@item_4={0x3, 0x2, 0x0, "d32052f2"}]}}, 0x0}, 0x0)


syz_usb_control_io$cdc_ecm(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(0xffffffffffffffff, &(0x7f0000000bc0)={0x14, 0x0, 0x0}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000180), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x80104592, &(0x7f0000000040)={0x0, 0x0, 0x0, 0xfffffffe, "00207d2000000000201b14700c1e0ac74f000000001200000000000900"})
ioctl$EVIOCSFF(r0, 0x40304580, &(0x7f0000000080)={0x0, 0x2, 0x6, {0x4, 0x8}, {0xfd, 0x200}, @const={0xfb, {0x4, 0x8}}})
ioctl$EVIOCREVOKE(r0, 0x80004507, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f00000000c0)={0x0, 0x0, 0x0})


syz_usb_connect$uac1(0x4, 0x92, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x80, 0x3, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0, 0x0, {{}, [@selector_unit={0x9, 0x24, 0x5, 0x0, 0x0, "03cf3322"}]}}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {[@as_header={0x7}, @format_type_ii_discrete={0x11, 0x24, 0x2, 0x2, 0x0, 0x0, 0x0, "ff19f02332733822"}]}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x82, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}}}}]}}, 0x0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000003005740ed0b0011c3ec000000010902120001000000000904"], 0x0)


syz_usb_connect$printer(0x1, 0x2d, &(0x7f0000000000)={{0x12, 0x1, 0x310, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1}}]}}, &(0x7f0000000300)={0x0, 0x0, 0xf, &(0x7f00000000c0)={0x5, 0xf, 0xf, 0x1, [@ss_cap={0xa, 0x10, 0x3, 0x0, 0x0, 0x0, 0x1}]}, 0x1, [{0x0, 0x0}]})


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000080)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000180), 0x3, 0x0)
ioctl$EVIOCSKEYCODE_V2(r1, 0x80111500, &(0x7f0000000040)={0x0, 0x0, 0x0, 0x0, "25ae97df0000800021c599885b000000000001000012f6ff5cdd8bc400"})
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000f80)={0x2c, &(0x7f0000000e80)=ANY=[], 0x0, 0x0, 0x0, 0x0})
syz_open_dev$char_usb(0xc, 0xb4, 0x3)
syz_usb_connect$cdc_ecm(0x0, 0x56, &(0x7f00000003c0)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x10, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x44, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x2, 0x0, 0x0, {{0x5}, {0x5, 0x24, 0x0, 0x100}, {0xd}}, {[{{0x9, 0x5, 0x81, 0x3, 0x40}}], {{0x9, 0x5, 0x82, 0x2, 0x0, 0x0, 0x0, 0x9}}}}}]}}]}}, 0x0)
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {}, {}, @cond=[{}, {0x0, 0x0, 0x4, 0x4}]})
r2 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r2, &(0x7f0000000040)="e2", 0x2250)


syz_usb_connect(0x0, 0x24, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x95, 0xbc, 0x6b, 0x20, 0x13b1, 0x42, 0x496b, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xcf, 0x5, 0x5a}}]}}]}}, 0x0)


syz_usb_connect$uac1(0x0, 0x9e, &(0x7f00000000c0)=ANY=[@ANYBLOB="12010000000000406b1d010140000102030109028c0003010000000904000000010100000a24010000000201021124060000050000000000000000000000072408000000000b240700000000a5cfe3b00a240804", @ANYRESHEX=0x0], 0x0)


syz_open_dev$hidraw(0xffffffffffffffff, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000200), 0x0, 0x0)
ioctl$EVIOCGRAB(r0, 0x40044590, &(0x7f00000014c0))
ioctl$EVIOCGRAB(r0, 0x40044590, &(0x7f0000001c00))


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, 0xffffffffffffffff, 0x0)


syz_usb_connect$uac1(0x6, 0x71, &(0x7f0000000180)=ANY=[], 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82200000904000002ca744d00090503034d00ff99090805848f"], &(0x7f0000000200)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r0, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f00000000c0)=ANY=[@ANYBLOB="12010000a42887406d040e0a759400000001090212000100000000090400000003"], 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f00000012c0)={{0x12, 0x1, 0x0, 0x37, 0x61, 0xeb, 0x8, 0x5ac, 0x223, 0x74f4, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0xa4, 0x2}}]}}]}}, 0x0)


syz_usb_connect$cdc_ecm(0x2, 0x4d, 0x0, 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
syz_open_dev$evdev(0x0, 0x0, 0x0)
write$hidraw(r0, 0x0, 0x0)
write$hidraw(r0, 0x0, 0x0)
syz_usb_ep_write(0xffffffffffffffff, 0x0, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000e2793b10d10501200006010203010902120008000000000904"], 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000100)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES16], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000001540)={0x24, 0x0, 0x0, &(0x7f00000001c0)={0x0, 0x22, 0xf, {[@local=@item_4={0x3, 0x2, 0x0, 'WNib'}, @local=@item_4={0x3, 0x2, 0x0, "f85edaca"}, @main=@item_4]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000540), 0x0, 0x0)
ioctl$HIDIOCGPHYS(r1, 0x8004480e, &(0x7f0000000000))


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581d7"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000001540)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xf, {[@main=@item_4={0x3, 0x0, 0x8, "a8407a73"}, @local=@item_4={0x3, 0x2, 0x0, "93bf0280"}, @main=@item_4={0x3, 0x0, 0x0, "7488dffc"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000540), 0x0, 0x0)
ioctl$HIDIOCGREPORT(r1, 0x400c4807, &(0x7f0000000900)={0x1, 0x200})


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0xfffbffffffffffff, 0x42)
ioctl$EVIOCSKEYCODE(r0, 0x40086602, &(0x7f0000000340))


syz_usb_connect(0x0, 0x24, &(0x7f0000000080)=ANY=[@ANYBLOB="1201000095bc6b20b31342006a490000080109020000cf"], 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x97, 0xff, 0x82, 0x8, 0x2058, 0x1005, 0xc19b, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xbf, 0x57, 0x5a}}]}}]}}, 0x0)
syz_usb_connect(0x0, 0x36, 0x0, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x4b, 0x2}, @cond})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


syz_open_dev$hiddev(0x0, 0x0, 0x0)
r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f00000004c0)=ANY=[@ANYBLOB="12010000000000407d1ed43000000000000109022400010000000009040000010300000009210000000122070009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000240)={0x24, 0x0, 0x0, &(0x7f0000000000)=ANY=[@ANYBLOB='\x00\"\a'], 0x0}, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f00000007c0)={0x84, &(0x7f0000000380)={0x0, 0x0, 0x2, "80a1"}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x80104592, &(0x7f0000000000)={0x0, 0x0, 0xd9, 0xc0, "00f8ffffffffffffff0000f62386f0dfdf293700"})


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0xcd, 0x6d, 0xef, 0x20, 0xccd, 0x60, 0xd7, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x8a, 0xc3, 0xc7}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f00000000c0)={0x0, 0x0, 0x0})
syz_usb_disconnect(r0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x74, 0x2}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x822b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x1068)
ioctl$EVIOCGLED(r0, 0x80404519, &(0x7f0000000140)=""/118)


syz_usb_connect(0x0, 0x24, &(0x7f0000000400)=ANY=[@ANYBLOB="120100000a38a74030088000a0f0000000010902120001000000000904"], 0x0)


syz_usb_connect$uac1(0x1, 0x71, &(0x7f00000001c0)={{0x12, 0x1, 0x201, 0x0, 0x0, 0x0, 0x8, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x5f, 0x3, 0x1, 0x0, 0x0, 0x0, {{}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x82, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}}}}]}}, &(0x7f0000001ac0)={0x0, 0x0, 0xf, &(0x7f0000001840)=ANY=[@ANYBLOB="050f0f00010a1003"], 0x1, [{0x0, 0x0}]})


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f0000002d40)={0xe10baf739ff75b9b, 0x0, 0x0})


r0 = syz_open_dev$evdev(0x0, 0x0, 0x0)
syz_usb_disconnect(r0)
r1 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
write$hidraw(r1, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
read$hidraw(r1, &(0x7f0000001080)=""/147, 0x93)
write$hidraw(r1, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f00000004c0)={{0x12, 0x1, 0x0, 0xf8, 0xed, 0x45, 0x40, 0x2040, 0x2900, 0xd452, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x83, 0xe4, 0x5d}}]}}]}}, 0x0)
syz_usb_connect$cdc_ecm(0x0, 0x0, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0xc9, 0xcf, 0xe8, 0x20, 0xb49, 0x64f, 0x31a2, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa5, 0xd2, 0x66}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x2, 0x0)
ioctl$EVIOCGREP(r0, 0x80084504, &(0x7f0000001540)=""/86)


syz_usb_connect$hid(0x0, 0x5, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x44e, 0x121e, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x3f, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="12010000000018105e04da0700000000000109022400010000000009040000090300000009210000000122220009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, 0x0, 0x0, &(0x7f0000000000)=ANY=[@ANYBLOB="00222200000096231306e53f070c0000002a9000070d00be0083"], 0x0}, 0x0)
write$char_usb(0xffffffffffffffff, &(0x7f0000000040), 0x0)


r0 = syz_usb_connect(0x0, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82300000904000002ca744d00090503034d00ff99090805848f"], &(0x7f0000000000)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_disconnect(r0)
write$char_usb(r1, 0x0, 0x0)


r0 = syz_usb_connect$cdc_ncm(0x0, 0x6e, &(0x7f0000000480)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x5c, 0x2, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x1, 0x2, 0xd, 0x0, 0x0, {{0x5}, {0x5}, {0xd}, {0x6}}, {{0x9, 0x5, 0x81, 0x3, 0x200}}}, {}, {0x9, 0x4, 0x1, 0x1, 0x2, 0x2, 0xd, 0x0, 0x0, "", {{{0x9, 0x5, 0x82, 0x2, 0x200}}, {{0x9, 0x5, 0x3, 0x2, 0x200}}}}}}}]}}, 0x0)
syz_usb_control_io$cdc_ncm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(r0, 0x0, &(0x7f0000000340)={0x44, 0x0, 0x0, 0x0, &(0x7f0000000200)={0x20, 0x80, 0x1c, {0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}}, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$cdc_ncm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(r0, &(0x7f0000000080)={0x14, 0x0, &(0x7f0000000040)={0x0, 0x3, 0x1a, {0x1a}}}, 0x0)
syz_usb_ep_write(r0, 0x82, 0x5, &(0x7f0000002340)='hello')


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0xe2, 0x54, 0x46, 0x10, 0x23a7, 0xfedc, 0xe00b, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x1, 0x3, 0x42}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000280), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f0000001a80)={0x0, 0x0, 0xffffffffffffffff})
ioctl$EVIOCGRAB(r0, 0x40044590, &(0x7f0000000440))
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x74, 0x2}})
r1 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x822b01)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x1068)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
syz_open_dev$evdev(0x0, 0x0, 0x0)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
syz_open_dev$hidraw(&(0x7f0000000000), 0x5, 0x200)
read$hidraw(r0, &(0x7f0000001080)=""/147, 0x93)


syz_usb_connect(0x2, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="120100008010bd40820514009dbb0000000109022400011b00001009040000022a3e740009058bff7f0040101109050b362f"], 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000100)={{0x12, 0x1, 0x0, 0xa7, 0x4, 0x2b, 0x10, 0xd46, 0x81, 0x1982, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x89, 0x84, 0xaa, 0x0, [], [{{0x9, 0x5, 0xc, 0xf32e960e24d01a7f}}]}}]}}]}}, 0x0)


ioctl$EVIOCSMASK(0xffffffffffffffff, 0x40104593, 0x0)
r0 = syz_usb_connect$printer(0x0, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB="0d01000009000008250592d20700006a3b010902241700fa0074980904e4ff11070103000905010200ffe00000090582021a"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000000)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f00000005c0)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_connect$cdc_ncm(0x5, 0x94, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(0xffffffffffffffff, 0x0, &(0x7f0000000a40)={0x44, &(0x7f0000000840)={0x60, 0x30, 0x5, "652d90c2ee"}, 0x0, 0x0, 0x0, 0x0, &(0x7f0000000980)={0x20, 0x83, 0x2}, &(0x7f00000009c0)={0x20, 0x87, 0x2, 0xfffd}, &(0x7f0000000a00)={0x20, 0x89, 0x2, 0x1}})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r1, &(0x7f0000000100)="cd", 0x1)


syz_usb_connect$cdc_ecm(0x0, 0x0, 0x0, &(0x7f0000000500)={0x0, 0x0, 0x16, &(0x7f0000000280)={0x5, 0xf, 0x16, 0x2, [@ext_cap={0x7}, @ss_cap={0xa, 0x10, 0x3, 0x0, 0x14, 0x0, 0x1}]}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


r0 = syz_open_dev$evdev(&(0x7f0000000180), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x80111500, &(0x7f0000000040)={0x0, 0x0, 0x0, 0x0, "25ae97df0000800021c599885b000000000001000012f6ff5cdd8bc400"})


syz_usb_connect(0x2, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82200000904000002ca744d00090503034d00ff99090805848f"], &(0x7f0000000200)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r0, &(0x7f0000000040)=""/51, 0x33)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000001180)=ANY=[@ANYBLOB="12010000090003206d0414c34000ffff00010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0xc0085504, &(0x7f0000000400))


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0xc08d, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000540)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x5, {[@main=@item_4={0x3, 0x0, 0x0, "96cf62ab"}]}}, 0x0}, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {}, {}, @cond=[{}, {0x0, 0x0, 0x4, 0x4}]})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x2, 0x400)
ioctl$EVIOCSCLOCKID(r0, 0x400445a0, &(0x7f0000000040)=0x8)
r1 = syz_open_dev$evdev(0xfffffffffffffffc, 0xdb, 0x4000)
ioctl$EVIOCGBITSW(r0, 0x80404525, &(0x7f0000000080)=""/70)
ioctl$EVIOCSREP(r0, 0x40084503, &(0x7f0000000100)=[0x1ff, 0xfffffff8])
r2 = syz_open_dev$evdev(&(0x7f0000000140), 0x800, 0x20800)
ioctl$EVIOCGVERSION(r2, 0x80044501, &(0x7f0000000180)=""/136)
ioctl$EVIOCGSW(r1, 0x8040451b, &(0x7f0000000240)=""/111)
ioctl$EVIOCSREP(r1, 0x40084503, &(0x7f00000002c0)=[0x3, 0x80000000])
r3 = syz_open_dev$evdev(&(0x7f0000000300), 0x9, 0x101082)
ioctl$EVIOCGABS3F(r3, 0x8018457f, &(0x7f0000000340)=""/87)
syz_open_dev$evdev(&(0x7f00000003c0), 0xffffffffffffffff, 0x600)
ioctl$EVIOCGVERSION(0xffffffffffffffff, 0x80044501, &(0x7f0000000400)=""/71)
r4 = syz_open_dev$evdev(&(0x7f0000000480), 0x4, 0x151100)
ioctl$EVIOCGMASK(r4, 0x80104592, &(0x7f0000000540)={0x10, 0x41, &(0x7f00000004c0)="5e33fa573a66a5417894432f75878ebc6e3fa3b2f51912c28ef0f30d9fd91638c2d4e6f750dc0de8c6d6cc92889717f5f5fa7c3e53120486b315f1420feabc4fe9"})
syz_open_dev$evdev(&(0x7f0000000580), 0x9, 0x2)
ioctl$EVIOCSCLOCKID(r3, 0x400445a0, &(0x7f00000005c0)=0x400)
r5 = syz_open_dev$evdev(&(0x7f0000000600), 0x6828, 0x402)
ioctl$EVIOCGKEYCODE_V2(r5, 0x80284504, &(0x7f0000000640)=""/146)
ioctl$EVIOCGABS3F(r3, 0x8018457f, &(0x7f0000000700)=""/77)
r6 = syz_open_dev$evdev(&(0x7f0000000780), 0x6, 0x20002)
ioctl$EVIOCGVERSION(r6, 0x80044501, &(0x7f00000007c0)=""/86)
ioctl$EVIOCSKEYCODE_V2(r3, 0x40284504, &(0x7f0000000840)={0x80, 0xe, 0x8001, 0x4, "2f679ec69d4362225b383d09b578bb6717a77e195afe0917695a33d40f2abd92"})
r7 = syz_open_dev$evdev(&(0x7f0000000880), 0x9, 0x290400)
ioctl$EVIOCSKEYCODE_V2(r7, 0x40284504, &(0x7f00000008c0)={0x0, 0x12, 0x9, 0x0, "cd45d114cff6b5f297a92ff4ff0767baac2f080ec97423c7c76a6e37bf73ea11"})
ioctl$EVIOCGREP(r3, 0x80084503, &(0x7f0000000900)=""/132)
ioctl$EVIOCGPHYS(r2, 0x80404507, &(0x7f0000000a40)=""/139)
ioctl$EVIOCSKEYCODE(r0, 0x40084504, &(0x7f0000000b00)=[0x1, 0x3])


r0 = syz_usb_connect$hid(0x0, 0x7c, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x1e7d, 0x2cf6, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x25, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000d40)={0x2c, &(0x7f0000000780)=ANY=[@ANYBLOB='\x00\x00\a'], 0x0, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001400), 0x0, 0x0)
ioctl$EVIOCRMFF(r0, 0x40044581, 0x0)


r0 = syz_usb_connect_ath9k(0x3, 0x5a, &(0x7f0000001640)={{0x12, 0x1, 0x200, 0xff, 0xff, 0xff, 0x40, 0xcf3, 0x9271, 0x108, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x48}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_disconnect(r0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f000905", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000b00)={0x2c, &(0x7f0000000040)=ANY=[@ANYBLOB="00000f00000009003d140f3c369197d09647190890"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000300), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0x40184810, &(0x7f00000000c0)={0x2})


syz_usb_connect(0x0, 0x2d, &(0x7f0000001100)={{0x12, 0x1, 0x0, 0x39, 0xaa, 0xdd, 0x20, 0x4cb, 0x123, 0x88b9, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0xa2, 0x43, 0x4f, 0x0, [], [{{0x9, 0x5, 0x5}}]}}]}}]}}, 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[@ANYBLOB="00000100000001"], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b04, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000000080)={0x0, 0x0, 0x0, {}, {}, @ramp={0x0, 0x0, {0x0, 0x0, 0x4, 0x8000}}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x52, 0x2}, @cond})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x8d, 0x4a, 0x30, 0x20, 0x6cd, 0x110, 0x7171, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x22, 0x63, 0x47}}]}}]}}, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x6, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5ac, 0x27f, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x1, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
syz_usb_disconnect(r0)


r0 = syz_usb_connect(0x0, 0x10b, &(0x7f0000000000)=ANY=[@ANYBLOB="05010900b24b6a10e6040300770100000001090224000b010000000904000302ccd4280009050b02000000040009058a02"], 0x0)
syz_open_dev$hidraw(0x0, 0x0, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000240), 0x0, 0x0)
ioctl$EVIOCGLED(r0, 0x80284511, 0x0)


syz_open_dev$evdev(&(0x7f0000000000), 0x3, 0x4000)


r0 = syz_open_dev$evdev(&(0x7f00000001c0), 0x0, 0x0)
ioctl$EVIOCGEFFECTS(r0, 0xc0189436, 0xfffffffffffffffc)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x0)
ioctl$EVIOCGRAB(r0, 0x40044590, &(0x7f0000000040))
ioctl$EVIOCGRAB(r0, 0x40044590, 0x0)


r0 = syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCGMASK(r0, 0x80104592, 0x0)
syz_usb_connect(0x6, 0x2d, &(0x7f0000000040)=ANY=[], 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
r1 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
r2 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
write$hidraw(r2, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
write$hidraw(r1, 0x0, 0x0)
syz_open_dev$hidraw(&(0x7f0000000000), 0x5, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCSMASK(0xffffffffffffffff, 0x40104593, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0x1017, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x5501, &(0x7f0000000400))


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000440)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f000905", @ANYRES16], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xf, {[@global=@item_4={0x3, 0x1, 0x0, "9b4d3948"}, @main=@item_012={0x1, 0x0, 0x0, "9f"}, @local=@item_4={0x3, 0x2, 0x0, "6d011fe4"}, @main=@item_012={0x2, 0x0, 0x0, "1a79"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000d40), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0x81044804, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0x5501, 0x0)


ioctl$EVIOCGKEYCODE_V2(0xffffffffffffffff, 0x801c581f, 0x0)
syz_usb_connect(0x0, 0x3f, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000d0918108ac051582588f0000000109022d00010000000009040000030b08000009058d67c8002a000009050502000000000009058b6e"], 0x0)


syz_usb_connect(0x0, 0x84b, &(0x7f0000001240)=ANY=[@ANYBLOB="120100004d69c608c016df05acec0000000109023908030000000009041c000203"], 0x0)


syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
syz_open_dev$hidraw(&(0x7f0000000080), 0x0, 0x0)
syz_usb_connect$cdc_ncm(0x0, 0x6e, &(0x7f0000000040)=ANY=[@ANYBLOB="12010000090000082502000000000000000109025c00020100f92a0904000001020900000524060001053408fa6e0d240f0100000000000d000a0006471a010000190581"], 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
ioctl$HIDIOCGRDESC(r0, 0x40305839, &(0x7f0000000280)={0xd, "50ef33490271c1de6f2522669d"})


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000000180)={0x0, 0x0, 0x0, {0x0, 0x11}, {0x6}, @ramp})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000050cb5340450c10108e492940a80909021b00090000000009040002010035040009058dff86"], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r0, 0x0, 0x0)


syz_open_dev$evdev(0x0, 0x0, 0x0)
r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000700)=ANY=[@ANYBLOB="12010000000000408c0d220000000000000109022400010000000009040000010300000009210000000122050009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001000000090090"], 0x0, 0x0, 0x0, 0x0}, 0x0)
ioctl$EVIOCGKEYCODE(0xffffffffffffffff, 0x4020940d, 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCGKEYCODE(r1, 0x80084504, &(0x7f0000000140)=""/148)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000000000404c05d50310000200000109022400010000000009041200010300000009210000000122010009058103"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000b80)={0x24, 0x0, 0x0, &(0x7f0000000b00)={0x0, 0x22, 0x5, {[@main=@item_4={0x3, 0x0, 0x0, "dda79475"}]}}, 0x0}, 0x0)
syz_open_dev$hidraw(0x0, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f000905", @ANYRES64], 0x0)
r1 = syz_open_dev$evdev(&(0x7f00000002c0), 0x0, 0x0)
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffff36, &(0x7f0000000200)="952bb3e006ae9a4c3a"})
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000b00)={0xffffffffffffff9a, &(0x7f0000000040)=ANY=[@ANYRES16=0x0], 0x0, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGKEY(r0, 0x40086602, 0xffffffffffffffff)


r0 = syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201fb0019030320d812010079de01ec020109021b0001000003000904000001785ecc00090585020004"], 0x0)
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r1, &(0x7f0000000080)=""/239, 0xef)
syz_usb_disconnect(r0)


write$hidraw(0xffffffffffffffff, &(0x7f0000003000), 0x0)
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x56, 0x0, 0x0, {0x0, 0x1}, {0x58, 0x1}})
r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x2, 0x822b01)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f00000000c0)={0x1, 0x0, 0x0})
syz_open_dev$evdev(&(0x7f0000000080), 0xffffffffffffff7f, 0x4f0500)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x12d8)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x5543, 0x4, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x7}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000180)={0x24, 0x0, 0x0, &(0x7f0000000080)=ANY=[@ANYBLOB='\x00\"\n'], 0x0}, 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000500)={{0x12, 0x1, 0x0, 0x89, 0xcc, 0xd, 0x10, 0x5c6, 0x9201, 0xbd0b, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0xbf, 0xcf, 0x62, 0x0, [], [{{0x9, 0x5, 0x0, 0x1, 0x3ff}}]}}]}}]}}, 0x0)


r0 = syz_usb_connect$printer(0x0, 0x2d, &(0x7f0000000480)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1}}]}}, 0x0)
syz_usb_disconnect(r0)


r0 = syz_open_dev$evdev(&(0x7f0000001640), 0x0, 0x0)
ioctl$EVIOCSABS3F(r0, 0x401845ff, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000440)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f000905", @ANYRES16], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xf, {[@global=@item_4={0x3, 0x1, 0x0, "9b4d3948"}, @main=@item_012={0x1, 0x0, 0x0, "9f"}, @local=@item_4={0x3, 0x2, 0x0, "6d011fe4"}, @main=@item_012={0x2, 0x0, 0xb, "1a79"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000d40), 0x0, 0x0)
ioctl$HIDIOCGREPORT(r1, 0x400c4807, &(0x7f00000004c0)={0x3})


syz_open_dev$evdev(&(0x7f0000000240), 0xffffffffffffffff, 0x40)


r0 = syz_open_dev$evdev(&(0x7f0000001240), 0x0, 0x0)
ioctl$EVIOCGSND(r0, 0x40086602, &(0x7f0000000240)=""/4096)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x2, 0x0)
ioctl$EVIOCSKEYCODE(r0, 0x40084504, &(0x7f0000000040)=[0x1000])


syz_usb_connect(0x0, 0x3f, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x55, 0x53, 0xdc, 0x20, 0x12d1, 0x9cab, 0x2e6b, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x2d, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x3, 0xff, 0x5, 0x1a, 0x0, [], [{{0x9, 0x5, 0xe, 0xa}}, {}, {{0x9, 0x5, 0xf, 0x2}}]}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000580)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES16], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000000)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xf, {[@local=@item_4={0x3, 0x2, 0x0, "f896e404"}, @local=@item_012={0x1, 0x2, 0x0, 'e'}, @main=@item_012={0x2, 0x0, 0x0, "f792"}, @main=@item_4={0x3, 0x0, 0x0, "9ef12d19"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0x4004480f, &(0x7f00000000c0)={0x2})


syz_open_dev$evdev(0x0, 0x0, 0x0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000540)={{0x12, 0x1, 0x0, 0xc5, 0x2d, 0x22, 0x40, 0x4cb, 0x10b, 0x3de0, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x6f, 0x3d, 0x4d}}]}}]}}, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x6a, 0x2}, @cond})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f0000000180)={0x1, 0x68, &(0x7f0000000100)="f13b8716bcfdcca0c8af3538908cd9249f533b44e0242eab855e96f55b9ffada39f62d8b23dd050e4bd47c6e12e3bd98840d9ae70008ed417a94f6c6b2fccb3a3bcb625653302c9301e4fe75a70cdee4c457f97a50895e6577df2340fb458bd55be572ac782da8b5"})
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {}, {}, @cond=[{}, {0x0, 0x0, 0x4, 0x4}]})
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x3a, 0x2}, @ramp})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)
ioctl$EVIOCGKEY(r0, 0x80404518, &(0x7f0000000140)=""/183)
read$hidraw(0xffffffffffffffff, 0x0, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x7f, 0x2}, @cond})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


syz_usb_connect(0x0, 0x3b, &(0x7f0000001340)={{0x12, 0x1, 0x310, 0x94, 0xc5, 0xdd, 0x8, 0x4e6, 0x101, 0x200, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x29, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xc6, 0xa5, 0xc7, 0x0, [@cdc_ecm={{0x5}, {0x5}, {0xd}}]}}]}}]}}, &(0x7f0000001c80)={0x0, 0x0, 0x25, &(0x7f0000001840)={0x5, 0xf, 0x25, 0x2, [@ss_container_id={0x14, 0x10, 0x4, 0x0, "edeaec2afcb291ff2ff5a421bc510d6b"}, @ssp_cap={0xc}]}})


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x3a, 0x2}, @ramp})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCRMFF(r0, 0x541b, 0x0)


syz_usb_connect(0x0, 0x3f, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000d0918108ac051582588f0000000109022d00010000000009040000030b08000009058d67c8002a000009050502000000000009058b6e"], 0x0)


r0 = syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f00000000c0)=ANY=[@ANYBLOB="12010000020000402505a1a440000000010109023b000101000000090400001202060000052406000005240000000d240f01000000000000000000090582020002000000090503020002"], 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
read$hidraw(0xffffffffffffffff, &(0x7f00000001c0)=""/22, 0x16)
syz_usb_control_io$cdc_ecm(r0, &(0x7f0000000080)={0x14, 0x0, &(0x7f0000000040)={0x0, 0x3, 0x1a, {0x1a}}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_ep_write(r0, 0x82, 0x0, 0x0)


syz_usb_connect$hid(0x0, 0x0, 0x0, 0x0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000200)={{0x12, 0x1, 0x0, 0xfb, 0xf8, 0xe, 0x10, 0x2304, 0x21f, 0x5faf, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa7, 0x13, 0x85}}]}}]}}, 0x0)
syz_open_dev$hidraw(&(0x7f0000000000), 0x0, 0x0)
r0 = syz_open_dev$evdev(0x0, 0x0, 0x822b01)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
syz_usb_connect(0x0, 0x24, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0x1f, 0x1d, 0x64, 0x20, 0x13e5, 0x1, 0xa1f5, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x8, 0xc8, 0x67}}]}}]}}, 0x0)
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000300)={0x0, 0x5, &(0x7f00000000c0)="952bb3e006"})
write$char_usb(r0, 0x0, 0x0)
syz_open_dev$evdev(&(0x7f0000000b80), 0x0, 0x0)
syz_usb_control_io$cdc_ecm(0xffffffffffffffff, 0x0, &(0x7f0000000040)={0x1c, &(0x7f0000000180)={0x0, 0x0, 0x100, "a856070d0b55c32d63afd0c7ba751e90f32cd84345a3ec79792aea9b2397a461a1570c12fd626a56e3541f7809c95fa5375ef1e4bc3cf3a4b1f646ba68bfa3cc37fe2bd11b13235167c4f110465d34ff180c18f0bc1a76408f0af4ea54d1152f320b024cbf403c66df03e942785376adb43a288aa3030e0000217629a3a15b2446474363b75b044bc785abc29719442657b52793ec7ba4b8b9c7f2abb8d2ad5f5c29181a860e8153d140467a98845c60335b6bac054f25106340ba7f2d8383370d59504f4976e81a50ed83a526d56cae8e09492462008d05444aee227e0185bda58971fcebe31001e197d0589f7d60a268bf811fb254ca226c85a48cfc17a0f2"}, 0x0, &(0x7f0000000000)={0x0, 0x8, 0x1}})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000140)=ANY=[@ANYBLOB="1201000000000040ac054382408b0b00000109022400010000"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000003c0)={0x24, 0x0, 0x0, &(0x7f0000000080)={0x0, 0x22, 0x1, {[@main]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCSREPORT(r1, 0x80044801, &(0x7f0000000040))


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x2e, 0xab, 0x5a, 0x40, 0x1c40, 0x534, 0x6dcc, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x76, 0xdf, 0xdb}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x541b, &(0x7f0000000040))


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000540)={{0x12, 0x1, 0x0, 0xc5, 0x2d, 0x22, 0x40, 0x4cb, 0x10b, 0x3de0, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x6f, 0x3d, 0x4d}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x4004550d, &(0x7f0000000400))


r0 = syz_usb_connect(0x0, 0x36, &(0x7f00000003c0)=ANY=[@ANYBLOB="31010000dccd5e08cb060700000000952301090224000100007e000904340102d469e70009058a", @ANYRES8], 0x0)
syz_usb_control_io$uac1(r0, 0x0, 0x0)
syz_open_dev$hidraw(0x0, 0x0, 0x0)
syz_open_dev$evdev(&(0x7f0000000040), 0x75c, 0x0)


r0 = syz_usb_connect$printer(0x0, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB="0d01000009000008250592d20700006a3b010902241700fa0074980904e4ff11070103000905010200ffe00000090582021a"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000000)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f00000005c0)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_connect$cdc_ncm(0x5, 0x94, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(0xffffffffffffffff, 0x0, 0x0)
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r1, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000180)=ANY=[@ANYBLOB="12010000090003206d0414c340000000000109022400010000a000090400000103010100092100080001220100090581", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, &(0x7f0000000dc0)=ANY=[@ANYBLOB="00020c0000000c0002"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000080)={0x7b, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000900)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000500)={0x20, 0x0, 0x4}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, &(0x7f0000000b40)={0xc3, 0x0, 0x0, 0x0, &(0x7f00000003c0)={0x20, 0x0, 0x4}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)


syz_open_dev$evdev(&(0x7f0000000a40), 0xffffffffffffffff, 0x200002)


r0 = syz_open_dev$evdev(&(0x7f0000001640), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0xc0189436, &(0x7f0000002a40)={0x0, 0x0, 0x0, 0x0, "c88919b0137c9e45db6810f36926832bc7464cde460ba989075e2f211c806179"})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000001180)=ANY=[@ANYBLOB="12010000090003206d0414c34000ffff000109022400010400a000090400000103010100093700086ce82201000905815f"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, &(0x7f0000000dc0)=ANY=[@ANYBLOB="00020c0000000c0002"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000080)={0x7b, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000900)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000500)={0x20, 0x0, 0x4}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, &(0x7f00000006c0)={0x84, 0x0, 0x0, 0x0, &(0x7f00000005c0)={0x20, 0x0, 0x4, {0x5}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, &(0x7f0000000e80)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f0000000000)={0x20, 0x0, 0x4, {0xc0}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000b80)={0x2c, &(0x7f0000000600), 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGLED(r0, 0x40049409, &(0x7f0000000240)=""/77)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f0000000040), 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x40045b17, 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b24, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000780), 0x0, 0x0)
ioctl$EVIOCGEFFECTS(r0, 0x80044584, &(0x7f0000000000)=""/125)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902240001000000000904000001030100000921000000012201000905810308"], 0x0)
ioctl$EVIOCRMFF(r0, 0x4004550f, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0xcc, 0x1e, 0xf4, 0x20, 0xb89, 0x7, 0xef64, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xfd, 0x3f, 0x9}}]}}]}}, 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000014c0), 0x0, 0x0)
ioctl$EVIOCGEFFECTS(r0, 0x80044584, &(0x7f0000002500))


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258164787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea", 0xae6)


syz_usb_connect$cdc_ecm(0x2, 0x4d, &(0x7f0000000100)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x8}}}}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000001180)=ANY=[@ANYBLOB="12010000090003206d0414c34000ffff000109022400010400a000090400000103010100093700086ce82201000905815f"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, &(0x7f0000000dc0)=ANY=[@ANYBLOB="00020c0000000c0002"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000080)={0x7b, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000900)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000500)={0x20, 0x0, 0x4}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, &(0x7f00000006c0)={0x84, 0x0, 0x0, 0x0, &(0x7f00000005c0)={0x20, 0x0, 0x4, {0x5}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, &(0x7f0000000e80)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f0000000200)=ANY=[@ANYBLOB="20000800000095cdaf"], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0xf5, 0x6e, 0xb7, 0x20, 0x2c7c, 0x620, 0xa7b9, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xff}}]}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x0, 0x24, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0x20, 0x41, 0x13, 0x0, 0x257a, 0x360f, 0x31, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x32, 0x8a, 0xff}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x83c0550b, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
syz_open_dev$hidraw(&(0x7f0000000000), 0x5, 0x200)
syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
read$hidraw(0xffffffffffffffff, 0x0, 0x0)
syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, 0x0)
write$hidraw(r0, &(0x7f00000000c0)="5c64d71bfffe92eba3c5913656422fdc52d035e376d9420d6a151d592724a97e72f43b4f658c2d62ae0970022bad9b56", 0x30)


syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRAWINFO(r0, 0x401c5820, &(0x7f0000000080)=""/105)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x46d, 0xc101, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x7}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000040)={0x2c, &(0x7f0000000080)={0x0, 0x0, 0x8, {0x8, 0x0, "47f886f4d105"}}, 0x0, 0x0, 0x0, 0x0}, 0x0)


syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f00000003c0)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x10, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x2, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}}}]}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000100)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0xc0085504, &(0x7f0000000400))


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="12010000000000406d04ee200000000000010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0xc0085508, &(0x7f0000000400)=0x18)


syz_usb_connect(0x0, 0x3f, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$cdc_ecm(0x0, 0x56, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000009000008010000800000000001010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0x40095505, 0x0)


syz_usb_control_io(0xffffffffffffffff, &(0x7f0000000800)={0x2c, &(0x7f0000000680)={0x0, 0x11, 0x51, {0x51, 0x22, "bdc349bd322483b3c1b5b26e659817142dfb828a0d47d39a3e860edebf633acdaa298cb7f8fe9356bfe9c1549ebe9415a6eb1ccf31ac1bdaaae887b90e9391ed549a665bdf1b9e11a77379c01165d9"}}, &(0x7f0000000700)={0x0, 0x3, 0x16, @string={0x16, 0x3, "6428ec6e82abd4bd3fd813962576e2efd13310a3"}}, &(0x7f0000000740)={0x0, 0xf, 0x34, {0x5, 0xf, 0x34, 0x3, [@ss_container_id={0x14, 0x10, 0x4, 0x3, "ff7ccc0c68467d3c3010e8e4d7f2c69d"}, @ext_cap={0x7, 0x10, 0x2, 0x18, 0x0, 0xc}, @ss_container_id={0x14, 0x10, 0x4, 0x5, "5a95ebcaf66d49f9b11efba11131e746"}]}}, &(0x7f0000000780)={0x20, 0x29, 0xf, {0xf, 0x29, 0x3, 0x80, 0xfd, 0x1, "8039cc95", "68c86530"}}, &(0x7f00000007c0)={0x20, 0x2a, 0xc, {0xc, 0x2a, 0xfe, 0x2, 0x2, 0xe, 0x5, 0x0, 0x8e6}}}, &(0x7f0000000cc0)={0x84, &(0x7f0000000840)={0x20, 0xe, 0x86, "e175068a6ff2b54b47da8527331dcb68cf3848c7c58dc249ade25dc4ef4e63dcc529d693a5f9cf331afef3d517cda1f4a991d2af04c1c2cd5aca41bfaab2dfb10f16884d0061abf949b2431a9fa8c2760ca92e47624b35d936716e38400aba8b49e0bb38f7ca377f58e7eee514c3c697eec1aece0e69872ac91623fb81d8b3f7e186c491059f"}, &(0x7f0000000900)={0x0, 0xa, 0x1, 0x4}, &(0x7f0000000940)={0x0, 0x8, 0x1, 0xa2}, &(0x7f0000000980)={0x20, 0x0, 0x4, {0x1}}, &(0x7f00000009c0)={0x20, 0x0, 0x4, {0xa854b9330836f5f4, 0x2}}, &(0x7f0000000a00)={0x40, 0x7, 0x2, 0x7}, &(0x7f0000000a40)={0x40, 0x9, 0x1, 0xff}, &(0x7f0000000a80)={0x40, 0xb, 0x2, '>L'}, &(0x7f0000000ac0)={0x40, 0xf, 0x2, 0xfffb}, &(0x7f0000000b00)={0x40, 0x13, 0x6, @multicast}, &(0x7f0000000b40)={0x40, 0x17, 0x6, @link_local={0x1, 0x80, 0xc2, 0x0, 0x0, 0x3}}, &(0x7f0000000b80)={0x40, 0x19, 0x2, "cd4a"}, &(0x7f0000000bc0)={0x40, 0x1a, 0x2, 0x8001}, &(0x7f0000000c00)={0x40, 0x1c, 0x1, 0x40}, &(0x7f0000000c40)={0x40, 0x1e, 0x1, 0x28}, &(0x7f0000000c80)={0x40, 0x21, 0x1, 0x55}})
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
ioctl$EVIOCGKEYCODE_V2(0xffffffffffffffff, 0x80284504, &(0x7f0000000040))
syz_usb_control_io$hid(0xffffffffffffffff, &(0x7f0000001440)={0x24, 0x0, 0x0, &(0x7f00000013c0)={0x0, 0x22, 0x5, {[@local=@item_4={0x3, 0x2, 0x0, "741cb976"}]}}, 0x0}, 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000000040), 0x0, 0x14a442)
r1 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
syz_usb_connect(0x0, 0x36, &(0x7f0000000ac0)={{0x12, 0x1, 0x0, 0xc2, 0xab, 0xc8, 0x40, 0xf11, 0x2060, 0xb70b, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x24, 0x3, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x5d, 0x72, 0x86}}, {{0x9, 0x4, 0x0, 0x0, 0x0, 0x8e, 0x34, 0x52}}, {{0x9, 0x4, 0x0, 0x0, 0x0, 0xff, 0xff, 0xff}}]}}]}}, 0x0)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x2250)
ioctl$EVIOCGMASK(r1, 0x5452, &(0x7f0000000100)={0x0, 0x41, 0x0})
syz_open_dev$evdev(&(0x7f00000000c0), 0x4, 0x1992e0)
write$hidraw(r0, &(0x7f0000000080)="00000000000000f72808f2bccac12a7c15be02e5332d", 0x16)


syz_usb_connect(0x1, 0x2d, &(0x7f0000000140)=ANY=[@ANYBLOB="1201000009a65d0860040800dec30102030109021b050000000000090400000178eaf50009058402"], &(0x7f0000000080)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
syz_open_dev$evdev(&(0x7f00000014c0), 0xbfd0, 0x0)


r0 = syz_usb_connect$cdc_ecm(0x2, 0x4d, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}}}]}}]}}, 0x0)
syz_usb_ep_write(r0, 0x82, 0x0, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, &(0x7f0000000bc0)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29267949e59ee7623fd3e9583ced617be0fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f89e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66d7d0e47a6d51d44e2d8a8a5a98d942bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec524340000c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe84
6ab31d11eaf80ef27eb60eca3d12944c43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9017efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af9", 0xfffffffffffffea8)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io(0xffffffffffffffff, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001000000090090"], 0x0, 0x0, 0x0, 0x0}, 0x0)


syz_usb_connect(0x0, 0x3f, &(0x7f0000000080)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe1900fc1109af"], 0x0)
ioctl$EVIOCSABS0(0xffffffffffffffff, 0xc0045878, 0x0)


r0 = syz_usb_connect(0x1, 0x2d, &(0x7f0000000000)=ANY=[@ANYBLOB="12012000f1048108cd060202d4920000000109021b1901000000d40904150001da40df0009058202"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$printer(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0xcc, 0x1e, 0xf4, 0x20, 0xb89, 0x7, 0xef64, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xfd, 0x3f, 0x9}}]}}]}}, 0x0)


syz_open_dev$evdev(0x0, 0x0, 0x0)
syz_usb_connect(0x0, 0x24, &(0x7f0000001040)={{0x12, 0x1, 0x0, 0x5e, 0xa5, 0xd1, 0x20, 0x545, 0x8002, 0x30a, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xd0, 0xdb, 0x94}}]}}]}}, 0x0)
ioctl$EVIOCGMASK(0xffffffffffffffff, 0x80104592, &(0x7f0000000540)={0x0, 0x0, 0x0})
syz_open_dev$evdev(&(0x7f0000001280), 0x0, 0x0)
ioctl$EVIOCGKEYCODE_V2(0xffffffffffffffff, 0x80284504, 0x0)
syz_open_dev$evdev(&(0x7f0000000780), 0x4, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f00000005c0)={0x5, 0x1, &(0x7f0000000500)="14"})


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x10000000000000, 0x173102)
ioctl$EVIOCREVOKE(r0, 0x40044591, &(0x7f0000000040)=0x5)
ioctl$EVIOCSCLOCKID(r0, 0x400445a0, &(0x7f0000000080)=0xffffffff)
ioctl$EVIOCGMASK(r0, 0x80104592, &(0x7f0000000180)={0x17, 0xb4, &(0x7f00000000c0)="7a3d65150e5911867fbf272a2366cea750515be4ecd011fe2b52e05548c6e31fe50c37f03ad66b0e40a4113b2aafebc9b2cff9907583b4d55e3ed4524069347ca6243423ac15cf98374971c50e82b28ac04e2b1d43f7720dad19608fd204ec2661a0f0d206b7d6764a43cc4eb7dbd8571eff3a07d0dd9874f938faa415969f60b90ab97acace95e1fcc9e0906bfb63bf2ebbb7cb51dfac0e81ce8f9ce4207d715e1eb2d576d8666933d81f63a127857ada595108"})
ioctl$EVIOCREVOKE(r0, 0x40044591, &(0x7f00000001c0)=0x1)
ioctl$EVIOCGVERSION(r0, 0x80044501, &(0x7f0000000200)=""/187)
r1 = syz_open_dev$evdev(&(0x7f00000002c0), 0xffff, 0x20002)
ioctl$EVIOCGPHYS(0xffffffffffffffff, 0x80404507, &(0x7f0000000300)=""/37)
ioctl$EVIOCSFF(r0, 0x40304580, &(0x7f0000000340)={0x51, 0x8000, 0x1fca, {0x9, 0x2cdf}, {0x0, 0x9}, @const={0xcc, {0x7f, 0x2, 0x1}}})
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000440)={0x10, 0x98, &(0x7f0000000380)="f3a6d375533765e3a5b9425bf3b2b5c81cd8ff29b4e11fc71ff64c5e4cfb578734c8cb53d73595d7c30e6c469c8999dc2a22c02d3e67f8a12cbb08778cca0f5bde116b23fcb3c8a2cba5c99abd78e22d2a8884bd66efa51767c075fc12c3042f85e21f9ae8e41bc150edb6d53048bb8b7cda913fd1bf7fa19b798e5fec74616122774b18ad844b4b85dc85aec79b579afedba5ecd8bbadee"})
r2 = syz_open_dev$evdev(&(0x7f0000000480), 0x96d2, 0x200000)
ioctl$EVIOCGABS3F(r2, 0x8018457f, &(0x7f00000004c0)=""/4096)
syz_open_dev$evdev(&(0x7f00000014c0), 0xbfd0, 0x0)
ioctl$EVIOCSCLOCKID(r0, 0x400445a0, &(0x7f0000001500))
r3 = syz_open_dev$evdev(&(0x7f0000001540), 0x4, 0x2)
ioctl$EVIOCRMFF(r3, 0x40044581, &(0x7f0000001580)=0x4)
ioctl$EVIOCGEFFECTS(r0, 0x80044584, &(0x7f0000002ac0)=""/83)
r4 = syz_open_dev$evdev(&(0x7f0000001640), 0x8000, 0x1)
ioctl$EVIOCSKEYCODE(r0, 0x40084504, &(0x7f0000001680)=[0x0, 0xffffff3a])
ioctl$EVIOCGEFFECTS(r4, 0x80044584, &(0x7f00000016c0)=""/208)
ioctl$EVIOCGSW(r4, 0x8040451b, &(0x7f00000017c0)=""/4096)
ioctl$EVIOCGABS3F(r2, 0x8018457f, &(0x7f00000027c0)=""/253)
syz_open_dev$evdev(&(0x7f0000002a80), 0x2, 0x4e2080)
syz_open_dev$evdev(&(0x7f0000002a40), 0x3, 0x1856c1)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x822b01)
ioctl$EVIOCGABS20(r0, 0x40044591, 0x0)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x1068)


r0 = syz_usb_connect$hid(0x2, 0x36, &(0x7f0000000180)=ANY=[@ANYBLOB="12010000090003206d0414c340000000000109022400010000a0000904000001030101000921000800012201000905", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, &(0x7f0000000a00)=ANY=[@ANYBLOB="00020c0000000c0002"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f00000002c0)={0x2c, &(0x7f0000000300)=ANY=[], 0x0, 0x0, 0x0, 0x0})


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201fb0019030320d812010079de01ec020109021b0001000003000904000001785ecc00090585020004"], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x200000)
syz_usb_disconnect(r0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$uac1(0x6, 0x71, &(0x7f0000000180)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0x550c, 0x0)


syz_usb_connect$cdc_ecm(0x0, 0x5c, &(0x7f0000000140)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x10, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x4a, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x16, 0x2, 0x2, 0x0, 0x0, {{0x5}, {0x5}, {0xd}, [@country_functional={0x6}]}, {[{{0x9, 0x5, 0x81, 0x3, 0x10}}], {{0x9, 0x5, 0x82, 0x2, 0x200}}, {{0x9, 0x5, 0x3, 0x2, 0x200}}}}}]}}]}}, 0x0)


syz_usb_connect$cdc_ecm(0x1, 0x4d, &(0x7f0000001840)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x8, 0x525, 0xa4a1, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x2, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x400}}}}}]}}]}}, &(0x7f0000001d00)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000040)=ANY=[@ANYBLOB="11010000733336088dee1adb23610000000109022d0001100000000904000003fe03010009cd8d1f00020000000905050200067e001009058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000680)={0x84, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r1, &(0x7f0000000280)="d7", 0x1)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x45e, 0xf9, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0xb}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000080)={0x24, 0x0, 0x0, &(0x7f0000000880)={0x0, 0x22, 0xb, {[@global=@item_4={0x3, 0x1, 0x0, "5b29b10b"}, @global=@item_4={0x3, 0x1, 0x0, "831a75be"}, @global]}}, 0x0}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000b00)={0x2c, &(0x7f0000000040)=ANY=[@ANYBLOB="00000f00000009003d140f3c369197d09647190890"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0xc018480d, &(0x7f00000000c0)={0x2, 0x0, 0x0, 0x8000})


r0 = syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000050cb5340450c10108e492940a80909021b00090000000009040002010035040009058dff86"], 0x0)
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_disconnect(r0)
read$char_usb(r1, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x20, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x40284504, &(0x7f0000000080)={0x0, 0x0, 0x0, 0x0, "0d69e73627848b6df85ca9295474259012147e1bf544270ed84f34cfc66288f7"})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000000000402609333340000000000109022400010000000009040000010301000009210000000122010009058103"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000004300)={0x18, &(0x7f0000004240)={0x0, 0x0, 0x2, {0x2}}, 0x0, 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_disconnect(r0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0xc1, 0x3a, 0x89, 0x8, 0x4b4, 0x2, 0x620d, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xf1, 0x8a, 0x27}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x100000002, 0x120b40)
ioctl$EVIOCSKEYCODE(r0, 0x40086602, &(0x7f0000000340)=[0x7fffffff])


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000001180)=ANY=[@ANYBLOB="12010000090003206d0414c34000ffff000109022400010400a000090400000103010100093700086ce82201000905815f"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, &(0x7f0000000dc0)=ANY=[@ANYBLOB="00020c0000000c0002"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000080)={0x7b, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, &(0x7f0000000900)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000500)={0x20, 0x0, 0x4}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000400)={0x2c, &(0x7f0000000100)=ANY=[], 0x0, 0x0, 0x0, 0x0})


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000079381010c2154000fb0707820b0109021b0009000000080904000001e104550009058b37ad"], 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$EVIOCGMASK(r0, 0x4020940d, &(0x7f0000000040)={0x4, 0x0, 0x0})


r0 = syz_usb_connect$cdc_ecm(0x0, 0x56, &(0x7f00000003c0)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x10, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x44, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x2, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}, {[{{0x9, 0x5, 0x81, 0x3, 0x40}}]}}}]}}]}}, 0x0)
syz_usb_ep_read(r0, 0x81, 0x0, 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000000)=ANY=[@ANYBLOB="120100007566cb20f80600b0c8cb0102030109021b0001000000000904000001ffb9ab0009050a"], 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGRAB(r0, 0x40044590, &(0x7f0000000040))
ioctl$EVIOCGRAB(r0, 0x40044590, 0x0)


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="12010007000000407d1eb42d0000000097"], 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x40305828, &(0x7f0000000040))


syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="12010000000018105e04da0700000000000109022400010000000009040000090300000009210000000122220009058103"], 0x0)
ioctl$EVIOCGABS3F(0xffffffffffffffff, 0x8018457f, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, 0x0, 0x0, &(0x7f0000000000)=ANY=[@ANYBLOB="00222200000096231306e53f070c0000002a9000070d00be0083"], 0x0}, 0x0)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x2e, 0xab, 0x5a, 0x40, 0x1c40, 0x534, 0x6dcc, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x76, 0xdf, 0xdb}}]}}]}}, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x4, 0x24, &(0x7f0000000100)=ANY=[], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x862b01)
write$char_usb(r0, 0x0, 0x51)


syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x20a00)
syz_usb_connect(0x0, 0x3b, &(0x7f0000001340)={{0x12, 0x1, 0x310, 0x94, 0xc5, 0xdd, 0x8, 0x4e6, 0x101, 0x200, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x29, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xc6, 0xa5, 0xc7, 0x0, [@cdc_ecm={{0x5}, {0x5}, {0xd}}]}}]}}]}}, &(0x7f00000002c0)={0x0, 0x0, 0x25, &(0x7f0000001840)={0x5, 0xf, 0x25, 0x2, [@ss_container_id={0x14, 0x10, 0x4, 0x0, "00eaec2a180a000000f5a421bc510d6b"}, @ssp_cap={0xc}]}, 0x155555c3, [{0xb9, &(0x7f0000000080)}, {0x4, &(0x7f0000000140)=@lang_id={0x4}}, {0x32, &(0x7f0000000340)=@string={0x32, 0x3, "8dddbdf3b127c18b16f82d9535747a9e39d23ecd91f1c05b2aea3f460774b3bd685e00"/48}}, {0xa9, &(0x7f00000001c0)=@string={0xa9, 0x3, "fa26a57e6c387aae3c7fd2c5116037cfbfcb9ac4d0677ef17a83e3d6b7e57f702df2c7481d5b7d93967ae89d879812115376bd82cae20a884206047fd9f0c8ba659ef80ba9c7c90e96d400cac669fc85a0cc4fb5d321ecde0b9c9f7ec32a5df5eadf7168dd4737a928ba168ab135598fc1dd47d9d7901173fe00070f702bf5728456180fc79404592b9aea4e3d764f53f21edcff386635e554b71174116bf49b50117251117212"}}, {0x4, &(0x7f0000000280)=@lang_id={0x4}}]})


syz_usb_connect(0x6, 0x36, &(0x7f0000000680)=ANY=[@ANYRESOCT], &(0x7f00000007c0)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000050cb5340450c10108e492940a80909021b00090000000009040002010035040009058dff86"], 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x40305828, &(0x7f0000000100)={0xffffffffffffffaf, "374c244c61025cd3990db910b31f163f4e86c476d46f8a79"})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f000905", @ANYRES64], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000240)={0x24, 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGCOLLECTIONINFO(r1, 0xc0104811, &(0x7f0000000040)={0x0, 0x7, 0xa25b, 0x4})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1a34, 0x802, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x2, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000200)={0x24, 0x0, 0x0, &(0x7f0000000180)={0x0, 0x22, 0x5, {[@local=@item_4={0x3, 0x2, 0x0, "d32052f2"}]}}, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001140), 0x2, 0x0)
ioctl$EVIOCGKEYCODE_V2(r0, 0x80284504, &(0x7f0000000040))


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
syz_usb_connect$hid(0x0, 0x36, 0x0, 0x0)
r1 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
r2 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r2, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
ioctl$HIDIOCGRDESC(r1, 0x4030582a, &(0x7f0000000040)={0xd, "3a82000000010000000000009d"})
ioctl$HIDIOCGRDESC(r0, 0x40305829, &(0x7f00000002c0)={0xd, "3a82000000130000000000009d"})
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, &(0x7f00000001c0)={0x24, 0x0, 0x0, 0x0, 0x0}, 0x0)
ioctl$EVIOCGRAB(0xffffffffffffffff, 0x40044590, 0x0)
syz_open_dev$hidraw(0x0, 0x4000000401, 0x0)


r0 = syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201fb0019030320d812010079de01ec020109021b0001000003000904000001785ecc00090585020004"], 0x0)
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r1, &(0x7f0000000080)=""/239, 0xef)
syz_usb_disconnect(r0)
syz_open_dev$evdev(0x0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000140), 0x0, 0x0)
ioctl$EVIOCGREP(r0, 0x40044591, 0x0)
ioctl$HIDIOCSUSAGES(0xffffffffffffffff, 0x501c4814, &(0x7f0000001100)={{}, 0x0, [0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x10000]})
r1 = syz_open_dev$evdev(&(0x7f0000000100), 0x0, 0x862b01)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000e2793b10d10501200006010203010902120008000000000904"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000640)={0x2c, &(0x7f0000000500)=ANY=[@ANYBLOB="000002"], 0x0, 0x0, 0x0, 0x0})
syz_usb_connect(0x0, 0x0, 0x0, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
syz_usb_connect$uac1(0x0, 0x0, 0x0, 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f00000001c0)=ANY=[@ANYBLOB="1201000074020440fd07010099090000000109021b0001000000000904000001c5b3e3000905", @ANYRES64], 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000280), 0x0, 0x0)
ioctl$EVIOCGRAB(r0, 0x40044590, &(0x7f0000000440))
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, 0x0)
r1 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x822b01)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x1068)


syz_usb_connect$uac1(0x0, 0x9e, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000000000086b1d010140000102030109028c0003010000000904000000010100000a24010000000201020624050000220904010000010200000904010101010200000924020100000000360924020105000000560905010900000000000725010000000009040200000102"], 0x0)
syz_usb_connect$printer(0x0, 0x0, 0x0, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0xb, 0x2e, 0x35, 0x10, 0x413, 0x6026, 0x18aa, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa6, 0x9b, 0xce}}]}}]}}, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)
syz_open_dev$evdev(0xfffffffffffffffc, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000600), 0x6828, 0x0)
ioctl$EVIOCGKEYCODE_V2(r0, 0x80284504, &(0x7f0000000640)=""/146)


r0 = syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f00000001c0)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x12, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd, 0x24, 0xf, 0x1, 0x0, 0x2}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x200}}, {{0x9, 0x5, 0x3, 0x2, 0x8}}}}}]}}]}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, &(0x7f0000000080)={0x14, 0x0, &(0x7f0000000040)={0x0, 0x3, 0x1a, {0x1a}}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_ep_write(r0, 0x82, 0x0, 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000140)=ANY=[@ANYBLOB="12011001c0ae84400912008b4a1b0102030109021b0001000000000904"], 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000580)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES16], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000000)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xf, {[@local=@item_4={0x3, 0x2, 0x0, "f896e404"}, @local=@item_012={0x1, 0x2, 0x0, 'e'}, @main=@item_012={0x2, 0x0, 0x9, "f792"}, @main=@item_4={0x3, 0x0, 0x0, "9ef12d19"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0xc018480b, &(0x7f00000000c0)={0x2, 0xffffffff, 0x0, 0x0, 0x4e496f8})


r0 = syz_open_dev$evdev(&(0x7f0000000180), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x80104592, &(0x7f0000000040)={0x0, 0x14, 0x0, 0xffffffc1, "25ae97df0000800021c599885b000000000001000012f6ff5cdd8bc400"})


syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCGKEYCODE(0xffffffffffffffff, 0x4020940d, 0x0)
syz_usb_connect(0x2, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0xb, 0x2e, 0x35, 0x10, 0x413, 0x6026, 0x18aa, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa6, 0x9b, 0xce}}]}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCGKEYCODE(r0, 0x80084504, &(0x7f0000000140)=""/148)


r0 = syz_open_dev$evdev(0x0, 0x0, 0x800)
r1 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="1201000000000040701700ff0000000000010902240001000000000903ffffea030000000921000000012205000905810300"], 0x0)
syz_usb_connect(0x0, 0x3f, &(0x7f0000000080)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e"], 0x0)
ioctl$EVIOCGMASK(0xffffffffffffffff, 0x5b03, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCGEFFECTS(0xffffffffffffffff, 0x80044584, 0x0)
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x48, 0x2}, @period={0x0, 0x0, 0x0, 0x0, 0x0, {}, 0x0, 0x0}})
syz_usb_connect$uac1(0x0, 0x71, &(0x7f00000001c0)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x5f, 0x3, 0x1, 0x0, 0x0, 0x0, {{}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x82, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}}}}]}}, 0x0)
r2 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
write$char_usb(r2, &(0x7f0000000040)="e2", 0x2250)
ioctl$EVIOCGKEY(r1, 0x80404518, &(0x7f0000000100)=""/66)
read$hiddev(r0, 0x0, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001240), 0x0, 0x0)
ioctl$EVIOCGSND(r0, 0x5451, 0x0)


syz_usb_connect(0x4, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB], &(0x7f0000000080)={0x0, 0x0, 0x0, 0x0})
syz_usb_connect$uac1(0x0, 0x0, 0x0, 0x0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000200)={{0x12, 0x1, 0x0, 0xfb, 0xf8, 0xe, 0x10, 0x2304, 0x21f, 0x5faf, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa7, 0x13, 0x85}}]}}]}}, 0x0)
syz_usb_disconnect(0xffffffffffffffff)
syz_open_dev$evdev(0x0, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCGKEYCODE(r0, 0x80084504, &(0x7f0000000140)=""/148)


syz_open_dev$evdev(&(0x7f0000000240), 0x0, 0x0)
syz_usb_connect$cdc_ncm(0x0, 0x72, &(0x7f0000000200)=ANY=[@ANYBLOB="1201000002000040257d15a4400001040001090260004201000000090400000102090000052406000105240000000d240f01000004eaffffff1e0006031a00000804800200090581", @ANYBLOB='s'], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, 0x0, 0x5)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x56a, 0x12c, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_open_dev$evdev(&(0x7f0000000480), 0x0, 0x10000)
syz_usb_control_io$hid(r0, &(0x7f0000000b80)={0x24, 0x0, 0x0, &(0x7f0000000000)={0x0, 0x22, 0x5, {[@main=@item_4]}}, 0x0}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x17ef, 0x6067, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_connect$printer(0x0, 0x2d, &(0x7f0000000240)={{0x12, 0x1, 0x300, 0x0, 0x0, 0x0, 0x10, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x1, 0x0, 0x20, 0x0, [{{0x9, 0x4, 0x0, 0x4, 0x0, 0x7, 0x1, 0x1, 0x0, "", {{{0x9, 0x5, 0x1, 0x2, 0x20, 0x0, 0x9}}}}}]}}]}}, 0x0)
r1 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
r2 = syz_open_dev$hidraw(&(0x7f0000000000), 0x5, 0x0)
write$hidraw(r1, &(0x7f0000000140)="d5", 0x1)
read$hidraw(r2, &(0x7f0000000680)=""/180, 0xb4)
syz_usb_connect$cdc_ecm(0x2, 0x4d, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x0, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000a40)={0x24, 0x0, 0x0, &(0x7f00000009c0)={0x0, 0x22, 0x5, {[@global=@item_4={0x3, 0x1, 0x0, "0296a62c"}]}}, 0x0}, 0x0)


syz_usb_control_io$cdc_ecm(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000180), 0x0, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x80104592, &(0x7f0000000040)={0x0, 0x0, 0x0, 0xfffffffe, "00207d2000000000201b14700c1e0ac74f000000001200000000000900"})
syz_open_dev$evdev(&(0x7f0000000100), 0x0, 0x105040)
r1 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r1)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000000000008d80402f000000000000109022400010000000009040000010300000009210000000122070009058103"], 0x0)
syz_usb_control_io$cdc_ecm(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000680)={0x2c, &(0x7f0000000280)=ANY=[@ANYBLOB='\x00\x00\b'], 0x0, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
r1 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902240001000000000904000001030100000921000000012201000905810308"], 0x0)
syz_usb_disconnect(0xffffffffffffffff)
syz_usb_connect$hid(0x0, 0x36, 0x0, 0x0)
syz_usb_ep_write(r1, 0x81, 0x0, 0x0)
syz_usb_connect$cdc_ecm(0x0, 0x0, 0x0, 0x0)
ioctl$EVIOCRMFF(r0, 0x4004550d, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x801c581f, &(0x7f00000002c0))


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r1, &(0x7f0000000080)=""/239, 0xef)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x14a042)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c
43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbe
dd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0xfff)
syz_open_dev$hidraw(&(0x7f0000000000), 0x5, 0x200)
write$hidraw(r0, &(0x7f0000000140)="d5", 0x1)


syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000180), 0x0, 0x0)
ioctl$EVIOCGKEYCODE(r0, 0x4020940d, 0x0)
syz_open_dev$hiddev(0xfffffffffffffffe, 0x0, 0x0)
syz_open_dev$hidraw(&(0x7f0000001100), 0x20000003, 0xc300)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x4e7, 0x9, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000b80)={0x24, 0x0, 0x0, &(0x7f0000000000)={0x0, 0x22, 0x5, {[@main=@item_4]}}, 0x0}, 0x0)


r0 = syz_open_dev$evdev(0x0, 0x1, 0x101000)
ioctl$EVIOCSFF(r0, 0x40304580, &(0x7f0000001080)={0x56, 0x0, 0x0, {0x0, 0x1}, {0x58, 0x1}})
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x2, 0x822b01)
ioctl$EVIOCSMASK(r1, 0x40104593, &(0x7f00000000c0)={0x1, 0x0, 0x0})
syz_open_dev$evdev(&(0x7f0000000080), 0xffffffffffffff7f, 0x4f0500)
ioctl$EVIOCSKEYCODE(r1, 0x40084504, &(0x7f0000000340)=[0x8, 0x3])
r2 = syz_open_dev$evdev(&(0x7f00000001c0), 0x7, 0x2000)
ioctl$EVIOCGMTSLOTS(r2, 0x8040450a, &(0x7f0000000200)=""/187)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x12d8)


syz_open_dev$evdev(&(0x7f0000000240), 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x6, 0x105141)
ioctl$EVIOCGMASK(r0, 0x4020940d, &(0x7f00000003c0)={0x4, 0x0, 0x0})


ioctl$HIDIOCGFIELDINFO(0xffffffffffffffff, 0xc038480a, &(0x7f0000000000)={0x0, 0x200, 0xb6d4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x1})
ioctl$HIDIOCGUSAGE(0xffffffffffffffff, 0xc018480b, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000940), 0x0, 0x0)
ioctl$EVIOCSREP(r0, 0xc020660b, 0x0)
ioctl$HIDIOCGSTRING(0xffffffffffffffff, 0x81044804, &(0x7f0000000100))
syz_open_dev$hiddev(&(0x7f0000001140), 0x0, 0x0)
syz_open_dev$hiddev(0x0, 0x3, 0x4f8c41)
ioctl$HIDIOCSFLAG(0xffffffffffffffff, 0x4004480f, 0x0)
read$hiddev(0xffffffffffffffff, &(0x7f0000002380)=""/45, 0x2d)
syz_open_dev$hiddev(0x0, 0x0, 0x0)
ioctl$HIDIOCGREPORT(0xffffffffffffffff, 0x400c4807, 0x0)
ioctl$HIDIOCGNAME(0xffffffffffffffff, 0x80404806, 0x0)
syz_usb_connect$uac1(0x0, 0x96, &(0x7f0000002b00)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x84, 0x3, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0, 0x0, {{}, [@selector_unit={0x7, 0x24, 0x5, 0x0, 0x0, "4080"}, @input_terminal={0xc, 0x24, 0x2, 0x0, 0x0, 0x1}]}}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x1, 0x9, 0x40, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {[@as_header={0x7, 0x24, 0x1, 0x0, 0x0, 0x5}, @format_type_i_continuous={0xb, 0x24, 0x2, 0x1, 0x6, 0x0, 0x0, 0x1, "", "11f7b7"}]}, {{0x9, 0x5, 0x82, 0x9, 0x200, 0x0, 0x0, 0x0, {0x7}}}}}}}]}}, 0x0)


syz_usb_connect$cdc_ncm(0x0, 0x72, &(0x7f0000000200)=ANY=[@ANYBLOB="1201000002000040257d15a4400001040001090260004201000000090400000102090000052406000105240000000d240f01000004eaffffff1e0006031a00000804800200090581", @ANYBLOB='s'], 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000000000402609333340000000000109022400010000000009040000010301000009210000000122010009058103"], 0x0)
ioctl$EVIOCRMFF(r0, 0x550c, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x862b01)
syz_open_dev$evdev(0x0, 0x0, 0x0)
write$char_usb(r0, &(0x7f0000000040)="f2", 0x1)


r0 = syz_open_dev$evdev(&(0x7f0000001540), 0x0, 0x0)
ioctl$EVIOCGLED(r0, 0x5452, &(0x7f0000000240)=""/77)


syz_usb_connect$printer(0x0, 0x2d, &(0x7f0000000000)={{0x12, 0x1, 0x250, 0x0, 0x0, 0x0, 0x8, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1}}]}}, &(0x7f0000000140)={0x0, 0x0, 0x5, 0x0})


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000540)={{0x12, 0x1, 0x0, 0xc5, 0x2d, 0x22, 0x40, 0x4cb, 0x10b, 0x3de0, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x6f, 0x3d, 0x4d}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x550c, &(0x7f0000000400))


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000001180)=ANY=[@ANYBLOB="12010000090003206d0414c34000ffff000109022400010400a000090400000103010100093700086ce82201000905815f"], 0x0)
syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, &(0x7f0000000dc0)=ANY=[@ANYBLOB="00020c0000000c0002"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000080)={0x7b, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0})


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
ioctl$HIDIOCGRDESC(r0, 0x90044802, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, 0x0)
syz_open_dev$evdev(&(0x7f0000000000), 0x56d, 0x8040)
syz_open_dev$evdev(&(0x7f0000000740), 0x9, 0x22303)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x69, 0x12, 0x83, 0x8, 0x20b7, 0x1540, 0x2a2, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xff}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001500), 0x0, 0x0)
ioctl$EVIOCGEFFECTS(r0, 0xc0189436, &(0x7f00000039c0)=""/4096)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000000000406d041bc700000000000109022400010000000009042000010300000009210000000122070009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000003c0)={0x24, 0x0, 0x0, &(0x7f0000000040)=ANY=[@ANYBLOB='\x00\"\a'], 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGKEYCODE(r0, 0x400445a0, &(0x7f0000000040)=""/235)
r1 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000340)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00", @ANYRES64], 0x0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[], 0x0)
syz_usb_control_io(r1, &(0x7f0000000b00)={0x2c, &(0x7f0000000040)=ANY=[@ANYBLOB="00000f00000009003d140f3c369197d09647190890"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r2 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGCOLLECTIONINFO(r2, 0xc0104811, &(0x7f0000000100)={0x7, 0x3, 0xffffffff, 0x70a})


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x97, 0xff, 0x82, 0x8, 0x2058, 0x1005, 0xc19b, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xbf, 0x57, 0x5a}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCSABS0(r0, 0xc0045878, 0x0)


syz_usb_connect(0x0, 0x3f, &(0x7f00000006c0)=ANY=[@ANYBLOB="12010000413b88400819151300000000000109022d0001000000000904"], 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000040)=ANY=[@ANYBLOB="11010000733336088dee1adb23610000000109022d0001100000000904000003fe03010009cd8d1f00020000000905050200067e001009058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000680)={0x84, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r1, &(0x7f0000000080)='!', 0xffffff56)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGABS20(r0, 0x40044591, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGRAB(r0, 0x40044590, &(0x7f0000000040))
ioctl$EVIOCGRAB(r0, 0x40044590, 0x0)
syz_usb_connect(0x0, 0x0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0x41015500, 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_connect$hid(0x0, 0x0, 0x0, &(0x7f0000000480)={0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r1, &(0x7f0000000080)='!', 0xffffff56)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000001180)=ANY=[@ANYBLOB="12010000090003206d0414c340000000000109022400010400a000090400000103010100093700086ce82201000905815f"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000001c0)={0x24, &(0x7f0000000dc0)=ANY=[@ANYBLOB="00020c0000000c0002"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000080)={0x7b, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000001040)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000b40)={0x20, 0x0, 0x4, {0x3}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, &(0x7f0000000900)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000500)={0x20, 0x0, 0x4}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, &(0x7f00000011c0)={0x84, 0x0, 0x0, 0x0, &(0x7f00000005c0)={0x20, 0x0, 0x4, {0x5}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, &(0x7f0000000e80)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f0000000180)={0x20, 0x0, 0x4}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})


syz_usb_connect(0x0, 0x24, &(0x7f0000000000)=ANY=[@ANYBLOB="120100004d0f7b08cd0c390087b0000000010902120001000000000904"], 0x0)
syz_open_dev$hidraw(0x0, 0x0, 0x0)


syz_usb_connect$cdc_ncm(0x0, 0x6e, &(0x7f0000000000)=ANY=[@ANYBLOB="12014003020000202505a1a440000102030109025c0002010000000904000001020d000005f7ffffffffffffff000d240f0100002000000000000056411a0000000905810300000000000904010000020d000009"], 0x0)
syz_usb_connect$printer(0x0, 0x36, 0x0, 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
ioctl$HIDIOCGRDESC(r0, 0x4030582b, &(0x7f0000000040))


r0 = syz_open_dev$evdev(&(0x7f0000002ac0), 0x0, 0x0)
read$hidraw(r0, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGABS20(r0, 0x40049409, 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000480)=ANY=[@ANYBLOB="12010000ec31f8104c1302007eec0102030109021b0001000000000904000001018b7500090583"], 0x0)
syz_open_dev$evdev(&(0x7f0000000100), 0x4000000, 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x80, 0xd7, 0x7, 0x8, 0x413c, 0x4087, 0xd6df, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x2, 0xff, 0x1, 0x1, 0x0, [], [{{0x9, 0x5, 0x7}}, {{0x9, 0x5, 0x7}}]}}]}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001240), 0x0, 0x0)
ioctl$EVIOCGABS20(r0, 0x80184560, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="12010000090024206d041cc340000000000109022400010000a00009040000010301010009210008000122010009058103"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000240)={0x24, &(0x7f00000002c0)=ANY=[@ANYBLOB="00000c000000070001"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000180)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f0000000280)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000001200)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000040)={0x20, 0x0, 0x4, {0x1}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$hid(r0, 0x0, &(0x7f00000006c0)={0x2c, &(0x7f0000000000)={0x0, 0x0, 0x4, "373026d1"}, 0x0, 0x0, 0x0, 0x0})


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0x1017, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
ioctl$EVIOCGMASK(r0, 0x5452, &(0x7f0000000100)={0x3, 0x0, 0x0})


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000000)=ANY=[@ANYBLOB="120120002eab5a40401c3405cc6d010203010902feff01000000000904"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000003780)={0xfffffffffffffe56, &(0x7f0000000140)=ANY=[@ANYBLOB="0900040000000000f7002c2f30bce310aa768717b9581caf22a5ce3f4255d0e149"], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000300)={0x0, 0x0, &(0x7f0000000200)})
r2 = syz_open_dev$evdev(&(0x7f00000002c0), 0x1, 0x0)
ioctl$EVIOCGMASK(r2, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffff36, &(0x7f0000000200)="952bb3e006ae9a4c3a"})


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbedd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
syz_open_dev$evdev(0x0, 0x0, 0x0)
r1 = syz_open_dev$evdev(&(0x7f00000002c0), 0x1, 0x0)
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffff36, &(0x7f0000000200)="952bb3e006ae9a4c3a"})


r0 = syz_open_dev$evdev(&(0x7f0000001140), 0x2, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x40284504, &(0x7f0000000000)={0x0, 0x2, 0x0, 0x0, "00001182062b650900eb4e00fb6a82714456862f9916ebff0005000000408000"})


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[@ANYBLOB="00000100000001"], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b02, 0x0)
syz_usb_control_io$cdc_ecm(r0, &(0x7f00000001c0)={0x14, &(0x7f0000000240)={0x0, 0x0, 0x3, {0x3, 0x0, '1'}}, 0x0}, 0x0)


syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
syz_usb_connect$uac1(0x0, 0x9e, &(0x7f0000008280)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x8c, 0x3, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0, 0x0, {{}, [@selector_unit={0x6, 0x24, 0x5, 0x0, 0x0, '\"'}]}}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {[@format_type_i_continuous={0x9, 0x24, 0x2, 0x1, 0x0, 0x0, 0x0, 0x0, '6'}, @format_type_i_continuous={0x9, 0x24, 0x2, 0x1, 0x5, 0x0, 0x0, 0x0, "", 'V'}]}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {[@as_header={0x7}, @format_type_i_discrete={0xe, 0x24, 0x2, 0x1, 0x9, 0x1, 0x0, 0x0, "9df1d021ba44"}]}, {{0x9, 0x5, 0x82, 0x9, 0x20, 0x0, 0x20, 0x0, {0x7, 0x25, 0x1, 0x0, 0x4}}}}}}}]}}, 0x0)
syz_usb_connect(0x0, 0x0, 0x0, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r0, 0x80015b12, 0x0)
syz_usb_disconnect(0xffffffffffffffff)
syz_open_dev$evdev(0x0, 0x0, 0x0)


syz_usb_connect$hid(0x1, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x1b96, 0x6, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}, {{{0x9, 0x5, 0x81, 0x3, 0x400}}}}}]}}]}}, 0x0)


syz_open_dev$evdev(0x0, 0xffffffffffffffff, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, &(0x7f0000003000)=')', 0x1)
read$hidraw(r0, 0x0, 0x0)
syz_open_dev$evdev(0x0, 0x0, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x49, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1d34, 0xa, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000280)={0x24, 0x0, 0x0, &(0x7f00000000c0)=ANY=[@ANYBLOB="002205"], 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82300000904000002ca744d00090503034d00ff99090805848faa"], &(0x7f0000000000)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_connect(0x0, 0x2d, &(0x7f0000000000)=ANY=[@ANYBLOB], 0x0)
read$char_usb(r0, &(0x7f0000000200)=""/128, 0x80)
syz_usb_disconnect(0xffffffffffffffff)


r0 = syz_open_dev$evdev(&(0x7f0000000140), 0x0, 0x0)
ioctl$EVIOCGREP(r0, 0x401c5820, 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201fb0019030320d812010079de01ec020109021b0001000003000904000001785ecc00090585020004"], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, 0x0, 0x0)


r0 = syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f0000000b40)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x12, 0x2, 0x6, 0x0, 0x2, {{0x5}, {0x5}, {0xd}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x200}}, {{0x9, 0x5, 0x3, 0x2, 0x200}}}}}]}}]}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, &(0x7f0000000080)={0x14, 0x0, &(0x7f0000000040)={0x0, 0x3, 0x1a, {0x1a}}}, 0x0)
syz_usb_ep_write(r0, 0x82, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000040)=ANY=[@ANYBLOB="11010000733336088dee1adb23610000000109022d0001100000000904000003fe03010009cd8d1f00020000000905050200067e001009058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000680)={0x84, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r1, &(0x7f0000000000)="ae", 0x1)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="12010000090024206d041cc340000000000109022400010000a00009040000010301010009210008000122010009058103"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000240)={0x24, &(0x7f00000002c0)=ANY=[@ANYBLOB="00000c000000070001"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, &(0x7f00000000c0)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000040)={0x20, 0x0, 0x42, {0x0, 0x2}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$hid(r0, 0x0, &(0x7f00000006c0)={0x2c, &(0x7f0000000000), 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$evdev(&(0x7f0000000300), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r1, 0x40284504, &(0x7f0000000340)={0x0, 0x0, 0x0, 0x3, "387a577f64d8b8ba71cb394dd358cda55d61e2ad480b0156d6382ecc89565ab1"})


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGLED(r0, 0x5421, &(0x7f0000001c80)=""/93)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="12013f00000000407f04ffff00000000000109022400010000000009040000150300"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000001540)={0x24, 0x0, 0x0, &(0x7f0000000100)=ANY=[@ANYBLOB="00222500000083a8407a730b93bf0280037488df110576eec34dbcad9552f5bb273b614f36167ccd43e36a6c7914f5caf5"], 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000540), 0x0, 0x0)
ioctl$HIDIOCGREPORTINFO(r1, 0xc00c4809, &(0x7f0000000080)={0x1, 0x0, 0x380000})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000640)=ANY=[@ANYBLOB="1201000000000010c41090ea40000000000109022400010000000009040000010301000009210000000122050009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000380)={0x24, 0x0, 0x0, &(0x7f0000000080)=ANY=[@ANYBLOB="002205"], 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000340)={0x2c, 0x0, 0x0, 0x0, &(0x7f0000000680)=ANY=[@ANYBLOB="200125"], 0x0})
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000440)={0x2c, 0x0, 0x0, 0x0, &(0x7f00000003c0)={0x20, 0x1, 0xe, "1b73aa2f3a71b385545396fc2d4a"}, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f00000000c0)=ANY=[@ANYBLOB="1201000014da2108ab1204000000001200010902240001b30000040904410c17ff5d810009050f1f0504400000090583030049"], 0x0)
syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x4080)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000700)=ANY=[@ANYBLOB="12010000000000408c0d220000000000000109022400010000000009040000010300000009210000000122050009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001000000090090"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r1, 0x0, 0x0)


syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, &(0x7f0000001540)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xa, {[@local=@item_4={0x3, 0x2, 0x0, "93bf0280"}, @main=@item_4={0x3, 0x0, 0xb, "7488dffc"}]}}, 0x0}, 0x0)
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0xfffe, {0x100, 0x1}, {0x50, 0x2}, @cond=[{}, {0x0, 0x0, 0x0, 0x1000}]})
syz_usb_control_io$hid(0xffffffffffffffff, &(0x7f0000000800)={0x24, 0x0, &(0x7f0000000740)={0x0, 0x3, 0x4, @lang_id={0x4}}, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, &(0x7f0000001200)={0x2c, &(0x7f0000001040), 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)
syz_open_dev$hiddev(&(0x7f0000000540), 0x0, 0x0)


syz_usb_connect$cdc_ncm(0x0, 0x6e, &(0x7f00000000c0)=ANY=[@ANYBLOB="12010000020000402505a1a440000102030109025c00020100800005260000000d240f0100000000000000f30006241a0000000905812300020000000904010000020d00000904010102020de4ff0805"], 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000001180)=ANY=[@ANYBLOB="12010000090003206d0414c34000ffff00010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0x40045506, &(0x7f0000000400))


syz_usb_connect_ath9k(0x3, 0x5a, &(0x7f0000000140)={{0x12, 0x1, 0x200, 0xff, 0xff, 0xff, 0x40, 0xcf3, 0x9271, 0x108, 0x1, 0x2, 0x3, 0x38e38e38e38e3d1, [{{0x9, 0x2, 0xffffffffffffffba, 0x1, 0x1, 0x0, 0x80, 0xfa, {{0x9, 0x4, 0x0, 0x0, 0x6, 0xff, 0x0, 0x0, 0x0, "", {{0x3}}}}}}]}}, 0x7fe782539b0b)
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x3, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)=ANY=[@ANYBLOB="12010000ff4ae0086d04dd08f4ff080203010902120001000000000904"], 0x0)


syz_usb_connect_ath9k(0x3, 0x1c, &(0x7f0000000000)={{0x12, 0x1, 0x200, 0xff, 0xff, 0xff, 0x40, 0xcf3, 0x9271, 0x108, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x48, 0x1, 0x1, 0x0, 0x80, 0xfa, {{0x9, 0x4, 0x0, 0x0, 0x6, 0xff, 0x0, 0x0, 0x0, "", {{0x3}}}}}}]}}, 0x7fe782539b0b)


syz_usb_connect$cdc_ecm(0x1, 0x4d, &(0x7f00000009c0)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x8, 0x525, 0xa4a1, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x10}}}}}]}}]}}, &(0x7f0000000d40)={0x0, 0x0, 0x0, 0x0, 0x4, [{0x2, &(0x7f0000000ac0)=@string={0x2}}, {0x2, &(0x7f0000000b40)=@string={0x2}}, {0x2, &(0x7f0000000c00)=@string={0x2}}, {0x4, &(0x7f0000000c80)=@lang_id={0x4}}]})


syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000001140), 0x2, 0x0)
ioctl$EVIOCGKEYCODE_V2(r0, 0x80284504, &(0x7f0000000040))


r0 = syz_usb_connect(0x0, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a82300000904000002ca744d00090503034d00ff99090805848faa"], &(0x7f0000000000)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
read$char_usb(r1, &(0x7f0000000200)=""/128, 0x80)
write$char_usb(r1, 0x0, 0x0)
syz_usb_disconnect(r0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x4, 0x24, &(0x7f0000000100)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0xc0085508, &(0x7f0000000500))


syz_usb_connect(0x1, 0x36, &(0x7f0000000680)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a86200000904000002ca744d07090503020000ff99090805848f"], &(0x7f00000007c0)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, &(0x7f00000003c0)='l', 0x1)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x100842)
write$hidraw(r0, &(0x7f0000002340)="bd80894ec947fb776e30504e0e68261fdf25dfe55bb5efd20ce7e076e58d9d8d842134cc424c9c73b052e2702867207a1e8cc518b5f197af219609399167ad15c3eb8f7a4ff2b16172109580ce58cea188626308990d55757ee4d0929d1fc1e9b2c647807ad9cfb20011839739d9d1877e748d501c07d98ec6376fe2da6ff5aedb6002cbb98bfa3aebbdd4b2dd7147668bd3481224b8371019ebcc551acb9f121f5b20df101e244736741d5f04f962d80a8a7590d99b07d214a50e04510d869bcebe5bf330cb52cf717edfa8bd4847971cf46b7a4b12346082c2c7e0ada0965d0ece6c657cb0cb6cf181471bde249501a05d79a1fa0b23039b9f8d25c74a45caabd07e0766110fc1faf3a5b40b0066fe24d364547d17e0a84b0b0d95057efb6a3f6689fea2b51d9a89a63ce98182c177305546d4ab3e424629a4660a68d500192b0e073ddd75577723cf68d1128ad7574ffb38dbef031363e82067d845037f216af9e1037d3476c38d883a962516e1d1e46863176f8f362f7ae3a012a5bf0c13c769f716c3dc52f96d98ade19bc3d5cb8d15e4f25fd7be73d26bcccb270545261b777b7b70857928e3f1b4045a3ddda103bb1f9c39367e7e9c70872fdb94b2c31be2e62eefe16312b6683d4b8ae92a2b73a55153990974a90e85c87c8e8b0382b48ef8573a611f3ee91b4d76f86e94201e722826a05e04da9c9bff1022e36074342e20c156a431009aba0c02cf9eb7693fc67831e7068faa9a4708eac736558ce105800be052af88223d58de03b03a1ebda21cd900d867b93cf28d7f354435db3945d1267c668a38fd30d4cc1f96acad792612edf149091cb07690654b4f6294d8d84901e3eb7b8194396f76416c235f4649de3cb96d58250cf98a160467c216b6b292f2610783ab6dfe1f23c3a365f31934a5630b02e6825c54c97ac5392aa08c82ac085a8b739076d23f4948d999b71d8f33e51039318367eb9b23f6a7cd897e801489c6fbf70cb38451231b268f314a429a7609e97f8f1f08c108ac1c3137a8320236c8702a4a254b18ba7f97068c77710ade94435f67e337a5eed6068f55fda01d0f7271392ab57068419880b37c3f513844c5a63a4ebafa030985bc6bfea9befa5d5d7646a6a8be5d0f563a86279bbaa9fc3ad601287aefc5330fce72804c7fcb1e15b3314ae91e067f990e48ed29079889cb44593a840d6ba4cb45d638b2e6cc31a06926d88a559c176560587ef82e79ff000a040f552a9c66b24e87683c2dc719c394c78ca1ce74f43dd762c5d04881f1d608912ae75780a2f43ad4bb39ede0e3a6522b8be866bc9f7adf6439a7ca5f2160d2af43f807e5ca20a02894efb87f37f8742251eb40319ea31711c66a82fe49ff8a887f4749a96704ed0bf89562b129ac3863623032ad8015c0e39b2ebff5de88c2ddc3f707e6dc904b251c7168d0e29d0716cb33ae5eab33fc0e06d07911a2899a542adebacaa51f3a846e9cdc8b6fc34342abd6e9d698bc48d74de5d7addcc06f2daee4f348463a81ad2b79ee53b271e4e486f1f67e513791820079a4ba2c3d539c4c9860177413ba2cbb154fa8f68da6e1f41894304f043f790459743d7063a103e0e48d253acfcea01ef72a167f8eaaa54b1840f2d2bb3e2f8ce9480d1ac84bb605e580d1252f5c9404e1aa495a59904483b7678cc4d8bf731a8c5f6a476ede4f5c5fb585534b28588b4c95bba92b7c8226d35e93342cb57800def09c6bb37a085bc061829ec2025e5ba5a63c6b508236f67e676c6c0b69e0242c3ac6bd2b93bbbfb6f9ac73ba4061701ddac7506afc53539637502fb5290242aca54ee11bcc8b7db7ced2545300858d86e502d89e80a708ae2fdde6ccae6cceb13652945942d262b397f283b4ee7f7f80e80d764cebb953f9fcaec8de410e239c84cde7f6d04bc8bd97e55444c5e680a196b42c9e767d52bd00a2aff5c7366c0c4e946d9cc8f49bef3d9eb50b6f3e23c353d19ae6a948ac9f419c7b7555f63bd8a42650558fd3203bde7401a12dbf7c0bb56623cef996a023ebff6b337849be0c7a34159fb3cac890220b570aaef7260159910ab195b6d9b1fbcdebb4a3a651b9c595bbb4308cd8506fc44583b41eb52da60a5fff7b07a9293556bf7524c72ea326981373e1ad1e4dff0950a7df2419110d1e6c0f571ea426739827531f7a083a7dfc28221a3f4c9b7b0171cfddbef1fb2bdc47553e2076bee86d766b6433d5141fbbd51a5781d6b4a2c6ef0f842e01632600ee515897e2d3d7a701316260ffccbca5029461180743978aad3f75c796392aafb15ae0d93e14035d7f8c6c666aee0c9b7e771c421806000000f9b2393dc7f2a15b0c88a5c1ea7936d34997be57b1df73cb84c6d55bfee2bc23fa7030920914572c9fe888d7fd22806f2a2c5398343916c417b930ab93232e3821c6e2a3766a462e003a60132660335f0eb570c2bb80a7045dc1720f9783b2519b3ec9f4abb5769b67d7369cb955c29e7bcf442365597ba51943427f84b24007fd3bc1d2abb580ec2afb5a851b0929a9d9ed80f1a3b8646485dac4e1101a5966ef65e9fab1797c548d4b5eb37e59acc7e44f13aa2eb02e2748de9de2989790c15e99e1d740872f78cac06c964bf07e84991d7f55bec5a34cb4fbd33cf7208b92887c5bfc41d5d347a507a78806c632928c7e45e8ab0f8e6716460924db4e82397a8a524d4a709518d6e0529cfb8bd7bd6ffa508bea6e72705648e70a9b2c179a834127637649894a7d623c0c08e77d7dc36751294db166180b461df3f71349687f02d35bfc0d747179a4cfcfd5f2801af4bc032ea2ea56da5dcc40ada2220d8ab01c9741fc767d7ed5a776a56cd8b09df36115dd0452acf1af909b3ea2209ab33879b7838e45e3ad442f4cbd635e8fcc4156a4d829678dfd65c835230d0220a46e81f5879031ce900741fb0a0a8605220238647a00007b88f6fd638c33b0da57307e91836e7433e345fd0c44ecf7970cc808c2162d00fc543ff864922e5f645f31b76a7e5f0e6c8371ce0de15a642fe2b12ca3cf956828f2c1ad4125842fe94dd1d819c1ea1cdf6c250842350122a3c059f0493bbac0dcfadfc6f0d6cafdeefebe5d4ca823cc42491758794a6a9af9d43f86d6887c7e8d21d42ba1b2d39f21a72d74c231028bc56a758bd38a5a62025604fe79f09e145ec83c309d820913ba73bb4fc98bd464d4cf103015d86cfad4ecfe7ba15d175774603bf9a1e7d5ffe2c4f52000fd65db50cf721fbbc7913a54d4c9fd9170b339d807db7d50129f07d5fa126f56df393d8dee30c4d6eb94cfbda04c72deb6f99fc8524359dd39d4ff32ba989da03bb1c351d04a947833d1571634d3d598d5c5edf8e6244dfa4805bfa24c8db532be71b31c067de9d2511320021cc4963c4442411f877d23478936e7b08e9530864f9679b8d49c884c374bacd38f8de48248711943274ac3eadbcc92e3015a3ac789c3ea38a6bb4b5d3cc64881871b3db30a6ec618f644eb94716af52d81509b83175e4fc632287cb4a8790631ad25725d6d4090f3dbb17fafaed8dc9b7ce1574bc360342d35a88fcc980ee0d1296d0cdbb3fa46d42b17f83ab1048394572e26de18a6a0a2f2c27705aace537da52c47494eaeccf45c158f12b2dbdba2d3d69d598d7e28aa51abab4c26051f22c9b7169ef9c98f18eaba7172780ea15a6ebde3b4da2d761e9df9b9fe4912849c261cad7aa48e5db40161e6d21b536b0b4eae8a59261aa99b3d15700a5da6948be4a3318d5c25ffb081ceb30cdf5433c5709db91dcc7611d127c9782e52103c09b2c7faabace71101aed1001580e72604040e1f67f1bca5fc89245607d7d2639eac091c51f58abf3e789f1fde8b9b2aa8b33e86bb8339f217eda91219da498ac452cceed599d8deadf36a187f864187e06aa139d7635303863f34478a20fc19e4f4b4b1d17c9df56c2a9206a33d2f0dfb98659695fcdf8f87bab4fe7f3d90eb849142a44cfd57d3de04ae5a9634c2761e642ad949effaaa1aee784cda74376ae7e81a6c15de32a7d14f8df7a2dcf0e782482e5639c092dd1980a2735e9e5f2b1bd049de72e28e59e95e41cb32a2d65a979ba0d448f44992461cabe080df2c27201cae6455a83fd44c3ffb0c60f3b077ba6583a9657c8f9d54fa11e799b2ede146a59ea0af12d412e37c70176c3e631480a6b2025142f6a24f67365c7038e24c434a34e6d8fd5bd7daa6f42ddfb58b20d61e9e0c9675b02b613cdbc2a9a6c6bc617271f2ac9d476c4a7bcafe543862c7ad0cdb02d685276e0e34430092385ade102da1ede5abf2e1e80887a561cd4747dccfcf8043b47df72f34b455f0bce4ea5958d52cdf1784fbcc3bc523c49834d4d61802ac9a797ea86e611c0caa5d66ebf2d20da3df7b9d201d775bb9ddcd50e5e99796f249b05a13fe1d12ee2bd01298774ee13c370aea00eaf4339cba32ca772156824d610739bd7fc877e76cd494d9d29c466f27c69a75d9906b5aac743b42b732cf8940f87c1572a5cd79340b455c22078b89c9874546b3f2f3da56b19da853b81dac42098092f697579d8d8bdc5dce7ef0c487ee681350ca0bc5ba2539433316658852e34c0a6233cd0fd77af2a091e07c45a1215a535def9800c25a2795706d51c525720ee4266748400f255f2b3fdb5feb3fe038ee68b9e12a1294e72cde25ad8914e9af010050fa444a5d566b9ee3a6a2e4e1608714812a3e152f34840938e4705787cbc381884ddefd0e22f927a307d84572572c9bd87a9c93ebe44755930be506337e235f51bfe6d9dae734de9d48676ee6a6c73e9023962c5bdd0d31dce57739169972bb9ef2b3da4a1e4e0863fbce4bb9da18c22e230e1c2e4bb3fefc75905f2170b56452dd45cee4df232bd19fe468729f4d65ff9c6f5625f3c7a5505dc5495c655ec655665279b6dab9799c97908ed1636af13763894064e573f86e5339b9d081af2ffbaa8ba4e74c595fe17b057791b899495d402d0288f1ac06158ae2c230bfae4d6eb632b3af3b6d3a0e0e01d7fa84f160967eb4368288559f165162f4e12a76130c86876fc189a8f57dd9f9d67d2d8a3df03d18950dc2c4ef43d3e4dcee96f67b2a910d9ef4153e4c86a2175fd5cf0502a6dd5e6de06660dfd90dd4edc9f574f780f090f914662ce6f620b42121c303ae1e8b1becddd5fcf2e61452104537660609c48fbd1166ad80c1b74c0e1ff4670b109547c9614979de3bcd6b82193d850ca0550bfc428d2cc00cba131fcdc8bbc1c4e895c3859d6b78edc83f68be6a16878d94cf345d4f53faf22084432dfc0ffbe9f998c88fe233681fcf5ed5feb8103edd7aff54465a409b565700e02a038c2e6bbbb4aca72a3e16d6753847e414b3929e5369077673a041490de450588f2658bb510b588c7a88b7340c405cd0085ec1b6659e9749d3526165d1769b97466e6f321ef7867a23bd75045453ec760f0d0682915f72e96bb15cef052d0c715c04484050031b831b7d868c96c7768a15ab64eee5be965c49204109eca1bb1aa81b228195da40fb15b6262b417e0b8cbfa7aa50036d7b74f2562730fc4486954f6f4b76604c5b8193eefb5ebfc66e0746986ee356089ff6fd22f47aceb950ca6fd190f1a7888920a1deeca6e694fa53ab66042833cf3b8bc42e8e0e19231d76ef46fbfec6aa69a618ee46adcc6aa292355f0ac934b272e1511056763bfeaa88fdb486ce3d9c6f09aaad1de0253bdf0455d52c13d32d53fa0d2adb862d9a2b0f8a651157e8afce663d0000000000000000", 0x1001)
syz_open_dev$hidraw(&(0x7f0000008e80), 0x5, 0xc5d78e0a32184f32)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
r1 = syz_usb_connect(0x0, 0x24, &(0x7f0000000780)={{0x12, 0x1, 0x0, 0xf6, 0xb3, 0xe0, 0x40, 0xdf6, 0x4b, 0x56d7, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x66, 0x87, 0xca}}]}}]}}, 0x0)
syz_usb_disconnect(0xffffffffffffffff)
syz_usb_disconnect(0xffffffffffffffff)
syz_usb_control_io$hid(r1, 0x0, 0x0)
ioctl$EVIOCRMFF(r0, 0x550c, 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000080)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000f80)={0x2c, &(0x7f0000000e80)=ANY=[], 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x541b, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x8301)
read$hidraw(r0, 0x0, 0x0)


r0 = syz_usb_connect(0x0, 0x10b, &(0x7f0000000000)=ANY=[@ANYBLOB="05010900b24b6a10e6040300770100000001090224000b010000000904000302ccd4280009050b02000000040009058a02"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)


syz_usb_connect(0x0, 0x32, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x3e, 0xdd, 0xbd, 0x20, 0x41e, 0x3f19, 0xa472, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xff, 0x49, 0xf, 0x0, [@uac_control={{}, [@mixer_unit={0x5}]}]}}]}}]}}, 0x0)


syz_usb_connect(0x0, 0x24, &(0x7f0000000100)=ANY=[@ANYBLOB="120100000b2e351013042660aa18010203010902120001000000000904000000a6"], 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x40284504, &(0x7f0000000280)={0x0, 0x0, 0x0, 0x0, "1155d4f82abca481433d930101b203d24da8edcf1a4d57a9e83a64c2e8da1fa4"})


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x8d, 0x4a, 0x30, 0x20, 0x6cd, 0x110, 0x7171, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x22, 0x63, 0x47}}]}}]}}, 0x0)
syz_usb_disconnect(r0)
syz_usb_disconnect(r0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_connect$uac1(0x6, 0x71, 0x0, 0x0)
ioctl$EVIOCRMFF(r0, 0x8933, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0x1017, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0xc0085504, 0x0)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000100)=ANY=[@ANYBLOB="120100001ddf8208c007121522300000000109021b0001000000010904010001faf40d0009058203"], 0x0)
syz_usb_connect$uac1(0x0, 0x0, 0x0, 0x0)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
syz_usb_connect$printer(0x0, 0x2d, &(0x7f0000000440)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0xff, 0x525, 0xa4a8, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x1, 0x0, 0x40, 0x40, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x7, 0x1, 0x0, 0x0, "", {{{0x9, 0x5, 0x1, 0x2, 0x200}}}}}]}}]}}, &(0x7f0000000780)={0x0, 0x0, 0x0, 0x0})
syz_open_dev$char_usb(0xc, 0xb4, 0x0)


syz_usb_connect$uac1(0x6, 0x71, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f00000002c0), 0x1, 0x0)
ioctl$EVIOCGMASK(r0, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffff36, &(0x7f0000000200)="952bb3e006ae9a4c3a"})
r1 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r1, 0x80104592, &(0x7f00000001c0)={0xff, 0x0, 0x0, 0x8, "99f9ffffdf7f00000020000000000900000000001000"})


r0 = syz_usb_connect$printer(0x0, 0x36, &(0x7f0000000640)=ANY=[@ANYBLOB="0d01000009000008250592d20700006a3b010902241700fa0074980904e4ff11070103000905010200ffe0000009058202"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000000)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f00000005c0)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_disconnect(r0)
r1 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000700)=ANY=[], 0x0)
syz_usb_control_io$hid(r1, 0x0, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)


syz_usb_connect$cdc_ncm(0x0, 0x6e, &(0x7f0000002cc0)={{0x12, 0x1, 0x340, 0x2, 0x0, 0x0, 0x20, 0x525, 0xa4a1, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x5c, 0x2, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x1, 0x2, 0xd, 0x0, 0x0, {{0x5}, {0x5}, {0xd}, {0x6}}}}}}]}}, &(0x7f0000003340)={0x0, 0x0, 0xc, &(0x7f0000002ec0)={0x5, 0xf, 0xc, 0x1, [@ext_cap={0x7, 0x10, 0x2, 0x12}]}})


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
r1 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r1, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbedd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
ioctl$HIDIOCGRDESC(r0, 0x4030582a, &(0x7f0000000040)={0xd, "3a82000000010000000000009d"})


syz_usb_connect(0x0, 0x2d, 0x0, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000100), 0x0, 0x822b01)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffffcf, &(0x7f00000000c0)="952bb3e006ae9a4c3a"})
write$char_usb(r0, 0x0, 0x0)
syz_open_dev$evdev(&(0x7f0000000b80), 0x0, 0x0)


syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000140)={{0x12, 0x1, 0x0, 0xb5, 0x40, 0x33, 0x40, 0x1a86, 0x7522, 0x3536, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xe4, 0xd6, 0x24}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f00000007c0)={0x84, &(0x7f00000003c0)={0x0, 0x0, 0x2, "d413"}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$printer(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(r0, 0x0, 0x0)
syz_usb_control_io$printer(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ncm(r0, 0x0, &(0x7f00000003c0)={0x44, &(0x7f0000000180)={0x0, 0x0, 0x2, "ce1b"}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})


syz_usb_connect(0x0, 0x2d, &(0x7f0000001600)=ANY=[@ANYBLOB="12010000ec31f8104c1302007eec0102030109021b0001000000000904000001018b75000905839d", @ANYRESHEX], 0x0)
syz_open_dev$evdev(&(0x7f0000000100), 0x4000000, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x4, 0x24, &(0x7f0000000100)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0xc0085508, &(0x7f0000000400)=0x20000000)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$uac1(0x6, 0x71, &(0x7f0000000180)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0x4004550a, 0x0)


r0 = syz_usb_connect$cdc_ecm(0x2, 0x4d, &(0x7f0000000340)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x12, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}, {[], {{0x9, 0x5, 0x82, 0x2, 0x208}}, {{0x9, 0x5, 0x3, 0x2, 0x40}}}}}]}}]}}, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r0, &(0x7f0000000bc0)={0x14, 0x0, &(0x7f0000000b80)={0x0, 0x3, 0x1a, {0x1a}}}, 0x0)
syz_usb_ep_read(r0, 0x3, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r0, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffff36, &(0x7f0000000200)="952bb3e006ae9a4c3a"})
ioctl$EVIOCGMASK(r0, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffffcf, &(0x7f00000000c0)="952bb3e006ae9a4c3a"})


r0 = syz_open_dev$hidraw(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(0xffffffffffffffff, 0x80104592, &(0x7f00000001c0)={0x0, 0x0, 0x0, 0x0, "00f8ffffffffffffff0000f62386f0dfdf293700"})
ioctl$HIDIOCGRDESC(r0, 0x401c5820, &(0x7f00000001c0))


r0 = syz_usb_connect(0x0, 0x24, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0xfc, 0xe, 0xf7, 0x20, 0x10c4, 0x818a, 0x99d3, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f00000008c0)={0x84, &(0x7f0000000000)={0x0, 0x0, 0x3, "65b89c"}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})


r0 = syz_open_dev$evdev(&(0x7f0000000b80), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x80104592, &(0x7f0000000040)={0x0, 0x0, 0x0, 0xfffffffe, "207d3d00000000201b08700c1e0ac74f000000001200000000000900"})


syz_usb_connect$cdc_ecm(0x0, 0x56, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x40, 0x525, 0xa4a1, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x44, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x6, 0x0, 0x0, {{0x6, 0x24, 0x6, 0x0, 0x0, 'P'}, {0x5}, {0xd}, [@mbim_extended={0x8, 0x24, 0x1c, 0x0, 0x0, 0x2}]}}}]}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x822b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x1068)
ioctl$EVIOCSCLOCKID(r0, 0x400445a0, &(0x7f00000002c0)=0x7)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000050cb5340450c10108e492940a80909021b00090000000009040002010035040009058dff86"], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, &(0x7f0000000080)='l', 0x1)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1770, 0xff00, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000140)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x5, {[@global=@item_4={0x3, 0x1, 0x0, "6652de90"}]}}, 0x0}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x17ef, 0x60a3, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000540)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x5, {[@main=@item_4={0x3, 0x0, 0x0, "96cf62ab"}]}}, 0x0}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="12010000090024206d041cc340000000000109022400010000a00009040000010301010009210008000122010009058103"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000240)={0x24, &(0x7f00000000c0)=ANY=[@ANYBLOB="00000c000000070001", @ANYRES64=r0], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000180)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f0000000280)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000001200)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000040)={0x20, 0x0, 0x4, {0x1}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000001700)={0x2c, &(0x7f00000011c0)={0x0, 0x0, 0x4, "ba76598c"}, 0x0, 0x0, 0x0, 0x0})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x56a, 0x331, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x7}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
r1 = syz_open_dev$hidraw(&(0x7f0000000000), 0xfffffffffffffffd, 0x531842)
write$hidraw(r1, &(0x7f0000000040)="87f2b6e8f8c22c3cefbb88f58450b65954341eae628e66999b4e47a63ffd2a95e1835fc5c9c14c5a49087493a3f18fa77a8c8244ab74d30bd42aaa65a21d0e41fcd948548089f0cab9a4f7b5c6b8c8419966a0c0d0942d17d81fe1f83c5d2e5dcf7d0339e02bd35792c5ef440b4fa964b0152b3bbcf621eff8c006705591aa0111b608d81c0f4bfc0287cc8152abecf41a6e07e153e344ffe0f40aa9a5efcff6bc82702cd026d87c663b5cc98bb39f5269729642b99a08defb77c985f47f2047a2f2853fa5c38b611c29b7b5aff20e7dc4b695bd7e7fdc131e3d5ae3521fed076d51260ce4a8538baa2696c8651f", 0xee)
r2 = syz_open_dev$hidraw(&(0x7f0000000140), 0x3f, 0x40)
ioctl$HIDIOCGRDESC(0xffffffffffffffff, 0x90044802, 0x0)
r3 = syz_open_dev$hidraw(&(0x7f0000001000), 0x0, 0x456c01)
write$hidraw(r3, &(0x7f0000001040), 0x0)
write$hidraw(r2, &(0x7f0000001100)="6d01858ce633bf4f95cf68d5ab16f19c6f12d930a16391ab2353f152ceb0b80e1c5b219724f2d83dfd6d6d6601593494fefb79ad96a979db673035851446f065309dc8355cb1a967592f6dc284aa4c882b12e6d70b2a7708bec5b7ce68ba890cb4e59d3d8250519feb1cc4ae3457d3df36be3a9690b018a6b25f21ec5e639151380d2e59b0c273087c16a752a81e6344e84c035ee65068de3611fa2f58", 0x9d)
write$hidraw(r1, &(0x7f0000001200)="03647af105781b0c7502bdb777aa4703fd9049398493d6184aa1014e609375853d8a48e3a81ba59aabc0bf9a35d15f3979c9dd6130c89f455c67dbd6c625695f151aecd858232c8fa4eff75095d2b42923e5bbe4a9e65561f2a7c1e0c517b6cdf4274f92eb41501c6fb4186888d95e015093db905d7ba7fb72ae61d6a381e2253d6205a953e437db8b9d53c4ba85aacd19368c23f3caf38b5d8bd72d1de6fd46dcde1d900370bf4c741664ed498c6c0639bebf923c021d3d463d1d7258ce0ef2801fb773b694092a3907398590ca082f", 0xd0)
r4 = syz_open_dev$hidraw(&(0x7f0000001300), 0x8, 0x31b640)
ioctl$HIDIOCGRDESC(r4, 0x90044802, &(0x7f0000001340)={0x894, "260e3ff683315cebcf97827324c953a54e09efc857e5b44e58424addca72b595274796c44f161ba382ab8c398308cefe93a47d265dbf52c8c5c69fd14cfd797624fff2c0ab3429815220b4e9eb9ba88530d1fa9c96bcb10995d2b25d7447408666aa0fec0ac38f00c0573d86b206d088368bde2ec25132bb61e4a655ab0a752ee1f8b5e7a90c2b5f7b4baea988e871fa9e4c14fcf83a480963d178f5a638b1c04580a7ee65cef4274b31cd210a885ad4abd910e67e22fc20f623fab349451fea3704bd47eec4fa96c367d4366e44d3d82d684dda0f737a2302366722ede13c1e031c343c1fdf45af07d2c6f5fcf96ee004fd513ad24236e36ef5da8528a2dd8972af80815b59e65e0e60cfd06e42818f4978464d25dde11e5ed713981b7771720992f13bcbb5cb04b737795cd7facbcd7c10c96fae29b6008292f56590acac70cbebc0c606bc1ca77f1e52d6dcf7468b315467c42fbf5b4ded804727b9f5d87fedc3cf22e1132f7fce72d516d7654cb9a1721f11a9c13a3a47c354cecfec8b635f4fd57f2f10b1c531e622319e8eb290d47ba475a64342fdbeb4c47786e9183a9ae4a87cd8ce016e72c2e692e17f91d5edd42596e6b563c7dc7555b3dcef32402a0cccb7a85fe713d6c3182203db30603b5474725f564dd3f5c4b86711ffa14e8029dafc3ce517fc062873ca2797342056dfa8d18fe49acb59d5ebfc8120ec38d4905adc12ff601ce0b4d2b9a2b9a54d49be2397e649a372ca149a8e8f007f84ac86be846b9736af2c5057324f8f2e057b33ccf9755146950de77da462fe75bc6ad144ce2241460cefebf7e80dc24b7cbb8e0d831cb4b2d7c4f06ba8f907af6d99714bebf58baa2ae46ff36bf00d9a2688dd8b2607956188c11950a0898dfb730138366e27c465bde5d4fdd8d79eea1e08dc76323e81d9778cd1b86e2f1805d790c63c4faad0c5941d729db0132f6e480b94813f388ad8964025205ad92205f77ffcc02f5975b5241e1afb08baad7769cb083a8e08da1a48f87c19c42d780a1159f03c1d7f8bb5c81fa36e404f83f9409a9526d23785ef7acd7e1e1903d95879a1fdcd0da0840ce6fc27f7f585dbf49712e476d8fe0e205640db671cfec1a4dd520606115be1df66da9e4b08d362d9acecfa61cdf8e5bb39e6947245148bad3222af70ecd9ba004e68d3dc7a9dd0de7ab9bb0b17105825b402ca70208aeb4cbe2b3b1068dd0212c03894c1d9eb7b96f9e5e0be70c8d6aa0801f8070fe3627fa3b97a1957dcd39a18bf3b76289b545b7783caa7e598dec4e74fda398d85840eb60ac8a37cef5b285995dea7946576a3362bf78b6c0e051db214649a57a5ebae5c0cdab762536809a608163cf8bf25dba76a9e0e69f194affde773e8caf44510d7b8a0876a9fba9f4c2f1aaeb752e009df8dfd66e361c188747ce41bf52968be93254311fea138f3613db311ff9894ae454ec693b3c2044be95b51dad4e5b81b1ea3de9c1e37e522f61a58fe6bf72e22df32c6796668a9b5d42a92055d0774b159d5c7f69eb30fe1c261595f9adc49ac0311ca8100d073855a2453d05d9a8909b30a13971d1770740668d48313d93038957d4fd3f7b18074ddd5d22250d96aff437fa3652a08fe86d758592a579e1168fad785748fa92269a7003ddfd8261010c548c898ca27deeab4d246c24750d100c945174cbf30a133ed4a0187a19c15e33da876e281f8b930012108d08a644012373a9801bff7b5f0cbc058b7c585ba99dcdfa10c352e565195d2e54cbf68df6419b32fd3c6eb79322b1de789ddd057b799a5ca4d11b32655a6e71a942f2498b1c928a682498879cf34875d0dea219e8c2d420ef6ed2978286a03565994815d21e727de6da5f9522e1b72341c2be402407bd06ee4f9f11e71d47899d255e101b54ceefb992d23a28aae417f73627335c34d01ded72185dc5a82811b4955398f201c9780eb962c37c7b4e1d0f082d47e0e8b9c83612fd891e64f7b21daf6b3b216ec3de1715d0fc4376d6ccb9e3ee34b438aaa48714e49aa71d27bdcb77ca9058aca8153dfa9224c92ed7e43674d329c902c05558029c143626b810bda721d330ec91e66e7aa58a2c132fa2917b3ede3e4cbb153f0de9392ceb0063680531ae1311ca68687a513f33da16f0b73d24377966ee156acc32214d2c70ac57cef53748ee29131365b17d0d96a0a81723a1bd2f4bda4e008d5b19220d74bc356cd10fc45f6cb5078e781d97610deec214eda9234da88af490d5f989e5d3621ec6f0bede1ce78365145ba7a92eeb8938efcbe9f95da8ffff93b661704c761527f0bf6217a111395929925d1c5b5d35cf02e0d55aec4fa349f237b4f2de10ddf3be8b0f65d415215eac9d88d6611e2817ee26a650bebb33380246df18699c6cb657b888f4f54542b4942c2cf0495254b395205b2b895dd1971f6199752fbc20156407e6b310d7cd84d32a93620383da45cb3dd1d768e6d8490efa9df7a198b2481b5c555debc5126e29900fc3aceede50fa6632ba679e53e50af83fea710e73253c3a34441db862e16dc6455021cc99266180437b2365973b22e1447d2fb72db0cd67b2e092e55a59f008aa15ee0300388dc87a9d92598d886a73ed36f48f52aab8a2e3eabe8095b76358692c4bc45bffebcc7c3878798c548056c10207a084dc3b2946f68bd80114882b4354c8b256f3da1a9834ebd200a82bb124d3bc90c2df0728b2f3cd5a368d99876aa6aa0162a0b1febf1722daf360fdbe74962deabb4d75fe320b1fae64d61a3c6f7a0c818392ed9b04bdc50332b4cce1e1c5286b16fa172e126cf5015c9af9e2de14c34cbb2832bbf8cf6cd91dd0725c490859c59189a0f66f309b36fdee2a0b4e14ce6e52a63640109df8804c2742c4372cfb5aaa462b8aceef528cdc4e873fc16eb26544e80855f337146310657d720c7c18328a305834b1200d09d401e0ad1d679cc24898ff9ba5ff9bb824684329258526517b156a3bdb9c57fac1f7d9a9a7c74de5468fdef01c4ad8d3fb51e9849595acf81c1af513056a519b8cf215fd5457b2bfb7b88de84892d0d9d88e7fa14d50c974e7b3c4d9def791a0e98f104"})
syz_open_dev$hidraw(&(0x7f0000001c00), 0x10, 0x400000)
read$hidraw(r2, &(0x7f0000001c40)=""/13, 0xd)
r5 = syz_open_dev$hidraw(&(0x7f0000001c80), 0x7, 0x20000)
ioctl$HIDIOCGRDESC(r3, 0x90044802, 0x0)
read$hidraw(r5, &(0x7f0000000f00)=""/52, 0x8f)
read$hidraw(0xffffffffffffffff, &(0x7f0000002a00)=""/85, 0x55)
write$hidraw(r5, &(0x7f0000002a80)="739878ad8d2f4b4152d10a6bab0e6c26b9270f976f55cd30076f471f46c00c6e2bd2dd6f5bcd77192e87b77fd4fcce765422a33d7e5191aaf8511c7afd4d9ea80ca4d9f351c810a96bc7d093c4423e3361f14603a1e2fbd0c6804c", 0x5b)
ioctl$HIDIOCGRDESCSIZE(r4, 0x80044801, &(0x7f0000002b00))
syz_open_dev$hidraw(&(0x7f0000002b40), 0x0, 0x340)
syz_open_dev$hidraw(&(0x7f0000002b80), 0x8, 0x220100)
syz_open_dev$hidraw(&(0x7f0000002cc0), 0x5, 0x200)


syz_usb_connect(0x0, 0x24, &(0x7f0000000180)={{0x12, 0x1, 0x0, 0x33, 0x7, 0xd0, 0x40, 0x1199, 0x301, 0x2ec5, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xc3, 0x51, 0xb}}]}}]}}, 0x0)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000080)=ANY=[@ANYBLOB="12010000f2d07c40501d89601dd0000000010902120001000000000904"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f00000007c0)={0x44, &(0x7f0000000400)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io$printer(r0, 0x0, &(0x7f0000000300)={0x34, &(0x7f0000000040)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000300)=ANY=[@ANYBLOB="12010000090024206d041cc340000000000109022400010000a00009040000010301010009210008000122010009058103"], 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000240)={0x24, &(0x7f0000000080)=ANY=[@ANYBLOB="00000900000007000189"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000180)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f0000000280)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000080)=ANY=[@ANYBLOB="12010000a8f4dd086d04b1082a6d000000040902120001000000000904"], 0x0)
syz_usb_disconnect(r0)
r1 = syz_usb_connect$cdc_ecm(0x0, 0x56, &(0x7f0000000000)=ANY=[@ANYBLOB], 0x0)
syz_usb_control_io$cdc_ecm(r1, 0x0, 0x0)
syz_usb_control_io$cdc_ecm(r1, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000002ac0), 0x0, 0x0)
read$hidraw(r0, &(0x7f0000000080)=""/4, 0xfffffffffffffddc)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x56a, 0x12c, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000b80)={0x24, 0x0, 0x0, &(0x7f0000000000)={0x0, 0x22, 0x5, {[@main=@item_4]}}, 0x0}, 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[@ANYBLOB="00000100000001"], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b02, 0x0)
syz_usb_control_io$printer(r0, &(0x7f0000000100)={0x14, &(0x7f0000000000)=ANY=[], 0x0}, 0x0)


r0 = syz_usb_connect$printer(0x0, 0x36, &(0x7f0000000640)=ANY=[@ANYBLOB="0d01000009000008250592d20700006a3b010902241700fa0074980904e4ff11070103000905010200ffe0000009058202"], 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000000)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f00000005c0)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_connect$uac1(0x0, 0x0, 0x0, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_connect$uac1(0x0, 0x0, 0x0, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$uac1(0x6, 0x71, &(0x7f0000000180)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0x40045506, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x0, 0x24, &(0x7f00000000c0)={{0x12, 0x1, 0x0, 0x20, 0x41, 0x13, 0x20, 0x257a, 0x360f, 0x31, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x32, 0x8a, 0xff}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x4004550e, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000700)=ANY=[@ANYBLOB="12010000000000408c0d220000000000000109022400010000000009040000010300000009210000000122050009058103"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f0000000740)={0x2c, &(0x7f0000000980)=ANY=[@ANYBLOB="00000001000000090090"], 0x0, 0x0, 0x0, 0x0}, 0x0)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x4, 0x0)
ioctl$EVIOCSMASK(0xffffffffffffffff, 0x40104593, 0x0)
ioctl$EVIOCSKEYCODE(r1, 0x40084504, &(0x7f0000000240)=[0x0, 0x5])


syz_usb_connect(0x0, 0x2d, &(0x7f0000000540)={{0x12, 0x1, 0x0, 0xbd, 0x4d, 0x96, 0x40, 0xfff0, 0xfff0, 0x3978, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x1b, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0xab, 0x93, 0x3b, 0x0, [], [{{0x9, 0x5, 0xf, 0x1}}]}}]}}]}}, 0x0)


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000140)={{0x12, 0x1, 0x201, 0x0, 0x0, 0x0, 0x8, 0x5ac, 0x259, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x1, 0x0, 0x0, {0x9}}}]}}]}}, &(0x7f0000000000)={0x0, 0x0, 0x5, &(0x7f0000000240)=ANY=[@ANYBLOB="050f050020"]})


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="120100006cb87f10cd06080121bb0102030109021b000100000000090400f16783970000090587", @ANYRES64], 0x0)


syz_usb_connect$uac1(0x0, 0x89, &(0x7f0000003680)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x8, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x77, 0x3, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0, 0x0, {{}, [@mixer_unit={0x5}, @output_terminal={0x9}]}}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {[@format_type_i_continuous={0xa, 0x24, 0x2, 0x1, 0x0, 0x4, 0x0, 0x0, "", "9943"}]}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x82, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}}}}]}}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0x541b, 0x0)


syz_usb_connect(0x2, 0x24, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0xb, 0x2e, 0x35, 0x10, 0x413, 0x6026, 0x18aa, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xa6, 0x9b, 0xce}}]}}]}}, 0x0)
ioctl$EVIOCGKEYCODE(0xffffffffffffffff, 0x80084504, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x46, 0x2}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_open_dev$evdev(&(0x7f00000014c0), 0x0, 0x0)
ioctl$EVIOCSKEYCODE(r0, 0x40084504, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000140), 0x0, 0x0)
ioctl$EVIOCGREP(r0, 0x400445a0, 0x0)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000080)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000f80)={0x2c, &(0x7f0000000040)=ANY=[], 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_control_io$printer(r0, 0x0, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b03, 0x0)


r0 = syz_open_dev$hidraw(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x401c5820, &(0x7f00000001c0))


r0 = syz_usb_connect(0x0, 0x5a, &(0x7f0000000080)=ANY=[@ANYBLOB="1201000060f94d100d05020027230102030109024840020000000009047d04031d5abf0009050400005539000009050b00000000000009050200000005000009047d01013481af0009a00e00230000690009047dbe"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f00000002c0)={0x84, &(0x7f0000000100), 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x40305829, 0x0)


syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0x1017, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x46d, 0x1017, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x41015500, &(0x7f0000000400))


ioctl$EVIOCSMASK(0xffffffffffffffff, 0x40104593, 0x0)
r0 = syz_usb_connect$printer(0x0, 0x36, &(0x7f00000001c0)=ANY=[@ANYBLOB="0d01000009000008250592d20700006a3b010902241700fa0074980904e4ff11070103000905010200ffe00000090582021a"], 0x0)
syz_usb_connect(0x0, 0x0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000000)={0x84, 0x0, 0x0, 0x0, 0x0, &(0x7f00000005c0)=ANY=[@ANYBLOB=' '], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r1, &(0x7f0000000100)="cd", 0x1)


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000080)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io$cdc_ecm(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000f80)={0x2c, &(0x7f0000000040)=ANY=[], 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b03, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000140)=ANY=[@ANYBLOB="1201000000000040ac054382408b0b00000109022400010000002009040000fd0301000009"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f00000003c0)={0x24, 0x0, 0x0, &(0x7f0000000080)=ANY=[@ANYBLOB="00220102000000"], 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCSREPORT(r1, 0x81044804, &(0x7f0000000040)={0x20000001})


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x61, 0x2}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201000009980708b5192100c7980000000109021b00012000ac00090400000107000009090585cf", @ANYRESOCT], 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000540)={{0x12, 0x1, 0x0, 0xc5, 0x2d, 0x22, 0x40, 0x4cb, 0x10b, 0x3de0, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x6f, 0x3d, 0x4d}}]}}]}}, 0x0)
ioctl$EVIOCRMFF(r0, 0x83c0550b, &(0x7f0000000400))


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1e7d, 0x2db4, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000b80)={0x24, 0x0, 0x0, &(0x7f0000000000)={0x0, 0x22, 0x5, {[@main=@item_4]}}, 0x0}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902240001000000000904000001030100000921000000012201000905810308"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io(r0, &(0x7f00000011c0)={0x2c, &(0x7f0000000100)=ANY=[@ANYBLOB="000002"], 0x0, 0x0, 0x0, 0x0}, 0x0)
syz_usb_ep_write(r0, 0x81, 0x81, &(0x7f00000002c0)="b9425b446512d23236973599b76c4705397f00466eb0ef01e29655f663ee844da60be22bf21472b1e7f49ad068c4e1c0a9573325f36784ffffce4c6b81fdb183acf730ddbf395346f7fd23f2e176b224e7ea1deb33c697884689393c15d155a710eb972acd778cd33d4d8a9cf9d6707a573da8dd49c0c6d33f0a3898c315943f48")
ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x49, 0x2}})
r1 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r1, &(0x7f0000000040)="e2", 0x2778)


r0 = syz_open_dev$evdev(&(0x7f0000000800), 0x0, 0x0)
ioctl$EVIOCGMASK(r0, 0x5452, &(0x7f0000000100)={0x3, 0x0, 0x0})
ioctl$EVIOCGMASK(r0, 0x5452, &(0x7f00000033c0)={0x0, 0x0, 0x0})


ioctl$EVIOCSKEYCODE_V2(0xffffffffffffffff, 0xc0189436, &(0x7f00000001c0)={0x0, 0x0, 0x0, 0x0, "5f4908c3efde460b010001002f4d6d80617900"})
r0 = syz_open_dev$hidraw(&(0x7f0000000000), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r0, 0x401c5820, &(0x7f00000001c0))


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x40284504, &(0x7f0000000100)={0x0, 0xfd, 0x0, 0x0, "d76c4035548c60c4147f7561abfc25ebaceae31ed887817b48477518ae653d09"})


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000680)={{0x12, 0x1, 0x0, 0x10, 0xa0, 0xfd, 0x20, 0x2c42, 0x16f8, 0xbae3, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xb5, 0x51, 0xd2}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0x550c, 0x0)


syz_open_dev$evdev(0x0, 0x0, 0x0)
syz_usb_connect(0x0, 0x24, &(0x7f0000001040)={{0x12, 0x1, 0x0, 0x5e, 0xa5, 0xd1, 0x20, 0x545, 0x8002, 0x30a, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xd0, 0xdb, 0x94}}]}}]}}, 0x0)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f0000000100)={{0x12, 0x1, 0x0, 0x2a, 0xb3, 0xdf, 0x8, 0x4bb, 0x901, 0x56a0, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xc7, 0xad, 0xd8, 0x9e}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000800)={0x24, 0x0, &(0x7f0000000740)={0x0, 0x3, 0x4, @lang_id={0x4}}, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000001200)={0x2c, &(0x7f0000001040), 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$printer(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)


syz_usb_connect(0x0, 0x3f, &(0x7f0000000040)=ANY=[@ANYBLOB="11010000733336088dee1adb23610000000109022d0001100000000904"], 0x0)
r0 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
write$char_usb(r0, 0x0, 0x58)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x40284504, &(0x7f0000000a40)={0x1, 0x0, 0x0, 0x0, "c2db8f6119935381739024adac2739f5f4ab6ea78c22e18d00baad652b9c0336"})


syz_usb_connect(0x0, 0x54, &(0x7f0000002840)={{0x12, 0x1, 0x0, 0xbb, 0xaa, 0x2b, 0x10, 0x1410, 0x7041, 0xf85f, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x42, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x2, 0x4f, 0x27, 0xa8, 0x0, [@uac_control={{}, [@mixer_unit={0x7, 0x24, 0x4, 0x4, 0x5e, "0d89"}]}], [{{0x9, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, [@uac_iso={0x7}]}}, {{0x9, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, [@uac_iso={0x7}]}}]}}]}}]}}, 0x0)


syz_usb_connect(0x0, 0x3f, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000d0918108ac051582588f0000000109022d00010000000009040000030b08000009058d67c8002a000009050502000000000009058b6e"], 0x0)
ioctl$EVIOCGKEYCODE_V2(0xffffffffffffffff, 0x801c581f, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001640), 0x0, 0x0)
ioctl$EVIOCGSND(r0, 0x8040451a, 0xffffffffffffffff)


syz_usb_connect(0x0, 0x24, &(0x7f0000000040)={{0x12, 0x1, 0x0, 0xff, 0x1e, 0xc6, 0x8, 0x2040, 0xd300, 0x32a3, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x12, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0xc5, 0xab, 0x39}}]}}]}}, 0x0)
write$char_usb(0xffffffffffffffff, 0x0, 0x0)


syz_usb_connect(0x0, 0x36, &(0x7f0000000080)=ANY=[@ANYBLOB="120100009c147010861246205bb4018203010902240001000000000904000002ff04010009050a3a000000000009058702"], 0x0)


syz_usb_connect(0x1, 0x36, &(0x7f0000000680)=ANY=[@ANYBLOB="1a0100005c6b4408070a64006e40010203030902240001a86200000904000002ca744d07090503020000ff99090805848f"], &(0x7f00000007c0)={0x0, 0x0, 0x0, 0x0, 0x1, [{0x0, 0x0}]})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x46d, 0xc534, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000280)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x5, {[@main=@item_4={0x3, 0x0, 0x8, "a8f46877"}]}}, 0x0}, 0x0)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x4c, 0x2}, @period={0x0, 0x0, 0x0, 0x0, 0x0, {}, 0x0, 0x0}})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f0000000000)={0x0, 0x8, &(0x7f00000010c0)="f39561fc17b7f981"})
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000580)=ANY=[@ANYBLOB="12013f00000000407f04ffff000000000001090224000100000000090400001503000000092140000001220f00090581", @ANYRES16], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000000)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0xf, {[@local=@item_4={0x3, 0x2, 0x0, "f896e404"}, @local=@item_012={0x1, 0x2, 0x0, 'e'}, @main=@item_012={0x2, 0x0, 0x0, "f792"}, @main=@item_4={0x3, 0x0, 0x0, "9ef12d19"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hiddev(&(0x7f0000000080), 0x0, 0x0)
ioctl$HIDIOCGUSAGE(r1, 0x4004480f, 0x0)


syz_usb_connect(0x0, 0x34, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x71, 0x5d, 0x76, 0x10, 0x19d2, 0x1121, 0xcf68, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x22, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0xff, 0xff, 0xff, 0x0, [], [{{0x9, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, [@generic={0x7, 0x5, "879f76be2e"}]}}]}}]}}]}}, 0x0)


syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f00000003c0)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x10, 0x525, 0xa4a1, 0x40, 0x0, 0x0, 0xffffffffffff8001, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x2, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}}}]}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x0, 0x24, &(0x7f0000000100)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0x40095505, &(0x7f0000000400))


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x7, 0x4042)
ioctl$EVIOCGMASK(r0, 0x5460, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGKEYCODE_V2(r0, 0x80284504, 0xffffffffffffffff)


syz_usb_connect$hid(0x0, 0x0, 0x0, 0x0)
r0 = syz_open_dev$evdev(&(0x7f0000000100), 0x0, 0x822b01)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffffcf, &(0x7f00000000c0)="952bb3e006ae9a4c3a"})
write$char_usb(r0, 0x0, 0x0)
syz_open_dev$evdev(&(0x7f0000000b80), 0x0, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001640), 0x0, 0x80801)
ioctl$EVIOCSKEYCODE_V2(r0, 0xc0189436, &(0x7f00000001c0)={0x0, 0x0, 0x0, 0x0, "c889190700000000000000f36926832bc7464cde460ba989075e2f4d69806179"})


syz_usb_connect$uac1(0x0, 0x71, &(0x7f00000012c0)={{0x12, 0x1, 0x300, 0x0, 0x0, 0x0, 0x10, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x5f, 0x3, 0x1, 0x0, 0x0, 0x0, {{}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x82, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}}}}]}}, &(0x7f0000001840)={0x0, 0x0, 0x59, &(0x7f0000001400)={0x5, 0xf, 0x59, 0x1, [@generic={0x54, 0x10, 0xa, "e111c363e519cad11b179ec8ab2bcd2681e79c7e5791c6492ded87c91f9efcd266960fc1bf0ce7b014669f75f3132ed11872e82da40ed66acccf010379b3ffcec65c56cf579ea2ac38f08d1d1175622964"}]}})


syz_usb_connect$hid(0x2, 0x3f, &(0x7f0000000000)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0xeef, 0x72aa, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x2d, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, {0x9}, {{}, [{{0x9, 0x5, 0x2, 0x3, 0x10}}]}}}]}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000080)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x10, 0x17ef, 0x6009, 0x0, 0x0, 0x0, 0x0, 0x1, [{{0x9, 0x2, 0x24, 0x1, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0x3, 0x0, 0x0, 0x0, {0x9, 0x21, 0x0, 0x0, 0x1, {0x22, 0x5}}}}]}}]}}, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000001440)={0x24, 0x0, 0x0, &(0x7f00000013c0)={0x0, 0x22, 0x5, {[@local=@item_4={0x3, 0x2, 0x0, "741cb976"}]}}, 0x0}, 0x0)
r1 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x0)
ioctl$HIDIOCGRDESC(r1, 0x541b, 0x0)
write$hidraw(0xffffffffffffffff, &(0x7f0000003000)=')', 0x1)
syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
r2 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r2, 0x0, 0x39)


r0 = syz_open_dev$evdev(&(0x7f0000002c00), 0x2, 0x0)
ioctl$EVIOCGREP(r0, 0x80084503, &(0x7f0000003000)=""/192)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
ioctl$EVIOCSKEYCODE_V2(r0, 0x80104592, &(0x7f0000000000)={0x0, 0x0, 0x1, 0x18000, "99f9ffffff7f00000020000fb700000000000000000000000900000000001000"})


syz_usb_control_io(0xffffffffffffffff, 0x0, 0x0)
syz_usb_connect$uac1(0x0, 0x96, &(0x7f0000002b00)={{0x12, 0x1, 0x0, 0x0, 0x0, 0x0, 0x40, 0x1d6b, 0x101, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x84, 0x3, 0x1, 0x0, 0x0, 0x0, {{0x9, 0x4, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0, 0x0, {{}, [@selector_unit={0x7, 0x24, 0x5, 0x0, 0x0, "4080"}, @input_terminal={0xc, 0x24, 0x2, 0x0, 0x0, 0x1}]}}, {}, {0x9, 0x4, 0x1, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {}, {{0x9, 0x5, 0x1, 0x9, 0x0, 0x0, 0x0, 0x0, {0x7}}}}, {}, {0x9, 0x4, 0x2, 0x1, 0x1, 0x1, 0x2, 0x0, 0x0, {[@as_header={0x7, 0x24, 0x1, 0x0, 0x0, 0x5}, @format_type_i_continuous={0xb, 0x24, 0x2, 0x1, 0x6, 0x0, 0x0, 0x1, "", "11f7b7"}]}, {{0x9, 0x5, 0x82, 0x9, 0x200, 0x0, 0x0, 0x0, {0x7}}}}}}}]}}, 0x0)


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902240001000000000904000001030100000921000000012201000905810308"], 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000280)={0x24, 0x0, 0x0, &(0x7f0000000140)=ANY=[@ANYBLOB="002205"], 0x0}, 0x0)
syz_usb_ep_write(r0, 0x81, 0x37, &(0x7f0000000400)="97ae72ff54b05f523dfbd89511878f1175598c42563d5f9e690600c3e6cd9ec9d9b999d3e0d569a86266ea2b3ff3a128bec3d8ac4d4175")


syz_usb_connect(0x0, 0xad, &(0x7f0000000680)={{0x12, 0x1, 0x0, 0x49, 0x45, 0xa2, 0x20, 0xbf8, 0x1009, 0x7225, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x9b, 0x2, 0x0, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x1, 0xfe, 0x83, 0xab, 0x0, [@cdc_ncm={{0x6, 0x24, 0x6, 0x0, 0x1, "ea"}, {0x5}, {0xd}, {0x6}, [@mdlm_detail={0x4}, @acm={0x4}, @acm={0x4}]}], [{{0x9, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, [@uac_iso={0x7}, @uac_iso={0x7}]}}]}}, {{0x9, 0x4, 0x0, 0x0, 0x0, 0xff, 0x3, 0x3f, 0x0, [@cdc_ecm={{0x7, 0x24, 0x6, 0x0, 0x0, "d837"}, {0x5, 0x24, 0x0, 0x1}, {0xd}}, @uac_as={[@format_type_i_discrete={0xf, 0x24, 0x2, 0x1, 0x0, 0x0, 0x0, 0x0, "55ea5f5050e08b"}, @format_type_i_continuous={0x9, 0x24, 0x2, 0x1, 0x0, 0x0, 0x0, 0x0, "", "11"}, @format_type_ii_discrete={0xe, 0x24, 0x2, 0x2, 0x0, 0x0, 0x0, "28e7b57add"}]}]}}]}}]}}, 0x0)
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x822b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x1068)


syz_usb_connect(0x2, 0x2d, &(0x7f0000000600)=ANY=[@ANYBLOB="12010000ec31f8104c1302007eec0102030109021b0001000000000904002c11018b75000905836dc6"], 0x0)
syz_usb_control_io$hid(0xffffffffffffffff, 0x0, 0x0)
syz_open_dev$evdev(&(0x7f0000000100), 0x4000000, 0x0)


syz_usb_connect$cdc_ecm(0x0, 0x4d, &(0x7f00000009c0)={{0x12, 0x1, 0x0, 0x2, 0x0, 0x0, 0x8, 0x525, 0xa4a1, 0x40, 0x1, 0x2, 0x3, 0x1, [{{0x9, 0x2, 0x3b, 0x1, 0x1, 0x0, 0x0, 0x0, [{{0x9, 0x4, 0x0, 0x0, 0x0, 0x2, 0x6, 0x0, 0x0, {{0x5}, {0x5}, {0xd}}}}]}}]}}, &(0x7f0000000d40)={0x0, 0x0, 0x0, 0x0, 0x2, [{0x2, &(0x7f0000000ac0)=@string={0x2}}, {0x2, &(0x7f0000000b40)=@string={0x2}}]})


r0 = syz_open_dev$evdev(&(0x7f0000001240), 0x0, 0x0)
ioctl$EVIOCSMASK(r0, 0x40104593, &(0x7f0000000040)={0x12, 0x0, 0x0})
ioctl$EVIOCGMASK(r0, 0x80104592, &(0x7f0000000300)={0x12, 0x0, 0x0})


r0 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000300)=ANY=[@ANYBLOB="12010000000000107d1e502d00000000000109022400010000000009040000010300000009210000000122070009058103"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, &(0x7f0000000000)={0x24, 0x0, 0x0, &(0x7f0000000040)={0x0, 0x22, 0x7, {[@local=@item_4={0x3, 0x2, 0x0, "292793c0"}, @local=@item_012={0x1, 0x2, 0x0, "01"}]}}, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000001640), 0x0, 0x80801)
ioctl$EVIOCSKEYCODE_V2(r0, 0x5450, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect(0x4, 0x24, &(0x7f0000000100)=ANY=[], 0x0)
ioctl$EVIOCRMFF(r0, 0x40085507, &(0x7f00000000c0)=0xc53)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGKEY(r0, 0x8040452f, 0x0)


r0 = syz_usb_connect(0x0, 0x2d, &(0x7f0000000000)=ANY=[@ANYBLOB="12010000bd4d9640f0fff0ff78390102030109021b0001ab93"], 0x0)
syz_usb_control_io(r0, &(0x7f0000000300)={0x2c, 0x0, &(0x7f0000000180)={0x0, 0x3, 0x4, @string={0x4, 0x3, "d3e3"}}, 0x0, 0x0, 0x0}, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0x83c0550b, 0x0)


r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x0, 0x0)
ioctl$EVIOCGABS20(r0, 0x10, 0x0)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f00000007c0)=ANY=[@ANYBLOB="1201000064172f2057155081ed29010203010902120001000000000904"], 0x0)
r1 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000180)=ANY=[@ANYBLOB="12010000090003206d0414c340000000000109022400010000a000090400000103010100092100080001220100090581", @ANYRES64], 0x0)
r2 = syz_open_dev$evdev(&(0x7f0000000080), 0x0, 0x0)
syz_usb_disconnect(r2)
r3 = syz_usb_connect$hid(0x0, 0x36, &(0x7f0000000000)=ANY=[@ANYBLOB="1201000000000040260933334000000000010902240001000000000904000001030100000921000000012201000905810308"], 0x0)
syz_usb_ep_write(r3, 0x0, 0x41, &(0x7f0000000040)="97c016d72e1a70e9666fa444580d3ce4c7c2d0da738f9e44054442775d8e584a316bacef3b495ee537b3f82587dff4c71703510704e3f4190bc90686ead9fe4000")
ioctl$EVIOCRMFF(r2, 0x4004550f, 0x0)
syz_usb_control_io$hid(r1, &(0x7f00000001c0)={0x24, &(0x7f0000000ac0)=ANY=[@ANYBLOB="00020c000000000002"], 0x0, 0x0, 0x0}, 0x0)
syz_usb_control_io$hid(r1, 0x0, &(0x7f0000000080)={0x7b, &(0x7f00000000c0)=ANY=[], 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r1, 0x0, 0x0)
r4 = syz_open_dev$evdev(&(0x7f0000003540), 0x0, 0x0)
ioctl$EVIOCGUNIQ(r4, 0x80404508, 0x0)
syz_usb_control_io(r1, 0x0, 0x0)
syz_usb_control_io$hid(r1, 0x0, 0x0)
syz_usb_control_io(r1, 0x0, &(0x7f00000000c0)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000040)={0x20, 0x0, 0x4}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r1, 0x0, &(0x7f0000000b80)={0x84, 0x0, 0x0, 0x0, &(0x7f0000000680)={0x20, 0x0, 0x4, {0x0, 0x3}}, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
syz_usb_control_io(r1, 0x0, 0x0)
r5 = syz_usb_connect(0x0, 0x10b, &(0x7f0000000480)=ANY=[@ANYBLOB="05010900b24b6a10e6040300770100000001090224000b0100"], 0x0)
syz_usb_control_io(r5, 0x0, 0x0)
syz_usb_control_io$hid(r5, 0x0, 0x0)
syz_usb_control_io$hid(r1, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, 0x0)
syz_usb_control_io$hid(r0, 0x0, &(0x7f0000000400)={0x2c, &(0x7f0000000040)={0x0, 0x0, 0x1, '$'}, 0x0, 0x0, 0x0, 0x0})


r0 = syz_usb_connect(0x0, 0x3f, &(0x7f0000000540)=ANY=[@ANYBLOB="11010000733336088dee1edb23610000000109022d0101100000000904000003fe03010009cd8d1f0002000000090505020000fcffff09058b1e20"], 0x0)
syz_usb_control_io(r0, 0x0, 0x0)
syz_usb_control_io(r0, 0x0, &(0x7f0000000780)={0x84, &(0x7f00000004c0)=ANY=[], 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
ioctl$EVIOCGMASK(r1, 0x5b02, 0x0)


r0 = syz_open_dev$evdev(&(0x7f0000002ac0), 0x0, 0x0)
read$hidraw(r0, &(0x7f0000000000)=""/4, 0x4)


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000001080)={0x0, 0x0, 0x0, {0x0, 0x1}, {0x51, 0x2}, @cond})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x903d01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2778)


r0 = syz_open_dev$evdev(&(0x7f0000000040), 0x0, 0x0)
syz_usb_disconnect(r0)
syz_usb_connect$hid(0x0, 0x36, &(0x7f0000001180)=ANY=[@ANYBLOB="12010000090003206d0414c34000ffff00010902"], 0x0)
ioctl$EVIOCRMFF(r0, 0x40085503, &(0x7f0000000400))


r0 = syz_usb_connect(0x0, 0x2d, &(0x7f0000000040)=ANY=[@ANYBLOB="1201fb0019030320d812010079de01ec020109021b0001000003000904000001785ecc00090585020004"], 0x0)
r1 = syz_open_dev$char_usb(0xc, 0xb4, 0x0)
syz_usb_disconnect(r0)
read$char_usb(r1, 0x0, 0x0)


r0 = syz_usb_connect(0x0, 0x24, &(0x7f00000000c0)=ANY=[@ANYBLOB="120100008d4a3020cd0610017171010203010902"], 0x0)
syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x100842)
syz_usb_disconnect(r0)
syz_open_dev$hidraw(&(0x7f0000002300), 0x243c2917, 0x100842)
syz_usb_disconnect(r0)


r0 = syz_open_dev$hidraw(&(0x7f0000002300), 0x0, 0x14a042)
write$hidraw(r0, &(0x7f0000003000)="297860dfde1863c08be8a1b7ad03094335f03542a50ea1e811193af3fb75f07ebb230f0791102fcd36124dd6fe8af416d0cb4bbbebbd8f1396a4772e1e9bc2c9e18ec784a3a99bc0f612d783b6822e8c7d2fd6f3e32710a16907ba29fc540e5f7a84138d2b91d7f7c68a43c4aa95ddd5adda44c7d132c5a51435d891e0804aa2f10e6ac281ec9f9e4538066bb7b9d692f30357780059f3bb86876d2ed600588fbd47a73825e61607fc361a459ee650333870af5d4c828d7f7799f662e7f043292136de6a9c3d928d95ed36f316329fe4a37af00bc0f200045538ba0961168a72ce5a358100d07b9c92c687926dd42b4a85ba8b7b150a99a000f4b3985c277ee70ff96c12c2c5625677f29cbcbf1cf9228c35f3a1bcf71e3a69b959413ce88746fbd18ac526335460496df9afb86c9011482dd00e57cf2898ad963f993d5fd94927aab1321cedc606d52d24c97997a04b5bebcbf2b55be90e35055b4a5d1b6eb70f7a7e5544c44d8a9f0f35755da03f642d385641b7d814d6c4aff7bfa4cf9b20c67633d517cb4fc431a5fc34d758e0bbb4860a05b5044aea1bcc14ff9ecb81e98af787abddff559ad8ea005e8efc5a9588b2dee9028e633e66c6c425f72c7c655b64b1284d21892e332dc53095268ad9a74b033f1cffc462177fae09fa528769fa5c4ed497cfd3119afae90c8a5fe2114a21595275ae10bbbe2aaee8a366a185235670049c5503e570f52087c53ab0c38db6b0b028e635fcb8882d0bb1be1872bd75f937aff57c7026525bfc2414250f8978e156de8bd6f2e15f026ee7333c9cf8892f81e868f9bccf942c77259808a14127df1b7ebee12b25041ba61afb81c9c4bccaaa0b181c74f6d6bc084d665ea4a744bc2364e6ad5fa3da0230ea4d1ffa883c3fe852e7a64ae91185f64a99e56beb11321f92a6dfe668a9d41acf9921c2497c0aaeae50c441fdf34fca82cc58ff1961eb6bd2eea0bea29213fcb39c5bd5199b3f0a7bda8629c9a98b6ebad502293f9f2c835b12c69d630cbea6b0e0c97ce8110d852bbc9e0a9a1eea66c2d0e47a6d51d44e2d8a8a5a980828bf0de3eea059f2597f398cb50cf2d9704dcef49ad9f5d3b4bd8729d57a7e665708780863d1f96c204cedba74cae9ee7b433d50a2bbedad81ba2aaff9d1a8e15bbc13ae86f469bffae227592768b968f27042137ec2568304448b8209f9b1822c899fa47f189aae3aa1e0e115581dc1c1dac3141e0dfeb85e87a12952852d5018f8f0d5ad5975320ae356b847f6cc9c89bb9d7a5e6728584a97b77a8054e129aac35623e30c169d444f0c58adb4148d734eb26fe57dad156ebf0a985ff946583dade5f183687997b7302f207840ced708719df301880d6fddf6e38fb04bd8c069303d55f6c8a07275434eaad6f8435f4096fe909c7e02f317c28975a7bad6ae1d883244ba6e72f5fcb24d832562449866b7bc6878775237dc5c0d9ccb1cd3b6fe67794a5af53ea7fb01de105809a84496ed2f03e20b1dfdab3d4cb21d8b4998f77595c4b99c183c821989fec094cb91ede61bf5c0d116c3116f903715e692675196d7e499a118c788a09a8f687aefd10af1c34f670dce4b5e628715ddcde04acc7299db87db83f8ef0d7c1d9979d7894d3d0d16e536d25789e86f2cab2856cf0b71efd91757f157da3300b1374fe80ea0d05b5ec52434f5b8c20dee83d2b92f338d4a3339aa3a78bf79d8260c55d002618b1444952bad36ba1c17dc801c1ad008f2bebdee89c54d582fdd89da995c5eb7b4c923c1e1d9d29207e92e913fa2187a10302052e6fa0da1c6134db7a4ad2679a0558a43d4d05d60cfa0ee41c7f458951a54173d91139142573794c594c65f25b93d7b7499956a2a5614a5aebc1567ea47cef87fca1d2a82d7f9eabdb7a933ffba467a3e26eeec8ab0a31358589cf451fbc90a9619c470424afc464308bf02b4208a72be8f74cc840fca700ae6e147da83fa27216179ef984c1dfce0c7bb5c8e0ac23b97748905b219bd75ef425a334660ec930b4ef6cc3ab0429794649149404d856d9c6ddaf69c66b60aabe8d214973f74db7760d26f9699a32246a0947a0af6a26841386298ff61a32bac3aa6e8fa5260c19e0b5db71a11d45ed34cb9a48ca56d1d2516e110e510aa5eddbbc207220dd7ac2fd77f1e219e6097e3a5cd15662c80169d00fb69cdac5e7246fdfdead45f91d8f3bc6c878d8ff0182e0f9efdc9330d55a04a01f08825e6e7df9e552f8f630f3344150c0bad7901eb9ef11c56fee9d86a7781bd34ed0c4da0a59366c7bc31fe01e8b01ceb9ed833c9759df103ab0e644282e951a0cc50e6c76217bebe6a2e6be568fc3670ce621f2379b22b77340986dc323e5f252ce4ff3a87295fb2d124ce7cd8afb00c1e3c187d0abbd448196256ac652112073ccae7577d27cbe3df18321e0fb435629737f51e849abadc065b8fdbe846ab31d11eaf80ef27eb60eca3d12944c43152a67c7e3ca484c4f325076846f87063e79f893d7f872a1cc4167b7f8474166ebc43e5c1e1adc6c963d77d6c9dd164cd37e12e8ce4699cd500a3b060bac1d778bbe2a5d36bc816ff5e75e8a32704a0f326a3a46c703549e0a7ef88f0fbde6598591f66aec1813cfc0de35986d0151ee79e1297a8932b400996672f092b50718f5ffb0159044552bae06811bc42d5b90e7cba3fc775916af5861d0025a1d3957a93f704e5ab5c2876e477a8a9df7efe6661c7e208cddf8f1a15bf030206a64f69e2fc26cfbcf1315d5f6d189e0da3c69c3bbcc14f70d6a6b02644e36decc52992545238112b5f3c2a80036a8af0ad8381b97162a7ff0565e2945f0182b13d6235437dee1a8f48aa847a6aafdf699dcec9e46c63f26e7fffb70a54129b7494ee3d9d97e220d2e418735d129be91bf24f0d0b22f9ef6dca7fdb60a34b59e3fda4d364d18451d385fd17dff74519a1f22470781c86d07fa66ffffab1d6aa413362570d38780340bdeeee6d4c5da51512c0757d69cac48c7c44441e1abb06b6a8943c093d732e8b39f85570c3e846a62b4cda5c0c5974a001433c8e3ffae9afff41419c0caa58f6853067bfb92b55c6ecd58503b8deebc44ab258064787dbeef58b0914dbc8cea6bffbe619ff976d7987648457ee49c7744f58f919906c1987bebfb13b14ef8bc40dc7a95c34a825efd517c95fa5380073421efb95ae6e4ff3ece3f4501c918521b230dd169ee6ddb5f09c8762287c28a2b9a75d569f392bb7184edae72084e1fc295347d16caabe0196a7f679687f399dfaaffc537544b9f8cf27b2a44217187308845ce2bb3227412aa9d6a582800540fb715e0483c96f5e66247616f59bc275036b622502191ca3b29749f79675f88a6ad9b304e9cbe21a2c817db9622bac725246a71b9c8659d81b75cc1d37ca5797270ef9f82124a2077ea408bd01dfc53cb701a2b276e322ee4ff268e540781b8d4066d75747aa27342c842ea569679f6d0146bf3c5fcd57f10cdc2a1cab835cf9e6c8924d4808c3beeb16c47f58484d0dc60a38612a4caeef41cc3b6c7adb651c2e024ec75c5a4af98450aac1638ae45c9cb902723d584ecef25345bb33255f65b0cc469c19a1ec84045a7e8388f7391ad31606cb6e3e46cf7eb48996a96ff4b7a2d97a8cb5da97842b28fc48b279ab99da252fffbd0034667c51e12406ea161dd9ea19bbe33ee464a45fed53939d104deb7febc9b362320d2c3008e27ae7094274bb75b7379e8db191ef34e1d9b648dda3de37c59fd2e6d569398c9fb3529976f12ea64a67f66a577ceed99da2899edc953feef13e3ac6952bef1c2443d25e6cabb7fb538920255650a7ffa05d43b9d87c2ed323b4edbf39f5a101986f2ef41cb14565d9f86584008548e6688fda0e95f13c3856368fafee0bc673d0d9cde94e500398785c14fcad3082684817d2d1ea4c96c207f2b12036ce0954639f4e4f2e0ff5bf4af919464359fb67208401cb1e6196e64534871a74e888b16dcc4f6c8872cd2f973788c5048e30ef9d863534017f4c35ae35dbc96d9bf487a181b2edeeb8d5bade6d0df4214a70a1feb58b0bc94eb601b8a2c0f3d4012bc215d0bc3fc9f548d58a02df1a4a5b071829665be43396bc8de262f69f245888699c611354428c9f31ac0a59cbb6fd1cddcfb7cbc5a1fb4d10627d8e37940052945c1da8c9994ef34372baad992cbaa20511c0064762d70edcfd6da5c4d7ed9a9e951656ce24b22f32bdef9707ecbb725b64c31ba0466259600260513d9f7652661d32e77236a4eb35597ad9962d93138374af9e09a14ff3b60f5b7e024b9f9867a9c13260115023ed09b11ae64cbafdafbdca3fbc5075ce311e18510a5220841813a3e0fab2e6f3e42e4ef745058bad080ab61a5c3393ab2e59e3e57707d0ca9c6978eb516204cd1492097fd44dbe46b6ea2c650f8aa7bb48f836bcdbe6748f2cd52314f1bc9594531b598e44f82e80ae4a0d09d713dffc1444c948ae98ffe49ec1985b12870d104942bcfa7aca4c55d01b160cac2f67bc9eeeac3fc04708de40e4b9cee99e8c1409f3a161f3dbf4eda628f92df6b51939bcf6d5edbac55cc28480f9ca49bc1d4dc5139bfb5755c6217b784c5e337102e9f5d290d66c712548110eb65c92927409a4c8f8caa8ba8d50ef09a7d0c4077593dc7394416ec21daf38c7460e0bb5584b463f3080d453f39f39c5ed7a0bdc402447a2f77154dbeebc951fecbe5c6d95daa82a849c52909b06eca7edb175ecef1f4a1bbdfde650ccfe7ee1d79f68ad372b238a6edf93a7893912b33c4ad59ef9c7d4074329b5a35712a956397b941f7f660542127cff35e1dcb0b0d6ef3352537cda87a274022d81e0ed7b9a7a9249e00f934bcc8f4c3dc8ee8225f6cc5d91d343bf7e1d866716bfd3f651ce04fa6504229a0cfba1600ab8a9ddf670c59737558dccabb39436f507b60377e438f7ebfdefc4bed6ef3716cd75ebebdbddfddf79ab2f28093b1fb0de2fbedd8ca1a1c0103396ff2b5b8d698e0317c9da7cfe8de0856c2156305fa90d22ac0cb1706a12fbb80596202d6d286422c1f8ce8e70e2c43449b4e5f457c1ed92354f1bbb9ebd0c72e13639ff5e0f4e4a21593b8c05bec7f6dd097bc23bc8e202241f8f5fe81ad72479c5f4542b79644f722d80e140b8a0ed3a7f09702b3340f0e2d3e69c5c26c1e6d08621297670a006d07496f5cc09edb1841d941ccf1fb9f9ef5391d1a11a4638aa287fe87a8dda89eba495f6ad25f50f69773143961d076558fa10482f615fe8d5c8fdf2d3265c76bd23b7de8ea1204d1ad0c8d787d59ae47e000b61812dcba8a8253af88d6575b6001180bb4bd6e811338d07848737fa6b368447ed640af0c2edec2284056b05ae4bd5e939d0097daf22433eeca39548cee7d197c31b8aa1e9780d834a0c5396b34aa53aa725844cb7afda83dbf3caebe9a2bbc2174a8e2f8a48e4af3d4a1ee3690a81ae4f2ca0a70145720da67083576058d7b6cd1e7ac86bbed5d3247cc918d20bcbf053675dadb3970e06621748299c17b93c8b969904e72d6bffddc31ed279b5cf4df1799c4b0988c8f87505652c3626bc33f26361e496db09bae6ad6b26a6a1dd0506be9139905368256638468b596ac343fea3e5511899d757c4ea1f571a189545e6e6282833ef18f2b2962b3905ccf69f4bfd0e07268f2c4dc80c68b24b9555ef3eb3d4a9a52f4a993c260e4563319a8b13052be67c5dff1d54c250267c912d50a47f384daf400b728a9ac53ffc0019d9e39ed42c17bb1ee49ff65b3f", 0x6bc47488d130961f)
r1 = syz_open_dev$evdev(&(0x7f0000000000), 0x0, 0x0)
ioctl$EVIOCGMASK(r1, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffff36, &(0x7f0000000200)="952bb3e006ae9a4c3a"})
r2 = syz_open_dev$evdev(&(0x7f00000002c0), 0x1, 0x0)
ioctl$EVIOCGMASK(r2, 0x80104592, &(0x7f0000000300)={0x0, 0xffffffffffffff36, &(0x7f0000000200)="952bb3e006ae9a4c3a"})


ioctl$EVIOCSFF(0xffffffffffffffff, 0x40304580, &(0x7f0000000180)={0x0, 0x0, 0x0, {0x0, 0x11}, {0x9b}, @cond})
r0 = syz_open_dev$evdev(&(0x7f00000000c0), 0x2, 0x862b01)
write$char_usb(r0, &(0x7f0000000040)="e2", 0x2250)

local_cleanup100%of 1
nfc_llcp_build_gb.isra.0---of 12
nfc_llcp_connecting_sock_get---of 13
nfc_llcp_data_received---of 4
nfc_llcp_exit---of 1
nfc_llcp_find_local---of 10
nfc_llcp_general_bytes---of 5
nfc_llcp_get_local_ssap---of 6
nfc_llcp_get_sdp_ssap---of 20
nfc_llcp_local_put---of 3
nfc_llcp_local_put.part.050%of 6
nfc_llcp_mac_is_down---of 2
nfc_llcp_mac_is_up---of 9
nfc_llcp_put_ssap---of 19
nfc_llcp_queue_i_frames---of 7
nfc_llcp_recv---of 3
nfc_llcp_register_device---of 7
nfc_llcp_reserve_sdp_ssap---of 8
nfc_llcp_rx_skb---of 257
nfc_llcp_rx_work---of 4
nfc_llcp_sdreq_timeout_work---of 18
nfc_llcp_sdreq_timer---of 1
nfc_llcp_send_to_raw_sock---of 14
nfc_llcp_set_remote_gb---of 8
nfc_llcp_sock_from_sn---of 27
nfc_llcp_sock_get---of 18
nfc_llcp_sock_link---of 8
nfc_llcp_sock_unlink---of 8
nfc_llcp_socket_purge---of 6
nfc_llcp_socket_release12%of 35
nfc_llcp_socket_remote_param_init---of 1
nfc_llcp_symm_timer---of 1
nfc_llcp_timeout_work---of 1
nfc_llcp_tx_work---of 19
nfc_llcp_unregister_device60%of 10
-----------
SUMMARY27%of 52
cypress_disconnect100%of 1
cypress_probe---of 3
port0_show---of 1
port0_store---of 1
port1_show---of 1
port1_store---of 1
read_port.constprop.0.isra.0---of 5
vendor_command---of 13
write_port.constprop.0.isra.0---of 10
-----------
SUMMARY100%of 1
msi001_enum_freq_bands---of 5
msi001_g_frequency---of 3
msi001_g_tuner---of 3
msi001_probe---of 9
msi001_remove67%of 3
msi001_s_ctrl---of 10
msi001_s_frequency---of 5
msi001_s_tuner---of 4
msi001_set_gain---of 6
msi001_set_tuner---of 23
msi001_standby---of 1
spi_sync_transfer.constprop.0---of 6
spi_write.constprop.0---of 1
-----------
SUMMARY67%of 3
__dquot_alloc_space9%of 61
__dquot_drop---of 14
__dquot_free_space10%of 50
__dquot_initialize7%of 46
__dquot_transfer---of 86
__quota_error---of 3
do_get_dqblk---of 1
do_proc_dqstats---of 4
dqcache_shrink_count---of 1
dqcache_shrink_scan---of 17
dqget---of 44
dqget.cold---of 6
dqput---of 2
dqput.part.0---of 12
dqput.part.0.cold---of 2
dquot_acquire---of 10
dquot_add_inodes---of 25
dquot_add_inodes.cold---of 1
dquot_add_space---of 32
dquot_add_space.cold---of 1
dquot_alloc---of 1
dquot_alloc_inode---of 42
dquot_claim_space_nodirty---of 33
dquot_commit---of 10
dquot_commit_info---of 1
dquot_decr_inodes---of 6
dquot_decr_space---of 6
dquot_destroy---of 1
dquot_disable---of 80
dquot_drop---of 5
dquot_file_open---of 5
dquot_free_inode---of 32
dquot_get_dqblk---of 6
dquot_get_next_dqblk---of 8
dquot_get_next_id---of 6
dquot_get_next_id.cold---of 2
dquot_get_state---of 14
dquot_initialize100%of 1
dquot_initialize_needed---of 8
dquot_load_quota_inode---of 12
dquot_load_quota_inode.cold---of 1
dquot_load_quota_sb---of 63
dquot_load_quota_sb.cold---of 2
dquot_mark_dquot_dirty---of 9
dquot_quota_disable---of 15
dquot_quota_enable---of 16
dquot_quota_off---of 1
dquot_quota_on---of 4
dquot_quota_on_mount---of 6
dquot_quota_sync---of 12
dquot_reclaim_space_nodirty---of 33
dquot_release---of 8
dquot_resume---of 9
dquot_scan_active---of 17
dquot_set_dqblk---of 54
dquot_set_dqinfo---of 17
dquot_set_dqinfo.cold---of 2
dquot_transfer---of 21
dquot_writeback_dquots---of 39
info_bdq_free---of 6
info_idq_free---of 8
info_idq_free.cold---of 1
mark_info_dirty---of 1
prepare_warning---of 4
quota_release_workfn---of 26
register_quota_format---of 1
unregister_quota_format---of 9
vfs_cleanup_quota_inode---of 4
-----------
SUMMARY9%of 158
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
____netdev_has_upper_dev---of 1
__dev_change_flags---of 24
__dev_change_net_namespace---of 58
__dev_close_many80%of 10
__dev_direct_xmit---of 22
__dev_forward_skb---of 1
__dev_forward_skb2---of 18
__dev_get_by_flags---of 8
__dev_get_by_index---of 8
__dev_get_by_name---of 3
__dev_notify_flags---of 10
__dev_open---of 18
__dev_queue_xmit---of 260
__dev_remove_pack60%of 15
__dev_set_allmulti---of 14
__dev_set_mtu---of 5
__dev_set_promiscuity---of 16
__dev_set_rx_mode10%of 10
__get_xps_queue_idx---of 15
__napi_busy_loop---of 69
__napi_poll.constprop.0---of 23
__napi_schedule50%of 22
__napi_schedule_irqoff---of 14
__netdev_adjacent_dev_insert---of 42
__netdev_adjacent_dev_remove---of 30
__netdev_adjacent_dev_set---of 12
__netdev_has_upper_dev---of 4
__netdev_notify_peers---of 4
__netdev_printk48%of 19
__netdev_update_features---of 154
__netdev_update_features.cold---of 2
__netdev_update_lower_level---of 15
__netdev_update_upper_level---of 8
__netdev_upper_dev_link---of 32
__netdev_upper_dev_unlink---of 11
__netdev_walk_all_lower_dev.constprop.0.isra.0---of 18
__netdev_walk_all_upper_dev---of 11
__netif_napi_del77%of 13
__netif_receive_skb---of 5
__netif_receive_skb_core.constprop.0---of 249
__netif_receive_skb_list_core---of 36
__netif_receive_skb_one_core---of 7
__netif_rx---of 19
__netif_schedule58%of 7
__netif_set_xps_queue---of 178
alloc_netdev_mqs---of 32
bpf_prog_run_generic_xdp---of 42
bpf_xdp_link_attach---of 26
bpf_xdp_link_dealloc---of 1
bpf_xdp_link_detach---of 1
bpf_xdp_link_fill_link_info---of 3
bpf_xdp_link_release---of 20
bpf_xdp_link_show_fdinfo---of 3
bpf_xdp_link_update---of 16
busy_poll_stop---of 17
call_netdevice_notifiers---of 1
call_netdevice_notifiers_info67%of 6
call_netdevice_register_net_notifiers---of 8
call_netdevice_unregister_notifiers---of 3
clean_xps_maps---of 26
default_device_exit_batch---of 41
dev_add_pack---of 9
dev_alloc_name---of 1
dev_change_carrier---of 4
dev_change_flags---of 3
dev_change_name---of 29
dev_change_proto_down---of 7
dev_change_proto_down_reason---of 11
dev_change_proto_down_reason.cold---of 5
dev_change_tx_queue_len---of 7
dev_change_xdp_fd---of 26
dev_close---of 7
dev_close_many100%of 13
dev_cpu_dead---of 41
dev_disable_lro---of 16
dev_fetch_sw_netstats100%of 5
dev_fill_forward_path---of 12
dev_fill_metadata_dst---of 27
dev_forward_skb---of 4
dev_forward_skb_nomtu---of 4
dev_fwd_path---of 5
dev_get_alias45%of 18
dev_get_by_index---of 26
dev_get_by_index_rcu---of 8
dev_get_by_name---of 28
dev_get_by_name_rcu---of 12
dev_get_by_napi_id---of 13
dev_get_flags23%of 9
dev_get_iflink67%of 6
dev_get_mac_address---of 26
dev_get_phys_port_id67%of 3
dev_get_phys_port_name67%of 3
dev_get_port_parent_id25%of 12
dev_get_stats65%of 14
dev_get_tstats64100%of 1
dev_getbyhwaddr_rcu---of 5
dev_getfirstbyhwtype---of 21
dev_hard_start_xmit---of 41
dev_index_reserve---of 8
dev_ingress_queue_create---of 10
dev_kfree_skb_any_reason---of 4
dev_kfree_skb_irq_reason---of 14
dev_loopback_xmit---of 31
dev_nit_active---of 4
dev_open---of 4
dev_pick_tx_cpu_id---of 1
dev_pick_tx_zero---of 1
dev_pre_changeaddr_notify---of 3
dev_prep_valid_name.constprop.0---of 31
dev_qdisc_enqueue---of 17
dev_queue_xmit_nit---of 49
dev_remove_pack---of 3
dev_set_alias---of 14
dev_set_allmulti---of 1
dev_set_group---of 1
dev_set_mac_address---of 9
dev_set_mac_address_user---of 1
dev_set_mtu---of 5
dev_set_mtu_ext---of 21
dev_set_promiscuity---of 5
dev_set_rx_mode---of 1
dev_set_threaded---of 15
dev_valid_name---of 12
dev_validate_mtu---of 9
dev_xdp_attach---of 103
dev_xdp_install---of 10
dev_xdp_prog_count---of 5
dev_xdp_prog_id67%of 6
do_xdp_generic---of 35
enqueue_to_backlog---of 41
flush_backlog---of 9
free_netdev---of 21
generic_xdp_install---of 21
generic_xdp_tx---of 15
get_rps_cpu---of 66
init_dummy_netdev---of 1
is_skb_forwardable---of 4
list_netdevice---of 19
napi_busy_loop---of 11
napi_busy_loop_rcu---of 1
napi_by_id---of 8
napi_complete_done---of 34
napi_disable---of 6
napi_enable---of 8
napi_kthread_create---of 4
napi_schedule_prep100%of 5
napi_threaded_poll---of 39
napi_watchdog---of 16
net_dec_egress_queue---of 1
net_dec_ingress_queue---of 1
net_disable_timestamp---of 5
net_enable_timestamp---of 5
net_inc_egress_queue---of 1
net_inc_ingress_queue---of 1
net_rps_action_and_irq_enable---of 9
net_rx_action---of 45
net_tx_action---of 73
netdev_adjacent_change_abort---of 6
netdev_adjacent_change_commit---of 5
netdev_adjacent_change_prepare---of 11
netdev_adjacent_get_private---of 1
netdev_adjacent_rename_links---of 9
netdev_alert---of 1
netdev_bind_sb_channel_queue---of 8
netdev_bonding_info_change---of 1
netdev_change_features---of 1
netdev_cmd_to_name---of 3
netdev_core_pick_tx---of 13
netdev_core_stats_inc---of 6
netdev_create_hash---of 3
netdev_crit---of 1
netdev_drivername---of 5
netdev_emerg---of 1
netdev_err---of 1
netdev_exit---of 3
netdev_features_change---of 1
netdev_freemem---of 1
netdev_get_by_index---of 5
netdev_get_by_name---of 5
netdev_get_name---of 25
netdev_get_xmit_slave---of 3
netdev_has_any_upper_dev50%of 4
netdev_has_upper_dev---of 4
netdev_has_upper_dev_all_rcu---of 1
netdev_increment_features---of 5
netdev_info100%of 1
netdev_init---of 5
netdev_init_one_queue---of 5
netdev_is_rx_handler_busy---of 11
netdev_lower_dev_get_private---of 9
netdev_lower_get_first_private_rcu---of 6
netdev_lower_get_next---of 3
netdev_lower_get_next_private---of 3
netdev_lower_get_next_private_rcu---of 6
netdev_lower_state_changed---of 4
netdev_master_upper_dev_get---of 8
netdev_master_upper_dev_get_rcu43%of 7
netdev_master_upper_dev_link---of 1
netdev_name_in_use---of 1
netdev_name_node_alt_create---of 9
netdev_name_node_alt_destroy---of 9
netdev_name_node_alt_free---of 1
netdev_name_node_lookup---of 9
netdev_next_lower_dev_rcu---of 3
netdev_notice---of 1
netdev_notify_peers---of 1
netdev_offload_xstats_disable38%of 8
netdev_offload_xstats_enable---of 14
netdev_offload_xstats_enabled50%of 8
netdev_offload_xstats_get---of 11
netdev_offload_xstats_get_stats---of 9
netdev_offload_xstats_push_delta---of 8
netdev_offload_xstats_report_delta---of 1
netdev_offload_xstats_report_used---of 1
netdev_pick_tx---of 71
netdev_pick_tx.cold---of 1
netdev_port_same_parent_id---of 7
netdev_printk---of 1
netdev_refcnt_read---of 1
netdev_reset_tc---of 5
netdev_run_todo---of 65
netdev_rx_csum_fault---of 2
netdev_rx_handler_register---of 4
netdev_rx_handler_unregister---of 7
netdev_set_default_ethtool_ops---of 2
netdev_set_num_tc---of 7
netdev_set_sb_channel---of 4
netdev_set_tc_queue---of 3
netdev_sk_get_lowest_dev---of 6
netdev_state_change---of 3
netdev_stats_to_stats64100%of 2
netdev_sw_irq_coalesce_default_on---of 3
netdev_txq_to_tc---of 5
netdev_unbind_sb_channel---of 4
netdev_update_features---of 3
netdev_upper_dev_link---of 1
netdev_upper_dev_unlink---of 1
netdev_upper_get_next_dev_rcu---of 6
netdev_walk_all_lower_dev---of 10
netdev_walk_all_lower_dev_rcu---of 10
netdev_walk_all_upper_dev_rcu---of 13
netdev_warn---of 1
netdev_xmit_skip_txqueue---of 1
netif_device_attach---of 5
netif_device_detach---of 3
netif_get_num_default_rss_queues---of 9
netif_inherit_tso_max---of 7
netif_napi_add_weight---of 27
netif_queue_set_napi---of 12
netif_receive_skb---of 49
netif_receive_skb_core---of 11
netif_receive_skb_list---of 32
netif_receive_skb_list_internal---of 50
netif_reset_xps_queues---of 4
netif_rx---of 21
netif_rx_internal---of 35
netif_schedule_queue50%of 18
netif_set_real_num_queues---of 17
netif_set_real_num_rx_queues---of 9
netif_set_real_num_tx_queues---of 32
netif_set_tso_max_segs---of 2
netif_set_tso_max_size---of 5
netif_set_xps_queue---of 1
netif_skb_features---of 48
netif_stacked_transfer_operstate---of 11
netif_tx_stop_all_queues---of 2
netif_tx_wake_queue---of 17
netstamp_clear---of 3
passthru_features_check---of 1
process_backlog---of 22
register_netdev---of 3
register_netdevice---of 104
register_netdevice_notifier---of 11
register_netdevice_notifier_dev_net---of 7
register_netdevice_notifier_net---of 5
remove_xps_queue---of 15
rps_may_expire_flow---of 21
rps_trigger_softirq---of 15
skb_checksum_help---of 23
skb_crc32c_csum_help---of 11
skb_csum_hwoffload_help---of 6
skb_network_protocol---of 28
skb_warn_bad_offload---of 11
synchronize_net100%of 3
tc_run---of 12
tcx_dec---of 1
tcx_inc---of 1
trace_napi_poll---of 15
trace_netif_rx_exit---of 15
trace_xdp_exception---of 15
trigger_rx_softirq---of 1
unlist_netdevice50%of 14
unregister_netdev100%of 1
unregister_netdevice_many---of 1
unregister_netdevice_many_notify45%of 95
unregister_netdevice_notifier---of 6
unregister_netdevice_notifier_dev_net---of 6
unregister_netdevice_notifier_net---of 4
unregister_netdevice_queue42%of 12
validate_xmit_skb.constprop.0---of 49
validate_xmit_skb_list---of 7
-----------
SUMMARY53%of 370
-----------
SUMMARY---of 0
ath10k_usb_alloc_urb_from_pipe---of 7
ath10k_usb_bmi_exchange_msg---of 8
ath10k_usb_destroy67%of 9
ath10k_usb_free_urb_to_pipe---of 4
ath10k_usb_hif_diag_read---of 7
ath10k_usb_hif_diag_write---of 3
ath10k_usb_hif_get_default_pipe---of 1
ath10k_usb_hif_get_free_queue_number---of 1
ath10k_usb_hif_map_service_to_pipe---of 5
ath10k_usb_hif_power_down---of 3
ath10k_usb_hif_power_up---of 1
ath10k_usb_hif_resume---of 1
ath10k_usb_hif_start---of 3
ath10k_usb_hif_stop---of 4
ath10k_usb_hif_suspend---of 1
ath10k_usb_hif_tx_sg---of 11
ath10k_usb_io_comp_work---of 21
ath10k_usb_napi_poll---of 4
ath10k_usb_pm_resume---of 1
ath10k_usb_pm_suspend---of 5
ath10k_usb_post_recv_transfers.constprop.0---of 8
ath10k_usb_probe---of 36
ath10k_usb_recv_complete---of 8
ath10k_usb_remove100%of 2
ath10k_usb_submit_ctrl_in.constprop.0---of 6
ath10k_usb_submit_ctrl_out.constprop.0---of 6
ath10k_usb_transmit_complete---of 1
-----------
SUMMARY73%of 11
const_folio_flags.constprop.040%of 10
count_shadow_nodes---of 24
folio_flags.constprop.0---of 10
folio_memcg40%of 20
scan_shadow_nodes---of 1
shadow_lru_isolate---of 18
workingset_activation52%of 35
workingset_age_nonresident80%of 10
workingset_eviction---of 25
workingset_eviction.cold---of 1
workingset_refault---of 34
workingset_test_recent---of 80
workingset_test_recent.cold---of 1
workingset_update_node---of 9
-----------
SUMMARY51%of 75
__usb_create_hcd---of 13
__usb_hcd_giveback_urb11%of 19
hcd_alloc_coherent---of 7
hcd_bus_resume62%of 26
hcd_bus_suspend---of 21
hcd_died_work---of 1
hcd_resume_work---of 1
init_giveback_urb_bh---of 1
register_root_hub---of 21
rh_timer_func---of 1
unlink117%of 18
usb_add_hcd---of 67
usb_alloc_streams---of 12
usb_bus_start_enum---of 6
usb_calc_bus_time---of 20
usb_create_hcd---of 1
usb_create_shared_hcd---of 1
usb_free_streams---of 9
usb_get_hcd72%of 7
usb_giveback_urb_bh---of 9
usb_hc_died---of 15
usb_hcd_alloc_bandwidth---of 39
usb_hcd_check_unlink_urb89%of 9
usb_hcd_disable_endpoint50%of 2
usb_hcd_end_port_resume---of 5
usb_hcd_end_port_resume.cold---of 1
usb_hcd_find_raw_port_number67%of 3
usb_hcd_flush_endpoint82%of 16
usb_hcd_get_frame_number---of 3
usb_hcd_giveback_urb93%of 14
usb_hcd_irq---of 4
usb_hcd_is_primary_hcd---of 4
usb_hcd_link_urb_to_ep100%of 7
usb_hcd_map_urb_for_dma20%of 57
usb_hcd_platform_shutdown---of 2
usb_hcd_poll_rh_status69%of 19
usb_hcd_reset_endpoint75%of 4
usb_hcd_resume_root_hub100%of 3
usb_hcd_setup_local_mem---of 13
usb_hcd_start_port_resume---of 3
usb_hcd_start_port_resume.cold---of 1
usb_hcd_submit_urb42%of 90
usb_hcd_synchronize_unlinks100%of 1
usb_hcd_unlink_urb78%of 9
usb_hcd_unlink_urb_from_ep---of 3
usb_hcd_unmap_urb_for_dma50%of 12
usb_hcd_unmap_urb_setup_for_dma60%of 5
usb_mon_deregister---of 3
usb_mon_register---of 3
usb_put_hcd45%of 9
usb_remove_hcd---of 15
usb_stop_hcd---of 1
-----------
SUMMARY48%of 330
gt683r_brightness_set100%of 4
gt683r_led_probe---of 18
gt683r_led_remove100%of 1
gt683r_led_work---of 15
mode_show---of 3
mode_store---of 6
-----------
SUMMARY100%of 5
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__profile_flip_buffers---of 1
create_proc_profile---of 9
do_profile_hits.constprop.0---of 20
do_profile_hits.constprop.0.cold---of 1
profile_dead_cpu---of 7
profile_hits---of 3
profile_init---of 9
profile_online_cpu---of 3
profile_prepare_cpu---of 9
profile_setup---of 22
profile_tick43%of 7
read_profile---of 27
read_profile.cold---of 1
setup_profiling_timer---of 1
write_profile---of 14
-----------
SUMMARY43%of 7
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__fsnotify_inode_delete100%of 1
__fsnotify_parent52%of 39
__fsnotify_update_child_dentry_flags---of 16
__fsnotify_vfsmount_delete---of 1
fsnotify14%of 146
fsnotify_first_mark---of 17
fsnotify_handle_inode_event.isra.0---of 18
fsnotify_sb_delete---of 15
-----------
SUMMARY22%of 186
__virtio_unbreak_device---of 3
__virtqueue_break---of 1
__virtqueue_unbreak---of 1
__vring_new_virtqueue---of 14
detach_buf_packed---of 16
detach_buf_split---of 18
virtio_break_device---of 3
virtio_max_dma_size---of 4
virtqueue_add_inbuf---of 4
virtqueue_add_inbuf_ctx---of 4
virtqueue_add_outbuf---of 4
virtqueue_add_packed---of 86
virtqueue_add_sgs---of 8
virtqueue_add_split---of 81
virtqueue_detach_unused_buf---of 16
virtqueue_disable_and_recycle---of 7
virtqueue_disable_cb34%of 9
virtqueue_dma_dev---of 3
virtqueue_dma_map_single_attrs---of 9
virtqueue_dma_mapping_error---of 3
virtqueue_dma_need_sync---of 4
virtqueue_dma_sync_single_range_for_cpu---of 2
virtqueue_dma_sync_single_range_for_device---of 2
virtqueue_dma_unmap_single_attrs---of 2
virtqueue_enable_cb---of 10
virtqueue_enable_cb_delayed---of 24
virtqueue_enable_cb_prepare---of 15
virtqueue_get_avail_addr---of 7
virtqueue_get_buf---of 5
virtqueue_get_buf_ctx---of 5
virtqueue_get_buf_ctx_packed---of 19
virtqueue_get_buf_ctx_split---of 15
virtqueue_get_desc_addr---of 3
virtqueue_get_used_addr---of 7
virtqueue_get_vring---of 1
virtqueue_get_vring_size---of 1
virtqueue_is_broken---of 1
virtqueue_kick---of 5
virtqueue_kick_prepare---of 17
virtqueue_notify---of 5
virtqueue_poll---of 9
virtqueue_reinit_packed---of 2
virtqueue_reinit_split---of 3
virtqueue_reset---of 6
virtqueue_resize---of 26
virtqueue_set_dma_premapped---of 4
vring_alloc_desc_extra---of 4
vring_alloc_queue---of 5
vring_alloc_queue_packed---of 14
vring_alloc_queue_split---of 18
vring_alloc_state_extra_packed---of 5
vring_alloc_state_extra_split---of 5
vring_create_virtqueue---of 4
vring_create_virtqueue_dma---of 4
vring_create_virtqueue_packed.constprop.0---of 14
vring_create_virtqueue_split---of 8
vring_del_virtqueue---of 3
vring_free---of 17
vring_free_packed---of 13
vring_interrupt60%of 15
vring_map_one_sg---of 10
vring_new_virtqueue---of 3
vring_notification_data---of 4
vring_transport_features---of 3
vring_unmap_extra_packed.isra.0---of 7
vring_unmap_one_split---of 8
vring_unmap_one_split_indirect.part.0---of 2
-----------
SUMMARY50%of 24
snd_hwdep_control_ioctl---of 25
snd_hwdep_dev_disconnect79%of 14
snd_hwdep_dev_free80%of 5
snd_hwdep_dev_register---of 13
snd_hwdep_dsp_load---of 7
snd_hwdep_dsp_load.cold---of 1
snd_hwdep_info---of 10
snd_hwdep_ioctl---of 13
snd_hwdep_ioctl_compat---of 11
snd_hwdep_llseek---of 3
snd_hwdep_mmap---of 3
snd_hwdep_new---of 23
snd_hwdep_open---of 26
snd_hwdep_poll---of 3
snd_hwdep_proc_read---of 3
snd_hwdep_read---of 3
snd_hwdep_release---of 7
snd_hwdep_write---of 3
-----------
SUMMARY79%of 19
__buf_prepare---of 33
__enqueue_in_driver---of 17
__prepare_dmabuf.constprop.0---of 59
__prepare_userptr.constprop.0---of 50
__vb2_buf_mem_free---of 6
__vb2_cleanup_fileio---of 5
__vb2_init_fileio---of 46
__vb2_perform_fileio---of 77
__vb2_plane_dmabuf_put---of 7
__vb2_queue_alloc---of 56
__vb2_queue_cancel---of 47
__vb2_queue_free---of 43
vb2_buffer_done---of 38
vb2_buffer_in_use---of 6
vb2_core_create_bufs---of 42
vb2_core_dqbuf---of 96
vb2_core_expbuf---of 31
vb2_core_poll---of 35
vb2_core_prepare_buf---of 17
vb2_core_qbuf---of 90
vb2_core_querybuf---of 4
vb2_core_queue_init---of 33
vb2_core_queue_release---of 1
vb2_core_reqbufs---of 58
vb2_core_streamoff---of 6
vb2_core_streamon---of 22
vb2_discard_done---of 3
vb2_mmap---of 35
vb2_plane_cookie---of 6
vb2_plane_vaddr---of 6
vb2_queue_error100%of 1
vb2_read---of 1
vb2_req_prepare---of 5
vb2_req_queue---of 4
vb2_req_release---of 4
vb2_req_unbind---of 5
vb2_req_unprepare---of 7
vb2_request_buffer_cnt---of 5
vb2_request_object_is_buffer---of 1
vb2_start_streaming---of 19
vb2_thread---of 51
vb2_thread_start---of 13
vb2_thread_stop---of 3
vb2_verify_memory_type---of 31
vb2_wait_for_all_buffers---of 10
vb2_write---of 1
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
should_fail_usercopy100%of 1
-----------
SUMMARY100%of 1
__bpf_trace_pwc_handler_enter---of 1
__bpf_trace_pwc_handler_exit---of 1
__traceiter_pwc_handler_enter---of 3
__traceiter_pwc_handler_exit---of 3
buffer_cleanup---of 1
buffer_finish---of 2
buffer_init---of 2
buffer_prepare---of 2
buffer_queue---of 5
perf_trace_pwc_handler_enter---of 12
perf_trace_pwc_handler_exit---of 10
pwc_cleanup_queued_bufs---of 5
pwc_get_next_fill_buf---of 5
pwc_isoc_cleanup---of 9
pwc_isoc_handler---of 87
pwc_video_release100%of 1
queue_setup---of 5
start_streaming---of 42
stop_streaming---of 5
trace_event_raw_event_pwc_handler_enter---of 13
trace_event_raw_event_pwc_handler_exit---of 11
trace_raw_output_pwc_handler_enter---of 4
trace_raw_output_pwc_handler_exit---of 5
usb_pwc_disconnect80%of 5
usb_pwc_probe---of 84
-----------
SUMMARY84%of 6
-----------
SUMMARY---of 0
___pmd_free_tlb---of 13
___pte_free_tlb---of 13
___pud_free_tlb---of 13
__native_set_fixmap---of 3
__native_set_fixmap.part.0---of 1
arch_check_zapped_pmd---of 5
arch_check_zapped_pte---of 4
const_folio_flags.constprop.040%of 10
native_set_fixmap---of 10
pgd_alloc---of 7
pgd_free100%of 3
pgd_page_get_mm---of 1
pmd_clear_huge60%of 5
pmd_free_pte_page---of 2
pmd_mkwrite---of 1
pmd_set_huge---of 12
pmdp_test_and_clear_young---of 5
pte_alloc_one48%of 19
pte_mkwrite100%of 1
ptep_clear_flush_young---of 4
ptep_set_access_flags50%of 4
ptep_test_and_clear_young---of 4
pud_clear_huge50%of 4
pud_free_pmd_page---of 23
pud_set_huge---of 10
-----------
SUMMARY53%of 46
__drm_plane_get_damage_clips---of 5
__drm_universal_plane_alloc---of 8
__drm_universal_plane_init---of 62
__drm_universal_plane_init.cold---of 1
__drmm_universal_plane_alloc---of 9
__setplane_atomic---of 10
__setplane_check.isra.0---of 12
__setplane_check.isra.0.cold---of 1
__setplane_internal---of 14
drm_any_plane_has_format---of 4
drm_create_scaling_filter_prop---of 11
drm_mode_cursor2_ioctl---of 1
drm_mode_cursor_common---of 25
drm_mode_cursor_ioctl---of 1
drm_mode_cursor_universal---of 40
drm_mode_getplane---of 24
drm_mode_getplane_res---of 16
drm_mode_page_flip_ioctl---of 66
drm_mode_plane_set_obj_prop---of 5
drm_mode_setplane---of 13
drm_plane_check_pixel_format---of 15
drm_plane_cleanup---of 11
drm_plane_create_scaling_filter_property---of 5
drm_plane_enable_fb_damage_clips---of 1
drm_plane_force_disable---of 8
drm_plane_from_index---of 5
drm_plane_get_damage_clips---of 11
drm_plane_get_damage_clips_count---of 5
drm_plane_register_all---of 12
drm_plane_unregister_all75%of 4
drm_universal_plane_init---of 3
drmm_universal_plane_alloc_release---of 3
setplane_internal---of 18
-----------
SUMMARY75%of 4
connect_type_show---of 3
connector_bind---of 11
connector_unbind---of 3
disable_show---of 12
disable_store---of 20
early_stop_show---of 2
early_stop_store---of 3
link_peers_report---of 38
location_show---of 1
match_location---of 11
over_current_count_show---of 1
quirks_show---of 1
quirks_store---of 3
state_show---of 1
usb3_lpm_permit_show---of 4
usb3_lpm_permit_store---of 11
usb_hub_create_port_device---of 39
usb_hub_remove_port_device20%of 10
usb_port_device_release100%of 1
usb_port_runtime_resume---of 20
usb_port_runtime_suspend37%of 11
usb_port_shutdown---of 2
-----------
SUMMARY32%of 22
netlbl_unlabel_accept---of 5
netlbl_unlabel_addrinfo_get---of 10
netlbl_unlabel_getattr---of 35
netlbl_unlabel_list---of 6
netlbl_unlabel_staticadd---of 11
netlbl_unlabel_staticadddef---of 10
netlbl_unlabel_staticlist---of 73
netlbl_unlabel_staticlist_gen---of 32
netlbl_unlabel_staticlistdef---of 62
netlbl_unlabel_staticremove---of 9
netlbl_unlabel_staticremovedef---of 8
netlbl_unlhsh_add---of 67
netlbl_unlhsh_free_iface---of 23
netlbl_unlhsh_hash43%of 7
netlbl_unlhsh_netdev_handler56%of 9
netlbl_unlhsh_remove---of 79
netlbl_unlhsh_search_iface37%of 11
-----------
SUMMARY45%of 27
ieee802154_alloc_hw---of 12
ieee802154_configure_durations---of 13
ieee802154_configure_durations.cold---of 4
ieee802154_free_hw---of 3
ieee802154_register_hw---of 19
ieee802154_tasklet_handler---of 5
ieee802154_unregister_hw100%of 1
-----------
SUMMARY100%of 1
_dvb_dmx_swfilter---of 50
dmx_section_feed_allocate_filter---of 11
dmx_section_feed_release_filter---of 10
dmx_section_feed_set---of 4
dmx_section_feed_start_filtering---of 13
dmx_section_feed_stop_filtering---of 4
dmx_ts_feed_set---of 10
dmx_ts_feed_start_filtering---of 9
dmx_ts_feed_stop_filtering---of 6
dvb_demux_feed_add---of 10
dvb_demux_feed_del---of 11
dvb_dmx_crc32---of 1
dvb_dmx_init---of 19
dvb_dmx_memcopy---of 1
dvb_dmx_release---of 1
dvb_dmx_swfilter---of 1
dvb_dmx_swfilter_204---of 1
dvb_dmx_swfilter_packet---of 91
dvb_dmx_swfilter_packets---of 5
dvb_dmx_swfilter_raw---of 1
dvb_dmx_swfilter_section_copy_dump.isra.0---of 28
dvb_dmx_swfilter_section_new---of 5
dvbdmx_add_frontend---of 3
dvbdmx_allocate_section_feed---of 11
dvbdmx_allocate_ts_feed---of 20
dvbdmx_close100%of 3
dvbdmx_connect_frontend---of 3
dvbdmx_disconnect_frontend---of 1
dvbdmx_get_frontends---of 2
dvbdmx_get_pes_pids---of 1
dvbdmx_open---of 3
dvbdmx_release_section_feed---of 4
dvbdmx_release_ts_feed---of 7
dvbdmx_remove_frontend---of 9
dvbdmx_write---of 10
-----------
SUMMARY100%of 3
brightness_show---of 1
brightness_store---of 6
devm_led_classdev_match---of 6
devm_led_classdev_register_ext---of 6
devm_led_classdev_release100%of 1
devm_led_classdev_unregister---of 2
devm_led_get---of 5
devm_led_release---of 1
devm_of_led_get---of 6
devm_of_led_get_optional---of 7
led_add_lookup---of 3
led_classdev_register_ext---of 43
led_classdev_resume---of 3
led_classdev_suspend---of 1
led_classdev_unregister100%of 9
led_get---of 14
led_put---of 1
led_remove_lookup---of 3
led_resume---of 6
led_suspend---of 4
max_brightness_show---of 1
of_led_get---of 7
-----------
SUMMARY100%of 10
-----------
SUMMARY---of 0
cmedia_exit---of 1
cmedia_init---of 5
cmhid_hs100b_report_fixup---of 4
cmhid_input_configured---of 1
cmhid_input_mapping---of 1
cmhid_probe---of 8
cmhid_raw_event---of 7
cmhid_remove100%of 1
-----------
SUMMARY100%of 1
__bpf_map_offload_destroy---of 5
__bpf_offload_dev_match---of 8
__bpf_offload_dev_netdev_register---of 105
__bpf_offload_dev_netdev_unregister---of 130
__bpf_prog_dev_bound_init---of 12
__bpf_prog_offload_destroy---of 5
__rhashtable_lookup.constprop.0 58%of 28
bpf_dev_bound_kfunc_check---of 5
bpf_dev_bound_netdev_unregister43%of 7
bpf_dev_bound_resolve_kfunc---of 9
bpf_map_offload_delete_elem---of 3
bpf_map_offload_get_next_key---of 3
bpf_map_offload_info_fill---of 4
bpf_map_offload_info_fill_ns---of 9
bpf_map_offload_lookup_elem---of 3
bpf_map_offload_map_alloc---of 13
bpf_map_offload_map_free---of 3
bpf_map_offload_map_mem_usage---of 1
bpf_map_offload_ndo---of 4
bpf_map_offload_update_elem---of 5
bpf_offload_dev_create---of 3
bpf_offload_dev_destroy---of 4
bpf_offload_dev_match---of 1
bpf_offload_dev_netdev_register---of 1
bpf_offload_dev_netdev_unregister---of 1
bpf_offload_dev_priv---of 1
bpf_offload_find_netdev50%of 16
bpf_offload_prog_map_match---of 5
bpf_prog_dev_bound_destroy---of 7
bpf_prog_dev_bound_inherit---of 6
bpf_prog_dev_bound_init---of 12
bpf_prog_dev_bound_match---of 7
bpf_prog_offload_compile---of 3
bpf_prog_offload_finalize---of 4
bpf_prog_offload_info_fill---of 12
bpf_prog_offload_info_fill_ns---of 9
bpf_prog_offload_remove_insns---of 6
bpf_prog_offload_replace_insn---of 6
bpf_prog_offload_verifier_prep---of 3
bpf_prog_offload_verify_insn---of 3
bpf_prog_warn_on_exec---of 1
jhash---of 16
rht_key_get_hash.constprop.0 100%of 1
-----------
SUMMARY54%of 52
__access_remote_vm---of 47
__apply_to_page_range49%of 85
__do_fault---of 17
__get_locked_pte---of 5
__handle_mm_fault49%of 173
__might_fault100%of 3
__pmd_alloc41%of 37
__pte_alloc19%of 16
__pte_alloc_kernel---of 8
__pud_alloc---of 33
__vm_insert_mixed---of 9
__vm_map_pages---of 6
_compound_head---of 9
access_process_vm---of 3
access_remote_vm---of 1
apply_to_existing_page_range---of 1
apply_to_page_range100%of 1
clear_huge_page---of 11
const_folio_flags.constprop.0 40%of 10
copy_folio_from_user---of 9
copy_page_range---of 295
copy_subpage---of 2
copy_user_large_folio---of 21
count_memcg_event_mm.part.048%of 23
do_page_mkwrite---of 12
do_set_pmd---of 1
do_swap_page---of 146
do_wp_page24%of 197
fault_around_bytes_fops_open---of 1
fault_around_bytes_get---of 1
fault_around_bytes_set---of 4
fault_around_bytes_set.cold---of 1
fault_dirty_shared_page---of 26
finish_fault---of 31
finish_mkwrite_fault---of 12
folio_flags.constprop.0 40%of 10
folio_pte_batch.constprop.0---of 47
folio_put---of 4
folio_try_dup_anon_rmap_ptes---of 94
follow_pfn---of 11
follow_phys---of 29
follow_pte---of 10
free_pgd_range---of 46
free_pgtables---of 32
generic_access_phys---of 28
handle_mm_fault57%of 46
insert_page_into_pte_locked.isra.0---of 24
insert_pfn---of 41
lock_mm_and_find_vma56%of 40
lock_vma_under_rcu47%of 45
mm_trace_rss_stat---of 1
mmap_assert_write_locked60%of 5
numa_migrate_prep---of 5
percpu_ref_put_many.constprop.0---of 15
pmd_install100%of 3
print_bad_pte---of 21
print_vma_addr---of 16
ptlock_alloc100%of 3
ptlock_free---of 1
rcu_read_unlock50%of 6
remap_pfn_range---of 4
remap_pfn_range_notrack---of 58
set_pte_range43%of 26
set_ptes.constprop.0---of 9
trace_rss_stat34%of 15
unmap_mapping_folio---of 8
unmap_mapping_pages---of 5
unmap_mapping_range60%of 5
unmap_page_range---of 199
unmap_single_vma---of 13
unmap_vmas---of 14
validate_page_before_insert---of 11
vm_insert_page---of 37
vm_insert_pages---of 54
vm_iomap_memory---of 5
vm_map_pages---of 1
vm_map_pages_zero---of 1
vm_normal_folio---of 9
vm_normal_page46%of 11
vma_end_read---of 11
vmf_anon_prepare---of 5
vmf_insert_mixed---of 1
vmf_insert_mixed_mkwrite---of 1
vmf_insert_pfn---of 1
vmf_insert_pfn_prot---of 43
walk_to_pmd---of 11
wp_page_reuse---of 32
zap_page_range_single---of 13
zap_vma_ptes---of 5
-----------
SUMMARY42%of 760
__hif_usb_tx.isra.0---of 31
ath9k_hif_request_firmware---of 16
ath9k_hif_usb_alloc_urbs---of 37
ath9k_hif_usb_dealloc_tx_urbs---of 9
ath9k_hif_usb_dealloc_urbs---of 1
ath9k_hif_usb_disconnect42%of 12
ath9k_hif_usb_download_fw---of 9
ath9k_hif_usb_exit---of 1
ath9k_hif_usb_firmware_cb---of 16
ath9k_hif_usb_free_rx_remain_skb---of 4
ath9k_hif_usb_init---of 1
ath9k_hif_usb_probe---of 20
ath9k_hif_usb_reg_in_cb---of 12
ath9k_hif_usb_resume---of 8
ath9k_hif_usb_rx_cb---of 47
ath9k_hif_usb_suspend---of 6
hif_usb_mgmt_cb---of 8
hif_usb_regout_cb---of 4
hif_usb_send---of 26
hif_usb_sta_drain---of 7
hif_usb_start---of 1
hif_usb_stop---of 10
hif_usb_tx_cb---of 26
-----------
SUMMARY42%of 12
cpu_latency_qos_add_request---of 21
cpu_latency_qos_limit---of 1
cpu_latency_qos_open---of 3
cpu_latency_qos_read---of 4
cpu_latency_qos_release---of 1
cpu_latency_qos_remove_request---of 21
cpu_latency_qos_request_active---of 1
cpu_latency_qos_update_request---of 22
cpu_latency_qos_write---of 6
freq_constraints_init---of 1
freq_qos_add_notifier---of 10
freq_qos_add_request---of 10
freq_qos_apply---of 4
freq_qos_read_value---of 11
freq_qos_remove_notifier---of 10
freq_qos_remove_request---of 10
freq_qos_update_request---of 9
pm_qos_get_value---of 9
pm_qos_read_value100%of 1
pm_qos_update_flags40%of 35
pm_qos_update_target---of 26
-----------
SUMMARY42%of 36
acm_ctrl_irq---of 63
acm_ctrl_irq.cold---of 1
acm_ctrl_msg---of 5
acm_disconnect73%of 22
acm_port_activate---of 18
acm_port_destruct---of 3
acm_port_dtr_rts---of 6
acm_port_shutdown---of 11
acm_pre_reset---of 1
acm_probe---of 135
acm_read_bulk_callback---of 19
acm_reset_resume---of 3
acm_resume---of 13
acm_softint---of 16
acm_start_wb---of 5
acm_submit_read_urb---of 7
acm_suspend---of 10
acm_tty_break_ctl---of 8
acm_tty_chars_in_buffer---of 7
acm_tty_cleanup---of 1
acm_tty_close---of 1
acm_tty_flush_buffer---of 5
acm_tty_get_icount---of 1
acm_tty_hangup---of 1
acm_tty_install---of 21
acm_tty_ioctl---of 15
acm_tty_open---of 1
acm_tty_set_termios---of 16
acm_tty_throttle---of 1
acm_tty_tiocmget---of 1
acm_tty_tiocmset---of 4
acm_tty_unthrottle---of 3
acm_tty_write---of 15
acm_tty_write_room---of 7
acm_write_bulk---of 1
bmCapabilities_show---of 1
get_serial_info---of 3
iCountryCodeRelDate_show---of 1
set_serial_info---of 7
wCountryCodes_show---of 1
-----------
SUMMARY73%of 22
__media_entity_next_link---of 7
__media_entity_remove_link---of 22
__media_entity_remove_links67%of 3
__media_entity_setup_link---of 29
__media_pipeline_entity_iter_next---of 12
__media_pipeline_pad_iter_next---of 6
__media_pipeline_start---of 96
__media_pipeline_stop---of 11
__media_remove_intf_link100%of 7
__media_remove_intf_links13%of 8
dev_dbg_obj39%of 13
media_add_link---of 4
media_create_ancillary_link---of 7
media_create_intf_link---of 7
media_create_pad_link---of 17
media_create_pad_links---of 22
media_devnode_create---of 7
media_devnode_remove100%of 6
media_entity_enum_cleanup---of 1
media_entity_enum_init---of 3
media_entity_find_link---of 13
media_entity_get_fwnode_pad---of 12
media_entity_has_pad_interdep---of 8
media_entity_pads_init---of 21
media_entity_pipeline---of 5
media_entity_remote_pad_unique---of 9
media_entity_remove_links---of 4
media_entity_setup_link---of 1
media_get_pad_index---of 8
media_gobj_create---of 12
media_gobj_destroy100%of 4
media_graph_walk_cleanup---of 1
media_graph_walk_init---of 3
media_graph_walk_next---of 29
media_graph_walk_start---of 5
media_pad_pipeline---of 1
media_pad_remote_pad_first---of 12
media_pad_remote_pad_unique---of 9
media_pipeline_add_pad---of 18
media_pipeline_alloc_start---of 6
media_pipeline_entity_iter_cleanup---of 1
media_pipeline_entity_iter_init---of 3
media_pipeline_start---of 1
media_pipeline_stop---of 1
media_remove_intf_link---of 8
media_remove_intf_links---of 2
-----------
SUMMARY61%of 41
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__nfc_alloc_vendor_cmd_reply_skb---of 10
nfc_genl_activate_target---of 6
nfc_genl_data_exit100%of 1
nfc_genl_data_init---of 1
nfc_genl_deactivate_target---of 5
nfc_genl_dep_link_down---of 5
nfc_genl_dep_link_down_event---of 10
nfc_genl_dep_link_up---of 10
nfc_genl_dep_link_up_event---of 14
nfc_genl_dev_down---of 4
nfc_genl_dev_up---of 4
nfc_genl_device_added---of 8
nfc_genl_device_removed75%of 8
nfc_genl_disable_se---of 5
nfc_genl_dump_devices---of 12
nfc_genl_dump_devices_done---of 4
nfc_genl_dump_ses---of 27
nfc_genl_dump_ses_done---of 4
nfc_genl_dump_targets---of 32
nfc_genl_dump_targets_done---of 4
nfc_genl_enable_se---of 5
nfc_genl_exit---of 1
nfc_genl_fw_download---of 5
nfc_genl_fw_download_done---of 10
nfc_genl_get_device---of 8
nfc_genl_llc_get_params---of 20
nfc_genl_llc_sdreq---of 25
nfc_genl_llc_send_sdres---of 22
nfc_genl_llc_set_params---of 24
nfc_genl_rcv_nl_event---of 8
nfc_genl_se_added---of 10
nfc_genl_se_connectivity---of 11
nfc_genl_se_io---of 22
nfc_genl_se_removed---of 9
nfc_genl_se_transaction---of 13
nfc_genl_send_device---of 13
nfc_genl_setup_device_added---of 8
nfc_genl_start_poll---of 18
nfc_genl_stop_poll---of 8
nfc_genl_target_lost---of 11
nfc_genl_targets_found---of 8
nfc_genl_tm_activated---of 9
nfc_genl_tm_deactivated---of 8
nfc_genl_vendor_cmd---of 16
nfc_urelease_event_work---of 9
nfc_vendor_cmd_reply---of 4
se_io_cb---of 11
-----------
SUMMARY78%of 9
__v4l2_device_register_subdev_nodes---of 19
v4l2_device_disconnect100%of 4
v4l2_device_put88%of 8
v4l2_device_register---of 11
v4l2_device_register_subdev---of 23
v4l2_device_release_subdev_node---of 6
v4l2_device_set_name---of 4
v4l2_device_unregister39%of 13
v4l2_device_unregister_subdev59%of 17
-----------
SUMMARY62%of 42
__cgroup_account_cputime50%of 2
__cgroup_account_cputime_field60%of 5
cgroup_base_stat_cputime_show---of 4
cgroup_rstat_exit---of 11
cgroup_rstat_flush---of 1
cgroup_rstat_flush_hold---of 1
cgroup_rstat_flush_locked---of 51
cgroup_rstat_flush_release---of 1
cgroup_rstat_init---of 8
cgroup_rstat_updated100%of 6
root_cgroup_cputime---of 6
-----------
SUMMARY77%of 13
-----------
SUMMARY---of 0
call_commit_handler---of 6
compat_standard_call---of 4
compat_wext_handle_ioctl---of 12
get_wireless_stats---of 5
ioctl_standard_call---of 11
ioctl_standard_iw_point---of 44
iw_handler_get_iwstats---of 7
iwe_stream_add_event---of 5
iwe_stream_add_point---of 9
iwe_stream_add_value---of 4
rtnetlink_ifinfo_prep---of 12
wext_handle_ioctl---of 10
wext_netdev_notifier_call100%of 1
wext_pernet_exit---of 1
wext_pernet_init---of 1
wireless_nlevent_flush84%of 6
wireless_nlevent_process---of 1
wireless_process_ioctl---of 17
wireless_send_event---of 41
-----------
SUMMARY86%of 7
-----------
SUMMARY---of 0
iowarrior_callback---of 17
iowarrior_devnode100%of 3
iowarrior_disconnect67%of 6
iowarrior_ioctl---of 23
iowarrior_open78%of 9
iowarrior_poll---of 16
iowarrior_probe---of 35
iowarrior_read---of 33
iowarrior_release---of 11
iowarrior_write---of 42
iowarrior_write_callback---of 5
-----------
SUMMARY78%of 18
cap_bprm_creds_from_file---of 75
cap_capable40%of 10
cap_capable.cold---of 1
cap_capget---of 16
cap_capset---of 19
cap_convert_nscap---of 17
cap_inode_getsecurity---of 36
cap_inode_killpriv---of 2
cap_inode_need_killpriv100%of 1
cap_inode_removexattr---of 7
cap_inode_setxattr---of 5
cap_mmap_addr---of 13
cap_mmap_file---of 1
cap_ptrace_access_check---of 22
cap_ptrace_traceme---of 19
cap_safe_nice---of 23
cap_settime---of 1
cap_task_fix_setuid---of 29
cap_task_prctl---of 48
cap_task_setioprio---of 1
cap_task_setnice---of 1
cap_task_setscheduler---of 1
cap_vm_enough_memory34%of 9
get_vfs_caps_from_disk---of 21
-----------
SUMMARY40%of 20
__scsi_init_queue---of 10
__scsi_queue_insert---of 5
device_quiesce_fn---of 1
device_resume_fn---of 5
device_unblock---of 1
scsi_alloc_request---of 3
scsi_alloc_sgtables---of 31
scsi_block_requests---of 1
scsi_block_targets---of 3
scsi_build_sense---of 1
scsi_cleanup_rq---of 10
scsi_cmd_runtime_exceeced---of 4
scsi_commit_rqs---of 1
scsi_complete---of 9
scsi_dec_host_busy---of 17
scsi_device_block---of 9
scsi_device_from_queue---of 5
scsi_device_quiesce---of 9
scsi_device_resume---of 5
scsi_device_set_state---of 12
scsi_device_unbusy---of 9
scsi_device_unbusy.cold---of 2
scsi_done---of 1
scsi_done_direct---of 1
scsi_done_internal---of 23
scsi_end_request---of 43
scsi_evt_thread---of 23
scsi_execute_cmd---of 56
scsi_exit_queue---of 1
scsi_failures_reset_retries---of 2
scsi_free_sgtables---of 5
scsi_host_block---of 11
scsi_host_unblock---of 7
scsi_init_command---of 4
scsi_init_hctx---of 1
scsi_init_sense_cache---of 3
scsi_initialize_rq---of 1
scsi_internal_device_block_nowait---of 8
scsi_internal_device_unblock_nowait---of 10
scsi_io_completion---of 92
scsi_kick_sdev_queue---of 2
scsi_kmap_atomic_sg---of 15
scsi_kunmap_atomic_sg---of 2
scsi_map_queues---of 3
scsi_mode_select---of 15
scsi_mode_sense---of 30
scsi_mq_exit_request67%of 3
scsi_mq_free_tags100%of 1
scsi_mq_get_budget---of 14
scsi_mq_get_budget.cold---of 2
scsi_mq_get_rq_budget_token---of 1
scsi_mq_init_request---of 7
scsi_mq_lld_busy---of 6
scsi_mq_poll---of 3
scsi_mq_put_budget---of 6
scsi_mq_put_budget.cold---of 2
scsi_mq_requeue_cmd---of 13
scsi_mq_set_rq_budget_token---of 1
scsi_mq_setup_tags---of 9
scsi_queue_insert---of 3
scsi_queue_rq---of 150
scsi_queue_rq.cold---of 2
scsi_requeue_run_queue---of 1
scsi_result_to_blk_status---of 9
scsi_run_host_queues---of 2
scsi_run_queue---of 27
scsi_run_queue_async---of 8
scsi_set_blocked.isra.0---of 4
scsi_start_queue---of 2
scsi_target_quiesce---of 1
scsi_target_resume---of 1
scsi_target_unblock---of 4
scsi_test_unit_ready---of 15
scsi_unblock_requests---of 2
scsi_vpd_lun_id---of 66
scsi_vpd_tpg_id---of 34
sdev_disable_disk_events---of 1
sdev_enable_disk_events---of 3
sdev_evt_alloc---of 6
sdev_evt_send---of 3
sdev_evt_send_simple---of 6
target_block---of 4
target_unblock---of 4
-----------
SUMMARY75%of 4
__ref_tracker_dir_pr_ostream---of 36
ref_tracker_alloc43%of 26
ref_tracker_dir_exit---of 16
ref_tracker_dir_print---of 1
ref_tracker_dir_print_locked---of 1
ref_tracker_dir_snprint---of 1
ref_tracker_free48%of 25
-----------
SUMMARY46%of 51
device_cmp---of 9
inet_cmp---of 10
iterate_cleanup_work---of 6
masq_device_event100%of 4
masq_inet6_event100%of 4
masq_inet_event---of 5
nf_nat_masq_schedule.part.0 61%of 23
nf_nat_masquerade_inet_register_notifiers---of 12
nf_nat_masquerade_inet_unregister_notifiers---of 3
nf_nat_masquerade_ipv4---of 20
nf_nat_masquerade_ipv6---of 8
-----------
SUMMARY71%of 31
__dev_printk91%of 11
__device_link_del---of 9
__device_links_no_driver17%of 12
__device_links_queue_sync_state36%of 14
__device_links_supplier_defer_sync---of 8
__fw_devlink_link_to_consumers.isra.0---of 17
__fw_devlink_link_to_suppliers---of 7
__fw_devlink_pickup_dangling_consumers---of 6
__fw_devlink_relax_cycles---of 45
__fwnode_link_add---of 12
__fwnode_link_cycle---of 3
__fwnode_link_del---of 7
__root_device_register---of 12
_dev_alert---of 1
_dev_crit---of 1
_dev_emerg---of 1
_dev_err100%of 1
_dev_info100%of 1
_dev_notice100%of 1
_dev_printk---of 1
_dev_warn100%of 1
auto_remove_on_show---of 3
class_dir_child_ns_type100%of 1
class_dir_release100%of 1
cleanup_glue_dir92%of 12
dev_attr_show---of 4
dev_attr_store---of 3
dev_driver_string43%of 7
dev_err_probe---of 5
dev_printk_emit100%of 1
dev_set_name100%of 1
dev_show---of 1
dev_uevent63%of 37
dev_uevent_filter100%of 5
dev_uevent_name100%of 6
dev_vprintk_emit---of 1
device_add35%of 126
device_add_groups100%of 1
device_change_owner---of 22
device_check_offline---of 13
device_create---of 1
device_create_bin_file---of 3
device_create_file78%of 9
device_create_groups_vargs---of 7
device_create_release34%of 6
device_create_with_groups---of 1
device_del63%of 37
device_destroy100%of 3
device_find_any_child---of 7
device_find_child---of 8
device_find_child_by_name---of 10
device_for_each_child100%of 7
device_for_each_child_reverse75%of 8
device_get_devnode84%of 12
device_get_ownership34%of 3
device_initialize100%of 3
device_is_dependent---of 19
device_link_add---of 117
device_link_del---of 1
device_link_drop_managed---of 5
device_link_init_status.isra.0---of 4
device_link_put_kref---of 9
device_link_release_fn---of 9
device_link_remove---of 7
device_link_wait_removal---of 1
device_links_busy34%of 6
device_links_check_suppliers13%of 39
device_links_driver_bound21%of 39
device_links_driver_cleanup31%of 13
device_links_flush_sync_list6%of 18
device_links_force_bind---of 7
device_links_no_driver---of 6
device_links_read_lock100%of 1
device_links_read_lock_held---of 3
device_links_read_unlock67%of 3
device_links_supplier_sync_state_pause---of 1
device_links_supplier_sync_state_resume---of 8
device_links_unbind_consumers---of 11
device_match_acpi_dev---of 3
device_match_acpi_handle---of 4
device_match_any---of 1
device_match_devt100%of 1
device_match_fwnode---of 1
device_match_name---of 3
device_match_of_node---of 1
device_move---of 73
device_namespace---of 5
device_offline---of 17
device_online---of 7
device_pm_move_to_tail---of 3
device_register---of 1
device_release70%of 10
device_remove_attrs89%of 9
device_remove_bin_file---of 2
device_remove_class_symlinks90%of 10
device_remove_file100%of 2
device_remove_file_self---of 3
device_remove_groups100%of 1
device_rename---of 13
device_reorder_to_tail---of 15
device_set_node---of 5
device_set_of_node_from_dev---of 1
device_show_bool---of 1
device_show_int---of 1
device_show_ulong---of 1
device_shutdown---of 31
device_store_bool---of 3
device_store_int---of 5
device_store_ulong---of 4
device_unregister50%of 6
devices_kset_move_last---of 10
devlink_add_symlinks---of 40
devlink_dev_release---of 1
devlink_remove_symlinks---of 29
devm_attr_group_remove---of 3
devm_attr_groups_remove---of 3
devm_device_add_group---of 6
devm_device_add_groups---of 6
fw_devlink_create_devlink---of 61
fw_devlink_dev_sync_state---of 16
fw_devlink_drivers_done---of 1
fw_devlink_is_strict---of 3
fw_devlink_no_driver---of 4
fw_devlink_parse_fwtree---of 9
fw_devlink_probing_done---of 1
fw_devlink_purge_absent_suppliers---of 2
fw_devlink_purge_absent_suppliers.part.0---of 6
fw_devlink_relax_link---of 6
fwnode_get_next_parent_dev---of 6
fwnode_link_add---of 1
fwnode_links_purge---of 5
get_device100%of 3
get_device_parent17%of 24
kill_device84%of 6
klist_children_get---of 2
klist_children_put100%of 2
lock_device_hotplug---of 1
lock_device_hotplug_sysfs---of 3
online_show---of 1
online_store---of 9
put_device100%of 2
removable_show---of 2
root_device_release---of 1
root_device_unregister---of 3
runtime_pm_show---of 1
set_dev_info83%of 17
set_primary_fwnode---of 14
set_secondary_fwnode---of 6
status_show---of 3
sync_state_only_show---of 1
sync_state_resume_initcall---of 1
uevent_show---of 16
uevent_store---of 5
unlock_device_hotplug---of 1
virtual_device_parent---of 4
waiting_for_supplier_show---of 7
-----------
SUMMARY49%of 539
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__ia32_sys_modify_ldt---of 10
__x64_sys_modify_ldt---of 10
alloc_ldt_struct---of 9
destroy_context_ldt34%of 6
flush_ldt---of 5
free_ldt_pgtables---of 3
ldt_arch_exit_mmap---of 1
ldt_dup_context---of 11
load_mm_ldt---of 12
map_ldt_struct---of 37
native_write_gdt_entry.constprop.0---of 1
read_default_ldt---of 5
read_ldt---of 8
switch_ldt---of 9
write_ldt---of 48
-----------
SUMMARY34%of 6
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
tty_port_alloc_xmit_buf---of 5
tty_port_block_til_ready---of 28
tty_port_carrier_raised---of 4
tty_port_close---of 4
tty_port_close_end---of 5
tty_port_close_start---of 22
tty_port_default_lookahead_buf---of 5
tty_port_default_receive_buf---of 5
tty_port_default_wakeup---of 2
tty_port_destroy100%of 1
tty_port_free_xmit_buf---of 1
tty_port_hangup---of 3
tty_port_init---of 1
tty_port_install---of 1
tty_port_link_device---of 3
tty_port_lower_dtr_rts---of 2
tty_port_open---of 7
tty_port_put---of 11
tty_port_raise_dtr_rts---of 2
tty_port_register_device---of 4
tty_port_register_device_attr---of 4
tty_port_register_device_attr_serdev---of 4
tty_port_register_device_serdev---of 4
tty_port_shutdown---of 10
tty_port_tty_get29%of 7
tty_port_tty_hangup---of 5
tty_port_tty_set---of 7
tty_port_tty_wakeup---of 1
tty_port_unregister_device---of 1
-----------
SUMMARY38%of 8
ethnl_bcastmsg_put---of 1
ethnl_default_doit---of 34
ethnl_default_done---of 1
ethnl_default_dumpit---of 49
ethnl_default_notify---of 24
ethnl_default_parse---of 4
ethnl_default_set_doit---of 21
ethnl_default_start---of 14
ethnl_dump_put---of 1
ethnl_fill_reply_header---of 14
ethnl_init_reply_data.isra.0---of 1
ethnl_multicast---of 5
ethnl_netdev_event50%of 4
ethnl_ops_begin---of 11
ethnl_ops_complete---of 4
ethnl_parse_header_dev_get---of 29
ethnl_reply_init---of 9
ethtool_notify---of 11
-----------
SUMMARY50%of 4
-----------
SUMMARY---of 0
delayedwork_callback---of 45
dj_put_receiver_dev58%of 14
logi_dj_dj_event.constprop.0---of 23
logi_dj_hidpp_event.constprop.0---of 47
logi_dj_ll_close---of 2
logi_dj_ll_may_wakeup---of 7
logi_dj_ll_open---of 4
logi_dj_ll_parse---of 42
logi_dj_ll_raw_request---of 16
logi_dj_ll_start---of 4
logi_dj_ll_stop---of 2
logi_dj_probe---of 62
logi_dj_raw_event---of 25
logi_dj_recv_forward_input_report---of 9
logi_dj_recv_forward_report.isra.0---of 3
logi_dj_recv_query_paired_devices---of 7
logi_dj_recv_queue_notification---of 8
logi_dj_recv_queue_unknown_work---of 3
logi_dj_recv_send_report.constprop.0.isra.0---of 5
logi_dj_recv_switch_to_dj_mode.constprop.0---of 5
logi_dj_remove70%of 10
logi_dj_reset_resume---of 5
logi_hidpp_dev_conn_notif_equad---of 7
rdcat---of 1
-----------
SUMMARY63%of 24
-----------
SUMMARY---of 0
drm_mode_config_cleanup---of 25
drm_mode_config_init_release---of 1
drm_mode_config_reset---of 18
drm_mode_config_validate---of 69
drm_mode_config_validate.cold---of 16
drm_mode_getresources---of 30
drm_modeset_register_all---of 9
drm_modeset_unregister_all100%of 1
drmm_mode_config_init---of 43
-----------
SUMMARY100%of 1
__pte_offset_map48%of 19
__pte_offset_map_lock54%of 15
pgd_clear_bad---of 1
pmd_clear_bad---of 1
pte_offset_map_nolock86%of 7
ptep_clear_flush72%of 7
pud_clear_bad---of 1
-----------
SUMMARY59%of 48
netdev_genl_dev_notify37%of 11
netdev_genl_netdevice_event50%of 8
netdev_nl_dev_fill---of 23
netdev_nl_dev_get_doit---of 11
netdev_nl_dev_get_dumpit---of 4
netdev_nl_napi_fill_one---of 17
netdev_nl_napi_get_doit---of 11
netdev_nl_napi_get_dumpit---of 23
netdev_nl_qstats_get_dumpit---of 53
netdev_nl_queue_dump_one---of 8
netdev_nl_queue_fill_one---of 17
netdev_nl_queue_get_doit---of 22
netdev_nl_queue_get_dumpit---of 9
netdev_nl_stats_queue---of 19
netdev_nl_stats_write_rx---of 16
netdev_nl_stats_write_tx---of 11
-----------
SUMMARY43%of 19
tomoyo_bprm_check_security---of 7
tomoyo_bprm_committed_creds---of 1
tomoyo_cred_prepare---of 5
tomoyo_domain50%of 4
tomoyo_file_fcntl---of 5
tomoyo_file_ioctl100%of 1
tomoyo_file_open100%of 3
tomoyo_file_truncate100%of 1
tomoyo_inode_getattr---of 1
tomoyo_path_chmod---of 1
tomoyo_path_chown---of 5
tomoyo_path_chroot---of 1
tomoyo_path_link---of 1
tomoyo_path_mkdir---of 1
tomoyo_path_mknod80%of 5
tomoyo_path_rename---of 4
tomoyo_path_rmdir---of 1
tomoyo_path_symlink---of 1
tomoyo_path_truncate---of 1
tomoyo_path_unlink---of 1
tomoyo_sb_mount---of 1
tomoyo_sb_pivotroot---of 1
tomoyo_sb_umount---of 1
tomoyo_socket_bind---of 1
tomoyo_socket_connect---of 1
tomoyo_socket_listen---of 1
tomoyo_socket_sendmsg---of 1
tomoyo_task_alloc---of 1
tomoyo_task_free---of 4
-----------
SUMMARY79%of 14
__orc_find100%of 9
__read_once_word_nocheck100%of 1
__unwind_start60%of 22
orc_sort_cmp---of 5
orc_sort_swap---of 1
stack_access_ok100%of 12
unwind_dump---of 8
unwind_get_return_address100%of 5
unwind_get_return_address_ptr---of 8
unwind_module_init---of 4
unwind_next_frame38%of 139
-----------
SUMMARY49%of 188
__pm_runtime_barrier100%of 12
__pm_runtime_disable69%of 16
__pm_runtime_idle100%of 9
__pm_runtime_resume100%of 7
__pm_runtime_set_status75%of 39
__pm_runtime_suspend100%of 9
__pm_runtime_use_autosuspend100%of 1
__rpm_callback31%of 26
__rpm_get_callback72%of 14
__rpm_put_suppliers10%of 10
__update_runtime_status53%of 21
dev_memalloc_noio100%of 1
devm_pm_runtime_enable---of 3
pm_runtime_active_time---of 8
pm_runtime_allow---of 6
pm_runtime_autosuspend_expiration---of 4
pm_runtime_barrier72%of 7
pm_runtime_disable_action---of 1
pm_runtime_drop_link---of 4
pm_runtime_enable50%of 8
pm_runtime_forbid100%of 3
pm_runtime_force_resume---of 7
pm_runtime_force_suspend---of 11
pm_runtime_get_conditional---of 9
pm_runtime_get_if_active---of 1
pm_runtime_get_if_in_use---of 1
pm_runtime_get_suppliers40%of 5
pm_runtime_init100%of 1
pm_runtime_irq_safe---of 3
pm_runtime_new_link---of 1
pm_runtime_no_callbacks---of 2
pm_runtime_put_suppliers40%of 5
pm_runtime_reinit50%of 6
pm_runtime_release_supplier---of 6
pm_runtime_remove67%of 6
pm_runtime_set_autosuspend_delay100%of 1
pm_runtime_set_memalloc_noio75%of 8
pm_runtime_suspended_time---of 8
pm_runtime_work---of 7
pm_schedule_suspend---of 7
pm_suspend_timer_fn---of 5
rpm_callback80%of 5
rpm_check_suspend_allowed92%of 12
rpm_drop_usage_count67%of 3
rpm_get_suppliers14%of 15
rpm_idle68%of 40
rpm_resume68%of 78
rpm_suspend51%of 75
trace_rpm_return_int34%of 15
trace_rpm_usage34%of 15
update_autosuspend40%of 10
-----------
SUMMARY61%of 483
__snd_seq_driver_register---of 6
autoload_drivers---of 3
print_dev_info---of 2
request_seq_drv---of 4
snd_seq_autoload_exit---of 1
snd_seq_autoload_init---of 1
snd_seq_bus_match---of 3
snd_seq_dev_release100%of 1
snd_seq_device_dev_disconnect100%of 1
snd_seq_device_dev_free100%of 3
snd_seq_device_dev_register---of 4
snd_seq_device_info---of 1
snd_seq_device_load_drivers---of 1
snd_seq_device_new---of 10
snd_seq_driver_unregister---of 1
-----------
SUMMARY100%of 5
-----------
SUMMARY---of 0
atusb_channel---of 3
atusb_disconnect72%of 7
atusb_ed---of 3
atusb_in---of 32
atusb_probe---of 57
atusb_set_cca_ed_level---of 6
atusb_set_cca_mode---of 5
atusb_set_channel---of 1
atusb_set_csma_params---of 4
atusb_set_frame_retries---of 1
atusb_set_hw_addr_filt---of 13
atusb_set_promiscuous_mode---of 6
atusb_set_txpower---of 6
atusb_start---of 6
atusb_stop---of 3
atusb_txpower---of 3
atusb_work_urbs---of 13
atusb_write_subreg.isra.0---of 7
atusb_xmit---of 5
atusb_xmit_complete---of 2
hulusb_set_channel---of 15
hulusb_set_lbt---of 1
hulusb_set_txpower---of 6
-----------
SUMMARY72%of 7
lg_event---of 6
lg_input_mapped---of 16
lg_input_mapping---of 190
lg_probe---of 36
lg_raw_event---of 3
lg_remove67%of 3
lg_report_fixup---of 37
lg_ultrax_remote_mapping---of 72
-----------
SUMMARY67%of 3
__input_release_device73%of 11
__input_unregister_device91%of 11
devm_input_allocate_device---of 5
devm_input_device_match---of 1
devm_input_device_release---of 6
devm_input_device_unregister40%of 5
inhibited_show---of 1
inhibited_store---of 20
input_add_uevent_bm_var100%of 4
input_alloc_absinfo---of 5
input_allocate_device---of 3
input_attach_handler.isra.0---of 12
input_bits_to_string36%of 17
input_close_device89%of 9
input_copy_abs---of 6
input_default_getkeycode86%of 14
input_default_setkeycode85%of 26
input_dev_freeze---of 3
input_dev_poweroff---of 1
input_dev_release100%of 1
input_dev_release_keys88%of 8
input_dev_resume---of 1
input_dev_show_cap_abs---of 1
input_dev_show_cap_ev---of 1
input_dev_show_cap_ff---of 1
input_dev_show_cap_key---of 1
input_dev_show_cap_led---of 1
input_dev_show_cap_msc---of 1
input_dev_show_cap_rel---of 1
input_dev_show_cap_snd---of 1
input_dev_show_cap_sw---of 1
input_dev_show_id_bustype---of 1
input_dev_show_id_product---of 1
input_dev_show_id_vendor---of 1
input_dev_show_id_version---of 1
input_dev_show_modalias---of 1
input_dev_show_name---of 2
input_dev_show_phys---of 2
input_dev_show_properties---of 1
input_dev_show_uniq---of 2
input_dev_suspend---of 3
input_dev_toggle---of 30
input_dev_uevent94%of 29
input_device_enabled---of 6
input_devices_seq_next---of 1
input_devices_seq_show---of 61
input_devices_seq_start---of 5
input_devnode100%of 3
input_enable_softrepeat---of 1
input_event100%of 3
input_event_dispose93%of 14
input_flush_device100%of 5
input_free_device20%of 5
input_free_minor100%of 1
input_get_keycode100%of 1
input_get_new_minor---of 5
input_get_timestamp100%of 3
input_grab_device100%of 5
input_handle_event42%of 62
input_handler_for_each_handle---of 14
input_handlers_seq_next---of 1
input_handlers_seq_show---of 5
input_handlers_seq_start---of 5
input_inject_event58%of 21
input_match_device_id---of 19
input_open_device91%of 11
input_pass_values74%of 38
input_print_bitmap90%of 10
input_print_modalias93%of 39
input_proc_devices_open---of 1
input_proc_devices_poll---of 6
input_proc_handlers_open---of 1
input_register_device---of 69
input_register_handle---of 11
input_register_handler---of 7
input_release_device100%of 1
input_repeat_key---of 7
input_reset_device---of 3
input_scancode_to_scalar67%of 6
input_seq_stop---of 2
input_set_abs_params---of 2
input_set_capability---of 15
input_set_keycode59%of 17
input_set_timestamp---of 1
input_to_handler50%of 18
input_unregister_device50%of 6
input_unregister_handle100%of 5
input_unregister_handler---of 7
-----------
SUMMARY73%of 409
as102_dvb_dmx_start_feed---of 12
as102_dvb_dmx_stop_feed---of 15
as102_dvb_register---of 23
as102_dvb_unregister100%of 1
as102_get_stats---of 8
as102_get_status---of 6
as102_get_tps---of 3
as102_set_tune---of 6
as102_stream_ctrl---of 8
as10x_pid_filter.isra.0---of 12
-----------
SUMMARY100%of 1
child_iter---of 3
period_to_str---of 8
show_spi_host_hba_id---of 5
show_spi_host_signalling---of 12
show_spi_host_width---of 6
show_spi_transport_dt---of 7
show_spi_transport_hold_mcs---of 7
show_spi_transport_iu---of 7
show_spi_transport_max_iu---of 1
show_spi_transport_max_offset---of 1
show_spi_transport_max_qas---of 1
show_spi_transport_max_width---of 1
show_spi_transport_min_period---of 13
show_spi_transport_offset---of 7
show_spi_transport_pcomp_en---of 7
show_spi_transport_period---of 13
show_spi_transport_qas---of 7
show_spi_transport_rd_strm---of 7
show_spi_transport_rti---of 7
show_spi_transport_width---of 7
show_spi_transport_wr_flow---of 7
spi_attach_transport---of 3
spi_device_configure---of 9
spi_device_match29%of 7
spi_display_xfer_agreement---of 21
spi_dv_device---of 76
spi_dv_device_compare_inquiry---of 12
spi_dv_device_echo_buffer---of 27
spi_dv_device_work_wrapper---of 1
spi_dv_retrain---of 26
spi_execute---of 1
spi_host_configure---of 7
spi_host_match---of 9
spi_host_setup---of 4
spi_populate_ppr_msg---of 1
spi_populate_sync_msg---of 1
spi_populate_tag_msg---of 4
spi_populate_width_msg---of 1
spi_print_msg---of 30
spi_print_msg.cold---of 2
spi_release_transport---of 5
spi_schedule_dv_device---of 6
spi_setup_transport_attrs---of 1
spi_target_configure---of 1
spi_target_match---of 12
sprint_frac.constprop.0---of 5
store_spi_host_signalling---of 17
store_spi_revalidate---of 1
store_spi_transport_dt---of 7
store_spi_transport_hold_mcs---of 7
store_spi_transport_iu---of 7
store_spi_transport_max_iu---of 1
store_spi_transport_max_offset---of 1
store_spi_transport_max_qas---of 1
store_spi_transport_max_width---of 1
store_spi_transport_min_period---of 1
store_spi_transport_offset---of 7
store_spi_transport_pcomp_en---of 7
store_spi_transport_period---of 9
store_spi_transport_period_helper.constprop.0---of 9
store_spi_transport_qas---of 7
store_spi_transport_rd_strm---of 7
store_spi_transport_rti---of 7
store_spi_transport_width---of 7
store_spi_transport_wr_flow---of 7
target_attribute_is_visible---of 74
-----------
SUMMARY29%of 7
-----------
SUMMARY---of 0
kobil_close---of 1
kobil_init_termios---of 1
kobil_ioctl---of 7
kobil_open---of 24
kobil_port_probe---of 12
kobil_port_remove100%of 1
kobil_read_int_callback---of 10
kobil_set_termios---of 12
kobil_tiocmget---of 12
kobil_tiocmset---of 25
kobil_write---of 28
kobil_write_room---of 1
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
tick_init_highres---of 1
tick_oneshot_mode_active---of 8
tick_program_event58%of 7
tick_resume_oneshot---of 1
tick_setup_oneshot---of 1
tick_switch_to_oneshot---of 10
-----------
SUMMARY58%of 7
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
check_access_path---of 2
collect_domain_accesses---of 15
current_check_refer_path---of 22
find_rule---of 18
get_current_fs_domain40%of 5
get_mode_access---of 4
hook_file_alloc_security100%of 1
hook_file_open9%of 23
hook_file_truncate100%of 2
hook_inode_free_security50%of 2
hook_move_mount---of 1
hook_path_link---of 1
hook_path_mkdir---of 3
hook_path_mknod67%of 3
hook_path_rename---of 1
hook_path_rmdir---of 3
hook_path_symlink---of 3
hook_path_truncate---of 3
hook_path_unlink---of 3
hook_sb_delete---of 47
hook_sb_mount---of 1
hook_sb_pivotroot---of 1
hook_sb_remount---of 1
hook_sb_umount---of 1
is_access_to_paths_allowed---of 42
is_eacces---of 9
landlock_append_fs_rule---of 50
release_inode---of 4
scope_to_request---of 10
-----------
SUMMARY28%of 36
netconsole_netdev_event29%of 21
write_ext_msg---of 35
write_msg17%of 12
-----------
SUMMARY25%of 33
-----------
SUMMARY---of 0
dvb_usb_nec_rc_key_to_event---of 10
dvb_usb_read_remote_control---of 5
dvb_usb_remote_exit100%of 5
dvb_usb_remote_init---of 29
legacy_dvb_usb_get_keymap_index62%of 13
legacy_dvb_usb_getkeycode100%of 4
legacy_dvb_usb_read_remote_control---of 8
legacy_dvb_usb_setkeycode25%of 8
-----------
SUMMARY64%of 30
alloc_sglist.isra.0---of 22
complicated_callback---of 26
ctrl_complete---of 33
ctrl_out---of 26
free_sglist.part.0---of 7
is_good_config---of 13
perform_sglist---of 12
sg_timeout---of 1
simple_callback---of 1
simple_check_buf---of 16
simple_fill_buf---of 7
simple_free_urb---of 4
simple_io---of 22
test_ctrl_queue---of 43
test_halt.part.0---of 14
test_queue---of 63
test_queue.cold---of 3
test_unaligned_bulk---of 3
unlink1---of 18
unlink1_callback---of 3
unlink_queued---of 27
unlink_queued_callback---of 8
usbtest_alloc_urb---of 23
usbtest_disconnect67%of 3
usbtest_do_ioctl---of 216
usbtest_ioctl---of 15
usbtest_probe---of 81
usbtest_resume---of 1
usbtest_suspend---of 1
verify_not_halted---of 6
-----------
SUMMARY67%of 3
__ieee80211_create_tpt_led_trigger---of 7
__ieee80211_get_assoc_led_name---of 1
__ieee80211_get_radio_led_name---of 1
__ieee80211_get_rx_led_name---of 1
__ieee80211_get_tx_led_name---of 1
ieee80211_alloc_led_names---of 9
ieee80211_assoc_led_activate---of 1
ieee80211_assoc_led_deactivate---of 1
ieee80211_free_led_names100%of 1
ieee80211_led_assoc---of 4
ieee80211_led_exit---of 10
ieee80211_led_init---of 15
ieee80211_led_radio---of 4
ieee80211_mod_tpt_led_trig---of 9
ieee80211_radio_led_activate---of 1
ieee80211_radio_led_deactivate---of 1
ieee80211_rx_led_activate---of 1
ieee80211_rx_led_deactivate---of 1
ieee80211_tpt_led_activate---of 1
ieee80211_tpt_led_deactivate---of 1
ieee80211_tx_led_activate---of 1
ieee80211_tx_led_deactivate---of 1
tpt_trig_timer---of 8
-----------
SUMMARY100%of 1
__rtnl_link_register---of 12
__rtnl_link_unregister---of 9
__rtnl_newlink---of 115
__rtnl_unlock---of 4
do_set_master---of 10
do_setlink---of 275
if_nlmsg_size27%of 53
lockdep_rtnl_is_held100%of 1
ndo_dflt_bridge_getlink---of 76
ndo_dflt_fdb_add---of 14
ndo_dflt_fdb_del---of 8
ndo_dflt_fdb_dump---of 5
netdev_set_operstate---of 5
nla_put_ifalias29%of 7
nlmsg_populate_fdb---of 7
nlmsg_populate_fdb_fill.constprop.0---of 11
nlmsg_trim---of 4
refcount_dec_and_rtnl_lock---of 1
rtmsg_ifinfo80%of 5
rtmsg_ifinfo_build_skb40%of 10
rtmsg_ifinfo_newnet---of 3
rtmsg_ifinfo_send67%of 3
rtnetlink_bind---of 4
rtnetlink_event29%of 7
rtnetlink_net_exit---of 1
rtnetlink_net_init---of 3
rtnetlink_put_metrics8%of 25
rtnetlink_rcv---of 1
rtnetlink_rcv_msg---of 72
rtnetlink_send---of 1
rtnl_af_lookup---of 8
rtnl_af_register---of 3
rtnl_af_unregister---of 3
rtnl_bridge_dellink---of 28
rtnl_bridge_getlink---of 32
rtnl_bridge_notify---of 13
rtnl_bridge_setlink---of 30
rtnl_calcit.isra.0---of 21
rtnl_configure_link---of 10
rtnl_create_link---of 57
rtnl_delete_link---of 4
rtnl_dellink---of 51
rtnl_dellinkprop---of 1
rtnl_dev_get---of 6
rtnl_dump_all---of 29
rtnl_dump_ifinfo---of 68
rtnl_ensure_unique_netns---of 12
rtnl_fdb_add---of 37
rtnl_fdb_del---of 52
rtnl_fdb_dump---of 38
rtnl_fdb_get---of 68
rtnl_fdb_notify---of 5
rtnl_fill_ifinfo44%of 266
rtnl_fill_stats100%of 4
rtnl_fill_statsinfo.constprop.0---of 146
rtnl_fill_vf 18%of 17
rtnl_fill_vfinfo---of 44
rtnl_get_link---of 21
rtnl_get_net_ns_capable---of 9
rtnl_getlink---of 49
rtnl_is_locked100%of 1
rtnl_kfree_skbs---of 3
rtnl_link_get_net---of 11
rtnl_link_get_net_capable.constprop.0---of 18
rtnl_link_register---of 6
rtnl_link_unregister---of 4
rtnl_linkprop.isra.0---of 35
rtnl_lock100%of 1
rtnl_lock_killable---of 1
rtnl_mdb_add---of 19
rtnl_mdb_del---of 27
rtnl_mdb_dump---of 19
rtnl_mdb_get---of 19
rtnl_newlink---of 3
rtnl_newlinkprop---of 1
rtnl_nla_parse_ifinfomsg---of 5
rtnl_notify67%of 3
rtnl_offload_xstats_get_size---of 14
rtnl_offload_xstats_notify---of 9
rtnl_prop_list_size39%of 18
rtnl_put_cacheinfo30%of 10
rtnl_register---of 2
rtnl_register_internal---of 39
rtnl_register_module---of 1
rtnl_set_sk_err---of 1
rtnl_setlink---of 16
rtnl_stats_dump---of 24
rtnl_stats_get---of 42
rtnl_stats_get_parse---of 19
rtnl_stats_set---of 21
rtnl_trylock---of 1
rtnl_unicast---of 1
rtnl_unlock---of 1
rtnl_unregister---of 19
rtnl_unregister_all---of 18
rtnl_valid_stats_req---of 13
rtnl_validate_mdb_entry---of 31
rtnl_validate_mdb_entry_del_bulk---of 16
rtnl_validate_mdb_entry_get---of 20
set_operstate---of 11
valid_bridge_getlink_req.constprop.0---of 25
valid_fdb_dump_legacy.constprop.0---of 10
valid_fdb_dump_strict.constprop.0---of 25
validate_linkmsg---of 36
-----------
SUMMARY39%of 431
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__create_xol_area---of 28
__replace_page---of 77
__update_ref_ctr---of 21
__uprobe_register---of 36
__uprobe_unregister---of 12
arch_uprobe_copy_ixol---of 1
arch_uprobe_ignore---of 1
const_folio_flags.constprop.0---of 10
copy_from_page---of 2
copy_to_page---of 2
delayed_uprobe_delete---of 5
dup_xol_work---of 5
filter_chain---of 5
find_uprobe---of 16
folio_flags.constprop.0---of 10
install_breakpoint.part.0---of 7
is_swbp_insn---of 1
is_trap_insn---of 1
prepare_uprobe---of 31
put_uprobe---of 10
register_for_each_vma---of 62
set_orig_insn---of 1
set_swbp---of 1
update_ref_ctr---of 37
uprobe_apply---of 12
uprobe_clear_state---of 18
uprobe_copy_process---of 20
uprobe_deny_signal23%of 9
uprobe_dup_mmap---of 2
uprobe_end_dup_mmap---of 10
uprobe_free_utask---of 6
uprobe_get_swbp_addr---of 1
uprobe_get_trap_addr---of 5
uprobe_mmap---of 63
uprobe_munmap---of 19
uprobe_notify_resume---of 169
uprobe_post_sstep_notifier---of 6
uprobe_pre_sstep_notifier---of 7
uprobe_register---of 1
uprobe_register_refctr---of 1
uprobe_start_dup_mmap---of 10
uprobe_unregister---of 3
uprobe_write_opcode---of 123
xol_free_insn_slot---of 10
-----------
SUMMARY23%of 9
-----------
SUMMARY---of 0
__bpf_trace_rseq_ip_fixup---of 1
__bpf_trace_rseq_update---of 1
__do_sys_rseq---of 23
__ia32_sys_rseq---of 1
__rseq_handle_notify_resume26%of 83
__traceiter_rseq_ip_fixup---of 3
__traceiter_rseq_update---of 3
__x64_sys_rseq---of 1
perf_trace_rseq_ip_fixup---of 5
perf_trace_rseq_update---of 5
rseq_warn_flags.part.0---of 7
trace_event_raw_event_rseq_ip_fixup---of 6
trace_event_raw_event_rseq_update---of 6
trace_raw_output_rseq_ip_fixup---of 5
trace_raw_output_rseq_update---of 5
-----------
SUMMARY26%of 83
-----------
SUMMARY---of 0
hiddev_connect---of 17
hiddev_devnode100%of 3
hiddev_disconnect100%of 3
hiddev_fasync---of 1
hiddev_hid_event---of 4
hiddev_ioctl51%of 57
hiddev_ioctl_string.constprop.0.isra.0 50%of 10
hiddev_ioctl_usage.isra.0 56%of 56
hiddev_lookup_report67%of 9
hiddev_open79%of 19
hiddev_poll---of 7
hiddev_read10%of 33
hiddev_release---of 10
hiddev_report_event---of 4
hiddev_send_event.isra.0---of 6
hiddev_write---of 1
-----------
SUMMARY50%of 190
speed_show---of 1
speed_store---of 14
tv_disconnect100%of 1
tv_probe---of 4
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
__devinet_sysctl_register---of 7
__inet_del_ifa---of 62
__inet_insert_ifa---of 40
__ip_dev_find---of 31
check_lifetime---of 55
confirm_addr_indev---of 45
devinet_conf_proc---of 53
devinet_exit_net---of 5
devinet_init_net---of 21
devinet_ioctl---of 128
devinet_ioctl.cold---of 1
devinet_sysctl_forward---of 25
devinet_sysctl_register---of 6
in_dev_dump_addr---of 21
in_dev_finish_destroy60%of 10
in_dev_free_rcu---of 1
inet_addr_onlink---of 34
inet_confirm_addr---of 23
inet_dump_ifaddr---of 40
inet_fill_ifaddr---of 36
inet_fill_link_af60%of 10
inet_get_link_af_size50%of 8
inet_gifconf---of 30
inet_hash_remove---of 7
inet_ifa_byprefix---of 18
inet_lookup_ifaddr_rcu---of 5
inet_netconf_dump_devconf---of 34
inet_netconf_fill_devconf19%of 37
inet_netconf_get_devconf---of 67
inet_netconf_notify_devconf59%of 17
inet_rcu_free_ifa---of 7
inet_rtm_deladdr---of 32
inet_rtm_newaddr---of 34
inet_select_addr---of 67
inet_set_link_af---of 14
inet_valid_dump_ifaddr_req.constprop.0---of 25
inet_validate_link_af---of 17
inetdev_by_index---of 18
inetdev_event18%of 104
inetdev_init---of 26
ip_mc_autojoin_config.isra.0---of 7
ipv4_doint_and_flush---of 4
register_inetaddr_notifier---of 1
register_inetaddr_validator_notifier---of 1
rtm_to_ifaddr.constprop.0---of 51
rtm_to_ifaddr.constprop.0.cold---of 1
rtmsg_ifa---of 10
set_ifa_lifetime---of 9
unregister_inetaddr_notifier---of 1
unregister_inetaddr_validator_notifier---of 1
-----------
SUMMARY28%of 186
-----------
SUMMARY---of 0
usb_of_get_connect_type---of 12
usb_of_get_device_node40%of 5
usb_of_get_interface_node---of 6
usb_of_has_combined_node---of 6
-----------
SUMMARY40%of 5
event_input_timer---of 1
snd_seq_system_broadcast100%of 3
snd_seq_system_client_done---of 2
snd_seq_system_notify---of 1
-----------
SUMMARY100%of 3
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
update_vsyscall95%of 18
update_vsyscall.cold---of 10
update_vsyscall_tz---of 1
vdso_update_begin---of 1
vdso_update_end---of 1
-----------
SUMMARY95%of 18
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__usb_unanchor_urb100%of 7
usb_alloc_urb100%of 5
usb_anchor_empty---of 1
usb_anchor_resume_wakeups---of 4
usb_anchor_suspend_wakeups---of 2
usb_anchor_urb86%of 7
usb_block_urb---of 2
usb_free_urb100%of 2
usb_free_urb.part.0 86%of 7
usb_get_from_anchor100%of 5
usb_get_urb100%of 3
usb_get_urb.part.0 60%of 5
usb_init_urb100%of 2
usb_kill_anchored_urbs74%of 19
usb_kill_urb100%of 4
usb_kill_urb.part.0100%of 6
usb_pipe_type_check---of 6
usb_poison_anchored_urbs32%of 16
usb_poison_urb100%of 2
usb_poison_urb.part.0 50%of 8
usb_scuttle_anchored_urbs72%of 7
usb_submit_urb57%of 86
usb_submit_urb.cold---of 1
usb_unanchor_urb100%of 5
usb_unlink_anchored_urbs---of 15
usb_unlink_urb---of 5
usb_unpoison_anchored_urbs---of 5
usb_unpoison_urb100%of 2
usb_urb_ep_type_check---of 6
usb_wait_anchor_empty_timeout100%of 14
-----------
SUMMARY71%of 212
bEndpointAddress_show---of 1
bInterval_show---of 1
bLength_show---of 1
bmAttributes_show---of 1
direction_show---of 3
ep_device_release100%of 1
interval_show---of 3
type_show---of 3
usb_create_ep_devs---of 8
usb_remove_ep_devs100%of 2
wMaxPacketSize_show---of 1
-----------
SUMMARY100%of 3
__v4l2_event_dequeue---of 10
__v4l2_event_queue_fh---of 27
__v4l2_event_unsubscribe---of 14
v4l2_event_dequeue---of 15
v4l2_event_pending---of 1
v4l2_event_queue---of 4
v4l2_event_queue_fh---of 1
v4l2_event_src_merge---of 1
v4l2_event_src_replace---of 1
v4l2_event_subdev_unsubscribe---of 5
v4l2_event_subscribe---of 15
v4l2_event_subscribed---of 7
v4l2_event_unsubscribe---of 5
v4l2_event_unsubscribe.part.0---of 6
v4l2_event_unsubscribe_all---of 9
v4l2_event_wake_all75%of 4
v4l2_src_change_event_subdev_subscribe---of 4
v4l2_src_change_event_subscribe---of 4
-----------
SUMMARY75%of 4
associate_remote_show---of 6
associate_remote_store---of 6
display_close---of 14
display_open---of 43
free_imon_context---of 8
imon_clock_show---of 7
imon_clock_store---of 23
imon_disconnect---of 19
imon_find_endpoints---of 44
imon_incoming_packet---of 161
imon_ir_change_protocol---of 24
imon_probe---of 111
imon_resume---of 10
imon_suspend---of 5
imon_touch_display_timeout---of 2
lcd_write---of 19
send_packet---of 18
stabilize---of 20
usb_rx_callback_intf0 58%of 7
usb_rx_callback_intf1---of 7
usb_tx_callback---of 3
vfd_write---of 26
-----------
SUMMARY58%of 7
-----------
SUMMARY---of 0
picolcd_cir_close---of 1
picolcd_cir_open---of 1
picolcd_exit_cir100%of 1
picolcd_init_cir---of 6
picolcd_raw_cir---of 14
-----------
SUMMARY100%of 1
__video_device_pipeline_start---of 3
__video_device_pipeline_stop---of 3
__video_register_device---of 380
dev_debug_show---of 1
dev_debug_store---of 4
index_show---of 1
name_show---of 1
v4l2_device_release50%of 10
v4l2_ioctl---of 4
v4l2_mmap---of 8
v4l2_open---of 14
v4l2_poll---of 10
v4l2_prio_change---of 6
v4l2_prio_check---of 5
v4l2_prio_close---of 2
v4l2_prio_init---of 1
v4l2_prio_max---of 4
v4l2_prio_open---of 4
v4l2_read---of 8
v4l2_release---of 11
v4l2_write---of 8
video_devdata---of 1
video_device_alloc---of 1
video_device_pipeline---of 5
video_device_pipeline_alloc_start---of 3
video_device_pipeline_start---of 3
video_device_pipeline_stop---of 3
video_device_release---of 1
video_unregister_device100%of 5
-----------
SUMMARY67%of 15
__media_device_register---of 9
__media_device_unregister_entity100%of 9
__media_device_usb_init---of 21
copy_arg_from_user---of 3
copy_arg_to_user---of 3
media_device_cleanup---of 1
media_device_close---of 1
media_device_compat_ioctl---of 10
media_device_enum_entities---of 21
media_device_enum_links---of 21
media_device_get_info---of 20
media_device_get_topology---of 37
media_device_init---of 13
media_device_ioctl---of 15
media_device_open---of 1
media_device_pci_init---of 16
media_device_register_entity---of 15
media_device_register_entity_notify---of 3
media_device_release---of 2
media_device_request_alloc---of 5
media_device_setup_link---of 19
media_device_unregister---of 15
media_device_unregister_entity100%of 2
media_device_unregister_entity_notify---of 3
model_show---of 1
-----------
SUMMARY100%of 11
-----------
SUMMARY---of 0
__bpf_trace_tmigr_connect_child_parent---of 1
__bpf_trace_tmigr_connect_cpu_parent---of 1
__bpf_trace_tmigr_cpugroup---of 1
__bpf_trace_tmigr_group_and_cpu---of 1
__bpf_trace_tmigr_group_set---of 1
__bpf_trace_tmigr_handle_remote---of 1
__bpf_trace_tmigr_idle---of 1
__bpf_trace_tmigr_update_events---of 1
__tmigr_cpu_activate---of 16
__traceiter_tmigr_connect_child_parent---of 3
__traceiter_tmigr_connect_cpu_parent---of 3
__traceiter_tmigr_cpu_active---of 3
__traceiter_tmigr_cpu_idle---of 3
__traceiter_tmigr_cpu_new_timer---of 3
__traceiter_tmigr_cpu_new_timer_idle---of 3
__traceiter_tmigr_cpu_offline---of 3
__traceiter_tmigr_cpu_online---of 3
__traceiter_tmigr_group_set---of 3
__traceiter_tmigr_group_set_cpu_active---of 3
__traceiter_tmigr_group_set_cpu_inactive---of 3
__traceiter_tmigr_handle_remote---of 3
__traceiter_tmigr_handle_remote_cpu---of 3
__traceiter_tmigr_update_events---of 3
perf_trace_tmigr_connect_child_parent---of 5
perf_trace_tmigr_connect_cpu_parent---of 5
perf_trace_tmigr_cpugroup---of 5
perf_trace_tmigr_group_and_cpu---of 5
perf_trace_tmigr_group_set---of 5
perf_trace_tmigr_handle_remote---of 5
perf_trace_tmigr_idle---of 5
perf_trace_tmigr_update_events---of 8
tmigr_active_up---of 23
tmigr_connect_child_parent---of 22
tmigr_connect_child_parent.cold---of 1
tmigr_cpu_activate---of 5
tmigr_cpu_deactivate---of 21
tmigr_cpu_new_timer---of 41
tmigr_cpu_offline---of 28
tmigr_cpu_online---of 98
tmigr_cpu_online.cold---of 1
tmigr_handle_remote---of 62
tmigr_inactive_up---of 40
tmigr_inactive_up.cold---of 1
tmigr_new_timer_up---of 1
tmigr_next_groupevt---of 9
tmigr_quick_check---of 13
tmigr_requires_handle_remote84%of 12
tmigr_trigger_active---of 4
tmigr_update_events---of 51
trace_event_raw_event_tmigr_connect_child_parent---of 6
trace_event_raw_event_tmigr_connect_cpu_parent---of 6
trace_event_raw_event_tmigr_cpugroup---of 6
trace_event_raw_event_tmigr_group_and_cpu---of 6
trace_event_raw_event_tmigr_group_set---of 6
trace_event_raw_event_tmigr_handle_remote---of 6
trace_event_raw_event_tmigr_idle---of 6
trace_event_raw_event_tmigr_update_events---of 9
trace_raw_output_tmigr_connect_child_parent---of 4
trace_raw_output_tmigr_connect_cpu_parent---of 4
trace_raw_output_tmigr_cpugroup---of 5
trace_raw_output_tmigr_group_and_cpu---of 4
trace_raw_output_tmigr_group_set---of 5
trace_raw_output_tmigr_handle_remote---of 5
trace_raw_output_tmigr_idle---of 5
trace_raw_output_tmigr_update_events---of 4
walk_groups---of 8
-----------
SUMMARY84%of 12
__alloc_percpu---of 1
__alloc_percpu_gfp---of 1
__alloc_reserved_percpu---of 1
__bpf_trace_percpu_alloc_percpu---of 1
__bpf_trace_percpu_alloc_percpu_fail---of 1
__bpf_trace_percpu_create_chunk---of 1
__bpf_trace_percpu_destroy_chunk---of 1
__bpf_trace_percpu_free_percpu---of 1
__is_kernel_percpu_address82%of 11
__pcpu_chunk_move---of 10
__traceiter_percpu_alloc_percpu---of 4
__traceiter_percpu_alloc_percpu_fail---of 4
__traceiter_percpu_create_chunk---of 3
__traceiter_percpu_destroy_chunk---of 3
__traceiter_percpu_free_percpu---of 3
free_percpu19%of 81
is_kernel_percpu_address100%of 1
pcpu_alloc---of 113
pcpu_alloc_area---of 25
pcpu_alloc_size---of 3
pcpu_balance_free---of 44
pcpu_balance_workfn---of 60
pcpu_block_refresh_hint---of 6
pcpu_block_update93%of 26
pcpu_block_update_hint_alloc---of 34
pcpu_chunk_addr_search100%of 8
pcpu_chunk_depopulated---of 6
pcpu_chunk_populated---of 6
pcpu_chunk_refresh_hint72%of 7
pcpu_chunk_relocate75%of 8
pcpu_chunk_slot72%of 7
pcpu_create_chunk---of 14
pcpu_depopulate_chunk---of 16
pcpu_dump_alloc_info---of 21
pcpu_dump_alloc_info.cold---of 1
pcpu_find_block_fit---of 14
pcpu_free_area84%of 30
pcpu_free_pages.constprop.0---of 10
pcpu_get_pages---of 7
pcpu_init_md_blocks---of 2
pcpu_mem_zalloc---of 8
pcpu_memcg_post_alloc_hook---of 38
pcpu_next_fit_region.constprop.0---of 17
pcpu_next_md_free_region100%of 11
pcpu_nr_pages---of 1
pcpu_populate_chunk---of 48
pcpu_post_unmap_tlb_flush---of 1
pcpu_reintegrate_chunk---of 5
per_cpu_ptr_to_phys---of 15
perf_trace_percpu_alloc_percpu---of 5
perf_trace_percpu_alloc_percpu_fail---of 5
perf_trace_percpu_create_chunk---of 5
perf_trace_percpu_destroy_chunk---of 5
perf_trace_percpu_free_percpu---of 5
trace_event_raw_event_percpu_alloc_percpu---of 6
trace_event_raw_event_percpu_alloc_percpu_fail---of 6
trace_event_raw_event_percpu_create_chunk---of 6
trace_event_raw_event_percpu_destroy_chunk---of 6
trace_event_raw_event_percpu_free_percpu---of 6
trace_percpu_create_chunk---of 15
trace_raw_output_percpu_alloc_percpu---of 6
trace_raw_output_percpu_alloc_percpu_fail---of 5
trace_raw_output_percpu_create_chunk---of 5
trace_raw_output_percpu_destroy_chunk---of 5
trace_raw_output_percpu_free_percpu---of 5
-----------
SUMMARY58%of 190
__shmem_file_setup---of 11
__shmem_get_inode50%of 32
const_folio_flags.constprop.0 40%of 10
folio_flags.constprop.0 40%of 10
percpu_ref_put_many.constprop.0---of 15
shmem_add_to_page_cache49%of 37
shmem_add_to_page_cache.cold---of 2
shmem_alloc_folio58%of 7
shmem_alloc_inode100%of 3
shmem_charge---of 3
shmem_create100%of 1
shmem_destroy_inode---of 4
shmem_encode_fh---of 7
shmem_error_remove_folio---of 1
shmem_evict_inode---of 27
shmem_fallocate68%of 46
shmem_fault---of 37
shmem_fh_to_dentry---of 7
shmem_file_llseek---of 7
shmem_file_open100%of 1
shmem_file_read_iter71%of 34
shmem_file_setup---of 1
shmem_file_setup_with_mnt---of 1
shmem_file_splice_read---of 29
shmem_file_write_iter100%of 5
shmem_fileattr_get100%of 1
shmem_fileattr_set100%of 13
shmem_fill_super---of 20
shmem_free_fc---of 4
shmem_free_in_core_inode---of 3
shmem_free_swap---of 3
shmem_get_folio---of 1
shmem_get_folio_gfp47%of 76
shmem_get_link---of 14
shmem_get_offset_ctx---of 1
shmem_get_parent---of 1
shmem_get_partial_folio46%of 11
shmem_get_policy---of 1
shmem_get_sbmpol40%of 5
shmem_get_tree---of 1
shmem_get_unmapped_area---of 5
shmem_getattr---of 11
shmem_init_fs_context---of 3
shmem_init_inode100%of 1
shmem_initxattrs---of 22
shmem_inode_acct_blocks59%of 12
shmem_inode_acct_blocks.cold---of 2
shmem_inode_unacct_blocks100%of 5
shmem_inode_unacct_blocks.cold---of 1
shmem_kernel_file_setup---of 1
shmem_link---of 15
shmem_listxattr---of 1
shmem_lock---of 8
shmem_mapping100%of 1
shmem_match---of 3
shmem_mkdir---of 3
shmem_mknod70%of 10
shmem_mmap---of 25
shmem_parse_one---of 60
shmem_parse_options---of 11
shmem_partial_swap_usage---of 31
shmem_put_link---of 4
shmem_put_super---of 3
shmem_read_folio_gfp---of 4
shmem_read_mapping_page_gfp---of 7
shmem_recalc_inode80%of 5
shmem_reconfigure---of 37
shmem_rename2---of 16
shmem_replace_entry---of 7
shmem_rmdir---of 3
shmem_set_policy---of 1
shmem_setattr52%of 49
shmem_show_options---of 20
shmem_statfs---of 5
shmem_swap_usage---of 6
shmem_swapin_folio---of 85
shmem_symlink---of 19
shmem_tmpfile---of 9
shmem_truncate_range---of 1
shmem_uncharge---of 3
shmem_undo_range40%of 82
shmem_undo_range.cold---of 1
shmem_unlink---of 5
shmem_unlock_mapping---of 8
shmem_unuse---of 16
shmem_unuse_inode---of 47
shmem_write_begin56%of 9
shmem_write_end64%of 25
shmem_write_end.cold---of 3
shmem_writepage---of 59
shmem_writepage.cold---of 1
shmem_xattr_handler_get100%of 1
shmem_xattr_handler_set---of 17
shmem_zero_setup---of 6
synchronous_wake_function---of 3
vma_is_anon_shmem---of 1
vma_is_shmem---of 3
xas_next_entry---of 16
zero_pipe_buf_get---of 1
zero_pipe_buf_try_steal---of 1
zero_user_segments.constprop.0 74%of 15
zero_user_segments.constprop.0.cold---of 2
-----------
SUMMARY56%of 507
__irq_work_queue_local14%of 22
irq_work_claim100%of 3
irq_work_needs_cpu---of 9
irq_work_queue75%of 4
irq_work_queue_on---of 13
irq_work_run---of 1
irq_work_run_list40%of 5
irq_work_single---of 5
irq_work_sync---of 11
irq_work_tick50%of 4
-----------
SUMMARY35%of 38
-----------
SUMMARY---of 0
__wacom_devm_sysfs_create_group---of 6
__wacom_initialize_battery---of 5
__wacom_led_brightness_get---of 8
_wacom_query_tablet_data.isra.0---of 31
wacom_aes_battery_handler---of 2
wacom_allocate_input---of 6
wacom_battery_get_property---of 16
wacom_battery_work---of 5
wacom_bt_query_tablet_data.isra.0---of 23
wacom_btnimg0_store---of 6
wacom_btnimg1_store---of 6
wacom_btnimg2_store---of 6
wacom_btnimg3_store---of 6
wacom_btnimg4_store---of 6
wacom_btnimg5_store---of 6
wacom_btnimg6_store---of 6
wacom_btnimg7_store---of 6
wacom_buttons_luminance_show---of 1
wacom_buttons_luminance_store---of 5
wacom_calc_hid_res---of 1
wacom_close---of 2
wacom_devm_kfifo_release100%of 1
wacom_devm_sysfs_group_release---of 3
wacom_hid_usage_quirk---of 34
wacom_init_work---of 1
wacom_initialize_leds---of 34
wacom_led0_select_show---of 1
wacom_led0_select_store---of 5
wacom_led1_select_show---of 1
wacom_led1_select_store---of 5
wacom_led_brightness_set---of 5
wacom_led_control---of 33
wacom_led_find---of 5
wacom_led_groups_alloc_and_register_one---of 19
wacom_led_groups_allocate---of 6
wacom_led_groups_release100%of 1
wacom_led_groups_release_one---of 1
wacom_led_next---of 9
wacom_led_putimage---of 29
wacom_leds_brightness_get---of 7
wacom_mode_change_work---of 15
wacom_open---of 1
wacom_parse_and_register---of 254
wacom_probe---of 19
wacom_raw_event---of 34
wacom_remote_destroy_one---of 8
wacom_remote_work---of 39
wacom_remotes_destroy100%of 2
wacom_remove86%of 7
wacom_remove_shared_data62%of 13
wacom_reset_resume---of 1
wacom_resume---of 1
wacom_show_remote0_mode---of 3
wacom_show_remote1_mode---of 3
wacom_show_remote2_mode---of 3
wacom_show_remote3_mode---of 3
wacom_show_remote4_mode---of 3
wacom_show_speed---of 1
wacom_status0_luminance_show---of 1
wacom_status0_luminance_store---of 5
wacom_status1_luminance_show---of 1
wacom_status1_luminance_store---of 5
wacom_store_speed---of 4
wacom_store_unpair_remote---of 14
wacom_wac_queue_flush---of 6
wacom_wireless_work---of 30
-----------
SUMMARY75%of 24
-----------
SUMMARY---of 0
__drm_helper_update_and_validate---of 56
__drm_helper_update_and_validate.cold---of 2
check_connector_changed---of 17
drm_connector_helper_get_modes---of 1
drm_connector_helper_get_modes_fixed---of 11
drm_connector_helper_hpd_irq_event---of 8
drm_connector_helper_tv_get_modes---of 27
drm_connector_helper_tv_get_modes.cold---of 2
drm_connector_mode_valid---of 7
drm_crtc_helper_mode_valid_fixed---of 5
drm_crtc_mode_valid---of 5
drm_encoder_mode_valid---of 5
drm_helper_hpd_irq_event---of 20
drm_helper_probe_detect---of 11
drm_helper_probe_detect_ctx---of 15
drm_helper_probe_single_connector_modes---of 61
drm_kms_helper_connector_hotplug_event---of 3
drm_kms_helper_disable_hpd84%of 6
drm_kms_helper_hotplug_event---of 3
drm_kms_helper_is_poll_worker---of 3
drm_kms_helper_poll_disable58%of 7
drm_kms_helper_poll_enable---of 19
drm_kms_helper_poll_fini100%of 2
drm_kms_helper_poll_init---of 1
drm_kms_helper_poll_reschedule---of 3
output_poll_execute---of 25
-----------
SUMMARY74%of 15
__put_net---of 2
cleanup_net---of 51
copy_net_ns---of 35
free_exit_list---of 10
get_net_ns---of 8
get_net_ns_by_fd---of 11
get_net_ns_by_id---of 24
get_net_ns_by_pid---of 19
net_alloc_generic---of 3
net_defaults_init_net---of 1
net_drop_ns---of 2
net_eq_idr---of 4
net_free---of 5
net_ns_barrier---of 1
net_ns_get_ownership---of 7
net_ns_net_exit---of 1
net_ns_net_init---of 1
netns_get---of 10
netns_install---of 15
netns_owner---of 1
netns_put---of 6
ops_exit_list---of 6
ops_free_list.part.0---of 17
ops_init---of 27
peernet2id54%of 13
peernet2id_alloc---of 25
peernet_has_id---of 1
ref_tracker_dir_init.constprop.0---of 1
register_pernet_device---of 4
register_pernet_operations---of 19
register_pernet_subsys---of 1
rtnl_net_dumpid---of 23
rtnl_net_dumpid_one---of 9
rtnl_net_fill---of 11
rtnl_net_getid---of 57
rtnl_net_newid---of 39
rtnl_net_notifyid---of 8
rtnl_valid_dump_net_req.constprop.0.isra.0---of 18
setup_net---of 34
unregister_pernet_device---of 3
unregister_pernet_operations---of 9
unregister_pernet_subsys---of 1
-----------
SUMMARY54%of 13
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
____fput100%of 1
__fput80%of 40
__fput_sync100%of 2
alloc_empty_backing_file---of 6
alloc_empty_file50%of 10
alloc_empty_file_noaccount---of 6
alloc_file_clone---of 3
alloc_file_pseudo---of 6
alloc_file_pseudo_noaccount---of 6
backing_file_user_path---of 1
delayed_fput---of 2
file_init_path---of 19
flush_delayed_fput---of 2
fput74%of 15
get_max_files---of 1
init_file63%of 8
proc_nr_files---of 1
-----------
SUMMARY74%of 76
attribute_container_add_attrs---of 9
attribute_container_add_class_device---of 3
attribute_container_add_class_device_adapter---of 3
attribute_container_add_device---of 14
attribute_container_class_device_del---of 1
attribute_container_classdev_to_container---of 1
attribute_container_device_trigger37%of 11
attribute_container_device_trigger_safe---of 15
attribute_container_find_class_device---of 6
attribute_container_register---of 3
attribute_container_release---of 1
attribute_container_remove_attrs---of 6
attribute_container_remove_device42%of 12
attribute_container_trigger---of 5
attribute_container_unregister---of 5
do_attribute_container_device_trigger_safe---of 15
internal_container_klist_get---of 1
internal_container_klist_put---of 1
-----------
SUMMARY40%of 23
cmp_ex_search100%of 3
cmp_ex_sort---of 3
search_extable100%of 1
sort_extable---of 1
swap_ex---of 1
trim_init_extable---of 18
-----------
SUMMARY100%of 4
-----------
SUMMARY---of 0
__report_bad_irq---of 8
irq_wait_for_poll---of 10
noirqdebug_setup---of 1
note_interrupt23%of 44
poll_spurious_irqs---of 10
try_one_irq---of 13
-----------
SUMMARY23%of 44
page_counter_cancel58%of 7
page_counter_charge100%of 11
page_counter_memparse---of 5
page_counter_set_low---of 3
page_counter_set_max---of 6
page_counter_set_min---of 3
page_counter_try_charge87%of 15
page_counter_uncharge100%of 2
propagate_protected_usage.part.034%of 6
-----------
SUMMARY79%of 41
dvb_create_io_intf_links.isra.0---of 6
dvb_create_media_graph---of 59
dvb_create_tsout_entity---of 13
dvb_device_get---of 8
dvb_device_open---of 17
dvb_device_put100%of 2
dvb_device_put.part.080%of 5
dvb_devnode100%of 1
dvb_generic_ioctl---of 4
dvb_generic_open---of 9
dvb_generic_release---of 6
dvb_media_device_free92%of 12
dvb_module_probe---of 20
dvb_module_release---of 2
dvb_register_adapter---of 18
dvb_register_device---of 78
dvb_remove_device100%of 2
dvb_remove_device.part.0100%of 3
dvb_uevent100%of 1
dvb_unregister_adapter---of 3
dvb_unregister_device50%of 6
dvb_usercopy---of 11
-----------
SUMMARY85%of 32
dump_var_event---of 21
expand_var_event---of 8
seq_copy_in_kernel---of 1
seq_copy_in_user---of 5
snd_seq_cell_alloc.constprop.0---of 24
snd_seq_cell_free---of 14
snd_seq_dump_var_event---of 1
snd_seq_event_dup---of 27
snd_seq_expand_var_event---of 12
snd_seq_expand_var_event_at---of 5
snd_seq_info_pool---of 2
snd_seq_pool_delete100%of 3
snd_seq_pool_done59%of 12
snd_seq_pool_init---of 13
snd_seq_pool_mark_closing75%of 4
snd_seq_pool_new---of 3
snd_seq_pool_poll_wait---of 5
-----------
SUMMARY69%of 19
__drm_crtc_init_with_planes---of 34
__drm_crtc_init_with_planes.cold---of 2
__drm_mode_set_config_internal---of 16
__drmm_crtc_alloc_with_planes---of 7
__drmm_crtc_init_with_planes---of 8
drm_crtc_check_viewport---of 5
drm_crtc_cleanup---of 9
drm_crtc_create_fence---of 3
drm_crtc_create_scaling_filter_property---of 5
drm_crtc_fence_get_driver_name---of 3
drm_crtc_fence_get_timeline_name---of 3
drm_crtc_force_disable---of 5
drm_crtc_from_index---of 5
drm_crtc_init_with_planes---of 3
drm_crtc_register_all---of 6
drm_crtc_unregister_all75%of 4
drm_mode_crtc_set_obj_prop---of 5
drm_mode_getcrtc---of 19
drm_mode_set_config_internal---of 5
drm_mode_setcrtc---of 74
drmm_crtc_init_with_planes---of 1
drmm_crtc_init_with_planes_cleanup---of 1
-----------
SUMMARY75%of 4
-----------
SUMMARY---of 0
input_leds_brightness_get---of 3
input_leds_brightness_set100%of 1
input_leds_connect---of 29
input_leds_disconnect67%of 3
-----------
SUMMARY75%of 4
-----------
SUMMARY---of 0
__dev_exception_clean---of 6
dev_exception_add---of 14
dev_exception_clean---of 5
dev_exception_rm---of 13
dev_exceptions_copy---of 13
devcgroup_access_write---of 5
devcgroup_check_permission57%of 23
devcgroup_css_alloc---of 3
devcgroup_css_free---of 1
devcgroup_offline---of 1
devcgroup_online---of 5
devcgroup_seq_show---of 18
devcgroup_update_access---of 90
match_exception---of 13
match_exception_partial14%of 15
set_access---of 6
set_majmin---of 3
verify_new_ex---of 11
-----------
SUMMARY40%of 38
-----------
SUMMARY---of 0
unbusy_queued_urb---of 3
usb_wwan_chars_in_buffer---of 8
usb_wwan_close---of 8
usb_wwan_dtr_rts---of 2
usb_wwan_indat_callback---of 15
usb_wwan_open---of 10
usb_wwan_outdat_callback---of 4
usb_wwan_port_probe---of 13
usb_wwan_port_remove100%of 4
usb_wwan_resume---of 20
usb_wwan_send_setup---of 5
usb_wwan_setup_urb.constprop.0---of 5
usb_wwan_suspend---of 10
usb_wwan_tiocmget---of 1
usb_wwan_tiocmset---of 11
usb_wwan_write---of 24
usb_wwan_write_room---of 8
-----------
SUMMARY100%of 4
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__cpuset_memory_pressure_bump---of 20
alloc_trial_cpuset---of 5
compute_partition_effective_cpumask---of 30
cpuset_attach---of 23
cpuset_attach_task---of 10
cpuset_bind---of 5
cpuset_can_attach---of 31
cpuset_can_fork---of 33
cpuset_cancel_attach---of 7
cpuset_cancel_fork---of 23
cpuset_change_task_nodemask---of 6
cpuset_common_seq_show---of 10
cpuset_cpu_is_isolated---of 3
cpuset_cpus_allowed---of 24
cpuset_cpus_allowed_fallback---of 24
cpuset_css_alloc---of 8
cpuset_css_free---of 1
cpuset_css_offline---of 9
cpuset_css_online---of 39
cpuset_force_rebuild---of 1
cpuset_fork---of 36
cpuset_hotplug_workfn---of 143
cpuset_lock---of 1
cpuset_mem_spread_node---of 10
cpuset_mems_allowed---of 22
cpuset_mems_allowed_intersects---of 1
cpuset_migrate_mm.part.0---of 3
cpuset_migrate_mm_workfn---of 1
cpuset_node_allowed10%of 30
cpuset_nodemask_valid_mems_allowed---of 1
cpuset_post_attach---of 1
cpuset_print_current_mems_allowed---of 20
cpuset_read_s64---of 3
cpuset_read_u64---of 20
cpuset_slab_spread_node---of 10
cpuset_task_status_allowed---of 1
cpuset_unlock---of 1
cpuset_update_active_cpus---of 1
cpuset_update_task_spread_flags.part.0---of 6
cpuset_wait_for_hotplug---of 1
cpuset_write_resmask---of 225
cpuset_write_s64---of 11
cpuset_write_u64---of 13
css_tryget_online---of 23
current_cpuset_is_being_rebound---of 20
dec_dl_tasks_cs---of 10
dl_update_tasks_root_domain---of 6
fmeter_update---of 4
guarantee_online_cpus---of 26
inc_dl_tasks_cs---of 10
is_cpuset_subset---of 5
node_random---of 11
partition_is_populated---of 30
partition_xcpus_add---of 12
partition_xcpus_del---of 12
partition_xcpus_newstate---of 6
proc_cpuset_show---of 61
rebuild_sched_domains---of 1
rebuild_sched_domains_locked---of 170
remote_cpus_update---of 15
remote_partition_check---of 11
remote_partition_disable---of 16
reset_partition_data---of 9
sched_partition_show---of 11
sched_partition_write---of 42
tasks_nocpu_error---of 6
update_cpumasks_hier---of 115
update_domain_attr_tree---of 21
update_flag---of 14
update_parent_effective_cpumask---of 135
update_partition_exclusive---of 7
update_partition_sd_lb---of 8
update_prstate---of 33
update_sibling_cpumasks---of 73
update_tasks_cpumask---of 8
update_tasks_flags---of 5
update_tasks_nodemask---of 10
update_unbound_workqueue_cpumask---of 3
validate_change---of 57
-----------
SUMMARY10%of 30
__bpf_trace_spi_controller---of 1
__bpf_trace_spi_message---of 1
__bpf_trace_spi_message_done---of 1
__bpf_trace_spi_set_cs---of 1
__bpf_trace_spi_setup---of 1
__bpf_trace_spi_transfer---of 1
__devm_spi_alloc_controller---of 5
__spi_add_device---of 32
__spi_alloc_controller---of 4
__spi_async---of 12
__spi_optimize_message---of 84
__spi_optimize_message.cold---of 1
__spi_pump_messages---of 25
__spi_pump_transfer_message---of 102
__spi_register_driver---of 15
__spi_replace_transfers_release---of 8
__spi_split_transfer_maxsize---of 46
__spi_sync---of 28
__spi_unmap_msg.isra.0---of 12
__traceiter_spi_controller_busy---of 3
__traceiter_spi_controller_idle---of 3
__traceiter_spi_message_done---of 3
__traceiter_spi_message_start---of 3
__traceiter_spi_message_submit---of 3
__traceiter_spi_set_cs---of 4
__traceiter_spi_setup---of 3
__traceiter_spi_transfer_start---of 3
__traceiter_spi_transfer_stop---of 3
__unregister100%of 1
acpi_register_spi_device---of 14
acpi_spi_add_device---of 3
acpi_spi_add_resource---of 29
acpi_spi_count---of 5
acpi_spi_count_resources---of 3
acpi_spi_device_alloc---of 40
acpi_spi_find_controller_by_adev---of 1
acpi_spi_notify---of 11
devm_spi_register_controller---of 6
devm_spi_release_controller---of 2
devm_spi_unregister---of 1
driver_override_show---of 2
driver_override_store---of 5
modalias_show---of 5
perf_trace_spi_controller---of 5
perf_trace_spi_message---of 5
perf_trace_spi_message_done---of 5
perf_trace_spi_set_cs---of 5
perf_trace_spi_setup---of 5
perf_trace_spi_transfer---of 17
spi_acpi_controller_match---of 3
spi_add_device---of 1
spi_alloc_device---of 12
spi_async---of 10
spi_bus_lock---of 1
spi_bus_unlock---of 1
spi_complete---of 1
spi_controller_bytes_rx_show---of 1
spi_controller_bytes_show---of 1
spi_controller_bytes_tx_show---of 1
spi_controller_errors_show---of 1
spi_controller_id_alloc---of 5
spi_controller_messages_show---of 1
spi_controller_release---of 1
spi_controller_resume---of 5
spi_controller_spi_async_show---of 1
spi_controller_spi_sync_immediate_show---of 1
spi_controller_spi_sync_show---of 1
spi_controller_suspend---of 4
spi_controller_timedout_show---of 1
spi_controller_transfer_bytes_histo0_show---of 1
spi_controller_transfer_bytes_histo10_show---of 1
spi_controller_transfer_bytes_histo11_show---of 1
spi_controller_transfer_bytes_histo12_show---of 1
spi_controller_transfer_bytes_histo13_show---of 1
spi_controller_transfer_bytes_histo14_show---of 1
spi_controller_transfer_bytes_histo15_show---of 1
spi_controller_transfer_bytes_histo16_show---of 1
spi_controller_transfer_bytes_histo1_show---of 1
spi_controller_transfer_bytes_histo2_show---of 1
spi_controller_transfer_bytes_histo3_show---of 1
spi_controller_transfer_bytes_histo4_show---of 1
spi_controller_transfer_bytes_histo5_show---of 1
spi_controller_transfer_bytes_histo6_show---of 1
spi_controller_transfer_bytes_histo7_show---of 1
spi_controller_transfer_bytes_histo8_show---of 1
spi_controller_transfer_bytes_histo9_show---of 1
spi_controller_transfers_show---of 1
spi_controller_transfers_split_maxsize_show---of 1
spi_delay_exec---of 9
spi_delay_to_ns---of 11
spi_dev_check---of 9
spi_dev_set_name---of 8
spi_device_bytes_rx_show---of 1
spi_device_bytes_show---of 1
spi_device_bytes_tx_show---of 1
spi_device_errors_show---of 1
spi_device_messages_show---of 1
spi_device_spi_async_show---of 1
spi_device_spi_sync_immediate_show---of 1
spi_device_spi_sync_show---of 1
spi_device_timedout_show---of 1
spi_device_transfer_bytes_histo0_show---of 1
spi_device_transfer_bytes_histo10_show---of 1
spi_device_transfer_bytes_histo11_show---of 1
spi_device_transfer_bytes_histo12_show---of 1
spi_device_transfer_bytes_histo13_show---of 1
spi_device_transfer_bytes_histo14_show---of 1
spi_device_transfer_bytes_histo15_show---of 1
spi_device_transfer_bytes_histo16_show---of 1
spi_device_transfer_bytes_histo1_show---of 1
spi_device_transfer_bytes_histo2_show---of 1
spi_device_transfer_bytes_histo3_show---of 1
spi_device_transfer_bytes_histo4_show---of 1
spi_device_transfer_bytes_histo5_show---of 1
spi_device_transfer_bytes_histo6_show---of 1
spi_device_transfer_bytes_histo7_show---of 1
spi_device_transfer_bytes_histo8_show---of 1
spi_device_transfer_bytes_histo9_show---of 1
spi_device_transfers_show---of 1
spi_device_transfers_split_maxsize_show---of 1
spi_dma_sync_for_cpu---of 5
spi_emit_pcpu_stats---of 6
spi_finalize_current_message---of 48
spi_finalize_current_transfer---of 1
spi_flush_queue---of 2
spi_get_device_id---of 7
spi_get_device_match_data---of 9
spi_get_next_queued_message---of 3
spi_map_buf---of 1
spi_map_buf_attrs---of 25
spi_match_device---of 14
spi_new_ancillary_device---of 8
spi_new_device---of 16
spi_optimize_message---of 3
spi_probe---of 13
spi_pump_messages---of 1
spi_queued_transfer---of 8
spi_register_board_info---of 11
spi_register_controller---of 157
spi_register_controller.cold---of 1
spi_remove100%of 5
spi_res_release---of 6
spi_set_cs---of 69
spi_setup---of 68
spi_shutdown---of 3
spi_split_transfers_maxsize---of 5
spi_split_transfers_maxwords---of 6
spi_split_transfers_maxwords.cold---of 1
spi_statistics_add_transfer_stats---of 8
spi_stop_queue78%of 9
spi_sync---of 1
spi_sync_locked---of 1
spi_take_timestamp_post---of 13
spi_take_timestamp_pre---of 8
spi_transfer_cs_change_delay_exec---of 6
spi_transfer_one_message---of 101
spi_uevent75%of 4
spi_unmap_buf---of 2
spi_unoptimize_message---of 3
spi_unregister_controller60%of 10
spi_unregister_device46%of 11
spi_write_then_read---of 16
spidev_release100%of 3
trace_event_raw_event_spi_controller---of 6
trace_event_raw_event_spi_message---of 6
trace_event_raw_event_spi_message_done---of 6
trace_event_raw_event_spi_set_cs---of 6
trace_event_raw_event_spi_setup---of 6
trace_event_raw_event_spi_transfer---of 18
trace_raw_output_spi_controller---of 5
trace_raw_output_spi_message---of 5
trace_raw_output_spi_message_done---of 5
trace_raw_output_spi_set_cs---of 6
trace_raw_output_spi_setup---of 8
trace_raw_output_spi_transfer---of 4
trace_spi_controller_idle---of 15
trace_spi_message_submit---of 15
-----------
SUMMARY70%of 43
iio_device_add_event---of 17
iio_device_register_eventset---of 47
iio_device_unregister_eventset50%of 2
iio_device_wakeup_eventset50%of 2
iio_ev_label_show---of 3
iio_ev_state_show---of 4
iio_ev_state_store---of 6
iio_ev_value_show---of 4
iio_ev_value_store---of 7
iio_event_chrdev_read---of 21
iio_event_chrdev_release---of 3
iio_event_enabled---of 1
iio_event_ioctl---of 16
iio_event_poll---of 8
iio_push_event---of 5
-----------
SUMMARY50%of 4
_snd_pcm_lib_alloc_vmalloc_buffer---of 9
decrease_allocated_size---of 4
do_alloc_pages---of 12
preallocate_pages---of 31
preallocate_pcm_pages---of 7
snd_pcm_lib_free_pages---of 10
snd_pcm_lib_free_vmalloc_buffer---of 6
snd_pcm_lib_get_vmalloc_page---of 1
snd_pcm_lib_malloc_pages---of 23
snd_pcm_lib_preallocate_free---of 2
snd_pcm_lib_preallocate_free_for_all80%of 5
snd_pcm_lib_preallocate_max_proc_read---of 1
snd_pcm_lib_preallocate_pages---of 1
snd_pcm_lib_preallocate_pages_for_all---of 4
snd_pcm_lib_preallocate_proc_read---of 1
snd_pcm_lib_preallocate_proc_write---of 19
snd_pcm_set_managed_buffer---of 1
snd_pcm_set_managed_buffer_all---of 5
-----------
SUMMARY80%of 5
dtv_property_legacy_params_sync.isra.0---of 16
dtv_property_process_get.constprop.0---of 73
dtv_property_process_set---of 74
dtv_set_frontend---of 35
dvb_frontend_add_event---of 10
dvb_frontend_clear_cache.isra.0---of 11
dvb_frontend_compat_ioctl---of 7
dvb_frontend_detach58%of 7
dvb_frontend_do_ioctl---of 12
dvb_frontend_get_event.isra.0---of 16
dvb_frontend_get_frequency_limits---of 14
dvb_frontend_handle_compat_ioctl---of 30
dvb_frontend_handle_ioctl---of 117
dvb_frontend_init---of 9
dvb_frontend_ioctl---of 3
dvb_frontend_open---of 65
dvb_frontend_poll---of 9
dvb_frontend_put67%of 15
dvb_frontend_reinitialise---of 1
dvb_frontend_release---of 14
dvb_frontend_resume---of 16
dvb_frontend_sleep_until---of 4
dvb_frontend_stop58%of 7
dvb_frontend_suspend---of 10
dvb_frontend_swzigzag---of 44
dvb_frontend_swzigzag_autotune---of 33
dvb_frontend_swzigzag_update_delay---of 6
dvb_frontend_thread---of 86
dvb_get_frontend---of 5
dvb_get_property.isra.0---of 19
dvb_register_frontend---of 14
dvb_unregister_frontend67%of 3
emulate_delivery_system---of 10
-----------
SUMMARY63%of 32
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
snd_usb_audio_free50%of 2
snd_usb_autoresume---of 9
snd_usb_autosuspend---of 4
snd_usb_create_stream.isra.0---of 25
snd_usb_lock_shutdown---of 6
snd_usb_unlock_shutdown---of 2
try_to_register_card---of 12
usb_audio_disconnect76%of 25
usb_audio_probe---of 218
usb_audio_resume---of 17
usb_audio_suspend---of 14
-----------
SUMMARY75%of 27
kone_check_write---of 6
kone_probe---of 21
kone_raw_event---of 16
kone_receive---of 4
kone_remove40%of 5
kone_send---of 4
kone_sysfs_read_profilex---of 5
kone_sysfs_read_settings---of 5
kone_sysfs_set_startup_profile---of 10
kone_sysfs_set_tcu---of 23
kone_sysfs_show_actual_dpi---of 1
kone_sysfs_show_actual_profile---of 1
kone_sysfs_show_firmware_version---of 1
kone_sysfs_show_startup_profile---of 1
kone_sysfs_show_tcu---of 1
kone_sysfs_show_weight---of 4
kone_sysfs_write_profilex---of 16
kone_sysfs_write_settings---of 12
-----------
SUMMARY40%of 5
nci_hci_allocate---of 3
nci_hci_clear_all_pipes---of 13
nci_hci_connect_gate---of 23
nci_hci_data_received_cb---of 17
nci_hci_deallocate100%of 1
nci_hci_dev_session_init---of 24
nci_hci_get_param---of 11
nci_hci_hcp_message_rx---of 28
nci_hci_msg_rx_work---of 4
nci_hci_open_pipe---of 3
nci_hci_reset_pipes---of 2
nci_hci_send_cmd---of 9
nci_hci_send_data---of 13
nci_hci_send_data_req---of 1
nci_hci_send_event---of 3
nci_hci_set_param---of 13
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__component_add---of 22
__component_match_add---of 10
component_add---of 1
component_add_typed---of 5
component_bind_all---of 32
component_compare_dev---of 1
component_compare_dev_name---of 1
component_compare_of---of 1
component_del58%of 19
component_devices_open---of 1
component_devices_show---of 10
component_master_add_with_match---of 11
component_master_del---of 11
component_match_add_release---of 1
component_match_add_typed---of 1
component_match_realloc---of 7
component_unbind---of 6
component_unbind_all---of 12
devm_component_match_release---of 5
free_aggregate_device---of 10
try_to_bring_up_aggregate_device---of 37
-----------
SUMMARY58%of 19
-----------
SUMMARY---of 0
roccat_connect---of 11
roccat_disconnect67%of 3
roccat_ioctl---of 4
roccat_open---of 20
roccat_poll---of 7
roccat_read---of 14
roccat_release---of 12
roccat_report_event---of 8
-----------
SUMMARY67%of 3
acpi_bind_one12%of 27
acpi_device_notify26%of 27
acpi_device_notify_remove13%of 8
acpi_find_child_by_adr67%of 3
acpi_find_child_device---of 3
acpi_physnode_link_name---of 3
acpi_unbind_one---of 10
check_one_child---of 12
find_child_checks---of 8
match_any---of 1
register_acpi_bus_type---of 8
unregister_acpi_bus_type---of 6
-----------
SUMMARY20%of 65
hackrf_buf_queue---of 9
hackrf_copy_stream---of 5
hackrf_ctrl_msg---of 11
hackrf_disconnect67%of 3
hackrf_enum_fmt_sdr---of 5
hackrf_enum_freq_bands---of 9
hackrf_free_stream_bufs.isra.0---of 7
hackrf_free_urbs.isra.0---of 7
hackrf_g_fmt_sdr---of 3
hackrf_g_frequency---of 13
hackrf_g_modulator---of 7
hackrf_g_tuner---of 7
hackrf_get_next_buffer---of 5
hackrf_kill_urbs.isra.0---of 5
hackrf_probe---of 34
hackrf_querycap---of 8
hackrf_queue_setup---of 7
hackrf_return_all_buffers---of 12
hackrf_s_ctrl_rx---of 13
hackrf_s_ctrl_tx---of 12
hackrf_s_fmt_sdr---of 12
hackrf_s_frequency---of 14
hackrf_s_modulator---of 4
hackrf_s_tuner---of 4
hackrf_set_params---of 82
hackrf_start_streaming---of 41
hackrf_stop_streaming---of 6
hackrf_try_fmt_sdr---of 7
hackrf_urb_complete_in---of 17
hackrf_urb_complete_out---of 14
hackrf_video_release67%of 3
-----------
SUMMARY67%of 6
__fpu_restore_sig---of 85
copy_fpstate_to_sigframe60%of 44
fpu__alloc_mathframe80%of 5
fpu__restore_sig---of 11
save_fsave_header---of 7
-----------
SUMMARY62%of 49
acct_account_cputime100%of 3
acct_clear_integrals---of 1
acct_update_integrals---of 15
bacct_add_tsk---of 34
xacct_add_tsk---of 3
-----------
SUMMARY100%of 3
thrustmaster_change_handler---of 4
thrustmaster_model_handler23%of 18
thrustmaster_probe---of 29
thrustmaster_remove100%of 1
-----------
SUMMARY27%of 19
__power_supply_am_i_supplied---of 4
__power_supply_changed_work---of 4
__power_supply_find_supply_from_node---of 1
__power_supply_get_supplier_property---of 3
__power_supply_is_supplied_by---of 11
__power_supply_is_system_supplied---of 6
__power_supply_populate_supplied_from---of 9
__power_supply_register---of 75
devm_power_supply_get_by_phandle---of 7
devm_power_supply_put---of 1
devm_power_supply_register---of 5
devm_power_supply_register_no_ws---of 5
devm_power_supply_release---of 1
of_parse_phandle---of 3
power_supply_am_i_supplied---of 4
power_supply_batinfo_ocv2cap---of 7
power_supply_battery_bti_in_range---of 6
power_supply_battery_info_get_prop---of 19
power_supply_battery_info_has_prop---of 21
power_supply_changed---of 4
power_supply_changed_work---of 7
power_supply_deferred_register_work---of 6
power_supply_dev_release---of 4
power_supply_external_power_changed---of 3
power_supply_find_ocv2cap_table---of 6
power_supply_for_each_device---of 1
power_supply_get_battery_info---of 63
power_supply_get_by_name---of 3
power_supply_get_by_phandle---of 6
power_supply_get_drvdata---of 1
power_supply_get_maintenance_charging_setting---of 4
power_supply_get_property---of 12
power_supply_get_property_from_supplier---of 3
power_supply_is_system_supplied---of 2
power_supply_match_device_by_name---of 1
power_supply_match_device_node---of 3
power_supply_ocv2cap_simple---of 14
power_supply_powers---of 1
power_supply_property_is_writeable---of 4
power_supply_put---of 1
power_supply_put_battery_info---of 7
power_supply_read_temp---of 5
power_supply_reg_notifier---of 1
power_supply_register---of 1
power_supply_register_no_ws---of 1
power_supply_set_battery_charged---of 5
power_supply_set_property---of 4
power_supply_temp2resist_simple---of 14
power_supply_unreg_notifier---of 1
power_supply_unregister50%of 6
power_supply_vbat2ri---of 27
-----------
SUMMARY50%of 6
klist_add_before---of 3
klist_add_behind---of 3
klist_add_head---of 3
klist_add_tail100%of 3
klist_del100%of 1
klist_init100%of 1
klist_iter_exit100%of 2
klist_iter_init100%of 1
klist_iter_init_node16%of 13
klist_next79%of 23
klist_node_attached100%of 1
klist_node_init75%of 4
klist_prev61%of 23
klist_put82%of 11
klist_release91%of 11
klist_remove84%of 6
-----------
SUMMARY70%of 100
__deliver_to_subscribers30%of 17
__snd_seq_deliver_single_event---of 6
check_event_type_and_length50%of 6
check_subscription_permission.isra.0---of 11
clientptr60%of 5
get_client_info---of 10
seq_create_client1---of 13
seq_free_client72%of 7
seq_free_client1.part.0100%of 3
snd_seq_call_port_info_ioctl---of 10
snd_seq_client_enqueue_event.constprop.0---of 15
snd_seq_client_ioctl_lock---of 3
snd_seq_client_ioctl_unlock---of 3
snd_seq_client_notify_subscription---of 1
snd_seq_client_use_ptr20%of 21
snd_seq_create_kernel_client---of 13
snd_seq_delete_kernel_client56%of 9
snd_seq_deliver_event48%of 17
snd_seq_deliver_single_event.constprop.0---of 20
snd_seq_dispatch_event---of 14
snd_seq_info_clients_read---of 27
snd_seq_info_dump_subscribers---of 17
snd_seq_ioctl---of 18
snd_seq_ioctl_client_id---of 1
snd_seq_ioctl_compat---of 10
snd_seq_ioctl_create_port---of 18
snd_seq_ioctl_create_queue---of 10
snd_seq_ioctl_delete_port84%of 6
snd_seq_ioctl_delete_queue---of 1
snd_seq_ioctl_get_client_info---of 3
snd_seq_ioctl_get_client_pool---of 8
snd_seq_ioctl_get_named_queue---of 3
snd_seq_ioctl_get_port_info---of 5
snd_seq_ioctl_get_queue_client---of 3
snd_seq_ioctl_get_queue_info---of 7
snd_seq_ioctl_get_queue_status---of 3
snd_seq_ioctl_get_queue_tempo---of 3
snd_seq_ioctl_get_queue_timer---of 5
snd_seq_ioctl_get_subscription---of 5
snd_seq_ioctl_pversion---of 1
snd_seq_ioctl_query_next_client---of 13
snd_seq_ioctl_query_next_port---of 5
snd_seq_ioctl_query_subs---of 17
snd_seq_ioctl_remove_events---of 8
snd_seq_ioctl_running_mode---of 6
snd_seq_ioctl_set_client_info---of 14
snd_seq_ioctl_set_client_pool---of 19
snd_seq_ioctl_set_port_info---of 4
snd_seq_ioctl_set_queue_client---of 5
snd_seq_ioctl_set_queue_info---of 14
snd_seq_ioctl_set_queue_tempo---of 3
snd_seq_ioctl_set_queue_timer---of 7
snd_seq_ioctl_subscribe_port---of 12
snd_seq_ioctl_system_info---of 1
snd_seq_ioctl_unsubscribe_port---of 12
snd_seq_ioctl_user_pversion---of 1
snd_seq_kernel_client_ctl60%of 10
snd_seq_kernel_client_dispatch88%of 8
snd_seq_kernel_client_enqueue---of 10
snd_seq_kernel_client_get---of 1
snd_seq_kernel_client_put---of 2
snd_seq_kernel_client_write_poll---of 9
snd_seq_open---of 19
snd_seq_poll---of 14
snd_seq_read---of 30
snd_seq_release---of 5
snd_seq_set_queue_tempo---of 3
snd_seq_write---of 29
snd_sequencer_device_done---of 1
update_timestamp_of_queue.isra.0---of 5
-----------
SUMMARY50%of 109
NF_HOOK.constprop.0.isra.0---of 47
__pim_rcv.constprop.0---of 11
__rhashtable_remove_fast.constprop.0.isra.0---of 107
_ipmr_fill_mroute---of 1
ip_mr_forward---of 44
ip_mr_input---of 64
ip_mroute_getsockopt---of 23
ip_mroute_setsockopt---of 64
ipmr_cache_free_rcu---of 1
ipmr_cache_report---of 35
ipmr_cache_unresolved---of 22
ipmr_compat_ioctl---of 36
ipmr_destroy_unres---of 6
ipmr_device_event58%of 7
ipmr_dump---of 1
ipmr_expire_process---of 14
ipmr_fill_mroute---of 14
ipmr_forward_finish---of 27
ipmr_get_route---of 44
ipmr_hash_cmp---of 3
ipmr_init_vif_indev---of 12
ipmr_ioctl---of 29
ipmr_mfc_add---of 177
ipmr_mfc_delete---of 22
ipmr_mfc_seq_show---of 11
ipmr_mfc_seq_start---of 4
ipmr_mr_table_iter---of 4
ipmr_net_exit---of 1
ipmr_net_exit_batch---of 3
ipmr_net_init---of 15
ipmr_queue_xmit.constprop.0---of 39
ipmr_rtm_dumplink---of 61
ipmr_rtm_dumproute---of 15
ipmr_rtm_getroute---of 53
ipmr_rtm_route---of 36
ipmr_rule_default---of 1
ipmr_rules_dump---of 1
ipmr_rules_exit---of 4
ipmr_seq_read---of 4
ipmr_sk_ioctl---of 4
ipmr_update_thresholds---of 9
ipmr_vif_seq_show---of 6
ipmr_vif_seq_start---of 9
ipmr_vif_seq_stop---of 6
jhash---of 16
mr_mfc_seq_stop---of 9
mroute_clean_tables---of 33
mroute_netlink_event---of 6
mrtsock_destruct---of 9
pim_rcv---of 17
pim_rcv_v1---of 16
reg_vif_get_iflink---of 1
reg_vif_setup---of 1
reg_vif_xmit---of 11
vif_add---of 77
vif_delete---of 32
vif_dev_read---of 6
-----------
SUMMARY58%of 7
apple_backlight_led_set---of 1
apple_backlight_set.constprop.0---of 3
apple_battery_timer_tick---of 6
apple_event---of 89
apple_input_configured---of 10
apple_input_mapped---of 19
apple_input_mapping---of 18
apple_probe---of 34
apple_remove100%of 1
apple_report_fixup---of 19
input_event_with_scancode---of 4
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
__free_old_xmit---of 11
_virtnet_set_queues---of 7
check_sq_full_and_disable.isra.0---of 15
free_receive_page_frags---of 17
init_vqs---of 46
mergeable_buf_free---of 27
mergeable_rx_buffer_size_show---of 10
page_to_skb---of 56
receive_buf---of 399
refill_work---of 4
remove_vq_common---of 28
skb_recv_done100%of 2
skb_xmit_done50%of 6
start_xmit---of 69
trace_xdp_exception---of 15
try_fill_recv---of 98
virtnet_clean_affinity.part.0---of 7
virtnet_close---of 5
virtnet_commit_rss_command---of 19
virtnet_config_changed---of 1
virtnet_config_changed_work---of 11
virtnet_cpu_dead---of 3
virtnet_cpu_down_prep---of 5
virtnet_cpu_notif_add---of 5
virtnet_cpu_online---of 3
virtnet_free_queues---of 3
virtnet_freeze---of 1
virtnet_freeze_down.isra.0---of 2
virtnet_get_channels---of 1
virtnet_get_coalesce---of 6
virtnet_get_drvinfo---of 8
virtnet_get_ethtool_stats---of 8
virtnet_get_link_ksettings---of 1
virtnet_get_per_queue_coalesce---of 6
virtnet_get_phys_port_name---of 3
virtnet_get_ringparam---of 1
virtnet_get_rxfh---of 6
virtnet_get_rxfh_indir_size---of 1
virtnet_get_rxfh_key_size---of 1
virtnet_get_rxnfc---of 14
virtnet_get_sset_count---of 3
virtnet_get_strings---of 8
virtnet_napi_enable---of 3
virtnet_open---of 17
virtnet_poll---of 68
virtnet_poll_tx---of 15
virtnet_probe---of 139
virtnet_remove---of 1
virtnet_restore---of 8
virtnet_rq_alloc---of 29
virtnet_rq_free_buf.isra.0---of 41
virtnet_rq_init_one_sg---of 3
virtnet_rq_unmap---of 23
virtnet_rq_unmap_free_buf---of 3
virtnet_rx_dim_work---of 12
virtnet_rx_mode_work---of 25
virtnet_send_command---of 13
virtnet_send_rx_ctrl_coal_vq_cmd---of 4
virtnet_send_tx_ctrl_coal_vq_cmd---of 4
virtnet_set_affinity---of 15
virtnet_set_channels---of 10
virtnet_set_coalesce---of 36
virtnet_set_features---of 11
virtnet_set_guest_offloads---of 3
virtnet_set_link_ksettings---of 1
virtnet_set_mac_address---of 13
virtnet_set_per_queue_coalesce---of 21
virtnet_set_ringparam---of 41
virtnet_set_rx_mode---of 2
virtnet_set_rxfh---of 17
virtnet_set_rxnfc---of 24
virtnet_sq_free_unused_buf---of 3
virtnet_stats---of 3
virtnet_tx_timeout---of 3
virtnet_update_settings---of 6
virtnet_validate---of 28
virtnet_vlan_rx_add_vid---of 3
virtnet_vlan_rx_kill_vid---of 3
virtnet_xdp---of 81
virtnet_xdp_handler---of 29
virtnet_xdp_xmit---of 38
xdp_linearize_page---of 42
-----------
SUMMARY63%of 8
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
compat_only_sysfs_link_entry_to_kobj---of 10
internal_create_group53%of 65
internal_create_groups72%of 7
remove_files100%of 6
sysfs_add_link_to_group---of 3
sysfs_create_group100%of 1
sysfs_create_groups100%of 1
sysfs_group_change_owner---of 18
sysfs_groups_change_owner---of 6
sysfs_merge_group70%of 10
sysfs_remove_group75%of 8
sysfs_remove_groups100%of 3
sysfs_remove_link_from_group---of 2
sysfs_unmerge_group100%of 4
sysfs_update_group---of 1
sysfs_update_groups---of 1
-----------
SUMMARY64%of 105
__parport_register_driver---of 8
dead_frob_lines---of 1
dead_read---of 1
dead_read_lines---of 1
dead_write---of 1
driver_check---of 3
driver_detach---of 3
free_pardevice---of 1
free_port---of 5
parport_announce_port---of 17
parport_bus_exit---of 1
parport_bus_init---of 1
parport_claim---of 25
parport_claim_or_block---of 13
parport_del_port---of 1
parport_find_base---of 7
parport_find_number---of 7
parport_get_port---of 1
parport_irq_handler---of 4
parport_probe---of 5
parport_put_port100%of 1
parport_register_dev_model---of 33
parport_register_port---of 11
parport_release---of 17
parport_remove_port44%of 16
parport_unregister_device---of 20
parport_unregister_driver---of 1
port_check---of 4
port_detach---of 5
port_detect---of 1
-----------
SUMMARY48%of 17
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__do_SAK---of 30
__start_tty---of 5
__stop_tty---of 3
__tty_alloc_driver---of 12
__tty_fasync---of 13
__tty_hangup.part.0---of 31
alloc_tty_struct---of 8
check_tty_count---of 11
compat_tty_tiocgserial---of 6
compat_tty_tiocsserial---of 3
console_sysfs_notify---of 2
destruct_tty_driver---of 10
do_SAK---of 2
do_SAK_work---of 1
do_tty_hangup---of 2
file_tty_write.constprop.0---of 32
hung_up_tty_compat_ioctl---of 2
hung_up_tty_fasync---of 1
hung_up_tty_ioctl---of 2
hung_up_tty_poll---of 1
hung_up_tty_read---of 1
hung_up_tty_write---of 1
queue_release_one_tty---of 1
redirected_tty_write---of 5
release_one_tty---of 10
release_tty---of 30
send_break---of 12
show_cons_active---of 24
start_tty---of 6
stop_tty---of 4
this_tty---of 4
tty_add_file---of 3
tty_alloc_file---of 3
tty_cdev_add---of 4
tty_compat_ioctl---of 22
tty_default_fops---of 1
tty_dev_name_to_number---of 15
tty_device_create_release50%of 4
tty_devnode60%of 5
tty_devnum---of 1
tty_do_resize---of 5
tty_driver_kref_put---of 5
tty_driver_name---of 5
tty_fasync---of 4
tty_free_file---of 1
tty_get_icount---of 3
tty_get_tiocm---of 3
tty_hangup---of 1
tty_hung_up_p---of 3
tty_init_dev---of 3
tty_init_dev.part.0---of 25
tty_init_termios---of 6
tty_ioctl---of 87
tty_kclose---of 3
tty_kopen---of 27
tty_kopen_exclusive---of 1
tty_kopen_shared---of 1
tty_kref_put---of 6
tty_lookup_driver---of 32
tty_name---of 4
tty_open---of 78
tty_poll---of 7
tty_put_char---of 4
tty_read---of 22
tty_register_device---of 1
tty_register_device_attr---of 18
tty_register_driver---of 21
tty_release---of 78
tty_release_struct---of 3
tty_reopen---of 12
tty_save_termios---of 5
tty_send_xchar---of 9
tty_set_serial---of 6
tty_show_fdinfo---of 4
tty_standard_install---of 6
tty_unregister_device100%of 2
tty_unregister_driver---of 3
tty_update_time---of 8
tty_vhangup---of 2
tty_vhangup_self---of 6
tty_vhangup_session---of 2
tty_wakeup---of 6
tty_write---of 1
tty_write_lock---of 4
tty_write_unlock---of 1
-----------
SUMMARY64%of 11
-----------
SUMMARY---of 0
alloc_lc_skb.isra.0---of 3
fw_dnld_over---of 9
fw_dnld_rx_work---of 57
fw_dnld_timeout---of 1
nfcmrvl_fw_dnld_abort---of 1
nfcmrvl_fw_dnld_deinit100%of 1
nfcmrvl_fw_dnld_init---of 5
nfcmrvl_fw_dnld_recv_frame---of 3
nfcmrvl_fw_dnld_start---of 16
process_state_init---of 4
process_state_set_ref_clock---of 8
-----------
SUMMARY100%of 1
clear_ti_thread_flag.constprop.0---of 1
cond_local_irq_disable.isra.0100%of 3
cond_local_irq_enable.isra.0100%of 2
do_error_trap---of 11
do_int3---of 1
do_int3_user---of 6
do_trap---of 12
fixup_iopl_exception23%of 9
get_kernel_gp_address---of 6
get_si_code---of 3
gp_try_fixup_and_notify.constprop.040%of 5
gp_user_force_sig_segv.constprop.040%of 5
handle_cfi_failure.constprop.0---of 1
handle_invalid_op---of 1
handle_stack_overflow---of 1
handle_xfd_event---of 15
is_sysenter_singlestep.isra.0---of 1
is_valid_bugaddr---of 3
math_error---of 13
native_read_msr---of 4
notify_debug---of 1
read_cr0---of 1
test_ti_thread_flag.constprop.0---of 1
try_fixup_enqcmd_gp100%of 1
write_cr0---of 1
wrmsrl.constprop.0---of 2
-----------
SUMMARY48%of 25
ipw_attach---of 3
ipw_close---of 7
ipw_dtr_rts---of 9
ipw_open---of 17
ipw_release100%of 1
-----------
SUMMARY100%of 1
klsi_105_chg_port_settings.isra.0---of 4
klsi_105_close---of 3
klsi_105_get_line_state---of 7
klsi_105_open---of 11
klsi_105_port_probe---of 3
klsi_105_port_remove100%of 1
klsi_105_prepare_write_buffer---of 1
klsi_105_process_read_urb---of 9
klsi_105_set_termios---of 30
klsi_105_tiocmget---of 6
-----------
SUMMARY100%of 1
sierra_calc_num_ports---of 5
sierra_chars_in_buffer---of 3
sierra_close---of 12
sierra_dtr_rts---of 1
sierra_indat_callback---of 13
sierra_instat_callback---of 22
sierra_open---of 18
sierra_outdat_callback---of 4
sierra_port_probe---of 15
sierra_port_remove100%of 1
sierra_probe---of 13
sierra_release100%of 1
sierra_resume---of 14
sierra_send_setup---of 10
sierra_startup---of 4
sierra_submit_rx_urbs---of 12
sierra_suspend---of 10
sierra_tiocmget---of 1
sierra_tiocmset---of 9
sierra_write---of 27
sierra_write_room---of 6
-----------
SUMMARY100%of 2
devtmpfs_create_node89%of 9
devtmpfs_delete_node100%of 4
devtmpfs_submit_req100%of 1
devtmpfs_work_loop---of 26
devtmpfsd---of 3
handle_remove---of 25
public_dev_mount---of 6
-----------
SUMMARY93%of 14
devm_mfd_add_devices---of 11
devm_mfd_dev_release---of 1
match_device_ids---of 4
mfd_add_device---of 80
mfd_add_devices---of 6
mfd_remove_devices100%of 1
mfd_remove_devices_fn50%of 14
mfd_remove_devices_late---of 1
-----------
SUMMARY54%of 15
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__hw_addr_add_ex---of 26
__hw_addr_del_entry57%of 16
__hw_addr_del_ex94%of 15
__hw_addr_flush29%of 7
__hw_addr_init---of 1
__hw_addr_ref_sync_dev---of 14
__hw_addr_ref_unsync_dev---of 7
__hw_addr_sync---of 10
__hw_addr_sync_dev---of 16
__hw_addr_sync_multiple---of 9
__hw_addr_unsync---of 5
__hw_addr_unsync_dev---of 7
dev_addr_add---of 7
dev_addr_check---of 8
dev_addr_del---of 9
dev_addr_flush---of 1
dev_addr_init---of 3
dev_addr_mod---of 21
dev_mc_add---of 3
dev_mc_add_excl---of 3
dev_mc_add_global---of 3
dev_mc_del100%of 3
dev_mc_del_global---of 3
dev_mc_flush100%of 1
dev_mc_init---of 1
dev_mc_sync---of 5
dev_mc_sync_multiple---of 5
dev_mc_unsync---of 2
dev_uc_add---of 3
dev_uc_add_excl---of 3
dev_uc_del---of 3
dev_uc_flush100%of 1
dev_uc_init---of 1
dev_uc_sync---of 5
dev_uc_sync_multiple---of 5
dev_uc_unsync---of 2
-----------
SUMMARY70%of 43
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
option_attach---of 8
option_instat_callback---of 28
option_probe---of 7
option_release100%of 1
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
bcm5974_close---of 1
bcm5974_disconnect100%of 3
bcm5974_irq_button---of 14
bcm5974_irq_trackpad---of 31
bcm5974_open---of 7
bcm5974_probe---of 48
bcm5974_resume---of 3
bcm5974_start_traffic---of 11
bcm5974_suspend---of 3
bcm5974_wellspring_mode---of 15
-----------
SUMMARY100%of 3
-----------
SUMMARY---of 0
gid_eq---of 1
gid_gt---of 1
gid_lt---of 1
ima_alloc_rule_opt_list---of 14
ima_appraise_signature---of 30
ima_check_policy---of 2
ima_delete_rules---of 6
ima_free_rule.part.0---of 6
ima_log_string_op---of 6
ima_lsm_copy_rule---of 7
ima_lsm_policy_change---of 13
ima_lsm_rule_init---of 6
ima_match_policy34%of 120
ima_parse_add_rule---of 278
ima_parse_add_rule.cold---of 1
ima_policy_next---of 14
ima_policy_show---of 141
ima_policy_start---of 28
ima_update_policy---of 5
ima_update_policy_flags---of 24
uid_eq---of 1
uid_gt---of 1
uid_lt---of 1
vfsgid_eq_kgid---of 3
vfsgid_gt_kgid---of 1
vfsgid_lt_kgid---of 1
vfsuid_eq_kuid---of 3
vfsuid_gt_kuid---of 1
vfsuid_lt_kuid---of 1
-----------
SUMMARY34%of 120
igorplugusb_callback10%of 20
igorplugusb_disconnect100%of 1
igorplugusb_probe---of 17
igorplugusb_timer---of 2
-----------
SUMMARY15%of 21
-----------
SUMMARY---of 0
___neigh_create---of 154
___neigh_create.cold---of 4
___neigh_lookup_noref---of 22
___neigh_lookup_noref.cold---of 1
__neigh_create---of 1
__neigh_event_send---of 69
__neigh_for_each_release---of 24
__neigh_for_each_release.cold---of 1
__neigh_ifdown.isra.043%of 19
__neigh_notify43%of 7
__neigh_set_probe_once---of 3
__neigh_update---of 180
__pneigh_lookup---of 1
__pneigh_lookup_1---of 8
neigh_add---of 81
neigh_add_timer---of 13
neigh_app_ns---of 1
neigh_blackhole---of 1
neigh_carrier_down---of 1
neigh_changeaddr---of 1
neigh_cleanup_and_release35%of 20
neigh_connected_output---of 21
neigh_del_timer---of 8
neigh_delete---of 27
neigh_destroy44%of 25
neigh_direct_output---of 1
neigh_dump_info---of 88
neigh_dump_info.cold---of 1
neigh_event_ns---of 8
neigh_fill_info65%of 31
neigh_flush_dev47%of 41
neigh_flush_dev.cold---of 1
neigh_for_each---of 32
neigh_for_each.cold---of 1
neigh_get---of 33
neigh_get_dev_parms_rcu---of 18
neigh_get_first.isra.0---of 27
neigh_get_first.isra.0.cold---of 1
neigh_get_next.isra.0---of 31
neigh_get_next.isra.0.cold---of 1
neigh_hash_alloc---of 10
neigh_hash_alloc.cold---of 1
neigh_hash_free_rcu---of 5
neigh_hash_free_rcu.cold---of 1
neigh_ifdown100%of 1
neigh_invalidate---of 11
neigh_lookup---of 23
neigh_managed_work---of 7
neigh_mark_dead63%of 8
neigh_parms_alloc---of 20
neigh_parms_qlen_dec---of 13
neigh_parms_release88%of 8
neigh_periodic_work---of 44
neigh_periodic_work.cold---of 1
neigh_probe---of 6
neigh_proc_base_reachable_time---of 10
neigh_proc_dointvec---of 1
neigh_proc_dointvec_jiffies---of 1
neigh_proc_dointvec_ms_jiffies---of 1
neigh_proc_dointvec_ms_jiffies_positive---of 1
neigh_proc_dointvec_unres_qlen---of 4
neigh_proc_dointvec_userhz_jiffies---of 1
neigh_proc_dointvec_zero_intmax---of 1
neigh_proc_update---of 21
neigh_proxy_process---of 26
neigh_rand_reach_time---of 4
neigh_rcu_free_parms---of 5
neigh_remove_one---of 24
neigh_remove_one.cold---of 1
neigh_resolve_output---of 29
neigh_seq_next---of 10
neigh_seq_start---of 32
neigh_seq_stop---of 6
neigh_stat_seq_next---of 9
neigh_stat_seq_show---of 4
neigh_stat_seq_start---of 8
neigh_sysctl_register---of 14
neigh_sysctl_unregister100%of 2
neigh_table_clear---of 3
neigh_table_init---of 16
neigh_timer_handler---of 63
neigh_update---of 1
neigh_valid_dump_req---of 29
neigh_valid_get_req.constprop.0---of 29
neigh_xmit---of 53
neigh_xmit.cold---of 1
neightbl_dump_info---of 35
neightbl_fill_info.constprop.0---of 41
neightbl_fill_info.constprop.0.cold---of 1
neightbl_fill_parms---of 26
neightbl_set---of 73
pneigh_delete---of 13
pneigh_enqueue---of 19
pneigh_fill_info.constprop.0---of 15
pneigh_get_first.isra.0---of 9
pneigh_get_next.isra.0---of 16
pneigh_lookup---of 21
pneigh_queue_purge20%of 15
-----------
SUMMARY49%of 177
__enable_rsa---of 6
__start_tx---of 12
__stop_tx---of 16
autoconfig_read_divisor_id---of 1
default_serial_dl_read---of 1
default_serial_dl_write---of 1
hub6_serial_in---of 2
hub6_serial_in.cold---of 1
hub6_serial_out---of 2
hub6_serial_out.cold---of 1
io_serial_in100%of 2
io_serial_in.cold---of 1
io_serial_out100%of 2
io_serial_out.cold---of 1
mem16_serial_in---of 2
mem16_serial_in.cold---of 1
mem16_serial_out---of 2
mem16_serial_out.cold---of 1
mem32_serial_in---of 2
mem32_serial_in.cold---of 1
mem32_serial_out---of 2
mem32_serial_out.cold---of 1
mem32be_serial_in---of 2
mem32be_serial_in.cold---of 1
mem32be_serial_out---of 2
mem32be_serial_out.cold---of 1
mem_serial_in---of 2
mem_serial_in.cold---of 1
mem_serial_out---of 2
mem_serial_out.cold---of 1
rx_trig_bytes_show---of 7
rx_trig_bytes_store---of 13
serial8250_break_ctl---of 7
serial8250_clear_IER67%of 3
serial8250_clear_and_reinit_fifos---of 3
serial8250_clear_fifos.part.0---of 1
serial8250_config_port---of 185
serial8250_console_exit---of 3
serial8250_console_putchar---of 1
serial8250_console_setup---of 10
serial8250_console_write41%of 61
serial8250_default_handle_irq---of 5
serial8250_do_get_mctrl---of 13
serial8250_do_pm---of 1
serial8250_do_set_divisor---of 4
serial8250_do_set_ldisc---of 10
serial8250_do_set_mctrl---of 18
serial8250_do_set_termios---of 69
serial8250_do_shutdown---of 23
serial8250_do_startup---of 80
serial8250_em485_config---of 14
serial8250_em485_destroy---of 2
serial8250_em485_handle_start_tx---of 3
serial8250_em485_handle_stop_tx---of 7
serial8250_em485_start_tx---of 26
serial8250_em485_stop_tx---of 31
serial8250_enable_ms---of 8
serial8250_get_divisor---of 9
serial8250_get_mctrl---of 5
serial8250_handle_irq---of 25
serial8250_init_port---of 1
serial8250_modem_status---of 13
serial8250_pm---of 3
serial8250_read_char---of 20
serial8250_release_port---of 1
serial8250_release_std_resource---of 10
serial8250_release_std_resource.cold---of 1
serial8250_request_port---of 1
serial8250_request_std_resource---of 12
serial8250_request_std_resource.cold---of 1
serial8250_rpm_get---of 2
serial8250_rpm_get_tx---of 3
serial8250_rpm_put---of 2
serial8250_rpm_put_tx---of 3
serial8250_rx_chars---of 3
serial8250_set_defaults---of 13
serial8250_set_ldisc---of 3
serial8250_set_mctrl---of 4
serial8250_set_sleep---of 10
serial8250_set_termios---of 3
serial8250_shutdown---of 3
serial8250_start_tx---of 16
serial8250_startup---of 5
serial8250_stop_rx---of 7
serial8250_stop_tx---of 6
serial8250_throttle---of 1
serial8250_tx_chars---of 26
serial8250_tx_empty---of 8
serial8250_tx_threshold_handle_irq---of 3
serial8250_type---of 3
serial8250_unthrottle---of 1
serial8250_update_uartclk---of 6
serial8250_verify_port---of 6
serial_icr_read---of 1
serial_port_out_sync.constprop.0---of 3
set_io_from_upio---of 2
size_fifo---of 29
wait_for_lsr100%of 4
wait_for_xmitr25%of 4
-----------
SUMMARY48%of 76
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
close_pdeo---of 4
close_pdeo.part.0---of 5
init_once---of 1
proc_alloc_inode---of 3
proc_entry_rundown60%of 5
proc_evict_inode75%of 4
proc_free_inode---of 5
proc_get_inode---of 25
proc_get_link---of 8
proc_invalidate_siblings_dcache63%of 51
proc_put_link---of 2
proc_reg_compat_ioctl---of 13
proc_reg_get_unmapped_area---of 15
proc_reg_llseek---of 10
proc_reg_mmap---of 13
proc_reg_open---of 23
proc_reg_poll---of 14
proc_reg_read---of 13
proc_reg_read_iter---of 10
proc_reg_release---of 11
proc_reg_unlocked_ioctl---of 13
proc_reg_write---of 13
proc_show_options---of 11
-----------
SUMMARY64%of 60
-----------
SUMMARY---of 0
__anon_vma_interval_tree_augment_rotate---of 5
__anon_vma_interval_tree_subtree_search---of 9
anon_vma_interval_tree_insert---of 11
anon_vma_interval_tree_iter_first---of 5
anon_vma_interval_tree_iter_next---of 9
anon_vma_interval_tree_remove---of 57
anon_vma_interval_tree_verify50%of 4
vma_interval_tree_augment_rotate---of 5
vma_interval_tree_insert---of 11
vma_interval_tree_insert_after---of 14
vma_interval_tree_iter_first---of 5
vma_interval_tree_iter_next---of 9
vma_interval_tree_remove---of 57
vma_interval_tree_subtree_search---of 9
-----------
SUMMARY50%of 4
__account_locked_vm---of 17
__vcalloc---of 4
__vm_enough_memory27%of 15
__vmalloc_array---of 4
account_locked_vm---of 16
const_folio_flags.constprop.040%of 10
folio_anon_vma---of 3
folio_copy---of 5
folio_mapping75%of 8
get_cmdline---of 14
kfree_const100%of 3
kmemdup100%of 3
kmemdup_array---of 2
kmemdup_nul---of 6
kstrdup100%of 4
kstrdup_const100%of 5
kstrndup---of 10
kvfree67%of 3
kvfree_sensitive---of 4
kvmalloc_node25%of 12
kvmemdup---of 3
kvrealloc---of 7
mem_dump_obj---of 7
memcmp_pages---of 1
memdup_user86%of 7
memdup_user_nul---of 8
overcommit_kbytes_handler---of 4
overcommit_policy_handler---of 8
overcommit_ratio_handler---of 4
page_offline_begin---of 1
page_offline_end---of 1
page_offline_freeze---of 1
page_offline_thaw---of 1
randomize_page---of 7
randomize_stack_top---of 4
strndup_user---of 6
sync_overcommit_as---of 1
vcalloc---of 4
vm_commit_limit---of 4
vm_memory_committed---of 1
vm_mmap---of 4
vm_mmap_pgoff---of 16
vma_is_stack_for_current---of 3
vma_set_file---of 1
vmalloc_array---of 4
vmemdup_user---of 9
-----------
SUMMARY58%of 70
NF_HOOK.constprop.0---of 45
__ndisc_fill_addr_option---of 2
dst_output---of 25
in6_dev_get---of 22
ndisc_alloc_skb---of 5
ndisc_allow_add---of 12
ndisc_cleanup---of 1
ndisc_constructor---of 35
ndisc_error_report---of 11
ndisc_hash---of 1
ndisc_ifinfo_sysctl_change---of 32
ndisc_is_multicast---of 1
ndisc_key_eq---of 1
ndisc_late_cleanup---of 1
ndisc_mc_map28%of 11
ndisc_net_exit---of 2
ndisc_net_init---of 8
ndisc_net_init.cold---of 1
ndisc_netdev_event11%of 29
ndisc_ns_create---of 16
ndisc_parse_options---of 25
ndisc_rcv---of 27
ndisc_recv_na---of 65
ndisc_recv_ns---of 82
ndisc_recv_rs---of 30
ndisc_redirect_rcv---of 19
ndisc_router_discovery---of 174
ndisc_send_na---of 24
ndisc_send_na.cold---of 1
ndisc_send_ns---of 6
ndisc_send_redirect---of 45
ndisc_send_rs---of 11
ndisc_send_skb---of 60
ndisc_send_skb.cold---of 1
ndisc_send_unsol_na---of 10
ndisc_solicit---of 13
ndisc_update---of 3
pndisc_constructor---of 10
pndisc_destructor---of 10
pndisc_redo---of 1
-----------
SUMMARY15%of 40
__match_tty---of 3
bt_host_release60%of 5
bt_link_release---of 1
bt_sysfs_cleanup---of 1
hci_conn_add_sysfs---of 9
hci_conn_del_sysfs---of 9
hci_conn_init_sysfs---of 5
hci_init_sysfs---of 1
-----------
SUMMARY60%of 5
-----------
SUMMARY---of 0
address_mask_show---of 1
addresses_show---of 7
index_show---of 1
macaddress_show---of 1
name_show---of 3
rdev_suspend---of 16
trace_rdev_return_int---of 15
wiphy_dev_release100%of 1
wiphy_namespace---of 1
wiphy_resume---of 21
wiphy_suspend---of 13
wiphy_sysfs_exit---of 1
wiphy_sysfs_init---of 1
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
__do_sys_openat2---of 13
__do_sys_vhangup---of 3
__ia32_compat_sys_ftruncate---of 5
__ia32_compat_sys_open---of 7
__ia32_compat_sys_openat---of 7
__ia32_compat_sys_truncate---of 1
__ia32_sys_access---of 1
__ia32_sys_chdir---of 8
__ia32_sys_chmod---of 6
__ia32_sys_chown---of 1
__ia32_sys_chroot---of 11
__ia32_sys_close---of 5
__ia32_sys_close_range---of 1
__ia32_sys_creat---of 1
__ia32_sys_faccessat---of 1
__ia32_sys_faccessat2---of 1
__ia32_sys_fallocate---of 5
__ia32_sys_fchdir---of 8
__ia32_sys_fchmod---of 10
__ia32_sys_fchmodat---of 6
__ia32_sys_fchmodat2---of 1
__ia32_sys_fchown---of 1
__ia32_sys_fchownat---of 1
__ia32_sys_ftruncate---of 5
__ia32_sys_lchown---of 1
__ia32_sys_open---of 7
__ia32_sys_openat---of 7
__ia32_sys_openat2---of 1
__ia32_sys_truncate---of 1
__x64_compat_sys_ftruncate---of 5
__x64_compat_sys_open---of 7
__x64_compat_sys_openat---of 7
__x64_compat_sys_truncate---of 7
__x64_sys_access---of 1
__x64_sys_chdir---of 8
__x64_sys_chmod---of 6
__x64_sys_chown---of 1
__x64_sys_chroot---of 11
__x64_sys_close100%of 5
__x64_sys_close_range---of 1
__x64_sys_creat---of 1
__x64_sys_faccessat---of 1
__x64_sys_faccessat2---of 1
__x64_sys_fallocate---of 5
__x64_sys_fchdir---of 8
__x64_sys_fchmod---of 10
__x64_sys_fchmodat---of 6
__x64_sys_fchmodat2---of 1
__x64_sys_fchown---of 1
__x64_sys_fchownat---of 1
__x64_sys_ftruncate---of 6
__x64_sys_lchown---of 1
__x64_sys_open---of 7
__x64_sys_openat100%of 7
__x64_sys_openat2---of 1
__x64_sys_truncate---of 7
build_open_flags84%of 43
build_open_how---of 7
chmod_common---of 13
chown_common---of 19
dentry_create---of 5
dentry_open---of 7
do_dentry_open79%of 82
do_faccessat---of 43
do_fchmodat---of 8
do_fchownat---of 10
do_ftruncate---of 30
do_sys_ftruncate---of 6
do_sys_open---of 7
do_sys_openat290%of 10
do_sys_truncate---of 7
do_truncate88%of 8
file_open_name---of 29
file_open_root---of 29
file_path---of 1
filp_close---of 1
filp_flush72%of 7
filp_open---of 4
finish_no_open---of 1
finish_open---of 3
generic_file_open67%of 3
kernel_file_open---of 4
ksys_fallocate---of 5
ksys_fchown---of 10
nonseekable_open---of 1
stream_open100%of 1
vfs_fallocate74%of 57
vfs_fchmod---of 4
vfs_fchown---of 7
vfs_open100%of 1
vfs_truncate---of 19
-----------
SUMMARY80%of 224
-----------
SUMMARY---of 0
___pskb_trim---of 57
__alloc_skb50%of 16
__build_skb100%of 3
__build_skb_around34%of 6
__consume_stateless_skb---of 1
__copy_skb_header27%of 19
__kfree_skb---of 3
__msg_zerocopy_callback---of 19
__napi_alloc_frag_align---of 1
__napi_alloc_skb---of 27
__napi_build_skb---of 3
__napi_kfree_skb---of 3
__netdev_alloc_frag_align---of 6
__netdev_alloc_skb71%of 17
__pskb_copy_fclone---of 26
__pskb_pull_tail---of 71
__skb_checksum---of 40
__skb_checksum_complete---of 7
__skb_checksum_complete_head---of 7
__skb_clone75%of 4
__skb_complete_tx_timestamp---of 8
__skb_ext_alloc---of 3
__skb_ext_del---of 13
__skb_ext_del.cold---of 1
__skb_ext_put---of 15
__skb_ext_set---of 4
__skb_ext_set.cold---of 1
__skb_pad---of 17
__skb_send_sock---of 36
__skb_splice_bits.isra.0---of 13
__skb_to_sgvec---of 34
__skb_tstamp_tx---of 26
__skb_unclone_keeptruesize---of 9
__skb_vlan_pop---of 19
__skb_warn_lro_forwarding---of 2
__skb_zcopy_downgrade_managed---of 11
__splice_segment.isra.0---of 29
alloc_skb_for_msg---of 3
alloc_skb_with_frags---of 31
alloc_skb_with_frags.cold---of 6
build_skb---of 13
build_skb_around---of 13
consume_skb90%of 10
csum_and_copy_from_iter_full---of 95
csum_block_add_ext---of 3
csum_partial_ext---of 1
drop_reasons_register_subsys---of 3
drop_reasons_unregister_subsys---of 3
folio_size---of 13
folio_size.cold---of 1
kfree_skb_list_reason---of 26
kfree_skb_partial---of 5
kfree_skb_reason63%of 16
kfree_skbmem20%of 10
kmalloc_reserve64%of 11
mm_account_pinned_pages---of 18
mm_unaccount_pinned_pages---of 2
msg_zerocopy_callback---of 5
msg_zerocopy_put_abort---of 6
msg_zerocopy_realloc---of 31
napi_build_skb---of 13
napi_consume_skb---of 18
napi_get_frags_check---of 1
napi_pp_put_page---of 21
napi_skb_cache_get---of 4
napi_skb_cache_put---of 4
napi_skb_free_stolen_head---of 19
pskb_carve_inside_header---of 31
pskb_carve_inside_nonlinear---of 54
pskb_expand_head20%of 51
pskb_extract---of 11
pskb_put---of 7
pskb_trim_rcsum_slow---of 13
sendmsg_locked---of 5
sendmsg_unlocked---of 3
skb_abort_seq_read---of 3
skb_add_rx_frag_netmem---of 12
skb_append---of 1
skb_append_pagefrags---of 21
skb_attempt_defer_free---of 23
skb_checksum---of 1
skb_checksum_setup---of 39
skb_checksum_setup_ip---of 16
skb_checksum_trimmed---of 23
skb_clone54%of 15
skb_clone_fraglist.isra.0---of 7
skb_clone_sk---of 19
skb_coalesce_rx_frag---of 3
skb_complete_tx_timestamp---of 23
skb_complete_wifi_ack---of 18
skb_condense---of 7
skb_copy---of 7
skb_copy_and_csum_bits---of 28
skb_copy_and_csum_dev---of 8
skb_copy_bits---of 24
skb_copy_expand---of 9
skb_copy_header---of 1
skb_copy_ubufs---of 95
skb_copy_ubufs.cold---of 3
skb_cow_data---of 57
skb_cow_data_for_xdp---of 3
skb_dequeue50%of 4
skb_dequeue_tail---of 4
skb_dump---of 37
skb_ensure_writable---of 12
skb_ensure_writable_head_tail---of 10
skb_errqueue_purge---of 8
skb_eth_pop---of 9
skb_eth_push---of 16
skb_expand_head---of 24
skb_ext_add---of 20
skb_ext_add.cold---of 1
skb_find_text---of 4
skb_free_head58%of 7
skb_headers_offset_update60%of 5
skb_morph---of 3
skb_mpls_dec_ttl---of 10
skb_mpls_pop---of 19
skb_mpls_push---of 27
skb_mpls_update_lse---of 6
skb_panic---of 3
skb_partial_csum_set---of 6
skb_pp_cow_data---of 45
skb_pp_cow_data.cold---of 2
skb_prepare_seq_read---of 1
skb_pull---of 7
skb_pull_data---of 8
skb_pull_rcsum---of 12
skb_push---of 3
skb_put60%of 5
skb_queue_head---of 1
skb_queue_purge_reason38%of 8
skb_queue_tail100%of 1
skb_rbtree_purge---of 3
skb_realloc_headroom---of 7
skb_release_data35%of 32
skb_release_head_state38%of 16
skb_scrub_packet---of 15
skb_segment---of 165
skb_segment_list---of 55
skb_send_sock---of 1
skb_send_sock_locked---of 1
skb_seq_read---of 33
skb_shift---of 73
skb_splice_bits---of 3
skb_splice_from_iter---of 53
skb_split---of 25
skb_store_bits---of 24
skb_to_sgvec---of 3
skb_to_sgvec_nomark---of 1
skb_trim75%of 4
skb_try_coalesce---of 76
skb_ts_finish---of 3
skb_ts_get_next_block---of 1
skb_tstamp_tx---of 1
skb_tx_error---of 8
skb_unlink---of 1
skb_vlan_pop---of 8
skb_vlan_push---of 21
skb_vlan_untag---of 34
skb_zerocopy---of 67
skb_zerocopy_clone---of 23
skb_zerocopy_headlen---of 6
skb_zerocopy_iter_stream---of 22
slab_build_skb---of 7
sock_dequeue_err_skb---of 15
sock_queue_err_skb---of 25
sock_rmem_free---of 1
sock_spd_release---of 11
trace_consume_skb34%of 15
trace_kfree_skb34%of 15
warn_crc32c_csum_combine---of 4
warn_crc32c_csum_update---of 4
-----------
SUMMARY44%of 290
-----------
SUMMARY---of 0
drv_set_promiscuous_mode---of 19
ieee802154_header_create---of 12
ieee802154_if_add---of 23
ieee802154_if_remove---of 6
ieee802154_if_setup---of 1
ieee802154_iface_exit---of 1
ieee802154_iface_init---of 1
ieee802154_remove_interfaces80%of 5
mac802154_header_create---of 8
mac802154_header_parse---of 6
mac802154_set_header_security---of 14
mac802154_slave_close---of 13
mac802154_wpan_free---of 1
mac802154_wpan_ioctl---of 15
mac802154_wpan_mac_addr---of 10
mac802154_wpan_open---of 173
mac802154_wpan_update_llsec---of 3
netdev_notify29%of 7
trace_802154_drv_return_int---of 15
-----------
SUMMARY50%of 12
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
contrast_show---of 4
contrast_store---of 9
devm_lcd_device_match---of 1
devm_lcd_device_register---of 5
devm_lcd_device_release---of 2
devm_lcd_device_unregister---of 2
fb_notifier_callback---of 11
lcd_device_register---of 8
lcd_device_release---of 1
lcd_device_unregister50%of 2
lcd_power_show---of 4
lcd_power_store---of 9
max_contrast_show---of 1
-----------
SUMMARY50%of 2
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__hci_cmd_sync---of 1
__hci_cmd_sync_ev---of 1
__hci_cmd_sync_sk---of 68
__hci_cmd_sync_status---of 7
__hci_cmd_sync_status_sk---of 9
_hci_cmd_sync_cancel_entry.constprop.0---of 5
_update_adv_data_sync---of 6
adv_timeout_expire---of 8
adv_timeout_expire_sync---of 3
adv_use_rpa---of 5
bacpy---of 1
cancel_interleave_scan---of 5
conn_params_copy---of 29
create_le_conn_complete---of 30
enable_advertising_sync---of 1
hci_abort_conn_sync---of 51
hci_acl_create_conn_sync---of 33
hci_active_scan_sync---of 38
hci_cancel_connect_sync---of 13
hci_clear_adv_instance_sync---of 28
hci_clear_adv_sync---of 12
hci_clear_event_filter_sync---of 4
hci_cmd_sync---of 8
hci_cmd_sync_cancel---of 6
hci_cmd_sync_cancel_entry---of 1
hci_cmd_sync_cancel_sync34%of 6
hci_cmd_sync_cancel_work---of 1
hci_cmd_sync_clear67%of 3
hci_cmd_sync_complete---of 19
hci_cmd_sync_dequeue---of 13
hci_cmd_sync_dequeue_once---of 3
hci_cmd_sync_init---of 1
hci_cmd_sync_lookup_entry---of 10
hci_cmd_sync_queue---of 3
hci_cmd_sync_queue_once---of 3
hci_cmd_sync_submit---of 6
hci_cmd_sync_work---of 19
hci_connect_acl_sync---of 9
hci_connect_le_sync---of 4
hci_delete_stored_link_key_sync---of 4
hci_dev_close_sync13%of 56
hci_dev_open_sync---of 171
hci_disable_advertising_sync---of 5
hci_disable_ext_adv_instance_sync---of 5
hci_disable_per_advertising_sync---of 5
hci_disconnect_all_sync.constprop.0---of 24
hci_disconnect_sync---of 8
hci_enable_advertising---of 5
hci_enable_advertising_sync---of 19
hci_enable_ext_advertising_sync---of 8
hci_get_mws_transport_config_sync---of 5
hci_get_random_address---of 22
hci_inquiry_sync---of 10
hci_le_add_accept_list_sync---of 29
hci_le_add_resolve_list_sync---of 26
hci_le_big_terminate_sync---of 1
hci_le_clear_accept_list_sync---of 9
hci_le_clear_resolv_list_sync---of 9
hci_le_connect_cancel_sync---of 6
hci_le_create_cis_sync---of 33
hci_le_create_conn_sync---of 81
hci_le_del_accept_list_sync.isra.0---of 10
hci_le_del_resolve_list_sync.isra.0---of 5
hci_le_pa_terminate_sync---of 1
hci_le_read_accept_list_size_sync---of 9
hci_le_read_adv_tx_power_sync---of 5
hci_le_read_buffer_size_sync---of 11
hci_le_read_def_data_len_sync---of 9
hci_le_read_local_features_sync---of 8
hci_le_read_max_data_len_sync---of 9
hci_le_read_num_support_adv_sets_sync---of 9
hci_le_read_resolv_list_size_sync---of 9
hci_le_read_supported_states_sync---of 8
hci_le_read_tx_power_sync---of 5
hci_le_remove_cig_sync---of 1
hci_le_set_addr_resolution_enable_sync---of 5
hci_le_set_default_phy_sync---of 9
hci_le_set_event_mask_sync---of 42
hci_le_set_host_feature_sync---of 3
hci_le_set_rpa_timeout_sync---of 4
hci_le_set_scan_enable_sync---of 8
hci_le_set_write_def_data_len_sync---of 3
hci_le_terminate_big_sync---of 1
hci_lookup_le_connect---of 23
hci_pause_advertising_sync---of 17
hci_powered_update_sync---of 29
hci_read_bd_addr_sync---of 8
hci_read_buffer_size_sync---of 8
hci_read_clock_sync---of 8
hci_read_current_iac_lap_sync---of 8
hci_read_data_block_size_sync---of 8
hci_read_def_err_data_reporting_sync---of 6
hci_read_dev_class_sync---of 8
hci_read_flow_control_mode_sync---of 8
hci_read_inq_rsp_tx_power_sync---of 9
hci_read_local_amp_info_sync---of 8
hci_read_local_cmds_sync---of 5
hci_read_local_codecs_sync---of 7
hci_read_local_ext_features_1_sync---of 3
hci_read_local_ext_features_all_sync---of 9
hci_read_local_features_sync---of 10
hci_read_local_name_sync---of 8
hci_read_local_oob_data_sync---of 2
hci_read_local_pairing_opts_sync---of 9
hci_read_local_version_sync---of 8
hci_read_location_data_sync---of 8
hci_read_num_supported_iac_sync---of 8
hci_read_page_scan_activity_sync---of 9
hci_read_page_scan_type_sync---of 9
hci_read_rssi_sync---of 7
hci_read_stored_link_key_sync---of 4
hci_read_sync_train_params_sync---of 9
hci_read_tx_power_sync---of 7
hci_read_voice_setting_sync---of 8
hci_remove_advertising_sync---of 32
hci_remove_ext_adv_instance---of 4
hci_remove_ext_adv_instance_sync---of 6
hci_reset_sync---of 1
hci_resume_advertising_sync.isra.0---of 14
hci_resume_sync---of 3
hci_resume_sync.part.0---of 4
hci_scan_disable_sync---of 11
hci_schedule_adv_instance_sync---of 19
hci_set_adv_data_sync---of 6
hci_set_adv_set_random_addr_sync---of 4
hci_set_err_data_report_sync---of 6
hci_set_event_filter_sync---of 6
hci_set_event_mask_page_2_sync---of 11
hci_set_event_mask_sync---of 37
hci_set_ext_adv_data_sync---of 10
hci_set_ext_scan_rsp_data_sync---of 10
hci_set_le_support_sync---of 7
hci_set_per_adv_data_sync---of 5
hci_set_powered_sync---of 31
hci_set_random_addr_sync---of 30
hci_setup_ext_adv_instance_sync---of 30
hci_setup_link_policy_sync---of 9
hci_start_discovery_sync---of 22
hci_start_ext_adv_sync---of 5
hci_start_per_adv_sync---of 26
hci_start_scan_sync---of 48
hci_stop_discovery_sync---of 19
hci_suspend_sync---of 39
hci_update_adv_data---of 3
hci_update_adv_data_sync---of 6
hci_update_class_sync---of 15
hci_update_connectable_sync---of 13
hci_update_discoverable---of 7
hci_update_discoverable_sync---of 18
hci_update_eir_sync---of 12
hci_update_name_sync---of 1
hci_update_passive_scan---of 13
hci_update_passive_scan_sync---of 165
hci_update_random_address_sync---of 24
hci_update_scan---of 3
hci_update_scan_rsp_data_sync---of 10
hci_update_scan_sync---of 38
hci_write_auth_enable_sync---of 3
hci_write_ca_timeout_sync---of 7
hci_write_eir_sync---of 4
hci_write_fast_connectable_sync---of 11
hci_write_inquiry_mode_sync---of 4
hci_write_le_host_supported_sync---of 6
hci_write_sc_support_1_sync---of 5
hci_write_sc_support_sync---of 8
hci_write_ssp_mode_1_sync---of 4
hci_write_ssp_mode_sync---of 8
interleaved_inquiry_sync---of 1
is_advertising_allowed---of 15
le_scan_disable---of 20
reenable_adv---of 9
reenable_adv_sync---of 15
remove_ext_adv_sync---of 8
scan_disable_sync---of 1
set_ext_conn_params---of 1
update_discoverable_sync---of 1
update_passive_scan_sync---of 1
update_scan_sync---of 1
-----------
SUMMARY17%of 65
hugetlb_get_unmapped_area---of 28
pmd_huge---of 3
pud_huge100%of 3
-----------
SUMMARY100%of 3
usb_acpi_bus_match67%of 3
usb_acpi_find_companion25%of 20
usb_acpi_get_companion_for_port63%of 8
usb_acpi_port_lpm_incapable---of 14
usb_acpi_power_manageable---of 3
usb_acpi_register---of 1
usb_acpi_set_power_state---of 11
usb_acpi_unregister---of 1
-----------
SUMMARY39%of 31
__copy_xstate_to_uabi_buf---of 29
__raw_xsave_addr---of 9
__raw_xsave_addr.cold---of 2
__xfd_enable_feature---of 51
arch_set_user_pkey_access---of 14
arch_set_user_pkey_access.cold---of 2
copy_from_buffer---of 6
copy_sigframe_from_user_to_xstate---of 1
copy_uabi_from_kernel_to_xstate---of 1
copy_uabi_to_xstate---of 21
copy_xstate_to_uabi_buf---of 1
cpu_has_xfeatures---of 4
fpstate_free---of 3
fpu__init_cpu_xstate---of 3
fpu__init_cpu_xstate.part.0---of 15
fpu__resume_cpu---of 10
fpu_xstate_prctl---of 30
get_xsave_addr---of 7
get_xsave_addr.cold---of 2
membuf_write.isra.0---of 2
proc_pid_arch_status---of 6
validate_independent_components---of 8
xfd_enable_feature---of 1
xfd_validate_state17%of 6
xfeature_get_offset---of 12
xfeature_size---of 5
xrstors---of 3
xsaves---of 3
xstate_calculate_size---of 5
xstate_get_guest_group_perm---of 1
-----------
SUMMARY17%of 6
__do_compat_sys_wait4---of 5
__do_compat_sys_waitid---of 18
__do_sys_wait4---of 5
__do_sys_waitid---of 15
__do_wait---of 36
__ia32_compat_sys_wait4---of 1
__ia32_compat_sys_waitid---of 1
__ia32_sys_exit---of 1
__ia32_sys_exit_group---of 1
__ia32_sys_wait4---of 1
__ia32_sys_waitid---of 1
__ia32_sys_waitpid---of 1
__wake_up_parent---of 1
__x64_compat_sys_wait4---of 1
__x64_compat_sys_waitid---of 1
__x64_sys_exit---of 1
__x64_sys_exit_group---of 1
__x64_sys_wait4---of 1
__x64_sys_waitid---of 1
__x64_sys_waitpid---of 1
abort---of 1
child_wait_callback---of 9
cpu_online---of 3
delayed_put_task_struct---of 20
do_exit---of 150
do_group_exit---of 9
do_wait---of 22
find_alive_thread---of 5
is_current_pgrp_orphaned---of 1
kernel_wait---of 4
kernel_wait4---of 12
kernel_waitid---of 6
kernel_waitid_prepare---of 16
kill_orphaned_pgrp---of 16
make_task_dead---of 21
mm_update_next_owner---of 39
oops_count_show---of 1
pid_child_should_wake---of 10
put_task_struct_rcu_user80%of 5
rcuwait_wake_up---of 18
release_task---of 71
wait_consider_task---of 172
will_become_orphaned_pgrp---of 15
-----------
SUMMARY80%of 5
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
kernfs_create_link63%of 8
kernfs_iop_get_link---of 36
-----------
SUMMARY63%of 8
__bpf_trace_regulator_basic---of 1
__bpf_trace_regulator_range---of 1
__bpf_trace_regulator_value---of 1
__suspend_set_state---of 27
__traceiter_regulator_bypass_disable---of 3
__traceiter_regulator_bypass_disable_complete---of 3
__traceiter_regulator_bypass_enable---of 3
__traceiter_regulator_bypass_enable_complete---of 3
__traceiter_regulator_disable---of 3
__traceiter_regulator_disable_complete---of 3
__traceiter_regulator_enable---of 3
__traceiter_regulator_enable_complete---of 3
__traceiter_regulator_enable_delay---of 3
__traceiter_regulator_set_voltage---of 3
__traceiter_regulator_set_voltage_complete---of 3
_regulator_bulk_get---of 25
_regulator_call_set_voltage_sel---of 4
_regulator_delay_helper---of 6
_regulator_disable---of 29
_regulator_do_disable---of 47
_regulator_do_enable---of 95
_regulator_do_set_voltage---of 96
_regulator_enable---of 37
_regulator_get---of 7
_regulator_get.part.0---of 40
_regulator_get_error_flags---of 12
_regulator_handle_consumer_disable---of 13
_regulator_list_voltage---of 21
_regulator_put---of 8
_regulator_set_voltage_time.isra.0---of 11
bypass_show---of 3
constraint_flags_read_file---of 4
create_regulator---of 41
destroy_regulator---of 12
drms_uA_update---of 59
fail_show---of 4
generic_coupler_attach---of 11
handle_notify_limits---of 15
max_microamps_show---of 5
max_microvolts_show---of 5
microamps_show---of 8
microvolts_show---of 10
min_microamps_show---of 5
min_microvolts_show---of 5
name_show---of 4
num_users_show---of 1
of_get_child_regulator---of 7
of_parse_phandle.constprop.0---of 3
opmode_show---of 10
over_current_show---of 4
over_current_warn_show---of 4
over_temp_show---of 4
over_temp_warn_show---of 4
over_voltage_warn_show---of 4
perf_trace_regulator_basic---of 6
perf_trace_regulator_range---of 6
perf_trace_regulator_value---of 6
print_constraints_debug---of 53
rdev_get_dev---of 1
rdev_get_drvdata---of 1
rdev_get_id---of 1
rdev_get_name---of 5
rdev_get_regmap---of 1
rdev_init_debugfs---of 15
regulation_out_show---of 4
regulator_allow_bypass---of 85
regulator_attr_is_visible---of 52
regulator_balance_voltage---of 11
regulator_bulk_disable---of 8
regulator_bulk_enable---of 11
regulator_bulk_enable_async---of 1
regulator_bulk_force_disable---of 5
regulator_bulk_free---of 2
regulator_bulk_get---of 1
regulator_bulk_register_supply_alias---of 10
regulator_bulk_unregister_supply_alias10%of 11
regulator_check_consumers---of 15
regulator_check_voltage---of 19
regulator_count_voltages---of 6
regulator_coupler_register---of 3
regulator_dev_lookup---of 36
regulator_dev_release---of 1
regulator_disable---of 1
regulator_disable_deferred---of 9
regulator_disable_work---of 15
regulator_do_balance_voltage---of 70
regulator_ena_gpio_ctrl.isra.0---of 9
regulator_ena_gpio_free---of 10
regulator_enable---of 1
regulator_force_disable---of 18
regulator_get---of 5
regulator_get_current_limit---of 8
regulator_get_drvdata---of 1
regulator_get_error_flags---of 1
regulator_get_exclusive---of 5
regulator_get_hardware_vsel_register---of 3
regulator_get_init_drvdata---of 1
regulator_get_linear_step---of 1
regulator_get_mode---of 8
regulator_get_optional---of 5
regulator_get_regmap---of 2
regulator_get_suspend_state_check---of 13
regulator_get_voltage---of 1
regulator_get_voltage_rdev---of 25
regulator_has_full_constraints---of 1
regulator_init_complete_work_function---of 3
regulator_is_enabled---of 12
regulator_is_supported_voltage---of 22
regulator_late_cleanup---of 33
regulator_list_hardware_vsel---of 5
regulator_list_voltage---of 1
regulator_lock_dependent---of 22
regulator_lock_recursive---of 25
regulator_map_voltage---of 11
regulator_match---of 4
regulator_mode_constrain---of 18
regulator_mode_to_status---of 3
regulator_notifier_call_chain---of 4
regulator_ops_is_valid---of 5
regulator_put---of 1
regulator_register---of 155
regulator_register_notifier---of 1
regulator_register_resolve_supply---of 7
regulator_register_supply_alias---of 15
regulator_remove_coupling---of 22
regulator_resolve_coupling---of 13
regulator_resolve_supply---of 89
regulator_resume---of 16
regulator_set_current_limit---of 21
regulator_set_drvdata---of 1
regulator_set_load---of 10
regulator_set_mode---of 11
regulator_set_suspend_voltage---of 16
regulator_set_voltage---of 1
regulator_set_voltage_rdev---of 30
regulator_set_voltage_time---of 20
regulator_set_voltage_time_sel---of 6
regulator_set_voltage_unlocked---of 15
regulator_summary_lock---of 25
regulator_summary_lock_one---of 11
regulator_summary_open---of 1
regulator_summary_show---of 7
regulator_summary_show_children---of 5
regulator_summary_show_roots---of 4
regulator_summary_show_subtree---of 29
regulator_summary_unlock_one---of 4
regulator_suspend---of 8
regulator_suspend_disable---of 13
regulator_suspend_enable---of 10
regulator_sync_voltage---of 21
regulator_sync_voltage_rdev---of 10
regulator_unlock---of 5
regulator_unlock_dependent---of 6
regulator_unlock_recursive---of 12
regulator_unregister---of 13
regulator_unregister_notifier---of 1
regulator_unregister_supply_alias---of 9
requested_microamps_show---of 10
set_machine_constraints---of 234
state_show---of 14
status_show---of 6
supply_map_open---of 1
supply_map_show---of 6
suspend_disk_microvolts_show---of 1
suspend_disk_mode_show---of 3
suspend_disk_state_show---of 7
suspend_mem_microvolts_show---of 1
suspend_mem_mode_show---of 3
suspend_mem_state_show---of 7
suspend_standby_microvolts_show---of 1
suspend_standby_mode_show---of 3
suspend_standby_state_show---of 7
trace_event_raw_event_regulator_basic---of 7
trace_event_raw_event_regulator_range---of 7
trace_event_raw_event_regulator_value---of 7
trace_raw_output_regulator_basic---of 5
trace_raw_output_regulator_range---of 5
trace_raw_output_regulator_value---of 5
type_show---of 7
under_voltage_show---of 4
under_voltage_warn_show---of 4
unset_regulator_supplies---of 6
-----------
SUMMARY10%of 11
pctv452e_frontend_attach---of 22
pctv452e_i2c_func---of 1
pctv452e_i2c_msg---of 11
pctv452e_i2c_xfer---of 9
pctv452e_power_ctrl---of 11
pctv452e_rc_query---of 16
pctv452e_read_mac_address---of 7
pctv452e_tuner_attach---of 8
pctv452e_usb_disconnect50%of 8
pctv452e_usb_probe---of 3
stb6100_get_bandwidth---of 5
stb6100_get_frequency---of 5
stb6100_set_bandwidth---of 5
stb6100_set_frequency---of 5
tt3650_ci_msg---of 7
tt3650_ci_poll_slot_status---of 4
tt3650_ci_read_attribute_mem---of 4
tt3650_ci_read_cam_control---of 4
tt3650_ci_slot_reset---of 6
tt3650_ci_slot_shutdown---of 5
tt3650_ci_slot_ts_enable---of 5
tt3650_ci_write_attribute_mem---of 3
tt3650_ci_write_cam_control---of 3
-----------
SUMMARY50%of 8
__pm_relax---of 4
__pm_stay_awake---of 2
__pm_stay_awake.part.0---of 5
device_set_wakeup_capable72%of 7
device_set_wakeup_enable---of 8
device_wakeup_arm_wake_irqs---of 5
device_wakeup_attach_irq---of 4
device_wakeup_detach_irq---of 2
device_wakeup_disable100%of 5
device_wakeup_disarm_wake_irqs---of 5
device_wakeup_enable---of 17
pm_get_wakeup_count---of 8
pm_print_active_wakeup_sources---of 19
pm_relax---of 6
pm_save_wakeup_count---of 4
pm_stay_awake---of 4
pm_system_cancel_wakeup---of 3
pm_system_irq_wakeup---of 10
pm_system_wakeup---of 1
pm_wakeup_clear---of 5
pm_wakeup_dev_event75%of 4
pm_wakeup_irq---of 1
pm_wakeup_pending---of 15
pm_wakeup_timer_fn---of 5
pm_wakeup_ws_event---of 2
pm_wakeup_ws_event.part.0---of 12
print_wakeup_source_stats.isra.0---of 6
wakeup_source_activate---of 20
wakeup_source_add---of 5
wakeup_source_create---of 7
wakeup_source_deactivate---of 22
wakeup_source_destroy---of 4
wakeup_source_record100%of 3
wakeup_source_register---of 7
wakeup_source_remove80%of 5
wakeup_source_unregister---of 2
wakeup_source_unregister.part.080%of 5
wakeup_sources_read_lock---of 1
wakeup_sources_read_unlock---of 3
wakeup_sources_stats_open---of 1
wakeup_sources_stats_seq_next---of 5
wakeup_sources_stats_seq_show---of 1
wakeup_sources_stats_seq_start---of 9
wakeup_sources_stats_seq_stop---of 3
wakeup_sources_walk_next---of 4
wakeup_sources_walk_start---of 1
-----------
SUMMARY83%of 29
errseq_check---of 4
errseq_check_and_advance---of 5
errseq_sample100%of 2
errseq_set---of 10
-----------
SUMMARY100%of 2
__netdev_watchdog_up---of 12
__qdisc_destroy47%of 26
__qdisc_run---of 88
__skb_array_destroy_skb---of 1
dev_activate---of 88
dev_deactivate---of 5
dev_deactivate_many51%of 53
dev_deactivate_queue67%of 9
dev_graft_qdisc---of 7
dev_init_scheduler---of 15
dev_qdisc_change_real_num_tx---of 7
dev_qdisc_change_tx_queue_len---of 14
dev_reset_queue56%of 9
dev_shutdown39%of 21
dev_trans_start---of 7
dev_watchdog---of 39
mini_qdisc_pair_block_init---of 1
mini_qdisc_pair_init---of 1
mini_qdisc_pair_swap---of 9
mq_change_real_num_tx---of 24
netif_carrier_event---of 2
netif_carrier_off---of 3
netif_carrier_on---of 4
netif_freeze_queues100%of 2
netif_tx_lock---of 1
netif_tx_unlock---of 1
netif_unfreeze_queues100%of 2
noop_dequeue---of 1
noop_enqueue---of 1
noqueue_init---of 1
pfifo_fast_change_tx_queue_len---of 35
pfifo_fast_dequeue---of 25
pfifo_fast_destroy100%of 3
pfifo_fast_dump---of 3
pfifo_fast_enqueue---of 13
pfifo_fast_init---of 10
pfifo_fast_peek---of 7
pfifo_fast_reset50%of 20
psched_ppscfg_precompute---of 6
psched_ratecfg_precompute---of 6
qdisc_alloc---of 25
qdisc_create_dflt---of 24
qdisc_destroy---of 2
qdisc_free---of 4
qdisc_free_cb---of 3
qdisc_maybe_clear_missed---of 3
qdisc_put86%of 7
qdisc_put_unlocked---of 3
qdisc_reset50%of 24
sch_direct_xmit---of 33
shutdown_scheduler_queue58%of 7
transition_one_qdisc---of 9
-----------
SUMMARY54%of 183
-----------
SUMMARY---of 0
__dev_fwnode60%of 5
__dev_fwnode_const---of 5
device_dma_supported---of 10
device_get_child_node_count---of 31
device_get_dma_attr---of 10
device_get_match_data---of 10
device_get_named_child_node---of 10
device_get_next_child_node---of 17
device_get_phy_mode---of 5
device_property_match_string---of 4
device_property_present---of 4
device_property_read_string---of 4
device_property_read_string_array---of 4
device_property_read_u16_array---of 4
device_property_read_u32_array---of 4
device_property_read_u64_array---of 4
device_property_read_u8_array---of 4
fwnode_connection_find_match---of 6
fwnode_connection_find_matches---of 6
fwnode_count_parents---of 16
fwnode_devcon_matches---of 19
fwnode_device_is_available---of 8
fwnode_find_reference---of 4
fwnode_get_name---of 7
fwnode_get_name_prefix---of 7
fwnode_get_named_child_node---of 7
fwnode_get_next_available_child_node---of 11
fwnode_get_next_child_node---of 7
fwnode_get_next_parent---of 11
fwnode_get_nth_parent---of 24
fwnode_get_parent---of 7
fwnode_get_phy_mode---of 7
fwnode_graph_devcon_matches---of 33
fwnode_graph_get_endpoint_by_id---of 38
fwnode_graph_get_endpoint_count---of 23
fwnode_graph_get_next_endpoint---of 15
fwnode_graph_get_port_parent---of 16
fwnode_graph_get_remote_endpoint---of 7
fwnode_graph_get_remote_port---of 16
fwnode_graph_get_remote_port_parent---of 12
fwnode_graph_parse_endpoint---of 8
fwnode_handle_get---of 7
fwnode_handle_put20%of 5
fwnode_iomap---of 7
fwnode_irq_get---of 8
fwnode_irq_get_byname---of 4
fwnode_name_eq---of 9
fwnode_property_get_reference_args---of 11
fwnode_property_match_property_string---of 4
fwnode_property_match_string---of 7
fwnode_property_present---of 12
fwnode_property_read_int_array---of 11
fwnode_property_read_string---of 1
fwnode_property_read_string_array---of 13
fwnode_property_read_u16_array---of 1
fwnode_property_read_u32_array---of 1
fwnode_property_read_u64_array---of 1
fwnode_property_read_u8_array---of 1
-----------
SUMMARY40%of 10
__radix_tree_delete48%of 17
__radix_tree_lookup88%of 16
__radix_tree_lookup.cold---of 2
__radix_tree_preload15%of 28
__radix_tree_replace37%of 22
delete_node38%of 35
idr_destroy17%of 12
idr_get_free54%of 41
idr_get_free.cold---of 6
idr_preload13%of 8
node_tag_clear63%of 8
node_tag_clear.cold---of 2
radix_tree_cpu_dead---of 3
radix_tree_delete---of 1
radix_tree_delete_item46%of 11
radix_tree_extend---of 23
radix_tree_extend.cold---of 1
radix_tree_gang_lookup---of 18
radix_tree_gang_lookup_tag---of 17
radix_tree_gang_lookup_tag.cold---of 1
radix_tree_gang_lookup_tag_slot---of 13
radix_tree_gang_lookup_tag_slot.cold---of 1
radix_tree_insert---of 33
radix_tree_insert.cold---of 2
radix_tree_iter_delete---of 2
radix_tree_iter_replace100%of 1
radix_tree_iter_resume---of 1
radix_tree_iter_tag_clear100%of 1
radix_tree_lookup100%of 1
radix_tree_lookup_slot---of 3
radix_tree_maybe_preload---of 11
radix_tree_next_chunk14%of 45
radix_tree_next_chunk.cold---of 8
radix_tree_node_alloc.constprop.050%of 12
radix_tree_node_ctor100%of 1
radix_tree_node_rcu_free---of 1
radix_tree_preload---of 3
radix_tree_replace_slot---of 9
radix_tree_tag_clear---of 10
radix_tree_tag_clear.cold---of 2
radix_tree_tag_get92%of 12
radix_tree_tag_get.cold---of 3
radix_tree_tag_set---of 16
radix_tree_tag_set.cold---of 4
radix_tree_tagged---of 2
radix_tree_tagged.cold---of 1
-----------
SUMMARY41%of 271
ida_alloc_range47%of 49
ida_destroy10%of 21
ida_free87%of 15
ida_free.cold---of 2
idr_alloc---of 7
idr_alloc_cyclic64%of 11
idr_alloc_u3275%of 8
idr_find100%of 1
idr_for_each37%of 11
idr_get_next75%of 4
idr_get_next_ul37%of 19
idr_remove100%of 1
idr_replace100%of 4
-----------
SUMMARY50%of 144
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__build_flow_key.constprop.0---of 7
__ip_do_redirect---of 95
__ip_do_redirect.cold---of 1
__ip_rt_update_pmtu---of 21
__ip_select_ident---of 6
__ipv4_sk_update_pmtu---of 7
dst_discard---of 1
fib_dump_info_fnhe---of 99
fib_lookup.constprop.0---of 34
fib_multipath_hash---of 77
find_exception---of 48
fnhe_flush_routes---of 14
fnhe_hashfun---of 5
inet_rtm_getroute---of 121
ip_do_redirect---of 1
ip_dst_mtu_maybe_forward.constprop.0---of 8
ip_error---of 26
ip_handle_martian_source---of 7
ip_mc_validate_source---of 12
ip_mtu_from_fib_result---of 9
ip_multipath_l3_keys.constprop.0---of 12
ip_route_input_noref---of 14
ip_route_input_rcu.part.0---of 39
ip_route_input_slow---of 166
ip_route_output_flow---of 4
ip_route_output_key_hash---of 13
ip_route_output_key_hash_rcu---of 122
ip_route_use_hint---of 26
ip_rt_bug---of 5
ip_rt_do_proc_exit---of 1
ip_rt_do_proc_init---of 4
ip_rt_get_source---of 19
ip_rt_multicast_event100%of 1
ip_rt_send_redirect---of 61
ip_rt_send_redirect.cold---of 1
ip_rt_update_pmtu---of 28
ipv4_blackhole_route---of 16
ipv4_confirm_neigh---of 66
ipv4_confirm_neigh.cold---of 2
ipv4_cow_metrics---of 1
ipv4_default_advmss---of 1
ipv4_dst_check---of 4
ipv4_dst_destroy---of 10
ipv4_inetpeer_exit---of 1
ipv4_inetpeer_init---of 3
ipv4_link_failure---of 47
ipv4_mtu---of 8
ipv4_negative_advice---of 7
ipv4_neigh_lookup---of 97
ipv4_neigh_lookup.cold---of 3
ipv4_redirect---of 3
ipv4_sk_redirect---of 3
ipv4_sk_update_pmtu---of 51
ipv4_sysctl_rtcache_flush---of 3
ipv4_update_pmtu---of 5
netns_ip_rt_init---of 1
rt_add_uncached_list---of 3
rt_cache_flush100%of 1
rt_cache_route---of 12
rt_cache_seq_next---of 1
rt_cache_seq_show---of 4
rt_cache_seq_start---of 1
rt_cpu_seq_next---of 9
rt_cpu_seq_show---of 4
rt_cpu_seq_start---of 8
rt_del_uncached_list---of 4
rt_dst_alloc---of 4
rt_dst_clone---of 7
rt_fill_info---of 56
rt_flush_dev38%of 27
rt_genid_init---of 1
rt_set_nexthop.constprop.0---of 44
sysctl_route_net_exit---of 3
sysctl_route_net_init---of 13
update_or_create_fnhe---of 83
-----------
SUMMARY42%of 29
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
queue_access_lock---of 7
queue_broadcast_event.isra.0---of 1
queue_delete---of 1
queue_list_remove29%of 7
queue_use---of 11
queueptr80%of 5
snd_seq_check_queue---of 17
snd_seq_control_queue---of 22
snd_seq_enqueue_event---of 20
snd_seq_info_queues_read---of 10
snd_seq_queue_alloc---of 11
snd_seq_queue_check_access---of 15
snd_seq_queue_client_leave56%of 9
snd_seq_queue_client_leave_cells---of 3
snd_seq_queue_delete---of 4
snd_seq_queue_find_name---of 6
snd_seq_queue_get_cur_queues---of 1
snd_seq_queue_is_used---of 3
snd_seq_queue_remove_cells---of 8
snd_seq_queue_set_owner---of 8
snd_seq_queue_timer_close---of 3
snd_seq_queue_timer_open---of 5
snd_seq_queue_timer_set_tempo---of 10
snd_seq_queue_use---of 3
snd_seq_queues_delete---of 3
-----------
SUMMARY53%of 21
-----------
SUMMARY---of 0
arvo_probe---of 18
arvo_raw_event---of 5
arvo_remove100%of 5
arvo_sysfs_read_info---of 6
arvo_sysfs_set_actual_profile---of 7
arvo_sysfs_set_key_mask---of 5
arvo_sysfs_set_mode_key---of 5
arvo_sysfs_show_actual_profile---of 1
arvo_sysfs_show_key_mask---of 4
arvo_sysfs_show_mode_key---of 4
arvo_sysfs_write_button---of 5
-----------
SUMMARY100%of 5
-----------
SUMMARY---of 0
__bpf_trace_gpio_direction---of 1
__bpf_trace_gpio_value---of 1
__traceiter_gpio_direction---of 3
__traceiter_gpio_value---of 3
desc_to_gpio---of 1
fwnode_gpiod_get_index---of 1
gpio_bus_match---of 3
gpio_chip_get_multiple.part.0---of 7
gpio_chip_match_by_fwnode---of 1
gpio_chip_match_by_label---of 3
gpio_chip_set_multiple.part.0---of 2
gpio_device_chip_cmp---of 11
gpio_device_find---of 22
gpio_device_find_by_fwnode---of 1
gpio_device_find_by_label---of 1
gpio_device_get---of 1
gpio_device_get_base---of 1
gpio_device_get_chip---of 1
gpio_device_get_desc---of 3
gpio_device_get_label---of 1
gpio_device_put---of 1
gpio_device_to_device---of 1
gpio_name_to_desc---of 29
gpio_set_bias---of 5
gpio_set_config_with_argument---of 12
gpio_set_debounce_timeout---of 2
gpio_set_open_drain_value_commit---of 20
gpio_set_open_source_value_commit---of 20
gpio_stub_drv_probe---of 1
gpio_to_desc---of 12
gpiochip_add_data_with_key---of 196
gpiochip_allocate_mask---of 3
gpiochip_child_offset_to_irq_noop---of 1
gpiochip_disable_irq---of 5
gpiochip_dup_line_label---of 10
gpiochip_enable_irq---of 8
gpiochip_free_hogs84%of 6
gpiochip_free_own_desc---of 2
gpiochip_generic_config---of 1
gpiochip_generic_request---of 1
gpiochip_get_data---of 1
gpiochip_get_desc---of 3
gpiochip_get_ngpios---of 14
gpiochip_hierarchy_irq_domain_alloc---of 17
gpiochip_hierarchy_irq_domain_translate---of 10
gpiochip_irq_disable---of 1
gpiochip_irq_domain_activate---of 1
gpiochip_irq_domain_deactivate---of 1
gpiochip_irq_enable---of 1
gpiochip_irq_map---of 12
gpiochip_irq_mask---of 3
gpiochip_irq_relres---of 1
gpiochip_irq_reqres---of 1
gpiochip_irq_unmap---of 3
gpiochip_irq_unmask---of 2
gpiochip_irqchip_add_domain---of 5
gpiochip_irqchip_irq_valid.part.0.isra.067%of 3
gpiochip_irqchip_remove63%of 24
gpiochip_line_is_irq---of 3
gpiochip_line_is_open_drain---of 3
gpiochip_line_is_open_source---of 3
gpiochip_line_is_persistent---of 3
gpiochip_line_is_valid67%of 3
gpiochip_lock_as_irq---of 12
gpiochip_machine_hog---of 10
gpiochip_populate_parent_fwspec_fourcell---of 1
gpiochip_populate_parent_fwspec_twocell---of 1
gpiochip_relres_irq---of 1
gpiochip_remove100%of 3
gpiochip_reqres_irq---of 4
gpiochip_request_own_desc---of 10
gpiochip_setup_dev---of 9
gpiochip_to_irq---of 7
gpiochip_unlock_as_irq---of 3
gpiod_add_hogs---of 8
gpiod_add_lookup_table---of 1
gpiod_add_lookup_tables---of 5
gpiod_cansleep---of 6
gpiod_configure_flags---of 47
gpiod_count---of 24
gpiod_direction_input---of 30
gpiod_direction_output---of 26
gpiod_direction_output_raw---of 6
gpiod_direction_output_raw_commit---of 28
gpiod_disable_hw_timestamp_ns---of 26
gpiod_enable_hw_timestamp_ns---of 26
gpiod_find_and_request---of 69
gpiod_find_lookup_table---of 11
gpiod_free---of 4
gpiod_free_commit---of 14
gpiod_get---of 5
gpiod_get_array---of 50
gpiod_get_array_optional---of 4
gpiod_get_array_value---of 3
gpiod_get_array_value_cansleep---of 3
gpiod_get_array_value_complex---of 67
gpiod_get_direction---of 21
gpiod_get_index---of 5
gpiod_get_index_optional---of 7
gpiod_get_label---of 10
gpiod_get_optional---of 7
gpiod_get_raw_array_value---of 3
gpiod_get_raw_array_value_cansleep---of 3
gpiod_get_raw_value---of 8
gpiod_get_raw_value_cansleep---of 6
gpiod_get_raw_value_commit---of 16
gpiod_get_value---of 10
gpiod_get_value_cansleep---of 8
gpiod_hog---of 29
gpiod_is_active_low---of 6
gpiod_line_state_notify---of 1
gpiod_put---of 2
gpiod_put_array---of 5
gpiod_remove_hogs---of 5
gpiod_remove_lookup_table---of 4
gpiod_request---of 22
gpiod_request_commit---of 20
gpiod_set_array_value---of 3
gpiod_set_array_value_cansleep---of 3
gpiod_set_array_value_complex---of 69
gpiod_set_config---of 16
gpiod_set_consumer_name---of 7
gpiod_set_debounce---of 1
gpiod_set_raw_array_value---of 3
gpiod_set_raw_array_value_cansleep---of 3
gpiod_set_raw_value---of 6
gpiod_set_raw_value_cansleep---of 4
gpiod_set_raw_value_commit---of 11
gpiod_set_transitory---of 10
gpiod_set_value---of 6
gpiod_set_value_cansleep---of 4
gpiod_set_value_nocheck---of 8
gpiod_to_chip---of 4
gpiod_to_gpio_device---of 3
gpiod_to_irq---of 19
gpiod_toggle_active_low---of 4
gpiodev_release100%of 3
gpiolib_open---of 4
gpiolib_seq_next---of 2
gpiolib_seq_show---of 63
gpiolib_seq_start---of 6
gpiolib_seq_stop---of 3
perf_trace_gpio_direction---of 5
perf_trace_gpio_value---of 5
trace_event_raw_event_gpio_direction---of 6
trace_event_raw_event_gpio_value---of 6
trace_gpio_direction---of 15
trace_gpio_value---of 15
trace_raw_output_gpio_direction---of 6
trace_raw_output_gpio_value---of 6
-----------
SUMMARY72%of 42
sound_devnode100%of 5
-----------
SUMMARY100%of 5
alloc_ucounts---of 32
dec_rlimit_put_ucounts100%of 1
dec_rlimit_ucounts---of 8
dec_ucount---of 11
do_dec_rlimit_put_ucounts86%of 7
get_ucounts---of 4
inc_rlimit_get_ucounts77%of 13
inc_rlimit_ucounts---of 7
inc_ucount---of 10
is_rlimit_overlimit---of 7
put_ucounts16%of 13
retire_userns_sysctls---of 1
set_is_seen---of 1
set_lookup---of 1
set_permissions---of 5
setup_userns_sysctls---of 7
-----------
SUMMARY56%of 34
drm_client_buffer_addfb---of 10
drm_client_buffer_delete---of 8
drm_client_buffer_vmap---of 3
drm_client_buffer_vunmap---of 1
drm_client_debugfs_init---of 1
drm_client_debugfs_internal_clients---of 3
drm_client_dev_hotplug---of 15
drm_client_dev_restore---of 9
drm_client_dev_unregister30%of 10
drm_client_framebuffer_create---of 11
drm_client_framebuffer_delete---of 8
drm_client_framebuffer_flush---of 7
drm_client_init---of 11
drm_client_register---of 9
drm_client_release---of 5
-----------
SUMMARY30%of 10
__rfkill_switch_all---of 8
hard_block_reasons_show---of 1
hard_show---of 1
index_show---of 1
name_show---of 1
persistent_show---of 1
rfkill_alloc---of 13
rfkill_blocked---of 1
rfkill_destroy100%of 2
rfkill_dev_uevent100%of 7
rfkill_epo---of 5
rfkill_find_type---of 5
rfkill_fop_ioctl---of 11
rfkill_fop_open---of 13
rfkill_fop_poll---of 6
rfkill_fop_read---of 21
rfkill_fop_release---of 8
rfkill_fop_write---of 22
rfkill_get_global_sw_state---of 1
rfkill_get_led_trigger_name---of 1
rfkill_global_led_trigger_worker---of 7
rfkill_init_sw_state---of 9
rfkill_is_epo_lock_active---of 1
rfkill_led_trigger_activate---of 6
rfkill_pause_polling---of 4
rfkill_poll---of 1
rfkill_register---of 23
rfkill_release100%of 1
rfkill_remove_epo_lock---of 2
rfkill_restore_states---of 4
rfkill_resume---of 8
rfkill_resume_polling---of 5
rfkill_send_events17%of 6
rfkill_set_block---of 21
rfkill_set_hw_state_reason---of 16
rfkill_set_led_trigger_name---of 3
rfkill_set_states---of 18
rfkill_set_sw_state---of 15
rfkill_soft_blocked---of 1
rfkill_suspend---of 1
rfkill_switch_all---of 4
rfkill_sync---of 5
rfkill_sync_work---of 1
rfkill_uevent_work---of 3
rfkill_unregister72%of 7
soft_show---of 1
soft_store---of 6
state_show---of 3
state_store---of 6
type_show---of 1
-----------
SUMMARY70%of 23
-----------
SUMMARY---of 0
sysfs_create_link84%of 6
sysfs_create_link_nowarn---of 6
sysfs_create_link_sd---of 1
sysfs_delete_link100%of 4
sysfs_do_create_link_sd46%of 11
sysfs_remove_link75%of 4
sysfs_rename_link_ns---of 10
-----------
SUMMARY68%of 25
__phonet_get43%of 7
net_generic44%of 16
phonet_address_add---of 8
phonet_address_del---of 9
phonet_address_get---of 35
phonet_address_lookup---of 17
phonet_device_exit---of 1
phonet_device_get---of 25
phonet_device_list---of 1
phonet_device_notify28%of 37
phonet_exit_net---of 2
phonet_init_net---of 3
phonet_route_add---of 9
phonet_route_del---of 6
phonet_route_get_rcu---of 6
phonet_route_output---of 29
-----------
SUMMARY34%of 60
async_complete---of 14
change_mode---of 18
destroy_async---of 9
destroy_priv67%of 3
get_1284_register---of 21
parport_uss720_data_forward---of 2
parport_uss720_data_reverse---of 2
parport_uss720_disable_irq---of 2
parport_uss720_ecp_read_data---of 6
parport_uss720_ecp_write_addr---of 6
parport_uss720_ecp_write_data---of 6
parport_uss720_enable_irq---of 2
parport_uss720_epp_read_addr---of 8
parport_uss720_epp_read_data---of 8
parport_uss720_epp_write_addr---of 9
parport_uss720_epp_write_data---of 6
parport_uss720_frob_control---of 3
parport_uss720_init_state---of 2
parport_uss720_read_control---of 1
parport_uss720_read_data---of 3
parport_uss720_read_status---of 3
parport_uss720_restore_state---of 1
parport_uss720_save_state---of 1
parport_uss720_write_compat---of 6
parport_uss720_write_control---of 2
parport_uss720_write_data---of 1
set_1284_register---of 9
submit_async_request.constprop.0---of 30
uss720_disconnect50%of 14
uss720_probe---of 26
-----------
SUMMARY53%of 17
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
bdi_alloc---of 6
bdi_debug_stats_open---of 1
bdi_debug_stats_show---of 11
bdi_dev_name---of 5
bdi_get_by_id---of 7
bdi_init---of 3
bdi_lookup_rb_node---of 12
bdi_put---of 5
bdi_register---of 1
bdi_register_va---of 25
bdi_set_owner---of 3
bdi_unregister---of 20
cgwb_free_rcu---of 1
cgwb_kill---of 12
cgwb_release---of 1
cgwb_release_workfn---of 14
cleanup_offline_cgwbs_workfn---of 51
inode_to_bdi67%of 6
max_bytes_show---of 1
max_bytes_store---of 4
max_ratio_fine_show---of 1
max_ratio_fine_store---of 4
max_ratio_show---of 1
max_ratio_store---of 4
min_bytes_show---of 1
min_bytes_store---of 4
min_ratio_fine_show---of 1
min_ratio_fine_store---of 4
min_ratio_show---of 1
min_ratio_store---of 4
percpu_ref_put_many.constprop.0---of 15
read_ahead_kb_show---of 1
read_ahead_kb_store---of 3
release_bdi---of 5
stable_pages_required_show---of 3
strict_limit_show---of 1
strict_limit_store---of 4
wb_blkcg_offline---of 3
wb_exit---of 4
wb_get_create---of 87
wb_get_lookup---of 5
wb_get_lookup.part.0---of 34
wb_init---of 7
wb_memcg_offline---of 3
wb_shutdown---of 7
wb_update_bandwidth_workfn---of 1
-----------
SUMMARY67%of 6
do_compute_shiftstate89%of 9
do_compute_shiftstate.cold---of 1
fn_SAK---of 1
fn_bare_num---of 2
fn_boot_it---of 1
fn_caps_on---of 2
fn_caps_toggle50%of 2
fn_compose---of 1
fn_dec_console---of 8
fn_enter---of 10
fn_hold20%of 5
fn_inc_console---of 8
fn_lastcons---of 1
fn_null100%of 1
fn_num50%of 4
fn_scroll_back---of 1
fn_scroll_forw---of 1
fn_send_intr---of 5
fn_show_mem---of 1
fn_show_ptregs---of 2
fn_show_state---of 1
fn_spawn_con---of 4
getkeycode_helper---of 1
handle_diacr---of 17
k_ascii86%of 7
k_brl---of 22
k_brlcommit.constprop.0---of 6
k_cons---of 2
k_cur100%of 3
k_dead---of 4
k_dead2---of 4
k_fn100%of 4
k_lock---of 4
k_lock.cold---of 1
k_lowercase---of 1
k_meta---of 4
k_pad80%of 29
k_self100%of 2
k_shift78%of 22
k_shift.cold---of 2
k_slock---of 6
k_slock.cold---of 2
k_spec84%of 6
k_unicode.part.050%of 8
kbd_bh---of 12
kbd_connect---of 6
kbd_disconnect100%of 1
kbd_event45%of 81
kbd_led_trigger_activate---of 4
kbd_match---of 7
kbd_rate---of 1
kbd_rate_helper---of 7
kbd_start100%of 7
kd_mksound---of 4
kd_nosound---of 1
kd_sound_helper---of 7
put_queue84%of 6
puts_queue100%of 1
register_keyboard_notifier---of 1
setkeycode_helper---of 1
setledstate---of 5
to_utf820%of 10
unregister_keyboard_notifier---of 1
vt_clr_kbd_mode_bit---of 2
vt_clr_kbd_mode_bit.cold---of 1
vt_do_diacrit---of 34
vt_do_kbkeycode_ioctl---of 9
vt_do_kdgkb_ioctl---of 25
vt_do_kdgkbmeta---of 1
vt_do_kdgkbmode---of 3
vt_do_kdsk_ioctl---of 43
vt_do_kdskbmeta---of 4
vt_do_kdskbmode---of 7
vt_do_kdskled---of 13
vt_get_kbd_mode_bit---of 2
vt_get_kbd_mode_bit.cold---of 1
vt_get_leds---of 2
vt_get_leds.cold---of 1
vt_get_shift_state---of 1
vt_kbd_con_start---of 3
vt_kbd_con_stop---of 3
vt_reset_keyboard---of 1
vt_reset_unicode---of 2
vt_set_kbd_mode_bit---of 2
vt_set_kbd_mode_bit.cold---of 1
vt_set_led_state---of 5
vt_set_leds_compute_shiftstate---of 3
-----------
SUMMARY63%of 208
erase_effect45%of 9
input_ff_create---of 13
input_ff_destroy100%of 4
input_ff_erase67%of 3
input_ff_event---of 13
input_ff_flush80%of 5
input_ff_upload6%of 34
-----------
SUMMARY30%of 55
-----------
SUMMARY---of 0
tomoyo_convert_time---of 1
tomoyo_correct_domain---of 9
tomoyo_correct_path---of 1
tomoyo_correct_path2---of 6
tomoyo_correct_word---of 1
tomoyo_correct_word2---of 29
tomoyo_domain_def---of 8
tomoyo_domain_quota_is_ok24%of 17
tomoyo_file_matches_pattern---of 11
tomoyo_file_matches_pattern2---of 65
tomoyo_fill_path_info65%of 14
tomoyo_find_domain---of 7
tomoyo_get_domainname---of 8
tomoyo_get_exe---of 5
tomoyo_get_mode86%of 7
tomoyo_init_request_info100%of 8
tomoyo_normalize_line---of 15
tomoyo_parse_name_union---of 5
tomoyo_parse_number_union---of 11
tomoyo_parse_ulong---of 10
tomoyo_path_matches_pattern43%of 7
tomoyo_path_matches_pattern2---of 34
tomoyo_permstr---of 4
tomoyo_print_ulong72%of 7
tomoyo_read_token---of 4
tomoyo_str_starts---of 3
-----------
SUMMARY59%of 60
__check_timeout---of 36
__hci_cmd_send---of 10
adv_instance_rpa_expired---of 3
bacpy---of 1
hci_add_acl_hdr---of 3
hci_add_adv_instance---of 18
hci_add_adv_monitor---of 9
hci_add_irk---of 6
hci_add_link_key---of 33
hci_add_ltk---of 6
hci_add_per_instance---of 7
hci_add_remote_oob_data---of 27
hci_adv_instance_flags---of 14
hci_adv_instance_is_scannable---of 11
hci_adv_instances_clear43%of 7
hci_adv_instances_set_rpa_expired---of 2
hci_adv_monitors_clear---of 4
hci_alloc_dev_priv---of 3
hci_auth_req---of 3
hci_bdaddr_is_paired---of 28
hci_bdaddr_list_add---of 12
hci_bdaddr_list_add_with_flags---of 12
hci_bdaddr_list_add_with_irk---of 11
hci_bdaddr_list_clear25%of 4
hci_bdaddr_list_del---of 12
hci_bdaddr_list_del_with_flags---of 12
hci_bdaddr_list_del_with_irk---of 12
hci_bdaddr_list_lookup---of 5
hci_bdaddr_list_lookup_with_flags---of 5
hci_bdaddr_list_lookup_with_irk---of 5
hci_blocked_keys_clear17%of 6
hci_chan_sent---of 52
hci_cmd_timeout---of 10
hci_cmd_work---of 32
hci_conn_params_add---of 15
hci_conn_params_clear_disabled---of 8
hci_conn_params_del---of 8
hci_conn_params_free---of 19
hci_conn_params_lookup---of 5
hci_copy_identity_address---of 7
hci_dev_close---of 9
hci_dev_cmd---of 36
hci_dev_do_close67%of 3
hci_dev_do_open---of 3
hci_dev_get---of 11
hci_dev_open---of 13
hci_dev_reset---of 13
hci_dev_reset_stat---of 8
hci_discovery_active---of 1
hci_discovery_set_state---of 7
hci_encrypt_req---of 3
hci_error_reset---of 14
hci_find_adv_instance---of 4
hci_find_irk_by_addr---of 24
hci_find_irk_by_rpa---of 24
hci_find_link_key---of 24
hci_find_ltk---of 27
hci_find_remote_oob_data---of 5
hci_free_adv_monitor---of 10
hci_free_dev100%of 1
hci_get_adv_monitor_offload_ext---of 1
hci_get_dev_info---of 18
hci_get_dev_list---of 12
hci_get_next_instance---of 8
hci_inq_req---of 5
hci_inquiry---of 32
hci_inquiry_cache_flush---of 5
hci_inquiry_cache_lookup---of 6
hci_inquiry_cache_lookup_resolve---of 9
hci_inquiry_cache_lookup_unknown---of 6
hci_inquiry_cache_update---of 24
hci_inquiry_cache_update_resolve---of 10
hci_is_adv_monitoring---of 4
hci_is_blocked_key---of 15
hci_link_keys_clear17%of 6
hci_linkpol_req---of 3
hci_low_sent---of 44
hci_ncmd_timeout---of 4
hci_pend_le_action_lookup---of 22
hci_pend_le_list_add---of 2
hci_pend_le_list_del_init---of 4
hci_power_off---of 4
hci_power_on---of 24
hci_prio_recalculate---of 31
hci_recv_diag---of 1
hci_recv_event_data---of 10
hci_recv_frame---of 27
hci_register_cb---of 5
hci_register_dev---of 41
hci_register_suspend_notifier---of 4
hci_release_dev63%of 8
hci_remote_oob_data_clear25%of 4
hci_remove_adv_instance---of 15
hci_remove_adv_monitor---of 5
hci_remove_all_adv_monitor---of 5
hci_remove_irk---of 10
hci_remove_link_key---of 7
hci_remove_ltk---of 13
hci_remove_remote_oob_data---of 12
hci_remove_single_adv_monitor---of 3
hci_req_cmd_complete---of 28
hci_reset_dev---of 5
hci_resume_dev---of 11
hci_rfkill_set_block---of 15
hci_rx_work---of 112
hci_scan_req---of 3
hci_sched_esco---of 14
hci_sched_sco---of 14
hci_send_acl---of 21
hci_send_cmd---of 9
hci_send_frame---of 15
hci_send_iso---of 18
hci_send_sco---of 5
hci_sent_cmd_data---of 9
hci_set_adv_instance_data---of 24
hci_set_fw_info---of 1
hci_set_hw_info---of 1
hci_smp_irks_clear17%of 6
hci_smp_ltks_clear17%of 6
hci_suspend_dev---of 14
hci_suspend_notifier---of 15
hci_tx_work---of 100
hci_unregister_cb---of 5
hci_unregister_dev73%of 18
hci_unregister_suspend_notifier---of 4
hci_uuids_clear25%of 4
-----------
SUMMARY43%of 73
-----------
SUMMARY---of 0
__qdisc_calculate_pkt_len---of 8
__qdisc_calculate_pkt_len.cold---of 2
check_loop---of 3
check_loop_fn---of 5
psched_net_exit---of 1
psched_net_init---of 2
psched_show---of 1
qdisc_class_dump---of 1
qdisc_class_hash_destroy---of 1
qdisc_class_hash_grow---of 37
qdisc_class_hash_init---of 4
qdisc_class_hash_insert---of 3
qdisc_class_hash_remove---of 3
qdisc_create---of 89
qdisc_get_default---of 6
qdisc_get_rtab---of 28
qdisc_get_rtab.cold---of 2
qdisc_get_stab---of 31
qdisc_graft---of 109
qdisc_hash_add---of 9
qdisc_hash_del12%of 9
qdisc_leaf---of 5
qdisc_lookup---of 27
qdisc_lookup_ops---of 7
qdisc_lookup_rcu---of 22
qdisc_match_from_root---of 12
qdisc_notify.isra.0---of 14
qdisc_offload_dump_helper---of 6
qdisc_offload_graft_helper---of 14
qdisc_offload_query_caps---of 3
qdisc_put_rtab---of 8
qdisc_put_stab20%of 5
qdisc_set_default---of 13
qdisc_tree_reduce_backlog---of 26
qdisc_warn_nonwc---of 2
qdisc_watchdog---of 17
qdisc_watchdog_cancel---of 1
qdisc_watchdog_init---of 1
qdisc_watchdog_init_clockid---of 1
qdisc_watchdog_schedule_range_ns---of 20
register_qdisc---of 21
tc_bind_class_walker---of 6
tc_ctl_tclass---of 75
tc_dump_qdisc---of 33
tc_dump_qdisc_root---of 24
tc_dump_tclass---of 24
tc_dump_tclass_qdisc---of 11
tc_dump_tclass_root---of 15
tc_fill_qdisc---of 57
tc_fill_tclass---of 23
tc_get_qdisc---of 66
tc_modify_qdisc---of 146
tcf_node_bind---of 29
unregister_qdisc---of 9
-----------
SUMMARY15%of 14
-----------
SUMMARY---of 0
_picolcd_flash_read---of 17
dump_buff_as_hex.constprop.0---of 8
picolcd_debug_eeprom_read---of 15
picolcd_debug_eeprom_write---of 15
picolcd_debug_flash_read---of 8
picolcd_debug_flash_write---of 39
picolcd_debug_out_report---of 41
picolcd_debug_raw_event---of 41
picolcd_debug_reset_open---of 1
picolcd_debug_reset_show---of 5
picolcd_debug_reset_write---of 11
picolcd_exit_devfs100%of 1
picolcd_init_devfs---of 15
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
__list_lru_init---of 18
__list_lru_walk_one---of 25
list_lru_add54%of 15
list_lru_add_obj80%of 5
list_lru_count_node---of 1
list_lru_count_one---of 19
list_lru_del59%of 12
list_lru_del_obj80%of 5
list_lru_destroy---of 28
list_lru_isolate---of 3
list_lru_isolate_move---of 5
list_lru_walk_node---of 7
list_lru_walk_one---of 3
list_lru_walk_one_irq---of 3
memcg_list_lru_alloc54%of 45
memcg_reparent_list_lrus---of 39
-----------
SUMMARY58%of 82
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
class_attr_show---of 3
class_attr_store---of 3
class_child_ns_type---of 1
class_compat_create_link---of 9
class_compat_register---of 4
class_compat_remove_link100%of 5
class_compat_unregister---of 1
class_create---of 5
class_create_file_ns---of 3
class_create_release---of 4
class_destroy---of 3
class_dev_iter_exit---of 2
class_dev_iter_init---of 4
class_dev_iter_next---of 5
class_find_device83%of 17
class_for_each_device---of 16
class_interface_register---of 17
class_interface_unregister---of 16
class_is_registered---of 3
class_register---of 9
class_release---of 7
class_remove_file_ns---of 2
class_to_subsys93%of 13
class_unregister---of 4
klist_class_dev_get---of 1
klist_class_dev_put100%of 1
show_class_attr_string---of 1
-----------
SUMMARY89%of 36
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__snd_device_free50%of 12
snd_device_disconnect---of 15
snd_device_disconnect_all78%of 9
snd_device_free---of 11
snd_device_free_all89%of 9
snd_device_get_state---of 7
snd_device_new---of 11
snd_device_register---of 14
snd_device_register_all---of 11
-----------
SUMMARY70%of 30
__ep_eventpoll_poll---of 17
__ep_remove---of 40
__ia32_compat_sys_epoll_pwait---of 7
__ia32_compat_sys_epoll_pwait2---of 7
__ia32_sys_epoll_create---of 3
__ia32_sys_epoll_create1---of 1
__ia32_sys_epoll_ctl---of 4
__ia32_sys_epoll_pwait---of 7
__ia32_sys_epoll_pwait2---of 7
__ia32_sys_epoll_wait---of 5
__x64_compat_sys_epoll_pwait---of 7
__x64_compat_sys_epoll_pwait2---of 7
__x64_sys_epoll_create---of 3
__x64_sys_epoll_create1---of 1
__x64_sys_epoll_ctl---of 4
__x64_sys_epoll_pwait---of 7
__x64_sys_epoll_pwait2---of 7
__x64_sys_epoll_wait---of 5
do_compat_epoll_pwait.part.0---of 8
do_epoll_create---of 14
do_epoll_ctl---of 210
do_epoll_wait---of 101
ep_autoremove_wake_function100%of 3
ep_busy_loop_end---of 7
ep_clear_and_put---of 12
ep_create_wakeup_source---of 5
ep_destroy_wakeup_source---of 7
ep_done_scan---of 19
ep_eventpoll_ioctl---of 15
ep_eventpoll_poll---of 1
ep_eventpoll_release---of 3
ep_item_poll---of 12
ep_loop_check_proc---of 10
ep_pm_stay_awake_rcu45%of 18
ep_poll_callback53%of 38
ep_ptable_queue_proc---of 7
ep_refcount_dec_and_test---of 8
ep_remove_safe---of 2
ep_remove_wait_queue---of 13
ep_show_fdinfo---of 4
ep_start_scan---of 7
eventpoll_release_file---of 6
get_epoll_tfile_raw_ptr---of 9
reverse_path_check_proc---of 10
-----------
SUMMARY53%of 59
__do_adjtimex---of 79
__do_adjtimex.cold---of 3
ntp_clear---of 1
ntp_get_next_leap34%of 6
ntp_notify_cmos_timer---of 3
ntp_tick_length100%of 1
ntp_update_frequency---of 1
second_overflow---of 30
second_overflow.cold---of 2
sync_hw_clock---of 25
sync_timer_callback---of 1
-----------
SUMMARY43%of 7
adu_delete100%of 1
adu_disconnect100%of 3
adu_interrupt_in_callback40%of 15
adu_interrupt_out_callback---of 8
adu_open87%of 15
adu_probe---of 20
adu_read45%of 59
adu_release---of 26
adu_write52%of 39
-----------
SUMMARY53%of 132
__fanout_set_data_bpf---of 7
__packet_get_status.part.0---of 1
__packet_rcv_has_room---of 21
__packet_set_status.part.0---of 1
__register_prot_hook---of 12
__unregister_prot_hook22%of 28
bpf_prog_run_clear_cb---of 6
fanout_add---of 54
fanout_demux_rollover---of 42
free_pg_vec---of 8
match_fanout_group---of 3
nf_hook_direct_egress---of 37
packet_bind---of 4
packet_bind_spkt---of 3
packet_cached_dev_get---of 23
packet_create---of 22
packet_dev_mc---of 16
packet_do_bind---of 69
packet_extra_vlan_len_allowed.isra.0---of 6
packet_getname---of 19
packet_getname_spkt---of 19
packet_getsockopt---of 32
packet_ioctl---of 11
packet_lookup_frame.isra.0---of 7
packet_mc_add---of 17
packet_mm_close---of 2
packet_mm_open---of 2
packet_mmap---of 21
packet_net_exit---of 2
packet_net_init---of 2
packet_notifier49%of 37
packet_parse_headers---of 33
packet_poll---of 19
packet_rcv---of 56
packet_rcv_fanout---of 39
packet_rcv_spkt---of 26
packet_read_pending.part.0---of 7
packet_recvmsg---of 58
packet_release---of 42
packet_sendmsg---of 258
packet_sendmsg_spkt---of 67
packet_seq_next---of 1
packet_seq_show---of 4
packet_seq_start---of 6
packet_seq_stop---of 6
packet_set_ring---of 61
packet_set_ring.cold---of 1
packet_setsockopt---of 145
packet_sock_destruct---of 6
packet_xmit---of 12
prb_calc_retire_blk_tmo.constprop.0---of 6
prb_fill_curr_block---of 8
prb_open_block---of 1
prb_retire_current_block---of 13
prb_retire_rx_blk_timer_expired---of 13
register_prot_hook---of 5
run_filter---of 18
tpacket_destruct_skb---of 17
tpacket_get_timestamp---of 9
tpacket_rcv---of 118
virtio_net_hdr_from_skb.constprop.0---of 12
virtio_net_hdr_to_skb.constprop.0---of 58
-----------
SUMMARY37%of 65
failover_event27%of 23
failover_get_bymac---of 16
failover_register---of 18
failover_slave_register---of 18
failover_slave_unregister17%of 12
failover_unregister---of 11
-----------
SUMMARY23%of 35
usb_console_device---of 3
usb_console_setup---of 40
usb_console_write---of 21
usb_serial_console_disconnect67%of 3
usb_serial_console_exit---of 2
usb_serial_console_init---of 4
-----------
SUMMARY67%of 3
__scsi_lookup_proc_entry63%of 8
proc_scsi_host_open---of 1
proc_scsi_host_write---of 7
proc_scsi_open---of 1
proc_scsi_show---of 1
proc_scsi_write---of 39
scsi_exit_procfs---of 1
scsi_proc_host_add---of 5
scsi_proc_host_rm100%of 4
scsi_proc_hostdir_add---of 10
scsi_proc_hostdir_rm100%of 7
scsi_seq_next---of 1
scsi_seq_show---of 22
scsi_seq_start---of 5
scsi_seq_stop---of 1
scsi_template_proc_dir---of 3
-----------
SUMMARY85%of 19
__get_user_pages---of 128
__gup_longterm_locked---of 205
__mm_populate---of 21
check_vma_flags---of 26
const_folio_flags.constprop.0---of 10
fault_in_readable89%of 18
fault_in_safe_writeable---of 13
fault_in_subpage_writeable---of 1
fault_in_writeable---of 18
faultin_page_range---of 68
fixup_user_fault---of 36
folio_add_pin---of 8
folio_fast_pin_allowed---of 13
folio_flags.constprop.0---of 10
follow_page---of 20
follow_page_mask---of 211
get_dump_page---of 15
get_user_pages---of 64
get_user_pages_fast---of 3
get_user_pages_fast_only---of 3
get_user_pages_remote---of 75
get_user_pages_unlocked---of 62
gup_must_unshare.part.0---of 53
gup_put_folio---of 9
gup_signal_pending---of 7
gup_vma_lookup---of 8
internal_get_user_pages_fast---of 252
is_valid_gup_args---of 17
pin_user_pages---of 3
pin_user_pages_fast---of 3
pin_user_pages_remote---of 4
pin_user_pages_unlocked---of 3
populate_vma_page_range---of 21
sanity_check_pinned_pages---of 106
try_grab_folio---of 70
try_grab_page---of 21
unpin_user_page---of 8
unpin_user_page_range_dirty_lock---of 18
unpin_user_pages---of 24
unpin_user_pages_dirty_lock---of 28
-----------
SUMMARY89%of 18
__nci_request---of 9
nci_activate_target---of 28
nci_allocate_device---of 15
nci_close_device58%of 7
nci_cmd_timer---of 1
nci_cmd_work---of 9
nci_core_cmd---of 1
nci_core_conn_close---of 1
nci_core_conn_close_req---of 1
nci_core_conn_create---of 11
nci_core_conn_create_req---of 1
nci_core_init---of 1
nci_core_ntf_packet---of 9
nci_core_reset---of 1
nci_core_rsp_packet---of 9
nci_data_timer---of 1
nci_deactivate_target---of 6
nci_dep_link_down---of 11
nci_dep_link_up---of 6
nci_dev_down---of 1
nci_dev_up---of 17
nci_disable_se---of 3
nci_discover_se---of 4
nci_enable_se---of 3
nci_free_device34%of 3
nci_fw_download---of 3
nci_generic_req---of 1
nci_get_conn_info_by_conn_id---of 4
nci_get_conn_info_by_dest_type_params---of 9
nci_init_complete_req---of 11
nci_init_req---of 1
nci_nfcc_loopback---of 21
nci_nfcc_loopback_cb---of 9
nci_nfcee_discover---of 1
nci_nfcee_discover_req---of 1
nci_nfcee_mode_set---of 1
nci_nfcee_mode_set_req---of 1
nci_prop_cmd---of 1
nci_prop_ntf_packet---of 9
nci_prop_rsp_packet---of 9
nci_recv_frame---of 8
nci_register_device---of 14
nci_req_complete---of 2
nci_request---of 3
nci_reset_req---of 1
nci_rf_deactivate_req---of 1
nci_rf_discover_req---of 14
nci_rf_discover_select_req---of 2
nci_rx_work---of 15
nci_se_io---of 3
nci_send_cmd---of 9
nci_send_data_req---of 1
nci_send_frame---of 10
nci_set_config---of 4
nci_set_config_req---of 3
nci_start_poll---of 37
nci_stop_poll---of 6
nci_tm_send---of 4
nci_transceive---of 9
nci_tx_work---of 16
nci_unregister_device40%of 5
-----------
SUMMARY47%of 15
-----------
SUMMARY---of 0
dpcm_transport---of 6
nand_compute_ecc---of 8
nand_store_ecc---of 1
sddr09_card_info_destructor100%of 2
sddr09_common_init---of 16
sddr09_get_cardinfo---of 31
sddr09_get_cardinfo.cold---of 1
sddr09_probe---of 4
sddr09_readX.constprop.0---of 3
sddr09_transport---of 163
sddr09_transport.cold---of 22
usb_stor_sddr09_dpcm_init---of 11
usb_stor_sddr09_init---of 1
-----------
SUMMARY100%of 2
__do_sys_clock_adjtime---of 11
__do_sys_clock_adjtime32---of 11
__ia32_compat_sys_timer_create---of 6
__ia32_sys_clock_adjtime---of 1
__ia32_sys_clock_adjtime32---of 1
__ia32_sys_clock_getres---of 10
__ia32_sys_clock_getres_time32---of 10
__ia32_sys_clock_gettime---of 9
__ia32_sys_clock_gettime32---of 9
__ia32_sys_clock_nanosleep---of 13
__ia32_sys_clock_nanosleep_time32---of 14
__ia32_sys_clock_settime---of 9
__ia32_sys_clock_settime32---of 9
__ia32_sys_timer_create---of 6
__ia32_sys_timer_delete---of 10
__ia32_sys_timer_getoverrun---of 3
__ia32_sys_timer_gettime---of 4
__ia32_sys_timer_gettime32---of 4
__ia32_sys_timer_settime---of 8
__ia32_sys_timer_settime32---of 8
__lock_timer---of 29
__x64_compat_sys_timer_create---of 6
__x64_sys_clock_adjtime---of 1
__x64_sys_clock_adjtime32---of 1
__x64_sys_clock_getres---of 10
__x64_sys_clock_getres_time32---of 10
__x64_sys_clock_gettime---of 9
__x64_sys_clock_gettime32---of 9
__x64_sys_clock_nanosleep93%of 13
__x64_sys_clock_nanosleep_time32---of 14
__x64_sys_clock_settime---of 9
__x64_sys_clock_settime32---of 9
__x64_sys_timer_create---of 6
__x64_sys_timer_delete---of 10
__x64_sys_timer_getoverrun---of 3
__x64_sys_timer_gettime---of 4
__x64_sys_timer_gettime32---of 4
__x64_sys_timer_settime---of 8
__x64_sys_timer_settime32---of 8
common_hrtimer_arm---of 8
common_hrtimer_forward---of 1
common_hrtimer_rearm---of 1
common_hrtimer_remaining---of 1
common_hrtimer_try_to_cancel---of 1
common_nsleep100%of 3
common_nsleep_timens---of 6
common_timer_create---of 1
common_timer_del---of 3
common_timer_get---of 13
common_timer_set---of 14
common_timer_wait_running---of 1
do_clock_adjtime---of 9
do_timer_create---of 62
do_timer_gettime---of 7
do_timer_settime---of 14
exit_itimers---of 14
k_itimer_rcu_free---of 1
posix_clock_realtime_adj---of 1
posix_clock_realtime_set---of 1
posix_get_boottime_ktime---of 1
posix_get_boottime_timespec---of 1
posix_get_coarse_res---of 1
posix_get_hrtimer_res---of 1
posix_get_monotonic_coarse---of 1
posix_get_monotonic_ktime---of 1
posix_get_monotonic_raw---of 1
posix_get_monotonic_timespec---of 1
posix_get_realtime_coarse---of 1
posix_get_realtime_ktime---of 1
posix_get_realtime_timespec---of 1
posix_get_tai_ktime---of 1
posix_get_tai_timespec---of 1
posix_timer_event---of 1
posix_timer_fn---of 8
posix_timer_unhash_and_free---of 3
posixtimer_rearm---of 6
timer_wait_running---of 14
-----------
SUMMARY94%of 16
synusb_close---of 2
synusb_disconnect67%of 3
synusb_irq---of 24
synusb_open63%of 8
synusb_post_reset---of 4
synusb_pre_reset---of 1
synusb_probe---of 77
synusb_report_buttons---of 1
synusb_reset_resume---of 1
synusb_resume---of 4
synusb_suspend---of 1
-----------
SUMMARY64%of 11
cleanup100%of 1
service_interrupt_work---of 3
service_outstanding_interrupt---of 10
usb_cdc_wdm_register---of 3
wdm_create---of 24
wdm_disconnect90%of 10
wdm_flush---of 1
wdm_fsync---of 1
wdm_in_callback---of 22
wdm_int_callback21%of 29
wdm_ioctl---of 4
wdm_manage_power75%of 4
wdm_open88%of 16
wdm_out_callback---of 1
wdm_poll---of 12
wdm_post_reset---of 8
wdm_pre_reset---of 5
wdm_probe---of 6
wdm_read---of 43
wdm_release---of 10
wdm_resume80%of 10
wdm_rxwork---of 6
wdm_suspend---of 13
wdm_wait_for_response.isra.0---of 18
wdm_write43%of 35
-----------
SUMMARY54%of 105
nfcmrvl_chip_halt---of 2
nfcmrvl_chip_reset---of 3
nfcmrvl_nci_close---of 3
nfcmrvl_nci_fw_download---of 1
nfcmrvl_nci_open---of 5
nfcmrvl_nci_recv_frame---of 12
nfcmrvl_nci_register_dev---of 21
nfcmrvl_nci_send---of 3
nfcmrvl_nci_setup---of 1
nfcmrvl_nci_unregister_dev60%of 5
nfcmrvl_parse_dt---of 3
-----------
SUMMARY60%of 5
actual_brightness_show---of 6
backlight_device_get_by_name---of 3
backlight_device_get_by_type---of 8
backlight_device_register---of 15
backlight_device_set_brightness---of 9
backlight_device_unregister50%of 2
backlight_device_unregister.part.0---of 3
backlight_force_update---of 7
backlight_register_notifier---of 1
backlight_resume---of 7
backlight_suspend---of 7
backlight_unregister_notifier---of 1
bl_device_release---of 1
bl_power_show---of 1
bl_power_store---of 15
brightness_show---of 1
brightness_store---of 5
devm_backlight_device_match---of 1
devm_backlight_device_register---of 5
devm_backlight_device_release---of 2
devm_backlight_device_unregister---of 2
devm_backlight_release---of 1
devm_of_find_backlight---of 11
fb_notifier_callback---of 17
max_brightness_show---of 1
of_find_backlight_by_node---of 3
of_parent_match---of 3
scale_show---of 5
type_show---of 1
-----------
SUMMARY50%of 2
device_capabilities_show---of 1
interface_capabilities_show---of 1
usb488_device_capabilities_show---of 1
usb488_interface_capabilities_show---of 1
usbtmc488_ioctl_simple---of 13
usbtmc_create_urb80%of 5
usbtmc_disconnect50%of 14
usbtmc_fasync---of 1
usbtmc_flush---of 5
usbtmc_generic_read---of 93
usbtmc_generic_write65%of 40
usbtmc_get_stb---of 37
usbtmc_interrupt---of 19
usbtmc_ioctl19%of 141
usbtmc_ioctl_abort_bulk_in_tag52%of 27
usbtmc_ioctl_abort_bulk_out_tag50%of 16
usbtmc_open75%of 12
usbtmc_poll---of 15
usbtmc_post_reset---of 1
usbtmc_pre_reset---of 4
usbtmc_probe---of 61
usbtmc_read21%of 43
usbtmc_read_bulk_cb---of 9
usbtmc_release---of 8
usbtmc_resume---of 5
usbtmc_suspend---of 11
usbtmc_write75%of 27
usbtmc_write_bulk_cb---of 10
-----------
SUMMARY38%of 325
alloc_mnt_idmap---of 6
copy_mnt_idmap---of 8
free_mnt_idmap---of 6
from_vfsgid50%of 6
from_vfsuid50%of 6
make_vfsgid34%of 6
make_vfsuid34%of 6
mnt_idmap_get---of 7
mnt_idmap_put---of 6
vfsgid_in_group_p---of 1
-----------
SUMMARY42%of 24
-----------
SUMMARY---of 0
__cyc2ns_read---of 2
__set_cyc2ns_scale---of 9
__set_cyc2ns_scale.cold---of 2
calibrate_delay_is_known---of 19
check_tsc_unstable---of 1
convert_art_ns_to_tsc---of 2
convert_art_to_tsc---of 2
cyc2ns_read_begin---of 2
cyc2ns_read_end---of 2
mark_tsc_unstable---of 4
native_calibrate_cpu---of 4
native_calibrate_cpu_early---of 34
native_calibrate_tsc---of 15
native_sched_clock_from_tsc---of 6
native_sched_clock_from_tsc.cold---of 1
pit_hpet_ptimer_calibrate_cpu---of 57
read_tsc100%of 1
sched_clock50%of 4
set_cyc2ns_scale---of 12
time_cpufreq_notifier---of 18
tsc_clocksource_watchdog_disabled---of 5
tsc_cs_enable---of 1
tsc_cs_mark_unstable---of 4
tsc_cs_tick_stable---of 3
tsc_read_refs---of 9
tsc_refine_calibration_work---of 30
tsc_restore_sched_clock_state---of 17
tsc_resume---of 1
tsc_save_sched_clock_state---of 4
unsynchronized_tsc---of 8
using_native_sched_clock---of 1
-----------
SUMMARY60%of 5
__hci_req_sync---of 42
add_to_accept_list---of 19
bacpy---of 1
cancel_interleave_scan40%of 5
hci_prepare_cmd---of 9
hci_req_add---of 6
hci_req_add_ev---of 12
hci_req_add_le_interleaved_scan---of 15
hci_req_add_le_passive_scan---of 129
hci_req_add_le_scan_disable---of 13
hci_req_init---of 1
hci_req_purge---of 1
hci_req_run---of 1
hci_req_run_skb---of 1
hci_req_status_pend---of 1
hci_req_sync---of 3
hci_req_sync_complete---of 13
hci_request_cancel_all100%of 1
hci_request_setup---of 1
interleave_scan_work---of 10
req_run---of 16
set_random_addr---of 29
-----------
SUMMARY50%of 6
-----------
SUMMARY---of 0
__bpf_getsockopt---of 16
__bpf_getsockopt.cold---of 1
__bpf_redirect---of 43
__bpf_setsockopt---of 11
__bpf_setsockopt.cold---of 1
__bpf_sk_lookup.isra.0---of 12
__bpf_sk_lookup.isra.0.cold---of 1
__bpf_skb_change_tail---of 31
__bpf_skb_load_bytes---of 10
__bpf_skb_store_bytes---of 19
__bpf_skc_lookup.constprop.0---of 13
__bpf_xdp_load_bytes---of 7
__bpf_xdp_store_bytes---of 7
__get_filter---of 10
__sk_attach_prog---of 12
__sk_filter_charge---of 4
bpf_bind---of 10
bpf_check_classic---of 31
bpf_check_classic.cold---of 2
bpf_clear_redirect_map---of 7
bpf_clone_redirect---of 7
bpf_convert_ctx_access---of 59
bpf_convert_filter---of 129
bpf_csum_diff---of 10
bpf_csum_level---of 15
bpf_csum_update---of 4
bpf_dynptr_from_skb---of 4
bpf_dynptr_from_skb_rdonly---of 3
bpf_dynptr_from_xdp---of 6
bpf_flow_dissector_load_bytes---of 10
bpf_gen_ld_abs---of 10
bpf_get_cgroup_classid---of 9
bpf_get_cgroup_classid.cold---of 1
bpf_get_cgroup_classid_curr---of 1
bpf_get_hash_recalc---of 3
bpf_get_listener_sock---of 7
bpf_get_netns_cookie_sk_msg---of 4
bpf_get_netns_cookie_sock---of 3
bpf_get_netns_cookie_sock_addr---of 4
bpf_get_netns_cookie_sock_ops---of 4
bpf_get_route_realm---of 1
bpf_get_skb_set_tunnel_proto---of 6
bpf_get_socket_cookie---of 3
bpf_get_socket_cookie_sock---of 1
bpf_get_socket_cookie_sock_addr---of 1
bpf_get_socket_cookie_sock_ops---of 1
bpf_get_socket_ptr_cookie---of 4
bpf_get_socket_uid---of 10
bpf_get_socket_uid.cold---of 1
bpf_helper_changes_pkt_data---of 25
bpf_ipv4_fib_lookup---of 125
bpf_ipv4_fib_lookup.cold---of 2
bpf_ipv6_fib_lookup---of 61
bpf_ipv6_fib_lookup.cold---of 1
bpf_l3_csum_replace---of 10
bpf_l4_csum_replace---of 16
bpf_lwt_in_push_encap---of 1
bpf_lwt_xmit_push_encap---of 1
bpf_migrate_filter---of 16
bpf_msg_apply_bytes---of 1
bpf_msg_cork_bytes---of 1
bpf_msg_pop_data---of 83
bpf_msg_pull_data---of 63
bpf_msg_push_data---of 91
bpf_noop_prologue---of 1
bpf_prepare_filter---of 15
bpf_prog_create---of 16
bpf_prog_create_from_user---of 12
bpf_prog_destroy---of 5
bpf_prog_store_orig_filter---of 4
bpf_redirect---of 4
bpf_redirect_neigh---of 6
bpf_redirect_peer---of 4
bpf_run_sk_reuseport---of 6
bpf_search_tcp_opt---of 16
bpf_set_hash---of 1
bpf_set_hash_invalid---of 1
bpf_sk_ancestor_cgroup_id---of 11
bpf_sk_ancestor_cgroup_id.cold---of 1
bpf_sk_assign---of 24
bpf_sk_assign.cold---of 1
bpf_sk_assign_tcp_reqsk---of 28
bpf_sk_base_func_proto---of 5
bpf_sk_cgroup_id---of 8
bpf_sk_cgroup_id.cold---of 1
bpf_sk_fullsock---of 3
bpf_sk_fullsock.cold---of 1
bpf_sk_getsockopt---of 7
bpf_sk_getsockopt.cold---of 1
bpf_sk_lookup---of 15
bpf_sk_lookup.cold---of 1
bpf_sk_lookup_assign---of 21
bpf_sk_lookup_assign.cold---of 1
bpf_sk_lookup_tcp---of 1
bpf_sk_lookup_udp---of 1
bpf_sk_release---of 6
bpf_sk_release.cold---of 1
bpf_sk_setsockopt---of 7
bpf_sk_setsockopt.cold---of 1
bpf_skb_adjust_room---of 115
bpf_skb_ancestor_cgroup_id---of 11
bpf_skb_ancestor_cgroup_id.cold---of 1
bpf_skb_cgroup_classid---of 8
bpf_skb_cgroup_classid.cold---of 1
bpf_skb_cgroup_id---of 8
bpf_skb_cgroup_id.cold---of 1
bpf_skb_change_head---of 19
bpf_skb_change_proto---of 47
bpf_skb_change_tail---of 1
bpf_skb_change_type---of 5
bpf_skb_check_mtu---of 16
bpf_skb_copy---of 8
bpf_skb_ecn_set_ce---of 39
bpf_skb_event_output---of 5
bpf_skb_fib_lookup---of 10
bpf_skb_generic_pop---of 17
bpf_skb_generic_push---of 1
bpf_skb_get_nlattr---of 6
bpf_skb_get_nlattr_nest---of 9
bpf_skb_get_pay_offset---of 1
bpf_skb_get_tunnel_key---of 39
bpf_skb_get_tunnel_opt---of 27
bpf_skb_get_xfrm_state---of 10
bpf_skb_is_valid_access.constprop.0---of 15
bpf_skb_load_bytes---of 10
bpf_skb_load_bytes_relative---of 9
bpf_skb_load_helper_16---of 8
bpf_skb_load_helper_16_no_cache---of 8
bpf_skb_load_helper_32---of 8
bpf_skb_load_helper_32_no_cache---of 8
bpf_skb_load_helper_8---of 8
bpf_skb_load_helper_8_no_cache---of 8
bpf_skb_pull_data---of 3
bpf_skb_set_tstamp---of 8
bpf_skb_set_tunnel_key---of 28
bpf_skb_set_tunnel_opt---of 25
bpf_skb_store_bytes---of 19
bpf_skb_under_cgroup---of 11
bpf_skb_under_cgroup.cold---of 1
bpf_skb_vlan_pop---of 15
bpf_skb_vlan_push---of 17
bpf_skc_lookup_tcp---of 4
bpf_skc_to_mptcp_sock---of 1
bpf_skc_to_tcp6_sock---of 6
bpf_skc_to_tcp6_sock.cold---of 1
bpf_skc_to_tcp_request_sock---of 7
bpf_skc_to_tcp_sock---of 5
bpf_skc_to_tcp_sock.cold---of 1
bpf_skc_to_tcp_timewait_sock---of 7
bpf_skc_to_udp6_sock---of 6
bpf_skc_to_udp6_sock.cold---of 1
bpf_skc_to_unix_sock---of 5
bpf_skc_to_unix_sock.cold---of 1
bpf_sock_addr_getsockopt---of 7
bpf_sock_addr_getsockopt.cold---of 1
bpf_sock_addr_set_sun_path---of 4
bpf_sock_addr_setsockopt---of 7
bpf_sock_addr_setsockopt.cold---of 1
bpf_sock_addr_sk_lookup_tcp---of 1
bpf_sock_addr_sk_lookup_udp---of 1
bpf_sock_addr_skc_lookup_tcp---of 1
bpf_sock_common_is_valid_access---of 6
bpf_sock_convert_ctx_access---of 22
bpf_sock_destroy---of 5
bpf_sock_from_file---of 1
bpf_sock_is_valid_access---of 11
bpf_sock_is_valid_access.part.0---of 10
bpf_sock_ops_cb_flags_set---of 4
bpf_sock_ops_cb_flags_set.cold---of 1
bpf_sock_ops_get_syn.isra.0---of 29
bpf_sock_ops_getsockopt---of 17
bpf_sock_ops_getsockopt.cold---of 1
bpf_sock_ops_load_hdr_opt---of 21
bpf_sock_ops_reserve_hdr_opt---of 6
bpf_sock_ops_setsockopt---of 7
bpf_sock_ops_setsockopt.cold---of 1
bpf_sock_ops_store_hdr_opt---of 14
bpf_tc_sk_lookup_tcp---of 1
bpf_tc_sk_lookup_udp---of 1
bpf_tc_skc_lookup_tcp---of 1
bpf_tcp_check_syncookie---of 24
bpf_tcp_gen_syncookie---of 17
bpf_tcp_raw_check_syncookie_ipv4---of 2
bpf_tcp_raw_check_syncookie_ipv6---of 2
bpf_tcp_raw_gen_syncookie_ipv4---of 5
bpf_tcp_raw_gen_syncookie_ipv6---of 5
bpf_tcp_sock---of 4
bpf_tcp_sock.cold---of 1
bpf_tcp_sock_convert_ctx_access---of 28
bpf_tcp_sock_is_valid_access---of 8
bpf_unlocked_sk_getsockopt---of 1
bpf_unlocked_sk_setsockopt---of 1
bpf_warn_invalid_xdp_action---of 5
bpf_xdp_adjust_head---of 8
bpf_xdp_adjust_meta---of 7
bpf_xdp_adjust_tail---of 31
bpf_xdp_check_mtu---of 9
bpf_xdp_copy---of 1
bpf_xdp_copy_buf---of 10
bpf_xdp_event_output---of 7
bpf_xdp_fib_lookup---of 6
bpf_xdp_get_buff_len---of 4
bpf_xdp_load_bytes---of 5
bpf_xdp_pointer---of 13
bpf_xdp_redirect---of 4
bpf_xdp_redirect_map---of 1
bpf_xdp_sk_lookup_tcp---of 1
bpf_xdp_sk_lookup_udp---of 1
bpf_xdp_skc_lookup_tcp---of 1
bpf_xdp_sock_convert_ctx_access---of 3
bpf_xdp_sock_is_valid_access---of 5
bpf_xdp_store_bytes---of 5
btf_id_cmp_func---of 1
cg_skb_func_proto---of 6
cg_skb_is_valid_access---of 13
convert_bpf_ld_abs---of 25
copy_bpf_fprog_from_user---of 13
flow_dissector_convert_ctx_access---of 6
flow_dissector_func_proto---of 7
flow_dissector_is_valid_access---of 10
init_subsystem---of 1
lwt_in_func_proto---of 6
lwt_is_valid_access---of 12
lwt_out_func_proto---of 4
lwt_seg6local_func_proto---of 4
lwt_xmit_func_proto---of 6
neigh_hh_output---of 20
netkit_peer_dev---of 1
sk_attach_bpf---of 8
sk_attach_filter---of 10
sk_detach_filter---of 10
sk_filter_charge---of 17
sk_filter_func_proto---of 4
sk_filter_is_valid_access---of 8
sk_filter_release_rcu---of 6
sk_filter_trim_cap33%of 34
sk_filter_uncharge---of 5
sk_get_filter---of 16
sk_lookup.constprop.0---of 14
sk_lookup_convert_ctx_access---of 13
sk_lookup_func_proto---of 3
sk_lookup_is_valid_access---of 11
sk_msg_convert_ctx_access---of 13
sk_msg_func_proto---of 4
sk_msg_is_valid_access---of 11
sk_reuseport_attach_bpf---of 14
sk_reuseport_attach_filter---of 12
sk_reuseport_convert_ctx_access---of 13
sk_reuseport_func_proto---of 4
sk_reuseport_is_valid_access---of 15
sk_reuseport_load_bytes---of 10
sk_reuseport_load_bytes_relative---of 9
sk_reuseport_prog_free---of 7
sk_select_reuseport---of 21
sk_select_reuseport.cold---of 1
sk_skb_adjust_room---of 24
sk_skb_change_head---of 16
sk_skb_change_tail---of 1
sk_skb_convert_ctx_access---of 15
sk_skb_func_proto---of 4
sk_skb_is_valid_access---of 12
sk_skb_prologue---of 3
sk_skb_pull_data---of 3
skb_do_redirect---of 282
skb_do_redirect.cold---of 6
sock_addr_convert_ctx_access---of 67
sock_addr_func_proto---of 7
sock_addr_is_valid_access---of 26
sock_filter_func_proto---of 4
sock_filter_is_valid_access---of 10
sock_ops_convert_ctx_access---of 394
sock_ops_func_proto---of 4
sock_ops_is_valid_access---of 17
sol_ip_sockopt---of 7
sol_ipv6_sockopt---of 7
sol_socket_sockopt---of 8
sol_tcp_sockopt---of 32
tc_cls_act_btf_struct_access---of 3
tc_cls_act_convert_ctx_access---of 4
tc_cls_act_func_proto---of 6
tc_cls_act_is_valid_access---of 13
tc_cls_act_prologue---of 3
trace_xdp_redirect.constprop.0---of 15
trace_xdp_redirect_err---of 15
tracing_iter_filter---of 3
xdp_btf_struct_access---of 3
xdp_convert_ctx_access---of 11
xdp_do_check_flushed---of 3
xdp_do_flush---of 1
xdp_do_generic_redirect---of 18
xdp_do_redirect---of 23
xdp_do_redirect_frame---of 16
xdp_func_proto---of 4
xdp_is_valid_access---of 14
xdp_master_redirect---of 4
-----------
SUMMARY33%of 34
__register_chrdev---of 9
__register_chrdev_region---of 33
__unregister_chrdev---of 4
__unregister_chrdev_region---of 9
alloc_chrdev_region---of 5
base_probe---of 3
cd_forget---of 3
cdev_add---of 7
cdev_alloc---of 3
cdev_default_release100%of 1
cdev_del100%of 1
cdev_device_add---of 9
cdev_device_del100%of 2
cdev_dynamic_release100%of 1
cdev_init---of 1
cdev_purge40%of 5
cdev_put100%of 2
cdev_set_parent---of 3
chrdev_open66%of 26
chrdev_show---of 5
exact_lock60%of 5
exact_match100%of 1
register_chrdev_region---of 7
unregister_chrdev_region---of 2
-----------
SUMMARY69%of 44
-----------
SUMMARY---of 0
__irq_alloc_descs---of 33
__irq_get_desc_lock---of 10
__irq_put_desc_unlock---of 3
__irq_set_lockdep_class---of 2
actions_show---of 6
alloc_desc---of 12
chip_name_show---of 4
delayed_free_desc---of 1
free_desc---of 3
generic_handle_domain_irq---of 1
generic_handle_domain_irq_safe---of 8
generic_handle_domain_nmi---of 3
generic_handle_irq---of 1
generic_handle_irq_safe---of 8
handle_irq_desc---of 7
hwirq_show---of 3
irq_free_descs---of 5
irq_get_next_irq---of 4
irq_get_percpu_devid_partition---of 5
irq_insert_desc.part.0---of 1
irq_kobj_release---of 1
irq_lock_sparse---of 1
irq_set_percpu_devid---of 5
irq_set_percpu_devid_partition---of 6
irq_to_desc100%of 1
irq_unlock_sparse---of 1
kstat_incr_irq_this_cpu---of 1
kstat_irqs_cpu---of 5
kstat_irqs_usr---of 21
name_show---of 3
per_cpu_count_show---of 11
type_show---of 2
wakeup_show---of 2
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__addrconf_sysctl_register---of 14
__ipv6_chk_addr_and_flags---of 33
__ipv6_dev_get_saddr---of 22
__ipv6_ifa_notify37%of 41
__ipv6_isatap_ifid---of 13
__snmp6_fill_stats64.constprop.089%of 9
add_addr---of 11
addrconf_add_dev---of 10
addrconf_add_ifaddr---of 4
addrconf_add_linklocal---of 20
addrconf_add_mroute---of 1
addrconf_addr_gen.constprop.0---of 9
addrconf_cleanup---of 11
addrconf_dad_completed---of 47
addrconf_dad_failure---of 40
addrconf_dad_kick---of 20
addrconf_dad_run---of 8
addrconf_dad_start---of 13
addrconf_dad_stop---of 32
addrconf_dad_work---of 56
addrconf_del_ifaddr---of 4
addrconf_disable_policy_idev---of 27
addrconf_exit_net---of 9
addrconf_get_prefix_route---of 43
addrconf_ifdown.isra.050%of 102
addrconf_init_auto_addrs---of 75
addrconf_init_net---of 15
addrconf_join_solict---of 3
addrconf_leave_solict---of 3
addrconf_mod_dad_work---of 11
addrconf_notify4%of 131
addrconf_prefix_rcv---of 122
addrconf_prefix_rcv_add_addr---of 23
addrconf_prefix_route---of 5
addrconf_rs_timer---of 31
addrconf_set_dstaddr---of 11
addrconf_sysctl_addr_gen_mode---of 29
addrconf_sysctl_disable---of 23
addrconf_sysctl_disable_policy---of 22
addrconf_sysctl_forward---of 30
addrconf_sysctl_ignore_routes_with_linkdown---of 25
addrconf_sysctl_mtu---of 3
addrconf_sysctl_proxy_ndp---of 11
addrconf_sysctl_register---of 8
addrconf_sysctl_stable_secret---of 23
addrconf_sysctl_unregister100%of 3
addrconf_verify_rtnl---of 119
addrconf_verify_work---of 1
check_cleanup_prefix_route---of 18
check_cleanup_prefix_route.cold---of 1
cleanup_prefix_route---of 25
dev_disable_change---of 13
dev_forward_change---of 26
if6_proc_exit---of 1
if6_proc_net_exit---of 1
if6_proc_net_init---of 2
if6_seq_next---of 10
if6_seq_show---of 1
if6_seq_start---of 18
if6_seq_stop---of 6
in6_dump_addrs---of 84
inet6_addr_add---of 47
inet6_addr_del---of 32
inet6_dump_addr---of 40
inet6_dump_ifacaddr---of 1
inet6_dump_ifaddr---of 1
inet6_dump_ifinfo---of 35
inet6_dump_ifmcaddr---of 1
inet6_fill_ifaddr52%of 31
inet6_fill_ifinfo---of 24
inet6_fill_ifla6_attrs83%of 17
inet6_fill_link_af56%of 9
inet6_get_link_af_size50%of 8
inet6_ifa_finish_destroy62%of 13
inet6_ifinfo_notify---of 7
inet6_netconf_dump_devconf---of 35
inet6_netconf_fill_devconf32%of 22
inet6_netconf_get_devconf---of 66
inet6_netconf_notify_devconf64%of 11
inet6_rtm_deladdr---of 16
inet6_rtm_getaddr---of 57
inet6_rtm_newaddr---of 84
inet6_set_link_af---of 49
inet6_valid_dump_ifaddr_req.constprop.0---of 25
inet6_validate_link_af---of 18
ipv6_add_addr---of 107
ipv6_add_dev---of 67
ipv6_addr_prefix---of 2
ipv6_chk_addr---of 1
ipv6_chk_addr_and_flags---of 1
ipv6_chk_custom_prefix---of 28
ipv6_chk_custom_prefix.cold---of 1
ipv6_chk_prefix---of 27
ipv6_chk_prefix.cold---of 1
ipv6_chk_rpl_srh_loop---of 25
ipv6_count_addresses---of 13
ipv6_create_tempaddr.isra.0---of 57
ipv6_del_addr---of 41
ipv6_dev_find---of 1
ipv6_dev_get_saddr---of 40
ipv6_find_idev---of 15
ipv6_generate_eui64---of 21
ipv6_generate_stable_address---of 17
ipv6_get_ifaddr---of 30
ipv6_get_lladdr---of 25
ipv6_get_saddr_eval---of 39
ipv6_mc_config---of 7
manage_tempaddrs---of 14
modify_prefix_route---of 30
-----------
SUMMARY37%of 397
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
arm_timer---of 6
bump_cpu_timer---of 10
bump_cpu_timer.cold---of 1
check_cpu_itimer---of 22
clear_posix_cputimers_work---of 1
collect_posix_cputimers---of 11
cpu_clock_sample---of 7
cpu_clock_sample_group---of 20
cpu_timer_fire---of 8
do_cpu_nanosleep---of 26
pid_for_clock---of 14
posix_cpu_clock_get---of 22
posix_cpu_clock_getres---of 20
posix_cpu_clock_set---of 16
posix_cpu_nsleep---of 7
posix_cpu_nsleep_restart---of 1
posix_cpu_timer_create---of 24
posix_cpu_timer_del---of 32
posix_cpu_timer_get---of 19
posix_cpu_timer_rearm---of 17
posix_cpu_timer_set---of 60
posix_cpu_timer_wait_running---of 26
posix_cpu_timers_exit---of 6
posix_cpu_timers_exit_group---of 6
posix_cpu_timers_work---of 42
posix_cputimers_group_init---of 2
process_cpu_clock_get---of 1
process_cpu_clock_getres---of 18
process_cpu_nsleep---of 5
process_cpu_timer_create---of 1
run_posix_cpu_timers31%of 26
set_process_cpu_timer---of 12
thread_cpu_clock_get---of 1
thread_cpu_clock_getres---of 18
thread_cpu_timer_create---of 1
thread_group_sample_cputime---of 3
update_rlimit_cpu---of 5
-----------
SUMMARY31%of 26
__ia32_compat_sys_old_select---of 3
__ia32_compat_sys_ppoll_time32---of 7
__ia32_compat_sys_ppoll_time64---of 7
__ia32_compat_sys_pselect6_time32---of 7
__ia32_compat_sys_pselect6_time64---of 7
__ia32_compat_sys_select---of 1
__ia32_sys_poll---of 8
__ia32_sys_ppoll---of 7
__ia32_sys_pselect6---of 7
__ia32_sys_select---of 1
__pollwait---of 10
__x64_compat_sys_old_select---of 3
__x64_compat_sys_ppoll_time32---of 7
__x64_compat_sys_ppoll_time64---of 7
__x64_compat_sys_pselect6_time32---of 8
__x64_compat_sys_pselect6_time64---of 8
__x64_compat_sys_select---of 1
__x64_sys_poll---of 8
__x64_sys_ppoll---of 7
__x64_sys_pselect6---of 8
__x64_sys_select---of 1
compat_core_sys_select---of 44
core_sys_select---of 61
do_compat_pselect---of 9
do_compat_select---of 5
do_pselect.constprop.0---of 7
do_restart_poll---of 6
do_select---of 94
do_sys_poll---of 54
kern_select---of 5
poll_freewait---of 7
poll_initwait---of 1
poll_schedule_timeout.constprop.0---of 3
poll_select_finish---of 26
poll_select_set_timeout---of 7
pollwake100%of 4
select_estimate_accuracy---of 8
-----------
SUMMARY100%of 4
__fib_validate_source---of 31
__inet_dev_addr_type---of 45
fib_add_ifaddr---of 15
fib_compute_spec_dst---of 26
fib_del_ifaddr---of 88
fib_flush---of 5
fib_get_table---of 9
fib_gw_from_via---of 15
fib_inetaddr_event---of 9
fib_info_nh_uses_dev---of 7
fib_lookup---of 33
fib_magic---of 9
fib_modify_prefix_metric---of 8
fib_net_exit---of 1
fib_net_exit_batch---of 3
fib_net_init---of 12
fib_netdev_event30%of 40
fib_new_table---of 21
fib_unmerge---of 23
fib_validate_source---of 14
inet_addr_type---of 1
inet_addr_type_dev_table---of 1
inet_addr_type_table---of 1
inet_dev_addr_type---of 1
inet_dump_fib---of 44
inet_rtm_delroute---of 17
inet_rtm_newroute---of 6
ip_fib_net_exit---of 10
ip_rt_ioctl---of 101
ip_valid_fib_dump_req---of 39
nexthop_uses_dev---of 23
nl_fib_input---of 7
nl_fib_lookup---of 23
rtm_to_fib_config---of 46
-----------
SUMMARY30%of 40
_atomic_dec_and_lock88%of 8
_atomic_dec_and_lock_irqsave63%of 8
_atomic_dec_and_raw_lock---of 8
_atomic_dec_and_raw_lock_irqsave---of 8
-----------
SUMMARY75%of 16
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
of_parse_phandle---of 3
of_usb_get_dr_mode_by_phy---of 13
of_usb_host_tpl_support---of 1
of_usb_update_otg_caps---of 14
usb_decode_interval---of 20
usb_decode_interval.cold---of 2
usb_ep_type_string---of 3
usb_get_dr_mode---of 3
usb_get_maximum_speed---of 4
usb_get_maximum_ssp_rate---of 3
usb_get_role_switch_default_mode---of 3
usb_of_get_companion_dev---of 6
usb_otg_state_string---of 3
usb_speed_string100%of 2
usb_state_string---of 3
-----------
SUMMARY100%of 2
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
usb_led_activity75%of 4
-----------
SUMMARY75%of 4
add_uevent_var67%of 6
alloc_uevent_skb100%of 3
cleanup_uevent_env---of 1
kobject_synth_uevent---of 50
kobject_uevent100%of 1
kobject_uevent_env62%of 102
uevent_net_exit---of 5
uevent_net_init---of 8
uevent_net_rcv---of 1
uevent_net_rcv_skb---of 13
-----------
SUMMARY64%of 112
-----------
SUMMARY---of 0
fib_add_nexthop---of 8
fib_check_nh---of 32
fib_check_nh_v4_gw---of 87
fib_check_nh_v6_gw---of 10
fib_create_info---of 266
fib_create_info.cold---of 2
fib_detect_death---of 37
fib_dump_info---of 115
fib_get_nhs---of 57
fib_info_update_nhc_saddr---of 5
fib_metrics_match---of 15
fib_nexthop_info16%of 44
fib_nh_common_init---of 9
fib_nh_common_release---of 16
fib_nh_init---of 14
fib_nh_match---of 60
fib_nh_release---of 1
fib_nhc_update_mtu---of 10
fib_nlmsg_size---of 56
fib_rebalance---of 64
fib_release_info---of 41
fib_result_prefsrc---of 9
fib_select_multipath---of 88
fib_select_multipath.cold---of 2
fib_select_path---of 127
fib_sync_down_addr---of 12
fib_sync_down_addr.cold---of 1
fib_sync_down_dev6%of 55
fib_sync_mtu---of 6
fib_sync_up---of 57
free_fib_info---of 3
free_fib_info_rcu---of 27
ip_fib_check_default---of 12
rt_fibinfo_free_cpus.part.0---of 8
rtmsg_fib---of 10
-----------
SUMMARY11%of 99
-----------
SUMMARY---of 0
__attach_mnt---of 5
__attach_mnt.cold---of 1
__cleanup_mnt---of 1
__detach_mounts---of 13
__do_loopback---of 16
__do_sys_fsmount---of 34
__do_sys_listmount---of 51
__do_sys_mount_setattr---of 116
__do_sys_move_mount---of 50
__do_sys_pivot_root---of 56
__do_sys_statmount---of 109
__ia32_sys_fsmount---of 1
__ia32_sys_listmount---of 1
__ia32_sys_mount---of 15
__ia32_sys_mount_setattr---of 1
__ia32_sys_move_mount---of 1
__ia32_sys_oldumount---of 3
__ia32_sys_open_tree---of 21
__ia32_sys_pivot_root---of 1
__ia32_sys_statmount---of 1
__ia32_sys_umount---of 6
__is_local_mountpoint---of 9
__legitimize_mnt45%of 9
__lookup_mnt89%of 9
__lookup_mnt.cold---of 1
__mnt_is_readonly---of 3
__put_mountpoint.part.0---of 5
__x64_sys_fsmount---of 1
__x64_sys_listmount---of 1
__x64_sys_mount---of 15
__x64_sys_mount_setattr---of 1
__x64_sys_move_mount---of 1
__x64_sys_oldumount---of 3
__x64_sys_open_tree---of 21
__x64_sys_pivot_root---of 1
__x64_sys_statmount---of 1
__x64_sys_umount---of 6
alloc_mnt_ns---of 16
alloc_vfsmnt---of 10
attach_mnt---of 8
attach_recursive_mnt---of 66
attr_flags_to_mnt_flags---of 11
can_change_locked_flags.isra.0---of 13
cleanup_group_ids---of 11
cleanup_mnt---of 17
clone_mnt---of 38
clone_private_mount---of 13
collect_mounts---of 6
commit_tree---of 11
copy_mnt_id_req---of 12
copy_mnt_ns---of 54
copy_mount_options---of 9
copy_tree---of 34
count_mounts---of 12
current_chrooted---of 6
delayed_free_vfsmnt---of 1
delayed_mntput---of 2
dissolve_on_fput---of 5
do_add_mount---of 8
do_lock_mount---of 23
do_mount---of 4
do_move_mount---of 85
drop_collected_mounts---of 1
fc_mount---of 5
finish_automount---of 44
free_mnt_ns---of 9
from_mnt_ns---of 1
get_mountpoint---of 18
get_mountpoint.cold---of 1
graft_tree---of 7
invent_group_ids---of 12
is_path_reachable---of 7
iterate_mounts---of 5
kern_mount---of 4
kern_unmount---of 6
kern_unmount_array---of 11
lock_mnt_tree---of 16
lookup_mnt---of 37
lookup_mountpoint---of 9
lookup_mountpoint.cold---of 1
m_next---of 4
m_show---of 1
m_start---of 8
m_stop---of 1
mark_mounts_for_expiry---of 15
may_mount---of 1
may_umount---of 1
may_umount_tree---of 10
mnt_add_to_ns---of 10
mnt_change_mountpoint---of 13
mnt_clone_internal---of 3
mnt_drop_write50%of 12
mnt_drop_write_file54%of 13
mnt_get_count100%of 7
mnt_get_write_access78%of 9
mnt_get_write_access_file67%of 6
mnt_get_writers---of 7
mnt_make_shortterm---of 2
mnt_may_suid---of 4
mnt_put_write_access100%of 2
mnt_put_write_access_file34%of 3
mnt_release_group_id---of 1
mnt_set_expiry---of 3
mnt_set_mountpoint---of 3
mnt_want_write32%of 22
mnt_want_write_file40%of 25
mnt_warn_timestamp_expiry---of 9
mntget100%of 3
mntns_get---of 7
mntns_install---of 14
mntns_owner---of 1
mntns_put---of 1
mntput75%of 4
mntput_no_expire28%of 44
mount_subtree---of 13
mount_too_revealing---of 28
namespace_unlock---of 18
open_detached_copy---of 19
our_mnt---of 1
path_is_mountpoint---of 24
path_is_under---of 7
path_mount---of 131
path_umount---of 62
put_mnt_ns---of 5
sb_prepare_remount_readonly---of 15
show_path---of 5
umount_tree---of 49
unhash_mnt---of 11
vfs_create_mount---of 11
vfs_kern_mount---of 3
vfs_kern_mount.part.0---of 10
vfs_submount---of 4
-----------
SUMMARY49%of 168
get_option---of 13
get_options---of 18
memparse---of 11
next_arg---of 23
parse_option_str14%of 15
-----------
SUMMARY14%of 15
-----------
SUMMARY---of 0
__hidinput_change_resolution_multipliers.part.0---of 16
hid_map_usage.isra.0---of 12
hid_map_usage_clear.isra.0---of 9
hid_report_release_tool---of 2
hidinput_calc_abs_res---of 23
hidinput_close100%of 1
hidinput_configure_usage---of 427
hidinput_connect---of 129
hidinput_count_leds---of 11
hidinput_disconnect90%of 10
hidinput_get_battery_property---of 18
hidinput_get_led_field---of 9
hidinput_getkeycode80%of 5
hidinput_hid_event20%of 111
hidinput_input_event20%of 15
hidinput_led_worker---of 7
hidinput_locate_usage42%of 29
hidinput_open---of 1
hidinput_query_battery_capacity---of 9
hidinput_report_event100%of 3
hidinput_setkeycode95%of 19
hidinput_setup_battery.isra.0---of 27
-----------
SUMMARY38%of 193
pcmidi_in_close---of 4
pcmidi_in_open---of 3
pcmidi_in_trigger---of 3
pcmidi_send_note---of 4
pcmidi_snd_free---of 1
pcmidi_sustained_note_release---of 1
pk_input_mapping---of 7
pk_probe---of 55
pk_raw_event---of 39
pk_remove50%of 4
pk_report_fixup---of 6
show_channel---of 3
show_octave---of 3
show_sustain---of 3
store_channel---of 6
store_octave---of 6
store_sustain---of 8
-----------
SUMMARY50%of 4
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
dev_add_physical_location23%of 9
dock_show---of 2
horizontal_position_show---of 3
lid_show---of 2
panel_show---of 3
vertical_position_show---of 3
-----------
SUMMARY23%of 9
-----------
SUMMARY---of 0
vprbrd_disconnect50%of 2
vprbrd_probe---of 8
-----------
SUMMARY50%of 2
-----------
SUMMARY---of 0
task_work_add59%of 12
task_work_cancel---of 1
task_work_cancel_match---of 9
task_work_func_match---of 1
task_work_run100%of 9
-----------
SUMMARY77%of 21
ghl_init_urb---of 4
ghl_magic_poke---of 2
ghl_magic_poke_cb---of 3
motion_send_output_report---of 1
sixaxis_parse_report.constprop.0---of 6
sixaxis_send_output_report---of 10
sixaxis_set_operational_bt---of 3
sixaxis_set_operational_usb---of 10
sony_battery_get_property---of 6
sony_input_configured---of 142
sony_led_blink_set---of 24
sony_led_get_brightness---of 7
sony_led_set_brightness---of 10
sony_mapping---of 74
sony_play_effect---of 7
sony_probe---of 29
sony_raw_event---of 31
sony_register_sensors---of 15
sony_remove55%of 11
sony_report_fixup---of 22
sony_resume---of 4
sony_set_leds---of 10
sony_state_worker---of 1
sony_suspend---of 3
-----------
SUMMARY55%of 11
__delete_and_unsubscribe_port---of 19
check_and_subscribe_port---of 38
clear_subscriber_list12%of 18
get_client_port---of 5
port_delete58%of 7
snd_seq_create_port---of 22
snd_seq_delete_all_ports50%of 16
snd_seq_delete_port69%of 16
snd_seq_event_port_attach---of 9
snd_seq_event_port_detach100%of 1
snd_seq_get_port_info---of 13
snd_seq_port_connect---of 7
snd_seq_port_disconnect---of 22
snd_seq_port_get_subscription---of 9
snd_seq_port_query_nearest---of 14
snd_seq_port_use_ptr100%of 9
snd_seq_set_port_info---of 15
-----------
SUMMARY53%of 67
__bpf_trace_notifier_info---of 1
__traceiter_notifier_register---of 3
__traceiter_notifier_run---of 3
__traceiter_notifier_unregister---of 3
atomic_notifier_call_chain46%of 11
atomic_notifier_call_chain_is_empty---of 1
atomic_notifier_chain_register---of 1
atomic_notifier_chain_register_unique_prio---of 1
atomic_notifier_chain_unregister---of 1
blocking_notifier_call_chain100%of 3
blocking_notifier_call_chain_robust---of 5
blocking_notifier_chain_register---of 5
blocking_notifier_chain_register_unique_prio---of 5
blocking_notifier_chain_unregister60%of 5
notifier_call_chain54%of 26
notifier_chain_register---of 23
notifier_chain_unregister48%of 19
notify_die---of 6
perf_trace_notifier_info---of 5
raw_notifier_call_chain100%of 1
raw_notifier_call_chain_robust---of 3
raw_notifier_chain_register---of 1
raw_notifier_chain_unregister---of 1
register_die_notifier---of 1
srcu_init_notifier_head---of 3
srcu_notifier_call_chain---of 3
srcu_notifier_chain_register---of 5
srcu_notifier_chain_unregister---of 5
trace_event_raw_event_notifier_info---of 6
trace_raw_output_notifier_info---of 5
unregister_die_notifier---of 1
-----------
SUMMARY54%of 65
xpad360_process_packet.part.0---of 17
xpad360w_start_input---of 5
xpad_close100%of 1
xpad_disconnect---of 12
xpad_init_input---of 45
xpad_irq_in---of 79
xpad_irq_out---of 12
xpad_led_set---of 4
xpad_open100%of 1
xpad_play_effect---of 11
xpad_prepare_next_out_packet---of 19
xpad_presence_work---of 8
xpad_probe---of 68
xpad_resume---of 9
xpad_set_up_abs---of 8
xpad_start_input70%of 10
xpad_start_xbox_one---of 4
xpad_suspend---of 12
xpad_try_sending_next_out_packet---of 6
-----------
SUMMARY75%of 12
a_alt_hnp_support_show---of 1
a_hnp_support_show---of 1
b_hnp_enable_show---of 1
current_speed_show---of 1
function_show---of 4
gadget_bind_driver49%of 35
gadget_find_ep_by_name---of 4
gadget_match_driver100%of 7
gadget_unbind_driver73%of 11
is_a_peripheral_show---of 1
is_otg_show---of 1
is_selfpowered_show---of 1
maximum_speed_show---of 1
soft_connect_store---of 16
srp_store---of 4
state_show---of 1
usb_add_gadget---of 16
usb_add_gadget_udc---of 4
usb_add_gadget_udc_release---of 4
usb_del_gadget---of 4
usb_del_gadget_udc---of 1
usb_ep_alloc_request42%of 17
usb_ep_clear_halt38%of 16
usb_ep_dequeue---of 16
usb_ep_disable48%of 19
usb_ep_enable50%of 20
usb_ep_fifo_flush---of 17
usb_ep_fifo_status---of 18
usb_ep_free_request38%of 16
usb_ep_queue43%of 21
usb_ep_set_halt38%of 16
usb_ep_set_maxpacket_limit---of 15
usb_ep_set_wedge43%of 19
usb_gadget_activate---of 19
usb_gadget_check_config---of 3
usb_gadget_clear_selfpowered---of 18
usb_gadget_connect---of 1
usb_gadget_connect_locked53%of 23
usb_gadget_deactivate---of 21
usb_gadget_disconnect---of 1
usb_gadget_disconnect_locked58%of 26
usb_gadget_ep_match_desc69%of 19
usb_gadget_frame_number---of 16
usb_gadget_giveback_request45%of 18
usb_gadget_map_request---of 1
usb_gadget_map_request_by_dev---of 24
usb_gadget_register_driver_owner72%of 14
usb_gadget_set_remote_wakeup---of 18
usb_gadget_set_selfpowered---of 18
usb_gadget_set_state100%of 1
usb_gadget_state_work---of 2
usb_gadget_udc_reset100%of 1
usb_gadget_unmap_request---of 8
usb_gadget_unmap_request_by_dev---of 8
usb_gadget_unregister_driver100%of 4
usb_gadget_vbus_connect---of 18
usb_gadget_vbus_disconnect---of 18
usb_gadget_vbus_draw37%of 19
usb_gadget_wakeup---of 18
usb_get_gadget_udc_name---of 5
usb_initialize_gadget---of 2
usb_udc_release---of 5
usb_udc_uevent56%of 9
usb_udc_vbus_handler---of 2
vbus_event_work---of 4
-----------
SUMMARY52%of 331
__netpoll_cleanup---of 14
__netpoll_free---of 4
__netpoll_setup---of 23
netpoll_cleanup---of 6
netpoll_parse_ip_addr---of 6
netpoll_parse_options---of 30
netpoll_poll_dev---of 41
netpoll_poll_disable55%of 11
netpoll_poll_enable45%of 18
netpoll_print_options---of 7
netpoll_send_skb---of 47
netpoll_send_udp---of 18
netpoll_setup---of 54
netpoll_start_xmit---of 20
queue_process---of 32
rcu_cleanup_netpoll_info---of 4
refill_skbs---of 4
zap_completion_queue---of 19
-----------
SUMMARY49%of 29
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__get_acl7%of 29
__posix_acl_chmod---of 22
__posix_acl_create---of 14
acl_by_type.part.0---of 1
do_get_acl---of 16
do_set_acl---of 11
forget_all_cached_acls---of 14
forget_cached_acl---of 11
get_cached_acl26%of 39
get_cached_acl_rcu---of 13
get_inode_acl---of 1
posix_acl_alloc---of 3
posix_acl_chmod---of 14
posix_acl_clone---of 5
posix_acl_create15%of 34
posix_acl_create_masq---of 11
posix_acl_equiv_mode---of 11
posix_acl_from_mode---of 6
posix_acl_from_xattr---of 18
posix_acl_init---of 1
posix_acl_listxattr---of 7
posix_acl_permission---of 22
posix_acl_to_xattr---of 8
posix_acl_update_mode---of 8
posix_acl_valid---of 16
posix_acl_xattr_list---of 1
set_cached_acl36%of 17
set_posix_acl---of 10
simple_acl_create29%of 14
simple_set_acl---of 6
vfs_get_acl---of 9
vfs_remove_acl---of 30
vfs_set_acl---of 37
-----------
SUMMARY21%of 133
-----------
SUMMARY---of 0
snd_lookup_minor_data---of 8
snd_minor_info_read---of 15
snd_open---of 21
snd_register_device---of 18
snd_request_card---of 4
snd_unregister_device100%of 6
-----------
SUMMARY100%of 6
__fib6_clean_all58%of 14
__fib6_drop_pcpu_from.part.040%of 15
__fib6_update_sernum_upto_root---of 12
call_fib6_entry_notifiers---of 1
call_fib6_entry_notifiers_replace---of 1
call_fib6_multipath_entry_notifiers---of 1
fib6_add---of 307
fib6_add.cold---of 1
fib6_clean_all100%of 1
fib6_clean_all_skip_notify---of 1
fib6_clean_node60%of 20
fib6_clean_tree100%of 1
fib6_del45%of 85
fib6_dump_done---of 9
fib6_dump_node---of 8
fib6_dump_table.isra.0---of 11
fib6_find_prefix.part.0---of 28
fib6_flush_trees100%of 1
fib6_force_start_gc---of 2
fib6_gc_cleanup---of 1
fib6_gc_timer_cb---of 1
fib6_get_table---of 1
fib6_info_alloc---of 4
fib6_info_destroy_rcu---of 16
fib6_locate---of 35
fib6_locate.cold---of 1
fib6_lookup---of 1
fib6_metric_set---of 5
fib6_net_exit---of 5
fib6_net_init---of 10
fib6_new_sernum100%of 5
fib6_new_table---of 1
fib6_nh_drop_pcpu_from---of 3
fib6_node_dump---of 6
fib6_node_lookup---of 46
fib6_node_lookup.cold---of 1
fib6_purge_rt16%of 45
fib6_repair_tree.part.0---of 98
fib6_rule_lookup---of 18
fib6_run_gc42%of 36
fib6_tables_dump---of 7
fib6_tables_seq_read---of 13
fib6_update_sernum---of 7
fib6_update_sernum_stub---of 1
fib6_update_sernum_upto_root---of 1
fib6_walk100%of 7
fib6_walk_continue86%of 28
inet6_dump_fib---of 47
ipv6_route_seq_next---of 23
ipv6_route_seq_next_table---of 13
ipv6_route_seq_setup_walk---of 3
ipv6_route_seq_show---of 28
ipv6_route_seq_start---of 8
ipv6_route_seq_stop---of 15
ipv6_route_yield---of 10
node_free_rcu---of 1
-----------
SUMMARY49%of 258
__page_table_check_pmd_clear---of 34
__page_table_check_pmd_set---of 9
__page_table_check_pte_clear54%of 32
__page_table_check_pte_clear_range---of 43
__page_table_check_ptes_set24%of 39
__page_table_check_pud_clear---of 6
__page_table_check_pud_set---of 10
__page_table_check_zero44%of 39
__page_table_check_zero.cold---of 1
page_table_check_clear---of 30
page_table_check_clear.part.036%of 48
page_table_check_set44%of 78
-----------
SUMMARY40%of 236
__fortify_panic---of 1
__fortify_report---of 2
__sysfs_match_string---of 5
devm_kasprintf_strarray---of 5
devm_kfree_strarray---of 4
kasprintf_strarray---of 9
kfree_strarray---of 4
kstrdup_and_replace---of 6
kstrdup_quotable---of 6
kstrdup_quotable_cmdline---of 11
kstrdup_quotable_file---of 10
match_string---of 5
memcpy_and_pad---of 3
parse_int_array_user---of 8
skip_spaces67%of 3
strim---of 7
string_escape_mem---of 86
string_get_size---of 23
string_unescape---of 31
strreplace---of 5
sysfs_streq---of 17
-----------
SUMMARY67%of 3
-----------
SUMMARY---of 0
ms_event---of 15
ms_ff_worker---of 2
ms_input_mapped---of 4
ms_input_mapping---of 66
ms_play_effect---of 4
ms_probe---of 16
ms_remove50%of 2
ms_report_fixup---of 6
-----------
SUMMARY50%of 2
__drm_connector_init---of 37
__drm_connector_put_safe63%of 8
drm_connector_atomic_hdr_metadata_equal---of 6
drm_connector_attach_colorspace_property---of 1
drm_connector_attach_content_type_property---of 5
drm_connector_attach_dp_subconnector_property---of 3
drm_connector_attach_edid_property---of 1
drm_connector_attach_encoder---of 6
drm_connector_attach_encoder.cold---of 1
drm_connector_attach_hdr_output_metadata_property---of 1
drm_connector_attach_max_bpc_property---of 5
drm_connector_attach_privacy_screen_properties---of 2
drm_connector_attach_privacy_screen_provider---of 4
drm_connector_attach_scaling_mode_property---of 11
drm_connector_attach_tv_margin_properties---of 1
drm_connector_attach_vrr_capable_property---of 4
drm_connector_cleanup---of 27
drm_connector_cleanup_action---of 1
drm_connector_create_privacy_screen_properties---of 2
drm_connector_create_standard_properties---of 9
drm_connector_find_by_fwnode---of 3
drm_connector_find_by_fwnode.part.0---of 7
drm_connector_free---of 1
drm_connector_free_work_fn---of 2
drm_connector_has_possible_encoder---of 2
drm_connector_has_possible_encoder.cold---of 1
drm_connector_ida_destroy---of 1
drm_connector_ida_init---of 1
drm_connector_init---of 8
drm_connector_init_with_ddc---of 8
drm_connector_list_iter_begin100%of 1
drm_connector_list_iter_end67%of 3
drm_connector_list_iter_next83%of 17
drm_connector_oob_hotplug_event---of 5
drm_connector_privacy_screen_notifier---of 1
drm_connector_property_set_ioctl---of 1
drm_connector_register---of 3
drm_connector_register.part.0---of 12
drm_connector_register_all---of 11
drm_connector_set_link_status_property---of 1
drm_connector_set_obj_prop---of 7
drm_connector_set_orientation_from_panel---of 10
drm_connector_set_panel_orientation---of 7
drm_connector_set_panel_orientation_with_quirk---of 11
drm_connector_set_path_property---of 1
drm_connector_set_tile_property---of 8
drm_connector_set_vrr_capable_property---of 2
drm_connector_unregister72%of 7
drm_connector_unregister_all84%of 6
drm_connector_update_privacy_screen---of 4
drm_display_info_set_bus_formats---of 6
drm_get_colorspace_name---of 4
drm_get_connector_force_name---of 3
drm_get_connector_status_name---of 3
drm_get_connector_type_name---of 3
drm_get_dp_subconnector_name---of 4
drm_get_dpms_name---of 7
drm_get_dvi_i_select_name---of 6
drm_get_dvi_i_subconnector_name---of 6
drm_get_subpixel_order_name---of 1
drm_get_tv_mode_from_name---of 5
drm_get_tv_mode_name---of 4
drm_get_tv_select_name---of 8
drm_get_tv_subconnector_name---of 8
drm_mode_create_aspect_ratio_property---of 3
drm_mode_create_colorspace_property---of 11
drm_mode_create_content_type_property---of 3
drm_mode_create_dp_colorspace_property---of 3
drm_mode_create_dvi_i_properties---of 4
drm_mode_create_hdmi_colorspace_property---of 3
drm_mode_create_scaling_mode_property---of 4
drm_mode_create_suggested_offset_properties---of 5
drm_mode_create_tile_group---of 6
drm_mode_create_tv_margin_properties---of 6
drm_mode_create_tv_properties---of 12
drm_mode_create_tv_properties_legacy---of 15
drm_mode_get_tile_group---of 17
drm_mode_getconnector---of 55
drm_mode_getconnector.cold---of 1
drm_mode_put_tile_group---of 5
drmm_connector_init---of 9
-----------
SUMMARY77%of 42
-----------
SUMMARY---of 0
sysfs_create_dir_ns59%of 12
sysfs_create_mount_point---of 6
sysfs_move_dir_ns---of 4
sysfs_remove_dir75%of 4
sysfs_remove_mount_point---of 1
sysfs_rename_dir_ns---of 1
sysfs_warn_dup---of 3
-----------
SUMMARY63%of 16
usb_acecad_close---of 1
usb_acecad_disconnect100%of 1
usb_acecad_irq---of 10
usb_acecad_open100%of 2
usb_acecad_probe---of 55
-----------
SUMMARY100%of 3
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__ia32_sys_readahead---of 1
__x64_sys_readahead---of 1
const_folio_flags.constprop.0---of 10
file_ra_state_init100%of 1
folio_flags.constprop.0---of 10
force_page_cache_ra---of 9
ksys_readahead---of 9
ondemand_readahead---of 84
ondemand_readahead.cold---of 2
page_cache_async_ra---of 4
page_cache_ra_order---of 5
page_cache_ra_unbounded---of 17
page_cache_sync_ra---of 11
read_pages---of 42
readahead_expand---of 30
-----------
SUMMARY100%of 1
usb_notify_add_bus---of 1
usb_notify_add_device---of 1
usb_notify_remove_bus---of 1
usb_notify_remove_device100%of 1
usb_register_notify---of 1
usb_unregister_notify---of 1
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
__async_dev_cache_fw_image---of 11
_request_firmware---of 63
alloc_fw_cache_entry---of 4
alloc_lookup_fw_priv---of 28
assign_fw---of 28
dev_cache_fw_image---of 15
dev_create_fw_entry---of 3
device_uncache_fw_images_work---of 16
devm_name_match---of 1
firmware_request_cache---of 1
firmware_request_nowarn---of 1
firmware_request_platform---of 1
free_fw_priv---of 12
fw_add_devm_name---of 6
fw_devm_match---of 4
fw_name_devm_release75%of 4
fw_pm_notify---of 7
fw_set_page_data---of 2
fw_shutdown_notify---of 1
fw_state_init---of 1
fw_suspend---of 1
release_firmware---of 6
request_firmware---of 1
request_firmware_direct---of 1
request_firmware_into_buf---of 3
request_firmware_nowait---of 14
request_firmware_work_func---of 1
request_partial_firmware_into_buf---of 3
-----------
SUMMARY75%of 4
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__scsi_host_busy_iter_fn---of 1
__scsi_host_match---of 1
complete_all_cmds_iter---of 1
scsi_add_host_with_dma---of 44
scsi_exit_hosts---of 1
scsi_flush_work---of 3
scsi_host_alloc---of 23
scsi_host_busy---of 1
scsi_host_busy_iter---of 1
scsi_host_check_in_flight---of 3
scsi_host_cls_release100%of 1
scsi_host_complete_all_commands---of 1
scsi_host_dev_release25%of 16
scsi_host_get---of 4
scsi_host_lookup---of 5
scsi_host_put100%of 1
scsi_host_set_state---of 11
scsi_init_hosts---of 1
scsi_is_host_device---of 1
scsi_queue_work---of 5
scsi_remove_host48%of 19
-----------
SUMMARY41%of 37
picolcd_exit_framebuffer50%of 2
picolcd_fb_blank---of 1
picolcd_fb_check_var---of 3
picolcd_fb_deferred_io---of 49
picolcd_fb_destroy---of 3
picolcd_fb_refresh---of 2
picolcd_fb_reset---of 18
picolcd_fb_update_rate_show---of 10
picolcd_fb_update_rate_store---of 8
picolcd_init_framebuffer---of 17
picolcd_set_par---of 13
picolcdfb_ops_defio_copyarea---of 2
picolcdfb_ops_defio_fillrect---of 2
picolcdfb_ops_defio_imageblit---of 2
picolcdfb_ops_defio_read---of 1
picolcdfb_ops_defio_write---of 4
-----------
SUMMARY50%of 2
__acpi_device_modalias.part.0---of 9
__acpi_device_uevent_modalias23%of 9
acpi_data_node_attr_show---of 3
acpi_data_node_release---of 1
acpi_device_modalias---of 4
acpi_device_remove_files---of 22
acpi_device_setup_files---of 28
acpi_device_uevent_modalias100%of 1
acpi_expose_nondev_subnodes---of 7
acpi_hide_nondev_subnodes---of 27
adr_show---of 5
create_of_modalias---of 13
create_pnp_modalias---of 13
data_node_show_path---of 6
description_show---of 3
eject_store---of 14
hid_show---of 1
hrv_show---of 3
modalias_show---of 4
path_show---of 4
power_state_show---of 1
real_power_state_show---of 4
status_show---of 3
sun_show---of 3
uid_show---of 1
-----------
SUMMARY30%of 10
__iommu_attach_device---of 19
__iommu_attach_group---of 9
__iommu_device_set_domain---of 13
__iommu_domain_alloc---of 19
__iommu_domain_alloc_dev---of 12
__iommu_group_free_device.constprop.0---of 20
__iommu_group_remove_device---of 11
__iommu_group_set_domain_internal---of 18
__iommu_map---of 32
__iommu_map.cold---of 1
__iommu_probe_device---of 62
__iommu_probe_device.cold---of 1
__iommu_release_dma_ownership---of 6
__iommu_take_dma_ownership---of 12
__iommu_unmap---of 32
__iommu_unmap.cold---of 1
bus_iommu_probe---of 12
dev_iommu_get---of 9
dev_iommu_priv_set---of 4
device_iommu_capable---of 6
fsl_mc_device_group---of 7
generic_device_group---of 1
generic_single_device_group---of 5
get_pci_alias_group---of 11
get_pci_alias_or_group---of 3
get_pci_function_alias_group---of 10
iommu_alloc_global_pasid---of 5
iommu_alloc_resv_region---of 6
iommu_attach_device---of 7
iommu_attach_device_pasid---of 23
iommu_attach_group---of 1
iommu_bus_notifier46%of 11
iommu_create_device_direct_mappings---of 34
iommu_create_device_direct_mappings.cold---of 1
iommu_default_passthrough---of 1
iommu_deferred_attach---of 5
iommu_deinit_device---of 18
iommu_detach_device---of 11
iommu_detach_device_pasid---of 5
iommu_detach_group---of 4
iommu_dev_disable_feature---of 6
iommu_dev_enable_feature---of 6
iommu_device_claim_dma_owner---of 9
iommu_device_register---of 11
iommu_device_release_dma_owner---of 4
iommu_device_unregister---of 5
iommu_device_unuse_default_domain17%of 6
iommu_device_use_default_domain---of 10
iommu_domain_alloc---of 13
iommu_domain_free---of 5
iommu_enable_nesting---of 4
iommu_free_global_pasid---of 3
iommu_fwspec_add_ids---of 10
iommu_fwspec_free---of 3
iommu_fwspec_init---of 7
iommu_get_dma_domain---of 1
iommu_get_domain_for_dev---of 3
iommu_get_domain_for_dev_pasid---of 6
iommu_get_group_resv_regions---of 49
iommu_get_resv_regions---of 2
iommu_group_add_device---of 6
iommu_group_alloc---of 14
iommu_group_alloc_device---of 29
iommu_group_attr_show---of 3
iommu_group_attr_store---of 3
iommu_group_claim_dma_owner---of 6
iommu_group_default_domain---of 1
iommu_group_dma_owner_claimed---of 1
iommu_group_first_dev---of 4
iommu_group_for_each_dev---of 4
iommu_group_get---of 3
iommu_group_get_iommudata---of 1
iommu_group_has_isolated_msi---of 3
iommu_group_id---of 1
iommu_group_mutex_assert---of 3
iommu_group_put---of 2
iommu_group_ref_get---of 1
iommu_group_release---of 9
iommu_group_release_dma_owner---of 1
iommu_group_remove_device---of 2
iommu_group_replace_domain---of 3
iommu_group_set_iommudata---of 1
iommu_group_set_name---of 7
iommu_group_show_name---of 1
iommu_group_show_resv_regions---of 4
iommu_group_show_type---of 3
iommu_group_store_type---of 27
iommu_iova_to_phys---of 4
iommu_map---of 9
iommu_map_sg---of 23
iommu_ops_from_fwnode---of 5
iommu_pgsize---of 13
iommu_pgsize.cold---of 5
iommu_present---of 7
iommu_probe_device---of 4
iommu_put_resv_regions---of 5
iommu_set_default_passthrough---of 3
iommu_set_default_translated---of 3
iommu_set_dma_strict---of 2
iommu_set_fault_handler---of 3
iommu_set_pgtable_quirks---of 4
iommu_setup_default_domain---of 84
iommu_unmap---of 3
iommu_unmap_fast---of 1
pci_device_group---of 14
probe_iommu_group---of 2
remove_iommu_group---of 8
report_iommu_fault---of 18
-----------
SUMMARY36%of 17
__bpf_trace_cpuhp_enter---of 1
__bpf_trace_cpuhp_exit---of 1
__bpf_trace_cpuhp_multi_enter---of 1
__cpu_down_maps_locked---of 1
__cpu_hotplug_enable---of 4
__cpuhp_invoke_callback_range---of 6
__cpuhp_remove_state---of 20
__cpuhp_remove_state_cpuslocked---of 15
__cpuhp_setup_state---of 21
__cpuhp_setup_state_cpuslocked---of 31
__cpuhp_state_add_instance---of 22
__cpuhp_state_add_instance_cpuslocked---of 17
__cpuhp_state_remove_instance---of 35
__traceiter_cpuhp_enter---of 3
__traceiter_cpuhp_exit---of 3
__traceiter_cpuhp_multi_enter---of 3
_cpu_down---of 38
_cpu_up---of 30
active_show---of 2
add_cpu---of 1
bringup_hibernate_cpu---of 6
clear_tasks_mm_cpumask---of 22
control_show---of 6
control_store---of 24
cpu_device_down---of 1
cpu_device_up---of 1
cpu_down_maps_locked---of 8
cpu_hotplug_disable---of 1
cpu_hotplug_enable---of 1
cpu_hotplug_pm_callback---of 5
cpu_maps_update_begin---of 1
cpu_maps_update_done---of 1
cpu_mitigations_auto_nosmt---of 1
cpu_mitigations_off---of 1
cpu_smt_possible---of 1
cpu_up---of 18
cpuhp_ap_report_dead---of 1
cpuhp_ap_sync_alive---of 3
cpuhp_bringup_ap---of 20
cpuhp_complete_idle_dead---of 1
cpuhp_invoke_callback---of 60
cpuhp_issue_call---of 28
cpuhp_kick_ap---of 24
cpuhp_kick_ap_alive---of 7
cpuhp_kick_ap_work---of 16
cpuhp_next_state---of 7
cpuhp_online_idle---of 2
cpuhp_report_idle_dead---of 5
cpuhp_reset_state---of 15
cpuhp_rollback_install---of 9
cpuhp_should_run---of 1
cpuhp_smt_disable---of 13
cpuhp_smt_enable---of 10
cpuhp_thread_fun---of 25
cpuhp_wait_for_sync_state---of 10
cpus_read_lock50%of 10
cpus_read_trylock---of 13
cpus_read_unlock50%of 10
cpus_write_lock---of 1
cpus_write_unlock---of 1
fail_show---of 1
fail_store---of 13
finish_cpu---of 6
freeze_secondary_cpus---of 26
init_cpu_online---of 1
init_cpu_possible---of 1
init_cpu_present---of 1
lockdep_assert_cpus_held---of 4
lockdep_is_cpus_held100%of 1
notify_cpu_starting---of 3
perf_trace_cpuhp_enter---of 5
perf_trace_cpuhp_exit---of 5
perf_trace_cpuhp_multi_enter---of 5
remove_cpu---of 1
set_cpu_online---of 9
smp_shutdown_nonboot_cpus---of 19
state_show---of 1
states_show---of 5
take_cpu_down---of 5
takedown_cpu---of 18
target_show---of 1
target_store---of 18
thaw_secondary_cpus---of 11
trace_cpuhp_exit---of 15
trace_cpuhp_multi_enter---of 15
trace_event_raw_event_cpuhp_enter---of 6
trace_event_raw_event_cpuhp_exit---of 6
trace_event_raw_event_cpuhp_multi_enter---of 6
trace_raw_output_cpuhp_enter---of 5
trace_raw_output_cpuhp_exit---of 5
trace_raw_output_cpuhp_multi_enter---of 5
trace_suspend_resume---of 15
-----------
SUMMARY53%of 21
ma901_set_stereo---of 5
usb_ma901radio_disconnect100%of 1
usb_ma901radio_probe---of 22
usb_ma901radio_release100%of 1
usb_ma901radio_resume---of 1
usb_ma901radio_s_ctrl---of 4
usb_ma901radio_suspend---of 1
vidioc_g_frequency---of 3
vidioc_g_tuner---of 3
vidioc_querycap---of 1
vidioc_s_frequency---of 4
vidioc_s_tuner---of 6
-----------
SUMMARY100%of 2
__sysvec_call_function---of 29
__sysvec_call_function_single32%of 29
__sysvec_reboot---of 1
fred_sysvec_call_function---of 4
fred_sysvec_call_function_single---of 4
fred_sysvec_reboot---of 4
fred_sysvec_reschedule_ipi---of 5
native_stop_other_cpus---of 35
smp_stop_nmi_callback---of 3
trace_reschedule_entry.constprop.034%of 15
trace_reschedule_exit.constprop.034%of 15
-----------
SUMMARY33%of 59
device_add_software_node---of 26
device_create_managed_software_node---of 15
device_remove_software_node9%of 12
fwnode_create_software_node---of 22
fwnode_remove_software_node---of 5
is_software_node---of 4
property_entries_dup---of 3
property_entries_dup.part.0---of 44
property_entries_free---of 4
property_entry_find---of 14
property_entry_free_data---of 10
property_entry_read_int_array---of 14
software_node_find_by_name---of 11
software_node_fwnode---of 8
software_node_get---of 5
software_node_get_name---of 5
software_node_get_name_prefix---of 8
software_node_get_named_child_node---of 10
software_node_get_next_child---of 32
software_node_get_parent---of 8
software_node_get_reference_args---of 31
software_node_graph_get_next_endpoint---of 25
software_node_graph_get_port_parent---of 14
software_node_graph_get_remote_endpoint---of 25
software_node_graph_parse_endpoint---of 9
software_node_notify7%of 15
software_node_notify_remove8%of 13
software_node_property_present---of 10
software_node_put---of 6
software_node_read_int_array---of 5
software_node_read_string_array---of 16
software_node_register---of 19
software_node_register_node_group---of 7
software_node_release---of 12
software_node_unregister---of 10
software_node_unregister_node_group---of 2
software_node_unregister_node_group.part.0---of 14
swnode_graph_find_next_port---of 6
swnode_register---of 15
to_software_node---of 7
-----------
SUMMARY8%of 40
inode_newsize_ok86%of 7
may_setattr38%of 8
notify_change48%of 74
setattr_copy58%of 14
setattr_prepare27%of 34
setattr_should_drop_sgid---of 4
setattr_should_drop_suidgid30%of 10
-----------
SUMMARY44%of 147
tomoyo_addprintf---of 1
tomoyo_check_profile---of 14
tomoyo_close_control---of 4
tomoyo_find_yesno---of 4
tomoyo_flush---of 16
tomoyo_init_policy_namespace---of 5
tomoyo_io_printf---of 5
tomoyo_open_control---of 28
tomoyo_parse_policy---of 12
tomoyo_poll_control---of 3
tomoyo_poll_query---of 7
tomoyo_print_entry---of 134
tomoyo_print_name_union---of 3
tomoyo_print_number_union_nospace.part.0---of 9
tomoyo_profile100%of 2
tomoyo_read_control---of 21
tomoyo_read_domain---of 36
tomoyo_read_domain2---of 19
tomoyo_read_exception---of 48
tomoyo_read_group---of 54
tomoyo_read_manager---of 21
tomoyo_read_pid---of 28
tomoyo_read_profile---of 37
tomoyo_read_query---of 20
tomoyo_read_stat---of 9
tomoyo_read_version---of 2
tomoyo_same_manager---of 1
tomoyo_same_task_acl---of 1
tomoyo_set_group---of 7
tomoyo_set_string---of 3
tomoyo_supervisor9%of 57
tomoyo_truncate---of 3
tomoyo_update_stat---of 1
tomoyo_write_answer---of 13
tomoyo_write_control---of 74
tomoyo_write_domain---of 37
tomoyo_write_domain2---of 5
tomoyo_write_exception---of 14
tomoyo_write_manager---of 10
tomoyo_write_pid---of 1
tomoyo_write_profile---of 55
tomoyo_write_stat---of 5
tomoyo_write_task---of 6
-----------
SUMMARY12%of 59
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
elo_input_configured---of 1
elo_probe---of 18
elo_raw_event---of 13
elo_remove100%of 1
elo_work---of 12
-----------
SUMMARY100%of 1
__bpf_trace_mm_lru_activate---of 1
__bpf_trace_mm_lru_insertion---of 1
__folio_batch_release100%of 3
__folio_put84%of 12
__lru_add_drain_all---of 24
__page_cache_release67%of 39
__traceiter_mm_lru_activate---of 3
__traceiter_mm_lru_insertion---of 3
const_folio_flags.constprop.040%of 10
deactivate_file_folio---of 17
folio_activate53%of 19
folio_activate_fn58%of 56
folio_add_lru43%of 21
folio_add_lru_vma60%of 5
folio_batch_add_and_move100%of 4
folio_batch_move_lru95%of 17
folio_batch_remove_exceptionals100%of 6
folio_deactivate---of 19
folio_flags.constprop.040%of 10
folio_mark_accessed64%of 25
folio_mark_lazyfree---of 22
folio_memcg40%of 20
folio_rotate_reclaimable---of 26
folios_put_refs75%of 20
lru_add_drain47%of 15
lru_add_drain_all---of 1
lru_add_drain_cpu26%of 31
lru_add_drain_cpu_zone---of 15
lru_add_drain_per_cpu---of 15
lru_add_fn51%of 55
lru_cache_disable---of 1
lru_deactivate_file_fn---of 64
lru_deactivate_fn---of 41
lru_lazyfree_fn---of 44
lru_move_tail_fn---of 37
lru_note_cost---of 27
lru_note_cost_refault---of 16
perf_trace_mm_lru_activate---of 5
perf_trace_mm_lru_insertion---of 18
put_pages_list---of 16
release_pages---of 16
trace_event_raw_event_mm_lru_activate---of 6
trace_event_raw_event_mm_lru_insertion---of 19
trace_raw_output_mm_lru_activate---of 5
trace_raw_output_mm_lru_insertion---of 10
-----------
SUMMARY57%of 368
-----------
SUMMARY---of 0
__dev_pm_set_dedicated_wake_irq---of 12
dev_pm_arm_wake_irq---of 6
dev_pm_attach_wake_irq---of 10
dev_pm_clear_wake_irq25%of 4
dev_pm_disable_wake_irq_check17%of 6
dev_pm_disarm_wake_irq---of 5
dev_pm_enable_wake_irq_check13%of 8
dev_pm_enable_wake_irq_complete---of 4
dev_pm_set_dedicated_wake_irq---of 1
dev_pm_set_dedicated_wake_irq_reverse---of 1
dev_pm_set_wake_irq---of 6
handle_threaded_wake_irq---of 5
-----------
SUMMARY17%of 18
rmi_event---of 8
rmi_hid_read_block---of 18
rmi_hid_reset---of 1
rmi_hid_write_block---of 5
rmi_input_configured---of 20
rmi_input_mapping---of 4
rmi_irq_map---of 1
rmi_irq_teardown---of 5
rmi_post_resume---of 8
rmi_probe---of 43
rmi_raw_event---of 17
rmi_remove50%of 4
rmi_report---of 6
rmi_reset_attn_mode---of 4
rmi_reset_work---of 1
rmi_set_mode.constprop.0---of 4
rmi_set_page---of 5
rmi_suspend---of 4
rmi_write_report---of 7
-----------
SUMMARY50%of 4
dvb_demux_do_ioctl---of 51
dvb_demux_ioctl---of 1
dvb_demux_open---of 13
dvb_demux_poll---of 12
dvb_demux_read---of 18
dvb_demux_release---of 11
dvb_dmxdev_add_pid---of 10
dvb_dmxdev_buffer_read.constprop.0---of 26
dvb_dmxdev_buffer_write---of 8
dvb_dmxdev_delete_pids---of 6
dvb_dmxdev_feed_restart.isra.0---of 13
dvb_dmxdev_feed_stop.isra.0---of 4
dvb_dmxdev_filter_start---of 33
dvb_dmxdev_filter_stop---of 11
dvb_dmxdev_filter_timeout---of 1
dvb_dmxdev_init---of 11
dvb_dmxdev_release24%of 13
dvb_dmxdev_section_callback---of 14
dvb_dmxdev_start_feed---of 12
dvb_dmxdev_ts_callback---of 13
dvb_dvr_do_ioctl---of 10
dvb_dvr_ioctl---of 1
dvb_dvr_open---of 26
dvb_dvr_poll---of 13
dvb_dvr_read---of 3
dvb_dvr_release---of 11
dvb_dvr_write---of 7
-----------
SUMMARY24%of 13
ax_probe---of 26
ax_remove100%of 1
axff_play---of 10
-----------
SUMMARY100%of 1
ieee1284_id_show---of 5
usblp_bulk_read---of 8
usblp_bulk_write---of 8
usblp_cache_device_id_string---of 7
usblp_ctrl_msg---of 9
usblp_devnode100%of 3
usblp_disconnect84%of 6
usblp_ioctl---of 54
usblp_open100%of 11
usblp_poll---of 13
usblp_probe---of 39
usblp_read---of 26
usblp_release---of 5
usblp_resume75%of 4
usblp_set_protocol---of 8
usblp_submit_read84%of 6
usblp_suspend---of 1
usblp_write63%of 24
usblp_wwait36%of 28
-----------
SUMMARY64%of 82
ap_init_aperfmperf---of 6
arch_freq_get_on_cpu---of 11
arch_scale_freq_tick9%of 12
arch_set_max_freq_ratio---of 3
disable_freq_invariance_workfn---of 5
freq_invariance_enable---of 3
freq_invariance_set_perf_ratio---of 2
init_counter_refs---of 5
-----------
SUMMARY9%of 12
__devm_add_action---of 3
__devm_alloc_percpu---of 5
__devres_alloc_node---of 4
add_dr---of 4
devm_action_match---of 4
devm_action_release100%of 1
devm_free_pages---of 6
devm_free_percpu---of 5
devm_get_free_pages---of 5
devm_kasprintf---of 1
devm_kfree---of 8
devm_kmalloc---of 9
devm_kmalloc_match---of 1
devm_kmemdup---of 3
devm_krealloc---of 31
devm_kstrdup---of 4
devm_kstrdup_const---of 5
devm_kvasprintf---of 3
devm_pages_match---of 1
devm_pages_release---of 1
devm_percpu_match---of 1
devm_percpu_release---of 1
devm_release_action---of 6
devm_remove_action---of 6
devres_add---of 1
devres_close_group---of 11
devres_destroy---of 6
devres_find---of 9
devres_for_each_res---of 8
devres_free---of 4
devres_get---of 13
devres_log36%of 17
devres_open_group---of 7
devres_release---of 6
devres_release_all84%of 6
devres_release_group79%of 14
devres_remove---of 12
devres_remove_group---of 15
remove_nodes.constprop.095%of 36
-----------
SUMMARY78%of 74
-----------
SUMMARY---of 0
ima_init_template_list---of 2
ima_init_template_list.part.0---of 5
ima_restore_measurement_list---of 44
ima_template_desc_buf---of 6
ima_template_desc_current34%of 6
ima_template_has_modsig---of 6
lookup_template_desc---of 19
template_desc_init_fields---of 26
-----------
SUMMARY34%of 6
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__iio_update_buffers---of 120
data_available_show---of 1
direction_show---of 6
enable_show---of 1
enable_store---of 10
iio_buffer_add_demux---of 8
iio_buffer_chrdev_release---of 3
iio_buffer_deactivate---of 4
iio_buffer_demux_free---of 4
iio_buffer_get---of 7
iio_buffer_get.part.0---of 5
iio_buffer_init---of 2
iio_buffer_poll---of 11
iio_buffer_poll_wrapper---of 3
iio_buffer_put---of 2
iio_buffer_put.part.0---of 5
iio_buffer_read---of 19
iio_buffer_read_wrapper---of 3
iio_buffer_ready---of 13
iio_buffer_wakeup_poll50%of 2
iio_buffer_write---of 24
iio_buffer_write_wrapper---of 3
iio_buffers_alloc_sysfs_and_mask---of 70
iio_buffers_free_sysfs_and_mask20%of 5
iio_compute_scan_bytes---of 9
iio_device_attach_buffer---of 7
iio_device_buffer_ioctl---of 15
iio_device_detach_buffers20%of 10
iio_disable_all_buffers50%of 2
iio_disable_buffers12%of 17
iio_pop_from_buffer---of 5
iio_push_to_buffers---of 10
iio_push_to_buffers_with_ts_unaligned---of 7
iio_scan_el_show---of 1
iio_scan_el_store---of 26
iio_scan_el_ts_show---of 1
iio_scan_el_ts_store---of 7
iio_scan_mask_match---of 8
iio_show_fixed_type---of 6
iio_show_scan_index---of 1
iio_update_buffers---of 14
iio_validate_scan_mask_onehot---of 1
length_show---of 1
length_store---of 11
watermark_show---of 1
watermark_store---of 9
-----------
SUMMARY20%of 36
__debugfs_create_file---of 18
_debugfs_apply_options.isra.0---of 8
debug_fill_super---of 7
debug_mount---of 3
debugfs_automount---of 1
debugfs_create_automount---of 16
debugfs_create_dir---of 14
debugfs_create_file---of 2
debugfs_create_file_size---of 3
debugfs_create_file_unsafe---of 2
debugfs_create_symlink---of 9
debugfs_free_inode---of 4
debugfs_initialized---of 1
debugfs_lookup---of 9
debugfs_lookup_and_remove---of 4
debugfs_parse_options---of 15
debugfs_parse_options.cold---of 1
debugfs_release_dentry43%of 7
debugfs_remount---of 4
debugfs_remove100%of 3
debugfs_rename---of 35
debugfs_setattr---of 4
debugfs_show_options---of 7
remove_one20%of 15
start_creating.part.0---of 17
-----------
SUMMARY36%of 25
-----------
SUMMARY---of 0
__check_sticky---of 6
__filename_parentat---of 19
__ia32_sys_link---of 1
__ia32_sys_linkat---of 1
__ia32_sys_mkdir---of 5
__ia32_sys_mkdirat---of 1
__ia32_sys_mknod---of 1
__ia32_sys_mknodat---of 1
__ia32_sys_rename---of 1
__ia32_sys_renameat---of 1
__ia32_sys_renameat2---of 1
__ia32_sys_rmdir---of 5
__ia32_sys_symlink---of 1
__ia32_sys_symlinkat---of 1
__ia32_sys_unlink---of 5
__ia32_sys_unlinkat---of 6
__kern_path_locked---of 7
__legitimize_path50%of 8
__lookup_slow47%of 13
__traverse_mounts---of 31
__x64_sys_link---of 1
__x64_sys_linkat---of 1
__x64_sys_mkdir---of 5
__x64_sys_mkdirat---of 5
__x64_sys_mknod---of 5
__x64_sys_mknodat---of 1
__x64_sys_rename---of 1
__x64_sys_renameat---of 1
__x64_sys_renameat2---of 1
__x64_sys_rmdir---of 5
__x64_sys_symlink---of 1
__x64_sys_symlinkat---of 1
__x64_sys_unlink---of 5
__x64_sys_unlinkat---of 6
choose_mountpoint_rcu---of 14
complete_walk67%of 12
do_file_open_root---of 17
do_filp_open50%of 12
do_linkat---of 24
do_mkdirat---of 11
do_mknodat---of 16
do_renameat2---of 62
do_rmdir---of 15
do_symlinkat---of 9
do_unlinkat---of 29
done_path_create---of 1
filename_create---of 16
filename_lookup---of 19
follow_down---of 6
follow_down_one---of 6
follow_up---of 7
fsnotify_move---of 17
full_name_hash100%of 7
generic_permission29%of 32
getname50%of 6
getname_flags---of 6
getname_flags.part.032%of 19
getname_kernel---of 12
getname_uflags---of 6
handle_dots22%of 82
hashlen_string---of 3
inode_permission86%of 21
kern_path---of 1
kern_path_create---of 1
kern_path_locked---of 1
kernel_tmpfile_open---of 4
legitimize_links34%of 15
link_path_walk.part.0.constprop.073%of 36
lock_rename---of 4
lock_rename_child---of 7
lock_two_directories---of 15
lookup_dcache---of 7
lookup_fast45%of 18
lookup_one---of 7
lookup_one_common---of 12
lookup_one_len---of 7
lookup_one_len_unlocked---of 5
lookup_one_positive_unlocked---of 5
lookup_one_qstr_excl---of 6
lookup_one_unlocked---of 5
lookup_open.isra.046%of 71
lookup_positive_unlocked---of 5
may_delete---of 24
may_linkat---of 10
may_open78%of 18
may_open_dev---of 3
nd_alloc_stack---of 5
nd_jump_link---of 8
nd_jump_root60%of 15
page_get_link---of 35
page_put_link---of 11
page_readlink---of 3
page_symlink---of 9
path_get100%of 2
path_init17%of 80
path_lookupat62%of 26
path_openat65%of 155
path_parentat---of 7
path_pts---of 7
path_put100%of 1
putname72%of 7
readlink_copy---of 9
set_root50%of 18
step_into42%of 114
terminate_walk38%of 16
try_lookup_one_len---of 6
try_to_unlazy46%of 22
try_to_unlazy_next---of 32
unlock_rename---of 2
user_path_at_empty---of 1
user_path_create---of 1
user_path_locked_at---of 1
vfs_create---of 22
vfs_get_link---of 7
vfs_link---of 45
vfs_mkdir---of 24
vfs_mknod---of 31
vfs_mkobj---of 18
vfs_path_lookup---of 1
vfs_path_parent_lookup---of 1
vfs_readlink---of 11
vfs_rename---of 142
vfs_rmdir---of 21
vfs_symlink---of 19
vfs_tmpfile---of 12
vfs_unlink---of 34
walk_component50%of 20
-----------
SUMMARY48%of 846
ath10k_radar_confirmation_work---of 13
ath10k_tpc_config_disp_tables---of 36
ath10k_wmi_10_1_op_gen_init---of 3
ath10k_wmi_10_1_op_gen_peer_assoc---of 7
ath10k_wmi_10_1_op_rx---of 35
ath10k_wmi_10_2_4_op_gen_bb_timing---of 4
ath10k_wmi_10_2_4_op_gen_pdev_get_tpc_config---of 4
ath10k_wmi_10_2_4_op_get_vdev_subtype---of 3
ath10k_wmi_10_2_4_op_pull_fw_stats---of 22
ath10k_wmi_10_2_4_op_pull_swba_ev---of 10
ath10k_wmi_10_2_op_gen_init---of 12
ath10k_wmi_10_2_op_gen_pdev_bss_chan_info---of 4
ath10k_wmi_10_2_op_gen_pdev_get_temperature---of 2
ath10k_wmi_10_2_op_gen_peer_assoc---of 7
ath10k_wmi_10_2_op_pull_fw_stats---of 17
ath10k_wmi_10_2_op_rx---of 44
ath10k_wmi_10_4_ext_resource_config---of 3
ath10k_wmi_10_4_gen_per_peer_per_tid_cfg---of 4
ath10k_wmi_10_4_gen_radar_found---of 4
ath10k_wmi_10_4_gen_tdls_peer_update---of 14
ath10k_wmi_10_4_gen_update_fw_tdls_state---of 5
ath10k_wmi_10_4_op_fw_stats_fill---of 35
ath10k_wmi_10_4_op_gen_dbglog_cfg---of 6
ath10k_wmi_10_4_op_gen_init---of 3
ath10k_wmi_10_4_op_gen_pdev_get_tpc_table_cmdid---of 4
ath10k_wmi_10_4_op_gen_peer_assoc---of 7
ath10k_wmi_10_4_op_get_vdev_subtype---of 3
ath10k_wmi_10_4_op_pull_ch_info_ev---of 3
ath10k_wmi_10_4_op_pull_dfs_status_ev---of 3
ath10k_wmi_10_4_op_pull_fw_stats---of 42
ath10k_wmi_10_4_op_pull_mgmt_rx_ev---of 6
ath10k_wmi_10_4_op_pull_phyerr_ev---of 8
ath10k_wmi_10_4_op_pull_phyerr_ev_hdr---of 3
ath10k_wmi_10_4_op_pull_swba_ev---of 13
ath10k_wmi_10_4_op_rx---of 56
ath10k_wmi_10_4_txbf_conf_scheme---of 1
ath10k_wmi_10x_op_fw_stats_fill---of 18
ath10k_wmi_10x_op_gen_pdev_set_rd---of 3
ath10k_wmi_10x_op_gen_start_scan---of 7
ath10k_wmi_10x_op_pull_fw_stats---of 14
ath10k_wmi_10x_op_pull_svc_rdy_ev---of 5
ath10k_wmi_alloc_skb---of 5
ath10k_wmi_attach---of 12
ath10k_wmi_barrier---of 9
ath10k_wmi_cmd_send---of 18
ath10k_wmi_cmd_send_nowait---of 5
ath10k_wmi_connect---of 4
ath10k_wmi_detach---of 3
ath10k_wmi_event_chan_info---of 19
ath10k_wmi_event_debug_mesg---of 1
ath10k_wmi_event_debug_print---of 16
ath10k_wmi_event_echo---of 6
ath10k_wmi_event_host_swba---of 70
ath10k_wmi_event_mgmt_rx---of 52
ath10k_wmi_event_mgmt_tx_bundle_compl---of 9
ath10k_wmi_event_mgmt_tx_compl---of 7
ath10k_wmi_event_pdev_bss_chan_info.isra.0---of 6
ath10k_wmi_event_pdev_tpc_config---of 7
ath10k_wmi_event_peer_sta_kickout---of 18
ath10k_wmi_event_peer_sta_ps_state_chg---of 17
ath10k_wmi_event_phyerr---of 13
ath10k_wmi_event_ready---of 7
ath10k_wmi_event_roam---of 9
ath10k_wmi_event_scan---of 40
ath10k_wmi_event_service_available---of 7
ath10k_wmi_event_service_ready---of 1
ath10k_wmi_event_service_ready_work---of 63
ath10k_wmi_event_service_ready_work.cold---of 3
ath10k_wmi_event_spectral_scan---of 11
ath10k_wmi_event_temperature.isra.0---of 3
ath10k_wmi_event_tpc_final_table---of 7
ath10k_wmi_event_vdev_start_resp---of 7
ath10k_wmi_event_vdev_stopped---of 1
ath10k_wmi_event_wow_wakeup_host---of 4
ath10k_wmi_free_host_mem67%of 3
ath10k_wmi_fw_pdev_base_stats_fill.constprop.0---of 1
ath10k_wmi_fw_pdev_extra_stats_fill---of 1
ath10k_wmi_fw_pdev_rx_stats_fill---of 1
ath10k_wmi_fw_pdev_tx_stats_fill---of 1
ath10k_wmi_fw_peer_stats_fill---of 3
ath10k_wmi_fw_vdev_stats_fill---of 7
ath10k_wmi_htc_tx_complete---of 1
ath10k_wmi_main_op_fw_stats_fill---of 18
ath10k_wmi_main_op_pull_fw_stats---of 14
ath10k_wmi_main_op_pull_svc_rdy_ev---of 5
ath10k_wmi_mgmt_tx_clean_up_pending---of 1
ath10k_wmi_op_ep_tx_credits---of 1
ath10k_wmi_op_gen_addba_clear_resp---of 5
ath10k_wmi_op_gen_addba_send---of 4
ath10k_wmi_op_gen_addba_set_resp---of 4
ath10k_wmi_op_gen_beacon_dma---of 5
ath10k_wmi_op_gen_dbglog_cfg---of 6
ath10k_wmi_op_gen_delba_send---of 4
ath10k_wmi_op_gen_echo---of 4
ath10k_wmi_op_gen_force_fw_hang---of 4
ath10k_wmi_op_gen_init---of 3
ath10k_wmi_op_gen_mgmt_tx---of 18
ath10k_wmi_op_gen_pdev_enable_adaptive_cca---of 3
ath10k_wmi_op_gen_pdev_resume---of 2
ath10k_wmi_op_gen_pdev_set_base_macaddr---of 4
ath10k_wmi_op_gen_pdev_set_param---of 7
ath10k_wmi_op_gen_pdev_set_quiet_mode---of 3
ath10k_wmi_op_gen_pdev_set_rd---of 3
ath10k_wmi_op_gen_pdev_set_wmm---of 4
ath10k_wmi_op_gen_pdev_suspend---of 4
ath10k_wmi_op_gen_peer_assoc---of 7
ath10k_wmi_op_gen_peer_create---of 3
ath10k_wmi_op_gen_peer_delete---of 4
ath10k_wmi_op_gen_peer_flush---of 3
ath10k_wmi_op_gen_peer_set_param---of 3
ath10k_wmi_op_gen_pktlog_disable---of 2
ath10k_wmi_op_gen_pktlog_enable---of 4
ath10k_wmi_op_gen_request_stats---of 4
ath10k_wmi_op_gen_scan_chan_list---of 4
ath10k_wmi_op_gen_set_ap_ps---of 4
ath10k_wmi_op_gen_set_psmode---of 4
ath10k_wmi_op_gen_set_sta_ps---of 3
ath10k_wmi_op_gen_start_scan---of 7
ath10k_wmi_op_gen_stop_scan---of 6
ath10k_wmi_op_gen_vdev_create---of 3
ath10k_wmi_op_gen_vdev_delete---of 4
ath10k_wmi_op_gen_vdev_down---of 4
ath10k_wmi_op_gen_vdev_install_key---of 9
ath10k_wmi_op_gen_vdev_set_param---of 4
ath10k_wmi_op_gen_vdev_spectral_conf---of 4
ath10k_wmi_op_gen_vdev_spectral_enable---of 3
ath10k_wmi_op_gen_vdev_start---of 15
ath10k_wmi_op_gen_vdev_stop---of 4
ath10k_wmi_op_gen_vdev_up---of 3
ath10k_wmi_op_get_vdev_subtype---of 3
ath10k_wmi_op_pull_ch_info_ev---of 3
ath10k_wmi_op_pull_echo_ev---of 1
ath10k_wmi_op_pull_mgmt_rx_ev---of 9
ath10k_wmi_op_pull_peer_kick_ev---of 3
ath10k_wmi_op_pull_phyerr_ev---of 8
ath10k_wmi_op_pull_phyerr_ev_hdr---of 3
ath10k_wmi_op_pull_rdy_ev---of 3
ath10k_wmi_op_pull_roam_ev---of 3
ath10k_wmi_op_pull_scan_ev---of 3
ath10k_wmi_op_pull_swba_ev---of 10
ath10k_wmi_op_pull_vdev_start_ev---of 3
ath10k_wmi_op_rx---of 36
ath10k_wmi_peer_assoc_fill.constprop.0---of 7
ath10k_wmi_process_rx---of 3
ath10k_wmi_pull_pdev_stats_base---of 1
ath10k_wmi_pull_pdev_stats_extra---of 1
ath10k_wmi_pull_pdev_stats_rx---of 1
ath10k_wmi_pull_pdev_stats_tx---of 1
ath10k_wmi_pull_peer_stats---of 1
ath10k_wmi_put_host_mem_chunks---of 5
ath10k_wmi_put_start_scan_common---of 1
ath10k_wmi_put_start_scan_tlvs---of 20
ath10k_wmi_put_wmi_channel---of 19
ath10k_wmi_set_wmm_param---of 1
ath10k_wmi_start_scan_init---of 1
ath10k_wmi_start_scan_tlvs_len.isra.0---of 9
ath10k_wmi_start_scan_verify---of 5
ath10k_wmi_tpc_config_get_rate_code---of 25
ath10k_wmi_tpc_stats_final_disp_tables---of 51
ath10k_wmi_tx_beacons_iter---of 12
ath10k_wmi_wait_for_service_ready---of 2
ath10k_wmi_wait_for_unified_ready---of 2
freq_to_idx---of 8
wmi_10_4_svc_map---of 349
wmi_10x_svc_map---of 160
wmi_main_svc_map---of 155
wmi_process_mgmt_tx_comp.isra.0---of 7
-----------
SUMMARY67%of 3
-----------
SUMMARY---of 0
__should_fail_alloc_page70%of 10
__should_fail_alloc_page.cold---of 1
-----------
SUMMARY70%of 10
__evdev_queue_syn_dropped50%of 2
bits_to_user86%of 7
evdev_cleanup100%of 4
evdev_connect---of 15
evdev_disconnect100%of 1
evdev_do_ioctl81%of 106
evdev_event---of 1
evdev_events56%of 20
evdev_fasync100%of 1
evdev_free100%of 3
evdev_handle_get_keycode100%of 4
evdev_handle_get_keycode_v2100%of 4
evdev_handle_get_val100%of 15
evdev_handle_set_keycode100%of 4
evdev_handle_set_keycode_v2100%of 4
evdev_ioctl86%of 7
evdev_ioctl_compat---of 7
evdev_open94%of 16
evdev_open.cold---of 1
evdev_pass_values95%of 19
evdev_poll---of 9
evdev_read85%of 40
evdev_release100%of 11
evdev_ungrab63%of 8
evdev_write81%of 21
str_to_user100%of 3
-----------
SUMMARY84%of 300
ipv6_misc_proc_exit---of 1
ipv6_proc_exit_net---of 1
ipv6_proc_init_net---of 6
snmp6_dev_seq_show---of 1
snmp6_register_dev---of 6
snmp6_seq_show---of 1
snmp6_seq_show_icmpv6msg---of 10
snmp6_seq_show_item---of 14
snmp6_seq_show_item64.constprop.0---of 9
snmp6_unregister_dev100%of 4
sockstat6_seq_show---of 1
-----------
SUMMARY100%of 4
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__ia32_compat_sys_x32_rt_sigreturn---of 8
__ia32_sys_rt_sigreturn---of 8
copy_siginfo_to_user32---of 5
restore_sigcontext---of 7
sigaction_compat_abi---of 5
x32_copy_siginfo_to_user---of 4
x32_setup_rt_frame---of 46
x64_setup_rt_frame92%of 47
-----------
SUMMARY92%of 47
try_set_ext_ctrls_request---of 56
v4l2_ctrl_handler_free_request50%of 4
v4l2_ctrl_handler_init_request---of 1
v4l2_ctrl_request_bind---of 18
v4l2_ctrl_request_complete---of 24
v4l2_ctrl_request_hdl_ctrl_find---of 5
v4l2_ctrl_request_hdl_find---of 6
v4l2_ctrl_request_queue---of 3
v4l2_ctrl_request_release---of 1
v4l2_ctrl_request_setup---of 36
v4l2_ctrl_request_unbind---of 7
v4l2_ctrls_find_req_obj---of 19
v4l2_g_ext_ctrls_request---of 18
-----------
SUMMARY50%of 4
-----------
SUMMARY---of 0
__kfifo_alloc---of 8
__kfifo_alloc.cold---of 1
__kfifo_dma_in_finish_r---of 4
__kfifo_dma_in_finish_r.cold---of 1
__kfifo_dma_in_prepare---of 1
__kfifo_dma_in_prepare_r---of 6
__kfifo_dma_in_prepare_r.cold---of 1
__kfifo_dma_out_finish_r---of 3
__kfifo_dma_out_prepare---of 1
__kfifo_dma_out_prepare_r---of 6
__kfifo_dma_out_prepare_r.cold---of 1
__kfifo_free100%of 1
__kfifo_from_user---of 3
__kfifo_from_user_r---of 9
__kfifo_from_user_r.cold---of 1
__kfifo_in---of 1
__kfifo_in_r---of 5
__kfifo_init---of 8
__kfifo_init.cold---of 1
__kfifo_len_r---of 3
__kfifo_max_r---of 2
__kfifo_max_r.cold---of 1
__kfifo_out---of 1
__kfifo_out_peek---of 1
__kfifo_out_peek_r---of 5
__kfifo_out_r---of 5
__kfifo_skip_r---of 3
__kfifo_to_user---of 3
__kfifo_to_user_r---of 8
kfifo_copy_from_user---of 11
kfifo_copy_in---of 3
kfifo_copy_out---of 3
kfifo_copy_to_user---of 11
setup_sgl---of 9
setup_sgl_buf.part.0---of 13
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
devm_rc_alloc_release---of 2
devm_rc_allocate_device---of 5
devm_rc_register_device---of 6
devm_rc_release---of 1
ir_close---of 2
ir_do_keydown---of 21
ir_do_keyup.part.0---of 5
ir_establish_scancode83%of 17
ir_getkeycode54%of 15
ir_open---of 1
ir_raw_load_modules---of 10
ir_resize_table.constprop.057%of 16
ir_setkeycode60%of 10
ir_timer_keyup---of 4
ir_timer_repeat---of 4
ir_update_mapping30%of 17
rc_allocate_device---of 7
rc_close---of 2
rc_close.part.0---of 5
rc_dev_release100%of 1
rc_dev_uevent25%of 8
rc_devnode---of 3
rc_free_device---of 2
rc_g_keycode_from_table---of 7
rc_keydown---of 5
rc_keydown_notimeout---of 1
rc_keyup---of 3
rc_map_cmp67%of 3
rc_map_get---of 15
rc_map_register---of 3
rc_map_unregister---of 3
rc_open---of 8
rc_register_device---of 68
rc_register_device.cold---of 2
rc_repeat---of 7
rc_unregister_device80%of 10
rc_validate_scancode---of 9
show_filter---of 5
show_protocols---of 16
show_wakeup_protocols---of 11
store_filter---of 26
store_protocols---of 47
store_wakeup_protocols---of 20
-----------
SUMMARY57%of 97
-----------
SUMMARY---of 0
__bpf_trace_applptr---of 1
__bpf_trace_hw_ptr_error---of 1
__bpf_trace_hwptr---of 1
__bpf_trace_xrun---of 1
__snd_pcm_lib_xfer---of 102
__snd_pcm_xrun---of 27
__traceiter_applptr---of 3
__traceiter_hw_ptr_error---of 3
__traceiter_hwptr---of 4
__traceiter_xrun---of 3
_snd_pcm_hw_param_setempty---of 5
_snd_pcm_hw_params_any---of 4
default_read_copy---of 4
default_write_copy---of 4
do_transfer---of 5
fill_silence---of 5
interleaved_copy---of 1
noninterleaved_copy---of 8
pcm_chmap_ctl_get---of 18
pcm_chmap_ctl_get.cold---of 1
pcm_chmap_ctl_info---of 1
pcm_chmap_ctl_private_free100%of 1
pcm_chmap_ctl_tlv---of 19
pcm_chmap_ctl_tlv.cold---of 1
pcm_lib_apply_appl_ptr---of 26
perf_trace_applptr---of 12
perf_trace_hw_ptr_error---of 6
perf_trace_hwptr---of 5
perf_trace_xrun---of 5
snd_interval_div---of 15
snd_interval_list---of 16
snd_interval_list.cold---of 1
snd_interval_mul---of 16
snd_interval_muldivk---of 23
snd_interval_mulkdiv---of 19
snd_interval_ranges---of 21
snd_interval_ranges.cold---of 1
snd_interval_ratnum---of 34
snd_interval_refine---of 30
snd_pcm_add_chmap_ctls---of 10
snd_pcm_debug_name---of 2
snd_pcm_hw_constraint_integer---of 5
snd_pcm_hw_constraint_list---of 1
snd_pcm_hw_constraint_mask---of 2
snd_pcm_hw_constraint_mask64---of 3
snd_pcm_hw_constraint_minmax---of 1
snd_pcm_hw_constraint_msbits---of 1
snd_pcm_hw_constraint_pow2---of 1
snd_pcm_hw_constraint_ranges---of 1
snd_pcm_hw_constraint_ratdens---of 1
snd_pcm_hw_constraint_ratnums---of 1
snd_pcm_hw_constraint_step---of 1
snd_pcm_hw_param_first---of 24
snd_pcm_hw_param_last---of 25
snd_pcm_hw_param_value---of 25
snd_pcm_hw_params_bits---of 9
snd_pcm_hw_rule_add---of 14
snd_pcm_hw_rule_list---of 1
snd_pcm_hw_rule_msbits---of 16
snd_pcm_hw_rule_noresample---of 1
snd_pcm_hw_rule_noresample_func---of 1
snd_pcm_hw_rule_pow2---of 1
snd_pcm_hw_rule_ranges---of 1
snd_pcm_hw_rule_ratdens---of 40
snd_pcm_hw_rule_ratnums---of 5
snd_pcm_hw_rule_step---of 11
snd_pcm_lib_ioctl---of 23
snd_pcm_period_elapsed---of 3
snd_pcm_period_elapsed_under_stream_lock---of 10
snd_pcm_playback_silence---of 47
snd_pcm_set_ops---of 2
snd_pcm_set_sync---of 1
snd_pcm_update_hw_ptr---of 1
snd_pcm_update_hw_ptr0---of 91
snd_pcm_update_state---of 20
trace_event_raw_event_applptr---of 13
trace_event_raw_event_hw_ptr_error---of 7
trace_event_raw_event_hwptr---of 6
trace_event_raw_event_xrun---of 6
trace_hw_ptr_error---of 15
trace_raw_output_applptr---of 5
trace_raw_output_hw_ptr_error---of 5
trace_raw_output_hwptr---of 6
trace_raw_output_xrun---of 5
update_audio_tstamp---of 17
-----------
SUMMARY100%of 1
___perf_sw_event---of 31
__do_sys_perf_event_open---of 198
__ia32_sys_perf_event_open---of 1
__output_copy.isra.0---of 3
__perf_addr_filters_adjust---of 18
__perf_cgroup_move---of 4
__perf_event__output_id_sample---of 12
__perf_event_account_interrupt---of 9
__perf_event_disable---of 21
__perf_event_enable---of 33
__perf_event_exit_context---of 3
__perf_event_header__init_id---of 18
__perf_event_init_context---of 1
__perf_event_output_stop---of 11
__perf_event_overflow---of 35
__perf_event_period---of 11
__perf_event_read---of 23
__perf_event_read_cpu---of 4
__perf_event_read_size---of 8
__perf_event_read_value---of 3
__perf_event_stop---of 5
__perf_event_task_sched_in---of 32
__perf_event_task_sched_out---of 99
__perf_install_in_context---of 35
__perf_pmu_install_event---of 8
__perf_pmu_output_stop---of 13
__perf_pmu_remove.constprop.0---of 14
__perf_read_group_add---of 30
__perf_remove_from_context---of 42
__perf_sw_event---of 4
__pmu_ctx_sched_out---of 22
__update_context_time---of 7
__x64_sys_perf_event_open---of 1
_free_event---of 80
_perf_event_disable---of 3
_perf_event_enable---of 7
_perf_event_period---of 9
_perf_event_reset---of 1
_perf_ioctl---of 152
alloc_perf_context---of 9
bpf_overflow_handler---of 21
calc_timer_values---of 15
cpu_clock_event_add---of 4
cpu_clock_event_del---of 3
cpu_clock_event_init---of 7
cpu_clock_event_read---of 1
cpu_clock_event_start---of 2
cpu_clock_event_stop---of 3
cpu_clock_event_update---of 4
ctx_event_to_rotate---of 37
ctx_groups_sched_in---of 6
ctx_resched---of 20
ctx_sched_in---of 19
ctx_sched_out---of 28
event_function---of 22
event_function_call---of 14
event_function_local.constprop.0---of 25
event_sched_in---of 28
event_sched_out---of 39
exclusive_event_destroy---of 4
exclusive_event_installable---of 12
find_get_context---of 27
find_get_pmu_context---of 32
free_ctx---of 1
free_epc_rcu---of 1
free_event---of 3
free_event_rcu---of 3
free_filters_list---of 4
get_event_type---of 8
get_pmu_ctx---of 6
group_sched_out.part.0---of 10
inherit_event.constprop.0---of 22
inherit_task_group.isra.0---of 30
ktime_get_boottime_ns---of 1
ktime_get_clocktai_ns---of 1
ktime_get_real_ns---of 1
list_add_event---of 36
list_del_event---of 24
merge_sched_in---of 65
nr_addr_filters_show---of 1
perf_addr_filters_splice---of 9
perf_adjust_freq_unthr_context8%of 25
perf_adjust_period---of 33
perf_bp_event---of 6
perf_callchain---of 5
perf_cgroup_attach---of 3
perf_cgroup_css_alloc---of 5
perf_cgroup_css_free---of 1
perf_cgroup_css_online---of 7
perf_cgroup_switch---of 13
perf_compat_ioctl---of 6
perf_copy_attr---of 47
perf_cpu_task_ctx---of 5
perf_cpu_time_max_percent_handler---of 10
perf_ctx_disable---of 6
perf_ctx_enable---of 6
perf_ctx_sched_task_cb---of 5
perf_duration_warn---of 2
perf_event__header_size---of 21
perf_event__id_header_size---of 11
perf_event__output_id_sample---of 2
perf_event_account_interrupt---of 1
perf_event_addr_filters_apply---of 31
perf_event_addr_filters_exec---of 11
perf_event_addr_filters_sync---of 7
perf_event_alloc---of 236
perf_event_attrs---of 4
perf_event_aux_event---of 7
perf_event_bpf_event---of 8
perf_event_bpf_output---of 8
perf_event_cgroup_output---of 9
perf_event_comm---of 10
perf_event_comm_output---of 15
perf_event_create_kernel_counter---of 20
perf_event_ctx_lock_nested---of 30
perf_event_delayed_put---of 2
perf_event_disable---of 4
perf_event_disable_inatomic---of 1
perf_event_disable_local---of 1
perf_event_enable---of 1
perf_event_exec---of 41
perf_event_exit_cpu---of 1
perf_event_exit_cpu_context---of 3
perf_event_exit_event---of 15
perf_event_exit_task---of 19
perf_event_for_each_child---of 5
perf_event_fork---of 1
perf_event_free_bpf_prog---of 5
perf_event_free_task---of 22
perf_event_get---of 5
perf_event_groups_delete---of 4
perf_event_groups_first---of 19
perf_event_groups_insert---of 22
perf_event_groups_next---of 11
perf_event_header__init_id---of 2
perf_event_idx_default---of 1
perf_event_init_cpu---of 13
perf_event_init_task---of 27
perf_event_itrace_started---of 1
perf_event_ksymbol---of 18
perf_event_ksymbol_output---of 8
perf_event_max_sample_rate_handler---of 9
perf_event_mmap---of 55
perf_event_mmap_output---of 31
perf_event_mux_interval_ms_show---of 1
perf_event_mux_interval_ms_store---of 11
perf_event_namespaces---of 3
perf_event_namespaces_output---of 15
perf_event_nop_int---of 1
perf_event_output---of 13
perf_event_output_backward---of 13
perf_event_output_forward---of 13
perf_event_overflow---of 1
perf_event_pause---of 8
perf_event_period---of 1
perf_event_read---of 17
perf_event_read_event---of 13
perf_event_read_local---of 23
perf_event_read_value---of 1
perf_event_refresh---of 4
perf_event_release_kernel---of 31
perf_event_set_bpf_prog---of 23
perf_event_set_output---of 21
perf_event_set_state.part.0---of 11
perf_event_stop.isra.0---of 4
perf_event_switch_output---of 20
perf_event_sysfs_show---of 3
perf_event_task---of 6
perf_event_task_disable---of 13
perf_event_task_enable---of 7
perf_event_task_output---of 28
perf_event_task_tick50%of 22
perf_event_text_poke---of 3
perf_event_text_poke_output---of 10
perf_event_update_sibling_time---of 16
perf_event_update_time---of 9
perf_event_update_userpage---of 24
perf_event_wakeup---of 22
perf_exclude_event---of 8
perf_fasync---of 1
perf_fill_ns_link_info---of 3
perf_get_aux_event---of 15
perf_get_event---of 4
perf_get_page_size.part.0---of 25
perf_group_attach---of 17
perf_group_detach---of 52
perf_install_in_context---of 27
perf_ioctl---of 3
perf_iterate_ctx---of 13
perf_iterate_sb---of 33
perf_lock_task_context---of 67
perf_log_itrace_start---of 16
perf_log_lost_samples---of 7
perf_log_throttle---of 11
perf_mmap---of 93
perf_mmap_close---of 58
perf_mmap_fault---of 34
perf_mmap_open---of 4
perf_mux_hrtimer_handler---of 43
perf_mux_hrtimer_restart---of 3
perf_mux_hrtimer_restart_ipi---of 1
perf_output_read---of 55
perf_output_sample---of 104
perf_output_sample_regs---of 7
perf_pending_irq---of 11
perf_pending_task---of 8
perf_pmu_cancel_txn---of 3
perf_pmu_commit_txn---of 5
perf_pmu_disable---of 2
perf_pmu_enable---of 2
perf_pmu_migrate_context---of 14
perf_pmu_nop_int---of 1
perf_pmu_register---of 42
perf_pmu_resched---of 5
perf_pmu_sched_task.part.0---of 14
perf_pmu_start_txn---of 3
perf_pmu_unregister---of 9
perf_poll---of 11
perf_prepare_header---of 2
perf_prepare_sample---of 98
perf_read---of 35
perf_reboot---of 6
perf_release---of 1
perf_remove_from_context---of 6
perf_remove_from_owner---of 31
perf_report_aux_output_id---of 8
perf_sample_event_took---of 6
perf_sched_cb_dec---of 4
perf_sched_cb_inc---of 4
perf_sched_delayed---of 3
perf_sigtrap---of 4
perf_swevent_add---of 15
perf_swevent_del---of 3
perf_swevent_event---of 15
perf_swevent_get_recursion_context---of 3
perf_swevent_hrtimer---of 8
perf_swevent_init---of 31
perf_swevent_put_recursion_context---of 1
perf_swevent_set_period---of 4
perf_swevent_start---of 1
perf_swevent_start_hrtimer.part.0---of 5
perf_swevent_stop---of 1
perf_tp_event---of 65
perf_tp_event_init---of 5
perf_tp_event_match---of 8
perf_trace_run_bpf_submit---of 5
perf_try_init_event---of 20
perf_uprobe_event_init---of 8
pmu_dev_alloc---of 8
pmu_dev_is_visible---of 5
pmu_dev_release---of 1
put_ctx---of 13
put_pmu_ctx---of 12
rb_free_rcu---of 1
ref_ctr_offset_show---of 1
remote_function---of 4
retprobe_show---of 1
ring_buffer_attach---of 19
ring_buffer_get---of 28
ring_buffer_put---of 7
sw_perf_event_destroy---of 7
swevent_hlist_put_cpu---of 9
task_clock_event_add---of 4
task_clock_event_del---of 3
task_clock_event_init---of 7
task_clock_event_read---of 1
task_clock_event_start---of 2
task_clock_event_stop---of 3
task_clock_event_update---of 4
task_ctx_sched_out---of 4
task_function_call---of 6
tp_perf_event_destroy---of 1
type_show---of 1
unclone_ctx---of 6
visit_groups_merge.constprop.0.isra.0---of 76
-----------
SUMMARY28%of 47
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
psmouse_smbus_cleanup---of 7
psmouse_smbus_create_companion---of 5
psmouse_smbus_disconnect---of 11
psmouse_smbus_init---of 21
psmouse_smbus_module_exit---of 1
psmouse_smbus_notifier_call20%of 25
psmouse_smbus_process_byte---of 1
psmouse_smbus_reconnect---of 4
psmouse_smbus_remove_i2c_device---of 3
-----------
SUMMARY20%of 25
-----------
SUMMARY---of 0
dpm_save_errno---of 3
dpm_save_failed_dev---of 7
dpm_save_failed_step---of 1
fail_show---of 1
failed_freeze_show---of 1
failed_prepare_show---of 1
failed_resume_early_show---of 1
failed_resume_noirq_show---of 1
failed_resume_show---of 1
failed_suspend_late_show---of 1
failed_suspend_noirq_show---of 1
failed_suspend_show---of 1
ksys_sync_helper---of 1
last_failed_dev_show---of 1
last_failed_errno_show---of 1
last_failed_step_show---of 1
last_hw_sleep_show---of 1
lock_system_sleep---of 1
max_hw_sleep_show---of 1
mem_sleep_show---of 8
mem_sleep_store---of 12
pm_async_show---of 1
pm_async_store---of 4
pm_debug_messages_should_print---of 3
pm_debug_messages_show---of 1
pm_debug_messages_store---of 4
pm_freeze_timeout_show---of 1
pm_freeze_timeout_store---of 3
pm_notifier_call_chain---of 1
pm_notifier_call_chain_robust---of 3
pm_print_times_show---of 1
pm_print_times_store---of 4
pm_report_hw_sleep_time---of 1
pm_report_max_hw_sleep---of 1
pm_restore_gfp_mask---of 4
pm_restrict_gfp_mask---of 5
pm_test_show---of 8
pm_test_store---of 16
pm_trace_dev_match_show---of 1
pm_trace_show---of 1
pm_trace_store---of 5
pm_wakeup_irq_show---of 3
register_pm_notifier---of 1
state_show---of 8
state_store---of 18
success_show---of 1
suspend_attr_is_visible---of 6
suspend_stats_open---of 1
suspend_stats_show---of 3
sync_on_suspend_show---of 1
sync_on_suspend_store---of 4
total_hw_sleep_show---of 1
unlock_system_sleep---of 3
unregister_pm_notifier100%of 1
wakeup_count_show---of 3
wakeup_count_store---of 5
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
__bpf_trace_emulate_vsyscall---of 1
__traceiter_emulate_vsyscall---of 3
emulate_vsyscall9%of 57
gate_vma_name---of 1
get_gate_vma---of 5
in_gate_area---of 7
in_gate_area_no_mm100%of 3
perf_trace_emulate_vsyscall---of 5
trace_event_raw_event_emulate_vsyscall---of 6
trace_raw_output_emulate_vsyscall---of 5
warn_bad_vsyscall34%of 3
-----------
SUMMARY15%of 63
-----------
SUMMARY---of 0
__snd_rawmidi_transmit_ack---of 10
__snd_rawmidi_transmit_peek.isra.0---of 11
close_substream.part.0---of 19
open_substream---of 19
rawmidi_open_priv---of 33
rawmidi_release_priv---of 10
reset_runtime_ptrs---of 7
resize_runtime_buffer---of 16
snd_rawmidi_alloc_substreams---of 6
snd_rawmidi_control_ioctl---of 18
snd_rawmidi_dev_disconnect91%of 11
snd_rawmidi_dev_free67%of 3
snd_rawmidi_dev_register---of 28
snd_rawmidi_dev_seq_free100%of 1
snd_rawmidi_drain_input---of 3
snd_rawmidi_drain_output---of 33
snd_rawmidi_drop_output---of 3
snd_rawmidi_free---of 3
snd_rawmidi_free.part.050%of 14
snd_rawmidi_info---of 15
snd_rawmidi_info_select---of 14
snd_rawmidi_info_select_user---of 6
snd_rawmidi_info_user---of 3
snd_rawmidi_init---of 12
snd_rawmidi_input_event_work---of 2
snd_rawmidi_input_params---of 11
snd_rawmidi_input_status---of 3
snd_rawmidi_ioctl---of 39
snd_rawmidi_ioctl_compat---of 16
snd_rawmidi_ioctl_status32---of 9
snd_rawmidi_ioctl_status64---of 9
snd_rawmidi_ioctl_status_compat64---of 9
snd_rawmidi_kernel_open---of 9
snd_rawmidi_kernel_read---of 3
snd_rawmidi_kernel_read1---of 18
snd_rawmidi_kernel_release---of 5
snd_rawmidi_kernel_write---of 1
snd_rawmidi_kernel_write1---of 27
snd_rawmidi_new---of 8
snd_rawmidi_open---of 26
snd_rawmidi_output_params---of 7
snd_rawmidi_output_status---of 3
snd_rawmidi_poll---of 31
snd_rawmidi_proc_info_read---of 25
snd_rawmidi_proceed---of 7
snd_rawmidi_read---of 22
snd_rawmidi_receive---of 44
snd_rawmidi_release---of 1
snd_rawmidi_set_ops---of 2
snd_rawmidi_transmit---of 6
snd_rawmidi_transmit_ack---of 6
snd_rawmidi_transmit_empty---of 9
snd_rawmidi_transmit_peek---of 6
snd_rawmidi_write---of 37
-----------
SUMMARY69%of 29
__irq_domain_activate_irq---of 9
__irq_domain_add---of 3
__irq_domain_alloc_fwnode---of 9
__irq_domain_alloc_irqs---of 5
__irq_domain_create---of 35
__irq_domain_deactivate_irq---of 6
__irq_domain_publish---of 4
__irq_resolve_mapping46%of 24
irq_create_fwspec_mapping---of 51
irq_create_mapping_affinity---of 11
irq_create_mapping_affinity_locked---of 16
irq_create_of_mapping---of 5
irq_dispose_mapping7%of 15
irq_domain_activate_irq---of 5
irq_domain_add_legacy---of 5
irq_domain_alloc_descs---of 5
irq_domain_alloc_descs.part.0---of 4
irq_domain_alloc_irqs_hierarchy---of 5
irq_domain_alloc_irqs_locked---of 48
irq_domain_alloc_irqs_parent---of 6
irq_domain_associate---of 1
irq_domain_associate_locked---of 13
irq_domain_associate_many---of 10
irq_domain_clear_mapping---of 6
irq_domain_create_hierarchy---of 8
irq_domain_create_legacy---of 3
irq_domain_create_simple---of 10
irq_domain_deactivate_irq---of 2
irq_domain_disconnect_hierarchy---of 6
irq_domain_fix_revmap---of 7
irq_domain_free_fwnode---of 4
irq_domain_free_irqs---of 14
irq_domain_free_irqs_common---of 8
irq_domain_free_irqs_hierarchy---of 7
irq_domain_free_irqs_parent---of 2
irq_domain_free_irqs_top---of 3
irq_domain_get_irq_data---of 5
irq_domain_pop_irq---of 17
irq_domain_push_irq---of 14
irq_domain_remove57%of 16
irq_domain_reset_irq_data---of 1
irq_domain_set_hwirq_and_chip---of 6
irq_domain_set_info---of 6
irq_domain_set_mapping---of 6
irq_domain_translate_onecell---of 5
irq_domain_translate_twocell---of 5
irq_domain_update_bus_token---of 7
irq_domain_xlate_onecell---of 5
irq_domain_xlate_onetwocell---of 7
irq_domain_xlate_twocell---of 6
irq_find_matching_fwspec---of 17
irq_get_default_host---of 1
irq_set_default_host---of 4
irqchip_fwnode_get_name---of 1
of_phandle_args_to_fwspec---of 4
-----------
SUMMARY39%of 55
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__bpf_trace_x86_fpu---of 1
__traceiter_x86_fpu_after_restore---of 3
__traceiter_x86_fpu_after_save---of 3
__traceiter_x86_fpu_before_restore---of 3
__traceiter_x86_fpu_before_save---of 3
__traceiter_x86_fpu_copy_dst---of 3
__traceiter_x86_fpu_copy_src---of 3
__traceiter_x86_fpu_dropped---of 3
__traceiter_x86_fpu_init_state---of 3
__traceiter_x86_fpu_regs_activated---of 3
__traceiter_x86_fpu_regs_deactivated---of 3
__traceiter_x86_fpu_xstate_check_failed---of 3
fpregs_assert_state_consistent75%of 4
fpregs_lock_and_load---of 6
fpregs_mark_activate38%of 16
fpregs_restore_userregs48%of 21
fpstate_init_user---of 3
fpstate_reset---of 1
fpu__clear_user_states50%of 12
fpu__drop---of 31
fpu__exception_code---of 9
fpu_clone---of 42
fpu_flush_thread---of 4
fpu_reset_from_exception_fixup---of 1
fpu_sync_fpstate---of 34
fpu_thread_struct_whitelist---of 1
irq_fpu_usable---of 7
kernel_fpu_begin_mask---of 12
kernel_fpu_end---of 4
perf_trace_x86_fpu---of 7
restore_fpregs_from_fpstate42%of 12
save_fpregs_to_fpstate---of 8
switch_fpu_return100%of 1
trace_event_raw_event_x86_fpu---of 8
trace_raw_output_x86_fpu---of 5
-----------
SUMMARY47%of 66
cfg802154_dev_free---of 1
cfg802154_netdev_notifier_call53%of 19
cfg802154_pernet_exit---of 6
cfg802154_rdev_by_wpan_phy_idx---of 8
cfg802154_switch_netns---of 18
cfg802154_update_iface_num.constprop.0---of 4
wpan_phy_find---of 5
wpan_phy_for_each---of 1
wpan_phy_free---of 1
wpan_phy_idx_to_wpan_phy---of 6
wpan_phy_iter---of 1
wpan_phy_new---of 5
wpan_phy_register---of 9
wpan_phy_unregister---of 12
-----------
SUMMARY53%of 19
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__scsi_remove_device---of 19
scsi_bus_match---of 5
scsi_bus_uevent50%of 4
scsi_device_cls_release---of 1
scsi_device_dev_release---of 68
scsi_device_state_name---of 4
scsi_host_state_name---of 4
scsi_is_sdev_device100%of 1
scsi_register_driver---of 1
scsi_register_interface---of 1
scsi_remove_device---of 1
scsi_remove_target---of 31
scsi_sdev_attr_is_visible---of 6
scsi_sdev_bin_attr_is_visible---of 18
scsi_sysfs_add_host---of 3
scsi_sysfs_add_sdev---of 21
scsi_sysfs_device_initialize---of 12
scsi_sysfs_register---of 5
scsi_sysfs_unregister---of 1
sdev_show_blacklist---of 13
sdev_show_cdl_enable---of 1
sdev_show_cdl_supported---of 1
sdev_show_device_blocked---of 1
sdev_show_device_busy---of 1
sdev_show_eh_timeout---of 1
sdev_show_evt_capacity_change_reported---of 1
sdev_show_evt_inquiry_change_reported---of 1
sdev_show_evt_lun_change_reported---of 1
sdev_show_evt_media_change---of 1
sdev_show_evt_mode_parameter_change_reported---of 1
sdev_show_evt_soft_threshold_reached---of 1
sdev_show_modalias---of 1
sdev_show_model---of 1
sdev_show_queue_depth---of 1
sdev_show_queue_ramp_up_period---of 1
sdev_show_rev---of 1
sdev_show_scsi_level---of 1
sdev_show_timeout---of 1
sdev_show_type---of 1
sdev_show_vendor---of 1
sdev_show_wwid---of 3
sdev_store_cdl_enable---of 5
sdev_store_delete---of 6
sdev_store_eh_timeout---of 5
sdev_store_evt_capacity_change_reported---of 6
sdev_store_evt_inquiry_change_reported---of 6
sdev_store_evt_lun_change_reported---of 6
sdev_store_evt_media_change---of 6
sdev_store_evt_mode_parameter_change_reported---of 6
sdev_store_evt_soft_threshold_reached---of 6
sdev_store_queue_depth---of 7
sdev_store_queue_ramp_up_period---of 3
sdev_store_timeout---of 1
show_can_queue---of 1
show_cmd_per_lun---of 1
show_host_busy---of 1
show_inquiry---of 3
show_iostat_counterbits---of 1
show_iostat_iodone_cnt---of 1
show_iostat_ioerr_cnt---of 1
show_iostat_iorequest_cnt---of 1
show_iostat_iotmo_cnt---of 1
show_nr_hw_queues---of 1
show_proc_name---of 1
show_prot_capabilities---of 1
show_prot_guard_type---of 1
show_queue_type_field---of 2
show_sg_prot_tablesize---of 1
show_sg_tablesize---of 1
show_shost_active_mode---of 5
show_shost_eh_deadline---of 5
show_shost_mode---of 7
show_shost_state---of 8
show_shost_supported_mode---of 1
show_state_field---of 8
show_unique_id---of 1
show_use_blk_mq---of 1
show_vpd_pg0---of 18
show_vpd_pg80---of 18
show_vpd_pg83---of 18
show_vpd_pg89---of 18
show_vpd_pgb0---of 18
show_vpd_pgb1---of 18
show_vpd_pgb2---of 18
show_vpd_pgb7---of 18
store_host_reset---of 7
store_queue_type_field---of 3
store_rescan_field---of 1
store_scan---of 15
store_shost_eh_deadline---of 15
store_shost_state---of 10
store_state_field---of 18
-----------
SUMMARY60%of 5
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
bind_store---of 10
bus_add_device53%of 17
bus_add_driver60%of 20
bus_attr_show---of 3
bus_attr_store---of 3
bus_create_file---of 3
bus_find_device90%of 10
bus_for_each_dev89%of 9
bus_for_each_drv90%of 10
bus_get_dev_root---of 4
bus_get_kset---of 3
bus_is_registered100%of 3
bus_notify100%of 2
bus_probe_device38%of 8
bus_register---of 25
bus_register_notifier---of 3
bus_release---of 1
bus_remove_device58%of 14
bus_remove_driver84%of 6
bus_remove_file---of 2
bus_rescan_devices---of 7
bus_rescan_devices_helper---of 10
bus_sort_breadthfirst---of 18
bus_to_subsys93%of 13
bus_uevent_filter---of 1
bus_uevent_store---of 6
bus_unregister---of 12
bus_unregister_notifier---of 3
device_reprobe---of 11
driver_find60%of 5
driver_release50%of 4
drivers_autoprobe_show---of 3
drivers_autoprobe_store---of 3
drivers_probe_store---of 5
drv_attr_show---of 3
drv_attr_store---of 3
klist_devices_get100%of 1
klist_devices_put100%of 1
subsys_interface_register---of 15
subsys_interface_unregister---of 15
subsys_register.part.0---of 10
subsys_system_register---of 3
subsys_virtual_register---of 4
system_root_device_release---of 1
uevent_store---of 5
unbind_store---of 7
-----------
SUMMARY71%of 123
add_early_randomness---of 10
devm_hwrng_match---of 6
devm_hwrng_register---of 6
devm_hwrng_release---of 1
devm_hwrng_unregister---of 1
drop_current_rng---of 11
enable_best_rng---of 12
get_current_rng---of 11
hwrng_fillfn---of 27
hwrng_msleep---of 1
hwrng_register---of 26
hwrng_unregister64%of 22
hwrng_yield---of 1
put_rng56%of 9
rng_available_show---of 14
rng_current_show---of 7
rng_current_store---of 23
rng_dev_open---of 3
rng_dev_read---of 37
rng_quality_show---of 6
rng_quality_store---of 11
rng_selected_show---of 1
set_current_rng---of 21
-----------
SUMMARY62%of 31
nfcmrvl_bulk_complete---of 14
nfcmrvl_disconnect100%of 2
nfcmrvl_probe---of 21
nfcmrvl_resume---of 16
nfcmrvl_submit_bulk_urb---of 12
nfcmrvl_suspend---of 6
nfcmrvl_tx_complete---of 1
nfcmrvl_usb_nci_close---of 3
nfcmrvl_usb_nci_open---of 6
nfcmrvl_usb_nci_send---of 12
nfcmrvl_waker---of 2
-----------
SUMMARY100%of 2
address_val---of 12
bdev_name.constprop.0---of 13
bitmap_list_string.constprop.0---of 14
bitmap_string.constprop.0---of 19
bprintf---of 1
bstr_printf---of 60
check_pointer---of 11
clock.constprop.0---of 12
date_str---of 7
default_pointer17%of 31
dentry_name---of 43
device_node_string---of 69
escaped_string---of 18
file_dentry_name---of 12
fill_ptr_key---of 1
flags_string---of 39
format_decode62%of 73
format_flags---of 11
fourcc_string---of 30
fwnode_full_name_string---of 5
fwnode_string---of 16
hex_string---of 20
ip4_addr_string---of 6
ip4_addr_string_sa---of 14
ip4_string---of 13
ip6_addr_string---of 10
ip6_addr_string_sa---of 23
ip6_compressed_string---of 33
ip6_string---of 6
ip_addr_string---of 26
mac_address_string---of 26
netdev_bits---of 20
num_to_str---of 14
number58%of 64
pointer7%of 59
ptr_to_hashval---of 3
put_dec50%of 4
put_dec_full8---of 1
put_dec_trunc888%of 8
resource_string.isra.0---of 96
restricted_pointer---of 23
rtc_str---of 22
scnprintf75%of 4
set_field_width---of 4
set_precision---of 4
simple_strntoll75%of 4
simple_strntoull84%of 6
simple_strtol---of 5
simple_strtoll---of 1
simple_strtoul100%of 1
simple_strtoull100%of 1
skip_atoi100%of 3
snprintf100%of 1
special_hex_number---of 1
sprintf100%of 1
sscanf100%of 1
string53%of 17
symbol_string---of 20
time64_str---of 1
time_and_date---of 12
time_str.constprop.0---of 5
uuid_string---of 24
va_format.constprop.043%of 14
vbin_printf---of 89
vscnprintf60%of 5
vsnprintf47%of 106
vsprintf---of 1
vsscanf20%of 157
widen_string63%of 16
-----------
SUMMARY40%of 576
keene_cmd_main---of 8
keene_cmd_set---of 3
keene_s_ctrl---of 6
usb_keene_disconnect100%of 1
usb_keene_probe---of 20
usb_keene_resume---of 3
usb_keene_suspend---of 1
usb_keene_video_device_release100%of 1
vidioc_g_frequency---of 3
vidioc_g_modulator---of 3
vidioc_querycap---of 1
vidioc_s_frequency---of 4
vidioc_s_modulator---of 3
-----------
SUMMARY100%of 2
ima_iint_find40%of 5
ima_iint_init_once---of 1
ima_inode_free25%of 4
ima_inode_get---of 10
-----------
SUMMARY34%of 9
alloc_info_private---of 7
snd_card_id_read---of 1
snd_card_rw_proc_new---of 4
snd_info_card_create---of 8
snd_info_card_disconnect100%of 9
snd_info_card_free100%of 3
snd_info_card_id_change---of 5
snd_info_card_register---of 9
snd_info_check_reserved_words---of 5
snd_info_clear_entries31%of 36
snd_info_create_card_entry---of 3
snd_info_create_entry---of 9
snd_info_create_module_entry---of 3
snd_info_entry_ioctl---of 3
snd_info_entry_llseek---of 13
snd_info_entry_mmap---of 4
snd_info_entry_open---of 12
snd_info_entry_poll---of 6
snd_info_entry_read---of 6
snd_info_entry_release---of 3
snd_info_entry_write---of 6
snd_info_free_entry65%of 17
snd_info_get_line---of 15
snd_info_get_str---of 22
snd_info_register---of 20
snd_info_seq_show---of 3
snd_info_text_entry_open---of 8
snd_info_text_entry_release---of 6
snd_info_text_entry_write---of 15
snd_info_version_read---of 1
-----------
SUMMARY53%of 65
usb_destroy_configuration85%of 20
usb_get_bos_descriptor98%of 34
usb_get_configuration77%of 291
usb_release_bos_descriptor100%of 2
usb_release_interface_cache---of 3
-----------
SUMMARY79%of 347
picolcd_exit_leds67%of 3
picolcd_init_leds---of 19
picolcd_led_get_brightness---of 7
picolcd_led_set_brightness---of 9
picolcd_leds_set---of 2
picolcd_leds_set.part.0---of 6
-----------
SUMMARY67%of 3
__bpf_trace_filelock_lease---of 1
__bpf_trace_filelock_lock---of 1
__bpf_trace_generic_add_lease---of 1
__bpf_trace_leases_conflict---of 1
__bpf_trace_locks_get_lock_context---of 1
__break_lease---of 126
__do_sys_flock---of 20
__ia32_sys_flock---of 1
__locks_delete_block---of 7
__locks_insert_block---of 15
__locks_wake_up_blocks---of 9
__show_fd_locks---of 6
__traceiter_break_lease_block---of 3
__traceiter_break_lease_noblock---of 3
__traceiter_break_lease_unblock---of 3
__traceiter_fcntl_setlk---of 3
__traceiter_flock_lock_inode---of 3
__traceiter_generic_add_lease---of 3
__traceiter_generic_delete_lease---of 3
__traceiter_leases_conflict---of 4
__traceiter_locks_get_lock_context---of 3
__traceiter_locks_remove_posix---of 3
__traceiter_posix_lock_inode---of 3
__traceiter_time_out_leases---of 3
__x64_sys_flock---of 1
any_leases_conflict.isra.0---of 9
check_conflicting_open.isra.0---of 9
do_lock_file_wait---of 15
fcntl_getlease---of 31
fcntl_getlk---of 14
fcntl_setlease---of 15
fcntl_setlk---of 40
flock64_to_posix_lock---of 17
flock_lock_inode---of 70
flock_locks_conflict---of 5
generic_setlease---of 93
kernel_setlease---of 8
lease_break_callback---of 1
lease_get_mtime---of 8
lease_modify---of 15
lease_register_notifier---of 1
lease_setup---of 3
lease_unregister_notifier---of 1
leases_conflict---of 23
lock_get_status---of 37
locks_alloc_lease---of 3
locks_alloc_lock---of 3
locks_check_ctx_file_list---of 4
locks_copy_conflock---of 3
locks_copy_lock---of 8
locks_delete_block---of 1
locks_delete_global_blocked---of 7
locks_dispose_list---of 7
locks_free_lease---of 1
locks_free_lock---of 1
locks_free_lock_context10%of 11
locks_get_lock_context---of 21
locks_init_lease---of 1
locks_init_lock---of 1
locks_insert_lock_ctx---of 8
locks_lock_inode_wait---of 30
locks_move_blocks---of 7
locks_next---of 1
locks_owner_has_blockers---of 7
locks_release_private---of 18
locks_remove_file7%of 29
locks_remove_flock---of 9
locks_remove_posix10%of 21
locks_show---of 27
locks_start---of 1
locks_stop---of 1
locks_translate_pid---of 15
locks_unlink_lock_ctx---of 13
perf_trace_filelock_lease---of 8
perf_trace_filelock_lock---of 8
perf_trace_generic_add_lease---of 5
perf_trace_leases_conflict---of 5
perf_trace_locks_get_lock_context---of 5
posix_lock_file---of 1
posix_lock_inode---of 157
posix_locks_conflict---of 7
posix_test_lock---of 24
show_fd_locks---of 3
time_out_leases---of 28
trace_event_raw_event_filelock_lease---of 9
trace_event_raw_event_filelock_lock---of 9
trace_event_raw_event_generic_add_lease---of 6
trace_event_raw_event_leases_conflict---of 6
trace_event_raw_event_locks_get_lock_context---of 6
trace_generic_delete_lease---of 15
trace_raw_output_filelock_lease---of 4
trace_raw_output_filelock_lock---of 4
trace_raw_output_generic_add_lease---of 4
trace_raw_output_leases_conflict---of 4
trace_raw_output_locks_get_lock_context---of 5
vfs_cancel_lock---of 5
vfs_inode_has_locks---of 5
vfs_lock_file---of 7
vfs_setlease---of 7
vfs_test_lock---of 7
-----------
SUMMARY9%of 61
change_protocol---of 24
ir_raw_edge_handle---of 7
ir_raw_encode_carrier---of 7
ir_raw_encode_carrier.cold---of 1
ir_raw_encode_scancode---of 8
ir_raw_encode_scancode.cold---of 1
ir_raw_event_free---of 2
ir_raw_event_handle---of 3
ir_raw_event_prepare---of 4
ir_raw_event_register---of 6
ir_raw_event_set_idle---of 10
ir_raw_event_store---of 8
ir_raw_event_store_edge---of 3
ir_raw_event_store_with_filter---of 14
ir_raw_event_store_with_timeout---of 6
ir_raw_event_thread---of 23
ir_raw_event_unregister60%of 10
ir_raw_gen_manchester---of 27
ir_raw_gen_manchester.cold---of 1
ir_raw_gen_pd---of 20
ir_raw_gen_pd.cold---of 1
ir_raw_gen_pl---of 18
ir_raw_gen_pl.cold---of 1
ir_raw_get_allowed_protocols---of 1
ir_raw_handler_register---of 3
ir_raw_handler_unregister---of 9
-----------
SUMMARY60%of 10
-----------
SUMMARY---of 0
__hci_send_to_channel---of 15
bacpy---of 1
create_monitor_ctrl_close---of 7
create_monitor_ctrl_open---of 9
create_monitor_event---of 16
hci_dev_put---of 3
hci_hdev_from_sock---of 4
hci_mgmt_chan_register---of 11
hci_mgmt_chan_unregister---of 3
hci_send_monitor_ctrl_event---of 14
hci_send_to_channel---of 1
hci_send_to_monitor---of 9
hci_send_to_sock10%of 31
hci_sock_bind---of 96
hci_sock_cleanup---of 1
hci_sock_clear_flag---of 1
hci_sock_compat_ioctl---of 5
hci_sock_copy_creds---of 9
hci_sock_create---of 6
hci_sock_destruct---of 1
hci_sock_dev_event53%of 17
hci_sock_get_channel---of 1
hci_sock_get_cookie---of 1
hci_sock_getname---of 8
hci_sock_getsockopt---of 18
hci_sock_ioctl---of 41
hci_sock_recvmsg---of 63
hci_sock_release---of 22
hci_sock_sendmsg---of 123
hci_sock_set_flag---of 1
hci_sock_setsockopt---of 42
hci_sock_test_flag---of 1
send_monitor_note---of 5
-----------
SUMMARY25%of 48
addr_assign_type_show---of 1
addr_len_show---of 1
address_show---of 13
bql_set_hold_time---of 4
bql_set_limit---of 8
bql_set_limit_max---of 8
bql_set_limit_min---of 8
bql_set_stall_max---of 1
bql_set_stall_thrs---of 8
bql_show_hold_time---of 1
bql_show_inflight---of 1
bql_show_limit---of 1
bql_show_limit_max---of 1
bql_show_limit_min---of 1
bql_show_stall_cnt---of 1
bql_show_stall_max---of 1
bql_show_stall_thrs---of 1
broadcast_show---of 13
carrier_changes_show---of 1
carrier_down_count_show---of 1
carrier_show---of 6
carrier_store---of 14
carrier_up_count_show---of 1
collisions_show---of 1
dev_id_show---of 1
dev_port_show---of 1
dormant_show---of 3
duplex_show---of 10
flags_show---of 1
flags_store---of 11
format_addr_assign_type---of 1
format_addr_len---of 1
format_dev_id---of 1
format_dev_port---of 1
format_flags---of 1
format_gro_flush_timeout---of 1
format_group---of 1
format_ifindex---of 1
format_link_mode---of 1
format_mtu---of 1
format_name_assign_type---of 1
format_napi_defer_hard_irqs---of 1
format_proto_down---of 1
format_tx_queue_len---of 1
format_type---of 1
gro_flush_timeout_show---of 1
gro_flush_timeout_store---of 11
group_show---of 1
group_store---of 9
ifalias_show---of 3
ifalias_store---of 10
ifindex_show---of 1
iflink_show---of 1
link_mode_show---of 1
mtu_show---of 1
mtu_store---of 11
multicast_show---of 1
name_assign_type_show---of 3
napi_defer_hard_irqs_show---of 1
napi_defer_hard_irqs_store---of 11
net_current_may_mount---of 1
net_get_ownership---of 1
net_grab_current_ns---of 7
net_initial_ns---of 1
net_namespace100%of 1
net_netlink_ns---of 1
net_rx_queue_update_kobjects30%of 24
netdev_change_owner---of 24
netdev_class_create_file_ns---of 1
netdev_class_remove_file_ns---of 1
netdev_queue_attr_show---of 3
netdev_queue_attr_store---of 3
netdev_queue_get_ownership---of 4
netdev_queue_namespace80%of 5
netdev_queue_release67%of 3
netdev_queue_update_kobjects38%of 27
netdev_register_kobject---of 13
netdev_release---of 3
netdev_rx_queue_set_rps_mask---of 20
netdev_show.constprop.0---of 13
netdev_uevent---of 4
netdev_unregister_kobject67%of 3
netstat_show.constprop.0---of 13
of_dev_node_match---of 5
of_find_net_device_by_node---of 3
operstate_show---of 5
phys_port_id_show---of 9
phys_port_name_show---of 10
phys_switch_id_show---of 10
proto_down_show---of 1
proto_down_store---of 11
rps_cpumask_housekeeping---of 3
rps_dev_flow_table_release---of 1
rx_bytes_show---of 1
rx_compressed_show---of 1
rx_crc_errors_show---of 1
rx_dropped_show---of 1
rx_errors_show---of 1
rx_fifo_errors_show---of 1
rx_frame_errors_show---of 1
rx_length_errors_show---of 1
rx_missed_errors_show---of 1
rx_nohandler_show---of 1
rx_over_errors_show---of 1
rx_packets_show---of 1
rx_queue_attr_show---of 3
rx_queue_attr_store---of 3
rx_queue_get_ownership---of 4
rx_queue_namespace80%of 5
rx_queue_release58%of 7
show_rps_dev_flow_table_cnt---of 18
show_rps_map---of 23
speed_show---of 10
store_rps_dev_flow_table_cnt---of 20
store_rps_map---of 7
testing_show---of 3
threaded_show---of 6
threaded_store---of 13
traffic_class_show---of 13
tx_aborted_errors_show---of 1
tx_bytes_show---of 1
tx_carrier_errors_show---of 1
tx_compressed_show---of 1
tx_dropped_show---of 1
tx_errors_show---of 1
tx_fifo_errors_show---of 1
tx_heartbeat_errors_show---of 1
tx_maxrate_show---of 1
tx_maxrate_store---of 13
tx_packets_show---of 1
tx_queue_len_show---of 1
tx_queue_len_store---of 13
tx_timeout_show---of 1
tx_window_errors_show---of 1
type_show---of 1
xps_cpus_show---of 12
xps_cpus_store---of 12
xps_queue_show---of 45
xps_rxqs_show---of 7
xps_rxqs_store---of 12
-----------
SUMMARY46%of 75
input_event_from_user50%of 8
input_event_to_user50%of 8
input_ff_effect_from_user50%of 10
-----------
SUMMARY50%of 26
chmod_sysfs_attrs---of 9
cp2112_functionality---of 1
cp2112_gpio_direction_input---of 6
cp2112_gpio_direction_input.cold---of 1
cp2112_gpio_direction_output---of 7
cp2112_gpio_direction_output.cold---of 1
cp2112_gpio_get---of 4
cp2112_gpio_get.cold---of 1
cp2112_gpio_get_all---of 4
cp2112_gpio_irq_mask---of 1
cp2112_gpio_irq_shutdown---of 2
cp2112_gpio_irq_startup---of 3
cp2112_gpio_irq_type---of 1
cp2112_gpio_irq_unmask---of 1
cp2112_gpio_poll_callback---of 29
cp2112_gpio_poll_callback.cold---of 3
cp2112_gpio_set---of 4
cp2112_gpio_set.cold---of 1
cp2112_hid_get.constprop.0---of 3
cp2112_hid_output---of 6
cp2112_i2c_xfer---of 43
cp2112_probe---of 39
cp2112_raw_event---of 16
cp2112_read---of 7
cp2112_remove67%of 3
cp2112_set_usb_config---of 6
cp2112_wait---of 15
cp2112_write_read_req---of 4
cp2112_write_req---of 3
cp2112_xfer---of 52
cp2112_xfer_status---of 5
manufacturer_show---of 1
manufacturer_store---of 1
max_power_show---of 5
max_power_store---of 8
power_mode_show---of 5
power_mode_store---of 7
product_id_show---of 5
product_id_store---of 8
product_show---of 1
product_store---of 1
pstr_show---of 7
pstr_store---of 5
release_version_show---of 5
release_version_store---of 7
serial_show---of 1
serial_store---of 1
vendor_id_show---of 5
vendor_id_store---of 8
-----------
SUMMARY67%of 3
__anon_vma_prepare---of 15
__bpf_trace_migration_pte---of 1
__bpf_trace_mm_migrate_pages---of 1
__bpf_trace_mm_migrate_pages_start---of 1
__bpf_trace_tlb_flush---of 1
__folio_rmap_sanity_checks.constprop.042%of 24
__put_anon_vma---of 11
__traceiter_mm_migrate_pages---of 3
__traceiter_mm_migrate_pages_start---of 3
__traceiter_remove_migration_pte---of 3
__traceiter_set_migration_pte---of 3
__traceiter_tlb_flush---of 3
anon_vma_clone---of 26
anon_vma_ctor---of 1
anon_vma_fork---of 12
const_folio_flags.constprop.040%of 10
flush_tlb_batched_pending---of 2
folio_add_anon_rmap_pmd---of 1
folio_add_anon_rmap_ptes---of 100
folio_add_file_rmap_pmd---of 1
folio_add_file_rmap_ptes54%of 13
folio_add_new_anon_rmap23%of 74
folio_flags.constprop.040%of 10
folio_get_anon_vma---of 33
folio_lock_anon_vma_read---of 57
folio_mkclean---of 12
folio_move_anon_rmap---of 5
folio_not_mapped---of 5
folio_referenced---of 17
folio_referenced_one---of 50
folio_referenced_one.cold---of 2
folio_remove_rmap_pmd---of 1
folio_remove_rmap_ptes---of 10
folio_total_mapcount---of 9
folio_try_share_anon_rmap_pte---of 65
hugetlb_add_anon_rmap---of 67
hugetlb_add_new_anon_rmap---of 39
invalid_folio_referenced_vma---of 26
invalid_migration_vma---of 3
invalid_mkclean_vma---of 1
mm_find_pmd---of 6
page_address_in_vma---of 21
page_mkclean_one---of 3
page_vma_mkclean_one.constprop.0---of 21
perf_trace_migration_pte---of 5
perf_trace_mm_migrate_pages---of 5
perf_trace_mm_migrate_pages_start---of 5
perf_trace_tlb_flush---of 5
pfn_mkclean_range---of 11
rmap_walk---of 3
rmap_walk_anon---of 31
rmap_walk_file---of 31
rmap_walk_locked---of 3
set_tlb_ubc_flush_pending.isra.0---of 12
trace_event_raw_event_migration_pte---of 6
trace_event_raw_event_mm_migrate_pages---of 6
trace_event_raw_event_mm_migrate_pages_start---of 6
trace_event_raw_event_tlb_flush---of 6
trace_raw_output_migration_pte---of 5
trace_raw_output_mm_migrate_pages---of 4
trace_raw_output_mm_migrate_pages_start---of 5
trace_raw_output_tlb_flush---of 5
try_to_migrate---of 10
try_to_migrate_one---of 239
try_to_migrate_one.cold---of 1
try_to_unmap---of 8
try_to_unmap_flush---of 2
try_to_unmap_flush_dirty---of 3
try_to_unmap_one---of 158
try_to_unmap_one.cold---of 1
unlink_anon_vmas---of 28
-----------
SUMMARY33%of 131
anon_transport_class_register---of 3
anon_transport_class_unregister---of 2
anon_transport_dummy_function---of 1
transport_add_class_device---of 7
transport_add_device---of 1
transport_class_register---of 1
transport_class_unregister---of 1
transport_configure---of 3
transport_configure_device---of 1
transport_destroy_classdev---of 2
transport_destroy_device100%of 1
transport_remove_classdev---of 6
transport_remove_device100%of 1
transport_setup_classdev---of 3
transport_setup_device---of 1
-----------
SUMMARY100%of 2
dummy_alloc_request63%of 8
dummy_alloc_streams---of 18
dummy_bus_resume72%of 7
dummy_bus_suspend---of 3
dummy_dequeue---of 21
dummy_disable91%of 11
dummy_enable65%of 37
dummy_free_request67%of 6
dummy_free_streams---of 11
dummy_g_get_frame---of 1
dummy_h_get_frame---of 1
dummy_hcd_probe---of 15
dummy_hcd_remove---of 3
dummy_hcd_resume---of 3
dummy_hcd_suspend---of 6
dummy_hub_control35%of 84
dummy_hub_status70%of 10
dummy_pullup80%of 5
dummy_queue92%of 24
dummy_set_halt100%of 9
dummy_set_selfpowered---of 5
dummy_set_wedge100%of 8
dummy_setup---of 5
dummy_start---of 5
dummy_stop---of 1
dummy_timer21%of 164
dummy_udc_async_callbacks100%of 1
dummy_udc_probe---of 14
dummy_udc_remove---of 1
dummy_udc_resume---of 4
dummy_udc_set_speed100%of 2
dummy_udc_start67%of 6
dummy_udc_stop100%of 9
dummy_udc_suspend---of 4
dummy_urb_dequeue60%of 5
dummy_urb_enqueue77%of 30
dummy_wakeup---of 7
find_endpoint100%of 10
function_show---of 5
set_link_state67%of 60
urbs_show---of 10
-----------
SUMMARY51%of 496
-----------
SUMMARY---of 0
__rhashtable_walk_find_next---of 41
__rht_bucket_nested---of 25
__rht_bucket_nested.cold---of 3
bucket_table_alloc.isra.0---of 12
bucket_table_free17%of 12
bucket_table_free.cold---of 2
bucket_table_free_rcu---of 1
jhash---of 16
lockdep_rht_bucket_is_held---of 5
lockdep_rht_bucket_is_held.part.0---of 4
lockdep_rht_mutex_is_held75%of 4
nested_table_alloc.constprop.0---of 16
nested_table_free---of 15
rhashtable_destroy---of 1
rhashtable_free_and_destroy20%of 50
rhashtable_init---of 22
rhashtable_init.cold---of 3
rhashtable_insert_slow---of 149
rhashtable_jhash2---of 7
rhashtable_last_table---of 9
rhashtable_rehash_alloc---of 8
rhashtable_walk_enter---of 8
rhashtable_walk_exit---of 5
rhashtable_walk_next---of 22
rhashtable_walk_peek---of 9
rhashtable_walk_start_check---of 59
rhashtable_walk_stop---of 15
rhltable_init---of 1
rht_bucket_nested---of 2
rht_bucket_nested_insert---of 11
rht_bucket_nested_insert.cold---of 3
rht_deferred_worker---of 158
rht_deferred_worker.cold---of 1
-----------
SUMMARY23%of 66
_copy_from_user100%of 7
_copy_to_user100%of 5
check_zeroed_user---of 18
-----------
SUMMARY100%of 12
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__usbhid_submit_report29%of 32
hid_ctrl---of 12
hid_free_buffers.isra.0100%of 1
hid_get_class_descriptor.constprop.0---of 4
hid_io_error.isra.062%of 13
hid_irq_in62%of 13
hid_irq_out---of 8
hid_is_usb---of 1
hid_post_reset---of 13
hid_pre_reset---of 1
hid_reset---of 12
hid_reset_resume---of 4
hid_restart_io---of 12
hid_resume---of 4
hid_retry_timeout---of 4
hid_start_in.isra.078%of 9
hid_submit_ctrl58%of 19
hid_submit_out---of 8
hid_suspend---of 18
usbhid_close100%of 5
usbhid_disconnect67%of 3
usbhid_find_interface100%of 1
usbhid_idle---of 3
usbhid_init_reports24%of 13
usbhid_may_wakeup---of 3
usbhid_open64%of 11
usbhid_output_report---of 7
usbhid_parse---of 29
usbhid_power60%of 5
usbhid_probe---of 63
usbhid_raw_request---of 13
usbhid_request67%of 3
usbhid_restart_ctrl_queue.isra.070%of 13
usbhid_restart_out_queue.isra.0---of 12
usbhid_start---of 77
usbhid_stop80%of 10
usbhid_wait_io84%of 12
-----------
SUMMARY58%of 163
-----------
SUMMARY---of 0
ntrig_event---of 57
ntrig_input_configured---of 7
ntrig_input_mapped---of 5
ntrig_input_mapping---of 27
ntrig_probe---of 26
ntrig_remove100%of 1
set_activate_slack---of 4
set_activation_height---of 4
set_activation_width---of 4
set_deactivate_slack---of 4
set_min_height---of 4
set_min_width---of 4
show_activate_slack---of 1
show_activation_height---of 1
show_activation_width---of 1
show_deactivate_slack---of 1
show_log_height---of 1
show_log_width---of 1
show_min_height---of 1
show_min_width---of 1
show_phys_height---of 1
show_phys_width---of 1
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
_ath10k_coredump_get_mem_layout---of 8
ath10k_coredump_create---of 3
ath10k_coredump_destroy67%of 3
ath10k_coredump_get_mem_layout---of 3
ath10k_coredump_new---of 7
ath10k_coredump_register---of 9
ath10k_coredump_submit---of 51
ath10k_coredump_unregister---of 1
-----------
SUMMARY67%of 3
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
r8712_usb_dvobj_deinit---of 1
r8712_usb_dvobj_init---of 4
r871x_dev_unload---of 6
r871x_resume---of 5
r871x_suspend---of 6
r871xu_dev_remove15%of 14
r871xu_drv_init---of 55
rtl871x_intf_stop---of 3
-----------
SUMMARY15%of 14
__do_sys_copy_file_range---of 22
__ia32_compat_sys_lseek---of 1
__ia32_compat_sys_preadv---of 1
__ia32_compat_sys_preadv2---of 4
__ia32_compat_sys_preadv64---of 9
__ia32_compat_sys_preadv64v2---of 1
__ia32_compat_sys_pwritev---of 1
__ia32_compat_sys_pwritev2---of 4
__ia32_compat_sys_pwritev64---of 9
__ia32_compat_sys_pwritev64v2---of 1
__ia32_compat_sys_sendfile---of 5
__ia32_compat_sys_sendfile64---of 5
__ia32_sys_copy_file_range---of 1
__ia32_sys_llseek---of 12
__ia32_sys_lseek---of 1
__ia32_sys_pread64---of 6
__ia32_sys_preadv---of 9
__ia32_sys_preadv2---of 1
__ia32_sys_pwrite64---of 6
__ia32_sys_pwritev---of 9
__ia32_sys_pwritev2---of 1
__ia32_sys_read---of 1
__ia32_sys_readv---of 1
__ia32_sys_sendfile---of 5
__ia32_sys_sendfile64---of 5
__ia32_sys_write---of 1
__ia32_sys_writev---of 1
__kernel_read---of 33
__kernel_write---of 1
__kernel_write_iter---of 33
__x64_compat_sys_lseek---of 1
__x64_compat_sys_preadv---of 11
__x64_compat_sys_preadv2---of 4
__x64_compat_sys_preadv64---of 11
__x64_compat_sys_preadv64v2---of 4
__x64_compat_sys_pwritev---of 11
__x64_compat_sys_pwritev2---of 4
__x64_compat_sys_pwritev64---of 11
__x64_compat_sys_pwritev64v2---of 4
__x64_compat_sys_sendfile---of 5
__x64_compat_sys_sendfile64---of 5
__x64_sys_copy_file_range---of 1
__x64_sys_llseek---of 12
__x64_sys_lseek---of 1
__x64_sys_pread64---of 7
__x64_sys_preadv---of 11
__x64_sys_preadv2---of 4
__x64_sys_pwrite64---of 7
__x64_sys_pwritev---of 11
__x64_sys_pwritev2---of 4
__x64_sys_read100%of 1
__x64_sys_readv---of 1
__x64_sys_sendfile---of 5
__x64_sys_sendfile64---of 5
__x64_sys_write100%of 1
__x64_sys_writev---of 1
default_llseek---of 11
do_iter_readv_writev---of 34
do_preadv---of 11
do_pwritev---of 11
do_readv---of 18
do_sendfile---of 42
do_writev---of 18
fixed_size_llseek---of 3
generic_file_llseek---of 1
generic_file_llseek_size---of 17
generic_file_rw_checks100%of 8
generic_write_check_limits88%of 8
generic_write_checks77%of 13
generic_write_checks_count---of 9
kernel_read---of 10
kernel_write---of 24
ksys_lseek---of 9
ksys_pread64---of 7
ksys_pwrite64---of 7
ksys_read100%of 12
ksys_write100%of 12
no_seek_end_llseek---of 3
no_seek_end_llseek_size---of 3
noop_llseek---of 1
rw_verify_area64%of 11
vfs_copy_file_range---of 79
vfs_iocb_iter_read---of 15
vfs_iocb_iter_write---of 35
vfs_iter_read---of 15
vfs_iter_write---of 36
vfs_llseek---of 3
vfs_read60%of 37
vfs_readv---of 27
vfs_setpos---of 6
vfs_write60%of 60
vfs_writev---of 49
-----------
SUMMARY72%of 163
-----------
SUMMARY---of 0
llist_add_batch100%of 3
llist_del_first100%of 5
llist_del_first_this---of 4
llist_reverse_order100%of 3
-----------
SUMMARY100%of 11
__drm_encoder_init---of 12
__drmm_encoder_alloc---of 4
__drmm_encoder_init---of 8
drm_encoder_cleanup---of 5
drm_encoder_init---of 3
drm_encoder_register_all---of 7
drm_encoder_unregister_all80%of 5
drm_mode_getencoder---of 17
drmm_encoder_alloc_release---of 3
drmm_encoder_init---of 1
-----------
SUMMARY80%of 5
-----------
SUMMARY---of 0
__dev_flush---of 2
__dev_map_alloc_node---of 17
__dev_map_entry_free---of 6
bq_enqueue---of 6
bq_xmit_all---of 43
dev_check_flush---of 3
dev_hash_map_redirect---of 15
dev_map_alloc---of 19
dev_map_alloc.cold---of 1
dev_map_delete_elem---of 4
dev_map_enqueue---of 10
dev_map_enqueue_multi---of 41
dev_map_free---of 29
dev_map_generic_redirect---of 13
dev_map_get_next_key---of 6
dev_map_hash_delete_elem---of 13
dev_map_hash_get_next_key---of 21
dev_map_hash_lookup_elem---of 9
dev_map_hash_update_elem---of 29
dev_map_lookup_elem---of 11
dev_map_mem_usage---of 4
dev_map_notification17%of 48
dev_map_redirect---of 17
dev_map_redirect_multi---of 43
dev_map_update_elem---of 13
dev_xdp_enqueue---of 10
is_valid_dst---of 11
trace_xdp_exception---of 15
-----------
SUMMARY17%of 48
__dev_pm_qos_add_request---of 33
__dev_pm_qos_flags73%of 11
__dev_pm_qos_remove_request55%of 24
__dev_pm_qos_resume_latency88%of 8
__dev_pm_qos_update_request---of 29
apply_constraint30%of 10
dev_pm_qos_add_ancestor_request---of 12
dev_pm_qos_add_notifier---of 9
dev_pm_qos_add_request---of 1
dev_pm_qos_constraints_allocate---of 5
dev_pm_qos_constraints_destroy66%of 23
dev_pm_qos_expose_flags---of 13
dev_pm_qos_expose_latency_limit---of 14
dev_pm_qos_expose_latency_tolerance---of 3
dev_pm_qos_flags100%of 1
dev_pm_qos_get_user_latency_tolerance---of 5
dev_pm_qos_hide_flags---of 5
dev_pm_qos_hide_latency_limit---of 5
dev_pm_qos_hide_latency_tolerance---of 1
dev_pm_qos_read_value---of 12
dev_pm_qos_remove_notifier---of 8
dev_pm_qos_remove_request---of 1
dev_pm_qos_update_flags---of 8
dev_pm_qos_update_request---of 1
dev_pm_qos_update_user_latency_tolerance---of 13
-----------
SUMMARY62%of 77
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
active_count_show---of 1
active_time_ms_show---of 3
device_create_release100%of 1
event_count_show---of 1
expire_count_show---of 1
last_change_ms_show---of 1
max_time_ms_show---of 3
name_show---of 1
pm_wakeup_source_sysfs_add34%of 6
prevent_suspend_time_ms_show---of 3
total_time_ms_show---of 3
wakeup_count_show---of 1
wakeup_source_device_create---of 6
wakeup_source_sysfs_add---of 5
wakeup_source_sysfs_remove100%of 1
-----------
SUMMARY50%of 8
-----------
SUMMARY---of 0
netdev_nl_page_pool_event---of 15
netdev_nl_page_pool_get_do---of 9
netdev_nl_page_pool_get_doit---of 8
netdev_nl_page_pool_get_dump---of 9
netdev_nl_page_pool_get_dumpit---of 1
netdev_nl_page_pool_stats_get_doit---of 18
netdev_nl_page_pool_stats_get_dumpit---of 1
page_pool_detached---of 1
page_pool_list---of 11
page_pool_netdevice_event15%of 21
page_pool_nl_fill---of 26
page_pool_nl_fill.cold---of 1
page_pool_nl_stats_fill---of 3
page_pool_unlist---of 5
-----------
SUMMARY15%of 21
dev_pm_domain_attach---of 4
dev_pm_domain_attach_by_id---of 2
dev_pm_domain_attach_by_name---of 2
dev_pm_domain_attach_list---of 14
dev_pm_domain_detach34%of 3
dev_pm_domain_detach_list---of 8
dev_pm_domain_set---of 5
dev_pm_domain_set_performance_state---of 5
dev_pm_domain_start---of 5
dev_pm_get_subsys_data---of 6
dev_pm_put_subsys_data---of 5
-----------
SUMMARY34%of 3
-----------
SUMMARY---of 0
apply_envelope---of 21
input_ff_create_memless---of 7
ml_effect_timer---of 3
ml_ff_destroy100%of 1
ml_ff_playback---of 10
ml_ff_set_gain---of 3
ml_ff_upload---of 3
ml_play_effects---of 57
ml_schedule_timer---of 30
-----------
SUMMARY100%of 1
__do_sys_mlockall---of 27
__ia32_sys_mlock---of 1
__ia32_sys_mlock2---of 4
__ia32_sys_mlockall---of 1
__ia32_sys_munlock---of 14
__x64_sys_mlock---of 1
__x64_sys_mlock2---of 4
__x64_sys_mlockall---of 1
__x64_sys_munlock---of 14
__x64_sys_munlockall---of 14
apply_mlockall_flags---of 13
apply_vma_lock_flags---of 14
can_do_mlock---of 3
const_folio_flags.constprop.0---of 10
do_mlock---of 33
folio_evictable---of 14
folio_flags.constprop.0---of 10
folio_lruvec_relock_irq---of 26
mlock_drain_local44%of 16
mlock_drain_remote---of 6
mlock_fixup---of 49
mlock_folio---of 26
mlock_folio_batch---of 164
mlock_new_folio---of 24
mlock_pte_range---of 40
mlock_pte_range.cold---of 1
munlock_folio---of 20
need_mlock_drain---of 1
user_shm_lock---of 8
user_shm_unlock---of 1
-----------
SUMMARY44%of 16
snd_media_device_create---of 26
snd_media_device_delete75%of 4
snd_media_mixer_delete9%of 12
snd_media_start_pipeline---of 5
snd_media_stop_pipeline---of 4
snd_media_stream_delete17%of 6
snd_media_stream_init---of 17
-----------
SUMMARY23%of 22
__do_compat_sys_newfstat---of 3
__do_compat_sys_newfstatat---of 4
__do_compat_sys_newlstat---of 4
__do_compat_sys_newstat---of 4
__do_sys_fstat---of 3
__do_sys_lstat---of 4
__do_sys_newfstat---of 3
__do_sys_newfstatat---of 4
__do_sys_newlstat---of 4
__do_sys_newstat---of 4
__do_sys_stat---of 4
__ia32_compat_sys_newfstat---of 1
__ia32_compat_sys_newfstatat---of 1
__ia32_compat_sys_newlstat---of 1
__ia32_compat_sys_newstat---of 1
__ia32_sys_fstat---of 1
__ia32_sys_lstat---of 1
__ia32_sys_newfstat---of 1
__ia32_sys_newfstatat---of 1
__ia32_sys_newlstat---of 1
__ia32_sys_newstat---of 1
__ia32_sys_readlink---of 1
__ia32_sys_readlinkat---of 1
__ia32_sys_stat---of 1
__ia32_sys_statx---of 5
__inode_add_bytes---of 2
__inode_sub_bytes---of 3
__x64_compat_sys_newfstat---of 1
__x64_compat_sys_newfstatat---of 1
__x64_compat_sys_newlstat---of 1
__x64_compat_sys_newstat---of 1
__x64_sys_fstat---of 1
__x64_sys_lstat---of 1
__x64_sys_newfstat---of 1
__x64_sys_newfstatat---of 1
__x64_sys_newlstat---of 1
__x64_sys_newstat---of 1
__x64_sys_readlink---of 1
__x64_sys_readlinkat---of 1
__x64_sys_stat---of 1
__x64_sys_statx---of 5
cp_compat_stat---of 11
cp_new_stat---of 2
cp_old_stat---of 14
cp_statx---of 2
do_readlinkat---of 11
do_statx---of 5
generic_fill_statx_attr---of 5
generic_fillattr---of 4
generic_fillattr.cold---of 1
getname_statx_lookup_flags---of 5
inode_add_bytes67%of 3
inode_get_bytes100%of 1
inode_set_bytes---of 1
inode_sub_bytes75%of 4
vfs_fstat---of 7
vfs_fstatat---of 9
vfs_getattr---of 7
vfs_getattr_nosec---of 8
vfs_statx---of 18
-----------
SUMMARY75%of 8
__check_object_size46%of 37
__check_object_size.cold---of 1
check_stack_object100%of 7
const_folio_flags.constprop.040%of 10
usercopy_abort---of 4
-----------
SUMMARY52%of 54
__devm_iio_device_register---of 5
__iio_add_chan_devattr---of 45
__iio_device_register---of 116
__iio_format_value---of 29
__iio_format_value.cold---of 2
__iio_str_to_fixpoint---of 29
current_timestamp_clock_show---of 3
current_timestamp_clock_store---of 9
devm_iio_device_alloc---of 4
devm_iio_device_release100%of 2
devm_iio_device_unreg100%of 1
iio_buffer_enabled---of 1
iio_chrdev_open---of 8
iio_chrdev_release---of 3
iio_debugfs_read_reg---of 6
iio_debugfs_write_reg---of 7
iio_dev_release67%of 3
iio_device_add_info_mask_type---of 13
iio_device_add_info_mask_type_avail---of 13
iio_device_alloc---of 7
iio_device_claim_buffer_mode---of 3
iio_device_claim_direct_mode---of 3
iio_device_free---of 2
iio_device_get_clock---of 1
iio_device_get_current_mode---of 1
iio_device_id---of 1
iio_device_ioctl_handler_register---of 2
iio_device_ioctl_handler_unregister---of 3
iio_device_register_sysfs_group---of 3
iio_device_release_buffer_mode---of 1
iio_device_release_direct_mode---of 1
iio_device_set_clock---of 7
iio_device_unregister100%of 1
iio_enum_available_read---of 6
iio_enum_read---of 7
iio_enum_write---of 7
iio_find_channel_from_si---of 5
iio_format_list---of 10
iio_format_value---of 3
iio_free_chan_devattr_list100%of 4
iio_get_debugfs_dentry---of 1
iio_get_time_ns---of 12
iio_ioctl---of 8
iio_read_channel_ext_info---of 1
iio_read_channel_info---of 8
iio_read_channel_info_avail---of 7
iio_read_channel_label---of 6
iio_read_const_attr---of 1
iio_read_mount_matrix---of 6
iio_show_mount_matrix---of 6
iio_str_to_fixpoint---of 1
iio_write_channel_ext_info---of 1
iio_write_channel_info---of 13
label_show---of 1
name_show---of 1
-----------
SUMMARY91%of 11
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__xa_alloc---of 16
__xa_alloc_cyclic---of 10
__xa_clear_mark---of 3
__xa_cmpxchg---of 13
__xa_erase75%of 4
__xa_insert---of 13
__xa_set_mark---of 3
__xa_store---of 13
__xas_next---of 28
__xas_next.cold---of 2
__xas_nomem---of 16
__xas_prev---of 28
__xas_prev.cold---of 2
xa_clear_mark---of 1
xa_delete_node---of 7
xa_delete_node.cold---of 1
xa_destroy67%of 18
xa_erase100%of 1
xa_extract---of 67
xa_find---of 19
xa_find_after---of 24
xa_find_after.cold---of 2
xa_get_mark---of 27
xa_get_mark.cold---of 1
xa_get_order---of 20
xa_get_order.cold---of 1
xa_load50%of 14
xa_set_mark---of 1
xa_store---of 1
xa_store_range---of 36
xa_store_range.cold---of 2
xas_alloc73%of 11
xas_clear_mark43%of 14
xas_clear_mark.cold---of 3
xas_create70%of 79
xas_create.cold---of 3
xas_create_range---of 21
xas_create_range.cold---of 2
xas_descend34%of 21
xas_descend.cold---of 1
xas_destroy---of 2
xas_find73%of 44
xas_find_conflict35%of 44
xas_find_marked15%of 74
xas_find_marked.cold---of 4
xas_free_nodes67%of 21
xas_get_mark---of 6
xas_get_mark.cold---of 1
xas_init_marks67%of 6
xas_load100%of 6
xas_move_index100%of 3
xas_move_index.cold---of 1
xas_nomem38%of 8
xas_pause---of 16
xas_pause.cold---of 1
xas_set_mark70%of 13
xas_set_mark.cold---of 3
xas_split---of 31
xas_split_alloc---of 14
xas_start42%of 39
xas_start.cold---of 2
xas_store47%of 104
-----------
SUMMARY50%of 524
-----------
SUMMARY---of 0
__bpf_trace_task_newtask---of 1
__bpf_trace_task_rename---of 1
__cleanup_sighand---of 5
__delayed_free_task---of 1
__do_sys_clone---of 1
__do_sys_clone3---of 15
__ia32_sys_clone---of 1
__ia32_sys_clone3---of 1
__ia32_sys_set_tid_address---of 1
__ia32_sys_unshare---of 1
__ia32_sys_vfork---of 1
__list_add_rcu---of 2
__mmdrop60%of 20
__put_task_struct---of 20
__put_task_struct_rcu_cb---of 1
__refcount_add.constprop.0---of 5
__traceiter_task_newtask---of 3
__traceiter_task_rename---of 3
__vm_area_free---of 1
__x64_sys_clone---of 1
__x64_sys_clone3---of 1
__x64_sys_fork---of 1
__x64_sys_set_tid_address---of 1
__x64_sys_unshare---of 1
account_kernel_stack---of 15
copy_clone_args_from_user---of 22
copy_process---of 361
copy_signal---of 4
create_io_thread---of 1
exec_mm_release---of 1
exit_mm_release---of 1
exit_task_stack_account---of 4
fatal_signal_pending---of 4
free_task---of 7
free_vm_stack_cache---of 5
get_mm_exe_file---of 11
get_task_exe_file---of 4
get_task_mm---of 4
idle_dummy---of 1
kernel_clone---of 46
kernel_thread---of 1
ksys_unshare---of 57
lockdep_tasklist_lock_is_held---of 1
memcg_charge_kernel_stack---of 10
mm_access---of 8
mm_alloc---of 3
mm_init---of 28
mm_release---of 16
mmdrop_async---of 2
mmdrop_async_fn---of 1
mmput---of 13
mmput_async---of 2
mmput_async_fn---of 12
nr_processes---of 6
perf_trace_task_newtask---of 5
perf_trace_task_rename---of 10
pidfd_prepare---of 7
ptrace_event_pid---of 22
put_task_stack60%of 10
replace_mm_exe_file---of 33
set_mm_exe_file---of 10
set_task_stack_end_magic---of 1
sighand_ctor---of 1
sysctl_max_threads---of 4
thread_stack_free_rcu---of 3
trace_event_raw_event_task_newtask---of 6
trace_event_raw_event_task_rename---of 11
trace_raw_output_task_newtask---of 5
trace_raw_output_task_rename---of 5
trace_task_newtask---of 15
unshare_fd---of 6
unshare_files---of 7
user_mode_thread---of 1
vm_area_alloc---of 5
vm_area_dup---of 5
vm_area_free---of 1
vm_area_free_rcu_cb---of 3
walk_process_tree---of 12
-----------
SUMMARY60%of 30
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__bpf_trace_swiotlb_bounced---of 1
__traceiter_swiotlb_bounced---of 3
default_swiotlb_base---of 1
default_swiotlb_limit---of 1
fops_io_tlb_hiwater_open---of 1
fops_io_tlb_used_open---of 1
io_tlb_hiwater_get---of 1
io_tlb_hiwater_set---of 3
io_tlb_used_get---of 1
is_swiotlb_active---of 3
is_swiotlb_allocated---of 1
perf_trace_swiotlb_bounced---of 23
round_up_default_nslabs---of 8
round_up_default_nslabs.cold---of 1
swiotlb_adjust_nareas---of 6
swiotlb_adjust_nareas.cold---of 1
swiotlb_bounce---of 11
swiotlb_dev_init100%of 1
swiotlb_init_io_tlb_pool.constprop.0---of 6
swiotlb_init_late---of 35
swiotlb_init_late.cold---of 4
swiotlb_map---of 42
swiotlb_max_mapping_size---of 5
swiotlb_print_info---of 3
swiotlb_release_slots---of 12
swiotlb_size_or_default---of 1
swiotlb_sync_single_for_cpu---of 4
swiotlb_sync_single_for_device---of 4
swiotlb_tbl_map_single---of 72
swiotlb_tbl_unmap_single---of 4
trace_event_raw_event_swiotlb_bounced---of 24
trace_raw_output_swiotlb_bounced---of 6
-----------
SUMMARY100%of 1
__address_space_init_once100%of 1
__destroy_inode24%of 43
__iget---of 1
__inode_add_lru.part.0---of 9
__insert_inode_hash---of 4
__insert_inode_hash.cold---of 1
__remove_inode_hash---of 5
__wait_on_freeing_inode---of 1
address_space_init_once---of 1
alloc_inode39%of 13
atime_needs_update86%of 21
bmap---of 3
clear_inode55%of 11
clear_nlink100%of 2
current_time100%of 1
dentry_needs_remove_privs80%of 5
destroy_inode67%of 6
discard_new_inode---of 8
dispose_list---of 4
drop_nlink50%of 4
dump_mapping---of 16
evict54%of 26
evict_inodes---of 13
file_modified82%of 11
file_remove_privs100%of 1
file_remove_privs_flags75%of 16
file_update_time88%of 8
find_inode---of 11
find_inode_by_ino_rcu---of 16
find_inode_by_ino_rcu.cold---of 1
find_inode_fast---of 13
find_inode_nowait---of 9
find_inode_nowait.cold---of 1
find_inode_rcu---of 14
find_inode_rcu.cold---of 1
free_inode_nonrcu---of 1
generic_delete_inode100%of 1
generic_update_time80%of 5
get_next_ino---of 6
get_nr_dirty_inodes---of 8
get_nr_inodes---of 7
i_callback---of 3
iget5_locked---of 16
iget5_locked.cold---of 1
iget_locked---of 24
iget_locked.cold---of 1
igrab60%of 5
ihold---of 2
ilookup---of 11
ilookup.cold---of 1
ilookup5---of 11
ilookup5.cold---of 1
ilookup5_nowait---of 3
ilookup5_nowait.cold---of 1
in_group_or_capable---of 3
inc_nlink---of 5
init_once---of 1
init_special_inode---of 8
inode_add_lru67%of 3
inode_dio_wait---of 7
inode_init_always80%of 5
inode_init_once100%of 1
inode_init_owner67%of 6
inode_insert5---of 20
inode_insert5.cold---of 1
inode_lru_isolate---of 27
inode_needs_sync---of 6
inode_needs_update_time80%of 10
inode_nohighmem---of 1
inode_owner_or_capable50%of 6
inode_sb_list_add---of 3
inode_set_ctime_current100%of 1
inode_set_flags80%of 5
inode_update_time---of 5
inode_update_timestamps87%of 15
insert_inode_locked---of 26
insert_inode_locked.cold---of 1
insert_inode_locked4---of 6
invalidate_inodes---of 13
iput75%of 4
iput.part.027%of 41
iunique---of 22
iunique.cold---of 1
kiocb_modified---of 10
lock_two_nondirectories---of 19
lockdep_annotate_inode_mutex_key34%of 3
lockdep_annotate_inode_mutex_key.part.0---of 2
mode_strip_sgid23%of 9
new_inode100%of 5
new_inode_pseudo---of 3
no_open---of 1
proc_nr_inodes---of 7
prune_icache_sb---of 1
set_nlink---of 6
timestamp_truncate56%of 9
touch_atime60%of 27
unlock_new_inode---of 5
unlock_two_nondirectories---of 9
-----------
SUMMARY57%of 330
__kobject_del86%of 7
dynamic_kobj_release50%of 4
kobj_attr_show---of 3
kobj_attr_store---of 3
kobj_child_ns_ops---of 6
kobj_kset_leave100%of 4
kobj_ns_current_may_mount---of 4
kobj_ns_drop---of 5
kobj_ns_grab_current---of 4
kobj_ns_initial---of 4
kobj_ns_netlink---of 4
kobj_ns_ops100%of 6
kobj_ns_type_register---of 4
kobj_ns_type_registered---of 3
kobject_add75%of 8
kobject_add_internal45%of 40
kobject_create_and_add---of 5
kobject_del100%of 2
kobject_get60%of 10
kobject_get_ownership100%of 2
kobject_get_path77%of 13
kobject_get_unless_zero75%of 12
kobject_init67%of 6
kobject_init_and_add75%of 4
kobject_move---of 10
kobject_namespace50%of 8
kobject_put60%of 30
kobject_rename---of 10
kobject_set_name---of 1
kobject_set_name_vargs75%of 8
kset_create_and_add---of 5
kset_find_obj84%of 6
kset_get_ownership---of 3
kset_init---of 3
kset_register---of 10
kset_release50%of 4
kset_unregister100%of 4
-----------
SUMMARY66%of 178
__bpf_trace_vector_activate---of 1
__bpf_trace_vector_alloc---of 1
__bpf_trace_vector_alloc_managed---of 1
__bpf_trace_vector_config---of 1
__bpf_trace_vector_free_moved---of 1
__bpf_trace_vector_mod---of 1
__bpf_trace_vector_reserve---of 1
__bpf_trace_vector_setup---of 1
__bpf_trace_vector_teardown---of 1
__bpf_trace_x86_irq_vector---of 1
__common_interrupt50%of 12
__sysvec_thermal---of 33
__sysvec_x86_platform_ipi---of 32
__traceiter_call_function_entry---of 3
__traceiter_call_function_exit---of 3
__traceiter_call_function_single_entry---of 3
__traceiter_call_function_single_exit---of 3
__traceiter_deferred_error_apic_entry---of 3
__traceiter_deferred_error_apic_exit---of 3
__traceiter_error_apic_entry---of 3
__traceiter_error_apic_exit---of 3
__traceiter_irq_work_entry---of 3
__traceiter_irq_work_exit---of 3
__traceiter_local_timer_entry---of 3
__traceiter_local_timer_exit---of 3
__traceiter_reschedule_entry---of 3
__traceiter_reschedule_exit---of 3
__traceiter_spurious_apic_entry---of 3
__traceiter_spurious_apic_exit---of 3
__traceiter_thermal_apic_entry---of 3
__traceiter_thermal_apic_exit---of 3
__traceiter_threshold_apic_entry---of 3
__traceiter_threshold_apic_exit---of 3
__traceiter_vector_activate---of 4
__traceiter_vector_alloc---of 4
__traceiter_vector_alloc_managed---of 3
__traceiter_vector_clear---of 3
__traceiter_vector_config---of 3
__traceiter_vector_deactivate---of 4
__traceiter_vector_free_moved---of 4
__traceiter_vector_reserve---of 3
__traceiter_vector_reserve_managed---of 3
__traceiter_vector_setup---of 4
__traceiter_vector_teardown---of 4
__traceiter_vector_update---of 3
__traceiter_x86_platform_ipi_entry---of 3
__traceiter_x86_platform_ipi_exit---of 3
ack_bad_irq---of 4
arch_irq_stat---of 1
arch_irq_stat_cpu---of 3
arch_show_interrupts---of 85
fixup_irqs---of 9
fred_sysvec_thermal---of 4
fred_sysvec_x86_platform_ipi---of 4
perf_perm_irq_work_exit---of 1
perf_trace_vector_activate---of 5
perf_trace_vector_alloc---of 6
perf_trace_vector_alloc_managed---of 6
perf_trace_vector_config---of 5
perf_trace_vector_free_moved---of 5
perf_trace_vector_mod---of 5
perf_trace_vector_reserve---of 5
perf_trace_vector_setup---of 5
perf_trace_vector_teardown---of 5
perf_trace_x86_irq_vector---of 5
trace_event_raw_event_vector_activate---of 6
trace_event_raw_event_vector_alloc---of 7
trace_event_raw_event_vector_alloc_managed---of 7
trace_event_raw_event_vector_config---of 6
trace_event_raw_event_vector_free_moved---of 6
trace_event_raw_event_vector_mod---of 6
trace_event_raw_event_vector_reserve---of 6
trace_event_raw_event_vector_setup---of 6
trace_event_raw_event_vector_teardown---of 6
trace_event_raw_event_x86_irq_vector---of 6
trace_raw_output_vector_activate---of 5
trace_raw_output_vector_alloc---of 5
trace_raw_output_vector_alloc_managed---of 5
trace_raw_output_vector_config---of 5
trace_raw_output_vector_free_moved---of 5
trace_raw_output_vector_mod---of 5
trace_raw_output_vector_reserve---of 5
trace_raw_output_vector_setup---of 5
trace_raw_output_vector_teardown---of 5
trace_raw_output_x86_irq_vector---of 5
-----------
SUMMARY50%of 12
cx25840_ir_irq_handler---of 66
cx25840_ir_log_status---of 43
cx25840_ir_probe---of 21
cx25840_ir_remove80%of 5
cx25840_ir_rx_g_parameters---of 5
cx25840_ir_rx_read---of 25
cx25840_ir_rx_s_parameters---of 28
cx25840_ir_rx_shutdown100%of 5
cx25840_ir_tx_g_parameters---of 5
cx25840_ir_tx_s_parameters---of 23
cx25840_ir_tx_shutdown100%of 5
cx25840_ir_tx_write---of 5
-----------
SUMMARY94%of 15
__cleanup_single_sta---of 15
__ieee80211_sta_recalc_aggregates---of 4
__ieee80211_sta_recalc_aggregates.part.0---of 24
__rhashtable_insert_fast.constprop.0.isra.0---of 123
__rhashtable_lookup.isra.0---of 30
__rhashtable_remove_fast.constprop.0.isra.0---of 113
__sta_info_alloc---of 38
__sta_info_alloc.cold---of 2
__sta_info_destroy---of 3
__sta_info_destroy_part1---of 103
__sta_info_destroy_part2---of 16
__sta_info_flush---of 31
__sta_info_flush.cold---of 1
__sta_info_recalc_tim---of 55
_sta_info_move_state---of 68
drv_allow_buffered_frames---of 18
find_highest_prio_tid---of 4
ieee80211_find_sta---of 6
ieee80211_find_sta_by_ifaddr---of 8
ieee80211_find_sta_by_link_addrs---of 17
ieee80211_purge_sta_txqs---of 4
ieee80211_recalc_p2p_go_ps_allowed---of 17
ieee80211_send_eosp_nullfunc---of 21
ieee80211_send_null_response---of 33
ieee80211_send_null_response.cold---of 1
ieee80211_sta_activate_link---of 37
ieee80211_sta_activate_link.cold---of 1
ieee80211_sta_allocate_link---of 16
ieee80211_sta_allocate_link.cold---of 1
ieee80211_sta_block_awake---of 24
ieee80211_sta_eosp---of 16
ieee80211_sta_expire---of 14
ieee80211_sta_free_link---of 6
ieee80211_sta_last_active---of 5
ieee80211_sta_ps_deliver_poll_response---of 3
ieee80211_sta_ps_deliver_response---of 78
ieee80211_sta_ps_deliver_response.cold---of 2
ieee80211_sta_ps_deliver_uapsd---of 5
ieee80211_sta_ps_deliver_wakeup---of 71
ieee80211_sta_recalc_aggregates---of 4
ieee80211_sta_register_airtime---of 6
ieee80211_sta_remove_link---of 8
ieee80211_sta_remove_link.cold---of 1
ieee80211_sta_set_buffered---of 21
ieee80211_sta_set_expected_throughput---of 3
ieee80211_sta_set_max_amsdu_subframes---of 6
ieee80211_sta_update_pending_airtime---of 14
jhash---of 16
link_sta_info_get_bss---of 23
link_sta_info_hash_add---of 4
link_sta_info_hash_del.isra.0---of 4
link_sta_info_hash_lookup---of 1
lockdep_sta_mutex_held---of 1
rht_key_get_hash.isra.0---of 1
sta_deliver_ps_frames---of 14
sta_get_expected_throughput---of 37
sta_get_last_rx_stats---of 8
sta_info_alloc---of 1
sta_info_alloc_link---of 4
sta_info_alloc_with_link---of 1
sta_info_cleanup---of 33
sta_info_destroy_addr---of 6
sta_info_destroy_addr_bss---of 6
sta_info_free---of 12
sta_info_get---of 22
sta_info_get_bss---of 23
sta_info_get_by_addrs---of 6
sta_info_get_by_idx---of 6
sta_info_hash_lookup---of 1
sta_info_init---of 7
sta_info_insert---of 6
sta_info_insert_rcu---of 95
sta_info_move_state---of 1
sta_info_recalc_tim---of 1
sta_info_stop100%of 1
sta_remove_link---of 19
sta_remove_link.cold---of 2
sta_set_sinfo---of 192
trace_drv_return_void---of 15
-----------
SUMMARY100%of 1
ath10k_core_check_bdfext---of 14
ath10k_core_check_dt---of 9
ath10k_core_create---of 15
ath10k_core_create_board_name.constprop.0---of 19
ath10k_core_destroy100%of 1
ath10k_core_fetch_board_file---of 57
ath10k_core_fetch_firmware_api_n---of 32
ath10k_core_fetch_firmware_files---of 8
ath10k_core_free_board_files---of 5
ath10k_core_free_firmware_files---of 7
ath10k_core_get_board_id_from_otp---of 13
ath10k_core_get_fw_features_str---of 9
ath10k_core_napi_enable---of 5
ath10k_core_napi_sync_disable---of 8
ath10k_core_parse_bd_ie_board---of 13
ath10k_core_pre_cal_download---of 12
ath10k_core_register---of 1
ath10k_core_register_work---of 89
ath10k_core_restart---of 9
ath10k_core_search_bd---of 8
ath10k_core_set_coverage_class_work---of 2
ath10k_core_start---of 219
ath10k_core_start.cold---of 1
ath10k_core_start_recovery---of 3
ath10k_core_stop---of 7
ath10k_core_unregister17%of 6
ath10k_download_and_run_otp---of 15
ath10k_download_board_data---of 32
ath10k_download_cal_dt---of 11
ath10k_download_cal_nvmem---of 9
ath10k_fetch_cal_file.isra.0---of 11
ath10k_fetch_fw_file---of 6
ath10k_send_suspend_complete---of 1
ath10k_wait_for_suspend---of 10
-----------
SUMMARY29%of 7
__add_preferred_console.constprop.0---of 19
__bpf_trace_console---of 1
__control_devkmsg---of 11
__down_trylock_console_sem80%of 10
__ia32_sys_syslog---of 1
__pr_flush.constprop.0.isra.0---of 32
__printk_cpu_sync_put---of 3
__printk_cpu_sync_try_get---of 4
__printk_cpu_sync_wait---of 1
__printk_ratelimit---of 1
__traceiter_console---of 3
__wake_up_klogd.part.0100%of 5
__x64_sys_syslog---of 1
_printk100%of 1
_printk_deferred---of 1
add_preferred_console---of 1
console_conditional_schedule---of 2
console_cpu_notify---of 6
console_device---of 9
console_flush_all72%of 60
console_flush_on_panic---of 14
console_force_preferred_locked---of 8
console_list_lock---of 5
console_list_unlock---of 1
console_lock---of 4
console_prepend_dropped---of 12
console_srcu_read_lock---of 1
console_srcu_read_lock_is_held---of 3
console_srcu_read_unlock---of 3
console_start---of 1
console_stop---of 1
console_trylock---of 5
console_unblank---of 35
console_unlock65%of 14
console_verbose---of 3
defer_console_output---of 2
devkmsg_emit.constprop.0---of 1
devkmsg_llseek---of 7
devkmsg_open---of 8
devkmsg_poll---of 6
devkmsg_read---of 21
devkmsg_release---of 4
devkmsg_sysctl_set_loglvl---of 8
devkmsg_write---of 14
do_syslog---of 46
early_printk---of 3
find_first_fitting_seq---of 10
get_record_print_text_size---of 1
info_print_prefix84%of 6
is_console_locked---of 1
kmsg_dump---of 17
kmsg_dump_get_buffer---of 18
kmsg_dump_get_line---of 12
kmsg_dump_reason_str---of 3
kmsg_dump_register---of 6
kmsg_dump_rewind---of 2
kmsg_dump_unregister---of 5
lockdep_assert_console_list_lock_held---of 3
log_buf_addr_get---of 1
log_buf_len_get---of 1
log_buf_vmcoreinfo_setup---of 1
msg_add_dict_text---of 5
msg_add_ext_text---of 10
other_cpu_in_panic50%of 4
perf_trace_console---of 7
printk_get_next_message60%of 15
printk_parse_prefix75%of 12
printk_percpu_data_ready---of 1
printk_sprint57%of 23
printk_timed_ratelimit---of 5
printk_trigger_flush---of 2
record_print_text67%of 15
register_console---of 71
resume_console---of 6
suspend_console---of 6
syslog_print---of 31
syslog_print_all---of 16
this_cpu_in_panic100%of 1
trace_event_raw_event_console---of 8
trace_raw_output_console---of 5
try_enable_preferred_console---of 18
unregister_console---of 1
unregister_console_locked---of 13
vprintk_default100%of 1
vprintk_deferred---of 1
vprintk_emit69%of 45
vprintk_store52%of 52
wake_up_klogd---of 2
wake_up_klogd_work_func---of 6
-----------
SUMMARY66%of 264
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
common_s_io_pin_config---of 83
cx23888_std_setup---of 12
cx25840_and_or---of 1
cx25840_and_or4100%of 1
cx25840_g_input_status---of 3
cx25840_g_std---of 1
cx25840_g_tuner---of 10
cx25840_init---of 5
cx25840_irq_handler---of 32
cx25840_load_fw---of 4
cx25840_log_status---of 30
cx25840_probe---of 49
cx25840_querystd---of 3
cx25840_read---of 3
cx25840_read467%of 3
cx25840_remove100%of 1
cx25840_reset---of 41
cx25840_s_audio_routing---of 3
cx25840_s_audio_stream---of 9
cx25840_s_ctrl---of 10
cx25840_s_frequency---of 1
cx25840_s_radio---of 1
cx25840_s_std---of 30
cx25840_s_stream---of 8
cx25840_s_tuner---of 9
cx25840_s_video_routing---of 7
cx25840_set_fmt---of 30
cx25840_set_invert---of 25
cx25840_std_setup---of 26
cx25840_vconfig_add---of 24
cx25840_vconfig_apply---of 55
cx25840_work_handler---of 1
cx25840_write---of 1
cx25840_write4100%of 1
input_change---of 25
set_input---of 64
-----------
SUMMARY84%of 6
-----------
SUMMARY---of 0
__fsnotify_recalc_mask---of 25
fsnotify_add_mark---of 5
fsnotify_add_mark_locked---of 48
fsnotify_clear_marks_by_group---of 22
fsnotify_compare_groups---of 9
fsnotify_conn_mask---of 10
fsnotify_connector_destroy_workfn---of 2
fsnotify_destroy_mark---of 5
fsnotify_destroy_marks13%of 16
fsnotify_detach_connector_from_object---of 10
fsnotify_detach_mark---of 15
fsnotify_drop_object---of 5
fsnotify_final_mark_destroy---of 3
fsnotify_find_mark---of 12
fsnotify_finish_user_wait---of 4
fsnotify_free_mark---of 4
fsnotify_get_mark---of 7
fsnotify_grab_connector47%of 15
fsnotify_init_mark---of 1
fsnotify_mark_destroy_workfn---of 5
fsnotify_prepare_user_wait---of 24
fsnotify_put_mark---of 18
fsnotify_put_mark_wake.part.0---of 3
fsnotify_put_sb_connectors---of 7
fsnotify_recalc_mask---of 3
fsnotify_wait_marks_destroyed---of 1
-----------
SUMMARY30%of 31
est_fetch_counters---of 4
est_timer---of 15
est_timer.cold---of 7
gen_estimator_active---of 1
gen_estimator_read---of 33
gen_kill_estimator50%of 2
gen_new_estimator---of 15
gen_new_estimator.cold---of 1
gen_replace_estimator---of 1
-----------
SUMMARY50%of 2
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
magicmouse_battery_timer_tick---of 7
magicmouse_emit_touch---of 41
magicmouse_enable_mt_work---of 2
magicmouse_enable_multitouch---of 6
magicmouse_event---of 4
magicmouse_input_configured---of 42
magicmouse_input_mapping---of 7
magicmouse_probe---of 38
magicmouse_raw_event---of 59
magicmouse_remove100%of 3
magicmouse_report_fixup---of 8
param_set_scroll_speed---of 5
-----------
SUMMARY100%of 3
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__snd_timer_user_ioctl.isra.0---of 168
check_matching_master_slave.part.0---of 10
realloc_user_queue---of 8
snd_timer_clear_callbacks---of 6
snd_timer_close---of 5
snd_timer_close_locked---of 33
snd_timer_continue---of 6
snd_timer_dev_disconnect58%of 7
snd_timer_dev_free100%of 3
snd_timer_dev_register---of 24
snd_timer_find---of 10
snd_timer_free.part.060%of 10
snd_timer_free_system---of 1
snd_timer_global_free---of 3
snd_timer_global_new---of 1
snd_timer_global_register---of 1
snd_timer_instance_free---of 4
snd_timer_instance_new---of 5
snd_timer_interrupt---of 5
snd_timer_interrupt.part.0---of 39
snd_timer_new---of 21
snd_timer_notify---of 20
snd_timer_notify1---of 19
snd_timer_open---of 52
snd_timer_pause---of 5
snd_timer_proc_read---of 26
snd_timer_process_callbacks---of 7
snd_timer_reschedule---of 11
snd_timer_resolution---of 9
snd_timer_s_close---of 1
snd_timer_s_function---of 8
snd_timer_s_start---of 4
snd_timer_s_stop---of 3
snd_timer_start---of 7
snd_timer_start1---of 25
snd_timer_start_slave---of 10
snd_timer_stop---of 5
snd_timer_stop1---of 26
snd_timer_stop_slave---of 13
snd_timer_user_append_to_tqueue---of 3
snd_timer_user_ccallback---of 9
snd_timer_user_ccallback.cold---of 1
snd_timer_user_disconnect---of 1
snd_timer_user_fasync---of 1
snd_timer_user_info_compat.isra.0---of 16
snd_timer_user_interrupt---of 11
snd_timer_user_ioctl---of 3
snd_timer_user_ioctl_compat---of 13
snd_timer_user_open---of 6
snd_timer_user_params.isra.0---of 38
snd_timer_user_poll---of 10
snd_timer_user_read---of 24
snd_timer_user_release---of 16
snd_timer_user_start.isra.0---of 11
snd_timer_user_status32.isra.0---of 5
snd_timer_user_status64.isra.0---of 5
snd_timer_user_tinterrupt---of 26
snd_timer_work---of 5
timer_set_gparams---of 5
-----------
SUMMARY65%of 20
page_ext_get53%of 21
page_ext_put58%of 7
-----------
SUMMARY54%of 28
-----------
SUMMARY---of 0
keyspan_close---of 1
keyspan_disconnect100%of 2
keyspan_irq_recv---of 60
keyspan_load_tester---of 10
keyspan_load_tester.cold---of 1
keyspan_open---of 2
keyspan_probe---of 67
-----------
SUMMARY100%of 2
-----------
SUMMARY---of 0
do_active_device---of 16
do_hardware_base_addr---of 10
do_hardware_dma---of 9
do_hardware_irq---of 9
do_hardware_modes---of 37
parport_device_proc_register---of 7
parport_device_proc_unregister---of 3
parport_proc_register---of 10
parport_proc_unregister100%of 3
-----------
SUMMARY100%of 3
-----------
SUMMARY---of 0
__tty_buffer_request_room82%of 11
__tty_insert_flip_string_flags73%of 11
flush_to_ldisc---of 23
tty_buffer_alloc78%of 9
tty_buffer_cancel_work100%of 1
tty_buffer_flush---of 6
tty_buffer_flush_work---of 1
tty_buffer_free---of 6
tty_buffer_free_all56%of 9
tty_buffer_init---of 1
tty_buffer_lock_exclusive---of 1
tty_buffer_request_room---of 8
tty_buffer_restart_work---of 1
tty_buffer_set_limit---of 3
tty_buffer_set_lock_subclass---of 1
tty_buffer_space_avail---of 1
tty_buffer_unlock_exclusive---of 2
tty_flip_buffer_push100%of 1
tty_insert_flip_string_and_push_buffer---of 3
tty_ldisc_receive_buf---of 6
tty_prepare_flip_string---of 5
-----------
SUMMARY74%of 42
alloc_bulk_urb---of 4
alloc_ctrl_urb---of 5
alloc_isoc_urb---of 29
btusb_bcm_set_diag---of 10
btusb_bulk_complete---of 17
btusb_close---of 7
btusb_coredump---of 2
btusb_coredump_qca---of 4
btusb_diag_complete---of 14
btusb_disconnect44%of 23
btusb_dump_hdr_qca---of 9
btusb_find_altsetting.isra.0---of 8
btusb_flush---of 3
btusb_intel_cmd_timeout---of 18
btusb_intr_complete---of 18
btusb_isoc_complete---of 54
btusb_isoc_tx_complete---of 7
btusb_notify---of 4
btusb_oob_wake_handler---of 4
btusb_open---of 29
btusb_probe---of 165
btusb_qca_cmd_timeout---of 14
btusb_qca_send_vendor_req---of 6
btusb_recv_acl_qca---of 3
btusb_recv_bulk---of 16
btusb_recv_bulk_intel---of 5
btusb_recv_event_realtek---of 17
btusb_recv_evt_qca---of 3
btusb_recv_intr---of 16
btusb_reset---of 9
btusb_resume---of 31
btusb_rtl_cmd_timeout---of 17
btusb_rtl_hw_error---of 9
btusb_rx_work---of 3
btusb_send_frame---of 18
btusb_send_frame_intel---of 21
btusb_set_bdaddr_ath3012---of 6
btusb_set_bdaddr_marvell---of 6
btusb_set_bdaddr_wcn6855---of 6
btusb_setup_bcm92035---of 8
btusb_setup_csr---of 48
btusb_setup_qca---of 52
btusb_setup_qca_download_fw---of 17
btusb_setup_realtek---of 4
btusb_shutdown_qca---of 7
btusb_submit_bulk_urb---of 15
btusb_submit_intr_urb---of 23
btusb_submit_isoc_urb---of 31
btusb_suspend---of 19
btusb_tx_complete---of 9
btusb_waker---of 2
btusb_wakeup---of 3
btusb_work---of 77
force_poll_sync_read---of 2
force_poll_sync_write---of 6
handle_dump_pkt_qca---of 42
submit_or_queue_tx_urb---of 4
submit_tx_urb---of 9
-----------
SUMMARY44%of 23
__device_attach21%of 24
__device_attach_async_helper---of 9
__device_attach_driver48%of 19
__driver_attach39%of 26
__driver_attach_async_helper---of 9
__driver_probe_device75%of 16
coredump_store---of 1
deferred_devs_open---of 1
deferred_devs_show---of 6
deferred_probe_extend_timeout34%of 3
deferred_probe_initcall---of 6
deferred_probe_timeout_work_func---of 6
deferred_probe_work_func---of 7
device_attach---of 1
device_bind_driver---of 5
device_block_probing---of 1
device_driver_attach---of 8
device_driver_detach---of 1
device_initial_probe100%of 1
device_is_bound---of 3
device_release_driver100%of 1
device_release_driver_internal61%of 28
device_remove100%of 5
device_set_deferred_probe_reason---of 1
device_unbind_cleanup50%of 4
device_unblock_probing---of 2
driver_allows_async_probing58%of 7
driver_attach100%of 1
driver_bound75%of 8
driver_deferred_probe_add---of 2
driver_deferred_probe_add.part.0---of 6
driver_deferred_probe_check_state---of 5
driver_deferred_probe_del29%of 7
driver_deferred_probe_trigger---of 2
driver_deferred_probe_trigger.part.067%of 3
driver_detach80%of 5
driver_probe_device34%of 9
driver_sysfs_add58%of 7
really_probe40%of 50
state_synced_show---of 1
state_synced_store---of 11
wait_for_device_probe---of 6
-----------
SUMMARY49%of 224
alt_xfer---of 9
bulk_irq---of 13
destroy_urbs63%of 8
gspca_buffer_finish---of 3
gspca_buffer_prepare---of 5
gspca_buffer_queue---of 3
gspca_dev_probe---of 6
gspca_dev_probe2---of 49
gspca_disconnect56%of 9
gspca_frame_add---of 29
gspca_init_transfer---of 96
gspca_init_transfer.cold---of 2
gspca_input_create_urb.part.0---of 16
gspca_queue_setup---of 5
gspca_release100%of 1
gspca_resume---of 5
gspca_return_all_buffers---of 5
gspca_start_streaming---of 4
gspca_stop_streaming---of 1
gspca_stream_off---of 14
gspca_suspend---of 12
int_irq---of 6
isoc_irq---of 22
try_fmt_vid_cap---of 33
vidioc_enum_fmt_vid_cap---of 14
vidioc_enum_frameintervals---of 15
vidioc_enum_framesizes---of 10
vidioc_enum_input---of 8
vidioc_g_fmt_vid_cap---of 1
vidioc_g_input---of 1
vidioc_g_jpegcomp---of 1
vidioc_g_parm---of 3
vidioc_querycap---of 13
vidioc_s_fmt_vid_cap---of 7
vidioc_s_input---of 2
vidioc_s_jpegcomp---of 1
vidioc_s_parm---of 4
vidioc_try_fmt_vid_cap---of 3
which_bandwidth---of 12
-----------
SUMMARY62%of 18
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
driver_add_groups100%of 1
driver_create_file100%of 3
driver_find_device---of 11
driver_for_each_device---of 9
driver_register62%of 18
driver_remove_file100%of 2
driver_remove_groups100%of 1
driver_set_override---of 13
driver_unregister75%of 4
-----------
SUMMARY73%of 29
i2c_24xxx_ir---of 11
i2c_black_hole---of 1
i2c_hack_cx25840---of 33
i2c_hack_wm8775---of 11
pvr2_i2c_basic_op---of 10
pvr2_i2c_core_done100%of 2
pvr2_i2c_core_init---of 34
pvr2_i2c_functionality---of 1
pvr2_i2c_read---of 27
pvr2_i2c_write---of 13
pvr2_i2c_xfer12%of 53
-----------
SUMMARY15%of 55
__bpf_trace_hw_interval_param---of 1
__bpf_trace_hw_mask_param---of 1
__traceiter_hw_interval_param---of 3
__traceiter_hw_mask_param---of 3
_snd_pcm_stream_lock_irqsave---of 5
_snd_pcm_stream_lock_irqsave_nested---of 5
constrain_params_by_rules---of 34
constrain_params_by_rules.cold---of 1
do_pcm_hwsync---of 5
fixup_unreferenced_params---of 56
hw_support_mmap---of 8
pcm_release_private---of 2
perf_trace_hw_interval_param---of 5
perf_trace_hw_mask_param---of 5
relink_to_local---of 10
snd_pcm_action---of 5
snd_pcm_action_group---of 40
snd_pcm_action_lock_irq---of 9
snd_pcm_action_nonatomic---of 6
snd_pcm_action_single---of 7
snd_pcm_buffer_access_lock---of 7
snd_pcm_capture_open---of 4
snd_pcm_channel_info---of 23
snd_pcm_channel_info_user---of 4
snd_pcm_common_ioctl---of 200
snd_pcm_delay---of 26
snd_pcm_do_drain_init---of 24
snd_pcm_do_pause---of 5
snd_pcm_do_prepare---of 3
snd_pcm_do_reset---of 12
snd_pcm_do_resume---of 7
snd_pcm_do_start---of 5
snd_pcm_do_stop---of 7
snd_pcm_do_suspend---of 7
snd_pcm_drain---of 51
snd_pcm_drain_done---of 4
snd_pcm_drop---of 14
snd_pcm_fasync---of 7
snd_pcm_forward---of 32
snd_pcm_group_init---of 1
snd_pcm_group_unref.part.0---of 11
snd_pcm_hw_convert_from_old_params---of 1
snd_pcm_hw_convert_to_old_params---of 1
snd_pcm_hw_params---of 74
snd_pcm_hw_refine---of 31
snd_pcm_hw_rule_buffer_bytes_max---of 1
snd_pcm_hw_rule_div---of 1
snd_pcm_hw_rule_format---of 10
snd_pcm_hw_rule_mul---of 1
snd_pcm_hw_rule_muldivk---of 1
snd_pcm_hw_rule_mulkdiv---of 1
snd_pcm_hw_rule_rate---of 1
snd_pcm_hw_rule_sample_bits---of 9
snd_pcm_hw_rule_subformats---of 12
snd_pcm_info---of 13
snd_pcm_info_user---of 5
snd_pcm_ioctl---of 3
snd_pcm_ioctl_compat---of 73
snd_pcm_ioctl_hw_params_compat---of 18
snd_pcm_ioctl_sw_params_compat---of 21
snd_pcm_ioctl_sync_ptr_buggy---of 26
snd_pcm_ioctl_xferi_compat---of 9
snd_pcm_ioctl_xfern_compat---of 17
snd_pcm_kernel_ioctl---of 19
snd_pcm_lib_default_mmap---of 11
snd_pcm_lib_mmap_iomem---of 3
snd_pcm_mmap---of 36
snd_pcm_mmap_control_fault---of 13
snd_pcm_mmap_data---of 14
snd_pcm_mmap_data_close---of 1
snd_pcm_mmap_data_fault---of 21
snd_pcm_mmap_data_open---of 1
snd_pcm_mmap_status_fault---of 13
snd_pcm_open---of 19
snd_pcm_open_substream---of 81
snd_pcm_playback_open---of 4
snd_pcm_poll---of 30
snd_pcm_post_pause---of 6
snd_pcm_post_prepare---of 9
snd_pcm_post_reset---of 10
snd_pcm_post_resume---of 2
snd_pcm_post_start---of 5
snd_pcm_post_stop---of 4
snd_pcm_post_suspend---of 3
snd_pcm_pre_drain_init---of 4
snd_pcm_pre_pause---of 6
snd_pcm_pre_prepare---of 7
snd_pcm_pre_reset---of 3
snd_pcm_pre_resume---of 3
snd_pcm_pre_start---of 10
snd_pcm_pre_stop---of 3
snd_pcm_pre_suspend---of 3
snd_pcm_prepare---of 20
snd_pcm_read---of 8
snd_pcm_readv---of 18
snd_pcm_release---of 9
snd_pcm_release_substream---of 2
snd_pcm_release_substream.part.0---of 12
snd_pcm_rewind---of 32
snd_pcm_set_state---of 9
snd_pcm_start---of 1
snd_pcm_status64---of 46
snd_pcm_status_user32---of 5
snd_pcm_status_user64---of 5
snd_pcm_status_user_compat64---of 7
snd_pcm_stop---of 1
snd_pcm_stop_xrun---of 14
snd_pcm_stream_group_ref---of 25
snd_pcm_stream_lock---of 3
snd_pcm_stream_lock_irq67%of 3
snd_pcm_stream_unlock---of 3
snd_pcm_stream_unlock_irq67%of 3
snd_pcm_stream_unlock_irqrestore---of 3
snd_pcm_suspend_all---of 19
snd_pcm_sw_params---of 39
snd_pcm_sw_params_user---of 3
snd_pcm_sync_ptr---of 26
snd_pcm_sync_stop13%of 8
snd_pcm_trigger_tstamp---of 9
snd_pcm_undo_pause---of 3
snd_pcm_undo_resume---of 5
snd_pcm_undo_start---of 2
snd_pcm_unlink---of 20
snd_pcm_write---of 8
snd_pcm_writev---of 18
trace_event_raw_event_hw_interval_param---of 6
trace_event_raw_event_hw_mask_param---of 6
trace_hw_interval_param---of 15
trace_hw_mask_param---of 15
trace_raw_output_hw_interval_param---of 9
trace_raw_output_hw_mask_param---of 5
-----------
SUMMARY36%of 14
-----------
SUMMARY---of 0
__bpf_trace_exit_mmap---of 1
__bpf_trace_vm_unmapped_area---of 1
__bpf_trace_vma_mas_szero---of 1
__bpf_trace_vma_store---of 1
__do_sys_brk---of 39
__do_sys_remap_file_pages---of 39
__ia32_sys_brk---of 1
__ia32_sys_mmap_pgoff---of 1
__ia32_sys_munmap---of 1
__ia32_sys_remap_file_pages---of 1
__install_special_mapping---of 11
__split_vma---of 52
__traceiter_exit_mmap---of 3
__traceiter_vm_unmapped_area---of 3
__traceiter_vma_mas_szero---of 3
__traceiter_vma_store---of 3
__vm_munmap---of 16
__x64_sys_brk---of 1
__x64_sys_mmap_pgoff---of 1
__x64_sys_munmap---of 1
__x64_sys_remap_file_pages---of 1
_install_special_mapping---of 1
can_vma_merge_after.constprop.0---of 10
can_vma_merge_before.constprop.0---of 12
check_brk_limits---of 7
copy_vma---of 32
count_vma_pages_range---of 4
do_brk_flags---of 73
do_mmap---of 69
do_munmap---of 1
do_vma_munmap---of 1
do_vmi_align_munmap.constprop.0---of 92
do_vmi_munmap---of 20
exit_mmap---of 60
expand_downwards48%of 50
expand_stack---of 27
expand_stack_locked100%of 1
find_extend_vma_locked---of 11
find_mergeable_anon_vma---of 23
find_vma67%of 6
find_vma_intersection---of 6
find_vma_prev---of 3
generic_get_unmapped_area---of 15
generic_get_unmapped_area_topdown---of 20
get_unmapped_area---of 13
init_admin_reserve---of 1
init_user_reserve---of 1
insert_vm_struct---of 16
install_special_mapping---of 4
ksys_mmap_pgoff---of 24
ksys_mmap_pgoff.cold---of 5
may_expand_vm20%of 10
mlock_future_ok---of 5
mm_drop_all_locks---of 21
mm_take_all_locks---of 44
mmap_region---of 132
perf_trace_exit_mmap---of 5
perf_trace_vm_unmapped_area---of 5
perf_trace_vma_mas_szero---of 5
perf_trace_vma_store---of 5
remove_vma---of 10
special_mapping_fault---of 22
special_mapping_mremap---of 6
special_mapping_name---of 1
special_mapping_split---of 1
trace_event_raw_event_exit_mmap---of 6
trace_event_raw_event_vm_unmapped_area---of 6
trace_event_raw_event_vma_mas_szero---of 6
trace_event_raw_event_vma_store---of 6
trace_raw_output_exit_mmap---of 5
trace_raw_output_vm_unmapped_area---of 5
trace_raw_output_vma_mas_szero---of 5
trace_raw_output_vma_store---of 5
unlink_file_vma---of 4
unmap_region---of 7
validate_mm56%of 20
vm_brk_flags---of 29
vm_lock_mapping---of 4
vm_munmap---of 1
vm_stat_account---of 6
vm_unmapped_area---of 40
vma_complete---of 49
vma_expand---of 57
vma_is_special_mapping---of 5
vma_iter_store---of 30
vma_link---of 10
vma_link_file---of 4
vma_merge.isra.0---of 201
vma_merge_extend---of 1
vma_modify---of 9
vma_needs_dirty_tracking---of 9
vma_prepare---of 18
vma_set_page_prot---of 3
vma_shrink---of 34
vma_wants_writenotify---of 10
-----------
SUMMARY49%of 87
__ipv6_addr_type---of 14
eafnosupport_fib6_get_table---of 1
eafnosupport_fib6_lookup---of 1
eafnosupport_fib6_nh_init---of 3
eafnosupport_fib6_table_lookup---of 1
eafnosupport_ip6_del_rt---of 1
eafnosupport_ip6_mtu_from_fib6---of 1
eafnosupport_ipv6_dev_find---of 1
eafnosupport_ipv6_dst_lookup_flow---of 1
eafnosupport_ipv6_fragment---of 1
eafnosupport_ipv6_route_input---of 1
in6_dev_finish_destroy59%of 12
in6_dev_finish_destroy_rcu---of 1
inet6addr_notifier_call_chain100%of 1
inet6addr_validator_notifier_call_chain---of 1
register_inet6addr_notifier---of 1
register_inet6addr_validator_notifier---of 1
unregister_inet6addr_notifier---of 1
unregister_inet6addr_validator_notifier---of 1
-----------
SUMMARY62%of 13
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
as102_attach---of 8
as102_fe_get_frontend---of 31
as102_fe_get_tune_settings---of 1
as102_fe_read_ber---of 1
as102_fe_read_signal_strength---of 1
as102_fe_read_snr---of 1
as102_fe_read_status---of 8
as102_fe_read_ucblocks---of 3
as102_fe_release100%of 1
as102_fe_set_frontend---of 37
as102_fe_ts_bus_ctrl---of 1
-----------
SUMMARY100%of 1
__bpf_trace_module_free---of 1
__bpf_trace_module_load---of 1
__bpf_trace_module_refcnt---of 1
__bpf_trace_module_request---of 1
__do_sys_delete_module---of 40
__do_sys_init_module---of 18
__ia32_sys_delete_module---of 1
__ia32_sys_finit_module---of 9
__ia32_sys_init_module---of 1
__is_module_percpu_address22%of 19
__layout_sections---of 14
__module_address10%of 21
__module_get---of 2
__module_put_and_kthread_exit---of 3
__module_text_address---of 4
__symbol_get---of 14
__symbol_put---of 7
__traceiter_module_free---of 3
__traceiter_module_get---of 3
__traceiter_module_load---of 3
__traceiter_module_put---of 3
__traceiter_module_request---of 4
__x64_sys_delete_module---of 1
__x64_sys_finit_module---of 9
__x64_sys_init_module---of 1
arch_mod_section_prepend---of 1
cmp_name---of 1
do_free_init---of 2
do_init_module---of 26
find_exported_symbol_in_section---of 5
find_module---of 1
find_module_all---of 15
find_sec---of 6
find_symbol---of 16
finished_loading---of 3
flush_module_init_free_work---of 1
free_mod_mem---of 7
free_modinfo_srcversion---of 1
free_modinfo_version---of 1
free_module---of 18
get_next_modinfo---of 18
idempotent_init_module---of 27
init_module_from_file---of 5
is_module_address100%of 4
is_module_percpu_address100%of 1
is_module_text_address63%of 8
load_module---of 317
modinfo_srcversion_exists---of 1
modinfo_version_exists---of 1
module_elf_check_arch---of 1
module_exit_section---of 1
module_flags---of 16
module_flags_taint---of 5
module_frob_arch_sections---of 1
module_get_offset_and_type---of 3
module_init_layout_section---of 1
module_init_section---of 1
module_memfree---of 4
module_next_tag_pair---of 5
module_patient_check_exists.constprop.0---of 14
module_put50%of 2
module_put.part.0---of 22
module_refcount---of 1
module_unload_free---of 11
perf_trace_module_free---of 10
perf_trace_module_load---of 10
perf_trace_module_refcnt---of 10
perf_trace_module_request---of 6
print_modules---of 9
register_module_notifier---of 1
resolve_symbol---of 42
search_module_extables---of 6
setup_modinfo_srcversion---of 1
setup_modinfo_version---of 1
show_coresize---of 5
show_initsize---of 5
show_initstate---of 3
show_modinfo_srcversion---of 1
show_modinfo_version---of 1
show_refcnt---of 1
show_taint---of 1
store_uevent---of 5
symbol_put_addr15%of 7
trace_event_raw_event_module_free---of 11
trace_event_raw_event_module_load---of 11
trace_event_raw_event_module_refcnt---of 11
trace_event_raw_event_module_request---of 7
trace_module_get---of 15
trace_raw_output_module_free---of 5
trace_raw_output_module_load---of 5
trace_raw_output_module_refcnt---of 5
trace_raw_output_module_request---of 5
try_module_get20%of 10
try_to_force_load---of 1
unknown_module_param_cb---of 8
unregister_module_notifier---of 1
-----------
SUMMARY28%of 72
tomoyo_init_log---of 89
tomoyo_poll_log---of 6
tomoyo_read_log---of 7
tomoyo_write_log---of 1
tomoyo_write_log27%of 30
-----------
SUMMARY7%of 30
llcp_add_header---of 6
llcp_allocate_pdu---of 5
nfc_llcp_allocate_snl---of 6
nfc_llcp_build_sdreq_tlv---of 13
nfc_llcp_build_sdres_tlv---of 5
nfc_llcp_build_tlv---of 10
nfc_llcp_free_sdp_tlv---of 1
nfc_llcp_free_sdp_tlv_list20%of 5
nfc_llcp_parse_connection_tlv---of 21
nfc_llcp_parse_gb_tlv---of 33
nfc_llcp_send_cc---of 12
nfc_llcp_send_connect---of 20
nfc_llcp_send_disconnect---of 5
nfc_llcp_send_dm---of 7
nfc_llcp_send_i_frame---of 27
nfc_llcp_send_rr---of 6
nfc_llcp_send_snl_sdreq---of 17
nfc_llcp_send_snl_sdres---of 10
nfc_llcp_send_symm---of 5
nfc_llcp_send_ui_frame---of 24
-----------
SUMMARY20%of 5
-----------
SUMMARY---of 0
__bpf_trace_hwmon_attr_class---of 1
__bpf_trace_hwmon_attr_show_string---of 1
__hwmon_device_register---of 92
__hwmon_device_register.cold---of 1
__hwmon_sanitize_name---of 9
__traceiter_hwmon_attr_show---of 3
__traceiter_hwmon_attr_show_string---of 3
__traceiter_hwmon_attr_store---of 3
devm_hwmon_device_register_with_groups---of 7
devm_hwmon_device_register_with_info---of 11
devm_hwmon_device_unregister---of 2
devm_hwmon_match---of 1
devm_hwmon_release100%of 1
devm_hwmon_sanitize_name---of 3
hwmon_attr_show---of 21
hwmon_attr_show_string---of 21
hwmon_attr_store---of 23
hwmon_dev_attr_is_visible---of 7
hwmon_dev_release80%of 5
hwmon_device_register---of 1
hwmon_device_register_for_thermal---of 5
hwmon_device_register_with_groups---of 3
hwmon_device_register_with_info---of 8
hwmon_device_unregister72%of 7
hwmon_notify_event---of 6
hwmon_sanitize_name---of 1
label_show---of 1
name_show---of 1
perf_trace_hwmon_attr_class---of 6
perf_trace_hwmon_attr_show_string---of 17
trace_event_get_offsets_hwmon_attr_show_string.constprop.0---of 3
trace_event_raw_event_hwmon_attr_class---of 7
trace_event_raw_event_hwmon_attr_show_string---of 18
trace_raw_output_hwmon_attr_class---of 5
trace_raw_output_hwmon_attr_show_string---of 5
-----------
SUMMARY77%of 13
__ipv6_dev_ac_dec---of 30
__ipv6_dev_ac_inc---of 44
__ipv6_sock_ac_close---of 19
__ipv6_sock_ac_close.cold---of 1
ac6_get_next.isra.0---of 28
ac6_proc_exit---of 1
ac6_proc_init---of 2
ac6_seq_next---of 1
ac6_seq_show---of 1
ac6_seq_start---of 29
ac6_seq_stop---of 6
aca_free_rcu---of 9
aca_put---of 5
ipv6_ac_destroy_dev39%of 13
ipv6_anycast_cleanup---of 5
ipv6_chk_acast_addr---of 59
ipv6_chk_acast_addr_src---of 2
ipv6_sock_ac_close---of 5
ipv6_sock_ac_close.cold---of 1
ipv6_sock_ac_drop---of 27
ipv6_sock_ac_drop.cold---of 1
ipv6_sock_ac_join---of 31
ipv6_sock_ac_join.cold---of 1
-----------
SUMMARY39%of 13
-----------
SUMMARY---of 0
__bpf_trace_alloc_vmap_area---of 1
__bpf_trace_free_vmap_area_noflush---of 1
__bpf_trace_purge_vmap_area_lazy---of 1
__get_vm_area_caller---of 1
__get_vm_area_node70%of 13
__purge_vmap_area_lazy---of 50
__traceiter_alloc_vmap_area---of 3
__traceiter_free_vmap_area_noflush---of 3
__traceiter_purge_vmap_area_lazy---of 3
__vmalloc---of 1
__vmalloc_node---of 1
__vmalloc_node_range49%of 88
__vmap_pages_range_noflush37%of 11
__vmap_pages_range_noflush.cold---of 3
__vunmap_range_noflush77%of 38
_vm_unmap_aliases---of 39
aligned_vread_iter---of 7
alloc_vmap_area42%of 192
check_sparse_vm_area---of 12
const_folio_flags.constprop.040%of 10
decay_va_pool_node---of 52
decode_vn_id40%of 5
delayed_vfree_work---of 2
drain_vmap_area_work---of 1
find_unlink_vmap_area84%of 18
find_vm_area---of 3
find_vmap_area---of 14
find_vmap_area_exceed_addr_lock---of 28
free_vm_area---of 3
free_vmap_area---of 104
free_vmap_area_noflush41%of 22
free_vmap_area_rb_augment_cb_copy---of 1
free_vmap_area_rb_augment_cb_propagate---of 7
free_vmap_area_rb_augment_cb_rotate---of 5
free_vmap_block---of 9
get_vm_area---of 1
get_vm_area_caller---of 1
insert_vmap_area90%of 19
insert_vmap_area_augment.constprop.0---of 33
ioremap_page_range---of 15
is_vmalloc_addr100%of 1
is_vmalloc_or_module_addr100%of 3
mod_memcg_page_state.part.0.constprop.036%of 42
pcpu_free_vm_areas---of 5
pcpu_get_vm_areas---of 337
perf_trace_alloc_vmap_area---of 5
perf_trace_free_vmap_area_noflush---of 5
perf_trace_purge_vmap_area_lazy---of 5
purge_vmap_node---of 20
reclaim_and_purge_vmap_areas---of 31
reclaim_list_global.part.0---of 101
register_vmap_purge_notifier---of 1
remap_vmalloc_range---of 1
remap_vmalloc_range_partial---of 21
remove_vm_area79%of 14
trace_event_raw_event_alloc_vmap_area---of 6
trace_event_raw_event_free_vmap_area_noflush---of 6
trace_event_raw_event_purge_vmap_area_lazy---of 6
trace_raw_output_alloc_vmap_area---of 4
trace_raw_output_free_vmap_area_noflush---of 5
trace_raw_output_purge_vmap_area_lazy---of 5
unregister_vmap_purge_notifier---of 1
vfree40%of 30
vfree.cold---of 2
vfree_atomic---of 5
vm_area_map_pages---of 4
vm_area_unmap_pages---of 2
vm_map_ram---of 55
vm_map_ram.cold---of 5
vm_unmap_aliases---of 1
vm_unmap_ram---of 21
vm_unmap_ram.cold---of 1
vmalloc---of 1
vmalloc_32---of 1
vmalloc_32_user---of 1
vmalloc_dump_obj---of 15
vmalloc_huge---of 1
vmalloc_info_show---of 48
vmalloc_info_show.cold---of 1
vmalloc_node---of 1
vmalloc_nr_pages---of 1
vmalloc_to_page69%of 22
vmalloc_to_pfn---of 1
vmalloc_user---of 1
vmap---of 12
vmap_node_shrink_count---of 7
vmap_node_shrink_scan---of 3
vmap_page_range---of 1
vmap_pages_range_noflush---of 1
vmap_pfn---of 7
vmap_pfn_apply---of 41
vmap_range_noflush---of 60
vmap_small_pages_range_noflush60%of 72
vread_iter---of 53
vunmap---of 6
vunmap_range---of 1
vunmap_range_noflush---of 1
vzalloc100%of 1
vzalloc_node---of 1
zero_iter---of 4
-----------
SUMMARY53%of 601
autosuspend_delay_ms_show---of 3
autosuspend_delay_ms_store---of 5
control_show---of 2
control_store---of 5
dpm_sysfs_add54%of 15
dpm_sysfs_change_owner---of 14
dpm_sysfs_remove100%of 2
pm_qos_latency_tolerance_us_show---of 8
pm_qos_latency_tolerance_us_store---of 10
pm_qos_no_power_off_show---of 1
pm_qos_no_power_off_store---of 6
pm_qos_resume_latency_us_show---of 6
pm_qos_resume_latency_us_store---of 10
pm_qos_sysfs_add_flags---of 1
pm_qos_sysfs_add_latency_tolerance---of 1
pm_qos_sysfs_add_resume_latency---of 1
pm_qos_sysfs_remove_flags100%of 1
pm_qos_sysfs_remove_latency_tolerance---of 1
pm_qos_sysfs_remove_resume_latency100%of 1
rpm_sysfs_remove---of 1
runtime_active_time_show---of 1
runtime_status_show---of 6
runtime_suspended_time_show---of 1
wakeup_abort_count_show---of 5
wakeup_active_count_show---of 5
wakeup_active_show---of 5
wakeup_count_show---of 5
wakeup_expire_count_show---of 5
wakeup_last_time_ms_show---of 5
wakeup_max_time_ms_show---of 5
wakeup_show---of 4
wakeup_store---of 7
wakeup_sysfs_add---of 4
wakeup_sysfs_remove100%of 1
wakeup_total_time_ms_show---of 5
-----------
SUMMARY65%of 20
msi2500_buf_queue---of 5
msi2500_cleanup_queued_bufs---of 7
msi2500_ctrl_msg---of 6
msi2500_disconnect67%of 3
msi2500_enum_fmt_sdr_cap---of 5
msi2500_enum_freq_bands---of 14
msi2500_g_fmt_sdr_cap---of 3
msi2500_g_frequency---of 13
msi2500_g_tuner---of 13
msi2500_isoc_cleanup---of 18
msi2500_isoc_handler---of 61
msi2500_probe---of 22
msi2500_querycap---of 8
msi2500_queue_setup---of 5
msi2500_s_fmt_sdr_cap---of 9
msi2500_s_frequency---of 15
msi2500_s_tuner---of 12
msi2500_set_usb_adc---of 22
msi2500_start_streaming---of 33
msi2500_stop_streaming---of 15
msi2500_transfer_one_message---of 5
msi2500_try_fmt_sdr_cap---of 8
msi2500_video_release---of 1
-----------
SUMMARY67%of 3
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
new_id_show---of 1
new_id_store---of 4
usb_serial_bus_deregister---of 5
usb_serial_bus_register---of 1
usb_serial_device_match---of 1
usb_serial_device_probe---of 9
usb_serial_device_remove100%of 4
-----------
SUMMARY100%of 4
__input_mt_drop_unused---of 8
adjust_dual---of 21
copy_abs---of 3
input_mt_assign_slots---of 33
input_mt_destroy_slots100%of 3
input_mt_drop_unused---of 2
input_mt_get_slot_by_key---of 11
input_mt_init_slots---of 22
input_mt_release_slots---of 7
input_mt_report_finger_count---of 1
input_mt_report_pointer_emulation---of 21
input_mt_report_slot_state---of 7
input_mt_sync_frame---of 4
-----------
SUMMARY100%of 3
-----------
SUMMARY---of 0
__ath10k_scan_finish---of 15
__ath10k_set_antenna---of 22
ath10k_add_interface---of 196
ath10k_add_interface.cold---of 2
ath10k_ampdu_action---of 2
ath10k_bss_assoc.isra.0---of 67
ath10k_bss_info_changed---of 184
ath10k_cancel_hw_scan---of 1
ath10k_cancel_remain_on_channel---of 1
ath10k_conf_tx---of 55
ath10k_config---of 6
ath10k_config_ps---of 9
ath10k_configure_filter---of 4
ath10k_drain_tx---of 10
ath10k_flush---of 14
ath10k_get_antenna---of 1
ath10k_get_arvif---of 3
ath10k_get_arvif_iter---of 2
ath10k_get_survey---of 23
ath10k_halt---of 16
ath10k_hw_scan---of 27
ath10k_install_key---of 27
ath10k_install_peer_wep_keys---of 24
ath10k_mac_bitrate_mask_get_single_rate---of 13
ath10k_mac_bitrate_mask_has_single_rate.constprop.0---of 5
ath10k_mac_bitrate_to_idx---of 6
ath10k_mac_can_set_cts_prot---of 4
ath10k_mac_change_chanctx_cnt_iter---of 2
ath10k_mac_change_chanctx_fill_iter---of 4
ath10k_mac_clr_bitrate_mask_iter---of 8
ath10k_mac_create---of 5
ath10k_mac_dec_num_stations---of 6
ath10k_mac_destroy100%of 1
ath10k_mac_ext_resource_config---of 9
ath10k_mac_get_any_chandef_iter---of 1
ath10k_mac_handle_beacon---of 1
ath10k_mac_handle_beacon_iter---of 3
ath10k_mac_handle_beacon_miss---of 1
ath10k_mac_handle_beacon_miss_iter---of 3
ath10k_mac_handle_tx_pause_iter---of 7
ath10k_mac_handle_tx_pause_vdev---of 1
ath10k_mac_has_radar_iter---of 3
ath10k_mac_hw_rate_to_idx---of 10
ath10k_mac_is_peer_wep_key_set---of 8
ath10k_mac_num_chanctxs_iter---of 1
ath10k_mac_op_add_chanctx---of 1
ath10k_mac_op_assign_vif_chanctx---of 29
ath10k_mac_op_change_chanctx---of 10
ath10k_mac_op_remove_chanctx---of 1
ath10k_mac_op_reset_tid_config---of 4
ath10k_mac_op_set_bitrate_mask---of 71
ath10k_mac_op_set_bitrate_mask.cold---of 1
ath10k_mac_op_set_coverage_class---of 3
ath10k_mac_op_set_frag_threshold---of 1
ath10k_mac_op_set_tid_config---of 100
ath10k_mac_op_sta_pre_rcu_remove---of 4
ath10k_mac_op_switch_vif_chanctx---of 1
ath10k_mac_op_tx---of 13
ath10k_mac_op_unassign_vif_chanctx---of 16
ath10k_mac_op_wake_tx_queue---of 8
ath10k_mac_parse_bitrate---of 22
ath10k_mac_register---of 109
ath10k_mac_reset_tid_config---of 13
ath10k_mac_rfkill_enable_radio---of 8
ath10k_mac_set_bitrate_mask_iter---of 10
ath10k_mac_set_cts_prot---of 8
ath10k_mac_set_sar_specs---of 19
ath10k_mac_setup_bcn_tmpl---of 28
ath10k_mac_setup_ht_vht_cap---of 49
ath10k_mac_setup_ht_vht_cap.cold---of 1
ath10k_mac_setup_prb_tmpl---of 12
ath10k_mac_tdls_peer_update---of 13
ath10k_mac_tdls_vif_stations_count_iter---of 3
ath10k_mac_tx---of 50
ath10k_mac_tx_frm_has_freq---of 5
ath10k_mac_tx_h_fill_cb---of 38
ath10k_mac_tx_h_get_txmode---of 13
ath10k_mac_tx_h_get_txpath.constprop.0---of 6
ath10k_mac_tx_lock---of 7
ath10k_mac_tx_lock.cold---of 1
ath10k_mac_tx_push_pending---of 23
ath10k_mac_tx_push_txq---of 20
ath10k_mac_tx_unlock---of 8
ath10k_mac_tx_unlock.cold---of 1
ath10k_mac_tx_unlock_iter---of 2
ath10k_mac_txpower_recalc---of 38
ath10k_mac_txq_lookup---of 12
ath10k_mac_txq_unref---of 8
ath10k_mac_unregister---of 1
ath10k_mac_update_rx_channel---of 34
ath10k_mac_update_vif_chan---of 39
ath10k_mac_validate_rate_mask---of 12
ath10k_mac_vif_ap_csa_work---of 15
ath10k_mac_vif_beacon_cleanup---of 8
ath10k_mac_vif_beacon_free---of 9
ath10k_mac_vif_chan---of 24
ath10k_mac_vif_recalc_ps_poll_count---of 12
ath10k_mac_vif_recalc_ps_wake_threshold---of 11
ath10k_mac_vif_recalc_txbf.isra.0---of 27
ath10k_mac_vif_setup_ps---of 35
ath10k_mac_vif_sta_connection_loss_work---of 2
ath10k_mac_vif_stations_tid_conf---of 3
ath10k_mac_vif_tx_lock---of 8
ath10k_mac_vif_tx_unlock---of 9
ath10k_mac_wait_tx_complete---of 2
ath10k_mac_wait_tx_complete.part.0---of 18
ath10k_mgmt_over_wmi_tx_purge---of 3
ath10k_mgmt_over_wmi_tx_work---of 24
ath10k_monitor_recalc---of 74
ath10k_monitor_recalc.cold---of 1
ath10k_monitor_stop---of 31
ath10k_monitor_vdev_delete---of 13
ath10k_monitor_vdev_delete.cold---of 1
ath10k_offchan_tx_purge---of 3
ath10k_offchan_tx_work---of 24
ath10k_offset_tsf---of 10
ath10k_peer_assoc_prepare---of 188
ath10k_peer_assoc_prepare.cold---of 1
ath10k_peer_create---of 19
ath10k_peer_delete---of 15
ath10k_peer_map_cleanup---of 12
ath10k_recalc_radar_detection---of 16
ath10k_recalc_rtscts_prot---of 9
ath10k_reconfig_complete---of 9
ath10k_reg_notifier---of 11
ath10k_regd_update---of 38
ath10k_remain_on_channel---of 12
ath10k_remove_interface---of 47
ath10k_remove_interface.cold---of 1
ath10k_scan_abort---of 9
ath10k_scan_finish---of 1
ath10k_scan_stop---of 15
ath10k_scan_timeout_work---of 1
ath10k_set_antenna---of 1
ath10k_set_default_unicast_key---of 10
ath10k_set_key---of 100
ath10k_set_rts_threshold---of 10
ath10k_sta_rc_update---of 13
ath10k_sta_rc_update_wk---of 47
ath10k_sta_set_txpwr---of 11
ath10k_sta_state---of 95
ath10k_sta_statistics---of 27
ath10k_sta_tid_cfg_wk---of 46
ath10k_start---of 117
ath10k_start_scan---of 16
ath10k_station_assoc---of 80
ath10k_stop---of 7
ath10k_tx_last_beacon---of 1
ath10k_update_vif_offload---of 11
ath10k_vdev_start_restart---of 33
ath10k_vdev_stop---of 21
ath10k_wait_for_peer_delete_done---of 5
-----------
SUMMARY100%of 1
__snd_usb_add_audio_stream---of 32
__snd_usb_parse_audio_interface---of 199
add_chmap---of 6
audio_format_alloc_init---of 5
parse_uac_endpoint_attributes---of 17
snd_usb_add_audio_stream---of 1
snd_usb_audio_pcm_free69%of 16
snd_usb_init_substream---of 7
snd_usb_parse_audio_interface---of 4
usb_chmap_ctl_get---of 8
usb_chmap_ctl_info---of 1
usb_chmap_ctl_tlv---of 22
-----------
SUMMARY69%of 16
-----------
SUMMARY---of 0
netdev_close---of 8
netdev_open---of 26
r8712_free_drv_sw---of 1
r8712_init_drv_sw---of 15
r8712_init_netdev---of 5
r8712_stop_drv_threads---of 3
r8712_stop_drv_timers---of 1
r871x_net_get_stats100%of 1
r871x_net_set_mac_address---of 4
-----------
SUMMARY100%of 1
snd_pcm_timer_done---of 2
snd_pcm_timer_free100%of 1
snd_pcm_timer_init---of 6
snd_pcm_timer_resolution---of 3
snd_pcm_timer_resolution_change---of 9
snd_pcm_timer_start---of 1
snd_pcm_timer_stop---of 1
-----------
SUMMARY100%of 1
__bpf_trace_workqueue_activate_work---of 1
__bpf_trace_workqueue_execute_end---of 1
__bpf_trace_workqueue_execute_start---of 1
__bpf_trace_workqueue_queue_work---of 1
__cancel_work82%of 16
__cancel_work_sync60%of 25
__flush_work70%of 49
__flush_workqueue17%of 55
__init_work67%of 3
__pwq_activate_work---of 5
__queue_delayed_work53%of 17
__queue_work48%of 91
__traceiter_workqueue_activate_work---of 3
__traceiter_workqueue_execute_end---of 3
__traceiter_workqueue_execute_start---of 3
__traceiter_workqueue_queue_work---of 3
__warn_flushing_systemwide_wq---of 1
alloc_unbound_pwq---of 35
alloc_worker---of 3
alloc_workqueue---of 81
alloc_workqueue_attrs---of 3
apply_workqueue_attrs---of 1
apply_workqueue_attrs_locked---of 8
apply_wqattrs_cleanup.part.0---of 12
apply_wqattrs_commit---of 8
apply_wqattrs_prepare---of 27
assign_work---of 13
bh_pool_kick_highpri---of 1
bh_pool_kick_normal---of 1
bh_worker---of 29
cancel_delayed_work100%of 1
cancel_delayed_work_sync100%of 1
cancel_work---of 1
cancel_work_sync100%of 1
check_flush_dependency54%of 15
create_worker---of 27
current_is_workqueue_rescuer---of 6
current_work---of 6
cwt_wakefn---of 3
delayed_work_timer_fn---of 1
destroy_delayed_work_on_stack---of 1
destroy_work_on_stack---of 1
destroy_workqueue77%of 39
drain_dead_softirq_workfn---of 6
drain_workqueue67%of 15
execute_in_process_context---of 5
flush_delayed_work80%of 5
flush_rcu_work---of 5
flush_work100%of 1
flush_workqueue_prep_pwqs75%of 20
free_node_nr_active---of 6
free_workqueue_attrs---of 2
freeze_workqueues_begin---of 5
freeze_workqueues_busy---of 28
get_pwq67%of 6
get_work_pool70%of 13
idle_cull_fn---of 7
idle_worker_timeout---of 8
init_pwq---of 3
init_rescuer---of 10
init_worker_pool---of 5
insert_work80%of 5
install_unbound_pwq---of 10
jhash---of 17
kick_pool68%of 25
link_pwq---of 6
max_active_show---of 1
max_active_store---of 4
mod_delayed_work_on90%of 10
move_linked_works---of 9
parse_affn_scope---of 3
per_cpu_show---of 1
perf_trace_workqueue_activate_work---of 5
perf_trace_workqueue_execute_end---of 5
perf_trace_workqueue_execute_start---of 5
perf_trace_workqueue_queue_work---of 16
pool_mayday_timeout---of 16
pr_cont_pool_info---of 6
pr_cont_work---of 16
pr_cont_work_flush---of 11
pr_cont_worker_id---of 5
print_worker_info---of 8
process_one_work---of 86
put_pwq80%of 5
put_unbound_pool---of 32
pwq_dec_nr_in_flight---of 51
pwq_release_workfn---of 29
pwq_tryinc_nr_active54%of 28
queue_delayed_work_on55%of 11
queue_rcu_work---of 4
queue_work_node---of 19
queue_work_on82%of 11
rcu_free_pool---of 3
rcu_free_pwq---of 1
rcu_free_wq---of 7
rcu_work_rcufn---of 3
rescuer_thread---of 35
schedule_on_each_cpu---of 14
set_worker_desc---of 5
set_worker_dying---of 20
show_all_workqueues---of 31
show_freezable_workqueues---of 15
show_one_workqueue---of 11
show_pwq---of 42
thaw_workqueues---of 4
touch_wq_lockdep_map50%of 4
trace_event_raw_event_workqueue_activate_work---of 6
trace_event_raw_event_workqueue_execute_end---of 6
trace_event_raw_event_workqueue_execute_start---of 6
trace_event_raw_event_workqueue_queue_work---of 17
trace_raw_output_workqueue_activate_work---of 5
trace_raw_output_workqueue_execute_end---of 5
trace_raw_output_workqueue_execute_start---of 5
trace_raw_output_workqueue_queue_work---of 5
trace_workqueue_activate_work34%of 15
try_to_grab_pending27%of 49
tryinc_node_nr_active100%of 4
unbind_worker---of 8
unbound_pwq73%of 11
wake_dying_workers---of 4
work_busy---of 21
work_debug_hint---of 1
work_fixup_free---of 4
work_fixup_init---of 4
work_for_cpu_fn---of 1
work_is_static_object100%of 1
work_on_cpu_key---of 1
work_on_cpu_safe_key---of 5
worker_attach_to_pool---of 14
worker_detach_from_pool---of 9
worker_enter_idle---of 15
worker_leave_idle---of 9
worker_pool_assign_id---of 7
worker_thread---of 46
workqueue_apply_unbound_cpumask---of 22
workqueue_congested---of 15
workqueue_offline_cpu---of 27
workqueue_online_cpu---of 61
workqueue_prepare_cpu---of 6
workqueue_set_max_active---of 9
workqueue_set_min_active---of 5
workqueue_softirq_action---of 3
workqueue_softirq_dead---of 8
workqueue_sysfs_register---of 12
workqueue_unbound_exclude_cpumask---of 4
wq_adjust_max_active---of 19
wq_affinity_strict_show---of 1
wq_affinity_strict_store---of 7
wq_affn_dfl_get---of 1
wq_affn_dfl_set---of 10
wq_affn_scope_show---of 4
wq_affn_scope_store---of 9
wq_barrier_func---of 1
wq_calc_pod_cpumask---of 9
wq_cpumask_show---of 1
wq_cpumask_store---of 10
wq_device_release---of 1
wq_isolated_cpumask_show---of 1
wq_nice_show---of 1
wq_nice_store---of 12
wq_requested_cpumask_show---of 1
wq_sysfs_prep_attrs---of 6
wq_unbound_cpumask_show---of 1
wq_unbound_cpumask_store---of 8
wq_update_node_max_active---of 37
wq_update_pod---of 16
wq_watchdog_param_set_thresh---of 6
wq_watchdog_reset_touched---of 5
wq_watchdog_timer_fn---of 61
wq_watchdog_touch100%of 3
wq_worker_comm---of 14
wq_worker_last_func---of 1
wq_worker_running100%of 6
wq_worker_sleeping100%of 6
wq_worker_tick39%of 13
wqattrs_pod_type---of 12
-----------
SUMMARY57%of 580
_read_latency_timer---of 3
change_speed---of 61
event_char_store---of 9
ftdi_232bm_baud_base_to_divisor.constprop.0---of 7
ftdi_8u2232c_probe---of 10
ftdi_HE_TIRA1_setup---of 1
ftdi_NDI_device_setup---of 6
ftdi_USB_UIRT_setup---of 1
ftdi_break_ctl---of 7
ftdi_dtr_rts---of 5
ftdi_get_modem_status---of 11
ftdi_gpio_direction_get---of 2
ftdi_gpio_direction_get.cold---of 1
ftdi_gpio_direction_input---of 2
ftdi_gpio_direction_input.cold---of 1
ftdi_gpio_direction_output---of 5
ftdi_gpio_direction_output.cold---of 3
ftdi_gpio_get---of 4
ftdi_gpio_get.cold---of 1
ftdi_gpio_get_multiple---of 3
ftdi_gpio_init_valid_mask---of 6
ftdi_gpio_request---of 7
ftdi_gpio_set---of 6
ftdi_gpio_set.cold---of 2
ftdi_gpio_set_multiple---of 1
ftdi_ioctl---of 6
ftdi_is_visible---of 7
ftdi_jtag_probe---of 3
ftdi_open---of 3
ftdi_port_probe---of 73
ftdi_port_probe.cold---of 3
ftdi_port_remove80%of 5
ftdi_prepare_write_buffer---of 8
ftdi_probe---of 6
ftdi_process_read_urb---of 55
ftdi_read_cbus_pins---of 5
ftdi_read_eeprom.part.0---of 5
ftdi_set_bitmode---of 5
ftdi_set_termios---of 55
ftdi_stmclite_probe---of 3
ftdi_tiocmget---of 3
ftdi_tiocmset---of 1
ftdi_tx_empty---of 3
get_serial_info---of 1
latency_timer_show---of 5
latency_timer_store---of 4
set_serial_info---of 13
update_mctrl---of 25
write_latency_timer---of 8
-----------
SUMMARY80%of 5
-----------
SUMMARY---of 0
destroy_serial100%of 12
get_iface_id---of 11
port_number_show---of 1
serial_break---of 5
serial_chars_in_buffer---of 5
serial_cleanup---of 9
serial_close---of 3
serial_get_icount---of 5
serial_get_serial---of 5
serial_hangup---of 4
serial_install---of 15
serial_ioctl---of 9
serial_open---of 3
serial_port_activate---of 9
serial_port_carrier_raised---of 3
serial_port_dtr_rts---of 2
serial_port_shutdown---of 3
serial_proc_show---of 12
serial_set_serial---of 9
serial_set_termios---of 5
serial_throttle---of 4
serial_tiocmget---of 5
serial_tiocmset---of 5
serial_unthrottle---of 4
serial_wait_until_sent---of 6
serial_write---of 8
serial_write_room---of 3
store_endpoint.isra.0---of 20
usb_serial_claim_interface---of 6
usb_serial_deregister---of 3
usb_serial_deregister_drivers---of 3
usb_serial_disconnect64%of 19
usb_serial_port_get_by_minor---of 9
usb_serial_port_poison_urbs100%of 1
usb_serial_port_release67%of 3
usb_serial_port_softint---of 1
usb_serial_port_work---of 1
usb_serial_probe---of 115
usb_serial_put---of 5
usb_serial_register_drivers---of 56
usb_serial_reset_resume---of 6
usb_serial_resume---of 6
usb_serial_suspend---of 7
usb_serial_unpoison_port_urbs---of 2
-----------
SUMMARY78%of 35
__close_range---of 50
__f_unlock_pos100%of 1
__fdget100%of 1
__fdget_pos86%of 7
__fdget_raw---of 1
__fget_files67%of 21
__fget_light63%of 8
__get_file_rcu---of 9
__get_unused_fd_flags---of 1
__ia32_sys_dup---of 6
__ia32_sys_dup2---of 24
__ia32_sys_dup3---of 1
__put_unused_fd50%of 8
__x64_sys_dup---of 6
__x64_sys_dup2---of 24
__x64_sys_dup3---of 1
alloc_fd75%of 24
alloc_fdtable---of 10
alloc_fdtable.cold---of 1
close_fd---of 3
copy_fd_bitmaps---of 1
do_close_on_exec---of 16
do_dup2---of 19
dup_fd---of 37
exit_files---of 2
expand_files12%of 35
f_dupfd---of 4
fd_install27%of 38
fget---of 1
fget_raw---of 1
fget_task---of 3
file_close_fd100%of 1
file_close_fd_locked65%of 14
free_fdtable_rcu---of 1
get_close_on_exec---of 17
get_file_active---of 12
get_file_rcu---of 3
get_unused_fd_flags100%of 1
iterate_fd---of 19
ksys_dup3---of 14
lookup_fdget_rcu---of 9
put_files_struct---of 11
put_unused_fd100%of 1
receive_fd---of 8
receive_fd_replace---of 5
replace_fd---of 8
set_close_on_exec64%of 11
task_lookup_fdget_rcu---of 10
task_lookup_next_fdget_rcu---of 18
-----------
SUMMARY48%of 171
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__devm_drm_dev_alloc---of 8
devm_drm_dev_init_release---of 2
drm_core_exit---of 1
drm_dev_alloc---of 6
drm_dev_enter---of 6
drm_dev_exit---of 3
drm_dev_get---of 6
drm_dev_init---of 28
drm_dev_init_release---of 3
drm_dev_put---of 2
drm_dev_put.part.0---of 7
drm_dev_register---of 32
drm_dev_unplug100%of 1
drm_dev_unregister80%of 5
drm_fs_init_fs_context---of 2
drm_minor_acquire---of 16
drm_minor_alloc---of 28
drm_minor_alloc_release---of 4
drm_minor_get_slot.part.0---of 1
drm_minor_register---of 16
drm_minor_release---of 2
drm_minor_unregister80%of 10
drm_put_dev---of 3
drm_stub_open---of 16
remove_compat_control_link100%of 4
-----------
SUMMARY85%of 20
appleir_input_configured---of 3
appleir_input_mapping---of 1
appleir_probe---of 8
appleir_raw_event---of 14
appleir_remove100%of 1
key_up_tick---of 3
-----------
SUMMARY100%of 1
lockref_get100%of 1
lockref_get_not_dead100%of 3
lockref_get_not_zero100%of 3
lockref_mark_dead67%of 3
lockref_put_not_zero---of 3
lockref_put_or_lock---of 3
lockref_put_return100%of 1
-----------
SUMMARY91%of 11
__ath10k_htt_tx_txq_recalc.isra.0---of 25
__ath10k_htt_tx_txq_sync---of 6
ath10k_htt_flush_tx_queue---of 1
ath10k_htt_h2t_aggr_cfg_msg_32---of 6
ath10k_htt_h2t_aggr_cfg_msg_v2---of 6
ath10k_htt_h2t_stats_req---of 4
ath10k_htt_h2t_ver_req_msg---of 5
ath10k_htt_hif_tx_complete---of 1
ath10k_htt_htc_tx_complete---of 7
ath10k_htt_mgmt_tx---of 32
ath10k_htt_op_ep_tx_credits---of 1
ath10k_htt_send_frag_desc_bank_cfg_32---of 9
ath10k_htt_send_frag_desc_bank_cfg_64---of 9
ath10k_htt_send_rx_ring_cfg_32---of 7
ath10k_htt_send_rx_ring_cfg_64---of 7
ath10k_htt_send_rx_ring_cfg_hl---of 4
ath10k_htt_set_tx_ops---of 3
ath10k_htt_tx_32---of 47
ath10k_htt_tx_64---of 46
ath10k_htt_tx_alloc_cont_frag_desc_32---of 5
ath10k_htt_tx_alloc_cont_frag_desc_64---of 5
ath10k_htt_tx_alloc_cont_txbuf_32---of 3
ath10k_htt_tx_alloc_cont_txbuf_64---of 3
ath10k_htt_tx_alloc_msdu_id---of 1
ath10k_htt_tx_clean_up_pending---of 1
ath10k_htt_tx_dec_pending---of 7
ath10k_htt_tx_destroy50%of 2
ath10k_htt_tx_destroy.part.0---of 7
ath10k_htt_tx_fetch_resp---of 4
ath10k_htt_tx_free---of 2
ath10k_htt_tx_free_cont_frag_desc_32---of 2
ath10k_htt_tx_free_cont_frag_desc_64---of 2
ath10k_htt_tx_free_cont_txbuf_32---of 2
ath10k_htt_tx_free_cont_txbuf_64---of 2
ath10k_htt_tx_free_msdu_id---of 4
ath10k_htt_tx_free_txq---of 2
ath10k_htt_tx_hl---of 39
ath10k_htt_tx_inc_pending---of 7
ath10k_htt_tx_mgmt_dec_pending---of 5
ath10k_htt_tx_mgmt_inc_pending---of 9
ath10k_htt_tx_start---of 29
ath10k_htt_tx_start.cold---of 1
ath10k_htt_tx_stop---of 1
ath10k_htt_tx_txq_recalc---of 1
ath10k_htt_tx_txq_sync---of 1
ath10k_htt_tx_txq_update---of 1
-----------
SUMMARY50%of 2
__find_nth_and_andnot_bit---of 16
__find_nth_and_bit---of 16
__find_nth_andnot_bit---of 16
__find_nth_bit---of 16
_find_first_and_bit---of 5
_find_first_bit100%of 5
_find_first_zero_bit100%of 5
_find_last_bit100%of 6
_find_next_and_bit---of 7
_find_next_andnot_bit---of 7
_find_next_bit100%of 7
_find_next_or_bit---of 7
_find_next_zero_bit58%of 7
find_next_clump8---of 3
-----------
SUMMARY90%of 30
___neigh_lookup_noref.constprop.0---of 22
___neigh_lookup_noref.constprop.0.cold---of 1
__bpf_trace_fib6_table_lookup---of 1
__find_rr_leaf---of 70
__ip6_route_redirect---of 55
__ip6_rt_update_pmtu---of 82
__ip6_rt_update_pmtu.cold---of 1
__rt6_find_exception_rcu.constprop.0---of 11
__rt6_find_exception_spinlock.constprop.0---of 9
__rt6_nh_dev_match---of 5
__traceiter_fib6_table_lookup---of 3
addrconf_f6i_alloc---of 8
dst_discard---of 1
fib6_backtrack.constprop.0---of 9
fib6_clean_tohost---of 16
fib6_ifdown22%of 37
fib6_ifup---of 7
fib6_info_hw_flags_set---of 14
fib6_info_nh_uses_dev---of 1
fib6_nh_age_exceptions.part.0---of 32
fib6_nh_del_cached_rt---of 8
fib6_nh_find_match---of 8
fib6_nh_flush_exceptions10%of 20
fib6_nh_get_excptn_bucket40%of 15
fib6_nh_init---of 124
fib6_nh_mtu_change---of 59
fib6_nh_redirect_match---of 1
fib6_nh_release---of 15
fib6_nh_release_dsts---of 2
fib6_nh_release_dsts.part.0---of 7
fib6_nh_remove_exception.constprop.0---of 6
fib6_remove_prefsrc---of 6
fib6_rt_update---of 11
fib6_select_path---of 55
fib6_table_lookup---of 39
icmp6_dst_alloc---of 16
in6_dev_get50%of 22
inet6_rt_notify55%of 11
inet6_rtm_delroute---of 9
inet6_rtm_getroute---of 112
inet6_rtm_newroute---of 8
ip6_blackhole_route---of 11
ip6_confirm_neigh---of 23
ip6_create_rt_rcu---of 26
ip6_default_advmss---of 27
ip6_del_rt64%of 11
ip6_dst_alloc---of 3
ip6_dst_check---of 43
ip6_dst_destroy---of 24
ip6_dst_gc---of 5
ip6_dst_gc.cold---of 1
ip6_dst_ifdown---of 8
ip6_dst_neigh_lookup---of 5
ip6_hold_safe---of 9
ip6_ins_rt---of 1
ip6_link_failure---of 34
ip6_mtu---of 21
ip6_mtu_from_fib6---of 13
ip6_multipath_l3_keys.constprop.0---of 18
ip6_negative_advice---of 18
ip6_neigh_lookup---of 30
ip6_nh_lookup_table.isra.0---of 7
ip6_pkt_discard---of 1
ip6_pkt_discard_out---of 7
ip6_pkt_drop---of 32
ip6_pkt_prohibit---of 1
ip6_pkt_prohibit_out---of 7
ip6_pol_route---of 82
ip6_pol_route_input---of 1
ip6_pol_route_lookup---of 146
ip6_pol_route_output---of 1
ip6_redirect---of 1
ip6_redirect_nh_match.isra.0---of 8
ip6_redirect_no_header---of 3
ip6_route_add---of 11
ip6_route_check_nh---of 20
ip6_route_cleanup---of 1
ip6_route_del---of 143
ip6_route_dev_notify16%of 13
ip6_route_info_create---of 103
ip6_route_input---of 36
ip6_route_input_lookup---of 4
ip6_route_lookup---of 1
ip6_route_multipath_add---of 131
ip6_route_multipath_del---of 15
ip6_route_net_exit---of 1
ip6_route_net_exit_late---of 1
ip6_route_net_init---of 7
ip6_route_net_init_late---of 4
ip6_route_output_flags---of 32
ip6_route_output_flags.cold---of 1
ip6_route_redirect.constprop.0.isra.0---of 1
ip6_rt_cache_alloc.constprop.0---of 25
ip6_rt_copy_init---of 21
ip6_rt_get_dev_rcu---of 8
ip6_rt_update_pmtu---of 3
ip6_sk_dst_store_flow---of 32
ip6_sk_dst_store_flow.cold---of 1
ip6_sk_redirect---of 1
ip6_sk_update_pmtu---of 21
ip6_sk_update_pmtu.cold---of 1
ip6_update_pmtu---of 6
ipv6_inetpeer_exit---of 1
ipv6_inetpeer_init---of 3
ipv6_route_ioctl---of 8
ipv6_route_sysctl_init---of 4
ipv6_route_sysctl_table_size---of 2
ipv6_sysctl_rtcache_flush---of 4
ndisc_hashfn---of 1
neigh_key_eq128---of 1
perf_trace_fib6_table_lookup---of 21
rt6_add_dflt_router---of 4
rt6_age_exceptions---of 5
rt6_check_expired---of 12
rt6_clean_tohost---of 1
rt6_disable_ip79%of 38
rt6_do_redirect---of 78
rt6_do_update_pmtu---of 30
rt6_dump_route---of 47
rt6_exception_hash.constprop.0.isra.0---of 5
rt6_fill_node.constprop.0 20%of 153
rt6_find_cached_rt.constprop.0.isra.0---of 4
rt6_flush_exceptions67%of 3
rt6_get_dflt_router---of 42
rt6_info_init---of 1
rt6_insert_exception---of 53
rt6_lookup---of 5
rt6_mtu_change---of 1
rt6_mtu_change_route---of 14
rt6_multipath_hash---of 78
rt6_multipath_rebalance---of 3
rt6_multipath_rebalance.part.0---of 71
rt6_nh_age_exceptions---of 3
rt6_nh_dump_exceptions---of 11
rt6_nh_find_match---of 16
rt6_nh_flush_exceptions---of 1
rt6_nh_nlmsg_size---of 3
rt6_nh_remove_exception_rt---of 1
rt6_nlmsg_size24%of 13
rt6_purge_dflt_routers---of 83
rt6_remove_exception.part.0---of 13
rt6_remove_exception_rt---of 12
rt6_remove_prefsrc---of 1
rt6_score_route---of 23
rt6_stats_seq_show---of 1
rt6_sync_down_dev---of 4
rt6_sync_up---of 4
rt6_uncached_list_add---of 3
rt6_uncached_list_del---of 4
rtm_to_fib6_config---of 56
trace_event_raw_event_fib6_table_lookup---of 22
trace_fib6_table_lookup---of 15
trace_raw_output_fib6_table_lookup---of 4
-----------
SUMMARY32%of 336
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
call_netevent_notifiers100%of 1
register_netevent_notifier---of 1
unregister_netevent_notifier---of 1
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
decimals_show---of 5
decimals_store---of 15
mode_lsb_show---of 1
mode_lsb_store---of 3
mode_msb_show---of 1
mode_msb_store---of 3
powered_show---of 1
powered_store---of 12
sevseg_disconnect100%of 1
sevseg_probe---of 3
sevseg_reset_resume---of 1
sevseg_resume---of 1
sevseg_suspend---of 1
text_show---of 1
text_store---of 10
textmode_show---of 26
textmode_store---of 5
update_display_mode.part.0---of 3
update_display_visual---of 12
-----------
SUMMARY100%of 1
as102_open67%of 9
as102_read_ep2---of 8
as102_release---of 8
as102_send_ep1---of 7
as102_urb_stream_irq---of 7
as102_usb_disconnect13%of 8
as102_usb_probe---of 22
as102_usb_start_stream---of 6
as102_usb_stop_stream---of 1
as102_usb_xfer_cmd---of 12
-----------
SUMMARY42%of 17
hci_codec_list_add.isra.0---of 8
hci_codec_list_clear25%of 4
hci_read_codec_capabilities---of 18
hci_read_supported_codecs---of 15
hci_read_supported_codecs_v2---of 15
-----------
SUMMARY25%of 4
roccat_common2_sysfs_read_buttons---of 1
roccat_common2_sysfs_read_general---of 1
roccat_common2_sysfs_read_info---of 1
roccat_common2_sysfs_read_macro---of 1
roccat_common2_sysfs_read_profile---of 1
roccat_common2_sysfs_read_sensor---of 1
roccat_common2_sysfs_write_buttons---of 1
roccat_common2_sysfs_write_control---of 1
roccat_common2_sysfs_write_general---of 1
roccat_common2_sysfs_write_info---of 1
roccat_common2_sysfs_write_macro---of 1
roccat_common2_sysfs_write_profile---of 1
roccat_common2_sysfs_write_sensor---of 1
savu_probe---of 18
savu_raw_event---of 7
savu_remove40%of 5
-----------
SUMMARY40%of 5
__bpf_trace_file_check_and_advance_wb_err---of 1
__bpf_trace_filemap_set_wb_err---of 1
__bpf_trace_mm_filemap_op_page_cache---of 1
__filemap_add_folio---of 67
__filemap_fdatawait_range---of 7
__filemap_fdatawrite_range---of 1
__filemap_get_folio---of 54
__filemap_remove_folio43%of 26
__filemap_set_wb_err---of 15
__folio_lock---of 1
__folio_lock_killable---of 1
__folio_lock_or_retry---of 41
__generic_file_write_iter---of 9
__ia32_sys_cachestat---of 17
__traceiter_file_check_and_advance_wb_err---of 3
__traceiter_filemap_set_wb_err---of 3
__traceiter_mm_filemap_add_to_page_cache---of 3
__traceiter_mm_filemap_delete_from_page_cache---of 3
__x64_sys_cachestat---of 17
const_folio_flags.constprop.0 40%of 10
delete_from_page_cache_batch---of 41
dio_warn_stale_pagecache---of 4
do_read_cache_folio---of 31
file_check_and_advance_wb_err---of 18
file_fdatawait_range---of 1
file_write_and_wait_range---of 7
filemap_add_folio---of 9
filemap_alloc_folio---of 21
filemap_cachestat---of 44
filemap_cachestat.cold---of 1
filemap_check_and_keep_errors---of 3
filemap_check_errors---of 6
filemap_fault---of 233
filemap_fdatawait_keep_errors---of 1
filemap_fdatawait_range---of 1
filemap_fdatawait_range_keep_errors---of 1
filemap_fdatawrite---of 1
filemap_fdatawrite_range---of 1
filemap_fdatawrite_wbc---of 7
filemap_flush---of 1
filemap_free_folio43%of 7
filemap_get_entry57%of 25
filemap_get_folios---of 1
filemap_get_folios_contig---of 48
filemap_get_folios_tag---of 61
filemap_get_folios_tag.cold---of 1
filemap_get_pages---of 76
filemap_get_read_batch---of 53
filemap_get_read_batch.cold---of 1
filemap_invalidate_lock_two---of 8
filemap_invalidate_unlock_two---of 5
filemap_map_pages42%of 53
filemap_page_mkwrite---of 33
filemap_range_has_page---of 18
filemap_range_has_writeback---of 23
filemap_read---of 39
filemap_read.cold---of 2
filemap_read_folio---of 10
filemap_release_folio---of 11
filemap_remove_folio75%of 8
filemap_splice_read---of 33
filemap_unaccount_folio39%of 18
filemap_write_and_wait_range---of 9
filemap_write_and_wait_range.part.0---of 8
find_get_entries16%of 57
find_get_entries.cold---of 1
find_lock_entries59%of 55
folio_add_wait_queue---of 3
folio_end_private_2---of 6
folio_end_read---of 9
folio_end_writeback---of 16
folio_flags.constprop.0 40%of 10
folio_unlock50%of 4
folio_wait_bit---of 1
folio_wait_bit_common---of 42
folio_wait_bit_killable---of 1
folio_wait_private_2---of 2
folio_wait_private_2_killable---of 3
folio_wake_bit---of 4
generic_file_direct_write---of 10
generic_file_mmap---of 5
generic_file_read_iter---of 12
generic_file_readonly_mmap---of 6
generic_file_write_iter---of 9
generic_perform_write80%of 15
kiocb_invalidate_pages---of 6
kiocb_invalidate_post_direct_write---of 3
kiocb_write_and_wait---of 8
mapping_read_folio_gfp---of 1
mapping_seek_hole_data---of 114
mapping_seek_hole_data.cold---of 6
migration_entry_wait_on_locked---of 35
next_uptodate_folio67%of 27
page_cache_next_miss---of 18
page_cache_prev_miss---of 18
perf_trace_file_check_and_advance_wb_err---of 8
perf_trace_filemap_set_wb_err---of 8
perf_trace_mm_filemap_op_page_cache---of 10
read_cache_folio---of 1
read_cache_page---of 6
read_cache_page_gfp---of 6
replace_page_cache_folio---of 31
splice_folio_into_pipe---of 11
splice_folio_into_pipe.cold---of 2
trace_event_raw_event_file_check_and_advance_wb_err---of 9
trace_event_raw_event_filemap_set_wb_err---of 9
trace_event_raw_event_mm_filemap_op_page_cache---of 11
trace_raw_output_file_check_and_advance_wb_err---of 4
trace_raw_output_filemap_set_wb_err---of 5
trace_raw_output_mm_filemap_op_page_cache---of 4
wake_page_function---of 11
xas_next_entry82%of 16
xas_reload44%of 23
xas_reload.cold---of 1
-----------
SUMMARY48%of 354
-----------
SUMMARY---of 0
__copy_overflow---of 1
copy_from_kernel_nofault94%of 16
copy_from_user_nofault---of 5
copy_to_kernel_nofault---of 14
copy_to_user_nofault---of 6
strncpy_from_kernel_nofault---of 9
strncpy_from_user_nofault---of 8
strnlen_user_nofault---of 1
-----------
SUMMARY94%of 16
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
drop_ref70%of 10
hidraw_connect---of 9
hidraw_disconnect100%of 1
hidraw_exit---of 1
hidraw_fasync---of 1
hidraw_get_report.isra.0---of 15
hidraw_ioctl19%of 38
hidraw_open---of 18
hidraw_poll---of 8
hidraw_read---of 19
hidraw_release---of 5
hidraw_report_event34%of 6
hidraw_send_report.isra.0---of 17
hidraw_write---of 1
-----------
SUMMARY31%of 55
-----------
SUMMARY---of 0
__bforget---of 5
__bh_read---of 8
__bh_read_batch---of 11
__block_commit_write---of 12
__block_write_begin---of 8
__block_write_begin_int---of 90
__block_write_begin_int.cold---of 4
__block_write_full_folio---of 59
__bread_gfp---of 13
__breadahead---of 8
__brelse---of 3
__find_get_block---of 63
__find_get_block.cold---of 2
__getblk_slow---of 29
__lock_buffer---of 2
__remove_assoc_queue---of 5
__sync_dirty_buffer---of 14
__wait_on_buffer---of 2
alloc_buffer_head---of 6
alloc_page_buffers---of 9
bdev_getblk---of 6
bh_uptodate_or_lock---of 7
block_commit_write---of 8
block_dirty_folio---of 8
block_invalidate_folio---of 23
block_invalidate_folio.cold---of 2
block_is_partially_uptodate---of 13
block_is_partially_uptodate.cold---of 2
block_page_mkwrite---of 19
block_page_mkwrite.cold---of 1
block_read_full_folio---of 33
block_truncate_page---of 30
block_truncate_page.cold---of 3
block_write_begin---of 23
block_write_end---of 13
block_write_full_folio---of 12
block_write_full_folio.cold---of 3
buffer_check_dirty_writeback---of 10
buffer_exit_cpu_dead---of 5
buffer_io_error---of 3
clean_bdev_aliases---of 26
clean_bdev_aliases.cold---of 3
const_folio_flags.constprop.0---of 10
cont_write_begin---of 27
cont_write_begin.cold---of 3
create_empty_buffers---of 15
drop_buffers.constprop.0---of 12
end_bio_bh_io_sync---of 3
end_buffer_async_read---of 15
end_buffer_async_read_io---of 1
end_buffer_async_write---of 13
end_buffer_read_sync---of 5
end_buffer_write_sync---of 5
folio_alloc_buffers---of 37
folio_alloc_buffers.cold---of 2
folio_create_buffers---of 7
folio_create_buffers.cold---of 1
folio_flags.constprop.0---of 10
folio_init_buffers---of 15
folio_init_buffers.cold---of 1
folio_set_bh---of 5
folio_set_bh.cold---of 1
folio_zero_new_buffers---of 12
free_buffer_head---of 6
fsync_buffers_list---of 36
generic_block_bmap---of 2
generic_block_bmap.cold---of 1
generic_buffers_fsync---of 4
generic_buffers_fsync_noflush---of 14
generic_cont_expand_simple---of 5
generic_write_end---of 18
has_bh_in_lru---of 5
inode_has_buffers100%of 1
invalidate_bh_lru---of 6
invalidate_bh_lrus---of 1
invalidate_bh_lrus_cpu---of 7
invalidate_inode_buffers---of 8
mark_buffer_async_write---of 2
mark_buffer_async_write_endio.constprop.0---of 2
mark_buffer_dirty---of 24
mark_buffer_dirty_inode---of 10
mark_buffer_write_io_error---of 11
recalc_bh_state.part.0---of 7
remove_inode_buffers---of 6
submit_bh---of 1
submit_bh_wbc---of 24
submit_bh_wbc.cold---of 1
sync_dirty_buffer---of 1
sync_mapping_buffers---of 5
touch_buffer---of 16
try_to_free_buffers---of 12
unlock_buffer---of 1
write_boundary_block---of 4
write_dirty_buffer---of 6
zero_user_segments---of 15
zero_user_segments.cold---of 2
-----------
SUMMARY100%of 1
blake2s_final---of 1
blake2s_update65%of 17
-----------
SUMMARY65%of 17
-----------
SUMMARY---of 0
__blk_mq_alloc_disk---of 7
__blk_mq_alloc_driver_tag---of 21
__blk_mq_alloc_map_and_rqs---of 4
__blk_mq_alloc_requests---of 91
__blk_mq_alloc_requests.cold---of 2
__blk_mq_complete_request_remote---of 1
__blk_mq_end_request---of 21
__blk_mq_flush_plug_list---of 2
__blk_mq_free_map_and_rqs100%of 4
__blk_mq_free_request---of 14
__blk_mq_issue_directly---of 6
__blk_mq_requeue_request---of 26
__blk_mq_unfreeze_queue---of 7
blk_account_io_completion.part.0---of 7
blk_account_io_done---of 27
blk_account_io_start---of 22
blk_add_rq_to_plug---of 19
blk_complete_reqs---of 2
blk_done_softirq---of 1
blk_dump_rq_flags---of 9
blk_end_sync_rq---of 1
blk_execute_rq---of 13
blk_execute_rq_nowait---of 9
blk_freeze_queue---of 1
blk_freeze_queue_start---of 4
blk_hctx_poll---of 14
blk_insert_cloned_request---of 38
blk_mq_alloc_and_init_hctx---of 36
blk_mq_alloc_disk_for_queue---of 5
blk_mq_alloc_map_and_rqs---of 45
blk_mq_alloc_queue---of 5
blk_mq_alloc_request---of 32
blk_mq_alloc_request_hctx---of 45
blk_mq_alloc_sq_tag_set---of 1
blk_mq_alloc_tag_set---of 54
blk_mq_cancel_work_sync---of 3
blk_mq_check_expired---of 9
blk_mq_check_inflight---of 8
blk_mq_complete_request---of 2
blk_mq_complete_request_remote---of 20
blk_mq_delay_kick_requeue_list---of 1
blk_mq_delay_run_hw_queue---of 34
blk_mq_delay_run_hw_queues---of 12
blk_mq_dequeue_from_ctx---of 30
blk_mq_dequeue_from_ctx.cold---of 8
blk_mq_destroy_queue---of 5
blk_mq_dispatch_rq_list---of 96
blk_mq_dispatch_wake---of 5
blk_mq_end_request---of 3
blk_mq_end_request_batch---of 102
blk_mq_exit_hctx---of 20
blk_mq_exit_queue---of 10
blk_mq_flush_busy_ctxs---of 21
blk_mq_flush_busy_ctxs.cold---of 7
blk_mq_flush_plug_list---of 2
blk_mq_flush_plug_list.part.0---of 131
blk_mq_free_map_and_rqs---of 2
blk_mq_free_plug_rqs---of 3
blk_mq_free_request---of 13
blk_mq_free_rq_map---of 1
blk_mq_free_rqs54%of 28
blk_mq_free_rqs.cold---of 1
blk_mq_free_tag_set67%of 9
blk_mq_freeze_queue---of 1
blk_mq_freeze_queue_wait---of 6
blk_mq_freeze_queue_wait_timeout---of 12
blk_mq_get_budget_and_tag---of 11
blk_mq_get_sq_hctx---of 2
blk_mq_handle_expired---of 12
blk_mq_has_request---of 4
blk_mq_hctx_has_pending---of 7
blk_mq_hctx_mark_pending---of 6
blk_mq_hctx_mark_pending.cold---of 4
blk_mq_hctx_notify_dead---of 16
blk_mq_hctx_notify_dead.cold---of 2
blk_mq_hctx_notify_offline---of 52
blk_mq_hctx_notify_online---of 8
blk_mq_in_flight---of 1
blk_mq_in_flight_rw---of 1
blk_mq_init_allocated_queue---of 38
blk_mq_insert_request---of 38
blk_mq_kick_requeue_list---of 1
blk_mq_map_swqueue---of 40
blk_mq_plug_issue_direct---of 19
blk_mq_poll---of 1
blk_mq_put_rq_ref---of 7
blk_mq_queue_inflight---of 1
blk_mq_quiesce_queue---of 6
blk_mq_quiesce_queue_nowait---of 3
blk_mq_quiesce_tagset---of 10
blk_mq_realloc_hw_ctxs---of 20
blk_mq_release---of 9
blk_mq_request_issue_directly---of 7
blk_mq_requeue_request---of 7
blk_mq_requeue_work---of 21
blk_mq_rq_cpu---of 1
blk_mq_rq_ctx_init.isra.0---of 10
blk_mq_rq_inflight---of 8
blk_mq_run_hw_queue---of 45
blk_mq_run_hw_queues---of 11
blk_mq_run_work_fn---of 15
blk_mq_start_hw_queue---of 1
blk_mq_start_hw_queues---of 3
blk_mq_start_request---of 29
blk_mq_start_stopped_hw_queue---of 2
blk_mq_start_stopped_hw_queues---of 5
blk_mq_stop_hw_queue---of 1
blk_mq_stop_hw_queues---of 3
blk_mq_submit_bio---of 123
blk_mq_timeout_work---of 33
blk_mq_try_issue_directly---of 13
blk_mq_try_issue_list_directly---of 17
blk_mq_unfreeze_queue---of 1
blk_mq_unquiesce_queue---of 5
blk_mq_unquiesce_tagset---of 5
blk_mq_update_nr_hw_queues---of 61
blk_mq_update_nr_requests---of 26
blk_mq_update_queue_map---of 15
blk_mq_update_tag_set_shared---of 6
blk_mq_wait_quiesce_done---of 3
blk_mq_wake_waiters---of 6
blk_rq_init---of 6
blk_rq_is_poll---of 4
blk_rq_poll---of 22
blk_rq_prep_clone---of 18
blk_rq_unprep_clone---of 2
blk_softirq_cpu_dead---of 1
blk_steal_bios---of 6
blk_update_request---of 85
cpu_online---of 3
queue_set_hctx_shared---of 8
trace_block_plug---of 15
trace_block_unplug---of 15
-----------
SUMMARY61%of 41
-----------
SUMMARY---of 0
attach_rules---of 8
dump_rules---of 18
fib_default_rule_add---of 4
fib_nl2rule.constprop.0.isra.0---of 88
fib_nl_delrule---of 104
fib_nl_dumprule---of 36
fib_nl_fill_rule---of 55
fib_nl_newrule---of 82
fib_rule_matchall---of 12
fib_rules_dump---of 6
fib_rules_event40%of 23
fib_rules_lookup---of 61
fib_rules_net_exit---of 2
fib_rules_net_init---of 1
fib_rules_register---of 17
fib_rules_seq_read---of 6
fib_rules_unregister---of 16
lookup_rules_ops---of 22
notify_rule_change---of 9
-----------
SUMMARY40%of 23
__kernfs_create_file75%of 12
kernfs_deref_open_node_locked---of 6
kernfs_drain_open_files---of 10
kernfs_fop_llseek---of 7
kernfs_fop_mmap---of 14
kernfs_fop_open---of 49
kernfs_fop_poll---of 9
kernfs_fop_read_iter---of 18
kernfs_fop_release---of 3
kernfs_fop_write_iter---of 20
kernfs_generic_poll---of 6
kernfs_notify50%of 22
kernfs_notify_workfn---of 18
kernfs_ops---of 5
kernfs_release_file---of 5
kernfs_seq_next---of 7
kernfs_seq_show---of 1
kernfs_seq_start---of 8
kernfs_seq_stop---of 5
kernfs_should_drain_open_files50%of 20
kernfs_unlink_open_file---of 15
kernfs_vma_access---of 6
kernfs_vma_fault---of 7
kernfs_vma_open---of 5
kernfs_vma_page_mkwrite---of 8
of_on---of 6
-----------
SUMMARY56%of 54
call_blocking_lsm_notifier---of 1
inode_free_by_rcu---of 1
lsm_append.constprop.0---of 10
lsm_fill_user_ctx---of 12
lsm_inode_alloc---of 3
register_blocking_lsm_notifier---of 1
security_audit_rule_free---of 2
security_audit_rule_init---of 4
security_audit_rule_known---of 5
security_audit_rule_match---of 4
security_binder_set_context_mgr---of 5
security_binder_transaction---of 5
security_binder_transfer_binder---of 5
security_binder_transfer_file---of 4
security_bpf---of 4
security_bpf_map---of 5
security_bpf_map_create---of 4
security_bpf_map_free---of 2
security_bpf_prog---of 5
security_bpf_prog_free---of 2
security_bpf_prog_load---of 4
security_bpf_token_capable---of 5
security_bpf_token_cmd---of 5
security_bpf_token_create---of 4
security_bpf_token_free---of 2
security_bprm_check---of 5
security_bprm_committed_creds---of 2
security_bprm_committing_creds---of 2
security_bprm_creds_for_exec---of 5
security_bprm_creds_from_file---of 5
security_capable100%of 4
security_capget---of 4
security_capset---of 4
security_create_user_ns---of 5
security_cred_alloc_blank---of 14
security_cred_free---of 5
security_cred_getsecid---of 2
security_current_getsecid_subj100%of 2
security_d_instantiate75%of 4
security_dentry_create_files_as---of 4
security_dentry_init_security---of 4
security_file_alloc54%of 13
security_file_fcntl---of 4
security_file_free100%of 4
security_file_ioctl100%of 4
security_file_ioctl_compat---of 4
security_file_lock---of 5
security_file_mprotect---of 4
security_file_open80%of 5
security_file_permission80%of 5
security_file_post_open80%of 5
security_file_receive---of 5
security_file_release100%of 2
security_file_send_sigiotask---of 4
security_file_set_fowner---of 2
security_file_truncate80%of 5
security_free_mnt_opts---of 5
security_fs_context_dup---of 5
security_fs_context_parse_param---of 5
security_fs_context_submount---of 5
security_getprocattr---of 10
security_getselfattr---of 24
security_inet_conn_established---of 2
security_inet_conn_request---of 4
security_inet_csk_clone---of 2
security_inode_alloc54%of 13
security_inode_copy_up---of 5
security_inode_copy_up_xattr---of 5
security_inode_create50%of 6
security_inode_follow_link50%of 6
security_inode_free100%of 4
security_inode_get_acl---of 5
security_inode_getattr---of 6
security_inode_getsecctx---of 4
security_inode_getsecid---of 2
security_inode_getsecurity---of 6
security_inode_getxattr---of 6
security_inode_init_security65%of 17
security_inode_init_security_anon---of 4
security_inode_invalidate_secctx---of 2
security_inode_killpriv---of 5
security_inode_link---of 5
security_inode_listsecurity---of 5
security_inode_listxattr---of 6
security_inode_mkdir---of 6
security_inode_mknod---of 6
security_inode_need_killpriv80%of 5
security_inode_notifysecctx---of 4
security_inode_permission50%of 6
security_inode_post_create_tmpfile---of 3
security_inode_post_remove_acl---of 3
security_inode_post_removexattr---of 3
security_inode_post_set_acl---of 3
security_inode_post_setattr100%of 3
security_inode_post_setxattr---of 3
security_inode_readlink---of 6
security_inode_remove_acl---of 5
security_inode_removexattr---of 8
security_inode_rename---of 13
security_inode_rmdir---of 6
security_inode_set_acl---of 5
security_inode_setattr100%of 5
security_inode_setsecctx---of 4
security_inode_setsecurity---of 5
security_inode_setxattr---of 8
security_inode_symlink---of 5
security_inode_unlink---of 6
security_ipc_getsecid---of 2
security_ipc_permission---of 6
security_ismaclabel---of 5
security_kernel_act_as---of 5
security_kernel_create_files_as---of 5
security_kernel_load_data---of 6
security_kernel_module_request---of 5
security_kernel_post_load_data---of 4
security_kernel_post_read_file---of 4
security_kernel_read_file---of 5
security_kernfs_init_security40%of 5
security_key_alloc---of 4
security_key_free---of 2
security_key_getsecurity---of 5
security_key_permission---of 4
security_key_post_create_or_update---of 3
security_locked_down---of 5
security_mmap_addr---of 5
security_mmap_file---of 10
security_move_mount---of 5
security_mptcp_add_subflow---of 5
security_msg_msg_alloc---of 12
security_msg_msg_free---of 3
security_msg_queue_alloc---of 12
security_msg_queue_associate---of 5
security_msg_queue_free---of 3
security_msg_queue_msgctl---of 5
security_msg_queue_msgrcv---of 4
security_msg_queue_msgsnd---of 4
security_netlink_send---of 5
security_path_chmod---of 7
security_path_chown---of 5
security_path_chroot---of 5
security_path_link---of 5
security_path_mkdir---of 6
security_path_mknod100%of 6
security_path_notify---of 4
security_path_post_mknod---of 3
security_path_rename---of 7
security_path_rmdir---of 6
security_path_symlink---of 5
security_path_truncate---of 6
security_path_unlink---of 6
security_perf_event_alloc---of 5
security_perf_event_free---of 2
security_perf_event_open---of 5
security_perf_event_read---of 5
security_perf_event_write---of 5
security_prepare_creds---of 14
security_ptrace_access_check---of 5
security_ptrace_traceme---of 5
security_quota_on---of 5
security_quotactl---of 4
security_release_secctx---of 2
security_req_classify_flow---of 2
security_sb_alloc---of 12
security_sb_clone_mnt_opts---of 4
security_sb_delete---of 2
security_sb_eat_lsm_opts---of 5
security_sb_free---of 3
security_sb_kern_mount---of 5
security_sb_mnt_opts_compat---of 5
security_sb_mount---of 4
security_sb_pivotroot---of 5
security_sb_remount---of 5
security_sb_set_mnt_opts---of 5
security_sb_show_options---of 5
security_sb_statfs---of 5
security_sb_umount---of 5
security_sctp_assoc_established---of 5
security_sctp_assoc_request---of 5
security_sctp_bind_connect---of 4
security_sctp_sk_clone---of 2
security_secctx_to_secid---of 4
security_secid_to_secctx---of 4
security_secmark_refcount_dec---of 2
security_secmark_refcount_inc---of 2
security_secmark_relabel_packet---of 5
security_sem_alloc---of 12
security_sem_associate---of 5
security_sem_free---of 3
security_sem_semctl---of 5
security_sem_semop---of 4
security_setprocattr---of 10
security_setselfattr---of 17
security_settime64---of 5
security_shm_alloc---of 12
security_shm_associate---of 5
security_shm_free---of 3
security_shm_shmat---of 4
security_shm_shmctl---of 5
security_sk_alloc---of 4
security_sk_classify_flow---of 3
security_sk_clone---of 2
security_sk_free---of 2
security_skb_classify_flow---of 7
security_sock_graft---of 2
security_sock_rcv_skb80%of 5
security_socket_accept---of 5
security_socket_bind---of 4
security_socket_connect---of 4
security_socket_create---of 4
security_socket_getpeername---of 5
security_socket_getpeersec_dgram---of 4
security_socket_getpeersec_stream---of 4
security_socket_getsockname---of 5
security_socket_getsockopt---of 4
security_socket_listen---of 5
security_socket_post_create---of 4
security_socket_recvmsg---of 4
security_socket_sendmsg---of 4
security_socket_setsockopt---of 4
security_socket_shutdown---of 5
security_socket_socketpair---of 5
security_syslog---of 5
security_task_alloc---of 12
security_task_fix_setgid---of 4
security_task_fix_setgroups---of 5
security_task_fix_setuid---of 4
security_task_free---of 3
security_task_getioprio---of 5
security_task_getpgid---of 5
security_task_getscheduler---of 5
security_task_getsecid_obj---of 2
security_task_getsid---of 5
security_task_kill---of 4
security_task_movememory---of 5
security_task_prctl---of 5
security_task_prlimit---of 4
security_task_setioprio---of 5
security_task_setnice---of 5
security_task_setpgid---of 5
security_task_setrlimit---of 4
security_task_setscheduler---of 5
security_task_to_inode---of 2
security_transfer_creds---of 2
security_tun_dev_alloc_security---of 5
security_tun_dev_attach---of 5
security_tun_dev_attach_queue---of 5
security_tun_dev_create---of 5
security_tun_dev_free_security---of 2
security_tun_dev_open---of 5
security_unix_may_send---of 5
security_unix_stream_connect---of 4
security_uring_cmd---of 5
security_uring_override_creds---of 5
security_uring_sqpoll---of 5
security_vm_enough_memory_mm100%of 4
security_xfrm_decode_session---of 5
security_xfrm_policy_alloc---of 4
security_xfrm_policy_clone---of 5
security_xfrm_policy_delete---of 5
security_xfrm_policy_free---of 2
security_xfrm_policy_lookup---of 5
security_xfrm_state_alloc---of 5
security_xfrm_state_alloc_acquire---of 4
security_xfrm_state_delete---of 5
security_xfrm_state_free---of 2
security_xfrm_state_pol_flow_match---of 3
unregister_blocking_lsm_notifier---of 1
-----------
SUMMARY74%of 138
corsair_event---of 7
corsair_input_mapping---of 40
corsair_mouse_report_fixup---of 9
corsair_probe---of 32
corsair_remove60%of 5
k90_backlight_get---of 8
k90_backlight_work---of 3
k90_brightness_set---of 1
k90_record_led_get---of 1
k90_record_led_work---of 4
k90_show_current_profile---of 8
k90_show_macro_mode---of 8
k90_store_current_profile---of 6
k90_store_macro_mode---of 6
-----------
SUMMARY60%of 5
__phys_addr75%of 8
__phys_addr.cold---of 1
__phys_addr_symbol67%of 3
__virt_addr_valid59%of 34
__virt_addr_valid.cold---of 1
-----------
SUMMARY63%of 45
__d_path---of 6
__dentry_path---of 37
__do_sys_getcwd---of 38
__ia32_sys_getcwd---of 1
__x64_sys_getcwd---of 1
d_absolute_path100%of 6
d_path---of 34
dentry_path---of 7
dentry_path_raw---of 3
dynamic_dname---of 4
prepend80%of 5
prepend_copy50%of 4
prepend_path56%of 74
simple_dname---of 7
-----------
SUMMARY60%of 89
usb_phy_roothub_add_phy_by_name---of 6
usb_phy_roothub_alloc---of 12
usb_phy_roothub_alloc_usb3_phy---of 5
usb_phy_roothub_calibrate34%of 6
usb_phy_roothub_exit---of 5
usb_phy_roothub_init---of 7
usb_phy_roothub_notify_connect34%of 6
usb_phy_roothub_notify_disconnect34%of 6
usb_phy_roothub_power_off---of 3
usb_phy_roothub_power_on---of 7
usb_phy_roothub_resume---of 10
usb_phy_roothub_set_mode---of 7
usb_phy_roothub_suspend---of 9
-----------
SUMMARY34%of 18
-----------
SUMMARY---of 0
__bitmap_and---of 5
__bitmap_andnot---of 5
__bitmap_clear100%of 4
__bitmap_complement---of 2
__bitmap_equal---of 6
__bitmap_intersects---of 6
__bitmap_or---of 2
__bitmap_or_equal---of 6
__bitmap_replace---of 2
__bitmap_set---of 4
__bitmap_shift_left---of 9
__bitmap_shift_right---of 13
__bitmap_subset---of 6
__bitmap_weight---of 5
__bitmap_weight_and---of 5
__bitmap_weight_andnot---of 5
__bitmap_xor---of 2
bitmap_alloc100%of 1
bitmap_alloc_node---of 1
bitmap_bitremap---of 6
bitmap_cut---of 11
bitmap_find_next_zero_area_off---of 5
bitmap_fold---of 4
bitmap_free100%of 1
bitmap_from_arr32---of 6
bitmap_onto---of 6
bitmap_pos_to_ord---of 4
bitmap_remap---of 10
bitmap_to_arr32---of 6
bitmap_zalloc100%of 1
bitmap_zalloc_node---of 1
devm_bitmap_alloc---of 4
devm_bitmap_free100%of 1
devm_bitmap_zalloc---of 1
-----------
SUMMARY100%of 8
-----------
SUMMARY---of 0
__blkcg_rstat_flush.isra.0---of 20
__blkg_prfill_u64---of 5
__blkg_release---of 10
bio_associate_blkg---of 17
bio_associate_blkg_from_css---of 103
bio_blkcg_css---of 5
bio_clone_blkg_association---of 4
blk_cgroup_bio_start---of 10
blk_cgroup_congested60%of 15
blkcg_activate_policy---of 56
blkcg_add_delay---of 3
blkcg_css.part.0 70%of 10
blkcg_css_alloc---of 25
blkcg_css_free---of 7
blkcg_css_offline---of 1
blkcg_css_online---of 7
blkcg_deactivate_policy---of 12
blkcg_exit---of 3
blkcg_exit_disk---of 1
blkcg_free_all_cpd---of 4
blkcg_get_cgwb_list---of 1
blkcg_init_disk---of 24
blkcg_iostat_update---of 6
blkcg_maybe_throttle_current3%of 69
blkcg_pin_online---of 5
blkcg_policy_enabled---of 3
blkcg_policy_register---of 25
blkcg_policy_unregister---of 10
blkcg_print_blkgs---of 18
blkcg_print_stat---of 43
blkcg_reset_stats---of 15
blkcg_rstat_flush---of 2
blkcg_scale_delay---of 10
blkcg_schedule_throttle---of 10
blkcg_unpin_online---of 11
blkg_alloc---of 22
blkg_conf_exit---of 4
blkg_conf_init---of 1
blkg_conf_open_bdev---of 10
blkg_conf_prep---of 73
blkg_create---of 83
blkg_destroy---of 19
blkg_destroy_all.isra.0---of 10
blkg_dev_name---of 3
blkg_free.part.0---of 1
blkg_free_workfn---of 9
blkg_init_queue---of 1
blkg_release---of 1
percpu_ref_put_many.constprop.0---of 15
-----------
SUMMARY20%of 94
__cpu_to_node60%of 5
__node_distance---of 6
__nodes_weight.constprop.0---of 1
cpumask_of_node---of 5
debug_cpumask_set_cpu---of 9
early_cpu_to_node---of 9
numa_add_cpu---of 1
numa_clear_node---of 1
numa_cpu_node---of 6
numa_remove_cpu---of 1
numa_set_node---of 8
-----------
SUMMARY60%of 5
picolcd_check_lcd_fb---of 3
picolcd_exit_lcd100%of 1
picolcd_get_contrast---of 1
picolcd_init_lcd---of 11
picolcd_resume_lcd---of 3
picolcd_set_contrast---of 7
-----------
SUMMARY100%of 1
__usb_queue_reset_device---of 3
__usb_wireless_status_intf---of 3
cdc_parse_cdc_header---of 43
create_intf_ep_devs.isra.0---of 5
driver_set_config_work---of 5
sg_complete---of 23
usb_api_blocking_completion100%of 1
usb_authorize_interface---of 2
usb_bulk_msg72%of 14
usb_cache_string90%of 10
usb_clear_halt34%of 9
usb_control_msg88%of 8
usb_control_msg_recv75%of 8
usb_control_msg_send100%of 4
usb_deauthorize_interface---of 3
usb_disable_device74%of 23
usb_disable_device_endpoints50%of 6
usb_disable_endpoint79%of 19
usb_disable_interface48%of 21
usb_driver_set_configuration---of 5
usb_enable_endpoint100%of 7
usb_enable_interface88%of 8
usb_get_descriptor100%of 8
usb_get_device_descriptor100%of 6
usb_get_status70%of 10
usb_get_string100%of 8
usb_if_uevent100%of 3
usb_interrupt_msg---of 1
usb_release_interface67%of 6
usb_reset_configuration---of 21
usb_reset_endpoint---of 5
usb_set_configuration9%of 92
usb_set_interface11%of 38
usb_set_isoch_delay---of 5
usb_set_wireless_status---of 3
usb_sg_cancel---of 11
usb_sg_init---of 31
usb_sg_wait---of 20
usb_start_wait_urb84%of 12
usb_string85%of 20
usb_string_sub57%of 16
-----------
SUMMARY52%of 357
-----------
SUMMARY---of 0
____ip_mc_inc_group---of 52
__igmp_group_dropped16%of 39
__ip_mc_dec_group42%of 31
__ip_mc_inc_group---of 1
__ip_mc_join_group---of 23
add_grec---of 72
add_grhead---of 6
igmp_gq_timer_expire---of 5
igmp_group_added---of 23
igmp_ifc_event---of 18
igmp_ifc_timer_expire---of 85
igmp_mc_get_next.isra.0---of 29
igmp_mc_seq_next---of 18
igmp_mc_seq_show---of 18
igmp_mc_seq_start---of 26
igmp_mc_seq_stop---of 6
igmp_mcf_get_next.isra.0---of 29
igmp_mcf_seq_next---of 20
igmp_mcf_seq_show---of 4
igmp_mcf_seq_start---of 28
igmp_mcf_seq_stop---of 8
igmp_net_exit---of 2
igmp_net_init---of 6
igmp_netdev_event6%of 35
igmp_rcv---of 188
igmp_send_report.isra.0---of 16
igmp_start_timer---of 13
igmp_timer_expire---of 15
igmpv3_clear_delrec25%of 36
igmpv3_del_delrec---of 28
igmpv3_newpack---of 46
igmpv3_send_report.isra.0---of 33
igmpv3_sendpack.isra.0---of 10
ip_check_mc_rcu---of 47
ip_ma_put55%of 11
ip_mc_add_src---of 71
ip_mc_check_igmp---of 59
ip_mc_clear_src50%of 4
ip_mc_del1_src---of 28
ip_mc_del_src.isra.0---of 52
ip_mc_destroy_dev46%of 11
ip_mc_down46%of 22
ip_mc_drop_socket---of 12
ip_mc_find_dev---of 13
ip_mc_gsfget---of 36
ip_mc_inc_group---of 1
ip_mc_init_dev---of 4
ip_mc_join_group---of 1
ip_mc_join_group_ssm---of 1
ip_mc_leave_group---of 23
ip_mc_leave_src.isra.0---of 8
ip_mc_msfget---of 41
ip_mc_msfilter---of 40
ip_mc_remap---of 15
ip_mc_sf_allow---of 47
ip_mc_source---of 69
ip_mc_unmap---of 15
ip_mc_up---of 15
ip_mc_validate_checksum---of 12
is_in---of 24
sf_markstate---of 7
sf_setstate---of 30
unsolicited_report_interval---of 11
-----------
SUMMARY29%of 189
-----------
SUMMARY---of 0
con_allocate_new---of 4
con_clear_unimap---of 5
con_copy_unimap---of 8
con_free_unimap---of 3
con_get_trans_new---of 2
con_get_trans_old---of 6
con_get_unimap---of 20
con_insert_unipair---of 7
con_release_unimap---of 8
con_set_default_unimap---of 23
con_set_trans_new---of 3
con_set_trans_old---of 5
con_set_unimap---of 29
con_unify_unimap---of 20
conv_8bit_to_uni60%of 5
conv_uni_to_8bit---of 5
conv_uni_to_pc---of 13
inverse_translate---of 8
set_inverse_trans_unicode---of 14
set_inverse_transl---of 8
set_translate---of 1
update_user_maps---of 5
-----------
SUMMARY60%of 5
ctrl_audio_modes_present_get---of 7
ctrl_channel_get---of 4
ctrl_channel_set---of 16
ctrl_channelfreq_get---of 3
ctrl_channelfreq_set---of 7
ctrl_channelprog_get---of 1
ctrl_channelprog_set---of 3
ctrl_check_input---of 3
ctrl_cleardirty_audiomode---of 1
ctrl_cleardirty_balance---of 1
ctrl_cleardirty_bass---of 1
ctrl_cleardirty_brightness---of 1
ctrl_cleardirty_contrast---of 1
ctrl_cleardirty_croph---of 1
ctrl_cleardirty_cropl---of 1
ctrl_cleardirty_cropt---of 1
ctrl_cleardirty_cropw---of 1
ctrl_cleardirty_hue---of 1
ctrl_cleardirty_input---of 1
ctrl_cleardirty_mute---of 1
ctrl_cleardirty_res_hor---of 1
ctrl_cleardirty_res_ver---of 1
ctrl_cleardirty_saturation---of 1
ctrl_cleardirty_srate---of 1
ctrl_cleardirty_treble---of 1
ctrl_cleardirty_volume---of 1
ctrl_croph_max_get---of 7
ctrl_cropl_max_get---of 5
ctrl_cropl_min_get---of 4
ctrl_cropt_max_get---of 5
ctrl_cropt_min_get---of 4
ctrl_cropw_max_get---of 7
ctrl_cx2341x_clear_dirty---of 1
ctrl_cx2341x_get---of 3
ctrl_cx2341x_getv4lflags---of 6
ctrl_cx2341x_is_dirty---of 1
ctrl_cx2341x_set---of 6
ctrl_freq_clear_dirty---of 1
ctrl_freq_get---of 4
ctrl_freq_is_dirty---of 1
ctrl_freq_max_get---of 8
ctrl_freq_min_get---of 8
ctrl_freq_set---of 10
ctrl_get_audiomode---of 1
ctrl_get_balance---of 1
ctrl_get_bass---of 1
ctrl_get_brightness---of 1
ctrl_get_contrast---of 1
ctrl_get_cropcapbh---of 4
ctrl_get_cropcapbl---of 4
ctrl_get_cropcapbt---of 4
ctrl_get_cropcapbw---of 4
ctrl_get_cropcapdh---of 4
ctrl_get_cropcapdl---of 4
ctrl_get_cropcapdt---of 4
ctrl_get_cropcapdw---of 4
ctrl_get_cropcappad---of 4
ctrl_get_cropcappan---of 4
ctrl_get_croph---of 1
ctrl_get_cropl---of 1
ctrl_get_cropt---of 1
ctrl_get_cropw---of 1
ctrl_get_hue---of 1
ctrl_get_input---of 1
ctrl_get_mute---of 1
ctrl_get_res_hor---of 1
ctrl_get_res_ver---of 1
ctrl_get_saturation---of 1
ctrl_get_srate---of 1
ctrl_get_treble---of 1
ctrl_get_volume---of 1
ctrl_hsm_get---of 5
ctrl_isdirty_audiomode---of 1
ctrl_isdirty_balance---of 1
ctrl_isdirty_bass---of 1
ctrl_isdirty_brightness---of 1
ctrl_isdirty_contrast---of 1
ctrl_isdirty_croph---of 1
ctrl_isdirty_cropl---of 1
ctrl_isdirty_cropt---of 1
ctrl_isdirty_cropw---of 1
ctrl_isdirty_hue---of 1
ctrl_isdirty_input---of 1
ctrl_isdirty_mute---of 1
ctrl_isdirty_res_hor---of 1
ctrl_isdirty_res_ver---of 1
ctrl_isdirty_saturation---of 1
ctrl_isdirty_srate---of 1
ctrl_isdirty_treble---of 1
ctrl_isdirty_volume---of 1
ctrl_masterstate_get---of 1
ctrl_set_audiomode---of 1
ctrl_set_balance---of 1
ctrl_set_bass---of 1
ctrl_set_brightness---of 1
ctrl_set_contrast---of 1
ctrl_set_croph---of 1
ctrl_set_cropl---of 1
ctrl_set_cropt---of 1
ctrl_set_cropw---of 1
ctrl_set_hue---of 1
ctrl_set_input---of 8
ctrl_set_mute---of 1
ctrl_set_res_hor---of 1
ctrl_set_res_ver---of 1
ctrl_set_saturation---of 1
ctrl_set_srate---of 1
ctrl_set_treble---of 1
ctrl_set_volume---of 1
ctrl_signal_get---of 1
ctrl_std_sym_to_val---of 6
ctrl_std_val_to_sym---of 1
ctrl_stdavail_get---of 1
ctrl_stdavail_set---of 3
ctrl_stdcur_clear_dirty---of 1
ctrl_stdcur_get---of 1
ctrl_stdcur_is_dirty---of 1
ctrl_stdcur_set---of 3
ctrl_stddetect_get---of 7
ctrl_streamingenabled_get---of 1
ctrl_vres_max_get---of 2
ctrl_vres_min_get---of 2
print_input_mask---of 6
pvr2_config_get_name---of 3
pvr2_ctl_read_complete---of 2
pvr2_ctl_timeout---of 5
pvr2_ctl_write_complete---of 2
pvr2_decoder_enable---of 20
pvr2_hdw_cmd_decoder_reset---of 14
pvr2_hdw_cmd_deep_reset---of 1
pvr2_hdw_cmd_powerup---of 1
pvr2_hdw_cmd_usbstream---of 13
pvr2_hdw_commit_ctl---of 3
pvr2_hdw_commit_execute.isra.0---of 56
pvr2_hdw_commit_setup---of 17
pvr2_hdw_cpufw_get---of 12
pvr2_hdw_cpufw_get_enabled---of 1
pvr2_hdw_cpufw_set_enabled---of 50
pvr2_hdw_cpureset_assert---of 2
pvr2_hdw_cpureset_assert.part.0---of 11
pvr2_hdw_create---of 106
pvr2_hdw_cx25840_vbi_hack---of 12
pvr2_hdw_decoder_stabilization_timeout---of 3
pvr2_hdw_destroy---of 11
pvr2_hdw_dev_ok---of 3
pvr2_hdw_device_reset---of 10
pvr2_hdw_disconnect88%of 16
pvr2_hdw_encoder_run_timeout---of 4
pvr2_hdw_encoder_wait_timeout---of 3
pvr2_hdw_execute_tuner_poll---of 1
pvr2_hdw_get_bus_info---of 1
pvr2_hdw_get_cropcap---of 4
pvr2_hdw_get_ctrl_by_id---of 6
pvr2_hdw_get_ctrl_by_index---of 4
pvr2_hdw_get_ctrl_count---of 1
pvr2_hdw_get_ctrl_nextv4l---of 8
pvr2_hdw_get_ctrl_v4l---of 6
pvr2_hdw_get_cur_freq---of 5
pvr2_hdw_get_desc---of 1
pvr2_hdw_get_dev---of 1
pvr2_hdw_get_device_identifier---of 1
pvr2_hdw_get_driver_name---of 1
pvr2_hdw_get_input_allowed---of 1
pvr2_hdw_get_input_available---of 1
pvr2_hdw_get_sn---of 1
pvr2_hdw_get_state---of 1
pvr2_hdw_get_streaming---of 1
pvr2_hdw_get_tuner_status---of 3
pvr2_hdw_get_type---of 1
pvr2_hdw_get_unit_number---of 1
pvr2_hdw_get_video_stream---of 1
pvr2_hdw_gpio_chg_dir---of 8
pvr2_hdw_gpio_chg_out---of 8
pvr2_hdw_gpio_get_dir---of 1
pvr2_hdw_gpio_get_in---of 1
pvr2_hdw_gpio_get_out---of 1
pvr2_hdw_initialize---of 205
pvr2_hdw_is_hsm---of 3
pvr2_hdw_quiescent_timeout---of 3
pvr2_hdw_render_useless---of 2
pvr2_hdw_render_useless.part.070%of 10
pvr2_hdw_report_clients---of 10
pvr2_hdw_report_unlocked---of 38
pvr2_hdw_set_input_allowed---of 14
pvr2_hdw_set_input_allowed.cold---of 2
pvr2_hdw_set_stream_type---of 10
pvr2_hdw_set_streaming---of 17
pvr2_hdw_set_v4l2_dev---of 1
pvr2_hdw_state_log_state---of 10
pvr2_hdw_state_report---of 5
pvr2_hdw_status_poll---of 9
pvr2_hdw_trigger_module_log---of 8
pvr2_hdw_untrip---of 10
pvr2_hdw_v4l_get_minor_number---of 8
pvr2_hdw_v4l_store_minor_number---of 4
pvr2_hdw_wait---of 13
pvr2_hdw_worker_poll---of 49
pvr2_issue_simple_cmd---of 13
pvr2_led_ctrl_hauppauge---of 10
pvr2_locate_firmware---of 22
pvr2_read_register---of 1
pvr2_send_request---of 1
pvr2_send_request_ex---of 92
pvr2_subdev_set_control---of 5
pvr2_subdev_update---of 118
pvr2_upload_firmware1---of 31
pvr2_upload_firmware2---of 44
pvr2_write_register---of 1
state_eval_decoder_run---of 31
state_eval_encoder_config---of 28
state_eval_encoder_ok---of 15
state_eval_encoder_run---of 22
state_eval_pathway_ok---of 24
state_eval_pipeline_config---of 5
state_eval_usbstream_run---of 28
state_update_pipeline_state.isra.0---of 9
-----------
SUMMARY81%of 26
-----------
SUMMARY---of 0
__irq_disable---of 11
__irq_do_set_handler---of 31
__irq_set_handler---of 3
__irq_startup---of 6
bad_chained_irq---of 4
handle_edge_irq23%of 35
handle_fasteoi_irq---of 32
handle_fasteoi_nmi---of 2
handle_level_irq---of 20
handle_nested_irq---of 8
handle_percpu_devid_fasteoi_nmi---of 2
handle_percpu_devid_irq---of 12
handle_percpu_irq---of 4
handle_simple_irq---of 9
handle_untracked_irq---of 9
irq_activate---of 4
irq_activate_and_startup---of 6
irq_chip_ack_parent100%of 1
irq_chip_compose_msi_msg---of 9
irq_chip_disable_parent---of 3
irq_chip_enable_parent---of 3
irq_chip_eoi_parent---of 1
irq_chip_get_parent_state---of 5
irq_chip_mask_ack_parent---of 1
irq_chip_mask_parent---of 1
irq_chip_pm_get---of 7
irq_chip_pm_put---of 5
irq_chip_release_resources_parent---of 2
irq_chip_request_resources_parent---of 3
irq_chip_retrigger_hierarchy---of 7
irq_chip_set_affinity_parent---of 3
irq_chip_set_parent_state---of 5
irq_chip_set_type_parent---of 3
irq_chip_set_vcpu_affinity_parent---of 3
irq_chip_set_wake_parent---of 4
irq_chip_unmask_parent---of 1
irq_disable---of 11
irq_enable---of 9
irq_get_irq_data100%of 3
irq_modify_status---of 17
irq_percpu_disable---of 6
irq_percpu_enable---of 6
irq_set_chained_handler_and_data---of 3
irq_set_chip---of 4
irq_set_chip_and_handler_name---of 6
irq_set_chip_data---of 3
irq_set_handler_data---of 3
irq_set_irq_type---of 3
irq_set_msi_desc---of 5
irq_set_msi_desc_off---of 6
irq_shutdown---of 5
irq_shutdown_and_deactivate---of 6
irq_startup---of 19
mask_irq---of 3
trace_irq_handler_entry---of 15
trace_irq_handler_exit---of 15
unmask_irq---of 3
unmask_threaded_irq---of 5
-----------
SUMMARY31%of 39
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__do_sys_capget---of 27
__do_sys_capset---of 15
__ia32_sys_capget---of 1
__ia32_sys_capset---of 1
__x64_sys_capget---of 1
__x64_sys_capset---of 1
cap_validate_magic---of 11
capable67%of 6
capable_wrt_inode_uidgid80%of 5
file_ns_capable---of 5
has_capability---of 1
has_capability_noaudit---of 1
has_ns_capability---of 16
has_ns_capability_noaudit---of 16
ns_capable67%of 6
ns_capable_noaudit---of 6
ns_capable_setid---of 6
privileged_wrt_inode_uidgid100%of 3
ptracer_capable---of 18
-----------
SUMMARY75%of 20
yurex_control_callback---of 2
yurex_delete86%of 7
yurex_disconnect84%of 6
yurex_fasync---of 1
yurex_interrupt---of 19
yurex_open70%of 10
yurex_probe---of 21
yurex_read100%of 4
yurex_release---of 7
yurex_write28%of 22
-----------
SUMMARY58%of 49
devm_led_trigger_register---of 6
devm_led_trigger_release---of 1
led_trigger_blink---of 1
led_trigger_blink_oneshot100%of 1
led_trigger_blink_setup45%of 18
led_trigger_event58%of 14
led_trigger_format---of 9
led_trigger_read---of 4
led_trigger_register---of 20
led_trigger_register_simple---of 5
led_trigger_remove---of 1
led_trigger_set47%of 26
led_trigger_set_default---of 10
led_trigger_snprintf---of 4
led_trigger_unregister78%of 9
led_trigger_unregister_simple100%of 3
led_trigger_write---of 11
-----------
SUMMARY55%of 71
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
udl_drop_usb100%of 1
udl_free_urb_list84%of 6
udl_get_urb---of 1
udl_get_urb_locked42%of 17
udl_init---of 40
udl_select_std_channel---of 3
udl_submit_urb---of 5
udl_sync_pending_urbs---of 11
udl_urb_completion---of 6
-----------
SUMMARY55%of 24
cp210x_attach---of 94
cp210x_attach.cold---of 9
cp210x_break_ctl---of 7
cp210x_close---of 1
cp210x_disconnect100%of 2
cp210x_dtr_rts---of 3
cp210x_get_fw_version---of 5
cp210x_gpio_direction_get---of 2
cp210x_gpio_direction_get.cold---of 1
cp210x_gpio_direction_input---of 6
cp210x_gpio_direction_input.cold---of 2
cp210x_gpio_direction_output---of 2
cp210x_gpio_direction_output.cold---of 1
cp210x_gpio_get---of 6
cp210x_gpio_get.cold---of 1
cp210x_gpio_init_valid_mask---of 6
cp210x_gpio_set---of 12
cp210x_gpio_set.cold---of 2
cp210x_gpio_set_config---of 8
cp210x_gpio_set_config.cold---of 2
cp210x_open---of 9
cp210x_port_probe---of 3
cp210x_port_remove100%of 1
cp210x_process_lsr---of 13
cp210x_process_read_urb---of 37
cp210x_read_reg_block---of 3
cp210x_read_vendor_block---of 3
cp210x_release67%of 3
cp210x_set_termios---of 88
cp210x_tiocmget---of 4
cp210x_tiocmset---of 1
cp210x_tiocmset_port---of 23
cp210x_tx_empty---of 4
cp210x_write_reg_block---of 3
cp210x_write_u16_reg---of 3
cp210x_write_vendor_block.constprop.0---of 3
-----------
SUMMARY84%of 6
__clockevents_unbind---of 18
__clockevents_update_freq---of 11
cev_delta2ns---of 15
cev_delta2ns.cold---of 5
clockevent_delta2ns---of 1
clockevents_config.part.0---of 5
clockevents_config_and_register---of 3
clockevents_exchange_device---of 18
clockevents_program_event67%of 15
clockevents_program_event.cold---of 1
clockevents_program_min_delta29%of 14
clockevents_program_min_delta.cold---of 1
clockevents_register_device---of 16
clockevents_resume---of 5
clockevents_shutdown---of 6
clockevents_suspend---of 5
clockevents_switch_state---of 23
clockevents_tick_resume---of 3
clockevents_unbind_device---of 1
clockevents_update_freq---of 12
current_device_show---of 7
tick_cleanup_dead_cpu---of 18
tick_offline_cpu---of 1
unbind_device_store---of 14
-----------
SUMMARY49%of 29
async_set_reg_cb---of 4
fill_skb_pool---of 4
intr_callback---of 21
read_bulk_callback---of 25
rtl8150_close---of 5
rtl8150_disconnect50%of 4
rtl8150_get_drvinfo---of 1
rtl8150_get_link_ksettings---of 7
rtl8150_open---of 26
rtl8150_probe---of 21
rtl8150_reset---of 5
rtl8150_resume---of 4
rtl8150_set_mac_address---of 5
rtl8150_set_multicast---of 17
rtl8150_siocdevprivate---of 14
rtl8150_start_xmit---of 15
rtl8150_suspend---of 4
rtl8150_tx_timeout---of 1
rx_fixup---of 14
write_bulk_callback---of 10
-----------
SUMMARY50%of 4
get_stack_info62%of 13
get_stack_info.cold---of 3
stack_type_name---of 7
-----------
SUMMARY62%of 13
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__handle_link_change---of 6
__usbnet_read_cmd---of 13
__usbnet_write_cmd---of 9
defer_bh100%of 4
intr_complete---of 10
rx_alloc_submit---of 5
rx_complete59%of 31
rx_submit32%of 32
tx_complete40%of 15
unlink_urbs19%of 11
usbnet_async_cmd_cb---of 4
usbnet_bh---of 41
usbnet_bh_tasklet---of 1
usbnet_change_mtu---of 6
usbnet_defer_kevent---of 5
usbnet_deferred_kevent---of 48
usbnet_device_suggests_idle---of 2
usbnet_disconnect45%of 9
usbnet_get_drvinfo---of 9
usbnet_get_endpoints---of 26
usbnet_get_ethernet_addr---of 7
usbnet_get_link---of 8
usbnet_get_link_ksettings_internal---of 4
usbnet_get_link_ksettings_mii---of 3
usbnet_get_msglevel---of 1
usbnet_link_change---of 6
usbnet_manage_power100%of 1
usbnet_nway_reset---of 3
usbnet_open---of 32
usbnet_pause_rx---of 3
usbnet_probe---of 74
usbnet_purge_paused_rxq---of 1
usbnet_read_cmd---of 3
usbnet_read_cmd_nopm---of 1
usbnet_resume---of 26
usbnet_resume_rx---of 8
usbnet_set_link_ksettings_mii---of 5
usbnet_set_msglevel---of 1
usbnet_set_rx_mode---of 1
usbnet_skb_return---of 11
usbnet_start_xmit---of 93
usbnet_status_start---of 8
usbnet_status_stop---of 2
usbnet_status_stop.part.0---of 7
usbnet_stop64%of 19
usbnet_suspend---of 10
usbnet_terminate_urbs50%of 4
usbnet_tx_timeout---of 4
usbnet_unlink_rx_urbs---of 3
usbnet_update_max_qlen---of 6
usbnet_write_cmd---of 3
usbnet_write_cmd_async---of 12
usbnet_write_cmd_nopm---of 1
wait_skb_queue_empty67%of 3
-----------
SUMMARY48%of 129
-----------
SUMMARY---of 0
bridges_open---of 1
bridges_show---of 13
connector_open---of 1
connector_show---of 1
connector_write---of 8
drm_clients_info---of 30
drm_debugfs_add_file---of 2
drm_debugfs_add_files---of 4
drm_debugfs_connector_add---of 3
drm_debugfs_connector_remove100%of 2
drm_debugfs_create_files---of 6
drm_debugfs_crtc_add---of 2
drm_debugfs_crtc_remove100%of 1
drm_debugfs_dev_fini---of 1
drm_debugfs_dev_init---of 1
drm_debugfs_dev_register---of 9
drm_debugfs_encoder_add---of 4
drm_debugfs_encoder_remove100%of 1
drm_debugfs_entry_open---of 5
drm_debugfs_gpuva_info---of 6
drm_debugfs_open---of 3
drm_debugfs_register---of 4
drm_debugfs_remove_files---of 5
drm_debugfs_unregister---of 1
drm_gem_name_info---of 1
drm_gem_one_name_info---of 1
drm_name_info---of 10
edid_open---of 1
edid_show---of 1
edid_write---of 12
output_bpc_open---of 1
output_bpc_show---of 3
vrr_range_open---of 1
vrr_range_show---of 3
-----------
SUMMARY100%of 4
___drm_dbg75%of 4
___drm_dbg.cold---of 1
__drm_dev_dbg---of 7
__drm_dev_dbg.cold---of 1
__drm_err---of 1
__drm_printfn_coredump---of 8
__drm_printfn_dbg---of 8
__drm_printfn_dbg.cold---of 1
__drm_printfn_err---of 7
__drm_printfn_info---of 1
__drm_printfn_seq_file---of 1
__drm_puts_coredump---of 6
__drm_puts_seq_file---of 1
drm_dev_printk---of 4
drm_print_bits---of 12
drm_print_regset32---of 4
drm_printf---of 1
drm_puts---of 3
-----------
SUMMARY75%of 4
__bad_area67%of 3
__bad_area_nosemaphore67%of 21
__bpf_trace_x86_exceptions---of 1
__traceiter_page_fault_kernel---of 3
__traceiter_page_fault_user---of 3
access_error78%of 18
bad_area_access_error17%of 12
bad_area_nosemaphore100%of 1
do_kern_addr_fault75%of 4
do_user_addr_fault41%of 93
dump_pagetable---of 16
fault_in_kernel_space100%of 4
fault_signal_pending29%of 7
is_prefetch.constprop.08%of 28
kernelmode_fixup_or_oops.constprop.040%of 5
kmmio_fault.constprop.0100%of 1
page_fault_oops---of 44
perf_trace_x86_exceptions---of 5
pgtable_bad---of 2
show_ldttss---of 8
spurious_kernel_fault19%of 16
spurious_kernel_fault_check---of 8
trace_event_raw_event_x86_exceptions---of 6
trace_page_fault_kernel---of 15
trace_page_fault_user---of 15
trace_raw_output_x86_exceptions---of 5
-----------
SUMMARY42%of 213
dev_free93%of 13
gadget_bind71%of 17
gadget_disconnect50%of 4
gadget_ep0_complete---of 7
gadget_ep_complete75%of 4
gadget_reset50%of 4
gadget_resume50%of 4
gadget_setup---of 15
gadget_suspend50%of 4
gadget_unbind60%of 5
raw_alloc_io_data100%of 8
raw_event_queue_add72%of 7
raw_ioctl71%of 167
raw_ioctl_ep_set_clear_halt_wedge60%of 32
raw_ioctl_init86%of 27
raw_open100%of 4
raw_process_ep0_io52%of 29
raw_process_ep_io50%of 32
raw_release56%of 18
-----------
SUMMARY68%of 379
-----------
SUMMARY---of 0
kasprintf100%of 1
kvasprintf75%of 4
kvasprintf_const80%of 10
-----------
SUMMARY80%of 15
-----------
SUMMARY---of 0
__vc_resize---of 1
add_softcursor---of 11
blank_screen_t---of 1
build_attr---of 22
clear_buffer_attributes---of 3
complement_pos---of 16
con_cleanup---of 1
con_debug_enter---of 2
con_debug_leave---of 2
con_driver_unregister_callback---of 9
con_flush_chars---of 2
con_font_op---of 54
con_get_cmap---of 4
con_install---of 20
con_is_bound---of 8
con_is_visible---of 4
con_open---of 1
con_put_char---of 1
con_putc---of 4
con_scroll---of 20
con_set_cmap---of 9
con_shutdown---of 3
con_start---of 4
con_stop---of 4
con_unthrottle---of 1
con_write---of 4
con_write_room---of 1
console_callback---of 16
csi_DEC_hl.constprop.0---of 17
csi_J---of 28
csi_hl.constprop.0---of 7
cursor_report.constprop.0---of 3
do_blank_screen---of 20
do_con_write---of 351
do_take_over_console---of 52
do_unblank_screen---of 17
do_unregister_con_driver---of 6
do_update_region---of 14
getconsxy---of 1
give_up_console---of 1
gotoxy---of 13
hide_cursor---of 7
insert_char---of 5
invert_screen---of 20
juggle_array---of 6
lf---of 5
mouse_report---of 1
mouse_reporting---of 1
poke_blanked_console---of 10
putconsxy---of 10
redraw_screen---of 23
register_vt_notifier---of 1
reset_palette---of 2
reset_terminal---of 4
restore_cur.constprop.0---of 1
rgb_background---of 1
rgb_foreground---of 10
save_cur---of 1
save_screen---of 5
schedule_console_callback100%of 1
screen_glyph---of 3
screen_glyph_unicode---of 7
screen_pos---of 4
scrollback---of 1
scrollfront---of 3
set_console---of 8
set_cursor---of 9
set_origin---of 9
set_palette---of 6
show_bind---of 1
show_name---of 2
show_tty_active---of 1
store_bind---of 1
tioclinux---of 29
ucs_cmp---of 3
unblank_screen---of 1
unregister_vt_notifier---of 1
update_attr---of 1
update_region---of 6
vc_allocate---of 17
vc_cons_allocated---of 3
vc_deallocate---of 7
vc_do_resize---of 56
vc_init---of 3
vc_port_destruct---of 1
vc_reset_params---of 1
vc_setGx---of 7
vc_t416_color---of 22
vc_uniscr_check---of 15
vc_uniscr_copy_line---of 10
vcs_scr_readw---of 6
vcs_scr_updated---of 1
vcs_scr_writew---of 2
visual_init---of 10
vt_console_device---of 4
vt_console_print---of 46
vt_console_setup---of 2
vt_kmsg_redirect---of 5
vt_resize---of 1
-----------
SUMMARY100%of 1
arch_stack_walk73%of 11
arch_stack_walk_reliable---of 14
arch_stack_walk_user---of 11
-----------
SUMMARY73%of 11
strncpy_from_user65%of 17
-----------
SUMMARY65%of 17
usb_deregister_dev75%of 4
usb_devnode100%of 5
usb_major_cleanup---of 1
usb_major_init---of 4
usb_open100%of 8
usb_register_dev---of 16
-----------
SUMMARY95%of 17
dvb_usb_adapter_dvb_exit40%of 5
dvb_usb_adapter_dvb_init---of 19
dvb_usb_adapter_frontend_exit40%of 5
dvb_usb_adapter_frontend_init---of 15
dvb_usb_ctrl_feed---of 27
dvb_usb_fe_sleep---of 6
dvb_usb_fe_wakeup---of 7
dvb_usb_start_feed---of 1
dvb_usb_stop_feed---of 1
-----------
SUMMARY40%of 10
brightness_show---of 1
brightness_store---of 14
button_show---of 8
cytherm_disconnect100%of 1
cytherm_probe---of 3
port0_show---of 6
port0_store---of 6
port1_show---of 6
port1_store---of 6
temp_show---of 10
-----------
SUMMARY100%of 1
dmc_tsc10_init---of 10
dmc_tsc10_read_data---of 1
e2i_init---of 4
e2i_read_data---of 1
egalax_get_pkt_len---of 5
egalax_init---of 7
egalax_read_data---of 3
elo_read_data---of 1
etouch_get_pkt_len---of 6
etouch_read_data---of 5
eturbo_get_pkt_len---of 4
eturbo_read_data---of 3
general_touch_read_data---of 1
gotop_read_data---of 1
gunze_read_data---of 5
idealtek_get_pkt_len---of 4
idealtek_read_data---of 5
irtouch_read_data---of 1
itm_read_data---of 5
jastec_read_data---of 1
mtouch_alloc---of 5
mtouch_exit---of 1
mtouch_firmware_rev_show---of 1
mtouch_init---of 17
mtouch_read_data---of 4
nexio_alloc---of 8
nexio_exit---of 1
nexio_init---of 28
nexio_read_data---of 22
panjit_read_data---of 1
tc45usb_read_data---of 1
usbtouch_close---of 4
usbtouch_disconnect67%of 6
usbtouch_irq---of 10
usbtouch_open84%of 6
usbtouch_probe---of 81
usbtouch_process_multi---of 16
usbtouch_process_pkt---of 7
usbtouch_reset_resume---of 10
usbtouch_resume---of 4
usbtouch_suspend---of 1
zytronic_read_data---of 11
-----------
SUMMARY75%of 12
cgrp_css_alloc---of 2
cgrp_css_free---of 1
cgrp_css_online---of 7
net_prio_attach---of 3
netprio_device_event50%of 10
netprio_prio---of 11
netprio_set_prio.isra.0---of 31
read_prioidx---of 1
read_priomap---of 13
update_netprio---of 3
write_priomap---of 8
-----------
SUMMARY50%of 10
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__do_sys_set_mempolicy_home_node---of 36
__get_vma_policy---of 6
__ia32_sys_get_mempolicy---of 1
__ia32_sys_mbind---of 1
__ia32_sys_migrate_pages---of 1
__ia32_sys_set_mempolicy---of 1
__ia32_sys_set_mempolicy_home_node---of 1
__mpol_dup---of 8
__mpol_equal---of 11
__mpol_put---of 2
__nodes_weight.constprop.0---of 1
__x64_sys_get_mempolicy---of 1
__x64_sys_mbind---of 1
__x64_sys_migrate_pages---of 1
__x64_sys_set_mempolicy---of 1
__x64_sys_set_mempolicy_home_node---of 1
alloc_migration_target_by_mpol---of 11
alloc_migration_target_by_mpol.cold---of 2
alloc_pages100%of 6
alloc_pages_bulk_array_mempolicy12%of 90
alloc_pages_mpol22%of 14
apply_policy_zone---of 4
const_folio_flags.constprop.0---of 10
do_mbind---of 78
do_mbind.cold---of 1
do_migrate_pages---of 33
do_set_mempolicy---of 15
folio_alloc---of 6
get_bitmap---of 8
get_il_weight---of 18
get_nodes---of 11
get_task_policy72%of 7
get_vma_policy63%of 16
get_vma_policy.cold---of 2
huge_node---of 1
init_nodemask_of_mempolicy---of 9
interleave_nid---of 10
interleave_nodes---of 20
kernel_get_mempolicy---of 107
kernel_mbind---of 10
kernel_migrate_pages---of 52
kernel_set_mempolicy---of 10
mbind_range---of 25
mempolicy_in_oom_domain---of 6
mempolicy_kobj_release---of 6
mempolicy_slab_node20%of 15
mpol_free_shared_policy---of 6
mpol_misplaced---of 28
mpol_new---of 15
mpol_new_nodemask---of 3
mpol_new_preferred---of 5
mpol_parse_str---of 76
mpol_put_task_policy---of 2
mpol_rebind_mm---of 21
mpol_rebind_nodemask---of 8
mpol_rebind_policy---of 5
mpol_rebind_preferred---of 1
mpol_rebind_task---of 5
mpol_set_nodemask.part.0---of 9
mpol_set_shared_policy---of 42
mpol_shared_policy_init16%of 13
mpol_shared_policy_lookup34%of 6
mpol_to_str---of 15
node_show---of 1
node_store---of 16
numa_default_policy---of 1
numa_nearest_node---of 10
policy_nodemask20%of 26
queue_folios_hugetlb---of 50
queue_folios_hugetlb.cold---of 3
queue_folios_pte_range---of 53
queue_pages_range---of 6
queue_pages_test_walk---of 17
read_once_policy_nodemask---of 1
sp_alloc---of 6
sp_insert---of 10
sp_lookup.isra.0---of 12
sysfs_wi_release---of 5
vma_alloc_folio75%of 4
vma_dup_policy---of 6
vma_migratable---of 12
vma_policy_mof---of 13
weighted_interleave_nid---of 39
weighted_interleave_nodes---of 23
-----------
SUMMARY25%of 197
tomoyo_encode75%of 4
tomoyo_encode270%of 20
tomoyo_get_local_path---of 27
tomoyo_realpath_from_path58%of 26
tomoyo_realpath_nofollow---of 4
-----------
SUMMARY64%of 50
-----------
SUMMARY---of 0
__genl_sk_priv_get---of 5
ctrl_build_family_msg---of 4
ctrl_dumpfamily---of 8
ctrl_dumppolicy---of 29
ctrl_dumppolicy_done---of 1
ctrl_dumppolicy_prep---of 3
ctrl_dumppolicy_put_op---of 17
ctrl_dumppolicy_start---of 35
ctrl_fill_info---of 41
ctrl_getfamily---of 21
genl_bind---of 13
genl_cmd_full_to_split---of 11
genl_ctrl_event.isra.0---of 45
genl_done---of 6
genl_dumpit---of 6
genl_family_rcv_msg_attrs_parse.constprop.0---of 10
genl_family_rcv_msg_doit---of 8
genl_family_rcv_msg_dumpit---of 5
genl_get_cmd---of 19
genl_lock---of 1
genl_notify---of 5
genl_op_from_full---of 6
genl_op_from_small---of 3
genl_op_iter_next---of 27
genl_pernet_exit---of 1
genl_pernet_init---of 4
genl_rcv---of 1
genl_rcv_msg---of 28
genl_register_family---of 3
genl_register_family.part.0---of 85
genl_release---of 9
genl_sk_priv_get---of 12
genl_sk_privs_free.part.0---of 5
genl_split_op_check.isra.0---of 4
genl_start---of 14
genl_unbind---of 9
genl_unlock---of 1
genl_unregister_family---of 29
genl_validate_ops---of 33
genlmsg_multicast_allns---of 15
genlmsg_put100%of 6
-----------
SUMMARY100%of 6
sysfs_add_bin_file_mode_ns67%of 12
sysfs_add_file_mode_ns53%of 17
sysfs_add_file_to_group84%of 6
sysfs_break_active_protection---of 5
sysfs_change_owner---of 5
sysfs_chmod_file---of 3
sysfs_create_bin_file---of 6
sysfs_create_file_ns84%of 6
sysfs_create_files---of 8
sysfs_emit---of 5
sysfs_emit_at---of 6
sysfs_file_change_owner---of 5
sysfs_file_ops---of 7
sysfs_kf_bin_llseek---of 4
sysfs_kf_bin_mmap---of 1
sysfs_kf_bin_open---of 3
sysfs_kf_bin_read---of 8
sysfs_kf_bin_write---of 7
sysfs_kf_read---of 8
sysfs_kf_seq_show---of 15
sysfs_kf_write---of 3
sysfs_link_change_owner---of 9
sysfs_notify75%of 8
sysfs_remove_bin_file---of 1
sysfs_remove_file_from_group80%of 5
sysfs_remove_file_ns100%of 1
sysfs_remove_file_self---of 5
sysfs_remove_files---of 2
sysfs_unbreak_active_protection---of 1
-----------
SUMMARY70%of 55
-----------
SUMMARY---of 0
__f_setown---of 1
__ia32_compat_sys_fcntl---of 3
__ia32_compat_sys_fcntl64---of 1
__ia32_sys_fcntl---of 8
__x64_compat_sys_fcntl---of 3
__x64_compat_sys_fcntl64---of 1
__x64_sys_fcntl---of 8
do_compat_fcntl64---of 28
do_fcntl---of 83
f_delown---of 1
f_getown---of 14
f_modown---of 12
f_setown---of 18
fasync_alloc---of 1
fasync_free---of 1
fasync_helper75%of 8
fasync_insert_entry50%of 6
fasync_remove_entry80%of 5
kill_fasync54%of 30
send_sigio10%of 21
send_sigio_to_task---of 36
send_sigurg---of 68
-----------
SUMMARY45%of 70
-----------
SUMMARY---of 0
__ia32_sys_fgetxattr---of 7
__ia32_sys_flistxattr---of 7
__ia32_sys_fremovexattr---of 10
__ia32_sys_fsetxattr---of 10
__ia32_sys_getxattr---of 1
__ia32_sys_lgetxattr---of 1
__ia32_sys_listxattr---of 1
__ia32_sys_llistxattr---of 1
__ia32_sys_lremovexattr---of 1
__ia32_sys_lsetxattr---of 1
__ia32_sys_removexattr---of 1
__ia32_sys_setxattr---of 1
__vfs_getxattr86%of 7
__vfs_removexattr---of 7
__vfs_removexattr_locked---of 18
__vfs_setxattr---of 8
__vfs_setxattr_locked---of 11
__vfs_setxattr_noperm---of 22
__x64_sys_fgetxattr---of 7
__x64_sys_flistxattr---of 7
__x64_sys_fremovexattr---of 10
__x64_sys_fsetxattr---of 10
__x64_sys_getxattr---of 1
__x64_sys_lgetxattr---of 1
__x64_sys_listxattr---of 1
__x64_sys_llistxattr---of 1
__x64_sys_lremovexattr---of 1
__x64_sys_lsetxattr---of 1
__x64_sys_removexattr---of 1
__x64_sys_setxattr---of 1
do_getxattr---of 18
do_setxattr---of 5
generic_listxattr---of 11
getxattr---of 4
listxattr---of 11
may_write_xattr---of 4
path_getxattr---of 6
path_listxattr---of 6
path_removexattr---of 8
path_setxattr---of 8
removexattr---of 8
setxattr---of 4
setxattr_copy---of 8
simple_xattr_add---of 8
simple_xattr_alloc---of 4
simple_xattr_free---of 3
simple_xattr_get16%of 13
simple_xattr_list---of 12
simple_xattr_set---of 26
simple_xattr_space---of 1
simple_xattrs_free---of 6
simple_xattrs_init100%of 1
vfs_getxattr---of 15
vfs_getxattr_alloc---of 11
vfs_listxattr---of 6
vfs_removexattr---of 9
vfs_setxattr---of 15
xattr_full_name67%of 3
xattr_list_one---of 5
xattr_permission---of 15
xattr_resolve_name73%of 18
xattr_supports_user_prefix---of 9
-----------
SUMMARY58%of 42
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__call_nexthop_res_bucket_notifiers---of 47
__nexthop_replace_notify---of 7
__nh_valid_dump_req---of 16
__remove_nexthop---of 43
__unregister_nexthop_notifier---of 4
call_nexthop_notifiers---of 16
fib6_check_nexthop---of 25
fib6_check_nh_list---of 8
fib_check_nexthop---of 36
nexthop_alloc---of 3
nexthop_bucket_set_hw_flags---of 37
nexthop_find_by_id---of 7
nexthop_find_group_resilient---of 20
nexthop_flush_dev15%of 7
nexthop_for_each_fib6_nh---of 24
nexthop_free_rcu---of 21
nexthop_net_exit---of 1
nexthop_net_exit_batch_rtnl---of 16
nexthop_net_init---of 3
nexthop_notify---of 30
nexthop_res_grp_activity_update---of 36
nexthop_select_path---of 102
nexthop_select_path.cold---of 2
nexthop_set_hw_flags---of 24
nexthops_dump---of 13
nh_dump_filtered---of 23
nh_fill_node---of 127
nh_fill_res_bucket.constprop.0---of 17
nh_grp_entry_stats_inc---of 2
nh_grp_hw_stats_report_delta---of 1
nh_hthr_group_rebalance---of 4
nh_netdev_event25%of 12
nh_notifier_grp_info_fini---of 9
nh_notifier_grp_info_init---of 31
nh_notifier_mpath_info_init---of 13
nh_notifier_single_info_init---of 12
nh_res_bucket_migrate---of 22
nh_res_group_rebalance---of 12
nh_res_table_upkeep---of 21
nh_res_table_upkeep_dw---of 1
nh_valid_dump_bucket_req---of 20
nh_valid_dump_req---of 8
nh_valid_get_bucket_req---of 18
nh_valid_get_del_req---of 16
register_nexthop_notifier---of 3
remove_nexthop---of 10
remove_nh_grp_entry---of 43
replace_nexthop_grp_res---of 24
replace_nexthop_single_notify---of 32
rtm_del_nexthop---of 15
rtm_dump_nexthop---of 11
rtm_dump_nexthop_bucket---of 21
rtm_dump_nexthop_bucket_nh---of 25
rtm_get_nexthop---of 20
rtm_get_nexthop_bucket---of 23
rtm_new_nexthop---of 327
rtm_to_nh_config---of 188
unregister_nexthop_notifier---of 3
-----------
SUMMARY22%of 19
snd_use_lock_sync_helper34%of 6
-----------
SUMMARY34%of 6
-----------
SUMMARY---of 0
blake2s_compress100%of 6
-----------
SUMMARY100%of 6
-----------
SUMMARY---of 0
__scsi_add_device---of 10
__scsi_scan_target---of 30
do_scan_async---of 16
do_scsi_scan_host---of 6
scsi_add_device---of 5
scsi_alloc_sdev---of 22
scsi_alloc_target---of 39
scsi_complete_async_scans---of 12
scsi_enable_async_suspend---of 3
scsi_forget_host50%of 6
scsi_is_target_device---of 1
scsi_probe_and_add_lun---of 110
scsi_probe_lun.constprop.0---of 51
scsi_realloc_sdev_budget_map---of 13
scsi_realloc_sdev_budget_map.cold---of 1
scsi_report_lun_scan---of 31
scsi_rescan_device---of 11
scsi_resume_device---of 8
scsi_sanitize_inquiry_string---of 6
scsi_scan_channel---of 7
scsi_scan_host---of 15
scsi_scan_host_selected---of 17
scsi_scan_target---of 13
scsi_target_destroy---of 11
scsi_target_dev_release---of 1
scsi_target_reap---of 10
-----------
SUMMARY50%of 6
g920_ff_set_autocenter---of 6
g920_get_config---of 14
hidpp10_query_battery_mileage---of 5
hidpp10_query_battery_status---of 7
hidpp10_set_register.constprop.0---of 3
hidpp20_battery_map_status_voltage---of 10
hidpp20_batterylevel_map_status_capacity---of 10
hidpp20_map_adc_measurement_1f20---of 5
hidpp20_map_adc_measurement_1f20_capacity---of 9
hidpp20_map_battery_capacity---of 8
hidpp20_query_adc_measurement_info_1f20---of 12
hidpp20_query_battery_info_1000---of 20
hidpp20_query_battery_info_1004---of 28
hidpp20_query_battery_voltage_info---of 11
hidpp_battery_get_property---of 14
hidpp_connect_event---of 144
hidpp_event---of 8
hidpp_ff_destroy---of 1
hidpp_ff_erase_effect---of 3
hidpp_ff_playback---of 5
hidpp_ff_queue_work---of 5
hidpp_ff_range_show---of 1
hidpp_ff_range_store---of 1
hidpp_ff_set_autocenter---of 3
hidpp_ff_set_gain---of 3
hidpp_ff_upload_effect---of 45
hidpp_ff_work_handler---of 24
hidpp_get_device_name---of 20
hidpp_input_close---of 1
hidpp_input_configured---of 3
hidpp_input_mapped---of 7
hidpp_input_mapping---of 13
hidpp_input_open---of 1
hidpp_non_unifying_init---of 12
hidpp_populate_input---of 15
hidpp_prefix_name---of 4
hidpp_probe---of 86
hidpp_raw_event---of 49
hidpp_raw_hidpp_event---of 149
hidpp_remove67%of 3
hidpp_report_fixup---of 7
hidpp_root_get_feature---of 4
hidpp_scroll_counter_handle_scroll---of 8
hidpp_send_fap_command_sync---of 7
hidpp_send_message_sync---of 33
hidpp_send_rap_command_sync.constprop.0---of 3
hidpp_solar_request_battery_event.constprop.0---of 6
hidpp_unifying_init.isra.0---of 15
hidpp_update_usb_wireless_status---of 3
wtp_get_config---of 11
wtp_send_raw_xy_event---of 13
-----------
SUMMARY67%of 3
__nla_parse---of 1
__nla_put100%of 1
__nla_put_64bit100%of 1
__nla_put_nohdr---of 1
__nla_reserve100%of 1
__nla_reserve_64bit---of 1
__nla_reserve_nohdr---of 1
__nla_validate---of 1
__nla_validate_parse---of 160
nla_append---of 6
nla_find---of 6
nla_get_range_signed---of 7
nla_get_range_unsigned---of 11
nla_memcmp---of 3
nla_memcpy---of 3
nla_policy_len---of 9
nla_put100%of 5
nla_put_64bit100%of 5
nla_put_nohdr---of 6
nla_reserve100%of 5
nla_reserve_64bit100%of 5
nla_reserve_nohdr---of 6
nla_strcmp---of 6
nla_strdup---of 6
nla_strscpy---of 10
-----------
SUMMARY100%of 23
uclogic_input_configured---of 11
uclogic_input_mapping---of 3
uclogic_inrange_timeout---of 4
uclogic_probe---of 18
uclogic_raw_event---of 69
uclogic_remove100%of 1
uclogic_report_fixup---of 4
uclogic_resume---of 4
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
pyra_probe---of 21
pyra_raw_event---of 15
pyra_remove40%of 5
pyra_sysfs_read.constprop.0---of 7
pyra_sysfs_read_info---of 6
pyra_sysfs_read_profile_buttons---of 6
pyra_sysfs_read_profile_settings---of 6
pyra_sysfs_read_profilex_buttons---of 5
pyra_sysfs_read_profilex_settings---of 5
pyra_sysfs_read_settings---of 6
pyra_sysfs_show_actual_cpi---of 1
pyra_sysfs_show_actual_profile---of 1
pyra_sysfs_show_firmware_version---of 1
pyra_sysfs_write_control---of 5
pyra_sysfs_write_info---of 5
pyra_sysfs_write_profile_buttons---of 5
pyra_sysfs_write_profile_settings---of 5
pyra_sysfs_write_settings---of 9
-----------
SUMMARY40%of 5
__dump_page_owner---of 21
__folio_copy_owner---of 11
__folio_copy_owner.cold---of 2
__reset_page_owner87%of 15
__reset_page_owner.cold---of 2
__set_page_owner80%of 15
__set_page_owner.cold---of 1
__set_page_owner_migrate_reason---of 2
__split_page_owner---of 6
__split_page_owner.cold---of 1
__update_page_owner_handle78%of 9
__update_page_owner_handle.cold---of 1
lseek_page_owner---of 4
page_owner_stack_open---of 1
page_owner_threshold_get---of 1
page_owner_threshold_set---of 1
pagetypeinfo_showmixedcount_print---of 57
pagetypeinfo_showmixedcount_print.cold---of 1
proc_page_owner_threshold_open---of 1
read_page_owner---of 137
read_page_owner.cold---of 1
register_dummy_stack---of 1
register_early_stack---of 1
register_failure_stack---of 1
save_stack84%of 6
stack_next---of 3
stack_print---of 8
stack_start---of 6
-----------
SUMMARY83%of 45
ch341_break_ctl---of 33
ch341_carrier_raised---of 1
ch341_close---of 1
ch341_configure---of 7
ch341_control_in.constprop.0---of 7
ch341_control_out---of 7
ch341_dtr_rts---of 4
ch341_get_status---of 3
ch341_open---of 13
ch341_port_probe---of 11
ch341_port_remove100%of 1
ch341_read_int_callback---of 24
ch341_reset_resume---of 9
ch341_set_baudrate_lcr---of 30
ch341_set_termios---of 24
ch341_tiocmget---of 3
ch341_tiocmset---of 10
-----------
SUMMARY100%of 1
dvb_usb_device_exit47%of 15
dvb_usb_device_init---of 90
dvb_usb_device_power_ctrl---of 7
-----------
SUMMARY47%of 15
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
keyspan_break_ctl---of 1
keyspan_close---of 3
keyspan_disconnect100%of 1
keyspan_dtr_rts---of 1
keyspan_fake_startup---of 12
keyspan_open---of 11
keyspan_port_probe---of 18
keyspan_port_remove100%of 1
keyspan_release100%of 1
keyspan_send_setup---of 6
keyspan_set_termios---of 4
keyspan_setup_urb---of 23
keyspan_startup---of 25
keyspan_tiocmget---of 1
keyspan_tiocmset---of 9
keyspan_usa19_calc_baud---of 13
keyspan_usa19hs_calc_baud---of 12
keyspan_usa19w_calc_baud---of 19
keyspan_usa26_send_setup.isra.0---of 32
keyspan_usa28_calc_baud---of 15
keyspan_usa28_send_setup.isra.0---of 20
keyspan_usa49_send_setup.isra.0---of 32
keyspan_usa67_send_setup.isra.0---of 28
keyspan_usa90_send_setup.isra.0---of 36
keyspan_write---of 19
keyspan_write_room---of 6
usa26_indat_callback---of 44
usa26_instat_callback---of 15
usa26_outcont_callback---of 4
usa28_indat_callback---of 12
usa28_instat_callback---of 16
usa28_outcont_callback---of 4
usa2x_outdat_callback---of 3
usa49_glocont_callback---of 7
usa49_indat_callback---of 30
usa49_instat_callback---of 16
usa49wg_indat_callback---of 45
usa67_glocont_callback---of 7
usa67_instat_callback---of 16
usa90_indat_callback---of 46
usa90_instat_callback---of 13
usa90_outcont_callback---of 4
-----------
SUMMARY100%of 3
-----------
SUMMARY---of 0
quirks_param_set---of 46
usb_detect_interface_quirks25%of 4
usb_detect_quirks50%of 14
usb_detect_static_quirks93%of 14
usb_endpoint_is_ignored80%of 5
usb_release_quirk_list---of 1
-----------
SUMMARY68%of 37
pvr_disconnect50%of 4
pvr_probe---of 7
pvr_setup_attach---of 1
-----------
SUMMARY50%of 4
-----------
SUMMARY---of 0
roccat_common2_sysfs_read_info---of 1
roccat_common2_sysfs_read_key_mask---of 1
roccat_common2_sysfs_read_keys_easyzone---of 1
roccat_common2_sysfs_read_keys_extra---of 1
roccat_common2_sysfs_read_keys_function---of 1
roccat_common2_sysfs_read_keys_macro---of 1
roccat_common2_sysfs_read_keys_primary---of 1
roccat_common2_sysfs_read_keys_thumbster---of 1
roccat_common2_sysfs_read_light---of 1
roccat_common2_sysfs_read_light_macro---of 1
roccat_common2_sysfs_read_macro---of 1
roccat_common2_sysfs_read_profile---of 1
roccat_common2_sysfs_read_stored_lights---of 1
roccat_common2_sysfs_write_control---of 1
roccat_common2_sysfs_write_custom_lights---of 1
roccat_common2_sysfs_write_key_mask---of 1
roccat_common2_sysfs_write_keys_easyzone---of 1
roccat_common2_sysfs_write_keys_extra---of 1
roccat_common2_sysfs_write_keys_function---of 1
roccat_common2_sysfs_write_keys_macro---of 1
roccat_common2_sysfs_write_keys_primary---of 1
roccat_common2_sysfs_write_keys_thumbster---of 1
roccat_common2_sysfs_write_light---of 1
roccat_common2_sysfs_write_light_control---of 1
roccat_common2_sysfs_write_light_macro---of 1
roccat_common2_sysfs_write_macro---of 1
roccat_common2_sysfs_write_profile---of 1
roccat_common2_sysfs_write_reset---of 1
roccat_common2_sysfs_write_stored_lights---of 1
roccat_common2_sysfs_write_talk---of 1
ryos_probe---of 19
ryos_raw_event---of 6
ryos_remove100%of 5
-----------
SUMMARY100%of 5
ex_get_fixup_type---of 3
ex_handler_msr---of 18
ex_handler_uaccess56%of 9
ex_handler_zeropad---of 14
fixup_exception8%of 55
-----------
SUMMARY15%of 64
async_completed---of 38
async_completed.cold---of 1
async_getcompleted---of 5
check_ctrlrecip---of 15
check_reset_of_active_ep---of 6
checkintf---of 5
claimintf---of 8
copy_urb_data_to_user---of 12
dec_usb_memory_use_count---of 7
destroy_async---of 5
destroy_async_on_interface---of 8
do_proc_bulk---of 44
do_proc_control---of 37
driver_disconnect---of 5
driver_probe---of 1
driver_resume---of 1
driver_suspend---of 1
findintfep---of 13
free_async---of 15
get_urb32---of 3
parse_usbdevfs_streams---of 22
proc_disconnect_claim---of 14
proc_do_submiturb---of 160
proc_getdriver---of 10
proc_ioctl---of 29
processcompl---of 17
processcompl_compat---of 17
reap_as---of 8
releaseintf---of 5
snoop_urb.part.0---of 10
snoop_urb_data---of 9
usb_devio_cleanup---of 1
usbdev_ioctl---of 228
usbdev_mmap---of 24
usbdev_notify56%of 9
usbdev_open---of 21
usbdev_poll---of 14
usbdev_read---of 23
usbdev_release---of 16
usbdev_vm_close---of 1
usbdev_vm_open---of 1
usbfs_blocking_completion---of 1
usbfs_notify_resume67%of 3
usbfs_start_wait_urb---of 8
-----------
SUMMARY59%of 12
mon_bus_complete---of 3
mon_bus_init---of 4
mon_bus_lookup---of 6
mon_bus_submit---of 3
mon_bus_submit_error---of 3
mon_complete---of 3
mon_notify12%of 18
mon_reader_add---of 13
mon_reader_del---of 17
mon_submit---of 3
mon_submit_error---of 3
-----------
SUMMARY12%of 18
_kstrtol---of 3
_kstrtoul---of 3
_kstrtoull---of 7
_parse_integer---of 1
_parse_integer_fixup_radix24%of 13
_parse_integer_limit70%of 10
kstrtobool---of 7
kstrtobool_from_user---of 3
kstrtoint---of 4
kstrtoint_from_user---of 3
kstrtol_from_user---of 1
kstrtoll---of 8
kstrtoll_from_user---of 3
kstrtos16---of 4
kstrtos16_from_user---of 3
kstrtos8---of 4
kstrtos8_from_user---of 3
kstrtou16---of 4
kstrtou16_from_user---of 3
kstrtou8---of 4
kstrtou8_from_user---of 3
kstrtouint---of 4
kstrtouint_from_user---of 3
kstrtoul_from_user---of 1
kstrtoull---of 3
kstrtoull_from_user---of 3
-----------
SUMMARY44%of 23
-----------
SUMMARY---of 0
__import_iovec---of 29
__iov_iter_get_pages_alloc---of 110
__iov_iter_get_pages_alloc.cold---of 1
_copy_from_iter---of 58
_copy_from_iter_flushcache---of 47
_copy_from_iter_nocache---of 48
_copy_mc_to_iter---of 57
_copy_to_iter18%of 58
const_folio_flags.constprop.0---of 10
copy_compat_iovec_from_user---of 9
copy_iovec_from_user---of 9
copy_page_from_iter---of 7
copy_page_from_iter_atomic21%of 59
copy_page_to_iter78%of 9
copy_page_to_iter_nofault---of 70
copy_page_to_iter_nofault.cold---of 2
dup_iter---of 5
fault_in_iov_iter_readable30%of 10
fault_in_iov_iter_writeable---of 10
folio_size---of 3
folio_size.cold---of 1
import_iovec---of 3
import_ubuf---of 6
iov_iter_advance---of 18
iov_iter_alignment---of 15
iov_iter_bvec---of 3
iov_iter_discard---of 3
iov_iter_extract_pages---of 105
iov_iter_extract_pages.cold---of 1
iov_iter_gap_alignment---of 12
iov_iter_get_pages2---of 5
iov_iter_get_pages_alloc2---of 4
iov_iter_init---of 3
iov_iter_is_aligned---of 17
iov_iter_kvec---of 3
iov_iter_npages---of 18
iov_iter_restore---of 9
iov_iter_revert---of 15
iov_iter_single_seg_count---of 7
iov_iter_xarray---of 3
iov_iter_zero---of 55
iov_iter_zero.cold---of 2
iovec_from_user---of 4
iovec_from_user.part.0---of 11
page_copy_sane19%of 16
page_copy_sane.cold---of 1
rcu_lock_acquire.constprop.0---of 1
rcu_read_unlock---of 6
want_pages_array---of 6
xas_next_entry.constprop.0---of 16
-----------
SUMMARY24%of 152
__del_from_avail_list---of 7
__do_sys_swapoff---of 128
__do_sys_swapon---of 196
__do_sys_swapon.cold---of 1
__folio_throttle_swaprate30%of 10
__ia32_sys_swapoff---of 1
__ia32_sys_swapon---of 1
__page_file_index---of 8
__swap_count---of 3
__swap_duplicate---of 21
__swap_entry_free---of 7
__swap_entry_free_locked---of 14
__try_to_reclaim_swap---of 19
__x64_sys_swapoff---of 1
__x64_sys_swapon---of 1
_enable_swap_info---of 4
_swap_info_get---of 11
add_swap_count_continuation---of 29
add_swap_extent---of 10
add_to_avail_list---of 6
const_folio_flags.constprop.0---of 10
count_swap_pages---of 6
destroy_swap_extents---of 6
find_first_swap---of 6
folio_flags.constprop.0---of 10
folio_free_swap---of 11
free_swap_and_cache---of 15
generic_max_swapfile_size---of 1
get_swap_device---of 30
get_swap_page_of_type---of 8
get_swap_pages---of 22
has_usable_swap---of 1
inc_cluster_info_page---of 11
percpu_ref_put_many.constprop.0---of 15
put_swap_folio---of 9
scan_swap_map_slots---of 100
scan_swap_map_try_ssd_cluster---of 21
setup_swap_info---of 13
si_swapinfo---of 5
swap_count_continued---of 29
swap_discard_work---of 1
swap_do_scheduled_discard---of 23
swap_duplicate---of 3
swap_folio_sector---of 10
swap_free---of 2
swap_next---of 8
swap_offset_available_and_locked---of 6
swap_shmem_alloc---of 1
swap_show---of 9
swap_start---of 7
swap_stop---of 1
swap_swapcount---of 5
swap_type_of---of 9
swap_users_ref_free---of 1
swapcache_clear---of 7
swapcache_free_entries---of 45
swapcache_mapping---of 3
swapcache_prepare---of 1
swapdev_block---of 12
swaps_open---of 4
swaps_poll---of 6
swp_entry_cmp---of 1
swp_swap_info---of 3
swp_swapcount---of 14
unuse_pte_range---of 78
-----------
SUMMARY30%of 10
-----------
SUMMARY---of 0
__hrtimer_get_remaining---of 5
__hrtimer_init88%of 8
__hrtimer_next_event_base84%of 12
__hrtimer_next_event_base.cold---of 1
__hrtimer_run_queues50%of 63
__hrtimer_run_queues.cold---of 1
__ia32_sys_nanosleep---of 7
__ia32_sys_nanosleep_time32---of 7
__remove_hrtimer40%of 10
__remove_hrtimer.cold---of 1
__x64_sys_nanosleep---of 7
__x64_sys_nanosleep_time32---of 7
clock_was_set---of 24
clock_was_set.cold---of 1
clock_was_set_delayed---of 1
clock_was_set_work---of 1
destroy_hrtimer_on_stack---of 1
do_nanosleep50%of 14
enqueue_hrtimer43%of 19
enqueue_hrtimer.cold---of 1
hrtimer_active86%of 7
hrtimer_cancel---of 3
hrtimer_debug_hint---of 1
hrtimer_fixup_activate---of 3
hrtimer_fixup_free---of 6
hrtimer_fixup_init---of 6
hrtimer_forward79%of 14
hrtimer_get_next_event---of 5
hrtimer_init38%of 16
hrtimer_init_on_stack---of 1
hrtimer_init_sleeper---of 16
hrtimer_init_sleeper_on_stack---of 1
hrtimer_interrupt40%of 15
hrtimer_nanosleep67%of 12
hrtimer_nanosleep_restart---of 1
hrtimer_next_event_without---of 5
hrtimer_reprogram62%of 13
hrtimer_run_queues13%of 8
hrtimer_run_softirq---of 3
hrtimer_sleeper_start_expires---of 1
hrtimer_start_range_ns57%of 50
hrtimer_try_to_cancel52%of 25
hrtimer_update_next_event60%of 5
hrtimer_wakeup100%of 3
hrtimers_cpu_dying---of 26
hrtimers_prepare_cpu---of 3
hrtimers_resume_local---of 5
ktime_add_safe---of 4
ktime_get_boottime---of 1
ktime_get_clocktai---of 1
ktime_get_real---of 1
nanosleep_copyout---of 6
retrigger_next_event---of 9
schedule_hrtimeout---of 1
schedule_hrtimeout_range---of 1
schedule_hrtimeout_range_clock---of 16
-----------
SUMMARY55%of 294
_cfg80211_unregister_wdev---of 29
cfg80211_destroy_iface_wk---of 1
cfg80211_destroy_ifaces---of 9
cfg80211_dev_check_name---of 19
cfg80211_dev_free43%of 7
cfg80211_dev_rename---of 16
cfg80211_event_work---of 1
cfg80211_init_wdev---of 5
cfg80211_leave---of 13
cfg80211_netdev_notifier_call4%of 64
cfg80211_pernet_exit---of 9
cfg80211_process_wiphy_works---of 12
cfg80211_propagate_cac_done_wk---of 1
cfg80211_propagate_radar_detect_wk---of 1
cfg80211_rdev_by_wiphy_idx---of 11
cfg80211_register_netdevice---of 16
cfg80211_register_wdev---of 14
cfg80211_rfkill_block_work---of 1
cfg80211_rfkill_poll---of 16
cfg80211_rfkill_set_block---of 3
cfg80211_sched_scan_stop_wk---of 4
cfg80211_shutdown_all_interfaces---of 13
cfg80211_stop_iface---of 24
cfg80211_stop_nan---of 25
cfg80211_stop_p2p_device---of 31
cfg80211_switch_netns---of 28
cfg80211_unregister_wdev---of 1
cfg80211_update_iface_num---of 5
cfg80211_wiphy_work---of 10
get_wiphy_idx---of 3
trace_rdev_return_void---of 15
wiphy_delayed_work_cancel---of 4
wiphy_delayed_work_flush---of 4
wiphy_delayed_work_queue---of 3
wiphy_delayed_work_timer---of 1
wiphy_free100%of 1
wiphy_idx_to_wiphy---of 6
wiphy_new_nm---of 70
wiphy_register---of 194
wiphy_rfkill_set_hw_state_reason---of 4
wiphy_rfkill_start_polling---of 4
wiphy_unregister---of 39
wiphy_work_cancel---of 10
wiphy_work_flush---of 6
wiphy_work_queue---of 6
-----------
SUMMARY9%of 72
__usb_bus_reprobe_drivers---of 6
autosuspend_check11%of 19
is_usb_device_driver100%of 1
new_id_show---of 1
new_id_store---of 1
rebind_marked_interfaces.isra.0---of 10
remove_id_show---of 1
remove_id_store---of 11
unbind_marked_interfaces.isra.0---of 6
usb_autopm_get_interface100%of 6
usb_autopm_get_interface_async29%of 7
usb_autopm_get_interface_no_resume100%of 1
usb_autopm_put_interface100%of 1
usb_autopm_put_interface_async100%of 1
usb_autopm_put_interface_no_suspend100%of 3
usb_autoresume_device17%of 6
usb_autosuspend_device---of 1
usb_deregister---of 8
usb_deregister_device_driver---of 1
usb_device_match59%of 12
usb_device_match_id---of 5
usb_disable_autosuspend100%of 1
usb_disable_usb2_hardware_lpm40%of 5
usb_driver_applicable58%of 14
usb_driver_claim_interface---of 12
usb_driver_release_interface---of 6
usb_enable_autosuspend---of 1
usb_enable_usb2_hardware_lpm40%of 5
usb_forced_unbind_intf---of 10
usb_match_device65%of 17
usb_match_dynamic_id---of 8
usb_match_id---of 9
usb_match_one_id---of 5
usb_match_one_id_intf84%of 12
usb_probe_device27%of 19
usb_probe_interface---of 33
usb_register_device_driver---of 5
usb_register_driver---of 10
usb_resume---of 6
usb_resume_both64%of 22
usb_resume_complete---of 4
usb_resume_interface.constprop.0.isra.040%of 15
usb_runtime_idle50%of 4
usb_runtime_resume100%of 1
usb_runtime_suspend23%of 9
usb_show_dynids---of 7
usb_store_new_id---of 23
usb_suspend---of 18
usb_suspend_both---of 40
usb_uevent53%of 17
usb_unbind_and_rebind_marked_interfaces---of 1
usb_unbind_device86%of 7
usb_unbind_interface85%of 32
-----------
SUMMARY56%of 237
-----------
SUMMARY---of 0
i2c_acpi_add_device---of 5
i2c_acpi_add_irq_resource---of 4
i2c_acpi_client_count---of 3
i2c_acpi_do_lookup---of 8
i2c_acpi_fill_info---of 9
i2c_acpi_find_adapter_by_handle---of 5
i2c_acpi_find_bus_speed---of 11
i2c_acpi_get_i2c_resource---of 5
i2c_acpi_get_info---of 17
i2c_acpi_get_irq---of 9
i2c_acpi_install_space_handler---of 10
i2c_acpi_lookup_speed---of 8
i2c_acpi_new_device_by_fwnode---of 10
i2c_acpi_notify---of 13
i2c_acpi_register_devices---of 7
i2c_acpi_remove_space_handler38%of 8
i2c_acpi_resource_count---of 5
i2c_acpi_space_handler---of 45
i2c_acpi_waive_d0_probe---of 6
-----------
SUMMARY38%of 8
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__suspend_report_result---of 2
async_resume---of 1
async_resume_early---of 1
async_resume_noirq---of 1
async_suspend---of 1
async_suspend_late---of 1
async_suspend_noirq---of 1
dev_pm_skip_resume---of 6
dev_pm_skip_suspend---of 3
device_pm_add47%of 15
device_pm_check_callbacks94%of 15
device_pm_lock---of 1
device_pm_move_after---of 14
device_pm_move_before---of 14
device_pm_move_last---of 10
device_pm_remove50%of 10
device_pm_sleep_init100%of 1
device_pm_unlock---of 1
device_pm_wait_for_dev---of 6
device_resume---of 36
device_resume_early---of 41
device_resume_noirq---of 48
device_suspend---of 64
device_suspend_late---of 39
device_suspend_noirq---of 41
dpm_async_fn---of 7
dpm_complete---of 28
dpm_for_each_dev---of 4
dpm_noirq_resume_devices---of 12
dpm_prepare---of 35
dpm_resume---of 12
dpm_resume_early---of 12
dpm_resume_end---of 1
dpm_resume_noirq---of 1
dpm_resume_start---of 1
dpm_run_callback---of 11
dpm_show_time---of 10
dpm_suspend---of 14
dpm_suspend_end---of 5
dpm_suspend_late---of 15
dpm_suspend_noirq---of 16
dpm_suspend_start---of 4
dpm_wait_fn---of 6
dpm_wait_for_subordinate---of 9
dpm_wait_for_superior---of 17
initcall_debug_report.part.0---of 1
pm_dev_dbg---of 6
pm_dev_err---of 2
pm_late_early_op---of 8
pm_noirq_op---of 8
pm_op---of 8
pm_ops_is_empty100%of 11
pm_verb---of 3
trace_device_pm_callback_end---of 15
trace_device_pm_callback_start---of 15
trace_suspend_resume---of 15
-----------
SUMMARY74%of 52
mt_allocate_report_data---of 29
mt_allocate_usage---of 4
mt_event---of 11
mt_expired_timeout---of 4
mt_feature_mapping---of 12
mt_get_feature---of 7
mt_input_configured---of 39
mt_input_mapped---of 9
mt_input_mapping---of 120
mt_probe---of 31
mt_release_contacts---of 8
mt_remove100%of 1
mt_report---of 132
mt_reset_resume---of 1
mt_resume---of 3
mt_set_modes---of 23
mt_set_quirks---of 8
mt_show_quirks---of 1
mt_store_field---of 15
mt_suspend---of 10
set_abs---of 4
-----------
SUMMARY100%of 1
__put_user_ns---of 1
cmp_extents_forward---of 3
cmp_extents_reverse---of 3
cmp_map_id---of 11
create_user_ns---of 22
current_in_userns---of 3
free_user_ns---of 12
from_kgid100%of 1
from_kgid_munged---of 3
from_kprojid---of 1
from_kprojid_munged---of 2
from_kuid100%of 1
from_kuid_munged---of 3
gid_m_show---of 3
gid_m_start---of 7
in_userns---of 3
m_next---of 1
make_kgid100%of 1
make_kprojid100%of 1
make_kuid100%of 1
map_id_down---of 1
map_id_range_down93%of 13
map_id_up73%of 11
map_write---of 94
ns_get_owner---of 12
proc_gid_map_write---of 5
proc_projid_map_write---of 5
proc_setgroups_show---of 2
proc_setgroups_write---of 13
proc_uid_map_write---of 5
projid_m_show---of 3
projid_m_start---of 7
uid_m_show---of 3
uid_m_start---of 7
unshare_userns---of 7
userns_get---of 30
userns_install---of 21
userns_may_setgroups---of 3
userns_owner---of 1
userns_put---of 6
-----------
SUMMARY87%of 29
__iterate_supers---of 10
__put_super.part.0---of 9
alloc_super---of 17
bdev_super_lock---of 18
compare_single---of 1
deactivate_locked_super---of 5
deactivate_super67%of 6
destroy_super_rcu---of 1
destroy_super_work---of 7
do_emergency_remount---of 1
do_emergency_remount_callback---of 9
do_thaw_all---of 1
do_thaw_all_callback---of 7
drop_super---of 3
drop_super_exclusive---of 3
emergency_remount---of 2
emergency_thaw_all---of 2
free_anon_bdev---of 1
freeze_super---of 71
fs_bdev_freeze---of 11
fs_bdev_mark_dead---of 6
fs_bdev_sync---of 2
fs_bdev_thaw---of 11
generic_shutdown_super---of 11
get_anon_bdev---of 5
get_bdev_super---of 7
get_tree_bdev---of 19
get_tree_keyed---of 9
get_tree_nodev---of 9
get_tree_single---of 9
grab_super---of 19
iterate_supers---of 13
iterate_supers_type---of 15
kill_anon_super---of 1
kill_block_super---of 2
kill_litter_super---of 3
kill_super_notify---of 9
mount_bdev---of 12
mount_capable---of 5
mount_nodev---of 7
mount_single---of 14
put_super---of 3
reconfigure_single---of 7
reconfigure_super---of 31
retire_super---of 5
sb_init_dio_done_wq---of 4
set_anon_super---of 5
set_anon_super_fc---of 5
set_bdev_super---of 1
setup_bdev_super---of 25
setup_bdev_super.cold---of 1
sget---of 31
sget_dev---of 1
sget_fc---of 35
super_cache_count---of 7
super_cache_scan---of 14
super_lock48%of 17
super_s_dev_set---of 1
super_s_dev_test---of 3
super_setup_bdi---of 1
super_setup_bdi_name---of 7
super_trylock_shared---of 6
test_bdev_super---of 3
test_keyed_super---of 1
test_single_super---of 1
thaw_super75%of 4
thaw_super_locked10%of 31
user_get_super---of 12
vfs_get_tree---of 11
-----------
SUMMARY32%of 58
serial8250_backup_timeout---of 13
serial8250_get_port---of 1
serial8250_interrupt---of 12
serial8250_probe---of 6
serial8250_register_8250_port---of 78
serial8250_release_rsa_resource---of 3
serial8250_release_rsa_resource.cold---of 2
serial8250_remove---of 5
serial8250_request_rsa_resource---of 4
serial8250_request_rsa_resource.cold---of 2
serial8250_resume---of 5
serial8250_resume_port---of 5
serial8250_set_isa_configurator---of 1
serial8250_setup_port---of 5
serial8250_suspend---of 5
serial8250_suspend_port---of 7
serial8250_timeout---of 3
serial8250_unregister_port---of 6
serial_8250_overrun_backoff_work---of 1
serial_do_unlink---of 13
univ8250_config_port---of 12
univ8250_console_exit---of 1
univ8250_console_match---of 18
univ8250_console_setup---of 9
univ8250_console_write100%of 1
univ8250_release_irq---of 15
univ8250_release_port---of 3
univ8250_request_port---of 5
univ8250_setup_irq---of 22
univ8250_setup_timer---of 10
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__register_nls---of 9
char2uni---of 2
find_nls---of 9
load_nls---of 4
load_nls_default---of 4
uni2char---of 5
unload_nls---of 2
unregister_nls---of 9
utf16s_to_utf8s61%of 23
utf32_to_utf8100%of 12
utf32_to_utf8.cold---of 2
utf8_to_utf32---of 10
utf8s_to_utf16s---of 23
-----------
SUMMARY75%of 35
picolcd_check_bl_fb---of 3
picolcd_exit_backlight100%of 1
picolcd_get_brightness---of 1
picolcd_init_backlight---of 11
picolcd_resume_backlight---of 3
picolcd_set_brightness---of 9
picolcd_suspend_backlight---of 2
-----------
SUMMARY100%of 1
tcx_entry_fetch42%of 17
tcx_link_attach---of 26
tcx_link_dealloc---of 1
tcx_link_detach---of 1
tcx_link_fdinfo---of 4
tcx_link_fill_info---of 3
tcx_link_release---of 26
tcx_link_update---of 13
tcx_prog_attach---of 24
tcx_prog_detach---of 24
tcx_prog_query---of 3
tcx_uninstall5%of 22
-----------
SUMMARY21%of 39
-----------
SUMMARY---of 0
power_supply_create_triggers---of 12
power_supply_remove_triggers67%of 3
power_supply_update_leds---of 16
-----------
SUMMARY67%of 3
NF_HOOK.constprop.0---of 45
__ipv6_dev_mc_dec53%of 17
__ipv6_dev_mc_inc---of 44
__ipv6_sock_mc_close---of 24
__ipv6_sock_mc_close.cold---of 1
__ipv6_sock_mc_join---of 35
__ipv6_sock_mc_join.cold---of 1
add_grec---of 100
add_grhead---of 6
dst_output---of 25
igmp6_cleanup---of 1
igmp6_event_query---of 17
igmp6_event_report---of 17
igmp6_group_added---of 13
igmp6_group_dropped36%of 45
igmp6_group_queried---of 14
igmp6_join_group.part.0---of 15
igmp6_late_cleanup---of 1
igmp6_mc_get_next.isra.0---of 30
igmp6_mc_seq_next---of 1
igmp6_mc_seq_show---of 3
igmp6_mc_seq_start---of 27
igmp6_mc_seq_stop---of 8
igmp6_mcf_get_next.isra.0---of 43
igmp6_mcf_seq_next---of 25
igmp6_mcf_seq_show---of 4
igmp6_mcf_seq_start---of 33
igmp6_mcf_seq_stop---of 10
igmp6_net_exit---of 5
igmp6_net_init---of 17
igmp6_net_init.cold---of 1
igmp6_send---of 65
in6_dev_get---of 22
inet6_mc_check---of 52
inet6_mc_check.cold---of 1
ip6_mc_add_src---of 67
ip6_mc_clear_src29%of 25
ip6_mc_del1_src---of 42
ip6_mc_del_src.isra.0---of 41
ip6_mc_find_dev_rtnl---of 16
ip6_mc_hdr.constprop.0---of 4
ip6_mc_hdr.constprop.0.cold---of 1
ip6_mc_leave_src.isra.0---of 13
ip6_mc_msfget---of 38
ip6_mc_msfget.cold---of 1
ip6_mc_msfilter---of 45
ip6_mc_msfilter.cold---of 1
ip6_mc_source---of 73
ip6_mc_source.cold---of 1
ipv6_chk_mcast_addr---of 53
ipv6_dev_mc_dec---of 12
ipv6_dev_mc_inc---of 1
ipv6_mc_dad_complete---of 14
ipv6_mc_destroy_dev50%of 22
ipv6_mc_down45%of 27
ipv6_mc_init_dev---of 4
ipv6_mc_netdev_event14%of 30
ipv6_mc_remap---of 1
ipv6_mc_unmap---of 13
ipv6_mc_up---of 16
ipv6_sock_mc_close---of 5
ipv6_sock_mc_close.cold---of 1
ipv6_sock_mc_drop---of 31
ipv6_sock_mc_drop.cold---of 1
ipv6_sock_mc_join---of 1
ipv6_sock_mc_join_ssm---of 1
is_in---of 24
ma_put55%of 11
mld_clear_delrec30%of 41
mld_clear_zeros---of 28
mld_dad_work---of 18
mld_del_delrec---of 58
mld_gq_work---of 5
mld_ifc_event29%of 7
mld_ifc_work---of 67
mld_in_v1_mode89%of 9
mld_mca_work---of 4
mld_newpack.isra.0---of 10
mld_query_work---of 171
mld_report_work---of 50
mld_send_initial_cr.part.0---of 15
mld_send_report---of 20
mld_sendpack---of 38
sf_markstate---of 16
sf_setstate---of 72
skb_put_data.constprop.0.isra.0---of 1
-----------
SUMMARY38%of 234
module_add_driver40%of 10
module_remove_driver43%of 7
-----------
SUMMARY42%of 17
acpi_dev_add_driver_gpios---of 4
acpi_dev_gpio_irq_wake_get_by---of 28
acpi_dev_release_driver_gpios---of 2
acpi_dev_remove_driver_gpios---of 2
acpi_find_gpio---of 31
acpi_find_gpio_count---of 4
acpi_get_gpiod_by_index---of 17
acpi_get_gpiod_from_data---of 6
acpi_gpio_adr_space_handler---of 34
acpi_gpio_adr_space_handler.cold---of 2
acpi_gpio_count---of 35
acpi_gpio_get_io_resource---of 5
acpi_gpio_get_irq_resource---of 5
acpi_gpio_in_ignore_list---of 12
acpi_gpio_irq_handler---of 1
acpi_gpio_irq_handler_evt---of 1
acpi_gpio_irq_is_wake---of 6
acpi_gpio_property_lookup---of 18
acpi_gpio_resource_lookup---of 4
acpi_gpio_update_gpiod_flags.isra.0---of 12
acpi_gpiochip_add---of 30
acpi_gpiochip_alloc_event---of 30
acpi_gpiochip_find---of 1
acpi_gpiochip_free_interrupts22%of 19
acpi_gpiochip_remove23%of 18
acpi_gpiochip_request_interrupts---of 14
acpi_gpiochip_request_irq.isra.0---of 11
acpi_populate_gpio_lookup---of 24
acpi_request_own_gpiod---of 7
devm_acpi_dev_add_driver_gpios---of 6
-----------
SUMMARY22%of 37
led_blink_set---of 1
led_blink_set_nosleep---of 5
led_blink_set_oneshot---of 6
led_blink_setup---of 21
led_compose_name---of 52
led_get_default_pattern---of 6
led_init_core---of 1
led_init_default_state_get---of 4
led_set_brightness63%of 8
led_set_brightness_nopm---of 3
led_set_brightness_nopm.part.075%of 4
led_set_brightness_nosleep---of 4
led_set_brightness_sync---of 6
led_stop_software_blink100%of 1
led_sysfs_disable---of 4
led_sysfs_enable---of 4
led_timer_function---of 24
led_update_brightness---of 4
set_brightness_delayed---of 13
set_brightness_delayed_set_brightness.part.0---of 5
-----------
SUMMARY70%of 13
connector_id_show---of 1
dpms_show---of 1
drm_class_device_register---of 5
drm_class_device_unregister---of 1
drm_connector_acpi_bus_match---of 1
drm_connector_acpi_find_companion---of 3
drm_devnode100%of 3
drm_sysfs_connector_add---of 14
drm_sysfs_connector_add_late---of 4
drm_sysfs_connector_hotplug_event---of 3
drm_sysfs_connector_property_event---of 5
drm_sysfs_connector_remove75%of 4
drm_sysfs_connector_remove_early50%of 2
drm_sysfs_destroy---of 3
drm_sysfs_hotplug_event---of 1
drm_sysfs_init---of 8
drm_sysfs_lease_event---of 1
drm_sysfs_minor_alloc---of 8
drm_sysfs_release100%of 1
edid_show---of 7
enabled_show---of 2
modes_show---of 4
status_show---of 1
status_store---of 19
typec_connector_bind---of 5
typec_connector_unbind---of 1
-----------
SUMMARY80%of 10
-----------
SUMMARY---of 0
arp_accept---of 4
arp_constructor---of 53
arp_create---of 14
arp_error_report---of 11
arp_hash---of 1
arp_ifdown100%of 1
arp_ignore---of 4
arp_invalidate---of 17
arp_ioctl---of 40
arp_is_multicast---of 1
arp_key_eq---of 1
arp_mc_map30%of 10
arp_net_exit---of 1
arp_net_init---of 2
arp_netdev_event14%of 15
arp_process---of 136
arp_rcv---of 29
arp_req_delete---of 11
arp_req_set---of 29
arp_req_set_proxy---of 15
arp_send---of 3
arp_send_dst---of 7
arp_seq_show---of 17
arp_seq_start---of 1
arp_solicit---of 66
arp_xmit---of 13
arp_xmit_finish---of 1
neigh_release---of 5
parp_redo---of 1
-----------
SUMMARY24%of 26
hid_debug_event---of 3
hid_debug_events_open---of 12
hid_debug_events_poll---of 7
hid_debug_events_read---of 15
hid_debug_events_release---of 8
hid_debug_exit---of 1
hid_debug_init---of 1
hid_debug_rdesc_open---of 1
hid_debug_rdesc_show---of 21
hid_debug_register---of 1
hid_debug_unregister100%of 1
hid_dump_device---of 8
hid_dump_field---of 38
hid_dump_input---of 2
hid_dump_report---of 5
hid_resolv_usage---of 29
-----------
SUMMARY100%of 1
f81232_break_ctl---of 6
f81232_carrier_raised---of 1
f81232_close---of 3
f81232_dtr_rts---of 3
f81232_get_register---of 4
f81232_get_serial---of 1
f81232_handle_lsr---of 14
f81232_interrupt_work---of 1
f81232_lsr_worker---of 3
f81232_open---of 14
f81232_port_probe---of 3
f81232_process_read_urb---of 14
f81232_read_int_callback---of 16
f81232_read_msr---of 15
f81232_resume---of 6
f81232_set_mctrl---of 17
f81232_set_register---of 3
f81232_set_termios---of 44
f81232_suspend---of 3
f81232_tiocmget---of 1
f81232_tiocmset---of 1
f81232_tx_empty---of 4
f81534a_ctrl_disconnect100%of 1
f81534a_ctrl_enable_all_ports80%of 10
f81534a_ctrl_probe---of 1
f81534a_ctrl_resume---of 1
f81534a_open---of 6
f81534a_port_probe---of 3
f81534a_process_read_urb---of 18
-----------
SUMMARY82%of 11
aa_get_buffer---of 28
aa_get_newest_label15%of 40
aa_put_buffer---of 2
aa_put_buffer.part.0---of 12
aa_sock_msg_perm.constprop.0---of 10
aa_sock_opt_perm.constprop.0---of 7
aa_sock_perm---of 7
apparmor_bprm_committed_creds---of 15
apparmor_bprm_committing_creds---of 9
apparmor_capable62%of 13
apparmor_capget---of 33
apparmor_cred_alloc_blank---of 3
apparmor_cred_free---of 11
apparmor_cred_prepare---of 5
apparmor_cred_transfer---of 5
apparmor_current_getsecid_subj36%of 17
apparmor_dointvec---of 4
apparmor_file_alloc_security43%of 19
apparmor_file_free_security72%of 7
apparmor_file_lock---of 21
apparmor_file_mprotect---of 26
apparmor_file_open30%of 34
apparmor_file_permission40%of 20
apparmor_file_receive---of 29
apparmor_file_truncate100%of 1
apparmor_getprocattr---of 20
apparmor_getselfattr---of 17
apparmor_inet_conn_request---of 4
apparmor_inode_getattr---of 1
apparmor_ip_postroute---of 7
apparmor_mmap_file---of 26
apparmor_move_mount---of 20
apparmor_nf_register---of 1
apparmor_nf_unregister---of 1
apparmor_path_chmod---of 1
apparmor_path_chown---of 1
apparmor_path_link---of 15
apparmor_path_mkdir---of 11
apparmor_path_mknod64%of 11
apparmor_path_rename---of 23
apparmor_path_rmdir---of 12
apparmor_path_symlink---of 11
apparmor_path_truncate---of 1
apparmor_path_unlink---of 12
apparmor_ptrace_access_check---of 24
apparmor_ptrace_traceme---of 24
apparmor_sb_mount---of 31
apparmor_sb_pivotroot---of 23
apparmor_sb_umount---of 20
apparmor_setprocattr---of 3
apparmor_setselfattr---of 3
apparmor_sk_alloc_security---of 6
apparmor_sk_clone_security---of 25
apparmor_sk_free_security---of 13
apparmor_sock_graft---of 13
apparmor_socket_accept---of 10
apparmor_socket_bind---of 10
apparmor_socket_connect---of 10
apparmor_socket_create---of 17
apparmor_socket_getpeername---of 1
apparmor_socket_getpeersec_dgram---of 1
apparmor_socket_getpeersec_stream---of 12
apparmor_socket_getsockname---of 1
apparmor_socket_getsockopt---of 1
apparmor_socket_listen---of 7
apparmor_socket_post_create---of 55
apparmor_socket_recvmsg---of 1
apparmor_socket_sendmsg---of 1
apparmor_socket_setsockopt---of 1
apparmor_socket_shutdown---of 1
apparmor_socket_sock_rcv_skb50%of 4
apparmor_task_alloc---of 19
apparmor_task_free---of 19
apparmor_task_getsecid_obj---of 6
apparmor_task_kill---of 32
apparmor_task_setrlimit---of 20
apparmor_uring_override_creds---of 25
apparmor_uring_sqpoll---of 21
apparmor_userns_create---of 16
audit_uring_cb---of 9
begin_current_label_crit_section31%of 13
common_perm_cond64%of 11
destroy_buffers---of 5
do_setattr---of 33
end_current_label_crit_section28%of 11
param_get_aabool---of 5
param_get_aacompressionlevel---of 5
param_get_aaintbool---of 1
param_get_aalockpolicy---of 5
param_get_aauint---of 5
param_get_audit---of 5
param_get_mode---of 5
param_set_aabool---of 5
param_set_aacompressionlevel---of 4
param_set_aaintbool---of 4
param_set_aalockpolicy---of 5
param_set_aauint---of 4
param_set_audit---of 7
param_set_mode---of 7
profile_uring.constprop.0---of 10
-----------
SUMMARY38%of 201
__kvm_cpuid_base---of 7
__send_ipi_mask---of 34
__send_ipi_mask.cold---of 1
__sysvec_kvm_asyncpf_interrupt---of 4
apf_task_wake_all---of 11
arch_haltpoll_disable---of 4
arch_haltpoll_enable---of 8
fred_sysvec_kvm_asyncpf_interrupt---of 4
kvm_arch_para_features---of 3
kvm_arch_para_hints---of 3
kvm_async_pf_task_wait_schedule---of 20
kvm_async_pf_task_wake---of 24
kvm_cpu_down_prepare---of 8
kvm_cpu_online---of 8
kvm_crash_shutdown---of 1
kvm_disable_host_haltpoll---of 2
kvm_enable_host_haltpoll---of 2
kvm_flush_tlb_multi---of 11
kvm_guest_apic_eoi_write---of 2
kvm_guest_cpu_init---of 28
kvm_guest_cpu_offline---of 22
kvm_kick_cpu---of 1
kvm_para_available---of 3
kvm_pv_guest_cpu_reboot---of 1
kvm_pv_reboot_notify---of 4
kvm_resume---of 6
kvm_send_ipi_mask---of 1
kvm_send_ipi_mask_allbutself---of 3
kvm_smp_send_call_func_ipi---of 10
kvm_steal_clock100%of 4
kvm_suspend---of 9
kvm_wait47%of 13
pv_tlb_flush_supported---of 12
-----------
SUMMARY59%of 17
match_idx---of 1
nfc_activate_target---of 17
nfc_add_se---of 17
nfc_alloc_recv_skb---of 3
nfc_alloc_send_skb---of 3
nfc_allocate_device---of 12
nfc_check_pres_timeout---of 1
nfc_check_pres_work---of 8
nfc_data_exchange---of 21
nfc_deactivate_target---of 11
nfc_dep_link_down---of 11
nfc_dep_link_is_up---of 11
nfc_dep_link_up---of 18
nfc_dev_down---of 12
nfc_dev_up---of 18
nfc_disable_se---of 17
nfc_driver_failure---of 1
nfc_enable_se---of 18
nfc_find_se---of 4
nfc_fw_download---of 10
nfc_fw_download_done---of 1
nfc_get_device---of 3
nfc_get_local_general_bytes---of 5
nfc_register_device---of 18
nfc_release34%of 9
nfc_remove_se---of 12
nfc_rfkill_set_block---of 7
nfc_se_connectivity---of 4
nfc_se_transaction---of 5
nfc_set_remote_general_bytes---of 6
nfc_start_poll---of 12
nfc_stop_poll---of 8
nfc_target_lost---of 16
nfc_targets_found---of 13
nfc_tm_activated---of 6
nfc_tm_data_received---of 4
nfc_tm_deactivated---of 1
nfc_unregister_device43%of 14
-----------
SUMMARY40%of 23
-----------
SUMMARY---of 0
__bpf_trace_csd_function---of 1
__bpf_trace_csd_queue_cpu---of 1
__csd_lock_record---of 3
__flush_smp_call_function_queue50%of 48
__smp_call_single_queue58%of 7
__traceiter_csd_function_entry---of 3
__traceiter_csd_function_exit---of 3
__traceiter_csd_queue_cpu---of 3
csd_lock_wait_toolong---of 30
flush_smp_call_function_queue---of 11
generic_exec_single59%of 24
generic_smp_call_function_single_interrupt100%of 1
kick_all_cpus_sync---of 2
on_each_cpu_cond_mask50%of 2
perf_trace_csd_function---of 5
perf_trace_csd_queue_cpu---of 5
smp_call_function---of 2
smp_call_function_any---of 21
smp_call_function_many---of 1
smp_call_function_many_cond61%of 101
smp_call_function_single40%of 28
smp_call_function_single_async---of 6
smp_call_on_cpu---of 8
smp_call_on_cpu_callback---of 5
smpcfd_dead_cpu---of 1
smpcfd_dying_cpu---of 1
smpcfd_prepare_cpu---of 2
trace_csd_function_entry34%of 15
trace_csd_function_exit34%of 15
trace_csd_queue_cpu34%of 15
trace_event_raw_event_csd_function---of 6
trace_event_raw_event_csd_queue_cpu---of 6
trace_ipi_send_cpu.constprop.0 34%of 15
trace_raw_output_csd_function---of 5
trace_raw_output_csd_queue_cpu---of 5
wake_up_all_idle_cpus---of 11
-----------
SUMMARY51%of 271
calc_file_id_hash.constprop.0---of 3
ima_appraise_measurement---of 124
ima_check_blacklist---of 4
ima_fix_xattr---of 4
ima_get_cache_status---of 11
ima_get_hash_algo---of 17
ima_inode_post_setattr17%of 6
ima_inode_remove_acl---of 5
ima_inode_removexattr---of 10
ima_inode_set_acl---of 5
ima_inode_setxattr---of 23
ima_inode_setxattr.cold---of 1
ima_must_appraise---of 3
ima_read_xattr---of 2
ima_reset_appraise_flags.part.0---of 4
ima_update_xattr---of 7
is_ima_appraise_enabled---of 1
-----------
SUMMARY17%of 6
__snd_usbmidi_create---of 94
__snd_usbmidi_create.cold---of 1
ch345_broken_sysex_input---of 14
roland_load_get---of 1
roland_load_info---of 1
roland_load_put---of 5
send_bulk_static_data.isra.0---of 4
snd_usbmidi_akai_input---of 6
snd_usbmidi_akai_output---of 26
snd_usbmidi_cme_input---of 4
snd_usbmidi_create_endpoints_midiman---of 30
snd_usbmidi_detect_endpoints---of 30
snd_usbmidi_disconnect28%of 18
snd_usbmidi_do_output---of 15
snd_usbmidi_do_output.cold---of 2
snd_usbmidi_emagic_finish_out---of 1
snd_usbmidi_emagic_init_out---of 1
snd_usbmidi_emagic_input---of 17
snd_usbmidi_emagic_output---of 15
snd_usbmidi_error_timer---of 13
snd_usbmidi_ftdi_input---of 2
snd_usbmidi_get_ms_info---of 53
snd_usbmidi_get_ms_info.cold---of 2
snd_usbmidi_get_port_info---of 5
snd_usbmidi_in_endpoint_create---of 22
snd_usbmidi_in_endpoint_delete---of 4
snd_usbmidi_in_urb_complete---of 8
snd_usbmidi_init_substream---of 31
snd_usbmidi_input_close---of 1
snd_usbmidi_input_data---of 5
snd_usbmidi_input_open---of 1
snd_usbmidi_input_start---of 11
snd_usbmidi_input_stop---of 8
snd_usbmidi_input_trigger---of 3
snd_usbmidi_maudio_broken_running_status_input---of 13
snd_usbmidi_midiman_input---of 4
snd_usbmidi_novation_input---of 4
snd_usbmidi_novation_output---of 4
snd_usbmidi_out_endpoint_clear---of 3
snd_usbmidi_out_endpoint_create---of 30
snd_usbmidi_out_urb_complete---of 8
snd_usbmidi_out_urb_complete.cold---of 2
snd_usbmidi_out_work---of 1
snd_usbmidi_output_close---of 1
snd_usbmidi_output_drain---of 8
snd_usbmidi_output_midiman_packet---of 1
snd_usbmidi_output_open---of 14
snd_usbmidi_output_standard_packet---of 1
snd_usbmidi_output_trigger---of 4
snd_usbmidi_raw_input---of 1
snd_usbmidi_raw_output---of 4
snd_usbmidi_rawmidi_free72%of 7
snd_usbmidi_resume---of 1
snd_usbmidi_standard_input---of 4
snd_usbmidi_standard_output---of 39
snd_usbmidi_suspend---of 9
snd_usbmidi_urb_error---of 5
snd_usbmidi_us122l_input---of 6
snd_usbmidi_us122l_output---of 5
substream_open.isra.0---of 31
-----------
SUMMARY40%of 25
hcd_buffer_alloc---of 12
hcd_buffer_alloc_pages---of 7
hcd_buffer_create---of 7
hcd_buffer_destroy---of 1
hcd_buffer_free37%of 11
hcd_buffer_free_pages---of 6
-----------
SUMMARY37%of 11
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
attr_dragging_show_tpkbd---of 1
attr_dragging_store_tpkbd---of 4
attr_fn_lock_show---of 1
attr_fn_lock_store---of 8
attr_middleclick_workaround_show_cptkbd---of 1
attr_middleclick_workaround_store_cptkbd---of 4
attr_press_speed_show_tpkbd---of 1
attr_press_speed_store_tpkbd---of 4
attr_press_to_select_show_tpkbd---of 1
attr_press_to_select_store_tpkbd---of 4
attr_release_to_select_show_tpkbd---of 1
attr_release_to_select_store_tpkbd---of 4
attr_select_right_show_tpkbd---of 1
attr_select_right_store_tpkbd---of 4
attr_sensitivity_show_cptkbd---of 1
attr_sensitivity_show_tpkbd---of 1
attr_sensitivity_store_cptkbd---of 4
attr_sensitivity_store_tpkbd---of 4
lenovo_event---of 23
lenovo_features_set_cptkbd---of 8
lenovo_features_set_tpkbd.isra.0---of 5
lenovo_input_configured---of 5
lenovo_input_mapping---of 211
lenovo_led_brightness_set---of 7
lenovo_led_set_tp10ubkbd---of 5
lenovo_probe---of 39
lenovo_raw_event---of 7
lenovo_register_leds---of 13
lenovo_remove56%of 9
lenovo_report_fixup---of 8
lenovo_reset_resume---of 5
lenovo_send_cmd_cptkbd---of 6
lenovo_tp10ubkbd_sync_fn_lock---of 1
-----------
SUMMARY56%of 9
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__lock_sock---of 7
__lock_sock_fast---of 3
__receive_sock---of 44
__release_sock---of 20
__sk_backlog_rcv---of 8
__sk_destruct---of 43
__sk_dst_check---of 17
__sk_flush_backlog---of 5
__sk_free---of 11
__sk_mem_raise_allocated---of 75
__sk_mem_reclaim---of 1
__sk_mem_reduce_allocated---of 10
__sk_mem_schedule---of 3
__sk_receive_skb---of 37
__sock_cmsg_send---of 12
__sock_i_ino---of 3
__sock_queue_rcv_skb---of 32
__sock_set_timestamps---of 9
__sock_wfree---of 5
copy_from_sockptr_offset.constprop.0---of 4
copy_to_sockptr_offset---of 7
lock_sock_nested---of 3
proto_exit_net---of 1
proto_init_net---of 2
proto_register---of 30
proto_seq_next---of 1
proto_seq_show---of 29
proto_seq_start---of 1
proto_seq_stop---of 1
proto_unregister---of 8
release_sock---of 11
sk_alloc---of 53
sk_busy_loop_end---of 7
sk_capable---of 4
sk_clear_memalloc---of 5
sk_clone_lock---of 44
sk_clone_lock.cold---of 1
sk_common_release---of 14
sk_destruct---of 5
sk_dst_check---of 31
sk_error_report7%of 16
sk_free---of 5
sk_free_unlock_clone---of 5
sk_get_meminfo---of 1
sk_getsockopt---of 172
sk_init_common---of 1
sk_ioctl---of 15
sk_leave_memory_pressure---of 7
sk_mc_loop---of 9
sk_net_capable---of 4
sk_ns_capable---of 4
sk_page_frag_refill---of 8
sk_prot_alloc---of 15
sk_reset_timer---of 6
sk_send_sigurg---of 20
sk_set_memalloc---of 1
sk_set_peek_off---of 1
sk_setsockopt---of 246
sk_setup_caps---of 22
sk_stop_timer---of 3
sk_stop_timer_sync---of 3
sk_wait_data---of 11
skb_dst_force.isra.0---of 17
skb_orphan_partial---of 27
skb_page_frag_refill---of 22
skb_set_owner_w---of 18
skb_set_owner_w.cold---of 1
sock_alloc_send_pskb---of 25
sock_bind_add---of 3
sock_bindtoindex---of 5
sock_bindtoindex_locked---of 7
sock_cmsg_send---of 10
sock_common_getsockopt---of 1
sock_common_recvmsg---of 3
sock_common_setsockopt---of 1
sock_copy_user_timeval---of 20
sock_def_error_report28%of 36
sock_def_readable32%of 51
sock_def_wakeup---of 19
sock_def_write_space---of 37
sock_efree---of 8
sock_enable_timestamp---of 4
sock_enable_timestamps---of 1
sock_get_timeout---of 10
sock_gettstamp---of 9
sock_i_ino---of 3
sock_i_uid---of 3
sock_init_data---of 4
sock_init_data_uid---of 7
sock_inuse_exit_net---of 1
sock_inuse_get---of 7
sock_inuse_init_net---of 2
sock_ioctl_inout---of 6
sock_kfree_s---of 3
sock_kmalloc---of 5
sock_kzfree_s---of 3
sock_load_diag_module---of 9
sock_lock_init---of 3
sock_no_accept---of 1
sock_no_bind---of 1
sock_no_connect---of 1
sock_no_getname---of 1
sock_no_ioctl---of 1
sock_no_linger---of 1
sock_no_listen---of 1
sock_no_mmap---of 1
sock_no_recvmsg---of 1
sock_no_sendmsg---of 1
sock_no_sendmsg_locked---of 1
sock_no_shutdown---of 1
sock_no_socketpair---of 1
sock_ofree---of 1
sock_omalloc---of 4
sock_pfree---of 18
sock_pfree.cold---of 1
sock_prot_inuse_get---of 8
sock_queue_rcv_skb_reason---of 5
sock_recv_errqueue---of 18
sock_rfree---of 6
sock_set_keepalive---of 3
sock_set_mark---of 3
sock_set_priority---of 1
sock_set_rcvbuf---of 1
sock_set_reuseaddr---of 1
sock_set_reuseport---of 1
sock_set_sndtimeo---of 5
sock_set_timeout---of 12
sock_set_timestamp---of 5
sock_set_timestamping---of 37
sock_set_timestamping.cold---of 1
sock_setsockopt---of 1
sock_wfree30%of 58
sock_wmalloc---of 5
sockopt_capable---of 4
sockopt_lock_sock---of 2
sockopt_ns_capable---of 4
sockopt_release_sock---of 2
-----------
SUMMARY28%of 161
fill_inquiry_response---of 4
release_everything100%of 5
storage_probe---of 51
usb_stor_adjust_quirks---of 41
usb_stor_control_thread---of 29
usb_stor_disconnect100%of 5
usb_stor_post_reset---of 1
usb_stor_pre_reset---of 1
usb_stor_probe1---of 47
usb_stor_probe2---of 32
usb_stor_reset_resume---of 1
usb_stor_resume---of 3
usb_stor_scan_dwork---of 10
usb_stor_suspend---of 3
-----------
SUMMARY100%of 10
__bus_removed_driver---of 1
__hid_bus_driver_added---of 3
__hid_bus_reprobe_drivers---of 5
__hid_register_driver---of 4
__hid_request---of 10
fetch_item---of 13
hid_add_device---of 31
hid_add_field---of 36
hid_add_usage---of 8
hid_alloc_report_buf---of 1
hid_allocate_device---of 5
hid_bus_match---of 1
hid_check_keys_pressed100%of 7
hid_close_report100%of 9
hid_compare_device_paths---of 6
hid_concatenate_last_usage_page---of 7
hid_connect---of 80
hid_destroy_device100%of 3
hid_device_probe---of 16
hid_device_release40%of 5
hid_device_remove100%of 6
hid_disconnect100%of 7
hid_driver_reset_resume---of 5
hid_driver_resume---of 5
hid_driver_suspend---of 5
hid_field_extract78%of 9
hid_field_extract.cold---of 1
hid_get_report38%of 8
hid_hw_close100%of 3
hid_hw_open84%of 6
hid_hw_output_report---of 7
hid_hw_raw_request---of 6
hid_hw_request67%of 3
hid_hw_start---of 5
hid_hw_stop100%of 1
hid_input_array_field100%of 22
hid_input_report48%of 23
hid_match_device---of 13
hid_match_id---of 11
hid_match_one_id---of 10
hid_open_report---of 28
hid_output_report---of 11
hid_parse_report---of 3
hid_parser_global---of 82
hid_parser_local---of 37
hid_parser_main---of 28
hid_parser_reserved---of 4
hid_process_event25%of 24
hid_register_report---of 10
hid_report_raw_event68%of 59
hid_scan_main---of 38
hid_set_field---of 8
hid_setup_resolution_multiplier---of 31
hid_snto32---of 11
hid_uevent100%of 6
hid_unregister_driver---of 5
hid_validate_values---of 14
hiddev_free---of 1
implement---of 10
modalias_show---of 1
new_id_store---of 8
read_report_descriptor---of 6
s32ton---of 10
s32ton.cold---of 5
show_country---of 1
snto32---of 11
-----------
SUMMARY70%of 201
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
kobj_lookup77%of 13
kobj_map---of 11
kobj_map_init---of 7
kobj_unmap100%of 8
-----------
SUMMARY86%of 21
raremono_cmd_main---of 8
raremono_device_release100%of 1
usb_raremono_disconnect100%of 1
usb_raremono_probe---of 19
vidioc_enum_freq_bands---of 4
vidioc_g_frequency---of 3
vidioc_g_tuner---of 5
vidioc_querycap---of 1
vidioc_s_frequency---of 8
vidioc_s_tuner---of 2
-----------
SUMMARY100%of 2
__of_add_property---of 14
__of_device_is_compatible---of 22
__of_device_is_status---of 12
__of_find_all_nodes---of 8
__of_find_node_by_full_path---of 6
__of_find_node_by_path---of 10
__of_get_property---of 8
__of_node_is_type---of 8
__of_parse_phandle_with_args---of 9
__of_phandle_cache_inv_entry---of 4
__of_remove_property---of 14
__of_update_property---of 16
of_add_property---of 1
of_alias_from_compatible---of 7
of_alias_get_highest_id---of 5
of_alias_get_id---of 6
of_alias_scan---of 29
of_bus_n_addr_cells---of 5
of_bus_n_size_cells---of 5
of_console_check---of 6
of_count_phandle_with_args---of 7
of_count_phandle_with_args.part.0---of 4
of_device_compatible_match---of 5
of_device_is_available---of 9
of_device_is_big_endian---of 8
of_device_is_compatible---of 1
of_find_all_nodes---of 6
of_find_compatible_node---of 16
of_find_last_cache_level---of 3
of_find_matching_node_and_match---of 30
of_find_next_cache_node---of 7
of_find_node_by_name---of 16
of_find_node_by_phandle---of 16
of_find_node_by_type---of 16
of_find_node_opts_by_path---of 22
of_find_node_with_property---of 15
of_find_property---of 7
of_get_child_by_name---of 6
of_get_compatible_child---of 6
of_get_next_available_child---of 14
of_get_next_child40%of 5
of_get_next_cpu_node---of 10
of_get_next_parent---of 4
of_get_next_reserved_child---of 10
of_get_parent---of 4
of_get_property---of 10
of_machine_compatible_match---of 6
of_map_id---of 34
of_match_node---of 7
of_n_addr_cells---of 7
of_n_size_cells---of 7
of_node_name_eq---of 3
of_node_name_eq.part.0.isra.0---of 5
of_node_name_prefix---of 6
of_parse_phandle_with_args_map---of 49
of_phandle_iterator_args---of 5
of_phandle_iterator_init---of 6
of_phandle_iterator_next---of 25
of_print_phandle_args---of 4
of_remove_property---of 3
of_update_property---of 3
-----------
SUMMARY40%of 5
-----------
SUMMARY---of 0
__do_sys_adjtimex---of 4
__do_sys_adjtimex_time32---of 5
__ia32_compat_sys_gettimeofday---of 8
__ia32_compat_sys_settimeofday---of 10
__ia32_sys_adjtimex---of 1
__ia32_sys_adjtimex_time32---of 1
__ia32_sys_gettimeofday---of 8
__ia32_sys_settimeofday---of 10
__ia32_sys_stime---of 5
__ia32_sys_stime32---of 5
__ia32_sys_time---of 3
__ia32_sys_time32---of 4
__msecs_to_jiffies100%of 3
__usecs_to_jiffies---of 3
__x64_compat_sys_gettimeofday---of 8
__x64_compat_sys_settimeofday---of 10
__x64_sys_adjtimex---of 1
__x64_sys_adjtimex_time32---of 1
__x64_sys_gettimeofday---of 8
__x64_sys_settimeofday---of 10
__x64_sys_stime---of 5
__x64_sys_stime32---of 5
__x64_sys_time---of 3
__x64_sys_time32---of 4
clock_t_to_jiffies---of 1
do_sys_settimeofday64---of 14
get_itimerspec64---of 3
get_old_itimerspec32---of 5
get_old_timespec32---of 6
get_old_timex32---of 3
get_timespec64 84%of 6
jiffies64_to_msecs---of 1
jiffies64_to_nsecs---of 1
jiffies_64_to_clock_t---of 1
jiffies_to_clock_t100%of 1
jiffies_to_msecs100%of 1
jiffies_to_timespec64---of 1
jiffies_to_usecs---of 1
mktime64---of 3
ns_to_kernel_old_timeval---of 5
ns_to_timespec64 60%of 5
nsec_to_clock_t---of 1
nsecs_to_jiffies---of 1
nsecs_to_jiffies64---of 1
put_itimerspec64---of 5
put_old_itimerspec32---of 5
put_old_timespec32---of 5
put_old_timex32---of 2
put_timespec64---of 2
set_normalized_timespec64---of 5
timespec64_add_safe---of 7
timespec64_to_jiffies---of 3
-----------
SUMMARY82%of 16
__lockup_detector_cleanup---of 5
__lockup_detector_reconfigure---of 15
arch_touch_nmi_watchdog100%of 1
lockup_detector_cleanup---of 1
lockup_detector_offline_cpu---of 6
lockup_detector_online_cpu---of 6
lockup_detector_reconfigure---of 1
lockup_detector_soft_poweroff---of 1
lockup_detector_update_enable---of 6
proc_nmi_watchdog---of 4
proc_soft_watchdog---of 1
proc_watchdog---of 1
proc_watchdog_common---of 6
proc_watchdog_cpumask---of 4
proc_watchdog_thresh---of 5
softlockup_fn---of 1
softlockup_start_fn---of 1
softlockup_stop_fn---of 1
touch_all_softlockup_watchdogs---of 5
touch_softlockup_watchdog100%of 1
touch_softlockup_watchdog_sched---of 1
touch_softlockup_watchdog_sync---of 1
watchdog_disable---of 3
watchdog_enable---of 4
watchdog_hardlockup_check---of 42
watchdog_hardlockup_touch_cpu---of 1
watchdog_timer_fn---of 38
-----------
SUMMARY100%of 2
-----------
SUMMARY---of 0
picolcd_operation_mode_delay_show---of 1
picolcd_operation_mode_delay_store---of 4
picolcd_operation_mode_show---of 5
picolcd_operation_mode_store---of 16
picolcd_probe---of 94
picolcd_raw_event---of 37
picolcd_remove58%of 7
picolcd_report---of 6
picolcd_reset---of 21
picolcd_reset_resume---of 13
picolcd_resume---of 5
picolcd_send_and_wait---of 20
picolcd_suspend---of 4
-----------
SUMMARY58%of 7
i_usx2y_in04_int---of 30
i_usx2y_out04_int---of 4
snd_usx2y_card_private_free100%of 4
snd_usx2y_disconnect84%of 6
snd_usx2y_probe---of 17
usx2y_async_seq04_init---of 10
usx2y_in04_init---of 10
usx2y_unlinkseq75%of 4
-----------
SUMMARY86%of 14
i2c_tiny_usb_disconnect50%of 2
i2c_tiny_usb_probe---of 9
usb_func---of 5
usb_read.isra.0---of 3
usb_write.isra.0---of 3
usb_xfer---of 25
-----------
SUMMARY50%of 2
__ima_inode_hash---of 19
ima_bprm_check---of 3
ima_check_last_writer---of 12
ima_file_check100%of 1
ima_file_free75%of 4
ima_file_hash---of 3
ima_file_mmap---of 6
ima_file_mprotect---of 8
ima_get_current_hash_algo---of 1
ima_inode_hash---of 3
ima_kernel_module_request---of 2
ima_kexec_cmdline---of 5
ima_load_data---of 11
ima_measure_critical_data---of 6
ima_post_create_tmpfile---of 5
ima_post_load_data---of 4
ima_post_path_mknod---of 5
ima_post_read_file---of 9
ima_read_file---of 5
mmap_violation_check---of 7
process_buffer_measurement---of 28
process_measurement12%of 126
process_measurement.cold---of 2
-----------
SUMMARY15%of 131
misc_deregister---of 9
misc_devnode---of 6
misc_open58%of 21
misc_register---of 20
misc_seq_next---of 1
misc_seq_show---of 2
misc_seq_start---of 1
misc_seq_stop---of 1
-----------
SUMMARY58%of 21
__bpf_trace_i2c_read---of 1
__bpf_trace_i2c_reply---of 1
__bpf_trace_i2c_result---of 1
__bpf_trace_i2c_write---of 1
__i2c_add_numbered_adapter---of 5
__i2c_check_addr_busy---of 8
__i2c_transfer10%of 143
__process_new_adapter---of 1
__process_new_driver---of 4
__process_removed_adapter100%of 1
__process_removed_driver---of 4
__traceiter_i2c_read---of 3
__traceiter_i2c_reply---of 3
__traceiter_i2c_result---of 3
__traceiter_i2c_write---of 3
__unregister_client84%of 6
__unregister_dummy---of 3
delete_device_store---of 21
devm_i2c_add_adapter---of 5
devm_i2c_del_adapter---of 1
devm_i2c_new_dummy_device---of 4
devm_i2c_release_dummy---of 1
dummy_probe---of 1
get_scl_gpio_value---of 1
get_sda_gpio_value---of 1
i2c_adapter_depth56%of 9
i2c_adapter_dev_release100%of 1
i2c_adapter_lock_bus100%of 1
i2c_adapter_trylock_bus---of 1
i2c_adapter_unlock_bus100%of 1
i2c_add_adapter---of 8
i2c_add_numbered_adapter---of 5
i2c_check_7bit_addr_validity_strict---of 2
i2c_check_addr_busy---of 6
i2c_check_mux_children---of 11
i2c_check_mux_parents---of 7
i2c_client_dev_release100%of 1
i2c_client_get_device_id---of 7
i2c_clients_command---of 1
i2c_cmd---of 6
i2c_default_probe---of 12
i2c_del_adapter32%of 19
i2c_del_driver---of 2
i2c_detect.isra.0---of 30
i2c_dev_irq_from_resources---of 8
i2c_dev_or_parent_fwnode_match---of 5
i2c_device_match---of 12
i2c_device_probe---of 51
i2c_device_remove67%of 6
i2c_device_shutdown---of 7
i2c_device_uevent80%of 5
i2c_do_del_adapter13%of 8
i2c_find_adapter_by_fwnode---of 7
i2c_find_device_by_fwnode---of 7
i2c_for_each_dev---of 1
i2c_freq_mode_string---of 3
i2c_generic_scl_recovery---of 26
i2c_get_adapter---of 4
i2c_get_adapter_by_fwnode---of 5
i2c_get_device_id---of 4
i2c_get_dma_safe_msg_buf---of 14
i2c_get_match_data---of 11
i2c_handle_smbus_host_notify---of 6
i2c_host_notify_irq_map---of 1
i2c_match_id---of 7
i2c_new_ancillary_device---of 7
i2c_new_client_device---of 47
i2c_new_dummy_device---of 1
i2c_new_scanned_device---of 16
i2c_parse_fw_timings---of 4
i2c_parse_timing---of 5
i2c_probe_func_quick_read---of 1
i2c_put_adapter---of 2
i2c_put_dma_safe_msg_buf---of 6
i2c_recover_bus---of 5
i2c_register_adapter---of 68
i2c_register_driver---of 8
i2c_transfer34%of 12
i2c_transfer_buffer_flags100%of 2
i2c_transfer_trace_reg---of 1
i2c_transfer_trace_unreg---of 1
i2c_unregister_device50%of 10
i2c_verify_adapter---of 3
i2c_verify_client---of 3
modalias_show---of 8
name_show---of 4
new_device_store---of 21
perf_trace_i2c_read---of 5
perf_trace_i2c_reply---of 5
perf_trace_i2c_result---of 5
perf_trace_i2c_write---of 5
set_scl_gpio_value---of 1
set_sda_gpio_value---of 1
trace_event_raw_event_i2c_read---of 6
trace_event_raw_event_i2c_reply---of 6
trace_event_raw_event_i2c_result---of 6
trace_event_raw_event_i2c_write---of 6
trace_raw_output_i2c_read---of 5
trace_raw_output_i2c_reply---of 4
trace_raw_output_i2c_result---of 5
trace_raw_output_i2c_write---of 4
-----------
SUMMARY24%of 225
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__kernfs_new_node52%of 25
__kernfs_remove80%of 29
kernfs_activate---of 5
kernfs_activate_one75%of 12
kernfs_active80%of 5
kernfs_add_one77%of 17
kernfs_break_active_protection---of 1
kernfs_create_dir_ns75%of 4
kernfs_create_empty_dir---of 5
kernfs_create_root---of 10
kernfs_destroy_root---of 5
kernfs_dir_fop_release---of 4
kernfs_dir_pos---of 22
kernfs_dop_revalidate---of 19
kernfs_drain86%of 21
kernfs_find_and_get_node_by_id---of 27
kernfs_find_and_get_ns100%of 4
kernfs_find_ns85%of 20
kernfs_fop_readdir---of 22
kernfs_free_rcu---of 3
kernfs_get100%of 2
kernfs_get.part.067%of 3
kernfs_get_active---of 11
kernfs_get_parent---of 3
kernfs_iop_lookup---of 11
kernfs_iop_mkdir---of 6
kernfs_iop_rename---of 11
kernfs_iop_rmdir---of 8
kernfs_link_sibling78%of 18
kernfs_name---of 1
kernfs_name_hash80%of 5
kernfs_name_locked---of 12
kernfs_new_node64%of 11
kernfs_next_descendant_post75%of 16
kernfs_node_from_dentry---of 5
kernfs_path_from_node---of 1
kernfs_path_from_node_locked---of 73
kernfs_put100%of 3
kernfs_put.part.0 50%of 14
kernfs_put_active---of 6
kernfs_remove100%of 3
kernfs_remove_by_name_ns88%of 8
kernfs_remove_self---of 12
kernfs_rename_ns---of 20
kernfs_root_to_node---of 1
kernfs_show---of 10
kernfs_unbreak_active_protection---of 2
kernfs_unlink_sibling100%of 7
kernfs_walk_and_get_ns---of 21
pr_cont_kernfs_name---of 1
pr_cont_kernfs_path---of 6
-----------
SUMMARY76%of 227
__bpf_trace_signal_deliver---of 1
__bpf_trace_signal_generate---of 1
__compat_save_altstack---of 1
__copy_siginfo_from_user32---of 3
__copy_siginfo_to_user32---of 2
__do_compat_sys_rt_sigaction---of 12
__do_sys_pidfd_send_signal---of 41
__flush_itimer_signals---of 14
__flush_itimer_signals.cold---of 2
__ia32_compat_sys_rt_sigaction---of 1
__ia32_compat_sys_rt_sigpending---of 8
__ia32_compat_sys_rt_sigprocmask---of 10
__ia32_compat_sys_rt_sigqueueinfo---of 7
__ia32_compat_sys_rt_sigsuspend---of 4
__ia32_compat_sys_rt_sigtimedwait_time32---of 8
__ia32_compat_sys_rt_sigtimedwait_time64---of 8
__ia32_compat_sys_rt_tgsigqueueinfo---of 4
__ia32_compat_sys_sigaction---of 16
__ia32_compat_sys_sigaltstack---of 1
__ia32_compat_sys_sigpending---of 1
__ia32_sys_kill---of 1
__ia32_sys_pidfd_send_signal---of 1
__ia32_sys_restart_syscall---of 1
__ia32_sys_rt_sigaction---of 10
__ia32_sys_rt_sigpending---of 3
__ia32_sys_rt_sigprocmask---of 8
__ia32_sys_rt_sigqueueinfo---of 8
__ia32_sys_rt_sigsuspend---of 4
__ia32_sys_rt_sigtimedwait---of 9
__ia32_sys_rt_sigtimedwait_time32---of 9
__ia32_sys_rt_tgsigqueueinfo---of 5
__ia32_sys_sigaltstack---of 10
__ia32_sys_signal---of 4
__ia32_sys_sigpending---of 2
__ia32_sys_sigprocmask---of 11
__ia32_sys_sigsuspend---of 1
__ia32_sys_ssetmask---of 3
__ia32_sys_tgkill---of 4
__ia32_sys_tkill---of 3
__kill_pgrp_info---of 7
__lock_task_sighand---of 19
__save_altstack---of 1
__send_signal_locked31%of 73
__send_signal_locked.cold---of 3
__set_current_blocked---of 2
__set_task_blocked---of 5
__sigqueue_alloc43%of 35
__traceiter_signal_deliver---of 3
__traceiter_signal_generate---of 3
__x64_compat_sys_rt_sigaction---of 1
__x64_compat_sys_rt_sigpending---of 8
__x64_compat_sys_rt_sigprocmask---of 10
__x64_compat_sys_rt_sigqueueinfo---of 7
__x64_compat_sys_rt_sigsuspend---of 4
__x64_compat_sys_rt_sigtimedwait_time32---of 8
__x64_compat_sys_rt_sigtimedwait_time64---of 8
__x64_compat_sys_rt_tgsigqueueinfo---of 4
__x64_compat_sys_sigaction---of 18
__x64_compat_sys_sigaltstack---of 1
__x64_compat_sys_sigpending---of 1
__x64_sys_kill---of 1
__x64_sys_pause---of 4
__x64_sys_pidfd_send_signal---of 1
__x64_sys_rt_sigaction---of 10
__x64_sys_rt_sigpending---of 3
__x64_sys_rt_sigprocmask---of 8
__x64_sys_rt_sigqueueinfo---of 8
__x64_sys_rt_sigsuspend---of 4
__x64_sys_rt_sigtimedwait---of 9
__x64_sys_rt_sigtimedwait_time32---of 9
__x64_sys_rt_tgsigqueueinfo---of 5
__x64_sys_sgetmask---of 1
__x64_sys_sigaltstack---of 10
__x64_sys_signal---of 4
__x64_sys_sigpending---of 2
__x64_sys_sigprocmask---of 11
__x64_sys_sigsuspend---of 1
__x64_sys_ssetmask---of 3
__x64_sys_tgkill---of 4
__x64_sys_tkill---of 3
calculate_sigpending---of 1
check_kill_permission---of 21
collect_signal---of 18
collect_signal.cold---of 1
compat_restore_altstack---of 2
complete_signal25%of 44
complete_signal.cold---of 3
copy_siginfo100%of 1
copy_siginfo_from_user---of 3
copy_siginfo_from_user32---of 3
copy_siginfo_to_external32---of 13
copy_siginfo_to_user80%of 5
dequeue_signal---of 20
do_compat_sigaltstack---of 10
do_no_restart_syscall---of 1
do_notify_parent---of 68
do_notify_parent_cldstop---of 51
do_notify_pidfd---of 3
do_rt_tgsigqueueinfo---of 7
do_send_sig_info---of 3
do_send_specific---of 19
do_sigaction---of 23
do_sigaltstack.constprop.0---of 21
do_signal_stop---of 28
do_sigpending---of 1
do_sigtimedwait---of 13
exit_signals---of 40
flush_itimer_signals---of 1
flush_signal_handlers---of 5
flush_signals---of 1
flush_sigqueue---of 9
flush_sigqueue_mask---of 11
flush_sigqueue_mask.cold---of 1
force_exit_sig---of 1
force_fatal_sig---of 1
force_sig100%of 1
force_sig_bnderr---of 1
force_sig_fault100%of 1
force_sig_fault_to_task---of 1
force_sig_fault_trapno---of 1
force_sig_info---of 1
force_sig_info_to_task32%of 19
force_sig_info_to_task.cold---of 2
force_sig_mceerr---of 3
force_sig_pkuerr---of 1
force_sig_ptrace_errno_trap---of 1
force_sig_seccomp---of 2
force_sigsegv---of 4
get_signal26%of 143
get_signal.cold---of 3
group_send_sig_info---of 16
ignore_signals---of 2
kernel_sigaction---of 4
kernel_sigaction.cold---of 1
kill_pgrp---of 1
kill_pid---of 1
kill_pid_info---of 1
kill_pid_info_type---of 17
kill_pid_usb_asyncio---of 27
kill_proc_info---of 11
kill_something_info---of 16
lockdep_assert_task_sighand_held---of 21
next_signal---of 4
perf_trace_signal_deliver---of 10
perf_trace_signal_generate---of 10
post_copy_siginfo_from_user.isra.0---of 14
post_copy_siginfo_from_user32---of 17
prepare_kill_siginfo---of 2
prepare_signal29%of 50
prepare_signal.cold---of 3
ptrace_do_notify---of 4
ptrace_notify---of 5
ptrace_stop.part.0---of 20
ptrace_trap_notify---of 8
recalc_sigpending75%of 8
restore_altstack---of 3
retarget_shared_pending.isra.0---of 13
send_sig---of 5
send_sig_fault---of 5
send_sig_fault_trapno---of 5
send_sig_info---of 5
send_sig_mceerr---of 5
send_sig_perf---of 3
send_signal_locked49%of 35
send_sigqueue---of 33
send_sigqueue.cold---of 1
set_compat_user_sigmask---of 6
set_current_blocked---of 2
set_user_sigmask---of 6
siginfo_layout75%of 16
signal_setup_done47%of 15
signal_setup_done.cold---of 1
signal_wake_up_state80%of 5
sigprocmask---of 9
sigqueue_alloc---of 1
sigqueue_free---of 9
sigsuspend---of 6
task_clear_jobctl_pending---of 7
task_clear_jobctl_trapping---of 2
task_join_group_stop---of 4
task_participate_group_stop---of 14
task_set_jobctl_pending---of 11
trace_event_raw_event_signal_deliver---of 11
trace_event_raw_event_signal_generate---of 11
trace_raw_output_signal_deliver---of 5
trace_raw_output_signal_generate---of 4
trace_signal_deliver34%of 15
trace_signal_generate34%of 15
unhandled_signal---of 6
zap_other_threads---of 16
-----------
SUMMARY35%of 481
__acpi_node_get_property_reference---of 31
acpi_data_add_props---of 4
acpi_data_get_property---of 15
acpi_data_prop_read---of 73
acpi_destroy_nondev_subnodes---of 5
acpi_dev_get_property---of 3
acpi_enumerate_nondev_subnodes---of 18
acpi_extract_properties.part.0---of 38
acpi_free_device_properties---of 8
acpi_free_properties---of 3
acpi_fwnode_device_dma_supported---of 6
acpi_fwnode_device_get_dma_attr---of 6
acpi_fwnode_device_get_match_data---of 1
acpi_fwnode_device_is_available---of 6
acpi_fwnode_get_name---of 12
acpi_fwnode_get_name_prefix---of 5
acpi_fwnode_get_named_child_node---of 9
acpi_fwnode_get_parent---of 10
acpi_fwnode_get_reference_args---of 1
acpi_fwnode_graph_parse_endpoint---of 6
acpi_fwnode_irq_get---of 1
acpi_fwnode_property_present---of 7
acpi_fwnode_property_read_int_array---of 9
acpi_fwnode_property_read_string_array---of 7
acpi_get_next_subnode---of 42
acpi_get_ref_args---of 19
acpi_graph_get_child_prop_value---of 8
acpi_graph_get_next_endpoint---of 15
acpi_graph_get_remote_endpoint---of 8
acpi_init_properties---of 45
acpi_node_get_parent---of 10
acpi_node_prop_get---of 7
acpi_nondev_subnode_data_ok---of 4
acpi_nondev_subnode_extract---of 16
acpi_parse_string_ref---of 18
acpi_tie_nondev_subnodes---of 7
acpi_untie_nondev_subnodes---of 18
is_acpi_data_node---of 4
is_acpi_device_node50%of 4
is_acpi_graph_node---of 9
stop_on_next---of 6
-----------
SUMMARY50%of 4
_prb_commit60%of 10
_prb_commit.cold---of 1
_prb_read_valid67%of 36
_prb_read_valid.cold---of 3
data_alloc92%of 12
data_alloc.cold---of 6
data_push_tail75%of 20
data_push_tail.cold---of 5
desc_make_final100%of 4
desc_make_final.cold---of 1
desc_read91%of 11
desc_read.cold---of 3
desc_read_finalized_seq86%of 7
desc_update_last_finalized100%of 8
get_data45%of 27
get_data.cold---of 10
get_next_lpos60%of 5
get_next_lpos.cold---of 4
prb_commit---of 2
prb_final_commit100%of 1
prb_first_seq---of 4
prb_first_valid_seq---of 3
prb_init---of 7
prb_init.cold---of 3
prb_next_reserve_seq---of 9
prb_next_reserve_seq.cold---of 4
prb_next_seq---of 6
prb_read_valid100%of 1
prb_read_valid_info---of 1
prb_record_text_space---of 1
prb_reserve60%of 55
prb_reserve.cold---of 13
prb_reserve_in_last---of 67
prb_reserve_in_last.cold---of 22
space_used70%of 10
space_used.cold---of 13
-----------
SUMMARY69%of 207
atp_calculate_abs---of 22
atp_close---of 1
atp_complete_geyser_1_2---of 45
atp_complete_geyser_3_4---of 47
atp_disconnect100%of 3
atp_geyser_init---of 20
atp_open67%of 3
atp_probe---of 26
atp_reinit---of 4
atp_reset_resume---of 6
atp_resume---of 3
atp_status_check---of 11
atp_suspend---of 1
-----------
SUMMARY84%of 6
-----------
SUMMARY---of 0
__generic_file_fsync---of 8
alloc_anon_inode---of 3
always_delete_dentry100%of 1
const_folio_flags.constprop.0---of 10
dcache_dir_close---of 1
dcache_dir_lseek---of 15
dcache_dir_open---of 2
dcache_readdir---of 22
direct_write_fallback---of 7
empty_dir_getattr---of 1
empty_dir_listxattr---of 1
empty_dir_llseek---of 1
empty_dir_lookup---of 1
empty_dir_readdir---of 7
empty_dir_setattr---of 1
folio_flags.constprop.0---of 10
generic_check_addressable---of 5
generic_check_addressable.cold---of 1
generic_encode_ino32_fh---of 6
generic_fh_to_dentry---of 5
generic_fh_to_parent---of 7
generic_file_fsync---of 4
generic_read_dir---of 1
init_pseudo---of 3
inode_maybe_inc_iversion67%of 6
inode_query_iversion---of 6
is_empty_dir_inode---of 4
kfree_link---of 1
make_empty_dir_inode---of 1
memory_read_from_buffer---of 4
noop_direct_IO---of 1
noop_fsync---of 1
offset_dir_llseek---of 6
offset_readdir---of 35
path_from_stashed---of 54
pseudo_fs_fill_super---of 5
pseudo_fs_free---of 1
pseudo_fs_get_tree---of 1
scan_positives---of 18
simple_attr_open---of 3
simple_attr_read---of 14
simple_attr_release---of 1
simple_attr_write---of 1
simple_attr_write_signed---of 1
simple_attr_write_xsigned.constprop.0.isra.0---of 11
simple_empty---of 8
simple_fill_super---of 12
simple_get_link---of 1
simple_getattr---of 1
simple_inode_init_ts100%of 1
simple_link---of 3
simple_lookup84%of 6
simple_nosetlease---of 1
simple_offset_add100%of 4
simple_offset_destroy---of 1
simple_offset_empty---of 8
simple_offset_init---of 1
simple_offset_remove---of 2
simple_offset_rename_exchange---of 22
simple_open---of 3
simple_pin_fs43%of 7
simple_read_folio---of 3
simple_read_folio.cold---of 1
simple_read_from_buffer38%of 8
simple_recursive_removal87%of 38
simple_release_fs67%of 3
simple_rename---of 11
simple_rename_exchange---of 11
simple_rename_timestamp---of 4
simple_rmdir---of 3
simple_setattr---of 5
simple_statfs---of 1
simple_transaction_get---of 6
simple_transaction_read---of 3
simple_transaction_release---of 1
simple_transaction_set---of 3
simple_unlink---of 1
simple_write_begin---of 13
simple_write_begin.cold---of 3
simple_write_end---of 21
simple_write_end.cold---of 1
simple_write_to_buffer---of 8
stashed_dentry_prune---of 4
zero_user_segments---of 15
zero_user_segments.cold---of 2
-----------
SUMMARY76%of 74
-----------
SUMMARY---of 0
int_callback---of 4
kaweth_async_set_rx_mode---of 6
kaweth_close---of 3
kaweth_disconnect60%of 5
kaweth_download_firmware---of 12
kaweth_get_link---of 1
kaweth_open---of 12
kaweth_probe---of 60
kaweth_resubmit_int_urb---of 4
kaweth_resubmit_rx_urb---of 6
kaweth_resubmit_tl---of 5
kaweth_resume---of 3
kaweth_set_rx_mode---of 9
kaweth_start_xmit---of 21
kaweth_suspend---of 1
kaweth_tx_timeout---of 5
kaweth_usb_receive---of 29
kaweth_usb_transmit_complete---of 6
-----------
SUMMARY60%of 5
is_seen---of 1
net_ctl_header_lookup---of 1
net_ctl_permissions---of 5
net_ctl_set_ownership---of 4
register_net_sysctl_sz---of 22
sysctl_net_exit---of 1
sysctl_net_init---of 1
unregister_net_sysctl_table100%of 1
-----------
SUMMARY100%of 1
__proc_create---of 26
__xlate_proc_name29%of 14
_proc_mkdir---of 7
pde_free---of 5
pde_put56%of 9
proc_alloc_inum---of 3
proc_create---of 5
proc_create_data---of 5
proc_create_mount_point---of 3
proc_create_reg---of 12
proc_create_seq_private---of 3
proc_create_single_data---of 3
proc_free_inum---of 1
proc_get_parent_data---of 1
proc_getattr---of 4
proc_lookup---of 3
proc_lookup_de---of 18
proc_match100%of 4
proc_misc_d_delete---of 1
proc_misc_d_revalidate---of 3
proc_mkdir---of 3
proc_mkdir_data---of 5
proc_mkdir_mode---of 5
proc_net_d_revalidate---of 1
proc_notify_change---of 4
proc_readdir---of 3
proc_readdir_de---of 25
proc_register---of 16
proc_remove100%of 2
proc_seq_open---of 5
proc_seq_release---of 5
proc_set_size---of 1
proc_set_user---of 1
proc_simple_write---of 8
proc_single_open---of 1
proc_symlink---of 7
remove_proc_entry64%of 25
remove_proc_subtree84%of 25
-----------
SUMMARY66%of 79
atkbd_apply_forced_release_keylist---of 4
atkbd_attr_is_visible---of 5
atkbd_cleanup---of 1
atkbd_connect---of 26
atkbd_disconnect---of 1
atkbd_do_set_extra---of 18
atkbd_do_set_force_release---of 7
atkbd_do_set_scroll---of 13
atkbd_do_set_set---of 18
atkbd_do_set_softraw---of 13
atkbd_do_set_softrepeat---of 16
atkbd_do_show_err_count---of 1
atkbd_do_show_extra---of 1
atkbd_do_show_force_release---of 1
atkbd_do_show_function_row_physmap---of 1
atkbd_do_show_scroll---of 1
atkbd_do_show_set---of 1
atkbd_do_show_softraw---of 1
atkbd_do_show_softrepeat---of 1
atkbd_event86%of 7
atkbd_event_work---of 7
atkbd_oqo_01plus_scancode_fixup---of 5
atkbd_pre_receive_byte---of 4
atkbd_probe---of 20
atkbd_receive_byte---of 75
atkbd_reconnect---of 14
atkbd_schedule_event_work100%of 2
atkbd_select_set---of 17
atkbd_set_device_attrs---of 21
atkbd_set_keycode_table---of 26
atkbd_set_leds.isra.0---of 12
atkbd_set_repeat_rate.isra.0---of 9
-----------
SUMMARY89%of 9
__snd_card_release---of 5
card_id_ok---of 7
default_release_alloc100%of 1
id_show---of 1
id_store---of 13
number_show---of 1
release_card_device88%of 8
snd_card_add_dev_attr---of 7
snd_card_disconnect100%of 2
snd_card_disconnect.part.0 44%of 23
snd_card_disconnect_sync---of 10
snd_card_file_add---of 9
snd_card_file_remove---of 19
snd_card_free100%of 5
snd_card_free_on_error---of 6
snd_card_free_when_closed100%of 2
snd_card_info_read---of 5
snd_card_init---of 51
snd_card_locked---of 1
snd_card_new---of 6
snd_card_ref---of 3
snd_card_register---of 25
snd_card_set_id---of 2
snd_card_set_id_no_lock---of 48
snd_component_add---of 23
snd_device_alloc---of 5
snd_devm_card_new---of 5
snd_disconnect_fasync---of 1
snd_disconnect_ioctl---of 1
snd_disconnect_llseek---of 1
snd_disconnect_mmap---of 1
snd_disconnect_poll---of 1
snd_disconnect_read---of 1
snd_disconnect_release---of 12
snd_disconnect_write---of 1
snd_power_ref_and_wait---of 11
snd_power_wait---of 4
trigger_card_free---of 5
-----------
SUMMARY66%of 41
ima_add_violation---of 6
ima_alloc_init_template---of 16
ima_audit_measurement---of 8
ima_collect_measurement---of 27
ima_d_path---of 6
ima_free_template_entry---of 3
ima_get_action100%of 1
ima_store_measurement---of 17
ima_store_measurement.cold---of 2
ima_store_template---of 5
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__handle_irq_event_percpu38%of 45
__irq_wake_thread---of 3
handle_bad_irq---of 24
handle_irq_event100%of 3
handle_irq_event_percpu---of 4
no_action---of 1
-----------
SUMMARY42%of 48
__bpf_trace_balance_dirty_pages---of 1
__bpf_trace_bdi_dirty_ratelimit---of 1
__bpf_trace_flush_foreign---of 1
__bpf_trace_global_dirty_state---of 1
__bpf_trace_inode_foreign_history---of 1
__bpf_trace_inode_switch_wbs---of 1
__bpf_trace_track_foreign_dirty---of 1
__bpf_trace_wbc_class---of 1
__bpf_trace_writeback_bdi_register---of 1
__bpf_trace_writeback_class---of 1
__bpf_trace_writeback_dirty_inode_template---of 1
__bpf_trace_writeback_folio_template---of 1
__bpf_trace_writeback_inode_template---of 1
__bpf_trace_writeback_pages_written---of 1
__bpf_trace_writeback_queue_io---of 1
__bpf_trace_writeback_sb_inodes_requeue---of 1
__bpf_trace_writeback_single_inode_template---of 1
__bpf_trace_writeback_work_class---of 1
__bpf_trace_writeback_write_inode_template---of 1
__inode_attach_wb4%of 81
__inode_wait_for_writeback50%of 4
__mark_inode_dirty32%of 92
__traceiter_balance_dirty_pages---of 3
__traceiter_bdi_dirty_ratelimit---of 3
__traceiter_flush_foreign---of 3
__traceiter_folio_wait_writeback---of 3
__traceiter_global_dirty_state---of 3
__traceiter_inode_foreign_history---of 3
__traceiter_inode_switch_wbs---of 3
__traceiter_sb_clear_inode_writeback---of 3
__traceiter_sb_mark_inode_writeback---of 3
__traceiter_track_foreign_dirty---of 3
__traceiter_wbc_writepage---of 3
__traceiter_writeback_bdi_register---of 3
__traceiter_writeback_dirty_folio---of 3
__traceiter_writeback_dirty_inode---of 3
__traceiter_writeback_dirty_inode_enqueue---of 3
__traceiter_writeback_dirty_inode_start---of 3
__traceiter_writeback_exec---of 3
__traceiter_writeback_lazytime---of 3
__traceiter_writeback_lazytime_iput---of 3
__traceiter_writeback_mark_inode_dirty---of 3
__traceiter_writeback_pages_written---of 3
__traceiter_writeback_queue---of 3
__traceiter_writeback_queue_io---of 3
__traceiter_writeback_sb_inodes_requeue---of 3
__traceiter_writeback_single_inode---of 3
__traceiter_writeback_single_inode_start---of 3
__traceiter_writeback_start---of 3
__traceiter_writeback_wait---of 3
__traceiter_writeback_wake_background---of 3
__traceiter_writeback_write_inode---of 3
__traceiter_writeback_write_inode_start---of 3
__traceiter_writeback_written---of 3
__writeback_inodes_sb_nr---of 6
__writeback_inodes_wb---of 8
__writeback_single_inode---of 89
bdi_split_work_to_wbs---of 72
cgroup_writeback_by_id---of 76
cgroup_writeback_umount---of 2
cleanup_offline_cgwb---of 44
const_folio_flags.constprop.0---of 10
dirtytime_interval_handler---of 5
finish_writeback_work.constprop.0---of 5
folio_memcg---of 20
inode_cgwb_move_to_attached---of 15
inode_io_list_del---of 3
inode_io_list_move_locked---of 15
inode_prepare_wbs_switch---of 11
inode_sleep_on_writeback---of 3
inode_switch_wbs---of 61
inode_switch_wbs_work_fn---of 106
inode_wait_for_writeback100%of 1
locked_inode_to_wb_and_lock_list14%of 60
move_expired_inodes---of 25
percpu_ref_tryget_many.constprop.0---of 17
perf_trace_balance_dirty_pages---of 14
perf_trace_bdi_dirty_ratelimit---of 12
perf_trace_flush_foreign---of 12
perf_trace_global_dirty_state---of 5
perf_trace_inode_foreign_history---of 14
perf_trace_inode_switch_wbs---of 12
perf_trace_track_foreign_dirty---of 16
perf_trace_wbc_class---of 14
perf_trace_writeback_bdi_register---of 12
perf_trace_writeback_class---of 12
perf_trace_writeback_dirty_inode_template---of 12
perf_trace_writeback_folio_template---of 17
perf_trace_writeback_inode_template---of 5
perf_trace_writeback_pages_written---of 5
perf_trace_writeback_queue_io---of 12
perf_trace_writeback_sb_inodes_requeue---of 17
perf_trace_writeback_single_inode_template---of 14
perf_trace_writeback_work_class---of 14
perf_trace_writeback_write_inode_template---of 14
queue_io---of 24
redirty_tail_locked---of 10
sb_clear_inode_writeback---of 20
sb_mark_inode_writeback---of 20
sync_inode_metadata---of 1
sync_inodes_sb---of 38
trace_event_raw_event_balance_dirty_pages---of 15
trace_event_raw_event_bdi_dirty_ratelimit---of 13
trace_event_raw_event_flush_foreign---of 13
trace_event_raw_event_global_dirty_state---of 6
trace_event_raw_event_inode_foreign_history---of 15
trace_event_raw_event_inode_switch_wbs---of 13
trace_event_raw_event_track_foreign_dirty---of 17
trace_event_raw_event_wbc_class---of 15
trace_event_raw_event_writeback_bdi_register---of 13
trace_event_raw_event_writeback_class---of 13
trace_event_raw_event_writeback_dirty_inode_template---of 13
trace_event_raw_event_writeback_folio_template---of 18
trace_event_raw_event_writeback_inode_template---of 6
trace_event_raw_event_writeback_pages_written---of 6
trace_event_raw_event_writeback_queue_io---of 13
trace_event_raw_event_writeback_sb_inodes_requeue---of 18
trace_event_raw_event_writeback_single_inode_template---of 15
trace_event_raw_event_writeback_work_class---of 15
trace_event_raw_event_writeback_write_inode_template---of 15
trace_raw_output_balance_dirty_pages---of 4
trace_raw_output_bdi_dirty_ratelimit---of 4
trace_raw_output_flush_foreign---of 5
trace_raw_output_global_dirty_state---of 4
trace_raw_output_inode_foreign_history---of 5
trace_raw_output_inode_switch_wbs---of 5
trace_raw_output_track_foreign_dirty---of 4
trace_raw_output_wbc_class---of 4
trace_raw_output_writeback_bdi_register---of 5
trace_raw_output_writeback_class---of 5
trace_raw_output_writeback_dirty_inode_template---of 4
trace_raw_output_writeback_folio_template---of 5
trace_raw_output_writeback_inode_template---of 4
trace_raw_output_writeback_pages_written---of 5
trace_raw_output_writeback_queue_io---of 4
trace_raw_output_writeback_sb_inodes_requeue---of 4
trace_raw_output_writeback_single_inode_template---of 4
trace_raw_output_writeback_work_class---of 4
trace_raw_output_writeback_write_inode_template---of 5
try_to_writeback_inodes_sb---of 2
wakeup_dirtytime_writeback---of 17
wakeup_flusher_threads---of 18
wakeup_flusher_threads_bdi---of 14
wb_io_lists_depopulated---of 6
wb_io_lists_populated.part.0---of 3
wb_queue_work---of 23
wb_start_background_writeback---of 16
wb_start_writeback---of 4
wb_wait_for_completion---of 6
wb_wakeup---of 3
wb_wakeup_delayed---of 3
wb_workfn---of 74
wb_writeback---of 64
wbc_account_cgroup_owner---of 19
wbc_attach_and_unlock_inode---of 29
wbc_detach_inode---of 48
write_inode_now---of 3
writeback_inodes_sb---of 1
writeback_inodes_sb_nr---of 1
writeback_inodes_wb.constprop.0---of 3
writeback_sb_inodes---of 59
writeback_single_inode---of 22
xas_next_marked.constprop.0---of 17
-----------
SUMMARY19%of 238
-----------
SUMMARY---of 0
__change_pid---of 15
__ia32_sys_pidfd_getfd---of 9
__ia32_sys_pidfd_open---of 7
__task_pid_nr_ns54%of 26
__x64_sys_pidfd_getfd---of 9
__x64_sys_pidfd_open---of 7
alloc_pid---of 45
attach_pid---of 5
change_pid---of 5
delayed_put_pid---of 2
detach_pid---of 1
disable_pid_allocation---of 1
exchange_tids---of 5
find_ge_pid---of 1
find_get_pid---of 19
find_get_task_by_vpid---of 19
find_pid_ns---of 1
find_task_by_pid_ns---of 6
find_task_by_vpid---of 3
find_vpid---of 3
free_pid---of 8
get_pid_task---of 17
get_task_pid---of 25
pid_nr_ns---of 6
pid_task---of 11
pid_vnr---of 8
pidfd_get_pid---of 12
pidfd_get_task---of 6
pidfd_getfd---of 16
put_pid50%of 2
put_pid.part.0---of 5
task_active_pid_ns100%of 3
transfer_pid---of 5
-----------
SUMMARY59%of 31
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
koneplus_probe---of 18
koneplus_raw_event---of 14
koneplus_remove100%of 5
koneplus_sysfs_read.constprop.0---of 7
koneplus_sysfs_read_info---of 6
koneplus_sysfs_read_profile_buttons---of 6
koneplus_sysfs_read_profile_settings---of 6
koneplus_sysfs_read_profilex_buttons---of 5
koneplus_sysfs_read_profilex_settings---of 5
koneplus_sysfs_read_sensor---of 6
koneplus_sysfs_read_tcu---of 6
koneplus_sysfs_read_tcu_image---of 6
koneplus_sysfs_set_actual_profile---of 7
koneplus_sysfs_show_actual_profile---of 1
koneplus_sysfs_show_firmware_version---of 1
koneplus_sysfs_write_control---of 5
koneplus_sysfs_write_info---of 5
koneplus_sysfs_write_macro---of 5
koneplus_sysfs_write_profile_buttons---of 5
koneplus_sysfs_write_profile_settings---of 5
koneplus_sysfs_write_sensor---of 5
koneplus_sysfs_write_talk---of 5
koneplus_sysfs_write_tcu---of 5
-----------
SUMMARY100%of 5
__percpu_counter_compare---of 6
__percpu_counter_init_many---of 11
__percpu_counter_limited_add46%of 37
__percpu_counter_sum100%of 7
compute_batch_value---of 1
percpu_counter_add_batch73%of 11
percpu_counter_cpu_dead---of 3
percpu_counter_destroy_many84%of 12
percpu_counter_fixup_free---of 4
percpu_counter_set---of 6
percpu_counter_sync---of 1
-----------
SUMMARY63%of 67
__snd_ctl_add_replace---of 24
__snd_ctl_remove88%of 16
_snd_ctl_register_ioctl---of 5
_snd_ctl_unregister_ioctl---of 12
add_hash_entries---of 7
copy_ctl_value_from_user---of 33
copy_ctl_value_to_user---of 14
ctl_elem_read_user---of 6
ctl_elem_write_user---of 6
elem_id_matches---of 7
get_ctl_id_hash100%of 4
remove_hash_entries88%of 8
snd_ctl_activate_id---of 12
snd_ctl_add---of 1
snd_ctl_add_replace---of 11
snd_ctl_boolean_mono_info---of 1
snd_ctl_boolean_stereo_info---of 1
snd_ctl_create---of 9
snd_ctl_dev_disconnect78%of 9
snd_ctl_dev_free100%of 6
snd_ctl_dev_register---of 6
snd_ctl_disconnect_layer---of 8
snd_ctl_elem_add---of 56
snd_ctl_elem_add_compat---of 14
snd_ctl_elem_add_user---of 5
snd_ctl_elem_info---of 19
snd_ctl_elem_info_user---of 4
snd_ctl_elem_list---of 12
snd_ctl_elem_read---of 14
snd_ctl_elem_user_enum_info---of 8
snd_ctl_elem_user_free---of 3
snd_ctl_elem_user_get---of 7
snd_ctl_elem_user_info---of 4
snd_ctl_elem_user_put---of 6
snd_ctl_elem_user_tlv---of 24
snd_ctl_elem_write---of 21
snd_ctl_empty_read_queue---of 6
snd_ctl_enum_info---of 12
snd_ctl_fasync---of 1
snd_ctl_find_id---of 3
snd_ctl_find_id_locked---of 21
snd_ctl_find_numid---of 3
snd_ctl_find_numid_locked---of 9
snd_ctl_free_one---of 4
snd_ctl_get_preferred_subdevice---of 7
snd_ctl_ioctl---of 95
snd_ctl_ioctl_compat---of 39
snd_ctl_new---of 6
snd_ctl_new1---of 16
snd_ctl_notify15%of 21
snd_ctl_notify_one67%of 3
snd_ctl_open---of 28
snd_ctl_poll---of 8
snd_ctl_read---of 24
snd_ctl_register_ioctl---of 1
snd_ctl_register_ioctl_compat---of 1
snd_ctl_register_layer---of 6
snd_ctl_release---of 13
snd_ctl_remove100%of 3
snd_ctl_remove_id---of 5
snd_ctl_remove_user_ctl---of 10
snd_ctl_rename---of 9
snd_ctl_rename_id---of 5
snd_ctl_replace---of 1
snd_ctl_request_layer---of 8
snd_ctl_tlv_ioctl---of 36
snd_ctl_unregister_ioctl---of 1
snd_ctl_unregister_ioctl_compat---of 1
-----------
SUMMARY66%of 70
extra_decode_ctrl14---of 17
extra_decode_xrpn---of 13
note_decode---of 1
note_event---of 1
one_param_ctrl_event---of 1
one_param_decode---of 1
one_param_event---of 1
pitchbend_ctrl_event---of 1
pitchbend_decode---of 1
snd_midi_event_decode---of 30
snd_midi_event_encode_byte---of 28
snd_midi_event_free100%of 2
snd_midi_event_new---of 6
snd_midi_event_no_status---of 1
snd_midi_event_reset_decode---of 2
snd_midi_event_reset_encode---of 2
songpos_decode---of 1
songpos_event---of 1
two_param_ctrl_event---of 1
two_param_decode---of 1
-----------
SUMMARY100%of 2
-----------
SUMMARY---of 0
_snd_pcm_new---of 22
do_pcm_suspend---of 4
pcm_class_show---of 3
snd_pcm_attach_substream---of 42
snd_pcm_control_ioctl---of 31
snd_pcm_detach_substream---of 10
snd_pcm_dev_disconnect75%of 24
snd_pcm_dev_free80%of 5
snd_pcm_dev_register---of 27
snd_pcm_format_name---of 5
snd_pcm_free_stream50%of 6
snd_pcm_new---of 1
snd_pcm_new_internal---of 1
snd_pcm_new_stream---of 38
snd_pcm_proc_info_read.part.0---of 4
snd_pcm_proc_read---of 7
snd_pcm_stream_proc_info_read---of 2
snd_pcm_substream_proc_hw_params_read---of 10
snd_pcm_substream_proc_info_read---of 2
snd_pcm_substream_proc_status_read---of 9
snd_pcm_substream_proc_sw_params_read---of 7
snd_pcm_xrun_debug_read---of 1
snd_pcm_xrun_debug_write---of 3
snd_pcm_xrun_injection_write---of 1
-----------
SUMMARY72%of 35
__bpf_trace_netlink_extack---of 1
__netlink_change_ngroups---of 14
__netlink_clear_multicast_users---of 4
__netlink_create---of 3
__netlink_dump_start---of 25
__netlink_kernel_create---of 25
__netlink_ns_capable---of 4
__netlink_seq_next---of 8
__nlmsg_put100%of 1
__rhashtable_lookup.constprop.0---of 29
__traceiter_netlink_extack---of 3
deferred_put_nlk_sk---of 8
do_trace_netlink_extack---of 15
jhash---of 16
netlink_ack---of 66
netlink_add_tap---of 20
netlink_alloc_large_skb---of 10
netlink_attachskb---of 27
netlink_autobind.isra.0---of 32
netlink_bind---of 31
netlink_broadcast100%of 1
netlink_broadcast_filtered68%of 58
netlink_capable---of 4
netlink_change_ngroups---of 1
netlink_compare---of 3
netlink_compare_arg_init---of 1
netlink_connect---of 13
netlink_create---of 16
netlink_data_ready---of 1
netlink_deliver_tap24%of 52
netlink_detachskb---of 5
netlink_dump---of 36
netlink_dump_done---of 12
netlink_getname---of 10
netlink_getsockbyfilp---of 8
netlink_getsockopt---of 16
netlink_has_listeners53%of 21
netlink_hash---of 1
netlink_insert---of 120
netlink_ioctl---of 1
netlink_kernel_release---of 3
netlink_lookup---of 30
netlink_net_capable---of 4
netlink_net_exit---of 1
netlink_net_init---of 2
netlink_ns_capable---of 4
netlink_overrun---of 4
netlink_rcv_skb---of 11
netlink_realloc_groups---of 5
netlink_recvmsg---of 45
netlink_register_notifier---of 1
netlink_release---of 129
netlink_remove_tap---of 24
netlink_sendmsg---of 41
netlink_sendskb---of 8
netlink_seq_next---of 1
netlink_seq_show---of 9
netlink_seq_start---of 5
netlink_seq_stop---of 6
netlink_set_err---of 15
netlink_setsockopt---of 28
netlink_skb_destructor---of 7
netlink_skb_set_owner_r50%of 4
netlink_sock_destruct---of 12
netlink_sock_destruct_work---of 1
netlink_strict_get_check---of 1
netlink_table_grab---of 6
netlink_table_ungrab---of 1
netlink_tap_init_net---of 16
netlink_trim67%of 9
netlink_undo_bind---of 6
netlink_unicast---of 33
netlink_unregister_notifier---of 1
netlink_update_listeners---of 15
netlink_update_socket_mc---of 4
netlink_update_subscriptions---of 9
nlmsg_notify44%of 16
perf_trace_netlink_extack---of 6
trace_event_raw_event_netlink_extack---of 7
trace_raw_output_netlink_extack---of 5
-----------
SUMMARY49%of 162
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
_setid_policy_lookup---of 11
id_permitted_for_cred---of 13
safesetid_security_capable23%of 9
safesetid_task_fix_setgid---of 8
safesetid_task_fix_setgroups---of 25
safesetid_task_fix_setuid---of 8
setid_policy_lookup---of 24
-----------
SUMMARY23%of 9
__v4l2_ctrl_grab---of 9
__v4l2_ctrl_handler_setup---of 22
check_range---of 26
check_range.cold---of 1
cur_to_new---of 5
cur_to_req---of 4
fill_event---of 8
find_ref---of 18
find_ref_lock---of 3
handler_new_ref---of 36
new_to_cur---of 20
new_to_req---of 4
ptr_to_ptr---of 3
req_alloc_array---of 7
req_to_new---of 22
send_event---of 8
send_initial_event---of 2
try_or_set_cluster---of 42
update_from_auto_cluster---of 17
v4l2_ctrl_activate---of 6
v4l2_ctrl_add_handler---of 13
v4l2_ctrl_auto_cluster---of 15
v4l2_ctrl_cluster---of 8
v4l2_ctrl_find---of 5
v4l2_ctrl_handler_free77%of 17
v4l2_ctrl_handler_init_class---of 2
v4l2_ctrl_handler_log_status---of 22
v4l2_ctrl_handler_setup---of 3
v4l2_ctrl_new---of 136
v4l2_ctrl_new_custom---of 24
v4l2_ctrl_new_fwnode_properties---of 9
v4l2_ctrl_new_int_menu---of 5
v4l2_ctrl_new_std---of 7
v4l2_ctrl_new_std_compound---of 5
v4l2_ctrl_new_std_menu---of 10
v4l2_ctrl_new_std_menu_items---of 8
v4l2_ctrl_notify---of 7
v4l2_ctrl_radio_filter---of 5
v4l2_ctrl_type_op_equal---of 7
v4l2_ctrl_type_op_init---of 33
v4l2_ctrl_type_op_log---of 41
v4l2_ctrl_type_op_validate---of 263
v4l2_ctrl_type_op_validate.cold---of 2
-----------
SUMMARY77%of 17
__check_for_non_generic_match100%of 5
usb_choose_configuration43%of 47
usb_generic_driver_disconnect50%of 2
usb_generic_driver_match100%of 3
usb_generic_driver_probe43%of 7
usb_generic_driver_resume86%of 7
usb_generic_driver_suspend---of 9
-----------
SUMMARY54%of 71
tomoyo_assign_domain---of 33
tomoyo_assign_namespace---of 14
tomoyo_check_acl79%of 14
tomoyo_dump_page---of 25
tomoyo_find_namespace---of 6
tomoyo_find_next_domain---of 100
tomoyo_same_aggregator---of 4
tomoyo_same_transition_control---of 5
tomoyo_update_domain---of 24
tomoyo_update_policy---of 13
tomoyo_write_aggregator---of 12
tomoyo_write_transition_control---of 22
-----------
SUMMARY79%of 14
-----------
SUMMARY---of 0
dnotify_flush6%of 18
dnotify_free_mark---of 3
dnotify_handle_event---of 11
dnotify_recalc_inode_mask---of 6
fcntl_dirnotify---of 68
-----------
SUMMARY6%of 18
-----------
SUMMARY---of 0
change_clocksource---of 14
do_adjtimex---of 46
do_settimeofday64---of 20
do_settimeofday64.cold---of 2
do_timer---of 1
dummy_clock_read---of 5
get_device_system_crosststamp---of 46
get_device_system_crosststamp.cold---of 2
getboottime64---of 1
ktime_get52%of 29
ktime_get.cold---of 1
ktime_get_boot_fast_ns---of 1
ktime_get_coarse_real_ts64 60%of 10
ktime_get_coarse_real_ts64.cold---of 1
ktime_get_coarse_ts64---of 11
ktime_get_coarse_ts64.cold---of 1
ktime_get_coarse_with_offset---of 13
ktime_get_coarse_with_offset.cold---of 1
ktime_get_fast_timestamps---of 8
ktime_get_fast_timestamps.cold---of 1
ktime_get_mono_fast_ns100%of 4
ktime_get_mono_fast_ns.cold---of 1
ktime_get_raw---of 27
ktime_get_raw.cold---of 1
ktime_get_raw_fast_ns---of 4
ktime_get_raw_fast_ns.cold---of 1
ktime_get_raw_ts64---of 30
ktime_get_raw_ts64.cold---of 1
ktime_get_real_fast_ns---of 4
ktime_get_real_fast_ns.cold---of 1
ktime_get_real_seconds100%of 1
ktime_get_real_ts64---of 32
ktime_get_real_ts64.cold---of 1
ktime_get_resolution_ns---of 13
ktime_get_resolution_ns.cold---of 1
ktime_get_seconds---of 4
ktime_get_snapshot---of 16
ktime_get_snapshot.cold---of 2
ktime_get_tai_fast_ns---of 1
ktime_get_ts64---of 32
ktime_get_ts64.cold---of 1
ktime_get_update_offsets_now52%of 31
ktime_get_update_offsets_now.cold---of 1
ktime_get_with_offset59%of 29
ktime_get_with_offset.cold---of 1
ktime_mono_to_any55%of 11
pvclock_gtod_register_notifier---of 1
pvclock_gtod_unregister_notifier---of 1
random_get_entropy_fallback---of 5
scale64_check_overflow---of 4
timekeeping_advance45%of 79
timekeeping_advance.cold---of 12
timekeeping_forward_now.constprop.0---of 8
timekeeping_forward_now.constprop.0.cold---of 6
timekeeping_inject_offset---of 22
timekeeping_inject_offset.cold---of 8
timekeeping_max_deferment---of 10
timekeeping_notify---of 3
timekeeping_resume---of 26
timekeeping_resume.cold---of 7
timekeeping_suspend---of 11
timekeeping_suspend.cold---of 1
timekeeping_update64%of 11
timekeeping_update.cold---of 1
timekeeping_valid_for_hres---of 10
timekeeping_warp_clock---of 3
tk_set_wall_to_mono---of 7
tk_setup_internals.constprop.0---of 13
tk_setup_internals.constprop.0.cold---of 4
update_fast_timekeeper100%of 1
update_wall_time50%of 2
-----------
SUMMARY53%of 208
-----------
SUMMARY---of 0
endpoint_set_interface---of 12
init_sample_rate---of 18
prepare_inbound_urb---of 6
prepare_outbound_urb---of 24
release_urbs50%of 8
slave_next_packet_size---of 8
slave_next_packet_size.cold---of 1
snd_complete_urb---of 78
snd_complete_urb.cold---of 2
snd_usb_add_endpoint---of 15
snd_usb_endpoint_close---of 12
snd_usb_endpoint_compatible---of 10
snd_usb_endpoint_free_all50%of 6
snd_usb_endpoint_get_clock_rate---of 7
snd_usb_endpoint_implicit_feedback_sink---of 3
snd_usb_endpoint_next_packet_size---of 15
snd_usb_endpoint_open---of 67
snd_usb_endpoint_prepare---of 18
snd_usb_endpoint_release100%of 1
snd_usb_endpoint_set_callback---of 3
snd_usb_endpoint_set_params---of 78
snd_usb_endpoint_set_params.cold---of 10
snd_usb_endpoint_set_sync---of 1
snd_usb_endpoint_start---of 48
snd_usb_endpoint_stop---of 16
snd_usb_endpoint_suspend---of 4
snd_usb_endpoint_sync_pending_stop---of 2
snd_usb_get_endpoint---of 4
snd_usb_queue_pending_output_urbs---of 33
stop_urbs28%of 11
wait_clear_urbs.isra.0 20%of 10
-----------
SUMMARY37%of 36
-----------
SUMMARY---of 0
evm_file_release86%of 7
evm_inode_alloc_security100%of 3
evm_inode_copy_up_xattr---of 2
evm_inode_init_security15%of 14
evm_inode_post_remove_acl---of 1
evm_inode_post_removexattr---of 7
evm_inode_post_set_acl---of 8
evm_inode_post_setattr13%of 8
evm_inode_post_setxattr---of 8
evm_inode_remove_acl---of 1
evm_inode_removexattr---of 3
evm_inode_set_acl---of 15
evm_inode_setattr20%of 20
evm_inode_setxattr---of 6
evm_post_path_mknod---of 4
evm_protect_xattr.isra.0---of 34
evm_protected_xattr---of 1
evm_protected_xattr_common---of 8
evm_protected_xattr_if_enabled---of 1
evm_read_protected_xattrs---of 16
evm_revalidate_status---of 6
evm_verify_current_integrity---of 6
evm_verify_hmac---of 44
evm_verifyxattr---of 6
is_unsupported_fs.isra.0 50%of 4
-----------
SUMMARY33%of 56
scsi_autopm_get_device---of 5
scsi_autopm_get_host40%of 5
scsi_autopm_get_target---of 1
scsi_autopm_put_device---of 1
scsi_autopm_put_host---of 1
scsi_autopm_put_target---of 1
scsi_bus_freeze---of 13
scsi_bus_poweroff---of 13
scsi_bus_prepare---of 4
scsi_bus_restore---of 11
scsi_bus_resume---of 11
scsi_bus_suspend---of 13
scsi_bus_thaw---of 11
scsi_runtime_idle---of 5
scsi_runtime_resume28%of 11
scsi_runtime_suspend---of 12
-----------
SUMMARY32%of 16
__build_feature_ctl---of 81
__check_input_term---of 31
append_ctl_name.isra.0---of 7
build_audio_procunit---of 91
build_audio_procunit.cold---of 3
build_connector_control---of 31
check_input_term---of 1
check_mapped_name.constprop.0---of 5
connector_mixer_resume---of 1
convert_signed_value.isra.0---of 15
find_audio_control_unit---of 5
get_cluster_channels_v3---of 5
get_connector_value---of 16
get_ctl_value---of 24
get_min_max_with_quirks.isra.0---of 88
get_term_name---of 21
init_cur_mix_raw---of 5
mixer_ctl_connector_get---of 3
mixer_ctl_feature_get---of 23
mixer_ctl_feature_info---of 9
mixer_ctl_feature_put---of 25
mixer_ctl_master_bool_get---of 4
mixer_ctl_procunit_get---of 10
mixer_ctl_procunit_put---of 10
mixer_ctl_selector_get---of 10
mixer_ctl_selector_info---of 5
mixer_ctl_selector_put---of 10
parse_audio_unit---of 180
parse_audio_unit.cold---of 13
parse_term_proc_unit---of 15
restore_mixer_value---of 11
snd_usb_create_mixer---of 66
snd_usb_get_cur_mix_value---of 10
snd_usb_get_cur_mix_value.cold---of 2
snd_usb_mixer_add_list---of 8
snd_usb_mixer_controls---of 25
snd_usb_mixer_dev_free100%of 1
snd_usb_mixer_disconnect75%of 8
snd_usb_mixer_dump_cval---of 1
snd_usb_mixer_elem_free100%of 1
snd_usb_mixer_elem_init_std---of 1
snd_usb_mixer_free37%of 11
snd_usb_mixer_interrupt10%of 44
snd_usb_mixer_interrupt.cold---of 1
snd_usb_mixer_notify_id---of 9
snd_usb_mixer_proc_read---of 10
snd_usb_mixer_resume---of 8
snd_usb_mixer_set_ctl_value---of 25
snd_usb_mixer_suspend---of 3
snd_usb_mixer_vol_tlv---of 4
snd_usb_set_cur_mix_value---of 11
snd_usb_set_cur_mix_value.cold---of 2
usb_mixer_selector_elem_free---of 7
-----------
SUMMARY25%of 65
__ieee80211_recalc_idle---of 21
__ieee80211_recalc_txpower---of 33
ieee80211_activate_links_work---of 1
ieee80211_add_virtual_monitor---of 19
ieee80211_adjust_monitor_flags---of 8
ieee80211_assign_perm_addr---of 42
ieee80211_assign_perm_addr.cold---of 1
ieee80211_change_mac---of 47
ieee80211_check_concurrent_iface---of 33
ieee80211_check_queues---of 17
ieee80211_del_virtual_monitor---of 15
ieee80211_do_open---of 93
ieee80211_do_stop---of 92
ieee80211_get_stats64---of 1
ieee80211_idle_off---of 1
ieee80211_if_add---of 58
ieee80211_if_add.cold---of 1
ieee80211_if_change_type---of 31
ieee80211_if_free---of 1
ieee80211_if_remove---of 12
ieee80211_if_setup---of 1
ieee80211_iface_exit---of 1
ieee80211_iface_init---of 1
ieee80211_iface_work---of 66
ieee80211_monitor_select_queue---of 6
ieee80211_netdev_fill_forward_path---of 60
ieee80211_netdev_setup_tc---of 21
ieee80211_open---of 7
ieee80211_recalc_idle---of 2
ieee80211_recalc_offload---of 41
ieee80211_recalc_txpower---of 4
ieee80211_remove_interfaces---of 19
ieee80211_sdata_stop---of 3
ieee80211_set_default_queues---of 8
ieee80211_set_multicast_list---of 6
ieee80211_set_sdata_offload_flags---of 15
ieee80211_set_vif_encap_ops---of 9
ieee80211_setup_sdata---of 14
ieee80211_stop---of 15
ieee80211_uninit---of 1
ieee80211_vif_dec_num_mcast---of 4
ieee80211_vif_inc_num_mcast---of 4
netdev_notify29%of 7
trace_drv_return_int---of 15
trace_drv_return_void---of 15
-----------
SUMMARY29%of 7
__dump_midi---of 1
dump_midi---of 8
event_process_midi---of 13
midisynth_subscribe---of 7
midisynth_unsubscribe---of 5
midisynth_unuse---of 5
midisynth_use---of 7
snd_midi_input_event---of 12
snd_seq_midisynth_probe---of 68
snd_seq_midisynth_remove100%of 11
-----------
SUMMARY100%of 11
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
descriptors_changed35%of 32
get_bMaxPacketSize0 67%of 9
hub_activate40%of 101
hub_disconnect82%of 11
hub_event53%of 281
hub_ext_port_status90%of 20
hub_get---of 5
hub_hub_status---of 9
hub_init_func2---of 1
hub_init_func3---of 1
hub_ioctl---of 14
hub_irq82%of 11
hub_irq.cold---of 1
hub_port_debounce87%of 15
hub_port_disable53%of 21
hub_port_init45%of 206
hub_port_init.cold---of 1
hub_port_logical_disconnect67%of 3
hub_port_reset53%of 121
hub_port_warm_reset_required.part.0 100%of 3
hub_post_reset---of 3
hub_power_on82%of 16
hub_pre_reset---of 3
hub_probe---of 150
hub_put---of 5
hub_quiesce78%of 9
hub_reset_resume---of 3
hub_resubmit_irq_urb50%of 6
hub_resume46%of 11
hub_retry_irq_urb---of 1
hub_suspend---of 31
hub_tt_work---of 12
kick_hub_wq47%of 13
led_work---of 11
recursively_mark_NOTATTACHED91%of 11
release_devnum100%of 2
set_port_led.isra.0---of 2
update_port_device_state100%of 5
usb_authorize_device---of 8
usb_clear_port_feature---of 1
usb_deauthorize_device---of 3
usb_device_is_owned88%of 8
usb_device_supports_lpm94%of 16
usb_disable_link_state---of 7
usb_disable_lpm31%of 13
usb_disable_ltm17%of 12
usb_disable_remote_wakeup75%of 4
usb_disconnect74%of 30
usb_enable_link_state---of 29
usb_enable_link_state.cold---of 1
usb_enable_lpm19%of 16
usb_enable_ltm10%of 10
usb_ep0_reinit---of 1
usb_get_hub_port_acpi_handle---of 9
usb_hub_adjust_deviceremovable---of 22
usb_hub_adjust_deviceremovable.cold---of 2
usb_hub_claim_port---of 10
usb_hub_cleanup---of 1
usb_hub_clear_tt_buffer---of 11
usb_hub_find_child100%of 9
usb_hub_init---of 5
usb_hub_port_status---of 1
usb_hub_release_all_ports---of 8
usb_hub_release_port---of 10
usb_hub_set_port_power67%of 6
usb_hub_to_struct_hub84%of 6
usb_kick_hub_wq---of 5
usb_new_device59%of 68
usb_new_device.cold---of 1
usb_port_disable---of 5
usb_port_is_power_on---of 4
usb_port_resume57%of 109
usb_port_suspend---of 58
usb_queue_reset_device---of 2
usb_remote_wakeup50%of 6
usb_remove_device---of 7
usb_reset_and_verify_device38%of 40
usb_reset_device---of 34
usb_root_hub_lost_power---of 1
usb_set_device_initiated_lpm---of 11
usb_set_device_state56%of 20
usb_set_lpm_timeout---of 10
usb_unlocked_disable_lpm100%of 3
usb_unlocked_enable_lpm100%of 2
usb_wakeup_enabled_descendants---of 7
usb_wakeup_notification---of 8
-----------
SUMMARY54%of 1285
can_stop_idle_tick---of 14
get_cpu_idle_time_us---of 3
get_cpu_iowait_time_us---of 3
get_cpu_sleep_time_us.part.0---of 16
get_jiffies_update50%of 10
tick_check_oneshot_change---of 9
tick_clock_notify---of 5
tick_do_update_jiffies64 70%of 10
tick_get_tick_sched---of 1
tick_irq_enter---of 11
tick_nohz_account_idle_time---of 4
tick_nohz_get_idle_calls---of 1
tick_nohz_get_idle_calls_cpu---of 1
tick_nohz_get_next_hrtimer---of 1
tick_nohz_get_sleep_length---of 8
tick_nohz_handler65%of 20
tick_nohz_idle_enter---of 13
tick_nohz_idle_exit---of 17
tick_nohz_idle_got_tick---of 3
tick_nohz_idle_restart_tick---of 2
tick_nohz_idle_retain_tick---of 1
tick_nohz_idle_stop_tick---of 61
tick_nohz_irq_exit---of 3
tick_nohz_lowres_handler---of 2
tick_nohz_next_event---of 16
tick_nohz_restart_sched_tick---of 8
tick_nohz_start_idle---of 10
tick_nohz_stop_idle---of 15
tick_nohz_tick_stopped100%of 1
tick_nohz_tick_stopped_cpu---of 1
tick_oneshot_notify---of 1
tick_sched_timer_dying---of 7
tick_setup_sched_timer---of 25
-----------
SUMMARY64%of 41
-----------
SUMMARY---of 0
__register_sysctl_table---of 98
do_sysctl_args---of 5
drop_sysctl_table70%of 13
erase_header100%of 3
find_entry.isra.0100%of 9
get_links---of 14
insert_header---of 46
namecmp100%of 3
proc_sys_call_handler---of 28
proc_sys_compare---of 15
proc_sys_delete100%of 1
proc_sys_evict_inode38%of 8
proc_sys_fill_cache.isra.0---of 13
proc_sys_getattr---of 12
proc_sys_lookup---of 23
proc_sys_make_inode---of 13
proc_sys_open---of 12
proc_sys_permission---of 15
proc_sys_poll---of 17
proc_sys_poll_notify---of 2
proc_sys_read---of 1
proc_sys_readdir---of 49
proc_sys_revalidate---of 3
proc_sys_setattr---of 4
proc_sys_write---of 1
process_sysctl_arg---of 33
put_links12%of 25
register_sysctl_mount_point---of 1
register_sysctl_sz---of 1
retire_sysctl_set---of 2
setup_sysctl_set---of 1
sysctl_err---of 1
sysctl_follow_link---of 12
sysctl_is_alias---of 4
sysctl_perm---of 10
sysctl_print_dir---of 9
unregister_sysctl_table100%of 2
xlate_dir100%of 8
-----------
SUMMARY57%of 72
ata_attach_transport---of 3
ata_port_classify---of 7
ata_release_transport---of 7
ata_show_ering---of 5
ata_tdev_match67%of 3
ata_tlink_add---of 22
ata_tlink_delete---of 3
ata_tlink_match67%of 3
ata_tport_add---of 10
ata_tport_delete---of 1
ata_tport_match67%of 3
ata_tport_release---of 1
get_ata_xfer_names---of 4
show_ata_dev_class---of 5
show_ata_dev_dma_mode---of 1
show_ata_dev_ering---of 1
show_ata_dev_gscr---of 5
show_ata_dev_id---of 5
show_ata_dev_pio_mode---of 1
show_ata_dev_spdn_cnt---of 1
show_ata_dev_trim---of 12
show_ata_dev_xfer_mode---of 1
show_ata_link_hw_sata_spd_limit---of 1
show_ata_link_sata_spd---of 1
show_ata_link_sata_spd_limit---of 1
show_ata_port_idle_irq---of 1
show_ata_port_nr_pmp_links---of 1
show_ata_port_port_no---of 1
-----------
SUMMARY67%of 9
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
arch_do_signal_or_restart38%of 37
get_sigframe35%of 29
get_sigframe_size---of 1
sigaltstack_size_valid---of 9
signal_fault---of 5
-----------
SUMMARY37%of 66
_chaoskey_fill65%of 34
chaos_read_callback60%of 5
chaoskey_disconnect67%of 12
chaoskey_free75%of 4
chaoskey_open56%of 9
chaoskey_probe---of 29
chaoskey_read72%of 28
chaoskey_release---of 16
chaoskey_resume---of 6
chaoskey_rng_read---of 12
chaoskey_suspend---of 4
-----------
SUMMARY67%of 92
-----------
SUMMARY---of 0
__do_compat_sys_ioctl---of 19
__ia32_compat_sys_ioctl---of 1
__ia32_sys_ioctl---of 10
__x64_compat_sys_ioctl---of 1
__x64_sys_ioctl100%of 10
compat_ioctl_preallocate---of 7
compat_ptr_ioctl---of 3
copy_fsxattr_to_user100%of 2
do_vfs_ioctl72%of 89
fiemap_fill_next_extent---of 13
fiemap_prep---of 10
fileattr_fill_flags100%of 14
fileattr_fill_xflags100%of 14
ioctl_get_fs_sysfs_path.isra.0 43%of 7
ioctl_preallocate72%of 7
vfs_fileattr_get---of 3
vfs_fileattr_set89%of 34
vfs_ioctl---of 5
-----------
SUMMARY81%of 177
pvr2_channel_claim_stream---of 14
pvr2_channel_create_mpeg_stream---of 3
pvr2_channel_done---of 12
pvr2_channel_get_limited_inputs---of 1
pvr2_channel_init---of 8
pvr2_channel_limit_inputs---of 18
pvr2_context_create---of 10
pvr2_context_destroy---of 13
pvr2_context_disconnect75%of 4
pvr2_context_global_done---of 8
pvr2_context_global_init---of 3
pvr2_context_notify---of 7
pvr2_context_reset_input_limits---of 5
pvr2_context_set_notify40%of 15
pvr2_context_thread_func---of 51
-----------
SUMMARY48%of 19
konepure_probe---of 18
konepure_raw_event---of 6
konepure_remove100%of 5
roccat_common2_sysfs_read_actual_profile---of 1
roccat_common2_sysfs_read_info---of 1
roccat_common2_sysfs_read_profile_buttons---of 1
roccat_common2_sysfs_read_profile_settings---of 1
roccat_common2_sysfs_read_sensor---of 1
roccat_common2_sysfs_read_tcu---of 1
roccat_common2_sysfs_read_tcu_image---of 1
roccat_common2_sysfs_write_actual_profile---of 1
roccat_common2_sysfs_write_control---of 1
roccat_common2_sysfs_write_info---of 1
roccat_common2_sysfs_write_macro---of 1
roccat_common2_sysfs_write_profile_buttons---of 1
roccat_common2_sysfs_write_profile_settings---of 1
roccat_common2_sysfs_write_sensor---of 1
roccat_common2_sysfs_write_talk---of 1
roccat_common2_sysfs_write_tcu---of 1
-----------
SUMMARY100%of 5
kovaplus_probe---of 26
kovaplus_raw_event---of 24
kovaplus_remove40%of 5
kovaplus_sysfs_read.constprop.0---of 7
kovaplus_sysfs_read_info---of 6
kovaplus_sysfs_read_profile_buttons---of 6
kovaplus_sysfs_read_profile_settings---of 6
kovaplus_sysfs_read_profilex_buttons---of 5
kovaplus_sysfs_read_profilex_settings---of 5
kovaplus_sysfs_set_actual_profile---of 9
kovaplus_sysfs_show_actual_cpi---of 1
kovaplus_sysfs_show_actual_profile---of 1
kovaplus_sysfs_show_actual_sensitivity_x---of 1
kovaplus_sysfs_show_actual_sensitivity_y---of 1
kovaplus_sysfs_show_firmware_version---of 1
kovaplus_sysfs_write_control---of 5
kovaplus_sysfs_write_info---of 5
kovaplus_sysfs_write_profile_buttons---of 5
kovaplus_sysfs_write_profile_settings---of 5
-----------
SUMMARY40%of 5
__bpf_trace_hrtimer_class---of 1
__bpf_trace_hrtimer_expire_entry---of 1
__bpf_trace_hrtimer_init---of 1
__bpf_trace_hrtimer_start---of 1
__bpf_trace_itimer_expire---of 1
__bpf_trace_itimer_state---of 1
__bpf_trace_tick_stop---of 1
__bpf_trace_timer_base_idle---of 1
__bpf_trace_timer_class---of 1
__bpf_trace_timer_expire_entry---of 1
__bpf_trace_timer_start---of 1
__get_next_timer_interrupt---of 52
__mod_timer45%of 54
__round_jiffies---of 6
__round_jiffies_relative---of 6
__round_jiffies_up---of 3
__round_jiffies_up_relative---of 3
__run_timers---of 48
__timer_delete72%of 7
__timer_delete_sync62%of 13
__traceiter_hrtimer_cancel---of 3
__traceiter_hrtimer_expire_entry---of 3
__traceiter_hrtimer_expire_exit---of 3
__traceiter_hrtimer_init---of 3
__traceiter_hrtimer_start---of 3
__traceiter_itimer_expire---of 3
__traceiter_itimer_state---of 3
__traceiter_tick_stop---of 3
__traceiter_timer_base_idle---of 4
__traceiter_timer_cancel---of 3
__traceiter_timer_expire_entry---of 3
__traceiter_timer_expire_exit---of 3
__traceiter_timer_init---of 3
__traceiter_timer_start---of 3
__try_to_del_timer_sync80%of 5
add_timer---of 3
add_timer_global67%of 3
add_timer_local---of 3
add_timer_on62%of 18
calc_wheel_index35%of 20
call_timer_fn---of 36
destroy_timer_on_stack---of 1
detach_if_pending60%of 25
do_init_timer67%of 3
enqueue_timer46%of 24
fetch_next_timer_interrupt---of 26
fetch_next_timer_interrupt_remote---of 7
get_next_timer_interrupt---of 1
init_timer_key38%of 16
init_timer_on_stack_key---of 3
lock_timer_base75%of 8
mod_timer100%of 1
mod_timer_pending---of 1
msleep100%of 2
msleep_interruptible---of 5
next_expiry_recalc---of 10
perf_trace_hrtimer_class---of 5
perf_trace_hrtimer_expire_entry---of 5
perf_trace_hrtimer_init---of 5
perf_trace_hrtimer_start---of 5
perf_trace_itimer_expire---of 7
perf_trace_itimer_state---of 5
perf_trace_tick_stop---of 5
perf_trace_timer_base_idle---of 5
perf_trace_timer_class---of 5
perf_trace_timer_expire_entry---of 5
perf_trace_timer_start---of 5
process_timeout---of 1
round_jiffies---of 6
round_jiffies_relative---of 6
round_jiffies_up---of 3
round_jiffies_up_relative---of 3
run_timer_base---of 3
run_timer_softirq---of 2
schedule_timeout84%of 6
schedule_timeout_idle---of 1
schedule_timeout_interruptible---of 1
schedule_timeout_killable---of 1
schedule_timeout_uninterruptible---of 1
stub_timer---of 1
timer_base_is_idle---of 1
timer_base_try_to_set_idle---of 3
timer_clear_idle---of 16
timer_debug_hint---of 4
timer_delete100%of 1
timer_delete_sync100%of 1
timer_expire_remote---of 3
timer_fixup_activate---of 20
timer_fixup_assert_init---of 19
timer_fixup_free---of 4
timer_fixup_init---of 4
timer_is_static_object---of 4
timer_lock_remote_bases---of 5
timer_migration_handler---of 7
timer_reduce---of 1
timer_shutdown---of 1
timer_shutdown_sync---of 1
timer_unlock_remote_bases---of 1
timer_update_keys---of 5
timers_dead_cpu---of 34
timers_prepare_cpu---of 2
timers_update_nohz---of 1
trace_event_raw_event_hrtimer_class---of 6
trace_event_raw_event_hrtimer_expire_entry---of 6
trace_event_raw_event_hrtimer_init---of 6
trace_event_raw_event_hrtimer_start---of 6
trace_event_raw_event_itimer_expire---of 8
trace_event_raw_event_itimer_state---of 6
trace_event_raw_event_tick_stop---of 6
trace_event_raw_event_timer_base_idle---of 6
trace_event_raw_event_timer_class---of 6
trace_event_raw_event_timer_expire_entry---of 6
trace_event_raw_event_timer_start---of 6
trace_raw_output_hrtimer_class---of 5
trace_raw_output_hrtimer_expire_entry---of 5
trace_raw_output_hrtimer_init---of 4
trace_raw_output_hrtimer_start---of 5
trace_raw_output_itimer_expire---of 5
trace_raw_output_itimer_state---of 4
trace_raw_output_tick_stop---of 5
trace_raw_output_timer_base_idle---of 5
trace_raw_output_timer_class---of 5
trace_raw_output_timer_expire_entry---of 5
trace_raw_output_timer_start---of 4
try_to_del_timer_sync---of 1
update_process_times100%of 9
usleep_range_state---of 5
-----------
SUMMARY56%of 216
-----------
SUMMARY---of 0
__printk_safe_enter100%of 1
__printk_safe_exit100%of 1
vprintk67%of 6
-----------
SUMMARY75%of 8
xfrm_dev_event60%of 5
-----------
SUMMARY60%of 5
timerqueue_add85%of 13
timerqueue_del80%of 5
timerqueue_iterate_next---of 4
-----------
SUMMARY84%of 18
-----------
SUMMARY---of 0
drv_config---of 33
ieee80211_alloc_hw_nm---of 51
ieee80211_bss_info_change_notify---of 45
ieee80211_calc_hw_conf_chan---of 67
ieee80211_configure_filter---of 71
ieee80211_emulate_add_chanctx---of 5
ieee80211_emulate_change_chanctx---of 3
ieee80211_emulate_remove_chanctx---of 3
ieee80211_emulate_switch_vif_chanctx---of 5
ieee80211_free_ack_frame---of 3
ieee80211_free_hw72%of 7
ieee80211_hw_conf_chan---of 8
ieee80211_hw_conf_init---of 9
ieee80211_hw_config---of 7
ieee80211_ifa6_changed---of 22
ieee80211_ifa_changed---of 28
ieee80211_link_info_change_notify---of 13
ieee80211_reconfig_filter---of 1
ieee80211_register_hw---of 205
ieee80211_reset_erp_info---of 1
ieee80211_restart_hw---of 16
ieee80211_restart_work---of 10
ieee80211_tasklet_handler---of 7
ieee80211_unregister_hw---of 4
ieee80211_vif_cfg_change_notify---of 20
trace_drv_return_void---of 15
trace_drv_vif_cfg_changed---of 15
-----------
SUMMARY72%of 7
tomoyo_add_slash.part.0---of 5
tomoyo_check_mkdev_acl---of 20
tomoyo_check_mkdev_acl.cold---of 1
tomoyo_check_open_permission86%of 14
tomoyo_check_path2_acl---of 14
tomoyo_check_path2_acl.cold---of 1
tomoyo_check_path_acl75%of 8
tomoyo_check_path_acl.cold---of 1
tomoyo_check_path_number_acl67%of 12
tomoyo_check_path_number_acl.cold---of 1
tomoyo_compare_name_union---of 6
tomoyo_compare_number_union---of 6
tomoyo_execute_permission---of 6
tomoyo_merge_mkdev_acl---of 4
tomoyo_merge_path2_acl---of 4
tomoyo_merge_path_acl---of 4
tomoyo_merge_path_number_acl---of 4
tomoyo_mkdev_perm---of 7
tomoyo_path2_perm---of 17
tomoyo_path_number_perm77%of 13
tomoyo_path_perm54%of 13
tomoyo_path_permission100%of 4
tomoyo_put_name_union---of 4
tomoyo_put_number_union---of 2
tomoyo_same_mkdev_acl---of 15
tomoyo_same_mount_acl---of 11
tomoyo_same_path2_acl---of 5
tomoyo_same_path_acl---of 4
tomoyo_same_path_number_acl---of 7
tomoyo_update_mkdev_acl---of 12
tomoyo_update_mount_acl---of 8
tomoyo_write_file---of 33
-----------
SUMMARY74%of 64
active_duration_show---of 4
authorized_default_show---of 1
authorized_default_store---of 4
authorized_show---of 1
authorized_store---of 7
autosuspend_show---of 1
autosuspend_store---of 4
avoid_reset_quirk_show---of 1
avoid_reset_quirk_store---of 7
bAlternateSetting_show---of 1
bConfigurationValue_show---of 5
bConfigurationValue_store---of 7
bDeviceClass_show---of 1
bDeviceProtocol_show---of 1
bDeviceSubClass_show---of 1
bInterfaceClass_show---of 1
bInterfaceNumber_show---of 1
bInterfaceProtocol_show---of 1
bInterfaceSubClass_show---of 1
bMaxPacketSize0_show---of 1
bMaxPower_show---of 6
bNumConfigurations_show---of 1
bNumEndpoints_show---of 1
bNumInterfaces_show---of 5
bcdDevice_show---of 1
bmAttributes_show---of 5
bos_descriptors_read---of 4
busnum_show---of 1
configuration_show---of 6
connected_duration_show---of 1
descriptors_read---of 11
dev_bin_attrs_are_visible100%of 4
dev_string_attrs_are_visible100%of 9
devnum_show---of 1
devpath_show---of 1
devspec_show---of 1
iad_bFirstInterface_show---of 1
iad_bFunctionClass_show---of 1
iad_bFunctionProtocol_show---of 1
iad_bFunctionSubClass_show---of 1
iad_bInterfaceCount_show---of 1
idProduct_show---of 1
idVendor_show---of 1
interface_authorized_default_show---of 1
interface_authorized_default_store---of 6
interface_authorized_show---of 1
interface_authorized_store---of 7
interface_show---of 3
intf_assoc_attrs_are_visible---of 4
intf_wireless_status_attr_is_visible---of 5
level_show---of 5
level_store---of 13
ltm_capable_show---of 8
manufacturer_show---of 3
maxchild_show---of 1
modalias_show---of 1
persist_show---of 1
persist_store---of 5
product_show---of 3
quirks_show---of 1
remove_store---of 4
rx_lanes_show---of 1
serial_show---of 3
speed_show---of 3
supports_autosuspend_show---of 5
tx_lanes_show---of 1
urbnum_show---of 1
usb2_hardware_lpm_show---of 2
usb2_hardware_lpm_store---of 10
usb2_lpm_besl_show---of 1
usb2_lpm_besl_store---of 4
usb2_lpm_l1_timeout_show---of 1
usb2_lpm_l1_timeout_store---of 3
usb3_hardware_lpm_u1_show---of 4
usb3_hardware_lpm_u2_show---of 4
usb_create_sysfs_dev_files57%of 16
usb_create_sysfs_intf_files---of 8
usb_remove_sysfs_dev_files50%of 4
usb_remove_sysfs_intf_files100%of 2
usb_update_wireless_status_attr---of 3
version_show---of 1
wireless_status_show---of 5
-----------
SUMMARY75%of 35
__kthread_bind_mask---of 3
__kthread_cancel_work_sync---of 13
__kthread_create_on_node---of 10
__kthread_create_worker---of 8
__kthread_init_worker---of 1
__kthread_parkme---of 5
__kthread_queue_delayed_work---of 7
free_kthread_struct---of 6
get_kthread_comm---of 8
kthread---of 7
kthread_associate_blkcg---of 39
kthread_bind---of 1
kthread_bind_mask---of 1
kthread_blkcg29%of 7
kthread_cancel_delayed_work_sync---of 1
kthread_cancel_work_sync---of 1
kthread_complete_and_exit---of 3
kthread_create_on_cpu---of 5
kthread_create_on_node---of 1
kthread_create_worker---of 1
kthread_create_worker_on_cpu---of 1
kthread_data67%of 3
kthread_delayed_work_timer_fn---of 11
kthread_destroy_worker58%of 7
kthread_exit---of 1
kthread_flush_work---of 11
kthread_flush_work_fn---of 1
kthread_flush_worker100%of 1
kthread_freezable_should_stop---of 8
kthread_func---of 5
kthread_insert_work50%of 20
kthread_insert_work_sanity_check50%of 8
kthread_is_per_cpu100%of 5
kthread_mod_delayed_work---of 10
kthread_park---of 11
kthread_parkme---of 3
kthread_probe_data---of 5
kthread_queue_delayed_work---of 7
kthread_queue_work86%of 7
kthread_set_per_cpu---of 8
kthread_should_park---of 3
kthread_should_stop---of 3
kthread_should_stop_or_park---of 5
kthread_stop41%of 42
kthread_stop_put---of 8
kthread_unpark60%of 5
kthread_unuse_mm---of 8
kthread_use_mm---of 8
kthread_worker_fn---of 53
kthreadd---of 23
set_kthread_struct---of 7
to_kthread---of 3
tsk_fork_get_node---of 3
-----------
SUMMARY52%of 105
aa_audit_file---of 20
aa_file_perm17%of 72
aa_get_newest_label---of 40
aa_inherit_files---of 21
aa_lookup_fperms---of 5
aa_path_link---of 6
aa_path_perm---of 7
aa_str_perms---of 1
file_audit_cb---of 22
match_file---of 12
path_name---of 4
profile_path_link---of 13
profile_path_perm---of 6
update_file_ctx---of 15
-----------
SUMMARY17%of 72
-----------
SUMMARY---of 0
chroot_fs_refs---of 21
copy_fs_struct---of 3
current_umask100%of 1
exit_fs---of 3
free_fs_struct---of 1
set_fs_pwd---of 6
set_fs_root---of 6
unshare_fs_struct---of 4
-----------
SUMMARY100%of 1
__each_dev---of 4
__find_interface100%of 5
__usb_get_extra_descriptor89%of 9
match_endpoint---of 35
usb_alloc_coherent---of 5
usb_alloc_dev67%of 24
usb_alloc_dev.cold---of 2
usb_altnum_to_altsetting---of 5
usb_bus_notify80%of 10
usb_check_bulk_endpoints---of 10
usb_check_int_endpoints---of 10
usb_dev_complete---of 1
usb_dev_freeze---of 1
usb_dev_poweroff---of 1
usb_dev_prepare---of 1
usb_dev_restore---of 1
usb_dev_resume---of 1
usb_dev_suspend---of 1
usb_dev_thaw---of 1
usb_dev_uevent100%of 3
usb_devnode100%of 1
usb_disabled---of 1
usb_find_alt_setting---of 13
usb_find_common_endpoints---of 12
usb_find_common_endpoints_reverse---of 12
usb_find_interface100%of 3
usb_for_each_dev---of 1
usb_free_coherent100%of 4
usb_get_current_frame_number---of 1
usb_get_dev100%of 3
usb_get_intf---of 3
usb_ifnum_to_if100%of 6
usb_intf_get_dma_device---of 6
usb_lock_device_for_reset---of 14
usb_put_dev100%of 2
usb_put_intf100%of 2
usb_release_dev100%of 1
-----------
SUMMARY85%of 73
-----------
SUMMARY---of 0
bulk_immediate---of 6
iuu_clk.isra.0---of 27
iuu_close---of 6
iuu_init_termios---of 1
iuu_led.constprop.0---of 7
iuu_open---of 36
iuu_port_probe---of 8
iuu_port_remove100%of 1
iuu_rxcmd---of 4
iuu_set_termios---of 11
iuu_status_callback---of 3
iuu_tiocmget---of 1
iuu_tiocmset---of 9
iuu_uart_baud---of 17
iuu_uart_flush---of 21
iuu_uart_read_callback---of 24
iuu_uart_write---of 3
iuu_update_status_callback---of 11
read_buf_callback---of 10
read_immediate---of 6
read_rxcmd_callback---of 3
vcc_mode_show---of 1
vcc_mode_store---of 14
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
__acpi_match_device.constprop.0---of 21
acpi_bus_attach_private_data---of 4
acpi_bus_detach_private_data---of 1
acpi_bus_for_each_dev---of 1
acpi_bus_get_private_data---of 5
acpi_bus_get_status---of 14
acpi_bus_get_status_handle---of 5
acpi_bus_match---of 4
acpi_bus_notify---of 20
acpi_bus_register_driver---of 3
acpi_bus_table_handler---of 3
acpi_bus_unregister_driver---of 1
acpi_companion_match25%of 8
acpi_dev_for_each_child---of 1
acpi_dev_for_each_child_reverse---of 1
acpi_dev_for_one_check---of 4
acpi_dev_install_notify_handler---of 2
acpi_dev_remove_notify_handler---of 1
acpi_device_get_match_data---of 9
acpi_device_is_first_physical_node---of 7
acpi_device_probe---of 16
acpi_device_remove---of 5
acpi_device_uevent---of 1
acpi_driver_match_device---of 7
acpi_get_first_physical_node---of 3
acpi_match_acpi_device---of 1
acpi_match_device---of 1
acpi_match_device_ids---of 2
acpi_notify_device---of 1
acpi_of_match_device---of 15
acpi_print_osc_error---of 10
acpi_run_osc---of 20
acpi_sb_notify---of 4
acpi_set_modalias---of 5
sb_notify_work---of 2
set_copy_dsdt---of 1
-----------
SUMMARY25%of 8
clear_shadow_entry---of 8
const_folio_flags.constprop.0 40%of 10
folio_flags.constprop.0 40%of 10
folio_invalidate---of 2
generic_error_remove_folio---of 5
invalidate_inode_pages2---of 1
invalidate_inode_pages2_range---of 57
invalidate_mapping_pages---of 1
mapping_evict_folio---of 9
mapping_try_invalidate---of 16
pagecache_isize_extended---of 29
pagecache_isize_extended.cold---of 1
truncate_cleanup_folio47%of 13
truncate_cleanup_folio.cold---of 1
truncate_folio_batch_exceptionals.part.0---of 19
truncate_inode_folio100%of 3
truncate_inode_pages---of 1
truncate_inode_pages_final67%of 3
truncate_inode_pages_range4%of 66
truncate_inode_pages_range.cold---of 1
truncate_inode_partial_folio65%of 28
truncate_inode_partial_folio.cold---of 4
truncate_pagecache---of 1
truncate_pagecache_range---of 3
truncate_setsize---of 3
-----------
SUMMARY30%of 133
-----------
SUMMARY---of 0
power_supply_add_hwmon_sysfs---of 15
power_supply_hwmon_is_visible---of 32
power_supply_hwmon_read---of 20
power_supply_hwmon_read_string---of 4
power_supply_hwmon_write---of 19
power_supply_remove_hwmon_sysfs100%of 1
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
get_icons---of 8
hide_icon---of 1
input_close100%of 1
input_open---of 7
setChar.isra.0---of 14
set_icon---of 8
show_icon---of 1
show_line1---of 7
show_line2---of 7
show_line3---of 7
show_map---of 1
store_line1---of 6
store_line2---of 6
store_line3---of 6
store_map---of 3
store_ringtone---of 4
urb_ctl_callback100%of 9
urb_irq_callback---of 21
usb_cleanup86%of 7
usb_disconnect100%of 1
usb_probe---of 40
yealink_cmd.isra.0---of 2
yealink_do_idle_tasks.isra.0 87%of 15
yealink_set_ringtone.isra.0---of 7
-----------
SUMMARY91%of 33
isku_probe---of 18
isku_raw_event---of 13
isku_remove40%of 5
isku_sysfs_read_info---of 7
isku_sysfs_read_key_mask---of 7
isku_sysfs_read_keys_capslock---of 7
isku_sysfs_read_keys_easyzone---of 7
isku_sysfs_read_keys_function---of 7
isku_sysfs_read_keys_macro---of 7
isku_sysfs_read_keys_media---of 7
isku_sysfs_read_keys_thumbster---of 7
isku_sysfs_read_last_set---of 7
isku_sysfs_read_light---of 7
isku_sysfs_read_macro---of 7
isku_sysfs_set_actual_profile---of 7
isku_sysfs_show_actual_profile---of 1
isku_sysfs_write_control---of 6
isku_sysfs_write_key_mask---of 6
isku_sysfs_write_keys_capslock---of 6
isku_sysfs_write_keys_easyzone---of 6
isku_sysfs_write_keys_function---of 6
isku_sysfs_write_keys_macro---of 6
isku_sysfs_write_keys_media---of 6
isku_sysfs_write_keys_thumbster---of 6
isku_sysfs_write_last_set---of 6
isku_sysfs_write_light---of 6
isku_sysfs_write_macro---of 6
isku_sysfs_write_reset---of 6
isku_sysfs_write_talk---of 6
isku_sysfs_write_talkfx---of 6
-----------
SUMMARY40%of 5
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
copy_from_kernel_nofault_allowed100%of 6
copy_from_kernel_nofault_allowed.cold---of 1
-----------
SUMMARY100%of 6
-----------
SUMMARY---of 0
__blake2s_init.constprop.0---of 1
__get_random_u32_below---of 6
__ia32_sys_getrandom---of 11
__x64_sys_getrandom---of 11
_credit_init_bits.part.0---of 15
_get_random_bytes---of 9
add_device_randomness100%of 1
add_disk_randomness---of 3
add_hwgenerator_randomness---of 14
add_input_randomness100%of 2
add_interrupt_randomness89%of 9
add_timer_randomness40%of 15
blake2s.constprop.0---of 1
crng_fast_key_erasure---of 3
crng_make_state---of 33
crng_reseed---of 10
crng_set_ready---of 1
entropy_timer---of 5
execute_with_initialized_rng---of 5
extract_entropy.constprop.0---of 12
fast_mix100%of 1
get_random_bytes---of 1
get_random_bytes_user---of 15
get_random_u16---of 29
get_random_u3252%of 29
get_random_u64---of 29
get_random_u8---of 29
mix_interrupt_randomness---of 11
proc_do_rointvec---of 3
proc_do_uuid---of 8
rand_initialize_disk---of 2
random_fasync---of 1
random_ioctl---of 27
random_online_cpu---of 1
random_pm_notification---of 5
random_poll---of 7
random_prepare_cpu---of 1
random_read_iter---of 9
random_write_iter---of 1
rng_is_initialized---of 4
try_to_generate_entropy---of 31
urandom_read_iter---of 11
wait_for_random_bytes---of 16
write_pool_user---of 9
-----------
SUMMARY58%of 57
__generic_remap_file_range_prep---of 97
__generic_remap_file_range_prep.cold---of 3
const_folio_flags.constprop.0---of 10
folio_flags.constprop.0---of 10
generic_remap_file_range_prep---of 1
vfs_clone_file_range10%of 50
vfs_dedupe_file_range23%of 27
vfs_dedupe_file_range_one---of 22
-----------
SUMMARY15%of 77
-----------
SUMMARY---of 0
-----------
SUMMARY---of 0
__platform_create_bundle---of 11
__platform_driver_probe---of 6
__platform_driver_register---of 1
__platform_get_irq_byname---of 14
__platform_match---of 1
__platform_register_drivers---of 12
devm_platform_get_and_ioremap_resource---of 8
devm_platform_get_irqs_affinity---of 21
devm_platform_get_irqs_affinity_release---of 11
devm_platform_ioremap_resource---of 6
devm_platform_ioremap_resource_byname---of 7
driver_override_show---of 1
driver_override_store---of 5
is_bound_to_driver---of 1
modalias_show---of 8
numa_node_show---of 1
platform_add_devices---of 13
platform_dev_attrs_visible---of 4
platform_device_add---of 33
platform_device_add_data---of 4
platform_device_add_resources---of 4
platform_device_alloc---of 8
platform_device_del---of 3
platform_device_del.part.0 50%of 6
platform_device_put---of 3
platform_device_register---of 6
platform_device_register_full---of 17
platform_device_release100%of 1
platform_device_unregister100%of 3
platform_dma_cleanup100%of 2
platform_dma_configure---of 14
platform_driver_unregister---of 1
platform_find_device_by_driver---of 1
platform_get_irq---of 4
platform_get_irq_byname---of 4
platform_get_irq_byname_optional---of 1
platform_get_irq_optional---of 32
platform_get_mem_or_io---of 6
platform_get_resource---of 6
platform_get_resource_byname---of 7
platform_irq_count---of 4
platform_match---of 12
platform_pm_freeze---of 10
platform_pm_poweroff---of 10
platform_pm_restore---of 8
platform_pm_resume---of 8
platform_pm_suspend---of 10
platform_pm_thaw---of 8
platform_probe---of 10
platform_probe_fail---of 1
platform_remove50%of 6
platform_shutdown---of 3
platform_uevent80%of 5
platform_unregister_drivers---of 5
-----------
SUMMARY70%of 23
dsbr100_setfreq---of 6
dsbr100_start---of 5
dsbr100_stop---of 4
usb_dsbr100_disconnect100%of 1
usb_dsbr100_probe---of 16
usb_dsbr100_release100%of 1
usb_dsbr100_resume---of 4
usb_dsbr100_s_ctrl---of 6
usb_dsbr100_suspend---of 4
vidioc_g_frequency---of 3
vidioc_g_tuner---of 7
vidioc_querycap---of 1
vidioc_s_frequency---of 4
vidioc_s_tuner---of 2
-----------
SUMMARY100%of 2
-----------
SUMMARY---of 0
of_device_get_match_data---of 7
of_device_make_bus_id---of 20
of_device_modalias---of 7
of_device_uevent19%of 11
of_device_uevent_modalias38%of 8
of_dma_configure_id---of 38
of_dma_configure_id.cold---of 1
of_match_device---of 6
-----------
SUMMARY27%of 19
__blk_mq_all_tag_iter---of 45
__blk_mq_all_tag_iter.cold---of 10
__blk_mq_get_tag---of 18
__blk_mq_tag_busy---of 8
__blk_mq_tag_idle---of 6
blk_mq_all_tag_iter---of 1
blk_mq_find_and_get_req---of 8
blk_mq_free_tags100%of 1
blk_mq_get_tag---of 28
blk_mq_get_tags---of 7
blk_mq_init_bitmaps---of 4
blk_mq_init_tags---of 6
blk_mq_put_tag---of 5
blk_mq_put_tags---of 1
blk_mq_queue_tag_busy_iter---of 103
blk_mq_queue_tag_busy_iter.cold---of 20
blk_mq_tag_resize_shared_tags---of 1
blk_mq_tag_update_depth---of 9
blk_mq_tag_update_sched_shared_tags---of 1
blk_mq_tag_wakeup_all---of 2
blk_mq_tagset_busy_iter---of 6
blk_mq_tagset_count_completed_rqs---of 4
blk_mq_tagset_wait_completed_request---of 3
blk_mq_unique_tag---of 1
bt_iter---of 12
bt_tags_iter---of 13
-----------
SUMMARY100%of 1
-----------
SUMMARY---of 0
__folio_cancel_dirty11%of 19
__folio_end_writeback---of 40
__folio_mark_dirty---of 60
__folio_start_writeback---of 36
__wb_calc_thresh---of 7
__wb_update_bandwidth---of 15
balance_dirty_pages---of 129
balance_dirty_pages.cold---of 2
balance_dirty_pages_ratelimited100%of 1
balance_dirty_pages_ratelimited_flags3%of 87
bdi_get_max_bytes---of 1
bdi_get_min_bytes---of 1
bdi_set_max_bytes---of 6
bdi_set_max_ratio---of 5
bdi_set_max_ratio_no_scale---of 5
bdi_set_min_bytes---of 8
bdi_set_min_ratio---of 7
bdi_set_min_ratio_no_scale---of 7
bdi_set_strict_limit---of 6
const_folio_flags.constprop.0 40%of 10
dirty_background_bytes_handler---of 4
dirty_background_ratio_handler---of 4
dirty_bytes_handler---of 8
dirty_ratio_handler---of 8
dirty_writeback_centisecs_handler---of 6
do_writepages---of 26
domain_dirty_limits---of 31
domain_update_dirty_limit---of 8
filemap_dirty_folio---of 6
folio_account_cleaned---of 11
folio_clear_dirty_for_io---of 35
folio_flags.constprop.0 40%of 10
folio_mark_dirty58%of 7
folio_redirty_for_writepage---of 22
folio_wait_stable---of 2
folio_wait_writeback6%of 17
folio_wait_writeback_killable---of 18
global_dirty_limits---of 1
global_dirtyable_memory---of 1
laptop_io_completion---of 1
laptop_mode_timer_fn---of 1
laptop_sync_completion---of 13
node_dirty_ok---of 10
noop_dirty_folio100%of 4
page_writeback_cpu_online---of 4
percpu_ref_put_many.constprop.0---of 15
percpu_ref_tryget_many.constprop.0---of 17
set_page_dirty_lock---of 10
tag_pages_for_writeback---of 22
wb_calc_thresh---of 1
wb_domain_exit---of 1
wb_domain_init---of 1
wb_over_bg_thresh---of 15
wb_position_ratio---of 17
wb_update_bandwidth---of 1
wb_update_dirty_ratelimit---of 28
wb_writeout_inc---of 13
write_cache_pages---of 5
writeback_iter---of 66
writeback_set_ratelimit---of 4
writeout_period---of 3
-----------
SUMMARY15%of 155
uclogic_params_cleanup55%of 11
uclogic_params_frame_init_with_desc---of 6
uclogic_params_get_desc---of 22
uclogic_params_get_str_desc.constprop.0---of 9
uclogic_params_hid_dbg---of 70
uclogic_params_huion_init---of 85
uclogic_params_init---of 45
uclogic_params_init_with_opt_desc---of 11
uclogic_params_pen_init_v1---of 16
uclogic_params_ugee_v2_init---of 38
uclogic_params_ugee_v2_reconnect_work---of 1
uclogic_probe_interface.constprop.0---of 7
-----------
SUMMARY55%of 11
-----------
SUMMARY---of 0
__bpf_trace_ma_op---of 1
__bpf_trace_ma_read---of 1
__bpf_trace_ma_write---of 1
__mt_destroy---of 18
__mt_dup---of 7
__traceiter_ma_op---of 3
__traceiter_ma_read---of 3
__traceiter_ma_write---of 3
ma_free_rcu---of 3
mab_calc_split---of 36
mab_mas_cp---of 28
mab_shift_right---of 15
mas_alloc_cyclic23%of 77
mas_alloc_nodes---of 33
mas_ascend27%of 42
mas_commit_b_node.isra.0---of 60
mas_descend46%of 24
mas_destroy10%of 30
mas_destroy_rebalance---of 137
mas_dump---of 11
mas_dup_build.constprop.0---of 70
mas_dup_free.part.0---of 30
mas_empty_area39%of 165
mas_empty_area_rev---of 148
mas_erase---of 87
mas_expected_entries---of 7
mas_find45%of 27
mas_find_range---of 27
mas_find_range_rev---of 4
mas_find_rev---of 4
mas_find_rev_setup.constprop.0---of 34
mas_insert.isra.0---of 63
mas_leaf_max_gap38%of 29
mas_mab_cp---of 33
mas_new_root.isra.0---of 19
mas_next---of 4
mas_next_node47%of 62
mas_next_range75%of 4
mas_next_setup.constprop.0 16%of 19
mas_next_sibling---of 16
mas_next_slot29%of 113
mas_node_count_gfp---of 7
mas_nomem28%of 11
mas_parent_gap48%of 36
mas_pause---of 1
mas_pop_node---of 19
mas_preallocate25%of 52
mas_prev75%of 4
mas_prev_node7%of 65
mas_prev_range---of 4
mas_prev_setup.constprop.0 18%of 39
mas_prev_slot18%of 112
mas_push_data---of 38
mas_rebalance.isra.0---of 34
mas_root_expand.isra.0---of 16
mas_set_height---of 4
mas_set_parent---of 7
mas_spanning_rebalance.isra.0---of 156
mas_split.isra.0---of 27
mas_state_walk---of 22
mas_store---of 7
mas_store_b_node---of 31
mas_store_gfp---of 7
mas_store_prealloc---of 4
mas_topiary_replace---of 156
mas_update_gap62%of 18
mas_walk48%of 38
mas_wr_append71%of 17
mas_wr_bnode---of 1
mas_wr_dump---of 1
mas_wr_end_piv26%of 27
mas_wr_node_store---of 62
mas_wr_slot_store---of 51
mas_wr_spanning_store.isra.0---of 52
mas_wr_store_entry.isra.0---of 60
mas_wr_store_setup38%of 8
mas_wr_walk59%of 46
mas_wr_walk_index.isra.0---of 38
mast_fill_bnode---of 31
mast_spanning_rebalance.isra.0---of 39
mast_split_data---of 20
mt_cache_shrink---of 1
mt_destroy_walk---of 71
mt_dump---of 18
mt_dump_entry---of 8
mt_dump_node---of 114
mt_dump_range---of 7
mt_find50%of 67
mt_find_after---of 3
mt_free_rcu---of 1
mt_free_walk---of 23
mt_next---of 14
mt_prev---of 14
mt_validate57%of 292
mt_validate_nulls67%of 56
mte_dead_walk---of 12
mte_destroy_descend---of 25
mtree_alloc_cyclic67%of 6
mtree_alloc_range---of 14
mtree_alloc_rrange---of 14
mtree_destroy---of 1
mtree_dup---of 7
mtree_erase---of 1
mtree_insert---of 1
mtree_insert_range---of 20
mtree_load56%of 65
mtree_range_walk70%of 39
mtree_store---of 1
mtree_store_range---of 77
perf_trace_ma_op---of 5
perf_trace_ma_read---of 5
perf_trace_ma_write---of 5
trace_event_raw_event_ma_op---of 6
trace_event_raw_event_ma_read---of 6
trace_event_raw_event_ma_write---of 6
trace_ma_op---of 15
trace_ma_read.constprop.0 34%of 15
trace_ma_write34%of 15
trace_raw_output_ma_op---of 4
trace_raw_output_ma_read---of 4
trace_raw_output_ma_write---of 4
-----------
SUMMARY41%of 1620
bsearch100%of 5
-----------
SUMMARY100%of 5
udl_driver_gem_prime_import---of 3
udl_usb_disconnect100%of 1
udl_usb_probe---of 10
udl_usb_reset_resume---of 1
udl_usb_resume---of 1
udl_usb_suspend---of 3
-----------
SUMMARY100%of 1
___d_drop59%of 17
___d_drop.cold---of 1
__d_alloc43%of 21
__d_drop---of 2
__d_free---of 1
__d_free_external---of 1
__d_instantiate54%of 13
__d_lookup58%of 26
__d_lookup.cold---of 1
__d_lookup_rcu81%of 21
__d_lookup_rcu.cold---of 1
__d_lookup_rcu_op_compare---of 18
__d_lookup_rcu_op_compare.cold---of 1
__d_lookup_unhash50%of 18
__d_lookup_unhash_wake---of 1
__d_move---of 103
__d_obtain_alias---of 33
__d_rehash48%of 17
__d_rehash.cold---of 1
__dentry_kill87%of 29
d_add39%of 26
d_add_ci---of 13
d_alloc100%of 5
d_alloc_anon---of 1
d_alloc_cursor---of 5
d_alloc_name---of 1
d_alloc_parallel27%of 91
d_alloc_pseudo---of 5
d_ancestor---of 3
d_delete---of 5
d_drop---of 3
d_exact_alias---of 13
d_exchange---of 9
d_find_alias67%of 12
d_find_alias_rcu---of 12
d_find_any_alias100%of 3
d_flags_for_inode62%of 13
d_genocide---of 1
d_genocide_kill---of 7
d_hash_and_lookup---of 5
d_instantiate75%of 4
d_instantiate_new---of 7
d_invalidate54%of 13
d_lookup64%of 11
d_lru_add50%of 6
d_lru_del50%of 6
d_lru_shrink_move---of 5
d_make_root---of 6
d_mark_dontcache---of 5
d_mark_tmpfile---of 6
d_move---of 1
d_obtain_alias---of 1
d_obtain_root---of 1
d_prune_aliases---of 7
d_rehash---of 1
d_same_name89%of 9
d_set_d_op58%of 19
d_set_mounted---of 10
d_shrink_del---of 5
d_splice_alias---of 58
d_tmpfile---of 1
d_walk29%of 67
dentry_free50%of 8
dentry_lru_isolate---of 11
dentry_lru_isolate_shrink---of 3
dentry_unlink_inode67%of 15
dget_parent20%of 35
dput100%of 2
dput.part.045%of 59
dput_to_list---of 37
find_submount50%of 4
is_subdir---of 24
lock_for_kill31%of 13
path_check_mount---of 5
path_has_submounts---of 1
proc_nr_dentry---of 18
prune_dcache_sb---of 1
read_word_at_a_time100%of 1
release_dentry_name_snapshot---of 4
select_collect20%of 10
select_collect2---of 16
shrink_dcache_for_umount---of 16
shrink_dcache_parent9%of 35
shrink_dcache_sb---of 14
shrink_dentry_list---of 36
take_dentry_name_snapshot---of 4
to_shrink_list---of 8
umount_check---of 7
-----------
SUMMARY45%of 629